1 /* $OpenBSD: ip_carp.c,v 1.195 2012/04/11 17:42:53 mikeb Exp $ */ 2 3 /* 4 * Copyright (c) 2002 Michael Shalayeff. All rights reserved. 5 * Copyright (c) 2003 Ryan McBride. All rights reserved. 6 * Copyright (c) 2006-2008 Marco Pfatschbacher. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 21 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 23 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 25 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 26 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 27 * THE POSSIBILITY OF SUCH DAMAGE. 28 */ 29 30 /* 31 * TODO: 32 * - iface reconfigure 33 * - support for hardware checksum calculations; 34 * 35 */ 36 37 #include "ether.h" 38 39 #include <sys/param.h> 40 #include <sys/proc.h> 41 #include <sys/systm.h> 42 #include <sys/mbuf.h> 43 #include <sys/socket.h> 44 #include <sys/socketvar.h> 45 #include <sys/ioctl.h> 46 #include <sys/errno.h> 47 #include <sys/device.h> 48 #include <sys/kernel.h> 49 #include <sys/sysctl.h> 50 #include <sys/syslog.h> 51 52 #include <machine/cpu.h> 53 54 #include <net/if.h> 55 #include <net/if_types.h> 56 #include <net/if_llc.h> 57 #include <net/route.h> 58 #include <net/netisr.h> 59 60 /* for arc4random() */ 61 #include <dev/rndvar.h> 62 63 #if NFDDI > 0 64 #include <net/if_fddi.h> 65 #endif 66 67 #include <crypto/sha1.h> 68 69 #ifdef INET 70 #include <netinet/in.h> 71 #include <netinet/in_systm.h> 72 #include <netinet/in_var.h> 73 #include <netinet/ip.h> 74 #include <netinet/ip_var.h> 75 #include <netinet/if_ether.h> 76 #include <netinet/ip_ipsp.h> 77 78 #include <net/if_enc.h> 79 #include <net/if_dl.h> 80 #endif 81 82 #ifdef INET6 83 #include <netinet/icmp6.h> 84 #include <netinet/ip6.h> 85 #include <netinet6/ip6_var.h> 86 #include <netinet6/nd6.h> 87 #include <netinet6/in6_ifattach.h> 88 #endif 89 90 #include "bpfilter.h" 91 #if NBPFILTER > 0 92 #include <net/bpf.h> 93 #endif 94 95 #include <netinet/ip_carp.h> 96 97 struct carp_mc_entry { 98 LIST_ENTRY(carp_mc_entry) mc_entries; 99 union { 100 struct ether_multi *mcu_enm; 101 } mc_u; 102 struct sockaddr_storage mc_addr; 103 }; 104 #define mc_enm mc_u.mcu_enm 105 106 enum { HMAC_ORIG=0, HMAC_NOV6LL=1, HMAC_MAX=2 }; 107 108 struct carp_vhost_entry { 109 LIST_ENTRY(carp_vhost_entry) vhost_entries; 110 struct carp_softc *parent_sc; 111 int vhe_leader; 112 int vhid; 113 int advskew; 114 enum { INIT = 0, BACKUP, MASTER } state; 115 struct timeout ad_tmo; /* advertisement timeout */ 116 struct timeout md_tmo; /* master down timeout */ 117 struct timeout md6_tmo; /* master down timeout */ 118 119 u_int64_t vhe_replay_cookie; 120 121 /* authentication */ 122 #define CARP_HMAC_PAD 64 123 unsigned char vhe_pad[CARP_HMAC_PAD]; 124 SHA1_CTX vhe_sha1[HMAC_MAX]; 125 126 u_int8_t vhe_enaddr[ETHER_ADDR_LEN]; 127 struct sockaddr_dl vhe_sdl; /* for IPv6 ndp balancing */ 128 }; 129 130 struct carp_softc { 131 struct arpcom sc_ac; 132 #define sc_if sc_ac.ac_if 133 #define sc_carpdev sc_ac.ac_if.if_carpdev 134 void *ah_cookie; 135 void *lh_cookie; 136 struct ip_moptions sc_imo; 137 #ifdef INET6 138 struct ip6_moptions sc_im6o; 139 #endif /* INET6 */ 140 TAILQ_ENTRY(carp_softc) sc_list; 141 142 int sc_suppress; 143 int sc_bow_out; 144 int sc_demote_cnt; 145 146 int sc_sendad_errors; 147 #define CARP_SENDAD_MAX_ERRORS(sc) (3 * (sc)->sc_vhe_count) 148 int sc_sendad_success; 149 #define CARP_SENDAD_MIN_SUCCESS(sc) (3 * (sc)->sc_vhe_count) 150 151 char sc_curlladdr[ETHER_ADDR_LEN]; 152 153 LIST_HEAD(__carp_vhosthead, carp_vhost_entry) carp_vhosts; 154 int sc_vhe_count; 155 u_int8_t sc_vhids[CARP_MAXNODES]; 156 u_int8_t sc_advskews[CARP_MAXNODES]; 157 u_int8_t sc_balancing; 158 159 int sc_naddrs; 160 int sc_naddrs6; 161 int sc_advbase; /* seconds */ 162 163 /* authentication */ 164 unsigned char sc_key[CARP_KEY_LEN]; 165 166 u_int32_t sc_hashkey[2]; 167 u_int32_t sc_lsmask; /* load sharing mask */ 168 int sc_lscount; /* # load sharing interfaces (max 32) */ 169 int sc_delayed_arp; /* delayed ARP request countdown */ 170 171 struct in_addr sc_peer; 172 173 LIST_HEAD(__carp_mchead, carp_mc_entry) carp_mc_listhead; 174 struct carp_vhost_entry *cur_vhe; /* current active vhe */ 175 }; 176 177 int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, LOG_CRIT }; /* XXX for now */ 178 struct carpstats carpstats; 179 180 struct carp_if { 181 TAILQ_HEAD(, carp_softc) vhif_vrs; 182 int vhif_nvrs; 183 184 struct ifnet *vhif_ifp; 185 }; 186 187 #define CARP_LOG(l, sc, s) \ 188 do { \ 189 if (carp_opts[CARPCTL_LOG] >= l) { \ 190 if (sc) \ 191 log(l, "%s: ", \ 192 (sc)->sc_if.if_xname); \ 193 else \ 194 log(l, "carp: "); \ 195 addlog s; \ 196 addlog("\n"); \ 197 } \ 198 } while (0) 199 200 void carp_hmac_prepare(struct carp_softc *); 201 void carp_hmac_prepare_ctx(struct carp_vhost_entry *, u_int8_t); 202 void carp_hmac_generate(struct carp_vhost_entry *, u_int32_t *, 203 unsigned char *, u_int8_t); 204 int carp_hmac_verify(struct carp_vhost_entry *, u_int32_t *, 205 unsigned char *); 206 void carp_setroute(struct carp_softc *, int); 207 void carp_proto_input_c(struct mbuf *, struct carp_header *, int, 208 sa_family_t); 209 void carpattach(int); 210 void carpdetach(struct carp_softc *); 211 int carp_prepare_ad(struct mbuf *, struct carp_vhost_entry *, 212 struct carp_header *); 213 void carp_send_ad_all(void); 214 void carp_vhe_send_ad_all(struct carp_softc *); 215 void carp_send_ad(void *); 216 void carp_send_arp(struct carp_softc *); 217 void carp_master_down(void *); 218 int carp_ioctl(struct ifnet *, u_long, caddr_t); 219 int carp_vhids_ioctl(struct carp_softc *, struct carpreq *); 220 int carp_check_dup_vhids(struct carp_softc *, struct carp_if *, 221 struct carpreq *); 222 void carp_ifgroup_ioctl(struct ifnet *, u_long, caddr_t); 223 void carp_ifgattr_ioctl(struct ifnet *, u_long, caddr_t); 224 void carp_start(struct ifnet *); 225 void carp_setrun_all(struct carp_softc *, sa_family_t); 226 void carp_setrun(struct carp_vhost_entry *, sa_family_t); 227 void carp_set_state_all(struct carp_softc *, int); 228 void carp_set_state(struct carp_vhost_entry *, int); 229 void carp_multicast_cleanup(struct carp_softc *); 230 int carp_set_ifp(struct carp_softc *, struct ifnet *); 231 void carp_set_enaddr(struct carp_softc *); 232 void carp_set_vhe_enaddr(struct carp_vhost_entry *); 233 void carp_addr_updated(void *); 234 u_int32_t carp_hash(struct carp_softc *, u_char *); 235 int carp_set_addr(struct carp_softc *, struct sockaddr_in *); 236 int carp_join_multicast(struct carp_softc *); 237 #ifdef INET6 238 void carp_send_na(struct carp_softc *); 239 int carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *); 240 int carp_join_multicast6(struct carp_softc *); 241 #endif 242 int carp_clone_create(struct if_clone *, int); 243 int carp_clone_destroy(struct ifnet *); 244 int carp_ether_addmulti(struct carp_softc *, struct ifreq *); 245 int carp_ether_delmulti(struct carp_softc *, struct ifreq *); 246 void carp_ether_purgemulti(struct carp_softc *); 247 int carp_group_demote_count(struct carp_softc *); 248 void carp_update_lsmask(struct carp_softc *); 249 int carp_new_vhost(struct carp_softc *, int, int); 250 void carp_destroy_vhosts(struct carp_softc *); 251 void carp_del_all_timeouts(struct carp_softc *); 252 253 struct if_clone carp_cloner = 254 IF_CLONE_INITIALIZER("carp", carp_clone_create, carp_clone_destroy); 255 256 #define carp_cksum(_m, _l) ((u_int16_t)in_cksum((_m), (_l))) 257 #define CARP_IFQ_PRIO 6 258 259 void 260 carp_hmac_prepare(struct carp_softc *sc) 261 { 262 struct carp_vhost_entry *vhe; 263 u_int8_t i; 264 265 LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) { 266 for (i = 0; i < HMAC_MAX; i++) { 267 carp_hmac_prepare_ctx(vhe, i); 268 } 269 } 270 } 271 272 void 273 carp_hmac_prepare_ctx(struct carp_vhost_entry *vhe, u_int8_t ctx) 274 { 275 struct carp_softc *sc = vhe->parent_sc; 276 277 u_int8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT; 278 u_int8_t vhid = vhe->vhid & 0xff; 279 SHA1_CTX sha1ctx; 280 u_int32_t kmd[5]; 281 struct ifaddr *ifa; 282 int i, found; 283 struct in_addr last, cur, in; 284 #ifdef INET6 285 struct in6_addr last6, cur6, in6; 286 #endif /* INET6 */ 287 288 /* compute ipad from key */ 289 bzero(vhe->vhe_pad, sizeof(vhe->vhe_pad)); 290 bcopy(sc->sc_key, vhe->vhe_pad, sizeof(sc->sc_key)); 291 for (i = 0; i < sizeof(vhe->vhe_pad); i++) 292 vhe->vhe_pad[i] ^= 0x36; 293 294 /* precompute first part of inner hash */ 295 SHA1Init(&vhe->vhe_sha1[ctx]); 296 SHA1Update(&vhe->vhe_sha1[ctx], vhe->vhe_pad, sizeof(vhe->vhe_pad)); 297 SHA1Update(&vhe->vhe_sha1[ctx], (void *)&version, sizeof(version)); 298 SHA1Update(&vhe->vhe_sha1[ctx], (void *)&type, sizeof(type)); 299 300 /* generate a key for the arpbalance hash, before the vhid is hashed */ 301 if (vhe->vhe_leader) { 302 bcopy(&vhe->vhe_sha1[ctx], &sha1ctx, sizeof(sha1ctx)); 303 SHA1Final((unsigned char *)kmd, &sha1ctx); 304 sc->sc_hashkey[0] = kmd[0] ^ kmd[1]; 305 sc->sc_hashkey[1] = kmd[2] ^ kmd[3]; 306 } 307 308 /* the rest of the precomputation */ 309 if (vhe->vhe_leader && bcmp(sc->sc_ac.ac_enaddr, vhe->vhe_enaddr, 310 ETHER_ADDR_LEN) != 0) 311 SHA1Update(&vhe->vhe_sha1[ctx], sc->sc_ac.ac_enaddr, 312 ETHER_ADDR_LEN); 313 314 SHA1Update(&vhe->vhe_sha1[ctx], (void *)&vhid, sizeof(vhid)); 315 316 /* Hash the addresses from smallest to largest, not interface order */ 317 #ifdef INET 318 cur.s_addr = 0; 319 do { 320 found = 0; 321 last = cur; 322 cur.s_addr = 0xffffffff; 323 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 324 if (ifa->ifa_addr->sa_family != AF_INET) 325 continue; 326 in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr; 327 if (ntohl(in.s_addr) > ntohl(last.s_addr) && 328 ntohl(in.s_addr) < ntohl(cur.s_addr)) { 329 cur.s_addr = in.s_addr; 330 found++; 331 } 332 } 333 if (found) 334 SHA1Update(&vhe->vhe_sha1[ctx], 335 (void *)&cur, sizeof(cur)); 336 } while (found); 337 #endif /* INET */ 338 #ifdef INET6 339 memset(&cur6, 0x00, sizeof(cur6)); 340 do { 341 found = 0; 342 last6 = cur6; 343 memset(&cur6, 0xff, sizeof(cur6)); 344 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 345 if (ifa->ifa_addr->sa_family != AF_INET6) 346 continue; 347 in6 = ifatoia6(ifa)->ia_addr.sin6_addr; 348 if (IN6_IS_SCOPE_EMBED(&in6)) { 349 if (ctx == HMAC_NOV6LL) 350 continue; 351 in6.s6_addr16[1] = 0; 352 } 353 if (memcmp(&in6, &last6, sizeof(in6)) > 0 && 354 memcmp(&in6, &cur6, sizeof(in6)) < 0) { 355 cur6 = in6; 356 found++; 357 } 358 } 359 if (found) 360 SHA1Update(&vhe->vhe_sha1[ctx], 361 (void *)&cur6, sizeof(cur6)); 362 } while (found); 363 #endif /* INET6 */ 364 365 /* convert ipad to opad */ 366 for (i = 0; i < sizeof(vhe->vhe_pad); i++) 367 vhe->vhe_pad[i] ^= 0x36 ^ 0x5c; 368 } 369 370 void 371 carp_hmac_generate(struct carp_vhost_entry *vhe, u_int32_t counter[2], 372 unsigned char md[20], u_int8_t ctx) 373 { 374 SHA1_CTX sha1ctx; 375 376 /* fetch first half of inner hash */ 377 bcopy(&vhe->vhe_sha1[ctx], &sha1ctx, sizeof(sha1ctx)); 378 379 SHA1Update(&sha1ctx, (void *)counter, sizeof(vhe->vhe_replay_cookie)); 380 SHA1Final(md, &sha1ctx); 381 382 /* outer hash */ 383 SHA1Init(&sha1ctx); 384 SHA1Update(&sha1ctx, vhe->vhe_pad, sizeof(vhe->vhe_pad)); 385 SHA1Update(&sha1ctx, md, 20); 386 SHA1Final(md, &sha1ctx); 387 } 388 389 int 390 carp_hmac_verify(struct carp_vhost_entry *vhe, u_int32_t counter[2], 391 unsigned char md[20]) 392 { 393 unsigned char md2[20]; 394 u_int8_t i; 395 396 for (i = 0; i < HMAC_MAX; i++) { 397 carp_hmac_generate(vhe, counter, md2, i); 398 if (!timingsafe_bcmp(md, md2, sizeof(md2))) 399 return (0); 400 } 401 return (1); 402 } 403 404 void 405 carp_setroute(struct carp_softc *sc, int cmd) 406 { 407 struct ifaddr *ifa; 408 int s; 409 410 /* XXX this mess needs fixing */ 411 412 s = splsoftnet(); 413 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 414 switch (ifa->ifa_addr->sa_family) { 415 case AF_INET: { 416 int error; 417 struct sockaddr sa; 418 struct rtentry *rt; 419 struct radix_node_head *rnh; 420 struct radix_node *rn; 421 struct rt_addrinfo info; 422 int hr_otherif, nr_ourif; 423 struct sockaddr_rtlabel sa_rl; 424 const char *label; 425 426 /* Remove the existing host route, if any */ 427 bzero(&info, sizeof(info)); 428 info.rti_info[RTAX_DST] = ifa->ifa_addr; 429 info.rti_flags = RTF_HOST; 430 error = rtrequest1(RTM_DELETE, &info, RTP_CONNECTED, 431 NULL, sc->sc_if.if_rdomain); 432 rt_missmsg(RTM_DELETE, &info, info.rti_flags, NULL, 433 error, sc->sc_if.if_rdomain); 434 435 /* Check for our address on another interface */ 436 /* XXX cries for proper API */ 437 rnh = rt_gettable(ifa->ifa_addr->sa_family, 438 sc->sc_if.if_rdomain); 439 rn = rnh->rnh_matchaddr(ifa->ifa_addr, rnh); 440 rt = (struct rtentry *)rn; 441 hr_otherif = (rt && rt->rt_ifp != &sc->sc_if && 442 rt->rt_flags & (RTF_CLONING|RTF_CLONED)); 443 444 /* Check for a network route on our interface */ 445 bcopy(ifa->ifa_addr, &sa, sizeof(sa)); 446 satosin(&sa)->sin_addr.s_addr = satosin(ifa->ifa_netmask 447 )->sin_addr.s_addr & satosin(&sa)->sin_addr.s_addr; 448 rt = (struct rtentry *)rt_lookup(&sa, 449 ifa->ifa_netmask, sc->sc_if.if_rdomain); 450 nr_ourif = (rt && rt->rt_ifp == &sc->sc_if); 451 452 /* Restore the route label */ 453 bzero(&sa_rl, sizeof(sa_rl)); 454 if (rt && rt->rt_labelid) { 455 sa_rl.sr_len = sizeof(sa_rl); 456 sa_rl.sr_family = AF_UNSPEC; 457 label = rtlabel_id2name(rt->rt_labelid); 458 if (label != NULL) 459 strlcpy(sa_rl.sr_label, label, 460 sizeof(sa_rl.sr_label)); 461 } 462 463 switch (cmd) { 464 case RTM_ADD: 465 if (hr_otherif) { 466 ifa->ifa_rtrequest = NULL; 467 ifa->ifa_flags &= ~RTF_CLONING; 468 bzero(&info, sizeof(info)); 469 info.rti_info[RTAX_DST] = ifa->ifa_addr; 470 info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr; 471 info.rti_flags = RTF_UP | RTF_HOST; 472 error = rtrequest1(RTM_ADD, &info, 473 RTP_CONNECTED, NULL, 474 sc->sc_if.if_rdomain); 475 rt_missmsg(RTM_ADD, &info, 476 info.rti_flags, &sc->sc_if, 477 error, sc->sc_if.if_rdomain); 478 } 479 if (!hr_otherif || nr_ourif || !rt) { 480 if (nr_ourif && !(rt->rt_flags & 481 RTF_CLONING)) { 482 bzero(&info, sizeof(info)); 483 info.rti_info[RTAX_DST] = &sa; 484 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; 485 error = rtrequest1(RTM_DELETE, 486 &info, RTP_CONNECTED, NULL, 487 sc->sc_if.if_rdomain); 488 rt_missmsg(RTM_DELETE, &info, info.rti_flags, NULL, 489 error, sc->sc_if.if_rdomain); 490 } 491 492 ifa->ifa_rtrequest = arp_rtrequest; 493 ifa->ifa_flags |= RTF_CLONING; 494 495 bzero(&info, sizeof(info)); 496 info.rti_info[RTAX_DST] = &sa; 497 info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr; 498 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; 499 info.rti_info[RTAX_LABEL] = 500 (struct sockaddr *)&sa_rl; 501 error = rtrequest1(RTM_ADD, &info, 502 RTP_CONNECTED, NULL, 503 sc->sc_if.if_rdomain); 504 if (error == 0) 505 ifa->ifa_flags |= IFA_ROUTE; 506 rt_missmsg(RTM_ADD, &info, info.rti_flags, 507 &sc->sc_if, error, sc->sc_if.if_rdomain); 508 } 509 break; 510 case RTM_DELETE: 511 break; 512 default: 513 break; 514 } 515 break; 516 } 517 518 #ifdef INET6 519 case AF_INET6: 520 if (sc->sc_balancing >= CARP_BAL_IP) 521 continue; 522 if (cmd == RTM_ADD) 523 in6_ifaddloop(ifa); 524 else 525 in6_ifremloop(ifa); 526 break; 527 #endif /* INET6 */ 528 default: 529 break; 530 } 531 } 532 splx(s); 533 } 534 535 /* 536 * process input packet. 537 * we have rearranged checks order compared to the rfc, 538 * but it seems more efficient this way or not possible otherwise. 539 */ 540 void 541 carp_proto_input(struct mbuf *m, ...) 542 { 543 struct ip *ip = mtod(m, struct ip *); 544 struct ifnet *ifp = m->m_pkthdr.rcvif; 545 struct carp_softc *sc = NULL; 546 struct carp_header *ch; 547 int iplen, len, hlen, ismulti; 548 va_list ap; 549 550 va_start(ap, m); 551 hlen = va_arg(ap, int); 552 va_end(ap); 553 554 carpstats.carps_ipackets++; 555 556 if (!carp_opts[CARPCTL_ALLOW]) { 557 m_freem(m); 558 return; 559 } 560 561 ismulti = IN_MULTICAST(ip->ip_dst.s_addr); 562 563 /* check if received on a valid carp interface */ 564 if (!((ifp->if_type == IFT_CARP && ismulti) || 565 (ifp->if_type != IFT_CARP && !ismulti && ifp->if_carp != NULL))) { 566 carpstats.carps_badif++; 567 CARP_LOG(LOG_INFO, sc, ("packet received on non-carp interface: %s", 568 m->m_pkthdr.rcvif->if_xname)); 569 m_freem(m); 570 return; 571 } 572 573 /* verify that the IP TTL is 255. */ 574 if (ip->ip_ttl != CARP_DFLTTL) { 575 carpstats.carps_badttl++; 576 CARP_LOG(LOG_NOTICE, sc, ("received ttl %d != %d on %s", ip->ip_ttl, 577 CARP_DFLTTL, m->m_pkthdr.rcvif->if_xname)); 578 m_freem(m); 579 return; 580 } 581 582 /* 583 * verify that the received packet length is 584 * equal to the CARP header 585 */ 586 iplen = ip->ip_hl << 2; 587 len = iplen + sizeof(*ch); 588 if (len > m->m_pkthdr.len) { 589 carpstats.carps_badlen++; 590 CARP_LOG(LOG_INFO, sc, ("packet too short %d on %s", m->m_pkthdr.len, 591 m->m_pkthdr.rcvif->if_xname)); 592 m_freem(m); 593 return; 594 } 595 596 if ((m = m_pullup(m, len)) == NULL) { 597 carpstats.carps_hdrops++; 598 return; 599 } 600 ip = mtod(m, struct ip *); 601 ch = (struct carp_header *)(mtod(m, caddr_t) + iplen); 602 603 /* verify the CARP checksum */ 604 m->m_data += iplen; 605 if (carp_cksum(m, len - iplen)) { 606 carpstats.carps_badsum++; 607 CARP_LOG(LOG_INFO, sc, ("checksum failed on %s", 608 m->m_pkthdr.rcvif->if_xname)); 609 m_freem(m); 610 return; 611 } 612 m->m_data -= iplen; 613 614 carp_proto_input_c(m, ch, ismulti, AF_INET); 615 } 616 617 #ifdef INET6 618 int 619 carp6_proto_input(struct mbuf **mp, int *offp, int proto) 620 { 621 struct mbuf *m = *mp; 622 struct carp_softc *sc = NULL; 623 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 624 struct carp_header *ch; 625 u_int len; 626 627 carpstats.carps_ipackets6++; 628 629 if (!carp_opts[CARPCTL_ALLOW]) { 630 m_freem(m); 631 return (IPPROTO_DONE); 632 } 633 634 /* check if received on a valid carp interface */ 635 if (m->m_pkthdr.rcvif->if_type != IFT_CARP) { 636 carpstats.carps_badif++; 637 CARP_LOG(LOG_INFO, sc, ("packet received on non-carp interface: %s", 638 m->m_pkthdr.rcvif->if_xname)); 639 m_freem(m); 640 return (IPPROTO_DONE); 641 } 642 643 /* verify that the IP TTL is 255 */ 644 if (ip6->ip6_hlim != CARP_DFLTTL) { 645 carpstats.carps_badttl++; 646 CARP_LOG(LOG_NOTICE, sc, ("received ttl %d != %d on %s", ip6->ip6_hlim, 647 CARP_DFLTTL, m->m_pkthdr.rcvif->if_xname)); 648 m_freem(m); 649 return (IPPROTO_DONE); 650 } 651 652 /* verify that we have a complete carp packet */ 653 len = m->m_len; 654 if ((m = m_pullup(m, *offp + sizeof(*ch))) == NULL) { 655 carpstats.carps_badlen++; 656 CARP_LOG(LOG_INFO, sc, ("packet size %u too small", len)); 657 return (IPPROTO_DONE); 658 } 659 ch = (struct carp_header *)(mtod(m, caddr_t) + *offp); 660 661 /* verify the CARP checksum */ 662 m->m_data += *offp; 663 if (carp_cksum(m, sizeof(*ch))) { 664 carpstats.carps_badsum++; 665 CARP_LOG(LOG_INFO, sc, ("checksum failed, on %s", 666 m->m_pkthdr.rcvif->if_xname)); 667 m_freem(m); 668 return (IPPROTO_DONE); 669 } 670 m->m_data -= *offp; 671 672 carp_proto_input_c(m, ch, 1, AF_INET6); 673 return (IPPROTO_DONE); 674 } 675 #endif /* INET6 */ 676 677 void 678 carp_proto_input_c(struct mbuf *m, struct carp_header *ch, int ismulti, 679 sa_family_t af) 680 { 681 struct ifnet *ifp = m->m_pkthdr.rcvif; 682 struct carp_softc *sc; 683 struct carp_vhost_entry *vhe; 684 struct timeval sc_tv, ch_tv; 685 struct carp_if *cif; 686 687 if (ifp->if_type == IFT_CARP) 688 cif = (struct carp_if *)ifp->if_carpdev->if_carp; 689 else 690 cif = (struct carp_if *)ifp->if_carp; 691 692 TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) { 693 if (af == AF_INET && 694 ismulti != IN_MULTICAST(sc->sc_peer.s_addr)) 695 continue; 696 LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) { 697 if (vhe->vhid == ch->carp_vhid) 698 goto found; 699 } 700 } 701 found: 702 703 if (!sc || (sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) != 704 (IFF_UP|IFF_RUNNING)) { 705 carpstats.carps_badvhid++; 706 m_freem(m); 707 return; 708 } 709 710 getmicrotime(&sc->sc_if.if_lastchange); 711 sc->sc_if.if_ipackets++; 712 sc->sc_if.if_ibytes += m->m_pkthdr.len; 713 714 /* verify the CARP version. */ 715 if (ch->carp_version != CARP_VERSION) { 716 carpstats.carps_badver++; 717 sc->sc_if.if_ierrors++; 718 CARP_LOG(LOG_NOTICE, sc, ("invalid version %d != %d", 719 ch->carp_version, CARP_VERSION)); 720 m_freem(m); 721 return; 722 } 723 724 /* verify the hash */ 725 if (carp_hmac_verify(vhe, ch->carp_counter, ch->carp_md)) { 726 carpstats.carps_badauth++; 727 sc->sc_if.if_ierrors++; 728 CARP_LOG(LOG_INFO, sc, ("incorrect hash")); 729 m_freem(m); 730 return; 731 } 732 733 if (!bcmp(&vhe->vhe_replay_cookie, ch->carp_counter, 734 sizeof(ch->carp_counter))) { 735 /* Do not log duplicates from non simplex interfaces */ 736 if (sc->sc_carpdev->if_flags & IFF_SIMPLEX) { 737 carpstats.carps_badauth++; 738 sc->sc_if.if_ierrors++; 739 CARP_LOG(LOG_WARNING, sc, 740 ("replay or network loop detected")); 741 } 742 m_freem(m); 743 return; 744 } 745 746 sc_tv.tv_sec = sc->sc_advbase; 747 sc_tv.tv_usec = vhe->advskew * 1000000 / 256; 748 ch_tv.tv_sec = ch->carp_advbase; 749 ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256; 750 751 switch (vhe->state) { 752 case INIT: 753 break; 754 case MASTER: 755 /* 756 * If we receive an advertisement from a master who's going to 757 * be more frequent than us, and whose demote count is not higher 758 * than ours, go into BACKUP state. If his demote count is lower, 759 * also go into BACKUP. 760 */ 761 if (((timercmp(&sc_tv, &ch_tv, >) || 762 timercmp(&sc_tv, &ch_tv, ==)) && 763 (ch->carp_demote <= carp_group_demote_count(sc))) || 764 ch->carp_demote < carp_group_demote_count(sc)) { 765 timeout_del(&vhe->ad_tmo); 766 carp_set_state(vhe, BACKUP); 767 carp_setrun(vhe, 0); 768 if (vhe->vhe_leader) 769 carp_setroute(sc, RTM_DELETE); 770 } 771 break; 772 case BACKUP: 773 /* 774 * If we're pre-empting masters who advertise slower than us, 775 * and do not have a better demote count, treat them as down. 776 * 777 */ 778 if (carp_opts[CARPCTL_PREEMPT] && 779 timercmp(&sc_tv, &ch_tv, <) && 780 ch->carp_demote >= carp_group_demote_count(sc)) { 781 carp_master_down(vhe); 782 break; 783 } 784 785 /* 786 * Take over masters advertising with a higher demote count, 787 * regardless of CARPCTL_PREEMPT. 788 */ 789 if (ch->carp_demote > carp_group_demote_count(sc)) { 790 carp_master_down(vhe); 791 break; 792 } 793 794 /* 795 * If the master is going to advertise at such a low frequency 796 * that he's guaranteed to time out, we'd might as well just 797 * treat him as timed out now. 798 */ 799 sc_tv.tv_sec = sc->sc_advbase * 3; 800 if (sc->sc_advbase && timercmp(&sc_tv, &ch_tv, <)) { 801 carp_master_down(vhe); 802 break; 803 } 804 805 /* 806 * Otherwise, we reset the counter and wait for the next 807 * advertisement. 808 */ 809 carp_setrun(vhe, af); 810 break; 811 } 812 813 m_freem(m); 814 return; 815 } 816 817 int 818 carp_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, 819 size_t newlen) 820 { 821 /* All sysctl names at this level are terminal. */ 822 if (namelen != 1) 823 return (ENOTDIR); 824 825 switch (name[0]) { 826 case CARPCTL_STATS: 827 if (newp != NULL) 828 return (EPERM); 829 return (sysctl_struct(oldp, oldlenp, newp, newlen, 830 &carpstats, sizeof(carpstats))); 831 default: 832 if (name[0] <= 0 || name[0] >= CARPCTL_MAXID) 833 return (ENOPROTOOPT); 834 return sysctl_int(oldp, oldlenp, newp, newlen, 835 &carp_opts[name[0]]); 836 } 837 } 838 839 /* 840 * Interface side of the CARP implementation. 841 */ 842 843 /* ARGSUSED */ 844 void 845 carpattach(int n) 846 { 847 struct ifg_group *ifg; 848 849 if ((ifg = if_creategroup("carp")) != NULL) 850 ifg->ifg_refcnt++; /* keep around even if empty */ 851 if_clone_attach(&carp_cloner); 852 } 853 854 int 855 carp_clone_create(ifc, unit) 856 struct if_clone *ifc; 857 int unit; 858 { 859 struct carp_softc *sc; 860 struct ifnet *ifp; 861 862 sc = malloc(sizeof(*sc), M_DEVBUF, M_NOWAIT|M_ZERO); 863 if (!sc) 864 return (ENOMEM); 865 866 LIST_INIT(&sc->carp_vhosts); 867 sc->sc_vhe_count = 0; 868 if (carp_new_vhost(sc, 0, 0)) { 869 free(sc, M_DEVBUF); 870 return (ENOMEM); 871 } 872 873 sc->sc_suppress = 0; 874 sc->sc_advbase = CARP_DFLTINTV; 875 sc->sc_naddrs = sc->sc_naddrs6 = 0; 876 #ifdef INET6 877 sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL; 878 #endif /* INET6 */ 879 sc->sc_imo.imo_membership = (struct in_multi **)malloc( 880 (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_IPMOPTS, 881 M_WAITOK|M_ZERO); 882 sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS; 883 884 LIST_INIT(&sc->carp_mc_listhead); 885 ifp = &sc->sc_if; 886 ifp->if_softc = sc; 887 snprintf(ifp->if_xname, sizeof ifp->if_xname, "%s%d", ifc->ifc_name, 888 unit); 889 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 890 ifp->if_ioctl = carp_ioctl; 891 ifp->if_start = carp_start; 892 ifp->if_output = carp_output; 893 ifp->if_type = IFT_CARP; 894 ifp->if_addrlen = ETHER_ADDR_LEN; 895 ifp->if_hdrlen = ETHER_HDR_LEN; 896 ifp->if_mtu = ETHERMTU; 897 IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); 898 IFQ_SET_READY(&ifp->if_snd); 899 if_attach(ifp); 900 901 if_alloc_sadl(ifp); 902 LIST_INIT(&sc->sc_ac.ac_multiaddrs); 903 #if NBPFILTER > 0 904 bpfattach(&ifp->if_bpf, ifp, DLT_EN10MB, ETHER_HDR_LEN); 905 #endif 906 907 /* Hook carp_addr_updated to cope with address and route changes. */ 908 sc->ah_cookie = hook_establish(sc->sc_if.if_addrhooks, 0, 909 carp_addr_updated, sc); 910 carp_set_state_all(sc, INIT); 911 912 return (0); 913 } 914 915 int 916 carp_new_vhost(struct carp_softc *sc, int vhid, int advskew) 917 { 918 struct carp_vhost_entry *vhe, *vhe0; 919 920 vhe = malloc(sizeof(*vhe), M_DEVBUF, M_NOWAIT | M_ZERO); 921 if (vhe == NULL) 922 return (ENOMEM); 923 924 vhe->parent_sc = sc; 925 vhe->vhid = vhid; 926 vhe->advskew = advskew; 927 timeout_set(&vhe->ad_tmo, carp_send_ad, vhe); 928 timeout_set(&vhe->md_tmo, carp_master_down, vhe); 929 timeout_set(&vhe->md6_tmo, carp_master_down, vhe); 930 931 /* mark the first vhe as leader */ 932 if (LIST_EMPTY(&sc->carp_vhosts)) { 933 vhe->vhe_leader = 1; 934 LIST_INSERT_HEAD(&sc->carp_vhosts, vhe, vhost_entries); 935 sc->sc_vhe_count = 1; 936 return (0); 937 } 938 939 LIST_FOREACH(vhe0, &sc->carp_vhosts, vhost_entries) 940 if (LIST_NEXT(vhe0, vhost_entries) == NULL) 941 break; 942 LIST_INSERT_AFTER(vhe0, vhe, vhost_entries); 943 sc->sc_vhe_count++; 944 945 return (0); 946 } 947 948 int 949 carp_clone_destroy(struct ifnet *ifp) 950 { 951 struct carp_softc *sc = ifp->if_softc; 952 953 carpdetach(sc); 954 ether_ifdetach(ifp); 955 if_detach(ifp); 956 carp_destroy_vhosts(ifp->if_softc); 957 free(sc->sc_imo.imo_membership, M_IPMOPTS); 958 free(sc, M_DEVBUF); 959 960 return (0); 961 } 962 963 void 964 carp_del_all_timeouts(struct carp_softc *sc) 965 { 966 struct carp_vhost_entry *vhe; 967 968 LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) { 969 timeout_del(&vhe->ad_tmo); 970 timeout_del(&vhe->md_tmo); 971 timeout_del(&vhe->md6_tmo); 972 } 973 } 974 975 void 976 carpdetach(struct carp_softc *sc) 977 { 978 struct carp_if *cif; 979 int s; 980 981 carp_del_all_timeouts(sc); 982 983 if (sc->sc_demote_cnt) 984 carp_group_demote_adj(&sc->sc_if, -sc->sc_demote_cnt, "detach"); 985 sc->sc_suppress = 0; 986 sc->sc_sendad_errors = 0; 987 988 carp_set_state_all(sc, INIT); 989 sc->sc_if.if_flags &= ~IFF_UP; 990 carp_setrun_all(sc, 0); 991 carp_multicast_cleanup(sc); 992 993 s = splnet(); 994 if (sc->ah_cookie != NULL) 995 hook_disestablish(sc->sc_if.if_addrhooks, sc->ah_cookie); 996 if (sc->sc_carpdev != NULL) { 997 if (sc->lh_cookie != NULL) 998 hook_disestablish(sc->sc_carpdev->if_linkstatehooks, 999 sc->lh_cookie); 1000 cif = (struct carp_if *)sc->sc_carpdev->if_carp; 1001 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list); 1002 if (!--cif->vhif_nvrs) { 1003 ifpromisc(sc->sc_carpdev, 0); 1004 sc->sc_carpdev->if_carp = NULL; 1005 free(cif, M_IFADDR); 1006 } 1007 } 1008 sc->sc_carpdev = NULL; 1009 splx(s); 1010 } 1011 1012 /* Detach an interface from the carp. */ 1013 void 1014 carp_ifdetach(struct ifnet *ifp) 1015 { 1016 struct carp_softc *sc, *nextsc; 1017 struct carp_if *cif = (struct carp_if *)ifp->if_carp; 1018 1019 for (sc = TAILQ_FIRST(&cif->vhif_vrs); sc; sc = nextsc) { 1020 nextsc = TAILQ_NEXT(sc, sc_list); 1021 carpdetach(sc); 1022 } 1023 } 1024 1025 void 1026 carp_destroy_vhosts(struct carp_softc *sc) 1027 { 1028 /* XXX bow out? */ 1029 struct carp_vhost_entry *vhe, *nvhe; 1030 1031 for (vhe = LIST_FIRST(&sc->carp_vhosts); 1032 vhe != LIST_END(&sc->carp_vhosts); vhe = nvhe) { 1033 nvhe = LIST_NEXT(vhe, vhost_entries); 1034 free(vhe, M_DEVBUF); 1035 } 1036 LIST_INIT(&sc->carp_vhosts); 1037 sc->sc_vhe_count = 0; 1038 } 1039 1040 int 1041 carp_prepare_ad(struct mbuf *m, struct carp_vhost_entry *vhe, 1042 struct carp_header *ch) 1043 { 1044 if (!vhe->vhe_replay_cookie) { 1045 arc4random_buf(&vhe->vhe_replay_cookie, 1046 sizeof(vhe->vhe_replay_cookie)); 1047 } 1048 1049 bcopy(&vhe->vhe_replay_cookie, ch->carp_counter, 1050 sizeof(ch->carp_counter)); 1051 1052 /* 1053 * For the time being, do not include the IPv6 linklayer addresses 1054 * in the HMAC. 1055 */ 1056 carp_hmac_generate(vhe, ch->carp_counter, ch->carp_md, HMAC_NOV6LL); 1057 1058 return (0); 1059 } 1060 1061 void 1062 carp_send_ad_all(void) 1063 { 1064 struct ifnet *ifp; 1065 struct carp_if *cif; 1066 struct carp_softc *vh; 1067 1068 TAILQ_FOREACH(ifp, &ifnet, if_list) { 1069 if (ifp->if_carp == NULL || ifp->if_type == IFT_CARP) 1070 continue; 1071 1072 cif = (struct carp_if *)ifp->if_carp; 1073 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1074 if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) == 1075 (IFF_UP|IFF_RUNNING)) { 1076 carp_vhe_send_ad_all(vh); 1077 } 1078 } 1079 } 1080 } 1081 1082 void 1083 carp_vhe_send_ad_all(struct carp_softc *sc) 1084 { 1085 struct carp_vhost_entry *vhe; 1086 1087 LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) { 1088 if (vhe->state == MASTER) 1089 carp_send_ad(vhe); 1090 } 1091 } 1092 1093 void 1094 carp_send_ad(void *v) 1095 { 1096 struct carp_header ch; 1097 struct timeval tv; 1098 struct carp_vhost_entry *vhe = v; 1099 struct carp_softc *sc = vhe->parent_sc; 1100 struct carp_header *ch_ptr; 1101 1102 struct mbuf *m; 1103 int error, len, advbase, advskew, s; 1104 struct ifaddr *ifa; 1105 struct sockaddr sa; 1106 1107 if (sc->sc_carpdev == NULL) { 1108 sc->sc_if.if_oerrors++; 1109 return; 1110 } 1111 1112 s = splsoftnet(); 1113 1114 /* bow out if we've gone to backup (the carp interface is going down) */ 1115 if (sc->sc_bow_out) { 1116 advbase = 255; 1117 advskew = 255; 1118 } else { 1119 advbase = sc->sc_advbase; 1120 advskew = vhe->advskew; 1121 tv.tv_sec = advbase; 1122 if (advbase == 0 && advskew == 0) 1123 tv.tv_usec = 1 * 1000000 / 256; 1124 else 1125 tv.tv_usec = advskew * 1000000 / 256; 1126 } 1127 1128 ch.carp_version = CARP_VERSION; 1129 ch.carp_type = CARP_ADVERTISEMENT; 1130 ch.carp_vhid = vhe->vhid; 1131 ch.carp_demote = carp_group_demote_count(sc) & 0xff; 1132 ch.carp_advbase = advbase; 1133 ch.carp_advskew = advskew; 1134 ch.carp_authlen = 7; /* XXX DEFINE */ 1135 ch.carp_cksum = 0; 1136 1137 sc->cur_vhe = vhe; /* we need the vhe later on the output path */ 1138 1139 #ifdef INET 1140 if (sc->sc_naddrs) { 1141 struct ip *ip; 1142 1143 MGETHDR(m, M_DONTWAIT, MT_HEADER); 1144 if (m == NULL) { 1145 sc->sc_if.if_oerrors++; 1146 carpstats.carps_onomem++; 1147 /* XXX maybe less ? */ 1148 goto retry_later; 1149 } 1150 len = sizeof(*ip) + sizeof(ch); 1151 m->m_pkthdr.len = len; 1152 m->m_pkthdr.rcvif = NULL; 1153 m->m_pkthdr.rdomain = sc->sc_if.if_rdomain; 1154 m->m_pkthdr.pf.prio = CARP_IFQ_PRIO; 1155 m->m_len = len; 1156 MH_ALIGN(m, m->m_len); 1157 ip = mtod(m, struct ip *); 1158 ip->ip_v = IPVERSION; 1159 ip->ip_hl = sizeof(*ip) >> 2; 1160 ip->ip_tos = IPTOS_LOWDELAY; 1161 ip->ip_len = htons(len); 1162 ip->ip_id = htons(ip_randomid()); 1163 ip->ip_off = htons(IP_DF); 1164 ip->ip_ttl = CARP_DFLTTL; 1165 ip->ip_p = IPPROTO_CARP; 1166 ip->ip_sum = 0; 1167 1168 bzero(&sa, sizeof(sa)); 1169 sa.sa_family = AF_INET; 1170 ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev); 1171 if (ifa == NULL) 1172 ip->ip_src.s_addr = 0; 1173 else 1174 ip->ip_src.s_addr = 1175 ifatoia(ifa)->ia_addr.sin_addr.s_addr; 1176 ip->ip_dst.s_addr = sc->sc_peer.s_addr; 1177 if (IN_MULTICAST(ip->ip_dst.s_addr)) 1178 m->m_flags |= M_MCAST; 1179 1180 ch_ptr = (struct carp_header *)(ip + 1); 1181 bcopy(&ch, ch_ptr, sizeof(ch)); 1182 if (carp_prepare_ad(m, vhe, ch_ptr)) 1183 goto retry_later; 1184 1185 m->m_data += sizeof(*ip); 1186 ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip)); 1187 m->m_data -= sizeof(*ip); 1188 1189 getmicrotime(&sc->sc_if.if_lastchange); 1190 sc->sc_if.if_opackets++; 1191 sc->sc_if.if_obytes += len; 1192 carpstats.carps_opackets++; 1193 1194 error = ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, 1195 NULL); 1196 if (error) { 1197 if (error == ENOBUFS) 1198 carpstats.carps_onomem++; 1199 else 1200 CARP_LOG(LOG_WARNING, sc, 1201 ("ip_output failed: %d", error)); 1202 sc->sc_if.if_oerrors++; 1203 if (sc->sc_sendad_errors < INT_MAX) 1204 sc->sc_sendad_errors++; 1205 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS(sc)) 1206 carp_group_demote_adj(&sc->sc_if, 1, 1207 "> snderrors"); 1208 sc->sc_sendad_success = 0; 1209 } else { 1210 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS(sc)) { 1211 if (++sc->sc_sendad_success >= 1212 CARP_SENDAD_MIN_SUCCESS(sc)) { 1213 carp_group_demote_adj(&sc->sc_if, -1, 1214 "< snderrors"); 1215 sc->sc_sendad_errors = 0; 1216 } 1217 } else 1218 sc->sc_sendad_errors = 0; 1219 } 1220 if (vhe->vhe_leader) { 1221 if (sc->sc_delayed_arp > 0) 1222 sc->sc_delayed_arp--; 1223 if (sc->sc_delayed_arp == 0) { 1224 carp_send_arp(sc); 1225 sc->sc_delayed_arp = -1; 1226 } 1227 } 1228 } 1229 #endif /* INET */ 1230 #ifdef INET6 1231 if (sc->sc_naddrs6) { 1232 struct ip6_hdr *ip6; 1233 1234 MGETHDR(m, M_DONTWAIT, MT_HEADER); 1235 if (m == NULL) { 1236 sc->sc_if.if_oerrors++; 1237 carpstats.carps_onomem++; 1238 /* XXX maybe less ? */ 1239 goto retry_later; 1240 } 1241 len = sizeof(*ip6) + sizeof(ch); 1242 m->m_pkthdr.len = len; 1243 m->m_pkthdr.rcvif = NULL; 1244 m->m_pkthdr.pf.prio = CARP_IFQ_PRIO; 1245 /* XXX m->m_pkthdr.rdomain = sc->sc_if.if_rdomain; */ 1246 m->m_len = len; 1247 MH_ALIGN(m, m->m_len); 1248 m->m_flags |= M_MCAST; 1249 ip6 = mtod(m, struct ip6_hdr *); 1250 bzero(ip6, sizeof(*ip6)); 1251 ip6->ip6_vfc |= IPV6_VERSION; 1252 ip6->ip6_hlim = CARP_DFLTTL; 1253 ip6->ip6_nxt = IPPROTO_CARP; 1254 1255 /* set the source address */ 1256 bzero(&sa, sizeof(sa)); 1257 sa.sa_family = AF_INET6; 1258 ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev); 1259 if (ifa == NULL) /* This should never happen with IPv6 */ 1260 bzero(&ip6->ip6_src, sizeof(struct in6_addr)); 1261 else 1262 bcopy(ifatoia6(ifa)->ia_addr.sin6_addr.s6_addr, 1263 &ip6->ip6_src, sizeof(struct in6_addr)); 1264 /* set the multicast destination */ 1265 1266 ip6->ip6_dst.s6_addr16[0] = htons(0xff02); 1267 ip6->ip6_dst.s6_addr16[1] = htons(sc->sc_carpdev->if_index); 1268 ip6->ip6_dst.s6_addr8[15] = 0x12; 1269 1270 ch_ptr = (struct carp_header *)(ip6 + 1); 1271 bcopy(&ch, ch_ptr, sizeof(ch)); 1272 if (carp_prepare_ad(m, vhe, ch_ptr)) 1273 goto retry_later; 1274 1275 m->m_data += sizeof(*ip6); 1276 ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip6)); 1277 m->m_data -= sizeof(*ip6); 1278 1279 getmicrotime(&sc->sc_if.if_lastchange); 1280 sc->sc_if.if_opackets++; 1281 sc->sc_if.if_obytes += len; 1282 carpstats.carps_opackets6++; 1283 1284 error = ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL); 1285 if (error) { 1286 if (error == ENOBUFS) 1287 carpstats.carps_onomem++; 1288 else 1289 CARP_LOG(LOG_WARNING, sc, 1290 ("ip6_output failed: %d", error)); 1291 sc->sc_if.if_oerrors++; 1292 if (sc->sc_sendad_errors < INT_MAX) 1293 sc->sc_sendad_errors++; 1294 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS(sc)) 1295 carp_group_demote_adj(&sc->sc_if, 1, 1296 "> snd6errors"); 1297 sc->sc_sendad_success = 0; 1298 } else { 1299 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS(sc)) { 1300 if (++sc->sc_sendad_success >= 1301 CARP_SENDAD_MIN_SUCCESS(sc)) { 1302 carp_group_demote_adj(&sc->sc_if, -1, 1303 "< snd6errors"); 1304 sc->sc_sendad_errors = 0; 1305 } 1306 } else 1307 sc->sc_sendad_errors = 0; 1308 } 1309 } 1310 #endif /* INET6 */ 1311 1312 retry_later: 1313 sc->cur_vhe = NULL; 1314 splx(s); 1315 if (advbase != 255 || advskew != 255) 1316 timeout_add(&vhe->ad_tmo, tvtohz(&tv)); 1317 } 1318 1319 /* 1320 * Broadcast a gratuitous ARP request containing 1321 * the virtual router MAC address for each IP address 1322 * associated with the virtual router. 1323 */ 1324 void 1325 carp_send_arp(struct carp_softc *sc) 1326 { 1327 struct ifaddr *ifa; 1328 in_addr_t in; 1329 int s = splsoftnet(); 1330 1331 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 1332 1333 if (ifa->ifa_addr->sa_family != AF_INET) 1334 continue; 1335 1336 in = ifatoia(ifa)->ia_addr.sin_addr.s_addr; 1337 arprequest(sc->sc_carpdev, &in, &in, sc->sc_ac.ac_enaddr); 1338 DELAY(1000); /* XXX */ 1339 } 1340 splx(s); 1341 } 1342 1343 #ifdef INET6 1344 void 1345 carp_send_na(struct carp_softc *sc) 1346 { 1347 struct ifaddr *ifa; 1348 struct in6_addr *in6; 1349 static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT; 1350 int s = splsoftnet(); 1351 1352 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 1353 1354 if (ifa->ifa_addr->sa_family != AF_INET6) 1355 continue; 1356 1357 in6 = &ifatoia6(ifa)->ia_addr.sin6_addr; 1358 nd6_na_output(sc->sc_carpdev, &mcast, in6, 1359 ND_NA_FLAG_OVERRIDE, 1, NULL); 1360 DELAY(1000); /* XXX */ 1361 } 1362 splx(s); 1363 } 1364 #endif /* INET6 */ 1365 1366 /* 1367 * Based on bridge_hash() in if_bridge.c 1368 */ 1369 #define mix(a,b,c) \ 1370 do { \ 1371 a -= b; a -= c; a ^= (c >> 13); \ 1372 b -= c; b -= a; b ^= (a << 8); \ 1373 c -= a; c -= b; c ^= (b >> 13); \ 1374 a -= b; a -= c; a ^= (c >> 12); \ 1375 b -= c; b -= a; b ^= (a << 16); \ 1376 c -= a; c -= b; c ^= (b >> 5); \ 1377 a -= b; a -= c; a ^= (c >> 3); \ 1378 b -= c; b -= a; b ^= (a << 10); \ 1379 c -= a; c -= b; c ^= (b >> 15); \ 1380 } while (0) 1381 1382 u_int32_t 1383 carp_hash(struct carp_softc *sc, u_char *src) 1384 { 1385 u_int32_t a = 0x9e3779b9, b = sc->sc_hashkey[0], c = sc->sc_hashkey[1]; 1386 1387 c += sc->sc_key[3] << 24; 1388 c += sc->sc_key[2] << 16; 1389 c += sc->sc_key[1] << 8; 1390 c += sc->sc_key[0]; 1391 b += src[5] << 8; 1392 b += src[4]; 1393 a += src[3] << 24; 1394 a += src[2] << 16; 1395 a += src[1] << 8; 1396 a += src[0]; 1397 1398 mix(a, b, c); 1399 return (c); 1400 } 1401 1402 void 1403 carp_update_lsmask(struct carp_softc *sc) 1404 { 1405 struct carp_vhost_entry *vhe; 1406 int count; 1407 1408 if (!sc->sc_balancing) 1409 return; 1410 1411 sc->sc_lsmask = 0; 1412 count = 0; 1413 1414 LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) { 1415 if (vhe->state == MASTER && count < sizeof(sc->sc_lsmask) * 8) 1416 sc->sc_lsmask |= 1 << count; 1417 count++; 1418 } 1419 sc->sc_lscount = count; 1420 CARP_LOG(LOG_DEBUG, sc, ("carp_update_lsmask: %x", sc->sc_lsmask)); 1421 } 1422 1423 int 1424 carp_iamatch(struct in_ifaddr *ia, u_char *src, u_int8_t **sha, 1425 u_int8_t **ether_shost) 1426 { 1427 struct carp_softc *sc = ia->ia_ifp->if_softc; 1428 struct carp_vhost_entry *vhe = LIST_FIRST(&sc->carp_vhosts); 1429 1430 if (sc->sc_balancing == CARP_BAL_ARP) { 1431 int lshash; 1432 /* 1433 * We use the source MAC address to decide which virtual host 1434 * should handle the request. If we're master of that virtual 1435 * host, then we respond, otherwise, just drop the arp packet 1436 * on the floor. 1437 */ 1438 1439 if (sc->sc_lscount == 0) /* just to be safe */ 1440 return (0); 1441 lshash = carp_hash(sc, src) % sc->sc_lscount; 1442 if ((1 << lshash) & sc->sc_lsmask) { 1443 int i = 0; 1444 LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) { 1445 if (i++ == lshash) 1446 break; 1447 } 1448 if (vhe == NULL) 1449 return (0); 1450 *sha = vhe->vhe_enaddr; 1451 return (1); 1452 } 1453 } else if (sc->sc_balancing == CARP_BAL_IPSTEALTH || 1454 sc->sc_balancing == CARP_BAL_IP) { 1455 if (vhe->state == MASTER) { 1456 *ether_shost = ((struct arpcom *)sc->sc_carpdev)-> 1457 ac_enaddr; 1458 return (1); 1459 } 1460 } else { 1461 if (vhe->state == MASTER) 1462 return (1); 1463 } 1464 1465 return (0); 1466 } 1467 1468 #ifdef INET6 1469 int 1470 carp_iamatch6(struct ifnet *ifp, u_char *src, struct sockaddr_dl **sdl) 1471 { 1472 struct carp_softc *sc = ifp->if_softc; 1473 struct carp_vhost_entry *vhe = LIST_FIRST(&sc->carp_vhosts); 1474 1475 if (sc->sc_balancing == CARP_BAL_ARP) { 1476 int lshash; 1477 /* 1478 * We use the source MAC address to decide which virtual host 1479 * should handle the request. If we're master of that virtual 1480 * host, then we respond, otherwise, just drop the ndp packet 1481 * on the floor. 1482 */ 1483 1484 /* can happen if optional src lladdr is not provided */ 1485 if (src == NULL) 1486 return (0); 1487 if (sc->sc_lscount == 0) /* just to be safe */ 1488 return (0); 1489 lshash = carp_hash(sc, src) % sc->sc_lscount; 1490 if ((1 << lshash) & sc->sc_lsmask) { 1491 int i = 0; 1492 LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) { 1493 if (i++ == lshash) 1494 break; 1495 } 1496 if (vhe == NULL) 1497 return (0); 1498 *sdl = &vhe->vhe_sdl; 1499 return (1); 1500 } 1501 } else { 1502 if (vhe->state == MASTER) 1503 return (1); 1504 } 1505 1506 return (0); 1507 } 1508 #endif /* INET6 */ 1509 1510 struct ifnet * 1511 carp_ourether(void *v, struct ether_header *eh, int src) 1512 { 1513 struct carp_if *cif = (struct carp_if *)v; 1514 struct carp_softc *vh; 1515 u_int8_t *ena; 1516 1517 if (src) 1518 ena = (u_int8_t *)&eh->ether_shost; 1519 else 1520 ena = (u_int8_t *)&eh->ether_dhost; 1521 1522 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1523 struct carp_vhost_entry *vhe; 1524 if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) != 1525 (IFF_UP|IFF_RUNNING)) 1526 continue; 1527 if (vh->sc_balancing == CARP_BAL_ARP) { 1528 LIST_FOREACH(vhe, &vh->carp_vhosts, vhost_entries) 1529 if (vhe->state == MASTER && 1530 !bcmp(ena, vhe->vhe_enaddr, ETHER_ADDR_LEN)) 1531 return (&vh->sc_if); 1532 } else { 1533 vhe = LIST_FIRST(&vh->carp_vhosts); 1534 if ((vhe->state == MASTER || 1535 vh->sc_balancing >= CARP_BAL_IP) && 1536 !bcmp(ena, vh->sc_ac.ac_enaddr, ETHER_ADDR_LEN)) 1537 return (&vh->sc_if); 1538 } 1539 } 1540 return (NULL); 1541 } 1542 1543 void 1544 carp_rewrite_lladdr(struct ifnet *ifp, u_int8_t *s_enaddr) 1545 { 1546 struct carp_softc *sc = ifp->if_softc; 1547 1548 if (sc->sc_balancing != CARP_BAL_IPSTEALTH && 1549 sc->sc_balancing != CARP_BAL_IP && sc->cur_vhe) { 1550 if (sc->cur_vhe->vhe_leader) 1551 bcopy((caddr_t)sc->sc_ac.ac_enaddr, 1552 (caddr_t)s_enaddr, ETHER_ADDR_LEN); 1553 else 1554 bcopy((caddr_t)sc->cur_vhe->vhe_enaddr, 1555 (caddr_t)s_enaddr, ETHER_ADDR_LEN); 1556 } 1557 } 1558 1559 int 1560 carp_our_mcastaddr(struct ifnet *ifp, u_int8_t *d_enaddr) 1561 { 1562 struct carp_softc *sc = ifp->if_softc; 1563 1564 if (sc->sc_balancing != CARP_BAL_IP) 1565 return (0); 1566 1567 return(!bcmp(sc->sc_ac.ac_enaddr, d_enaddr, ETHER_ADDR_LEN)); 1568 } 1569 1570 1571 int 1572 carp_input(struct mbuf *m, u_int8_t *shost, u_int8_t *dhost, u_int16_t etype) 1573 { 1574 struct ether_header eh; 1575 struct carp_if *cif = (struct carp_if *)m->m_pkthdr.rcvif->if_carp; 1576 struct ifnet *ifp; 1577 1578 bcopy(shost, &eh.ether_shost, sizeof(eh.ether_shost)); 1579 bcopy(dhost, &eh.ether_dhost, sizeof(eh.ether_dhost)); 1580 eh.ether_type = etype; 1581 1582 if ((ifp = carp_ourether(cif, &eh, 0))) 1583 ; 1584 else if (m->m_flags & (M_BCAST|M_MCAST)) { 1585 struct carp_softc *vh; 1586 struct mbuf *m0; 1587 1588 /* 1589 * XXX Should really check the list of multicast addresses 1590 * for each CARP interface _before_ copying. 1591 */ 1592 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1593 if (!(vh->sc_if.if_flags & IFF_UP)) 1594 continue; 1595 m0 = m_copym2(m, 0, M_COPYALL, M_DONTWAIT); 1596 if (m0 == NULL) 1597 continue; 1598 m0->m_pkthdr.rcvif = &vh->sc_if; 1599 #if NBPFILTER > 0 1600 if (vh->sc_if.if_bpf) 1601 bpf_mtap_hdr(vh->sc_if.if_bpf, (char *)&eh, 1602 ETHER_HDR_LEN, m0, BPF_DIRECTION_IN); 1603 #endif 1604 vh->sc_if.if_ipackets++; 1605 ether_input(&vh->sc_if, &eh, m0); 1606 } 1607 return (1); 1608 } 1609 1610 if (ifp == NULL) 1611 return (1); 1612 1613 m->m_pkthdr.rcvif = ifp; 1614 1615 #if NBPFILTER > 0 1616 if (ifp->if_bpf) 1617 bpf_mtap_hdr(ifp->if_bpf, (char *)&eh, ETHER_HDR_LEN, m, 1618 BPF_DIRECTION_IN); 1619 #endif 1620 ifp->if_ipackets++; 1621 ether_input(ifp, &eh, m); 1622 1623 return (0); 1624 } 1625 1626 int 1627 carp_lsdrop(struct mbuf *m, sa_family_t af, u_int32_t *src, u_int32_t *dst) 1628 { 1629 struct carp_softc *sc = m->m_pkthdr.rcvif->if_softc; 1630 int match; 1631 u_int32_t fold; 1632 1633 if (sc->sc_balancing < CARP_BAL_IP) 1634 return (0); 1635 /* 1636 * Never drop carp advertisements. 1637 * XXX Bad idea to pass all broadcast / multicast traffic? 1638 */ 1639 if (m->m_flags & (M_BCAST|M_MCAST)) 1640 return (0); 1641 1642 fold = src[0] ^ dst[0]; 1643 #ifdef INET6 1644 if (af == AF_INET6) { 1645 int i; 1646 for (i = 1; i < 4; i++) 1647 fold ^= src[i] ^ dst[i]; 1648 } 1649 #endif 1650 if (sc->sc_lscount == 0) /* just to be safe */ 1651 return (1); 1652 match = (1 << (ntohl(fold) % sc->sc_lscount)) & sc->sc_lsmask; 1653 1654 return (!match); 1655 } 1656 1657 void 1658 carp_master_down(void *v) 1659 { 1660 struct carp_vhost_entry *vhe = v; 1661 struct carp_softc *sc = vhe->parent_sc; 1662 1663 switch (vhe->state) { 1664 case INIT: 1665 printf("%s: master_down event in INIT state\n", 1666 sc->sc_if.if_xname); 1667 break; 1668 case MASTER: 1669 break; 1670 case BACKUP: 1671 carp_set_state(vhe, MASTER); 1672 carp_send_ad(vhe); 1673 if (sc->sc_balancing == CARP_BAL_NONE && vhe->vhe_leader) { 1674 carp_send_arp(sc); 1675 /* Schedule a delayed ARP to deal w/ some L3 switches */ 1676 sc->sc_delayed_arp = 2; 1677 #ifdef INET6 1678 carp_send_na(sc); 1679 #endif /* INET6 */ 1680 } 1681 carp_setrun(vhe, 0); 1682 if (vhe->vhe_leader) 1683 carp_setroute(sc, RTM_ADD); 1684 carpstats.carps_preempt++; 1685 break; 1686 } 1687 } 1688 1689 void 1690 carp_setrun_all(struct carp_softc *sc, sa_family_t af) 1691 { 1692 struct carp_vhost_entry *vhe; 1693 LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) { 1694 carp_setrun(vhe, af); 1695 } 1696 } 1697 1698 /* 1699 * When in backup state, af indicates whether to reset the master down timer 1700 * for v4 or v6. If it's set to zero, reset the ones which are already pending. 1701 */ 1702 void 1703 carp_setrun(struct carp_vhost_entry *vhe, sa_family_t af) 1704 { 1705 struct timeval tv; 1706 struct carp_softc *sc = vhe->parent_sc; 1707 1708 if (sc->sc_carpdev == NULL) { 1709 sc->sc_if.if_flags &= ~IFF_RUNNING; 1710 carp_set_state_all(sc, INIT); 1711 return; 1712 } 1713 1714 if (sc->sc_if.if_flags & IFF_UP && vhe->vhid > 0 && 1715 (sc->sc_naddrs || sc->sc_naddrs6) && !sc->sc_suppress) { 1716 sc->sc_if.if_flags |= IFF_RUNNING; 1717 } else { 1718 sc->sc_if.if_flags &= ~IFF_RUNNING; 1719 if (vhe->vhe_leader) 1720 carp_setroute(sc, RTM_DELETE); 1721 return; 1722 } 1723 1724 switch (vhe->state) { 1725 case INIT: 1726 carp_set_state(vhe, BACKUP); 1727 if (vhe->vhe_leader) 1728 carp_setroute(sc, RTM_DELETE); 1729 carp_setrun(vhe, 0); 1730 break; 1731 case BACKUP: 1732 timeout_del(&vhe->ad_tmo); 1733 tv.tv_sec = 3 * sc->sc_advbase; 1734 if (sc->sc_advbase == 0 && vhe->advskew == 0) 1735 tv.tv_usec = 3 * 1000000 / 256; 1736 else if (sc->sc_advbase == 0) 1737 tv.tv_usec = 3 * vhe->advskew * 1000000 / 256; 1738 else 1739 tv.tv_usec = vhe->advskew * 1000000 / 256; 1740 if (vhe->vhe_leader) 1741 sc->sc_delayed_arp = -1; 1742 switch (af) { 1743 #ifdef INET 1744 case AF_INET: 1745 timeout_add(&vhe->md_tmo, tvtohz(&tv)); 1746 break; 1747 #endif /* INET */ 1748 #ifdef INET6 1749 case AF_INET6: 1750 timeout_add(&vhe->md6_tmo, tvtohz(&tv)); 1751 break; 1752 #endif /* INET6 */ 1753 default: 1754 if (sc->sc_naddrs) 1755 timeout_add(&vhe->md_tmo, tvtohz(&tv)); 1756 if (sc->sc_naddrs6) 1757 timeout_add(&vhe->md6_tmo, tvtohz(&tv)); 1758 break; 1759 } 1760 break; 1761 case MASTER: 1762 tv.tv_sec = sc->sc_advbase; 1763 if (sc->sc_advbase == 0 && vhe->advskew == 0) 1764 tv.tv_usec = 1 * 1000000 / 256; 1765 else 1766 tv.tv_usec = vhe->advskew * 1000000 / 256; 1767 timeout_add(&vhe->ad_tmo, tvtohz(&tv)); 1768 break; 1769 } 1770 } 1771 1772 void 1773 carp_multicast_cleanup(struct carp_softc *sc) 1774 { 1775 struct ip_moptions *imo = &sc->sc_imo; 1776 #ifdef INET6 1777 struct ip6_moptions *im6o = &sc->sc_im6o; 1778 #endif 1779 u_int16_t n = imo->imo_num_memberships; 1780 1781 /* Clean up our own multicast memberships */ 1782 while (n-- > 0) { 1783 if (imo->imo_membership[n] != NULL) { 1784 in_delmulti(imo->imo_membership[n]); 1785 imo->imo_membership[n] = NULL; 1786 } 1787 } 1788 imo->imo_num_memberships = 0; 1789 imo->imo_multicast_ifp = NULL; 1790 1791 #ifdef INET6 1792 while (!LIST_EMPTY(&im6o->im6o_memberships)) { 1793 struct in6_multi_mship *imm = 1794 LIST_FIRST(&im6o->im6o_memberships); 1795 1796 LIST_REMOVE(imm, i6mm_chain); 1797 in6_leavegroup(imm); 1798 } 1799 im6o->im6o_multicast_ifp = NULL; 1800 #endif 1801 1802 /* And any other multicast memberships */ 1803 carp_ether_purgemulti(sc); 1804 } 1805 1806 int 1807 carp_set_ifp(struct carp_softc *sc, struct ifnet *ifp) 1808 { 1809 struct carp_if *cif, *ncif = NULL; 1810 struct carp_softc *vr, *after = NULL; 1811 int myself = 0, error = 0; 1812 int s; 1813 1814 if (ifp == sc->sc_carpdev) 1815 return (0); 1816 1817 if (ifp != NULL) { 1818 if ((ifp->if_flags & IFF_MULTICAST) == 0) 1819 return (EADDRNOTAVAIL); 1820 1821 if (ifp->if_type == IFT_CARP) 1822 return (EINVAL); 1823 1824 if (ifp->if_carp == NULL) { 1825 ncif = malloc(sizeof(*cif), M_IFADDR, M_NOWAIT|M_ZERO); 1826 if (ncif == NULL) 1827 return (ENOBUFS); 1828 if ((error = ifpromisc(ifp, 1))) { 1829 free(ncif, M_IFADDR); 1830 return (error); 1831 } 1832 1833 ncif->vhif_ifp = ifp; 1834 TAILQ_INIT(&ncif->vhif_vrs); 1835 } else { 1836 cif = (struct carp_if *)ifp->if_carp; 1837 if (carp_check_dup_vhids(sc, cif, NULL)) 1838 return (EINVAL); 1839 } 1840 1841 /* detach from old interface */ 1842 if (sc->sc_carpdev != NULL) 1843 carpdetach(sc); 1844 1845 /* join multicast groups */ 1846 if (sc->sc_naddrs < 0 && 1847 (error = carp_join_multicast(sc)) != 0) { 1848 if (ncif != NULL) 1849 free(ncif, M_IFADDR); 1850 return (error); 1851 } 1852 1853 #ifdef INET6 1854 if (sc->sc_naddrs6 < 0 && 1855 (error = carp_join_multicast6(sc)) != 0) { 1856 if (ncif != NULL) 1857 free(ncif, M_IFADDR); 1858 carp_multicast_cleanup(sc); 1859 return (error); 1860 } 1861 #endif 1862 1863 /* attach carp interface to physical interface */ 1864 if (ncif != NULL) 1865 ifp->if_carp = (caddr_t)ncif; 1866 sc->sc_carpdev = ifp; 1867 cif = (struct carp_if *)ifp->if_carp; 1868 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { 1869 if (vr == sc) 1870 myself = 1; 1871 if (LIST_FIRST(&vr->carp_vhosts)->vhid < 1872 LIST_FIRST(&sc->carp_vhosts)->vhid) 1873 after = vr; 1874 } 1875 1876 if (!myself) { 1877 /* We're trying to keep things in order */ 1878 if (after == NULL) { 1879 TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list); 1880 } else { 1881 TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, 1882 sc, sc_list); 1883 } 1884 cif->vhif_nvrs++; 1885 } 1886 if (sc->sc_naddrs || sc->sc_naddrs6) 1887 sc->sc_if.if_flags |= IFF_UP; 1888 carp_set_enaddr(sc); 1889 s = splnet(); 1890 sc->lh_cookie = hook_establish(ifp->if_linkstatehooks, 1, 1891 carp_carpdev_state, ifp); 1892 carp_carpdev_state(ifp); 1893 splx(s); 1894 } else { 1895 carpdetach(sc); 1896 sc->sc_if.if_flags &= ~(IFF_UP|IFF_RUNNING); 1897 } 1898 return (0); 1899 } 1900 1901 void 1902 carp_set_vhe_enaddr(struct carp_vhost_entry *vhe) 1903 { 1904 struct carp_softc *sc = vhe->parent_sc; 1905 1906 if (vhe->vhid != 0 && sc->sc_carpdev) { 1907 if (vhe->vhe_leader && sc->sc_balancing == CARP_BAL_IP) 1908 vhe->vhe_enaddr[0] = 1; 1909 else 1910 vhe->vhe_enaddr[0] = 0; 1911 vhe->vhe_enaddr[1] = 0; 1912 vhe->vhe_enaddr[2] = 0x5e; 1913 vhe->vhe_enaddr[3] = 0; 1914 vhe->vhe_enaddr[4] = 1; 1915 vhe->vhe_enaddr[5] = vhe->vhid; 1916 1917 vhe->vhe_sdl.sdl_family = AF_LINK; 1918 vhe->vhe_sdl.sdl_alen = ETHER_ADDR_LEN; 1919 bcopy(vhe->vhe_enaddr, vhe->vhe_sdl.sdl_data, ETHER_ADDR_LEN); 1920 } else 1921 bzero(vhe->vhe_enaddr, ETHER_ADDR_LEN); 1922 } 1923 1924 void 1925 carp_set_enaddr(struct carp_softc *sc) 1926 { 1927 struct carp_vhost_entry *vhe; 1928 1929 LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) 1930 carp_set_vhe_enaddr(vhe); 1931 1932 vhe = LIST_FIRST(&sc->carp_vhosts); 1933 1934 /* 1935 * Use the carp lladdr if the running one isn't manually set. 1936 * Only compare static parts of the lladdr. 1937 */ 1938 if ((bcmp(sc->sc_ac.ac_enaddr + 1, vhe->vhe_enaddr + 1, 1939 ETHER_ADDR_LEN - 2) == 0) || 1940 (!sc->sc_ac.ac_enaddr[0] && !sc->sc_ac.ac_enaddr[1] && 1941 !sc->sc_ac.ac_enaddr[2] && !sc->sc_ac.ac_enaddr[3] && 1942 !sc->sc_ac.ac_enaddr[4] && !sc->sc_ac.ac_enaddr[5])) 1943 bcopy(vhe->vhe_enaddr, sc->sc_ac.ac_enaddr, ETHER_ADDR_LEN); 1944 1945 /* Make sure the enaddr has changed before further twiddling. */ 1946 if (bcmp(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN) != 0) { 1947 bcopy(sc->sc_ac.ac_enaddr, LLADDR(sc->sc_if.if_sadl), 1948 ETHER_ADDR_LEN); 1949 bcopy(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN); 1950 #ifdef INET6 1951 /* 1952 * (re)attach a link-local address which matches 1953 * our new MAC address. 1954 */ 1955 in6_ifattach_linklocal(&sc->sc_if, NULL); 1956 #endif 1957 carp_set_state_all(sc, INIT); 1958 carp_setrun_all(sc, 0); 1959 } 1960 } 1961 1962 void 1963 carp_addr_updated(void *v) 1964 { 1965 struct carp_softc *sc = (struct carp_softc *) v; 1966 struct ifaddr *ifa; 1967 int new_naddrs = 0, new_naddrs6 = 0; 1968 1969 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 1970 if (ifa->ifa_addr->sa_family == AF_INET) 1971 new_naddrs++; 1972 else if (ifa->ifa_addr->sa_family == AF_INET6 && 1973 !IN6_IS_ADDR_LINKLOCAL(&ifatoia6(ifa)->ia_addr.sin6_addr)) 1974 new_naddrs6++; 1975 } 1976 1977 /* We received address changes from if_addrhooks callback */ 1978 if (new_naddrs != sc->sc_naddrs || new_naddrs6 != sc->sc_naddrs6) { 1979 struct in_addr mc_addr; 1980 struct in_multi *inm; 1981 1982 sc->sc_naddrs = new_naddrs; 1983 sc->sc_naddrs6 = new_naddrs6; 1984 1985 /* Re-establish multicast membership removed by in_control */ 1986 if (IN_MULTICAST(sc->sc_peer.s_addr)) { 1987 mc_addr.s_addr = sc->sc_peer.s_addr; 1988 IN_LOOKUP_MULTI(mc_addr, &sc->sc_if, inm); 1989 if (inm == NULL) { 1990 struct in_multi **imm = 1991 sc->sc_imo.imo_membership; 1992 u_int16_t maxmem = 1993 sc->sc_imo.imo_max_memberships; 1994 1995 bzero(&sc->sc_imo, sizeof(sc->sc_imo)); 1996 sc->sc_imo.imo_membership = imm; 1997 sc->sc_imo.imo_max_memberships = maxmem; 1998 1999 if (sc->sc_carpdev != NULL && sc->sc_naddrs > 0) 2000 carp_join_multicast(sc); 2001 } 2002 } 2003 2004 if (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) { 2005 sc->sc_if.if_flags &= ~IFF_UP; 2006 carp_set_state_all(sc, INIT); 2007 } else 2008 carp_hmac_prepare(sc); 2009 } 2010 2011 carp_setrun_all(sc, 0); 2012 } 2013 2014 int 2015 carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin) 2016 { 2017 struct ifnet *ifp = sc->sc_carpdev; 2018 struct in_ifaddr *ia, *ia_if; 2019 int error = 0; 2020 2021 /* XXX is this necessary? */ 2022 if (sin->sin_addr.s_addr == 0) { 2023 if (!(sc->sc_if.if_flags & IFF_UP)) 2024 carp_set_state_all(sc, INIT); 2025 if (sc->sc_naddrs) 2026 sc->sc_if.if_flags |= IFF_UP; 2027 carp_setrun_all(sc, 0); 2028 return (0); 2029 } 2030 2031 /* we have to do this by hand to ensure we don't match on ourselves */ 2032 ia_if = NULL; 2033 for (ia = TAILQ_FIRST(&in_ifaddr); ia; 2034 ia = TAILQ_NEXT(ia, ia_list)) { 2035 2036 /* and, yeah, we need a multicast-capable iface too */ 2037 if (ia->ia_ifp != &sc->sc_if && 2038 ia->ia_ifp->if_type != IFT_CARP && 2039 (ia->ia_ifp->if_flags & IFF_MULTICAST) && 2040 ia->ia_ifp->if_rdomain == sc->sc_if.if_rdomain && 2041 (sin->sin_addr.s_addr & ia->ia_netmask) == 2042 ia->ia_net) { 2043 if (!ia_if) 2044 ia_if = ia; 2045 } 2046 } 2047 2048 if (ia_if) { 2049 ia = ia_if; 2050 if (ifp) { 2051 if (ifp != ia->ia_ifp) 2052 return (EADDRNOTAVAIL); 2053 } else { 2054 ifp = ia->ia_ifp; 2055 } 2056 } 2057 2058 if ((error = carp_set_ifp(sc, ifp))) 2059 return (error); 2060 2061 if (sc->sc_carpdev == NULL) 2062 return (EADDRNOTAVAIL); 2063 2064 if (sc->sc_naddrs == 0 && (error = carp_join_multicast(sc)) != 0) 2065 return (error); 2066 2067 if (sc->sc_carpdev != NULL) 2068 sc->sc_if.if_flags |= IFF_UP; 2069 2070 carp_set_state_all(sc, INIT); 2071 2072 return (0); 2073 } 2074 2075 int 2076 carp_join_multicast(struct carp_softc *sc) 2077 { 2078 struct ip_moptions *imo = &sc->sc_imo; 2079 struct in_multi *imm; 2080 struct in_addr addr; 2081 2082 if (!IN_MULTICAST(sc->sc_peer.s_addr)) 2083 return (0); 2084 2085 addr.s_addr = sc->sc_peer.s_addr; 2086 if ((imm = in_addmulti(&addr, &sc->sc_if)) == NULL) 2087 return (ENOBUFS); 2088 2089 imo->imo_membership[0] = imm; 2090 imo->imo_num_memberships = 1; 2091 imo->imo_multicast_ifp = &sc->sc_if; 2092 imo->imo_multicast_ttl = CARP_DFLTTL; 2093 imo->imo_multicast_loop = 0; 2094 return (0); 2095 } 2096 2097 2098 #ifdef INET6 2099 int 2100 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6) 2101 { 2102 struct ifnet *ifp = sc->sc_carpdev; 2103 struct in6_ifaddr *ia, *ia_if; 2104 int error = 0; 2105 2106 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { 2107 if (!(sc->sc_if.if_flags & IFF_UP)) 2108 carp_set_state_all(sc, INIT); 2109 if (sc->sc_naddrs6) 2110 sc->sc_if.if_flags |= IFF_UP; 2111 carp_setrun_all(sc, 0); 2112 return (0); 2113 } 2114 2115 /* we have to do this by hand to ensure we don't match on ourselves */ 2116 ia_if = NULL; 2117 for (ia = in6_ifaddr; ia; ia = ia->ia_next) { 2118 int i; 2119 2120 for (i = 0; i < 4; i++) { 2121 if ((sin6->sin6_addr.s6_addr32[i] & 2122 ia->ia_prefixmask.sin6_addr.s6_addr32[i]) != 2123 (ia->ia_addr.sin6_addr.s6_addr32[i] & 2124 ia->ia_prefixmask.sin6_addr.s6_addr32[i])) 2125 break; 2126 } 2127 /* and, yeah, we need a multicast-capable iface too */ 2128 if (ia->ia_ifp != &sc->sc_if && 2129 ia->ia_ifp->if_type != IFT_CARP && 2130 (ia->ia_ifp->if_flags & IFF_MULTICAST) && 2131 (i == 4)) { 2132 if (!ia_if) 2133 ia_if = ia; 2134 } 2135 } 2136 2137 if (ia_if) { 2138 ia = ia_if; 2139 if (sc->sc_carpdev) { 2140 if (sc->sc_carpdev != ia->ia_ifp) 2141 return (EADDRNOTAVAIL); 2142 } else { 2143 ifp = ia->ia_ifp; 2144 } 2145 } 2146 2147 if ((error = carp_set_ifp(sc, ifp))) 2148 return (error); 2149 2150 if (sc->sc_carpdev == NULL) 2151 return (EADDRNOTAVAIL); 2152 2153 if (sc->sc_naddrs6 == 0 && (error = carp_join_multicast6(sc)) != 0) 2154 return (error); 2155 2156 if (sc->sc_carpdev != NULL) 2157 sc->sc_if.if_flags |= IFF_UP; 2158 2159 carp_set_state_all(sc, INIT); 2160 2161 return (0); 2162 } 2163 2164 int 2165 carp_join_multicast6(struct carp_softc *sc) 2166 { 2167 struct in6_multi_mship *imm, *imm2; 2168 struct ip6_moptions *im6o = &sc->sc_im6o; 2169 struct sockaddr_in6 addr6; 2170 int error; 2171 2172 /* Join IPv6 CARP multicast group */ 2173 bzero(&addr6, sizeof(addr6)); 2174 addr6.sin6_family = AF_INET6; 2175 addr6.sin6_len = sizeof(addr6); 2176 addr6.sin6_addr.s6_addr16[0] = htons(0xff02); 2177 addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index); 2178 addr6.sin6_addr.s6_addr8[15] = 0x12; 2179 if ((imm = in6_joingroup(&sc->sc_if, 2180 &addr6.sin6_addr, &error)) == NULL) { 2181 return (error); 2182 } 2183 /* join solicited multicast address */ 2184 bzero(&addr6.sin6_addr, sizeof(addr6.sin6_addr)); 2185 addr6.sin6_addr.s6_addr16[0] = htons(0xff02); 2186 addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index); 2187 addr6.sin6_addr.s6_addr32[1] = 0; 2188 addr6.sin6_addr.s6_addr32[2] = htonl(1); 2189 addr6.sin6_addr.s6_addr32[3] = 0; 2190 addr6.sin6_addr.s6_addr8[12] = 0xff; 2191 if ((imm2 = in6_joingroup(&sc->sc_if, 2192 &addr6.sin6_addr, &error)) == NULL) { 2193 in6_leavegroup(imm); 2194 return (error); 2195 } 2196 2197 /* apply v6 multicast membership */ 2198 im6o->im6o_multicast_ifp = &sc->sc_if; 2199 if (imm) 2200 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, 2201 i6mm_chain); 2202 if (imm2) 2203 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm2, 2204 i6mm_chain); 2205 2206 return (0); 2207 } 2208 2209 #endif /* INET6 */ 2210 2211 int 2212 carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr) 2213 { 2214 struct proc *p = curproc; /* XXX */ 2215 struct carp_softc *sc = ifp->if_softc; 2216 struct carp_vhost_entry *vhe; 2217 struct carpreq carpr; 2218 struct ifaddr *ifa = (struct ifaddr *)addr; 2219 struct ifreq *ifr = (struct ifreq *)addr; 2220 struct ifnet *cdev = NULL; 2221 int i, error = 0; 2222 2223 switch (cmd) { 2224 case SIOCSIFADDR: 2225 switch (ifa->ifa_addr->sa_family) { 2226 #ifdef INET 2227 case AF_INET: 2228 sc->sc_if.if_flags |= IFF_UP; 2229 /* 2230 * emulate arp_ifinit() without doing a gratious arp 2231 * request so that the routes are setup correctly. 2232 */ 2233 ifa->ifa_rtrequest = arp_rtrequest; 2234 ifa->ifa_flags |= RTF_CLONING; 2235 2236 error = carp_set_addr(sc, satosin(ifa->ifa_addr)); 2237 break; 2238 #endif /* INET */ 2239 #ifdef INET6 2240 case AF_INET6: 2241 sc->sc_if.if_flags |= IFF_UP; 2242 error = carp_set_addr6(sc, satosin6(ifa->ifa_addr)); 2243 break; 2244 #endif /* INET6 */ 2245 default: 2246 error = EAFNOSUPPORT; 2247 break; 2248 } 2249 break; 2250 2251 case SIOCSIFFLAGS: 2252 vhe = LIST_FIRST(&sc->carp_vhosts); 2253 if (vhe->state != INIT && !(ifr->ifr_flags & IFF_UP)) { 2254 carp_del_all_timeouts(sc); 2255 2256 /* we need the interface up to bow out */ 2257 sc->sc_if.if_flags |= IFF_UP; 2258 sc->sc_bow_out = 1; 2259 carp_vhe_send_ad_all(sc); 2260 sc->sc_bow_out = 0; 2261 2262 sc->sc_if.if_flags &= ~IFF_UP; 2263 carp_set_state_all(sc, INIT); 2264 carp_setrun_all(sc, 0); 2265 } else if (vhe->state == INIT && (ifr->ifr_flags & IFF_UP)) { 2266 sc->sc_if.if_flags |= IFF_UP; 2267 carp_setrun_all(sc, 0); 2268 } 2269 break; 2270 2271 case SIOCSVH: 2272 vhe = LIST_FIRST(&sc->carp_vhosts); 2273 if ((error = suser(p, 0)) != 0) 2274 break; 2275 if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr))) 2276 break; 2277 error = 1; 2278 if (carpr.carpr_carpdev[0] != '\0' && 2279 (cdev = ifunit(carpr.carpr_carpdev)) == NULL) 2280 return (EINVAL); 2281 if (carpr.carpr_peer.s_addr == 0) 2282 sc->sc_peer.s_addr = INADDR_CARP_GROUP; 2283 else 2284 sc->sc_peer.s_addr = carpr.carpr_peer.s_addr; 2285 if ((error = carp_set_ifp(sc, cdev))) 2286 return (error); 2287 if (vhe->state != INIT && carpr.carpr_state != vhe->state) { 2288 switch (carpr.carpr_state) { 2289 case BACKUP: 2290 timeout_del(&vhe->ad_tmo); 2291 carp_set_state_all(sc, BACKUP); 2292 carp_setrun_all(sc, 0); 2293 carp_setroute(sc, RTM_DELETE); 2294 break; 2295 case MASTER: 2296 LIST_FOREACH(vhe, &sc->carp_vhosts, 2297 vhost_entries) 2298 carp_master_down(vhe); 2299 break; 2300 default: 2301 break; 2302 } 2303 } 2304 if ((error = carp_vhids_ioctl(sc, &carpr))) 2305 return (error); 2306 if (carpr.carpr_advbase >= 0) { 2307 if (carpr.carpr_advbase > 255) { 2308 error = EINVAL; 2309 break; 2310 } 2311 sc->sc_advbase = carpr.carpr_advbase; 2312 error--; 2313 } 2314 if (bcmp(sc->sc_advskews, carpr.carpr_advskews, 2315 sizeof(sc->sc_advskews))) { 2316 i = 0; 2317 LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) 2318 vhe->advskew = carpr.carpr_advskews[i++]; 2319 bcopy(carpr.carpr_advskews, sc->sc_advskews, 2320 sizeof(sc->sc_advskews)); 2321 } 2322 if (sc->sc_balancing != carpr.carpr_balancing) { 2323 if (carpr.carpr_balancing > CARP_BAL_MAXID) { 2324 error = EINVAL; 2325 break; 2326 } 2327 sc->sc_balancing = carpr.carpr_balancing; 2328 carp_set_enaddr(sc); 2329 carp_update_lsmask(sc); 2330 } 2331 bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key)); 2332 if (error > 0) 2333 error = EINVAL; 2334 else { 2335 error = 0; 2336 carp_hmac_prepare(sc); 2337 carp_setrun_all(sc, 0); 2338 } 2339 break; 2340 2341 case SIOCGVH: 2342 bzero(&carpr, sizeof(carpr)); 2343 if (sc->sc_carpdev != NULL) 2344 strlcpy(carpr.carpr_carpdev, sc->sc_carpdev->if_xname, 2345 IFNAMSIZ); 2346 i = 0; 2347 LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) { 2348 carpr.carpr_vhids[i] = vhe->vhid; 2349 carpr.carpr_advskews[i] = vhe->advskew; 2350 carpr.carpr_states[i] = vhe->state; 2351 i++; 2352 } 2353 carpr.carpr_advbase = sc->sc_advbase; 2354 carpr.carpr_balancing = sc->sc_balancing; 2355 if (suser(p, 0) == 0) 2356 bcopy(sc->sc_key, carpr.carpr_key, 2357 sizeof(carpr.carpr_key)); 2358 carpr.carpr_peer.s_addr = sc->sc_peer.s_addr; 2359 error = copyout(&carpr, ifr->ifr_data, sizeof(carpr)); 2360 break; 2361 2362 case SIOCADDMULTI: 2363 error = carp_ether_addmulti(sc, ifr); 2364 break; 2365 2366 case SIOCDELMULTI: 2367 error = carp_ether_delmulti(sc, ifr); 2368 break; 2369 case SIOCAIFGROUP: 2370 case SIOCDIFGROUP: 2371 if (sc->sc_demote_cnt) 2372 carp_ifgroup_ioctl(ifp, cmd, addr); 2373 break; 2374 case SIOCSIFGATTR: 2375 carp_ifgattr_ioctl(ifp, cmd, addr); 2376 break; 2377 default: 2378 error = ENOTTY; 2379 } 2380 2381 if (bcmp(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN) != 0) 2382 carp_set_enaddr(sc); 2383 return (error); 2384 } 2385 2386 int 2387 carp_check_dup_vhids(struct carp_softc *sc, struct carp_if *cif, 2388 struct carpreq *carpr) 2389 { 2390 struct carp_softc *vr; 2391 struct carp_vhost_entry *vhe, *vhe0; 2392 int i; 2393 2394 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { 2395 if (vr == sc) 2396 continue; 2397 LIST_FOREACH(vhe, &vr->carp_vhosts, vhost_entries) { 2398 if (carpr) { 2399 for (i = 0; carpr->carpr_vhids[i]; i++) { 2400 if (vhe->vhid == carpr->carpr_vhids[i]) 2401 return (EINVAL); 2402 } 2403 } 2404 LIST_FOREACH(vhe0, &sc->carp_vhosts, vhost_entries) { 2405 if (vhe->vhid == vhe0->vhid) 2406 return (EINVAL); 2407 } 2408 } 2409 } 2410 return (0); 2411 } 2412 2413 int 2414 carp_vhids_ioctl(struct carp_softc *sc, struct carpreq *carpr) 2415 { 2416 int i, j; 2417 u_int8_t taken_vhids[256]; 2418 2419 if (carpr->carpr_vhids[0] == 0 || 2420 !bcmp(sc->sc_vhids, carpr->carpr_vhids, sizeof(sc->sc_vhids))) 2421 return (0); 2422 2423 bzero(taken_vhids, sizeof(taken_vhids)); 2424 for (i = 0; carpr->carpr_vhids[i]; i++) { 2425 if (taken_vhids[carpr->carpr_vhids[i]]) 2426 return (EINVAL); 2427 taken_vhids[carpr->carpr_vhids[i]] = 1; 2428 2429 if (sc->sc_carpdev) { 2430 struct carp_if *cif; 2431 cif = (struct carp_if *)sc->sc_carpdev->if_carp; 2432 if (carp_check_dup_vhids(sc, cif, carpr)) 2433 return (EINVAL); 2434 } 2435 if (carpr->carpr_advskews[i] >= 255) 2436 return (EINVAL); 2437 } 2438 /* set sane balancing defaults */ 2439 if (i <= 1) 2440 carpr->carpr_balancing = CARP_BAL_NONE; 2441 else if (carpr->carpr_balancing == CARP_BAL_NONE && 2442 sc->sc_balancing == CARP_BAL_NONE) 2443 carpr->carpr_balancing = CARP_BAL_IP; 2444 2445 /* destroy all */ 2446 carp_del_all_timeouts(sc); 2447 carp_destroy_vhosts(sc); 2448 bzero(sc->sc_vhids, sizeof(sc->sc_vhids)); 2449 2450 /* sort vhosts list by vhid */ 2451 for (j = 1; j <= 255; j++) { 2452 for (i = 0; carpr->carpr_vhids[i]; i++) { 2453 if (carpr->carpr_vhids[i] != j) 2454 continue; 2455 if (carp_new_vhost(sc, carpr->carpr_vhids[i], 2456 carpr->carpr_advskews[i])) 2457 return (ENOMEM); 2458 sc->sc_vhids[i] = carpr->carpr_vhids[i]; 2459 sc->sc_advskews[i] = carpr->carpr_advskews[i]; 2460 } 2461 } 2462 carp_set_enaddr(sc); 2463 carp_set_state_all(sc, INIT); 2464 return (0); 2465 } 2466 2467 void 2468 carp_ifgroup_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr) 2469 { 2470 struct ifgroupreq *ifgr = (struct ifgroupreq *)addr; 2471 struct ifg_list *ifgl; 2472 int *dm, adj; 2473 2474 if (!strcmp(ifgr->ifgr_group, IFG_ALL)) 2475 return; 2476 adj = ((struct carp_softc *)ifp->if_softc)->sc_demote_cnt; 2477 if (cmd == SIOCDIFGROUP) 2478 adj = adj * -1; 2479 2480 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) 2481 if (!strcmp(ifgl->ifgl_group->ifg_group, ifgr->ifgr_group)) { 2482 dm = &ifgl->ifgl_group->ifg_carp_demoted; 2483 if (*dm + adj >= 0) 2484 *dm += adj; 2485 else 2486 *dm = 0; 2487 } 2488 } 2489 2490 void 2491 carp_ifgattr_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr) 2492 { 2493 struct ifgroupreq *ifgr = (struct ifgroupreq *)addr; 2494 struct carp_softc *sc = ifp->if_softc; 2495 2496 if (ifgr->ifgr_attrib.ifg_carp_demoted > 0 && (sc->sc_if.if_flags & 2497 (IFF_UP|IFF_RUNNING)) == (IFF_UP|IFF_RUNNING)) 2498 carp_vhe_send_ad_all(sc); 2499 } 2500 2501 /* 2502 * Start output on carp interface. This function should never be called. 2503 */ 2504 void 2505 carp_start(struct ifnet *ifp) 2506 { 2507 #ifdef DEBUG 2508 printf("%s: start called\n", ifp->if_xname); 2509 #endif 2510 } 2511 2512 int 2513 carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa, 2514 struct rtentry *rt) 2515 { 2516 struct carp_softc *sc = ((struct carp_softc *)ifp->if_softc); 2517 struct carp_vhost_entry *vhe; 2518 2519 vhe = sc->cur_vhe ? sc->cur_vhe : LIST_FIRST(&sc->carp_vhosts); 2520 2521 if (sc->sc_carpdev != NULL && 2522 (sc->sc_balancing || vhe->state == MASTER)) 2523 return (sc->sc_carpdev->if_output(ifp, m, sa, rt)); 2524 else { 2525 m_freem(m); 2526 return (ENETUNREACH); 2527 } 2528 } 2529 2530 void 2531 carp_set_state_all(struct carp_softc *sc, int state) 2532 { 2533 struct carp_vhost_entry *vhe; 2534 2535 LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) 2536 carp_set_state(vhe, state); 2537 } 2538 2539 void 2540 carp_set_state(struct carp_vhost_entry *vhe, int state) 2541 { 2542 struct carp_softc *sc = vhe->parent_sc; 2543 static const char *carp_states[] = { CARP_STATES }; 2544 int loglevel; 2545 2546 if (vhe->state == state) 2547 return; 2548 if (vhe->state == INIT || state == INIT) 2549 loglevel = LOG_WARNING; 2550 else 2551 loglevel = LOG_CRIT; 2552 2553 if (sc->sc_vhe_count > 1) 2554 CARP_LOG(loglevel, sc, 2555 ("state transition (vhid %d): %s -> %s", vhe->vhid, 2556 carp_states[vhe->state], carp_states[state])); 2557 else 2558 CARP_LOG(loglevel, sc, 2559 ("state transition: %s -> %s", 2560 carp_states[vhe->state], carp_states[state])); 2561 2562 vhe->state = state; 2563 carp_update_lsmask(sc); 2564 2565 /* only the master vhe creates link state messages */ 2566 if (!vhe->vhe_leader) 2567 return; 2568 2569 switch (state) { 2570 case BACKUP: 2571 sc->sc_if.if_link_state = LINK_STATE_DOWN; 2572 break; 2573 case MASTER: 2574 sc->sc_if.if_link_state = LINK_STATE_UP; 2575 break; 2576 default: 2577 sc->sc_if.if_link_state = LINK_STATE_INVALID; 2578 break; 2579 } 2580 if_link_state_change(&sc->sc_if); 2581 } 2582 2583 void 2584 carp_group_demote_adj(struct ifnet *ifp, int adj, char *reason) 2585 { 2586 struct ifg_list *ifgl; 2587 int *dm; 2588 struct carp_softc *nil = NULL; 2589 2590 if (ifp->if_type == IFT_CARP) { 2591 dm = &((struct carp_softc *)ifp->if_softc)->sc_demote_cnt; 2592 if (*dm + adj >= 0) 2593 *dm += adj; 2594 else 2595 *dm = 0; 2596 } 2597 2598 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) { 2599 if (!strcmp(ifgl->ifgl_group->ifg_group, IFG_ALL)) 2600 continue; 2601 dm = &ifgl->ifgl_group->ifg_carp_demoted; 2602 2603 if (*dm + adj >= 0) 2604 *dm += adj; 2605 else 2606 *dm = 0; 2607 2608 if (adj > 0 && *dm == 1) 2609 carp_send_ad_all(); 2610 CARP_LOG(LOG_ERR, nil, 2611 ("%s demoted group %s by %d to %d (%s)", 2612 ifp->if_xname, ifgl->ifgl_group->ifg_group, 2613 adj, *dm, reason)); 2614 } 2615 } 2616 2617 int 2618 carp_group_demote_count(struct carp_softc *sc) 2619 { 2620 struct ifg_list *ifgl; 2621 int count = 0; 2622 2623 TAILQ_FOREACH(ifgl, &sc->sc_if.if_groups, ifgl_next) 2624 count += ifgl->ifgl_group->ifg_carp_demoted; 2625 2626 if (count == 0 && sc->sc_demote_cnt) 2627 count = sc->sc_demote_cnt; 2628 2629 return (count > 255 ? 255 : count); 2630 } 2631 2632 void 2633 carp_carpdev_state(void *v) 2634 { 2635 struct carp_if *cif; 2636 struct carp_softc *sc; 2637 struct ifnet *ifp = v; 2638 2639 if (ifp->if_type == IFT_CARP) 2640 return; 2641 2642 cif = (struct carp_if *)ifp->if_carp; 2643 2644 TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) { 2645 int suppressed = sc->sc_suppress; 2646 2647 if (sc->sc_carpdev->if_link_state == LINK_STATE_DOWN || 2648 !(sc->sc_carpdev->if_flags & IFF_UP)) { 2649 sc->sc_if.if_flags &= ~IFF_RUNNING; 2650 carp_del_all_timeouts(sc); 2651 carp_set_state_all(sc, INIT); 2652 sc->sc_suppress = 1; 2653 carp_setrun_all(sc, 0); 2654 if (!suppressed) 2655 carp_group_demote_adj(&sc->sc_if, 1, "carpdev"); 2656 } else if (suppressed) { 2657 carp_set_state_all(sc, INIT); 2658 sc->sc_suppress = 0; 2659 carp_setrun_all(sc, 0); 2660 carp_group_demote_adj(&sc->sc_if, -1, "carpdev"); 2661 } 2662 } 2663 } 2664 2665 int 2666 carp_ether_addmulti(struct carp_softc *sc, struct ifreq *ifr) 2667 { 2668 struct ifnet *ifp; 2669 struct carp_mc_entry *mc; 2670 u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN]; 2671 int error; 2672 2673 ifp = sc->sc_carpdev; 2674 if (ifp == NULL) 2675 return (EINVAL); 2676 2677 error = ether_addmulti(ifr, (struct arpcom *)&sc->sc_ac); 2678 if (error != ENETRESET) 2679 return (error); 2680 2681 /* 2682 * This is new multicast address. We have to tell parent 2683 * about it. Also, remember this multicast address so that 2684 * we can delete them on unconfigure. 2685 */ 2686 mc = malloc(sizeof(struct carp_mc_entry), M_DEVBUF, M_NOWAIT); 2687 if (mc == NULL) { 2688 error = ENOMEM; 2689 goto alloc_failed; 2690 } 2691 2692 /* 2693 * As ether_addmulti() returns ENETRESET, following two 2694 * statement shouldn't fail. 2695 */ 2696 (void)ether_multiaddr(&ifr->ifr_addr, addrlo, addrhi); 2697 ETHER_LOOKUP_MULTI(addrlo, addrhi, &sc->sc_ac, mc->mc_enm); 2698 memcpy(&mc->mc_addr, &ifr->ifr_addr, ifr->ifr_addr.sa_len); 2699 LIST_INSERT_HEAD(&sc->carp_mc_listhead, mc, mc_entries); 2700 2701 error = (*ifp->if_ioctl)(ifp, SIOCADDMULTI, (caddr_t)ifr); 2702 if (error != 0) 2703 goto ioctl_failed; 2704 2705 return (error); 2706 2707 ioctl_failed: 2708 LIST_REMOVE(mc, mc_entries); 2709 free(mc, M_DEVBUF); 2710 alloc_failed: 2711 (void)ether_delmulti(ifr, (struct arpcom *)&sc->sc_ac); 2712 2713 return (error); 2714 } 2715 2716 int 2717 carp_ether_delmulti(struct carp_softc *sc, struct ifreq *ifr) 2718 { 2719 struct ifnet *ifp; 2720 struct ether_multi *enm; 2721 struct carp_mc_entry *mc; 2722 u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN]; 2723 int error; 2724 2725 ifp = sc->sc_carpdev; 2726 if (ifp == NULL) 2727 return (EINVAL); 2728 2729 /* 2730 * Find a key to lookup carp_mc_entry. We have to do this 2731 * before calling ether_delmulti for obvious reason. 2732 */ 2733 if ((error = ether_multiaddr(&ifr->ifr_addr, addrlo, addrhi)) != 0) 2734 return (error); 2735 ETHER_LOOKUP_MULTI(addrlo, addrhi, &sc->sc_ac, enm); 2736 if (enm == NULL) 2737 return (EINVAL); 2738 2739 LIST_FOREACH(mc, &sc->carp_mc_listhead, mc_entries) 2740 if (mc->mc_enm == enm) 2741 break; 2742 2743 /* We won't delete entries we didn't add */ 2744 if (mc == NULL) 2745 return (EINVAL); 2746 2747 error = ether_delmulti(ifr, (struct arpcom *)&sc->sc_ac); 2748 if (error != ENETRESET) 2749 return (error); 2750 2751 /* We no longer use this multicast address. Tell parent so. */ 2752 error = (*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)ifr); 2753 if (error == 0) { 2754 /* And forget about this address. */ 2755 LIST_REMOVE(mc, mc_entries); 2756 free(mc, M_DEVBUF); 2757 } else 2758 (void)ether_addmulti(ifr, (struct arpcom *)&sc->sc_ac); 2759 return (error); 2760 } 2761 2762 /* 2763 * Delete any multicast address we have asked to add from parent 2764 * interface. Called when the carp is being unconfigured. 2765 */ 2766 void 2767 carp_ether_purgemulti(struct carp_softc *sc) 2768 { 2769 struct ifnet *ifp = sc->sc_carpdev; /* Parent. */ 2770 struct carp_mc_entry *mc; 2771 union { 2772 struct ifreq ifreq; 2773 struct { 2774 char ifr_name[IFNAMSIZ]; 2775 struct sockaddr_storage ifr_ss; 2776 } ifreq_storage; 2777 } u; 2778 struct ifreq *ifr = &u.ifreq; 2779 2780 if (ifp == NULL) 2781 return; 2782 2783 memcpy(ifr->ifr_name, ifp->if_xname, IFNAMSIZ); 2784 while ((mc = LIST_FIRST(&sc->carp_mc_listhead)) != NULL) { 2785 memcpy(&ifr->ifr_addr, &mc->mc_addr, mc->mc_addr.ss_len); 2786 (void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)ifr); 2787 LIST_REMOVE(mc, mc_entries); 2788 free(mc, M_DEVBUF); 2789 } 2790 } 2791