1 /* $NetBSD: ip_carp.c,v 1.117 2022/09/02 23:48:11 thorpej Exp $ */ 2 /* $OpenBSD: ip_carp.c,v 1.113 2005/11/04 08:11:54 mcbride Exp $ */ 3 4 /* 5 * Copyright (c) 2002 Michael Shalayeff. All rights reserved. 6 * Copyright (c) 2003 Ryan McBride. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 21 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 23 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 25 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 26 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 27 * THE POSSIBILITY OF SUCH DAMAGE. 28 */ 29 30 #ifdef _KERNEL_OPT 31 #include "opt_inet.h" 32 #include "opt_mbuftrace.h" 33 #endif 34 35 #include <sys/cdefs.h> 36 __KERNEL_RCSID(0, "$NetBSD: ip_carp.c,v 1.117 2022/09/02 23:48:11 thorpej Exp $"); 37 38 /* 39 * TODO: 40 * - iface reconfigure 41 * - support for hardware checksum calculations; 42 * 43 */ 44 45 #include <sys/param.h> 46 #include <sys/proc.h> 47 #include <sys/mbuf.h> 48 #include <sys/socket.h> 49 #include <sys/socketvar.h> 50 #include <sys/callout.h> 51 #include <sys/ioctl.h> 52 #include <sys/errno.h> 53 #include <sys/device.h> 54 #include <sys/time.h> 55 #include <sys/kernel.h> 56 #include <sys/kauth.h> 57 #include <sys/sysctl.h> 58 #include <sys/ucred.h> 59 #include <sys/syslog.h> 60 #include <sys/acct.h> 61 #include <sys/cprng.h> 62 #include <sys/cpu.h> 63 #include <sys/pserialize.h> 64 #include <sys/psref.h> 65 66 #include <net/if.h> 67 #include <net/pfil.h> 68 #include <net/if_types.h> 69 #include <net/if_ether.h> 70 #include <net/route.h> 71 #include <net/net_stats.h> 72 #include <netinet/if_inarp.h> 73 #include <netinet/wqinput.h> 74 75 #ifdef INET 76 #include <netinet/in.h> 77 #include <netinet/in_systm.h> 78 #include <netinet/in_var.h> 79 #include <netinet/ip.h> 80 #include <netinet/ip_var.h> 81 82 #include <net/if_dl.h> 83 #endif 84 85 #ifdef INET6 86 #include <netinet/icmp6.h> 87 #include <netinet/ip6.h> 88 #include <netinet6/ip6_var.h> 89 #include <netinet6/nd6.h> 90 #include <netinet6/scope6_var.h> 91 #include <netinet6/in6_var.h> 92 #endif 93 94 #include <net/bpf.h> 95 96 #include <sys/sha1.h> 97 98 #include <netinet/ip_carp.h> 99 100 #include "ioconf.h" 101 102 struct carp_mc_entry { 103 LIST_ENTRY(carp_mc_entry) mc_entries; 104 union { 105 struct ether_multi *mcu_enm; 106 } mc_u; 107 struct sockaddr_storage mc_addr; 108 }; 109 #define mc_enm mc_u.mcu_enm 110 111 struct carp_softc { 112 struct ethercom sc_ac; 113 #define sc_if sc_ac.ec_if 114 #define sc_carpdev sc_ac.ec_if.if_carpdev 115 void *sc_linkstate_hook; 116 int ah_cookie; 117 int lh_cookie; 118 struct ip_moptions sc_imo; 119 #ifdef INET6 120 struct ip6_moptions sc_im6o; 121 #endif /* INET6 */ 122 TAILQ_ENTRY(carp_softc) sc_list; 123 124 enum { INIT = 0, BACKUP, MASTER } sc_state; 125 126 int sc_suppress; 127 int sc_bow_out; 128 129 int sc_sendad_errors; 130 #define CARP_SENDAD_MAX_ERRORS 3 131 int sc_sendad_success; 132 #define CARP_SENDAD_MIN_SUCCESS 3 133 134 int sc_vhid; 135 int sc_advskew; 136 int sc_naddrs; 137 int sc_naddrs6; 138 int sc_advbase; /* seconds */ 139 int sc_init_counter; 140 u_int64_t sc_counter; 141 142 /* authentication */ 143 #define CARP_HMAC_PAD 64 144 unsigned char sc_key[CARP_KEY_LEN]; 145 unsigned char sc_pad[CARP_HMAC_PAD]; 146 SHA1_CTX sc_sha1; 147 u_int32_t sc_hashkey[2]; 148 149 struct callout sc_ad_tmo; /* advertisement timeout */ 150 struct callout sc_md_tmo; /* master down timeout */ 151 struct callout sc_md6_tmo; /* master down timeout */ 152 153 LIST_HEAD(__carp_mchead, carp_mc_entry) carp_mc_listhead; 154 }; 155 156 int carp_suppress_preempt = 0; 157 static int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 0, 0 }; /* XXX for now */ 158 159 static percpu_t *carpstat_percpu; 160 161 #define CARP_STATINC(x) _NET_STATINC(carpstat_percpu, x) 162 163 #ifdef MBUFTRACE 164 static struct mowner carp_proto_mowner_rx = MOWNER_INIT("carp", "rx"); 165 static struct mowner carp_proto_mowner_tx = MOWNER_INIT("carp", "tx"); 166 static struct mowner carp_proto6_mowner_rx = MOWNER_INIT("carp6", "rx"); 167 static struct mowner carp_proto6_mowner_tx = MOWNER_INIT("carp6", "tx"); 168 #endif 169 170 struct carp_if { 171 TAILQ_HEAD(, carp_softc) vhif_vrs; 172 int vhif_nvrs; 173 174 struct ifnet *vhif_ifp; 175 }; 176 177 #define CARP_LOG(sc, s) \ 178 if (carp_opts[CARPCTL_LOG]) { \ 179 if (sc) \ 180 log(LOG_INFO, "%s: ", \ 181 (sc)->sc_if.if_xname); \ 182 else \ 183 log(LOG_INFO, "carp: "); \ 184 addlog s; \ 185 addlog("\n"); \ 186 } 187 188 static void carp_hmac_prepare(struct carp_softc *); 189 static void carp_hmac_generate(struct carp_softc *, u_int32_t *, 190 unsigned char *); 191 static int carp_hmac_verify(struct carp_softc *, u_int32_t *, 192 unsigned char *); 193 static void carp_setroute(struct carp_softc *, int); 194 static void carp_proto_input_c(struct mbuf *, struct carp_header *, 195 sa_family_t); 196 static void carpdetach(struct carp_softc *); 197 static void carp_prepare_ad(struct mbuf *, struct carp_softc *, 198 struct carp_header *); 199 static void carp_send_ad_all(void); 200 static void carp_send_ad(void *); 201 static void carp_send_arp(struct carp_softc *); 202 static void carp_master_down(void *); 203 static int carp_ioctl(struct ifnet *, u_long, void *); 204 static void carp_start(struct ifnet *); 205 static void carp_setrun(struct carp_softc *, sa_family_t); 206 static void carp_set_state(struct carp_softc *, int); 207 static int carp_addrcount(struct carp_if *, struct in_ifaddr *, int); 208 enum { CARP_COUNT_MASTER, CARP_COUNT_RUNNING }; 209 210 static void carp_multicast_cleanup(struct carp_softc *); 211 static int carp_set_ifp(struct carp_softc *, struct ifnet *); 212 static void carp_set_enaddr(struct carp_softc *); 213 #if 0 214 static void carp_addr_updated(void *); 215 #endif 216 static u_int32_t carp_hash(struct carp_softc *, u_char *); 217 static int carp_set_addr(struct carp_softc *, struct sockaddr_in *); 218 static int carp_join_multicast(struct carp_softc *); 219 #ifdef INET6 220 static void carp_send_na(struct carp_softc *); 221 static int carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *); 222 static int carp_join_multicast6(struct carp_softc *); 223 #endif 224 static int carp_clone_create(struct if_clone *, int); 225 static int carp_clone_destroy(struct ifnet *); 226 static int carp_ether_addmulti(struct carp_softc *, struct ifreq *); 227 static int carp_ether_delmulti(struct carp_softc *, struct ifreq *); 228 static void carp_ether_purgemulti(struct carp_softc *); 229 static void carp_update_link_state(struct carp_softc *sc); 230 231 static void sysctl_net_inet_carp_setup(struct sysctllog **); 232 233 /* workqueue-based pr_input */ 234 static struct wqinput *carp_wqinput; 235 static void _carp_proto_input(struct mbuf *, int, int); 236 #ifdef INET6 237 static struct wqinput *carp6_wqinput; 238 static void _carp6_proto_input(struct mbuf *, int, int); 239 #endif 240 241 struct if_clone carp_cloner = 242 IF_CLONE_INITIALIZER("carp", carp_clone_create, carp_clone_destroy); 243 244 static __inline u_int16_t 245 carp_cksum(struct mbuf *m, int len) 246 { 247 return (in_cksum(m, len)); 248 } 249 250 #ifdef INET6 251 static __inline u_int16_t 252 carp6_cksum(struct mbuf *m, uint32_t off, uint32_t len) 253 { 254 return (in6_cksum(m, IPPROTO_CARP, off, len)); 255 } 256 #endif 257 258 static void 259 carp_hmac_prepare(struct carp_softc *sc) 260 { 261 u_int8_t carp_version = CARP_VERSION, type = CARP_ADVERTISEMENT; 262 u_int8_t vhid = sc->sc_vhid & 0xff; 263 SHA1_CTX sha1ctx; 264 u_int32_t kmd[5]; 265 struct ifaddr *ifa; 266 int i, found; 267 struct in_addr last, cur, in; 268 #ifdef INET6 269 struct in6_addr last6, cur6, in6; 270 #endif /* INET6 */ 271 272 /* compute ipad from key */ 273 memset(sc->sc_pad, 0, sizeof(sc->sc_pad)); 274 memcpy(sc->sc_pad, sc->sc_key, sizeof(sc->sc_key)); 275 for (i = 0; i < sizeof(sc->sc_pad); i++) 276 sc->sc_pad[i] ^= 0x36; 277 278 /* precompute first part of inner hash */ 279 SHA1Init(&sc->sc_sha1); 280 SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad)); 281 SHA1Update(&sc->sc_sha1, (void *)&carp_version, sizeof(carp_version)); 282 SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type)); 283 284 /* generate a key for the arpbalance hash, before the vhid is hashed */ 285 memcpy(&sha1ctx, &sc->sc_sha1, sizeof(sha1ctx)); 286 SHA1Final((unsigned char *)kmd, &sha1ctx); 287 sc->sc_hashkey[0] = kmd[0] ^ kmd[1]; 288 sc->sc_hashkey[1] = kmd[2] ^ kmd[3]; 289 290 /* the rest of the precomputation */ 291 SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid)); 292 293 /* Hash the addresses from smallest to largest, not interface order */ 294 #ifdef INET 295 cur.s_addr = 0; 296 do { 297 int s; 298 found = 0; 299 last = cur; 300 cur.s_addr = 0xffffffff; 301 s = pserialize_read_enter(); 302 IFADDR_READER_FOREACH(ifa, &sc->sc_if) { 303 in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr; 304 if (ifa->ifa_addr->sa_family == AF_INET && 305 ntohl(in.s_addr) > ntohl(last.s_addr) && 306 ntohl(in.s_addr) < ntohl(cur.s_addr)) { 307 cur.s_addr = in.s_addr; 308 found++; 309 } 310 } 311 pserialize_read_exit(s); 312 if (found) 313 SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur)); 314 } while (found); 315 #endif /* INET */ 316 317 #ifdef INET6 318 memset(&cur6, 0x00, sizeof(cur6)); 319 do { 320 int s; 321 found = 0; 322 last6 = cur6; 323 memset(&cur6, 0xff, sizeof(cur6)); 324 s = pserialize_read_enter(); 325 IFADDR_READER_FOREACH(ifa, &sc->sc_if) { 326 in6 = ifatoia6(ifa)->ia_addr.sin6_addr; 327 if (IN6_IS_ADDR_LINKLOCAL(&in6)) 328 in6.s6_addr16[1] = 0; 329 if (ifa->ifa_addr->sa_family == AF_INET6 && 330 memcmp(&in6, &last6, sizeof(in6)) > 0 && 331 memcmp(&in6, &cur6, sizeof(in6)) < 0) { 332 cur6 = in6; 333 found++; 334 } 335 } 336 pserialize_read_exit(s); 337 if (found) 338 SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6)); 339 } while (found); 340 #endif /* INET6 */ 341 342 /* convert ipad to opad */ 343 for (i = 0; i < sizeof(sc->sc_pad); i++) 344 sc->sc_pad[i] ^= 0x36 ^ 0x5c; 345 } 346 347 static void 348 carp_hmac_generate(struct carp_softc *sc, u_int32_t counter[2], 349 unsigned char md[20]) 350 { 351 SHA1_CTX sha1ctx; 352 353 /* fetch first half of inner hash */ 354 memcpy(&sha1ctx, &sc->sc_sha1, sizeof(sha1ctx)); 355 356 SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter)); 357 SHA1Final(md, &sha1ctx); 358 359 /* outer hash */ 360 SHA1Init(&sha1ctx); 361 SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad)); 362 SHA1Update(&sha1ctx, md, 20); 363 SHA1Final(md, &sha1ctx); 364 } 365 366 static int 367 carp_hmac_verify(struct carp_softc *sc, u_int32_t counter[2], 368 unsigned char md[20]) 369 { 370 unsigned char md2[20]; 371 372 carp_hmac_generate(sc, counter, md2); 373 374 return (memcmp(md, md2, sizeof(md2))); 375 } 376 377 static void 378 carp_setroute(struct carp_softc *sc, int cmd) 379 { 380 struct ifaddr *ifa; 381 int s, bound; 382 383 KERNEL_LOCK(1, NULL); 384 bound = curlwp_bind(); 385 s = pserialize_read_enter(); 386 IFADDR_READER_FOREACH(ifa, &sc->sc_if) { 387 struct psref psref; 388 ifa_acquire(ifa, &psref); 389 pserialize_read_exit(s); 390 391 switch (ifa->ifa_addr->sa_family) { 392 case AF_INET: { 393 int count = 0; 394 struct rtentry *rt; 395 int hr_otherif, nr_ourif; 396 397 /* 398 * Avoid screwing with the routes if there are other 399 * carp interfaces which are master and have the same 400 * address. 401 */ 402 if (sc->sc_carpdev != NULL && 403 sc->sc_carpdev->if_carp != NULL) { 404 count = carp_addrcount( 405 (struct carp_if *)sc->sc_carpdev->if_carp, 406 ifatoia(ifa), CARP_COUNT_MASTER); 407 if ((cmd == RTM_ADD && count != 1) || 408 (cmd == RTM_DELETE && count != 0)) 409 goto next; 410 } 411 412 /* Remove the existing host route, if any */ 413 rtrequest(RTM_DELETE, ifa->ifa_addr, 414 ifa->ifa_addr, ifa->ifa_netmask, 415 RTF_HOST, NULL); 416 417 rt = NULL; 418 (void)rtrequest(RTM_GET, ifa->ifa_addr, ifa->ifa_addr, 419 ifa->ifa_netmask, RTF_HOST, &rt); 420 hr_otherif = (rt && rt->rt_ifp != &sc->sc_if && 421 (rt->rt_flags & RTF_CONNECTED)); 422 if (rt != NULL) { 423 rt_unref(rt); 424 rt = NULL; 425 } 426 427 /* Check for a network route on our interface */ 428 429 rt = NULL; 430 (void)rtrequest(RTM_GET, ifa->ifa_addr, ifa->ifa_addr, 431 ifa->ifa_netmask, 0, &rt); 432 nr_ourif = (rt && rt->rt_ifp == &sc->sc_if); 433 434 switch (cmd) { 435 case RTM_ADD: 436 if (hr_otherif) { 437 ifa->ifa_rtrequest = NULL; 438 ifa->ifa_flags &= ~RTF_CONNECTED; 439 440 rtrequest(RTM_ADD, ifa->ifa_addr, 441 ifa->ifa_addr, ifa->ifa_netmask, 442 RTF_UP | RTF_HOST, NULL); 443 } 444 if (!hr_otherif || nr_ourif || !rt) { 445 if (nr_ourif && 446 (rt->rt_flags & RTF_CONNECTED) == 0) 447 rtrequest(RTM_DELETE, 448 ifa->ifa_addr, 449 ifa->ifa_addr, 450 ifa->ifa_netmask, 0, NULL); 451 452 ifa->ifa_rtrequest = arp_rtrequest; 453 ifa->ifa_flags |= RTF_CONNECTED; 454 455 if (rtrequest(RTM_ADD, ifa->ifa_addr, 456 ifa->ifa_addr, ifa->ifa_netmask, 0, 457 NULL) == 0) 458 ifa->ifa_flags |= IFA_ROUTE; 459 } 460 break; 461 case RTM_DELETE: 462 break; 463 default: 464 break; 465 } 466 if (rt != NULL) { 467 rt_unref(rt); 468 rt = NULL; 469 } 470 break; 471 } 472 473 #ifdef INET6 474 case AF_INET6: 475 if (cmd == RTM_ADD) 476 in6_ifaddlocal(ifa); 477 else 478 in6_ifremlocal(ifa); 479 break; 480 #endif /* INET6 */ 481 default: 482 break; 483 } 484 next: 485 s = pserialize_read_enter(); 486 ifa_release(ifa, &psref); 487 } 488 pserialize_read_exit(s); 489 curlwp_bindx(bound); 490 KERNEL_UNLOCK_ONE(NULL); 491 } 492 493 /* 494 * process input packet. 495 * we have rearranged checks order compared to the rfc, 496 * but it seems more efficient this way or not possible otherwise. 497 */ 498 static void 499 _carp_proto_input(struct mbuf *m, int hlen, int proto) 500 { 501 struct ip *ip = mtod(m, struct ip *); 502 struct carp_softc *sc = NULL; 503 struct carp_header *ch; 504 int iplen, len; 505 struct ifnet *rcvif; 506 507 CARP_STATINC(CARP_STAT_IPACKETS); 508 MCLAIM(m, &carp_proto_mowner_rx); 509 510 if (!carp_opts[CARPCTL_ALLOW]) { 511 m_freem(m); 512 return; 513 } 514 515 rcvif = m_get_rcvif_NOMPSAFE(m); 516 /* check if received on a valid carp interface */ 517 if (rcvif->if_type != IFT_CARP) { 518 CARP_STATINC(CARP_STAT_BADIF); 519 CARP_LOG(sc, ("packet received on non-carp interface: %s", 520 rcvif->if_xname)); 521 m_freem(m); 522 return; 523 } 524 525 /* verify that the IP TTL is 255. */ 526 if (ip->ip_ttl != CARP_DFLTTL) { 527 CARP_STATINC(CARP_STAT_BADTTL); 528 CARP_LOG(sc, ("received ttl %d != %d on %s", ip->ip_ttl, 529 CARP_DFLTTL, rcvif->if_xname)); 530 m_freem(m); 531 return; 532 } 533 534 /* 535 * verify that the received packet length is 536 * equal to the CARP header 537 */ 538 iplen = ip->ip_hl << 2; 539 len = iplen + sizeof(*ch); 540 if (len > m->m_pkthdr.len) { 541 CARP_STATINC(CARP_STAT_BADLEN); 542 CARP_LOG(sc, ("packet too short %d on %s", m->m_pkthdr.len, 543 rcvif->if_xname)); 544 m_freem(m); 545 return; 546 } 547 548 if ((m = m_pullup(m, len)) == NULL) { 549 CARP_STATINC(CARP_STAT_HDROPS); 550 return; 551 } 552 ip = mtod(m, struct ip *); 553 ch = (struct carp_header *)((char *)ip + iplen); 554 /* verify the CARP checksum */ 555 m->m_data += iplen; 556 if (carp_cksum(m, len - iplen)) { 557 CARP_STATINC(CARP_STAT_BADSUM); 558 CARP_LOG(sc, ("checksum failed on %s", 559 rcvif->if_xname)); 560 m_freem(m); 561 return; 562 } 563 m->m_data -= iplen; 564 565 carp_proto_input_c(m, ch, AF_INET); 566 } 567 568 void 569 carp_proto_input(struct mbuf *m, int off, int proto) 570 { 571 572 wqinput_input(carp_wqinput, m, 0, 0); 573 } 574 575 #ifdef INET6 576 static void 577 _carp6_proto_input(struct mbuf *m, int off, int proto) 578 { 579 struct carp_softc *sc = NULL; 580 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 581 struct carp_header *ch; 582 u_int len; 583 struct ifnet *rcvif; 584 585 CARP_STATINC(CARP_STAT_IPACKETS6); 586 MCLAIM(m, &carp_proto6_mowner_rx); 587 588 if (!carp_opts[CARPCTL_ALLOW]) { 589 m_freem(m); 590 return; 591 } 592 593 rcvif = m_get_rcvif_NOMPSAFE(m); 594 595 /* check if received on a valid carp interface */ 596 if (rcvif->if_type != IFT_CARP) { 597 CARP_STATINC(CARP_STAT_BADIF); 598 CARP_LOG(sc, ("packet received on non-carp interface: %s", 599 rcvif->if_xname)); 600 m_freem(m); 601 return; 602 } 603 604 /* verify that the IP TTL is 255 */ 605 if (ip6->ip6_hlim != CARP_DFLTTL) { 606 CARP_STATINC(CARP_STAT_BADTTL); 607 CARP_LOG(sc, ("received ttl %d != %d on %s", ip6->ip6_hlim, 608 CARP_DFLTTL, rcvif->if_xname)); 609 m_freem(m); 610 return; 611 } 612 613 /* verify that we have a complete carp packet */ 614 len = m->m_len; 615 M_REGION_GET(ch, struct carp_header *, m, off, sizeof(*ch)); 616 if (ch == NULL) { 617 CARP_STATINC(CARP_STAT_BADLEN); 618 CARP_LOG(sc, ("packet size %u too small", len)); 619 return; 620 } 621 622 /* verify the CARP checksum */ 623 if (carp6_cksum(m, off, sizeof(*ch))) { 624 CARP_STATINC(CARP_STAT_BADSUM); 625 CARP_LOG(sc, ("checksum failed, on %s", rcvif->if_xname)); 626 m_freem(m); 627 return; 628 } 629 630 carp_proto_input_c(m, ch, AF_INET6); 631 return; 632 } 633 634 int 635 carp6_proto_input(struct mbuf **mp, int *offp, int proto) 636 { 637 638 wqinput_input(carp6_wqinput, *mp, *offp, proto); 639 640 return IPPROTO_DONE; 641 } 642 #endif /* INET6 */ 643 644 static void 645 carp_proto_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af) 646 { 647 struct carp_softc *sc; 648 u_int64_t tmp_counter; 649 struct timeval sc_tv, ch_tv; 650 651 TAILQ_FOREACH(sc, &((struct carp_if *) 652 m_get_rcvif_NOMPSAFE(m)->if_carpdev->if_carp)->vhif_vrs, sc_list) 653 if (sc->sc_vhid == ch->carp_vhid) 654 break; 655 656 if (!sc || (sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) != 657 (IFF_UP|IFF_RUNNING)) { 658 CARP_STATINC(CARP_STAT_BADVHID); 659 m_freem(m); 660 return; 661 } 662 663 /* 664 * Check if our own advertisement was duplicated 665 * from a non simplex interface. 666 * XXX If there is no address on our physical interface 667 * there is no way to distinguish our ads from the ones 668 * another carp host might have sent us. 669 */ 670 if ((sc->sc_carpdev->if_flags & IFF_SIMPLEX) == 0) { 671 struct sockaddr sa; 672 struct ifaddr *ifa; 673 int s; 674 675 memset(&sa, 0, sizeof(sa)); 676 sa.sa_family = af; 677 s = pserialize_read_enter(); 678 ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev); 679 680 if (ifa && af == AF_INET) { 681 struct ip *ip = mtod(m, struct ip *); 682 if (ip->ip_src.s_addr == 683 ifatoia(ifa)->ia_addr.sin_addr.s_addr) { 684 pserialize_read_exit(s); 685 m_freem(m); 686 return; 687 } 688 } 689 #ifdef INET6 690 if (ifa && af == AF_INET6) { 691 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 692 struct in6_addr in6_src, in6_found; 693 694 in6_src = ip6->ip6_src; 695 in6_found = ifatoia6(ifa)->ia_addr.sin6_addr; 696 if (IN6_IS_ADDR_LINKLOCAL(&in6_src)) 697 in6_src.s6_addr16[1] = 0; 698 if (IN6_IS_ADDR_LINKLOCAL(&in6_found)) 699 in6_found.s6_addr16[1] = 0; 700 if (IN6_ARE_ADDR_EQUAL(&in6_src, &in6_found)) { 701 pserialize_read_exit(s); 702 m_freem(m); 703 return; 704 } 705 } 706 #endif /* INET6 */ 707 pserialize_read_exit(s); 708 } 709 710 nanotime(&sc->sc_if.if_lastchange); 711 if_statadd2(&sc->sc_if, if_ipackets, 1, if_ibytes, m->m_pkthdr.len); 712 713 /* verify the CARP version. */ 714 if (ch->carp_version != CARP_VERSION) { 715 CARP_STATINC(CARP_STAT_BADVER); 716 if_statinc(&sc->sc_if, if_ierrors); 717 CARP_LOG(sc, ("invalid version %d != %d", 718 ch->carp_version, CARP_VERSION)); 719 m_freem(m); 720 return; 721 } 722 723 /* verify the hash */ 724 if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) { 725 struct ip *ip; 726 char ipbuf[INET_ADDRSTRLEN]; 727 #ifdef INET6 728 struct ip6_hdr *ip6; 729 char ip6buf[INET6_ADDRSTRLEN]; 730 #endif 731 732 CARP_STATINC(CARP_STAT_BADAUTH); 733 if_statinc(&sc->sc_if, if_ierrors); 734 735 switch(af) { 736 case AF_INET: 737 ip = mtod(m, struct ip *); 738 CARP_LOG(sc, ("incorrect hash from %s", 739 IN_PRINT(ipbuf, &ip->ip_src))); 740 break; 741 742 #ifdef INET6 743 case AF_INET6: 744 ip6 = mtod(m, struct ip6_hdr *); 745 CARP_LOG(sc, ("incorrect hash from %s", 746 IN6_PRINT(ip6buf, &ip6->ip6_src))); 747 break; 748 #endif 749 750 default: CARP_LOG(sc, ("incorrect hash")); 751 break; 752 } 753 m_freem(m); 754 return; 755 } 756 757 tmp_counter = ntohl(ch->carp_counter[0]); 758 tmp_counter = tmp_counter<<32; 759 tmp_counter += ntohl(ch->carp_counter[1]); 760 761 /* XXX Replay protection goes here */ 762 763 sc->sc_init_counter = 0; 764 sc->sc_counter = tmp_counter; 765 766 767 sc_tv.tv_sec = sc->sc_advbase; 768 if (carp_suppress_preempt && sc->sc_advskew < 240) 769 sc_tv.tv_usec = 240 * 1000000 / 256; 770 else 771 sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256; 772 ch_tv.tv_sec = ch->carp_advbase; 773 ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256; 774 775 switch (sc->sc_state) { 776 case INIT: 777 break; 778 case MASTER: 779 /* 780 * If we receive an advertisement from a backup who's going to 781 * be more frequent than us, go into BACKUP state. 782 */ 783 if (timercmp(&sc_tv, &ch_tv, >) || 784 timercmp(&sc_tv, &ch_tv, ==)) { 785 callout_stop(&sc->sc_ad_tmo); 786 CARP_LOG(sc, ("MASTER -> BACKUP (more frequent advertisement received)")); 787 carp_set_state(sc, BACKUP); 788 carp_setrun(sc, 0); 789 carp_setroute(sc, RTM_DELETE); 790 } 791 break; 792 case BACKUP: 793 /* 794 * If we're pre-empting masters who advertise slower than us, 795 * and this one claims to be slower, treat him as down. 796 */ 797 if (carp_opts[CARPCTL_PREEMPT] && timercmp(&sc_tv, &ch_tv, <)) { 798 CARP_LOG(sc, ("BACKUP -> MASTER (preempting a slower master)")); 799 carp_master_down(sc); 800 break; 801 } 802 803 /* 804 * If the master is going to advertise at such a low frequency 805 * that he's guaranteed to time out, we'd might as well just 806 * treat him as timed out now. 807 */ 808 sc_tv.tv_sec = sc->sc_advbase * 3; 809 if (timercmp(&sc_tv, &ch_tv, <)) { 810 CARP_LOG(sc, ("BACKUP -> MASTER (master timed out)")); 811 carp_master_down(sc); 812 break; 813 } 814 815 /* 816 * Otherwise, we reset the counter and wait for the next 817 * advertisement. 818 */ 819 carp_setrun(sc, af); 820 break; 821 } 822 823 m_freem(m); 824 return; 825 } 826 827 /* 828 * Interface side of the CARP implementation. 829 */ 830 831 /* ARGSUSED */ 832 void 833 carpattach(int n) 834 { 835 if_clone_attach(&carp_cloner); 836 837 carpstat_percpu = percpu_alloc(sizeof(uint64_t) * CARP_NSTATS); 838 } 839 840 static int 841 carp_clone_create(struct if_clone *ifc, int unit) 842 { 843 extern int ifqmaxlen; 844 struct carp_softc *sc; 845 struct ifnet *ifp; 846 847 sc = malloc(sizeof(*sc), M_DEVBUF, M_NOWAIT|M_ZERO); 848 if (!sc) 849 return (ENOMEM); 850 851 sc->sc_suppress = 0; 852 sc->sc_advbase = CARP_DFLTINTV; 853 sc->sc_vhid = -1; /* required setting */ 854 sc->sc_advskew = 0; 855 sc->sc_init_counter = 1; 856 sc->sc_naddrs = sc->sc_naddrs6 = 0; 857 #ifdef INET6 858 sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL; 859 #endif /* INET6 */ 860 861 callout_init(&sc->sc_ad_tmo, 0); 862 callout_init(&sc->sc_md_tmo, 0); 863 callout_init(&sc->sc_md6_tmo, 0); 864 865 callout_setfunc(&sc->sc_ad_tmo, carp_send_ad, sc); 866 callout_setfunc(&sc->sc_md_tmo, carp_master_down, sc); 867 callout_setfunc(&sc->sc_md6_tmo, carp_master_down, sc); 868 869 LIST_INIT(&sc->carp_mc_listhead); 870 ifp = &sc->sc_if; 871 ifp->if_softc = sc; 872 snprintf(ifp->if_xname, sizeof ifp->if_xname, "%s%d", ifc->ifc_name, 873 unit); 874 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 875 ifp->if_ioctl = carp_ioctl; 876 ifp->if_start = carp_start; 877 IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); 878 IFQ_SET_READY(&ifp->if_snd); 879 if_initialize(ifp); 880 ether_ifattach(ifp, NULL); 881 /* Overwrite ethernet defaults */ 882 ifp->if_type = IFT_CARP; 883 ifp->if_output = carp_output; 884 ifp->if_link_state = LINK_STATE_DOWN; 885 carp_set_enaddr(sc); 886 if_register(ifp); 887 888 return (0); 889 } 890 891 static int 892 carp_clone_destroy(struct ifnet *ifp) 893 { 894 struct carp_softc *sc = ifp->if_softc; 895 896 carpdetach(ifp->if_softc); 897 ether_ifdetach(ifp); 898 if_detach(ifp); 899 callout_destroy(&sc->sc_ad_tmo); 900 callout_destroy(&sc->sc_md_tmo); 901 callout_destroy(&sc->sc_md6_tmo); 902 free(ifp->if_softc, M_DEVBUF); 903 904 return (0); 905 } 906 907 static void 908 carpdetach(struct carp_softc *sc) 909 { 910 struct ifnet *ifp; 911 struct carp_if *cif; 912 int s; 913 914 callout_stop(&sc->sc_ad_tmo); 915 callout_stop(&sc->sc_md_tmo); 916 callout_stop(&sc->sc_md6_tmo); 917 918 if (sc->sc_suppress) 919 carp_suppress_preempt--; 920 sc->sc_suppress = 0; 921 922 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) 923 carp_suppress_preempt--; 924 sc->sc_sendad_errors = 0; 925 926 carp_set_state(sc, INIT); 927 sc->sc_if.if_flags &= ~IFF_UP; 928 carp_setrun(sc, 0); 929 carp_multicast_cleanup(sc); 930 931 KERNEL_LOCK(1, NULL); 932 s = splnet(); 933 ifp = sc->sc_carpdev; 934 if (ifp != NULL) { 935 if_linkstate_change_disestablish(ifp, 936 sc->sc_linkstate_hook, NULL); 937 938 cif = (struct carp_if *)ifp->if_carp; 939 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list); 940 if (!--cif->vhif_nvrs) { 941 ifpromisc(ifp, 0); 942 ifp->if_carp = NULL; 943 free(cif, M_IFADDR); 944 } 945 } 946 sc->sc_carpdev = NULL; 947 splx(s); 948 KERNEL_UNLOCK_ONE(NULL); 949 } 950 951 /* Detach an interface from the carp. */ 952 void 953 carp_ifdetach(struct ifnet *ifp) 954 { 955 struct carp_softc *sc, *nextsc; 956 struct carp_if *cif = (struct carp_if *)ifp->if_carp; 957 958 for (sc = TAILQ_FIRST(&cif->vhif_vrs); sc; sc = nextsc) { 959 nextsc = TAILQ_NEXT(sc, sc_list); 960 carpdetach(sc); 961 } 962 } 963 964 static void 965 carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, 966 struct carp_header *ch) 967 { 968 if (sc->sc_init_counter) { 969 /* this could also be seconds since unix epoch */ 970 sc->sc_counter = cprng_fast64(); 971 } else 972 sc->sc_counter++; 973 974 ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff); 975 ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff); 976 977 carp_hmac_generate(sc, ch->carp_counter, ch->carp_md); 978 } 979 980 static void 981 carp_send_ad_all(void) 982 { 983 struct ifnet *ifp; 984 struct carp_if *cif; 985 struct carp_softc *vh; 986 int s; 987 int bound = curlwp_bind(); 988 989 s = pserialize_read_enter(); 990 IFNET_READER_FOREACH(ifp) { 991 struct psref psref; 992 if (ifp->if_carp == NULL || ifp->if_type == IFT_CARP) 993 continue; 994 995 if_acquire(ifp, &psref); 996 pserialize_read_exit(s); 997 998 cif = (struct carp_if *)ifp->if_carp; 999 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1000 if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) == 1001 (IFF_UP|IFF_RUNNING) && vh->sc_state == MASTER) 1002 carp_send_ad(vh); 1003 } 1004 1005 s = pserialize_read_enter(); 1006 if_release(ifp, &psref); 1007 } 1008 pserialize_read_exit(s); 1009 curlwp_bindx(bound); 1010 } 1011 1012 1013 static void 1014 carp_send_ad(void *v) 1015 { 1016 struct carp_header ch; 1017 struct timeval tv; 1018 struct carp_softc *sc = v; 1019 struct carp_header *ch_ptr; 1020 struct mbuf *m; 1021 int error, len, advbase, advskew, s; 1022 struct sockaddr sa; 1023 1024 KERNEL_LOCK(1, NULL); 1025 s = splsoftnet(); 1026 1027 advbase = advskew = 0; /* Sssssh compiler */ 1028 if (sc->sc_carpdev == NULL) { 1029 if_statinc(&sc->sc_if, if_oerrors); 1030 goto retry_later; 1031 } 1032 1033 /* bow out if we've gone to backup (the carp interface is going down) */ 1034 if (sc->sc_bow_out) { 1035 sc->sc_bow_out = 0; 1036 advbase = 255; 1037 advskew = 255; 1038 } else { 1039 advbase = sc->sc_advbase; 1040 if (!carp_suppress_preempt || sc->sc_advskew > 240) 1041 advskew = sc->sc_advskew; 1042 else 1043 advskew = 240; 1044 tv.tv_sec = advbase; 1045 tv.tv_usec = advskew * 1000000 / 256; 1046 } 1047 1048 ch.carp_version = CARP_VERSION; 1049 ch.carp_type = CARP_ADVERTISEMENT; 1050 ch.carp_vhid = sc->sc_vhid; 1051 ch.carp_advbase = advbase; 1052 ch.carp_advskew = advskew; 1053 ch.carp_authlen = 7; /* XXX DEFINE */ 1054 ch.carp_pad1 = 0; /* must be zero */ 1055 ch.carp_cksum = 0; 1056 1057 1058 #ifdef INET 1059 if (sc->sc_naddrs) { 1060 struct ip *ip; 1061 struct ifaddr *ifa; 1062 int _s; 1063 1064 MGETHDR(m, M_DONTWAIT, MT_HEADER); 1065 if (m == NULL) { 1066 if_statinc(&sc->sc_if, if_oerrors); 1067 CARP_STATINC(CARP_STAT_ONOMEM); 1068 /* XXX maybe less ? */ 1069 goto retry_later; 1070 } 1071 MCLAIM(m, &carp_proto_mowner_tx); 1072 len = sizeof(*ip) + sizeof(ch); 1073 m->m_pkthdr.len = len; 1074 m_reset_rcvif(m); 1075 m->m_len = len; 1076 m_align(m, m->m_len); 1077 m->m_flags |= M_MCAST; 1078 ip = mtod(m, struct ip *); 1079 ip->ip_v = IPVERSION; 1080 ip->ip_hl = sizeof(*ip) >> 2; 1081 ip->ip_tos = IPTOS_LOWDELAY; 1082 ip->ip_len = htons(len); 1083 ip->ip_id = 0; /* no need for id, we don't support fragments */ 1084 ip->ip_off = htons(IP_DF); 1085 ip->ip_ttl = CARP_DFLTTL; 1086 ip->ip_p = IPPROTO_CARP; 1087 ip->ip_sum = 0; 1088 1089 memset(&sa, 0, sizeof(sa)); 1090 sa.sa_family = AF_INET; 1091 _s = pserialize_read_enter(); 1092 ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev); 1093 if (ifa == NULL) 1094 ip->ip_src.s_addr = 0; 1095 else 1096 ip->ip_src.s_addr = 1097 ifatoia(ifa)->ia_addr.sin_addr.s_addr; 1098 pserialize_read_exit(_s); 1099 ip->ip_dst.s_addr = INADDR_CARP_GROUP; 1100 1101 ch_ptr = (struct carp_header *)(&ip[1]); 1102 memcpy(ch_ptr, &ch, sizeof(ch)); 1103 carp_prepare_ad(m, sc, ch_ptr); 1104 1105 m->m_data += sizeof(*ip); 1106 ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip)); 1107 m->m_data -= sizeof(*ip); 1108 1109 nanotime(&sc->sc_if.if_lastchange); 1110 if_statadd2(&sc->sc_if, if_opackets, 1, if_obytes, len); 1111 CARP_STATINC(CARP_STAT_OPACKETS); 1112 1113 error = ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, 1114 NULL); 1115 if (error) { 1116 if (error == ENOBUFS) 1117 CARP_STATINC(CARP_STAT_ONOMEM); 1118 else 1119 CARP_LOG(sc, ("ip_output failed: %d", error)); 1120 if_statinc(&sc->sc_if, if_oerrors); 1121 if (sc->sc_sendad_errors < INT_MAX) 1122 sc->sc_sendad_errors++; 1123 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { 1124 carp_suppress_preempt++; 1125 if (carp_suppress_preempt == 1) 1126 carp_send_ad_all(); 1127 } 1128 sc->sc_sendad_success = 0; 1129 } else { 1130 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { 1131 if (++sc->sc_sendad_success >= 1132 CARP_SENDAD_MIN_SUCCESS) { 1133 carp_suppress_preempt--; 1134 sc->sc_sendad_errors = 0; 1135 } 1136 } else 1137 sc->sc_sendad_errors = 0; 1138 } 1139 } 1140 #endif /* INET */ 1141 #ifdef INET6 1142 if (sc->sc_naddrs6) { 1143 struct ip6_hdr *ip6; 1144 struct ifaddr *ifa; 1145 int _s; 1146 1147 MGETHDR(m, M_DONTWAIT, MT_HEADER); 1148 if (m == NULL) { 1149 if_statinc(&sc->sc_if, if_oerrors); 1150 CARP_STATINC(CARP_STAT_ONOMEM); 1151 /* XXX maybe less ? */ 1152 goto retry_later; 1153 } 1154 MCLAIM(m, &carp_proto6_mowner_tx); 1155 len = sizeof(*ip6) + sizeof(ch); 1156 m->m_pkthdr.len = len; 1157 m_reset_rcvif(m); 1158 m->m_len = len; 1159 m_align(m, m->m_len); 1160 m->m_flags |= M_MCAST; 1161 ip6 = mtod(m, struct ip6_hdr *); 1162 memset(ip6, 0, sizeof(*ip6)); 1163 ip6->ip6_vfc |= IPV6_VERSION; 1164 ip6->ip6_hlim = CARP_DFLTTL; 1165 ip6->ip6_nxt = IPPROTO_CARP; 1166 1167 /* set the source address */ 1168 memset(&sa, 0, sizeof(sa)); 1169 sa.sa_family = AF_INET6; 1170 _s = pserialize_read_enter(); 1171 ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev); 1172 if (ifa == NULL) /* This should never happen with IPv6 */ 1173 memset(&ip6->ip6_src, 0, sizeof(struct in6_addr)); 1174 else 1175 bcopy(ifatoia6(ifa)->ia_addr.sin6_addr.s6_addr, 1176 &ip6->ip6_src, sizeof(struct in6_addr)); 1177 pserialize_read_exit(_s); 1178 /* set the multicast destination */ 1179 1180 ip6->ip6_dst.s6_addr16[0] = htons(0xff02); 1181 ip6->ip6_dst.s6_addr8[15] = 0x12; 1182 if (in6_setscope(&ip6->ip6_dst, &sc->sc_if, NULL) != 0) { 1183 if_statinc(&sc->sc_if, if_oerrors); 1184 m_freem(m); 1185 CARP_LOG(sc, ("in6_setscope failed")); 1186 goto retry_later; 1187 } 1188 1189 ch_ptr = (struct carp_header *)(&ip6[1]); 1190 memcpy(ch_ptr, &ch, sizeof(ch)); 1191 carp_prepare_ad(m, sc, ch_ptr); 1192 1193 ch_ptr->carp_cksum = carp6_cksum(m, sizeof(*ip6), 1194 len - sizeof(*ip6)); 1195 1196 nanotime(&sc->sc_if.if_lastchange); 1197 if_statadd2(&sc->sc_if, if_opackets, 1, if_obytes, len); 1198 CARP_STATINC(CARP_STAT_OPACKETS6); 1199 1200 error = ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL); 1201 if (error) { 1202 if (error == ENOBUFS) 1203 CARP_STATINC(CARP_STAT_ONOMEM); 1204 else 1205 CARP_LOG(sc, ("ip6_output failed: %d", error)); 1206 if_statinc(&sc->sc_if, if_oerrors); 1207 if (sc->sc_sendad_errors < INT_MAX) 1208 sc->sc_sendad_errors++; 1209 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { 1210 carp_suppress_preempt++; 1211 if (carp_suppress_preempt == 1) 1212 carp_send_ad_all(); 1213 } 1214 sc->sc_sendad_success = 0; 1215 } else { 1216 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { 1217 if (++sc->sc_sendad_success >= 1218 CARP_SENDAD_MIN_SUCCESS) { 1219 carp_suppress_preempt--; 1220 sc->sc_sendad_errors = 0; 1221 } 1222 } else 1223 sc->sc_sendad_errors = 0; 1224 } 1225 } 1226 #endif /* INET6 */ 1227 1228 retry_later: 1229 splx(s); 1230 KERNEL_UNLOCK_ONE(NULL); 1231 if (advbase != 255 || advskew != 255) 1232 callout_schedule(&sc->sc_ad_tmo, tvtohz(&tv)); 1233 } 1234 1235 /* 1236 * Broadcast a gratuitous ARP request containing 1237 * the virtual router MAC address for each IP address 1238 * associated with the virtual router. 1239 */ 1240 static void 1241 carp_send_arp(struct carp_softc *sc) 1242 { 1243 struct ifaddr *ifa; 1244 int s, bound; 1245 1246 KERNEL_LOCK(1, NULL); 1247 bound = curlwp_bind(); 1248 s = pserialize_read_enter(); 1249 IFADDR_READER_FOREACH(ifa, &sc->sc_if) { 1250 struct psref psref; 1251 1252 if (ifa->ifa_addr->sa_family != AF_INET) 1253 continue; 1254 1255 ifa_acquire(ifa, &psref); 1256 pserialize_read_exit(s); 1257 1258 arpannounce(sc->sc_carpdev, ifa, CLLADDR(sc->sc_if.if_sadl)); 1259 1260 s = pserialize_read_enter(); 1261 ifa_release(ifa, &psref); 1262 } 1263 pserialize_read_exit(s); 1264 curlwp_bindx(bound); 1265 KERNEL_UNLOCK_ONE(NULL); 1266 } 1267 1268 #ifdef INET6 1269 static void 1270 carp_send_na(struct carp_softc *sc) 1271 { 1272 struct ifaddr *ifa; 1273 struct in6_addr *in6; 1274 static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT; 1275 int s, bound; 1276 1277 KERNEL_LOCK(1, NULL); 1278 bound = curlwp_bind(); 1279 s = pserialize_read_enter(); 1280 IFADDR_READER_FOREACH(ifa, &sc->sc_if) { 1281 struct psref psref; 1282 1283 if (ifa->ifa_addr->sa_family != AF_INET6) 1284 continue; 1285 1286 ifa_acquire(ifa, &psref); 1287 pserialize_read_exit(s); 1288 1289 in6 = &ifatoia6(ifa)->ia_addr.sin6_addr; 1290 nd6_na_output(sc->sc_carpdev, &mcast, in6, 1291 ND_NA_FLAG_OVERRIDE, 1, NULL); 1292 1293 s = pserialize_read_enter(); 1294 ifa_release(ifa, &psref); 1295 } 1296 pserialize_read_exit(s); 1297 curlwp_bindx(bound); 1298 KERNEL_UNLOCK_ONE(NULL); 1299 } 1300 #endif /* INET6 */ 1301 1302 /* 1303 * Based on bridge_hash() in if_bridge.c 1304 */ 1305 #define mix(a,b,c) \ 1306 do { \ 1307 a -= b; a -= c; a ^= (c >> 13); \ 1308 b -= c; b -= a; b ^= (a << 8); \ 1309 c -= a; c -= b; c ^= (b >> 13); \ 1310 a -= b; a -= c; a ^= (c >> 12); \ 1311 b -= c; b -= a; b ^= (a << 16); \ 1312 c -= a; c -= b; c ^= (b >> 5); \ 1313 a -= b; a -= c; a ^= (c >> 3); \ 1314 b -= c; b -= a; b ^= (a << 10); \ 1315 c -= a; c -= b; c ^= (b >> 15); \ 1316 } while (0) 1317 1318 static u_int32_t 1319 carp_hash(struct carp_softc *sc, u_char *src) 1320 { 1321 u_int32_t a = 0x9e3779b9, b = sc->sc_hashkey[0], c = sc->sc_hashkey[1]; 1322 1323 c += sc->sc_key[3] << 24; 1324 c += sc->sc_key[2] << 16; 1325 c += sc->sc_key[1] << 8; 1326 c += sc->sc_key[0]; 1327 b += src[5] << 8; 1328 b += src[4]; 1329 a += src[3] << 24; 1330 a += src[2] << 16; 1331 a += src[1] << 8; 1332 a += src[0]; 1333 1334 mix(a, b, c); 1335 return (c); 1336 } 1337 1338 static int 1339 carp_addrcount(struct carp_if *cif, struct in_ifaddr *ia, int type) 1340 { 1341 struct carp_softc *vh; 1342 struct ifaddr *ifa; 1343 int count = 0; 1344 1345 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1346 if ((type == CARP_COUNT_RUNNING && 1347 (vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) == 1348 (IFF_UP|IFF_RUNNING)) || 1349 (type == CARP_COUNT_MASTER && vh->sc_state == MASTER)) { 1350 int s = pserialize_read_enter(); 1351 IFADDR_READER_FOREACH(ifa, &vh->sc_if) { 1352 if (ifa->ifa_addr->sa_family == AF_INET && 1353 ia->ia_addr.sin_addr.s_addr == 1354 ifatoia(ifa)->ia_addr.sin_addr.s_addr) 1355 count++; 1356 } 1357 pserialize_read_exit(s); 1358 } 1359 } 1360 return (count); 1361 } 1362 1363 int 1364 carp_iamatch(struct in_ifaddr *ia, u_char *src, 1365 u_int32_t *count, u_int32_t index) 1366 { 1367 struct carp_softc *sc = ia->ia_ifp->if_softc; 1368 1369 if (carp_opts[CARPCTL_ARPBALANCE]) { 1370 /* 1371 * We use the source ip to decide which virtual host should 1372 * handle the request. If we're master of that virtual host, 1373 * then we respond, otherwise, just drop the arp packet on 1374 * the floor. 1375 */ 1376 1377 /* Count the elegible carp interfaces with this address */ 1378 if (*count == 0) 1379 *count = carp_addrcount( 1380 (struct carp_if *)ia->ia_ifp->if_carpdev->if_carp, 1381 ia, CARP_COUNT_RUNNING); 1382 1383 /* This should never happen, but... */ 1384 if (*count == 0) 1385 return (0); 1386 1387 if (carp_hash(sc, src) % *count == index - 1 && 1388 sc->sc_state == MASTER) { 1389 return (1); 1390 } 1391 } else { 1392 if (sc->sc_state == MASTER) 1393 return (1); 1394 } 1395 1396 return (0); 1397 } 1398 1399 #ifdef INET6 1400 struct ifaddr * 1401 carp_iamatch6(void *v, struct in6_addr *taddr) 1402 { 1403 struct carp_if *cif = v; 1404 struct carp_softc *vh; 1405 struct ifaddr *ifa; 1406 1407 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1408 int s = pserialize_read_enter(); 1409 IFADDR_READER_FOREACH(ifa, &vh->sc_if) { 1410 if (IN6_ARE_ADDR_EQUAL(taddr, 1411 &ifatoia6(ifa)->ia_addr.sin6_addr) && 1412 ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) == 1413 (IFF_UP|IFF_RUNNING)) && vh->sc_state == MASTER) 1414 return (ifa); 1415 } 1416 pserialize_read_exit(s); 1417 } 1418 1419 return (NULL); 1420 } 1421 #endif /* INET6 */ 1422 1423 struct ifnet * 1424 carp_ourether(void *v, struct ether_header *eh, u_char iftype, int src) 1425 { 1426 struct carp_if *cif = (struct carp_if *)v; 1427 struct carp_softc *vh; 1428 u_int8_t *ena; 1429 1430 if (src) 1431 ena = (u_int8_t *)&eh->ether_shost; 1432 else 1433 ena = (u_int8_t *)&eh->ether_dhost; 1434 1435 switch (iftype) { 1436 case IFT_ETHER: 1437 case IFT_FDDI: 1438 if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1) 1439 return (NULL); 1440 break; 1441 case IFT_ISO88025: 1442 if (ena[0] != 3 || ena[1] || ena[4] || ena[5]) 1443 return (NULL); 1444 break; 1445 default: 1446 return (NULL); 1447 break; 1448 } 1449 1450 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) 1451 if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) == 1452 (IFF_UP|IFF_RUNNING) && vh->sc_state == MASTER && 1453 !memcmp(ena, CLLADDR(vh->sc_if.if_sadl), 1454 ETHER_ADDR_LEN)) { 1455 return (&vh->sc_if); 1456 } 1457 1458 return (NULL); 1459 } 1460 1461 int 1462 carp_input(struct mbuf *m, u_int8_t *shost, u_int8_t *dhost, u_int16_t etype) 1463 { 1464 struct ether_header eh; 1465 struct carp_if *cif = (struct carp_if *)m_get_rcvif_NOMPSAFE(m)->if_carp; 1466 struct ifnet *ifp; 1467 1468 memcpy(&eh.ether_shost, shost, sizeof(eh.ether_shost)); 1469 memcpy(&eh.ether_dhost, dhost, sizeof(eh.ether_dhost)); 1470 eh.ether_type = etype; 1471 1472 if (m->m_flags & (M_BCAST|M_MCAST)) { 1473 struct carp_softc *vh; 1474 struct mbuf *m0; 1475 1476 /* 1477 * XXX Should really check the list of multicast addresses 1478 * for each CARP interface _before_ copying. 1479 */ 1480 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1481 m0 = m_copym(m, 0, M_COPYALL, M_DONTWAIT); 1482 if (m0 == NULL) 1483 continue; 1484 m_set_rcvif(m0, &vh->sc_if); 1485 ether_input(&vh->sc_if, m0); 1486 } 1487 return (1); 1488 } 1489 1490 ifp = carp_ourether(cif, &eh, m_get_rcvif_NOMPSAFE(m)->if_type, 0); 1491 if (ifp == NULL) { 1492 return (1); 1493 } 1494 1495 m_set_rcvif(m, ifp); 1496 1497 bpf_mtap(ifp, m, BPF_D_IN); 1498 if_statinc(ifp, if_ipackets); 1499 ether_input(ifp, m); 1500 return (0); 1501 } 1502 1503 static void 1504 carp_master_down(void *v) 1505 { 1506 struct carp_softc *sc = v; 1507 1508 switch (sc->sc_state) { 1509 case INIT: 1510 printf("%s: master_down event in INIT state\n", 1511 sc->sc_if.if_xname); 1512 break; 1513 case MASTER: 1514 break; 1515 case BACKUP: 1516 CARP_LOG(sc, ("INIT -> MASTER (preempting)")); 1517 carp_set_state(sc, MASTER); 1518 carp_send_ad(sc); 1519 carp_send_arp(sc); 1520 #ifdef INET6 1521 carp_send_na(sc); 1522 #endif /* INET6 */ 1523 carp_setrun(sc, 0); 1524 carp_setroute(sc, RTM_ADD); 1525 break; 1526 } 1527 } 1528 1529 /* 1530 * When in backup state, af indicates whether to reset the master down timer 1531 * for v4 or v6. If it's set to zero, reset the ones which are already pending. 1532 */ 1533 static void 1534 carp_setrun(struct carp_softc *sc, sa_family_t af) 1535 { 1536 struct timeval tv; 1537 1538 if (sc->sc_carpdev == NULL) { 1539 sc->sc_if.if_flags &= ~IFF_RUNNING; 1540 carp_set_state(sc, INIT); 1541 return; 1542 } 1543 1544 if (sc->sc_if.if_flags & IFF_UP && sc->sc_vhid > 0 && 1545 (sc->sc_naddrs || sc->sc_naddrs6) && !sc->sc_suppress) { 1546 sc->sc_if.if_flags |= IFF_RUNNING; 1547 } else { 1548 sc->sc_if.if_flags &= ~IFF_RUNNING; 1549 carp_setroute(sc, RTM_DELETE); 1550 return; 1551 } 1552 1553 switch (sc->sc_state) { 1554 case INIT: 1555 carp_set_state(sc, BACKUP); 1556 carp_setroute(sc, RTM_DELETE); 1557 carp_setrun(sc, 0); 1558 break; 1559 case BACKUP: 1560 callout_stop(&sc->sc_ad_tmo); 1561 tv.tv_sec = 3 * sc->sc_advbase; 1562 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1563 switch (af) { 1564 #ifdef INET 1565 case AF_INET: 1566 callout_schedule(&sc->sc_md_tmo, tvtohz(&tv)); 1567 break; 1568 #endif /* INET */ 1569 #ifdef INET6 1570 case AF_INET6: 1571 callout_schedule(&sc->sc_md6_tmo, tvtohz(&tv)); 1572 break; 1573 #endif /* INET6 */ 1574 default: 1575 if (sc->sc_naddrs) 1576 callout_schedule(&sc->sc_md_tmo, tvtohz(&tv)); 1577 #ifdef INET6 1578 if (sc->sc_naddrs6) 1579 callout_schedule(&sc->sc_md6_tmo, tvtohz(&tv)); 1580 #endif /* INET6 */ 1581 break; 1582 } 1583 break; 1584 case MASTER: 1585 tv.tv_sec = sc->sc_advbase; 1586 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1587 callout_schedule(&sc->sc_ad_tmo, tvtohz(&tv)); 1588 break; 1589 } 1590 } 1591 1592 static void 1593 carp_multicast_cleanup(struct carp_softc *sc) 1594 { 1595 struct ip_moptions *imo = &sc->sc_imo; 1596 #ifdef INET6 1597 struct ip6_moptions *im6o = &sc->sc_im6o; 1598 #endif 1599 u_int16_t n = imo->imo_num_memberships; 1600 1601 /* Clean up our own multicast memberships */ 1602 while (n-- > 0) { 1603 if (imo->imo_membership[n] != NULL) { 1604 in_delmulti(imo->imo_membership[n]); 1605 imo->imo_membership[n] = NULL; 1606 } 1607 } 1608 imo->imo_num_memberships = 0; 1609 imo->imo_multicast_if_index = 0; 1610 1611 #ifdef INET6 1612 while (!LIST_EMPTY(&im6o->im6o_memberships)) { 1613 struct in6_multi_mship *imm = 1614 LIST_FIRST(&im6o->im6o_memberships); 1615 1616 LIST_REMOVE(imm, i6mm_chain); 1617 in6_leavegroup(imm); 1618 } 1619 im6o->im6o_multicast_if_index = 0; 1620 #endif 1621 1622 /* And any other multicast memberships */ 1623 carp_ether_purgemulti(sc); 1624 } 1625 1626 static int 1627 carp_set_ifp(struct carp_softc *sc, struct ifnet *ifp) 1628 { 1629 struct carp_if *cif, *ncif = NULL; 1630 struct carp_softc *vr, *after = NULL; 1631 int myself = 0, error = 0; 1632 int s; 1633 1634 if (ifp == sc->sc_carpdev) 1635 return (0); 1636 1637 if (ifp != NULL) { 1638 if ((ifp->if_flags & IFF_MULTICAST) == 0) 1639 return (EADDRNOTAVAIL); 1640 1641 if (ifp->if_type == IFT_CARP) 1642 return (EINVAL); 1643 1644 if (ifp->if_carp == NULL) { 1645 ncif = malloc(sizeof(*cif), M_IFADDR, M_WAITOK); 1646 if ((error = ifpromisc(ifp, 1))) { 1647 free(ncif, M_IFADDR); 1648 return (error); 1649 } 1650 1651 ncif->vhif_ifp = ifp; 1652 TAILQ_INIT(&ncif->vhif_vrs); 1653 } else { 1654 cif = (struct carp_if *)ifp->if_carp; 1655 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) 1656 if (vr != sc && vr->sc_vhid == sc->sc_vhid) 1657 return (EINVAL); 1658 } 1659 1660 /* detach from old interface */ 1661 if (sc->sc_carpdev != NULL) 1662 carpdetach(sc); 1663 1664 /* join multicast groups */ 1665 if (sc->sc_naddrs < 0 && 1666 (error = carp_join_multicast(sc)) != 0) { 1667 if (ncif != NULL) 1668 free(ncif, M_IFADDR); 1669 return (error); 1670 } 1671 1672 #ifdef INET6 1673 if (sc->sc_naddrs6 < 0 && 1674 (error = carp_join_multicast6(sc)) != 0) { 1675 if (ncif != NULL) 1676 free(ncif, M_IFADDR); 1677 carp_multicast_cleanup(sc); 1678 return (error); 1679 } 1680 #endif 1681 1682 /* attach carp interface to physical interface */ 1683 if (ncif != NULL) 1684 ifp->if_carp = (void *)ncif; 1685 sc->sc_carpdev = ifp; 1686 sc->sc_if.if_capabilities = ifp->if_capabilities & 1687 (IFCAP_TSOv4 | IFCAP_TSOv6 | 1688 IFCAP_CSUM_IPv4_Tx|IFCAP_CSUM_IPv4_Rx| 1689 IFCAP_CSUM_TCPv4_Tx|IFCAP_CSUM_TCPv4_Rx| 1690 IFCAP_CSUM_UDPv4_Tx|IFCAP_CSUM_UDPv4_Rx| 1691 IFCAP_CSUM_TCPv6_Tx|IFCAP_CSUM_TCPv6_Rx| 1692 IFCAP_CSUM_UDPv6_Tx|IFCAP_CSUM_UDPv6_Rx); 1693 1694 cif = (struct carp_if *)ifp->if_carp; 1695 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { 1696 if (vr == sc) 1697 myself = 1; 1698 if (vr->sc_vhid < sc->sc_vhid) 1699 after = vr; 1700 } 1701 1702 if (!myself) { 1703 /* We're trying to keep things in order */ 1704 if (after == NULL) { 1705 TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list); 1706 } else { 1707 TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, 1708 sc, sc_list); 1709 } 1710 cif->vhif_nvrs++; 1711 } 1712 if (sc->sc_naddrs || sc->sc_naddrs6) 1713 sc->sc_if.if_flags |= IFF_UP; 1714 carp_set_enaddr(sc); 1715 sc->sc_linkstate_hook = if_linkstate_change_establish(ifp, 1716 carp_carpdev_state, (void *)ifp); 1717 KERNEL_LOCK(1, NULL); 1718 s = splnet(); 1719 carp_carpdev_state(ifp); 1720 splx(s); 1721 KERNEL_UNLOCK_ONE(NULL); 1722 } else { 1723 carpdetach(sc); 1724 sc->sc_if.if_flags &= ~(IFF_UP|IFF_RUNNING); 1725 } 1726 return (0); 1727 } 1728 1729 static void 1730 carp_set_enaddr(struct carp_softc *sc) 1731 { 1732 struct ifnet *ifp = &sc->sc_if; 1733 uint8_t enaddr[ETHER_ADDR_LEN]; 1734 1735 if (sc->sc_vhid == -1) { 1736 ifp->if_addrlen = 0; 1737 if_alloc_sadl(ifp); 1738 return; 1739 } 1740 1741 if (sc->sc_carpdev && sc->sc_carpdev->if_type == IFT_ISO88025) { 1742 enaddr[0] = 3; 1743 enaddr[1] = 0; 1744 enaddr[2] = 0x40 >> (sc->sc_vhid - 1); 1745 enaddr[3] = 0x40000 >> (sc->sc_vhid - 1); 1746 enaddr[4] = 0; 1747 enaddr[5] = 0; 1748 } else { 1749 enaddr[0] = 0; 1750 enaddr[1] = 0; 1751 enaddr[2] = 0x5e; 1752 enaddr[3] = 0; 1753 enaddr[4] = 1; 1754 enaddr[5] = sc->sc_vhid; 1755 } 1756 1757 if_set_sadl(ifp, enaddr, sizeof(enaddr), false); 1758 } 1759 1760 #if 0 1761 static void 1762 carp_addr_updated(void *v) 1763 { 1764 struct carp_softc *sc = (struct carp_softc *) v; 1765 struct ifaddr *ifa; 1766 int new_naddrs = 0, new_naddrs6 = 0; 1767 1768 IFADDR_READER_FOREACH(ifa, &sc->sc_if) { 1769 if (ifa->ifa_addr->sa_family == AF_INET) 1770 new_naddrs++; 1771 else if (ifa->ifa_addr->sa_family == AF_INET6) 1772 new_naddrs6++; 1773 } 1774 1775 /* Handle a callback after SIOCDIFADDR */ 1776 if (new_naddrs < sc->sc_naddrs || new_naddrs6 < sc->sc_naddrs6) { 1777 struct in_addr mc_addr; 1778 1779 sc->sc_naddrs = new_naddrs; 1780 sc->sc_naddrs6 = new_naddrs6; 1781 1782 /* Re-establish multicast membership removed by in_control */ 1783 mc_addr.s_addr = INADDR_CARP_GROUP; 1784 if (!in_multi_group(mc_addr, &sc->sc_if, 0)) { 1785 memset(&sc->sc_imo, 0, sizeof(sc->sc_imo)); 1786 1787 if (sc->sc_carpdev != NULL && sc->sc_naddrs > 0) 1788 carp_join_multicast(sc); 1789 } 1790 1791 if (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) { 1792 sc->sc_if.if_flags &= ~IFF_UP; 1793 carp_set_state(sc, INIT); 1794 } else 1795 carp_hmac_prepare(sc); 1796 } 1797 1798 carp_setrun(sc, 0); 1799 } 1800 #endif 1801 1802 static int 1803 carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin) 1804 { 1805 struct ifnet *ifp = sc->sc_carpdev; 1806 struct in_ifaddr *ia, *ia_if; 1807 int error = 0; 1808 int s; 1809 1810 if (sin->sin_addr.s_addr == 0) { 1811 if (!(sc->sc_if.if_flags & IFF_UP)) 1812 carp_set_state(sc, INIT); 1813 if (sc->sc_naddrs) 1814 sc->sc_if.if_flags |= IFF_UP; 1815 carp_setrun(sc, 0); 1816 return (0); 1817 } 1818 1819 /* we have to do this by hand to ensure we don't match on ourselves */ 1820 ia_if = NULL; 1821 s = pserialize_read_enter(); 1822 IN_ADDRLIST_READER_FOREACH(ia) { 1823 /* and, yeah, we need a multicast-capable iface too */ 1824 if (ia->ia_ifp != &sc->sc_if && 1825 ia->ia_ifp->if_type != IFT_CARP && 1826 (ia->ia_ifp->if_flags & IFF_MULTICAST) && 1827 (sin->sin_addr.s_addr & ia->ia_subnetmask) == 1828 ia->ia_subnet) { 1829 if (!ia_if) 1830 ia_if = ia; 1831 } 1832 } 1833 1834 if (ia_if) { 1835 ia = ia_if; 1836 if (ifp) { 1837 if (ifp != ia->ia_ifp) 1838 return (EADDRNOTAVAIL); 1839 } else { 1840 /* FIXME NOMPSAFE */ 1841 ifp = ia->ia_ifp; 1842 } 1843 } 1844 pserialize_read_exit(s); 1845 1846 if ((error = carp_set_ifp(sc, ifp))) 1847 return (error); 1848 1849 if (sc->sc_carpdev == NULL) 1850 return (EADDRNOTAVAIL); 1851 1852 if (sc->sc_naddrs == 0 && (error = carp_join_multicast(sc)) != 0) 1853 return (error); 1854 1855 sc->sc_naddrs++; 1856 if (sc->sc_carpdev != NULL) 1857 sc->sc_if.if_flags |= IFF_UP; 1858 1859 carp_set_state(sc, INIT); 1860 carp_setrun(sc, 0); 1861 1862 /* 1863 * Hook if_addrhooks so that we get a callback after in_ifinit has run, 1864 * to correct any inappropriate routes that it inserted. 1865 */ 1866 if (sc->ah_cookie == 0) { 1867 /* XXX link address hook */ 1868 } 1869 1870 return (0); 1871 } 1872 1873 static int 1874 carp_join_multicast(struct carp_softc *sc) 1875 { 1876 struct ip_moptions *imo = &sc->sc_imo, tmpimo; 1877 struct in_addr addr; 1878 1879 memset(&tmpimo, 0, sizeof(tmpimo)); 1880 addr.s_addr = INADDR_CARP_GROUP; 1881 if ((tmpimo.imo_membership[0] = 1882 in_addmulti(&addr, &sc->sc_if)) == NULL) { 1883 return (ENOBUFS); 1884 } 1885 1886 imo->imo_membership[0] = tmpimo.imo_membership[0]; 1887 imo->imo_num_memberships = 1; 1888 imo->imo_multicast_if_index = sc->sc_if.if_index; 1889 imo->imo_multicast_ttl = CARP_DFLTTL; 1890 imo->imo_multicast_loop = 0; 1891 return (0); 1892 } 1893 1894 1895 #ifdef INET6 1896 static int 1897 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6) 1898 { 1899 struct ifnet *ifp = sc->sc_carpdev; 1900 struct in6_ifaddr *ia, *ia_if; 1901 int error = 0; 1902 int s; 1903 1904 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { 1905 if (!(sc->sc_if.if_flags & IFF_UP)) 1906 carp_set_state(sc, INIT); 1907 if (sc->sc_naddrs6) 1908 sc->sc_if.if_flags |= IFF_UP; 1909 carp_setrun(sc, 0); 1910 return (0); 1911 } 1912 1913 /* we have to do this by hand to ensure we don't match on ourselves */ 1914 ia_if = NULL; 1915 s = pserialize_read_enter(); 1916 IN6_ADDRLIST_READER_FOREACH(ia) { 1917 int i; 1918 1919 for (i = 0; i < 4; i++) { 1920 if ((sin6->sin6_addr.s6_addr32[i] & 1921 ia->ia_prefixmask.sin6_addr.s6_addr32[i]) != 1922 (ia->ia_addr.sin6_addr.s6_addr32[i] & 1923 ia->ia_prefixmask.sin6_addr.s6_addr32[i])) 1924 break; 1925 } 1926 /* and, yeah, we need a multicast-capable iface too */ 1927 if (ia->ia_ifp != &sc->sc_if && 1928 ia->ia_ifp->if_type != IFT_CARP && 1929 (ia->ia_ifp->if_flags & IFF_MULTICAST) && 1930 (i == 4)) { 1931 if (!ia_if) 1932 ia_if = ia; 1933 } 1934 } 1935 pserialize_read_exit(s); 1936 1937 if (ia_if) { 1938 ia = ia_if; 1939 if (sc->sc_carpdev) { 1940 if (sc->sc_carpdev != ia->ia_ifp) 1941 return (EADDRNOTAVAIL); 1942 } else { 1943 ifp = ia->ia_ifp; 1944 } 1945 } 1946 1947 if ((error = carp_set_ifp(sc, ifp))) 1948 return (error); 1949 1950 if (sc->sc_carpdev == NULL) 1951 return (EADDRNOTAVAIL); 1952 1953 if (sc->sc_naddrs6 == 0 && (error = carp_join_multicast6(sc)) != 0) 1954 return (error); 1955 1956 sc->sc_naddrs6++; 1957 if (sc->sc_carpdev != NULL) 1958 sc->sc_if.if_flags |= IFF_UP; 1959 carp_set_state(sc, INIT); 1960 carp_setrun(sc, 0); 1961 1962 return (0); 1963 } 1964 1965 static int 1966 carp_join_multicast6(struct carp_softc *sc) 1967 { 1968 struct in6_multi_mship *imm, *imm2; 1969 struct ip6_moptions *im6o = &sc->sc_im6o; 1970 struct sockaddr_in6 addr6; 1971 int error; 1972 1973 /* Join IPv6 CARP multicast group */ 1974 memset(&addr6, 0, sizeof(addr6)); 1975 addr6.sin6_family = AF_INET6; 1976 addr6.sin6_len = sizeof(addr6); 1977 addr6.sin6_addr.s6_addr16[0] = htons(0xff02); 1978 addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index); 1979 addr6.sin6_addr.s6_addr8[15] = 0x12; 1980 if ((imm = in6_joingroup(&sc->sc_if, 1981 &addr6.sin6_addr, &error, 0)) == NULL) { 1982 return (error); 1983 } 1984 /* join solicited multicast address */ 1985 memset(&addr6.sin6_addr, 0, sizeof(addr6.sin6_addr)); 1986 addr6.sin6_addr.s6_addr16[0] = htons(0xff02); 1987 addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index); 1988 addr6.sin6_addr.s6_addr32[1] = 0; 1989 addr6.sin6_addr.s6_addr32[2] = htonl(1); 1990 addr6.sin6_addr.s6_addr32[3] = 0; 1991 addr6.sin6_addr.s6_addr8[12] = 0xff; 1992 if ((imm2 = in6_joingroup(&sc->sc_if, 1993 &addr6.sin6_addr, &error, 0)) == NULL) { 1994 in6_leavegroup(imm); 1995 return (error); 1996 } 1997 1998 /* apply v6 multicast membership */ 1999 im6o->im6o_multicast_if_index = sc->sc_if.if_index; 2000 if (imm) 2001 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, 2002 i6mm_chain); 2003 if (imm2) 2004 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm2, 2005 i6mm_chain); 2006 2007 return (0); 2008 } 2009 2010 #endif /* INET6 */ 2011 2012 static int 2013 carp_ioctl(struct ifnet *ifp, u_long cmd, void *data) 2014 { 2015 struct lwp *l = curlwp; /* XXX */ 2016 struct carp_softc *sc = ifp->if_softc, *vr; 2017 struct carpreq carpr; 2018 struct ifaddr *ifa; 2019 struct ifreq *ifr; 2020 struct ifnet *cdev = NULL; 2021 int error = 0; 2022 2023 ifa = (struct ifaddr *)data; 2024 ifr = (struct ifreq *)data; 2025 2026 switch (cmd) { 2027 case SIOCINITIFADDR: 2028 switch (ifa->ifa_addr->sa_family) { 2029 #ifdef INET 2030 case AF_INET: 2031 sc->sc_if.if_flags |= IFF_UP; 2032 memcpy(ifa->ifa_dstaddr, ifa->ifa_addr, 2033 sizeof(struct sockaddr)); 2034 error = carp_set_addr(sc, satosin(ifa->ifa_addr)); 2035 break; 2036 #endif /* INET */ 2037 #ifdef INET6 2038 case AF_INET6: 2039 sc->sc_if.if_flags|= IFF_UP; 2040 error = carp_set_addr6(sc, satosin6(ifa->ifa_addr)); 2041 break; 2042 #endif /* INET6 */ 2043 default: 2044 error = EAFNOSUPPORT; 2045 break; 2046 } 2047 break; 2048 2049 case SIOCSIFFLAGS: 2050 if ((error = ifioctl_common(ifp, cmd, data)) != 0) 2051 break; 2052 if (sc->sc_state != INIT && !(ifr->ifr_flags & IFF_UP)) { 2053 callout_stop(&sc->sc_ad_tmo); 2054 callout_stop(&sc->sc_md_tmo); 2055 callout_stop(&sc->sc_md6_tmo); 2056 if (sc->sc_state == MASTER) { 2057 /* we need the interface up to bow out */ 2058 sc->sc_if.if_flags |= IFF_UP; 2059 sc->sc_bow_out = 1; 2060 carp_send_ad(sc); 2061 } 2062 sc->sc_if.if_flags &= ~IFF_UP; 2063 carp_set_state(sc, INIT); 2064 carp_setrun(sc, 0); 2065 } else if (sc->sc_state == INIT && (ifr->ifr_flags & IFF_UP)) { 2066 sc->sc_if.if_flags |= IFF_UP; 2067 carp_setrun(sc, 0); 2068 } 2069 carp_update_link_state(sc); 2070 break; 2071 2072 case SIOCSVH: 2073 if (l == NULL) 2074 break; 2075 if ((error = kauth_authorize_network(l->l_cred, 2076 KAUTH_NETWORK_INTERFACE, 2077 KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, (void *)cmd, 2078 NULL)) != 0) 2079 break; 2080 if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr))) 2081 break; 2082 error = 1; 2083 if (carpr.carpr_carpdev[0] != '\0' && 2084 (cdev = ifunit(carpr.carpr_carpdev)) == NULL) 2085 return (EINVAL); 2086 if ((error = carp_set_ifp(sc, cdev))) 2087 return (error); 2088 if (sc->sc_state != INIT && carpr.carpr_state != sc->sc_state) { 2089 switch (carpr.carpr_state) { 2090 case BACKUP: 2091 callout_stop(&sc->sc_ad_tmo); 2092 carp_set_state(sc, BACKUP); 2093 carp_setrun(sc, 0); 2094 carp_setroute(sc, RTM_DELETE); 2095 break; 2096 case MASTER: 2097 carp_master_down(sc); 2098 break; 2099 default: 2100 break; 2101 } 2102 } 2103 if (carpr.carpr_vhid > 0) { 2104 if (carpr.carpr_vhid > 255) { 2105 error = EINVAL; 2106 break; 2107 } 2108 if (sc->sc_carpdev) { 2109 struct carp_if *cif; 2110 cif = (struct carp_if *)sc->sc_carpdev->if_carp; 2111 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) 2112 if (vr != sc && 2113 vr->sc_vhid == carpr.carpr_vhid) 2114 return (EINVAL); 2115 } 2116 sc->sc_vhid = carpr.carpr_vhid; 2117 carp_set_enaddr(sc); 2118 carp_set_state(sc, INIT); 2119 error--; 2120 } 2121 if (carpr.carpr_advbase > 0 || carpr.carpr_advskew > 0) { 2122 if (carpr.carpr_advskew > 254) { 2123 error = EINVAL; 2124 break; 2125 } 2126 if (carpr.carpr_advbase > 255) { 2127 error = EINVAL; 2128 break; 2129 } 2130 sc->sc_advbase = carpr.carpr_advbase; 2131 sc->sc_advskew = carpr.carpr_advskew; 2132 error--; 2133 } 2134 memcpy(sc->sc_key, carpr.carpr_key, sizeof(sc->sc_key)); 2135 if (error > 0) 2136 error = EINVAL; 2137 else { 2138 error = 0; 2139 carp_setrun(sc, 0); 2140 } 2141 break; 2142 2143 case SIOCGVH: 2144 memset(&carpr, 0, sizeof(carpr)); 2145 if (sc->sc_carpdev != NULL) 2146 strlcpy(carpr.carpr_carpdev, sc->sc_carpdev->if_xname, 2147 IFNAMSIZ); 2148 carpr.carpr_state = sc->sc_state; 2149 carpr.carpr_vhid = sc->sc_vhid; 2150 carpr.carpr_advbase = sc->sc_advbase; 2151 carpr.carpr_advskew = sc->sc_advskew; 2152 2153 if ((l != NULL) && (error = kauth_authorize_network(l->l_cred, 2154 KAUTH_NETWORK_INTERFACE, 2155 KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, (void *)cmd, 2156 NULL)) == 0) 2157 memcpy(carpr.carpr_key, sc->sc_key, 2158 sizeof(carpr.carpr_key)); 2159 error = copyout(&carpr, ifr->ifr_data, sizeof(carpr)); 2160 break; 2161 2162 case SIOCADDMULTI: 2163 error = carp_ether_addmulti(sc, ifr); 2164 break; 2165 2166 case SIOCDELMULTI: 2167 error = carp_ether_delmulti(sc, ifr); 2168 break; 2169 2170 case SIOCSIFCAP: 2171 if ((error = ifioctl_common(ifp, cmd, data)) == ENETRESET) 2172 error = 0; 2173 break; 2174 2175 default: 2176 error = ether_ioctl(ifp, cmd, data); 2177 } 2178 2179 carp_hmac_prepare(sc); 2180 return (error); 2181 } 2182 2183 2184 /* 2185 * Start output on carp interface. This function should never be called. 2186 */ 2187 static void 2188 carp_start(struct ifnet *ifp) 2189 { 2190 #ifdef DEBUG 2191 printf("%s: start called\n", ifp->if_xname); 2192 #endif 2193 } 2194 2195 int 2196 carp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa, 2197 const struct rtentry *rt) 2198 { 2199 struct carp_softc *sc = ((struct carp_softc *)ifp->if_softc); 2200 KASSERT(KERNEL_LOCKED_P()); 2201 2202 if (sc->sc_carpdev != NULL && sc->sc_state == MASTER) { 2203 return if_output_lock(sc->sc_carpdev, ifp, m, sa, rt); 2204 } else { 2205 m_freem(m); 2206 return (ENETUNREACH); 2207 } 2208 } 2209 2210 static void 2211 carp_set_state(struct carp_softc *sc, int state) 2212 { 2213 static const char *carp_states[] = { CARP_STATES }; 2214 2215 if (sc->sc_state == state) 2216 return; 2217 2218 CARP_LOG(sc, ("state transition from: %s -> to: %s", carp_states[sc->sc_state], carp_states[state])); 2219 2220 sc->sc_state = state; 2221 carp_update_link_state(sc); 2222 } 2223 2224 static void 2225 carp_update_link_state(struct carp_softc *sc) 2226 { 2227 int link_state; 2228 2229 switch (sc->sc_state) { 2230 case BACKUP: 2231 link_state = LINK_STATE_DOWN; 2232 break; 2233 case MASTER: 2234 link_state = LINK_STATE_UP; 2235 break; 2236 default: 2237 /* Not useable, so down makes perfect sense. */ 2238 link_state = LINK_STATE_DOWN; 2239 break; 2240 } 2241 if_link_state_change(&sc->sc_if, link_state); 2242 } 2243 2244 void 2245 carp_carpdev_state(void *v) 2246 { 2247 struct carp_if *cif; 2248 struct carp_softc *sc; 2249 struct ifnet *ifp = v; 2250 2251 if (ifp->if_type == IFT_CARP) 2252 return; 2253 2254 cif = (struct carp_if *)ifp->if_carp; 2255 2256 TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) { 2257 int suppressed = sc->sc_suppress; 2258 2259 if (sc->sc_carpdev->if_link_state == LINK_STATE_DOWN || 2260 !(sc->sc_carpdev->if_flags & IFF_UP)) { 2261 sc->sc_if.if_flags &= ~IFF_RUNNING; 2262 callout_stop(&sc->sc_ad_tmo); 2263 callout_stop(&sc->sc_md_tmo); 2264 callout_stop(&sc->sc_md6_tmo); 2265 carp_set_state(sc, INIT); 2266 sc->sc_suppress = 1; 2267 carp_setrun(sc, 0); 2268 if (!suppressed) { 2269 carp_suppress_preempt++; 2270 if (carp_suppress_preempt == 1) 2271 carp_send_ad_all(); 2272 } 2273 } else { 2274 carp_set_state(sc, INIT); 2275 sc->sc_suppress = 0; 2276 carp_setrun(sc, 0); 2277 if (suppressed) 2278 carp_suppress_preempt--; 2279 } 2280 } 2281 } 2282 2283 static int 2284 carp_ether_addmulti(struct carp_softc *sc, struct ifreq *ifr) 2285 { 2286 const struct sockaddr *sa = ifreq_getaddr(SIOCADDMULTI, ifr); 2287 struct ifnet *ifp; 2288 struct carp_mc_entry *mc; 2289 u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN]; 2290 int error; 2291 2292 ifp = sc->sc_carpdev; 2293 if (ifp == NULL) 2294 return (EINVAL); 2295 2296 error = ether_addmulti(sa, &sc->sc_ac); 2297 if (error != ENETRESET) 2298 return (error); 2299 2300 /* 2301 * This is new multicast address. We have to tell parent 2302 * about it. Also, remember this multicast address so that 2303 * we can delete them on unconfigure. 2304 */ 2305 mc = malloc(sizeof(struct carp_mc_entry), M_DEVBUF, M_NOWAIT); 2306 if (mc == NULL) { 2307 error = ENOMEM; 2308 goto alloc_failed; 2309 } 2310 2311 /* 2312 * As ether_addmulti() returns ENETRESET, following two 2313 * statement shouldn't fail. 2314 */ 2315 (void)ether_multiaddr(sa, addrlo, addrhi); 2316 2317 ETHER_LOCK(&sc->sc_ac); 2318 mc->mc_enm = ether_lookup_multi(addrlo, addrhi, &sc->sc_ac); 2319 ETHER_UNLOCK(&sc->sc_ac); 2320 2321 memcpy(&mc->mc_addr, sa, sa->sa_len); 2322 LIST_INSERT_HEAD(&sc->carp_mc_listhead, mc, mc_entries); 2323 2324 error = if_mcast_op(ifp, SIOCADDMULTI, sa); 2325 if (error != 0) 2326 goto ioctl_failed; 2327 2328 return (error); 2329 2330 ioctl_failed: 2331 LIST_REMOVE(mc, mc_entries); 2332 free(mc, M_DEVBUF); 2333 alloc_failed: 2334 (void)ether_delmulti(sa, &sc->sc_ac); 2335 2336 return (error); 2337 } 2338 2339 static int 2340 carp_ether_delmulti(struct carp_softc *sc, struct ifreq *ifr) 2341 { 2342 const struct sockaddr *sa = ifreq_getaddr(SIOCDELMULTI, ifr); 2343 struct ifnet *ifp; 2344 struct ether_multi *enm; 2345 struct carp_mc_entry *mc; 2346 u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN]; 2347 int error; 2348 2349 ifp = sc->sc_carpdev; 2350 if (ifp == NULL) 2351 return (EINVAL); 2352 2353 /* 2354 * Find a key to lookup carp_mc_entry. We have to do this 2355 * before calling ether_delmulti for obvious reason. 2356 */ 2357 if ((error = ether_multiaddr(sa, addrlo, addrhi)) != 0) 2358 return (error); 2359 2360 ETHER_LOCK(&sc->sc_ac); 2361 enm = ether_lookup_multi(addrlo, addrhi, &sc->sc_ac); 2362 ETHER_UNLOCK(&sc->sc_ac); 2363 if (enm == NULL) 2364 return (EINVAL); 2365 2366 LIST_FOREACH(mc, &sc->carp_mc_listhead, mc_entries) 2367 if (mc->mc_enm == enm) 2368 break; 2369 2370 /* We won't delete entries we didn't add */ 2371 if (mc == NULL) 2372 return (EINVAL); 2373 2374 error = ether_delmulti(sa, &sc->sc_ac); 2375 if (error != ENETRESET) 2376 return (error); 2377 2378 /* We no longer use this multicast address. Tell parent so. */ 2379 error = if_mcast_op(ifp, SIOCDELMULTI, sa); 2380 if (error == 0) { 2381 /* And forget about this address. */ 2382 LIST_REMOVE(mc, mc_entries); 2383 free(mc, M_DEVBUF); 2384 } else 2385 (void)ether_addmulti(sa, &sc->sc_ac); 2386 return (error); 2387 } 2388 2389 /* 2390 * Delete any multicast address we have asked to add from parent 2391 * interface. Called when the carp is being unconfigured. 2392 */ 2393 static void 2394 carp_ether_purgemulti(struct carp_softc *sc) 2395 { 2396 struct ifnet *ifp = sc->sc_carpdev; /* Parent. */ 2397 struct carp_mc_entry *mc; 2398 2399 if (ifp == NULL) 2400 return; 2401 2402 while ((mc = LIST_FIRST(&sc->carp_mc_listhead)) != NULL) { 2403 (void)if_mcast_op(ifp, SIOCDELMULTI, sstosa(&mc->mc_addr)); 2404 LIST_REMOVE(mc, mc_entries); 2405 free(mc, M_DEVBUF); 2406 } 2407 } 2408 2409 static int 2410 sysctl_net_inet_carp_stats(SYSCTLFN_ARGS) 2411 { 2412 2413 return (NETSTAT_SYSCTL(carpstat_percpu, CARP_NSTATS)); 2414 } 2415 2416 void 2417 carp_init(void) 2418 { 2419 2420 sysctl_net_inet_carp_setup(NULL); 2421 #ifdef MBUFTRACE 2422 MOWNER_ATTACH(&carp_proto_mowner_rx); 2423 MOWNER_ATTACH(&carp_proto_mowner_tx); 2424 MOWNER_ATTACH(&carp_proto6_mowner_rx); 2425 MOWNER_ATTACH(&carp_proto6_mowner_tx); 2426 #endif 2427 2428 carp_wqinput = wqinput_create("carp", _carp_proto_input); 2429 #ifdef INET6 2430 carp6_wqinput = wqinput_create("carp6", _carp6_proto_input); 2431 #endif 2432 } 2433 2434 static void 2435 sysctl_net_inet_carp_setup(struct sysctllog **clog) 2436 { 2437 2438 sysctl_createv(clog, 0, NULL, NULL, 2439 CTLFLAG_PERMANENT, 2440 CTLTYPE_NODE, "inet", NULL, 2441 NULL, 0, NULL, 0, 2442 CTL_NET, PF_INET, CTL_EOL); 2443 sysctl_createv(clog, 0, NULL, NULL, 2444 CTLFLAG_PERMANENT, 2445 CTLTYPE_NODE, "carp", 2446 SYSCTL_DESCR("CARP related settings"), 2447 NULL, 0, NULL, 0, 2448 CTL_NET, PF_INET, IPPROTO_CARP, CTL_EOL); 2449 2450 sysctl_createv(clog, 0, NULL, NULL, 2451 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2452 CTLTYPE_INT, "preempt", 2453 SYSCTL_DESCR("Enable CARP Preempt"), 2454 NULL, 0, &carp_opts[CARPCTL_PREEMPT], 0, 2455 CTL_NET, PF_INET, IPPROTO_CARP, 2456 CTL_CREATE, CTL_EOL); 2457 sysctl_createv(clog, 0, NULL, NULL, 2458 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2459 CTLTYPE_INT, "arpbalance", 2460 SYSCTL_DESCR("Enable ARP balancing"), 2461 NULL, 0, &carp_opts[CARPCTL_ARPBALANCE], 0, 2462 CTL_NET, PF_INET, IPPROTO_CARP, 2463 CTL_CREATE, CTL_EOL); 2464 sysctl_createv(clog, 0, NULL, NULL, 2465 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2466 CTLTYPE_INT, "allow", 2467 SYSCTL_DESCR("Enable CARP"), 2468 NULL, 0, &carp_opts[CARPCTL_ALLOW], 0, 2469 CTL_NET, PF_INET, IPPROTO_CARP, 2470 CTL_CREATE, CTL_EOL); 2471 sysctl_createv(clog, 0, NULL, NULL, 2472 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2473 CTLTYPE_INT, "log", 2474 SYSCTL_DESCR("CARP logging"), 2475 NULL, 0, &carp_opts[CARPCTL_LOG], 0, 2476 CTL_NET, PF_INET, IPPROTO_CARP, 2477 CTL_CREATE, CTL_EOL); 2478 sysctl_createv(clog, 0, NULL, NULL, 2479 CTLFLAG_PERMANENT, 2480 CTLTYPE_STRUCT, "stats", 2481 SYSCTL_DESCR("CARP statistics"), 2482 sysctl_net_inet_carp_stats, 0, NULL, 0, 2483 CTL_NET, PF_INET, IPPROTO_CARP, CARPCTL_STATS, 2484 CTL_EOL); 2485 } 2486