1 /* $NetBSD: ip_carp.c,v 1.99 2018/06/26 06:48:03 msaitoh Exp $ */ 2 /* $OpenBSD: ip_carp.c,v 1.113 2005/11/04 08:11:54 mcbride Exp $ */ 3 4 /* 5 * Copyright (c) 2002 Michael Shalayeff. All rights reserved. 6 * Copyright (c) 2003 Ryan McBride. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 21 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 23 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 25 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 26 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 27 * THE POSSIBILITY OF SUCH DAMAGE. 28 */ 29 30 #ifdef _KERNEL_OPT 31 #include "opt_inet.h" 32 #include "opt_mbuftrace.h" 33 #endif 34 35 #include <sys/cdefs.h> 36 __KERNEL_RCSID(0, "$NetBSD: ip_carp.c,v 1.99 2018/06/26 06:48:03 msaitoh Exp $"); 37 38 /* 39 * TODO: 40 * - iface reconfigure 41 * - support for hardware checksum calculations; 42 * 43 */ 44 45 #include <sys/param.h> 46 #include <sys/proc.h> 47 #include <sys/mbuf.h> 48 #include <sys/socket.h> 49 #include <sys/socketvar.h> 50 #include <sys/callout.h> 51 #include <sys/ioctl.h> 52 #include <sys/errno.h> 53 #include <sys/device.h> 54 #include <sys/time.h> 55 #include <sys/kernel.h> 56 #include <sys/kauth.h> 57 #include <sys/sysctl.h> 58 #include <sys/ucred.h> 59 #include <sys/syslog.h> 60 #include <sys/acct.h> 61 #include <sys/cprng.h> 62 #include <sys/cpu.h> 63 #include <sys/pserialize.h> 64 #include <sys/psref.h> 65 66 #include <net/if.h> 67 #include <net/pfil.h> 68 #include <net/if_types.h> 69 #include <net/if_ether.h> 70 #include <net/route.h> 71 #include <net/netisr.h> 72 #include <net/net_stats.h> 73 #include <netinet/if_inarp.h> 74 #include <netinet/wqinput.h> 75 76 #if NFDDI > 0 77 #include <net/if_fddi.h> 78 #endif 79 #if NTOKEN > 0 80 #include <net/if_token.h> 81 #endif 82 83 #ifdef INET 84 #include <netinet/in.h> 85 #include <netinet/in_systm.h> 86 #include <netinet/in_var.h> 87 #include <netinet/ip.h> 88 #include <netinet/ip_var.h> 89 90 #include <net/if_dl.h> 91 #endif 92 93 #ifdef INET6 94 #include <netinet/icmp6.h> 95 #include <netinet/ip6.h> 96 #include <netinet6/ip6_var.h> 97 #include <netinet6/nd6.h> 98 #include <netinet6/scope6_var.h> 99 #include <netinet6/in6_var.h> 100 #endif 101 102 #include <net/bpf.h> 103 104 #include <sys/sha1.h> 105 106 #include <netinet/ip_carp.h> 107 108 #include "ioconf.h" 109 110 struct carp_mc_entry { 111 LIST_ENTRY(carp_mc_entry) mc_entries; 112 union { 113 struct ether_multi *mcu_enm; 114 } mc_u; 115 struct sockaddr_storage mc_addr; 116 }; 117 #define mc_enm mc_u.mcu_enm 118 119 struct carp_softc { 120 struct ethercom sc_ac; 121 #define sc_if sc_ac.ec_if 122 #define sc_carpdev sc_ac.ec_if.if_carpdev 123 int ah_cookie; 124 int lh_cookie; 125 struct ip_moptions sc_imo; 126 #ifdef INET6 127 struct ip6_moptions sc_im6o; 128 #endif /* INET6 */ 129 TAILQ_ENTRY(carp_softc) sc_list; 130 131 enum { INIT = 0, BACKUP, MASTER } sc_state; 132 133 int sc_suppress; 134 int sc_bow_out; 135 136 int sc_sendad_errors; 137 #define CARP_SENDAD_MAX_ERRORS 3 138 int sc_sendad_success; 139 #define CARP_SENDAD_MIN_SUCCESS 3 140 141 int sc_vhid; 142 int sc_advskew; 143 int sc_naddrs; 144 int sc_naddrs6; 145 int sc_advbase; /* seconds */ 146 int sc_init_counter; 147 u_int64_t sc_counter; 148 149 /* authentication */ 150 #define CARP_HMAC_PAD 64 151 unsigned char sc_key[CARP_KEY_LEN]; 152 unsigned char sc_pad[CARP_HMAC_PAD]; 153 SHA1_CTX sc_sha1; 154 u_int32_t sc_hashkey[2]; 155 156 struct callout sc_ad_tmo; /* advertisement timeout */ 157 struct callout sc_md_tmo; /* master down timeout */ 158 struct callout sc_md6_tmo; /* master down timeout */ 159 160 LIST_HEAD(__carp_mchead, carp_mc_entry) carp_mc_listhead; 161 }; 162 163 int carp_suppress_preempt = 0; 164 static int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 0, 0 }; /* XXX for now */ 165 166 static percpu_t *carpstat_percpu; 167 168 #define CARP_STATINC(x) _NET_STATINC(carpstat_percpu, x) 169 170 #ifdef MBUFTRACE 171 static struct mowner carp_proto_mowner_rx = MOWNER_INIT("carp", "rx"); 172 static struct mowner carp_proto_mowner_tx = MOWNER_INIT("carp", "tx"); 173 static struct mowner carp_proto6_mowner_rx = MOWNER_INIT("carp6", "rx"); 174 static struct mowner carp_proto6_mowner_tx = MOWNER_INIT("carp6", "tx"); 175 #endif 176 177 struct carp_if { 178 TAILQ_HEAD(, carp_softc) vhif_vrs; 179 int vhif_nvrs; 180 181 struct ifnet *vhif_ifp; 182 }; 183 184 #define CARP_LOG(sc, s) \ 185 if (carp_opts[CARPCTL_LOG]) { \ 186 if (sc) \ 187 log(LOG_INFO, "%s: ", \ 188 (sc)->sc_if.if_xname); \ 189 else \ 190 log(LOG_INFO, "carp: "); \ 191 addlog s; \ 192 addlog("\n"); \ 193 } 194 195 static void carp_hmac_prepare(struct carp_softc *); 196 static void carp_hmac_generate(struct carp_softc *, u_int32_t *, 197 unsigned char *); 198 static int carp_hmac_verify(struct carp_softc *, u_int32_t *, 199 unsigned char *); 200 static void carp_setroute(struct carp_softc *, int); 201 static void carp_proto_input_c(struct mbuf *, struct carp_header *, 202 sa_family_t); 203 static void carpdetach(struct carp_softc *); 204 static void carp_prepare_ad(struct mbuf *, struct carp_softc *, 205 struct carp_header *); 206 static void carp_send_ad_all(void); 207 static void carp_send_ad(void *); 208 static void carp_send_arp(struct carp_softc *); 209 static void carp_master_down(void *); 210 static int carp_ioctl(struct ifnet *, u_long, void *); 211 static void carp_start(struct ifnet *); 212 static void carp_setrun(struct carp_softc *, sa_family_t); 213 static void carp_set_state(struct carp_softc *, int); 214 static int carp_addrcount(struct carp_if *, struct in_ifaddr *, int); 215 enum { CARP_COUNT_MASTER, CARP_COUNT_RUNNING }; 216 217 static void carp_multicast_cleanup(struct carp_softc *); 218 static int carp_set_ifp(struct carp_softc *, struct ifnet *); 219 static void carp_set_enaddr(struct carp_softc *); 220 #if 0 221 static void carp_addr_updated(void *); 222 #endif 223 static u_int32_t carp_hash(struct carp_softc *, u_char *); 224 static int carp_set_addr(struct carp_softc *, struct sockaddr_in *); 225 static int carp_join_multicast(struct carp_softc *); 226 #ifdef INET6 227 static void carp_send_na(struct carp_softc *); 228 static int carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *); 229 static int carp_join_multicast6(struct carp_softc *); 230 #endif 231 static int carp_clone_create(struct if_clone *, int); 232 static int carp_clone_destroy(struct ifnet *); 233 static int carp_ether_addmulti(struct carp_softc *, struct ifreq *); 234 static int carp_ether_delmulti(struct carp_softc *, struct ifreq *); 235 static void carp_ether_purgemulti(struct carp_softc *); 236 237 static void sysctl_net_inet_carp_setup(struct sysctllog **); 238 239 /* workqueue-based pr_input */ 240 static struct wqinput *carp_wqinput; 241 static void _carp_proto_input(struct mbuf *, int, int); 242 #ifdef INET6 243 static struct wqinput *carp6_wqinput; 244 static void _carp6_proto_input(struct mbuf *, int, int); 245 #endif 246 247 struct if_clone carp_cloner = 248 IF_CLONE_INITIALIZER("carp", carp_clone_create, carp_clone_destroy); 249 250 static __inline u_int16_t 251 carp_cksum(struct mbuf *m, int len) 252 { 253 return (in_cksum(m, len)); 254 } 255 256 static __inline u_int16_t 257 carp6_cksum(struct mbuf *m, uint32_t off, uint32_t len) 258 { 259 return (in6_cksum(m, IPPROTO_CARP, off, len)); 260 } 261 262 static void 263 carp_hmac_prepare(struct carp_softc *sc) 264 { 265 u_int8_t carp_version = CARP_VERSION, type = CARP_ADVERTISEMENT; 266 u_int8_t vhid = sc->sc_vhid & 0xff; 267 SHA1_CTX sha1ctx; 268 u_int32_t kmd[5]; 269 struct ifaddr *ifa; 270 int i, found; 271 struct in_addr last, cur, in; 272 #ifdef INET6 273 struct in6_addr last6, cur6, in6; 274 #endif /* INET6 */ 275 276 /* compute ipad from key */ 277 memset(sc->sc_pad, 0, sizeof(sc->sc_pad)); 278 memcpy(sc->sc_pad, sc->sc_key, sizeof(sc->sc_key)); 279 for (i = 0; i < sizeof(sc->sc_pad); i++) 280 sc->sc_pad[i] ^= 0x36; 281 282 /* precompute first part of inner hash */ 283 SHA1Init(&sc->sc_sha1); 284 SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad)); 285 SHA1Update(&sc->sc_sha1, (void *)&carp_version, sizeof(carp_version)); 286 SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type)); 287 288 /* generate a key for the arpbalance hash, before the vhid is hashed */ 289 memcpy(&sha1ctx, &sc->sc_sha1, sizeof(sha1ctx)); 290 SHA1Final((unsigned char *)kmd, &sha1ctx); 291 sc->sc_hashkey[0] = kmd[0] ^ kmd[1]; 292 sc->sc_hashkey[1] = kmd[2] ^ kmd[3]; 293 294 /* the rest of the precomputation */ 295 SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid)); 296 297 /* Hash the addresses from smallest to largest, not interface order */ 298 #ifdef INET 299 cur.s_addr = 0; 300 do { 301 int s; 302 found = 0; 303 last = cur; 304 cur.s_addr = 0xffffffff; 305 s = pserialize_read_enter(); 306 IFADDR_READER_FOREACH(ifa, &sc->sc_if) { 307 in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr; 308 if (ifa->ifa_addr->sa_family == AF_INET && 309 ntohl(in.s_addr) > ntohl(last.s_addr) && 310 ntohl(in.s_addr) < ntohl(cur.s_addr)) { 311 cur.s_addr = in.s_addr; 312 found++; 313 } 314 } 315 pserialize_read_exit(s); 316 if (found) 317 SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur)); 318 } while (found); 319 #endif /* INET */ 320 321 #ifdef INET6 322 memset(&cur6, 0x00, sizeof(cur6)); 323 do { 324 int s; 325 found = 0; 326 last6 = cur6; 327 memset(&cur6, 0xff, sizeof(cur6)); 328 s = pserialize_read_enter(); 329 IFADDR_READER_FOREACH(ifa, &sc->sc_if) { 330 in6 = ifatoia6(ifa)->ia_addr.sin6_addr; 331 if (IN6_IS_ADDR_LINKLOCAL(&in6)) 332 in6.s6_addr16[1] = 0; 333 if (ifa->ifa_addr->sa_family == AF_INET6 && 334 memcmp(&in6, &last6, sizeof(in6)) > 0 && 335 memcmp(&in6, &cur6, sizeof(in6)) < 0) { 336 cur6 = in6; 337 found++; 338 } 339 } 340 pserialize_read_exit(s); 341 if (found) 342 SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6)); 343 } while (found); 344 #endif /* INET6 */ 345 346 /* convert ipad to opad */ 347 for (i = 0; i < sizeof(sc->sc_pad); i++) 348 sc->sc_pad[i] ^= 0x36 ^ 0x5c; 349 } 350 351 static void 352 carp_hmac_generate(struct carp_softc *sc, u_int32_t counter[2], 353 unsigned char md[20]) 354 { 355 SHA1_CTX sha1ctx; 356 357 /* fetch first half of inner hash */ 358 memcpy(&sha1ctx, &sc->sc_sha1, sizeof(sha1ctx)); 359 360 SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter)); 361 SHA1Final(md, &sha1ctx); 362 363 /* outer hash */ 364 SHA1Init(&sha1ctx); 365 SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad)); 366 SHA1Update(&sha1ctx, md, 20); 367 SHA1Final(md, &sha1ctx); 368 } 369 370 static int 371 carp_hmac_verify(struct carp_softc *sc, u_int32_t counter[2], 372 unsigned char md[20]) 373 { 374 unsigned char md2[20]; 375 376 carp_hmac_generate(sc, counter, md2); 377 378 return (memcmp(md, md2, sizeof(md2))); 379 } 380 381 static void 382 carp_setroute(struct carp_softc *sc, int cmd) 383 { 384 struct ifaddr *ifa; 385 int s, bound; 386 387 KERNEL_LOCK(1, NULL); 388 bound = curlwp_bind(); 389 s = pserialize_read_enter(); 390 IFADDR_READER_FOREACH(ifa, &sc->sc_if) { 391 struct psref psref; 392 ifa_acquire(ifa, &psref); 393 pserialize_read_exit(s); 394 395 switch (ifa->ifa_addr->sa_family) { 396 case AF_INET: { 397 int count = 0; 398 struct rtentry *rt; 399 int hr_otherif, nr_ourif; 400 401 /* 402 * Avoid screwing with the routes if there are other 403 * carp interfaces which are master and have the same 404 * address. 405 */ 406 if (sc->sc_carpdev != NULL && 407 sc->sc_carpdev->if_carp != NULL) { 408 count = carp_addrcount( 409 (struct carp_if *)sc->sc_carpdev->if_carp, 410 ifatoia(ifa), CARP_COUNT_MASTER); 411 if ((cmd == RTM_ADD && count != 1) || 412 (cmd == RTM_DELETE && count != 0)) 413 continue; 414 } 415 416 /* Remove the existing host route, if any */ 417 rtrequest(RTM_DELETE, ifa->ifa_addr, 418 ifa->ifa_addr, ifa->ifa_netmask, 419 RTF_HOST, NULL); 420 421 rt = NULL; 422 (void)rtrequest(RTM_GET, ifa->ifa_addr, ifa->ifa_addr, 423 ifa->ifa_netmask, RTF_HOST, &rt); 424 hr_otherif = (rt && rt->rt_ifp != &sc->sc_if && 425 (rt->rt_flags & RTF_CONNECTED)); 426 if (rt != NULL) { 427 rt_unref(rt); 428 rt = NULL; 429 } 430 431 /* Check for a network route on our interface */ 432 433 rt = NULL; 434 (void)rtrequest(RTM_GET, ifa->ifa_addr, ifa->ifa_addr, 435 ifa->ifa_netmask, 0, &rt); 436 nr_ourif = (rt && rt->rt_ifp == &sc->sc_if); 437 438 switch (cmd) { 439 case RTM_ADD: 440 if (hr_otherif) { 441 ifa->ifa_rtrequest = NULL; 442 ifa->ifa_flags &= ~RTF_CONNECTED; 443 444 rtrequest(RTM_ADD, ifa->ifa_addr, 445 ifa->ifa_addr, ifa->ifa_netmask, 446 RTF_UP | RTF_HOST, NULL); 447 } 448 if (!hr_otherif || nr_ourif || !rt) { 449 if (nr_ourif && 450 (rt->rt_flags & RTF_CONNECTED) == 0) 451 rtrequest(RTM_DELETE, 452 ifa->ifa_addr, 453 ifa->ifa_addr, 454 ifa->ifa_netmask, 0, NULL); 455 456 ifa->ifa_rtrequest = arp_rtrequest; 457 ifa->ifa_flags |= RTF_CONNECTED; 458 459 if (rtrequest(RTM_ADD, ifa->ifa_addr, 460 ifa->ifa_addr, ifa->ifa_netmask, 0, 461 NULL) == 0) 462 ifa->ifa_flags |= IFA_ROUTE; 463 } 464 break; 465 case RTM_DELETE: 466 break; 467 default: 468 break; 469 } 470 if (rt != NULL) { 471 rt_unref(rt); 472 rt = NULL; 473 } 474 break; 475 } 476 477 #ifdef INET6 478 case AF_INET6: 479 if (cmd == RTM_ADD) 480 in6_ifaddlocal(ifa); 481 else 482 in6_ifremlocal(ifa); 483 break; 484 #endif /* INET6 */ 485 default: 486 break; 487 } 488 s = pserialize_read_enter(); 489 ifa_release(ifa, &psref); 490 } 491 pserialize_read_exit(s); 492 curlwp_bindx(bound); 493 KERNEL_UNLOCK_ONE(NULL); 494 } 495 496 /* 497 * process input packet. 498 * we have rearranged checks order compared to the rfc, 499 * but it seems more efficient this way or not possible otherwise. 500 */ 501 static void 502 _carp_proto_input(struct mbuf *m, int hlen, int proto) 503 { 504 struct ip *ip = mtod(m, struct ip *); 505 struct carp_softc *sc = NULL; 506 struct carp_header *ch; 507 int iplen, len; 508 struct ifnet *rcvif; 509 510 CARP_STATINC(CARP_STAT_IPACKETS); 511 MCLAIM(m, &carp_proto_mowner_rx); 512 513 if (!carp_opts[CARPCTL_ALLOW]) { 514 m_freem(m); 515 return; 516 } 517 518 rcvif = m_get_rcvif_NOMPSAFE(m); 519 /* check if received on a valid carp interface */ 520 if (rcvif->if_type != IFT_CARP) { 521 CARP_STATINC(CARP_STAT_BADIF); 522 CARP_LOG(sc, ("packet received on non-carp interface: %s", 523 rcvif->if_xname)); 524 m_freem(m); 525 return; 526 } 527 528 /* verify that the IP TTL is 255. */ 529 if (ip->ip_ttl != CARP_DFLTTL) { 530 CARP_STATINC(CARP_STAT_BADTTL); 531 CARP_LOG(sc, ("received ttl %d != %d on %s", ip->ip_ttl, 532 CARP_DFLTTL, rcvif->if_xname)); 533 m_freem(m); 534 return; 535 } 536 537 /* 538 * verify that the received packet length is 539 * equal to the CARP header 540 */ 541 iplen = ip->ip_hl << 2; 542 len = iplen + sizeof(*ch); 543 if (len > m->m_pkthdr.len) { 544 CARP_STATINC(CARP_STAT_BADLEN); 545 CARP_LOG(sc, ("packet too short %d on %s", m->m_pkthdr.len, 546 rcvif->if_xname)); 547 m_freem(m); 548 return; 549 } 550 551 if ((m = m_pullup(m, len)) == NULL) { 552 CARP_STATINC(CARP_STAT_HDROPS); 553 return; 554 } 555 ip = mtod(m, struct ip *); 556 ch = (struct carp_header *)((char *)ip + iplen); 557 /* verify the CARP checksum */ 558 m->m_data += iplen; 559 if (carp_cksum(m, len - iplen)) { 560 CARP_STATINC(CARP_STAT_BADSUM); 561 CARP_LOG(sc, ("checksum failed on %s", 562 rcvif->if_xname)); 563 m_freem(m); 564 return; 565 } 566 m->m_data -= iplen; 567 568 carp_proto_input_c(m, ch, AF_INET); 569 } 570 571 void 572 carp_proto_input(struct mbuf *m, ...) 573 { 574 575 wqinput_input(carp_wqinput, m, 0, 0); 576 } 577 578 #ifdef INET6 579 static void 580 _carp6_proto_input(struct mbuf *m, int off, int proto) 581 { 582 struct carp_softc *sc = NULL; 583 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 584 struct carp_header *ch; 585 u_int len; 586 struct ifnet *rcvif; 587 588 CARP_STATINC(CARP_STAT_IPACKETS6); 589 MCLAIM(m, &carp_proto6_mowner_rx); 590 591 if (!carp_opts[CARPCTL_ALLOW]) { 592 m_freem(m); 593 return; 594 } 595 596 rcvif = m_get_rcvif_NOMPSAFE(m); 597 598 /* check if received on a valid carp interface */ 599 if (rcvif->if_type != IFT_CARP) { 600 CARP_STATINC(CARP_STAT_BADIF); 601 CARP_LOG(sc, ("packet received on non-carp interface: %s", 602 rcvif->if_xname)); 603 m_freem(m); 604 return; 605 } 606 607 /* verify that the IP TTL is 255 */ 608 if (ip6->ip6_hlim != CARP_DFLTTL) { 609 CARP_STATINC(CARP_STAT_BADTTL); 610 CARP_LOG(sc, ("received ttl %d != %d on %s", ip6->ip6_hlim, 611 CARP_DFLTTL, rcvif->if_xname)); 612 m_freem(m); 613 return; 614 } 615 616 /* verify that we have a complete carp packet */ 617 len = m->m_len; 618 M_REGION_GET(ch, struct carp_header *, m, off, sizeof(*ch)); 619 if (ch == NULL) { 620 CARP_STATINC(CARP_STAT_BADLEN); 621 CARP_LOG(sc, ("packet size %u too small", len)); 622 return; 623 } 624 625 /* verify the CARP checksum */ 626 if (carp6_cksum(m, off, sizeof(*ch))) { 627 CARP_STATINC(CARP_STAT_BADSUM); 628 CARP_LOG(sc, ("checksum failed, on %s", rcvif->if_xname)); 629 m_freem(m); 630 return; 631 } 632 633 carp_proto_input_c(m, ch, AF_INET6); 634 return; 635 } 636 637 int 638 carp6_proto_input(struct mbuf **mp, int *offp, int proto) 639 { 640 641 wqinput_input(carp6_wqinput, *mp, *offp, proto); 642 643 return IPPROTO_DONE; 644 } 645 #endif /* INET6 */ 646 647 static void 648 carp_proto_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af) 649 { 650 struct carp_softc *sc; 651 u_int64_t tmp_counter; 652 struct timeval sc_tv, ch_tv; 653 654 TAILQ_FOREACH(sc, &((struct carp_if *) 655 m_get_rcvif_NOMPSAFE(m)->if_carpdev->if_carp)->vhif_vrs, sc_list) 656 if (sc->sc_vhid == ch->carp_vhid) 657 break; 658 659 if (!sc || (sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) != 660 (IFF_UP|IFF_RUNNING)) { 661 CARP_STATINC(CARP_STAT_BADVHID); 662 m_freem(m); 663 return; 664 } 665 666 /* 667 * Check if our own advertisement was duplicated 668 * from a non simplex interface. 669 * XXX If there is no address on our physical interface 670 * there is no way to distinguish our ads from the ones 671 * another carp host might have sent us. 672 */ 673 if ((sc->sc_carpdev->if_flags & IFF_SIMPLEX) == 0) { 674 struct sockaddr sa; 675 struct ifaddr *ifa; 676 int s; 677 678 memset(&sa, 0, sizeof(sa)); 679 sa.sa_family = af; 680 s = pserialize_read_enter(); 681 ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev); 682 683 if (ifa && af == AF_INET) { 684 struct ip *ip = mtod(m, struct ip *); 685 if (ip->ip_src.s_addr == 686 ifatoia(ifa)->ia_addr.sin_addr.s_addr) { 687 pserialize_read_exit(s); 688 m_freem(m); 689 return; 690 } 691 } 692 #ifdef INET6 693 if (ifa && af == AF_INET6) { 694 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 695 struct in6_addr in6_src, in6_found; 696 697 in6_src = ip6->ip6_src; 698 in6_found = ifatoia6(ifa)->ia_addr.sin6_addr; 699 if (IN6_IS_ADDR_LINKLOCAL(&in6_src)) 700 in6_src.s6_addr16[1] = 0; 701 if (IN6_IS_ADDR_LINKLOCAL(&in6_found)) 702 in6_found.s6_addr16[1] = 0; 703 if (IN6_ARE_ADDR_EQUAL(&in6_src, &in6_found)) { 704 pserialize_read_exit(s); 705 m_freem(m); 706 return; 707 } 708 } 709 #endif /* INET6 */ 710 pserialize_read_exit(s); 711 } 712 713 nanotime(&sc->sc_if.if_lastchange); 714 sc->sc_if.if_ipackets++; 715 sc->sc_if.if_ibytes += m->m_pkthdr.len; 716 717 /* verify the CARP version. */ 718 if (ch->carp_version != CARP_VERSION) { 719 CARP_STATINC(CARP_STAT_BADVER); 720 sc->sc_if.if_ierrors++; 721 CARP_LOG(sc, ("invalid version %d != %d", 722 ch->carp_version, CARP_VERSION)); 723 m_freem(m); 724 return; 725 } 726 727 /* verify the hash */ 728 if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) { 729 struct ip *ip; 730 char ipbuf[INET_ADDRSTRLEN]; 731 #ifdef INET6 732 struct ip6_hdr *ip6; 733 char ip6buf[INET6_ADDRSTRLEN]; 734 #endif 735 736 CARP_STATINC(CARP_STAT_BADAUTH); 737 sc->sc_if.if_ierrors++; 738 739 switch(af) { 740 case AF_INET: 741 ip = mtod(m, struct ip *); 742 CARP_LOG(sc, ("incorrect hash from %s", 743 IN_PRINT(ipbuf, &ip->ip_src))); 744 break; 745 746 #ifdef INET6 747 case AF_INET6: 748 ip6 = mtod(m, struct ip6_hdr *); 749 CARP_LOG(sc, ("incorrect hash from %s", 750 IN6_PRINT(ip6buf, &ip6->ip6_src))); 751 break; 752 #endif 753 754 default: CARP_LOG(sc, ("incorrect hash")); 755 break; 756 } 757 m_freem(m); 758 return; 759 } 760 761 tmp_counter = ntohl(ch->carp_counter[0]); 762 tmp_counter = tmp_counter<<32; 763 tmp_counter += ntohl(ch->carp_counter[1]); 764 765 /* XXX Replay protection goes here */ 766 767 sc->sc_init_counter = 0; 768 sc->sc_counter = tmp_counter; 769 770 771 sc_tv.tv_sec = sc->sc_advbase; 772 if (carp_suppress_preempt && sc->sc_advskew < 240) 773 sc_tv.tv_usec = 240 * 1000000 / 256; 774 else 775 sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256; 776 ch_tv.tv_sec = ch->carp_advbase; 777 ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256; 778 779 switch (sc->sc_state) { 780 case INIT: 781 break; 782 case MASTER: 783 /* 784 * If we receive an advertisement from a backup who's going to 785 * be more frequent than us, go into BACKUP state. 786 */ 787 if (timercmp(&sc_tv, &ch_tv, >) || 788 timercmp(&sc_tv, &ch_tv, ==)) { 789 callout_stop(&sc->sc_ad_tmo); 790 CARP_LOG(sc, ("MASTER -> BACKUP (more frequent advertisement received)")); 791 carp_set_state(sc, BACKUP); 792 carp_setrun(sc, 0); 793 carp_setroute(sc, RTM_DELETE); 794 } 795 break; 796 case BACKUP: 797 /* 798 * If we're pre-empting masters who advertise slower than us, 799 * and this one claims to be slower, treat him as down. 800 */ 801 if (carp_opts[CARPCTL_PREEMPT] && timercmp(&sc_tv, &ch_tv, <)) { 802 CARP_LOG(sc, ("BACKUP -> MASTER (preempting a slower master)")); 803 carp_master_down(sc); 804 break; 805 } 806 807 /* 808 * If the master is going to advertise at such a low frequency 809 * that he's guaranteed to time out, we'd might as well just 810 * treat him as timed out now. 811 */ 812 sc_tv.tv_sec = sc->sc_advbase * 3; 813 if (timercmp(&sc_tv, &ch_tv, <)) { 814 CARP_LOG(sc, ("BACKUP -> MASTER (master timed out)")); 815 carp_master_down(sc); 816 break; 817 } 818 819 /* 820 * Otherwise, we reset the counter and wait for the next 821 * advertisement. 822 */ 823 carp_setrun(sc, af); 824 break; 825 } 826 827 m_freem(m); 828 return; 829 } 830 831 /* 832 * Interface side of the CARP implementation. 833 */ 834 835 /* ARGSUSED */ 836 void 837 carpattach(int n) 838 { 839 if_clone_attach(&carp_cloner); 840 841 carpstat_percpu = percpu_alloc(sizeof(uint64_t) * CARP_NSTATS); 842 } 843 844 static int 845 carp_clone_create(struct if_clone *ifc, int unit) 846 { 847 extern int ifqmaxlen; 848 struct carp_softc *sc; 849 struct ifnet *ifp; 850 int rv; 851 852 sc = malloc(sizeof(*sc), M_DEVBUF, M_NOWAIT|M_ZERO); 853 if (!sc) 854 return (ENOMEM); 855 856 sc->sc_suppress = 0; 857 sc->sc_advbase = CARP_DFLTINTV; 858 sc->sc_vhid = -1; /* required setting */ 859 sc->sc_advskew = 0; 860 sc->sc_init_counter = 1; 861 sc->sc_naddrs = sc->sc_naddrs6 = 0; 862 #ifdef INET6 863 sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL; 864 #endif /* INET6 */ 865 866 callout_init(&sc->sc_ad_tmo, 0); 867 callout_init(&sc->sc_md_tmo, 0); 868 callout_init(&sc->sc_md6_tmo, 0); 869 870 callout_setfunc(&sc->sc_ad_tmo, carp_send_ad, sc); 871 callout_setfunc(&sc->sc_md_tmo, carp_master_down, sc); 872 callout_setfunc(&sc->sc_md6_tmo, carp_master_down, sc); 873 874 LIST_INIT(&sc->carp_mc_listhead); 875 ifp = &sc->sc_if; 876 ifp->if_softc = sc; 877 snprintf(ifp->if_xname, sizeof ifp->if_xname, "%s%d", ifc->ifc_name, 878 unit); 879 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 880 ifp->if_ioctl = carp_ioctl; 881 ifp->if_start = carp_start; 882 IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); 883 IFQ_SET_READY(&ifp->if_snd); 884 rv = if_initialize(ifp); 885 if (rv != 0) { 886 callout_destroy(&sc->sc_ad_tmo); 887 callout_destroy(&sc->sc_md_tmo); 888 callout_destroy(&sc->sc_md6_tmo); 889 free(ifp->if_softc, M_DEVBUF); 890 891 return rv; 892 } 893 ether_ifattach(ifp, NULL); 894 carp_set_enaddr(sc); 895 /* Overwrite ethernet defaults */ 896 ifp->if_type = IFT_CARP; 897 ifp->if_output = carp_output; 898 if_register(ifp); 899 900 return (0); 901 } 902 903 static int 904 carp_clone_destroy(struct ifnet *ifp) 905 { 906 struct carp_softc *sc = ifp->if_softc; 907 908 carpdetach(ifp->if_softc); 909 ether_ifdetach(ifp); 910 if_detach(ifp); 911 callout_destroy(&sc->sc_ad_tmo); 912 callout_destroy(&sc->sc_md_tmo); 913 callout_destroy(&sc->sc_md6_tmo); 914 free(ifp->if_softc, M_DEVBUF); 915 916 return (0); 917 } 918 919 static void 920 carpdetach(struct carp_softc *sc) 921 { 922 struct carp_if *cif; 923 int s; 924 925 callout_stop(&sc->sc_ad_tmo); 926 callout_stop(&sc->sc_md_tmo); 927 callout_stop(&sc->sc_md6_tmo); 928 929 if (sc->sc_suppress) 930 carp_suppress_preempt--; 931 sc->sc_suppress = 0; 932 933 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) 934 carp_suppress_preempt--; 935 sc->sc_sendad_errors = 0; 936 937 carp_set_state(sc, INIT); 938 sc->sc_if.if_flags &= ~IFF_UP; 939 carp_setrun(sc, 0); 940 carp_multicast_cleanup(sc); 941 942 KERNEL_LOCK(1, NULL); 943 s = splnet(); 944 if (sc->sc_carpdev != NULL) { 945 /* XXX linkstatehook removal */ 946 cif = (struct carp_if *)sc->sc_carpdev->if_carp; 947 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list); 948 if (!--cif->vhif_nvrs) { 949 ifpromisc(sc->sc_carpdev, 0); 950 sc->sc_carpdev->if_carp = NULL; 951 free(cif, M_IFADDR); 952 } 953 } 954 sc->sc_carpdev = NULL; 955 splx(s); 956 KERNEL_UNLOCK_ONE(NULL); 957 } 958 959 /* Detach an interface from the carp. */ 960 void 961 carp_ifdetach(struct ifnet *ifp) 962 { 963 struct carp_softc *sc, *nextsc; 964 struct carp_if *cif = (struct carp_if *)ifp->if_carp; 965 966 for (sc = TAILQ_FIRST(&cif->vhif_vrs); sc; sc = nextsc) { 967 nextsc = TAILQ_NEXT(sc, sc_list); 968 carpdetach(sc); 969 } 970 } 971 972 static void 973 carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, 974 struct carp_header *ch) 975 { 976 if (sc->sc_init_counter) { 977 /* this could also be seconds since unix epoch */ 978 sc->sc_counter = cprng_fast64(); 979 } else 980 sc->sc_counter++; 981 982 ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff); 983 ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff); 984 985 carp_hmac_generate(sc, ch->carp_counter, ch->carp_md); 986 } 987 988 static void 989 carp_send_ad_all(void) 990 { 991 struct ifnet *ifp; 992 struct carp_if *cif; 993 struct carp_softc *vh; 994 int s; 995 int bound = curlwp_bind(); 996 997 s = pserialize_read_enter(); 998 IFNET_READER_FOREACH(ifp) { 999 struct psref psref; 1000 if (ifp->if_carp == NULL || ifp->if_type == IFT_CARP) 1001 continue; 1002 1003 if_acquire(ifp, &psref); 1004 pserialize_read_exit(s); 1005 1006 cif = (struct carp_if *)ifp->if_carp; 1007 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1008 if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) == 1009 (IFF_UP|IFF_RUNNING) && vh->sc_state == MASTER) 1010 carp_send_ad(vh); 1011 } 1012 1013 s = pserialize_read_enter(); 1014 if_release(ifp, &psref); 1015 } 1016 pserialize_read_exit(s); 1017 curlwp_bindx(bound); 1018 } 1019 1020 1021 static void 1022 carp_send_ad(void *v) 1023 { 1024 struct carp_header ch; 1025 struct timeval tv; 1026 struct carp_softc *sc = v; 1027 struct carp_header *ch_ptr; 1028 struct mbuf *m; 1029 int error, len, advbase, advskew, s; 1030 struct sockaddr sa; 1031 1032 KERNEL_LOCK(1, NULL); 1033 s = splsoftnet(); 1034 1035 advbase = advskew = 0; /* Sssssh compiler */ 1036 if (sc->sc_carpdev == NULL) { 1037 sc->sc_if.if_oerrors++; 1038 goto retry_later; 1039 } 1040 1041 /* bow out if we've gone to backup (the carp interface is going down) */ 1042 if (sc->sc_bow_out) { 1043 sc->sc_bow_out = 0; 1044 advbase = 255; 1045 advskew = 255; 1046 } else { 1047 advbase = sc->sc_advbase; 1048 if (!carp_suppress_preempt || sc->sc_advskew > 240) 1049 advskew = sc->sc_advskew; 1050 else 1051 advskew = 240; 1052 tv.tv_sec = advbase; 1053 tv.tv_usec = advskew * 1000000 / 256; 1054 } 1055 1056 ch.carp_version = CARP_VERSION; 1057 ch.carp_type = CARP_ADVERTISEMENT; 1058 ch.carp_vhid = sc->sc_vhid; 1059 ch.carp_advbase = advbase; 1060 ch.carp_advskew = advskew; 1061 ch.carp_authlen = 7; /* XXX DEFINE */ 1062 ch.carp_pad1 = 0; /* must be zero */ 1063 ch.carp_cksum = 0; 1064 1065 1066 #ifdef INET 1067 if (sc->sc_naddrs) { 1068 struct ip *ip; 1069 struct ifaddr *ifa; 1070 int _s; 1071 1072 MGETHDR(m, M_DONTWAIT, MT_HEADER); 1073 if (m == NULL) { 1074 sc->sc_if.if_oerrors++; 1075 CARP_STATINC(CARP_STAT_ONOMEM); 1076 /* XXX maybe less ? */ 1077 goto retry_later; 1078 } 1079 MCLAIM(m, &carp_proto_mowner_tx); 1080 len = sizeof(*ip) + sizeof(ch); 1081 m->m_pkthdr.len = len; 1082 m_reset_rcvif(m); 1083 m->m_len = len; 1084 MH_ALIGN(m, m->m_len); 1085 m->m_flags |= M_MCAST; 1086 ip = mtod(m, struct ip *); 1087 ip->ip_v = IPVERSION; 1088 ip->ip_hl = sizeof(*ip) >> 2; 1089 ip->ip_tos = IPTOS_LOWDELAY; 1090 ip->ip_len = htons(len); 1091 ip->ip_id = 0; /* no need for id, we don't support fragments */ 1092 ip->ip_off = htons(IP_DF); 1093 ip->ip_ttl = CARP_DFLTTL; 1094 ip->ip_p = IPPROTO_CARP; 1095 ip->ip_sum = 0; 1096 1097 memset(&sa, 0, sizeof(sa)); 1098 sa.sa_family = AF_INET; 1099 _s = pserialize_read_enter(); 1100 ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev); 1101 if (ifa == NULL) 1102 ip->ip_src.s_addr = 0; 1103 else 1104 ip->ip_src.s_addr = 1105 ifatoia(ifa)->ia_addr.sin_addr.s_addr; 1106 pserialize_read_exit(_s); 1107 ip->ip_dst.s_addr = INADDR_CARP_GROUP; 1108 1109 ch_ptr = (struct carp_header *)(&ip[1]); 1110 memcpy(ch_ptr, &ch, sizeof(ch)); 1111 carp_prepare_ad(m, sc, ch_ptr); 1112 1113 m->m_data += sizeof(*ip); 1114 ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip)); 1115 m->m_data -= sizeof(*ip); 1116 1117 nanotime(&sc->sc_if.if_lastchange); 1118 sc->sc_if.if_opackets++; 1119 sc->sc_if.if_obytes += len; 1120 CARP_STATINC(CARP_STAT_OPACKETS); 1121 1122 error = ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, 1123 NULL); 1124 if (error) { 1125 if (error == ENOBUFS) 1126 CARP_STATINC(CARP_STAT_ONOMEM); 1127 else 1128 CARP_LOG(sc, ("ip_output failed: %d", error)); 1129 sc->sc_if.if_oerrors++; 1130 if (sc->sc_sendad_errors < INT_MAX) 1131 sc->sc_sendad_errors++; 1132 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { 1133 carp_suppress_preempt++; 1134 if (carp_suppress_preempt == 1) 1135 carp_send_ad_all(); 1136 } 1137 sc->sc_sendad_success = 0; 1138 } else { 1139 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { 1140 if (++sc->sc_sendad_success >= 1141 CARP_SENDAD_MIN_SUCCESS) { 1142 carp_suppress_preempt--; 1143 sc->sc_sendad_errors = 0; 1144 } 1145 } else 1146 sc->sc_sendad_errors = 0; 1147 } 1148 } 1149 #endif /* INET */ 1150 #ifdef INET6 1151 if (sc->sc_naddrs6) { 1152 struct ip6_hdr *ip6; 1153 struct ifaddr *ifa; 1154 int _s; 1155 1156 MGETHDR(m, M_DONTWAIT, MT_HEADER); 1157 if (m == NULL) { 1158 sc->sc_if.if_oerrors++; 1159 CARP_STATINC(CARP_STAT_ONOMEM); 1160 /* XXX maybe less ? */ 1161 goto retry_later; 1162 } 1163 MCLAIM(m, &carp_proto6_mowner_tx); 1164 len = sizeof(*ip6) + sizeof(ch); 1165 m->m_pkthdr.len = len; 1166 m_reset_rcvif(m); 1167 m->m_len = len; 1168 MH_ALIGN(m, m->m_len); 1169 m->m_flags |= M_MCAST; 1170 ip6 = mtod(m, struct ip6_hdr *); 1171 memset(ip6, 0, sizeof(*ip6)); 1172 ip6->ip6_vfc |= IPV6_VERSION; 1173 ip6->ip6_hlim = CARP_DFLTTL; 1174 ip6->ip6_nxt = IPPROTO_CARP; 1175 1176 /* set the source address */ 1177 memset(&sa, 0, sizeof(sa)); 1178 sa.sa_family = AF_INET6; 1179 _s = pserialize_read_enter(); 1180 ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev); 1181 if (ifa == NULL) /* This should never happen with IPv6 */ 1182 memset(&ip6->ip6_src, 0, sizeof(struct in6_addr)); 1183 else 1184 bcopy(ifatoia6(ifa)->ia_addr.sin6_addr.s6_addr, 1185 &ip6->ip6_src, sizeof(struct in6_addr)); 1186 pserialize_read_exit(_s); 1187 /* set the multicast destination */ 1188 1189 ip6->ip6_dst.s6_addr16[0] = htons(0xff02); 1190 ip6->ip6_dst.s6_addr8[15] = 0x12; 1191 if (in6_setscope(&ip6->ip6_dst, &sc->sc_if, NULL) != 0) { 1192 sc->sc_if.if_oerrors++; 1193 m_freem(m); 1194 CARP_LOG(sc, ("in6_setscope failed")); 1195 goto retry_later; 1196 } 1197 1198 ch_ptr = (struct carp_header *)(&ip6[1]); 1199 memcpy(ch_ptr, &ch, sizeof(ch)); 1200 carp_prepare_ad(m, sc, ch_ptr); 1201 1202 ch_ptr->carp_cksum = carp6_cksum(m, sizeof(*ip6), 1203 len - sizeof(*ip6)); 1204 1205 nanotime(&sc->sc_if.if_lastchange); 1206 sc->sc_if.if_opackets++; 1207 sc->sc_if.if_obytes += len; 1208 CARP_STATINC(CARP_STAT_OPACKETS6); 1209 1210 error = ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL); 1211 if (error) { 1212 if (error == ENOBUFS) 1213 CARP_STATINC(CARP_STAT_ONOMEM); 1214 else 1215 CARP_LOG(sc, ("ip6_output failed: %d", error)); 1216 sc->sc_if.if_oerrors++; 1217 if (sc->sc_sendad_errors < INT_MAX) 1218 sc->sc_sendad_errors++; 1219 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { 1220 carp_suppress_preempt++; 1221 if (carp_suppress_preempt == 1) 1222 carp_send_ad_all(); 1223 } 1224 sc->sc_sendad_success = 0; 1225 } else { 1226 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { 1227 if (++sc->sc_sendad_success >= 1228 CARP_SENDAD_MIN_SUCCESS) { 1229 carp_suppress_preempt--; 1230 sc->sc_sendad_errors = 0; 1231 } 1232 } else 1233 sc->sc_sendad_errors = 0; 1234 } 1235 } 1236 #endif /* INET6 */ 1237 1238 retry_later: 1239 splx(s); 1240 KERNEL_UNLOCK_ONE(NULL); 1241 if (advbase != 255 || advskew != 255) 1242 callout_schedule(&sc->sc_ad_tmo, tvtohz(&tv)); 1243 } 1244 1245 /* 1246 * Broadcast a gratuitous ARP request containing 1247 * the virtual router MAC address for each IP address 1248 * associated with the virtual router. 1249 */ 1250 static void 1251 carp_send_arp(struct carp_softc *sc) 1252 { 1253 struct ifaddr *ifa; 1254 int s, bound; 1255 1256 KERNEL_LOCK(1, NULL); 1257 bound = curlwp_bind(); 1258 s = pserialize_read_enter(); 1259 IFADDR_READER_FOREACH(ifa, &sc->sc_if) { 1260 struct psref psref; 1261 1262 if (ifa->ifa_addr->sa_family != AF_INET) 1263 continue; 1264 1265 ifa_acquire(ifa, &psref); 1266 pserialize_read_exit(s); 1267 1268 arpannounce(sc->sc_carpdev, ifa, CLLADDR(sc->sc_if.if_sadl)); 1269 1270 s = pserialize_read_enter(); 1271 ifa_release(ifa, &psref); 1272 } 1273 pserialize_read_exit(s); 1274 curlwp_bindx(bound); 1275 KERNEL_UNLOCK_ONE(NULL); 1276 } 1277 1278 #ifdef INET6 1279 static void 1280 carp_send_na(struct carp_softc *sc) 1281 { 1282 struct ifaddr *ifa; 1283 struct in6_addr *in6; 1284 static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT; 1285 int s, bound; 1286 1287 KERNEL_LOCK(1, NULL); 1288 bound = curlwp_bind(); 1289 s = pserialize_read_enter(); 1290 IFADDR_READER_FOREACH(ifa, &sc->sc_if) { 1291 struct psref psref; 1292 1293 if (ifa->ifa_addr->sa_family != AF_INET6) 1294 continue; 1295 1296 ifa_acquire(ifa, &psref); 1297 pserialize_read_exit(s); 1298 1299 in6 = &ifatoia6(ifa)->ia_addr.sin6_addr; 1300 nd6_na_output(sc->sc_carpdev, &mcast, in6, 1301 ND_NA_FLAG_OVERRIDE, 1, NULL); 1302 1303 s = pserialize_read_enter(); 1304 ifa_release(ifa, &psref); 1305 } 1306 pserialize_read_exit(s); 1307 curlwp_bindx(bound); 1308 KERNEL_UNLOCK_ONE(NULL); 1309 } 1310 #endif /* INET6 */ 1311 1312 /* 1313 * Based on bridge_hash() in if_bridge.c 1314 */ 1315 #define mix(a,b,c) \ 1316 do { \ 1317 a -= b; a -= c; a ^= (c >> 13); \ 1318 b -= c; b -= a; b ^= (a << 8); \ 1319 c -= a; c -= b; c ^= (b >> 13); \ 1320 a -= b; a -= c; a ^= (c >> 12); \ 1321 b -= c; b -= a; b ^= (a << 16); \ 1322 c -= a; c -= b; c ^= (b >> 5); \ 1323 a -= b; a -= c; a ^= (c >> 3); \ 1324 b -= c; b -= a; b ^= (a << 10); \ 1325 c -= a; c -= b; c ^= (b >> 15); \ 1326 } while (0) 1327 1328 static u_int32_t 1329 carp_hash(struct carp_softc *sc, u_char *src) 1330 { 1331 u_int32_t a = 0x9e3779b9, b = sc->sc_hashkey[0], c = sc->sc_hashkey[1]; 1332 1333 c += sc->sc_key[3] << 24; 1334 c += sc->sc_key[2] << 16; 1335 c += sc->sc_key[1] << 8; 1336 c += sc->sc_key[0]; 1337 b += src[5] << 8; 1338 b += src[4]; 1339 a += src[3] << 24; 1340 a += src[2] << 16; 1341 a += src[1] << 8; 1342 a += src[0]; 1343 1344 mix(a, b, c); 1345 return (c); 1346 } 1347 1348 static int 1349 carp_addrcount(struct carp_if *cif, struct in_ifaddr *ia, int type) 1350 { 1351 struct carp_softc *vh; 1352 struct ifaddr *ifa; 1353 int count = 0; 1354 1355 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1356 if ((type == CARP_COUNT_RUNNING && 1357 (vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) == 1358 (IFF_UP|IFF_RUNNING)) || 1359 (type == CARP_COUNT_MASTER && vh->sc_state == MASTER)) { 1360 int s = pserialize_read_enter(); 1361 IFADDR_READER_FOREACH(ifa, &vh->sc_if) { 1362 if (ifa->ifa_addr->sa_family == AF_INET && 1363 ia->ia_addr.sin_addr.s_addr == 1364 ifatoia(ifa)->ia_addr.sin_addr.s_addr) 1365 count++; 1366 } 1367 pserialize_read_exit(s); 1368 } 1369 } 1370 return (count); 1371 } 1372 1373 int 1374 carp_iamatch(struct in_ifaddr *ia, u_char *src, 1375 u_int32_t *count, u_int32_t index) 1376 { 1377 struct carp_softc *sc = ia->ia_ifp->if_softc; 1378 1379 if (carp_opts[CARPCTL_ARPBALANCE]) { 1380 /* 1381 * We use the source ip to decide which virtual host should 1382 * handle the request. If we're master of that virtual host, 1383 * then we respond, otherwise, just drop the arp packet on 1384 * the floor. 1385 */ 1386 1387 /* Count the elegible carp interfaces with this address */ 1388 if (*count == 0) 1389 *count = carp_addrcount( 1390 (struct carp_if *)ia->ia_ifp->if_carpdev->if_carp, 1391 ia, CARP_COUNT_RUNNING); 1392 1393 /* This should never happen, but... */ 1394 if (*count == 0) 1395 return (0); 1396 1397 if (carp_hash(sc, src) % *count == index - 1 && 1398 sc->sc_state == MASTER) { 1399 return (1); 1400 } 1401 } else { 1402 if (sc->sc_state == MASTER) 1403 return (1); 1404 } 1405 1406 return (0); 1407 } 1408 1409 #ifdef INET6 1410 struct ifaddr * 1411 carp_iamatch6(void *v, struct in6_addr *taddr) 1412 { 1413 struct carp_if *cif = v; 1414 struct carp_softc *vh; 1415 struct ifaddr *ifa; 1416 1417 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1418 int s = pserialize_read_enter(); 1419 IFADDR_READER_FOREACH(ifa, &vh->sc_if) { 1420 if (IN6_ARE_ADDR_EQUAL(taddr, 1421 &ifatoia6(ifa)->ia_addr.sin6_addr) && 1422 ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) == 1423 (IFF_UP|IFF_RUNNING)) && vh->sc_state == MASTER) 1424 return (ifa); 1425 } 1426 pserialize_read_exit(s); 1427 } 1428 1429 return (NULL); 1430 } 1431 #endif /* INET6 */ 1432 1433 struct ifnet * 1434 carp_ourether(void *v, struct ether_header *eh, u_char iftype, int src) 1435 { 1436 struct carp_if *cif = (struct carp_if *)v; 1437 struct carp_softc *vh; 1438 u_int8_t *ena; 1439 1440 if (src) 1441 ena = (u_int8_t *)&eh->ether_shost; 1442 else 1443 ena = (u_int8_t *)&eh->ether_dhost; 1444 1445 switch (iftype) { 1446 case IFT_ETHER: 1447 case IFT_FDDI: 1448 if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1) 1449 return (NULL); 1450 break; 1451 case IFT_ISO88025: 1452 if (ena[0] != 3 || ena[1] || ena[4] || ena[5]) 1453 return (NULL); 1454 break; 1455 default: 1456 return (NULL); 1457 break; 1458 } 1459 1460 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) 1461 if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) == 1462 (IFF_UP|IFF_RUNNING) && vh->sc_state == MASTER && 1463 !memcmp(ena, CLLADDR(vh->sc_if.if_sadl), 1464 ETHER_ADDR_LEN)) { 1465 return (&vh->sc_if); 1466 } 1467 1468 return (NULL); 1469 } 1470 1471 int 1472 carp_input(struct mbuf *m, u_int8_t *shost, u_int8_t *dhost, u_int16_t etype) 1473 { 1474 struct ether_header eh; 1475 struct carp_if *cif = (struct carp_if *)m_get_rcvif_NOMPSAFE(m)->if_carp; 1476 struct ifnet *ifp; 1477 1478 memcpy(&eh.ether_shost, shost, sizeof(eh.ether_shost)); 1479 memcpy(&eh.ether_dhost, dhost, sizeof(eh.ether_dhost)); 1480 eh.ether_type = etype; 1481 1482 if (m->m_flags & (M_BCAST|M_MCAST)) { 1483 struct carp_softc *vh; 1484 struct mbuf *m0; 1485 1486 /* 1487 * XXX Should really check the list of multicast addresses 1488 * for each CARP interface _before_ copying. 1489 */ 1490 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1491 m0 = m_copym(m, 0, M_COPYALL, M_DONTWAIT); 1492 if (m0 == NULL) 1493 continue; 1494 m_set_rcvif(m0, &vh->sc_if); 1495 ether_input(&vh->sc_if, m0); 1496 } 1497 return (1); 1498 } 1499 1500 ifp = carp_ourether(cif, &eh, m_get_rcvif_NOMPSAFE(m)->if_type, 0); 1501 if (ifp == NULL) { 1502 return (1); 1503 } 1504 1505 m_set_rcvif(m, ifp); 1506 1507 bpf_mtap(ifp, m, BPF_D_IN); 1508 ifp->if_ipackets++; 1509 ether_input(ifp, m); 1510 return (0); 1511 } 1512 1513 static void 1514 carp_master_down(void *v) 1515 { 1516 struct carp_softc *sc = v; 1517 1518 switch (sc->sc_state) { 1519 case INIT: 1520 printf("%s: master_down event in INIT state\n", 1521 sc->sc_if.if_xname); 1522 break; 1523 case MASTER: 1524 break; 1525 case BACKUP: 1526 CARP_LOG(sc, ("INIT -> MASTER (preempting)")); 1527 carp_set_state(sc, MASTER); 1528 carp_send_ad(sc); 1529 carp_send_arp(sc); 1530 #ifdef INET6 1531 carp_send_na(sc); 1532 #endif /* INET6 */ 1533 carp_setrun(sc, 0); 1534 carp_setroute(sc, RTM_ADD); 1535 break; 1536 } 1537 } 1538 1539 /* 1540 * When in backup state, af indicates whether to reset the master down timer 1541 * for v4 or v6. If it's set to zero, reset the ones which are already pending. 1542 */ 1543 static void 1544 carp_setrun(struct carp_softc *sc, sa_family_t af) 1545 { 1546 struct timeval tv; 1547 1548 if (sc->sc_carpdev == NULL) { 1549 sc->sc_if.if_flags &= ~IFF_RUNNING; 1550 carp_set_state(sc, INIT); 1551 return; 1552 } 1553 1554 if (sc->sc_if.if_flags & IFF_UP && sc->sc_vhid > 0 && 1555 (sc->sc_naddrs || sc->sc_naddrs6) && !sc->sc_suppress) { 1556 sc->sc_if.if_flags |= IFF_RUNNING; 1557 } else { 1558 sc->sc_if.if_flags &= ~IFF_RUNNING; 1559 carp_setroute(sc, RTM_DELETE); 1560 return; 1561 } 1562 1563 switch (sc->sc_state) { 1564 case INIT: 1565 carp_set_state(sc, BACKUP); 1566 carp_setroute(sc, RTM_DELETE); 1567 carp_setrun(sc, 0); 1568 break; 1569 case BACKUP: 1570 callout_stop(&sc->sc_ad_tmo); 1571 tv.tv_sec = 3 * sc->sc_advbase; 1572 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1573 switch (af) { 1574 #ifdef INET 1575 case AF_INET: 1576 callout_schedule(&sc->sc_md_tmo, tvtohz(&tv)); 1577 break; 1578 #endif /* INET */ 1579 #ifdef INET6 1580 case AF_INET6: 1581 callout_schedule(&sc->sc_md6_tmo, tvtohz(&tv)); 1582 break; 1583 #endif /* INET6 */ 1584 default: 1585 if (sc->sc_naddrs) 1586 callout_schedule(&sc->sc_md_tmo, tvtohz(&tv)); 1587 #ifdef INET6 1588 if (sc->sc_naddrs6) 1589 callout_schedule(&sc->sc_md6_tmo, tvtohz(&tv)); 1590 #endif /* INET6 */ 1591 break; 1592 } 1593 break; 1594 case MASTER: 1595 tv.tv_sec = sc->sc_advbase; 1596 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1597 callout_schedule(&sc->sc_ad_tmo, tvtohz(&tv)); 1598 break; 1599 } 1600 } 1601 1602 static void 1603 carp_multicast_cleanup(struct carp_softc *sc) 1604 { 1605 struct ip_moptions *imo = &sc->sc_imo; 1606 #ifdef INET6 1607 struct ip6_moptions *im6o = &sc->sc_im6o; 1608 #endif 1609 u_int16_t n = imo->imo_num_memberships; 1610 1611 /* Clean up our own multicast memberships */ 1612 while (n-- > 0) { 1613 if (imo->imo_membership[n] != NULL) { 1614 in_delmulti(imo->imo_membership[n]); 1615 imo->imo_membership[n] = NULL; 1616 } 1617 } 1618 imo->imo_num_memberships = 0; 1619 imo->imo_multicast_if_index = 0; 1620 1621 #ifdef INET6 1622 while (!LIST_EMPTY(&im6o->im6o_memberships)) { 1623 struct in6_multi_mship *imm = 1624 LIST_FIRST(&im6o->im6o_memberships); 1625 1626 LIST_REMOVE(imm, i6mm_chain); 1627 in6_leavegroup(imm); 1628 } 1629 im6o->im6o_multicast_if_index = 0; 1630 #endif 1631 1632 /* And any other multicast memberships */ 1633 carp_ether_purgemulti(sc); 1634 } 1635 1636 static int 1637 carp_set_ifp(struct carp_softc *sc, struct ifnet *ifp) 1638 { 1639 struct carp_if *cif, *ncif = NULL; 1640 struct carp_softc *vr, *after = NULL; 1641 int myself = 0, error = 0; 1642 int s; 1643 1644 if (ifp == sc->sc_carpdev) 1645 return (0); 1646 1647 if (ifp != NULL) { 1648 if ((ifp->if_flags & IFF_MULTICAST) == 0) 1649 return (EADDRNOTAVAIL); 1650 1651 if (ifp->if_type == IFT_CARP) 1652 return (EINVAL); 1653 1654 if (ifp->if_carp == NULL) { 1655 ncif = malloc(sizeof(*cif), M_IFADDR, M_NOWAIT); 1656 if (ncif == NULL) 1657 return (ENOBUFS); 1658 if ((error = ifpromisc(ifp, 1))) { 1659 free(ncif, M_IFADDR); 1660 return (error); 1661 } 1662 1663 ncif->vhif_ifp = ifp; 1664 TAILQ_INIT(&ncif->vhif_vrs); 1665 } else { 1666 cif = (struct carp_if *)ifp->if_carp; 1667 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) 1668 if (vr != sc && vr->sc_vhid == sc->sc_vhid) 1669 return (EINVAL); 1670 } 1671 1672 /* detach from old interface */ 1673 if (sc->sc_carpdev != NULL) 1674 carpdetach(sc); 1675 1676 /* join multicast groups */ 1677 if (sc->sc_naddrs < 0 && 1678 (error = carp_join_multicast(sc)) != 0) { 1679 if (ncif != NULL) 1680 free(ncif, M_IFADDR); 1681 return (error); 1682 } 1683 1684 #ifdef INET6 1685 if (sc->sc_naddrs6 < 0 && 1686 (error = carp_join_multicast6(sc)) != 0) { 1687 if (ncif != NULL) 1688 free(ncif, M_IFADDR); 1689 carp_multicast_cleanup(sc); 1690 return (error); 1691 } 1692 #endif 1693 1694 /* attach carp interface to physical interface */ 1695 if (ncif != NULL) 1696 ifp->if_carp = (void *)ncif; 1697 sc->sc_carpdev = ifp; 1698 sc->sc_if.if_capabilities = ifp->if_capabilities & 1699 (IFCAP_TSOv4 | IFCAP_TSOv6 | 1700 IFCAP_CSUM_IPv4_Tx|IFCAP_CSUM_IPv4_Rx| 1701 IFCAP_CSUM_TCPv4_Tx|IFCAP_CSUM_TCPv4_Rx| 1702 IFCAP_CSUM_UDPv4_Tx|IFCAP_CSUM_UDPv4_Rx| 1703 IFCAP_CSUM_TCPv6_Tx|IFCAP_CSUM_TCPv6_Rx| 1704 IFCAP_CSUM_UDPv6_Tx|IFCAP_CSUM_UDPv6_Rx); 1705 1706 cif = (struct carp_if *)ifp->if_carp; 1707 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { 1708 if (vr == sc) 1709 myself = 1; 1710 if (vr->sc_vhid < sc->sc_vhid) 1711 after = vr; 1712 } 1713 1714 if (!myself) { 1715 /* We're trying to keep things in order */ 1716 if (after == NULL) { 1717 TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list); 1718 } else { 1719 TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, 1720 sc, sc_list); 1721 } 1722 cif->vhif_nvrs++; 1723 } 1724 if (sc->sc_naddrs || sc->sc_naddrs6) 1725 sc->sc_if.if_flags |= IFF_UP; 1726 carp_set_enaddr(sc); 1727 KERNEL_LOCK(1, NULL); 1728 s = splnet(); 1729 /* XXX linkstatehooks establish */ 1730 carp_carpdev_state(ifp); 1731 splx(s); 1732 KERNEL_UNLOCK_ONE(NULL); 1733 } else { 1734 carpdetach(sc); 1735 sc->sc_if.if_flags &= ~(IFF_UP|IFF_RUNNING); 1736 } 1737 return (0); 1738 } 1739 1740 static void 1741 carp_set_enaddr(struct carp_softc *sc) 1742 { 1743 uint8_t enaddr[ETHER_ADDR_LEN]; 1744 if (sc->sc_carpdev && sc->sc_carpdev->if_type == IFT_ISO88025) { 1745 enaddr[0] = 3; 1746 enaddr[1] = 0; 1747 enaddr[2] = 0x40 >> (sc->sc_vhid - 1); 1748 enaddr[3] = 0x40000 >> (sc->sc_vhid - 1); 1749 enaddr[4] = 0; 1750 enaddr[5] = 0; 1751 } else { 1752 enaddr[0] = 0; 1753 enaddr[1] = 0; 1754 enaddr[2] = 0x5e; 1755 enaddr[3] = 0; 1756 enaddr[4] = 1; 1757 enaddr[5] = sc->sc_vhid; 1758 } 1759 if_set_sadl(&sc->sc_if, enaddr, sizeof(enaddr), false); 1760 } 1761 1762 #if 0 1763 static void 1764 carp_addr_updated(void *v) 1765 { 1766 struct carp_softc *sc = (struct carp_softc *) v; 1767 struct ifaddr *ifa; 1768 int new_naddrs = 0, new_naddrs6 = 0; 1769 1770 IFADDR_READER_FOREACH(ifa, &sc->sc_if) { 1771 if (ifa->ifa_addr->sa_family == AF_INET) 1772 new_naddrs++; 1773 else if (ifa->ifa_addr->sa_family == AF_INET6) 1774 new_naddrs6++; 1775 } 1776 1777 /* Handle a callback after SIOCDIFADDR */ 1778 if (new_naddrs < sc->sc_naddrs || new_naddrs6 < sc->sc_naddrs6) { 1779 struct in_addr mc_addr; 1780 1781 sc->sc_naddrs = new_naddrs; 1782 sc->sc_naddrs6 = new_naddrs6; 1783 1784 /* Re-establish multicast membership removed by in_control */ 1785 mc_addr.s_addr = INADDR_CARP_GROUP; 1786 if (!in_multi_group(mc_addr, &sc->sc_if, 0)) { 1787 memset(&sc->sc_imo, 0, sizeof(sc->sc_imo)); 1788 1789 if (sc->sc_carpdev != NULL && sc->sc_naddrs > 0) 1790 carp_join_multicast(sc); 1791 } 1792 1793 if (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) { 1794 sc->sc_if.if_flags &= ~IFF_UP; 1795 carp_set_state(sc, INIT); 1796 } else 1797 carp_hmac_prepare(sc); 1798 } 1799 1800 carp_setrun(sc, 0); 1801 } 1802 #endif 1803 1804 static int 1805 carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin) 1806 { 1807 struct ifnet *ifp = sc->sc_carpdev; 1808 struct in_ifaddr *ia, *ia_if; 1809 int error = 0; 1810 int s; 1811 1812 if (sin->sin_addr.s_addr == 0) { 1813 if (!(sc->sc_if.if_flags & IFF_UP)) 1814 carp_set_state(sc, INIT); 1815 if (sc->sc_naddrs) 1816 sc->sc_if.if_flags |= IFF_UP; 1817 carp_setrun(sc, 0); 1818 return (0); 1819 } 1820 1821 /* we have to do this by hand to ensure we don't match on ourselves */ 1822 ia_if = NULL; 1823 s = pserialize_read_enter(); 1824 IN_ADDRLIST_READER_FOREACH(ia) { 1825 /* and, yeah, we need a multicast-capable iface too */ 1826 if (ia->ia_ifp != &sc->sc_if && 1827 ia->ia_ifp->if_type != IFT_CARP && 1828 (ia->ia_ifp->if_flags & IFF_MULTICAST) && 1829 (sin->sin_addr.s_addr & ia->ia_subnetmask) == 1830 ia->ia_subnet) { 1831 if (!ia_if) 1832 ia_if = ia; 1833 } 1834 } 1835 1836 if (ia_if) { 1837 ia = ia_if; 1838 if (ifp) { 1839 if (ifp != ia->ia_ifp) 1840 return (EADDRNOTAVAIL); 1841 } else { 1842 /* FIXME NOMPSAFE */ 1843 ifp = ia->ia_ifp; 1844 } 1845 } 1846 pserialize_read_exit(s); 1847 1848 if ((error = carp_set_ifp(sc, ifp))) 1849 return (error); 1850 1851 if (sc->sc_carpdev == NULL) 1852 return (EADDRNOTAVAIL); 1853 1854 if (sc->sc_naddrs == 0 && (error = carp_join_multicast(sc)) != 0) 1855 return (error); 1856 1857 sc->sc_naddrs++; 1858 if (sc->sc_carpdev != NULL) 1859 sc->sc_if.if_flags |= IFF_UP; 1860 1861 carp_set_state(sc, INIT); 1862 carp_setrun(sc, 0); 1863 1864 /* 1865 * Hook if_addrhooks so that we get a callback after in_ifinit has run, 1866 * to correct any inappropriate routes that it inserted. 1867 */ 1868 if (sc->ah_cookie == 0) { 1869 /* XXX link address hook */ 1870 } 1871 1872 return (0); 1873 } 1874 1875 static int 1876 carp_join_multicast(struct carp_softc *sc) 1877 { 1878 struct ip_moptions *imo = &sc->sc_imo, tmpimo; 1879 struct in_addr addr; 1880 1881 memset(&tmpimo, 0, sizeof(tmpimo)); 1882 addr.s_addr = INADDR_CARP_GROUP; 1883 if ((tmpimo.imo_membership[0] = 1884 in_addmulti(&addr, &sc->sc_if)) == NULL) { 1885 return (ENOBUFS); 1886 } 1887 1888 imo->imo_membership[0] = tmpimo.imo_membership[0]; 1889 imo->imo_num_memberships = 1; 1890 imo->imo_multicast_if_index = sc->sc_if.if_index; 1891 imo->imo_multicast_ttl = CARP_DFLTTL; 1892 imo->imo_multicast_loop = 0; 1893 return (0); 1894 } 1895 1896 1897 #ifdef INET6 1898 static int 1899 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6) 1900 { 1901 struct ifnet *ifp = sc->sc_carpdev; 1902 struct in6_ifaddr *ia, *ia_if; 1903 int error = 0; 1904 int s; 1905 1906 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { 1907 if (!(sc->sc_if.if_flags & IFF_UP)) 1908 carp_set_state(sc, INIT); 1909 if (sc->sc_naddrs6) 1910 sc->sc_if.if_flags |= IFF_UP; 1911 carp_setrun(sc, 0); 1912 return (0); 1913 } 1914 1915 /* we have to do this by hand to ensure we don't match on ourselves */ 1916 ia_if = NULL; 1917 s = pserialize_read_enter(); 1918 IN6_ADDRLIST_READER_FOREACH(ia) { 1919 int i; 1920 1921 for (i = 0; i < 4; i++) { 1922 if ((sin6->sin6_addr.s6_addr32[i] & 1923 ia->ia_prefixmask.sin6_addr.s6_addr32[i]) != 1924 (ia->ia_addr.sin6_addr.s6_addr32[i] & 1925 ia->ia_prefixmask.sin6_addr.s6_addr32[i])) 1926 break; 1927 } 1928 /* and, yeah, we need a multicast-capable iface too */ 1929 if (ia->ia_ifp != &sc->sc_if && 1930 ia->ia_ifp->if_type != IFT_CARP && 1931 (ia->ia_ifp->if_flags & IFF_MULTICAST) && 1932 (i == 4)) { 1933 if (!ia_if) 1934 ia_if = ia; 1935 } 1936 } 1937 pserialize_read_exit(s); 1938 1939 if (ia_if) { 1940 ia = ia_if; 1941 if (sc->sc_carpdev) { 1942 if (sc->sc_carpdev != ia->ia_ifp) 1943 return (EADDRNOTAVAIL); 1944 } else { 1945 ifp = ia->ia_ifp; 1946 } 1947 } 1948 1949 if ((error = carp_set_ifp(sc, ifp))) 1950 return (error); 1951 1952 if (sc->sc_carpdev == NULL) 1953 return (EADDRNOTAVAIL); 1954 1955 if (sc->sc_naddrs6 == 0 && (error = carp_join_multicast6(sc)) != 0) 1956 return (error); 1957 1958 sc->sc_naddrs6++; 1959 if (sc->sc_carpdev != NULL) 1960 sc->sc_if.if_flags |= IFF_UP; 1961 carp_set_state(sc, INIT); 1962 carp_setrun(sc, 0); 1963 1964 return (0); 1965 } 1966 1967 static int 1968 carp_join_multicast6(struct carp_softc *sc) 1969 { 1970 struct in6_multi_mship *imm, *imm2; 1971 struct ip6_moptions *im6o = &sc->sc_im6o; 1972 struct sockaddr_in6 addr6; 1973 int error; 1974 1975 /* Join IPv6 CARP multicast group */ 1976 memset(&addr6, 0, sizeof(addr6)); 1977 addr6.sin6_family = AF_INET6; 1978 addr6.sin6_len = sizeof(addr6); 1979 addr6.sin6_addr.s6_addr16[0] = htons(0xff02); 1980 addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index); 1981 addr6.sin6_addr.s6_addr8[15] = 0x12; 1982 if ((imm = in6_joingroup(&sc->sc_if, 1983 &addr6.sin6_addr, &error, 0)) == NULL) { 1984 return (error); 1985 } 1986 /* join solicited multicast address */ 1987 memset(&addr6.sin6_addr, 0, sizeof(addr6.sin6_addr)); 1988 addr6.sin6_addr.s6_addr16[0] = htons(0xff02); 1989 addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index); 1990 addr6.sin6_addr.s6_addr32[1] = 0; 1991 addr6.sin6_addr.s6_addr32[2] = htonl(1); 1992 addr6.sin6_addr.s6_addr32[3] = 0; 1993 addr6.sin6_addr.s6_addr8[12] = 0xff; 1994 if ((imm2 = in6_joingroup(&sc->sc_if, 1995 &addr6.sin6_addr, &error, 0)) == NULL) { 1996 in6_leavegroup(imm); 1997 return (error); 1998 } 1999 2000 /* apply v6 multicast membership */ 2001 im6o->im6o_multicast_if_index = sc->sc_if.if_index; 2002 if (imm) 2003 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, 2004 i6mm_chain); 2005 if (imm2) 2006 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm2, 2007 i6mm_chain); 2008 2009 return (0); 2010 } 2011 2012 #endif /* INET6 */ 2013 2014 static int 2015 carp_ioctl(struct ifnet *ifp, u_long cmd, void *data) 2016 { 2017 struct lwp *l = curlwp; /* XXX */ 2018 struct carp_softc *sc = ifp->if_softc, *vr; 2019 struct carpreq carpr; 2020 struct ifaddr *ifa; 2021 struct ifreq *ifr; 2022 struct ifnet *cdev = NULL; 2023 int error = 0; 2024 2025 ifa = (struct ifaddr *)data; 2026 ifr = (struct ifreq *)data; 2027 2028 switch (cmd) { 2029 case SIOCINITIFADDR: 2030 switch (ifa->ifa_addr->sa_family) { 2031 #ifdef INET 2032 case AF_INET: 2033 sc->sc_if.if_flags |= IFF_UP; 2034 memcpy(ifa->ifa_dstaddr, ifa->ifa_addr, 2035 sizeof(struct sockaddr)); 2036 error = carp_set_addr(sc, satosin(ifa->ifa_addr)); 2037 break; 2038 #endif /* INET */ 2039 #ifdef INET6 2040 case AF_INET6: 2041 sc->sc_if.if_flags|= IFF_UP; 2042 error = carp_set_addr6(sc, satosin6(ifa->ifa_addr)); 2043 break; 2044 #endif /* INET6 */ 2045 default: 2046 error = EAFNOSUPPORT; 2047 break; 2048 } 2049 break; 2050 2051 case SIOCSIFFLAGS: 2052 if ((error = ifioctl_common(ifp, cmd, data)) != 0) 2053 break; 2054 if (sc->sc_state != INIT && !(ifr->ifr_flags & IFF_UP)) { 2055 callout_stop(&sc->sc_ad_tmo); 2056 callout_stop(&sc->sc_md_tmo); 2057 callout_stop(&sc->sc_md6_tmo); 2058 if (sc->sc_state == MASTER) { 2059 /* we need the interface up to bow out */ 2060 sc->sc_if.if_flags |= IFF_UP; 2061 sc->sc_bow_out = 1; 2062 carp_send_ad(sc); 2063 } 2064 sc->sc_if.if_flags &= ~IFF_UP; 2065 carp_set_state(sc, INIT); 2066 carp_setrun(sc, 0); 2067 } else if (sc->sc_state == INIT && (ifr->ifr_flags & IFF_UP)) { 2068 sc->sc_if.if_flags |= IFF_UP; 2069 carp_setrun(sc, 0); 2070 } 2071 break; 2072 2073 case SIOCSVH: 2074 if (l == NULL) 2075 break; 2076 if ((error = kauth_authorize_network(l->l_cred, 2077 KAUTH_NETWORK_INTERFACE, 2078 KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, (void *)cmd, 2079 NULL)) != 0) 2080 break; 2081 if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr))) 2082 break; 2083 error = 1; 2084 if (carpr.carpr_carpdev[0] != '\0' && 2085 (cdev = ifunit(carpr.carpr_carpdev)) == NULL) 2086 return (EINVAL); 2087 if ((error = carp_set_ifp(sc, cdev))) 2088 return (error); 2089 if (sc->sc_state != INIT && carpr.carpr_state != sc->sc_state) { 2090 switch (carpr.carpr_state) { 2091 case BACKUP: 2092 callout_stop(&sc->sc_ad_tmo); 2093 carp_set_state(sc, BACKUP); 2094 carp_setrun(sc, 0); 2095 carp_setroute(sc, RTM_DELETE); 2096 break; 2097 case MASTER: 2098 carp_master_down(sc); 2099 break; 2100 default: 2101 break; 2102 } 2103 } 2104 if (carpr.carpr_vhid > 0) { 2105 if (carpr.carpr_vhid > 255) { 2106 error = EINVAL; 2107 break; 2108 } 2109 if (sc->sc_carpdev) { 2110 struct carp_if *cif; 2111 cif = (struct carp_if *)sc->sc_carpdev->if_carp; 2112 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) 2113 if (vr != sc && 2114 vr->sc_vhid == carpr.carpr_vhid) 2115 return (EINVAL); 2116 } 2117 sc->sc_vhid = carpr.carpr_vhid; 2118 carp_set_enaddr(sc); 2119 carp_set_state(sc, INIT); 2120 error--; 2121 } 2122 if (carpr.carpr_advbase > 0 || carpr.carpr_advskew > 0) { 2123 if (carpr.carpr_advskew > 254) { 2124 error = EINVAL; 2125 break; 2126 } 2127 if (carpr.carpr_advbase > 255) { 2128 error = EINVAL; 2129 break; 2130 } 2131 sc->sc_advbase = carpr.carpr_advbase; 2132 sc->sc_advskew = carpr.carpr_advskew; 2133 error--; 2134 } 2135 memcpy(sc->sc_key, carpr.carpr_key, sizeof(sc->sc_key)); 2136 if (error > 0) 2137 error = EINVAL; 2138 else { 2139 error = 0; 2140 carp_setrun(sc, 0); 2141 } 2142 break; 2143 2144 case SIOCGVH: 2145 memset(&carpr, 0, sizeof(carpr)); 2146 if (sc->sc_carpdev != NULL) 2147 strlcpy(carpr.carpr_carpdev, sc->sc_carpdev->if_xname, 2148 IFNAMSIZ); 2149 carpr.carpr_state = sc->sc_state; 2150 carpr.carpr_vhid = sc->sc_vhid; 2151 carpr.carpr_advbase = sc->sc_advbase; 2152 carpr.carpr_advskew = sc->sc_advskew; 2153 2154 if ((l != NULL) && (error = kauth_authorize_network(l->l_cred, 2155 KAUTH_NETWORK_INTERFACE, 2156 KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, (void *)cmd, 2157 NULL)) == 0) 2158 memcpy(carpr.carpr_key, sc->sc_key, 2159 sizeof(carpr.carpr_key)); 2160 error = copyout(&carpr, ifr->ifr_data, sizeof(carpr)); 2161 break; 2162 2163 case SIOCADDMULTI: 2164 error = carp_ether_addmulti(sc, ifr); 2165 break; 2166 2167 case SIOCDELMULTI: 2168 error = carp_ether_delmulti(sc, ifr); 2169 break; 2170 2171 case SIOCSIFCAP: 2172 if ((error = ifioctl_common(ifp, cmd, data)) == ENETRESET) 2173 error = 0; 2174 break; 2175 2176 default: 2177 error = ether_ioctl(ifp, cmd, data); 2178 } 2179 2180 carp_hmac_prepare(sc); 2181 return (error); 2182 } 2183 2184 2185 /* 2186 * Start output on carp interface. This function should never be called. 2187 */ 2188 static void 2189 carp_start(struct ifnet *ifp) 2190 { 2191 #ifdef DEBUG 2192 printf("%s: start called\n", ifp->if_xname); 2193 #endif 2194 } 2195 2196 int 2197 carp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa, 2198 const struct rtentry *rt) 2199 { 2200 struct carp_softc *sc = ((struct carp_softc *)ifp->if_softc); 2201 KASSERT(KERNEL_LOCKED_P()); 2202 2203 if (sc->sc_carpdev != NULL && sc->sc_state == MASTER) { 2204 return if_output_lock(sc->sc_carpdev, ifp, m, sa, rt); 2205 } else { 2206 m_freem(m); 2207 return (ENETUNREACH); 2208 } 2209 } 2210 2211 static void 2212 carp_set_state(struct carp_softc *sc, int state) 2213 { 2214 static const char *carp_states[] = { CARP_STATES }; 2215 int link_state; 2216 2217 if (sc->sc_state == state) 2218 return; 2219 2220 CARP_LOG(sc, ("state transition from: %s -> to: %s", carp_states[sc->sc_state], carp_states[state])); 2221 2222 sc->sc_state = state; 2223 switch (state) { 2224 case BACKUP: 2225 link_state = LINK_STATE_DOWN; 2226 break; 2227 case MASTER: 2228 link_state = LINK_STATE_UP; 2229 break; 2230 default: 2231 link_state = LINK_STATE_UNKNOWN; 2232 break; 2233 } 2234 /* 2235 * The lock is needed to serialize a call of 2236 * if_link_state_change_softint from here and a call from softint. 2237 */ 2238 KERNEL_LOCK(1, NULL); 2239 if_link_state_change_softint(&sc->sc_if, link_state); 2240 KERNEL_UNLOCK_ONE(NULL); 2241 } 2242 2243 void 2244 carp_carpdev_state(void *v) 2245 { 2246 struct carp_if *cif; 2247 struct carp_softc *sc; 2248 struct ifnet *ifp = v; 2249 2250 if (ifp->if_type == IFT_CARP) 2251 return; 2252 2253 cif = (struct carp_if *)ifp->if_carp; 2254 2255 TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) { 2256 int suppressed = sc->sc_suppress; 2257 2258 if (sc->sc_carpdev->if_link_state == LINK_STATE_DOWN || 2259 !(sc->sc_carpdev->if_flags & IFF_UP)) { 2260 sc->sc_if.if_flags &= ~IFF_RUNNING; 2261 callout_stop(&sc->sc_ad_tmo); 2262 callout_stop(&sc->sc_md_tmo); 2263 callout_stop(&sc->sc_md6_tmo); 2264 carp_set_state(sc, INIT); 2265 sc->sc_suppress = 1; 2266 carp_setrun(sc, 0); 2267 if (!suppressed) { 2268 carp_suppress_preempt++; 2269 if (carp_suppress_preempt == 1) 2270 carp_send_ad_all(); 2271 } 2272 } else { 2273 carp_set_state(sc, INIT); 2274 sc->sc_suppress = 0; 2275 carp_setrun(sc, 0); 2276 if (suppressed) 2277 carp_suppress_preempt--; 2278 } 2279 } 2280 } 2281 2282 static int 2283 carp_ether_addmulti(struct carp_softc *sc, struct ifreq *ifr) 2284 { 2285 const struct sockaddr *sa = ifreq_getaddr(SIOCADDMULTI, ifr); 2286 struct ifnet *ifp; 2287 struct carp_mc_entry *mc; 2288 u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN]; 2289 int error; 2290 2291 ifp = sc->sc_carpdev; 2292 if (ifp == NULL) 2293 return (EINVAL); 2294 2295 error = ether_addmulti(sa, &sc->sc_ac); 2296 if (error != ENETRESET) 2297 return (error); 2298 2299 /* 2300 * This is new multicast address. We have to tell parent 2301 * about it. Also, remember this multicast address so that 2302 * we can delete them on unconfigure. 2303 */ 2304 mc = malloc(sizeof(struct carp_mc_entry), M_DEVBUF, M_NOWAIT); 2305 if (mc == NULL) { 2306 error = ENOMEM; 2307 goto alloc_failed; 2308 } 2309 2310 /* 2311 * As ether_addmulti() returns ENETRESET, following two 2312 * statement shouldn't fail. 2313 */ 2314 (void)ether_multiaddr(sa, addrlo, addrhi); 2315 2316 ETHER_LOCK(&sc->sc_ac); 2317 mc->mc_enm = ether_lookup_multi(addrlo, addrhi, &sc->sc_ac); 2318 ETHER_UNLOCK(&sc->sc_ac); 2319 2320 memcpy(&mc->mc_addr, sa, sa->sa_len); 2321 LIST_INSERT_HEAD(&sc->carp_mc_listhead, mc, mc_entries); 2322 2323 error = if_mcast_op(ifp, SIOCADDMULTI, sa); 2324 if (error != 0) 2325 goto ioctl_failed; 2326 2327 return (error); 2328 2329 ioctl_failed: 2330 LIST_REMOVE(mc, mc_entries); 2331 free(mc, M_DEVBUF); 2332 alloc_failed: 2333 (void)ether_delmulti(sa, &sc->sc_ac); 2334 2335 return (error); 2336 } 2337 2338 static int 2339 carp_ether_delmulti(struct carp_softc *sc, struct ifreq *ifr) 2340 { 2341 const struct sockaddr *sa = ifreq_getaddr(SIOCDELMULTI, ifr); 2342 struct ifnet *ifp; 2343 struct ether_multi *enm; 2344 struct carp_mc_entry *mc; 2345 u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN]; 2346 int error; 2347 2348 ifp = sc->sc_carpdev; 2349 if (ifp == NULL) 2350 return (EINVAL); 2351 2352 /* 2353 * Find a key to lookup carp_mc_entry. We have to do this 2354 * before calling ether_delmulti for obvious reason. 2355 */ 2356 if ((error = ether_multiaddr(sa, addrlo, addrhi)) != 0) 2357 return (error); 2358 2359 ETHER_LOCK(&sc->sc_ac); 2360 enm = ether_lookup_multi(addrlo, addrhi, &sc->sc_ac); 2361 ETHER_UNLOCK(&sc->sc_ac); 2362 if (enm == NULL) 2363 return (EINVAL); 2364 2365 LIST_FOREACH(mc, &sc->carp_mc_listhead, mc_entries) 2366 if (mc->mc_enm == enm) 2367 break; 2368 2369 /* We won't delete entries we didn't add */ 2370 if (mc == NULL) 2371 return (EINVAL); 2372 2373 error = ether_delmulti(sa, &sc->sc_ac); 2374 if (error != ENETRESET) 2375 return (error); 2376 2377 /* We no longer use this multicast address. Tell parent so. */ 2378 error = if_mcast_op(ifp, SIOCDELMULTI, sa); 2379 if (error == 0) { 2380 /* And forget about this address. */ 2381 LIST_REMOVE(mc, mc_entries); 2382 free(mc, M_DEVBUF); 2383 } else 2384 (void)ether_addmulti(sa, &sc->sc_ac); 2385 return (error); 2386 } 2387 2388 /* 2389 * Delete any multicast address we have asked to add from parent 2390 * interface. Called when the carp is being unconfigured. 2391 */ 2392 static void 2393 carp_ether_purgemulti(struct carp_softc *sc) 2394 { 2395 struct ifnet *ifp = sc->sc_carpdev; /* Parent. */ 2396 struct carp_mc_entry *mc; 2397 2398 if (ifp == NULL) 2399 return; 2400 2401 while ((mc = LIST_FIRST(&sc->carp_mc_listhead)) != NULL) { 2402 (void)if_mcast_op(ifp, SIOCDELMULTI, sstosa(&mc->mc_addr)); 2403 LIST_REMOVE(mc, mc_entries); 2404 free(mc, M_DEVBUF); 2405 } 2406 } 2407 2408 static int 2409 sysctl_net_inet_carp_stats(SYSCTLFN_ARGS) 2410 { 2411 2412 return (NETSTAT_SYSCTL(carpstat_percpu, CARP_NSTATS)); 2413 } 2414 2415 void 2416 carp_init(void) 2417 { 2418 2419 sysctl_net_inet_carp_setup(NULL); 2420 #ifdef MBUFTRACE 2421 MOWNER_ATTACH(&carp_proto_mowner_rx); 2422 MOWNER_ATTACH(&carp_proto_mowner_tx); 2423 MOWNER_ATTACH(&carp_proto6_mowner_rx); 2424 MOWNER_ATTACH(&carp_proto6_mowner_tx); 2425 #endif 2426 2427 carp_wqinput = wqinput_create("carp", _carp_proto_input); 2428 #ifdef INET6 2429 carp6_wqinput = wqinput_create("carp6", _carp6_proto_input); 2430 #endif 2431 } 2432 2433 static void 2434 sysctl_net_inet_carp_setup(struct sysctllog **clog) 2435 { 2436 2437 sysctl_createv(clog, 0, NULL, NULL, 2438 CTLFLAG_PERMANENT, 2439 CTLTYPE_NODE, "inet", NULL, 2440 NULL, 0, NULL, 0, 2441 CTL_NET, PF_INET, CTL_EOL); 2442 sysctl_createv(clog, 0, NULL, NULL, 2443 CTLFLAG_PERMANENT, 2444 CTLTYPE_NODE, "carp", 2445 SYSCTL_DESCR("CARP related settings"), 2446 NULL, 0, NULL, 0, 2447 CTL_NET, PF_INET, IPPROTO_CARP, CTL_EOL); 2448 2449 sysctl_createv(clog, 0, NULL, NULL, 2450 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2451 CTLTYPE_INT, "preempt", 2452 SYSCTL_DESCR("Enable CARP Preempt"), 2453 NULL, 0, &carp_opts[CARPCTL_PREEMPT], 0, 2454 CTL_NET, PF_INET, IPPROTO_CARP, 2455 CTL_CREATE, CTL_EOL); 2456 sysctl_createv(clog, 0, NULL, NULL, 2457 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2458 CTLTYPE_INT, "arpbalance", 2459 SYSCTL_DESCR("Enable ARP balancing"), 2460 NULL, 0, &carp_opts[CARPCTL_ARPBALANCE], 0, 2461 CTL_NET, PF_INET, IPPROTO_CARP, 2462 CTL_CREATE, CTL_EOL); 2463 sysctl_createv(clog, 0, NULL, NULL, 2464 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2465 CTLTYPE_INT, "allow", 2466 SYSCTL_DESCR("Enable CARP"), 2467 NULL, 0, &carp_opts[CARPCTL_ALLOW], 0, 2468 CTL_NET, PF_INET, IPPROTO_CARP, 2469 CTL_CREATE, CTL_EOL); 2470 sysctl_createv(clog, 0, NULL, NULL, 2471 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2472 CTLTYPE_INT, "log", 2473 SYSCTL_DESCR("CARP logging"), 2474 NULL, 0, &carp_opts[CARPCTL_LOG], 0, 2475 CTL_NET, PF_INET, IPPROTO_CARP, 2476 CTL_CREATE, CTL_EOL); 2477 sysctl_createv(clog, 0, NULL, NULL, 2478 CTLFLAG_PERMANENT, 2479 CTLTYPE_STRUCT, "stats", 2480 SYSCTL_DESCR("CARP statistics"), 2481 sysctl_net_inet_carp_stats, 0, NULL, 0, 2482 CTL_NET, PF_INET, IPPROTO_CARP, CARPCTL_STATS, 2483 CTL_EOL); 2484 } 2485