1 /* $NetBSD: ip_carp.c,v 1.121 2024/12/20 00:49:08 rin Exp $ */ 2 /* $OpenBSD: ip_carp.c,v 1.113 2005/11/04 08:11:54 mcbride Exp $ */ 3 4 /* 5 * Copyright (c) 2002 Michael Shalayeff. All rights reserved. 6 * Copyright (c) 2003 Ryan McBride. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 21 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 23 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 25 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 26 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 27 * THE POSSIBILITY OF SUCH DAMAGE. 28 */ 29 30 #ifdef _KERNEL_OPT 31 #include "opt_inet.h" 32 #include "opt_mbuftrace.h" 33 #endif 34 35 #include <sys/cdefs.h> 36 __KERNEL_RCSID(0, "$NetBSD: ip_carp.c,v 1.121 2024/12/20 00:49:08 rin Exp $"); 37 38 /* 39 * TODO: 40 * - iface reconfigure 41 * - support for hardware checksum calculations; 42 * 43 */ 44 45 #include <sys/param.h> 46 #include <sys/proc.h> 47 #include <sys/mbuf.h> 48 #include <sys/socket.h> 49 #include <sys/socketvar.h> 50 #include <sys/callout.h> 51 #include <sys/ioctl.h> 52 #include <sys/errno.h> 53 #include <sys/device.h> 54 #include <sys/time.h> 55 #include <sys/kernel.h> 56 #include <sys/kauth.h> 57 #include <sys/sysctl.h> 58 #include <sys/ucred.h> 59 #include <sys/syslog.h> 60 #include <sys/acct.h> 61 #include <sys/cprng.h> 62 #include <sys/cpu.h> 63 #include <sys/pserialize.h> 64 #include <sys/psref.h> 65 66 #include <net/if.h> 67 #include <net/pfil.h> 68 #include <net/if_types.h> 69 #include <net/if_ether.h> 70 #include <net/route.h> 71 #include <net/net_stats.h> 72 #include <netinet/if_inarp.h> 73 #include <netinet/wqinput.h> 74 75 #ifdef INET 76 #include <netinet/in.h> 77 #include <netinet/in_systm.h> 78 #include <netinet/in_var.h> 79 #include <netinet/ip.h> 80 #include <netinet/ip_var.h> 81 82 #include <net/if_dl.h> 83 #endif 84 85 #ifdef INET6 86 #include <netinet/icmp6.h> 87 #include <netinet/ip6.h> 88 #include <netinet6/ip6_var.h> 89 #include <netinet6/nd6.h> 90 #include <netinet6/scope6_var.h> 91 #include <netinet6/in6_var.h> 92 #endif 93 94 #include <net/bpf.h> 95 96 #include <sys/sha1.h> 97 98 #include <netinet/ip_carp.h> 99 100 #include "ioconf.h" 101 102 struct carp_mc_entry { 103 LIST_ENTRY(carp_mc_entry) mc_entries; 104 union { 105 struct ether_multi *mcu_enm; 106 } mc_u; 107 struct sockaddr_storage mc_addr; 108 }; 109 #define mc_enm mc_u.mcu_enm 110 111 struct carp_softc { 112 struct ethercom sc_ac; 113 #define sc_if sc_ac.ec_if 114 #define sc_carpdev sc_ac.ec_if.if_carpdev 115 void *sc_linkstate_hook; 116 int ah_cookie; 117 int lh_cookie; 118 struct ip_moptions sc_imo; 119 #ifdef INET6 120 struct ip6_moptions sc_im6o; 121 #endif /* INET6 */ 122 TAILQ_ENTRY(carp_softc) sc_list; 123 124 enum { INIT = 0, BACKUP, MASTER } sc_state; 125 126 int sc_suppress; 127 int sc_bow_out; 128 129 int sc_sendad_errors; 130 #define CARP_SENDAD_MAX_ERRORS 3 131 int sc_sendad_success; 132 #define CARP_SENDAD_MIN_SUCCESS 3 133 134 int sc_vhid; 135 int sc_advskew; 136 int sc_naddrs; 137 int sc_naddrs6; 138 int sc_advbase; /* seconds */ 139 int sc_init_counter; 140 u_int64_t sc_counter; 141 142 /* authentication */ 143 #define CARP_HMAC_PAD 64 144 unsigned char sc_key[CARP_KEY_LEN]; 145 unsigned char sc_pad[CARP_HMAC_PAD]; 146 SHA1_CTX sc_sha1; 147 u_int32_t sc_hashkey[2]; 148 149 struct callout sc_ad_tmo; /* advertisement timeout */ 150 struct callout sc_md_tmo; /* master down timeout */ 151 struct callout sc_md6_tmo; /* master down timeout */ 152 153 LIST_HEAD(__carp_mchead, carp_mc_entry) carp_mc_listhead; 154 }; 155 156 int carp_suppress_preempt = 0; 157 static int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 0, 0 }; /* XXX for now */ 158 159 static percpu_t *carpstat_percpu; 160 161 #define CARP_STATINC(x) _NET_STATINC(carpstat_percpu, x) 162 163 #ifdef MBUFTRACE 164 static struct mowner carp_proto_mowner_rx = MOWNER_INIT("carp", "rx"); 165 static struct mowner carp_proto_mowner_tx = MOWNER_INIT("carp", "tx"); 166 static struct mowner carp_proto6_mowner_rx = MOWNER_INIT("carp6", "rx"); 167 static struct mowner carp_proto6_mowner_tx = MOWNER_INIT("carp6", "tx"); 168 #endif 169 170 struct carp_if { 171 TAILQ_HEAD(, carp_softc) vhif_vrs; 172 int vhif_nvrs; 173 174 struct ifnet *vhif_ifp; 175 }; 176 177 #define CARP_LOG(sc, s) \ 178 if (carp_opts[CARPCTL_LOG]) { \ 179 if (sc) \ 180 log(LOG_INFO, "%s: ", \ 181 (sc)->sc_if.if_xname); \ 182 else \ 183 log(LOG_INFO, "carp: "); \ 184 addlog s; \ 185 addlog("\n"); \ 186 } 187 188 static void carp_hmac_prepare(struct carp_softc *); 189 static void carp_hmac_generate(struct carp_softc *, u_int32_t[2], 190 unsigned char[20]); 191 static int carp_hmac_verify(struct carp_softc *, u_int32_t[2], 192 unsigned char[20]); 193 static void carp_setroute(struct carp_softc *, int); 194 static void carp_proto_input_c(struct mbuf *, struct carp_header *, 195 sa_family_t); 196 static void carpdetach(struct carp_softc *); 197 static void carp_prepare_ad(struct mbuf *, struct carp_softc *, 198 struct carp_header *); 199 static void carp_send_ad_all(void); 200 static void carp_send_ad(void *); 201 static void carp_send_arp(struct carp_softc *); 202 static void carp_master_down(void *); 203 static int carp_ioctl(struct ifnet *, u_long, void *); 204 static void carp_start(struct ifnet *); 205 static void carp_setrun(struct carp_softc *, sa_family_t); 206 static void carp_set_state(struct carp_softc *, int); 207 static int carp_addrcount(struct carp_if *, struct in_ifaddr *, int); 208 enum { CARP_COUNT_MASTER, CARP_COUNT_RUNNING }; 209 210 static void carp_multicast_cleanup(struct carp_softc *); 211 static int carp_set_ifp(struct carp_softc *, struct ifnet *); 212 static void carp_set_enaddr(struct carp_softc *); 213 #if 0 214 static void carp_addr_updated(void *); 215 #endif 216 static u_int32_t carp_hash(struct carp_softc *, u_char *); 217 static int carp_set_addr(struct carp_softc *, struct sockaddr_in *); 218 static int carp_join_multicast(struct carp_softc *); 219 #ifdef INET6 220 static void carp_send_na(struct carp_softc *); 221 static int carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *); 222 static int carp_join_multicast6(struct carp_softc *); 223 #endif 224 static int carp_clone_create(struct if_clone *, int); 225 static int carp_clone_destroy(struct ifnet *); 226 static int carp_ether_addmulti(struct carp_softc *, struct ifreq *); 227 static int carp_ether_delmulti(struct carp_softc *, struct ifreq *); 228 static void carp_ether_purgemulti(struct carp_softc *); 229 static void carp_update_link_state(struct carp_softc *sc); 230 231 static void sysctl_net_inet_carp_setup(struct sysctllog **); 232 233 /* workqueue-based pr_input */ 234 static struct wqinput *carp_wqinput; 235 static void _carp_proto_input(struct mbuf *, int, int); 236 #ifdef INET6 237 static struct wqinput *carp6_wqinput; 238 static void _carp6_proto_input(struct mbuf *, int, int); 239 #endif 240 241 struct if_clone carp_cloner = 242 IF_CLONE_INITIALIZER("carp", carp_clone_create, carp_clone_destroy); 243 244 static __inline u_int16_t 245 carp_cksum(struct mbuf *m, int len) 246 { 247 return (in_cksum(m, len)); 248 } 249 250 #ifdef INET6 251 static __inline u_int16_t 252 carp6_cksum(struct mbuf *m, uint32_t off, uint32_t len) 253 { 254 return (in6_cksum(m, IPPROTO_CARP, off, len)); 255 } 256 #endif 257 258 static void 259 carp_hmac_prepare(struct carp_softc *sc) 260 { 261 u_int8_t carp_version = CARP_VERSION, type = CARP_ADVERTISEMENT; 262 u_int8_t vhid = sc->sc_vhid & 0xff; 263 SHA1_CTX sha1ctx; 264 u_int32_t kmd[5]; 265 struct ifaddr *ifa; 266 int i, found; 267 struct in_addr last, cur, in; 268 #ifdef INET6 269 struct in6_addr last6, cur6, in6; 270 #endif /* INET6 */ 271 272 /* compute ipad from key */ 273 memset(sc->sc_pad, 0, sizeof(sc->sc_pad)); 274 memcpy(sc->sc_pad, sc->sc_key, sizeof(sc->sc_key)); 275 for (i = 0; i < sizeof(sc->sc_pad); i++) 276 sc->sc_pad[i] ^= 0x36; 277 278 /* precompute first part of inner hash */ 279 SHA1Init(&sc->sc_sha1); 280 SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad)); 281 SHA1Update(&sc->sc_sha1, (void *)&carp_version, sizeof(carp_version)); 282 SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type)); 283 284 /* generate a key for the arpbalance hash, before the vhid is hashed */ 285 memcpy(&sha1ctx, &sc->sc_sha1, sizeof(sha1ctx)); 286 SHA1Final((unsigned char *)kmd, &sha1ctx); 287 sc->sc_hashkey[0] = kmd[0] ^ kmd[1]; 288 sc->sc_hashkey[1] = kmd[2] ^ kmd[3]; 289 290 /* the rest of the precomputation */ 291 SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid)); 292 293 /* Hash the addresses from smallest to largest, not interface order */ 294 #ifdef INET 295 cur.s_addr = 0; 296 do { 297 int s; 298 found = 0; 299 last = cur; 300 cur.s_addr = 0xffffffff; 301 s = pserialize_read_enter(); 302 IFADDR_READER_FOREACH(ifa, &sc->sc_if) { 303 in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr; 304 if (ifa->ifa_addr->sa_family == AF_INET && 305 ntohl(in.s_addr) > ntohl(last.s_addr) && 306 ntohl(in.s_addr) < ntohl(cur.s_addr)) { 307 cur.s_addr = in.s_addr; 308 found++; 309 } 310 } 311 pserialize_read_exit(s); 312 if (found) 313 SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur)); 314 } while (found); 315 #endif /* INET */ 316 317 #ifdef INET6 318 memset(&cur6, 0x00, sizeof(cur6)); 319 do { 320 int s; 321 found = 0; 322 last6 = cur6; 323 memset(&cur6, 0xff, sizeof(cur6)); 324 s = pserialize_read_enter(); 325 IFADDR_READER_FOREACH(ifa, &sc->sc_if) { 326 in6 = ifatoia6(ifa)->ia_addr.sin6_addr; 327 if (IN6_IS_ADDR_LINKLOCAL(&in6)) 328 in6.s6_addr16[1] = 0; 329 if (ifa->ifa_addr->sa_family == AF_INET6 && 330 memcmp(&in6, &last6, sizeof(in6)) > 0 && 331 memcmp(&in6, &cur6, sizeof(in6)) < 0) { 332 cur6 = in6; 333 found++; 334 } 335 } 336 pserialize_read_exit(s); 337 if (found) 338 SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6)); 339 } while (found); 340 #endif /* INET6 */ 341 342 /* convert ipad to opad */ 343 for (i = 0; i < sizeof(sc->sc_pad); i++) 344 sc->sc_pad[i] ^= 0x36 ^ 0x5c; 345 } 346 347 static void 348 carp_hmac_generate(struct carp_softc *sc, u_int32_t counter[2], 349 unsigned char md[20]) 350 { 351 SHA1_CTX sha1ctx; 352 353 /* fetch first half of inner hash */ 354 memcpy(&sha1ctx, &sc->sc_sha1, sizeof(sha1ctx)); 355 356 SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter)); 357 SHA1Final(md, &sha1ctx); 358 359 /* outer hash */ 360 SHA1Init(&sha1ctx); 361 SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad)); 362 SHA1Update(&sha1ctx, md, 20); 363 SHA1Final(md, &sha1ctx); 364 } 365 366 static int 367 carp_hmac_verify(struct carp_softc *sc, u_int32_t counter[2], 368 unsigned char md[20]) 369 { 370 unsigned char md2[20]; 371 372 carp_hmac_generate(sc, counter, md2); 373 374 return (memcmp(md, md2, sizeof(md2))); 375 } 376 377 static void 378 carp_setroute(struct carp_softc *sc, int cmd) 379 { 380 struct ifaddr *ifa; 381 int s, bound; 382 383 KERNEL_LOCK(1, NULL); 384 bound = curlwp_bind(); 385 s = pserialize_read_enter(); 386 IFADDR_READER_FOREACH(ifa, &sc->sc_if) { 387 struct psref psref; 388 ifa_acquire(ifa, &psref); 389 pserialize_read_exit(s); 390 391 switch (ifa->ifa_addr->sa_family) { 392 case AF_INET: { 393 int count = 0; 394 struct rtentry *rt; 395 int hr_otherif, nr_ourif; 396 397 /* 398 * Avoid screwing with the routes if there are other 399 * carp interfaces which are master and have the same 400 * address. 401 */ 402 if (sc->sc_carpdev != NULL && 403 sc->sc_carpdev->if_carp != NULL) { 404 count = carp_addrcount( 405 (struct carp_if *)sc->sc_carpdev->if_carp, 406 ifatoia(ifa), CARP_COUNT_MASTER); 407 if ((cmd == RTM_ADD && count != 1) || 408 (cmd == RTM_DELETE && count != 0)) 409 goto next; 410 } 411 412 /* Remove the existing host route, if any */ 413 rtrequest(RTM_DELETE, ifa->ifa_addr, 414 ifa->ifa_addr, ifa->ifa_netmask, 415 RTF_HOST, NULL); 416 417 rt = NULL; 418 (void)rtrequest(RTM_GET, ifa->ifa_addr, ifa->ifa_addr, 419 ifa->ifa_netmask, RTF_HOST, &rt); 420 hr_otherif = (rt && rt->rt_ifp != &sc->sc_if && 421 (rt->rt_flags & RTF_CONNECTED)); 422 if (rt != NULL) { 423 rt_unref(rt); 424 rt = NULL; 425 } 426 427 /* Check for a network route on our interface */ 428 429 rt = NULL; 430 (void)rtrequest(RTM_GET, ifa->ifa_addr, ifa->ifa_addr, 431 ifa->ifa_netmask, 0, &rt); 432 nr_ourif = (rt && rt->rt_ifp == &sc->sc_if); 433 434 switch (cmd) { 435 case RTM_ADD: 436 if (hr_otherif) { 437 ifa->ifa_rtrequest = NULL; 438 ifa->ifa_flags &= ~RTF_CONNECTED; 439 440 rtrequest(RTM_ADD, ifa->ifa_addr, 441 ifa->ifa_addr, ifa->ifa_netmask, 442 RTF_UP | RTF_HOST, NULL); 443 } 444 if (!hr_otherif || nr_ourif || !rt) { 445 if (nr_ourif && 446 (rt->rt_flags & RTF_CONNECTED) == 0) 447 rtrequest(RTM_DELETE, 448 ifa->ifa_addr, 449 ifa->ifa_addr, 450 ifa->ifa_netmask, 0, NULL); 451 452 ifa->ifa_rtrequest = arp_rtrequest; 453 ifa->ifa_flags |= RTF_CONNECTED; 454 455 if (rtrequest(RTM_ADD, ifa->ifa_addr, 456 ifa->ifa_addr, ifa->ifa_netmask, 0, 457 NULL) == 0) 458 ifa->ifa_flags |= IFA_ROUTE; 459 } 460 break; 461 case RTM_DELETE: 462 break; 463 default: 464 break; 465 } 466 if (rt != NULL) { 467 rt_unref(rt); 468 rt = NULL; 469 } 470 break; 471 } 472 473 #ifdef INET6 474 case AF_INET6: 475 if (cmd == RTM_ADD) 476 in6_ifaddlocal(ifa); 477 else 478 in6_ifremlocal(ifa); 479 break; 480 #endif /* INET6 */ 481 default: 482 break; 483 } 484 next: 485 s = pserialize_read_enter(); 486 ifa_release(ifa, &psref); 487 } 488 pserialize_read_exit(s); 489 curlwp_bindx(bound); 490 KERNEL_UNLOCK_ONE(NULL); 491 } 492 493 /* 494 * process input packet. 495 * we have rearranged checks order compared to the rfc, 496 * but it seems more efficient this way or not possible otherwise. 497 */ 498 static void 499 _carp_proto_input(struct mbuf *m, int hlen, int proto) 500 { 501 struct ip *ip = mtod(m, struct ip *); 502 struct carp_softc *sc = NULL; 503 struct carp_header *ch; 504 int iplen, len; 505 struct ifnet *rcvif; 506 507 CARP_STATINC(CARP_STAT_IPACKETS); 508 MCLAIM(m, &carp_proto_mowner_rx); 509 510 if (!carp_opts[CARPCTL_ALLOW]) { 511 m_freem(m); 512 return; 513 } 514 515 rcvif = m_get_rcvif_NOMPSAFE(m); 516 /* check if received on a valid carp interface */ 517 if (rcvif->if_type != IFT_CARP) { 518 CARP_STATINC(CARP_STAT_BADIF); 519 CARP_LOG(sc, ("packet received on non-carp interface: %s", 520 rcvif->if_xname)); 521 m_freem(m); 522 return; 523 } 524 525 /* verify that the IP TTL is 255. */ 526 if (ip->ip_ttl != CARP_DFLTTL) { 527 CARP_STATINC(CARP_STAT_BADTTL); 528 CARP_LOG(sc, ("received ttl %d != %d on %s", ip->ip_ttl, 529 CARP_DFLTTL, rcvif->if_xname)); 530 m_freem(m); 531 return; 532 } 533 534 /* 535 * verify that the received packet length is 536 * equal to the CARP header 537 */ 538 iplen = ip->ip_hl << 2; 539 len = iplen + sizeof(*ch); 540 if (len > m->m_pkthdr.len) { 541 CARP_STATINC(CARP_STAT_BADLEN); 542 CARP_LOG(sc, ("packet too short %d on %s", m->m_pkthdr.len, 543 rcvif->if_xname)); 544 m_freem(m); 545 return; 546 } 547 548 if ((m = m_pullup(m, len)) == NULL) { 549 CARP_STATINC(CARP_STAT_HDROPS); 550 return; 551 } 552 ip = mtod(m, struct ip *); 553 ch = (struct carp_header *)((char *)ip + iplen); 554 /* verify the CARP checksum */ 555 m->m_data += iplen; 556 if (carp_cksum(m, len - iplen)) { 557 CARP_STATINC(CARP_STAT_BADSUM); 558 CARP_LOG(sc, ("checksum failed on %s", 559 rcvif->if_xname)); 560 m_freem(m); 561 return; 562 } 563 m->m_data -= iplen; 564 565 carp_proto_input_c(m, ch, AF_INET); 566 } 567 568 void 569 carp_proto_input(struct mbuf *m, int off, int proto) 570 { 571 572 wqinput_input(carp_wqinput, m, 0, 0); 573 } 574 575 #ifdef INET6 576 static void 577 _carp6_proto_input(struct mbuf *m, int off, int proto) 578 { 579 struct carp_softc *sc = NULL; 580 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 581 struct carp_header *ch; 582 u_int len; 583 struct ifnet *rcvif; 584 585 CARP_STATINC(CARP_STAT_IPACKETS6); 586 MCLAIM(m, &carp_proto6_mowner_rx); 587 588 if (!carp_opts[CARPCTL_ALLOW]) { 589 m_freem(m); 590 return; 591 } 592 593 rcvif = m_get_rcvif_NOMPSAFE(m); 594 595 /* check if received on a valid carp interface */ 596 if (rcvif->if_type != IFT_CARP) { 597 CARP_STATINC(CARP_STAT_BADIF); 598 CARP_LOG(sc, ("packet received on non-carp interface: %s", 599 rcvif->if_xname)); 600 m_freem(m); 601 return; 602 } 603 604 /* verify that the IP TTL is 255 */ 605 if (ip6->ip6_hlim != CARP_DFLTTL) { 606 CARP_STATINC(CARP_STAT_BADTTL); 607 CARP_LOG(sc, ("received ttl %d != %d on %s", ip6->ip6_hlim, 608 CARP_DFLTTL, rcvif->if_xname)); 609 m_freem(m); 610 return; 611 } 612 613 /* verify that we have a complete carp packet */ 614 len = m->m_len; 615 M_REGION_GET(ch, struct carp_header *, m, off, sizeof(*ch)); 616 if (ch == NULL) { 617 CARP_STATINC(CARP_STAT_BADLEN); 618 CARP_LOG(sc, ("packet size %u too small", len)); 619 return; 620 } 621 622 /* verify the CARP checksum */ 623 if (carp6_cksum(m, off, sizeof(*ch))) { 624 CARP_STATINC(CARP_STAT_BADSUM); 625 CARP_LOG(sc, ("checksum failed, on %s", rcvif->if_xname)); 626 m_freem(m); 627 return; 628 } 629 630 carp_proto_input_c(m, ch, AF_INET6); 631 return; 632 } 633 634 int 635 carp6_proto_input(struct mbuf **mp, int *offp, int proto) 636 { 637 638 wqinput_input(carp6_wqinput, *mp, *offp, proto); 639 640 return IPPROTO_DONE; 641 } 642 #endif /* INET6 */ 643 644 static void 645 carp_proto_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af) 646 { 647 struct carp_softc *sc; 648 u_int64_t tmp_counter; 649 struct timeval sc_tv, ch_tv; 650 651 TAILQ_FOREACH(sc, &((struct carp_if *) 652 m_get_rcvif_NOMPSAFE(m)->if_carpdev->if_carp)->vhif_vrs, sc_list) 653 if (sc->sc_vhid == ch->carp_vhid) 654 break; 655 656 if (!sc || (sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) != 657 (IFF_UP|IFF_RUNNING)) { 658 CARP_STATINC(CARP_STAT_BADVHID); 659 m_freem(m); 660 return; 661 } 662 663 /* 664 * Check if our own advertisement was duplicated 665 * from a non simplex interface. 666 * XXX If there is no address on our physical interface 667 * there is no way to distinguish our ads from the ones 668 * another carp host might have sent us. 669 */ 670 if ((sc->sc_carpdev->if_flags & IFF_SIMPLEX) == 0) { 671 struct sockaddr sa; 672 struct ifaddr *ifa; 673 int s; 674 675 memset(&sa, 0, sizeof(sa)); 676 sa.sa_family = af; 677 s = pserialize_read_enter(); 678 ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev); 679 680 if (ifa && af == AF_INET) { 681 struct ip *ip = mtod(m, struct ip *); 682 if (ip->ip_src.s_addr == 683 ifatoia(ifa)->ia_addr.sin_addr.s_addr) { 684 pserialize_read_exit(s); 685 m_freem(m); 686 return; 687 } 688 } 689 #ifdef INET6 690 if (ifa && af == AF_INET6) { 691 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 692 struct in6_addr in6_src, in6_found; 693 694 in6_src = ip6->ip6_src; 695 in6_found = ifatoia6(ifa)->ia_addr.sin6_addr; 696 if (IN6_IS_ADDR_LINKLOCAL(&in6_src)) 697 in6_src.s6_addr16[1] = 0; 698 if (IN6_IS_ADDR_LINKLOCAL(&in6_found)) 699 in6_found.s6_addr16[1] = 0; 700 if (IN6_ARE_ADDR_EQUAL(&in6_src, &in6_found)) { 701 pserialize_read_exit(s); 702 m_freem(m); 703 return; 704 } 705 } 706 #endif /* INET6 */ 707 pserialize_read_exit(s); 708 } 709 710 nanotime(&sc->sc_if.if_lastchange); 711 if_statadd2(&sc->sc_if, if_ipackets, 1, if_ibytes, m->m_pkthdr.len); 712 713 /* verify the CARP version. */ 714 if (ch->carp_version != CARP_VERSION) { 715 CARP_STATINC(CARP_STAT_BADVER); 716 if_statinc(&sc->sc_if, if_ierrors); 717 CARP_LOG(sc, ("invalid version %d != %d", 718 ch->carp_version, CARP_VERSION)); 719 m_freem(m); 720 return; 721 } 722 723 /* verify the hash */ 724 if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) { 725 struct ip *ip; 726 char ipbuf[INET_ADDRSTRLEN]; 727 #ifdef INET6 728 struct ip6_hdr *ip6; 729 char ip6buf[INET6_ADDRSTRLEN]; 730 #endif 731 732 CARP_STATINC(CARP_STAT_BADAUTH); 733 if_statinc(&sc->sc_if, if_ierrors); 734 735 switch(af) { 736 case AF_INET: 737 ip = mtod(m, struct ip *); 738 CARP_LOG(sc, ("incorrect hash from %s", 739 IN_PRINT(ipbuf, &ip->ip_src))); 740 break; 741 742 #ifdef INET6 743 case AF_INET6: 744 ip6 = mtod(m, struct ip6_hdr *); 745 CARP_LOG(sc, ("incorrect hash from %s", 746 IN6_PRINT(ip6buf, &ip6->ip6_src))); 747 break; 748 #endif 749 750 default: CARP_LOG(sc, ("incorrect hash")); 751 break; 752 } 753 m_freem(m); 754 return; 755 } 756 757 tmp_counter = ntohl(ch->carp_counter[0]); 758 tmp_counter = tmp_counter<<32; 759 tmp_counter += ntohl(ch->carp_counter[1]); 760 761 /* XXX Replay protection goes here */ 762 763 sc->sc_init_counter = 0; 764 sc->sc_counter = tmp_counter; 765 766 767 sc_tv.tv_sec = sc->sc_advbase; 768 if (carp_suppress_preempt && sc->sc_advskew < 240) 769 sc_tv.tv_usec = 240 * 1000000 / 256; 770 else 771 sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256; 772 ch_tv.tv_sec = ch->carp_advbase; 773 ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256; 774 775 switch (sc->sc_state) { 776 case INIT: 777 break; 778 case MASTER: 779 /* 780 * If we receive an advertisement from a backup who's going to 781 * be more frequent than us, go into BACKUP state. 782 */ 783 if (timercmp(&sc_tv, &ch_tv, >) || 784 timercmp(&sc_tv, &ch_tv, ==)) { 785 callout_stop(&sc->sc_ad_tmo); 786 CARP_LOG(sc, ("MASTER -> BACKUP (more frequent advertisement received)")); 787 carp_set_state(sc, BACKUP); 788 carp_setrun(sc, 0); 789 carp_setroute(sc, RTM_DELETE); 790 } 791 break; 792 case BACKUP: 793 /* 794 * If we're pre-empting masters who advertise slower than us, 795 * and this one claims to be slower, treat him as down. 796 */ 797 if (carp_opts[CARPCTL_PREEMPT] && timercmp(&sc_tv, &ch_tv, <)) { 798 CARP_LOG(sc, ("BACKUP -> MASTER (preempting a slower master)")); 799 carp_master_down(sc); 800 break; 801 } 802 803 /* 804 * If the master is going to advertise at such a low frequency 805 * that he's guaranteed to time out, we'd might as well just 806 * treat him as timed out now. 807 */ 808 sc_tv.tv_sec = sc->sc_advbase * 3; 809 if (timercmp(&sc_tv, &ch_tv, <)) { 810 CARP_LOG(sc, ("BACKUP -> MASTER (master timed out)")); 811 carp_master_down(sc); 812 break; 813 } 814 815 /* 816 * Otherwise, we reset the counter and wait for the next 817 * advertisement. 818 */ 819 carp_setrun(sc, af); 820 break; 821 } 822 823 m_freem(m); 824 return; 825 } 826 827 /* 828 * Interface side of the CARP implementation. 829 */ 830 831 /* ARGSUSED */ 832 void 833 carpattach(int n) 834 { 835 if_clone_attach(&carp_cloner); 836 837 carpstat_percpu = percpu_alloc(sizeof(uint64_t) * CARP_NSTATS); 838 } 839 840 static int 841 carp_clone_create(struct if_clone *ifc, int unit) 842 { 843 extern int ifqmaxlen; 844 struct carp_softc *sc; 845 struct ifnet *ifp; 846 847 sc = malloc(sizeof(*sc), M_DEVBUF, M_NOWAIT|M_ZERO); 848 if (!sc) 849 return (ENOMEM); 850 851 sc->sc_suppress = 0; 852 sc->sc_advbase = CARP_DFLTINTV; 853 sc->sc_vhid = -1; /* required setting */ 854 sc->sc_advskew = 0; 855 sc->sc_init_counter = 1; 856 sc->sc_naddrs = sc->sc_naddrs6 = 0; 857 #ifdef INET6 858 sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL; 859 #endif /* INET6 */ 860 861 callout_init(&sc->sc_ad_tmo, 0); 862 callout_init(&sc->sc_md_tmo, 0); 863 callout_init(&sc->sc_md6_tmo, 0); 864 865 callout_setfunc(&sc->sc_ad_tmo, carp_send_ad, sc); 866 callout_setfunc(&sc->sc_md_tmo, carp_master_down, sc); 867 callout_setfunc(&sc->sc_md6_tmo, carp_master_down, sc); 868 869 LIST_INIT(&sc->carp_mc_listhead); 870 ifp = &sc->sc_if; 871 ifp->if_softc = sc; 872 snprintf(ifp->if_xname, sizeof ifp->if_xname, "%s%d", ifc->ifc_name, 873 unit); 874 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 875 ifp->if_ioctl = carp_ioctl; 876 ifp->if_start = carp_start; 877 IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); 878 IFQ_SET_READY(&ifp->if_snd); 879 if_initialize(ifp); 880 ether_ifattach(ifp, NULL); 881 /* Overwrite ethernet defaults */ 882 ifp->if_type = IFT_CARP; 883 ifp->if_output = carp_output; 884 ifp->if_link_state = LINK_STATE_DOWN; 885 carp_set_enaddr(sc); 886 if_register(ifp); 887 888 return (0); 889 } 890 891 static int 892 carp_clone_destroy(struct ifnet *ifp) 893 { 894 struct carp_softc *sc = ifp->if_softc; 895 896 carpdetach(ifp->if_softc); 897 ether_ifdetach(ifp); 898 if_detach(ifp); 899 callout_destroy(&sc->sc_ad_tmo); 900 callout_destroy(&sc->sc_md_tmo); 901 callout_destroy(&sc->sc_md6_tmo); 902 free(ifp->if_softc, M_DEVBUF); 903 904 return (0); 905 } 906 907 static void 908 carpdetach(struct carp_softc *sc) 909 { 910 struct ifnet *ifp; 911 struct carp_if *cif; 912 int s; 913 914 callout_stop(&sc->sc_ad_tmo); 915 callout_stop(&sc->sc_md_tmo); 916 callout_stop(&sc->sc_md6_tmo); 917 918 if (sc->sc_suppress) 919 carp_suppress_preempt--; 920 sc->sc_suppress = 0; 921 922 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) 923 carp_suppress_preempt--; 924 sc->sc_sendad_errors = 0; 925 926 carp_set_state(sc, INIT); 927 sc->sc_if.if_flags &= ~IFF_UP; 928 carp_setrun(sc, 0); 929 carp_multicast_cleanup(sc); 930 931 KERNEL_LOCK(1, NULL); 932 s = splnet(); 933 ifp = sc->sc_carpdev; 934 if (ifp != NULL) { 935 if_linkstate_change_disestablish(ifp, 936 sc->sc_linkstate_hook, NULL); 937 938 cif = (struct carp_if *)ifp->if_carp; 939 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list); 940 if (!--cif->vhif_nvrs) { 941 ifpromisc(ifp, 0); 942 ifp->if_carp = NULL; 943 free(cif, M_IFADDR); 944 } 945 } 946 sc->sc_carpdev = NULL; 947 splx(s); 948 KERNEL_UNLOCK_ONE(NULL); 949 } 950 951 /* Detach an interface from the carp. */ 952 void 953 carp_ifdetach(struct ifnet *ifp) 954 { 955 struct carp_softc *sc, *nextsc; 956 struct carp_if *cif = (struct carp_if *)ifp->if_carp; 957 958 for (sc = TAILQ_FIRST(&cif->vhif_vrs); sc; sc = nextsc) { 959 nextsc = TAILQ_NEXT(sc, sc_list); 960 carpdetach(sc); 961 } 962 } 963 964 static void 965 carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, 966 struct carp_header *ch) 967 { 968 if (sc->sc_init_counter) { 969 /* this could also be seconds since unix epoch */ 970 sc->sc_counter = cprng_fast64(); 971 } else 972 sc->sc_counter++; 973 974 ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff); 975 ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff); 976 977 carp_hmac_generate(sc, ch->carp_counter, ch->carp_md); 978 } 979 980 static void 981 carp_send_ad_all(void) 982 { 983 struct ifnet *ifp; 984 struct carp_if *cif; 985 struct carp_softc *vh; 986 int s; 987 int bound = curlwp_bind(); 988 989 s = pserialize_read_enter(); 990 IFNET_READER_FOREACH(ifp) { 991 struct psref psref; 992 if (ifp->if_carp == NULL || ifp->if_type == IFT_CARP) 993 continue; 994 995 if_acquire(ifp, &psref); 996 pserialize_read_exit(s); 997 998 cif = (struct carp_if *)ifp->if_carp; 999 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1000 if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) == 1001 (IFF_UP|IFF_RUNNING) && vh->sc_state == MASTER) 1002 carp_send_ad(vh); 1003 } 1004 1005 s = pserialize_read_enter(); 1006 if_release(ifp, &psref); 1007 } 1008 pserialize_read_exit(s); 1009 curlwp_bindx(bound); 1010 } 1011 1012 1013 static void 1014 carp_send_ad(void *v) 1015 { 1016 struct carp_header ch; 1017 struct timeval tv; 1018 struct carp_softc *sc = v; 1019 struct carp_header *ch_ptr; 1020 struct mbuf *m; 1021 int error, len, advbase, advskew, s; 1022 struct sockaddr sa; 1023 1024 KERNEL_LOCK(1, NULL); 1025 s = splsoftnet(); 1026 1027 advbase = advskew = 0; /* Sssssh compiler */ 1028 if (sc->sc_carpdev == NULL) { 1029 if_statinc(&sc->sc_if, if_oerrors); 1030 goto retry_later; 1031 } 1032 1033 /* bow out if we've gone to backup (the carp interface is going down) */ 1034 if (sc->sc_bow_out) { 1035 sc->sc_bow_out = 0; 1036 advbase = 255; 1037 advskew = 255; 1038 } else { 1039 advbase = sc->sc_advbase; 1040 if (!carp_suppress_preempt || sc->sc_advskew > 240) 1041 advskew = sc->sc_advskew; 1042 else 1043 advskew = 240; 1044 tv.tv_sec = advbase; 1045 tv.tv_usec = advskew * 1000000 / 256; 1046 } 1047 1048 ch.carp_version = CARP_VERSION; 1049 ch.carp_type = CARP_ADVERTISEMENT; 1050 ch.carp_vhid = sc->sc_vhid; 1051 ch.carp_advbase = advbase; 1052 ch.carp_advskew = advskew; 1053 ch.carp_authlen = 7; /* XXX DEFINE */ 1054 ch.carp_pad1 = 0; /* must be zero */ 1055 ch.carp_cksum = 0; 1056 1057 1058 #ifdef INET 1059 if (sc->sc_naddrs) { 1060 struct ip *ip; 1061 struct ifaddr *ifa; 1062 int _s; 1063 1064 MGETHDR(m, M_DONTWAIT, MT_HEADER); 1065 if (m == NULL) { 1066 if_statinc(&sc->sc_if, if_oerrors); 1067 CARP_STATINC(CARP_STAT_ONOMEM); 1068 /* XXX maybe less ? */ 1069 goto retry_later; 1070 } 1071 MCLAIM(m, &carp_proto_mowner_tx); 1072 len = sizeof(*ip) + sizeof(ch); 1073 m->m_pkthdr.len = len; 1074 m_reset_rcvif(m); 1075 m->m_len = len; 1076 m_align(m, m->m_len); 1077 m->m_flags |= M_MCAST; 1078 ip = mtod(m, struct ip *); 1079 ip->ip_v = IPVERSION; 1080 ip->ip_hl = sizeof(*ip) >> 2; 1081 ip->ip_tos = IPTOS_LOWDELAY; 1082 ip->ip_len = htons(len); 1083 ip->ip_id = 0; /* no need for id, we don't support fragments */ 1084 ip->ip_off = htons(IP_DF); 1085 ip->ip_ttl = CARP_DFLTTL; 1086 ip->ip_p = IPPROTO_CARP; 1087 ip->ip_sum = 0; 1088 1089 memset(&sa, 0, sizeof(sa)); 1090 sa.sa_family = AF_INET; 1091 _s = pserialize_read_enter(); 1092 ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev); 1093 if (ifa == NULL) 1094 ifa = ifaof_ifpforaddr(&sa, &sc->sc_if); 1095 if (ifa == NULL) 1096 ip->ip_src.s_addr = 0; 1097 else 1098 ip->ip_src.s_addr = 1099 ifatoia(ifa)->ia_addr.sin_addr.s_addr; 1100 pserialize_read_exit(_s); 1101 ip->ip_dst.s_addr = INADDR_CARP_GROUP; 1102 1103 ch_ptr = (struct carp_header *)(&ip[1]); 1104 memcpy(ch_ptr, &ch, sizeof(ch)); 1105 carp_prepare_ad(m, sc, ch_ptr); 1106 1107 m->m_data += sizeof(*ip); 1108 ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip)); 1109 m->m_data -= sizeof(*ip); 1110 1111 nanotime(&sc->sc_if.if_lastchange); 1112 if_statadd2(&sc->sc_if, if_opackets, 1, if_obytes, len); 1113 CARP_STATINC(CARP_STAT_OPACKETS); 1114 1115 error = ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, 1116 NULL); 1117 if (error) { 1118 if (error == ENOBUFS) 1119 CARP_STATINC(CARP_STAT_ONOMEM); 1120 else 1121 CARP_LOG(sc, ("ip_output failed: %d", error)); 1122 if_statinc(&sc->sc_if, if_oerrors); 1123 if (sc->sc_sendad_errors < INT_MAX) 1124 sc->sc_sendad_errors++; 1125 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { 1126 carp_suppress_preempt++; 1127 if (carp_suppress_preempt == 1) 1128 carp_send_ad_all(); 1129 } 1130 sc->sc_sendad_success = 0; 1131 } else { 1132 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { 1133 if (++sc->sc_sendad_success >= 1134 CARP_SENDAD_MIN_SUCCESS) { 1135 carp_suppress_preempt--; 1136 sc->sc_sendad_errors = 0; 1137 } 1138 } else 1139 sc->sc_sendad_errors = 0; 1140 } 1141 } 1142 #endif /* INET */ 1143 #ifdef INET6 1144 if (sc->sc_naddrs6) { 1145 struct ip6_hdr *ip6; 1146 struct ifaddr *ifa; 1147 struct ifnet *ifp; 1148 int _s; 1149 1150 MGETHDR(m, M_DONTWAIT, MT_HEADER); 1151 if (m == NULL) { 1152 if_statinc(&sc->sc_if, if_oerrors); 1153 CARP_STATINC(CARP_STAT_ONOMEM); 1154 /* XXX maybe less ? */ 1155 goto retry_later; 1156 } 1157 MCLAIM(m, &carp_proto6_mowner_tx); 1158 len = sizeof(*ip6) + sizeof(ch); 1159 m->m_pkthdr.len = len; 1160 m_reset_rcvif(m); 1161 m->m_len = len; 1162 m_align(m, m->m_len); 1163 m->m_flags |= M_MCAST; 1164 ip6 = mtod(m, struct ip6_hdr *); 1165 memset(ip6, 0, sizeof(*ip6)); 1166 ip6->ip6_vfc |= IPV6_VERSION; 1167 ip6->ip6_hlim = CARP_DFLTTL; 1168 ip6->ip6_nxt = IPPROTO_CARP; 1169 1170 /* set the source address */ 1171 memset(&sa, 0, sizeof(sa)); 1172 sa.sa_family = AF_INET6; 1173 _s = pserialize_read_enter(); 1174 ifp = sc->sc_carpdev; 1175 ifa = ifaof_ifpforaddr(&sa, ifp); 1176 if (ifa == NULL) { /* This should never happen with IPv6 */ 1177 ifp = &sc->sc_if; 1178 ifa = ifaof_ifpforaddr(&sa, ifp); 1179 } 1180 if (ifa == NULL) /* This should never happen with IPv6 */ 1181 memset(&ip6->ip6_src, 0, sizeof(struct in6_addr)); 1182 else 1183 bcopy(ifatoia6(ifa)->ia_addr.sin6_addr.s6_addr, 1184 &ip6->ip6_src, sizeof(struct in6_addr)); 1185 pserialize_read_exit(_s); 1186 /* set the multicast destination */ 1187 1188 ip6->ip6_dst.s6_addr16[0] = htons(0xff02); 1189 ip6->ip6_dst.s6_addr8[15] = 0x12; 1190 if (in6_setscope(&ip6->ip6_dst, ifp, NULL) != 0) { 1191 if_statinc(&sc->sc_if, if_oerrors); 1192 m_freem(m); 1193 CARP_LOG(sc, ("in6_setscope failed")); 1194 goto retry_later; 1195 } 1196 1197 ch_ptr = (struct carp_header *)(&ip6[1]); 1198 memcpy(ch_ptr, &ch, sizeof(ch)); 1199 carp_prepare_ad(m, sc, ch_ptr); 1200 1201 ch_ptr->carp_cksum = carp6_cksum(m, sizeof(*ip6), 1202 len - sizeof(*ip6)); 1203 1204 nanotime(&sc->sc_if.if_lastchange); 1205 if_statadd2(&sc->sc_if, if_opackets, 1, if_obytes, len); 1206 CARP_STATINC(CARP_STAT_OPACKETS6); 1207 1208 error = ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL); 1209 if (error) { 1210 if (error == ENOBUFS) 1211 CARP_STATINC(CARP_STAT_ONOMEM); 1212 else 1213 CARP_LOG(sc, ("ip6_output failed: %d", error)); 1214 if_statinc(&sc->sc_if, if_oerrors); 1215 if (sc->sc_sendad_errors < INT_MAX) 1216 sc->sc_sendad_errors++; 1217 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { 1218 carp_suppress_preempt++; 1219 if (carp_suppress_preempt == 1) 1220 carp_send_ad_all(); 1221 } 1222 sc->sc_sendad_success = 0; 1223 } else { 1224 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { 1225 if (++sc->sc_sendad_success >= 1226 CARP_SENDAD_MIN_SUCCESS) { 1227 carp_suppress_preempt--; 1228 sc->sc_sendad_errors = 0; 1229 } 1230 } else 1231 sc->sc_sendad_errors = 0; 1232 } 1233 } 1234 #endif /* INET6 */ 1235 1236 retry_later: 1237 splx(s); 1238 KERNEL_UNLOCK_ONE(NULL); 1239 if (advbase != 255 || advskew != 255) 1240 callout_schedule(&sc->sc_ad_tmo, tvtohz(&tv)); 1241 } 1242 1243 /* 1244 * Broadcast a gratuitous ARP request containing 1245 * the virtual router MAC address for each IP address 1246 * associated with the virtual router. 1247 */ 1248 static void 1249 carp_send_arp(struct carp_softc *sc) 1250 { 1251 struct ifaddr *ifa; 1252 int s, bound; 1253 1254 KERNEL_LOCK(1, NULL); 1255 bound = curlwp_bind(); 1256 s = pserialize_read_enter(); 1257 IFADDR_READER_FOREACH(ifa, &sc->sc_if) { 1258 struct psref psref; 1259 1260 if (ifa->ifa_addr->sa_family != AF_INET) 1261 continue; 1262 1263 ifa_acquire(ifa, &psref); 1264 pserialize_read_exit(s); 1265 1266 arpannounce(sc->sc_carpdev, ifa, CLLADDR(sc->sc_if.if_sadl)); 1267 1268 s = pserialize_read_enter(); 1269 ifa_release(ifa, &psref); 1270 } 1271 pserialize_read_exit(s); 1272 curlwp_bindx(bound); 1273 KERNEL_UNLOCK_ONE(NULL); 1274 } 1275 1276 #ifdef INET6 1277 static void 1278 carp_send_na(struct carp_softc *sc) 1279 { 1280 struct ifaddr *ifa; 1281 struct in6_addr *in6; 1282 static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT; 1283 int s, bound; 1284 1285 KERNEL_LOCK(1, NULL); 1286 bound = curlwp_bind(); 1287 s = pserialize_read_enter(); 1288 IFADDR_READER_FOREACH(ifa, &sc->sc_if) { 1289 struct psref psref; 1290 1291 if (ifa->ifa_addr->sa_family != AF_INET6) 1292 continue; 1293 1294 ifa_acquire(ifa, &psref); 1295 pserialize_read_exit(s); 1296 1297 in6 = &ifatoia6(ifa)->ia_addr.sin6_addr; 1298 nd6_na_output(sc->sc_carpdev, &mcast, in6, 1299 ND_NA_FLAG_OVERRIDE, 1, NULL); 1300 1301 s = pserialize_read_enter(); 1302 ifa_release(ifa, &psref); 1303 } 1304 pserialize_read_exit(s); 1305 curlwp_bindx(bound); 1306 KERNEL_UNLOCK_ONE(NULL); 1307 } 1308 #endif /* INET6 */ 1309 1310 /* 1311 * Based on bridge_hash() in if_bridge.c 1312 */ 1313 #define mix(a,b,c) \ 1314 do { \ 1315 a -= b; a -= c; a ^= (c >> 13); \ 1316 b -= c; b -= a; b ^= (a << 8); \ 1317 c -= a; c -= b; c ^= (b >> 13); \ 1318 a -= b; a -= c; a ^= (c >> 12); \ 1319 b -= c; b -= a; b ^= (a << 16); \ 1320 c -= a; c -= b; c ^= (b >> 5); \ 1321 a -= b; a -= c; a ^= (c >> 3); \ 1322 b -= c; b -= a; b ^= (a << 10); \ 1323 c -= a; c -= b; c ^= (b >> 15); \ 1324 } while (0) 1325 1326 static u_int32_t 1327 carp_hash(struct carp_softc *sc, u_char *src) 1328 { 1329 u_int32_t a = 0x9e3779b9, b = sc->sc_hashkey[0], c = sc->sc_hashkey[1]; 1330 1331 c += sc->sc_key[3] << 24; 1332 c += sc->sc_key[2] << 16; 1333 c += sc->sc_key[1] << 8; 1334 c += sc->sc_key[0]; 1335 b += src[5] << 8; 1336 b += src[4]; 1337 a += src[3] << 24; 1338 a += src[2] << 16; 1339 a += src[1] << 8; 1340 a += src[0]; 1341 1342 mix(a, b, c); 1343 return (c); 1344 } 1345 1346 static int 1347 carp_addrcount(struct carp_if *cif, struct in_ifaddr *ia, int type) 1348 { 1349 struct carp_softc *vh; 1350 struct ifaddr *ifa; 1351 int count = 0; 1352 1353 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1354 if ((type == CARP_COUNT_RUNNING && 1355 (vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) == 1356 (IFF_UP|IFF_RUNNING)) || 1357 (type == CARP_COUNT_MASTER && vh->sc_state == MASTER)) { 1358 int s = pserialize_read_enter(); 1359 IFADDR_READER_FOREACH(ifa, &vh->sc_if) { 1360 if (ifa->ifa_addr->sa_family == AF_INET && 1361 ia->ia_addr.sin_addr.s_addr == 1362 ifatoia(ifa)->ia_addr.sin_addr.s_addr) 1363 count++; 1364 } 1365 pserialize_read_exit(s); 1366 } 1367 } 1368 return (count); 1369 } 1370 1371 int 1372 carp_iamatch(struct in_ifaddr *ia, u_char *src, 1373 u_int32_t *count, u_int32_t index) 1374 { 1375 struct carp_softc *sc = ia->ia_ifp->if_softc; 1376 1377 if (carp_opts[CARPCTL_ARPBALANCE]) { 1378 /* 1379 * We use the source ip to decide which virtual host should 1380 * handle the request. If we're master of that virtual host, 1381 * then we respond, otherwise, just drop the arp packet on 1382 * the floor. 1383 */ 1384 1385 /* Count the elegible carp interfaces with this address */ 1386 if (*count == 0) 1387 *count = carp_addrcount( 1388 (struct carp_if *)ia->ia_ifp->if_carpdev->if_carp, 1389 ia, CARP_COUNT_RUNNING); 1390 1391 /* This should never happen, but... */ 1392 if (*count == 0) 1393 return (0); 1394 1395 if (carp_hash(sc, src) % *count == index - 1 && 1396 sc->sc_state == MASTER) { 1397 return (1); 1398 } 1399 } else { 1400 if (sc->sc_state == MASTER) 1401 return (1); 1402 } 1403 1404 return (0); 1405 } 1406 1407 #ifdef INET6 1408 struct ifaddr * 1409 carp_iamatch6(void *v, struct in6_addr *taddr) 1410 { 1411 struct carp_if *cif = v; 1412 struct carp_softc *vh; 1413 struct ifaddr *ifa; 1414 1415 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1416 int s = pserialize_read_enter(); 1417 IFADDR_READER_FOREACH(ifa, &vh->sc_if) { 1418 if (IN6_ARE_ADDR_EQUAL(taddr, 1419 &ifatoia6(ifa)->ia_addr.sin6_addr) && 1420 ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) == 1421 (IFF_UP|IFF_RUNNING)) && vh->sc_state == MASTER) 1422 return (ifa); 1423 } 1424 pserialize_read_exit(s); 1425 } 1426 1427 return (NULL); 1428 } 1429 #endif /* INET6 */ 1430 1431 struct ifnet * 1432 carp_ourether(void *v, struct ether_header *eh, u_char iftype, int src) 1433 { 1434 struct carp_if *cif = (struct carp_if *)v; 1435 struct carp_softc *vh; 1436 u_int8_t *ena; 1437 1438 if (src) 1439 ena = (u_int8_t *)&eh->ether_shost; 1440 else 1441 ena = (u_int8_t *)&eh->ether_dhost; 1442 1443 switch (iftype) { 1444 case IFT_ETHER: 1445 case IFT_FDDI: 1446 if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1) 1447 return (NULL); 1448 break; 1449 case IFT_ISO88025: 1450 if (ena[0] != 3 || ena[1] || ena[4] || ena[5]) 1451 return (NULL); 1452 break; 1453 default: 1454 return (NULL); 1455 break; 1456 } 1457 1458 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) 1459 if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) == 1460 (IFF_UP|IFF_RUNNING) && vh->sc_state == MASTER && 1461 !memcmp(ena, CLLADDR(vh->sc_if.if_sadl), 1462 ETHER_ADDR_LEN)) { 1463 return (&vh->sc_if); 1464 } 1465 1466 return (NULL); 1467 } 1468 1469 int 1470 carp_input(struct mbuf *m, u_int8_t *shost, u_int8_t *dhost, u_int16_t etype) 1471 { 1472 struct ether_header eh; 1473 struct carp_if *cif = (struct carp_if *)m_get_rcvif_NOMPSAFE(m)->if_carp; 1474 struct ifnet *ifp; 1475 1476 memcpy(&eh.ether_shost, shost, sizeof(eh.ether_shost)); 1477 memcpy(&eh.ether_dhost, dhost, sizeof(eh.ether_dhost)); 1478 eh.ether_type = etype; 1479 1480 if (m->m_flags & (M_BCAST|M_MCAST)) { 1481 struct carp_softc *vh; 1482 struct mbuf *m0; 1483 1484 /* 1485 * XXX Should really check the list of multicast addresses 1486 * for each CARP interface _before_ copying. 1487 */ 1488 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1489 m0 = m_copym(m, 0, M_COPYALL, M_DONTWAIT); 1490 if (m0 == NULL) 1491 continue; 1492 m_set_rcvif(m0, &vh->sc_if); 1493 ether_input(&vh->sc_if, m0); 1494 } 1495 return (1); 1496 } 1497 1498 ifp = carp_ourether(cif, &eh, m_get_rcvif_NOMPSAFE(m)->if_type, 0); 1499 if (ifp == NULL) { 1500 return (1); 1501 } 1502 1503 m_set_rcvif(m, ifp); 1504 1505 bpf_mtap(ifp, m, BPF_D_IN); 1506 if_statinc(ifp, if_ipackets); 1507 ether_input(ifp, m); 1508 return (0); 1509 } 1510 1511 static void 1512 carp_master_down(void *v) 1513 { 1514 struct carp_softc *sc = v; 1515 1516 switch (sc->sc_state) { 1517 case INIT: 1518 printf("%s: master_down event in INIT state\n", 1519 sc->sc_if.if_xname); 1520 break; 1521 case MASTER: 1522 break; 1523 case BACKUP: 1524 CARP_LOG(sc, ("INIT -> MASTER (preempting)")); 1525 carp_set_state(sc, MASTER); 1526 carp_send_ad(sc); 1527 carp_send_arp(sc); 1528 #ifdef INET6 1529 carp_send_na(sc); 1530 #endif /* INET6 */ 1531 carp_setrun(sc, 0); 1532 carp_setroute(sc, RTM_ADD); 1533 break; 1534 } 1535 } 1536 1537 /* 1538 * When in backup state, af indicates whether to reset the master down timer 1539 * for v4 or v6. If it's set to zero, reset the ones which are already pending. 1540 */ 1541 static void 1542 carp_setrun(struct carp_softc *sc, sa_family_t af) 1543 { 1544 struct timeval tv; 1545 1546 if (sc->sc_carpdev == NULL) { 1547 sc->sc_if.if_flags &= ~IFF_RUNNING; 1548 carp_set_state(sc, INIT); 1549 return; 1550 } 1551 1552 if (sc->sc_if.if_flags & IFF_UP && sc->sc_vhid > 0 && 1553 (sc->sc_naddrs || sc->sc_naddrs6) && !sc->sc_suppress) { 1554 sc->sc_if.if_flags |= IFF_RUNNING; 1555 } else { 1556 sc->sc_if.if_flags &= ~IFF_RUNNING; 1557 carp_setroute(sc, RTM_DELETE); 1558 return; 1559 } 1560 1561 switch (sc->sc_state) { 1562 case INIT: 1563 carp_set_state(sc, BACKUP); 1564 carp_setroute(sc, RTM_DELETE); 1565 carp_setrun(sc, 0); 1566 break; 1567 case BACKUP: 1568 callout_stop(&sc->sc_ad_tmo); 1569 tv.tv_sec = 3 * sc->sc_advbase; 1570 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1571 switch (af) { 1572 #ifdef INET 1573 case AF_INET: 1574 callout_schedule(&sc->sc_md_tmo, tvtohz(&tv)); 1575 break; 1576 #endif /* INET */ 1577 #ifdef INET6 1578 case AF_INET6: 1579 callout_schedule(&sc->sc_md6_tmo, tvtohz(&tv)); 1580 break; 1581 #endif /* INET6 */ 1582 default: 1583 if (sc->sc_naddrs) 1584 callout_schedule(&sc->sc_md_tmo, tvtohz(&tv)); 1585 #ifdef INET6 1586 if (sc->sc_naddrs6) 1587 callout_schedule(&sc->sc_md6_tmo, tvtohz(&tv)); 1588 #endif /* INET6 */ 1589 break; 1590 } 1591 break; 1592 case MASTER: 1593 tv.tv_sec = sc->sc_advbase; 1594 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1595 callout_schedule(&sc->sc_ad_tmo, tvtohz(&tv)); 1596 break; 1597 } 1598 } 1599 1600 static void 1601 carp_multicast_cleanup(struct carp_softc *sc) 1602 { 1603 struct ip_moptions *imo = &sc->sc_imo; 1604 #ifdef INET6 1605 struct ip6_moptions *im6o = &sc->sc_im6o; 1606 #endif 1607 u_int16_t n = imo->imo_num_memberships; 1608 1609 /* Clean up our own multicast memberships */ 1610 while (n-- > 0) { 1611 if (imo->imo_membership[n] != NULL) { 1612 in_delmulti(imo->imo_membership[n]); 1613 imo->imo_membership[n] = NULL; 1614 } 1615 } 1616 imo->imo_num_memberships = 0; 1617 imo->imo_multicast_if_index = 0; 1618 1619 #ifdef INET6 1620 while (!LIST_EMPTY(&im6o->im6o_memberships)) { 1621 struct in6_multi_mship *imm = 1622 LIST_FIRST(&im6o->im6o_memberships); 1623 1624 LIST_REMOVE(imm, i6mm_chain); 1625 in6_leavegroup(imm); 1626 } 1627 im6o->im6o_multicast_if_index = 0; 1628 #endif 1629 1630 /* And any other multicast memberships */ 1631 carp_ether_purgemulti(sc); 1632 } 1633 1634 static int 1635 carp_set_ifp(struct carp_softc *sc, struct ifnet *ifp) 1636 { 1637 struct carp_if *cif, *ncif = NULL; 1638 struct carp_softc *vr, *after = NULL; 1639 int myself = 0, error = 0; 1640 int s; 1641 1642 if (ifp == sc->sc_carpdev) 1643 return (0); 1644 1645 if (ifp != NULL) { 1646 if ((ifp->if_flags & IFF_MULTICAST) == 0) 1647 return (EADDRNOTAVAIL); 1648 1649 if (ifp->if_type == IFT_CARP) 1650 return (EINVAL); 1651 1652 if (ifp->if_carp == NULL) { 1653 ncif = malloc(sizeof(*cif), M_IFADDR, M_WAITOK); 1654 if ((error = ifpromisc(ifp, 1))) { 1655 free(ncif, M_IFADDR); 1656 return (error); 1657 } 1658 1659 ncif->vhif_ifp = ifp; 1660 TAILQ_INIT(&ncif->vhif_vrs); 1661 } else { 1662 cif = (struct carp_if *)ifp->if_carp; 1663 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) 1664 if (vr != sc && vr->sc_vhid == sc->sc_vhid) 1665 return (EINVAL); 1666 } 1667 1668 /* detach from old interface */ 1669 if (sc->sc_carpdev != NULL) 1670 carpdetach(sc); 1671 1672 /* join multicast groups */ 1673 if (sc->sc_naddrs < 0 && 1674 (error = carp_join_multicast(sc)) != 0) { 1675 if (ncif != NULL) 1676 free(ncif, M_IFADDR); 1677 return (error); 1678 } 1679 1680 #ifdef INET6 1681 if (sc->sc_naddrs6 < 0 && 1682 (error = carp_join_multicast6(sc)) != 0) { 1683 if (ncif != NULL) 1684 free(ncif, M_IFADDR); 1685 carp_multicast_cleanup(sc); 1686 return (error); 1687 } 1688 #endif 1689 1690 /* attach carp interface to physical interface */ 1691 if (ncif != NULL) 1692 ifp->if_carp = (void *)ncif; 1693 sc->sc_carpdev = ifp; 1694 sc->sc_if.if_capabilities = ifp->if_capabilities & 1695 (IFCAP_TSOv4 | IFCAP_TSOv6 | 1696 IFCAP_CSUM_IPv4_Tx|IFCAP_CSUM_IPv4_Rx| 1697 IFCAP_CSUM_TCPv4_Tx|IFCAP_CSUM_TCPv4_Rx| 1698 IFCAP_CSUM_UDPv4_Tx|IFCAP_CSUM_UDPv4_Rx| 1699 IFCAP_CSUM_TCPv6_Tx|IFCAP_CSUM_TCPv6_Rx| 1700 IFCAP_CSUM_UDPv6_Tx|IFCAP_CSUM_UDPv6_Rx); 1701 1702 cif = (struct carp_if *)ifp->if_carp; 1703 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { 1704 if (vr == sc) 1705 myself = 1; 1706 if (vr->sc_vhid < sc->sc_vhid) 1707 after = vr; 1708 } 1709 1710 if (!myself) { 1711 /* We're trying to keep things in order */ 1712 if (after == NULL) { 1713 TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list); 1714 } else { 1715 TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, 1716 sc, sc_list); 1717 } 1718 cif->vhif_nvrs++; 1719 } 1720 if (sc->sc_naddrs || sc->sc_naddrs6) 1721 sc->sc_if.if_flags |= IFF_UP; 1722 carp_set_enaddr(sc); 1723 sc->sc_linkstate_hook = if_linkstate_change_establish(ifp, 1724 carp_carpdev_state, (void *)ifp); 1725 KERNEL_LOCK(1, NULL); 1726 s = splnet(); 1727 carp_carpdev_state(ifp); 1728 splx(s); 1729 KERNEL_UNLOCK_ONE(NULL); 1730 } else { 1731 carpdetach(sc); 1732 sc->sc_if.if_flags &= ~(IFF_UP|IFF_RUNNING); 1733 } 1734 return (0); 1735 } 1736 1737 static void 1738 carp_set_enaddr(struct carp_softc *sc) 1739 { 1740 struct ifnet *ifp = &sc->sc_if; 1741 uint8_t enaddr[ETHER_ADDR_LEN]; 1742 1743 if (sc->sc_vhid == -1) { 1744 ifp->if_addrlen = 0; 1745 if_alloc_sadl(ifp); 1746 return; 1747 } 1748 1749 if (sc->sc_carpdev && sc->sc_carpdev->if_type == IFT_ISO88025) { 1750 enaddr[0] = 3; 1751 enaddr[1] = 0; 1752 enaddr[2] = 0x40 >> (sc->sc_vhid - 1); 1753 enaddr[3] = 0x40000 >> (sc->sc_vhid - 1); 1754 enaddr[4] = 0; 1755 enaddr[5] = 0; 1756 } else { 1757 enaddr[0] = 0; 1758 enaddr[1] = 0; 1759 enaddr[2] = 0x5e; 1760 enaddr[3] = 0; 1761 enaddr[4] = 1; 1762 enaddr[5] = sc->sc_vhid; 1763 } 1764 1765 if_set_sadl(ifp, enaddr, sizeof(enaddr), false); 1766 } 1767 1768 #if 0 1769 static void 1770 carp_addr_updated(void *v) 1771 { 1772 struct carp_softc *sc = (struct carp_softc *) v; 1773 struct ifaddr *ifa; 1774 int new_naddrs = 0, new_naddrs6 = 0; 1775 1776 IFADDR_READER_FOREACH(ifa, &sc->sc_if) { 1777 if (ifa->ifa_addr->sa_family == AF_INET) 1778 new_naddrs++; 1779 else if (ifa->ifa_addr->sa_family == AF_INET6) 1780 new_naddrs6++; 1781 } 1782 1783 /* Handle a callback after SIOCDIFADDR */ 1784 if (new_naddrs < sc->sc_naddrs || new_naddrs6 < sc->sc_naddrs6) { 1785 struct in_addr mc_addr; 1786 1787 sc->sc_naddrs = new_naddrs; 1788 sc->sc_naddrs6 = new_naddrs6; 1789 1790 /* Re-establish multicast membership removed by in_control */ 1791 mc_addr.s_addr = INADDR_CARP_GROUP; 1792 if (!in_multi_group(mc_addr, &sc->sc_if, 0)) { 1793 memset(&sc->sc_imo, 0, sizeof(sc->sc_imo)); 1794 1795 if (sc->sc_carpdev != NULL && sc->sc_naddrs > 0) 1796 carp_join_multicast(sc); 1797 } 1798 1799 if (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) { 1800 sc->sc_if.if_flags &= ~IFF_UP; 1801 carp_set_state(sc, INIT); 1802 } else 1803 carp_hmac_prepare(sc); 1804 } 1805 1806 carp_setrun(sc, 0); 1807 } 1808 #endif 1809 1810 static int 1811 carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin) 1812 { 1813 struct ifnet *ifp = sc->sc_carpdev; 1814 struct in_ifaddr *ia, *ia_if; 1815 int error = 0; 1816 int s; 1817 1818 if (sin->sin_addr.s_addr == 0) { 1819 if (!(sc->sc_if.if_flags & IFF_UP)) 1820 carp_set_state(sc, INIT); 1821 if (sc->sc_naddrs) 1822 sc->sc_if.if_flags |= IFF_UP; 1823 carp_setrun(sc, 0); 1824 return (0); 1825 } 1826 1827 /* we have to do this by hand to ensure we don't match on ourselves */ 1828 ia_if = NULL; 1829 s = pserialize_read_enter(); 1830 IN_ADDRLIST_READER_FOREACH(ia) { 1831 /* and, yeah, we need a multicast-capable iface too */ 1832 if (ia->ia_ifp != &sc->sc_if && 1833 ia->ia_ifp->if_type != IFT_CARP && 1834 (ia->ia_ifp->if_flags & IFF_MULTICAST) && 1835 (sin->sin_addr.s_addr & ia->ia_subnetmask) == 1836 ia->ia_subnet) { 1837 if (!ia_if) 1838 ia_if = ia; 1839 } 1840 } 1841 1842 if (ia_if) { 1843 ia = ia_if; 1844 if (ifp) { 1845 if (ifp != ia->ia_ifp) 1846 return (EADDRNOTAVAIL); 1847 } else { 1848 /* FIXME NOMPSAFE */ 1849 ifp = ia->ia_ifp; 1850 } 1851 } 1852 pserialize_read_exit(s); 1853 1854 if ((error = carp_set_ifp(sc, ifp))) 1855 return (error); 1856 1857 if (sc->sc_carpdev == NULL) 1858 return (EADDRNOTAVAIL); 1859 1860 if (sc->sc_naddrs == 0 && (error = carp_join_multicast(sc)) != 0) 1861 return (error); 1862 1863 sc->sc_naddrs++; 1864 if (sc->sc_carpdev != NULL) 1865 sc->sc_if.if_flags |= IFF_UP; 1866 1867 carp_set_state(sc, INIT); 1868 carp_setrun(sc, 0); 1869 1870 /* 1871 * Hook if_addrhooks so that we get a callback after in_ifinit has run, 1872 * to correct any inappropriate routes that it inserted. 1873 */ 1874 if (sc->ah_cookie == 0) { 1875 /* XXX link address hook */ 1876 } 1877 1878 return (0); 1879 } 1880 1881 static int 1882 carp_join_multicast(struct carp_softc *sc) 1883 { 1884 struct ip_moptions *imo = &sc->sc_imo; 1885 struct in_multi *imm; 1886 struct in_addr addr; 1887 1888 if (sc->sc_carpdev == NULL) 1889 return (ENETDOWN); 1890 1891 addr.s_addr = INADDR_CARP_GROUP; 1892 if ((imm = in_addmulti(&addr, &sc->sc_if)) == NULL) 1893 return (ENOBUFS); 1894 1895 imo->imo_membership[0] = imm; 1896 imo->imo_num_memberships = 1; 1897 imo->imo_multicast_if_index = sc->sc_carpdev->if_index; 1898 imo->imo_multicast_ttl = CARP_DFLTTL; 1899 imo->imo_multicast_loop = 0; 1900 return (0); 1901 } 1902 1903 1904 #ifdef INET6 1905 static int 1906 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6) 1907 { 1908 struct ifnet *ifp = sc->sc_carpdev; 1909 struct in6_ifaddr *ia, *ia_if; 1910 int error = 0; 1911 int s; 1912 1913 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { 1914 if (!(sc->sc_if.if_flags & IFF_UP)) 1915 carp_set_state(sc, INIT); 1916 if (sc->sc_naddrs6) 1917 sc->sc_if.if_flags |= IFF_UP; 1918 carp_setrun(sc, 0); 1919 return (0); 1920 } 1921 1922 /* we have to do this by hand to ensure we don't match on ourselves */ 1923 ia_if = NULL; 1924 s = pserialize_read_enter(); 1925 IN6_ADDRLIST_READER_FOREACH(ia) { 1926 int i; 1927 1928 for (i = 0; i < 4; i++) { 1929 if ((sin6->sin6_addr.s6_addr32[i] & 1930 ia->ia_prefixmask.sin6_addr.s6_addr32[i]) != 1931 (ia->ia_addr.sin6_addr.s6_addr32[i] & 1932 ia->ia_prefixmask.sin6_addr.s6_addr32[i])) 1933 break; 1934 } 1935 /* and, yeah, we need a multicast-capable iface too */ 1936 if (ia->ia_ifp != &sc->sc_if && 1937 ia->ia_ifp->if_type != IFT_CARP && 1938 (ia->ia_ifp->if_flags & IFF_MULTICAST) && 1939 (i == 4)) { 1940 if (!ia_if) 1941 ia_if = ia; 1942 } 1943 } 1944 pserialize_read_exit(s); 1945 1946 if (ia_if) { 1947 ia = ia_if; 1948 if (sc->sc_carpdev) { 1949 if (sc->sc_carpdev != ia->ia_ifp) 1950 return (EADDRNOTAVAIL); 1951 } else { 1952 ifp = ia->ia_ifp; 1953 } 1954 } 1955 1956 if ((error = carp_set_ifp(sc, ifp))) 1957 return (error); 1958 1959 if (sc->sc_carpdev == NULL) 1960 return (EADDRNOTAVAIL); 1961 1962 if (sc->sc_naddrs6 == 0 && (error = carp_join_multicast6(sc)) != 0) 1963 return (error); 1964 1965 sc->sc_naddrs6++; 1966 if (sc->sc_carpdev != NULL) 1967 sc->sc_if.if_flags |= IFF_UP; 1968 carp_set_state(sc, INIT); 1969 carp_setrun(sc, 0); 1970 1971 return (0); 1972 } 1973 1974 static int 1975 carp_join_multicast6(struct carp_softc *sc) 1976 { 1977 struct in6_multi_mship *imm, *imm2; 1978 struct ip6_moptions *im6o = &sc->sc_im6o; 1979 struct sockaddr_in6 addr6; 1980 int error; 1981 1982 if (sc->sc_carpdev == NULL) 1983 return (ENETDOWN); 1984 1985 /* Join IPv6 CARP multicast group */ 1986 memset(&addr6, 0, sizeof(addr6)); 1987 addr6.sin6_family = AF_INET6; 1988 addr6.sin6_len = sizeof(addr6); 1989 addr6.sin6_addr.s6_addr16[0] = htons(0xff02); 1990 addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index); 1991 addr6.sin6_addr.s6_addr8[15] = 0x12; 1992 if ((imm = in6_joingroup(&sc->sc_if, 1993 &addr6.sin6_addr, &error, 0)) == NULL) { 1994 return (error); 1995 } 1996 /* join solicited multicast address */ 1997 memset(&addr6.sin6_addr, 0, sizeof(addr6.sin6_addr)); 1998 addr6.sin6_addr.s6_addr16[0] = htons(0xff02); 1999 addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index); 2000 addr6.sin6_addr.s6_addr32[1] = 0; 2001 addr6.sin6_addr.s6_addr32[2] = htonl(1); 2002 addr6.sin6_addr.s6_addr32[3] = 0; 2003 addr6.sin6_addr.s6_addr8[12] = 0xff; 2004 if ((imm2 = in6_joingroup(&sc->sc_if, 2005 &addr6.sin6_addr, &error, 0)) == NULL) { 2006 in6_leavegroup(imm); 2007 return (error); 2008 } 2009 2010 /* apply v6 multicast membership */ 2011 im6o->im6o_multicast_if_index = sc->sc_carpdev->if_index; 2012 if (imm) 2013 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, 2014 i6mm_chain); 2015 if (imm2) 2016 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm2, 2017 i6mm_chain); 2018 2019 return (0); 2020 } 2021 2022 #endif /* INET6 */ 2023 2024 static int 2025 carp_ioctl(struct ifnet *ifp, u_long cmd, void *data) 2026 { 2027 struct lwp *l = curlwp; /* XXX */ 2028 struct carp_softc *sc = ifp->if_softc, *vr; 2029 struct carpreq carpr; 2030 struct ifaddr *ifa; 2031 struct ifreq *ifr; 2032 struct ifnet *cdev = NULL; 2033 int error = 0; 2034 2035 ifa = (struct ifaddr *)data; 2036 ifr = (struct ifreq *)data; 2037 2038 switch (cmd) { 2039 case SIOCINITIFADDR: 2040 switch (ifa->ifa_addr->sa_family) { 2041 #ifdef INET 2042 case AF_INET: 2043 sc->sc_if.if_flags |= IFF_UP; 2044 memcpy(ifa->ifa_dstaddr, ifa->ifa_addr, 2045 sizeof(struct sockaddr)); 2046 error = carp_set_addr(sc, satosin(ifa->ifa_addr)); 2047 break; 2048 #endif /* INET */ 2049 #ifdef INET6 2050 case AF_INET6: 2051 sc->sc_if.if_flags|= IFF_UP; 2052 error = carp_set_addr6(sc, satosin6(ifa->ifa_addr)); 2053 break; 2054 #endif /* INET6 */ 2055 default: 2056 error = EAFNOSUPPORT; 2057 break; 2058 } 2059 break; 2060 2061 case SIOCSIFFLAGS: 2062 if ((error = ifioctl_common(ifp, cmd, data)) != 0) 2063 break; 2064 if (sc->sc_state != INIT && !(ifr->ifr_flags & IFF_UP)) { 2065 callout_stop(&sc->sc_ad_tmo); 2066 callout_stop(&sc->sc_md_tmo); 2067 callout_stop(&sc->sc_md6_tmo); 2068 if (sc->sc_state == MASTER) { 2069 /* we need the interface up to bow out */ 2070 sc->sc_if.if_flags |= IFF_UP; 2071 sc->sc_bow_out = 1; 2072 carp_send_ad(sc); 2073 } 2074 sc->sc_if.if_flags &= ~IFF_UP; 2075 carp_set_state(sc, INIT); 2076 carp_setrun(sc, 0); 2077 } else if (sc->sc_state == INIT && (ifr->ifr_flags & IFF_UP)) { 2078 sc->sc_if.if_flags |= IFF_UP; 2079 carp_setrun(sc, 0); 2080 } 2081 carp_update_link_state(sc); 2082 break; 2083 2084 case SIOCSVH: 2085 if (l == NULL) 2086 break; 2087 if ((error = kauth_authorize_network(l->l_cred, 2088 KAUTH_NETWORK_INTERFACE, 2089 KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, (void *)cmd, 2090 NULL)) != 0) 2091 break; 2092 if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr))) 2093 break; 2094 error = 1; 2095 if (carpr.carpr_carpdev[0] != '\0' && 2096 (cdev = ifunit(carpr.carpr_carpdev)) == NULL) 2097 return (EINVAL); 2098 if ((error = carp_set_ifp(sc, cdev))) 2099 return (error); 2100 if (sc->sc_state != INIT && carpr.carpr_state != sc->sc_state) { 2101 switch (carpr.carpr_state) { 2102 case BACKUP: 2103 callout_stop(&sc->sc_ad_tmo); 2104 carp_set_state(sc, BACKUP); 2105 carp_setrun(sc, 0); 2106 carp_setroute(sc, RTM_DELETE); 2107 break; 2108 case MASTER: 2109 carp_master_down(sc); 2110 break; 2111 default: 2112 break; 2113 } 2114 } 2115 if (carpr.carpr_vhid > 0) { 2116 if (carpr.carpr_vhid > 255) { 2117 error = EINVAL; 2118 break; 2119 } 2120 if (sc->sc_carpdev) { 2121 struct carp_if *cif; 2122 cif = (struct carp_if *)sc->sc_carpdev->if_carp; 2123 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) 2124 if (vr != sc && 2125 vr->sc_vhid == carpr.carpr_vhid) 2126 return (EINVAL); 2127 } 2128 sc->sc_vhid = carpr.carpr_vhid; 2129 carp_set_enaddr(sc); 2130 carp_set_state(sc, INIT); 2131 error--; 2132 } 2133 if (carpr.carpr_advbase > 0 || carpr.carpr_advskew > 0) { 2134 if (carpr.carpr_advskew > 254) { 2135 error = EINVAL; 2136 break; 2137 } 2138 if (carpr.carpr_advbase > 255) { 2139 error = EINVAL; 2140 break; 2141 } 2142 sc->sc_advbase = carpr.carpr_advbase; 2143 sc->sc_advskew = carpr.carpr_advskew; 2144 error--; 2145 } 2146 memcpy(sc->sc_key, carpr.carpr_key, sizeof(sc->sc_key)); 2147 if (error > 0) 2148 error = EINVAL; 2149 else { 2150 error = 0; 2151 carp_setrun(sc, 0); 2152 } 2153 break; 2154 2155 case SIOCGVH: 2156 memset(&carpr, 0, sizeof(carpr)); 2157 if (sc->sc_carpdev != NULL) 2158 strlcpy(carpr.carpr_carpdev, sc->sc_carpdev->if_xname, 2159 IFNAMSIZ); 2160 carpr.carpr_state = sc->sc_state; 2161 carpr.carpr_vhid = sc->sc_vhid; 2162 carpr.carpr_advbase = sc->sc_advbase; 2163 carpr.carpr_advskew = sc->sc_advskew; 2164 2165 if ((l != NULL) && (error = kauth_authorize_network(l->l_cred, 2166 KAUTH_NETWORK_INTERFACE, 2167 KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, (void *)cmd, 2168 NULL)) == 0) 2169 memcpy(carpr.carpr_key, sc->sc_key, 2170 sizeof(carpr.carpr_key)); 2171 error = copyout(&carpr, ifr->ifr_data, sizeof(carpr)); 2172 break; 2173 2174 case SIOCADDMULTI: 2175 error = carp_ether_addmulti(sc, ifr); 2176 break; 2177 2178 case SIOCDELMULTI: 2179 error = carp_ether_delmulti(sc, ifr); 2180 break; 2181 2182 case SIOCSIFCAP: 2183 if ((error = ifioctl_common(ifp, cmd, data)) == ENETRESET) 2184 error = 0; 2185 break; 2186 2187 default: 2188 error = ether_ioctl(ifp, cmd, data); 2189 } 2190 2191 carp_hmac_prepare(sc); 2192 return (error); 2193 } 2194 2195 2196 /* 2197 * Start output on carp interface. This function should never be called. 2198 */ 2199 static void 2200 carp_start(struct ifnet *ifp) 2201 { 2202 #ifdef DEBUG 2203 printf("%s: start called\n", ifp->if_xname); 2204 #endif 2205 } 2206 2207 int 2208 carp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa, 2209 const struct rtentry *rt) 2210 { 2211 struct carp_softc *sc = ((struct carp_softc *)ifp->if_softc); 2212 KASSERT(KERNEL_LOCKED_P()); 2213 2214 if (sc->sc_carpdev != NULL && sc->sc_state == MASTER) { 2215 return if_output_lock(sc->sc_carpdev, ifp, m, sa, rt); 2216 } else { 2217 m_freem(m); 2218 return (ENETUNREACH); 2219 } 2220 } 2221 2222 static void 2223 carp_set_state(struct carp_softc *sc, int state) 2224 { 2225 static const char *carp_states[] = { CARP_STATES }; 2226 2227 if (sc->sc_state == state) 2228 return; 2229 2230 CARP_LOG(sc, ("state transition from: %s -> to: %s", carp_states[sc->sc_state], carp_states[state])); 2231 2232 sc->sc_state = state; 2233 carp_update_link_state(sc); 2234 } 2235 2236 static void 2237 carp_update_link_state(struct carp_softc *sc) 2238 { 2239 int link_state; 2240 2241 switch (sc->sc_state) { 2242 case BACKUP: 2243 link_state = LINK_STATE_DOWN; 2244 break; 2245 case MASTER: 2246 link_state = LINK_STATE_UP; 2247 break; 2248 default: 2249 /* Not useable, so down makes perfect sense. */ 2250 link_state = LINK_STATE_DOWN; 2251 break; 2252 } 2253 if_link_state_change(&sc->sc_if, link_state); 2254 } 2255 2256 void 2257 carp_carpdev_state(void *v) 2258 { 2259 struct carp_if *cif; 2260 struct carp_softc *sc; 2261 struct ifnet *ifp = v; 2262 2263 if (ifp->if_type == IFT_CARP) 2264 return; 2265 2266 cif = (struct carp_if *)ifp->if_carp; 2267 2268 TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) { 2269 int suppressed = sc->sc_suppress; 2270 2271 if (sc->sc_carpdev->if_link_state == LINK_STATE_DOWN || 2272 !(sc->sc_carpdev->if_flags & IFF_UP)) { 2273 sc->sc_if.if_flags &= ~IFF_RUNNING; 2274 callout_stop(&sc->sc_ad_tmo); 2275 callout_stop(&sc->sc_md_tmo); 2276 callout_stop(&sc->sc_md6_tmo); 2277 carp_set_state(sc, INIT); 2278 sc->sc_suppress = 1; 2279 carp_setrun(sc, 0); 2280 if (!suppressed) { 2281 carp_suppress_preempt++; 2282 if (carp_suppress_preempt == 1) 2283 carp_send_ad_all(); 2284 } 2285 } else { 2286 carp_set_state(sc, INIT); 2287 sc->sc_suppress = 0; 2288 carp_setrun(sc, 0); 2289 if (suppressed) 2290 carp_suppress_preempt--; 2291 } 2292 } 2293 } 2294 2295 static int 2296 carp_ether_addmulti(struct carp_softc *sc, struct ifreq *ifr) 2297 { 2298 const struct sockaddr *sa = ifreq_getaddr(SIOCADDMULTI, ifr); 2299 struct ifnet *ifp; 2300 struct carp_mc_entry *mc; 2301 u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN]; 2302 int error; 2303 2304 ifp = sc->sc_carpdev; 2305 if (ifp == NULL) 2306 return (EINVAL); 2307 2308 error = ether_addmulti(sa, &sc->sc_ac); 2309 if (error != ENETRESET) 2310 return (error); 2311 2312 /* 2313 * This is new multicast address. We have to tell parent 2314 * about it. Also, remember this multicast address so that 2315 * we can delete them on unconfigure. 2316 */ 2317 mc = malloc(sizeof(struct carp_mc_entry), M_DEVBUF, M_NOWAIT); 2318 if (mc == NULL) { 2319 error = ENOMEM; 2320 goto alloc_failed; 2321 } 2322 2323 /* 2324 * As ether_addmulti() returns ENETRESET, following two 2325 * statement shouldn't fail. 2326 */ 2327 (void)ether_multiaddr(sa, addrlo, addrhi); 2328 2329 ETHER_LOCK(&sc->sc_ac); 2330 mc->mc_enm = ether_lookup_multi(addrlo, addrhi, &sc->sc_ac); 2331 ETHER_UNLOCK(&sc->sc_ac); 2332 2333 memcpy(&mc->mc_addr, sa, sa->sa_len); 2334 LIST_INSERT_HEAD(&sc->carp_mc_listhead, mc, mc_entries); 2335 2336 error = if_mcast_op(ifp, SIOCADDMULTI, sa); 2337 if (error != 0) 2338 goto ioctl_failed; 2339 2340 return (error); 2341 2342 ioctl_failed: 2343 LIST_REMOVE(mc, mc_entries); 2344 free(mc, M_DEVBUF); 2345 alloc_failed: 2346 (void)ether_delmulti(sa, &sc->sc_ac); 2347 2348 return (error); 2349 } 2350 2351 static int 2352 carp_ether_delmulti(struct carp_softc *sc, struct ifreq *ifr) 2353 { 2354 const struct sockaddr *sa = ifreq_getaddr(SIOCDELMULTI, ifr); 2355 struct ifnet *ifp; 2356 struct ether_multi *enm; 2357 struct carp_mc_entry *mc; 2358 u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN]; 2359 int error; 2360 2361 ifp = sc->sc_carpdev; 2362 if (ifp == NULL) 2363 return (EINVAL); 2364 2365 /* 2366 * Find a key to lookup carp_mc_entry. We have to do this 2367 * before calling ether_delmulti for obvious reason. 2368 */ 2369 if ((error = ether_multiaddr(sa, addrlo, addrhi)) != 0) 2370 return (error); 2371 2372 ETHER_LOCK(&sc->sc_ac); 2373 enm = ether_lookup_multi(addrlo, addrhi, &sc->sc_ac); 2374 ETHER_UNLOCK(&sc->sc_ac); 2375 if (enm == NULL) 2376 return (EINVAL); 2377 2378 LIST_FOREACH(mc, &sc->carp_mc_listhead, mc_entries) 2379 if (mc->mc_enm == enm) 2380 break; 2381 2382 /* We won't delete entries we didn't add */ 2383 if (mc == NULL) 2384 return (EINVAL); 2385 2386 error = ether_delmulti(sa, &sc->sc_ac); 2387 if (error != ENETRESET) 2388 return (error); 2389 2390 /* We no longer use this multicast address. Tell parent so. */ 2391 error = if_mcast_op(ifp, SIOCDELMULTI, sa); 2392 if (error == 0) { 2393 /* And forget about this address. */ 2394 LIST_REMOVE(mc, mc_entries); 2395 free(mc, M_DEVBUF); 2396 } else 2397 (void)ether_addmulti(sa, &sc->sc_ac); 2398 return (error); 2399 } 2400 2401 /* 2402 * Delete any multicast address we have asked to add from parent 2403 * interface. Called when the carp is being unconfigured. 2404 */ 2405 static void 2406 carp_ether_purgemulti(struct carp_softc *sc) 2407 { 2408 struct ifnet *ifp = sc->sc_carpdev; /* Parent. */ 2409 struct carp_mc_entry *mc; 2410 2411 if (ifp == NULL) 2412 return; 2413 2414 while ((mc = LIST_FIRST(&sc->carp_mc_listhead)) != NULL) { 2415 (void)if_mcast_op(ifp, SIOCDELMULTI, sstosa(&mc->mc_addr)); 2416 LIST_REMOVE(mc, mc_entries); 2417 free(mc, M_DEVBUF); 2418 } 2419 } 2420 2421 static int 2422 sysctl_net_inet_carp_stats(SYSCTLFN_ARGS) 2423 { 2424 2425 return (NETSTAT_SYSCTL(carpstat_percpu, CARP_NSTATS)); 2426 } 2427 2428 void 2429 carp_init(void) 2430 { 2431 2432 sysctl_net_inet_carp_setup(NULL); 2433 #ifdef MBUFTRACE 2434 MOWNER_ATTACH(&carp_proto_mowner_rx); 2435 MOWNER_ATTACH(&carp_proto_mowner_tx); 2436 MOWNER_ATTACH(&carp_proto6_mowner_rx); 2437 MOWNER_ATTACH(&carp_proto6_mowner_tx); 2438 #endif 2439 2440 carp_wqinput = wqinput_create("carp", _carp_proto_input); 2441 #ifdef INET6 2442 carp6_wqinput = wqinput_create("carp6", _carp6_proto_input); 2443 #endif 2444 } 2445 2446 static void 2447 sysctl_net_inet_carp_setup(struct sysctllog **clog) 2448 { 2449 2450 sysctl_createv(clog, 0, NULL, NULL, 2451 CTLFLAG_PERMANENT, 2452 CTLTYPE_NODE, "inet", NULL, 2453 NULL, 0, NULL, 0, 2454 CTL_NET, PF_INET, CTL_EOL); 2455 sysctl_createv(clog, 0, NULL, NULL, 2456 CTLFLAG_PERMANENT, 2457 CTLTYPE_NODE, "carp", 2458 SYSCTL_DESCR("CARP related settings"), 2459 NULL, 0, NULL, 0, 2460 CTL_NET, PF_INET, IPPROTO_CARP, CTL_EOL); 2461 2462 sysctl_createv(clog, 0, NULL, NULL, 2463 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2464 CTLTYPE_INT, "preempt", 2465 SYSCTL_DESCR("Enable CARP Preempt"), 2466 NULL, 0, &carp_opts[CARPCTL_PREEMPT], 0, 2467 CTL_NET, PF_INET, IPPROTO_CARP, 2468 CTL_CREATE, CTL_EOL); 2469 sysctl_createv(clog, 0, NULL, NULL, 2470 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2471 CTLTYPE_INT, "arpbalance", 2472 SYSCTL_DESCR("Enable ARP balancing"), 2473 NULL, 0, &carp_opts[CARPCTL_ARPBALANCE], 0, 2474 CTL_NET, PF_INET, IPPROTO_CARP, 2475 CTL_CREATE, CTL_EOL); 2476 sysctl_createv(clog, 0, NULL, NULL, 2477 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2478 CTLTYPE_INT, "allow", 2479 SYSCTL_DESCR("Enable CARP"), 2480 NULL, 0, &carp_opts[CARPCTL_ALLOW], 0, 2481 CTL_NET, PF_INET, IPPROTO_CARP, 2482 CTL_CREATE, CTL_EOL); 2483 sysctl_createv(clog, 0, NULL, NULL, 2484 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2485 CTLTYPE_INT, "log", 2486 SYSCTL_DESCR("CARP logging"), 2487 NULL, 0, &carp_opts[CARPCTL_LOG], 0, 2488 CTL_NET, PF_INET, IPPROTO_CARP, 2489 CTL_CREATE, CTL_EOL); 2490 sysctl_createv(clog, 0, NULL, NULL, 2491 CTLFLAG_PERMANENT, 2492 CTLTYPE_STRUCT, "stats", 2493 SYSCTL_DESCR("CARP statistics"), 2494 sysctl_net_inet_carp_stats, 0, NULL, 0, 2495 CTL_NET, PF_INET, IPPROTO_CARP, CARPCTL_STATS, 2496 CTL_EOL); 2497 } 2498