/* $NetBSD: ip_carp.c,v 1.114 2020/10/14 13:43:56 roy Exp $ */
/* $OpenBSD: ip_carp.c,v 1.113 2005/11/04 08:11:54 mcbride Exp $ */

/*
 * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
 * Copyright (c) 2003 Ryan McBride. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
28 */ 29 30 #ifdef _KERNEL_OPT 31 #include "opt_inet.h" 32 #include "opt_mbuftrace.h" 33 #endif 34 35 #include <sys/cdefs.h> 36 __KERNEL_RCSID(0, "$NetBSD: ip_carp.c,v 1.114 2020/10/14 13:43:56 roy Exp $"); 37 38 /* 39 * TODO: 40 * - iface reconfigure 41 * - support for hardware checksum calculations; 42 * 43 */ 44 45 #include <sys/param.h> 46 #include <sys/proc.h> 47 #include <sys/mbuf.h> 48 #include <sys/socket.h> 49 #include <sys/socketvar.h> 50 #include <sys/callout.h> 51 #include <sys/ioctl.h> 52 #include <sys/errno.h> 53 #include <sys/device.h> 54 #include <sys/time.h> 55 #include <sys/kernel.h> 56 #include <sys/kauth.h> 57 #include <sys/sysctl.h> 58 #include <sys/ucred.h> 59 #include <sys/syslog.h> 60 #include <sys/acct.h> 61 #include <sys/cprng.h> 62 #include <sys/cpu.h> 63 #include <sys/pserialize.h> 64 #include <sys/psref.h> 65 66 #include <net/if.h> 67 #include <net/pfil.h> 68 #include <net/if_types.h> 69 #include <net/if_ether.h> 70 #include <net/route.h> 71 #include <net/netisr.h> 72 #include <net/net_stats.h> 73 #include <netinet/if_inarp.h> 74 #include <netinet/wqinput.h> 75 76 #ifdef INET 77 #include <netinet/in.h> 78 #include <netinet/in_systm.h> 79 #include <netinet/in_var.h> 80 #include <netinet/ip.h> 81 #include <netinet/ip_var.h> 82 83 #include <net/if_dl.h> 84 #endif 85 86 #ifdef INET6 87 #include <netinet/icmp6.h> 88 #include <netinet/ip6.h> 89 #include <netinet6/ip6_var.h> 90 #include <netinet6/nd6.h> 91 #include <netinet6/scope6_var.h> 92 #include <netinet6/in6_var.h> 93 #endif 94 95 #include <net/bpf.h> 96 97 #include <sys/sha1.h> 98 99 #include <netinet/ip_carp.h> 100 101 #include "ioconf.h" 102 103 struct carp_mc_entry { 104 LIST_ENTRY(carp_mc_entry) mc_entries; 105 union { 106 struct ether_multi *mcu_enm; 107 } mc_u; 108 struct sockaddr_storage mc_addr; 109 }; 110 #define mc_enm mc_u.mcu_enm 111 112 struct carp_softc { 113 struct ethercom sc_ac; 114 #define sc_if sc_ac.ec_if 115 #define sc_carpdev sc_ac.ec_if.if_carpdev 116 int 
ah_cookie; 117 int lh_cookie; 118 struct ip_moptions sc_imo; 119 #ifdef INET6 120 struct ip6_moptions sc_im6o; 121 #endif /* INET6 */ 122 TAILQ_ENTRY(carp_softc) sc_list; 123 124 enum { INIT = 0, BACKUP, MASTER } sc_state; 125 126 int sc_suppress; 127 int sc_bow_out; 128 129 int sc_sendad_errors; 130 #define CARP_SENDAD_MAX_ERRORS 3 131 int sc_sendad_success; 132 #define CARP_SENDAD_MIN_SUCCESS 3 133 134 int sc_vhid; 135 int sc_advskew; 136 int sc_naddrs; 137 int sc_naddrs6; 138 int sc_advbase; /* seconds */ 139 int sc_init_counter; 140 u_int64_t sc_counter; 141 142 /* authentication */ 143 #define CARP_HMAC_PAD 64 144 unsigned char sc_key[CARP_KEY_LEN]; 145 unsigned char sc_pad[CARP_HMAC_PAD]; 146 SHA1_CTX sc_sha1; 147 u_int32_t sc_hashkey[2]; 148 149 struct callout sc_ad_tmo; /* advertisement timeout */ 150 struct callout sc_md_tmo; /* master down timeout */ 151 struct callout sc_md6_tmo; /* master down timeout */ 152 153 LIST_HEAD(__carp_mchead, carp_mc_entry) carp_mc_listhead; 154 }; 155 156 int carp_suppress_preempt = 0; 157 static int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 0, 0 }; /* XXX for now */ 158 159 static percpu_t *carpstat_percpu; 160 161 #define CARP_STATINC(x) _NET_STATINC(carpstat_percpu, x) 162 163 #ifdef MBUFTRACE 164 static struct mowner carp_proto_mowner_rx = MOWNER_INIT("carp", "rx"); 165 static struct mowner carp_proto_mowner_tx = MOWNER_INIT("carp", "tx"); 166 static struct mowner carp_proto6_mowner_rx = MOWNER_INIT("carp6", "rx"); 167 static struct mowner carp_proto6_mowner_tx = MOWNER_INIT("carp6", "tx"); 168 #endif 169 170 struct carp_if { 171 TAILQ_HEAD(, carp_softc) vhif_vrs; 172 int vhif_nvrs; 173 174 struct ifnet *vhif_ifp; 175 }; 176 177 #define CARP_LOG(sc, s) \ 178 if (carp_opts[CARPCTL_LOG]) { \ 179 if (sc) \ 180 log(LOG_INFO, "%s: ", \ 181 (sc)->sc_if.if_xname); \ 182 else \ 183 log(LOG_INFO, "carp: "); \ 184 addlog s; \ 185 addlog("\n"); \ 186 } 187 188 static void carp_hmac_prepare(struct carp_softc *); 189 static void 
carp_hmac_generate(struct carp_softc *, u_int32_t *, 190 unsigned char *); 191 static int carp_hmac_verify(struct carp_softc *, u_int32_t *, 192 unsigned char *); 193 static void carp_setroute(struct carp_softc *, int); 194 static void carp_proto_input_c(struct mbuf *, struct carp_header *, 195 sa_family_t); 196 static void carpdetach(struct carp_softc *); 197 static void carp_prepare_ad(struct mbuf *, struct carp_softc *, 198 struct carp_header *); 199 static void carp_send_ad_all(void); 200 static void carp_send_ad(void *); 201 static void carp_send_arp(struct carp_softc *); 202 static void carp_master_down(void *); 203 static int carp_ioctl(struct ifnet *, u_long, void *); 204 static void carp_start(struct ifnet *); 205 static void carp_setrun(struct carp_softc *, sa_family_t); 206 static void carp_set_state(struct carp_softc *, int); 207 static int carp_addrcount(struct carp_if *, struct in_ifaddr *, int); 208 enum { CARP_COUNT_MASTER, CARP_COUNT_RUNNING }; 209 210 static void carp_multicast_cleanup(struct carp_softc *); 211 static int carp_set_ifp(struct carp_softc *, struct ifnet *); 212 static void carp_set_enaddr(struct carp_softc *); 213 #if 0 214 static void carp_addr_updated(void *); 215 #endif 216 static u_int32_t carp_hash(struct carp_softc *, u_char *); 217 static int carp_set_addr(struct carp_softc *, struct sockaddr_in *); 218 static int carp_join_multicast(struct carp_softc *); 219 #ifdef INET6 220 static void carp_send_na(struct carp_softc *); 221 static int carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *); 222 static int carp_join_multicast6(struct carp_softc *); 223 #endif 224 static int carp_clone_create(struct if_clone *, int); 225 static int carp_clone_destroy(struct ifnet *); 226 static int carp_ether_addmulti(struct carp_softc *, struct ifreq *); 227 static int carp_ether_delmulti(struct carp_softc *, struct ifreq *); 228 static void carp_ether_purgemulti(struct carp_softc *); 229 static void carp_update_link_state(struct 
carp_softc *sc); 230 231 static void sysctl_net_inet_carp_setup(struct sysctllog **); 232 233 /* workqueue-based pr_input */ 234 static struct wqinput *carp_wqinput; 235 static void _carp_proto_input(struct mbuf *, int, int); 236 #ifdef INET6 237 static struct wqinput *carp6_wqinput; 238 static void _carp6_proto_input(struct mbuf *, int, int); 239 #endif 240 241 struct if_clone carp_cloner = 242 IF_CLONE_INITIALIZER("carp", carp_clone_create, carp_clone_destroy); 243 244 static __inline u_int16_t 245 carp_cksum(struct mbuf *m, int len) 246 { 247 return (in_cksum(m, len)); 248 } 249 250 #ifdef INET6 251 static __inline u_int16_t 252 carp6_cksum(struct mbuf *m, uint32_t off, uint32_t len) 253 { 254 return (in6_cksum(m, IPPROTO_CARP, off, len)); 255 } 256 #endif 257 258 static void 259 carp_hmac_prepare(struct carp_softc *sc) 260 { 261 u_int8_t carp_version = CARP_VERSION, type = CARP_ADVERTISEMENT; 262 u_int8_t vhid = sc->sc_vhid & 0xff; 263 SHA1_CTX sha1ctx; 264 u_int32_t kmd[5]; 265 struct ifaddr *ifa; 266 int i, found; 267 struct in_addr last, cur, in; 268 #ifdef INET6 269 struct in6_addr last6, cur6, in6; 270 #endif /* INET6 */ 271 272 /* compute ipad from key */ 273 memset(sc->sc_pad, 0, sizeof(sc->sc_pad)); 274 memcpy(sc->sc_pad, sc->sc_key, sizeof(sc->sc_key)); 275 for (i = 0; i < sizeof(sc->sc_pad); i++) 276 sc->sc_pad[i] ^= 0x36; 277 278 /* precompute first part of inner hash */ 279 SHA1Init(&sc->sc_sha1); 280 SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad)); 281 SHA1Update(&sc->sc_sha1, (void *)&carp_version, sizeof(carp_version)); 282 SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type)); 283 284 /* generate a key for the arpbalance hash, before the vhid is hashed */ 285 memcpy(&sha1ctx, &sc->sc_sha1, sizeof(sha1ctx)); 286 SHA1Final((unsigned char *)kmd, &sha1ctx); 287 sc->sc_hashkey[0] = kmd[0] ^ kmd[1]; 288 sc->sc_hashkey[1] = kmd[2] ^ kmd[3]; 289 290 /* the rest of the precomputation */ 291 SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid)); 
292 293 /* Hash the addresses from smallest to largest, not interface order */ 294 #ifdef INET 295 cur.s_addr = 0; 296 do { 297 int s; 298 found = 0; 299 last = cur; 300 cur.s_addr = 0xffffffff; 301 s = pserialize_read_enter(); 302 IFADDR_READER_FOREACH(ifa, &sc->sc_if) { 303 in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr; 304 if (ifa->ifa_addr->sa_family == AF_INET && 305 ntohl(in.s_addr) > ntohl(last.s_addr) && 306 ntohl(in.s_addr) < ntohl(cur.s_addr)) { 307 cur.s_addr = in.s_addr; 308 found++; 309 } 310 } 311 pserialize_read_exit(s); 312 if (found) 313 SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur)); 314 } while (found); 315 #endif /* INET */ 316 317 #ifdef INET6 318 memset(&cur6, 0x00, sizeof(cur6)); 319 do { 320 int s; 321 found = 0; 322 last6 = cur6; 323 memset(&cur6, 0xff, sizeof(cur6)); 324 s = pserialize_read_enter(); 325 IFADDR_READER_FOREACH(ifa, &sc->sc_if) { 326 in6 = ifatoia6(ifa)->ia_addr.sin6_addr; 327 if (IN6_IS_ADDR_LINKLOCAL(&in6)) 328 in6.s6_addr16[1] = 0; 329 if (ifa->ifa_addr->sa_family == AF_INET6 && 330 memcmp(&in6, &last6, sizeof(in6)) > 0 && 331 memcmp(&in6, &cur6, sizeof(in6)) < 0) { 332 cur6 = in6; 333 found++; 334 } 335 } 336 pserialize_read_exit(s); 337 if (found) 338 SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6)); 339 } while (found); 340 #endif /* INET6 */ 341 342 /* convert ipad to opad */ 343 for (i = 0; i < sizeof(sc->sc_pad); i++) 344 sc->sc_pad[i] ^= 0x36 ^ 0x5c; 345 } 346 347 static void 348 carp_hmac_generate(struct carp_softc *sc, u_int32_t counter[2], 349 unsigned char md[20]) 350 { 351 SHA1_CTX sha1ctx; 352 353 /* fetch first half of inner hash */ 354 memcpy(&sha1ctx, &sc->sc_sha1, sizeof(sha1ctx)); 355 356 SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter)); 357 SHA1Final(md, &sha1ctx); 358 359 /* outer hash */ 360 SHA1Init(&sha1ctx); 361 SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad)); 362 SHA1Update(&sha1ctx, md, 20); 363 SHA1Final(md, &sha1ctx); 364 } 365 366 static int 367 
carp_hmac_verify(struct carp_softc *sc, u_int32_t counter[2], 368 unsigned char md[20]) 369 { 370 unsigned char md2[20]; 371 372 carp_hmac_generate(sc, counter, md2); 373 374 return (memcmp(md, md2, sizeof(md2))); 375 } 376 377 static void 378 carp_setroute(struct carp_softc *sc, int cmd) 379 { 380 struct ifaddr *ifa; 381 int s, bound; 382 383 KERNEL_LOCK(1, NULL); 384 bound = curlwp_bind(); 385 s = pserialize_read_enter(); 386 IFADDR_READER_FOREACH(ifa, &sc->sc_if) { 387 struct psref psref; 388 ifa_acquire(ifa, &psref); 389 pserialize_read_exit(s); 390 391 switch (ifa->ifa_addr->sa_family) { 392 case AF_INET: { 393 int count = 0; 394 struct rtentry *rt; 395 int hr_otherif, nr_ourif; 396 397 /* 398 * Avoid screwing with the routes if there are other 399 * carp interfaces which are master and have the same 400 * address. 401 */ 402 if (sc->sc_carpdev != NULL && 403 sc->sc_carpdev->if_carp != NULL) { 404 count = carp_addrcount( 405 (struct carp_if *)sc->sc_carpdev->if_carp, 406 ifatoia(ifa), CARP_COUNT_MASTER); 407 if ((cmd == RTM_ADD && count != 1) || 408 (cmd == RTM_DELETE && count != 0)) 409 goto next; 410 } 411 412 /* Remove the existing host route, if any */ 413 rtrequest(RTM_DELETE, ifa->ifa_addr, 414 ifa->ifa_addr, ifa->ifa_netmask, 415 RTF_HOST, NULL); 416 417 rt = NULL; 418 (void)rtrequest(RTM_GET, ifa->ifa_addr, ifa->ifa_addr, 419 ifa->ifa_netmask, RTF_HOST, &rt); 420 hr_otherif = (rt && rt->rt_ifp != &sc->sc_if && 421 (rt->rt_flags & RTF_CONNECTED)); 422 if (rt != NULL) { 423 rt_unref(rt); 424 rt = NULL; 425 } 426 427 /* Check for a network route on our interface */ 428 429 rt = NULL; 430 (void)rtrequest(RTM_GET, ifa->ifa_addr, ifa->ifa_addr, 431 ifa->ifa_netmask, 0, &rt); 432 nr_ourif = (rt && rt->rt_ifp == &sc->sc_if); 433 434 switch (cmd) { 435 case RTM_ADD: 436 if (hr_otherif) { 437 ifa->ifa_rtrequest = NULL; 438 ifa->ifa_flags &= ~RTF_CONNECTED; 439 440 rtrequest(RTM_ADD, ifa->ifa_addr, 441 ifa->ifa_addr, ifa->ifa_netmask, 442 RTF_UP | RTF_HOST, 
NULL); 443 } 444 if (!hr_otherif || nr_ourif || !rt) { 445 if (nr_ourif && 446 (rt->rt_flags & RTF_CONNECTED) == 0) 447 rtrequest(RTM_DELETE, 448 ifa->ifa_addr, 449 ifa->ifa_addr, 450 ifa->ifa_netmask, 0, NULL); 451 452 ifa->ifa_rtrequest = arp_rtrequest; 453 ifa->ifa_flags |= RTF_CONNECTED; 454 455 if (rtrequest(RTM_ADD, ifa->ifa_addr, 456 ifa->ifa_addr, ifa->ifa_netmask, 0, 457 NULL) == 0) 458 ifa->ifa_flags |= IFA_ROUTE; 459 } 460 break; 461 case RTM_DELETE: 462 break; 463 default: 464 break; 465 } 466 if (rt != NULL) { 467 rt_unref(rt); 468 rt = NULL; 469 } 470 break; 471 } 472 473 #ifdef INET6 474 case AF_INET6: 475 if (cmd == RTM_ADD) 476 in6_ifaddlocal(ifa); 477 else 478 in6_ifremlocal(ifa); 479 break; 480 #endif /* INET6 */ 481 default: 482 break; 483 } 484 next: 485 s = pserialize_read_enter(); 486 ifa_release(ifa, &psref); 487 } 488 pserialize_read_exit(s); 489 curlwp_bindx(bound); 490 KERNEL_UNLOCK_ONE(NULL); 491 } 492 493 /* 494 * process input packet. 495 * we have rearranged checks order compared to the rfc, 496 * but it seems more efficient this way or not possible otherwise. 497 */ 498 static void 499 _carp_proto_input(struct mbuf *m, int hlen, int proto) 500 { 501 struct ip *ip = mtod(m, struct ip *); 502 struct carp_softc *sc = NULL; 503 struct carp_header *ch; 504 int iplen, len; 505 struct ifnet *rcvif; 506 507 CARP_STATINC(CARP_STAT_IPACKETS); 508 MCLAIM(m, &carp_proto_mowner_rx); 509 510 if (!carp_opts[CARPCTL_ALLOW]) { 511 m_freem(m); 512 return; 513 } 514 515 rcvif = m_get_rcvif_NOMPSAFE(m); 516 /* check if received on a valid carp interface */ 517 if (rcvif->if_type != IFT_CARP) { 518 CARP_STATINC(CARP_STAT_BADIF); 519 CARP_LOG(sc, ("packet received on non-carp interface: %s", 520 rcvif->if_xname)); 521 m_freem(m); 522 return; 523 } 524 525 /* verify that the IP TTL is 255. 
*/ 526 if (ip->ip_ttl != CARP_DFLTTL) { 527 CARP_STATINC(CARP_STAT_BADTTL); 528 CARP_LOG(sc, ("received ttl %d != %d on %s", ip->ip_ttl, 529 CARP_DFLTTL, rcvif->if_xname)); 530 m_freem(m); 531 return; 532 } 533 534 /* 535 * verify that the received packet length is 536 * equal to the CARP header 537 */ 538 iplen = ip->ip_hl << 2; 539 len = iplen + sizeof(*ch); 540 if (len > m->m_pkthdr.len) { 541 CARP_STATINC(CARP_STAT_BADLEN); 542 CARP_LOG(sc, ("packet too short %d on %s", m->m_pkthdr.len, 543 rcvif->if_xname)); 544 m_freem(m); 545 return; 546 } 547 548 if ((m = m_pullup(m, len)) == NULL) { 549 CARP_STATINC(CARP_STAT_HDROPS); 550 return; 551 } 552 ip = mtod(m, struct ip *); 553 ch = (struct carp_header *)((char *)ip + iplen); 554 /* verify the CARP checksum */ 555 m->m_data += iplen; 556 if (carp_cksum(m, len - iplen)) { 557 CARP_STATINC(CARP_STAT_BADSUM); 558 CARP_LOG(sc, ("checksum failed on %s", 559 rcvif->if_xname)); 560 m_freem(m); 561 return; 562 } 563 m->m_data -= iplen; 564 565 carp_proto_input_c(m, ch, AF_INET); 566 } 567 568 void 569 carp_proto_input(struct mbuf *m, int off, int proto) 570 { 571 572 wqinput_input(carp_wqinput, m, 0, 0); 573 } 574 575 #ifdef INET6 576 static void 577 _carp6_proto_input(struct mbuf *m, int off, int proto) 578 { 579 struct carp_softc *sc = NULL; 580 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 581 struct carp_header *ch; 582 u_int len; 583 struct ifnet *rcvif; 584 585 CARP_STATINC(CARP_STAT_IPACKETS6); 586 MCLAIM(m, &carp_proto6_mowner_rx); 587 588 if (!carp_opts[CARPCTL_ALLOW]) { 589 m_freem(m); 590 return; 591 } 592 593 rcvif = m_get_rcvif_NOMPSAFE(m); 594 595 /* check if received on a valid carp interface */ 596 if (rcvif->if_type != IFT_CARP) { 597 CARP_STATINC(CARP_STAT_BADIF); 598 CARP_LOG(sc, ("packet received on non-carp interface: %s", 599 rcvif->if_xname)); 600 m_freem(m); 601 return; 602 } 603 604 /* verify that the IP TTL is 255 */ 605 if (ip6->ip6_hlim != CARP_DFLTTL) { 606 CARP_STATINC(CARP_STAT_BADTTL); 
607 CARP_LOG(sc, ("received ttl %d != %d on %s", ip6->ip6_hlim, 608 CARP_DFLTTL, rcvif->if_xname)); 609 m_freem(m); 610 return; 611 } 612 613 /* verify that we have a complete carp packet */ 614 len = m->m_len; 615 M_REGION_GET(ch, struct carp_header *, m, off, sizeof(*ch)); 616 if (ch == NULL) { 617 CARP_STATINC(CARP_STAT_BADLEN); 618 CARP_LOG(sc, ("packet size %u too small", len)); 619 return; 620 } 621 622 /* verify the CARP checksum */ 623 if (carp6_cksum(m, off, sizeof(*ch))) { 624 CARP_STATINC(CARP_STAT_BADSUM); 625 CARP_LOG(sc, ("checksum failed, on %s", rcvif->if_xname)); 626 m_freem(m); 627 return; 628 } 629 630 carp_proto_input_c(m, ch, AF_INET6); 631 return; 632 } 633 634 int 635 carp6_proto_input(struct mbuf **mp, int *offp, int proto) 636 { 637 638 wqinput_input(carp6_wqinput, *mp, *offp, proto); 639 640 return IPPROTO_DONE; 641 } 642 #endif /* INET6 */ 643 644 static void 645 carp_proto_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af) 646 { 647 struct carp_softc *sc; 648 u_int64_t tmp_counter; 649 struct timeval sc_tv, ch_tv; 650 651 TAILQ_FOREACH(sc, &((struct carp_if *) 652 m_get_rcvif_NOMPSAFE(m)->if_carpdev->if_carp)->vhif_vrs, sc_list) 653 if (sc->sc_vhid == ch->carp_vhid) 654 break; 655 656 if (!sc || (sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) != 657 (IFF_UP|IFF_RUNNING)) { 658 CARP_STATINC(CARP_STAT_BADVHID); 659 m_freem(m); 660 return; 661 } 662 663 /* 664 * Check if our own advertisement was duplicated 665 * from a non simplex interface. 666 * XXX If there is no address on our physical interface 667 * there is no way to distinguish our ads from the ones 668 * another carp host might have sent us. 
669 */ 670 if ((sc->sc_carpdev->if_flags & IFF_SIMPLEX) == 0) { 671 struct sockaddr sa; 672 struct ifaddr *ifa; 673 int s; 674 675 memset(&sa, 0, sizeof(sa)); 676 sa.sa_family = af; 677 s = pserialize_read_enter(); 678 ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev); 679 680 if (ifa && af == AF_INET) { 681 struct ip *ip = mtod(m, struct ip *); 682 if (ip->ip_src.s_addr == 683 ifatoia(ifa)->ia_addr.sin_addr.s_addr) { 684 pserialize_read_exit(s); 685 m_freem(m); 686 return; 687 } 688 } 689 #ifdef INET6 690 if (ifa && af == AF_INET6) { 691 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 692 struct in6_addr in6_src, in6_found; 693 694 in6_src = ip6->ip6_src; 695 in6_found = ifatoia6(ifa)->ia_addr.sin6_addr; 696 if (IN6_IS_ADDR_LINKLOCAL(&in6_src)) 697 in6_src.s6_addr16[1] = 0; 698 if (IN6_IS_ADDR_LINKLOCAL(&in6_found)) 699 in6_found.s6_addr16[1] = 0; 700 if (IN6_ARE_ADDR_EQUAL(&in6_src, &in6_found)) { 701 pserialize_read_exit(s); 702 m_freem(m); 703 return; 704 } 705 } 706 #endif /* INET6 */ 707 pserialize_read_exit(s); 708 } 709 710 nanotime(&sc->sc_if.if_lastchange); 711 if_statadd2(&sc->sc_if, if_ipackets, 1, if_ibytes, m->m_pkthdr.len); 712 713 /* verify the CARP version. 
*/ 714 if (ch->carp_version != CARP_VERSION) { 715 CARP_STATINC(CARP_STAT_BADVER); 716 if_statinc(&sc->sc_if, if_ierrors); 717 CARP_LOG(sc, ("invalid version %d != %d", 718 ch->carp_version, CARP_VERSION)); 719 m_freem(m); 720 return; 721 } 722 723 /* verify the hash */ 724 if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) { 725 struct ip *ip; 726 char ipbuf[INET_ADDRSTRLEN]; 727 #ifdef INET6 728 struct ip6_hdr *ip6; 729 char ip6buf[INET6_ADDRSTRLEN]; 730 #endif 731 732 CARP_STATINC(CARP_STAT_BADAUTH); 733 if_statinc(&sc->sc_if, if_ierrors); 734 735 switch(af) { 736 case AF_INET: 737 ip = mtod(m, struct ip *); 738 CARP_LOG(sc, ("incorrect hash from %s", 739 IN_PRINT(ipbuf, &ip->ip_src))); 740 break; 741 742 #ifdef INET6 743 case AF_INET6: 744 ip6 = mtod(m, struct ip6_hdr *); 745 CARP_LOG(sc, ("incorrect hash from %s", 746 IN6_PRINT(ip6buf, &ip6->ip6_src))); 747 break; 748 #endif 749 750 default: CARP_LOG(sc, ("incorrect hash")); 751 break; 752 } 753 m_freem(m); 754 return; 755 } 756 757 tmp_counter = ntohl(ch->carp_counter[0]); 758 tmp_counter = tmp_counter<<32; 759 tmp_counter += ntohl(ch->carp_counter[1]); 760 761 /* XXX Replay protection goes here */ 762 763 sc->sc_init_counter = 0; 764 sc->sc_counter = tmp_counter; 765 766 767 sc_tv.tv_sec = sc->sc_advbase; 768 if (carp_suppress_preempt && sc->sc_advskew < 240) 769 sc_tv.tv_usec = 240 * 1000000 / 256; 770 else 771 sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256; 772 ch_tv.tv_sec = ch->carp_advbase; 773 ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256; 774 775 switch (sc->sc_state) { 776 case INIT: 777 break; 778 case MASTER: 779 /* 780 * If we receive an advertisement from a backup who's going to 781 * be more frequent than us, go into BACKUP state. 
782 */ 783 if (timercmp(&sc_tv, &ch_tv, >) || 784 timercmp(&sc_tv, &ch_tv, ==)) { 785 callout_stop(&sc->sc_ad_tmo); 786 CARP_LOG(sc, ("MASTER -> BACKUP (more frequent advertisement received)")); 787 carp_set_state(sc, BACKUP); 788 carp_setrun(sc, 0); 789 carp_setroute(sc, RTM_DELETE); 790 } 791 break; 792 case BACKUP: 793 /* 794 * If we're pre-empting masters who advertise slower than us, 795 * and this one claims to be slower, treat him as down. 796 */ 797 if (carp_opts[CARPCTL_PREEMPT] && timercmp(&sc_tv, &ch_tv, <)) { 798 CARP_LOG(sc, ("BACKUP -> MASTER (preempting a slower master)")); 799 carp_master_down(sc); 800 break; 801 } 802 803 /* 804 * If the master is going to advertise at such a low frequency 805 * that he's guaranteed to time out, we'd might as well just 806 * treat him as timed out now. 807 */ 808 sc_tv.tv_sec = sc->sc_advbase * 3; 809 if (timercmp(&sc_tv, &ch_tv, <)) { 810 CARP_LOG(sc, ("BACKUP -> MASTER (master timed out)")); 811 carp_master_down(sc); 812 break; 813 } 814 815 /* 816 * Otherwise, we reset the counter and wait for the next 817 * advertisement. 818 */ 819 carp_setrun(sc, af); 820 break; 821 } 822 823 m_freem(m); 824 return; 825 } 826 827 /* 828 * Interface side of the CARP implementation. 
829 */ 830 831 /* ARGSUSED */ 832 void 833 carpattach(int n) 834 { 835 if_clone_attach(&carp_cloner); 836 837 carpstat_percpu = percpu_alloc(sizeof(uint64_t) * CARP_NSTATS); 838 } 839 840 static int 841 carp_clone_create(struct if_clone *ifc, int unit) 842 { 843 extern int ifqmaxlen; 844 struct carp_softc *sc; 845 struct ifnet *ifp; 846 int rv; 847 848 sc = malloc(sizeof(*sc), M_DEVBUF, M_NOWAIT|M_ZERO); 849 if (!sc) 850 return (ENOMEM); 851 852 sc->sc_suppress = 0; 853 sc->sc_advbase = CARP_DFLTINTV; 854 sc->sc_vhid = -1; /* required setting */ 855 sc->sc_advskew = 0; 856 sc->sc_init_counter = 1; 857 sc->sc_naddrs = sc->sc_naddrs6 = 0; 858 #ifdef INET6 859 sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL; 860 #endif /* INET6 */ 861 862 callout_init(&sc->sc_ad_tmo, 0); 863 callout_init(&sc->sc_md_tmo, 0); 864 callout_init(&sc->sc_md6_tmo, 0); 865 866 callout_setfunc(&sc->sc_ad_tmo, carp_send_ad, sc); 867 callout_setfunc(&sc->sc_md_tmo, carp_master_down, sc); 868 callout_setfunc(&sc->sc_md6_tmo, carp_master_down, sc); 869 870 LIST_INIT(&sc->carp_mc_listhead); 871 ifp = &sc->sc_if; 872 ifp->if_softc = sc; 873 snprintf(ifp->if_xname, sizeof ifp->if_xname, "%s%d", ifc->ifc_name, 874 unit); 875 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 876 ifp->if_ioctl = carp_ioctl; 877 ifp->if_start = carp_start; 878 IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); 879 IFQ_SET_READY(&ifp->if_snd); 880 rv = if_initialize(ifp); 881 if (rv != 0) { 882 callout_destroy(&sc->sc_ad_tmo); 883 callout_destroy(&sc->sc_md_tmo); 884 callout_destroy(&sc->sc_md6_tmo); 885 free(ifp->if_softc, M_DEVBUF); 886 887 return rv; 888 } 889 ether_ifattach(ifp, NULL); 890 /* Overwrite ethernet defaults */ 891 ifp->if_type = IFT_CARP; 892 ifp->if_output = carp_output; 893 ifp->if_link_state = LINK_STATE_DOWN; 894 carp_set_enaddr(sc); 895 if_register(ifp); 896 897 return (0); 898 } 899 900 static int 901 carp_clone_destroy(struct ifnet *ifp) 902 { 903 struct carp_softc *sc = ifp->if_softc; 904 905 
carpdetach(ifp->if_softc); 906 ether_ifdetach(ifp); 907 if_detach(ifp); 908 callout_destroy(&sc->sc_ad_tmo); 909 callout_destroy(&sc->sc_md_tmo); 910 callout_destroy(&sc->sc_md6_tmo); 911 free(ifp->if_softc, M_DEVBUF); 912 913 return (0); 914 } 915 916 static void 917 carpdetach(struct carp_softc *sc) 918 { 919 struct carp_if *cif; 920 int s; 921 922 callout_stop(&sc->sc_ad_tmo); 923 callout_stop(&sc->sc_md_tmo); 924 callout_stop(&sc->sc_md6_tmo); 925 926 if (sc->sc_suppress) 927 carp_suppress_preempt--; 928 sc->sc_suppress = 0; 929 930 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) 931 carp_suppress_preempt--; 932 sc->sc_sendad_errors = 0; 933 934 carp_set_state(sc, INIT); 935 sc->sc_if.if_flags &= ~IFF_UP; 936 carp_setrun(sc, 0); 937 carp_multicast_cleanup(sc); 938 939 KERNEL_LOCK(1, NULL); 940 s = splnet(); 941 if (sc->sc_carpdev != NULL) { 942 /* XXX linkstatehook removal */ 943 cif = (struct carp_if *)sc->sc_carpdev->if_carp; 944 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list); 945 if (!--cif->vhif_nvrs) { 946 ifpromisc(sc->sc_carpdev, 0); 947 sc->sc_carpdev->if_carp = NULL; 948 free(cif, M_IFADDR); 949 } 950 } 951 sc->sc_carpdev = NULL; 952 splx(s); 953 KERNEL_UNLOCK_ONE(NULL); 954 } 955 956 /* Detach an interface from the carp. 
*/ 957 void 958 carp_ifdetach(struct ifnet *ifp) 959 { 960 struct carp_softc *sc, *nextsc; 961 struct carp_if *cif = (struct carp_if *)ifp->if_carp; 962 963 for (sc = TAILQ_FIRST(&cif->vhif_vrs); sc; sc = nextsc) { 964 nextsc = TAILQ_NEXT(sc, sc_list); 965 carpdetach(sc); 966 } 967 } 968 969 static void 970 carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, 971 struct carp_header *ch) 972 { 973 if (sc->sc_init_counter) { 974 /* this could also be seconds since unix epoch */ 975 sc->sc_counter = cprng_fast64(); 976 } else 977 sc->sc_counter++; 978 979 ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff); 980 ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff); 981 982 carp_hmac_generate(sc, ch->carp_counter, ch->carp_md); 983 } 984 985 static void 986 carp_send_ad_all(void) 987 { 988 struct ifnet *ifp; 989 struct carp_if *cif; 990 struct carp_softc *vh; 991 int s; 992 int bound = curlwp_bind(); 993 994 s = pserialize_read_enter(); 995 IFNET_READER_FOREACH(ifp) { 996 struct psref psref; 997 if (ifp->if_carp == NULL || ifp->if_type == IFT_CARP) 998 continue; 999 1000 if_acquire(ifp, &psref); 1001 pserialize_read_exit(s); 1002 1003 cif = (struct carp_if *)ifp->if_carp; 1004 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1005 if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) == 1006 (IFF_UP|IFF_RUNNING) && vh->sc_state == MASTER) 1007 carp_send_ad(vh); 1008 } 1009 1010 s = pserialize_read_enter(); 1011 if_release(ifp, &psref); 1012 } 1013 pserialize_read_exit(s); 1014 curlwp_bindx(bound); 1015 } 1016 1017 1018 static void 1019 carp_send_ad(void *v) 1020 { 1021 struct carp_header ch; 1022 struct timeval tv; 1023 struct carp_softc *sc = v; 1024 struct carp_header *ch_ptr; 1025 struct mbuf *m; 1026 int error, len, advbase, advskew, s; 1027 struct sockaddr sa; 1028 1029 KERNEL_LOCK(1, NULL); 1030 s = splsoftnet(); 1031 1032 advbase = advskew = 0; /* Sssssh compiler */ 1033 if (sc->sc_carpdev == NULL) { 1034 if_statinc(&sc->sc_if, if_oerrors); 1035 goto 
retry_later; 1036 } 1037 1038 /* bow out if we've gone to backup (the carp interface is going down) */ 1039 if (sc->sc_bow_out) { 1040 sc->sc_bow_out = 0; 1041 advbase = 255; 1042 advskew = 255; 1043 } else { 1044 advbase = sc->sc_advbase; 1045 if (!carp_suppress_preempt || sc->sc_advskew > 240) 1046 advskew = sc->sc_advskew; 1047 else 1048 advskew = 240; 1049 tv.tv_sec = advbase; 1050 tv.tv_usec = advskew * 1000000 / 256; 1051 } 1052 1053 ch.carp_version = CARP_VERSION; 1054 ch.carp_type = CARP_ADVERTISEMENT; 1055 ch.carp_vhid = sc->sc_vhid; 1056 ch.carp_advbase = advbase; 1057 ch.carp_advskew = advskew; 1058 ch.carp_authlen = 7; /* XXX DEFINE */ 1059 ch.carp_pad1 = 0; /* must be zero */ 1060 ch.carp_cksum = 0; 1061 1062 1063 #ifdef INET 1064 if (sc->sc_naddrs) { 1065 struct ip *ip; 1066 struct ifaddr *ifa; 1067 int _s; 1068 1069 MGETHDR(m, M_DONTWAIT, MT_HEADER); 1070 if (m == NULL) { 1071 if_statinc(&sc->sc_if, if_oerrors); 1072 CARP_STATINC(CARP_STAT_ONOMEM); 1073 /* XXX maybe less ? 
*/ 1074 goto retry_later; 1075 } 1076 MCLAIM(m, &carp_proto_mowner_tx); 1077 len = sizeof(*ip) + sizeof(ch); 1078 m->m_pkthdr.len = len; 1079 m_reset_rcvif(m); 1080 m->m_len = len; 1081 m_align(m, m->m_len); 1082 m->m_flags |= M_MCAST; 1083 ip = mtod(m, struct ip *); 1084 ip->ip_v = IPVERSION; 1085 ip->ip_hl = sizeof(*ip) >> 2; 1086 ip->ip_tos = IPTOS_LOWDELAY; 1087 ip->ip_len = htons(len); 1088 ip->ip_id = 0; /* no need for id, we don't support fragments */ 1089 ip->ip_off = htons(IP_DF); 1090 ip->ip_ttl = CARP_DFLTTL; 1091 ip->ip_p = IPPROTO_CARP; 1092 ip->ip_sum = 0; 1093 1094 memset(&sa, 0, sizeof(sa)); 1095 sa.sa_family = AF_INET; 1096 _s = pserialize_read_enter(); 1097 ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev); 1098 if (ifa == NULL) 1099 ip->ip_src.s_addr = 0; 1100 else 1101 ip->ip_src.s_addr = 1102 ifatoia(ifa)->ia_addr.sin_addr.s_addr; 1103 pserialize_read_exit(_s); 1104 ip->ip_dst.s_addr = INADDR_CARP_GROUP; 1105 1106 ch_ptr = (struct carp_header *)(&ip[1]); 1107 memcpy(ch_ptr, &ch, sizeof(ch)); 1108 carp_prepare_ad(m, sc, ch_ptr); 1109 1110 m->m_data += sizeof(*ip); 1111 ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip)); 1112 m->m_data -= sizeof(*ip); 1113 1114 nanotime(&sc->sc_if.if_lastchange); 1115 if_statadd2(&sc->sc_if, if_opackets, 1, if_obytes, len); 1116 CARP_STATINC(CARP_STAT_OPACKETS); 1117 1118 error = ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, 1119 NULL); 1120 if (error) { 1121 if (error == ENOBUFS) 1122 CARP_STATINC(CARP_STAT_ONOMEM); 1123 else 1124 CARP_LOG(sc, ("ip_output failed: %d", error)); 1125 if_statinc(&sc->sc_if, if_oerrors); 1126 if (sc->sc_sendad_errors < INT_MAX) 1127 sc->sc_sendad_errors++; 1128 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { 1129 carp_suppress_preempt++; 1130 if (carp_suppress_preempt == 1) 1131 carp_send_ad_all(); 1132 } 1133 sc->sc_sendad_success = 0; 1134 } else { 1135 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { 1136 if (++sc->sc_sendad_success >= 1137 CARP_SENDAD_MIN_SUCCESS) 
{ 1138 carp_suppress_preempt--; 1139 sc->sc_sendad_errors = 0; 1140 } 1141 } else 1142 sc->sc_sendad_errors = 0; 1143 } 1144 } 1145 #endif /* INET */ 1146 #ifdef INET6 1147 if (sc->sc_naddrs6) { 1148 struct ip6_hdr *ip6; 1149 struct ifaddr *ifa; 1150 int _s; 1151 1152 MGETHDR(m, M_DONTWAIT, MT_HEADER); 1153 if (m == NULL) { 1154 if_statinc(&sc->sc_if, if_oerrors); 1155 CARP_STATINC(CARP_STAT_ONOMEM); 1156 /* XXX maybe less ? */ 1157 goto retry_later; 1158 } 1159 MCLAIM(m, &carp_proto6_mowner_tx); 1160 len = sizeof(*ip6) + sizeof(ch); 1161 m->m_pkthdr.len = len; 1162 m_reset_rcvif(m); 1163 m->m_len = len; 1164 m_align(m, m->m_len); 1165 m->m_flags |= M_MCAST; 1166 ip6 = mtod(m, struct ip6_hdr *); 1167 memset(ip6, 0, sizeof(*ip6)); 1168 ip6->ip6_vfc |= IPV6_VERSION; 1169 ip6->ip6_hlim = CARP_DFLTTL; 1170 ip6->ip6_nxt = IPPROTO_CARP; 1171 1172 /* set the source address */ 1173 memset(&sa, 0, sizeof(sa)); 1174 sa.sa_family = AF_INET6; 1175 _s = pserialize_read_enter(); 1176 ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev); 1177 if (ifa == NULL) /* This should never happen with IPv6 */ 1178 memset(&ip6->ip6_src, 0, sizeof(struct in6_addr)); 1179 else 1180 bcopy(ifatoia6(ifa)->ia_addr.sin6_addr.s6_addr, 1181 &ip6->ip6_src, sizeof(struct in6_addr)); 1182 pserialize_read_exit(_s); 1183 /* set the multicast destination */ 1184 1185 ip6->ip6_dst.s6_addr16[0] = htons(0xff02); 1186 ip6->ip6_dst.s6_addr8[15] = 0x12; 1187 if (in6_setscope(&ip6->ip6_dst, &sc->sc_if, NULL) != 0) { 1188 if_statinc(&sc->sc_if, if_oerrors); 1189 m_freem(m); 1190 CARP_LOG(sc, ("in6_setscope failed")); 1191 goto retry_later; 1192 } 1193 1194 ch_ptr = (struct carp_header *)(&ip6[1]); 1195 memcpy(ch_ptr, &ch, sizeof(ch)); 1196 carp_prepare_ad(m, sc, ch_ptr); 1197 1198 ch_ptr->carp_cksum = carp6_cksum(m, sizeof(*ip6), 1199 len - sizeof(*ip6)); 1200 1201 nanotime(&sc->sc_if.if_lastchange); 1202 if_statadd2(&sc->sc_if, if_opackets, 1, if_obytes, len); 1203 CARP_STATINC(CARP_STAT_OPACKETS6); 1204 1205 error = 
ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL); 1206 if (error) { 1207 if (error == ENOBUFS) 1208 CARP_STATINC(CARP_STAT_ONOMEM); 1209 else 1210 CARP_LOG(sc, ("ip6_output failed: %d", error)); 1211 if_statinc(&sc->sc_if, if_oerrors); 1212 if (sc->sc_sendad_errors < INT_MAX) 1213 sc->sc_sendad_errors++; 1214 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { 1215 carp_suppress_preempt++; 1216 if (carp_suppress_preempt == 1) 1217 carp_send_ad_all(); 1218 } 1219 sc->sc_sendad_success = 0; 1220 } else { 1221 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { 1222 if (++sc->sc_sendad_success >= 1223 CARP_SENDAD_MIN_SUCCESS) { 1224 carp_suppress_preempt--; 1225 sc->sc_sendad_errors = 0; 1226 } 1227 } else 1228 sc->sc_sendad_errors = 0; 1229 } 1230 } 1231 #endif /* INET6 */ 1232 1233 retry_later: 1234 splx(s); 1235 KERNEL_UNLOCK_ONE(NULL); 1236 if (advbase != 255 || advskew != 255) 1237 callout_schedule(&sc->sc_ad_tmo, tvtohz(&tv)); 1238 } 1239 1240 /* 1241 * Broadcast a gratuitous ARP request containing 1242 * the virtual router MAC address for each IP address 1243 * associated with the virtual router. 
1244 */ 1245 static void 1246 carp_send_arp(struct carp_softc *sc) 1247 { 1248 struct ifaddr *ifa; 1249 int s, bound; 1250 1251 KERNEL_LOCK(1, NULL); 1252 bound = curlwp_bind(); 1253 s = pserialize_read_enter(); 1254 IFADDR_READER_FOREACH(ifa, &sc->sc_if) { 1255 struct psref psref; 1256 1257 if (ifa->ifa_addr->sa_family != AF_INET) 1258 continue; 1259 1260 ifa_acquire(ifa, &psref); 1261 pserialize_read_exit(s); 1262 1263 arpannounce(sc->sc_carpdev, ifa, CLLADDR(sc->sc_if.if_sadl)); 1264 1265 s = pserialize_read_enter(); 1266 ifa_release(ifa, &psref); 1267 } 1268 pserialize_read_exit(s); 1269 curlwp_bindx(bound); 1270 KERNEL_UNLOCK_ONE(NULL); 1271 } 1272 1273 #ifdef INET6 1274 static void 1275 carp_send_na(struct carp_softc *sc) 1276 { 1277 struct ifaddr *ifa; 1278 struct in6_addr *in6; 1279 static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT; 1280 int s, bound; 1281 1282 KERNEL_LOCK(1, NULL); 1283 bound = curlwp_bind(); 1284 s = pserialize_read_enter(); 1285 IFADDR_READER_FOREACH(ifa, &sc->sc_if) { 1286 struct psref psref; 1287 1288 if (ifa->ifa_addr->sa_family != AF_INET6) 1289 continue; 1290 1291 ifa_acquire(ifa, &psref); 1292 pserialize_read_exit(s); 1293 1294 in6 = &ifatoia6(ifa)->ia_addr.sin6_addr; 1295 nd6_na_output(sc->sc_carpdev, &mcast, in6, 1296 ND_NA_FLAG_OVERRIDE, 1, NULL); 1297 1298 s = pserialize_read_enter(); 1299 ifa_release(ifa, &psref); 1300 } 1301 pserialize_read_exit(s); 1302 curlwp_bindx(bound); 1303 KERNEL_UNLOCK_ONE(NULL); 1304 } 1305 #endif /* INET6 */ 1306 1307 /* 1308 * Based on bridge_hash() in if_bridge.c 1309 */ 1310 #define mix(a,b,c) \ 1311 do { \ 1312 a -= b; a -= c; a ^= (c >> 13); \ 1313 b -= c; b -= a; b ^= (a << 8); \ 1314 c -= a; c -= b; c ^= (b >> 13); \ 1315 a -= b; a -= c; a ^= (c >> 12); \ 1316 b -= c; b -= a; b ^= (a << 16); \ 1317 c -= a; c -= b; c ^= (b >> 5); \ 1318 a -= b; a -= c; a ^= (c >> 3); \ 1319 b -= c; b -= a; b ^= (a << 10); \ 1320 c -= a; c -= b; c ^= (b >> 15); \ 1321 } while (0) 1322 1323 static 
u_int32_t 1324 carp_hash(struct carp_softc *sc, u_char *src) 1325 { 1326 u_int32_t a = 0x9e3779b9, b = sc->sc_hashkey[0], c = sc->sc_hashkey[1]; 1327 1328 c += sc->sc_key[3] << 24; 1329 c += sc->sc_key[2] << 16; 1330 c += sc->sc_key[1] << 8; 1331 c += sc->sc_key[0]; 1332 b += src[5] << 8; 1333 b += src[4]; 1334 a += src[3] << 24; 1335 a += src[2] << 16; 1336 a += src[1] << 8; 1337 a += src[0]; 1338 1339 mix(a, b, c); 1340 return (c); 1341 } 1342 1343 static int 1344 carp_addrcount(struct carp_if *cif, struct in_ifaddr *ia, int type) 1345 { 1346 struct carp_softc *vh; 1347 struct ifaddr *ifa; 1348 int count = 0; 1349 1350 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1351 if ((type == CARP_COUNT_RUNNING && 1352 (vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) == 1353 (IFF_UP|IFF_RUNNING)) || 1354 (type == CARP_COUNT_MASTER && vh->sc_state == MASTER)) { 1355 int s = pserialize_read_enter(); 1356 IFADDR_READER_FOREACH(ifa, &vh->sc_if) { 1357 if (ifa->ifa_addr->sa_family == AF_INET && 1358 ia->ia_addr.sin_addr.s_addr == 1359 ifatoia(ifa)->ia_addr.sin_addr.s_addr) 1360 count++; 1361 } 1362 pserialize_read_exit(s); 1363 } 1364 } 1365 return (count); 1366 } 1367 1368 int 1369 carp_iamatch(struct in_ifaddr *ia, u_char *src, 1370 u_int32_t *count, u_int32_t index) 1371 { 1372 struct carp_softc *sc = ia->ia_ifp->if_softc; 1373 1374 if (carp_opts[CARPCTL_ARPBALANCE]) { 1375 /* 1376 * We use the source ip to decide which virtual host should 1377 * handle the request. If we're master of that virtual host, 1378 * then we respond, otherwise, just drop the arp packet on 1379 * the floor. 1380 */ 1381 1382 /* Count the elegible carp interfaces with this address */ 1383 if (*count == 0) 1384 *count = carp_addrcount( 1385 (struct carp_if *)ia->ia_ifp->if_carpdev->if_carp, 1386 ia, CARP_COUNT_RUNNING); 1387 1388 /* This should never happen, but... 
*/ 1389 if (*count == 0) 1390 return (0); 1391 1392 if (carp_hash(sc, src) % *count == index - 1 && 1393 sc->sc_state == MASTER) { 1394 return (1); 1395 } 1396 } else { 1397 if (sc->sc_state == MASTER) 1398 return (1); 1399 } 1400 1401 return (0); 1402 } 1403 1404 #ifdef INET6 1405 struct ifaddr * 1406 carp_iamatch6(void *v, struct in6_addr *taddr) 1407 { 1408 struct carp_if *cif = v; 1409 struct carp_softc *vh; 1410 struct ifaddr *ifa; 1411 1412 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1413 int s = pserialize_read_enter(); 1414 IFADDR_READER_FOREACH(ifa, &vh->sc_if) { 1415 if (IN6_ARE_ADDR_EQUAL(taddr, 1416 &ifatoia6(ifa)->ia_addr.sin6_addr) && 1417 ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) == 1418 (IFF_UP|IFF_RUNNING)) && vh->sc_state == MASTER) 1419 return (ifa); 1420 } 1421 pserialize_read_exit(s); 1422 } 1423 1424 return (NULL); 1425 } 1426 #endif /* INET6 */ 1427 1428 struct ifnet * 1429 carp_ourether(void *v, struct ether_header *eh, u_char iftype, int src) 1430 { 1431 struct carp_if *cif = (struct carp_if *)v; 1432 struct carp_softc *vh; 1433 u_int8_t *ena; 1434 1435 if (src) 1436 ena = (u_int8_t *)&eh->ether_shost; 1437 else 1438 ena = (u_int8_t *)&eh->ether_dhost; 1439 1440 switch (iftype) { 1441 case IFT_ETHER: 1442 case IFT_FDDI: 1443 if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1) 1444 return (NULL); 1445 break; 1446 case IFT_ISO88025: 1447 if (ena[0] != 3 || ena[1] || ena[4] || ena[5]) 1448 return (NULL); 1449 break; 1450 default: 1451 return (NULL); 1452 break; 1453 } 1454 1455 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) 1456 if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) == 1457 (IFF_UP|IFF_RUNNING) && vh->sc_state == MASTER && 1458 !memcmp(ena, CLLADDR(vh->sc_if.if_sadl), 1459 ETHER_ADDR_LEN)) { 1460 return (&vh->sc_if); 1461 } 1462 1463 return (NULL); 1464 } 1465 1466 int 1467 carp_input(struct mbuf *m, u_int8_t *shost, u_int8_t *dhost, u_int16_t etype) 1468 { 1469 struct ether_header eh; 1470 struct carp_if *cif = (struct 
carp_if *)m_get_rcvif_NOMPSAFE(m)->if_carp; 1471 struct ifnet *ifp; 1472 1473 memcpy(&eh.ether_shost, shost, sizeof(eh.ether_shost)); 1474 memcpy(&eh.ether_dhost, dhost, sizeof(eh.ether_dhost)); 1475 eh.ether_type = etype; 1476 1477 if (m->m_flags & (M_BCAST|M_MCAST)) { 1478 struct carp_softc *vh; 1479 struct mbuf *m0; 1480 1481 /* 1482 * XXX Should really check the list of multicast addresses 1483 * for each CARP interface _before_ copying. 1484 */ 1485 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1486 m0 = m_copym(m, 0, M_COPYALL, M_DONTWAIT); 1487 if (m0 == NULL) 1488 continue; 1489 m_set_rcvif(m0, &vh->sc_if); 1490 ether_input(&vh->sc_if, m0); 1491 } 1492 return (1); 1493 } 1494 1495 ifp = carp_ourether(cif, &eh, m_get_rcvif_NOMPSAFE(m)->if_type, 0); 1496 if (ifp == NULL) { 1497 return (1); 1498 } 1499 1500 m_set_rcvif(m, ifp); 1501 1502 bpf_mtap(ifp, m, BPF_D_IN); 1503 if_statinc(ifp, if_ipackets); 1504 ether_input(ifp, m); 1505 return (0); 1506 } 1507 1508 static void 1509 carp_master_down(void *v) 1510 { 1511 struct carp_softc *sc = v; 1512 1513 switch (sc->sc_state) { 1514 case INIT: 1515 printf("%s: master_down event in INIT state\n", 1516 sc->sc_if.if_xname); 1517 break; 1518 case MASTER: 1519 break; 1520 case BACKUP: 1521 CARP_LOG(sc, ("INIT -> MASTER (preempting)")); 1522 carp_set_state(sc, MASTER); 1523 carp_send_ad(sc); 1524 carp_send_arp(sc); 1525 #ifdef INET6 1526 carp_send_na(sc); 1527 #endif /* INET6 */ 1528 carp_setrun(sc, 0); 1529 carp_setroute(sc, RTM_ADD); 1530 break; 1531 } 1532 } 1533 1534 /* 1535 * When in backup state, af indicates whether to reset the master down timer 1536 * for v4 or v6. If it's set to zero, reset the ones which are already pending. 
1537 */ 1538 static void 1539 carp_setrun(struct carp_softc *sc, sa_family_t af) 1540 { 1541 struct timeval tv; 1542 1543 if (sc->sc_carpdev == NULL) { 1544 sc->sc_if.if_flags &= ~IFF_RUNNING; 1545 carp_set_state(sc, INIT); 1546 return; 1547 } 1548 1549 if (sc->sc_if.if_flags & IFF_UP && sc->sc_vhid > 0 && 1550 (sc->sc_naddrs || sc->sc_naddrs6) && !sc->sc_suppress) { 1551 sc->sc_if.if_flags |= IFF_RUNNING; 1552 } else { 1553 sc->sc_if.if_flags &= ~IFF_RUNNING; 1554 carp_setroute(sc, RTM_DELETE); 1555 return; 1556 } 1557 1558 switch (sc->sc_state) { 1559 case INIT: 1560 carp_set_state(sc, BACKUP); 1561 carp_setroute(sc, RTM_DELETE); 1562 carp_setrun(sc, 0); 1563 break; 1564 case BACKUP: 1565 callout_stop(&sc->sc_ad_tmo); 1566 tv.tv_sec = 3 * sc->sc_advbase; 1567 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1568 switch (af) { 1569 #ifdef INET 1570 case AF_INET: 1571 callout_schedule(&sc->sc_md_tmo, tvtohz(&tv)); 1572 break; 1573 #endif /* INET */ 1574 #ifdef INET6 1575 case AF_INET6: 1576 callout_schedule(&sc->sc_md6_tmo, tvtohz(&tv)); 1577 break; 1578 #endif /* INET6 */ 1579 default: 1580 if (sc->sc_naddrs) 1581 callout_schedule(&sc->sc_md_tmo, tvtohz(&tv)); 1582 #ifdef INET6 1583 if (sc->sc_naddrs6) 1584 callout_schedule(&sc->sc_md6_tmo, tvtohz(&tv)); 1585 #endif /* INET6 */ 1586 break; 1587 } 1588 break; 1589 case MASTER: 1590 tv.tv_sec = sc->sc_advbase; 1591 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1592 callout_schedule(&sc->sc_ad_tmo, tvtohz(&tv)); 1593 break; 1594 } 1595 } 1596 1597 static void 1598 carp_multicast_cleanup(struct carp_softc *sc) 1599 { 1600 struct ip_moptions *imo = &sc->sc_imo; 1601 #ifdef INET6 1602 struct ip6_moptions *im6o = &sc->sc_im6o; 1603 #endif 1604 u_int16_t n = imo->imo_num_memberships; 1605 1606 /* Clean up our own multicast memberships */ 1607 while (n-- > 0) { 1608 if (imo->imo_membership[n] != NULL) { 1609 in_delmulti(imo->imo_membership[n]); 1610 imo->imo_membership[n] = NULL; 1611 } 1612 } 1613 imo->imo_num_memberships = 0; 
1614 imo->imo_multicast_if_index = 0; 1615 1616 #ifdef INET6 1617 while (!LIST_EMPTY(&im6o->im6o_memberships)) { 1618 struct in6_multi_mship *imm = 1619 LIST_FIRST(&im6o->im6o_memberships); 1620 1621 LIST_REMOVE(imm, i6mm_chain); 1622 in6_leavegroup(imm); 1623 } 1624 im6o->im6o_multicast_if_index = 0; 1625 #endif 1626 1627 /* And any other multicast memberships */ 1628 carp_ether_purgemulti(sc); 1629 } 1630 1631 static int 1632 carp_set_ifp(struct carp_softc *sc, struct ifnet *ifp) 1633 { 1634 struct carp_if *cif, *ncif = NULL; 1635 struct carp_softc *vr, *after = NULL; 1636 int myself = 0, error = 0; 1637 int s; 1638 1639 if (ifp == sc->sc_carpdev) 1640 return (0); 1641 1642 if (ifp != NULL) { 1643 if ((ifp->if_flags & IFF_MULTICAST) == 0) 1644 return (EADDRNOTAVAIL); 1645 1646 if (ifp->if_type == IFT_CARP) 1647 return (EINVAL); 1648 1649 if (ifp->if_carp == NULL) { 1650 ncif = malloc(sizeof(*cif), M_IFADDR, M_WAITOK); 1651 if ((error = ifpromisc(ifp, 1))) { 1652 free(ncif, M_IFADDR); 1653 return (error); 1654 } 1655 1656 ncif->vhif_ifp = ifp; 1657 TAILQ_INIT(&ncif->vhif_vrs); 1658 } else { 1659 cif = (struct carp_if *)ifp->if_carp; 1660 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) 1661 if (vr != sc && vr->sc_vhid == sc->sc_vhid) 1662 return (EINVAL); 1663 } 1664 1665 /* detach from old interface */ 1666 if (sc->sc_carpdev != NULL) 1667 carpdetach(sc); 1668 1669 /* join multicast groups */ 1670 if (sc->sc_naddrs < 0 && 1671 (error = carp_join_multicast(sc)) != 0) { 1672 if (ncif != NULL) 1673 free(ncif, M_IFADDR); 1674 return (error); 1675 } 1676 1677 #ifdef INET6 1678 if (sc->sc_naddrs6 < 0 && 1679 (error = carp_join_multicast6(sc)) != 0) { 1680 if (ncif != NULL) 1681 free(ncif, M_IFADDR); 1682 carp_multicast_cleanup(sc); 1683 return (error); 1684 } 1685 #endif 1686 1687 /* attach carp interface to physical interface */ 1688 if (ncif != NULL) 1689 ifp->if_carp = (void *)ncif; 1690 sc->sc_carpdev = ifp; 1691 sc->sc_if.if_capabilities = ifp->if_capabilities & 1692 
(IFCAP_TSOv4 | IFCAP_TSOv6 | 1693 IFCAP_CSUM_IPv4_Tx|IFCAP_CSUM_IPv4_Rx| 1694 IFCAP_CSUM_TCPv4_Tx|IFCAP_CSUM_TCPv4_Rx| 1695 IFCAP_CSUM_UDPv4_Tx|IFCAP_CSUM_UDPv4_Rx| 1696 IFCAP_CSUM_TCPv6_Tx|IFCAP_CSUM_TCPv6_Rx| 1697 IFCAP_CSUM_UDPv6_Tx|IFCAP_CSUM_UDPv6_Rx); 1698 1699 cif = (struct carp_if *)ifp->if_carp; 1700 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { 1701 if (vr == sc) 1702 myself = 1; 1703 if (vr->sc_vhid < sc->sc_vhid) 1704 after = vr; 1705 } 1706 1707 if (!myself) { 1708 /* We're trying to keep things in order */ 1709 if (after == NULL) { 1710 TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list); 1711 } else { 1712 TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, 1713 sc, sc_list); 1714 } 1715 cif->vhif_nvrs++; 1716 } 1717 if (sc->sc_naddrs || sc->sc_naddrs6) 1718 sc->sc_if.if_flags |= IFF_UP; 1719 carp_set_enaddr(sc); 1720 KERNEL_LOCK(1, NULL); 1721 s = splnet(); 1722 /* XXX linkstatehooks establish */ 1723 carp_carpdev_state(ifp); 1724 splx(s); 1725 KERNEL_UNLOCK_ONE(NULL); 1726 } else { 1727 carpdetach(sc); 1728 sc->sc_if.if_flags &= ~(IFF_UP|IFF_RUNNING); 1729 } 1730 return (0); 1731 } 1732 1733 static void 1734 carp_set_enaddr(struct carp_softc *sc) 1735 { 1736 struct ifnet *ifp = &sc->sc_if; 1737 uint8_t enaddr[ETHER_ADDR_LEN]; 1738 1739 if (sc->sc_vhid == -1) { 1740 ifp->if_addrlen = 0; 1741 if_alloc_sadl(ifp); 1742 return; 1743 } 1744 1745 if (sc->sc_carpdev && sc->sc_carpdev->if_type == IFT_ISO88025) { 1746 enaddr[0] = 3; 1747 enaddr[1] = 0; 1748 enaddr[2] = 0x40 >> (sc->sc_vhid - 1); 1749 enaddr[3] = 0x40000 >> (sc->sc_vhid - 1); 1750 enaddr[4] = 0; 1751 enaddr[5] = 0; 1752 } else { 1753 enaddr[0] = 0; 1754 enaddr[1] = 0; 1755 enaddr[2] = 0x5e; 1756 enaddr[3] = 0; 1757 enaddr[4] = 1; 1758 enaddr[5] = sc->sc_vhid; 1759 } 1760 1761 if_set_sadl(ifp, enaddr, sizeof(enaddr), false); 1762 } 1763 1764 #if 0 1765 static void 1766 carp_addr_updated(void *v) 1767 { 1768 struct carp_softc *sc = (struct carp_softc *) v; 1769 struct ifaddr *ifa; 1770 int new_naddrs = 0, 
new_naddrs6 = 0; 1771 1772 IFADDR_READER_FOREACH(ifa, &sc->sc_if) { 1773 if (ifa->ifa_addr->sa_family == AF_INET) 1774 new_naddrs++; 1775 else if (ifa->ifa_addr->sa_family == AF_INET6) 1776 new_naddrs6++; 1777 } 1778 1779 /* Handle a callback after SIOCDIFADDR */ 1780 if (new_naddrs < sc->sc_naddrs || new_naddrs6 < sc->sc_naddrs6) { 1781 struct in_addr mc_addr; 1782 1783 sc->sc_naddrs = new_naddrs; 1784 sc->sc_naddrs6 = new_naddrs6; 1785 1786 /* Re-establish multicast membership removed by in_control */ 1787 mc_addr.s_addr = INADDR_CARP_GROUP; 1788 if (!in_multi_group(mc_addr, &sc->sc_if, 0)) { 1789 memset(&sc->sc_imo, 0, sizeof(sc->sc_imo)); 1790 1791 if (sc->sc_carpdev != NULL && sc->sc_naddrs > 0) 1792 carp_join_multicast(sc); 1793 } 1794 1795 if (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) { 1796 sc->sc_if.if_flags &= ~IFF_UP; 1797 carp_set_state(sc, INIT); 1798 } else 1799 carp_hmac_prepare(sc); 1800 } 1801 1802 carp_setrun(sc, 0); 1803 } 1804 #endif 1805 1806 static int 1807 carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin) 1808 { 1809 struct ifnet *ifp = sc->sc_carpdev; 1810 struct in_ifaddr *ia, *ia_if; 1811 int error = 0; 1812 int s; 1813 1814 if (sin->sin_addr.s_addr == 0) { 1815 if (!(sc->sc_if.if_flags & IFF_UP)) 1816 carp_set_state(sc, INIT); 1817 if (sc->sc_naddrs) 1818 sc->sc_if.if_flags |= IFF_UP; 1819 carp_setrun(sc, 0); 1820 return (0); 1821 } 1822 1823 /* we have to do this by hand to ensure we don't match on ourselves */ 1824 ia_if = NULL; 1825 s = pserialize_read_enter(); 1826 IN_ADDRLIST_READER_FOREACH(ia) { 1827 /* and, yeah, we need a multicast-capable iface too */ 1828 if (ia->ia_ifp != &sc->sc_if && 1829 ia->ia_ifp->if_type != IFT_CARP && 1830 (ia->ia_ifp->if_flags & IFF_MULTICAST) && 1831 (sin->sin_addr.s_addr & ia->ia_subnetmask) == 1832 ia->ia_subnet) { 1833 if (!ia_if) 1834 ia_if = ia; 1835 } 1836 } 1837 1838 if (ia_if) { 1839 ia = ia_if; 1840 if (ifp) { 1841 if (ifp != ia->ia_ifp) 1842 return (EADDRNOTAVAIL); 1843 } else { 
1844 /* FIXME NOMPSAFE */ 1845 ifp = ia->ia_ifp; 1846 } 1847 } 1848 pserialize_read_exit(s); 1849 1850 if ((error = carp_set_ifp(sc, ifp))) 1851 return (error); 1852 1853 if (sc->sc_carpdev == NULL) 1854 return (EADDRNOTAVAIL); 1855 1856 if (sc->sc_naddrs == 0 && (error = carp_join_multicast(sc)) != 0) 1857 return (error); 1858 1859 sc->sc_naddrs++; 1860 if (sc->sc_carpdev != NULL) 1861 sc->sc_if.if_flags |= IFF_UP; 1862 1863 carp_set_state(sc, INIT); 1864 carp_setrun(sc, 0); 1865 1866 /* 1867 * Hook if_addrhooks so that we get a callback after in_ifinit has run, 1868 * to correct any inappropriate routes that it inserted. 1869 */ 1870 if (sc->ah_cookie == 0) { 1871 /* XXX link address hook */ 1872 } 1873 1874 return (0); 1875 } 1876 1877 static int 1878 carp_join_multicast(struct carp_softc *sc) 1879 { 1880 struct ip_moptions *imo = &sc->sc_imo, tmpimo; 1881 struct in_addr addr; 1882 1883 memset(&tmpimo, 0, sizeof(tmpimo)); 1884 addr.s_addr = INADDR_CARP_GROUP; 1885 if ((tmpimo.imo_membership[0] = 1886 in_addmulti(&addr, &sc->sc_if)) == NULL) { 1887 return (ENOBUFS); 1888 } 1889 1890 imo->imo_membership[0] = tmpimo.imo_membership[0]; 1891 imo->imo_num_memberships = 1; 1892 imo->imo_multicast_if_index = sc->sc_if.if_index; 1893 imo->imo_multicast_ttl = CARP_DFLTTL; 1894 imo->imo_multicast_loop = 0; 1895 return (0); 1896 } 1897 1898 1899 #ifdef INET6 1900 static int 1901 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6) 1902 { 1903 struct ifnet *ifp = sc->sc_carpdev; 1904 struct in6_ifaddr *ia, *ia_if; 1905 int error = 0; 1906 int s; 1907 1908 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { 1909 if (!(sc->sc_if.if_flags & IFF_UP)) 1910 carp_set_state(sc, INIT); 1911 if (sc->sc_naddrs6) 1912 sc->sc_if.if_flags |= IFF_UP; 1913 carp_setrun(sc, 0); 1914 return (0); 1915 } 1916 1917 /* we have to do this by hand to ensure we don't match on ourselves */ 1918 ia_if = NULL; 1919 s = pserialize_read_enter(); 1920 IN6_ADDRLIST_READER_FOREACH(ia) { 1921 int i; 
1922 1923 for (i = 0; i < 4; i++) { 1924 if ((sin6->sin6_addr.s6_addr32[i] & 1925 ia->ia_prefixmask.sin6_addr.s6_addr32[i]) != 1926 (ia->ia_addr.sin6_addr.s6_addr32[i] & 1927 ia->ia_prefixmask.sin6_addr.s6_addr32[i])) 1928 break; 1929 } 1930 /* and, yeah, we need a multicast-capable iface too */ 1931 if (ia->ia_ifp != &sc->sc_if && 1932 ia->ia_ifp->if_type != IFT_CARP && 1933 (ia->ia_ifp->if_flags & IFF_MULTICAST) && 1934 (i == 4)) { 1935 if (!ia_if) 1936 ia_if = ia; 1937 } 1938 } 1939 pserialize_read_exit(s); 1940 1941 if (ia_if) { 1942 ia = ia_if; 1943 if (sc->sc_carpdev) { 1944 if (sc->sc_carpdev != ia->ia_ifp) 1945 return (EADDRNOTAVAIL); 1946 } else { 1947 ifp = ia->ia_ifp; 1948 } 1949 } 1950 1951 if ((error = carp_set_ifp(sc, ifp))) 1952 return (error); 1953 1954 if (sc->sc_carpdev == NULL) 1955 return (EADDRNOTAVAIL); 1956 1957 if (sc->sc_naddrs6 == 0 && (error = carp_join_multicast6(sc)) != 0) 1958 return (error); 1959 1960 sc->sc_naddrs6++; 1961 if (sc->sc_carpdev != NULL) 1962 sc->sc_if.if_flags |= IFF_UP; 1963 carp_set_state(sc, INIT); 1964 carp_setrun(sc, 0); 1965 1966 return (0); 1967 } 1968 1969 static int 1970 carp_join_multicast6(struct carp_softc *sc) 1971 { 1972 struct in6_multi_mship *imm, *imm2; 1973 struct ip6_moptions *im6o = &sc->sc_im6o; 1974 struct sockaddr_in6 addr6; 1975 int error; 1976 1977 /* Join IPv6 CARP multicast group */ 1978 memset(&addr6, 0, sizeof(addr6)); 1979 addr6.sin6_family = AF_INET6; 1980 addr6.sin6_len = sizeof(addr6); 1981 addr6.sin6_addr.s6_addr16[0] = htons(0xff02); 1982 addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index); 1983 addr6.sin6_addr.s6_addr8[15] = 0x12; 1984 if ((imm = in6_joingroup(&sc->sc_if, 1985 &addr6.sin6_addr, &error, 0)) == NULL) { 1986 return (error); 1987 } 1988 /* join solicited multicast address */ 1989 memset(&addr6.sin6_addr, 0, sizeof(addr6.sin6_addr)); 1990 addr6.sin6_addr.s6_addr16[0] = htons(0xff02); 1991 addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index); 1992 
addr6.sin6_addr.s6_addr32[1] = 0; 1993 addr6.sin6_addr.s6_addr32[2] = htonl(1); 1994 addr6.sin6_addr.s6_addr32[3] = 0; 1995 addr6.sin6_addr.s6_addr8[12] = 0xff; 1996 if ((imm2 = in6_joingroup(&sc->sc_if, 1997 &addr6.sin6_addr, &error, 0)) == NULL) { 1998 in6_leavegroup(imm); 1999 return (error); 2000 } 2001 2002 /* apply v6 multicast membership */ 2003 im6o->im6o_multicast_if_index = sc->sc_if.if_index; 2004 if (imm) 2005 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, 2006 i6mm_chain); 2007 if (imm2) 2008 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm2, 2009 i6mm_chain); 2010 2011 return (0); 2012 } 2013 2014 #endif /* INET6 */ 2015 2016 static int 2017 carp_ioctl(struct ifnet *ifp, u_long cmd, void *data) 2018 { 2019 struct lwp *l = curlwp; /* XXX */ 2020 struct carp_softc *sc = ifp->if_softc, *vr; 2021 struct carpreq carpr; 2022 struct ifaddr *ifa; 2023 struct ifreq *ifr; 2024 struct ifnet *cdev = NULL; 2025 int error = 0; 2026 2027 ifa = (struct ifaddr *)data; 2028 ifr = (struct ifreq *)data; 2029 2030 switch (cmd) { 2031 case SIOCINITIFADDR: 2032 switch (ifa->ifa_addr->sa_family) { 2033 #ifdef INET 2034 case AF_INET: 2035 sc->sc_if.if_flags |= IFF_UP; 2036 memcpy(ifa->ifa_dstaddr, ifa->ifa_addr, 2037 sizeof(struct sockaddr)); 2038 error = carp_set_addr(sc, satosin(ifa->ifa_addr)); 2039 break; 2040 #endif /* INET */ 2041 #ifdef INET6 2042 case AF_INET6: 2043 sc->sc_if.if_flags|= IFF_UP; 2044 error = carp_set_addr6(sc, satosin6(ifa->ifa_addr)); 2045 break; 2046 #endif /* INET6 */ 2047 default: 2048 error = EAFNOSUPPORT; 2049 break; 2050 } 2051 break; 2052 2053 case SIOCSIFFLAGS: 2054 if ((error = ifioctl_common(ifp, cmd, data)) != 0) 2055 break; 2056 if (sc->sc_state != INIT && !(ifr->ifr_flags & IFF_UP)) { 2057 callout_stop(&sc->sc_ad_tmo); 2058 callout_stop(&sc->sc_md_tmo); 2059 callout_stop(&sc->sc_md6_tmo); 2060 if (sc->sc_state == MASTER) { 2061 /* we need the interface up to bow out */ 2062 sc->sc_if.if_flags |= IFF_UP; 2063 sc->sc_bow_out = 1; 2064 
carp_send_ad(sc); 2065 } 2066 sc->sc_if.if_flags &= ~IFF_UP; 2067 carp_set_state(sc, INIT); 2068 carp_setrun(sc, 0); 2069 } else if (sc->sc_state == INIT && (ifr->ifr_flags & IFF_UP)) { 2070 sc->sc_if.if_flags |= IFF_UP; 2071 carp_setrun(sc, 0); 2072 } 2073 carp_update_link_state(sc); 2074 break; 2075 2076 case SIOCSVH: 2077 if (l == NULL) 2078 break; 2079 if ((error = kauth_authorize_network(l->l_cred, 2080 KAUTH_NETWORK_INTERFACE, 2081 KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, (void *)cmd, 2082 NULL)) != 0) 2083 break; 2084 if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr))) 2085 break; 2086 error = 1; 2087 if (carpr.carpr_carpdev[0] != '\0' && 2088 (cdev = ifunit(carpr.carpr_carpdev)) == NULL) 2089 return (EINVAL); 2090 if ((error = carp_set_ifp(sc, cdev))) 2091 return (error); 2092 if (sc->sc_state != INIT && carpr.carpr_state != sc->sc_state) { 2093 switch (carpr.carpr_state) { 2094 case BACKUP: 2095 callout_stop(&sc->sc_ad_tmo); 2096 carp_set_state(sc, BACKUP); 2097 carp_setrun(sc, 0); 2098 carp_setroute(sc, RTM_DELETE); 2099 break; 2100 case MASTER: 2101 carp_master_down(sc); 2102 break; 2103 default: 2104 break; 2105 } 2106 } 2107 if (carpr.carpr_vhid > 0) { 2108 if (carpr.carpr_vhid > 255) { 2109 error = EINVAL; 2110 break; 2111 } 2112 if (sc->sc_carpdev) { 2113 struct carp_if *cif; 2114 cif = (struct carp_if *)sc->sc_carpdev->if_carp; 2115 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) 2116 if (vr != sc && 2117 vr->sc_vhid == carpr.carpr_vhid) 2118 return (EINVAL); 2119 } 2120 sc->sc_vhid = carpr.carpr_vhid; 2121 carp_set_enaddr(sc); 2122 carp_set_state(sc, INIT); 2123 error--; 2124 } 2125 if (carpr.carpr_advbase > 0 || carpr.carpr_advskew > 0) { 2126 if (carpr.carpr_advskew > 254) { 2127 error = EINVAL; 2128 break; 2129 } 2130 if (carpr.carpr_advbase > 255) { 2131 error = EINVAL; 2132 break; 2133 } 2134 sc->sc_advbase = carpr.carpr_advbase; 2135 sc->sc_advskew = carpr.carpr_advskew; 2136 error--; 2137 } 2138 memcpy(sc->sc_key, carpr.carpr_key, 
sizeof(sc->sc_key)); 2139 if (error > 0) 2140 error = EINVAL; 2141 else { 2142 error = 0; 2143 carp_setrun(sc, 0); 2144 } 2145 break; 2146 2147 case SIOCGVH: 2148 memset(&carpr, 0, sizeof(carpr)); 2149 if (sc->sc_carpdev != NULL) 2150 strlcpy(carpr.carpr_carpdev, sc->sc_carpdev->if_xname, 2151 IFNAMSIZ); 2152 carpr.carpr_state = sc->sc_state; 2153 carpr.carpr_vhid = sc->sc_vhid; 2154 carpr.carpr_advbase = sc->sc_advbase; 2155 carpr.carpr_advskew = sc->sc_advskew; 2156 2157 if ((l != NULL) && (error = kauth_authorize_network(l->l_cred, 2158 KAUTH_NETWORK_INTERFACE, 2159 KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, (void *)cmd, 2160 NULL)) == 0) 2161 memcpy(carpr.carpr_key, sc->sc_key, 2162 sizeof(carpr.carpr_key)); 2163 error = copyout(&carpr, ifr->ifr_data, sizeof(carpr)); 2164 break; 2165 2166 case SIOCADDMULTI: 2167 error = carp_ether_addmulti(sc, ifr); 2168 break; 2169 2170 case SIOCDELMULTI: 2171 error = carp_ether_delmulti(sc, ifr); 2172 break; 2173 2174 case SIOCSIFCAP: 2175 if ((error = ifioctl_common(ifp, cmd, data)) == ENETRESET) 2176 error = 0; 2177 break; 2178 2179 default: 2180 error = ether_ioctl(ifp, cmd, data); 2181 } 2182 2183 carp_hmac_prepare(sc); 2184 return (error); 2185 } 2186 2187 2188 /* 2189 * Start output on carp interface. This function should never be called. 
2190 */ 2191 static void 2192 carp_start(struct ifnet *ifp) 2193 { 2194 #ifdef DEBUG 2195 printf("%s: start called\n", ifp->if_xname); 2196 #endif 2197 } 2198 2199 int 2200 carp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa, 2201 const struct rtentry *rt) 2202 { 2203 struct carp_softc *sc = ((struct carp_softc *)ifp->if_softc); 2204 KASSERT(KERNEL_LOCKED_P()); 2205 2206 if (sc->sc_carpdev != NULL && sc->sc_state == MASTER) { 2207 return if_output_lock(sc->sc_carpdev, ifp, m, sa, rt); 2208 } else { 2209 m_freem(m); 2210 return (ENETUNREACH); 2211 } 2212 } 2213 2214 static void 2215 carp_set_state(struct carp_softc *sc, int state) 2216 { 2217 static const char *carp_states[] = { CARP_STATES }; 2218 2219 if (sc->sc_state == state) 2220 return; 2221 2222 CARP_LOG(sc, ("state transition from: %s -> to: %s", carp_states[sc->sc_state], carp_states[state])); 2223 2224 sc->sc_state = state; 2225 carp_update_link_state(sc); 2226 } 2227 2228 static void 2229 carp_update_link_state(struct carp_softc *sc) 2230 { 2231 int link_state; 2232 2233 switch (sc->sc_state) { 2234 case BACKUP: 2235 link_state = LINK_STATE_DOWN; 2236 break; 2237 case MASTER: 2238 link_state = LINK_STATE_UP; 2239 break; 2240 default: 2241 /* Not useable, so down makes perfect sense. 
*/ 2242 link_state = LINK_STATE_DOWN; 2243 break; 2244 } 2245 if_link_state_change(&sc->sc_if, link_state); 2246 } 2247 2248 void 2249 carp_carpdev_state(void *v) 2250 { 2251 struct carp_if *cif; 2252 struct carp_softc *sc; 2253 struct ifnet *ifp = v; 2254 2255 if (ifp->if_type == IFT_CARP) 2256 return; 2257 2258 cif = (struct carp_if *)ifp->if_carp; 2259 2260 TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) { 2261 int suppressed = sc->sc_suppress; 2262 2263 if (sc->sc_carpdev->if_link_state == LINK_STATE_DOWN || 2264 !(sc->sc_carpdev->if_flags & IFF_UP)) { 2265 sc->sc_if.if_flags &= ~IFF_RUNNING; 2266 callout_stop(&sc->sc_ad_tmo); 2267 callout_stop(&sc->sc_md_tmo); 2268 callout_stop(&sc->sc_md6_tmo); 2269 carp_set_state(sc, INIT); 2270 sc->sc_suppress = 1; 2271 carp_setrun(sc, 0); 2272 if (!suppressed) { 2273 carp_suppress_preempt++; 2274 if (carp_suppress_preempt == 1) 2275 carp_send_ad_all(); 2276 } 2277 } else { 2278 carp_set_state(sc, INIT); 2279 sc->sc_suppress = 0; 2280 carp_setrun(sc, 0); 2281 if (suppressed) 2282 carp_suppress_preempt--; 2283 } 2284 } 2285 } 2286 2287 static int 2288 carp_ether_addmulti(struct carp_softc *sc, struct ifreq *ifr) 2289 { 2290 const struct sockaddr *sa = ifreq_getaddr(SIOCADDMULTI, ifr); 2291 struct ifnet *ifp; 2292 struct carp_mc_entry *mc; 2293 u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN]; 2294 int error; 2295 2296 ifp = sc->sc_carpdev; 2297 if (ifp == NULL) 2298 return (EINVAL); 2299 2300 error = ether_addmulti(sa, &sc->sc_ac); 2301 if (error != ENETRESET) 2302 return (error); 2303 2304 /* 2305 * This is new multicast address. We have to tell parent 2306 * about it. Also, remember this multicast address so that 2307 * we can delete them on unconfigure. 2308 */ 2309 mc = malloc(sizeof(struct carp_mc_entry), M_DEVBUF, M_NOWAIT); 2310 if (mc == NULL) { 2311 error = ENOMEM; 2312 goto alloc_failed; 2313 } 2314 2315 /* 2316 * As ether_addmulti() returns ENETRESET, following two 2317 * statement shouldn't fail. 
2318 */ 2319 (void)ether_multiaddr(sa, addrlo, addrhi); 2320 2321 ETHER_LOCK(&sc->sc_ac); 2322 mc->mc_enm = ether_lookup_multi(addrlo, addrhi, &sc->sc_ac); 2323 ETHER_UNLOCK(&sc->sc_ac); 2324 2325 memcpy(&mc->mc_addr, sa, sa->sa_len); 2326 LIST_INSERT_HEAD(&sc->carp_mc_listhead, mc, mc_entries); 2327 2328 error = if_mcast_op(ifp, SIOCADDMULTI, sa); 2329 if (error != 0) 2330 goto ioctl_failed; 2331 2332 return (error); 2333 2334 ioctl_failed: 2335 LIST_REMOVE(mc, mc_entries); 2336 free(mc, M_DEVBUF); 2337 alloc_failed: 2338 (void)ether_delmulti(sa, &sc->sc_ac); 2339 2340 return (error); 2341 } 2342 2343 static int 2344 carp_ether_delmulti(struct carp_softc *sc, struct ifreq *ifr) 2345 { 2346 const struct sockaddr *sa = ifreq_getaddr(SIOCDELMULTI, ifr); 2347 struct ifnet *ifp; 2348 struct ether_multi *enm; 2349 struct carp_mc_entry *mc; 2350 u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN]; 2351 int error; 2352 2353 ifp = sc->sc_carpdev; 2354 if (ifp == NULL) 2355 return (EINVAL); 2356 2357 /* 2358 * Find a key to lookup carp_mc_entry. We have to do this 2359 * before calling ether_delmulti for obvious reason. 2360 */ 2361 if ((error = ether_multiaddr(sa, addrlo, addrhi)) != 0) 2362 return (error); 2363 2364 ETHER_LOCK(&sc->sc_ac); 2365 enm = ether_lookup_multi(addrlo, addrhi, &sc->sc_ac); 2366 ETHER_UNLOCK(&sc->sc_ac); 2367 if (enm == NULL) 2368 return (EINVAL); 2369 2370 LIST_FOREACH(mc, &sc->carp_mc_listhead, mc_entries) 2371 if (mc->mc_enm == enm) 2372 break; 2373 2374 /* We won't delete entries we didn't add */ 2375 if (mc == NULL) 2376 return (EINVAL); 2377 2378 error = ether_delmulti(sa, &sc->sc_ac); 2379 if (error != ENETRESET) 2380 return (error); 2381 2382 /* We no longer use this multicast address. Tell parent so. */ 2383 error = if_mcast_op(ifp, SIOCDELMULTI, sa); 2384 if (error == 0) { 2385 /* And forget about this address. 
*/ 2386 LIST_REMOVE(mc, mc_entries); 2387 free(mc, M_DEVBUF); 2388 } else 2389 (void)ether_addmulti(sa, &sc->sc_ac); 2390 return (error); 2391 } 2392 2393 /* 2394 * Delete any multicast address we have asked to add from parent 2395 * interface. Called when the carp is being unconfigured. 2396 */ 2397 static void 2398 carp_ether_purgemulti(struct carp_softc *sc) 2399 { 2400 struct ifnet *ifp = sc->sc_carpdev; /* Parent. */ 2401 struct carp_mc_entry *mc; 2402 2403 if (ifp == NULL) 2404 return; 2405 2406 while ((mc = LIST_FIRST(&sc->carp_mc_listhead)) != NULL) { 2407 (void)if_mcast_op(ifp, SIOCDELMULTI, sstosa(&mc->mc_addr)); 2408 LIST_REMOVE(mc, mc_entries); 2409 free(mc, M_DEVBUF); 2410 } 2411 } 2412 2413 static int 2414 sysctl_net_inet_carp_stats(SYSCTLFN_ARGS) 2415 { 2416 2417 return (NETSTAT_SYSCTL(carpstat_percpu, CARP_NSTATS)); 2418 } 2419 2420 void 2421 carp_init(void) 2422 { 2423 2424 sysctl_net_inet_carp_setup(NULL); 2425 #ifdef MBUFTRACE 2426 MOWNER_ATTACH(&carp_proto_mowner_rx); 2427 MOWNER_ATTACH(&carp_proto_mowner_tx); 2428 MOWNER_ATTACH(&carp_proto6_mowner_rx); 2429 MOWNER_ATTACH(&carp_proto6_mowner_tx); 2430 #endif 2431 2432 carp_wqinput = wqinput_create("carp", _carp_proto_input); 2433 #ifdef INET6 2434 carp6_wqinput = wqinput_create("carp6", _carp6_proto_input); 2435 #endif 2436 } 2437 2438 static void 2439 sysctl_net_inet_carp_setup(struct sysctllog **clog) 2440 { 2441 2442 sysctl_createv(clog, 0, NULL, NULL, 2443 CTLFLAG_PERMANENT, 2444 CTLTYPE_NODE, "inet", NULL, 2445 NULL, 0, NULL, 0, 2446 CTL_NET, PF_INET, CTL_EOL); 2447 sysctl_createv(clog, 0, NULL, NULL, 2448 CTLFLAG_PERMANENT, 2449 CTLTYPE_NODE, "carp", 2450 SYSCTL_DESCR("CARP related settings"), 2451 NULL, 0, NULL, 0, 2452 CTL_NET, PF_INET, IPPROTO_CARP, CTL_EOL); 2453 2454 sysctl_createv(clog, 0, NULL, NULL, 2455 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2456 CTLTYPE_INT, "preempt", 2457 SYSCTL_DESCR("Enable CARP Preempt"), 2458 NULL, 0, &carp_opts[CARPCTL_PREEMPT], 0, 2459 CTL_NET, PF_INET, 
IPPROTO_CARP, 2460 CTL_CREATE, CTL_EOL); 2461 sysctl_createv(clog, 0, NULL, NULL, 2462 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2463 CTLTYPE_INT, "arpbalance", 2464 SYSCTL_DESCR("Enable ARP balancing"), 2465 NULL, 0, &carp_opts[CARPCTL_ARPBALANCE], 0, 2466 CTL_NET, PF_INET, IPPROTO_CARP, 2467 CTL_CREATE, CTL_EOL); 2468 sysctl_createv(clog, 0, NULL, NULL, 2469 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2470 CTLTYPE_INT, "allow", 2471 SYSCTL_DESCR("Enable CARP"), 2472 NULL, 0, &carp_opts[CARPCTL_ALLOW], 0, 2473 CTL_NET, PF_INET, IPPROTO_CARP, 2474 CTL_CREATE, CTL_EOL); 2475 sysctl_createv(clog, 0, NULL, NULL, 2476 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2477 CTLTYPE_INT, "log", 2478 SYSCTL_DESCR("CARP logging"), 2479 NULL, 0, &carp_opts[CARPCTL_LOG], 0, 2480 CTL_NET, PF_INET, IPPROTO_CARP, 2481 CTL_CREATE, CTL_EOL); 2482 sysctl_createv(clog, 0, NULL, NULL, 2483 CTLFLAG_PERMANENT, 2484 CTLTYPE_STRUCT, "stats", 2485 SYSCTL_DESCR("CARP statistics"), 2486 sysctl_net_inet_carp_stats, 0, NULL, 0, 2487 CTL_NET, PF_INET, IPPROTO_CARP, CARPCTL_STATS, 2488 CTL_EOL); 2489 } 2490