1 /* $NetBSD: ip_carp.c,v 1.93 2017/11/22 07:40:45 ozaki-r Exp $ */ 2 /* $OpenBSD: ip_carp.c,v 1.113 2005/11/04 08:11:54 mcbride Exp $ */ 3 4 /* 5 * Copyright (c) 2002 Michael Shalayeff. All rights reserved. 6 * Copyright (c) 2003 Ryan McBride. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 21 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 23 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 25 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 26 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 27 * THE POSSIBILITY OF SUCH DAMAGE. 28 */ 29 30 #ifdef _KERNEL_OPT 31 #include "opt_inet.h" 32 #include "opt_mbuftrace.h" 33 #endif 34 35 #include <sys/cdefs.h> 36 __KERNEL_RCSID(0, "$NetBSD: ip_carp.c,v 1.93 2017/11/22 07:40:45 ozaki-r Exp $"); 37 38 /* 39 * TODO: 40 * - iface reconfigure 41 * - support for hardware checksum calculations; 42 * 43 */ 44 45 #include <sys/param.h> 46 #include <sys/proc.h> 47 #include <sys/mbuf.h> 48 #include <sys/socket.h> 49 #include <sys/socketvar.h> 50 #include <sys/callout.h> 51 #include <sys/ioctl.h> 52 #include <sys/errno.h> 53 #include <sys/device.h> 54 #include <sys/time.h> 55 #include <sys/kernel.h> 56 #include <sys/kauth.h> 57 #include <sys/sysctl.h> 58 #include <sys/ucred.h> 59 #include <sys/syslog.h> 60 #include <sys/acct.h> 61 #include <sys/cprng.h> 62 #include <sys/cpu.h> 63 #include <sys/pserialize.h> 64 #include <sys/psref.h> 65 66 #include <net/if.h> 67 #include <net/pfil.h> 68 #include <net/if_types.h> 69 #include <net/if_ether.h> 70 #include <net/route.h> 71 #include <net/netisr.h> 72 #include <net/net_stats.h> 73 #include <netinet/if_inarp.h> 74 #include <netinet/wqinput.h> 75 76 #if NFDDI > 0 77 #include <net/if_fddi.h> 78 #endif 79 #if NTOKEN > 0 80 #include <net/if_token.h> 81 #endif 82 83 #ifdef INET 84 #include <netinet/in.h> 85 #include <netinet/in_systm.h> 86 #include <netinet/in_var.h> 87 #include <netinet/ip.h> 88 #include <netinet/ip_var.h> 89 90 #include <net/if_dl.h> 91 #endif 92 93 #ifdef INET6 94 #include <netinet/icmp6.h> 95 #include <netinet/ip6.h> 96 #include <netinet6/ip6_var.h> 97 #include <netinet6/nd6.h> 98 #include <netinet6/scope6_var.h> 99 #include <netinet6/in6_var.h> 100 #endif 101 102 #include <net/bpf.h> 103 104 #include <sys/sha1.h> 105 106 #include <netinet/ip_carp.h> 107 108 #include "ioconf.h" 109 110 struct carp_mc_entry { 111 LIST_ENTRY(carp_mc_entry) mc_entries; 112 union { 113 struct ether_multi *mcu_enm; 114 } mc_u; 115 struct sockaddr_storage mc_addr; 116 }; 117 #define mc_enm mc_u.mcu_enm 118 119 struct carp_softc { 120 struct ethercom sc_ac; 121 #define sc_if sc_ac.ec_if 122 #define sc_carpdev sc_ac.ec_if.if_carpdev 123 int ah_cookie; 124 int lh_cookie; 125 struct ip_moptions sc_imo; 126 #ifdef INET6 127 struct ip6_moptions sc_im6o; 128 #endif /* INET6 */ 129 TAILQ_ENTRY(carp_softc) sc_list; 130 131 enum { INIT = 0, BACKUP, MASTER } sc_state; 132 133 int sc_suppress; 134 int sc_bow_out; 135 136 int sc_sendad_errors; 137 #define CARP_SENDAD_MAX_ERRORS 3 138 int sc_sendad_success; 139 #define CARP_SENDAD_MIN_SUCCESS 3 140 141 int sc_vhid; 142 int sc_advskew; 143 int sc_naddrs; 144 int sc_naddrs6; 145 int sc_advbase; /* seconds */ 146 int sc_init_counter; 147 u_int64_t sc_counter; 148 149 /* authentication */ 150 #define CARP_HMAC_PAD 64 151 unsigned char sc_key[CARP_KEY_LEN]; 152 unsigned char sc_pad[CARP_HMAC_PAD]; 153 SHA1_CTX sc_sha1; 154 u_int32_t sc_hashkey[2]; 155 156 struct callout sc_ad_tmo; /* advertisement timeout */ 157 struct callout sc_md_tmo; /* master down timeout */ 158 struct callout sc_md6_tmo; /* master down timeout */ 159 160 LIST_HEAD(__carp_mchead, carp_mc_entry) carp_mc_listhead; 161 }; 162 163 int carp_suppress_preempt = 0; 164 static int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 0, 0 }; /* XXX for now */ 165 166 static percpu_t *carpstat_percpu; 167 168 #define CARP_STATINC(x) _NET_STATINC(carpstat_percpu, x) 169 170 #ifdef MBUFTRACE 171 static struct mowner carp_proto_mowner_rx = MOWNER_INIT("carp", "rx"); 172 static struct mowner carp_proto_mowner_tx = MOWNER_INIT("carp", "tx"); 173 static struct mowner carp_proto6_mowner_rx = MOWNER_INIT("carp6", "rx"); 174 static struct mowner carp_proto6_mowner_tx = MOWNER_INIT("carp6", "tx"); 175 #endif 176 177 struct carp_if { 178 TAILQ_HEAD(, carp_softc) vhif_vrs; 179 int vhif_nvrs; 180 181 struct ifnet *vhif_ifp; 182 }; 183 184 #define CARP_LOG(sc, s) \ 185 if (carp_opts[CARPCTL_LOG]) { \ 186 if (sc) \ 187 log(LOG_INFO, "%s: ", \ 188 (sc)->sc_if.if_xname); \ 189 else \ 190 log(LOG_INFO, "carp: "); \ 191 addlog s; \ 192 addlog("\n"); \ 193 } 194 195 static void carp_hmac_prepare(struct carp_softc *); 196 static void carp_hmac_generate(struct carp_softc *, u_int32_t *, 197 unsigned char *); 198 static int carp_hmac_verify(struct carp_softc *, u_int32_t *, 199 unsigned char *); 200 static void carp_setroute(struct carp_softc *, int); 201 static void carp_proto_input_c(struct mbuf *, struct carp_header *, 202 sa_family_t); 203 static void carpdetach(struct carp_softc *); 204 static int carp_prepare_ad(struct mbuf *, struct carp_softc *, 205 struct carp_header *); 206 static void carp_send_ad_all(void); 207 static void carp_send_ad(void *); 208 static void carp_send_arp(struct carp_softc *); 209 static void carp_master_down(void *); 210 static int carp_ioctl(struct ifnet *, u_long, void *); 211 static void carp_start(struct ifnet *); 212 static void carp_setrun(struct carp_softc *, sa_family_t); 213 static void carp_set_state(struct carp_softc *, int); 214 static int carp_addrcount(struct carp_if *, struct in_ifaddr *, int); 215 enum { CARP_COUNT_MASTER, CARP_COUNT_RUNNING }; 216 217 static void carp_multicast_cleanup(struct carp_softc *); 218 static int carp_set_ifp(struct carp_softc *, struct ifnet *); 219 static void carp_set_enaddr(struct carp_softc *); 220 #if 0 221 static void carp_addr_updated(void *); 222 #endif 223 static u_int32_t carp_hash(struct carp_softc *, u_char *); 224 static int carp_set_addr(struct carp_softc *, struct sockaddr_in *); 225 static int carp_join_multicast(struct carp_softc *); 226 #ifdef INET6 227 static void carp_send_na(struct carp_softc *); 228 static int carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *); 229 static int carp_join_multicast6(struct carp_softc *); 230 #endif 231 static int carp_clone_create(struct if_clone *, int); 232 static int carp_clone_destroy(struct ifnet *); 233 static int carp_ether_addmulti(struct carp_softc *, struct ifreq *); 234 static int carp_ether_delmulti(struct carp_softc *, struct ifreq *); 235 static void carp_ether_purgemulti(struct carp_softc *); 236 237 static void sysctl_net_inet_carp_setup(struct sysctllog **); 238 239 /* workqueue-based pr_input */ 240 static struct wqinput *carp_wqinput; 241 static void _carp_proto_input(struct mbuf *, int, int); 242 #ifdef INET6 243 static struct wqinput *carp6_wqinput; 244 static void _carp6_proto_input(struct mbuf *, int, int); 245 #endif 246 247 struct if_clone carp_cloner = 248 IF_CLONE_INITIALIZER("carp", carp_clone_create, carp_clone_destroy); 249 250 static __inline u_int16_t 251 carp_cksum(struct mbuf *m, int len) 252 { 253 return (in_cksum(m, len)); 254 } 255 256 static __inline u_int16_t 257 carp6_cksum(struct mbuf *m, uint32_t off, uint32_t len) 258 { 259 return (in6_cksum(m, IPPROTO_CARP, off, len)); 260 } 261 262 static void 263 carp_hmac_prepare(struct carp_softc *sc) 264 { 265 u_int8_t carp_version = CARP_VERSION, type = CARP_ADVERTISEMENT; 266 u_int8_t vhid = sc->sc_vhid & 0xff; 267 SHA1_CTX sha1ctx; 268 u_int32_t kmd[5]; 269 struct ifaddr *ifa; 270 int i, found; 271 struct in_addr last, cur, in; 272 #ifdef INET6 273 struct in6_addr last6, cur6, in6; 274 #endif /* INET6 */ 275 276 /* compute ipad from key */ 277 memset(sc->sc_pad, 0, sizeof(sc->sc_pad)); 278 memcpy(sc->sc_pad, sc->sc_key, sizeof(sc->sc_key)); 279 for (i = 0; i < sizeof(sc->sc_pad); i++) 280 sc->sc_pad[i] ^= 0x36; 281 282 /* precompute first part of inner hash */ 283 SHA1Init(&sc->sc_sha1); 284 SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad)); 285 SHA1Update(&sc->sc_sha1, (void *)&carp_version, sizeof(carp_version)); 286 SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type)); 287 288 /* generate a key for the arpbalance hash, before the vhid is hashed */ 289 memcpy(&sha1ctx, &sc->sc_sha1, sizeof(sha1ctx)); 290 SHA1Final((unsigned char *)kmd, &sha1ctx); 291 sc->sc_hashkey[0] = kmd[0] ^ kmd[1]; 292 sc->sc_hashkey[1] = kmd[2] ^ kmd[3]; 293 294 /* the rest of the precomputation */ 295 SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid)); 296 297 /* Hash the addresses from smallest to largest, not interface order */ 298 #ifdef INET 299 cur.s_addr = 0; 300 do { 301 int s; 302 found = 0; 303 last = cur; 304 cur.s_addr = 0xffffffff; 305 s = pserialize_read_enter(); 306 IFADDR_READER_FOREACH(ifa, &sc->sc_if) { 307 in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr; 308 if (ifa->ifa_addr->sa_family == AF_INET && 309 ntohl(in.s_addr) > ntohl(last.s_addr) && 310 ntohl(in.s_addr) < ntohl(cur.s_addr)) { 311 cur.s_addr = in.s_addr; 312 found++; 313 } 314 } 315 pserialize_read_exit(s); 316 if (found) 317 SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur)); 318 } while (found); 319 #endif /* INET */ 320 321 #ifdef INET6 322 memset(&cur6, 0x00, sizeof(cur6)); 323 do { 324 int s; 325 found = 0; 326 last6 = cur6; 327 memset(&cur6, 0xff, sizeof(cur6)); 328 s = pserialize_read_enter(); 329 IFADDR_READER_FOREACH(ifa, &sc->sc_if) { 330 in6 = ifatoia6(ifa)->ia_addr.sin6_addr; 331 if (IN6_IS_ADDR_LINKLOCAL(&in6)) 332 in6.s6_addr16[1] = 0; 333 if (ifa->ifa_addr->sa_family == AF_INET6 && 334 memcmp(&in6, &last6, sizeof(in6)) > 0 && 335 memcmp(&in6, &cur6, sizeof(in6)) < 0) { 336 cur6 = in6; 337 found++; 338 } 339 } 340 pserialize_read_exit(s); 341 if (found) 342 SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6)); 343 } while (found); 344 #endif /* INET6 */ 345 346 /* convert ipad to opad */ 347 for (i = 0; i < sizeof(sc->sc_pad); i++) 348 sc->sc_pad[i] ^= 0x36 ^ 0x5c; 349 } 350 351 static void 352 carp_hmac_generate(struct carp_softc *sc, u_int32_t counter[2], 353 unsigned char md[20]) 354 { 355 SHA1_CTX sha1ctx; 356 357 /* fetch first half of inner hash */ 358 memcpy(&sha1ctx, &sc->sc_sha1, sizeof(sha1ctx)); 359 360 SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter)); 361 SHA1Final(md, &sha1ctx); 362 363 /* outer hash */ 364 SHA1Init(&sha1ctx); 365 SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad)); 366 SHA1Update(&sha1ctx, md, 20); 367 SHA1Final(md, &sha1ctx); 368 } 369 370 static int 371 carp_hmac_verify(struct carp_softc *sc, u_int32_t counter[2], 372 unsigned char md[20]) 373 { 374 unsigned char md2[20]; 375 376 carp_hmac_generate(sc, counter, md2); 377 378 return (memcmp(md, md2, sizeof(md2))); 379 } 380 381 static void 382 carp_setroute(struct carp_softc *sc, int cmd) 383 { 384 struct ifaddr *ifa; 385 int s, bound; 386 387 KERNEL_LOCK(1, NULL); 388 bound = curlwp_bind(); 389 s = pserialize_read_enter(); 390 IFADDR_READER_FOREACH(ifa, &sc->sc_if) { 391 struct psref psref; 392 ifa_acquire(ifa, &psref); 393 pserialize_read_exit(s); 394 395 switch (ifa->ifa_addr->sa_family) { 396 case AF_INET: { 397 int count = 0; 398 struct rtentry *rt; 399 int hr_otherif, nr_ourif; 400 401 /* 402 * Avoid screwing with the routes if there are other 403 * carp interfaces which are master and have the same 404 * address. 405 */ 406 if (sc->sc_carpdev != NULL && 407 sc->sc_carpdev->if_carp != NULL) { 408 count = carp_addrcount( 409 (struct carp_if *)sc->sc_carpdev->if_carp, 410 ifatoia(ifa), CARP_COUNT_MASTER); 411 if ((cmd == RTM_ADD && count != 1) || 412 (cmd == RTM_DELETE && count != 0)) 413 continue; 414 } 415 416 /* Remove the existing host route, if any */ 417 rtrequest(RTM_DELETE, ifa->ifa_addr, 418 ifa->ifa_addr, ifa->ifa_netmask, 419 RTF_HOST, NULL); 420 421 rt = NULL; 422 (void)rtrequest(RTM_GET, ifa->ifa_addr, ifa->ifa_addr, 423 ifa->ifa_netmask, RTF_HOST, &rt); 424 hr_otherif = (rt && rt->rt_ifp != &sc->sc_if && 425 (rt->rt_flags & RTF_CONNECTED)); 426 if (rt != NULL) { 427 rt_unref(rt); 428 rt = NULL; 429 } 430 431 /* Check for a network route on our interface */ 432 433 rt = NULL; 434 (void)rtrequest(RTM_GET, ifa->ifa_addr, ifa->ifa_addr, 435 ifa->ifa_netmask, 0, &rt); 436 nr_ourif = (rt && rt->rt_ifp == &sc->sc_if); 437 438 switch (cmd) { 439 case RTM_ADD: 440 if (hr_otherif) { 441 ifa->ifa_rtrequest = NULL; 442 ifa->ifa_flags &= ~RTF_CONNECTED; 443 444 rtrequest(RTM_ADD, ifa->ifa_addr, 445 ifa->ifa_addr, ifa->ifa_netmask, 446 RTF_UP | RTF_HOST, NULL); 447 } 448 if (!hr_otherif || nr_ourif || !rt) { 449 if (nr_ourif && 450 (rt->rt_flags & RTF_CONNECTED) == 0) 451 rtrequest(RTM_DELETE, 452 ifa->ifa_addr, 453 ifa->ifa_addr, 454 ifa->ifa_netmask, 0, NULL); 455 456 ifa->ifa_rtrequest = arp_rtrequest; 457 ifa->ifa_flags |= RTF_CONNECTED; 458 459 if (rtrequest(RTM_ADD, ifa->ifa_addr, 460 ifa->ifa_addr, ifa->ifa_netmask, 0, 461 NULL) == 0) 462 ifa->ifa_flags |= IFA_ROUTE; 463 } 464 break; 465 case RTM_DELETE: 466 break; 467 default: 468 break; 469 } 470 if (rt != NULL) { 471 rt_unref(rt); 472 rt = NULL; 473 } 474 break; 475 } 476 477 #ifdef INET6 478 case AF_INET6: 479 if (cmd == RTM_ADD) 480 in6_ifaddlocal(ifa); 481 else 482 in6_ifremlocal(ifa); 483 break; 484 #endif /* INET6 */ 485 default: 486 break; 487 } 488 s = pserialize_read_enter(); 489 ifa_release(ifa, &psref); 490 } 491 pserialize_read_exit(s); 492 curlwp_bindx(bound); 493 KERNEL_UNLOCK_ONE(NULL); 494 } 495 496 /* 497 * process input packet. 498 * we have rearranged checks order compared to the rfc, 499 * but it seems more efficient this way or not possible otherwise. 500 */ 501 static void 502 _carp_proto_input(struct mbuf *m, int hlen, int proto) 503 { 504 struct ip *ip = mtod(m, struct ip *); 505 struct carp_softc *sc = NULL; 506 struct carp_header *ch; 507 int iplen, len; 508 struct ifnet *rcvif; 509 510 CARP_STATINC(CARP_STAT_IPACKETS); 511 MCLAIM(m, &carp_proto_mowner_rx); 512 513 if (!carp_opts[CARPCTL_ALLOW]) { 514 m_freem(m); 515 return; 516 } 517 518 rcvif = m_get_rcvif_NOMPSAFE(m); 519 /* check if received on a valid carp interface */ 520 if (rcvif->if_type != IFT_CARP) { 521 CARP_STATINC(CARP_STAT_BADIF); 522 CARP_LOG(sc, ("packet received on non-carp interface: %s", 523 rcvif->if_xname)); 524 m_freem(m); 525 return; 526 } 527 528 /* verify that the IP TTL is 255. */ 529 if (ip->ip_ttl != CARP_DFLTTL) { 530 CARP_STATINC(CARP_STAT_BADTTL); 531 CARP_LOG(sc, ("received ttl %d != %d on %s", ip->ip_ttl, 532 CARP_DFLTTL, rcvif->if_xname)); 533 m_freem(m); 534 return; 535 } 536 537 /* 538 * verify that the received packet length is 539 * equal to the CARP header 540 */ 541 iplen = ip->ip_hl << 2; 542 len = iplen + sizeof(*ch); 543 if (len > m->m_pkthdr.len) { 544 CARP_STATINC(CARP_STAT_BADLEN); 545 CARP_LOG(sc, ("packet too short %d on %s", m->m_pkthdr.len, 546 rcvif->if_xname)); 547 m_freem(m); 548 return; 549 } 550 551 if ((m = m_pullup(m, len)) == NULL) { 552 CARP_STATINC(CARP_STAT_HDROPS); 553 return; 554 } 555 ip = mtod(m, struct ip *); 556 ch = (struct carp_header *)((char *)ip + iplen); 557 /* verify the CARP checksum */ 558 m->m_data += iplen; 559 if (carp_cksum(m, len - iplen)) { 560 CARP_STATINC(CARP_STAT_BADSUM); 561 CARP_LOG(sc, ("checksum failed on %s", 562 rcvif->if_xname)); 563 m_freem(m); 564 return; 565 } 566 m->m_data -= iplen; 567 568 carp_proto_input_c(m, ch, AF_INET); 569 } 570 571 void 572 carp_proto_input(struct mbuf *m, ...) 573 { 574 575 wqinput_input(carp_wqinput, m, 0, 0); 576 } 577 578 #ifdef INET6 579 static void 580 _carp6_proto_input(struct mbuf *m, int off, int proto) 581 { 582 struct carp_softc *sc = NULL; 583 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 584 struct carp_header *ch; 585 u_int len; 586 struct ifnet *rcvif; 587 588 CARP_STATINC(CARP_STAT_IPACKETS6); 589 MCLAIM(m, &carp_proto6_mowner_rx); 590 591 if (!carp_opts[CARPCTL_ALLOW]) { 592 m_freem(m); 593 return; 594 } 595 596 rcvif = m_get_rcvif_NOMPSAFE(m); 597 598 /* check if received on a valid carp interface */ 599 if (rcvif->if_type != IFT_CARP) { 600 CARP_STATINC(CARP_STAT_BADIF); 601 CARP_LOG(sc, ("packet received on non-carp interface: %s", 602 rcvif->if_xname)); 603 m_freem(m); 604 return; 605 } 606 607 /* verify that the IP TTL is 255 */ 608 if (ip6->ip6_hlim != CARP_DFLTTL) { 609 CARP_STATINC(CARP_STAT_BADTTL); 610 CARP_LOG(sc, ("received ttl %d != %d on %s", ip6->ip6_hlim, 611 CARP_DFLTTL, rcvif->if_xname)); 612 m_freem(m); 613 return; 614 } 615 616 /* verify that we have a complete carp packet */ 617 len = m->m_len; 618 IP6_EXTHDR_GET(ch, struct carp_header *, m, off, sizeof(*ch)); 619 if (ch == NULL) { 620 CARP_STATINC(CARP_STAT_BADLEN); 621 CARP_LOG(sc, ("packet size %u too small", len)); 622 return; 623 } 624 625 /* verify the CARP checksum */ 626 if (carp6_cksum(m, off, sizeof(*ch))) { 627 CARP_STATINC(CARP_STAT_BADSUM); 628 CARP_LOG(sc, ("checksum failed, on %s", rcvif->if_xname)); 629 m_freem(m); 630 return; 631 } 632 633 carp_proto_input_c(m, ch, AF_INET6); 634 return; 635 } 636 637 int 638 carp6_proto_input(struct mbuf **mp, int *offp, int proto) 639 { 640 641 wqinput_input(carp6_wqinput, *mp, *offp, proto); 642 643 return IPPROTO_DONE; 644 } 645 #endif /* INET6 */ 646 647 static void 648 carp_proto_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af) 649 { 650 struct carp_softc *sc; 651 u_int64_t tmp_counter; 652 struct timeval sc_tv, ch_tv; 653 654 TAILQ_FOREACH(sc, &((struct carp_if *) 655 m_get_rcvif_NOMPSAFE(m)->if_carpdev->if_carp)->vhif_vrs, sc_list) 656 if (sc->sc_vhid == ch->carp_vhid) 657 break; 658 659 if (!sc || (sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) != 660 (IFF_UP|IFF_RUNNING)) { 661 CARP_STATINC(CARP_STAT_BADVHID); 662 m_freem(m); 663 return; 664 } 665 666 /* 667 * Check if our own advertisement was duplicated 668 * from a non simplex interface. 669 * XXX If there is no address on our physical interface 670 * there is no way to distinguish our ads from the ones 671 * another carp host might have sent us. 672 */ 673 if ((sc->sc_carpdev->if_flags & IFF_SIMPLEX) == 0) { 674 struct sockaddr sa; 675 struct ifaddr *ifa; 676 int s; 677 678 memset(&sa, 0, sizeof(sa)); 679 sa.sa_family = af; 680 s = pserialize_read_enter(); 681 ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev); 682 683 if (ifa && af == AF_INET) { 684 struct ip *ip = mtod(m, struct ip *); 685 if (ip->ip_src.s_addr == 686 ifatoia(ifa)->ia_addr.sin_addr.s_addr) { 687 pserialize_read_exit(s); 688 m_freem(m); 689 return; 690 } 691 } 692 #ifdef INET6 693 if (ifa && af == AF_INET6) { 694 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 695 struct in6_addr in6_src, in6_found; 696 697 in6_src = ip6->ip6_src; 698 in6_found = ifatoia6(ifa)->ia_addr.sin6_addr; 699 if (IN6_IS_ADDR_LINKLOCAL(&in6_src)) 700 in6_src.s6_addr16[1] = 0; 701 if (IN6_IS_ADDR_LINKLOCAL(&in6_found)) 702 in6_found.s6_addr16[1] = 0; 703 if (IN6_ARE_ADDR_EQUAL(&in6_src, &in6_found)) { 704 pserialize_read_exit(s); 705 m_freem(m); 706 return; 707 } 708 } 709 #endif /* INET6 */ 710 pserialize_read_exit(s); 711 } 712 713 nanotime(&sc->sc_if.if_lastchange); 714 sc->sc_if.if_ipackets++; 715 sc->sc_if.if_ibytes += m->m_pkthdr.len; 716 717 /* verify the CARP version. */ 718 if (ch->carp_version != CARP_VERSION) { 719 CARP_STATINC(CARP_STAT_BADVER); 720 sc->sc_if.if_ierrors++; 721 CARP_LOG(sc, ("invalid version %d != %d", 722 ch->carp_version, CARP_VERSION)); 723 m_freem(m); 724 return; 725 } 726 727 /* verify the hash */ 728 if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) { 729 struct ip *ip; 730 char ipbuf[INET_ADDRSTRLEN]; 731 #ifdef INET6 732 struct ip6_hdr *ip6; 733 char ip6buf[INET6_ADDRSTRLEN]; 734 #endif 735 736 CARP_STATINC(CARP_STAT_BADAUTH); 737 sc->sc_if.if_ierrors++; 738 739 switch(af) { 740 case AF_INET: 741 ip = mtod(m, struct ip *); 742 CARP_LOG(sc, ("incorrect hash from %s", 743 IN_PRINT(ipbuf, &ip->ip_src))); 744 break; 745 746 #ifdef INET6 747 case AF_INET6: 748 ip6 = mtod(m, struct ip6_hdr *); 749 CARP_LOG(sc, ("incorrect hash from %s", 750 IN6_PRINT(ip6buf, &ip6->ip6_src))); 751 break; 752 #endif 753 754 default: CARP_LOG(sc, ("incorrect hash")); 755 break; 756 } 757 m_freem(m); 758 return; 759 } 760 761 tmp_counter = ntohl(ch->carp_counter[0]); 762 tmp_counter = tmp_counter<<32; 763 tmp_counter += ntohl(ch->carp_counter[1]); 764 765 /* XXX Replay protection goes here */ 766 767 sc->sc_init_counter = 0; 768 sc->sc_counter = tmp_counter; 769 770 771 sc_tv.tv_sec = sc->sc_advbase; 772 if (carp_suppress_preempt && sc->sc_advskew < 240) 773 sc_tv.tv_usec = 240 * 1000000 / 256; 774 else 775 sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256; 776 ch_tv.tv_sec = ch->carp_advbase; 777 ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256; 778 779 switch (sc->sc_state) { 780 case INIT: 781 break; 782 case MASTER: 783 /* 784 * If we receive an advertisement from a backup who's going to 785 * be more frequent than us, go into BACKUP state. 786 */ 787 if (timercmp(&sc_tv, &ch_tv, >) || 788 timercmp(&sc_tv, &ch_tv, ==)) { 789 callout_stop(&sc->sc_ad_tmo); 790 CARP_LOG(sc, ("MASTER -> BACKUP (more frequent advertisement received)")); 791 carp_set_state(sc, BACKUP); 792 carp_setrun(sc, 0); 793 carp_setroute(sc, RTM_DELETE); 794 } 795 break; 796 case BACKUP: 797 /* 798 * If we're pre-empting masters who advertise slower than us, 799 * and this one claims to be slower, treat him as down. 800 */ 801 if (carp_opts[CARPCTL_PREEMPT] && timercmp(&sc_tv, &ch_tv, <)) { 802 CARP_LOG(sc, ("BACKUP -> MASTER (preempting a slower master)")); 803 carp_master_down(sc); 804 break; 805 } 806 807 /* 808 * If the master is going to advertise at such a low frequency 809 * that he's guaranteed to time out, we'd might as well just 810 * treat him as timed out now. 811 */ 812 sc_tv.tv_sec = sc->sc_advbase * 3; 813 if (timercmp(&sc_tv, &ch_tv, <)) { 814 CARP_LOG(sc, ("BACKUP -> MASTER (master timed out)")); 815 carp_master_down(sc); 816 break; 817 } 818 819 /* 820 * Otherwise, we reset the counter and wait for the next 821 * advertisement. 822 */ 823 carp_setrun(sc, af); 824 break; 825 } 826 827 m_freem(m); 828 return; 829 } 830 831 /* 832 * Interface side of the CARP implementation. 833 */ 834 835 /* ARGSUSED */ 836 void 837 carpattach(int n) 838 { 839 if_clone_attach(&carp_cloner); 840 841 carpstat_percpu = percpu_alloc(sizeof(uint64_t) * CARP_NSTATS); 842 } 843 844 static int 845 carp_clone_create(struct if_clone *ifc, int unit) 846 { 847 extern int ifqmaxlen; 848 struct carp_softc *sc; 849 struct ifnet *ifp; 850 int rv; 851 852 sc = malloc(sizeof(*sc), M_DEVBUF, M_NOWAIT|M_ZERO); 853 if (!sc) 854 return (ENOMEM); 855 856 sc->sc_suppress = 0; 857 sc->sc_advbase = CARP_DFLTINTV; 858 sc->sc_vhid = -1; /* required setting */ 859 sc->sc_advskew = 0; 860 sc->sc_init_counter = 1; 861 sc->sc_naddrs = sc->sc_naddrs6 = 0; 862 #ifdef INET6 863 sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL; 864 #endif /* INET6 */ 865 866 callout_init(&sc->sc_ad_tmo, 0); 867 callout_init(&sc->sc_md_tmo, 0); 868 callout_init(&sc->sc_md6_tmo, 0); 869 870 callout_setfunc(&sc->sc_ad_tmo, carp_send_ad, sc); 871 callout_setfunc(&sc->sc_md_tmo, carp_master_down, sc); 872 callout_setfunc(&sc->sc_md6_tmo, carp_master_down, sc); 873 874 LIST_INIT(&sc->carp_mc_listhead); 875 ifp = &sc->sc_if; 876 ifp->if_softc = sc; 877 snprintf(ifp->if_xname, sizeof ifp->if_xname, "%s%d", ifc->ifc_name, 878 unit); 879 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 880 ifp->if_ioctl = carp_ioctl; 881 ifp->if_start = carp_start; 882 IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); 883 IFQ_SET_READY(&ifp->if_snd); 884 rv = if_initialize(ifp); 885 if (rv != 0) { 886 callout_destroy(&sc->sc_ad_tmo); 887 callout_destroy(&sc->sc_md_tmo); 888 callout_destroy(&sc->sc_md6_tmo); 889 free(ifp->if_softc, M_DEVBUF); 890 891 return rv; 892 } 893 ether_ifattach(ifp, NULL); 894 carp_set_enaddr(sc); 895 /* Overwrite ethernet defaults */ 896 ifp->if_type = IFT_CARP; 897 ifp->if_output = carp_output; 898 if_register(ifp); 899 900 return (0); 901 } 902 903 static int 904 carp_clone_destroy(struct ifnet *ifp) 905 { 906 struct carp_softc *sc = ifp->if_softc; 907 908 carpdetach(ifp->if_softc); 909 ether_ifdetach(ifp); 910 if_detach(ifp); 911 callout_destroy(&sc->sc_ad_tmo); 912 callout_destroy(&sc->sc_md_tmo); 913 callout_destroy(&sc->sc_md6_tmo); 914 free(ifp->if_softc, M_DEVBUF); 915 916 return (0); 917 } 918 919 static void 920 carpdetach(struct carp_softc *sc) 921 { 922 struct carp_if *cif; 923 int s; 924 925 callout_stop(&sc->sc_ad_tmo); 926 callout_stop(&sc->sc_md_tmo); 927 callout_stop(&sc->sc_md6_tmo); 928 929 if (sc->sc_suppress) 930 carp_suppress_preempt--; 931 sc->sc_suppress = 0; 932 933 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) 934 carp_suppress_preempt--; 935 sc->sc_sendad_errors = 0; 936 937 carp_set_state(sc, INIT); 938 sc->sc_if.if_flags &= ~IFF_UP; 939 carp_setrun(sc, 0); 940 carp_multicast_cleanup(sc); 941 942 KERNEL_LOCK(1, NULL); 943 s = splnet(); 944 if (sc->sc_carpdev != NULL) { 945 /* XXX linkstatehook removal */ 946 cif = (struct carp_if *)sc->sc_carpdev->if_carp; 947 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list); 948 if (!--cif->vhif_nvrs) { 949 ifpromisc(sc->sc_carpdev, 0); 950 sc->sc_carpdev->if_carp = NULL; 951 free(cif, M_IFADDR); 952 } 953 } 954 sc->sc_carpdev = NULL; 955 splx(s); 956 KERNEL_UNLOCK_ONE(NULL); 957 } 958 959 /* Detach an interface from the carp. */ 960 void 961 carp_ifdetach(struct ifnet *ifp) 962 { 963 struct carp_softc *sc, *nextsc; 964 struct carp_if *cif = (struct carp_if *)ifp->if_carp; 965 966 for (sc = TAILQ_FIRST(&cif->vhif_vrs); sc; sc = nextsc) { 967 nextsc = TAILQ_NEXT(sc, sc_list); 968 carpdetach(sc); 969 } 970 } 971 972 static int 973 carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, 974 struct carp_header *ch) 975 { 976 if (sc->sc_init_counter) { 977 /* this could also be seconds since unix epoch */ 978 sc->sc_counter = cprng_fast64(); 979 } else 980 sc->sc_counter++; 981 982 ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff); 983 ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff); 984 985 carp_hmac_generate(sc, ch->carp_counter, ch->carp_md); 986 987 return (0); 988 } 989 990 static void 991 carp_send_ad_all(void) 992 { 993 struct ifnet *ifp; 994 struct carp_if *cif; 995 struct carp_softc *vh; 996 int s; 997 int bound = curlwp_bind(); 998 999 s = pserialize_read_enter(); 1000 IFNET_READER_FOREACH(ifp) { 1001 struct psref psref; 1002 if (ifp->if_carp == NULL || ifp->if_type == IFT_CARP) 1003 continue; 1004 1005 if_acquire(ifp, &psref); 1006 pserialize_read_exit(s); 1007 1008 cif = (struct carp_if *)ifp->if_carp; 1009 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1010 if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) == 1011 (IFF_UP|IFF_RUNNING) && vh->sc_state == MASTER) 1012 carp_send_ad(vh); 1013 } 1014 1015 s = pserialize_read_enter(); 1016 if_release(ifp, &psref); 1017 } 1018 pserialize_read_exit(s); 1019 curlwp_bindx(bound); 1020 } 1021 1022 1023 static void 1024 carp_send_ad(void *v) 1025 { 1026 struct carp_header ch; 1027 struct timeval tv; 1028 struct carp_softc *sc = v; 1029 struct carp_header *ch_ptr; 1030 struct mbuf *m; 1031 int error, len, advbase, advskew, s; 1032 struct sockaddr sa; 1033 1034 KERNEL_LOCK(1, NULL); 1035 s = splsoftnet(); 1036 1037 advbase = advskew = 0; /* Sssssh compiler */ 1038 if (sc->sc_carpdev == NULL) { 1039 sc->sc_if.if_oerrors++; 1040 goto retry_later; 1041 } 1042 1043 /* bow out if we've gone to backup (the carp interface is going down) */ 1044 if (sc->sc_bow_out) { 1045 sc->sc_bow_out = 0; 1046 advbase = 255; 1047 advskew = 255; 1048 } else { 1049 advbase = sc->sc_advbase; 1050 if (!carp_suppress_preempt || sc->sc_advskew > 240) 1051 advskew = sc->sc_advskew; 1052 else 1053 advskew = 240; 1054 tv.tv_sec = advbase; 1055 tv.tv_usec = advskew * 1000000 / 256; 1056 } 1057 1058 ch.carp_version = CARP_VERSION; 1059 ch.carp_type = CARP_ADVERTISEMENT; 1060 ch.carp_vhid = sc->sc_vhid; 1061 ch.carp_advbase = advbase; 1062 ch.carp_advskew = advskew; 1063 ch.carp_authlen = 7; /* XXX DEFINE */ 1064 ch.carp_pad1 = 0; /* must be zero */ 1065 ch.carp_cksum = 0; 1066 1067 1068 #ifdef INET 1069 if (sc->sc_naddrs) { 1070 struct ip *ip; 1071 struct ifaddr *ifa; 1072 int _s; 1073 1074 MGETHDR(m, M_DONTWAIT, MT_HEADER); 1075 if (m == NULL) { 1076 sc->sc_if.if_oerrors++; 1077 CARP_STATINC(CARP_STAT_ONOMEM); 1078 /* XXX maybe less ? */ 1079 goto retry_later; 1080 } 1081 MCLAIM(m, &carp_proto_mowner_tx); 1082 len = sizeof(*ip) + sizeof(ch); 1083 m->m_pkthdr.len = len; 1084 m_reset_rcvif(m); 1085 m->m_len = len; 1086 MH_ALIGN(m, m->m_len); 1087 m->m_flags |= M_MCAST; 1088 ip = mtod(m, struct ip *); 1089 ip->ip_v = IPVERSION; 1090 ip->ip_hl = sizeof(*ip) >> 2; 1091 ip->ip_tos = IPTOS_LOWDELAY; 1092 ip->ip_len = htons(len); 1093 ip->ip_id = 0; /* no need for id, we don't support fragments */ 1094 ip->ip_off = htons(IP_DF); 1095 ip->ip_ttl = CARP_DFLTTL; 1096 ip->ip_p = IPPROTO_CARP; 1097 ip->ip_sum = 0; 1098 1099 memset(&sa, 0, sizeof(sa)); 1100 sa.sa_family = AF_INET; 1101 _s = pserialize_read_enter(); 1102 ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev); 1103 if (ifa == NULL) 1104 ip->ip_src.s_addr = 0; 1105 else 1106 ip->ip_src.s_addr = 1107 ifatoia(ifa)->ia_addr.sin_addr.s_addr; 1108 pserialize_read_exit(_s); 1109 ip->ip_dst.s_addr = INADDR_CARP_GROUP; 1110 1111 ch_ptr = (struct carp_header *)(&ip[1]); 1112 memcpy(ch_ptr, &ch, sizeof(ch)); 1113 if (carp_prepare_ad(m, sc, ch_ptr)) 1114 goto retry_later; 1115 1116 m->m_data += sizeof(*ip); 1117 ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip)); 1118 m->m_data -= sizeof(*ip); 1119 1120 nanotime(&sc->sc_if.if_lastchange); 1121 sc->sc_if.if_opackets++; 1122 sc->sc_if.if_obytes += len; 1123 CARP_STATINC(CARP_STAT_OPACKETS); 1124 1125 error = ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, 1126 NULL); 1127 if (error) { 1128 if (error == ENOBUFS) 1129 CARP_STATINC(CARP_STAT_ONOMEM); 1130 else 1131 CARP_LOG(sc, ("ip_output failed: %d", error)); 1132 sc->sc_if.if_oerrors++; 1133 if (sc->sc_sendad_errors < INT_MAX) 1134 sc->sc_sendad_errors++; 1135 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { 1136 carp_suppress_preempt++; 1137 if (carp_suppress_preempt == 1) 1138 carp_send_ad_all(); 1139 } 1140 sc->sc_sendad_success = 0; 1141 } else { 1142 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { 1143 if (++sc->sc_sendad_success >= 1144 CARP_SENDAD_MIN_SUCCESS) { 1145 carp_suppress_preempt--; 1146 sc->sc_sendad_errors = 0; 1147 } 1148 } else 1149 sc->sc_sendad_errors = 0; 1150 } 1151 } 1152 #endif /* INET */ 1153 #ifdef INET6 1154 if (sc->sc_naddrs6) { 1155 struct ip6_hdr *ip6; 1156 struct ifaddr *ifa; 1157 int _s; 1158 1159 MGETHDR(m, M_DONTWAIT, MT_HEADER); 1160 if (m == NULL) { 1161 sc->sc_if.if_oerrors++; 1162 CARP_STATINC(CARP_STAT_ONOMEM); 1163 /* XXX maybe less ? */ 1164 goto retry_later; 1165 } 1166 MCLAIM(m, &carp_proto6_mowner_tx); 1167 len = sizeof(*ip6) + sizeof(ch); 1168 m->m_pkthdr.len = len; 1169 m_reset_rcvif(m); 1170 m->m_len = len; 1171 MH_ALIGN(m, m->m_len); 1172 m->m_flags |= M_MCAST; 1173 ip6 = mtod(m, struct ip6_hdr *); 1174 memset(ip6, 0, sizeof(*ip6)); 1175 ip6->ip6_vfc |= IPV6_VERSION; 1176 ip6->ip6_hlim = CARP_DFLTTL; 1177 ip6->ip6_nxt = IPPROTO_CARP; 1178 1179 /* set the source address */ 1180 memset(&sa, 0, sizeof(sa)); 1181 sa.sa_family = AF_INET6; 1182 _s = pserialize_read_enter(); 1183 ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev); 1184 if (ifa == NULL) /* This should never happen with IPv6 */ 1185 memset(&ip6->ip6_src, 0, sizeof(struct in6_addr)); 1186 else 1187 bcopy(ifatoia6(ifa)->ia_addr.sin6_addr.s6_addr, 1188 &ip6->ip6_src, sizeof(struct in6_addr)); 1189 pserialize_read_exit(_s); 1190 /* set the multicast destination */ 1191 1192 ip6->ip6_dst.s6_addr16[0] = htons(0xff02); 1193 ip6->ip6_dst.s6_addr8[15] = 0x12; 1194 if (in6_setscope(&ip6->ip6_dst, &sc->sc_if, NULL) != 0) { 1195 sc->sc_if.if_oerrors++; 1196 m_freem(m); 1197 CARP_LOG(sc, ("in6_setscope failed")); 1198 goto retry_later; 1199 } 1200 1201 ch_ptr = (struct carp_header *)(&ip6[1]); 1202 memcpy(ch_ptr, &ch, sizeof(ch)); 1203 if (carp_prepare_ad(m, sc, ch_ptr)) 1204 goto retry_later; 1205 1206 ch_ptr->carp_cksum = carp6_cksum(m, sizeof(*ip6), 1207 len - sizeof(*ip6)); 1208 1209 nanotime(&sc->sc_if.if_lastchange); 1210 sc->sc_if.if_opackets++; 1211 sc->sc_if.if_obytes += len; 1212 CARP_STATINC(CARP_STAT_OPACKETS6); 1213 1214 error = ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL); 1215 if (error) { 1216 if (error == ENOBUFS) 1217 CARP_STATINC(CARP_STAT_ONOMEM); 1218 else 1219 CARP_LOG(sc, ("ip6_output failed: %d", error)); 1220 sc->sc_if.if_oerrors++; 1221 if (sc->sc_sendad_errors < INT_MAX) 1222 sc->sc_sendad_errors++; 1223 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { 1224 carp_suppress_preempt++; 1225 if (carp_suppress_preempt == 1) 1226 carp_send_ad_all(); 1227 } 1228 sc->sc_sendad_success = 0; 1229 } else { 1230 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { 1231 if (++sc->sc_sendad_success >= 1232 CARP_SENDAD_MIN_SUCCESS) { 1233 carp_suppress_preempt--; 1234 sc->sc_sendad_errors = 0; 1235 } 1236 } else 1237 sc->sc_sendad_errors = 0; 1238 } 1239 } 1240 #endif /* INET6 */ 1241 1242 retry_later: 1243 splx(s); 1244 KERNEL_UNLOCK_ONE(NULL); 1245 if (advbase != 255 || advskew != 255) 1246 callout_schedule(&sc->sc_ad_tmo, tvtohz(&tv)); 1247 } 1248 1249 /* 1250 * Broadcast a gratuitous ARP request containing 1251 * the virtual router MAC address for each IP address 1252 * associated with the virtual router. 1253 */ 1254 static void 1255 carp_send_arp(struct carp_softc *sc) 1256 { 1257 struct ifaddr *ifa; 1258 int s, bound; 1259 1260 KERNEL_LOCK(1, NULL); 1261 bound = curlwp_bind(); 1262 s = pserialize_read_enter(); 1263 IFADDR_READER_FOREACH(ifa, &sc->sc_if) { 1264 struct psref psref; 1265 1266 if (ifa->ifa_addr->sa_family != AF_INET) 1267 continue; 1268 1269 ifa_acquire(ifa, &psref); 1270 pserialize_read_exit(s); 1271 1272 arpannounce(sc->sc_carpdev, ifa, CLLADDR(sc->sc_if.if_sadl)); 1273 1274 s = pserialize_read_enter(); 1275 ifa_release(ifa, &psref); 1276 } 1277 pserialize_read_exit(s); 1278 curlwp_bindx(bound); 1279 KERNEL_UNLOCK_ONE(NULL); 1280 } 1281 1282 #ifdef INET6 1283 static void 1284 carp_send_na(struct carp_softc *sc) 1285 { 1286 struct ifaddr *ifa; 1287 struct in6_addr *in6; 1288 static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT; 1289 int s, bound; 1290 1291 KERNEL_LOCK(1, NULL); 1292 bound = curlwp_bind(); 1293 s = pserialize_read_enter(); 1294 IFADDR_READER_FOREACH(ifa, &sc->sc_if) { 1295 struct psref psref; 1296 1297 if (ifa->ifa_addr->sa_family != AF_INET6) 1298 continue; 1299 1300 ifa_acquire(ifa, &psref); 1301 pserialize_read_exit(s); 1302 1303 in6 = &ifatoia6(ifa)->ia_addr.sin6_addr; 1304 nd6_na_output(sc->sc_carpdev, &mcast, in6, 1305 ND_NA_FLAG_OVERRIDE, 1, NULL); 1306 1307 s = pserialize_read_enter(); 1308 ifa_release(ifa, &psref); 1309 } 1310 pserialize_read_exit(s); 1311 curlwp_bindx(bound); 1312 KERNEL_UNLOCK_ONE(NULL); 1313 } 1314 #endif /* INET6 */ 1315 1316 /* 1317 * Based on bridge_hash() in if_bridge.c 1318 */ 1319 #define mix(a,b,c) \ 1320 do { \ 1321 a -= b; a -= c; a ^= (c >> 13); \ 1322 b -= c; b -= a; b ^= (a << 8); \ 1323 c -= a; c -= b; c ^= (b >> 13); \ 1324 a -= b; a -= c; a ^= (c >> 12); \ 1325 b -= c; b -= a; b ^= (a << 16); \ 1326 c -= a; c -= b; c ^= (b >> 5); \ 1327 a -= b; a -= c; a ^= (c >> 3); \ 1328 b -= c; b -= a; b ^= (a << 10); \ 1329 c -= a; c -= b; c ^= (b >> 15); \ 1330 } while (0) 1331 1332 static u_int32_t 1333 carp_hash(struct carp_softc *sc, u_char *src) 1334 { 1335 u_int32_t a = 0x9e3779b9, b = sc->sc_hashkey[0], c = sc->sc_hashkey[1]; 1336 1337 c += sc->sc_key[3] << 24; 1338 c += sc->sc_key[2] << 16; 1339 c += sc->sc_key[1] << 8; 1340 c += sc->sc_key[0]; 1341 b += src[5] << 8; 1342 b += src[4]; 1343 a += src[3] << 24; 1344 a += src[2] << 16; 1345 a += src[1] << 8; 1346 a += src[0]; 1347 1348 mix(a, b, c); 1349 return (c); 1350 } 1351 1352 static int 1353 carp_addrcount(struct carp_if *cif, struct in_ifaddr *ia, int type) 1354 { 1355 struct carp_softc *vh; 1356 struct ifaddr *ifa; 1357 int count = 0; 1358 1359 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1360 if ((type == CARP_COUNT_RUNNING && 1361 (vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) == 1362 (IFF_UP|IFF_RUNNING)) || 1363 (type == CARP_COUNT_MASTER && vh->sc_state == MASTER)) { 1364 int s = pserialize_read_enter(); 1365 IFADDR_READER_FOREACH(ifa, &vh->sc_if) { 1366 if (ifa->ifa_addr->sa_family == AF_INET && 1367 ia->ia_addr.sin_addr.s_addr == 1368 ifatoia(ifa)->ia_addr.sin_addr.s_addr) 1369 count++; 1370 } 1371 pserialize_read_exit(s); 1372 } 1373 } 1374 return (count); 1375 } 1376 1377 int 1378 carp_iamatch(struct in_ifaddr *ia, u_char *src, 1379 u_int32_t *count, u_int32_t index) 1380 { 1381 struct carp_softc *sc = ia->ia_ifp->if_softc; 1382 1383 if (carp_opts[CARPCTL_ARPBALANCE]) { 1384 /* 1385 * We use the source ip to decide which virtual host should 1386 * handle the request. If we're master of that virtual host, 1387 * then we respond, otherwise, just drop the arp packet on 1388 * the floor. 1389 */ 1390 1391 /* Count the elegible carp interfaces with this address */ 1392 if (*count == 0) 1393 *count = carp_addrcount( 1394 (struct carp_if *)ia->ia_ifp->if_carpdev->if_carp, 1395 ia, CARP_COUNT_RUNNING); 1396 1397 /* This should never happen, but... */ 1398 if (*count == 0) 1399 return (0); 1400 1401 if (carp_hash(sc, src) % *count == index - 1 && 1402 sc->sc_state == MASTER) { 1403 return (1); 1404 } 1405 } else { 1406 if (sc->sc_state == MASTER) 1407 return (1); 1408 } 1409 1410 return (0); 1411 } 1412 1413 #ifdef INET6 1414 struct ifaddr * 1415 carp_iamatch6(void *v, struct in6_addr *taddr) 1416 { 1417 struct carp_if *cif = v; 1418 struct carp_softc *vh; 1419 struct ifaddr *ifa; 1420 1421 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1422 int s = pserialize_read_enter(); 1423 IFADDR_READER_FOREACH(ifa, &vh->sc_if) { 1424 if (IN6_ARE_ADDR_EQUAL(taddr, 1425 &ifatoia6(ifa)->ia_addr.sin6_addr) && 1426 ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) == 1427 (IFF_UP|IFF_RUNNING)) && vh->sc_state == MASTER) 1428 return (ifa); 1429 } 1430 pserialize_read_exit(s); 1431 } 1432 1433 return (NULL); 1434 } 1435 #endif /* INET6 */ 1436 1437 struct ifnet * 1438 carp_ourether(void *v, struct ether_header *eh, u_char iftype, int src) 1439 { 1440 struct carp_if *cif = (struct carp_if *)v; 1441 struct carp_softc *vh; 1442 u_int8_t *ena; 1443 1444 if (src) 1445 ena = (u_int8_t *)&eh->ether_shost; 1446 else 1447 ena = (u_int8_t *)&eh->ether_dhost; 1448 1449 switch (iftype) { 1450 case IFT_ETHER: 1451 case IFT_FDDI: 1452 if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1) 1453 return (NULL); 1454 break; 1455 case IFT_ISO88025: 1456 if (ena[0] != 3 || ena[1] || ena[4] || ena[5]) 1457 return (NULL); 1458 break; 1459 default: 1460 return (NULL); 1461 break; 1462 } 1463 1464 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) 1465 if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) == 1466 (IFF_UP|IFF_RUNNING) && vh->sc_state == MASTER && 1467 !memcmp(ena, CLLADDR(vh->sc_if.if_sadl), 1468 ETHER_ADDR_LEN)) { 1469 return (&vh->sc_if); 1470 } 1471 1472 return (NULL); 1473 } 1474 1475 int 1476 carp_input(struct mbuf *m, u_int8_t *shost, u_int8_t *dhost, u_int16_t etype) 1477 { 1478 struct ether_header eh; 1479 struct carp_if *cif = (struct carp_if *)m_get_rcvif_NOMPSAFE(m)->if_carp; 1480 struct ifnet *ifp; 1481 1482 memcpy(&eh.ether_shost, shost, sizeof(eh.ether_shost)); 1483 memcpy(&eh.ether_dhost, dhost, sizeof(eh.ether_dhost)); 1484 eh.ether_type = etype; 1485 1486 if (m->m_flags & (M_BCAST|M_MCAST)) { 1487 struct carp_softc *vh; 1488 struct mbuf *m0; 1489 1490 /* 1491 * XXX Should really check the list of multicast addresses 1492 * for each CARP interface _before_ copying. 1493 */ 1494 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1495 m0 = m_copym(m, 0, M_COPYALL, M_DONTWAIT); 1496 if (m0 == NULL) 1497 continue; 1498 m_set_rcvif(m0, &vh->sc_if); 1499 ether_input(&vh->sc_if, m0); 1500 } 1501 return (1); 1502 } 1503 1504 ifp = carp_ourether(cif, &eh, m_get_rcvif_NOMPSAFE(m)->if_type, 0); 1505 if (ifp == NULL) { 1506 return (1); 1507 } 1508 1509 m_set_rcvif(m, ifp); 1510 1511 bpf_mtap(ifp, m); 1512 ifp->if_ipackets++; 1513 ether_input(ifp, m); 1514 return (0); 1515 } 1516 1517 static void 1518 carp_master_down(void *v) 1519 { 1520 struct carp_softc *sc = v; 1521 1522 switch (sc->sc_state) { 1523 case INIT: 1524 printf("%s: master_down event in INIT state\n", 1525 sc->sc_if.if_xname); 1526 break; 1527 case MASTER: 1528 break; 1529 case BACKUP: 1530 CARP_LOG(sc, ("INIT -> MASTER (preempting)")); 1531 carp_set_state(sc, MASTER); 1532 carp_send_ad(sc); 1533 carp_send_arp(sc); 1534 #ifdef INET6 1535 carp_send_na(sc); 1536 #endif /* INET6 */ 1537 carp_setrun(sc, 0); 1538 carp_setroute(sc, RTM_ADD); 1539 break; 1540 } 1541 } 1542 1543 /* 1544 * When in backup state, af indicates whether to reset the master down timer 1545 * for v4 or v6. If it's set to zero, reset the ones which are already pending. 1546 */ 1547 static void 1548 carp_setrun(struct carp_softc *sc, sa_family_t af) 1549 { 1550 struct timeval tv; 1551 1552 if (sc->sc_carpdev == NULL) { 1553 sc->sc_if.if_flags &= ~IFF_RUNNING; 1554 carp_set_state(sc, INIT); 1555 return; 1556 } 1557 1558 if (sc->sc_if.if_flags & IFF_UP && sc->sc_vhid > 0 && 1559 (sc->sc_naddrs || sc->sc_naddrs6) && !sc->sc_suppress) { 1560 sc->sc_if.if_flags |= IFF_RUNNING; 1561 } else { 1562 sc->sc_if.if_flags &= ~IFF_RUNNING; 1563 carp_setroute(sc, RTM_DELETE); 1564 return; 1565 } 1566 1567 switch (sc->sc_state) { 1568 case INIT: 1569 carp_set_state(sc, BACKUP); 1570 carp_setroute(sc, RTM_DELETE); 1571 carp_setrun(sc, 0); 1572 break; 1573 case BACKUP: 1574 callout_stop(&sc->sc_ad_tmo); 1575 tv.tv_sec = 3 * sc->sc_advbase; 1576 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1577 switch (af) { 1578 #ifdef INET 1579 case AF_INET: 1580 callout_schedule(&sc->sc_md_tmo, tvtohz(&tv)); 1581 break; 1582 #endif /* INET */ 1583 #ifdef INET6 1584 case AF_INET6: 1585 callout_schedule(&sc->sc_md6_tmo, tvtohz(&tv)); 1586 break; 1587 #endif /* INET6 */ 1588 default: 1589 if (sc->sc_naddrs) 1590 callout_schedule(&sc->sc_md_tmo, tvtohz(&tv)); 1591 #ifdef INET6 1592 if (sc->sc_naddrs6) 1593 callout_schedule(&sc->sc_md6_tmo, tvtohz(&tv)); 1594 #endif /* INET6 */ 1595 break; 1596 } 1597 break; 1598 case MASTER: 1599 tv.tv_sec = sc->sc_advbase; 1600 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1601 callout_schedule(&sc->sc_ad_tmo, tvtohz(&tv)); 1602 break; 1603 } 1604 } 1605 1606 static void 1607 carp_multicast_cleanup(struct carp_softc *sc) 1608 { 1609 struct ip_moptions *imo = &sc->sc_imo; 1610 #ifdef INET6 1611 struct ip6_moptions *im6o = &sc->sc_im6o; 1612 #endif 1613 u_int16_t n = imo->imo_num_memberships; 1614 1615 /* Clean up our own multicast memberships */ 1616 while (n-- > 0) { 1617 if (imo->imo_membership[n] != NULL) { 1618 in_delmulti(imo->imo_membership[n]); 1619 imo->imo_membership[n] = NULL; 1620 } 1621 } 1622 imo->imo_num_memberships = 0; 1623 imo->imo_multicast_if_index = 0; 1624 1625 #ifdef INET6 1626 while (!LIST_EMPTY(&im6o->im6o_memberships)) { 1627 struct in6_multi_mship *imm = 1628 LIST_FIRST(&im6o->im6o_memberships); 1629 1630 LIST_REMOVE(imm, i6mm_chain); 1631 in6_leavegroup(imm); 1632 } 1633 im6o->im6o_multicast_if_index = 0; 1634 #endif 1635 1636 /* And any other multicast memberships */ 1637 carp_ether_purgemulti(sc); 1638 } 1639 1640 static int 1641 carp_set_ifp(struct carp_softc *sc, struct ifnet *ifp) 1642 { 1643 struct carp_if *cif, *ncif = NULL; 1644 struct carp_softc *vr, *after = NULL; 1645 int myself = 0, error = 0; 1646 int s; 1647 1648 if (ifp == sc->sc_carpdev) 1649 return (0); 1650 1651 if (ifp != NULL) { 1652 if ((ifp->if_flags & IFF_MULTICAST) == 0) 1653 return (EADDRNOTAVAIL); 1654 1655 if (ifp->if_type == IFT_CARP) 1656 return (EINVAL); 1657 1658 if (ifp->if_carp == NULL) { 1659 ncif = malloc(sizeof(*cif), M_IFADDR, M_NOWAIT); 1660 if (ncif == NULL) 1661 return (ENOBUFS); 1662 if ((error = ifpromisc(ifp, 1))) { 1663 free(ncif, M_IFADDR); 1664 return (error); 1665 } 1666 1667 ncif->vhif_ifp = ifp; 1668 TAILQ_INIT(&ncif->vhif_vrs); 1669 } else { 1670 cif = (struct carp_if *)ifp->if_carp; 1671 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) 1672 if (vr != sc && vr->sc_vhid == sc->sc_vhid) 1673 return (EINVAL); 1674 } 1675 1676 /* detach from old interface */ 1677 if (sc->sc_carpdev != NULL) 1678 carpdetach(sc); 1679 1680 /* join multicast groups */ 1681 if (sc->sc_naddrs < 0 && 1682 (error = carp_join_multicast(sc)) != 0) { 1683 if (ncif != NULL) 1684 free(ncif, M_IFADDR); 1685 return (error); 1686 } 1687 1688 #ifdef INET6 1689 if (sc->sc_naddrs6 < 0 && 1690 (error = carp_join_multicast6(sc)) != 0) { 1691 if (ncif != NULL) 1692 free(ncif, M_IFADDR); 1693 carp_multicast_cleanup(sc); 1694 return (error); 1695 } 1696 #endif 1697 1698 /* attach carp interface to physical interface */ 1699 if (ncif != NULL) 1700 ifp->if_carp = (void *)ncif; 1701 sc->sc_carpdev = ifp; 1702 sc->sc_if.if_capabilities = ifp->if_capabilities & 1703 (IFCAP_TSOv4 | IFCAP_TSOv6 | 1704 IFCAP_CSUM_IPv4_Tx|IFCAP_CSUM_IPv4_Rx| 1705 IFCAP_CSUM_TCPv4_Tx|IFCAP_CSUM_TCPv4_Rx| 1706 IFCAP_CSUM_UDPv4_Tx|IFCAP_CSUM_UDPv4_Rx| 1707 IFCAP_CSUM_TCPv6_Tx|IFCAP_CSUM_TCPv6_Rx| 1708 IFCAP_CSUM_UDPv6_Tx|IFCAP_CSUM_UDPv6_Rx); 1709 1710 cif = (struct carp_if *)ifp->if_carp; 1711 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { 1712 if (vr == sc) 1713 myself = 1; 1714 if (vr->sc_vhid < sc->sc_vhid) 1715 after = vr; 1716 } 1717 1718 if (!myself) { 1719 /* We're trying to keep things in order */ 1720 if (after == NULL) { 1721 TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list); 1722 } else { 1723 TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, 1724 sc, sc_list); 1725 } 1726 cif->vhif_nvrs++; 1727 } 1728 if (sc->sc_naddrs || sc->sc_naddrs6) 1729 sc->sc_if.if_flags |= IFF_UP; 1730 carp_set_enaddr(sc); 1731 KERNEL_LOCK(1, NULL); 1732 s = splnet(); 1733 /* XXX linkstatehooks establish */ 1734 carp_carpdev_state(ifp); 1735 splx(s); 1736 KERNEL_UNLOCK_ONE(NULL); 1737 } else { 1738 carpdetach(sc); 1739 sc->sc_if.if_flags &= ~(IFF_UP|IFF_RUNNING); 1740 } 1741 return (0); 1742 } 1743 1744 static void 1745 carp_set_enaddr(struct carp_softc *sc) 1746 { 1747 uint8_t enaddr[ETHER_ADDR_LEN]; 1748 if (sc->sc_carpdev && sc->sc_carpdev->if_type == IFT_ISO88025) { 1749 enaddr[0] = 3; 1750 enaddr[1] = 0; 1751 enaddr[2] = 0x40 >> (sc->sc_vhid - 1); 1752 enaddr[3] = 0x40000 >> (sc->sc_vhid - 1); 1753 enaddr[4] = 0; 1754 enaddr[5] = 0; 1755 } else { 1756 enaddr[0] = 0; 1757 enaddr[1] = 0; 1758 enaddr[2] = 0x5e; 1759 enaddr[3] = 0; 1760 enaddr[4] = 1; 1761 enaddr[5] = sc->sc_vhid; 1762 } 1763 if_set_sadl(&sc->sc_if, enaddr, sizeof(enaddr), false); 1764 } 1765 1766 #if 0 1767 static void 1768 carp_addr_updated(void *v) 1769 { 1770 struct carp_softc *sc = (struct carp_softc *) v; 1771 struct ifaddr *ifa; 1772 int new_naddrs = 0, new_naddrs6 = 0; 1773 1774 IFADDR_READER_FOREACH(ifa, &sc->sc_if) { 1775 if (ifa->ifa_addr->sa_family == AF_INET) 1776 new_naddrs++; 1777 else if (ifa->ifa_addr->sa_family == AF_INET6) 1778 new_naddrs6++; 1779 } 1780 1781 /* Handle a callback after SIOCDIFADDR */ 1782 if (new_naddrs < sc->sc_naddrs || new_naddrs6 < sc->sc_naddrs6) { 1783 struct in_addr mc_addr; 1784 1785 sc->sc_naddrs = new_naddrs; 1786 sc->sc_naddrs6 = new_naddrs6; 1787 1788 /* Re-establish multicast membership removed by in_control */ 1789 mc_addr.s_addr = INADDR_CARP_GROUP; 1790 if (!in_multi_group(mc_addr, &sc->sc_if, 0)) { 1791 memset(&sc->sc_imo, 0, sizeof(sc->sc_imo)); 1792 1793 if (sc->sc_carpdev != NULL && sc->sc_naddrs > 0) 1794 carp_join_multicast(sc); 1795 } 1796 1797 if (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) { 1798 sc->sc_if.if_flags &= ~IFF_UP; 1799 carp_set_state(sc, INIT); 1800 } else 1801 carp_hmac_prepare(sc); 1802 } 1803 1804 carp_setrun(sc, 0); 1805 } 1806 #endif 1807 1808 static int 1809 carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin) 1810 { 1811 struct ifnet *ifp = sc->sc_carpdev; 1812 struct in_ifaddr *ia, *ia_if; 1813 int error = 0; 1814 int s; 1815 1816 if (sin->sin_addr.s_addr == 0) { 1817 if (!(sc->sc_if.if_flags & IFF_UP)) 1818 carp_set_state(sc, INIT); 1819 if (sc->sc_naddrs) 1820 sc->sc_if.if_flags |= IFF_UP; 1821 carp_setrun(sc, 0); 1822 return (0); 1823 } 1824 1825 /* we have to do this by hand to ensure we don't match on ourselves */ 1826 ia_if = NULL; 1827 s = pserialize_read_enter(); 1828 IN_ADDRLIST_READER_FOREACH(ia) { 1829 /* and, yeah, we need a multicast-capable iface too */ 1830 if (ia->ia_ifp != &sc->sc_if && 1831 ia->ia_ifp->if_type != IFT_CARP && 1832 (ia->ia_ifp->if_flags & IFF_MULTICAST) && 1833 (sin->sin_addr.s_addr & ia->ia_subnetmask) == 1834 ia->ia_subnet) { 1835 if (!ia_if) 1836 ia_if = ia; 1837 } 1838 } 1839 1840 if (ia_if) { 1841 ia = ia_if; 1842 if (ifp) { 1843 if (ifp != ia->ia_ifp) 1844 return (EADDRNOTAVAIL); 1845 } else { 1846 /* FIXME NOMPSAFE */ 1847 ifp = ia->ia_ifp; 1848 } 1849 } 1850 pserialize_read_exit(s); 1851 1852 if ((error = carp_set_ifp(sc, ifp))) 1853 return (error); 1854 1855 if (sc->sc_carpdev == NULL) 1856 return (EADDRNOTAVAIL); 1857 1858 if (sc->sc_naddrs == 0 && (error = carp_join_multicast(sc)) != 0) 1859 return (error); 1860 1861 sc->sc_naddrs++; 1862 if (sc->sc_carpdev != NULL) 1863 sc->sc_if.if_flags |= IFF_UP; 1864 1865 carp_set_state(sc, INIT); 1866 carp_setrun(sc, 0); 1867 1868 /* 1869 * Hook if_addrhooks so that we get a callback after in_ifinit has run, 1870 * to correct any inappropriate routes that it inserted. 1871 */ 1872 if (sc->ah_cookie == 0) { 1873 /* XXX link address hook */ 1874 } 1875 1876 return (0); 1877 } 1878 1879 static int 1880 carp_join_multicast(struct carp_softc *sc) 1881 { 1882 struct ip_moptions *imo = &sc->sc_imo, tmpimo; 1883 struct in_addr addr; 1884 1885 memset(&tmpimo, 0, sizeof(tmpimo)); 1886 addr.s_addr = INADDR_CARP_GROUP; 1887 if ((tmpimo.imo_membership[0] = 1888 in_addmulti(&addr, &sc->sc_if)) == NULL) { 1889 return (ENOBUFS); 1890 } 1891 1892 imo->imo_membership[0] = tmpimo.imo_membership[0]; 1893 imo->imo_num_memberships = 1; 1894 imo->imo_multicast_if_index = sc->sc_if.if_index; 1895 imo->imo_multicast_ttl = CARP_DFLTTL; 1896 imo->imo_multicast_loop = 0; 1897 return (0); 1898 } 1899 1900 1901 #ifdef INET6 1902 static int 1903 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6) 1904 { 1905 struct ifnet *ifp = sc->sc_carpdev; 1906 struct in6_ifaddr *ia, *ia_if; 1907 int error = 0; 1908 int s; 1909 1910 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { 1911 if (!(sc->sc_if.if_flags & IFF_UP)) 1912 carp_set_state(sc, INIT); 1913 if (sc->sc_naddrs6) 1914 sc->sc_if.if_flags |= IFF_UP; 1915 carp_setrun(sc, 0); 1916 return (0); 1917 } 1918 1919 /* we have to do this by hand to ensure we don't match on ourselves */ 1920 ia_if = NULL; 1921 s = pserialize_read_enter(); 1922 IN6_ADDRLIST_READER_FOREACH(ia) { 1923 int i; 1924 1925 for (i = 0; i < 4; i++) { 1926 if ((sin6->sin6_addr.s6_addr32[i] & 1927 ia->ia_prefixmask.sin6_addr.s6_addr32[i]) != 1928 (ia->ia_addr.sin6_addr.s6_addr32[i] & 1929 ia->ia_prefixmask.sin6_addr.s6_addr32[i])) 1930 break; 1931 } 1932 /* and, yeah, we need a multicast-capable iface too */ 1933 if (ia->ia_ifp != &sc->sc_if && 1934 ia->ia_ifp->if_type != IFT_CARP && 1935 (ia->ia_ifp->if_flags & IFF_MULTICAST) && 1936 (i == 4)) { 1937 if (!ia_if) 1938 ia_if = ia; 1939 } 1940 } 1941 pserialize_read_exit(s); 1942 1943 if (ia_if) { 1944 ia = ia_if; 1945 if (sc->sc_carpdev) { 1946 if (sc->sc_carpdev != ia->ia_ifp) 1947 return (EADDRNOTAVAIL); 1948 } else { 1949 ifp = ia->ia_ifp; 1950 } 1951 } 1952 1953 if ((error = carp_set_ifp(sc, ifp))) 1954 return (error); 1955 1956 if (sc->sc_carpdev == NULL) 1957 return (EADDRNOTAVAIL); 1958 1959 if (sc->sc_naddrs6 == 0 && (error = carp_join_multicast6(sc)) != 0) 1960 return (error); 1961 1962 sc->sc_naddrs6++; 1963 if (sc->sc_carpdev != NULL) 1964 sc->sc_if.if_flags |= IFF_UP; 1965 carp_set_state(sc, INIT); 1966 carp_setrun(sc, 0); 1967 1968 return (0); 1969 } 1970 1971 static int 1972 carp_join_multicast6(struct carp_softc *sc) 1973 { 1974 struct in6_multi_mship *imm, *imm2; 1975 struct ip6_moptions *im6o = &sc->sc_im6o; 1976 struct sockaddr_in6 addr6; 1977 int error; 1978 1979 /* Join IPv6 CARP multicast group */ 1980 memset(&addr6, 0, sizeof(addr6)); 1981 addr6.sin6_family = AF_INET6; 1982 addr6.sin6_len = sizeof(addr6); 1983 addr6.sin6_addr.s6_addr16[0] = htons(0xff02); 1984 addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index); 1985 addr6.sin6_addr.s6_addr8[15] = 0x12; 1986 if ((imm = in6_joingroup(&sc->sc_if, 1987 &addr6.sin6_addr, &error, 0)) == NULL) { 1988 return (error); 1989 } 1990 /* join solicited multicast address */ 1991 memset(&addr6.sin6_addr, 0, sizeof(addr6.sin6_addr)); 1992 addr6.sin6_addr.s6_addr16[0] = htons(0xff02); 1993 addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index); 1994 addr6.sin6_addr.s6_addr32[1] = 0; 1995 addr6.sin6_addr.s6_addr32[2] = htonl(1); 1996 addr6.sin6_addr.s6_addr32[3] = 0; 1997 addr6.sin6_addr.s6_addr8[12] = 0xff; 1998 if ((imm2 = in6_joingroup(&sc->sc_if, 1999 &addr6.sin6_addr, &error, 0)) == NULL) { 2000 in6_leavegroup(imm); 2001 return (error); 2002 } 2003 2004 /* apply v6 multicast membership */ 2005 im6o->im6o_multicast_if_index = sc->sc_if.if_index; 2006 if (imm) 2007 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, 2008 i6mm_chain); 2009 if (imm2) 2010 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm2, 2011 i6mm_chain); 2012 2013 return (0); 2014 } 2015 2016 #endif /* INET6 */ 2017 2018 static int 2019 carp_ioctl(struct ifnet *ifp, u_long cmd, void *data) 2020 { 2021 struct lwp *l = curlwp; /* XXX */ 2022 struct carp_softc *sc = ifp->if_softc, *vr; 2023 struct carpreq carpr; 2024 struct ifaddr *ifa; 2025 struct ifreq *ifr; 2026 struct ifnet *cdev = NULL; 2027 int error = 0; 2028 2029 ifa = (struct ifaddr *)data; 2030 ifr = (struct ifreq *)data; 2031 2032 switch (cmd) { 2033 case SIOCINITIFADDR: 2034 switch (ifa->ifa_addr->sa_family) { 2035 #ifdef INET 2036 case AF_INET: 2037 sc->sc_if.if_flags |= IFF_UP; 2038 memcpy(ifa->ifa_dstaddr, ifa->ifa_addr, 2039 sizeof(struct sockaddr)); 2040 error = carp_set_addr(sc, satosin(ifa->ifa_addr)); 2041 break; 2042 #endif /* INET */ 2043 #ifdef INET6 2044 case AF_INET6: 2045 sc->sc_if.if_flags|= IFF_UP; 2046 error = carp_set_addr6(sc, satosin6(ifa->ifa_addr)); 2047 break; 2048 #endif /* INET6 */ 2049 default: 2050 error = EAFNOSUPPORT; 2051 break; 2052 } 2053 break; 2054 2055 case SIOCSIFFLAGS: 2056 if ((error = ifioctl_common(ifp, cmd, data)) != 0) 2057 break; 2058 if (sc->sc_state != INIT && !(ifr->ifr_flags & IFF_UP)) { 2059 callout_stop(&sc->sc_ad_tmo); 2060 callout_stop(&sc->sc_md_tmo); 2061 callout_stop(&sc->sc_md6_tmo); 2062 if (sc->sc_state == MASTER) { 2063 /* we need the interface up to bow out */ 2064 sc->sc_if.if_flags |= IFF_UP; 2065 sc->sc_bow_out = 1; 2066 carp_send_ad(sc); 2067 } 2068 sc->sc_if.if_flags &= ~IFF_UP; 2069 carp_set_state(sc, INIT); 2070 carp_setrun(sc, 0); 2071 } else if (sc->sc_state == INIT && (ifr->ifr_flags & IFF_UP)) { 2072 sc->sc_if.if_flags |= IFF_UP; 2073 carp_setrun(sc, 0); 2074 } 2075 break; 2076 2077 case SIOCSVH: 2078 if (l == NULL) 2079 break; 2080 if ((error = kauth_authorize_network(l->l_cred, 2081 KAUTH_NETWORK_INTERFACE, 2082 KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, (void *)cmd, 2083 NULL)) != 0) 2084 break; 2085 if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr))) 2086 break; 2087 error = 1; 2088 if (carpr.carpr_carpdev[0] != '\0' && 2089 (cdev = ifunit(carpr.carpr_carpdev)) == NULL) 2090 return (EINVAL); 2091 if ((error = carp_set_ifp(sc, cdev))) 2092 return (error); 2093 if (sc->sc_state != INIT && carpr.carpr_state != sc->sc_state) { 2094 switch (carpr.carpr_state) { 2095 case BACKUP: 2096 callout_stop(&sc->sc_ad_tmo); 2097 carp_set_state(sc, BACKUP); 2098 carp_setrun(sc, 0); 2099 carp_setroute(sc, RTM_DELETE); 2100 break; 2101 case MASTER: 2102 carp_master_down(sc); 2103 break; 2104 default: 2105 break; 2106 } 2107 } 2108 if (carpr.carpr_vhid > 0) { 2109 if (carpr.carpr_vhid > 255) { 2110 error = EINVAL; 2111 break; 2112 } 2113 if (sc->sc_carpdev) { 2114 struct carp_if *cif; 2115 cif = (struct carp_if *)sc->sc_carpdev->if_carp; 2116 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) 2117 if (vr != sc && 2118 vr->sc_vhid == carpr.carpr_vhid) 2119 return (EINVAL); 2120 } 2121 sc->sc_vhid = carpr.carpr_vhid; 2122 carp_set_enaddr(sc); 2123 carp_set_state(sc, INIT); 2124 error--; 2125 } 2126 if (carpr.carpr_advbase > 0 || carpr.carpr_advskew > 0) { 2127 if (carpr.carpr_advskew > 254) { 2128 error = EINVAL; 2129 break; 2130 } 2131 if (carpr.carpr_advbase > 255) { 2132 error = EINVAL; 2133 break; 2134 } 2135 sc->sc_advbase = carpr.carpr_advbase; 2136 sc->sc_advskew = carpr.carpr_advskew; 2137 error--; 2138 } 2139 memcpy(sc->sc_key, carpr.carpr_key, sizeof(sc->sc_key)); 2140 if (error > 0) 2141 error = EINVAL; 2142 else { 2143 error = 0; 2144 carp_setrun(sc, 0); 2145 } 2146 break; 2147 2148 case SIOCGVH: 2149 memset(&carpr, 0, sizeof(carpr)); 2150 if (sc->sc_carpdev != NULL) 2151 strlcpy(carpr.carpr_carpdev, sc->sc_carpdev->if_xname, 2152 IFNAMSIZ); 2153 carpr.carpr_state = sc->sc_state; 2154 carpr.carpr_vhid = sc->sc_vhid; 2155 carpr.carpr_advbase = sc->sc_advbase; 2156 carpr.carpr_advskew = sc->sc_advskew; 2157 2158 if ((l != NULL) && (error = kauth_authorize_network(l->l_cred, 2159 KAUTH_NETWORK_INTERFACE, 2160 KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, (void *)cmd, 2161 NULL)) == 0) 2162 memcpy(carpr.carpr_key, sc->sc_key, 2163 sizeof(carpr.carpr_key)); 2164 error = copyout(&carpr, ifr->ifr_data, sizeof(carpr)); 2165 break; 2166 2167 case SIOCADDMULTI: 2168 error = carp_ether_addmulti(sc, ifr); 2169 break; 2170 2171 case SIOCDELMULTI: 2172 error = carp_ether_delmulti(sc, ifr); 2173 break; 2174 2175 case SIOCSIFCAP: 2176 if ((error = ifioctl_common(ifp, cmd, data)) == ENETRESET) 2177 error = 0; 2178 break; 2179 2180 default: 2181 error = ether_ioctl(ifp, cmd, data); 2182 } 2183 2184 carp_hmac_prepare(sc); 2185 return (error); 2186 } 2187 2188 2189 /* 2190 * Start output on carp interface. This function should never be called. 2191 */ 2192 static void 2193 carp_start(struct ifnet *ifp) 2194 { 2195 #ifdef DEBUG 2196 printf("%s: start called\n", ifp->if_xname); 2197 #endif 2198 } 2199 2200 int 2201 carp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa, 2202 const struct rtentry *rt) 2203 { 2204 struct carp_softc *sc = ((struct carp_softc *)ifp->if_softc); 2205 KASSERT(KERNEL_LOCKED_P()); 2206 2207 if (sc->sc_carpdev != NULL && sc->sc_state == MASTER) { 2208 return if_output_lock(sc->sc_carpdev, ifp, m, sa, rt); 2209 } else { 2210 m_freem(m); 2211 return (ENETUNREACH); 2212 } 2213 } 2214 2215 static void 2216 carp_set_state(struct carp_softc *sc, int state) 2217 { 2218 static const char *carp_states[] = { CARP_STATES }; 2219 int link_state; 2220 2221 if (sc->sc_state == state) 2222 return; 2223 2224 CARP_LOG(sc, ("state transition from: %s -> to: %s", carp_states[sc->sc_state], carp_states[state])); 2225 2226 sc->sc_state = state; 2227 switch (state) { 2228 case BACKUP: 2229 link_state = LINK_STATE_DOWN; 2230 break; 2231 case MASTER: 2232 link_state = LINK_STATE_UP; 2233 break; 2234 default: 2235 link_state = LINK_STATE_UNKNOWN; 2236 break; 2237 } 2238 if_link_state_change_softint(&sc->sc_if, link_state); 2239 } 2240 2241 void 2242 carp_carpdev_state(void *v) 2243 { 2244 struct carp_if *cif; 2245 struct carp_softc *sc; 2246 struct ifnet *ifp = v; 2247 2248 if (ifp->if_type == IFT_CARP) 2249 return; 2250 2251 cif = (struct carp_if *)ifp->if_carp; 2252 2253 TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) { 2254 int suppressed = sc->sc_suppress; 2255 2256 if (sc->sc_carpdev->if_link_state == LINK_STATE_DOWN || 2257 !(sc->sc_carpdev->if_flags & IFF_UP)) { 2258 sc->sc_if.if_flags &= ~IFF_RUNNING; 2259 callout_stop(&sc->sc_ad_tmo); 2260 callout_stop(&sc->sc_md_tmo); 2261 callout_stop(&sc->sc_md6_tmo); 2262 carp_set_state(sc, INIT); 2263 sc->sc_suppress = 1; 2264 carp_setrun(sc, 0); 2265 if (!suppressed) { 2266 carp_suppress_preempt++; 2267 if (carp_suppress_preempt == 1) 2268 carp_send_ad_all(); 2269 } 2270 } else { 2271 carp_set_state(sc, INIT); 2272 sc->sc_suppress = 0; 2273 carp_setrun(sc, 0); 2274 if (suppressed) 2275 carp_suppress_preempt--; 2276 } 2277 } 2278 } 2279 2280 static int 2281 carp_ether_addmulti(struct carp_softc *sc, struct ifreq *ifr) 2282 { 2283 const struct sockaddr *sa = ifreq_getaddr(SIOCADDMULTI, ifr); 2284 struct ifnet *ifp; 2285 struct carp_mc_entry *mc; 2286 u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN]; 2287 int error; 2288 2289 ifp = sc->sc_carpdev; 2290 if (ifp == NULL) 2291 return (EINVAL); 2292 2293 error = ether_addmulti(sa, &sc->sc_ac); 2294 if (error != ENETRESET) 2295 return (error); 2296 2297 /* 2298 * This is new multicast address. We have to tell parent 2299 * about it. Also, remember this multicast address so that 2300 * we can delete them on unconfigure. 2301 */ 2302 mc = malloc(sizeof(struct carp_mc_entry), M_DEVBUF, M_NOWAIT); 2303 if (mc == NULL) { 2304 error = ENOMEM; 2305 goto alloc_failed; 2306 } 2307 2308 /* 2309 * As ether_addmulti() returns ENETRESET, following two 2310 * statement shouldn't fail. 2311 */ 2312 (void)ether_multiaddr(sa, addrlo, addrhi); 2313 ETHER_LOOKUP_MULTI(addrlo, addrhi, &sc->sc_ac, mc->mc_enm); 2314 memcpy(&mc->mc_addr, sa, sa->sa_len); 2315 LIST_INSERT_HEAD(&sc->carp_mc_listhead, mc, mc_entries); 2316 2317 error = if_mcast_op(ifp, SIOCADDMULTI, sa); 2318 if (error != 0) 2319 goto ioctl_failed; 2320 2321 return (error); 2322 2323 ioctl_failed: 2324 LIST_REMOVE(mc, mc_entries); 2325 free(mc, M_DEVBUF); 2326 alloc_failed: 2327 (void)ether_delmulti(sa, &sc->sc_ac); 2328 2329 return (error); 2330 } 2331 2332 static int 2333 carp_ether_delmulti(struct carp_softc *sc, struct ifreq *ifr) 2334 { 2335 const struct sockaddr *sa = ifreq_getaddr(SIOCDELMULTI, ifr); 2336 struct ifnet *ifp; 2337 struct ether_multi *enm; 2338 struct carp_mc_entry *mc; 2339 u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN]; 2340 int error; 2341 2342 ifp = sc->sc_carpdev; 2343 if (ifp == NULL) 2344 return (EINVAL); 2345 2346 /* 2347 * Find a key to lookup carp_mc_entry. We have to do this 2348 * before calling ether_delmulti for obvious reason. 2349 */ 2350 if ((error = ether_multiaddr(sa, addrlo, addrhi)) != 0) 2351 return (error); 2352 ETHER_LOOKUP_MULTI(addrlo, addrhi, &sc->sc_ac, enm); 2353 if (enm == NULL) 2354 return (EINVAL); 2355 2356 LIST_FOREACH(mc, &sc->carp_mc_listhead, mc_entries) 2357 if (mc->mc_enm == enm) 2358 break; 2359 2360 /* We won't delete entries we didn't add */ 2361 if (mc == NULL) 2362 return (EINVAL); 2363 2364 error = ether_delmulti(sa, &sc->sc_ac); 2365 if (error != ENETRESET) 2366 return (error); 2367 2368 /* We no longer use this multicast address. Tell parent so. */ 2369 error = if_mcast_op(ifp, SIOCDELMULTI, sa); 2370 if (error == 0) { 2371 /* And forget about this address. */ 2372 LIST_REMOVE(mc, mc_entries); 2373 free(mc, M_DEVBUF); 2374 } else 2375 (void)ether_addmulti(sa, &sc->sc_ac); 2376 return (error); 2377 } 2378 2379 /* 2380 * Delete any multicast address we have asked to add from parent 2381 * interface. Called when the carp is being unconfigured. 2382 */ 2383 static void 2384 carp_ether_purgemulti(struct carp_softc *sc) 2385 { 2386 struct ifnet *ifp = sc->sc_carpdev; /* Parent. */ 2387 struct carp_mc_entry *mc; 2388 2389 if (ifp == NULL) 2390 return; 2391 2392 while ((mc = LIST_FIRST(&sc->carp_mc_listhead)) != NULL) { 2393 (void)if_mcast_op(ifp, SIOCDELMULTI, sstosa(&mc->mc_addr)); 2394 LIST_REMOVE(mc, mc_entries); 2395 free(mc, M_DEVBUF); 2396 } 2397 } 2398 2399 static int 2400 sysctl_net_inet_carp_stats(SYSCTLFN_ARGS) 2401 { 2402 2403 return (NETSTAT_SYSCTL(carpstat_percpu, CARP_NSTATS)); 2404 } 2405 2406 void 2407 carp_init(void) 2408 { 2409 2410 sysctl_net_inet_carp_setup(NULL); 2411 #ifdef MBUFTRACE 2412 MOWNER_ATTACH(&carp_proto_mowner_rx); 2413 MOWNER_ATTACH(&carp_proto_mowner_tx); 2414 MOWNER_ATTACH(&carp_proto6_mowner_rx); 2415 MOWNER_ATTACH(&carp_proto6_mowner_tx); 2416 #endif 2417 2418 carp_wqinput = wqinput_create("carp", _carp_proto_input); 2419 #ifdef INET6 2420 carp6_wqinput = wqinput_create("carp6", _carp6_proto_input); 2421 #endif 2422 } 2423 2424 static void 2425 sysctl_net_inet_carp_setup(struct sysctllog **clog) 2426 { 2427 2428 sysctl_createv(clog, 0, NULL, NULL, 2429 CTLFLAG_PERMANENT, 2430 CTLTYPE_NODE, "inet", NULL, 2431 NULL, 0, NULL, 0, 2432 CTL_NET, PF_INET, CTL_EOL); 2433 sysctl_createv(clog, 0, NULL, NULL, 2434 CTLFLAG_PERMANENT, 2435 CTLTYPE_NODE, "carp", 2436 SYSCTL_DESCR("CARP related settings"), 2437 NULL, 0, NULL, 0, 2438 CTL_NET, PF_INET, IPPROTO_CARP, CTL_EOL); 2439 2440 sysctl_createv(clog, 0, NULL, NULL, 2441 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2442 CTLTYPE_INT, "preempt", 2443 SYSCTL_DESCR("Enable CARP Preempt"), 2444 NULL, 0, &carp_opts[CARPCTL_PREEMPT], 0, 2445 CTL_NET, PF_INET, IPPROTO_CARP, 2446 CTL_CREATE, CTL_EOL); 2447 sysctl_createv(clog, 0, NULL, NULL, 2448 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2449 CTLTYPE_INT, "arpbalance", 2450 SYSCTL_DESCR("Enable ARP balancing"), 2451 NULL, 0, &carp_opts[CARPCTL_ARPBALANCE], 0, 2452 CTL_NET, PF_INET, IPPROTO_CARP, 2453 CTL_CREATE, CTL_EOL); 2454 sysctl_createv(clog, 0, NULL, NULL, 2455 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2456 CTLTYPE_INT, "allow", 2457 SYSCTL_DESCR("Enable CARP"), 2458 NULL, 0, &carp_opts[CARPCTL_ALLOW], 0, 2459 CTL_NET, PF_INET, IPPROTO_CARP, 2460 CTL_CREATE, CTL_EOL); 2461 sysctl_createv(clog, 0, NULL, NULL, 2462 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2463 CTLTYPE_INT, "log", 2464 SYSCTL_DESCR("CARP logging"), 2465 NULL, 0, &carp_opts[CARPCTL_LOG], 0, 2466 CTL_NET, PF_INET, IPPROTO_CARP, 2467 CTL_CREATE, CTL_EOL); 2468 sysctl_createv(clog, 0, NULL, NULL, 2469 CTLFLAG_PERMANENT, 2470 CTLTYPE_STRUCT, "stats", 2471 SYSCTL_DESCR("CARP statistics"), 2472 sysctl_net_inet_carp_stats, 0, NULL, 0, 2473 CTL_NET, PF_INET, IPPROTO_CARP, CARPCTL_STATS, 2474 CTL_EOL); 2475 } 2476