1 /* 2 * Copyright (c) 2002 Michael Shalayeff. All rights reserved. 3 * Copyright (c) 2003 Ryan McBride. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 18 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 19 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 22 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 23 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 24 * THE POSSIBILITY OF SUCH DAMAGE. 
25 */ 26 /* 27 * $FreeBSD: src/sys/netinet/ip_carp.c,v 1.48 2007/02/02 09:39:09 glebius Exp $ 28 */ 29 30 #include "opt_carp.h" 31 #include "opt_inet.h" 32 #include "opt_inet6.h" 33 34 #include <sys/param.h> 35 #include <sys/systm.h> 36 #include <sys/kernel.h> 37 #include <sys/in_cksum.h> 38 #include <sys/limits.h> 39 #include <sys/malloc.h> 40 #include <sys/mbuf.h> 41 #include <sys/msgport2.h> 42 #include <sys/time.h> 43 #include <sys/proc.h> 44 #include <sys/priv.h> 45 #include <sys/sockio.h> 46 #include <sys/socket.h> 47 #include <sys/sysctl.h> 48 #include <sys/syslog.h> 49 #include <sys/thread.h> 50 51 #include <machine/stdarg.h> 52 #include <crypto/sha1.h> 53 54 #include <net/bpf.h> 55 #include <net/ethernet.h> 56 #include <net/if.h> 57 #include <net/if_dl.h> 58 #include <net/if_types.h> 59 #include <net/route.h> 60 #include <net/if_clone.h> 61 #include <net/if_var.h> 62 #include <net/ifq_var.h> 63 #include <net/netmsg2.h> 64 65 #ifdef INET 66 #include <netinet/in.h> 67 #include <netinet/in_var.h> 68 #include <netinet/in_systm.h> 69 #include <netinet/ip.h> 70 #include <netinet/ip_var.h> 71 #include <netinet/if_ether.h> 72 #endif 73 74 #ifdef INET6 75 #include <netinet/icmp6.h> 76 #include <netinet/ip6.h> 77 #include <netinet6/ip6_var.h> 78 #include <netinet6/scope6_var.h> 79 #include <netinet6/nd6.h> 80 #endif 81 82 #include <netinet/ip_carp.h> 83 84 /* 85 * Note about carp's MP safe approach: 86 * 87 * Brief: carp_softc (softc), carp_softc_container (scc) 88 * 89 * - All configuration operation, e.g. 
ioctl, add/delete inet addresses 90 * is serialized by netisr0; not by carp's serializer 91 * 92 * - Backing interface's if_carp and carp_softc's relationship: 93 * 94 * +---------+ 95 * if_carp -->| carp_if | 96 * +---------+ 97 * | 98 * | 99 * V +---------+ 100 * +-----+ | | 101 * | scc |-->| softc | 102 * +-----+ | | 103 * | +---------+ 104 * | 105 * V +---------+ 106 * +-----+ | | 107 * | scc |-->| softc | 108 * +-----+ | | 109 * +---------+ 110 * 111 * - if_carp creation, modification and deletion all happen in netisr0, 112 * as stated previously. Since if_carp is accessed by multiple netisrs, 113 * the modification to if_carp is conducted in the following way: 114 * 115 * Adding carp_softc: 116 * 117 * 1) Duplicate the old carp_if to new carp_if (ncif), and insert the 118 * to-be-added carp_softc to the new carp_if (ncif): 119 * 120 * if_carp ncif 121 * | | 122 * V V 123 * +---------+ +---------+ 124 * | carp_if | | carp_if | 125 * +---------+ +---------+ 126 * | | 127 * | | 128 * V +-------+ V 129 * +-----+ | | +-----+ 130 * | scc |---->| softc |<----| scc | 131 * +-----+ | | +-----+ 132 * | +-------+ | 133 * | | 134 * V +-------+ V 135 * +-----+ | | +-----+ 136 * | scc |---->| softc |<----| scc | 137 * +-----+ | | +-----+ 138 * +-------+ | 139 * | 140 * +-------+ V 141 * | | +-----+ 142 * | softc |<----| scc | 143 * | | +-----+ 144 * +-------+ 145 * 146 * 2) Switch save if_carp into ocif and switch if_carp to ncif: 147 * 148 * ocif if_carp 149 * | | 150 * V V 151 * +---------+ +---------+ 152 * | carp_if | | carp_if | 153 * +---------+ +---------+ 154 * | | 155 * | | 156 * V +-------+ V 157 * +-----+ | | +-----+ 158 * | scc |---->| softc |<----| scc | 159 * +-----+ | | +-----+ 160 * | +-------+ | 161 * | | 162 * V +-------+ V 163 * +-----+ | | +-----+ 164 * | scc |---->| softc |<----| scc | 165 * +-----+ | | +-----+ 166 * +-------+ | 167 * | 168 * +-------+ V 169 * | | +-----+ 170 * | softc |<----| scc | 171 * | | +-----+ 172 * +-------+ 173 * 174 * 3) Run 
netmsg_service_sync(), which will make sure that
 *      ocif is no longer accessed (all network operations
 *      happen only in network threads).
 *   4) Free ocif -- only carp_if and scc are freed.
 *
 *
 *   Removing carp_softc:
 *
 *   1) Duplicate the old carp_if to new carp_if (ncif); the to-be-deleted
 *      carp_softc will not be duplicated.
 *
 *        if_carp                     ncif
 *           |                         |
 *           V                         V
 *      +---------+               +---------+
 *      | carp_if |               | carp_if |
 *      +---------+               +---------+
 *           |                         |
 *           |                         |
 *           V        +-------+        V
 *        +-----+     |       |     +-----+
 *        | scc |---->| softc |<----| scc |
 *        +-----+     |       |     +-----+
 *           |        +-------+        |
 *           |                         |
 *           V        +-------+        |
 *        +-----+     |       |        |
 *        | scc |---->| softc |        |
 *        +-----+     |       |        |
 *           |        +-------+        |
 *           |                         |
 *           V        +-------+        V
 *        +-----+     |       |     +-----+
 *        | scc |---->| softc |<----| scc |
 *        +-----+     |       |     +-----+
 *                    +-------+
 *
 *   2) Save if_carp into ocif and switch if_carp to ncif:
 *
 *          ocif                    if_carp
 *           |                         |
 *           V                         V
 *      +---------+               +---------+
 *      | carp_if |               | carp_if |
 *      +---------+               +---------+
 *           |                         |
 *           |                         |
 *           V        +-------+        V
 *        +-----+     |       |     +-----+
 *        | scc |---->| softc |<----| scc |
 *        +-----+     |       |     +-----+
 *           |        +-------+        |
 *           |                         |
 *           V        +-------+        |
 *        +-----+     |       |        |
 *        | scc |---->| softc |        |
 *        +-----+     |       |        |
 *           |        +-------+        |
 *           |                         |
 *           V        +-------+        V
 *        +-----+     |       |     +-----+
 *        | scc |---->| softc |<----| scc |
 *        +-----+     |       |     +-----+
 *                    +-------+
 *
 *   3) Run netmsg_service_sync(), which will make sure that
 *      ocif is no longer accessed (all network operations
 *      happen only in network threads).
 *   4) Free ocif -- only carp_if and scc are freed.
 *
 * - if_carp accessing:
 *   The accessing code should cache the if_carp in a local temporary
 *   variable and access the temporary variable along the code path
 *   instead of accessing if_carp later on.
 */

/* Name prefix used for cloned carp interfaces */
#define	CARP_IFNAME		"carp"
/* True only when the interface is both IFF_UP and IFF_RUNNING */
#define CARP_IS_RUNNING(ifp)	\
	(((ifp)->if_flags & (IFF_UP | IFF_RUNNING)) == (IFF_UP | IFF_RUNNING))

struct carp_softc;

/*
 * One IPv4 virtual address of a carp interface.  Entries are linked on
 * carp_softc.sc_vha_list.
 */
struct carp_vhaddr {
	uint32_t		vha_flags;	/* CARP_VHAF_ */
	struct in_ifaddr	*vha_ia;	/* carp address */
	struct in_ifaddr	*vha_iaback;	/* backing address */
	TAILQ_ENTRY(carp_vhaddr) vha_link;
};
TAILQ_HEAD(carp_vhaddr_list, carp_vhaddr);

/*
 * Message used to run carp operations in a netisr thread; which of the
 * nc_* fields are meaningful depends on the dispatch function.
 */
struct netmsg_carp {
	struct netmsg_base	base;
	struct ifnet		*nc_carpdev;
	struct carp_softc	*nc_softc;
	void			*nc_data;
	size_t			nc_datalen;
};

/*
 * Per carp interface state.
 */
struct carp_softc {
	struct arpcom		 arpcom;	/* embeds the ifnet (sc_if) */
	struct ifnet		*sc_carpdev;	/* parent interface */
	struct carp_vhaddr_list	 sc_vha_list;	/* virtual addr list */

	const struct in_ifaddr	*sc_ia;		/* primary iface address v4 */
	struct ip_moptions	 sc_imo;

#ifdef INET6
	struct in6_ifaddr	*sc_ia6;	/* primary iface address v6 */
	struct ip6_moptions	 sc_im6o;
#endif /* INET6 */

	enum { INIT = 0, BACKUP, MASTER }
				 sc_state;
	boolean_t		 sc_dead;	/* set once destroy has begun */

	int			 sc_suppress;

	int			 sc_sendad_errors;
#define CARP_SENDAD_MAX_ERRORS	3
	int			 sc_sendad_success;
#define CARP_SENDAD_MIN_SUCCESS	3

	int			 sc_vhid;	/* -1 until configured */
	int			 sc_advskew;
	int			 sc_naddrs;	/* actually used IPv4 vha */
	int			 sc_naddrs6;
	int			 sc_advbase;	/* seconds */
	int			 sc_init_counter;
	uint64_t		 sc_counter;	/* last accepted adv counter */

	/* authentication */
#define CARP_HMAC_PAD	64
	unsigned char		 sc_key[CARP_KEY_LEN];
	unsigned char		 sc_pad[CARP_HMAC_PAD];	/* HMAC pad block */
	SHA1_CTX		 sc_sha1;	/* precomputed inner hash */

	struct callout		 sc_ad_tmo;	/* advertisement timeout */
	struct netmsg_carp	 sc_ad_msg;	/* adv timeout netmsg */
	struct callout		 sc_md_tmo;	/* ip4 master down timeout */
	struct callout		 sc_md6_tmo;	/* ip6 master down timeout */
	struct netmsg_carp	 sc_md_msg;	/* master down timeout netmsg */

	LIST_ENTRY(carp_softc)	 sc_next;	/* Interface clue */
};
/* Shorthand for the ifnet embedded in a carp_softc */
#define sc_if	arpcom.ac_if

/*
 * List node that points at a carp_softc.  A carp_if is a list of these
 * nodes; the nodes are owned by the list, the softcs are not.
 */
struct carp_softc_container {
	TAILQ_ENTRY(carp_softc_container) scc_link;
	struct carp_softc	*scc_softc;
};
TAILQ_HEAD(carp_if, carp_softc_container);

SYSCTL_DECL(_net_inet_carp);

/* Tunables, indexed by CARPCTL_*; exported as net.inet.carp.* below */
static int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 1, 0, 0 }; /* XXX for now */
SYSCTL_INT(_net_inet_carp, CARPCTL_ALLOW, allow, CTLFLAG_RW,
    &carp_opts[CARPCTL_ALLOW], 0, "Accept incoming CARP packets");
SYSCTL_INT(_net_inet_carp, CARPCTL_PREEMPT, preempt, CTLFLAG_RW,
    &carp_opts[CARPCTL_PREEMPT], 0, "high-priority backup preemption mode");
SYSCTL_INT(_net_inet_carp, CARPCTL_LOG, log, CTLFLAG_RW,
    &carp_opts[CARPCTL_LOG], 0, "log bad carp packets");
SYSCTL_INT(_net_inet_carp, CARPCTL_ARPBALANCE, arpbalance, CTLFLAG_RW,
    &carp_opts[CARPCTL_ARPBALANCE], 0, "balance arp responses");

/* Read-only from userland; consulted when computing our advertised skew */
static int carp_suppress_preempt = 0;
SYSCTL_INT(_net_inet_carp, OID_AUTO, suppress_preempt, CTLFLAG_RD,
    &carp_suppress_preempt, 0, "Preemption is suppressed");

/* Global packet/error counters */
static struct carpstats carpstats;
SYSCTL_STRUCT(_net_inet_carp, CARPCTL_STATS, stats, CTLFLAG_RW,
    &carpstats, carpstats,
    "CARP statistics (struct carpstats, netinet/ip_carp.h)");

/* Log at LOG_INFO when the "log" sysctl is greater than 0 */
#define CARP_LOG(...)	do {				\
	if (carp_opts[CARPCTL_LOG] > 0)			\
		log(LOG_INFO, __VA_ARGS__);		\
} while (0)

/* Log at LOG_DEBUG when the "log" sysctl is greater than 1 */
#define CARP_DEBUG(...)	do {				\
	if (carp_opts[CARPCTL_LOG] > 1)			\
		log(LOG_DEBUG, __VA_ARGS__);		\
} while (0)

/* Held around insertions into and removals from carpif_list */
static struct lwkt_token carp_listtok = LWKT_TOKEN_INITIALIZER(carp_list_token);

/* HMAC helpers, routing, input, cloning and advertisement handling */
static void	carp_hmac_prepare(struct carp_softc *);
static void	carp_hmac_generate(struct carp_softc *, uint32_t *,
		    unsigned char *);
static int	carp_hmac_verify(struct carp_softc *, uint32_t *,
		    unsigned char *);
static void	carp_setroute(struct carp_softc *, int);
static void	carp_proto_input_c(struct carp_softc *, struct mbuf *,
		    struct carp_header *, sa_family_t);
static int 	carp_clone_create(struct if_clone *, int, caddr_t);
static int 	carp_clone_destroy(struct ifnet *);
static void	carp_detach(struct carp_softc *, boolean_t, boolean_t);
static void	carp_prepare_ad(struct carp_softc *, struct carp_header *);
static void	carp_send_ad_all(void);
static void	carp_send_ad_timeout(void *);
static void	carp_send_ad(struct carp_softc *);
static void	carp_send_arp(struct carp_softc *);
static void	carp_master_down_timeout(void *);
static void	carp_master_down(struct carp_softc *);
static void	carp_setrun(struct carp_softc *, sa_family_t);
static void	carp_set_state(struct carp_softc *, int);
static struct ifnet *carp_forus(struct carp_if *, const uint8_t *);

/* ifnet methods installed by carp_clone_create() */
static void	carp_init(void *);
static int	carp_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
static int	carp_output(struct ifnet *, struct mbuf *, struct sockaddr *,
		    struct rtentry *);
static void	carp_start(struct ifnet *);

/* Address management */
static void	carp_multicast_cleanup(struct carp_softc *);
static void	carp_add_addr(struct carp_softc *, struct ifaddr *);
static void	carp_del_addr(struct carp_softc *, struct ifaddr *);
static void	carp_config_addr(struct carp_softc *, struct ifaddr *);
static void	carp_link_addrs(struct carp_softc *, struct ifnet *,
		    struct ifaddr *);
static void	carp_unlink_addrs(struct carp_softc *, struct ifnet *,
		    struct ifaddr *);
static void	carp_update_addrs(struct carp_softc *, struct ifaddr *);

/* Virtual address (carp_vhaddr) handling */
static int	carp_config_vhaddr(struct carp_softc *, struct carp_vhaddr *,
		    struct in_ifaddr *);
static int	carp_activate_vhaddr(struct carp_softc *, struct carp_vhaddr *,
		    struct ifnet *, struct in_ifaddr *, int);
static void	carp_deactivate_vhaddr(struct carp_softc *,
		    struct carp_vhaddr *, boolean_t);
static int	carp_addroute_vhaddr(struct carp_softc *, struct carp_vhaddr *);
static void	carp_delroute_vhaddr(struct carp_softc *, struct carp_vhaddr *,
		    boolean_t);

#ifdef foo
static void	carp_sc_state(struct carp_softc *);
#endif
#ifdef INET6
static void	carp_send_na(struct carp_softc *);
#ifdef notyet
static int	carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *);
static int	carp_del_addr6(struct carp_softc *, struct sockaddr_in6 *);
#endif
static void	carp_multicast6_cleanup(struct carp_softc *);
#endif
static void	carp_stop(struct carp_softc *, boolean_t);
static void	carp_suspend(struct carp_softc *, boolean_t);
static void	carp_ioctl_stop(struct carp_softc *);
static int	carp_ioctl_setvh(struct carp_softc *, void *, struct ucred *);
static int	carp_ioctl_getvh(struct carp_softc *, void *, struct ucred *);
static int	carp_ioctl_getdevname(struct carp_softc *, struct ifdrv *);
static int	carp_ioctl_getvhaddr(struct carp_softc *, struct ifdrv *);

/* Copy-based updates of a backing interface's carp_if list */
static struct carp_if *carp_if_remove(struct carp_if *, struct carp_softc *);
static struct carp_if *carp_if_insert(struct carp_if *, struct carp_softc *);
static void	carp_if_free(struct carp_if *);

/* Event handlers */
static void	carp_ifaddr(void *, struct ifnet *, enum ifaddr_event,
			    struct ifaddr *);
static void	carp_ifdetach(void *, struct ifnet *);

/* netmsg dispatch functions */
static void	carp_ifdetach_dispatch(netmsg_t);
static void
carp_clone_destroy_dispatch(netmsg_t); 437 static void carp_init_dispatch(netmsg_t); 438 static void carp_ioctl_stop_dispatch(netmsg_t); 439 static void carp_ioctl_setvh_dispatch(netmsg_t); 440 static void carp_ioctl_getvh_dispatch(netmsg_t); 441 static void carp_ioctl_getdevname_dispatch(netmsg_t); 442 static void carp_ioctl_getvhaddr_dispatch(netmsg_t); 443 static void carp_send_ad_timeout_dispatch(netmsg_t); 444 static void carp_master_down_timeout_dispatch(netmsg_t); 445 446 static MALLOC_DEFINE(M_CARP, "CARP", "CARP interfaces"); 447 448 static LIST_HEAD(, carp_softc) carpif_list; 449 450 static struct if_clone carp_cloner = 451 IF_CLONE_INITIALIZER(CARP_IFNAME, carp_clone_create, carp_clone_destroy, 452 0, IF_MAXUNIT); 453 454 static uint8_t carp_etheraddr[ETHER_ADDR_LEN] = { 0, 0, 0x5e, 0, 1, 0 }; 455 456 static eventhandler_tag carp_ifdetach_event; 457 static eventhandler_tag carp_ifaddr_event; 458 459 static __inline void 460 carp_insert_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha_new) 461 { 462 struct carp_vhaddr *vha; 463 u_long new_addr, addr; 464 465 KKASSERT((vha_new->vha_flags & CARP_VHAF_ONLIST) == 0); 466 467 /* 468 * Virtual address list is sorted; smaller one first 469 */ 470 new_addr = ntohl(vha_new->vha_ia->ia_addr.sin_addr.s_addr); 471 472 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 473 addr = ntohl(vha->vha_ia->ia_addr.sin_addr.s_addr); 474 475 if (addr > new_addr) 476 break; 477 } 478 if (vha == NULL) 479 TAILQ_INSERT_TAIL(&sc->sc_vha_list, vha_new, vha_link); 480 else 481 TAILQ_INSERT_BEFORE(vha, vha_new, vha_link); 482 vha_new->vha_flags |= CARP_VHAF_ONLIST; 483 } 484 485 static __inline void 486 carp_remove_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha) 487 { 488 KKASSERT(vha->vha_flags & CARP_VHAF_ONLIST); 489 vha->vha_flags &= ~CARP_VHAF_ONLIST; 490 TAILQ_REMOVE(&sc->sc_vha_list, vha, vha_link); 491 } 492 493 static void 494 carp_hmac_prepare(struct carp_softc *sc) 495 { 496 uint8_t version = CARP_VERSION, type = 
CARP_ADVERTISEMENT; 497 uint8_t vhid = sc->sc_vhid & 0xff; 498 int i; 499 #ifdef INET6 500 struct ifaddr_container *ifac; 501 struct in6_addr in6; 502 #endif 503 #ifdef INET 504 struct carp_vhaddr *vha; 505 #endif 506 507 /* XXX: possible race here */ 508 509 /* compute ipad from key */ 510 bzero(sc->sc_pad, sizeof(sc->sc_pad)); 511 bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key)); 512 for (i = 0; i < sizeof(sc->sc_pad); i++) 513 sc->sc_pad[i] ^= 0x36; 514 515 /* precompute first part of inner hash */ 516 SHA1Init(&sc->sc_sha1); 517 SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad)); 518 SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version)); 519 SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type)); 520 SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid)); 521 #ifdef INET 522 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 523 SHA1Update(&sc->sc_sha1, 524 (const uint8_t *)&vha->vha_ia->ia_addr.sin_addr, 525 sizeof(struct in_addr)); 526 } 527 #endif /* INET */ 528 #ifdef INET6 529 TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], ifa_link) { 530 struct ifaddr *ifa = ifac->ifa; 531 532 if (ifa->ifa_addr->sa_family == AF_INET6) { 533 in6 = ifatoia6(ifa)->ia_addr.sin6_addr; 534 in6_clearscope(&in6); 535 SHA1Update(&sc->sc_sha1, (void *)&in6, sizeof(in6)); 536 } 537 } 538 #endif /* INET6 */ 539 540 /* convert ipad to opad */ 541 for (i = 0; i < sizeof(sc->sc_pad); i++) 542 sc->sc_pad[i] ^= 0x36 ^ 0x5c; 543 } 544 545 static void 546 carp_hmac_generate(struct carp_softc *sc, uint32_t counter[2], 547 unsigned char md[20]) 548 { 549 SHA1_CTX sha1ctx; 550 551 /* fetch first half of inner hash */ 552 bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx)); 553 554 SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter)); 555 SHA1Final(md, &sha1ctx); 556 557 /* outer hash */ 558 SHA1Init(&sha1ctx); 559 SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad)); 560 SHA1Update(&sha1ctx, md, 20); 561 SHA1Final(md, &sha1ctx); 562 } 563 564 static int 565 
carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2], 566 unsigned char md[20]) 567 { 568 unsigned char md2[20]; 569 570 carp_hmac_generate(sc, counter, md2); 571 return (bcmp(md, md2, sizeof(md2))); 572 } 573 574 static void 575 carp_setroute(struct carp_softc *sc, int cmd) 576 { 577 #ifdef INET6 578 struct ifaddr_container *ifac; 579 #endif 580 struct carp_vhaddr *vha; 581 582 KKASSERT(cmd == RTM_DELETE || cmd == RTM_ADD); 583 584 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 585 if (vha->vha_iaback == NULL) 586 continue; 587 if (cmd == RTM_DELETE) 588 carp_delroute_vhaddr(sc, vha, FALSE); 589 else 590 carp_addroute_vhaddr(sc, vha); 591 } 592 593 #ifdef INET6 594 TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], ifa_link) { 595 struct ifaddr *ifa = ifac->ifa; 596 597 if (ifa->ifa_addr->sa_family == AF_INET6) { 598 if (cmd == RTM_ADD) 599 in6_ifaddloop(ifa); 600 else 601 in6_ifremloop(ifa); 602 } 603 } 604 #endif /* INET6 */ 605 } 606 607 static int 608 carp_clone_create(struct if_clone *ifc, int unit, caddr_t param __unused) 609 { 610 struct carp_softc *sc; 611 struct ifnet *ifp; 612 613 sc = kmalloc(sizeof(*sc), M_CARP, M_WAITOK | M_ZERO); 614 ifp = &sc->sc_if; 615 616 sc->sc_suppress = 0; 617 sc->sc_advbase = CARP_DFLTINTV; 618 sc->sc_vhid = -1; /* required setting */ 619 sc->sc_advskew = 0; 620 sc->sc_init_counter = 1; 621 sc->sc_naddrs = 0; 622 sc->sc_naddrs6 = 0; 623 624 TAILQ_INIT(&sc->sc_vha_list); 625 626 #ifdef INET6 627 sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL; 628 #endif 629 630 callout_init_mp(&sc->sc_ad_tmo); 631 netmsg_init(&sc->sc_ad_msg.base, NULL, &netisr_adone_rport, 632 MSGF_DROPABLE | MSGF_PRIORITY, carp_send_ad_timeout_dispatch); 633 sc->sc_ad_msg.nc_softc = sc; 634 635 callout_init_mp(&sc->sc_md_tmo); 636 callout_init_mp(&sc->sc_md6_tmo); 637 netmsg_init(&sc->sc_md_msg.base, NULL, &netisr_adone_rport, 638 MSGF_DROPABLE | MSGF_PRIORITY, carp_master_down_timeout_dispatch); 639 sc->sc_md_msg.nc_softc = sc; 640 641 
if_initname(ifp, CARP_IFNAME, unit); 642 ifp->if_softc = sc; 643 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 644 ifp->if_init = carp_init; 645 ifp->if_ioctl = carp_ioctl; 646 ifp->if_start = carp_start; 647 ifq_set_maxlen(&ifp->if_snd, ifqmaxlen); 648 ifq_set_ready(&ifp->if_snd); 649 650 ether_ifattach(ifp, carp_etheraddr, NULL); 651 652 ifp->if_type = IFT_CARP; 653 ifp->if_output = carp_output; 654 655 lwkt_gettoken(&carp_listtok); 656 LIST_INSERT_HEAD(&carpif_list, sc, sc_next); 657 lwkt_reltoken(&carp_listtok); 658 659 return (0); 660 } 661 662 static void 663 carp_clone_destroy_dispatch(netmsg_t msg) 664 { 665 struct netmsg_carp *cmsg = (struct netmsg_carp *)msg; 666 struct carp_softc *sc = cmsg->nc_softc; 667 668 sc->sc_dead = TRUE; 669 carp_detach(sc, TRUE, FALSE); 670 671 callout_stop_sync(&sc->sc_ad_tmo); 672 callout_stop_sync(&sc->sc_md_tmo); 673 callout_stop_sync(&sc->sc_md6_tmo); 674 675 crit_enter(); 676 if ((sc->sc_ad_msg.base.lmsg.ms_flags & MSGF_DONE) == 0) 677 lwkt_dropmsg(&sc->sc_ad_msg.base.lmsg); 678 if ((sc->sc_md_msg.base.lmsg.ms_flags & MSGF_DONE) == 0) 679 lwkt_dropmsg(&sc->sc_md_msg.base.lmsg); 680 crit_exit(); 681 682 lwkt_replymsg(&cmsg->base.lmsg, 0); 683 } 684 685 static int 686 carp_clone_destroy(struct ifnet *ifp) 687 { 688 struct carp_softc *sc = ifp->if_softc; 689 struct netmsg_carp cmsg; 690 691 bzero(&cmsg, sizeof(cmsg)); 692 netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0, 693 carp_clone_destroy_dispatch); 694 cmsg.nc_softc = sc; 695 696 lwkt_domsg(netisr_portfn(0), &cmsg.base.lmsg, 0); 697 698 lwkt_gettoken(&carp_listtok); 699 LIST_REMOVE(sc, sc_next); 700 lwkt_reltoken(&carp_listtok); 701 702 bpfdetach(ifp); 703 if_detach(ifp); 704 705 KASSERT(sc->sc_naddrs == 0, ("certain inet address is still active")); 706 kfree(sc, M_CARP); 707 708 return 0; 709 } 710 711 static struct carp_if * 712 carp_if_remove(struct carp_if *ocif, struct carp_softc *sc) 713 { 714 struct carp_softc_container *oscc, *scc; 715 
	struct carp_if *cif;
	int count = 0;
#ifdef INVARIANTS
	int found = 0;
#endif

	/* Count the members; under INVARIANTS also check sc is one of them */
	TAILQ_FOREACH(oscc, ocif, scc_link) {
		++count;
#ifdef INVARIANTS
		if (oscc->scc_softc == sc)
			found = 1;
#endif
	}
	KASSERT(found, ("%s carp_softc is not on carp_if", __func__));

	if (count == 1) {
		/* Last one is going to be unlinked */
		return NULL;
	}

	cif = kmalloc(sizeof(*cif), M_CARP, M_WAITOK | M_ZERO);
	TAILQ_INIT(cif);

	/* Copy every container except the one that refers to sc */
	TAILQ_FOREACH(oscc, ocif, scc_link) {
		if (oscc->scc_softc == sc)
			continue;

		scc = kmalloc(sizeof(*scc), M_CARP, M_WAITOK | M_ZERO);
		scc->scc_softc = oscc->scc_softc;
		TAILQ_INSERT_TAIL(cif, scc, scc_link);
	}

	return cif;
}

/*
 * Return a carp_if that contains sc.  If sc is already on ocif, ocif
 * itself is returned.  Otherwise a new list is built from copies of
 * ocif's containers (ocif may be NULL) plus a container for sc, placed
 * in front of the first softc with a larger vhid; ocif is left
 * untouched and is not freed.
 */
static struct carp_if *
carp_if_insert(struct carp_if *ocif, struct carp_softc *sc)
{
	struct carp_softc_container *oscc;
	int onlist;

	onlist = 0;
	if (ocif != NULL) {
		TAILQ_FOREACH(oscc, ocif, scc_link) {
			if (oscc->scc_softc == sc)
				onlist = 1;
		}
	}

#ifdef INVARIANTS
	/* sc must be on the list exactly when it has a backing interface */
	if (sc->sc_carpdev != NULL) {
		KASSERT(onlist, ("%s is not on %s carp list",
		    sc->sc_if.if_xname, sc->sc_carpdev->if_xname));
	} else {
		KASSERT(!onlist, ("%s is already on carp list",
		    sc->sc_if.if_xname));
	}
#endif

	if (!onlist) {
		struct carp_if *cif;
		struct carp_softc_container *new_scc, *scc;
		int inserted = 0;

		cif = kmalloc(sizeof(*cif), M_CARP, M_WAITOK | M_ZERO);
		TAILQ_INIT(cif);

		new_scc = kmalloc(sizeof(*new_scc), M_CARP, M_WAITOK | M_ZERO);
		new_scc->scc_softc = sc;

		if (ocif != NULL) {
			TAILQ_FOREACH(oscc, ocif, scc_link) {
				/* Slot the new entry in before a larger vhid */
				if (!inserted &&
				    oscc->scc_softc->sc_vhid > sc->sc_vhid) {
					TAILQ_INSERT_TAIL(cif, new_scc,
					    scc_link);
					inserted = 1;
				}

				scc = kmalloc(sizeof(*scc), M_CARP,
				    M_WAITOK | M_ZERO);
				scc->scc_softc = oscc->scc_softc;
				TAILQ_INSERT_TAIL(cif, scc, scc_link);
			}
		}
		/* No larger vhid was found (or ocif was NULL/empty) */
		if (!inserted)
			TAILQ_INSERT_TAIL(cif, new_scc, scc_link);

		return cif;
	} else {
		return ocif;
	}
}

/*
 * Free a carp_if and all of its containers.  The softcs the containers
 * point at are not touched.
 */
static void
carp_if_free(struct carp_if *cif)
{
	struct carp_softc_container *scc;

	while ((scc = TAILQ_FIRST(cif)) != NULL) {
		TAILQ_REMOVE(cif, scc, scc_link);
		kfree(scc, M_CARP);
	}
	kfree(cif, M_CARP);
}

/*
 * Detach sc from its backing interface, if it has one.
 *
 * sc is suspended and its multicast state is cleaned up first.  When
 * `detach' is set and sc is not being destroyed (sc_dead is clear),
 * every virtual address is deactivated; `del_iaback' is passed through
 * to carp_deactivate_vhaddr().  The backing interface's if_carp is then
 * replaced by a copy without sc, and the old list is freed only after
 * netmsg_service_sync() has returned.
 */
static void
carp_detach(struct carp_softc *sc, boolean_t detach, boolean_t del_iaback)
{
	carp_suspend(sc, detach);

	carp_multicast_cleanup(sc);
#ifdef INET6
	carp_multicast6_cleanup(sc);
#endif

	if (!sc->sc_dead && detach) {
		struct carp_vhaddr *vha;

		TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link)
			carp_deactivate_vhaddr(sc, vha, del_iaback);
		KKASSERT(sc->sc_naddrs == 0);
	}

	if (sc->sc_carpdev != NULL) {
		struct ifnet *ifp = sc->sc_carpdev;
		struct carp_if *ocif = ifp->if_carp;

		/* Publish the new list; ocif stays valid until the sync */
		ifp->if_carp = carp_if_remove(ocif, sc);
		KASSERT(ifp->if_carp != ocif,
		    ("%s carp_if_remove failed", __func__));

		sc->sc_carpdev = NULL;
		sc->sc_ia = NULL;

		/*
		 * Make sure that all protocol threads see the
		 * sc_carpdev and if_carp changes
		 */
		netmsg_service_sync();

		if (ifp->if_carp == NULL) {
			/*
			 * No more carp interfaces using
			 * ifp as the backing interface,
			 * move it out of promiscuous mode.
			 */
			ifpromisc(ifp, 0);
		}

		/*
		 * The old carp list can be safely freed now,
		 * since no one can access it.
		 */
		carp_if_free(ocif);
	}
}

/*
 * netisr0 half of carp_ifdetach(): detach every carp interface that
 * uses the departing interface as its backing interface.  Each
 * carp_detach() call replaces ifp->if_carp, so the loop re-reads it
 * until it becomes NULL.
 */
static void
carp_ifdetach_dispatch(netmsg_t msg)
{
	struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
	struct ifnet *ifp = cmsg->nc_carpdev;

	while (ifp->if_carp) {
		struct carp_softc_container *scc;

		scc = TAILQ_FIRST((struct carp_if *)(ifp->if_carp));
		carp_detach(scc->scc_softc, TRUE, TRUE);
	}
	lwkt_replymsg(&cmsg->base.lmsg, 0);
}

/*
 * Detach an interface from the carp.
 *
 * The work is handed to netisr0 and waited for synchronously.
 */
static void
carp_ifdetach(void *arg __unused, struct ifnet *ifp)
{
	struct netmsg_carp cmsg;

	ASSERT_IFNET_NOT_SERIALIZED_ALL(ifp);

	bzero(&cmsg, sizeof(cmsg));
	netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
	    carp_ifdetach_dispatch);
	cmsg.nc_carpdev = ifp;

	lwkt_domsg(netisr_portfn(0), &cmsg.base.lmsg, 0);
}

/*
 * process input packet.
 * we have rearranged checks order compared to the rfc,
 * but it seems more efficient this way or not possible otherwise.
908 */ 909 int 910 carp_proto_input(struct mbuf **mp, int *offp, int proto) 911 { 912 struct mbuf *m = *mp; 913 struct ip *ip = mtod(m, struct ip *); 914 struct ifnet *ifp = m->m_pkthdr.rcvif; 915 struct carp_header *ch; 916 struct carp_softc *sc; 917 int len, iphlen; 918 919 iphlen = *offp; 920 *mp = NULL; 921 922 carpstats.carps_ipackets++; 923 924 if (!carp_opts[CARPCTL_ALLOW]) { 925 m_freem(m); 926 goto back; 927 } 928 929 /* Check if received on a valid carp interface */ 930 if (ifp->if_type != IFT_CARP) { 931 carpstats.carps_badif++; 932 CARP_LOG("carp_proto_input: packet received on non-carp " 933 "interface: %s\n", ifp->if_xname); 934 m_freem(m); 935 goto back; 936 } 937 938 if (!CARP_IS_RUNNING(ifp)) { 939 carpstats.carps_badif++; 940 CARP_LOG("carp_proto_input: packet received on stopped carp " 941 "interface: %s\n", ifp->if_xname); 942 m_freem(m); 943 goto back; 944 } 945 946 sc = ifp->if_softc; 947 if (sc->sc_carpdev == NULL) { 948 carpstats.carps_badif++; 949 CARP_LOG("carp_proto_input: packet received on defunc carp " 950 "interface: %s\n", ifp->if_xname); 951 m_freem(m); 952 goto back; 953 } 954 955 if (!IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { 956 carpstats.carps_badif++; 957 CARP_LOG("carp_proto_input: non-mcast packet on " 958 "interface: %s\n", ifp->if_xname); 959 m_freem(m); 960 goto back; 961 } 962 963 /* Verify that the IP TTL is CARP_DFLTTL. 
*/ 964 if (ip->ip_ttl != CARP_DFLTTL) { 965 carpstats.carps_badttl++; 966 CARP_LOG("carp_proto_input: received ttl %d != %d on %s\n", 967 ip->ip_ttl, CARP_DFLTTL, ifp->if_xname); 968 m_freem(m); 969 goto back; 970 } 971 972 /* Minimal CARP packet size */ 973 len = iphlen + sizeof(*ch); 974 975 /* 976 * Verify that the received packet length is 977 * not less than the CARP header 978 */ 979 if (m->m_pkthdr.len < len) { 980 carpstats.carps_badlen++; 981 CARP_LOG("packet too short %d on %s\n", m->m_pkthdr.len, 982 ifp->if_xname); 983 m_freem(m); 984 goto back; 985 } 986 987 /* Make sure that CARP header is contiguous */ 988 if (len > m->m_len) { 989 m = m_pullup(m, len); 990 if (m == NULL) { 991 carpstats.carps_hdrops++; 992 CARP_LOG("carp_proto_input: m_pullup failed\n"); 993 goto back; 994 } 995 ip = mtod(m, struct ip *); 996 } 997 ch = (struct carp_header *)((uint8_t *)ip + iphlen); 998 999 /* Verify the CARP checksum */ 1000 if (in_cksum_skip(m, len, iphlen)) { 1001 carpstats.carps_badsum++; 1002 CARP_LOG("carp_proto_input: checksum failed on %s\n", 1003 ifp->if_xname); 1004 m_freem(m); 1005 goto back; 1006 } 1007 carp_proto_input_c(sc, m, ch, AF_INET); 1008 back: 1009 return(IPPROTO_DONE); 1010 } 1011 1012 #ifdef INET6 1013 int 1014 carp6_proto_input(struct mbuf **mp, int *offp, int proto) 1015 { 1016 struct mbuf *m = *mp; 1017 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 1018 struct ifnet *ifp = m->m_pkthdr.rcvif; 1019 struct carp_header *ch; 1020 struct carp_softc *sc; 1021 u_int len; 1022 1023 carpstats.carps_ipackets6++; 1024 1025 if (!carp_opts[CARPCTL_ALLOW]) { 1026 m_freem(m); 1027 goto back; 1028 } 1029 1030 /* check if received on a valid carp interface */ 1031 if (ifp->if_type != IFT_CARP) { 1032 carpstats.carps_badif++; 1033 CARP_LOG("carp6_proto_input: packet received on non-carp " 1034 "interface: %s\n", ifp->if_xname); 1035 m_freem(m); 1036 goto back; 1037 } 1038 1039 if (!CARP_IS_RUNNING(ifp)) { 1040 carpstats.carps_badif++; 1041 
CARP_LOG("carp_proto_input: packet received on stopped carp " 1042 "interface: %s\n", ifp->if_xname); 1043 m_freem(m); 1044 goto back; 1045 } 1046 1047 sc = ifp->if_softc; 1048 if (sc->sc_carpdev == NULL) { 1049 carpstats.carps_badif++; 1050 CARP_LOG("carp6_proto_input: packet received on defunc-carp " 1051 "interface: %s\n", ifp->if_xname); 1052 m_freem(m); 1053 goto back; 1054 } 1055 1056 /* verify that the IP TTL is 255 */ 1057 if (ip6->ip6_hlim != CARP_DFLTTL) { 1058 carpstats.carps_badttl++; 1059 CARP_LOG("carp6_proto_input: received ttl %d != 255 on %s\n", 1060 ip6->ip6_hlim, ifp->if_xname); 1061 m_freem(m); 1062 goto back; 1063 } 1064 1065 /* verify that we have a complete carp packet */ 1066 len = m->m_len; 1067 IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch)); 1068 if (ch == NULL) { 1069 carpstats.carps_badlen++; 1070 CARP_LOG("carp6_proto_input: packet size %u too small\n", len); 1071 goto back; 1072 } 1073 1074 /* verify the CARP checksum */ 1075 if (in_cksum_range(m, 0, *offp, sizeof(*ch))) { 1076 carpstats.carps_badsum++; 1077 CARP_LOG("carp6_proto_input: checksum failed, on %s\n", 1078 ifp->if_xname); 1079 m_freem(m); 1080 goto back; 1081 } 1082 1083 carp_proto_input_c(sc, m, ch, AF_INET6); 1084 back: 1085 return (IPPROTO_DONE); 1086 } 1087 #endif /* INET6 */ 1088 1089 static void 1090 carp_proto_input_c(struct carp_softc *sc, struct mbuf *m, 1091 struct carp_header *ch, sa_family_t af) 1092 { 1093 struct ifnet *cifp; 1094 uint64_t tmp_counter; 1095 struct timeval sc_tv, ch_tv; 1096 1097 if (sc->sc_vhid != ch->carp_vhid) { 1098 /* 1099 * CARP uses multicast, however, multicast packets 1100 * are tapped to all CARP interfaces on the physical 1101 * interface receiving the CARP packets, so we don't 1102 * update any stats here. 1103 */ 1104 m_freem(m); 1105 return; 1106 } 1107 cifp = &sc->sc_if; 1108 1109 /* verify the CARP version. 
*/ 1110 if (ch->carp_version != CARP_VERSION) { 1111 carpstats.carps_badver++; 1112 CARP_LOG("%s; invalid version %d\n", cifp->if_xname, 1113 ch->carp_version); 1114 m_freem(m); 1115 return; 1116 } 1117 1118 /* verify the hash */ 1119 if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) { 1120 carpstats.carps_badauth++; 1121 CARP_LOG("%s: incorrect hash\n", cifp->if_xname); 1122 m_freem(m); 1123 return; 1124 } 1125 1126 tmp_counter = ntohl(ch->carp_counter[0]); 1127 tmp_counter = tmp_counter<<32; 1128 tmp_counter += ntohl(ch->carp_counter[1]); 1129 1130 /* XXX Replay protection goes here */ 1131 1132 sc->sc_init_counter = 0; 1133 sc->sc_counter = tmp_counter; 1134 1135 sc_tv.tv_sec = sc->sc_advbase; 1136 if (carp_suppress_preempt && sc->sc_advskew < 240) 1137 sc_tv.tv_usec = 240 * 1000000 / 256; 1138 else 1139 sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1140 ch_tv.tv_sec = ch->carp_advbase; 1141 ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256; 1142 1143 switch (sc->sc_state) { 1144 case INIT: 1145 break; 1146 1147 case MASTER: 1148 /* 1149 * If we receive an advertisement from a master who's going to 1150 * be more frequent than us, go into BACKUP state. 1151 */ 1152 if (timevalcmp(&sc_tv, &ch_tv, >) || 1153 timevalcmp(&sc_tv, &ch_tv, ==)) { 1154 callout_stop(&sc->sc_ad_tmo); 1155 CARP_DEBUG("%s: MASTER -> BACKUP " 1156 "(more frequent advertisement received)\n", 1157 cifp->if_xname); 1158 carp_set_state(sc, BACKUP); 1159 carp_setrun(sc, 0); 1160 carp_setroute(sc, RTM_DELETE); 1161 } 1162 break; 1163 1164 case BACKUP: 1165 /* 1166 * If we're pre-empting masters who advertise slower than us, 1167 * and this one claims to be slower, treat him as down. 
1168 */ 1169 if (carp_opts[CARPCTL_PREEMPT] && 1170 timevalcmp(&sc_tv, &ch_tv, <)) { 1171 CARP_DEBUG("%s: BACKUP -> MASTER " 1172 "(preempting a slower master)\n", cifp->if_xname); 1173 carp_master_down(sc); 1174 break; 1175 } 1176 1177 /* 1178 * If the master is going to advertise at such a low frequency 1179 * that he's guaranteed to time out, we'd might as well just 1180 * treat him as timed out now. 1181 */ 1182 sc_tv.tv_sec = sc->sc_advbase * 3; 1183 if (timevalcmp(&sc_tv, &ch_tv, <)) { 1184 CARP_DEBUG("%s: BACKUP -> MASTER (master timed out)\n", 1185 cifp->if_xname); 1186 carp_master_down(sc); 1187 break; 1188 } 1189 1190 /* 1191 * Otherwise, we reset the counter and wait for the next 1192 * advertisement. 1193 */ 1194 carp_setrun(sc, af); 1195 break; 1196 } 1197 m_freem(m); 1198 } 1199 1200 struct mbuf * 1201 carp_input(void *v, struct mbuf *m) 1202 { 1203 struct carp_if *cif = v; 1204 struct ether_header *eh; 1205 struct carp_softc_container *scc; 1206 struct ifnet *ifp; 1207 1208 eh = mtod(m, struct ether_header *); 1209 1210 ifp = carp_forus(cif, eh->ether_dhost); 1211 if (ifp != NULL) { 1212 ether_reinput_oncpu(ifp, m, REINPUT_RUNBPF); 1213 return NULL; 1214 } 1215 1216 if ((m->m_flags & (M_BCAST | M_MCAST)) == 0) 1217 return m; 1218 1219 /* 1220 * XXX Should really check the list of multicast addresses 1221 * for each CARP interface _before_ copying. 
1222 */ 1223 TAILQ_FOREACH(scc, cif, scc_link) { 1224 struct carp_softc *sc = scc->scc_softc; 1225 struct mbuf *m0; 1226 1227 if ((sc->sc_if.if_flags & IFF_UP) == 0) 1228 continue; 1229 1230 m0 = m_dup(m, MB_DONTWAIT); 1231 if (m0 == NULL) 1232 continue; 1233 1234 ether_reinput_oncpu(&sc->sc_if, m0, REINPUT_RUNBPF); 1235 } 1236 return m; 1237 } 1238 1239 static void 1240 carp_prepare_ad(struct carp_softc *sc, struct carp_header *ch) 1241 { 1242 if (sc->sc_init_counter) { 1243 /* this could also be seconds since unix epoch */ 1244 sc->sc_counter = karc4random(); 1245 sc->sc_counter = sc->sc_counter << 32; 1246 sc->sc_counter += karc4random(); 1247 } else { 1248 sc->sc_counter++; 1249 } 1250 1251 ch->carp_counter[0] = htonl((sc->sc_counter >> 32) & 0xffffffff); 1252 ch->carp_counter[1] = htonl(sc->sc_counter & 0xffffffff); 1253 1254 carp_hmac_generate(sc, ch->carp_counter, ch->carp_md); 1255 } 1256 1257 static void 1258 carp_send_ad_all(void) 1259 { 1260 struct carp_softc *sc; 1261 1262 LIST_FOREACH(sc, &carpif_list, sc_next) { 1263 if (sc->sc_carpdev == NULL) 1264 continue; 1265 1266 if (CARP_IS_RUNNING(&sc->sc_if) && sc->sc_state == MASTER) 1267 carp_send_ad(sc); 1268 } 1269 } 1270 1271 static void 1272 carp_send_ad_timeout(void *xsc) 1273 { 1274 struct carp_softc *sc = xsc; 1275 struct netmsg_carp *cmsg = &sc->sc_ad_msg; 1276 1277 KASSERT(mycpuid == 0, ("%s not on cpu0 but on cpu%d", 1278 __func__, mycpuid)); 1279 1280 crit_enter(); 1281 if (cmsg->base.lmsg.ms_flags & MSGF_DONE) 1282 lwkt_sendmsg(netisr_portfn(0), &cmsg->base.lmsg); 1283 crit_exit(); 1284 } 1285 1286 static void 1287 carp_send_ad_timeout_dispatch(netmsg_t msg) 1288 { 1289 struct netmsg_carp *cmsg = (struct netmsg_carp *)msg; 1290 struct carp_softc *sc = cmsg->nc_softc; 1291 1292 /* Reply ASAP */ 1293 crit_enter(); 1294 lwkt_replymsg(&cmsg->base.lmsg, 0); 1295 crit_exit(); 1296 1297 carp_send_ad(sc); 1298 } 1299 1300 static void 1301 carp_send_ad(struct carp_softc *sc) 1302 { 1303 struct ifnet 
*cifp = &sc->sc_if; 1304 struct carp_header ch; 1305 struct timeval tv; 1306 struct carp_header *ch_ptr; 1307 struct mbuf *m; 1308 int len, advbase, advskew; 1309 1310 if (!CARP_IS_RUNNING(cifp)) { 1311 /* Bow out */ 1312 advbase = 255; 1313 advskew = 255; 1314 } else { 1315 advbase = sc->sc_advbase; 1316 if (!carp_suppress_preempt || sc->sc_advskew > 240) 1317 advskew = sc->sc_advskew; 1318 else 1319 advskew = 240; 1320 tv.tv_sec = advbase; 1321 tv.tv_usec = advskew * 1000000 / 256; 1322 } 1323 1324 ch.carp_version = CARP_VERSION; 1325 ch.carp_type = CARP_ADVERTISEMENT; 1326 ch.carp_vhid = sc->sc_vhid; 1327 ch.carp_advbase = advbase; 1328 ch.carp_advskew = advskew; 1329 ch.carp_authlen = 7; /* XXX DEFINE */ 1330 ch.carp_pad1 = 0; /* must be zero */ 1331 ch.carp_cksum = 0; 1332 1333 #ifdef INET 1334 if (sc->sc_ia != NULL) { 1335 struct ip *ip; 1336 1337 MGETHDR(m, MB_DONTWAIT, MT_HEADER); 1338 if (m == NULL) { 1339 cifp->if_oerrors++; 1340 carpstats.carps_onomem++; 1341 /* XXX maybe less ? 
*/ 1342 if (advbase != 255 || advskew != 255) 1343 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv), 1344 carp_send_ad_timeout, sc); 1345 return; 1346 } 1347 len = sizeof(*ip) + sizeof(ch); 1348 m->m_pkthdr.len = len; 1349 m->m_pkthdr.rcvif = NULL; 1350 m->m_len = len; 1351 MH_ALIGN(m, m->m_len); 1352 m->m_flags |= M_MCAST; 1353 ip = mtod(m, struct ip *); 1354 ip->ip_v = IPVERSION; 1355 ip->ip_hl = sizeof(*ip) >> 2; 1356 ip->ip_tos = IPTOS_LOWDELAY; 1357 ip->ip_len = len; 1358 ip->ip_id = ip_newid(); 1359 ip->ip_off = IP_DF; 1360 ip->ip_ttl = CARP_DFLTTL; 1361 ip->ip_p = IPPROTO_CARP; 1362 ip->ip_sum = 0; 1363 ip->ip_src = sc->sc_ia->ia_addr.sin_addr; 1364 ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP); 1365 1366 ch_ptr = (struct carp_header *)(&ip[1]); 1367 bcopy(&ch, ch_ptr, sizeof(ch)); 1368 carp_prepare_ad(sc, ch_ptr); 1369 ch_ptr->carp_cksum = in_cksum_skip(m, len, sizeof(*ip)); 1370 1371 getmicrotime(&cifp->if_lastchange); 1372 cifp->if_opackets++; 1373 cifp->if_obytes += len; 1374 carpstats.carps_opackets++; 1375 1376 if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) { 1377 cifp->if_oerrors++; 1378 if (sc->sc_sendad_errors < INT_MAX) 1379 sc->sc_sendad_errors++; 1380 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { 1381 carp_suppress_preempt++; 1382 if (carp_suppress_preempt == 1) { 1383 carp_send_ad_all(); 1384 } 1385 } 1386 sc->sc_sendad_success = 0; 1387 } else { 1388 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { 1389 if (++sc->sc_sendad_success >= 1390 CARP_SENDAD_MIN_SUCCESS) { 1391 carp_suppress_preempt--; 1392 sc->sc_sendad_errors = 0; 1393 } 1394 } else { 1395 sc->sc_sendad_errors = 0; 1396 } 1397 } 1398 } 1399 #endif /* INET */ 1400 #ifdef INET6 1401 if (sc->sc_ia6) { 1402 struct ip6_hdr *ip6; 1403 1404 MGETHDR(m, MB_DONTWAIT, MT_HEADER); 1405 if (m == NULL) { 1406 cifp->if_oerrors++; 1407 carpstats.carps_onomem++; 1408 /* XXX maybe less ? 
*/ 1409 if (advbase != 255 || advskew != 255) 1410 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv), 1411 carp_send_ad_timeout, sc); 1412 return; 1413 } 1414 len = sizeof(*ip6) + sizeof(ch); 1415 m->m_pkthdr.len = len; 1416 m->m_pkthdr.rcvif = NULL; 1417 m->m_len = len; 1418 MH_ALIGN(m, m->m_len); 1419 m->m_flags |= M_MCAST; 1420 ip6 = mtod(m, struct ip6_hdr *); 1421 bzero(ip6, sizeof(*ip6)); 1422 ip6->ip6_vfc |= IPV6_VERSION; 1423 ip6->ip6_hlim = CARP_DFLTTL; 1424 ip6->ip6_nxt = IPPROTO_CARP; 1425 bcopy(&sc->sc_ia6->ia_addr.sin6_addr, &ip6->ip6_src, 1426 sizeof(struct in6_addr)); 1427 /* set the multicast destination */ 1428 1429 ip6->ip6_dst.s6_addr16[0] = htons(0xff02); 1430 ip6->ip6_dst.s6_addr8[15] = 0x12; 1431 if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) { 1432 cifp->if_oerrors++; 1433 m_freem(m); 1434 CARP_LOG("%s: in6_setscope failed\n", __func__); 1435 return; 1436 } 1437 1438 ch_ptr = (struct carp_header *)(&ip6[1]); 1439 bcopy(&ch, ch_ptr, sizeof(ch)); 1440 carp_prepare_ad(sc, ch_ptr); 1441 ch_ptr->carp_cksum = in_cksum_skip(m, len, sizeof(*ip6)); 1442 1443 getmicrotime(&cifp->if_lastchange); 1444 cifp->if_opackets++; 1445 cifp->if_obytes += len; 1446 carpstats.carps_opackets6++; 1447 1448 if (ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL)) { 1449 cifp->if_oerrors++; 1450 if (sc->sc_sendad_errors < INT_MAX) 1451 sc->sc_sendad_errors++; 1452 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { 1453 carp_suppress_preempt++; 1454 if (carp_suppress_preempt == 1) { 1455 carp_send_ad_all(); 1456 } 1457 } 1458 sc->sc_sendad_success = 0; 1459 } else { 1460 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { 1461 if (++sc->sc_sendad_success >= 1462 CARP_SENDAD_MIN_SUCCESS) { 1463 carp_suppress_preempt--; 1464 sc->sc_sendad_errors = 0; 1465 } 1466 } else { 1467 sc->sc_sendad_errors = 0; 1468 } 1469 } 1470 } 1471 #endif /* INET6 */ 1472 1473 if (advbase != 255 || advskew != 255) 1474 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv), 1475 
carp_send_ad_timeout, sc); 1476 } 1477 1478 /* 1479 * Broadcast a gratuitous ARP request containing 1480 * the virtual router MAC address for each IP address 1481 * associated with the virtual router. 1482 */ 1483 static void 1484 carp_send_arp(struct carp_softc *sc) 1485 { 1486 const struct carp_vhaddr *vha; 1487 1488 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 1489 if (vha->vha_iaback == NULL) 1490 continue; 1491 arp_gratuitous(&sc->sc_if, &vha->vha_ia->ia_ifa); 1492 } 1493 } 1494 1495 #ifdef INET6 1496 static void 1497 carp_send_na(struct carp_softc *sc) 1498 { 1499 struct ifaddr_container *ifac; 1500 struct in6_addr *in6; 1501 static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT; 1502 1503 TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], ifa_link) { 1504 struct ifaddr *ifa = ifac->ifa; 1505 1506 if (ifa->ifa_addr->sa_family != AF_INET6) 1507 continue; 1508 1509 in6 = &ifatoia6(ifa)->ia_addr.sin6_addr; 1510 nd6_na_output(sc->sc_carpdev, &mcast, in6, 1511 ND_NA_FLAG_OVERRIDE, 1, NULL); 1512 DELAY(1000); /* XXX */ 1513 } 1514 } 1515 #endif /* INET6 */ 1516 1517 static __inline const struct carp_vhaddr * 1518 carp_find_addr(const struct carp_softc *sc, const struct in_addr *addr) 1519 { 1520 struct carp_vhaddr *vha; 1521 1522 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 1523 if (vha->vha_iaback == NULL) 1524 continue; 1525 1526 if (vha->vha_ia->ia_addr.sin_addr.s_addr == addr->s_addr) 1527 return vha; 1528 } 1529 return NULL; 1530 } 1531 1532 #ifdef notyet 1533 static int 1534 carp_iamatch_balance(const struct carp_if *cif, const struct in_addr *itaddr, 1535 const struct in_addr *isaddr, uint8_t **enaddr) 1536 { 1537 const struct carp_softc *vh; 1538 int index, count = 0; 1539 1540 /* 1541 * XXX proof of concept implementation. 1542 * We use the source ip to decide which virtual host should 1543 * handle the request. If we're master of that virtual host, 1544 * then we respond, otherwise, just drop the arp packet on 1545 * the floor. 
1546 */ 1547 1548 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1549 if (!CARP_IS_RUNNING(&vh->sc_if)) 1550 continue; 1551 1552 if (carp_find_addr(vh, itaddr) != NULL) 1553 count++; 1554 } 1555 if (count == 0) 1556 return 0; 1557 1558 /* this should be a hash, like pf_hash() */ 1559 index = ntohl(isaddr->s_addr) % count; 1560 count = 0; 1561 1562 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1563 if (!CARP_IS_RUNNING(&vh->sc_if)) 1564 continue; 1565 1566 if (carp_find_addr(vh, itaddr) == NULL) 1567 continue; 1568 1569 if (count == index) { 1570 if (vh->sc_state == MASTER) { 1571 *enaddr = IF_LLADDR(&vh->sc_if); 1572 return 1; 1573 } else { 1574 return 0; 1575 } 1576 } 1577 count++; 1578 } 1579 return 0; 1580 } 1581 #endif 1582 1583 int 1584 carp_iamatch(const struct in_ifaddr *ia) 1585 { 1586 const struct carp_softc *sc = ia->ia_ifp->if_softc; 1587 1588 KASSERT(&curthread->td_msgport == netisr_portfn(0), 1589 ("not in netisr0")); 1590 1591 #ifdef notyet 1592 if (carp_opts[CARPCTL_ARPBALANCE]) 1593 return carp_iamatch_balance(cif, itaddr, isaddr, enaddr); 1594 #endif 1595 1596 if (!CARP_IS_RUNNING(&sc->sc_if) || sc->sc_state != MASTER) 1597 return 0; 1598 1599 return 1; 1600 } 1601 1602 #ifdef INET6 1603 struct ifaddr * 1604 carp_iamatch6(void *v, struct in6_addr *taddr) 1605 { 1606 #ifdef foo 1607 struct carp_if *cif = v; 1608 struct carp_softc *vh; 1609 1610 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1611 struct ifaddr_container *ifac; 1612 1613 TAILQ_FOREACH(ifac, &vh->sc_if.if_addrheads[mycpuid], 1614 ifa_link) { 1615 struct ifaddr *ifa = ifac->ifa; 1616 1617 if (IN6_ARE_ADDR_EQUAL(taddr, 1618 &ifatoia6(ifa)->ia_addr.sin6_addr) && 1619 CARP_IS_RUNNING(&vh->sc_if) && 1620 vh->sc_state == MASTER) { 1621 return (ifa); 1622 } 1623 } 1624 } 1625 #endif 1626 return (NULL); 1627 } 1628 1629 void * 1630 carp_macmatch6(void *v, struct mbuf *m, const struct in6_addr *taddr) 1631 { 1632 #ifdef foo 1633 struct m_tag *mtag; 1634 struct carp_if *cif = v; 1635 struct 
carp_softc *sc; 1636 1637 TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) { 1638 struct ifaddr_container *ifac; 1639 1640 TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], 1641 ifa_link) { 1642 struct ifaddr *ifa = ifac->ifa; 1643 1644 if (IN6_ARE_ADDR_EQUAL(taddr, 1645 &ifatoia6(ifa)->ia_addr.sin6_addr) && 1646 CARP_IS_RUNNING(&sc->sc_if)) { 1647 struct ifnet *ifp = &sc->sc_if; 1648 1649 mtag = m_tag_get(PACKET_TAG_CARP, 1650 sizeof(struct ifnet *), MB_DONTWAIT); 1651 if (mtag == NULL) { 1652 /* better a bit than nothing */ 1653 return (IF_LLADDR(ifp)); 1654 } 1655 bcopy(&ifp, (caddr_t)(mtag + 1), 1656 sizeof(struct ifnet *)); 1657 m_tag_prepend(m, mtag); 1658 1659 return (IF_LLADDR(ifp)); 1660 } 1661 } 1662 } 1663 #endif 1664 return (NULL); 1665 } 1666 #endif 1667 1668 static struct ifnet * 1669 carp_forus(struct carp_if *cif, const uint8_t *dhost) 1670 { 1671 struct carp_softc_container *scc; 1672 1673 if (memcmp(dhost, carp_etheraddr, ETHER_ADDR_LEN - 1) != 0) 1674 return NULL; 1675 1676 TAILQ_FOREACH(scc, cif, scc_link) { 1677 struct carp_softc *sc = scc->scc_softc; 1678 struct ifnet *ifp = &sc->sc_if; 1679 1680 if (CARP_IS_RUNNING(ifp) && sc->sc_state == MASTER && 1681 !bcmp(dhost, IF_LLADDR(ifp), ETHER_ADDR_LEN)) 1682 return ifp; 1683 } 1684 return NULL; 1685 } 1686 1687 static void 1688 carp_master_down_timeout(void *xsc) 1689 { 1690 struct carp_softc *sc = xsc; 1691 struct netmsg_carp *cmsg = &sc->sc_md_msg; 1692 1693 KASSERT(mycpuid == 0, ("%s not on cpu0 but on cpu%d", 1694 __func__, mycpuid)); 1695 1696 crit_enter(); 1697 if (cmsg->base.lmsg.ms_flags & MSGF_DONE) 1698 lwkt_sendmsg(netisr_portfn(0), &cmsg->base.lmsg); 1699 crit_exit(); 1700 } 1701 1702 static void 1703 carp_master_down_timeout_dispatch(netmsg_t msg) 1704 { 1705 struct netmsg_carp *cmsg = (struct netmsg_carp *)msg; 1706 struct carp_softc *sc = cmsg->nc_softc; 1707 1708 /* Reply ASAP */ 1709 crit_enter(); 1710 lwkt_replymsg(&cmsg->base.lmsg, 0); 1711 crit_exit(); 1712 1713 CARP_DEBUG("%s: 
BACKUP -> MASTER (master timed out)\n", 1714 sc->sc_if.if_xname); 1715 carp_master_down(sc); 1716 } 1717 1718 static void 1719 carp_master_down(struct carp_softc *sc) 1720 { 1721 switch (sc->sc_state) { 1722 case INIT: 1723 kprintf("%s: master_down event in INIT state\n", 1724 sc->sc_if.if_xname); 1725 break; 1726 1727 case MASTER: 1728 break; 1729 1730 case BACKUP: 1731 carp_set_state(sc, MASTER); 1732 carp_send_ad(sc); 1733 carp_send_arp(sc); 1734 #ifdef INET6 1735 carp_send_na(sc); 1736 #endif /* INET6 */ 1737 carp_setrun(sc, 0); 1738 carp_setroute(sc, RTM_ADD); 1739 break; 1740 } 1741 } 1742 1743 /* 1744 * When in backup state, af indicates whether to reset the master down timer 1745 * for v4 or v6. If it's set to zero, reset the ones which are already pending. 1746 */ 1747 static void 1748 carp_setrun(struct carp_softc *sc, sa_family_t af) 1749 { 1750 struct ifnet *cifp = &sc->sc_if; 1751 struct timeval tv; 1752 1753 if (sc->sc_carpdev == NULL) { 1754 carp_set_state(sc, INIT); 1755 return; 1756 } 1757 1758 if ((cifp->if_flags & IFF_RUNNING) && sc->sc_vhid > 0 && 1759 (sc->sc_naddrs || sc->sc_naddrs6)) { 1760 /* Nothing */ 1761 } else { 1762 carp_setroute(sc, RTM_DELETE); 1763 return; 1764 } 1765 1766 switch (sc->sc_state) { 1767 case INIT: 1768 if (carp_opts[CARPCTL_PREEMPT] && !carp_suppress_preempt) { 1769 carp_send_ad(sc); 1770 carp_send_arp(sc); 1771 #ifdef INET6 1772 carp_send_na(sc); 1773 #endif /* INET6 */ 1774 CARP_DEBUG("%s: INIT -> MASTER (preempting)\n", 1775 cifp->if_xname); 1776 carp_set_state(sc, MASTER); 1777 carp_setroute(sc, RTM_ADD); 1778 } else { 1779 CARP_DEBUG("%s: INIT -> BACKUP\n", cifp->if_xname); 1780 carp_set_state(sc, BACKUP); 1781 carp_setroute(sc, RTM_DELETE); 1782 carp_setrun(sc, 0); 1783 } 1784 break; 1785 1786 case BACKUP: 1787 callout_stop(&sc->sc_ad_tmo); 1788 tv.tv_sec = 3 * sc->sc_advbase; 1789 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1790 switch (af) { 1791 #ifdef INET 1792 case AF_INET: 1793 
callout_reset(&sc->sc_md_tmo, tvtohz_high(&tv), 1794 carp_master_down_timeout, sc); 1795 break; 1796 #endif /* INET */ 1797 #ifdef INET6 1798 case AF_INET6: 1799 callout_reset(&sc->sc_md6_tmo, tvtohz_high(&tv), 1800 carp_master_down_timeout, sc); 1801 break; 1802 #endif /* INET6 */ 1803 default: 1804 if (sc->sc_naddrs) 1805 callout_reset(&sc->sc_md_tmo, tvtohz_high(&tv), 1806 carp_master_down_timeout, sc); 1807 if (sc->sc_naddrs6) 1808 callout_reset(&sc->sc_md6_tmo, tvtohz_high(&tv), 1809 carp_master_down_timeout, sc); 1810 break; 1811 } 1812 break; 1813 1814 case MASTER: 1815 tv.tv_sec = sc->sc_advbase; 1816 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1817 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv), 1818 carp_send_ad_timeout, sc); 1819 break; 1820 } 1821 } 1822 1823 static void 1824 carp_multicast_cleanup(struct carp_softc *sc) 1825 { 1826 struct ip_moptions *imo = &sc->sc_imo; 1827 1828 if (imo->imo_num_memberships == 0) 1829 return; 1830 KKASSERT(imo->imo_num_memberships == 1); 1831 1832 in_delmulti(imo->imo_membership[0]); 1833 imo->imo_membership[0] = NULL; 1834 imo->imo_num_memberships = 0; 1835 imo->imo_multicast_ifp = NULL; 1836 } 1837 1838 #ifdef INET6 1839 static void 1840 carp_multicast6_cleanup(struct carp_softc *sc) 1841 { 1842 struct ip6_moptions *im6o = &sc->sc_im6o; 1843 1844 while (!LIST_EMPTY(&im6o->im6o_memberships)) { 1845 struct in6_multi_mship *imm = 1846 LIST_FIRST(&im6o->im6o_memberships); 1847 1848 LIST_REMOVE(imm, i6mm_chain); 1849 in6_leavegroup(imm); 1850 } 1851 im6o->im6o_multicast_ifp = NULL; 1852 } 1853 #endif 1854 1855 static void 1856 carp_ioctl_getvhaddr_dispatch(netmsg_t msg) 1857 { 1858 struct netmsg_carp *cmsg = (struct netmsg_carp *)msg; 1859 struct carp_softc *sc = cmsg->nc_softc; 1860 const struct carp_vhaddr *vha; 1861 struct ifcarpvhaddr *carpa, *carpa0; 1862 int count, len, error = 0; 1863 1864 count = 0; 1865 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) 1866 ++count; 1867 1868 if (cmsg->nc_datalen == 0) { 1869 
cmsg->nc_datalen = count * sizeof(*carpa); 1870 goto back; 1871 } else if (count == 0 || cmsg->nc_datalen < sizeof(*carpa)) { 1872 cmsg->nc_datalen = 0; 1873 goto back; 1874 } 1875 len = min(cmsg->nc_datalen, sizeof(*carpa) * count); 1876 KKASSERT(len >= sizeof(*carpa)); 1877 1878 carpa0 = carpa = kmalloc(len, M_TEMP, M_WAITOK | M_NULLOK | M_ZERO); 1879 if (carpa == NULL) { 1880 error = ENOMEM; 1881 goto back; 1882 } 1883 1884 count = 0; 1885 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 1886 if (len < sizeof(*carpa)) 1887 break; 1888 1889 carpa->carpa_flags = vha->vha_flags; 1890 carpa->carpa_addr.sin_family = AF_INET; 1891 carpa->carpa_addr.sin_addr = vha->vha_ia->ia_addr.sin_addr; 1892 1893 carpa->carpa_baddr.sin_family = AF_INET; 1894 if (vha->vha_iaback == NULL) { 1895 carpa->carpa_baddr.sin_addr.s_addr = INADDR_ANY; 1896 } else { 1897 carpa->carpa_baddr.sin_addr = 1898 vha->vha_iaback->ia_addr.sin_addr; 1899 } 1900 1901 ++carpa; 1902 ++count; 1903 len -= sizeof(*carpa); 1904 } 1905 cmsg->nc_datalen = sizeof(*carpa) * count; 1906 KKASSERT(cmsg->nc_datalen > 0); 1907 1908 cmsg->nc_data = carpa0; 1909 1910 back: 1911 lwkt_replymsg(&cmsg->base.lmsg, error); 1912 } 1913 1914 static int 1915 carp_ioctl_getvhaddr(struct carp_softc *sc, struct ifdrv *ifd) 1916 { 1917 struct ifnet *ifp = &sc->arpcom.ac_if; 1918 struct netmsg_carp cmsg; 1919 int error; 1920 1921 ASSERT_IFNET_SERIALIZED_ALL(ifp); 1922 ifnet_deserialize_all(ifp); 1923 1924 bzero(&cmsg, sizeof(cmsg)); 1925 netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0, 1926 carp_ioctl_getvhaddr_dispatch); 1927 cmsg.nc_softc = sc; 1928 cmsg.nc_datalen = ifd->ifd_len; 1929 1930 error = lwkt_domsg(netisr_portfn(0), &cmsg.base.lmsg, 0); 1931 1932 if (!error) { 1933 if (cmsg.nc_data != NULL) { 1934 error = copyout(cmsg.nc_data, ifd->ifd_data, 1935 cmsg.nc_datalen); 1936 kfree(cmsg.nc_data, M_TEMP); 1937 } 1938 ifd->ifd_len = cmsg.nc_datalen; 1939 } else { 1940 KASSERT(cmsg.nc_data == NULL, 1941 ("%s temp vhaddr is 
alloc upon error", __func__)); 1942 } 1943 1944 ifnet_serialize_all(ifp); 1945 return error; 1946 } 1947 1948 static int 1949 carp_config_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha, 1950 struct in_ifaddr *ia_del) 1951 { 1952 struct ifnet *ifp; 1953 struct in_ifaddr *ia_if; 1954 struct in_ifaddr_container *iac; 1955 const struct sockaddr_in *sin; 1956 u_long iaddr; 1957 int own; 1958 1959 KKASSERT(vha->vha_ia != NULL); 1960 1961 sin = &vha->vha_ia->ia_addr; 1962 iaddr = ntohl(sin->sin_addr.s_addr); 1963 1964 ia_if = NULL; 1965 own = 0; 1966 TAILQ_FOREACH(iac, &in_ifaddrheads[mycpuid], ia_link) { 1967 struct in_ifaddr *ia = iac->ia; 1968 1969 if (ia == ia_del) 1970 continue; 1971 1972 if (ia->ia_ifp->if_type == IFT_CARP) 1973 continue; 1974 1975 if ((ia->ia_ifp->if_flags & IFF_UP) == 0) 1976 continue; 1977 1978 /* and, yeah, we need a multicast-capable iface too */ 1979 if ((ia->ia_ifp->if_flags & IFF_MULTICAST) == 0) 1980 continue; 1981 1982 if ((iaddr & ia->ia_subnetmask) == ia->ia_subnet) { 1983 if (sin->sin_addr.s_addr == 1984 ia->ia_addr.sin_addr.s_addr) 1985 own = 1; 1986 if (ia_if == NULL) 1987 ia_if = ia; 1988 else if (sc->sc_carpdev != NULL && 1989 sc->sc_carpdev == ia->ia_ifp) 1990 ia_if = ia; 1991 } 1992 } 1993 1994 carp_deactivate_vhaddr(sc, vha, FALSE); 1995 if (!ia_if) 1996 return ENOENT; 1997 1998 ifp = ia_if->ia_ifp; 1999 2000 /* XXX Don't allow parent iface to be changed */ 2001 if (sc->sc_carpdev != NULL && sc->sc_carpdev != ifp) 2002 return EEXIST; 2003 2004 return carp_activate_vhaddr(sc, vha, ifp, ia_if, own); 2005 } 2006 2007 static void 2008 carp_add_addr(struct carp_softc *sc, struct ifaddr *carp_ifa) 2009 { 2010 struct carp_vhaddr *vha_new; 2011 struct in_ifaddr *carp_ia; 2012 #ifdef INVARIANTS 2013 struct carp_vhaddr *vha; 2014 #endif 2015 2016 KKASSERT(carp_ifa->ifa_addr->sa_family == AF_INET); 2017 carp_ia = ifatoia(carp_ifa); 2018 2019 #ifdef INVARIANTS 2020 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) 2021 
KKASSERT(vha->vha_ia != NULL && vha->vha_ia != carp_ia); 2022 #endif 2023 2024 vha_new = kmalloc(sizeof(*vha_new), M_CARP, M_WAITOK | M_ZERO); 2025 vha_new->vha_ia = carp_ia; 2026 carp_insert_vhaddr(sc, vha_new); 2027 2028 if (carp_config_vhaddr(sc, vha_new, NULL) != 0) { 2029 /* 2030 * If the above configuration fails, it may only mean 2031 * that the new address is problematic. However, the 2032 * carp(4) interface may already have several working 2033 * addresses. Since the expected behaviour of 2034 * SIOC[AS]IFADDR is to put the NIC into working state, 2035 * we try starting the state machine manually here with 2036 * the hope that the carp(4)'s previously working 2037 * addresses still could be brought up. 2038 */ 2039 carp_hmac_prepare(sc); 2040 carp_set_state(sc, INIT); 2041 carp_setrun(sc, 0); 2042 } 2043 } 2044 2045 static void 2046 carp_del_addr(struct carp_softc *sc, struct ifaddr *carp_ifa) 2047 { 2048 struct carp_vhaddr *vha; 2049 struct in_ifaddr *carp_ia; 2050 2051 KKASSERT(carp_ifa->ifa_addr->sa_family == AF_INET); 2052 carp_ia = ifatoia(carp_ifa); 2053 2054 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 2055 KKASSERT(vha->vha_ia != NULL); 2056 if (vha->vha_ia == carp_ia) 2057 break; 2058 } 2059 KASSERT(vha != NULL, ("no corresponding vhaddr %p", carp_ifa)); 2060 2061 /* 2062 * Remove the vhaddr from the list before deactivating 2063 * the vhaddr, so that the HMAC could be correctly 2064 * updated in carp_deactivate_vhaddr() 2065 */ 2066 carp_remove_vhaddr(sc, vha); 2067 2068 carp_deactivate_vhaddr(sc, vha, FALSE); 2069 kfree(vha, M_CARP); 2070 } 2071 2072 static void 2073 carp_config_addr(struct carp_softc *sc, struct ifaddr *carp_ifa) 2074 { 2075 struct carp_vhaddr *vha; 2076 struct in_ifaddr *carp_ia; 2077 2078 KKASSERT(carp_ifa->ifa_addr->sa_family == AF_INET); 2079 carp_ia = ifatoia(carp_ifa); 2080 2081 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 2082 KKASSERT(vha->vha_ia != NULL); 2083 if (vha->vha_ia == carp_ia) 2084 break; 2085 } 
2086 KASSERT(vha != NULL, ("no corresponding vhaddr %p", carp_ifa)); 2087 2088 /* Remove then reinsert, to keep the vhaddr list sorted */ 2089 carp_remove_vhaddr(sc, vha); 2090 carp_insert_vhaddr(sc, vha); 2091 2092 if (carp_config_vhaddr(sc, vha, NULL) != 0) { 2093 /* See the comment in carp_add_addr() */ 2094 carp_hmac_prepare(sc); 2095 carp_set_state(sc, INIT); 2096 carp_setrun(sc, 0); 2097 } 2098 } 2099 2100 #ifdef notyet 2101 2102 #ifdef INET6 2103 static int 2104 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6) 2105 { 2106 struct ifnet *ifp; 2107 struct carp_if *cif; 2108 struct in6_ifaddr *ia, *ia_if; 2109 struct ip6_moptions *im6o = &sc->sc_im6o; 2110 struct in6_multi_mship *imm; 2111 struct in6_addr in6; 2112 int own, error; 2113 2114 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { 2115 carp_setrun(sc, 0); 2116 return (0); 2117 } 2118 2119 /* we have to do it by hands to check we won't match on us */ 2120 ia_if = NULL; own = 0; 2121 for (ia = in6_ifaddr; ia; ia = ia->ia_next) { 2122 int i; 2123 2124 for (i = 0; i < 4; i++) { 2125 if ((sin6->sin6_addr.s6_addr32[i] & 2126 ia->ia_prefixmask.sin6_addr.s6_addr32[i]) != 2127 (ia->ia_addr.sin6_addr.s6_addr32[i] & 2128 ia->ia_prefixmask.sin6_addr.s6_addr32[i])) 2129 break; 2130 } 2131 /* and, yeah, we need a multicast-capable iface too */ 2132 if (ia->ia_ifp != &sc->sc_if && 2133 (ia->ia_ifp->if_flags & IFF_MULTICAST) && 2134 (i == 4)) { 2135 if (!ia_if) 2136 ia_if = ia; 2137 if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr, 2138 &ia->ia_addr.sin6_addr)) 2139 own++; 2140 } 2141 } 2142 2143 if (!ia_if) 2144 return (EADDRNOTAVAIL); 2145 ia = ia_if; 2146 ifp = ia->ia_ifp; 2147 2148 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 || 2149 (im6o->im6o_multicast_ifp && im6o->im6o_multicast_ifp != ifp)) 2150 return (EADDRNOTAVAIL); 2151 2152 if (!sc->sc_naddrs6) { 2153 im6o->im6o_multicast_ifp = ifp; 2154 2155 /* join CARP multicast address */ 2156 bzero(&in6, sizeof(in6)); 2157 in6.s6_addr16[0] = 
htons(0xff02); 2158 in6.s6_addr8[15] = 0x12; 2159 if (in6_setscope(&in6, ifp, NULL) != 0) 2160 goto cleanup; 2161 if ((imm = in6_joingroup(ifp, &in6, &error)) == NULL) 2162 goto cleanup; 2163 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain); 2164 2165 /* join solicited multicast address */ 2166 bzero(&in6, sizeof(in6)); 2167 in6.s6_addr16[0] = htons(0xff02); 2168 in6.s6_addr32[1] = 0; 2169 in6.s6_addr32[2] = htonl(1); 2170 in6.s6_addr32[3] = sin6->sin6_addr.s6_addr32[3]; 2171 in6.s6_addr8[12] = 0xff; 2172 if (in6_setscope(&in6, ifp, NULL) != 0) 2173 goto cleanup; 2174 if ((imm = in6_joingroup(ifp, &in6, &error)) == NULL) 2175 goto cleanup; 2176 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain); 2177 } 2178 2179 #ifdef foo 2180 if (!ifp->if_carp) { 2181 cif = kmalloc(sizeof(*cif), M_CARP, M_WAITOK | M_ZERO); 2182 2183 if ((error = ifpromisc(ifp, 1))) { 2184 kfree(cif, M_CARP); 2185 goto cleanup; 2186 } 2187 2188 TAILQ_INIT(&cif->vhif_vrs); 2189 ifp->if_carp = cif; 2190 } else { 2191 struct carp_softc *vr; 2192 2193 cif = ifp->if_carp; 2194 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { 2195 if (vr != sc && vr->sc_vhid == sc->sc_vhid) { 2196 error = EINVAL; 2197 goto cleanup; 2198 } 2199 } 2200 } 2201 #endif 2202 sc->sc_ia6 = ia; 2203 sc->sc_carpdev = ifp; 2204 2205 #ifdef foo 2206 { /* XXX prevent endless loop if already in queue */ 2207 struct carp_softc *vr, *after = NULL; 2208 int myself = 0; 2209 cif = ifp->if_carp; 2210 2211 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { 2212 if (vr == sc) 2213 myself = 1; 2214 if (vr->sc_vhid < sc->sc_vhid) 2215 after = vr; 2216 } 2217 2218 if (!myself) { 2219 /* We're trying to keep things in order */ 2220 if (after == NULL) 2221 TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list); 2222 else 2223 TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list); 2224 } 2225 } 2226 #endif 2227 2228 sc->sc_naddrs6++; 2229 if (own) 2230 sc->sc_advskew = 0; 2231 carp_sc_state(sc); 2232 carp_setrun(sc, 0); 2233 2234 return (0); 
2235 2236 cleanup: 2237 /* clean up multicast memberships */ 2238 if (!sc->sc_naddrs6) { 2239 while (!LIST_EMPTY(&im6o->im6o_memberships)) { 2240 imm = LIST_FIRST(&im6o->im6o_memberships); 2241 LIST_REMOVE(imm, i6mm_chain); 2242 in6_leavegroup(imm); 2243 } 2244 } 2245 return (error); 2246 } 2247 2248 static int 2249 carp_del_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6) 2250 { 2251 int error = 0; 2252 2253 if (!--sc->sc_naddrs6) { 2254 struct carp_if *cif = sc->sc_carpdev->if_carp; 2255 struct ip6_moptions *im6o = &sc->sc_im6o; 2256 2257 callout_stop(&sc->sc_ad_tmo); 2258 sc->sc_vhid = -1; 2259 while (!LIST_EMPTY(&im6o->im6o_memberships)) { 2260 struct in6_multi_mship *imm = 2261 LIST_FIRST(&im6o->im6o_memberships); 2262 2263 LIST_REMOVE(imm, i6mm_chain); 2264 in6_leavegroup(imm); 2265 } 2266 im6o->im6o_multicast_ifp = NULL; 2267 #ifdef foo 2268 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list); 2269 if (TAILQ_EMPTY(&cif->vhif_vrs)) { 2270 sc->sc_carpdev->if_carp = NULL; 2271 kfree(cif, M_IFADDR); 2272 } 2273 #endif 2274 } 2275 return (error); 2276 } 2277 #endif /* INET6 */ 2278 2279 #endif 2280 2281 static int 2282 carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr, struct ucred *cr) 2283 { 2284 struct carp_softc *sc = ifp->if_softc; 2285 struct ifreq *ifr = (struct ifreq *)addr; 2286 struct ifdrv *ifd = (struct ifdrv *)addr; 2287 int error = 0; 2288 2289 ASSERT_IFNET_SERIALIZED_ALL(ifp); 2290 2291 switch (cmd) { 2292 case SIOCSIFFLAGS: 2293 if (ifp->if_flags & IFF_UP) { 2294 if ((ifp->if_flags & IFF_RUNNING) == 0) 2295 carp_init(sc); 2296 } else if (ifp->if_flags & IFF_RUNNING) { 2297 carp_ioctl_stop(sc); 2298 } 2299 break; 2300 2301 case SIOCSVH: 2302 error = carp_ioctl_setvh(sc, ifr->ifr_data, cr); 2303 break; 2304 2305 case SIOCGVH: 2306 error = carp_ioctl_getvh(sc, ifr->ifr_data, cr); 2307 break; 2308 2309 case SIOCGDRVSPEC: 2310 switch (ifd->ifd_cmd) { 2311 case CARPGDEVNAME: 2312 error = carp_ioctl_getdevname(sc, ifd); 2313 break; 2314 2315 case 
CARPGVHADDR: 2316 error = carp_ioctl_getvhaddr(sc, ifd); 2317 break; 2318 2319 default: 2320 error = EINVAL; 2321 break; 2322 } 2323 break; 2324 2325 default: 2326 error = ether_ioctl(ifp, cmd, addr); 2327 break; 2328 } 2329 2330 return error; 2331 } 2332 2333 static void 2334 carp_ioctl_stop_dispatch(netmsg_t msg) 2335 { 2336 struct netmsg_carp *cmsg = (struct netmsg_carp *)msg; 2337 struct carp_softc *sc = cmsg->nc_softc; 2338 2339 carp_stop(sc, FALSE); 2340 lwkt_replymsg(&cmsg->base.lmsg, 0); 2341 } 2342 2343 static void 2344 carp_ioctl_stop(struct carp_softc *sc) 2345 { 2346 struct ifnet *ifp = &sc->arpcom.ac_if; 2347 struct netmsg_carp cmsg; 2348 2349 ASSERT_IFNET_SERIALIZED_ALL(ifp); 2350 2351 ifnet_deserialize_all(ifp); 2352 2353 bzero(&cmsg, sizeof(cmsg)); 2354 netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0, 2355 carp_ioctl_stop_dispatch); 2356 cmsg.nc_softc = sc; 2357 2358 lwkt_domsg(netisr_portfn(0), &cmsg.base.lmsg, 0); 2359 2360 ifnet_serialize_all(ifp); 2361 } 2362 2363 static void 2364 carp_ioctl_setvh_dispatch(netmsg_t msg) 2365 { 2366 struct netmsg_carp *cmsg = (struct netmsg_carp *)msg; 2367 struct carp_softc *sc = cmsg->nc_softc; 2368 struct ifnet *ifp = &sc->arpcom.ac_if; 2369 const struct carpreq *carpr = cmsg->nc_data; 2370 int error; 2371 2372 error = 1; 2373 if ((ifp->if_flags & IFF_RUNNING) && 2374 sc->sc_state != INIT && carpr->carpr_state != sc->sc_state) { 2375 switch (carpr->carpr_state) { 2376 case BACKUP: 2377 callout_stop(&sc->sc_ad_tmo); 2378 carp_set_state(sc, BACKUP); 2379 carp_setrun(sc, 0); 2380 carp_setroute(sc, RTM_DELETE); 2381 break; 2382 2383 case MASTER: 2384 carp_master_down(sc); 2385 break; 2386 2387 default: 2388 break; 2389 } 2390 } 2391 if (carpr->carpr_vhid > 0) { 2392 if (carpr->carpr_vhid > 255) { 2393 error = EINVAL; 2394 goto back; 2395 } 2396 if (sc->sc_carpdev) { 2397 struct carp_if *cif = sc->sc_carpdev->if_carp; 2398 struct carp_softc_container *scc; 2399 2400 TAILQ_FOREACH(scc, cif, scc_link) { 2401 
struct carp_softc *vr = scc->scc_softc; 2402 2403 if (vr != sc && 2404 vr->sc_vhid == carpr->carpr_vhid) { 2405 error = EEXIST; 2406 goto back; 2407 } 2408 } 2409 } 2410 sc->sc_vhid = carpr->carpr_vhid; 2411 2412 IF_LLADDR(ifp)[5] = sc->sc_vhid; 2413 bcopy(IF_LLADDR(ifp), sc->arpcom.ac_enaddr, 2414 ETHER_ADDR_LEN); 2415 2416 error--; 2417 } 2418 if (carpr->carpr_advbase > 0 || carpr->carpr_advskew > 0) { 2419 if (carpr->carpr_advskew >= 255) { 2420 error = EINVAL; 2421 goto back; 2422 } 2423 if (carpr->carpr_advbase > 255) { 2424 error = EINVAL; 2425 goto back; 2426 } 2427 sc->sc_advbase = carpr->carpr_advbase; 2428 sc->sc_advskew = carpr->carpr_advskew; 2429 error--; 2430 } 2431 bcopy(carpr->carpr_key, sc->sc_key, sizeof(sc->sc_key)); 2432 if (error > 0) { 2433 error = EINVAL; 2434 } else { 2435 error = 0; 2436 carp_setrun(sc, 0); 2437 } 2438 back: 2439 carp_hmac_prepare(sc); 2440 2441 lwkt_replymsg(&cmsg->base.lmsg, error); 2442 } 2443 2444 static int 2445 carp_ioctl_setvh(struct carp_softc *sc, void *udata, struct ucred *cr) 2446 { 2447 struct ifnet *ifp = &sc->arpcom.ac_if; 2448 struct netmsg_carp cmsg; 2449 struct carpreq carpr; 2450 int error; 2451 2452 ASSERT_IFNET_SERIALIZED_ALL(ifp); 2453 ifnet_deserialize_all(ifp); 2454 2455 error = priv_check_cred(cr, PRIV_ROOT, NULL_CRED_OKAY); 2456 if (error) 2457 goto back; 2458 2459 error = copyin(udata, &carpr, sizeof(carpr)); 2460 if (error) 2461 goto back; 2462 2463 bzero(&cmsg, sizeof(cmsg)); 2464 netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0, 2465 carp_ioctl_setvh_dispatch); 2466 cmsg.nc_softc = sc; 2467 cmsg.nc_data = &carpr; 2468 2469 error = lwkt_domsg(netisr_portfn(0), &cmsg.base.lmsg, 0); 2470 2471 back: 2472 ifnet_serialize_all(ifp); 2473 return error; 2474 } 2475 2476 static void 2477 carp_ioctl_getvh_dispatch(netmsg_t msg) 2478 { 2479 struct netmsg_carp *cmsg = (struct netmsg_carp *)msg; 2480 struct carp_softc *sc = cmsg->nc_softc; 2481 struct carpreq *carpr = cmsg->nc_data; 2482 2483 
carpr->carpr_state = sc->sc_state; 2484 carpr->carpr_vhid = sc->sc_vhid; 2485 carpr->carpr_advbase = sc->sc_advbase; 2486 carpr->carpr_advskew = sc->sc_advskew; 2487 bcopy(sc->sc_key, carpr->carpr_key, sizeof(carpr->carpr_key)); 2488 2489 lwkt_replymsg(&cmsg->base.lmsg, 0); 2490 } 2491 2492 static int 2493 carp_ioctl_getvh(struct carp_softc *sc, void *udata, struct ucred *cr) 2494 { 2495 struct ifnet *ifp = &sc->arpcom.ac_if; 2496 struct netmsg_carp cmsg; 2497 struct carpreq carpr; 2498 int error; 2499 2500 ASSERT_IFNET_SERIALIZED_ALL(ifp); 2501 ifnet_deserialize_all(ifp); 2502 2503 bzero(&cmsg, sizeof(cmsg)); 2504 netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0, 2505 carp_ioctl_getvh_dispatch); 2506 cmsg.nc_softc = sc; 2507 cmsg.nc_data = &carpr; 2508 2509 lwkt_domsg(netisr_portfn(0), &cmsg.base.lmsg, 0); 2510 2511 error = priv_check_cred(cr, PRIV_ROOT, NULL_CRED_OKAY); 2512 if (error) 2513 bzero(carpr.carpr_key, sizeof(carpr.carpr_key)); 2514 2515 error = copyout(&carpr, udata, sizeof(carpr)); 2516 2517 ifnet_serialize_all(ifp); 2518 return error; 2519 } 2520 2521 static void 2522 carp_ioctl_getdevname_dispatch(netmsg_t msg) 2523 { 2524 struct netmsg_carp *cmsg = (struct netmsg_carp *)msg; 2525 struct carp_softc *sc = cmsg->nc_softc; 2526 char *devname = cmsg->nc_data; 2527 2528 bzero(devname, IFNAMSIZ); 2529 if (sc->sc_carpdev != NULL) 2530 strlcpy(devname, sc->sc_carpdev->if_xname, IFNAMSIZ); 2531 2532 lwkt_replymsg(&cmsg->base.lmsg, 0); 2533 } 2534 2535 static int 2536 carp_ioctl_getdevname(struct carp_softc *sc, struct ifdrv *ifd) 2537 { 2538 struct ifnet *ifp = &sc->arpcom.ac_if; 2539 struct netmsg_carp cmsg; 2540 char devname[IFNAMSIZ]; 2541 int error; 2542 2543 ASSERT_IFNET_SERIALIZED_ALL(ifp); 2544 2545 if (ifd->ifd_len != sizeof(devname)) 2546 return EINVAL; 2547 2548 ifnet_deserialize_all(ifp); 2549 2550 bzero(&cmsg, sizeof(cmsg)); 2551 netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0, 2552 carp_ioctl_getdevname_dispatch); 2553 
cmsg.nc_softc = sc; 2554 cmsg.nc_data = devname; 2555 2556 lwkt_domsg(netisr_portfn(0), &cmsg.base.lmsg, 0); 2557 2558 error = copyout(devname, ifd->ifd_data, sizeof(devname)); 2559 2560 ifnet_serialize_all(ifp); 2561 return error; 2562 } 2563 2564 static void 2565 carp_init_dispatch(netmsg_t msg) 2566 { 2567 struct netmsg_carp *cmsg = (struct netmsg_carp *)msg; 2568 struct carp_softc *sc = cmsg->nc_softc; 2569 2570 sc->sc_if.if_flags |= IFF_RUNNING; 2571 carp_hmac_prepare(sc); 2572 carp_set_state(sc, INIT); 2573 carp_setrun(sc, 0); 2574 2575 lwkt_replymsg(&cmsg->base.lmsg, 0); 2576 } 2577 2578 static void 2579 carp_init(void *xsc) 2580 { 2581 struct carp_softc *sc = xsc; 2582 struct ifnet *ifp = &sc->arpcom.ac_if; 2583 struct netmsg_carp cmsg; 2584 2585 ASSERT_IFNET_SERIALIZED_ALL(ifp); 2586 2587 ifnet_deserialize_all(ifp); 2588 2589 bzero(&cmsg, sizeof(cmsg)); 2590 netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0, 2591 carp_init_dispatch); 2592 cmsg.nc_softc = sc; 2593 2594 lwkt_domsg(netisr_portfn(0), &cmsg.base.lmsg, 0); 2595 2596 ifnet_serialize_all(ifp); 2597 } 2598 2599 static int 2600 carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, 2601 struct rtentry *rt) 2602 { 2603 struct carp_softc *sc = ifp->if_softc; 2604 struct ifnet *carpdev; 2605 int error = 0; 2606 2607 carpdev = sc->sc_carpdev; 2608 if (carpdev != NULL) { 2609 /* 2610 * NOTE: 2611 * CARP's ifp is passed to backing device's 2612 * if_output method. 2613 */ 2614 carpdev->if_output(ifp, m, dst, rt); 2615 } else { 2616 m_freem(m); 2617 error = ENETUNREACH; 2618 } 2619 return error; 2620 } 2621 2622 /* 2623 * Start output on carp interface. This function should never be called. 
2624 */ 2625 static void 2626 carp_start(struct ifnet *ifp) 2627 { 2628 panic("%s: start called", ifp->if_xname); 2629 } 2630 2631 static void 2632 carp_set_state(struct carp_softc *sc, int state) 2633 { 2634 struct ifnet *cifp = &sc->sc_if; 2635 2636 if (sc->sc_state == state) 2637 return; 2638 sc->sc_state = state; 2639 2640 switch (sc->sc_state) { 2641 case BACKUP: 2642 cifp->if_link_state = LINK_STATE_DOWN; 2643 break; 2644 2645 case MASTER: 2646 cifp->if_link_state = LINK_STATE_UP; 2647 break; 2648 2649 default: 2650 cifp->if_link_state = LINK_STATE_UNKNOWN; 2651 break; 2652 } 2653 rt_ifmsg(cifp); 2654 } 2655 2656 void 2657 carp_group_demote_adj(struct ifnet *ifp, int adj) 2658 { 2659 struct ifg_list *ifgl; 2660 int *dm; 2661 2662 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) { 2663 if (!strcmp(ifgl->ifgl_group->ifg_group, IFG_ALL)) 2664 continue; 2665 dm = &ifgl->ifgl_group->ifg_carp_demoted; 2666 2667 if (*dm + adj >= 0) 2668 *dm += adj; 2669 else 2670 *dm = 0; 2671 2672 if (adj > 0 && *dm == 1) 2673 carp_send_ad_all(); 2674 CARP_LOG("%s demoted group %s to %d", ifp->if_xname, 2675 ifgl->ifgl_group->ifg_group, *dm); 2676 } 2677 } 2678 2679 #ifdef foo 2680 void 2681 carp_carpdev_state(void *v) 2682 { 2683 struct carp_if *cif = v; 2684 struct carp_softc *sc; 2685 2686 TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) 2687 carp_sc_state(sc); 2688 } 2689 2690 static void 2691 carp_sc_state(struct carp_softc *sc) 2692 { 2693 if (!(sc->sc_carpdev->if_flags & IFF_UP)) { 2694 callout_stop(&sc->sc_ad_tmo); 2695 callout_stop(&sc->sc_md_tmo); 2696 callout_stop(&sc->sc_md6_tmo); 2697 carp_set_state(sc, INIT); 2698 carp_setrun(sc, 0); 2699 if (!sc->sc_suppress) { 2700 carp_suppress_preempt++; 2701 if (carp_suppress_preempt == 1) 2702 carp_send_ad_all(); 2703 } 2704 sc->sc_suppress = 1; 2705 } else { 2706 carp_set_state(sc, INIT); 2707 carp_setrun(sc, 0); 2708 if (sc->sc_suppress) 2709 carp_suppress_preempt--; 2710 sc->sc_suppress = 0; 2711 } 2712 } 2713 #endif 2714 2715 
static void 2716 carp_stop(struct carp_softc *sc, boolean_t detach) 2717 { 2718 sc->sc_if.if_flags &= ~IFF_RUNNING; 2719 2720 callout_stop(&sc->sc_ad_tmo); 2721 callout_stop(&sc->sc_md_tmo); 2722 callout_stop(&sc->sc_md6_tmo); 2723 2724 if (!detach && sc->sc_state == MASTER) 2725 carp_send_ad(sc); 2726 2727 if (sc->sc_suppress) 2728 carp_suppress_preempt--; 2729 sc->sc_suppress = 0; 2730 2731 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) 2732 carp_suppress_preempt--; 2733 sc->sc_sendad_errors = 0; 2734 sc->sc_sendad_success = 0; 2735 2736 carp_set_state(sc, INIT); 2737 carp_setrun(sc, 0); 2738 } 2739 2740 static void 2741 carp_suspend(struct carp_softc *sc, boolean_t detach) 2742 { 2743 struct ifnet *cifp = &sc->sc_if; 2744 2745 carp_stop(sc, detach); 2746 2747 /* Retain the running state, if we are not dead yet */ 2748 if (!sc->sc_dead && (cifp->if_flags & IFF_UP)) 2749 cifp->if_flags |= IFF_RUNNING; 2750 } 2751 2752 static int 2753 carp_activate_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha, 2754 struct ifnet *ifp, struct in_ifaddr *ia_if, int own) 2755 { 2756 struct ip_moptions *imo = &sc->sc_imo; 2757 struct carp_if *ocif = ifp->if_carp; 2758 int error; 2759 2760 KKASSERT(vha->vha_ia != NULL); 2761 2762 KASSERT(ia_if != NULL, ("NULL backing address")); 2763 KASSERT(vha->vha_iaback == NULL, ("%p is already activated", vha)); 2764 KASSERT((vha->vha_flags & CARP_VHAF_OWNER) == 0, 2765 ("inactive vhaddr %p is the address owner", vha)); 2766 2767 KASSERT(sc->sc_carpdev == NULL || sc->sc_carpdev == ifp, 2768 ("%s is already on %s", sc->sc_if.if_xname, 2769 sc->sc_carpdev->if_xname)); 2770 2771 if (ocif == NULL) { 2772 KASSERT(sc->sc_carpdev == NULL, 2773 ("%s is already on %s", sc->sc_if.if_xname, 2774 sc->sc_carpdev->if_xname)); 2775 2776 error = ifpromisc(ifp, 1); 2777 if (error) 2778 return error; 2779 } else { 2780 struct carp_softc_container *scc; 2781 2782 TAILQ_FOREACH(scc, ocif, scc_link) { 2783 struct carp_softc *vr = scc->scc_softc; 2784 2785 
if (vr != sc && vr->sc_vhid == sc->sc_vhid) 2786 return EINVAL; 2787 } 2788 } 2789 2790 ifp->if_carp = carp_if_insert(ocif, sc); 2791 KASSERT(ifp->if_carp != NULL, ("%s carp_if_insert failed", __func__)); 2792 2793 sc->sc_ia = ia_if; 2794 sc->sc_carpdev = ifp; 2795 2796 /* 2797 * Make sure that all protocol threads see the sc_carpdev and 2798 * if_carp changes 2799 */ 2800 netmsg_service_sync(); 2801 2802 if (ocif != NULL && ifp->if_carp != ocif) { 2803 /* 2804 * The old carp list could be safely free now, 2805 * since no one can access it. 2806 */ 2807 carp_if_free(ocif); 2808 } 2809 2810 vha->vha_iaback = ia_if; 2811 sc->sc_naddrs++; 2812 2813 if (own) { 2814 vha->vha_flags |= CARP_VHAF_OWNER; 2815 2816 /* XXX save user configured advskew? */ 2817 sc->sc_advskew = 0; 2818 } 2819 2820 carp_addroute_vhaddr(sc, vha); 2821 2822 /* 2823 * Join the multicast group only after the backing interface 2824 * has been hooked with the CARP interface. 2825 */ 2826 KASSERT(imo->imo_multicast_ifp == NULL || 2827 imo->imo_multicast_ifp == &sc->sc_if, 2828 ("%s didn't leave mcast group on %s", 2829 sc->sc_if.if_xname, imo->imo_multicast_ifp->if_xname)); 2830 2831 if (imo->imo_num_memberships == 0) { 2832 struct in_addr addr; 2833 2834 addr.s_addr = htonl(INADDR_CARP_GROUP); 2835 imo->imo_membership[0] = in_addmulti(&addr, &sc->sc_if); 2836 if (imo->imo_membership[0] == NULL) { 2837 carp_deactivate_vhaddr(sc, vha, FALSE); 2838 return ENOBUFS; 2839 } 2840 2841 imo->imo_num_memberships++; 2842 imo->imo_multicast_ifp = &sc->sc_if; 2843 imo->imo_multicast_ttl = CARP_DFLTTL; 2844 imo->imo_multicast_loop = 0; 2845 } 2846 2847 carp_hmac_prepare(sc); 2848 carp_set_state(sc, INIT); 2849 carp_setrun(sc, 0); 2850 return 0; 2851 } 2852 2853 static void 2854 carp_deactivate_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha, 2855 boolean_t del_iaback) 2856 { 2857 KKASSERT(vha->vha_ia != NULL); 2858 2859 carp_hmac_prepare(sc); 2860 2861 if (vha->vha_iaback == NULL) { 2862 
KASSERT((vha->vha_flags & CARP_VHAF_OWNER) == 0, 2863 ("inactive vhaddr %p is the address owner", vha)); 2864 return; 2865 } 2866 2867 vha->vha_flags &= ~CARP_VHAF_OWNER; 2868 carp_delroute_vhaddr(sc, vha, del_iaback); 2869 2870 KKASSERT(sc->sc_naddrs > 0); 2871 vha->vha_iaback = NULL; 2872 sc->sc_naddrs--; 2873 if (!sc->sc_naddrs) { 2874 if (sc->sc_naddrs6) { 2875 carp_multicast_cleanup(sc); 2876 sc->sc_ia = NULL; 2877 } else { 2878 carp_detach(sc, FALSE, del_iaback); 2879 } 2880 } 2881 } 2882 2883 static void 2884 carp_link_addrs(struct carp_softc *sc, struct ifnet *ifp, struct ifaddr *ifa_if) 2885 { 2886 struct carp_vhaddr *vha; 2887 struct in_ifaddr *ia_if; 2888 2889 KKASSERT(ifa_if->ifa_addr->sa_family == AF_INET); 2890 ia_if = ifatoia(ifa_if); 2891 2892 /* 2893 * Test each inactive vhaddr against the newly added address. 2894 * If the newly added address could be the backing address, 2895 * then activate the matching vhaddr. 2896 */ 2897 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 2898 const struct in_ifaddr *ia; 2899 u_long iaddr; 2900 int own; 2901 2902 if (vha->vha_iaback != NULL) 2903 continue; 2904 2905 ia = vha->vha_ia; 2906 iaddr = ntohl(ia->ia_addr.sin_addr.s_addr); 2907 2908 if ((iaddr & ia_if->ia_subnetmask) != ia_if->ia_subnet) 2909 continue; 2910 2911 own = 0; 2912 if (ia->ia_addr.sin_addr.s_addr == 2913 ia_if->ia_addr.sin_addr.s_addr) 2914 own = 1; 2915 2916 carp_activate_vhaddr(sc, vha, ifp, ia_if, own); 2917 } 2918 } 2919 2920 static void 2921 carp_unlink_addrs(struct carp_softc *sc, struct ifnet *ifp, 2922 struct ifaddr *ifa_if) 2923 { 2924 struct carp_vhaddr *vha; 2925 struct in_ifaddr *ia_if; 2926 2927 KKASSERT(ifa_if->ifa_addr->sa_family == AF_INET); 2928 ia_if = ifatoia(ifa_if); 2929 2930 /* 2931 * Ad src address is deleted; set it to NULL. 2932 * Following loop will try pick up a new ad src address 2933 * if one of the vhaddr could retain its backing address. 
2934 */ 2935 if (sc->sc_ia == ia_if) 2936 sc->sc_ia = NULL; 2937 2938 /* 2939 * Test each active vhaddr against the deleted address. 2940 * If the deleted address is vhaddr address's backing 2941 * address, then deactivate the vhaddr. 2942 */ 2943 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 2944 if (vha->vha_iaback == NULL) 2945 continue; 2946 2947 if (vha->vha_iaback == ia_if) 2948 carp_deactivate_vhaddr(sc, vha, TRUE); 2949 else if (sc->sc_ia == NULL) 2950 sc->sc_ia = vha->vha_iaback; 2951 } 2952 } 2953 2954 static void 2955 carp_update_addrs(struct carp_softc *sc, struct ifaddr *ifa_del) 2956 { 2957 struct carp_vhaddr *vha; 2958 2959 KKASSERT(sc->sc_carpdev == NULL); 2960 2961 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) 2962 carp_config_vhaddr(sc, vha, ifatoia(ifa_del)); 2963 } 2964 2965 static void 2966 carp_ifaddr(void *arg __unused, struct ifnet *ifp, 2967 enum ifaddr_event event, struct ifaddr *ifa) 2968 { 2969 struct carp_softc *sc; 2970 2971 if (ifa->ifa_addr->sa_family != AF_INET) 2972 return; 2973 2974 KASSERT(&curthread->td_msgport == netisr_portfn(0), 2975 ("not in netisr0")); 2976 2977 if (ifp->if_type == IFT_CARP) { 2978 /* 2979 * Address is changed on carp(4) interface 2980 */ 2981 switch (event) { 2982 case IFADDR_EVENT_ADD: 2983 carp_add_addr(ifp->if_softc, ifa); 2984 break; 2985 2986 case IFADDR_EVENT_CHANGE: 2987 carp_config_addr(ifp->if_softc, ifa); 2988 break; 2989 2990 case IFADDR_EVENT_DELETE: 2991 carp_del_addr(ifp->if_softc, ifa); 2992 break; 2993 } 2994 return; 2995 } 2996 2997 /* 2998 * Address is changed on non-carp(4) interface 2999 */ 3000 if ((ifp->if_flags & IFF_MULTICAST) == 0) 3001 return; 3002 3003 LIST_FOREACH(sc, &carpif_list, sc_next) { 3004 if (sc->sc_carpdev != NULL && sc->sc_carpdev != ifp) { 3005 /* Not the parent iface; skip */ 3006 continue; 3007 } 3008 3009 switch (event) { 3010 case IFADDR_EVENT_ADD: 3011 carp_link_addrs(sc, ifp, ifa); 3012 break; 3013 3014 case IFADDR_EVENT_DELETE: 3015 if (sc->sc_carpdev != 
NULL) { 3016 carp_unlink_addrs(sc, ifp, ifa); 3017 if (sc->sc_carpdev == NULL) { 3018 /* 3019 * We no longer have the parent 3020 * interface, however, certain 3021 * virtual addresses, which are 3022 * not used because they can't 3023 * match the previous parent 3024 * interface's addresses, may now 3025 * match different interface's 3026 * addresses. 3027 */ 3028 carp_update_addrs(sc, ifa); 3029 } 3030 } else { 3031 /* 3032 * The carp(4) interface didn't have a 3033 * parent iface, so it is not possible 3034 * that it will contain any address to 3035 * be unlinked. 3036 */ 3037 } 3038 break; 3039 3040 case IFADDR_EVENT_CHANGE: 3041 if (sc->sc_carpdev == NULL) { 3042 /* 3043 * The carp(4) interface didn't have a 3044 * parent iface, so it is not possible 3045 * that it will contain any address to 3046 * be updated. 3047 */ 3048 carp_link_addrs(sc, ifp, ifa); 3049 } else { 3050 /* 3051 * First try breaking tie with the old 3052 * address. Then see whether we could 3053 * link certain vhaddr to the new address. 3054 * If that fails, i.e. carpdev is NULL, 3055 * we try a global update. 3056 * 3057 * NOTE: The above order is critical. 3058 */ 3059 carp_unlink_addrs(sc, ifp, ifa); 3060 carp_link_addrs(sc, ifp, ifa); 3061 if (sc->sc_carpdev == NULL) { 3062 /* 3063 * See the comment in the above 3064 * IFADDR_EVENT_DELETE block. 
3065 */ 3066 carp_update_addrs(sc, NULL); 3067 } 3068 } 3069 break; 3070 } 3071 } 3072 } 3073 3074 void 3075 carp_proto_ctlinput(netmsg_t msg) 3076 { 3077 int cmd = msg->ctlinput.nm_cmd; 3078 struct sockaddr *sa = msg->ctlinput.nm_arg; 3079 struct in_ifaddr_container *iac; 3080 3081 TAILQ_FOREACH(iac, &in_ifaddrheads[mycpuid], ia_link) { 3082 struct in_ifaddr *ia = iac->ia; 3083 struct ifnet *ifp = ia->ia_ifp; 3084 3085 if (ifp->if_type == IFT_CARP) 3086 continue; 3087 3088 if (ia->ia_ifa.ifa_addr == sa) { 3089 if (cmd == PRC_IFDOWN) { 3090 carp_ifaddr(NULL, ifp, IFADDR_EVENT_DELETE, 3091 &ia->ia_ifa); 3092 } else if (cmd == PRC_IFUP) { 3093 carp_ifaddr(NULL, ifp, IFADDR_EVENT_ADD, 3094 &ia->ia_ifa); 3095 } 3096 break; 3097 } 3098 } 3099 3100 lwkt_replymsg(&msg->lmsg, 0); 3101 } 3102 3103 struct ifnet * 3104 carp_parent(struct ifnet *cifp) 3105 { 3106 struct carp_softc *sc; 3107 3108 KKASSERT(cifp->if_type == IFT_CARP); 3109 sc = cifp->if_softc; 3110 3111 return sc->sc_carpdev; 3112 } 3113 3114 #define rtinitflags(x) \ 3115 (((x)->ia_ifp->if_flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) \ 3116 ? 
RTF_HOST : 0) 3117 3118 static int 3119 carp_addroute_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha) 3120 { 3121 struct in_ifaddr *ia, *iaback; 3122 int error; 3123 3124 if (sc->sc_state != MASTER) 3125 return 0; 3126 3127 ia = vha->vha_ia; 3128 KKASSERT(ia != NULL); 3129 3130 iaback = vha->vha_iaback; 3131 KKASSERT(iaback != NULL); 3132 3133 rtinit(&iaback->ia_ifa, RTM_DELETE, rtinitflags(iaback)); 3134 in_ifadown(&iaback->ia_ifa, 1); 3135 iaback->ia_flags &= ~IFA_ROUTE; 3136 3137 error = rtinit(&ia->ia_ifa, RTM_ADD, rtinitflags(ia) | RTF_UP); 3138 if (!error) 3139 ia->ia_flags |= IFA_ROUTE; 3140 return error; 3141 } 3142 3143 static void 3144 carp_delroute_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha, 3145 boolean_t del_iaback) 3146 { 3147 struct in_ifaddr *ia, *iaback; 3148 3149 ia = vha->vha_ia; 3150 KKASSERT(ia != NULL); 3151 3152 iaback = vha->vha_iaback; 3153 KKASSERT(iaback != NULL); 3154 3155 rtinit(&ia->ia_ifa, RTM_DELETE, rtinitflags(ia)); 3156 in_ifadown(&ia->ia_ifa, 1); 3157 ia->ia_flags &= ~IFA_ROUTE; 3158 3159 if (!del_iaback && (iaback->ia_ifp->if_flags & IFF_UP)) { 3160 int error; 3161 3162 error = rtinit(&iaback->ia_ifa, RTM_ADD, 3163 rtinitflags(iaback) | RTF_UP); 3164 if (!error) 3165 iaback->ia_flags |= IFA_ROUTE; 3166 } 3167 } 3168 3169 static int 3170 carp_modevent(module_t mod, int type, void *data) 3171 { 3172 switch (type) { 3173 case MOD_LOAD: 3174 LIST_INIT(&carpif_list); 3175 carp_ifdetach_event = 3176 EVENTHANDLER_REGISTER(ifnet_detach_event, carp_ifdetach, NULL, 3177 EVENTHANDLER_PRI_ANY); 3178 carp_ifaddr_event = 3179 EVENTHANDLER_REGISTER(ifaddr_event, carp_ifaddr, NULL, 3180 EVENTHANDLER_PRI_FIRST); 3181 if_clone_attach(&carp_cloner); 3182 break; 3183 3184 case MOD_UNLOAD: 3185 EVENTHANDLER_DEREGISTER(ifnet_detach_event, 3186 carp_ifdetach_event); 3187 EVENTHANDLER_DEREGISTER(ifaddr_event, 3188 carp_ifaddr_event); 3189 if_clone_detach(&carp_cloner); 3190 break; 3191 3192 default: 3193 return (EINVAL); 3194 } 3195 
return (0); 3196 } 3197 3198 static moduledata_t carp_mod = { 3199 "carp", 3200 carp_modevent, 3201 0 3202 }; 3203 DECLARE_MODULE(carp, carp_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); 3204