/*	$OpenBSD: if_pfsync.c,v 1.98 2008/06/29 08:42:15 mcbride Exp $	*/

/*
 * Copyright (c) 2002 Michael Shalayeff
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_carp.h"

#include <sys/param.h>
#include <sys/endian.h>
#include <sys/proc.h>
#include <sys/priv.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/sockio.h>
#include <sys/thread2.h>

#include <machine/inttypes.h>

#include <net/if.h>
#include <net/if_types.h>
#include <net/ifq_var.h>
#include <net/route.h>
#include <net/bpf.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip_carp.h>
#include <netinet/tcp.h>
#include <netinet/tcp_seq.h>

#ifdef INET
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#endif

#ifdef INET6
#include <netinet6/nd6.h>
#endif /* INET6 */

#include <net/pf/pfvar.h>
#include <net/pf/if_pfsync.h>

#define PFSYNCNAME	"pfsync"

#define PFSYNC_MINMTU	\
    (sizeof(struct pfsync_header) + sizeof(struct pf_state))

#ifdef PFSYNCDEBUG
#define DPRINTF(x)	do { if (pfsyncdebug) kprintf x ; } while (0)
int pfsyncdebug;
#else
#define DPRINTF(x)
#endif

struct pfsync_softc	*pfsyncif = NULL;
struct pfsyncstats	 pfsyncstats;

void	pfsyncattach(int);
static int	pfsync_clone_destroy(struct ifnet *);
static int	pfsync_clone_create(struct if_clone *, int, caddr_t);
void	pfsync_setmtu(struct pfsync_softc *, int);
int	pfsync_alloc_scrub_memory(struct pfsync_state_peer *,
	    struct pf_state_peer *);
int	pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
	    struct rtentry *);
int	pfsyncioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
void	pfsyncstart(struct ifnet *);

struct mbuf	*pfsync_get_mbuf(struct pfsync_softc *, u_int8_t, void **);
int	pfsync_request_update(struct pfsync_state_upd *, struct in_addr *);
int	pfsync_sendout(struct pfsync_softc *);
int	pfsync_sendout_mbuf(struct pfsync_softc *, struct mbuf *);
void	pfsync_timeout(void *);
void	pfsync_send_bus(struct pfsync_softc *, u_int8_t);
void	pfsync_bulk_update(void *);
void	pfsync_bulkfail(void *);

static MALLOC_DEFINE(M_PFSYNC, PFSYNCNAME, "Packet Filter State Sync. Interface");
static LIST_HEAD(pfsync_list, pfsync_softc) pfsync_list;

int	pfsync_sync_ok;

struct if_clone pfsync_cloner =
    IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create, pfsync_clone_destroy,
    1, 1);

void
pfsyncattach(int npfsync)
{
	if_clone_attach(&pfsync_cloner);
}

static int
pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param __unused)
{
	struct pfsync_softc *sc;
	struct ifnet *ifp;

	lwkt_gettoken(&pf_token);

	sc = kmalloc(sizeof(*sc), M_PFSYNC, M_WAITOK | M_ZERO);
	pfsync_sync_ok = 1;
	sc->sc_mbuf = NULL;
	sc->sc_mbuf_net = NULL;
	sc->sc_mbuf_tdb = NULL;
	sc->sc_statep.s = NULL;
	sc->sc_statep_net.s = NULL;
	sc->sc_statep_tdb.t = NULL;
	sc->sc_maxupdates = 128;
	sc->sc_sync_peer.s_addr = htonl(INADDR_PFSYNC_GROUP);
	sc->sc_sendaddr.s_addr = htonl(INADDR_PFSYNC_GROUP);
	sc->sc_ureq_received = 0;
	sc->sc_ureq_sent = 0;
	sc->sc_bulk_send_next = NULL;
	sc->sc_bulk_terminator = NULL;
	sc->sc_imo.imo_max_memberships = IP_MAX_MEMBERSHIPS;
	lwkt_reltoken(&pf_token);
	ifp = &sc->sc_if;
	ksnprintf(ifp->if_xname, sizeof ifp->if_xname, "pfsync%d", unit);
	if_initname(ifp, ifc->ifc_name, unit);
	ifp->if_ioctl = pfsyncioctl;
	ifp->if_output = pfsyncoutput;
	ifp->if_start = pfsyncstart;
	ifp->if_type = IFT_PFSYNC;
	ifq_set_maxlen(&ifp->if_snd, ifqmaxlen);
	ifp->if_hdrlen = PFSYNC_HDRLEN;
	ifp->if_baudrate = IF_Mbps(100);
	ifp->if_softc = sc;
	pfsync_setmtu(sc, MCLBYTES);
	callout_init(&sc->sc_tmo);
	/* callout_init(&sc->sc_tdb_tmo); XXX we don't support tdb (yet) */
	callout_init(&sc->sc_bulk_tmo);
	callout_init(&sc->sc_bulkfail_tmo);
	if_attach(ifp, NULL);

	LIST_INSERT_HEAD(&pfsync_list, sc, sc_next);

#if NCARP > 0
	if_addgroup(ifp, "carp");
#endif

#if NBPFILTER > 0
	bpfattach(&sc->sc_if, DLT_PFSYNC, PFSYNC_HDRLEN);
#endif
	lwkt_gettoken(&pf_token);

	lwkt_reltoken(&pf_token);
	return (0);
}

static int
pfsync_clone_destroy(struct ifnet *ifp)
{
	struct pfsync_softc *sc = ifp->if_softc;

	lwkt_gettoken(&pf_token);
	lwkt_reltoken(&pf_token);

	callout_stop(&sc->sc_tmo);
	/* callout_stop(&sc->sc_tdb_tmo); XXX we don't support tdb (yet) */
	callout_stop(&sc->sc_bulk_tmo);
	callout_stop(&sc->sc_bulkfail_tmo);
#if NCARP > 0
	if (!pfsync_sync_ok)
		carp_group_demote_adj(&sc->sc_if, -1);
#endif
#if NBPFILTER > 0
	bpfdetach(ifp);
#endif
	if_detach(ifp);
	lwkt_gettoken(&pf_token);
	LIST_REMOVE(sc, sc_next);
	kfree(sc, M_PFSYNC);
	lwkt_reltoken(&pf_token);

	return (0);
}

/*
 * Start output on the pfsync interface.
 */
void
pfsyncstart(struct ifnet *ifp)
{
	ifq_purge(&ifp->if_snd);
}
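/*
 * Allocate scrub state for one peer of a state if the incoming pfsync
 * peer carries scrub information and none has been allocated locally yet.
 */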
int
pfsync_alloc_scrub_memory(struct pfsync_state_peer *s,
    struct pf_state_peer *d)
{
	if (s->scrub.scrub_flag && d->scrub == NULL) {
		d->scrub = kmalloc(sizeof(struct pf_state_scrub), M_PFSYNC,
		    M_NOWAIT | M_ZERO);
		if (d->scrub == NULL)
			return (ENOMEM);
	}

	return (0);
}

void
pfsync_state_export(struct pfsync_state *sp, struct pf_state *st)
{
	bzero(sp, sizeof(struct pfsync_state));

	/* copy from state key */
	sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0];
	sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1];
	sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0];
	sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1];
	sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0];
	sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1];
	sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0];
	sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1];
	sp->proto = st->key[PF_SK_WIRE]->proto;
	sp->af = st->key[PF_SK_WIRE]->af;

	/* copy from state */
	strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname));
	bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr));
	sp->creation = htonl(time_second - st->creation);
	sp->expire = pf_state_expires(st);
	if (sp->expire <= time_second)
		sp->expire = htonl(0);
	else
		sp->expire = htonl(sp->expire - time_second);

	sp->direction = st->direction;
	sp->log = st->log;
	sp->timeout = st->timeout;
	sp->state_flags = st->state_flags;
	if (st->src_node)
		sp->sync_flags |= PFSYNC_FLAG_SRCNODE;
	if (st->nat_src_node)
		sp->sync_flags |= PFSYNC_FLAG_NATSRCNODE;

	bcopy(&st->id, &sp->id, sizeof(sp->id));
	sp->creatorid = st->creatorid;
	pf_state_peer_hton(&st->src, &sp->src);
	pf_state_peer_hton(&st->dst, &sp->dst);

	if (st->rule.ptr == NULL)
		sp->rule = htonl(-1);
	else
		sp->rule = htonl(st->rule.ptr->nr);
	if (st->anchor.ptr == NULL)
		sp->anchor = htonl(-1);
	else
		sp->anchor = htonl(st->anchor.ptr->nr);
	if (st->nat_rule.ptr == NULL)
		sp->nat_rule = htonl(-1);
	else
		sp->nat_rule = htonl(st->nat_rule.ptr->nr);

	pf_state_counter_hton(st->packets[0], sp->packets[0]);
	pf_state_counter_hton(st->packets[1], sp->packets[1]);
	pf_state_counter_hton(st->bytes[0], sp->bytes[0]);
	pf_state_counter_hton(st->bytes[1], sp->bytes[1]);
}
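/*
 * Import a wire-format state: look up the rule it refers to (only trusted
 * when the ruleset checksum matches or the state comes from the ioctl),
 * allocate the state and its key(s), convert the peers back to host byte
 * order and insert the result into the local state table.
 */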
int
pfsync_state_import(struct pfsync_state *sp, u_int8_t flags)
{
	struct pf_state *st = NULL;
	struct pf_state_key *skw = NULL, *sks = NULL;
	struct pf_rule *r = NULL;
	struct pfi_kif *kif;
	int pool_flags;
	int error;

	if (sp->creatorid == 0 && pf_status.debug >= PF_DEBUG_MISC) {
		kprintf("pfsync_state_import: invalid creator id:"
		    " %08x\n", ntohl(sp->creatorid));
		return (EINVAL);
	}

	if ((kif = pfi_kif_get(sp->ifname)) == NULL) {
		if (pf_status.debug >= PF_DEBUG_MISC)
			kprintf("pfsync_state_import: "
			    "unknown interface: %s\n", sp->ifname);
		if (flags & PFSYNC_SI_IOCTL)
			return (EINVAL);
		return (0);	/* skip this state */
	}

	/*
	 * If the ruleset checksums match or the state is coming from the
	 * ioctl, it's safe to associate the state with the rule of that
	 * number.
	 */
	if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) &&
	    (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) <
	    pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount)
		r = pf_main_ruleset.rules[
		    PF_RULESET_FILTER].active.ptr_array[ntohl(sp->rule)];
	else
		r = &pf_default_rule;

	if (r->max_states && r->states_cur >= r->max_states)
		goto cleanup;

	if (flags & PFSYNC_SI_IOCTL)
		pool_flags = M_WAITOK | M_NULLOK | M_ZERO;
	else
		pool_flags = M_WAITOK | M_ZERO;

	if ((st = kmalloc(sizeof(struct pf_state), M_PFSYNC,
	    pool_flags)) == NULL)
		goto cleanup;

	if ((skw = pf_alloc_state_key(pool_flags)) == NULL)
		goto cleanup;

	if (PF_ANEQ(&sp->key[PF_SK_WIRE].addr[0],
	    &sp->key[PF_SK_STACK].addr[0], sp->af) ||
	    PF_ANEQ(&sp->key[PF_SK_WIRE].addr[1],
	    &sp->key[PF_SK_STACK].addr[1], sp->af) ||
	    sp->key[PF_SK_WIRE].port[0] != sp->key[PF_SK_STACK].port[0] ||
	    sp->key[PF_SK_WIRE].port[1] != sp->key[PF_SK_STACK].port[1]) {
		if ((sks = pf_alloc_state_key(pool_flags)) == NULL)
			goto cleanup;
	} else
		sks = skw;

	/* allocate memory for scrub info */
	if (pfsync_alloc_scrub_memory(&sp->src, &st->src) ||
	    pfsync_alloc_scrub_memory(&sp->dst, &st->dst))
		goto cleanup;

	/* copy to state key(s) */
	skw->addr[0] = sp->key[PF_SK_WIRE].addr[0];
	skw->addr[1] = sp->key[PF_SK_WIRE].addr[1];
	skw->port[0] = sp->key[PF_SK_WIRE].port[0];
	skw->port[1] = sp->key[PF_SK_WIRE].port[1];
	skw->proto = sp->proto;
	skw->af = sp->af;
	if (sks != skw) {
		sks->addr[0] = sp->key[PF_SK_STACK].addr[0];
		sks->addr[1] = sp->key[PF_SK_STACK].addr[1];
		sks->port[0] = sp->key[PF_SK_STACK].port[0];
		sks->port[1] = sp->key[PF_SK_STACK].port[1];
		sks->proto = sp->proto;
		sks->af = sp->af;
	}

	/* copy to state */
	bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr));
	st->creation = time_second - ntohl(sp->creation);
	st->expire = time_second;
	if (sp->expire) {
		/* XXX No adaptive scaling. */
		st->expire -= r->timeout[sp->timeout] - ntohl(sp->expire);
	}

	st->direction = sp->direction;
	st->log = sp->log;
	st->timeout = sp->timeout;
	st->state_flags = sp->state_flags;
	if (!(flags & PFSYNC_SI_IOCTL))
		st->sync_flags = PFSTATE_FROMSYNC;

	bcopy(sp->id, &st->id, sizeof(st->id));
	st->creatorid = sp->creatorid;
	pf_state_peer_ntoh(&sp->src, &st->src);
	pf_state_peer_ntoh(&sp->dst, &st->dst);

	st->rule.ptr = r;
	st->nat_rule.ptr = NULL;
	st->anchor.ptr = NULL;
	st->rt_kif = NULL;

	st->pfsync_time = 0;

	/* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */
	r->states_cur++;
	r->states_tot++;

	if ((error = pf_state_insert(kif, skw, sks, st)) != 0) {
		/* XXX when we have nat_rule/anchors, use STATE_DEC_COUNTERS */
		r->states_cur--;
		goto cleanup_state;
	}

	return (0);

cleanup:
	error = ENOMEM;
	if (skw == sks)
		sks = NULL;
	if (skw != NULL)
		kfree(skw, M_PFSYNC);
	if (sks != NULL)
		kfree(sks, M_PFSYNC);

cleanup_state:	/* pf_state_insert frees the state keys */
	if (st) {
		if (st->dst.scrub)
			kfree(st->dst.scrub, M_PFSYNC);
		if (st->src.scrub)
			kfree(st->src.scrub, M_PFSYNC);
		kfree(st, M_PFSYNC);
	}
	return (error);
}
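/*
 * Input handler for IPPROTO_PFSYNC packets: validate the IP and pfsync
 * headers, then dispatch on the action code to insert, update or delete
 * states, answer update requests and track bulk updates.
 */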
void
pfsync_input(struct mbuf *m, ...)
{
	struct ip *ip = mtod(m, struct ip *);
	struct pfsync_header *ph;
	struct pfsync_softc *sc = pfsyncif;
	struct pf_state *st;
	struct pf_state_key *sk;
	struct pf_state_item *si;
	struct pf_state_cmp id_key;
	struct pfsync_state *sp;
	struct pfsync_state_upd *up;
	struct pfsync_state_del *dp;
	struct pfsync_state_clr *cp;
	struct pfsync_state_upd_req *rup;
	struct pfsync_state_bus *bus;
#ifdef IPSEC
	struct pfsync_tdb *pt;
#endif
	struct in_addr src;
	struct mbuf *mp;
	int iplen, action, error, i, count, offp, sfail, stale = 0;
	u_int8_t flags = 0;

	/*
	 * This function is not yet called from anywhere; still, we assume
	 * for safety that pf_token must be held.
	 */
	ASSERT_LWKT_TOKEN_HELD(&pf_token);

	pfsyncstats.pfsyncs_ipackets++;

	/* verify that we have a sync interface configured */
	if (!sc || !sc->sc_sync_ifp || !pf_status.running)
		goto done;

	/* verify that the IP TTL is 255. */
	if (sc->sc_sync_ifp != m->m_pkthdr.rcvif) {
		pfsyncstats.pfsyncs_badif++;
		goto done;
	}

	/* verify that the IP TTL is 255. */
	if (ip->ip_ttl != PFSYNC_DFLTTL) {
		pfsyncstats.pfsyncs_badttl++;
		goto done;
	}

	iplen = ip->ip_hl << 2;

	if (m->m_pkthdr.len < iplen + sizeof(*ph)) {
		pfsyncstats.pfsyncs_hdrops++;
		goto done;
	}

	if (iplen + sizeof(*ph) > m->m_len) {
		if ((m = m_pullup(m, iplen + sizeof(*ph))) == NULL) {
			pfsyncstats.pfsyncs_hdrops++;
			goto done;
		}
		ip = mtod(m, struct ip *);
	}
	ph = (struct pfsync_header *)((char *)ip + iplen);

	/* verify the version */
	if (ph->version != PFSYNC_VERSION) {
		pfsyncstats.pfsyncs_badver++;
		goto done;
	}

	action = ph->action;
	count = ph->count;

	/* make sure it's a valid action code */
	if (action >= PFSYNC_ACT_MAX) {
		pfsyncstats.pfsyncs_badact++;
		goto done;
	}

	/* Cheaper to grab this now than having to mess with mbufs later */
	src = ip->ip_src;

	if (!bcmp(&ph->pf_chksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH))
		flags |= PFSYNC_SI_CKSUM;

	switch (action) {
	case PFSYNC_ACT_CLR: {
		struct pf_state *nexts;
		struct pf_state_key *nextsk;
		struct pfi_kif *kif;
		u_int32_t creatorid;

		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
		    sizeof(*cp), &offp)) == NULL) {
			pfsyncstats.pfsyncs_badlen++;
			return;
		}
		cp = (struct pfsync_state_clr *)(mp->m_data + offp);
		creatorid = cp->creatorid;

		crit_enter();
		if (cp->ifname[0] == '\0') {
			for (st = RB_MIN(pf_state_tree_id, &tree_id);
			    st; st = nexts) {
				nexts = RB_NEXT(pf_state_tree_id, &tree_id, st);
				if (st->creatorid == creatorid) {
					st->sync_flags |= PFSTATE_FROMSYNC;
					pf_unlink_state(st);
				}
			}
		} else {
			if ((kif = pfi_kif_get(cp->ifname)) == NULL) {
				crit_exit();
				return;
			}
			/* XXX correct? */
			for (sk = RB_MIN(pf_state_tree,
			    &pf_statetbl); sk; sk = nextsk) {
				nextsk = RB_NEXT(pf_state_tree,
				    &pf_statetbl, sk);
				TAILQ_FOREACH(si, &sk->states, entry) {
					if (si->s->creatorid == creatorid) {
						si->s->sync_flags |=
						    PFSTATE_FROMSYNC;
						pf_unlink_state(si->s);
					}
				}
			}
		}
		crit_exit();

		break;
	}
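	/*
	 * PFSYNC_ACT_INS: the peer sent complete states; validate each one
	 * and import it into the local state table.
	 */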
	case PFSYNC_ACT_INS:
		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
		    count * sizeof(*sp), &offp)) == NULL) {
			pfsyncstats.pfsyncs_badlen++;
			return;
		}

		crit_enter();
		for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp);
		    i < count; i++, sp++) {
			/* check for invalid values */
			if (sp->timeout >= PFTM_MAX ||
			    sp->src.state > PF_TCPS_PROXY_DST ||
			    sp->dst.state > PF_TCPS_PROXY_DST ||
			    sp->direction > PF_OUT ||
			    (sp->af != AF_INET && sp->af != AF_INET6)) {
				if (pf_status.debug >= PF_DEBUG_MISC)
					kprintf("pfsync_input: PFSYNC_ACT_INS: "
					    "invalid value\n");
				pfsyncstats.pfsyncs_badval++;
				continue;
			}

			if ((error = pfsync_state_import(sp, flags))) {
				if (error == ENOMEM) {
					crit_exit();
					goto done;
				}
				continue;
			}
		}
		crit_exit();
		break;
	case PFSYNC_ACT_UPD:
		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
		    count * sizeof(*sp), &offp)) == NULL) {
			pfsyncstats.pfsyncs_badlen++;
			return;
		}

		crit_enter();
		for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp);
		    i < count; i++, sp++) {
			int flags = PFSYNC_FLAG_STALE;

			/* check for invalid values */
			if (sp->timeout >= PFTM_MAX ||
			    sp->src.state > PF_TCPS_PROXY_DST ||
			    sp->dst.state > PF_TCPS_PROXY_DST) {
				if (pf_status.debug >= PF_DEBUG_MISC)
					kprintf("pfsync_input: PFSYNC_ACT_UPD: "
					    "invalid value\n");
				pfsyncstats.pfsyncs_badval++;
				continue;
			}

			bcopy(sp->id, &id_key.id, sizeof(id_key.id));
			id_key.creatorid = sp->creatorid;

			st = pf_find_state_byid(&id_key);
			if (st == NULL) {
				/* insert the update */
				if (pfsync_state_import(sp, flags))
					pfsyncstats.pfsyncs_badstate++;
				continue;
			}
			sk = st->key[PF_SK_WIRE];	/* XXX right one? */
			sfail = 0;
			if (sk->proto == IPPROTO_TCP) {
				/*
				 * The state should never go backwards except
				 * for syn-proxy states.  Neither should the
				 * sequence window slide backwards.
				 */
				if (st->src.state > sp->src.state &&
				    (st->src.state < PF_TCPS_PROXY_SRC ||
				    sp->src.state >= PF_TCPS_PROXY_SRC))
					sfail = 1;
				else if (SEQ_GT(st->src.seqlo,
				    ntohl(sp->src.seqlo)))
					sfail = 3;
				else if (st->dst.state > sp->dst.state) {
					/*
					 * There might still be useful
					 * information about the src state
					 * here, so import that part of the
					 * update, then "fail" so we send the
					 * updated state back to the peer who
					 * is missing what we know.
					 */
					pf_state_peer_ntoh(&sp->src, &st->src);
					/* XXX do anything with timeouts? */
					sfail = 7;
					flags = 0;
				} else if (st->dst.state >= TCPS_SYN_SENT &&
				    SEQ_GT(st->dst.seqlo, ntohl(sp->dst.seqlo)))
					sfail = 4;
			} else {
				/*
				 * Non-TCP protocol state machines always go
				 * forwards.
				 */
				if (st->src.state > sp->src.state)
					sfail = 5;
				else if (st->dst.state > sp->dst.state)
					sfail = 6;
			}
			if (sfail) {
				if (pf_status.debug >= PF_DEBUG_MISC)
					kprintf("pfsync: %s stale update "
					    "(%d) id: %016jx "
					    "creatorid: %08x\n",
					    (sfail < 7 ? "ignoring"
					    : "partial"), sfail,
					    (uintmax_t)be64toh(st->id),
					    ntohl(st->creatorid));
				pfsyncstats.pfsyncs_stale++;

				if (!(sp->sync_flags & PFSTATE_STALE)) {
					/* we have a better state, send it */
					if (sc->sc_mbuf != NULL && !stale)
						pfsync_sendout(sc);
					stale++;
					if (!st->sync_flags)
						pfsync_pack_state(
						    PFSYNC_ACT_UPD, st, flags);
				}
				continue;
			}
			pfsync_alloc_scrub_memory(&sp->dst, &st->dst);
			pf_state_peer_ntoh(&sp->src, &st->src);
			pf_state_peer_ntoh(&sp->dst, &st->dst);
			st->expire = ntohl(sp->expire) + time_second;
			st->timeout = sp->timeout;
		}
		if (stale && sc->sc_mbuf != NULL)
			pfsync_sendout(sc);
		crit_exit();
		break;
	/*
	 * It's not strictly necessary for us to support the "uncompressed"
	 * delete action, but it's relatively simple and maintains consistency.
	 */
	case PFSYNC_ACT_DEL:
		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
		    count * sizeof(*sp), &offp)) == NULL) {
			pfsyncstats.pfsyncs_badlen++;
			return;
		}

		crit_enter();
		for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp);
		    i < count; i++, sp++) {
			bcopy(sp->id, &id_key.id, sizeof(id_key.id));
			id_key.creatorid = sp->creatorid;

			st = pf_find_state_byid(&id_key);
			if (st == NULL) {
				pfsyncstats.pfsyncs_badstate++;
				continue;
			}
			st->sync_flags |= PFSTATE_FROMSYNC;
			pf_unlink_state(st);
		}
		crit_exit();
		break;
	case PFSYNC_ACT_UPD_C: {
		int update_requested = 0;

		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
		    count * sizeof(*up), &offp)) == NULL) {
			pfsyncstats.pfsyncs_badlen++;
			return;
		}

		crit_enter();
		for (i = 0, up = (struct pfsync_state_upd *)(mp->m_data + offp);
		    i < count; i++, up++) {
			/* check for invalid values */
			if (up->timeout >= PFTM_MAX ||
			    up->src.state > PF_TCPS_PROXY_DST ||
			    up->dst.state > PF_TCPS_PROXY_DST) {
				if (pf_status.debug >= PF_DEBUG_MISC)
					kprintf("pfsync_input: "
					    "PFSYNC_ACT_UPD_C: "
					    "invalid value\n");
				pfsyncstats.pfsyncs_badval++;
				continue;
			}

			bcopy(up->id, &id_key.id, sizeof(id_key.id));
			id_key.creatorid = up->creatorid;

			st = pf_find_state_byid(&id_key);
			if (st == NULL) {
				/* We don't have this state.  Ask for it. */
				error = pfsync_request_update(up, &src);
				if (error == ENOMEM) {
					crit_exit();
					goto done;
				}
				update_requested = 1;
				pfsyncstats.pfsyncs_badstate++;
				continue;
			}
			sk = st->key[PF_SK_WIRE];	/* XXX right one? */
			sfail = 0;
			if (sk->proto == IPPROTO_TCP) {
				/*
				 * The state should never go backwards except
				 * for syn-proxy states.  Neither should the
				 * sequence window slide backwards.
				 */
				if (st->src.state > up->src.state &&
				    (st->src.state < PF_TCPS_PROXY_SRC ||
				    up->src.state >= PF_TCPS_PROXY_SRC))
					sfail = 1;
				else if (st->dst.state > up->dst.state)
					sfail = 2;
				else if (SEQ_GT(st->src.seqlo,
				    ntohl(up->src.seqlo)))
					sfail = 3;
				else if (st->dst.state >= TCPS_SYN_SENT &&
				    SEQ_GT(st->dst.seqlo, ntohl(up->dst.seqlo)))
					sfail = 4;
			} else {
				/*
				 * Non-TCP protocol state machines always go
				 * forwards.
				 */
				if (st->src.state > up->src.state)
					sfail = 5;
				else if (st->dst.state > up->dst.state)
					sfail = 6;
			}
			if (sfail) {
				if (pf_status.debug >= PF_DEBUG_MISC)
					kprintf("pfsync: ignoring stale update "
					    "(%d) id: %016" PRIx64 " "
					    "creatorid: %08x\n", sfail,
					    be64toh(st->id),
					    ntohl(st->creatorid));
				pfsyncstats.pfsyncs_stale++;

				/* we have a better state, send it out */
				if ((!stale || update_requested) &&
				    sc->sc_mbuf != NULL) {
					pfsync_sendout(sc);
					update_requested = 0;
				}
				stale++;
				if (!st->sync_flags)
					pfsync_pack_state(PFSYNC_ACT_UPD, st,
					    PFSYNC_FLAG_STALE);
				continue;
			}
			pfsync_alloc_scrub_memory(&up->dst, &st->dst);
			pf_state_peer_ntoh(&up->src, &st->src);
			pf_state_peer_ntoh(&up->dst, &st->dst);
			st->expire = ntohl(up->expire) + time_second;
			st->timeout = up->timeout;
		}
		if ((update_requested || stale) && sc->sc_mbuf)
			pfsync_sendout(sc);
		crit_exit();
		break;
	}
	case PFSYNC_ACT_DEL_C:
		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
		    count * sizeof(*dp), &offp)) == NULL) {
			pfsyncstats.pfsyncs_badlen++;
			return;
		}

		crit_enter();
		for (i = 0, dp = (struct pfsync_state_del *)(mp->m_data + offp);
		    i < count; i++, dp++) {
			bcopy(dp->id, &id_key.id, sizeof(id_key.id));
			id_key.creatorid = dp->creatorid;

			st = pf_find_state_byid(&id_key);
			if (st == NULL) {
				pfsyncstats.pfsyncs_badstate++;
				continue;
			}
			st->sync_flags |= PFSTATE_FROMSYNC;
			pf_unlink_state(st);
		}
		crit_exit();
		break;
	case PFSYNC_ACT_INS_F:
	case PFSYNC_ACT_DEL_F:
		/* not implemented */
		break;
	case PFSYNC_ACT_UREQ:
		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
		    count * sizeof(*rup), &offp)) == NULL) {
			pfsyncstats.pfsyncs_badlen++;
			return;
		}

		crit_enter();
		if (sc->sc_mbuf != NULL)
			pfsync_sendout(sc);
		for (i = 0,
		    rup = (struct pfsync_state_upd_req *)(mp->m_data + offp);
		    i < count; i++, rup++) {
			bcopy(rup->id, &id_key.id, sizeof(id_key.id));
			id_key.creatorid = rup->creatorid;

			if (id_key.id == 0 && id_key.creatorid == 0) {
				sc->sc_ureq_received = mycpu->gd_time_seconds;
				if (sc->sc_bulk_send_next == NULL)
					sc->sc_bulk_send_next =
					    TAILQ_FIRST(&state_list);
				sc->sc_bulk_terminator = sc->sc_bulk_send_next;
				if (pf_status.debug >= PF_DEBUG_MISC)
					kprintf("pfsync: received "
					    "bulk update request\n");
				pfsync_send_bus(sc, PFSYNC_BUS_START);
				lwkt_reltoken(&pf_token);
				callout_reset(&sc->sc_bulk_tmo, 1,
				    pfsync_bulk_update,
				    LIST_FIRST(&pfsync_list));
				lwkt_gettoken(&pf_token);
			} else {
				st = pf_find_state_byid(&id_key);
				if (st == NULL) {
					pfsyncstats.pfsyncs_badstate++;
					continue;
				}
				if (!st->sync_flags)
					pfsync_pack_state(PFSYNC_ACT_UPD,
					    st, 0);
			}
		}
		if (sc->sc_mbuf != NULL)
			pfsync_sendout(sc);
		crit_exit();
		break;
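	/*
	 * PFSYNC_ACT_BUS: bulk update status messages, used to decide
	 * whether a bulk update we requested has completed in time.
	 */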
	case PFSYNC_ACT_BUS:
		/* If we're not waiting for a bulk update, who cares. */
		if (sc->sc_ureq_sent == 0)
			break;

		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
		    sizeof(*bus), &offp)) == NULL) {
			pfsyncstats.pfsyncs_badlen++;
			return;
		}
		bus = (struct pfsync_state_bus *)(mp->m_data + offp);
		switch (bus->status) {
		case PFSYNC_BUS_START:
			lwkt_reltoken(&pf_token);
			callout_reset(&sc->sc_bulkfail_tmo,
			    pf_pool_limits[PF_LIMIT_STATES].limit /
			    (PFSYNC_BULKPACKETS * sc->sc_maxcount),
			    pfsync_bulkfail, LIST_FIRST(&pfsync_list));
			lwkt_gettoken(&pf_token);
			if (pf_status.debug >= PF_DEBUG_MISC)
				kprintf("pfsync: received bulk "
				    "update start\n");
			break;
		case PFSYNC_BUS_END:
			if (mycpu->gd_time_seconds - ntohl(bus->endtime) >=
			    sc->sc_ureq_sent) {
				/* that's it, we're happy */
				sc->sc_ureq_sent = 0;
				sc->sc_bulk_tries = 0;
				lwkt_reltoken(&pf_token);
				callout_stop(&sc->sc_bulkfail_tmo);
				lwkt_gettoken(&pf_token);
#if NCARP > 0
				if (!pfsync_sync_ok) {
					lwkt_reltoken(&pf_token);
					carp_group_demote_adj(&sc->sc_if, -1);
					lwkt_gettoken(&pf_token);
				}
#endif
				pfsync_sync_ok = 1;
				if (pf_status.debug >= PF_DEBUG_MISC)
					kprintf("pfsync: received valid "
					    "bulk update end\n");
			} else {
				if (pf_status.debug >= PF_DEBUG_MISC)
					kprintf("pfsync: received invalid "
					    "bulk update end: bad timestamp\n");
			}
			break;
		}
		break;
#ifdef IPSEC
	case PFSYNC_ACT_TDB_UPD:
		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
		    count * sizeof(*pt), &offp)) == NULL) {
			pfsyncstats.pfsyncs_badlen++;
			return;
		}
		crit_enter();
		for (i = 0, pt = (struct pfsync_tdb *)(mp->m_data + offp);
		    i < count; i++, pt++)
			pfsync_update_net_tdb(pt);
		crit_exit();
		break;
#endif
	}

done:
	if (m)
		m_freem(m);
}

int
pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
    struct rtentry *rt)
{
	m_freem(m);
	return (0);
}

/* ARGSUSED */
int
pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr)
{
	struct pfsync_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ip_moptions *imo = &sc->sc_imo;
	struct pfsyncreq pfsyncr;
	struct ifnet *sifp;
	int error;

	lwkt_gettoken(&pf_token);

	switch (cmd) {
	case SIOCSIFADDR:
	case SIOCAIFADDR:
	case SIOCSIFDSTADDR:
	case SIOCSIFFLAGS:
		if (ifp->if_flags & IFF_UP)
			ifp->if_flags |= IFF_RUNNING;
		else
			ifp->if_flags &= ~IFF_RUNNING;
		break;
	case SIOCSIFMTU:
		if (ifr->ifr_mtu < PFSYNC_MINMTU) {
			lwkt_reltoken(&pf_token);
			return (EINVAL);
		}
		if (ifr->ifr_mtu > MCLBYTES)
			ifr->ifr_mtu = MCLBYTES;
		crit_enter();
		if (ifr->ifr_mtu < ifp->if_mtu)
			pfsync_sendout(sc);
		pfsync_setmtu(sc, ifr->ifr_mtu);
		crit_exit();
		break;
	case SIOCGETPFSYNC:
		bzero(&pfsyncr, sizeof(pfsyncr));
		if (sc->sc_sync_ifp)
			strlcpy(pfsyncr.pfsyncr_syncdev,
			    sc->sc_sync_ifp->if_xname, IFNAMSIZ);
		pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer;
		pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates;
		lwkt_reltoken(&pf_token);
		if ((error = copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr))))
			return (error);
		lwkt_gettoken(&pf_token);
		break;
	case SIOCSETPFSYNC:
		if ((error = priv_check_cred(cr, PRIV_ROOT,
		    NULL_CRED_OKAY)) != 0) {
			lwkt_reltoken(&pf_token);
			return (error);
		}
		if ((error = copyin(ifr->ifr_data, &pfsyncr,
		    sizeof(pfsyncr)))) {
			lwkt_reltoken(&pf_token);
			return (error);
		}

		if (pfsyncr.pfsyncr_syncpeer.s_addr == 0)
			sc->sc_sync_peer.s_addr = htonl(INADDR_PFSYNC_GROUP);
		else
			sc->sc_sync_peer.s_addr =
			    pfsyncr.pfsyncr_syncpeer.s_addr;

		if (pfsyncr.pfsyncr_maxupdates > 255) {
			lwkt_reltoken(&pf_token);
			return (EINVAL);
		}
		sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates;

		if (pfsyncr.pfsyncr_syncdev[0] == 0) {
			sc->sc_sync_ifp = NULL;
			if (sc->sc_mbuf_net != NULL) {
				/* Don't keep stale pfsync packets around. */
				crit_enter();
				m_freem(sc->sc_mbuf_net);
				sc->sc_mbuf_net = NULL;
				sc->sc_statep_net.s = NULL;
				crit_exit();
			}
			if (imo->imo_num_memberships > 0) {
				in_delmulti(imo->imo_membership[
				    --imo->imo_num_memberships]);
				imo->imo_multicast_ifp = NULL;
			}
			break;
		}

		if ((sifp = ifunit(pfsyncr.pfsyncr_syncdev)) == NULL) {
			lwkt_reltoken(&pf_token);
			return (EINVAL);
		}

		crit_enter();
		if (sifp->if_mtu < sc->sc_if.if_mtu ||
		    (sc->sc_sync_ifp != NULL &&
		    sifp->if_mtu < sc->sc_sync_ifp->if_mtu) ||
		    sifp->if_mtu < MCLBYTES - sizeof(struct ip))
			pfsync_sendout(sc);
		sc->sc_sync_ifp = sifp;

		pfsync_setmtu(sc, sc->sc_if.if_mtu);

		if (imo->imo_num_memberships > 0) {
			in_delmulti(imo->imo_membership[
			    --imo->imo_num_memberships]);
			imo->imo_multicast_ifp = NULL;
		}

		if (sc->sc_sync_ifp &&
		    sc->sc_sync_peer.s_addr == htonl(INADDR_PFSYNC_GROUP)) {
			struct in_addr addr;

			if (!(sc->sc_sync_ifp->if_flags & IFF_MULTICAST)) {
				sc->sc_sync_ifp = NULL;
				lwkt_reltoken(&pf_token);
				crit_exit();
				return (EADDRNOTAVAIL);
			}

			addr.s_addr = htonl(INADDR_PFSYNC_GROUP);

			if ((imo->imo_membership[0] =
			    in_addmulti(&addr, sc->sc_sync_ifp)) == NULL) {
				sc->sc_sync_ifp = NULL;
				lwkt_reltoken(&pf_token);
				crit_exit();
				return (ENOBUFS);
			}
			imo->imo_num_memberships++;
			imo->imo_multicast_ifp = sc->sc_sync_ifp;
			imo->imo_multicast_ttl = PFSYNC_DFLTTL;
			imo->imo_multicast_loop = 0;
		}

		if (sc->sc_sync_ifp ||
		    sc->sc_sendaddr.s_addr != htonl(INADDR_PFSYNC_GROUP)) {
			/* Request a full state table update. */
			sc->sc_ureq_sent = mycpu->gd_time_seconds;
#if NCARP > 0
			if (pfsync_sync_ok)
				carp_group_demote_adj(&sc->sc_if, 1);
#endif
			pfsync_sync_ok = 0;
			if (pf_status.debug >= PF_DEBUG_MISC)
				kprintf("pfsync: requesting bulk update\n");
			lwkt_reltoken(&pf_token);
			callout_reset(&sc->sc_bulkfail_tmo, 5 * hz,
			    pfsync_bulkfail, LIST_FIRST(&pfsync_list));
			lwkt_gettoken(&pf_token);
			error = pfsync_request_update(NULL, NULL);
			if (error == ENOMEM) {
				lwkt_reltoken(&pf_token);
				crit_exit();
				return (ENOMEM);
			}
			pfsync_sendout(sc);
		}
		crit_exit();

		break;

	default:
		lwkt_reltoken(&pf_token);
		return (ENOTTY);
	}

	lwkt_reltoken(&pf_token);
	return (0);
}
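/*
 * Pick an MTU that holds a whole number of wire-format states (at most
 * 254 per packet) and program it into the interface.
 */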
void
pfsync_setmtu(struct pfsync_softc *sc, int mtu_req)
{
	int mtu;

	if (sc->sc_sync_ifp && sc->sc_sync_ifp->if_mtu < mtu_req)
		mtu = sc->sc_sync_ifp->if_mtu;
	else
		mtu = mtu_req;

	sc->sc_maxcount = (mtu - sizeof(struct pfsync_header)) /
	    sizeof(struct pfsync_state);
	if (sc->sc_maxcount > 254)
		sc->sc_maxcount = 254;
	sc->sc_if.if_mtu = sizeof(struct pfsync_header) +
	    sc->sc_maxcount * sizeof(struct pfsync_state);
}

struct mbuf *
pfsync_get_mbuf(struct pfsync_softc *sc, u_int8_t action, void **sp)
{
	struct pfsync_header *h;
	struct mbuf *m;
	int len;

	ASSERT_LWKT_TOKEN_HELD(&pf_token);

	MGETHDR(m, M_WAITOK, MT_DATA);
	if (m == NULL) {
		sc->sc_if.if_oerrors++;
		return (NULL);
	}

	switch (action) {
	case PFSYNC_ACT_CLR:
		len = sizeof(struct pfsync_header) +
		    sizeof(struct pfsync_state_clr);
		break;
	case PFSYNC_ACT_UPD_C:
		len = (sc->sc_maxcount * sizeof(struct pfsync_state_upd)) +
		    sizeof(struct pfsync_header);
		break;
	case PFSYNC_ACT_DEL_C:
		len = (sc->sc_maxcount * sizeof(struct pfsync_state_del)) +
		    sizeof(struct pfsync_header);
		break;
	case PFSYNC_ACT_UREQ:
		len = (sc->sc_maxcount * sizeof(struct pfsync_state_upd_req)) +
		    sizeof(struct pfsync_header);
		break;
	case PFSYNC_ACT_BUS:
		len = sizeof(struct pfsync_header) +
		    sizeof(struct pfsync_state_bus);
		break;
	case PFSYNC_ACT_TDB_UPD:
		len = (sc->sc_maxcount * sizeof(struct pfsync_tdb)) +
		    sizeof(struct pfsync_header);
		break;
	default:
		len = (sc->sc_maxcount * sizeof(struct pfsync_state)) +
		    sizeof(struct pfsync_header);
		break;
	}

	if (len > MHLEN) {
		MCLGET(m, M_WAITOK);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			sc->sc_if.if_oerrors++;
			return (NULL);
		}
		m->m_data += (MCLBYTES - len) & ~(sizeof(long) - 1);
	} else
		MH_ALIGN(m, len);

	m->m_pkthdr.rcvif = NULL;
	m->m_pkthdr.len = m->m_len = sizeof(struct pfsync_header);
	h = mtod(m, struct pfsync_header *);
	h->version = PFSYNC_VERSION;
	h->af = 0;
	h->count = 0;
	h->action = action;

	*sp = (void *)((char *)h + PFSYNC_HDRLEN);
	lwkt_reltoken(&pf_token);
	callout_reset(&sc->sc_tmo, hz, pfsync_timeout,
	    LIST_FIRST(&pfsync_list));
	lwkt_gettoken(&pf_token);
	return (m);
}
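/*
 * Queue a state for transmission, merging repeated updates for the same
 * state into one packet slot until sc_maxupdates is reached; when a sync
 * interface is configured, a "compressed" copy is built in parallel for
 * the wire.  A full packet is flushed immediately.
 */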
int
pfsync_pack_state(u_int8_t action, struct pf_state *st, int flags)
{
	struct ifnet *ifp = NULL;
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_header *h, *h_net;
	struct pfsync_state *sp = NULL;
	struct pfsync_state_upd *up = NULL;
	struct pfsync_state_del *dp = NULL;
	int ret = 0;
	u_int8_t i = 255, newaction = 0;

	if (sc == NULL)
		return (0);
	ifp = &sc->sc_if;

	/*
	 * If a packet falls in the forest and there's nobody around to
	 * hear, does it make a sound?
	 */
	if (ifp->if_bpf == NULL && sc->sc_sync_ifp == NULL &&
	    sc->sc_sync_peer.s_addr == htonl(INADDR_PFSYNC_GROUP)) {
		/* Don't leave any stale pfsync packets hanging around. */
		if (sc->sc_mbuf != NULL) {
			m_freem(sc->sc_mbuf);
			sc->sc_mbuf = NULL;
			sc->sc_statep.s = NULL;
		}
		return (0);
	}

	if (action >= PFSYNC_ACT_MAX)
		return (EINVAL);

	crit_enter();
	if (sc->sc_mbuf == NULL) {
		if ((sc->sc_mbuf = pfsync_get_mbuf(sc, action,
		    (void *)&sc->sc_statep.s)) == NULL) {
			crit_exit();
			return (ENOMEM);
		}
		h = mtod(sc->sc_mbuf, struct pfsync_header *);
	} else {
		h = mtod(sc->sc_mbuf, struct pfsync_header *);
		if (h->action != action) {
			pfsync_sendout(sc);
			if ((sc->sc_mbuf = pfsync_get_mbuf(sc, action,
			    (void *)&sc->sc_statep.s)) == NULL) {
				crit_exit();
				return (ENOMEM);
			}
			h = mtod(sc->sc_mbuf, struct pfsync_header *);
		} else {
			/*
			 * If it's an update, look in the packet to see if
			 * we already have an update for the state.
			 */
			if (action == PFSYNC_ACT_UPD && sc->sc_maxupdates) {
				struct pfsync_state *usp =
				    (void *)((char *)h + PFSYNC_HDRLEN);

				for (i = 0; i < h->count; i++) {
					if (!memcmp(usp->id, &st->id,
					    PFSYNC_ID_LEN) &&
					    usp->creatorid == st->creatorid) {
						sp = usp;
						sp->updates++;
						break;
					}
					usp++;
				}
			}
		}
	}

	st->pfsync_time = mycpu->gd_time_seconds;

	if (sp == NULL) {
		/* not a "duplicate" update */
		i = 255;
		sp = sc->sc_statep.s++;
		sc->sc_mbuf->m_pkthdr.len =
		    sc->sc_mbuf->m_len += sizeof(struct pfsync_state);
		h->count++;
		bzero(sp, sizeof(*sp));

		pfsync_state_export(sp, st);

		if (flags & PFSYNC_FLAG_STALE)
			sp->sync_flags |= PFSTATE_STALE;
	} else {
		pf_state_peer_hton(&st->src, &sp->src);
		pf_state_peer_hton(&st->dst, &sp->dst);

		if (st->expire <= time_second)
			sp->expire = htonl(0);
		else
			sp->expire = htonl(st->expire - time_second);
	}

	/* do we need to build "compressed" actions for network transfer? */
	if (sc->sc_sync_ifp && flags & PFSYNC_FLAG_COMPRESS) {
		switch (action) {
		case PFSYNC_ACT_UPD:
			newaction = PFSYNC_ACT_UPD_C;
			break;
		case PFSYNC_ACT_DEL:
			newaction = PFSYNC_ACT_DEL_C;
			break;
		default:
			/* by default we just send the uncompressed states */
			break;
		}
	}

	if (newaction) {
		if (sc->sc_mbuf_net == NULL) {
			if ((sc->sc_mbuf_net = pfsync_get_mbuf(sc, newaction,
			    (void *)&sc->sc_statep_net.s)) == NULL) {
				crit_exit();
				return (ENOMEM);
			}
		}
		h_net = mtod(sc->sc_mbuf_net, struct pfsync_header *);

		switch (newaction) {
		case PFSYNC_ACT_UPD_C:
			if (i != 255) {
				up = (void *)((char *)h_net +
				    PFSYNC_HDRLEN + (i * sizeof(*up)));
				up->updates++;
			} else {
				h_net->count++;
				sc->sc_mbuf_net->m_pkthdr.len =
				    sc->sc_mbuf_net->m_len += sizeof(*up);
				up = sc->sc_statep_net.u++;

				bzero(up, sizeof(*up));
				bcopy(&st->id, up->id, sizeof(up->id));
				up->creatorid = st->creatorid;
			}
			up->timeout = st->timeout;
			up->expire = sp->expire;
			up->src = sp->src;
			up->dst = sp->dst;
			break;
		case PFSYNC_ACT_DEL_C:
			sc->sc_mbuf_net->m_pkthdr.len =
			    sc->sc_mbuf_net->m_len += sizeof(*dp);
			dp = sc->sc_statep_net.d++;
			h_net->count++;

			bzero(dp, sizeof(*dp));
			bcopy(&st->id, dp->id, sizeof(dp->id));
			dp->creatorid = st->creatorid;
			break;
		}
	}

	if (h->count == sc->sc_maxcount ||
	    (sc->sc_maxupdates && (sp->updates >= sc->sc_maxupdates)))
		ret = pfsync_sendout(sc);

	crit_exit();
	return (ret);
}
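/*
 * Append an update request for the given state to the pending
 * PFSYNC_ACT_UREQ packet; a NULL argument requests a full bulk update
 * from the peer.
 */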
int
pfsync_request_update(struct pfsync_state_upd *up, struct in_addr *src)
{
	struct pfsync_header *h;
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_state_upd_req *rup;
	int ret = 0;

	if (sc == NULL)
		return (0);

	if (sc->sc_mbuf == NULL) {
		if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ,
		    (void *)&sc->sc_statep.s)) == NULL)
			return (ENOMEM);
		h = mtod(sc->sc_mbuf, struct pfsync_header *);
	} else {
		h = mtod(sc->sc_mbuf, struct pfsync_header *);
		if (h->action != PFSYNC_ACT_UREQ) {
			pfsync_sendout(sc);
			if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ,
			    (void *)&sc->sc_statep.s)) == NULL)
				return (ENOMEM);
			h = mtod(sc->sc_mbuf, struct pfsync_header *);
		}
	}

	if (src != NULL)
		sc->sc_sendaddr = *src;
	sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*rup);
	h->count++;
	rup = sc->sc_statep.r++;
	bzero(rup, sizeof(*rup));
	if (up != NULL) {
		bcopy(up->id, rup->id, sizeof(rup->id));
		rup->creatorid = up->creatorid;
	}

	if (h->count == sc->sc_maxcount)
		ret = pfsync_sendout(sc);

	return (ret);
}

int
pfsync_clear_states(u_int32_t creatorid, char *ifname)
{
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_state_clr *cp;
	int ret;

	if (sc == NULL)
		return (0);

	crit_enter();
	if (sc->sc_mbuf != NULL)
		pfsync_sendout(sc);
	if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_CLR,
	    (void *)&sc->sc_statep.c)) == NULL) {
		crit_exit();
		return (ENOMEM);
	}
	sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*cp);
	cp = sc->sc_statep.c;
	cp->creatorid = creatorid;
	if (ifname != NULL)
		strlcpy(cp->ifname, ifname, IFNAMSIZ);

	ret = pfsync_sendout(sc);
	crit_exit();
	return (ret);
}
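/*
 * Flush timer: send out whatever has been queued so that updates do not
 * linger when the packet never fills up.
 */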
void
pfsync_timeout(void *v)
{
	struct pfsync_softc *sc = v;

	crit_enter();
	pfsync_sendout(sc);
	crit_exit();
}

void
pfsync_send_bus(struct pfsync_softc *sc, u_int8_t status)
{
	struct pfsync_state_bus *bus;

	if (sc->sc_mbuf != NULL)
		pfsync_sendout(sc);

	if (pfsync_sync_ok &&
	    (sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_BUS,
	    (void *)&sc->sc_statep.b)) != NULL) {
		sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*bus);
		bus = sc->sc_statep.b;
		bus->creatorid = pf_status.hostid;
		bus->status = status;
		bus->endtime = htonl(mycpu->gd_time_seconds -
		    sc->sc_ureq_received);
		pfsync_sendout(sc);
	}
}

void
pfsync_bulk_update(void *v)
{
	struct pfsync_softc *sc = v;
	int i = 0;
	struct pf_state *state;

	ASSERT_LWKT_TOKEN_HELD(&pf_token);

	crit_enter();
	if (sc->sc_mbuf != NULL)
		pfsync_sendout(sc);

	/*
	 * Grab at most PFSYNC_BULKPACKETS worth of states which have not
	 * been sent since the latest request was made.
	 */
	state = sc->sc_bulk_send_next;
	if (state)
		do {
			/* send state update if syncable and not already sent */
			if (!state->sync_flags
			    && state->timeout < PFTM_MAX
			    && state->pfsync_time <= sc->sc_ureq_received) {
				pfsync_pack_state(PFSYNC_ACT_UPD, state, 0);
				i++;
			}

			/* figure next state to send */
			state = TAILQ_NEXT(state, entry_list);

			/* wrap to start of list if we hit the end */
			if (!state)
				state = TAILQ_FIRST(&state_list);
		} while (i < sc->sc_maxcount * PFSYNC_BULKPACKETS &&
		    state != sc->sc_bulk_terminator);

	if (!state || state == sc->sc_bulk_terminator) {
		/* we're done */
		pfsync_send_bus(sc, PFSYNC_BUS_END);
		sc->sc_ureq_received = 0;
		sc->sc_bulk_send_next = NULL;
		sc->sc_bulk_terminator = NULL;
		lwkt_reltoken(&pf_token);
		callout_stop(&sc->sc_bulk_tmo);
		lwkt_gettoken(&pf_token);
		if (pf_status.debug >= PF_DEBUG_MISC)
			kprintf("pfsync: bulk update complete\n");
	} else {
		/* look again for more in a bit */
		lwkt_reltoken(&pf_token);
		callout_reset(&sc->sc_bulk_tmo, 1, pfsync_bulk_update,
		    LIST_FIRST(&pfsync_list));
		lwkt_gettoken(&pf_token);
		sc->sc_bulk_send_next = state;
	}
	if (sc->sc_mbuf != NULL)
		pfsync_sendout(sc);
	crit_exit();
}

void
pfsync_bulkfail(void *v)
{
	struct pfsync_softc *sc = v;
	int error;

	ASSERT_LWKT_TOKEN_HELD(&pf_token);

	if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) {
		/* Try again in a bit */
		lwkt_reltoken(&pf_token);
		callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulkfail,
		    LIST_FIRST(&pfsync_list));
		lwkt_gettoken(&pf_token);
		crit_enter();
		error = pfsync_request_update(NULL, NULL);
		if (error == ENOMEM) {
			if (pf_status.debug >= PF_DEBUG_MISC)
				kprintf("pfsync: cannot allocate mbufs for "
				    "bulk update\n");
		} else
			pfsync_sendout(sc);
		crit_exit();
	} else {
		/* Pretend like the transfer was ok */
		sc->sc_ureq_sent = 0;
		sc->sc_bulk_tries = 0;
#if NCARP > 0
		if (!pfsync_sync_ok)
			carp_group_demote_adj(&sc->sc_if, -1);
#endif
		pfsync_sync_ok = 1;
		if (pf_status.debug >= PF_DEBUG_MISC)
			kprintf("pfsync: failed to receive "
			    "bulk update status\n");
		lwkt_reltoken(&pf_token);
		callout_stop(&sc->sc_bulkfail_tmo);
		lwkt_gettoken(&pf_token);
	}
}

/* This must be called in splnet() */
int
pfsync_sendout(struct pfsync_softc *sc)
{
#if NBPFILTER > 0
	struct ifnet *ifp = &sc->sc_if;
#endif
	struct mbuf *m;

	ASSERT_LWKT_TOKEN_HELD(&pf_token);

	lwkt_reltoken(&pf_token);
	callout_stop(&sc->sc_tmo);
	lwkt_gettoken(&pf_token);

	if (sc->sc_mbuf == NULL)
		return (0);
	m = sc->sc_mbuf;
	sc->sc_mbuf = NULL;
	sc->sc_statep.s = NULL;

#if NBPFILTER > 0
	if (ifp->if_bpf) {
		bpf_gettoken();
		if (ifp->if_bpf)
			bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
		bpf_reltoken();
	}
#endif

	if (sc->sc_mbuf_net) {
		m_freem(m);
		m = sc->sc_mbuf_net;
		sc->sc_mbuf_net = NULL;
		sc->sc_statep_net.s = NULL;
	}

	return pfsync_sendout_mbuf(sc, m);
}

int
pfsync_sendout_mbuf(struct pfsync_softc *sc, struct mbuf *m)
{
	struct sockaddr sa;
	struct ip *ip;

	if (sc->sc_sync_ifp ||
	    sc->sc_sync_peer.s_addr != htonl(INADDR_PFSYNC_GROUP)) {
		M_PREPEND(m, sizeof(struct ip), M_WAITOK);
		if (m == NULL) {
			pfsyncstats.pfsyncs_onomem++;
			return (0);
		}
		ip = mtod(m, struct ip *);
		ip->ip_v = IPVERSION;
		ip->ip_hl = sizeof(*ip) >> 2;
		ip->ip_tos = IPTOS_LOWDELAY;
		ip->ip_len = htons(m->m_pkthdr.len);
		ip->ip_id = htons(ip_randomid());
		ip->ip_off = htons(IP_DF);
		ip->ip_ttl = PFSYNC_DFLTTL;
		ip->ip_p = IPPROTO_PFSYNC;
		ip->ip_sum = 0;

		bzero(&sa, sizeof(sa));
		ip->ip_src.s_addr = INADDR_ANY;

		if (sc->sc_sendaddr.s_addr == htonl(INADDR_PFSYNC_GROUP))
			m->m_flags |= M_MCAST;
		ip->ip_dst = sc->sc_sendaddr;
		sc->sc_sendaddr.s_addr = sc->sc_sync_peer.s_addr;

		pfsyncstats.pfsyncs_opackets++;

		if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL))
			pfsyncstats.pfsyncs_oerrors++;
	} else
		m_freem(m);

	return (0);
}

static int
pfsync_modevent(module_t mod, int type, void *data)
{
	int error = 0;
	struct pfsync_softc *pfs_if, *tmp;

	lwkt_gettoken(&pf_token);

	switch (type) {
	case MOD_LOAD:
		LIST_INIT(&pfsync_list);
		lwkt_reltoken(&pf_token);
		if_clone_attach(&pfsync_cloner);
		lwkt_gettoken(&pf_token);
		/* Override the function pointer for pf_ioctl.c */
		break;

	case MOD_UNLOAD:
		lwkt_reltoken(&pf_token);
		if_clone_detach(&pfsync_cloner);
		lwkt_gettoken(&pf_token);
		LIST_FOREACH_MUTABLE(pfs_if, &pfsync_list, sc_next, tmp) {
			pfsync_clone_destroy(&pfs_if->sc_if);
		}
		break;

	default:
		error = EINVAL;
		break;
	}

	lwkt_reltoken(&pf_token);
	return error;
}

static moduledata_t pfsync_mod = {
	"pfsync",
	pfsync_modevent,
	0
};

#define PFSYNC_MODVER 44

DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
MODULE_VERSION(pfsync, PFSYNC_MODVER);