1 /* $OpenBSD: if_pfsync.c,v 1.101 2008/09/17 20:10:37 chl Exp $ */ 2 3 /* 4 * Copyright (c) 2002 Michael Shalayeff 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 20 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 24 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 25 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 26 * THE POSSIBILITY OF SUCH DAMAGE. 
 */

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/timeout.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>

#include <net/if.h>
#include <net/if_types.h>
#include <net/route.h>
#include <net/bpf.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/tcp.h>
#include <netinet/tcp_seq.h>
#include <sys/pool.h>

#ifdef INET
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#endif

#ifdef INET6
#include <netinet6/nd6.h>
#endif /* INET6 */

#include "carp.h"
#if NCARP > 0
#include <netinet/ip_carp.h>
#endif

#include <net/pfvar.h>
#include <net/if_pfsync.h>

#include "bpfilter.h"
#include "pfsync.h"

/* Smallest MTU that still fits one header plus one full state record. */
#define PFSYNC_MINMTU	\
    (sizeof(struct pfsync_header) + sizeof(struct pf_state))

#ifdef PFSYNCDEBUG
#define DPRINTF(x)    do { if (pfsyncdebug) printf x ; } while (0)
int pfsyncdebug;
#else
#define DPRINTF(x)
#endif

/* Singleton softc: only pfsync0 may exist (see pfsync_clone_create). */
struct pfsync_softc	*pfsyncif = NULL;
struct pfsyncstats	pfsyncstats;

void	pfsyncattach(int);
int	pfsync_clone_create(struct if_clone *, int);
int	pfsync_clone_destroy(struct ifnet *);
void	pfsync_setmtu(struct pfsync_softc *, int);
int	pfsync_alloc_scrub_memory(struct pfsync_state_peer *,
	    struct pf_state_peer *);
void	pfsync_update_net_tdb(struct pfsync_tdb *);
int	pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
	    struct rtentry *);
int	pfsyncioctl(struct ifnet *, u_long, caddr_t);
void	pfsyncstart(struct ifnet *);

struct mbuf *pfsync_get_mbuf(struct pfsync_softc *, u_int8_t, void **);
int	pfsync_request_update(struct pfsync_state_upd *, struct in_addr *);
int	pfsync_sendout(struct pfsync_softc *);
int	pfsync_tdb_sendout(struct pfsync_softc *);
int	pfsync_sendout_mbuf(struct pfsync_softc *, struct mbuf *);
void	pfsync_timeout(void *);
void	pfsync_tdb_timeout(void *);
void	pfsync_send_bus(struct pfsync_softc *, u_int8_t);
void	pfsync_bulk_update(void *);
void	pfsync_bulkfail(void *);

/* Nonzero once a bulk update has completed (or none is needed). */
int	pfsync_sync_ok;

struct if_clone	pfsync_cloner =
    IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create, pfsync_clone_destroy);

/*
 * Pseudo-device attach hook: register the "pfsync" interface cloner.
 */
void
pfsyncattach(int npfsync)
{
	if_clone_attach(&pfsync_cloner);
}

/*
 * Create the pfsync interface.  Only unit 0 is allowed; the softc is the
 * global singleton pfsyncif.  Allocates multicast membership storage,
 * initializes packet-building state and the four timeouts, and attaches
 * the ifnet (plus bpf and the "carp" interface group).
 */
int
pfsync_clone_create(struct if_clone *ifc, int unit)
{
	struct ifnet *ifp;

	if (unit != 0)
		return (EINVAL);

	pfsync_sync_ok = 1;
	if ((pfsyncif = malloc(sizeof(*pfsyncif), M_DEVBUF,
	    M_NOWAIT|M_ZERO)) == NULL)
		return (ENOMEM);
	/* No packet being built yet for any of the three streams. */
	pfsyncif->sc_mbuf = NULL;
	pfsyncif->sc_mbuf_net = NULL;
	pfsyncif->sc_mbuf_tdb = NULL;
	pfsyncif->sc_statep.s = NULL;
	pfsyncif->sc_statep_net.s = NULL;
	pfsyncif->sc_statep_tdb.t = NULL;
	/* Default: up to 128 compressed updates merged per state. */
	pfsyncif->sc_maxupdates = 128;
	pfsyncif->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP;
	pfsyncif->sc_sendaddr.s_addr = INADDR_PFSYNC_GROUP;
	pfsyncif->sc_ureq_received = 0;
	pfsyncif->sc_ureq_sent = 0;
	pfsyncif->sc_bulk_send_next = NULL;
	pfsyncif->sc_bulk_terminator = NULL;
	/* NOTE(review): softc uses M_NOWAIT but this uses M_WAITOK —
	 * presumably safe in the clone path; confirm calling context. */
	pfsyncif->sc_imo.imo_membership = (struct in_multi **)malloc(
	    (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_IPMOPTS,
	    M_WAITOK|M_ZERO);
	pfsyncif->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;
	ifp = &pfsyncif->sc_if;
	snprintf(ifp->if_xname, sizeof ifp->if_xname, "pfsync%d", unit);
	ifp->if_softc = pfsyncif;
	ifp->if_ioctl = pfsyncioctl;
	ifp->if_output = pfsyncoutput;
	ifp->if_start = pfsyncstart;
	ifp->if_type = IFT_PFSYNC;
	ifp->if_snd.ifq_maxlen = ifqmaxlen;
	ifp->if_hdrlen = PFSYNC_HDRLEN;
	pfsync_setmtu(pfsyncif, ETHERMTU);
	timeout_set(&pfsyncif->sc_tmo, pfsync_timeout, pfsyncif);
	timeout_set(&pfsyncif->sc_tdb_tmo, pfsync_tdb_timeout, pfsyncif);
	timeout_set(&pfsyncif->sc_bulk_tmo, pfsync_bulk_update, pfsyncif);
	timeout_set(&pfsyncif->sc_bulkfail_tmo, pfsync_bulkfail, pfsyncif);
	if_attach(ifp);
	if_alloc_sadl(ifp);

#if NCARP > 0
	if_addgroup(ifp, "carp");
#endif

#if NBPFILTER > 0
	bpfattach(&pfsyncif->sc_if.if_bpf, ifp, DLT_PFSYNC, PFSYNC_HDRLEN);
#endif

	return (0);
}

/*
 * Destroy the pfsync interface: cancel all timeouts, undo the carp demotion
 * taken out while waiting for a bulk update, detach from bpf and the network
 * stack, and release the singleton softc.
 */
int
pfsync_clone_destroy(struct ifnet *ifp)
{
	struct pfsync_softc *sc = ifp->if_softc;

	timeout_del(&sc->sc_tmo);
	timeout_del(&sc->sc_tdb_tmo);
	timeout_del(&sc->sc_bulk_tmo);
	timeout_del(&sc->sc_bulkfail_tmo);
#if NCARP > 0
	/* A pending (failed) bulk update holds one demotion reference. */
	if (!pfsync_sync_ok)
		carp_group_demote_adj(&sc->sc_if, -1);
#endif
#if NBPFILTER > 0
	bpfdetach(ifp);
#endif
	if_detach(ifp);
	free(pfsyncif->sc_imo.imo_membership, M_IPMOPTS);
	free(pfsyncif, M_DEVBUF);
	pfsyncif = NULL;
	return (0);
}

/*
 * Start output on the pfsync interface.
 */
void
pfsyncstart(struct ifnet *ifp)
{
	struct mbuf *m;
	int s;

	/*
	 * pfsync never transmits via its own send queue; anything queued
	 * here is accounted as a drop and freed.
	 */
	for (;;) {
		s = splnet();
		IF_DROP(&ifp->if_snd);
		IF_DEQUEUE(&ifp->if_snd, m);
		splx(s);

		if (m == NULL)
			return;
		else
			m_freem(m);
	}
}

/*
 * Allocate scrub (sequence-tracking) state for a peer if the wire record
 * says the sender had one and we don't.  Returns ENOMEM on pool exhaustion.
 */
int
pfsync_alloc_scrub_memory(struct pfsync_state_peer *s,
    struct pf_state_peer *d)
{
	if (s->scrub.scrub_flag && d->scrub == NULL) {
		d->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT | PR_ZERO);
		if (d->scrub == NULL)
			return (ENOMEM);
	}

	return (0);
}

/*
 * Marshal a kernel pf_state into the network wire format.  Multi-byte
 * fields are converted to network byte order; times are converted from
 * absolute timestamps to relative seconds (creation = age, expire =
 * remaining lifetime per pf_state_expires(), clamped at 0).
 */
void
pfsync_state_export(struct pfsync_state *sp, struct pf_state *st)
{
	bzero(sp, sizeof(struct pfsync_state));

	/* copy from state key */
	sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0];
	sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1];
	sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0];
	sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1];
	sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0];
	sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1];
	sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0];
	sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1];
	sp->proto = st->key[PF_SK_WIRE]->proto;
	sp->af = st->key[PF_SK_WIRE]->af;

	/* copy from state */
	strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname));
	bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr));
	sp->creation = htonl(time_second - st->creation);
	/* Send remaining lifetime, not the absolute expiry time. */
	sp->expire = pf_state_expires(st);
	if (sp->expire <= time_second)
		sp->expire = htonl(0);
	else
		sp->expire = htonl(sp->expire - time_second);

	sp->direction = st->direction;
	sp->log = st->log;
	sp->timeout = st->timeout;
	sp->state_flags = st->state_flags;
	if (st->src_node)
		sp->sync_flags |= PFSYNC_FLAG_SRCNODE;
	if (st->nat_src_node)
		sp->sync_flags |= PFSYNC_FLAG_NATSRCNODE;

	bcopy(&st->id, &sp->id, sizeof(sp->id));
	sp->creatorid = st->creatorid;
	pf_state_peer_hton(&st->src, &sp->src);
	pf_state_peer_hton(&st->dst, &sp->dst);

	/* Rule numbers; -1 marks "no rule" / default rule. */
	if (st->rule.ptr == NULL)
		sp->rule = htonl(-1);
	else
		sp->rule = htonl(st->rule.ptr->nr);
	if (st->anchor.ptr == NULL)
		sp->anchor = htonl(-1);
	else
		sp->anchor = htonl(st->anchor.ptr->nr);
	if (st->nat_rule.ptr == NULL)
		sp->nat_rule = htonl(-1);
	else
		sp->nat_rule = htonl(st->nat_rule.ptr->nr);

	pf_state_counter_hton(st->packets[0], sp->packets[0]);
	pf_state_counter_hton(st->packets[1], sp->packets[1]);
	pf_state_counter_hton(st->bytes[0], sp->bytes[0]);
	pf_state_counter_hton(st->bytes[1], sp->bytes[1]);

}

/*
 * Unmarshal a wire-format state and insert it into the local state table.
 * flags is a mask of PFSYNC_SI_* (IOCTL: from userland; CKSUM: ruleset
 * checksums match).  Returns 0 on success or if the state is skipped,
 * EINVAL/ENOMEM on error.
 */
int
pfsync_state_import(struct pfsync_state *sp, u_int8_t flags)
{
	struct pf_state	*st = NULL;
	struct pf_state_key *skw = NULL, *sks = NULL;
	struct pf_rule *r = NULL;
	struct pfi_kif	*kif;
	int pool_flags;
	int error;

	/* NOTE(review): a zero creator id is only rejected when debug
	 * logging is enabled — looks inconsistent; confirm intent. */
	if (sp->creatorid == 0 && pf_status.debug >= PF_DEBUG_MISC) {
		printf("pfsync_state_import: invalid creator id:"
		    " %08x\n", ntohl(sp->creatorid));
		return (EINVAL);
	}

	if ((kif = pfi_kif_get(sp->ifname)) == NULL) {
		if (pf_status.debug >= PF_DEBUG_MISC)
			printf("pfsync_state_import: "
			    "unknown interface: %s\n", sp->ifname);
		if (flags & PFSYNC_SI_IOCTL)
			return (EINVAL);
		return (0);	/* skip this state */
	}

	/*
	 * If the ruleset checksums match or the state is coming from the ioctl,
	 * it's safe to associate the state with the rule of that number.
323 */ 324 if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) && 325 (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) < 326 pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount) 327 r = pf_main_ruleset.rules[ 328 PF_RULESET_FILTER].active.ptr_array[ntohl(sp->rule)]; 329 else 330 r = &pf_default_rule; 331 332 if ((r->max_states && r->states_cur >= r->max_states)) 333 goto cleanup; 334 335 if (flags & PFSYNC_SI_IOCTL) 336 pool_flags = PR_WAITOK | PR_LIMITFAIL | PR_ZERO; 337 else 338 pool_flags = PR_LIMITFAIL | PR_ZERO; 339 340 if ((st = pool_get(&pf_state_pl, pool_flags)) == NULL) 341 goto cleanup; 342 343 if ((skw = pf_alloc_state_key(pool_flags)) == NULL) 344 goto cleanup; 345 346 if (PF_ANEQ(&sp->key[PF_SK_WIRE].addr[0], 347 &sp->key[PF_SK_STACK].addr[0], sp->af) || 348 PF_ANEQ(&sp->key[PF_SK_WIRE].addr[1], 349 &sp->key[PF_SK_STACK].addr[1], sp->af) || 350 sp->key[PF_SK_WIRE].port[0] != sp->key[PF_SK_STACK].port[0] || 351 sp->key[PF_SK_WIRE].port[1] != sp->key[PF_SK_STACK].port[1]) { 352 if ((sks = pf_alloc_state_key(pool_flags)) == NULL) 353 goto cleanup; 354 } else 355 sks = skw; 356 357 /* allocate memory for scrub info */ 358 if (pfsync_alloc_scrub_memory(&sp->src, &st->src) || 359 pfsync_alloc_scrub_memory(&sp->dst, &st->dst)) 360 goto cleanup; 361 362 /* copy to state key(s) */ 363 skw->addr[0] = sp->key[PF_SK_WIRE].addr[0]; 364 skw->addr[1] = sp->key[PF_SK_WIRE].addr[1]; 365 skw->port[0] = sp->key[PF_SK_WIRE].port[0]; 366 skw->port[1] = sp->key[PF_SK_WIRE].port[1]; 367 skw->proto = sp->proto; 368 skw->af = sp->af; 369 if (sks != skw) { 370 sks->addr[0] = sp->key[PF_SK_STACK].addr[0]; 371 sks->addr[1] = sp->key[PF_SK_STACK].addr[1]; 372 sks->port[0] = sp->key[PF_SK_STACK].port[0]; 373 sks->port[1] = sp->key[PF_SK_STACK].port[1]; 374 sks->proto = sp->proto; 375 sks->af = sp->af; 376 } 377 378 /* copy to state */ 379 bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr)); 380 st->creation = time_second - ntohl(sp->creation); 381 st->expire 
= time_second; 382 if (sp->expire) { 383 /* XXX No adaptive scaling. */ 384 st->expire -= r->timeout[sp->timeout] - ntohl(sp->expire); 385 } 386 387 st->expire = ntohl(sp->expire) + time_second; 388 st->direction = sp->direction; 389 st->log = sp->log; 390 st->timeout = sp->timeout; 391 st->state_flags = sp->state_flags; 392 if (!(flags & PFSYNC_SI_IOCTL)) 393 st->sync_flags = PFSTATE_FROMSYNC; 394 395 bcopy(sp->id, &st->id, sizeof(st->id)); 396 st->creatorid = sp->creatorid; 397 pf_state_peer_ntoh(&sp->src, &st->src); 398 pf_state_peer_ntoh(&sp->dst, &st->dst); 399 400 st->rule.ptr = r; 401 st->nat_rule.ptr = NULL; 402 st->anchor.ptr = NULL; 403 st->rt_kif = NULL; 404 405 st->pfsync_time = 0; 406 407 408 /* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */ 409 r->states_cur++; 410 r->states_tot++; 411 412 if ((error = pf_state_insert(kif, skw, sks, st)) != 0) { 413 /* XXX when we have nat_rule/anchors, use STATE_DEC_COUNTERS */ 414 r->states_cur--; 415 goto cleanup_state; 416 } 417 418 return (0); 419 420 cleanup: 421 error = ENOMEM; 422 if (skw == sks) 423 sks = NULL; 424 if (skw != NULL) 425 pool_put(&pf_state_key_pl, skw); 426 if (sks != NULL) 427 pool_put(&pf_state_key_pl, sks); 428 429 cleanup_state: /* pf_state_insert frees the state keys */ 430 if (st) { 431 if (st->dst.scrub) 432 pool_put(&pf_state_scrub_pl, st->dst.scrub); 433 if (st->src.scrub) 434 pool_put(&pf_state_scrub_pl, st->src.scrub); 435 pool_put(&pf_state_pl, st); 436 } 437 return (error); 438 } 439 440 void 441 pfsync_input(struct mbuf *m, ...) 
{
	struct ip *ip = mtod(m, struct ip *);
	struct pfsync_header *ph;
	struct pfsync_softc *sc = pfsyncif;
	struct pf_state *st;
	struct pf_state_key *sk;
	struct pf_state_item *si;
	struct pf_state_cmp id_key;
	struct pfsync_state *sp;
	struct pfsync_state_upd *up;
	struct pfsync_state_del *dp;
	struct pfsync_state_clr *cp;
	struct pfsync_state_upd_req *rup;
	struct pfsync_state_bus *bus;
#ifdef IPSEC
	struct pfsync_tdb *pt;
#endif
	struct in_addr src;
	struct mbuf *mp;
	int iplen, action, error, i, s, count, offp, sfail, stale = 0;
	u_int8_t flags = 0;

	pfsyncstats.pfsyncs_ipackets++;

	/* verify that we have a sync interface configured */
	if (!sc || !sc->sc_sync_ifp || !pf_status.running)
		goto done;

	/* verify that the packet came in on the right interface */
	if (sc->sc_sync_ifp != m->m_pkthdr.rcvif) {
		pfsyncstats.pfsyncs_badif++;
		goto done;
	}

	/* verify that the IP TTL is 255.
	*/
	if (ip->ip_ttl != PFSYNC_DFLTTL) {
		pfsyncstats.pfsyncs_badttl++;
		goto done;
	}

	iplen = ip->ip_hl << 2;

	if (m->m_pkthdr.len < iplen + sizeof(*ph)) {
		pfsyncstats.pfsyncs_hdrops++;
		goto done;
	}

	/* Make the pfsync header contiguous before touching it. */
	if (iplen + sizeof(*ph) > m->m_len) {
		if ((m = m_pullup(m, iplen + sizeof(*ph))) == NULL) {
			pfsyncstats.pfsyncs_hdrops++;
			goto done;
		}
		ip = mtod(m, struct ip *);
	}
	ph = (struct pfsync_header *)((char *)ip + iplen);

	/* verify the version */
	if (ph->version != PFSYNC_VERSION) {
		pfsyncstats.pfsyncs_badver++;
		goto done;
	}

	action = ph->action;
	count = ph->count;

	/* make sure it's a valid action code */
	if (action >= PFSYNC_ACT_MAX) {
		pfsyncstats.pfsyncs_badact++;
		goto done;
	}

	/* Cheaper to grab this now than having to mess with mbufs later */
	src = ip->ip_src;

	/* Matching ruleset checksum lets imports bind states to rules. */
	if (!bcmp(&ph->pf_chksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH))
		flags |= PFSYNC_SI_CKSUM;

	switch (action) {
	case PFSYNC_ACT_CLR: {
		/* Clear all states from a given creator (and optionally
		 * a given interface). */
		struct pf_state *nexts;
		struct pf_state_key *nextsk;
		u_int32_t creatorid;
		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
		    sizeof(*cp), &offp)) == NULL) {
			pfsyncstats.pfsyncs_badlen++;
			return;
		}
		cp = (struct pfsync_state_clr *)(mp->m_data + offp);
		creatorid = cp->creatorid;

		s = splsoftnet();
		if (cp->ifname[0] == '\0') {
			for (st = RB_MIN(pf_state_tree_id, &tree_id);
			    st; st = nexts) {
				nexts = RB_NEXT(pf_state_tree_id, &tree_id, st);
				if (st->creatorid == creatorid) {
					st->sync_flags |= PFSTATE_FROMSYNC;
					pf_unlink_state(st);
				}
			}
		} else {
			if (pfi_kif_get(cp->ifname) == NULL) {
				splx(s);
				return;
			}
			/* XXX correct?
			 */
			for (sk = RB_MIN(pf_state_tree,
			    &pf_statetbl); sk; sk = nextsk) {
				nextsk = RB_NEXT(pf_state_tree,
				    &pf_statetbl, sk);
				TAILQ_FOREACH(si, &sk->states, entry) {
					if (si->s->creatorid == creatorid) {
						si->s->sync_flags |=
						    PFSTATE_FROMSYNC;
						pf_unlink_state(si->s);
					}
				}
			}
		}
		splx(s);

		break;
	}
	case PFSYNC_ACT_INS:
		/* Insert full state records. */
		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
		    count * sizeof(*sp), &offp)) == NULL) {
			pfsyncstats.pfsyncs_badlen++;
			return;
		}

		s = splsoftnet();
		for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp);
		    i < count; i++, sp++) {
			/* check for invalid values */
			if (sp->timeout >= PFTM_MAX ||
			    sp->src.state > PF_TCPS_PROXY_DST ||
			    sp->dst.state > PF_TCPS_PROXY_DST ||
			    sp->direction > PF_OUT ||
			    (sp->af != AF_INET && sp->af != AF_INET6)) {
				if (pf_status.debug >= PF_DEBUG_MISC)
					printf("pfsync_input: PFSYNC_ACT_INS: "
					    "invalid value\n");
				pfsyncstats.pfsyncs_badval++;
				continue;
			}

			if ((error = pfsync_state_import(sp, flags))) {
				if (error == ENOMEM) {
					splx(s);
					goto done;
				}
				continue;
			}
		}
		splx(s);
		break;
	case PFSYNC_ACT_UPD:
		/* Merge full (uncompressed) state updates. */
		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
		    count * sizeof(*sp), &offp)) == NULL) {
			pfsyncstats.pfsyncs_badlen++;
			return;
		}

		s = splsoftnet();
		for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp);
		    i < count; i++, sp++) {
			/* shadows the outer flags on purpose */
			int flags = PFSYNC_FLAG_STALE;

			/* check for invalid values */
			if (sp->timeout >= PFTM_MAX ||
			    sp->src.state > PF_TCPS_PROXY_DST ||
			    sp->dst.state > PF_TCPS_PROXY_DST) {
				if (pf_status.debug >= PF_DEBUG_MISC)
					printf("pfsync_input: PFSYNC_ACT_UPD: "
					    "invalid value\n");
				pfsyncstats.pfsyncs_badval++;
				continue;
			}

			bcopy(sp->id, &id_key.id, sizeof(id_key.id));
			id_key.creatorid = sp->creatorid;

			st = pf_find_state_byid(&id_key);
			if (st ==
NULL) {
				/* insert the update */
				if (pfsync_state_import(sp, flags))
					pfsyncstats.pfsyncs_badstate++;
				continue;
			}
			sk = st->key[PF_SK_WIRE];	/* XXX right one? */
			sfail = 0;
			if (sk->proto == IPPROTO_TCP) {
				/*
				 * The state should never go backwards except
				 * for syn-proxy states.  Neither should the
				 * sequence window slide backwards.
				 */
				if (st->src.state > sp->src.state &&
				    (st->src.state < PF_TCPS_PROXY_SRC ||
				    sp->src.state >= PF_TCPS_PROXY_SRC))
					sfail = 1;
				else if (SEQ_GT(st->src.seqlo,
				    ntohl(sp->src.seqlo)))
					sfail = 3;
				else if (st->dst.state > sp->dst.state) {
					/* There might still be useful
					 * information about the src state here,
					 * so import that part of the update,
					 * then "fail" so we send the updated
					 * state back to the peer who is missing
					 * what we know. */
					pf_state_peer_ntoh(&sp->src, &st->src);
					/* XXX do anything with timeouts? */
					sfail = 7;
					flags = 0;
				} else if (st->dst.state >= TCPS_SYN_SENT &&
				    SEQ_GT(st->dst.seqlo, ntohl(sp->dst.seqlo)))
					sfail = 4;
			} else {
				/*
				 * Non-TCP protocol state machine always go
				 * forwards
				 */
				if (st->src.state > sp->src.state)
					sfail = 5;
				else if (st->dst.state > sp->dst.state)
					sfail = 6;
			}
			if (sfail) {
				if (pf_status.debug >= PF_DEBUG_MISC)
					printf("pfsync: %s stale update "
					    "(%d) id: %016llx "
					    "creatorid: %08x\n",
					    (sfail < 7 ?
"ignoring"
					    : "partial"), sfail,
					    betoh64(st->id),
					    ntohl(st->creatorid));
				pfsyncstats.pfsyncs_stale++;

				if (!(sp->sync_flags & PFSTATE_STALE)) {
					/* we have a better state, send it */
					if (sc->sc_mbuf != NULL && !stale)
						pfsync_sendout(sc);
					stale++;
					if (!st->sync_flags)
						pfsync_pack_state(
						    PFSYNC_ACT_UPD, st, flags);
				}
				continue;
			}
			/* Update accepted: merge peers and refresh expiry. */
			pfsync_alloc_scrub_memory(&sp->dst, &st->dst);
			pf_state_peer_ntoh(&sp->src, &st->src);
			pf_state_peer_ntoh(&sp->dst, &st->dst);
			st->expire = ntohl(sp->expire) + time_second;
			st->timeout = sp->timeout;
		}
		if (stale && sc->sc_mbuf != NULL)
			pfsync_sendout(sc);
		splx(s);
		break;
	/*
	 * It's not strictly necessary for us to support the "uncompressed"
	 * delete action, but it's relatively simple and maintains consistency.
	 */
	case PFSYNC_ACT_DEL:
		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
		    count * sizeof(*sp), &offp)) == NULL) {
			pfsyncstats.pfsyncs_badlen++;
			return;
		}

		s = splsoftnet();
		for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp);
		    i < count; i++, sp++) {
			bcopy(sp->id, &id_key.id, sizeof(id_key.id));
			id_key.creatorid = sp->creatorid;

			st = pf_find_state_byid(&id_key);
			if (st == NULL) {
				pfsyncstats.pfsyncs_badstate++;
				continue;
			}
			st->sync_flags |= PFSTATE_FROMSYNC;
			pf_unlink_state(st);
		}
		splx(s);
		break;
	case PFSYNC_ACT_UPD_C: {
		/* Compressed updates: peers/expiry only, keyed by id. */
		int update_requested = 0;

		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
		    count * sizeof(*up), &offp)) == NULL) {
			pfsyncstats.pfsyncs_badlen++;
			return;
		}

		s = splsoftnet();
		for (i = 0, up = (struct pfsync_state_upd *)(mp->m_data + offp);
		    i < count; i++, up++) {
			/* check for invalid values */
			if (up->timeout >= PFTM_MAX ||
			    up->src.state > PF_TCPS_PROXY_DST ||
			    up->dst.state > PF_TCPS_PROXY_DST) {
				if (pf_status.debug >= PF_DEBUG_MISC)
					printf("pfsync_input: "
					    "PFSYNC_ACT_UPD_C: "
					    "invalid value\n");
				pfsyncstats.pfsyncs_badval++;
				continue;
			}

			bcopy(up->id, &id_key.id, sizeof(id_key.id));
			id_key.creatorid = up->creatorid;

			st = pf_find_state_byid(&id_key);
			if (st == NULL) {
				/* We don't have this state. Ask for it. */
				error = pfsync_request_update(up, &src);
				if (error == ENOMEM) {
					splx(s);
					goto done;
				}
				update_requested = 1;
				pfsyncstats.pfsyncs_badstate++;
				continue;
			}
			sk = st->key[PF_SK_WIRE];	/* XXX right one? */
			sfail = 0;
			if (sk->proto == IPPROTO_TCP) {
				/*
				 * The state should never go backwards except
				 * for syn-proxy states.  Neither should the
				 * sequence window slide backwards.
				 */
				if (st->src.state > up->src.state &&
				    (st->src.state < PF_TCPS_PROXY_SRC ||
				    up->src.state >= PF_TCPS_PROXY_SRC))
					sfail = 1;
				else if (st->dst.state > up->dst.state)
					sfail = 2;
				else if (SEQ_GT(st->src.seqlo,
				    ntohl(up->src.seqlo)))
					sfail = 3;
				else if (st->dst.state >= TCPS_SYN_SENT &&
				    SEQ_GT(st->dst.seqlo, ntohl(up->dst.seqlo)))
					sfail = 4;
			} else {
				/*
				 * Non-TCP protocol state machine always go
				 * forwards
				 */
				if (st->src.state > up->src.state)
					sfail = 5;
				else if (st->dst.state > up->dst.state)
					sfail = 6;
			}
			if (sfail) {
				if (pf_status.debug >= PF_DEBUG_MISC)
					printf("pfsync: ignoring stale update "
					    "(%d) id: %016llx "
					    "creatorid: %08x\n", sfail,
					    betoh64(st->id),
					    ntohl(st->creatorid));
				pfsyncstats.pfsyncs_stale++;

				/* we have a better state, send it out */
				if ((!stale || update_requested) &&
				    sc->sc_mbuf != NULL) {
					pfsync_sendout(sc);
					update_requested = 0;
				}
				stale++;
				if (!st->sync_flags)
					pfsync_pack_state(PFSYNC_ACT_UPD, st,
					    PFSYNC_FLAG_STALE);
				continue;
			}
			pfsync_alloc_scrub_memory(&up->dst, &st->dst);
			pf_state_peer_ntoh(&up->src, &st->src);
			pf_state_peer_ntoh(&up->dst, &st->dst);
			st->expire = ntohl(up->expire) + time_second;
			st->timeout = up->timeout;
		}
		if ((update_requested || stale) && sc->sc_mbuf)
			pfsync_sendout(sc);
		splx(s);
		break;
	}
	case PFSYNC_ACT_DEL_C:
		/* Compressed deletes, keyed by id/creatorid. */
		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
		    count * sizeof(*dp), &offp)) == NULL) {
			pfsyncstats.pfsyncs_badlen++;
			return;
		}

		s = splsoftnet();
		for (i = 0, dp = (struct pfsync_state_del *)(mp->m_data + offp);
		    i < count; i++, dp++) {
			bcopy(dp->id, &id_key.id, sizeof(id_key.id));
			id_key.creatorid = dp->creatorid;

			st = pf_find_state_byid(&id_key);
			if (st == NULL) {
				pfsyncstats.pfsyncs_badstate++;
				continue;
			}
			st->sync_flags |= PFSTATE_FROMSYNC;
			pf_unlink_state(st);
		}
		splx(s);
		break;
	case PFSYNC_ACT_INS_F:
	case PFSYNC_ACT_DEL_F:
		/* not implemented */
		break;
	case PFSYNC_ACT_UREQ:
		/* Peer asks for specific states (or a full bulk update
		 * when id and creatorid are both zero). */
		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
		    count * sizeof(*rup), &offp)) == NULL) {
			pfsyncstats.pfsyncs_badlen++;
			return;
		}

		s = splsoftnet();
		if (sc->sc_mbuf != NULL)
			pfsync_sendout(sc);
		for (i = 0,
		    rup = (struct pfsync_state_upd_req *)(mp->m_data + offp);
		    i < count; i++, rup++) {
			bcopy(rup->id, &id_key.id, sizeof(id_key.id));
			id_key.creatorid = rup->creatorid;

			if (id_key.id == 0 && id_key.creatorid == 0) {
				sc->sc_ureq_received = time_uptime;
				if (sc->sc_bulk_send_next == NULL)
					sc->sc_bulk_send_next =
					    TAILQ_FIRST(&state_list);
				sc->sc_bulk_terminator = sc->sc_bulk_send_next;
				if (pf_status.debug >= PF_DEBUG_MISC)
					printf("pfsync: received "
					    "bulk update request\n");
				pfsync_send_bus(sc, PFSYNC_BUS_START);
				timeout_add_sec(&sc->sc_bulk_tmo, 1);
			} else {
				st = pf_find_state_byid(&id_key);
				if (st == NULL) {
					pfsyncstats.pfsyncs_badstate++;
					continue;
				}
				if (!st->sync_flags)
					pfsync_pack_state(PFSYNC_ACT_UPD,
					    st, 0);
			}
		}
		if (sc->sc_mbuf != NULL)
			pfsync_sendout(sc);
		splx(s);
		break;
	case PFSYNC_ACT_BUS:
		/* If we're not waiting for a bulk update, who cares. */
		if (sc->sc_ureq_sent == 0)
			break;

		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
		    sizeof(*bus), &offp)) == NULL) {
			pfsyncstats.pfsyncs_badlen++;
			return;
		}
		bus = (struct pfsync_state_bus *)(mp->m_data + offp);
		switch (bus->status) {
		case PFSYNC_BUS_START:
			/* Arm the failure timer for the expected duration
			 * of the whole bulk transfer. */
			timeout_add(&sc->sc_bulkfail_tmo,
			    pf_pool_limits[PF_LIMIT_STATES].limit /
			    (PFSYNC_BULKPACKETS * sc->sc_maxcount));
			if (pf_status.debug >= PF_DEBUG_MISC)
				printf("pfsync: received bulk "
				    "update start\n");
			break;
		case PFSYNC_BUS_END:
			if (time_uptime - ntohl(bus->endtime) >=
			    sc->sc_ureq_sent) {
				/* that's it, we're happy */
				sc->sc_ureq_sent = 0;
				sc->sc_bulk_tries = 0;
				timeout_del(&sc->sc_bulkfail_tmo);
#if NCARP > 0
				if (!pfsync_sync_ok)
					carp_group_demote_adj(&sc->sc_if, -1);
#endif
				pfsync_sync_ok = 1;
				if (pf_status.debug >= PF_DEBUG_MISC)
					printf("pfsync: received valid "
					    "bulk update end\n");
			} else {
				if (pf_status.debug >= PF_DEBUG_MISC)
					printf("pfsync: received invalid "
					    "bulk update end: bad timestamp\n");
			}
			break;
		}
		break;
#ifdef IPSEC
	case PFSYNC_ACT_TDB_UPD:
		/* IPsec TDB replay-counter updates. */
		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
		    count * sizeof(*pt), &offp)) == NULL) {
			pfsyncstats.pfsyncs_badlen++;
			return;
		}
		s = splsoftnet();
		for (i = 0, pt = (struct pfsync_tdb *)(mp->m_data + offp);
		    i < count; i++, pt++)
			pfsync_update_net_tdb(pt);
		splx(s);
		break;
#endif
	}

done:
	if (m)
		m_freem(m);
}

/*
 * Output routine: pfsync packets are never routed out this interface;
 * anything handed to us is discarded.
 */
int
pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
    struct rtentry *rt)
{
	m_freem(m);
	return (0);
}

/* ARGSUSED */
int
pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct proc *p = curproc;
	struct pfsync_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ip_moptions *imo = &sc->sc_imo;
	struct pfsyncreq pfsyncr;
	struct ifnet	*sifp;
	int s, error;

	switch (cmd) {
	case SIOCSIFADDR:
	case SIOCAIFADDR:
	case SIOCSIFDSTADDR:
	case SIOCSIFFLAGS:
		if (ifp->if_flags & IFF_UP)
			ifp->if_flags |= IFF_RUNNING;
		else
			ifp->if_flags &= ~IFF_RUNNING;
		break;
	case SIOCSIFMTU:
		if (ifr->ifr_mtu < PFSYNC_MINMTU)
			return (EINVAL);
		if (ifr->ifr_mtu > MCLBYTES)
			ifr->ifr_mtu = MCLBYTES;
		s = splnet();
		/* Shrinking the MTU: flush the packet under construction
		 * before it no longer fits. */
		if (ifr->ifr_mtu < ifp->if_mtu)
			pfsync_sendout(sc);
		pfsync_setmtu(sc, ifr->ifr_mtu);
		splx(s);
		break;
	case SIOCGETPFSYNC:
		/* Report current syncdev/syncpeer/maxupdates to userland. */
		bzero(&pfsyncr, sizeof(pfsyncr));
		if (sc->sc_sync_ifp)
			strlcpy(pfsyncr.pfsyncr_syncdev,
			    sc->sc_sync_ifp->if_xname, IFNAMSIZ);
		pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer;
		pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates;
		if ((error = copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr))))
			return (error);
		break;
	case SIOCSETPFSYNC:
		/* Configure the sync interface/peer; root only. */
		if ((error = suser(p, p->p_acflag)) != 0)
			return (error);
		if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr))))
			return (error);

		if (pfsyncr.pfsyncr_syncpeer.s_addr == 0)
			sc->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP;
		else
			sc->sc_sync_peer.s_addr =
			    pfsyncr.pfsyncr_syncpeer.s_addr;

		if (pfsyncr.pfsyncr_maxupdates > 255)
			return (EINVAL);
		sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates;

		if (pfsyncr.pfsyncr_syncdev[0] == 0) {
			/* Unconfigure: drop sync interface and membership. */
			sc->sc_sync_ifp = NULL;
			if (sc->sc_mbuf_net != NULL) {
				/* Don't keep stale pfsync packets around.
				 */
				s = splnet();
				m_freem(sc->sc_mbuf_net);
				sc->sc_mbuf_net = NULL;
				sc->sc_statep_net.s = NULL;
				splx(s);
			}
			if (imo->imo_num_memberships > 0) {
				in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
				imo->imo_multicast_ifp = NULL;
			}
			break;
		}

		if ((sifp = ifunit(pfsyncr.pfsyncr_syncdev)) == NULL)
			return (EINVAL);

		s = splnet();
		/* Flush any pending packet that may not fit the new path. */
		if (sifp->if_mtu < sc->sc_if.if_mtu ||
		    (sc->sc_sync_ifp != NULL &&
		    sifp->if_mtu < sc->sc_sync_ifp->if_mtu) ||
		    sifp->if_mtu < MCLBYTES - sizeof(struct ip))
			pfsync_sendout(sc);
		sc->sc_sync_ifp = sifp;

		pfsync_setmtu(sc, sc->sc_if.if_mtu);

		/* Leave any previous multicast group. */
		if (imo->imo_num_memberships > 0) {
			in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
			imo->imo_multicast_ifp = NULL;
		}

		/* Join the pfsync multicast group unless a unicast peer
		 * was configured. */
		if (sc->sc_sync_ifp &&
		    sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) {
			struct in_addr addr;

			if (!(sc->sc_sync_ifp->if_flags & IFF_MULTICAST)) {
				sc->sc_sync_ifp = NULL;
				splx(s);
				return (EADDRNOTAVAIL);
			}

			addr.s_addr = INADDR_PFSYNC_GROUP;

			if ((imo->imo_membership[0] =
			    in_addmulti(&addr, sc->sc_sync_ifp)) == NULL) {
				sc->sc_sync_ifp = NULL;
				splx(s);
				return (ENOBUFS);
			}
			imo->imo_num_memberships++;
			imo->imo_multicast_ifp = sc->sc_sync_ifp;
			imo->imo_multicast_ttl = PFSYNC_DFLTTL;
			imo->imo_multicast_loop = 0;
		}

		if (sc->sc_sync_ifp ||
		    sc->sc_sendaddr.s_addr != INADDR_PFSYNC_GROUP) {
			/* Request a full state table update.
			 */
			sc->sc_ureq_sent = time_uptime;
#if NCARP > 0
			/* Demote our carp groups until the bulk update
			 * has completed. */
			if (pfsync_sync_ok)
				carp_group_demote_adj(&sc->sc_if, 1);
#endif
			pfsync_sync_ok = 0;
			if (pf_status.debug >= PF_DEBUG_MISC)
				printf("pfsync: requesting bulk update\n");
			timeout_add_sec(&sc->sc_bulkfail_tmo, 5);
			error = pfsync_request_update(NULL, NULL);
			if (error == ENOMEM) {
				splx(s);
				return (ENOMEM);
			}
			pfsync_sendout(sc);
		}
		splx(s);

		break;

	default:
		return (ENOTTY);
	}

	return (0);
}

/*
 * Recompute sc_maxcount (states per packet) and the interface MTU from
 * the requested MTU, clamped to the sync interface's MTU and to at most
 * 254 state records per packet.
 */
void
pfsync_setmtu(struct pfsync_softc *sc, int mtu_req)
{
	int mtu;

	if (sc->sc_sync_ifp && sc->sc_sync_ifp->if_mtu < mtu_req)
		mtu = sc->sc_sync_ifp->if_mtu;
	else
		mtu = mtu_req;

	sc->sc_maxcount = (mtu - sizeof(struct pfsync_header)) /
	    sizeof(struct pfsync_state);
	if (sc->sc_maxcount > 254)
		sc->sc_maxcount = 254;
	sc->sc_if.if_mtu = sizeof(struct pfsync_header) +
	    sc->sc_maxcount * sizeof(struct pfsync_state);
}

/*
 * Allocate and initialize an mbuf for an outgoing pfsync packet of the
 * given action, sized for the worst case of that action.  On return *sp
 * points just past the header, where records are appended; the flush
 * timeout for the corresponding stream is armed.
 */
struct mbuf *
pfsync_get_mbuf(struct pfsync_softc *sc, u_int8_t action, void **sp)
{
	struct pfsync_header *h;
	struct mbuf *m;
	int len;

	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL) {
		sc->sc_if.if_oerrors++;
		return (NULL);
	}

	/* Worst-case payload size for this action. */
	switch (action) {
	case PFSYNC_ACT_CLR:
		len = sizeof(struct pfsync_header) +
		    sizeof(struct pfsync_state_clr);
		break;
	case PFSYNC_ACT_UPD_C:
		len = (sc->sc_maxcount * sizeof(struct pfsync_state_upd)) +
		    sizeof(struct pfsync_header);
		break;
	case PFSYNC_ACT_DEL_C:
		len = (sc->sc_maxcount * sizeof(struct pfsync_state_del)) +
		    sizeof(struct pfsync_header);
		break;
	case PFSYNC_ACT_UREQ:
		len = (sc->sc_maxcount * sizeof(struct pfsync_state_upd_req)) +
		    sizeof(struct pfsync_header);
		break;
	case PFSYNC_ACT_BUS:
		len = sizeof(struct pfsync_header) +
		    sizeof(struct pfsync_state_bus);
		break;
	case PFSYNC_ACT_TDB_UPD:
		len = (sc->sc_maxcount * sizeof(struct pfsync_tdb)) +
		    sizeof(struct pfsync_header);
		break;
	default:
		len = (sc->sc_maxcount * sizeof(struct pfsync_state)) +
		    sizeof(struct pfsync_header);
		break;
	}

	if (len > MHLEN) {
		MCLGET(m, M_DONTWAIT);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			sc->sc_if.if_oerrors++;
			return (NULL);
		}
		/* Align the tail of the cluster on a long boundary. */
		m->m_data += (MCLBYTES - len) &~ (sizeof(long) - 1);
	} else
		MH_ALIGN(m, len);

	m->m_pkthdr.rcvif = NULL;
	m->m_pkthdr.len = m->m_len = sizeof(struct pfsync_header);
	h = mtod(m, struct pfsync_header *);
	h->version = PFSYNC_VERSION;
	h->af = 0;
	h->count = 0;
	h->action = action;
	/* TDB updates carry no pf ruleset checksum. */
	if (action != PFSYNC_ACT_TDB_UPD)
		bcopy(&pf_status.pf_chksum, &h->pf_chksum,
		    PF_MD5_DIGEST_LENGTH);

	/* Hand the payload start back to the caller. */
	*sp = (void *)((char *)h + PFSYNC_HDRLEN);
	/* Make sure a partially filled packet is flushed eventually. */
	if (action == PFSYNC_ACT_TDB_UPD)
		timeout_add_sec(&sc->sc_tdb_tmo, 1);
	else
		timeout_add_sec(&sc->sc_tmo, 1);
	return (m);
}

/*
 * Queue a state change for transmission to our peers.  The full state
 * is appended to the pending packet in sc_mbuf (merging with an earlier
 * update for the same state when sc_maxupdates allows); when a sync
 * interface is configured and PFSYNC_FLAG_COMPRESS is set, a compressed
 * UPD_C/DEL_C record is additionally queued on sc_mbuf_net.  Returns 0
 * on success, EINVAL for an unknown action, ENOMEM on mbuf exhaustion,
 * or the result of an immediate pfsync_sendout() when a packet fills.
 */
int
pfsync_pack_state(u_int8_t action, struct pf_state *st, int flags)
{
	struct ifnet *ifp = NULL;
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_header *h, *h_net;
	struct pfsync_state *sp = NULL;
	struct pfsync_state_upd *up = NULL;
	struct pfsync_state_del *dp = NULL;
	int s, ret = 0;
	/* i == 255 means "no matching update found in the packet". */
	u_int8_t i = 255, newaction = 0;

	if (sc == NULL)
		return (0);
	ifp = &sc->sc_if;

	/*
	 * If a packet falls in the forest and there's nobody around to
	 * hear, does it make a sound?
	 */
	if (ifp->if_bpf == NULL && sc->sc_sync_ifp == NULL &&
	    sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) {
		/* Don't leave any stale pfsync packets hanging around. */
		if (sc->sc_mbuf != NULL) {
			m_freem(sc->sc_mbuf);
			sc->sc_mbuf = NULL;
			sc->sc_statep.s = NULL;
		}
		return (0);
	}

	if (action >= PFSYNC_ACT_MAX)
		return (EINVAL);

	s = splnet();
	if (sc->sc_mbuf == NULL) {
		if ((sc->sc_mbuf = pfsync_get_mbuf(sc, action,
		    (void *)&sc->sc_statep.s)) == NULL) {
			splx(s);
			return (ENOMEM);
		}
		h = mtod(sc->sc_mbuf, struct pfsync_header *);
	} else {
		h = mtod(sc->sc_mbuf, struct pfsync_header *);
		if (h->action != action) {
			/* Pending packet is of another type; flush it. */
			pfsync_sendout(sc);
			if ((sc->sc_mbuf = pfsync_get_mbuf(sc, action,
			    (void *)&sc->sc_statep.s)) == NULL) {
				splx(s);
				return (ENOMEM);
			}
			h = mtod(sc->sc_mbuf, struct pfsync_header *);
		} else {
			/*
			 * If it's an update, look in the packet to see if
			 * we already have an update for the state.
			 */
			if (action == PFSYNC_ACT_UPD && sc->sc_maxupdates) {
				struct pfsync_state *usp =
				    (void *)((char *)h + PFSYNC_HDRLEN);

				for (i = 0; i < h->count; i++) {
					if (!memcmp(usp->id, &st->id,
					    PFSYNC_ID_LEN) &&
					    usp->creatorid == st->creatorid) {
						sp = usp;
						sp->updates++;
						break;
					}
					usp++;
				}
			}
		}
	}

	st->pfsync_time = time_uptime;

	if (sp == NULL) {
		/* not a "duplicate" update */
		i = 255;
		sp = sc->sc_statep.s++;
		sc->sc_mbuf->m_pkthdr.len =
		    sc->sc_mbuf->m_len += sizeof(struct pfsync_state);
		h->count++;
		bzero(sp, sizeof(*sp));

		pfsync_state_export(sp, st);

		if (flags & PFSYNC_FLAG_STALE)
			sp->sync_flags |= PFSTATE_STALE;
	} else {
		/* Refresh the peer/expiry fields of the merged record. */
		pf_state_peer_hton(&st->src, &sp->src);
		pf_state_peer_hton(&st->dst, &sp->dst);

		if (st->expire <= time_second)
			sp->expire = htonl(0);
		else
			sp->expire = htonl(st->expire - time_second);
	}

	/* do we need to build "compressed" actions for network transfer? */
	if (sc->sc_sync_ifp && flags & PFSYNC_FLAG_COMPRESS) {
		switch (action) {
		case PFSYNC_ACT_UPD:
			newaction = PFSYNC_ACT_UPD_C;
			break;
		case PFSYNC_ACT_DEL:
			newaction = PFSYNC_ACT_DEL_C;
			break;
		default:
			/* by default we just send the uncompressed states */
			break;
		}
	}

	if (newaction) {
		if (sc->sc_mbuf_net == NULL) {
			if ((sc->sc_mbuf_net = pfsync_get_mbuf(sc, newaction,
			    (void *)&sc->sc_statep_net.s)) == NULL) {
				splx(s);
				return (ENOMEM);
			}
		}
		h_net = mtod(sc->sc_mbuf_net, struct pfsync_header *);

		switch (newaction) {
		case PFSYNC_ACT_UPD_C:
			if (i != 255) {
				/* Merge into the i-th compressed record. */
				up = (void *)((char *)h_net +
				    PFSYNC_HDRLEN + (i * sizeof(*up)));
				up->updates++;
			} else {
				h_net->count++;
				sc->sc_mbuf_net->m_pkthdr.len =
				    sc->sc_mbuf_net->m_len += sizeof(*up);
				up = sc->sc_statep_net.u++;

				bzero(up, sizeof(*up));
				bcopy(&st->id, up->id, sizeof(up->id));
				up->creatorid = st->creatorid;
			}
			up->timeout = st->timeout;
			up->expire = sp->expire;
			up->src = sp->src;
			up->dst = sp->dst;
			break;
		case PFSYNC_ACT_DEL_C:
			sc->sc_mbuf_net->m_pkthdr.len =
			    sc->sc_mbuf_net->m_len += sizeof(*dp);
			dp = sc->sc_statep_net.d++;
			h_net->count++;

			bzero(dp, sizeof(*dp));
			bcopy(&st->id, dp->id, sizeof(dp->id));
			dp->creatorid = st->creatorid;
			break;
		}
	}

	/* Flush when the packet is full or the merge limit is reached. */
	if (h->count == sc->sc_maxcount ||
	    (sc->sc_maxupdates && (sp->updates >= sc->sc_maxupdates)))
		ret = pfsync_sendout(sc);

	splx(s);
	return (ret);
}

/*
 * Queue a request asking our peers to resend the state identified by
 * `up` (or a full-table request when up == NULL).  `src`, when given,
 * overrides the destination of the next transmission.
 * This must be called in splnet().
 */
int
pfsync_request_update(struct pfsync_state_upd *up, struct in_addr *src)
{
	struct pfsync_header *h;
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_state_upd_req *rup;
	int ret = 0;

	if (sc == NULL)
		return (0);

	if
(sc->sc_mbuf == NULL) { 1391 if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ, 1392 (void *)&sc->sc_statep.s)) == NULL) 1393 return (ENOMEM); 1394 h = mtod(sc->sc_mbuf, struct pfsync_header *); 1395 } else { 1396 h = mtod(sc->sc_mbuf, struct pfsync_header *); 1397 if (h->action != PFSYNC_ACT_UREQ) { 1398 pfsync_sendout(sc); 1399 if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ, 1400 (void *)&sc->sc_statep.s)) == NULL) 1401 return (ENOMEM); 1402 h = mtod(sc->sc_mbuf, struct pfsync_header *); 1403 } 1404 } 1405 1406 if (src != NULL) 1407 sc->sc_sendaddr = *src; 1408 sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*rup); 1409 h->count++; 1410 rup = sc->sc_statep.r++; 1411 bzero(rup, sizeof(*rup)); 1412 if (up != NULL) { 1413 bcopy(up->id, rup->id, sizeof(rup->id)); 1414 rup->creatorid = up->creatorid; 1415 } 1416 1417 if (h->count == sc->sc_maxcount) 1418 ret = pfsync_sendout(sc); 1419 1420 return (ret); 1421 } 1422 1423 int 1424 pfsync_clear_states(u_int32_t creatorid, char *ifname) 1425 { 1426 struct pfsync_softc *sc = pfsyncif; 1427 struct pfsync_state_clr *cp; 1428 int s, ret; 1429 1430 if (sc == NULL) 1431 return (0); 1432 1433 s = splnet(); 1434 if (sc->sc_mbuf != NULL) 1435 pfsync_sendout(sc); 1436 if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_CLR, 1437 (void *)&sc->sc_statep.c)) == NULL) { 1438 splx(s); 1439 return (ENOMEM); 1440 } 1441 sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*cp); 1442 cp = sc->sc_statep.c; 1443 cp->creatorid = creatorid; 1444 if (ifname != NULL) 1445 strlcpy(cp->ifname, ifname, IFNAMSIZ); 1446 1447 ret = (pfsync_sendout(sc)); 1448 splx(s); 1449 return (ret); 1450 } 1451 1452 void 1453 pfsync_timeout(void *v) 1454 { 1455 struct pfsync_softc *sc = v; 1456 int s; 1457 1458 s = splnet(); 1459 pfsync_sendout(sc); 1460 splx(s); 1461 } 1462 1463 void 1464 pfsync_tdb_timeout(void *v) 1465 { 1466 struct pfsync_softc *sc = v; 1467 int s; 1468 1469 s = splnet(); 1470 pfsync_tdb_sendout(sc); 1471 splx(s); 1472 } 

/*
 * Announce the status (start/end) of a bulk update to our peers.
 * Only sent while we consider ourselves in sync (pfsync_sync_ok).
 * This must be called in splnet().
 */
void
pfsync_send_bus(struct pfsync_softc *sc, u_int8_t status)
{
	struct pfsync_state_bus *bus;

	if (sc->sc_mbuf != NULL)
		pfsync_sendout(sc);

	if (pfsync_sync_ok &&
	    (sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_BUS,
	    (void *)&sc->sc_statep.b)) != NULL) {
		sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*bus);
		bus = sc->sc_statep.b;
		bus->creatorid = pf_status.hostid;
		bus->status = status;
		bus->endtime = htonl(time_uptime - sc->sc_ureq_received);
		pfsync_sendout(sc);
	}
}

/*
 * Timeout handler driving a bulk send to a peer that requested a full
 * table: walk state_list from sc_bulk_send_next, batching up to
 * sc_maxcount * PFSYNC_BULKPACKETS states per invocation, and reschedule
 * ourselves until we wrap around to sc_bulk_terminator.
 */
void
pfsync_bulk_update(void *v)
{
	struct pfsync_softc *sc = v;
	int s, i = 0;
	struct pf_state *state;

	s = splnet();
	if (sc->sc_mbuf != NULL)
		pfsync_sendout(sc);

	/*
	 * Grab at most PFSYNC_BULKPACKETS worth of states which have not
	 * been sent since the latest request was made.
	 */
	state = sc->sc_bulk_send_next;
	if (state)
		do {
			/* send state update if syncable and not already sent */
			if (!state->sync_flags
			    && state->timeout < PFTM_MAX
			    && state->pfsync_time <= sc->sc_ureq_received) {
				pfsync_pack_state(PFSYNC_ACT_UPD, state, 0);
				i++;
			}

			/* figure next state to send */
			state = TAILQ_NEXT(state, entry_list);

			/* wrap to start of list if we hit the end */
			if (!state)
				state = TAILQ_FIRST(&state_list);
		} while (i < sc->sc_maxcount * PFSYNC_BULKPACKETS &&
		    state != sc->sc_bulk_terminator);

	if (!state || state == sc->sc_bulk_terminator) {
		/* we're done */
		pfsync_send_bus(sc, PFSYNC_BUS_END);
		sc->sc_ureq_received = 0;
		sc->sc_bulk_send_next = NULL;
		sc->sc_bulk_terminator = NULL;
		timeout_del(&sc->sc_bulk_tmo);
		if (pf_status.debug >= PF_DEBUG_MISC)
			printf("pfsync: bulk update complete\n");
	} else {
		/* look again for more in a bit */
		timeout_add(&sc->sc_bulk_tmo, 1);
		sc->sc_bulk_send_next = state;
	}
	if (sc->sc_mbuf != NULL)
		pfsync_sendout(sc);
	splx(s);
}

/*
 * Timeout handler fired when a requested bulk update has not completed:
 * re-request up to PFSYNC_MAX_BULKTRIES times, then give up and pretend
 * we are in sync so carp demotion is lifted.
 */
void
pfsync_bulkfail(void *v)
{
	struct pfsync_softc *sc = v;
	int s, error;

	if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) {
		/* Try again in a bit */
		timeout_add_sec(&sc->sc_bulkfail_tmo, 5);
		s = splnet();
		error = pfsync_request_update(NULL, NULL);
		if (error == ENOMEM) {
			if (pf_status.debug >= PF_DEBUG_MISC)
				printf("pfsync: cannot allocate mbufs for "
				    "bulk update\n");
		} else
			pfsync_sendout(sc);
		splx(s);
	} else {
		/* Pretend like the transfer was ok */
		sc->sc_ureq_sent = 0;
		sc->sc_bulk_tries = 0;
#if NCARP > 0
		if (!pfsync_sync_ok)
			carp_group_demote_adj(&sc->sc_if, -1);
#endif
		pfsync_sync_ok = 1;
		if (pf_status.debug >= PF_DEBUG_MISC)
			printf("pfsync: failed to receive "
			    "bulk update status\n");
		timeout_del(&sc->sc_bulkfail_tmo);
	}
}

/*
 * Detach the pending state packet and transmit it.  If a compressed
 * companion packet exists on sc_mbuf_net, the full packet is only
 * tapped to bpf and the compressed one goes on the wire instead.
 * This must be called in splnet().
 */
int
pfsync_sendout(struct pfsync_softc *sc)
{
#if NBPFILTER > 0
	struct ifnet *ifp = &sc->sc_if;
#endif
	struct mbuf *m;

	timeout_del(&sc->sc_tmo);

	if (sc->sc_mbuf == NULL)
		return (0);
	m = sc->sc_mbuf;
	sc->sc_mbuf = NULL;
	sc->sc_statep.s = NULL;

#if NBPFILTER > 0
	if (ifp->if_bpf)
		bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
#endif

	/* Prefer the compressed packet on the wire, if one was built. */
	if (sc->sc_mbuf_net) {
		m_freem(m);
		m = sc->sc_mbuf_net;
		sc->sc_mbuf_net = NULL;
		sc->sc_statep_net.s = NULL;
	}

	return pfsync_sendout_mbuf(sc, m);
}

/* Detach and transmit the pending TDB packet, if any. */
int
pfsync_tdb_sendout(struct pfsync_softc *sc)
{
#if NBPFILTER > 0
	struct ifnet *ifp = &sc->sc_if;
#endif
	struct mbuf *m;

	timeout_del(&sc->sc_tdb_tmo);

	if (sc->sc_mbuf_tdb == NULL)
		return (0);
1627 m = sc->sc_mbuf_tdb; 1628 sc->sc_mbuf_tdb = NULL; 1629 sc->sc_statep_tdb.t = NULL; 1630 1631 #if NBPFILTER > 0 1632 if (ifp->if_bpf) 1633 bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT); 1634 #endif 1635 1636 return pfsync_sendout_mbuf(sc, m); 1637 } 1638 1639 int 1640 pfsync_sendout_mbuf(struct pfsync_softc *sc, struct mbuf *m) 1641 { 1642 struct sockaddr sa; 1643 struct ip *ip; 1644 1645 if (sc->sc_sync_ifp || 1646 sc->sc_sync_peer.s_addr != INADDR_PFSYNC_GROUP) { 1647 M_PREPEND(m, sizeof(struct ip), M_DONTWAIT); 1648 if (m == NULL) { 1649 pfsyncstats.pfsyncs_onomem++; 1650 return (0); 1651 } 1652 ip = mtod(m, struct ip *); 1653 ip->ip_v = IPVERSION; 1654 ip->ip_hl = sizeof(*ip) >> 2; 1655 ip->ip_tos = IPTOS_LOWDELAY; 1656 ip->ip_len = htons(m->m_pkthdr.len); 1657 ip->ip_id = htons(ip_randomid()); 1658 ip->ip_off = htons(IP_DF); 1659 ip->ip_ttl = PFSYNC_DFLTTL; 1660 ip->ip_p = IPPROTO_PFSYNC; 1661 ip->ip_sum = 0; 1662 1663 bzero(&sa, sizeof(sa)); 1664 ip->ip_src.s_addr = INADDR_ANY; 1665 1666 if (sc->sc_sendaddr.s_addr == INADDR_PFSYNC_GROUP) 1667 m->m_flags |= M_MCAST; 1668 ip->ip_dst = sc->sc_sendaddr; 1669 sc->sc_sendaddr.s_addr = sc->sc_sync_peer.s_addr; 1670 1671 pfsyncstats.pfsyncs_opackets++; 1672 1673 if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) 1674 pfsyncstats.pfsyncs_oerrors++; 1675 } else 1676 m_freem(m); 1677 1678 return (0); 1679 } 1680 1681 #ifdef IPSEC 1682 /* Update an in-kernel tdb. Silently fail if no tdb is found. */ 1683 void 1684 pfsync_update_net_tdb(struct pfsync_tdb *pt) 1685 { 1686 struct tdb *tdb; 1687 int s; 1688 1689 /* check for invalid values */ 1690 if (ntohl(pt->spi) <= SPI_RESERVED_MAX || 1691 (pt->dst.sa.sa_family != AF_INET && 1692 pt->dst.sa.sa_family != AF_INET6)) 1693 goto bad; 1694 1695 s = spltdb(); 1696 tdb = gettdb(pt->spi, &pt->dst, pt->sproto); 1697 if (tdb) { 1698 pt->rpl = ntohl(pt->rpl); 1699 pt->cur_bytes = betoh64(pt->cur_bytes); 1700 1701 /* Neither replay nor byte counter should ever decrease. 
*/ 1702 if (pt->rpl < tdb->tdb_rpl || 1703 pt->cur_bytes < tdb->tdb_cur_bytes) { 1704 splx(s); 1705 goto bad; 1706 } 1707 1708 tdb->tdb_rpl = pt->rpl; 1709 tdb->tdb_cur_bytes = pt->cur_bytes; 1710 } 1711 splx(s); 1712 return; 1713 1714 bad: 1715 if (pf_status.debug >= PF_DEBUG_MISC) 1716 printf("pfsync_insert: PFSYNC_ACT_TDB_UPD: " 1717 "invalid value\n"); 1718 pfsyncstats.pfsyncs_badstate++; 1719 return; 1720 } 1721 1722 /* One of our local tdbs have been updated, need to sync rpl with others */ 1723 int 1724 pfsync_update_tdb(struct tdb *tdb, int output) 1725 { 1726 struct ifnet *ifp = NULL; 1727 struct pfsync_softc *sc = pfsyncif; 1728 struct pfsync_header *h; 1729 struct pfsync_tdb *pt = NULL; 1730 int s, i, ret; 1731 1732 if (sc == NULL) 1733 return (0); 1734 1735 ifp = &sc->sc_if; 1736 if (ifp->if_bpf == NULL && sc->sc_sync_ifp == NULL && 1737 sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) { 1738 /* Don't leave any stale pfsync packets hanging around. */ 1739 if (sc->sc_mbuf_tdb != NULL) { 1740 m_freem(sc->sc_mbuf_tdb); 1741 sc->sc_mbuf_tdb = NULL; 1742 sc->sc_statep_tdb.t = NULL; 1743 } 1744 return (0); 1745 } 1746 1747 s = splnet(); 1748 if (sc->sc_mbuf_tdb == NULL) { 1749 if ((sc->sc_mbuf_tdb = pfsync_get_mbuf(sc, PFSYNC_ACT_TDB_UPD, 1750 (void *)&sc->sc_statep_tdb.t)) == NULL) { 1751 splx(s); 1752 return (ENOMEM); 1753 } 1754 h = mtod(sc->sc_mbuf_tdb, struct pfsync_header *); 1755 } else { 1756 h = mtod(sc->sc_mbuf_tdb, struct pfsync_header *); 1757 if (h->action != PFSYNC_ACT_TDB_UPD) { 1758 /* 1759 * XXX will never happen as long as there's 1760 * only one "TDB action". 
1761 */ 1762 pfsync_tdb_sendout(sc); 1763 sc->sc_mbuf_tdb = pfsync_get_mbuf(sc, 1764 PFSYNC_ACT_TDB_UPD, (void *)&sc->sc_statep_tdb.t); 1765 if (sc->sc_mbuf_tdb == NULL) { 1766 splx(s); 1767 return (ENOMEM); 1768 } 1769 h = mtod(sc->sc_mbuf_tdb, struct pfsync_header *); 1770 } else if (sc->sc_maxupdates) { 1771 /* 1772 * If it's an update, look in the packet to see if 1773 * we already have an update for the state. 1774 */ 1775 struct pfsync_tdb *u = 1776 (void *)((char *)h + PFSYNC_HDRLEN); 1777 1778 for (i = 0; !pt && i < h->count; i++) { 1779 if (tdb->tdb_spi == u->spi && 1780 tdb->tdb_sproto == u->sproto && 1781 !bcmp(&tdb->tdb_dst, &u->dst, 1782 SA_LEN(&u->dst.sa))) { 1783 pt = u; 1784 pt->updates++; 1785 } 1786 u++; 1787 } 1788 } 1789 } 1790 1791 if (pt == NULL) { 1792 /* not a "duplicate" update */ 1793 pt = sc->sc_statep_tdb.t++; 1794 sc->sc_mbuf_tdb->m_pkthdr.len = 1795 sc->sc_mbuf_tdb->m_len += sizeof(struct pfsync_tdb); 1796 h->count++; 1797 bzero(pt, sizeof(*pt)); 1798 1799 pt->spi = tdb->tdb_spi; 1800 memcpy(&pt->dst, &tdb->tdb_dst, sizeof pt->dst); 1801 pt->sproto = tdb->tdb_sproto; 1802 } 1803 1804 /* 1805 * When a failover happens, the master's rpl is probably above 1806 * what we see here (we may be up to a second late), so 1807 * increase it a bit for outbound tdbs to manage most such 1808 * situations. 1809 * 1810 * For now, just add an offset that is likely to be larger 1811 * than the number of packets we can see in one second. The RFC 1812 * just says the next packet must have a higher seq value. 1813 * 1814 * XXX What is a good algorithm for this? We could use 1815 * a rate-determined increase, but to know it, we would have 1816 * to extend struct tdb. 1817 * XXX pt->rpl can wrap over MAXINT, but if so the real tdb 1818 * will soon be replaced anyway. For now, just don't handle 1819 * this edge case. 1820 */ 1821 #define RPL_INCR 16384 1822 pt->rpl = htonl(tdb->tdb_rpl + (output ? 
RPL_INCR : 0)); 1823 pt->cur_bytes = htobe64(tdb->tdb_cur_bytes); 1824 1825 if (h->count == sc->sc_maxcount || 1826 (sc->sc_maxupdates && (pt->updates >= sc->sc_maxupdates))) 1827 ret = pfsync_tdb_sendout(sc); 1828 1829 splx(s); 1830 return (ret); 1831 } 1832 #endif 1833 1834 int 1835 pfsync_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, 1836 size_t newlen) 1837 { 1838 /* All sysctl names at this level are terminal. */ 1839 if (namelen != 1) 1840 return (ENOTDIR); 1841 1842 switch (name[0]) { 1843 case PFSYNCCTL_STATS: 1844 if (newp != NULL) 1845 return (EPERM); 1846 return (sysctl_struct(oldp, oldlenp, newp, newlen, 1847 &pfsyncstats, sizeof(pfsyncstats))); 1848 default: 1849 return (ENOPROTOOPT); 1850 } 1851 } 1852