1 /* $NetBSD: if_pfsync.c,v 1.3 2009/09/14 10:36:49 degroote Exp $ */ 2 /* $OpenBSD: if_pfsync.c,v 1.83 2007/06/26 14:44:12 mcbride Exp $ */ 3 4 /* 5 * Copyright (c) 2002 Michael Shalayeff 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 21 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 23 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 25 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 26 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 27 * THE POSSIBILITY OF SUCH DAMAGE. 28 */ 29 30 #include <sys/cdefs.h> 31 __KERNEL_RCSID(0, "$NetBSD: if_pfsync.c,v 1.3 2009/09/14 10:36:49 degroote Exp $"); 32 33 #ifdef _KERNEL_OPT 34 #include "opt_inet.h" 35 #include "opt_inet6.h" 36 #endif 37 38 #include <sys/param.h> 39 #include <sys/proc.h> 40 #include <sys/systm.h> 41 #include <sys/time.h> 42 #include <sys/mbuf.h> 43 #include <sys/socket.h> 44 #include <sys/ioctl.h> 45 #include <sys/callout.h> 46 #include <sys/kernel.h> 47 48 #include <net/if.h> 49 #include <net/if_types.h> 50 #include <net/route.h> 51 #include <net/bpf.h> 52 #include <netinet/in.h> 53 #ifndef __NetBSD__ 54 #include <netinet/if_ether.h> 55 #else 56 #include <net/if_ether.h> 57 #endif /* __NetBSD__ */ 58 #include <netinet/tcp.h> 59 #include <netinet/tcp_seq.h> 60 61 #ifdef INET 62 #include <netinet/in_systm.h> 63 #include <netinet/in_var.h> 64 #include <netinet/ip.h> 65 #include <netinet/ip_var.h> 66 #endif 67 68 #ifdef INET6 69 #include <netinet6/nd6.h> 70 #endif /* INET6 */ 71 72 #include "carp.h" 73 #if NCARP > 0 74 extern int carp_suppress_preempt; 75 #endif 76 77 #include <net/pfvar.h> 78 #include <net/if_pfsync.h> 79 80 #ifdef __NetBSD__ 81 #include <sys/conf.h> 82 #include <sys/lwp.h> 83 #include <sys/kauth.h> 84 #include <sys/sysctl.h> 85 86 #include <net/net_stats.h> 87 88 percpu_t *pfsyncstat_percpu; 89 90 #define PFSYNC_STATINC(x) _NET_STATINC(pfsyncstat_percpu, x) 91 #endif /* __NetBSD__ */ 92 93 #include "bpfilter.h" 94 #include "pfsync.h" 95 96 #define PFSYNC_MINMTU \ 97 (sizeof(struct pfsync_header) + sizeof(struct pf_state)) 98 99 #ifdef PFSYNCDEBUG 100 #define DPRINTF(x) do { if (pfsyncdebug) printf x ; } while (0) 101 int pfsyncdebug; 102 #else 103 #define DPRINTF(x) 104 #endif 105 106 extern int ifqmaxlen; /* XXX */ 107 108 struct pfsync_softc *pfsyncif = NULL; 109 110 void pfsyncattach(int); 111 int pfsync_clone_create(struct if_clone *, int); 112 int pfsync_clone_destroy(struct ifnet *); 113 void pfsync_setmtu(struct pfsync_softc *, int); 114 int pfsync_alloc_scrub_memory(struct pfsync_state_peer *, 115 struct pf_state_peer *); 116 int pfsync_insert_net_state(struct pfsync_state *, u_int8_t); 117 void pfsync_update_net_tdb(struct pfsync_tdb *); 118 int pfsyncoutput(struct ifnet *, struct mbuf *, const struct sockaddr *, 119 struct rtentry *); 120 int pfsyncioctl(struct ifnet *, u_long, void*); 121 void pfsyncstart(struct ifnet *); 122 123 struct mbuf *pfsync_get_mbuf(struct pfsync_softc *, u_int8_t, void **); 124 int pfsync_request_update(struct pfsync_state_upd *, struct in_addr *); 125 int pfsync_sendout(struct pfsync_softc *); 126 int pfsync_tdb_sendout(struct pfsync_softc *); 127 int pfsync_sendout_mbuf(struct pfsync_softc *, struct mbuf *); 128 void pfsync_timeout(void *); 129 void pfsync_tdb_timeout(void *); 130 void pfsync_send_bus(struct pfsync_softc *, u_int8_t); 131 void pfsync_bulk_update(void *); 132 void pfsync_bulkfail(void *); 133 134 int pfsync_sync_ok; 135 136 struct if_clone pfsync_cloner = 137 IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create, pfsync_clone_destroy); 138 139 void 140 pfsyncattach(int npfsync) 141 { 142 if_clone_attach(&pfsync_cloner); 143 144 pfsyncstat_percpu = percpu_alloc(sizeof(uint64_t) * PFSYNC_NSTATS); 145 } 146 147 int 148 pfsync_clone_create(struct if_clone *ifc, int unit) 149 { 150 struct ifnet *ifp; 151 152 if (unit != 0) 153 return (EINVAL); 154 155 pfsync_sync_ok = 1; 156 if ((pfsyncif = malloc(sizeof(*pfsyncif), M_DEVBUF, M_NOWAIT)) == NULL) 157 return (ENOMEM); 158 memset(pfsyncif, 0, sizeof(*pfsyncif)); 159 pfsyncif->sc_mbuf = NULL; 160 pfsyncif->sc_mbuf_net = NULL; 161 pfsyncif->sc_mbuf_tdb = NULL; 162 pfsyncif->sc_statep.s = NULL; 163 pfsyncif->sc_statep_net.s = NULL; 164 pfsyncif->sc_statep_tdb.t = NULL; 165 pfsyncif->sc_maxupdates = 128; 166 pfsyncif->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP; 167 pfsyncif->sc_sendaddr.s_addr = INADDR_PFSYNC_GROUP; 168 pfsyncif->sc_ureq_received = 0; 169 pfsyncif->sc_ureq_sent = 0; 170 pfsyncif->sc_bulk_send_next = NULL; 171 pfsyncif->sc_bulk_terminator = NULL; 172 ifp = &pfsyncif->sc_if; 173 snprintf(ifp->if_xname, sizeof ifp->if_xname, "pfsync%d", unit); 174 ifp->if_softc = pfsyncif; 175 ifp->if_ioctl = pfsyncioctl; 176 ifp->if_output = pfsyncoutput; 177 ifp->if_start = pfsyncstart; 178 ifp->if_type = IFT_PFSYNC; 179 ifp->if_snd.ifq_maxlen = ifqmaxlen; 180 ifp->if_hdrlen = PFSYNC_HDRLEN; 181 pfsync_setmtu(pfsyncif, ETHERMTU); 182 183 callout_init(&pfsyncif->sc_tmo, 0); 184 callout_init(&pfsyncif->sc_tdb_tmo, 0); 185 callout_init(&pfsyncif->sc_bulk_tmo, 0); 186 callout_init(&pfsyncif->sc_bulkfail_tmo, 0); 187 callout_setfunc(&pfsyncif->sc_tmo, pfsync_timeout, pfsyncif); 188 callout_setfunc(&pfsyncif->sc_tdb_tmo, pfsync_tdb_timeout, pfsyncif); 189 callout_setfunc(&pfsyncif->sc_bulk_tmo, pfsync_bulk_update, pfsyncif); 190 callout_setfunc(&pfsyncif->sc_bulkfail_tmo, pfsync_bulkfail, pfsyncif); 191 192 if_attach(ifp); 193 if_alloc_sadl(ifp); 194 195 #if NBPFILTER > 0 196 bpfattach(&pfsyncif->sc_if, DLT_PFSYNC, PFSYNC_HDRLEN); 197 #endif 198 199 return (0); 200 } 201 202 int 203 pfsync_clone_destroy(struct ifnet *ifp) 204 { 205 #if NBPFILTER > 0 206 bpfdetach(ifp); 207 #endif 208 if_detach(ifp); 209 free(pfsyncif, M_DEVBUF); 210 pfsyncif = NULL; 211 return (0); 212 } 213 214 /* 215 * Start output on the pfsync interface. 216 */ 217 void 218 pfsyncstart(struct ifnet *ifp) 219 { 220 struct mbuf *m; 221 int s; 222 223 for (;;) { 224 s = splnet(); 225 IF_DROP(&ifp->if_snd); 226 IF_DEQUEUE(&ifp->if_snd, m); 227 splx(s); 228 229 if (m == NULL) 230 return; 231 else 232 m_freem(m); 233 } 234 } 235 236 int 237 pfsync_alloc_scrub_memory(struct pfsync_state_peer *s, 238 struct pf_state_peer *d) 239 { 240 if (s->scrub.scrub_flag && d->scrub == NULL) { 241 d->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT); 242 if (d->scrub == NULL) 243 return (ENOMEM); 244 memset(d->scrub, 0, sizeof(*d->scrub)); 245 } 246 247 return (0); 248 } 249 250 int 251 pfsync_insert_net_state(struct pfsync_state *sp, u_int8_t chksum_flag) 252 { 253 struct pf_state *st = NULL; 254 struct pf_state_key *sk = NULL; 255 struct pf_rule *r = NULL; 256 struct pfi_kif *kif; 257 258 if (sp->creatorid == 0 && pf_status.debug >= PF_DEBUG_MISC) { 259 printf("pfsync_insert_net_state: invalid creator id:" 260 " %08x\n", ntohl(sp->creatorid)); 261 return (EINVAL); 262 } 263 264 kif = pfi_kif_get(sp->ifname); 265 if (kif == NULL) { 266 if (pf_status.debug >= PF_DEBUG_MISC) 267 printf("pfsync_insert_net_state: " 268 "unknown interface: %s\n", sp->ifname); 269 /* skip this state */ 270 return (0); 271 } 272 273 /* 274 * If the ruleset checksums match, it's safe to associate the state 275 * with the rule of that number. 276 */ 277 if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) && chksum_flag && 278 ntohl(sp->rule) < 279 pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount) 280 r = pf_main_ruleset.rules[ 281 PF_RULESET_FILTER].active.ptr_array[ntohl(sp->rule)]; 282 else 283 r = &pf_default_rule; 284 285 if (!r->max_states || r->states < r->max_states) 286 st = pool_get(&pf_state_pl, PR_NOWAIT); 287 if (st == NULL) { 288 pfi_kif_unref(kif, PFI_KIF_REF_NONE); 289 return (ENOMEM); 290 } 291 memset(st, 0, sizeof(*st)); 292 293 if ((sk = pf_alloc_state_key(st)) == NULL) { 294 pool_put(&pf_state_pl, st); 295 pfi_kif_unref(kif, PFI_KIF_REF_NONE); 296 return (ENOMEM); 297 } 298 299 /* allocate memory for scrub info */ 300 if (pfsync_alloc_scrub_memory(&sp->src, &st->src) || 301 pfsync_alloc_scrub_memory(&sp->dst, &st->dst)) { 302 pfi_kif_unref(kif, PFI_KIF_REF_NONE); 303 if (st->src.scrub) 304 pool_put(&pf_state_scrub_pl, st->src.scrub); 305 pool_put(&pf_state_pl, st); 306 pool_put(&pf_state_key_pl, sk); 307 return (ENOMEM); 308 } 309 310 st->rule.ptr = r; 311 /* XXX get pointers to nat_rule and anchor */ 312 313 /* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */ 314 r->states++; 315 316 /* fill in the rest of the state entry */ 317 pf_state_host_ntoh(&sp->lan, &sk->lan); 318 pf_state_host_ntoh(&sp->gwy, &sk->gwy); 319 pf_state_host_ntoh(&sp->ext, &sk->ext); 320 321 pf_state_peer_ntoh(&sp->src, &st->src); 322 pf_state_peer_ntoh(&sp->dst, &st->dst); 323 324 memcpy(&st->rt_addr, &sp->rt_addr, sizeof(st->rt_addr)); 325 st->creation = time_second - ntohl(sp->creation); 326 st->expire = ntohl(sp->expire) + time_second; 327 328 sk->af = sp->af; 329 sk->proto = sp->proto; 330 sk->direction = sp->direction; 331 st->log = sp->log; 332 st->timeout = sp->timeout; 333 st->allow_opts = sp->allow_opts; 334 335 memcpy(&st->id, sp->id, sizeof(st->id)); 336 st->creatorid = sp->creatorid; 337 st->sync_flags = PFSTATE_FROMSYNC; 338 339 if (pf_insert_state(kif, st)) { 340 pfi_kif_unref(kif, PFI_KIF_REF_NONE); 341 /* XXX when we have nat_rule/anchors, use STATE_DEC_COUNTERS */ 342 r->states--; 343 if (st->dst.scrub) 344 pool_put(&pf_state_scrub_pl, st->dst.scrub); 345 if (st->src.scrub) 346 pool_put(&pf_state_scrub_pl, st->src.scrub); 347 pool_put(&pf_state_pl, st); 348 return (EINVAL); 349 } 350 351 return (0); 352 } 353 354 void 355 pfsync_input(struct mbuf *m, ...) 356 { 357 struct ip *ip = mtod(m, struct ip *); 358 struct pfsync_header *ph; 359 struct pfsync_softc *sc = pfsyncif; 360 struct pf_state *st; 361 struct pf_state_key *sk; 362 struct pf_state_cmp id_key; 363 struct pfsync_state *sp; 364 struct pfsync_state_upd *up; 365 struct pfsync_state_del *dp; 366 struct pfsync_state_clr *cp; 367 struct pfsync_state_upd_req *rup; 368 struct pfsync_state_bus *bus; 369 #ifdef IPSEC 370 struct pfsync_tdb *pt; 371 #endif 372 struct in_addr src; 373 struct mbuf *mp; 374 int iplen, action, error, i, s, count, offp, sfail, stale = 0; 375 u_int8_t chksum_flag = 0; 376 377 PFSYNC_STATINC(PFSYNC_STAT_IPACKETS); 378 379 /* verify that we have a sync interface configured */ 380 if (!sc || !sc->sc_sync_ifp || !pf_status.running) 381 goto done; 382 383 /* verify that the packet came in on the right interface */ 384 if (sc->sc_sync_ifp != m->m_pkthdr.rcvif) { 385 PFSYNC_STATINC(PFSYNC_STAT_BADIF); 386 goto done; 387 } 388 389 /* verify that the IP TTL is 255. */ 390 if (ip->ip_ttl != PFSYNC_DFLTTL) { 391 PFSYNC_STATINC(PFSYNC_STAT_BADTTL); 392 goto done; 393 } 394 395 iplen = ip->ip_hl << 2; 396 397 if (m->m_pkthdr.len < iplen + sizeof(*ph)) { 398 PFSYNC_STATINC(PFSYNC_STAT_HDROPS); 399 goto done; 400 } 401 402 if (iplen + sizeof(*ph) > m->m_len) { 403 if ((m = m_pullup(m, iplen + sizeof(*ph))) == NULL) { 404 PFSYNC_STATINC(PFSYNC_STAT_HDROPS); 405 goto done; 406 } 407 ip = mtod(m, struct ip *); 408 } 409 ph = (struct pfsync_header *)((char *)ip + iplen); 410 411 /* verify the version */ 412 if (ph->version != PFSYNC_VERSION) { 413 PFSYNC_STATINC(PFSYNC_STAT_BADVER); 414 goto done; 415 } 416 417 action = ph->action; 418 count = ph->count; 419 420 /* make sure it's a valid action code */ 421 if (action >= PFSYNC_ACT_MAX) { 422 PFSYNC_STATINC(PFSYNC_STAT_BADACT); 423 goto done; 424 } 425 426 /* Cheaper to grab this now than having to mess with mbufs later */ 427 src = ip->ip_src; 428 429 if (!bcmp(&ph->pf_chksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH)) 430 chksum_flag++; 431 432 switch (action) { 433 case PFSYNC_ACT_CLR: { 434 struct pf_state *nexts; 435 struct pf_state_key *nextsk; 436 struct pfi_kif *kif; 437 u_int32_t creatorid; 438 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 439 sizeof(*cp), &offp)) == NULL) { 440 PFSYNC_STATINC(PFSYNC_STAT_BADLEN); 441 return; 442 } 443 cp = (struct pfsync_state_clr *)(mp->m_data + offp); 444 creatorid = cp->creatorid; 445 446 s = splsoftnet(); 447 if (cp->ifname[0] == '\0') { 448 for (st = RB_MIN(pf_state_tree_id, &tree_id); 449 st; st = nexts) { 450 nexts = RB_NEXT(pf_state_tree_id, &tree_id, st); 451 if (st->creatorid == creatorid) { 452 st->sync_flags |= PFSTATE_FROMSYNC; 453 pf_unlink_state(st); 454 } 455 } 456 } else { 457 if ((kif = pfi_kif_get(cp->ifname)) == NULL) { 458 splx(s); 459 return; 460 } 461 for (sk = RB_MIN(pf_state_tree_lan_ext, 462 &pf_statetbl_lan_ext); sk; sk = nextsk) { 463 nextsk = RB_NEXT(pf_state_tree_lan_ext, 464 &pf_statetbl_lan_ext, sk); 465 TAILQ_FOREACH(st, &sk->states, next) { 466 if (st->creatorid == creatorid) { 467 st->sync_flags |= 468 PFSTATE_FROMSYNC; 469 pf_unlink_state(st); 470 } 471 } 472 } 473 } 474 splx(s); 475 476 break; 477 } 478 case PFSYNC_ACT_INS: 479 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 480 count * sizeof(*sp), &offp)) == NULL) { 481 PFSYNC_STATINC(PFSYNC_STAT_BADLEN); 482 return; 483 } 484 485 s = splsoftnet(); 486 for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp); 487 i < count; i++, sp++) { 488 /* check for invalid values */ 489 if (sp->timeout >= PFTM_MAX || 490 sp->src.state > PF_TCPS_PROXY_DST || 491 sp->dst.state > PF_TCPS_PROXY_DST || 492 sp->direction > PF_OUT || 493 (sp->af != AF_INET && sp->af != AF_INET6)) { 494 if (pf_status.debug >= PF_DEBUG_MISC) 495 printf("pfsync_insert: PFSYNC_ACT_INS: " 496 "invalid value\n"); 497 PFSYNC_STATINC(PFSYNC_STAT_BADSTATE); 498 continue; 499 } 500 501 if ((error = pfsync_insert_net_state(sp, 502 chksum_flag))) { 503 if (error == ENOMEM) { 504 splx(s); 505 goto done; 506 } 507 continue; 508 } 509 } 510 splx(s); 511 break; 512 case PFSYNC_ACT_UPD: 513 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 514 count * sizeof(*sp), &offp)) == NULL) { 515 PFSYNC_STATINC(PFSYNC_STAT_BADLEN); 516 return; 517 } 518 519 s = splsoftnet(); 520 for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp); 521 i < count; i++, sp++) { 522 int flags = PFSYNC_FLAG_STALE; 523 524 /* check for invalid values */ 525 if (sp->timeout >= PFTM_MAX || 526 sp->src.state > PF_TCPS_PROXY_DST || 527 sp->dst.state > PF_TCPS_PROXY_DST) { 528 if (pf_status.debug >= PF_DEBUG_MISC) 529 printf("pfsync_insert: PFSYNC_ACT_UPD: " 530 "invalid value\n"); 531 PFSYNC_STATINC(PFSYNC_STAT_BADSTATE); 532 continue; 533 } 534 535 memcpy(&id_key.id, sp->id, sizeof(id_key.id)); 536 id_key.creatorid = sp->creatorid; 537 538 st = pf_find_state_byid(&id_key); 539 if (st == NULL) { 540 /* insert the update */ 541 if (pfsync_insert_net_state(sp, chksum_flag)) { 542 PFSYNC_STATINC(PFSYNC_STAT_BADSTATE); 543 } 544 continue; 545 } 546 sk = st->state_key; 547 sfail = 0; 548 if (sk->proto == IPPROTO_TCP) { 549 /* 550 * The state should never go backwards except 551 * for syn-proxy states. Neither should the 552 * sequence window slide backwards. 553 */ 554 if (st->src.state > sp->src.state && 555 (st->src.state < PF_TCPS_PROXY_SRC || 556 sp->src.state >= PF_TCPS_PROXY_SRC)) 557 sfail = 1; 558 else if (SEQ_GT(st->src.seqlo, 559 ntohl(sp->src.seqlo))) 560 sfail = 3; 561 else if (st->dst.state > sp->dst.state) { 562 /* There might still be useful 563 * information about the src state here, 564 * so import that part of the update, 565 * then "fail" so we send the updated 566 * state back to the peer who is missing 567 * our what we know. */ 568 pf_state_peer_ntoh(&sp->src, &st->src); 569 /* XXX do anything with timeouts? */ 570 sfail = 7; 571 flags = 0; 572 } else if (st->dst.state >= TCPS_SYN_SENT && 573 SEQ_GT(st->dst.seqlo, ntohl(sp->dst.seqlo))) 574 sfail = 4; 575 } else { 576 /* 577 * Non-TCP protocol state machine always go 578 * forwards 579 */ 580 if (st->src.state > sp->src.state) 581 sfail = 5; 582 else if (st->dst.state > sp->dst.state) 583 sfail = 6; 584 } 585 if (sfail) { 586 if (pf_status.debug >= PF_DEBUG_MISC) 587 printf("pfsync: %s stale update " 588 "(%d) id: %016" PRIu64 "" 589 "creatorid: %08x\n", 590 (sfail < 7 ? "ignoring" 591 : "partial"), sfail, 592 be64toh(st->id), 593 ntohl(st->creatorid)); 594 PFSYNC_STATINC(PFSYNC_STAT_BADSTATE); 595 596 if (!(sp->sync_flags & PFSTATE_STALE)) { 597 /* we have a better state, send it */ 598 if (sc->sc_mbuf != NULL && !stale) 599 pfsync_sendout(sc); 600 stale++; 601 if (!st->sync_flags) 602 pfsync_pack_state( 603 PFSYNC_ACT_UPD, st, flags); 604 } 605 continue; 606 } 607 pfsync_alloc_scrub_memory(&sp->dst, &st->dst); 608 pf_state_peer_ntoh(&sp->src, &st->src); 609 pf_state_peer_ntoh(&sp->dst, &st->dst); 610 st->expire = ntohl(sp->expire) + time_second; 611 st->timeout = sp->timeout; 612 } 613 if (stale && sc->sc_mbuf != NULL) 614 pfsync_sendout(sc); 615 splx(s); 616 break; 617 /* 618 * It's not strictly necessary for us to support the "uncompressed" 619 * delete action, but it's relatively simple and maintains consistency. 620 */ 621 case PFSYNC_ACT_DEL: 622 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 623 count * sizeof(*sp), &offp)) == NULL) { 624 PFSYNC_STATINC(PFSYNC_STAT_BADLEN); 625 return; 626 } 627 628 s = splsoftnet(); 629 for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp); 630 i < count; i++, sp++) { 631 memcpy(&id_key.id, sp->id, sizeof(id_key.id)); 632 id_key.creatorid = sp->creatorid; 633 634 st = pf_find_state_byid(&id_key); 635 if (st == NULL) { 636 PFSYNC_STATINC(PFSYNC_STAT_BADSTATE); 637 continue; 638 } 639 st->sync_flags |= PFSTATE_FROMSYNC; 640 pf_unlink_state(st); 641 } 642 splx(s); 643 break; 644 case PFSYNC_ACT_UPD_C: { 645 int update_requested = 0; 646 647 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 648 count * sizeof(*up), &offp)) == NULL) { 649 PFSYNC_STATINC(PFSYNC_STAT_BADLEN); 650 return; 651 } 652 653 s = splsoftnet(); 654 for (i = 0, up = (struct pfsync_state_upd *)(mp->m_data + offp); 655 i < count; i++, up++) { 656 /* check for invalid values */ 657 if (up->timeout >= PFTM_MAX || 658 up->src.state > PF_TCPS_PROXY_DST || 659 up->dst.state > PF_TCPS_PROXY_DST) { 660 if (pf_status.debug >= PF_DEBUG_MISC) 661 printf("pfsync_insert: " 662 "PFSYNC_ACT_UPD_C: " 663 "invalid value\n"); 664 PFSYNC_STATINC(PFSYNC_STAT_BADSTATE); 665 continue; 666 } 667 668 memcpy(&id_key.id, up->id, sizeof(id_key.id)); 669 id_key.creatorid = up->creatorid; 670 671 st = pf_find_state_byid(&id_key); 672 if (st == NULL) { 673 /* We don't have this state. Ask for it. */ 674 error = pfsync_request_update(up, &src); 675 if (error == ENOMEM) { 676 splx(s); 677 goto done; 678 } 679 update_requested = 1; 680 PFSYNC_STATINC(PFSYNC_STAT_BADSTATE); 681 continue; 682 } 683 sk = st->state_key; 684 sfail = 0; 685 if (sk->proto == IPPROTO_TCP) { 686 /* 687 * The state should never go backwards except 688 * for syn-proxy states. Neither should the 689 * sequence window slide backwards. 690 */ 691 if (st->src.state > up->src.state && 692 (st->src.state < PF_TCPS_PROXY_SRC || 693 up->src.state >= PF_TCPS_PROXY_SRC)) 694 sfail = 1; 695 else if (st->dst.state > up->dst.state) 696 sfail = 2; 697 else if (SEQ_GT(st->src.seqlo, 698 ntohl(up->src.seqlo))) 699 sfail = 3; 700 else if (st->dst.state >= TCPS_SYN_SENT && 701 SEQ_GT(st->dst.seqlo, ntohl(up->dst.seqlo))) 702 sfail = 4; 703 } else { 704 /* 705 * Non-TCP protocol state machine always go 706 * forwards 707 */ 708 if (st->src.state > up->src.state) 709 sfail = 5; 710 else if (st->dst.state > up->dst.state) 711 sfail = 6; 712 } 713 if (sfail) { 714 if (pf_status.debug >= PF_DEBUG_MISC) 715 printf("pfsync: ignoring stale update " 716 "(%d) id: %016" PRIu64 "" 717 "creatorid: %08x\n", sfail, 718 be64toh(st->id), 719 ntohl(st->creatorid)); 720 PFSYNC_STATINC(PFSYNC_STAT_BADSTATE); 721 722 /* we have a better state, send it out */ 723 if ((!stale || update_requested) && 724 sc->sc_mbuf != NULL) { 725 pfsync_sendout(sc); 726 update_requested = 0; 727 } 728 stale++; 729 if (!st->sync_flags) 730 pfsync_pack_state(PFSYNC_ACT_UPD, st, 731 PFSYNC_FLAG_STALE); 732 continue; 733 } 734 pfsync_alloc_scrub_memory(&up->dst, &st->dst); 735 pf_state_peer_ntoh(&up->src, &st->src); 736 pf_state_peer_ntoh(&up->dst, &st->dst); 737 st->expire = ntohl(up->expire) + time_second; 738 st->timeout = up->timeout; 739 } 740 if ((update_requested || stale) && sc->sc_mbuf) 741 pfsync_sendout(sc); 742 splx(s); 743 break; 744 } 745 case PFSYNC_ACT_DEL_C: 746 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 747 count * sizeof(*dp), &offp)) == NULL) { 748 PFSYNC_STATINC(PFSYNC_STAT_BADLEN); 749 return; 750 } 751 752 s = splsoftnet(); 753 for (i = 0, dp = (struct pfsync_state_del *)(mp->m_data + offp); 754 i < count; i++, dp++) { 755 memcpy(&id_key.id, dp->id, sizeof(id_key.id)); 756 id_key.creatorid = dp->creatorid; 757 758 st = pf_find_state_byid(&id_key); 759 if (st == NULL) { 760 PFSYNC_STATINC(PFSYNC_STAT_BADSTATE); 761 continue; 762 } 763 st->sync_flags |= PFSTATE_FROMSYNC; 764 pf_unlink_state(st); 765 } 766 splx(s); 767 break; 768 case PFSYNC_ACT_INS_F: 769 case PFSYNC_ACT_DEL_F: 770 /* not implemented */ 771 break; 772 case PFSYNC_ACT_UREQ: 773 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 774 count * sizeof(*rup), &offp)) == NULL) { 775 PFSYNC_STATINC(PFSYNC_STAT_BADLEN); 776 return; 777 } 778 779 s = splsoftnet(); 780 if (sc->sc_mbuf != NULL) 781 pfsync_sendout(sc); 782 for (i = 0, 783 rup = (struct pfsync_state_upd_req *)(mp->m_data + offp); 784 i < count; i++, rup++) { 785 memcpy(&id_key.id, rup->id, sizeof(id_key.id)); 786 id_key.creatorid = rup->creatorid; 787 788 if (id_key.id == 0 && id_key.creatorid == 0) { 789 sc->sc_ureq_received = time_uptime; 790 if (sc->sc_bulk_send_next == NULL) 791 sc->sc_bulk_send_next = 792 TAILQ_FIRST(&state_list); 793 sc->sc_bulk_terminator = sc->sc_bulk_send_next; 794 if (pf_status.debug >= PF_DEBUG_MISC) 795 printf("pfsync: received " 796 "bulk update request\n"); 797 pfsync_send_bus(sc, PFSYNC_BUS_START); 798 callout_schedule(&sc->sc_bulk_tmo, 1 * hz); 799 } else { 800 st = pf_find_state_byid(&id_key); 801 if (st == NULL) { 802 PFSYNC_STATINC(PFSYNC_STAT_BADSTATE); 803 continue; 804 } 805 if (!st->sync_flags) 806 pfsync_pack_state(PFSYNC_ACT_UPD, 807 st, 0); 808 } 809 } 810 if (sc->sc_mbuf != NULL) 811 pfsync_sendout(sc); 812 splx(s); 813 break; 814 case PFSYNC_ACT_BUS: 815 /* If we're not waiting for a bulk update, who cares. */ 816 if (sc->sc_ureq_sent == 0) 817 break; 818 819 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 820 sizeof(*bus), &offp)) == NULL) { 821 PFSYNC_STATINC(PFSYNC_STAT_BADLEN); 822 return; 823 } 824 bus = (struct pfsync_state_bus *)(mp->m_data + offp); 825 switch (bus->status) { 826 case PFSYNC_BUS_START: 827 callout_schedule(&sc->sc_bulkfail_tmo, 828 pf_pool_limits[PF_LIMIT_STATES].limit / 829 (PFSYNC_BULKPACKETS * sc->sc_maxcount)); 830 if (pf_status.debug >= PF_DEBUG_MISC) 831 printf("pfsync: received bulk " 832 "update start\n"); 833 break; 834 case PFSYNC_BUS_END: 835 if (time_uptime - ntohl(bus->endtime) >= 836 sc->sc_ureq_sent) { 837 /* that's it, we're happy */ 838 sc->sc_ureq_sent = 0; 839 sc->sc_bulk_tries = 0; 840 callout_stop(&sc->sc_bulkfail_tmo); 841 #if NCARP > 0 842 if (!pfsync_sync_ok) 843 carp_suppress_preempt--; 844 #endif 845 pfsync_sync_ok = 1; 846 if (pf_status.debug >= PF_DEBUG_MISC) 847 printf("pfsync: received valid " 848 "bulk update end\n"); 849 } else { 850 if (pf_status.debug >= PF_DEBUG_MISC) 851 printf("pfsync: received invalid " 852 "bulk update end: bad timestamp\n"); 853 } 854 break; 855 } 856 break; 857 #ifdef IPSEC 858 case PFSYNC_ACT_TDB_UPD: 859 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 860 count * sizeof(*pt), &offp)) == NULL) { 861 PFSYNC_STATINC(PFSYNC_STAT_BADLEN); 862 return; 863 } 864 s = splsoftnet(); 865 for (i = 0, pt = (struct pfsync_tdb *)(mp->m_data + offp); 866 i < count; i++, pt++) 867 pfsync_update_net_tdb(pt); 868 splx(s); 869 break; 870 #endif 871 } 872 873 done: 874 if (m) 875 m_freem(m); 876 } 877 878 int 879 pfsyncoutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, 880 struct rtentry *rt) 881 { 882 m_freem(m); 883 return (0); 884 } 885 886 /* ARGSUSED */ 887 int 888 pfsyncioctl(struct ifnet *ifp, u_long cmd, void* data) 889 { 890 struct lwp *l = curlwp; 891 struct pfsync_softc *sc = ifp->if_softc; 892 struct ifreq *ifr = (struct ifreq *)data; 893 struct ip_moptions *imo = &sc->sc_imo; 894 struct pfsyncreq pfsyncr; 895 struct ifnet *sifp; 896 int s, error; 897 898 switch (cmd) { 899 case SIOCSIFADDR: 900 case SIOCAIFADDR: 901 case SIOCSIFDSTADDR: 902 case SIOCSIFFLAGS: 903 if (ifp->if_flags & IFF_UP) 904 ifp->if_flags |= IFF_RUNNING; 905 else 906 ifp->if_flags &= ~IFF_RUNNING; 907 break; 908 case SIOCSIFMTU: 909 if (ifr->ifr_mtu < PFSYNC_MINMTU) 910 return (EINVAL); 911 if (ifr->ifr_mtu > MCLBYTES) 912 ifr->ifr_mtu = MCLBYTES; 913 s = splnet(); 914 if (ifr->ifr_mtu < ifp->if_mtu) 915 pfsync_sendout(sc); 916 pfsync_setmtu(sc, ifr->ifr_mtu); 917 splx(s); 918 break; 919 case SIOCGETPFSYNC: 920 if ((error = kauth_authorize_network(l->l_cred, 921 KAUTH_NETWORK_INTERFACE, 922 KAUTH_REQ_NETWORK_INTERFACE_GETPRIV, ifp, (void *)cmd, 923 NULL)) != 0) 924 return (error); 925 memset(&pfsyncr, 0, sizeof(pfsyncr)); 926 if (sc->sc_sync_ifp) 927 strlcpy(pfsyncr.pfsyncr_syncdev, 928 sc->sc_sync_ifp->if_xname, IFNAMSIZ); 929 pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer; 930 pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates; 931 if ((error = copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr)))) 932 return (error); 933 break; 934 case SIOCSETPFSYNC: 935 if ((error = kauth_authorize_network(l->l_cred, 936 KAUTH_NETWORK_INTERFACE, 937 KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, (void *)cmd, 938 NULL)) != 0) 939 return (error); 940 if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr)))) 941 return (error); 942 943 if (pfsyncr.pfsyncr_syncpeer.s_addr == 0) 944 sc->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP; 945 else 946 sc->sc_sync_peer.s_addr = 947 pfsyncr.pfsyncr_syncpeer.s_addr; 948 949 if (pfsyncr.pfsyncr_maxupdates > 255) 950 return (EINVAL); 951 sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates; 952 953 if (pfsyncr.pfsyncr_syncdev[0] == 0) { 954 sc->sc_sync_ifp = NULL; 955 if (sc->sc_mbuf_net != NULL) { 956 /* Don't keep stale pfsync packets around. */ 957 s = splnet(); 958 m_freem(sc->sc_mbuf_net); 959 sc->sc_mbuf_net = NULL; 960 sc->sc_statep_net.s = NULL; 961 splx(s); 962 } 963 if (imo->imo_num_memberships > 0) { 964 in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); 965 imo->imo_multicast_ifp = NULL; 966 } 967 break; 968 } 969 970 if ((sifp = ifunit(pfsyncr.pfsyncr_syncdev)) == NULL) 971 return (EINVAL); 972 973 s = splnet(); 974 if (sifp->if_mtu < sc->sc_if.if_mtu || 975 (sc->sc_sync_ifp != NULL && 976 sifp->if_mtu < sc->sc_sync_ifp->if_mtu) || 977 sifp->if_mtu < MCLBYTES - sizeof(struct ip)) 978 pfsync_sendout(sc); 979 sc->sc_sync_ifp = sifp; 980 981 pfsync_setmtu(sc, sc->sc_if.if_mtu); 982 983 if (imo->imo_num_memberships > 0) { 984 in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); 985 imo->imo_multicast_ifp = NULL; 986 } 987 988 if (sc->sc_sync_ifp && 989 sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) { 990 struct in_addr addr; 991 992 if (!(sc->sc_sync_ifp->if_flags & IFF_MULTICAST)) { 993 sc->sc_sync_ifp = NULL; 994 splx(s); 995 return (EADDRNOTAVAIL); 996 } 997 998 addr.s_addr = INADDR_PFSYNC_GROUP; 999 1000 if ((imo->imo_membership[0] = 1001 in_addmulti(&addr, sc->sc_sync_ifp)) == NULL) { 1002 sc->sc_sync_ifp = NULL; 1003 splx(s); 1004 return (ENOBUFS); 1005 } 1006 imo->imo_num_memberships++; 1007 imo->imo_multicast_ifp = sc->sc_sync_ifp; 1008 imo->imo_multicast_ttl = PFSYNC_DFLTTL; 1009 imo->imo_multicast_loop = 0; 1010 } 1011 1012 if (sc->sc_sync_ifp || 1013 sc->sc_sendaddr.s_addr != INADDR_PFSYNC_GROUP) { 1014 /* Request a full state table update. */ 1015 sc->sc_ureq_sent = time_uptime; 1016 #if NCARP > 0 1017 if (pfsync_sync_ok) 1018 carp_suppress_preempt ++; 1019 #endif 1020 pfsync_sync_ok = 0; 1021 if (pf_status.debug >= PF_DEBUG_MISC) 1022 printf("pfsync: requesting bulk update\n"); 1023 callout_schedule(&sc->sc_bulkfail_tmo, 5 * hz); 1024 error = pfsync_request_update(NULL, NULL); 1025 if (error == ENOMEM) { 1026 splx(s); 1027 return (ENOMEM); 1028 } 1029 pfsync_sendout(sc); 1030 } 1031 splx(s); 1032 1033 break; 1034 1035 default: 1036 return ifioctl_common(ifp, cmd, data); 1037 } 1038 1039 return (0); 1040 } 1041 1042 void 1043 pfsync_setmtu(struct pfsync_softc *sc, int mtu_req) 1044 { 1045 int mtu; 1046 1047 if (sc->sc_sync_ifp && sc->sc_sync_ifp->if_mtu < mtu_req) 1048 mtu = sc->sc_sync_ifp->if_mtu; 1049 else 1050 mtu = mtu_req; 1051 1052 sc->sc_maxcount = (mtu - sizeof(struct pfsync_header)) / 1053 sizeof(struct pfsync_state); 1054 if (sc->sc_maxcount > 254) 1055 sc->sc_maxcount = 254; 1056 sc->sc_if.if_mtu = sizeof(struct pfsync_header) + 1057 sc->sc_maxcount * sizeof(struct pfsync_state); 1058 } 1059 1060 struct mbuf * 1061 pfsync_get_mbuf(struct pfsync_softc *sc, u_int8_t action, void **sp) 1062 { 1063 struct pfsync_header *h; 1064 struct mbuf *m; 1065 int len; 1066 1067 MGETHDR(m, M_DONTWAIT, MT_DATA); 1068 if (m == NULL) { 1069 sc->sc_if.if_oerrors++; 1070 return (NULL); 1071 } 1072 1073 switch (action) { 1074 case PFSYNC_ACT_CLR: 1075 len = sizeof(struct pfsync_header) + 1076 sizeof(struct pfsync_state_clr); 1077 break; 1078 case PFSYNC_ACT_UPD_C: 1079 len = (sc->sc_maxcount * sizeof(struct pfsync_state_upd)) + 1080 sizeof(struct pfsync_header); 1081 break; 1082 case PFSYNC_ACT_DEL_C: 1083 len = (sc->sc_maxcount * sizeof(struct pfsync_state_del)) + 1084 sizeof(struct pfsync_header); 1085 break; 1086 case PFSYNC_ACT_UREQ: 1087 len = (sc->sc_maxcount * sizeof(struct pfsync_state_upd_req)) + 1088 sizeof(struct pfsync_header); 1089 break; 1090 case PFSYNC_ACT_BUS: 1091 len = sizeof(struct pfsync_header) + 1092 sizeof(struct pfsync_state_bus); 1093 break; 1094 case PFSYNC_ACT_TDB_UPD: 1095 len = (sc->sc_maxcount * sizeof(struct pfsync_tdb)) + 1096 sizeof(struct pfsync_header); 1097 break; 1098 default: 1099 len = (sc->sc_maxcount * sizeof(struct pfsync_state)) + 1100 sizeof(struct pfsync_header); 1101 break; 1102 } 1103 1104 if (len > MHLEN) { 1105 MCLGET(m, M_DONTWAIT); 1106 if ((m->m_flags & M_EXT) == 0) { 1107 m_free(m); 1108 sc->sc_if.if_oerrors++; 1109 return (NULL); 1110 } 1111 m->m_data += (MCLBYTES - len) &~ (sizeof(long) - 1); 1112 } else 1113 MH_ALIGN(m, len); 1114 1115 m->m_pkthdr.rcvif = NULL; 1116 m->m_pkthdr.len = m->m_len = sizeof(struct pfsync_header); 1117 h = mtod(m, struct pfsync_header *); 1118 h->version = PFSYNC_VERSION; 1119 h->af = 0; 1120 h->count = 0; 1121 h->action = action; 1122 if (action != PFSYNC_ACT_TDB_UPD) 1123 memcpy(&h->pf_chksum, &pf_status.pf_chksum, 1124 PF_MD5_DIGEST_LENGTH); 1125 1126 *sp = (void *)((char *)h + PFSYNC_HDRLEN); 1127 if (action == PFSYNC_ACT_TDB_UPD) 1128 callout_schedule(&sc->sc_tdb_tmo, hz); 1129 else 1130 callout_schedule(&sc->sc_tmo, hz); 1131 return (m); 1132 } 1133 1134 int 1135 pfsync_pack_state(u_int8_t action, struct pf_state *st, int flags) 1136 { 1137 struct ifnet *ifp = NULL; 1138 struct pfsync_softc *sc = pfsyncif; 1139 struct pfsync_header *h, *h_net; 1140 struct pfsync_state *sp = NULL; 1141 struct pfsync_state_upd *up = NULL; 1142 struct pfsync_state_del *dp = NULL; 1143 struct pf_state_key *sk = st->state_key; 1144 struct pf_rule *r; 1145 u_long secs; 1146 int s, ret = 0; 1147 u_int8_t i = 255, newaction = 0; 1148 1149 if (sc == NULL) 1150 return (0); 1151 ifp = &sc->sc_if; 1152 1153 /* 1154 * If a packet falls in the forest and there's nobody around to 1155 * hear, does it make a sound? 1156 */ 1157 if (ifp->if_bpf == NULL && sc->sc_sync_ifp == NULL && 1158 sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) { 1159 /* Don't leave any stale pfsync packets hanging around. */ 1160 if (sc->sc_mbuf != NULL) { 1161 m_freem(sc->sc_mbuf); 1162 sc->sc_mbuf = NULL; 1163 sc->sc_statep.s = NULL; 1164 } 1165 return (0); 1166 } 1167 1168 if (action >= PFSYNC_ACT_MAX) 1169 return (EINVAL); 1170 1171 s = splnet(); 1172 if (sc->sc_mbuf == NULL) { 1173 if ((sc->sc_mbuf = pfsync_get_mbuf(sc, action, 1174 (void *)&sc->sc_statep.s)) == NULL) { 1175 splx(s); 1176 return (ENOMEM); 1177 } 1178 h = mtod(sc->sc_mbuf, struct pfsync_header *); 1179 } else { 1180 h = mtod(sc->sc_mbuf, struct pfsync_header *); 1181 if (h->action != action) { 1182 pfsync_sendout(sc); 1183 if ((sc->sc_mbuf = pfsync_get_mbuf(sc, action, 1184 (void *)&sc->sc_statep.s)) == NULL) { 1185 splx(s); 1186 return (ENOMEM); 1187 } 1188 h = mtod(sc->sc_mbuf, struct pfsync_header *); 1189 } else { 1190 /* 1191 * If it's an update, look in the packet to see if 1192 * we already have an update for the state. 1193 */ 1194 if (action == PFSYNC_ACT_UPD && sc->sc_maxupdates) { 1195 struct pfsync_state *usp = 1196 (void *)((char *)h + PFSYNC_HDRLEN); 1197 1198 for (i = 0; i < h->count; i++) { 1199 if (!memcmp(usp->id, &st->id, 1200 PFSYNC_ID_LEN) && 1201 usp->creatorid == st->creatorid) { 1202 sp = usp; 1203 sp->updates++; 1204 break; 1205 } 1206 usp++; 1207 } 1208 } 1209 } 1210 } 1211 1212 secs = time_second; 1213 1214 st->pfsync_time = time_uptime; 1215 1216 if (sp == NULL) { 1217 /* not a "duplicate" update */ 1218 i = 255; 1219 sp = sc->sc_statep.s++; 1220 sc->sc_mbuf->m_pkthdr.len = 1221 sc->sc_mbuf->m_len += sizeof(struct pfsync_state); 1222 h->count++; 1223 memset(sp, 0, sizeof(*sp)); 1224 1225 memcpy(sp->id, &st->id, sizeof(sp->id)); 1226 sp->creatorid = st->creatorid; 1227 1228 strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname)); 1229 pf_state_host_hton(&sk->lan, &sp->lan); 1230 pf_state_host_hton(&sk->gwy, &sp->gwy); 1231 pf_state_host_hton(&sk->ext, &sp->ext); 1232 1233 memcpy(&sp->rt_addr, &st->rt_addr, sizeof(sp->rt_addr)); 1234 1235 sp->creation = htonl(secs - st->creation); 1236 pf_state_counter_hton(st->packets[0], sp->packets[0]); 1237 pf_state_counter_hton(st->packets[1], sp->packets[1]); 1238 pf_state_counter_hton(st->bytes[0], sp->bytes[0]); 1239 pf_state_counter_hton(st->bytes[1], sp->bytes[1]); 1240 if ((r = st->rule.ptr) == NULL) 1241 sp->rule = htonl(-1); 1242 else 1243 sp->rule = htonl(r->nr); 1244 if ((r = st->anchor.ptr) == NULL) 1245 sp->anchor = htonl(-1); 1246 else 1247 sp->anchor = htonl(r->nr); 1248 sp->af = sk->af; 1249 sp->proto = sk->proto; 1250 sp->direction = sk->direction; 1251 sp->log = st->log; 1252 sp->allow_opts = st->allow_opts; 1253 sp->timeout = st->timeout; 1254 1255 if (flags & PFSYNC_FLAG_STALE) 1256 sp->sync_flags |= PFSTATE_STALE; 1257 } 1258 1259 pf_state_peer_hton(&st->src, &sp->src); 1260 pf_state_peer_hton(&st->dst, &sp->dst); 1261 1262 if (st->expire <= secs) 1263 sp->expire = htonl(0); 1264 else 1265 sp->expire = htonl(st->expire - secs); 1266 1267 /* do we need to build "compressed" actions for network transfer? */ 1268 if (sc->sc_sync_ifp && flags & PFSYNC_FLAG_COMPRESS) { 1269 switch (action) { 1270 case PFSYNC_ACT_UPD: 1271 newaction = PFSYNC_ACT_UPD_C; 1272 break; 1273 case PFSYNC_ACT_DEL: 1274 newaction = PFSYNC_ACT_DEL_C; 1275 break; 1276 default: 1277 /* by default we just send the uncompressed states */ 1278 break; 1279 } 1280 } 1281 1282 if (newaction) { 1283 if (sc->sc_mbuf_net == NULL) { 1284 if ((sc->sc_mbuf_net = pfsync_get_mbuf(sc, newaction, 1285 (void *)&sc->sc_statep_net.s)) == NULL) { 1286 splx(s); 1287 return (ENOMEM); 1288 } 1289 } 1290 h_net = mtod(sc->sc_mbuf_net, struct pfsync_header *); 1291 1292 switch (newaction) { 1293 case PFSYNC_ACT_UPD_C: 1294 if (i != 255) { 1295 up = (void *)((char *)h_net + 1296 PFSYNC_HDRLEN + (i * sizeof(*up))); 1297 up->updates++; 1298 } else { 1299 h_net->count++; 1300 sc->sc_mbuf_net->m_pkthdr.len = 1301 sc->sc_mbuf_net->m_len += sizeof(*up); 1302 up = sc->sc_statep_net.u++; 1303 1304 memset(up, 0, sizeof(*up)); 1305 memcpy(up->id, &st->id, sizeof(up->id)); 1306 up->creatorid = st->creatorid; 1307 } 1308 up->timeout = st->timeout; 1309 up->expire = sp->expire; 1310 up->src = sp->src; 1311 up->dst = sp->dst; 1312 break; 1313 case PFSYNC_ACT_DEL_C: 1314 sc->sc_mbuf_net->m_pkthdr.len = 1315 sc->sc_mbuf_net->m_len += sizeof(*dp); 1316 dp = sc->sc_statep_net.d++; 1317 h_net->count++; 1318 1319 memset(dp, 0, sizeof(*dp)); 1320 memcpy(dp->id, &st->id, sizeof(dp->id)); 1321 dp->creatorid = st->creatorid; 1322 break; 1323 } 1324 } 1325 1326 if (h->count == sc->sc_maxcount || 1327 (sc->sc_maxupdates && (sp->updates >= sc->sc_maxupdates))) 1328 ret = pfsync_sendout(sc); 1329 1330 splx(s); 1331 return (ret); 1332 } 1333 1334 /* This must be called in splnet() */ 1335 int 1336 pfsync_request_update(struct pfsync_state_upd *up, struct in_addr *src) 1337 { 1338 struct ifnet *ifp = NULL; 1339 struct pfsync_header *h; 1340 struct pfsync_softc *sc = pfsyncif; 1341 struct pfsync_state_upd_req *rup; 1342 int ret = 0; 1343 1344 if (sc == NULL) 1345 return (0); 1346 1347 ifp = &sc->sc_if; 1348 if (sc->sc_mbuf == NULL) { 1349 if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ, 1350 (void *)&sc->sc_statep.s)) == NULL) 1351 return (ENOMEM); 1352 h = mtod(sc->sc_mbuf, struct pfsync_header *); 1353 } else { 1354 h = mtod(sc->sc_mbuf, struct pfsync_header *); 1355 if (h->action != PFSYNC_ACT_UREQ) { 1356 pfsync_sendout(sc); 1357 if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ, 1358 (void *)&sc->sc_statep.s)) == NULL) 1359 return (ENOMEM); 1360 h = mtod(sc->sc_mbuf, struct pfsync_header *); 1361 } 1362 } 1363 1364 if (src != NULL) 1365 sc->sc_sendaddr = *src; 1366 sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*rup); 1367 h->count++; 1368 rup = sc->sc_statep.r++; 1369 memset(rup, 0, sizeof(*rup)); 1370 if (up != NULL) { 1371 memcpy(rup->id, up->id, sizeof(rup->id)); 1372 rup->creatorid = up->creatorid; 1373 } 1374 1375 if (h->count == sc->sc_maxcount) 1376 ret = pfsync_sendout(sc); 1377 1378 return (ret); 1379 } 1380 1381 int 1382 pfsync_clear_states(u_int32_t creatorid, char *ifname) 1383 { 1384 struct ifnet *ifp = NULL; 1385 struct pfsync_softc *sc = pfsyncif; 1386 struct pfsync_state_clr *cp; 1387 int s, ret; 1388 1389 if (sc == NULL) 1390 return (0); 1391 1392 ifp = &sc->sc_if; 1393 s = splnet(); 1394 if (sc->sc_mbuf != NULL) 1395 pfsync_sendout(sc); 1396 if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_CLR, 1397 (void *)&sc->sc_statep.c)) == NULL) { 1398 splx(s); 1399 return (ENOMEM); 1400 } 1401 sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*cp); 1402 cp = sc->sc_statep.c; 1403 cp->creatorid = creatorid; 1404 if (ifname != NULL) 1405 strlcpy(cp->ifname, ifname, IFNAMSIZ); 1406 1407 ret = (pfsync_sendout(sc)); 1408 splx(s); 1409 return (ret); 1410 } 1411 1412 void 1413 pfsync_timeout(void *v) 1414 { 1415 struct pfsync_softc *sc = v; 1416 int s; 1417 1418 s = splnet(); 1419 pfsync_sendout(sc); 1420 splx(s); 1421 } 1422 1423 void 1424 pfsync_tdb_timeout(void *v) 1425 { 1426 struct pfsync_softc *sc = v; 1427 int s; 1428 1429 s = splnet(); 1430 pfsync_tdb_sendout(sc); 1431 splx(s); 1432 } 1433 1434 /* This must be called in splnet() */ 1435 void 1436 pfsync_send_bus(struct pfsync_softc *sc, u_int8_t status) 1437 { 1438 struct pfsync_state_bus *bus; 1439 1440 if (sc->sc_mbuf != NULL) 1441 pfsync_sendout(sc); 1442 1443 if (pfsync_sync_ok && 1444 (sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_BUS, 1445 (void *)&sc->sc_statep.b)) != NULL) { 1446 sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*bus); 1447 bus = sc->sc_statep.b; 1448 bus->creatorid = pf_status.hostid; 1449 bus->status = status; 1450 bus->endtime = htonl(time_uptime - sc->sc_ureq_received); 1451 pfsync_sendout(sc); 1452 } 1453 } 1454 1455 void 1456 pfsync_bulk_update(void *v) 1457 { 1458 struct pfsync_softc *sc = v; 1459 int s, i = 0; 1460 struct pf_state *state; 1461 1462 s = splnet(); 1463 if (sc->sc_mbuf != NULL) 1464 pfsync_sendout(sc); 1465 1466 /* 1467 * Grab at most PFSYNC_BULKPACKETS worth of states which have not 1468 * been sent since the latest request was made. 1469 */ 1470 state = sc->sc_bulk_send_next; 1471 if (state) 1472 do { 1473 /* send state update if syncable and not already sent */ 1474 if (!state->sync_flags 1475 && state->timeout < PFTM_MAX 1476 && state->pfsync_time <= sc->sc_ureq_received) { 1477 pfsync_pack_state(PFSYNC_ACT_UPD, state, 0); 1478 i++; 1479 } 1480 1481 /* figure next state to send */ 1482 state = TAILQ_NEXT(state, entry_list); 1483 1484 /* wrap to start of list if we hit the end */ 1485 if (!state) 1486 state = TAILQ_FIRST(&state_list); 1487 } while (i < sc->sc_maxcount * PFSYNC_BULKPACKETS && 1488 state != sc->sc_bulk_terminator); 1489 1490 if (!state || state == sc->sc_bulk_terminator) { 1491 /* we're done */ 1492 pfsync_send_bus(sc, PFSYNC_BUS_END); 1493 sc->sc_ureq_received = 0; 1494 sc->sc_bulk_send_next = NULL; 1495 sc->sc_bulk_terminator = NULL; 1496 callout_stop(&sc->sc_bulk_tmo); 1497 if (pf_status.debug >= PF_DEBUG_MISC) 1498 printf("pfsync: bulk update complete\n"); 1499 } else { 1500 /* look again for more in a bit */ 1501 callout_schedule(&sc->sc_bulk_tmo, 1); 1502 sc->sc_bulk_send_next = state; 1503 } 1504 if (sc->sc_mbuf != NULL) 1505 pfsync_sendout(sc); 1506 splx(s); 1507 } 1508 1509 void 1510 pfsync_bulkfail(void *v) 1511 { 1512 struct pfsync_softc *sc = v; 1513 int s, error; 1514 1515 if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) { 1516 /* Try again in a bit */ 1517 callout_schedule(&sc->sc_bulkfail_tmo, 5 * hz); 1518 s = splnet(); 1519 error = pfsync_request_update(NULL, NULL); 1520 if (error == ENOMEM) { 1521 if (pf_status.debug >= PF_DEBUG_MISC) 1522 printf("pfsync: cannot allocate mbufs for " 1523 "bulk update\n"); 1524 } else 1525 pfsync_sendout(sc); 1526 splx(s); 1527 } else { 1528 /* Pretend like the transfer was ok */ 1529 sc->sc_ureq_sent = 0; 1530 sc->sc_bulk_tries = 0; 1531 #if NCARP > 0 1532 if (!pfsync_sync_ok) 1533 carp_suppress_preempt --; 1534 #endif 1535 pfsync_sync_ok = 1; 1536 if (pf_status.debug >= PF_DEBUG_MISC) 1537 printf("pfsync: failed to receive " 1538 "bulk update status\n"); 1539 callout_stop(&sc->sc_bulkfail_tmo); 1540 } 1541 } 1542 1543 /* This must be called in splnet() */ 1544 int 1545 pfsync_sendout(struct pfsync_softc *sc) 1546 { 1547 #if NBPFILTER > 0 1548 struct ifnet *ifp = &sc->sc_if; 1549 #endif 1550 struct mbuf *m; 1551 1552 callout_stop(&sc->sc_tmo); 1553 1554 if (sc->sc_mbuf == NULL) 1555 return (0); 1556 m = sc->sc_mbuf; 1557 sc->sc_mbuf = NULL; 1558 sc->sc_statep.s = NULL; 1559 1560 #if NBPFILTER > 0 1561 if (ifp->if_bpf) 1562 bpf_mtap(ifp->if_bpf, m); 1563 #endif 1564 1565 if (sc->sc_mbuf_net) { 1566 m_freem(m); 1567 m = sc->sc_mbuf_net; 1568 sc->sc_mbuf_net = NULL; 1569 sc->sc_statep_net.s = NULL; 1570 } 1571 1572 return pfsync_sendout_mbuf(sc, m); 1573 } 1574 1575 int 1576 pfsync_tdb_sendout(struct pfsync_softc *sc) 1577 { 1578 #if NBPFILTER > 0 1579 struct ifnet *ifp = &sc->sc_if; 1580 #endif 1581 struct mbuf *m; 1582 1583 callout_stop(&sc->sc_tdb_tmo); 1584 1585 if (sc->sc_mbuf_tdb == NULL) 1586 return (0); 1587 m = sc->sc_mbuf_tdb; 1588 sc->sc_mbuf_tdb = NULL; 1589 sc->sc_statep_tdb.t = NULL; 1590 1591 #if NBPFILTER > 0 1592 if (ifp->if_bpf) 1593 bpf_mtap(ifp->if_bpf, m); 1594 #endif 1595 1596 return pfsync_sendout_mbuf(sc, m); 1597 } 1598 1599 int 1600 pfsync_sendout_mbuf(struct pfsync_softc *sc, struct mbuf *m) 1601 { 1602 struct sockaddr sa; 1603 struct ip *ip; 1604 1605 if (sc->sc_sync_ifp || 1606 sc->sc_sync_peer.s_addr != INADDR_PFSYNC_GROUP) { 1607 M_PREPEND(m, sizeof(struct ip), M_DONTWAIT); 1608 if (m == NULL) { 1609 PFSYNC_STATINC(PFSYNC_STAT_ONOMEM); 1610 return (0); 1611 } 1612 ip = mtod(m, struct ip *); 1613 ip->ip_v = IPVERSION; 1614 ip->ip_hl = sizeof(*ip) >> 2; 1615 ip->ip_tos = IPTOS_LOWDELAY; 1616 ip->ip_len = htons(m->m_pkthdr.len); 1617 ip->ip_id = htons(ip_randomid(0)); 1618 ip->ip_off = htons(IP_DF); 1619 ip->ip_ttl = PFSYNC_DFLTTL; 1620 ip->ip_p = IPPROTO_PFSYNC; 1621 ip->ip_sum = 0; 1622 1623 memset(&sa, 0, sizeof(sa)); 1624 ip->ip_src.s_addr = INADDR_ANY; 1625 1626 if (sc->sc_sendaddr.s_addr == INADDR_PFSYNC_GROUP) 1627 m->m_flags |= M_MCAST; 1628 ip->ip_dst = sc->sc_sendaddr; 1629 sc->sc_sendaddr.s_addr = sc->sc_sync_peer.s_addr; 1630 1631 PFSYNC_STATINC(PFSYNC_STAT_OPACKETS); 1632 1633 if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) { 1634 PFSYNC_STATINC(PFSYNC_STAT_OERRORS); 1635 } 1636 } else 1637 m_freem(m); 1638 1639 return (0); 1640 } 1641 1642 #ifdef IPSEC 1643 /* Update an in-kernel tdb. Silently fail if no tdb is found. */ 1644 void 1645 pfsync_update_net_tdb(struct pfsync_tdb *pt) 1646 { 1647 struct tdb *tdb; 1648 int s; 1649 1650 /* check for invalid values */ 1651 if (ntohl(pt->spi) <= SPI_RESERVED_MAX || 1652 (pt->dst.sa.sa_family != AF_INET && 1653 pt->dst.sa.sa_family != AF_INET6)) 1654 goto bad; 1655 1656 s = spltdb(); 1657 tdb = gettdb(pt->spi, &pt->dst, pt->sproto); 1658 if (tdb) { 1659 pt->rpl = ntohl(pt->rpl); 1660 pt->cur_bytes = betoh64(pt->cur_bytes); 1661 1662 /* Neither replay nor byte counter should ever decrease. */ 1663 if (pt->rpl < tdb->tdb_rpl || 1664 pt->cur_bytes < tdb->tdb_cur_bytes) { 1665 splx(s); 1666 goto bad; 1667 } 1668 1669 tdb->tdb_rpl = pt->rpl; 1670 tdb->tdb_cur_bytes = pt->cur_bytes; 1671 } 1672 splx(s); 1673 return; 1674 1675 bad: 1676 if (pf_status.debug >= PF_DEBUG_MISC) 1677 printf("pfsync_insert: PFSYNC_ACT_TDB_UPD: " 1678 "invalid value\n"); 1679 PFSYNC_STATINC(PFSYNC_STAT_BADSTATE); 1680 return; 1681 } 1682 1683 /* One of our local tdbs have been updated, need to sync rpl with others */ 1684 int 1685 pfsync_update_tdb(struct tdb *tdb, int output) 1686 { 1687 struct ifnet *ifp = NULL; 1688 struct pfsync_softc *sc = pfsyncif; 1689 struct pfsync_header *h; 1690 struct pfsync_tdb *pt = NULL; 1691 int s, i, ret; 1692 1693 if (sc == NULL) 1694 return (0); 1695 1696 ifp = &sc->sc_if; 1697 if (ifp->if_bpf == NULL && sc->sc_sync_ifp == NULL && 1698 sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) { 1699 /* Don't leave any stale pfsync packets hanging around. */ 1700 if (sc->sc_mbuf_tdb != NULL) { 1701 m_freem(sc->sc_mbuf_tdb); 1702 sc->sc_mbuf_tdb = NULL; 1703 sc->sc_statep_tdb.t = NULL; 1704 } 1705 return (0); 1706 } 1707 1708 s = splnet(); 1709 if (sc->sc_mbuf_tdb == NULL) { 1710 if ((sc->sc_mbuf_tdb = pfsync_get_mbuf(sc, PFSYNC_ACT_TDB_UPD, 1711 (void *)&sc->sc_statep_tdb.t)) == NULL) { 1712 splx(s); 1713 return (ENOMEM); 1714 } 1715 h = mtod(sc->sc_mbuf_tdb, struct pfsync_header *); 1716 } else { 1717 h = mtod(sc->sc_mbuf_tdb, struct pfsync_header *); 1718 if (h->action != PFSYNC_ACT_TDB_UPD) { 1719 /* 1720 * XXX will never happen as long as there's 1721 * only one "TDB action". 1722 */ 1723 pfsync_tdb_sendout(sc); 1724 sc->sc_mbuf_tdb = pfsync_get_mbuf(sc, 1725 PFSYNC_ACT_TDB_UPD, (void *)&sc->sc_statep_tdb.t); 1726 if (sc->sc_mbuf_tdb == NULL) { 1727 splx(s); 1728 return (ENOMEM); 1729 } 1730 h = mtod(sc->sc_mbuf_tdb, struct pfsync_header *); 1731 } else if (sc->sc_maxupdates) { 1732 /* 1733 * If it's an update, look in the packet to see if 1734 * we already have an update for the state. 1735 */ 1736 struct pfsync_tdb *u = 1737 (void *)((char *)h + PFSYNC_HDRLEN); 1738 1739 for (i = 0; !pt && i < h->count; i++) { 1740 if (tdb->tdb_spi == u->spi && 1741 tdb->tdb_sproto == u->sproto && 1742 !bcmp(&tdb->tdb_dst, &u->dst, 1743 SA_LEN(&u->dst.sa))) { 1744 pt = u; 1745 pt->updates++; 1746 } 1747 u++; 1748 } 1749 } 1750 } 1751 1752 if (pt == NULL) { 1753 /* not a "duplicate" update */ 1754 pt = sc->sc_statep_tdb.t++; 1755 sc->sc_mbuf_tdb->m_pkthdr.len = 1756 sc->sc_mbuf_tdb->m_len += sizeof(struct pfsync_tdb); 1757 h->count++; 1758 memset(pt, 0, sizeof(*pt)); 1759 1760 pt->spi = tdb->tdb_spi; 1761 memcpy(&pt->dst, &tdb->tdb_dst, sizeof pt->dst); 1762 pt->sproto = tdb->tdb_sproto; 1763 } 1764 1765 /* 1766 * When a failover happens, the master's rpl is probably above 1767 * what we see here (we may be up to a second late), so 1768 * increase it a bit for outbound tdbs to manage most such 1769 * situations. 1770 * 1771 * For now, just add an offset that is likely to be larger 1772 * than the number of packets we can see in one second. The RFC 1773 * just says the next packet must have a higher seq value. 1774 * 1775 * XXX What is a good algorithm for this? We could use 1776 * a rate-determined increase, but to know it, we would have 1777 * to extend struct tdb. 1778 * XXX pt->rpl can wrap over MAXINT, but if so the real tdb 1779 * will soon be replaced anyway. For now, just don't handle 1780 * this edge case. 1781 */ 1782 #define RPL_INCR 16384 1783 pt->rpl = htonl(tdb->tdb_rpl + (output ? RPL_INCR : 0)); 1784 pt->cur_bytes = htobe64(tdb->tdb_cur_bytes); 1785 1786 if (h->count == sc->sc_maxcount || 1787 (sc->sc_maxupdates && (pt->updates >= sc->sc_maxupdates))) 1788 ret = pfsync_tdb_sendout(sc); 1789 1790 splx(s); 1791 return (ret); 1792 } 1793 #endif 1794 1795 static int 1796 sysctl_net_inet_pfsync_stats(SYSCTLFN_ARGS) 1797 { 1798 1799 return (NETSTAT_SYSCTL(pfsyncstat_percpu, PFSYNC_NSTATS)); 1800 } 1801 1802 SYSCTL_SETUP(sysctl_net_inet_pfsync_setup, "sysctl net.inet.pfsync subtree setup") 1803 { 1804 1805 sysctl_createv(clog, 0, NULL, NULL, 1806 CTLFLAG_PERMANENT, 1807 CTLTYPE_NODE, "net", NULL, 1808 NULL, 0, NULL, 0, 1809 CTL_NET, CTL_EOL); 1810 sysctl_createv(clog, 0, NULL, NULL, 1811 CTLFLAG_PERMANENT, 1812 CTLTYPE_NODE, "inet", NULL, 1813 NULL, 0, NULL, 0, 1814 CTL_NET, PF_INET, CTL_EOL); 1815 sysctl_createv(clog, 0, NULL, NULL, 1816 CTLFLAG_PERMANENT, 1817 CTLTYPE_NODE, "pfsync", 1818 SYSCTL_DESCR("pfsync related settings"), 1819 NULL, 0, NULL, 0, 1820 CTL_NET, PF_INET, IPPROTO_PFSYNC, CTL_EOL); 1821 sysctl_createv(clog, 0, NULL, NULL, 1822 CTLFLAG_PERMANENT|CTLFLAG_READONLY, 1823 CTLTYPE_STRUCT, "stats", 1824 SYSCTL_DESCR("pfsync statistics"), 1825 sysctl_net_inet_pfsync_stats, 0, NULL, 0, 1826 CTL_NET, PF_INET, IPPROTO_PFSYNC, 1827 CTL_CREATE, CTL_EOL); 1828 } 1829