/*	$OpenBSD: if_pfsync.c,v 1.231 2016/09/15 02:00:18 dlg Exp $	*/

/*
 * Copyright (c) 2002 Michael Shalayeff
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 2009 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/timeout.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/pool.h>
#include <sys/syslog.h>

#include <net/if.h>
#include <net/if_types.h>
#include <net/bpf.h>
#include <net/netisr.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/tcp.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_fsm.h>

#include <netinet/in_var.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>

#ifdef IPSEC
#include <netinet/ip_ipsp.h>
#endif /* IPSEC */

#ifdef INET6
#include <netinet6/in6_var.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet6/nd6.h>
#endif /* INET6 */

#include "carp.h"
#if NCARP > 0
#include <netinet/ip_carp.h>
#endif

#define PF_DEBUGNAME	"pfsync: "
#include <net/pfvar.h>
#include <netinet/ip_ipsp.h>
#include <net/if_pfsync.h>

#include "bpfilter.h"
#include "pfsync.h"

#define PFSYNC_MINPKT ( \
	sizeof(struct ip) + \
	sizeof(struct pfsync_header))

int	pfsync_upd_tcp(struct pf_state *, struct pfsync_state_peer *,
	    struct pfsync_state_peer *);

int	pfsync_in_clr(caddr_t, int, int, int);
int	pfsync_in_iack(caddr_t, int, int, int);
int	pfsync_in_upd_c(caddr_t, int, int, int);
int	pfsync_in_ureq(caddr_t, int, int, int);
int	pfsync_in_del(caddr_t, int, int, int);
int	pfsync_in_del_c(caddr_t, int, int, int);
int	pfsync_in_bus(caddr_t, int, int, int);
int	pfsync_in_tdb(caddr_t, int, int, int);
int	pfsync_in_ins(caddr_t, int, int, int);
int	pfsync_in_upd(caddr_t, int, int, int);
int	pfsync_in_eof(caddr_t, int, int, int);

int	pfsync_in_error(caddr_t, int, int, int);

struct {
	int	(*in)(caddr_t, int, int, int);
	size_t	len;
} pfsync_acts[] = {
	/* PFSYNC_ACT_CLR */
	{ pfsync_in_clr,	sizeof(struct pfsync_clr) },
	/* PFSYNC_ACT_OINS */
	{ pfsync_in_error,	0 },
	/* PFSYNC_ACT_INS_ACK */
	{ pfsync_in_iack,	sizeof(struct pfsync_ins_ack) },
	/* PFSYNC_ACT_OUPD */
	{ pfsync_in_error,	0 },
	/* PFSYNC_ACT_UPD_C */
	{ pfsync_in_upd_c,	sizeof(struct pfsync_upd_c) },
	/* PFSYNC_ACT_UPD_REQ */
	{ pfsync_in_ureq,	sizeof(struct pfsync_upd_req) },
	/* PFSYNC_ACT_DEL */
	{ pfsync_in_del,	sizeof(struct pfsync_state) },
	/* PFSYNC_ACT_DEL_C */
	{ pfsync_in_del_c,	sizeof(struct pfsync_del_c) },
	/* PFSYNC_ACT_INS_F */
	{ pfsync_in_error,	0 },
	/* PFSYNC_ACT_DEL_F */
	{ pfsync_in_error,	0 },
	/* PFSYNC_ACT_BUS */
	{ pfsync_in_bus,	sizeof(struct pfsync_bus) },
	/* PFSYNC_ACT_OTDB */
	{ pfsync_in_error,	0 },
	/* PFSYNC_ACT_EOF */
	{ pfsync_in_error,	0 },
	/* PFSYNC_ACT_INS */
	{ pfsync_in_ins,	sizeof(struct pfsync_state) },
	/* PFSYNC_ACT_UPD */
	{ pfsync_in_upd,	sizeof(struct pfsync_state) },
	/* PFSYNC_ACT_TDB */
	{ pfsync_in_tdb,	sizeof(struct pfsync_tdb) },
};

struct pfsync_q {
	void		(*write)(struct pf_state *, void *);
	size_t		len;
	u_int8_t	action;
};

/* we have one of these for every PFSYNC_S_ */
void	pfsync_out_state(struct pf_state *, void *);
void	pfsync_out_iack(struct pf_state *, void *);
void	pfsync_out_upd_c(struct pf_state *, void *);
void	pfsync_out_del(struct pf_state *, void *);

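/*
 * Note: the entries below are indexed directly by a state's
 * PFSYNC_S_* sync_state value (see pfsync_q_ins() and
 * pfsync_sendout()), so their order must match that numbering.
 */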
struct pfsync_q pfsync_qs[] = {
	{ pfsync_out_iack,  sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK },
	{ pfsync_out_upd_c, sizeof(struct pfsync_upd_c),   PFSYNC_ACT_UPD_C },
	{ pfsync_out_del,   sizeof(struct pfsync_del_c),   PFSYNC_ACT_DEL_C },
	{ pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_INS },
	{ pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_UPD }
};

void	pfsync_q_ins(struct pf_state *, int);
void	pfsync_q_del(struct pf_state *);

struct pfsync_upd_req_item {
	TAILQ_ENTRY(pfsync_upd_req_item)	 ur_entry;
	struct pfsync_upd_req			 ur_msg;
};
TAILQ_HEAD(pfsync_upd_reqs, pfsync_upd_req_item);

struct pfsync_deferral {
	TAILQ_ENTRY(pfsync_deferral)		 pd_entry;
	struct pf_state				*pd_st;
	struct mbuf				*pd_m;
	struct timeout				 pd_tmo;
};
TAILQ_HEAD(pfsync_deferrals, pfsync_deferral);

#define PFSYNC_PLSIZE	MAX(sizeof(struct pfsync_upd_req_item), \
	    sizeof(struct pfsync_deferral))

void	pfsync_out_tdb(struct tdb *, void *);

struct pfsync_softc {
	struct ifnet		 sc_if;
	struct ifnet		*sc_sync_if;

	struct pool		 sc_pool;

	struct ip_moptions	 sc_imo;

	struct in_addr		 sc_sync_peer;
	u_int8_t		 sc_maxupdates;

	struct ip		 sc_template;

	struct pf_state_queue	 sc_qs[PFSYNC_S_COUNT];
	size_t			 sc_len;

	struct pfsync_upd_reqs	 sc_upd_req_list;

	int			 sc_initial_bulk;
	int			 sc_link_demoted;

	int			 sc_defer;
	struct pfsync_deferrals	 sc_deferrals;
	u_int			 sc_deferred;

	void			*sc_plus;
	size_t			 sc_pluslen;

	u_int32_t		 sc_ureq_sent;
	int			 sc_bulk_tries;
	struct timeout		 sc_bulkfail_tmo;

	u_int32_t		 sc_ureq_received;
	struct pf_state		*sc_bulk_next;
	struct pf_state		*sc_bulk_last;
	struct timeout		 sc_bulk_tmo;

	TAILQ_HEAD(, tdb)	 sc_tdb_q;

	void			*sc_lhcookie;

	struct timeout		 sc_tmo;
};

struct pfsync_softc	*pfsyncif = NULL;
struct pfsyncstats	 pfsyncstats;

void	pfsyncattach(int);
int	pfsync_clone_create(struct if_clone *, int);
int	pfsync_clone_destroy(struct ifnet *);
int	pfsync_alloc_scrub_memory(struct pfsync_state_peer *,
	    struct pf_state_peer *);
void	pfsync_update_net_tdb(struct pfsync_tdb *);
int	pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
	    struct rtentry *);
int	pfsyncioctl(struct ifnet *, u_long, caddr_t);
void	pfsyncstart(struct ifnet *);
void	pfsync_syncdev_state(void *);

void	pfsync_deferred(struct pf_state *, int);
void	pfsync_undefer(struct pfsync_deferral *, int);
void	pfsync_defer_tmo(void *);

void	pfsync_cancel_full_update(struct pfsync_softc *);
void	pfsync_request_full_update(struct pfsync_softc *);
void	pfsync_request_update(u_int32_t, u_int64_t);
void	pfsync_update_state_req(struct pf_state *);

void	pfsync_drop(struct pfsync_softc *);
void	pfsync_sendout(void);
void	pfsync_send_plus(void *, size_t);
void	pfsync_timeout(void *);
void	pfsync_tdb_timeout(void *);

void	pfsync_bulk_start(void);
void	pfsync_bulk_status(u_int8_t);
void	pfsync_bulk_update(void *);
void	pfsync_bulk_fail(void *);

#define PFSYNC_MAX_BULKTRIES	12
int	pfsync_sync_ok;

struct if_clone	pfsync_cloner =
    IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create, pfsync_clone_destroy);

void
pfsyncattach(int npfsync)
{
	if_clone_attach(&pfsync_cloner);
}

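/*
 * Only a single pfsync instance (unit 0) is supported; once created,
 * the global pfsyncif above always points at it.
 */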
int
pfsync_clone_create(struct if_clone *ifc, int unit)
{
	struct pfsync_softc *sc;
	struct ifnet *ifp;
	int q;

	if (unit != 0)
		return (EINVAL);

	pfsync_sync_ok = 1;

	sc = malloc(sizeof(*pfsyncif), M_DEVBUF, M_WAITOK | M_ZERO);

	for (q = 0; q < PFSYNC_S_COUNT; q++)
		TAILQ_INIT(&sc->sc_qs[q]);

	pool_init(&sc->sc_pool, PFSYNC_PLSIZE, 0, IPL_SOFTNET, 0, "pfsync",
	    NULL);
	TAILQ_INIT(&sc->sc_upd_req_list);
	TAILQ_INIT(&sc->sc_deferrals);
	sc->sc_deferred = 0;

	TAILQ_INIT(&sc->sc_tdb_q);

	sc->sc_len = PFSYNC_MINPKT;
	sc->sc_maxupdates = 128;

	sc->sc_imo.imo_membership = (struct in_multi **)malloc(
	    (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_IPMOPTS,
	    M_WAITOK | M_ZERO);
	sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;

	ifp = &sc->sc_if;
	snprintf(ifp->if_xname, sizeof ifp->if_xname, "pfsync%d", unit);
	ifp->if_softc = sc;
	ifp->if_ioctl = pfsyncioctl;
	ifp->if_output = pfsyncoutput;
	ifp->if_start = pfsyncstart;
	ifp->if_type = IFT_PFSYNC;
	IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN);
	ifp->if_hdrlen = sizeof(struct pfsync_header);
	ifp->if_mtu = ETHERMTU;
	timeout_set(&sc->sc_tmo, pfsync_timeout, sc);
	timeout_set(&sc->sc_bulk_tmo, pfsync_bulk_update, sc);
	timeout_set(&sc->sc_bulkfail_tmo, pfsync_bulk_fail, sc);

	if_attach(ifp);
	if_alloc_sadl(ifp);

#if NCARP > 0
	if_addgroup(ifp, "carp");
#endif

#if NBPFILTER > 0
	bpfattach(&sc->sc_if.if_bpf, ifp, DLT_PFSYNC, PFSYNC_HDRLEN);
#endif

	pfsyncif = sc;

	return (0);
}

int
pfsync_clone_destroy(struct ifnet *ifp)
{
	struct pfsync_softc *sc = ifp->if_softc;
	struct pfsync_deferral *pd;
	int s;

	s = splsoftnet();
	timeout_del(&sc->sc_bulkfail_tmo);
	timeout_del(&sc->sc_bulk_tmo);
	timeout_del(&sc->sc_tmo);
#if NCARP > 0
	if (!pfsync_sync_ok)
		carp_group_demote_adj(&sc->sc_if, -1, "pfsync destroy");
	if (sc->sc_link_demoted)
		carp_group_demote_adj(&sc->sc_if, -1, "pfsync destroy");
#endif
	if (sc->sc_sync_if)
		hook_disestablish(
		    sc->sc_sync_if->if_linkstatehooks,
		    sc->sc_lhcookie);
	if_detach(ifp);

	pfsync_drop(sc);

	while (sc->sc_deferred > 0) {
		pd = TAILQ_FIRST(&sc->sc_deferrals);
		timeout_del(&pd->pd_tmo);
		pfsync_undefer(pd, 0);
	}

	pool_destroy(&sc->sc_pool);
	free(sc->sc_imo.imo_membership, M_IPMOPTS, 0);
	free(sc, M_DEVBUF, sizeof(*sc));

	pfsyncif = NULL;
	splx(s);

	return (0);
}

/*
 * Start output on the pfsync interface.
 */
void
pfsyncstart(struct ifnet *ifp)
{
	IFQ_PURGE(&ifp->if_snd);
}

void
pfsync_syncdev_state(void *arg)
{
	struct pfsync_softc *sc = arg;

	if (!sc->sc_sync_if || !(sc->sc_if.if_flags & IFF_UP))
		return;

	if (sc->sc_sync_if->if_link_state == LINK_STATE_DOWN) {
		sc->sc_if.if_flags &= ~IFF_RUNNING;
		if (!sc->sc_link_demoted) {
#if NCARP > 0
			carp_group_demote_adj(&sc->sc_if, 1,
			    "pfsync link state down");
#endif
			sc->sc_link_demoted = 1;
		}

		/* drop everything */
		timeout_del(&sc->sc_tmo);
		pfsync_drop(sc);

		pfsync_cancel_full_update(sc);
	} else if (sc->sc_link_demoted) {
		sc->sc_if.if_flags |= IFF_RUNNING;

		pfsync_request_full_update(sc);
	}
}

int
pfsync_alloc_scrub_memory(struct pfsync_state_peer *s,
    struct pf_state_peer *d)
{
	if (s->scrub.scrub_flag && d->scrub == NULL) {
		d->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT | PR_ZERO);
		if (d->scrub == NULL)
			return (ENOMEM);
	}

	return (0);
}

void
pfsync_state_export(struct pfsync_state *sp, struct pf_state *st)
{
	pf_state_export(sp, st);
}

int
pfsync_state_import(struct pfsync_state *sp, int flags)
{
	struct pf_state *st = NULL;
	struct pf_state_key *skw = NULL, *sks = NULL;
	struct pf_rule *r = NULL;
	struct pfi_kif *kif;
	int pool_flags;
	int error;

	if (sp->creatorid == 0) {
		DPFPRINTF(LOG_NOTICE, "pfsync_state_import: "
		    "invalid creator id: %08x", ntohl(sp->creatorid));
		return (EINVAL);
	}

	if ((kif = pfi_kif_get(sp->ifname)) == NULL) {
		DPFPRINTF(LOG_NOTICE, "pfsync_state_import: "
		    "unknown interface: %s", sp->ifname);
		if (flags & PFSYNC_SI_IOCTL)
			return (EINVAL);
		return (0);	/* skip this state */
	}

	if (sp->af == 0)
		return (0);	/* skip this state */

	/*
	 * If the ruleset checksums match or the state is coming from the
	 * ioctl, it's safe to associate the state with the rule of that
	 * number.
	 */
	if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) &&
	    (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) <
	    pf_main_ruleset.rules.active.rcount)
		r = pf_main_ruleset.rules.active.ptr_array[ntohl(sp->rule)];
	else
		r = &pf_default_rule;

	if ((r->max_states && r->states_cur >= r->max_states))
		goto cleanup;

	if (flags & PFSYNC_SI_IOCTL)
		pool_flags = PR_WAITOK | PR_LIMITFAIL | PR_ZERO;
	else
		pool_flags = PR_NOWAIT | PR_LIMITFAIL | PR_ZERO;

	if ((st = pool_get(&pf_state_pl, pool_flags)) == NULL)
		goto cleanup;

	if ((skw = pf_alloc_state_key(pool_flags)) == NULL)
		goto cleanup;

	if ((sp->key[PF_SK_WIRE].af &&
	    (sp->key[PF_SK_WIRE].af != sp->key[PF_SK_STACK].af)) ||
	    PF_ANEQ(&sp->key[PF_SK_WIRE].addr[0],
	    &sp->key[PF_SK_STACK].addr[0], sp->af) ||
	    PF_ANEQ(&sp->key[PF_SK_WIRE].addr[1],
	    &sp->key[PF_SK_STACK].addr[1], sp->af) ||
	    sp->key[PF_SK_WIRE].port[0] != sp->key[PF_SK_STACK].port[0] ||
	    sp->key[PF_SK_WIRE].port[1] != sp->key[PF_SK_STACK].port[1] ||
	    sp->key[PF_SK_WIRE].rdomain != sp->key[PF_SK_STACK].rdomain) {
		if ((sks = pf_alloc_state_key(pool_flags)) == NULL)
			goto cleanup;
	} else
		sks = skw;

	/* allocate memory for scrub info */
	if (pfsync_alloc_scrub_memory(&sp->src, &st->src) ||
	    pfsync_alloc_scrub_memory(&sp->dst, &st->dst))
		goto cleanup;

	/* copy to state key(s) */
	skw->addr[0] = sp->key[PF_SK_WIRE].addr[0];
	skw->addr[1] = sp->key[PF_SK_WIRE].addr[1];
	skw->port[0] = sp->key[PF_SK_WIRE].port[0];
	skw->port[1] = sp->key[PF_SK_WIRE].port[1];
	skw->rdomain = ntohs(sp->key[PF_SK_WIRE].rdomain);
	PF_REF_INIT(skw->refcnt);
	skw->proto = sp->proto;
	if (!(skw->af = sp->key[PF_SK_WIRE].af))
		skw->af = sp->af;
	if (sks != skw) {
		sks->addr[0] = sp->key[PF_SK_STACK].addr[0];
		sks->addr[1] = sp->key[PF_SK_STACK].addr[1];
		sks->port[0] = sp->key[PF_SK_STACK].port[0];
		sks->port[1] = sp->key[PF_SK_STACK].port[1];
		sks->rdomain = ntohs(sp->key[PF_SK_STACK].rdomain);
		PF_REF_INIT(sks->refcnt);
		if (!(sks->af = sp->key[PF_SK_STACK].af))
			sks->af = sp->af;
		if (sks->af != skw->af) {
			switch (sp->proto) {
			case IPPROTO_ICMP:
				sks->proto = IPPROTO_ICMPV6;
				break;
			case IPPROTO_ICMPV6:
				sks->proto = IPPROTO_ICMP;
				break;
			default:
				sks->proto = sp->proto;
			}
		} else
			sks->proto = sp->proto;
	}
	st->rtableid[PF_SK_WIRE] = ntohl(sp->rtableid[PF_SK_WIRE]);
	st->rtableid[PF_SK_STACK] = ntohl(sp->rtableid[PF_SK_STACK]);

	/* copy to state */
	bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr));
	st->creation = time_uptime - ntohl(sp->creation);
	st->expire = time_uptime;
	if (ntohl(sp->expire)) {
		u_int32_t timeout;

		timeout = r->timeout[sp->timeout];
		if (!timeout)
			timeout = pf_default_rule.timeout[sp->timeout];

		/* sp->expire may have been adaptively scaled by export. */
		st->expire -= timeout - ntohl(sp->expire);
	}

	st->direction = sp->direction;
	st->log = sp->log;
	st->timeout = sp->timeout;
	st->state_flags = ntohs(sp->state_flags);
	st->max_mss = ntohs(sp->max_mss);
	st->min_ttl = sp->min_ttl;
	st->set_tos = sp->set_tos;
	st->set_prio[0] = sp->set_prio[0];
	st->set_prio[1] = sp->set_prio[1];

	st->id = sp->id;
	st->creatorid = sp->creatorid;
	pf_state_peer_ntoh(&sp->src, &st->src);
	pf_state_peer_ntoh(&sp->dst, &st->dst);

	st->rule.ptr = r;
	st->anchor.ptr = NULL;
	st->rt_kif = NULL;

	st->pfsync_time = time_uptime;
	st->sync_state = PFSYNC_S_NONE;

	/* XXX when we have anchors, use STATE_INC_COUNTERS */
	r->states_cur++;
	r->states_tot++;

	if (!ISSET(flags, PFSYNC_SI_IOCTL))
		SET(st->state_flags, PFSTATE_NOSYNC);

	if (pf_state_insert(kif, &skw, &sks, st) != 0) {
		/* XXX when we have anchors, use STATE_DEC_COUNTERS */
		r->states_cur--;
		error = EEXIST;
		goto cleanup_state;
	}

	if (!ISSET(flags, PFSYNC_SI_IOCTL)) {
		CLR(st->state_flags, PFSTATE_NOSYNC);
		if (ISSET(st->state_flags, PFSTATE_ACK)) {
			pfsync_q_ins(st, PFSYNC_S_IACK);
			schednetisr(NETISR_PFSYNC);
		}
	}
	CLR(st->state_flags, PFSTATE_ACK);

	return (0);

cleanup:
	error = ENOMEM;
	if (skw == sks)
		sks = NULL;
	if (skw != NULL)
		pool_put(&pf_state_key_pl, skw);
	if (sks != NULL)
		pool_put(&pf_state_key_pl, sks);

cleanup_state:	/* pf_state_insert frees the state keys */
	if (st) {
		if (st->dst.scrub)
			pool_put(&pf_state_scrub_pl, st->dst.scrub);
		if (st->src.scrub)
			pool_put(&pf_state_scrub_pl, st->src.scrub);
		pool_put(&pf_state_pl, st);
	}
	return (error);
}

void
pfsync_input(struct mbuf *m, ...)
{
	struct pfsync_softc *sc = pfsyncif;
	struct ip *ip = mtod(m, struct ip *);
	struct mbuf *mp;
	struct pfsync_header *ph;
	struct pfsync_subheader subh;

	int offset, offp, len, count, mlen, flags = 0;

	pfsyncstats.pfsyncs_ipackets++;

	/* verify that we have a sync interface configured */
	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
	    sc->sc_sync_if == NULL || !pf_status.running)
		goto done;

	/* verify that the packet came in on the right interface */
	if (sc->sc_sync_if->if_index != m->m_pkthdr.ph_ifidx) {
		pfsyncstats.pfsyncs_badif++;
		goto done;
	}

	sc->sc_if.if_ipackets++;
	sc->sc_if.if_ibytes += m->m_pkthdr.len;

	/* verify that the IP TTL is 255. */
	if (ip->ip_ttl != PFSYNC_DFLTTL) {
		pfsyncstats.pfsyncs_badttl++;
		goto done;
	}

	offset = ip->ip_hl << 2;
	mp = m_pulldown(m, offset, sizeof(*ph), &offp);
	if (mp == NULL) {
		pfsyncstats.pfsyncs_hdrops++;
		return;
	}
	ph = (struct pfsync_header *)(mp->m_data + offp);

	/* verify the version */
	if (ph->version != PFSYNC_VERSION) {
		pfsyncstats.pfsyncs_badver++;
		goto done;
	}
	len = ntohs(ph->len) + offset;
	if (m->m_pkthdr.len < len) {
		pfsyncstats.pfsyncs_badlen++;
		goto done;
	}

	if (!bcmp(&ph->pfcksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH))
		flags = PFSYNC_SI_CKSUM;

	offset += sizeof(*ph);
	while (offset <= len - sizeof(subh)) {
		m_copydata(m, offset, sizeof(subh), (caddr_t)&subh);
		offset += sizeof(subh);

		mlen = subh.len << 2;
		count = ntohs(subh.count);

		if (subh.action >= PFSYNC_ACT_MAX ||
		    subh.action >= nitems(pfsync_acts) ||
		    mlen < pfsync_acts[subh.action].len) {
			/*
			 * subheaders are always followed by at least one
			 * message, so if the peer is new enough to tell us
			 * how big its messages are then we know enough to
			 * skip them.
			 */
			if (count > 0 && mlen > 0) {
				offset += count * mlen;
				continue;
			}
			pfsyncstats.pfsyncs_badact++;
			goto done;
		}

		mp = m_pulldown(m, offset, mlen * count, &offp);
		if (mp == NULL) {
			pfsyncstats.pfsyncs_badlen++;
			return;
		}

		if (pfsync_acts[subh.action].in(mp->m_data + offp,
		    mlen, count, flags) != 0)
			goto done;

		offset += mlen * count;
	}

done:
	m_freem(m);
}

int
pfsync_in_clr(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_clr *clr;
	struct pf_state *st, *nexts;
	struct pfi_kif *kif;
	u_int32_t creatorid;
	int i;

	for (i = 0; i < count; i++) {
		clr = (struct pfsync_clr *)(buf + len * i);
		kif = NULL;
		creatorid = clr->creatorid;
		if (strlen(clr->ifname) &&
		    (kif = pfi_kif_find(clr->ifname)) == NULL)
			continue;

		for (st = RB_MIN(pf_state_tree_id, &tree_id); st; st = nexts) {
			nexts = RB_NEXT(pf_state_tree_id, &tree_id, st);
			if (st->creatorid == creatorid &&
			    ((kif && st->kif == kif) || !kif)) {
				SET(st->state_flags, PFSTATE_NOSYNC);
				pf_remove_state(st);
			}
		}
	}

	return (0);
}

int
pfsync_in_ins(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_state *sp;
	sa_family_t af1, af2;
	int i;

	for (i = 0; i < count; i++) {
		sp = (struct pfsync_state *)(buf + len * i);
		af1 = sp->key[0].af;
		af2 = sp->key[1].af;

		/* check for invalid values */
		if (sp->timeout >= PFTM_MAX ||
		    sp->src.state > PF_TCPS_PROXY_DST ||
		    sp->dst.state > PF_TCPS_PROXY_DST ||
		    sp->direction > PF_OUT ||
		    (((af1 || af2) &&
		    ((af1 != AF_INET && af1 != AF_INET6) ||
		    (af2 != AF_INET && af2 != AF_INET6))) ||
		    (sp->af != AF_INET && sp->af != AF_INET6))) {
			DPFPRINTF(LOG_NOTICE,
			    "pfsync_input: PFSYNC5_ACT_INS: invalid value");
			pfsyncstats.pfsyncs_badval++;
			continue;
		}

		if (pfsync_state_import(sp, flags) == ENOMEM) {
			/* drop out, but process the rest of the actions */
			break;
		}
	}

	return (0);
}

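/*
 * An insert ack tells us the peer has seen the state we inserted;
 * any packet deferred while waiting for that ack can be released.
 */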
int
pfsync_in_iack(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_ins_ack *ia;
	struct pf_state_cmp id_key;
	struct pf_state *st;
	int i;

	for (i = 0; i < count; i++) {
		ia = (struct pfsync_ins_ack *)(buf + len * i);

		id_key.id = ia->id;
		id_key.creatorid = ia->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL)
			continue;

		if (ISSET(st->state_flags, PFSTATE_ACK))
			pfsync_deferred(st, 0);
	}

	return (0);
}

int
pfsync_upd_tcp(struct pf_state *st, struct pfsync_state_peer *src,
    struct pfsync_state_peer *dst)
{
	int sync = 0;

	/*
	 * The state should never go backwards except
	 * for syn-proxy states.  Neither should the
	 * sequence window slide backwards.
	 */
	if ((st->src.state > src->state &&
	    (st->src.state < PF_TCPS_PROXY_SRC ||
	    src->state >= PF_TCPS_PROXY_SRC)) ||

	    (st->src.state == src->state &&
	    SEQ_GT(st->src.seqlo, ntohl(src->seqlo))))
		sync++;
	else
		pf_state_peer_ntoh(src, &st->src);

	if ((st->dst.state > dst->state) ||

	    (st->dst.state >= TCPS_SYN_SENT &&
	    SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo))))
		sync++;
	else
		pf_state_peer_ntoh(dst, &st->dst);

	return (sync);
}

int
pfsync_in_upd(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_state *sp;
	struct pf_state_cmp id_key;
	struct pf_state *st;
	int sync;

	int i;

	for (i = 0; i < count; i++) {
		sp = (struct pfsync_state *)(buf + len * i);

		/* check for invalid values */
		if (sp->timeout >= PFTM_MAX ||
		    sp->src.state > PF_TCPS_PROXY_DST ||
		    sp->dst.state > PF_TCPS_PROXY_DST) {
			DPFPRINTF(LOG_NOTICE,
			    "pfsync_input: PFSYNC_ACT_UPD: invalid value");
			pfsyncstats.pfsyncs_badval++;
			continue;
		}

		id_key.id = sp->id;
		id_key.creatorid = sp->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL) {
			/* insert the update */
			if (pfsync_state_import(sp, flags))
				pfsyncstats.pfsyncs_badstate++;
			continue;
		}

		if (ISSET(st->state_flags, PFSTATE_ACK))
			pfsync_deferred(st, 1);

		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
			sync = pfsync_upd_tcp(st, &sp->src, &sp->dst);
		else {
			sync = 0;

			/*
			 * Non-TCP protocol state machines always go
			 * forwards
			 */
			if (st->src.state > sp->src.state)
				sync++;
			else
				pf_state_peer_ntoh(&sp->src, &st->src);

			if (st->dst.state > sp->dst.state)
				sync++;
			else
				pf_state_peer_ntoh(&sp->dst, &st->dst);
		}

		if (sync < 2) {
			pfsync_alloc_scrub_memory(&sp->dst, &st->dst);
			pf_state_peer_ntoh(&sp->dst, &st->dst);
			st->expire = time_uptime;
			st->timeout = sp->timeout;
		}
		st->pfsync_time = time_uptime;

		if (sync) {
			pfsyncstats.pfsyncs_stale++;

			pfsync_update_state(st);
			schednetisr(NETISR_PFSYNC);
		}
	}

	return (0);
}

int
pfsync_in_upd_c(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_upd_c *up;
	struct pf_state_cmp id_key;
	struct pf_state *st;

	int sync;

	int i;

	for (i = 0; i < count; i++) {
		up = (struct pfsync_upd_c *)(buf + len * i);

		/* check for invalid values */
		if (up->timeout >= PFTM_MAX ||
		    up->src.state > PF_TCPS_PROXY_DST ||
		    up->dst.state > PF_TCPS_PROXY_DST) {
			DPFPRINTF(LOG_NOTICE,
			    "pfsync_input: PFSYNC_ACT_UPD_C: invalid value");
			pfsyncstats.pfsyncs_badval++;
			continue;
		}

		id_key.id = up->id;
		id_key.creatorid = up->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL) {
			/* We don't have this state.  Ask for it. */
			pfsync_request_update(id_key.creatorid, id_key.id);
			continue;
		}

		if (ISSET(st->state_flags, PFSTATE_ACK))
			pfsync_deferred(st, 1);

		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
			sync = pfsync_upd_tcp(st, &up->src, &up->dst);
		else {
			sync = 0;
			/*
			 * Non-TCP protocol state machines always go
			 * forwards
			 */
			if (st->src.state > up->src.state)
				sync++;
			else
				pf_state_peer_ntoh(&up->src, &st->src);

			if (st->dst.state > up->dst.state)
				sync++;
			else
				pf_state_peer_ntoh(&up->dst, &st->dst);
		}
		if (sync < 2) {
			pfsync_alloc_scrub_memory(&up->dst, &st->dst);
			pf_state_peer_ntoh(&up->dst, &st->dst);
			st->expire = time_uptime;
			st->timeout = up->timeout;
		}
		st->pfsync_time = time_uptime;

		if (sync) {
			pfsyncstats.pfsyncs_stale++;

			pfsync_update_state(st);
			schednetisr(NETISR_PFSYNC);
		}
	}

	return (0);
}

int
pfsync_in_ureq(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_upd_req *ur;
	int i;

	struct pf_state_cmp id_key;
	struct pf_state *st;

	for (i = 0; i < count; i++) {
		ur = (struct pfsync_upd_req *)(buf + len * i);

		id_key.id = ur->id;
		id_key.creatorid = ur->creatorid;

		if (id_key.id == 0 && id_key.creatorid == 0)
			pfsync_bulk_start();
		else {
			st = pf_find_state_byid(&id_key);
			if (st == NULL) {
				pfsyncstats.pfsyncs_badstate++;
				continue;
			}
			if (ISSET(st->state_flags, PFSTATE_NOSYNC))
				continue;

			pfsync_update_state_req(st);
		}
	}

	return (0);
}

int
pfsync_in_del(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_state *sp;
	struct pf_state_cmp id_key;
	struct pf_state *st;
	int i;

	for (i = 0; i < count; i++) {
		sp = (struct pfsync_state *)(buf + len * i);

		id_key.id = sp->id;
		id_key.creatorid = sp->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL) {
			pfsyncstats.pfsyncs_badstate++;
			continue;
		}
		SET(st->state_flags, PFSTATE_NOSYNC);
		pf_remove_state(st);
	}

	return (0);
}

int
pfsync_in_del_c(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_del_c *sp;
	struct pf_state_cmp id_key;
	struct pf_state *st;
	int i;

	for (i = 0; i < count; i++) {
		sp = (struct pfsync_del_c *)(buf + len * i);

		id_key.id = sp->id;
		id_key.creatorid = sp->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL) {
			pfsyncstats.pfsyncs_badstate++;
			continue;
		}

		SET(st->state_flags, PFSTATE_NOSYNC);
		pf_remove_state(st);
	}

	return (0);
}

int
pfsync_in_bus(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_bus *bus;

	/* If we're not waiting for a bulk update, who cares. */
	if (sc->sc_ureq_sent == 0)
		return (0);

	bus = (struct pfsync_bus *)buf;

	switch (bus->status) {
	case PFSYNC_BUS_START:
		timeout_add(&sc->sc_bulkfail_tmo, 4 * hz +
		    pf_pool_limits[PF_LIMIT_STATES].limit /
		    ((sc->sc_if.if_mtu - PFSYNC_MINPKT) /
		    sizeof(struct pfsync_state)));
		DPFPRINTF(LOG_INFO, "received bulk update start");
		break;

	case PFSYNC_BUS_END:
		if (time_uptime - ntohl(bus->endtime) >=
		    sc->sc_ureq_sent) {
			/* that's it, we're happy */
			sc->sc_ureq_sent = 0;
			sc->sc_bulk_tries = 0;
			timeout_del(&sc->sc_bulkfail_tmo);
#if NCARP > 0
			if (!pfsync_sync_ok)
				carp_group_demote_adj(&sc->sc_if, -1,
				    sc->sc_link_demoted ?
				    "pfsync link state up" :
				    "pfsync bulk done");
			if (sc->sc_initial_bulk) {
				carp_group_demote_adj(&sc->sc_if, -32,
				    "pfsync init");
				sc->sc_initial_bulk = 0;
			}
#endif
			pfsync_sync_ok = 1;
			sc->sc_link_demoted = 0;
			DPFPRINTF(LOG_INFO, "received valid bulk update end");
		} else {
			DPFPRINTF(LOG_WARNING, "received invalid "
			    "bulk update end: bad timestamp");
		}
		break;
	}

	return (0);
}

int
pfsync_in_tdb(caddr_t buf, int len, int count, int flags)
{
#if defined(IPSEC)
	struct pfsync_tdb *tp;
	int i;

	for (i = 0; i < count; i++) {
		tp = (struct pfsync_tdb *)(buf + len * i);
		pfsync_update_net_tdb(tp);
	}
#endif

	return (0);
}

#if defined(IPSEC)
/* Update an in-kernel tdb. Silently fail if no tdb is found. */
void
pfsync_update_net_tdb(struct pfsync_tdb *pt)
{
	struct tdb *tdb;
	int s;

	/* check for invalid values */
	if (ntohl(pt->spi) <= SPI_RESERVED_MAX ||
	    (pt->dst.sa.sa_family != AF_INET &&
	    pt->dst.sa.sa_family != AF_INET6))
		goto bad;

	s = splsoftnet();
	tdb = gettdb(ntohs(pt->rdomain), pt->spi,
	    (union sockaddr_union *)&pt->dst, pt->sproto);
	if (tdb) {
		pt->rpl = betoh64(pt->rpl);
		pt->cur_bytes = betoh64(pt->cur_bytes);

		/* Neither replay nor byte counter should ever decrease. */
		if (pt->rpl < tdb->tdb_rpl ||
		    pt->cur_bytes < tdb->tdb_cur_bytes) {
			splx(s);
			goto bad;
		}

		tdb->tdb_rpl = pt->rpl;
		tdb->tdb_cur_bytes = pt->cur_bytes;
	}
	splx(s);
	return;

bad:
	DPFPRINTF(LOG_WARNING, "pfsync_insert: PFSYNC_ACT_TDB_UPD: "
	    "invalid value");
	pfsyncstats.pfsyncs_badstate++;
	return;
}
#endif

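/*
 * An EOF message carries no payload; flag any trailing data as a bad
 * action.  The nonzero return tells pfsync_input() to stop processing
 * the rest of the packet.
 */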
int
pfsync_in_eof(caddr_t buf, int len, int count, int flags)
{
	if (len > 0 || count > 0)
		pfsyncstats.pfsyncs_badact++;

	/* we're done.  let the caller return */
	return (1);
}

int
pfsync_in_error(caddr_t buf, int len, int count, int flags)
{
	pfsyncstats.pfsyncs_badact++;
	return (-1);
}

int
pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
    struct rtentry *rt)
{
	m_freem(m);	/* drop packet */
	return (EAFNOSUPPORT);
}

int
pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct proc *p = curproc;
	struct pfsync_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ip_moptions *imo = &sc->sc_imo;
	struct pfsyncreq pfsyncr;
	struct ifnet *sifp;
	struct ip *ip;
	int s, error;

	switch (cmd) {
#if 0
	case SIOCSIFADDR:
	case SIOCAIFADDR:
	case SIOCSIFDSTADDR:
#endif
	case SIOCSIFFLAGS:
		s = splnet();
		if ((ifp->if_flags & IFF_RUNNING) == 0 &&
		    (ifp->if_flags & IFF_UP)) {
			ifp->if_flags |= IFF_RUNNING;

#if NCARP > 0
			sc->sc_initial_bulk = 1;
			carp_group_demote_adj(&sc->sc_if, 32, "pfsync init");
#endif

			pfsync_request_full_update(sc);
		}
		if ((ifp->if_flags & IFF_RUNNING) &&
		    (ifp->if_flags & IFF_UP) == 0) {
			ifp->if_flags &= ~IFF_RUNNING;

			/* drop everything */
			timeout_del(&sc->sc_tmo);
			pfsync_drop(sc);

			pfsync_cancel_full_update(sc);
		}
		splx(s);
		break;
	case SIOCSIFMTU:
		if (!sc->sc_sync_if ||
		    ifr->ifr_mtu <= PFSYNC_MINPKT ||
		    ifr->ifr_mtu > sc->sc_sync_if->if_mtu)
			return (EINVAL);
		s = splnet();
		if (ifr->ifr_mtu < ifp->if_mtu)
			pfsync_sendout();
		ifp->if_mtu = ifr->ifr_mtu;
		splx(s);
		break;
	case SIOCGETPFSYNC:
		bzero(&pfsyncr, sizeof(pfsyncr));
		if (sc->sc_sync_if) {
			strlcpy(pfsyncr.pfsyncr_syncdev,
			    sc->sc_sync_if->if_xname, IFNAMSIZ);
		}
		pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer;
		pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates;
		pfsyncr.pfsyncr_defer = sc->sc_defer;
		return (copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr)));

	case SIOCSETPFSYNC:
		if ((error = suser(p, 0)) != 0)
			return (error);
		if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr))))
			return (error);

		s = splnet();

		if (pfsyncr.pfsyncr_syncpeer.s_addr == 0)
			sc->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP;
		else
			sc->sc_sync_peer.s_addr =
			    pfsyncr.pfsyncr_syncpeer.s_addr;

		if (pfsyncr.pfsyncr_maxupdates > 255) {
			splx(s);
			return (EINVAL);
		}
		sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates;

		sc->sc_defer = pfsyncr.pfsyncr_defer;

		if (pfsyncr.pfsyncr_syncdev[0] == 0) {
			if (sc->sc_sync_if)
				hook_disestablish(
				    sc->sc_sync_if->if_linkstatehooks,
				    sc->sc_lhcookie);
			sc->sc_sync_if = NULL;
			if (imo->imo_num_memberships > 0) {
				in_delmulti(imo->imo_membership[
				    --imo->imo_num_memberships]);
				imo->imo_ifidx = 0;
			}
			splx(s);
			break;
		}

		if ((sifp = ifunit(pfsyncr.pfsyncr_syncdev)) == NULL) {
			splx(s);
			return (EINVAL);
		}

		if (sifp->if_mtu < sc->sc_if.if_mtu ||
		    (sc->sc_sync_if != NULL &&
		    sifp->if_mtu < sc->sc_sync_if->if_mtu) ||
		    sifp->if_mtu < MCLBYTES - sizeof(struct ip))
			pfsync_sendout();

		if (sc->sc_sync_if)
			hook_disestablish(
			    sc->sc_sync_if->if_linkstatehooks,
			    sc->sc_lhcookie);
		sc->sc_sync_if = sifp;

		if (imo->imo_num_memberships > 0) {
			in_delmulti(imo->imo_membership[
			    --imo->imo_num_memberships]);
			imo->imo_ifidx = 0;
		}

		if (sc->sc_sync_if &&
		    sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) {
			struct in_addr addr;

			if (!(sc->sc_sync_if->if_flags & IFF_MULTICAST)) {
				sc->sc_sync_if = NULL;
				splx(s);
				return (EADDRNOTAVAIL);
			}

			addr.s_addr = INADDR_PFSYNC_GROUP;

			if ((imo->imo_membership[0] =
			    in_addmulti(&addr, sc->sc_sync_if)) == NULL) {
				sc->sc_sync_if = NULL;
				splx(s);
				return (ENOBUFS);
			}
			imo->imo_num_memberships++;
			imo->imo_ifidx = sc->sc_sync_if->if_index;
			imo->imo_ttl = PFSYNC_DFLTTL;
			imo->imo_loop = 0;
		}

		ip = &sc->sc_template;
		bzero(ip, sizeof(*ip));
		ip->ip_v = IPVERSION;
		ip->ip_hl = sizeof(sc->sc_template) >> 2;
		ip->ip_tos = IPTOS_LOWDELAY;
		/* len and id are set later */
		ip->ip_off = htons(IP_DF);
		ip->ip_ttl = PFSYNC_DFLTTL;
		ip->ip_p = IPPROTO_PFSYNC;
		ip->ip_src.s_addr = INADDR_ANY;
		ip->ip_dst.s_addr = sc->sc_sync_peer.s_addr;

		sc->sc_lhcookie =
		    hook_establish(sc->sc_sync_if->if_linkstatehooks, 1,
		    pfsync_syncdev_state, sc);

		pfsync_request_full_update(sc);
		splx(s);

		break;

	default:
		return (ENOTTY);
	}

	return (0);
}

void
pfsync_out_state(struct pf_state *st, void *buf)
{
	struct pfsync_state *sp = buf;

	pfsync_state_export(sp, st);
}

void
pfsync_out_iack(struct pf_state *st, void *buf)
{
	struct pfsync_ins_ack *iack = buf;

	iack->id = st->id;
	iack->creatorid = st->creatorid;
}

void
pfsync_out_upd_c(struct pf_state *st, void *buf)
{
	struct pfsync_upd_c *up = buf;

	bzero(up, sizeof(*up));
	up->id = st->id;
	pf_state_peer_hton(&st->src, &up->src);
	pf_state_peer_hton(&st->dst, &up->dst);
	up->creatorid = st->creatorid;
	up->timeout = st->timeout;
}

void
pfsync_out_del(struct pf_state *st, void *buf)
{
	struct pfsync_del_c *dp = buf;

	dp->id = st->id;
	dp->creatorid = st->creatorid;

	SET(st->state_flags, PFSTATE_NOSYNC);
}

void
pfsync_drop(struct pfsync_softc *sc)
{
	struct pf_state *st;
	struct pfsync_upd_req_item *ur;
	struct tdb *t;
	int q;

	for (q = 0; q < PFSYNC_S_COUNT; q++) {
		if (TAILQ_EMPTY(&sc->sc_qs[q]))
			continue;

		TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list) {
#ifdef PFSYNC_DEBUG
			KASSERT(st->sync_state == q);
#endif
			st->sync_state = PFSYNC_S_NONE;
		}
		TAILQ_INIT(&sc->sc_qs[q]);
	}

	while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
		TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);
		pool_put(&sc->sc_pool, ur);
	}

	sc->sc_plus = NULL;

	if (!TAILQ_EMPTY(&sc->sc_tdb_q)) {
		TAILQ_FOREACH(t, &sc->sc_tdb_q, tdb_sync_entry)
			CLR(t->tdb_flags, TDBF_PFSYNC);

		TAILQ_INIT(&sc->sc_tdb_q);
	}

	sc->sc_len = PFSYNC_MINPKT;
}

void
pfsync_sendout(void)
{
	struct pfsync_softc *sc = pfsyncif;
#if NBPFILTER > 0
	struct ifnet *ifp = &sc->sc_if;
#endif
	struct mbuf *m;
	struct ip *ip;
	struct pfsync_header *ph;
	struct pfsync_subheader *subh;
	struct pf_state *st;
	struct pfsync_upd_req_item *ur;
	struct tdb *t;

	int offset;
	int q, count = 0;

	if (sc == NULL || sc->sc_len == PFSYNC_MINPKT)
		return;

	if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
#if NBPFILTER > 0
	    (ifp->if_bpf == NULL && sc->sc_sync_if == NULL)) {
#else
	    sc->sc_sync_if == NULL) {
#endif
		pfsync_drop(sc);
		return;
	}

	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL) {
		sc->sc_if.if_oerrors++;
		pfsyncstats.pfsyncs_onomem++;
		pfsync_drop(sc);
		return;
	}

	if (max_linkhdr + sc->sc_len > MHLEN) {
		MCLGETI(m, M_DONTWAIT, NULL, max_linkhdr + sc->sc_len);
		if (!ISSET(m->m_flags, M_EXT)) {
			m_free(m);
			sc->sc_if.if_oerrors++;
			pfsyncstats.pfsyncs_onomem++;
			pfsync_drop(sc);
			return;
		}
	}
	m->m_data += max_linkhdr;
	m->m_len = m->m_pkthdr.len = sc->sc_len;

	/* build the ip header */
	ip = mtod(m, struct ip *);
	bcopy(&sc->sc_template, ip, sizeof(*ip));
	offset = sizeof(*ip);

	ip->ip_len = htons(m->m_pkthdr.len);
	ip->ip_id = htons(ip_randomid());

	/* build the pfsync header */
	ph = (struct pfsync_header *)(m->m_data + offset);
	bzero(ph, sizeof(*ph));
	offset += sizeof(*ph);

	ph->version = PFSYNC_VERSION;
	ph->len = htons(sc->sc_len - sizeof(*ip));
	bcopy(pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH);

	if (!TAILQ_EMPTY(&sc->sc_upd_req_list)) {
		subh = (struct pfsync_subheader *)(m->m_data + offset);
		offset += sizeof(*subh);

		count = 0;
		while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
			TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);

			bcopy(&ur->ur_msg, m->m_data + offset,
			    sizeof(ur->ur_msg));
			offset += sizeof(ur->ur_msg);

			pool_put(&sc->sc_pool, ur);

			count++;
		}

		bzero(subh, sizeof(*subh));
		subh->len = sizeof(ur->ur_msg) >> 2;
		subh->action = PFSYNC_ACT_UPD_REQ;
		subh->count = htons(count);
	}

	/* has someone built a custom region for us to add? */
	if (sc->sc_plus != NULL) {
		bcopy(sc->sc_plus, m->m_data + offset, sc->sc_pluslen);
		offset += sc->sc_pluslen;

		sc->sc_plus = NULL;
	}

	if (!TAILQ_EMPTY(&sc->sc_tdb_q)) {
		subh = (struct pfsync_subheader *)(m->m_data + offset);
		offset += sizeof(*subh);

		count = 0;
		TAILQ_FOREACH(t, &sc->sc_tdb_q, tdb_sync_entry) {
			pfsync_out_tdb(t, m->m_data + offset);
			offset += sizeof(struct pfsync_tdb);
			CLR(t->tdb_flags, TDBF_PFSYNC);

			count++;
		}
		TAILQ_INIT(&sc->sc_tdb_q);

		bzero(subh, sizeof(*subh));
		subh->action = PFSYNC_ACT_TDB;
		subh->len = sizeof(struct pfsync_tdb) >> 2;
		subh->count = htons(count);
	}

	/* walk the queues */
	for (q = 0; q < PFSYNC_S_COUNT; q++) {
		if (TAILQ_EMPTY(&sc->sc_qs[q]))
			continue;

		subh = (struct pfsync_subheader *)(m->m_data + offset);
		offset += sizeof(*subh);

		count = 0;
		TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list) {
#ifdef PFSYNC_DEBUG
			KASSERT(st->sync_state == q);
#endif
			pfsync_qs[q].write(st, m->m_data + offset);
			offset += pfsync_qs[q].len;

			st->sync_state = PFSYNC_S_NONE;
			count++;
		}
		TAILQ_INIT(&sc->sc_qs[q]);

		bzero(subh, sizeof(*subh));
		subh->action = pfsync_qs[q].action;
		subh->len = pfsync_qs[q].len >> 2;
		subh->count = htons(count);
	}

	/* we're done, let's put it on the wire */
#if NBPFILTER > 0
	if (ifp->if_bpf) {
		m->m_data += sizeof(*ip);
		m->m_len = m->m_pkthdr.len = sc->sc_len - sizeof(*ip);
		bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
		m->m_data -= sizeof(*ip);
		m->m_len = m->m_pkthdr.len = sc->sc_len;
	}

	if (sc->sc_sync_if == NULL) {
		sc->sc_len = PFSYNC_MINPKT;
		m_freem(m);
		return;
	}
#endif

	/* start again */
	sc->sc_len = PFSYNC_MINPKT;

	sc->sc_if.if_opackets++;
	sc->sc_if.if_obytes += m->m_pkthdr.len;

	m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain;

	if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL, 0) == 0)
		pfsyncstats.pfsyncs_opackets++;
	else
		pfsyncstats.pfsyncs_oerrors++;
}

void
pfsync_insert_state(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;

	splsoftassert(IPL_SOFTNET);

	if (ISSET(st->rule.ptr->rule_flag, PFRULE_NOSYNC) ||
	    st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) {
		SET(st->state_flags, PFSTATE_NOSYNC);
		return;
	}

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
	    ISSET(st->state_flags, PFSTATE_NOSYNC))
		return;

#ifdef PFSYNC_DEBUG
	KASSERT(st->sync_state == PFSYNC_S_NONE);
#endif

	if (sc->sc_len == PFSYNC_MINPKT)
		timeout_add_sec(&sc->sc_tmo, 1);

	pfsync_q_ins(st, PFSYNC_S_INS);

	st->sync_updates = 0;
}

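/*
 * Deferral holds the packet that created a state until the peer
 * acknowledges the insert (or the 20ms timeout below fires), so the
 * peer has the state before any reply traffic can arrive for it.
 */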
int
pfsync_defer(struct pf_state *st, struct mbuf *m)
{
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_deferral *pd;

	splsoftassert(IPL_SOFTNET);

	if (!sc->sc_defer ||
	    ISSET(st->state_flags, PFSTATE_NOSYNC) ||
	    m->m_flags & (M_BCAST|M_MCAST))
		return (0);

	if (sc->sc_deferred >= 128) {
		pd = TAILQ_FIRST(&sc->sc_deferrals);
		if (timeout_del(&pd->pd_tmo))
			pfsync_undefer(pd, 0);
	}

	pd = pool_get(&sc->sc_pool, PR_NOWAIT);
	if (pd == NULL)
		return (0);

	m->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
	SET(st->state_flags, PFSTATE_ACK);

	pd->pd_st = st;
	pd->pd_m = m;

	sc->sc_deferred++;
	TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry);

	timeout_set(&pd->pd_tmo, pfsync_defer_tmo, pd);
	timeout_add_msec(&pd->pd_tmo, 20);

	schednetisr(NETISR_PFSYNC);

	return (1);
}

void
pfsync_undefer(struct pfsync_deferral *pd, int drop)
{
	struct pfsync_softc *sc = pfsyncif;

	splsoftassert(IPL_SOFTNET);

	TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
	sc->sc_deferred--;

	CLR(pd->pd_st->state_flags, PFSTATE_ACK);
	if (drop)
		m_freem(pd->pd_m);
	else {
		if (pd->pd_st->rule.ptr->rt == PF_ROUTETO) {
			switch (pd->pd_st->key[PF_SK_WIRE]->af) {
			case AF_INET:
				pf_route(&pd->pd_m, pd->pd_st->rule.ptr,
				    pd->pd_st->direction,
				    pd->pd_st->rt_kif->pfik_ifp, pd->pd_st);
				break;
#ifdef INET6
			case AF_INET6:
				pf_route6(&pd->pd_m, pd->pd_st->rule.ptr,
				    pd->pd_st->direction,
				    pd->pd_st->rt_kif->pfik_ifp, pd->pd_st);
				break;
#endif /* INET6 */
			}
		} else {
			switch (pd->pd_st->key[PF_SK_WIRE]->af) {
			case AF_INET:
				ip_output(pd->pd_m, NULL, NULL, 0, NULL, NULL,
				    0);
				break;
#ifdef INET6
			case AF_INET6:
				ip6_output(pd->pd_m, NULL, NULL, 0,
				    NULL, NULL);
				break;
#endif /* INET6 */
			}
		}
	}

	pool_put(&sc->sc_pool, pd);
}

void
pfsync_defer_tmo(void *arg)
{
	int s;

	s = splsoftnet();
	pfsync_undefer(arg, 0);
	splx(s);
}

void
pfsync_deferred(struct pf_state *st, int drop)
{
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_deferral *pd;

	splsoftassert(IPL_SOFTNET);

	TAILQ_FOREACH(pd, &sc->sc_deferrals, pd_entry) {
		if (pd->pd_st == st) {
			if (timeout_del(&pd->pd_tmo))
				pfsync_undefer(pd, drop);
			return;
		}
	}

	panic("pfsync_deferred: unable to find deferred state");
}

void
pfsync_update_state(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;
	int sync = 0;

	splsoftassert(IPL_SOFTNET);

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
		return;

	if (ISSET(st->state_flags, PFSTATE_ACK))
		pfsync_deferred(st, 0);
	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
		if (st->sync_state != PFSYNC_S_NONE)
			pfsync_q_del(st);
		return;
	}

	if (sc->sc_len == PFSYNC_MINPKT)
		timeout_add_sec(&sc->sc_tmo, 1);

	switch (st->sync_state) {
	case PFSYNC_S_UPD_C:
	case PFSYNC_S_UPD:
	case PFSYNC_S_INS:
		/* we're already handling it */

		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) {
			st->sync_updates++;
			if (st->sync_updates >= sc->sc_maxupdates)
				sync = 1;
		}
		break;

	case PFSYNC_S_IACK:
		pfsync_q_del(st);
		/* FALLTHROUGH */
	case PFSYNC_S_NONE:
		pfsync_q_ins(st, PFSYNC_S_UPD_C);
		st->sync_updates = 0;
		break;

	default:
		panic("pfsync_update_state: unexpected sync state %d",
		    st->sync_state);
	}

	if (sync || (time_uptime - st->pfsync_time) < 2)
		schednetisr(NETISR_PFSYNC);
}

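/*
 * A full update is a bulk handshake: pfsync_request_update(0, 0)
 * asks the peer for its whole state table, the peer brackets the
 * resulting stream with PFSYNC_BUS_START/END messages, and
 * sc_bulkfail_tmo re-requests the update if the end never arrives.
 */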
void
pfsync_cancel_full_update(struct pfsync_softc *sc)
{
	if (timeout_pending(&sc->sc_bulkfail_tmo) ||
	    timeout_pending(&sc->sc_bulk_tmo)) {
#if NCARP > 0
		if (!pfsync_sync_ok)
			carp_group_demote_adj(&sc->sc_if, -1,
			    "pfsync bulk cancelled");
		if (sc->sc_initial_bulk) {
			carp_group_demote_adj(&sc->sc_if, -32,
			    "pfsync init");
			sc->sc_initial_bulk = 0;
		}
#endif
		pfsync_sync_ok = 1;
		DPFPRINTF(LOG_INFO, "cancelling bulk update");
	}
	timeout_del(&sc->sc_bulkfail_tmo);
	timeout_del(&sc->sc_bulk_tmo);
	sc->sc_bulk_next = NULL;
	sc->sc_bulk_last = NULL;
	sc->sc_ureq_sent = 0;
	sc->sc_bulk_tries = 0;
}

void
pfsync_request_full_update(struct pfsync_softc *sc)
{
	if (sc->sc_sync_if && ISSET(sc->sc_if.if_flags, IFF_RUNNING)) {
		/* Request a full state table update. */
		sc->sc_ureq_sent = time_uptime;
#if NCARP > 0
		if (!sc->sc_link_demoted && pfsync_sync_ok)
			carp_group_demote_adj(&sc->sc_if, 1,
			    "pfsync bulk start");
#endif
		pfsync_sync_ok = 0;
		DPFPRINTF(LOG_INFO, "requesting bulk update");
		timeout_add(&sc->sc_bulkfail_tmo, 4 * hz +
		    pf_pool_limits[PF_LIMIT_STATES].limit /
		    ((sc->sc_if.if_mtu - PFSYNC_MINPKT) /
		    sizeof(struct pfsync_state)));
		pfsync_request_update(0, 0);
	}
}

void
pfsync_request_update(u_int32_t creatorid, u_int64_t id)
{
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_upd_req_item *item;
	size_t nlen = sizeof(struct pfsync_upd_req);

	/*
	 * this code does nothing to prevent multiple update requests for the
	 * same state being generated.
	 */

	item = pool_get(&sc->sc_pool, PR_NOWAIT);
	if (item == NULL) {
		/* XXX stats */
		return;
	}

	item->ur_msg.id = id;
	item->ur_msg.creatorid = creatorid;

	if (TAILQ_EMPTY(&sc->sc_upd_req_list))
		nlen += sizeof(struct pfsync_subheader);

	if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
		pfsync_sendout();

		nlen = sizeof(struct pfsync_subheader) +
		    sizeof(struct pfsync_upd_req);
	}

	TAILQ_INSERT_TAIL(&sc->sc_upd_req_list, item, ur_entry);
	sc->sc_len += nlen;

	schednetisr(NETISR_PFSYNC);
}

void
pfsync_update_state_req(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;

	if (sc == NULL)
		panic("pfsync_update_state_req: nonexistent instance");

	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
		if (st->sync_state != PFSYNC_S_NONE)
			pfsync_q_del(st);
		return;
	}

	switch (st->sync_state) {
	case PFSYNC_S_UPD_C:
	case PFSYNC_S_IACK:
		pfsync_q_del(st);
		/* FALLTHROUGH */
	case PFSYNC_S_NONE:
		pfsync_q_ins(st, PFSYNC_S_UPD);
		schednetisr(NETISR_PFSYNC);
		return;

	case PFSYNC_S_INS:
	case PFSYNC_S_UPD:
	case PFSYNC_S_DEL:
		/* we're already handling it */
		return;

	default:
		panic("pfsync_update_state_req: unexpected sync state %d",
		    st->sync_state);
	}
}

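/*
 * Deletion mirrors update: a state still queued as an insert is
 * simply dequeued (the peer never saw it), anything else is requeued
 * as a compressed delete.
 */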
void
pfsync_delete_state(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;

	splsoftassert(IPL_SOFTNET);

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
		return;

	if (ISSET(st->state_flags, PFSTATE_ACK))
		pfsync_deferred(st, 1);
	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
		if (st->sync_state != PFSYNC_S_NONE)
			pfsync_q_del(st);
		return;
	}

	if (sc->sc_len == PFSYNC_MINPKT)
		timeout_add_sec(&sc->sc_tmo, 1);

	switch (st->sync_state) {
	case PFSYNC_S_INS:
		/* we never got to tell the world so just forget about it */
		pfsync_q_del(st);
		return;

	case PFSYNC_S_UPD_C:
	case PFSYNC_S_UPD:
	case PFSYNC_S_IACK:
		pfsync_q_del(st);
		/* FALLTHROUGH to putting it on the del list */

	case PFSYNC_S_NONE:
		pfsync_q_ins(st, PFSYNC_S_DEL);
		return;

	default:
		panic("pfsync_delete_state: unexpected sync state %d",
		    st->sync_state);
	}
}

void
pfsync_clear_states(u_int32_t creatorid, const char *ifname)
{
	struct pfsync_softc *sc = pfsyncif;
	struct {
		struct pfsync_subheader subh;
		struct pfsync_clr clr;
	} __packed r;

	splsoftassert(IPL_SOFTNET);

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
		return;

	bzero(&r, sizeof(r));

	r.subh.action = PFSYNC_ACT_CLR;
	r.subh.len = sizeof(struct pfsync_clr) >> 2;
	r.subh.count = htons(1);

	strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname));
	r.clr.creatorid = creatorid;

	pfsync_send_plus(&r, sizeof(r));
}

void
pfsync_q_ins(struct pf_state *st, int q)
{
	struct pfsync_softc *sc = pfsyncif;
	size_t nlen = pfsync_qs[q].len;

	KASSERT(st->sync_state == PFSYNC_S_NONE);

#if defined(PFSYNC_DEBUG)
	if (sc->sc_len < PFSYNC_MINPKT)
		panic("pfsync pkt len is too low %d", sc->sc_len);
#endif
	if (TAILQ_EMPTY(&sc->sc_qs[q]))
		nlen += sizeof(struct pfsync_subheader);

	if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
		pfsync_sendout();

		nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len;
	}

	sc->sc_len += nlen;
	TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list);
	st->sync_state = q;
}

void
pfsync_q_del(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;
	int q = st->sync_state;

	KASSERT(st->sync_state != PFSYNC_S_NONE);

	sc->sc_len -= pfsync_qs[q].len;
	TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list);
	st->sync_state = PFSYNC_S_NONE;

	if (TAILQ_EMPTY(&sc->sc_qs[q]))
		sc->sc_len -= sizeof(struct pfsync_subheader);
}

void
pfsync_update_tdb(struct tdb *t, int output)
{
	struct pfsync_softc *sc = pfsyncif;
	size_t nlen = sizeof(struct pfsync_tdb);

	if (sc == NULL)
		return;

	if (!ISSET(t->tdb_flags, TDBF_PFSYNC)) {
		if (TAILQ_EMPTY(&sc->sc_tdb_q))
			nlen += sizeof(struct pfsync_subheader);

		if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
			pfsync_sendout();

			nlen = sizeof(struct pfsync_subheader) +
			    sizeof(struct pfsync_tdb);
		}

		sc->sc_len += nlen;
		TAILQ_INSERT_TAIL(&sc->sc_tdb_q, t, tdb_sync_entry);
		SET(t->tdb_flags, TDBF_PFSYNC);
		t->tdb_updates = 0;
	} else {
		if (++t->tdb_updates >= sc->sc_maxupdates)
			schednetisr(NETISR_PFSYNC);
	}

	if (output)
		SET(t->tdb_flags, TDBF_PFSYNC_RPL);
	else
		CLR(t->tdb_flags, TDBF_PFSYNC_RPL);
}

void
pfsync_delete_tdb(struct tdb *t)
{
	struct pfsync_softc *sc = pfsyncif;

	if (sc == NULL || !ISSET(t->tdb_flags, TDBF_PFSYNC))
		return;

	sc->sc_len -= sizeof(struct pfsync_tdb);
	TAILQ_REMOVE(&sc->sc_tdb_q, t, tdb_sync_entry);
	CLR(t->tdb_flags, TDBF_PFSYNC);

	if (TAILQ_EMPTY(&sc->sc_tdb_q))
		sc->sc_len -= sizeof(struct pfsync_subheader);
}

void
pfsync_out_tdb(struct tdb *t, void *buf)
{
	struct pfsync_tdb *ut = buf;

	bzero(ut, sizeof(*ut));
	ut->spi = t->tdb_spi;
	bcopy(&t->tdb_dst, &ut->dst, sizeof(ut->dst));
	/*
	 * When a failover happens, the master's rpl is probably above
	 * what we see here (we may be up to a second late), so
	 * increase it a bit for outbound tdbs to manage most such
	 * situations.
	 *
	 * For now, just add an offset that is likely to be larger
	 * than the number of packets we can see in one second.  The RFC
	 * just says the next packet must have a higher seq value.
	 *
	 * XXX What is a good algorithm for this?  We could use
	 * a rate-determined increase, but to know it, we would have
	 * to extend struct tdb.
	 * XXX pt->rpl can wrap over MAXINT, but if so the real tdb
	 * will soon be replaced anyway.  For now, just don't handle
	 * this edge case.
	 */
#define RPL_INCR 16384
	ut->rpl = htobe64(t->tdb_rpl + (ISSET(t->tdb_flags, TDBF_PFSYNC_RPL) ?
	    RPL_INCR : 0));
	ut->cur_bytes = htobe64(t->tdb_cur_bytes);
	ut->sproto = t->tdb_sproto;
	ut->rdomain = htons(t->tdb_rdomain);
}

void
pfsync_bulk_start(void)
{
	struct pfsync_softc *sc = pfsyncif;

	DPFPRINTF(LOG_INFO, "received bulk update request");

	if (TAILQ_EMPTY(&state_list))
		pfsync_bulk_status(PFSYNC_BUS_END);
	else {
		sc->sc_ureq_received = time_uptime;

		if (sc->sc_bulk_next == NULL)
			sc->sc_bulk_next = TAILQ_FIRST(&state_list);
		sc->sc_bulk_last = sc->sc_bulk_next;

		pfsync_bulk_status(PFSYNC_BUS_START);
		timeout_add(&sc->sc_bulk_tmo, 0);
	}
}

void
pfsync_bulk_update(void *arg)
{
	struct pfsync_softc *sc = arg;
	struct pf_state *st;
	int i = 0;
	int s;

	s = splsoftnet();

	st = sc->sc_bulk_next;

	for (;;) {
		if (st->sync_state == PFSYNC_S_NONE &&
		    st->timeout < PFTM_MAX &&
		    st->pfsync_time <= sc->sc_ureq_received) {
			pfsync_update_state_req(st);
			i++;
		}

		st = TAILQ_NEXT(st, entry_list);
		if (st == NULL)
			st = TAILQ_FIRST(&state_list);

		if (st == sc->sc_bulk_last) {
			/* we're done */
			sc->sc_bulk_next = NULL;
			sc->sc_bulk_last = NULL;
			pfsync_bulk_status(PFSYNC_BUS_END);
			break;
		}

		if (i > 1 && (sc->sc_if.if_mtu - sc->sc_len) <
		    sizeof(struct pfsync_state)) {
			/* we've filled a packet */
			sc->sc_bulk_next = st;
			timeout_add(&sc->sc_bulk_tmo, 1);
			break;
		}
	}

	splx(s);
}

void
pfsync_bulk_status(u_int8_t status)
{
	struct {
		struct pfsync_subheader subh;
		struct pfsync_bus bus;
	} __packed r;

	struct pfsync_softc *sc = pfsyncif;

	bzero(&r, sizeof(r));

	r.subh.action = PFSYNC_ACT_BUS;
	r.subh.len = sizeof(struct pfsync_bus) >> 2;
	r.subh.count = htons(1);

	r.bus.creatorid = pf_status.hostid;
	r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received);
	r.bus.status = status;

	pfsync_send_plus(&r, sizeof(r));
}

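/*
 * If the bulk update end never arrives, re-request it every five
 * seconds, up to PFSYNC_MAX_BULKTRIES times, before giving up and
 * behaving as if the transfer had completed.
 */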
void
pfsync_bulk_fail(void *arg)
{
	struct pfsync_softc *sc = arg;
	int s;

	s = splsoftnet();

	if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) {
		/* Try again */
		timeout_add_sec(&sc->sc_bulkfail_tmo, 5);
		pfsync_request_update(0, 0);
	} else {
		/* Pretend like the transfer was ok */
		sc->sc_ureq_sent = 0;
		sc->sc_bulk_tries = 0;
#if NCARP > 0
		if (!pfsync_sync_ok)
			carp_group_demote_adj(&sc->sc_if, -1,
			    sc->sc_link_demoted ?
			    "pfsync link state up" :
			    "pfsync bulk fail");
		if (sc->sc_initial_bulk) {
			carp_group_demote_adj(&sc->sc_if, -32,
			    "pfsync init");
			sc->sc_initial_bulk = 0;
		}
#endif
		pfsync_sync_ok = 1;
		sc->sc_link_demoted = 0;
		DPFPRINTF(LOG_ERR, "failed to receive bulk update");
	}

	splx(s);
}

void
pfsync_send_plus(void *plus, size_t pluslen)
{
	struct pfsync_softc *sc = pfsyncif;

	if (sc->sc_len + pluslen > sc->sc_if.if_mtu)
		pfsync_sendout();

	sc->sc_plus = plus;
	sc->sc_len += (sc->sc_pluslen = pluslen);

	pfsync_sendout();
}

int
pfsync_up(void)
{
	struct pfsync_softc *sc = pfsyncif;

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
		return (0);

	return (1);
}

int
pfsync_state_in_use(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;

	if (sc == NULL)
		return (0);

	if (st->sync_state != PFSYNC_S_NONE ||
	    st == sc->sc_bulk_next ||
	    st == sc->sc_bulk_last)
		return (1);

	return (0);
}

void
pfsync_timeout(void *arg)
{
	int s;

	s = splsoftnet();
	pfsync_sendout();
	splx(s);
}

/* this is a softnet/netisr handler */
void
pfsyncintr(void)
{
	pfsync_sendout();
}

int
pfsync_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen)
{
	/* All sysctl names at this level are terminal. */
	if (namelen != 1)
		return (ENOTDIR);

	switch (name[0]) {
	case PFSYNCCTL_STATS:
		if (newp != NULL)
			return (EPERM);
		return (sysctl_struct(oldp, oldlenp, newp, newlen,
		    &pfsyncstats, sizeof(pfsyncstats)));
	default:
		return (ENOPROTOOPT);
	}
}