/*	$OpenBSD: if_pfsync.c,v 1.254 2017/08/11 21:24:19 mpi Exp $	*/

/*
 * Copyright (c) 2002 Michael Shalayeff
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 2009 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/timeout.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/pool.h>
#include <sys/syslog.h>

#include <net/if.h>
#include <net/if_types.h>
#include <net/bpf.h>
#include <net/netisr.h>

#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/in_var.h>
#include <netinet/ip_var.h>
#include <netinet/ip_ipsp.h>
#include <netinet/ip_icmp.h>
#include <netinet/icmp6.h>
#include <netinet/tcp.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_fsm.h>
#include <netinet/udp.h>

#ifdef INET6
#include <netinet6/in6_var.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet6/nd6.h>
#endif /* INET6 */

#include "carp.h"
#if NCARP > 0
#include <netinet/ip_carp.h>
#endif

#define PF_DEBUGNAME	"pfsync: "
#include <net/pfvar.h>
#include <net/pfvar_priv.h>
#include <net/if_pfsync.h>

#include "bpfilter.h"
#include "pfsync.h"

#define PFSYNC_MINPKT ( \
	sizeof(struct ip) + \
	sizeof(struct pfsync_header))

int	pfsync_upd_tcp(struct pf_state *, struct pfsync_state_peer *,
	    struct pfsync_state_peer *);

int	pfsync_in_clr(caddr_t, int, int, int);
int	pfsync_in_iack(caddr_t, int, int, int);
int	pfsync_in_upd_c(caddr_t, int, int, int);
int	pfsync_in_ureq(caddr_t, int, int, int);
int	pfsync_in_del(caddr_t, int, int, int);
int	pfsync_in_del_c(caddr_t, int, int, int);
int	pfsync_in_bus(caddr_t, int, int, int);
int	pfsync_in_tdb(caddr_t, int, int, int);
int	pfsync_in_ins(caddr_t, int, int, int);
int	pfsync_in_upd(caddr_t, int, int, int);
int	pfsync_in_eof(caddr_t, int, int, int);

int	pfsync_in_error(caddr_t, int, int, int);

struct {
	int	(*in)(caddr_t, int, int, int);
	size_t	len;
} pfsync_acts[] = {
	/* PFSYNC_ACT_CLR */
	{ pfsync_in_clr,	sizeof(struct pfsync_clr) },
	/* PFSYNC_ACT_OINS */
	{ pfsync_in_error,	0 },
	/* PFSYNC_ACT_INS_ACK */
	{ pfsync_in_iack,	sizeof(struct pfsync_ins_ack) },
	/* PFSYNC_ACT_OUPD */
	{ pfsync_in_error,	0 },
	/* PFSYNC_ACT_UPD_C */
	{ pfsync_in_upd_c,	sizeof(struct pfsync_upd_c) },
	/* PFSYNC_ACT_UPD_REQ */
	{ pfsync_in_ureq,	sizeof(struct pfsync_upd_req) },
	/* PFSYNC_ACT_DEL */
	{ pfsync_in_del,	sizeof(struct pfsync_state) },
	/* PFSYNC_ACT_DEL_C */
	{ pfsync_in_del_c,	sizeof(struct pfsync_del_c) },
	/* PFSYNC_ACT_INS_F */
	{ pfsync_in_error,	0 },
	/* PFSYNC_ACT_DEL_F */
	{ pfsync_in_error,	0 },
	/* PFSYNC_ACT_BUS */
	{ pfsync_in_bus,	sizeof(struct pfsync_bus) },
	/* PFSYNC_ACT_OTDB */
	{ pfsync_in_error,	0 },
	/* PFSYNC_ACT_EOF */
	{ pfsync_in_error,	0 },
	/* PFSYNC_ACT_INS */
	{ pfsync_in_ins,	sizeof(struct pfsync_state) },
	/* PFSYNC_ACT_UPD */
	{ pfsync_in_upd,	sizeof(struct pfsync_state) },
	/* PFSYNC_ACT_TDB */
	{ pfsync_in_tdb,	sizeof(struct pfsync_tdb) },
};
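
/*
 * A pfsync packet, as parsed by pfsync_input() and built by
 * pfsync_sendout() below, looks roughly like this on the wire
 * (a sketch for orientation, not a normative layout):
 *
 *	struct ip		carried as IPPROTO_PFSYNC, TTL 255
 *	struct pfsync_header	version, length, pf checksum
 *	struct pfsync_subheader	action, message size in words, count
 *	  message * count	e.g. struct pfsync_state
 *	... further subheader-prefixed runs of messages ...
 *
 * PFSYNC_MINPKT above is the size of the fixed part; the pfsync_acts
 * table maps each action to its input handler and message size.
 */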
struct pfsync_q {
	void		(*write)(struct pf_state *, void *);
	size_t		len;
	u_int8_t	action;
};

/* we have one of these for every PFSYNC_S_ */
void	pfsync_out_state(struct pf_state *, void *);
void	pfsync_out_iack(struct pf_state *, void *);
void	pfsync_out_upd_c(struct pf_state *, void *);
void	pfsync_out_del(struct pf_state *, void *);

struct pfsync_q pfsync_qs[] = {
	{ pfsync_out_iack,  sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK },
	{ pfsync_out_upd_c, sizeof(struct pfsync_upd_c),   PFSYNC_ACT_UPD_C },
	{ pfsync_out_del,   sizeof(struct pfsync_del_c),   PFSYNC_ACT_DEL_C },
	{ pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_INS },
	{ pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_UPD }
};

void	pfsync_q_ins(struct pf_state *, int);
void	pfsync_q_del(struct pf_state *);

struct pfsync_upd_req_item {
	TAILQ_ENTRY(pfsync_upd_req_item)	 ur_entry;
	struct pfsync_upd_req			 ur_msg;
};
TAILQ_HEAD(pfsync_upd_reqs, pfsync_upd_req_item);

struct pfsync_deferral {
	TAILQ_ENTRY(pfsync_deferral)		 pd_entry;
	struct pf_state				*pd_st;
	struct mbuf				*pd_m;
	struct timeout				 pd_tmo;
};
TAILQ_HEAD(pfsync_deferrals, pfsync_deferral);
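
/*
 * Both update request items and deferrals are allocated from the same
 * per-softc pool (sc_pool), so size the pool entries for the larger of
 * the two.
 */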
#define PFSYNC_PLSIZE	MAX(sizeof(struct pfsync_upd_req_item), \
	    sizeof(struct pfsync_deferral))

void	pfsync_out_tdb(struct tdb *, void *);

struct pfsync_softc {
	struct ifnet		 sc_if;
	struct ifnet		*sc_sync_if;

	struct pool		 sc_pool;

	struct ip_moptions	 sc_imo;

	struct in_addr		 sc_sync_peer;
	u_int8_t		 sc_maxupdates;

	struct ip		 sc_template;

	struct pf_state_queue	 sc_qs[PFSYNC_S_COUNT];
	size_t			 sc_len;

	struct pfsync_upd_reqs	 sc_upd_req_list;

	int			 sc_initial_bulk;
	int			 sc_link_demoted;

	int			 sc_defer;
	struct pfsync_deferrals	 sc_deferrals;
	u_int			 sc_deferred;

	void			*sc_plus;
	size_t			 sc_pluslen;

	u_int32_t		 sc_ureq_sent;
	int			 sc_bulk_tries;
	struct timeout		 sc_bulkfail_tmo;

	u_int32_t		 sc_ureq_received;
	struct pf_state		*sc_bulk_next;
	struct pf_state		*sc_bulk_last;
	struct timeout		 sc_bulk_tmo;

	TAILQ_HEAD(, tdb)	 sc_tdb_q;

	void			*sc_lhcookie;
	void			*sc_dhcookie;

	struct timeout		 sc_tmo;
};

struct pfsync_softc	*pfsyncif = NULL;
struct cpumem		*pfsynccounters;

void	pfsyncattach(int);
int	pfsync_clone_create(struct if_clone *, int);
int	pfsync_clone_destroy(struct ifnet *);
int	pfsync_alloc_scrub_memory(struct pfsync_state_peer *,
	    struct pf_state_peer *);
void	pfsync_update_net_tdb(struct pfsync_tdb *);
int	pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
	    struct rtentry *);
int	pfsyncioctl(struct ifnet *, u_long, caddr_t);
void	pfsyncstart(struct ifnet *);
void	pfsync_syncdev_state(void *);
void	pfsync_ifdetach(void *);

void	pfsync_deferred(struct pf_state *, int);
void	pfsync_undefer(struct pfsync_deferral *, int);
void	pfsync_defer_tmo(void *);

void	pfsync_cancel_full_update(struct pfsync_softc *);
void	pfsync_request_full_update(struct pfsync_softc *);
void	pfsync_request_update(u_int32_t, u_int64_t);
void	pfsync_update_state_req(struct pf_state *);

void	pfsync_drop(struct pfsync_softc *);
void	pfsync_sendout(void);
void	pfsync_send_plus(void *, size_t);
void	pfsync_timeout(void *);
void	pfsync_tdb_timeout(void *);

void	pfsync_bulk_start(void);
void	pfsync_bulk_status(u_int8_t);
void	pfsync_bulk_update(void *);
void	pfsync_bulk_fail(void *);

#define PFSYNC_MAX_BULKTRIES	12
int	pfsync_sync_ok;

struct if_clone	pfsync_cloner =
    IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create, pfsync_clone_destroy);

void
pfsyncattach(int npfsync)
{
	if_clone_attach(&pfsync_cloner);
	pfsynccounters = counters_alloc(pfsyncs_ncounters);
}

int
pfsync_clone_create(struct if_clone *ifc, int unit)
{
	struct pfsync_softc *sc;
	struct ifnet *ifp;
	int q;

	if (unit != 0)
		return (EINVAL);

	pfsync_sync_ok = 1;

	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK | M_ZERO);

	for (q = 0; q < PFSYNC_S_COUNT; q++)
		TAILQ_INIT(&sc->sc_qs[q]);

	pool_init(&sc->sc_pool, PFSYNC_PLSIZE, 0, IPL_SOFTNET, 0, "pfsync",
	    NULL);
	TAILQ_INIT(&sc->sc_upd_req_list);
	TAILQ_INIT(&sc->sc_deferrals);
	sc->sc_deferred = 0;

	TAILQ_INIT(&sc->sc_tdb_q);

	sc->sc_len = PFSYNC_MINPKT;
	sc->sc_maxupdates = 128;

	sc->sc_imo.imo_membership = (struct in_multi **)malloc(
	    (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_IPMOPTS,
	    M_WAITOK | M_ZERO);
	sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;

	ifp = &sc->sc_if;
	snprintf(ifp->if_xname, sizeof ifp->if_xname, "pfsync%d", unit);
	ifp->if_softc = sc;
	ifp->if_ioctl = pfsyncioctl;
	ifp->if_output = pfsyncoutput;
	ifp->if_start = pfsyncstart;
	ifp->if_type = IFT_PFSYNC;
	IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN);
	ifp->if_hdrlen = sizeof(struct pfsync_header);
	ifp->if_mtu = ETHERMTU;
	ifp->if_xflags = IFXF_CLONED;
	timeout_set_proc(&sc->sc_tmo, pfsync_timeout, sc);
	timeout_set_proc(&sc->sc_bulk_tmo, pfsync_bulk_update, sc);
	timeout_set_proc(&sc->sc_bulkfail_tmo, pfsync_bulk_fail, sc);

	if_attach(ifp);
	if_alloc_sadl(ifp);

#if NCARP > 0
	if_addgroup(ifp, "carp");
#endif

#if NBPFILTER > 0
	bpfattach(&sc->sc_if.if_bpf, ifp, DLT_PFSYNC, PFSYNC_HDRLEN);
#endif

	pfsyncif = sc;

	return (0);
}

int
pfsync_clone_destroy(struct ifnet *ifp)
{
	struct pfsync_softc *sc = ifp->if_softc;
	struct pfsync_deferral *pd;

	timeout_del(&sc->sc_bulkfail_tmo);
	timeout_del(&sc->sc_bulk_tmo);
	timeout_del(&sc->sc_tmo);
#if NCARP > 0
	if (!pfsync_sync_ok)
		carp_group_demote_adj(&sc->sc_if, -1, "pfsync destroy");
	if (sc->sc_link_demoted)
		carp_group_demote_adj(&sc->sc_if, -1, "pfsync destroy");
#endif
	if (sc->sc_sync_if) {
		hook_disestablish(
		    sc->sc_sync_if->if_linkstatehooks,
		    sc->sc_lhcookie);
		hook_disestablish(sc->sc_sync_if->if_detachhooks,
		    sc->sc_dhcookie);
	}
	if_detach(ifp);

	pfsync_drop(sc);

	while (sc->sc_deferred > 0) {
		pd = TAILQ_FIRST(&sc->sc_deferrals);
		timeout_del(&pd->pd_tmo);
		pfsync_undefer(pd, 0);
	}

	pool_destroy(&sc->sc_pool);
	free(sc->sc_imo.imo_membership, M_IPMOPTS, 0);
	free(sc, M_DEVBUF, sizeof(*sc));

	pfsyncif = NULL;

	return (0);
}

/*
 * Start output on the pfsync interface.  pfsync builds and transmits
 * its packets in pfsync_sendout(), so anything on the interface send
 * queue is simply purged.
 */
void
pfsyncstart(struct ifnet *ifp)
{
	IFQ_PURGE(&ifp->if_snd);
}
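
/*
 * Called via the sync interface's link state hook (established in
 * pfsyncioctl()): demote the carp group while the physical link is
 * down and request a fresh bulk update once it comes back up.
 */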
void
pfsync_syncdev_state(void *arg)
{
	struct pfsync_softc *sc = arg;

	if (!sc->sc_sync_if || !(sc->sc_if.if_flags & IFF_UP))
		return;

	if (sc->sc_sync_if->if_link_state == LINK_STATE_DOWN) {
		sc->sc_if.if_flags &= ~IFF_RUNNING;
		if (!sc->sc_link_demoted) {
#if NCARP > 0
			carp_group_demote_adj(&sc->sc_if, 1,
			    "pfsync link state down");
#endif
			sc->sc_link_demoted = 1;
		}

		/* drop everything */
		timeout_del(&sc->sc_tmo);
		pfsync_drop(sc);

		pfsync_cancel_full_update(sc);
	} else if (sc->sc_link_demoted) {
		sc->sc_if.if_flags |= IFF_RUNNING;

		pfsync_request_full_update(sc);
	}
}

void
pfsync_ifdetach(void *arg)
{
	struct pfsync_softc *sc = arg;

	sc->sc_sync_if = NULL;
}

int
pfsync_alloc_scrub_memory(struct pfsync_state_peer *s,
    struct pf_state_peer *d)
{
	if (s->scrub.scrub_flag && d->scrub == NULL) {
		d->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT | PR_ZERO);
		if (d->scrub == NULL)
			return (ENOMEM);
	}

	return (0);
}

void
pfsync_state_export(struct pfsync_state *sp, struct pf_state *st)
{
	pf_state_export(sp, st);
}
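
/*
 * Create a local pf state from its wire representation.  This runs
 * both for states received from a peer and for states pushed in via
 * the pf ioctl interface; the PFSYNC_SI_IOCTL flag selects blocking
 * pool allocations and skips the temporary PFSTATE_NOSYNC marking
 * done for network imports below.
 */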
int
pfsync_state_import(struct pfsync_state *sp, int flags)
{
	struct pf_state *st = NULL;
	struct pf_state_key *skw = NULL, *sks = NULL;
	struct pf_rule *r = NULL;
	struct pfi_kif *kif;
	int pool_flags;
	int error;

	if (sp->creatorid == 0) {
		DPFPRINTF(LOG_NOTICE, "pfsync_state_import: "
		    "invalid creator id: %08x", ntohl(sp->creatorid));
		return (EINVAL);
	}

	if ((kif = pfi_kif_get(sp->ifname)) == NULL) {
		DPFPRINTF(LOG_NOTICE, "pfsync_state_import: "
		    "unknown interface: %s", sp->ifname);
		if (flags & PFSYNC_SI_IOCTL)
			return (EINVAL);
		return (0);	/* skip this state */
	}

	if (sp->af == 0)
		return (0);	/* skip this state */

	/*
	 * If the ruleset checksums match or the state is coming from
	 * the ioctl, it's safe to associate the state with the rule of
	 * that number.
	 */
	if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) &&
	    (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) <
	    pf_main_ruleset.rules.active.rcount)
		r = pf_main_ruleset.rules.active.ptr_array[ntohl(sp->rule)];
	else
		r = &pf_default_rule;

	if ((r->max_states && r->states_cur >= r->max_states))
		goto cleanup;

	if (flags & PFSYNC_SI_IOCTL)
		pool_flags = PR_WAITOK | PR_LIMITFAIL | PR_ZERO;
	else
		pool_flags = PR_NOWAIT | PR_LIMITFAIL | PR_ZERO;

	if ((st = pool_get(&pf_state_pl, pool_flags)) == NULL)
		goto cleanup;

	if ((skw = pf_alloc_state_key(pool_flags)) == NULL)
		goto cleanup;

	if ((sp->key[PF_SK_WIRE].af &&
	    (sp->key[PF_SK_WIRE].af != sp->key[PF_SK_STACK].af)) ||
	    PF_ANEQ(&sp->key[PF_SK_WIRE].addr[0],
	    &sp->key[PF_SK_STACK].addr[0], sp->af) ||
	    PF_ANEQ(&sp->key[PF_SK_WIRE].addr[1],
	    &sp->key[PF_SK_STACK].addr[1], sp->af) ||
	    sp->key[PF_SK_WIRE].port[0] != sp->key[PF_SK_STACK].port[0] ||
	    sp->key[PF_SK_WIRE].port[1] != sp->key[PF_SK_STACK].port[1] ||
	    sp->key[PF_SK_WIRE].rdomain != sp->key[PF_SK_STACK].rdomain) {
		if ((sks = pf_alloc_state_key(pool_flags)) == NULL)
			goto cleanup;
	} else
		sks = skw;

	/* allocate memory for scrub info */
	if (pfsync_alloc_scrub_memory(&sp->src, &st->src) ||
	    pfsync_alloc_scrub_memory(&sp->dst, &st->dst))
		goto cleanup;

	/* copy to state key(s) */
	skw->addr[0] = sp->key[PF_SK_WIRE].addr[0];
	skw->addr[1] = sp->key[PF_SK_WIRE].addr[1];
	skw->port[0] = sp->key[PF_SK_WIRE].port[0];
	skw->port[1] = sp->key[PF_SK_WIRE].port[1];
	skw->rdomain = ntohs(sp->key[PF_SK_WIRE].rdomain);
	PF_REF_INIT(skw->refcnt);
	skw->proto = sp->proto;
	if (!(skw->af = sp->key[PF_SK_WIRE].af))
		skw->af = sp->af;
	if (sks != skw) {
		sks->addr[0] = sp->key[PF_SK_STACK].addr[0];
		sks->addr[1] = sp->key[PF_SK_STACK].addr[1];
		sks->port[0] = sp->key[PF_SK_STACK].port[0];
		sks->port[1] = sp->key[PF_SK_STACK].port[1];
		sks->rdomain = ntohs(sp->key[PF_SK_STACK].rdomain);
		PF_REF_INIT(sks->refcnt);
		if (!(sks->af = sp->key[PF_SK_STACK].af))
			sks->af = sp->af;
		if (sks->af != skw->af) {
			switch (sp->proto) {
			case IPPROTO_ICMP:
				sks->proto = IPPROTO_ICMPV6;
				break;
			case IPPROTO_ICMPV6:
				sks->proto = IPPROTO_ICMP;
				break;
			default:
				sks->proto = sp->proto;
			}
		} else
			sks->proto = sp->proto;
	}
	st->rtableid[PF_SK_WIRE] = ntohl(sp->rtableid[PF_SK_WIRE]);
	st->rtableid[PF_SK_STACK] = ntohl(sp->rtableid[PF_SK_STACK]);

	/* copy to state */
	bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr));
	st->creation = time_uptime - ntohl(sp->creation);
	st->expire = time_uptime;
	if (ntohl(sp->expire)) {
		u_int32_t timeout;

		timeout = r->timeout[sp->timeout];
		if (!timeout)
			timeout = pf_default_rule.timeout[sp->timeout];

		/* sp->expire may have been adaptively scaled by export. */
		st->expire -= timeout - ntohl(sp->expire);
	}

	st->direction = sp->direction;
	st->log = sp->log;
	st->timeout = sp->timeout;
	st->state_flags = ntohs(sp->state_flags);
	st->max_mss = ntohs(sp->max_mss);
	st->min_ttl = sp->min_ttl;
	st->set_tos = sp->set_tos;
	st->set_prio[0] = sp->set_prio[0];
	st->set_prio[1] = sp->set_prio[1];

	st->id = sp->id;
	st->creatorid = sp->creatorid;
	pf_state_peer_ntoh(&sp->src, &st->src);
	pf_state_peer_ntoh(&sp->dst, &st->dst);

	st->rule.ptr = r;
	st->anchor.ptr = NULL;
	st->rt_kif = NULL;

	st->pfsync_time = time_uptime;
	st->sync_state = PFSYNC_S_NONE;

	/* XXX when we have anchors, use STATE_INC_COUNTERS */
	r->states_cur++;
	r->states_tot++;

	if (!ISSET(flags, PFSYNC_SI_IOCTL))
		SET(st->state_flags, PFSTATE_NOSYNC);

	if (pf_state_insert(kif, &skw, &sks, st) != 0) {
		/* XXX when we have anchors, use STATE_DEC_COUNTERS */
		r->states_cur--;
		error = EEXIST;
		goto cleanup_state;
	}

	if (!ISSET(flags, PFSYNC_SI_IOCTL)) {
		CLR(st->state_flags, PFSTATE_NOSYNC);
		if (ISSET(st->state_flags, PFSTATE_ACK)) {
			pfsync_q_ins(st, PFSYNC_S_IACK);
			schednetisr(NETISR_PFSYNC);
		}
	}
	CLR(st->state_flags, PFSTATE_ACK);

	return (0);

 cleanup:
	error = ENOMEM;
	if (skw == sks)
		sks = NULL;
	if (skw != NULL)
		pool_put(&pf_state_key_pl, skw);
	if (sks != NULL)
		pool_put(&pf_state_key_pl, sks);

 cleanup_state:	/* pf_state_insert frees the state keys */
	if (st) {
		if (st->dst.scrub)
			pool_put(&pf_state_scrub_pl, st->dst.scrub);
		if (st->src.scrub)
			pool_put(&pf_state_scrub_pl, st->src.scrub);
		pool_put(&pf_state_pl, st);
	}
	return (error);
}
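
/*
 * Receive path: ip_input() hands us complete pfsync packets.  After
 * the sanity checks (right interface, TTL 255, version, length) each
 * subheader is dispatched through the pfsync_acts table; actions we
 * do not understand are skipped when the peer told us their message
 * size, otherwise the rest of the packet is dropped.
 */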
int
pfsync_input(struct mbuf **mp, int *offp, int proto, int af)
{
	struct mbuf *n, *m = *mp;
	struct pfsync_softc *sc = pfsyncif;
	struct ip *ip = mtod(m, struct ip *);
	struct pfsync_header *ph;
	struct pfsync_subheader subh;
	int offset, noff, len, count, mlen, flags = 0;
	int e;

	pfsyncstat_inc(pfsyncs_ipackets);

	/* verify that we have a sync interface configured */
	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
	    sc->sc_sync_if == NULL || !pf_status.running)
		goto done;

	/* verify that the packet came in on the right interface */
	if (sc->sc_sync_if->if_index != m->m_pkthdr.ph_ifidx) {
		pfsyncstat_inc(pfsyncs_badif);
		goto done;
	}

	sc->sc_if.if_ipackets++;
	sc->sc_if.if_ibytes += m->m_pkthdr.len;

	/* verify that the IP TTL is 255. */
	if (ip->ip_ttl != PFSYNC_DFLTTL) {
		pfsyncstat_inc(pfsyncs_badttl);
		goto done;
	}

	offset = ip->ip_hl << 2;
	n = m_pulldown(m, offset, sizeof(*ph), &noff);
	if (n == NULL) {
		pfsyncstat_inc(pfsyncs_hdrops);
		return IPPROTO_DONE;
	}
	ph = (struct pfsync_header *)(n->m_data + noff);

	/* verify the version */
	if (ph->version != PFSYNC_VERSION) {
		pfsyncstat_inc(pfsyncs_badver);
		goto done;
	}
	len = ntohs(ph->len) + offset;
	if (m->m_pkthdr.len < len) {
		pfsyncstat_inc(pfsyncs_badlen);
		goto done;
	}

	if (!bcmp(&ph->pfcksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH))
		flags = PFSYNC_SI_CKSUM;

	offset += sizeof(*ph);
	while (offset <= len - sizeof(subh)) {
		m_copydata(m, offset, sizeof(subh), (caddr_t)&subh);
		offset += sizeof(subh);

		mlen = subh.len << 2;
		count = ntohs(subh.count);

		if (subh.action >= PFSYNC_ACT_MAX ||
		    subh.action >= nitems(pfsync_acts) ||
		    mlen < pfsync_acts[subh.action].len) {
			/*
			 * subheaders are always followed by at least one
			 * message, so if the peer is new enough to tell
			 * us how big its messages are then we know enough
			 * to skip them.
			 */
			if (count > 0 && mlen > 0) {
				offset += count * mlen;
				continue;
			}
			pfsyncstat_inc(pfsyncs_badact);
			goto done;
		}

		n = m_pulldown(m, offset, mlen * count, &noff);
		if (n == NULL) {
			pfsyncstat_inc(pfsyncs_badlen);
			return IPPROTO_DONE;
		}

		PF_LOCK();
		e = pfsync_acts[subh.action].in(n->m_data + noff, mlen, count,
		    flags);
		PF_UNLOCK();
		if (e != 0)
			goto done;

		offset += mlen * count;
	}

 done:
	m_freem(m);
	return IPPROTO_DONE;
}

int
pfsync_in_clr(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_clr *clr;
	struct pf_state *st, *nexts;
	struct pfi_kif *kif;
	u_int32_t creatorid;
	int i;

	for (i = 0; i < count; i++) {
		/*
		 * len is a byte count, so index the buffer before
		 * casting, as the other input handlers do.
		 */
		clr = (struct pfsync_clr *)(buf + len * i);
		kif = NULL;
		creatorid = clr->creatorid;
		if (strlen(clr->ifname) &&
		    (kif = pfi_kif_find(clr->ifname)) == NULL)
			continue;

		for (st = RB_MIN(pf_state_tree_id, &tree_id); st; st = nexts) {
			nexts = RB_NEXT(pf_state_tree_id, &tree_id, st);
			if (st->creatorid == creatorid &&
			    ((kif && st->kif == kif) || !kif)) {
				SET(st->state_flags, PFSTATE_NOSYNC);
				pf_remove_state(st);
			}
		}
	}

	return (0);
}

int
pfsync_in_ins(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_state *sp;
	sa_family_t af1, af2;
	int i;

	for (i = 0; i < count; i++) {
		sp = (struct pfsync_state *)(buf + len * i);
		af1 = sp->key[0].af;
		af2 = sp->key[1].af;

		/* check for invalid values */
		if (sp->timeout >= PFTM_MAX ||
		    sp->src.state > PF_TCPS_PROXY_DST ||
		    sp->dst.state > PF_TCPS_PROXY_DST ||
		    sp->direction > PF_OUT ||
		    (((af1 || af2) &&
		    ((af1 != AF_INET && af1 != AF_INET6) ||
		    (af2 != AF_INET && af2 != AF_INET6))) ||
		    (sp->af != AF_INET && sp->af != AF_INET6))) {
			DPFPRINTF(LOG_NOTICE,
			    "pfsync_input: PFSYNC5_ACT_INS: invalid value");
			pfsyncstat_inc(pfsyncs_badval);
			continue;
		}

		if (pfsync_state_import(sp, flags) == ENOMEM) {
			/* drop out, but process the rest of the actions */
			break;
		}
	}

	return (0);
}
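
/*
 * An insert ack from the peer means the state now exists on the other
 * side: release the packet that pfsync_defer() has been holding back
 * for it.
 */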
int
pfsync_in_iack(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_ins_ack *ia;
	struct pf_state_cmp id_key;
	struct pf_state *st;
	int i;

	for (i = 0; i < count; i++) {
		ia = (struct pfsync_ins_ack *)(buf + len * i);

		id_key.id = ia->id;
		id_key.creatorid = ia->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL)
			continue;

		if (ISSET(st->state_flags, PFSTATE_ACK))
			pfsync_deferred(st, 0);
	}

	return (0);
}

int
pfsync_upd_tcp(struct pf_state *st, struct pfsync_state_peer *src,
    struct pfsync_state_peer *dst)
{
	int sync = 0;

	/*
	 * The state should never go backwards except
	 * for syn-proxy states.  Neither should the
	 * sequence window slide backwards.
	 */
	if ((st->src.state > src->state &&
	    (st->src.state < PF_TCPS_PROXY_SRC ||
	    src->state >= PF_TCPS_PROXY_SRC)) ||

	    (st->src.state == src->state &&
	    SEQ_GT(st->src.seqlo, ntohl(src->seqlo))))
		sync++;
	else
		pf_state_peer_ntoh(src, &st->src);

	if ((st->dst.state > dst->state) ||

	    (st->dst.state >= TCPS_SYN_SENT &&
	    SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo))))
		sync++;
	else
		pf_state_peer_ntoh(dst, &st->dst);

	return (sync);
}

int
pfsync_in_upd(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_state *sp;
	struct pf_state_cmp id_key;
	struct pf_state *st;
	int sync;

	int i;

	for (i = 0; i < count; i++) {
		sp = (struct pfsync_state *)(buf + len * i);

		/* check for invalid values */
		if (sp->timeout >= PFTM_MAX ||
		    sp->src.state > PF_TCPS_PROXY_DST ||
		    sp->dst.state > PF_TCPS_PROXY_DST) {
			DPFPRINTF(LOG_NOTICE,
			    "pfsync_input: PFSYNC_ACT_UPD: invalid value");
			pfsyncstat_inc(pfsyncs_badval);
			continue;
		}

		id_key.id = sp->id;
		id_key.creatorid = sp->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL) {
			/* insert the update */
			if (pfsync_state_import(sp, flags))
				pfsyncstat_inc(pfsyncs_badstate);
			continue;
		}

		if (ISSET(st->state_flags, PFSTATE_ACK))
			pfsync_deferred(st, 1);

		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
			sync = pfsync_upd_tcp(st, &sp->src, &sp->dst);
		else {
			sync = 0;

			/*
			 * Non-TCP protocol state machines always go
			 * forwards
			 */
			if (st->src.state > sp->src.state)
				sync++;
			else
				pf_state_peer_ntoh(&sp->src, &st->src);

			if (st->dst.state > sp->dst.state)
				sync++;
			else
				pf_state_peer_ntoh(&sp->dst, &st->dst);
		}

		if (sync < 2) {
			pfsync_alloc_scrub_memory(&sp->dst, &st->dst);
			pf_state_peer_ntoh(&sp->dst, &st->dst);
			st->expire = time_uptime;
			st->timeout = sp->timeout;
		}
		st->pfsync_time = time_uptime;

		if (sync) {
			pfsyncstat_inc(pfsyncs_stale);

			pfsync_update_state(st);
			schednetisr(NETISR_PFSYNC);
		}
	}

	return (0);
}

int
pfsync_in_upd_c(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_upd_c *up;
	struct pf_state_cmp id_key;
	struct pf_state *st;

	int sync;

	int i;

	for (i = 0; i < count; i++) {
		up = (struct pfsync_upd_c *)(buf + len * i);

		/* check for invalid values */
		if (up->timeout >= PFTM_MAX ||
		    up->src.state > PF_TCPS_PROXY_DST ||
		    up->dst.state > PF_TCPS_PROXY_DST) {
			DPFPRINTF(LOG_NOTICE,
			    "pfsync_input: PFSYNC_ACT_UPD_C: invalid value");
			pfsyncstat_inc(pfsyncs_badval);
			continue;
		}

		id_key.id = up->id;
		id_key.creatorid = up->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL) {
			/* We don't have this state. Ask for it. */
			pfsync_request_update(id_key.creatorid, id_key.id);
			continue;
		}

		if (ISSET(st->state_flags, PFSTATE_ACK))
			pfsync_deferred(st, 1);

		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
			sync = pfsync_upd_tcp(st, &up->src, &up->dst);
		else {
			sync = 0;
			/*
			 * Non-TCP protocol state machines always go
			 * forwards
			 */
			if (st->src.state > up->src.state)
				sync++;
			else
				pf_state_peer_ntoh(&up->src, &st->src);

			if (st->dst.state > up->dst.state)
				sync++;
			else
				pf_state_peer_ntoh(&up->dst, &st->dst);
		}
		if (sync < 2) {
			pfsync_alloc_scrub_memory(&up->dst, &st->dst);
			pf_state_peer_ntoh(&up->dst, &st->dst);
			st->expire = time_uptime;
			st->timeout = up->timeout;
		}
		st->pfsync_time = time_uptime;

		if (sync) {
			pfsyncstat_inc(pfsyncs_stale);

			pfsync_update_state(st);
			schednetisr(NETISR_PFSYNC);
		}
	}

	return (0);
}

int
pfsync_in_ureq(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_upd_req *ur;
	int i;

	struct pf_state_cmp id_key;
	struct pf_state *st;

	for (i = 0; i < count; i++) {
		ur = (struct pfsync_upd_req *)(buf + len * i);

		id_key.id = ur->id;
		id_key.creatorid = ur->creatorid;

		if (id_key.id == 0 && id_key.creatorid == 0)
			pfsync_bulk_start();
		else {
			st = pf_find_state_byid(&id_key);
			if (st == NULL) {
				pfsyncstat_inc(pfsyncs_badstate);
				continue;
			}
			if (ISSET(st->state_flags, PFSTATE_NOSYNC))
				continue;

			pfsync_update_state_req(st);
		}
	}

	return (0);
}

int
pfsync_in_del(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_state *sp;
	struct pf_state_cmp id_key;
	struct pf_state *st;
	int i;

	for (i = 0; i < count; i++) {
		sp = (struct pfsync_state *)(buf + len * i);

		id_key.id = sp->id;
		id_key.creatorid = sp->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL) {
			pfsyncstat_inc(pfsyncs_badstate);
			continue;
		}
		SET(st->state_flags, PFSTATE_NOSYNC);
		pf_remove_state(st);
	}

	return (0);
}

int
pfsync_in_del_c(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_del_c *sp;
	struct pf_state_cmp id_key;
	struct pf_state *st;
	int i;

	for (i = 0; i < count; i++) {
		sp = (struct pfsync_del_c *)(buf + len * i);

		id_key.id = sp->id;
		id_key.creatorid = sp->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL) {
			pfsyncstat_inc(pfsyncs_badstate);
			continue;
		}

		SET(st->state_flags, PFSTATE_NOSYNC);
		pf_remove_state(st);
	}

	return (0);
}
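
/*
 * Bulk update protocol: an update request with id 0 and creatorid 0
 * asks the peer for its complete state table (pfsync_bulk_start()),
 * and the peer brackets the transfer with BUS start/end messages so
 * the requester knows when it has caught up.
 */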
int
pfsync_in_bus(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_bus *bus;

	/* If we're not waiting for a bulk update, who cares. */
	if (sc->sc_ureq_sent == 0)
		return (0);

	bus = (struct pfsync_bus *)buf;

	switch (bus->status) {
	case PFSYNC_BUS_START:
		timeout_add(&sc->sc_bulkfail_tmo, 4 * hz +
		    pf_pool_limits[PF_LIMIT_STATES].limit /
		    ((sc->sc_if.if_mtu - PFSYNC_MINPKT) /
		    sizeof(struct pfsync_state)));
		DPFPRINTF(LOG_INFO, "received bulk update start");
		break;

	case PFSYNC_BUS_END:
		if (time_uptime - ntohl(bus->endtime) >=
		    sc->sc_ureq_sent) {
			/* that's it, we're happy */
			sc->sc_ureq_sent = 0;
			sc->sc_bulk_tries = 0;
			timeout_del(&sc->sc_bulkfail_tmo);
#if NCARP > 0
			if (!pfsync_sync_ok)
				carp_group_demote_adj(&sc->sc_if, -1,
				    sc->sc_link_demoted ?
				    "pfsync link state up" :
				    "pfsync bulk done");
			if (sc->sc_initial_bulk) {
				carp_group_demote_adj(&sc->sc_if, -32,
				    "pfsync init");
				sc->sc_initial_bulk = 0;
			}
#endif
			pfsync_sync_ok = 1;
			sc->sc_link_demoted = 0;
			DPFPRINTF(LOG_INFO, "received valid bulk update end");
		} else {
			DPFPRINTF(LOG_WARNING, "received invalid "
			    "bulk update end: bad timestamp");
		}
		break;
	}

	return (0);
}

int
pfsync_in_tdb(caddr_t buf, int len, int count, int flags)
{
#if defined(IPSEC)
	struct pfsync_tdb *tp;
	int i;

	for (i = 0; i < count; i++) {
		tp = (struct pfsync_tdb *)(buf + len * i);
		pfsync_update_net_tdb(tp);
	}
#endif

	return (0);
}

#if defined(IPSEC)
/* Update an in-kernel tdb. Silently fail if no tdb is found. */
void
pfsync_update_net_tdb(struct pfsync_tdb *pt)
{
	struct tdb *tdb;

	NET_ASSERT_LOCKED();

	/* check for invalid values */
	if (ntohl(pt->spi) <= SPI_RESERVED_MAX ||
	    (pt->dst.sa.sa_family != AF_INET &&
	    pt->dst.sa.sa_family != AF_INET6))
		goto bad;

	tdb = gettdb(ntohs(pt->rdomain), pt->spi,
	    (union sockaddr_union *)&pt->dst, pt->sproto);
	if (tdb) {
		pt->rpl = betoh64(pt->rpl);
		pt->cur_bytes = betoh64(pt->cur_bytes);

		/* Neither replay nor byte counter should ever decrease. */
		if (pt->rpl < tdb->tdb_rpl ||
		    pt->cur_bytes < tdb->tdb_cur_bytes) {
			goto bad;
		}

		tdb->tdb_rpl = pt->rpl;
		tdb->tdb_cur_bytes = pt->cur_bytes;
	}
	return;

 bad:
	DPFPRINTF(LOG_WARNING, "pfsync_insert: PFSYNC_ACT_TDB_UPD: "
	    "invalid value");
	pfsyncstat_inc(pfsyncs_badstate);
	return;
}
#endif

int
pfsync_in_eof(caddr_t buf, int len, int count, int flags)
{
	if (len > 0 || count > 0)
		pfsyncstat_inc(pfsyncs_badact);

	/* we're done.  let the caller return */
	return (1);
}

int
pfsync_in_error(caddr_t buf, int len, int count, int flags)
{
	pfsyncstat_inc(pfsyncs_badact);
	return (-1);
}

int
pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
    struct rtentry *rt)
{
	m_freem(m);	/* drop packet */
	return (EAFNOSUPPORT);
}

int
pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct proc *p = curproc;
	struct pfsync_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ip_moptions *imo = &sc->sc_imo;
	struct pfsyncreq pfsyncr;
	struct ifnet *sifp;
	struct ip *ip;
	int error;

	switch (cmd) {
	case SIOCSIFFLAGS:
		if ((ifp->if_flags & IFF_RUNNING) == 0 &&
		    (ifp->if_flags & IFF_UP)) {
			ifp->if_flags |= IFF_RUNNING;

#if NCARP > 0
			sc->sc_initial_bulk = 1;
			carp_group_demote_adj(&sc->sc_if, 32, "pfsync init");
#endif

			pfsync_request_full_update(sc);
		}
		if ((ifp->if_flags & IFF_RUNNING) &&
		    (ifp->if_flags & IFF_UP) == 0) {
			ifp->if_flags &= ~IFF_RUNNING;

			/* drop everything */
			timeout_del(&sc->sc_tmo);
			pfsync_drop(sc);

			pfsync_cancel_full_update(sc);
		}
		break;
	case SIOCSIFMTU:
		if (!sc->sc_sync_if ||
		    ifr->ifr_mtu <= PFSYNC_MINPKT ||
		    ifr->ifr_mtu > sc->sc_sync_if->if_mtu)
			return (EINVAL);
		if (ifr->ifr_mtu < ifp->if_mtu)
			pfsync_sendout();
		ifp->if_mtu = ifr->ifr_mtu;
		break;
	case SIOCGETPFSYNC:
		bzero(&pfsyncr, sizeof(pfsyncr));
		if (sc->sc_sync_if) {
			strlcpy(pfsyncr.pfsyncr_syncdev,
			    sc->sc_sync_if->if_xname, IFNAMSIZ);
		}
		pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer;
		pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates;
		pfsyncr.pfsyncr_defer = sc->sc_defer;
		return (copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr)));

	case SIOCSETPFSYNC:
		if ((error = suser(p, 0)) != 0)
			return (error);
		if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr))))
			return (error);

		if (pfsyncr.pfsyncr_syncpeer.s_addr == 0)
			sc->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP;
		else
			sc->sc_sync_peer.s_addr =
			    pfsyncr.pfsyncr_syncpeer.s_addr;

		if (pfsyncr.pfsyncr_maxupdates > 255)
			return (EINVAL);
		sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates;

		sc->sc_defer = pfsyncr.pfsyncr_defer;

		if (pfsyncr.pfsyncr_syncdev[0] == 0) {
			if (sc->sc_sync_if) {
				hook_disestablish(
				    sc->sc_sync_if->if_linkstatehooks,
				    sc->sc_lhcookie);
				hook_disestablish(
				    sc->sc_sync_if->if_detachhooks,
				    sc->sc_dhcookie);
			}
			sc->sc_sync_if = NULL;
			if (imo->imo_num_memberships > 0) {
				in_delmulti(imo->imo_membership[
				    --imo->imo_num_memberships]);
				imo->imo_ifidx = 0;
			}
			break;
		}

		if ((sifp = ifunit(pfsyncr.pfsyncr_syncdev)) == NULL)
			return (EINVAL);

		if (sifp->if_mtu < sc->sc_if.if_mtu ||
		    (sc->sc_sync_if != NULL &&
		    sifp->if_mtu < sc->sc_sync_if->if_mtu) ||
		    sifp->if_mtu < MCLBYTES - sizeof(struct ip))
			pfsync_sendout();

		if (sc->sc_sync_if) {
			hook_disestablish(
			    sc->sc_sync_if->if_linkstatehooks,
			    sc->sc_lhcookie);
			hook_disestablish(
			    sc->sc_sync_if->if_detachhooks,
			    sc->sc_dhcookie);
		}
		sc->sc_sync_if = sifp;

		if (imo->imo_num_memberships > 0) {
			in_delmulti(imo->imo_membership[
			    --imo->imo_num_memberships]);
			imo->imo_ifidx = 0;
		}

		if (sc->sc_sync_if &&
		    sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) {
			struct in_addr addr;

			if (!(sc->sc_sync_if->if_flags & IFF_MULTICAST)) {
				sc->sc_sync_if = NULL;
				return (EADDRNOTAVAIL);
			}

			addr.s_addr = INADDR_PFSYNC_GROUP;

			if ((imo->imo_membership[0] =
			    in_addmulti(&addr, sc->sc_sync_if)) == NULL) {
				sc->sc_sync_if = NULL;
				return (ENOBUFS);
			}
			imo->imo_num_memberships++;
			imo->imo_ifidx = sc->sc_sync_if->if_index;
			imo->imo_ttl = PFSYNC_DFLTTL;
			imo->imo_loop = 0;
		}

		ip = &sc->sc_template;
		bzero(ip, sizeof(*ip));
		ip->ip_v = IPVERSION;
		ip->ip_hl = sizeof(sc->sc_template) >> 2;
		ip->ip_tos = IPTOS_LOWDELAY;
		/* len and id are set later */
		ip->ip_off = htons(IP_DF);
		ip->ip_ttl = PFSYNC_DFLTTL;
		ip->ip_p = IPPROTO_PFSYNC;
		ip->ip_src.s_addr = INADDR_ANY;
		ip->ip_dst.s_addr = sc->sc_sync_peer.s_addr;

		sc->sc_lhcookie =
		    hook_establish(sc->sc_sync_if->if_linkstatehooks, 1,
		    pfsync_syncdev_state, sc);
		sc->sc_dhcookie = hook_establish(sc->sc_sync_if->if_detachhooks,
		    0, pfsync_ifdetach, sc);

		pfsync_request_full_update(sc);

		break;

	default:
		return (ENOTTY);
	}

	return (0);
}

void
pfsync_out_state(struct pf_state *st, void *buf)
{
	struct pfsync_state *sp = buf;

	pfsync_state_export(sp, st);
}

void
pfsync_out_iack(struct pf_state *st, void *buf)
{
	struct pfsync_ins_ack *iack = buf;

	iack->id = st->id;
	iack->creatorid = st->creatorid;
}

void
pfsync_out_upd_c(struct pf_state *st, void *buf)
{
	struct pfsync_upd_c *up = buf;

	bzero(up, sizeof(*up));
	up->id = st->id;
	pf_state_peer_hton(&st->src, &up->src);
	pf_state_peer_hton(&st->dst, &up->dst);
	up->creatorid = st->creatorid;
	up->timeout = st->timeout;
}

void
pfsync_out_del(struct pf_state *st, void *buf)
{
	struct pfsync_del_c *dp = buf;

	dp->id = st->id;
	dp->creatorid = st->creatorid;

	SET(st->state_flags, PFSTATE_NOSYNC);
}

void
pfsync_drop(struct pfsync_softc *sc)
{
	struct pf_state *st;
	struct pfsync_upd_req_item *ur;
	struct tdb *t;
	int q;

	for (q = 0; q < PFSYNC_S_COUNT; q++) {
		if (TAILQ_EMPTY(&sc->sc_qs[q]))
			continue;

		TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list) {
#ifdef PFSYNC_DEBUG
			KASSERT(st->sync_state == q);
#endif
			st->sync_state = PFSYNC_S_NONE;
		}
		TAILQ_INIT(&sc->sc_qs[q]);
	}

	while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
		TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);
		pool_put(&sc->sc_pool, ur);
	}

	sc->sc_plus = NULL;

	if (!TAILQ_EMPTY(&sc->sc_tdb_q)) {
		TAILQ_FOREACH(t, &sc->sc_tdb_q, tdb_sync_entry)
			CLR(t->tdb_flags, TDBF_PFSYNC);

		TAILQ_INIT(&sc->sc_tdb_q);
	}

	sc->sc_len = PFSYNC_MINPKT;
}
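
/*
 * Assemble everything that has accumulated (update requests, the
 * optional "plus" region, queued tdbs and the per-type state queues)
 * into a single packet, reset sc_len and hand the result to
 * ip_output().  Runs from the pfsync netisr and from the timeouts.
 */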
void
pfsync_sendout(void)
{
	struct pfsync_softc *sc = pfsyncif;
#if NBPFILTER > 0
	struct ifnet *ifp = &sc->sc_if;
#endif
	struct mbuf *m;
	struct ip *ip;
	struct pfsync_header *ph;
	struct pfsync_subheader *subh;
	struct pf_state *st;
	struct pfsync_upd_req_item *ur;
	struct tdb *t;

	int offset;
	int q, count = 0;

	if (sc == NULL || sc->sc_len == PFSYNC_MINPKT)
		return;

	if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
#if NBPFILTER > 0
	    (ifp->if_bpf == NULL && sc->sc_sync_if == NULL)) {
#else
	    sc->sc_sync_if == NULL) {
#endif
		pfsync_drop(sc);
		return;
	}

	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL) {
		sc->sc_if.if_oerrors++;
		pfsyncstat_inc(pfsyncs_onomem);
		pfsync_drop(sc);
		return;
	}

	if (max_linkhdr + sc->sc_len > MHLEN) {
		MCLGETI(m, M_DONTWAIT, NULL, max_linkhdr + sc->sc_len);
		if (!ISSET(m->m_flags, M_EXT)) {
			m_free(m);
			sc->sc_if.if_oerrors++;
			pfsyncstat_inc(pfsyncs_onomem);
			pfsync_drop(sc);
			return;
		}
	}
	m->m_data += max_linkhdr;
	m->m_len = m->m_pkthdr.len = sc->sc_len;

	/* build the ip header */
	ip = mtod(m, struct ip *);
	bcopy(&sc->sc_template, ip, sizeof(*ip));
	offset = sizeof(*ip);

	ip->ip_len = htons(m->m_pkthdr.len);
	ip->ip_id = htons(ip_randomid());

	/* build the pfsync header */
	ph = (struct pfsync_header *)(m->m_data + offset);
	bzero(ph, sizeof(*ph));
	offset += sizeof(*ph);

	ph->version = PFSYNC_VERSION;
	ph->len = htons(sc->sc_len - sizeof(*ip));
	bcopy(pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH);

	if (!TAILQ_EMPTY(&sc->sc_upd_req_list)) {
		subh = (struct pfsync_subheader *)(m->m_data + offset);
		offset += sizeof(*subh);

		count = 0;
		while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
			TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);

			bcopy(&ur->ur_msg, m->m_data + offset,
			    sizeof(ur->ur_msg));
			offset += sizeof(ur->ur_msg);

			pool_put(&sc->sc_pool, ur);

			count++;
		}

		bzero(subh, sizeof(*subh));
		subh->len = sizeof(ur->ur_msg) >> 2;
		subh->action = PFSYNC_ACT_UPD_REQ;
		subh->count = htons(count);
	}

	/* has someone built a custom region for us to add? */
	if (sc->sc_plus != NULL) {
		bcopy(sc->sc_plus, m->m_data + offset, sc->sc_pluslen);
		offset += sc->sc_pluslen;

		sc->sc_plus = NULL;
	}

	if (!TAILQ_EMPTY(&sc->sc_tdb_q)) {
		subh = (struct pfsync_subheader *)(m->m_data + offset);
		offset += sizeof(*subh);

		count = 0;
		TAILQ_FOREACH(t, &sc->sc_tdb_q, tdb_sync_entry) {
			pfsync_out_tdb(t, m->m_data + offset);
			offset += sizeof(struct pfsync_tdb);
			CLR(t->tdb_flags, TDBF_PFSYNC);

			count++;
		}
		TAILQ_INIT(&sc->sc_tdb_q);

		bzero(subh, sizeof(*subh));
		subh->action = PFSYNC_ACT_TDB;
		subh->len = sizeof(struct pfsync_tdb) >> 2;
		subh->count = htons(count);
	}

	/* walk the queues */
	for (q = 0; q < PFSYNC_S_COUNT; q++) {
		if (TAILQ_EMPTY(&sc->sc_qs[q]))
			continue;

		subh = (struct pfsync_subheader *)(m->m_data + offset);
		offset += sizeof(*subh);

		count = 0;
		TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list) {
#ifdef PFSYNC_DEBUG
			KASSERT(st->sync_state == q);
#endif
			pfsync_qs[q].write(st, m->m_data + offset);
			offset += pfsync_qs[q].len;

			st->sync_state = PFSYNC_S_NONE;
			count++;
		}
		TAILQ_INIT(&sc->sc_qs[q]);

		bzero(subh, sizeof(*subh));
		subh->action = pfsync_qs[q].action;
		subh->len = pfsync_qs[q].len >> 2;
		subh->count = htons(count);
	}

	/* we're done, let's put it on the wire */
#if NBPFILTER > 0
	if (ifp->if_bpf) {
		m->m_data += sizeof(*ip);
		m->m_len = m->m_pkthdr.len = sc->sc_len - sizeof(*ip);
		bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
		m->m_data -= sizeof(*ip);
		m->m_len = m->m_pkthdr.len = sc->sc_len;
	}

	if (sc->sc_sync_if == NULL) {
		sc->sc_len = PFSYNC_MINPKT;
		m_freem(m);
		return;
	}
#endif

	/* start again */
	sc->sc_len = PFSYNC_MINPKT;

	sc->sc_if.if_opackets++;
	sc->sc_if.if_obytes += m->m_pkthdr.len;

	m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain;

	if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL, 0) == 0)
		pfsyncstat_inc(pfsyncs_opackets);
	else
		pfsyncstat_inc(pfsyncs_oerrors);
}

void
pfsync_insert_state(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;

	NET_ASSERT_LOCKED();

	if (ISSET(st->rule.ptr->rule_flag, PFRULE_NOSYNC) ||
	    st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) {
		SET(st->state_flags, PFSTATE_NOSYNC);
		return;
	}

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
	    ISSET(st->state_flags, PFSTATE_NOSYNC))
		return;

#ifdef PFSYNC_DEBUG
	KASSERT(st->sync_state == PFSYNC_S_NONE);
#endif

	if (sc->sc_len == PFSYNC_MINPKT)
		timeout_add_sec(&sc->sc_tmo, 1);

	pfsync_q_ins(st, PFSYNC_S_INS);

	st->sync_updates = 0;
}
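
/*
 * Defer transmission of the packet that created this state: it is
 * held (and the state marked PFSTATE_ACK) until the peer acknowledges
 * the state insert or the 20ms timeout below fires, so the peer has
 * the state before any reply traffic can reach it.  Returns 1 when
 * the mbuf has been taken.
 */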
int
pfsync_defer(struct pf_state *st, struct mbuf *m)
{
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_deferral *pd;

	NET_ASSERT_LOCKED();

	if (!sc->sc_defer ||
	    ISSET(st->state_flags, PFSTATE_NOSYNC) ||
	    m->m_flags & (M_BCAST|M_MCAST))
		return (0);

	if (sc->sc_deferred >= 128) {
		pd = TAILQ_FIRST(&sc->sc_deferrals);
		if (timeout_del(&pd->pd_tmo))
			pfsync_undefer(pd, 0);
	}

	pd = pool_get(&sc->sc_pool, PR_NOWAIT);
	if (pd == NULL)
		return (0);

	m->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
	SET(st->state_flags, PFSTATE_ACK);

	pd->pd_st = st;
	pd->pd_m = m;

	sc->sc_deferred++;
	TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry);

	timeout_set_proc(&pd->pd_tmo, pfsync_defer_tmo, pd);
	timeout_add_msec(&pd->pd_tmo, 20);

	schednetisr(NETISR_PFSYNC);

	return (1);
}

void
pfsync_undefer(struct pfsync_deferral *pd, int drop)
{
	struct pfsync_softc *sc = pfsyncif;
	struct pf_pdesc pdesc;

	NET_ASSERT_LOCKED();

	TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
	sc->sc_deferred--;

	CLR(pd->pd_st->state_flags, PFSTATE_ACK);
	if (drop)
		m_freem(pd->pd_m);
	else {
		if (pd->pd_st->rule.ptr->rt == PF_ROUTETO) {
			if (pf_setup_pdesc(&pdesc,
			    pd->pd_st->key[PF_SK_WIRE]->af,
			    pd->pd_st->direction, pd->pd_st->rt_kif,
			    pd->pd_m, NULL) != PF_PASS) {
				m_freem(pd->pd_m);
				goto out;
			}
			switch (pd->pd_st->key[PF_SK_WIRE]->af) {
			case AF_INET:
				pf_route(&pdesc,
				    pd->pd_st->rule.ptr, pd->pd_st);
				break;
#ifdef INET6
			case AF_INET6:
				pf_route6(&pdesc,
				    pd->pd_st->rule.ptr, pd->pd_st);
				break;
#endif /* INET6 */
			}
			pd->pd_m = pdesc.m;
		} else {
			switch (pd->pd_st->key[PF_SK_WIRE]->af) {
			case AF_INET:
				ip_output(pd->pd_m, NULL, NULL, 0, NULL, NULL,
				    0);
				break;
#ifdef INET6
			case AF_INET6:
				ip6_output(pd->pd_m, NULL, NULL, 0,
				    NULL, NULL);
				break;
#endif /* INET6 */
			}
		}
	}
 out:
	pool_put(&sc->sc_pool, pd);
}

void
pfsync_defer_tmo(void *arg)
{
	NET_LOCK();
	pfsync_undefer(arg, 0);
	NET_UNLOCK();
}

void
pfsync_deferred(struct pf_state *st, int drop)
{
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_deferral *pd;

	NET_ASSERT_LOCKED();

	TAILQ_FOREACH(pd, &sc->sc_deferrals, pd_entry) {
		if (pd->pd_st == st) {
			if (timeout_del(&pd->pd_tmo))
				pfsync_undefer(pd, drop);
			return;
		}
	}

	panic("pfsync_deferred: unable to find deferred state");
}

void
pfsync_update_state(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;
	int sync = 0;

	NET_ASSERT_LOCKED();

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
		return;

	if (ISSET(st->state_flags, PFSTATE_ACK))
		pfsync_deferred(st, 0);
	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
		if (st->sync_state != PFSYNC_S_NONE)
			pfsync_q_del(st);
		return;
	}

	if (sc->sc_len == PFSYNC_MINPKT)
		timeout_add_sec(&sc->sc_tmo, 1);

	switch (st->sync_state) {
	case PFSYNC_S_UPD_C:
	case PFSYNC_S_UPD:
	case PFSYNC_S_INS:
		/* we're already handling it */

		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) {
			st->sync_updates++;
			if (st->sync_updates >= sc->sc_maxupdates)
				sync = 1;
		}
		break;

	case PFSYNC_S_IACK:
		pfsync_q_del(st);
		/* FALLTHROUGH */
	case PFSYNC_S_NONE:
		pfsync_q_ins(st, PFSYNC_S_UPD_C);
		st->sync_updates = 0;
		break;

	default:
		panic("pfsync_update_state: unexpected sync state %d",
		    st->sync_state);
	}

	if (sync || (time_uptime - st->pfsync_time) < 2)
		schednetisr(NETISR_PFSYNC);
}

void
pfsync_cancel_full_update(struct pfsync_softc *sc)
{
	if (timeout_pending(&sc->sc_bulkfail_tmo) ||
	    timeout_pending(&sc->sc_bulk_tmo)) {
#if NCARP > 0
		if (!pfsync_sync_ok)
			carp_group_demote_adj(&sc->sc_if, -1,
			    "pfsync bulk cancelled");
		if (sc->sc_initial_bulk) {
			carp_group_demote_adj(&sc->sc_if, -32,
			    "pfsync init");
			sc->sc_initial_bulk = 0;
		}
#endif
		pfsync_sync_ok = 1;
		DPFPRINTF(LOG_INFO, "cancelling bulk update");
	}
	timeout_del(&sc->sc_bulkfail_tmo);
	timeout_del(&sc->sc_bulk_tmo);
	sc->sc_bulk_next = NULL;
	sc->sc_bulk_last = NULL;
	sc->sc_ureq_sent = 0;
	sc->sc_bulk_tries = 0;
}

void
pfsync_request_full_update(struct pfsync_softc *sc)
{
	if (sc->sc_sync_if && ISSET(sc->sc_if.if_flags, IFF_RUNNING)) {
		/* Request a full state table update. */
		sc->sc_ureq_sent = time_uptime;
#if NCARP > 0
		if (!sc->sc_link_demoted && pfsync_sync_ok)
			carp_group_demote_adj(&sc->sc_if, 1,
			    "pfsync bulk start");
#endif
		pfsync_sync_ok = 0;
		DPFPRINTF(LOG_INFO, "requesting bulk update");
		timeout_add(&sc->sc_bulkfail_tmo, 4 * hz +
		    pf_pool_limits[PF_LIMIT_STATES].limit /
		    ((sc->sc_if.if_mtu - PFSYNC_MINPKT) /
		    sizeof(struct pfsync_state)));
		pfsync_request_update(0, 0);
	}
}

void
pfsync_request_update(u_int32_t creatorid, u_int64_t id)
{
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_upd_req_item *item;
	size_t nlen = sizeof(struct pfsync_upd_req);

	/*
	 * this code does nothing to prevent multiple update requests for the
	 * same state being generated.
	 */

	item = pool_get(&sc->sc_pool, PR_NOWAIT);
	if (item == NULL) {
		/* XXX stats */
		return;
	}

	item->ur_msg.id = id;
	item->ur_msg.creatorid = creatorid;

	if (TAILQ_EMPTY(&sc->sc_upd_req_list))
		nlen += sizeof(struct pfsync_subheader);

	if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
		pfsync_sendout();

		nlen = sizeof(struct pfsync_subheader) +
		    sizeof(struct pfsync_upd_req);
	}

	TAILQ_INSERT_TAIL(&sc->sc_upd_req_list, item, ur_entry);
	sc->sc_len += nlen;

	schednetisr(NETISR_PFSYNC);
}

void
pfsync_update_state_req(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;

	if (sc == NULL)
		panic("pfsync_update_state_req: nonexistent instance");

	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
		if (st->sync_state != PFSYNC_S_NONE)
			pfsync_q_del(st);
		return;
	}

	switch (st->sync_state) {
	case PFSYNC_S_UPD_C:
	case PFSYNC_S_IACK:
		pfsync_q_del(st);
		/* FALLTHROUGH */
	case PFSYNC_S_NONE:
		pfsync_q_ins(st, PFSYNC_S_UPD);
		schednetisr(NETISR_PFSYNC);
		return;

	case PFSYNC_S_INS:
	case PFSYNC_S_UPD:
	case PFSYNC_S_DEL:
		/* we're already handling it */
		return;

	default:
		panic("pfsync_update_state_req: unexpected sync state %d",
		    st->sync_state);
	}
}

void
pfsync_delete_state(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;

	NET_ASSERT_LOCKED();

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
		return;

	if (ISSET(st->state_flags, PFSTATE_ACK))
		pfsync_deferred(st, 1);
	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
		if (st->sync_state != PFSYNC_S_NONE)
			pfsync_q_del(st);
		return;
	}

	if (sc->sc_len == PFSYNC_MINPKT)
		timeout_add_sec(&sc->sc_tmo, 1);

	switch (st->sync_state) {
	case PFSYNC_S_INS:
		/* we never got to tell the world so just forget about it */
		pfsync_q_del(st);
		return;

	case PFSYNC_S_UPD_C:
	case PFSYNC_S_UPD:
	case PFSYNC_S_IACK:
		pfsync_q_del(st);
		/* FALLTHROUGH to putting it on the del list */

	case PFSYNC_S_NONE:
		pfsync_q_ins(st, PFSYNC_S_DEL);
		return;

	default:
		panic("pfsync_delete_state: unexpected sync state %d",
		    st->sync_state);
	}
}

void
pfsync_clear_states(u_int32_t creatorid, const char *ifname)
{
	struct pfsync_softc *sc = pfsyncif;
	struct {
		struct pfsync_subheader subh;
		struct pfsync_clr clr;
	} __packed r;

	NET_ASSERT_LOCKED();

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
		return;

	bzero(&r, sizeof(r));

	r.subh.action = PFSYNC_ACT_CLR;
	r.subh.len = sizeof(struct pfsync_clr) >> 2;
	r.subh.count = htons(1);

	strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname));
	r.clr.creatorid = creatorid;

	pfsync_send_plus(&r, sizeof(r));
}
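
/*
 * sc_len bookkeeping: sc_len always mirrors the size of the packet
 * pfsync_sendout() would emit right now.  Queueing a state adds its
 * message size plus, for the first message of a type, a subheader;
 * pfsync_q_del() reverses both.  If the pending packet would exceed
 * the mtu, it is flushed first.
 */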
void
pfsync_q_ins(struct pf_state *st, int q)
{
	struct pfsync_softc *sc = pfsyncif;
	size_t nlen = pfsync_qs[q].len;

	KASSERT(st->sync_state == PFSYNC_S_NONE);

#if defined(PFSYNC_DEBUG)
	if (sc->sc_len < PFSYNC_MINPKT)
		panic("pfsync pkt len is too low %zu", sc->sc_len);
#endif
	if (TAILQ_EMPTY(&sc->sc_qs[q]))
		nlen += sizeof(struct pfsync_subheader);

	if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
		pfsync_sendout();

		nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len;
	}

	sc->sc_len += nlen;
	TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list);
	st->sync_state = q;
}

void
pfsync_q_del(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;
	int q = st->sync_state;

	KASSERT(st->sync_state != PFSYNC_S_NONE);

	sc->sc_len -= pfsync_qs[q].len;
	TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list);
	st->sync_state = PFSYNC_S_NONE;

	if (TAILQ_EMPTY(&sc->sc_qs[q]))
		sc->sc_len -= sizeof(struct pfsync_subheader);
}

void
pfsync_update_tdb(struct tdb *t, int output)
{
	struct pfsync_softc *sc = pfsyncif;
	size_t nlen = sizeof(struct pfsync_tdb);

	if (sc == NULL)
		return;

	if (!ISSET(t->tdb_flags, TDBF_PFSYNC)) {
		if (TAILQ_EMPTY(&sc->sc_tdb_q))
			nlen += sizeof(struct pfsync_subheader);

		if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
			pfsync_sendout();

			nlen = sizeof(struct pfsync_subheader) +
			    sizeof(struct pfsync_tdb);
		}

		sc->sc_len += nlen;
		TAILQ_INSERT_TAIL(&sc->sc_tdb_q, t, tdb_sync_entry);
		SET(t->tdb_flags, TDBF_PFSYNC);
		t->tdb_updates = 0;
	} else {
		if (++t->tdb_updates >= sc->sc_maxupdates)
			schednetisr(NETISR_PFSYNC);
	}

	if (output)
		SET(t->tdb_flags, TDBF_PFSYNC_RPL);
	else
		CLR(t->tdb_flags, TDBF_PFSYNC_RPL);
}

void
pfsync_delete_tdb(struct tdb *t)
{
	struct pfsync_softc *sc = pfsyncif;

	if (sc == NULL || !ISSET(t->tdb_flags, TDBF_PFSYNC))
		return;

	sc->sc_len -= sizeof(struct pfsync_tdb);
	TAILQ_REMOVE(&sc->sc_tdb_q, t, tdb_sync_entry);
	CLR(t->tdb_flags, TDBF_PFSYNC);

	if (TAILQ_EMPTY(&sc->sc_tdb_q))
		sc->sc_len -= sizeof(struct pfsync_subheader);
}

void
pfsync_out_tdb(struct tdb *t, void *buf)
{
	struct pfsync_tdb *ut = buf;

	bzero(ut, sizeof(*ut));
	ut->spi = t->tdb_spi;
	bcopy(&t->tdb_dst, &ut->dst, sizeof(ut->dst));
	/*
	 * When a failover happens, the master's rpl is probably above
	 * what we see here (we may be up to a second late), so
	 * increase it a bit for outbound tdbs to manage most such
	 * situations.
	 *
	 * For now, just add an offset that is likely to be larger
	 * than the number of packets we can see in one second. The RFC
	 * just says the next packet must have a higher seq value.
	 *
	 * XXX What is a good algorithm for this? We could use
	 * a rate-determined increase, but to know it, we would have
	 * to extend struct tdb.
	 * XXX pt->rpl can wrap over MAXINT, but if so the real tdb
	 * will soon be replaced anyway. For now, just don't handle
	 * this edge case.
	 */
#define RPL_INCR 16384
	ut->rpl = htobe64(t->tdb_rpl + (ISSET(t->tdb_flags, TDBF_PFSYNC_RPL) ?
	    RPL_INCR : 0));
	ut->cur_bytes = htobe64(t->tdb_cur_bytes);
	ut->sproto = t->tdb_sproto;
	ut->rdomain = htons(t->tdb_rdomain);
}
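
/*
 * Bulk update transmit side: walk the global state list and queue an
 * update for every state older than the request, one mtu-sized packet
 * at a time (pfsync_bulk_update() reschedules itself until the walk
 * wraps around to where it started).
 */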
void
pfsync_bulk_start(void)
{
	struct pfsync_softc *sc = pfsyncif;

	DPFPRINTF(LOG_INFO, "received bulk update request");

	if (TAILQ_EMPTY(&state_list))
		pfsync_bulk_status(PFSYNC_BUS_END);
	else {
		sc->sc_ureq_received = time_uptime;

		if (sc->sc_bulk_next == NULL)
			sc->sc_bulk_next = TAILQ_FIRST(&state_list);
		sc->sc_bulk_last = sc->sc_bulk_next;

		pfsync_bulk_status(PFSYNC_BUS_START);
		timeout_add(&sc->sc_bulk_tmo, 0);
	}
}

void
pfsync_bulk_update(void *arg)
{
	struct pfsync_softc *sc = arg;
	struct pf_state *st;
	int i = 0;

	NET_LOCK();
	st = sc->sc_bulk_next;

	for (;;) {
		if (st->sync_state == PFSYNC_S_NONE &&
		    st->timeout < PFTM_MAX &&
		    st->pfsync_time <= sc->sc_ureq_received) {
			pfsync_update_state_req(st);
			i++;
		}

		st = TAILQ_NEXT(st, entry_list);
		if (st == NULL)
			st = TAILQ_FIRST(&state_list);

		if (st == sc->sc_bulk_last) {
			/* we're done */
			sc->sc_bulk_next = NULL;
			sc->sc_bulk_last = NULL;
			pfsync_bulk_status(PFSYNC_BUS_END);
			break;
		}

		if (i > 1 && (sc->sc_if.if_mtu - sc->sc_len) <
		    sizeof(struct pfsync_state)) {
			/* we've filled a packet */
			sc->sc_bulk_next = st;
			timeout_add(&sc->sc_bulk_tmo, 1);
			break;
		}
	}
	NET_UNLOCK();
}

void
pfsync_bulk_status(u_int8_t status)
{
	struct {
		struct pfsync_subheader subh;
		struct pfsync_bus bus;
	} __packed r;

	struct pfsync_softc *sc = pfsyncif;

	bzero(&r, sizeof(r));

	r.subh.action = PFSYNC_ACT_BUS;
	r.subh.len = sizeof(struct pfsync_bus) >> 2;
	r.subh.count = htons(1);

	r.bus.creatorid = pf_status.hostid;
	r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received);
	r.bus.status = status;

	pfsync_send_plus(&r, sizeof(r));
}
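
/*
 * No bulk update end arrived in time: retry the request every five
 * seconds, up to PFSYNC_MAX_BULKTRIES; after that give up and clear
 * the carp demotion as if the transfer had succeeded.
 */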
void
pfsync_bulk_fail(void *arg)
{
	struct pfsync_softc *sc = arg;

	NET_LOCK();
	if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) {
		/* Try again */
		timeout_add_sec(&sc->sc_bulkfail_tmo, 5);
		pfsync_request_update(0, 0);
	} else {
		/* Pretend like the transfer was ok */
		sc->sc_ureq_sent = 0;
		sc->sc_bulk_tries = 0;
#if NCARP > 0
		if (!pfsync_sync_ok)
			carp_group_demote_adj(&sc->sc_if, -1,
			    sc->sc_link_demoted ?
			    "pfsync link state up" :
			    "pfsync bulk fail");
		if (sc->sc_initial_bulk) {
			carp_group_demote_adj(&sc->sc_if, -32,
			    "pfsync init");
			sc->sc_initial_bulk = 0;
		}
#endif
		pfsync_sync_ok = 1;
		sc->sc_link_demoted = 0;
		DPFPRINTF(LOG_ERR, "failed to receive bulk update");
	}
	NET_UNLOCK();
}

void
pfsync_send_plus(void *plus, size_t pluslen)
{
	struct pfsync_softc *sc = pfsyncif;

	if (sc->sc_len + pluslen > sc->sc_if.if_mtu)
		pfsync_sendout();

	sc->sc_plus = plus;
	sc->sc_len += (sc->sc_pluslen = pluslen);

	pfsync_sendout();
}

int
pfsync_up(void)
{
	struct pfsync_softc *sc = pfsyncif;

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
		return (0);

	return (1);
}

int
pfsync_state_in_use(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;

	if (sc == NULL)
		return (0);

	if (st->sync_state != PFSYNC_S_NONE ||
	    st == sc->sc_bulk_next ||
	    st == sc->sc_bulk_last)
		return (1);

	return (0);
}

void
pfsync_timeout(void *arg)
{
	NET_LOCK();
	pfsync_sendout();
	NET_UNLOCK();
}

/* this is a softnet/netisr handler */
void
pfsyncintr(void)
{
	pfsync_sendout();
}

int
pfsync_sysctl_pfsyncstat(void *oldp, size_t *oldlenp, void *newp)
{
	struct pfsyncstats pfsyncstat;

	CTASSERT(sizeof(pfsyncstat) == (pfsyncs_ncounters * sizeof(uint64_t)));
	memset(&pfsyncstat, 0, sizeof pfsyncstat);
	counters_read(pfsynccounters, (uint64_t *)&pfsyncstat,
	    pfsyncs_ncounters);
	return (sysctl_rdstruct(oldp, oldlenp, newp,
	    &pfsyncstat, sizeof(pfsyncstat)));
}

int
pfsync_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen)
{
	/* All sysctl names at this level are terminal. */
	if (namelen != 1)
		return (ENOTDIR);

	switch (name[0]) {
	case PFSYNCCTL_STATS:
		return (pfsync_sysctl_pfsyncstat(oldp, oldlenp, newp));
	default:
		return (ENOPROTOOPT);
	}
}