/*	$OpenBSD: if_pfsync.c,v 1.305 2022/04/21 15:22:49 sashan Exp $	*/

/*
 * Copyright (c) 2002 Michael Shalayeff
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 2009 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
43 */ 44 45 #include <sys/param.h> 46 #include <sys/systm.h> 47 #include <sys/time.h> 48 #include <sys/malloc.h> 49 #include <sys/mbuf.h> 50 #include <sys/socket.h> 51 #include <sys/ioctl.h> 52 #include <sys/timeout.h> 53 #include <sys/kernel.h> 54 #include <sys/sysctl.h> 55 #include <sys/pool.h> 56 #include <sys/syslog.h> 57 58 #include <net/if.h> 59 #include <net/if_types.h> 60 #include <net/bpf.h> 61 #include <net/netisr.h> 62 63 #include <netinet/in.h> 64 #include <netinet/if_ether.h> 65 #include <netinet/ip.h> 66 #include <netinet/in_var.h> 67 #include <netinet/ip_var.h> 68 #include <netinet/ip_ipsp.h> 69 #include <netinet/ip_icmp.h> 70 #include <netinet/icmp6.h> 71 #include <netinet/tcp.h> 72 #include <netinet/tcp_seq.h> 73 #include <netinet/tcp_fsm.h> 74 #include <netinet/udp.h> 75 76 #ifdef INET6 77 #include <netinet6/in6_var.h> 78 #include <netinet/ip6.h> 79 #include <netinet6/ip6_var.h> 80 #include <netinet6/nd6.h> 81 #endif /* INET6 */ 82 83 #include "carp.h" 84 #if NCARP > 0 85 #include <netinet/ip_carp.h> 86 #endif 87 88 #define PF_DEBUGNAME "pfsync: " 89 #include <net/pfvar.h> 90 #include <net/pfvar_priv.h> 91 #include <net/if_pfsync.h> 92 93 #include "bpfilter.h" 94 #include "pfsync.h" 95 96 #define PFSYNC_DEFER_NSEC 20000000ULL 97 98 #define PFSYNC_MINPKT ( \ 99 sizeof(struct ip) + \ 100 sizeof(struct pfsync_header)) 101 102 int pfsync_upd_tcp(struct pf_state *, struct pfsync_state_peer *, 103 struct pfsync_state_peer *); 104 105 int pfsync_in_clr(caddr_t, int, int, int); 106 int pfsync_in_iack(caddr_t, int, int, int); 107 int pfsync_in_upd_c(caddr_t, int, int, int); 108 int pfsync_in_ureq(caddr_t, int, int, int); 109 int pfsync_in_del(caddr_t, int, int, int); 110 int pfsync_in_del_c(caddr_t, int, int, int); 111 int pfsync_in_bus(caddr_t, int, int, int); 112 int pfsync_in_tdb(caddr_t, int, int, int); 113 int pfsync_in_ins(caddr_t, int, int, int); 114 int pfsync_in_upd(caddr_t, int, int, int); 115 int pfsync_in_eof(caddr_t, int, int, int); 116 117 int pfsync_in_error(caddr_t, int, int, int); 118 119 void pfsync_update_state_locked(struct pf_state *); 120 121 struct { 122 int (*in)(caddr_t, int, int, int); 123 size_t len; 124 } pfsync_acts[] = { 125 /* PFSYNC_ACT_CLR */ 126 { pfsync_in_clr, sizeof(struct pfsync_clr) }, 127 /* PFSYNC_ACT_OINS */ 128 { pfsync_in_error, 0 }, 129 /* PFSYNC_ACT_INS_ACK */ 130 { pfsync_in_iack, sizeof(struct pfsync_ins_ack) }, 131 /* PFSYNC_ACT_OUPD */ 132 { pfsync_in_error, 0 }, 133 /* PFSYNC_ACT_UPD_C */ 134 { pfsync_in_upd_c, sizeof(struct pfsync_upd_c) }, 135 /* PFSYNC_ACT_UPD_REQ */ 136 { pfsync_in_ureq, sizeof(struct pfsync_upd_req) }, 137 /* PFSYNC_ACT_DEL */ 138 { pfsync_in_del, sizeof(struct pfsync_state) }, 139 /* PFSYNC_ACT_DEL_C */ 140 { pfsync_in_del_c, sizeof(struct pfsync_del_c) }, 141 /* PFSYNC_ACT_INS_F */ 142 { pfsync_in_error, 0 }, 143 /* PFSYNC_ACT_DEL_F */ 144 { pfsync_in_error, 0 }, 145 /* PFSYNC_ACT_BUS */ 146 { pfsync_in_bus, sizeof(struct pfsync_bus) }, 147 /* PFSYNC_ACT_OTDB */ 148 { pfsync_in_error, 0 }, 149 /* PFSYNC_ACT_EOF */ 150 { pfsync_in_error, 0 }, 151 /* PFSYNC_ACT_INS */ 152 { pfsync_in_ins, sizeof(struct pfsync_state) }, 153 /* PFSYNC_ACT_UPD */ 154 { pfsync_in_upd, sizeof(struct pfsync_state) }, 155 /* PFSYNC_ACT_TDB */ 156 { pfsync_in_tdb, sizeof(struct pfsync_tdb) }, 157 }; 158 159 struct pfsync_q { 160 void (*write)(struct pf_state *, void *); 161 size_t len; 162 u_int8_t action; 163 }; 164 165 /* we have one of these for every PFSYNC_S_ */ 166 void pfsync_out_state(struct pf_state *, void *); 167 void 
pfsync_out_iack(struct pf_state *, void *); 168 void pfsync_out_upd_c(struct pf_state *, void *); 169 void pfsync_out_del(struct pf_state *, void *); 170 171 struct pfsync_q pfsync_qs[] = { 172 { pfsync_out_iack, sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK }, 173 { pfsync_out_upd_c, sizeof(struct pfsync_upd_c), PFSYNC_ACT_UPD_C }, 174 { pfsync_out_del, sizeof(struct pfsync_del_c), PFSYNC_ACT_DEL_C }, 175 { pfsync_out_state, sizeof(struct pfsync_state), PFSYNC_ACT_INS }, 176 { pfsync_out_state, sizeof(struct pfsync_state), PFSYNC_ACT_UPD } 177 }; 178 179 void pfsync_q_ins(struct pf_state *, int); 180 void pfsync_q_del(struct pf_state *); 181 182 struct pfsync_upd_req_item { 183 TAILQ_ENTRY(pfsync_upd_req_item) ur_entry; 184 TAILQ_ENTRY(pfsync_upd_req_item) ur_snap; 185 struct pfsync_upd_req ur_msg; 186 }; 187 TAILQ_HEAD(pfsync_upd_reqs, pfsync_upd_req_item); 188 189 struct pfsync_deferral { 190 TAILQ_ENTRY(pfsync_deferral) pd_entry; 191 struct pf_state *pd_st; 192 struct mbuf *pd_m; 193 uint64_t pd_deadline; 194 }; 195 TAILQ_HEAD(pfsync_deferrals, pfsync_deferral); 196 197 #define PFSYNC_PLSIZE MAX(sizeof(struct pfsync_upd_req_item), \ 198 sizeof(struct pfsync_deferral)) 199 200 void pfsync_out_tdb(struct tdb *, void *); 201 202 struct pfsync_softc { 203 struct ifnet sc_if; 204 unsigned int sc_sync_ifidx; 205 206 struct pool sc_pool; 207 208 struct ip_moptions sc_imo; 209 210 struct in_addr sc_sync_peer; 211 u_int8_t sc_maxupdates; 212 213 struct ip sc_template; 214 215 struct pf_state_queue sc_qs[PFSYNC_S_COUNT]; 216 struct mutex sc_st_mtx; 217 size_t sc_len; 218 219 struct pfsync_upd_reqs sc_upd_req_list; 220 struct mutex sc_upd_req_mtx; 221 222 int sc_initial_bulk; 223 int sc_link_demoted; 224 225 int sc_defer; 226 struct pfsync_deferrals sc_deferrals; 227 u_int sc_deferred; 228 struct mutex sc_deferrals_mtx; 229 struct timeout sc_deferrals_tmo; 230 231 void *sc_plus; 232 size_t sc_pluslen; 233 234 u_int32_t sc_ureq_sent; 235 int sc_bulk_tries; 236 struct timeout sc_bulkfail_tmo; 237 238 u_int32_t sc_ureq_received; 239 struct pf_state *sc_bulk_next; 240 struct pf_state *sc_bulk_last; 241 struct timeout sc_bulk_tmo; 242 243 TAILQ_HEAD(, tdb) sc_tdb_q; 244 struct mutex sc_tdb_mtx; 245 246 struct task sc_ltask; 247 struct task sc_dtask; 248 249 struct timeout sc_tmo; 250 }; 251 252 struct pfsync_snapshot { 253 struct pfsync_softc *sn_sc; 254 struct pf_state_queue sn_qs[PFSYNC_S_COUNT]; 255 struct pfsync_upd_reqs sn_upd_req_list; 256 TAILQ_HEAD(, tdb) sn_tdb_q; 257 size_t sn_len; 258 void *sn_plus; 259 size_t sn_pluslen; 260 }; 261 262 struct pfsync_softc *pfsyncif = NULL; 263 struct cpumem *pfsynccounters; 264 265 void pfsyncattach(int); 266 int pfsync_clone_create(struct if_clone *, int); 267 int pfsync_clone_destroy(struct ifnet *); 268 int pfsync_alloc_scrub_memory(struct pfsync_state_peer *, 269 struct pf_state_peer *); 270 void pfsync_update_net_tdb(struct pfsync_tdb *); 271 int pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *, 272 struct rtentry *); 273 int pfsyncioctl(struct ifnet *, u_long, caddr_t); 274 void pfsyncstart(struct ifqueue *); 275 void pfsync_syncdev_state(void *); 276 void pfsync_ifdetach(void *); 277 278 void pfsync_deferred(struct pf_state *, int); 279 void pfsync_undefer(struct pfsync_deferral *, int); 280 void pfsync_deferrals_tmo(void *); 281 282 void pfsync_cancel_full_update(struct pfsync_softc *); 283 void pfsync_request_full_update(struct pfsync_softc *); 284 void pfsync_request_update(u_int32_t, u_int64_t); 285 void 
pfsync_update_state_req(struct pf_state *); 286 287 void pfsync_drop(struct pfsync_softc *); 288 void pfsync_sendout(void); 289 void pfsync_send_plus(void *, size_t); 290 void pfsync_timeout(void *); 291 void pfsync_tdb_timeout(void *); 292 293 void pfsync_bulk_start(void); 294 void pfsync_bulk_status(u_int8_t); 295 void pfsync_bulk_update(void *); 296 void pfsync_bulk_fail(void *); 297 298 void pfsync_grab_snapshot(struct pfsync_snapshot *, struct pfsync_softc *); 299 void pfsync_drop_snapshot(struct pfsync_snapshot *); 300 301 void pfsync_send_dispatch(void *); 302 void pfsync_send_pkt(struct mbuf *); 303 304 static struct mbuf_queue pfsync_mq; 305 static struct task pfsync_task = 306 TASK_INITIALIZER(pfsync_send_dispatch, &pfsync_mq); 307 308 #define PFSYNC_MAX_BULKTRIES 12 309 int pfsync_sync_ok; 310 311 struct if_clone pfsync_cloner = 312 IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create, pfsync_clone_destroy); 313 314 void 315 pfsyncattach(int npfsync) 316 { 317 if_clone_attach(&pfsync_cloner); 318 pfsynccounters = counters_alloc(pfsyncs_ncounters); 319 mq_init(&pfsync_mq, 4096, IPL_MPFLOOR); 320 } 321 322 int 323 pfsync_clone_create(struct if_clone *ifc, int unit) 324 { 325 struct pfsync_softc *sc; 326 struct ifnet *ifp; 327 int q; 328 329 if (unit != 0) 330 return (EINVAL); 331 332 pfsync_sync_ok = 1; 333 334 sc = malloc(sizeof(*pfsyncif), M_DEVBUF, M_WAITOK|M_ZERO); 335 for (q = 0; q < PFSYNC_S_COUNT; q++) 336 TAILQ_INIT(&sc->sc_qs[q]); 337 mtx_init(&sc->sc_st_mtx, IPL_MPFLOOR); 338 339 pool_init(&sc->sc_pool, PFSYNC_PLSIZE, 0, IPL_MPFLOOR, 0, "pfsync", 340 NULL); 341 TAILQ_INIT(&sc->sc_upd_req_list); 342 mtx_init(&sc->sc_upd_req_mtx, IPL_MPFLOOR); 343 TAILQ_INIT(&sc->sc_deferrals); 344 mtx_init(&sc->sc_deferrals_mtx, IPL_MPFLOOR); 345 timeout_set_proc(&sc->sc_deferrals_tmo, pfsync_deferrals_tmo, sc); 346 task_set(&sc->sc_ltask, pfsync_syncdev_state, sc); 347 task_set(&sc->sc_dtask, pfsync_ifdetach, sc); 348 sc->sc_deferred = 0; 349 350 TAILQ_INIT(&sc->sc_tdb_q); 351 mtx_init(&sc->sc_tdb_mtx, IPL_MPFLOOR); 352 353 sc->sc_len = PFSYNC_MINPKT; 354 sc->sc_maxupdates = 128; 355 356 sc->sc_imo.imo_membership = mallocarray(IP_MIN_MEMBERSHIPS, 357 sizeof(struct in_multi *), M_IPMOPTS, M_WAITOK|M_ZERO); 358 sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS; 359 360 ifp = &sc->sc_if; 361 snprintf(ifp->if_xname, sizeof ifp->if_xname, "pfsync%d", unit); 362 ifp->if_softc = sc; 363 ifp->if_ioctl = pfsyncioctl; 364 ifp->if_output = pfsyncoutput; 365 ifp->if_qstart = pfsyncstart; 366 ifp->if_type = IFT_PFSYNC; 367 ifp->if_hdrlen = sizeof(struct pfsync_header); 368 ifp->if_mtu = ETHERMTU; 369 ifp->if_xflags = IFXF_CLONED | IFXF_MPSAFE; 370 timeout_set_proc(&sc->sc_tmo, pfsync_timeout, NULL); 371 timeout_set_proc(&sc->sc_bulk_tmo, pfsync_bulk_update, NULL); 372 timeout_set_proc(&sc->sc_bulkfail_tmo, pfsync_bulk_fail, NULL); 373 374 if_attach(ifp); 375 if_alloc_sadl(ifp); 376 377 #if NCARP > 0 378 if_addgroup(ifp, "carp"); 379 #endif 380 381 #if NBPFILTER > 0 382 bpfattach(&sc->sc_if.if_bpf, ifp, DLT_PFSYNC, PFSYNC_HDRLEN); 383 #endif 384 385 pfsyncif = sc; 386 387 return (0); 388 } 389 390 int 391 pfsync_clone_destroy(struct ifnet *ifp) 392 { 393 struct pfsync_softc *sc = ifp->if_softc; 394 struct ifnet *ifp0; 395 struct pfsync_deferral *pd; 396 struct pfsync_deferrals deferrals; 397 398 NET_LOCK(); 399 400 #if NCARP > 0 401 if (!pfsync_sync_ok) 402 carp_group_demote_adj(&sc->sc_if, -1, "pfsync destroy"); 403 if (sc->sc_link_demoted) 404 carp_group_demote_adj(&sc->sc_if, -1, "pfsync destroy"); 
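	/*
	 * The adjustments above release any carp demotion still held by
	 * this interface (taken in pfsync_request_full_update() and
	 * pfsync_syncdev_state()) before the clone goes away.
	 */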
405 #endif 406 if ((ifp0 = if_get(sc->sc_sync_ifidx)) != NULL) { 407 if_linkstatehook_del(ifp0, &sc->sc_ltask); 408 if_detachhook_del(ifp0, &sc->sc_dtask); 409 } 410 if_put(ifp0); 411 412 /* XXXSMP breaks atomicity */ 413 NET_UNLOCK(); 414 if_detach(ifp); 415 NET_LOCK(); 416 417 pfsync_drop(sc); 418 419 if (sc->sc_deferred > 0) { 420 TAILQ_INIT(&deferrals); 421 mtx_enter(&sc->sc_deferrals_mtx); 422 TAILQ_CONCAT(&deferrals, &sc->sc_deferrals, pd_entry); 423 sc->sc_deferred = 0; 424 mtx_leave(&sc->sc_deferrals_mtx); 425 426 while ((pd = TAILQ_FIRST(&deferrals)) != NULL) { 427 TAILQ_REMOVE(&deferrals, pd, pd_entry); 428 pfsync_undefer(pd, 0); 429 } 430 } 431 432 pfsyncif = NULL; 433 timeout_del(&sc->sc_bulkfail_tmo); 434 timeout_del(&sc->sc_bulk_tmo); 435 timeout_del(&sc->sc_tmo); 436 437 NET_UNLOCK(); 438 439 pool_destroy(&sc->sc_pool); 440 free(sc->sc_imo.imo_membership, M_IPMOPTS, 441 sc->sc_imo.imo_max_memberships * sizeof(struct in_multi *)); 442 free(sc, M_DEVBUF, sizeof(*sc)); 443 444 return (0); 445 } 446 447 /* 448 * Start output on the pfsync interface. 449 */ 450 void 451 pfsyncstart(struct ifqueue *ifq) 452 { 453 ifq_purge(ifq); 454 } 455 456 void 457 pfsync_syncdev_state(void *arg) 458 { 459 struct pfsync_softc *sc = arg; 460 struct ifnet *ifp; 461 462 if ((sc->sc_if.if_flags & IFF_UP) == 0) 463 return; 464 if ((ifp = if_get(sc->sc_sync_ifidx)) == NULL) 465 return; 466 467 if (ifp->if_link_state == LINK_STATE_DOWN) { 468 sc->sc_if.if_flags &= ~IFF_RUNNING; 469 if (!sc->sc_link_demoted) { 470 #if NCARP > 0 471 carp_group_demote_adj(&sc->sc_if, 1, 472 "pfsync link state down"); 473 #endif 474 sc->sc_link_demoted = 1; 475 } 476 477 /* drop everything */ 478 timeout_del(&sc->sc_tmo); 479 pfsync_drop(sc); 480 481 pfsync_cancel_full_update(sc); 482 } else if (sc->sc_link_demoted) { 483 sc->sc_if.if_flags |= IFF_RUNNING; 484 485 pfsync_request_full_update(sc); 486 } 487 488 if_put(ifp); 489 } 490 491 void 492 pfsync_ifdetach(void *arg) 493 { 494 struct pfsync_softc *sc = arg; 495 struct ifnet *ifp; 496 497 if ((ifp = if_get(sc->sc_sync_ifidx)) != NULL) { 498 if_linkstatehook_del(ifp, &sc->sc_ltask); 499 if_detachhook_del(ifp, &sc->sc_dtask); 500 } 501 if_put(ifp); 502 503 sc->sc_sync_ifidx = 0; 504 } 505 506 int 507 pfsync_alloc_scrub_memory(struct pfsync_state_peer *s, 508 struct pf_state_peer *d) 509 { 510 if (s->scrub.scrub_flag && d->scrub == NULL) { 511 d->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT | PR_ZERO); 512 if (d->scrub == NULL) 513 return (ENOMEM); 514 } 515 516 return (0); 517 } 518 519 void 520 pfsync_state_export(struct pfsync_state *sp, struct pf_state *st) 521 { 522 pf_state_export(sp, st); 523 } 524 525 int 526 pfsync_state_import(struct pfsync_state *sp, int flags) 527 { 528 struct pf_state *st = NULL; 529 struct pf_state_key *skw = NULL, *sks = NULL; 530 struct pf_rule *r = NULL; 531 struct pfi_kif *kif; 532 int pool_flags; 533 int error = ENOMEM; 534 int n = 0; 535 536 if (sp->creatorid == 0) { 537 DPFPRINTF(LOG_NOTICE, "pfsync_state_import: " 538 "invalid creator id: %08x", ntohl(sp->creatorid)); 539 return (EINVAL); 540 } 541 542 if ((kif = pfi_kif_get(sp->ifname, NULL)) == NULL) { 543 DPFPRINTF(LOG_NOTICE, "pfsync_state_import: " 544 "unknown interface: %s", sp->ifname); 545 if (flags & PFSYNC_SI_IOCTL) 546 return (EINVAL); 547 return (0); /* skip this state */ 548 } 549 550 if (sp->af == 0) 551 return (0); /* skip this state */ 552 553 /* 554 * If the ruleset checksums match or the state is coming from the ioctl, 555 * it's safe to associate the state with 
the rule of that number. 556 */ 557 if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) && 558 (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) < 559 pf_main_ruleset.rules.active.rcount) { 560 TAILQ_FOREACH(r, pf_main_ruleset.rules.active.ptr, entries) 561 if (ntohl(sp->rule) == n++) 562 break; 563 } else 564 r = &pf_default_rule; 565 566 if ((r->max_states && r->states_cur >= r->max_states)) 567 goto cleanup; 568 569 if (flags & PFSYNC_SI_IOCTL) 570 pool_flags = PR_WAITOK | PR_LIMITFAIL | PR_ZERO; 571 else 572 pool_flags = PR_NOWAIT | PR_LIMITFAIL | PR_ZERO; 573 574 if ((st = pool_get(&pf_state_pl, pool_flags)) == NULL) 575 goto cleanup; 576 577 if ((skw = pf_alloc_state_key(pool_flags)) == NULL) 578 goto cleanup; 579 580 if ((sp->key[PF_SK_WIRE].af && 581 (sp->key[PF_SK_WIRE].af != sp->key[PF_SK_STACK].af)) || 582 PF_ANEQ(&sp->key[PF_SK_WIRE].addr[0], 583 &sp->key[PF_SK_STACK].addr[0], sp->af) || 584 PF_ANEQ(&sp->key[PF_SK_WIRE].addr[1], 585 &sp->key[PF_SK_STACK].addr[1], sp->af) || 586 sp->key[PF_SK_WIRE].port[0] != sp->key[PF_SK_STACK].port[0] || 587 sp->key[PF_SK_WIRE].port[1] != sp->key[PF_SK_STACK].port[1] || 588 sp->key[PF_SK_WIRE].rdomain != sp->key[PF_SK_STACK].rdomain) { 589 if ((sks = pf_alloc_state_key(pool_flags)) == NULL) 590 goto cleanup; 591 } else 592 sks = skw; 593 594 /* allocate memory for scrub info */ 595 if (pfsync_alloc_scrub_memory(&sp->src, &st->src) || 596 pfsync_alloc_scrub_memory(&sp->dst, &st->dst)) 597 goto cleanup; 598 599 /* copy to state key(s) */ 600 skw->addr[0] = sp->key[PF_SK_WIRE].addr[0]; 601 skw->addr[1] = sp->key[PF_SK_WIRE].addr[1]; 602 skw->port[0] = sp->key[PF_SK_WIRE].port[0]; 603 skw->port[1] = sp->key[PF_SK_WIRE].port[1]; 604 skw->rdomain = ntohs(sp->key[PF_SK_WIRE].rdomain); 605 PF_REF_INIT(skw->refcnt); 606 skw->proto = sp->proto; 607 if (!(skw->af = sp->key[PF_SK_WIRE].af)) 608 skw->af = sp->af; 609 if (sks != skw) { 610 sks->addr[0] = sp->key[PF_SK_STACK].addr[0]; 611 sks->addr[1] = sp->key[PF_SK_STACK].addr[1]; 612 sks->port[0] = sp->key[PF_SK_STACK].port[0]; 613 sks->port[1] = sp->key[PF_SK_STACK].port[1]; 614 sks->rdomain = ntohs(sp->key[PF_SK_STACK].rdomain); 615 PF_REF_INIT(sks->refcnt); 616 if (!(sks->af = sp->key[PF_SK_STACK].af)) 617 sks->af = sp->af; 618 if (sks->af != skw->af) { 619 switch (sp->proto) { 620 case IPPROTO_ICMP: 621 sks->proto = IPPROTO_ICMPV6; 622 break; 623 case IPPROTO_ICMPV6: 624 sks->proto = IPPROTO_ICMP; 625 break; 626 default: 627 sks->proto = sp->proto; 628 } 629 } else 630 sks->proto = sp->proto; 631 632 if (((sks->af != AF_INET) && (sks->af != AF_INET6)) || 633 ((skw->af != AF_INET) && (skw->af != AF_INET6))) { 634 error = EINVAL; 635 goto cleanup; 636 } 637 638 } else if ((sks->af != AF_INET) && (sks->af != AF_INET6)) { 639 error = EINVAL; 640 goto cleanup; 641 } 642 st->rtableid[PF_SK_WIRE] = ntohl(sp->rtableid[PF_SK_WIRE]); 643 st->rtableid[PF_SK_STACK] = ntohl(sp->rtableid[PF_SK_STACK]); 644 645 /* copy to state */ 646 st->rt_addr = sp->rt_addr; 647 st->rt = sp->rt; 648 st->creation = getuptime() - ntohl(sp->creation); 649 st->expire = getuptime(); 650 if (ntohl(sp->expire)) { 651 u_int32_t timeout; 652 653 timeout = r->timeout[sp->timeout]; 654 if (!timeout) 655 timeout = pf_default_rule.timeout[sp->timeout]; 656 657 /* sp->expire may have been adaptively scaled by export. 
 */
		st->expire -= timeout - ntohl(sp->expire);
	}

	st->direction = sp->direction;
	st->log = sp->log;
	st->timeout = sp->timeout;
	st->state_flags = ntohs(sp->state_flags);
	st->max_mss = ntohs(sp->max_mss);
	st->min_ttl = sp->min_ttl;
	st->set_tos = sp->set_tos;
	st->set_prio[0] = sp->set_prio[0];
	st->set_prio[1] = sp->set_prio[1];

	st->id = sp->id;
	st->creatorid = sp->creatorid;
	pf_state_peer_ntoh(&sp->src, &st->src);
	pf_state_peer_ntoh(&sp->dst, &st->dst);

	st->rule.ptr = r;
	st->anchor.ptr = NULL;

	st->pfsync_time = getuptime();
	st->sync_state = PFSYNC_S_NONE;

	refcnt_init(&st->refcnt);

	/* XXX when we have anchors, use STATE_INC_COUNTERS */
	r->states_cur++;
	r->states_tot++;

	if (!ISSET(flags, PFSYNC_SI_IOCTL))
		SET(st->state_flags, PFSTATE_NOSYNC);

	/*
	 * We just set the PFSTATE_NOSYNC bit, which prevents
	 * pfsync_insert_state() from inserting the state into pfsync.
	 */
	if (pf_state_insert(kif, &skw, &sks, st) != 0) {
		/* XXX when we have anchors, use STATE_DEC_COUNTERS */
		r->states_cur--;
		error = EEXIST;
		goto cleanup_state;
	}

	if (!ISSET(flags, PFSYNC_SI_IOCTL)) {
		CLR(st->state_flags, PFSTATE_NOSYNC);
		if (ISSET(st->state_flags, PFSTATE_ACK)) {
			pfsync_q_ins(st, PFSYNC_S_IACK);
			schednetisr(NETISR_PFSYNC);
		}
	}
	CLR(st->state_flags, PFSTATE_ACK);

	return (0);

 cleanup:
	if (skw == sks)
		sks = NULL;
	if (skw != NULL)
		pool_put(&pf_state_key_pl, skw);
	if (sks != NULL)
		pool_put(&pf_state_key_pl, sks);

 cleanup_state:	/* pf_state_insert frees the state keys */
	if (st) {
		if (st->dst.scrub)
			pool_put(&pf_state_scrub_pl, st->dst.scrub);
		if (st->src.scrub)
			pool_put(&pf_state_scrub_pl, st->src.scrub);
		pool_put(&pf_state_pl, st);
	}
	return (error);
}

int
pfsync_input(struct mbuf **mp, int *offp, int proto, int af)
{
	struct mbuf *n, *m = *mp;
	struct pfsync_softc *sc = pfsyncif;
	struct ip *ip = mtod(m, struct ip *);
	struct pfsync_header *ph;
	struct pfsync_subheader subh;
	int offset, noff, len, count, mlen, flags = 0;
	int e;

	NET_ASSERT_LOCKED();

	pfsyncstat_inc(pfsyncs_ipackets);

	/* verify that we have a sync interface configured */
	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
	    sc->sc_sync_ifidx == 0 || !pf_status.running)
		goto done;

	/* verify that the packet came in on the right interface */
	if (sc->sc_sync_ifidx != m->m_pkthdr.ph_ifidx) {
		pfsyncstat_inc(pfsyncs_badif);
		goto done;
	}

	sc->sc_if.if_ipackets++;
	sc->sc_if.if_ibytes += m->m_pkthdr.len;

	/* verify that the IP TTL is 255.
*/ 762 if (ip->ip_ttl != PFSYNC_DFLTTL) { 763 pfsyncstat_inc(pfsyncs_badttl); 764 goto done; 765 } 766 767 offset = ip->ip_hl << 2; 768 n = m_pulldown(m, offset, sizeof(*ph), &noff); 769 if (n == NULL) { 770 pfsyncstat_inc(pfsyncs_hdrops); 771 return IPPROTO_DONE; 772 } 773 ph = (struct pfsync_header *)(n->m_data + noff); 774 775 /* verify the version */ 776 if (ph->version != PFSYNC_VERSION) { 777 pfsyncstat_inc(pfsyncs_badver); 778 goto done; 779 } 780 len = ntohs(ph->len) + offset; 781 if (m->m_pkthdr.len < len) { 782 pfsyncstat_inc(pfsyncs_badlen); 783 goto done; 784 } 785 786 if (!bcmp(&ph->pfcksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH)) 787 flags = PFSYNC_SI_CKSUM; 788 789 offset += sizeof(*ph); 790 while (offset <= len - sizeof(subh)) { 791 m_copydata(m, offset, sizeof(subh), &subh); 792 offset += sizeof(subh); 793 794 mlen = subh.len << 2; 795 count = ntohs(subh.count); 796 797 if (subh.action >= PFSYNC_ACT_MAX || 798 subh.action >= nitems(pfsync_acts) || 799 mlen < pfsync_acts[subh.action].len) { 800 /* 801 * subheaders are always followed by at least one 802 * message, so if the peer is new 803 * enough to tell us how big its messages are then we 804 * know enough to skip them. 805 */ 806 if (count > 0 && mlen > 0) { 807 offset += count * mlen; 808 continue; 809 } 810 pfsyncstat_inc(pfsyncs_badact); 811 goto done; 812 } 813 814 n = m_pulldown(m, offset, mlen * count, &noff); 815 if (n == NULL) { 816 pfsyncstat_inc(pfsyncs_badlen); 817 return IPPROTO_DONE; 818 } 819 820 e = pfsync_acts[subh.action].in(n->m_data + noff, mlen, count, 821 flags); 822 if (e != 0) 823 goto done; 824 825 offset += mlen * count; 826 } 827 828 done: 829 m_freem(m); 830 return IPPROTO_DONE; 831 } 832 833 int 834 pfsync_in_clr(caddr_t buf, int len, int count, int flags) 835 { 836 struct pfsync_clr *clr; 837 struct pf_state *st, *nexts; 838 struct pfi_kif *kif; 839 u_int32_t creatorid; 840 int i; 841 842 PF_LOCK(); 843 for (i = 0; i < count; i++) { 844 clr = (struct pfsync_clr *)buf + len * i; 845 kif = NULL; 846 creatorid = clr->creatorid; 847 if (strlen(clr->ifname) && 848 (kif = pfi_kif_find(clr->ifname)) == NULL) 849 continue; 850 851 PF_STATE_ENTER_WRITE(); 852 for (st = RB_MIN(pf_state_tree_id, &tree_id); st; st = nexts) { 853 nexts = RB_NEXT(pf_state_tree_id, &tree_id, st); 854 if (st->creatorid == creatorid && 855 ((kif && st->kif == kif) || !kif)) { 856 SET(st->state_flags, PFSTATE_NOSYNC); 857 pf_remove_state(st); 858 } 859 } 860 PF_STATE_EXIT_WRITE(); 861 } 862 PF_UNLOCK(); 863 864 return (0); 865 } 866 867 int 868 pfsync_in_ins(caddr_t buf, int len, int count, int flags) 869 { 870 struct pfsync_state *sp; 871 sa_family_t af1, af2; 872 int i; 873 874 PF_LOCK(); 875 for (i = 0; i < count; i++) { 876 sp = (struct pfsync_state *)(buf + len * i); 877 af1 = sp->key[0].af; 878 af2 = sp->key[1].af; 879 880 /* check for invalid values */ 881 if (sp->timeout >= PFTM_MAX || 882 sp->src.state > PF_TCPS_PROXY_DST || 883 sp->dst.state > PF_TCPS_PROXY_DST || 884 sp->direction > PF_OUT || 885 (((af1 || af2) && 886 ((af1 != AF_INET && af1 != AF_INET6) || 887 (af2 != AF_INET && af2 != AF_INET6))) || 888 (sp->af != AF_INET && sp->af != AF_INET6))) { 889 DPFPRINTF(LOG_NOTICE, 890 "pfsync_input: PFSYNC5_ACT_INS: invalid value"); 891 pfsyncstat_inc(pfsyncs_badval); 892 continue; 893 } 894 895 if (pfsync_state_import(sp, flags) == ENOMEM) { 896 /* drop out, but process the rest of the actions */ 897 break; 898 } 899 } 900 PF_UNLOCK(); 901 902 return (0); 903 } 904 905 int 906 pfsync_in_iack(caddr_t buf, int len, 
    int count, int flags)
{
	struct pfsync_ins_ack *ia;
	struct pf_state_cmp id_key;
	struct pf_state *st;
	int i;

	for (i = 0; i < count; i++) {
		ia = (struct pfsync_ins_ack *)(buf + len * i);

		id_key.id = ia->id;
		id_key.creatorid = ia->creatorid;

		PF_STATE_ENTER_READ();
		st = pf_find_state_byid(&id_key);
		pf_state_ref(st);
		PF_STATE_EXIT_READ();
		if (st == NULL)
			continue;

		if (ISSET(st->state_flags, PFSTATE_ACK))
			pfsync_deferred(st, 0);

		pf_state_unref(st);
	}

	return (0);
}

int
pfsync_upd_tcp(struct pf_state *st, struct pfsync_state_peer *src,
    struct pfsync_state_peer *dst)
{
	int sync = 0;

	/*
	 * The state should never go backwards except
	 * for syn-proxy states. Neither should the
	 * sequence window slide backwards.
	 */
	if ((st->src.state > src->state &&
	    (st->src.state < PF_TCPS_PROXY_SRC ||
	    src->state >= PF_TCPS_PROXY_SRC)) ||

	    (st->src.state == src->state &&
	    SEQ_GT(st->src.seqlo, ntohl(src->seqlo))))
		sync++;
	else
		pf_state_peer_ntoh(src, &st->src);

	if ((st->dst.state > dst->state) ||

	    (st->dst.state >= TCPS_SYN_SENT &&
	    SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo))))
		sync++;
	else
		pf_state_peer_ntoh(dst, &st->dst);

	return (sync);
}

int
pfsync_in_upd(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_state *sp;
	struct pf_state_cmp id_key;
	struct pf_state *st;
	int sync, error;
	int i;

	for (i = 0; i < count; i++) {
		sp = (struct pfsync_state *)(buf + len * i);

		/* check for invalid values */
		if (sp->timeout >= PFTM_MAX ||
		    sp->src.state > PF_TCPS_PROXY_DST ||
		    sp->dst.state > PF_TCPS_PROXY_DST) {
			DPFPRINTF(LOG_NOTICE,
			    "pfsync_input: PFSYNC_ACT_UPD: invalid value");
			pfsyncstat_inc(pfsyncs_badval);
			continue;
		}

		id_key.id = sp->id;
		id_key.creatorid = sp->creatorid;

		PF_STATE_ENTER_READ();
		st = pf_find_state_byid(&id_key);
		pf_state_ref(st);
		PF_STATE_EXIT_READ();
		if (st == NULL) {
			/* insert the update */
			PF_LOCK();
			error = pfsync_state_import(sp, flags);
			if (error)
				pfsyncstat_inc(pfsyncs_badstate);
			PF_UNLOCK();
			continue;
		}

		if (ISSET(st->state_flags, PFSTATE_ACK))
			pfsync_deferred(st, 1);

		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
			sync = pfsync_upd_tcp(st, &sp->src, &sp->dst);
		else {
			sync = 0;

			/*
			 * Non-TCP protocol state machines always go
			 * forwards.
			 */
			if (st->src.state > sp->src.state)
				sync++;
			else
				pf_state_peer_ntoh(&sp->src, &st->src);

			if (st->dst.state > sp->dst.state)
				sync++;
			else
				pf_state_peer_ntoh(&sp->dst, &st->dst);
		}

		if (sync < 2) {
			pfsync_alloc_scrub_memory(&sp->dst, &st->dst);
			pf_state_peer_ntoh(&sp->dst, &st->dst);
			st->expire = getuptime();
			st->timeout = sp->timeout;
		}
		st->pfsync_time = getuptime();

		if (sync) {
			pfsyncstat_inc(pfsyncs_stale);

			pfsync_update_state(st);
			schednetisr(NETISR_PFSYNC);
		}

		pf_state_unref(st);
	}

	return (0);
}

int
pfsync_in_upd_c(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_upd_c *up;
	struct pf_state_cmp id_key;
	struct pf_state *st;

	int sync;

	int i;

	for (i = 0; i < count; i++) {
		up = (struct pfsync_upd_c *)(buf + len * i);

		/* check for invalid values */
		if (up->timeout >= PFTM_MAX ||
		    up->src.state > PF_TCPS_PROXY_DST ||
		    up->dst.state > PF_TCPS_PROXY_DST) {
			DPFPRINTF(LOG_NOTICE,
			    "pfsync_input: PFSYNC_ACT_UPD_C: invalid value");
			pfsyncstat_inc(pfsyncs_badval);
			continue;
		}

		id_key.id = up->id;
		id_key.creatorid = up->creatorid;

		PF_STATE_ENTER_READ();
		st = pf_find_state_byid(&id_key);
		pf_state_ref(st);
		PF_STATE_EXIT_READ();
		if (st == NULL) {
			/* We don't have this state. Ask for it. */
			pfsync_request_update(id_key.creatorid, id_key.id);
			continue;
		}

		if (ISSET(st->state_flags, PFSTATE_ACK))
			pfsync_deferred(st, 1);

		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
			sync = pfsync_upd_tcp(st, &up->src, &up->dst);
		else {
			sync = 0;
			/*
			 * Non-TCP protocol state machines always go
			 * forwards.
			 */
			if (st->src.state > up->src.state)
				sync++;
			else
				pf_state_peer_ntoh(&up->src, &st->src);

			if (st->dst.state > up->dst.state)
				sync++;
			else
				pf_state_peer_ntoh(&up->dst, &st->dst);
		}
		if (sync < 2) {
			pfsync_alloc_scrub_memory(&up->dst, &st->dst);
			pf_state_peer_ntoh(&up->dst, &st->dst);
			st->expire = getuptime();
			st->timeout = up->timeout;
		}
		st->pfsync_time = getuptime();

		if (sync) {
			pfsyncstat_inc(pfsyncs_stale);

			pfsync_update_state(st);
			schednetisr(NETISR_PFSYNC);
		}

		pf_state_unref(st);
	}

	return (0);
}

int
pfsync_in_ureq(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_upd_req *ur;
	int i;

	struct pf_state_cmp id_key;
	struct pf_state *st;

	for (i = 0; i < count; i++) {
		ur = (struct pfsync_upd_req *)(buf + len * i);

		id_key.id = ur->id;
		id_key.creatorid = ur->creatorid;

		if (id_key.id == 0 && id_key.creatorid == 0)
			pfsync_bulk_start();
		else {
			PF_STATE_ENTER_READ();
			st = pf_find_state_byid(&id_key);
			pf_state_ref(st);
			PF_STATE_EXIT_READ();
			if (st == NULL) {
				pfsyncstat_inc(pfsyncs_badstate);
				continue;
			}
			if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
				pf_state_unref(st);
				continue;
			}

			pfsync_update_state_req(st);
			pf_state_unref(st);
		}
	}

	return (0);
}

int
pfsync_in_del(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_state *sp;
	struct pf_state_cmp id_key;
	struct pf_state *st;
	int i;

	PF_STATE_ENTER_WRITE();
	for (i = 0; i < count; i++) {
		sp = (struct pfsync_state *)(buf + len * i);

		id_key.id = sp->id;
		id_key.creatorid = sp->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL) {
			pfsyncstat_inc(pfsyncs_badstate);
			continue;
		}
		SET(st->state_flags, PFSTATE_NOSYNC);
		pf_remove_state(st);
	}
	PF_STATE_EXIT_WRITE();

	return (0);
}

int
pfsync_in_del_c(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_del_c *sp;
	struct pf_state_cmp id_key;
	struct pf_state *st;
	int i;

	PF_LOCK();
	PF_STATE_ENTER_WRITE();
	for (i = 0; i < count; i++) {
		sp = (struct pfsync_del_c *)(buf + len * i);

		id_key.id = sp->id;
		id_key.creatorid = sp->creatorid;

		st = pf_find_state_byid(&id_key);
		if
(st == NULL) { 1214 pfsyncstat_inc(pfsyncs_badstate); 1215 continue; 1216 } 1217 1218 SET(st->state_flags, PFSTATE_NOSYNC); 1219 pf_remove_state(st); 1220 } 1221 PF_STATE_EXIT_WRITE(); 1222 PF_UNLOCK(); 1223 1224 return (0); 1225 } 1226 1227 int 1228 pfsync_in_bus(caddr_t buf, int len, int count, int flags) 1229 { 1230 struct pfsync_softc *sc = pfsyncif; 1231 struct pfsync_bus *bus; 1232 1233 /* If we're not waiting for a bulk update, who cares. */ 1234 if (sc->sc_ureq_sent == 0) 1235 return (0); 1236 1237 bus = (struct pfsync_bus *)buf; 1238 1239 switch (bus->status) { 1240 case PFSYNC_BUS_START: 1241 timeout_add(&sc->sc_bulkfail_tmo, 4 * hz + 1242 pf_pool_limits[PF_LIMIT_STATES].limit / 1243 ((sc->sc_if.if_mtu - PFSYNC_MINPKT) / 1244 sizeof(struct pfsync_state))); 1245 DPFPRINTF(LOG_INFO, "received bulk update start"); 1246 break; 1247 1248 case PFSYNC_BUS_END: 1249 if (getuptime() - ntohl(bus->endtime) >= 1250 sc->sc_ureq_sent) { 1251 /* that's it, we're happy */ 1252 sc->sc_ureq_sent = 0; 1253 sc->sc_bulk_tries = 0; 1254 timeout_del(&sc->sc_bulkfail_tmo); 1255 #if NCARP > 0 1256 if (!pfsync_sync_ok) 1257 carp_group_demote_adj(&sc->sc_if, -1, 1258 sc->sc_link_demoted ? 1259 "pfsync link state up" : 1260 "pfsync bulk done"); 1261 if (sc->sc_initial_bulk) { 1262 carp_group_demote_adj(&sc->sc_if, -32, 1263 "pfsync init"); 1264 sc->sc_initial_bulk = 0; 1265 } 1266 #endif 1267 pfsync_sync_ok = 1; 1268 sc->sc_link_demoted = 0; 1269 DPFPRINTF(LOG_INFO, "received valid bulk update end"); 1270 } else { 1271 DPFPRINTF(LOG_WARNING, "received invalid " 1272 "bulk update end: bad timestamp"); 1273 } 1274 break; 1275 } 1276 1277 return (0); 1278 } 1279 1280 int 1281 pfsync_in_tdb(caddr_t buf, int len, int count, int flags) 1282 { 1283 #if defined(IPSEC) 1284 struct pfsync_tdb *tp; 1285 int i; 1286 1287 for (i = 0; i < count; i++) { 1288 tp = (struct pfsync_tdb *)(buf + len * i); 1289 pfsync_update_net_tdb(tp); 1290 } 1291 #endif 1292 1293 return (0); 1294 } 1295 1296 #if defined(IPSEC) 1297 /* Update an in-kernel tdb. Silently fail if no tdb is found. */ 1298 void 1299 pfsync_update_net_tdb(struct pfsync_tdb *pt) 1300 { 1301 struct tdb *tdb; 1302 1303 NET_ASSERT_LOCKED(); 1304 1305 /* check for invalid values */ 1306 if (ntohl(pt->spi) <= SPI_RESERVED_MAX || 1307 (pt->dst.sa.sa_family != AF_INET && 1308 pt->dst.sa.sa_family != AF_INET6)) 1309 goto bad; 1310 1311 tdb = gettdb(ntohs(pt->rdomain), pt->spi, 1312 (union sockaddr_union *)&pt->dst, pt->sproto); 1313 if (tdb) { 1314 pt->rpl = betoh64(pt->rpl); 1315 pt->cur_bytes = betoh64(pt->cur_bytes); 1316 1317 /* Neither replay nor byte counter should ever decrease. */ 1318 if (pt->rpl < tdb->tdb_rpl || 1319 pt->cur_bytes < tdb->tdb_cur_bytes) { 1320 tdb_unref(tdb); 1321 goto bad; 1322 } 1323 1324 tdb->tdb_rpl = pt->rpl; 1325 tdb->tdb_cur_bytes = pt->cur_bytes; 1326 tdb_unref(tdb); 1327 } 1328 return; 1329 1330 bad: 1331 DPFPRINTF(LOG_WARNING, "pfsync_insert: PFSYNC_ACT_TDB_UPD: " 1332 "invalid value"); 1333 pfsyncstat_inc(pfsyncs_badstate); 1334 return; 1335 } 1336 #endif 1337 1338 1339 int 1340 pfsync_in_eof(caddr_t buf, int len, int count, int flags) 1341 { 1342 if (len > 0 || count > 0) 1343 pfsyncstat_inc(pfsyncs_badact); 1344 1345 /* we're done. 
let the caller return */ 1346 return (1); 1347 } 1348 1349 int 1350 pfsync_in_error(caddr_t buf, int len, int count, int flags) 1351 { 1352 pfsyncstat_inc(pfsyncs_badact); 1353 return (-1); 1354 } 1355 1356 int 1357 pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, 1358 struct rtentry *rt) 1359 { 1360 m_freem(m); /* drop packet */ 1361 return (EAFNOSUPPORT); 1362 } 1363 1364 int 1365 pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 1366 { 1367 struct proc *p = curproc; 1368 struct pfsync_softc *sc = ifp->if_softc; 1369 struct ifreq *ifr = (struct ifreq *)data; 1370 struct ip_moptions *imo = &sc->sc_imo; 1371 struct pfsyncreq pfsyncr; 1372 struct ifnet *ifp0, *sifp; 1373 struct ip *ip; 1374 int error; 1375 1376 switch (cmd) { 1377 case SIOCSIFFLAGS: 1378 if ((ifp->if_flags & IFF_RUNNING) == 0 && 1379 (ifp->if_flags & IFF_UP)) { 1380 ifp->if_flags |= IFF_RUNNING; 1381 1382 #if NCARP > 0 1383 sc->sc_initial_bulk = 1; 1384 carp_group_demote_adj(&sc->sc_if, 32, "pfsync init"); 1385 #endif 1386 1387 pfsync_request_full_update(sc); 1388 } 1389 if ((ifp->if_flags & IFF_RUNNING) && 1390 (ifp->if_flags & IFF_UP) == 0) { 1391 ifp->if_flags &= ~IFF_RUNNING; 1392 1393 /* drop everything */ 1394 timeout_del(&sc->sc_tmo); 1395 pfsync_drop(sc); 1396 1397 pfsync_cancel_full_update(sc); 1398 } 1399 break; 1400 case SIOCSIFMTU: 1401 if ((ifp0 = if_get(sc->sc_sync_ifidx)) == NULL) 1402 return (EINVAL); 1403 error = 0; 1404 if (ifr->ifr_mtu <= PFSYNC_MINPKT || 1405 ifr->ifr_mtu > ifp0->if_mtu) { 1406 error = EINVAL; 1407 } 1408 if_put(ifp0); 1409 if (error) 1410 return error; 1411 if (ifr->ifr_mtu < ifp->if_mtu) 1412 pfsync_sendout(); 1413 ifp->if_mtu = ifr->ifr_mtu; 1414 break; 1415 case SIOCGETPFSYNC: 1416 bzero(&pfsyncr, sizeof(pfsyncr)); 1417 if ((ifp0 = if_get(sc->sc_sync_ifidx)) != NULL) { 1418 strlcpy(pfsyncr.pfsyncr_syncdev, 1419 ifp0->if_xname, IFNAMSIZ); 1420 } 1421 if_put(ifp0); 1422 pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer; 1423 pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates; 1424 pfsyncr.pfsyncr_defer = sc->sc_defer; 1425 return (copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr))); 1426 1427 case SIOCSETPFSYNC: 1428 if ((error = suser(p)) != 0) 1429 return (error); 1430 if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr)))) 1431 return (error); 1432 1433 if (pfsyncr.pfsyncr_syncpeer.s_addr == 0) 1434 sc->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP; 1435 else 1436 sc->sc_sync_peer.s_addr = 1437 pfsyncr.pfsyncr_syncpeer.s_addr; 1438 1439 if (pfsyncr.pfsyncr_maxupdates > 255) 1440 return (EINVAL); 1441 sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates; 1442 1443 sc->sc_defer = pfsyncr.pfsyncr_defer; 1444 1445 if (pfsyncr.pfsyncr_syncdev[0] == 0) { 1446 if ((ifp0 = if_get(sc->sc_sync_ifidx)) != NULL) { 1447 if_linkstatehook_del(ifp0, &sc->sc_ltask); 1448 if_detachhook_del(ifp0, &sc->sc_dtask); 1449 } 1450 if_put(ifp0); 1451 sc->sc_sync_ifidx = 0; 1452 if (imo->imo_num_memberships > 0) { 1453 in_delmulti(imo->imo_membership[ 1454 --imo->imo_num_memberships]); 1455 imo->imo_ifidx = 0; 1456 } 1457 break; 1458 } 1459 1460 if ((sifp = if_unit(pfsyncr.pfsyncr_syncdev)) == NULL) 1461 return (EINVAL); 1462 1463 ifp0 = if_get(sc->sc_sync_ifidx); 1464 1465 if (sifp->if_mtu < sc->sc_if.if_mtu || (ifp0 != NULL && 1466 sifp->if_mtu < ifp0->if_mtu) || 1467 sifp->if_mtu < MCLBYTES - sizeof(struct ip)) 1468 pfsync_sendout(); 1469 1470 if (ifp0) { 1471 if_linkstatehook_del(ifp0, &sc->sc_ltask); 1472 if_detachhook_del(ifp0, &sc->sc_dtask); 1473 } 1474 if_put(ifp0); 1475 
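		/*
		 * Switch over to the new sync interface: record its index,
		 * re-join the pfsync multicast group when the default group
		 * peer is used, rebuild the IP header template and request
		 * a full bulk update.  A typical configuration driving this
		 * path (an illustrative sketch, not part of this file) is:
		 *
		 *	# ifconfig pfsync0 syncdev em1 maxupd 128 defer
		 *	# ifconfig pfsync0 up
		 */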
sc->sc_sync_ifidx = sifp->if_index; 1476 1477 if (imo->imo_num_memberships > 0) { 1478 in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); 1479 imo->imo_ifidx = 0; 1480 } 1481 1482 if (sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) { 1483 struct in_addr addr; 1484 1485 if (!(sifp->if_flags & IFF_MULTICAST)) { 1486 sc->sc_sync_ifidx = 0; 1487 if_put(sifp); 1488 return (EADDRNOTAVAIL); 1489 } 1490 1491 addr.s_addr = INADDR_PFSYNC_GROUP; 1492 1493 if ((imo->imo_membership[0] = 1494 in_addmulti(&addr, sifp)) == NULL) { 1495 sc->sc_sync_ifidx = 0; 1496 if_put(sifp); 1497 return (ENOBUFS); 1498 } 1499 imo->imo_num_memberships++; 1500 imo->imo_ifidx = sc->sc_sync_ifidx; 1501 imo->imo_ttl = PFSYNC_DFLTTL; 1502 imo->imo_loop = 0; 1503 } 1504 1505 ip = &sc->sc_template; 1506 bzero(ip, sizeof(*ip)); 1507 ip->ip_v = IPVERSION; 1508 ip->ip_hl = sizeof(sc->sc_template) >> 2; 1509 ip->ip_tos = IPTOS_LOWDELAY; 1510 /* len and id are set later */ 1511 ip->ip_off = htons(IP_DF); 1512 ip->ip_ttl = PFSYNC_DFLTTL; 1513 ip->ip_p = IPPROTO_PFSYNC; 1514 ip->ip_src.s_addr = INADDR_ANY; 1515 ip->ip_dst.s_addr = sc->sc_sync_peer.s_addr; 1516 1517 if_linkstatehook_add(sifp, &sc->sc_ltask); 1518 if_detachhook_add(sifp, &sc->sc_dtask); 1519 if_put(sifp); 1520 1521 pfsync_request_full_update(sc); 1522 1523 break; 1524 1525 default: 1526 return (ENOTTY); 1527 } 1528 1529 return (0); 1530 } 1531 1532 void 1533 pfsync_out_state(struct pf_state *st, void *buf) 1534 { 1535 struct pfsync_state *sp = buf; 1536 1537 pfsync_state_export(sp, st); 1538 } 1539 1540 void 1541 pfsync_out_iack(struct pf_state *st, void *buf) 1542 { 1543 struct pfsync_ins_ack *iack = buf; 1544 1545 iack->id = st->id; 1546 iack->creatorid = st->creatorid; 1547 } 1548 1549 void 1550 pfsync_out_upd_c(struct pf_state *st, void *buf) 1551 { 1552 struct pfsync_upd_c *up = buf; 1553 1554 bzero(up, sizeof(*up)); 1555 up->id = st->id; 1556 pf_state_peer_hton(&st->src, &up->src); 1557 pf_state_peer_hton(&st->dst, &up->dst); 1558 up->creatorid = st->creatorid; 1559 up->timeout = st->timeout; 1560 } 1561 1562 void 1563 pfsync_out_del(struct pf_state *st, void *buf) 1564 { 1565 struct pfsync_del_c *dp = buf; 1566 1567 dp->id = st->id; 1568 dp->creatorid = st->creatorid; 1569 1570 SET(st->state_flags, PFSTATE_NOSYNC); 1571 } 1572 1573 void 1574 pfsync_grab_snapshot(struct pfsync_snapshot *sn, struct pfsync_softc *sc) 1575 { 1576 int q; 1577 struct pf_state *st; 1578 struct pfsync_upd_req_item *ur; 1579 struct tdb *tdb; 1580 1581 sn->sn_sc = sc; 1582 1583 mtx_enter(&sc->sc_st_mtx); 1584 mtx_enter(&sc->sc_upd_req_mtx); 1585 mtx_enter(&sc->sc_tdb_mtx); 1586 1587 for (q = 0; q < PFSYNC_S_COUNT; q++) { 1588 TAILQ_INIT(&sn->sn_qs[q]); 1589 1590 while ((st = TAILQ_FIRST(&sc->sc_qs[q])) != NULL) { 1591 KASSERT(st->snapped == 0); 1592 TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list); 1593 TAILQ_INSERT_TAIL(&sn->sn_qs[q], st, sync_snap); 1594 st->snapped = 1; 1595 } 1596 } 1597 1598 TAILQ_INIT(&sn->sn_upd_req_list); 1599 while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) { 1600 TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry); 1601 TAILQ_INSERT_TAIL(&sn->sn_upd_req_list, ur, ur_snap); 1602 } 1603 1604 TAILQ_INIT(&sn->sn_tdb_q); 1605 while ((tdb = TAILQ_FIRST(&sc->sc_tdb_q)) != NULL) { 1606 TAILQ_REMOVE(&sc->sc_tdb_q, tdb, tdb_sync_entry); 1607 TAILQ_INSERT_TAIL(&sn->sn_tdb_q, tdb, tdb_sync_snap); 1608 1609 mtx_enter(&tdb->tdb_mtx); 1610 KASSERT(!ISSET(tdb->tdb_flags, TDBF_PFSYNC_SNAPPED)); 1611 SET(tdb->tdb_flags, TDBF_PFSYNC_SNAPPED); 1612 
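		/*
		 * TDBF_PFSYNC_SNAPPED marks the tdb as owned by a snapshot
		 * queue; it is cleared again in pfsync_drop_snapshot() or
		 * pfsync_sendout() once the tdb has been dispatched or
		 * dropped.
		 */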
mtx_leave(&tdb->tdb_mtx); 1613 } 1614 1615 sn->sn_len = sc->sc_len; 1616 sc->sc_len = PFSYNC_MINPKT; 1617 1618 sn->sn_plus = sc->sc_plus; 1619 sc->sc_plus = NULL; 1620 sn->sn_pluslen = sc->sc_pluslen; 1621 sc->sc_pluslen = 0; 1622 1623 mtx_leave(&sc->sc_tdb_mtx); 1624 mtx_leave(&sc->sc_upd_req_mtx); 1625 mtx_leave(&sc->sc_st_mtx); 1626 } 1627 1628 void 1629 pfsync_drop_snapshot(struct pfsync_snapshot *sn) 1630 { 1631 struct pf_state *st; 1632 struct pfsync_upd_req_item *ur; 1633 struct tdb *t; 1634 int q; 1635 1636 for (q = 0; q < PFSYNC_S_COUNT; q++) { 1637 if (TAILQ_EMPTY(&sn->sn_qs[q])) 1638 continue; 1639 1640 while ((st = TAILQ_FIRST(&sn->sn_qs[q])) != NULL) { 1641 KASSERT(st->sync_state == q); 1642 KASSERT(st->snapped == 1); 1643 TAILQ_REMOVE(&sn->sn_qs[q], st, sync_snap); 1644 st->sync_state = PFSYNC_S_NONE; 1645 st->snapped = 0; 1646 pf_state_unref(st); 1647 } 1648 } 1649 1650 while ((ur = TAILQ_FIRST(&sn->sn_upd_req_list)) != NULL) { 1651 TAILQ_REMOVE(&sn->sn_upd_req_list, ur, ur_snap); 1652 pool_put(&sn->sn_sc->sc_pool, ur); 1653 } 1654 1655 while ((t = TAILQ_FIRST(&sn->sn_tdb_q)) != NULL) { 1656 TAILQ_REMOVE(&sn->sn_tdb_q, t, tdb_sync_snap); 1657 mtx_enter(&t->tdb_mtx); 1658 KASSERT(ISSET(t->tdb_flags, TDBF_PFSYNC_SNAPPED)); 1659 CLR(t->tdb_flags, TDBF_PFSYNC_SNAPPED); 1660 CLR(t->tdb_flags, TDBF_PFSYNC); 1661 mtx_leave(&t->tdb_mtx); 1662 } 1663 } 1664 1665 int 1666 pfsync_is_snapshot_empty(struct pfsync_snapshot *sn) 1667 { 1668 int q; 1669 1670 for (q = 0; q < PFSYNC_S_COUNT; q++) 1671 if (!TAILQ_EMPTY(&sn->sn_qs[q])) 1672 return (0); 1673 1674 if (!TAILQ_EMPTY(&sn->sn_upd_req_list)) 1675 return (0); 1676 1677 if (!TAILQ_EMPTY(&sn->sn_tdb_q)) 1678 return (0); 1679 1680 return (sn->sn_plus == NULL); 1681 } 1682 1683 void 1684 pfsync_drop(struct pfsync_softc *sc) 1685 { 1686 struct pfsync_snapshot sn; 1687 1688 pfsync_grab_snapshot(&sn, sc); 1689 pfsync_drop_snapshot(&sn); 1690 } 1691 1692 void 1693 pfsync_send_dispatch(void *xmq) 1694 { 1695 struct mbuf_queue *mq = xmq; 1696 struct pfsync_softc *sc; 1697 struct mbuf *m; 1698 struct mbuf_list ml; 1699 int error; 1700 1701 mq_delist(mq, &ml); 1702 if (ml_empty(&ml)) 1703 return; 1704 1705 NET_LOCK(); 1706 sc = pfsyncif; 1707 if (sc == NULL) { 1708 ml_purge(&ml); 1709 goto done; 1710 } 1711 1712 while ((m = ml_dequeue(&ml)) != NULL) { 1713 if ((error = ip_output(m, NULL, NULL, IP_RAWOUTPUT, 1714 &sc->sc_imo, NULL, 0)) == 0) 1715 pfsyncstat_inc(pfsyncs_opackets); 1716 else { 1717 DPFPRINTF(LOG_DEBUG, 1718 "ip_output() @ %s failed (%d)\n", __func__, error); 1719 pfsyncstat_inc(pfsyncs_oerrors); 1720 } 1721 } 1722 done: 1723 NET_UNLOCK(); 1724 } 1725 1726 void 1727 pfsync_send_pkt(struct mbuf *m) 1728 { 1729 if (mq_enqueue(&pfsync_mq, m) != 0) { 1730 pfsyncstat_inc(pfsyncs_oerrors); 1731 DPFPRINTF(LOG_DEBUG, "mq_enqueue() @ %s failed, queue full\n", 1732 __func__); 1733 } else 1734 task_add(net_tq(0), &pfsync_task); 1735 } 1736 1737 void 1738 pfsync_sendout(void) 1739 { 1740 struct pfsync_snapshot sn; 1741 struct pfsync_softc *sc = pfsyncif; 1742 #if NBPFILTER > 0 1743 struct ifnet *ifp = &sc->sc_if; 1744 #endif 1745 struct mbuf *m; 1746 struct ip *ip; 1747 struct pfsync_header *ph; 1748 struct pfsync_subheader *subh; 1749 struct pf_state *st; 1750 struct pfsync_upd_req_item *ur; 1751 struct tdb *t; 1752 int offset; 1753 int q, count = 0; 1754 1755 if (sc == NULL || sc->sc_len == PFSYNC_MINPKT) 1756 return; 1757 1758 if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING) || 1759 #if NBPFILTER > 0 1760 (ifp->if_bpf == NULL && sc->sc_sync_ifidx 
== 0)) { 1761 #else 1762 sc->sc_sync_ifidx == 0) { 1763 #endif 1764 pfsync_drop(sc); 1765 return; 1766 } 1767 1768 pfsync_grab_snapshot(&sn, sc); 1769 1770 /* 1771 * Check below is sufficient to prevent us from sending empty packets, 1772 * but it does not stop us from sending short packets. 1773 */ 1774 if (pfsync_is_snapshot_empty(&sn)) 1775 return; 1776 1777 MGETHDR(m, M_DONTWAIT, MT_DATA); 1778 if (m == NULL) { 1779 sc->sc_if.if_oerrors++; 1780 pfsyncstat_inc(pfsyncs_onomem); 1781 pfsync_drop_snapshot(&sn); 1782 return; 1783 } 1784 1785 if (max_linkhdr + sn.sn_len > MHLEN) { 1786 MCLGETL(m, M_DONTWAIT, max_linkhdr + sn.sn_len); 1787 if (!ISSET(m->m_flags, M_EXT)) { 1788 m_free(m); 1789 sc->sc_if.if_oerrors++; 1790 pfsyncstat_inc(pfsyncs_onomem); 1791 pfsync_drop_snapshot(&sn); 1792 return; 1793 } 1794 } 1795 m->m_data += max_linkhdr; 1796 m->m_len = m->m_pkthdr.len = sn.sn_len; 1797 1798 /* build the ip header */ 1799 ip = mtod(m, struct ip *); 1800 bcopy(&sc->sc_template, ip, sizeof(*ip)); 1801 offset = sizeof(*ip); 1802 1803 ip->ip_len = htons(m->m_pkthdr.len); 1804 ip->ip_id = htons(ip_randomid()); 1805 1806 /* build the pfsync header */ 1807 ph = (struct pfsync_header *)(m->m_data + offset); 1808 bzero(ph, sizeof(*ph)); 1809 offset += sizeof(*ph); 1810 1811 ph->version = PFSYNC_VERSION; 1812 ph->len = htons(sn.sn_len - sizeof(*ip)); 1813 bcopy(pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH); 1814 1815 if (!TAILQ_EMPTY(&sn.sn_upd_req_list)) { 1816 subh = (struct pfsync_subheader *)(m->m_data + offset); 1817 offset += sizeof(*subh); 1818 1819 count = 0; 1820 while ((ur = TAILQ_FIRST(&sn.sn_upd_req_list)) != NULL) { 1821 TAILQ_REMOVE(&sn.sn_upd_req_list, ur, ur_snap); 1822 1823 bcopy(&ur->ur_msg, m->m_data + offset, 1824 sizeof(ur->ur_msg)); 1825 offset += sizeof(ur->ur_msg); 1826 1827 pool_put(&sc->sc_pool, ur); 1828 1829 count++; 1830 } 1831 1832 bzero(subh, sizeof(*subh)); 1833 subh->len = sizeof(ur->ur_msg) >> 2; 1834 subh->action = PFSYNC_ACT_UPD_REQ; 1835 subh->count = htons(count); 1836 } 1837 1838 /* has someone built a custom region for us to add? */ 1839 if (sn.sn_plus != NULL) { 1840 bcopy(sn.sn_plus, m->m_data + offset, sn.sn_pluslen); 1841 offset += sn.sn_pluslen; 1842 sn.sn_plus = NULL; /* XXX memory leak ? 
 */
	}

	if (!TAILQ_EMPTY(&sn.sn_tdb_q)) {
		subh = (struct pfsync_subheader *)(m->m_data + offset);
		offset += sizeof(*subh);

		count = 0;
		while ((t = TAILQ_FIRST(&sn.sn_tdb_q)) != NULL) {
			TAILQ_REMOVE(&sn.sn_tdb_q, t, tdb_sync_snap);
			pfsync_out_tdb(t, m->m_data + offset);
			offset += sizeof(struct pfsync_tdb);
			mtx_enter(&t->tdb_mtx);
			KASSERT(ISSET(t->tdb_flags, TDBF_PFSYNC_SNAPPED));
			CLR(t->tdb_flags, TDBF_PFSYNC_SNAPPED);
			CLR(t->tdb_flags, TDBF_PFSYNC);
			mtx_leave(&t->tdb_mtx);
			tdb_unref(t);
			count++;
		}

		bzero(subh, sizeof(*subh));
		subh->action = PFSYNC_ACT_TDB;
		subh->len = sizeof(struct pfsync_tdb) >> 2;
		subh->count = htons(count);
	}

	/* walk the queues */
	for (q = 0; q < PFSYNC_S_COUNT; q++) {
		if (TAILQ_EMPTY(&sn.sn_qs[q]))
			continue;

		subh = (struct pfsync_subheader *)(m->m_data + offset);
		offset += sizeof(*subh);

		count = 0;
		while ((st = TAILQ_FIRST(&sn.sn_qs[q])) != NULL) {
			TAILQ_REMOVE(&sn.sn_qs[q], st, sync_snap);
			KASSERT(st->sync_state == q);
			KASSERT(st->snapped == 1);
			st->sync_state = PFSYNC_S_NONE;
			st->snapped = 0;
			pfsync_qs[q].write(st, m->m_data + offset);
			offset += pfsync_qs[q].len;

			pf_state_unref(st);
			count++;
		}

		bzero(subh, sizeof(*subh));
		subh->action = pfsync_qs[q].action;
		subh->len = pfsync_qs[q].len >> 2;
		subh->count = htons(count);
	}

	/* we're done, let's put it on the wire */
#if NBPFILTER > 0
	if (ifp->if_bpf) {
		m->m_data += sizeof(*ip);
		m->m_len = m->m_pkthdr.len = sn.sn_len - sizeof(*ip);
		bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
		m->m_data -= sizeof(*ip);
		m->m_len = m->m_pkthdr.len = sn.sn_len;
	}

	if (sc->sc_sync_ifidx == 0) {
		sc->sc_len = PFSYNC_MINPKT;
		m_freem(m);
		return;
	}
#endif

	sc->sc_if.if_opackets++;
	sc->sc_if.if_obytes += m->m_pkthdr.len;

	m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain;

	pfsync_send_pkt(m);
}

void
pfsync_insert_state(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;

	NET_ASSERT_LOCKED();

	if (ISSET(st->rule.ptr->rule_flag, PFRULE_NOSYNC) ||
	    st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) {
		SET(st->state_flags, PFSTATE_NOSYNC);
		return;
	}

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
	    ISSET(st->state_flags, PFSTATE_NOSYNC))
		return;

	KASSERT(st->sync_state == PFSYNC_S_NONE);

	if (sc->sc_len == PFSYNC_MINPKT)
		timeout_add_sec(&sc->sc_tmo, 1);

	pfsync_q_ins(st, PFSYNC_S_INS);

	st->sync_updates = 0;
}

int
pfsync_defer(struct pf_state *st, struct mbuf *m, struct pfsync_deferral **ppd)
{
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_deferral *pd;
	unsigned int sched;

	NET_ASSERT_LOCKED();

	if (!sc->sc_defer ||
	    ISSET(st->state_flags, PFSTATE_NOSYNC) ||
	    m->m_flags & (M_BCAST|M_MCAST))
		return (0);

	pd = pool_get(&sc->sc_pool, M_NOWAIT);
	if (pd == NULL)
		return (0);

	/*
	 * If the deferral queue grows faster than the timeout can consume
	 * it, we have to ask the packet (caller) to help the timer and
	 * dispatch one deferral for us.
	 *
	 * We wish to call pfsync_undefer() here.
Unfortunately we can't, 1973 * because pfsync_undefer() will be calling to ip_output(), 1974 * which in turn will call to pf_test(), which would then attempt 1975 * to grab PF_LOCK() we currently hold. 1976 */ 1977 if (sc->sc_deferred >= 128) { 1978 mtx_enter(&sc->sc_deferrals_mtx); 1979 *ppd = TAILQ_FIRST(&sc->sc_deferrals); 1980 if (*ppd != NULL) { 1981 TAILQ_REMOVE(&sc->sc_deferrals, *ppd, pd_entry); 1982 sc->sc_deferred--; 1983 } 1984 mtx_leave(&sc->sc_deferrals_mtx); 1985 } else 1986 *ppd = NULL; 1987 1988 m->m_pkthdr.pf.flags |= PF_TAG_GENERATED; 1989 SET(st->state_flags, PFSTATE_ACK); 1990 1991 pd->pd_st = pf_state_ref(st); 1992 pd->pd_m = m; 1993 1994 pd->pd_deadline = getnsecuptime() + PFSYNC_DEFER_NSEC; 1995 1996 mtx_enter(&sc->sc_deferrals_mtx); 1997 sched = TAILQ_EMPTY(&sc->sc_deferrals); 1998 1999 TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry); 2000 sc->sc_deferred++; 2001 mtx_leave(&sc->sc_deferrals_mtx); 2002 2003 if (sched) 2004 timeout_add_nsec(&sc->sc_deferrals_tmo, PFSYNC_DEFER_NSEC); 2005 2006 schednetisr(NETISR_PFSYNC); 2007 2008 return (1); 2009 } 2010 2011 void 2012 pfsync_undefer_notify(struct pfsync_deferral *pd) 2013 { 2014 struct pf_pdesc pdesc; 2015 struct pf_state *st = pd->pd_st; 2016 2017 /* 2018 * pf_remove_state removes the state keys and sets st->timeout 2019 * to PFTM_UNLINKED. this is done under NET_LOCK which should 2020 * be held here, so we can use PFTM_UNLINKED as a test for 2021 * whether the state keys are set for the address family 2022 * lookup. 2023 */ 2024 2025 if (st->timeout == PFTM_UNLINKED) 2026 return; 2027 2028 if (st->rt == PF_ROUTETO) { 2029 if (pf_setup_pdesc(&pdesc, st->key[PF_SK_WIRE]->af, 2030 st->direction, st->kif, pd->pd_m, NULL) != PF_PASS) 2031 return; 2032 switch (st->key[PF_SK_WIRE]->af) { 2033 case AF_INET: 2034 pf_route(&pdesc, st); 2035 break; 2036 #ifdef INET6 2037 case AF_INET6: 2038 pf_route6(&pdesc, st); 2039 break; 2040 #endif /* INET6 */ 2041 default: 2042 unhandled_af(st->key[PF_SK_WIRE]->af); 2043 } 2044 pd->pd_m = pdesc.m; 2045 } else { 2046 switch (st->key[PF_SK_WIRE]->af) { 2047 case AF_INET: 2048 ip_output(pd->pd_m, NULL, NULL, 0, NULL, NULL, 0); 2049 break; 2050 #ifdef INET6 2051 case AF_INET6: 2052 ip6_output(pd->pd_m, NULL, NULL, 0, NULL, NULL); 2053 break; 2054 #endif /* INET6 */ 2055 default: 2056 unhandled_af(st->key[PF_SK_WIRE]->af); 2057 } 2058 2059 pd->pd_m = NULL; 2060 } 2061 } 2062 2063 void 2064 pfsync_free_deferral(struct pfsync_deferral *pd) 2065 { 2066 struct pfsync_softc *sc = pfsyncif; 2067 2068 pf_state_unref(pd->pd_st); 2069 m_freem(pd->pd_m); 2070 pool_put(&sc->sc_pool, pd); 2071 } 2072 2073 void 2074 pfsync_undefer(struct pfsync_deferral *pd, int drop) 2075 { 2076 struct pfsync_softc *sc = pfsyncif; 2077 2078 NET_ASSERT_LOCKED(); 2079 2080 if (sc == NULL) 2081 return; 2082 2083 CLR(pd->pd_st->state_flags, PFSTATE_ACK); 2084 if (!drop) 2085 pfsync_undefer_notify(pd); 2086 2087 pfsync_free_deferral(pd); 2088 } 2089 2090 void 2091 pfsync_deferrals_tmo(void *arg) 2092 { 2093 struct pfsync_softc *sc = arg; 2094 struct pfsync_deferral *pd; 2095 uint64_t now, nsec = 0; 2096 struct pfsync_deferrals pds = TAILQ_HEAD_INITIALIZER(pds); 2097 2098 now = getnsecuptime(); 2099 2100 mtx_enter(&sc->sc_deferrals_mtx); 2101 for (;;) { 2102 pd = TAILQ_FIRST(&sc->sc_deferrals); 2103 if (pd == NULL) 2104 break; 2105 2106 if (now < pd->pd_deadline) { 2107 nsec = pd->pd_deadline - now; 2108 break; 2109 } 2110 2111 TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry); 2112 sc->sc_deferred--; 2113 
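		/*
		 * This deferral has passed its deadline: collect it on the
		 * local list so it can be undeferred below, once the mutex
		 * has been released and the NET_LOCK taken.
		 */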
TAILQ_INSERT_TAIL(&pds, pd, pd_entry); 2114 } 2115 mtx_leave(&sc->sc_deferrals_mtx); 2116 2117 if (nsec > 0) { 2118 /* we were looking at a pd, but it wasn't old enough */ 2119 timeout_add_nsec(&sc->sc_deferrals_tmo, nsec); 2120 } 2121 2122 if (TAILQ_EMPTY(&pds)) 2123 return; 2124 2125 NET_LOCK(); 2126 while ((pd = TAILQ_FIRST(&pds)) != NULL) { 2127 TAILQ_REMOVE(&pds, pd, pd_entry); 2128 2129 pfsync_undefer(pd, 0); 2130 } 2131 NET_UNLOCK(); 2132 } 2133 2134 void 2135 pfsync_deferred(struct pf_state *st, int drop) 2136 { 2137 struct pfsync_softc *sc = pfsyncif; 2138 struct pfsync_deferral *pd; 2139 2140 NET_ASSERT_LOCKED(); 2141 2142 mtx_enter(&sc->sc_deferrals_mtx); 2143 TAILQ_FOREACH(pd, &sc->sc_deferrals, pd_entry) { 2144 if (pd->pd_st == st) { 2145 TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry); 2146 sc->sc_deferred--; 2147 break; 2148 } 2149 } 2150 mtx_leave(&sc->sc_deferrals_mtx); 2151 2152 if (pd != NULL) 2153 pfsync_undefer(pd, drop); 2154 } 2155 2156 void 2157 pfsync_update_state(struct pf_state *st) 2158 { 2159 struct pfsync_softc *sc = pfsyncif; 2160 int sync = 0; 2161 2162 NET_ASSERT_LOCKED(); 2163 2164 if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING)) 2165 return; 2166 2167 if (ISSET(st->state_flags, PFSTATE_ACK)) 2168 pfsync_deferred(st, 0); 2169 if (ISSET(st->state_flags, PFSTATE_NOSYNC)) { 2170 if (st->sync_state != PFSYNC_S_NONE) 2171 pfsync_q_del(st); 2172 return; 2173 } 2174 2175 if (sc->sc_len == PFSYNC_MINPKT) 2176 timeout_add_sec(&sc->sc_tmo, 1); 2177 2178 switch (st->sync_state) { 2179 case PFSYNC_S_UPD_C: 2180 case PFSYNC_S_UPD: 2181 case PFSYNC_S_INS: 2182 /* we're already handling it */ 2183 2184 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) { 2185 st->sync_updates++; 2186 if (st->sync_updates >= sc->sc_maxupdates) 2187 sync = 1; 2188 } 2189 break; 2190 2191 case PFSYNC_S_IACK: 2192 pfsync_q_del(st); 2193 case PFSYNC_S_NONE: 2194 pfsync_q_ins(st, PFSYNC_S_UPD_C); 2195 st->sync_updates = 0; 2196 break; 2197 2198 default: 2199 panic("pfsync_update_state: unexpected sync state %d", 2200 st->sync_state); 2201 } 2202 2203 if (sync || (getuptime() - st->pfsync_time) < 2) 2204 schednetisr(NETISR_PFSYNC); 2205 } 2206 2207 void 2208 pfsync_cancel_full_update(struct pfsync_softc *sc) 2209 { 2210 if (timeout_pending(&sc->sc_bulkfail_tmo) || 2211 timeout_pending(&sc->sc_bulk_tmo)) { 2212 #if NCARP > 0 2213 if (!pfsync_sync_ok) 2214 carp_group_demote_adj(&sc->sc_if, -1, 2215 "pfsync bulk cancelled"); 2216 if (sc->sc_initial_bulk) { 2217 carp_group_demote_adj(&sc->sc_if, -32, 2218 "pfsync init"); 2219 sc->sc_initial_bulk = 0; 2220 } 2221 #endif 2222 pfsync_sync_ok = 1; 2223 DPFPRINTF(LOG_INFO, "cancelling bulk update"); 2224 } 2225 timeout_del(&sc->sc_bulkfail_tmo); 2226 timeout_del(&sc->sc_bulk_tmo); 2227 sc->sc_bulk_next = NULL; 2228 sc->sc_bulk_last = NULL; 2229 sc->sc_ureq_sent = 0; 2230 sc->sc_bulk_tries = 0; 2231 } 2232 2233 void 2234 pfsync_request_full_update(struct pfsync_softc *sc) 2235 { 2236 if (sc->sc_sync_ifidx != 0 && ISSET(sc->sc_if.if_flags, IFF_RUNNING)) { 2237 /* Request a full state table update. 
        sc->sc_ureq_sent = getuptime();
#if NCARP > 0
        if (!sc->sc_link_demoted && pfsync_sync_ok)
            carp_group_demote_adj(&sc->sc_if, 1,
                "pfsync bulk start");
#endif
        pfsync_sync_ok = 0;
        DPFPRINTF(LOG_INFO, "requesting bulk update");
        timeout_add(&sc->sc_bulkfail_tmo, 4 * hz +
            pf_pool_limits[PF_LIMIT_STATES].limit /
            ((sc->sc_if.if_mtu - PFSYNC_MINPKT) /
            sizeof(struct pfsync_state)));
        pfsync_request_update(0, 0);
    }
}

void
pfsync_request_update(u_int32_t creatorid, u_int64_t id)
{
    struct pfsync_softc *sc = pfsyncif;
    struct pfsync_upd_req_item *item;
    size_t nlen, sclen;
    int retry;

    /*
     * this code does nothing to prevent multiple update requests for the
     * same state being generated.
     */

    item = pool_get(&sc->sc_pool, PR_NOWAIT);
    if (item == NULL) {
        /* XXX stats */
        return;
    }

    item->ur_msg.id = id;
    item->ur_msg.creatorid = creatorid;

    for (;;) {
        mtx_enter(&sc->sc_upd_req_mtx);

        nlen = sizeof(struct pfsync_upd_req);
        if (TAILQ_EMPTY(&sc->sc_upd_req_list))
            nlen += sizeof(struct pfsync_subheader);

        sclen = atomic_add_long_nv(&sc->sc_len, nlen);
        retry = (sclen > sc->sc_if.if_mtu);
        if (retry)
            atomic_sub_long(&sc->sc_len, nlen);
        else
            TAILQ_INSERT_TAIL(&sc->sc_upd_req_list, item, ur_entry);

        mtx_leave(&sc->sc_upd_req_mtx);

        if (!retry)
            break;

        pfsync_sendout();
    }

    schednetisr(NETISR_PFSYNC);
}

void
pfsync_update_state_req(struct pf_state *st)
{
    struct pfsync_softc *sc = pfsyncif;

    if (sc == NULL)
        panic("pfsync_update_state_req: nonexistent instance");

    if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
        if (st->sync_state != PFSYNC_S_NONE)
            pfsync_q_del(st);
        return;
    }

    switch (st->sync_state) {
    case PFSYNC_S_UPD_C:
    case PFSYNC_S_IACK:
        pfsync_q_del(st);
        /* FALLTHROUGH */
    case PFSYNC_S_NONE:
        pfsync_q_ins(st, PFSYNC_S_UPD);
        schednetisr(NETISR_PFSYNC);
        return;

    case PFSYNC_S_INS:
    case PFSYNC_S_UPD:
    case PFSYNC_S_DEL:
        /* we're already handling it */
        return;

    default:
        panic("pfsync_update_state_req: unexpected sync state %d",
            st->sync_state);
    }
}

void
pfsync_delete_state(struct pf_state *st)
{
    struct pfsync_softc *sc = pfsyncif;

    NET_ASSERT_LOCKED();

    if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
        return;

    if (ISSET(st->state_flags, PFSTATE_ACK))
        pfsync_deferred(st, 1);
    if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
        if (st->sync_state != PFSYNC_S_NONE)
            pfsync_q_del(st);
        return;
    }

    if (sc->sc_len == PFSYNC_MINPKT)
        timeout_add_sec(&sc->sc_tmo, 1);

    switch (st->sync_state) {
    case PFSYNC_S_INS:
        /* we never got to tell the world so just forget about it */
        pfsync_q_del(st);
        return;

    case PFSYNC_S_UPD_C:
    case PFSYNC_S_UPD:
    case PFSYNC_S_IACK:
        pfsync_q_del(st);
        /*
         * FALLTHROUGH to putting it on the del list.
         * Note on reference count bookkeeping:
         * pfsync_q_del() drops the reference held for queue
         * ownership, but the st entry survives, because
         * our caller still holds a reference.
         */

    case PFSYNC_S_NONE:
        /*
         * We either fall through here, or there is no reference to
         * st owned by the pfsync queues at this point.
         *
         * Calling pfsync_q_ins() puts st on the del queue and
         * grabs a reference for the delete queue.
         */
        pfsync_q_ins(st, PFSYNC_S_DEL);
        return;

    default:
        panic("pfsync_delete_state: unexpected sync state %d",
            st->sync_state);
    }
}

void
pfsync_clear_states(u_int32_t creatorid, const char *ifname)
{
    struct pfsync_softc *sc = pfsyncif;
    struct {
        struct pfsync_subheader subh;
        struct pfsync_clr clr;
    } __packed r;

    NET_ASSERT_LOCKED();

    if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
        return;

    bzero(&r, sizeof(r));

    r.subh.action = PFSYNC_ACT_CLR;
    r.subh.len = sizeof(struct pfsync_clr) >> 2;
    r.subh.count = htons(1);

    strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname));
    r.clr.creatorid = creatorid;

    pfsync_send_plus(&r, sizeof(r));
}

void
pfsync_q_ins(struct pf_state *st, int q)
{
    struct pfsync_softc *sc = pfsyncif;
    size_t nlen, sclen;

    if (sc->sc_len < PFSYNC_MINPKT)
        panic("pfsync pkt len is too low %zd", sc->sc_len);
    do {
        mtx_enter(&sc->sc_st_mtx);

        /*
         * There are either two threads trying to update the
         * same state, or the state is just being processed
         * (is on the snapshot queue).
         */
        if (st->sync_state != PFSYNC_S_NONE) {
            mtx_leave(&sc->sc_st_mtx);
            break;
        }

        nlen = pfsync_qs[q].len;

        if (TAILQ_EMPTY(&sc->sc_qs[q]))
            nlen += sizeof(struct pfsync_subheader);

        sclen = atomic_add_long_nv(&sc->sc_len, nlen);
        if (sclen > sc->sc_if.if_mtu) {
            atomic_sub_long(&sc->sc_len, nlen);
            mtx_leave(&sc->sc_st_mtx);
            pfsync_sendout();
            continue;
        }

        pf_state_ref(st);

        TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list);
        st->sync_state = q;
        mtx_leave(&sc->sc_st_mtx);
    } while (0);
}

void
pfsync_q_del(struct pf_state *st)
{
    struct pfsync_softc *sc = pfsyncif;
    int q;

    KASSERT(st->sync_state != PFSYNC_S_NONE);

    mtx_enter(&sc->sc_st_mtx);
    q = st->sync_state;
    /*
     * Re-check under the mutex: if the state has been snapped already,
     * just bail out, because we came too late; the state is being
     * processed/dispatched to the peer.
     */
    if ((q == PFSYNC_S_NONE) || (st->snapped)) {
        mtx_leave(&sc->sc_st_mtx);
        return;
    }
    atomic_sub_long(&sc->sc_len, pfsync_qs[q].len);
    TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list);
    if (TAILQ_EMPTY(&sc->sc_qs[q]))
        atomic_sub_long(&sc->sc_len, sizeof(struct pfsync_subheader));
    st->sync_state = PFSYNC_S_NONE;
    mtx_leave(&sc->sc_st_mtx);

    pf_state_unref(st);
}

void
pfsync_update_tdb(struct tdb *t, int output)
{
    struct pfsync_softc *sc = pfsyncif;
    size_t nlen, sclen;

    if (sc == NULL)
        return;

    if (!ISSET(t->tdb_flags, TDBF_PFSYNC)) {
        do {
            mtx_enter(&sc->sc_tdb_mtx);
            nlen = sizeof(struct pfsync_tdb);

            mtx_enter(&t->tdb_mtx);
            if (ISSET(t->tdb_flags, TDBF_PFSYNC)) {
                /* we've lost the race, no action for us then */
                mtx_leave(&t->tdb_mtx);
                mtx_leave(&sc->sc_tdb_mtx);
                break;
            }

            if (TAILQ_EMPTY(&sc->sc_tdb_q))
                nlen += sizeof(struct pfsync_subheader);

            sclen = atomic_add_long_nv(&sc->sc_len, nlen);
            if (sclen > sc->sc_if.if_mtu) {
                atomic_sub_long(&sc->sc_len, nlen);
                mtx_leave(&t->tdb_mtx);
                mtx_leave(&sc->sc_tdb_mtx);
                pfsync_sendout();
                continue;
            }

            TAILQ_INSERT_TAIL(&sc->sc_tdb_q, t, tdb_sync_entry);
            tdb_ref(t);
            SET(t->tdb_flags, TDBF_PFSYNC);
            mtx_leave(&t->tdb_mtx);

            mtx_leave(&sc->sc_tdb_mtx);
            t->tdb_updates = 0;
        } while (0);
    } else {
        if (++t->tdb_updates >= sc->sc_maxupdates)
            schednetisr(NETISR_PFSYNC);
    }

    mtx_enter(&t->tdb_mtx);
    if (output)
        SET(t->tdb_flags, TDBF_PFSYNC_RPL);
    else
        CLR(t->tdb_flags, TDBF_PFSYNC_RPL);
    mtx_leave(&t->tdb_mtx);
}

void
pfsync_delete_tdb(struct tdb *t)
{
    struct pfsync_softc *sc = pfsyncif;
    size_t nlen;

    if (sc == NULL || !ISSET(t->tdb_flags, TDBF_PFSYNC))
        return;

    mtx_enter(&sc->sc_tdb_mtx);

    /*
     * if the tdb entry is just being processed (found in a snapshot),
     * then it cannot be deleted; we just came too late
     */
    if (ISSET(t->tdb_flags, TDBF_PFSYNC_SNAPPED)) {
        mtx_leave(&sc->sc_tdb_mtx);
        return;
    }

    TAILQ_REMOVE(&sc->sc_tdb_q, t, tdb_sync_entry);

    mtx_enter(&t->tdb_mtx);
    CLR(t->tdb_flags, TDBF_PFSYNC);
    mtx_leave(&t->tdb_mtx);

    nlen = sizeof(struct pfsync_tdb);
    if (TAILQ_EMPTY(&sc->sc_tdb_q))
        nlen += sizeof(struct pfsync_subheader);
    atomic_sub_long(&sc->sc_len, nlen);

    mtx_leave(&sc->sc_tdb_mtx);

    tdb_unref(t);
}

void
pfsync_out_tdb(struct tdb *t, void *buf)
{
    struct pfsync_tdb *ut = buf;

    bzero(ut, sizeof(*ut));
    ut->spi = t->tdb_spi;
    bcopy(&t->tdb_dst, &ut->dst, sizeof(ut->dst));
    /*
     * When a failover happens, the master's rpl is probably above
     * what we see here (we may be up to a second late), so
     * increase it a bit for outbound tdbs to manage most such
     * situations.
     *
     * For now, just add an offset that is likely to be larger
     * than the number of packets we can see in one second. The RFC
     * just says the next packet must have a higher seq value.
     *
     * XXX What is a good algorithm for this? We could use
     * a rate-determined increase, but to know it, we would have
     * to extend struct tdb.
     * XXX pt->rpl can wrap over MAXINT, but if so the real tdb
     * will soon be replaced anyway.
     * For now, just don't handle this edge case.
     */
#define RPL_INCR 16384
    ut->rpl = htobe64(t->tdb_rpl + (ISSET(t->tdb_flags, TDBF_PFSYNC_RPL) ?
        RPL_INCR : 0));
    ut->cur_bytes = htobe64(t->tdb_cur_bytes);
    ut->sproto = t->tdb_sproto;
    ut->rdomain = htons(t->tdb_rdomain);
}

void
pfsync_bulk_start(void)
{
    struct pfsync_softc *sc = pfsyncif;

    NET_ASSERT_LOCKED();

    /*
     * pf gc via pfsync_state_in_use reads sc_bulk_next and
     * sc_bulk_last while exclusively holding the pf_state_list
     * rwlock. make sure it can't race with us setting these
     * pointers. they basically act as hazards, and borrow the
     * list's state reference count.
     */
    rw_enter_read(&pf_state_list.pfs_rwl);

    /* get a consistent view of the list pointers */
    mtx_enter(&pf_state_list.pfs_mtx);
    if (sc->sc_bulk_next == NULL)
        sc->sc_bulk_next = TAILQ_FIRST(&pf_state_list.pfs_list);

    sc->sc_bulk_last = TAILQ_LAST(&pf_state_list.pfs_list, pf_state_queue);
    mtx_leave(&pf_state_list.pfs_mtx);

    rw_exit_read(&pf_state_list.pfs_rwl);

    DPFPRINTF(LOG_INFO, "received bulk update request");

    if (sc->sc_bulk_last == NULL)
        pfsync_bulk_status(PFSYNC_BUS_END);
    else {
        sc->sc_ureq_received = getuptime();

        pfsync_bulk_status(PFSYNC_BUS_START);
        timeout_add(&sc->sc_bulk_tmo, 0);
    }
}

void
pfsync_bulk_update(void *arg)
{
    struct pfsync_softc *sc;
    struct pf_state *st;
    int i = 0;

    NET_LOCK();
    sc = pfsyncif;
    if (sc == NULL)
        goto out;

    rw_enter_read(&pf_state_list.pfs_rwl);
    st = sc->sc_bulk_next;
    sc->sc_bulk_next = NULL;

    for (;;) {
        if (st->sync_state == PFSYNC_S_NONE &&
            st->timeout < PFTM_MAX &&
            st->pfsync_time <= sc->sc_ureq_received) {
            pfsync_update_state_req(st);
            i++;
        }

        st = TAILQ_NEXT(st, entry_list);
        if ((st == NULL) || (st == sc->sc_bulk_last)) {
            /* we're done */
            sc->sc_bulk_last = NULL;
            pfsync_bulk_status(PFSYNC_BUS_END);
            break;
        }

        if (i > 1 && (sc->sc_if.if_mtu - sc->sc_len) <
            sizeof(struct pfsync_state)) {
            /* we've filled a packet */
            sc->sc_bulk_next = st;
            timeout_add(&sc->sc_bulk_tmo, 1);
            break;
        }
    }

    rw_exit_read(&pf_state_list.pfs_rwl);
out:
    NET_UNLOCK();
}

void
pfsync_bulk_status(u_int8_t status)
{
    struct {
        struct pfsync_subheader subh;
        struct pfsync_bus bus;
    } __packed r;

    struct pfsync_softc *sc = pfsyncif;

    bzero(&r, sizeof(r));

    r.subh.action = PFSYNC_ACT_BUS;
    r.subh.len = sizeof(struct pfsync_bus) >> 2;
    r.subh.count = htons(1);

    r.bus.creatorid = pf_status.hostid;
    r.bus.endtime = htonl(getuptime() - sc->sc_ureq_received);
    r.bus.status = status;

    pfsync_send_plus(&r, sizeof(r));
}

void
pfsync_bulk_fail(void *arg)
{
    struct pfsync_softc *sc;

    NET_LOCK();
    sc = pfsyncif;
    if (sc == NULL)
        goto out;
    if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) {
        /* Try again */
        timeout_add_sec(&sc->sc_bulkfail_tmo, 5);
        pfsync_request_update(0, 0);
    } else {
        /* Pretend like the transfer was ok */
        sc->sc_ureq_sent = 0;
        sc->sc_bulk_tries = 0;
#if NCARP > 0
        if (!pfsync_sync_ok)
            carp_group_demote_adj(&sc->sc_if, -1,
                sc->sc_link_demoted ?
                "pfsync link state up" :
                "pfsync bulk fail");
        if (sc->sc_initial_bulk) {
            carp_group_demote_adj(&sc->sc_if, -32,
                "pfsync init");
            sc->sc_initial_bulk = 0;
        }
#endif
        pfsync_sync_ok = 1;
        sc->sc_link_demoted = 0;
        DPFPRINTF(LOG_ERR, "failed to receive bulk update");
    }
out:
    NET_UNLOCK();
}

void
pfsync_send_plus(void *plus, size_t pluslen)
{
    struct pfsync_softc *sc = pfsyncif;

    if (sc->sc_len + pluslen > sc->sc_if.if_mtu)
        pfsync_sendout();

    sc->sc_plus = plus;
    sc->sc_pluslen = pluslen;
    atomic_add_long(&sc->sc_len, pluslen);

    pfsync_sendout();
}

int
pfsync_up(void)
{
    struct pfsync_softc *sc = pfsyncif;

    if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
        return (0);

    return (1);
}

int
pfsync_state_in_use(struct pf_state *st)
{
    struct pfsync_softc *sc = pfsyncif;

    if (sc == NULL)
        return (0);

    rw_assert_wrlock(&pf_state_list.pfs_rwl);

    if (st->sync_state != PFSYNC_S_NONE ||
        st == sc->sc_bulk_next ||
        st == sc->sc_bulk_last)
        return (1);

    return (0);
}

void
pfsync_timeout(void *arg)
{
    NET_LOCK();
    pfsync_sendout();
    NET_UNLOCK();
}

/* this is a softnet/netisr handler */
void
pfsyncintr(void)
{
    pfsync_sendout();
}

int
pfsync_sysctl_pfsyncstat(void *oldp, size_t *oldlenp, void *newp)
{
    struct pfsyncstats pfsyncstat;

    CTASSERT(sizeof(pfsyncstat) == (pfsyncs_ncounters * sizeof(uint64_t)));
    memset(&pfsyncstat, 0, sizeof pfsyncstat);
    counters_read(pfsynccounters, (uint64_t *)&pfsyncstat,
        pfsyncs_ncounters);
    return (sysctl_rdstruct(oldp, oldlenp, newp,
        &pfsyncstat, sizeof(pfsyncstat)));
}

int
pfsync_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen)
{
    /* All sysctl names at this level are terminal. */
    if (namelen != 1)
        return (ENOTDIR);

    switch (name[0]) {
    case PFSYNCCTL_STATS:
        return (pfsync_sysctl_pfsyncstat(oldp, oldlenp, newp));
    default:
        return (ENOPROTOOPT);
    }
}