/*	$OpenBSD: if_pfsync.c,v 1.299 2021/11/25 13:46:02 bluhm Exp $	*/

/*
 * Copyright (c) 2002 Michael Shalayeff
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 2009 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/timeout.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/pool.h>
#include <sys/syslog.h>

#include <net/if.h>
#include <net/if_types.h>
#include <net/bpf.h>
#include <net/netisr.h>

#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/in_var.h>
#include <netinet/ip_var.h>
#include <netinet/ip_ipsp.h>
#include <netinet/ip_icmp.h>
#include <netinet/icmp6.h>
#include <netinet/tcp.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_fsm.h>
#include <netinet/udp.h>

#ifdef INET6
#include <netinet6/in6_var.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet6/nd6.h>
#endif /* INET6 */

#include "carp.h"
#if NCARP > 0
#include <netinet/ip_carp.h>
#endif

#define PF_DEBUGNAME	"pfsync: "
#include <net/pfvar.h>
#include <net/pfvar_priv.h>
#include <net/if_pfsync.h>

#include "bpfilter.h"
#include "pfsync.h"

#define PFSYNC_DEFER_NSEC 20000000ULL

#define PFSYNC_MINPKT ( \
	sizeof(struct ip) + \
	sizeof(struct pfsync_header))

int	pfsync_upd_tcp(struct pf_state *, struct pfsync_state_peer *,
	    struct pfsync_state_peer *);

int	pfsync_in_clr(caddr_t, int, int, int);
int	pfsync_in_iack(caddr_t, int, int, int);
int	pfsync_in_upd_c(caddr_t, int, int, int);
int	pfsync_in_ureq(caddr_t, int, int, int);
int	pfsync_in_del(caddr_t, int, int, int);
int	pfsync_in_del_c(caddr_t, int, int, int);
int	pfsync_in_bus(caddr_t, int, int, int);
int	pfsync_in_tdb(caddr_t, int, int, int);
int	pfsync_in_ins(caddr_t, int, int, int);
int	pfsync_in_upd(caddr_t, int, int, int);
int	pfsync_in_eof(caddr_t, int, int, int);

int	pfsync_in_error(caddr_t, int, int, int);

void	pfsync_update_state_locked(struct pf_state *);

struct {
	int	(*in)(caddr_t, int, int, int);
	size_t	len;
} pfsync_acts[] = {
	/* PFSYNC_ACT_CLR */
	{ pfsync_in_clr,	sizeof(struct pfsync_clr) },
	/* PFSYNC_ACT_OINS */
	{ pfsync_in_error,	0 },
	/* PFSYNC_ACT_INS_ACK */
	{ pfsync_in_iack,	sizeof(struct pfsync_ins_ack) },
	/* PFSYNC_ACT_OUPD */
	{ pfsync_in_error,	0 },
	/* PFSYNC_ACT_UPD_C */
	{ pfsync_in_upd_c,	sizeof(struct pfsync_upd_c) },
	/* PFSYNC_ACT_UPD_REQ */
	{ pfsync_in_ureq,	sizeof(struct pfsync_upd_req) },
	/* PFSYNC_ACT_DEL */
	{ pfsync_in_del,	sizeof(struct pfsync_state) },
	/* PFSYNC_ACT_DEL_C */
	{ pfsync_in_del_c,	sizeof(struct pfsync_del_c) },
	/* PFSYNC_ACT_INS_F */
	{ pfsync_in_error,	0 },
	/* PFSYNC_ACT_DEL_F */
	{ pfsync_in_error,	0 },
	/* PFSYNC_ACT_BUS */
	{ pfsync_in_bus,	sizeof(struct pfsync_bus) },
	/* PFSYNC_ACT_OTDB */
	{ pfsync_in_error,	0 },
	/* PFSYNC_ACT_EOF */
	{ pfsync_in_error,	0 },
	/* PFSYNC_ACT_INS */
	{ pfsync_in_ins,	sizeof(struct pfsync_state) },
	/* PFSYNC_ACT_UPD */
	{ pfsync_in_upd,	sizeof(struct pfsync_state) },
	/* PFSYNC_ACT_TDB */
	{ pfsync_in_tdb,	sizeof(struct pfsync_tdb) },
};

struct pfsync_q {
	void		(*write)(struct pf_state *, void *);
	size_t		len;
	u_int8_t	action;
};

/* we have one of these for every PFSYNC_S_ */
void	pfsync_out_state(struct pf_state *, void *);
void	pfsync_out_iack(struct pf_state *, void *);
void	pfsync_out_upd_c(struct pf_state *, void *);
void	pfsync_out_del(struct pf_state *, void *);

struct pfsync_q pfsync_qs[] = {
	{ pfsync_out_iack,  sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK },
	{ pfsync_out_upd_c, sizeof(struct pfsync_upd_c),   PFSYNC_ACT_UPD_C },
	{ pfsync_out_del,   sizeof(struct pfsync_del_c),   PFSYNC_ACT_DEL_C },
	{ pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_INS },
	{ pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_UPD }
};

void	pfsync_q_ins(struct pf_state *, int);
void	pfsync_q_del(struct pf_state *);

struct pfsync_upd_req_item {
	TAILQ_ENTRY(pfsync_upd_req_item)	ur_entry;
	struct pfsync_upd_req			ur_msg;
};
TAILQ_HEAD(pfsync_upd_reqs, pfsync_upd_req_item);

struct pfsync_deferral {
	TAILQ_ENTRY(pfsync_deferral)	pd_entry;
	struct pf_state			*pd_st;
	struct mbuf			*pd_m;
	uint64_t			pd_deadline;
};
TAILQ_HEAD(pfsync_deferrals, pfsync_deferral);

#define PFSYNC_PLSIZE	MAX(sizeof(struct pfsync_upd_req_item), \
	    sizeof(struct pfsync_deferral))

void	pfsync_out_tdb(struct tdb *, void *);

struct pfsync_softc {
	struct ifnet		sc_if;
	unsigned int		sc_sync_ifidx;

	struct pool		sc_pool;

	struct ip_moptions	sc_imo;

	struct in_addr		sc_sync_peer;
	u_int8_t		sc_maxupdates;

	struct ip		sc_template;

	struct pf_state_queue	sc_qs[PFSYNC_S_COUNT];
	struct mutex		sc_mtx[PFSYNC_S_COUNT];
	size_t			sc_len;

	struct pfsync_upd_reqs	sc_upd_req_list;
	struct mutex		sc_upd_req_mtx;

	int			sc_initial_bulk;
	int			sc_link_demoted;

	int			sc_defer;
	struct pfsync_deferrals	sc_deferrals;
	u_int			sc_deferred;
	struct mutex		sc_deferrals_mtx;
	struct timeout		sc_deferrals_tmo;

	void			*sc_plus;
	size_t			sc_pluslen;

	u_int32_t		sc_ureq_sent;
	int			sc_bulk_tries;
	struct timeout		sc_bulkfail_tmo;

	u_int32_t		sc_ureq_received;
	struct pf_state		*sc_bulk_next;
	struct pf_state		*sc_bulk_last;
	struct timeout		sc_bulk_tmo;

	TAILQ_HEAD(, tdb)	sc_tdb_q;
	struct mutex		sc_tdb_mtx;

	struct task		sc_ltask;
	struct task		sc_dtask;

	struct timeout		sc_tmo;
};

struct pfsync_snapshot {
	struct pfsync_softc	*sn_sc;
	struct pf_state_queue	sn_qs[PFSYNC_S_COUNT];
	struct pfsync_upd_reqs	sn_upd_req_list;
	TAILQ_HEAD(, tdb)	sn_tdb_q;
	size_t			sn_len;
	void			*sn_plus;
	size_t			sn_pluslen;
};

struct pfsync_softc	*pfsyncif = NULL;
struct cpumem		*pfsynccounters;

void	pfsyncattach(int);
int	pfsync_clone_create(struct if_clone *, int);
int	pfsync_clone_destroy(struct ifnet *);
int	pfsync_alloc_scrub_memory(struct pfsync_state_peer *,
	    struct pf_state_peer *);
void	pfsync_update_net_tdb(struct pfsync_tdb *);
int	pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
	    struct rtentry *);
int	pfsyncioctl(struct ifnet *, u_long, caddr_t);
void	pfsyncstart(struct ifqueue *);
void	pfsync_syncdev_state(void *);
void	pfsync_ifdetach(void *);

void	pfsync_deferred(struct pf_state *, int);
void	pfsync_undefer(struct pfsync_deferral *, int);
void	pfsync_deferrals_tmo(void *);

void	pfsync_cancel_full_update(struct pfsync_softc *);
void	pfsync_request_full_update(struct pfsync_softc *);
void	pfsync_request_update(u_int32_t, u_int64_t);
void	pfsync_update_state_req(struct pf_state *);

void	pfsync_drop(struct pfsync_softc *);
void	pfsync_sendout(void);
void	pfsync_send_plus(void *, size_t);
void	pfsync_timeout(void *);
void	pfsync_tdb_timeout(void *);

void	pfsync_bulk_start(void);
void	pfsync_bulk_status(u_int8_t);
void	pfsync_bulk_update(void *);
void	pfsync_bulk_fail(void *);

void	pfsync_grab_snapshot(struct pfsync_snapshot *, struct pfsync_softc *);
void	pfsync_drop_snapshot(struct pfsync_snapshot *);

void	pfsync_send_dispatch(void *);
void	pfsync_send_pkt(struct mbuf *);

static struct mbuf_queue	pfsync_mq;
static struct task	pfsync_task =
    TASK_INITIALIZER(pfsync_send_dispatch, &pfsync_mq);

#define PFSYNC_MAX_BULKTRIES	12
int	pfsync_sync_ok;

struct if_clone	pfsync_cloner =
    IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create, pfsync_clone_destroy);

void
pfsyncattach(int npfsync)
{
	if_clone_attach(&pfsync_cloner);
	pfsynccounters = counters_alloc(pfsyncs_ncounters);
	mq_init(&pfsync_mq, 4096, IPL_SOFTNET);
}

int
pfsync_clone_create(struct if_clone *ifc, int unit)
{
	struct pfsync_softc *sc;
	struct ifnet *ifp;
	int q;
	static const char *mtx_names[] = {
		"iack_mtx",
		"upd_c_mtx",
		"del_mtx",
		"ins_mtx",
		"upd_mtx",
		"" };

	if (unit != 0)
		return (EINVAL);

	pfsync_sync_ok = 1;

	sc = malloc(sizeof(*pfsyncif), M_DEVBUF, M_WAITOK|M_ZERO);
	for (q = 0; q < PFSYNC_S_COUNT; q++) {
		TAILQ_INIT(&sc->sc_qs[q]);
		mtx_init_flags(&sc->sc_mtx[q], IPL_SOFTNET, mtx_names[q], 0);
	}

	pool_init(&sc->sc_pool, PFSYNC_PLSIZE, 0, IPL_SOFTNET, 0, "pfsync",
	    NULL);
	TAILQ_INIT(&sc->sc_upd_req_list);
	mtx_init(&sc->sc_upd_req_mtx, IPL_SOFTNET);
	TAILQ_INIT(&sc->sc_deferrals);
	mtx_init(&sc->sc_deferrals_mtx, IPL_SOFTNET);
	timeout_set_proc(&sc->sc_deferrals_tmo, pfsync_deferrals_tmo, sc);
	task_set(&sc->sc_ltask, pfsync_syncdev_state, sc);
	task_set(&sc->sc_dtask, pfsync_ifdetach, sc);
	sc->sc_deferred = 0;

	TAILQ_INIT(&sc->sc_tdb_q);
	mtx_init(&sc->sc_tdb_mtx, IPL_SOFTNET);

	sc->sc_len = PFSYNC_MINPKT;
	sc->sc_maxupdates = 128;

	sc->sc_imo.imo_membership = mallocarray(IP_MIN_MEMBERSHIPS,
	    sizeof(struct in_multi *), M_IPMOPTS, M_WAITOK|M_ZERO);
	sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;

	ifp = &sc->sc_if;
	snprintf(ifp->if_xname, sizeof ifp->if_xname, "pfsync%d", unit);
	ifp->if_softc = sc;
	ifp->if_ioctl = pfsyncioctl;
	ifp->if_output = pfsyncoutput;
	ifp->if_qstart = pfsyncstart;
	ifp->if_type = IFT_PFSYNC;
	ifp->if_hdrlen = sizeof(struct pfsync_header);
	ifp->if_mtu = ETHERMTU;
	ifp->if_xflags = IFXF_CLONED | IFXF_MPSAFE;
	timeout_set_proc(&sc->sc_tmo, pfsync_timeout, NULL);
	timeout_set_proc(&sc->sc_bulk_tmo, pfsync_bulk_update, NULL);
	timeout_set_proc(&sc->sc_bulkfail_tmo, pfsync_bulk_fail, NULL);

	if_attach(ifp);
	if_alloc_sadl(ifp);

#if NCARP > 0
	if_addgroup(ifp, "carp");
#endif

#if NBPFILTER > 0
	bpfattach(&sc->sc_if.if_bpf, ifp, DLT_PFSYNC, PFSYNC_HDRLEN);
#endif

	pfsyncif = sc;

	return (0);
}

int
pfsync_clone_destroy(struct ifnet *ifp)
{
	struct pfsync_softc *sc = ifp->if_softc;
	struct ifnet *ifp0;
	struct pfsync_deferral *pd;
	struct pfsync_deferrals deferrals;

	NET_LOCK();

#if NCARP > 0
	if (!pfsync_sync_ok)
		carp_group_demote_adj(&sc->sc_if, -1, "pfsync destroy");
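	/* a link-state demotion may also be pending; undo that one too */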
	if (sc->sc_link_demoted)
		carp_group_demote_adj(&sc->sc_if, -1, "pfsync destroy");
#endif
	if ((ifp0 = if_get(sc->sc_sync_ifidx)) != NULL) {
		if_linkstatehook_del(ifp0, &sc->sc_ltask);
		if_detachhook_del(ifp0, &sc->sc_dtask);
	}
	if_put(ifp0);

	/* XXXSMP breaks atomicity */
	NET_UNLOCK();
	if_detach(ifp);
	NET_LOCK();

	pfsync_drop(sc);

	if (sc->sc_deferred > 0) {
		TAILQ_INIT(&deferrals);
		mtx_enter(&sc->sc_deferrals_mtx);
		TAILQ_CONCAT(&deferrals, &sc->sc_deferrals, pd_entry);
		sc->sc_deferred = 0;
		mtx_leave(&sc->sc_deferrals_mtx);

		while (!TAILQ_EMPTY(&deferrals)) {
			pd = TAILQ_FIRST(&deferrals);
			TAILQ_REMOVE(&deferrals, pd, pd_entry);
			pfsync_undefer(pd, 0);
		}
	}

	pfsyncif = NULL;
	timeout_del(&sc->sc_bulkfail_tmo);
	timeout_del(&sc->sc_bulk_tmo);
	timeout_del(&sc->sc_tmo);

	NET_UNLOCK();

	pool_destroy(&sc->sc_pool);
	free(sc->sc_imo.imo_membership, M_IPMOPTS,
	    sc->sc_imo.imo_max_memberships * sizeof(struct in_multi *));
	free(sc, M_DEVBUF, sizeof(*sc));

	return (0);
}

/*
 * Start output on the pfsync interface.
 */
void
pfsyncstart(struct ifqueue *ifq)
{
	ifq_purge(ifq);
}

void
pfsync_syncdev_state(void *arg)
{
	struct pfsync_softc *sc = arg;
	struct ifnet *ifp;

	if ((sc->sc_if.if_flags & IFF_UP) == 0)
		return;
	if ((ifp = if_get(sc->sc_sync_ifidx)) == NULL)
		return;

	if (ifp->if_link_state == LINK_STATE_DOWN) {
		sc->sc_if.if_flags &= ~IFF_RUNNING;
		if (!sc->sc_link_demoted) {
#if NCARP > 0
			carp_group_demote_adj(&sc->sc_if, 1,
			    "pfsync link state down");
#endif
			sc->sc_link_demoted = 1;
		}

		/* drop everything */
		timeout_del(&sc->sc_tmo);
		pfsync_drop(sc);

		pfsync_cancel_full_update(sc);
	} else if (sc->sc_link_demoted) {
		sc->sc_if.if_flags |= IFF_RUNNING;

		pfsync_request_full_update(sc);
	}

	if_put(ifp);
}

void
pfsync_ifdetach(void *arg)
{
	struct pfsync_softc *sc = arg;
	struct ifnet *ifp;

	if ((ifp = if_get(sc->sc_sync_ifidx)) != NULL) {
		if_linkstatehook_del(ifp, &sc->sc_ltask);
		if_detachhook_del(ifp, &sc->sc_dtask);
	}
	if_put(ifp);

	sc->sc_sync_ifidx = 0;
}

int
pfsync_alloc_scrub_memory(struct pfsync_state_peer *s,
    struct pf_state_peer *d)
{
	if (s->scrub.scrub_flag && d->scrub == NULL) {
		d->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT | PR_ZERO);
		if (d->scrub == NULL)
			return (ENOMEM);
	}

	return (0);
}

void
pfsync_state_export(struct pfsync_state *sp, struct pf_state *st)
{
	pf_state_export(sp, st);
}

int
pfsync_state_import(struct pfsync_state *sp, int flags)
{
	struct pf_state *st = NULL;
	struct pf_state_key *skw = NULL, *sks = NULL;
	struct pf_rule *r = NULL;
	struct pfi_kif *kif;
	int pool_flags;
	int error = ENOMEM;
	int n = 0;

	if (sp->creatorid == 0) {
		DPFPRINTF(LOG_NOTICE, "pfsync_state_import: "
		    "invalid creator id: %08x", ntohl(sp->creatorid));
		return (EINVAL);
	}

	if ((kif = pfi_kif_get(sp->ifname, NULL)) == NULL) {
		DPFPRINTF(LOG_NOTICE, "pfsync_state_import: "
		    "unknown interface: %s", sp->ifname);
		if (flags & PFSYNC_SI_IOCTL)
			return (EINVAL);
		return (0);	/* skip this state */
	}

	if (sp->af == 0)
		return (0);	/* skip this state */

	/*
	 * If the ruleset checksums match or the state is coming from the ioctl,
	 * it's safe to associate the state with the rule of that number.
	 */
	if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) &&
	    (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) <
	    pf_main_ruleset.rules.active.rcount) {
		TAILQ_FOREACH(r, pf_main_ruleset.rules.active.ptr, entries)
			if (ntohl(sp->rule) == n++)
				break;
	} else
		r = &pf_default_rule;

	if ((r->max_states && r->states_cur >= r->max_states))
		goto cleanup;

	if (flags & PFSYNC_SI_IOCTL)
		pool_flags = PR_WAITOK | PR_LIMITFAIL | PR_ZERO;
	else
		pool_flags = PR_NOWAIT | PR_LIMITFAIL | PR_ZERO;

	if ((st = pool_get(&pf_state_pl, pool_flags)) == NULL)
		goto cleanup;

	if ((skw = pf_alloc_state_key(pool_flags)) == NULL)
		goto cleanup;

	if ((sp->key[PF_SK_WIRE].af &&
	    (sp->key[PF_SK_WIRE].af != sp->key[PF_SK_STACK].af)) ||
	    PF_ANEQ(&sp->key[PF_SK_WIRE].addr[0],
	    &sp->key[PF_SK_STACK].addr[0], sp->af) ||
	    PF_ANEQ(&sp->key[PF_SK_WIRE].addr[1],
	    &sp->key[PF_SK_STACK].addr[1], sp->af) ||
	    sp->key[PF_SK_WIRE].port[0] != sp->key[PF_SK_STACK].port[0] ||
	    sp->key[PF_SK_WIRE].port[1] != sp->key[PF_SK_STACK].port[1] ||
	    sp->key[PF_SK_WIRE].rdomain != sp->key[PF_SK_STACK].rdomain) {
		if ((sks = pf_alloc_state_key(pool_flags)) == NULL)
			goto cleanup;
	} else
		sks = skw;

	/* allocate memory for scrub info */
	if (pfsync_alloc_scrub_memory(&sp->src, &st->src) ||
	    pfsync_alloc_scrub_memory(&sp->dst, &st->dst))
		goto cleanup;

	/* copy to state key(s) */
	skw->addr[0] = sp->key[PF_SK_WIRE].addr[0];
	skw->addr[1] = sp->key[PF_SK_WIRE].addr[1];
	skw->port[0] = sp->key[PF_SK_WIRE].port[0];
	skw->port[1] = sp->key[PF_SK_WIRE].port[1];
	skw->rdomain = ntohs(sp->key[PF_SK_WIRE].rdomain);
	PF_REF_INIT(skw->refcnt);
	skw->proto = sp->proto;
	if (!(skw->af = sp->key[PF_SK_WIRE].af))
		skw->af = sp->af;
	if (sks != skw) {
		sks->addr[0] = sp->key[PF_SK_STACK].addr[0];
		sks->addr[1] = sp->key[PF_SK_STACK].addr[1];
		sks->port[0] = sp->key[PF_SK_STACK].port[0];
		sks->port[1] = sp->key[PF_SK_STACK].port[1];
		sks->rdomain = ntohs(sp->key[PF_SK_STACK].rdomain);
		PF_REF_INIT(sks->refcnt);
		if (!(sks->af = sp->key[PF_SK_STACK].af))
			sks->af = sp->af;
		if (sks->af != skw->af) {
			switch (sp->proto) {
			case IPPROTO_ICMP:
				sks->proto = IPPROTO_ICMPV6;
				break;
			case IPPROTO_ICMPV6:
				sks->proto = IPPROTO_ICMP;
				break;
			default:
				sks->proto = sp->proto;
			}
		} else
			sks->proto = sp->proto;

		if (((sks->af != AF_INET) && (sks->af != AF_INET6)) ||
		    ((skw->af != AF_INET) && (skw->af != AF_INET6))) {
			error = EINVAL;
			goto cleanup;
		}

	} else if ((sks->af != AF_INET) && (sks->af != AF_INET6)) {
		error = EINVAL;
		goto cleanup;
	}
	st->rtableid[PF_SK_WIRE] = ntohl(sp->rtableid[PF_SK_WIRE]);
	st->rtableid[PF_SK_STACK] = ntohl(sp->rtableid[PF_SK_STACK]);

	/* copy to state */
	st->rt_addr = sp->rt_addr;
	st->rt = sp->rt;
	st->creation = getuptime() - ntohl(sp->creation);
	st->expire = getuptime();
	if (ntohl(sp->expire)) {
		u_int32_t timeout;

		timeout = r->timeout[sp->timeout];
		if (!timeout)
			timeout = pf_default_rule.timeout[sp->timeout];

		/* sp->expire may have been adaptively scaled by export. */
		st->expire -= timeout - ntohl(sp->expire);
	}

	st->direction = sp->direction;
	st->log = sp->log;
	st->timeout = sp->timeout;
	st->state_flags = ntohs(sp->state_flags);
	st->max_mss = ntohs(sp->max_mss);
	st->min_ttl = sp->min_ttl;
	st->set_tos = sp->set_tos;
	st->set_prio[0] = sp->set_prio[0];
	st->set_prio[1] = sp->set_prio[1];

	st->id = sp->id;
	st->creatorid = sp->creatorid;
	pf_state_peer_ntoh(&sp->src, &st->src);
	pf_state_peer_ntoh(&sp->dst, &st->dst);

	st->rule.ptr = r;
	st->anchor.ptr = NULL;

	st->pfsync_time = getuptime();
	st->sync_state = PFSYNC_S_NONE;

	refcnt_init(&st->refcnt);

	/* XXX when we have anchors, use STATE_INC_COUNTERS */
	r->states_cur++;
	r->states_tot++;

	if (!ISSET(flags, PFSYNC_SI_IOCTL))
		SET(st->state_flags, PFSTATE_NOSYNC);

	/*
	 * We just set the PFSTATE_NOSYNC bit, which prevents
	 * pfsync_insert_state() from inserting the state into pfsync.
	 */
	if (pf_state_insert(kif, &skw, &sks, st) != 0) {
		/* XXX when we have anchors, use STATE_DEC_COUNTERS */
		r->states_cur--;
		error = EEXIST;
		goto cleanup_state;
	}

	if (!ISSET(flags, PFSYNC_SI_IOCTL)) {
		CLR(st->state_flags, PFSTATE_NOSYNC);
		if (ISSET(st->state_flags, PFSTATE_ACK)) {
			pfsync_q_ins(st, PFSYNC_S_IACK);
			schednetisr(NETISR_PFSYNC);
		}
	}
	CLR(st->state_flags, PFSTATE_ACK);

	return (0);

 cleanup:
	if (skw == sks)
		sks = NULL;
	if (skw != NULL)
		pool_put(&pf_state_key_pl, skw);
	if (sks != NULL)
		pool_put(&pf_state_key_pl, sks);

 cleanup_state:	/* pf_state_insert frees the state keys */
	if (st) {
		if (st->dst.scrub)
			pool_put(&pf_state_scrub_pl, st->dst.scrub);
		if (st->src.scrub)
			pool_put(&pf_state_scrub_pl, st->src.scrub);
		pool_put(&pf_state_pl, st);
	}
	return (error);
}

int
pfsync_input(struct mbuf **mp, int *offp, int proto, int af)
{
	struct mbuf *n, *m = *mp;
	struct pfsync_softc *sc = pfsyncif;
	struct ip *ip = mtod(m, struct ip *);
	struct pfsync_header *ph;
	struct pfsync_subheader subh;
	int offset, noff, len, count, mlen, flags = 0;
	int e;

	NET_ASSERT_LOCKED();

	pfsyncstat_inc(pfsyncs_ipackets);

	/* verify that we have a sync interface configured */
	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
	    sc->sc_sync_ifidx == 0 || !pf_status.running)
		goto done;

	/* verify that the packet came in on the right interface */
	if (sc->sc_sync_ifidx != m->m_pkthdr.ph_ifidx) {
		pfsyncstat_inc(pfsyncs_badif);
		goto done;
	}

	sc->sc_if.if_ipackets++;
	sc->sc_if.if_ibytes += m->m_pkthdr.len;

	/* verify that the IP TTL is 255.
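	 * (pfsync always transmits with TTL 255, so a lower value means the
	 * packet was routed here and is discarded.)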
*/ 770 if (ip->ip_ttl != PFSYNC_DFLTTL) { 771 pfsyncstat_inc(pfsyncs_badttl); 772 goto done; 773 } 774 775 offset = ip->ip_hl << 2; 776 n = m_pulldown(m, offset, sizeof(*ph), &noff); 777 if (n == NULL) { 778 pfsyncstat_inc(pfsyncs_hdrops); 779 return IPPROTO_DONE; 780 } 781 ph = (struct pfsync_header *)(n->m_data + noff); 782 783 /* verify the version */ 784 if (ph->version != PFSYNC_VERSION) { 785 pfsyncstat_inc(pfsyncs_badver); 786 goto done; 787 } 788 len = ntohs(ph->len) + offset; 789 if (m->m_pkthdr.len < len) { 790 pfsyncstat_inc(pfsyncs_badlen); 791 goto done; 792 } 793 794 if (!bcmp(&ph->pfcksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH)) 795 flags = PFSYNC_SI_CKSUM; 796 797 offset += sizeof(*ph); 798 while (offset <= len - sizeof(subh)) { 799 m_copydata(m, offset, sizeof(subh), &subh); 800 offset += sizeof(subh); 801 802 mlen = subh.len << 2; 803 count = ntohs(subh.count); 804 805 if (subh.action >= PFSYNC_ACT_MAX || 806 subh.action >= nitems(pfsync_acts) || 807 mlen < pfsync_acts[subh.action].len) { 808 /* 809 * subheaders are always followed by at least one 810 * message, so if the peer is new 811 * enough to tell us how big its messages are then we 812 * know enough to skip them. 813 */ 814 if (count > 0 && mlen > 0) { 815 offset += count * mlen; 816 continue; 817 } 818 pfsyncstat_inc(pfsyncs_badact); 819 goto done; 820 } 821 822 n = m_pulldown(m, offset, mlen * count, &noff); 823 if (n == NULL) { 824 pfsyncstat_inc(pfsyncs_badlen); 825 return IPPROTO_DONE; 826 } 827 828 e = pfsync_acts[subh.action].in(n->m_data + noff, mlen, count, 829 flags); 830 if (e != 0) 831 goto done; 832 833 offset += mlen * count; 834 } 835 836 done: 837 m_freem(m); 838 return IPPROTO_DONE; 839 } 840 841 int 842 pfsync_in_clr(caddr_t buf, int len, int count, int flags) 843 { 844 struct pfsync_clr *clr; 845 struct pf_state *st, *nexts; 846 struct pfi_kif *kif; 847 u_int32_t creatorid; 848 int i; 849 850 PF_LOCK(); 851 for (i = 0; i < count; i++) { 852 clr = (struct pfsync_clr *)buf + len * i; 853 kif = NULL; 854 creatorid = clr->creatorid; 855 if (strlen(clr->ifname) && 856 (kif = pfi_kif_find(clr->ifname)) == NULL) 857 continue; 858 859 PF_STATE_ENTER_WRITE(); 860 for (st = RB_MIN(pf_state_tree_id, &tree_id); st; st = nexts) { 861 nexts = RB_NEXT(pf_state_tree_id, &tree_id, st); 862 if (st->creatorid == creatorid && 863 ((kif && st->kif == kif) || !kif)) { 864 SET(st->state_flags, PFSTATE_NOSYNC); 865 pf_remove_state(st); 866 } 867 } 868 PF_STATE_EXIT_WRITE(); 869 } 870 PF_UNLOCK(); 871 872 return (0); 873 } 874 875 int 876 pfsync_in_ins(caddr_t buf, int len, int count, int flags) 877 { 878 struct pfsync_state *sp; 879 sa_family_t af1, af2; 880 int i; 881 882 PF_LOCK(); 883 for (i = 0; i < count; i++) { 884 sp = (struct pfsync_state *)(buf + len * i); 885 af1 = sp->key[0].af; 886 af2 = sp->key[1].af; 887 888 /* check for invalid values */ 889 if (sp->timeout >= PFTM_MAX || 890 sp->src.state > PF_TCPS_PROXY_DST || 891 sp->dst.state > PF_TCPS_PROXY_DST || 892 sp->direction > PF_OUT || 893 (((af1 || af2) && 894 ((af1 != AF_INET && af1 != AF_INET6) || 895 (af2 != AF_INET && af2 != AF_INET6))) || 896 (sp->af != AF_INET && sp->af != AF_INET6))) { 897 DPFPRINTF(LOG_NOTICE, 898 "pfsync_input: PFSYNC5_ACT_INS: invalid value"); 899 pfsyncstat_inc(pfsyncs_badval); 900 continue; 901 } 902 903 if (pfsync_state_import(sp, flags) == ENOMEM) { 904 /* drop out, but process the rest of the actions */ 905 break; 906 } 907 } 908 PF_UNLOCK(); 909 910 return (0); 911 } 912 913 int 914 pfsync_in_iack(caddr_t buf, int len, 
    int count, int flags)
{
	struct pfsync_ins_ack *ia;
	struct pf_state_cmp id_key;
	struct pf_state *st;
	int i;

	for (i = 0; i < count; i++) {
		ia = (struct pfsync_ins_ack *)(buf + len * i);

		id_key.id = ia->id;
		id_key.creatorid = ia->creatorid;

		PF_STATE_ENTER_READ();
		st = pf_find_state_byid(&id_key);
		pf_state_ref(st);
		PF_STATE_EXIT_READ();
		if (st == NULL)
			continue;

		if (ISSET(st->state_flags, PFSTATE_ACK))
			pfsync_deferred(st, 0);

		pf_state_unref(st);
	}

	return (0);
}

int
pfsync_upd_tcp(struct pf_state *st, struct pfsync_state_peer *src,
    struct pfsync_state_peer *dst)
{
	int sync = 0;

	/*
	 * The state should never go backwards except
	 * for syn-proxy states.  Neither should the
	 * sequence window slide backwards.
	 */
	if ((st->src.state > src->state &&
	    (st->src.state < PF_TCPS_PROXY_SRC ||
	    src->state >= PF_TCPS_PROXY_SRC)) ||

	    (st->src.state == src->state &&
	    SEQ_GT(st->src.seqlo, ntohl(src->seqlo))))
		sync++;
	else
		pf_state_peer_ntoh(src, &st->src);

	if ((st->dst.state > dst->state) ||

	    (st->dst.state >= TCPS_SYN_SENT &&
	    SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo))))
		sync++;
	else
		pf_state_peer_ntoh(dst, &st->dst);

	return (sync);
}

int
pfsync_in_upd(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_state *sp;
	struct pf_state_cmp id_key;
	struct pf_state *st;
	int sync, error;
	int i;

	for (i = 0; i < count; i++) {
		sp = (struct pfsync_state *)(buf + len * i);

		/* check for invalid values */
		if (sp->timeout >= PFTM_MAX ||
		    sp->src.state > PF_TCPS_PROXY_DST ||
		    sp->dst.state > PF_TCPS_PROXY_DST) {
			DPFPRINTF(LOG_NOTICE,
			    "pfsync_input: PFSYNC_ACT_UPD: invalid value");
			pfsyncstat_inc(pfsyncs_badval);
			continue;
		}

		id_key.id = sp->id;
		id_key.creatorid = sp->creatorid;

		PF_STATE_ENTER_READ();
		st = pf_find_state_byid(&id_key);
		pf_state_ref(st);
		PF_STATE_EXIT_READ();
		if (st == NULL) {
			/* insert the update */
			PF_LOCK();
			error = pfsync_state_import(sp, flags);
			if (error)
				pfsyncstat_inc(pfsyncs_badstate);
			PF_UNLOCK();
			continue;
		}

		if (ISSET(st->state_flags, PFSTATE_ACK))
			pfsync_deferred(st, 1);

		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
			sync = pfsync_upd_tcp(st, &sp->src, &sp->dst);
		else {
			sync = 0;

			/*
			 * Non-TCP protocol state machines always go
			 * forwards.
			 */
			if (st->src.state > sp->src.state)
				sync++;
			else
				pf_state_peer_ntoh(&sp->src, &st->src);

			if (st->dst.state > sp->dst.state)
				sync++;
			else
				pf_state_peer_ntoh(&sp->dst, &st->dst);
		}

		if (sync < 2) {
			pfsync_alloc_scrub_memory(&sp->dst, &st->dst);
			pf_state_peer_ntoh(&sp->dst, &st->dst);
			st->expire = getuptime();
			st->timeout = sp->timeout;
		}
		st->pfsync_time = getuptime();

		if (sync) {
			pfsyncstat_inc(pfsyncs_stale);

			pfsync_update_state(st);
			schednetisr(NETISR_PFSYNC);
		}

		pf_state_unref(st);
	}

	return (0);
}

int
pfsync_in_upd_c(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_upd_c *up;
	struct pf_state_cmp id_key;
	struct pf_state *st;

	int sync;

	int i;

	for (i = 0; i < count; i++) {
		up = (struct pfsync_upd_c *)(buf + len * i);

		/* check for invalid values */
		if (up->timeout >= PFTM_MAX ||
		    up->src.state > PF_TCPS_PROXY_DST ||
		    up->dst.state > PF_TCPS_PROXY_DST) {
			DPFPRINTF(LOG_NOTICE,
			    "pfsync_input: PFSYNC_ACT_UPD_C: invalid value");
			pfsyncstat_inc(pfsyncs_badval);
			continue;
		}

		id_key.id = up->id;
		id_key.creatorid = up->creatorid;

		PF_STATE_ENTER_READ();
		st = pf_find_state_byid(&id_key);
		pf_state_ref(st);
		PF_STATE_EXIT_READ();
		if (st == NULL) {
			/* We don't have this state. Ask for it. */
			pfsync_request_update(id_key.creatorid, id_key.id);
			continue;
		}

		if (ISSET(st->state_flags, PFSTATE_ACK))
			pfsync_deferred(st, 1);

		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
			sync = pfsync_upd_tcp(st, &up->src, &up->dst);
		else {
			sync = 0;
			/*
			 * Non-TCP protocol state machines always go
			 * forwards.
			 */
			if (st->src.state > up->src.state)
				sync++;
			else
				pf_state_peer_ntoh(&up->src, &st->src);

			if (st->dst.state > up->dst.state)
				sync++;
			else
				pf_state_peer_ntoh(&up->dst, &st->dst);
		}
		if (sync < 2) {
			pfsync_alloc_scrub_memory(&up->dst, &st->dst);
			pf_state_peer_ntoh(&up->dst, &st->dst);
			st->expire = getuptime();
			st->timeout = up->timeout;
		}
		st->pfsync_time = getuptime();

		if (sync) {
			pfsyncstat_inc(pfsyncs_stale);

			pfsync_update_state(st);
			schednetisr(NETISR_PFSYNC);
		}

		pf_state_unref(st);
	}

	return (0);
}

int
pfsync_in_ureq(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_upd_req *ur;
	int i;

	struct pf_state_cmp id_key;
	struct pf_state *st;

	for (i = 0; i < count; i++) {
		ur = (struct pfsync_upd_req *)(buf + len * i);

		id_key.id = ur->id;
		id_key.creatorid = ur->creatorid;

		if (id_key.id == 0 && id_key.creatorid == 0)
			pfsync_bulk_start();
		else {
			PF_STATE_ENTER_READ();
			st = pf_find_state_byid(&id_key);
			pf_state_ref(st);
			PF_STATE_EXIT_READ();
			if (st == NULL) {
				pfsyncstat_inc(pfsyncs_badstate);
				continue;
			}
			if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
				pf_state_unref(st);
				continue;
			}

			pfsync_update_state_req(st);
			pf_state_unref(st);
		}
	}

	return (0);
}

int
pfsync_in_del(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_state *sp;
	struct pf_state_cmp id_key;
	struct pf_state *st;
	int i;

	PF_STATE_ENTER_WRITE();
	for (i = 0; i < count; i++) {
		sp = (struct pfsync_state *)(buf + len * i);

		id_key.id = sp->id;
		id_key.creatorid = sp->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL) {
			pfsyncstat_inc(pfsyncs_badstate);
			continue;
		}
		SET(st->state_flags, PFSTATE_NOSYNC);
		pf_remove_state(st);
	}
	PF_STATE_EXIT_WRITE();

	return (0);
}

int
pfsync_in_del_c(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_del_c *sp;
	struct pf_state_cmp id_key;
	struct pf_state *st;
	int i;

	PF_LOCK();
	PF_STATE_ENTER_WRITE();
	for (i = 0; i < count; i++) {
		sp = (struct pfsync_del_c *)(buf + len * i);

		id_key.id = sp->id;
		id_key.creatorid = sp->creatorid;

		st = pf_find_state_byid(&id_key);
		if
(st == NULL) { 1222 pfsyncstat_inc(pfsyncs_badstate); 1223 continue; 1224 } 1225 1226 SET(st->state_flags, PFSTATE_NOSYNC); 1227 pf_remove_state(st); 1228 } 1229 PF_STATE_EXIT_WRITE(); 1230 PF_UNLOCK(); 1231 1232 return (0); 1233 } 1234 1235 int 1236 pfsync_in_bus(caddr_t buf, int len, int count, int flags) 1237 { 1238 struct pfsync_softc *sc = pfsyncif; 1239 struct pfsync_bus *bus; 1240 1241 /* If we're not waiting for a bulk update, who cares. */ 1242 if (sc->sc_ureq_sent == 0) 1243 return (0); 1244 1245 bus = (struct pfsync_bus *)buf; 1246 1247 switch (bus->status) { 1248 case PFSYNC_BUS_START: 1249 timeout_add(&sc->sc_bulkfail_tmo, 4 * hz + 1250 pf_pool_limits[PF_LIMIT_STATES].limit / 1251 ((sc->sc_if.if_mtu - PFSYNC_MINPKT) / 1252 sizeof(struct pfsync_state))); 1253 DPFPRINTF(LOG_INFO, "received bulk update start"); 1254 break; 1255 1256 case PFSYNC_BUS_END: 1257 if (getuptime() - ntohl(bus->endtime) >= 1258 sc->sc_ureq_sent) { 1259 /* that's it, we're happy */ 1260 sc->sc_ureq_sent = 0; 1261 sc->sc_bulk_tries = 0; 1262 timeout_del(&sc->sc_bulkfail_tmo); 1263 #if NCARP > 0 1264 if (!pfsync_sync_ok) 1265 carp_group_demote_adj(&sc->sc_if, -1, 1266 sc->sc_link_demoted ? 1267 "pfsync link state up" : 1268 "pfsync bulk done"); 1269 if (sc->sc_initial_bulk) { 1270 carp_group_demote_adj(&sc->sc_if, -32, 1271 "pfsync init"); 1272 sc->sc_initial_bulk = 0; 1273 } 1274 #endif 1275 pfsync_sync_ok = 1; 1276 sc->sc_link_demoted = 0; 1277 DPFPRINTF(LOG_INFO, "received valid bulk update end"); 1278 } else { 1279 DPFPRINTF(LOG_WARNING, "received invalid " 1280 "bulk update end: bad timestamp"); 1281 } 1282 break; 1283 } 1284 1285 return (0); 1286 } 1287 1288 int 1289 pfsync_in_tdb(caddr_t buf, int len, int count, int flags) 1290 { 1291 #if defined(IPSEC) 1292 struct pfsync_tdb *tp; 1293 int i; 1294 1295 for (i = 0; i < count; i++) { 1296 tp = (struct pfsync_tdb *)(buf + len * i); 1297 pfsync_update_net_tdb(tp); 1298 } 1299 #endif 1300 1301 return (0); 1302 } 1303 1304 #if defined(IPSEC) 1305 /* Update an in-kernel tdb. Silently fail if no tdb is found. */ 1306 void 1307 pfsync_update_net_tdb(struct pfsync_tdb *pt) 1308 { 1309 struct tdb *tdb; 1310 1311 NET_ASSERT_LOCKED(); 1312 1313 /* check for invalid values */ 1314 if (ntohl(pt->spi) <= SPI_RESERVED_MAX || 1315 (pt->dst.sa.sa_family != AF_INET && 1316 pt->dst.sa.sa_family != AF_INET6)) 1317 goto bad; 1318 1319 tdb = gettdb(ntohs(pt->rdomain), pt->spi, 1320 (union sockaddr_union *)&pt->dst, pt->sproto); 1321 if (tdb) { 1322 pt->rpl = betoh64(pt->rpl); 1323 pt->cur_bytes = betoh64(pt->cur_bytes); 1324 1325 /* Neither replay nor byte counter should ever decrease. */ 1326 if (pt->rpl < tdb->tdb_rpl || 1327 pt->cur_bytes < tdb->tdb_cur_bytes) { 1328 tdb_unref(tdb); 1329 goto bad; 1330 } 1331 1332 tdb->tdb_rpl = pt->rpl; 1333 tdb->tdb_cur_bytes = pt->cur_bytes; 1334 tdb_unref(tdb); 1335 } 1336 return; 1337 1338 bad: 1339 DPFPRINTF(LOG_WARNING, "pfsync_insert: PFSYNC_ACT_TDB_UPD: " 1340 "invalid value"); 1341 pfsyncstat_inc(pfsyncs_badstate); 1342 return; 1343 } 1344 #endif 1345 1346 1347 int 1348 pfsync_in_eof(caddr_t buf, int len, int count, int flags) 1349 { 1350 if (len > 0 || count > 0) 1351 pfsyncstat_inc(pfsyncs_badact); 1352 1353 /* we're done. 
let the caller return */ 1354 return (1); 1355 } 1356 1357 int 1358 pfsync_in_error(caddr_t buf, int len, int count, int flags) 1359 { 1360 pfsyncstat_inc(pfsyncs_badact); 1361 return (-1); 1362 } 1363 1364 int 1365 pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, 1366 struct rtentry *rt) 1367 { 1368 m_freem(m); /* drop packet */ 1369 return (EAFNOSUPPORT); 1370 } 1371 1372 int 1373 pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 1374 { 1375 struct proc *p = curproc; 1376 struct pfsync_softc *sc = ifp->if_softc; 1377 struct ifreq *ifr = (struct ifreq *)data; 1378 struct ip_moptions *imo = &sc->sc_imo; 1379 struct pfsyncreq pfsyncr; 1380 struct ifnet *ifp0, *sifp; 1381 struct ip *ip; 1382 int error; 1383 1384 switch (cmd) { 1385 case SIOCSIFFLAGS: 1386 if ((ifp->if_flags & IFF_RUNNING) == 0 && 1387 (ifp->if_flags & IFF_UP)) { 1388 ifp->if_flags |= IFF_RUNNING; 1389 1390 #if NCARP > 0 1391 sc->sc_initial_bulk = 1; 1392 carp_group_demote_adj(&sc->sc_if, 32, "pfsync init"); 1393 #endif 1394 1395 pfsync_request_full_update(sc); 1396 } 1397 if ((ifp->if_flags & IFF_RUNNING) && 1398 (ifp->if_flags & IFF_UP) == 0) { 1399 ifp->if_flags &= ~IFF_RUNNING; 1400 1401 /* drop everything */ 1402 timeout_del(&sc->sc_tmo); 1403 pfsync_drop(sc); 1404 1405 pfsync_cancel_full_update(sc); 1406 } 1407 break; 1408 case SIOCSIFMTU: 1409 if ((ifp0 = if_get(sc->sc_sync_ifidx)) == NULL) 1410 return (EINVAL); 1411 error = 0; 1412 if (ifr->ifr_mtu <= PFSYNC_MINPKT || 1413 ifr->ifr_mtu > ifp0->if_mtu) { 1414 error = EINVAL; 1415 } 1416 if_put(ifp0); 1417 if (error) 1418 return error; 1419 if (ifr->ifr_mtu < ifp->if_mtu) 1420 pfsync_sendout(); 1421 ifp->if_mtu = ifr->ifr_mtu; 1422 break; 1423 case SIOCGETPFSYNC: 1424 bzero(&pfsyncr, sizeof(pfsyncr)); 1425 if ((ifp0 = if_get(sc->sc_sync_ifidx)) != NULL) { 1426 strlcpy(pfsyncr.pfsyncr_syncdev, 1427 ifp0->if_xname, IFNAMSIZ); 1428 } 1429 if_put(ifp0); 1430 pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer; 1431 pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates; 1432 pfsyncr.pfsyncr_defer = sc->sc_defer; 1433 return (copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr))); 1434 1435 case SIOCSETPFSYNC: 1436 if ((error = suser(p)) != 0) 1437 return (error); 1438 if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr)))) 1439 return (error); 1440 1441 if (pfsyncr.pfsyncr_syncpeer.s_addr == 0) 1442 sc->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP; 1443 else 1444 sc->sc_sync_peer.s_addr = 1445 pfsyncr.pfsyncr_syncpeer.s_addr; 1446 1447 if (pfsyncr.pfsyncr_maxupdates > 255) 1448 return (EINVAL); 1449 sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates; 1450 1451 sc->sc_defer = pfsyncr.pfsyncr_defer; 1452 1453 if (pfsyncr.pfsyncr_syncdev[0] == 0) { 1454 if ((ifp0 = if_get(sc->sc_sync_ifidx)) != NULL) { 1455 if_linkstatehook_del(ifp0, &sc->sc_ltask); 1456 if_detachhook_del(ifp0, &sc->sc_dtask); 1457 } 1458 if_put(ifp0); 1459 sc->sc_sync_ifidx = 0; 1460 if (imo->imo_num_memberships > 0) { 1461 in_delmulti(imo->imo_membership[ 1462 --imo->imo_num_memberships]); 1463 imo->imo_ifidx = 0; 1464 } 1465 break; 1466 } 1467 1468 if ((sifp = if_unit(pfsyncr.pfsyncr_syncdev)) == NULL) 1469 return (EINVAL); 1470 1471 ifp0 = if_get(sc->sc_sync_ifidx); 1472 1473 if (sifp->if_mtu < sc->sc_if.if_mtu || (ifp0 != NULL && 1474 sifp->if_mtu < ifp0->if_mtu) || 1475 sifp->if_mtu < MCLBYTES - sizeof(struct ip)) 1476 pfsync_sendout(); 1477 1478 if (ifp0) { 1479 if_linkstatehook_del(ifp0, &sc->sc_ltask); 1480 if_detachhook_del(ifp0, &sc->sc_dtask); 1481 } 1482 if_put(ifp0); 1483 
sc->sc_sync_ifidx = sifp->if_index; 1484 1485 if (imo->imo_num_memberships > 0) { 1486 in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); 1487 imo->imo_ifidx = 0; 1488 } 1489 1490 if (sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) { 1491 struct in_addr addr; 1492 1493 if (!(sifp->if_flags & IFF_MULTICAST)) { 1494 sc->sc_sync_ifidx = 0; 1495 if_put(sifp); 1496 return (EADDRNOTAVAIL); 1497 } 1498 1499 addr.s_addr = INADDR_PFSYNC_GROUP; 1500 1501 if ((imo->imo_membership[0] = 1502 in_addmulti(&addr, sifp)) == NULL) { 1503 sc->sc_sync_ifidx = 0; 1504 if_put(sifp); 1505 return (ENOBUFS); 1506 } 1507 imo->imo_num_memberships++; 1508 imo->imo_ifidx = sc->sc_sync_ifidx; 1509 imo->imo_ttl = PFSYNC_DFLTTL; 1510 imo->imo_loop = 0; 1511 } 1512 1513 ip = &sc->sc_template; 1514 bzero(ip, sizeof(*ip)); 1515 ip->ip_v = IPVERSION; 1516 ip->ip_hl = sizeof(sc->sc_template) >> 2; 1517 ip->ip_tos = IPTOS_LOWDELAY; 1518 /* len and id are set later */ 1519 ip->ip_off = htons(IP_DF); 1520 ip->ip_ttl = PFSYNC_DFLTTL; 1521 ip->ip_p = IPPROTO_PFSYNC; 1522 ip->ip_src.s_addr = INADDR_ANY; 1523 ip->ip_dst.s_addr = sc->sc_sync_peer.s_addr; 1524 1525 if_linkstatehook_add(sifp, &sc->sc_ltask); 1526 if_detachhook_add(sifp, &sc->sc_dtask); 1527 if_put(sifp); 1528 1529 pfsync_request_full_update(sc); 1530 1531 break; 1532 1533 default: 1534 return (ENOTTY); 1535 } 1536 1537 return (0); 1538 } 1539 1540 void 1541 pfsync_out_state(struct pf_state *st, void *buf) 1542 { 1543 struct pfsync_state *sp = buf; 1544 1545 pfsync_state_export(sp, st); 1546 } 1547 1548 void 1549 pfsync_out_iack(struct pf_state *st, void *buf) 1550 { 1551 struct pfsync_ins_ack *iack = buf; 1552 1553 iack->id = st->id; 1554 iack->creatorid = st->creatorid; 1555 } 1556 1557 void 1558 pfsync_out_upd_c(struct pf_state *st, void *buf) 1559 { 1560 struct pfsync_upd_c *up = buf; 1561 1562 bzero(up, sizeof(*up)); 1563 up->id = st->id; 1564 pf_state_peer_hton(&st->src, &up->src); 1565 pf_state_peer_hton(&st->dst, &up->dst); 1566 up->creatorid = st->creatorid; 1567 up->timeout = st->timeout; 1568 } 1569 1570 void 1571 pfsync_out_del(struct pf_state *st, void *buf) 1572 { 1573 struct pfsync_del_c *dp = buf; 1574 1575 dp->id = st->id; 1576 dp->creatorid = st->creatorid; 1577 1578 SET(st->state_flags, PFSTATE_NOSYNC); 1579 } 1580 1581 void 1582 pfsync_grab_snapshot(struct pfsync_snapshot *sn, struct pfsync_softc *sc) 1583 { 1584 int q; 1585 1586 sn->sn_sc = sc; 1587 1588 for (q = 0; q < PFSYNC_S_COUNT; q++) 1589 mtx_enter(&sc->sc_mtx[q]); 1590 1591 mtx_enter(&sc->sc_upd_req_mtx); 1592 mtx_enter(&sc->sc_tdb_mtx); 1593 1594 for (q = 0; q < PFSYNC_S_COUNT; q++) { 1595 TAILQ_INIT(&sn->sn_qs[q]); 1596 TAILQ_CONCAT(&sn->sn_qs[q], &sc->sc_qs[q], sync_list); 1597 } 1598 1599 TAILQ_INIT(&sn->sn_upd_req_list); 1600 TAILQ_CONCAT(&sn->sn_upd_req_list, &sc->sc_upd_req_list, ur_entry); 1601 1602 TAILQ_INIT(&sn->sn_tdb_q); 1603 TAILQ_CONCAT(&sn->sn_tdb_q, &sc->sc_tdb_q, tdb_sync_entry); 1604 1605 sn->sn_len = sc->sc_len; 1606 sc->sc_len = PFSYNC_MINPKT; 1607 1608 sn->sn_plus = sc->sc_plus; 1609 sc->sc_plus = NULL; 1610 sn->sn_pluslen = sc->sc_pluslen; 1611 sc->sc_pluslen = 0; 1612 1613 mtx_leave(&sc->sc_tdb_mtx); 1614 mtx_leave(&sc->sc_upd_req_mtx); 1615 1616 for (q = (PFSYNC_S_COUNT - 1); q >= 0; q--) 1617 mtx_leave(&sc->sc_mtx[q]); 1618 } 1619 1620 void 1621 pfsync_drop_snapshot(struct pfsync_snapshot *sn) 1622 { 1623 struct pf_state *st; 1624 struct pfsync_upd_req_item *ur; 1625 struct tdb *t; 1626 int q; 1627 1628 1629 for (q = 0; q < PFSYNC_S_COUNT; q++) { 1630 if 
(TAILQ_EMPTY(&sn->sn_qs[q])) 1631 continue; 1632 1633 while ((st = TAILQ_FIRST(&sn->sn_qs[q])) != NULL) { 1634 TAILQ_REMOVE(&sn->sn_qs[q], st, sync_list); 1635 #ifdef PFSYNC_DEBUG 1636 KASSERT(st->sync_state == q); 1637 #endif 1638 st->sync_state = PFSYNC_S_NONE; 1639 pf_state_unref(st); 1640 } 1641 } 1642 1643 while ((ur = TAILQ_FIRST(&sn->sn_upd_req_list)) != NULL) { 1644 TAILQ_REMOVE(&sn->sn_upd_req_list, ur, ur_entry); 1645 pool_put(&sn->sn_sc->sc_pool, ur); 1646 } 1647 1648 while ((t = TAILQ_FIRST(&sn->sn_tdb_q)) != NULL) { 1649 TAILQ_REMOVE(&sn->sn_tdb_q, t, tdb_sync_entry); 1650 CLR(t->tdb_flags, TDBF_PFSYNC); 1651 } 1652 } 1653 1654 int 1655 pfsync_is_snapshot_empty(struct pfsync_snapshot *sn) 1656 { 1657 int q; 1658 1659 for (q = 0; q < PFSYNC_S_COUNT; q++) 1660 if (!TAILQ_EMPTY(&sn->sn_qs[q])) 1661 return (0); 1662 1663 if (!TAILQ_EMPTY(&sn->sn_upd_req_list)) 1664 return (0); 1665 1666 if (!TAILQ_EMPTY(&sn->sn_tdb_q)) 1667 return (0); 1668 1669 return (sn->sn_plus == NULL); 1670 } 1671 1672 void 1673 pfsync_drop(struct pfsync_softc *sc) 1674 { 1675 struct pfsync_snapshot sn; 1676 1677 pfsync_grab_snapshot(&sn, sc); 1678 pfsync_drop_snapshot(&sn); 1679 } 1680 1681 void 1682 pfsync_send_dispatch(void *xmq) 1683 { 1684 struct mbuf_queue *mq = xmq; 1685 struct pfsync_softc *sc; 1686 struct mbuf *m; 1687 struct mbuf_list ml; 1688 int error; 1689 1690 mq_delist(mq, &ml); 1691 if (ml_empty(&ml)) 1692 return; 1693 1694 NET_LOCK(); 1695 sc = pfsyncif; 1696 if (sc == NULL) { 1697 ml_purge(&ml); 1698 goto done; 1699 } 1700 1701 while ((m = ml_dequeue(&ml)) != NULL) { 1702 if ((error = ip_output(m, NULL, NULL, IP_RAWOUTPUT, 1703 &sc->sc_imo, NULL, 0)) == 0) 1704 pfsyncstat_inc(pfsyncs_opackets); 1705 else { 1706 DPFPRINTF(LOG_DEBUG, 1707 "ip_output() @ %s failed (%d)\n", __func__, error); 1708 pfsyncstat_inc(pfsyncs_oerrors); 1709 } 1710 } 1711 done: 1712 NET_UNLOCK(); 1713 } 1714 1715 void 1716 pfsync_send_pkt(struct mbuf *m) 1717 { 1718 if (mq_enqueue(&pfsync_mq, m) != 0) { 1719 pfsyncstat_inc(pfsyncs_oerrors); 1720 DPFPRINTF(LOG_DEBUG, "mq_enqueue() @ %s failed, queue full\n", 1721 __func__); 1722 } else 1723 task_add(net_tq(0), &pfsync_task); 1724 } 1725 1726 void 1727 pfsync_sendout(void) 1728 { 1729 struct pfsync_snapshot sn; 1730 struct pfsync_softc *sc = pfsyncif; 1731 #if NBPFILTER > 0 1732 struct ifnet *ifp = &sc->sc_if; 1733 #endif 1734 struct mbuf *m; 1735 struct ip *ip; 1736 struct pfsync_header *ph; 1737 struct pfsync_subheader *subh; 1738 struct pf_state *st; 1739 struct pfsync_upd_req_item *ur; 1740 struct tdb *t; 1741 int offset; 1742 int q, count = 0; 1743 1744 if (sc == NULL || sc->sc_len == PFSYNC_MINPKT) 1745 return; 1746 1747 if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING) || 1748 #if NBPFILTER > 0 1749 (ifp->if_bpf == NULL && sc->sc_sync_ifidx == 0)) { 1750 #else 1751 sc->sc_sync_ifidx == 0) { 1752 #endif 1753 pfsync_drop(sc); 1754 return; 1755 } 1756 1757 pfsync_grab_snapshot(&sn, sc); 1758 1759 /* 1760 * Check below is sufficient to prevent us from sending empty packets, 1761 * but it does not stop us from sending short packets. 
1762 */ 1763 if (pfsync_is_snapshot_empty(&sn)) 1764 return; 1765 1766 MGETHDR(m, M_DONTWAIT, MT_DATA); 1767 if (m == NULL) { 1768 sc->sc_if.if_oerrors++; 1769 pfsyncstat_inc(pfsyncs_onomem); 1770 pfsync_drop_snapshot(&sn); 1771 return; 1772 } 1773 1774 if (max_linkhdr + sn.sn_len > MHLEN) { 1775 MCLGETL(m, M_DONTWAIT, max_linkhdr + sn.sn_len); 1776 if (!ISSET(m->m_flags, M_EXT)) { 1777 m_free(m); 1778 sc->sc_if.if_oerrors++; 1779 pfsyncstat_inc(pfsyncs_onomem); 1780 pfsync_drop_snapshot(&sn); 1781 return; 1782 } 1783 } 1784 m->m_data += max_linkhdr; 1785 m->m_len = m->m_pkthdr.len = sn.sn_len; 1786 1787 /* build the ip header */ 1788 ip = mtod(m, struct ip *); 1789 bcopy(&sc->sc_template, ip, sizeof(*ip)); 1790 offset = sizeof(*ip); 1791 1792 ip->ip_len = htons(m->m_pkthdr.len); 1793 ip->ip_id = htons(ip_randomid()); 1794 1795 /* build the pfsync header */ 1796 ph = (struct pfsync_header *)(m->m_data + offset); 1797 bzero(ph, sizeof(*ph)); 1798 offset += sizeof(*ph); 1799 1800 ph->version = PFSYNC_VERSION; 1801 ph->len = htons(sn.sn_len - sizeof(*ip)); 1802 bcopy(pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH); 1803 1804 if (!TAILQ_EMPTY(&sn.sn_upd_req_list)) { 1805 subh = (struct pfsync_subheader *)(m->m_data + offset); 1806 offset += sizeof(*subh); 1807 1808 count = 0; 1809 while ((ur = TAILQ_FIRST(&sn.sn_upd_req_list)) != NULL) { 1810 TAILQ_REMOVE(&sn.sn_upd_req_list, ur, ur_entry); 1811 1812 bcopy(&ur->ur_msg, m->m_data + offset, 1813 sizeof(ur->ur_msg)); 1814 offset += sizeof(ur->ur_msg); 1815 1816 pool_put(&sc->sc_pool, ur); 1817 1818 count++; 1819 } 1820 1821 bzero(subh, sizeof(*subh)); 1822 subh->len = sizeof(ur->ur_msg) >> 2; 1823 subh->action = PFSYNC_ACT_UPD_REQ; 1824 subh->count = htons(count); 1825 } 1826 1827 /* has someone built a custom region for us to add? */ 1828 if (sn.sn_plus != NULL) { 1829 bcopy(sn.sn_plus, m->m_data + offset, sn.sn_pluslen); 1830 offset += sn.sn_pluslen; 1831 sn.sn_plus = NULL; /* XXX memory leak ? 
	}

	if (!TAILQ_EMPTY(&sn.sn_tdb_q)) {
		subh = (struct pfsync_subheader *)(m->m_data + offset);
		offset += sizeof(*subh);

		count = 0;
		while ((t = TAILQ_FIRST(&sn.sn_tdb_q)) != NULL) {
			TAILQ_REMOVE(&sn.sn_tdb_q, t, tdb_sync_entry);
			pfsync_out_tdb(t, m->m_data + offset);
			offset += sizeof(struct pfsync_tdb);
			CLR(t->tdb_flags, TDBF_PFSYNC);
			count++;
		}

		bzero(subh, sizeof(*subh));
		subh->action = PFSYNC_ACT_TDB;
		subh->len = sizeof(struct pfsync_tdb) >> 2;
		subh->count = htons(count);
	}

	/* walk the queues */
	for (q = 0; q < PFSYNC_S_COUNT; q++) {
		if (TAILQ_EMPTY(&sn.sn_qs[q]))
			continue;

		subh = (struct pfsync_subheader *)(m->m_data + offset);
		offset += sizeof(*subh);

		count = 0;
		while ((st = TAILQ_FIRST(&sn.sn_qs[q])) != NULL) {
			TAILQ_REMOVE(&sn.sn_qs[q], st, sync_list);
#ifdef PFSYNC_DEBUG
			KASSERT(st->sync_state == q);
#endif
			st->sync_state = PFSYNC_S_NONE;
			pfsync_qs[q].write(st, m->m_data + offset);
			offset += pfsync_qs[q].len;

			pf_state_unref(st);
			count++;
		}

		bzero(subh, sizeof(*subh));
		subh->action = pfsync_qs[q].action;
		subh->len = pfsync_qs[q].len >> 2;
		subh->count = htons(count);
	}

	/* we're done, let's put it on the wire */
#if NBPFILTER > 0
	if (ifp->if_bpf) {
		m->m_data += sizeof(*ip);
		m->m_len = m->m_pkthdr.len = sn.sn_len - sizeof(*ip);
		bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
		m->m_data -= sizeof(*ip);
		m->m_len = m->m_pkthdr.len = sn.sn_len;
	}

	if (sc->sc_sync_ifidx == 0) {
		sc->sc_len = PFSYNC_MINPKT;
		m_freem(m);
		return;
	}
#endif

	sc->sc_if.if_opackets++;
	sc->sc_if.if_obytes += m->m_pkthdr.len;

	m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain;

	pfsync_send_pkt(m);
}

void
pfsync_insert_state(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;

	NET_ASSERT_LOCKED();

	if (ISSET(st->rule.ptr->rule_flag, PFRULE_NOSYNC) ||
	    st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) {
		SET(st->state_flags, PFSTATE_NOSYNC);
		return;
	}

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
	    ISSET(st->state_flags, PFSTATE_NOSYNC))
		return;

#ifdef PFSYNC_DEBUG
	KASSERT(st->sync_state == PFSYNC_S_NONE);
#endif

	if (sc->sc_len == PFSYNC_MINPKT)
		timeout_add_sec(&sc->sc_tmo, 1);

	pfsync_q_ins(st, PFSYNC_S_INS);

	st->sync_updates = 0;
}

int
pfsync_defer(struct pf_state *st, struct mbuf *m, struct pfsync_deferral **ppd)
{
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_deferral *pd;
	unsigned int sched;

	NET_ASSERT_LOCKED();

	if (!sc->sc_defer ||
	    ISSET(st->state_flags, PFSTATE_NOSYNC) ||
	    m->m_flags & (M_BCAST|M_MCAST))
		return (0);

	pd = pool_get(&sc->sc_pool, M_NOWAIT);
	if (pd == NULL)
		return (0);

	/*
	 * The deferral queue is growing faster than the timeout can consume
	 * it, so ask the packet (caller) to help the timer and dispatch one
	 * deferral for us.
	 *
	 * We wish to call pfsync_undefer() here.  Unfortunately we can't,
	 * because pfsync_undefer() will be calling to ip_output(),
	 * which in turn will call to pf_test(), which would then attempt
	 * to grab PF_LOCK() we currently hold.
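	 *
	 * Instead the oldest deferral is handed back to the caller via ppd,
	 * so it can be dispatched once PF_LOCK() is no longer held.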
1962 */ 1963 if (sc->sc_deferred >= 128) { 1964 mtx_enter(&sc->sc_deferrals_mtx); 1965 *ppd = TAILQ_FIRST(&sc->sc_deferrals); 1966 if (*ppd != NULL) { 1967 TAILQ_REMOVE(&sc->sc_deferrals, *ppd, pd_entry); 1968 sc->sc_deferred--; 1969 } 1970 mtx_leave(&sc->sc_deferrals_mtx); 1971 } else 1972 *ppd = NULL; 1973 1974 m->m_pkthdr.pf.flags |= PF_TAG_GENERATED; 1975 SET(st->state_flags, PFSTATE_ACK); 1976 1977 pd->pd_st = pf_state_ref(st); 1978 pd->pd_m = m; 1979 1980 pd->pd_deadline = getnsecuptime() + PFSYNC_DEFER_NSEC; 1981 1982 mtx_enter(&sc->sc_deferrals_mtx); 1983 sched = TAILQ_EMPTY(&sc->sc_deferrals); 1984 1985 TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry); 1986 sc->sc_deferred++; 1987 mtx_leave(&sc->sc_deferrals_mtx); 1988 1989 if (sched) 1990 timeout_add_nsec(&sc->sc_deferrals_tmo, PFSYNC_DEFER_NSEC); 1991 1992 schednetisr(NETISR_PFSYNC); 1993 1994 return (1); 1995 } 1996 1997 void 1998 pfsync_undefer_notify(struct pfsync_deferral *pd) 1999 { 2000 struct pf_pdesc pdesc; 2001 struct pf_state *st = pd->pd_st; 2002 2003 /* 2004 * pf_remove_state removes the state keys and sets st->timeout 2005 * to PFTM_UNLINKED. this is done under NET_LOCK which should 2006 * be held here, so we can use PFTM_UNLINKED as a test for 2007 * whether the state keys are set for the address family 2008 * lookup. 2009 */ 2010 2011 if (st->timeout == PFTM_UNLINKED) 2012 return; 2013 2014 if (st->rt == PF_ROUTETO) { 2015 if (pf_setup_pdesc(&pdesc, st->key[PF_SK_WIRE]->af, 2016 st->direction, st->kif, pd->pd_m, NULL) != PF_PASS) 2017 return; 2018 switch (st->key[PF_SK_WIRE]->af) { 2019 case AF_INET: 2020 pf_route(&pdesc, st); 2021 break; 2022 #ifdef INET6 2023 case AF_INET6: 2024 pf_route6(&pdesc, st); 2025 break; 2026 #endif /* INET6 */ 2027 default: 2028 unhandled_af(st->key[PF_SK_WIRE]->af); 2029 } 2030 pd->pd_m = pdesc.m; 2031 } else { 2032 switch (st->key[PF_SK_WIRE]->af) { 2033 case AF_INET: 2034 ip_output(pd->pd_m, NULL, NULL, 0, NULL, NULL, 0); 2035 break; 2036 #ifdef INET6 2037 case AF_INET6: 2038 ip6_output(pd->pd_m, NULL, NULL, 0, NULL, NULL); 2039 break; 2040 #endif /* INET6 */ 2041 default: 2042 unhandled_af(st->key[PF_SK_WIRE]->af); 2043 } 2044 2045 pd->pd_m = NULL; 2046 } 2047 } 2048 2049 void 2050 pfsync_free_deferral(struct pfsync_deferral *pd) 2051 { 2052 struct pfsync_softc *sc = pfsyncif; 2053 2054 pf_state_unref(pd->pd_st); 2055 m_freem(pd->pd_m); 2056 pool_put(&sc->sc_pool, pd); 2057 } 2058 2059 void 2060 pfsync_undefer(struct pfsync_deferral *pd, int drop) 2061 { 2062 struct pfsync_softc *sc = pfsyncif; 2063 2064 NET_ASSERT_LOCKED(); 2065 2066 if (sc == NULL) 2067 return; 2068 2069 CLR(pd->pd_st->state_flags, PFSTATE_ACK); 2070 if (!drop) 2071 pfsync_undefer_notify(pd); 2072 2073 pfsync_free_deferral(pd); 2074 } 2075 2076 void 2077 pfsync_deferrals_tmo(void *arg) 2078 { 2079 struct pfsync_softc *sc = arg; 2080 struct pfsync_deferral *pd; 2081 uint64_t now, nsec = 0; 2082 struct pfsync_deferrals pds = TAILQ_HEAD_INITIALIZER(pds); 2083 2084 now = getnsecuptime(); 2085 2086 mtx_enter(&sc->sc_deferrals_mtx); 2087 for (;;) { 2088 pd = TAILQ_FIRST(&sc->sc_deferrals); 2089 if (pd == NULL) 2090 break; 2091 2092 if (now < pd->pd_deadline) { 2093 nsec = pd->pd_deadline - now; 2094 break; 2095 } 2096 2097 TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry); 2098 sc->sc_deferred--; 2099 TAILQ_INSERT_TAIL(&pds, pd, pd_entry); 2100 } 2101 mtx_leave(&sc->sc_deferrals_mtx); 2102 2103 if (nsec > 0) { 2104 /* we were looking at a pd, but it wasn't old enough */ 2105 timeout_add_nsec(&sc->sc_deferrals_tmo, nsec); 
2106 } 2107 2108 if (TAILQ_EMPTY(&pds)) 2109 return; 2110 2111 NET_LOCK(); 2112 while ((pd = TAILQ_FIRST(&pds)) != NULL) { 2113 TAILQ_REMOVE(&pds, pd, pd_entry); 2114 2115 pfsync_undefer(pd, 0); 2116 } 2117 NET_UNLOCK(); 2118 } 2119 2120 void 2121 pfsync_deferred(struct pf_state *st, int drop) 2122 { 2123 struct pfsync_softc *sc = pfsyncif; 2124 struct pfsync_deferral *pd; 2125 2126 NET_ASSERT_LOCKED(); 2127 2128 mtx_enter(&sc->sc_deferrals_mtx); 2129 TAILQ_FOREACH(pd, &sc->sc_deferrals, pd_entry) { 2130 if (pd->pd_st == st) { 2131 TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry); 2132 sc->sc_deferred--; 2133 break; 2134 } 2135 } 2136 mtx_leave(&sc->sc_deferrals_mtx); 2137 2138 if (pd != NULL) 2139 pfsync_undefer(pd, drop); 2140 } 2141 2142 void 2143 pfsync_update_state(struct pf_state *st) 2144 { 2145 struct pfsync_softc *sc = pfsyncif; 2146 int sync = 0; 2147 2148 NET_ASSERT_LOCKED(); 2149 2150 if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING)) 2151 return; 2152 2153 if (ISSET(st->state_flags, PFSTATE_ACK)) 2154 pfsync_deferred(st, 0); 2155 if (ISSET(st->state_flags, PFSTATE_NOSYNC)) { 2156 if (st->sync_state != PFSYNC_S_NONE) 2157 pfsync_q_del(st); 2158 return; 2159 } 2160 2161 if (sc->sc_len == PFSYNC_MINPKT) 2162 timeout_add_sec(&sc->sc_tmo, 1); 2163 2164 switch (st->sync_state) { 2165 case PFSYNC_S_UPD_C: 2166 case PFSYNC_S_UPD: 2167 case PFSYNC_S_INS: 2168 /* we're already handling it */ 2169 2170 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) { 2171 st->sync_updates++; 2172 if (st->sync_updates >= sc->sc_maxupdates) 2173 sync = 1; 2174 } 2175 break; 2176 2177 case PFSYNC_S_IACK: 2178 pfsync_q_del(st); 2179 case PFSYNC_S_NONE: 2180 pfsync_q_ins(st, PFSYNC_S_UPD_C); 2181 st->sync_updates = 0; 2182 break; 2183 2184 default: 2185 panic("pfsync_update_state: unexpected sync state %d", 2186 st->sync_state); 2187 } 2188 2189 if (sync || (getuptime() - st->pfsync_time) < 2) 2190 schednetisr(NETISR_PFSYNC); 2191 } 2192 2193 void 2194 pfsync_cancel_full_update(struct pfsync_softc *sc) 2195 { 2196 if (timeout_pending(&sc->sc_bulkfail_tmo) || 2197 timeout_pending(&sc->sc_bulk_tmo)) { 2198 #if NCARP > 0 2199 if (!pfsync_sync_ok) 2200 carp_group_demote_adj(&sc->sc_if, -1, 2201 "pfsync bulk cancelled"); 2202 if (sc->sc_initial_bulk) { 2203 carp_group_demote_adj(&sc->sc_if, -32, 2204 "pfsync init"); 2205 sc->sc_initial_bulk = 0; 2206 } 2207 #endif 2208 pfsync_sync_ok = 1; 2209 DPFPRINTF(LOG_INFO, "cancelling bulk update"); 2210 } 2211 timeout_del(&sc->sc_bulkfail_tmo); 2212 timeout_del(&sc->sc_bulk_tmo); 2213 sc->sc_bulk_next = NULL; 2214 sc->sc_bulk_last = NULL; 2215 sc->sc_ureq_sent = 0; 2216 sc->sc_bulk_tries = 0; 2217 } 2218 2219 void 2220 pfsync_request_full_update(struct pfsync_softc *sc) 2221 { 2222 if (sc->sc_sync_ifidx != 0 && ISSET(sc->sc_if.if_flags, IFF_RUNNING)) { 2223 /* Request a full state table update. 
		 */
		sc->sc_ureq_sent = getuptime();
#if NCARP > 0
		if (!sc->sc_link_demoted && pfsync_sync_ok)
			carp_group_demote_adj(&sc->sc_if, 1,
			    "pfsync bulk start");
#endif
		pfsync_sync_ok = 0;
		DPFPRINTF(LOG_INFO, "requesting bulk update");
		timeout_add(&sc->sc_bulkfail_tmo, 4 * hz +
		    pf_pool_limits[PF_LIMIT_STATES].limit /
		    ((sc->sc_if.if_mtu - PFSYNC_MINPKT) /
		    sizeof(struct pfsync_state)));
		pfsync_request_update(0, 0);
	}
}

void
pfsync_request_update(u_int32_t creatorid, u_int64_t id)
{
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_upd_req_item *item;
	size_t nlen, sc_len;
	int retry;

	/*
	 * this code does nothing to prevent multiple update requests for the
	 * same state being generated.
	 */

	item = pool_get(&sc->sc_pool, PR_NOWAIT);
	if (item == NULL) {
		/* XXX stats */
		return;
	}

	item->ur_msg.id = id;
	item->ur_msg.creatorid = creatorid;

	for (;;) {
		mtx_enter(&sc->sc_upd_req_mtx);

		nlen = sizeof(struct pfsync_upd_req);
		if (TAILQ_EMPTY(&sc->sc_upd_req_list))
			nlen += sizeof(struct pfsync_subheader);

		sc_len = atomic_add_long_nv(&sc->sc_len, nlen);
		retry = (sc_len > sc->sc_if.if_mtu);
		if (retry)
			atomic_sub_long(&sc->sc_len, nlen);
		else
			TAILQ_INSERT_TAIL(&sc->sc_upd_req_list, item, ur_entry);

		mtx_leave(&sc->sc_upd_req_mtx);

		if (!retry)
			break;

		pfsync_sendout();
	}

	schednetisr(NETISR_PFSYNC);
}

void
pfsync_update_state_req(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;

	if (sc == NULL)
		panic("pfsync_update_state_req: nonexistent instance");

	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
		if (st->sync_state != PFSYNC_S_NONE)
			pfsync_q_del(st);
		return;
	}

	switch (st->sync_state) {
	case PFSYNC_S_UPD_C:
	case PFSYNC_S_IACK:
		pfsync_q_del(st);
		/* FALLTHROUGH */
	case PFSYNC_S_NONE:
		pfsync_q_ins(st, PFSYNC_S_UPD);
		schednetisr(NETISR_PFSYNC);
		return;

	case PFSYNC_S_INS:
	case PFSYNC_S_UPD:
	case PFSYNC_S_DEL:
		/* we're already handling it */
		return;

	default:
		panic("pfsync_update_state_req: unexpected sync state %d",
		    st->sync_state);
	}
}

void
pfsync_delete_state(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;

	NET_ASSERT_LOCKED();

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
		return;

	if (ISSET(st->state_flags, PFSTATE_ACK))
		pfsync_deferred(st, 1);
	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
		if (st->sync_state != PFSYNC_S_NONE)
			pfsync_q_del(st);
		return;
	}

	if (sc->sc_len == PFSYNC_MINPKT)
		timeout_add_sec(&sc->sc_tmo, 1);

	switch (st->sync_state) {
	case PFSYNC_S_INS:
		/* we never got to tell the world so just forget about it */
		pfsync_q_del(st);
		return;

	case PFSYNC_S_UPD_C:
	case PFSYNC_S_UPD:
	case PFSYNC_S_IACK:
		pfsync_q_del(st);
		/*
		 * FALLTHROUGH to putting it on the del list.
		 * Note on reference count bookkeeping:
		 * pfsync_q_del() drops the reference held for queue
		 * ownership, but the st entry survives because our
		 * caller still holds a reference.
		 */
	case PFSYNC_S_NONE:
		/*
		 * We either fall through here, or there is no reference
		 * to st owned by the pfsync queues at this point.
		 *
		 * Calling pfsync_q_ins() puts st on the del queue and
		 * grabs a reference for that queue.
		 */
		pfsync_q_ins(st, PFSYNC_S_DEL);
		return;

	default:
		panic("pfsync_delete_state: unexpected sync state %d",
		    st->sync_state);
	}
}

void
pfsync_clear_states(u_int32_t creatorid, const char *ifname)
{
	struct pfsync_softc *sc = pfsyncif;
	struct {
		struct pfsync_subheader subh;
		struct pfsync_clr clr;
	} __packed r;

	NET_ASSERT_LOCKED();

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
		return;

	bzero(&r, sizeof(r));

	r.subh.action = PFSYNC_ACT_CLR;
	r.subh.len = sizeof(struct pfsync_clr) >> 2;
	r.subh.count = htons(1);

	strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname));
	r.clr.creatorid = creatorid;

	pfsync_send_plus(&r, sizeof(r));
}

void
pfsync_q_ins(struct pf_state *st, int q)
{
	struct pfsync_softc *sc = pfsyncif;
	size_t nlen, sc_len;

#if defined(PFSYNC_DEBUG)
	if (sc->sc_len < PFSYNC_MINPKT)
		panic("pfsync pkt len is too low %zd", sc->sc_len);
#endif
	do {
		mtx_enter(&sc->sc_mtx[q]);

		/*
		 * If two threads are competing to insert the same state,
		 * there must be just a single winner.
		 */
		if (st->sync_state != PFSYNC_S_NONE) {
			mtx_leave(&sc->sc_mtx[q]);
			break;
		}

		nlen = pfsync_qs[q].len;

		if (TAILQ_EMPTY(&sc->sc_qs[q]))
			nlen += sizeof(struct pfsync_subheader);

		sc_len = atomic_add_long_nv(&sc->sc_len, nlen);
		if (sc_len > sc->sc_if.if_mtu) {
			atomic_sub_long(&sc->sc_len, nlen);
			mtx_leave(&sc->sc_mtx[q]);
			pfsync_sendout();
			continue;
		}

		pf_state_ref(st);

		TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list);
		st->sync_state = q;
		mtx_leave(&sc->sc_mtx[q]);
	} while (0);
}

void
pfsync_q_del(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;
	int q = st->sync_state;

	KASSERT(st->sync_state != PFSYNC_S_NONE);

	mtx_enter(&sc->sc_mtx[q]);
	atomic_sub_long(&sc->sc_len, pfsync_qs[q].len);
	TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list);
	if (TAILQ_EMPTY(&sc->sc_qs[q]))
		atomic_sub_long(&sc->sc_len, sizeof(struct pfsync_subheader));
	mtx_leave(&sc->sc_mtx[q]);

	st->sync_state = PFSYNC_S_NONE;
	pf_state_unref(st);
}

void
pfsync_update_tdb(struct tdb *t, int output)
{
	struct pfsync_softc *sc = pfsyncif;
	size_t nlen, sc_len;

	if (sc == NULL)
		return;

	if (!ISSET(t->tdb_flags, TDBF_PFSYNC)) {
		do {
			mtx_enter(&sc->sc_tdb_mtx);
			nlen = sizeof(struct pfsync_tdb);

			if (TAILQ_EMPTY(&sc->sc_tdb_q))
				nlen += sizeof(struct pfsync_subheader);

			sc_len = atomic_add_long_nv(&sc->sc_len, nlen);
			if (sc_len > sc->sc_if.if_mtu) {
				atomic_sub_long(&sc->sc_len, nlen);
				mtx_leave(&sc->sc_tdb_mtx);
				pfsync_sendout();
				continue;
			}

			TAILQ_INSERT_TAIL(&sc->sc_tdb_q, t, tdb_sync_entry);
			mtx_leave(&sc->sc_tdb_mtx);

			SET(t->tdb_flags, TDBF_PFSYNC);
			t->tdb_updates = 0;
		} while (0);
	} else {
		if (++t->tdb_updates >= sc->sc_maxupdates)
			schednetisr(NETISR_PFSYNC);
	}

	if (output)
		SET(t->tdb_flags, TDBF_PFSYNC_RPL);
	else
		CLR(t->tdb_flags, TDBF_PFSYNC_RPL);
}

void
pfsync_delete_tdb(struct tdb *t)
{
	struct pfsync_softc *sc = pfsyncif;
	size_t nlen;

	if (sc == NULL || !ISSET(t->tdb_flags, TDBF_PFSYNC))
		return;

	mtx_enter(&sc->sc_tdb_mtx);

	TAILQ_REMOVE(&sc->sc_tdb_q, t, tdb_sync_entry);
	CLR(t->tdb_flags, TDBF_PFSYNC);

	nlen = sizeof(struct pfsync_tdb);
	if (TAILQ_EMPTY(&sc->sc_tdb_q))
		nlen += sizeof(struct pfsync_subheader);
	atomic_sub_long(&sc->sc_len, nlen);

	mtx_leave(&sc->sc_tdb_mtx);
}

void
pfsync_out_tdb(struct tdb *t, void *buf)
{
	struct pfsync_tdb *ut = buf;

	bzero(ut, sizeof(*ut));
	ut->spi = t->tdb_spi;
	bcopy(&t->tdb_dst, &ut->dst, sizeof(ut->dst));
	/*
	 * When a failover happens, the master's rpl is probably above
	 * what we see here (we may be up to a second late), so
	 * increase it a bit for outbound tdbs to manage most such
	 * situations.
	 *
	 * For now, just add an offset that is likely to be larger
	 * than the number of packets we can see in one second. The RFC
	 * just says the next packet must have a higher seq value.
	 *
	 * XXX What is a good algorithm for this? We could use
	 * a rate-determined increase, but to know it, we would have
	 * to extend struct tdb.
	 * XXX ut->rpl can wrap over MAXINT, but if so the real tdb
	 * will soon be replaced anyway. For now, just don't handle
	 * this edge case.
	 */
#define RPL_INCR 16384
	ut->rpl = htobe64(t->tdb_rpl + (ISSET(t->tdb_flags, TDBF_PFSYNC_RPL) ?
	    RPL_INCR : 0));
	ut->cur_bytes = htobe64(t->tdb_cur_bytes);
	ut->sproto = t->tdb_sproto;
	ut->rdomain = htons(t->tdb_rdomain);
}

void
pfsync_bulk_start(void)
{
	struct pfsync_softc *sc = pfsyncif;

	NET_ASSERT_LOCKED();

	/*
	 * pf gc via pfsync_state_in_use reads sc_bulk_next and
	 * sc_bulk_last while exclusively holding the pf_state_list
	 * rwlock. make sure it can't race with us setting these
	 * pointers. they basically act as hazards, and borrow the
	 * list's state reference count.
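	 * (pfsync_state_in_use() reports these two states as busy,
	 * so the pf state purge can leave them alone while a bulk
	 * send is still in progress.)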
2576 */ 2577 rw_enter_read(&pf_state_list.pfs_rwl); 2578 2579 /* get a consistent view of the list pointers */ 2580 mtx_enter(&pf_state_list.pfs_mtx); 2581 if (sc->sc_bulk_next == NULL) 2582 sc->sc_bulk_next = TAILQ_FIRST(&pf_state_list.pfs_list); 2583 2584 sc->sc_bulk_last = TAILQ_LAST(&pf_state_list.pfs_list, pf_state_queue); 2585 mtx_leave(&pf_state_list.pfs_mtx); 2586 2587 rw_exit_read(&pf_state_list.pfs_rwl); 2588 2589 DPFPRINTF(LOG_INFO, "received bulk update request"); 2590 2591 if (sc->sc_bulk_last == NULL) 2592 pfsync_bulk_status(PFSYNC_BUS_END); 2593 else { 2594 sc->sc_ureq_received = getuptime(); 2595 2596 pfsync_bulk_status(PFSYNC_BUS_START); 2597 timeout_add(&sc->sc_bulk_tmo, 0); 2598 } 2599 } 2600 2601 void 2602 pfsync_bulk_update(void *arg) 2603 { 2604 struct pfsync_softc *sc; 2605 struct pf_state *st; 2606 int i = 0; 2607 2608 NET_LOCK(); 2609 sc = pfsyncif; 2610 if (sc == NULL) 2611 goto out; 2612 2613 rw_enter_read(&pf_state_list.pfs_rwl); 2614 st = sc->sc_bulk_next; 2615 sc->sc_bulk_next = NULL; 2616 2617 for (;;) { 2618 if (st->sync_state == PFSYNC_S_NONE && 2619 st->timeout < PFTM_MAX && 2620 st->pfsync_time <= sc->sc_ureq_received) { 2621 pfsync_update_state_req(st); 2622 i++; 2623 } 2624 2625 st = TAILQ_NEXT(st, entry_list); 2626 if ((st == NULL) || (st == sc->sc_bulk_last)) { 2627 /* we're done */ 2628 sc->sc_bulk_last = NULL; 2629 pfsync_bulk_status(PFSYNC_BUS_END); 2630 break; 2631 } 2632 2633 if (i > 1 && (sc->sc_if.if_mtu - sc->sc_len) < 2634 sizeof(struct pfsync_state)) { 2635 /* we've filled a packet */ 2636 sc->sc_bulk_next = st; 2637 timeout_add(&sc->sc_bulk_tmo, 1); 2638 break; 2639 } 2640 } 2641 2642 rw_exit_read(&pf_state_list.pfs_rwl); 2643 out: 2644 NET_UNLOCK(); 2645 } 2646 2647 void 2648 pfsync_bulk_status(u_int8_t status) 2649 { 2650 struct { 2651 struct pfsync_subheader subh; 2652 struct pfsync_bus bus; 2653 } __packed r; 2654 2655 struct pfsync_softc *sc = pfsyncif; 2656 2657 bzero(&r, sizeof(r)); 2658 2659 r.subh.action = PFSYNC_ACT_BUS; 2660 r.subh.len = sizeof(struct pfsync_bus) >> 2; 2661 r.subh.count = htons(1); 2662 2663 r.bus.creatorid = pf_status.hostid; 2664 r.bus.endtime = htonl(getuptime() - sc->sc_ureq_received); 2665 r.bus.status = status; 2666 2667 pfsync_send_plus(&r, sizeof(r)); 2668 } 2669 2670 void 2671 pfsync_bulk_fail(void *arg) 2672 { 2673 struct pfsync_softc *sc; 2674 2675 NET_LOCK(); 2676 sc = pfsyncif; 2677 if (sc == NULL) 2678 goto out; 2679 if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) { 2680 /* Try again */ 2681 timeout_add_sec(&sc->sc_bulkfail_tmo, 5); 2682 pfsync_request_update(0, 0); 2683 } else { 2684 /* Pretend like the transfer was ok */ 2685 sc->sc_ureq_sent = 0; 2686 sc->sc_bulk_tries = 0; 2687 #if NCARP > 0 2688 if (!pfsync_sync_ok) 2689 carp_group_demote_adj(&sc->sc_if, -1, 2690 sc->sc_link_demoted ? 
2691 "pfsync link state up" : 2692 "pfsync bulk fail"); 2693 if (sc->sc_initial_bulk) { 2694 carp_group_demote_adj(&sc->sc_if, -32, 2695 "pfsync init"); 2696 sc->sc_initial_bulk = 0; 2697 } 2698 #endif 2699 pfsync_sync_ok = 1; 2700 sc->sc_link_demoted = 0; 2701 DPFPRINTF(LOG_ERR, "failed to receive bulk update"); 2702 } 2703 out: 2704 NET_UNLOCK(); 2705 } 2706 2707 void 2708 pfsync_send_plus(void *plus, size_t pluslen) 2709 { 2710 struct pfsync_softc *sc = pfsyncif; 2711 2712 if (sc->sc_len + pluslen > sc->sc_if.if_mtu) 2713 pfsync_sendout(); 2714 2715 sc->sc_plus = plus; 2716 sc->sc_len += (sc->sc_pluslen = pluslen); 2717 2718 pfsync_sendout(); 2719 } 2720 2721 int 2722 pfsync_up(void) 2723 { 2724 struct pfsync_softc *sc = pfsyncif; 2725 2726 if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING)) 2727 return (0); 2728 2729 return (1); 2730 } 2731 2732 int 2733 pfsync_state_in_use(struct pf_state *st) 2734 { 2735 struct pfsync_softc *sc = pfsyncif; 2736 2737 if (sc == NULL) 2738 return (0); 2739 2740 rw_assert_wrlock(&pf_state_list.pfs_rwl); 2741 2742 if (st->sync_state != PFSYNC_S_NONE || 2743 st == sc->sc_bulk_next || 2744 st == sc->sc_bulk_last) 2745 return (1); 2746 2747 return (0); 2748 } 2749 2750 void 2751 pfsync_timeout(void *arg) 2752 { 2753 NET_LOCK(); 2754 pfsync_sendout(); 2755 NET_UNLOCK(); 2756 } 2757 2758 /* this is a softnet/netisr handler */ 2759 void 2760 pfsyncintr(void) 2761 { 2762 pfsync_sendout(); 2763 } 2764 2765 int 2766 pfsync_sysctl_pfsyncstat(void *oldp, size_t *oldlenp, void *newp) 2767 { 2768 struct pfsyncstats pfsyncstat; 2769 2770 CTASSERT(sizeof(pfsyncstat) == (pfsyncs_ncounters * sizeof(uint64_t))); 2771 memset(&pfsyncstat, 0, sizeof pfsyncstat); 2772 counters_read(pfsynccounters, (uint64_t *)&pfsyncstat, 2773 pfsyncs_ncounters); 2774 return (sysctl_rdstruct(oldp, oldlenp, newp, 2775 &pfsyncstat, sizeof(pfsyncstat))); 2776 } 2777 2778 int 2779 pfsync_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, 2780 size_t newlen) 2781 { 2782 /* All sysctl names at this level are terminal. */ 2783 if (namelen != 1) 2784 return (ENOTDIR); 2785 2786 switch (name[0]) { 2787 case PFSYNCCTL_STATS: 2788 return (pfsync_sysctl_pfsyncstat(oldp, oldlenp, newp)); 2789 default: 2790 return (ENOPROTOOPT); 2791 } 2792 } 2793