/*	$OpenBSD: if_pfsync.c,v 1.297 2021/07/07 18:38:25 sashan Exp $	*/

/*
 * Copyright (c) 2002 Michael Shalayeff
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 2009 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/timeout.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/pool.h>
#include <sys/syslog.h>

#include <net/if.h>
#include <net/if_types.h>
#include <net/bpf.h>
#include <net/netisr.h>

#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/in_var.h>
#include <netinet/ip_var.h>
#include <netinet/ip_ipsp.h>
#include <netinet/ip_icmp.h>
#include <netinet/icmp6.h>
#include <netinet/tcp.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_fsm.h>
#include <netinet/udp.h>

#ifdef INET6
#include <netinet6/in6_var.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet6/nd6.h>
#endif /* INET6 */

#include "carp.h"
#if NCARP > 0
#include <netinet/ip_carp.h>
#endif

#define PF_DEBUGNAME	"pfsync: "
#include <net/pfvar.h>
#include <net/pfvar_priv.h>
#include <net/if_pfsync.h>

#include "bpfilter.h"
#include "pfsync.h"

#define PFSYNC_DEFER_NSEC 20000000ULL

#define PFSYNC_MINPKT ( \
	sizeof(struct ip) + \
	sizeof(struct pfsync_header))

int	pfsync_upd_tcp(struct pf_state *, struct pfsync_state_peer *,
	    struct pfsync_state_peer *);

int	pfsync_in_clr(caddr_t, int, int, int);
int	pfsync_in_iack(caddr_t, int, int, int);
int	pfsync_in_upd_c(caddr_t, int, int, int);
int	pfsync_in_ureq(caddr_t, int, int, int);
int	pfsync_in_del(caddr_t, int, int, int);
int	pfsync_in_del_c(caddr_t, int, int, int);
int	pfsync_in_bus(caddr_t, int, int, int);
int	pfsync_in_tdb(caddr_t, int, int, int);
int	pfsync_in_ins(caddr_t, int, int, int);
int	pfsync_in_upd(caddr_t, int, int, int);
int	pfsync_in_eof(caddr_t, int, int, int);

int	pfsync_in_error(caddr_t, int, int, int);

void	pfsync_update_state_locked(struct pf_state *);

struct {
	int	(*in)(caddr_t, int, int, int);
	size_t	len;
} pfsync_acts[] = {
	/* PFSYNC_ACT_CLR */
	{ pfsync_in_clr,	sizeof(struct pfsync_clr) },
	/* PFSYNC_ACT_OINS */
	{ pfsync_in_error,	0 },
	/* PFSYNC_ACT_INS_ACK */
	{ pfsync_in_iack,	sizeof(struct pfsync_ins_ack) },
	/* PFSYNC_ACT_OUPD */
	{ pfsync_in_error,	0 },
	/* PFSYNC_ACT_UPD_C */
	{ pfsync_in_upd_c,	sizeof(struct pfsync_upd_c) },
	/* PFSYNC_ACT_UPD_REQ */
	{ pfsync_in_ureq,	sizeof(struct pfsync_upd_req) },
	/* PFSYNC_ACT_DEL */
	{ pfsync_in_del,	sizeof(struct pfsync_state) },
	/* PFSYNC_ACT_DEL_C */
	{ pfsync_in_del_c,	sizeof(struct pfsync_del_c) },
	/* PFSYNC_ACT_INS_F */
	{ pfsync_in_error,	0 },
	/* PFSYNC_ACT_DEL_F */
	{ pfsync_in_error,	0 },
	/* PFSYNC_ACT_BUS */
	{ pfsync_in_bus,	sizeof(struct pfsync_bus) },
	/* PFSYNC_ACT_OTDB */
	{ pfsync_in_error,	0 },
	/* PFSYNC_ACT_EOF */
	{ pfsync_in_error,	0 },
	/* PFSYNC_ACT_INS */
	{ pfsync_in_ins,	sizeof(struct pfsync_state) },
	/* PFSYNC_ACT_UPD */
	{ pfsync_in_upd,	sizeof(struct pfsync_state) },
	/* PFSYNC_ACT_TDB */
	{ pfsync_in_tdb,	sizeof(struct pfsync_tdb) },
};

struct pfsync_q {
	void		(*write)(struct pf_state *, void *);
	size_t		len;
	u_int8_t	action;
};

/* we have one of these for every PFSYNC_S_ */
void	pfsync_out_state(struct pf_state *, void *);
void
	pfsync_out_iack(struct pf_state *, void *);
void	pfsync_out_upd_c(struct pf_state *, void *);
void	pfsync_out_del(struct pf_state *, void *);

struct pfsync_q pfsync_qs[] = {
	{ pfsync_out_iack,  sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK },
	{ pfsync_out_upd_c, sizeof(struct pfsync_upd_c),   PFSYNC_ACT_UPD_C },
	{ pfsync_out_del,   sizeof(struct pfsync_del_c),   PFSYNC_ACT_DEL_C },
	{ pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_INS },
	{ pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_UPD }
};

void	pfsync_q_ins(struct pf_state *, int);
void	pfsync_q_del(struct pf_state *);

struct pfsync_upd_req_item {
	TAILQ_ENTRY(pfsync_upd_req_item)	ur_entry;
	struct pfsync_upd_req			ur_msg;
};
TAILQ_HEAD(pfsync_upd_reqs, pfsync_upd_req_item);

struct pfsync_deferral {
	TAILQ_ENTRY(pfsync_deferral)	 pd_entry;
	struct pf_state			*pd_st;
	struct mbuf			*pd_m;
	uint64_t			 pd_deadline;
};
TAILQ_HEAD(pfsync_deferrals, pfsync_deferral);

#define PFSYNC_PLSIZE	MAX(sizeof(struct pfsync_upd_req_item), \
    sizeof(struct pfsync_deferral))

void	pfsync_out_tdb(struct tdb *, void *);

struct pfsync_softc {
	struct ifnet		 sc_if;
	unsigned int		 sc_sync_ifidx;

	struct pool		 sc_pool;

	struct ip_moptions	 sc_imo;

	struct in_addr		 sc_sync_peer;
	u_int8_t		 sc_maxupdates;

	struct ip		 sc_template;

	struct pf_state_queue	 sc_qs[PFSYNC_S_COUNT];
	struct mutex		 sc_mtx[PFSYNC_S_COUNT];
	size_t			 sc_len;

	struct pfsync_upd_reqs	 sc_upd_req_list;
	struct mutex		 sc_upd_req_mtx;

	int			 sc_initial_bulk;
	int			 sc_link_demoted;

	int			 sc_defer;
	struct pfsync_deferrals	 sc_deferrals;
	u_int			 sc_deferred;
	struct mutex		 sc_deferrals_mtx;
	struct timeout		 sc_deferrals_tmo;

	void			*sc_plus;
	size_t			 sc_pluslen;

	u_int32_t		 sc_ureq_sent;
	int			 sc_bulk_tries;
	struct timeout		 sc_bulkfail_tmo;

	u_int32_t		 sc_ureq_received;
	struct pf_state		*sc_bulk_next;
	struct pf_state		*sc_bulk_last;
	struct timeout		 sc_bulk_tmo;

	TAILQ_HEAD(, tdb)	 sc_tdb_q;
	struct mutex		 sc_tdb_mtx;

	struct task		 sc_ltask;
	struct task		 sc_dtask;

	struct timeout		 sc_tmo;
};

struct pfsync_snapshot {
	struct pfsync_softc	*sn_sc;
	struct pf_state_queue	 sn_qs[PFSYNC_S_COUNT];
	struct pfsync_upd_reqs	 sn_upd_req_list;
	TAILQ_HEAD(, tdb)	 sn_tdb_q;
	size_t			 sn_len;
	void			*sn_plus;
	size_t			 sn_pluslen;
};

struct pfsync_softc	*pfsyncif = NULL;
struct cpumem		*pfsynccounters;

void	pfsyncattach(int);
int	pfsync_clone_create(struct if_clone *, int);
int	pfsync_clone_destroy(struct ifnet *);
int	pfsync_alloc_scrub_memory(struct pfsync_state_peer *,
	    struct pf_state_peer *);
void	pfsync_update_net_tdb(struct pfsync_tdb *);
int	pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
	    struct rtentry *);
int	pfsyncioctl(struct ifnet *, u_long, caddr_t);
void	pfsyncstart(struct ifqueue *);
void	pfsync_syncdev_state(void *);
void	pfsync_ifdetach(void *);

void	pfsync_deferred(struct pf_state *, int);
void	pfsync_undefer(struct pfsync_deferral *, int);
void	pfsync_deferrals_tmo(void *);

void	pfsync_cancel_full_update(struct pfsync_softc *);
void	pfsync_request_full_update(struct pfsync_softc *);
void	pfsync_request_update(u_int32_t, u_int64_t);
void	pfsync_update_state_req(struct pf_state *);

void
	pfsync_drop(struct pfsync_softc *);
void	pfsync_sendout(void);
void	pfsync_send_plus(void *, size_t);
void	pfsync_timeout(void *);
void	pfsync_tdb_timeout(void *);

void	pfsync_bulk_start(void);
void	pfsync_bulk_status(u_int8_t);
void	pfsync_bulk_update(void *);
void	pfsync_bulk_fail(void *);

void	pfsync_grab_snapshot(struct pfsync_snapshot *, struct pfsync_softc *);
void	pfsync_drop_snapshot(struct pfsync_snapshot *);

void	pfsync_send_dispatch(void *);
void	pfsync_send_pkt(struct mbuf *);

static struct mbuf_queue	pfsync_mq;
static struct task	pfsync_task =
    TASK_INITIALIZER(pfsync_send_dispatch, &pfsync_mq);

#define PFSYNC_MAX_BULKTRIES	12
int	pfsync_sync_ok;

struct if_clone	pfsync_cloner =
    IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create, pfsync_clone_destroy);

void
pfsyncattach(int npfsync)
{
	if_clone_attach(&pfsync_cloner);
	pfsynccounters = counters_alloc(pfsyncs_ncounters);
	mq_init(&pfsync_mq, 4096, IPL_SOFTNET);
}

int
pfsync_clone_create(struct if_clone *ifc, int unit)
{
	struct pfsync_softc *sc;
	struct ifnet *ifp;
	int q;
	static const char *mtx_names[] = {
		"iack_mtx",
		"upd_c_mtx",
		"del_mtx",
		"ins_mtx",
		"upd_mtx",
		"" };

	if (unit != 0)
		return (EINVAL);

	pfsync_sync_ok = 1;

	sc = malloc(sizeof(*pfsyncif), M_DEVBUF, M_WAITOK|M_ZERO);
	for (q = 0; q < PFSYNC_S_COUNT; q++) {
		TAILQ_INIT(&sc->sc_qs[q]);
		mtx_init_flags(&sc->sc_mtx[q], IPL_SOFTNET, mtx_names[q], 0);
	}

	pool_init(&sc->sc_pool, PFSYNC_PLSIZE, 0, IPL_SOFTNET, 0, "pfsync",
	    NULL);
	TAILQ_INIT(&sc->sc_upd_req_list);
	mtx_init(&sc->sc_upd_req_mtx, IPL_SOFTNET);
	TAILQ_INIT(&sc->sc_deferrals);
	mtx_init(&sc->sc_deferrals_mtx, IPL_SOFTNET);
	timeout_set_proc(&sc->sc_deferrals_tmo, pfsync_deferrals_tmo, sc);
	task_set(&sc->sc_ltask, pfsync_syncdev_state, sc);
	task_set(&sc->sc_dtask, pfsync_ifdetach, sc);
	sc->sc_deferred = 0;

	TAILQ_INIT(&sc->sc_tdb_q);
	mtx_init(&sc->sc_tdb_mtx, IPL_SOFTNET);

	sc->sc_len = PFSYNC_MINPKT;
	sc->sc_maxupdates = 128;

	sc->sc_imo.imo_membership = mallocarray(IP_MIN_MEMBERSHIPS,
	    sizeof(struct in_multi *), M_IPMOPTS, M_WAITOK|M_ZERO);
	sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;

	ifp = &sc->sc_if;
	snprintf(ifp->if_xname, sizeof ifp->if_xname, "pfsync%d", unit);
	ifp->if_softc = sc;
	ifp->if_ioctl = pfsyncioctl;
	ifp->if_output = pfsyncoutput;
	ifp->if_qstart = pfsyncstart;
	ifp->if_type = IFT_PFSYNC;
	ifp->if_hdrlen = sizeof(struct pfsync_header);
	ifp->if_mtu = ETHERMTU;
	ifp->if_xflags = IFXF_CLONED | IFXF_MPSAFE;
	timeout_set_proc(&sc->sc_tmo, pfsync_timeout, NULL);
	timeout_set_proc(&sc->sc_bulk_tmo, pfsync_bulk_update, NULL);
	timeout_set_proc(&sc->sc_bulkfail_tmo, pfsync_bulk_fail, NULL);

	if_attach(ifp);
	if_alloc_sadl(ifp);

#if NCARP > 0
	if_addgroup(ifp, "carp");
#endif

#if NBPFILTER > 0
	bpfattach(&sc->sc_if.if_bpf, ifp, DLT_PFSYNC, PFSYNC_HDRLEN);
#endif

	pfsyncif = sc;

	return (0);
}

int
pfsync_clone_destroy(struct ifnet *ifp)
{
	struct pfsync_softc *sc = ifp->if_softc;
	struct ifnet *ifp0;
	struct pfsync_deferral *pd;
	struct pfsync_deferrals deferrals;

	NET_LOCK();

#if NCARP > 0
	if (!pfsync_sync_ok)
		carp_group_demote_adj(&sc->sc_if, -1, "pfsync 
destroy"); 410 if (sc->sc_link_demoted) 411 carp_group_demote_adj(&sc->sc_if, -1, "pfsync destroy"); 412 #endif 413 if ((ifp0 = if_get(sc->sc_sync_ifidx)) != NULL) { 414 if_linkstatehook_del(ifp0, &sc->sc_ltask); 415 if_detachhook_del(ifp0, &sc->sc_dtask); 416 } 417 if_put(ifp0); 418 419 /* XXXSMP breaks atomicity */ 420 NET_UNLOCK(); 421 if_detach(ifp); 422 NET_LOCK(); 423 424 pfsync_drop(sc); 425 426 if (sc->sc_deferred > 0) { 427 TAILQ_INIT(&deferrals); 428 mtx_enter(&sc->sc_deferrals_mtx); 429 TAILQ_CONCAT(&deferrals, &sc->sc_deferrals, pd_entry); 430 sc->sc_deferred = 0; 431 mtx_leave(&sc->sc_deferrals_mtx); 432 433 while (!TAILQ_EMPTY(&deferrals)) { 434 pd = TAILQ_FIRST(&deferrals); 435 TAILQ_REMOVE(&deferrals, pd, pd_entry); 436 pfsync_undefer(pd, 0); 437 } 438 } 439 440 pfsyncif = NULL; 441 timeout_del(&sc->sc_bulkfail_tmo); 442 timeout_del(&sc->sc_bulk_tmo); 443 timeout_del(&sc->sc_tmo); 444 445 NET_UNLOCK(); 446 447 pool_destroy(&sc->sc_pool); 448 free(sc->sc_imo.imo_membership, M_IPMOPTS, 449 sc->sc_imo.imo_max_memberships * sizeof(struct in_multi *)); 450 free(sc, M_DEVBUF, sizeof(*sc)); 451 452 return (0); 453 } 454 455 /* 456 * Start output on the pfsync interface. 457 */ 458 void 459 pfsyncstart(struct ifqueue *ifq) 460 { 461 ifq_purge(ifq); 462 } 463 464 void 465 pfsync_syncdev_state(void *arg) 466 { 467 struct pfsync_softc *sc = arg; 468 struct ifnet *ifp; 469 470 if ((sc->sc_if.if_flags & IFF_UP) == 0) 471 return; 472 if ((ifp = if_get(sc->sc_sync_ifidx)) == NULL) 473 return; 474 475 if (ifp->if_link_state == LINK_STATE_DOWN) { 476 sc->sc_if.if_flags &= ~IFF_RUNNING; 477 if (!sc->sc_link_demoted) { 478 #if NCARP > 0 479 carp_group_demote_adj(&sc->sc_if, 1, 480 "pfsync link state down"); 481 #endif 482 sc->sc_link_demoted = 1; 483 } 484 485 /* drop everything */ 486 timeout_del(&sc->sc_tmo); 487 pfsync_drop(sc); 488 489 pfsync_cancel_full_update(sc); 490 } else if (sc->sc_link_demoted) { 491 sc->sc_if.if_flags |= IFF_RUNNING; 492 493 pfsync_request_full_update(sc); 494 } 495 496 if_put(ifp); 497 } 498 499 void 500 pfsync_ifdetach(void *arg) 501 { 502 struct pfsync_softc *sc = arg; 503 struct ifnet *ifp; 504 505 if ((ifp = if_get(sc->sc_sync_ifidx)) != NULL) { 506 if_linkstatehook_del(ifp, &sc->sc_ltask); 507 if_detachhook_del(ifp, &sc->sc_dtask); 508 } 509 if_put(ifp); 510 511 sc->sc_sync_ifidx = 0; 512 } 513 514 int 515 pfsync_alloc_scrub_memory(struct pfsync_state_peer *s, 516 struct pf_state_peer *d) 517 { 518 if (s->scrub.scrub_flag && d->scrub == NULL) { 519 d->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT | PR_ZERO); 520 if (d->scrub == NULL) 521 return (ENOMEM); 522 } 523 524 return (0); 525 } 526 527 void 528 pfsync_state_export(struct pfsync_state *sp, struct pf_state *st) 529 { 530 pf_state_export(sp, st); 531 } 532 533 int 534 pfsync_state_import(struct pfsync_state *sp, int flags) 535 { 536 struct pf_state *st = NULL; 537 struct pf_state_key *skw = NULL, *sks = NULL; 538 struct pf_rule *r = NULL; 539 struct pfi_kif *kif; 540 int pool_flags; 541 int error = ENOMEM; 542 int n = 0; 543 544 if (sp->creatorid == 0) { 545 DPFPRINTF(LOG_NOTICE, "pfsync_state_import: " 546 "invalid creator id: %08x", ntohl(sp->creatorid)); 547 return (EINVAL); 548 } 549 550 if ((kif = pfi_kif_get(sp->ifname)) == NULL) { 551 DPFPRINTF(LOG_NOTICE, "pfsync_state_import: " 552 "unknown interface: %s", sp->ifname); 553 if (flags & PFSYNC_SI_IOCTL) 554 return (EINVAL); 555 return (0); /* skip this state */ 556 } 557 558 if (sp->af == 0) 559 return (0); /* skip this state */ 560 561 /* 562 * 
If the ruleset checksums match or the state is coming from the ioctl, 563 * it's safe to associate the state with the rule of that number. 564 */ 565 if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) && 566 (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) < 567 pf_main_ruleset.rules.active.rcount) { 568 TAILQ_FOREACH(r, pf_main_ruleset.rules.active.ptr, entries) 569 if (ntohl(sp->rule) == n++) 570 break; 571 } else 572 r = &pf_default_rule; 573 574 if ((r->max_states && r->states_cur >= r->max_states)) 575 goto cleanup; 576 577 if (flags & PFSYNC_SI_IOCTL) 578 pool_flags = PR_WAITOK | PR_LIMITFAIL | PR_ZERO; 579 else 580 pool_flags = PR_NOWAIT | PR_LIMITFAIL | PR_ZERO; 581 582 if ((st = pool_get(&pf_state_pl, pool_flags)) == NULL) 583 goto cleanup; 584 585 if ((skw = pf_alloc_state_key(pool_flags)) == NULL) 586 goto cleanup; 587 588 if ((sp->key[PF_SK_WIRE].af && 589 (sp->key[PF_SK_WIRE].af != sp->key[PF_SK_STACK].af)) || 590 PF_ANEQ(&sp->key[PF_SK_WIRE].addr[0], 591 &sp->key[PF_SK_STACK].addr[0], sp->af) || 592 PF_ANEQ(&sp->key[PF_SK_WIRE].addr[1], 593 &sp->key[PF_SK_STACK].addr[1], sp->af) || 594 sp->key[PF_SK_WIRE].port[0] != sp->key[PF_SK_STACK].port[0] || 595 sp->key[PF_SK_WIRE].port[1] != sp->key[PF_SK_STACK].port[1] || 596 sp->key[PF_SK_WIRE].rdomain != sp->key[PF_SK_STACK].rdomain) { 597 if ((sks = pf_alloc_state_key(pool_flags)) == NULL) 598 goto cleanup; 599 } else 600 sks = skw; 601 602 /* allocate memory for scrub info */ 603 if (pfsync_alloc_scrub_memory(&sp->src, &st->src) || 604 pfsync_alloc_scrub_memory(&sp->dst, &st->dst)) 605 goto cleanup; 606 607 /* copy to state key(s) */ 608 skw->addr[0] = sp->key[PF_SK_WIRE].addr[0]; 609 skw->addr[1] = sp->key[PF_SK_WIRE].addr[1]; 610 skw->port[0] = sp->key[PF_SK_WIRE].port[0]; 611 skw->port[1] = sp->key[PF_SK_WIRE].port[1]; 612 skw->rdomain = ntohs(sp->key[PF_SK_WIRE].rdomain); 613 PF_REF_INIT(skw->refcnt); 614 skw->proto = sp->proto; 615 if (!(skw->af = sp->key[PF_SK_WIRE].af)) 616 skw->af = sp->af; 617 if (sks != skw) { 618 sks->addr[0] = sp->key[PF_SK_STACK].addr[0]; 619 sks->addr[1] = sp->key[PF_SK_STACK].addr[1]; 620 sks->port[0] = sp->key[PF_SK_STACK].port[0]; 621 sks->port[1] = sp->key[PF_SK_STACK].port[1]; 622 sks->rdomain = ntohs(sp->key[PF_SK_STACK].rdomain); 623 PF_REF_INIT(sks->refcnt); 624 if (!(sks->af = sp->key[PF_SK_STACK].af)) 625 sks->af = sp->af; 626 if (sks->af != skw->af) { 627 switch (sp->proto) { 628 case IPPROTO_ICMP: 629 sks->proto = IPPROTO_ICMPV6; 630 break; 631 case IPPROTO_ICMPV6: 632 sks->proto = IPPROTO_ICMP; 633 break; 634 default: 635 sks->proto = sp->proto; 636 } 637 } else 638 sks->proto = sp->proto; 639 640 if (((sks->af != AF_INET) && (sks->af != AF_INET6)) || 641 ((skw->af != AF_INET) && (skw->af != AF_INET6))) { 642 error = EINVAL; 643 goto cleanup; 644 } 645 646 } else if ((sks->af != AF_INET) && (sks->af != AF_INET6)) { 647 error = EINVAL; 648 goto cleanup; 649 } 650 st->rtableid[PF_SK_WIRE] = ntohl(sp->rtableid[PF_SK_WIRE]); 651 st->rtableid[PF_SK_STACK] = ntohl(sp->rtableid[PF_SK_STACK]); 652 653 /* copy to state */ 654 st->rt_addr = sp->rt_addr; 655 st->rt = sp->rt; 656 st->creation = getuptime() - ntohl(sp->creation); 657 st->expire = getuptime(); 658 if (ntohl(sp->expire)) { 659 u_int32_t timeout; 660 661 timeout = r->timeout[sp->timeout]; 662 if (!timeout) 663 timeout = pf_default_rule.timeout[sp->timeout]; 664 665 /* sp->expire may have been adaptively scaled by export. 
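 * In that case sp->expire carries the remaining lifetime reported by the
 * peer, so back-dating st->expire by the already elapsed part of the rule
 * timeout makes the local expiry arithmetic yield the same remaining time.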
*/ 666 st->expire -= timeout - ntohl(sp->expire); 667 } 668 669 st->direction = sp->direction; 670 st->log = sp->log; 671 st->timeout = sp->timeout; 672 st->state_flags = ntohs(sp->state_flags); 673 st->max_mss = ntohs(sp->max_mss); 674 st->min_ttl = sp->min_ttl; 675 st->set_tos = sp->set_tos; 676 st->set_prio[0] = sp->set_prio[0]; 677 st->set_prio[1] = sp->set_prio[1]; 678 679 st->id = sp->id; 680 st->creatorid = sp->creatorid; 681 pf_state_peer_ntoh(&sp->src, &st->src); 682 pf_state_peer_ntoh(&sp->dst, &st->dst); 683 684 st->rule.ptr = r; 685 st->anchor.ptr = NULL; 686 687 st->pfsync_time = getuptime(); 688 st->sync_state = PFSYNC_S_NONE; 689 690 refcnt_init(&st->refcnt); 691 692 /* XXX when we have anchors, use STATE_INC_COUNTERS */ 693 r->states_cur++; 694 r->states_tot++; 695 696 if (!ISSET(flags, PFSYNC_SI_IOCTL)) 697 SET(st->state_flags, PFSTATE_NOSYNC); 698 699 /* 700 * We just set PFSTATE_NOSYNC bit, which prevents 701 * pfsync_insert_state() to insert state to pfsync. 702 */ 703 if (pf_state_insert(kif, &skw, &sks, st) != 0) { 704 /* XXX when we have anchors, use STATE_DEC_COUNTERS */ 705 r->states_cur--; 706 error = EEXIST; 707 goto cleanup_state; 708 } 709 710 if (!ISSET(flags, PFSYNC_SI_IOCTL)) { 711 CLR(st->state_flags, PFSTATE_NOSYNC); 712 if (ISSET(st->state_flags, PFSTATE_ACK)) { 713 pfsync_q_ins(st, PFSYNC_S_IACK); 714 schednetisr(NETISR_PFSYNC); 715 } 716 } 717 CLR(st->state_flags, PFSTATE_ACK); 718 719 return (0); 720 721 cleanup: 722 if (skw == sks) 723 sks = NULL; 724 if (skw != NULL) 725 pool_put(&pf_state_key_pl, skw); 726 if (sks != NULL) 727 pool_put(&pf_state_key_pl, sks); 728 729 cleanup_state: /* pf_state_insert frees the state keys */ 730 if (st) { 731 if (st->dst.scrub) 732 pool_put(&pf_state_scrub_pl, st->dst.scrub); 733 if (st->src.scrub) 734 pool_put(&pf_state_scrub_pl, st->src.scrub); 735 pool_put(&pf_state_pl, st); 736 } 737 return (error); 738 } 739 740 int 741 pfsync_input(struct mbuf **mp, int *offp, int proto, int af) 742 { 743 struct mbuf *n, *m = *mp; 744 struct pfsync_softc *sc = pfsyncif; 745 struct ip *ip = mtod(m, struct ip *); 746 struct pfsync_header *ph; 747 struct pfsync_subheader subh; 748 int offset, noff, len, count, mlen, flags = 0; 749 int e; 750 751 NET_ASSERT_LOCKED(); 752 753 pfsyncstat_inc(pfsyncs_ipackets); 754 755 /* verify that we have a sync interface configured */ 756 if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING) || 757 sc->sc_sync_ifidx == 0 || !pf_status.running) 758 goto done; 759 760 /* verify that the packet came in on the right interface */ 761 if (sc->sc_sync_ifidx != m->m_pkthdr.ph_ifidx) { 762 pfsyncstat_inc(pfsyncs_badif); 763 goto done; 764 } 765 766 sc->sc_if.if_ipackets++; 767 sc->sc_if.if_ibytes += m->m_pkthdr.len; 768 769 /* verify that the IP TTL is 255. 
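 * A TTL of 255 (PFSYNC_DFLTTL) can only be observed if the packet was
 * sent by a host on the directly attached network and was never forwarded.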
*/ 770 if (ip->ip_ttl != PFSYNC_DFLTTL) { 771 pfsyncstat_inc(pfsyncs_badttl); 772 goto done; 773 } 774 775 offset = ip->ip_hl << 2; 776 n = m_pulldown(m, offset, sizeof(*ph), &noff); 777 if (n == NULL) { 778 pfsyncstat_inc(pfsyncs_hdrops); 779 return IPPROTO_DONE; 780 } 781 ph = (struct pfsync_header *)(n->m_data + noff); 782 783 /* verify the version */ 784 if (ph->version != PFSYNC_VERSION) { 785 pfsyncstat_inc(pfsyncs_badver); 786 goto done; 787 } 788 len = ntohs(ph->len) + offset; 789 if (m->m_pkthdr.len < len) { 790 pfsyncstat_inc(pfsyncs_badlen); 791 goto done; 792 } 793 794 if (!bcmp(&ph->pfcksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH)) 795 flags = PFSYNC_SI_CKSUM; 796 797 offset += sizeof(*ph); 798 while (offset <= len - sizeof(subh)) { 799 m_copydata(m, offset, sizeof(subh), &subh); 800 offset += sizeof(subh); 801 802 mlen = subh.len << 2; 803 count = ntohs(subh.count); 804 805 if (subh.action >= PFSYNC_ACT_MAX || 806 subh.action >= nitems(pfsync_acts) || 807 mlen < pfsync_acts[subh.action].len) { 808 /* 809 * subheaders are always followed by at least one 810 * message, so if the peer is new 811 * enough to tell us how big its messages are then we 812 * know enough to skip them. 813 */ 814 if (count > 0 && mlen > 0) { 815 offset += count * mlen; 816 continue; 817 } 818 pfsyncstat_inc(pfsyncs_badact); 819 goto done; 820 } 821 822 n = m_pulldown(m, offset, mlen * count, &noff); 823 if (n == NULL) { 824 pfsyncstat_inc(pfsyncs_badlen); 825 return IPPROTO_DONE; 826 } 827 828 e = pfsync_acts[subh.action].in(n->m_data + noff, mlen, count, 829 flags); 830 if (e != 0) 831 goto done; 832 833 offset += mlen * count; 834 } 835 836 done: 837 m_freem(m); 838 return IPPROTO_DONE; 839 } 840 841 int 842 pfsync_in_clr(caddr_t buf, int len, int count, int flags) 843 { 844 struct pfsync_clr *clr; 845 struct pf_state *st, *nexts; 846 struct pfi_kif *kif; 847 u_int32_t creatorid; 848 int i; 849 850 PF_LOCK(); 851 for (i = 0; i < count; i++) { 852 clr = (struct pfsync_clr *)buf + len * i; 853 kif = NULL; 854 creatorid = clr->creatorid; 855 if (strlen(clr->ifname) && 856 (kif = pfi_kif_find(clr->ifname)) == NULL) 857 continue; 858 859 PF_STATE_ENTER_WRITE(); 860 for (st = RB_MIN(pf_state_tree_id, &tree_id); st; st = nexts) { 861 nexts = RB_NEXT(pf_state_tree_id, &tree_id, st); 862 if (st->creatorid == creatorid && 863 ((kif && st->kif == kif) || !kif)) { 864 SET(st->state_flags, PFSTATE_NOSYNC); 865 pf_remove_state(st); 866 } 867 } 868 PF_STATE_EXIT_WRITE(); 869 } 870 PF_UNLOCK(); 871 872 return (0); 873 } 874 875 int 876 pfsync_in_ins(caddr_t buf, int len, int count, int flags) 877 { 878 struct pfsync_state *sp; 879 sa_family_t af1, af2; 880 int i; 881 882 PF_LOCK(); 883 for (i = 0; i < count; i++) { 884 sp = (struct pfsync_state *)(buf + len * i); 885 af1 = sp->key[0].af; 886 af2 = sp->key[1].af; 887 888 /* check for invalid values */ 889 if (sp->timeout >= PFTM_MAX || 890 sp->src.state > PF_TCPS_PROXY_DST || 891 sp->dst.state > PF_TCPS_PROXY_DST || 892 sp->direction > PF_OUT || 893 (((af1 || af2) && 894 ((af1 != AF_INET && af1 != AF_INET6) || 895 (af2 != AF_INET && af2 != AF_INET6))) || 896 (sp->af != AF_INET && sp->af != AF_INET6))) { 897 DPFPRINTF(LOG_NOTICE, 898 "pfsync_input: PFSYNC5_ACT_INS: invalid value"); 899 pfsyncstat_inc(pfsyncs_badval); 900 continue; 901 } 902 903 if (pfsync_state_import(sp, flags) == ENOMEM) { 904 /* drop out, but process the rest of the actions */ 905 break; 906 } 907 } 908 PF_UNLOCK(); 909 910 return (0); 911 } 912 913 int 914 pfsync_in_iack(caddr_t buf, int len, 
int count, int flags) 915 { 916 struct pfsync_ins_ack *ia; 917 struct pf_state_cmp id_key; 918 struct pf_state *st; 919 int i; 920 921 for (i = 0; i < count; i++) { 922 ia = (struct pfsync_ins_ack *)(buf + len * i); 923 924 id_key.id = ia->id; 925 id_key.creatorid = ia->creatorid; 926 927 PF_STATE_ENTER_READ(); 928 st = pf_find_state_byid(&id_key); 929 pf_state_ref(st); 930 PF_STATE_EXIT_READ(); 931 if (st == NULL) 932 continue; 933 934 if (ISSET(st->state_flags, PFSTATE_ACK)) 935 pfsync_deferred(st, 0); 936 937 pf_state_unref(st); 938 } 939 940 return (0); 941 } 942 943 int 944 pfsync_upd_tcp(struct pf_state *st, struct pfsync_state_peer *src, 945 struct pfsync_state_peer *dst) 946 { 947 int sync = 0; 948 949 /* 950 * The state should never go backwards except 951 * for syn-proxy states. Neither should the 952 * sequence window slide backwards. 953 */ 954 if ((st->src.state > src->state && 955 (st->src.state < PF_TCPS_PROXY_SRC || 956 src->state >= PF_TCPS_PROXY_SRC)) || 957 958 (st->src.state == src->state && 959 SEQ_GT(st->src.seqlo, ntohl(src->seqlo)))) 960 sync++; 961 else 962 pf_state_peer_ntoh(src, &st->src); 963 964 if ((st->dst.state > dst->state) || 965 966 (st->dst.state >= TCPS_SYN_SENT && 967 SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo)))) 968 sync++; 969 else 970 pf_state_peer_ntoh(dst, &st->dst); 971 972 return (sync); 973 } 974 975 int 976 pfsync_in_upd(caddr_t buf, int len, int count, int flags) 977 { 978 struct pfsync_state *sp; 979 struct pf_state_cmp id_key; 980 struct pf_state *st; 981 int sync, error; 982 int i; 983 984 for (i = 0; i < count; i++) { 985 sp = (struct pfsync_state *)(buf + len * i); 986 987 /* check for invalid values */ 988 if (sp->timeout >= PFTM_MAX || 989 sp->src.state > PF_TCPS_PROXY_DST || 990 sp->dst.state > PF_TCPS_PROXY_DST) { 991 DPFPRINTF(LOG_NOTICE, 992 "pfsync_input: PFSYNC_ACT_UPD: invalid value"); 993 pfsyncstat_inc(pfsyncs_badval); 994 continue; 995 } 996 997 id_key.id = sp->id; 998 id_key.creatorid = sp->creatorid; 999 1000 PF_STATE_ENTER_READ(); 1001 st = pf_find_state_byid(&id_key); 1002 pf_state_ref(st); 1003 PF_STATE_EXIT_READ(); 1004 if (st == NULL) { 1005 /* insert the update */ 1006 PF_LOCK(); 1007 error = pfsync_state_import(sp, flags); 1008 if (error) 1009 pfsyncstat_inc(pfsyncs_badstate); 1010 PF_UNLOCK(); 1011 continue; 1012 } 1013 1014 if (ISSET(st->state_flags, PFSTATE_ACK)) 1015 pfsync_deferred(st, 1); 1016 1017 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) 1018 sync = pfsync_upd_tcp(st, &sp->src, &sp->dst); 1019 else { 1020 sync = 0; 1021 1022 /* 1023 * Non-TCP protocol state machine always go 1024 * forwards 1025 */ 1026 if (st->src.state > sp->src.state) 1027 sync++; 1028 else 1029 pf_state_peer_ntoh(&sp->src, &st->src); 1030 1031 if (st->dst.state > sp->dst.state) 1032 sync++; 1033 else 1034 pf_state_peer_ntoh(&sp->dst, &st->dst); 1035 } 1036 1037 if (sync < 2) { 1038 pfsync_alloc_scrub_memory(&sp->dst, &st->dst); 1039 pf_state_peer_ntoh(&sp->dst, &st->dst); 1040 st->expire = getuptime(); 1041 st->timeout = sp->timeout; 1042 } 1043 st->pfsync_time = getuptime(); 1044 1045 if (sync) { 1046 pfsyncstat_inc(pfsyncs_stale); 1047 1048 pfsync_update_state(st); 1049 schednetisr(NETISR_PFSYNC); 1050 } 1051 1052 pf_state_unref(st); 1053 } 1054 1055 return (0); 1056 } 1057 1058 int 1059 pfsync_in_upd_c(caddr_t buf, int len, int count, int flags) 1060 { 1061 struct pfsync_upd_c *up; 1062 struct pf_state_cmp id_key; 1063 struct pf_state *st; 1064 1065 int sync; 1066 1067 int i; 1068 1069 for (i = 0; i < count; i++) { 1070 up = (struct 
pfsync_upd_c *)(buf + len * i); 1071 1072 /* check for invalid values */ 1073 if (up->timeout >= PFTM_MAX || 1074 up->src.state > PF_TCPS_PROXY_DST || 1075 up->dst.state > PF_TCPS_PROXY_DST) { 1076 DPFPRINTF(LOG_NOTICE, 1077 "pfsync_input: PFSYNC_ACT_UPD_C: invalid value"); 1078 pfsyncstat_inc(pfsyncs_badval); 1079 continue; 1080 } 1081 1082 id_key.id = up->id; 1083 id_key.creatorid = up->creatorid; 1084 1085 PF_STATE_ENTER_READ(); 1086 st = pf_find_state_byid(&id_key); 1087 pf_state_ref(st); 1088 PF_STATE_EXIT_READ(); 1089 if (st == NULL) { 1090 /* We don't have this state. Ask for it. */ 1091 pfsync_request_update(id_key.creatorid, id_key.id); 1092 continue; 1093 } 1094 1095 if (ISSET(st->state_flags, PFSTATE_ACK)) 1096 pfsync_deferred(st, 1); 1097 1098 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) 1099 sync = pfsync_upd_tcp(st, &up->src, &up->dst); 1100 else { 1101 sync = 0; 1102 /* 1103 * Non-TCP protocol state machine always go 1104 * forwards 1105 */ 1106 if (st->src.state > up->src.state) 1107 sync++; 1108 else 1109 pf_state_peer_ntoh(&up->src, &st->src); 1110 1111 if (st->dst.state > up->dst.state) 1112 sync++; 1113 else 1114 pf_state_peer_ntoh(&up->dst, &st->dst); 1115 } 1116 if (sync < 2) { 1117 pfsync_alloc_scrub_memory(&up->dst, &st->dst); 1118 pf_state_peer_ntoh(&up->dst, &st->dst); 1119 st->expire = getuptime(); 1120 st->timeout = up->timeout; 1121 } 1122 st->pfsync_time = getuptime(); 1123 1124 if (sync) { 1125 pfsyncstat_inc(pfsyncs_stale); 1126 1127 pfsync_update_state(st); 1128 schednetisr(NETISR_PFSYNC); 1129 } 1130 1131 pf_state_unref(st); 1132 } 1133 1134 return (0); 1135 } 1136 1137 int 1138 pfsync_in_ureq(caddr_t buf, int len, int count, int flags) 1139 { 1140 struct pfsync_upd_req *ur; 1141 int i; 1142 1143 struct pf_state_cmp id_key; 1144 struct pf_state *st; 1145 1146 for (i = 0; i < count; i++) { 1147 ur = (struct pfsync_upd_req *)(buf + len * i); 1148 1149 id_key.id = ur->id; 1150 id_key.creatorid = ur->creatorid; 1151 1152 if (id_key.id == 0 && id_key.creatorid == 0) 1153 pfsync_bulk_start(); 1154 else { 1155 PF_STATE_ENTER_READ(); 1156 st = pf_find_state_byid(&id_key); 1157 pf_state_ref(st); 1158 PF_STATE_EXIT_READ(); 1159 if (st == NULL) { 1160 pfsyncstat_inc(pfsyncs_badstate); 1161 continue; 1162 } 1163 if (ISSET(st->state_flags, PFSTATE_NOSYNC)) { 1164 pf_state_unref(st); 1165 continue; 1166 } 1167 1168 pfsync_update_state_req(st); 1169 pf_state_unref(st); 1170 } 1171 } 1172 1173 return (0); 1174 } 1175 1176 int 1177 pfsync_in_del(caddr_t buf, int len, int count, int flags) 1178 { 1179 struct pfsync_state *sp; 1180 struct pf_state_cmp id_key; 1181 struct pf_state *st; 1182 int i; 1183 1184 PF_STATE_ENTER_WRITE(); 1185 for (i = 0; i < count; i++) { 1186 sp = (struct pfsync_state *)(buf + len * i); 1187 1188 id_key.id = sp->id; 1189 id_key.creatorid = sp->creatorid; 1190 1191 st = pf_find_state_byid(&id_key); 1192 if (st == NULL) { 1193 pfsyncstat_inc(pfsyncs_badstate); 1194 continue; 1195 } 1196 SET(st->state_flags, PFSTATE_NOSYNC); 1197 pf_remove_state(st); 1198 } 1199 PF_STATE_EXIT_WRITE(); 1200 1201 return (0); 1202 } 1203 1204 int 1205 pfsync_in_del_c(caddr_t buf, int len, int count, int flags) 1206 { 1207 struct pfsync_del_c *sp; 1208 struct pf_state_cmp id_key; 1209 struct pf_state *st; 1210 int i; 1211 1212 PF_LOCK(); 1213 PF_STATE_ENTER_WRITE(); 1214 for (i = 0; i < count; i++) { 1215 sp = (struct pfsync_del_c *)(buf + len * i); 1216 1217 id_key.id = sp->id; 1218 id_key.creatorid = sp->creatorid; 1219 1220 st = pf_find_state_byid(&id_key); 1221 if 
(st == NULL) { 1222 pfsyncstat_inc(pfsyncs_badstate); 1223 continue; 1224 } 1225 1226 SET(st->state_flags, PFSTATE_NOSYNC); 1227 pf_remove_state(st); 1228 } 1229 PF_STATE_EXIT_WRITE(); 1230 PF_UNLOCK(); 1231 1232 return (0); 1233 } 1234 1235 int 1236 pfsync_in_bus(caddr_t buf, int len, int count, int flags) 1237 { 1238 struct pfsync_softc *sc = pfsyncif; 1239 struct pfsync_bus *bus; 1240 1241 /* If we're not waiting for a bulk update, who cares. */ 1242 if (sc->sc_ureq_sent == 0) 1243 return (0); 1244 1245 bus = (struct pfsync_bus *)buf; 1246 1247 switch (bus->status) { 1248 case PFSYNC_BUS_START: 1249 timeout_add(&sc->sc_bulkfail_tmo, 4 * hz + 1250 pf_pool_limits[PF_LIMIT_STATES].limit / 1251 ((sc->sc_if.if_mtu - PFSYNC_MINPKT) / 1252 sizeof(struct pfsync_state))); 1253 DPFPRINTF(LOG_INFO, "received bulk update start"); 1254 break; 1255 1256 case PFSYNC_BUS_END: 1257 if (getuptime() - ntohl(bus->endtime) >= 1258 sc->sc_ureq_sent) { 1259 /* that's it, we're happy */ 1260 sc->sc_ureq_sent = 0; 1261 sc->sc_bulk_tries = 0; 1262 timeout_del(&sc->sc_bulkfail_tmo); 1263 #if NCARP > 0 1264 if (!pfsync_sync_ok) 1265 carp_group_demote_adj(&sc->sc_if, -1, 1266 sc->sc_link_demoted ? 1267 "pfsync link state up" : 1268 "pfsync bulk done"); 1269 if (sc->sc_initial_bulk) { 1270 carp_group_demote_adj(&sc->sc_if, -32, 1271 "pfsync init"); 1272 sc->sc_initial_bulk = 0; 1273 } 1274 #endif 1275 pfsync_sync_ok = 1; 1276 sc->sc_link_demoted = 0; 1277 DPFPRINTF(LOG_INFO, "received valid bulk update end"); 1278 } else { 1279 DPFPRINTF(LOG_WARNING, "received invalid " 1280 "bulk update end: bad timestamp"); 1281 } 1282 break; 1283 } 1284 1285 return (0); 1286 } 1287 1288 int 1289 pfsync_in_tdb(caddr_t buf, int len, int count, int flags) 1290 { 1291 #if defined(IPSEC) 1292 struct pfsync_tdb *tp; 1293 int i; 1294 1295 for (i = 0; i < count; i++) { 1296 tp = (struct pfsync_tdb *)(buf + len * i); 1297 pfsync_update_net_tdb(tp); 1298 } 1299 #endif 1300 1301 return (0); 1302 } 1303 1304 #if defined(IPSEC) 1305 /* Update an in-kernel tdb. Silently fail if no tdb is found. */ 1306 void 1307 pfsync_update_net_tdb(struct pfsync_tdb *pt) 1308 { 1309 struct tdb *tdb; 1310 1311 NET_ASSERT_LOCKED(); 1312 1313 /* check for invalid values */ 1314 if (ntohl(pt->spi) <= SPI_RESERVED_MAX || 1315 (pt->dst.sa.sa_family != AF_INET && 1316 pt->dst.sa.sa_family != AF_INET6)) 1317 goto bad; 1318 1319 tdb = gettdb(ntohs(pt->rdomain), pt->spi, 1320 (union sockaddr_union *)&pt->dst, pt->sproto); 1321 if (tdb) { 1322 pt->rpl = betoh64(pt->rpl); 1323 pt->cur_bytes = betoh64(pt->cur_bytes); 1324 1325 /* Neither replay nor byte counter should ever decrease. */ 1326 if (pt->rpl < tdb->tdb_rpl || 1327 pt->cur_bytes < tdb->tdb_cur_bytes) { 1328 goto bad; 1329 } 1330 1331 tdb->tdb_rpl = pt->rpl; 1332 tdb->tdb_cur_bytes = pt->cur_bytes; 1333 } 1334 return; 1335 1336 bad: 1337 DPFPRINTF(LOG_WARNING, "pfsync_insert: PFSYNC_ACT_TDB_UPD: " 1338 "invalid value"); 1339 pfsyncstat_inc(pfsyncs_badstate); 1340 return; 1341 } 1342 #endif 1343 1344 1345 int 1346 pfsync_in_eof(caddr_t buf, int len, int count, int flags) 1347 { 1348 if (len > 0 || count > 0) 1349 pfsyncstat_inc(pfsyncs_badact); 1350 1351 /* we're done. 
let the caller return */ 1352 return (1); 1353 } 1354 1355 int 1356 pfsync_in_error(caddr_t buf, int len, int count, int flags) 1357 { 1358 pfsyncstat_inc(pfsyncs_badact); 1359 return (-1); 1360 } 1361 1362 int 1363 pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, 1364 struct rtentry *rt) 1365 { 1366 m_freem(m); /* drop packet */ 1367 return (EAFNOSUPPORT); 1368 } 1369 1370 int 1371 pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 1372 { 1373 struct proc *p = curproc; 1374 struct pfsync_softc *sc = ifp->if_softc; 1375 struct ifreq *ifr = (struct ifreq *)data; 1376 struct ip_moptions *imo = &sc->sc_imo; 1377 struct pfsyncreq pfsyncr; 1378 struct ifnet *ifp0, *sifp; 1379 struct ip *ip; 1380 int error; 1381 1382 switch (cmd) { 1383 case SIOCSIFFLAGS: 1384 if ((ifp->if_flags & IFF_RUNNING) == 0 && 1385 (ifp->if_flags & IFF_UP)) { 1386 ifp->if_flags |= IFF_RUNNING; 1387 1388 #if NCARP > 0 1389 sc->sc_initial_bulk = 1; 1390 carp_group_demote_adj(&sc->sc_if, 32, "pfsync init"); 1391 #endif 1392 1393 pfsync_request_full_update(sc); 1394 } 1395 if ((ifp->if_flags & IFF_RUNNING) && 1396 (ifp->if_flags & IFF_UP) == 0) { 1397 ifp->if_flags &= ~IFF_RUNNING; 1398 1399 /* drop everything */ 1400 timeout_del(&sc->sc_tmo); 1401 pfsync_drop(sc); 1402 1403 pfsync_cancel_full_update(sc); 1404 } 1405 break; 1406 case SIOCSIFMTU: 1407 if ((ifp0 = if_get(sc->sc_sync_ifidx)) == NULL) 1408 return (EINVAL); 1409 error = 0; 1410 if (ifr->ifr_mtu <= PFSYNC_MINPKT || 1411 ifr->ifr_mtu > ifp0->if_mtu) { 1412 error = EINVAL; 1413 } 1414 if_put(ifp0); 1415 if (error) 1416 return error; 1417 if (ifr->ifr_mtu < ifp->if_mtu) 1418 pfsync_sendout(); 1419 ifp->if_mtu = ifr->ifr_mtu; 1420 break; 1421 case SIOCGETPFSYNC: 1422 bzero(&pfsyncr, sizeof(pfsyncr)); 1423 if ((ifp0 = if_get(sc->sc_sync_ifidx)) != NULL) { 1424 strlcpy(pfsyncr.pfsyncr_syncdev, 1425 ifp0->if_xname, IFNAMSIZ); 1426 } 1427 if_put(ifp0); 1428 pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer; 1429 pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates; 1430 pfsyncr.pfsyncr_defer = sc->sc_defer; 1431 return (copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr))); 1432 1433 case SIOCSETPFSYNC: 1434 if ((error = suser(p)) != 0) 1435 return (error); 1436 if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr)))) 1437 return (error); 1438 1439 if (pfsyncr.pfsyncr_syncpeer.s_addr == 0) 1440 sc->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP; 1441 else 1442 sc->sc_sync_peer.s_addr = 1443 pfsyncr.pfsyncr_syncpeer.s_addr; 1444 1445 if (pfsyncr.pfsyncr_maxupdates > 255) 1446 return (EINVAL); 1447 sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates; 1448 1449 sc->sc_defer = pfsyncr.pfsyncr_defer; 1450 1451 if (pfsyncr.pfsyncr_syncdev[0] == 0) { 1452 if ((ifp0 = if_get(sc->sc_sync_ifidx)) != NULL) { 1453 if_linkstatehook_del(ifp0, &sc->sc_ltask); 1454 if_detachhook_del(ifp0, &sc->sc_dtask); 1455 } 1456 if_put(ifp0); 1457 sc->sc_sync_ifidx = 0; 1458 if (imo->imo_num_memberships > 0) { 1459 in_delmulti(imo->imo_membership[ 1460 --imo->imo_num_memberships]); 1461 imo->imo_ifidx = 0; 1462 } 1463 break; 1464 } 1465 1466 if ((sifp = if_unit(pfsyncr.pfsyncr_syncdev)) == NULL) 1467 return (EINVAL); 1468 1469 ifp0 = if_get(sc->sc_sync_ifidx); 1470 1471 if (sifp->if_mtu < sc->sc_if.if_mtu || (ifp0 != NULL && 1472 sifp->if_mtu < ifp0->if_mtu) || 1473 sifp->if_mtu < MCLBYTES - sizeof(struct ip)) 1474 pfsync_sendout(); 1475 1476 if (ifp0) { 1477 if_linkstatehook_del(ifp0, &sc->sc_ltask); 1478 if_detachhook_del(ifp0, &sc->sc_dtask); 1479 } 1480 if_put(ifp0); 1481 
sc->sc_sync_ifidx = sifp->if_index; 1482 1483 if (imo->imo_num_memberships > 0) { 1484 in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); 1485 imo->imo_ifidx = 0; 1486 } 1487 1488 if (sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) { 1489 struct in_addr addr; 1490 1491 if (!(sifp->if_flags & IFF_MULTICAST)) { 1492 sc->sc_sync_ifidx = 0; 1493 if_put(sifp); 1494 return (EADDRNOTAVAIL); 1495 } 1496 1497 addr.s_addr = INADDR_PFSYNC_GROUP; 1498 1499 if ((imo->imo_membership[0] = 1500 in_addmulti(&addr, sifp)) == NULL) { 1501 sc->sc_sync_ifidx = 0; 1502 if_put(sifp); 1503 return (ENOBUFS); 1504 } 1505 imo->imo_num_memberships++; 1506 imo->imo_ifidx = sc->sc_sync_ifidx; 1507 imo->imo_ttl = PFSYNC_DFLTTL; 1508 imo->imo_loop = 0; 1509 } 1510 1511 ip = &sc->sc_template; 1512 bzero(ip, sizeof(*ip)); 1513 ip->ip_v = IPVERSION; 1514 ip->ip_hl = sizeof(sc->sc_template) >> 2; 1515 ip->ip_tos = IPTOS_LOWDELAY; 1516 /* len and id are set later */ 1517 ip->ip_off = htons(IP_DF); 1518 ip->ip_ttl = PFSYNC_DFLTTL; 1519 ip->ip_p = IPPROTO_PFSYNC; 1520 ip->ip_src.s_addr = INADDR_ANY; 1521 ip->ip_dst.s_addr = sc->sc_sync_peer.s_addr; 1522 1523 if_linkstatehook_add(sifp, &sc->sc_ltask); 1524 if_detachhook_add(sifp, &sc->sc_dtask); 1525 if_put(sifp); 1526 1527 pfsync_request_full_update(sc); 1528 1529 break; 1530 1531 default: 1532 return (ENOTTY); 1533 } 1534 1535 return (0); 1536 } 1537 1538 void 1539 pfsync_out_state(struct pf_state *st, void *buf) 1540 { 1541 struct pfsync_state *sp = buf; 1542 1543 pfsync_state_export(sp, st); 1544 } 1545 1546 void 1547 pfsync_out_iack(struct pf_state *st, void *buf) 1548 { 1549 struct pfsync_ins_ack *iack = buf; 1550 1551 iack->id = st->id; 1552 iack->creatorid = st->creatorid; 1553 } 1554 1555 void 1556 pfsync_out_upd_c(struct pf_state *st, void *buf) 1557 { 1558 struct pfsync_upd_c *up = buf; 1559 1560 bzero(up, sizeof(*up)); 1561 up->id = st->id; 1562 pf_state_peer_hton(&st->src, &up->src); 1563 pf_state_peer_hton(&st->dst, &up->dst); 1564 up->creatorid = st->creatorid; 1565 up->timeout = st->timeout; 1566 } 1567 1568 void 1569 pfsync_out_del(struct pf_state *st, void *buf) 1570 { 1571 struct pfsync_del_c *dp = buf; 1572 1573 dp->id = st->id; 1574 dp->creatorid = st->creatorid; 1575 1576 SET(st->state_flags, PFSTATE_NOSYNC); 1577 } 1578 1579 void 1580 pfsync_grab_snapshot(struct pfsync_snapshot *sn, struct pfsync_softc *sc) 1581 { 1582 int q; 1583 1584 sn->sn_sc = sc; 1585 1586 for (q = 0; q < PFSYNC_S_COUNT; q++) 1587 mtx_enter(&sc->sc_mtx[q]); 1588 1589 mtx_enter(&sc->sc_upd_req_mtx); 1590 mtx_enter(&sc->sc_tdb_mtx); 1591 1592 for (q = 0; q < PFSYNC_S_COUNT; q++) { 1593 TAILQ_INIT(&sn->sn_qs[q]); 1594 TAILQ_CONCAT(&sn->sn_qs[q], &sc->sc_qs[q], sync_list); 1595 } 1596 1597 TAILQ_INIT(&sn->sn_upd_req_list); 1598 TAILQ_CONCAT(&sn->sn_upd_req_list, &sc->sc_upd_req_list, ur_entry); 1599 1600 TAILQ_INIT(&sn->sn_tdb_q); 1601 TAILQ_CONCAT(&sn->sn_tdb_q, &sc->sc_tdb_q, tdb_sync_entry); 1602 1603 sn->sn_len = sc->sc_len; 1604 sc->sc_len = PFSYNC_MINPKT; 1605 1606 sn->sn_plus = sc->sc_plus; 1607 sc->sc_plus = NULL; 1608 sn->sn_pluslen = sc->sc_pluslen; 1609 sc->sc_pluslen = 0; 1610 1611 mtx_leave(&sc->sc_tdb_mtx); 1612 mtx_leave(&sc->sc_upd_req_mtx); 1613 1614 for (q = (PFSYNC_S_COUNT - 1); q >= 0; q--) 1615 mtx_leave(&sc->sc_mtx[q]); 1616 } 1617 1618 void 1619 pfsync_drop_snapshot(struct pfsync_snapshot *sn) 1620 { 1621 struct pf_state *st; 1622 struct pfsync_upd_req_item *ur; 1623 struct tdb *t; 1624 int q; 1625 1626 1627 for (q = 0; q < PFSYNC_S_COUNT; q++) { 1628 if 
(TAILQ_EMPTY(&sn->sn_qs[q])) 1629 continue; 1630 1631 while ((st = TAILQ_FIRST(&sn->sn_qs[q])) != NULL) { 1632 TAILQ_REMOVE(&sn->sn_qs[q], st, sync_list); 1633 #ifdef PFSYNC_DEBUG 1634 KASSERT(st->sync_state == q); 1635 #endif 1636 st->sync_state = PFSYNC_S_NONE; 1637 pf_state_unref(st); 1638 } 1639 } 1640 1641 while ((ur = TAILQ_FIRST(&sn->sn_upd_req_list)) != NULL) { 1642 TAILQ_REMOVE(&sn->sn_upd_req_list, ur, ur_entry); 1643 pool_put(&sn->sn_sc->sc_pool, ur); 1644 } 1645 1646 while ((t = TAILQ_FIRST(&sn->sn_tdb_q)) != NULL) { 1647 TAILQ_REMOVE(&sn->sn_tdb_q, t, tdb_sync_entry); 1648 CLR(t->tdb_flags, TDBF_PFSYNC); 1649 } 1650 } 1651 1652 int 1653 pfsync_is_snapshot_empty(struct pfsync_snapshot *sn) 1654 { 1655 int q; 1656 1657 for (q = 0; q < PFSYNC_S_COUNT; q++) 1658 if (!TAILQ_EMPTY(&sn->sn_qs[q])) 1659 return (0); 1660 1661 if (!TAILQ_EMPTY(&sn->sn_upd_req_list)) 1662 return (0); 1663 1664 if (!TAILQ_EMPTY(&sn->sn_tdb_q)) 1665 return (0); 1666 1667 return (sn->sn_plus == NULL); 1668 } 1669 1670 void 1671 pfsync_drop(struct pfsync_softc *sc) 1672 { 1673 struct pfsync_snapshot sn; 1674 1675 pfsync_grab_snapshot(&sn, sc); 1676 pfsync_drop_snapshot(&sn); 1677 } 1678 1679 void 1680 pfsync_send_dispatch(void *xmq) 1681 { 1682 struct mbuf_queue *mq = xmq; 1683 struct pfsync_softc *sc; 1684 struct mbuf *m; 1685 struct mbuf_list ml; 1686 int error; 1687 1688 mq_delist(mq, &ml); 1689 if (ml_empty(&ml)) 1690 return; 1691 1692 NET_LOCK(); 1693 sc = pfsyncif; 1694 if (sc == NULL) { 1695 ml_purge(&ml); 1696 goto done; 1697 } 1698 1699 while ((m = ml_dequeue(&ml)) != NULL) { 1700 if ((error = ip_output(m, NULL, NULL, IP_RAWOUTPUT, 1701 &sc->sc_imo, NULL, 0)) == 0) 1702 pfsyncstat_inc(pfsyncs_opackets); 1703 else { 1704 DPFPRINTF(LOG_DEBUG, 1705 "ip_output() @ %s failed (%d)\n", __func__, error); 1706 pfsyncstat_inc(pfsyncs_oerrors); 1707 } 1708 } 1709 done: 1710 NET_UNLOCK(); 1711 } 1712 1713 void 1714 pfsync_send_pkt(struct mbuf *m) 1715 { 1716 if (mq_enqueue(&pfsync_mq, m) != 0) { 1717 pfsyncstat_inc(pfsyncs_oerrors); 1718 DPFPRINTF(LOG_DEBUG, "mq_enqueue() @ %s failed, queue full\n", 1719 __func__); 1720 } else 1721 task_add(net_tq(0), &pfsync_task); 1722 } 1723 1724 void 1725 pfsync_sendout(void) 1726 { 1727 struct pfsync_snapshot sn; 1728 struct pfsync_softc *sc = pfsyncif; 1729 #if NBPFILTER > 0 1730 struct ifnet *ifp = &sc->sc_if; 1731 #endif 1732 struct mbuf *m; 1733 struct ip *ip; 1734 struct pfsync_header *ph; 1735 struct pfsync_subheader *subh; 1736 struct pf_state *st; 1737 struct pfsync_upd_req_item *ur; 1738 struct tdb *t; 1739 int offset; 1740 int q, count = 0; 1741 1742 if (sc == NULL || sc->sc_len == PFSYNC_MINPKT) 1743 return; 1744 1745 if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING) || 1746 #if NBPFILTER > 0 1747 (ifp->if_bpf == NULL && sc->sc_sync_ifidx == 0)) { 1748 #else 1749 sc->sc_sync_ifidx == 0) { 1750 #endif 1751 pfsync_drop(sc); 1752 return; 1753 } 1754 1755 pfsync_grab_snapshot(&sn, sc); 1756 1757 /* 1758 * Check below is sufficient to prevent us from sending empty packets, 1759 * but it does not stop us from sending short packets. 
1760 */ 1761 if (pfsync_is_snapshot_empty(&sn)) 1762 return; 1763 1764 MGETHDR(m, M_DONTWAIT, MT_DATA); 1765 if (m == NULL) { 1766 sc->sc_if.if_oerrors++; 1767 pfsyncstat_inc(pfsyncs_onomem); 1768 pfsync_drop_snapshot(&sn); 1769 return; 1770 } 1771 1772 if (max_linkhdr + sn.sn_len > MHLEN) { 1773 MCLGETL(m, M_DONTWAIT, max_linkhdr + sn.sn_len); 1774 if (!ISSET(m->m_flags, M_EXT)) { 1775 m_free(m); 1776 sc->sc_if.if_oerrors++; 1777 pfsyncstat_inc(pfsyncs_onomem); 1778 pfsync_drop_snapshot(&sn); 1779 return; 1780 } 1781 } 1782 m->m_data += max_linkhdr; 1783 m->m_len = m->m_pkthdr.len = sn.sn_len; 1784 1785 /* build the ip header */ 1786 ip = mtod(m, struct ip *); 1787 bcopy(&sc->sc_template, ip, sizeof(*ip)); 1788 offset = sizeof(*ip); 1789 1790 ip->ip_len = htons(m->m_pkthdr.len); 1791 ip->ip_id = htons(ip_randomid()); 1792 1793 /* build the pfsync header */ 1794 ph = (struct pfsync_header *)(m->m_data + offset); 1795 bzero(ph, sizeof(*ph)); 1796 offset += sizeof(*ph); 1797 1798 ph->version = PFSYNC_VERSION; 1799 ph->len = htons(sn.sn_len - sizeof(*ip)); 1800 bcopy(pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH); 1801 1802 if (!TAILQ_EMPTY(&sn.sn_upd_req_list)) { 1803 subh = (struct pfsync_subheader *)(m->m_data + offset); 1804 offset += sizeof(*subh); 1805 1806 count = 0; 1807 while ((ur = TAILQ_FIRST(&sn.sn_upd_req_list)) != NULL) { 1808 TAILQ_REMOVE(&sn.sn_upd_req_list, ur, ur_entry); 1809 1810 bcopy(&ur->ur_msg, m->m_data + offset, 1811 sizeof(ur->ur_msg)); 1812 offset += sizeof(ur->ur_msg); 1813 1814 pool_put(&sc->sc_pool, ur); 1815 1816 count++; 1817 } 1818 1819 bzero(subh, sizeof(*subh)); 1820 subh->len = sizeof(ur->ur_msg) >> 2; 1821 subh->action = PFSYNC_ACT_UPD_REQ; 1822 subh->count = htons(count); 1823 } 1824 1825 /* has someone built a custom region for us to add? */ 1826 if (sn.sn_plus != NULL) { 1827 bcopy(sn.sn_plus, m->m_data + offset, sn.sn_pluslen); 1828 offset += sn.sn_pluslen; 1829 sn.sn_plus = NULL; /* XXX memory leak ? 
*/ 1830 } 1831 1832 if (!TAILQ_EMPTY(&sn.sn_tdb_q)) { 1833 subh = (struct pfsync_subheader *)(m->m_data + offset); 1834 offset += sizeof(*subh); 1835 1836 count = 0; 1837 while ((t = TAILQ_FIRST(&sn.sn_tdb_q)) != NULL) { 1838 TAILQ_REMOVE(&sn.sn_tdb_q, t, tdb_sync_entry); 1839 pfsync_out_tdb(t, m->m_data + offset); 1840 offset += sizeof(struct pfsync_tdb); 1841 CLR(t->tdb_flags, TDBF_PFSYNC); 1842 count++; 1843 } 1844 1845 bzero(subh, sizeof(*subh)); 1846 subh->action = PFSYNC_ACT_TDB; 1847 subh->len = sizeof(struct pfsync_tdb) >> 2; 1848 subh->count = htons(count); 1849 } 1850 1851 /* walk the queues */ 1852 for (q = 0; q < PFSYNC_S_COUNT; q++) { 1853 if (TAILQ_EMPTY(&sn.sn_qs[q])) 1854 continue; 1855 1856 subh = (struct pfsync_subheader *)(m->m_data + offset); 1857 offset += sizeof(*subh); 1858 1859 count = 0; 1860 while ((st = TAILQ_FIRST(&sn.sn_qs[q])) != NULL) { 1861 TAILQ_REMOVE(&sn.sn_qs[q], st, sync_list); 1862 #ifdef PFSYNC_DEBUG 1863 KASSERT(st->sync_state == q); 1864 #endif 1865 st->sync_state = PFSYNC_S_NONE; 1866 pfsync_qs[q].write(st, m->m_data + offset); 1867 offset += pfsync_qs[q].len; 1868 1869 pf_state_unref(st); 1870 count++; 1871 } 1872 1873 bzero(subh, sizeof(*subh)); 1874 subh->action = pfsync_qs[q].action; 1875 subh->len = pfsync_qs[q].len >> 2; 1876 subh->count = htons(count); 1877 } 1878 1879 /* we're done, let's put it on the wire */ 1880 #if NBPFILTER > 0 1881 if (ifp->if_bpf) { 1882 m->m_data += sizeof(*ip); 1883 m->m_len = m->m_pkthdr.len = sn.sn_len - sizeof(*ip); 1884 bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT); 1885 m->m_data -= sizeof(*ip); 1886 m->m_len = m->m_pkthdr.len = sn.sn_len; 1887 } 1888 1889 if (sc->sc_sync_ifidx == 0) { 1890 sc->sc_len = PFSYNC_MINPKT; 1891 m_freem(m); 1892 return; 1893 } 1894 #endif 1895 1896 sc->sc_if.if_opackets++; 1897 sc->sc_if.if_obytes += m->m_pkthdr.len; 1898 1899 m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain; 1900 1901 pfsync_send_pkt(m); 1902 } 1903 1904 void 1905 pfsync_insert_state(struct pf_state *st) 1906 { 1907 struct pfsync_softc *sc = pfsyncif; 1908 1909 NET_ASSERT_LOCKED(); 1910 1911 if (ISSET(st->rule.ptr->rule_flag, PFRULE_NOSYNC) || 1912 st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) { 1913 SET(st->state_flags, PFSTATE_NOSYNC); 1914 return; 1915 } 1916 1917 if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING) || 1918 ISSET(st->state_flags, PFSTATE_NOSYNC)) 1919 return; 1920 1921 #ifdef PFSYNC_DEBUG 1922 KASSERT(st->sync_state == PFSYNC_S_NONE); 1923 #endif 1924 1925 if (sc->sc_len == PFSYNC_MINPKT) 1926 timeout_add_sec(&sc->sc_tmo, 1); 1927 1928 pfsync_q_ins(st, PFSYNC_S_INS); 1929 1930 st->sync_updates = 0; 1931 } 1932 1933 int 1934 pfsync_defer(struct pf_state *st, struct mbuf *m, struct pfsync_deferral **ppd) 1935 { 1936 struct pfsync_softc *sc = pfsyncif; 1937 struct pfsync_deferral *pd; 1938 unsigned int sched; 1939 1940 NET_ASSERT_LOCKED(); 1941 1942 if (!sc->sc_defer || 1943 ISSET(st->state_flags, PFSTATE_NOSYNC) || 1944 m->m_flags & (M_BCAST|M_MCAST)) 1945 return (0); 1946 1947 pd = pool_get(&sc->sc_pool, M_NOWAIT); 1948 if (pd == NULL) 1949 return (0); 1950 1951 /* 1952 * deferral queue grows faster, than timeout can consume, 1953 * we have to ask packet (caller) to help timer and dispatch 1954 * one deferral for us. 1955 * 1956 * We wish to call pfsync_undefer() here. Unfortunately we can't, 1957 * because pfsync_undefer() will be calling to ip_output(), 1958 * which in turn will call to pf_test(), which would then attempt 1959 * to grab PF_LOCK() we currently hold. 
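 * Instead, the oldest deferral is handed back to the caller via *ppd so
 * it can be undeferred once the lock has been released.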
1960 */ 1961 if (sc->sc_deferred >= 128) { 1962 mtx_enter(&sc->sc_deferrals_mtx); 1963 *ppd = TAILQ_FIRST(&sc->sc_deferrals); 1964 if (*ppd != NULL) { 1965 TAILQ_REMOVE(&sc->sc_deferrals, *ppd, pd_entry); 1966 sc->sc_deferred--; 1967 } 1968 mtx_leave(&sc->sc_deferrals_mtx); 1969 } else 1970 *ppd = NULL; 1971 1972 m->m_pkthdr.pf.flags |= PF_TAG_GENERATED; 1973 SET(st->state_flags, PFSTATE_ACK); 1974 1975 pd->pd_st = pf_state_ref(st); 1976 pd->pd_m = m; 1977 1978 pd->pd_deadline = getnsecuptime() + PFSYNC_DEFER_NSEC; 1979 1980 mtx_enter(&sc->sc_deferrals_mtx); 1981 sched = TAILQ_EMPTY(&sc->sc_deferrals); 1982 1983 TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry); 1984 sc->sc_deferred++; 1985 mtx_leave(&sc->sc_deferrals_mtx); 1986 1987 if (sched) 1988 timeout_add_nsec(&sc->sc_deferrals_tmo, PFSYNC_DEFER_NSEC); 1989 1990 schednetisr(NETISR_PFSYNC); 1991 1992 return (1); 1993 } 1994 1995 void 1996 pfsync_undefer_notify(struct pfsync_deferral *pd) 1997 { 1998 struct pf_pdesc pdesc; 1999 struct pf_state *st = pd->pd_st; 2000 2001 /* 2002 * pf_remove_state removes the state keys and sets st->timeout 2003 * to PFTM_UNLINKED. this is done under NET_LOCK which should 2004 * be held here, so we can use PFTM_UNLINKED as a test for 2005 * whether the state keys are set for the address family 2006 * lookup. 2007 */ 2008 2009 if (st->timeout == PFTM_UNLINKED) 2010 return; 2011 2012 if (st->rt == PF_ROUTETO) { 2013 if (pf_setup_pdesc(&pdesc, st->key[PF_SK_WIRE]->af, 2014 st->direction, st->kif, pd->pd_m, NULL) != PF_PASS) 2015 return; 2016 switch (st->key[PF_SK_WIRE]->af) { 2017 case AF_INET: 2018 pf_route(&pdesc, st); 2019 break; 2020 #ifdef INET6 2021 case AF_INET6: 2022 pf_route6(&pdesc, st); 2023 break; 2024 #endif /* INET6 */ 2025 default: 2026 unhandled_af(st->key[PF_SK_WIRE]->af); 2027 } 2028 pd->pd_m = pdesc.m; 2029 } else { 2030 switch (st->key[PF_SK_WIRE]->af) { 2031 case AF_INET: 2032 ip_output(pd->pd_m, NULL, NULL, 0, NULL, NULL, 0); 2033 break; 2034 #ifdef INET6 2035 case AF_INET6: 2036 ip6_output(pd->pd_m, NULL, NULL, 0, NULL, NULL); 2037 break; 2038 #endif /* INET6 */ 2039 default: 2040 unhandled_af(st->key[PF_SK_WIRE]->af); 2041 } 2042 2043 pd->pd_m = NULL; 2044 } 2045 } 2046 2047 void 2048 pfsync_free_deferral(struct pfsync_deferral *pd) 2049 { 2050 struct pfsync_softc *sc = pfsyncif; 2051 2052 pf_state_unref(pd->pd_st); 2053 m_freem(pd->pd_m); 2054 pool_put(&sc->sc_pool, pd); 2055 } 2056 2057 void 2058 pfsync_undefer(struct pfsync_deferral *pd, int drop) 2059 { 2060 struct pfsync_softc *sc = pfsyncif; 2061 2062 NET_ASSERT_LOCKED(); 2063 2064 if (sc == NULL) 2065 return; 2066 2067 CLR(pd->pd_st->state_flags, PFSTATE_ACK); 2068 if (!drop) 2069 pfsync_undefer_notify(pd); 2070 2071 pfsync_free_deferral(pd); 2072 } 2073 2074 void 2075 pfsync_deferrals_tmo(void *arg) 2076 { 2077 struct pfsync_softc *sc = arg; 2078 struct pfsync_deferral *pd; 2079 uint64_t now, nsec = 0; 2080 struct pfsync_deferrals pds = TAILQ_HEAD_INITIALIZER(pds); 2081 2082 now = getnsecuptime(); 2083 2084 mtx_enter(&sc->sc_deferrals_mtx); 2085 for (;;) { 2086 pd = TAILQ_FIRST(&sc->sc_deferrals); 2087 if (pd == NULL) 2088 break; 2089 2090 if (now < pd->pd_deadline) { 2091 nsec = pd->pd_deadline - now; 2092 break; 2093 } 2094 2095 TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry); 2096 sc->sc_deferred--; 2097 TAILQ_INSERT_TAIL(&pds, pd, pd_entry); 2098 } 2099 mtx_leave(&sc->sc_deferrals_mtx); 2100 2101 if (nsec > 0) { 2102 /* we were looking at a pd, but it wasn't old enough */ 2103 timeout_add_nsec(&sc->sc_deferrals_tmo, nsec); 
2104 } 2105 2106 if (TAILQ_EMPTY(&pds)) 2107 return; 2108 2109 NET_LOCK(); 2110 while ((pd = TAILQ_FIRST(&pds)) != NULL) { 2111 TAILQ_REMOVE(&pds, pd, pd_entry); 2112 2113 pfsync_undefer(pd, 0); 2114 } 2115 NET_UNLOCK(); 2116 } 2117 2118 void 2119 pfsync_deferred(struct pf_state *st, int drop) 2120 { 2121 struct pfsync_softc *sc = pfsyncif; 2122 struct pfsync_deferral *pd; 2123 2124 NET_ASSERT_LOCKED(); 2125 2126 mtx_enter(&sc->sc_deferrals_mtx); 2127 TAILQ_FOREACH(pd, &sc->sc_deferrals, pd_entry) { 2128 if (pd->pd_st == st) { 2129 TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry); 2130 sc->sc_deferred--; 2131 break; 2132 } 2133 } 2134 mtx_leave(&sc->sc_deferrals_mtx); 2135 2136 if (pd != NULL) 2137 pfsync_undefer(pd, drop); 2138 } 2139 2140 void 2141 pfsync_update_state(struct pf_state *st) 2142 { 2143 struct pfsync_softc *sc = pfsyncif; 2144 int sync = 0; 2145 2146 NET_ASSERT_LOCKED(); 2147 2148 if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING)) 2149 return; 2150 2151 if (ISSET(st->state_flags, PFSTATE_ACK)) 2152 pfsync_deferred(st, 0); 2153 if (ISSET(st->state_flags, PFSTATE_NOSYNC)) { 2154 if (st->sync_state != PFSYNC_S_NONE) 2155 pfsync_q_del(st); 2156 return; 2157 } 2158 2159 if (sc->sc_len == PFSYNC_MINPKT) 2160 timeout_add_sec(&sc->sc_tmo, 1); 2161 2162 switch (st->sync_state) { 2163 case PFSYNC_S_UPD_C: 2164 case PFSYNC_S_UPD: 2165 case PFSYNC_S_INS: 2166 /* we're already handling it */ 2167 2168 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) { 2169 st->sync_updates++; 2170 if (st->sync_updates >= sc->sc_maxupdates) 2171 sync = 1; 2172 } 2173 break; 2174 2175 case PFSYNC_S_IACK: 2176 pfsync_q_del(st); 2177 case PFSYNC_S_NONE: 2178 pfsync_q_ins(st, PFSYNC_S_UPD_C); 2179 st->sync_updates = 0; 2180 break; 2181 2182 default: 2183 panic("pfsync_update_state: unexpected sync state %d", 2184 st->sync_state); 2185 } 2186 2187 if (sync || (getuptime() - st->pfsync_time) < 2) 2188 schednetisr(NETISR_PFSYNC); 2189 } 2190 2191 void 2192 pfsync_cancel_full_update(struct pfsync_softc *sc) 2193 { 2194 if (timeout_pending(&sc->sc_bulkfail_tmo) || 2195 timeout_pending(&sc->sc_bulk_tmo)) { 2196 #if NCARP > 0 2197 if (!pfsync_sync_ok) 2198 carp_group_demote_adj(&sc->sc_if, -1, 2199 "pfsync bulk cancelled"); 2200 if (sc->sc_initial_bulk) { 2201 carp_group_demote_adj(&sc->sc_if, -32, 2202 "pfsync init"); 2203 sc->sc_initial_bulk = 0; 2204 } 2205 #endif 2206 pfsync_sync_ok = 1; 2207 DPFPRINTF(LOG_INFO, "cancelling bulk update"); 2208 } 2209 timeout_del(&sc->sc_bulkfail_tmo); 2210 timeout_del(&sc->sc_bulk_tmo); 2211 sc->sc_bulk_next = NULL; 2212 sc->sc_bulk_last = NULL; 2213 sc->sc_ureq_sent = 0; 2214 sc->sc_bulk_tries = 0; 2215 } 2216 2217 void 2218 pfsync_request_full_update(struct pfsync_softc *sc) 2219 { 2220 if (sc->sc_sync_ifidx != 0 && ISSET(sc->sc_if.if_flags, IFF_RUNNING)) { 2221 /* Request a full state table update. 
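 * This is done by queueing an update request with id and creatorid both
 * zero, which the peer treats as a bulk update request (see
 * pfsync_in_ureq() and pfsync_bulk_start()).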
		 */
		sc->sc_ureq_sent = getuptime();
#if NCARP > 0
		if (!sc->sc_link_demoted && pfsync_sync_ok)
			carp_group_demote_adj(&sc->sc_if, 1,
			    "pfsync bulk start");
#endif
		pfsync_sync_ok = 0;
		DPFPRINTF(LOG_INFO, "requesting bulk update");
		timeout_add(&sc->sc_bulkfail_tmo, 4 * hz +
		    pf_pool_limits[PF_LIMIT_STATES].limit /
		    ((sc->sc_if.if_mtu - PFSYNC_MINPKT) /
		    sizeof(struct pfsync_state)));
		pfsync_request_update(0, 0);
	}
}

void
pfsync_request_update(u_int32_t creatorid, u_int64_t id)
{
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_upd_req_item *item;
	size_t nlen, sc_len;
	int retry;

	/*
	 * this code does nothing to prevent multiple update requests for the
	 * same state from being generated.
	 */

	item = pool_get(&sc->sc_pool, PR_NOWAIT);
	if (item == NULL) {
		/* XXX stats */
		return;
	}

	item->ur_msg.id = id;
	item->ur_msg.creatorid = creatorid;

	for (;;) {
		mtx_enter(&sc->sc_upd_req_mtx);

		nlen = sizeof(struct pfsync_upd_req);
		if (TAILQ_EMPTY(&sc->sc_upd_req_list))
			nlen += sizeof(struct pfsync_subheader);

		sc_len = atomic_add_long_nv(&sc->sc_len, nlen);
		retry = (sc_len > sc->sc_if.if_mtu);
		if (retry)
			atomic_sub_long(&sc->sc_len, nlen);
		else
			TAILQ_INSERT_TAIL(&sc->sc_upd_req_list, item, ur_entry);

		mtx_leave(&sc->sc_upd_req_mtx);

		if (!retry)
			break;

		pfsync_sendout();
	}

	schednetisr(NETISR_PFSYNC);
}

void
pfsync_update_state_req(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;

	if (sc == NULL)
		panic("pfsync_update_state_req: nonexistent instance");

	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
		if (st->sync_state != PFSYNC_S_NONE)
			pfsync_q_del(st);
		return;
	}

	switch (st->sync_state) {
	case PFSYNC_S_UPD_C:
	case PFSYNC_S_IACK:
		pfsync_q_del(st);
		/* FALLTHROUGH */
	case PFSYNC_S_NONE:
		pfsync_q_ins(st, PFSYNC_S_UPD);
		schednetisr(NETISR_PFSYNC);
		return;

	case PFSYNC_S_INS:
	case PFSYNC_S_UPD:
	case PFSYNC_S_DEL:
		/* we're already handling it */
		return;

	default:
		panic("pfsync_update_state_req: unexpected sync state %d",
		    st->sync_state);
	}
}

void
pfsync_delete_state(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;

	NET_ASSERT_LOCKED();

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
		return;

	if (ISSET(st->state_flags, PFSTATE_ACK))
		pfsync_deferred(st, 1);
	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
		if (st->sync_state != PFSYNC_S_NONE)
			pfsync_q_del(st);
		return;
	}

	if (sc->sc_len == PFSYNC_MINPKT)
		timeout_add_sec(&sc->sc_tmo, 1);

	switch (st->sync_state) {
	case PFSYNC_S_INS:
		/* we never got to tell the world so just forget about it */
		pfsync_q_del(st);
		return;

	case PFSYNC_S_UPD_C:
	case PFSYNC_S_UPD:
	case PFSYNC_S_IACK:
		pfsync_q_del(st);
		/*
		 * FALLTHROUGH to putting it on the del list.
		 * Note on reference count bookkeeping: pfsync_q_del()
		 * drops the reference it held for queue membership, but
		 * st survives because our caller still holds a reference.
		 */

	case PFSYNC_S_NONE:
		/*
		 * Either we fall through to here, or no pfsync queue holds
		 * a reference to st at this point.
		 *
		 * pfsync_q_ins() puts st on the delete queue and grabs a
		 * reference for that queue.
		 */
		pfsync_q_ins(st, PFSYNC_S_DEL);
		return;

	default:
		panic("pfsync_delete_state: unexpected sync state %d",
		    st->sync_state);
	}
}

void
pfsync_clear_states(u_int32_t creatorid, const char *ifname)
{
	struct pfsync_softc *sc = pfsyncif;
	struct {
		struct pfsync_subheader subh;
		struct pfsync_clr clr;
	} __packed r;

	NET_ASSERT_LOCKED();

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
		return;

	bzero(&r, sizeof(r));

	r.subh.action = PFSYNC_ACT_CLR;
	r.subh.len = sizeof(struct pfsync_clr) >> 2;
	r.subh.count = htons(1);

	strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname));
	r.clr.creatorid = creatorid;

	pfsync_send_plus(&r, sizeof(r));
}

void
pfsync_q_ins(struct pf_state *st, int q)
{
	struct pfsync_softc *sc = pfsyncif;
	size_t nlen, sc_len;

#if defined(PFSYNC_DEBUG)
	if (sc->sc_len < PFSYNC_MINPKT)
		panic("pfsync pkt len is too low %zd", sc->sc_len);
#endif
	do {
		mtx_enter(&sc->sc_mtx[q]);

		/*
		 * If two threads are racing to insert the same state,
		 * there must be exactly one winner.
		 */
		if (st->sync_state != PFSYNC_S_NONE) {
			mtx_leave(&sc->sc_mtx[q]);
			break;
		}

		nlen = pfsync_qs[q].len;

		if (TAILQ_EMPTY(&sc->sc_qs[q]))
			nlen += sizeof(struct pfsync_subheader);

		sc_len = atomic_add_long_nv(&sc->sc_len, nlen);
		if (sc_len > sc->sc_if.if_mtu) {
			atomic_sub_long(&sc->sc_len, nlen);
			mtx_leave(&sc->sc_mtx[q]);
			pfsync_sendout();
			continue;
		}

		pf_state_ref(st);

		TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list);
		st->sync_state = q;
		mtx_leave(&sc->sc_mtx[q]);
	} while (0);
}

void
pfsync_q_del(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;
	int q = st->sync_state;

	KASSERT(st->sync_state != PFSYNC_S_NONE);

	mtx_enter(&sc->sc_mtx[q]);
	atomic_sub_long(&sc->sc_len, pfsync_qs[q].len);
	TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list);
	if (TAILQ_EMPTY(&sc->sc_qs[q]))
		atomic_sub_long(&sc->sc_len, sizeof(struct pfsync_subheader));
	mtx_leave(&sc->sc_mtx[q]);

	st->sync_state = PFSYNC_S_NONE;
	pf_state_unref(st);
}

void
pfsync_update_tdb(struct tdb *t, int output)
{
	struct pfsync_softc *sc = pfsyncif;
	size_t nlen, sc_len;

	if (sc == NULL)
		return;

	if (!ISSET(t->tdb_flags, TDBF_PFSYNC)) {
		do {
			mtx_enter(&sc->sc_tdb_mtx);
			nlen = sizeof(struct pfsync_tdb);

			if (TAILQ_EMPTY(&sc->sc_tdb_q))
				nlen += sizeof(struct pfsync_subheader);

			sc_len = atomic_add_long_nv(&sc->sc_len, nlen);
			if (sc_len > sc->sc_if.if_mtu) {
				atomic_sub_long(&sc->sc_len, nlen);
				mtx_leave(&sc->sc_tdb_mtx);
				pfsync_sendout();
				continue;
			}

			TAILQ_INSERT_TAIL(&sc->sc_tdb_q, t, tdb_sync_entry);
			mtx_leave(&sc->sc_tdb_mtx);

			SET(t->tdb_flags, TDBF_PFSYNC);
			t->tdb_updates = 0;
		} while (0);
	} else {
		if (++t->tdb_updates >= sc->sc_maxupdates)
			schednetisr(NETISR_PFSYNC);
	}

	if (output)
		SET(t->tdb_flags, TDBF_PFSYNC_RPL);
	else
		CLR(t->tdb_flags, TDBF_PFSYNC_RPL);
}

void
pfsync_delete_tdb(struct tdb *t)
{
	struct pfsync_softc *sc = pfsyncif;
	size_t nlen;

	if (sc == NULL || !ISSET(t->tdb_flags, TDBF_PFSYNC))
		return;

	mtx_enter(&sc->sc_tdb_mtx);

	TAILQ_REMOVE(&sc->sc_tdb_q, t, tdb_sync_entry);
	CLR(t->tdb_flags, TDBF_PFSYNC);

	nlen = sizeof(struct pfsync_tdb);
	if (TAILQ_EMPTY(&sc->sc_tdb_q))
		nlen += sizeof(struct pfsync_subheader);
	atomic_sub_long(&sc->sc_len, nlen);

	mtx_leave(&sc->sc_tdb_mtx);
}

void
pfsync_out_tdb(struct tdb *t, void *buf)
{
	struct pfsync_tdb *ut = buf;

	bzero(ut, sizeof(*ut));
	ut->spi = t->tdb_spi;
	bcopy(&t->tdb_dst, &ut->dst, sizeof(ut->dst));
	/*
	 * When a failover happens, the master's rpl is probably above
	 * what we see here (we may be up to a second late), so
	 * increase it a bit for outbound tdbs to manage most such
	 * situations.
	 *
	 * For now, just add an offset that is likely to be larger
	 * than the number of packets we can see in one second. The RFC
	 * just says the next packet must have a higher seq value.
	 *
	 * XXX What is a good algorithm for this? We could use
	 * a rate-determined increase, but to know it, we would have
	 * to extend struct tdb.
	 * XXX ut->rpl can wrap over MAXINT, but if so the real tdb
	 * will soon be replaced anyway. For now, just don't handle
	 * this edge case.
	 */
#define RPL_INCR 16384
	ut->rpl = htobe64(t->tdb_rpl + (ISSET(t->tdb_flags, TDBF_PFSYNC_RPL) ?
	    RPL_INCR : 0));
	ut->cur_bytes = htobe64(t->tdb_cur_bytes);
	ut->sproto = t->tdb_sproto;
	ut->rdomain = htons(t->tdb_rdomain);
}

void
pfsync_bulk_start(void)
{
	struct pfsync_softc *sc = pfsyncif;

	NET_ASSERT_LOCKED();

	/*
	 * pf gc via pfsync_state_in_use reads sc_bulk_next and
	 * sc_bulk_last while exclusively holding the pf_state_list
	 * rwlock. make sure it can't race with us setting these
	 * pointers. they basically act as hazards, and borrow the
	 * list's references on the states.
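	 * (While a state is pointed at by sc_bulk_next or sc_bulk_last,
	 * pfsync_state_in_use() below reports it as in use, so pf's
	 * purge code will not unlink it from the state list.)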
	 */
	rw_enter_read(&pf_state_list.pfs_rwl);

	/* get a consistent view of the list pointers */
	mtx_enter(&pf_state_list.pfs_mtx);
	if (sc->sc_bulk_next == NULL)
		sc->sc_bulk_next = TAILQ_FIRST(&pf_state_list.pfs_list);

	sc->sc_bulk_last = TAILQ_LAST(&pf_state_list.pfs_list, pf_state_queue);
	mtx_leave(&pf_state_list.pfs_mtx);

	rw_exit_read(&pf_state_list.pfs_rwl);

	DPFPRINTF(LOG_INFO, "received bulk update request");

	if (sc->sc_bulk_last == NULL)
		pfsync_bulk_status(PFSYNC_BUS_END);
	else {
		sc->sc_ureq_received = getuptime();

		pfsync_bulk_status(PFSYNC_BUS_START);
		timeout_add(&sc->sc_bulk_tmo, 0);
	}
}

void
pfsync_bulk_update(void *arg)
{
	struct pfsync_softc *sc;
	struct pf_state *st;
	int i = 0;

	NET_LOCK();
	sc = pfsyncif;
	if (sc == NULL)
		goto out;

	rw_enter_read(&pf_state_list.pfs_rwl);
	st = sc->sc_bulk_next;
	sc->sc_bulk_next = NULL;

	for (;;) {
		if (st->sync_state == PFSYNC_S_NONE &&
		    st->timeout < PFTM_MAX &&
		    st->pfsync_time <= sc->sc_ureq_received) {
			pfsync_update_state_req(st);
			i++;
		}

		st = TAILQ_NEXT(st, entry_list);
		if ((st == NULL) || (st == sc->sc_bulk_last)) {
			/* we're done */
			sc->sc_bulk_last = NULL;
			pfsync_bulk_status(PFSYNC_BUS_END);
			break;
		}

		if (i > 1 && (sc->sc_if.if_mtu - sc->sc_len) <
		    sizeof(struct pfsync_state)) {
			/* we've filled a packet */
			sc->sc_bulk_next = st;
			timeout_add(&sc->sc_bulk_tmo, 1);
			break;
		}
	}

	rw_exit_read(&pf_state_list.pfs_rwl);
out:
	NET_UNLOCK();
}

void
pfsync_bulk_status(u_int8_t status)
{
	struct {
		struct pfsync_subheader subh;
		struct pfsync_bus bus;
	} __packed r;

	struct pfsync_softc *sc = pfsyncif;

	bzero(&r, sizeof(r));

	r.subh.action = PFSYNC_ACT_BUS;
	r.subh.len = sizeof(struct pfsync_bus) >> 2;
	r.subh.count = htons(1);

	r.bus.creatorid = pf_status.hostid;
	r.bus.endtime = htonl(getuptime() - sc->sc_ureq_received);
	r.bus.status = status;

	pfsync_send_plus(&r, sizeof(r));
}

void
pfsync_bulk_fail(void *arg)
{
	struct pfsync_softc *sc;

	NET_LOCK();
	sc = pfsyncif;
	if (sc == NULL)
		goto out;
	if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) {
		/* Try again */
		timeout_add_sec(&sc->sc_bulkfail_tmo, 5);
		pfsync_request_update(0, 0);
	} else {
		/* Pretend like the transfer was ok */
		sc->sc_ureq_sent = 0;
		sc->sc_bulk_tries = 0;
#if NCARP > 0
		if (!pfsync_sync_ok)
			carp_group_demote_adj(&sc->sc_if, -1,
			    sc->sc_link_demoted ?
2689 "pfsync link state up" : 2690 "pfsync bulk fail"); 2691 if (sc->sc_initial_bulk) { 2692 carp_group_demote_adj(&sc->sc_if, -32, 2693 "pfsync init"); 2694 sc->sc_initial_bulk = 0; 2695 } 2696 #endif 2697 pfsync_sync_ok = 1; 2698 sc->sc_link_demoted = 0; 2699 DPFPRINTF(LOG_ERR, "failed to receive bulk update"); 2700 } 2701 out: 2702 NET_UNLOCK(); 2703 } 2704 2705 void 2706 pfsync_send_plus(void *plus, size_t pluslen) 2707 { 2708 struct pfsync_softc *sc = pfsyncif; 2709 2710 if (sc->sc_len + pluslen > sc->sc_if.if_mtu) 2711 pfsync_sendout(); 2712 2713 sc->sc_plus = plus; 2714 sc->sc_len += (sc->sc_pluslen = pluslen); 2715 2716 pfsync_sendout(); 2717 } 2718 2719 int 2720 pfsync_up(void) 2721 { 2722 struct pfsync_softc *sc = pfsyncif; 2723 2724 if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING)) 2725 return (0); 2726 2727 return (1); 2728 } 2729 2730 int 2731 pfsync_state_in_use(struct pf_state *st) 2732 { 2733 struct pfsync_softc *sc = pfsyncif; 2734 2735 if (sc == NULL) 2736 return (0); 2737 2738 rw_assert_wrlock(&pf_state_list.pfs_rwl); 2739 2740 if (st->sync_state != PFSYNC_S_NONE || 2741 st == sc->sc_bulk_next || 2742 st == sc->sc_bulk_last) 2743 return (1); 2744 2745 return (0); 2746 } 2747 2748 void 2749 pfsync_timeout(void *arg) 2750 { 2751 NET_LOCK(); 2752 pfsync_sendout(); 2753 NET_UNLOCK(); 2754 } 2755 2756 /* this is a softnet/netisr handler */ 2757 void 2758 pfsyncintr(void) 2759 { 2760 pfsync_sendout(); 2761 } 2762 2763 int 2764 pfsync_sysctl_pfsyncstat(void *oldp, size_t *oldlenp, void *newp) 2765 { 2766 struct pfsyncstats pfsyncstat; 2767 2768 CTASSERT(sizeof(pfsyncstat) == (pfsyncs_ncounters * sizeof(uint64_t))); 2769 memset(&pfsyncstat, 0, sizeof pfsyncstat); 2770 counters_read(pfsynccounters, (uint64_t *)&pfsyncstat, 2771 pfsyncs_ncounters); 2772 return (sysctl_rdstruct(oldp, oldlenp, newp, 2773 &pfsyncstat, sizeof(pfsyncstat))); 2774 } 2775 2776 int 2777 pfsync_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, 2778 size_t newlen) 2779 { 2780 /* All sysctl names at this level are terminal. */ 2781 if (namelen != 1) 2782 return (ENOTDIR); 2783 2784 switch (name[0]) { 2785 case PFSYNCCTL_STATS: 2786 return (pfsync_sysctl_pfsyncstat(oldp, oldlenp, newp)); 2787 default: 2788 return (ENOPROTOOPT); 2789 } 2790 } 2791