/*	$OpenBSD: if_pfsync.c,v 1.257 2018/02/19 08:59:52 mpi Exp $	*/

/*
 * Copyright (c) 2002 Michael Shalayeff
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 2009 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/timeout.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/pool.h>
#include <sys/syslog.h>

#include <net/if.h>
#include <net/if_types.h>
#include <net/bpf.h>
#include <net/netisr.h>

#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/in_var.h>
#include <netinet/ip_var.h>
#include <netinet/ip_ipsp.h>
#include <netinet/ip_icmp.h>
#include <netinet/icmp6.h>
#include <netinet/tcp.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_fsm.h>
#include <netinet/udp.h>

#ifdef INET6
#include <netinet6/in6_var.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet6/nd6.h>
#endif /* INET6 */

#include "carp.h"
#if NCARP > 0
#include <netinet/ip_carp.h>
#endif

#define PF_DEBUGNAME	"pfsync: "
#include <net/pfvar.h>
#include <net/pfvar_priv.h>
#include <net/if_pfsync.h>

#include "bpfilter.h"
#include "pfsync.h"

#define PFSYNC_MINPKT ( \
	sizeof(struct ip) + \
	sizeof(struct pfsync_header))

int	pfsync_upd_tcp(struct pf_state *, struct pfsync_state_peer *,
	    struct pfsync_state_peer *);

int	pfsync_in_clr(caddr_t, int, int, int);
int	pfsync_in_iack(caddr_t, int, int, int);
int	pfsync_in_upd_c(caddr_t, int, int, int);
int	pfsync_in_ureq(caddr_t, int, int, int);
int	pfsync_in_del(caddr_t, int, int, int);
int	pfsync_in_del_c(caddr_t, int, int, int);
int	pfsync_in_bus(caddr_t, int, int, int);
int	pfsync_in_tdb(caddr_t, int, int, int);
int	pfsync_in_ins(caddr_t, int, int, int);
int	pfsync_in_upd(caddr_t, int, int, int);
int	pfsync_in_eof(caddr_t, int, int, int);

int	pfsync_in_error(caddr_t, int, int, int);

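/*
 * Receive-side dispatch table, indexed by the action code carried in
 * each pfsync_subheader.  The handler gets a pointer to the message
 * payload, the on-wire message length (subh.len << 2) and the message
 * count; .len is the minimum length a single message of that action
 * may have, used to sanity check the subheader before the handler
 * runs.  Actions from older protocol versions map to pfsync_in_error.
 */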
struct {
	int		(*in)(caddr_t, int, int, int);
	size_t		len;
} pfsync_acts[] = {
	/* PFSYNC_ACT_CLR */
	{ pfsync_in_clr, sizeof(struct pfsync_clr) },
	/* PFSYNC_ACT_OINS */
	{ pfsync_in_error, 0 },
	/* PFSYNC_ACT_INS_ACK */
	{ pfsync_in_iack, sizeof(struct pfsync_ins_ack) },
	/* PFSYNC_ACT_OUPD */
	{ pfsync_in_error, 0 },
	/* PFSYNC_ACT_UPD_C */
	{ pfsync_in_upd_c, sizeof(struct pfsync_upd_c) },
	/* PFSYNC_ACT_UPD_REQ */
	{ pfsync_in_ureq, sizeof(struct pfsync_upd_req) },
	/* PFSYNC_ACT_DEL */
	{ pfsync_in_del, sizeof(struct pfsync_state) },
	/* PFSYNC_ACT_DEL_C */
	{ pfsync_in_del_c, sizeof(struct pfsync_del_c) },
	/* PFSYNC_ACT_INS_F */
	{ pfsync_in_error, 0 },
	/* PFSYNC_ACT_DEL_F */
	{ pfsync_in_error, 0 },
	/* PFSYNC_ACT_BUS */
	{ pfsync_in_bus, sizeof(struct pfsync_bus) },
	/* PFSYNC_ACT_OTDB */
	{ pfsync_in_error, 0 },
	/* PFSYNC_ACT_EOF */
	{ pfsync_in_error, 0 },
	/* PFSYNC_ACT_INS */
	{ pfsync_in_ins, sizeof(struct pfsync_state) },
	/* PFSYNC_ACT_UPD */
	{ pfsync_in_upd, sizeof(struct pfsync_state) },
	/* PFSYNC_ACT_TDB */
	{ pfsync_in_tdb, sizeof(struct pfsync_tdb) },
};

struct pfsync_q {
	void		(*write)(struct pf_state *, void *);
	size_t		len;
	u_int8_t	action;
};

/* we have one of these for every PFSYNC_S_ */
void	pfsync_out_state(struct pf_state *, void *);
void	pfsync_out_iack(struct pf_state *, void *);
void	pfsync_out_upd_c(struct pf_state *, void *);
void	pfsync_out_del(struct pf_state *, void *);

struct pfsync_q pfsync_qs[] = {
	{ pfsync_out_iack, sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK },
	{ pfsync_out_upd_c, sizeof(struct pfsync_upd_c), PFSYNC_ACT_UPD_C },
	{ pfsync_out_del, sizeof(struct pfsync_del_c), PFSYNC_ACT_DEL_C },
	{ pfsync_out_state, sizeof(struct pfsync_state), PFSYNC_ACT_INS },
	{ pfsync_out_state, sizeof(struct pfsync_state), PFSYNC_ACT_UPD }
};

void	pfsync_q_ins(struct pf_state *, int);
void	pfsync_q_del(struct pf_state *);

struct pfsync_upd_req_item {
	TAILQ_ENTRY(pfsync_upd_req_item)	 ur_entry;
	struct pfsync_upd_req			 ur_msg;
};
TAILQ_HEAD(pfsync_upd_reqs, pfsync_upd_req_item);

struct pfsync_deferral {
	TAILQ_ENTRY(pfsync_deferral)		 pd_entry;
	struct pf_state				*pd_st;
	struct mbuf				*pd_m;
	struct timeout				 pd_tmo;
};
TAILQ_HEAD(pfsync_deferrals, pfsync_deferral);

#define PFSYNC_PLSIZE	MAX(sizeof(struct pfsync_upd_req_item), \
	    sizeof(struct pfsync_deferral))

void	pfsync_out_tdb(struct tdb *, void *);

struct pfsync_softc {
	struct ifnet		 sc_if;
	struct ifnet		*sc_sync_if;

	struct pool		 sc_pool;

	struct ip_moptions	 sc_imo;

	struct in_addr		 sc_sync_peer;
	u_int8_t		 sc_maxupdates;

	struct ip		 sc_template;

	struct pf_state_queue	 sc_qs[PFSYNC_S_COUNT];
	size_t			 sc_len;

	struct pfsync_upd_reqs	 sc_upd_req_list;

	int			 sc_initial_bulk;
	int			 sc_link_demoted;

	int			 sc_defer;
	struct pfsync_deferrals	 sc_deferrals;
	u_int			 sc_deferred;

	void			*sc_plus;
	size_t			 sc_pluslen;

	u_int32_t		 sc_ureq_sent;
	int			 sc_bulk_tries;
	struct timeout		 sc_bulkfail_tmo;

	u_int32_t		 sc_ureq_received;
	struct pf_state		*sc_bulk_next;
	struct pf_state		*sc_bulk_last;
	struct timeout		 sc_bulk_tmo;

	TAILQ_HEAD(, tdb)	 sc_tdb_q;

	void			*sc_lhcookie;
	void			*sc_dhcookie;

	struct timeout		 sc_tmo;
};

struct pfsync_softc	*pfsyncif = NULL;
struct cpumem		*pfsynccounters;

void	pfsyncattach(int);
int	pfsync_clone_create(struct if_clone *, int);
int	pfsync_clone_destroy(struct ifnet *);
int	pfsync_alloc_scrub_memory(struct pfsync_state_peer *,
	    struct pf_state_peer *);
void	pfsync_update_net_tdb(struct pfsync_tdb *);
int	pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
	    struct rtentry *);
int	pfsyncioctl(struct ifnet *, u_long, caddr_t);
void	pfsyncstart(struct ifnet *);
void	pfsync_syncdev_state(void *);
void	pfsync_ifdetach(void *);

void	pfsync_deferred(struct pf_state *, int);
void	pfsync_undefer(struct pfsync_deferral *, int);
void	pfsync_defer_tmo(void *);

void	pfsync_cancel_full_update(struct pfsync_softc *);
void	pfsync_request_full_update(struct pfsync_softc *);
void	pfsync_request_update(u_int32_t, u_int64_t);
void	pfsync_update_state_req(struct pf_state *);

void	pfsync_drop(struct pfsync_softc *);
void	pfsync_sendout(void);
void	pfsync_send_plus(void *, size_t);
void	pfsync_timeout(void *);
void	pfsync_tdb_timeout(void *);

void	pfsync_bulk_start(void);
void	pfsync_bulk_status(u_int8_t);
void	pfsync_bulk_update(void *);
void	pfsync_bulk_fail(void *);

#define PFSYNC_MAX_BULKTRIES	12
int	pfsync_sync_ok;

struct if_clone	pfsync_cloner =
    IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create, pfsync_clone_destroy);

void
pfsyncattach(int npfsync)
{
	if_clone_attach(&pfsync_cloner);
	pfsynccounters = counters_alloc(pfsyncs_ncounters);
}

int
pfsync_clone_create(struct if_clone *ifc, int unit)
{
	struct pfsync_softc *sc;
	struct ifnet *ifp;
	int q;

	if (unit != 0)
		return (EINVAL);

	pfsync_sync_ok = 1;

	sc = malloc(sizeof(*pfsyncif), M_DEVBUF, M_WAITOK|M_ZERO);
	for (q = 0; q < PFSYNC_S_COUNT; q++)
		TAILQ_INIT(&sc->sc_qs[q]);

	pool_init(&sc->sc_pool, PFSYNC_PLSIZE, 0, IPL_SOFTNET, 0, "pfsync",
	    NULL);
	TAILQ_INIT(&sc->sc_upd_req_list);
	TAILQ_INIT(&sc->sc_deferrals);
	sc->sc_deferred = 0;

	TAILQ_INIT(&sc->sc_tdb_q);

	sc->sc_len = PFSYNC_MINPKT;
	sc->sc_maxupdates = 128;

	sc->sc_imo.imo_membership = (struct in_multi **)malloc(
	    (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_IPMOPTS,
	    M_WAITOK | M_ZERO);
	sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;

	ifp = &sc->sc_if;
	snprintf(ifp->if_xname, sizeof ifp->if_xname, "pfsync%d", unit);
	ifp->if_softc = sc;
	ifp->if_ioctl = pfsyncioctl;
	ifp->if_output = pfsyncoutput;
	ifp->if_start = pfsyncstart;
	ifp->if_type = IFT_PFSYNC;
	IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN);
	ifp->if_hdrlen = sizeof(struct pfsync_header);
	ifp->if_mtu = ETHERMTU;
	ifp->if_xflags = IFXF_CLONED;
	timeout_set_proc(&sc->sc_tmo, pfsync_timeout, sc);
	timeout_set_proc(&sc->sc_bulk_tmo, pfsync_bulk_update, sc);
	timeout_set_proc(&sc->sc_bulkfail_tmo, pfsync_bulk_fail, sc);

	if_attach(ifp);
	if_alloc_sadl(ifp);

#if NCARP > 0
	if_addgroup(ifp, "carp");
#endif

#if NBPFILTER > 0
	bpfattach(&sc->sc_if.if_bpf, ifp, DLT_PFSYNC, PFSYNC_HDRLEN);
#endif

	pfsyncif = sc;

	return (0);
}

int
pfsync_clone_destroy(struct ifnet *ifp)
{
	struct pfsync_softc *sc = ifp->if_softc;
	struct pfsync_deferral *pd;

	timeout_del(&sc->sc_bulkfail_tmo);
	timeout_del(&sc->sc_bulk_tmo);
	timeout_del(&sc->sc_tmo);
#if NCARP > 0
	if (!pfsync_sync_ok)
		carp_group_demote_adj(&sc->sc_if, -1, "pfsync destroy");
	if (sc->sc_link_demoted)
		carp_group_demote_adj(&sc->sc_if, -1, "pfsync destroy");
#endif
	if (sc->sc_sync_if) {
		hook_disestablish(
		    sc->sc_sync_if->if_linkstatehooks,
		    sc->sc_lhcookie);
		hook_disestablish(sc->sc_sync_if->if_detachhooks,
		    sc->sc_dhcookie);
	}
	if_detach(ifp);

	pfsync_drop(sc);

	while (sc->sc_deferred > 0) {
		pd = TAILQ_FIRST(&sc->sc_deferrals);
		timeout_del(&pd->pd_tmo);
		pfsync_undefer(pd, 0);
	}

	pool_destroy(&sc->sc_pool);
	free(sc->sc_imo.imo_membership, M_IPMOPTS, 0);
	free(sc, M_DEVBUF, sizeof(*sc));

	pfsyncif = NULL;

	return (0);
}

/*
 * Start output on the pfsync interface.
 */
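/*
 * pfsync builds and transmits its packets itself in pfsync_sendout(),
 * so nothing is ever sent from if_snd; the start routine just discards
 * whatever may have been queued there.
 */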
void
pfsyncstart(struct ifnet *ifp)
{
	IFQ_PURGE(&ifp->if_snd);
}

void
pfsync_syncdev_state(void *arg)
{
	struct pfsync_softc *sc = arg;

	if (!sc->sc_sync_if || !(sc->sc_if.if_flags & IFF_UP))
		return;

	if (sc->sc_sync_if->if_link_state == LINK_STATE_DOWN) {
		sc->sc_if.if_flags &= ~IFF_RUNNING;
		if (!sc->sc_link_demoted) {
#if NCARP > 0
			carp_group_demote_adj(&sc->sc_if, 1,
			    "pfsync link state down");
#endif
			sc->sc_link_demoted = 1;
		}

		/* drop everything */
		timeout_del(&sc->sc_tmo);
		pfsync_drop(sc);

		pfsync_cancel_full_update(sc);
	} else if (sc->sc_link_demoted) {
		sc->sc_if.if_flags |= IFF_RUNNING;

		pfsync_request_full_update(sc);
	}
}

void
pfsync_ifdetach(void *arg)
{
	struct pfsync_softc *sc = arg;

	sc->sc_sync_if = NULL;
}

int
pfsync_alloc_scrub_memory(struct pfsync_state_peer *s,
    struct pf_state_peer *d)
{
	if (s->scrub.scrub_flag && d->scrub == NULL) {
		d->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT | PR_ZERO);
		if (d->scrub == NULL)
			return (ENOMEM);
	}

	return (0);
}

void
pfsync_state_export(struct pfsync_state *sp, struct pf_state *st)
{
	pf_state_export(sp, st);
}

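/*
 * Turn a wire-format pfsync_state back into a live pf_state: validate
 * the creator id, interface and address family, find the matching rule
 * (by number when the ruleset checksums match or the state comes from
 * the ioctl, otherwise the default rule), allocate the state and its
 * key(s), and insert it into the state tables.  Called both for states
 * received from a peer and for states loaded over the ioctl interface
 * (PFSYNC_SI_IOCTL).
 */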
int
pfsync_state_import(struct pfsync_state *sp, int flags)
{
	struct pf_state *st = NULL;
	struct pf_state_key *skw = NULL, *sks = NULL;
	struct pf_rule *r = NULL;
	struct pfi_kif *kif;
	int pool_flags;
	int error;

	if (sp->creatorid == 0) {
		DPFPRINTF(LOG_NOTICE, "pfsync_state_import: "
		    "invalid creator id: %08x", ntohl(sp->creatorid));
		return (EINVAL);
	}

	if ((kif = pfi_kif_get(sp->ifname)) == NULL) {
		DPFPRINTF(LOG_NOTICE, "pfsync_state_import: "
		    "unknown interface: %s", sp->ifname);
		if (flags & PFSYNC_SI_IOCTL)
			return (EINVAL);
		return (0);	/* skip this state */
	}

	if (sp->af == 0)
		return (0);	/* skip this state */

	/*
	 * If the ruleset checksums match or the state is coming from
	 * the ioctl, it's safe to associate the state with the rule of
	 * that number.
	 */
	if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) &&
	    (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) <
	    pf_main_ruleset.rules.active.rcount)
		r = pf_main_ruleset.rules.active.ptr_array[ntohl(sp->rule)];
	else
		r = &pf_default_rule;

	if ((r->max_states && r->states_cur >= r->max_states))
		goto cleanup;

	if (flags & PFSYNC_SI_IOCTL)
		pool_flags = PR_WAITOK | PR_LIMITFAIL | PR_ZERO;
	else
		pool_flags = PR_NOWAIT | PR_LIMITFAIL | PR_ZERO;

	if ((st = pool_get(&pf_state_pl, pool_flags)) == NULL)
		goto cleanup;

	if ((skw = pf_alloc_state_key(pool_flags)) == NULL)
		goto cleanup;

	if ((sp->key[PF_SK_WIRE].af &&
	    (sp->key[PF_SK_WIRE].af != sp->key[PF_SK_STACK].af)) ||
	    PF_ANEQ(&sp->key[PF_SK_WIRE].addr[0],
	    &sp->key[PF_SK_STACK].addr[0], sp->af) ||
	    PF_ANEQ(&sp->key[PF_SK_WIRE].addr[1],
	    &sp->key[PF_SK_STACK].addr[1], sp->af) ||
	    sp->key[PF_SK_WIRE].port[0] != sp->key[PF_SK_STACK].port[0] ||
	    sp->key[PF_SK_WIRE].port[1] != sp->key[PF_SK_STACK].port[1] ||
	    sp->key[PF_SK_WIRE].rdomain != sp->key[PF_SK_STACK].rdomain) {
		if ((sks = pf_alloc_state_key(pool_flags)) == NULL)
			goto cleanup;
	} else
		sks = skw;

	/* allocate memory for scrub info */
	if (pfsync_alloc_scrub_memory(&sp->src, &st->src) ||
	    pfsync_alloc_scrub_memory(&sp->dst, &st->dst))
		goto cleanup;

	/* copy to state key(s) */
	skw->addr[0] = sp->key[PF_SK_WIRE].addr[0];
	skw->addr[1] = sp->key[PF_SK_WIRE].addr[1];
	skw->port[0] = sp->key[PF_SK_WIRE].port[0];
	skw->port[1] = sp->key[PF_SK_WIRE].port[1];
	skw->rdomain = ntohs(sp->key[PF_SK_WIRE].rdomain);
	PF_REF_INIT(skw->refcnt);
	skw->proto = sp->proto;
	if (!(skw->af = sp->key[PF_SK_WIRE].af))
		skw->af = sp->af;
	if (sks != skw) {
		sks->addr[0] = sp->key[PF_SK_STACK].addr[0];
		sks->addr[1] = sp->key[PF_SK_STACK].addr[1];
		sks->port[0] = sp->key[PF_SK_STACK].port[0];
		sks->port[1] = sp->key[PF_SK_STACK].port[1];
		sks->rdomain = ntohs(sp->key[PF_SK_STACK].rdomain);
		PF_REF_INIT(sks->refcnt);
		if (!(sks->af = sp->key[PF_SK_STACK].af))
			sks->af = sp->af;
		if (sks->af != skw->af) {
			switch (sp->proto) {
			case IPPROTO_ICMP:
				sks->proto = IPPROTO_ICMPV6;
				break;
			case IPPROTO_ICMPV6:
				sks->proto = IPPROTO_ICMP;
				break;
			default:
				sks->proto = sp->proto;
			}
		} else
			sks->proto = sp->proto;
	}
	st->rtableid[PF_SK_WIRE] = ntohl(sp->rtableid[PF_SK_WIRE]);
	st->rtableid[PF_SK_STACK] = ntohl(sp->rtableid[PF_SK_STACK]);

	/* copy to state */
	bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr));
	st->creation = time_uptime - ntohl(sp->creation);
	st->expire = time_uptime;
	if (ntohl(sp->expire)) {
		u_int32_t timeout;

		timeout = r->timeout[sp->timeout];
		if (!timeout)
			timeout = pf_default_rule.timeout[sp->timeout];

		/* sp->expire may have been adaptively scaled by export. */
		st->expire -= timeout - ntohl(sp->expire);
	}

	st->direction = sp->direction;
	st->log = sp->log;
	st->timeout = sp->timeout;
	st->state_flags = ntohs(sp->state_flags);
	st->max_mss = ntohs(sp->max_mss);
	st->min_ttl = sp->min_ttl;
	st->set_tos = sp->set_tos;
	st->set_prio[0] = sp->set_prio[0];
	st->set_prio[1] = sp->set_prio[1];

	st->id = sp->id;
	st->creatorid = sp->creatorid;
	pf_state_peer_ntoh(&sp->src, &st->src);
	pf_state_peer_ntoh(&sp->dst, &st->dst);

	st->rule.ptr = r;
	st->anchor.ptr = NULL;
	st->rt_kif = NULL;

	st->pfsync_time = time_uptime;
	st->sync_state = PFSYNC_S_NONE;

	/* XXX when we have anchors, use STATE_INC_COUNTERS */
	r->states_cur++;
	r->states_tot++;

	if (!ISSET(flags, PFSYNC_SI_IOCTL))
		SET(st->state_flags, PFSTATE_NOSYNC);

	if (pf_state_insert(kif, &skw, &sks, st) != 0) {
		/* XXX when we have anchors, use STATE_DEC_COUNTERS */
		r->states_cur--;
		error = EEXIST;
		goto cleanup_state;
	}

	if (!ISSET(flags, PFSYNC_SI_IOCTL)) {
		CLR(st->state_flags, PFSTATE_NOSYNC);
		if (ISSET(st->state_flags, PFSTATE_ACK)) {
			pfsync_q_ins(st, PFSYNC_S_IACK);
			schednetisr(NETISR_PFSYNC);
		}
	}
	CLR(st->state_flags, PFSTATE_ACK);

	return (0);

cleanup:
	error = ENOMEM;
	if (skw == sks)
		sks = NULL;
	if (skw != NULL)
		pool_put(&pf_state_key_pl, skw);
	if (sks != NULL)
		pool_put(&pf_state_key_pl, sks);

cleanup_state:	/* pf_state_insert frees the state keys */
	if (st) {
		if (st->dst.scrub)
			pool_put(&pf_state_scrub_pl, st->dst.scrub);
		if (st->src.scrub)
			pool_put(&pf_state_scrub_pl, st->src.scrub);
		pool_put(&pf_state_pl, st);
	}
	return (error);
}

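/*
 * A pfsync packet is an IP datagram (protocol IPPROTO_PFSYNC, TTL 255)
 * carrying a pfsync_header followed by a sequence of subheaders, each
 * announcing an action, a per-message length and a message count, and
 * each followed by that many messages:
 *
 *	ip | pfsync_header | subh | msg ... | subh | msg ... | ...
 *
 * pfsync_input() walks this sequence and hands each group of messages
 * to the matching handler from pfsync_acts.
 */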
int
pfsync_input(struct mbuf **mp, int *offp, int proto, int af)
{
	struct mbuf *n, *m = *mp;
	struct pfsync_softc *sc = pfsyncif;
	struct ip *ip = mtod(m, struct ip *);
	struct pfsync_header *ph;
	struct pfsync_subheader subh;
	int offset, noff, len, count, mlen, flags = 0;
	int e;

	NET_ASSERT_LOCKED();

	pfsyncstat_inc(pfsyncs_ipackets);

	/* verify that we have a sync interface configured */
	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
	    sc->sc_sync_if == NULL || !pf_status.running)
		goto done;

	/* verify that the packet came in on the right interface */
	if (sc->sc_sync_if->if_index != m->m_pkthdr.ph_ifidx) {
		pfsyncstat_inc(pfsyncs_badif);
		goto done;
	}

	sc->sc_if.if_ipackets++;
	sc->sc_if.if_ibytes += m->m_pkthdr.len;

	/* verify that the IP TTL is 255. */
	if (ip->ip_ttl != PFSYNC_DFLTTL) {
		pfsyncstat_inc(pfsyncs_badttl);
		goto done;
	}

	offset = ip->ip_hl << 2;
	n = m_pulldown(m, offset, sizeof(*ph), &noff);
	if (n == NULL) {
		pfsyncstat_inc(pfsyncs_hdrops);
		return IPPROTO_DONE;
	}
	ph = (struct pfsync_header *)(n->m_data + noff);

	/* verify the version */
	if (ph->version != PFSYNC_VERSION) {
		pfsyncstat_inc(pfsyncs_badver);
		goto done;
	}
	len = ntohs(ph->len) + offset;
	if (m->m_pkthdr.len < len) {
		pfsyncstat_inc(pfsyncs_badlen);
		goto done;
	}

	if (!bcmp(&ph->pfcksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH))
		flags = PFSYNC_SI_CKSUM;

	offset += sizeof(*ph);
	while (offset <= len - sizeof(subh)) {
		m_copydata(m, offset, sizeof(subh), (caddr_t)&subh);
		offset += sizeof(subh);

		mlen = subh.len << 2;
		count = ntohs(subh.count);

		if (subh.action >= PFSYNC_ACT_MAX ||
		    subh.action >= nitems(pfsync_acts) ||
		    mlen < pfsync_acts[subh.action].len) {
			/*
			 * subheaders are always followed by at least one
			 * message, so if the peer is new enough to tell
			 * us how big its messages are then we know enough
			 * to skip them.
			 */
			if (count > 0 && mlen > 0) {
				offset += count * mlen;
				continue;
			}
			pfsyncstat_inc(pfsyncs_badact);
			goto done;
		}

		n = m_pulldown(m, offset, mlen * count, &noff);
		if (n == NULL) {
			pfsyncstat_inc(pfsyncs_badlen);
			return IPPROTO_DONE;
		}

		PF_LOCK();
		e = pfsync_acts[subh.action].in(n->m_data + noff, mlen, count,
		    flags);
		PF_UNLOCK();
		if (e != 0)
			goto done;

		offset += mlen * count;
	}

done:
	m_freem(m);
	return IPPROTO_DONE;
}

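/*
 * PFSYNC_ACT_CLR: the peer cleared (part of) its state table.  Remove
 * every local state with a matching creator id, optionally restricted
 * to one interface; PFSTATE_NOSYNC stops the removal from being
 * broadcast straight back.
 */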
int
pfsync_in_clr(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_clr *clr;
	struct pf_state *st, *nexts;
	struct pfi_kif *kif;
	u_int32_t creatorid;
	int i;

	for (i = 0; i < count; i++) {
		clr = (struct pfsync_clr *)(buf + len * i);
		kif = NULL;
		creatorid = clr->creatorid;
		if (strlen(clr->ifname) &&
		    (kif = pfi_kif_find(clr->ifname)) == NULL)
			continue;

		for (st = RB_MIN(pf_state_tree_id, &tree_id); st; st = nexts) {
			nexts = RB_NEXT(pf_state_tree_id, &tree_id, st);
			if (st->creatorid == creatorid &&
			    ((kif && st->kif == kif) || !kif)) {
				SET(st->state_flags, PFSTATE_NOSYNC);
				pf_remove_state(st);
			}
		}
	}

	return (0);
}

int
pfsync_in_ins(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_state *sp;
	sa_family_t af1, af2;
	int i;

	for (i = 0; i < count; i++) {
		sp = (struct pfsync_state *)(buf + len * i);
		af1 = sp->key[0].af;
		af2 = sp->key[1].af;

		/* check for invalid values */
		if (sp->timeout >= PFTM_MAX ||
		    sp->src.state > PF_TCPS_PROXY_DST ||
		    sp->dst.state > PF_TCPS_PROXY_DST ||
		    sp->direction > PF_OUT ||
		    (((af1 || af2) &&
		    ((af1 != AF_INET && af1 != AF_INET6) ||
		    (af2 != AF_INET && af2 != AF_INET6))) ||
		    (sp->af != AF_INET && sp->af != AF_INET6))) {
			DPFPRINTF(LOG_NOTICE,
			    "pfsync_input: PFSYNC5_ACT_INS: invalid value");
			pfsyncstat_inc(pfsyncs_badval);
			continue;
		}

		if (pfsync_state_import(sp, flags) == ENOMEM) {
			/* drop out, but process the rest of the actions */
			break;
		}
	}

	return (0);
}

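/*
 * PFSYNC_ACT_INS_ACK: the peer acknowledges an insert we sent.  If we
 * are still holding the packet that created the state (a deferral),
 * release it now.
 */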
int
pfsync_in_iack(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_ins_ack *ia;
	struct pf_state_cmp id_key;
	struct pf_state *st;
	int i;

	for (i = 0; i < count; i++) {
		ia = (struct pfsync_ins_ack *)(buf + len * i);

		id_key.id = ia->id;
		id_key.creatorid = ia->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL)
			continue;

		if (ISSET(st->state_flags, PFSTATE_ACK))
			pfsync_deferred(st, 0);
	}

	return (0);
}

int
pfsync_upd_tcp(struct pf_state *st, struct pfsync_state_peer *src,
    struct pfsync_state_peer *dst)
{
	int sync = 0;

	/*
	 * The state should never go backwards except
	 * for syn-proxy states.  Neither should the
	 * sequence window slide backwards.
	 */
	if ((st->src.state > src->state &&
	    (st->src.state < PF_TCPS_PROXY_SRC ||
	    src->state >= PF_TCPS_PROXY_SRC)) ||

	    (st->src.state == src->state &&
	    SEQ_GT(st->src.seqlo, ntohl(src->seqlo))))
		sync++;
	else
		pf_state_peer_ntoh(src, &st->src);

	if ((st->dst.state > dst->state) ||

	    (st->dst.state >= TCPS_SYN_SENT &&
	    SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo))))
		sync++;
	else
		pf_state_peer_ntoh(dst, &st->dst);

	return (sync);
}

int
pfsync_in_upd(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_state *sp;
	struct pf_state_cmp id_key;
	struct pf_state *st;
	int sync;

	int i;

	for (i = 0; i < count; i++) {
		sp = (struct pfsync_state *)(buf + len * i);

		/* check for invalid values */
		if (sp->timeout >= PFTM_MAX ||
		    sp->src.state > PF_TCPS_PROXY_DST ||
		    sp->dst.state > PF_TCPS_PROXY_DST) {
			DPFPRINTF(LOG_NOTICE,
			    "pfsync_input: PFSYNC_ACT_UPD: invalid value");
			pfsyncstat_inc(pfsyncs_badval);
			continue;
		}

		id_key.id = sp->id;
		id_key.creatorid = sp->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL) {
			/* insert the update */
			if (pfsync_state_import(sp, flags))
				pfsyncstat_inc(pfsyncs_badstate);
			continue;
		}

		if (ISSET(st->state_flags, PFSTATE_ACK))
			pfsync_deferred(st, 1);

		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
			sync = pfsync_upd_tcp(st, &sp->src, &sp->dst);
		else {
			sync = 0;

			/*
			 * Non-TCP protocol state machines always go
			 * forwards.
			 */
			if (st->src.state > sp->src.state)
				sync++;
			else
				pf_state_peer_ntoh(&sp->src, &st->src);

			if (st->dst.state > sp->dst.state)
				sync++;
			else
				pf_state_peer_ntoh(&sp->dst, &st->dst);
		}

		if (sync < 2) {
			pfsync_alloc_scrub_memory(&sp->dst, &st->dst);
			pf_state_peer_ntoh(&sp->dst, &st->dst);
			st->expire = time_uptime;
			st->timeout = sp->timeout;
		}
		st->pfsync_time = time_uptime;

		if (sync) {
			pfsyncstat_inc(pfsyncs_stale);

			pfsync_update_state(st);
			schednetisr(NETISR_PFSYNC);
		}
	}

	return (0);
}

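/*
 * PFSYNC_ACT_UPD_C: a compressed update, carrying only the state id,
 * creator id, peers and timeout instead of the full pfsync_state.  If
 * we do not know the state, ask the peer for a full copy.
 */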
int
pfsync_in_upd_c(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_upd_c *up;
	struct pf_state_cmp id_key;
	struct pf_state *st;

	int sync;

	int i;

	for (i = 0; i < count; i++) {
		up = (struct pfsync_upd_c *)(buf + len * i);

		/* check for invalid values */
		if (up->timeout >= PFTM_MAX ||
		    up->src.state > PF_TCPS_PROXY_DST ||
		    up->dst.state > PF_TCPS_PROXY_DST) {
			DPFPRINTF(LOG_NOTICE,
			    "pfsync_input: PFSYNC_ACT_UPD_C: invalid value");
			pfsyncstat_inc(pfsyncs_badval);
			continue;
		}

		id_key.id = up->id;
		id_key.creatorid = up->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL) {
			/* We don't have this state. Ask for it. */
			pfsync_request_update(id_key.creatorid, id_key.id);
			continue;
		}

		if (ISSET(st->state_flags, PFSTATE_ACK))
			pfsync_deferred(st, 1);

		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
			sync = pfsync_upd_tcp(st, &up->src, &up->dst);
		else {
			sync = 0;
			/*
			 * Non-TCP protocol state machines always go
			 * forwards.
			 */
			if (st->src.state > up->src.state)
				sync++;
			else
				pf_state_peer_ntoh(&up->src, &st->src);

			if (st->dst.state > up->dst.state)
				sync++;
			else
				pf_state_peer_ntoh(&up->dst, &st->dst);
		}
		if (sync < 2) {
			pfsync_alloc_scrub_memory(&up->dst, &st->dst);
			pf_state_peer_ntoh(&up->dst, &st->dst);
			st->expire = time_uptime;
			st->timeout = up->timeout;
		}
		st->pfsync_time = time_uptime;

		if (sync) {
			pfsyncstat_inc(pfsyncs_stale);

			pfsync_update_state(st);
			schednetisr(NETISR_PFSYNC);
		}
	}

	return (0);
}

int
pfsync_in_ureq(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_upd_req *ur;
	int i;

	struct pf_state_cmp id_key;
	struct pf_state *st;

	for (i = 0; i < count; i++) {
		ur = (struct pfsync_upd_req *)(buf + len * i);

		id_key.id = ur->id;
		id_key.creatorid = ur->creatorid;

		if (id_key.id == 0 && id_key.creatorid == 0)
			pfsync_bulk_start();
		else {
			st = pf_find_state_byid(&id_key);
			if (st == NULL) {
				pfsyncstat_inc(pfsyncs_badstate);
				continue;
			}
			if (ISSET(st->state_flags, PFSTATE_NOSYNC))
				continue;

			pfsync_update_state_req(st);
		}
	}

	return (0);
}

int
pfsync_in_del(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_state *sp;
	struct pf_state_cmp id_key;
	struct pf_state *st;
	int i;

	for (i = 0; i < count; i++) {
		sp = (struct pfsync_state *)(buf + len * i);

		id_key.id = sp->id;
		id_key.creatorid = sp->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL) {
			pfsyncstat_inc(pfsyncs_badstate);
			continue;
		}
		SET(st->state_flags, PFSTATE_NOSYNC);
		pf_remove_state(st);
	}

	return (0);
}

int
pfsync_in_del_c(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_del_c *sp;
	struct pf_state_cmp id_key;
	struct pf_state *st;
	int i;

	for (i = 0; i < count; i++) {
		sp = (struct pfsync_del_c *)(buf + len * i);

		id_key.id = sp->id;
		id_key.creatorid = sp->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL) {
			pfsyncstat_inc(pfsyncs_badstate);
			continue;
		}

		SET(st->state_flags, PFSTATE_NOSYNC);
		pf_remove_state(st);
	}

	return (0);
}

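/*
 * PFSYNC_ACT_BUS: bulk update status from the peer acting as the bulk
 * provider.  On START, arm the failure timeout; on END, check that the
 * update began after we asked for it (bus->endtime is relative) before
 * declaring the transfer complete and lifting any carp demotion.
 */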
int
pfsync_in_bus(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_bus *bus;

	/* If we're not waiting for a bulk update, who cares. */
	if (sc->sc_ureq_sent == 0)
		return (0);

	bus = (struct pfsync_bus *)buf;

	switch (bus->status) {
	case PFSYNC_BUS_START:
		timeout_add(&sc->sc_bulkfail_tmo, 4 * hz +
		    pf_pool_limits[PF_LIMIT_STATES].limit /
		    ((sc->sc_if.if_mtu - PFSYNC_MINPKT) /
		    sizeof(struct pfsync_state)));
		DPFPRINTF(LOG_INFO, "received bulk update start");
		break;

	case PFSYNC_BUS_END:
		if (time_uptime - ntohl(bus->endtime) >=
		    sc->sc_ureq_sent) {
			/* that's it, we're happy */
			sc->sc_ureq_sent = 0;
			sc->sc_bulk_tries = 0;
			timeout_del(&sc->sc_bulkfail_tmo);
#if NCARP > 0
			if (!pfsync_sync_ok)
				carp_group_demote_adj(&sc->sc_if, -1,
				    sc->sc_link_demoted ?
				    "pfsync link state up" :
				    "pfsync bulk done");
			if (sc->sc_initial_bulk) {
				carp_group_demote_adj(&sc->sc_if, -32,
				    "pfsync init");
				sc->sc_initial_bulk = 0;
			}
#endif
			pfsync_sync_ok = 1;
			sc->sc_link_demoted = 0;
			DPFPRINTF(LOG_INFO, "received valid bulk update end");
		} else {
			DPFPRINTF(LOG_WARNING, "received invalid "
			    "bulk update end: bad timestamp");
		}
		break;
	}

	return (0);
}

int
pfsync_in_tdb(caddr_t buf, int len, int count, int flags)
{
#if defined(IPSEC)
	struct pfsync_tdb *tp;
	int i;

	for (i = 0; i < count; i++) {
		tp = (struct pfsync_tdb *)(buf + len * i);
		pfsync_update_net_tdb(tp);
	}
#endif

	return (0);
}

#if defined(IPSEC)
/* Update an in-kernel tdb. Silently fail if no tdb is found. */
void
pfsync_update_net_tdb(struct pfsync_tdb *pt)
{
	struct tdb *tdb;

	NET_ASSERT_LOCKED();

	/* check for invalid values */
	if (ntohl(pt->spi) <= SPI_RESERVED_MAX ||
	    (pt->dst.sa.sa_family != AF_INET &&
	    pt->dst.sa.sa_family != AF_INET6))
		goto bad;

	tdb = gettdb(ntohs(pt->rdomain), pt->spi,
	    (union sockaddr_union *)&pt->dst, pt->sproto);
	if (tdb) {
		pt->rpl = betoh64(pt->rpl);
		pt->cur_bytes = betoh64(pt->cur_bytes);

		/* Neither replay nor byte counter should ever decrease. */
		if (pt->rpl < tdb->tdb_rpl ||
		    pt->cur_bytes < tdb->tdb_cur_bytes) {
			goto bad;
		}

		tdb->tdb_rpl = pt->rpl;
		tdb->tdb_cur_bytes = pt->cur_bytes;
	}
	return;

bad:
	DPFPRINTF(LOG_WARNING, "pfsync_insert: PFSYNC_ACT_TDB_UPD: "
	    "invalid value");
	pfsyncstat_inc(pfsyncs_badstate);
	return;
}
#endif


int
pfsync_in_eof(caddr_t buf, int len, int count, int flags)
{
	if (len > 0 || count > 0)
		pfsyncstat_inc(pfsyncs_badact);

	/* we're done.  let the caller return */
	return (1);
}

int
pfsync_in_error(caddr_t buf, int len, int count, int flags)
{
	pfsyncstat_inc(pfsyncs_badact);
	return (-1);
}

int
pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
    struct rtentry *rt)
{
	m_freem(m);	/* drop packet */
	return (EAFNOSUPPORT);
}

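/*
 * Interface ioctls.  SIOCSETPFSYNC is the interesting one: it sets the
 * sync peer and maximum update count, joins or leaves the pfsync
 * multicast group (INADDR_PFSYNC_GROUP) on the sync interface,
 * rebuilds the IP template used for outgoing packets and kicks off a
 * full state table update.
 */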
int
pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct proc *p = curproc;
	struct pfsync_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ip_moptions *imo = &sc->sc_imo;
	struct pfsyncreq pfsyncr;
	struct ifnet *sifp;
	struct ip *ip;
	int error;

	switch (cmd) {
	case SIOCSIFFLAGS:
		if ((ifp->if_flags & IFF_RUNNING) == 0 &&
		    (ifp->if_flags & IFF_UP)) {
			ifp->if_flags |= IFF_RUNNING;

#if NCARP > 0
			sc->sc_initial_bulk = 1;
			carp_group_demote_adj(&sc->sc_if, 32, "pfsync init");
#endif

			pfsync_request_full_update(sc);
		}
		if ((ifp->if_flags & IFF_RUNNING) &&
		    (ifp->if_flags & IFF_UP) == 0) {
			ifp->if_flags &= ~IFF_RUNNING;

			/* drop everything */
			timeout_del(&sc->sc_tmo);
			pfsync_drop(sc);

			pfsync_cancel_full_update(sc);
		}
		break;
	case SIOCSIFMTU:
		if (!sc->sc_sync_if ||
		    ifr->ifr_mtu <= PFSYNC_MINPKT ||
		    ifr->ifr_mtu > sc->sc_sync_if->if_mtu)
			return (EINVAL);
		if (ifr->ifr_mtu < ifp->if_mtu)
			pfsync_sendout();
		ifp->if_mtu = ifr->ifr_mtu;
		break;
	case SIOCGETPFSYNC:
		bzero(&pfsyncr, sizeof(pfsyncr));
		if (sc->sc_sync_if) {
			strlcpy(pfsyncr.pfsyncr_syncdev,
			    sc->sc_sync_if->if_xname, IFNAMSIZ);
		}
		pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer;
		pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates;
		pfsyncr.pfsyncr_defer = sc->sc_defer;
		return (copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr)));

	case SIOCSETPFSYNC:
		if ((error = suser(p)) != 0)
			return (error);
		if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr))))
			return (error);

		if (pfsyncr.pfsyncr_syncpeer.s_addr == 0)
			sc->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP;
		else
			sc->sc_sync_peer.s_addr =
			    pfsyncr.pfsyncr_syncpeer.s_addr;

		if (pfsyncr.pfsyncr_maxupdates > 255)
			return (EINVAL);
		sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates;

		sc->sc_defer = pfsyncr.pfsyncr_defer;

		if (pfsyncr.pfsyncr_syncdev[0] == 0) {
			if (sc->sc_sync_if) {
				hook_disestablish(
				    sc->sc_sync_if->if_linkstatehooks,
				    sc->sc_lhcookie);
				hook_disestablish(
				    sc->sc_sync_if->if_detachhooks,
				    sc->sc_dhcookie);
			}
			sc->sc_sync_if = NULL;
			if (imo->imo_num_memberships > 0) {
				in_delmulti(imo->imo_membership[
				    --imo->imo_num_memberships]);
				imo->imo_ifidx = 0;
			}
			break;
		}

		if ((sifp = ifunit(pfsyncr.pfsyncr_syncdev)) == NULL)
			return (EINVAL);

		if (sifp->if_mtu < sc->sc_if.if_mtu ||
		    (sc->sc_sync_if != NULL &&
		    sifp->if_mtu < sc->sc_sync_if->if_mtu) ||
		    sifp->if_mtu < MCLBYTES - sizeof(struct ip))
			pfsync_sendout();

		if (sc->sc_sync_if) {
			hook_disestablish(
			    sc->sc_sync_if->if_linkstatehooks,
			    sc->sc_lhcookie);
			hook_disestablish(
			    sc->sc_sync_if->if_detachhooks,
			    sc->sc_dhcookie);
		}
		sc->sc_sync_if = sifp;

		if (imo->imo_num_memberships > 0) {
			in_delmulti(imo->imo_membership[
			    --imo->imo_num_memberships]);
			imo->imo_ifidx = 0;
		}

		if (sc->sc_sync_if &&
		    sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) {
			struct in_addr addr;

			if (!(sc->sc_sync_if->if_flags & IFF_MULTICAST)) {
				sc->sc_sync_if = NULL;
				return (EADDRNOTAVAIL);
			}

			addr.s_addr = INADDR_PFSYNC_GROUP;

			if ((imo->imo_membership[0] =
			    in_addmulti(&addr, sc->sc_sync_if)) == NULL) {
				sc->sc_sync_if = NULL;
				return (ENOBUFS);
			}
			imo->imo_num_memberships++;
			imo->imo_ifidx = sc->sc_sync_if->if_index;
			imo->imo_ttl = PFSYNC_DFLTTL;
			imo->imo_loop = 0;
		}

		ip = &sc->sc_template;
		bzero(ip, sizeof(*ip));
		ip->ip_v = IPVERSION;
		ip->ip_hl = sizeof(sc->sc_template) >> 2;
		ip->ip_tos = IPTOS_LOWDELAY;
		/* len and id are set later */
		ip->ip_off = htons(IP_DF);
		ip->ip_ttl = PFSYNC_DFLTTL;
		ip->ip_p = IPPROTO_PFSYNC;
		ip->ip_src.s_addr = INADDR_ANY;
		ip->ip_dst.s_addr = sc->sc_sync_peer.s_addr;

		sc->sc_lhcookie =
		    hook_establish(sc->sc_sync_if->if_linkstatehooks, 1,
		    pfsync_syncdev_state, sc);
		sc->sc_dhcookie = hook_establish(sc->sc_sync_if->if_detachhooks,
		    0, pfsync_ifdetach, sc);

		pfsync_request_full_update(sc);

		break;

	default:
		return (ENOTTY);
	}

	return (0);
}

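/*
 * Writers for the output queues (one per PFSYNC_S_ type, see
 * pfsync_qs).  Each is handed a pointer into the packet being built by
 * pfsync_sendout() and serializes one state into wire format there.
 */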
void
pfsync_out_state(struct pf_state *st, void *buf)
{
	struct pfsync_state *sp = buf;

	pfsync_state_export(sp, st);
}

void
pfsync_out_iack(struct pf_state *st, void *buf)
{
	struct pfsync_ins_ack *iack = buf;

	iack->id = st->id;
	iack->creatorid = st->creatorid;
}

void
pfsync_out_upd_c(struct pf_state *st, void *buf)
{
	struct pfsync_upd_c *up = buf;

	bzero(up, sizeof(*up));
	up->id = st->id;
	pf_state_peer_hton(&st->src, &up->src);
	pf_state_peer_hton(&st->dst, &up->dst);
	up->creatorid = st->creatorid;
	up->timeout = st->timeout;
}

void
pfsync_out_del(struct pf_state *st, void *buf)
{
	struct pfsync_del_c *dp = buf;

	dp->id = st->id;
	dp->creatorid = st->creatorid;

	SET(st->state_flags, PFSTATE_NOSYNC);
}

void
pfsync_drop(struct pfsync_softc *sc)
{
	struct pf_state *st;
	struct pfsync_upd_req_item *ur;
	struct tdb *t;
	int q;

	for (q = 0; q < PFSYNC_S_COUNT; q++) {
		if (TAILQ_EMPTY(&sc->sc_qs[q]))
			continue;

		TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list) {
#ifdef PFSYNC_DEBUG
			KASSERT(st->sync_state == q);
#endif
			st->sync_state = PFSYNC_S_NONE;
		}
		TAILQ_INIT(&sc->sc_qs[q]);
	}

	while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
		TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);
		pool_put(&sc->sc_pool, ur);
	}

	sc->sc_plus = NULL;

	if (!TAILQ_EMPTY(&sc->sc_tdb_q)) {
		TAILQ_FOREACH(t, &sc->sc_tdb_q, tdb_sync_entry)
			CLR(t->tdb_flags, TDBF_PFSYNC);

		TAILQ_INIT(&sc->sc_tdb_q);
	}

	sc->sc_len = PFSYNC_MINPKT;
}

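/*
 * Flush everything that has been queued into a single pfsync packet:
 * IP header from the template, pfsync header, then one
 * subheader-plus-messages group each for pending update requests, the
 * optional "plus" region, queued TDBs and finally the state queues.
 * sc_len has been kept in step by the queueing routines, so the
 * packet size is known up front.
 */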
void
pfsync_sendout(void)
{
	struct pfsync_softc *sc = pfsyncif;
#if NBPFILTER > 0
	struct ifnet *ifp = &sc->sc_if;
#endif
	struct mbuf *m;
	struct ip *ip;
	struct pfsync_header *ph;
	struct pfsync_subheader *subh;
	struct pf_state *st;
	struct pfsync_upd_req_item *ur;
	struct tdb *t;

	int offset;
	int q, count = 0;

	if (sc == NULL || sc->sc_len == PFSYNC_MINPKT)
		return;

	if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
#if NBPFILTER > 0
	    (ifp->if_bpf == NULL && sc->sc_sync_if == NULL)) {
#else
	    sc->sc_sync_if == NULL) {
#endif
		pfsync_drop(sc);
		return;
	}

	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL) {
		sc->sc_if.if_oerrors++;
		pfsyncstat_inc(pfsyncs_onomem);
		pfsync_drop(sc);
		return;
	}

	if (max_linkhdr + sc->sc_len > MHLEN) {
		MCLGETI(m, M_DONTWAIT, NULL, max_linkhdr + sc->sc_len);
		if (!ISSET(m->m_flags, M_EXT)) {
			m_free(m);
			sc->sc_if.if_oerrors++;
			pfsyncstat_inc(pfsyncs_onomem);
			pfsync_drop(sc);
			return;
		}
	}
	m->m_data += max_linkhdr;
	m->m_len = m->m_pkthdr.len = sc->sc_len;

	/* build the ip header */
	ip = mtod(m, struct ip *);
	bcopy(&sc->sc_template, ip, sizeof(*ip));
	offset = sizeof(*ip);

	ip->ip_len = htons(m->m_pkthdr.len);
	ip->ip_id = htons(ip_randomid());

	/* build the pfsync header */
	ph = (struct pfsync_header *)(m->m_data + offset);
	bzero(ph, sizeof(*ph));
	offset += sizeof(*ph);

	ph->version = PFSYNC_VERSION;
	ph->len = htons(sc->sc_len - sizeof(*ip));
	bcopy(pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH);

	if (!TAILQ_EMPTY(&sc->sc_upd_req_list)) {
		subh = (struct pfsync_subheader *)(m->m_data + offset);
		offset += sizeof(*subh);

		count = 0;
		while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
			TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);

			bcopy(&ur->ur_msg, m->m_data + offset,
			    sizeof(ur->ur_msg));
			offset += sizeof(ur->ur_msg);

			pool_put(&sc->sc_pool, ur);

			count++;
		}

		bzero(subh, sizeof(*subh));
		subh->len = sizeof(ur->ur_msg) >> 2;
		subh->action = PFSYNC_ACT_UPD_REQ;
		subh->count = htons(count);
	}

	/* has someone built a custom region for us to add? */
	if (sc->sc_plus != NULL) {
		bcopy(sc->sc_plus, m->m_data + offset, sc->sc_pluslen);
		offset += sc->sc_pluslen;

		sc->sc_plus = NULL;
	}

	if (!TAILQ_EMPTY(&sc->sc_tdb_q)) {
		subh = (struct pfsync_subheader *)(m->m_data + offset);
		offset += sizeof(*subh);

		count = 0;
		TAILQ_FOREACH(t, &sc->sc_tdb_q, tdb_sync_entry) {
			pfsync_out_tdb(t, m->m_data + offset);
			offset += sizeof(struct pfsync_tdb);
			CLR(t->tdb_flags, TDBF_PFSYNC);

			count++;
		}
		TAILQ_INIT(&sc->sc_tdb_q);

		bzero(subh, sizeof(*subh));
		subh->action = PFSYNC_ACT_TDB;
		subh->len = sizeof(struct pfsync_tdb) >> 2;
		subh->count = htons(count);
	}

	/* walk the queues */
	for (q = 0; q < PFSYNC_S_COUNT; q++) {
		if (TAILQ_EMPTY(&sc->sc_qs[q]))
			continue;

		subh = (struct pfsync_subheader *)(m->m_data + offset);
		offset += sizeof(*subh);

		count = 0;
		TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list) {
#ifdef PFSYNC_DEBUG
			KASSERT(st->sync_state == q);
#endif
			pfsync_qs[q].write(st, m->m_data + offset);
			offset += pfsync_qs[q].len;

			st->sync_state = PFSYNC_S_NONE;
			count++;
		}
		TAILQ_INIT(&sc->sc_qs[q]);

		bzero(subh, sizeof(*subh));
		subh->action = pfsync_qs[q].action;
		subh->len = pfsync_qs[q].len >> 2;
		subh->count = htons(count);
	}

	/* we're done, let's put it on the wire */
#if NBPFILTER > 0
	if (ifp->if_bpf) {
		m->m_data += sizeof(*ip);
		m->m_len = m->m_pkthdr.len = sc->sc_len - sizeof(*ip);
		bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
		m->m_data -= sizeof(*ip);
		m->m_len = m->m_pkthdr.len = sc->sc_len;
	}

	if (sc->sc_sync_if == NULL) {
		sc->sc_len = PFSYNC_MINPKT;
		m_freem(m);
		return;
	}
#endif

	/* start again */
	sc->sc_len = PFSYNC_MINPKT;

	sc->sc_if.if_opackets++;
	sc->sc_if.if_obytes += m->m_pkthdr.len;

	m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain;

	if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL, 0) == 0)
		pfsyncstat_inc(pfsyncs_opackets);
	else
		pfsyncstat_inc(pfsyncs_oerrors);
}

void
pfsync_insert_state(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;

	NET_ASSERT_LOCKED();

	if (ISSET(st->rule.ptr->rule_flag, PFRULE_NOSYNC) ||
	    st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) {
		SET(st->state_flags, PFSTATE_NOSYNC);
		return;
	}

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
	    ISSET(st->state_flags, PFSTATE_NOSYNC))
		return;

#ifdef PFSYNC_DEBUG
	KASSERT(st->sync_state == PFSYNC_S_NONE);
#endif

	if (sc->sc_len == PFSYNC_MINPKT)
		timeout_add_sec(&sc->sc_tmo, 1);

	pfsync_q_ins(st, PFSYNC_S_INS);

	st->sync_updates = 0;
}

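/*
 * Deferrals: when "defer" is enabled, the packet that created a state
 * is held (up to 128 at a time, for at most 20ms each) until the peer
 * acknowledges the state insert or the timeout fires, so the reply
 * cannot beat the state to a peer that may have to handle it.  The
 * held packet is marked PF_TAG_GENERATED and reinjected, or routed via
 * pf_route(), by pfsync_undefer().
 */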
int
pfsync_defer(struct pf_state *st, struct mbuf *m)
{
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_deferral *pd;

	NET_ASSERT_LOCKED();

	if (!sc->sc_defer ||
	    ISSET(st->state_flags, PFSTATE_NOSYNC) ||
	    m->m_flags & (M_BCAST|M_MCAST))
		return (0);

	if (sc->sc_deferred >= 128) {
		pd = TAILQ_FIRST(&sc->sc_deferrals);
		if (timeout_del(&pd->pd_tmo))
			pfsync_undefer(pd, 0);
	}

	pd = pool_get(&sc->sc_pool, PR_NOWAIT);
	if (pd == NULL)
		return (0);

	m->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
	SET(st->state_flags, PFSTATE_ACK);

	pd->pd_st = st;
	pd->pd_m = m;

	sc->sc_deferred++;
	TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry);

	timeout_set_proc(&pd->pd_tmo, pfsync_defer_tmo, pd);
	timeout_add_msec(&pd->pd_tmo, 20);

	schednetisr(NETISR_PFSYNC);

	return (1);
}

void
pfsync_undefer(struct pfsync_deferral *pd, int drop)
{
	struct pfsync_softc *sc = pfsyncif;
	struct pf_pdesc pdesc;

	NET_ASSERT_LOCKED();

	TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
	sc->sc_deferred--;

	CLR(pd->pd_st->state_flags, PFSTATE_ACK);
	if (drop)
		m_freem(pd->pd_m);
	else {
		if (pd->pd_st->rule.ptr->rt == PF_ROUTETO) {
			if (pf_setup_pdesc(&pdesc,
			    pd->pd_st->key[PF_SK_WIRE]->af,
			    pd->pd_st->direction, pd->pd_st->rt_kif,
			    pd->pd_m, NULL) != PF_PASS) {
				m_freem(pd->pd_m);
				goto out;
			}
			switch (pd->pd_st->key[PF_SK_WIRE]->af) {
			case AF_INET:
				pf_route(&pdesc,
				    pd->pd_st->rule.ptr, pd->pd_st);
				break;
#ifdef INET6
			case AF_INET6:
				pf_route6(&pdesc,
				    pd->pd_st->rule.ptr, pd->pd_st);
				break;
#endif /* INET6 */
			}
			pd->pd_m = pdesc.m;
		} else {
			switch (pd->pd_st->key[PF_SK_WIRE]->af) {
			case AF_INET:
				ip_output(pd->pd_m, NULL, NULL, 0, NULL, NULL,
				    0);
				break;
#ifdef INET6
			case AF_INET6:
				ip6_output(pd->pd_m, NULL, NULL, 0,
				    NULL, NULL);
				break;
#endif /* INET6 */
			}
		}
	}
out:
	pool_put(&sc->sc_pool, pd);
}

void
pfsync_defer_tmo(void *arg)
{
	NET_LOCK();
	pfsync_undefer(arg, 0);
	NET_UNLOCK();
}

void
pfsync_deferred(struct pf_state *st, int drop)
{
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_deferral *pd;

	NET_ASSERT_LOCKED();

	TAILQ_FOREACH(pd, &sc->sc_deferrals, pd_entry) {
		if (pd->pd_st == st) {
			if (timeout_del(&pd->pd_tmo))
				pfsync_undefer(pd, drop);
			return;
		}
	}

	panic("pfsync_deferred: unable to find deferred state");
}

void
pfsync_update_state(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;
	int sync = 0;

	NET_ASSERT_LOCKED();

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
		return;

	if (ISSET(st->state_flags, PFSTATE_ACK))
		pfsync_deferred(st, 0);
	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
		if (st->sync_state != PFSYNC_S_NONE)
			pfsync_q_del(st);
		return;
	}

	if (sc->sc_len == PFSYNC_MINPKT)
		timeout_add_sec(&sc->sc_tmo, 1);

	switch (st->sync_state) {
	case PFSYNC_S_UPD_C:
	case PFSYNC_S_UPD:
	case PFSYNC_S_INS:
		/* we're already handling it */

		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) {
			st->sync_updates++;
			if (st->sync_updates >= sc->sc_maxupdates)
				sync = 1;
		}
		break;

	case PFSYNC_S_IACK:
		pfsync_q_del(st);
	case PFSYNC_S_NONE:
		pfsync_q_ins(st, PFSYNC_S_UPD_C);
		st->sync_updates = 0;
		break;

	default:
		panic("pfsync_update_state: unexpected sync state %d",
		    st->sync_state);
	}

	if (sync || (time_uptime - st->pfsync_time) < 2)
		schednetisr(NETISR_PFSYNC);
}

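/*
 * Bulk update request handshake, requester side.  A full update is
 * asked for with a zero id/creatorid update request; the failure
 * timeout is sized for the worst case: 4 seconds plus the time it
 * takes to stream the whole state table in MTU-sized packets, counted
 * as one tick per expected packet.  While the request is outstanding
 * the node stays carp-demoted.
 */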
void
pfsync_cancel_full_update(struct pfsync_softc *sc)
{
	if (timeout_pending(&sc->sc_bulkfail_tmo) ||
	    timeout_pending(&sc->sc_bulk_tmo)) {
#if NCARP > 0
		if (!pfsync_sync_ok)
			carp_group_demote_adj(&sc->sc_if, -1,
			    "pfsync bulk cancelled");
		if (sc->sc_initial_bulk) {
			carp_group_demote_adj(&sc->sc_if, -32,
			    "pfsync init");
			sc->sc_initial_bulk = 0;
		}
#endif
		pfsync_sync_ok = 1;
		DPFPRINTF(LOG_INFO, "cancelling bulk update");
	}
	timeout_del(&sc->sc_bulkfail_tmo);
	timeout_del(&sc->sc_bulk_tmo);
	sc->sc_bulk_next = NULL;
	sc->sc_bulk_last = NULL;
	sc->sc_ureq_sent = 0;
	sc->sc_bulk_tries = 0;
}

void
pfsync_request_full_update(struct pfsync_softc *sc)
{
	if (sc->sc_sync_if && ISSET(sc->sc_if.if_flags, IFF_RUNNING)) {
		/* Request a full state table update. */
		sc->sc_ureq_sent = time_uptime;
#if NCARP > 0
		if (!sc->sc_link_demoted && pfsync_sync_ok)
			carp_group_demote_adj(&sc->sc_if, 1,
			    "pfsync bulk start");
#endif
		pfsync_sync_ok = 0;
		DPFPRINTF(LOG_INFO, "requesting bulk update");
		timeout_add(&sc->sc_bulkfail_tmo, 4 * hz +
		    pf_pool_limits[PF_LIMIT_STATES].limit /
		    ((sc->sc_if.if_mtu - PFSYNC_MINPKT) /
		    sizeof(struct pfsync_state)));
		pfsync_request_update(0, 0);
	}
}

void
pfsync_request_update(u_int32_t creatorid, u_int64_t id)
{
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_upd_req_item *item;
	size_t nlen = sizeof(struct pfsync_upd_req);

	/*
	 * this code does nothing to prevent multiple update requests for the
	 * same state being generated.
	 */

	item = pool_get(&sc->sc_pool, PR_NOWAIT);
	if (item == NULL) {
		/* XXX stats */
		return;
	}

	item->ur_msg.id = id;
	item->ur_msg.creatorid = creatorid;

	if (TAILQ_EMPTY(&sc->sc_upd_req_list))
		nlen += sizeof(struct pfsync_subheader);

	if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
		pfsync_sendout();

		nlen = sizeof(struct pfsync_subheader) +
		    sizeof(struct pfsync_upd_req);
	}

	TAILQ_INSERT_TAIL(&sc->sc_upd_req_list, item, ur_entry);
	sc->sc_len += nlen;

	schednetisr(NETISR_PFSYNC);
}

void
pfsync_update_state_req(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;

	if (sc == NULL)
		panic("pfsync_update_state_req: nonexistent instance");

	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
		if (st->sync_state != PFSYNC_S_NONE)
			pfsync_q_del(st);
		return;
	}

	switch (st->sync_state) {
	case PFSYNC_S_UPD_C:
	case PFSYNC_S_IACK:
		pfsync_q_del(st);
	case PFSYNC_S_NONE:
		pfsync_q_ins(st, PFSYNC_S_UPD);
		schednetisr(NETISR_PFSYNC);
		return;

	case PFSYNC_S_INS:
	case PFSYNC_S_UPD:
	case PFSYNC_S_DEL:
		/* we're already handling it */
		return;

	default:
		panic("pfsync_update_state_req: unexpected sync state %d",
		    st->sync_state);
	}
}

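/*
 * A state queued as an insert that we never sent can simply be
 * forgotten; anything else that has already been announced is turned
 * into a compressed delete notification.
 */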
void
pfsync_delete_state(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;

	NET_ASSERT_LOCKED();

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
		return;

	if (ISSET(st->state_flags, PFSTATE_ACK))
		pfsync_deferred(st, 1);
	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
		if (st->sync_state != PFSYNC_S_NONE)
			pfsync_q_del(st);
		return;
	}

	if (sc->sc_len == PFSYNC_MINPKT)
		timeout_add_sec(&sc->sc_tmo, 1);

	switch (st->sync_state) {
	case PFSYNC_S_INS:
		/* we never got to tell the world so just forget about it */
		pfsync_q_del(st);
		return;

	case PFSYNC_S_UPD_C:
	case PFSYNC_S_UPD:
	case PFSYNC_S_IACK:
		pfsync_q_del(st);
		/* FALLTHROUGH to putting it on the del list */

	case PFSYNC_S_NONE:
		pfsync_q_ins(st, PFSYNC_S_DEL);
		return;

	default:
		panic("pfsync_delete_state: unexpected sync state %d",
		    st->sync_state);
	}
}

void
pfsync_clear_states(u_int32_t creatorid, const char *ifname)
{
	struct pfsync_softc *sc = pfsyncif;
	struct {
		struct pfsync_subheader subh;
		struct pfsync_clr clr;
	} __packed r;

	NET_ASSERT_LOCKED();

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
		return;

	bzero(&r, sizeof(r));

	r.subh.action = PFSYNC_ACT_CLR;
	r.subh.len = sizeof(struct pfsync_clr) >> 2;
	r.subh.count = htons(1);

	strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname));
	r.clr.creatorid = creatorid;

	pfsync_send_plus(&r, sizeof(r));
}

void
pfsync_q_ins(struct pf_state *st, int q)
{
	struct pfsync_softc *sc = pfsyncif;
	size_t nlen = pfsync_qs[q].len;

	KASSERT(st->sync_state == PFSYNC_S_NONE);

#if defined(PFSYNC_DEBUG)
	if (sc->sc_len < PFSYNC_MINPKT)
		panic("pfsync pkt len is too low %zu", sc->sc_len);
#endif
	if (TAILQ_EMPTY(&sc->sc_qs[q]))
		nlen += sizeof(struct pfsync_subheader);

	if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
		pfsync_sendout();

		nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len;
	}

	sc->sc_len += nlen;
	TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list);
	st->sync_state = q;
}

void
pfsync_q_del(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;
	int q = st->sync_state;

	KASSERT(st->sync_state != PFSYNC_S_NONE);

	sc->sc_len -= pfsync_qs[q].len;
	TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list);
	st->sync_state = PFSYNC_S_NONE;

	if (TAILQ_EMPTY(&sc->sc_qs[q]))
		sc->sc_len -= sizeof(struct pfsync_subheader);
}

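/*
 * TDB (IPsec SA) synchronisation: queue a tdb so its replay counter
 * and byte count are pushed to the peer, keeping sc_len in step the
 * same way the state queues do.  TDBF_PFSYNC marks a tdb as currently
 * queued; TDBF_PFSYNC_RPL decides whether the replay counter is sent
 * with an offset (see pfsync_out_tdb).
 */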
void
pfsync_update_tdb(struct tdb *t, int output)
{
	struct pfsync_softc *sc = pfsyncif;
	size_t nlen = sizeof(struct pfsync_tdb);

	if (sc == NULL)
		return;

	if (!ISSET(t->tdb_flags, TDBF_PFSYNC)) {
		if (TAILQ_EMPTY(&sc->sc_tdb_q))
			nlen += sizeof(struct pfsync_subheader);

		if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
			pfsync_sendout();

			nlen = sizeof(struct pfsync_subheader) +
			    sizeof(struct pfsync_tdb);
		}

		sc->sc_len += nlen;
		TAILQ_INSERT_TAIL(&sc->sc_tdb_q, t, tdb_sync_entry);
		SET(t->tdb_flags, TDBF_PFSYNC);
		t->tdb_updates = 0;
	} else {
		if (++t->tdb_updates >= sc->sc_maxupdates)
			schednetisr(NETISR_PFSYNC);
	}

	if (output)
		SET(t->tdb_flags, TDBF_PFSYNC_RPL);
	else
		CLR(t->tdb_flags, TDBF_PFSYNC_RPL);
}

void
pfsync_delete_tdb(struct tdb *t)
{
	struct pfsync_softc *sc = pfsyncif;

	if (sc == NULL || !ISSET(t->tdb_flags, TDBF_PFSYNC))
		return;

	sc->sc_len -= sizeof(struct pfsync_tdb);
	TAILQ_REMOVE(&sc->sc_tdb_q, t, tdb_sync_entry);
	CLR(t->tdb_flags, TDBF_PFSYNC);

	if (TAILQ_EMPTY(&sc->sc_tdb_q))
		sc->sc_len -= sizeof(struct pfsync_subheader);
}

void
pfsync_out_tdb(struct tdb *t, void *buf)
{
	struct pfsync_tdb *ut = buf;

	bzero(ut, sizeof(*ut));
	ut->spi = t->tdb_spi;
	bcopy(&t->tdb_dst, &ut->dst, sizeof(ut->dst));
	/*
	 * When a failover happens, the master's rpl is probably above
	 * what we see here (we may be up to a second late), so
	 * increase it a bit for outbound tdbs to manage most such
	 * situations.
	 *
	 * For now, just add an offset that is likely to be larger
	 * than the number of packets we can see in one second. The RFC
	 * just says the next packet must have a higher seq value.
	 *
	 * XXX What is a good algorithm for this? We could use
	 * a rate-determined increase, but to know it, we would have
	 * to extend struct tdb.
	 * XXX pt->rpl can wrap over MAXINT, but if so the real tdb
	 * will soon be replaced anyway. For now, just don't handle
	 * this edge case.
	 */
#define RPL_INCR 16384
	ut->rpl = htobe64(t->tdb_rpl + (ISSET(t->tdb_flags, TDBF_PFSYNC_RPL) ?
	    RPL_INCR : 0));
	ut->cur_bytes = htobe64(t->tdb_cur_bytes);
	ut->sproto = t->tdb_sproto;
	ut->rdomain = htons(t->tdb_rdomain);
}

void
pfsync_bulk_start(void)
{
	struct pfsync_softc *sc = pfsyncif;

	DPFPRINTF(LOG_INFO, "received bulk update request");

	if (TAILQ_EMPTY(&state_list))
		pfsync_bulk_status(PFSYNC_BUS_END);
	else {
		sc->sc_ureq_received = time_uptime;

		if (sc->sc_bulk_next == NULL)
			sc->sc_bulk_next = TAILQ_FIRST(&state_list);
		sc->sc_bulk_last = sc->sc_bulk_next;

		pfsync_bulk_status(PFSYNC_BUS_START);
		timeout_add(&sc->sc_bulk_tmo, 0);
	}
}

void
pfsync_bulk_update(void *arg)
{
	struct pfsync_softc *sc = arg;
	struct pf_state *st;
	int i = 0;

	NET_LOCK();
	st = sc->sc_bulk_next;

	for (;;) {
		if (st->sync_state == PFSYNC_S_NONE &&
		    st->timeout < PFTM_MAX &&
		    st->pfsync_time <= sc->sc_ureq_received) {
			pfsync_update_state_req(st);
			i++;
		}

		st = TAILQ_NEXT(st, entry_list);
		if (st == NULL)
			st = TAILQ_FIRST(&state_list);

		if (st == sc->sc_bulk_last) {
			/* we're done */
			sc->sc_bulk_next = NULL;
			sc->sc_bulk_last = NULL;
			pfsync_bulk_status(PFSYNC_BUS_END);
			break;
		}

		if (i > 1 && (sc->sc_if.if_mtu - sc->sc_len) <
		    sizeof(struct pfsync_state)) {
			/* we've filled a packet */
			sc->sc_bulk_next = st;
			timeout_add(&sc->sc_bulk_tmo, 1);
			break;
		}
	}
	NET_UNLOCK();
}

void
pfsync_bulk_status(u_int8_t status)
{
	struct {
		struct pfsync_subheader subh;
		struct pfsync_bus bus;
	} __packed r;

	struct pfsync_softc *sc = pfsyncif;

	bzero(&r, sizeof(r));

	r.subh.action = PFSYNC_ACT_BUS;
	r.subh.len = sizeof(struct pfsync_bus) >> 2;
	r.subh.count = htons(1);

	r.bus.creatorid = pf_status.hostid;
	r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received);
	r.bus.status = status;

	pfsync_send_plus(&r, sizeof(r));
}

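/*
 * The requester retries a failed bulk update every five seconds, up to
 * PFSYNC_MAX_BULKTRIES times, then gives up and behaves as if the
 * transfer succeeded so the node does not stay demoted forever.
 */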
void
pfsync_bulk_fail(void *arg)
{
	struct pfsync_softc *sc = arg;

	NET_LOCK();
	if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) {
		/* Try again */
		timeout_add_sec(&sc->sc_bulkfail_tmo, 5);
		pfsync_request_update(0, 0);
	} else {
		/* Pretend like the transfer was ok */
		sc->sc_ureq_sent = 0;
		sc->sc_bulk_tries = 0;
#if NCARP > 0
		if (!pfsync_sync_ok)
			carp_group_demote_adj(&sc->sc_if, -1,
			    sc->sc_link_demoted ?
			    "pfsync link state up" :
			    "pfsync bulk fail");
		if (sc->sc_initial_bulk) {
			carp_group_demote_adj(&sc->sc_if, -32,
			    "pfsync init");
			sc->sc_initial_bulk = 0;
		}
#endif
		pfsync_sync_ok = 1;
		sc->sc_link_demoted = 0;
		DPFPRINTF(LOG_ERR, "failed to receive bulk update");
	}
	NET_UNLOCK();
}

void
pfsync_send_plus(void *plus, size_t pluslen)
{
	struct pfsync_softc *sc = pfsyncif;

	if (sc->sc_len + pluslen > sc->sc_if.if_mtu)
		pfsync_sendout();

	sc->sc_plus = plus;
	sc->sc_len += (sc->sc_pluslen = pluslen);

	pfsync_sendout();
}

int
pfsync_up(void)
{
	struct pfsync_softc *sc = pfsyncif;

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
		return (0);

	return (1);
}

int
pfsync_state_in_use(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;

	if (sc == NULL)
		return (0);

	if (st->sync_state != PFSYNC_S_NONE ||
	    st == sc->sc_bulk_next ||
	    st == sc->sc_bulk_last)
		return (1);

	return (0);
}

void
pfsync_timeout(void *arg)
{
	NET_LOCK();
	pfsync_sendout();
	NET_UNLOCK();
}

/* this is a softnet/netisr handler */
void
pfsyncintr(void)
{
	pfsync_sendout();
}

int
pfsync_sysctl_pfsyncstat(void *oldp, size_t *oldlenp, void *newp)
{
	struct pfsyncstats pfsyncstat;

	CTASSERT(sizeof(pfsyncstat) == (pfsyncs_ncounters * sizeof(uint64_t)));
	memset(&pfsyncstat, 0, sizeof pfsyncstat);
	counters_read(pfsynccounters, (uint64_t *)&pfsyncstat,
	    pfsyncs_ncounters);
	return (sysctl_rdstruct(oldp, oldlenp, newp,
	    &pfsyncstat, sizeof(pfsyncstat)));
}

int
pfsync_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen)
{
	/* All sysctl names at this level are terminal. */
	if (namelen != 1)
		return (ENOTDIR);

	switch (name[0]) {
	case PFSYNCCTL_STATS:
		return (pfsync_sysctl_pfsyncstat(oldp, oldlenp, newp));
	default:
		return (ENOPROTOOPT);
	}
}