/*	$OpenBSD: if_pfsync.c,v 1.278 2020/08/24 15:30:58 kn Exp $	*/

/*
 * Copyright (c) 2002 Michael Shalayeff
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 2009 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
43 */ 44 45 #include <sys/param.h> 46 #include <sys/systm.h> 47 #include <sys/time.h> 48 #include <sys/malloc.h> 49 #include <sys/mbuf.h> 50 #include <sys/socket.h> 51 #include <sys/ioctl.h> 52 #include <sys/timeout.h> 53 #include <sys/kernel.h> 54 #include <sys/sysctl.h> 55 #include <sys/pool.h> 56 #include <sys/syslog.h> 57 58 #include <net/if.h> 59 #include <net/if_types.h> 60 #include <net/bpf.h> 61 #include <net/netisr.h> 62 63 #include <netinet/in.h> 64 #include <netinet/if_ether.h> 65 #include <netinet/ip.h> 66 #include <netinet/in_var.h> 67 #include <netinet/ip_var.h> 68 #include <netinet/ip_ipsp.h> 69 #include <netinet/ip_icmp.h> 70 #include <netinet/icmp6.h> 71 #include <netinet/tcp.h> 72 #include <netinet/tcp_seq.h> 73 #include <netinet/tcp_fsm.h> 74 #include <netinet/udp.h> 75 76 #ifdef INET6 77 #include <netinet6/in6_var.h> 78 #include <netinet/ip6.h> 79 #include <netinet6/ip6_var.h> 80 #include <netinet6/nd6.h> 81 #endif /* INET6 */ 82 83 #include "carp.h" 84 #if NCARP > 0 85 #include <netinet/ip_carp.h> 86 #endif 87 88 #define PF_DEBUGNAME "pfsync: " 89 #include <net/pfvar.h> 90 #include <net/pfvar_priv.h> 91 #include <net/if_pfsync.h> 92 93 #include "bpfilter.h" 94 #include "pfsync.h" 95 96 #define PFSYNC_MINPKT ( \ 97 sizeof(struct ip) + \ 98 sizeof(struct pfsync_header)) 99 100 int pfsync_upd_tcp(struct pf_state *, struct pfsync_state_peer *, 101 struct pfsync_state_peer *); 102 103 int pfsync_in_clr(caddr_t, int, int, int); 104 int pfsync_in_iack(caddr_t, int, int, int); 105 int pfsync_in_upd_c(caddr_t, int, int, int); 106 int pfsync_in_ureq(caddr_t, int, int, int); 107 int pfsync_in_del(caddr_t, int, int, int); 108 int pfsync_in_del_c(caddr_t, int, int, int); 109 int pfsync_in_bus(caddr_t, int, int, int); 110 int pfsync_in_tdb(caddr_t, int, int, int); 111 int pfsync_in_ins(caddr_t, int, int, int); 112 int pfsync_in_upd(caddr_t, int, int, int); 113 int pfsync_in_eof(caddr_t, int, int, int); 114 115 int pfsync_in_error(caddr_t, int, int, int); 116 117 void pfsync_update_state_locked(struct pf_state *); 118 119 struct { 120 int (*in)(caddr_t, int, int, int); 121 size_t len; 122 } pfsync_acts[] = { 123 /* PFSYNC_ACT_CLR */ 124 { pfsync_in_clr, sizeof(struct pfsync_clr) }, 125 /* PFSYNC_ACT_OINS */ 126 { pfsync_in_error, 0 }, 127 /* PFSYNC_ACT_INS_ACK */ 128 { pfsync_in_iack, sizeof(struct pfsync_ins_ack) }, 129 /* PFSYNC_ACT_OUPD */ 130 { pfsync_in_error, 0 }, 131 /* PFSYNC_ACT_UPD_C */ 132 { pfsync_in_upd_c, sizeof(struct pfsync_upd_c) }, 133 /* PFSYNC_ACT_UPD_REQ */ 134 { pfsync_in_ureq, sizeof(struct pfsync_upd_req) }, 135 /* PFSYNC_ACT_DEL */ 136 { pfsync_in_del, sizeof(struct pfsync_state) }, 137 /* PFSYNC_ACT_DEL_C */ 138 { pfsync_in_del_c, sizeof(struct pfsync_del_c) }, 139 /* PFSYNC_ACT_INS_F */ 140 { pfsync_in_error, 0 }, 141 /* PFSYNC_ACT_DEL_F */ 142 { pfsync_in_error, 0 }, 143 /* PFSYNC_ACT_BUS */ 144 { pfsync_in_bus, sizeof(struct pfsync_bus) }, 145 /* PFSYNC_ACT_OTDB */ 146 { pfsync_in_error, 0 }, 147 /* PFSYNC_ACT_EOF */ 148 { pfsync_in_error, 0 }, 149 /* PFSYNC_ACT_INS */ 150 { pfsync_in_ins, sizeof(struct pfsync_state) }, 151 /* PFSYNC_ACT_UPD */ 152 { pfsync_in_upd, sizeof(struct pfsync_state) }, 153 /* PFSYNC_ACT_TDB */ 154 { pfsync_in_tdb, sizeof(struct pfsync_tdb) }, 155 }; 156 157 struct pfsync_q { 158 void (*write)(struct pf_state *, void *); 159 size_t len; 160 u_int8_t action; 161 }; 162 163 /* we have one of these for every PFSYNC_S_ */ 164 void pfsync_out_state(struct pf_state *, void *); 165 void pfsync_out_iack(struct pf_state *, void *); 166 
void pfsync_out_upd_c(struct pf_state *, void *); 167 void pfsync_out_del(struct pf_state *, void *); 168 169 struct pfsync_q pfsync_qs[] = { 170 { pfsync_out_iack, sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK }, 171 { pfsync_out_upd_c, sizeof(struct pfsync_upd_c), PFSYNC_ACT_UPD_C }, 172 { pfsync_out_del, sizeof(struct pfsync_del_c), PFSYNC_ACT_DEL_C }, 173 { pfsync_out_state, sizeof(struct pfsync_state), PFSYNC_ACT_INS }, 174 { pfsync_out_state, sizeof(struct pfsync_state), PFSYNC_ACT_UPD } 175 }; 176 177 void pfsync_q_ins(struct pf_state *, int); 178 void pfsync_q_del(struct pf_state *); 179 180 struct pfsync_upd_req_item { 181 TAILQ_ENTRY(pfsync_upd_req_item) ur_entry; 182 struct pfsync_upd_req ur_msg; 183 }; 184 TAILQ_HEAD(pfsync_upd_reqs, pfsync_upd_req_item); 185 186 struct pfsync_deferral { 187 TAILQ_ENTRY(pfsync_deferral) pd_entry; 188 struct pf_state *pd_st; 189 struct mbuf *pd_m; 190 struct timeout pd_tmo; 191 }; 192 TAILQ_HEAD(pfsync_deferrals, pfsync_deferral); 193 194 #define PFSYNC_PLSIZE MAX(sizeof(struct pfsync_upd_req_item), \ 195 sizeof(struct pfsync_deferral)) 196 197 void pfsync_out_tdb(struct tdb *, void *); 198 199 struct pfsync_softc { 200 struct ifnet sc_if; 201 unsigned int sc_sync_ifidx; 202 203 struct pool sc_pool; 204 205 struct ip_moptions sc_imo; 206 207 struct in_addr sc_sync_peer; 208 u_int8_t sc_maxupdates; 209 210 struct ip sc_template; 211 212 struct pf_state_queue sc_qs[PFSYNC_S_COUNT]; 213 size_t sc_len; 214 215 struct pfsync_upd_reqs sc_upd_req_list; 216 217 int sc_initial_bulk; 218 int sc_link_demoted; 219 220 int sc_defer; 221 struct pfsync_deferrals sc_deferrals; 222 u_int sc_deferred; 223 224 void *sc_plus; 225 size_t sc_pluslen; 226 227 u_int32_t sc_ureq_sent; 228 int sc_bulk_tries; 229 struct timeout sc_bulkfail_tmo; 230 231 u_int32_t sc_ureq_received; 232 struct pf_state *sc_bulk_next; 233 struct pf_state *sc_bulk_last; 234 struct timeout sc_bulk_tmo; 235 236 TAILQ_HEAD(, tdb) sc_tdb_q; 237 238 struct task sc_ltask; 239 struct task sc_dtask; 240 241 struct timeout sc_tmo; 242 }; 243 244 struct pfsync_softc *pfsyncif = NULL; 245 struct cpumem *pfsynccounters; 246 247 void pfsyncattach(int); 248 int pfsync_clone_create(struct if_clone *, int); 249 int pfsync_clone_destroy(struct ifnet *); 250 int pfsync_alloc_scrub_memory(struct pfsync_state_peer *, 251 struct pf_state_peer *); 252 void pfsync_update_net_tdb(struct pfsync_tdb *); 253 int pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *, 254 struct rtentry *); 255 int pfsyncioctl(struct ifnet *, u_long, caddr_t); 256 void pfsyncstart(struct ifqueue *); 257 void pfsync_syncdev_state(void *); 258 void pfsync_ifdetach(void *); 259 260 void pfsync_deferred(struct pf_state *, int); 261 void pfsync_undefer(struct pfsync_deferral *, int); 262 void pfsync_defer_tmo(void *); 263 264 void pfsync_cancel_full_update(struct pfsync_softc *); 265 void pfsync_request_full_update(struct pfsync_softc *); 266 void pfsync_request_update(u_int32_t, u_int64_t); 267 void pfsync_update_state_req(struct pf_state *); 268 269 void pfsync_drop(struct pfsync_softc *); 270 void pfsync_sendout(void); 271 void pfsync_send_plus(void *, size_t); 272 void pfsync_timeout(void *); 273 void pfsync_tdb_timeout(void *); 274 275 void pfsync_bulk_start(void); 276 void pfsync_bulk_status(u_int8_t); 277 void pfsync_bulk_update(void *); 278 void pfsync_bulk_fail(void *); 279 #ifdef WITH_PF_LOCK 280 void pfsync_send_dispatch(void *); 281 void pfsync_send_pkt(struct mbuf *); 282 283 static struct mbuf_queue pfsync_mq; 284 
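/*
 * Under WITH_PF_LOCK, pfsync_sendout() does not call ip_output()
 * directly (it runs with the PF lock held).  Instead pfsync_send_pkt()
 * queues finished packets on pfsync_mq and schedules pfsync_task, and
 * pfsync_send_dispatch() later transmits them holding only the net lock.
 */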
static struct task pfsync_task = 285 TASK_INITIALIZER(pfsync_send_dispatch, &pfsync_mq); 286 #endif /* WITH_PF_LOCK */ 287 288 #define PFSYNC_MAX_BULKTRIES 12 289 int pfsync_sync_ok; 290 291 struct if_clone pfsync_cloner = 292 IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create, pfsync_clone_destroy); 293 294 void 295 pfsyncattach(int npfsync) 296 { 297 if_clone_attach(&pfsync_cloner); 298 pfsynccounters = counters_alloc(pfsyncs_ncounters); 299 #ifdef WITH_PF_LOCK 300 mq_init(&pfsync_mq, 4096, IPL_SOFTNET); 301 #endif /* WITH_PF_LOCK */ 302 } 303 304 int 305 pfsync_clone_create(struct if_clone *ifc, int unit) 306 { 307 struct pfsync_softc *sc; 308 struct ifnet *ifp; 309 int q; 310 311 if (unit != 0) 312 return (EINVAL); 313 314 pfsync_sync_ok = 1; 315 316 sc = malloc(sizeof(*pfsyncif), M_DEVBUF, M_WAITOK|M_ZERO); 317 for (q = 0; q < PFSYNC_S_COUNT; q++) 318 TAILQ_INIT(&sc->sc_qs[q]); 319 320 pool_init(&sc->sc_pool, PFSYNC_PLSIZE, 0, IPL_SOFTNET, 0, "pfsync", 321 NULL); 322 TAILQ_INIT(&sc->sc_upd_req_list); 323 TAILQ_INIT(&sc->sc_deferrals); 324 task_set(&sc->sc_ltask, pfsync_syncdev_state, sc); 325 task_set(&sc->sc_dtask, pfsync_ifdetach, sc); 326 sc->sc_deferred = 0; 327 328 TAILQ_INIT(&sc->sc_tdb_q); 329 330 sc->sc_len = PFSYNC_MINPKT; 331 sc->sc_maxupdates = 128; 332 333 sc->sc_imo.imo_membership = mallocarray(IP_MIN_MEMBERSHIPS, 334 sizeof(struct in_multi *), M_IPMOPTS, M_WAITOK|M_ZERO); 335 sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS; 336 337 ifp = &sc->sc_if; 338 snprintf(ifp->if_xname, sizeof ifp->if_xname, "pfsync%d", unit); 339 ifp->if_softc = sc; 340 ifp->if_ioctl = pfsyncioctl; 341 ifp->if_output = pfsyncoutput; 342 ifp->if_qstart = pfsyncstart; 343 ifp->if_type = IFT_PFSYNC; 344 ifp->if_hdrlen = sizeof(struct pfsync_header); 345 ifp->if_mtu = ETHERMTU; 346 ifp->if_xflags = IFXF_CLONED | IFXF_MPSAFE; 347 timeout_set_proc(&sc->sc_tmo, pfsync_timeout, NULL); 348 timeout_set_proc(&sc->sc_bulk_tmo, pfsync_bulk_update, NULL); 349 timeout_set_proc(&sc->sc_bulkfail_tmo, pfsync_bulk_fail, NULL); 350 351 if_attach(ifp); 352 if_alloc_sadl(ifp); 353 354 #if NCARP > 0 355 if_addgroup(ifp, "carp"); 356 #endif 357 358 #if NBPFILTER > 0 359 bpfattach(&sc->sc_if.if_bpf, ifp, DLT_PFSYNC, PFSYNC_HDRLEN); 360 #endif 361 362 pfsyncif = sc; 363 364 return (0); 365 } 366 367 int 368 pfsync_clone_destroy(struct ifnet *ifp) 369 { 370 struct pfsync_softc *sc = ifp->if_softc; 371 struct ifnet *ifp0; 372 struct pfsync_deferral *pd; 373 374 NET_LOCK(); 375 376 #if NCARP > 0 377 if (!pfsync_sync_ok) 378 carp_group_demote_adj(&sc->sc_if, -1, "pfsync destroy"); 379 if (sc->sc_link_demoted) 380 carp_group_demote_adj(&sc->sc_if, -1, "pfsync destroy"); 381 #endif 382 if ((ifp0 = if_get(sc->sc_sync_ifidx)) != NULL) { 383 if_linkstatehook_del(ifp0, &sc->sc_ltask); 384 if_detachhook_del(ifp0, &sc->sc_dtask); 385 } 386 if_put(ifp0); 387 388 /* XXXSMP breaks atomicity */ 389 NET_UNLOCK(); 390 if_detach(ifp); 391 NET_LOCK(); 392 393 pfsync_drop(sc); 394 395 while (sc->sc_deferred > 0) { 396 pd = TAILQ_FIRST(&sc->sc_deferrals); 397 timeout_del(&pd->pd_tmo); 398 pfsync_undefer(pd, 0); 399 } 400 401 pfsyncif = NULL; 402 timeout_del(&sc->sc_bulkfail_tmo); 403 timeout_del(&sc->sc_bulk_tmo); 404 timeout_del(&sc->sc_tmo); 405 406 NET_UNLOCK(); 407 408 pool_destroy(&sc->sc_pool); 409 free(sc->sc_imo.imo_membership, M_IPMOPTS, 410 sc->sc_imo.imo_max_memberships * sizeof(struct in_multi *)); 411 free(sc, M_DEVBUF, sizeof(*sc)); 412 413 return (0); 414 } 415 416 /* 417 * Start output on the pfsync interface. 
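 * Anything placed on the interface queue is simply purged; pfsync
 * builds and transmits its packets itself from pfsync_sendout().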
418 */ 419 void 420 pfsyncstart(struct ifqueue *ifq) 421 { 422 ifq_purge(ifq); 423 } 424 425 void 426 pfsync_syncdev_state(void *arg) 427 { 428 struct pfsync_softc *sc = arg; 429 struct ifnet *ifp; 430 431 if ((sc->sc_if.if_flags & IFF_UP) == 0) 432 return; 433 if ((ifp = if_get(sc->sc_sync_ifidx)) == NULL) 434 return; 435 436 if (ifp->if_link_state == LINK_STATE_DOWN) { 437 sc->sc_if.if_flags &= ~IFF_RUNNING; 438 if (!sc->sc_link_demoted) { 439 #if NCARP > 0 440 carp_group_demote_adj(&sc->sc_if, 1, 441 "pfsync link state down"); 442 #endif 443 sc->sc_link_demoted = 1; 444 } 445 446 /* drop everything */ 447 timeout_del(&sc->sc_tmo); 448 pfsync_drop(sc); 449 450 pfsync_cancel_full_update(sc); 451 } else if (sc->sc_link_demoted) { 452 sc->sc_if.if_flags |= IFF_RUNNING; 453 454 pfsync_request_full_update(sc); 455 } 456 457 if_put(ifp); 458 } 459 460 void 461 pfsync_ifdetach(void *arg) 462 { 463 struct pfsync_softc *sc = arg; 464 struct ifnet *ifp; 465 466 if ((ifp = if_get(sc->sc_sync_ifidx)) != NULL) { 467 if_linkstatehook_del(ifp, &sc->sc_ltask); 468 if_detachhook_del(ifp, &sc->sc_dtask); 469 } 470 if_put(ifp); 471 472 sc->sc_sync_ifidx = 0; 473 } 474 475 int 476 pfsync_alloc_scrub_memory(struct pfsync_state_peer *s, 477 struct pf_state_peer *d) 478 { 479 if (s->scrub.scrub_flag && d->scrub == NULL) { 480 d->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT | PR_ZERO); 481 if (d->scrub == NULL) 482 return (ENOMEM); 483 } 484 485 return (0); 486 } 487 488 void 489 pfsync_state_export(struct pfsync_state *sp, struct pf_state *st) 490 { 491 pf_state_export(sp, st); 492 } 493 494 int 495 pfsync_state_import(struct pfsync_state *sp, int flags) 496 { 497 struct pf_state *st = NULL; 498 struct pf_state_key *skw = NULL, *sks = NULL; 499 struct pf_rule *r = NULL; 500 struct pfi_kif *kif; 501 int pool_flags; 502 int error = ENOMEM; 503 int n = 0; 504 505 if (sp->creatorid == 0) { 506 DPFPRINTF(LOG_NOTICE, "pfsync_state_import: " 507 "invalid creator id: %08x", ntohl(sp->creatorid)); 508 return (EINVAL); 509 } 510 511 if ((kif = pfi_kif_get(sp->ifname)) == NULL) { 512 DPFPRINTF(LOG_NOTICE, "pfsync_state_import: " 513 "unknown interface: %s", sp->ifname); 514 if (flags & PFSYNC_SI_IOCTL) 515 return (EINVAL); 516 return (0); /* skip this state */ 517 } 518 519 if (sp->af == 0) 520 return (0); /* skip this state */ 521 522 /* 523 * If the ruleset checksums match or the state is coming from the ioctl, 524 * it's safe to associate the state with the rule of that number. 
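	 * Otherwise the state is bound to the default rule.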
525 */ 526 if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) && 527 (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) < 528 pf_main_ruleset.rules.active.rcount) { 529 TAILQ_FOREACH(r, pf_main_ruleset.rules.active.ptr, entries) 530 if (ntohl(sp->rule) == n++) 531 break; 532 } else 533 r = &pf_default_rule; 534 535 if ((r->max_states && r->states_cur >= r->max_states)) 536 goto cleanup; 537 538 if (flags & PFSYNC_SI_IOCTL) 539 pool_flags = PR_WAITOK | PR_LIMITFAIL | PR_ZERO; 540 else 541 pool_flags = PR_NOWAIT | PR_LIMITFAIL | PR_ZERO; 542 543 if ((st = pool_get(&pf_state_pl, pool_flags)) == NULL) 544 goto cleanup; 545 546 if ((skw = pf_alloc_state_key(pool_flags)) == NULL) 547 goto cleanup; 548 549 if ((sp->key[PF_SK_WIRE].af && 550 (sp->key[PF_SK_WIRE].af != sp->key[PF_SK_STACK].af)) || 551 PF_ANEQ(&sp->key[PF_SK_WIRE].addr[0], 552 &sp->key[PF_SK_STACK].addr[0], sp->af) || 553 PF_ANEQ(&sp->key[PF_SK_WIRE].addr[1], 554 &sp->key[PF_SK_STACK].addr[1], sp->af) || 555 sp->key[PF_SK_WIRE].port[0] != sp->key[PF_SK_STACK].port[0] || 556 sp->key[PF_SK_WIRE].port[1] != sp->key[PF_SK_STACK].port[1] || 557 sp->key[PF_SK_WIRE].rdomain != sp->key[PF_SK_STACK].rdomain) { 558 if ((sks = pf_alloc_state_key(pool_flags)) == NULL) 559 goto cleanup; 560 } else 561 sks = skw; 562 563 /* allocate memory for scrub info */ 564 if (pfsync_alloc_scrub_memory(&sp->src, &st->src) || 565 pfsync_alloc_scrub_memory(&sp->dst, &st->dst)) 566 goto cleanup; 567 568 /* copy to state key(s) */ 569 skw->addr[0] = sp->key[PF_SK_WIRE].addr[0]; 570 skw->addr[1] = sp->key[PF_SK_WIRE].addr[1]; 571 skw->port[0] = sp->key[PF_SK_WIRE].port[0]; 572 skw->port[1] = sp->key[PF_SK_WIRE].port[1]; 573 skw->rdomain = ntohs(sp->key[PF_SK_WIRE].rdomain); 574 PF_REF_INIT(skw->refcnt); 575 skw->proto = sp->proto; 576 if (!(skw->af = sp->key[PF_SK_WIRE].af)) 577 skw->af = sp->af; 578 if (sks != skw) { 579 sks->addr[0] = sp->key[PF_SK_STACK].addr[0]; 580 sks->addr[1] = sp->key[PF_SK_STACK].addr[1]; 581 sks->port[0] = sp->key[PF_SK_STACK].port[0]; 582 sks->port[1] = sp->key[PF_SK_STACK].port[1]; 583 sks->rdomain = ntohs(sp->key[PF_SK_STACK].rdomain); 584 PF_REF_INIT(sks->refcnt); 585 if (!(sks->af = sp->key[PF_SK_STACK].af)) 586 sks->af = sp->af; 587 if (sks->af != skw->af) { 588 switch (sp->proto) { 589 case IPPROTO_ICMP: 590 sks->proto = IPPROTO_ICMPV6; 591 break; 592 case IPPROTO_ICMPV6: 593 sks->proto = IPPROTO_ICMP; 594 break; 595 default: 596 sks->proto = sp->proto; 597 } 598 } else 599 sks->proto = sp->proto; 600 601 if (((sks->af != AF_INET) && (sks->af != AF_INET6)) || 602 ((skw->af != AF_INET) && (skw->af != AF_INET6))) { 603 error = EINVAL; 604 goto cleanup; 605 } 606 607 } else if ((sks->af != AF_INET) && (sks->af != AF_INET6)) { 608 error = EINVAL; 609 goto cleanup; 610 } 611 st->rtableid[PF_SK_WIRE] = ntohl(sp->rtableid[PF_SK_WIRE]); 612 st->rtableid[PF_SK_STACK] = ntohl(sp->rtableid[PF_SK_STACK]); 613 614 /* copy to state */ 615 bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr)); 616 st->creation = getuptime() - ntohl(sp->creation); 617 st->expire = getuptime(); 618 if (ntohl(sp->expire)) { 619 u_int32_t timeout; 620 621 timeout = r->timeout[sp->timeout]; 622 if (!timeout) 623 timeout = pf_default_rule.timeout[sp->timeout]; 624 625 /* sp->expire may have been adaptively scaled by export. 
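		 * Reconstruct the remaining lifetime relative to this
		 * host's timeout for the state's current stage.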
		 */
		st->expire -= timeout - ntohl(sp->expire);
	}

	st->direction = sp->direction;
	st->log = sp->log;
	st->timeout = sp->timeout;
	st->state_flags = ntohs(sp->state_flags);
	st->max_mss = ntohs(sp->max_mss);
	st->min_ttl = sp->min_ttl;
	st->set_tos = sp->set_tos;
	st->set_prio[0] = sp->set_prio[0];
	st->set_prio[1] = sp->set_prio[1];

	st->id = sp->id;
	st->creatorid = sp->creatorid;
	pf_state_peer_ntoh(&sp->src, &st->src);
	pf_state_peer_ntoh(&sp->dst, &st->dst);

	st->rule.ptr = r;
	st->anchor.ptr = NULL;
	st->rt_kif = NULL;

	st->pfsync_time = getuptime();
	st->sync_state = PFSYNC_S_NONE;

	refcnt_init(&st->refcnt);

	/* XXX when we have anchors, use STATE_INC_COUNTERS */
	r->states_cur++;
	r->states_tot++;

	if (!ISSET(flags, PFSYNC_SI_IOCTL))
		SET(st->state_flags, PFSTATE_NOSYNC);

	/*
	 * We just set the PFSTATE_NOSYNC flag, which prevents
	 * pfsync_insert_state() from inserting the state into pfsync.
	 */
	if (pf_state_insert(kif, &skw, &sks, st) != 0) {
		/* XXX when we have anchors, use STATE_DEC_COUNTERS */
		r->states_cur--;
		error = EEXIST;
		goto cleanup_state;
	}

	if (!ISSET(flags, PFSYNC_SI_IOCTL)) {
		CLR(st->state_flags, PFSTATE_NOSYNC);
		if (ISSET(st->state_flags, PFSTATE_ACK)) {
			pfsync_q_ins(st, PFSYNC_S_IACK);
			schednetisr(NETISR_PFSYNC);
		}
	}
	CLR(st->state_flags, PFSTATE_ACK);

	return (0);

 cleanup:
	if (skw == sks)
		sks = NULL;
	if (skw != NULL)
		pool_put(&pf_state_key_pl, skw);
	if (sks != NULL)
		pool_put(&pf_state_key_pl, sks);

 cleanup_state:	/* pf_state_insert frees the state keys */
	if (st) {
		if (st->dst.scrub)
			pool_put(&pf_state_scrub_pl, st->dst.scrub);
		if (st->src.scrub)
			pool_put(&pf_state_scrub_pl, st->src.scrub);
		pool_put(&pf_state_pl, st);
	}
	return (error);
}

int
pfsync_input(struct mbuf **mp, int *offp, int proto, int af)
{
	struct mbuf *n, *m = *mp;
	struct pfsync_softc *sc = pfsyncif;
	struct ip *ip = mtod(m, struct ip *);
	struct pfsync_header *ph;
	struct pfsync_subheader subh;
	int offset, noff, len, count, mlen, flags = 0;
	int e;

	NET_ASSERT_LOCKED();

	pfsyncstat_inc(pfsyncs_ipackets);

	/* verify that we have a sync interface configured */
	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
	    sc->sc_sync_ifidx == 0 || !pf_status.running)
		goto done;

	/* verify that the packet came in on the right interface */
	if (sc->sc_sync_ifidx != m->m_pkthdr.ph_ifidx) {
		pfsyncstat_inc(pfsyncs_badif);
		goto done;
	}

	sc->sc_if.if_ipackets++;
	sc->sc_if.if_ibytes += m->m_pkthdr.len;

	/* verify that the IP TTL is 255.
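	 * pfsync packets are generated with ip_ttl set to PFSYNC_DFLTTL,
	 * so anything that has been forwarded by a router is rejected.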
*/ 731 if (ip->ip_ttl != PFSYNC_DFLTTL) { 732 pfsyncstat_inc(pfsyncs_badttl); 733 goto done; 734 } 735 736 offset = ip->ip_hl << 2; 737 n = m_pulldown(m, offset, sizeof(*ph), &noff); 738 if (n == NULL) { 739 pfsyncstat_inc(pfsyncs_hdrops); 740 return IPPROTO_DONE; 741 } 742 ph = (struct pfsync_header *)(n->m_data + noff); 743 744 /* verify the version */ 745 if (ph->version != PFSYNC_VERSION) { 746 pfsyncstat_inc(pfsyncs_badver); 747 goto done; 748 } 749 len = ntohs(ph->len) + offset; 750 if (m->m_pkthdr.len < len) { 751 pfsyncstat_inc(pfsyncs_badlen); 752 goto done; 753 } 754 755 if (!bcmp(&ph->pfcksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH)) 756 flags = PFSYNC_SI_CKSUM; 757 758 offset += sizeof(*ph); 759 while (offset <= len - sizeof(subh)) { 760 m_copydata(m, offset, sizeof(subh), (caddr_t)&subh); 761 offset += sizeof(subh); 762 763 mlen = subh.len << 2; 764 count = ntohs(subh.count); 765 766 if (subh.action >= PFSYNC_ACT_MAX || 767 subh.action >= nitems(pfsync_acts) || 768 mlen < pfsync_acts[subh.action].len) { 769 /* 770 * subheaders are always followed by at least one 771 * message, so if the peer is new 772 * enough to tell us how big its messages are then we 773 * know enough to skip them. 774 */ 775 if (count > 0 && mlen > 0) { 776 offset += count * mlen; 777 continue; 778 } 779 pfsyncstat_inc(pfsyncs_badact); 780 goto done; 781 } 782 783 n = m_pulldown(m, offset, mlen * count, &noff); 784 if (n == NULL) { 785 pfsyncstat_inc(pfsyncs_badlen); 786 return IPPROTO_DONE; 787 } 788 789 PF_LOCK(); 790 e = pfsync_acts[subh.action].in(n->m_data + noff, mlen, count, 791 flags); 792 PF_UNLOCK(); 793 if (e != 0) 794 goto done; 795 796 offset += mlen * count; 797 } 798 799 done: 800 m_freem(m); 801 return IPPROTO_DONE; 802 } 803 804 int 805 pfsync_in_clr(caddr_t buf, int len, int count, int flags) 806 { 807 struct pfsync_clr *clr; 808 struct pf_state *st, *nexts; 809 struct pfi_kif *kif; 810 u_int32_t creatorid; 811 int i; 812 813 for (i = 0; i < count; i++) { 814 clr = (struct pfsync_clr *)buf + len * i; 815 kif = NULL; 816 creatorid = clr->creatorid; 817 if (strlen(clr->ifname) && 818 (kif = pfi_kif_find(clr->ifname)) == NULL) 819 continue; 820 821 for (st = RB_MIN(pf_state_tree_id, &tree_id); st; st = nexts) { 822 nexts = RB_NEXT(pf_state_tree_id, &tree_id, st); 823 if (st->creatorid == creatorid && 824 ((kif && st->kif == kif) || !kif)) { 825 SET(st->state_flags, PFSTATE_NOSYNC); 826 pf_remove_state(st); 827 } 828 } 829 } 830 831 return (0); 832 } 833 834 int 835 pfsync_in_ins(caddr_t buf, int len, int count, int flags) 836 { 837 struct pfsync_state *sp; 838 sa_family_t af1, af2; 839 int i; 840 841 for (i = 0; i < count; i++) { 842 sp = (struct pfsync_state *)(buf + len * i); 843 af1 = sp->key[0].af; 844 af2 = sp->key[1].af; 845 846 /* check for invalid values */ 847 if (sp->timeout >= PFTM_MAX || 848 sp->src.state > PF_TCPS_PROXY_DST || 849 sp->dst.state > PF_TCPS_PROXY_DST || 850 sp->direction > PF_OUT || 851 (((af1 || af2) && 852 ((af1 != AF_INET && af1 != AF_INET6) || 853 (af2 != AF_INET && af2 != AF_INET6))) || 854 (sp->af != AF_INET && sp->af != AF_INET6))) { 855 DPFPRINTF(LOG_NOTICE, 856 "pfsync_input: PFSYNC5_ACT_INS: invalid value"); 857 pfsyncstat_inc(pfsyncs_badval); 858 continue; 859 } 860 861 if (pfsync_state_import(sp, flags) == ENOMEM) { 862 /* drop out, but process the rest of the actions */ 863 break; 864 } 865 } 866 867 return (0); 868 } 869 870 int 871 pfsync_in_iack(caddr_t buf, int len, int count, int flags) 872 { 873 struct pfsync_ins_ack *ia; 874 struct 
pf_state_cmp id_key; 875 struct pf_state *st; 876 int i; 877 878 for (i = 0; i < count; i++) { 879 ia = (struct pfsync_ins_ack *)(buf + len * i); 880 881 id_key.id = ia->id; 882 id_key.creatorid = ia->creatorid; 883 884 st = pf_find_state_byid(&id_key); 885 if (st == NULL) 886 continue; 887 888 if (ISSET(st->state_flags, PFSTATE_ACK)) 889 pfsync_deferred(st, 0); 890 } 891 892 return (0); 893 } 894 895 int 896 pfsync_upd_tcp(struct pf_state *st, struct pfsync_state_peer *src, 897 struct pfsync_state_peer *dst) 898 { 899 int sync = 0; 900 901 /* 902 * The state should never go backwards except 903 * for syn-proxy states. Neither should the 904 * sequence window slide backwards. 905 */ 906 if ((st->src.state > src->state && 907 (st->src.state < PF_TCPS_PROXY_SRC || 908 src->state >= PF_TCPS_PROXY_SRC)) || 909 910 (st->src.state == src->state && 911 SEQ_GT(st->src.seqlo, ntohl(src->seqlo)))) 912 sync++; 913 else 914 pf_state_peer_ntoh(src, &st->src); 915 916 if ((st->dst.state > dst->state) || 917 918 (st->dst.state >= TCPS_SYN_SENT && 919 SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo)))) 920 sync++; 921 else 922 pf_state_peer_ntoh(dst, &st->dst); 923 924 return (sync); 925 } 926 927 int 928 pfsync_in_upd(caddr_t buf, int len, int count, int flags) 929 { 930 struct pfsync_state *sp; 931 struct pf_state_cmp id_key; 932 struct pf_state *st; 933 int sync; 934 935 int i; 936 937 for (i = 0; i < count; i++) { 938 sp = (struct pfsync_state *)(buf + len * i); 939 940 /* check for invalid values */ 941 if (sp->timeout >= PFTM_MAX || 942 sp->src.state > PF_TCPS_PROXY_DST || 943 sp->dst.state > PF_TCPS_PROXY_DST) { 944 DPFPRINTF(LOG_NOTICE, 945 "pfsync_input: PFSYNC_ACT_UPD: invalid value"); 946 pfsyncstat_inc(pfsyncs_badval); 947 continue; 948 } 949 950 id_key.id = sp->id; 951 id_key.creatorid = sp->creatorid; 952 953 st = pf_find_state_byid(&id_key); 954 if (st == NULL) { 955 /* insert the update */ 956 if (pfsync_state_import(sp, flags)) 957 pfsyncstat_inc(pfsyncs_badstate); 958 continue; 959 } 960 961 if (ISSET(st->state_flags, PFSTATE_ACK)) 962 pfsync_deferred(st, 1); 963 964 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) 965 sync = pfsync_upd_tcp(st, &sp->src, &sp->dst); 966 else { 967 sync = 0; 968 969 /* 970 * Non-TCP protocol state machine always go 971 * forwards 972 */ 973 if (st->src.state > sp->src.state) 974 sync++; 975 else 976 pf_state_peer_ntoh(&sp->src, &st->src); 977 978 if (st->dst.state > sp->dst.state) 979 sync++; 980 else 981 pf_state_peer_ntoh(&sp->dst, &st->dst); 982 } 983 984 if (sync < 2) { 985 pfsync_alloc_scrub_memory(&sp->dst, &st->dst); 986 pf_state_peer_ntoh(&sp->dst, &st->dst); 987 st->expire = getuptime(); 988 st->timeout = sp->timeout; 989 } 990 st->pfsync_time = getuptime(); 991 992 if (sync) { 993 pfsyncstat_inc(pfsyncs_stale); 994 995 pfsync_update_state_locked(st); 996 schednetisr(NETISR_PFSYNC); 997 } 998 } 999 1000 return (0); 1001 } 1002 1003 int 1004 pfsync_in_upd_c(caddr_t buf, int len, int count, int flags) 1005 { 1006 struct pfsync_upd_c *up; 1007 struct pf_state_cmp id_key; 1008 struct pf_state *st; 1009 1010 int sync; 1011 1012 int i; 1013 1014 for (i = 0; i < count; i++) { 1015 up = (struct pfsync_upd_c *)(buf + len * i); 1016 1017 /* check for invalid values */ 1018 if (up->timeout >= PFTM_MAX || 1019 up->src.state > PF_TCPS_PROXY_DST || 1020 up->dst.state > PF_TCPS_PROXY_DST) { 1021 DPFPRINTF(LOG_NOTICE, 1022 "pfsync_input: PFSYNC_ACT_UPD_C: invalid value"); 1023 pfsyncstat_inc(pfsyncs_badval); 1024 continue; 1025 } 1026 1027 id_key.id = up->id; 1028 
id_key.creatorid = up->creatorid; 1029 1030 st = pf_find_state_byid(&id_key); 1031 if (st == NULL) { 1032 /* We don't have this state. Ask for it. */ 1033 pfsync_request_update(id_key.creatorid, id_key.id); 1034 continue; 1035 } 1036 1037 if (ISSET(st->state_flags, PFSTATE_ACK)) 1038 pfsync_deferred(st, 1); 1039 1040 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) 1041 sync = pfsync_upd_tcp(st, &up->src, &up->dst); 1042 else { 1043 sync = 0; 1044 /* 1045 * Non-TCP protocol state machine always go 1046 * forwards 1047 */ 1048 if (st->src.state > up->src.state) 1049 sync++; 1050 else 1051 pf_state_peer_ntoh(&up->src, &st->src); 1052 1053 if (st->dst.state > up->dst.state) 1054 sync++; 1055 else 1056 pf_state_peer_ntoh(&up->dst, &st->dst); 1057 } 1058 if (sync < 2) { 1059 pfsync_alloc_scrub_memory(&up->dst, &st->dst); 1060 pf_state_peer_ntoh(&up->dst, &st->dst); 1061 st->expire = getuptime(); 1062 st->timeout = up->timeout; 1063 } 1064 st->pfsync_time = getuptime(); 1065 1066 if (sync) { 1067 pfsyncstat_inc(pfsyncs_stale); 1068 1069 pfsync_update_state_locked(st); 1070 schednetisr(NETISR_PFSYNC); 1071 } 1072 } 1073 1074 return (0); 1075 } 1076 1077 int 1078 pfsync_in_ureq(caddr_t buf, int len, int count, int flags) 1079 { 1080 struct pfsync_upd_req *ur; 1081 int i; 1082 1083 struct pf_state_cmp id_key; 1084 struct pf_state *st; 1085 1086 for (i = 0; i < count; i++) { 1087 ur = (struct pfsync_upd_req *)(buf + len * i); 1088 1089 id_key.id = ur->id; 1090 id_key.creatorid = ur->creatorid; 1091 1092 if (id_key.id == 0 && id_key.creatorid == 0) 1093 pfsync_bulk_start(); 1094 else { 1095 st = pf_find_state_byid(&id_key); 1096 if (st == NULL) { 1097 pfsyncstat_inc(pfsyncs_badstate); 1098 continue; 1099 } 1100 if (ISSET(st->state_flags, PFSTATE_NOSYNC)) 1101 continue; 1102 1103 pfsync_update_state_req(st); 1104 } 1105 } 1106 1107 return (0); 1108 } 1109 1110 int 1111 pfsync_in_del(caddr_t buf, int len, int count, int flags) 1112 { 1113 struct pfsync_state *sp; 1114 struct pf_state_cmp id_key; 1115 struct pf_state *st; 1116 int i; 1117 1118 for (i = 0; i < count; i++) { 1119 sp = (struct pfsync_state *)(buf + len * i); 1120 1121 id_key.id = sp->id; 1122 id_key.creatorid = sp->creatorid; 1123 1124 st = pf_find_state_byid(&id_key); 1125 if (st == NULL) { 1126 pfsyncstat_inc(pfsyncs_badstate); 1127 continue; 1128 } 1129 SET(st->state_flags, PFSTATE_NOSYNC); 1130 pf_remove_state(st); 1131 } 1132 1133 return (0); 1134 } 1135 1136 int 1137 pfsync_in_del_c(caddr_t buf, int len, int count, int flags) 1138 { 1139 struct pfsync_del_c *sp; 1140 struct pf_state_cmp id_key; 1141 struct pf_state *st; 1142 int i; 1143 1144 for (i = 0; i < count; i++) { 1145 sp = (struct pfsync_del_c *)(buf + len * i); 1146 1147 id_key.id = sp->id; 1148 id_key.creatorid = sp->creatorid; 1149 1150 st = pf_find_state_byid(&id_key); 1151 if (st == NULL) { 1152 pfsyncstat_inc(pfsyncs_badstate); 1153 continue; 1154 } 1155 1156 SET(st->state_flags, PFSTATE_NOSYNC); 1157 pf_remove_state(st); 1158 } 1159 1160 return (0); 1161 } 1162 1163 int 1164 pfsync_in_bus(caddr_t buf, int len, int count, int flags) 1165 { 1166 struct pfsync_softc *sc = pfsyncif; 1167 struct pfsync_bus *bus; 1168 1169 /* If we're not waiting for a bulk update, who cares. 
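	 * sc_ureq_sent is set when we request a full update and cleared
	 * once the bulk update completes or is cancelled.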
*/ 1170 if (sc->sc_ureq_sent == 0) 1171 return (0); 1172 1173 bus = (struct pfsync_bus *)buf; 1174 1175 switch (bus->status) { 1176 case PFSYNC_BUS_START: 1177 timeout_add(&sc->sc_bulkfail_tmo, 4 * hz + 1178 pf_pool_limits[PF_LIMIT_STATES].limit / 1179 ((sc->sc_if.if_mtu - PFSYNC_MINPKT) / 1180 sizeof(struct pfsync_state))); 1181 DPFPRINTF(LOG_INFO, "received bulk update start"); 1182 break; 1183 1184 case PFSYNC_BUS_END: 1185 if (getuptime() - ntohl(bus->endtime) >= 1186 sc->sc_ureq_sent) { 1187 /* that's it, we're happy */ 1188 sc->sc_ureq_sent = 0; 1189 sc->sc_bulk_tries = 0; 1190 timeout_del(&sc->sc_bulkfail_tmo); 1191 #if NCARP > 0 1192 if (!pfsync_sync_ok) 1193 carp_group_demote_adj(&sc->sc_if, -1, 1194 sc->sc_link_demoted ? 1195 "pfsync link state up" : 1196 "pfsync bulk done"); 1197 if (sc->sc_initial_bulk) { 1198 carp_group_demote_adj(&sc->sc_if, -32, 1199 "pfsync init"); 1200 sc->sc_initial_bulk = 0; 1201 } 1202 #endif 1203 pfsync_sync_ok = 1; 1204 sc->sc_link_demoted = 0; 1205 DPFPRINTF(LOG_INFO, "received valid bulk update end"); 1206 } else { 1207 DPFPRINTF(LOG_WARNING, "received invalid " 1208 "bulk update end: bad timestamp"); 1209 } 1210 break; 1211 } 1212 1213 return (0); 1214 } 1215 1216 int 1217 pfsync_in_tdb(caddr_t buf, int len, int count, int flags) 1218 { 1219 #if defined(IPSEC) 1220 struct pfsync_tdb *tp; 1221 int i; 1222 1223 for (i = 0; i < count; i++) { 1224 tp = (struct pfsync_tdb *)(buf + len * i); 1225 pfsync_update_net_tdb(tp); 1226 } 1227 #endif 1228 1229 return (0); 1230 } 1231 1232 #if defined(IPSEC) 1233 /* Update an in-kernel tdb. Silently fail if no tdb is found. */ 1234 void 1235 pfsync_update_net_tdb(struct pfsync_tdb *pt) 1236 { 1237 struct tdb *tdb; 1238 1239 NET_ASSERT_LOCKED(); 1240 1241 /* check for invalid values */ 1242 if (ntohl(pt->spi) <= SPI_RESERVED_MAX || 1243 (pt->dst.sa.sa_family != AF_INET && 1244 pt->dst.sa.sa_family != AF_INET6)) 1245 goto bad; 1246 1247 tdb = gettdb(ntohs(pt->rdomain), pt->spi, 1248 (union sockaddr_union *)&pt->dst, pt->sproto); 1249 if (tdb) { 1250 pt->rpl = betoh64(pt->rpl); 1251 pt->cur_bytes = betoh64(pt->cur_bytes); 1252 1253 /* Neither replay nor byte counter should ever decrease. */ 1254 if (pt->rpl < tdb->tdb_rpl || 1255 pt->cur_bytes < tdb->tdb_cur_bytes) { 1256 goto bad; 1257 } 1258 1259 tdb->tdb_rpl = pt->rpl; 1260 tdb->tdb_cur_bytes = pt->cur_bytes; 1261 } 1262 return; 1263 1264 bad: 1265 DPFPRINTF(LOG_WARNING, "pfsync_insert: PFSYNC_ACT_TDB_UPD: " 1266 "invalid value"); 1267 pfsyncstat_inc(pfsyncs_badstate); 1268 return; 1269 } 1270 #endif 1271 1272 1273 int 1274 pfsync_in_eof(caddr_t buf, int len, int count, int flags) 1275 { 1276 if (len > 0 || count > 0) 1277 pfsyncstat_inc(pfsyncs_badact); 1278 1279 /* we're done. 
let the caller return */ 1280 return (1); 1281 } 1282 1283 int 1284 pfsync_in_error(caddr_t buf, int len, int count, int flags) 1285 { 1286 pfsyncstat_inc(pfsyncs_badact); 1287 return (-1); 1288 } 1289 1290 int 1291 pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, 1292 struct rtentry *rt) 1293 { 1294 m_freem(m); /* drop packet */ 1295 return (EAFNOSUPPORT); 1296 } 1297 1298 int 1299 pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 1300 { 1301 struct proc *p = curproc; 1302 struct pfsync_softc *sc = ifp->if_softc; 1303 struct ifreq *ifr = (struct ifreq *)data; 1304 struct ip_moptions *imo = &sc->sc_imo; 1305 struct pfsyncreq pfsyncr; 1306 struct ifnet *ifp0, *sifp; 1307 struct ip *ip; 1308 int error; 1309 1310 switch (cmd) { 1311 case SIOCSIFFLAGS: 1312 if ((ifp->if_flags & IFF_RUNNING) == 0 && 1313 (ifp->if_flags & IFF_UP)) { 1314 ifp->if_flags |= IFF_RUNNING; 1315 1316 #if NCARP > 0 1317 sc->sc_initial_bulk = 1; 1318 carp_group_demote_adj(&sc->sc_if, 32, "pfsync init"); 1319 #endif 1320 1321 pfsync_request_full_update(sc); 1322 } 1323 if ((ifp->if_flags & IFF_RUNNING) && 1324 (ifp->if_flags & IFF_UP) == 0) { 1325 ifp->if_flags &= ~IFF_RUNNING; 1326 1327 /* drop everything */ 1328 timeout_del(&sc->sc_tmo); 1329 pfsync_drop(sc); 1330 1331 pfsync_cancel_full_update(sc); 1332 } 1333 break; 1334 case SIOCSIFMTU: 1335 if ((ifp0 = if_get(sc->sc_sync_ifidx)) == NULL) 1336 return (EINVAL); 1337 error = 0; 1338 if (ifr->ifr_mtu <= PFSYNC_MINPKT || 1339 ifr->ifr_mtu > ifp0->if_mtu) { 1340 error = EINVAL; 1341 } 1342 if_put(ifp0); 1343 if (error) 1344 return error; 1345 if (ifr->ifr_mtu < ifp->if_mtu) 1346 pfsync_sendout(); 1347 ifp->if_mtu = ifr->ifr_mtu; 1348 break; 1349 case SIOCGETPFSYNC: 1350 bzero(&pfsyncr, sizeof(pfsyncr)); 1351 if ((ifp0 = if_get(sc->sc_sync_ifidx)) != NULL) { 1352 strlcpy(pfsyncr.pfsyncr_syncdev, 1353 ifp0->if_xname, IFNAMSIZ); 1354 } 1355 if_put(ifp0); 1356 pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer; 1357 pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates; 1358 pfsyncr.pfsyncr_defer = sc->sc_defer; 1359 return (copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr))); 1360 1361 case SIOCSETPFSYNC: 1362 if ((error = suser(p)) != 0) 1363 return (error); 1364 if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr)))) 1365 return (error); 1366 1367 if (pfsyncr.pfsyncr_syncpeer.s_addr == 0) 1368 sc->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP; 1369 else 1370 sc->sc_sync_peer.s_addr = 1371 pfsyncr.pfsyncr_syncpeer.s_addr; 1372 1373 if (pfsyncr.pfsyncr_maxupdates > 255) 1374 return (EINVAL); 1375 sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates; 1376 1377 sc->sc_defer = pfsyncr.pfsyncr_defer; 1378 1379 if (pfsyncr.pfsyncr_syncdev[0] == 0) { 1380 if ((ifp0 = if_get(sc->sc_sync_ifidx)) != NULL) { 1381 if_linkstatehook_del(ifp0, &sc->sc_ltask); 1382 if_detachhook_del(ifp0, &sc->sc_dtask); 1383 } 1384 if_put(ifp0); 1385 sc->sc_sync_ifidx = 0; 1386 if (imo->imo_num_memberships > 0) { 1387 in_delmulti(imo->imo_membership[ 1388 --imo->imo_num_memberships]); 1389 imo->imo_ifidx = 0; 1390 } 1391 break; 1392 } 1393 1394 if ((sifp = ifunit(pfsyncr.pfsyncr_syncdev)) == NULL) 1395 return (EINVAL); 1396 1397 ifp0 = if_get(sc->sc_sync_ifidx); 1398 1399 if (sifp->if_mtu < sc->sc_if.if_mtu || (ifp0 != NULL && 1400 sifp->if_mtu < ifp0->if_mtu) || 1401 sifp->if_mtu < MCLBYTES - sizeof(struct ip)) 1402 pfsync_sendout(); 1403 1404 if (ifp0) { 1405 if_linkstatehook_del(ifp0, &sc->sc_ltask); 1406 if_detachhook_del(ifp0, &sc->sc_dtask); 1407 } 1408 if_put(ifp0); 1409 
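		/*
		 * Hooks on the previous syncdev (if any) were removed above;
		 * record the new syncdev and, further below, rejoin the
		 * multicast group if we sync to the default group address.
		 */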
sc->sc_sync_ifidx = sifp->if_index; 1410 1411 if (imo->imo_num_memberships > 0) { 1412 in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); 1413 imo->imo_ifidx = 0; 1414 } 1415 1416 if (sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) { 1417 struct in_addr addr; 1418 1419 if (!(sifp->if_flags & IFF_MULTICAST)) { 1420 sc->sc_sync_ifidx = 0; 1421 return (EADDRNOTAVAIL); 1422 } 1423 1424 addr.s_addr = INADDR_PFSYNC_GROUP; 1425 1426 if ((imo->imo_membership[0] = 1427 in_addmulti(&addr, sifp)) == NULL) { 1428 sc->sc_sync_ifidx = 0; 1429 return (ENOBUFS); 1430 } 1431 imo->imo_num_memberships++; 1432 imo->imo_ifidx = sc->sc_sync_ifidx; 1433 imo->imo_ttl = PFSYNC_DFLTTL; 1434 imo->imo_loop = 0; 1435 } 1436 1437 ip = &sc->sc_template; 1438 bzero(ip, sizeof(*ip)); 1439 ip->ip_v = IPVERSION; 1440 ip->ip_hl = sizeof(sc->sc_template) >> 2; 1441 ip->ip_tos = IPTOS_LOWDELAY; 1442 /* len and id are set later */ 1443 ip->ip_off = htons(IP_DF); 1444 ip->ip_ttl = PFSYNC_DFLTTL; 1445 ip->ip_p = IPPROTO_PFSYNC; 1446 ip->ip_src.s_addr = INADDR_ANY; 1447 ip->ip_dst.s_addr = sc->sc_sync_peer.s_addr; 1448 1449 if_linkstatehook_add(sifp, &sc->sc_ltask); 1450 if_detachhook_add(sifp, &sc->sc_dtask); 1451 1452 pfsync_request_full_update(sc); 1453 1454 break; 1455 1456 default: 1457 return (ENOTTY); 1458 } 1459 1460 return (0); 1461 } 1462 1463 void 1464 pfsync_out_state(struct pf_state *st, void *buf) 1465 { 1466 struct pfsync_state *sp = buf; 1467 1468 pfsync_state_export(sp, st); 1469 } 1470 1471 void 1472 pfsync_out_iack(struct pf_state *st, void *buf) 1473 { 1474 struct pfsync_ins_ack *iack = buf; 1475 1476 iack->id = st->id; 1477 iack->creatorid = st->creatorid; 1478 } 1479 1480 void 1481 pfsync_out_upd_c(struct pf_state *st, void *buf) 1482 { 1483 struct pfsync_upd_c *up = buf; 1484 1485 bzero(up, sizeof(*up)); 1486 up->id = st->id; 1487 pf_state_peer_hton(&st->src, &up->src); 1488 pf_state_peer_hton(&st->dst, &up->dst); 1489 up->creatorid = st->creatorid; 1490 up->timeout = st->timeout; 1491 } 1492 1493 void 1494 pfsync_out_del(struct pf_state *st, void *buf) 1495 { 1496 struct pfsync_del_c *dp = buf; 1497 1498 dp->id = st->id; 1499 dp->creatorid = st->creatorid; 1500 1501 SET(st->state_flags, PFSTATE_NOSYNC); 1502 } 1503 1504 void 1505 pfsync_drop(struct pfsync_softc *sc) 1506 { 1507 struct pf_state *st; 1508 struct pfsync_upd_req_item *ur; 1509 struct tdb *t; 1510 int q; 1511 1512 for (q = 0; q < PFSYNC_S_COUNT; q++) { 1513 if (TAILQ_EMPTY(&sc->sc_qs[q])) 1514 continue; 1515 1516 while ((st = TAILQ_FIRST(&sc->sc_qs[q])) != NULL) { 1517 TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list); 1518 #ifdef PFSYNC_DEBUG 1519 KASSERT(st->sync_state == q); 1520 #endif 1521 st->sync_state = PFSYNC_S_NONE; 1522 pf_state_unref(st); 1523 } 1524 } 1525 1526 while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) { 1527 TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry); 1528 pool_put(&sc->sc_pool, ur); 1529 } 1530 1531 sc->sc_plus = NULL; 1532 1533 while ((t = TAILQ_FIRST(&sc->sc_tdb_q)) != NULL) { 1534 TAILQ_REMOVE(&sc->sc_tdb_q, t, tdb_sync_entry); 1535 CLR(t->tdb_flags, TDBF_PFSYNC); 1536 } 1537 1538 sc->sc_len = PFSYNC_MINPKT; 1539 } 1540 1541 #ifdef WITH_PF_LOCK 1542 void 1543 pfsync_send_dispatch(void *xmq) 1544 { 1545 struct mbuf_queue *mq = xmq; 1546 struct pfsync_softc *sc; 1547 struct mbuf *m; 1548 struct mbuf_list ml; 1549 int error; 1550 1551 mq_delist(mq, &ml); 1552 if (ml_empty(&ml)) 1553 return; 1554 1555 NET_LOCK(); 1556 sc = pfsyncif; 1557 if (sc == NULL) { 1558 ml_purge(&ml); 1559 goto done; 1560 } 1561 1562 
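	/*
	 * Transmit every queued pfsync packet; an ip_output() error is
	 * counted but does not stop the remaining packets.
	 */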
while ((m = ml_dequeue(&ml)) != NULL) { 1563 if ((error = ip_output(m, NULL, NULL, IP_RAWOUTPUT, 1564 &sc->sc_imo, NULL, 0)) == 0) 1565 pfsyncstat_inc(pfsyncs_opackets); 1566 else { 1567 DPFPRINTF(LOG_DEBUG, 1568 "ip_output() @ %s failed (%d)\n", __func__, error); 1569 pfsyncstat_inc(pfsyncs_oerrors); 1570 } 1571 } 1572 done: 1573 NET_UNLOCK(); 1574 } 1575 1576 void 1577 pfsync_send_pkt(struct mbuf *m) 1578 { 1579 if (mq_enqueue(&pfsync_mq, m) != 0) { 1580 pfsyncstat_inc(pfsyncs_oerrors); 1581 DPFPRINTF(LOG_DEBUG, "mq_enqueue() @ %s failed, queue full\n", 1582 __func__); 1583 } else 1584 task_add(net_tq(0), &pfsync_task); 1585 } 1586 #endif /* WITH_PF_LOCK */ 1587 1588 void 1589 pfsync_sendout(void) 1590 { 1591 struct pfsync_softc *sc = pfsyncif; 1592 #if NBPFILTER > 0 1593 struct ifnet *ifp = &sc->sc_if; 1594 #endif 1595 struct mbuf *m; 1596 struct ip *ip; 1597 struct pfsync_header *ph; 1598 struct pfsync_subheader *subh; 1599 struct pf_state *st; 1600 struct pfsync_upd_req_item *ur; 1601 struct tdb *t; 1602 1603 int offset; 1604 int q, count = 0; 1605 1606 PF_ASSERT_LOCKED(); 1607 1608 if (sc == NULL || sc->sc_len == PFSYNC_MINPKT) 1609 return; 1610 1611 if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING) || 1612 #if NBPFILTER > 0 1613 (ifp->if_bpf == NULL && sc->sc_sync_ifidx == 0)) { 1614 #else 1615 sc->sc_sync_ifidx == 0) { 1616 #endif 1617 pfsync_drop(sc); 1618 return; 1619 } 1620 1621 MGETHDR(m, M_DONTWAIT, MT_DATA); 1622 if (m == NULL) { 1623 sc->sc_if.if_oerrors++; 1624 pfsyncstat_inc(pfsyncs_onomem); 1625 pfsync_drop(sc); 1626 return; 1627 } 1628 1629 if (max_linkhdr + sc->sc_len > MHLEN) { 1630 MCLGETI(m, M_DONTWAIT, NULL, max_linkhdr + sc->sc_len); 1631 if (!ISSET(m->m_flags, M_EXT)) { 1632 m_free(m); 1633 sc->sc_if.if_oerrors++; 1634 pfsyncstat_inc(pfsyncs_onomem); 1635 pfsync_drop(sc); 1636 return; 1637 } 1638 } 1639 m->m_data += max_linkhdr; 1640 m->m_len = m->m_pkthdr.len = sc->sc_len; 1641 1642 /* build the ip header */ 1643 ip = mtod(m, struct ip *); 1644 bcopy(&sc->sc_template, ip, sizeof(*ip)); 1645 offset = sizeof(*ip); 1646 1647 ip->ip_len = htons(m->m_pkthdr.len); 1648 ip->ip_id = htons(ip_randomid()); 1649 1650 /* build the pfsync header */ 1651 ph = (struct pfsync_header *)(m->m_data + offset); 1652 bzero(ph, sizeof(*ph)); 1653 offset += sizeof(*ph); 1654 1655 ph->version = PFSYNC_VERSION; 1656 ph->len = htons(sc->sc_len - sizeof(*ip)); 1657 bcopy(pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH); 1658 1659 if (!TAILQ_EMPTY(&sc->sc_upd_req_list)) { 1660 subh = (struct pfsync_subheader *)(m->m_data + offset); 1661 offset += sizeof(*subh); 1662 1663 count = 0; 1664 while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) { 1665 TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry); 1666 1667 bcopy(&ur->ur_msg, m->m_data + offset, 1668 sizeof(ur->ur_msg)); 1669 offset += sizeof(ur->ur_msg); 1670 1671 pool_put(&sc->sc_pool, ur); 1672 1673 count++; 1674 } 1675 1676 bzero(subh, sizeof(*subh)); 1677 subh->len = sizeof(ur->ur_msg) >> 2; 1678 subh->action = PFSYNC_ACT_UPD_REQ; 1679 subh->count = htons(count); 1680 } 1681 1682 /* has someone built a custom region for us to add? 
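	 * (set via pfsync_send_plus(), e.g. by pfsync_clear_states() and
	 * pfsync_bulk_status())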
*/ 1683 if (sc->sc_plus != NULL) { 1684 bcopy(sc->sc_plus, m->m_data + offset, sc->sc_pluslen); 1685 offset += sc->sc_pluslen; 1686 1687 sc->sc_plus = NULL; 1688 } 1689 1690 if (!TAILQ_EMPTY(&sc->sc_tdb_q)) { 1691 subh = (struct pfsync_subheader *)(m->m_data + offset); 1692 offset += sizeof(*subh); 1693 1694 count = 0; 1695 while ((t = TAILQ_FIRST(&sc->sc_tdb_q)) != NULL) { 1696 TAILQ_REMOVE(&sc->sc_tdb_q, t, tdb_sync_entry); 1697 pfsync_out_tdb(t, m->m_data + offset); 1698 offset += sizeof(struct pfsync_tdb); 1699 CLR(t->tdb_flags, TDBF_PFSYNC); 1700 count++; 1701 } 1702 1703 bzero(subh, sizeof(*subh)); 1704 subh->action = PFSYNC_ACT_TDB; 1705 subh->len = sizeof(struct pfsync_tdb) >> 2; 1706 subh->count = htons(count); 1707 } 1708 1709 /* walk the queues */ 1710 for (q = 0; q < PFSYNC_S_COUNT; q++) { 1711 if (TAILQ_EMPTY(&sc->sc_qs[q])) 1712 continue; 1713 1714 subh = (struct pfsync_subheader *)(m->m_data + offset); 1715 offset += sizeof(*subh); 1716 1717 count = 0; 1718 while ((st = TAILQ_FIRST(&sc->sc_qs[q])) != NULL) { 1719 TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list); 1720 #ifdef PFSYNC_DEBUG 1721 KASSERT(st->sync_state == q); 1722 #endif 1723 st->sync_state = PFSYNC_S_NONE; 1724 pfsync_qs[q].write(st, m->m_data + offset); 1725 offset += pfsync_qs[q].len; 1726 1727 pf_state_unref(st); 1728 count++; 1729 } 1730 1731 bzero(subh, sizeof(*subh)); 1732 subh->action = pfsync_qs[q].action; 1733 subh->len = pfsync_qs[q].len >> 2; 1734 subh->count = htons(count); 1735 } 1736 1737 /* we're done, let's put it on the wire */ 1738 #if NBPFILTER > 0 1739 if (ifp->if_bpf) { 1740 m->m_data += sizeof(*ip); 1741 m->m_len = m->m_pkthdr.len = sc->sc_len - sizeof(*ip); 1742 bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT); 1743 m->m_data -= sizeof(*ip); 1744 m->m_len = m->m_pkthdr.len = sc->sc_len; 1745 } 1746 1747 if (sc->sc_sync_ifidx == 0) { 1748 sc->sc_len = PFSYNC_MINPKT; 1749 m_freem(m); 1750 return; 1751 } 1752 #endif 1753 1754 /* start again */ 1755 sc->sc_len = PFSYNC_MINPKT; 1756 1757 sc->sc_if.if_opackets++; 1758 sc->sc_if.if_obytes += m->m_pkthdr.len; 1759 1760 m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain; 1761 1762 #ifdef WITH_PF_LOCK 1763 pfsync_send_pkt(m); 1764 #else /* !WITH_PF_LOCK */ 1765 if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL, 0) == 0) 1766 pfsyncstat_inc(pfsyncs_opackets); 1767 else 1768 pfsyncstat_inc(pfsyncs_oerrors); 1769 #endif /* WITH_PF_LOCK */ 1770 } 1771 1772 void 1773 pfsync_insert_state(struct pf_state *st) 1774 { 1775 struct pfsync_softc *sc = pfsyncif; 1776 1777 NET_ASSERT_LOCKED(); 1778 1779 if (ISSET(st->rule.ptr->rule_flag, PFRULE_NOSYNC) || 1780 st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) { 1781 SET(st->state_flags, PFSTATE_NOSYNC); 1782 return; 1783 } 1784 1785 if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING) || 1786 ISSET(st->state_flags, PFSTATE_NOSYNC)) 1787 return; 1788 1789 #ifdef PFSYNC_DEBUG 1790 KASSERT(st->sync_state == PFSYNC_S_NONE); 1791 #endif 1792 1793 if (sc->sc_len == PFSYNC_MINPKT) 1794 timeout_add_sec(&sc->sc_tmo, 1); 1795 1796 pfsync_q_ins(st, PFSYNC_S_INS); 1797 1798 st->sync_updates = 0; 1799 } 1800 1801 int 1802 pfsync_defer(struct pf_state *st, struct mbuf *m) 1803 { 1804 struct pfsync_softc *sc = pfsyncif; 1805 struct pfsync_deferral *pd; 1806 1807 NET_ASSERT_LOCKED(); 1808 1809 if (!sc->sc_defer || 1810 ISSET(st->state_flags, PFSTATE_NOSYNC) || 1811 m->m_flags & (M_BCAST|M_MCAST)) 1812 return (0); 1813 1814 if (sc->sc_deferred >= 128) { 1815 pd = TAILQ_FIRST(&sc->sc_deferrals); 1816 if (timeout_del(&pd->pd_tmo)) 
1817 pfsync_undefer(pd, 0); 1818 } 1819 1820 pd = pool_get(&sc->sc_pool, M_NOWAIT); 1821 if (pd == NULL) 1822 return (0); 1823 1824 m->m_pkthdr.pf.flags |= PF_TAG_GENERATED; 1825 SET(st->state_flags, PFSTATE_ACK); 1826 1827 pd->pd_st = pf_state_ref(st); 1828 pd->pd_m = m; 1829 1830 sc->sc_deferred++; 1831 TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry); 1832 1833 timeout_set_proc(&pd->pd_tmo, pfsync_defer_tmo, pd); 1834 timeout_add_msec(&pd->pd_tmo, 20); 1835 1836 schednetisr(NETISR_PFSYNC); 1837 1838 return (1); 1839 } 1840 1841 void 1842 pfsync_undefer(struct pfsync_deferral *pd, int drop) 1843 { 1844 struct pfsync_softc *sc = pfsyncif; 1845 struct pf_pdesc pdesc; 1846 1847 NET_ASSERT_LOCKED(); 1848 1849 if (sc == NULL) 1850 return; 1851 1852 TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry); 1853 sc->sc_deferred--; 1854 1855 CLR(pd->pd_st->state_flags, PFSTATE_ACK); 1856 if (drop) 1857 m_freem(pd->pd_m); 1858 else { 1859 if (pd->pd_st->rule.ptr->rt == PF_ROUTETO) { 1860 if (pf_setup_pdesc(&pdesc, 1861 pd->pd_st->key[PF_SK_WIRE]->af, 1862 pd->pd_st->direction, pd->pd_st->rt_kif, 1863 pd->pd_m, NULL) != PF_PASS) { 1864 m_freem(pd->pd_m); 1865 goto out; 1866 } 1867 switch (pd->pd_st->key[PF_SK_WIRE]->af) { 1868 case AF_INET: 1869 pf_route(&pdesc, 1870 pd->pd_st->rule.ptr, pd->pd_st); 1871 break; 1872 #ifdef INET6 1873 case AF_INET6: 1874 pf_route6(&pdesc, 1875 pd->pd_st->rule.ptr, pd->pd_st); 1876 break; 1877 #endif /* INET6 */ 1878 default: 1879 unhandled_af(pd->pd_st->key[PF_SK_WIRE]->af); 1880 } 1881 pd->pd_m = pdesc.m; 1882 } else { 1883 switch (pd->pd_st->key[PF_SK_WIRE]->af) { 1884 case AF_INET: 1885 ip_output(pd->pd_m, NULL, NULL, 0, NULL, NULL, 1886 0); 1887 break; 1888 #ifdef INET6 1889 case AF_INET6: 1890 ip6_output(pd->pd_m, NULL, NULL, 0, 1891 NULL, NULL); 1892 break; 1893 #endif /* INET6 */ 1894 default: 1895 unhandled_af(pd->pd_st->key[PF_SK_WIRE]->af); 1896 } 1897 } 1898 } 1899 out: 1900 pf_state_unref(pd->pd_st); 1901 pool_put(&sc->sc_pool, pd); 1902 } 1903 1904 void 1905 pfsync_defer_tmo(void *arg) 1906 { 1907 NET_LOCK(); 1908 pfsync_undefer(arg, 0); 1909 NET_UNLOCK(); 1910 } 1911 1912 void 1913 pfsync_deferred(struct pf_state *st, int drop) 1914 { 1915 struct pfsync_softc *sc = pfsyncif; 1916 struct pfsync_deferral *pd; 1917 1918 NET_ASSERT_LOCKED(); 1919 1920 TAILQ_FOREACH(pd, &sc->sc_deferrals, pd_entry) { 1921 if (pd->pd_st == st) { 1922 if (timeout_del(&pd->pd_tmo)) 1923 pfsync_undefer(pd, drop); 1924 return; 1925 } 1926 } 1927 1928 panic("pfsync_deferred: unable to find deferred state"); 1929 } 1930 1931 void 1932 pfsync_update_state_locked(struct pf_state *st) 1933 { 1934 struct pfsync_softc *sc = pfsyncif; 1935 int sync = 0; 1936 1937 NET_ASSERT_LOCKED(); 1938 PF_ASSERT_LOCKED(); 1939 1940 if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING)) 1941 return; 1942 1943 if (ISSET(st->state_flags, PFSTATE_ACK)) 1944 pfsync_deferred(st, 0); 1945 if (ISSET(st->state_flags, PFSTATE_NOSYNC)) { 1946 if (st->sync_state != PFSYNC_S_NONE) 1947 pfsync_q_del(st); 1948 return; 1949 } 1950 1951 if (sc->sc_len == PFSYNC_MINPKT) 1952 timeout_add_sec(&sc->sc_tmo, 1); 1953 1954 switch (st->sync_state) { 1955 case PFSYNC_S_UPD_C: 1956 case PFSYNC_S_UPD: 1957 case PFSYNC_S_INS: 1958 /* we're already handling it */ 1959 1960 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) { 1961 st->sync_updates++; 1962 if (st->sync_updates >= sc->sc_maxupdates) 1963 sync = 1; 1964 } 1965 break; 1966 1967 case PFSYNC_S_IACK: 1968 pfsync_q_del(st); 1969 case PFSYNC_S_NONE: 1970 pfsync_q_ins(st, 
PFSYNC_S_UPD_C); 1971 st->sync_updates = 0; 1972 break; 1973 1974 default: 1975 panic("pfsync_update_state: unexpected sync state %d", 1976 st->sync_state); 1977 } 1978 1979 if (sync || (getuptime() - st->pfsync_time) < 2) 1980 schednetisr(NETISR_PFSYNC); 1981 } 1982 1983 void 1984 pfsync_update_state(struct pf_state *st, int *have_pf_lock) 1985 { 1986 struct pfsync_softc *sc = pfsyncif; 1987 1988 if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING)) 1989 return; 1990 1991 if (*have_pf_lock == 0) { 1992 PF_LOCK(); 1993 *have_pf_lock = 1; 1994 } 1995 1996 pfsync_update_state_locked(st); 1997 } 1998 1999 void 2000 pfsync_cancel_full_update(struct pfsync_softc *sc) 2001 { 2002 if (timeout_pending(&sc->sc_bulkfail_tmo) || 2003 timeout_pending(&sc->sc_bulk_tmo)) { 2004 #if NCARP > 0 2005 if (!pfsync_sync_ok) 2006 carp_group_demote_adj(&sc->sc_if, -1, 2007 "pfsync bulk cancelled"); 2008 if (sc->sc_initial_bulk) { 2009 carp_group_demote_adj(&sc->sc_if, -32, 2010 "pfsync init"); 2011 sc->sc_initial_bulk = 0; 2012 } 2013 #endif 2014 pfsync_sync_ok = 1; 2015 DPFPRINTF(LOG_INFO, "cancelling bulk update"); 2016 } 2017 timeout_del(&sc->sc_bulkfail_tmo); 2018 timeout_del(&sc->sc_bulk_tmo); 2019 sc->sc_bulk_next = NULL; 2020 sc->sc_bulk_last = NULL; 2021 sc->sc_ureq_sent = 0; 2022 sc->sc_bulk_tries = 0; 2023 } 2024 2025 void 2026 pfsync_request_full_update(struct pfsync_softc *sc) 2027 { 2028 if (sc->sc_sync_ifidx != 0 && ISSET(sc->sc_if.if_flags, IFF_RUNNING)) { 2029 /* Request a full state table update. */ 2030 sc->sc_ureq_sent = getuptime(); 2031 #if NCARP > 0 2032 if (!sc->sc_link_demoted && pfsync_sync_ok) 2033 carp_group_demote_adj(&sc->sc_if, 1, 2034 "pfsync bulk start"); 2035 #endif 2036 pfsync_sync_ok = 0; 2037 DPFPRINTF(LOG_INFO, "requesting bulk update"); 2038 timeout_add(&sc->sc_bulkfail_tmo, 4 * hz + 2039 pf_pool_limits[PF_LIMIT_STATES].limit / 2040 ((sc->sc_if.if_mtu - PFSYNC_MINPKT) / 2041 sizeof(struct pfsync_state))); 2042 pfsync_request_update(0, 0); 2043 } 2044 } 2045 2046 void 2047 pfsync_request_update(u_int32_t creatorid, u_int64_t id) 2048 { 2049 struct pfsync_softc *sc = pfsyncif; 2050 struct pfsync_upd_req_item *item; 2051 size_t nlen = sizeof(struct pfsync_upd_req); 2052 2053 /* 2054 * this code does nothing to prevent multiple update requests for the 2055 * same state being generated. 
	 */

	item = pool_get(&sc->sc_pool, PR_NOWAIT);
	if (item == NULL) {
		/* XXX stats */
		return;
	}

	item->ur_msg.id = id;
	item->ur_msg.creatorid = creatorid;

	if (TAILQ_EMPTY(&sc->sc_upd_req_list))
		nlen += sizeof(struct pfsync_subheader);

	if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
		pfsync_sendout();

		nlen = sizeof(struct pfsync_subheader) +
		    sizeof(struct pfsync_upd_req);
	}

	TAILQ_INSERT_TAIL(&sc->sc_upd_req_list, item, ur_entry);
	sc->sc_len += nlen;

	schednetisr(NETISR_PFSYNC);
}

void
pfsync_update_state_req(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;

	if (sc == NULL)
		panic("pfsync_update_state_req: nonexistent instance");

	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
		if (st->sync_state != PFSYNC_S_NONE)
			pfsync_q_del(st);
		return;
	}

	switch (st->sync_state) {
	case PFSYNC_S_UPD_C:
	case PFSYNC_S_IACK:
		pfsync_q_del(st);
	case PFSYNC_S_NONE:
		pfsync_q_ins(st, PFSYNC_S_UPD);
		schednetisr(NETISR_PFSYNC);
		return;

	case PFSYNC_S_INS:
	case PFSYNC_S_UPD:
	case PFSYNC_S_DEL:
		/* we're already handling it */
		return;

	default:
		panic("pfsync_update_state_req: unexpected sync state %d",
		    st->sync_state);
	}
}

void
pfsync_delete_state(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;

	NET_ASSERT_LOCKED();

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
		return;

	if (ISSET(st->state_flags, PFSTATE_ACK))
		pfsync_deferred(st, 1);
	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
		if (st->sync_state != PFSYNC_S_NONE)
			pfsync_q_del(st);
		return;
	}

	if (sc->sc_len == PFSYNC_MINPKT)
		timeout_add_sec(&sc->sc_tmo, 1);

	switch (st->sync_state) {
	case PFSYNC_S_INS:
		/* we never got to tell the world so just forget about it */
		pfsync_q_del(st);
		return;

	case PFSYNC_S_UPD_C:
	case PFSYNC_S_UPD:
	case PFSYNC_S_IACK:
		pfsync_q_del(st);
		/*
		 * FALLTHROUGH to putting it on the del list.
		 * Note on reference count bookkeeping:
		 * pfsync_q_del() drops the reference held for queue
		 * ownership, but the st entry survives because our
		 * caller still holds a reference.
		 */

	case PFSYNC_S_NONE:
		/*
		 * We either fall through here, or there is no reference to
		 * st owned by the pfsync queues at this point.
		 *
		 * Calling pfsync_q_ins() puts st on the delete queue and
		 * grabs a reference for the delete queue.

void
pfsync_delete_state(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;

	NET_ASSERT_LOCKED();

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
		return;

	if (ISSET(st->state_flags, PFSTATE_ACK))
		pfsync_deferred(st, 1);
	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
		if (st->sync_state != PFSYNC_S_NONE)
			pfsync_q_del(st);
		return;
	}

	if (sc->sc_len == PFSYNC_MINPKT)
		timeout_add_sec(&sc->sc_tmo, 1);

	switch (st->sync_state) {
	case PFSYNC_S_INS:
		/* we never got to tell the world so just forget about it */
		pfsync_q_del(st);
		return;

	case PFSYNC_S_UPD_C:
	case PFSYNC_S_UPD:
	case PFSYNC_S_IACK:
		pfsync_q_del(st);
		/*
		 * FALLTHROUGH to putting it on the del list
		 * Note on reference count bookkeeping:
		 * pfsync_q_del() drops the reference held for queue
		 * ownership, but the st entry survives because our
		 * caller still holds a reference.
		 */

	case PFSYNC_S_NONE:
		/*
		 * Either we fall through from above, or no pfsync queue
		 * holds a reference to st at this point.
		 *
		 * Calling pfsync_q_ins() puts st on the delete queue and
		 * grabs a reference for that queue.
		 */
		pfsync_q_ins(st, PFSYNC_S_DEL);
		return;

	default:
		panic("pfsync_delete_state: unexpected sync state %d",
		    st->sync_state);
	}
}

void
pfsync_clear_states(u_int32_t creatorid, const char *ifname)
{
	struct pfsync_softc *sc = pfsyncif;
	struct {
		struct pfsync_subheader subh;
		struct pfsync_clr clr;
	} __packed r;

	NET_ASSERT_LOCKED();

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
		return;

	bzero(&r, sizeof(r));

	r.subh.action = PFSYNC_ACT_CLR;
	r.subh.len = sizeof(struct pfsync_clr) >> 2;
	r.subh.count = htons(1);

	strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname));
	r.clr.creatorid = creatorid;

	pfsync_send_plus(&r, sizeof(r));
}

void
pfsync_q_ins(struct pf_state *st, int q)
{
	struct pfsync_softc *sc = pfsyncif;
	size_t nlen = pfsync_qs[q].len;

	KASSERT(st->sync_state == PFSYNC_S_NONE);

#if defined(PFSYNC_DEBUG)
	if (sc->sc_len < PFSYNC_MINPKT)
		panic("pfsync pkt len is too low %zd", sc->sc_len);
#endif
	if (TAILQ_EMPTY(&sc->sc_qs[q]))
		nlen += sizeof(struct pfsync_subheader);

	if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
		pfsync_sendout();

		nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len;
	}

	sc->sc_len += nlen;
	pf_state_ref(st);
	TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list);
	st->sync_state = q;
}

void
pfsync_q_del(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;
	int q = st->sync_state;

	KASSERT(st->sync_state != PFSYNC_S_NONE);

	sc->sc_len -= pfsync_qs[q].len;
	TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list);
	st->sync_state = PFSYNC_S_NONE;
	pf_state_unref(st);

	if (TAILQ_EMPTY(&sc->sc_qs[q]))
		sc->sc_len -= sizeof(struct pfsync_subheader);
}

void
pfsync_update_tdb(struct tdb *t, int output)
{
	struct pfsync_softc *sc = pfsyncif;
	size_t nlen = sizeof(struct pfsync_tdb);

	if (sc == NULL)
		return;

	if (!ISSET(t->tdb_flags, TDBF_PFSYNC)) {
		if (TAILQ_EMPTY(&sc->sc_tdb_q))
			nlen += sizeof(struct pfsync_subheader);

		if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
			pfsync_sendout();

			nlen = sizeof(struct pfsync_subheader) +
			    sizeof(struct pfsync_tdb);
		}

		sc->sc_len += nlen;
		TAILQ_INSERT_TAIL(&sc->sc_tdb_q, t, tdb_sync_entry);
		SET(t->tdb_flags, TDBF_PFSYNC);
		t->tdb_updates = 0;
	} else {
		if (++t->tdb_updates >= sc->sc_maxupdates)
			schednetisr(NETISR_PFSYNC);
	}

	if (output)
		SET(t->tdb_flags, TDBF_PFSYNC_RPL);
	else
		CLR(t->tdb_flags, TDBF_PFSYNC_RPL);
}

void
pfsync_delete_tdb(struct tdb *t)
{
	struct pfsync_softc *sc = pfsyncif;

	if (sc == NULL || !ISSET(t->tdb_flags, TDBF_PFSYNC))
		return;

	sc->sc_len -= sizeof(struct pfsync_tdb);
	TAILQ_REMOVE(&sc->sc_tdb_q, t, tdb_sync_entry);
	CLR(t->tdb_flags, TDBF_PFSYNC);

	if (TAILQ_EMPTY(&sc->sc_tdb_q))
		sc->sc_len -= sizeof(struct pfsync_subheader);
}
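
/*
 * Editor's note (illustrative only): states and TDBs are kept alive on
 * the sync queues in different ways.  pfsync_q_ins() takes a pf_state
 * reference with pf_state_ref() before linking the state onto
 * sc_qs[q], and pfsync_q_del() drops it with pf_state_unref() after
 * unlinking, so a queued state cannot be freed out from under the send
 * path.  TDBs on sc_tdb_q are not reference counted here; they are
 * only marked with TDBF_PFSYNC while linked and unmarked again in
 * pfsync_delete_tdb().  A simplified pairing, assuming the caller
 * already holds its own reference to st:
 *
 *	pfsync_q_ins(st, PFSYNC_S_UPD);		queue reference taken
 *	...					state gets serialized
 *	pfsync_q_del(st);			queue reference dropped
 */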

void
pfsync_out_tdb(struct tdb *t, void *buf)
{
	struct pfsync_tdb *ut = buf;

	bzero(ut, sizeof(*ut));
	ut->spi = t->tdb_spi;
	bcopy(&t->tdb_dst, &ut->dst, sizeof(ut->dst));
	/*
	 * When a failover happens, the master's rpl is probably above
	 * what we see here (we may be up to a second late), so
	 * increase it a bit for outbound tdbs to manage most such
	 * situations.
	 *
	 * For now, just add an offset that is likely to be larger
	 * than the number of packets we can see in one second. The RFC
	 * just says the next packet must have a higher seq value.
	 *
	 * XXX What is a good algorithm for this? We could use
	 * a rate-determined increase, but to know it, we would have
	 * to extend struct tdb.
	 * XXX pt->rpl can wrap over MAXINT, but if so the real tdb
	 * will soon be replaced anyway. For now, just don't handle
	 * this edge case.
	 */
#define RPL_INCR 16384
	ut->rpl = htobe64(t->tdb_rpl + (ISSET(t->tdb_flags, TDBF_PFSYNC_RPL) ?
	    RPL_INCR : 0));
	ut->cur_bytes = htobe64(t->tdb_cur_bytes);
	ut->sproto = t->tdb_sproto;
	ut->rdomain = htons(t->tdb_rdomain);
}

void
pfsync_bulk_start(void)
{
	struct pfsync_softc *sc = pfsyncif;

	DPFPRINTF(LOG_INFO, "received bulk update request");

	if (TAILQ_EMPTY(&state_list))
		pfsync_bulk_status(PFSYNC_BUS_END);
	else {
		sc->sc_ureq_received = getuptime();

		if (sc->sc_bulk_next == NULL)
			sc->sc_bulk_next = TAILQ_FIRST(&state_list);
		sc->sc_bulk_last = sc->sc_bulk_next;

		pfsync_bulk_status(PFSYNC_BUS_START);
		timeout_add(&sc->sc_bulk_tmo, 0);
	}
}

void
pfsync_bulk_update(void *arg)
{
	struct pfsync_softc *sc;
	struct pf_state *st;
	int i = 0;

	NET_LOCK();
	sc = pfsyncif;
	if (sc == NULL)
		goto out;
	st = sc->sc_bulk_next;

	for (;;) {
		if (st->sync_state == PFSYNC_S_NONE &&
		    st->timeout < PFTM_MAX &&
		    st->pfsync_time <= sc->sc_ureq_received) {
			pfsync_update_state_req(st);
			i++;
		}

		st = TAILQ_NEXT(st, entry_list);
		if (st == NULL)
			st = TAILQ_FIRST(&state_list);

		if (st == sc->sc_bulk_last) {
			/* we're done */
			sc->sc_bulk_next = NULL;
			sc->sc_bulk_last = NULL;
			pfsync_bulk_status(PFSYNC_BUS_END);
			break;
		}

		if (i > 1 && (sc->sc_if.if_mtu - sc->sc_len) <
		    sizeof(struct pfsync_state)) {
			/* we've filled a packet */
			sc->sc_bulk_next = st;
			timeout_add(&sc->sc_bulk_tmo, 1);
			break;
		}
	}
 out:
	NET_UNLOCK();
}

void
pfsync_bulk_status(u_int8_t status)
{
	struct {
		struct pfsync_subheader subh;
		struct pfsync_bus bus;
	} __packed r;

	struct pfsync_softc *sc = pfsyncif;

	bzero(&r, sizeof(r));

	r.subh.action = PFSYNC_ACT_BUS;
	r.subh.len = sizeof(struct pfsync_bus) >> 2;
	r.subh.count = htons(1);

	r.bus.creatorid = pf_status.hostid;
	r.bus.endtime = htonl(getuptime() - sc->sc_ureq_received);
	r.bus.status = status;

	pfsync_send_plus(&r, sizeof(r));
}
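
/*
 * Editor's note (illustrative only): pfsync_bulk_update() above walks
 * the global state_list as a ring.  sc_bulk_last remembers where the
 * walk started and sc_bulk_next is the resume point.  Each run queues
 * update requests until the walk either comes back around to
 * sc_bulk_last (the bulk is complete and PFSYNC_BUS_END is sent) or
 * the pending packet has no room left for another pfsync_state, in
 * which case the walk reschedules itself one tick later and resumes
 * from sc_bulk_next.  Only states that are not already queued, not
 * about to be purged (timeout < PFTM_MAX) and not updated since the
 * bulk request arrived are sent this way; anything newer reaches the
 * peer through the normal update path instead.
 */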

void
pfsync_bulk_fail(void *arg)
{
	struct pfsync_softc *sc;

	NET_LOCK();
	sc = pfsyncif;
	if (sc == NULL)
		goto out;
	if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) {
		/* Try again */
		timeout_add_sec(&sc->sc_bulkfail_tmo, 5);
		pfsync_request_update(0, 0);
	} else {
		/* Pretend like the transfer was ok */
		sc->sc_ureq_sent = 0;
		sc->sc_bulk_tries = 0;
#if NCARP > 0
		if (!pfsync_sync_ok)
			carp_group_demote_adj(&sc->sc_if, -1,
			    sc->sc_link_demoted ?
			    "pfsync link state up" :
			    "pfsync bulk fail");
		if (sc->sc_initial_bulk) {
			carp_group_demote_adj(&sc->sc_if, -32,
			    "pfsync init");
			sc->sc_initial_bulk = 0;
		}
#endif
		pfsync_sync_ok = 1;
		sc->sc_link_demoted = 0;
		DPFPRINTF(LOG_ERR, "failed to receive bulk update");
	}
 out:
	NET_UNLOCK();
}

void
pfsync_send_plus(void *plus, size_t pluslen)
{
	struct pfsync_softc *sc = pfsyncif;

	if (sc->sc_len + pluslen > sc->sc_if.if_mtu)
		pfsync_sendout();

	sc->sc_plus = plus;
	sc->sc_len += (sc->sc_pluslen = pluslen);

	pfsync_sendout();
}

int
pfsync_up(void)
{
	struct pfsync_softc *sc = pfsyncif;

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
		return (0);

	return (1);
}

int
pfsync_state_in_use(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;

	if (sc == NULL)
		return (0);

	if (st->sync_state != PFSYNC_S_NONE ||
	    st == sc->sc_bulk_next ||
	    st == sc->sc_bulk_last)
		return (1);

	return (0);
}

void
pfsync_timeout(void *arg)
{
	NET_LOCK();
	PF_LOCK();
	pfsync_sendout();
	PF_UNLOCK();
	NET_UNLOCK();
}

/* this is a softnet/netisr handler */
void
pfsyncintr(void)
{
	PF_LOCK();
	pfsync_sendout();
	PF_UNLOCK();
}

int
pfsync_sysctl_pfsyncstat(void *oldp, size_t *oldlenp, void *newp)
{
	struct pfsyncstats pfsyncstat;

	CTASSERT(sizeof(pfsyncstat) == (pfsyncs_ncounters * sizeof(uint64_t)));
	memset(&pfsyncstat, 0, sizeof pfsyncstat);
	counters_read(pfsynccounters, (uint64_t *)&pfsyncstat,
	    pfsyncs_ncounters);
	return (sysctl_rdstruct(oldp, oldlenp, newp,
	    &pfsyncstat, sizeof(pfsyncstat)));
}

int
pfsync_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen)
{
	/* All sysctl names at this level are terminal. */
	if (namelen != 1)
		return (ENOTDIR);

	switch (name[0]) {
	case PFSYNCCTL_STATS:
		return (pfsync_sysctl_pfsyncstat(oldp, oldlenp, newp));
	default:
		return (ENOPROTOOPT);
	}
}
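
/*
 * Editor's note (illustrative only): pfsync_sysctl_pfsyncstat() above
 * assumes that struct pfsyncstats is laid out as a flat array of
 * 64-bit counters; the CTASSERT against pfsyncs_ncounters enforces
 * that assumption at compile time, and counters_read() fills the
 * struct by copying the per-CPU counters in index order.  Userland
 * reaches this data through the PFSYNCCTL_STATS sysctl handled by
 * pfsync_sysctl(), which is presumably where tools such as netstat(1)
 * obtain the pfsync statistics they display.
 */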