/*	$OpenBSD: if_pfsync.c,v 1.261 2018/10/03 01:24:14 visa Exp $	*/

/*
 * Copyright (c) 2002 Michael Shalayeff
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 2009 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/timeout.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/pool.h>
#include <sys/syslog.h>

#include <net/if.h>
#include <net/if_types.h>
#include <net/bpf.h>
#include <net/netisr.h>

#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/in_var.h>
#include <netinet/ip_var.h>
#include <netinet/ip_ipsp.h>
#include <netinet/ip_icmp.h>
#include <netinet/icmp6.h>
#include <netinet/tcp.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_fsm.h>
#include <netinet/udp.h>

#ifdef INET6
#include <netinet6/in6_var.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet6/nd6.h>
#endif /* INET6 */

#include "carp.h"
#if NCARP > 0
#include <netinet/ip_carp.h>
#endif

#define PF_DEBUGNAME	"pfsync: "
#include <net/pfvar.h>
#include <net/pfvar_priv.h>
#include <net/if_pfsync.h>

#include "bpfilter.h"
#include "pfsync.h"

#define PFSYNC_MINPKT ( \
	sizeof(struct ip) + \
	sizeof(struct pfsync_header))

int	pfsync_upd_tcp(struct pf_state *, struct pfsync_state_peer *,
	    struct pfsync_state_peer *);

int	pfsync_in_clr(caddr_t, int, int, int);
int	pfsync_in_iack(caddr_t, int, int, int);
int	pfsync_in_upd_c(caddr_t, int, int, int);
int	pfsync_in_ureq(caddr_t, int, int, int);
int	pfsync_in_del(caddr_t, int, int, int);
int	pfsync_in_del_c(caddr_t, int, int, int);
int	pfsync_in_bus(caddr_t, int, int, int);
int	pfsync_in_tdb(caddr_t, int, int, int);
int	pfsync_in_ins(caddr_t, int, int, int);
int	pfsync_in_upd(caddr_t, int, int, int);
int	pfsync_in_eof(caddr_t, int, int, int);

int	pfsync_in_error(caddr_t, int, int, int);

void	pfsync_update_state_locked(struct pf_state *);

struct {
	int	(*in)(caddr_t, int, int, int);
	size_t	len;
} pfsync_acts[] = {
	/* PFSYNC_ACT_CLR */
	{ pfsync_in_clr,	sizeof(struct pfsync_clr) },
	/* PFSYNC_ACT_OINS */
	{ pfsync_in_error,	0 },
	/* PFSYNC_ACT_INS_ACK */
	{ pfsync_in_iack,	sizeof(struct pfsync_ins_ack) },
	/* PFSYNC_ACT_OUPD */
	{ pfsync_in_error,	0 },
	/* PFSYNC_ACT_UPD_C */
	{ pfsync_in_upd_c,	sizeof(struct pfsync_upd_c) },
	/* PFSYNC_ACT_UPD_REQ */
	{ pfsync_in_ureq,	sizeof(struct pfsync_upd_req) },
	/* PFSYNC_ACT_DEL */
	{ pfsync_in_del,	sizeof(struct pfsync_state) },
	/* PFSYNC_ACT_DEL_C */
	{ pfsync_in_del_c,	sizeof(struct pfsync_del_c) },
	/* PFSYNC_ACT_INS_F */
	{ pfsync_in_error,	0 },
	/* PFSYNC_ACT_DEL_F */
	{ pfsync_in_error,	0 },
	/* PFSYNC_ACT_BUS */
	{ pfsync_in_bus,	sizeof(struct pfsync_bus) },
	/* PFSYNC_ACT_OTDB */
	{ pfsync_in_error,	0 },
	/* PFSYNC_ACT_EOF */
	{ pfsync_in_error,	0 },
	/* PFSYNC_ACT_INS */
	{ pfsync_in_ins,	sizeof(struct pfsync_state) },
	/* PFSYNC_ACT_UPD */
	{ pfsync_in_upd,	sizeof(struct pfsync_state) },
	/* PFSYNC_ACT_TDB */
	{ pfsync_in_tdb,	sizeof(struct pfsync_tdb) },
};
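
/*
 * N.B.: pfsync_acts[] is indexed directly by the subheader action code,
 * so the entries above must stay in wire-protocol order.
 */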

struct pfsync_q {
	void		(*write)(struct pf_state *, void *);
	size_t		len;
	u_int8_t	action;
};

/* we have one of these for every PFSYNC_S_ */
void	pfsync_out_state(struct pf_state *, void *);
void	pfsync_out_iack(struct pf_state *, void *);
void	pfsync_out_upd_c(struct pf_state *, void *);
void	pfsync_out_del(struct pf_state *, void *);

struct pfsync_q pfsync_qs[] = {
	{ pfsync_out_iack,  sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK },
	{ pfsync_out_upd_c, sizeof(struct pfsync_upd_c),   PFSYNC_ACT_UPD_C },
	{ pfsync_out_del,   sizeof(struct pfsync_del_c),   PFSYNC_ACT_DEL_C },
	{ pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_INS },
	{ pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_UPD }
};

void	pfsync_q_ins(struct pf_state *, int);
void	pfsync_q_del(struct pf_state *);

struct pfsync_upd_req_item {
	TAILQ_ENTRY(pfsync_upd_req_item)	ur_entry;
	struct pfsync_upd_req			ur_msg;
};
TAILQ_HEAD(pfsync_upd_reqs, pfsync_upd_req_item);

struct pfsync_deferral {
	TAILQ_ENTRY(pfsync_deferral)		 pd_entry;
	struct pf_state				*pd_st;
	struct mbuf				*pd_m;
	struct timeout				 pd_tmo;
};
TAILQ_HEAD(pfsync_deferrals, pfsync_deferral);

#define PFSYNC_PLSIZE	MAX(sizeof(struct pfsync_upd_req_item), \
	    sizeof(struct pfsync_deferral))

void	pfsync_out_tdb(struct tdb *, void *);

struct pfsync_softc {
	struct ifnet		 sc_if;
	struct ifnet		*sc_sync_if;

	struct pool		 sc_pool;

	struct ip_moptions	 sc_imo;

	struct in_addr		 sc_sync_peer;
	u_int8_t		 sc_maxupdates;

	struct ip		 sc_template;

	struct pf_state_queue	 sc_qs[PFSYNC_S_COUNT];
	size_t			 sc_len;

	struct pfsync_upd_reqs	 sc_upd_req_list;

	int			 sc_initial_bulk;
	int			 sc_link_demoted;

	int			 sc_defer;
	struct pfsync_deferrals	 sc_deferrals;
	u_int			 sc_deferred;

	void			*sc_plus;
	size_t			 sc_pluslen;

	u_int32_t		 sc_ureq_sent;
	int			 sc_bulk_tries;
	struct timeout		 sc_bulkfail_tmo;

	u_int32_t		 sc_ureq_received;
	struct pf_state		*sc_bulk_next;
	struct pf_state		*sc_bulk_last;
	struct timeout		 sc_bulk_tmo;

	TAILQ_HEAD(, tdb)	 sc_tdb_q;

	void			*sc_lhcookie;
	void			*sc_dhcookie;

	struct timeout		 sc_tmo;
};

struct pfsync_softc	*pfsyncif = NULL;
struct cpumem		*pfsynccounters;

void	pfsyncattach(int);
int	pfsync_clone_create(struct if_clone *, int);
int	pfsync_clone_destroy(struct ifnet *);
int	pfsync_alloc_scrub_memory(struct pfsync_state_peer *,
	    struct pf_state_peer *);
void	pfsync_update_net_tdb(struct pfsync_tdb *);
int	pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
	    struct rtentry *);
int	pfsyncioctl(struct ifnet *, u_long, caddr_t);
void	pfsyncstart(struct ifnet *);
void	pfsync_syncdev_state(void *);
void	pfsync_ifdetach(void *);

void	pfsync_deferred(struct pf_state *, int);
void	pfsync_undefer(struct pfsync_deferral *, int);
void	pfsync_defer_tmo(void *);

void	pfsync_cancel_full_update(struct pfsync_softc *);
void	pfsync_request_full_update(struct pfsync_softc *);
void	pfsync_request_update(u_int32_t, u_int64_t);
void	pfsync_update_state_req(struct pf_state *);

void	pfsync_drop(struct pfsync_softc *);
void	pfsync_sendout(void);
void	pfsync_send_plus(void *, size_t);
void	pfsync_timeout(void *);
void	pfsync_tdb_timeout(void *);

void	pfsync_bulk_start(void);
void	pfsync_bulk_status(u_int8_t);
void	pfsync_bulk_update(void *);
void	pfsync_bulk_fail(void *);
#ifdef WITH_PF_LOCK
void	pfsync_send_dispatch(void *);
void	pfsync_send_pkt(struct mbuf *);
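
/*
 * With WITH_PF_LOCK, packets are not transmitted directly from under the
 * PF lock; pfsync_send_pkt() queues them on pfsync_mq and pfsync_task
 * below does the actual ip_output() from a softnet task context.
 */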
static struct mbuf_queue	pfsync_mq;
static struct task	pfsync_task =
    TASK_INITIALIZER(pfsync_send_dispatch, &pfsync_mq);
#endif /* WITH_PF_LOCK */

#define PFSYNC_MAX_BULKTRIES	12
int	pfsync_sync_ok;

struct if_clone	pfsync_cloner =
    IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create, pfsync_clone_destroy);

void
pfsyncattach(int npfsync)
{
	if_clone_attach(&pfsync_cloner);
	pfsynccounters = counters_alloc(pfsyncs_ncounters);
#ifdef WITH_PF_LOCK
	mq_init(&pfsync_mq, 4096, IPL_SOFTNET);
#endif /* WITH_PF_LOCK */
}

int
pfsync_clone_create(struct if_clone *ifc, int unit)
{
	struct pfsync_softc *sc;
	struct ifnet *ifp;
	int q;

	if (unit != 0)
		return (EINVAL);

	pfsync_sync_ok = 1;

	sc = malloc(sizeof(*pfsyncif), M_DEVBUF, M_WAITOK|M_ZERO);
	for (q = 0; q < PFSYNC_S_COUNT; q++)
		TAILQ_INIT(&sc->sc_qs[q]);

	pool_init(&sc->sc_pool, PFSYNC_PLSIZE, 0, IPL_SOFTNET, 0, "pfsync",
	    NULL);
	TAILQ_INIT(&sc->sc_upd_req_list);
	TAILQ_INIT(&sc->sc_deferrals);
	sc->sc_deferred = 0;

	TAILQ_INIT(&sc->sc_tdb_q);

	sc->sc_len = PFSYNC_MINPKT;
	sc->sc_maxupdates = 128;

	sc->sc_imo.imo_membership = (struct in_multi **)malloc(
	    (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_IPMOPTS,
	    M_WAITOK | M_ZERO);
	sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;

	ifp = &sc->sc_if;
	snprintf(ifp->if_xname, sizeof ifp->if_xname, "pfsync%d", unit);
	ifp->if_softc = sc;
	ifp->if_ioctl = pfsyncioctl;
	ifp->if_output = pfsyncoutput;
	ifp->if_start = pfsyncstart;
	ifp->if_type = IFT_PFSYNC;
	IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN);
	ifp->if_hdrlen = sizeof(struct pfsync_header);
	ifp->if_mtu = ETHERMTU;
	ifp->if_xflags = IFXF_CLONED;
	timeout_set_proc(&sc->sc_tmo, pfsync_timeout, NULL);
	timeout_set_proc(&sc->sc_bulk_tmo, pfsync_bulk_update, NULL);
	timeout_set_proc(&sc->sc_bulkfail_tmo, pfsync_bulk_fail, NULL);

	if_attach(ifp);
	if_alloc_sadl(ifp);

#if NCARP > 0
	if_addgroup(ifp, "carp");
#endif

#if NBPFILTER > 0
	bpfattach(&sc->sc_if.if_bpf, ifp, DLT_PFSYNC, PFSYNC_HDRLEN);
#endif

	pfsyncif = sc;

	return (0);
}

int
pfsync_clone_destroy(struct ifnet *ifp)
{
	struct pfsync_softc *sc = ifp->if_softc;
	struct pfsync_deferral *pd;

	NET_LOCK();

#if NCARP > 0
	if (!pfsync_sync_ok)
		carp_group_demote_adj(&sc->sc_if, -1, "pfsync destroy");
	if (sc->sc_link_demoted)
		carp_group_demote_adj(&sc->sc_if, -1, "pfsync destroy");
#endif
	if (sc->sc_sync_if) {
		hook_disestablish(
		    sc->sc_sync_if->if_linkstatehooks,
		    sc->sc_lhcookie);
		hook_disestablish(sc->sc_sync_if->if_detachhooks,
		    sc->sc_dhcookie);
	}

	/* XXXSMP breaks atomicity */
	NET_UNLOCK();
	if_detach(ifp);
	NET_LOCK();

	pfsync_drop(sc);

	while (sc->sc_deferred > 0) {
		pd = TAILQ_FIRST(&sc->sc_deferrals);
		timeout_del(&pd->pd_tmo);
		pfsync_undefer(pd, 0);
	}

	pfsyncif = NULL;
	timeout_del(&sc->sc_bulkfail_tmo);
	timeout_del(&sc->sc_bulk_tmo);
	timeout_del(&sc->sc_tmo);

	NET_UNLOCK();

	pool_destroy(&sc->sc_pool);
	free(sc->sc_imo.imo_membership, M_IPMOPTS, 0);
	free(sc, M_DEVBUF, sizeof(*sc));

	return (0);
}

/*
 * Start output on the pfsync interface.
 */
void
pfsyncstart(struct ifnet *ifp)
{
	IFQ_PURGE(&ifp->if_snd);
}

void
pfsync_syncdev_state(void *arg)
{
	struct pfsync_softc *sc = arg;

	if (!sc->sc_sync_if || !(sc->sc_if.if_flags & IFF_UP))
		return;

	if (sc->sc_sync_if->if_link_state == LINK_STATE_DOWN) {
		sc->sc_if.if_flags &= ~IFF_RUNNING;
		if (!sc->sc_link_demoted) {
#if NCARP > 0
			carp_group_demote_adj(&sc->sc_if, 1,
			    "pfsync link state down");
#endif
			sc->sc_link_demoted = 1;
		}

		/* drop everything */
		timeout_del(&sc->sc_tmo);
		pfsync_drop(sc);

		pfsync_cancel_full_update(sc);
	} else if (sc->sc_link_demoted) {
		sc->sc_if.if_flags |= IFF_RUNNING;

		pfsync_request_full_update(sc);
	}
}

void
pfsync_ifdetach(void *arg)
{
	struct pfsync_softc *sc = arg;

	sc->sc_sync_if = NULL;
}

int
pfsync_alloc_scrub_memory(struct pfsync_state_peer *s,
    struct pf_state_peer *d)
{
	if (s->scrub.scrub_flag && d->scrub == NULL) {
		d->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT | PR_ZERO);
		if (d->scrub == NULL)
			return (ENOMEM);
	}

	return (0);
}

void
pfsync_state_export(struct pfsync_state *sp, struct pf_state *st)
{
	pf_state_export(sp, st);
}

int
pfsync_state_import(struct pfsync_state *sp, int flags)
{
	struct pf_state *st = NULL;
	struct pf_state_key *skw = NULL, *sks = NULL;
	struct pf_rule *r = NULL;
	struct pfi_kif *kif;
	int pool_flags;
	int error;

	if (sp->creatorid == 0) {
		DPFPRINTF(LOG_NOTICE, "pfsync_state_import: "
		    "invalid creator id: %08x", ntohl(sp->creatorid));
		return (EINVAL);
	}

	if ((kif = pfi_kif_get(sp->ifname)) == NULL) {
		DPFPRINTF(LOG_NOTICE, "pfsync_state_import: "
		    "unknown interface: %s", sp->ifname);
		if (flags & PFSYNC_SI_IOCTL)
			return (EINVAL);
		return (0);	/* skip this state */
	}

	if (sp->af == 0)
		return (0);	/* skip this state */

	/*
	 * If the ruleset checksums match or the state is coming from the
	 * ioctl, it's safe to associate the state with the rule of that
	 * number.
	 */
	if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) &&
	    (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) <
	    pf_main_ruleset.rules.active.rcount)
		r = pf_main_ruleset.rules.active.ptr_array[ntohl(sp->rule)];
	else
		r = &pf_default_rule;

	if (r->max_states && r->states_cur >= r->max_states)
		goto cleanup;

	if (flags & PFSYNC_SI_IOCTL)
		pool_flags = PR_WAITOK | PR_LIMITFAIL | PR_ZERO;
	else
		pool_flags = PR_NOWAIT | PR_LIMITFAIL | PR_ZERO;

	if ((st = pool_get(&pf_state_pl, pool_flags)) == NULL)
		goto cleanup;

	if ((skw = pf_alloc_state_key(pool_flags)) == NULL)
		goto cleanup;

	if ((sp->key[PF_SK_WIRE].af &&
	    (sp->key[PF_SK_WIRE].af != sp->key[PF_SK_STACK].af)) ||
	    PF_ANEQ(&sp->key[PF_SK_WIRE].addr[0],
	    &sp->key[PF_SK_STACK].addr[0], sp->af) ||
	    PF_ANEQ(&sp->key[PF_SK_WIRE].addr[1],
	    &sp->key[PF_SK_STACK].addr[1], sp->af) ||
	    sp->key[PF_SK_WIRE].port[0] != sp->key[PF_SK_STACK].port[0] ||
	    sp->key[PF_SK_WIRE].port[1] != sp->key[PF_SK_STACK].port[1] ||
	    sp->key[PF_SK_WIRE].rdomain != sp->key[PF_SK_STACK].rdomain) {
		if ((sks = pf_alloc_state_key(pool_flags)) == NULL)
			goto cleanup;
	} else
		sks = skw;

	/* allocate memory for scrub info */
	if (pfsync_alloc_scrub_memory(&sp->src, &st->src) ||
	    pfsync_alloc_scrub_memory(&sp->dst, &st->dst))
		goto cleanup;

	/* copy to state key(s) */
	skw->addr[0] = sp->key[PF_SK_WIRE].addr[0];
	skw->addr[1] = sp->key[PF_SK_WIRE].addr[1];
	skw->port[0] = sp->key[PF_SK_WIRE].port[0];
	skw->port[1] = sp->key[PF_SK_WIRE].port[1];
	skw->rdomain = ntohs(sp->key[PF_SK_WIRE].rdomain);
	PF_REF_INIT(skw->refcnt);
	skw->proto = sp->proto;
	if (!(skw->af = sp->key[PF_SK_WIRE].af))
		skw->af = sp->af;
	if (sks != skw) {
		sks->addr[0] = sp->key[PF_SK_STACK].addr[0];
		sks->addr[1] = sp->key[PF_SK_STACK].addr[1];
		sks->port[0] = sp->key[PF_SK_STACK].port[0];
		sks->port[1] = sp->key[PF_SK_STACK].port[1];
		sks->rdomain = ntohs(sp->key[PF_SK_STACK].rdomain);
		PF_REF_INIT(sks->refcnt);
		if (!(sks->af = sp->key[PF_SK_STACK].af))
			sks->af = sp->af;
		if (sks->af != skw->af) {
			switch (sp->proto) {
			case IPPROTO_ICMP:
				sks->proto = IPPROTO_ICMPV6;
				break;
			case IPPROTO_ICMPV6:
				sks->proto = IPPROTO_ICMP;
				break;
			default:
				sks->proto = sp->proto;
			}
		} else
			sks->proto = sp->proto;
	}
	st->rtableid[PF_SK_WIRE] = ntohl(sp->rtableid[PF_SK_WIRE]);
	st->rtableid[PF_SK_STACK] = ntohl(sp->rtableid[PF_SK_STACK]);

	/* copy to state */
	bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr));
	st->creation = time_uptime - ntohl(sp->creation);
	st->expire = time_uptime;
	if (ntohl(sp->expire)) {
		u_int32_t timeout;

		timeout = r->timeout[sp->timeout];
		if (!timeout)
			timeout = pf_default_rule.timeout[sp->timeout];

		/* sp->expire may have been adaptively scaled by export. */
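		/*
		 * In other words: sp->expire carries the remaining
		 * lifetime, so rewinding st->expire by
		 * (timeout - remaining) makes the local timer agree
		 * with the peer's view of the state.
		 */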
		st->expire -= timeout - ntohl(sp->expire);
	}

	st->direction = sp->direction;
	st->log = sp->log;
	st->timeout = sp->timeout;
	st->state_flags = ntohs(sp->state_flags);
	st->max_mss = ntohs(sp->max_mss);
	st->min_ttl = sp->min_ttl;
	st->set_tos = sp->set_tos;
	st->set_prio[0] = sp->set_prio[0];
	st->set_prio[1] = sp->set_prio[1];

	st->id = sp->id;
	st->creatorid = sp->creatorid;
	pf_state_peer_ntoh(&sp->src, &st->src);
	pf_state_peer_ntoh(&sp->dst, &st->dst);

	st->rule.ptr = r;
	st->anchor.ptr = NULL;
	st->rt_kif = NULL;

	st->pfsync_time = time_uptime;
	st->sync_state = PFSYNC_S_NONE;

	refcnt_init(&st->refcnt);

	/* XXX when we have anchors, use STATE_INC_COUNTERS */
	r->states_cur++;
	r->states_tot++;

	if (!ISSET(flags, PFSYNC_SI_IOCTL))
		SET(st->state_flags, PFSTATE_NOSYNC);

	/*
	 * We just set the PFSTATE_NOSYNC bit, which prevents
	 * pfsync_insert_state() from inserting the state into pfsync.
	 */
	if (pf_state_insert(kif, &skw, &sks, st) != 0) {
		/* XXX when we have anchors, use STATE_DEC_COUNTERS */
		r->states_cur--;
		error = EEXIST;
		goto cleanup_state;
	}

	if (!ISSET(flags, PFSYNC_SI_IOCTL)) {
		CLR(st->state_flags, PFSTATE_NOSYNC);
		if (ISSET(st->state_flags, PFSTATE_ACK)) {
			pfsync_q_ins(st, PFSYNC_S_IACK);
			schednetisr(NETISR_PFSYNC);
		}
	}
	CLR(st->state_flags, PFSTATE_ACK);

	return (0);

cleanup:
	error = ENOMEM;
	if (skw == sks)
		sks = NULL;
	if (skw != NULL)
		pool_put(&pf_state_key_pl, skw);
	if (sks != NULL)
		pool_put(&pf_state_key_pl, sks);

cleanup_state:	/* pf_state_insert frees the state keys */
	if (st) {
		if (st->dst.scrub)
			pool_put(&pf_state_scrub_pl, st->dst.scrub);
		if (st->src.scrub)
			pool_put(&pf_state_scrub_pl, st->src.scrub);
		pool_put(&pf_state_pl, st);
	}
	return (error);
}

int
pfsync_input(struct mbuf **mp, int *offp, int proto, int af)
{
	struct mbuf *n, *m = *mp;
	struct pfsync_softc *sc = pfsyncif;
	struct ip *ip = mtod(m, struct ip *);
	struct pfsync_header *ph;
	struct pfsync_subheader subh;
	int offset, noff, len, count, mlen, flags = 0;
	int e;

	NET_ASSERT_LOCKED();

	pfsyncstat_inc(pfsyncs_ipackets);

	/* verify that we have a sync interface configured */
	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
	    sc->sc_sync_if == NULL || !pf_status.running)
		goto done;

	/* verify that the packet came in on the right interface */
	if (sc->sc_sync_if->if_index != m->m_pkthdr.ph_ifidx) {
		pfsyncstat_inc(pfsyncs_badif);
		goto done;
	}

	sc->sc_if.if_ipackets++;
	sc->sc_if.if_ibytes += m->m_pkthdr.len;
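
	/*
	 * The payload past the IP header is laid out as:
	 *
	 *	struct pfsync_header (version, len, pf checksum)
	 *	one or more of:
	 *		struct pfsync_subheader (action, len >> 2, count)
	 *		count messages of (subheader len << 2) bytes each
	 *
	 * The loop below walks the subheaders and hands each message
	 * block to the matching pfsync_acts[] input handler.
	 */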
	/* verify that the IP TTL is 255. */
	if (ip->ip_ttl != PFSYNC_DFLTTL) {
		pfsyncstat_inc(pfsyncs_badttl);
		goto done;
	}

	offset = ip->ip_hl << 2;
	n = m_pulldown(m, offset, sizeof(*ph), &noff);
	if (n == NULL) {
		pfsyncstat_inc(pfsyncs_hdrops);
		return IPPROTO_DONE;
	}
	ph = (struct pfsync_header *)(n->m_data + noff);

	/* verify the version */
	if (ph->version != PFSYNC_VERSION) {
		pfsyncstat_inc(pfsyncs_badver);
		goto done;
	}
	len = ntohs(ph->len) + offset;
	if (m->m_pkthdr.len < len) {
		pfsyncstat_inc(pfsyncs_badlen);
		goto done;
	}

	if (!bcmp(&ph->pfcksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH))
		flags = PFSYNC_SI_CKSUM;

	offset += sizeof(*ph);
	while (offset <= len - sizeof(subh)) {
		m_copydata(m, offset, sizeof(subh), (caddr_t)&subh);
		offset += sizeof(subh);

		mlen = subh.len << 2;
		count = ntohs(subh.count);

		if (subh.action >= PFSYNC_ACT_MAX ||
		    subh.action >= nitems(pfsync_acts) ||
		    mlen < pfsync_acts[subh.action].len) {
			/*
			 * subheaders are always followed by at least one
			 * message, so if the peer is new enough to tell us
			 * how big its messages are then we know enough to
			 * skip them.
			 */
			if (count > 0 && mlen > 0) {
				offset += count * mlen;
				continue;
			}
			pfsyncstat_inc(pfsyncs_badact);
			goto done;
		}

		n = m_pulldown(m, offset, mlen * count, &noff);
		if (n == NULL) {
			pfsyncstat_inc(pfsyncs_badlen);
			return IPPROTO_DONE;
		}

		PF_LOCK();
		e = pfsync_acts[subh.action].in(n->m_data + noff, mlen, count,
		    flags);
		PF_UNLOCK();
		if (e != 0)
			goto done;

		offset += mlen * count;
	}

done:
	m_freem(m);
	return IPPROTO_DONE;
}

int
pfsync_in_clr(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_clr *clr;
	struct pf_state *st, *nexts;
	struct pfi_kif *kif;
	u_int32_t creatorid;
	int i;

	for (i = 0; i < count; i++) {
		clr = (struct pfsync_clr *)(buf + len * i);
		kif = NULL;
		creatorid = clr->creatorid;
		if (strlen(clr->ifname) &&
		    (kif = pfi_kif_find(clr->ifname)) == NULL)
			continue;

		for (st = RB_MIN(pf_state_tree_id, &tree_id); st; st = nexts) {
			nexts = RB_NEXT(pf_state_tree_id, &tree_id, st);
			if (st->creatorid == creatorid &&
			    ((kif && st->kif == kif) || !kif)) {
				SET(st->state_flags, PFSTATE_NOSYNC);
				pf_remove_state(st);
			}
		}
	}

	return (0);
}

int
pfsync_in_ins(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_state *sp;
	sa_family_t af1, af2;
	int i;

	for (i = 0; i < count; i++) {
		sp = (struct pfsync_state *)(buf + len * i);
		af1 = sp->key[0].af;
		af2 = sp->key[1].af;

		/* check for invalid values */
		if (sp->timeout >= PFTM_MAX ||
		    sp->src.state > PF_TCPS_PROXY_DST ||
		    sp->dst.state > PF_TCPS_PROXY_DST ||
		    sp->direction > PF_OUT ||
		    (((af1 || af2) &&
		    ((af1 != AF_INET && af1 != AF_INET6) ||
		    (af2 != AF_INET && af2 != AF_INET6))) ||
		    (sp->af != AF_INET && sp->af != AF_INET6))) {
			DPFPRINTF(LOG_NOTICE,
			    "pfsync_input: PFSYNC5_ACT_INS: invalid value");
			pfsyncstat_inc(pfsyncs_badval);
			continue;
		}

		if (pfsync_state_import(sp, flags) == ENOMEM) {
			/* drop out, but process the rest of the actions */
			break;
		}
	}

	return (0);
}
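
/*
 * An INS_ACK tells us the peer has seen our state insert; any packet we
 * deferred while waiting for that acknowledgement (see pfsync_defer())
 * can now be released.
 */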
int
pfsync_in_iack(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_ins_ack *ia;
	struct pf_state_cmp id_key;
	struct pf_state *st;
	int i;

	for (i = 0; i < count; i++) {
		ia = (struct pfsync_ins_ack *)(buf + len * i);

		id_key.id = ia->id;
		id_key.creatorid = ia->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL)
			continue;

		if (ISSET(st->state_flags, PFSTATE_ACK))
			pfsync_deferred(st, 0);
	}

	return (0);
}

int
pfsync_upd_tcp(struct pf_state *st, struct pfsync_state_peer *src,
    struct pfsync_state_peer *dst)
{
	int sync = 0;

	/*
	 * The state should never go backwards except
	 * for syn-proxy states.  Neither should the
	 * sequence window slide backwards.
	 */
	if ((st->src.state > src->state &&
	    (st->src.state < PF_TCPS_PROXY_SRC ||
	    src->state >= PF_TCPS_PROXY_SRC)) ||

	    (st->src.state == src->state &&
	    SEQ_GT(st->src.seqlo, ntohl(src->seqlo))))
		sync++;
	else
		pf_state_peer_ntoh(src, &st->src);

	if ((st->dst.state > dst->state) ||

	    (st->dst.state >= TCPS_SYN_SENT &&
	    SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo))))
		sync++;
	else
		pf_state_peer_ntoh(dst, &st->dst);

	return (sync);
}

int
pfsync_in_upd(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_state *sp;
	struct pf_state_cmp id_key;
	struct pf_state *st;
	int sync;

	int i;

	for (i = 0; i < count; i++) {
		sp = (struct pfsync_state *)(buf + len * i);

		/* check for invalid values */
		if (sp->timeout >= PFTM_MAX ||
		    sp->src.state > PF_TCPS_PROXY_DST ||
		    sp->dst.state > PF_TCPS_PROXY_DST) {
			DPFPRINTF(LOG_NOTICE,
			    "pfsync_input: PFSYNC_ACT_UPD: invalid value");
			pfsyncstat_inc(pfsyncs_badval);
			continue;
		}

		id_key.id = sp->id;
		id_key.creatorid = sp->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL) {
			/* insert the update */
			if (pfsync_state_import(sp, flags))
				pfsyncstat_inc(pfsyncs_badstate);
			continue;
		}

		if (ISSET(st->state_flags, PFSTATE_ACK))
			pfsync_deferred(st, 1);

		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
			sync = pfsync_upd_tcp(st, &sp->src, &sp->dst);
		else {
			sync = 0;

			/*
			 * Non-TCP protocol state machines always go
			 * forwards.
			 */
			if (st->src.state > sp->src.state)
				sync++;
			else
				pf_state_peer_ntoh(&sp->src, &st->src);

			if (st->dst.state > sp->dst.state)
				sync++;
			else
				pf_state_peer_ntoh(&sp->dst, &st->dst);
		}

		if (sync < 2) {
			pfsync_alloc_scrub_memory(&sp->dst, &st->dst);
			pf_state_peer_ntoh(&sp->dst, &st->dst);
			st->expire = time_uptime;
			st->timeout = sp->timeout;
		}
		st->pfsync_time = time_uptime;

		if (sync) {
			pfsyncstat_inc(pfsyncs_stale);

			pfsync_update_state_locked(st);
			schednetisr(NETISR_PFSYNC);
		}
	}

	return (0);
}
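
/*
 * UPD_C is the compressed update: it carries only the state id/creatorid
 * and peer progress, so it can only be applied to a state we already
 * hold; unknown ids make us ask the peer for a full update.
 */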
int
pfsync_in_upd_c(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_upd_c *up;
	struct pf_state_cmp id_key;
	struct pf_state *st;

	int sync;

	int i;

	for (i = 0; i < count; i++) {
		up = (struct pfsync_upd_c *)(buf + len * i);

		/* check for invalid values */
		if (up->timeout >= PFTM_MAX ||
		    up->src.state > PF_TCPS_PROXY_DST ||
		    up->dst.state > PF_TCPS_PROXY_DST) {
			DPFPRINTF(LOG_NOTICE,
			    "pfsync_input: PFSYNC_ACT_UPD_C: invalid value");
			pfsyncstat_inc(pfsyncs_badval);
			continue;
		}

		id_key.id = up->id;
		id_key.creatorid = up->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL) {
			/* We don't have this state. Ask for it. */
			pfsync_request_update(id_key.creatorid, id_key.id);
			continue;
		}

		if (ISSET(st->state_flags, PFSTATE_ACK))
			pfsync_deferred(st, 1);

		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
			sync = pfsync_upd_tcp(st, &up->src, &up->dst);
		else {
			sync = 0;
			/*
			 * Non-TCP protocol state machines always go
			 * forwards.
			 */
			if (st->src.state > up->src.state)
				sync++;
			else
				pf_state_peer_ntoh(&up->src, &st->src);

			if (st->dst.state > up->dst.state)
				sync++;
			else
				pf_state_peer_ntoh(&up->dst, &st->dst);
		}
		if (sync < 2) {
			pfsync_alloc_scrub_memory(&up->dst, &st->dst);
			pf_state_peer_ntoh(&up->dst, &st->dst);
			st->expire = time_uptime;
			st->timeout = up->timeout;
		}
		st->pfsync_time = time_uptime;

		if (sync) {
			pfsyncstat_inc(pfsyncs_stale);

			pfsync_update_state_locked(st);
			schednetisr(NETISR_PFSYNC);
		}
	}

	return (0);
}

int
pfsync_in_ureq(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_upd_req *ur;
	int i;

	struct pf_state_cmp id_key;
	struct pf_state *st;

	for (i = 0; i < count; i++) {
		ur = (struct pfsync_upd_req *)(buf + len * i);

		id_key.id = ur->id;
		id_key.creatorid = ur->creatorid;

		if (id_key.id == 0 && id_key.creatorid == 0)
			pfsync_bulk_start();
		else {
			st = pf_find_state_byid(&id_key);
			if (st == NULL) {
				pfsyncstat_inc(pfsyncs_badstate);
				continue;
			}
			if (ISSET(st->state_flags, PFSTATE_NOSYNC))
				continue;

			pfsync_update_state_req(st);
		}
	}

	return (0);
}

int
pfsync_in_del(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_state *sp;
	struct pf_state_cmp id_key;
	struct pf_state *st;
	int i;

	for (i = 0; i < count; i++) {
		sp = (struct pfsync_state *)(buf + len * i);

		id_key.id = sp->id;
		id_key.creatorid = sp->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL) {
			pfsyncstat_inc(pfsyncs_badstate);
			continue;
		}
		SET(st->state_flags, PFSTATE_NOSYNC);
		pf_remove_state(st);
	}

	return (0);
}

int
pfsync_in_del_c(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_del_c *sp;
	struct pf_state_cmp id_key;
	struct pf_state *st;
	int i;

	for (i = 0; i < count; i++) {
		sp = (struct pfsync_del_c *)(buf + len * i);

		id_key.id = sp->id;
		id_key.creatorid = sp->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL) {
			pfsyncstat_inc(pfsyncs_badstate);
			continue;
		}

		SET(st->state_flags, PFSTATE_NOSYNC);
		pf_remove_state(st);
	}

	return (0);
}
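
/*
 * BUS (bulk update status) messages bracket a peer's bulk transfer.  The
 * timeout armed on PFSYNC_BUS_START below allows a base of 4 seconds plus
 * roughly one tick per packet's worth of states needed to move the whole
 * state table.
 */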
int
pfsync_in_bus(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_bus *bus;

	/* If we're not waiting for a bulk update, who cares. */
	if (sc->sc_ureq_sent == 0)
		return (0);

	bus = (struct pfsync_bus *)buf;

	switch (bus->status) {
	case PFSYNC_BUS_START:
		timeout_add(&sc->sc_bulkfail_tmo, 4 * hz +
		    pf_pool_limits[PF_LIMIT_STATES].limit /
		    ((sc->sc_if.if_mtu - PFSYNC_MINPKT) /
		    sizeof(struct pfsync_state)));
		DPFPRINTF(LOG_INFO, "received bulk update start");
		break;

	case PFSYNC_BUS_END:
		if (time_uptime - ntohl(bus->endtime) >=
		    sc->sc_ureq_sent) {
			/* that's it, we're happy */
			sc->sc_ureq_sent = 0;
			sc->sc_bulk_tries = 0;
			timeout_del(&sc->sc_bulkfail_tmo);
#if NCARP > 0
			if (!pfsync_sync_ok)
				carp_group_demote_adj(&sc->sc_if, -1,
				    sc->sc_link_demoted ?
				    "pfsync link state up" :
				    "pfsync bulk done");
			if (sc->sc_initial_bulk) {
				carp_group_demote_adj(&sc->sc_if, -32,
				    "pfsync init");
				sc->sc_initial_bulk = 0;
			}
#endif
			pfsync_sync_ok = 1;
			sc->sc_link_demoted = 0;
			DPFPRINTF(LOG_INFO, "received valid bulk update end");
		} else {
			DPFPRINTF(LOG_WARNING, "received invalid "
			    "bulk update end: bad timestamp");
		}
		break;
	}

	return (0);
}

int
pfsync_in_tdb(caddr_t buf, int len, int count, int flags)
{
#if defined(IPSEC)
	struct pfsync_tdb *tp;
	int i;

	for (i = 0; i < count; i++) {
		tp = (struct pfsync_tdb *)(buf + len * i);
		pfsync_update_net_tdb(tp);
	}
#endif

	return (0);
}

#if defined(IPSEC)
/* Update an in-kernel tdb.  Silently fail if no tdb is found. */
void
pfsync_update_net_tdb(struct pfsync_tdb *pt)
{
	struct tdb *tdb;

	NET_ASSERT_LOCKED();

	/* check for invalid values */
	if (ntohl(pt->spi) <= SPI_RESERVED_MAX ||
	    (pt->dst.sa.sa_family != AF_INET &&
	    pt->dst.sa.sa_family != AF_INET6))
		goto bad;

	tdb = gettdb(ntohs(pt->rdomain), pt->spi,
	    (union sockaddr_union *)&pt->dst, pt->sproto);
	if (tdb) {
		pt->rpl = betoh64(pt->rpl);
		pt->cur_bytes = betoh64(pt->cur_bytes);

		/* Neither replay nor byte counter should ever decrease. */
		if (pt->rpl < tdb->tdb_rpl ||
		    pt->cur_bytes < tdb->tdb_cur_bytes) {
			goto bad;
		}

		tdb->tdb_rpl = pt->rpl;
		tdb->tdb_cur_bytes = pt->cur_bytes;
	}
	return;

bad:
	DPFPRINTF(LOG_WARNING, "pfsync_insert: PFSYNC_ACT_TDB_UPD: "
	    "invalid value");
	pfsyncstat_inc(pfsyncs_badstate);
	return;
}
#endif


int
pfsync_in_eof(caddr_t buf, int len, int count, int flags)
{
	if (len > 0 || count > 0)
		pfsyncstat_inc(pfsyncs_badact);

	/* we're done.  let the caller return */
	return (1);
}

int
pfsync_in_error(caddr_t buf, int len, int count, int flags)
{
	pfsyncstat_inc(pfsyncs_badact);
	return (-1);
}

int
pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
    struct rtentry *rt)
{
	m_freem(m);	/* drop packet */
	return (EAFNOSUPPORT);
}

int
pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct proc *p = curproc;
	struct pfsync_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ip_moptions *imo = &sc->sc_imo;
	struct pfsyncreq pfsyncr;
	struct ifnet *sifp;
	struct ip *ip;
	int error;

	switch (cmd) {
	case SIOCSIFFLAGS:
		if ((ifp->if_flags & IFF_RUNNING) == 0 &&
		    (ifp->if_flags & IFF_UP)) {
			ifp->if_flags |= IFF_RUNNING;

#if NCARP > 0
			sc->sc_initial_bulk = 1;
			carp_group_demote_adj(&sc->sc_if, 32, "pfsync init");
#endif

			pfsync_request_full_update(sc);
		}
		if ((ifp->if_flags & IFF_RUNNING) &&
		    (ifp->if_flags & IFF_UP) == 0) {
			ifp->if_flags &= ~IFF_RUNNING;

			/* drop everything */
			timeout_del(&sc->sc_tmo);
			pfsync_drop(sc);

			pfsync_cancel_full_update(sc);
		}
		break;
	case SIOCSIFMTU:
		if (!sc->sc_sync_if ||
		    ifr->ifr_mtu <= PFSYNC_MINPKT ||
		    ifr->ifr_mtu > sc->sc_sync_if->if_mtu)
			return (EINVAL);
		if (ifr->ifr_mtu < ifp->if_mtu)
			pfsync_sendout();
		ifp->if_mtu = ifr->ifr_mtu;
		break;
	case SIOCGETPFSYNC:
		bzero(&pfsyncr, sizeof(pfsyncr));
		if (sc->sc_sync_if) {
			strlcpy(pfsyncr.pfsyncr_syncdev,
			    sc->sc_sync_if->if_xname, IFNAMSIZ);
		}
		pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer;
		pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates;
		pfsyncr.pfsyncr_defer = sc->sc_defer;
		return (copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr)));

	case SIOCSETPFSYNC:
		if ((error = suser(p)) != 0)
			return (error);
		if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr))))
			return (error);

		if (pfsyncr.pfsyncr_syncpeer.s_addr == 0)
			sc->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP;
		else
			sc->sc_sync_peer.s_addr =
			    pfsyncr.pfsyncr_syncpeer.s_addr;

		if (pfsyncr.pfsyncr_maxupdates > 255)
			return (EINVAL);
		sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates;

		sc->sc_defer = pfsyncr.pfsyncr_defer;

		if (pfsyncr.pfsyncr_syncdev[0] == 0) {
			if (sc->sc_sync_if) {
				hook_disestablish(
				    sc->sc_sync_if->if_linkstatehooks,
				    sc->sc_lhcookie);
				hook_disestablish(
				    sc->sc_sync_if->if_detachhooks,
				    sc->sc_dhcookie);
			}
			sc->sc_sync_if = NULL;
			if (imo->imo_num_memberships > 0) {
				in_delmulti(imo->imo_membership[
				    --imo->imo_num_memberships]);
				imo->imo_ifidx = 0;
			}
			break;
		}

		if ((sifp = ifunit(pfsyncr.pfsyncr_syncdev)) == NULL)
			return (EINVAL);

		if (sifp->if_mtu < sc->sc_if.if_mtu ||
		    (sc->sc_sync_if != NULL &&
		    sifp->if_mtu < sc->sc_sync_if->if_mtu) ||
		    sifp->if_mtu < MCLBYTES - sizeof(struct ip))
			pfsync_sendout();

		if (sc->sc_sync_if) {
			hook_disestablish(
			    sc->sc_sync_if->if_linkstatehooks,
			    sc->sc_lhcookie);
			hook_disestablish(
			    sc->sc_sync_if->if_detachhooks,
			    sc->sc_dhcookie);
		}
		sc->sc_sync_if = sifp;

		if (imo->imo_num_memberships > 0) {
			in_delmulti(imo->imo_membership[
			    --imo->imo_num_memberships]);
			imo->imo_ifidx = 0;
		}

		if (sc->sc_sync_if &&
		    sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) {
			struct in_addr addr;

			if (!(sc->sc_sync_if->if_flags & IFF_MULTICAST)) {
				sc->sc_sync_if = NULL;
				return (EADDRNOTAVAIL);
			}

			addr.s_addr = INADDR_PFSYNC_GROUP;

			if ((imo->imo_membership[0] =
			    in_addmulti(&addr, sc->sc_sync_if)) == NULL) {
				sc->sc_sync_if = NULL;
				return (ENOBUFS);
			}
			imo->imo_num_memberships++;
			imo->imo_ifidx = sc->sc_sync_if->if_index;
			imo->imo_ttl = PFSYNC_DFLTTL;
			imo->imo_loop = 0;
		}

		ip = &sc->sc_template;
		bzero(ip, sizeof(*ip));
		ip->ip_v = IPVERSION;
		ip->ip_hl = sizeof(sc->sc_template) >> 2;
		ip->ip_tos = IPTOS_LOWDELAY;
		/* len and id are set later */
		ip->ip_off = htons(IP_DF);
		ip->ip_ttl = PFSYNC_DFLTTL;
		ip->ip_p = IPPROTO_PFSYNC;
		ip->ip_src.s_addr = INADDR_ANY;
		ip->ip_dst.s_addr = sc->sc_sync_peer.s_addr;

		sc->sc_lhcookie =
		    hook_establish(sc->sc_sync_if->if_linkstatehooks, 1,
		    pfsync_syncdev_state, sc);
		sc->sc_dhcookie = hook_establish(sc->sc_sync_if->if_detachhooks,
		    0, pfsync_ifdetach, sc);

		pfsync_request_full_update(sc);

		break;

	default:
		return (ENOTTY);
	}

	return (0);
}

void
pfsync_out_state(struct pf_state *st, void *buf)
{
	struct pfsync_state *sp = buf;

	pfsync_state_export(sp, st);
}

void
pfsync_out_iack(struct pf_state *st, void *buf)
{
	struct pfsync_ins_ack *iack = buf;

	iack->id = st->id;
	iack->creatorid = st->creatorid;
}

void
pfsync_out_upd_c(struct pf_state *st, void *buf)
{
	struct pfsync_upd_c *up = buf;

	bzero(up, sizeof(*up));
	up->id = st->id;
	pf_state_peer_hton(&st->src, &up->src);
	pf_state_peer_hton(&st->dst, &up->dst);
	up->creatorid = st->creatorid;
	up->timeout = st->timeout;
}

void
pfsync_out_del(struct pf_state *st, void *buf)
{
	struct pfsync_del_c *dp = buf;

	dp->id = st->id;
	dp->creatorid = st->creatorid;

	SET(st->state_flags, PFSTATE_NOSYNC);
}

void
pfsync_drop(struct pfsync_softc *sc)
{
	struct pf_state *st;
	struct pfsync_upd_req_item *ur;
	struct tdb *t;
	int q;

	for (q = 0; q < PFSYNC_S_COUNT; q++) {
		if (TAILQ_EMPTY(&sc->sc_qs[q]))
			continue;

		while ((st = TAILQ_FIRST(&sc->sc_qs[q])) != NULL) {
			TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list);
#ifdef PFSYNC_DEBUG
			KASSERT(st->sync_state == q);
#endif
			st->sync_state = PFSYNC_S_NONE;
			pf_state_unref(st);
		}
	}

	while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
		TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);
		pool_put(&sc->sc_pool, ur);
	}

	sc->sc_plus = NULL;

	while ((t = TAILQ_FIRST(&sc->sc_tdb_q)) != NULL) {
		TAILQ_REMOVE(&sc->sc_tdb_q, t, tdb_sync_entry);
		CLR(t->tdb_flags, TDBF_PFSYNC);
	}

	sc->sc_len = PFSYNC_MINPKT;
}

#ifdef WITH_PF_LOCK
void
pfsync_send_dispatch(void *xmq)
{
	struct mbuf_queue *mq = xmq;
	struct pfsync_softc *sc;
	struct mbuf *m;
	struct mbuf_list ml;
	int error;

	mq_delist(mq, &ml);
	if (ml_empty(&ml))
		return;

	NET_RLOCK();
	sc = pfsyncif;
	if (sc == NULL) {
		ml_purge(&ml);
		goto done;
	}

	while ((m = ml_dequeue(&ml)) != NULL) {
		if ((error = ip_output(m, NULL, NULL, IP_RAWOUTPUT,
		    &sc->sc_imo, NULL, 0)) == 0)
			pfsyncstat_inc(pfsyncs_opackets);
		else {
			DPFPRINTF(LOG_DEBUG,
			    "ip_output() @ %s failed (%d)\n", __func__, error);
			pfsyncstat_inc(pfsyncs_oerrors);
		}
	}
done:
	NET_RUNLOCK();
}

void
pfsync_send_pkt(struct mbuf *m)
{
	if (mq_enqueue(&pfsync_mq, m) != 0) {
		pfsyncstat_inc(pfsyncs_oerrors);
		DPFPRINTF(LOG_DEBUG, "mq_enqueue() @ %s failed, queue full\n",
		    __func__);
	} else
		task_add(net_tq(0), &pfsync_task);
}
#endif /* WITH_PF_LOCK */

void
pfsync_sendout(void)
{
	struct pfsync_softc *sc = pfsyncif;
#if NBPFILTER > 0
	struct ifnet *ifp = &sc->sc_if;
#endif
	struct mbuf *m;
	struct ip *ip;
	struct pfsync_header *ph;
	struct pfsync_subheader *subh;
	struct pf_state *st;
	struct pfsync_upd_req_item *ur;
	struct tdb *t;

	int offset;
	int q, count = 0;

	if (sc == NULL || sc->sc_len == PFSYNC_MINPKT)
		return;

	if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
#if NBPFILTER > 0
	    (ifp->if_bpf == NULL && sc->sc_sync_if == NULL)) {
#else
	    sc->sc_sync_if == NULL) {
#endif
		pfsync_drop(sc);
		return;
	}

	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL) {
		sc->sc_if.if_oerrors++;
		pfsyncstat_inc(pfsyncs_onomem);
		pfsync_drop(sc);
		return;
	}

	if (max_linkhdr + sc->sc_len > MHLEN) {
		MCLGETI(m, M_DONTWAIT, NULL, max_linkhdr + sc->sc_len);
		if (!ISSET(m->m_flags, M_EXT)) {
			m_free(m);
			sc->sc_if.if_oerrors++;
			pfsyncstat_inc(pfsyncs_onomem);
			pfsync_drop(sc);
			return;
		}
	}
	m->m_data += max_linkhdr;
	m->m_len = m->m_pkthdr.len = sc->sc_len;

	/* build the ip header */
	ip = mtod(m, struct ip *);
	bcopy(&sc->sc_template, ip, sizeof(*ip));
	offset = sizeof(*ip);

	ip->ip_len = htons(m->m_pkthdr.len);
	ip->ip_id = htons(ip_randomid());

	/* build the pfsync header */
	ph = (struct pfsync_header *)(m->m_data + offset);
	bzero(ph, sizeof(*ph));
	offset += sizeof(*ph);

	ph->version = PFSYNC_VERSION;
	ph->len = htons(sc->sc_len - sizeof(*ip));
	bcopy(pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH);

	if (!TAILQ_EMPTY(&sc->sc_upd_req_list)) {
		subh = (struct pfsync_subheader *)(m->m_data + offset);
		offset += sizeof(*subh);

		count = 0;
		while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
			TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);

			bcopy(&ur->ur_msg, m->m_data + offset,
			    sizeof(ur->ur_msg));
			offset += sizeof(ur->ur_msg);

			pool_put(&sc->sc_pool, ur);

			count++;
		}

		bzero(subh, sizeof(*subh));
		subh->len = sizeof(ur->ur_msg) >> 2;
		subh->action = PFSYNC_ACT_UPD_REQ;
		subh->count = htons(count);
	}

	/* has someone built a custom region for us to add? */
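	/*
	 * sc_plus points at caller-provided data (see pfsync_send_plus()),
	 * already complete with its own subheader, so it is copied in
	 * verbatim.
	 */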
	if (sc->sc_plus != NULL) {
		bcopy(sc->sc_plus, m->m_data + offset, sc->sc_pluslen);
		offset += sc->sc_pluslen;

		sc->sc_plus = NULL;
	}

	if (!TAILQ_EMPTY(&sc->sc_tdb_q)) {
		subh = (struct pfsync_subheader *)(m->m_data + offset);
		offset += sizeof(*subh);

		count = 0;
		while ((t = TAILQ_FIRST(&sc->sc_tdb_q)) != NULL) {
			TAILQ_REMOVE(&sc->sc_tdb_q, t, tdb_sync_entry);
			pfsync_out_tdb(t, m->m_data + offset);
			offset += sizeof(struct pfsync_tdb);
			CLR(t->tdb_flags, TDBF_PFSYNC);
			count++;
		}

		bzero(subh, sizeof(*subh));
		subh->action = PFSYNC_ACT_TDB;
		subh->len = sizeof(struct pfsync_tdb) >> 2;
		subh->count = htons(count);
	}

	/* walk the queues */
	for (q = 0; q < PFSYNC_S_COUNT; q++) {
		if (TAILQ_EMPTY(&sc->sc_qs[q]))
			continue;

		subh = (struct pfsync_subheader *)(m->m_data + offset);
		offset += sizeof(*subh);

		count = 0;
		while ((st = TAILQ_FIRST(&sc->sc_qs[q])) != NULL) {
			TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list);
#ifdef PFSYNC_DEBUG
			KASSERT(st->sync_state == q);
#endif
			st->sync_state = PFSYNC_S_NONE;
			pfsync_qs[q].write(st, m->m_data + offset);
			offset += pfsync_qs[q].len;

			pf_state_unref(st);
			count++;
		}

		bzero(subh, sizeof(*subh));
		subh->action = pfsync_qs[q].action;
		subh->len = pfsync_qs[q].len >> 2;
		subh->count = htons(count);
	}

	/* we're done, let's put it on the wire */
#if NBPFILTER > 0
	if (ifp->if_bpf) {
		m->m_data += sizeof(*ip);
		m->m_len = m->m_pkthdr.len = sc->sc_len - sizeof(*ip);
		bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
		m->m_data -= sizeof(*ip);
		m->m_len = m->m_pkthdr.len = sc->sc_len;
	}

	if (sc->sc_sync_if == NULL) {
		sc->sc_len = PFSYNC_MINPKT;
		m_freem(m);
		return;
	}
#endif

	/* start again */
	sc->sc_len = PFSYNC_MINPKT;

	sc->sc_if.if_opackets++;
	sc->sc_if.if_obytes += m->m_pkthdr.len;

	m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain;

#ifdef WITH_PF_LOCK
	pfsync_send_pkt(m);
#else	/* !WITH_PF_LOCK */
	if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL, 0) == 0)
		pfsyncstat_inc(pfsyncs_opackets);
	else
		pfsyncstat_inc(pfsyncs_oerrors);
#endif	/* WITH_PF_LOCK */
}

void
pfsync_insert_state(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;

	NET_ASSERT_LOCKED();

	if (ISSET(st->rule.ptr->rule_flag, PFRULE_NOSYNC) ||
	    st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) {
		SET(st->state_flags, PFSTATE_NOSYNC);
		return;
	}

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
	    ISSET(st->state_flags, PFSTATE_NOSYNC))
		return;

#ifdef PFSYNC_DEBUG
	KASSERT(st->sync_state == PFSYNC_S_NONE);
#endif

	if (sc->sc_len == PFSYNC_MINPKT)
		timeout_add_sec(&sc->sc_tmo, 1);

	pfsync_q_ins(st, PFSYNC_S_INS);

	st->sync_updates = 0;
}
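
/*
 * Deferral briefly holds back the packet that created a state so the
 * state insert can reach our peer first; the packet is released when the
 * peer acks the insert (see pfsync_in_iack()) or when the 20ms timeout
 * armed below fires, whichever comes first.
 */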
int
pfsync_defer(struct pf_state *st, struct mbuf *m)
{
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_deferral *pd;

	NET_ASSERT_LOCKED();

	if (!sc->sc_defer ||
	    ISSET(st->state_flags, PFSTATE_NOSYNC) ||
	    m->m_flags & (M_BCAST|M_MCAST))
		return (0);

	if (sc->sc_deferred >= 128) {
		pd = TAILQ_FIRST(&sc->sc_deferrals);
		if (timeout_del(&pd->pd_tmo))
			pfsync_undefer(pd, 0);
	}

	pd = pool_get(&sc->sc_pool, PR_NOWAIT);
	if (pd == NULL)
		return (0);

	m->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
	SET(st->state_flags, PFSTATE_ACK);

	pd->pd_st = pf_state_ref(st);
	pd->pd_m = m;

	sc->sc_deferred++;
	TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry);

	timeout_set_proc(&pd->pd_tmo, pfsync_defer_tmo, pd);
	timeout_add_msec(&pd->pd_tmo, 20);

	schednetisr(NETISR_PFSYNC);

	return (1);
}

void
pfsync_undefer(struct pfsync_deferral *pd, int drop)
{
	struct pfsync_softc *sc = pfsyncif;
	struct pf_pdesc pdesc;

	NET_ASSERT_LOCKED();

	if (sc == NULL)
		return;

	TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
	sc->sc_deferred--;

	CLR(pd->pd_st->state_flags, PFSTATE_ACK);
	if (drop)
		m_freem(pd->pd_m);
	else {
		if (pd->pd_st->rule.ptr->rt == PF_ROUTETO) {
			if (pf_setup_pdesc(&pdesc,
			    pd->pd_st->key[PF_SK_WIRE]->af,
			    pd->pd_st->direction, pd->pd_st->rt_kif,
			    pd->pd_m, NULL) != PF_PASS) {
				m_freem(pd->pd_m);
				goto out;
			}
			switch (pd->pd_st->key[PF_SK_WIRE]->af) {
			case AF_INET:
				pf_route(&pdesc,
				    pd->pd_st->rule.ptr, pd->pd_st);
				break;
#ifdef INET6
			case AF_INET6:
				pf_route6(&pdesc,
				    pd->pd_st->rule.ptr, pd->pd_st);
				break;
#endif /* INET6 */
			default:
				unhandled_af(pd->pd_st->key[PF_SK_WIRE]->af);
			}
			pd->pd_m = pdesc.m;
		} else {
			switch (pd->pd_st->key[PF_SK_WIRE]->af) {
			case AF_INET:
				ip_output(pd->pd_m, NULL, NULL, 0, NULL, NULL,
				    0);
				break;
#ifdef INET6
			case AF_INET6:
				ip6_output(pd->pd_m, NULL, NULL, 0,
				    NULL, NULL);
				break;
#endif /* INET6 */
			default:
				unhandled_af(pd->pd_st->key[PF_SK_WIRE]->af);
			}
		}
	}
out:
	pf_state_unref(pd->pd_st);
	pool_put(&sc->sc_pool, pd);
}

void
pfsync_defer_tmo(void *arg)
{
	NET_LOCK();
	pfsync_undefer(arg, 0);
	NET_UNLOCK();
}

void
pfsync_deferred(struct pf_state *st, int drop)
{
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_deferral *pd;

	NET_ASSERT_LOCKED();

	TAILQ_FOREACH(pd, &sc->sc_deferrals, pd_entry) {
		if (pd->pd_st == st) {
			if (timeout_del(&pd->pd_tmo))
				pfsync_undefer(pd, drop);
			return;
		}
	}

	panic("pfsync_deferred: unable to find deferred state");
}
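
/*
 * State updates are normally batched: a state sits on the UPD_C queue
 * until the packet fills up or the one-second sc_tmo fires.  Busy TCP
 * states are flushed early once they accumulate sc_maxupdates updates.
 */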
void
pfsync_update_state_locked(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;
	int sync = 0;

	NET_ASSERT_LOCKED();
	PF_ASSERT_LOCKED();

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
		return;

	if (ISSET(st->state_flags, PFSTATE_ACK))
		pfsync_deferred(st, 0);
	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
		if (st->sync_state != PFSYNC_S_NONE)
			pfsync_q_del(st);
		return;
	}

	if (sc->sc_len == PFSYNC_MINPKT)
		timeout_add_sec(&sc->sc_tmo, 1);

	switch (st->sync_state) {
	case PFSYNC_S_UPD_C:
	case PFSYNC_S_UPD:
	case PFSYNC_S_INS:
		/* we're already handling it */

		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) {
			st->sync_updates++;
			if (st->sync_updates >= sc->sc_maxupdates)
				sync = 1;
		}
		break;

	case PFSYNC_S_IACK:
		pfsync_q_del(st);
	case PFSYNC_S_NONE:
		pfsync_q_ins(st, PFSYNC_S_UPD_C);
		st->sync_updates = 0;
		break;

	default:
		panic("pfsync_update_state: unexpected sync state %d",
		    st->sync_state);
	}

	if (sync || (time_uptime - st->pfsync_time) < 2)
		schednetisr(NETISR_PFSYNC);
}

void
pfsync_update_state(struct pf_state *st, int *have_pf_lock)
{
	struct pfsync_softc *sc = pfsyncif;

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
		return;

	if (*have_pf_lock == 0) {
		PF_LOCK();
		*have_pf_lock = 1;
	}

	pfsync_update_state_locked(st);
}

void
pfsync_cancel_full_update(struct pfsync_softc *sc)
{
	if (timeout_pending(&sc->sc_bulkfail_tmo) ||
	    timeout_pending(&sc->sc_bulk_tmo)) {
#if NCARP > 0
		if (!pfsync_sync_ok)
			carp_group_demote_adj(&sc->sc_if, -1,
			    "pfsync bulk cancelled");
		if (sc->sc_initial_bulk) {
			carp_group_demote_adj(&sc->sc_if, -32,
			    "pfsync init");
			sc->sc_initial_bulk = 0;
		}
#endif
		pfsync_sync_ok = 1;
		DPFPRINTF(LOG_INFO, "cancelling bulk update");
	}
	timeout_del(&sc->sc_bulkfail_tmo);
	timeout_del(&sc->sc_bulk_tmo);
	sc->sc_bulk_next = NULL;
	sc->sc_bulk_last = NULL;
	sc->sc_ureq_sent = 0;
	sc->sc_bulk_tries = 0;
}

void
pfsync_request_full_update(struct pfsync_softc *sc)
{
	if (sc->sc_sync_if && ISSET(sc->sc_if.if_flags, IFF_RUNNING)) {
		/* Request a full state table update. */
		sc->sc_ureq_sent = time_uptime;
#if NCARP > 0
		if (!sc->sc_link_demoted && pfsync_sync_ok)
			carp_group_demote_adj(&sc->sc_if, 1,
			    "pfsync bulk start");
#endif
		pfsync_sync_ok = 0;
		DPFPRINTF(LOG_INFO, "requesting bulk update");
		timeout_add(&sc->sc_bulkfail_tmo, 4 * hz +
		    pf_pool_limits[PF_LIMIT_STATES].limit /
		    ((sc->sc_if.if_mtu - PFSYNC_MINPKT) /
		    sizeof(struct pfsync_state)));
		pfsync_request_update(0, 0);
	}
}

void
pfsync_request_update(u_int32_t creatorid, u_int64_t id)
{
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_upd_req_item *item;
	size_t nlen = sizeof(struct pfsync_upd_req);

	/*
	 * this code does nothing to prevent multiple update requests for the
	 * same state being generated.
	 */
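	/*
	 * At worst a duplicate request costs one redundant update on the
	 * wire; pfsync_update_state_req() on the responder returns early
	 * for a state that is already queued.
	 */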
	item = pool_get(&sc->sc_pool, PR_NOWAIT);
	if (item == NULL) {
		/* XXX stats */
		return;
	}

	item->ur_msg.id = id;
	item->ur_msg.creatorid = creatorid;

	if (TAILQ_EMPTY(&sc->sc_upd_req_list))
		nlen += sizeof(struct pfsync_subheader);

	if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
		pfsync_sendout();

		nlen = sizeof(struct pfsync_subheader) +
		    sizeof(struct pfsync_upd_req);
	}

	TAILQ_INSERT_TAIL(&sc->sc_upd_req_list, item, ur_entry);
	sc->sc_len += nlen;

	schednetisr(NETISR_PFSYNC);
}

void
pfsync_update_state_req(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;

	if (sc == NULL)
		panic("pfsync_update_state_req: nonexistent instance");

	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
		if (st->sync_state != PFSYNC_S_NONE)
			pfsync_q_del(st);
		return;
	}

	switch (st->sync_state) {
	case PFSYNC_S_UPD_C:
	case PFSYNC_S_IACK:
		pfsync_q_del(st);
	case PFSYNC_S_NONE:
		pfsync_q_ins(st, PFSYNC_S_UPD);
		schednetisr(NETISR_PFSYNC);
		return;

	case PFSYNC_S_INS:
	case PFSYNC_S_UPD:
	case PFSYNC_S_DEL:
		/* we're already handling it */
		return;

	default:
		panic("pfsync_update_state_req: unexpected sync state %d",
		    st->sync_state);
	}
}

void
pfsync_delete_state(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;

	NET_ASSERT_LOCKED();

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
		return;

	if (ISSET(st->state_flags, PFSTATE_ACK))
		pfsync_deferred(st, 1);
	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
		if (st->sync_state != PFSYNC_S_NONE)
			pfsync_q_del(st);
		return;
	}

	if (sc->sc_len == PFSYNC_MINPKT)
		timeout_add_sec(&sc->sc_tmo, 1);

	switch (st->sync_state) {
	case PFSYNC_S_INS:
		/* we never got to tell the world so just forget about it */
		pfsync_q_del(st);
		return;

	case PFSYNC_S_UPD_C:
	case PFSYNC_S_UPD:
	case PFSYNC_S_IACK:
		pfsync_q_del(st);
		/*
		 * FALLTHROUGH to putting it on the del list.
		 * Note on reference count bookkeeping:
		 * pfsync_q_del() drops the reference taken for queue
		 * ownership.  But the st entry survives, because
		 * our caller still holds a reference.
		 */

	case PFSYNC_S_NONE:
		/*
		 * We either fall through here, or there is no reference to
		 * st owned by pfsync queues at this point.
		 *
		 * Calling pfsync_q_ins() puts st on the del queue, and
		 * pfsync_q_ins() grabs a reference for the delete queue.
		 */
		pfsync_q_ins(st, PFSYNC_S_DEL);
		return;

	default:
		panic("pfsync_delete_state: unexpected sync state %d",
		    st->sync_state);
	}
}

void
pfsync_clear_states(u_int32_t creatorid, const char *ifname)
{
	struct pfsync_softc *sc = pfsyncif;
	struct {
		struct pfsync_subheader subh;
		struct pfsync_clr clr;
	} __packed r;

	NET_ASSERT_LOCKED();

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
		return;

	bzero(&r, sizeof(r));

	r.subh.action = PFSYNC_ACT_CLR;
	r.subh.len = sizeof(struct pfsync_clr) >> 2;
	r.subh.count = htons(1);

	strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname));
	r.clr.creatorid = creatorid;

	pfsync_send_plus(&r, sizeof(r));
}

void
pfsync_q_ins(struct pf_state *st, int q)
{
	struct pfsync_softc *sc = pfsyncif;
	size_t nlen = pfsync_qs[q].len;

	KASSERT(st->sync_state == PFSYNC_S_NONE);

#if defined(PFSYNC_DEBUG)
	if (sc->sc_len < PFSYNC_MINPKT)
		panic("pfsync pkt len is too low %zu", sc->sc_len);
#endif
	if (TAILQ_EMPTY(&sc->sc_qs[q]))
		nlen += sizeof(struct pfsync_subheader);

	if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
		pfsync_sendout();

		nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len;
	}

	sc->sc_len += nlen;
	pf_state_ref(st);
	TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list);
	st->sync_state = q;
}

void
pfsync_q_del(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;
	int q = st->sync_state;

	KASSERT(st->sync_state != PFSYNC_S_NONE);

	sc->sc_len -= pfsync_qs[q].len;
	TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list);
	st->sync_state = PFSYNC_S_NONE;
	pf_state_unref(st);

	if (TAILQ_EMPTY(&sc->sc_qs[q]))
		sc->sc_len -= sizeof(struct pfsync_subheader);
}
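
/*
 * sc_len mirrors the size of the packet pfsync_sendout() would build:
 * each queue pays for its subheader when it gains its first entry, and
 * the bytes are returned as entries and queues drain.
 */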

void
pfsync_update_tdb(struct tdb *t, int output)
{
	struct pfsync_softc *sc = pfsyncif;
	size_t nlen = sizeof(struct pfsync_tdb);

	if (sc == NULL)
		return;

	if (!ISSET(t->tdb_flags, TDBF_PFSYNC)) {
		if (TAILQ_EMPTY(&sc->sc_tdb_q))
			nlen += sizeof(struct pfsync_subheader);

		if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
			pfsync_sendout();

			nlen = sizeof(struct pfsync_subheader) +
			    sizeof(struct pfsync_tdb);
		}

		sc->sc_len += nlen;
		TAILQ_INSERT_TAIL(&sc->sc_tdb_q, t, tdb_sync_entry);
		SET(t->tdb_flags, TDBF_PFSYNC);
		t->tdb_updates = 0;
	} else {
		if (++t->tdb_updates >= sc->sc_maxupdates)
			schednetisr(NETISR_PFSYNC);
	}

	if (output)
		SET(t->tdb_flags, TDBF_PFSYNC_RPL);
	else
		CLR(t->tdb_flags, TDBF_PFSYNC_RPL);
}

void
pfsync_delete_tdb(struct tdb *t)
{
	struct pfsync_softc *sc = pfsyncif;

	if (sc == NULL || !ISSET(t->tdb_flags, TDBF_PFSYNC))
		return;

	sc->sc_len -= sizeof(struct pfsync_tdb);
	TAILQ_REMOVE(&sc->sc_tdb_q, t, tdb_sync_entry);
	CLR(t->tdb_flags, TDBF_PFSYNC);

	if (TAILQ_EMPTY(&sc->sc_tdb_q))
		sc->sc_len -= sizeof(struct pfsync_subheader);
}

void
pfsync_out_tdb(struct tdb *t, void *buf)
{
	struct pfsync_tdb *ut = buf;

	bzero(ut, sizeof(*ut));
	ut->spi = t->tdb_spi;
	bcopy(&t->tdb_dst, &ut->dst, sizeof(ut->dst));
	/*
	 * When a failover happens, the master's rpl is probably above
	 * what we see here (we may be up to a second late), so
	 * increase it a bit for outbound tdbs to manage most such
	 * situations.
	 *
	 * For now, just add an offset that is likely to be larger
	 * than the number of packets we can see in one second.  The RFC
	 * just says the next packet must have a higher seq value.
	 *
	 * XXX What is a good algorithm for this?  We could use
	 * a rate-determined increase, but to know it, we would have
	 * to extend struct tdb.
	 * XXX ut->rpl can wrap over MAXINT, but if so the real tdb
	 * will soon be replaced anyway.  For now, just don't handle
	 * this edge case.
	 */
#define RPL_INCR 16384
	ut->rpl = htobe64(t->tdb_rpl + (ISSET(t->tdb_flags, TDBF_PFSYNC_RPL) ?
	    RPL_INCR : 0));
	ut->cur_bytes = htobe64(t->tdb_cur_bytes);
	ut->sproto = t->tdb_sproto;
	ut->rdomain = htons(t->tdb_rdomain);
}

void
pfsync_bulk_start(void)
{
	struct pfsync_softc *sc = pfsyncif;

	DPFPRINTF(LOG_INFO, "received bulk update request");

	if (TAILQ_EMPTY(&state_list))
		pfsync_bulk_status(PFSYNC_BUS_END);
	else {
		sc->sc_ureq_received = time_uptime;

		if (sc->sc_bulk_next == NULL)
			sc->sc_bulk_next = TAILQ_FIRST(&state_list);
		sc->sc_bulk_last = sc->sc_bulk_next;

		pfsync_bulk_status(PFSYNC_BUS_START);
		timeout_add(&sc->sc_bulk_tmo, 0);
	}
}

void
pfsync_bulk_update(void *arg)
{
	struct pfsync_softc *sc;
	struct pf_state *st;
	int i = 0;

	NET_LOCK();
	sc = pfsyncif;
	if (sc == NULL)
		goto out;
	st = sc->sc_bulk_next;

	for (;;) {
		if (st->sync_state == PFSYNC_S_NONE &&
		    st->timeout < PFTM_MAX &&
		    st->pfsync_time <= sc->sc_ureq_received) {
			pfsync_update_state_req(st);
			i++;
		}

		st = TAILQ_NEXT(st, entry_list);
		if (st == NULL)
			st = TAILQ_FIRST(&state_list);

		if (st == sc->sc_bulk_last) {
			/* we're done */
			sc->sc_bulk_next = NULL;
			sc->sc_bulk_last = NULL;
			pfsync_bulk_status(PFSYNC_BUS_END);
			break;
		}

		if (i > 1 && (sc->sc_if.if_mtu - sc->sc_len) <
		    sizeof(struct pfsync_state)) {
			/* we've filled a packet */
			sc->sc_bulk_next = st;
			timeout_add(&sc->sc_bulk_tmo, 1);
			break;
		}
	}
out:
	NET_UNLOCK();
}

void
pfsync_bulk_status(u_int8_t status)
{
	struct {
		struct pfsync_subheader subh;
		struct pfsync_bus bus;
	} __packed r;

	struct pfsync_softc *sc = pfsyncif;

	bzero(&r, sizeof(r));

	r.subh.action = PFSYNC_ACT_BUS;
	r.subh.len = sizeof(struct pfsync_bus) >> 2;
	r.subh.count = htons(1);

	r.bus.creatorid = pf_status.hostid;
	r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received);
	r.bus.status = status;

	pfsync_send_plus(&r, sizeof(r));
}
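
/*
 * On the wire a bulk transfer is thus a BUS "start" message, a stream
 * of full state updates paced by the sc_bulk_tmo timeout above
 * (roughly one packet per tick), and a BUS "end" message.  The
 * requesting peer uses the end message, or its absence (see
 * pfsync_bulk_fail() below), to decide whether it is in sync.
 */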
2414 "pfsync link state up" : 2415 "pfsync bulk fail"); 2416 if (sc->sc_initial_bulk) { 2417 carp_group_demote_adj(&sc->sc_if, -32, 2418 "pfsync init"); 2419 sc->sc_initial_bulk = 0; 2420 } 2421 #endif 2422 pfsync_sync_ok = 1; 2423 sc->sc_link_demoted = 0; 2424 DPFPRINTF(LOG_ERR, "failed to receive bulk update"); 2425 } 2426 out: 2427 NET_UNLOCK(); 2428 } 2429 2430 void 2431 pfsync_send_plus(void *plus, size_t pluslen) 2432 { 2433 struct pfsync_softc *sc = pfsyncif; 2434 2435 if (sc->sc_len + pluslen > sc->sc_if.if_mtu) 2436 pfsync_sendout(); 2437 2438 sc->sc_plus = plus; 2439 sc->sc_len += (sc->sc_pluslen = pluslen); 2440 2441 pfsync_sendout(); 2442 } 2443 2444 int 2445 pfsync_up(void) 2446 { 2447 struct pfsync_softc *sc = pfsyncif; 2448 2449 if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING)) 2450 return (0); 2451 2452 return (1); 2453 } 2454 2455 int 2456 pfsync_state_in_use(struct pf_state *st) 2457 { 2458 struct pfsync_softc *sc = pfsyncif; 2459 2460 if (sc == NULL) 2461 return (0); 2462 2463 if (st->sync_state != PFSYNC_S_NONE || 2464 st == sc->sc_bulk_next || 2465 st == sc->sc_bulk_last) 2466 return (1); 2467 2468 return (0); 2469 } 2470 2471 void 2472 pfsync_timeout(void *arg) 2473 { 2474 NET_LOCK(); 2475 pfsync_sendout(); 2476 NET_UNLOCK(); 2477 } 2478 2479 /* this is a softnet/netisr handler */ 2480 void 2481 pfsyncintr(void) 2482 { 2483 pfsync_sendout(); 2484 } 2485 2486 int 2487 pfsync_sysctl_pfsyncstat(void *oldp, size_t *oldlenp, void *newp) 2488 { 2489 struct pfsyncstats pfsyncstat; 2490 2491 CTASSERT(sizeof(pfsyncstat) == (pfsyncs_ncounters * sizeof(uint64_t))); 2492 memset(&pfsyncstat, 0, sizeof pfsyncstat); 2493 counters_read(pfsynccounters, (uint64_t *)&pfsyncstat, 2494 pfsyncs_ncounters); 2495 return (sysctl_rdstruct(oldp, oldlenp, newp, 2496 &pfsyncstat, sizeof(pfsyncstat))); 2497 } 2498 2499 int 2500 pfsync_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, 2501 size_t newlen) 2502 { 2503 /* All sysctl names at this level are terminal. */ 2504 if (namelen != 1) 2505 return (ENOTDIR); 2506 2507 switch (name[0]) { 2508 case PFSYNCCTL_STATS: 2509 return (pfsync_sysctl_pfsyncstat(oldp, oldlenp, newp)); 2510 default: 2511 return (ENOPROTOOPT); 2512 } 2513 } 2514