1 /* $OpenBSD: if_pfsync.c,v 1.207 2014/07/12 18:44:22 tedu Exp $ */ 2 3 /* 4 * Copyright (c) 2002 Michael Shalayeff 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 20 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 24 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 25 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 26 * THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 /* 30 * Copyright (c) 2009 David Gwynne <dlg@openbsd.org> 31 * 32 * Permission to use, copy, modify, and distribute this software for any 33 * purpose with or without fee is hereby granted, provided that the above 34 * copyright notice and this permission notice appear in all copies. 35 * 36 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 37 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 38 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 39 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 40 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 41 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 42 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 43 */ 44 45 #include <sys/param.h> 46 #include <sys/systm.h> 47 #include <sys/time.h> 48 #include <sys/malloc.h> 49 #include <sys/mbuf.h> 50 #include <sys/socket.h> 51 #include <sys/ioctl.h> 52 #include <sys/timeout.h> 53 #include <sys/kernel.h> 54 #include <sys/sysctl.h> 55 #include <sys/pool.h> 56 #include <sys/syslog.h> 57 58 #include <net/if.h> 59 #include <net/if_types.h> 60 #include <net/route.h> 61 #include <net/bpf.h> 62 #include <net/netisr.h> 63 #include <netinet/in.h> 64 #include <netinet/if_ether.h> 65 #include <netinet/tcp.h> 66 #include <netinet/tcp_seq.h> 67 68 #ifdef INET 69 #include <netinet/in_systm.h> 70 #include <netinet/in_var.h> 71 #include <netinet/ip.h> 72 #include <netinet/ip_var.h> 73 #endif 74 75 #ifdef INET6 76 #include <netinet6/in6_var.h> 77 #include <netinet/ip6.h> 78 #include <netinet/in_pcb.h> 79 #include <netinet/icmp6.h> 80 #include <netinet6/nd6.h> 81 #include <netinet6/ip6_divert.h> 82 #endif /* INET6 */ 83 84 #include "carp.h" 85 #if NCARP > 0 86 #include <netinet/ip_carp.h> 87 #endif 88 89 #define PF_DEBUGNAME "pfsync: " 90 #include <net/pfvar.h> 91 #include <net/if_pfsync.h> 92 93 #include "bpfilter.h" 94 #include "pfsync.h" 95 96 #define PFSYNC_MINPKT ( \ 97 sizeof(struct ip) + \ 98 sizeof(struct pfsync_header)) 99 100 int pfsync_upd_tcp(struct pf_state *, struct pfsync_state_peer *, 101 struct pfsync_state_peer *); 102 103 int pfsync_in_clr(caddr_t, int, int, int); 104 int pfsync_in_iack(caddr_t, int, int, int); 105 int pfsync_in_upd_c(caddr_t, int, int, int); 106 int pfsync_in_ureq(caddr_t, int, int, int); 107 int pfsync_in_del(caddr_t, int, int, int); 108 int pfsync_in_del_c(caddr_t, int, int, int); 109 int pfsync_in_bus(caddr_t, int, int, int); 110 int pfsync_in_tdb(caddr_t, int, int, int); 111 int pfsync_in_ins(caddr_t, int, int, int); 112 int pfsync_in_upd(caddr_t, int, int, int); 113 int pfsync_in_eof(caddr_t, int, int, int); 114 115 int pfsync_in_error(caddr_t, int, int, int); 116 117 struct { 118 int (*in)(caddr_t, int, int, int); 119 size_t len; 120 } pfsync_acts[] = { 121 /* PFSYNC_ACT_CLR */ 122 { pfsync_in_clr, sizeof(struct pfsync_clr) }, 123 /* PFSYNC_ACT_OINS */ 124 { pfsync_in_error, 0 }, 125 /* PFSYNC_ACT_INS_ACK */ 126 { pfsync_in_iack, sizeof(struct pfsync_ins_ack) }, 127 /* PFSYNC_ACT_OUPD */ 128 { pfsync_in_error, 0 }, 129 /* PFSYNC_ACT_UPD_C */ 130 { pfsync_in_upd_c, sizeof(struct pfsync_upd_c) }, 131 /* PFSYNC_ACT_UPD_REQ */ 132 { pfsync_in_ureq, sizeof(struct pfsync_upd_req) }, 133 /* PFSYNC_ACT_DEL */ 134 { pfsync_in_del, sizeof(struct pfsync_state) }, 135 /* PFSYNC_ACT_DEL_C */ 136 { pfsync_in_del_c, sizeof(struct pfsync_del_c) }, 137 /* PFSYNC_ACT_INS_F */ 138 { pfsync_in_error, 0 }, 139 /* PFSYNC_ACT_DEL_F */ 140 { pfsync_in_error, 0 }, 141 /* PFSYNC_ACT_BUS */ 142 { pfsync_in_bus, sizeof(struct pfsync_bus) }, 143 /* PFSYNC_ACT_OTDB */ 144 { pfsync_in_error, 0 }, 145 /* PFSYNC_ACT_EOF */ 146 { pfsync_in_error, 0 }, 147 /* PFSYNC_ACT_INS */ 148 { pfsync_in_ins, sizeof(struct pfsync_state) }, 149 /* PFSYNC_ACT_UPD */ 150 { pfsync_in_upd, sizeof(struct pfsync_state) }, 151 /* PFSYNC_ACT_TDB */ 152 { pfsync_in_tdb, sizeof(struct pfsync_tdb) }, 153 }; 154 155 struct pfsync_q { 156 void (*write)(struct pf_state *, void *); 157 size_t len; 158 u_int8_t action; 159 }; 160 161 /* we have one of these for every PFSYNC_S_ */ 162 void pfsync_out_state(struct pf_state *, void *); 163 void pfsync_out_iack(struct pf_state *, void *); 164 void pfsync_out_upd_c(struct pf_state *, void *); 165 void pfsync_out_del(struct pf_state *, void *); 166 167 struct pfsync_q pfsync_qs[] = { 168 { pfsync_out_iack, sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK }, 169 { pfsync_out_upd_c, sizeof(struct pfsync_upd_c), PFSYNC_ACT_UPD_C }, 170 { pfsync_out_del, sizeof(struct pfsync_del_c), PFSYNC_ACT_DEL_C }, 171 { pfsync_out_state, sizeof(struct pfsync_state), PFSYNC_ACT_INS }, 172 { pfsync_out_state, sizeof(struct pfsync_state), PFSYNC_ACT_UPD } 173 }; 174 175 void pfsync_q_ins(struct pf_state *, int); 176 void pfsync_q_del(struct pf_state *); 177 178 struct pfsync_upd_req_item { 179 TAILQ_ENTRY(pfsync_upd_req_item) ur_entry; 180 struct pfsync_upd_req ur_msg; 181 }; 182 TAILQ_HEAD(pfsync_upd_reqs, pfsync_upd_req_item); 183 184 struct pfsync_deferral { 185 TAILQ_ENTRY(pfsync_deferral) pd_entry; 186 struct pf_state *pd_st; 187 struct mbuf *pd_m; 188 struct timeout pd_tmo; 189 }; 190 TAILQ_HEAD(pfsync_deferrals, pfsync_deferral); 191 192 #define PFSYNC_PLSIZE MAX(sizeof(struct pfsync_upd_req_item), \ 193 sizeof(struct pfsync_deferral)) 194 195 void pfsync_out_tdb(struct tdb *, void *); 196 197 struct pfsync_softc { 198 struct ifnet sc_if; 199 struct ifnet *sc_sync_if; 200 201 struct pool sc_pool; 202 203 struct ip_moptions sc_imo; 204 205 struct in_addr sc_sync_peer; 206 u_int8_t sc_maxupdates; 207 208 struct ip sc_template; 209 210 struct pf_state_queue sc_qs[PFSYNC_S_COUNT]; 211 size_t sc_len; 212 213 struct pfsync_upd_reqs sc_upd_req_list; 214 215 int sc_initial_bulk; 216 int sc_link_demoted; 217 218 int sc_defer; 219 struct pfsync_deferrals sc_deferrals; 220 u_int sc_deferred; 221 222 void *sc_plus; 223 size_t sc_pluslen; 224 225 u_int32_t sc_ureq_sent; 226 int sc_bulk_tries; 227 struct timeout sc_bulkfail_tmo; 228 229 u_int32_t sc_ureq_received; 230 struct pf_state *sc_bulk_next; 231 struct pf_state *sc_bulk_last; 232 struct timeout sc_bulk_tmo; 233 234 TAILQ_HEAD(, tdb) sc_tdb_q; 235 236 void *sc_lhcookie; 237 238 struct timeout sc_tmo; 239 }; 240 241 struct pfsync_softc *pfsyncif = NULL; 242 struct pfsyncstats pfsyncstats; 243 244 void pfsyncattach(int); 245 int pfsync_clone_create(struct if_clone *, int); 246 int pfsync_clone_destroy(struct ifnet *); 247 int pfsync_alloc_scrub_memory(struct pfsync_state_peer *, 248 struct pf_state_peer *); 249 void pfsync_update_net_tdb(struct pfsync_tdb *); 250 int pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *, 251 struct rtentry *); 252 int pfsyncioctl(struct ifnet *, u_long, caddr_t); 253 void pfsyncstart(struct ifnet *); 254 void pfsync_syncdev_state(void *); 255 256 struct mbuf *pfsync_if_dequeue(struct ifnet *); 257 258 void pfsync_deferred(struct pf_state *, int); 259 void pfsync_undefer(struct pfsync_deferral *, int); 260 void pfsync_defer_tmo(void *); 261 262 void pfsync_cancel_full_update(struct pfsync_softc *); 263 void pfsync_request_full_update(struct pfsync_softc *); 264 void pfsync_request_update(u_int32_t, u_int64_t); 265 void pfsync_update_state_req(struct pf_state *); 266 267 void pfsync_drop(struct pfsync_softc *); 268 void pfsync_sendout(void); 269 void pfsync_send_plus(void *, size_t); 270 void pfsync_timeout(void *); 271 void pfsync_tdb_timeout(void *); 272 273 void pfsync_bulk_start(void); 274 void pfsync_bulk_status(u_int8_t); 275 void pfsync_bulk_update(void *); 276 void pfsync_bulk_fail(void *); 277 278 #define PFSYNC_MAX_BULKTRIES 12 279 int pfsync_sync_ok; 280 281 struct if_clone pfsync_cloner = 282 IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create, pfsync_clone_destroy); 283 284 void 285 pfsyncattach(int npfsync) 286 { 287 if_clone_attach(&pfsync_cloner); 288 } 289 290 int 291 pfsync_clone_create(struct if_clone *ifc, int unit) 292 { 293 struct pfsync_softc *sc; 294 struct ifnet *ifp; 295 int q; 296 297 if (unit != 0) 298 return (EINVAL); 299 300 pfsync_sync_ok = 1; 301 302 sc = malloc(sizeof(*pfsyncif), M_DEVBUF, M_WAITOK | M_ZERO); 303 304 for (q = 0; q < PFSYNC_S_COUNT; q++) 305 TAILQ_INIT(&sc->sc_qs[q]); 306 307 pool_init(&sc->sc_pool, PFSYNC_PLSIZE, 0, 0, 0, "pfsync", NULL); 308 TAILQ_INIT(&sc->sc_upd_req_list); 309 TAILQ_INIT(&sc->sc_deferrals); 310 sc->sc_deferred = 0; 311 312 TAILQ_INIT(&sc->sc_tdb_q); 313 314 sc->sc_len = PFSYNC_MINPKT; 315 sc->sc_maxupdates = 128; 316 317 sc->sc_imo.imo_membership = (struct in_multi **)malloc( 318 (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_IPMOPTS, 319 M_WAITOK | M_ZERO); 320 sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS; 321 322 ifp = &sc->sc_if; 323 snprintf(ifp->if_xname, sizeof ifp->if_xname, "pfsync%d", unit); 324 ifp->if_softc = sc; 325 ifp->if_ioctl = pfsyncioctl; 326 ifp->if_output = pfsyncoutput; 327 ifp->if_start = pfsyncstart; 328 ifp->if_type = IFT_PFSYNC; 329 IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); 330 ifp->if_hdrlen = sizeof(struct pfsync_header); 331 ifp->if_mtu = ETHERMTU; 332 timeout_set(&sc->sc_tmo, pfsync_timeout, sc); 333 timeout_set(&sc->sc_bulk_tmo, pfsync_bulk_update, sc); 334 timeout_set(&sc->sc_bulkfail_tmo, pfsync_bulk_fail, sc); 335 336 if_attach(ifp); 337 if_alloc_sadl(ifp); 338 339 #if NCARP > 0 340 if_addgroup(ifp, "carp"); 341 #endif 342 343 #if NBPFILTER > 0 344 bpfattach(&sc->sc_if.if_bpf, ifp, DLT_PFSYNC, PFSYNC_HDRLEN); 345 #endif 346 347 pfsyncif = sc; 348 349 return (0); 350 } 351 352 int 353 pfsync_clone_destroy(struct ifnet *ifp) 354 { 355 struct pfsync_softc *sc = ifp->if_softc; 356 struct pfsync_deferral *pd; 357 int s; 358 359 s = splsoftnet(); 360 timeout_del(&sc->sc_bulkfail_tmo); 361 timeout_del(&sc->sc_bulk_tmo); 362 timeout_del(&sc->sc_tmo); 363 #if NCARP > 0 364 if (!pfsync_sync_ok) 365 carp_group_demote_adj(&sc->sc_if, -1, "pfsync destroy"); 366 if (sc->sc_link_demoted) 367 carp_group_demote_adj(&sc->sc_if, -1, "pfsync destroy"); 368 #endif 369 if (sc->sc_sync_if) 370 hook_disestablish( 371 sc->sc_sync_if->if_linkstatehooks, 372 sc->sc_lhcookie); 373 if_detach(ifp); 374 375 pfsync_drop(sc); 376 377 while (sc->sc_deferred > 0) { 378 pd = TAILQ_FIRST(&sc->sc_deferrals); 379 timeout_del(&pd->pd_tmo); 380 pfsync_undefer(pd, 0); 381 } 382 383 pool_destroy(&sc->sc_pool); 384 free(sc->sc_imo.imo_membership, M_IPMOPTS, 0); 385 free(sc, M_DEVBUF, 0); 386 387 pfsyncif = NULL; 388 splx(s); 389 390 return (0); 391 } 392 393 struct mbuf * 394 pfsync_if_dequeue(struct ifnet *ifp) 395 { 396 struct mbuf *m; 397 398 IF_DEQUEUE(&ifp->if_snd, m); 399 400 return (m); 401 } 402 403 /* 404 * Start output on the pfsync interface. 405 */ 406 void 407 pfsyncstart(struct ifnet *ifp) 408 { 409 struct mbuf *m; 410 int s; 411 412 s = splnet(); 413 while ((m = pfsync_if_dequeue(ifp)) != NULL) { 414 IF_DROP(&ifp->if_snd); 415 m_freem(m); 416 } 417 splx(s); 418 } 419 420 void 421 pfsync_syncdev_state(void *arg) 422 { 423 struct pfsync_softc *sc = arg; 424 425 if (!sc->sc_sync_if || !(sc->sc_if.if_flags & IFF_UP)) 426 return; 427 428 if (sc->sc_sync_if->if_link_state == LINK_STATE_DOWN) { 429 sc->sc_if.if_flags &= ~IFF_RUNNING; 430 if (!sc->sc_link_demoted) { 431 #if NCARP > 0 432 carp_group_demote_adj(&sc->sc_if, 1, 433 "pfsync link state down"); 434 #endif 435 sc->sc_link_demoted = 1; 436 } 437 438 /* drop everything */ 439 timeout_del(&sc->sc_tmo); 440 pfsync_drop(sc); 441 442 pfsync_cancel_full_update(sc); 443 } else if (sc->sc_link_demoted) { 444 sc->sc_if.if_flags |= IFF_RUNNING; 445 446 pfsync_request_full_update(sc); 447 } 448 } 449 450 int 451 pfsync_alloc_scrub_memory(struct pfsync_state_peer *s, 452 struct pf_state_peer *d) 453 { 454 if (s->scrub.scrub_flag && d->scrub == NULL) { 455 d->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT | PR_ZERO); 456 if (d->scrub == NULL) 457 return (ENOMEM); 458 } 459 460 return (0); 461 } 462 463 void 464 pfsync_state_export(struct pfsync_state *sp, struct pf_state *st) 465 { 466 pf_state_export(sp, st); 467 } 468 469 int 470 pfsync_state_import(struct pfsync_state *sp, int flags) 471 { 472 struct pf_state *st = NULL; 473 struct pf_state_key *skw = NULL, *sks = NULL; 474 struct pf_rule *r = NULL; 475 struct pfi_kif *kif; 476 int pool_flags; 477 int error; 478 479 if (sp->creatorid == 0) { 480 DPFPRINTF(LOG_NOTICE, "pfsync_state_import: " 481 "invalid creator id: %08x", ntohl(sp->creatorid)); 482 return (EINVAL); 483 } 484 485 if ((kif = pfi_kif_get(sp->ifname)) == NULL) { 486 DPFPRINTF(LOG_NOTICE, "pfsync_state_import: " 487 "unknown interface: %s", sp->ifname); 488 if (flags & PFSYNC_SI_IOCTL) 489 return (EINVAL); 490 return (0); /* skip this state */ 491 } 492 493 if (sp->af == 0) 494 return (0); /* skip this state */ 495 496 /* 497 * If the ruleset checksums match or the state is coming from the ioctl, 498 * it's safe to associate the state with the rule of that number. 499 */ 500 if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) && 501 (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) < 502 pf_main_ruleset.rules.active.rcount) 503 r = pf_main_ruleset.rules.active.ptr_array[ntohl(sp->rule)]; 504 else 505 r = &pf_default_rule; 506 507 if ((r->max_states && r->states_cur >= r->max_states)) 508 goto cleanup; 509 510 if (flags & PFSYNC_SI_IOCTL) 511 pool_flags = PR_WAITOK | PR_LIMITFAIL | PR_ZERO; 512 else 513 pool_flags = PR_NOWAIT | PR_LIMITFAIL | PR_ZERO; 514 515 if ((st = pool_get(&pf_state_pl, pool_flags)) == NULL) 516 goto cleanup; 517 518 if ((skw = pf_alloc_state_key(pool_flags)) == NULL) 519 goto cleanup; 520 521 if ((sp->key[PF_SK_WIRE].af && 522 (sp->key[PF_SK_WIRE].af != sp->key[PF_SK_STACK].af)) || 523 PF_ANEQ(&sp->key[PF_SK_WIRE].addr[0], 524 &sp->key[PF_SK_STACK].addr[0], sp->af) || 525 PF_ANEQ(&sp->key[PF_SK_WIRE].addr[1], 526 &sp->key[PF_SK_STACK].addr[1], sp->af) || 527 sp->key[PF_SK_WIRE].port[0] != sp->key[PF_SK_STACK].port[0] || 528 sp->key[PF_SK_WIRE].port[1] != sp->key[PF_SK_STACK].port[1] || 529 sp->key[PF_SK_WIRE].rdomain != sp->key[PF_SK_STACK].rdomain) { 530 if ((sks = pf_alloc_state_key(pool_flags)) == NULL) 531 goto cleanup; 532 } else 533 sks = skw; 534 535 /* allocate memory for scrub info */ 536 if (pfsync_alloc_scrub_memory(&sp->src, &st->src) || 537 pfsync_alloc_scrub_memory(&sp->dst, &st->dst)) 538 goto cleanup; 539 540 /* copy to state key(s) */ 541 skw->addr[0] = sp->key[PF_SK_WIRE].addr[0]; 542 skw->addr[1] = sp->key[PF_SK_WIRE].addr[1]; 543 skw->port[0] = sp->key[PF_SK_WIRE].port[0]; 544 skw->port[1] = sp->key[PF_SK_WIRE].port[1]; 545 skw->rdomain = ntohs(sp->key[PF_SK_WIRE].rdomain); 546 skw->proto = sp->proto; 547 if (!(skw->af = sp->key[PF_SK_WIRE].af)) 548 skw->af = sp->af; 549 if (sks != skw) { 550 sks->addr[0] = sp->key[PF_SK_STACK].addr[0]; 551 sks->addr[1] = sp->key[PF_SK_STACK].addr[1]; 552 sks->port[0] = sp->key[PF_SK_STACK].port[0]; 553 sks->port[1] = sp->key[PF_SK_STACK].port[1]; 554 sks->rdomain = ntohs(sp->key[PF_SK_STACK].rdomain); 555 if (!(sks->af = sp->key[PF_SK_STACK].af)) 556 sks->af = sp->af; 557 if (sks->af != skw->af) { 558 switch (sp->proto) { 559 case IPPROTO_ICMP: 560 sks->proto = IPPROTO_ICMPV6; 561 break; 562 case IPPROTO_ICMPV6: 563 sks->proto = IPPROTO_ICMP; 564 break; 565 default: 566 sks->proto = sp->proto; 567 } 568 } else 569 sks->proto = sp->proto; 570 } 571 st->rtableid[PF_SK_WIRE] = ntohl(sp->rtableid[PF_SK_WIRE]); 572 st->rtableid[PF_SK_STACK] = ntohl(sp->rtableid[PF_SK_STACK]); 573 574 /* copy to state */ 575 bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr)); 576 st->creation = time_uptime - ntohl(sp->creation); 577 st->expire = time_uptime; 578 if (ntohl(sp->expire)) { 579 u_int32_t timeout; 580 581 timeout = r->timeout[sp->timeout]; 582 if (!timeout) 583 timeout = pf_default_rule.timeout[sp->timeout]; 584 585 /* sp->expire may have been adaptively scaled by export. */ 586 st->expire -= timeout - ntohl(sp->expire); 587 } 588 589 st->direction = sp->direction; 590 st->log = sp->log; 591 st->timeout = sp->timeout; 592 st->state_flags = ntohs(sp->state_flags); 593 st->max_mss = ntohs(sp->max_mss); 594 st->min_ttl = sp->min_ttl; 595 st->set_tos = sp->set_tos; 596 597 st->id = sp->id; 598 st->creatorid = sp->creatorid; 599 pf_state_peer_ntoh(&sp->src, &st->src); 600 pf_state_peer_ntoh(&sp->dst, &st->dst); 601 602 st->rule.ptr = r; 603 st->anchor.ptr = NULL; 604 st->rt_kif = NULL; 605 606 st->pfsync_time = time_uptime; 607 st->sync_state = PFSYNC_S_NONE; 608 609 /* XXX when we have anchors, use STATE_INC_COUNTERS */ 610 r->states_cur++; 611 r->states_tot++; 612 613 if (!ISSET(flags, PFSYNC_SI_IOCTL)) 614 SET(st->state_flags, PFSTATE_NOSYNC); 615 616 if (pf_state_insert(kif, &skw, &sks, st) != 0) { 617 /* XXX when we have anchors, use STATE_DEC_COUNTERS */ 618 r->states_cur--; 619 error = EEXIST; 620 goto cleanup_state; 621 } 622 623 if (!ISSET(flags, PFSYNC_SI_IOCTL)) { 624 CLR(st->state_flags, PFSTATE_NOSYNC); 625 if (ISSET(st->state_flags, PFSTATE_ACK)) { 626 pfsync_q_ins(st, PFSYNC_S_IACK); 627 schednetisr(NETISR_PFSYNC); 628 } 629 } 630 CLR(st->state_flags, PFSTATE_ACK); 631 632 return (0); 633 634 cleanup: 635 error = ENOMEM; 636 if (skw == sks) 637 sks = NULL; 638 if (skw != NULL) 639 pool_put(&pf_state_key_pl, skw); 640 if (sks != NULL) 641 pool_put(&pf_state_key_pl, sks); 642 643 cleanup_state: /* pf_state_insert frees the state keys */ 644 if (st) { 645 if (st->dst.scrub) 646 pool_put(&pf_state_scrub_pl, st->dst.scrub); 647 if (st->src.scrub) 648 pool_put(&pf_state_scrub_pl, st->src.scrub); 649 pool_put(&pf_state_pl, st); 650 } 651 return (error); 652 } 653 654 void 655 pfsync_input(struct mbuf *m, ...) 656 { 657 struct pfsync_softc *sc = pfsyncif; 658 struct ip *ip = mtod(m, struct ip *); 659 struct mbuf *mp; 660 struct pfsync_header *ph; 661 struct pfsync_subheader subh; 662 663 int offset, offp, len, count, mlen, flags = 0; 664 665 pfsyncstats.pfsyncs_ipackets++; 666 667 /* verify that we have a sync interface configured */ 668 if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING) || 669 sc->sc_sync_if == NULL || !pf_status.running) 670 goto done; 671 672 /* verify that the packet came in on the right interface */ 673 if (sc->sc_sync_if != m->m_pkthdr.rcvif) { 674 pfsyncstats.pfsyncs_badif++; 675 goto done; 676 } 677 678 sc->sc_if.if_ipackets++; 679 sc->sc_if.if_ibytes += m->m_pkthdr.len; 680 681 /* verify that the IP TTL is 255. */ 682 if (ip->ip_ttl != PFSYNC_DFLTTL) { 683 pfsyncstats.pfsyncs_badttl++; 684 goto done; 685 } 686 687 offset = ip->ip_hl << 2; 688 mp = m_pulldown(m, offset, sizeof(*ph), &offp); 689 if (mp == NULL) { 690 pfsyncstats.pfsyncs_hdrops++; 691 return; 692 } 693 ph = (struct pfsync_header *)(mp->m_data + offp); 694 695 /* verify the version */ 696 if (ph->version != PFSYNC_VERSION) { 697 pfsyncstats.pfsyncs_badver++; 698 goto done; 699 } 700 len = ntohs(ph->len) + offset; 701 if (m->m_pkthdr.len < len) { 702 pfsyncstats.pfsyncs_badlen++; 703 goto done; 704 } 705 706 if (!bcmp(&ph->pfcksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH)) 707 flags = PFSYNC_SI_CKSUM; 708 709 offset += sizeof(*ph); 710 while (offset <= len - sizeof(subh)) { 711 m_copydata(m, offset, sizeof(subh), (caddr_t)&subh); 712 offset += sizeof(subh); 713 714 mlen = subh.len << 2; 715 count = ntohs(subh.count); 716 717 if (subh.action >= PFSYNC_ACT_MAX || 718 subh.action >= nitems(pfsync_acts) || 719 mlen < pfsync_acts[subh.action].len) { 720 /* 721 * subheaders are always followed by at least one 722 * message, so if the peer is new 723 * enough to tell us how big its messages are then we 724 * know enough to skip them. 725 */ 726 if (count > 0 && mlen > 0) { 727 offset += count * mlen; 728 continue; 729 } 730 pfsyncstats.pfsyncs_badact++; 731 goto done; 732 } 733 734 mp = m_pulldown(m, offset, mlen * count, &offp); 735 if (mp == NULL) { 736 pfsyncstats.pfsyncs_badlen++; 737 return; 738 } 739 740 if (pfsync_acts[subh.action].in(mp->m_data + offp, 741 mlen, count, flags) != 0) 742 goto done; 743 744 offset += mlen * count; 745 } 746 747 done: 748 m_freem(m); 749 } 750 751 int 752 pfsync_in_clr(caddr_t buf, int len, int count, int flags) 753 { 754 struct pfsync_clr *clr; 755 int i; 756 757 struct pf_state *st, *nexts; 758 struct pf_state_key *sk, *nextsk; 759 struct pf_state_item *si; 760 u_int32_t creatorid; 761 762 for (i = 0; i < count; i++) { 763 clr = (struct pfsync_clr *)buf + len * i; 764 creatorid = clr->creatorid; 765 766 if (clr->ifname[0] == '\0') { 767 for (st = RB_MIN(pf_state_tree_id, &tree_id); 768 st; st = nexts) { 769 nexts = RB_NEXT(pf_state_tree_id, &tree_id, st); 770 if (st->creatorid == creatorid) { 771 SET(st->state_flags, PFSTATE_NOSYNC); 772 pf_unlink_state(st); 773 } 774 } 775 } else { 776 if (pfi_kif_get(clr->ifname) == NULL) 777 continue; 778 779 /* XXX correct? */ 780 for (sk = RB_MIN(pf_state_tree, &pf_statetbl); 781 sk; sk = nextsk) { 782 nextsk = RB_NEXT(pf_state_tree, 783 &pf_statetbl, sk); 784 TAILQ_FOREACH(si, &sk->states, entry) { 785 if (si->s->creatorid == creatorid) { 786 SET(si->s->state_flags, 787 PFSTATE_NOSYNC); 788 pf_unlink_state(si->s); 789 } 790 } 791 } 792 } 793 } 794 795 return (0); 796 } 797 798 int 799 pfsync_in_ins(caddr_t buf, int len, int count, int flags) 800 { 801 struct pfsync_state *sp; 802 sa_family_t af1, af2; 803 int i; 804 805 for (i = 0; i < count; i++) { 806 sp = (struct pfsync_state *)(buf + len * i); 807 af1 = sp->key[0].af; 808 af2 = sp->key[1].af; 809 810 /* check for invalid values */ 811 if (sp->timeout >= PFTM_MAX || 812 sp->src.state > PF_TCPS_PROXY_DST || 813 sp->dst.state > PF_TCPS_PROXY_DST || 814 sp->direction > PF_OUT || 815 (((af1 || af2) && 816 ((af1 != AF_INET && af1 != AF_INET6) || 817 (af2 != AF_INET && af2 != AF_INET6))) || 818 (sp->af != AF_INET && sp->af != AF_INET6))) { 819 DPFPRINTF(LOG_NOTICE, 820 "pfsync_input: PFSYNC5_ACT_INS: invalid value"); 821 pfsyncstats.pfsyncs_badval++; 822 continue; 823 } 824 825 if (pfsync_state_import(sp, flags) == ENOMEM) { 826 /* drop out, but process the rest of the actions */ 827 break; 828 } 829 } 830 831 return (0); 832 } 833 834 int 835 pfsync_in_iack(caddr_t buf, int len, int count, int flags) 836 { 837 struct pfsync_ins_ack *ia; 838 struct pf_state_cmp id_key; 839 struct pf_state *st; 840 int i; 841 842 for (i = 0; i < count; i++) { 843 ia = (struct pfsync_ins_ack *)(buf + len * i); 844 845 id_key.id = ia->id; 846 id_key.creatorid = ia->creatorid; 847 848 st = pf_find_state_byid(&id_key); 849 if (st == NULL) 850 continue; 851 852 if (ISSET(st->state_flags, PFSTATE_ACK)) 853 pfsync_deferred(st, 0); 854 } 855 856 return (0); 857 } 858 859 int 860 pfsync_upd_tcp(struct pf_state *st, struct pfsync_state_peer *src, 861 struct pfsync_state_peer *dst) 862 { 863 int sync = 0; 864 865 /* 866 * The state should never go backwards except 867 * for syn-proxy states. Neither should the 868 * sequence window slide backwards. 869 */ 870 if ((st->src.state > src->state && 871 (st->src.state < PF_TCPS_PROXY_SRC || 872 src->state >= PF_TCPS_PROXY_SRC)) || 873 874 (st->src.state == src->state && 875 SEQ_GT(st->src.seqlo, ntohl(src->seqlo)))) 876 sync++; 877 else 878 pf_state_peer_ntoh(src, &st->src); 879 880 if ((st->dst.state > dst->state) || 881 882 (st->dst.state >= TCPS_SYN_SENT && 883 SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo)))) 884 sync++; 885 else 886 pf_state_peer_ntoh(dst, &st->dst); 887 888 return (sync); 889 } 890 891 int 892 pfsync_in_upd(caddr_t buf, int len, int count, int flags) 893 { 894 struct pfsync_state *sp; 895 struct pf_state_cmp id_key; 896 struct pf_state *st; 897 int sync; 898 899 int i; 900 901 for (i = 0; i < count; i++) { 902 sp = (struct pfsync_state *)(buf + len * i); 903 904 /* check for invalid values */ 905 if (sp->timeout >= PFTM_MAX || 906 sp->src.state > PF_TCPS_PROXY_DST || 907 sp->dst.state > PF_TCPS_PROXY_DST) { 908 DPFPRINTF(LOG_NOTICE, 909 "pfsync_input: PFSYNC_ACT_UPD: invalid value"); 910 pfsyncstats.pfsyncs_badval++; 911 continue; 912 } 913 914 id_key.id = sp->id; 915 id_key.creatorid = sp->creatorid; 916 917 st = pf_find_state_byid(&id_key); 918 if (st == NULL) { 919 /* insert the update */ 920 if (pfsync_state_import(sp, flags)) 921 pfsyncstats.pfsyncs_badstate++; 922 continue; 923 } 924 925 if (ISSET(st->state_flags, PFSTATE_ACK)) 926 pfsync_deferred(st, 1); 927 928 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) 929 sync = pfsync_upd_tcp(st, &sp->src, &sp->dst); 930 else { 931 sync = 0; 932 933 /* 934 * Non-TCP protocol state machine always go 935 * forwards 936 */ 937 if (st->src.state > sp->src.state) 938 sync++; 939 else 940 pf_state_peer_ntoh(&sp->src, &st->src); 941 942 if (st->dst.state > sp->dst.state) 943 sync++; 944 else 945 pf_state_peer_ntoh(&sp->dst, &st->dst); 946 } 947 948 if (sync < 2) { 949 pfsync_alloc_scrub_memory(&sp->dst, &st->dst); 950 pf_state_peer_ntoh(&sp->dst, &st->dst); 951 st->expire = time_uptime; 952 st->timeout = sp->timeout; 953 } 954 st->pfsync_time = time_uptime; 955 956 if (sync) { 957 pfsyncstats.pfsyncs_stale++; 958 959 pfsync_update_state(st); 960 schednetisr(NETISR_PFSYNC); 961 } 962 } 963 964 return (0); 965 } 966 967 int 968 pfsync_in_upd_c(caddr_t buf, int len, int count, int flags) 969 { 970 struct pfsync_upd_c *up; 971 struct pf_state_cmp id_key; 972 struct pf_state *st; 973 974 int sync; 975 976 int i; 977 978 for (i = 0; i < count; i++) { 979 up = (struct pfsync_upd_c *)(buf + len * i); 980 981 /* check for invalid values */ 982 if (up->timeout >= PFTM_MAX || 983 up->src.state > PF_TCPS_PROXY_DST || 984 up->dst.state > PF_TCPS_PROXY_DST) { 985 DPFPRINTF(LOG_NOTICE, 986 "pfsync_input: PFSYNC_ACT_UPD_C: invalid value"); 987 pfsyncstats.pfsyncs_badval++; 988 continue; 989 } 990 991 id_key.id = up->id; 992 id_key.creatorid = up->creatorid; 993 994 st = pf_find_state_byid(&id_key); 995 if (st == NULL) { 996 /* We don't have this state. Ask for it. */ 997 pfsync_request_update(id_key.creatorid, id_key.id); 998 continue; 999 } 1000 1001 if (ISSET(st->state_flags, PFSTATE_ACK)) 1002 pfsync_deferred(st, 1); 1003 1004 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) 1005 sync = pfsync_upd_tcp(st, &up->src, &up->dst); 1006 else { 1007 sync = 0; 1008 /* 1009 * Non-TCP protocol state machine always go 1010 * forwards 1011 */ 1012 if (st->src.state > up->src.state) 1013 sync++; 1014 else 1015 pf_state_peer_ntoh(&up->src, &st->src); 1016 1017 if (st->dst.state > up->dst.state) 1018 sync++; 1019 else 1020 pf_state_peer_ntoh(&up->dst, &st->dst); 1021 } 1022 if (sync < 2) { 1023 pfsync_alloc_scrub_memory(&up->dst, &st->dst); 1024 pf_state_peer_ntoh(&up->dst, &st->dst); 1025 st->expire = time_uptime; 1026 st->timeout = up->timeout; 1027 } 1028 st->pfsync_time = time_uptime; 1029 1030 if (sync) { 1031 pfsyncstats.pfsyncs_stale++; 1032 1033 pfsync_update_state(st); 1034 schednetisr(NETISR_PFSYNC); 1035 } 1036 } 1037 1038 return (0); 1039 } 1040 1041 int 1042 pfsync_in_ureq(caddr_t buf, int len, int count, int flags) 1043 { 1044 struct pfsync_upd_req *ur; 1045 int i; 1046 1047 struct pf_state_cmp id_key; 1048 struct pf_state *st; 1049 1050 for (i = 0; i < count; i++) { 1051 ur = (struct pfsync_upd_req *)(buf + len * i); 1052 1053 id_key.id = ur->id; 1054 id_key.creatorid = ur->creatorid; 1055 1056 if (id_key.id == 0 && id_key.creatorid == 0) 1057 pfsync_bulk_start(); 1058 else { 1059 st = pf_find_state_byid(&id_key); 1060 if (st == NULL) { 1061 pfsyncstats.pfsyncs_badstate++; 1062 continue; 1063 } 1064 if (ISSET(st->state_flags, PFSTATE_NOSYNC)) 1065 continue; 1066 1067 pfsync_update_state_req(st); 1068 } 1069 } 1070 1071 return (0); 1072 } 1073 1074 int 1075 pfsync_in_del(caddr_t buf, int len, int count, int flags) 1076 { 1077 struct pfsync_state *sp; 1078 struct pf_state_cmp id_key; 1079 struct pf_state *st; 1080 int i; 1081 1082 for (i = 0; i < count; i++) { 1083 sp = (struct pfsync_state *)(buf + len * i); 1084 1085 id_key.id = sp->id; 1086 id_key.creatorid = sp->creatorid; 1087 1088 st = pf_find_state_byid(&id_key); 1089 if (st == NULL) { 1090 pfsyncstats.pfsyncs_badstate++; 1091 continue; 1092 } 1093 SET(st->state_flags, PFSTATE_NOSYNC); 1094 pf_unlink_state(st); 1095 } 1096 1097 return (0); 1098 } 1099 1100 int 1101 pfsync_in_del_c(caddr_t buf, int len, int count, int flags) 1102 { 1103 struct pfsync_del_c *sp; 1104 struct pf_state_cmp id_key; 1105 struct pf_state *st; 1106 int i; 1107 1108 for (i = 0; i < count; i++) { 1109 sp = (struct pfsync_del_c *)(buf + len * i); 1110 1111 id_key.id = sp->id; 1112 id_key.creatorid = sp->creatorid; 1113 1114 st = pf_find_state_byid(&id_key); 1115 if (st == NULL) { 1116 pfsyncstats.pfsyncs_badstate++; 1117 continue; 1118 } 1119 1120 SET(st->state_flags, PFSTATE_NOSYNC); 1121 pf_unlink_state(st); 1122 } 1123 1124 return (0); 1125 } 1126 1127 int 1128 pfsync_in_bus(caddr_t buf, int len, int count, int flags) 1129 { 1130 struct pfsync_softc *sc = pfsyncif; 1131 struct pfsync_bus *bus; 1132 1133 /* If we're not waiting for a bulk update, who cares. */ 1134 if (sc->sc_ureq_sent == 0) 1135 return (0); 1136 1137 bus = (struct pfsync_bus *)buf; 1138 1139 switch (bus->status) { 1140 case PFSYNC_BUS_START: 1141 timeout_add(&sc->sc_bulkfail_tmo, 4 * hz + 1142 pf_pool_limits[PF_LIMIT_STATES].limit / 1143 ((sc->sc_if.if_mtu - PFSYNC_MINPKT) / 1144 sizeof(struct pfsync_state))); 1145 DPFPRINTF(LOG_INFO, "received bulk update start"); 1146 break; 1147 1148 case PFSYNC_BUS_END: 1149 if (time_uptime - ntohl(bus->endtime) >= 1150 sc->sc_ureq_sent) { 1151 /* that's it, we're happy */ 1152 sc->sc_ureq_sent = 0; 1153 sc->sc_bulk_tries = 0; 1154 timeout_del(&sc->sc_bulkfail_tmo); 1155 #if NCARP > 0 1156 if (!pfsync_sync_ok) 1157 carp_group_demote_adj(&sc->sc_if, -1, 1158 sc->sc_link_demoted ? 1159 "pfsync link state up" : 1160 "pfsync bulk done"); 1161 if (sc->sc_initial_bulk) { 1162 carp_group_demote_adj(&sc->sc_if, -32, 1163 "pfsync init"); 1164 sc->sc_initial_bulk = 0; 1165 } 1166 #endif 1167 pfsync_sync_ok = 1; 1168 sc->sc_link_demoted = 0; 1169 DPFPRINTF(LOG_INFO, "received valid bulk update end"); 1170 } else { 1171 DPFPRINTF(LOG_WARNING, "received invalid " 1172 "bulk update end: bad timestamp"); 1173 } 1174 break; 1175 } 1176 1177 return (0); 1178 } 1179 1180 int 1181 pfsync_in_tdb(caddr_t buf, int len, int count, int flags) 1182 { 1183 #if defined(IPSEC) 1184 struct pfsync_tdb *tp; 1185 int i; 1186 1187 for (i = 0; i < count; i++) { 1188 tp = (struct pfsync_tdb *)(buf + len * i); 1189 pfsync_update_net_tdb(tp); 1190 } 1191 #endif 1192 1193 return (0); 1194 } 1195 1196 #if defined(IPSEC) 1197 /* Update an in-kernel tdb. Silently fail if no tdb is found. */ 1198 void 1199 pfsync_update_net_tdb(struct pfsync_tdb *pt) 1200 { 1201 struct tdb *tdb; 1202 int s; 1203 1204 /* check for invalid values */ 1205 if (ntohl(pt->spi) <= SPI_RESERVED_MAX || 1206 (pt->dst.sa.sa_family != AF_INET && 1207 pt->dst.sa.sa_family != AF_INET6)) 1208 goto bad; 1209 1210 s = splsoftnet(); 1211 tdb = gettdb(ntohs(pt->rdomain), pt->spi, &pt->dst, pt->sproto); 1212 if (tdb) { 1213 pt->rpl = betoh64(pt->rpl); 1214 pt->cur_bytes = betoh64(pt->cur_bytes); 1215 1216 /* Neither replay nor byte counter should ever decrease. */ 1217 if (pt->rpl < tdb->tdb_rpl || 1218 pt->cur_bytes < tdb->tdb_cur_bytes) { 1219 splx(s); 1220 goto bad; 1221 } 1222 1223 tdb->tdb_rpl = pt->rpl; 1224 tdb->tdb_cur_bytes = pt->cur_bytes; 1225 } 1226 splx(s); 1227 return; 1228 1229 bad: 1230 DPFPRINTF(LOG_WARNING, "pfsync_insert: PFSYNC_ACT_TDB_UPD: " 1231 "invalid value"); 1232 pfsyncstats.pfsyncs_badstate++; 1233 return; 1234 } 1235 #endif 1236 1237 1238 int 1239 pfsync_in_eof(caddr_t buf, int len, int count, int flags) 1240 { 1241 if (len > 0 || count > 0) 1242 pfsyncstats.pfsyncs_badact++; 1243 1244 /* we're done. let the caller return */ 1245 return (1); 1246 } 1247 1248 int 1249 pfsync_in_error(caddr_t buf, int len, int count, int flags) 1250 { 1251 pfsyncstats.pfsyncs_badact++; 1252 return (-1); 1253 } 1254 1255 int 1256 pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, 1257 struct rtentry *rt) 1258 { 1259 m_freem(m); 1260 return (0); 1261 } 1262 1263 /* ARGSUSED */ 1264 int 1265 pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 1266 { 1267 struct proc *p = curproc; 1268 struct pfsync_softc *sc = ifp->if_softc; 1269 struct ifreq *ifr = (struct ifreq *)data; 1270 struct ip_moptions *imo = &sc->sc_imo; 1271 struct pfsyncreq pfsyncr; 1272 struct ifnet *sifp; 1273 struct ip *ip; 1274 int s, error; 1275 1276 switch (cmd) { 1277 #if 0 1278 case SIOCSIFADDR: 1279 case SIOCAIFADDR: 1280 case SIOCSIFDSTADDR: 1281 #endif 1282 case SIOCSIFFLAGS: 1283 s = splnet(); 1284 if ((ifp->if_flags & IFF_RUNNING) == 0 && 1285 (ifp->if_flags & IFF_UP)) { 1286 ifp->if_flags |= IFF_RUNNING; 1287 1288 #if NCARP > 0 1289 sc->sc_initial_bulk = 1; 1290 carp_group_demote_adj(&sc->sc_if, 32, "pfsync init"); 1291 #endif 1292 1293 pfsync_request_full_update(sc); 1294 } 1295 if ((ifp->if_flags & IFF_RUNNING) && 1296 (ifp->if_flags & IFF_UP) == 0) { 1297 ifp->if_flags &= ~IFF_RUNNING; 1298 1299 /* drop everything */ 1300 timeout_del(&sc->sc_tmo); 1301 pfsync_drop(sc); 1302 1303 pfsync_cancel_full_update(sc); 1304 } 1305 splx(s); 1306 break; 1307 case SIOCSIFMTU: 1308 if (!sc->sc_sync_if || 1309 ifr->ifr_mtu <= PFSYNC_MINPKT || 1310 ifr->ifr_mtu > sc->sc_sync_if->if_mtu) 1311 return (EINVAL); 1312 s = splnet(); 1313 if (ifr->ifr_mtu < ifp->if_mtu) 1314 pfsync_sendout(); 1315 ifp->if_mtu = ifr->ifr_mtu; 1316 splx(s); 1317 break; 1318 case SIOCGETPFSYNC: 1319 bzero(&pfsyncr, sizeof(pfsyncr)); 1320 if (sc->sc_sync_if) { 1321 strlcpy(pfsyncr.pfsyncr_syncdev, 1322 sc->sc_sync_if->if_xname, IFNAMSIZ); 1323 } 1324 pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer; 1325 pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates; 1326 pfsyncr.pfsyncr_defer = sc->sc_defer; 1327 return (copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr))); 1328 1329 case SIOCSETPFSYNC: 1330 if ((error = suser(p, 0)) != 0) 1331 return (error); 1332 if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr)))) 1333 return (error); 1334 1335 s = splnet(); 1336 1337 if (pfsyncr.pfsyncr_syncpeer.s_addr == 0) 1338 sc->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP; 1339 else 1340 sc->sc_sync_peer.s_addr = 1341 pfsyncr.pfsyncr_syncpeer.s_addr; 1342 1343 if (pfsyncr.pfsyncr_maxupdates > 255) { 1344 splx(s); 1345 return (EINVAL); 1346 } 1347 sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates; 1348 1349 sc->sc_defer = pfsyncr.pfsyncr_defer; 1350 1351 if (pfsyncr.pfsyncr_syncdev[0] == 0) { 1352 if (sc->sc_sync_if) 1353 hook_disestablish( 1354 sc->sc_sync_if->if_linkstatehooks, 1355 sc->sc_lhcookie); 1356 sc->sc_sync_if = NULL; 1357 if (imo->imo_num_memberships > 0) { 1358 in_delmulti(imo->imo_membership[ 1359 --imo->imo_num_memberships]); 1360 imo->imo_multicast_ifp = NULL; 1361 } 1362 splx(s); 1363 break; 1364 } 1365 1366 if ((sifp = ifunit(pfsyncr.pfsyncr_syncdev)) == NULL) { 1367 splx(s); 1368 return (EINVAL); 1369 } 1370 1371 if (sifp->if_mtu < sc->sc_if.if_mtu || 1372 (sc->sc_sync_if != NULL && 1373 sifp->if_mtu < sc->sc_sync_if->if_mtu) || 1374 sifp->if_mtu < MCLBYTES - sizeof(struct ip)) 1375 pfsync_sendout(); 1376 1377 if (sc->sc_sync_if) 1378 hook_disestablish( 1379 sc->sc_sync_if->if_linkstatehooks, 1380 sc->sc_lhcookie); 1381 sc->sc_sync_if = sifp; 1382 1383 if (imo->imo_num_memberships > 0) { 1384 in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); 1385 imo->imo_multicast_ifp = NULL; 1386 } 1387 1388 if (sc->sc_sync_if && 1389 sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) { 1390 struct in_addr addr; 1391 1392 if (!(sc->sc_sync_if->if_flags & IFF_MULTICAST)) { 1393 sc->sc_sync_if = NULL; 1394 splx(s); 1395 return (EADDRNOTAVAIL); 1396 } 1397 1398 addr.s_addr = INADDR_PFSYNC_GROUP; 1399 1400 if ((imo->imo_membership[0] = 1401 in_addmulti(&addr, sc->sc_sync_if)) == NULL) { 1402 sc->sc_sync_if = NULL; 1403 splx(s); 1404 return (ENOBUFS); 1405 } 1406 imo->imo_num_memberships++; 1407 imo->imo_multicast_ifp = sc->sc_sync_if; 1408 imo->imo_multicast_ttl = PFSYNC_DFLTTL; 1409 imo->imo_multicast_loop = 0; 1410 } 1411 1412 ip = &sc->sc_template; 1413 bzero(ip, sizeof(*ip)); 1414 ip->ip_v = IPVERSION; 1415 ip->ip_hl = sizeof(sc->sc_template) >> 2; 1416 ip->ip_tos = IPTOS_LOWDELAY; 1417 /* len and id are set later */ 1418 ip->ip_off = htons(IP_DF); 1419 ip->ip_ttl = PFSYNC_DFLTTL; 1420 ip->ip_p = IPPROTO_PFSYNC; 1421 ip->ip_src.s_addr = INADDR_ANY; 1422 ip->ip_dst.s_addr = sc->sc_sync_peer.s_addr; 1423 1424 sc->sc_lhcookie = 1425 hook_establish(sc->sc_sync_if->if_linkstatehooks, 1, 1426 pfsync_syncdev_state, sc); 1427 1428 pfsync_request_full_update(sc); 1429 splx(s); 1430 1431 break; 1432 1433 default: 1434 return (ENOTTY); 1435 } 1436 1437 return (0); 1438 } 1439 1440 void 1441 pfsync_out_state(struct pf_state *st, void *buf) 1442 { 1443 struct pfsync_state *sp = buf; 1444 1445 pfsync_state_export(sp, st); 1446 } 1447 1448 void 1449 pfsync_out_iack(struct pf_state *st, void *buf) 1450 { 1451 struct pfsync_ins_ack *iack = buf; 1452 1453 iack->id = st->id; 1454 iack->creatorid = st->creatorid; 1455 } 1456 1457 void 1458 pfsync_out_upd_c(struct pf_state *st, void *buf) 1459 { 1460 struct pfsync_upd_c *up = buf; 1461 1462 bzero(up, sizeof(*up)); 1463 up->id = st->id; 1464 pf_state_peer_hton(&st->src, &up->src); 1465 pf_state_peer_hton(&st->dst, &up->dst); 1466 up->creatorid = st->creatorid; 1467 up->timeout = st->timeout; 1468 } 1469 1470 void 1471 pfsync_out_del(struct pf_state *st, void *buf) 1472 { 1473 struct pfsync_del_c *dp = buf; 1474 1475 dp->id = st->id; 1476 dp->creatorid = st->creatorid; 1477 1478 SET(st->state_flags, PFSTATE_NOSYNC); 1479 } 1480 1481 void 1482 pfsync_drop(struct pfsync_softc *sc) 1483 { 1484 struct pf_state *st; 1485 struct pfsync_upd_req_item *ur; 1486 struct tdb *t; 1487 int q; 1488 1489 for (q = 0; q < PFSYNC_S_COUNT; q++) { 1490 if (TAILQ_EMPTY(&sc->sc_qs[q])) 1491 continue; 1492 1493 TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list) { 1494 #ifdef PFSYNC_DEBUG 1495 KASSERT(st->sync_state == q); 1496 #endif 1497 st->sync_state = PFSYNC_S_NONE; 1498 } 1499 TAILQ_INIT(&sc->sc_qs[q]); 1500 } 1501 1502 while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) { 1503 TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry); 1504 pool_put(&sc->sc_pool, ur); 1505 } 1506 1507 sc->sc_plus = NULL; 1508 1509 if (!TAILQ_EMPTY(&sc->sc_tdb_q)) { 1510 TAILQ_FOREACH(t, &sc->sc_tdb_q, tdb_sync_entry) 1511 CLR(t->tdb_flags, TDBF_PFSYNC); 1512 1513 TAILQ_INIT(&sc->sc_tdb_q); 1514 } 1515 1516 sc->sc_len = PFSYNC_MINPKT; 1517 } 1518 1519 void 1520 pfsync_sendout(void) 1521 { 1522 struct pfsync_softc *sc = pfsyncif; 1523 #if NBPFILTER > 0 1524 struct ifnet *ifp = &sc->sc_if; 1525 #endif 1526 struct mbuf *m; 1527 struct ip *ip; 1528 struct pfsync_header *ph; 1529 struct pfsync_subheader *subh; 1530 struct pf_state *st; 1531 struct pfsync_upd_req_item *ur; 1532 struct tdb *t; 1533 1534 int offset; 1535 int q, count = 0; 1536 1537 if (sc == NULL || sc->sc_len == PFSYNC_MINPKT) 1538 return; 1539 1540 if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING) || 1541 #if NBPFILTER > 0 1542 (ifp->if_bpf == NULL && sc->sc_sync_if == NULL)) { 1543 #else 1544 sc->sc_sync_if == NULL) { 1545 #endif 1546 pfsync_drop(sc); 1547 return; 1548 } 1549 1550 MGETHDR(m, M_DONTWAIT, MT_DATA); 1551 if (m == NULL) { 1552 sc->sc_if.if_oerrors++; 1553 pfsyncstats.pfsyncs_onomem++; 1554 pfsync_drop(sc); 1555 return; 1556 } 1557 1558 if (max_linkhdr + sc->sc_len > MHLEN) { 1559 MCLGETI(m, M_DONTWAIT, NULL, max_linkhdr + sc->sc_len); 1560 if (!ISSET(m->m_flags, M_EXT)) { 1561 m_free(m); 1562 sc->sc_if.if_oerrors++; 1563 pfsyncstats.pfsyncs_onomem++; 1564 pfsync_drop(sc); 1565 return; 1566 } 1567 } 1568 m->m_data += max_linkhdr; 1569 m->m_len = m->m_pkthdr.len = sc->sc_len; 1570 1571 /* build the ip header */ 1572 ip = mtod(m, struct ip *); 1573 bcopy(&sc->sc_template, ip, sizeof(*ip)); 1574 offset = sizeof(*ip); 1575 1576 ip->ip_len = htons(m->m_pkthdr.len); 1577 ip->ip_id = htons(ip_randomid()); 1578 1579 /* build the pfsync header */ 1580 ph = (struct pfsync_header *)(m->m_data + offset); 1581 bzero(ph, sizeof(*ph)); 1582 offset += sizeof(*ph); 1583 1584 ph->version = PFSYNC_VERSION; 1585 ph->len = htons(sc->sc_len - sizeof(*ip)); 1586 bcopy(pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH); 1587 1588 if (!TAILQ_EMPTY(&sc->sc_upd_req_list)) { 1589 subh = (struct pfsync_subheader *)(m->m_data + offset); 1590 offset += sizeof(*subh); 1591 1592 count = 0; 1593 while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) { 1594 TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry); 1595 1596 bcopy(&ur->ur_msg, m->m_data + offset, 1597 sizeof(ur->ur_msg)); 1598 offset += sizeof(ur->ur_msg); 1599 1600 pool_put(&sc->sc_pool, ur); 1601 1602 count++; 1603 } 1604 1605 bzero(subh, sizeof(*subh)); 1606 subh->len = sizeof(ur->ur_msg) >> 2; 1607 subh->action = PFSYNC_ACT_UPD_REQ; 1608 subh->count = htons(count); 1609 } 1610 1611 /* has someone built a custom region for us to add? */ 1612 if (sc->sc_plus != NULL) { 1613 bcopy(sc->sc_plus, m->m_data + offset, sc->sc_pluslen); 1614 offset += sc->sc_pluslen; 1615 1616 sc->sc_plus = NULL; 1617 } 1618 1619 if (!TAILQ_EMPTY(&sc->sc_tdb_q)) { 1620 subh = (struct pfsync_subheader *)(m->m_data + offset); 1621 offset += sizeof(*subh); 1622 1623 count = 0; 1624 TAILQ_FOREACH(t, &sc->sc_tdb_q, tdb_sync_entry) { 1625 pfsync_out_tdb(t, m->m_data + offset); 1626 offset += sizeof(struct pfsync_tdb); 1627 CLR(t->tdb_flags, TDBF_PFSYNC); 1628 1629 count++; 1630 } 1631 TAILQ_INIT(&sc->sc_tdb_q); 1632 1633 bzero(subh, sizeof(*subh)); 1634 subh->action = PFSYNC_ACT_TDB; 1635 subh->len = sizeof(struct pfsync_tdb) >> 2; 1636 subh->count = htons(count); 1637 } 1638 1639 /* walk the queues */ 1640 for (q = 0; q < PFSYNC_S_COUNT; q++) { 1641 if (TAILQ_EMPTY(&sc->sc_qs[q])) 1642 continue; 1643 1644 subh = (struct pfsync_subheader *)(m->m_data + offset); 1645 offset += sizeof(*subh); 1646 1647 count = 0; 1648 TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list) { 1649 #ifdef PFSYNC_DEBUG 1650 KASSERT(st->sync_state == q); 1651 #endif 1652 pfsync_qs[q].write(st, m->m_data + offset); 1653 offset += pfsync_qs[q].len; 1654 1655 st->sync_state = PFSYNC_S_NONE; 1656 count++; 1657 } 1658 TAILQ_INIT(&sc->sc_qs[q]); 1659 1660 bzero(subh, sizeof(*subh)); 1661 subh->action = pfsync_qs[q].action; 1662 subh->len = pfsync_qs[q].len >> 2; 1663 subh->count = htons(count); 1664 } 1665 1666 /* we're done, let's put it on the wire */ 1667 #if NBPFILTER > 0 1668 if (ifp->if_bpf) { 1669 m->m_data += sizeof(*ip); 1670 m->m_len = m->m_pkthdr.len = sc->sc_len - sizeof(*ip); 1671 bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT); 1672 m->m_data -= sizeof(*ip); 1673 m->m_len = m->m_pkthdr.len = sc->sc_len; 1674 } 1675 1676 if (sc->sc_sync_if == NULL) { 1677 sc->sc_len = PFSYNC_MINPKT; 1678 m_freem(m); 1679 return; 1680 } 1681 #endif 1682 1683 sc->sc_if.if_opackets++; 1684 sc->sc_if.if_obytes += m->m_pkthdr.len; 1685 1686 m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain; 1687 1688 if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL, 0) == 0) 1689 pfsyncstats.pfsyncs_opackets++; 1690 else 1691 pfsyncstats.pfsyncs_oerrors++; 1692 1693 /* start again */ 1694 sc->sc_len = PFSYNC_MINPKT; 1695 } 1696 1697 void 1698 pfsync_insert_state(struct pf_state *st) 1699 { 1700 struct pfsync_softc *sc = pfsyncif; 1701 1702 splsoftassert(IPL_SOFTNET); 1703 1704 if (ISSET(st->rule.ptr->rule_flag, PFRULE_NOSYNC) || 1705 st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) { 1706 SET(st->state_flags, PFSTATE_NOSYNC); 1707 return; 1708 } 1709 1710 if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING) || 1711 ISSET(st->state_flags, PFSTATE_NOSYNC)) 1712 return; 1713 1714 #ifdef PFSYNC_DEBUG 1715 KASSERT(st->sync_state == PFSYNC_S_NONE); 1716 #endif 1717 1718 if (sc->sc_len == PFSYNC_MINPKT) 1719 timeout_add_sec(&sc->sc_tmo, 1); 1720 1721 pfsync_q_ins(st, PFSYNC_S_INS); 1722 1723 st->sync_updates = 0; 1724 } 1725 1726 int 1727 pfsync_defer(struct pf_state *st, struct mbuf *m) 1728 { 1729 struct pfsync_softc *sc = pfsyncif; 1730 struct pfsync_deferral *pd; 1731 1732 splsoftassert(IPL_SOFTNET); 1733 1734 if (!sc->sc_defer || 1735 ISSET(st->state_flags, PFSTATE_NOSYNC) || 1736 m->m_flags & (M_BCAST|M_MCAST)) 1737 return (0); 1738 1739 if (sc->sc_deferred >= 128) { 1740 pd = TAILQ_FIRST(&sc->sc_deferrals); 1741 if (timeout_del(&pd->pd_tmo)) 1742 pfsync_undefer(pd, 0); 1743 } 1744 1745 pd = pool_get(&sc->sc_pool, M_NOWAIT); 1746 if (pd == NULL) 1747 return (0); 1748 1749 m->m_pkthdr.pf.flags |= PF_TAG_GENERATED; 1750 SET(st->state_flags, PFSTATE_ACK); 1751 1752 pd->pd_st = st; 1753 pd->pd_m = m; 1754 1755 sc->sc_deferred++; 1756 TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry); 1757 1758 timeout_set(&pd->pd_tmo, pfsync_defer_tmo, pd); 1759 timeout_add_msec(&pd->pd_tmo, 20); 1760 1761 schednetisr(NETISR_PFSYNC); 1762 1763 return (1); 1764 } 1765 1766 void 1767 pfsync_undefer(struct pfsync_deferral *pd, int drop) 1768 { 1769 struct pfsync_softc *sc = pfsyncif; 1770 1771 splsoftassert(IPL_SOFTNET); 1772 1773 TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry); 1774 sc->sc_deferred--; 1775 1776 CLR(pd->pd_st->state_flags, PFSTATE_ACK); 1777 if (drop) 1778 m_freem(pd->pd_m); 1779 else { 1780 if (pd->pd_st->rule.ptr->rt == PF_ROUTETO) { 1781 switch (pd->pd_st->key[PF_SK_WIRE]->af) { 1782 #ifdef INET 1783 case AF_INET: 1784 pf_route(&pd->pd_m, pd->pd_st->rule.ptr, 1785 pd->pd_st->direction, 1786 pd->pd_st->rt_kif->pfik_ifp, pd->pd_st); 1787 break; 1788 #endif /* INET */ 1789 #ifdef INET6 1790 case AF_INET6: 1791 pf_route6(&pd->pd_m, pd->pd_st->rule.ptr, 1792 pd->pd_st->direction, 1793 pd->pd_st->rt_kif->pfik_ifp, pd->pd_st); 1794 break; 1795 #endif /* INET6 */ 1796 } 1797 } else { 1798 switch (pd->pd_st->key[PF_SK_WIRE]->af) { 1799 #ifdef INET 1800 case AF_INET: 1801 ip_output(pd->pd_m, NULL, NULL, 0, NULL, NULL, 1802 0); 1803 break; 1804 #endif /* INET */ 1805 #ifdef INET6 1806 case AF_INET6: 1807 ip6_output(pd->pd_m, NULL, NULL, 0, 1808 NULL, NULL, NULL); 1809 break; 1810 #endif /* INET6 */ 1811 } 1812 } 1813 } 1814 1815 pool_put(&sc->sc_pool, pd); 1816 } 1817 1818 void 1819 pfsync_defer_tmo(void *arg) 1820 { 1821 int s; 1822 1823 s = splsoftnet(); 1824 pfsync_undefer(arg, 0); 1825 splx(s); 1826 } 1827 1828 void 1829 pfsync_deferred(struct pf_state *st, int drop) 1830 { 1831 struct pfsync_softc *sc = pfsyncif; 1832 struct pfsync_deferral *pd; 1833 1834 splsoftassert(IPL_SOFTNET); 1835 1836 TAILQ_FOREACH(pd, &sc->sc_deferrals, pd_entry) { 1837 if (pd->pd_st == st) { 1838 if (timeout_del(&pd->pd_tmo)) 1839 pfsync_undefer(pd, drop); 1840 return; 1841 } 1842 } 1843 1844 panic("pfsync_deferred: unable to find deferred state"); 1845 } 1846 1847 void 1848 pfsync_update_state(struct pf_state *st) 1849 { 1850 struct pfsync_softc *sc = pfsyncif; 1851 int sync = 0; 1852 1853 splsoftassert(IPL_SOFTNET); 1854 1855 if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING)) 1856 return; 1857 1858 if (ISSET(st->state_flags, PFSTATE_ACK)) 1859 pfsync_deferred(st, 0); 1860 if (ISSET(st->state_flags, PFSTATE_NOSYNC)) { 1861 if (st->sync_state != PFSYNC_S_NONE) 1862 pfsync_q_del(st); 1863 return; 1864 } 1865 1866 if (sc->sc_len == PFSYNC_MINPKT) 1867 timeout_add_sec(&sc->sc_tmo, 1); 1868 1869 switch (st->sync_state) { 1870 case PFSYNC_S_UPD_C: 1871 case PFSYNC_S_UPD: 1872 case PFSYNC_S_INS: 1873 /* we're already handling it */ 1874 1875 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) { 1876 st->sync_updates++; 1877 if (st->sync_updates >= sc->sc_maxupdates) 1878 sync = 1; 1879 } 1880 break; 1881 1882 case PFSYNC_S_IACK: 1883 pfsync_q_del(st); 1884 case PFSYNC_S_NONE: 1885 pfsync_q_ins(st, PFSYNC_S_UPD_C); 1886 st->sync_updates = 0; 1887 break; 1888 1889 default: 1890 panic("pfsync_update_state: unexpected sync state %d", 1891 st->sync_state); 1892 } 1893 1894 if (sync || (time_uptime - st->pfsync_time) < 2) 1895 schednetisr(NETISR_PFSYNC); 1896 } 1897 1898 void 1899 pfsync_cancel_full_update(struct pfsync_softc *sc) 1900 { 1901 if (timeout_pending(&sc->sc_bulkfail_tmo) || 1902 timeout_pending(&sc->sc_bulk_tmo)) { 1903 #if NCARP > 0 1904 if (!pfsync_sync_ok) 1905 carp_group_demote_adj(&sc->sc_if, -1, 1906 "pfsync bulk cancelled"); 1907 if (sc->sc_initial_bulk) { 1908 carp_group_demote_adj(&sc->sc_if, -32, 1909 "pfsync init"); 1910 sc->sc_initial_bulk = 0; 1911 } 1912 #endif 1913 pfsync_sync_ok = 1; 1914 DPFPRINTF(LOG_INFO, "cancelling bulk update"); 1915 } 1916 timeout_del(&sc->sc_bulkfail_tmo); 1917 timeout_del(&sc->sc_bulk_tmo); 1918 sc->sc_bulk_next = NULL; 1919 sc->sc_bulk_last = NULL; 1920 sc->sc_ureq_sent = 0; 1921 sc->sc_bulk_tries = 0; 1922 } 1923 1924 void 1925 pfsync_request_full_update(struct pfsync_softc *sc) 1926 { 1927 if (sc->sc_sync_if && ISSET(sc->sc_if.if_flags, IFF_RUNNING)) { 1928 /* Request a full state table update. */ 1929 sc->sc_ureq_sent = time_uptime; 1930 #if NCARP > 0 1931 if (!sc->sc_link_demoted && pfsync_sync_ok) 1932 carp_group_demote_adj(&sc->sc_if, 1, 1933 "pfsync bulk start"); 1934 #endif 1935 pfsync_sync_ok = 0; 1936 DPFPRINTF(LOG_INFO, "requesting bulk update"); 1937 timeout_add(&sc->sc_bulkfail_tmo, 4 * hz + 1938 pf_pool_limits[PF_LIMIT_STATES].limit / 1939 ((sc->sc_if.if_mtu - PFSYNC_MINPKT) / 1940 sizeof(struct pfsync_state))); 1941 pfsync_request_update(0, 0); 1942 } 1943 } 1944 1945 void 1946 pfsync_request_update(u_int32_t creatorid, u_int64_t id) 1947 { 1948 struct pfsync_softc *sc = pfsyncif; 1949 struct pfsync_upd_req_item *item; 1950 size_t nlen = sizeof(struct pfsync_upd_req); 1951 1952 /* 1953 * this code does nothing to prevent multiple update requests for the 1954 * same state being generated. 1955 */ 1956 1957 item = pool_get(&sc->sc_pool, PR_NOWAIT); 1958 if (item == NULL) { 1959 /* XXX stats */ 1960 return; 1961 } 1962 1963 item->ur_msg.id = id; 1964 item->ur_msg.creatorid = creatorid; 1965 1966 if (TAILQ_EMPTY(&sc->sc_upd_req_list)) 1967 nlen += sizeof(struct pfsync_subheader); 1968 1969 if (sc->sc_len + nlen > sc->sc_if.if_mtu) { 1970 pfsync_sendout(); 1971 1972 nlen = sizeof(struct pfsync_subheader) + 1973 sizeof(struct pfsync_upd_req); 1974 } 1975 1976 TAILQ_INSERT_TAIL(&sc->sc_upd_req_list, item, ur_entry); 1977 sc->sc_len += nlen; 1978 1979 schednetisr(NETISR_PFSYNC); 1980 } 1981 1982 void 1983 pfsync_update_state_req(struct pf_state *st) 1984 { 1985 struct pfsync_softc *sc = pfsyncif; 1986 1987 if (sc == NULL) 1988 panic("pfsync_update_state_req: nonexistant instance"); 1989 1990 if (ISSET(st->state_flags, PFSTATE_NOSYNC)) { 1991 if (st->sync_state != PFSYNC_S_NONE) 1992 pfsync_q_del(st); 1993 return; 1994 } 1995 1996 switch (st->sync_state) { 1997 case PFSYNC_S_UPD_C: 1998 case PFSYNC_S_IACK: 1999 pfsync_q_del(st); 2000 case PFSYNC_S_NONE: 2001 pfsync_q_ins(st, PFSYNC_S_UPD); 2002 schednetisr(NETISR_PFSYNC); 2003 return; 2004 2005 case PFSYNC_S_INS: 2006 case PFSYNC_S_UPD: 2007 case PFSYNC_S_DEL: 2008 /* we're already handling it */ 2009 return; 2010 2011 default: 2012 panic("pfsync_update_state_req: unexpected sync state %d", 2013 st->sync_state); 2014 } 2015 } 2016 2017 void 2018 pfsync_delete_state(struct pf_state *st) 2019 { 2020 struct pfsync_softc *sc = pfsyncif; 2021 2022 splsoftassert(IPL_SOFTNET); 2023 2024 if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING)) 2025 return; 2026 2027 if (ISSET(st->state_flags, PFSTATE_ACK)) 2028 pfsync_deferred(st, 1); 2029 if (ISSET(st->state_flags, PFSTATE_NOSYNC)) { 2030 if (st->sync_state != PFSYNC_S_NONE) 2031 pfsync_q_del(st); 2032 return; 2033 } 2034 2035 if (sc->sc_len == PFSYNC_MINPKT) 2036 timeout_add_sec(&sc->sc_tmo, 1); 2037 2038 switch (st->sync_state) { 2039 case PFSYNC_S_INS: 2040 /* we never got to tell the world so just forget about it */ 2041 pfsync_q_del(st); 2042 return; 2043 2044 case PFSYNC_S_UPD_C: 2045 case PFSYNC_S_UPD: 2046 case PFSYNC_S_IACK: 2047 pfsync_q_del(st); 2048 /* FALLTHROUGH to putting it on the del list */ 2049 2050 case PFSYNC_S_NONE: 2051 pfsync_q_ins(st, PFSYNC_S_DEL); 2052 return; 2053 2054 default: 2055 panic("pfsync_delete_state: unexpected sync state %d", 2056 st->sync_state); 2057 } 2058 } 2059 2060 void 2061 pfsync_clear_states(u_int32_t creatorid, const char *ifname) 2062 { 2063 struct pfsync_softc *sc = pfsyncif; 2064 struct { 2065 struct pfsync_subheader subh; 2066 struct pfsync_clr clr; 2067 } __packed r; 2068 2069 splsoftassert(IPL_SOFTNET); 2070 2071 if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING)) 2072 return; 2073 2074 bzero(&r, sizeof(r)); 2075 2076 r.subh.action = PFSYNC_ACT_CLR; 2077 r.subh.len = sizeof(struct pfsync_clr) >> 2; 2078 r.subh.count = htons(1); 2079 2080 strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname)); 2081 r.clr.creatorid = creatorid; 2082 2083 pfsync_send_plus(&r, sizeof(r)); 2084 } 2085 2086 void 2087 pfsync_q_ins(struct pf_state *st, int q) 2088 { 2089 struct pfsync_softc *sc = pfsyncif; 2090 size_t nlen = pfsync_qs[q].len; 2091 2092 KASSERT(st->sync_state == PFSYNC_S_NONE); 2093 2094 #if defined(PFSYNC_DEBUG) 2095 if (sc->sc_len < PFSYNC_MINPKT) 2096 panic("pfsync pkt len is too low %d", sc->sc_len); 2097 #endif 2098 if (TAILQ_EMPTY(&sc->sc_qs[q])) 2099 nlen += sizeof(struct pfsync_subheader); 2100 2101 if (sc->sc_len + nlen > sc->sc_if.if_mtu) { 2102 pfsync_sendout(); 2103 2104 nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len; 2105 } 2106 2107 sc->sc_len += nlen; 2108 TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list); 2109 st->sync_state = q; 2110 } 2111 2112 void 2113 pfsync_q_del(struct pf_state *st) 2114 { 2115 struct pfsync_softc *sc = pfsyncif; 2116 int q = st->sync_state; 2117 2118 KASSERT(st->sync_state != PFSYNC_S_NONE); 2119 2120 sc->sc_len -= pfsync_qs[q].len; 2121 TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list); 2122 st->sync_state = PFSYNC_S_NONE; 2123 2124 if (TAILQ_EMPTY(&sc->sc_qs[q])) 2125 sc->sc_len -= sizeof(struct pfsync_subheader); 2126 } 2127 2128 void 2129 pfsync_update_tdb(struct tdb *t, int output) 2130 { 2131 struct pfsync_softc *sc = pfsyncif; 2132 size_t nlen = sizeof(struct pfsync_tdb); 2133 2134 if (sc == NULL) 2135 return; 2136 2137 if (!ISSET(t->tdb_flags, TDBF_PFSYNC)) { 2138 if (TAILQ_EMPTY(&sc->sc_tdb_q)) 2139 nlen += sizeof(struct pfsync_subheader); 2140 2141 if (sc->sc_len + nlen > sc->sc_if.if_mtu) { 2142 pfsync_sendout(); 2143 2144 nlen = sizeof(struct pfsync_subheader) + 2145 sizeof(struct pfsync_tdb); 2146 } 2147 2148 sc->sc_len += nlen; 2149 TAILQ_INSERT_TAIL(&sc->sc_tdb_q, t, tdb_sync_entry); 2150 SET(t->tdb_flags, TDBF_PFSYNC); 2151 t->tdb_updates = 0; 2152 } else { 2153 if (++t->tdb_updates >= sc->sc_maxupdates) 2154 schednetisr(NETISR_PFSYNC); 2155 } 2156 2157 if (output) 2158 SET(t->tdb_flags, TDBF_PFSYNC_RPL); 2159 else 2160 CLR(t->tdb_flags, TDBF_PFSYNC_RPL); 2161 } 2162 2163 void 2164 pfsync_delete_tdb(struct tdb *t) 2165 { 2166 struct pfsync_softc *sc = pfsyncif; 2167 2168 if (sc == NULL || !ISSET(t->tdb_flags, TDBF_PFSYNC)) 2169 return; 2170 2171 sc->sc_len -= sizeof(struct pfsync_tdb); 2172 TAILQ_REMOVE(&sc->sc_tdb_q, t, tdb_sync_entry); 2173 CLR(t->tdb_flags, TDBF_PFSYNC); 2174 2175 if (TAILQ_EMPTY(&sc->sc_tdb_q)) 2176 sc->sc_len -= sizeof(struct pfsync_subheader); 2177 } 2178 2179 void 2180 pfsync_out_tdb(struct tdb *t, void *buf) 2181 { 2182 struct pfsync_tdb *ut = buf; 2183 2184 bzero(ut, sizeof(*ut)); 2185 ut->spi = t->tdb_spi; 2186 bcopy(&t->tdb_dst, &ut->dst, sizeof(ut->dst)); 2187 /* 2188 * When a failover happens, the master's rpl is probably above 2189 * what we see here (we may be up to a second late), so 2190 * increase it a bit for outbound tdbs to manage most such 2191 * situations. 2192 * 2193 * For now, just add an offset that is likely to be larger 2194 * than the number of packets we can see in one second. The RFC 2195 * just says the next packet must have a higher seq value. 2196 * 2197 * XXX What is a good algorithm for this? We could use 2198 * a rate-determined increase, but to know it, we would have 2199 * to extend struct tdb. 2200 * XXX pt->rpl can wrap over MAXINT, but if so the real tdb 2201 * will soon be replaced anyway. For now, just don't handle 2202 * this edge case. 2203 */ 2204 #define RPL_INCR 16384 2205 ut->rpl = htobe64(t->tdb_rpl + (ISSET(t->tdb_flags, TDBF_PFSYNC_RPL) ? 2206 RPL_INCR : 0)); 2207 ut->cur_bytes = htobe64(t->tdb_cur_bytes); 2208 ut->sproto = t->tdb_sproto; 2209 ut->rdomain = htons(t->tdb_rdomain); 2210 } 2211 2212 void 2213 pfsync_bulk_start(void) 2214 { 2215 struct pfsync_softc *sc = pfsyncif; 2216 2217 DPFPRINTF(LOG_INFO, "received bulk update request"); 2218 2219 if (TAILQ_EMPTY(&state_list)) 2220 pfsync_bulk_status(PFSYNC_BUS_END); 2221 else { 2222 sc->sc_ureq_received = time_uptime; 2223 2224 if (sc->sc_bulk_next == NULL) 2225 sc->sc_bulk_next = TAILQ_FIRST(&state_list); 2226 sc->sc_bulk_last = sc->sc_bulk_next; 2227 2228 pfsync_bulk_status(PFSYNC_BUS_START); 2229 timeout_add(&sc->sc_bulk_tmo, 0); 2230 } 2231 } 2232 2233 void 2234 pfsync_bulk_update(void *arg) 2235 { 2236 struct pfsync_softc *sc = arg; 2237 struct pf_state *st; 2238 int i = 0; 2239 int s; 2240 2241 s = splsoftnet(); 2242 2243 st = sc->sc_bulk_next; 2244 2245 for (;;) { 2246 if (st->sync_state == PFSYNC_S_NONE && 2247 st->timeout < PFTM_MAX && 2248 st->pfsync_time <= sc->sc_ureq_received) { 2249 pfsync_update_state_req(st); 2250 i++; 2251 } 2252 2253 st = TAILQ_NEXT(st, entry_list); 2254 if (st == NULL) 2255 st = TAILQ_FIRST(&state_list); 2256 2257 if (st == sc->sc_bulk_last) { 2258 /* we're done */ 2259 sc->sc_bulk_next = NULL; 2260 sc->sc_bulk_last = NULL; 2261 pfsync_bulk_status(PFSYNC_BUS_END); 2262 break; 2263 } 2264 2265 if (i > 1 && (sc->sc_if.if_mtu - sc->sc_len) < 2266 sizeof(struct pfsync_state)) { 2267 /* we've filled a packet */ 2268 sc->sc_bulk_next = st; 2269 timeout_add(&sc->sc_bulk_tmo, 1); 2270 break; 2271 } 2272 } 2273 2274 splx(s); 2275 } 2276 2277 void 2278 pfsync_bulk_status(u_int8_t status) 2279 { 2280 struct { 2281 struct pfsync_subheader subh; 2282 struct pfsync_bus bus; 2283 } __packed r; 2284 2285 struct pfsync_softc *sc = pfsyncif; 2286 2287 bzero(&r, sizeof(r)); 2288 2289 r.subh.action = PFSYNC_ACT_BUS; 2290 r.subh.len = sizeof(struct pfsync_bus) >> 2; 2291 r.subh.count = htons(1); 2292 2293 r.bus.creatorid = pf_status.hostid; 2294 r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received); 2295 r.bus.status = status; 2296 2297 pfsync_send_plus(&r, sizeof(r)); 2298 } 2299 2300 void 2301 pfsync_bulk_fail(void *arg) 2302 { 2303 struct pfsync_softc *sc = arg; 2304 int s; 2305 2306 s = splsoftnet(); 2307 2308 if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) { 2309 /* Try again */ 2310 timeout_add_sec(&sc->sc_bulkfail_tmo, 5); 2311 pfsync_request_update(0, 0); 2312 } else { 2313 /* Pretend like the transfer was ok */ 2314 sc->sc_ureq_sent = 0; 2315 sc->sc_bulk_tries = 0; 2316 #if NCARP > 0 2317 if (!pfsync_sync_ok) 2318 carp_group_demote_adj(&sc->sc_if, -1, 2319 sc->sc_link_demoted ? 2320 "pfsync link state up" : 2321 "pfsync bulk fail"); 2322 if (sc->sc_initial_bulk) { 2323 carp_group_demote_adj(&sc->sc_if, -32, 2324 "pfsync init"); 2325 sc->sc_initial_bulk = 0; 2326 } 2327 #endif 2328 pfsync_sync_ok = 1; 2329 sc->sc_link_demoted = 0; 2330 DPFPRINTF(LOG_ERR, "failed to receive bulk update"); 2331 } 2332 2333 splx(s); 2334 } 2335 2336 void 2337 pfsync_send_plus(void *plus, size_t pluslen) 2338 { 2339 struct pfsync_softc *sc = pfsyncif; 2340 2341 if (sc->sc_len + pluslen > sc->sc_if.if_mtu) 2342 pfsync_sendout(); 2343 2344 sc->sc_plus = plus; 2345 sc->sc_len += (sc->sc_pluslen = pluslen); 2346 2347 pfsync_sendout(); 2348 } 2349 2350 int 2351 pfsync_up(void) 2352 { 2353 struct pfsync_softc *sc = pfsyncif; 2354 2355 if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING)) 2356 return (0); 2357 2358 return (1); 2359 } 2360 2361 int 2362 pfsync_state_in_use(struct pf_state *st) 2363 { 2364 struct pfsync_softc *sc = pfsyncif; 2365 2366 if (sc == NULL) 2367 return (0); 2368 2369 if (st->sync_state != PFSYNC_S_NONE || 2370 st == sc->sc_bulk_next || 2371 st == sc->sc_bulk_last) 2372 return (1); 2373 2374 return (0); 2375 } 2376 2377 void 2378 pfsync_timeout(void *arg) 2379 { 2380 int s; 2381 2382 s = splsoftnet(); 2383 pfsync_sendout(); 2384 splx(s); 2385 } 2386 2387 /* this is a softnet/netisr handler */ 2388 void 2389 pfsyncintr(void) 2390 { 2391 pfsync_sendout(); 2392 } 2393 2394 int 2395 pfsync_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, 2396 size_t newlen) 2397 { 2398 /* All sysctl names at this level are terminal. */ 2399 if (namelen != 1) 2400 return (ENOTDIR); 2401 2402 switch (name[0]) { 2403 case PFSYNCCTL_STATS: 2404 if (newp != NULL) 2405 return (EPERM); 2406 return (sysctl_struct(oldp, oldlenp, newp, newlen, 2407 &pfsyncstats, sizeof(pfsyncstats))); 2408 default: 2409 return (ENOPROTOOPT); 2410 } 2411 } 2412