/*	$OpenBSD: if_pfsync.c,v 1.188 2012/06/30 00:16:15 mikeb Exp $	*/

/*
 * Copyright (c) 2002 Michael Shalayeff
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 2009 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/timeout.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/pool.h>
#include <sys/syslog.h>

#include <net/if.h>
#include <net/if_types.h>
#include <net/route.h>
#include <net/bpf.h>
#include <net/netisr.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/tcp.h>
#include <netinet/tcp_seq.h>

#ifdef INET
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#endif

#ifdef INET6
#include <netinet/ip6.h>
#include <netinet/in_pcb.h>
#include <netinet/icmp6.h>
#include <netinet6/nd6.h>
#include <netinet6/ip6_divert.h>
#endif /* INET6 */

#include "carp.h"
#if NCARP > 0
#include <netinet/ip_carp.h>
#endif

#define PF_DEBUGNAME	"pfsync: "
#include <net/pfvar.h>
#include <net/if_pfsync.h>

#include "bpfilter.h"
#include "pfsync.h"

#define PFSYNC_MINPKT ( \
	sizeof(struct ip) + \
	sizeof(struct pfsync_header))

int	pfsync_upd_tcp(struct pf_state *, struct pfsync_state_peer *,
	    struct pfsync_state_peer *);

int	pfsync_in_clr(caddr_t, int, int, int);
int	pfsync_in_iack(caddr_t, int, int, int);
int	pfsync_in_upd_c(caddr_t, int, int, int);
int	pfsync_in_ureq(caddr_t, int, int, int);
int	pfsync_in_del(caddr_t, int, int, int);
int	pfsync_in_del_c(caddr_t, int, int, int);
int	pfsync_in_bus(caddr_t, int, int, int);
int	pfsync_in_tdb(caddr_t, int, int, int);
int	pfsync_in_ins(caddr_t, int, int, int);
int	pfsync_in_upd(caddr_t, int, int, int);
int	pfsync_in_eof(caddr_t, int, int, int);

int	pfsync_in_error(caddr_t, int, int, int);

struct {
	int	(*in)(caddr_t, int, int, int);
	size_t	len;
} pfsync_acts[] = {
	/* PFSYNC_ACT_CLR */
	{ pfsync_in_clr,	sizeof(struct pfsync_clr) },
	/* PFSYNC_ACT_OINS */
	{ pfsync_in_error,	0 },
	/* PFSYNC_ACT_INS_ACK */
	{ pfsync_in_iack,	sizeof(struct pfsync_ins_ack) },
	/* PFSYNC_ACT_OUPD */
	{ pfsync_in_error,	0 },
	/* PFSYNC_ACT_UPD_C */
	{ pfsync_in_upd_c,	sizeof(struct pfsync_upd_c) },
	/* PFSYNC_ACT_UPD_REQ */
	{ pfsync_in_ureq,	sizeof(struct pfsync_upd_req) },
	/* PFSYNC_ACT_DEL */
	{ pfsync_in_del,	sizeof(struct pfsync_state) },
	/* PFSYNC_ACT_DEL_C */
	{ pfsync_in_del_c,	sizeof(struct pfsync_del_c) },
	/* PFSYNC_ACT_INS_F */
	{ pfsync_in_error,	0 },
	/* PFSYNC_ACT_DEL_F */
	{ pfsync_in_error,	0 },
	/* PFSYNC_ACT_BUS */
	{ pfsync_in_bus,	sizeof(struct pfsync_bus) },
	/* PFSYNC_ACT_OTDB */
	{ pfsync_in_error,	0 },
	/* PFSYNC_ACT_EOF */
	{ pfsync_in_error,	0 },
	/* PFSYNC_ACT_INS */
	{ pfsync_in_ins,	sizeof(struct pfsync_state) },
	/* PFSYNC_ACT_UPD */
	{ pfsync_in_upd,	sizeof(struct pfsync_state) },
	/* PFSYNC_ACT_TDB */
	{ pfsync_in_tdb,	sizeof(struct pfsync_tdb) },
};

struct pfsync_q {
	void		(*write)(struct pf_state *, void *);
	size_t		len;
	u_int8_t	action;
};

/* we have one of these for every PFSYNC_S_ */
void	pfsync_out_state(struct pf_state *, void *);
void	pfsync_out_iack(struct pf_state *, void *);
void	pfsync_out_upd_c(struct pf_state *, void *);
void	pfsync_out_del(struct pf_state *, void *);

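/*
 * This table is indexed by a state's sync_state (the PFSYNC_S_* values
 * IACK, UPD_C, DEL, INS, UPD, in that order), so its entries must stay
 * in that order; pfsync_q_ins() and pfsync_sendout() rely on .len and
 * .action when they size and emit each queue's subheader.
 */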
struct pfsync_q pfsync_qs[] = {
	{ pfsync_out_iack,  sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK },
	{ pfsync_out_upd_c, sizeof(struct pfsync_upd_c),   PFSYNC_ACT_UPD_C },
	{ pfsync_out_del,   sizeof(struct pfsync_del_c),   PFSYNC_ACT_DEL_C },
	{ pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_INS },
	{ pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_UPD }
};

void	pfsync_q_ins(struct pf_state *, int);
void	pfsync_q_del(struct pf_state *);

struct pfsync_upd_req_item {
	TAILQ_ENTRY(pfsync_upd_req_item)	ur_entry;
	struct pfsync_upd_req			ur_msg;
};
TAILQ_HEAD(pfsync_upd_reqs, pfsync_upd_req_item);

struct pfsync_deferral {
	TAILQ_ENTRY(pfsync_deferral)		 pd_entry;
	struct pf_state				*pd_st;
	struct mbuf				*pd_m;
	struct timeout				 pd_tmo;
};
TAILQ_HEAD(pfsync_deferrals, pfsync_deferral);

#define PFSYNC_PLSIZE	MAX(sizeof(struct pfsync_upd_req_item), \
	    sizeof(struct pfsync_deferral))

void	pfsync_out_tdb(struct tdb *, void *);

struct pfsync_softc {
	struct ifnet		 sc_if;
	struct ifnet		*sc_sync_if;

	struct pool		 sc_pool;

	struct ip_moptions	 sc_imo;

	struct in_addr		 sc_sync_peer;
	u_int8_t		 sc_maxupdates;

	struct ip		 sc_template;

	struct pf_state_queue	 sc_qs[PFSYNC_S_COUNT];
	size_t			 sc_len;

	struct pfsync_upd_reqs	 sc_upd_req_list;

	int			 sc_initial_bulk;
	int			 sc_link_demoted;

	int			 sc_defer;
	struct pfsync_deferrals	 sc_deferrals;
	u_int			 sc_deferred;

	void			*sc_plus;
	size_t			 sc_pluslen;

	u_int32_t		 sc_ureq_sent;
	int			 sc_bulk_tries;
	struct timeout		 sc_bulkfail_tmo;

	u_int32_t		 sc_ureq_received;
	struct pf_state		*sc_bulk_next;
	struct pf_state		*sc_bulk_last;
	struct timeout		 sc_bulk_tmo;

	TAILQ_HEAD(, tdb)	 sc_tdb_q;

	void			*sc_lhcookie;

	struct timeout		 sc_tmo;
};

struct pfsync_softc	*pfsyncif = NULL;
struct pfsyncstats	 pfsyncstats;

void	pfsyncattach(int);
int	pfsync_clone_create(struct if_clone *, int);
int	pfsync_clone_destroy(struct ifnet *);
int	pfsync_alloc_scrub_memory(struct pfsync_state_peer *,
	    struct pf_state_peer *);
void	pfsync_update_net_tdb(struct pfsync_tdb *);
int	pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
	    struct rtentry *);
int	pfsyncioctl(struct ifnet *, u_long, caddr_t);
void	pfsyncstart(struct ifnet *);
void	pfsync_syncdev_state(void *);

struct mbuf *pfsync_if_dequeue(struct ifnet *);

void	pfsync_deferred(struct pf_state *, int);
void	pfsync_undefer(struct pfsync_deferral *, int);
void	pfsync_defer_tmo(void *);

void	pfsync_cancel_full_update(struct pfsync_softc *);
void	pfsync_request_full_update(struct pfsync_softc *);
void	pfsync_request_update(u_int32_t, u_int64_t);
void	pfsync_update_state_req(struct pf_state *);

void	pfsync_drop(struct pfsync_softc *);
void	pfsync_sendout(void);
void	pfsync_send_plus(void *, size_t);
void	pfsync_timeout(void *);
void	pfsync_tdb_timeout(void *);

void	pfsync_bulk_start(void);
void	pfsync_bulk_status(u_int8_t);
void	pfsync_bulk_update(void *);
void	pfsync_bulk_fail(void *);

#define PFSYNC_MAX_BULKTRIES	12
int	pfsync_sync_ok;

struct if_clone	pfsync_cloner =
    IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create, pfsync_clone_destroy);

void
pfsyncattach(int npfsync)
{
	if_clone_attach(&pfsync_cloner);
}
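
/*
 * Only a single instance (pfsync0) is supported; creating it sets up
 * the per-queue lists, the deferral/update-request pool, and an empty
 * packet of PFSYNC_MINPKT bytes that grows as work is queued.
 */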
int
pfsync_clone_create(struct if_clone *ifc, int unit)
{
	struct pfsync_softc *sc;
	struct ifnet *ifp;
	int q;

	if (unit != 0)
		return (EINVAL);

	pfsync_sync_ok = 1;

	sc = malloc(sizeof(*pfsyncif), M_DEVBUF, M_WAITOK | M_ZERO);

	for (q = 0; q < PFSYNC_S_COUNT; q++)
		TAILQ_INIT(&sc->sc_qs[q]);

	pool_init(&sc->sc_pool, PFSYNC_PLSIZE, 0, 0, 0, "pfsync", NULL);
	TAILQ_INIT(&sc->sc_upd_req_list);
	TAILQ_INIT(&sc->sc_deferrals);
	sc->sc_deferred = 0;

	TAILQ_INIT(&sc->sc_tdb_q);

	sc->sc_len = PFSYNC_MINPKT;
	sc->sc_maxupdates = 128;

	sc->sc_imo.imo_membership = (struct in_multi **)malloc(
	    (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_IPMOPTS,
	    M_WAITOK | M_ZERO);
	sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;

	ifp = &sc->sc_if;
	snprintf(ifp->if_xname, sizeof ifp->if_xname, "pfsync%d", unit);
	ifp->if_softc = sc;
	ifp->if_ioctl = pfsyncioctl;
	ifp->if_output = pfsyncoutput;
	ifp->if_start = pfsyncstart;
	ifp->if_type = IFT_PFSYNC;
	IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
	ifp->if_hdrlen = sizeof(struct pfsync_header);
	ifp->if_mtu = ETHERMTU;
	timeout_set(&sc->sc_tmo, pfsync_timeout, sc);
	timeout_set(&sc->sc_bulk_tmo, pfsync_bulk_update, sc);
	timeout_set(&sc->sc_bulkfail_tmo, pfsync_bulk_fail, sc);

	if_attach(ifp);
	if_alloc_sadl(ifp);

#if NCARP > 0
	if_addgroup(ifp, "carp");
#endif

#if NBPFILTER > 0
	bpfattach(&sc->sc_if.if_bpf, ifp, DLT_PFSYNC, PFSYNC_HDRLEN);
#endif

	pfsyncif = sc;

	return (0);
}

int
pfsync_clone_destroy(struct ifnet *ifp)
{
	struct pfsync_softc *sc = ifp->if_softc;
	struct pfsync_deferral *pd;
	int s;

	s = splsoftnet();
	timeout_del(&sc->sc_bulkfail_tmo);
	timeout_del(&sc->sc_bulk_tmo);
	timeout_del(&sc->sc_tmo);
#if NCARP > 0
	if (!pfsync_sync_ok)
		carp_group_demote_adj(&sc->sc_if, -1, "pfsync destroy");
	if (sc->sc_link_demoted)
		carp_group_demote_adj(&sc->sc_if, -1, "pfsync destroy");
#endif
	if (sc->sc_lhcookie != NULL)
		hook_disestablish(
		    sc->sc_sync_if->if_linkstatehooks,
		    sc->sc_lhcookie);
	if_detach(ifp);

	pfsync_drop(sc);

	while (sc->sc_deferred > 0) {
		pd = TAILQ_FIRST(&sc->sc_deferrals);
		timeout_del(&pd->pd_tmo);
		pfsync_undefer(pd, 0);
	}

	pool_destroy(&sc->sc_pool);
	free(sc->sc_imo.imo_membership, M_IPMOPTS);
	free(sc, M_DEVBUF);

	pfsyncif = NULL;
	splx(s);

	return (0);
}

struct mbuf *
pfsync_if_dequeue(struct ifnet *ifp)
{
	struct mbuf *m;

	IF_DEQUEUE(&ifp->if_snd, m);

	return (m);
}

/*
 * Start output on the pfsync interface.
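 * pfsync builds and transmits its packets itself from pfsync_sendout(),
 * so anything that reaches the interface send queue is stale and is
 * simply dropped here.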
 */
void
pfsyncstart(struct ifnet *ifp)
{
	struct mbuf *m;
	int s;

	s = splnet();
	while ((m = pfsync_if_dequeue(ifp)) != NULL) {
		IF_DROP(&ifp->if_snd);
		m_freem(m);
	}
	splx(s);
}

void
pfsync_syncdev_state(void *arg)
{
	struct pfsync_softc *sc = arg;

	if (!sc->sc_sync_if || !(sc->sc_if.if_flags & IFF_UP))
		return;

	if (sc->sc_sync_if->if_link_state == LINK_STATE_DOWN) {
		sc->sc_if.if_flags &= ~IFF_RUNNING;
		if (!sc->sc_link_demoted) {
#if NCARP > 0
			carp_group_demote_adj(&sc->sc_if, 1,
			    "pfsync link state down");
#endif
			sc->sc_link_demoted = 1;
		}

		/* drop everything */
		timeout_del(&sc->sc_tmo);
		pfsync_drop(sc);

		pfsync_cancel_full_update(sc);
	} else if (sc->sc_link_demoted) {
		sc->sc_if.if_flags |= IFF_RUNNING;

		pfsync_request_full_update(sc);
	}
}

int
pfsync_alloc_scrub_memory(struct pfsync_state_peer *s,
    struct pf_state_peer *d)
{
	if (s->scrub.scrub_flag && d->scrub == NULL) {
		d->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT | PR_ZERO);
		if (d->scrub == NULL)
			return (ENOMEM);
	}

	return (0);
}

void
pfsync_state_export(struct pfsync_state *sp, struct pf_state *st)
{
	pf_state_export(sp, st);
}

int
pfsync_state_import(struct pfsync_state *sp, int flags)
{
	struct pf_state *st = NULL;
	struct pf_state_key *skw = NULL, *sks = NULL;
	struct pf_rule *r = NULL;
	struct pfi_kif *kif;
	int pool_flags;
	int error;

	if (sp->creatorid == 0) {
		DPFPRINTF(LOG_NOTICE, "pfsync_state_import: "
		    "invalid creator id: %08x", ntohl(sp->creatorid));
		return (EINVAL);
	}

	if ((kif = pfi_kif_get(sp->ifname)) == NULL) {
		DPFPRINTF(LOG_NOTICE, "pfsync_state_import: "
		    "unknown interface: %s", sp->ifname);
		if (flags & PFSYNC_SI_IOCTL)
			return (EINVAL);
		return (0);	/* skip this state */
	}

	if (sp->af == 0)
		return (0);	/* skip this state */

	/*
	 * If the ruleset checksums match or the state is coming from the ioctl,
	 * it's safe to associate the state with the rule of that number.
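	 * Otherwise the imported state is bound to the default rule.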
	 */
	if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) &&
	    (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) <
	    pf_main_ruleset.rules.active.rcount)
		r = pf_main_ruleset.rules.active.ptr_array[ntohl(sp->rule)];
	else
		r = &pf_default_rule;

	if ((r->max_states && r->states_cur >= r->max_states))
		goto cleanup;

	if (flags & PFSYNC_SI_IOCTL)
		pool_flags = PR_WAITOK | PR_LIMITFAIL | PR_ZERO;
	else
		pool_flags = PR_NOWAIT | PR_LIMITFAIL | PR_ZERO;

	if ((st = pool_get(&pf_state_pl, pool_flags)) == NULL)
		goto cleanup;

	if ((skw = pf_alloc_state_key(pool_flags)) == NULL)
		goto cleanup;

	if ((sp->key[PF_SK_WIRE].af &&
	    (sp->key[PF_SK_WIRE].af != sp->key[PF_SK_STACK].af)) ||
	    PF_ANEQ(&sp->key[PF_SK_WIRE].addr[0],
	    &sp->key[PF_SK_STACK].addr[0], sp->af) ||
	    PF_ANEQ(&sp->key[PF_SK_WIRE].addr[1],
	    &sp->key[PF_SK_STACK].addr[1], sp->af) ||
	    sp->key[PF_SK_WIRE].port[0] != sp->key[PF_SK_STACK].port[0] ||
	    sp->key[PF_SK_WIRE].port[1] != sp->key[PF_SK_STACK].port[1] ||
	    sp->key[PF_SK_WIRE].rdomain != sp->key[PF_SK_STACK].rdomain) {
		if ((sks = pf_alloc_state_key(pool_flags)) == NULL)
			goto cleanup;
	} else
		sks = skw;

	/* allocate memory for scrub info */
	if (pfsync_alloc_scrub_memory(&sp->src, &st->src) ||
	    pfsync_alloc_scrub_memory(&sp->dst, &st->dst))
		goto cleanup;

	/* copy to state key(s) */
	skw->addr[0] = sp->key[PF_SK_WIRE].addr[0];
	skw->addr[1] = sp->key[PF_SK_WIRE].addr[1];
	skw->port[0] = sp->key[PF_SK_WIRE].port[0];
	skw->port[1] = sp->key[PF_SK_WIRE].port[1];
	skw->rdomain = ntohs(sp->key[PF_SK_WIRE].rdomain);
	skw->proto = sp->proto;
	if (!(skw->af = sp->key[PF_SK_WIRE].af))
		skw->af = sp->af;
	if (sks != skw) {
		sks->addr[0] = sp->key[PF_SK_STACK].addr[0];
		sks->addr[1] = sp->key[PF_SK_STACK].addr[1];
		sks->port[0] = sp->key[PF_SK_STACK].port[0];
		sks->port[1] = sp->key[PF_SK_STACK].port[1];
		sks->rdomain = ntohs(sp->key[PF_SK_STACK].rdomain);
		if (!(sks->af = sp->key[PF_SK_STACK].af))
			sks->af = sp->af;
		if (sks->af != skw->af) {
			switch (sp->proto) {
			case IPPROTO_ICMP:
				sks->proto = IPPROTO_ICMPV6;
				break;
			case IPPROTO_ICMPV6:
				sks->proto = IPPROTO_ICMP;
				break;
			default:
				sks->proto = sp->proto;
			}
		} else
			sks->proto = sp->proto;
	}
	st->rtableid[PF_SK_WIRE] = ntohl(sp->rtableid[PF_SK_WIRE]);
	st->rtableid[PF_SK_STACK] = ntohl(sp->rtableid[PF_SK_STACK]);

	/* copy to state */
	bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr));
	st->creation = time_uptime - ntohl(sp->creation);
	st->expire = time_second;
	if (sp->expire) {
		u_int32_t timeout;

		timeout = r->timeout[sp->timeout];
		if (!timeout)
			timeout = pf_default_rule.timeout[sp->timeout];

		/* sp->expire may have been adaptively scaled by export. */
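		/*
		 * Wind the expiry back by the time the state has
		 * already run down on the peer (full timeout minus
		 * the remaining time carried in sp->expire).
		 */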
		st->expire -= timeout - ntohl(sp->expire);
	}

	st->direction = sp->direction;
	st->log = sp->log;
	st->timeout = sp->timeout;
	/* XXX replace state_flags post 5.0 */
	st->state_flags = sp->state_flags | ntohs(sp->all_state_flags);
	st->max_mss = ntohs(sp->max_mss);
	st->min_ttl = sp->min_ttl;
	st->set_tos = sp->set_tos;

	st->id = sp->id;
	st->creatorid = sp->creatorid;
	pf_state_peer_ntoh(&sp->src, &st->src);
	pf_state_peer_ntoh(&sp->dst, &st->dst);

	st->rule.ptr = r;
	st->anchor.ptr = NULL;
	st->rt_kif = NULL;

	st->pfsync_time = time_uptime;
	st->sync_state = PFSYNC_S_NONE;

	/* XXX when we have anchors, use STATE_INC_COUNTERS */
	r->states_cur++;
	r->states_tot++;

	if (!ISSET(flags, PFSYNC_SI_IOCTL))
		SET(st->state_flags, PFSTATE_NOSYNC);

	if (pf_state_insert(kif, skw, sks, st) != 0) {
		/* XXX when we have anchors, use STATE_DEC_COUNTERS */
		r->states_cur--;
		error = EEXIST;
		goto cleanup_state;
	}

	if (!ISSET(flags, PFSYNC_SI_IOCTL)) {
		CLR(st->state_flags, PFSTATE_NOSYNC);
		if (ISSET(st->state_flags, PFSTATE_ACK)) {
			pfsync_q_ins(st, PFSYNC_S_IACK);
			schednetisr(NETISR_PFSYNC);
		}
	}
	CLR(st->state_flags, PFSTATE_ACK);

	return (0);

cleanup:
	error = ENOMEM;
	if (skw == sks)
		sks = NULL;
	if (skw != NULL)
		pool_put(&pf_state_key_pl, skw);
	if (sks != NULL)
		pool_put(&pf_state_key_pl, sks);

cleanup_state:	/* pf_state_insert frees the state keys */
	if (st) {
		if (st->dst.scrub)
			pool_put(&pf_state_scrub_pl, st->dst.scrub);
		if (st->src.scrub)
			pool_put(&pf_state_scrub_pl, st->src.scrub);
		pool_put(&pf_state_pl, st);
	}
	return (error);
}

void
pfsync_input(struct mbuf *m, ...)
{
	struct pfsync_softc *sc = pfsyncif;
	struct ip *ip = mtod(m, struct ip *);
	struct mbuf *mp;
	struct pfsync_header *ph;
	struct pfsync_subheader subh;

	int offset, offp, len, count, mlen, flags = 0;

	pfsyncstats.pfsyncs_ipackets++;

	/* verify that we have a sync interface configured */
	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
	    sc->sc_sync_if == NULL || !pf_status.running)
		goto done;

	/* verify that the packet came in on the right interface */
	if (sc->sc_sync_if != m->m_pkthdr.rcvif) {
		pfsyncstats.pfsyncs_badif++;
		goto done;
	}

	sc->sc_if.if_ipackets++;
	sc->sc_if.if_ibytes += m->m_pkthdr.len;

	/* verify that the IP TTL is 255. */
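	/*
	 * Requiring the maximum TTL means the packet cannot have been
	 * forwarded by a router, so it must originate on the local segment.
	 */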
	if (ip->ip_ttl != PFSYNC_DFLTTL) {
		pfsyncstats.pfsyncs_badttl++;
		goto done;
	}

	offset = ip->ip_hl << 2;
	mp = m_pulldown(m, offset, sizeof(*ph), &offp);
	if (mp == NULL) {
		pfsyncstats.pfsyncs_hdrops++;
		return;
	}
	ph = (struct pfsync_header *)(mp->m_data + offp);

	/* verify the version */
	if (ph->version != PFSYNC_VERSION) {
		pfsyncstats.pfsyncs_badver++;
		goto done;
	}
	len = ntohs(ph->len) + offset;
	if (m->m_pkthdr.len < len) {
		pfsyncstats.pfsyncs_badlen++;
		goto done;
	}

	if (!bcmp(&ph->pfcksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH))
		flags = PFSYNC_SI_CKSUM;

	offset += sizeof(*ph);
	while (offset <= len - sizeof(subh)) {
		m_copydata(m, offset, sizeof(subh), (caddr_t)&subh);
		offset += sizeof(subh);

		mlen = subh.len << 2;
		count = ntohs(subh.count);

		if (subh.action >= PFSYNC_ACT_MAX ||
		    subh.action >= nitems(pfsync_acts) ||
		    mlen < pfsync_acts[subh.action].len) {
			/*
			 * subheaders are always followed by at least one
			 * message, so if the peer is new
			 * enough to tell us how big its messages are then we
			 * know enough to skip them.
			 */
			if (count > 0 && mlen > 0) {
				offset += count * mlen;
				continue;
			}
			pfsyncstats.pfsyncs_badact++;
			goto done;
		}

		mp = m_pulldown(m, offset, mlen * count, &offp);
		if (mp == NULL) {
			pfsyncstats.pfsyncs_badlen++;
			return;
		}

		if (pfsync_acts[subh.action].in(mp->m_data + offp,
		    mlen, count, flags) != 0)
			goto done;

		offset += mlen * count;
	}

done:
	m_freem(m);
}

int
pfsync_in_clr(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_clr *clr;
	int i;

	struct pf_state *st, *nexts;
	struct pf_state_key *sk, *nextsk;
	struct pf_state_item *si;
	u_int32_t creatorid;

	for (i = 0; i < count; i++) {
		clr = (struct pfsync_clr *)(buf + len * i);
		creatorid = clr->creatorid;

		if (clr->ifname[0] == '\0') {
			for (st = RB_MIN(pf_state_tree_id, &tree_id);
			    st; st = nexts) {
				nexts = RB_NEXT(pf_state_tree_id, &tree_id, st);
				if (st->creatorid == creatorid) {
					SET(st->state_flags, PFSTATE_NOSYNC);
					pf_unlink_state(st);
				}
			}
		} else {
			if (pfi_kif_get(clr->ifname) == NULL)
				continue;

			/* XXX correct? */
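			/*
			 * There is no index by creator id, so walk the
			 * whole state-key tree and unlink every state
			 * owned by this creator.
			 */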
			for (sk = RB_MIN(pf_state_tree, &pf_statetbl);
			    sk; sk = nextsk) {
				nextsk = RB_NEXT(pf_state_tree,
				    &pf_statetbl, sk);
				TAILQ_FOREACH(si, &sk->states, entry) {
					if (si->s->creatorid == creatorid) {
						SET(si->s->state_flags,
						    PFSTATE_NOSYNC);
						pf_unlink_state(si->s);
					}
				}
			}
		}
	}

	return (0);
}

int
pfsync_in_ins(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_state *sp;
	sa_family_t af1, af2;
	int i;

	for (i = 0; i < count; i++) {
		sp = (struct pfsync_state *)(buf + len * i);
		af1 = sp->key[0].af;
		af2 = sp->key[1].af;

		/* check for invalid values */
		if (sp->timeout >= PFTM_MAX ||
		    sp->src.state > PF_TCPS_PROXY_DST ||
		    sp->dst.state > PF_TCPS_PROXY_DST ||
		    sp->direction > PF_OUT ||
		    (((af1 || af2) &&
		    ((af1 != AF_INET && af1 != AF_INET6) ||
		    (af2 != AF_INET && af2 != AF_INET6))) ||
		    (sp->af != AF_INET && sp->af != AF_INET6))) {
			DPFPRINTF(LOG_NOTICE,
			    "pfsync_input: PFSYNC5_ACT_INS: invalid value");
			pfsyncstats.pfsyncs_badval++;
			continue;
		}

		if (pfsync_state_import(sp, flags) == ENOMEM) {
			/* drop out, but process the rest of the actions */
			break;
		}
	}

	return (0);
}

int
pfsync_in_iack(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_ins_ack *ia;
	struct pf_state_cmp id_key;
	struct pf_state *st;
	int i;

	for (i = 0; i < count; i++) {
		ia = (struct pfsync_ins_ack *)(buf + len * i);

		id_key.id = ia->id;
		id_key.creatorid = ia->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL)
			continue;

		if (ISSET(st->state_flags, PFSTATE_ACK))
			pfsync_deferred(st, 0);
	}

	return (0);
}

int
pfsync_upd_tcp(struct pf_state *st, struct pfsync_state_peer *src,
    struct pfsync_state_peer *dst)
{
	int sync = 0;

	/*
	 * The state should never go backwards except
	 * for syn-proxy states. Neither should the
	 * sequence window slide backwards.
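	 * When the peer's copy is older than ours, the update is
	 * counted as stale (sync) and the caller pushes our fresher
	 * copy back out.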
	 */
	if ((st->src.state > src->state &&
	    (st->src.state < PF_TCPS_PROXY_SRC ||
	    src->state >= PF_TCPS_PROXY_SRC)) ||

	    (st->src.state == src->state &&
	    SEQ_GT(st->src.seqlo, ntohl(src->seqlo))))
		sync++;
	else
		pf_state_peer_ntoh(src, &st->src);

	if ((st->dst.state > dst->state) ||

	    (st->dst.state >= TCPS_SYN_SENT &&
	    SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo))))
		sync++;
	else
		pf_state_peer_ntoh(dst, &st->dst);

	return (sync);
}

int
pfsync_in_upd(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_state *sp;
	struct pf_state_cmp id_key;
	struct pf_state *st;
	int sync;

	int i;

	for (i = 0; i < count; i++) {
		sp = (struct pfsync_state *)(buf + len * i);

		/* check for invalid values */
		if (sp->timeout >= PFTM_MAX ||
		    sp->src.state > PF_TCPS_PROXY_DST ||
		    sp->dst.state > PF_TCPS_PROXY_DST) {
			DPFPRINTF(LOG_NOTICE,
			    "pfsync_input: PFSYNC_ACT_UPD: invalid value");
			pfsyncstats.pfsyncs_badval++;
			continue;
		}

		id_key.id = sp->id;
		id_key.creatorid = sp->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL) {
			/* insert the update */
			if (pfsync_state_import(sp, 0))
				pfsyncstats.pfsyncs_badstate++;
			continue;
		}

		if (ISSET(st->state_flags, PFSTATE_ACK))
			pfsync_deferred(st, 1);

		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
			sync = pfsync_upd_tcp(st, &sp->src, &sp->dst);
		else {
			sync = 0;

			/*
			 * Non-TCP protocol state machines always go
			 * forward.
			 */
			if (st->src.state > sp->src.state)
				sync++;
			else
				pf_state_peer_ntoh(&sp->src, &st->src);

			if (st->dst.state > sp->dst.state)
				sync++;
			else
				pf_state_peer_ntoh(&sp->dst, &st->dst);
		}

		if (sync < 2) {
			pfsync_alloc_scrub_memory(&sp->dst, &st->dst);
			pf_state_peer_ntoh(&sp->dst, &st->dst);
			st->expire = time_second;
			st->timeout = sp->timeout;
		}
		st->pfsync_time = time_uptime;

		if (sync) {
			pfsyncstats.pfsyncs_stale++;

			pfsync_update_state(st);
			schednetisr(NETISR_PFSYNC);
		}
	}

	return (0);
}

int
pfsync_in_upd_c(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_upd_c *up;
	struct pf_state_cmp id_key;
	struct pf_state *st;

	int sync;

	int i;

	for (i = 0; i < count; i++) {
		up = (struct pfsync_upd_c *)(buf + len * i);

		/* check for invalid values */
		if (up->timeout >= PFTM_MAX ||
		    up->src.state > PF_TCPS_PROXY_DST ||
		    up->dst.state > PF_TCPS_PROXY_DST) {
			DPFPRINTF(LOG_NOTICE,
			    "pfsync_input: PFSYNC_ACT_UPD_C: invalid value");
			pfsyncstats.pfsyncs_badval++;
			continue;
		}

		id_key.id = up->id;
		id_key.creatorid = up->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL) {
			/* We don't have this state. Ask for it. */
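			/*
			 * A compressed update carries only peer and
			 * timeout data; ask the sender for the full
			 * state.
			 */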
			pfsync_request_update(id_key.creatorid, id_key.id);
			continue;
		}

		if (ISSET(st->state_flags, PFSTATE_ACK))
			pfsync_deferred(st, 1);

		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
			sync = pfsync_upd_tcp(st, &up->src, &up->dst);
		else {
			sync = 0;
			/*
			 * Non-TCP protocol state machines always go
			 * forward.
			 */
			if (st->src.state > up->src.state)
				sync++;
			else
				pf_state_peer_ntoh(&up->src, &st->src);

			if (st->dst.state > up->dst.state)
				sync++;
			else
				pf_state_peer_ntoh(&up->dst, &st->dst);
		}
		if (sync < 2) {
			pfsync_alloc_scrub_memory(&up->dst, &st->dst);
			pf_state_peer_ntoh(&up->dst, &st->dst);
			st->expire = time_second;
			st->timeout = up->timeout;
		}
		st->pfsync_time = time_uptime;

		if (sync) {
			pfsyncstats.pfsyncs_stale++;

			pfsync_update_state(st);
			schednetisr(NETISR_PFSYNC);
		}
	}

	return (0);
}

int
pfsync_in_ureq(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_upd_req *ur;
	int i;

	struct pf_state_cmp id_key;
	struct pf_state *st;

	for (i = 0; i < count; i++) {
		ur = (struct pfsync_upd_req *)(buf + len * i);

		id_key.id = ur->id;
		id_key.creatorid = ur->creatorid;

		if (id_key.id == 0 && id_key.creatorid == 0)
			pfsync_bulk_start();
		else {
			st = pf_find_state_byid(&id_key);
			if (st == NULL) {
				pfsyncstats.pfsyncs_badstate++;
				continue;
			}
			if (ISSET(st->state_flags, PFSTATE_NOSYNC))
				continue;

			pfsync_update_state_req(st);
		}
	}

	return (0);
}

int
pfsync_in_del(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_state *sp;
	struct pf_state_cmp id_key;
	struct pf_state *st;
	int i;

	for (i = 0; i < count; i++) {
		sp = (struct pfsync_state *)(buf + len * i);

		id_key.id = sp->id;
		id_key.creatorid = sp->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL) {
			pfsyncstats.pfsyncs_badstate++;
			continue;
		}
		SET(st->state_flags, PFSTATE_NOSYNC);
		pf_unlink_state(st);
	}

	return (0);
}

int
pfsync_in_del_c(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_del_c *sp;
	struct pf_state_cmp id_key;
	struct pf_state *st;
	int i;

	for (i = 0; i < count; i++) {
		sp = (struct pfsync_del_c *)(buf + len * i);

		id_key.id = sp->id;
		id_key.creatorid = sp->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL) {
			pfsyncstats.pfsyncs_badstate++;
			continue;
		}

		SET(st->state_flags, PFSTATE_NOSYNC);
		pf_unlink_state(st);
	}

	return (0);
}

int
pfsync_in_bus(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_bus *bus;

	/* If we're not waiting for a bulk update, who cares. */
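	/*
	 * sc_ureq_sent holds the time our bulk update request went out
	 * and is cleared again once the transfer completes or fails.
	 */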
	if (sc->sc_ureq_sent == 0)
		return (0);

	bus = (struct pfsync_bus *)buf;

	switch (bus->status) {
	case PFSYNC_BUS_START:
		timeout_add(&sc->sc_bulkfail_tmo, 4 * hz +
		    pf_pool_limits[PF_LIMIT_STATES].limit /
		    ((sc->sc_if.if_mtu - PFSYNC_MINPKT) /
		    sizeof(struct pfsync_state)));
		DPFPRINTF(LOG_INFO, "received bulk update start");
		break;

	case PFSYNC_BUS_END:
		if (time_uptime - ntohl(bus->endtime) >=
		    sc->sc_ureq_sent) {
			/* that's it, we're happy */
			sc->sc_ureq_sent = 0;
			sc->sc_bulk_tries = 0;
			timeout_del(&sc->sc_bulkfail_tmo);
#if NCARP > 0
			if (!pfsync_sync_ok)
				carp_group_demote_adj(&sc->sc_if, -1,
				    sc->sc_link_demoted ?
				    "pfsync link state up" :
				    "pfsync bulk done");
			if (sc->sc_initial_bulk) {
				carp_group_demote_adj(&sc->sc_if, -32,
				    "pfsync init");
				sc->sc_initial_bulk = 0;
			}
#endif
			pfsync_sync_ok = 1;
			sc->sc_link_demoted = 0;
			DPFPRINTF(LOG_INFO, "received valid bulk update end");
		} else {
			DPFPRINTF(LOG_WARNING, "received invalid "
			    "bulk update end: bad timestamp");
		}
		break;
	}

	return (0);
}

int
pfsync_in_tdb(caddr_t buf, int len, int count, int flags)
{
#if defined(IPSEC)
	struct pfsync_tdb *tp;
	int i;

	for (i = 0; i < count; i++) {
		tp = (struct pfsync_tdb *)(buf + len * i);
		pfsync_update_net_tdb(tp);
	}
#endif

	return (0);
}

#if defined(IPSEC)
/* Update an in-kernel tdb. Silently fail if no tdb is found. */
void
pfsync_update_net_tdb(struct pfsync_tdb *pt)
{
	struct tdb *tdb;
	int s;

	/* check for invalid values */
	if (ntohl(pt->spi) <= SPI_RESERVED_MAX ||
	    (pt->dst.sa.sa_family != AF_INET &&
	    pt->dst.sa.sa_family != AF_INET6))
		goto bad;

	s = spltdb();
	tdb = gettdb(ntohs(pt->rdomain), pt->spi, &pt->dst, pt->sproto);
	if (tdb) {
		pt->rpl = ntohl(pt->rpl);
		pt->cur_bytes = betoh64(pt->cur_bytes);

		/* Neither replay nor byte counter should ever decrease. */
		if (pt->rpl < tdb->tdb_rpl ||
		    pt->cur_bytes < tdb->tdb_cur_bytes) {
			splx(s);
			goto bad;
		}

		tdb->tdb_rpl = pt->rpl;
		tdb->tdb_cur_bytes = pt->cur_bytes;
	}
	splx(s);
	return;

bad:
	DPFPRINTF(LOG_WARNING, "pfsync_insert: PFSYNC_ACT_TDB_UPD: "
	    "invalid value");
	pfsyncstats.pfsyncs_badstate++;
	return;
}
#endif


int
pfsync_in_eof(caddr_t buf, int len, int count, int flags)
{
	if (len > 0 || count > 0)
		pfsyncstats.pfsyncs_badact++;

	/* we're done. let the caller return */
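	/*
	 * Returning nonzero stops the subheader loop in pfsync_input(),
	 * so nothing after the EOF marker is processed.
	 */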
	return (1);
}

int
pfsync_in_error(caddr_t buf, int len, int count, int flags)
{
	pfsyncstats.pfsyncs_badact++;
	return (-1);
}

int
pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
    struct rtentry *rt)
{
	m_freem(m);
	return (0);
}

/* ARGSUSED */
int
pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct proc *p = curproc;
	struct pfsync_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ip_moptions *imo = &sc->sc_imo;
	struct pfsyncreq pfsyncr;
	struct ifnet *sifp;
	struct ip *ip;
	int s, error;

	switch (cmd) {
#if 0
	case SIOCSIFADDR:
	case SIOCAIFADDR:
	case SIOCSIFDSTADDR:
#endif
	case SIOCSIFFLAGS:
		s = splnet();
		if ((ifp->if_flags & IFF_RUNNING) == 0 &&
		    (ifp->if_flags & IFF_UP)) {
			ifp->if_flags |= IFF_RUNNING;

#if NCARP > 0
			sc->sc_initial_bulk = 1;
			carp_group_demote_adj(&sc->sc_if, 32, "pfsync init");
#endif

			pfsync_request_full_update(sc);
		}
		if ((ifp->if_flags & IFF_RUNNING) &&
		    (ifp->if_flags & IFF_UP) == 0) {
			ifp->if_flags &= ~IFF_RUNNING;

#if NCARP > 0
			if (sc->sc_initial_bulk) {
				carp_group_demote_adj(&sc->sc_if, -32,
				    "pfsync init");
				sc->sc_initial_bulk = 0;
			}
#endif

			/* drop everything */
			timeout_del(&sc->sc_tmo);
			pfsync_drop(sc);

			pfsync_cancel_full_update(sc);
		}
		splx(s);
		break;
	case SIOCSIFMTU:
		if (!sc->sc_sync_if ||
		    ifr->ifr_mtu <= PFSYNC_MINPKT ||
		    ifr->ifr_mtu > sc->sc_sync_if->if_mtu)
			return (EINVAL);
		s = splnet();
		if (ifr->ifr_mtu < ifp->if_mtu)
			pfsync_sendout();
		ifp->if_mtu = ifr->ifr_mtu;
		splx(s);
		break;
	case SIOCGETPFSYNC:
		bzero(&pfsyncr, sizeof(pfsyncr));
		if (sc->sc_sync_if) {
			strlcpy(pfsyncr.pfsyncr_syncdev,
			    sc->sc_sync_if->if_xname, IFNAMSIZ);
		}
		pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer;
		pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates;
		pfsyncr.pfsyncr_defer = sc->sc_defer;
		return (copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr)));

	case SIOCSETPFSYNC:
		if ((error = suser(p, 0)) != 0)
			return (error);
		if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr))))
			return (error);

		s = splnet();

		if (pfsyncr.pfsyncr_syncpeer.s_addr == 0)
			sc->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP;
		else
			sc->sc_sync_peer.s_addr =
			    pfsyncr.pfsyncr_syncpeer.s_addr;

		if (pfsyncr.pfsyncr_maxupdates > 255) {
			splx(s);
			return (EINVAL);
		}
		sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates;

		sc->sc_defer = pfsyncr.pfsyncr_defer;

		if (pfsyncr.pfsyncr_syncdev[0] == 0) {
			if (sc->sc_lhcookie != NULL)
				hook_disestablish(
				    sc->sc_sync_if->if_linkstatehooks,
				    sc->sc_lhcookie);
			sc->sc_sync_if = NULL;
			if (imo->imo_num_memberships > 0) {
				in_delmulti(imo->imo_membership[
				    --imo->imo_num_memberships]);
				imo->imo_multicast_ifp = NULL;
			}
			splx(s);
			break;
		}

		if ((sifp = ifunit(pfsyncr.pfsyncr_syncdev)) == NULL) {
			splx(s);
			return (EINVAL);
		}

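		/*
		 * If the new syncdev imposes a smaller MTU, flush
		 * whatever has been queued before switching over.
		 */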
		if (sifp->if_mtu < sc->sc_if.if_mtu ||
		    (sc->sc_sync_if != NULL &&
		    sifp->if_mtu < sc->sc_sync_if->if_mtu) ||
		    sifp->if_mtu < MCLBYTES - sizeof(struct ip))
			pfsync_sendout();
		sc->sc_sync_if = sifp;

		if (imo->imo_num_memberships > 0) {
			in_delmulti(imo->imo_membership[
			    --imo->imo_num_memberships]);
			imo->imo_multicast_ifp = NULL;
		}

		if (sc->sc_sync_if &&
		    sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) {
			struct in_addr addr;

			if (!(sc->sc_sync_if->if_flags & IFF_MULTICAST)) {
				if (sc->sc_lhcookie != NULL)
					hook_disestablish(
					    sc->sc_sync_if->if_linkstatehooks,
					    sc->sc_lhcookie);
				sc->sc_sync_if = NULL;
				splx(s);
				return (EADDRNOTAVAIL);
			}

			addr.s_addr = INADDR_PFSYNC_GROUP;

			if ((imo->imo_membership[0] =
			    in_addmulti(&addr, sc->sc_sync_if)) == NULL) {
				if (sc->sc_lhcookie != NULL)
					hook_disestablish(
					    sc->sc_sync_if->if_linkstatehooks,
					    sc->sc_lhcookie);
				sc->sc_sync_if = NULL;
				splx(s);
				return (ENOBUFS);
			}
			imo->imo_num_memberships++;
			imo->imo_multicast_ifp = sc->sc_sync_if;
			imo->imo_multicast_ttl = PFSYNC_DFLTTL;
			imo->imo_multicast_loop = 0;
		}

		ip = &sc->sc_template;
		bzero(ip, sizeof(*ip));
		ip->ip_v = IPVERSION;
		ip->ip_hl = sizeof(sc->sc_template) >> 2;
		ip->ip_tos = IPTOS_LOWDELAY;
		/* len and id are set later */
		ip->ip_off = htons(IP_DF);
		ip->ip_ttl = PFSYNC_DFLTTL;
		ip->ip_p = IPPROTO_PFSYNC;
		ip->ip_src.s_addr = INADDR_ANY;
		ip->ip_dst.s_addr = sc->sc_sync_peer.s_addr;

		sc->sc_lhcookie =
		    hook_establish(sc->sc_sync_if->if_linkstatehooks, 1,
		    pfsync_syncdev_state, sc);

		pfsync_request_full_update(sc);
		splx(s);

		break;

	default:
		return (ENOTTY);
	}

	return (0);
}

void
pfsync_out_state(struct pf_state *st, void *buf)
{
	struct pfsync_state *sp = buf;

	pfsync_state_export(sp, st);
}

void
pfsync_out_iack(struct pf_state *st, void *buf)
{
	struct pfsync_ins_ack *iack = buf;

	iack->id = st->id;
	iack->creatorid = st->creatorid;
}

void
pfsync_out_upd_c(struct pf_state *st, void *buf)
{
	struct pfsync_upd_c *up = buf;

	bzero(up, sizeof(*up));
	up->id = st->id;
	pf_state_peer_hton(&st->src, &up->src);
	pf_state_peer_hton(&st->dst, &up->dst);
	up->creatorid = st->creatorid;
	up->timeout = st->timeout;
}

void
pfsync_out_del(struct pf_state *st, void *buf)
{
	struct pfsync_del_c *dp = buf;

	dp->id = st->id;
	dp->creatorid = st->creatorid;

	SET(st->state_flags, PFSTATE_NOSYNC);
}

void
pfsync_drop(struct pfsync_softc *sc)
{
	struct pf_state *st;
	struct pfsync_upd_req_item *ur;
	struct tdb *t;
	int q;

	for (q = 0; q < PFSYNC_S_COUNT; q++) {
		if (TAILQ_EMPTY(&sc->sc_qs[q]))
			continue;

		TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list) {
#ifdef PFSYNC_DEBUG
			KASSERT(st->sync_state == q);
#endif
			st->sync_state = PFSYNC_S_NONE;
		}
		TAILQ_INIT(&sc->sc_qs[q]);
	}

	while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
		TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);
		pool_put(&sc->sc_pool, ur);
	}

	sc->sc_plus = NULL;

	if (!TAILQ_EMPTY(&sc->sc_tdb_q)) {
		TAILQ_FOREACH(t, &sc->sc_tdb_q, tdb_sync_entry)
			CLR(t->tdb_flags, TDBF_PFSYNC);

		TAILQ_INIT(&sc->sc_tdb_q);
	}

	sc->sc_len = PFSYNC_MINPKT;
}

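/*
 * Assemble all queued update requests, TDBs, and state messages into a
 * single pfsync packet and hand it to ip_output().
 */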
void
pfsync_sendout(void)
{
	struct pfsync_softc *sc = pfsyncif;
#if NBPFILTER > 0
	struct ifnet *ifp = &sc->sc_if;
#endif
	struct mbuf *m;
	struct ip *ip;
	struct pfsync_header *ph;
	struct pfsync_subheader *subh;
	struct pf_state *st;
	struct pfsync_upd_req_item *ur;
	struct tdb *t;

	int offset;
	int q, count = 0;

	if (sc == NULL || sc->sc_len == PFSYNC_MINPKT)
		return;

	if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
#if NBPFILTER > 0
	    (ifp->if_bpf == NULL && sc->sc_sync_if == NULL)) {
#else
	    sc->sc_sync_if == NULL) {
#endif
		pfsync_drop(sc);
		return;
	}

	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL) {
		sc->sc_if.if_oerrors++;
		pfsyncstats.pfsyncs_onomem++;
		pfsync_drop(sc);
		return;
	}

	if (max_linkhdr + sc->sc_len > MHLEN) {
		MCLGETI(m, M_DONTWAIT, NULL, max_linkhdr + sc->sc_len);
		if (!ISSET(m->m_flags, M_EXT)) {
			m_free(m);
			sc->sc_if.if_oerrors++;
			pfsyncstats.pfsyncs_onomem++;
			pfsync_drop(sc);
			return;
		}
	}
	m->m_data += max_linkhdr;
	m->m_len = m->m_pkthdr.len = sc->sc_len;

	/* build the ip header */
	ip = mtod(m, struct ip *);
	bcopy(&sc->sc_template, ip, sizeof(*ip));
	offset = sizeof(*ip);

	ip->ip_len = htons(m->m_pkthdr.len);
	ip->ip_id = htons(ip_randomid());

	/* build the pfsync header */
	ph = (struct pfsync_header *)(m->m_data + offset);
	bzero(ph, sizeof(*ph));
	offset += sizeof(*ph);

	ph->version = PFSYNC_VERSION;
	ph->len = htons(sc->sc_len - sizeof(*ip));
	bcopy(pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH);

	if (!TAILQ_EMPTY(&sc->sc_upd_req_list)) {
		subh = (struct pfsync_subheader *)(m->m_data + offset);
		offset += sizeof(*subh);

		count = 0;
		while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
			TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);

			bcopy(&ur->ur_msg, m->m_data + offset,
			    sizeof(ur->ur_msg));
			offset += sizeof(ur->ur_msg);

			pool_put(&sc->sc_pool, ur);

			count++;
		}

		bzero(subh, sizeof(*subh));
		subh->len = sizeof(ur->ur_msg) >> 2;
		subh->action = PFSYNC_ACT_UPD_REQ;
		subh->count = htons(count);
	}

	/* has someone built a custom region for us to add? */
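	/*
	 * sc_plus points at a caller-supplied message (see
	 * pfsync_send_plus()); copy it through verbatim.
	 */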
	if (sc->sc_plus != NULL) {
		bcopy(sc->sc_plus, m->m_data + offset, sc->sc_pluslen);
		offset += sc->sc_pluslen;

		sc->sc_plus = NULL;
	}

	if (!TAILQ_EMPTY(&sc->sc_tdb_q)) {
		subh = (struct pfsync_subheader *)(m->m_data + offset);
		offset += sizeof(*subh);

		count = 0;
		TAILQ_FOREACH(t, &sc->sc_tdb_q, tdb_sync_entry) {
			pfsync_out_tdb(t, m->m_data + offset);
			offset += sizeof(struct pfsync_tdb);
			CLR(t->tdb_flags, TDBF_PFSYNC);

			count++;
		}
		TAILQ_INIT(&sc->sc_tdb_q);

		bzero(subh, sizeof(*subh));
		subh->action = PFSYNC_ACT_TDB;
		subh->len = sizeof(struct pfsync_tdb) >> 2;
		subh->count = htons(count);
	}

	/* walk the queues */
	for (q = 0; q < PFSYNC_S_COUNT; q++) {
		if (TAILQ_EMPTY(&sc->sc_qs[q]))
			continue;

		subh = (struct pfsync_subheader *)(m->m_data + offset);
		offset += sizeof(*subh);

		count = 0;
		TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list) {
#ifdef PFSYNC_DEBUG
			KASSERT(st->sync_state == q);
#endif
			pfsync_qs[q].write(st, m->m_data + offset);
			offset += pfsync_qs[q].len;

			st->sync_state = PFSYNC_S_NONE;
			count++;
		}
		TAILQ_INIT(&sc->sc_qs[q]);

		bzero(subh, sizeof(*subh));
		subh->action = pfsync_qs[q].action;
		subh->len = pfsync_qs[q].len >> 2;
		subh->count = htons(count);
	}

	/* we're done, let's put it on the wire */
#if NBPFILTER > 0
	if (ifp->if_bpf) {
		m->m_data += sizeof(*ip);
		m->m_len = m->m_pkthdr.len = sc->sc_len - sizeof(*ip);
		bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
		m->m_data -= sizeof(*ip);
		m->m_len = m->m_pkthdr.len = sc->sc_len;
	}

	if (sc->sc_sync_if == NULL) {
		sc->sc_len = PFSYNC_MINPKT;
		m_freem(m);
		return;
	}
#endif

	sc->sc_if.if_opackets++;
	sc->sc_if.if_obytes += m->m_pkthdr.len;

	if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL) == 0)
		pfsyncstats.pfsyncs_opackets++;
	else
		pfsyncstats.pfsyncs_oerrors++;

	/* start again */
	sc->sc_len = PFSYNC_MINPKT;
}

void
pfsync_insert_state(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;

	splsoftassert(IPL_SOFTNET);

	if (ISSET(st->rule.ptr->rule_flag, PFRULE_NOSYNC) ||
	    st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) {
		SET(st->state_flags, PFSTATE_NOSYNC);
		return;
	}

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
	    ISSET(st->state_flags, PFSTATE_NOSYNC))
		return;

#ifdef PFSYNC_DEBUG
	KASSERT(st->sync_state == PFSYNC_S_NONE);
#endif

	if (sc->sc_len == PFSYNC_MINPKT)
		timeout_add_sec(&sc->sc_tmo, 1);

	pfsync_q_ins(st, PFSYNC_S_INS);

	st->sync_updates = 0;
}

int
pfsync_defer(struct pf_state *st, struct mbuf *m)
{
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_deferral *pd;

	splsoftassert(IPL_SOFTNET);

	if (!sc->sc_defer ||
	    ISSET(st->state_flags, PFSTATE_NOSYNC) ||
	    m->m_flags & (M_BCAST|M_MCAST))
		return (0);

	if (sc->sc_deferred >= 128) {
		pd = TAILQ_FIRST(&sc->sc_deferrals);
		if (timeout_del(&pd->pd_tmo))
			pfsync_undefer(pd, 0);
	}

	pd = pool_get(&sc->sc_pool, PR_NOWAIT);
	if (pd == NULL)
		return (0);

	m->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
	SET(st->state_flags, PFSTATE_ACK);

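	/*
	 * Hold the packet on the deferral list; it is released when the
	 * peer acknowledges the state insert or when the timeout below
	 * fires.
	 */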
	pd->pd_st = st;
	pd->pd_m = m;

	sc->sc_deferred++;
	TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry);

	timeout_set(&pd->pd_tmo, pfsync_defer_tmo, pd);
	timeout_add_msec(&pd->pd_tmo, 20);

	schednetisr(NETISR_PFSYNC);

	return (1);
}

void
pfsync_undefer(struct pfsync_deferral *pd, int drop)
{
	struct pfsync_softc *sc = pfsyncif;

	splsoftassert(IPL_SOFTNET);

	TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
	sc->sc_deferred--;

	CLR(pd->pd_st->state_flags, PFSTATE_ACK);
	if (drop)
		m_freem(pd->pd_m);
	else {
		if (pd->pd_st->rule.ptr->rt == PF_ROUTETO) {
			switch (pd->pd_st->key[PF_SK_WIRE]->af) {
#ifdef INET
			case AF_INET:
				pf_route(&pd->pd_m, pd->pd_st->rule.ptr,
				    pd->pd_st->direction,
				    pd->pd_st->rt_kif->pfik_ifp, pd->pd_st);
				break;
#endif /* INET */
#ifdef INET6
			case AF_INET6:
				pf_route6(&pd->pd_m, pd->pd_st->rule.ptr,
				    pd->pd_st->direction,
				    pd->pd_st->rt_kif->pfik_ifp, pd->pd_st);
				break;
#endif /* INET6 */
			}
		} else {
			switch (pd->pd_st->key[PF_SK_WIRE]->af) {
#ifdef INET
			case AF_INET:
				ip_output(pd->pd_m, NULL, NULL, 0,
				    NULL, NULL);
				break;
#endif /* INET */
#ifdef INET6
			case AF_INET6:
				ip6_output(pd->pd_m, NULL, NULL, 0,
				    NULL, NULL, NULL);
				break;
#endif /* INET6 */
			}
		}
	}

	pool_put(&sc->sc_pool, pd);
}

void
pfsync_defer_tmo(void *arg)
{
	int s;

	s = splsoftnet();
	pfsync_undefer(arg, 0);
	splx(s);
}

void
pfsync_deferred(struct pf_state *st, int drop)
{
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_deferral *pd;

	splsoftassert(IPL_SOFTNET);

	TAILQ_FOREACH(pd, &sc->sc_deferrals, pd_entry) {
		if (pd->pd_st == st) {
			if (timeout_del(&pd->pd_tmo))
				pfsync_undefer(pd, drop);
			return;
		}
	}

	panic("pfsync_deferred: unable to find deferred state");
}

void
pfsync_update_state(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;
	int sync = 0;

	splsoftassert(IPL_SOFTNET);

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
		return;

	if (ISSET(st->state_flags, PFSTATE_ACK))
		pfsync_deferred(st, 0);
	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
		if (st->sync_state != PFSYNC_S_NONE)
			pfsync_q_del(st);
		return;
	}

	if (sc->sc_len == PFSYNC_MINPKT)
		timeout_add_sec(&sc->sc_tmo, 1);

	switch (st->sync_state) {
	case PFSYNC_S_UPD_C:
	case PFSYNC_S_UPD:
	case PFSYNC_S_INS:
		/* we're already handling it */

		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) {
			st->sync_updates++;
			if (st->sync_updates >= sc->sc_maxupdates)
				sync = 1;
		}
		break;

	case PFSYNC_S_IACK:
		pfsync_q_del(st);
	case PFSYNC_S_NONE:
		pfsync_q_ins(st, PFSYNC_S_UPD_C);
		st->sync_updates = 0;
		break;

	default:
		panic("pfsync_update_state: unexpected sync state %d",
		    st->sync_state);
	}

	if (sync || (time_uptime - st->pfsync_time) < 2)
		schednetisr(NETISR_PFSYNC);
}

void
pfsync_cancel_full_update(struct pfsync_softc *sc)
{
	if (timeout_pending(&sc->sc_bulkfail_tmo) ||
	    timeout_pending(&sc->sc_bulk_tmo))
		DPFPRINTF(LOG_INFO, "cancelling bulk update");
	timeout_del(&sc->sc_bulkfail_tmo);
	timeout_del(&sc->sc_bulk_tmo);
	sc->sc_bulk_next = NULL;
	sc->sc_bulk_last = NULL;
	sc->sc_ureq_sent = 0;
	sc->sc_bulk_tries = 0;
}

void
pfsync_request_full_update(struct pfsync_softc *sc)
{
	if (sc->sc_sync_if && ISSET(sc->sc_if.if_flags, IFF_RUNNING)) {
		/* Request a full state table update. */
		sc->sc_ureq_sent = time_uptime;
#if NCARP > 0
		if (!sc->sc_link_demoted && pfsync_sync_ok)
			carp_group_demote_adj(&sc->sc_if, 1,
			    "pfsync bulk start");
#endif
		pfsync_sync_ok = 0;
		DPFPRINTF(LOG_INFO, "requesting bulk update");
		timeout_add(&sc->sc_bulkfail_tmo, 4 * hz +
		    pf_pool_limits[PF_LIMIT_STATES].limit /
		    ((sc->sc_if.if_mtu - PFSYNC_MINPKT) /
		    sizeof(struct pfsync_state)));
		pfsync_request_update(0, 0);
	}
}

void
pfsync_request_update(u_int32_t creatorid, u_int64_t id)
{
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_upd_req_item *item;
	size_t nlen = sizeof(struct pfsync_upd_req);

	/*
	 * this code does nothing to prevent multiple update requests for the
	 * same state being generated.
	 */

	item = pool_get(&sc->sc_pool, PR_NOWAIT);
	if (item == NULL) {
		/* XXX stats */
		return;
	}

	item->ur_msg.id = id;
	item->ur_msg.creatorid = creatorid;

	if (TAILQ_EMPTY(&sc->sc_upd_req_list))
		nlen += sizeof(struct pfsync_subheader);

	if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
		pfsync_sendout();

		nlen = sizeof(struct pfsync_subheader) +
		    sizeof(struct pfsync_upd_req);
	}

	TAILQ_INSERT_TAIL(&sc->sc_upd_req_list, item, ur_entry);
	sc->sc_len += nlen;

	schednetisr(NETISR_PFSYNC);
}

void
pfsync_update_state_req(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;

	if (sc == NULL)
		panic("pfsync_update_state_req: nonexistent instance");

	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
		if (st->sync_state != PFSYNC_S_NONE)
			pfsync_q_del(st);
		return;
	}

	switch (st->sync_state) {
	case PFSYNC_S_UPD_C:
	case PFSYNC_S_IACK:
		pfsync_q_del(st);
	case PFSYNC_S_NONE:
		pfsync_q_ins(st, PFSYNC_S_UPD);
		schednetisr(NETISR_PFSYNC);
		return;

	case PFSYNC_S_INS:
	case PFSYNC_S_UPD:
	case PFSYNC_S_DEL:
		/* we're already handling it */
		return;

	default:
		panic("pfsync_update_state_req: unexpected sync state %d",
		    st->sync_state);
	}
}

void
pfsync_delete_state(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;

	splsoftassert(IPL_SOFTNET);

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
		return;

	if (ISSET(st->state_flags, PFSTATE_ACK))
		pfsync_deferred(st, 1);
	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
		if (st->sync_state != PFSYNC_S_NONE)
			pfsync_q_del(st);
		return;
	}

	if (sc->sc_len == PFSYNC_MINPKT)
		timeout_add_sec(&sc->sc_tmo, 1);

	switch (st->sync_state) {
	case PFSYNC_S_INS:
		/* we never got to tell the world so just forget about it */
		pfsync_q_del(st);
		return;

	case PFSYNC_S_UPD_C:
	case PFSYNC_S_UPD:
	case PFSYNC_S_IACK:
		pfsync_q_del(st);
		/* FALLTHROUGH to putting it on the del list */

	case PFSYNC_S_NONE:
		pfsync_q_ins(st, PFSYNC_S_DEL);
		return;

	default:
		panic("pfsync_delete_state: unexpected sync state %d",
		    st->sync_state);
	}
}

void
pfsync_clear_states(u_int32_t creatorid, const char *ifname)
{
	struct pfsync_softc *sc = pfsyncif;
	struct {
		struct pfsync_subheader subh;
		struct pfsync_clr clr;
	} __packed r;

	splsoftassert(IPL_SOFTNET);

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
		return;

	bzero(&r, sizeof(r));

	r.subh.action = PFSYNC_ACT_CLR;
	r.subh.len = sizeof(struct pfsync_clr) >> 2;
	r.subh.count = htons(1);

	strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname));
	r.clr.creatorid = creatorid;

	pfsync_send_plus(&r, sizeof(r));
}

void
pfsync_q_ins(struct pf_state *st, int q)
{
	struct pfsync_softc *sc = pfsyncif;
	size_t nlen = pfsync_qs[q].len;

	KASSERT(st->sync_state == PFSYNC_S_NONE);

#if defined(PFSYNC_DEBUG)
	if (sc->sc_len < PFSYNC_MINPKT)
		panic("pfsync pkt len is too low %zu", sc->sc_len);
#endif
	if (TAILQ_EMPTY(&sc->sc_qs[q]))
		nlen += sizeof(struct pfsync_subheader);

	if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
		pfsync_sendout();

		nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len;
	}

	sc->sc_len += nlen;
	TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list);
	st->sync_state = q;
}

void
pfsync_q_del(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;
	int q = st->sync_state;

	KASSERT(st->sync_state != PFSYNC_S_NONE);

	sc->sc_len -= pfsync_qs[q].len;
	TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list);
	st->sync_state = PFSYNC_S_NONE;

	if (TAILQ_EMPTY(&sc->sc_qs[q]))
		sc->sc_len -= sizeof(struct pfsync_subheader);
}

void
pfsync_update_tdb(struct tdb *t, int output)
{
	struct pfsync_softc *sc = pfsyncif;
	size_t nlen = sizeof(struct pfsync_tdb);

	if (sc == NULL)
		return;

	if (!ISSET(t->tdb_flags, TDBF_PFSYNC)) {
		if (TAILQ_EMPTY(&sc->sc_tdb_q))
			nlen += sizeof(struct pfsync_subheader);

		if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
			pfsync_sendout();

			nlen = sizeof(struct pfsync_subheader) +
			    sizeof(struct pfsync_tdb);
		}

		sc->sc_len += nlen;
		TAILQ_INSERT_TAIL(&sc->sc_tdb_q, t, tdb_sync_entry);
		SET(t->tdb_flags, TDBF_PFSYNC);
		t->tdb_updates = 0;
	} else {
		if (++t->tdb_updates >= sc->sc_maxupdates)
			schednetisr(NETISR_PFSYNC);
	}

	if (output)
		SET(t->tdb_flags, TDBF_PFSYNC_RPL);
	else
		CLR(t->tdb_flags, TDBF_PFSYNC_RPL);
}

void
pfsync_delete_tdb(struct tdb *t)
{
	struct pfsync_softc *sc = pfsyncif;

	if (sc == NULL || !ISSET(t->tdb_flags, TDBF_PFSYNC))
		return;

	sc->sc_len -= sizeof(struct pfsync_tdb);
	TAILQ_REMOVE(&sc->sc_tdb_q, t, tdb_sync_entry);
	CLR(t->tdb_flags, TDBF_PFSYNC);

	if (TAILQ_EMPTY(&sc->sc_tdb_q))
		sc->sc_len -= sizeof(struct pfsync_subheader);
}

void
pfsync_out_tdb(struct tdb *t, void *buf)
{
	struct pfsync_tdb *ut = buf;

	bzero(ut, sizeof(*ut));
	ut->spi = t->tdb_spi;
	bcopy(&t->tdb_dst, &ut->dst, sizeof(ut->dst));
	/*
	 * When a failover happens, the master's rpl is probably above
	 * what we see here (we may be up to a second late), so
	 * increase it a bit for outbound tdbs to manage most such
	 * situations.
	 *
	 * For now, just add an offset that is likely to be larger
	 * than the number of packets we can see in one second. The RFC
	 * just says the next packet must have a higher seq value.
	 *
	 * XXX What is a good algorithm for this? We could use
	 * a rate-determined increase, but to know it, we would have
	 * to extend struct tdb.
	 * XXX pt->rpl can wrap over MAXINT, but if so the real tdb
	 * will soon be replaced anyway. For now, just don't handle
	 * this edge case.
	 */
#define RPL_INCR 16384
	ut->rpl = htonl(t->tdb_rpl + (ISSET(t->tdb_flags, TDBF_PFSYNC_RPL) ?
	    RPL_INCR : 0));
	ut->cur_bytes = htobe64(t->tdb_cur_bytes);
	ut->sproto = t->tdb_sproto;
	ut->rdomain = htons(t->tdb_rdomain);
}

void
pfsync_bulk_start(void)
{
	struct pfsync_softc *sc = pfsyncif;

	DPFPRINTF(LOG_INFO, "received bulk update request");

	if (TAILQ_EMPTY(&state_list))
		pfsync_bulk_status(PFSYNC_BUS_END);
	else {
		sc->sc_ureq_received = time_uptime;

		if (sc->sc_bulk_next == NULL)
			sc->sc_bulk_next = TAILQ_FIRST(&state_list);
		sc->sc_bulk_last = sc->sc_bulk_next;

		pfsync_bulk_status(PFSYNC_BUS_START);
		timeout_add(&sc->sc_bulk_tmo, 0);
	}
}

void
pfsync_bulk_update(void *arg)
{
	struct pfsync_softc *sc = arg;
	struct pf_state *st;
	int i = 0;
	int s;

	s = splsoftnet();

	st = sc->sc_bulk_next;

	for (;;) {
		if (st->sync_state == PFSYNC_S_NONE &&
		    st->timeout < PFTM_MAX &&
		    st->pfsync_time <= sc->sc_ureq_received) {
			pfsync_update_state_req(st);
			i++;
		}

		st = TAILQ_NEXT(st, entry_list);
		if (st == NULL)
			st = TAILQ_FIRST(&state_list);

		if (st == sc->sc_bulk_last) {
			/* we're done */
			sc->sc_bulk_next = NULL;
			sc->sc_bulk_last = NULL;
			pfsync_bulk_status(PFSYNC_BUS_END);
			break;
		}

		if (i > 1 && (sc->sc_if.if_mtu - sc->sc_len) <
		    sizeof(struct pfsync_state)) {
			/* we've filled a packet */
			sc->sc_bulk_next = st;
			timeout_add(&sc->sc_bulk_tmo, 1);
			break;
		}
	}

	splx(s);
}

void
pfsync_bulk_status(u_int8_t status)
{
	struct {
		struct pfsync_subheader subh;
		struct pfsync_bus bus;
	} __packed r;

	struct pfsync_softc *sc = pfsyncif;

	bzero(&r, sizeof(r));

	r.subh.action = PFSYNC_ACT_BUS;
	r.subh.len = sizeof(struct pfsync_bus) >> 2;
	r.subh.count = htons(1);

	r.bus.creatorid = pf_status.hostid;
	r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received);
	r.bus.status = status;

	pfsync_send_plus(&r, sizeof(r));
}

void
pfsync_bulk_fail(void *arg)
{
	struct pfsync_softc *sc = arg;
	int s;

	s = splsoftnet();

	if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) {
		/* Try again */
		timeout_add_sec(&sc->sc_bulkfail_tmo, 5);
		pfsync_request_update(0, 0);
	} else {
		/* Pretend like the transfer was ok */
		sc->sc_ureq_sent = 0;
		sc->sc_bulk_tries = 0;
#if NCARP > 0
		if (!pfsync_sync_ok)
			carp_group_demote_adj(&sc->sc_if, -1,
			    sc->sc_link_demoted ?
2318 "pfsync link state up" : 2319 "pfsync bulk fail"); 2320 if (sc->sc_initial_bulk) { 2321 carp_group_demote_adj(&sc->sc_if, -32, 2322 "pfsync init"); 2323 sc->sc_initial_bulk = 0; 2324 } 2325 #endif 2326 pfsync_sync_ok = 1; 2327 sc->sc_link_demoted = 0; 2328 DPFPRINTF(LOG_ERR, "failed to receive bulk update"); 2329 } 2330 2331 splx(s); 2332 } 2333 2334 void 2335 pfsync_send_plus(void *plus, size_t pluslen) 2336 { 2337 struct pfsync_softc *sc = pfsyncif; 2338 2339 if (sc->sc_len + pluslen > sc->sc_if.if_mtu) 2340 pfsync_sendout(); 2341 2342 sc->sc_plus = plus; 2343 sc->sc_len += (sc->sc_pluslen = pluslen); 2344 2345 pfsync_sendout(); 2346 } 2347 2348 int 2349 pfsync_up(void) 2350 { 2351 struct pfsync_softc *sc = pfsyncif; 2352 2353 if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING)) 2354 return (0); 2355 2356 return (1); 2357 } 2358 2359 int 2360 pfsync_state_in_use(struct pf_state *st) 2361 { 2362 struct pfsync_softc *sc = pfsyncif; 2363 2364 if (sc == NULL) 2365 return (0); 2366 2367 if (st->sync_state != PFSYNC_S_NONE || 2368 st == sc->sc_bulk_next || 2369 st == sc->sc_bulk_last) 2370 return (1); 2371 2372 return (0); 2373 } 2374 2375 void 2376 pfsync_timeout(void *arg) 2377 { 2378 int s; 2379 2380 s = splsoftnet(); 2381 pfsync_sendout(); 2382 splx(s); 2383 } 2384 2385 /* this is a softnet/netisr handler */ 2386 void 2387 pfsyncintr(void) 2388 { 2389 pfsync_sendout(); 2390 } 2391 2392 int 2393 pfsync_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, 2394 size_t newlen) 2395 { 2396 /* All sysctl names at this level are terminal. */ 2397 if (namelen != 1) 2398 return (ENOTDIR); 2399 2400 switch (name[0]) { 2401 case PFSYNCCTL_STATS: 2402 if (newp != NULL) 2403 return (EPERM); 2404 return (sysctl_struct(oldp, oldlenp, newp, newlen, 2405 &pfsyncstats, sizeof(pfsyncstats))); 2406 default: 2407 return (ENOPROTOOPT); 2408 } 2409 } 2410