/*	$OpenBSD: if_pfsync.c,v 1.129 2009/09/28 03:01:23 dlg Exp $	*/

/*
 * Copyright (c) 2002 Michael Shalayeff
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 2009 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/timeout.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/pool.h>

#include <net/if.h>
#include <net/if_types.h>
#include <net/route.h>
#include <net/bpf.h>
#include <net/netisr.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/tcp.h>
#include <netinet/tcp_seq.h>

#ifdef INET
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#endif

#ifdef INET6
#include <netinet6/nd6.h>
#endif /* INET6 */

#include "carp.h"
#if NCARP > 0
#include <netinet/ip_carp.h>
#endif

#include <net/pfvar.h>
#include <net/if_pfsync.h>

#include "bpfilter.h"
#include "pfsync.h"

#define PFSYNC_MINPKT ( \
	sizeof(struct ip) + \
	sizeof(struct pfsync_header) + \
	sizeof(struct pfsync_subheader))

struct pfsync_pkt {
	struct ip *ip;
	struct in_addr src;
	u_int8_t flags;
};

int	pfsync_upd_tcp(struct pf_state *, struct pfsync_state_peer *,
	    struct pfsync_state_peer *);

int	pfsync_in_clr(struct pfsync_pkt *, struct mbuf *, int, int);
int	pfsync_in_ins(struct pfsync_pkt *, struct mbuf *, int, int);
int	pfsync_in_iack(struct pfsync_pkt *, struct mbuf *, int, int);
int	pfsync_in_upd(struct pfsync_pkt *, struct mbuf *, int, int);
int	pfsync_in_upd_c(struct pfsync_pkt *, struct mbuf *, int, int);
int	pfsync_in_ureq(struct pfsync_pkt *, struct mbuf *, int, int);
int	pfsync_in_del(struct pfsync_pkt *, struct mbuf *, int, int);
int	pfsync_in_del_c(struct pfsync_pkt *, struct mbuf *, int, int);
int	pfsync_in_bus(struct pfsync_pkt *, struct mbuf *, int, int);
int	pfsync_in_tdb(struct pfsync_pkt *, struct mbuf *, int, int);
int	pfsync_in_eof(struct pfsync_pkt *, struct mbuf *, int, int);

int	pfsync_in_error(struct pfsync_pkt *, struct mbuf *, int, int);

int	(*pfsync_acts[])(struct pfsync_pkt *, struct mbuf *, int, int) = {
	pfsync_in_clr,		/* PFSYNC_ACT_CLR */
	pfsync_in_ins,		/* PFSYNC_ACT_INS */
	pfsync_in_iack,		/* PFSYNC_ACT_INS_ACK */
	pfsync_in_upd,		/* PFSYNC_ACT_UPD */
	pfsync_in_upd_c,	/* PFSYNC_ACT_UPD_C */
	pfsync_in_ureq,		/* PFSYNC_ACT_UPD_REQ */
	pfsync_in_del,		/* PFSYNC_ACT_DEL */
	pfsync_in_del_c,	/* PFSYNC_ACT_DEL_C */
	pfsync_in_error,	/* PFSYNC_ACT_INS_F */
	pfsync_in_error,	/* PFSYNC_ACT_DEL_F */
	pfsync_in_bus,		/* PFSYNC_ACT_BUS */
	pfsync_in_tdb,		/* PFSYNC_ACT_TDB */
	pfsync_in_eof		/* PFSYNC_ACT_EOF */
};

struct pfsync_q {
	void		(*write)(struct pf_state *, void *);
	size_t		len;
	u_int8_t	action;
};

/* we have one of these for every PFSYNC_S_ */
void	pfsync_out_state(struct pf_state *, void *);
void	pfsync_out_iack(struct pf_state *, void *);
void	pfsync_out_upd_c(struct pf_state *, void *);
void	pfsync_out_del(struct pf_state *, void *);

struct pfsync_q pfsync_qs[] = {
	{ pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_INS },
	{ pfsync_out_iack,  sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK },
	{ pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_UPD },
	{ pfsync_out_upd_c, sizeof(struct pfsync_upd_c),   PFSYNC_ACT_UPD_C },
	{ pfsync_out_del,   sizeof(struct pfsync_del_c),   PFSYNC_ACT_DEL_C }
};
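
/*
 * pfsync_q_ins() and pfsync_q_del() move states on and off the sc_qs
 * queues; the queue index doubles as the state's sync_state, so each
 * queued state knows which pfsync_qs entry will serialise it.
 */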
void	pfsync_q_ins(struct pf_state *, int);
void	pfsync_q_del(struct pf_state *);

struct pfsync_upd_req_item {
	TAILQ_ENTRY(pfsync_upd_req_item)	ur_entry;
	struct pfsync_upd_req			ur_msg;
};
TAILQ_HEAD(pfsync_upd_reqs, pfsync_upd_req_item);

struct pfsync_deferral {
	TAILQ_ENTRY(pfsync_deferral)		 pd_entry;
	struct pf_state				*pd_st;
	struct mbuf				*pd_m;
	struct timeout				 pd_tmo;
};
TAILQ_HEAD(pfsync_deferrals, pfsync_deferral);

#define PFSYNC_PLSIZE	MAX(sizeof(struct pfsync_upd_req_item), \
			    sizeof(struct pfsync_deferral))

void	pfsync_out_tdb(struct tdb *, void *);

struct pfsync_softc {
	struct ifnet		 sc_if;
	struct ifnet		*sc_sync_if;

	struct pool		 sc_pool;

	struct ip_moptions	 sc_imo;

	struct in_addr		 sc_sync_peer;
	u_int8_t		 sc_maxupdates;

	struct ip		 sc_template;

	struct pf_state_queue	 sc_qs[PFSYNC_S_COUNT];
	size_t			 sc_len;

	struct pfsync_upd_reqs	 sc_upd_req_list;

	int			 sc_defer;
	struct pfsync_deferrals	 sc_deferrals;
	u_int			 sc_deferred;

	void			*sc_plus;
	size_t			 sc_pluslen;

	u_int32_t		 sc_ureq_sent;
	int			 sc_bulk_tries;
	struct timeout		 sc_bulkfail_tmo;

	u_int32_t		 sc_ureq_received;
	struct pf_state		*sc_bulk_next;
	struct pf_state		*sc_bulk_last;
	struct timeout		 sc_bulk_tmo;

	TAILQ_HEAD(, tdb)	 sc_tdb_q;

	struct timeout		 sc_tmo;
};

struct pfsync_softc	*pfsyncif = NULL;
struct pfsyncstats	 pfsyncstats;

void	pfsyncattach(int);
int	pfsync_clone_create(struct if_clone *, int);
int	pfsync_clone_destroy(struct ifnet *);
int	pfsync_alloc_scrub_memory(struct pfsync_state_peer *,
	    struct pf_state_peer *);
void	pfsync_update_net_tdb(struct pfsync_tdb *);
int	pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
	    struct rtentry *);
int	pfsyncioctl(struct ifnet *, u_long, caddr_t);
void	pfsyncstart(struct ifnet *);

struct mbuf *pfsync_if_dequeue(struct ifnet *);

void	pfsync_deferred(struct pf_state *, int);
void	pfsync_undefer(struct pfsync_deferral *, int);
void	pfsync_defer_tmo(void *);

void	pfsync_request_update(u_int32_t, u_int64_t);
void	pfsync_update_state_req(struct pf_state *);

void	pfsync_drop(struct pfsync_softc *);
void	pfsync_sendout(void);
void	pfsync_send_plus(void *, size_t);
void	pfsync_timeout(void *);
void	pfsync_tdb_timeout(void *);

void	pfsync_bulk_start(void);
void	pfsync_bulk_status(u_int8_t);
void	pfsync_bulk_update(void *);
void	pfsync_bulk_fail(void *);

#define PFSYNC_MAX_BULKTRIES	12
int	pfsync_sync_ok;

struct if_clone	pfsync_cloner =
    IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create, pfsync_clone_destroy);

void
pfsyncattach(int npfsync)
{
	if_clone_attach(&pfsync_cloner);
}

int
pfsync_clone_create(struct if_clone *ifc, int unit)
{
	struct pfsync_softc *sc;
	struct ifnet *ifp;
	int q;

	if (unit != 0)
		return (EINVAL);

	pfsync_sync_ok = 1;

	sc = malloc(sizeof(*pfsyncif), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (sc == NULL)
		return (ENOMEM);

	for (q = 0; q < PFSYNC_S_COUNT; q++)
		TAILQ_INIT(&sc->sc_qs[q]);

	pool_init(&sc->sc_pool, PFSYNC_PLSIZE, 0, 0, 0, "pfsync", NULL);
	TAILQ_INIT(&sc->sc_upd_req_list);
	TAILQ_INIT(&sc->sc_deferrals);
	sc->sc_deferred = 0;

	TAILQ_INIT(&sc->sc_tdb_q);

	sc->sc_len = PFSYNC_MINPKT;
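	/* limit on per-state updates before forcing a send;
	 * see pfsync_update_state() */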
	sc->sc_maxupdates = 128;

	sc->sc_imo.imo_membership = (struct in_multi **)malloc(
	    (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_IPMOPTS,
	    M_WAITOK | M_ZERO);
	sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;

	ifp = &sc->sc_if;
	snprintf(ifp->if_xname, sizeof ifp->if_xname, "pfsync%d", unit);
	ifp->if_softc = sc;
	ifp->if_ioctl = pfsyncioctl;
	ifp->if_output = pfsyncoutput;
	ifp->if_start = pfsyncstart;
	ifp->if_type = IFT_PFSYNC;
	ifp->if_snd.ifq_maxlen = ifqmaxlen;
	ifp->if_hdrlen = sizeof(struct pfsync_header);
	ifp->if_mtu = 1500; /* XXX */
	ifp->if_hardmtu = MCLBYTES; /* XXX */
	timeout_set(&sc->sc_tmo, pfsync_timeout, sc);
	timeout_set(&sc->sc_bulk_tmo, pfsync_bulk_update, sc);
	timeout_set(&sc->sc_bulkfail_tmo, pfsync_bulk_fail, sc);

	if_attach(ifp);
	if_alloc_sadl(ifp);

#if NCARP > 0
	if_addgroup(ifp, "carp");
#endif

#if NBPFILTER > 0
	bpfattach(&sc->sc_if.if_bpf, ifp, DLT_PFSYNC, PFSYNC_HDRLEN);
#endif

	pfsyncif = sc;

	return (0);
}

int
pfsync_clone_destroy(struct ifnet *ifp)
{
	struct pfsync_softc *sc = ifp->if_softc;

	timeout_del(&sc->sc_bulk_tmo);
	timeout_del(&sc->sc_tmo);
#if NCARP > 0
	if (!pfsync_sync_ok)
		carp_group_demote_adj(&sc->sc_if, -1);
#endif
#if NBPFILTER > 0
	bpfdetach(ifp);
#endif
	if_detach(ifp);

	pfsync_drop(sc);

	while (sc->sc_deferred > 0)
		pfsync_undefer(TAILQ_FIRST(&sc->sc_deferrals), 0);

	pool_destroy(&sc->sc_pool);
	free(sc->sc_imo.imo_membership, M_IPMOPTS);
	free(sc, M_DEVBUF);

	pfsyncif = NULL;

	return (0);
}

struct mbuf *
pfsync_if_dequeue(struct ifnet *ifp)
{
	struct mbuf *m;

	IF_DEQUEUE(&ifp->if_snd, m);

	return (m);
}

/*
 * Start output on the pfsync interface.
 */
void
pfsyncstart(struct ifnet *ifp)
{
	struct mbuf *m;
	int s;

	s = splnet();
	while ((m = pfsync_if_dequeue(ifp)) != NULL) {
		IF_DROP(&ifp->if_snd);
		m_freem(m);
	}
	splx(s);
}

int
pfsync_alloc_scrub_memory(struct pfsync_state_peer *s,
    struct pf_state_peer *d)
{
	if (s->scrub.scrub_flag && d->scrub == NULL) {
		d->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT | PR_ZERO);
		if (d->scrub == NULL)
			return (ENOMEM);
	}

	return (0);
}

void
pfsync_state_export(struct pfsync_state *sp, struct pf_state *st)
{
	bzero(sp, sizeof(struct pfsync_state));

	/* copy from state key */
	sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0];
	sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1];
	sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0];
	sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1];
	sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0];
	sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1];
	sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0];
	sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1];
	sp->proto = st->key[PF_SK_WIRE]->proto;
	sp->af = st->key[PF_SK_WIRE]->af;

	/* copy from state */
	strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname));
	bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr));
	sp->creation = htonl(time_second - st->creation);
	sp->expire = pf_state_expires(st);
	if (sp->expire <= time_second)
		sp->expire = htonl(0);
	else
		sp->expire = htonl(sp->expire - time_second);

	sp->direction = st->direction;
	sp->log = st->log;
	sp->timeout = st->timeout;
	sp->state_flags = st->state_flags;
	if (st->src_node)
		sp->sync_flags |= PFSYNC_FLAG_SRCNODE;
	if (st->nat_src_node)
		sp->sync_flags |= PFSYNC_FLAG_NATSRCNODE;

	bcopy(&st->id, &sp->id, sizeof(sp->id));
	sp->creatorid = st->creatorid;
	pf_state_peer_hton(&st->src, &sp->src);
	pf_state_peer_hton(&st->dst, &sp->dst);

	if (st->rule.ptr == NULL)
		sp->rule = htonl(-1);
	else
		sp->rule = htonl(st->rule.ptr->nr);
	if (st->anchor.ptr == NULL)
		sp->anchor = htonl(-1);
	else
		sp->anchor = htonl(st->anchor.ptr->nr);
	if (st->nat_rule.ptr == NULL)
		sp->nat_rule = htonl(-1);
	else
		sp->nat_rule = htonl(st->nat_rule.ptr->nr);

	pf_state_counter_hton(st->packets[0], sp->packets[0]);
	pf_state_counter_hton(st->packets[1], sp->packets[1]);
	pf_state_counter_hton(st->bytes[0], sp->bytes[0]);
	pf_state_counter_hton(st->bytes[1], sp->bytes[1]);
}

int
pfsync_state_import(struct pfsync_state *sp, u_int8_t flags)
{
	struct pf_state *st = NULL;
	struct pf_state_key *skw = NULL, *sks = NULL;
	struct pf_rule *r = NULL;
	struct pfi_kif *kif;
	int pool_flags;
	int error;

	if (sp->creatorid == 0 && pf_status.debug >= PF_DEBUG_MISC) {
		printf("pfsync_state_import: invalid creator id:"
		    " %08x\n", ntohl(sp->creatorid));
		return (EINVAL);
	}

	if ((kif = pfi_kif_get(sp->ifname)) == NULL) {
		if (pf_status.debug >= PF_DEBUG_MISC)
			printf("pfsync_state_import: "
			    "unknown interface: %s\n", sp->ifname);
		if (flags & PFSYNC_SI_IOCTL)
			return (EINVAL);
		return (0);	/* skip this state */
	}

	/*
	 * If the ruleset checksums match or the state is coming from the
	 * ioctl, it's safe to associate the state with the rule of that
	 * number.
	 */
	if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) &&
	    (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) <
	    pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount)
		r = pf_main_ruleset.rules[
		    PF_RULESET_FILTER].active.ptr_array[ntohl(sp->rule)];
	else
		r = &pf_default_rule;

	if ((r->max_states && r->states_cur >= r->max_states))
		goto cleanup;

	if (flags & PFSYNC_SI_IOCTL)
		pool_flags = PR_WAITOK | PR_LIMITFAIL | PR_ZERO;
	else
		pool_flags = PR_LIMITFAIL | PR_ZERO;

	if ((st = pool_get(&pf_state_pl, pool_flags)) == NULL)
		goto cleanup;

	if ((skw = pf_alloc_state_key(pool_flags)) == NULL)
		goto cleanup;

	if (PF_ANEQ(&sp->key[PF_SK_WIRE].addr[0],
	    &sp->key[PF_SK_STACK].addr[0], sp->af) ||
	    PF_ANEQ(&sp->key[PF_SK_WIRE].addr[1],
	    &sp->key[PF_SK_STACK].addr[1], sp->af) ||
	    sp->key[PF_SK_WIRE].port[0] != sp->key[PF_SK_STACK].port[0] ||
	    sp->key[PF_SK_WIRE].port[1] != sp->key[PF_SK_STACK].port[1]) {
		if ((sks = pf_alloc_state_key(pool_flags)) == NULL)
			goto cleanup;
	} else
		sks = skw;

	/* allocate memory for scrub info */
	if (pfsync_alloc_scrub_memory(&sp->src, &st->src) ||
	    pfsync_alloc_scrub_memory(&sp->dst, &st->dst))
		goto cleanup;

	/* copy to state key(s) */
	skw->addr[0] = sp->key[PF_SK_WIRE].addr[0];
	skw->addr[1] = sp->key[PF_SK_WIRE].addr[1];
	skw->port[0] = sp->key[PF_SK_WIRE].port[0];
	skw->port[1] = sp->key[PF_SK_WIRE].port[1];
	skw->proto = sp->proto;
	skw->af = sp->af;
	if (sks != skw) {
		sks->addr[0] = sp->key[PF_SK_STACK].addr[0];
		sks->addr[1] = sp->key[PF_SK_STACK].addr[1];
		sks->port[0] = sp->key[PF_SK_STACK].port[0];
		sks->port[1] = sp->key[PF_SK_STACK].port[1];
		sks->proto = sp->proto;
		sks->af = sp->af;
	}

	/* copy to state */
	bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr));
	st->creation = time_second - ntohl(sp->creation);
	st->expire = time_second;
	if (sp->expire) {
		/* XXX No adaptive scaling. */
		st->expire -= r->timeout[sp->timeout] - ntohl(sp->expire);
	}

	st->direction = sp->direction;
	st->log = sp->log;
	st->timeout = sp->timeout;
	st->state_flags = sp->state_flags;

	bcopy(sp->id, &st->id, sizeof(st->id));
	st->creatorid = sp->creatorid;
	pf_state_peer_ntoh(&sp->src, &st->src);
	pf_state_peer_ntoh(&sp->dst, &st->dst);

	st->rule.ptr = r;
	st->nat_rule.ptr = NULL;
	st->anchor.ptr = NULL;
	st->rt_kif = NULL;

	st->pfsync_time = time_uptime;
	st->sync_state = PFSYNC_S_NONE;

	/* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */
	r->states_cur++;
	r->states_tot++;

	if (!ISSET(flags, PFSYNC_SI_IOCTL))
		SET(st->state_flags, PFSTATE_NOSYNC);

	if (pf_state_insert(kif, skw, sks, st) != 0) {
		/* XXX when we have nat_rule/anchors, use STATE_DEC_COUNTERS */
		r->states_cur--;
		error = EEXIST;
		goto cleanup_state;
	}

	if (!ISSET(flags, PFSYNC_SI_IOCTL)) {
		CLR(st->state_flags, PFSTATE_NOSYNC);
		if (ISSET(st->state_flags, PFSTATE_ACK)) {
			pfsync_q_ins(st, PFSYNC_S_IACK);
			schednetisr(NETISR_PFSYNC);
		}
	}
	CLR(st->state_flags, PFSTATE_ACK);

	return (0);

 cleanup:
	error = ENOMEM;
	if (skw == sks)
		sks = NULL;
	if (skw != NULL)
		pool_put(&pf_state_key_pl, skw);
	if (sks != NULL)
		pool_put(&pf_state_key_pl, sks);

 cleanup_state:	/* pf_state_insert frees the state keys */
	if (st) {
		if (st->dst.scrub)
			pool_put(&pf_state_scrub_pl, st->dst.scrub);
		if (st->src.scrub)
			pool_put(&pf_state_scrub_pl, st->src.scrub);
		pool_put(&pf_state_pl, st);
	}
	return (error);
}

void
pfsync_input(struct mbuf *m, ...)
{
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_pkt pkt;
	struct ip *ip = mtod(m, struct ip *);
	struct pfsync_header *ph;
	struct pfsync_subheader subh;

	int offset, len;
	int rv;

	pfsyncstats.pfsyncs_ipackets++;

	/* verify that we have a sync interface configured */
	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
	    sc->sc_sync_if == NULL || !pf_status.running)
		goto done;

	/* verify that the packet came in on the right interface */
	if (sc->sc_sync_if != m->m_pkthdr.rcvif) {
		pfsyncstats.pfsyncs_badif++;
		goto done;
	}

	sc->sc_if.if_ipackets++;
	sc->sc_if.if_ibytes += m->m_pkthdr.len;

	/* verify that the IP TTL is 255. */
	if (ip->ip_ttl != PFSYNC_DFLTTL) {
		pfsyncstats.pfsyncs_badttl++;
		goto done;
	}

	offset = ip->ip_hl << 2;
	if (m->m_pkthdr.len < offset + sizeof(*ph)) {
		pfsyncstats.pfsyncs_hdrops++;
		goto done;
	}

	if (offset + sizeof(*ph) > m->m_len) {
		if (m_pullup(m, offset + sizeof(*ph)) == NULL) {
			pfsyncstats.pfsyncs_hdrops++;
			return;
		}
		ip = mtod(m, struct ip *);
	}
	ph = (struct pfsync_header *)((char *)ip + offset);

	/* verify the version */
	if (ph->version != PFSYNC_VERSION) {
		pfsyncstats.pfsyncs_badver++;
		goto done;
	}

	len = ntohs(ph->len) + offset;
	if (m->m_pkthdr.len < len) {
		pfsyncstats.pfsyncs_badlen++;
		goto done;
	}

	/* Cheaper to grab this now than having to mess with mbufs later */
	pkt.ip = ip;
	pkt.src = ip->ip_src;
	pkt.flags = 0;

	if (!bcmp(&ph->pfcksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH))
		pkt.flags |= PFSYNC_SI_CKSUM;

	offset += sizeof(*ph);
	while (offset <= len - sizeof(subh)) {
		m_copydata(m, offset, sizeof(subh), (caddr_t)&subh);
		offset += sizeof(subh);

		if (subh.action >= PFSYNC_ACT_MAX) {
			pfsyncstats.pfsyncs_badact++;
			goto done;
		}

		rv = (*pfsync_acts[subh.action])(&pkt, m, offset,
		    ntohs(subh.count));
		if (rv == -1)
			return;

		offset += rv;
	}

done:
	m_freem(m);
}

int
pfsync_in_clr(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	struct pfsync_clr *clr;
	struct mbuf *mp;
	int len = sizeof(*clr) * count;
	int i, offp;

	struct pf_state *st, *nexts;
	struct pf_state_key *sk, *nextsk;
	struct pf_state_item *si;
	u_int32_t creatorid;
	int s;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	clr = (struct pfsync_clr *)(mp->m_data + offp);

	s = splsoftnet();
	for (i = 0; i < count; i++) {
		creatorid = clr[i].creatorid;

		if (clr[i].ifname[0] == '\0') {
			for (st = RB_MIN(pf_state_tree_id, &tree_id);
			    st; st = nexts) {
				nexts = RB_NEXT(pf_state_tree_id, &tree_id, st);
				if (st->creatorid == creatorid) {
					SET(st->state_flags, PFSTATE_NOSYNC);
					pf_unlink_state(st);
				}
			}
		} else {
			if (pfi_kif_get(clr[i].ifname) == NULL)
				continue;

			/* XXX correct? */
			for (sk = RB_MIN(pf_state_tree, &pf_statetbl);
			    sk; sk = nextsk) {
				nextsk = RB_NEXT(pf_state_tree,
				    &pf_statetbl, sk);
				TAILQ_FOREACH(si, &sk->states, entry) {
					if (si->s->creatorid == creatorid) {
						SET(si->s->state_flags,
						    PFSTATE_NOSYNC);
						pf_unlink_state(si->s);
					}
				}
			}
		}
	}
	splx(s);

	return (len);
}

int
pfsync_in_ins(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	struct mbuf *mp;
	struct pfsync_state *sa, *sp;
	int len = sizeof(*sp) * count;
	int i, offp;

	int s;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	sa = (struct pfsync_state *)(mp->m_data + offp);

	s = splsoftnet();
	for (i = 0; i < count; i++) {
		sp = &sa[i];

		/* check for invalid values */
		if (sp->timeout >= PFTM_MAX ||
		    sp->src.state > PF_TCPS_PROXY_DST ||
		    sp->dst.state > PF_TCPS_PROXY_DST ||
		    sp->direction > PF_OUT ||
		    (sp->af != AF_INET && sp->af != AF_INET6)) {
			if (pf_status.debug >= PF_DEBUG_MISC) {
				printf("pfsync_input: PFSYNC5_ACT_INS: "
				    "invalid value\n");
			}
			pfsyncstats.pfsyncs_badval++;
			continue;
		}

		if (pfsync_state_import(sp, pkt->flags) == ENOMEM) {
			/* drop out, but process the rest of the actions */
			break;
		}
	}
	splx(s);

	return (len);
}

int
pfsync_in_iack(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	struct pfsync_ins_ack *ia, *iaa;
	struct pf_state_cmp id_key;
	struct pf_state *st;

	struct mbuf *mp;
	int len = count * sizeof(*ia);
	int offp, i;
	int s;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	iaa = (struct pfsync_ins_ack *)(mp->m_data + offp);

	s = splsoftnet();
	for (i = 0; i < count; i++) {
		ia = &iaa[i];

		bcopy(&ia->id, &id_key.id, sizeof(id_key.id));
		id_key.creatorid = ia->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL)
			continue;

		if (ISSET(st->state_flags, PFSTATE_ACK))
			pfsync_deferred(st, 0);
	}
	splx(s);

	return (len);
}

int
pfsync_upd_tcp(struct pf_state *st, struct pfsync_state_peer *src,
    struct pfsync_state_peer *dst)
{
	int sync = 0;

	/*
	 * The state should never go backwards except
	 * for syn-proxy states. Neither should the
	 * sequence window slide backwards.
	 */
	if ((st->src.state > src->state &&
	    (st->src.state < PF_TCPS_PROXY_SRC ||
	    src->state >= PF_TCPS_PROXY_SRC)) ||
	    (st->src.state == src->state &&
	    SEQ_GT(st->src.seqlo, ntohl(src->seqlo))))
		sync++;
	else
		pf_state_peer_ntoh(src, &st->src);

	if ((st->dst.state > dst->state) ||
	    (st->dst.state >= TCPS_SYN_SENT &&
	    SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo))))
		sync++;
	else
		pf_state_peer_ntoh(dst, &st->dst);

	return (sync);
}

int
pfsync_in_upd(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	struct pfsync_state *sa, *sp;
	struct pf_state_cmp id_key;
	struct pf_state *st;
	int sync;

	struct mbuf *mp;
	int len = count * sizeof(*sp);
	int offp, i;
	int s;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	sa = (struct pfsync_state *)(mp->m_data + offp);

	s = splsoftnet();
	for (i = 0; i < count; i++) {
		sp = &sa[i];

		/* check for invalid values */
		if (sp->timeout >= PFTM_MAX ||
		    sp->src.state > PF_TCPS_PROXY_DST ||
		    sp->dst.state > PF_TCPS_PROXY_DST) {
			if (pf_status.debug >= PF_DEBUG_MISC) {
				printf("pfsync_input: PFSYNC_ACT_UPD: "
				    "invalid value\n");
			}
			pfsyncstats.pfsyncs_badval++;
			continue;
		}

		bcopy(sp->id, &id_key.id, sizeof(id_key.id));
		id_key.creatorid = sp->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL) {
			/* insert the update */
			if (pfsync_state_import(sp, 0))
				pfsyncstats.pfsyncs_badstate++;
			continue;
		}

		if (ISSET(st->state_flags, PFSTATE_ACK))
			pfsync_deferred(st, 1);

		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
			sync = pfsync_upd_tcp(st, &sp->src, &sp->dst);
		else {
			sync = 0;

			/*
			 * Non-TCP protocol state machines always go
			 * forward.
			 */
			if (st->src.state > sp->src.state)
				sync++;
			else
				pf_state_peer_ntoh(&sp->src, &st->src);

			if (st->dst.state > sp->dst.state)
				sync++;
			else
				pf_state_peer_ntoh(&sp->dst, &st->dst);
		}

		if (sync < 2) {
			pfsync_alloc_scrub_memory(&sp->dst, &st->dst);
			pf_state_peer_ntoh(&sp->dst, &st->dst);
			st->expire = ntohl(sp->expire) + time_second;
			st->timeout = sp->timeout;
		}
		st->pfsync_time = time_uptime;

		if (sync) {
			pfsyncstats.pfsyncs_stale++;

			pfsync_update_state(st);
			schednetisr(NETISR_PFSYNC);
		}
	}
	splx(s);

	return (len);
}

int
pfsync_in_upd_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	struct pfsync_upd_c *ua, *up;
	struct pf_state_cmp id_key;
	struct pf_state *st;

	int len = count * sizeof(*up);
	int sync;

	struct mbuf *mp;
	int offp, i;
	int s;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	ua = (struct pfsync_upd_c *)(mp->m_data + offp);

	s = splsoftnet();
	for (i = 0; i < count; i++) {
		up = &ua[i];

		/* check for invalid values */
		if (up->timeout >= PFTM_MAX ||
		    up->src.state > PF_TCPS_PROXY_DST ||
		    up->dst.state > PF_TCPS_PROXY_DST) {
			if (pf_status.debug >= PF_DEBUG_MISC) {
				printf("pfsync_input: "
				    "PFSYNC_ACT_UPD_C: "
				    "invalid value\n");
			}
			pfsyncstats.pfsyncs_badval++;
			continue;
		}

		bcopy(&up->id, &id_key.id, sizeof(id_key.id));
		id_key.creatorid = up->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL) {
			/* We don't have this state. Ask for it. */
			pfsync_request_update(id_key.creatorid, id_key.id);
			continue;
		}

		if (ISSET(st->state_flags, PFSTATE_ACK))
			pfsync_deferred(st, 1);

		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
			sync = pfsync_upd_tcp(st, &up->src, &up->dst);
		else {
			sync = 0;

			/*
			 * Non-TCP protocol state machines always go
			 * forward.
			 */
			if (st->src.state > up->src.state)
				sync++;
			else
				pf_state_peer_ntoh(&up->src, &st->src);

			if (st->dst.state > up->dst.state)
				sync++;
			else
				pf_state_peer_ntoh(&up->dst, &st->dst);
		}
		if (sync < 2) {
			pfsync_alloc_scrub_memory(&up->dst, &st->dst);
			pf_state_peer_ntoh(&up->dst, &st->dst);
			st->expire = ntohl(up->expire) + time_second;
			st->timeout = up->timeout;
		}
		st->pfsync_time = time_uptime;

		if (sync) {
			pfsyncstats.pfsyncs_stale++;

			pfsync_update_state(st);
			schednetisr(NETISR_PFSYNC);
		}
	}
	splx(s);

	return (len);
}

int
pfsync_in_ureq(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	struct pfsync_upd_req *ur, *ura;
	struct mbuf *mp;
	int len = count * sizeof(*ur);
	int i, offp;

	struct pf_state_cmp id_key;
	struct pf_state *st;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	ura = (struct pfsync_upd_req *)(mp->m_data + offp);

	for (i = 0; i < count; i++) {
		ur = &ura[i];

		bcopy(&ur->id, &id_key.id, sizeof(id_key.id));
		id_key.creatorid = ur->creatorid;

		if (id_key.id == 0 && id_key.creatorid == 0)
			pfsync_bulk_start();
		else {
			st = pf_find_state_byid(&id_key);
			if (st == NULL) {
				pfsyncstats.pfsyncs_badstate++;
				continue;
			}
			if (ISSET(st->state_flags, PFSTATE_NOSYNC))
				continue;

			pfsync_update_state_req(st);
		}
	}

	return (len);
}

int
pfsync_in_del(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	struct mbuf *mp;
	struct pfsync_state *sa, *sp;
	struct pf_state_cmp id_key;
	struct pf_state *st;
	int len = count * sizeof(*sp);
	int offp, i;
	int s;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	sa = (struct pfsync_state *)(mp->m_data + offp);

	s = splsoftnet();
	for (i = 0; i < count; i++) {
		sp = &sa[i];

		bcopy(sp->id, &id_key.id, sizeof(id_key.id));
		id_key.creatorid = sp->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL) {
			pfsyncstats.pfsyncs_badstate++;
			continue;
		}

		SET(st->state_flags, PFSTATE_NOSYNC);
		pf_unlink_state(st);
	}
	splx(s);

	return (len);
}

int
pfsync_in_del_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	struct mbuf *mp;
	struct pfsync_del_c *sa, *sp;
	struct pf_state_cmp id_key;
	struct pf_state *st;
	int len = count * sizeof(*sp);
	int offp, i;
	int s;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	sa = (struct pfsync_del_c *)(mp->m_data + offp);

	s = splsoftnet();
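	/* unlink every state named by a compressed delete */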
	for (i = 0; i < count; i++) {
		sp = &sa[i];

		bcopy(&sp->id, &id_key.id, sizeof(id_key.id));
		id_key.creatorid = sp->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL) {
			pfsyncstats.pfsyncs_badstate++;
			continue;
		}

		SET(st->state_flags, PFSTATE_NOSYNC);
		pf_unlink_state(st);
	}
	splx(s);

	return (len);
}

int
pfsync_in_bus(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_bus *bus;
	struct mbuf *mp;
	int len = count * sizeof(*bus);
	int offp;

	/* If we're not waiting for a bulk update, who cares. */
	if (sc->sc_ureq_sent == 0)
		return (len);

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	bus = (struct pfsync_bus *)(mp->m_data + offp);

	switch (bus->status) {
	case PFSYNC_BUS_START:
		timeout_add(&sc->sc_bulkfail_tmo, 4 * hz +
		    pf_pool_limits[PF_LIMIT_STATES].limit /
		    ((sc->sc_if.if_mtu - PFSYNC_MINPKT) /
		    sizeof(struct pfsync_state)));
		if (pf_status.debug >= PF_DEBUG_MISC)
			printf("pfsync: received bulk update start\n");
		break;

	case PFSYNC_BUS_END:
		if (time_uptime - ntohl(bus->endtime) >=
		    sc->sc_ureq_sent) {
			/* that's it, we're happy */
			sc->sc_ureq_sent = 0;
			sc->sc_bulk_tries = 0;
			timeout_del(&sc->sc_bulkfail_tmo);
#if NCARP > 0
			if (!pfsync_sync_ok)
				carp_group_demote_adj(&sc->sc_if, -1);
#endif
			pfsync_sync_ok = 1;
			if (pf_status.debug >= PF_DEBUG_MISC)
				printf("pfsync: received valid "
				    "bulk update end\n");
		} else {
			if (pf_status.debug >= PF_DEBUG_MISC)
				printf("pfsync: received invalid "
				    "bulk update end: bad timestamp\n");
		}
		break;
	}

	return (len);
}

int
pfsync_in_tdb(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	int len = count * sizeof(struct pfsync_tdb);

#if defined(IPSEC)
	struct pfsync_tdb *tp;
	struct mbuf *mp;
	int offp;
	int i;
	int s;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	tp = (struct pfsync_tdb *)(mp->m_data + offp);

	s = splsoftnet();
	for (i = 0; i < count; i++)
		pfsync_update_net_tdb(&tp[i]);
	splx(s);
#endif

	return (len);
}

#if defined(IPSEC)
/* Update an in-kernel tdb. Silently fail if no tdb is found. */
void
pfsync_update_net_tdb(struct pfsync_tdb *pt)
{
	struct tdb *tdb;
	int s;

	/* check for invalid values */
	if (ntohl(pt->spi) <= SPI_RESERVED_MAX ||
	    (pt->dst.sa.sa_family != AF_INET &&
	    pt->dst.sa.sa_family != AF_INET6))
		goto bad;

	s = spltdb();
	tdb = gettdb(pt->spi, &pt->dst, pt->sproto);
	if (tdb) {
		pt->rpl = ntohl(pt->rpl);
		pt->cur_bytes = betoh64(pt->cur_bytes);

		/* Neither replay nor byte counter should ever decrease. */
		if (pt->rpl < tdb->tdb_rpl ||
		    pt->cur_bytes < tdb->tdb_cur_bytes) {
			splx(s);
			goto bad;
		}

		tdb->tdb_rpl = pt->rpl;
		tdb->tdb_cur_bytes = pt->cur_bytes;
	}
	splx(s);
	return;

 bad:
	if (pf_status.debug >= PF_DEBUG_MISC)
		printf("pfsync_insert: PFSYNC_ACT_TDB_UPD: "
		    "invalid value\n");
	pfsyncstats.pfsyncs_badstate++;
	return;
}
#endif

int
pfsync_in_eof(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	/* check if we are at the right place in the packet */
	if (offset != m->m_pkthdr.len)
		pfsyncstats.pfsyncs_badlen++;

	/* we're done. free and let the caller return */
	m_freem(m);
	return (-1);
}

int
pfsync_in_error(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	pfsyncstats.pfsyncs_badact++;

	m_freem(m);
	return (-1);
}

int
pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
    struct rtentry *rt)
{
	m_freem(m);
	return (0);
}

/* ARGSUSED */
int
pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct proc *p = curproc;
	struct pfsync_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ip_moptions *imo = &sc->sc_imo;
	struct pfsyncreq pfsyncr;
	struct ifnet *sifp;
	struct ip *ip;
	int s, error;

	switch (cmd) {
#if 0
	case SIOCSIFADDR:
	case SIOCAIFADDR:
	case SIOCSIFDSTADDR:
#endif
	case SIOCSIFFLAGS:
		s = splnet();
		if (ifp->if_flags & IFF_UP)
			ifp->if_flags |= IFF_RUNNING;
		else {
			ifp->if_flags &= ~IFF_RUNNING;

			/* drop everything */
			timeout_del(&sc->sc_tmo);
			pfsync_drop(sc);

			/* cancel bulk update */
			timeout_del(&sc->sc_bulk_tmo);
			sc->sc_bulk_next = NULL;
			sc->sc_bulk_last = NULL;
		}
		splx(s);
		break;
	case SIOCSIFMTU:
		/* validate before raising the spl so we cannot leak it */
		if (ifr->ifr_mtu <= PFSYNC_MINPKT)
			return (EINVAL);
		if (ifr->ifr_mtu > MCLBYTES) /* XXX could be bigger */
			ifr->ifr_mtu = MCLBYTES;
		s = splnet();
		if (ifr->ifr_mtu < ifp->if_mtu)
			pfsync_sendout();
		ifp->if_mtu = ifr->ifr_mtu;
		splx(s);
		break;
	case SIOCGETPFSYNC:
		bzero(&pfsyncr, sizeof(pfsyncr));
		if (sc->sc_sync_if) {
			strlcpy(pfsyncr.pfsyncr_syncdev,
			    sc->sc_sync_if->if_xname, IFNAMSIZ);
		}
		pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer;
		pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates;
		pfsyncr.pfsyncr_defer = sc->sc_defer;
		return (copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr)));

	case SIOCSETPFSYNC:
		if ((error = suser(p, p->p_acflag)) != 0)
			return (error);
		if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr))))
			return (error);

		s = splnet();

		if (pfsyncr.pfsyncr_syncpeer.s_addr == 0)
			sc->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP;
		else
			sc->sc_sync_peer.s_addr =
			    pfsyncr.pfsyncr_syncpeer.s_addr;

		if (pfsyncr.pfsyncr_maxupdates > 255) {
			splx(s);
			return (EINVAL);
		}
		sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates;

		sc->sc_defer = pfsyncr.pfsyncr_defer;

		if (pfsyncr.pfsyncr_syncdev[0] == 0) {
			sc->sc_sync_if = NULL;
			if (imo->imo_num_memberships > 0) {
				in_delmulti(imo->imo_membership[
				    --imo->imo_num_memberships]);
				imo->imo_multicast_ifp = NULL;
			}
			splx(s);
			break;
		}

		if ((sifp = ifunit(pfsyncr.pfsyncr_syncdev)) == NULL) {
			splx(s);
			return (EINVAL);
		}

		if (sifp->if_mtu < sc->sc_if.if_mtu ||
		    (sc->sc_sync_if != NULL &&
		    sifp->if_mtu < sc->sc_sync_if->if_mtu) ||
		    sifp->if_mtu < MCLBYTES - sizeof(struct ip))
			pfsync_sendout();
		sc->sc_sync_if = sifp;

		if (imo->imo_num_memberships > 0) {
			in_delmulti(imo->imo_membership[
			    --imo->imo_num_memberships]);
			imo->imo_multicast_ifp = NULL;
		}

		if (sc->sc_sync_if &&
		    sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) {
			struct in_addr addr;

			if (!(sc->sc_sync_if->if_flags & IFF_MULTICAST)) {
				sc->sc_sync_if = NULL;
				splx(s);
				return (EADDRNOTAVAIL);
			}

			addr.s_addr = INADDR_PFSYNC_GROUP;

			if ((imo->imo_membership[0] =
			    in_addmulti(&addr, sc->sc_sync_if)) == NULL) {
				sc->sc_sync_if = NULL;
				splx(s);
				return (ENOBUFS);
			}
			imo->imo_num_memberships++;
			imo->imo_multicast_ifp = sc->sc_sync_if;
			imo->imo_multicast_ttl = PFSYNC_DFLTTL;
			imo->imo_multicast_loop = 0;
		}

		ip = &sc->sc_template;
		bzero(ip, sizeof(*ip));
		ip->ip_v = IPVERSION;
		ip->ip_hl = sizeof(sc->sc_template) >> 2;
		ip->ip_tos = IPTOS_LOWDELAY;
		/* len and id are set later */
		ip->ip_off = htons(IP_DF);
		ip->ip_ttl = PFSYNC_DFLTTL;
		ip->ip_p = IPPROTO_PFSYNC;
		ip->ip_src.s_addr = INADDR_ANY;
		ip->ip_dst.s_addr = sc->sc_sync_peer.s_addr;

		if (sc->sc_sync_if) {
			/* Request a full state table update. */
			sc->sc_ureq_sent = time_uptime;
#if NCARP > 0
			if (pfsync_sync_ok)
				carp_group_demote_adj(&sc->sc_if, 1);
#endif
			pfsync_sync_ok = 0;
			if (pf_status.debug >= PF_DEBUG_MISC)
				printf("pfsync: requesting bulk update\n");
			timeout_add(&sc->sc_bulkfail_tmo, 4 * hz +
			    pf_pool_limits[PF_LIMIT_STATES].limit /
			    ((sc->sc_if.if_mtu - PFSYNC_MINPKT) /
			    sizeof(struct pfsync_state)));
			pfsync_request_update(0, 0);
		}
		splx(s);

		break;

	default:
		return (ENOTTY);
	}

	return (0);
}

void
pfsync_out_state(struct pf_state *st, void *buf)
{
	struct pfsync_state *sp = buf;

	pfsync_state_export(sp, st);
}

void
pfsync_out_iack(struct pf_state *st, void *buf)
{
	struct pfsync_ins_ack *iack = buf;

	iack->id = st->id;
	iack->creatorid = st->creatorid;
}

void
pfsync_out_upd_c(struct pf_state *st, void *buf)
{
	struct pfsync_upd_c *up = buf;

	up->id = st->id;
	pf_state_peer_hton(&st->src, &up->src);
	pf_state_peer_hton(&st->dst, &up->dst);
	up->creatorid = st->creatorid;

	up->expire = pf_state_expires(st);
	if (up->expire <= time_second)
		up->expire = htonl(0);
	else
		up->expire = htonl(up->expire - time_second);
	up->timeout = st->timeout;

	bzero(up->_pad, sizeof(up->_pad)); /* XXX */
}

void
pfsync_out_del(struct pf_state *st, void *buf)
{
	struct pfsync_del_c *dp = buf;

	dp->id = st->id;
	dp->creatorid = st->creatorid;

	SET(st->state_flags, PFSTATE_NOSYNC);
}

void
pfsync_drop(struct pfsync_softc *sc)
{
	struct pf_state *st;
	struct pfsync_upd_req_item *ur;
	struct tdb *t;
	int q;

	for (q = 0; q < PFSYNC_S_COUNT; q++) {
		if (TAILQ_EMPTY(&sc->sc_qs[q]))
			continue;

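		/* detach the queued states without serialising them */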
		TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list) {
#ifdef PFSYNC_DEBUG
			KASSERT(st->sync_state == q);
#endif
			st->sync_state = PFSYNC_S_NONE;
		}
		TAILQ_INIT(&sc->sc_qs[q]);
	}

	while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
		TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);
		pool_put(&sc->sc_pool, ur);
	}

	sc->sc_plus = NULL;

	if (!TAILQ_EMPTY(&sc->sc_tdb_q)) {
		TAILQ_FOREACH(t, &sc->sc_tdb_q, tdb_sync_entry)
			CLR(t->tdb_flags, TDBF_PFSYNC);

		TAILQ_INIT(&sc->sc_tdb_q);
	}

	sc->sc_len = PFSYNC_MINPKT;
}

void
pfsync_sendout(void)
{
	struct pfsync_softc *sc = pfsyncif;
#if NBPFILTER > 0
	struct ifnet *ifp = &sc->sc_if;
#endif
	struct mbuf *m;
	struct ip *ip;
	struct pfsync_header *ph;
	struct pfsync_subheader *subh;
	struct pf_state *st;
	struct pfsync_upd_req_item *ur;
	struct tdb *t;

	int offset;
	int q, count = 0;

	if (sc == NULL || sc->sc_len == PFSYNC_MINPKT)
		return;

	if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
#if NBPFILTER > 0
	    (ifp->if_bpf == NULL && sc->sc_sync_if == NULL)) {
#else
	    sc->sc_sync_if == NULL) {
#endif
		pfsync_drop(sc);
		return;
	}

	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL) {
		sc->sc_if.if_oerrors++;
		pfsyncstats.pfsyncs_onomem++;
		pfsync_drop(sc);
		return;
	}

	if (max_linkhdr + sc->sc_len > MHLEN) {
		MCLGETI(m, M_DONTWAIT, NULL, max_linkhdr + sc->sc_len);
		if (!ISSET(m->m_flags, M_EXT)) {
			m_free(m);
			sc->sc_if.if_oerrors++;
			pfsyncstats.pfsyncs_onomem++;
			pfsync_drop(sc);
			return;
		}
	}
	m->m_data += max_linkhdr;
	m->m_len = m->m_pkthdr.len = sc->sc_len;

	/* build the ip header */
	ip = (struct ip *)m->m_data;
	bcopy(&sc->sc_template, ip, sizeof(*ip));
	offset = sizeof(*ip);

	ip->ip_len = htons(m->m_pkthdr.len);
	ip->ip_id = htons(ip_randomid());

	/* build the pfsync header */
	ph = (struct pfsync_header *)(m->m_data + offset);
	bzero(ph, sizeof(*ph));
	offset += sizeof(*ph);

	ph->version = PFSYNC_VERSION;
	ph->len = htons(sc->sc_len - sizeof(*ip));
	bcopy(pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH);

	/* walk the queues */
	for (q = 0; q < PFSYNC_S_COUNT; q++) {
		if (TAILQ_EMPTY(&sc->sc_qs[q]))
			continue;

		subh = (struct pfsync_subheader *)(m->m_data + offset);
		offset += sizeof(*subh);

		count = 0;
		TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list) {
#ifdef PFSYNC_DEBUG
			KASSERT(st->sync_state == q);
#endif
			pfsync_qs[q].write(st, m->m_data + offset);
			offset += pfsync_qs[q].len;

			st->sync_state = PFSYNC_S_NONE;
			count++;
		}
		TAILQ_INIT(&sc->sc_qs[q]);

		bzero(subh, sizeof(*subh));
		subh->action = pfsync_qs[q].action;
		subh->count = htons(count);
	}

	if (!TAILQ_EMPTY(&sc->sc_upd_req_list)) {
		subh = (struct pfsync_subheader *)(m->m_data + offset);
		offset += sizeof(*subh);

		count = 0;
		while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
			TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);

			bcopy(&ur->ur_msg, m->m_data + offset,
			    sizeof(ur->ur_msg));
			offset += sizeof(ur->ur_msg);

			pool_put(&sc->sc_pool, ur);

			count++;
		}

		bzero(subh, sizeof(*subh));
		subh->action = PFSYNC_ACT_UPD_REQ;
		subh->count = htons(count);
	}

	/* has someone built a custom region for us to add? */
	if (sc->sc_plus != NULL) {
		bcopy(sc->sc_plus, m->m_data + offset, sc->sc_pluslen);
		offset += sc->sc_pluslen;

		sc->sc_plus = NULL;
	}

	if (!TAILQ_EMPTY(&sc->sc_tdb_q)) {
		subh = (struct pfsync_subheader *)(m->m_data + offset);
		offset += sizeof(*subh);

		count = 0;
		TAILQ_FOREACH(t, &sc->sc_tdb_q, tdb_sync_entry) {
			pfsync_out_tdb(t, m->m_data + offset);
			offset += sizeof(struct pfsync_tdb);
			CLR(t->tdb_flags, TDBF_PFSYNC);

			count++;
		}
		TAILQ_INIT(&sc->sc_tdb_q);

		bzero(subh, sizeof(*subh));
		subh->action = PFSYNC_ACT_TDB;
		subh->count = htons(count);
	}

	subh = (struct pfsync_subheader *)(m->m_data + offset);
	offset += sizeof(*subh);

	bzero(subh, sizeof(*subh));
	subh->action = PFSYNC_ACT_EOF;
	subh->count = htons(1);

	/* we're done, let's put it on the wire */
#if NBPFILTER > 0
	if (ifp->if_bpf) {
		m->m_data += sizeof(*ip);
		m->m_len = m->m_pkthdr.len = sc->sc_len - sizeof(*ip);
		bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
		m->m_data -= sizeof(*ip);
		m->m_len = m->m_pkthdr.len = sc->sc_len;
	}

	if (sc->sc_sync_if == NULL) {
		sc->sc_len = PFSYNC_MINPKT;
		m_freem(m);
		return;
	}
#endif

	sc->sc_if.if_opackets++;
	sc->sc_if.if_obytes += m->m_pkthdr.len;

	if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL) == 0)
		pfsyncstats.pfsyncs_opackets++;
	else
		pfsyncstats.pfsyncs_oerrors++;

	/* start again */
	sc->sc_len = PFSYNC_MINPKT;
}

void
pfsync_insert_state(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;

	splsoftassert(IPL_SOFTNET);

	if (ISSET(st->rule.ptr->rule_flag, PFRULE_NOSYNC) ||
	    st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) {
		SET(st->state_flags, PFSTATE_NOSYNC);
		return;
	}

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
	    ISSET(st->state_flags, PFSTATE_NOSYNC))
		return;

#ifdef PFSYNC_DEBUG
	KASSERT(st->sync_state == PFSYNC_S_NONE);
#endif

	if (sc->sc_len == PFSYNC_MINPKT)
		timeout_add_sec(&sc->sc_tmo, 1);

	pfsync_q_ins(st, PFSYNC_S_INS);

	st->sync_updates = 0;
}

int defer = 10;

int
pfsync_defer(struct pf_state *st, struct mbuf *m)
{
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_deferral *pd;

	splsoftassert(IPL_SOFTNET);

	if (!sc->sc_defer)
		return (0);

	if (sc->sc_deferred >= 128)
		pfsync_undefer(TAILQ_FIRST(&sc->sc_deferrals), 0);

	pd = pool_get(&sc->sc_pool, PR_NOWAIT);
	if (pd == NULL)
		return (0);
	sc->sc_deferred++;

	m->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
	SET(st->state_flags, PFSTATE_ACK);

	pd->pd_st = st;
	pd->pd_m = m;

	TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry);
	timeout_set(&pd->pd_tmo, pfsync_defer_tmo, pd);
	timeout_add(&pd->pd_tmo, defer);

	schednetisr(NETISR_PFSYNC);

	return (1);
}

void
pfsync_undefer(struct pfsync_deferral *pd, int drop)
{
	struct pfsync_softc *sc = pfsyncif;

	splsoftassert(IPL_SOFTNET);

	TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
	sc->sc_deferred--;

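	/* release the state and either transmit or drop the held packet */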
	CLR(pd->pd_st->state_flags, PFSTATE_ACK);
	timeout_del(&pd->pd_tmo); /* bah */
	if (drop)
		m_freem(pd->pd_m);
	else {
		ip_output(pd->pd_m, (void *)NULL, (void *)NULL, 0,
		    (void *)NULL, (void *)NULL);
	}

	pool_put(&sc->sc_pool, pd);
}

void
pfsync_defer_tmo(void *arg)
{
	int s;

	s = splsoftnet();
	pfsync_undefer(arg, 0);
	splx(s);
}

void
pfsync_deferred(struct pf_state *st, int drop)
{
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_deferral *pd;

	TAILQ_FOREACH(pd, &sc->sc_deferrals, pd_entry) {
		if (pd->pd_st == st) {
			pfsync_undefer(pd, drop);
			return;
		}
	}

	panic("pfsync_deferred: unable to find deferred state");
}

u_int pfsync_upds = 0;

void
pfsync_update_state(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;
	int sync = 0;

	splsoftassert(IPL_SOFTNET);

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
		return;

	if (ISSET(st->state_flags, PFSTATE_ACK))
		pfsync_deferred(st, 0);
	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
		if (st->sync_state != PFSYNC_S_NONE)
			pfsync_q_del(st);
		return;
	}

	if (sc->sc_len == PFSYNC_MINPKT)
		timeout_add_sec(&sc->sc_tmo, 1);

	switch (st->sync_state) {
	case PFSYNC_S_UPD_C:
	case PFSYNC_S_UPD:
	case PFSYNC_S_INS:
		/* we're already handling it */

		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) {
			st->sync_updates++;
			if (st->sync_updates >= sc->sc_maxupdates)
				sync = 1;
		}
		break;

	case PFSYNC_S_IACK:
		pfsync_q_del(st);
		/* FALLTHROUGH */
	case PFSYNC_S_NONE:
		pfsync_q_ins(st, PFSYNC_S_UPD_C);
		st->sync_updates = 0;
		break;

	default:
		panic("pfsync_update_state: unexpected sync state %d",
		    st->sync_state);
	}

	if (sync || (time_uptime - st->pfsync_time) < 2) {
		pfsync_upds++;
		schednetisr(NETISR_PFSYNC);
	}
}

void
pfsync_request_update(u_int32_t creatorid, u_int64_t id)
{
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_upd_req_item *item;
	size_t nlen = sizeof(struct pfsync_upd_req);

	/*
	 * this code does nothing to prevent multiple update requests for the
	 * same state being generated.
	 */

	item = pool_get(&sc->sc_pool, PR_NOWAIT);
	if (item == NULL) {
		/* XXX stats */
		return;
	}

	item->ur_msg.id = id;
	item->ur_msg.creatorid = creatorid;

	if (TAILQ_EMPTY(&sc->sc_upd_req_list))
		nlen += sizeof(struct pfsync_subheader);

	if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
		pfsync_sendout();

		nlen = sizeof(struct pfsync_subheader) +
		    sizeof(struct pfsync_upd_req);
	}

	TAILQ_INSERT_TAIL(&sc->sc_upd_req_list, item, ur_entry);
	sc->sc_len += nlen;

	schednetisr(NETISR_PFSYNC);
}

void
pfsync_update_state_req(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;

	if (sc == NULL)
		panic("pfsync_update_state_req: nonexistent instance");

	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
		if (st->sync_state != PFSYNC_S_NONE)
			pfsync_q_del(st);
		return;
	}

	switch (st->sync_state) {
	case PFSYNC_S_UPD_C:
	case PFSYNC_S_IACK:
		pfsync_q_del(st);
		/* FALLTHROUGH */
	case PFSYNC_S_NONE:
		pfsync_q_ins(st, PFSYNC_S_UPD);
		schednetisr(NETISR_PFSYNC);
		return;

	case PFSYNC_S_INS:
	case PFSYNC_S_UPD:
	case PFSYNC_S_DEL:
		/* we're already handling it */
		return;

	default:
		panic("pfsync_update_state_req: unexpected sync state %d",
		    st->sync_state);
	}
}

void
pfsync_delete_state(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;

	splsoftassert(IPL_SOFTNET);

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
		return;

	if (ISSET(st->state_flags, PFSTATE_ACK))
		pfsync_deferred(st, 1);
	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
		if (st->sync_state != PFSYNC_S_NONE)
			pfsync_q_del(st);
		return;
	}

	if (sc->sc_len == PFSYNC_MINPKT)
		timeout_add_sec(&sc->sc_tmo, 1);

	switch (st->sync_state) {
	case PFSYNC_S_INS:
		/* we never got to tell the world so just forget about it */
		pfsync_q_del(st);
		return;

	case PFSYNC_S_UPD_C:
	case PFSYNC_S_UPD:
	case PFSYNC_S_IACK:
		pfsync_q_del(st);
		/* FALLTHROUGH to putting it on the del list */

	case PFSYNC_S_NONE:
		pfsync_q_ins(st, PFSYNC_S_DEL);
		return;

	default:
		panic("pfsync_delete_state: unexpected sync state %d",
		    st->sync_state);
	}
}

void
pfsync_clear_states(u_int32_t creatorid, const char *ifname)
{
	struct pfsync_softc *sc = pfsyncif;
	struct {
		struct pfsync_subheader subh;
		struct pfsync_clr clr;
	} __packed r;

	splsoftassert(IPL_SOFTNET);

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
		return;

	bzero(&r, sizeof(r));

	r.subh.action = PFSYNC_ACT_CLR;
	r.subh.count = htons(1);

	strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname));
	r.clr.creatorid = creatorid;

	pfsync_send_plus(&r, sizeof(r));
}

void
pfsync_q_ins(struct pf_state *st, int q)
{
	struct pfsync_softc *sc = pfsyncif;
	size_t nlen = pfsync_qs[q].len;

	KASSERT(st->sync_state == PFSYNC_S_NONE);

#if 1 || defined(PFSYNC_DEBUG)
	if (sc->sc_len < PFSYNC_MINPKT)
		panic("pfsync pkt len is too low %d", sc->sc_len);
#endif
	if (TAILQ_EMPTY(&sc->sc_qs[q]))
		nlen += sizeof(struct pfsync_subheader);

	if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
		pfsync_sendout();

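		/* the flush emptied every queue, so count the
		 * subheader again */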
		nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len;
	}

	sc->sc_len += nlen;
	TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list);
	st->sync_state = q;
}

void
pfsync_q_del(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;
	int q = st->sync_state;

	KASSERT(st->sync_state != PFSYNC_S_NONE);

	sc->sc_len -= pfsync_qs[q].len;
	TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list);
	st->sync_state = PFSYNC_S_NONE;

	if (TAILQ_EMPTY(&sc->sc_qs[q]))
		sc->sc_len -= sizeof(struct pfsync_subheader);
}

void
pfsync_update_tdb(struct tdb *t, int output)
{
	struct pfsync_softc *sc = pfsyncif;
	size_t nlen = sizeof(struct pfsync_tdb);

	if (sc == NULL)
		return;

	if (!ISSET(t->tdb_flags, TDBF_PFSYNC)) {
		if (TAILQ_EMPTY(&sc->sc_tdb_q))
			nlen += sizeof(struct pfsync_subheader);

		if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
			pfsync_sendout();

			nlen = sizeof(struct pfsync_subheader) +
			    sizeof(struct pfsync_tdb);
		}

		sc->sc_len += nlen;
		TAILQ_INSERT_TAIL(&sc->sc_tdb_q, t, tdb_sync_entry);
		SET(t->tdb_flags, TDBF_PFSYNC);
		t->tdb_updates = 0;
	} else {
		if (++t->tdb_updates >= sc->sc_maxupdates)
			schednetisr(NETISR_PFSYNC);
	}

	if (output)
		SET(t->tdb_flags, TDBF_PFSYNC_RPL);
	else
		CLR(t->tdb_flags, TDBF_PFSYNC_RPL);
}

void
pfsync_delete_tdb(struct tdb *t)
{
	struct pfsync_softc *sc = pfsyncif;

	if (sc == NULL || !ISSET(t->tdb_flags, TDBF_PFSYNC))
		return;

	sc->sc_len -= sizeof(struct pfsync_tdb);
	TAILQ_REMOVE(&sc->sc_tdb_q, t, tdb_sync_entry);
	CLR(t->tdb_flags, TDBF_PFSYNC);

	if (TAILQ_EMPTY(&sc->sc_tdb_q))
		sc->sc_len -= sizeof(struct pfsync_subheader);
}

void
pfsync_out_tdb(struct tdb *t, void *buf)
{
	struct pfsync_tdb *ut = buf;

	bzero(ut, sizeof(*ut));
	ut->spi = t->tdb_spi;
	bcopy(&t->tdb_dst, &ut->dst, sizeof(ut->dst));
	/*
	 * When a failover happens, the master's rpl is probably above
	 * what we see here (we may be up to a second late), so
	 * increase it a bit for outbound tdbs to manage most such
	 * situations.
	 *
	 * For now, just add an offset that is likely to be larger
	 * than the number of packets we can see in one second. The RFC
	 * just says the next packet must have a higher seq value.
	 *
	 * XXX What is a good algorithm for this? We could use
	 * a rate-determined increase, but to know it, we would have
	 * to extend struct tdb.
	 * XXX pt->rpl can wrap over MAXINT, but if so the real tdb
	 * will soon be replaced anyway. For now, just don't handle
	 * this edge case.
	 */
#define RPL_INCR 16384
	ut->rpl = htonl(t->tdb_rpl + (ISSET(t->tdb_flags, TDBF_PFSYNC_RPL) ?
	    RPL_INCR : 0));
	ut->cur_bytes = htobe64(t->tdb_cur_bytes);
	ut->sproto = t->tdb_sproto;
}

void
pfsync_bulk_start(void)
{
	struct pfsync_softc *sc = pfsyncif;

	sc->sc_ureq_received = time_uptime;

	if (sc->sc_bulk_next == NULL)
		sc->sc_bulk_next = TAILQ_FIRST(&state_list);
	sc->sc_bulk_last = sc->sc_bulk_next;

	if (pf_status.debug >= PF_DEBUG_MISC)
		printf("pfsync: received bulk update request\n");

	pfsync_bulk_status(PFSYNC_BUS_START);
	timeout_add(&sc->sc_bulk_tmo, 0);
}

void
pfsync_bulk_update(void *arg)
{
	struct pfsync_softc *sc = arg;
	struct pf_state *st;
	int i = 0;
	int s;

	s = splsoftnet();

	st = sc->sc_bulk_next;

	while (st != sc->sc_bulk_last) {
		if (st->sync_state == PFSYNC_S_NONE &&
		    st->timeout < PFTM_MAX &&
		    st->pfsync_time <= sc->sc_ureq_received) {
			pfsync_update_state_req(st);
			i++;
		}

		st = TAILQ_NEXT(st, entry_list);
		if (st == NULL)
			st = TAILQ_FIRST(&state_list);

		if (i > 0 && TAILQ_EMPTY(&sc->sc_qs[PFSYNC_S_UPD])) {
			sc->sc_bulk_next = st;
			timeout_add(&sc->sc_bulk_tmo, 1);
			goto out;
		}
	}

	/* we're done */
	sc->sc_bulk_next = NULL;
	sc->sc_bulk_last = NULL;
	pfsync_bulk_status(PFSYNC_BUS_END);

out:
	splx(s);
}

void
pfsync_bulk_status(u_int8_t status)
{
	struct {
		struct pfsync_subheader subh;
		struct pfsync_bus bus;
	} __packed r;

	struct pfsync_softc *sc = pfsyncif;

	bzero(&r, sizeof(r));

	r.subh.action = PFSYNC_ACT_BUS;
	r.subh.count = htons(1);

	r.bus.creatorid = pf_status.hostid;
	r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received);
	r.bus.status = status;

	pfsync_send_plus(&r, sizeof(r));
}

void
pfsync_bulk_fail(void *arg)
{
	struct pfsync_softc *sc = arg;

	if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) {
		/* Try again */
		timeout_add_sec(&sc->sc_bulkfail_tmo, 5);
		pfsync_request_update(0, 0);
	} else {
		/* Pretend like the transfer was ok */
		sc->sc_ureq_sent = 0;
		sc->sc_bulk_tries = 0;
#if NCARP > 0
		if (!pfsync_sync_ok)
			carp_group_demote_adj(&sc->sc_if, -1);
#endif
		pfsync_sync_ok = 1;
		if (pf_status.debug >= PF_DEBUG_MISC)
			printf("pfsync: failed to receive bulk update\n");
	}
}

void
pfsync_send_plus(void *plus, size_t pluslen)
{
	struct pfsync_softc *sc = pfsyncif;

	if (sc->sc_len + pluslen > sc->sc_if.if_mtu)
		pfsync_sendout();

	sc->sc_plus = plus;
	sc->sc_len += (sc->sc_pluslen = pluslen);

	pfsync_sendout();
}

int
pfsync_up(void)
{
	struct pfsync_softc *sc = pfsyncif;

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
		return (0);

	return (1);
}

int
pfsync_state_in_use(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;

	if (sc == NULL)
		return (0);

	if (st->sync_state != PFSYNC_S_NONE)
		return (1);

	if (sc->sc_bulk_next == NULL && sc->sc_bulk_last == NULL)
		return (0);

	return (1);
}

void
pfsync_timeout(void *arg)
{
	int s;

	s = splsoftnet();
	pfsync_sendout();
	splx(s);
}

/* this is a softnet/netisr handler */
void
pfsyncintr(void)
{
	pfsync_sendout();
}

int
pfsync_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp,
    void *newp, size_t newlen)
{
	/* All sysctl names at this level are terminal. */
	if (namelen != 1)
		return (ENOTDIR);

	switch (name[0]) {
	case PFSYNCCTL_STATS:
		if (newp != NULL)
			return (EPERM);
		return (sysctl_struct(oldp, oldlenp, newp, newlen,
		    &pfsyncstats, sizeof(pfsyncstats)));
	default:
		return (ENOPROTOOPT);
	}
}