/*	$OpenBSD: if_pfsync.c,v 1.131 2009/11/12 06:53:24 deraadt Exp $	*/

/*
 * Copyright (c) 2002 Michael Shalayeff
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 2009 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/timeout.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/pool.h>

#include <net/if.h>
#include <net/if_types.h>
#include <net/route.h>
#include <net/bpf.h>
#include <net/netisr.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/tcp.h>
#include <netinet/tcp_seq.h>

#ifdef INET
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#endif

#ifdef INET6
#include <netinet6/nd6.h>
#endif /* INET6 */

#include "carp.h"
#if NCARP > 0
#include <netinet/ip_carp.h>
#endif

#include <net/pfvar.h>
#include <net/if_pfsync.h>

#include "bpfilter.h"
#include "pfsync.h"

#define PFSYNC_MINPKT ( \
	sizeof(struct ip) + \
	sizeof(struct pfsync_header) + \
	sizeof(struct pfsync_subheader))

struct pfsync_pkt {
	struct ip *ip;
	struct in_addr src;
	u_int8_t flags;
};

int	pfsync_upd_tcp(struct pf_state *, struct pfsync_state_peer *,
	    struct pfsync_state_peer *);

int	pfsync_in_clr(struct pfsync_pkt *, struct mbuf *, int, int);
int	pfsync_in_iack(struct pfsync_pkt *, struct mbuf *, int, int);
int	pfsync_in_upd_c(struct pfsync_pkt *, struct mbuf *, int, int);
int	pfsync_in_ureq(struct pfsync_pkt *, struct mbuf *, int, int);
int	pfsync_in_del(struct pfsync_pkt *, struct mbuf *, int, int);
int	pfsync_in_del_c(struct pfsync_pkt *, struct mbuf *, int, int);
int	pfsync_in_bus(struct pfsync_pkt *, struct mbuf *, int, int);
int	pfsync_in_tdb(struct pfsync_pkt *, struct mbuf *, int, int);
int	pfsync_in_ins(struct pfsync_pkt *, struct mbuf *, int, int);
int	pfsync_in_upd(struct pfsync_pkt *, struct mbuf *, int, int);
int	pfsync_in_eof(struct pfsync_pkt *, struct mbuf *, int, int);

int	pfsync_in_error(struct pfsync_pkt *, struct mbuf *, int, int);

int	(*pfsync_acts[])(struct pfsync_pkt *, struct mbuf *, int, int) = {
	pfsync_in_clr,		/* PFSYNC_ACT_CLR */
	pfsync_in_error,	/* PFSYNC_ACT_OINS */
	pfsync_in_iack,		/* PFSYNC_ACT_INS_ACK */
	pfsync_in_error,	/* PFSYNC_ACT_OUPD */
	pfsync_in_upd_c,	/* PFSYNC_ACT_UPD_C */
	pfsync_in_ureq,		/* PFSYNC_ACT_UPD_REQ */
	pfsync_in_del,		/* PFSYNC_ACT_DEL */
	pfsync_in_del_c,	/* PFSYNC_ACT_DEL_C */
	pfsync_in_error,	/* PFSYNC_ACT_INS_F */
	pfsync_in_error,	/* PFSYNC_ACT_DEL_F */
	pfsync_in_bus,		/* PFSYNC_ACT_BUS */
	pfsync_in_tdb,		/* PFSYNC_ACT_TDB */
	pfsync_in_eof,		/* PFSYNC_ACT_EOF */
	pfsync_in_ins,		/* PFSYNC_ACT_INS */
	pfsync_in_upd		/* PFSYNC_ACT_UPD */
};

struct pfsync_q {
	void		(*write)(struct pf_state *, void *);
	size_t		len;
	u_int8_t	action;
};

/* we have one of these for every PFSYNC_S_ */
void	pfsync_out_state(struct pf_state *, void *);
void	pfsync_out_iack(struct pf_state *, void *);
void	pfsync_out_upd_c(struct pf_state *, void *);
void	pfsync_out_del(struct pf_state *, void *);

struct pfsync_q pfsync_qs[] = {
	{ pfsync_out_iack,  sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK },
	{ pfsync_out_upd_c, sizeof(struct pfsync_upd_c),   PFSYNC_ACT_UPD_C },
	{ pfsync_out_del,   sizeof(struct pfsync_del_c),   PFSYNC_ACT_DEL_C },
	{ pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_INS },
	{ pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_UPD }
};
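
/*
 * The order of pfsync_qs is assumed to line up with the PFSYNC_S_*
 * queue indices from if_pfsync.h: pfsync_q_ins() charges .len bytes
 * against sc_len when a state joins queue q, and pfsync_sendout()
 * emits one subheader per non-empty queue using .action and .write.
 */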

void	pfsync_q_ins(struct pf_state *, int);
void	pfsync_q_del(struct pf_state *);

struct pfsync_upd_req_item {
	TAILQ_ENTRY(pfsync_upd_req_item)	 ur_entry;
	struct pfsync_upd_req			 ur_msg;
};
TAILQ_HEAD(pfsync_upd_reqs, pfsync_upd_req_item);

struct pfsync_deferral {
	TAILQ_ENTRY(pfsync_deferral)		 pd_entry;
	struct pf_state				*pd_st;
	struct mbuf				*pd_m;
	struct timeout				 pd_tmo;
};
TAILQ_HEAD(pfsync_deferrals, pfsync_deferral);

#define PFSYNC_PLSIZE	MAX(sizeof(struct pfsync_upd_req_item), \
	    sizeof(struct pfsync_deferral))

void	pfsync_out_tdb(struct tdb *, void *);

struct pfsync_softc {
	struct ifnet		 sc_if;
	struct ifnet		*sc_sync_if;

	struct pool		 sc_pool;

	struct ip_moptions	 sc_imo;

	struct in_addr		 sc_sync_peer;
	u_int8_t		 sc_maxupdates;

	struct ip		 sc_template;

	struct pf_state_queue	 sc_qs[PFSYNC_S_COUNT];
	size_t			 sc_len;

	struct pfsync_upd_reqs	 sc_upd_req_list;

	int			 sc_defer;
	struct pfsync_deferrals	 sc_deferrals;
	u_int			 sc_deferred;

	void			*sc_plus;
	size_t			 sc_pluslen;

	u_int32_t		 sc_ureq_sent;
	int			 sc_bulk_tries;
	struct timeout		 sc_bulkfail_tmo;

	u_int32_t		 sc_ureq_received;
	struct pf_state		*sc_bulk_next;
	struct pf_state		*sc_bulk_last;
	struct timeout		 sc_bulk_tmo;

	TAILQ_HEAD(, tdb)	 sc_tdb_q;

	struct timeout		 sc_tmo;
};

struct pfsync_softc	*pfsyncif = NULL;
struct pfsyncstats	 pfsyncstats;
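
/*
 * A note on sc_len: it tracks the size of the packet currently being
 * assembled.  It starts at PFSYNC_MINPKT (an IP header, a pfsync
 * header, and one subheader that pfsync_sendout() ends up using for
 * the terminating EOF marker) and grows as update requests, TDBs,
 * and queued states are charged against it; it is reset back to
 * PFSYNC_MINPKT whenever the packet is sent or dropped.
 */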

void	pfsyncattach(int);
int	pfsync_clone_create(struct if_clone *, int);
int	pfsync_clone_destroy(struct ifnet *);
int	pfsync_alloc_scrub_memory(struct pfsync_state_peer *,
	    struct pf_state_peer *);
void	pfsync_update_net_tdb(struct pfsync_tdb *);
int	pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
	    struct rtentry *);
int	pfsyncioctl(struct ifnet *, u_long, caddr_t);
void	pfsyncstart(struct ifnet *);

struct mbuf *pfsync_if_dequeue(struct ifnet *);

void	pfsync_deferred(struct pf_state *, int);
void	pfsync_undefer(struct pfsync_deferral *, int);
void	pfsync_defer_tmo(void *);

void	pfsync_request_update(u_int32_t, u_int64_t);
void	pfsync_update_state_req(struct pf_state *);

void	pfsync_drop(struct pfsync_softc *);
void	pfsync_sendout(void);
void	pfsync_send_plus(void *, size_t);
void	pfsync_timeout(void *);
void	pfsync_tdb_timeout(void *);

void	pfsync_bulk_start(void);
void	pfsync_bulk_status(u_int8_t);
void	pfsync_bulk_update(void *);
void	pfsync_bulk_fail(void *);

#define PFSYNC_MAX_BULKTRIES	12
int	pfsync_sync_ok;

struct if_clone	pfsync_cloner =
    IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create, pfsync_clone_destroy);

void
pfsyncattach(int npfsync)
{
	if_clone_attach(&pfsync_cloner);
}

int
pfsync_clone_create(struct if_clone *ifc, int unit)
{
	struct pfsync_softc *sc;
	struct ifnet *ifp;
	int q;

	if (unit != 0)
		return (EINVAL);

	pfsync_sync_ok = 1;

	sc = malloc(sizeof(*pfsyncif), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (sc == NULL)
		return (ENOMEM);

	for (q = 0; q < PFSYNC_S_COUNT; q++)
		TAILQ_INIT(&sc->sc_qs[q]);

	pool_init(&sc->sc_pool, PFSYNC_PLSIZE, 0, 0, 0, "pfsync", NULL);
	TAILQ_INIT(&sc->sc_upd_req_list);
	TAILQ_INIT(&sc->sc_deferrals);
	sc->sc_deferred = 0;

	TAILQ_INIT(&sc->sc_tdb_q);

	sc->sc_len = PFSYNC_MINPKT;
	sc->sc_maxupdates = 128;

	sc->sc_imo.imo_membership = (struct in_multi **)malloc(
	    (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_IPMOPTS,
	    M_WAITOK | M_ZERO);
	sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;

	ifp = &sc->sc_if;
	snprintf(ifp->if_xname, sizeof ifp->if_xname, "pfsync%d", unit);
	ifp->if_softc = sc;
	ifp->if_ioctl = pfsyncioctl;
	ifp->if_output = pfsyncoutput;
	ifp->if_start = pfsyncstart;
	ifp->if_type = IFT_PFSYNC;
	ifp->if_snd.ifq_maxlen = ifqmaxlen;
	ifp->if_hdrlen = sizeof(struct pfsync_header);
	ifp->if_mtu = 1500; /* XXX */
	ifp->if_hardmtu = MCLBYTES; /* XXX */
	timeout_set(&sc->sc_tmo, pfsync_timeout, sc);
	timeout_set(&sc->sc_bulk_tmo, pfsync_bulk_update, sc);
	timeout_set(&sc->sc_bulkfail_tmo, pfsync_bulk_fail, sc);

	if_attach(ifp);
	if_alloc_sadl(ifp);

#if NCARP > 0
	if_addgroup(ifp, "carp");
#endif

#if NBPFILTER > 0
	bpfattach(&sc->sc_if.if_bpf, ifp, DLT_PFSYNC, PFSYNC_HDRLEN);
#endif

	pfsyncif = sc;

	return (0);
}

int
pfsync_clone_destroy(struct ifnet *ifp)
{
	struct pfsync_softc *sc = ifp->if_softc;

	timeout_del(&sc->sc_bulk_tmo);
	timeout_del(&sc->sc_tmo);
#if NCARP > 0
	if (!pfsync_sync_ok)
		carp_group_demote_adj(&sc->sc_if, -1);
#endif
#if NBPFILTER > 0
	bpfdetach(ifp);
#endif
	if_detach(ifp);

	pfsync_drop(sc);

	while (sc->sc_deferred > 0)
		pfsync_undefer(TAILQ_FIRST(&sc->sc_deferrals), 0);

	pool_destroy(&sc->sc_pool);
	free(sc->sc_imo.imo_membership, M_IPMOPTS);
	free(sc, M_DEVBUF);

	pfsyncif = NULL;

	return (0);
}

struct mbuf *
pfsync_if_dequeue(struct ifnet *ifp)
{
	struct mbuf *m;

	IF_DEQUEUE(&ifp->if_snd, m);

	return (m);
}

/*
 * Start output on the pfsync interface.
 */
void
pfsyncstart(struct ifnet *ifp)
{
	struct mbuf *m;
	int s;

	s = splnet();
	while ((m = pfsync_if_dequeue(ifp)) != NULL) {
		IF_DROP(&ifp->if_snd);
		m_freem(m);
	}
	splx(s);
}

int
pfsync_alloc_scrub_memory(struct pfsync_state_peer *s,
    struct pf_state_peer *d)
{
	if (s->scrub.scrub_flag && d->scrub == NULL) {
		d->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT | PR_ZERO);
		if (d->scrub == NULL)
			return (ENOMEM);
	}

	return (0);
}

void
pfsync_state_export(struct pfsync_state *sp, struct pf_state *st)
{
	bzero(sp, sizeof(struct pfsync_state));

	/* copy from state key */
	sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0];
	sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1];
	sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0];
	sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1];
	sp->key[PF_SK_WIRE].rdomain = htons(st->key[PF_SK_WIRE]->rdomain);
	sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0];
	sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1];
	sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0];
	sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1];
	sp->key[PF_SK_STACK].rdomain = htons(st->key[PF_SK_STACK]->rdomain);
	sp->rtableid[PF_SK_WIRE] = htonl(st->rtableid[PF_SK_WIRE]);
	sp->rtableid[PF_SK_STACK] = htonl(st->rtableid[PF_SK_STACK]);
	sp->proto = st->key[PF_SK_WIRE]->proto;
	sp->af = st->key[PF_SK_WIRE]->af;

	/* copy from state */
	strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname));
	bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr));
	sp->creation = htonl(time_second - st->creation);
	sp->expire = pf_state_expires(st);
	if (sp->expire <= time_second)
		sp->expire = htonl(0);
	else
		sp->expire = htonl(sp->expire - time_second);

	sp->direction = st->direction;
	sp->log = st->log;
	sp->timeout = st->timeout;
	sp->state_flags = st->state_flags;
	if (st->src_node)
		sp->sync_flags |= PFSYNC_FLAG_SRCNODE;
	if (st->nat_src_node)
		sp->sync_flags |= PFSYNC_FLAG_NATSRCNODE;

	bcopy(&st->id, &sp->id, sizeof(sp->id));
	sp->creatorid = st->creatorid;
	pf_state_peer_hton(&st->src, &sp->src);
	pf_state_peer_hton(&st->dst, &sp->dst);

	if (st->rule.ptr == NULL)
		sp->rule = htonl(-1);
	else
		sp->rule = htonl(st->rule.ptr->nr);
	if (st->anchor.ptr == NULL)
		sp->anchor = htonl(-1);
	else
		sp->anchor = htonl(st->anchor.ptr->nr);
	if (st->nat_rule.ptr == NULL)
		sp->nat_rule = htonl(-1);
	else
		sp->nat_rule = htonl(st->nat_rule.ptr->nr);

	pf_state_counter_hton(st->packets[0], sp->packets[0]);
	pf_state_counter_hton(st->packets[1], sp->packets[1]);
	pf_state_counter_hton(st->bytes[0], sp->bytes[0]);
	pf_state_counter_hton(st->bytes[1], sp->bytes[1]);

	sp->max_mss = htons(st->max_mss);
	sp->min_ttl = st->min_ttl;
	sp->set_tos = st->set_tos;
}
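
/*
 * Everything multi-byte above is converted to network byte order, and
 * creation/expire are exported as deltas relative to time_second, so
 * pfsync_state_import() below can rebase them against the receiver's
 * own clock.
 */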

int
pfsync_state_import(struct pfsync_state *sp, u_int8_t flags)
{
	struct pf_state *st = NULL;
	struct pf_state_key *skw = NULL, *sks = NULL;
	struct pf_rule *r = NULL;
	struct pfi_kif *kif;
	int pool_flags;
	int error;

	if (sp->creatorid == 0 && pf_status.debug >= PF_DEBUG_MISC) {
		printf("pfsync_state_import: invalid creator id:"
		    " %08x\n", ntohl(sp->creatorid));
		return (EINVAL);
	}

	if ((kif = pfi_kif_get(sp->ifname)) == NULL) {
		if (pf_status.debug >= PF_DEBUG_MISC)
			printf("pfsync_state_import: "
			    "unknown interface: %s\n", sp->ifname);
		if (flags & PFSYNC_SI_IOCTL)
			return (EINVAL);
		return (0);	/* skip this state */
	}

	/*
	 * If the ruleset checksums match or the state is coming from the
	 * ioctl, it's safe to associate the state with the rule of that
	 * number.
	 */
	if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) &&
	    (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) <
	    pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount)
		r = pf_main_ruleset.rules[
		    PF_RULESET_FILTER].active.ptr_array[ntohl(sp->rule)];
	else
		r = &pf_default_rule;

	if ((r->max_states && r->states_cur >= r->max_states))
		goto cleanup;

	if (flags & PFSYNC_SI_IOCTL)
		pool_flags = PR_WAITOK | PR_LIMITFAIL | PR_ZERO;
	else
		pool_flags = PR_LIMITFAIL | PR_ZERO;

	if ((st = pool_get(&pf_state_pl, pool_flags)) == NULL)
		goto cleanup;

	if ((skw = pf_alloc_state_key(pool_flags)) == NULL)
		goto cleanup;

	if (PF_ANEQ(&sp->key[PF_SK_WIRE].addr[0],
	    &sp->key[PF_SK_STACK].addr[0], sp->af) ||
	    PF_ANEQ(&sp->key[PF_SK_WIRE].addr[1],
	    &sp->key[PF_SK_STACK].addr[1], sp->af) ||
	    sp->key[PF_SK_WIRE].port[0] != sp->key[PF_SK_STACK].port[0] ||
	    sp->key[PF_SK_WIRE].port[1] != sp->key[PF_SK_STACK].port[1] ||
	    sp->key[PF_SK_WIRE].rdomain != sp->key[PF_SK_STACK].rdomain) {
		if ((sks = pf_alloc_state_key(pool_flags)) == NULL)
			goto cleanup;
	} else
		sks = skw;

	/* allocate memory for scrub info */
	if (pfsync_alloc_scrub_memory(&sp->src, &st->src) ||
	    pfsync_alloc_scrub_memory(&sp->dst, &st->dst))
		goto cleanup;

	/* copy to state key(s) */
	skw->addr[0] = sp->key[PF_SK_WIRE].addr[0];
	skw->addr[1] = sp->key[PF_SK_WIRE].addr[1];
	skw->port[0] = sp->key[PF_SK_WIRE].port[0];
	skw->port[1] = sp->key[PF_SK_WIRE].port[1];
	skw->rdomain = ntohs(sp->key[PF_SK_WIRE].rdomain);
	skw->proto = sp->proto;
	skw->af = sp->af;
	if (sks != skw) {
		sks->addr[0] = sp->key[PF_SK_STACK].addr[0];
		sks->addr[1] = sp->key[PF_SK_STACK].addr[1];
		sks->port[0] = sp->key[PF_SK_STACK].port[0];
		sks->port[1] = sp->key[PF_SK_STACK].port[1];
		sks->rdomain = ntohs(sp->key[PF_SK_STACK].rdomain);
		sks->proto = sp->proto;
		sks->af = sp->af;
	}
	st->rtableid[PF_SK_WIRE] = ntohl(sp->rtableid[PF_SK_WIRE]);
	st->rtableid[PF_SK_STACK] = ntohl(sp->rtableid[PF_SK_STACK]);

	/* copy to state */
	bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr));
	st->creation = time_second - ntohl(sp->creation);
	st->expire = time_second;
	if (sp->expire) {
		/* XXX No adaptive scaling. */
		st->expire -= r->timeout[sp->timeout] - ntohl(sp->expire);
	}

	st->expire = ntohl(sp->expire) + time_second;
	st->direction = sp->direction;
	st->log = sp->log;
	st->timeout = sp->timeout;
	st->state_flags = sp->state_flags;
	st->max_mss = ntohs(sp->max_mss);
	st->min_ttl = sp->min_ttl;
	st->set_tos = sp->set_tos;

	bcopy(sp->id, &st->id, sizeof(st->id));
	st->creatorid = sp->creatorid;
	pf_state_peer_ntoh(&sp->src, &st->src);
	pf_state_peer_ntoh(&sp->dst, &st->dst);

	st->rule.ptr = r;
	st->nat_rule.ptr = NULL;
	st->anchor.ptr = NULL;
	st->rt_kif = NULL;

	st->pfsync_time = time_uptime;
	st->sync_state = PFSYNC_S_NONE;

	/* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */
	r->states_cur++;
	r->states_tot++;

	if (!ISSET(flags, PFSYNC_SI_IOCTL))
		SET(st->state_flags, PFSTATE_NOSYNC);

	if (pf_state_insert(kif, skw, sks, st) != 0) {
		/* XXX when we have nat_rule/anchors, use STATE_DEC_COUNTERS */
		r->states_cur--;
		error = EEXIST;
		goto cleanup_state;
	}

	if (!ISSET(flags, PFSYNC_SI_IOCTL)) {
		CLR(st->state_flags, PFSTATE_NOSYNC);
		if (ISSET(st->state_flags, PFSTATE_ACK)) {
			pfsync_q_ins(st, PFSYNC_S_IACK);
			schednetisr(NETISR_PFSYNC);
		}
	}
	CLR(st->state_flags, PFSTATE_ACK);

	return (0);

cleanup:
	error = ENOMEM;
	if (skw == sks)
		sks = NULL;
	if (skw != NULL)
		pool_put(&pf_state_key_pl, skw);
	if (sks != NULL)
		pool_put(&pf_state_key_pl, sks);

cleanup_state:	/* pf_state_insert frees the state keys */
	if (st) {
		if (st->dst.scrub)
			pool_put(&pf_state_scrub_pl, st->dst.scrub);
		if (st->src.scrub)
			pool_put(&pf_state_scrub_pl, st->src.scrub);
		pool_put(&pf_state_pl, st);
	}
	return (error);
}
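
/*
 * Import error handling, as implemented above: an unknown interface
 * is fatal (EINVAL) only for ioctl-driven imports and is silently
 * skipped for states learned from the wire; allocation failures
 * unwind through the cleanup labels.  PFSTATE_NOSYNC is set around
 * pf_state_insert() for wire imports so the insertion itself is not
 * echoed back to the peer.
 */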

void
pfsync_input(struct mbuf *m, ...)
{
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_pkt pkt;
	struct ip *ip = mtod(m, struct ip *);
	struct pfsync_header *ph;
	struct pfsync_subheader subh;

	int offset, len;
	int rv;

	pfsyncstats.pfsyncs_ipackets++;

	/* verify that we have a sync interface configured */
	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
	    sc->sc_sync_if == NULL || !pf_status.running)
		goto done;

	/* verify that the packet came in on the right interface */
	if (sc->sc_sync_if != m->m_pkthdr.rcvif) {
		pfsyncstats.pfsyncs_badif++;
		goto done;
	}

	sc->sc_if.if_ipackets++;
	sc->sc_if.if_ibytes += m->m_pkthdr.len;

	/* verify that the IP TTL is 255. */
	if (ip->ip_ttl != PFSYNC_DFLTTL) {
		pfsyncstats.pfsyncs_badttl++;
		goto done;
	}

	offset = ip->ip_hl << 2;
	if (m->m_pkthdr.len < offset + sizeof(*ph)) {
		pfsyncstats.pfsyncs_hdrops++;
		goto done;
	}

	if (offset + sizeof(*ph) > m->m_len) {
		if (m_pullup(m, offset + sizeof(*ph)) == NULL) {
			pfsyncstats.pfsyncs_hdrops++;
			return;
		}
		ip = mtod(m, struct ip *);
	}
	ph = (struct pfsync_header *)((char *)ip + offset);

	/* verify the version */
	if (ph->version != PFSYNC_VERSION) {
		pfsyncstats.pfsyncs_badver++;
		goto done;
	}

	len = ntohs(ph->len) + offset;
	if (m->m_pkthdr.len < len) {
		pfsyncstats.pfsyncs_badlen++;
		goto done;
	}

	/* Cheaper to grab this now than having to mess with mbufs later */
	pkt.ip = ip;
	pkt.src = ip->ip_src;
	pkt.flags = 0;

	if (!bcmp(&ph->pfcksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH))
		pkt.flags |= PFSYNC_SI_CKSUM;

	offset += sizeof(*ph);
	while (offset <= len - sizeof(subh)) {
		m_copydata(m, offset, sizeof(subh), (caddr_t)&subh);
		offset += sizeof(subh);

		if (subh.action >= PFSYNC_ACT_MAX ||
		    subh.action >= nitems(pfsync_acts)) {
			pfsyncstats.pfsyncs_badact++;
			goto done;
		}

		rv = (*pfsync_acts[subh.action])(&pkt, m, offset,
		    ntohs(subh.count));
		if (rv == -1)
			return;

		offset += rv;
	}

done:
	m_freem(m);
}
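
/*
 * Each action handler below consumes its payload with m_pulldown()
 * and returns the number of bytes it used, so the loop above can
 * step to the next subheader.  A return of -1 means the handler
 * already freed the mbuf (bad length, EOF, or an unsupported action)
 * and the packet must not be touched again.
 */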

int
pfsync_in_clr(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	struct pfsync_clr *clr;
	struct mbuf *mp;
	int len = sizeof(*clr) * count;
	int i, offp;

	struct pf_state *st, *nexts;
	struct pf_state_key *sk, *nextsk;
	struct pf_state_item *si;
	u_int32_t creatorid;
	int s;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	clr = (struct pfsync_clr *)(mp->m_data + offp);

	s = splsoftnet();
	for (i = 0; i < count; i++) {
		creatorid = clr[i].creatorid;

		if (clr[i].ifname[0] == '\0') {
			for (st = RB_MIN(pf_state_tree_id, &tree_id);
			    st; st = nexts) {
				nexts = RB_NEXT(pf_state_tree_id, &tree_id, st);
				if (st->creatorid == creatorid) {
					SET(st->state_flags, PFSTATE_NOSYNC);
					pf_unlink_state(st);
				}
			}
		} else {
			if (pfi_kif_get(clr[i].ifname) == NULL)
				continue;

			/* XXX correct? */
			for (sk = RB_MIN(pf_state_tree, &pf_statetbl);
			    sk; sk = nextsk) {
				nextsk = RB_NEXT(pf_state_tree,
				    &pf_statetbl, sk);
				TAILQ_FOREACH(si, &sk->states, entry) {
					if (si->s->creatorid == creatorid) {
						SET(si->s->state_flags,
						    PFSTATE_NOSYNC);
						pf_unlink_state(si->s);
					}
				}
			}
		}
	}
	splx(s);

	return (len);
}

int
pfsync_in_ins(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	struct mbuf *mp;
	struct pfsync_state *sa, *sp;
	int len = sizeof(*sp) * count;
	int i, offp;

	int s;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	sa = (struct pfsync_state *)(mp->m_data + offp);

	s = splsoftnet();
	for (i = 0; i < count; i++) {
		sp = &sa[i];

		/* check for invalid values */
		if (sp->timeout >= PFTM_MAX ||
		    sp->src.state > PF_TCPS_PROXY_DST ||
		    sp->dst.state > PF_TCPS_PROXY_DST ||
		    sp->direction > PF_OUT ||
		    (sp->af != AF_INET && sp->af != AF_INET6)) {
			if (pf_status.debug >= PF_DEBUG_MISC) {
				printf("pfsync_input: PFSYNC_ACT_INS: "
				    "invalid value\n");
			}
			pfsyncstats.pfsyncs_badval++;
			continue;
		}

		if (pfsync_state_import(sp, pkt->flags) == ENOMEM) {
			/* drop out, but process the rest of the actions */
			break;
		}
	}
	splx(s);

	return (len);
}

int
pfsync_in_iack(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	struct pfsync_ins_ack *ia, *iaa;
	struct pf_state_cmp id_key;
	struct pf_state *st;

	struct mbuf *mp;
	int len = count * sizeof(*ia);
	int offp, i;
	int s;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	iaa = (struct pfsync_ins_ack *)(mp->m_data + offp);

	s = splsoftnet();
	for (i = 0; i < count; i++) {
		ia = &iaa[i];

		bcopy(&ia->id, &id_key.id, sizeof(id_key.id));
		id_key.creatorid = ia->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL)
			continue;

		if (ISSET(st->state_flags, PFSTATE_ACK))
			pfsync_deferred(st, 0);
	}
	splx(s);

	return (len);
}

int
pfsync_upd_tcp(struct pf_state *st, struct pfsync_state_peer *src,
    struct pfsync_state_peer *dst)
{
	int sync = 0;

	/*
	 * The state should never go backwards except
	 * for syn-proxy states.  Neither should the
	 * sequence window slide backwards.
	 */
	if ((st->src.state > src->state &&
	    (st->src.state < PF_TCPS_PROXY_SRC ||
	    src->state >= PF_TCPS_PROXY_SRC)) ||

	    (st->src.state == src->state &&
	    SEQ_GT(st->src.seqlo, ntohl(src->seqlo))))
		sync++;
	else
		pf_state_peer_ntoh(src, &st->src);

	if ((st->dst.state > dst->state) ||

	    (st->dst.state >= TCPS_SYN_SENT &&
	    SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo))))
		sync++;
	else
		pf_state_peer_ntoh(dst, &st->dst);

	return (sync);
}
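
/*
 * Example, under the rules above: if our copy of the state is
 * ESTABLISHED while a peer's update still reports SYN_SENT, or the
 * peer's seqlo lags behind ours, the update is stale; sync is bumped
 * instead of applying it, and the caller answers with a fresh update
 * of its own.
 */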

int
pfsync_in_upd(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	struct pfsync_state *sa, *sp;
	struct pf_state_cmp id_key;
	struct pf_state *st;
	int sync;

	struct mbuf *mp;
	int len = count * sizeof(*sp);
	int offp, i;
	int s;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	sa = (struct pfsync_state *)(mp->m_data + offp);

	s = splsoftnet();
	for (i = 0; i < count; i++) {
		sp = &sa[i];

		/* check for invalid values */
		if (sp->timeout >= PFTM_MAX ||
		    sp->src.state > PF_TCPS_PROXY_DST ||
		    sp->dst.state > PF_TCPS_PROXY_DST) {
			if (pf_status.debug >= PF_DEBUG_MISC) {
				printf("pfsync_input: PFSYNC_ACT_UPD: "
				    "invalid value\n");
			}
			pfsyncstats.pfsyncs_badval++;
			continue;
		}

		bcopy(sp->id, &id_key.id, sizeof(id_key.id));
		id_key.creatorid = sp->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL) {
			/* insert the update */
			if (pfsync_state_import(sp, 0))
				pfsyncstats.pfsyncs_badstate++;
			continue;
		}

		if (ISSET(st->state_flags, PFSTATE_ACK))
			pfsync_deferred(st, 1);

		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
			sync = pfsync_upd_tcp(st, &sp->src, &sp->dst);
		else {
			sync = 0;

			/*
			 * Non-TCP protocol state machines always go
			 * forwards.
			 */
			if (st->src.state > sp->src.state)
				sync++;
			else
				pf_state_peer_ntoh(&sp->src, &st->src);

			if (st->dst.state > sp->dst.state)
				sync++;
			else
				pf_state_peer_ntoh(&sp->dst, &st->dst);
		}

		if (sync < 2) {
			pfsync_alloc_scrub_memory(&sp->dst, &st->dst);
			pf_state_peer_ntoh(&sp->dst, &st->dst);
			st->expire = ntohl(sp->expire) + time_second;
			st->timeout = sp->timeout;
		}
		st->pfsync_time = time_uptime;

		if (sync) {
			pfsyncstats.pfsyncs_stale++;

			pfsync_update_state(st);
			schednetisr(NETISR_PFSYNC);
		}
	}
	splx(s);

	return (len);
}
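
/*
 * PFSYNC_ACT_UPD_C below is the compressed variant of the update
 * handler above: it carries only the id, creatorid, and the peer
 * blocks.  If the state is unknown here, a full update is requested
 * from the peer rather than creating it from the partial data.
 */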

int
pfsync_in_upd_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	struct pfsync_upd_c *ua, *up;
	struct pf_state_cmp id_key;
	struct pf_state *st;

	int len = count * sizeof(*up);
	int sync;

	struct mbuf *mp;
	int offp, i;
	int s;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	ua = (struct pfsync_upd_c *)(mp->m_data + offp);

	s = splsoftnet();
	for (i = 0; i < count; i++) {
		up = &ua[i];

		/* check for invalid values */
		if (up->timeout >= PFTM_MAX ||
		    up->src.state > PF_TCPS_PROXY_DST ||
		    up->dst.state > PF_TCPS_PROXY_DST) {
			if (pf_status.debug >= PF_DEBUG_MISC) {
				printf("pfsync_input: "
				    "PFSYNC_ACT_UPD_C: "
				    "invalid value\n");
			}
			pfsyncstats.pfsyncs_badval++;
			continue;
		}

		bcopy(&up->id, &id_key.id, sizeof(id_key.id));
		id_key.creatorid = up->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL) {
			/* We don't have this state.  Ask for it. */
			pfsync_request_update(id_key.creatorid, id_key.id);
			continue;
		}

		if (ISSET(st->state_flags, PFSTATE_ACK))
			pfsync_deferred(st, 1);

		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
			sync = pfsync_upd_tcp(st, &up->src, &up->dst);
		else {
			sync = 0;

			/*
			 * Non-TCP protocol state machines always go
			 * forwards.
			 */
			if (st->src.state > up->src.state)
				sync++;
			else
				pf_state_peer_ntoh(&up->src, &st->src);

			if (st->dst.state > up->dst.state)
				sync++;
			else
				pf_state_peer_ntoh(&up->dst, &st->dst);
		}
		if (sync < 2) {
			pfsync_alloc_scrub_memory(&up->dst, &st->dst);
			pf_state_peer_ntoh(&up->dst, &st->dst);
			st->expire = ntohl(up->expire) + time_second;
			st->timeout = up->timeout;
		}
		st->pfsync_time = time_uptime;

		if (sync) {
			pfsyncstats.pfsyncs_stale++;

			pfsync_update_state(st);
			schednetisr(NETISR_PFSYNC);
		}
	}
	splx(s);

	return (len);
}

int
pfsync_in_ureq(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	struct pfsync_upd_req *ur, *ura;
	struct mbuf *mp;
	int len = count * sizeof(*ur);
	int i, offp;

	struct pf_state_cmp id_key;
	struct pf_state *st;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	ura = (struct pfsync_upd_req *)(mp->m_data + offp);

	for (i = 0; i < count; i++) {
		ur = &ura[i];

		bcopy(&ur->id, &id_key.id, sizeof(id_key.id));
		id_key.creatorid = ur->creatorid;

		if (id_key.id == 0 && id_key.creatorid == 0)
			pfsync_bulk_start();
		else {
			st = pf_find_state_byid(&id_key);
			if (st == NULL) {
				pfsyncstats.pfsyncs_badstate++;
				continue;
			}
			if (ISSET(st->state_flags, PFSTATE_NOSYNC))
				continue;

			pfsync_update_state_req(st);
		}
	}

	return (len);
}

int
pfsync_in_del(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	struct mbuf *mp;
	struct pfsync_state *sa, *sp;
	struct pf_state_cmp id_key;
	struct pf_state *st;
	int len = count * sizeof(*sp);
	int offp, i;
	int s;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	sa = (struct pfsync_state *)(mp->m_data + offp);

	s = splsoftnet();
	for (i = 0; i < count; i++) {
		sp = &sa[i];

		bcopy(sp->id, &id_key.id, sizeof(id_key.id));
		id_key.creatorid = sp->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL) {
			pfsyncstats.pfsyncs_badstate++;
			continue;
		}

		SET(st->state_flags, PFSTATE_NOSYNC);
		pf_unlink_state(st);
	}
	splx(s);

	return (len);
}

int
pfsync_in_del_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	struct mbuf *mp;
	struct pfsync_del_c *sa, *sp;
	struct pf_state_cmp id_key;
	struct pf_state *st;
	int len = count * sizeof(*sp);
	int offp, i;
	int s;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	sa = (struct pfsync_del_c *)(mp->m_data + offp);

	s = splsoftnet();
	for (i = 0; i < count; i++) {
		sp = &sa[i];

		bcopy(&sp->id, &id_key.id, sizeof(id_key.id));
		id_key.creatorid = sp->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL) {
			pfsyncstats.pfsyncs_badstate++;
			continue;
		}

		SET(st->state_flags, PFSTATE_NOSYNC);
		pf_unlink_state(st);
	}
	splx(s);

	return (len);
}

int
pfsync_in_bus(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_bus *bus;
	struct mbuf *mp;
	int len = count * sizeof(*bus);
	int offp;

	/* If we're not waiting for a bulk update, who cares. */
	if (sc->sc_ureq_sent == 0)
		return (len);

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	bus = (struct pfsync_bus *)(mp->m_data + offp);

	switch (bus->status) {
	case PFSYNC_BUS_START:
		timeout_add(&sc->sc_bulkfail_tmo, 4 * hz +
		    pf_pool_limits[PF_LIMIT_STATES].limit /
		    ((sc->sc_if.if_mtu - PFSYNC_MINPKT) /
		    sizeof(struct pfsync_state)));
		if (pf_status.debug >= PF_DEBUG_MISC)
			printf("pfsync: received bulk update start\n");
		break;

	case PFSYNC_BUS_END:
		if (time_uptime - ntohl(bus->endtime) >=
		    sc->sc_ureq_sent) {
			/* that's it, we're happy */
			sc->sc_ureq_sent = 0;
			sc->sc_bulk_tries = 0;
			timeout_del(&sc->sc_bulkfail_tmo);
#if NCARP > 0
			if (!pfsync_sync_ok)
				carp_group_demote_adj(&sc->sc_if, -1);
#endif
			pfsync_sync_ok = 1;
			if (pf_status.debug >= PF_DEBUG_MISC)
				printf("pfsync: received valid "
				    "bulk update end\n");
		} else {
			if (pf_status.debug >= PF_DEBUG_MISC)
				printf("pfsync: received invalid "
				    "bulk update end: bad timestamp\n");
		}
		break;
	}

	return (len);
}
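
/*
 * The bulk-fail timeout above is 4 seconds of slack plus one tick
 * per full packet of states: the state pool limit divided by how
 * many pfsync_state structures fit in one packet at the current MTU.
 * That works out to a rough upper bound on how long a complete bulk
 * transfer should take.
 */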

int
pfsync_in_tdb(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	int len = count * sizeof(struct pfsync_tdb);

#if defined(IPSEC)
	struct pfsync_tdb *tp;
	struct mbuf *mp;
	int offp;
	int i;
	int s;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	tp = (struct pfsync_tdb *)(mp->m_data + offp);

	s = splsoftnet();
	for (i = 0; i < count; i++)
		pfsync_update_net_tdb(&tp[i]);
	splx(s);
#endif

	return (len);
}

#if defined(IPSEC)
/* Update an in-kernel tdb.  Silently fail if no tdb is found. */
void
pfsync_update_net_tdb(struct pfsync_tdb *pt)
{
	struct tdb *tdb;
	int s;

	/* check for invalid values */
	if (ntohl(pt->spi) <= SPI_RESERVED_MAX ||
	    (pt->dst.sa.sa_family != AF_INET &&
	    pt->dst.sa.sa_family != AF_INET6))
		goto bad;

	s = spltdb();
	tdb = gettdb(pt->spi, &pt->dst, pt->sproto);
	if (tdb) {
		pt->rpl = ntohl(pt->rpl);
		pt->cur_bytes = betoh64(pt->cur_bytes);

		/* Neither replay nor byte counter should ever decrease. */
		if (pt->rpl < tdb->tdb_rpl ||
		    pt->cur_bytes < tdb->tdb_cur_bytes) {
			splx(s);
			goto bad;
		}

		tdb->tdb_rpl = pt->rpl;
		tdb->tdb_cur_bytes = pt->cur_bytes;
	}
	splx(s);
	return;

bad:
	if (pf_status.debug >= PF_DEBUG_MISC)
		printf("pfsync_insert: PFSYNC_ACT_TDB_UPD: "
		    "invalid value\n");
	pfsyncstats.pfsyncs_badstate++;
	return;
}
#endif

int
pfsync_in_eof(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	/* check if we are at the right place in the packet */
	if (offset != m->m_pkthdr.len)
		pfsyncstats.pfsyncs_badlen++;

	/* we're done.  free and let the caller return */
	m_freem(m);
	return (-1);
}

int
pfsync_in_error(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	pfsyncstats.pfsyncs_badact++;

	m_freem(m);
	return (-1);
}

int
pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
    struct rtentry *rt)
{
	m_freem(m);
	return (0);
}

/* ARGSUSED */
int
pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct proc *p = curproc;
	struct pfsync_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ip_moptions *imo = &sc->sc_imo;
	struct pfsyncreq pfsyncr;
	struct ifnet *sifp;
	struct ip *ip;
	int s, error;

	switch (cmd) {
#if 0
	case SIOCSIFADDR:
	case SIOCAIFADDR:
	case SIOCSIFDSTADDR:
#endif
	case SIOCSIFFLAGS:
		s = splnet();
		if (ifp->if_flags & IFF_UP)
			ifp->if_flags |= IFF_RUNNING;
		else {
			ifp->if_flags &= ~IFF_RUNNING;

			/* drop everything */
			timeout_del(&sc->sc_tmo);
			pfsync_drop(sc);

			/* cancel bulk update */
			timeout_del(&sc->sc_bulk_tmo);
			sc->sc_bulk_next = NULL;
			sc->sc_bulk_last = NULL;
		}
		splx(s);
		break;
	case SIOCSIFMTU:
		s = splnet();
		if (ifr->ifr_mtu <= PFSYNC_MINPKT) {
			splx(s);
			return (EINVAL);
		}
		if (ifr->ifr_mtu > MCLBYTES) /* XXX could be bigger */
			ifr->ifr_mtu = MCLBYTES;
		if (ifr->ifr_mtu < ifp->if_mtu)
			pfsync_sendout();
		ifp->if_mtu = ifr->ifr_mtu;
		splx(s);
		break;
	case SIOCGETPFSYNC:
		bzero(&pfsyncr, sizeof(pfsyncr));
		if (sc->sc_sync_if) {
			strlcpy(pfsyncr.pfsyncr_syncdev,
			    sc->sc_sync_if->if_xname, IFNAMSIZ);
		}
		pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer;
		pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates;
		pfsyncr.pfsyncr_defer = sc->sc_defer;
		return (copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr)));

	case SIOCSETPFSYNC:
		if ((error = suser(p, p->p_acflag)) != 0)
			return (error);
		if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr))))
			return (error);

		s = splnet();

		if (pfsyncr.pfsyncr_syncpeer.s_addr == 0)
			sc->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP;
		else
			sc->sc_sync_peer.s_addr =
			    pfsyncr.pfsyncr_syncpeer.s_addr;

		if (pfsyncr.pfsyncr_maxupdates > 255) {
			splx(s);
			return (EINVAL);
		}
		sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates;

		sc->sc_defer = pfsyncr.pfsyncr_defer;

		if (pfsyncr.pfsyncr_syncdev[0] == 0) {
			sc->sc_sync_if = NULL;
			if (imo->imo_num_memberships > 0) {
				in_delmulti(imo->imo_membership[
				    --imo->imo_num_memberships]);
				imo->imo_multicast_ifp = NULL;
			}
			splx(s);
			break;
		}

		if ((sifp = ifunit(pfsyncr.pfsyncr_syncdev)) == NULL) {
			splx(s);
			return (EINVAL);
		}

		if (sifp->if_mtu < sc->sc_if.if_mtu ||
		    (sc->sc_sync_if != NULL &&
		    sifp->if_mtu < sc->sc_sync_if->if_mtu) ||
		    sifp->if_mtu < MCLBYTES - sizeof(struct ip))
			pfsync_sendout();
		sc->sc_sync_if = sifp;

		if (imo->imo_num_memberships > 0) {
			in_delmulti(imo->imo_membership[
			    --imo->imo_num_memberships]);
			imo->imo_multicast_ifp = NULL;
		}

		if (sc->sc_sync_if &&
		    sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) {
			struct in_addr addr;

			if (!(sc->sc_sync_if->if_flags & IFF_MULTICAST)) {
				sc->sc_sync_if = NULL;
				splx(s);
				return (EADDRNOTAVAIL);
			}

			addr.s_addr = INADDR_PFSYNC_GROUP;

			if ((imo->imo_membership[0] =
			    in_addmulti(&addr, sc->sc_sync_if)) == NULL) {
				sc->sc_sync_if = NULL;
				splx(s);
				return (ENOBUFS);
			}
			imo->imo_num_memberships++;
			imo->imo_multicast_ifp = sc->sc_sync_if;
			imo->imo_multicast_ttl = PFSYNC_DFLTTL;
			imo->imo_multicast_loop = 0;
		}

		ip = &sc->sc_template;
		bzero(ip, sizeof(*ip));
		ip->ip_v = IPVERSION;
		ip->ip_hl = sizeof(sc->sc_template) >> 2;
		ip->ip_tos = IPTOS_LOWDELAY;
		/* len and id are set later */
		ip->ip_off = htons(IP_DF);
		ip->ip_ttl = PFSYNC_DFLTTL;
		ip->ip_p = IPPROTO_PFSYNC;
		ip->ip_src.s_addr = INADDR_ANY;
		ip->ip_dst.s_addr = sc->sc_sync_peer.s_addr;

		if (sc->sc_sync_if) {
			/* Request a full state table update. */
			sc->sc_ureq_sent = time_uptime;
#if NCARP > 0
			if (pfsync_sync_ok)
				carp_group_demote_adj(&sc->sc_if, 1);
#endif
			pfsync_sync_ok = 0;
			if (pf_status.debug >= PF_DEBUG_MISC)
				printf("pfsync: requesting bulk update\n");
			timeout_add(&sc->sc_bulkfail_tmo, 4 * hz +
			    pf_pool_limits[PF_LIMIT_STATES].limit /
			    ((sc->sc_if.if_mtu - PFSYNC_MINPKT) /
			    sizeof(struct pfsync_state)));
			pfsync_request_update(0, 0);
		}
		splx(s);

		break;

	default:
		return (ENOTTY);
	}

	return (0);
}
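
/*
 * SIOCSETPFSYNC is normally reached via ifconfig(8); a sketch of the
 * usual invocation (see pfsync(4) for the authoritative syntax):
 *
 *	# ifconfig pfsync0 syncdev em0 up
 *
 * which selects the interface the sync traffic runs over; the
 * multicast group is joined unless an explicit syncpeer is given.
 */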

void
pfsync_out_state(struct pf_state *st, void *buf)
{
	struct pfsync_state *sp = buf;

	pfsync_state_export(sp, st);
}

void
pfsync_out_iack(struct pf_state *st, void *buf)
{
	struct pfsync_ins_ack *iack = buf;

	iack->id = st->id;
	iack->creatorid = st->creatorid;
}

void
pfsync_out_upd_c(struct pf_state *st, void *buf)
{
	struct pfsync_upd_c *up = buf;

	up->id = st->id;
	pf_state_peer_hton(&st->src, &up->src);
	pf_state_peer_hton(&st->dst, &up->dst);
	up->creatorid = st->creatorid;

	up->expire = pf_state_expires(st);
	if (up->expire <= time_second)
		up->expire = htonl(0);
	else
		up->expire = htonl(up->expire - time_second);
	up->timeout = st->timeout;

	bzero(up->_pad, sizeof(up->_pad)); /* XXX */
}

void
pfsync_out_del(struct pf_state *st, void *buf)
{
	struct pfsync_del_c *dp = buf;

	dp->id = st->id;
	dp->creatorid = st->creatorid;

	SET(st->state_flags, PFSTATE_NOSYNC);
}

void
pfsync_drop(struct pfsync_softc *sc)
{
	struct pf_state *st;
	struct pfsync_upd_req_item *ur;
	struct tdb *t;
	int q;

	for (q = 0; q < PFSYNC_S_COUNT; q++) {
		if (TAILQ_EMPTY(&sc->sc_qs[q]))
			continue;

		TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list) {
#ifdef PFSYNC_DEBUG
			KASSERT(st->sync_state == q);
#endif
			st->sync_state = PFSYNC_S_NONE;
		}
		TAILQ_INIT(&sc->sc_qs[q]);
	}

	while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
		TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);
		pool_put(&sc->sc_pool, ur);
	}

	sc->sc_plus = NULL;

	if (!TAILQ_EMPTY(&sc->sc_tdb_q)) {
		TAILQ_FOREACH(t, &sc->sc_tdb_q, tdb_sync_entry)
			CLR(t->tdb_flags, TDBF_PFSYNC);

		TAILQ_INIT(&sc->sc_tdb_q);
	}

	sc->sc_len = PFSYNC_MINPKT;
}
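
/*
 * A sketch of the packet pfsync_sendout() assembles, when all the
 * sources below have something queued:
 *
 *	struct ip
 *	struct pfsync_header
 *	subheader PFSYNC_ACT_UPD_REQ + pfsync_upd_req entries
 *	optional "plus" region (pfsync_send_plus())
 *	subheader PFSYNC_ACT_TDB + pfsync_tdb entries
 *	subheader + payload for each non-empty PFSYNC_S_* queue
 *	subheader PFSYNC_ACT_EOF
 */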

void
pfsync_sendout(void)
{
	struct pfsync_softc *sc = pfsyncif;
#if NBPFILTER > 0
	struct ifnet *ifp = &sc->sc_if;
#endif
	struct mbuf *m;
	struct ip *ip;
	struct pfsync_header *ph;
	struct pfsync_subheader *subh;
	struct pf_state *st;
	struct pfsync_upd_req_item *ur;
	struct tdb *t;

	int offset;
	int q, count = 0;

	if (sc == NULL || sc->sc_len == PFSYNC_MINPKT)
		return;

	if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
#if NBPFILTER > 0
	    (ifp->if_bpf == NULL && sc->sc_sync_if == NULL)) {
#else
	    sc->sc_sync_if == NULL) {
#endif
		pfsync_drop(sc);
		return;
	}

	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL) {
		sc->sc_if.if_oerrors++;
		pfsyncstats.pfsyncs_onomem++;
		pfsync_drop(sc);
		return;
	}

	if (max_linkhdr + sc->sc_len > MHLEN) {
		MCLGETI(m, M_DONTWAIT, NULL, max_linkhdr + sc->sc_len);
		if (!ISSET(m->m_flags, M_EXT)) {
			m_free(m);
			sc->sc_if.if_oerrors++;
			pfsyncstats.pfsyncs_onomem++;
			pfsync_drop(sc);
			return;
		}
	}
	m->m_data += max_linkhdr;
	m->m_len = m->m_pkthdr.len = sc->sc_len;

	/* build the ip header */
	ip = (struct ip *)m->m_data;
	bcopy(&sc->sc_template, ip, sizeof(*ip));
	offset = sizeof(*ip);

	ip->ip_len = htons(m->m_pkthdr.len);
	ip->ip_id = htons(ip_randomid());

	/* build the pfsync header */
	ph = (struct pfsync_header *)(m->m_data + offset);
	bzero(ph, sizeof(*ph));
	offset += sizeof(*ph);

	ph->version = PFSYNC_VERSION;
	ph->len = htons(sc->sc_len - sizeof(*ip));
	bcopy(pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH);

	if (!TAILQ_EMPTY(&sc->sc_upd_req_list)) {
		subh = (struct pfsync_subheader *)(m->m_data + offset);
		offset += sizeof(*subh);

		count = 0;
		while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
			TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);

			bcopy(&ur->ur_msg, m->m_data + offset,
			    sizeof(ur->ur_msg));
			offset += sizeof(ur->ur_msg);

			pool_put(&sc->sc_pool, ur);

			count++;
		}

		bzero(subh, sizeof(*subh));
		subh->action = PFSYNC_ACT_UPD_REQ;
		subh->count = htons(count);
	}

	/* has someone built a custom region for us to add? */
	if (sc->sc_plus != NULL) {
		bcopy(sc->sc_plus, m->m_data + offset, sc->sc_pluslen);
		offset += sc->sc_pluslen;

		sc->sc_plus = NULL;
	}

	if (!TAILQ_EMPTY(&sc->sc_tdb_q)) {
		subh = (struct pfsync_subheader *)(m->m_data + offset);
		offset += sizeof(*subh);

		count = 0;
		TAILQ_FOREACH(t, &sc->sc_tdb_q, tdb_sync_entry) {
			pfsync_out_tdb(t, m->m_data + offset);
			offset += sizeof(struct pfsync_tdb);
			CLR(t->tdb_flags, TDBF_PFSYNC);

			count++;
		}
		TAILQ_INIT(&sc->sc_tdb_q);

		bzero(subh, sizeof(*subh));
		subh->action = PFSYNC_ACT_TDB;
		subh->count = htons(count);
	}

	/* walk the queues */
	for (q = 0; q < PFSYNC_S_COUNT; q++) {
		if (TAILQ_EMPTY(&sc->sc_qs[q]))
			continue;

		subh = (struct pfsync_subheader *)(m->m_data + offset);
		offset += sizeof(*subh);

		count = 0;
		TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list) {
#ifdef PFSYNC_DEBUG
			KASSERT(st->sync_state == q);
#endif
			pfsync_qs[q].write(st, m->m_data + offset);
			offset += pfsync_qs[q].len;

			st->sync_state = PFSYNC_S_NONE;
			count++;
		}
		TAILQ_INIT(&sc->sc_qs[q]);

		bzero(subh, sizeof(*subh));
		subh->action = pfsync_qs[q].action;
		subh->count = htons(count);
	}

	subh = (struct pfsync_subheader *)(m->m_data + offset);
	offset += sizeof(*subh);

	bzero(subh, sizeof(*subh));
	subh->action = PFSYNC_ACT_EOF;
	subh->count = htons(1);

	/* we're done, let's put it on the wire */
#if NBPFILTER > 0
	if (ifp->if_bpf) {
		m->m_data += sizeof(*ip);
		m->m_len = m->m_pkthdr.len = sc->sc_len - sizeof(*ip);
		bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
		m->m_data -= sizeof(*ip);
		m->m_len = m->m_pkthdr.len = sc->sc_len;
	}

	if (sc->sc_sync_if == NULL) {
		sc->sc_len = PFSYNC_MINPKT;
		m_freem(m);
		return;
	}
#endif

	sc->sc_if.if_opackets++;
	sc->sc_if.if_obytes += m->m_pkthdr.len;

	if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL) == 0)
		pfsyncstats.pfsyncs_opackets++;
	else
		pfsyncstats.pfsyncs_oerrors++;

	/* start again */
	sc->sc_len = PFSYNC_MINPKT;
}

void
pfsync_insert_state(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;

	splsoftassert(IPL_SOFTNET);

	if (ISSET(st->rule.ptr->rule_flag, PFRULE_NOSYNC) ||
	    st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) {
		SET(st->state_flags, PFSTATE_NOSYNC);
		return;
	}

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
	    ISSET(st->state_flags, PFSTATE_NOSYNC))
		return;

#ifdef PFSYNC_DEBUG
	KASSERT(st->sync_state == PFSYNC_S_NONE);
#endif

	if (sc->sc_len == PFSYNC_MINPKT)
		timeout_add_sec(&sc->sc_tmo, 1);

	pfsync_q_ins(st, PFSYNC_S_INS);

	st->sync_updates = 0;
}

int defer = 10;

int
pfsync_defer(struct pf_state *st, struct mbuf *m)
{
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_deferral *pd;

	splsoftassert(IPL_SOFTNET);

	if (!sc->sc_defer)
		return (0);

	if (sc->sc_deferred >= 128)
		pfsync_undefer(TAILQ_FIRST(&sc->sc_deferrals), 0);

	pd = pool_get(&sc->sc_pool, PR_NOWAIT);
	if (pd == NULL)
		return (0);
	sc->sc_deferred++;

	m->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
	SET(st->state_flags, PFSTATE_ACK);

	pd->pd_st = st;
	pd->pd_m = m;

	TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry);
	timeout_set(&pd->pd_tmo, pfsync_defer_tmo, pd);
	timeout_add(&pd->pd_tmo, defer);

	schednetisr(NETISR_PFSYNC);

	return (1);
}
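
/*
 * pfsync_undefer() releases a packet held by pfsync_defer(): with
 * drop == 0 (the peer acked our insert, or the timeout fired) it is
 * finally transmitted with ip_output(); with drop == 1 it is freed.
 */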

void
pfsync_undefer(struct pfsync_deferral *pd, int drop)
{
	struct pfsync_softc *sc = pfsyncif;

	splsoftassert(IPL_SOFTNET);

	TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
	sc->sc_deferred--;

	CLR(pd->pd_st->state_flags, PFSTATE_ACK);
	timeout_del(&pd->pd_tmo); /* bah */
	if (drop)
		m_freem(pd->pd_m);
	else {
		ip_output(pd->pd_m, (void *)NULL, (void *)NULL, 0,
		    (void *)NULL, (void *)NULL);
	}

	pool_put(&sc->sc_pool, pd);
}

void
pfsync_defer_tmo(void *arg)
{
	int s;

	s = splsoftnet();
	pfsync_undefer(arg, 0);
	splx(s);
}

void
pfsync_deferred(struct pf_state *st, int drop)
{
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_deferral *pd;

	TAILQ_FOREACH(pd, &sc->sc_deferrals, pd_entry) {
		if (pd->pd_st == st) {
			pfsync_undefer(pd, drop);
			return;
		}
	}

	panic("pfsync_deferred: unable to find deferred state");
}

u_int pfsync_upds = 0;

void
pfsync_update_state(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;
	int sync = 0;

	splsoftassert(IPL_SOFTNET);

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
		return;

	if (ISSET(st->state_flags, PFSTATE_ACK))
		pfsync_deferred(st, 0);
	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
		if (st->sync_state != PFSYNC_S_NONE)
			pfsync_q_del(st);
		return;
	}

	if (sc->sc_len == PFSYNC_MINPKT)
		timeout_add_sec(&sc->sc_tmo, 1);

	switch (st->sync_state) {
	case PFSYNC_S_UPD_C:
	case PFSYNC_S_UPD:
	case PFSYNC_S_INS:
		/* we're already handling it */

		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) {
			st->sync_updates++;
			if (st->sync_updates >= sc->sc_maxupdates)
				sync = 1;
		}
		break;

	case PFSYNC_S_IACK:
		pfsync_q_del(st);
		/* FALLTHROUGH */
	case PFSYNC_S_NONE:
		pfsync_q_ins(st, PFSYNC_S_UPD_C);
		st->sync_updates = 0;
		break;

	default:
		panic("pfsync_update_state: unexpected sync state %d",
		    st->sync_state);
	}

	if (sync || (time_uptime - st->pfsync_time) < 2) {
		pfsync_upds++;
		schednetisr(NETISR_PFSYNC);
	}
}

void
pfsync_request_update(u_int32_t creatorid, u_int64_t id)
{
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_upd_req_item *item;
	size_t nlen = sizeof(struct pfsync_upd_req);

	/*
	 * this code does nothing to prevent multiple update requests for the
	 * same state being generated.
	 */

	item = pool_get(&sc->sc_pool, PR_NOWAIT);
	if (item == NULL) {
		/* XXX stats */
		return;
	}

	item->ur_msg.id = id;
	item->ur_msg.creatorid = creatorid;

	if (TAILQ_EMPTY(&sc->sc_upd_req_list))
		nlen += sizeof(struct pfsync_subheader);

	if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
		pfsync_sendout();

		nlen = sizeof(struct pfsync_subheader) +
		    sizeof(struct pfsync_upd_req);
	}

	TAILQ_INSERT_TAIL(&sc->sc_upd_req_list, item, ur_entry);
	sc->sc_len += nlen;

	schednetisr(NETISR_PFSYNC);
}
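
/*
 * The nlen dance above mirrors pfsync_q_ins(): the first entry on an
 * empty list also pays for its subheader, and if the addition would
 * push sc_len past the interface MTU the pending packet is flushed
 * first, after which the subheader must be paid for again.
 */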

void
pfsync_update_state_req(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;

	if (sc == NULL)
		panic("pfsync_update_state_req: nonexistent instance");

	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
		if (st->sync_state != PFSYNC_S_NONE)
			pfsync_q_del(st);
		return;
	}

	switch (st->sync_state) {
	case PFSYNC_S_UPD_C:
	case PFSYNC_S_IACK:
		pfsync_q_del(st);
		/* FALLTHROUGH */
	case PFSYNC_S_NONE:
		pfsync_q_ins(st, PFSYNC_S_UPD);
		schednetisr(NETISR_PFSYNC);
		return;

	case PFSYNC_S_INS:
	case PFSYNC_S_UPD:
	case PFSYNC_S_DEL:
		/* we're already handling it */
		return;

	default:
		panic("pfsync_update_state_req: unexpected sync state %d",
		    st->sync_state);
	}
}

void
pfsync_delete_state(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;

	splsoftassert(IPL_SOFTNET);

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
		return;

	if (ISSET(st->state_flags, PFSTATE_ACK))
		pfsync_deferred(st, 1);
	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
		if (st->sync_state != PFSYNC_S_NONE)
			pfsync_q_del(st);
		return;
	}

	if (sc->sc_len == PFSYNC_MINPKT)
		timeout_add_sec(&sc->sc_tmo, 1);

	switch (st->sync_state) {
	case PFSYNC_S_INS:
		/* we never got to tell the world so just forget about it */
		pfsync_q_del(st);
		return;

	case PFSYNC_S_UPD_C:
	case PFSYNC_S_UPD:
	case PFSYNC_S_IACK:
		pfsync_q_del(st);
		/* FALLTHROUGH to putting it on the del list */

	case PFSYNC_S_NONE:
		pfsync_q_ins(st, PFSYNC_S_DEL);
		return;

	default:
		panic("pfsync_delete_state: unexpected sync state %d",
		    st->sync_state);
	}
}

void
pfsync_clear_states(u_int32_t creatorid, const char *ifname)
{
	struct pfsync_softc *sc = pfsyncif;
	struct {
		struct pfsync_subheader subh;
		struct pfsync_clr clr;
	} __packed r;

	splsoftassert(IPL_SOFTNET);

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
		return;

	bzero(&r, sizeof(r));

	r.subh.action = PFSYNC_ACT_CLR;
	r.subh.count = htons(1);

	strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname));
	r.clr.creatorid = creatorid;

	pfsync_send_plus(&r, sizeof(r));
}

void
pfsync_q_ins(struct pf_state *st, int q)
{
	struct pfsync_softc *sc = pfsyncif;
	size_t nlen = pfsync_qs[q].len;

	KASSERT(st->sync_state == PFSYNC_S_NONE);

#if 1 || defined(PFSYNC_DEBUG)
	if (sc->sc_len < PFSYNC_MINPKT)
		panic("pfsync pkt len is too low %zu", sc->sc_len);
#endif
	if (TAILQ_EMPTY(&sc->sc_qs[q]))
		nlen += sizeof(struct pfsync_subheader);

	if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
		pfsync_sendout();

		nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len;
	}

	sc->sc_len += nlen;
	TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list);
	st->sync_state = q;
}

void
pfsync_q_del(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;
	int q = st->sync_state;

	KASSERT(st->sync_state != PFSYNC_S_NONE);

	sc->sc_len -= pfsync_qs[q].len;
	TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list);
	st->sync_state = PFSYNC_S_NONE;

	if (TAILQ_EMPTY(&sc->sc_qs[q]))
		sc->sc_len -= sizeof(struct pfsync_subheader);
}
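
/*
 * Worked example of the accounting: queueing the first state on an
 * empty PFSYNC_S_UPD_C queue grows sc_len by
 * sizeof(struct pfsync_subheader) + sizeof(struct pfsync_upd_c);
 * every further state on that queue adds only the upd_c payload, and
 * pfsync_q_del() gives the subheader back once the queue drains.
 */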

void
pfsync_update_tdb(struct tdb *t, int output)
{
	struct pfsync_softc *sc = pfsyncif;
	size_t nlen = sizeof(struct pfsync_tdb);

	if (sc == NULL)
		return;

	if (!ISSET(t->tdb_flags, TDBF_PFSYNC)) {
		if (TAILQ_EMPTY(&sc->sc_tdb_q))
			nlen += sizeof(struct pfsync_subheader);

		if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
			pfsync_sendout();

			nlen = sizeof(struct pfsync_subheader) +
			    sizeof(struct pfsync_tdb);
		}

		sc->sc_len += nlen;
		TAILQ_INSERT_TAIL(&sc->sc_tdb_q, t, tdb_sync_entry);
		SET(t->tdb_flags, TDBF_PFSYNC);
		t->tdb_updates = 0;
	} else {
		if (++t->tdb_updates >= sc->sc_maxupdates)
			schednetisr(NETISR_PFSYNC);
	}

	if (output)
		SET(t->tdb_flags, TDBF_PFSYNC_RPL);
	else
		CLR(t->tdb_flags, TDBF_PFSYNC_RPL);
}

void
pfsync_delete_tdb(struct tdb *t)
{
	struct pfsync_softc *sc = pfsyncif;

	if (sc == NULL || !ISSET(t->tdb_flags, TDBF_PFSYNC))
		return;

	sc->sc_len -= sizeof(struct pfsync_tdb);
	TAILQ_REMOVE(&sc->sc_tdb_q, t, tdb_sync_entry);
	CLR(t->tdb_flags, TDBF_PFSYNC);

	if (TAILQ_EMPTY(&sc->sc_tdb_q))
		sc->sc_len -= sizeof(struct pfsync_subheader);
}

void
pfsync_out_tdb(struct tdb *t, void *buf)
{
	struct pfsync_tdb *ut = buf;

	bzero(ut, sizeof(*ut));
	ut->spi = t->tdb_spi;
	bcopy(&t->tdb_dst, &ut->dst, sizeof(ut->dst));
	/*
	 * When a failover happens, the master's rpl is probably above
	 * what we see here (we may be up to a second late), so
	 * increase it a bit for outbound tdbs to manage most such
	 * situations.
	 *
	 * For now, just add an offset that is likely to be larger
	 * than the number of packets we can see in one second.  The RFC
	 * just says the next packet must have a higher seq value.
	 *
	 * XXX What is a good algorithm for this?  We could use
	 * a rate-determined increase, but to know it, we would have
	 * to extend struct tdb.
	 * XXX pt->rpl can wrap over MAXINT, but if so the real tdb
	 * will soon be replaced anyway.  For now, just don't handle
	 * this edge case.
	 */
#define RPL_INCR 16384
	ut->rpl = htonl(t->tdb_rpl + (ISSET(t->tdb_flags, TDBF_PFSYNC_RPL) ?
	    RPL_INCR : 0));
	ut->cur_bytes = htobe64(t->tdb_cur_bytes);
	ut->sproto = t->tdb_sproto;
}
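
/*
 * Bulk updates: a peer asks for the whole state table by sending an
 * update request with id and creatorid both zero (see
 * pfsync_in_ureq()).  pfsync_bulk_update() then walks state_list
 * roughly a packet's worth at a time, rescheduling itself with the
 * timeout until it wraps back around to sc_bulk_last, and the run is
 * bracketed with PFSYNC_BUS_START/END status messages.
 */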

void
pfsync_bulk_start(void)
{
	struct pfsync_softc *sc = pfsyncif;

	sc->sc_ureq_received = time_uptime;

	if (sc->sc_bulk_next == NULL)
		sc->sc_bulk_next = TAILQ_FIRST(&state_list);
	sc->sc_bulk_last = sc->sc_bulk_next;

	if (pf_status.debug >= PF_DEBUG_MISC)
		printf("pfsync: received bulk update request\n");

	pfsync_bulk_status(PFSYNC_BUS_START);
	timeout_add(&sc->sc_bulk_tmo, 0);
}

void
pfsync_bulk_update(void *arg)
{
	struct pfsync_softc *sc = arg;
	struct pf_state *st;
	int i = 0;
	int s;

	s = splsoftnet();

	st = sc->sc_bulk_next;

	while (st != sc->sc_bulk_last) {
		if (st->sync_state == PFSYNC_S_NONE &&
		    st->timeout < PFTM_MAX &&
		    st->pfsync_time <= sc->sc_ureq_received) {
			pfsync_update_state_req(st);
			i++;
		}

		st = TAILQ_NEXT(st, entry_list);
		if (st == NULL)
			st = TAILQ_FIRST(&state_list);

		if (i > 0 && TAILQ_EMPTY(&sc->sc_qs[PFSYNC_S_UPD])) {
			sc->sc_bulk_next = st;
			timeout_add(&sc->sc_bulk_tmo, 1);
			goto out;
		}
	}

	/* we're done */
	sc->sc_bulk_next = NULL;
	sc->sc_bulk_last = NULL;
	pfsync_bulk_status(PFSYNC_BUS_END);

out:
	splx(s);
}

void
pfsync_bulk_status(u_int8_t status)
{
	struct {
		struct pfsync_subheader subh;
		struct pfsync_bus bus;
	} __packed r;

	struct pfsync_softc *sc = pfsyncif;

	bzero(&r, sizeof(r));

	r.subh.action = PFSYNC_ACT_BUS;
	r.subh.count = htons(1);

	r.bus.creatorid = pf_status.hostid;
	r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received);
	r.bus.status = status;

	pfsync_send_plus(&r, sizeof(r));
}

void
pfsync_bulk_fail(void *arg)
{
	struct pfsync_softc *sc = arg;

	if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) {
		/* Try again */
		timeout_add_sec(&sc->sc_bulkfail_tmo, 5);
		pfsync_request_update(0, 0);
	} else {
		/* Pretend like the transfer was ok */
		sc->sc_ureq_sent = 0;
		sc->sc_bulk_tries = 0;
#if NCARP > 0
		if (!pfsync_sync_ok)
			carp_group_demote_adj(&sc->sc_if, -1);
#endif
		pfsync_sync_ok = 1;
		if (pf_status.debug >= PF_DEBUG_MISC)
			printf("pfsync: failed to receive bulk update\n");
	}
}

void
pfsync_send_plus(void *plus, size_t pluslen)
{
	struct pfsync_softc *sc = pfsyncif;

	if (sc->sc_len + pluslen > sc->sc_if.if_mtu)
		pfsync_sendout();

	sc->sc_plus = plus;
	sc->sc_len += (sc->sc_pluslen = pluslen);

	pfsync_sendout();
}

int
pfsync_up(void)
{
	struct pfsync_softc *sc = pfsyncif;

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
		return (0);

	return (1);
}

int
pfsync_state_in_use(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;

	if (sc == NULL)
		return (0);

	if (st->sync_state != PFSYNC_S_NONE)
		return (1);

	if (sc->sc_bulk_next == NULL && sc->sc_bulk_last == NULL)
		return (0);

	return (1);
}

void
pfsync_timeout(void *arg)
{
	int s;

	s = splsoftnet();
	pfsync_sendout();
	splx(s);
}

/* this is a softnet/netisr handler */
void
pfsyncintr(void)
{
	pfsync_sendout();
}

int
pfsync_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp,
    void *newp, size_t newlen)
{
	/* All sysctl names at this level are terminal. */
	if (namelen != 1)
		return (ENOTDIR);

	switch (name[0]) {
	case PFSYNCCTL_STATS:
		if (newp != NULL)
			return (EPERM);
		return (sysctl_struct(oldp, oldlenp, newp, newlen,
		    &pfsyncstats, sizeof(pfsyncstats)));
	default:
		return (ENOPROTOOPT);
	}
}