/*
 * Copyright (c) 2004 The DragonFly Project.  All rights reserved.
 *
 * Copyright (c) 2001 Daniel Hartmeier
 * Copyright (c) 2002 - 2008 Henning Brauer
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *    - Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *    - Redistributions in binary form must reproduce the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer in the documentation and/or other materials provided
 *      with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Effort sponsored in part by the Defense Advanced Research Projects
 * Agency (DARPA) and Air Force Research Laboratory, Air Force
 * Materiel Command, USAF, under agreement number F30602-01-2-0537.
 *
 */

#include "opt_inet.h"
#include "opt_inet6.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/filio.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/kernel.h>
#include <sys/time.h>
#include <sys/sysctl.h>
#include <sys/endian.h>
#include <sys/proc.h>
#include <sys/kthread.h>
#include <sys/spinlock.h>

#include <sys/md5.h>

#include <net/if.h>
#include <net/if_types.h>
#include <net/bpf.h>
#include <net/netisr2.h>
#include <net/route.h>

#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_seq.h>
#include <netinet/udp.h>
#include <netinet/ip_icmp.h>
#include <netinet/in_pcb.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/udp_var.h>
#include <netinet/icmp_var.h>
#include <netinet/if_ether.h>

#include <net/pf/pfvar.h>
#include <net/pf/if_pflog.h>

#include <net/pf/if_pfsync.h>

#ifdef INET6
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <netinet6/nd6.h>
#include <netinet6/ip6_var.h>
#include <netinet6/in6_pcb.h>
#endif /* INET6 */

#include <sys/in_cksum.h>
#include <sys/ucred.h>
#include <machine/limits.h>
#include <sys/msgport2.h>
#include <sys/spinlock2.h>
#include <net/netmsg2.h>
#include <net/toeplitz2.h>

extern int ip_optcopy(struct ip *, struct ip *);
extern int debug_pfugidhack;

/*
 * pf_token - shared lock for cpu-localized operations,
 *	      exclusive lock otherwise.
 *
 * pf_gtoken - exclusive lock used for initialization.
 */
struct lwkt_token pf_token = LWKT_TOKEN_INITIALIZER(pf_token);
struct lwkt_token pf_gtoken = LWKT_TOKEN_INITIALIZER(pf_gtoken);

#define DPFPRINTF(n, x)	if (pf_status.debug >= (n)) kprintf x

#define FAIL(code)	{ error = (code); goto done; }

/*
 * Global variables
 */

/* mask radix tree */
struct radix_node_head	*pf_maskhead;

/* state tables */
struct pf_state_tree	*pf_statetbl;		/* incls one global table */
struct pf_state		**purge_cur;
struct pf_altqqueue	 pf_altqs[2];
struct pf_palist	 pf_pabuf;
struct pf_altqqueue	*pf_altqs_active;
struct pf_altqqueue	*pf_altqs_inactive;
struct pf_status	 pf_status;

u_int32_t		 ticket_altqs_active;
u_int32_t		 ticket_altqs_inactive;
int			 altqs_inactive_open;
u_int32_t		 ticket_pabuf;

MD5_CTX			 pf_tcp_secret_ctx;
u_char			 pf_tcp_secret[16];
int			 pf_tcp_secret_init;
int			 pf_tcp_iss_off;

struct pf_anchor_stackframe {
	struct pf_ruleset	*rs;
	struct pf_rule		*r;
	struct pf_anchor_node	*parent;
	struct pf_anchor	*child;
} pf_anchor_stack[64];

struct malloc_type	*pf_src_tree_pl, *pf_rule_pl, *pf_pooladdr_pl;
struct malloc_type	*pf_state_pl, *pf_state_key_pl, *pf_state_item_pl;
struct malloc_type	*pf_altq_pl;

void	 pf_print_host(struct pf_addr *, u_int16_t, u_int8_t);

void	 pf_init_threshold(struct pf_threshold *, u_int32_t,
	    u_int32_t);
void	 pf_add_threshold(struct pf_threshold *);
int	 pf_check_threshold(struct pf_threshold *);

void	 pf_change_ap(struct pf_addr *, u_int16_t *,
	    u_int16_t *, u_int16_t *, struct pf_addr *,
	    u_int16_t, u_int8_t, sa_family_t);
int	 pf_modulate_sack(struct mbuf *, int, struct pf_pdesc *,
	    struct tcphdr *, struct pf_state_peer *);
#ifdef INET6
void	 pf_change_a6(struct pf_addr *, u_int16_t *,
	    struct pf_addr *, u_int8_t);
#endif /* INET6 */
void	 pf_change_icmp(struct pf_addr *, u_int16_t *,
	    struct pf_addr *, struct pf_addr *, u_int16_t,
	    u_int16_t *, u_int16_t *, u_int16_t *,
	    u_int16_t *, u_int8_t, sa_family_t);
void	 pf_send_tcp(const struct pf_rule *, sa_family_t,
	    const struct pf_addr *, const struct pf_addr *,
	    u_int16_t, u_int16_t, u_int32_t, u_int32_t,
	    u_int8_t, u_int16_t, u_int16_t, u_int8_t, int,
	    u_int16_t, struct ether_header *, struct ifnet *);
void	 pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t,
	    sa_family_t, struct pf_rule *);
struct pf_rule *pf_match_translation(struct pf_pdesc *, struct mbuf *,
	    int, int, struct pfi_kif *,
	    struct pf_addr *, u_int16_t, struct pf_addr *,
	    u_int16_t, int);
struct pf_rule *pf_get_translation(struct pf_pdesc *, struct mbuf *,
	    int, int, struct pfi_kif *, struct pf_src_node **,
	    struct pf_state_key **, struct pf_state_key **,
	    struct pf_state_key **, struct pf_state_key **,
	    struct pf_addr *, struct pf_addr *,
	    u_int16_t, u_int16_t);
void	 pf_detach_state(struct pf_state *);
int	 pf_state_key_setup(struct pf_pdesc *, struct pf_rule *,
	    struct pf_state_key **, struct pf_state_key **,
	    struct pf_state_key **, struct pf_state_key **,
	    struct pf_addr *, struct pf_addr *,
	    u_int16_t, u_int16_t);
void	 pf_state_key_detach(struct pf_state *, int);
u_int32_t pf_tcp_iss(struct pf_pdesc *);
int	 pf_test_rule(struct pf_rule **, struct pf_state **,
	    int, struct pfi_kif *, struct mbuf *, int,
	    void *, struct pf_pdesc *, struct pf_rule **,
	    struct pf_ruleset **, struct ifqueue *, struct inpcb *);
static __inline int pf_create_state(struct pf_rule *, struct pf_rule *,
	    struct pf_rule *, struct pf_pdesc *,
	    struct pf_src_node *, struct pf_state_key *,
	    struct pf_state_key *, struct pf_state_key *,
	    struct pf_state_key *, struct mbuf *, int,
	    u_int16_t, u_int16_t, int *, struct pfi_kif *,
	    struct pf_state **, int, u_int16_t, u_int16_t,
	    int);
int	 pf_test_fragment(struct pf_rule **, int,
	    struct pfi_kif *, struct mbuf *, void *,
	    struct pf_pdesc *, struct pf_rule **,
	    struct pf_ruleset **);
int	 pf_tcp_track_full(struct pf_state_peer *,
	    struct pf_state_peer *, struct pf_state **,
	    struct pfi_kif *, struct mbuf *, int,
	    struct pf_pdesc *, u_short *, int *);
int	 pf_tcp_track_sloppy(struct pf_state_peer *,
	    struct pf_state_peer *, struct pf_state **,
	    struct pf_pdesc *, u_short *);
int	 pf_test_state_tcp(struct pf_state **, int,
	    struct pfi_kif *, struct mbuf *, int,
	    void *, struct pf_pdesc *, u_short *);
int	 pf_test_state_udp(struct pf_state **, int,
	    struct pfi_kif *, struct mbuf *, int,
	    void *, struct pf_pdesc *);
int	 pf_test_state_icmp(struct pf_state **, int,
	    struct pfi_kif *, struct mbuf *, int,
	    void *, struct pf_pdesc *, u_short *);
int	 pf_test_state_other(struct pf_state **, int,
	    struct pfi_kif *, struct mbuf *, struct pf_pdesc *);
void	 pf_step_into_anchor(int *, struct pf_ruleset **, int,
	    struct pf_rule **, struct pf_rule **, int *);
int	 pf_step_out_of_anchor(int *, struct pf_ruleset **,
	    int, struct pf_rule **, struct pf_rule **,
	    int *);
void	 pf_hash(struct pf_addr *, struct pf_addr *,
	    struct pf_poolhashkey *, sa_family_t);
int	 pf_map_addr(u_int8_t, struct pf_rule *,
	    struct pf_addr *, struct pf_addr *,
	    struct pf_addr *, struct pf_src_node **);
int	 pf_get_sport(struct pf_pdesc *,
	    sa_family_t, u_int8_t, struct pf_rule *,
	    struct pf_addr *, struct pf_addr *,
	    u_int16_t, u_int16_t,
	    struct pf_addr *, u_int16_t *,
	    u_int16_t, u_int16_t,
	    struct pf_src_node **);
void	 pf_route(struct mbuf **, struct pf_rule *, int,
	    struct ifnet *, struct pf_state *,
	    struct pf_pdesc *);
void	 pf_route6(struct mbuf **, struct pf_rule *, int,
	    struct ifnet *, struct pf_state *,
	    struct pf_pdesc *);
u_int8_t pf_get_wscale(struct mbuf *, int, u_int16_t,
	    sa_family_t);
u_int16_t pf_get_mss(struct mbuf *, int, u_int16_t,
	    sa_family_t);
u_int16_t pf_calc_mss(struct pf_addr *, sa_family_t,
	    u_int16_t);
void	 pf_set_rt_ifp(struct pf_state *,
	    struct pf_addr *);
int	 pf_check_proto_cksum(struct mbuf *, int, int,
	    u_int8_t, sa_family_t);
struct pf_divert *pf_get_divert(struct mbuf *);
void	 pf_print_state_parts(struct pf_state *,
	    struct pf_state_key *, struct pf_state_key *);
int	 pf_addr_wrap_neq(struct pf_addr_wrap *,
	    struct pf_addr_wrap *);
struct pf_state *pf_find_state(struct pfi_kif *,
	    struct pf_state_key_cmp *, u_int, struct mbuf *);
int	 pf_src_connlimit(struct pf_state *);
int	 pf_check_congestion(struct ifqueue *);

extern int pf_end_threads;

struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = {
	{ &pf_state_pl,		PFSTATE_HIWAT },
	{ &pf_src_tree_pl,	PFSNODE_HIWAT },
	{ &pf_frent_pl,		PFFRAG_FRENT_HIWAT },
	{ &pfr_ktable_pl,	PFR_KTABLE_HIWAT },
	{ &pfr_kentry_pl,	PFR_KENTRY_HIWAT }
};

/*
 * If route-to and direction is out we match with no further processing
 *	(rt_kif must be assigned and not equal to the out interface)
 * If reply-to and direction is in we match with no further processing
 *	(rt_kif must be assigned and not equal to the in interface)
 */
#define STATE_LOOKUP(i, k, d, s, m)					\
	do {								\
		s = pf_find_state(i, k, d, m);				\
		if (s == NULL || (s)->timeout == PFTM_PURGE)		\
			return (PF_DROP);				\
		if (d == PF_OUT &&					\
		    (((s)->rule.ptr->rt == PF_ROUTETO &&		\
		    (s)->rule.ptr->direction == PF_OUT) ||		\
		    ((s)->rule.ptr->rt == PF_REPLYTO &&			\
		    (s)->rule.ptr->direction == PF_IN)) &&		\
		    (s)->rt_kif != NULL &&				\
		    (s)->rt_kif != i)					\
			return (PF_PASS);				\
	} while (0)

#define BOUND_IFACE(r, k) \
	((r)->rule_flag & PFRULE_IFBOUND) ? (k) : pfi_all

#define STATE_INC_COUNTERS(s)						\
	do {								\
		atomic_add_int(&s->rule.ptr->states_cur, 1);		\
		s->rule.ptr->states_tot++;				\
		if (s->anchor.ptr != NULL) {				\
			atomic_add_int(&s->anchor.ptr->states_cur, 1);	\
			s->anchor.ptr->states_tot++;			\
		}							\
		if (s->nat_rule.ptr != NULL) {				\
			atomic_add_int(&s->nat_rule.ptr->states_cur, 1); \
			s->nat_rule.ptr->states_tot++;			\
		}							\
	} while (0)

#define STATE_DEC_COUNTERS(s)						\
	do {								\
		if (s->nat_rule.ptr != NULL)				\
			atomic_add_int(&s->nat_rule.ptr->states_cur, -1); \
		if (s->anchor.ptr != NULL)				\
			atomic_add_int(&s->anchor.ptr->states_cur, -1);	\
		atomic_add_int(&s->rule.ptr->states_cur, -1);		\
	} while (0)

static MALLOC_DEFINE(M_PFSTATEPL, "pfstatepl", "pf state pool list");
static MALLOC_DEFINE(M_PFSRCTREEPL, "pfsrctpl", "pf source tree pool list");
static MALLOC_DEFINE(M_PFSTATEKEYPL, "pfstatekeypl", "pf state key pool list");
static MALLOC_DEFINE(M_PFSTATEITEMPL, "pfstateitempl", "pf state item pool list");

static __inline int pf_src_compare(struct pf_src_node *, struct pf_src_node *);
static __inline int pf_state_compare_key(struct pf_state_key *,
	    struct pf_state_key *);
static __inline int pf_state_compare_rkey(struct pf_state_key *,
	    struct pf_state_key *);
static __inline int pf_state_compare_id(struct pf_state *,
	    struct pf_state *);

struct pf_src_tree	*tree_src_tracking;
struct pf_state_tree_id	*tree_id;
struct pf_state_queue	*state_list;
struct pf_counters	*pf_counters;

RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare);
RB_GENERATE(pf_state_tree, pf_state_key, entry, pf_state_compare_key);
RB_GENERATE(pf_state_rtree, pf_state_key, entry, pf_state_compare_rkey);
RB_GENERATE(pf_state_tree_id, pf_state, entry_id, pf_state_compare_id);
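
/*
 * Red-black tree comparators.  Each returns <0, 0, or >0 in the usual
 * qsort() style so the trees generated above sort consistently.  For
 * IPv6 the 32-bit words are compared from addr32[3] down to addr32[0];
 * the resulting order only needs to be a total order, it does not have
 * to match numeric byte order.
 */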
static __inline int
pf_src_compare(struct pf_src_node *a, struct pf_src_node *b)
{
	int	diff;

	if (a->rule.ptr > b->rule.ptr)
		return (1);
	if (a->rule.ptr < b->rule.ptr)
		return (-1);
	if ((diff = a->af - b->af) != 0)
		return (diff);
	switch (a->af) {
#ifdef INET
	case AF_INET:
		if (a->addr.addr32[0] > b->addr.addr32[0])
			return (1);
		if (a->addr.addr32[0] < b->addr.addr32[0])
			return (-1);
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		if (a->addr.addr32[3] > b->addr.addr32[3])
			return (1);
		if (a->addr.addr32[3] < b->addr.addr32[3])
			return (-1);
		if (a->addr.addr32[2] > b->addr.addr32[2])
			return (1);
		if (a->addr.addr32[2] < b->addr.addr32[2])
			return (-1);
		if (a->addr.addr32[1] > b->addr.addr32[1])
			return (1);
		if (a->addr.addr32[1] < b->addr.addr32[1])
			return (-1);
		if (a->addr.addr32[0] > b->addr.addr32[0])
			return (1);
		if (a->addr.addr32[0] < b->addr.addr32[0])
			return (-1);
		break;
#endif /* INET6 */
	}
	return (0);
}

u_int32_t
pf_state_hash(struct pf_state_key *sk)
{
	u_int32_t hv = (u_int32_t)(((intptr_t)sk >> 6) ^ ((intptr_t)sk >> 15));
	if (hv == 0)	/* disallow 0 */
		hv = 1;
	return(hv);
}

#ifdef INET6
void
pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af)
{
	switch (af) {
#ifdef INET
	case AF_INET:
		dst->addr32[0] = src->addr32[0];
		break;
#endif /* INET */
	case AF_INET6:
		dst->addr32[0] = src->addr32[0];
		dst->addr32[1] = src->addr32[1];
		dst->addr32[2] = src->addr32[2];
		dst->addr32[3] = src->addr32[3];
		break;
	}
}
#endif /* INET6 */

void
pf_init_threshold(struct pf_threshold *threshold,
    u_int32_t limit, u_int32_t seconds)
{
	threshold->limit = limit * PF_THRESHOLD_MULT;
	threshold->seconds = seconds;
	threshold->count = 0;
	threshold->last = time_second;
}
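
/*
 * The threshold count is kept in fixed point: every hit adds
 * PF_THRESHOLD_MULT and the count decays linearly across the configured
 * window.  For example, with "max-src-conn-rate 10/5" the limit is
 * 10 * PF_THRESHOLD_MULT, and a hit arriving 2 seconds after the
 * previous one first scales the old count by (5 - 2) / 5 before adding;
 * counts older than the whole window are zeroed.
 */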
void
pf_add_threshold(struct pf_threshold *threshold)
{
	u_int32_t t = time_second, diff = t - threshold->last;

	if (diff >= threshold->seconds)
		threshold->count = 0;
	else
		threshold->count -= threshold->count * diff /
		    threshold->seconds;
	threshold->count += PF_THRESHOLD_MULT;
	threshold->last = t;
}

int
pf_check_threshold(struct pf_threshold *threshold)
{
	return (threshold->count > threshold->limit);
}

int
pf_src_connlimit(struct pf_state *state)
{
	int bad = 0;
	int cpu = mycpu->gd_cpuid;

	atomic_add_int(&state->src_node->conn, 1);
	state->src.tcp_est = 1;
	pf_add_threshold(&state->src_node->conn_rate);

	if (state->rule.ptr->max_src_conn &&
	    state->rule.ptr->max_src_conn <
	    state->src_node->conn) {
		PF_INC_LCOUNTER(LCNT_SRCCONN);
		bad++;
	}

	if (state->rule.ptr->max_src_conn_rate.limit &&
	    pf_check_threshold(&state->src_node->conn_rate)) {
		PF_INC_LCOUNTER(LCNT_SRCCONNRATE);
		bad++;
	}

	if (!bad)
		return 0;

	if (state->rule.ptr->overload_tbl) {
		struct pfr_addr p;
		u_int32_t killed = 0;

		PF_INC_LCOUNTER(LCNT_OVERLOAD_TABLE);
		if (pf_status.debug >= PF_DEBUG_MISC) {
			kprintf("pf_src_connlimit: blocking address ");
			pf_print_host(&state->src_node->addr, 0,
			    state->key[PF_SK_WIRE]->af);
		}

		bzero(&p, sizeof(p));
		p.pfra_af = state->key[PF_SK_WIRE]->af;
		switch (state->key[PF_SK_WIRE]->af) {
#ifdef INET
		case AF_INET:
			p.pfra_net = 32;
			p.pfra_ip4addr = state->src_node->addr.v4;
			break;
#endif /* INET */
#ifdef INET6
		case AF_INET6:
			p.pfra_net = 128;
			p.pfra_ip6addr = state->src_node->addr.v6;
			break;
#endif /* INET6 */
		}

		pfr_insert_kentry(state->rule.ptr->overload_tbl,
		    &p, time_second);

		/* kill existing states if that's required. */
		if (state->rule.ptr->flush) {
			struct pf_state_key *sk;
			struct pf_state *st;

			PF_INC_LCOUNTER(LCNT_OVERLOAD_FLUSH);
			RB_FOREACH(st, pf_state_tree_id, &tree_id[cpu]) {
				sk = st->key[PF_SK_WIRE];
				/*
				 * Kill states from this source.  (Only those
				 * from the same rule if PF_FLUSH_GLOBAL is not
				 * set).  (Only on current cpu).
				 */
				if (sk->af ==
				    state->key[PF_SK_WIRE]->af &&
				    ((state->direction == PF_OUT &&
				    PF_AEQ(&state->src_node->addr,
					&sk->addr[0], sk->af)) ||
				    (state->direction == PF_IN &&
				    PF_AEQ(&state->src_node->addr,
					&sk->addr[1], sk->af))) &&
				    (state->rule.ptr->flush &
				    PF_FLUSH_GLOBAL ||
				    state->rule.ptr == st->rule.ptr)) {
					st->timeout = PFTM_PURGE;
					st->src.state = st->dst.state =
					    TCPS_CLOSED;
					killed++;
				}
			}
			if (pf_status.debug >= PF_DEBUG_MISC)
				kprintf(", %u states killed", killed);
		}
		if (pf_status.debug >= PF_DEBUG_MISC)
			kprintf("\n");
	}

	/* kill this state */
	state->timeout = PFTM_PURGE;
	state->src.state = state->dst.state = TCPS_CLOSED;

	return 1;
}
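
/*
 * Find or create the source tracking node for this rule/address pair on
 * the current cpu.  Returns 0 with *sn set on success, or -1 when the
 * node limit is reached, the allocation fails, or max_src_states is
 * already exceeded for an existing node.
 */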
int
pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule,
    struct pf_addr *src, sa_family_t af)
{
	struct pf_src_node k;
	int cpu = mycpu->gd_cpuid;

	bzero(&k, sizeof(k));	/* avoid gcc warnings */
	if (*sn == NULL) {
		k.af = af;
		PF_ACPY(&k.addr, src, af);
		if (rule->rule_flag & PFRULE_RULESRCTRACK ||
		    rule->rpool.opts & PF_POOL_STICKYADDR)
			k.rule.ptr = rule;
		else
			k.rule.ptr = NULL;
		PF_INC_SCOUNTER(SCNT_SRC_NODE_SEARCH);
		*sn = RB_FIND(pf_src_tree, &tree_src_tracking[cpu], &k);
	}
	if (*sn == NULL) {
		if (!rule->max_src_nodes ||
		    rule->src_nodes < rule->max_src_nodes)
			(*sn) = kmalloc(sizeof(struct pf_src_node),
					M_PFSRCTREEPL, M_NOWAIT|M_ZERO);
		else
			PF_INC_LCOUNTER(LCNT_SRCNODES);
		if ((*sn) == NULL)
			return (-1);

		pf_init_threshold(&(*sn)->conn_rate,
		    rule->max_src_conn_rate.limit,
		    rule->max_src_conn_rate.seconds);

		(*sn)->af = af;
		if (rule->rule_flag & PFRULE_RULESRCTRACK ||
		    rule->rpool.opts & PF_POOL_STICKYADDR)
			(*sn)->rule.ptr = rule;
		else
			(*sn)->rule.ptr = NULL;
		PF_ACPY(&(*sn)->addr, src, af);
		if (RB_INSERT(pf_src_tree,
		    &tree_src_tracking[cpu], *sn) != NULL) {
			if (pf_status.debug >= PF_DEBUG_MISC) {
				kprintf("pf: src_tree insert failed: ");
				pf_print_host(&(*sn)->addr, 0, af);
				kprintf("\n");
			}
			kfree(*sn, M_PFSRCTREEPL);
			return (-1);
		}

		/*
		 * Atomic op required to increment src_nodes in the rule
		 * because we hold a shared token here (decrements will use
		 * an exclusive token).
		 */
		(*sn)->creation = time_second;
		(*sn)->ruletype = rule->action;
		if ((*sn)->rule.ptr != NULL)
			atomic_add_int(&(*sn)->rule.ptr->src_nodes, 1);
		PF_INC_SCOUNTER(SCNT_SRC_NODE_INSERT);
		atomic_add_int(&pf_status.src_nodes, 1);
	} else {
		if (rule->max_src_states &&
		    (*sn)->states >= rule->max_src_states) {
			PF_INC_LCOUNTER(LCNT_SRCSTATES);
			return (-1);
		}
	}
	return (0);
}
/*
 * state table (indexed by the pf_state_key structure), normal RBTREE
 * comparison.
 */
static __inline int
pf_state_compare_key(struct pf_state_key *a, struct pf_state_key *b)
{
	int	diff;

	if ((diff = a->proto - b->proto) != 0)
		return (diff);
	if ((diff = a->af - b->af) != 0)
		return (diff);
	switch (a->af) {
#ifdef INET
	case AF_INET:
		if (a->addr[0].addr32[0] > b->addr[0].addr32[0])
			return (1);
		if (a->addr[0].addr32[0] < b->addr[0].addr32[0])
			return (-1);
		if (a->addr[1].addr32[0] > b->addr[1].addr32[0])
			return (1);
		if (a->addr[1].addr32[0] < b->addr[1].addr32[0])
			return (-1);
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		if (a->addr[0].addr32[3] > b->addr[0].addr32[3])
			return (1);
		if (a->addr[0].addr32[3] < b->addr[0].addr32[3])
			return (-1);
		if (a->addr[1].addr32[3] > b->addr[1].addr32[3])
			return (1);
		if (a->addr[1].addr32[3] < b->addr[1].addr32[3])
			return (-1);
		if (a->addr[0].addr32[2] > b->addr[0].addr32[2])
			return (1);
		if (a->addr[0].addr32[2] < b->addr[0].addr32[2])
			return (-1);
		if (a->addr[1].addr32[2] > b->addr[1].addr32[2])
			return (1);
		if (a->addr[1].addr32[2] < b->addr[1].addr32[2])
			return (-1);
		if (a->addr[0].addr32[1] > b->addr[0].addr32[1])
			return (1);
		if (a->addr[0].addr32[1] < b->addr[0].addr32[1])
			return (-1);
		if (a->addr[1].addr32[1] > b->addr[1].addr32[1])
			return (1);
		if (a->addr[1].addr32[1] < b->addr[1].addr32[1])
			return (-1);
		if (a->addr[0].addr32[0] > b->addr[0].addr32[0])
			return (1);
		if (a->addr[0].addr32[0] < b->addr[0].addr32[0])
			return (-1);
		if (a->addr[1].addr32[0] > b->addr[1].addr32[0])
			return (1);
		if (a->addr[1].addr32[0] < b->addr[1].addr32[0])
			return (-1);
		break;
#endif /* INET6 */
	}

	if ((diff = a->port[0] - b->port[0]) != 0)
		return (diff);
	if ((diff = a->port[1] - b->port[1]) != 0)
		return (diff);

	return (0);
}
/*
 * Used for RB_FIND only, compare in the reverse direction.  The
 * element to be reversed is always (a), since we obviously can't
 * reverse the state tree depicted by (b).
 */
static __inline int
pf_state_compare_rkey(struct pf_state_key *a, struct pf_state_key *b)
{
	int	diff;

	if ((diff = a->proto - b->proto) != 0)
		return (diff);
	if ((diff = a->af - b->af) != 0)
		return (diff);
	switch (a->af) {
#ifdef INET
	case AF_INET:
		if (a->addr[1].addr32[0] > b->addr[0].addr32[0])
			return (1);
		if (a->addr[1].addr32[0] < b->addr[0].addr32[0])
			return (-1);
		if (a->addr[0].addr32[0] > b->addr[1].addr32[0])
			return (1);
		if (a->addr[0].addr32[0] < b->addr[1].addr32[0])
			return (-1);
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		if (a->addr[1].addr32[3] > b->addr[0].addr32[3])
			return (1);
		if (a->addr[1].addr32[3] < b->addr[0].addr32[3])
			return (-1);
		if (a->addr[0].addr32[3] > b->addr[1].addr32[3])
			return (1);
		if (a->addr[0].addr32[3] < b->addr[1].addr32[3])
			return (-1);
		if (a->addr[1].addr32[2] > b->addr[0].addr32[2])
			return (1);
		if (a->addr[1].addr32[2] < b->addr[0].addr32[2])
			return (-1);
		if (a->addr[0].addr32[2] > b->addr[1].addr32[2])
			return (1);
		if (a->addr[0].addr32[2] < b->addr[1].addr32[2])
			return (-1);
		if (a->addr[1].addr32[1] > b->addr[0].addr32[1])
			return (1);
		if (a->addr[1].addr32[1] < b->addr[0].addr32[1])
			return (-1);
		if (a->addr[0].addr32[1] > b->addr[1].addr32[1])
			return (1);
		if (a->addr[0].addr32[1] < b->addr[1].addr32[1])
			return (-1);
		if (a->addr[1].addr32[0] > b->addr[0].addr32[0])
			return (1);
		if (a->addr[1].addr32[0] < b->addr[0].addr32[0])
			return (-1);
		if (a->addr[0].addr32[0] > b->addr[1].addr32[0])
			return (1);
		if (a->addr[0].addr32[0] < b->addr[1].addr32[0])
			return (-1);
		break;
#endif /* INET6 */
	}

	if ((diff = a->port[1] - b->port[0]) != 0)
		return (diff);
	if ((diff = a->port[0] - b->port[1]) != 0)
		return (diff);

	return (0);
}

static __inline int
pf_state_compare_id(struct pf_state *a, struct pf_state *b)
{
	if (a->id > b->id)
		return (1);
	if (a->id < b->id)
		return (-1);
	if (a->creatorid > b->creatorid)
		return (1);
	if (a->creatorid < b->creatorid)
		return (-1);

	return (0);
}
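
/*
 * Attach state key sk to state s at slot idx (PF_SK_WIRE or PF_SK_STACK).
 * If an identical key is already present the state is chained onto the
 * existing key; a duplicate with the same kif and direction is reported
 * as a collision and fails with -1.
 */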
int
pf_state_key_attach(struct pf_state_key *sk, struct pf_state *s, int idx)
{
	struct pf_state_item *si;
	struct pf_state_key *cur;
	int cpu;
	int error;

	/*
	 * PFSTATE_STACK_GLOBAL is set when the state might not hash to the
	 * current cpu.  The keys are managed on the global statetbl tree
	 * for this case.  Only translations (RDR, NAT) can cause this.
	 *
	 * When this flag is not set we must still check the global statetbl
	 * for a collision, and if we find one we set the HALF_DUPLEX flag
	 * in the state.
	 */
	if (s->state_flags & PFSTATE_STACK_GLOBAL) {
		cpu = ncpus;
		lockmgr(&pf_global_statetbl_lock, LK_EXCLUSIVE);
	} else {
		cpu = mycpu->gd_cpuid;
		lockmgr(&pf_global_statetbl_lock, LK_SHARED);
	}
	KKASSERT(s->key[idx] == NULL);	/* XXX handle this? */

	if (pf_status.debug >= PF_DEBUG_MISC) {
		kprintf("state_key attach cpu %d (%08x:%d) %s (%08x:%d)\n",
		    cpu,
		    ntohl(sk->addr[0].addr32[0]), ntohs(sk->port[0]),
		    (idx == PF_SK_WIRE ? "->" : "<-"),
		    ntohl(sk->addr[1].addr32[0]), ntohs(sk->port[1]));
	}

	/*
	 * Check whether (e.g.) a PASS rule being put on a per-cpu tree
	 * collides with a translation rule on the global tree.  This is
	 * NOT an error.  We *WANT* to establish state for this case so the
	 * packet path is short-cutted and doesn't need to scan the ruleset
	 * on every packet.  But the established state will only see one
	 * side of a two-way packet conversation.  To prevent this from
	 * causing problems (e.g. generating a RST), we force PFSTATE_SLOPPY
	 * to be set on the established state.
	 *
	 * A collision against RDR state can only occur with a PASS IN in the
	 * opposite direction or a PASS OUT in the forwards direction.  This
	 * is because RDRs are processed on the input side.
	 *
	 * A collision against NAT state can only occur with a PASS IN in the
	 * forwards direction or a PASS OUT in the opposite direction.  This
	 * is because NATs are processed on the output side.
	 *
	 * In both situations we need to do a reverse addr/port test because
	 * the PASS IN or PASS OUT only establishes if it doesn't match the
	 * established RDR state in the forwards direction.  The direction
	 * flag has to be ignored (it will be one way for a PASS IN and the
	 * other way for a PASS OUT).
	 *
	 * pf_global_statetbl_lock will be locked shared when testing and
	 * not entering into the global state table.
	 */
	if (cpu != ncpus &&
	    (cur = RB_FIND(pf_state_rtree,
			   (struct pf_state_rtree *)&pf_statetbl[ncpus],
			   sk)) != NULL) {
		TAILQ_FOREACH(si, &cur->states, entry) {
			/*
			 * NOTE: We must ignore direction mismatches.
			 */
			if (si->s->kif == s->kif) {
				s->state_flags |= PFSTATE_HALF_DUPLEX |
						  PFSTATE_SLOPPY;
				if (pf_status.debug >= PF_DEBUG_MISC) {
					kprintf(
					    "pf: %s key attach collision "
					    "on %s: ",
					    (idx == PF_SK_WIRE) ?
						"wire" : "stack",
					    s->kif->pfik_name);
					pf_print_state_parts(s,
					    (idx == PF_SK_WIRE) ? sk : NULL,
					    (idx == PF_SK_STACK) ? sk : NULL);
					kprintf("\n");
				}
				break;
			}
		}
	}

	/*
	 * Enter into either the per-cpu or the global state table.
	 *
	 * pf_global_statetbl_lock will be locked exclusively when entering
	 * into the global state table.
	 */
	if ((cur = RB_INSERT(pf_state_tree, &pf_statetbl[cpu], sk)) != NULL) {
		/* key exists. check for same kif, if none, add to key */
		TAILQ_FOREACH(si, &cur->states, entry) {
			if (si->s->kif == s->kif &&
			    si->s->direction == s->direction) {
				if (pf_status.debug >= PF_DEBUG_MISC) {
					kprintf(
					    "pf: %s key attach failed on %s: ",
					    (idx == PF_SK_WIRE) ?
						"wire" : "stack",
					    s->kif->pfik_name);
					pf_print_state_parts(s,
					    (idx == PF_SK_WIRE) ? sk : NULL,
					    (idx == PF_SK_STACK) ? sk : NULL);
					kprintf("\n");
				}
				kfree(sk, M_PFSTATEKEYPL);
				error = -1;
				goto failed;	/* collision! */
			}
		}
		kfree(sk, M_PFSTATEKEYPL);

		s->key[idx] = cur;
	} else {
		s->key[idx] = sk;
	}

	if ((si = kmalloc(sizeof(struct pf_state_item),
			  M_PFSTATEITEMPL, M_NOWAIT)) == NULL) {
		pf_state_key_detach(s, idx);
		error = -1;
		goto failed;	/* allocation failure */
	}
	si->s = s;

	/* list is sorted, if-bound states before floating */
	if (s->kif == pfi_all)
		TAILQ_INSERT_TAIL(&s->key[idx]->states, si, entry);
	else
		TAILQ_INSERT_HEAD(&s->key[idx]->states, si, entry);

	error = 0;
failed:
	lockmgr(&pf_global_statetbl_lock, LK_RELEASE);
	return error;
}
/*
 * NOTE: Can only be called indirectly via the purge thread with pf_token
 *	 exclusively locked.
 */
void
pf_detach_state(struct pf_state *s)
{
	if (s->key[PF_SK_WIRE] == s->key[PF_SK_STACK])
		s->key[PF_SK_WIRE] = NULL;

	if (s->key[PF_SK_STACK] != NULL)
		pf_state_key_detach(s, PF_SK_STACK);

	if (s->key[PF_SK_WIRE] != NULL)
		pf_state_key_detach(s, PF_SK_WIRE);
}

/*
 * NOTE: Can only be called indirectly via the purge thread with pf_token
 *	 exclusively locked.
 */
void
pf_state_key_detach(struct pf_state *s, int idx)
{
	struct pf_state_item *si;
	int cpu;

	/*
	 * PFSTATE_STACK_GLOBAL is set for translations when the translated
	 * address/port is not localized to the same cpu that the untranslated
	 * address/port is on.  The wire pf_state_key is managed on the global
	 * statetbl tree for this case.
	 */
	if (s->state_flags & PFSTATE_STACK_GLOBAL) {
		cpu = ncpus;
		lockmgr(&pf_global_statetbl_lock, LK_EXCLUSIVE);
	} else {
		cpu = mycpu->gd_cpuid;
	}

	si = TAILQ_FIRST(&s->key[idx]->states);
	while (si && si->s != s)
		si = TAILQ_NEXT(si, entry);

	if (si) {
		TAILQ_REMOVE(&s->key[idx]->states, si, entry);
		kfree(si, M_PFSTATEITEMPL);
	}

	if (TAILQ_EMPTY(&s->key[idx]->states)) {
		RB_REMOVE(pf_state_tree, &pf_statetbl[cpu], s->key[idx]);
		if (s->key[idx]->reverse)
			s->key[idx]->reverse->reverse = NULL;
		if (s->key[idx]->inp)
			s->key[idx]->inp->inp_pf_sk = NULL;
		kfree(s->key[idx], M_PFSTATEKEYPL);
	}
	s->key[idx] = NULL;

	if (s->state_flags & PFSTATE_STACK_GLOBAL)
		lockmgr(&pf_global_statetbl_lock, LK_RELEASE);
}

struct pf_state_key *
pf_alloc_state_key(int pool_flags)
{
	struct pf_state_key *sk;

	sk = kmalloc(sizeof(struct pf_state_key), M_PFSTATEKEYPL, pool_flags);
	if (sk) {
		TAILQ_INIT(&sk->states);
	}
	return (sk);
}
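
/*
 * Set up the wire and stack state keys for a new state.  *skp always
 * holds the packet's original tuple; with a translation rule (nr) the
 * copy in *nkp is later rewritten by the caller with the translated
 * address/port.  Illustration with assumed addresses: an outbound
 * connection 10.0.0.5:1234 -> 1.2.3.4:80 NAT'd to 203.0.113.1:5678
 * keeps the untranslated tuple as the stack key (*sks), while the
 * rewritten copy becomes the wire key (*skw).
 */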
int
pf_state_key_setup(struct pf_pdesc *pd, struct pf_rule *nr,
    struct pf_state_key **skw, struct pf_state_key **sks,
    struct pf_state_key **skp, struct pf_state_key **nkp,
    struct pf_addr *saddr, struct pf_addr *daddr,
    u_int16_t sport, u_int16_t dport)
{
	KKASSERT((*skp == NULL && *nkp == NULL));

	if ((*skp = pf_alloc_state_key(M_NOWAIT | M_ZERO)) == NULL)
		return (ENOMEM);

	PF_ACPY(&(*skp)->addr[pd->sidx], saddr, pd->af);
	PF_ACPY(&(*skp)->addr[pd->didx], daddr, pd->af);
	(*skp)->port[pd->sidx] = sport;
	(*skp)->port[pd->didx] = dport;
	(*skp)->proto = pd->proto;
	(*skp)->af = pd->af;

	if (nr != NULL) {
		if ((*nkp = pf_alloc_state_key(M_NOWAIT | M_ZERO)) == NULL)
			return (ENOMEM); /* caller must handle cleanup */

		/* XXX maybe just bcopy and TAILQ_INIT(&(*nkp)->states) */
		PF_ACPY(&(*nkp)->addr[0], &(*skp)->addr[0], pd->af);
		PF_ACPY(&(*nkp)->addr[1], &(*skp)->addr[1], pd->af);
		(*nkp)->port[0] = (*skp)->port[0];
		(*nkp)->port[1] = (*skp)->port[1];
		(*nkp)->proto = pd->proto;
		(*nkp)->af = pd->af;
	} else {
		*nkp = *skp;
	}

	if (pd->dir == PF_IN) {
		*skw = *skp;
		*sks = *nkp;
	} else {
		*sks = *skp;
		*skw = *nkp;
	}
	return (0);
}

/*
 * Insert pf_state with one or two state keys (allowing a reverse path lookup
 * which is used by NAT).  In the NAT case skw is the initiator (?) and
 * sks is the target.
 */
int
pf_state_insert(struct pfi_kif *kif, struct pf_state_key *skw,
    struct pf_state_key *sks, struct pf_state *s)
{
	int cpu = mycpu->gd_cpuid;

	s->kif = kif;
	s->cpuid = cpu;

	if (skw == sks) {
		if (pf_state_key_attach(skw, s, PF_SK_WIRE))
			return (-1);
		s->key[PF_SK_STACK] = s->key[PF_SK_WIRE];
	} else {
		/*
		skw->reverse = sks;
		sks->reverse = skw;
		*/
		if (pf_state_key_attach(skw, s, PF_SK_WIRE)) {
			kfree(sks, M_PFSTATEKEYPL);
			return (-1);
		}
		if (pf_state_key_attach(sks, s, PF_SK_STACK)) {
			pf_state_key_detach(s, PF_SK_WIRE);
			return (-1);
		}
	}

	if (s->id == 0 && s->creatorid == 0) {
		u_int64_t sid;

		sid = atomic_fetchadd_long(&pf_status.stateid, 1);
		s->id = htobe64(sid);
		s->creatorid = pf_status.hostid;
	}

	/*
	 * Calculate hash code for altq
	 */
	s->hash = crc32(s->key[PF_SK_WIRE], PF_STATE_KEY_HASH_LENGTH);

	if (RB_INSERT(pf_state_tree_id, &tree_id[cpu], s) != NULL) {
		if (pf_status.debug >= PF_DEBUG_MISC) {
			kprintf("pf: state insert failed: "
			    "id: %016jx creatorid: %08x",
			    (uintmax_t)be64toh(s->id), ntohl(s->creatorid));
			if (s->sync_flags & PFSTATE_FROMSYNC)
				kprintf(" (from sync)");
			kprintf("\n");
		}
		pf_detach_state(s);
		return (-1);
	}
	TAILQ_INSERT_TAIL(&state_list[cpu], s, entry_list);
	PF_INC_FCOUNTER(FCNT_STATE_INSERT);
	atomic_add_int(&pf_status.states, 1);
	pfi_kif_ref(kif, PFI_KIF_REF_STATE);
	pfsync_insert_state(s);
	return (0);
}

struct pf_state *
pf_find_state_byid(struct pf_state_cmp *key)
{
	int cpu = mycpu->gd_cpuid;

	PF_INC_FCOUNTER(FCNT_STATE_SEARCH);

	return (RB_FIND(pf_state_tree_id, &tree_id[cpu],
			(struct pf_state *)key));
}

/*
 * WARNING! May return a state structure that was localized to another cpu,
 *	    destruction is typically protected by the caller's pf_token.
 *	    The element can only be destroyed by the purge thread.
 */
struct pf_state *
pf_find_state(struct pfi_kif *kif, struct pf_state_key_cmp *key, u_int dir,
    struct mbuf *m)
{
	struct pf_state_key *skey = (void *)key;
	struct pf_state_key *sk;
	struct pf_state_item *si;
	struct pf_state *s;
	int cpu = mycpu->gd_cpuid;
	int globalstl = 0;

	PF_INC_FCOUNTER(FCNT_STATE_SEARCH);
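
	/*
	 * On PF_OUT the input path may already have resolved this state;
	 * if so the mbuf carries the wire-side state key, whose ->reverse
	 * pointer caches the matching key and lets us skip the tree
	 * lookup entirely.
	 */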
	if (dir == PF_OUT && m->m_pkthdr.pf.statekey &&
	    ((struct pf_state_key *)m->m_pkthdr.pf.statekey)->reverse) {
		sk = ((struct pf_state_key *)m->m_pkthdr.pf.statekey)->reverse;
	} else {
		sk = RB_FIND(pf_state_tree, &pf_statetbl[cpu], skey);
		if (sk == NULL) {
			lockmgr(&pf_global_statetbl_lock, LK_SHARED);
			sk = RB_FIND(pf_state_tree, &pf_statetbl[ncpus], skey);
			if (sk == NULL) {
				lockmgr(&pf_global_statetbl_lock, LK_RELEASE);
				return (NULL);
			}
			globalstl = 1;
		}
		if (dir == PF_OUT && m->m_pkthdr.pf.statekey) {
			((struct pf_state_key *)
			    m->m_pkthdr.pf.statekey)->reverse = sk;
			sk->reverse = m->m_pkthdr.pf.statekey;
		}
	}
	if (dir == PF_OUT)
		m->m_pkthdr.pf.statekey = NULL;

	/* list is sorted, if-bound states before floating ones */
	TAILQ_FOREACH(si, &sk->states, entry) {
		if ((si->s->kif == pfi_all || si->s->kif == kif) &&
		    sk == (dir == PF_IN ? si->s->key[PF_SK_WIRE] :
					  si->s->key[PF_SK_STACK])) {
			break;
		}
	}

	/*
	 * Extract state before potentially releasing the global statetbl
	 * lock.  Ignore the state if the create is still in-progress as
	 * it can be deleted out from under us by the owning localized cpu.
	 * However, if CREATEINPROG is not set, state can only be deleted
	 * by the purge thread which we are protected from via our shared
	 * pf_token.
	 */
	if (si) {
		s = si->s;
		if (s && (s->state_flags & PFSTATE_CREATEINPROG))
			s = NULL;
	} else {
		s = NULL;
	}
	if (globalstl)
		lockmgr(&pf_global_statetbl_lock, LK_RELEASE);
	return s;
}

/*
 * WARNING! May return a state structure that was localized to another cpu,
 *	    destruction is typically protected by the caller's pf_token.
 */
struct pf_state *
pf_find_state_all(struct pf_state_key_cmp *key, u_int dir, int *more)
{
	struct pf_state_key *skey = (void *)key;
	struct pf_state_key *sk;
	struct pf_state_item *si, *ret = NULL;
	struct pf_state *s;
	int cpu = mycpu->gd_cpuid;
	int globalstl = 0;

	PF_INC_FCOUNTER(FCNT_STATE_SEARCH);

	sk = RB_FIND(pf_state_tree, &pf_statetbl[cpu], skey);
	if (sk == NULL) {
		lockmgr(&pf_global_statetbl_lock, LK_SHARED);
		sk = RB_FIND(pf_state_tree, &pf_statetbl[ncpus], skey);
		globalstl = 1;
	}
	if (sk != NULL) {
		TAILQ_FOREACH(si, &sk->states, entry)
			if (dir == PF_INOUT ||
			    (sk == (dir == PF_IN ? si->s->key[PF_SK_WIRE] :
						   si->s->key[PF_SK_STACK]))) {
				if (more == NULL) {
					ret = si;
					break;
				}
				if (ret)
					(*more)++;
				else
					ret = si;
			}
	}

	/*
	 * Extract state before potentially releasing the global statetbl
	 * lock.  Ignore the state if the create is still in-progress as
	 * it can be deleted out from under us by the owning localized cpu.
	 * However, if CREATEINPROG is not set, state can only be deleted
	 * by the purge thread which we are protected from via our shared
	 * pf_token.
	 */
	if (ret) {
		s = ret->s;
		if (s && (s->state_flags & PFSTATE_CREATEINPROG))
			s = NULL;
	} else {
		s = NULL;
	}
	if (globalstl)
		lockmgr(&pf_global_statetbl_lock, LK_RELEASE);
	return s;
}

/* END state table stuff */

void
pf_purge_thread(void *v)
{
	globaldata_t save_gd = mycpu;
	int nloops = 0;
	int locked = 0;
	int nn;
	int endingit;

	for (;;) {
		tsleep(pf_purge_thread, PWAIT, "pftm", 1 * hz);

		endingit = pf_end_threads;

		for (nn = 0; nn < ncpus; ++nn) {
			lwkt_setcpu_self(globaldata_find(nn));

			lwkt_gettoken(&pf_token);
			lockmgr(&pf_consistency_lock, LK_EXCLUSIVE);
			crit_enter();

			/*
			 * process a fraction of the state table every second
			 */
			if (!pf_purge_expired_states(
				1 + (pf_status.states /
				     pf_default_rule.timeout[
					PFTM_INTERVAL]), 0)) {
				pf_purge_expired_states(
				    1 + (pf_status.states /
					 pf_default_rule.timeout[
					    PFTM_INTERVAL]), 1);
			}

			/*
			 * purge other expired types every PFTM_INTERVAL
			 * seconds
			 */
			if (++nloops >=
			    pf_default_rule.timeout[PFTM_INTERVAL]) {
				pf_purge_expired_fragments();
				if (!pf_purge_expired_src_nodes(locked)) {
					pf_purge_expired_src_nodes(1);
				}
				nloops = 0;
			}

			/*
			 * If terminating the thread, clean everything out
			 * (on all cpus).
			 */
			if (endingit) {
				pf_purge_expired_states(pf_status.states, 0);
				pf_purge_expired_fragments();
				pf_purge_expired_src_nodes(1);
			}

			crit_exit();
			lockmgr(&pf_consistency_lock, LK_RELEASE);
			lwkt_reltoken(&pf_token);
		}
		lwkt_setcpu_self(save_gd);
		if (endingit)
			break;
	}

	/*
	 * Thread termination
	 */
	pf_end_threads++;
	wakeup(pf_purge_thread);
	kthread_exit();
}

u_int32_t
pf_state_expires(const struct pf_state *state)
{
	u_int32_t timeout;
	u_int32_t start;
	u_int32_t end;
	u_int32_t states;

	/* handle all PFTM_* > PFTM_MAX here */
	if (state->timeout == PFTM_PURGE)
		return (time_second);
	if (state->timeout == PFTM_UNTIL_PACKET)
		return (0);
	KKASSERT(state->timeout != PFTM_UNLINKED);
	KKASSERT(state->timeout < PFTM_MAX);
	timeout = state->rule.ptr->timeout[state->timeout];
	if (!timeout)
		timeout = pf_default_rule.timeout[state->timeout];
	start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START];
	if (start) {
		end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END];
		states = state->rule.ptr->states_cur;
	} else {
		start = pf_default_rule.timeout[PFTM_ADAPTIVE_START];
		end = pf_default_rule.timeout[PFTM_ADAPTIVE_END];
		states = pf_status.states;
	}

	/*
	 * If the number of states exceeds allowed values, adaptively
	 * timeout the state more quickly.  This can be very dangerous
	 * to legitimate connections, however, so defray the timeout
	 * based on the packet count.
	 *
	 * Retain from 0-100% based on number of states.
	 *
	 * Recover up to 50% of the lost portion if there was
	 * packet traffic (100 pkts = 50%).
	 */
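	/*
	 * Worked example with illustrative numbers: with start=6000,
	 * end=12000 and states=9000, n starts out at 50, i.e. retain 50%
	 * of the timeout.  A state that has seen 100 or more packets
	 * recovers half of the lost portion, giving n=75 and three
	 * quarters of the normal timeout.
	 */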
	if (end && states > start && start < end) {
		u_int32_t n;			/* timeout retention 0-100% */
		u_int64_t pkts;
#if 0
		static struct krate boorate = { .freq = 1 };
#endif

		/*
		 * Reduce timeout by n% (0-100)
		 */
		n = (states - start) * 100 / (end - start);
		if (n > 100)
			n = 0;
		else
			n = 100 - n;

		/*
		 * But claw back some of the reduction based on packet
		 * count associated with the state.
		 */
		pkts = state->packets[0] + state->packets[1];
		if (pkts > 100)
			pkts = 100;
#if 0
		krateprintf(&boorate, "timeout %-4u n=%u pkts=%-3lu -> %lu\n",
			    timeout, n, pkts, n + (100 - n) * pkts / 200);
#endif

		n += (100 - n) * pkts / 200;	/* recover by up-to 50% */
		timeout = timeout * n / 100;
	}
	return (state->expire + timeout);
}

/*
 * (called with exclusive pf_token)
 */
int
pf_purge_expired_src_nodes(int waslocked)
{
	struct pf_src_node *cur, *next;
	int locked = waslocked;
	int cpu = mycpu->gd_cpuid;

	for (cur = RB_MIN(pf_src_tree, &tree_src_tracking[cpu]);
	     cur;
	     cur = next) {
		next = RB_NEXT(pf_src_tree, &tree_src_tracking[cpu], cur);

		if (cur->states <= 0 && cur->expire <= time_second) {
			if (!locked) {
				lockmgr(&pf_consistency_lock, LK_EXCLUSIVE);
				next = RB_NEXT(pf_src_tree,
				    &tree_src_tracking[cpu], cur);
				locked = 1;
			}
			if (cur->rule.ptr != NULL) {
				/*
				 * decrements in rule should be ok, token is
				 * held exclusively in this code path.
				 */
				atomic_add_int(&cur->rule.ptr->src_nodes, -1);
				if (cur->rule.ptr->states_cur <= 0 &&
				    cur->rule.ptr->max_src_nodes <= 0)
					pf_rm_rule(NULL, cur->rule.ptr);
			}
			RB_REMOVE(pf_src_tree, &tree_src_tracking[cpu], cur);
			PF_INC_SCOUNTER(SCNT_SRC_NODE_REMOVALS);
			atomic_add_int(&pf_status.src_nodes, -1);
			kfree(cur, M_PFSRCTREEPL);
		}
	}
	if (locked && !waslocked)
		lockmgr(&pf_consistency_lock, LK_RELEASE);
	return(1);
}

void
pf_src_tree_remove_state(struct pf_state *s)
{
	u_int32_t timeout;

	if (s->src_node != NULL) {
		if (s->src.tcp_est)
			atomic_add_int(&s->src_node->conn, -1);
		if (--s->src_node->states <= 0) {
			timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
			if (!timeout) {
				timeout =
				    pf_default_rule.timeout[PFTM_SRC_NODE];
			}
			s->src_node->expire = time_second + timeout;
		}
	}
	if (s->nat_src_node != s->src_node && s->nat_src_node != NULL) {
		if (--s->nat_src_node->states <= 0) {
			timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
			if (!timeout)
				timeout =
				    pf_default_rule.timeout[PFTM_SRC_NODE];
			s->nat_src_node->expire = time_second + timeout;
		}
	}
	s->src_node = s->nat_src_node = NULL;
}
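
/*
 * Unlinking a state that is still in PF_TCPS_PROXY_DST means the
 * synproxy side completed a handshake with the destination on the
 * client's behalf; that peer connection is torn down explicitly with
 * an RST/ACK before the state goes away.
 */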
/* callers should be at crit_enter() */
void
pf_unlink_state(struct pf_state *cur)
{
	int cpu = mycpu->gd_cpuid;

	if (cur->src.state == PF_TCPS_PROXY_DST) {
		/* XXX wire key the right one? */
		pf_send_tcp(cur->rule.ptr, cur->key[PF_SK_WIRE]->af,
		    &cur->key[PF_SK_WIRE]->addr[1],
		    &cur->key[PF_SK_WIRE]->addr[0],
		    cur->key[PF_SK_WIRE]->port[1],
		    cur->key[PF_SK_WIRE]->port[0],
		    cur->src.seqhi, cur->src.seqlo + 1,
		    TH_RST|TH_ACK, 0, 0, 0, 1, cur->tag, NULL, NULL);
	}
	RB_REMOVE(pf_state_tree_id, &tree_id[cpu], cur);
	if (cur->creatorid == pf_status.hostid)
		pfsync_delete_state(cur);
	cur->timeout = PFTM_UNLINKED;
	pf_src_tree_remove_state(cur);
	pf_detach_state(cur);
}

/*
 * callers should be at crit_enter() and hold pf_consistency_lock exclusively.
 * pf_token must also be held exclusively.
 */
void
pf_free_state(struct pf_state *cur)
{
	int cpu = mycpu->gd_cpuid;

	KKASSERT(cur->cpuid == cpu);

	if (pfsyncif != NULL &&
	    (pfsyncif->sc_bulk_send_next == cur ||
	     pfsyncif->sc_bulk_terminator == cur))
		return;
	KKASSERT(cur->timeout == PFTM_UNLINKED);
	/*
	 * decrements in rule should be ok, token is
	 * held exclusively in this code path.
	 */
	if (--cur->rule.ptr->states_cur <= 0 &&
	    cur->rule.ptr->src_nodes <= 0)
		pf_rm_rule(NULL, cur->rule.ptr);
	if (cur->nat_rule.ptr != NULL) {
		if (--cur->nat_rule.ptr->states_cur <= 0 &&
		    cur->nat_rule.ptr->src_nodes <= 0) {
			pf_rm_rule(NULL, cur->nat_rule.ptr);
		}
	}
	if (cur->anchor.ptr != NULL) {
		if (--cur->anchor.ptr->states_cur <= 0)
			pf_rm_rule(NULL, cur->anchor.ptr);
	}
	pf_normalize_tcp_cleanup(cur);
	pfi_kif_unref(cur->kif, PFI_KIF_REF_STATE);

	/*
	 * We may be freeing pf_purge_expired_states()'s saved scan entry,
	 * adjust it if necessary.
	 */
	if (purge_cur[cpu] == cur) {
		kprintf("PURGE CONFLICT\n");
		purge_cur[cpu] = TAILQ_NEXT(purge_cur[cpu], entry_list);
	}
	TAILQ_REMOVE(&state_list[cpu], cur, entry_list);
	if (cur->tag)
		pf_tag_unref(cur->tag);
	kfree(cur, M_PFSTATEPL);
	PF_INC_FCOUNTER(FCNT_STATE_REMOVALS);
	atomic_add_int(&pf_status.states, -1);
}

int
pf_purge_expired_states(u_int32_t maxcheck, int waslocked)
{
	struct pf_state *cur;
	int locked = waslocked;
	int cpu = mycpu->gd_cpuid;

	while (maxcheck--) {
		/*
		 * Wrap to start of list when we hit the end
		 */
		cur = purge_cur[cpu];
		if (cur == NULL) {
			cur = TAILQ_FIRST(&state_list[cpu]);
			if (cur == NULL)
				break;	/* list empty */
		}

		/*
		 * Setup next (purge_cur) while we process this one.  If
		 * we block and something else deletes purge_cur,
		 * pf_free_state() will adjust it further ahead.
		 */
		purge_cur[cpu] = TAILQ_NEXT(cur, entry_list);

		if (cur->timeout == PFTM_UNLINKED) {
			/* free unlinked state */
			if (!locked) {
				lockmgr(&pf_consistency_lock, LK_EXCLUSIVE);
				locked = 1;
			}
			pf_free_state(cur);
		} else if (pf_state_expires(cur) <= time_second) {
			/* unlink and free expired state */
			pf_unlink_state(cur);
			if (!locked) {
				if (!lockmgr(&pf_consistency_lock,
					     LK_EXCLUSIVE))
					return (0);
				locked = 1;
			}
			pf_free_state(cur);
		}
	}

	if (locked)
		lockmgr(&pf_consistency_lock, LK_RELEASE);
	return (1);
}

int
pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw)
{
	if (aw->type != PF_ADDR_TABLE)
		return (0);
	if ((aw->p.tbl = pfr_attach_table(rs, aw->v.tblname)) == NULL)
		return (1);
	return (0);
}

void
pf_tbladdr_remove(struct pf_addr_wrap *aw)
{
	if (aw->type != PF_ADDR_TABLE || aw->p.tbl == NULL)
		return;
	pfr_detach_table(aw->p.tbl);
	aw->p.tbl = NULL;
}

void
pf_tbladdr_copyout(struct pf_addr_wrap *aw)
{
	struct pfr_ktable *kt = aw->p.tbl;

	if (aw->type != PF_ADDR_TABLE || kt == NULL)
		return;
	if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL)
		kt = kt->pfrkt_root;
	aw->p.tbl = NULL;
	aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ?
	    kt->pfrkt_cnt : -1;
}
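
/*
 * Print a host address (and optional port) for debug output.  For IPv6
 * the first loop finds the longest run of zero 16-bit groups so it can
 * be compressed to "::", matching the usual presentation format, and
 * the port is printed as "[port]" because ":" would be ambiguous.
 */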
void
pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af)
{
	switch (af) {
#ifdef INET
	case AF_INET: {
		u_int32_t a = ntohl(addr->addr32[0]);
		kprintf("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255,
		    (a>>8)&255, a&255);
		if (p) {
			p = ntohs(p);
			kprintf(":%u", p);
		}
		break;
	}
#endif /* INET */
#ifdef INET6
	case AF_INET6: {
		u_int16_t b;
		u_int8_t i, curstart, curend, maxstart, maxend;
		curstart = curend = maxstart = maxend = 255;
		for (i = 0; i < 8; i++) {
			if (!addr->addr16[i]) {
				if (curstart == 255)
					curstart = i;
				curend = i;
			} else {
				if ((curend - curstart) >
				    (maxend - maxstart)) {
					maxstart = curstart;
					maxend = curend;
				}
				curstart = curend = 255;
			}
		}
		if ((curend - curstart) >
		    (maxend - maxstart)) {
			maxstart = curstart;
			maxend = curend;
		}
		for (i = 0; i < 8; i++) {
			if (i >= maxstart && i <= maxend) {
				if (i == 0)
					kprintf(":");
				if (i == maxend)
					kprintf(":");
			} else {
				b = ntohs(addr->addr16[i]);
				kprintf("%x", b);
				if (i < 7)
					kprintf(":");
			}
		}
		if (p) {
			p = ntohs(p);
			kprintf("[%u]", p);
		}
		break;
	}
#endif /* INET6 */
	}
}

void
pf_print_state(struct pf_state *s)
{
	pf_print_state_parts(s, NULL, NULL);
}

void
pf_print_state_parts(struct pf_state *s,
    struct pf_state_key *skwp, struct pf_state_key *sksp)
{
	struct pf_state_key *skw, *sks;
	u_int8_t proto, dir;

	/* Do our best to fill these, but they're skipped if NULL */
	skw = skwp ? skwp : (s ? s->key[PF_SK_WIRE] : NULL);
	sks = sksp ? sksp : (s ? s->key[PF_SK_STACK] : NULL);
	proto = skw ? skw->proto : (sks ? sks->proto : 0);
	dir = s ? s->direction : 0;

	switch (proto) {
	case IPPROTO_TCP:
		kprintf("TCP ");
		break;
	case IPPROTO_UDP:
		kprintf("UDP ");
		break;
	case IPPROTO_ICMP:
		kprintf("ICMP ");
		break;
	case IPPROTO_ICMPV6:
		kprintf("ICMPV6 ");
		break;
	default:
		kprintf("%u ", skw->proto);
		break;
	}
	switch (dir) {
	case PF_IN:
		kprintf(" in");
		break;
	case PF_OUT:
		kprintf(" out");
		break;
	}
	if (skw) {
		kprintf(" wire: ");
		pf_print_host(&skw->addr[0], skw->port[0], skw->af);
		kprintf(" ");
		pf_print_host(&skw->addr[1], skw->port[1], skw->af);
	}
	if (sks) {
		kprintf(" stack: ");
		if (sks != skw) {
			pf_print_host(&sks->addr[0], sks->port[0], sks->af);
			kprintf(" ");
			pf_print_host(&sks->addr[1], sks->port[1], sks->af);
		} else
			kprintf("-");
	}
	if (s) {
		if (proto == IPPROTO_TCP) {
			kprintf(" [lo=%u high=%u win=%u modulator=%u",
			    s->src.seqlo, s->src.seqhi,
			    s->src.max_win, s->src.seqdiff);
			if (s->src.wscale && s->dst.wscale)
				kprintf(" wscale=%u",
				    s->src.wscale & PF_WSCALE_MASK);
			kprintf("]");
			kprintf(" [lo=%u high=%u win=%u modulator=%u",
			    s->dst.seqlo, s->dst.seqhi,
			    s->dst.max_win, s->dst.seqdiff);
			if (s->src.wscale && s->dst.wscale)
				kprintf(" wscale=%u",
				    s->dst.wscale & PF_WSCALE_MASK);
			kprintf("]");
		}
		kprintf(" %u:%u", s->src.state, s->dst.state);
	}
}

void
pf_print_flags(u_int8_t f)
{
	if (f)
		kprintf(" ");
	if (f & TH_FIN)
		kprintf("F");
	if (f & TH_SYN)
		kprintf("S");
	if (f & TH_RST)
		kprintf("R");
	if (f & TH_PUSH)
		kprintf("P");
	if (f & TH_ACK)
		kprintf("A");
	if (f & TH_URG)
		kprintf("U");
	if (f & TH_ECE)
		kprintf("E");
	if (f & TH_CWR)
		kprintf("W");
}
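
/*
 * Skip steps let the rule evaluator jump over runs of consecutive rules
 * that share a field.  For each field in PF_SKIP_COUNT, every rule in a
 * run points at the first rule where that field changes, so one failed
 * comparison (e.g. a protocol mismatch) skips the entire run instead of
 * re-testing the same value rule by rule.
 */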
#define PF_SET_SKIP_STEPS(i)					\
	do {							\
		while (head[i] != cur) {			\
			head[i]->skip[i].ptr = cur;		\
			head[i] = TAILQ_NEXT(head[i], entries);	\
		}						\
	} while (0)

void
pf_calc_skip_steps(struct pf_rulequeue *rules)
{
	struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT];
	int i;

	cur = TAILQ_FIRST(rules);
	prev = cur;
	for (i = 0; i < PF_SKIP_COUNT; ++i)
		head[i] = cur;
	while (cur != NULL) {

		if (cur->kif != prev->kif || cur->ifnot != prev->ifnot)
			PF_SET_SKIP_STEPS(PF_SKIP_IFP);
		if (cur->direction != prev->direction)
			PF_SET_SKIP_STEPS(PF_SKIP_DIR);
		if (cur->af != prev->af)
			PF_SET_SKIP_STEPS(PF_SKIP_AF);
		if (cur->proto != prev->proto)
			PF_SET_SKIP_STEPS(PF_SKIP_PROTO);
		if (cur->src.neg != prev->src.neg ||
		    pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr))
			PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR);
		if (cur->src.port[0] != prev->src.port[0] ||
		    cur->src.port[1] != prev->src.port[1] ||
		    cur->src.port_op != prev->src.port_op)
			PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT);
		if (cur->dst.neg != prev->dst.neg ||
		    pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr))
			PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR);
		if (cur->dst.port[0] != prev->dst.port[0] ||
		    cur->dst.port[1] != prev->dst.port[1] ||
		    cur->dst.port_op != prev->dst.port_op)
			PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);

		prev = cur;
		cur = TAILQ_NEXT(cur, entries);
	}
	for (i = 0; i < PF_SKIP_COUNT; ++i)
		PF_SET_SKIP_STEPS(i);
}

int
pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2)
{
	if (aw1->type != aw2->type)
		return (1);
	switch (aw1->type) {
	case PF_ADDR_ADDRMASK:
	case PF_ADDR_RANGE:
		if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, AF_INET6))
			return (1);
		if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, AF_INET6))
			return (1);
		return (0);
	case PF_ADDR_DYNIFTL:
		return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt);
	case PF_ADDR_NOROUTE:
	case PF_ADDR_URPFFAILED:
		return (0);
	case PF_ADDR_TABLE:
		return (aw1->p.tbl != aw2->p.tbl);
	case PF_ADDR_RTLABEL:
		return (aw1->v.rtlabel != aw2->v.rtlabel);
	default:
		kprintf("invalid address type: %d\n", aw1->type);
		return (1);
	}
}
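
/*
 * Incremental internet checksum update in the style of RFC 1624: fold
 * (cksum + old - new) back into 16 bits.  Rewriting 0x1234 to 0x1235,
 * for instance, just subtracts one from the checksum with the borrow
 * folded back in.  The "udp" flag covers UDP's special encoding, where
 * 0x0000 means "no checksum": an absent checksum stays absent and a
 * computed zero is transmitted as 0xFFFF.
 */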
u_int16_t
pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp)
{
	u_int32_t l;

	if (udp && !cksum)
		return (0x0000);
	l = cksum + old - new;
	l = (l >> 16) + (l & 65535);
	l = l & 65535;
	if (udp && !l)
		return (0xFFFF);
	return (l);
}

void
pf_change_ap(struct pf_addr *a, u_int16_t *p, u_int16_t *ic, u_int16_t *pc,
    struct pf_addr *an, u_int16_t pn, u_int8_t u, sa_family_t af)
{
	struct pf_addr ao;
	u_int16_t po = *p;

	PF_ACPY(&ao, a, af);
	PF_ACPY(a, an, af);

	*p = pn;

	switch (af) {
#ifdef INET
	case AF_INET:
		*ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
		    ao.addr16[0], an->addr16[0], 0),
		    ao.addr16[1], an->addr16[1], 0);
		*p = pn;
		*pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
		    ao.addr16[0], an->addr16[0], u),
		    ao.addr16[1], an->addr16[1], u),
		    po, pn, u);
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		*pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
		    ao.addr16[0], an->addr16[0], u),
		    ao.addr16[1], an->addr16[1], u),
		    ao.addr16[2], an->addr16[2], u),
		    ao.addr16[3], an->addr16[3], u),
		    ao.addr16[4], an->addr16[4], u),
		    ao.addr16[5], an->addr16[5], u),
		    ao.addr16[6], an->addr16[6], u),
		    ao.addr16[7], an->addr16[7], u),
		    po, pn, u);
		break;
#endif /* INET6 */
	}
}

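/*
 * pf_change_a() below rewrites any checksummed 32-bit quantity 16 bits
 * at a time; besides addresses, pf also uses it to shift TCP sequence
 * numbers inside SACK blocks (see pf_modulate_sack()).
 */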
Uses a void * so there are no align restrictions */ 1944 void 1945 pf_change_a(void *a, u_int16_t *c, u_int32_t an, u_int8_t u) 1946 { 1947 u_int32_t ao; 1948 1949 memcpy(&ao, a, sizeof(ao)); 1950 memcpy(a, &an, sizeof(u_int32_t)); 1951 *c = pf_cksum_fixup(pf_cksum_fixup(*c, ao / 65536, an / 65536, u), 1952 ao % 65536, an % 65536, u); 1953 } 1954 1955 #ifdef INET6 1956 void 1957 pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u) 1958 { 1959 struct pf_addr ao; 1960 1961 PF_ACPY(&ao, a, AF_INET6); 1962 PF_ACPY(a, an, AF_INET6); 1963 1964 *c = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( 1965 pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( 1966 pf_cksum_fixup(pf_cksum_fixup(*c, 1967 ao.addr16[0], an->addr16[0], u), 1968 ao.addr16[1], an->addr16[1], u), 1969 ao.addr16[2], an->addr16[2], u), 1970 ao.addr16[3], an->addr16[3], u), 1971 ao.addr16[4], an->addr16[4], u), 1972 ao.addr16[5], an->addr16[5], u), 1973 ao.addr16[6], an->addr16[6], u), 1974 ao.addr16[7], an->addr16[7], u); 1975 } 1976 #endif /* INET6 */ 1977 1978 void 1979 pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa, 1980 struct pf_addr *na, u_int16_t np, u_int16_t *pc, u_int16_t *h2c, 1981 u_int16_t *ic, u_int16_t *hc, u_int8_t u, sa_family_t af) 1982 { 1983 struct pf_addr oia, ooa; 1984 1985 PF_ACPY(&oia, ia, af); 1986 if (oa) 1987 PF_ACPY(&ooa, oa, af); 1988 1989 /* Change inner protocol port, fix inner protocol checksum. */ 1990 if (ip != NULL) { 1991 u_int16_t oip = *ip; 1992 u_int32_t opc = 0; 1993 1994 if (pc != NULL) 1995 opc = *pc; 1996 *ip = np; 1997 if (pc != NULL) 1998 *pc = pf_cksum_fixup(*pc, oip, *ip, u); 1999 *ic = pf_cksum_fixup(*ic, oip, *ip, 0); 2000 if (pc != NULL) 2001 *ic = pf_cksum_fixup(*ic, opc, *pc, 0); 2002 } 2003 /* Change inner ip address, fix inner ip and icmp checksums. */ 2004 PF_ACPY(ia, na, af); 2005 switch (af) { 2006 #ifdef INET 2007 case AF_INET: { 2008 u_int32_t oh2c = *h2c; 2009 2010 *h2c = pf_cksum_fixup(pf_cksum_fixup(*h2c, 2011 oia.addr16[0], ia->addr16[0], 0), 2012 oia.addr16[1], ia->addr16[1], 0); 2013 *ic = pf_cksum_fixup(pf_cksum_fixup(*ic, 2014 oia.addr16[0], ia->addr16[0], 0), 2015 oia.addr16[1], ia->addr16[1], 0); 2016 *ic = pf_cksum_fixup(*ic, oh2c, *h2c, 0); 2017 break; 2018 } 2019 #endif /* INET */ 2020 #ifdef INET6 2021 case AF_INET6: 2022 *ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( 2023 pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( 2024 pf_cksum_fixup(pf_cksum_fixup(*ic, 2025 oia.addr16[0], ia->addr16[0], u), 2026 oia.addr16[1], ia->addr16[1], u), 2027 oia.addr16[2], ia->addr16[2], u), 2028 oia.addr16[3], ia->addr16[3], u), 2029 oia.addr16[4], ia->addr16[4], u), 2030 oia.addr16[5], ia->addr16[5], u), 2031 oia.addr16[6], ia->addr16[6], u), 2032 oia.addr16[7], ia->addr16[7], u); 2033 break; 2034 #endif /* INET6 */ 2035 } 2036 /* Outer ip address, fix outer ip or icmpv6 checksum, if necessary. 
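oa may be NULL, in which case only the inner header is rewritten.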
 */
	if (oa) {
		PF_ACPY(oa, na, af);
		switch (af) {
#ifdef INET
		case AF_INET:
			*hc = pf_cksum_fixup(pf_cksum_fixup(*hc,
			    ooa.addr16[0], oa->addr16[0], 0),
			    ooa.addr16[1], oa->addr16[1], 0);
			break;
#endif /* INET */
#ifdef INET6
		case AF_INET6:
			*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
			    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
			    pf_cksum_fixup(pf_cksum_fixup(*ic,
			    ooa.addr16[0], oa->addr16[0], u),
			    ooa.addr16[1], oa->addr16[1], u),
			    ooa.addr16[2], oa->addr16[2], u),
			    ooa.addr16[3], oa->addr16[3], u),
			    ooa.addr16[4], oa->addr16[4], u),
			    ooa.addr16[5], oa->addr16[5], u),
			    ooa.addr16[6], oa->addr16[6], u),
			    ooa.addr16[7], oa->addr16[7], u);
			break;
#endif /* INET6 */
		}
	}
}

/*
 * Need to modulate the sequence numbers in the TCP SACK option
 * (credits to Krzysztof Pfaff for report and patch)
 */
int
pf_modulate_sack(struct mbuf *m, int off, struct pf_pdesc *pd,
    struct tcphdr *th, struct pf_state_peer *dst)
{
	int hlen = (th->th_off << 2) - sizeof(*th), thoptlen = hlen;
	u_int8_t opts[TCP_MAXOLEN], *opt = opts;
	int copyback = 0, i, olen;
	struct raw_sackblock sack;

#define TCPOLEN_SACKLEN	(TCPOLEN_SACK + 2)
	if (hlen < TCPOLEN_SACKLEN ||
	    !pf_pull_hdr(m, off + sizeof(*th), opts, hlen, NULL, NULL, pd->af))
		return (0);

	while (hlen >= TCPOLEN_SACKLEN) {
		olen = opt[1];
		switch (*opt) {
		case TCPOPT_EOL:	/* FALLTHROUGH */
		case TCPOPT_NOP:
			opt++;
			hlen--;
			break;
		case TCPOPT_SACK:
			if (olen > hlen)
				olen = hlen;
			if (olen >= TCPOLEN_SACKLEN) {
				for (i = 2; i + TCPOLEN_SACK <= olen;
				    i += TCPOLEN_SACK) {
					memcpy(&sack, &opt[i], sizeof(sack));
					pf_change_a(&sack.rblk_start,
					    &th->th_sum,
					    htonl(ntohl(sack.rblk_start) -
					    dst->seqdiff), 0);
					pf_change_a(&sack.rblk_end,
					    &th->th_sum,
					    htonl(ntohl(sack.rblk_end) -
					    dst->seqdiff), 0);
					memcpy(&opt[i], &sack, sizeof(sack));
				}
				copyback = 1;
			}
			/* FALLTHROUGH */
		default:
			if (olen < 2)
				olen = 2;
			hlen -= olen;
			opt += olen;
		}
	}

	if (copyback)
		m_copyback(m, off + sizeof(*th), thoptlen, opts);
	return (copyback);
}

void
pf_send_tcp(const struct pf_rule *r, sa_family_t af,
    const struct pf_addr *saddr, const struct pf_addr *daddr,
    u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack,
    u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag,
    u_int16_t rtag, struct ether_header *eh, struct ifnet *ifp)
{
	struct mbuf	*m;
	int		 len = 0, tlen;
#ifdef INET
	struct ip	*h = NULL;
#endif /* INET */
#ifdef INET6
	struct ip6_hdr	*h6 = NULL;
#endif /* INET6 */
	struct tcphdr	*th = NULL;
	char		*opt;

	ASSERT_LWKT_TOKEN_HELD(&pf_token);

	/* maximum segment size tcp option */
	tlen = sizeof(struct tcphdr);
	if (mss)
		tlen += 4;

	switch (af) {
#ifdef INET
	case AF_INET:
		len = sizeof(struct ip) + tlen;
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		len = sizeof(struct ip6_hdr) + tlen;
		break;
#endif /* INET6 */
	}

	/*
	 * Create outgoing mbuf.
	 *
	 * DragonFly doesn't zero the auxiliary pkthdr fields, only fw_flags,
	 * so make sure pf.flags is clear.
2167 */ 2168 m = m_gethdr(M_NOWAIT, MT_HEADER); 2169 if (m == NULL) { 2170 return; 2171 } 2172 if (tag) 2173 m->m_pkthdr.fw_flags |= PF_MBUF_TAGGED; 2174 m->m_pkthdr.pf.flags = 0; 2175 m->m_pkthdr.pf.tag = rtag; 2176 /* XXX Recheck when upgrading to > 4.4 */ 2177 m->m_pkthdr.pf.statekey = NULL; 2178 if (r != NULL && r->rtableid >= 0) 2179 m->m_pkthdr.pf.rtableid = r->rtableid; 2180 2181 #ifdef ALTQ 2182 if (r != NULL && r->qid) { 2183 m->m_pkthdr.fw_flags |= PF_MBUF_STRUCTURE; 2184 m->m_pkthdr.pf.qid = r->qid; 2185 m->m_pkthdr.pf.ecn_af = af; 2186 m->m_pkthdr.pf.hdr = mtod(m, struct ip *); 2187 } 2188 #endif /* ALTQ */ 2189 m->m_data += max_linkhdr; 2190 m->m_pkthdr.len = m->m_len = len; 2191 m->m_pkthdr.rcvif = NULL; 2192 bzero(m->m_data, len); 2193 switch (af) { 2194 #ifdef INET 2195 case AF_INET: 2196 h = mtod(m, struct ip *); 2197 2198 /* IP header fields included in the TCP checksum */ 2199 h->ip_p = IPPROTO_TCP; 2200 h->ip_len = htons(tlen); 2201 h->ip_src.s_addr = saddr->v4.s_addr; 2202 h->ip_dst.s_addr = daddr->v4.s_addr; 2203 2204 th = (struct tcphdr *)((caddr_t)h + sizeof(struct ip)); 2205 break; 2206 #endif /* INET */ 2207 #ifdef INET6 2208 case AF_INET6: 2209 h6 = mtod(m, struct ip6_hdr *); 2210 2211 /* IP header fields included in the TCP checksum */ 2212 h6->ip6_nxt = IPPROTO_TCP; 2213 h6->ip6_plen = htons(tlen); 2214 memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr)); 2215 memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr)); 2216 2217 th = (struct tcphdr *)((caddr_t)h6 + sizeof(struct ip6_hdr)); 2218 break; 2219 #endif /* INET6 */ 2220 } 2221 2222 /* TCP header */ 2223 th->th_sport = sport; 2224 th->th_dport = dport; 2225 th->th_seq = htonl(seq); 2226 th->th_ack = htonl(ack); 2227 th->th_off = tlen >> 2; 2228 th->th_flags = flags; 2229 th->th_win = htons(win); 2230 2231 if (mss) { 2232 opt = (char *)(th + 1); 2233 opt[0] = TCPOPT_MAXSEG; 2234 opt[1] = 4; 2235 mss = htons(mss); 2236 bcopy((caddr_t)&mss, (caddr_t)(opt + 2), 2); 2237 } 2238 2239 switch (af) { 2240 #ifdef INET 2241 case AF_INET: 2242 /* TCP checksum */ 2243 th->th_sum = in_cksum(m, len); 2244 2245 /* Finish the IP header */ 2246 h->ip_v = 4; 2247 h->ip_hl = sizeof(*h) >> 2; 2248 h->ip_tos = IPTOS_LOWDELAY; 2249 h->ip_len = htons(len); 2250 h->ip_off = path_mtu_discovery ? htons(IP_DF) : 0; 2251 h->ip_ttl = ttl ? 
ttl : ip_defttl;
		h->ip_sum = 0;
		if (eh == NULL) {
			lwkt_reltoken(&pf_token);
			ip_output(m, NULL, NULL, 0, NULL, NULL);
			lwkt_gettoken(&pf_token);
		} else {
			struct route ro;
			struct rtentry rt;
			struct ether_header *e = (void *)ro.ro_dst.sa_data;

			if (ifp == NULL) {
				m_freem(m);
				return;
			}
			rt.rt_ifp = ifp;
			ro.ro_rt = &rt;
			ro.ro_dst.sa_len = sizeof(ro.ro_dst);
			ro.ro_dst.sa_family = pseudo_AF_HDRCMPLT;
			bcopy(eh->ether_dhost, e->ether_shost, ETHER_ADDR_LEN);
			bcopy(eh->ether_shost, e->ether_dhost, ETHER_ADDR_LEN);
			e->ether_type = eh->ether_type;
			/* XXX_IMPORT: later */
			lwkt_reltoken(&pf_token);
			ip_output(m, NULL, &ro, 0, NULL, NULL);
			lwkt_gettoken(&pf_token);
		}
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		/* TCP checksum */
		th->th_sum = in6_cksum(m, IPPROTO_TCP,
		    sizeof(struct ip6_hdr), tlen);

		h6->ip6_vfc |= IPV6_VERSION;
		h6->ip6_hlim = IPV6_DEFHLIM;

		lwkt_reltoken(&pf_token);
		ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL);
		lwkt_gettoken(&pf_token);
		break;
#endif /* INET6 */
	}
}

void
pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af,
    struct pf_rule *r)
{
	struct mbuf	*m0;

	/*
	 * DragonFly doesn't zero the auxiliary pkthdr fields, only fw_flags,
	 * so make sure pf.flags is clear.
	 */
	if ((m0 = m_copy(m, 0, M_COPYALL)) == NULL)
		return;

	m0->m_pkthdr.fw_flags |= PF_MBUF_TAGGED;
	m0->m_pkthdr.pf.flags = 0;
	/* XXX Recheck when upgrading to > 4.4 */
	m0->m_pkthdr.pf.statekey = NULL;

	if (r->rtableid >= 0)
		m0->m_pkthdr.pf.rtableid = r->rtableid;

#ifdef ALTQ
	if (r->qid) {
		m0->m_pkthdr.fw_flags |= PF_MBUF_STRUCTURE;
		m0->m_pkthdr.pf.qid = r->qid;
		m0->m_pkthdr.pf.ecn_af = af;
		m0->m_pkthdr.pf.hdr = mtod(m0, struct ip *);
	}
#endif /* ALTQ */

	switch (af) {
#ifdef INET
	case AF_INET:
		icmp_error(m0, type, code, 0, 0);
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		icmp6_error(m0, type, code, 0);
		break;
#endif /* INET6 */
	}
}

/*
 * Return 1 if the addresses a and b match (with mask m), otherwise return 0.
 * If n is 0, they match if they are equal. If n is != 0, they match if they
 * are different.
 */
int
pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m,
    struct pf_addr *b, sa_family_t af)
{
	int	match = 0;

	switch (af) {
#ifdef INET
	case AF_INET:
		if ((a->addr32[0] & m->addr32[0]) ==
		    (b->addr32[0] & m->addr32[0]))
			match++;
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		if (((a->addr32[0] & m->addr32[0]) ==
		     (b->addr32[0] & m->addr32[0])) &&
		    ((a->addr32[1] & m->addr32[1]) ==
		     (b->addr32[1] & m->addr32[1])) &&
		    ((a->addr32[2] & m->addr32[2]) ==
		     (b->addr32[2] & m->addr32[2])) &&
		    ((a->addr32[3] & m->addr32[3]) ==
		     (b->addr32[3] & m->addr32[3])))
			match++;
		break;
#endif /* INET6 */
	}
	if (match) {
		if (n)
			return (0);
		else
			return (1);
	} else {
		if (n)
			return (1);
		else
			return (0);
	}
}

/*
 * Return 1 if b <= a <= e, otherwise return 0.
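 * e.g. with b = 10.0.0.10 and e = 10.0.0.20, a = 10.0.0.15 matches but
 * a = 10.0.0.9 does not; the AF_INET6 case compares the four 32-bit
 * words most significant first.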
 */
int
pf_match_addr_range(struct pf_addr *b, struct pf_addr *e,
    struct pf_addr *a, sa_family_t af)
{
	switch (af) {
#ifdef INET
	case AF_INET:
		if ((ntohl(a->addr32[0]) < ntohl(b->addr32[0])) ||
		    (ntohl(a->addr32[0]) > ntohl(e->addr32[0])))
			return (0);
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6: {
		int	i;

		/* check a >= b */
		for (i = 0; i < 4; ++i)
			if (ntohl(a->addr32[i]) > ntohl(b->addr32[i]))
				break;
			else if (ntohl(a->addr32[i]) < ntohl(b->addr32[i]))
				return (0);
		/* check a <= e */
		for (i = 0; i < 4; ++i)
			if (ntohl(a->addr32[i]) < ntohl(e->addr32[i]))
				break;
			else if (ntohl(a->addr32[i]) > ntohl(e->addr32[i]))
				return (0);
		break;
	}
#endif /* INET6 */
	}
	return (1);
}

int
pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p)
{
	switch (op) {
	case PF_OP_IRG:
		return ((p > a1) && (p < a2));
	case PF_OP_XRG:
		return ((p < a1) || (p > a2));
	case PF_OP_RRG:
		return ((p >= a1) && (p <= a2));
	case PF_OP_EQ:
		return (p == a1);
	case PF_OP_NE:
		return (p != a1);
	case PF_OP_LT:
		return (p < a1);
	case PF_OP_LE:
		return (p <= a1);
	case PF_OP_GT:
		return (p > a1);
	case PF_OP_GE:
		return (p >= a1);
	}
	return (0); /* never reached */
}

int
pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p)
{
	a1 = ntohs(a1);
	a2 = ntohs(a2);
	p = ntohs(p);
	return (pf_match(op, a1, a2, p));
}

int
pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u)
{
	if (u == UID_MAX && op != PF_OP_EQ && op != PF_OP_NE)
		return (0);
	return (pf_match(op, a1, a2, u));
}

int
pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g)
{
	if (g == GID_MAX && op != PF_OP_EQ && op != PF_OP_NE)
		return (0);
	return (pf_match(op, a1, a2, g));
}

int
pf_match_tag(struct mbuf *m, struct pf_rule *r, int *tag)
{
	if (*tag == -1)
		*tag = m->m_pkthdr.pf.tag;

	return ((!r->match_tag_not && r->match_tag == *tag) ||
	    (r->match_tag_not && r->match_tag != *tag));
}

int
pf_tag_packet(struct mbuf *m, int tag, int rtableid)
{
	if (tag <= 0 && rtableid < 0)
		return (0);

	if (tag > 0)
		m->m_pkthdr.pf.tag = tag;
	if (rtableid >= 0)
		m->m_pkthdr.pf.rtableid = rtableid;

	return (0);
}

void
pf_step_into_anchor(int *depth, struct pf_ruleset **rs, int n,
    struct pf_rule **r, struct pf_rule **a, int *match)
{
	struct pf_anchor_stackframe	*f;

	(*r)->anchor->match = 0;
	if (match)
		*match = 0;
	if (*depth >= NELEM(pf_anchor_stack)) {
		kprintf("pf_step_into_anchor: stack overflow\n");
		*r = TAILQ_NEXT(*r, entries);
		return;
	} else if (*depth == 0 && a != NULL)
		*a = *r;
	f = pf_anchor_stack + (*depth)++;
	f->rs = *rs;
	f->r = *r;
	if ((*r)->anchor_wildcard) {
		f->parent = &(*r)->anchor->children;
		if ((f->child = RB_MIN(pf_anchor_node, f->parent)) ==
		    NULL) {
			*r = NULL;
			return;
		}
		*rs = &f->child->ruleset;
	} else {
		f->parent = NULL;
		f->child = NULL;
		*rs = &(*r)->anchor->ruleset;
	}
	*r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
}

int
pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs, int n,
    struct pf_rule **r, struct pf_rule **a, int *match)
{
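	/*
	 * Unwind one level of the anchor stack.  When a wildcard anchor
	 * still has unvisited children, continue with the next child's
	 * ruleset; otherwise pop the frame and resume after the anchor
	 * rule.  Returns the anchor rule's "quick" flag when the anchor
	 * matched, so the caller can stop the ruleset walk early.
	 */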
	struct pf_anchor_stackframe	*f;
	int quick = 0;

	do {
		if (*depth <= 0)
			break;
		f = pf_anchor_stack + *depth - 1;
		if (f->parent != NULL && f->child != NULL) {
			if (f->child->match ||
			    (match != NULL && *match)) {
				f->r->anchor->match = 1;
				if (match != NULL)
					*match = 0;
			}
			f->child = RB_NEXT(pf_anchor_node, f->parent, f->child);
			if (f->child != NULL) {
				*rs = &f->child->ruleset;
				*r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
				if (*r == NULL)
					continue;
				else
					break;
			}
		}
		(*depth)--;
		if (*depth == 0 && a != NULL)
			*a = NULL;
		*rs = f->rs;
		if (f->r->anchor->match || (match != NULL && *match))
			quick = f->r->quick;
		*r = TAILQ_NEXT(f->r, entries);
	} while (*r == NULL);

	return (quick);
}

#ifdef INET6
void
pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr,
    struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af)
{
	switch (af) {
#ifdef INET
	case AF_INET:
		naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
		    ((rmask->addr32[0] ^ 0xffffffff) & saddr->addr32[0]);
		break;
#endif /* INET */
	case AF_INET6:
		naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
		    ((rmask->addr32[0] ^ 0xffffffff) & saddr->addr32[0]);
		naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) |
		    ((rmask->addr32[1] ^ 0xffffffff) & saddr->addr32[1]);
		naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) |
		    ((rmask->addr32[2] ^ 0xffffffff) & saddr->addr32[2]);
		naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) |
		    ((rmask->addr32[3] ^ 0xffffffff) & saddr->addr32[3]);
		break;
	}
}

void
pf_addr_inc(struct pf_addr *addr, sa_family_t af)
{
	switch (af) {
#ifdef INET
	case AF_INET:
		addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1);
		break;
#endif /* INET */
	case AF_INET6:
		if (addr->addr32[3] == 0xffffffff) {
			addr->addr32[3] = 0;
			if (addr->addr32[2] == 0xffffffff) {
				addr->addr32[2] = 0;
				if (addr->addr32[1] == 0xffffffff) {
					addr->addr32[1] = 0;
					addr->addr32[0] =
					    htonl(ntohl(addr->addr32[0]) + 1);
				} else
					addr->addr32[1] =
					    htonl(ntohl(addr->addr32[1]) + 1);
			} else
				addr->addr32[2] =
				    htonl(ntohl(addr->addr32[2]) + 1);
		} else
			addr->addr32[3] =
			    htonl(ntohl(addr->addr32[3]) + 1);
		break;
	}
}
#endif /* INET6 */

#define mix(a,b,c) \
	do {					\
		a -= b; a -= c; a ^= (c >> 13);	\
		b -= c; b -= a; b ^= (a << 8);	\
		c -= a; c -= b; c ^= (b >> 13);	\
		a -= b; a -= c; a ^= (c >> 12);	\
		b -= c; b -= a; b ^= (a << 16);	\
		c -= a; c -= b; c ^= (b >> 5);	\
		a -= b; a -= c; a ^= (c >> 3);	\
		b -= c; b -= a; b ^= (a << 10);	\
		c -= a; c -= b; c ^= (b >> 15);	\
	} while (0)

/*
 * hash function based on bridge_hash in if_bridge.c
 */
void
pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
    struct pf_poolhashkey *key, sa_family_t af)
{
	u_int32_t	a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0];

	switch (af) {
#ifdef INET
	case AF_INET:
		a += inaddr->addr32[0];
		b += key->key32[1];
		mix(a, b, c);
		hash->addr32[0] = c + key->key32[2];
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		a += inaddr->addr32[0];
		b += inaddr->addr32[2];
		mix(a, b, c);
hash->addr32[0] = c; 2667 a += inaddr->addr32[1]; 2668 b += inaddr->addr32[3]; 2669 c += key->key32[1]; 2670 mix(a, b, c); 2671 hash->addr32[1] = c; 2672 a += inaddr->addr32[2]; 2673 b += inaddr->addr32[1]; 2674 c += key->key32[2]; 2675 mix(a, b, c); 2676 hash->addr32[2] = c; 2677 a += inaddr->addr32[3]; 2678 b += inaddr->addr32[0]; 2679 c += key->key32[3]; 2680 mix(a, b, c); 2681 hash->addr32[3] = c; 2682 break; 2683 #endif /* INET6 */ 2684 } 2685 } 2686 2687 int 2688 pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr, 2689 struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sn) 2690 { 2691 unsigned char hash[16]; 2692 struct pf_pool *rpool = &r->rpool; 2693 struct pf_pooladdr *acur = rpool->cur; 2694 struct pf_pooladdr *cur; 2695 struct pf_addr *raddr; 2696 struct pf_addr *rmask; 2697 struct pf_addr counter; 2698 struct pf_src_node k; 2699 int cpu = mycpu->gd_cpuid; 2700 int tblidx; 2701 2702 bzero(hash, sizeof(hash)); /* avoid gcc warnings */ 2703 2704 /* 2705 * NOTE! rpool->cur and rpool->tblidx can be iterators and thus 2706 * may represent a SMP race due to the shared nature of the 2707 * rpool structure. We allow the race and ensure that updates 2708 * do not create a fatal condition. 2709 */ 2710 cpu_ccfence(); 2711 cur = acur; 2712 raddr = &cur->addr.v.a.addr; 2713 rmask = &cur->addr.v.a.mask; 2714 2715 if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR && 2716 (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) { 2717 k.af = af; 2718 PF_ACPY(&k.addr, saddr, af); 2719 if (r->rule_flag & PFRULE_RULESRCTRACK || 2720 r->rpool.opts & PF_POOL_STICKYADDR) 2721 k.rule.ptr = r; 2722 else 2723 k.rule.ptr = NULL; 2724 PF_INC_SCOUNTER(SCNT_SRC_NODE_SEARCH); 2725 *sn = RB_FIND(pf_src_tree, &tree_src_tracking[cpu], &k); 2726 if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, af)) { 2727 PF_ACPY(naddr, &(*sn)->raddr, af); 2728 if (pf_status.debug >= PF_DEBUG_MISC) { 2729 kprintf("pf_map_addr: src tracking maps "); 2730 pf_print_host(&k.addr, 0, af); 2731 kprintf(" to "); 2732 pf_print_host(naddr, 0, af); 2733 kprintf("\n"); 2734 } 2735 return (0); 2736 } 2737 } 2738 2739 if (cur->addr.type == PF_ADDR_NOROUTE) 2740 return (1); 2741 if (cur->addr.type == PF_ADDR_DYNIFTL) { 2742 switch (af) { 2743 #ifdef INET 2744 case AF_INET: 2745 if (cur->addr.p.dyn->pfid_acnt4 < 1 && 2746 (rpool->opts & PF_POOL_TYPEMASK) != 2747 PF_POOL_ROUNDROBIN) 2748 return (1); 2749 raddr = &cur->addr.p.dyn->pfid_addr4; 2750 rmask = &cur->addr.p.dyn->pfid_mask4; 2751 break; 2752 #endif /* INET */ 2753 #ifdef INET6 2754 case AF_INET6: 2755 if (cur->addr.p.dyn->pfid_acnt6 < 1 && 2756 (rpool->opts & PF_POOL_TYPEMASK) != 2757 PF_POOL_ROUNDROBIN) 2758 return (1); 2759 raddr = &cur->addr.p.dyn->pfid_addr6; 2760 rmask = &cur->addr.p.dyn->pfid_mask6; 2761 break; 2762 #endif /* INET6 */ 2763 } 2764 } else if (cur->addr.type == PF_ADDR_TABLE) { 2765 if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN) 2766 return (1); /* unsupported */ 2767 } else { 2768 raddr = &cur->addr.v.a.addr; 2769 rmask = &cur->addr.v.a.mask; 2770 } 2771 2772 switch (rpool->opts & PF_POOL_TYPEMASK) { 2773 case PF_POOL_NONE: 2774 PF_ACPY(naddr, raddr, af); 2775 break; 2776 case PF_POOL_BITMASK: 2777 PF_POOLMASK(naddr, raddr, rmask, saddr, af); 2778 break; 2779 case PF_POOL_RANDOM: 2780 if (init_addr != NULL && PF_AZERO(init_addr, af)) { 2781 switch (af) { 2782 #ifdef INET 2783 case AF_INET: 2784 counter.addr32[0] = htonl(karc4random()); 2785 break; 2786 #endif /* INET */ 2787 #ifdef INET6 2788 case AF_INET6: 2789 if 
(rmask->addr32[3] != 0xffffffff) 2790 counter.addr32[3] = 2791 htonl(karc4random()); 2792 else 2793 break; 2794 if (rmask->addr32[2] != 0xffffffff) 2795 counter.addr32[2] = 2796 htonl(karc4random()); 2797 else 2798 break; 2799 if (rmask->addr32[1] != 0xffffffff) 2800 counter.addr32[1] = 2801 htonl(karc4random()); 2802 else 2803 break; 2804 if (rmask->addr32[0] != 0xffffffff) 2805 counter.addr32[0] = 2806 htonl(karc4random()); 2807 break; 2808 #endif /* INET6 */ 2809 } 2810 PF_POOLMASK(naddr, raddr, rmask, &counter, af); 2811 PF_ACPY(init_addr, naddr, af); 2812 2813 } else { 2814 counter = rpool->counter; 2815 cpu_ccfence(); 2816 PF_AINC(&counter, af); 2817 PF_POOLMASK(naddr, raddr, rmask, &counter, af); 2818 rpool->counter = counter; 2819 } 2820 break; 2821 case PF_POOL_SRCHASH: 2822 pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af); 2823 PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af); 2824 break; 2825 case PF_POOL_ROUNDROBIN: 2826 tblidx = rpool->tblidx; 2827 counter = rpool->counter; 2828 if (cur->addr.type == PF_ADDR_TABLE) { 2829 if (!pfr_pool_get(cur->addr.p.tbl, 2830 &tblidx, &counter, 2831 &raddr, &rmask, af)) { 2832 goto get_addr; 2833 } 2834 } else if (cur->addr.type == PF_ADDR_DYNIFTL) { 2835 if (!pfr_pool_get(cur->addr.p.dyn->pfid_kt, 2836 &tblidx, &counter, 2837 &raddr, &rmask, af)) { 2838 goto get_addr; 2839 } 2840 } else if (pf_match_addr(0, raddr, rmask, 2841 &counter, af)) { 2842 goto get_addr; 2843 } 2844 2845 try_next: 2846 if ((cur = TAILQ_NEXT(cur, entries)) == NULL) 2847 cur = TAILQ_FIRST(&rpool->list); 2848 if (cur->addr.type == PF_ADDR_TABLE) { 2849 tblidx = -1; 2850 if (pfr_pool_get(cur->addr.p.tbl, 2851 &tblidx, &counter, 2852 &raddr, &rmask, af)) { 2853 /* table contains no address of type 'af' */ 2854 if (cur != acur) 2855 goto try_next; 2856 return (1); 2857 } 2858 } else if (cur->addr.type == PF_ADDR_DYNIFTL) { 2859 tblidx = -1; 2860 if (pfr_pool_get(cur->addr.p.dyn->pfid_kt, 2861 &tblidx, &counter, 2862 &raddr, &rmask, af)) { 2863 /* table contains no address of type 'af' */ 2864 if (cur != acur) 2865 goto try_next; 2866 return (1); 2867 } 2868 } else { 2869 raddr = &cur->addr.v.a.addr; 2870 rmask = &cur->addr.v.a.mask; 2871 PF_ACPY(&counter, raddr, af); 2872 } 2873 2874 get_addr: 2875 rpool->cur = cur; 2876 rpool->tblidx = tblidx; 2877 PF_ACPY(naddr, &counter, af); 2878 if (init_addr != NULL && PF_AZERO(init_addr, af)) 2879 PF_ACPY(init_addr, naddr, af); 2880 PF_AINC(&counter, af); 2881 rpool->counter = counter; 2882 break; 2883 } 2884 if (*sn != NULL) 2885 PF_ACPY(&(*sn)->raddr, naddr, af); 2886 2887 if (pf_status.debug >= PF_DEBUG_MISC && 2888 (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) { 2889 kprintf("pf_map_addr: selected address "); 2890 pf_print_host(naddr, 0, af); 2891 kprintf("\n"); 2892 } 2893 2894 return (0); 2895 } 2896 2897 int 2898 pf_get_sport(struct pf_pdesc *pd, sa_family_t af, 2899 u_int8_t proto, struct pf_rule *r, 2900 struct pf_addr *saddr, struct pf_addr *daddr, 2901 u_int16_t sport, u_int16_t dport, 2902 struct pf_addr *naddr, u_int16_t *nport, 2903 u_int16_t low, u_int16_t high, struct pf_src_node **sn) 2904 { 2905 struct pf_state_key_cmp key; 2906 struct pf_addr init_addr; 2907 u_int16_t cut; 2908 u_int32_t hash_base = 0; 2909 int do_hash = 0; 2910 2911 bzero(&init_addr, sizeof(init_addr)); 2912 if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn)) 2913 return (1); 2914 2915 if (proto == IPPROTO_ICMP) { 2916 low = 1; 2917 high = 65535; 2918 } 2919 2920 bzero(&key, sizeof(key)); 2921 key.af = af; 2922 
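	/*
	 * The availability probe below uses addr[0]/port[0] for the
	 * original destination and addr[1]/port[1] for the candidate
	 * translation; pf_find_state_all() tells us whether that
	 * combination is already taken.
	 */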
	key.proto = proto;
	key.port[0] = dport;
	PF_ACPY(&key.addr[0], daddr, key.af);

	do {
		PF_ACPY(&key.addr[1], naddr, key.af);

		/*
		 * We want to select a port that calculates to a toeplitz hash
		 * that masks to the same cpu, otherwise the response may
		 * not see the new state.
		 *
		 * We can still do this even if the kernel is disregarding
		 * the hash and vectoring the packets to a specific cpu,
		 * but it will reduce the number of ports we can use.
		 */
		switch (af) {
		case AF_INET:
			if (proto == IPPROTO_TCP) {
				do_hash = 1;
				hash_base = toeplitz_piecemeal_port(dport) ^
				    toeplitz_piecemeal_addr(daddr->v4.s_addr) ^
				    toeplitz_piecemeal_addr(naddr->v4.s_addr);
			}
			break;
		case AF_INET6:
			/* XXX TODO XXX */
		default:
			/* XXX TODO XXX */
			break;
		}

		/*
		 * Port search: start at a random port and step through the
		 * range, similar to the port loop in in_pcbbind.
		 *
		 * WARNING! We try to match such that the kernel will
		 *	    dispatch the translated host/port to the same
		 *	    cpu, but this might not be possible.
		 *
		 *	    In the case where the port is fixed, or for the
		 *	    UDP case (whose toeplitz hash does not incorporate
		 *	    the port), we set not_cpu_localized, which
		 *	    ultimately causes the pf_state_tree element to be
		 *	    flagged PFSTATE_STACK_GLOBAL.
		 *
		 * XXX fixed ports present a problem for cpu localization.
		 */
		if (!(proto == IPPROTO_TCP ||
		      proto == IPPROTO_UDP ||
		      proto == IPPROTO_ICMP)) {
			/*
			 * non-specific protocol, leave port intact.
			 */
			key.port[1] = sport;
			if (pf_find_state_all(&key, PF_IN, NULL) == NULL) {
				*nport = sport;
				pd->not_cpu_localized = 1;
				return (0);
			}
		} else if (low == 0 && high == 0) {
			/*
			 * static-port same as originator.
			 */
			key.port[1] = sport;
			if (pf_find_state_all(&key, PF_IN, NULL) == NULL) {
				*nport = sport;
				pd->not_cpu_localized = 1;
				return (0);
			}
		} else if (low == high) {
			/*
			 * specific port as specified.
2994 */ 2995 key.port[1] = htons(low); 2996 if (pf_find_state_all(&key, PF_IN, NULL) == NULL) { 2997 *nport = htons(low); 2998 pd->not_cpu_localized = 1; 2999 return (0); 3000 } 3001 } else { 3002 /* 3003 * normal dynamic port 3004 */ 3005 u_int16_t tmp; 3006 3007 if (low > high) { 3008 tmp = low; 3009 low = high; 3010 high = tmp; 3011 } 3012 /* low < high */ 3013 cut = htonl(karc4random()) % (1 + high - low) + low; 3014 /* low <= cut <= high */ 3015 for (tmp = cut; tmp <= high; ++(tmp)) { 3016 key.port[1] = htons(tmp); 3017 if (do_hash) { 3018 uint32_t hash; 3019 3020 hash = hash_base ^ 3021 toeplitz_piecemeal_port(key.port[1]); 3022 if (netisr_hashcpu(hash) != mycpuid) 3023 continue; 3024 } 3025 if (pf_find_state_all(&key, PF_IN, NULL) == 3026 NULL && !in_baddynamic(tmp, proto)) { 3027 if (proto == IPPROTO_UDP) 3028 pd->not_cpu_localized = 1; 3029 *nport = htons(tmp); 3030 return (0); 3031 } 3032 } 3033 for (tmp = cut - 1; tmp >= low; --(tmp)) { 3034 key.port[1] = htons(tmp); 3035 if (do_hash) { 3036 uint32_t hash; 3037 3038 hash = hash_base ^ 3039 toeplitz_piecemeal_port(key.port[1]); 3040 if (netisr_hashcpu(hash) != mycpuid) 3041 continue; 3042 } 3043 if (pf_find_state_all(&key, PF_IN, NULL) == 3044 NULL && !in_baddynamic(tmp, proto)) { 3045 if (proto == IPPROTO_UDP) 3046 pd->not_cpu_localized = 1; 3047 *nport = htons(tmp); 3048 return (0); 3049 } 3050 } 3051 } 3052 3053 /* 3054 * Next address 3055 */ 3056 switch (r->rpool.opts & PF_POOL_TYPEMASK) { 3057 case PF_POOL_RANDOM: 3058 case PF_POOL_ROUNDROBIN: 3059 if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn)) 3060 return (1); 3061 break; 3062 case PF_POOL_NONE: 3063 case PF_POOL_SRCHASH: 3064 case PF_POOL_BITMASK: 3065 default: 3066 return (1); 3067 } 3068 } while (! PF_AEQ(&init_addr, naddr, af) ); 3069 return (1); /* none available */ 3070 } 3071 3072 struct pf_rule * 3073 pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off, 3074 int direction, struct pfi_kif *kif, struct pf_addr *saddr, u_int16_t sport, 3075 struct pf_addr *daddr, u_int16_t dport, int rs_num) 3076 { 3077 struct pf_rule *r, *rm = NULL; 3078 struct pf_ruleset *ruleset = NULL; 3079 int tag = -1; 3080 int rtableid = -1; 3081 int asd = 0; 3082 3083 r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr); 3084 while (r && rm == NULL) { 3085 struct pf_rule_addr *src = NULL, *dst = NULL; 3086 struct pf_addr_wrap *xdst = NULL; 3087 struct pf_pooladdr *cur; 3088 3089 if (r->action == PF_BINAT && direction == PF_IN) { 3090 src = &r->dst; 3091 cur = r->rpool.cur; /* SMP race possible */ 3092 cpu_ccfence(); 3093 if (cur) 3094 xdst = &cur->addr; 3095 } else { 3096 src = &r->src; 3097 dst = &r->dst; 3098 } 3099 3100 r->evaluations++; 3101 if (pfi_kif_match(r->kif, kif) == r->ifnot) 3102 r = r->skip[PF_SKIP_IFP].ptr; 3103 else if (r->direction && r->direction != direction) 3104 r = r->skip[PF_SKIP_DIR].ptr; 3105 else if (r->af && r->af != pd->af) 3106 r = r->skip[PF_SKIP_AF].ptr; 3107 else if (r->proto && r->proto != pd->proto) 3108 r = r->skip[PF_SKIP_PROTO].ptr; 3109 else if (PF_MISMATCHAW(&src->addr, saddr, pd->af, 3110 src->neg, kif)) 3111 r = r->skip[src == &r->src ? PF_SKIP_SRC_ADDR : 3112 PF_SKIP_DST_ADDR].ptr; 3113 else if (src->port_op && !pf_match_port(src->port_op, 3114 src->port[0], src->port[1], sport)) 3115 r = r->skip[src == &r->src ? 
PF_SKIP_SRC_PORT : 3116 PF_SKIP_DST_PORT].ptr; 3117 else if (dst != NULL && 3118 PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->neg, NULL)) 3119 r = r->skip[PF_SKIP_DST_ADDR].ptr; 3120 else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af, 3121 0, NULL)) 3122 r = TAILQ_NEXT(r, entries); 3123 else if (dst != NULL && dst->port_op && 3124 !pf_match_port(dst->port_op, dst->port[0], 3125 dst->port[1], dport)) 3126 r = r->skip[PF_SKIP_DST_PORT].ptr; 3127 else if (r->match_tag && !pf_match_tag(m, r, &tag)) 3128 r = TAILQ_NEXT(r, entries); 3129 else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto != 3130 IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd, m, 3131 off, pd->hdr.tcp), r->os_fingerprint))) 3132 r = TAILQ_NEXT(r, entries); 3133 else { 3134 if (r->tag) 3135 tag = r->tag; 3136 if (r->rtableid >= 0) 3137 rtableid = r->rtableid; 3138 if (r->anchor == NULL) { 3139 rm = r; 3140 } else 3141 pf_step_into_anchor(&asd, &ruleset, rs_num, 3142 &r, NULL, NULL); 3143 } 3144 if (r == NULL) 3145 pf_step_out_of_anchor(&asd, &ruleset, rs_num, &r, 3146 NULL, NULL); 3147 } 3148 if (pf_tag_packet(m, tag, rtableid)) 3149 return (NULL); 3150 if (rm != NULL && (rm->action == PF_NONAT || 3151 rm->action == PF_NORDR || rm->action == PF_NOBINAT)) 3152 return (NULL); 3153 return (rm); 3154 } 3155 3156 struct pf_rule * 3157 pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction, 3158 struct pfi_kif *kif, struct pf_src_node **sn, 3159 struct pf_state_key **skw, struct pf_state_key **sks, 3160 struct pf_state_key **skp, struct pf_state_key **nkp, 3161 struct pf_addr *saddr, struct pf_addr *daddr, 3162 u_int16_t sport, u_int16_t dport) 3163 { 3164 struct pf_rule *r = NULL; 3165 3166 if (direction == PF_OUT) { 3167 r = pf_match_translation(pd, m, off, direction, kif, saddr, 3168 sport, daddr, dport, PF_RULESET_BINAT); 3169 if (r == NULL) 3170 r = pf_match_translation(pd, m, off, direction, kif, 3171 saddr, sport, daddr, dport, PF_RULESET_NAT); 3172 } else { 3173 r = pf_match_translation(pd, m, off, direction, kif, saddr, 3174 sport, daddr, dport, PF_RULESET_RDR); 3175 if (r == NULL) 3176 r = pf_match_translation(pd, m, off, direction, kif, 3177 saddr, sport, daddr, dport, PF_RULESET_BINAT); 3178 } 3179 3180 if (r != NULL) { 3181 struct pf_addr *naddr; 3182 u_int16_t *nport; 3183 3184 if (pf_state_key_setup(pd, r, skw, sks, skp, nkp, 3185 saddr, daddr, sport, dport)) 3186 return r; 3187 3188 /* XXX We only modify one side for now. */ 3189 naddr = &(*nkp)->addr[1]; 3190 nport = &(*nkp)->port[1]; 3191 3192 /* 3193 * NOTE: Currently all translations will clear 3194 * BRIDGE_MBUF_TAGGED, telling the bridge to 3195 * ignore the original input encapsulation. 
3196 */ 3197 switch (r->action) { 3198 case PF_NONAT: 3199 case PF_NOBINAT: 3200 case PF_NORDR: 3201 return (NULL); 3202 case PF_NAT: 3203 m->m_pkthdr.fw_flags &= ~BRIDGE_MBUF_TAGGED; 3204 if (pf_get_sport(pd, pd->af, pd->proto, r, 3205 saddr, daddr, sport, dport, 3206 naddr, nport, r->rpool.proxy_port[0], 3207 r->rpool.proxy_port[1], sn)) { 3208 DPFPRINTF(PF_DEBUG_MISC, 3209 ("pf: NAT proxy port allocation " 3210 "(%u-%u) failed\n", 3211 r->rpool.proxy_port[0], 3212 r->rpool.proxy_port[1])); 3213 return (NULL); 3214 } 3215 break; 3216 case PF_BINAT: 3217 m->m_pkthdr.fw_flags &= ~BRIDGE_MBUF_TAGGED; 3218 switch (direction) { 3219 case PF_OUT: 3220 if (r->rpool.cur->addr.type == PF_ADDR_DYNIFTL){ 3221 switch (pd->af) { 3222 #ifdef INET 3223 case AF_INET: 3224 if (r->rpool.cur->addr.p.dyn-> 3225 pfid_acnt4 < 1) 3226 return (NULL); 3227 PF_POOLMASK(naddr, 3228 &r->rpool.cur->addr.p.dyn-> 3229 pfid_addr4, 3230 &r->rpool.cur->addr.p.dyn-> 3231 pfid_mask4, 3232 saddr, AF_INET); 3233 break; 3234 #endif /* INET */ 3235 #ifdef INET6 3236 case AF_INET6: 3237 if (r->rpool.cur->addr.p.dyn-> 3238 pfid_acnt6 < 1) 3239 return (NULL); 3240 PF_POOLMASK(naddr, 3241 &r->rpool.cur->addr.p.dyn-> 3242 pfid_addr6, 3243 &r->rpool.cur->addr.p.dyn-> 3244 pfid_mask6, 3245 saddr, AF_INET6); 3246 break; 3247 #endif /* INET6 */ 3248 } 3249 } else 3250 PF_POOLMASK(naddr, 3251 &r->rpool.cur->addr.v.a.addr, 3252 &r->rpool.cur->addr.v.a.mask, 3253 saddr, pd->af); 3254 break; 3255 case PF_IN: 3256 if (r->src.addr.type == PF_ADDR_DYNIFTL) { 3257 switch (pd->af) { 3258 #ifdef INET 3259 case AF_INET: 3260 if (r->src.addr.p.dyn-> 3261 pfid_acnt4 < 1) 3262 return (NULL); 3263 PF_POOLMASK(naddr, 3264 &r->src.addr.p.dyn-> 3265 pfid_addr4, 3266 &r->src.addr.p.dyn-> 3267 pfid_mask4, 3268 daddr, AF_INET); 3269 break; 3270 #endif /* INET */ 3271 #ifdef INET6 3272 case AF_INET6: 3273 if (r->src.addr.p.dyn-> 3274 pfid_acnt6 < 1) 3275 return (NULL); 3276 PF_POOLMASK(naddr, 3277 &r->src.addr.p.dyn-> 3278 pfid_addr6, 3279 &r->src.addr.p.dyn-> 3280 pfid_mask6, 3281 daddr, AF_INET6); 3282 break; 3283 #endif /* INET6 */ 3284 } 3285 } else 3286 PF_POOLMASK(naddr, 3287 &r->src.addr.v.a.addr, 3288 &r->src.addr.v.a.mask, daddr, 3289 pd->af); 3290 break; 3291 } 3292 break; 3293 case PF_RDR: { 3294 m->m_pkthdr.fw_flags &= ~BRIDGE_MBUF_TAGGED; 3295 if (pf_map_addr(pd->af, r, saddr, naddr, NULL, sn)) 3296 return (NULL); 3297 if ((r->rpool.opts & PF_POOL_TYPEMASK) == 3298 PF_POOL_BITMASK) 3299 PF_POOLMASK(naddr, naddr, 3300 &r->rpool.cur->addr.v.a.mask, daddr, 3301 pd->af); 3302 3303 if (r->rpool.proxy_port[1]) { 3304 u_int32_t tmp_nport; 3305 3306 tmp_nport = ((ntohs(dport) - 3307 ntohs(r->dst.port[0])) % 3308 (r->rpool.proxy_port[1] - 3309 r->rpool.proxy_port[0] + 1)) + 3310 r->rpool.proxy_port[0]; 3311 3312 /* wrap around if necessary */ 3313 if (tmp_nport > 65535) 3314 tmp_nport -= 65535; 3315 *nport = htons((u_int16_t)tmp_nport); 3316 } else if (r->rpool.proxy_port[0]) { 3317 *nport = htons(r->rpool.proxy_port[0]); 3318 } 3319 pd->not_cpu_localized = 1; 3320 break; 3321 } 3322 default: 3323 return (NULL); 3324 } 3325 } 3326 3327 return (r); 3328 } 3329 3330 struct netmsg_hashlookup { 3331 struct netmsg_base base; 3332 struct inpcb **nm_pinp; 3333 struct inpcbinfo *nm_pcbinfo; 3334 struct pf_addr *nm_saddr; 3335 struct pf_addr *nm_daddr; 3336 uint16_t nm_sport; 3337 uint16_t nm_dport; 3338 sa_family_t nm_af; 3339 }; 3340 3341 #ifdef PF_SOCKET_LOOKUP_DOMSG 3342 static void 3343 in_pcblookup_hash_handler(netmsg_t msg) 3344 { 3345 struct 
netmsg_hashlookup *rmsg = (struct netmsg_hashlookup *)msg; 3346 3347 if (rmsg->nm_af == AF_INET) 3348 *rmsg->nm_pinp = in_pcblookup_hash(rmsg->nm_pcbinfo, 3349 rmsg->nm_saddr->v4, rmsg->nm_sport, rmsg->nm_daddr->v4, 3350 rmsg->nm_dport, INPLOOKUP_WILDCARD, NULL); 3351 #ifdef INET6 3352 else 3353 *rmsg->nm_pinp = in6_pcblookup_hash(rmsg->nm_pcbinfo, 3354 &rmsg->nm_saddr->v6, rmsg->nm_sport, &rmsg->nm_daddr->v6, 3355 rmsg->nm_dport, INPLOOKUP_WILDCARD, NULL); 3356 #endif /* INET6 */ 3357 lwkt_replymsg(&rmsg->base.lmsg, 0); 3358 } 3359 #endif /* PF_SOCKET_LOOKUP_DOMSG */ 3360 3361 int 3362 pf_socket_lookup(int direction, struct pf_pdesc *pd) 3363 { 3364 struct pf_addr *saddr, *daddr; 3365 u_int16_t sport, dport; 3366 struct inpcbinfo *pi; 3367 struct inpcb *inp; 3368 struct netmsg_hashlookup *msg = NULL; 3369 #ifdef PF_SOCKET_LOOKUP_DOMSG 3370 struct netmsg_hashlookup msg0; 3371 #endif 3372 int pi_cpu = 0; 3373 3374 if (pd == NULL) 3375 return (-1); 3376 pd->lookup.uid = UID_MAX; 3377 pd->lookup.gid = GID_MAX; 3378 pd->lookup.pid = NO_PID; 3379 if (direction == PF_IN) { 3380 saddr = pd->src; 3381 daddr = pd->dst; 3382 } else { 3383 saddr = pd->dst; 3384 daddr = pd->src; 3385 } 3386 switch (pd->proto) { 3387 case IPPROTO_TCP: 3388 if (pd->hdr.tcp == NULL) 3389 return (-1); 3390 sport = pd->hdr.tcp->th_sport; 3391 dport = pd->hdr.tcp->th_dport; 3392 3393 pi_cpu = tcp_addrcpu(saddr->v4.s_addr, sport, daddr->v4.s_addr, dport); 3394 pi = &tcbinfo[pi_cpu]; 3395 /* 3396 * Our netstack runs lockless on MP systems 3397 * (only for TCP connections at the moment). 3398 * 3399 * As we are not allowed to read another CPU's tcbinfo, 3400 * we have to ask that CPU via remote call to search the 3401 * table for us. 3402 * 3403 * Prepare a msg iff data belongs to another CPU. 3404 */ 3405 if (pi_cpu != mycpu->gd_cpuid) { 3406 #ifdef PF_SOCKET_LOOKUP_DOMSG 3407 /* 3408 * NOTE: 3409 * 3410 * Following lwkt_domsg() is dangerous and could 3411 * lockup the network system, e.g. 3412 * 3413 * On 2 CPU system: 3414 * netisr0 domsg to netisr1 (due to lookup) 3415 * netisr1 domsg to netisr0 (due to lookup) 3416 * 3417 * We simply return -1 here, since we are probably 3418 * called before NAT, so the TCP packet should 3419 * already be on the correct CPU. 3420 */ 3421 msg = &msg0; 3422 netmsg_init(&msg->base, NULL, &curthread->td_msgport, 3423 0, in_pcblookup_hash_handler); 3424 msg->nm_pinp = &inp; 3425 msg->nm_pcbinfo = pi; 3426 msg->nm_saddr = saddr; 3427 msg->nm_sport = sport; 3428 msg->nm_daddr = daddr; 3429 msg->nm_dport = dport; 3430 msg->nm_af = pd->af; 3431 #else /* !PF_SOCKET_LOOKUP_DOMSG */ 3432 kprintf("pf_socket_lookup: tcp packet not on the " 3433 "correct cpu %d, cur cpu %d\n", 3434 pi_cpu, mycpuid); 3435 print_backtrace(-1); 3436 return -1; 3437 #endif /* PF_SOCKET_LOOKUP_DOMSG */ 3438 } 3439 break; 3440 case IPPROTO_UDP: 3441 if (pd->hdr.udp == NULL) 3442 return (-1); 3443 sport = pd->hdr.udp->uh_sport; 3444 dport = pd->hdr.udp->uh_dport; 3445 pi = &udbinfo[mycpuid]; 3446 break; 3447 default: 3448 return (-1); 3449 } 3450 if (direction != PF_IN) { 3451 u_int16_t p; 3452 3453 p = sport; 3454 sport = dport; 3455 dport = p; 3456 } 3457 switch (pd->af) { 3458 #ifdef INET6 3459 case AF_INET6: 3460 /* 3461 * Query other CPU, second part 3462 * 3463 * msg only gets initialized when: 3464 * 1) packet is TCP 3465 * 2) the info belongs to another CPU 3466 * 3467 * Use some switch/case magic to avoid code duplication. 
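	 * (If msg was prepared because the pcbinfo hash belongs to another
	 * cpu, the AF_INET6 case falls through to AF_INET below, which
	 * dispatches the lookup via lwkt_domsg().)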
	 */
		if (msg == NULL) {
			inp = in6_pcblookup_hash(pi, &saddr->v6, sport,
			    &daddr->v6, dport, INPLOOKUP_WILDCARD, NULL);

			if (inp == NULL)
				return (-1);
			break;
		}
		/* FALLTHROUGH if SMP and on other CPU */
#endif /* INET6 */
	case AF_INET:
		if (msg != NULL) {
			lwkt_domsg(netisr_cpuport(pi_cpu),
			    &msg->base.lmsg, 0);
		} else {
			inp = in_pcblookup_hash(pi, saddr->v4, sport,
			    daddr->v4, dport, INPLOOKUP_WILDCARD, NULL);
		}
		if (inp == NULL)
			return (-1);
		break;

	default:
		return (-1);
	}
	pd->lookup.uid = inp->inp_socket->so_cred->cr_uid;
	pd->lookup.gid = inp->inp_socket->so_cred->cr_groups[0];
	return (1);
}

u_int8_t
pf_get_wscale(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
{
	int		 hlen;
	u_int8_t	 hdr[60];
	u_int8_t	*opt, optlen;
	u_int8_t	 wscale = 0;

	hlen = th_off << 2;		/* hlen <= sizeof(hdr) */
	if (hlen <= sizeof(struct tcphdr))
		return (0);
	if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af))
		return (0);
	opt = hdr + sizeof(struct tcphdr);
	hlen -= sizeof(struct tcphdr);
	while (hlen >= 3) {
		switch (*opt) {
		case TCPOPT_EOL:
		case TCPOPT_NOP:
			++opt;
			--hlen;
			break;
		case TCPOPT_WINDOW:
			wscale = opt[2];
			if (wscale > TCP_MAX_WINSHIFT)
				wscale = TCP_MAX_WINSHIFT;
			wscale |= PF_WSCALE_FLAG;
			/* FALLTHROUGH */
		default:
			optlen = opt[1];
			if (optlen < 2)
				optlen = 2;
			hlen -= optlen;
			opt += optlen;
			break;
		}
	}
	return (wscale);
}

u_int16_t
pf_get_mss(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
{
	int		 hlen;
	u_int8_t	 hdr[60];
	u_int8_t	*opt, optlen;
	u_int16_t	 mss = tcp_mssdflt;

	hlen = th_off << 2;		/* hlen <= sizeof(hdr) */
	if (hlen <= sizeof(struct tcphdr))
		return (0);
	if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af))
		return (0);
	opt = hdr + sizeof(struct tcphdr);
	hlen -= sizeof(struct tcphdr);
	while (hlen >= TCPOLEN_MAXSEG) {
		switch (*opt) {
		case TCPOPT_EOL:
		case TCPOPT_NOP:
			++opt;
			--hlen;
			break;
		case TCPOPT_MAXSEG:
			bcopy((caddr_t)(opt + 2), (caddr_t)&mss, 2);
			mss = ntohs(mss);	/* option is in network order */
			/* FALLTHROUGH */
		default:
			optlen = opt[1];
			if (optlen < 2)
				optlen = 2;
			hlen -= optlen;
			opt += optlen;
			break;
		}
	}
	return (mss);
}

u_int16_t
pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer)
{
#ifdef INET
	struct sockaddr_in	*dst;
	struct route		 ro;
#endif /* INET */
#ifdef INET6
	struct sockaddr_in6	*dst6;
	struct route_in6	 ro6;
#endif /* INET6 */
	struct rtentry		*rt = NULL;
	int			 hlen = 0;
	u_int16_t		 mss = tcp_mssdflt;

	switch (af) {
#ifdef INET
	case AF_INET:
		hlen = sizeof(struct ip);
		bzero(&ro, sizeof(ro));
		dst = (struct sockaddr_in *)&ro.ro_dst;
		dst->sin_family = AF_INET;
		dst->sin_len = sizeof(*dst);
		dst->sin_addr = addr->v4;
		rtalloc_ign(&ro, (RTF_CLONING | RTF_PRCLONING));
		rt = ro.ro_rt;
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		hlen = sizeof(struct ip6_hdr);
		bzero(&ro6, sizeof(ro6));
		dst6 = (struct sockaddr_in6 *)&ro6.ro_dst;
		dst6->sin6_family = AF_INET6;
		dst6->sin6_len = sizeof(*dst6);
		dst6->sin6_addr = addr->v6;
rtalloc_ign((struct route *)&ro6, (RTF_CLONING | RTF_PRCLONING)); 3614 rt = ro6.ro_rt; 3615 break; 3616 #endif /* INET6 */ 3617 } 3618 3619 if (rt && rt->rt_ifp) { 3620 mss = rt->rt_ifp->if_mtu - hlen - sizeof(struct tcphdr); 3621 mss = max(tcp_mssdflt, mss); 3622 RTFREE(rt); 3623 } 3624 mss = min(mss, offer); 3625 mss = max(mss, 64); /* sanity - at least max opt space */ 3626 return (mss); 3627 } 3628 3629 void 3630 pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr) 3631 { 3632 struct pf_rule *r = s->rule.ptr; 3633 3634 s->rt_kif = NULL; 3635 if (!r->rt || r->rt == PF_FASTROUTE) 3636 return; 3637 switch (s->key[PF_SK_WIRE]->af) { 3638 #ifdef INET 3639 case AF_INET: 3640 pf_map_addr(AF_INET, r, saddr, &s->rt_addr, NULL, 3641 &s->nat_src_node); 3642 s->rt_kif = r->rpool.cur->kif; 3643 break; 3644 #endif /* INET */ 3645 #ifdef INET6 3646 case AF_INET6: 3647 pf_map_addr(AF_INET6, r, saddr, &s->rt_addr, NULL, 3648 &s->nat_src_node); 3649 s->rt_kif = r->rpool.cur->kif; 3650 break; 3651 #endif /* INET6 */ 3652 } 3653 } 3654 3655 u_int32_t 3656 pf_tcp_iss(struct pf_pdesc *pd) 3657 { 3658 MD5_CTX ctx; 3659 u_int32_t digest[4]; 3660 3661 if (pf_tcp_secret_init == 0) { 3662 lwkt_gettoken(&pf_gtoken); 3663 if (pf_tcp_secret_init == 0) { 3664 karc4random_buf(pf_tcp_secret, sizeof(pf_tcp_secret)); 3665 MD5Init(&pf_tcp_secret_ctx); 3666 MD5Update(&pf_tcp_secret_ctx, pf_tcp_secret, 3667 sizeof(pf_tcp_secret)); 3668 pf_tcp_secret_init = 1; 3669 } 3670 lwkt_reltoken(&pf_gtoken); 3671 } 3672 ctx = pf_tcp_secret_ctx; 3673 3674 MD5Update(&ctx, (char *)&pd->hdr.tcp->th_sport, sizeof(u_short)); 3675 MD5Update(&ctx, (char *)&pd->hdr.tcp->th_dport, sizeof(u_short)); 3676 if (pd->af == AF_INET6) { 3677 MD5Update(&ctx, (char *)&pd->src->v6, sizeof(struct in6_addr)); 3678 MD5Update(&ctx, (char *)&pd->dst->v6, sizeof(struct in6_addr)); 3679 } else { 3680 MD5Update(&ctx, (char *)&pd->src->v4, sizeof(struct in_addr)); 3681 MD5Update(&ctx, (char *)&pd->dst->v4, sizeof(struct in_addr)); 3682 } 3683 MD5Final((u_char *)digest, &ctx); 3684 pf_tcp_iss_off += 4096; 3685 3686 return (digest[0] + pd->hdr.tcp->th_seq + pf_tcp_iss_off); 3687 } 3688 3689 int 3690 pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, 3691 struct pfi_kif *kif, struct mbuf *m, int off, void *h, 3692 struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm, 3693 struct ifqueue *ifq, struct inpcb *inp) 3694 { 3695 struct pf_rule *nr = NULL; 3696 struct pf_addr *saddr = pd->src, *daddr = pd->dst; 3697 sa_family_t af = pd->af; 3698 struct pf_rule *r, *a = NULL; 3699 struct pf_ruleset *ruleset = NULL; 3700 struct pf_src_node *nsn = NULL; 3701 struct tcphdr *th = pd->hdr.tcp; 3702 struct pf_state_key *skw = NULL, *sks = NULL; 3703 struct pf_state_key *sk = NULL, *nk = NULL; 3704 u_short reason; 3705 int rewrite = 0, hdrlen = 0; 3706 int tag = -1, rtableid = -1; 3707 int asd = 0; 3708 int match = 0; 3709 int state_icmp = 0; 3710 u_int16_t sport = 0, dport = 0; 3711 u_int16_t bproto_sum = 0, bip_sum = 0; 3712 u_int8_t icmptype = 0, icmpcode = 0; 3713 3714 3715 if (direction == PF_IN && pf_check_congestion(ifq)) { 3716 REASON_SET(&reason, PFRES_CONGEST); 3717 return (PF_DROP); 3718 } 3719 3720 if (inp != NULL) 3721 pd->lookup.done = pf_socket_lookup(direction, pd); 3722 else if (debug_pfugidhack) { 3723 DPFPRINTF(PF_DEBUG_MISC, ("pf: unlocked lookup\n")); 3724 pd->lookup.done = pf_socket_lookup(direction, pd); 3725 } 3726 3727 switch (pd->proto) { 3728 case IPPROTO_TCP: 3729 sport = th->th_sport; 3730 dport = th->th_dport; 3731 
hdrlen = sizeof(*th); 3732 break; 3733 case IPPROTO_UDP: 3734 sport = pd->hdr.udp->uh_sport; 3735 dport = pd->hdr.udp->uh_dport; 3736 hdrlen = sizeof(*pd->hdr.udp); 3737 break; 3738 #ifdef INET 3739 case IPPROTO_ICMP: 3740 if (pd->af != AF_INET) 3741 break; 3742 sport = dport = pd->hdr.icmp->icmp_id; 3743 hdrlen = sizeof(*pd->hdr.icmp); 3744 icmptype = pd->hdr.icmp->icmp_type; 3745 icmpcode = pd->hdr.icmp->icmp_code; 3746 3747 if (icmptype == ICMP_UNREACH || 3748 icmptype == ICMP_SOURCEQUENCH || 3749 icmptype == ICMP_REDIRECT || 3750 icmptype == ICMP_TIMXCEED || 3751 icmptype == ICMP_PARAMPROB) 3752 state_icmp++; 3753 break; 3754 #endif /* INET */ 3755 #ifdef INET6 3756 case IPPROTO_ICMPV6: 3757 if (af != AF_INET6) 3758 break; 3759 sport = dport = pd->hdr.icmp6->icmp6_id; 3760 hdrlen = sizeof(*pd->hdr.icmp6); 3761 icmptype = pd->hdr.icmp6->icmp6_type; 3762 icmpcode = pd->hdr.icmp6->icmp6_code; 3763 3764 if (icmptype == ICMP6_DST_UNREACH || 3765 icmptype == ICMP6_PACKET_TOO_BIG || 3766 icmptype == ICMP6_TIME_EXCEEDED || 3767 icmptype == ICMP6_PARAM_PROB) 3768 state_icmp++; 3769 break; 3770 #endif /* INET6 */ 3771 default: 3772 sport = dport = hdrlen = 0; 3773 break; 3774 } 3775 3776 r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); 3777 3778 /* check packet for BINAT/NAT/RDR */ 3779 if ((nr = pf_get_translation(pd, m, off, direction, kif, &nsn, 3780 &skw, &sks, &sk, &nk, saddr, daddr, sport, dport)) != NULL) { 3781 if (nk == NULL || sk == NULL) { 3782 REASON_SET(&reason, PFRES_MEMORY); 3783 goto cleanup; 3784 } 3785 3786 if (pd->ip_sum) 3787 bip_sum = *pd->ip_sum; 3788 3789 m->m_flags &= ~M_HASH; 3790 switch (pd->proto) { 3791 case IPPROTO_TCP: 3792 bproto_sum = th->th_sum; 3793 pd->proto_sum = &th->th_sum; 3794 3795 if (PF_ANEQ(saddr, &nk->addr[pd->sidx], af) || 3796 nk->port[pd->sidx] != sport) { 3797 pf_change_ap(saddr, &th->th_sport, pd->ip_sum, 3798 &th->th_sum, &nk->addr[pd->sidx], 3799 nk->port[pd->sidx], 0, af); 3800 pd->sport = &th->th_sport; 3801 sport = th->th_sport; 3802 } 3803 3804 if (PF_ANEQ(daddr, &nk->addr[pd->didx], af) || 3805 nk->port[pd->didx] != dport) { 3806 pf_change_ap(daddr, &th->th_dport, pd->ip_sum, 3807 &th->th_sum, &nk->addr[pd->didx], 3808 nk->port[pd->didx], 0, af); 3809 dport = th->th_dport; 3810 pd->dport = &th->th_dport; 3811 } 3812 rewrite++; 3813 break; 3814 case IPPROTO_UDP: 3815 bproto_sum = pd->hdr.udp->uh_sum; 3816 pd->proto_sum = &pd->hdr.udp->uh_sum; 3817 3818 if (PF_ANEQ(saddr, &nk->addr[pd->sidx], af) || 3819 nk->port[pd->sidx] != sport) { 3820 pf_change_ap(saddr, &pd->hdr.udp->uh_sport, 3821 pd->ip_sum, &pd->hdr.udp->uh_sum, 3822 &nk->addr[pd->sidx], 3823 nk->port[pd->sidx], 1, af); 3824 sport = pd->hdr.udp->uh_sport; 3825 pd->sport = &pd->hdr.udp->uh_sport; 3826 } 3827 3828 if (PF_ANEQ(daddr, &nk->addr[pd->didx], af) || 3829 nk->port[pd->didx] != dport) { 3830 pf_change_ap(daddr, &pd->hdr.udp->uh_dport, 3831 pd->ip_sum, &pd->hdr.udp->uh_sum, 3832 &nk->addr[pd->didx], 3833 nk->port[pd->didx], 1, af); 3834 dport = pd->hdr.udp->uh_dport; 3835 pd->dport = &pd->hdr.udp->uh_dport; 3836 } 3837 rewrite++; 3838 break; 3839 #ifdef INET 3840 case IPPROTO_ICMP: 3841 nk->port[0] = nk->port[1]; 3842 if (PF_ANEQ(saddr, &nk->addr[pd->sidx], AF_INET)) 3843 pf_change_a(&saddr->v4.s_addr, pd->ip_sum, 3844 nk->addr[pd->sidx].v4.s_addr, 0); 3845 3846 if (PF_ANEQ(daddr, &nk->addr[pd->didx], AF_INET)) 3847 pf_change_a(&daddr->v4.s_addr, pd->ip_sum, 3848 nk->addr[pd->didx].v4.s_addr, 0); 3849 3850 if (nk->port[1] != pd->hdr.icmp->icmp_id) { 3851 
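				/*
				 * The ICMP echo id serves as the "port";
				 * rewrite it and patch the ICMP checksum
				 * accordingly.
				 */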
				pd->hdr.icmp->icmp_cksum = pf_cksum_fixup(
				    pd->hdr.icmp->icmp_cksum, sport,
				    nk->port[1], 0);
				pd->hdr.icmp->icmp_id = nk->port[1];
				pd->sport = &pd->hdr.icmp->icmp_id;
			}
			m_copyback(m, off, ICMP_MINLEN, (caddr_t)pd->hdr.icmp);
			break;
#endif /* INET */
#ifdef INET6
		case IPPROTO_ICMPV6:
			nk->port[0] = nk->port[1];
			if (PF_ANEQ(saddr, &nk->addr[pd->sidx], AF_INET6))
				pf_change_a6(saddr,
				    &pd->hdr.icmp6->icmp6_cksum,
				    &nk->addr[pd->sidx], 0);

			if (PF_ANEQ(daddr, &nk->addr[pd->didx], AF_INET6))
				pf_change_a6(daddr,
				    &pd->hdr.icmp6->icmp6_cksum,
				    &nk->addr[pd->didx], 0);
			rewrite++;
			break;
#endif /* INET6 */
		default:
			switch (af) {
#ifdef INET
			case AF_INET:
				if (PF_ANEQ(saddr,
				    &nk->addr[pd->sidx], AF_INET))
					pf_change_a(&saddr->v4.s_addr,
					    pd->ip_sum,
					    nk->addr[pd->sidx].v4.s_addr, 0);

				if (PF_ANEQ(daddr,
				    &nk->addr[pd->didx], AF_INET))
					pf_change_a(&daddr->v4.s_addr,
					    pd->ip_sum,
					    nk->addr[pd->didx].v4.s_addr, 0);
				break;
#endif /* INET */
#ifdef INET6
			case AF_INET6:
				if (PF_ANEQ(saddr,
				    &nk->addr[pd->sidx], AF_INET6))
					PF_ACPY(saddr, &nk->addr[pd->sidx], af);

				if (PF_ANEQ(daddr,
				    &nk->addr[pd->didx], AF_INET6))
					PF_ACPY(daddr, &nk->addr[pd->didx], af);
				break;
#endif /* INET6 */
			}
			break;
		}
		if (nr->natpass)
			r = NULL;
		pd->nat_rule = nr;
	}

	while (r != NULL) {
		r->evaluations++;
		if (pfi_kif_match(r->kif, kif) == r->ifnot)
			r = r->skip[PF_SKIP_IFP].ptr;
		else if (r->direction && r->direction != direction)
			r = r->skip[PF_SKIP_DIR].ptr;
		else if (r->af && r->af != af)
			r = r->skip[PF_SKIP_AF].ptr;
		else if (r->proto && r->proto != pd->proto)
			r = r->skip[PF_SKIP_PROTO].ptr;
		else if (PF_MISMATCHAW(&r->src.addr, saddr, af,
		    r->src.neg, kif))
			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
		/* tcp/udp only. port_op always 0 in other cases */
		else if (r->src.port_op && !pf_match_port(r->src.port_op,
		    r->src.port[0], r->src.port[1], sport))
			r = r->skip[PF_SKIP_SRC_PORT].ptr;
		else if (PF_MISMATCHAW(&r->dst.addr, daddr, af,
		    r->dst.neg, NULL))
			r = r->skip[PF_SKIP_DST_ADDR].ptr;
		/* tcp/udp only. port_op always 0 in other cases */
		else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
		    r->dst.port[0], r->dst.port[1], dport))
			r = r->skip[PF_SKIP_DST_PORT].ptr;
		/* icmp only. type always 0 in other cases */
		else if (r->type && r->type != icmptype + 1)
			r = TAILQ_NEXT(r, entries);
		/* icmp only. code always 0 in other cases */
		else if (r->code && r->code != icmpcode + 1)
			r = TAILQ_NEXT(r, entries);
		else if (r->tos && !(r->tos == pd->tos))
			r = TAILQ_NEXT(r, entries);
		else if (r->rule_flag & PFRULE_FRAGMENT)
			r = TAILQ_NEXT(r, entries);
		else if (pd->proto == IPPROTO_TCP &&
		    (r->flagset & th->th_flags) != r->flags)
			r = TAILQ_NEXT(r, entries);
		/* tcp/udp only. uid.op always 0 in other cases */
		else if (r->uid.op && (pd->lookup.done || (pd->lookup.done =
		    pf_socket_lookup(direction, pd), 1)) &&
		    !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1],
		    pd->lookup.uid))
			r = TAILQ_NEXT(r, entries);
		/* tcp/udp only. gid.op always 0 in other cases */
		else if (r->gid.op && (pd->lookup.done || (pd->lookup.done =
		    pf_socket_lookup(direction, pd), 1)) &&
		    !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1],
		    pd->lookup.gid))
			r = TAILQ_NEXT(r, entries);
		else if (r->prob &&
		    r->prob <= karc4random())
			r = TAILQ_NEXT(r, entries);
		else if (r->match_tag && !pf_match_tag(m, r, &tag))
			r = TAILQ_NEXT(r, entries);
		else if (r->os_fingerprint != PF_OSFP_ANY &&
		    (pd->proto != IPPROTO_TCP || !pf_osfp_match(
		    pf_osfp_fingerprint(pd, m, off, th),
		    r->os_fingerprint)))
			r = TAILQ_NEXT(r, entries);
		else {
			if (r->tag)
				tag = r->tag;
			if (r->rtableid >= 0)
				rtableid = r->rtableid;
			if (r->anchor == NULL) {
				match = 1;
				*rm = r;
				*am = a;
				*rsm = ruleset;
				if ((*rm)->quick)
					break;
				r = TAILQ_NEXT(r, entries);
			} else
				pf_step_into_anchor(&asd, &ruleset,
				    PF_RULESET_FILTER, &r, &a, &match);
		}
		if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
		    PF_RULESET_FILTER, &r, &a, &match))
			break;
	}
	r = *rm;
	a = *am;
	ruleset = *rsm;

	REASON_SET(&reason, PFRES_MATCH);

	if (r->log || (nr != NULL && nr->log)) {
		if (rewrite)
			m_copyback(m, off, hdrlen, pd->hdr.any);
		PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? r : nr,
		    a, ruleset, pd);
	}

	if ((r->action == PF_DROP) &&
	    ((r->rule_flag & PFRULE_RETURNRST) ||
	    (r->rule_flag & PFRULE_RETURNICMP) ||
	    (r->rule_flag & PFRULE_RETURN))) {
		/* undo NAT changes, if they have taken place */
		if (nr != NULL) {
			PF_ACPY(saddr, &sk->addr[pd->sidx], af);
			PF_ACPY(daddr, &sk->addr[pd->didx], af);
			if (pd->sport)
				*pd->sport = sk->port[pd->sidx];
			if (pd->dport)
				*pd->dport = sk->port[pd->didx];
			if (pd->proto_sum)
				*pd->proto_sum = bproto_sum;
			if (pd->ip_sum)
				*pd->ip_sum = bip_sum;
			m_copyback(m, off, hdrlen, pd->hdr.any);
		}
		if (pd->proto == IPPROTO_TCP &&
		    ((r->rule_flag & PFRULE_RETURNRST) ||
		    (r->rule_flag & PFRULE_RETURN)) &&
		    !(th->th_flags & TH_RST)) {
			u_int32_t	 ack = ntohl(th->th_seq) + pd->p_len;
			int		 len = 0;
			struct ip	*h4;
#ifdef INET6
			struct ip6_hdr	*h6;
#endif
			switch (af) {
			case AF_INET:
				h4 = mtod(m, struct ip *);
				len = ntohs(h4->ip_len) - off;
				break;
#ifdef INET6
			case AF_INET6:
				h6 = mtod(m, struct ip6_hdr *);
				len = ntohs(h6->ip6_plen) -
				    (off - sizeof(*h6));
				break;
#endif
			}

			if (pf_check_proto_cksum(m, off, len, IPPROTO_TCP, af))
				REASON_SET(&reason, PFRES_PROTCKSUM);
			else {
				if (th->th_flags & TH_SYN)
					ack++;
				if (th->th_flags & TH_FIN)
					ack++;
				pf_send_tcp(r, af, pd->dst,
				    pd->src, th->th_dport, th->th_sport,
				    ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0,
				    r->return_ttl, 1, 0, pd->eh, kif->pfik_ifp);
			}
		} else if (pd->proto != IPPROTO_ICMP && af == AF_INET &&
		    r->return_icmp)
			pf_send_icmp(m, r->return_icmp >> 8,
			    r->return_icmp & 255, af, r);
		else if (pd->proto != IPPROTO_ICMPV6 && af == AF_INET6 &&
		    r->return_icmp6)
			pf_send_icmp(m, r->return_icmp6 >> 8,
			    r->return_icmp6 & 255, af, r);
	}

	if (r->action == PF_DROP)
		goto cleanup;

	if (pf_tag_packet(m, tag, rtableid)) {
		REASON_SET(&reason, PFRES_MEMORY);
		goto cleanup;
	}

	if (!state_icmp && (r->keep_state ||
nr != NULL || 4074 (pd->flags & PFDESC_TCP_NORM))) { 4075 int action; 4076 action = pf_create_state(r, nr, a, pd, nsn, skw, sks, nk, sk, m, 4077 off, sport, dport, &rewrite, kif, sm, tag, bproto_sum, 4078 bip_sum, hdrlen); 4079 if (action != PF_PASS) 4080 return (action); 4081 } 4082 4083 /* copy back packet headers if we performed NAT operations */ 4084 if (rewrite) 4085 m_copyback(m, off, hdrlen, pd->hdr.any); 4086 4087 return (PF_PASS); 4088 4089 cleanup: 4090 if (sk != NULL) 4091 kfree(sk, M_PFSTATEKEYPL); 4092 if (nk != NULL) 4093 kfree(nk, M_PFSTATEKEYPL); 4094 return (PF_DROP); 4095 } 4096 4097 static __inline int 4098 pf_create_state(struct pf_rule *r, struct pf_rule *nr, struct pf_rule *a, 4099 struct pf_pdesc *pd, struct pf_src_node *nsn, struct pf_state_key *skw, 4100 struct pf_state_key *sks, struct pf_state_key *nk, struct pf_state_key *sk, 4101 struct mbuf *m, int off, u_int16_t sport, u_int16_t dport, int *rewrite, 4102 struct pfi_kif *kif, struct pf_state **sm, int tag, u_int16_t bproto_sum, 4103 u_int16_t bip_sum, int hdrlen) 4104 { 4105 struct pf_state *s = NULL; 4106 struct pf_src_node *sn = NULL; 4107 struct tcphdr *th = pd->hdr.tcp; 4108 u_int16_t mss = tcp_mssdflt; 4109 u_short reason; 4110 int cpu = mycpu->gd_cpuid; 4111 4112 /* check maximums */ 4113 if (r->max_states && (r->states_cur >= r->max_states)) { 4114 PF_INC_LCOUNTER(LCNT_STATES); 4115 REASON_SET(&reason, PFRES_MAXSTATES); 4116 return (PF_DROP); 4117 } 4118 /* src node for filter rule */ 4119 if ((r->rule_flag & PFRULE_SRCTRACK || 4120 r->rpool.opts & PF_POOL_STICKYADDR) && 4121 pf_insert_src_node(&sn, r, pd->src, pd->af) != 0) { 4122 REASON_SET(&reason, PFRES_SRCLIMIT); 4123 goto csfailed; 4124 } 4125 /* src node for translation rule */ 4126 if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) && 4127 pf_insert_src_node(&nsn, nr, &sk->addr[pd->sidx], pd->af)) { 4128 REASON_SET(&reason, PFRES_SRCLIMIT); 4129 goto csfailed; 4130 } 4131 s = kmalloc(sizeof(struct pf_state), M_PFSTATEPL, M_NOWAIT|M_ZERO); 4132 if (s == NULL) { 4133 REASON_SET(&reason, PFRES_MEMORY); 4134 goto csfailed; 4135 } 4136 lockinit(&s->lk, "pfstlk", 0, 0); 4137 s->id = 0; /* XXX Do we really need that? 
not in OpenBSD */ 4138 s->creatorid = 0; 4139 s->rule.ptr = r; 4140 s->nat_rule.ptr = nr; 4141 s->anchor.ptr = a; 4142 s->state_flags = PFSTATE_CREATEINPROG; 4143 STATE_INC_COUNTERS(s); 4144 if (r->allow_opts) 4145 s->state_flags |= PFSTATE_ALLOWOPTS; 4146 if (r->rule_flag & PFRULE_STATESLOPPY) 4147 s->state_flags |= PFSTATE_SLOPPY; 4148 if (pd->not_cpu_localized) 4149 s->state_flags |= PFSTATE_STACK_GLOBAL; 4150 4151 s->log = r->log & PF_LOG_ALL; 4152 if (nr != NULL) 4153 s->log |= nr->log & PF_LOG_ALL; 4154 switch (pd->proto) { 4155 case IPPROTO_TCP: 4156 s->src.seqlo = ntohl(th->th_seq); 4157 s->src.seqhi = s->src.seqlo + pd->p_len + 1; 4158 if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN && 4159 r->keep_state == PF_STATE_MODULATE) { 4160 /* Generate sequence number modulator */ 4161 if ((s->src.seqdiff = pf_tcp_iss(pd) - s->src.seqlo) == 4162 0) 4163 s->src.seqdiff = 1; 4164 pf_change_a(&th->th_seq, &th->th_sum, 4165 htonl(s->src.seqlo + s->src.seqdiff), 0); 4166 *rewrite = 1; 4167 } else 4168 s->src.seqdiff = 0; 4169 if (th->th_flags & TH_SYN) { 4170 s->src.seqhi++; 4171 s->src.wscale = pf_get_wscale(m, off, 4172 th->th_off, pd->af); 4173 } 4174 s->src.max_win = MAX(ntohs(th->th_win), 1); 4175 if (s->src.wscale & PF_WSCALE_MASK) { 4176 /* Remove scale factor from initial window */ 4177 int win = s->src.max_win; 4178 win += 1 << (s->src.wscale & PF_WSCALE_MASK); 4179 s->src.max_win = (win - 1) >> 4180 (s->src.wscale & PF_WSCALE_MASK); 4181 } 4182 if (th->th_flags & TH_FIN) 4183 s->src.seqhi++; 4184 s->dst.seqhi = 1; 4185 s->dst.max_win = 1; 4186 s->src.state = TCPS_SYN_SENT; 4187 s->dst.state = TCPS_CLOSED; 4188 s->timeout = PFTM_TCP_FIRST_PACKET; 4189 break; 4190 case IPPROTO_UDP: 4191 s->src.state = PFUDPS_SINGLE; 4192 s->dst.state = PFUDPS_NO_TRAFFIC; 4193 s->timeout = PFTM_UDP_FIRST_PACKET; 4194 break; 4195 case IPPROTO_ICMP: 4196 #ifdef INET6 4197 case IPPROTO_ICMPV6: 4198 #endif 4199 s->timeout = PFTM_ICMP_FIRST_PACKET; 4200 break; 4201 default: 4202 s->src.state = PFOTHERS_SINGLE; 4203 s->dst.state = PFOTHERS_NO_TRAFFIC; 4204 s->timeout = PFTM_OTHER_FIRST_PACKET; 4205 } 4206 4207 s->creation = time_second; 4208 s->expire = time_second; 4209 4210 if (sn != NULL) { 4211 s->src_node = sn; 4212 s->src_node->states++; 4213 } 4214 if (nsn != NULL) { 4215 /* XXX We only modify one side for now. */ 4216 PF_ACPY(&nsn->raddr, &nk->addr[1], pd->af); 4217 s->nat_src_node = nsn; 4218 s->nat_src_node->states++; 4219 } 4220 if (pd->proto == IPPROTO_TCP) { 4221 if ((pd->flags & PFDESC_TCP_NORM) && pf_normalize_tcp_init(m, 4222 off, pd, th, &s->src, &s->dst)) { 4223 REASON_SET(&reason, PFRES_MEMORY); 4224 pf_src_tree_remove_state(s); 4225 STATE_DEC_COUNTERS(s); 4226 kfree(s, M_PFSTATEPL); 4227 return (PF_DROP); 4228 } 4229 if ((pd->flags & PFDESC_TCP_NORM) && s->src.scrub && 4230 pf_normalize_tcp_stateful(m, off, pd, &reason, th, s, 4231 &s->src, &s->dst, rewrite)) { 4232 /* This really shouldn't happen!!! 
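 * (presumably: pf_normalize_tcp_init() primed fresh scrub state just
 * above, so the stateful pass has nothing stale to reject yet)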
*/ 4233 DPFPRINTF(PF_DEBUG_URGENT, 4234 ("pf_normalize_tcp_stateful failed on first pkt")); 4235 pf_normalize_tcp_cleanup(s); 4236 pf_src_tree_remove_state(s); 4237 STATE_DEC_COUNTERS(s); 4238 kfree(s, M_PFSTATEPL); 4239 return (PF_DROP); 4240 } 4241 } 4242 s->direction = pd->dir; 4243 4244 if (sk == NULL && pf_state_key_setup(pd, nr, &skw, &sks, &sk, &nk, 4245 pd->src, pd->dst, sport, dport)) { 4246 REASON_SET(&reason, PFRES_MEMORY); 4247 goto csfailed; 4248 } 4249 4250 if (pf_state_insert(BOUND_IFACE(r, kif), skw, sks, s)) { 4251 if (pd->proto == IPPROTO_TCP) 4252 pf_normalize_tcp_cleanup(s); 4253 REASON_SET(&reason, PFRES_STATEINS); 4254 pf_src_tree_remove_state(s); 4255 STATE_DEC_COUNTERS(s); 4256 kfree(s, M_PFSTATEPL); 4257 return (PF_DROP); 4258 } else 4259 *sm = s; 4260 4261 pf_set_rt_ifp(s, pd->src); /* needs s->state_key set */ 4262 if (tag > 0) { 4263 pf_tag_ref(tag); 4264 s->tag = tag; 4265 } 4266 if (pd->proto == IPPROTO_TCP && (th->th_flags & (TH_SYN|TH_ACK)) == 4267 TH_SYN && r->keep_state == PF_STATE_SYNPROXY) { 4268 s->src.state = PF_TCPS_PROXY_SRC; 4269 /* undo NAT changes, if they have taken place */ 4270 if (nr != NULL) { 4271 struct pf_state_key *skt = s->key[PF_SK_WIRE]; 4272 if (pd->dir == PF_OUT) 4273 skt = s->key[PF_SK_STACK]; 4274 PF_ACPY(pd->src, &skt->addr[pd->sidx], pd->af); 4275 PF_ACPY(pd->dst, &skt->addr[pd->didx], pd->af); 4276 if (pd->sport) 4277 *pd->sport = skt->port[pd->sidx]; 4278 if (pd->dport) 4279 *pd->dport = skt->port[pd->didx]; 4280 if (pd->proto_sum) 4281 *pd->proto_sum = bproto_sum; 4282 if (pd->ip_sum) 4283 *pd->ip_sum = bip_sum; 4284 m->m_flags &= ~M_HASH; 4285 m_copyback(m, off, hdrlen, pd->hdr.any); 4286 } 4287 s->src.seqhi = htonl(karc4random()); 4288 /* Find mss option */ 4289 mss = pf_get_mss(m, off, th->th_off, pd->af); 4290 mss = pf_calc_mss(pd->src, pd->af, mss); 4291 mss = pf_calc_mss(pd->dst, pd->af, mss); 4292 s->src.mss = mss; 4293 s->state_flags &= ~PFSTATE_CREATEINPROG; 4294 pf_send_tcp(r, pd->af, pd->dst, pd->src, th->th_dport, 4295 th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1, 4296 TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, NULL, NULL); 4297 REASON_SET(&reason, PFRES_SYNPROXY); 4298 return (PF_SYNPROXY_DROP); 4299 } 4300 4301 s->state_flags &= ~PFSTATE_CREATEINPROG; 4302 return (PF_PASS); 4303 4304 csfailed: 4305 if (sk != NULL) 4306 kfree(sk, M_PFSTATEKEYPL); 4307 if (nk != NULL) 4308 kfree(nk, M_PFSTATEKEYPL); 4309 4310 if (sn != NULL && sn->states == 0 && sn->expire == 0) { 4311 RB_REMOVE(pf_src_tree, &tree_src_tracking[cpu], sn); 4312 PF_INC_SCOUNTER(SCNT_SRC_NODE_REMOVALS); 4313 atomic_add_int(&pf_status.src_nodes, -1); 4314 kfree(sn, M_PFSRCTREEPL); 4315 } 4316 if (nsn != sn && nsn != NULL && nsn->states == 0 && nsn->expire == 0) { 4317 RB_REMOVE(pf_src_tree, &tree_src_tracking[cpu], nsn); 4318 PF_INC_SCOUNTER(SCNT_SRC_NODE_REMOVALS); 4319 atomic_add_int(&pf_status.src_nodes, -1); 4320 kfree(nsn, M_PFSRCTREEPL); 4321 } 4322 if (s) { 4323 pf_src_tree_remove_state(s); 4324 STATE_DEC_COUNTERS(s); 4325 kfree(s, M_PFSTATEPL); 4326 } 4327 4328 return (PF_DROP); 4329 } 4330 4331 int 4332 pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif, 4333 struct mbuf *m, void *h, struct pf_pdesc *pd, struct pf_rule **am, 4334 struct pf_ruleset **rsm) 4335 { 4336 struct pf_rule *r, *a = NULL; 4337 struct pf_ruleset *ruleset = NULL; 4338 sa_family_t af = pd->af; 4339 u_short reason; 4340 int tag = -1; 4341 int asd = 0; 4342 int match = 0; 4343 4344 r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); 
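/*
 * Sketch of the walk below: every cheap parameter test that fails lets
 * the evaluator jump via r->skip[] over the whole run of consecutive
 * rules sharing that same parameter value, so a miss usually costs far
 * fewer comparisons than stepping rule by rule with TAILQ_NEXT().
 */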
4345 while (r != NULL) { 4346 r->evaluations++; 4347 if (pfi_kif_match(r->kif, kif) == r->ifnot) 4348 r = r->skip[PF_SKIP_IFP].ptr; 4349 else if (r->direction && r->direction != direction) 4350 r = r->skip[PF_SKIP_DIR].ptr; 4351 else if (r->af && r->af != af) 4352 r = r->skip[PF_SKIP_AF].ptr; 4353 else if (r->proto && r->proto != pd->proto) 4354 r = r->skip[PF_SKIP_PROTO].ptr; 4355 else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, 4356 r->src.neg, kif)) 4357 r = r->skip[PF_SKIP_SRC_ADDR].ptr; 4358 else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, 4359 r->dst.neg, NULL)) 4360 r = r->skip[PF_SKIP_DST_ADDR].ptr; 4361 else if (r->tos && !(r->tos == pd->tos)) 4362 r = TAILQ_NEXT(r, entries); 4363 else if (r->os_fingerprint != PF_OSFP_ANY) 4364 r = TAILQ_NEXT(r, entries); 4365 else if (pd->proto == IPPROTO_UDP && 4366 (r->src.port_op || r->dst.port_op)) 4367 r = TAILQ_NEXT(r, entries); 4368 else if (pd->proto == IPPROTO_TCP && 4369 (r->src.port_op || r->dst.port_op || r->flagset)) 4370 r = TAILQ_NEXT(r, entries); 4371 else if ((pd->proto == IPPROTO_ICMP || 4372 pd->proto == IPPROTO_ICMPV6) && 4373 (r->type || r->code)) 4374 r = TAILQ_NEXT(r, entries); 4375 else if (r->prob && r->prob <= karc4random()) 4376 r = TAILQ_NEXT(r, entries); 4377 else if (r->match_tag && !pf_match_tag(m, r, &tag)) 4378 r = TAILQ_NEXT(r, entries); 4379 else { 4380 if (r->anchor == NULL) { 4381 match = 1; 4382 *rm = r; 4383 *am = a; 4384 *rsm = ruleset; 4385 if ((*rm)->quick) 4386 break; 4387 r = TAILQ_NEXT(r, entries); 4388 } else 4389 pf_step_into_anchor(&asd, &ruleset, 4390 PF_RULESET_FILTER, &r, &a, &match); 4391 } 4392 if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset, 4393 PF_RULESET_FILTER, &r, &a, &match)) 4394 break; 4395 } 4396 r = *rm; 4397 a = *am; 4398 ruleset = *rsm; 4399 4400 REASON_SET(&reason, PFRES_MATCH); 4401 4402 if (r->log) 4403 PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset, 4404 pd); 4405 4406 if (r->action != PF_PASS) 4407 return (PF_DROP); 4408 4409 if (pf_tag_packet(m, tag, -1)) { 4410 REASON_SET(&reason, PFRES_MEMORY); 4411 return (PF_DROP); 4412 } 4413 4414 return (PF_PASS); 4415 } 4416 4417 /* 4418 * Called with state locked 4419 */ 4420 int 4421 pf_tcp_track_full(struct pf_state_peer *src, struct pf_state_peer *dst, 4422 struct pf_state **state, struct pfi_kif *kif, struct mbuf *m, int off, 4423 struct pf_pdesc *pd, u_short *reason, int *copyback) 4424 { 4425 struct tcphdr *th = pd->hdr.tcp; 4426 u_int16_t win = ntohs(th->th_win); 4427 u_int32_t ack, end, seq, orig_seq; 4428 u_int8_t sws, dws; 4429 int ackskew; 4430 4431 if (src->wscale && dst->wscale && !(th->th_flags & TH_SYN)) { 4432 sws = src->wscale & PF_WSCALE_MASK; 4433 dws = dst->wscale & PF_WSCALE_MASK; 4434 } else { 4435 sws = dws = 0; 4436 } 4437 4438 /* 4439 * Sequence tracking algorithm from Guido van Rooij's paper: 4440 * http://www.madison-gurkha.com/publications/tcp_filtering/ 4441 * tcp_filtering.ps 4442 */ 4443 4444 orig_seq = seq = ntohl(th->th_seq); 4445 if (src->seqlo == 0) { 4446 /* First packet from this end. 
Set its state */ 4447 4448 if ((pd->flags & PFDESC_TCP_NORM || dst->scrub) && 4449 src->scrub == NULL) { 4450 if (pf_normalize_tcp_init(m, off, pd, th, src, dst)) { 4451 REASON_SET(reason, PFRES_MEMORY); 4452 return (PF_DROP); 4453 } 4454 } 4455 4456 /* Deferred generation of sequence number modulator */ 4457 if (dst->seqdiff && !src->seqdiff) { 4458 /* use random iss for the TCP server */ 4459 while ((src->seqdiff = karc4random() - seq) == 0) 4460 ; 4461 ack = ntohl(th->th_ack) - dst->seqdiff; 4462 pf_change_a(&th->th_seq, &th->th_sum, htonl(seq + 4463 src->seqdiff), 0); 4464 pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0); 4465 *copyback = 1; 4466 } else { 4467 ack = ntohl(th->th_ack); 4468 } 4469 4470 end = seq + pd->p_len; 4471 if (th->th_flags & TH_SYN) { 4472 end++; 4473 (*state)->sync_flags |= PFSTATE_GOT_SYN2; 4474 if (dst->wscale & PF_WSCALE_FLAG) { 4475 src->wscale = pf_get_wscale(m, off, th->th_off, 4476 pd->af); 4477 if (src->wscale & PF_WSCALE_FLAG) { 4478 /* Remove scale factor from initial 4479 * window */ 4480 sws = src->wscale & PF_WSCALE_MASK; 4481 win = ((u_int32_t)win + (1 << sws) - 1) 4482 >> sws; 4483 dws = dst->wscale & PF_WSCALE_MASK; 4484 } else { 4485 /* fixup other window */ 4486 dst->max_win <<= dst->wscale & 4487 PF_WSCALE_MASK; 4488 /* in case of a retrans SYN|ACK */ 4489 dst->wscale = 0; 4490 } 4491 } 4492 } 4493 if (th->th_flags & TH_FIN) 4494 end++; 4495 4496 src->seqlo = seq; 4497 if (src->state < TCPS_SYN_SENT) 4498 src->state = TCPS_SYN_SENT; 4499 4500 /* 4501 * May need to slide the window (seqhi may have been set by 4502 * the crappy stack check or if we picked up the connection 4503 * after establishment) 4504 */ 4505 if (src->seqhi == 1 || 4506 SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi)) 4507 src->seqhi = end + MAX(1, dst->max_win << dws); 4508 if (win > src->max_win) 4509 src->max_win = win; 4510 4511 } else { 4512 ack = ntohl(th->th_ack) - dst->seqdiff; 4513 if (src->seqdiff) { 4514 /* Modulate sequence numbers */ 4515 pf_change_a(&th->th_seq, &th->th_sum, htonl(seq + 4516 src->seqdiff), 0); 4517 pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0); 4518 *copyback = 1; 4519 } 4520 end = seq + pd->p_len; 4521 if (th->th_flags & TH_SYN) 4522 end++; 4523 if (th->th_flags & TH_FIN) 4524 end++; 4525 } 4526 4527 if ((th->th_flags & TH_ACK) == 0) { 4528 /* Let it pass through the ack skew check */ 4529 ack = dst->seqlo; 4530 } else if ((ack == 0 && 4531 (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) || 4532 /* broken tcp stacks do not set ack */ 4533 (dst->state < TCPS_SYN_SENT)) { 4534 /* 4535 * Many stacks (ours included) will set the ACK number in a 4536 * FIN|ACK if the SYN times out -- no sequence to ACK. 4537 */ 4538 ack = dst->seqlo; 4539 } 4540 4541 if (seq == end) { 4542 /* Ease sequencing restrictions on no-data packets */ 4543 seq = src->seqlo; 4544 end = seq; 4545 } 4546 4547 ackskew = dst->seqlo - ack; 4548 4549 4550 /* 4551 * Need to demodulate the sequence numbers in any TCP SACK options 4552 * (Selective ACK). We could optionally validate the SACK values 4553 * against the current ACK window, either forwards or backwards, but 4554 * I'm not confident that SACK has been implemented properly 4555 * everywhere. It wouldn't surprise me if several stacks accidentally 4556 * SACK too far backwards of previously ACKed data. There really aren't 4557 * any security implications of bad SACKing unless the target stack 4558 * doesn't validate the option length correctly.
Someone trying to 4559 * spoof into a TCP connection won't bother blindly sending SACK 4560 * options anyway. 4561 */ 4562 if (dst->seqdiff && (th->th_off << 2) > sizeof(struct tcphdr)) { 4563 if (pf_modulate_sack(m, off, pd, th, dst)) 4564 *copyback = 1; 4565 } 4566 4567 4568 #define MAXACKWINDOW (0xffff + 1500) /* 1500 is an arbitrary fudge factor */ 4569 if (SEQ_GEQ(src->seqhi, end) && 4570 /* Last octet inside other's window space */ 4571 SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) && 4572 /* Retrans: not more than one window back */ 4573 (ackskew >= -MAXACKWINDOW) && 4574 /* Acking not more than one reassembled fragment backwards */ 4575 (ackskew <= (MAXACKWINDOW << sws)) && 4576 /* Acking not more than one window forward */ 4577 ((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo || 4578 (orig_seq == src->seqlo + 1) || (orig_seq + 1 == src->seqlo))) 4579 { 4580 /* 4581 * Require an exact/+1 sequence match on resets 4582 * when possible 4583 */ 4584 if (dst->scrub || src->scrub) { 4585 if (pf_normalize_tcp_stateful(m, off, pd, reason, th, 4586 *state, src, dst, copyback)) 4587 return (PF_DROP); 4588 } 4589 4590 /* update max window */ 4591 if (src->max_win < win) 4592 src->max_win = win; 4593 /* synchronize sequencing */ 4594 if (SEQ_GT(end, src->seqlo)) 4595 src->seqlo = end; 4596 /* slide the window of what the other end can send */ 4597 if (SEQ_GEQ(ack + (win << sws), dst->seqhi)) 4598 dst->seqhi = ack + MAX((win << sws), 1); 4599 4600 4601 /* update states */ 4602 if (th->th_flags & TH_SYN) 4603 if (src->state < TCPS_SYN_SENT) 4604 src->state = TCPS_SYN_SENT; 4605 if (th->th_flags & TH_FIN) 4606 if (src->state < TCPS_CLOSING) 4607 src->state = TCPS_CLOSING; 4608 if (th->th_flags & TH_ACK) { 4609 if (dst->state == TCPS_SYN_SENT) { 4610 dst->state = TCPS_ESTABLISHED; 4611 if (src->state == TCPS_ESTABLISHED && 4612 (*state)->src_node != NULL && 4613 pf_src_connlimit(*state)) { 4614 REASON_SET(reason, PFRES_SRCLIMIT); 4615 return (PF_DROP); 4616 } 4617 } else if (dst->state == TCPS_CLOSING) 4618 dst->state = TCPS_FIN_WAIT_2; 4619 } 4620 if (th->th_flags & TH_RST) 4621 src->state = dst->state = TCPS_TIME_WAIT; 4622 4623 /* update expire time */ 4624 (*state)->expire = time_second; 4625 if (src->state >= TCPS_FIN_WAIT_2 && 4626 dst->state >= TCPS_FIN_WAIT_2) 4627 (*state)->timeout = PFTM_TCP_CLOSED; 4628 else if (src->state >= TCPS_CLOSING && 4629 dst->state >= TCPS_CLOSING) 4630 (*state)->timeout = PFTM_TCP_FIN_WAIT; 4631 else if (src->state < TCPS_ESTABLISHED || 4632 dst->state < TCPS_ESTABLISHED) 4633 (*state)->timeout = PFTM_TCP_OPENING; 4634 else if (src->state >= TCPS_CLOSING || 4635 dst->state >= TCPS_CLOSING) 4636 (*state)->timeout = PFTM_TCP_CLOSING; 4637 else if ((th->th_flags & TH_SYN) && 4638 ((*state)->state_flags & PFSTATE_SLOPPY)) 4639 (*state)->timeout = PFTM_TCP_FIRST_PACKET; 4640 else 4641 (*state)->timeout = PFTM_TCP_ESTABLISHED; 4642 4643 /* Fall through to PASS packet */ 4644 4645 } else if ((dst->state < TCPS_SYN_SENT || 4646 dst->state >= TCPS_FIN_WAIT_2 || 4647 src->state >= TCPS_FIN_WAIT_2) && 4648 SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) && 4649 /* Within a window forward of the originating packet */ 4650 SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) { 4651 /* Within a window backward of the originating packet */ 4652 4653 /* 4654 * This currently handles three situations: 4655 * 1) Stupid stacks will shotgun SYNs before their peer 4656 * replies. 
4657 * 2) When PF catches an already established stream (the 4658 * firewall rebooted, the state table was flushed, routes 4659 * changed...) 4660 * 3) Packets get funky immediately after the connection 4661 * closes (this should catch Solaris spurious ACK|FINs 4662 * that web servers like to spew after a close) 4663 * 4664 * This must be a little more careful than the above code 4665 * since packet floods will also be caught here. We don't 4666 * update the TTL here to mitigate the damage of a packet 4667 * flood and so the same code can handle awkward establishment 4668 * and a loosened connection close. 4669 * In the establishment case, a correct peer response will 4670 * validate the connection, go through the normal state code 4671 * and keep updating the state TTL. 4672 */ 4673 4674 if (pf_status.debug >= PF_DEBUG_MISC) { 4675 kprintf("pf: loose state match: "); 4676 pf_print_state(*state); 4677 pf_print_flags(th->th_flags); 4678 kprintf(" seq=%u (%u) ack=%u len=%u ackskew=%d " 4679 "pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack, pd->p_len, 4680 ackskew, (unsigned long long)(*state)->packets[0], 4681 (unsigned long long)(*state)->packets[1], 4682 pd->dir == PF_IN ? "in" : "out", 4683 pd->dir == (*state)->direction ? "fwd" : "rev"); 4684 } 4685 4686 if (dst->scrub || src->scrub) { 4687 if (pf_normalize_tcp_stateful(m, off, pd, reason, th, 4688 *state, src, dst, copyback)) 4689 return (PF_DROP); 4690 } 4691 4692 /* update max window */ 4693 if (src->max_win < win) 4694 src->max_win = win; 4695 /* synchronize sequencing */ 4696 if (SEQ_GT(end, src->seqlo)) 4697 src->seqlo = end; 4698 /* slide the window of what the other end can send */ 4699 if (SEQ_GEQ(ack + (win << sws), dst->seqhi)) 4700 dst->seqhi = ack + MAX((win << sws), 1); 4701 4702 /* 4703 * Cannot set dst->seqhi here since this could be a shotgunned 4704 * SYN and not an already established connection. 4705 */ 4706 4707 if (th->th_flags & TH_FIN) 4708 if (src->state < TCPS_CLOSING) 4709 src->state = TCPS_CLOSING; 4710 if (th->th_flags & TH_RST) 4711 src->state = dst->state = TCPS_TIME_WAIT; 4712 4713 /* Fall through to PASS packet */ 4714 4715 } else if ((*state)->pickup_mode == PF_PICKUPS_HASHONLY || 4716 ((*state)->pickup_mode == PF_PICKUPS_ENABLED && 4717 ((*state)->sync_flags & PFSTATE_GOT_SYN_MASK) != 4718 PFSTATE_GOT_SYN_MASK)) { 4719 /* 4720 * If pickup mode is hash only, do not fail on sequence checks. 4721 * 4722 * If pickup mode is enabled and we did not see the SYN in 4723 * both direction, do not fail on sequence checks because 4724 * we do not have complete information on window scale. 4725 * 4726 * Adjust expiration and fall through to PASS packet. 4727 * XXX Add a FIN check to reduce timeout? 
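 *
 * Without both SYNs the peer's window scale is unknown: e.g. a
 * window of 0x1000 under an unseen wscale of 7 really allows
 * 0x1000 << 7 = 0x80000 bytes in flight, so a strict sequence
 * check here could only be guesswork.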
4728 */ 4729 (*state)->expire = time_second; 4730 } else { 4731 /* 4732 * Failure processing 4733 */ 4734 if ((*state)->dst.state == TCPS_SYN_SENT && 4735 (*state)->src.state == TCPS_SYN_SENT) { 4736 /* Send RST for state mismatches during handshake */ 4737 if (!(th->th_flags & TH_RST)) 4738 pf_send_tcp((*state)->rule.ptr, pd->af, 4739 pd->dst, pd->src, th->th_dport, 4740 th->th_sport, ntohl(th->th_ack), 0, 4741 TH_RST, 0, 0, 4742 (*state)->rule.ptr->return_ttl, 1, 0, 4743 pd->eh, kif->pfik_ifp); 4744 src->seqlo = 0; 4745 src->seqhi = 1; 4746 src->max_win = 1; 4747 } else if (pf_status.debug >= PF_DEBUG_MISC) { 4748 kprintf("pf: BAD state: "); 4749 pf_print_state(*state); 4750 pf_print_flags(th->th_flags); 4751 kprintf(" seq=%u (%u) ack=%u len=%u ackskew=%d " 4752 "pkts=%llu:%llu dir=%s,%s\n", 4753 seq, orig_seq, ack, pd->p_len, ackskew, 4754 (unsigned long long)(*state)->packets[0], 4755 (unsigned long long)(*state)->packets[1], 4756 pd->dir == PF_IN ? "in" : "out", 4757 pd->dir == (*state)->direction ? "fwd" : "rev"); 4758 kprintf("pf: State failure on: %c %c %c %c | %c %c\n", 4759 SEQ_GEQ(src->seqhi, end) ? ' ' : '1', 4760 SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ? 4761 ' ': '2', 4762 (ackskew >= -MAXACKWINDOW) ? ' ' : '3', 4763 (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4', 4764 SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) ?' ' :'5', 4765 SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' ' :'6'); 4766 } 4767 REASON_SET(reason, PFRES_BADSTATE); 4768 return (PF_DROP); 4769 } 4770 4771 return (PF_PASS); 4772 } 4773 4774 /* 4775 * Called with state locked 4776 */ 4777 int 4778 pf_tcp_track_sloppy(struct pf_state_peer *src, struct pf_state_peer *dst, 4779 struct pf_state **state, struct pf_pdesc *pd, u_short *reason) 4780 { 4781 struct tcphdr *th = pd->hdr.tcp; 4782 4783 if (th->th_flags & TH_SYN) 4784 if (src->state < TCPS_SYN_SENT) 4785 src->state = TCPS_SYN_SENT; 4786 if (th->th_flags & TH_FIN) 4787 if (src->state < TCPS_CLOSING) 4788 src->state = TCPS_CLOSING; 4789 if (th->th_flags & TH_ACK) { 4790 if (dst->state == TCPS_SYN_SENT) { 4791 dst->state = TCPS_ESTABLISHED; 4792 if (src->state == TCPS_ESTABLISHED && 4793 (*state)->src_node != NULL && 4794 pf_src_connlimit(*state)) { 4795 REASON_SET(reason, PFRES_SRCLIMIT); 4796 return (PF_DROP); 4797 } 4798 } else if (dst->state == TCPS_CLOSING) { 4799 dst->state = TCPS_FIN_WAIT_2; 4800 } else if (src->state == TCPS_SYN_SENT && 4801 dst->state < TCPS_SYN_SENT) { 4802 /* 4803 * Handle a special sloppy case where we only see one 4804 * half of the connection. If there is a ACK after 4805 * the initial SYN without ever seeing a packet from 4806 * the destination, set the connection to established. 4807 */ 4808 dst->state = src->state = TCPS_ESTABLISHED; 4809 if ((*state)->src_node != NULL && 4810 pf_src_connlimit(*state)) { 4811 REASON_SET(reason, PFRES_SRCLIMIT); 4812 return (PF_DROP); 4813 } 4814 } else if (src->state == TCPS_CLOSING && 4815 dst->state == TCPS_ESTABLISHED && 4816 dst->seqlo == 0) { 4817 /* 4818 * Handle the closing of half connections where we 4819 * don't see the full bidirectional FIN/ACK+ACK 4820 * handshake. 
4821 */ 4822 dst->state = TCPS_CLOSING; 4823 } 4824 } 4825 if (th->th_flags & TH_RST) 4826 src->state = dst->state = TCPS_TIME_WAIT; 4827 4828 /* update expire time */ 4829 (*state)->expire = time_second; 4830 if (src->state >= TCPS_FIN_WAIT_2 && 4831 dst->state >= TCPS_FIN_WAIT_2) 4832 (*state)->timeout = PFTM_TCP_CLOSED; 4833 else if (src->state >= TCPS_CLOSING && 4834 dst->state >= TCPS_CLOSING) 4835 (*state)->timeout = PFTM_TCP_FIN_WAIT; 4836 else if (src->state < TCPS_ESTABLISHED || 4837 dst->state < TCPS_ESTABLISHED) 4838 (*state)->timeout = PFTM_TCP_OPENING; 4839 else if (src->state >= TCPS_CLOSING || 4840 dst->state >= TCPS_CLOSING) 4841 (*state)->timeout = PFTM_TCP_CLOSING; 4842 else if ((th->th_flags & TH_SYN) && 4843 ((*state)->state_flags & PFSTATE_SLOPPY)) 4844 (*state)->timeout = PFTM_TCP_FIRST_PACKET; 4845 else 4846 (*state)->timeout = PFTM_TCP_ESTABLISHED; 4847 4848 return (PF_PASS); 4849 } 4850 4851 /* 4852 * Test TCP connection state. Caller must hold the state locked. 4853 */ 4854 int 4855 pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, 4856 struct mbuf *m, int off, void *h, struct pf_pdesc *pd, 4857 u_short *reason) 4858 { 4859 struct pf_state_key_cmp key; 4860 struct tcphdr *th = pd->hdr.tcp; 4861 int copyback = 0; 4862 int error; 4863 struct pf_state_peer *src, *dst; 4864 struct pf_state_key *sk; 4865 4866 bzero(&key, sizeof(key)); 4867 key.af = pd->af; 4868 key.proto = IPPROTO_TCP; 4869 if (direction == PF_IN) { /* wire side, straight */ 4870 PF_ACPY(&key.addr[0], pd->src, key.af); 4871 PF_ACPY(&key.addr[1], pd->dst, key.af); 4872 key.port[0] = th->th_sport; 4873 key.port[1] = th->th_dport; 4874 if (pf_status.debug >= PF_DEBUG_MISC) { 4875 kprintf("test-tcp IN (%08x:%d) -> (%08x:%d)\n", 4876 ntohl(key.addr[0].addr32[0]), 4877 ntohs(key.port[0]), 4878 ntohl(key.addr[1].addr32[0]), 4879 ntohs(key.port[1])); 4880 } 4881 } else { /* stack side, reverse */ 4882 PF_ACPY(&key.addr[1], pd->src, key.af); 4883 PF_ACPY(&key.addr[0], pd->dst, key.af); 4884 key.port[1] = th->th_sport; 4885 key.port[0] = th->th_dport; 4886 if (pf_status.debug >= PF_DEBUG_MISC) { 4887 kprintf("test-tcp OUT (%08x:%d) <- (%08x:%d)\n", 4888 ntohl(key.addr[0].addr32[0]), 4889 ntohs(key.port[0]), 4890 ntohl(key.addr[1].addr32[0]), 4891 ntohs(key.port[1])); 4892 } 4893 } 4894 4895 STATE_LOOKUP(kif, &key, direction, *state, m); 4896 lockmgr(&(*state)->lk, LK_EXCLUSIVE); 4897 4898 if (direction == (*state)->direction) { 4899 src = &(*state)->src; 4900 dst = &(*state)->dst; 4901 } else { 4902 src = &(*state)->dst; 4903 dst = &(*state)->src; 4904 } 4905 4906 sk = (*state)->key[pd->didx]; 4907 4908 if ((*state)->src.state == PF_TCPS_PROXY_SRC) { 4909 if (direction != (*state)->direction) { 4910 REASON_SET(reason, PFRES_SYNPROXY); 4911 FAIL (PF_SYNPROXY_DROP); 4912 } 4913 if (th->th_flags & TH_SYN) { 4914 if (ntohl(th->th_seq) != (*state)->src.seqlo) { 4915 REASON_SET(reason, PFRES_SYNPROXY); 4916 FAIL (PF_DROP); 4917 } 4918 pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst, 4919 pd->src, th->th_dport, th->th_sport, 4920 (*state)->src.seqhi, ntohl(th->th_seq) + 1, 4921 TH_SYN|TH_ACK, 0, (*state)->src.mss, 0, 1, 4922 0, NULL, NULL); 4923 REASON_SET(reason, PFRES_SYNPROXY); 4924 FAIL (PF_SYNPROXY_DROP); 4925 } else if (!(th->th_flags & TH_ACK) || 4926 (ntohl(th->th_ack) != (*state)->src.seqhi + 1) || 4927 (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) { 4928 REASON_SET(reason, PFRES_SYNPROXY); 4929 FAIL (PF_DROP); 4930 } else if ((*state)->src_node != NULL && 4931 
pf_src_connlimit(*state)) { 4932 REASON_SET(reason, PFRES_SRCLIMIT); 4933 FAIL (PF_DROP); 4934 } else 4935 (*state)->src.state = PF_TCPS_PROXY_DST; 4936 } 4937 if ((*state)->src.state == PF_TCPS_PROXY_DST) { 4938 if (direction == (*state)->direction) { 4939 if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) || 4940 (ntohl(th->th_ack) != (*state)->src.seqhi + 1) || 4941 (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) { 4942 REASON_SET(reason, PFRES_SYNPROXY); 4943 FAIL (PF_DROP); 4944 } 4945 (*state)->src.max_win = MAX(ntohs(th->th_win), 1); 4946 if ((*state)->dst.seqhi == 1) 4947 (*state)->dst.seqhi = htonl(karc4random()); 4948 pf_send_tcp((*state)->rule.ptr, pd->af, 4949 &sk->addr[pd->sidx], &sk->addr[pd->didx], 4950 sk->port[pd->sidx], sk->port[pd->didx], 4951 (*state)->dst.seqhi, 0, TH_SYN, 0, 4952 (*state)->src.mss, 0, 0, (*state)->tag, NULL, NULL); 4953 REASON_SET(reason, PFRES_SYNPROXY); 4954 FAIL (PF_SYNPROXY_DROP); 4955 } else if (((th->th_flags & (TH_SYN|TH_ACK)) != 4956 (TH_SYN|TH_ACK)) || 4957 (ntohl(th->th_ack) != (*state)->dst.seqhi + 1)) { 4958 REASON_SET(reason, PFRES_SYNPROXY); 4959 FAIL (PF_DROP); 4960 } else { 4961 (*state)->dst.max_win = MAX(ntohs(th->th_win), 1); 4962 (*state)->dst.seqlo = ntohl(th->th_seq); 4963 pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst, 4964 pd->src, th->th_dport, th->th_sport, 4965 ntohl(th->th_ack), ntohl(th->th_seq) + 1, 4966 TH_ACK, (*state)->src.max_win, 0, 0, 0, 4967 (*state)->tag, NULL, NULL); 4968 pf_send_tcp((*state)->rule.ptr, pd->af, 4969 &sk->addr[pd->sidx], &sk->addr[pd->didx], 4970 sk->port[pd->sidx], sk->port[pd->didx], 4971 (*state)->src.seqhi + 1, (*state)->src.seqlo + 1, 4972 TH_ACK, (*state)->dst.max_win, 0, 0, 1, 4973 0, NULL, NULL); 4974 (*state)->src.seqdiff = (*state)->dst.seqhi - 4975 (*state)->src.seqlo; 4976 (*state)->dst.seqdiff = (*state)->src.seqhi - 4977 (*state)->dst.seqlo; 4978 (*state)->src.seqhi = (*state)->src.seqlo + 4979 (*state)->dst.max_win; 4980 (*state)->dst.seqhi = (*state)->dst.seqlo + 4981 (*state)->src.max_win; 4982 (*state)->src.wscale = (*state)->dst.wscale = 0; 4983 (*state)->src.state = (*state)->dst.state = 4984 TCPS_ESTABLISHED; 4985 REASON_SET(reason, PFRES_SYNPROXY); 4986 FAIL (PF_SYNPROXY_DROP); 4987 } 4988 } 4989 4990 /* 4991 * Check for connection (addr+port pair) reuse. We can't actually 4992 * unlink the state if we don't own it. 4993 */ 4994 if (((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN) && 4995 dst->state >= TCPS_FIN_WAIT_2 && 4996 src->state >= TCPS_FIN_WAIT_2) { 4997 if (pf_status.debug >= PF_DEBUG_MISC) { 4998 kprintf("pf: state reuse "); 4999 pf_print_state(*state); 5000 pf_print_flags(th->th_flags); 5001 kprintf("\n"); 5002 } 5003 /* XXX make sure it's the same direction ?? 
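 * (we can only unlink a state this cpu owns; a state created on
 * another cpu is merely flagged with PFTM_PURGE below and left for
 * its owner to tear down)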
*/ 5004 (*state)->src.state = (*state)->dst.state = TCPS_CLOSED; 5005 if ((*state)->cpuid == mycpu->gd_cpuid) { 5006 pf_unlink_state(*state); 5007 *state = NULL; 5008 } else { 5009 (*state)->timeout = PFTM_PURGE; 5010 } 5011 FAIL (PF_DROP); 5012 } 5013 5014 if ((*state)->state_flags & PFSTATE_SLOPPY) { 5015 if (pf_tcp_track_sloppy(src, dst, state, pd, 5016 reason) == PF_DROP) { 5017 FAIL (PF_DROP); 5018 } 5019 } else { 5020 if (pf_tcp_track_full(src, dst, state, kif, m, off, pd, 5021 reason, &copyback) == PF_DROP) { 5022 FAIL (PF_DROP); 5023 } 5024 } 5025 5026 /* translate source/destination address, if necessary */ 5027 if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { 5028 struct pf_state_key *nk = (*state)->key[pd->didx]; 5029 5030 if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af) || 5031 nk->port[pd->sidx] != th->th_sport) { 5032 /* 5033 * The translated source address may be completely 5034 * unrelated to the saved link header, make sure 5035 * a bridge doesn't try to use it. 5036 */ 5037 m->m_pkthdr.fw_flags &= ~BRIDGE_MBUF_TAGGED; 5038 pf_change_ap(pd->src, &th->th_sport, pd->ip_sum, 5039 &th->th_sum, &nk->addr[pd->sidx], 5040 nk->port[pd->sidx], 0, pd->af); 5041 } 5042 5043 if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af) || 5044 nk->port[pd->didx] != th->th_dport) { 5045 /* 5046 * If we don't redispatch the packet will go into 5047 * the protocol stack on the wrong cpu for the 5048 * post-translated address. 5049 */ 5050 pf_change_ap(pd->dst, &th->th_dport, pd->ip_sum, 5051 &th->th_sum, &nk->addr[pd->didx], 5052 nk->port[pd->didx], 0, pd->af); 5053 } 5054 copyback = 1; 5055 } 5056 5057 /* Copyback sequence modulation or stateful scrub changes if needed */ 5058 if (copyback) { 5059 m->m_flags &= ~M_HASH; 5060 m_copyback(m, off, sizeof(*th), (caddr_t)th); 5061 } 5062 5063 pfsync_update_state(*state); 5064 error = PF_PASS; 5065 done: 5066 if (*state) 5067 lockmgr(&(*state)->lk, LK_RELEASE); 5068 return (error); 5069 } 5070 5071 /* 5072 * Test UDP connection state. Caller must hold the state locked.
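 * A UDP "connection" is a small pseudo state machine: each side moves
 * from NO_TRAFFIC to SINGLE on its first datagram, and once traffic
 * has been seen both ways the pair is promoted to MULTIPLE, which
 * selects the longer PFTM_UDP_MULTIPLE timeout below.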
5073 */ 5074 int 5075 pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif, 5076 struct mbuf *m, int off, void *h, struct pf_pdesc *pd) 5077 { 5078 struct pf_state_peer *src, *dst; 5079 struct pf_state_key_cmp key; 5080 struct udphdr *uh = pd->hdr.udp; 5081 5082 bzero(&key, sizeof(key)); 5083 key.af = pd->af; 5084 key.proto = IPPROTO_UDP; 5085 if (direction == PF_IN) { /* wire side, straight */ 5086 PF_ACPY(&key.addr[0], pd->src, key.af); 5087 PF_ACPY(&key.addr[1], pd->dst, key.af); 5088 key.port[0] = uh->uh_sport; 5089 key.port[1] = uh->uh_dport; 5090 } else { /* stack side, reverse */ 5091 PF_ACPY(&key.addr[1], pd->src, key.af); 5092 PF_ACPY(&key.addr[0], pd->dst, key.af); 5093 key.port[1] = uh->uh_sport; 5094 key.port[0] = uh->uh_dport; 5095 } 5096 5097 STATE_LOOKUP(kif, &key, direction, *state, m); 5098 lockmgr(&(*state)->lk, LK_EXCLUSIVE); 5099 5100 if (direction == (*state)->direction) { 5101 src = &(*state)->src; 5102 dst = &(*state)->dst; 5103 } else { 5104 src = &(*state)->dst; 5105 dst = &(*state)->src; 5106 } 5107 5108 /* update states */ 5109 if (src->state < PFUDPS_SINGLE) 5110 src->state = PFUDPS_SINGLE; 5111 if (dst->state == PFUDPS_SINGLE) 5112 dst->state = PFUDPS_MULTIPLE; 5113 5114 /* update expire time */ 5115 (*state)->expire = time_second; 5116 if (src->state == PFUDPS_MULTIPLE && dst->state == PFUDPS_MULTIPLE) 5117 (*state)->timeout = PFTM_UDP_MULTIPLE; 5118 else 5119 (*state)->timeout = PFTM_UDP_SINGLE; 5120 5121 /* translate source/destination address, if necessary */ 5122 if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { 5123 struct pf_state_key *nk = (*state)->key[pd->didx]; 5124 5125 if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af) || 5126 nk->port[pd->sidx] != uh->uh_sport) { 5127 /* 5128 * The translated source address may be completely 5129 * unrelated to the saved link header, make sure 5130 * a bridge doesn't try to use it. 5131 */ 5132 m->m_pkthdr.fw_flags &= ~BRIDGE_MBUF_TAGGED; 5133 m->m_flags &= ~M_HASH; 5134 pf_change_ap(pd->src, &uh->uh_sport, pd->ip_sum, 5135 &uh->uh_sum, &nk->addr[pd->sidx], 5136 nk->port[pd->sidx], 1, pd->af); 5137 } 5138 5139 if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af) || 5140 nk->port[pd->didx] != uh->uh_dport) { 5141 /* 5142 * If we don't redispatch the packet will go into 5143 * the protocol stack on the wrong cpu for the 5144 * post-translated address. 5145 */ 5146 m->m_flags &= ~M_HASH; 5147 pf_change_ap(pd->dst, &uh->uh_dport, pd->ip_sum, 5148 &uh->uh_sum, &nk->addr[pd->didx], 5149 nk->port[pd->didx], 1, pd->af); 5150 } 5151 m_copyback(m, off, sizeof(*uh), (caddr_t)uh); 5152 } 5153 5154 pfsync_update_state(*state); 5155 lockmgr(&(*state)->lk, LK_RELEASE); 5156 return (PF_PASS); 5157 } 5158 5159 /* 5160 * Test ICMP connection state. Caller must hold the state locked. 
5161 */ 5162 int 5163 pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, 5164 struct mbuf *m, int off, void *h, struct pf_pdesc *pd, 5165 u_short *reason) 5166 { 5167 struct pf_addr *saddr = pd->src, *daddr = pd->dst; 5168 u_int16_t icmpid = 0, *icmpsum = NULL; 5169 u_int8_t icmptype = 0; 5170 int state_icmp = 0; 5171 int error; 5172 struct pf_state_key_cmp key; 5173 5174 bzero(&key, sizeof(key)); 5175 5176 switch (pd->proto) { 5177 #ifdef INET 5178 case IPPROTO_ICMP: 5179 icmptype = pd->hdr.icmp->icmp_type; 5180 icmpid = pd->hdr.icmp->icmp_id; 5181 icmpsum = &pd->hdr.icmp->icmp_cksum; 5182 5183 if (icmptype == ICMP_UNREACH || 5184 icmptype == ICMP_SOURCEQUENCH || 5185 icmptype == ICMP_REDIRECT || 5186 icmptype == ICMP_TIMXCEED || 5187 icmptype == ICMP_PARAMPROB) 5188 state_icmp++; 5189 break; 5190 #endif /* INET */ 5191 #ifdef INET6 5192 case IPPROTO_ICMPV6: 5193 icmptype = pd->hdr.icmp6->icmp6_type; 5194 icmpid = pd->hdr.icmp6->icmp6_id; 5195 icmpsum = &pd->hdr.icmp6->icmp6_cksum; 5196 5197 if (icmptype == ICMP6_DST_UNREACH || 5198 icmptype == ICMP6_PACKET_TOO_BIG || 5199 icmptype == ICMP6_TIME_EXCEEDED || 5200 icmptype == ICMP6_PARAM_PROB) 5201 state_icmp++; 5202 break; 5203 #endif /* INET6 */ 5204 } 5205 5206 if (!state_icmp) { 5207 5208 /* 5209 * ICMP query/reply message not related to a TCP/UDP packet. 5210 * Search for an ICMP state. 5211 */ 5212 key.af = pd->af; 5213 key.proto = pd->proto; 5214 key.port[0] = key.port[1] = icmpid; 5215 if (direction == PF_IN) { /* wire side, straight */ 5216 PF_ACPY(&key.addr[0], pd->src, key.af); 5217 PF_ACPY(&key.addr[1], pd->dst, key.af); 5218 } else { /* stack side, reverse */ 5219 PF_ACPY(&key.addr[1], pd->src, key.af); 5220 PF_ACPY(&key.addr[0], pd->dst, key.af); 5221 } 5222 5223 STATE_LOOKUP(kif, &key, direction, *state, m); 5224 lockmgr(&(*state)->lk, LK_EXCLUSIVE); 5225 5226 (*state)->expire = time_second; 5227 (*state)->timeout = PFTM_ICMP_ERROR_REPLY; 5228 5229 /* translate source/destination address, if necessary */ 5230 if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { 5231 struct pf_state_key *nk = (*state)->key[pd->didx]; 5232 5233 switch (pd->af) { 5234 #ifdef INET 5235 case AF_INET: 5236 if (PF_ANEQ(pd->src, 5237 &nk->addr[pd->sidx], AF_INET)) 5238 pf_change_a(&saddr->v4.s_addr, 5239 pd->ip_sum, 5240 nk->addr[pd->sidx].v4.s_addr, 0); 5241 5242 if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], 5243 AF_INET)) 5244 pf_change_a(&daddr->v4.s_addr, 5245 pd->ip_sum, 5246 nk->addr[pd->didx].v4.s_addr, 0); 5247 5248 if (nk->port[0] != 5249 pd->hdr.icmp->icmp_id) { 5250 pd->hdr.icmp->icmp_cksum = 5251 pf_cksum_fixup( 5252 pd->hdr.icmp->icmp_cksum, icmpid, 5253 nk->port[pd->sidx], 0); 5254 pd->hdr.icmp->icmp_id = 5255 nk->port[pd->sidx]; 5256 } 5257 5258 m->m_flags &= ~M_HASH; 5259 m_copyback(m, off, ICMP_MINLEN, 5260 (caddr_t)pd->hdr.icmp); 5261 break; 5262 #endif /* INET */ 5263 #ifdef INET6 5264 case AF_INET6: 5265 if (PF_ANEQ(pd->src, 5266 &nk->addr[pd->sidx], AF_INET6)) 5267 pf_change_a6(saddr, 5268 &pd->hdr.icmp6->icmp6_cksum, 5269 &nk->addr[pd->sidx], 0); 5270 5271 if (PF_ANEQ(pd->dst, 5272 &nk->addr[pd->didx], AF_INET6)) 5273 pf_change_a6(daddr, 5274 &pd->hdr.icmp6->icmp6_cksum, 5275 &nk->addr[pd->didx], 0); 5276 5277 m->m_flags &= ~M_HASH; 5278 m_copyback(m, off, 5279 sizeof(struct icmp6_hdr), 5280 (caddr_t)pd->hdr.icmp6); 5281 break; 5282 #endif /* INET6 */ 5283 } 5284 } 5285 } else { 5286 /* 5287 * ICMP error message in response to a TCP/UDP packet. 
5288 * Extract the inner TCP/UDP header and search for that state. 5289 */ 5290 5291 struct pf_pdesc pd2; 5292 #ifdef INET 5293 struct ip h2; 5294 #endif /* INET */ 5295 #ifdef INET6 5296 struct ip6_hdr h2_6; 5297 int terminal = 0; 5298 #endif /* INET6 */ 5299 int ipoff2; 5300 int off2; 5301 5302 pd2.not_cpu_localized = 1; 5303 pd2.af = pd->af; 5304 /* Payload packet is from the opposite direction. */ 5305 pd2.sidx = (direction == PF_IN) ? 1 : 0; 5306 pd2.didx = (direction == PF_IN) ? 0 : 1; 5307 switch (pd->af) { 5308 #ifdef INET 5309 case AF_INET: 5310 /* offset of h2 in mbuf chain */ 5311 ipoff2 = off + ICMP_MINLEN; 5312 5313 if (!pf_pull_hdr(m, ipoff2, &h2, sizeof(h2), 5314 NULL, reason, pd2.af)) { 5315 DPFPRINTF(PF_DEBUG_MISC, 5316 ("pf: ICMP error message too short " 5317 "(ip)\n")); 5318 FAIL (PF_DROP); 5319 } 5320 /* 5321 * ICMP error messages don't refer to non-first 5322 * fragments 5323 */ 5324 if (h2.ip_off & htons(IP_OFFMASK)) { 5325 REASON_SET(reason, PFRES_FRAG); 5326 FAIL (PF_DROP); 5327 } 5328 5329 /* offset of protocol header that follows h2 */ 5330 off2 = ipoff2 + (h2.ip_hl << 2); 5331 5332 pd2.proto = h2.ip_p; 5333 pd2.src = (struct pf_addr *)&h2.ip_src; 5334 pd2.dst = (struct pf_addr *)&h2.ip_dst; 5335 pd2.ip_sum = &h2.ip_sum; 5336 break; 5337 #endif /* INET */ 5338 #ifdef INET6 5339 case AF_INET6: 5340 ipoff2 = off + sizeof(struct icmp6_hdr); 5341 5342 if (!pf_pull_hdr(m, ipoff2, &h2_6, sizeof(h2_6), 5343 NULL, reason, pd2.af)) { 5344 DPFPRINTF(PF_DEBUG_MISC, 5345 ("pf: ICMP error message too short " 5346 "(ip6)\n")); 5347 FAIL (PF_DROP); 5348 } 5349 pd2.proto = h2_6.ip6_nxt; 5350 pd2.src = (struct pf_addr *)&h2_6.ip6_src; 5351 pd2.dst = (struct pf_addr *)&h2_6.ip6_dst; 5352 pd2.ip_sum = NULL; 5353 off2 = ipoff2 + sizeof(h2_6); 5354 do { 5355 switch (pd2.proto) { 5356 case IPPROTO_FRAGMENT: 5357 /* 5358 * ICMPv6 error messages for 5359 * non-first fragments 5360 */ 5361 REASON_SET(reason, PFRES_FRAG); 5362 FAIL (PF_DROP); 5363 case IPPROTO_AH: 5364 case IPPROTO_HOPOPTS: 5365 case IPPROTO_ROUTING: 5366 case IPPROTO_DSTOPTS: { 5367 /* get next header and header length */ 5368 struct ip6_ext opt6; 5369 5370 if (!pf_pull_hdr(m, off2, &opt6, 5371 sizeof(opt6), NULL, reason, 5372 pd2.af)) { 5373 DPFPRINTF(PF_DEBUG_MISC, 5374 ("pf: ICMPv6 short opt\n")); 5375 FAIL (PF_DROP); 5376 } 5377 if (pd2.proto == IPPROTO_AH) 5378 off2 += (opt6.ip6e_len + 2) * 4; 5379 else 5380 off2 += (opt6.ip6e_len + 1) * 8; 5381 pd2.proto = opt6.ip6e_nxt; 5382 /* goto the next header */ 5383 break; 5384 } 5385 default: 5386 terminal++; 5387 break; 5388 } 5389 } while (!terminal); 5390 break; 5391 #endif /* INET6 */ 5392 default: 5393 DPFPRINTF(PF_DEBUG_MISC, 5394 ("pf: ICMP AF %d unknown (ip6)\n", pd->af)); 5395 FAIL (PF_DROP); 5396 break; 5397 } 5398 5399 switch (pd2.proto) { 5400 case IPPROTO_TCP: { 5401 struct tcphdr th; 5402 u_int32_t seq; 5403 struct pf_state_peer *src, *dst; 5404 u_int8_t dws; 5405 int copyback = 0; 5406 5407 /* 5408 * Only the first 8 bytes of the TCP header can be 5409 * expected. Don't access any TCP header fields after 5410 * th_seq, an ackskew test is not possible. 
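 * (RFC 792 only obliges the sender to quote the IP header plus the
 * first 8 bytes of the offending datagram; for TCP those 8 bytes
 * cover the ports and th_seq but stop just short of th_ack.)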
5411 */ 5412 if (!pf_pull_hdr(m, off2, &th, 8, NULL, reason, 5413 pd2.af)) { 5414 DPFPRINTF(PF_DEBUG_MISC, 5415 ("pf: ICMP error message too short " 5416 "(tcp)\n")); 5417 FAIL (PF_DROP); 5418 } 5419 5420 key.af = pd2.af; 5421 key.proto = IPPROTO_TCP; 5422 PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); 5423 PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); 5424 key.port[pd2.sidx] = th.th_sport; 5425 key.port[pd2.didx] = th.th_dport; 5426 5427 STATE_LOOKUP(kif, &key, direction, *state, m); 5428 lockmgr(&(*state)->lk, LK_EXCLUSIVE); 5429 5430 if (direction == (*state)->direction) { 5431 src = &(*state)->dst; 5432 dst = &(*state)->src; 5433 } else { 5434 src = &(*state)->src; 5435 dst = &(*state)->dst; 5436 } 5437 5438 if (src->wscale && dst->wscale) 5439 dws = dst->wscale & PF_WSCALE_MASK; 5440 else 5441 dws = 0; 5442 5443 /* Demodulate sequence number */ 5444 seq = ntohl(th.th_seq) - src->seqdiff; 5445 if (src->seqdiff) { 5446 pf_change_a(&th.th_seq, icmpsum, 5447 htonl(seq), 0); 5448 copyback = 1; 5449 } 5450 5451 if (!((*state)->state_flags & PFSTATE_SLOPPY) && 5452 (!SEQ_GEQ(src->seqhi, seq) || 5453 !SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)))) { 5454 if (pf_status.debug >= PF_DEBUG_MISC) { 5455 kprintf("pf: BAD ICMP %d:%d ", 5456 icmptype, pd->hdr.icmp->icmp_code); 5457 pf_print_host(pd->src, 0, pd->af); 5458 kprintf(" -> "); 5459 pf_print_host(pd->dst, 0, pd->af); 5460 kprintf(" state: "); 5461 pf_print_state(*state); 5462 kprintf(" seq=%u\n", seq); 5463 } 5464 REASON_SET(reason, PFRES_BADSTATE); 5465 FAIL (PF_DROP); 5466 } else { 5467 if (pf_status.debug >= PF_DEBUG_MISC) { 5468 kprintf("pf: OK ICMP %d:%d ", 5469 icmptype, pd->hdr.icmp->icmp_code); 5470 pf_print_host(pd->src, 0, pd->af); 5471 kprintf(" -> "); 5472 pf_print_host(pd->dst, 0, pd->af); 5473 kprintf(" state: "); 5474 pf_print_state(*state); 5475 kprintf(" seq=%u\n", seq); 5476 } 5477 } 5478 5479 /* translate source/destination address, if necessary */ 5480 if ((*state)->key[PF_SK_WIRE] != 5481 (*state)->key[PF_SK_STACK]) { 5482 struct pf_state_key *nk = 5483 (*state)->key[pd->didx]; 5484 5485 if (PF_ANEQ(pd2.src, 5486 &nk->addr[pd2.sidx], pd2.af) || 5487 nk->port[pd2.sidx] != th.th_sport) 5488 pf_change_icmp(pd2.src, &th.th_sport, 5489 daddr, &nk->addr[pd2.sidx], 5490 nk->port[pd2.sidx], NULL, 5491 pd2.ip_sum, icmpsum, 5492 pd->ip_sum, 0, pd2.af); 5493 5494 if (PF_ANEQ(pd2.dst, 5495 &nk->addr[pd2.didx], pd2.af) || 5496 nk->port[pd2.didx] != th.th_dport) 5497 pf_change_icmp(pd2.dst, &th.th_dport, 5498 NULL, /* XXX Inbound NAT? 
*/ 5499 &nk->addr[pd2.didx], 5500 nk->port[pd2.didx], NULL, 5501 pd2.ip_sum, icmpsum, 5502 pd->ip_sum, 0, pd2.af); 5503 copyback = 1; 5504 } 5505 5506 if (copyback) { 5507 switch (pd2.af) { 5508 #ifdef INET 5509 case AF_INET: 5510 m_copyback(m, off, ICMP_MINLEN, 5511 (caddr_t)pd->hdr.icmp); 5512 m_copyback(m, ipoff2, sizeof(h2), 5513 (caddr_t)&h2); 5514 break; 5515 #endif /* INET */ 5516 #ifdef INET6 5517 case AF_INET6: 5518 m_copyback(m, off, 5519 sizeof(struct icmp6_hdr), 5520 (caddr_t)pd->hdr.icmp6); 5521 m_copyback(m, ipoff2, sizeof(h2_6), 5522 (caddr_t)&h2_6); 5523 break; 5524 #endif /* INET6 */ 5525 } 5526 m->m_flags &= ~M_HASH; 5527 m_copyback(m, off2, 8, (caddr_t)&th); 5528 } 5529 break; 5530 } 5531 case IPPROTO_UDP: { 5532 struct udphdr uh; 5533 5534 if (!pf_pull_hdr(m, off2, &uh, sizeof(uh), 5535 NULL, reason, pd2.af)) { 5536 DPFPRINTF(PF_DEBUG_MISC, 5537 ("pf: ICMP error message too short " 5538 "(udp)\n")); 5539 return (PF_DROP); 5540 } 5541 5542 key.af = pd2.af; 5543 key.proto = IPPROTO_UDP; 5544 PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); 5545 PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); 5546 key.port[pd2.sidx] = uh.uh_sport; 5547 key.port[pd2.didx] = uh.uh_dport; 5548 5549 STATE_LOOKUP(kif, &key, direction, *state, m); 5550 lockmgr(&(*state)->lk, LK_EXCLUSIVE); 5551 5552 /* translate source/destination address, if necessary */ 5553 if ((*state)->key[PF_SK_WIRE] != 5554 (*state)->key[PF_SK_STACK]) { 5555 struct pf_state_key *nk = 5556 (*state)->key[pd->didx]; 5557 5558 if (PF_ANEQ(pd2.src, 5559 &nk->addr[pd2.sidx], pd2.af) || 5560 nk->port[pd2.sidx] != uh.uh_sport) 5561 pf_change_icmp(pd2.src, &uh.uh_sport, 5562 daddr, &nk->addr[pd2.sidx], 5563 nk->port[pd2.sidx], &uh.uh_sum, 5564 pd2.ip_sum, icmpsum, 5565 pd->ip_sum, 1, pd2.af); 5566 5567 if (PF_ANEQ(pd2.dst, 5568 &nk->addr[pd2.didx], pd2.af) || 5569 nk->port[pd2.didx] != uh.uh_dport) 5570 pf_change_icmp(pd2.dst, &uh.uh_dport, 5571 NULL, /* XXX Inbound NAT? 
*/ 5572 &nk->addr[pd2.didx], 5573 nk->port[pd2.didx], &uh.uh_sum, 5574 pd2.ip_sum, icmpsum, 5575 pd->ip_sum, 1, pd2.af); 5576 5577 switch (pd2.af) { 5578 #ifdef INET 5579 case AF_INET: 5580 m_copyback(m, off, ICMP_MINLEN, 5581 (caddr_t)pd->hdr.icmp); 5582 m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2); 5583 break; 5584 #endif /* INET */ 5585 #ifdef INET6 5586 case AF_INET6: 5587 m_copyback(m, off, 5588 sizeof(struct icmp6_hdr), 5589 (caddr_t)pd->hdr.icmp6); 5590 m_copyback(m, ipoff2, sizeof(h2_6), 5591 (caddr_t)&h2_6); 5592 break; 5593 #endif /* INET6 */ 5594 } 5595 m->m_flags &= ~M_HASH; 5596 m_copyback(m, off2, sizeof(uh), (caddr_t)&uh); 5597 } 5598 break; 5599 } 5600 #ifdef INET 5601 case IPPROTO_ICMP: { 5602 struct icmp iih; 5603 5604 if (!pf_pull_hdr(m, off2, &iih, ICMP_MINLEN, 5605 NULL, reason, pd2.af)) { 5606 DPFPRINTF(PF_DEBUG_MISC, 5607 ("pf: ICMP error message too short " 5608 "(icmp)\n")); 5609 return (PF_DROP); 5610 } 5611 5612 key.af = pd2.af; 5613 key.proto = IPPROTO_ICMP; 5614 PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); 5615 PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); 5616 key.port[0] = key.port[1] = iih.icmp_id; 5617 5618 STATE_LOOKUP(kif, &key, direction, *state, m); 5619 lockmgr(&(*state)->lk, LK_EXCLUSIVE); 5620 5621 /* translate source/destination address, if necessary */ 5622 if ((*state)->key[PF_SK_WIRE] != 5623 (*state)->key[PF_SK_STACK]) { 5624 struct pf_state_key *nk = 5625 (*state)->key[pd->didx]; 5626 5627 if (PF_ANEQ(pd2.src, 5628 &nk->addr[pd2.sidx], pd2.af) || 5629 nk->port[pd2.sidx] != iih.icmp_id) 5630 pf_change_icmp(pd2.src, &iih.icmp_id, 5631 daddr, &nk->addr[pd2.sidx], 5632 nk->port[pd2.sidx], NULL, 5633 pd2.ip_sum, icmpsum, 5634 pd->ip_sum, 0, AF_INET); 5635 5636 if (PF_ANEQ(pd2.dst, 5637 &nk->addr[pd2.didx], pd2.af) || 5638 nk->port[pd2.didx] != iih.icmp_id) 5639 pf_change_icmp(pd2.dst, &iih.icmp_id, 5640 NULL, /* XXX Inbound NAT? */ 5641 &nk->addr[pd2.didx], 5642 nk->port[pd2.didx], NULL, 5643 pd2.ip_sum, icmpsum, 5644 pd->ip_sum, 0, AF_INET); 5645 5646 m_copyback(m, off, ICMP_MINLEN, (caddr_t)pd->hdr.icmp); 5647 m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2); 5648 m_copyback(m, off2, ICMP_MINLEN, (caddr_t)&iih); 5649 m->m_flags &= ~M_HASH; 5650 } 5651 break; 5652 } 5653 #endif /* INET */ 5654 #ifdef INET6 5655 case IPPROTO_ICMPV6: { 5656 struct icmp6_hdr iih; 5657 5658 if (!pf_pull_hdr(m, off2, &iih, 5659 sizeof(struct icmp6_hdr), NULL, reason, pd2.af)) { 5660 DPFPRINTF(PF_DEBUG_MISC, 5661 ("pf: ICMP error message too short " 5662 "(icmp6)\n")); 5663 FAIL (PF_DROP); 5664 } 5665 5666 key.af = pd2.af; 5667 key.proto = IPPROTO_ICMPV6; 5668 PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); 5669 PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); 5670 key.port[0] = key.port[1] = iih.icmp6_id; 5671 5672 STATE_LOOKUP(kif, &key, direction, *state, m); 5673 lockmgr(&(*state)->lk, LK_EXCLUSIVE); 5674 5675 /* translate source/destination address, if necessary */ 5676 if ((*state)->key[PF_SK_WIRE] != 5677 (*state)->key[PF_SK_STACK]) { 5678 struct pf_state_key *nk = 5679 (*state)->key[pd->didx]; 5680 5681 if (PF_ANEQ(pd2.src, 5682 &nk->addr[pd2.sidx], pd2.af) || 5683 nk->port[pd2.sidx] != iih.icmp6_id) 5684 pf_change_icmp(pd2.src, &iih.icmp6_id, 5685 daddr, &nk->addr[pd2.sidx], 5686 nk->port[pd2.sidx], NULL, 5687 pd2.ip_sum, icmpsum, 5688 pd->ip_sum, 0, AF_INET6); 5689 5690 if (PF_ANEQ(pd2.dst, 5691 &nk->addr[pd2.didx], pd2.af) || 5692 nk->port[pd2.didx] != iih.icmp6_id) 5693 pf_change_icmp(pd2.dst, &iih.icmp6_id, 5694 NULL, /* XXX Inbound NAT?
*/ 5695 &nk->addr[pd2.didx], 5696 nk->port[pd2.didx], NULL, 5697 pd2.ip_sum, icmpsum, 5698 pd->ip_sum, 0, AF_INET6); 5699 5700 m_copyback(m, off, sizeof(struct icmp6_hdr), 5701 (caddr_t)pd->hdr.icmp6); 5702 m_copyback(m, ipoff2, sizeof(h2_6), (caddr_t)&h2_6); 5703 m_copyback(m, off2, sizeof(struct icmp6_hdr), 5704 (caddr_t)&iih); 5705 m->m_flags &= ~M_HASH; 5706 } 5707 break; 5708 } 5709 #endif /* INET6 */ 5710 default: { 5711 key.af = pd2.af; 5712 key.proto = pd2.proto; 5713 PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); 5714 PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); 5715 key.port[0] = key.port[1] = 0; 5716 5717 STATE_LOOKUP(kif, &key, direction, *state, m); 5718 lockmgr(&(*state)->lk, LK_EXCLUSIVE); 5719 5720 /* translate source/destination address, if necessary */ 5721 if ((*state)->key[PF_SK_WIRE] != 5722 (*state)->key[PF_SK_STACK]) { 5723 struct pf_state_key *nk = 5724 (*state)->key[pd->didx]; 5725 5726 if (PF_ANEQ(pd2.src, 5727 &nk->addr[pd2.sidx], pd2.af)) 5728 pf_change_icmp(pd2.src, NULL, daddr, 5729 &nk->addr[pd2.sidx], 0, NULL, 5730 pd2.ip_sum, icmpsum, 5731 pd->ip_sum, 0, pd2.af); 5732 5733 if (PF_ANEQ(pd2.dst, 5734 &nk->addr[pd2.didx], pd2.af)) 5735 pf_change_icmp(pd2.dst, NULL, 5736 NULL, /* XXX Inbound NAT? */ 5737 &nk->addr[pd2.didx], 0, NULL, 5738 pd2.ip_sum, icmpsum, 5739 pd->ip_sum, 0, pd2.af); 5740 5741 switch (pd2.af) { 5742 #ifdef INET 5743 case AF_INET: 5744 m_copyback(m, off, ICMP_MINLEN, 5745 (caddr_t)pd->hdr.icmp); 5746 m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2); 5747 m->m_flags &= ~M_HASH; 5748 break; 5749 #endif /* INET */ 5750 #ifdef INET6 5751 case AF_INET6: 5752 m_copyback(m, off, 5753 sizeof(struct icmp6_hdr), 5754 (caddr_t)pd->hdr.icmp6); 5755 m_copyback(m, ipoff2, sizeof(h2_6), 5756 (caddr_t)&h2_6); 5757 m->m_flags &= ~M_HASH; 5758 break; 5759 #endif /* INET6 */ 5760 } 5761 } 5762 break; 5763 } 5764 } 5765 } 5766 5767 pfsync_update_state(*state); 5768 error = PF_PASS; 5769 done: 5770 if (*state) 5771 lockmgr(&(*state)->lk, LK_RELEASE); 5772 return (error); 5773 } 5774 5775 /* 5776 * Test other connection state. Caller must hold the state locked.
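 * Protocols without port numbers are keyed on the address pair alone
 * (both key.port slots stay 0) and reuse the same SINGLE/MULTIPLE
 * pseudo state machine that UDP tracking uses.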
5777 */ 5778 int 5779 pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif, 5780 struct mbuf *m, struct pf_pdesc *pd) 5781 { 5782 struct pf_state_peer *src, *dst; 5783 struct pf_state_key_cmp key; 5784 5785 bzero(&key, sizeof(key)); 5786 key.af = pd->af; 5787 key.proto = pd->proto; 5788 if (direction == PF_IN) { 5789 PF_ACPY(&key.addr[0], pd->src, key.af); 5790 PF_ACPY(&key.addr[1], pd->dst, key.af); 5791 key.port[0] = key.port[1] = 0; 5792 } else { 5793 PF_ACPY(&key.addr[1], pd->src, key.af); 5794 PF_ACPY(&key.addr[0], pd->dst, key.af); 5795 key.port[1] = key.port[0] = 0; 5796 } 5797 5798 STATE_LOOKUP(kif, &key, direction, *state, m); 5799 lockmgr(&(*state)->lk, LK_EXCLUSIVE); 5800 5801 if (direction == (*state)->direction) { 5802 src = &(*state)->src; 5803 dst = &(*state)->dst; 5804 } else { 5805 src = &(*state)->dst; 5806 dst = &(*state)->src; 5807 } 5808 5809 /* update states */ 5810 if (src->state < PFOTHERS_SINGLE) 5811 src->state = PFOTHERS_SINGLE; 5812 if (dst->state == PFOTHERS_SINGLE) 5813 dst->state = PFOTHERS_MULTIPLE; 5814 5815 /* update expire time */ 5816 (*state)->expire = time_second; 5817 if (src->state == PFOTHERS_MULTIPLE && dst->state == PFOTHERS_MULTIPLE) 5818 (*state)->timeout = PFTM_OTHER_MULTIPLE; 5819 else 5820 (*state)->timeout = PFTM_OTHER_SINGLE; 5821 5822 /* translate source/destination address, if necessary */ 5823 if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { 5824 struct pf_state_key *nk = (*state)->key[pd->didx]; 5825 5826 KKASSERT(nk); 5827 KKASSERT(pd); 5828 KKASSERT(pd->src); 5829 KKASSERT(pd->dst); 5830 switch (pd->af) { 5831 #ifdef INET 5832 case AF_INET: 5833 if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], AF_INET)) 5834 pf_change_a(&pd->src->v4.s_addr, 5835 pd->ip_sum, 5836 nk->addr[pd->sidx].v4.s_addr, 5837 0); 5838 5839 5840 if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], AF_INET)) 5841 pf_change_a(&pd->dst->v4.s_addr, 5842 pd->ip_sum, 5843 nk->addr[pd->didx].v4.s_addr, 5844 0); 5845 5846 break; 5847 #endif /* INET */ 5848 #ifdef INET6 5849 case AF_INET6: 5850 if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], AF_INET6)) 5851 PF_ACPY(pd->src, &nk->addr[pd->sidx], pd->af); 5852 5853 if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], AF_INET6)) 5854 PF_ACPY(pd->dst, &nk->addr[pd->didx], pd->af); 5855 #endif /* INET6 */ 5856 } 5857 } 5858 5859 pfsync_update_state(*state); 5860 lockmgr(&(*state)->lk, LK_RELEASE); 5861 return (PF_PASS); 5862 } 5863 5864 /* 5865 * ipoff and off are measured from the start of the mbuf chain. 5866 * h must be at "ipoff" on the mbuf chain. 
5867 */ 5868 void * 5869 pf_pull_hdr(struct mbuf *m, int off, void *p, int len, 5870 u_short *actionp, u_short *reasonp, sa_family_t af) 5871 { 5872 switch (af) { 5873 #ifdef INET 5874 case AF_INET: { 5875 struct ip *h = mtod(m, struct ip *); 5876 u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3; 5877 5878 if (fragoff) { 5879 if (fragoff >= len) 5880 ACTION_SET(actionp, PF_PASS); 5881 else { 5882 ACTION_SET(actionp, PF_DROP); 5883 REASON_SET(reasonp, PFRES_FRAG); 5884 } 5885 return (NULL); 5886 } 5887 if (m->m_pkthdr.len < off + len || 5888 ntohs(h->ip_len) < off + len) { 5889 ACTION_SET(actionp, PF_DROP); 5890 REASON_SET(reasonp, PFRES_SHORT); 5891 return (NULL); 5892 } 5893 break; 5894 } 5895 #endif /* INET */ 5896 #ifdef INET6 5897 case AF_INET6: { 5898 struct ip6_hdr *h = mtod(m, struct ip6_hdr *); 5899 5900 if (m->m_pkthdr.len < off + len || 5901 (ntohs(h->ip6_plen) + sizeof(struct ip6_hdr)) < 5902 (unsigned)(off + len)) { 5903 ACTION_SET(actionp, PF_DROP); 5904 REASON_SET(reasonp, PFRES_SHORT); 5905 return (NULL); 5906 } 5907 break; 5908 } 5909 #endif /* INET6 */ 5910 } 5911 m_copydata(m, off, len, p); 5912 return (p); 5913 } 5914 5915 int 5916 pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif) 5917 { 5918 struct sockaddr_in *dst; 5919 int ret = 1; 5920 int check_mpath; 5921 #ifdef INET6 5922 struct sockaddr_in6 *dst6; 5923 struct route_in6 ro; 5924 #else 5925 struct route ro; 5926 #endif 5927 struct radix_node *rn; 5928 struct rtentry *rt; 5929 struct ifnet *ifp; 5930 5931 check_mpath = 0; 5932 bzero(&ro, sizeof(ro)); 5933 switch (af) { 5934 case AF_INET: 5935 dst = satosin(&ro.ro_dst); 5936 dst->sin_family = AF_INET; 5937 dst->sin_len = sizeof(*dst); 5938 dst->sin_addr = addr->v4; 5939 break; 5940 #ifdef INET6 5941 case AF_INET6: 5942 /* 5943 * Skip check for addresses with embedded interface scope, 5944 * as they would always match anyway. 
5945 */ 5946 if (IN6_IS_SCOPE_EMBED(&addr->v6)) 5947 goto out; 5948 dst6 = (struct sockaddr_in6 *)&ro.ro_dst; 5949 dst6->sin6_family = AF_INET6; 5950 dst6->sin6_len = sizeof(*dst6); 5951 dst6->sin6_addr = addr->v6; 5952 break; 5953 #endif /* INET6 */ 5954 default: 5955 return (0); 5956 } 5957 5958 /* Skip checks for ipsec interfaces */ 5959 if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC) 5960 goto out; 5961 5962 rtalloc_ign((struct route *)&ro, 0); 5963 5964 if (ro.ro_rt != NULL) { 5965 /* No interface given, this is a no-route check */ 5966 if (kif == NULL) 5967 goto out; 5968 5969 if (kif->pfik_ifp == NULL) { 5970 ret = 0; 5971 goto out; 5972 } 5973 5974 /* Perform uRPF check if passed input interface */ 5975 ret = 0; 5976 rn = (struct radix_node *)ro.ro_rt; 5977 do { 5978 rt = (struct rtentry *)rn; 5979 ifp = rt->rt_ifp; 5980 5981 if (kif->pfik_ifp == ifp) 5982 ret = 1; 5983 rn = NULL; 5984 } while (check_mpath == 1 && rn != NULL && ret == 0); 5985 } else 5986 ret = 0; 5987 out: 5988 if (ro.ro_rt != NULL) 5989 RTFREE(ro.ro_rt); 5990 return (ret); 5991 } 5992 5993 int 5994 pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw) 5995 { 5996 struct sockaddr_in *dst; 5997 #ifdef INET6 5998 struct sockaddr_in6 *dst6; 5999 struct route_in6 ro; 6000 #else 6001 struct route ro; 6002 #endif 6003 int ret = 0; 6004 6005 ASSERT_LWKT_TOKEN_HELD(&pf_token); 6006 6007 bzero(&ro, sizeof(ro)); 6008 switch (af) { 6009 case AF_INET: 6010 dst = satosin(&ro.ro_dst); 6011 dst->sin_family = AF_INET; 6012 dst->sin_len = sizeof(*dst); 6013 dst->sin_addr = addr->v4; 6014 break; 6015 #ifdef INET6 6016 case AF_INET6: 6017 dst6 = (struct sockaddr_in6 *)&ro.ro_dst; 6018 dst6->sin6_family = AF_INET6; 6019 dst6->sin6_len = sizeof(*dst6); 6020 dst6->sin6_addr = addr->v6; 6021 break; 6022 #endif /* INET6 */ 6023 default: 6024 return (0); 6025 } 6026 6027 rtalloc_ign((struct route *)&ro, (RTF_CLONING | RTF_PRCLONING)); 6028 6029 if (ro.ro_rt != NULL) { 6030 RTFREE(ro.ro_rt); 6031 } 6032 6033 return (ret); 6034 } 6035 6036 #ifdef INET 6037 void 6038 pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, 6039 struct pf_state *s, struct pf_pdesc *pd) 6040 { 6041 struct mbuf *m0, *m1; 6042 struct route iproute; 6043 struct route *ro = NULL; 6044 struct sockaddr_in *dst; 6045 struct ip *ip; 6046 struct ifnet *ifp = NULL; 6047 struct pf_addr naddr; 6048 struct pf_src_node *sn = NULL; 6049 int error = 0; 6050 int sw_csum; 6051 6052 ASSERT_LWKT_TOKEN_HELD(&pf_token); 6053 6054 if (m == NULL || *m == NULL || r == NULL || 6055 (dir != PF_IN && dir != PF_OUT) || oifp == NULL) 6056 panic("pf_route: invalid parameters"); 6057 6058 if (((*m)->m_pkthdr.fw_flags & PF_MBUF_ROUTED) == 0) { 6059 (*m)->m_pkthdr.fw_flags |= PF_MBUF_ROUTED; 6060 (*m)->m_pkthdr.pf.routed = 1; 6061 } else { 6062 if ((*m)->m_pkthdr.pf.routed++ > 3) { 6063 m0 = *m; 6064 *m = NULL; 6065 goto bad; 6066 } 6067 } 6068 6069 if (r->rt == PF_DUPTO) { 6070 if ((m0 = m_dup(*m, M_NOWAIT)) == NULL) { 6071 return; 6072 } 6073 } else { 6074 if ((r->rt == PF_REPLYTO) == (r->direction == dir)) { 6075 return; 6076 } 6077 m0 = *m; 6078 } 6079 6080 if (m0->m_len < sizeof(struct ip)) { 6081 DPFPRINTF(PF_DEBUG_URGENT, 6082 ("pf_route: m0->m_len < sizeof(struct ip)\n")); 6083 goto bad; 6084 } 6085 6086 ip = mtod(m0, struct ip *); 6087 6088 ro = &iproute; 6089 bzero((caddr_t)ro, sizeof(*ro)); 6090 dst = satosin(&ro->ro_dst); 6091 dst->sin_family = AF_INET; 6092 dst->sin_len = sizeof(*dst); 6093 dst->sin_addr = ip->ip_dst; 6094 6095 if 
(r->rt == PF_FASTROUTE) { 6096 rtalloc(ro); 6097 if (ro->ro_rt == 0) { 6098 ipstat.ips_noroute++; 6099 goto bad; 6100 } 6101 6102 ifp = ro->ro_rt->rt_ifp; 6103 ro->ro_rt->rt_use++; 6104 6105 if (ro->ro_rt->rt_flags & RTF_GATEWAY) 6106 dst = satosin(ro->ro_rt->rt_gateway); 6107 } else { 6108 if (TAILQ_EMPTY(&r->rpool.list)) { 6109 DPFPRINTF(PF_DEBUG_URGENT, 6110 ("pf_route: TAILQ_EMPTY(&r->rpool.list)\n")); 6111 goto bad; 6112 } 6113 if (s == NULL) { 6114 pf_map_addr(AF_INET, r, (struct pf_addr *)&ip->ip_src, 6115 &naddr, NULL, &sn); 6116 if (!PF_AZERO(&naddr, AF_INET)) 6117 dst->sin_addr.s_addr = naddr.v4.s_addr; 6118 ifp = r->rpool.cur->kif ? 6119 r->rpool.cur->kif->pfik_ifp : NULL; 6120 } else { 6121 if (!PF_AZERO(&s->rt_addr, AF_INET)) 6122 dst->sin_addr.s_addr = 6123 s->rt_addr.v4.s_addr; 6124 ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL; 6125 } 6126 } 6127 if (ifp == NULL) 6128 goto bad; 6129 6130 if (oifp != ifp) { 6131 if (pf_test(PF_OUT, ifp, &m0, NULL, NULL) != PF_PASS) { 6132 goto bad; 6133 } else if (m0 == NULL) { 6134 goto done; 6135 } 6136 if (m0->m_len < sizeof(struct ip)) { 6137 DPFPRINTF(PF_DEBUG_URGENT, 6138 ("pf_route: m0->m_len < sizeof(struct ip)\n")); 6139 goto bad; 6140 } 6141 ip = mtod(m0, struct ip *); 6142 } 6143 6144 /* Copied from FreeBSD 5.1-CURRENT ip_output. */ 6145 m0->m_pkthdr.csum_flags |= CSUM_IP; 6146 sw_csum = m0->m_pkthdr.csum_flags & ~ifp->if_hwassist; 6147 if (sw_csum & CSUM_DELAY_DATA) { 6148 in_delayed_cksum(m0); 6149 sw_csum &= ~CSUM_DELAY_DATA; 6150 } 6151 m0->m_pkthdr.csum_flags &= ifp->if_hwassist; 6152 m0->m_pkthdr.csum_iphlen = (ip->ip_hl << 2); 6153 6154 /* 6155 * WARNING! We cannot fragment if the packet was modified from an 6156 * original which expected to be using TSO. In this 6157 * situation we pray that the target interface is 6158 * compatible with the originating interface. 6159 */ 6160 if (ntohs(ip->ip_len) <= ifp->if_mtu || 6161 (m0->m_pkthdr.csum_flags & CSUM_TSO) || 6162 ((ifp->if_hwassist & CSUM_FRAGMENT) && 6163 (ip->ip_off & htons(IP_DF)) == 0)) { 6164 ip->ip_sum = 0; 6165 if (sw_csum & CSUM_DELAY_IP) { 6166 /* From KAME */ 6167 if (ip->ip_v == IPVERSION && 6168 (ip->ip_hl << 2) == sizeof(*ip)) { 6169 ip->ip_sum = in_cksum_hdr(ip); 6170 } else { 6171 ip->ip_sum = in_cksum(m0, ip->ip_hl << 2); 6172 } 6173 } 6174 lwkt_reltoken(&pf_token); 6175 error = ifp->if_output(ifp, m0, sintosa(dst), ro->ro_rt); 6176 lwkt_gettoken(&pf_token); 6177 goto done; 6178 } 6179 6180 /* 6181 * Too large for interface; fragment if possible. 6182 * Must be able to put at least 8 bytes per fragment. 
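 * The IP fragment offset field counts in units of 8 bytes, so each non-final fragment must carry a multiple of 8 payload bytes.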
6183 */ 6184 if (ip->ip_off & htons(IP_DF)) { 6185 ipstat.ips_cantfrag++; 6186 if (r->rt != PF_DUPTO) { 6187 icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0, 6188 ifp->if_mtu); 6189 goto done; 6190 } else 6191 goto bad; 6192 } 6193 6194 m1 = m0; 6195 error = ip_fragment(ip, &m0, ifp->if_mtu, ifp->if_hwassist, sw_csum); 6196 if (error) { 6197 goto bad; 6198 } 6199 6200 for (m0 = m1; m0; m0 = m1) { 6201 m1 = m0->m_nextpkt; 6202 m0->m_nextpkt = 0; 6203 if (error == 0) { 6204 lwkt_reltoken(&pf_token); 6205 error = (*ifp->if_output)(ifp, m0, sintosa(dst), 6206 NULL); 6207 lwkt_gettoken(&pf_token); 6208 } else 6209 m_freem(m0); 6210 } 6211 6212 if (error == 0) 6213 ipstat.ips_fragmented++; 6214 6215 done: 6216 if (r->rt != PF_DUPTO) 6217 *m = NULL; 6218 if (ro == &iproute && ro->ro_rt) 6219 RTFREE(ro->ro_rt); 6220 return; 6221 6222 bad: 6223 m_freem(m0); 6224 goto done; 6225 } 6226 #endif /* INET */ 6227 6228 #ifdef INET6 6229 void 6230 pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, 6231 struct pf_state *s, struct pf_pdesc *pd) 6232 { 6233 struct mbuf *m0; 6234 struct route_in6 ip6route; 6235 struct route_in6 *ro; 6236 struct sockaddr_in6 *dst; 6237 struct ip6_hdr *ip6; 6238 struct ifnet *ifp = NULL; 6239 struct pf_addr naddr; 6240 struct pf_src_node *sn = NULL; 6241 6242 if (m == NULL || *m == NULL || r == NULL || 6243 (dir != PF_IN && dir != PF_OUT) || oifp == NULL) 6244 panic("pf_route6: invalid parameters"); 6245 6246 if (((*m)->m_pkthdr.fw_flags & PF_MBUF_ROUTED) == 0) { 6247 (*m)->m_pkthdr.fw_flags |= PF_MBUF_ROUTED; 6248 (*m)->m_pkthdr.pf.routed = 1; 6249 } else { 6250 if ((*m)->m_pkthdr.pf.routed++ > 3) { 6251 m0 = *m; 6252 *m = NULL; 6253 goto bad; 6254 } 6255 } 6256 6257 if (r->rt == PF_DUPTO) { 6258 if ((m0 = m_dup(*m, M_NOWAIT)) == NULL) 6259 return; 6260 } else { 6261 if ((r->rt == PF_REPLYTO) == (r->direction == dir)) 6262 return; 6263 m0 = *m; 6264 } 6265 6266 if (m0->m_len < sizeof(struct ip6_hdr)) { 6267 DPFPRINTF(PF_DEBUG_URGENT, 6268 ("pf_route6: m0->m_len < sizeof(struct ip6_hdr)\n")); 6269 goto bad; 6270 } 6271 ip6 = mtod(m0, struct ip6_hdr *); 6272 6273 ro = &ip6route; 6274 bzero((caddr_t)ro, sizeof(*ro)); 6275 dst = (struct sockaddr_in6 *)&ro->ro_dst; 6276 dst->sin6_family = AF_INET6; 6277 dst->sin6_len = sizeof(*dst); 6278 dst->sin6_addr = ip6->ip6_dst; 6279 6280 /* 6281 * DragonFly doesn't zero the auxiliary pkthdr fields, only fw_flags, 6282 * so make sure pf.flags is clear. 6283 * 6284 * Cheat. XXX why only in the v6 case??? 6285 */ 6286 if (r->rt == PF_FASTROUTE) { 6287 m0->m_pkthdr.fw_flags |= PF_MBUF_TAGGED; 6288 m0->m_pkthdr.pf.flags = 0; 6289 /* XXX Re-Check when Upgrading to > 4.4 */ 6290 m0->m_pkthdr.pf.statekey = NULL; 6291 ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL); 6292 return; 6293 } 6294 6295 if (TAILQ_EMPTY(&r->rpool.list)) { 6296 DPFPRINTF(PF_DEBUG_URGENT, 6297 ("pf_route6: TAILQ_EMPTY(&r->rpool.list)\n")); 6298 goto bad; 6299 } 6300 if (s == NULL) { 6301 pf_map_addr(AF_INET6, r, (struct pf_addr *)&ip6->ip6_src, 6302 &naddr, NULL, &sn); 6303 if (!PF_AZERO(&naddr, AF_INET6)) 6304 PF_ACPY((struct pf_addr *)&dst->sin6_addr, 6305 &naddr, AF_INET6); 6306 ifp = r->rpool.cur->kif ? r->rpool.cur->kif->pfik_ifp : NULL; 6307 } else { 6308 if (!PF_AZERO(&s->rt_addr, AF_INET6)) 6309 PF_ACPY((struct pf_addr *)&dst->sin6_addr, 6310 &s->rt_addr, AF_INET6); 6311 ifp = s->rt_kif ?
s->rt_kif->pfik_ifp : NULL; 6312 } 6313 if (ifp == NULL) 6314 goto bad; 6315 6316 if (oifp != ifp) { 6317 if (pf_test6(PF_OUT, ifp, &m0, NULL, NULL) != PF_PASS) { 6318 goto bad; 6319 } else if (m0 == NULL) { 6320 goto done; 6321 } 6322 if (m0->m_len < sizeof(struct ip6_hdr)) { 6323 DPFPRINTF(PF_DEBUG_URGENT, 6324 ("pf_route6: m0->m_len < sizeof(struct ip6_hdr)\n")); 6325 goto bad; 6326 } 6327 ip6 = mtod(m0, struct ip6_hdr *); 6328 } 6329 6330 /* 6331 * If the packet is too large for the outgoing interface, 6332 * send back an icmp6 error. 6333 */ 6334 if (IN6_IS_SCOPE_EMBED(&dst->sin6_addr)) 6335 dst->sin6_addr.s6_addr16[1] = htons(ifp->if_index); 6336 if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) { 6337 nd6_output(ifp, ifp, m0, dst, NULL); 6338 } else { 6339 in6_ifstat_inc(ifp, ifs6_in_toobig); 6340 if (r->rt != PF_DUPTO) 6341 icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu); 6342 else 6343 goto bad; 6344 } 6345 6346 done: 6347 if (r->rt != PF_DUPTO) 6348 *m = NULL; 6349 return; 6350 6351 bad: 6352 m_freem(m0); 6353 goto done; 6354 } 6355 #endif /* INET6 */ 6356 6357 6358 /* 6359 * check protocol (tcp/udp/icmp/icmp6) checksum and set mbuf flag 6360 * off is the offset where the protocol header starts 6361 * len is the total length of protocol header plus payload 6362 * returns 0 when the checksum is valid, otherwise returns 1. 6363 */ 6364 /* 6365 * XXX 6366 * FreeBSD supports cksum offload for the following drivers. 6367 * em(4), gx(4), lge(4), nge(4), ti(4), xl(4) 6368 * If we could make full use of it, we would outperform ipfw/ipfilter in 6369 * very heavy traffic. 6370 * I have not tested it because I don't have NICs that support cksum offload. 6371 * (There might be problems. Typical phenomena would be 6372 * 1. No route message for UDP packets. 6373 * 2. No connection acceptance from external hosts regardless of rule set.) 6374 */ 6375 int 6376 pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, 6377 sa_family_t af) 6378 { 6379 u_int16_t sum = 0; 6380 int hw_assist = 0; 6381 struct ip *ip; 6382 6383 if (off < sizeof(struct ip) || len < sizeof(struct udphdr)) 6384 return (1); 6385 if (m->m_pkthdr.len < off + len) 6386 return (1); 6387 6388 switch (p) { 6389 case IPPROTO_TCP: 6390 case IPPROTO_UDP: 6391 if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) { 6392 if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) { 6393 sum = m->m_pkthdr.csum_data; 6394 } else { 6395 ip = mtod(m, struct ip *); 6396 sum = in_pseudo(ip->ip_src.s_addr, 6397 ip->ip_dst.s_addr, htonl((u_short)len + 6398 m->m_pkthdr.csum_data + p)); 6399 } 6400 sum ^= 0xffff; 6401 ++hw_assist; 6402 } 6403 break; 6404 case IPPROTO_ICMP: 6405 #ifdef INET6 6406 case IPPROTO_ICMPV6: 6407 #endif /* INET6 */ 6408 break; 6409 default: 6410 return (1); 6411 } 6412 6413 if (!hw_assist) { 6414 switch (af) { 6415 case AF_INET: 6416 if (p == IPPROTO_ICMP) { 6417 if (m->m_len < off) 6418 return (1); 6419 m->m_data += off; 6420 m->m_len -= off; 6421 sum = in_cksum(m, len); 6422 m->m_data -= off; 6423 m->m_len += off; 6424 } else { 6425 if (m->m_len < sizeof(struct ip)) 6426 return (1); 6427 sum = in_cksum_range(m, p, off, len); 6428 if (sum == 0) { 6429 m->m_pkthdr.csum_flags |= 6430 (CSUM_DATA_VALID | 6431 CSUM_PSEUDO_HDR); 6432 m->m_pkthdr.csum_data = 0xffff; 6433 } 6434 } 6435 break; 6436 #ifdef INET6 6437 case AF_INET6: 6438 if (m->m_len < sizeof(struct ip6_hdr)) 6439 return (1); 6440 sum = in6_cksum(m, p, off, len); 6441 /* 6442 * XXX 6443 * IPv6 H/W cksum off-load not supported yet!
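 * If drivers ever support it, the validation result would presumably be latched just as in the AF_INET branch above: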
6444 * 6445 * if (sum == 0) { 6446 * m->m_pkthdr.csum_flags |= 6447 * (CSUM_DATA_VALID|CSUM_PSEUDO_HDR); 6448 * m->m_pkthdr.csum_data = 0xffff; 6449 *} 6450 */ 6451 break; 6452 #endif /* INET6 */ 6453 default: 6454 return (1); 6455 } 6456 } 6457 if (sum) { 6458 switch (p) { 6459 case IPPROTO_TCP: 6460 tcpstat.tcps_rcvbadsum++; 6461 break; 6462 case IPPROTO_UDP: 6463 udp_stat.udps_badsum++; 6464 break; 6465 case IPPROTO_ICMP: 6466 icmpstat.icps_checksum++; 6467 break; 6468 #ifdef INET6 6469 case IPPROTO_ICMPV6: 6470 icmp6stat.icp6s_checksum++; 6471 break; 6472 #endif /* INET6 */ 6473 } 6474 return (1); 6475 } 6476 return (0); 6477 } 6478 6479 struct pf_divert * 6480 pf_find_divert(struct mbuf *m) 6481 { 6482 struct m_tag *mtag; 6483 6484 if ((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL) 6485 return (NULL); 6486 6487 return ((struct pf_divert *)(mtag + 1)); 6488 } 6489 6490 struct pf_divert * 6491 pf_get_divert(struct mbuf *m) 6492 { 6493 struct m_tag *mtag; 6494 6495 if ((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL) { 6496 mtag = m_tag_get(PACKET_TAG_PF_DIVERT, sizeof(struct pf_divert), 6497 M_NOWAIT); 6498 if (mtag == NULL) 6499 return (NULL); 6500 bzero(mtag + 1, sizeof(struct pf_divert)); 6501 m_tag_prepend(m, mtag); 6502 } 6503 6504 return ((struct pf_divert *)(mtag + 1)); 6505 } 6506 6507 #ifdef INET 6508 6509 /* 6510 * WARNING: pf_token held shared on entry, THIS IS CPU LOCALIZED CODE 6511 */ 6512 int 6513 pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, 6514 struct ether_header *eh, struct inpcb *inp) 6515 { 6516 struct pfi_kif *kif; 6517 u_short action, reason = 0, log = 0; 6518 struct mbuf *m = *m0; 6519 struct ip *h = NULL; 6520 struct pf_rule *a = NULL, *r = &pf_default_rule, *tr, *nr; 6521 struct pf_state *s = NULL; 6522 struct pf_ruleset *ruleset = NULL; 6523 struct pf_pdesc pd; 6524 int off, dirndx; 6525 #ifdef ALTQ 6526 int pqid = 0; 6527 #endif 6528 6529 if (m->m_pkthdr.fw_flags & IPFW_MBUF_CONTINUE) { 6530 /* Skip us; continue in ipfw. */ 6531 return (PF_PASS); 6532 } 6533 6534 if (!pf_status.running) 6535 return (PF_PASS); 6536 6537 memset(&pd, 0, sizeof(pd)); 6538 #ifdef foo 6539 if (ifp->if_type == IFT_CARP && ifp->if_carpdev) 6540 kif = (struct pfi_kif *)ifp->if_carpdev->if_pf_kif; 6541 else 6542 #endif 6543 kif = (struct pfi_kif *)ifp->if_pf_kif; 6544 6545 if (kif == NULL) { 6546 DPFPRINTF(PF_DEBUG_URGENT, 6547 ("pf_test: kif == NULL, if_xname %s\n", ifp->if_xname)); 6548 return (PF_DROP); 6549 } 6550 if (kif->pfik_flags & PFI_IFLAG_SKIP) 6551 return (PF_PASS); 6552 6553 #ifdef DIAGNOSTIC 6554 if ((m->m_flags & M_PKTHDR) == 0) 6555 panic("non-M_PKTHDR is passed to pf_test"); 6556 #endif /* DIAGNOSTIC */ 6557 6558 if (m->m_pkthdr.len < (int)sizeof(*h)) { 6559 action = PF_DROP; 6560 REASON_SET(&reason, PFRES_SHORT); 6561 log = 1; 6562 goto done; 6563 } 6564 6565 /* 6566 * DragonFly doesn't zero the auxillary pkghdr fields, only fw_flags, 6567 * so make sure pf.flags is clear. 
6568 */ 6569 if (m->m_pkthdr.fw_flags & PF_MBUF_TAGGED) 6570 return (PF_PASS); 6571 m->m_pkthdr.pf.flags = 0; 6572 /* Re-Check when updating to > 4.4 */ 6573 m->m_pkthdr.pf.statekey = NULL; 6574 6575 /* We do IP header normalization and packet reassembly here */ 6576 if (pf_normalize_ip(m0, dir, kif, &reason, &pd) != PF_PASS) { 6577 action = PF_DROP; 6578 goto done; 6579 } 6580 m = *m0; /* pf_normalize messes with m0 */ 6581 h = mtod(m, struct ip *); 6582 6583 off = h->ip_hl << 2; 6584 if (off < (int)sizeof(*h)) { 6585 action = PF_DROP; 6586 REASON_SET(&reason, PFRES_SHORT); 6587 log = 1; 6588 goto done; 6589 } 6590 6591 pd.src = (struct pf_addr *)&h->ip_src; 6592 pd.dst = (struct pf_addr *)&h->ip_dst; 6593 pd.sport = pd.dport = NULL; 6594 pd.ip_sum = &h->ip_sum; 6595 pd.proto_sum = NULL; 6596 pd.proto = h->ip_p; 6597 pd.dir = dir; 6598 pd.sidx = (dir == PF_IN) ? 0 : 1; 6599 pd.didx = (dir == PF_IN) ? 1 : 0; 6600 pd.af = AF_INET; 6601 pd.tos = h->ip_tos; 6602 pd.tot_len = ntohs(h->ip_len); 6603 pd.eh = eh; 6604 6605 /* handle fragments that didn't get reassembled by normalization */ 6606 if (h->ip_off & htons(IP_MF | IP_OFFMASK)) { 6607 action = pf_test_fragment(&r, dir, kif, m, h, 6608 &pd, &a, &ruleset); 6609 goto done; 6610 } 6611 6612 switch (h->ip_p) { 6613 6614 case IPPROTO_TCP: { 6615 struct tcphdr th; 6616 6617 pd.hdr.tcp = &th; 6618 if (!pf_pull_hdr(m, off, &th, sizeof(th), 6619 &action, &reason, AF_INET)) { 6620 log = action != PF_PASS; 6621 goto done; 6622 } 6623 pd.p_len = pd.tot_len - off - (th.th_off << 2); 6624 #ifdef ALTQ 6625 if ((th.th_flags & TH_ACK) && pd.p_len == 0) 6626 pqid = 1; 6627 #endif 6628 action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd); 6629 if (action == PF_DROP) 6630 goto done; 6631 action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd, 6632 &reason); 6633 if (action == PF_PASS) { 6634 r = s->rule.ptr; 6635 a = s->anchor.ptr; 6636 log = s->log; 6637 } else if (s == NULL) { 6638 action = pf_test_rule(&r, &s, dir, kif, 6639 m, off, h, &pd, &a, 6640 &ruleset, NULL, inp); 6641 } 6642 break; 6643 } 6644 6645 case IPPROTO_UDP: { 6646 struct udphdr uh; 6647 6648 pd.hdr.udp = &uh; 6649 if (!pf_pull_hdr(m, off, &uh, sizeof(uh), 6650 &action, &reason, AF_INET)) { 6651 log = action != PF_PASS; 6652 goto done; 6653 } 6654 if (uh.uh_dport == 0 || 6655 ntohs(uh.uh_ulen) > m->m_pkthdr.len - off || 6656 ntohs(uh.uh_ulen) < sizeof(struct udphdr)) { 6657 action = PF_DROP; 6658 REASON_SET(&reason, PFRES_SHORT); 6659 goto done; 6660 } 6661 action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd); 6662 if (action == PF_PASS) { 6663 r = s->rule.ptr; 6664 a = s->anchor.ptr; 6665 log = s->log; 6666 } else if (s == NULL) { 6667 action = pf_test_rule(&r, &s, dir, kif, 6668 m, off, h, &pd, &a, 6669 &ruleset, NULL, inp); 6670 } 6671 break; 6672 } 6673 6674 case IPPROTO_ICMP: { 6675 struct icmp ih; 6676 6677 pd.hdr.icmp = &ih; 6678 if (!pf_pull_hdr(m, off, &ih, ICMP_MINLEN, 6679 &action, &reason, AF_INET)) { 6680 log = action != PF_PASS; 6681 goto done; 6682 } 6683 action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd, 6684 &reason); 6685 if (action == PF_PASS) { 6686 r = s->rule.ptr; 6687 a = s->anchor.ptr; 6688 log = s->log; 6689 } else if (s == NULL) { 6690 action = pf_test_rule(&r, &s, dir, kif, 6691 m, off, h, &pd, &a, 6692 &ruleset, NULL, inp); 6693 } 6694 break; 6695 } 6696 6697 default: 6698 action = pf_test_state_other(&s, dir, kif, m, &pd); 6699 if (action == PF_PASS) { 6700 r = s->rule.ptr; 6701 a = s->anchor.ptr; 6702 log = s->log; 6703 } else if (s == NULL) { 
6704 action = pf_test_rule(&r, &s, dir, kif, m, off, h, 6705 &pd, &a, &ruleset, NULL, inp); 6706 } 6707 break; 6708 } 6709 6710 done: 6711 if (action == PF_PASS && h->ip_hl > 5 && 6712 !((s && s->state_flags & PFSTATE_ALLOWOPTS) || r->allow_opts)) { 6713 action = PF_DROP; 6714 REASON_SET(&reason, PFRES_IPOPTIONS); 6715 log = 1; 6716 DPFPRINTF(PF_DEBUG_MISC, 6717 ("pf: dropping packet with ip options\n")); 6718 } 6719 6720 if ((s && s->tag) || r->rtableid) 6721 pf_tag_packet(m, s ? s->tag : 0, r->rtableid); 6722 6723 #if 0 6724 if (dir == PF_IN && s && s->key[PF_SK_STACK]) 6725 m->m_pkthdr.pf.statekey = s->key[PF_SK_STACK]; 6726 #endif 6727 6728 #ifdef ALTQ 6729 /* 6730 * Generate a hash code and qid request for ALTQ. A qid of 0 6731 * is allowed and will cause altq to select the default queue. 6732 */ 6733 if (action == PF_PASS) { 6734 m->m_pkthdr.fw_flags |= PF_MBUF_STRUCTURE; 6735 if (pqid || (pd.tos & IPTOS_LOWDELAY)) 6736 m->m_pkthdr.pf.qid = r->pqid; 6737 else 6738 m->m_pkthdr.pf.qid = r->qid; 6739 m->m_pkthdr.pf.ecn_af = AF_INET; 6740 m->m_pkthdr.pf.hdr = h; 6741 /* add connection hash for fairq */ 6742 if (s) { 6743 /* for fairq */ 6744 m->m_pkthdr.pf.state_hash = s->hash; 6745 m->m_pkthdr.pf.flags |= PF_TAG_STATE_HASHED; 6746 } 6747 } 6748 #endif /* ALTQ */ 6749 6750 /* 6751 * connections redirected to loopback should not match sockets 6752 * bound specifically to loopback due to security implications, 6753 * see tcp_input() and in_pcblookup_listen(). 6754 */ 6755 if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP || 6756 pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL && 6757 (s->nat_rule.ptr->action == PF_RDR || 6758 s->nat_rule.ptr->action == PF_BINAT) && 6759 (ntohl(pd.dst->v4.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) 6760 { 6761 m->m_pkthdr.pf.flags |= PF_TAG_TRANSLATE_LOCALHOST; 6762 } 6763 6764 if (dir == PF_IN && action == PF_PASS && r->divert.port) { 6765 struct pf_divert *divert; 6766 6767 if ((divert = pf_get_divert(m))) { 6768 m->m_pkthdr.pf.flags |= PF_TAG_DIVERTED; 6769 divert->port = r->divert.port; 6770 divert->addr.ipv4 = r->divert.addr.v4; 6771 } 6772 } 6773 6774 if (log) { 6775 struct pf_rule *lr; 6776 6777 if (s != NULL && s->nat_rule.ptr != NULL && 6778 s->nat_rule.ptr->log & PF_LOG_ALL) 6779 lr = s->nat_rule.ptr; 6780 else 6781 lr = r; 6782 PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, lr, a, ruleset, 6783 &pd); 6784 } 6785 6786 kif->pfik_bytes[0][dir == PF_OUT][action != PF_PASS] += pd.tot_len; 6787 kif->pfik_packets[0][dir == PF_OUT][action != PF_PASS]++; 6788 6789 if (action == PF_PASS || r->action == PF_DROP) { 6790 dirndx = (dir == PF_OUT); 6791 r->packets[dirndx]++; 6792 r->bytes[dirndx] += pd.tot_len; 6793 if (a != NULL) { 6794 a->packets[dirndx]++; 6795 a->bytes[dirndx] += pd.tot_len; 6796 } 6797 if (s != NULL) { 6798 if (s->nat_rule.ptr != NULL) { 6799 s->nat_rule.ptr->packets[dirndx]++; 6800 s->nat_rule.ptr->bytes[dirndx] += pd.tot_len; 6801 } 6802 if (s->src_node != NULL) { 6803 s->src_node->packets[dirndx]++; 6804 s->src_node->bytes[dirndx] += pd.tot_len; 6805 } 6806 if (s->nat_src_node != NULL) { 6807 s->nat_src_node->packets[dirndx]++; 6808 s->nat_src_node->bytes[dirndx] += pd.tot_len; 6809 } 6810 dirndx = (dir == s->direction) ? 0 : 1; 6811 s->packets[dirndx]++; 6812 s->bytes[dirndx] += pd.tot_len; 6813 } 6814 tr = r; 6815 nr = (s != NULL) ? 
s->nat_rule.ptr : pd.nat_rule; 6816 if (nr != NULL && r == &pf_default_rule) 6817 tr = nr; 6818 if (tr->src.addr.type == PF_ADDR_TABLE) 6819 pfr_update_stats(tr->src.addr.p.tbl, 6820 (s == NULL) ? pd.src : 6821 &s->key[(s->direction == PF_IN)]-> 6822 addr[(s->direction == PF_OUT)], 6823 pd.af, pd.tot_len, dir == PF_OUT, 6824 r->action == PF_PASS, tr->src.neg); 6825 if (tr->dst.addr.type == PF_ADDR_TABLE) 6826 pfr_update_stats(tr->dst.addr.p.tbl, 6827 (s == NULL) ? pd.dst : 6828 &s->key[(s->direction == PF_IN)]-> 6829 addr[(s->direction == PF_IN)], 6830 pd.af, pd.tot_len, dir == PF_OUT, 6831 r->action == PF_PASS, tr->dst.neg); 6832 } 6833 6834 6835 if (action == PF_SYNPROXY_DROP) { 6836 m_freem(*m0); 6837 *m0 = NULL; 6838 action = PF_PASS; 6839 } else if (r->rt) { 6840 /* pf_route can free the mbuf causing *m0 to become NULL */ 6841 pf_route(m0, r, dir, kif->pfik_ifp, s, &pd); 6842 } 6843 6844 return (action); 6845 } 6846 #endif /* INET */ 6847 6848 #ifdef INET6 6849 6850 /* 6851 * WARNING: pf_token held shared on entry, THIS IS CPU LOCALIZED CODE 6852 */ 6853 int 6854 pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, 6855 struct ether_header *eh, struct inpcb *inp) 6856 { 6857 struct pfi_kif *kif; 6858 u_short action, reason = 0, log = 0; 6859 struct mbuf *m = *m0, *n = NULL; 6860 struct ip6_hdr *h = NULL; 6861 struct pf_rule *a = NULL, *r = &pf_default_rule, *tr, *nr; 6862 struct pf_state *s = NULL; 6863 struct pf_ruleset *ruleset = NULL; 6864 struct pf_pdesc pd; 6865 int off, terminal = 0, dirndx, rh_cnt = 0; 6866 6867 if (!pf_status.running) 6868 return (PF_PASS); 6869 6870 memset(&pd, 0, sizeof(pd)); 6871 #ifdef foo 6872 if (ifp->if_type == IFT_CARP && ifp->if_carpdev) 6873 kif = (struct pfi_kif *)ifp->if_carpdev->if_pf_kif; 6874 else 6875 #endif 6876 kif = (struct pfi_kif *)ifp->if_pf_kif; 6877 6878 if (kif == NULL) { 6879 DPFPRINTF(PF_DEBUG_URGENT, 6880 ("pf_test6: kif == NULL, if_xname %s\n", ifp->if_xname)); 6881 return (PF_DROP); 6882 } 6883 if (kif->pfik_flags & PFI_IFLAG_SKIP) 6884 return (PF_PASS); 6885 6886 #ifdef DIAGNOSTIC 6887 if ((m->m_flags & M_PKTHDR) == 0) 6888 panic("non-M_PKTHDR is passed to pf_test6"); 6889 #endif /* DIAGNOSTIC */ 6890 6891 if (m->m_pkthdr.len < (int)sizeof(*h)) { 6892 action = PF_DROP; 6893 REASON_SET(&reason, PFRES_SHORT); 6894 log = 1; 6895 goto done; 6896 } 6897 6898 /* 6899 * DragonFly doesn't zero the auxiliary pkthdr fields, only fw_flags, 6900 * so make sure pf.flags is clear. 6901 */ 6902 if (m->m_pkthdr.fw_flags & PF_MBUF_TAGGED) 6903 return (PF_PASS); 6904 m->m_pkthdr.pf.flags = 0; 6905 /* Re-Check when updating to > 4.4 */ 6906 m->m_pkthdr.pf.statekey = NULL; 6907 6908 /* We do IP header normalization and packet reassembly here */ 6909 if (pf_normalize_ip6(m0, dir, kif, &reason, &pd) != PF_PASS) { 6910 action = PF_DROP; 6911 goto done; 6912 } 6913 m = *m0; /* pf_normalize messes with m0 */ 6914 h = mtod(m, struct ip6_hdr *); 6915 6916 #if 1 6917 /* 6918 * We do not support jumbograms yet.  If we kept going, a zero ip6_plen 6919 * would break the length calculations below, so drop the packet for now. 6920 */ 6921 if (htons(h->ip6_plen) == 0) { 6922 action = PF_DROP; 6923 REASON_SET(&reason, PFRES_NORM); /*XXX*/ 6924 goto done; 6925 } 6926 #endif 6927 6928 pd.src = (struct pf_addr *)&h->ip6_src; 6929 pd.dst = (struct pf_addr *)&h->ip6_dst; 6930 pd.sport = pd.dport = NULL; 6931 pd.ip_sum = NULL; 6932 pd.proto_sum = NULL; 6933 pd.dir = dir; 6934 pd.sidx = (dir == PF_IN) ? 0 : 1; 6935 pd.didx = (dir == PF_IN) ?
1 : 0; 6936 pd.af = AF_INET6; 6937 pd.tos = 0; 6938 pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr); 6939 pd.eh = eh; 6940 6941 off = ((caddr_t)h - m->m_data) + sizeof(struct ip6_hdr); 6942 pd.proto = h->ip6_nxt; 6943 do { 6944 switch (pd.proto) { 6945 case IPPROTO_FRAGMENT: 6946 action = pf_test_fragment(&r, dir, kif, m, h, 6947 &pd, &a, &ruleset); 6948 if (action == PF_DROP) 6949 REASON_SET(&reason, PFRES_FRAG); 6950 goto done; 6951 case IPPROTO_ROUTING: { 6952 struct ip6_rthdr rthdr; 6953 6954 if (rh_cnt++) { 6955 DPFPRINTF(PF_DEBUG_MISC, 6956 ("pf: IPv6 more than one rthdr\n")); 6957 action = PF_DROP; 6958 REASON_SET(&reason, PFRES_IPOPTIONS); 6959 log = 1; 6960 goto done; 6961 } 6962 if (!pf_pull_hdr(m, off, &rthdr, sizeof(rthdr), NULL, 6963 &reason, pd.af)) { 6964 DPFPRINTF(PF_DEBUG_MISC, 6965 ("pf: IPv6 short rthdr\n")); 6966 action = PF_DROP; 6967 REASON_SET(&reason, PFRES_SHORT); 6968 log = 1; 6969 goto done; 6970 } 6971 if (rthdr.ip6r_type == IPV6_RTHDR_TYPE_0) { 6972 DPFPRINTF(PF_DEBUG_MISC, 6973 ("pf: IPv6 rthdr0\n")); 6974 action = PF_DROP; 6975 REASON_SET(&reason, PFRES_IPOPTIONS); 6976 log = 1; 6977 goto done; 6978 } 6979 /* FALLTHROUGH */ 6980 } 6981 case IPPROTO_AH: 6982 case IPPROTO_HOPOPTS: 6983 case IPPROTO_DSTOPTS: { 6984 /* get next header and header length */ 6985 struct ip6_ext opt6; 6986 6987 if (!pf_pull_hdr(m, off, &opt6, sizeof(opt6), 6988 NULL, &reason, pd.af)) { 6989 DPFPRINTF(PF_DEBUG_MISC, 6990 ("pf: IPv6 short opt\n")); 6991 action = PF_DROP; 6992 log = 1; 6993 goto done; 6994 } 6995 if (pd.proto == IPPROTO_AH) 6996 off += (opt6.ip6e_len + 2) * 4; 6997 else 6998 off += (opt6.ip6e_len + 1) * 8; 6999 pd.proto = opt6.ip6e_nxt; 7000 /* go to the next header */ 7001 break; 7002 } 7003 default: 7004 terminal++; 7005 break; 7006 } 7007 } while (!terminal); 7008 7009 /* if there's no routing header, use the unmodified mbuf for checksumming; n is never set above in this port, so this always picks m */ 7010 if (!n) 7011 n = m; 7012 7013 switch (pd.proto) { 7014 7015 case IPPROTO_TCP: { 7016 struct tcphdr th; 7017 7018 pd.hdr.tcp = &th; 7019 if (!pf_pull_hdr(m, off, &th, sizeof(th), 7020 &action, &reason, AF_INET6)) { 7021 log = action != PF_PASS; 7022 goto done; 7023 } 7024 pd.p_len = pd.tot_len - off - (th.th_off << 2); 7025 action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd); 7026 if (action == PF_DROP) 7027 goto done; 7028 action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd, 7029 &reason); 7030 if (action == PF_PASS) { 7031 r = s->rule.ptr; 7032 a = s->anchor.ptr; 7033 log = s->log; 7034 } else if (s == NULL) { 7035 action = pf_test_rule(&r, &s, dir, kif, 7036 m, off, h, &pd, &a, 7037 &ruleset, NULL, inp); 7038 } 7039 break; 7040 } 7041 7042 case IPPROTO_UDP: { 7043 struct udphdr uh; 7044 7045 pd.hdr.udp = &uh; 7046 if (!pf_pull_hdr(m, off, &uh, sizeof(uh), 7047 &action, &reason, AF_INET6)) { 7048 log = action != PF_PASS; 7049 goto done; 7050 } 7051 if (uh.uh_dport == 0 || 7052 ntohs(uh.uh_ulen) > m->m_pkthdr.len - off || 7053 ntohs(uh.uh_ulen) < sizeof(struct udphdr)) { 7054 action = PF_DROP; 7055 REASON_SET(&reason, PFRES_SHORT); 7056 goto done; 7057 } 7058 action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd); 7059 if (action == PF_PASS) { 7060 r = s->rule.ptr; 7061 a = s->anchor.ptr; 7062 log = s->log; 7063 } else if (s == NULL) { 7064 action = pf_test_rule(&r, &s, dir, kif, 7065 m, off, h, &pd, &a, 7066 &ruleset, NULL, inp); 7067 } 7068 break; 7069 } 7070 7071 case IPPROTO_ICMPV6: { 7072 struct icmp6_hdr ih; 7073 7074 pd.hdr.icmp6 = &ih; 7075 if (!pf_pull_hdr(m, off, &ih, sizeof(ih), 7076 &action,
&reason, AF_INET6)) { 7077 log = action != PF_PASS; 7078 goto done; 7079 } 7080 action = pf_test_state_icmp(&s, dir, kif, 7081 m, off, h, &pd, &reason); 7082 if (action == PF_PASS) { 7083 r = s->rule.ptr; 7084 a = s->anchor.ptr; 7085 log = s->log; 7086 } else if (s == NULL) { 7087 action = pf_test_rule(&r, &s, dir, kif, 7088 m, off, h, &pd, &a, 7089 &ruleset, NULL, inp); 7090 } 7091 break; 7092 } 7093 7094 default: 7095 action = pf_test_state_other(&s, dir, kif, m, &pd); 7096 if (action == PF_PASS) { 7097 r = s->rule.ptr; 7098 a = s->anchor.ptr; 7099 log = s->log; 7100 } else if (s == NULL) { 7101 action = pf_test_rule(&r, &s, dir, kif, m, off, h, 7102 &pd, &a, &ruleset, NULL, inp); 7103 } 7104 break; 7105 } 7106 7107 done: 7108 if (n != m) { 7109 m_freem(n); 7110 n = NULL; 7111 } 7112 7113 /* handle dangerous IPv6 extension headers. */ 7114 if (action == PF_PASS && rh_cnt && 7115 !((s && s->state_flags & PFSTATE_ALLOWOPTS) || r->allow_opts)) { 7116 action = PF_DROP; 7117 REASON_SET(&reason, PFRES_IPOPTIONS); 7118 log = 1; 7119 DPFPRINTF(PF_DEBUG_MISC, 7120 ("pf: dropping packet with dangerous v6 headers\n")); 7121 } 7122 7123 if ((s && s->tag) || r->rtableid) 7124 pf_tag_packet(m, s ? s->tag : 0, r->rtableid); 7125 7126 #if 0 7127 if (dir == PF_IN && s && s->key[PF_SK_STACK]) 7128 m->m_pkthdr.pf.statekey = s->key[PF_SK_STACK]; 7129 #endif 7130 7131 #ifdef ALTQ 7132 /* 7133 * Generate a hash code and qid request for ALTQ. A qid of 0 7134 * is allowed and will cause altq to select the default queue. 7135 */ 7136 if (action == PF_PASS) { 7137 m->m_pkthdr.fw_flags |= PF_MBUF_STRUCTURE; 7138 if (pd.tos & IPTOS_LOWDELAY) 7139 m->m_pkthdr.pf.qid = r->pqid; 7140 else 7141 m->m_pkthdr.pf.qid = r->qid; 7142 m->m_pkthdr.pf.ecn_af = AF_INET6; 7143 m->m_pkthdr.pf.hdr = h; 7144 if (s) { 7145 /* for fairq */ 7146 m->m_pkthdr.pf.state_hash = s->hash; 7147 m->m_pkthdr.pf.flags |= PF_TAG_STATE_HASHED; 7148 } 7149 } 7150 #endif /* ALTQ */ 7151 7152 if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP || 7153 pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL && 7154 (s->nat_rule.ptr->action == PF_RDR || 7155 s->nat_rule.ptr->action == PF_BINAT) && 7156 IN6_IS_ADDR_LOOPBACK(&pd.dst->v6)) 7157 { 7158 m->m_pkthdr.pf.flags |= PF_TAG_TRANSLATE_LOCALHOST; 7159 } 7160 7161 if (dir == PF_IN && action == PF_PASS && r->divert.port) { 7162 struct pf_divert *divert; 7163 7164 if ((divert = pf_get_divert(m))) { 7165 m->m_pkthdr.pf.flags |= PF_TAG_DIVERTED; 7166 divert->port = r->divert.port; 7167 divert->addr.ipv6 = r->divert.addr.v6; 7168 } 7169 } 7170 7171 if (log) { 7172 struct pf_rule *lr; 7173 7174 if (s != NULL && s->nat_rule.ptr != NULL && 7175 s->nat_rule.ptr->log & PF_LOG_ALL) 7176 lr = s->nat_rule.ptr; 7177 else 7178 lr = r; 7179 PFLOG_PACKET(kif, h, m, AF_INET6, dir, reason, lr, a, ruleset, 7180 &pd); 7181 } 7182 7183 kif->pfik_bytes[1][dir == PF_OUT][action != PF_PASS] += pd.tot_len; 7184 kif->pfik_packets[1][dir == PF_OUT][action != PF_PASS]++; 7185 7186 if (action == PF_PASS || r->action == PF_DROP) { 7187 dirndx = (dir == PF_OUT); 7188 r->packets[dirndx]++; 7189 r->bytes[dirndx] += pd.tot_len; 7190 if (a != NULL) { 7191 a->packets[dirndx]++; 7192 a->bytes[dirndx] += pd.tot_len; 7193 } 7194 if (s != NULL) { 7195 if (s->nat_rule.ptr != NULL) { 7196 s->nat_rule.ptr->packets[dirndx]++; 7197 s->nat_rule.ptr->bytes[dirndx] += pd.tot_len; 7198 } 7199 if (s->src_node != NULL) { 7200 s->src_node->packets[dirndx]++; 7201 s->src_node->bytes[dirndx] += pd.tot_len; 7202 } 7203 if 
(s->nat_src_node != NULL) { 7204 s->nat_src_node->packets[dirndx]++; 7205 s->nat_src_node->bytes[dirndx] += pd.tot_len; 7206 } 7207 dirndx = (dir == s->direction) ? 0 : 1; 7208 s->packets[dirndx]++; 7209 s->bytes[dirndx] += pd.tot_len; 7210 } 7211 tr = r; 7212 nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule; 7213 if (nr != NULL && r == &pf_default_rule) 7214 tr = nr; 7215 if (tr->src.addr.type == PF_ADDR_TABLE) 7216 pfr_update_stats(tr->src.addr.p.tbl, 7217 (s == NULL) ? pd.src : 7218 &s->key[(s->direction == PF_IN)]->addr[0], 7219 pd.af, pd.tot_len, dir == PF_OUT, 7220 r->action == PF_PASS, tr->src.neg); 7221 if (tr->dst.addr.type == PF_ADDR_TABLE) 7222 pfr_update_stats(tr->dst.addr.p.tbl, 7223 (s == NULL) ? pd.dst : 7224 &s->key[(s->direction == PF_IN)]->addr[1], 7225 pd.af, pd.tot_len, dir == PF_OUT, 7226 r->action == PF_PASS, tr->dst.neg); 7227 } 7228 7229 7230 if (action == PF_SYNPROXY_DROP) { 7231 m_freem(*m0); 7232 *m0 = NULL; 7233 action = PF_PASS; 7234 } else if (r->rt) 7235 /* pf_route6 can free the mbuf causing *m0 to become NULL */ 7236 pf_route6(m0, r, dir, kif->pfik_ifp, s, &pd); 7237 7238 return (action); 7239 } 7240 #endif /* INET6 */ 7241 /* Congestion feedback is not implemented in this port; always reports no congestion. */ 7242 int 7243 pf_check_congestion(struct ifqueue *ifq) 7244 { 7245 return (0); 7246 } 7247