1 /* $OpenBSD: pf.c,v 1.771 2011/08/30 00:40:47 mikeb Exp $ */ 2 3 /* 4 * Copyright (c) 2001 Daniel Hartmeier 5 * Copyright (c) 2002 - 2010 Henning Brauer 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * - Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * - Redistributions in binary form must reproduce the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer in the documentation and/or other materials provided 17 * with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 22 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 23 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 25 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 29 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 * 32 * Effort sponsored in part by the Defense Advanced Research Projects 33 * Agency (DARPA) and Air Force Research Laboratory, Air Force 34 * Materiel Command, USAF, under agreement number F30602-01-2-0537. 
35 * 36 */ 37 38 #include "bpfilter.h" 39 #include "pflog.h" 40 #include "pfsync.h" 41 #include "pflow.h" 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 #include <sys/mbuf.h> 46 #include <sys/filio.h> 47 #include <sys/socket.h> 48 #include <sys/socketvar.h> 49 #include <sys/kernel.h> 50 #include <sys/time.h> 51 #include <sys/pool.h> 52 #include <sys/proc.h> 53 #include <sys/rwlock.h> 54 #include <sys/syslog.h> 55 56 #include <crypto/md5.h> 57 58 #include <net/if.h> 59 #include <net/if_types.h> 60 #include <net/bpf.h> 61 #include <net/route.h> 62 #include <net/radix_mpath.h> 63 64 #include <netinet/in.h> 65 #include <netinet/in_var.h> 66 #include <netinet/in_systm.h> 67 #include <netinet/ip.h> 68 #include <netinet/ip_var.h> 69 #include <netinet/tcp.h> 70 #include <netinet/tcp_seq.h> 71 #include <netinet/udp.h> 72 #include <netinet/ip_icmp.h> 73 #include <netinet/in_pcb.h> 74 #include <netinet/tcp_timer.h> 75 #include <netinet/tcp_var.h> 76 #include <netinet/udp_var.h> 77 #include <netinet/icmp_var.h> 78 #include <netinet/if_ether.h> 79 #include <netinet/ip_divert.h> 80 81 #include <dev/rndvar.h> 82 #include <net/pfvar.h> 83 #include <net/if_pflog.h> 84 #include <net/if_pflow.h> 85 86 #if NPFSYNC > 0 87 #include <net/if_pfsync.h> 88 #endif /* NPFSYNC > 0 */ 89 90 #ifdef INET6 91 #include <netinet/ip6.h> 92 #include <netinet/in_pcb.h> 93 #include <netinet/icmp6.h> 94 #include <netinet6/nd6.h> 95 #include <netinet6/ip6_divert.h> 96 #endif /* INET6 */ 97 98 99 /* 100 * Global variables 101 */ 102 103 /* state tables */ 104 struct pf_state_tree pf_statetbl; 105 106 struct pf_altqqueue pf_altqs[2]; 107 struct pf_altqqueue *pf_altqs_active; 108 struct pf_altqqueue *pf_altqs_inactive; 109 struct pf_status pf_status; 110 111 u_int32_t ticket_altqs_active; 112 u_int32_t ticket_altqs_inactive; 113 int altqs_inactive_open; 114 115 MD5_CTX pf_tcp_secret_ctx; 116 u_char pf_tcp_secret[16]; 117 int pf_tcp_secret_init; 118 int pf_tcp_iss_off; 119 120 struct 
pf_anchor_stackframe { 121 struct pf_ruleset *rs; 122 struct pf_rule *r; 123 struct pf_anchor_node *parent; 124 struct pf_anchor *child; 125 } pf_anchor_stack[64]; 126 127 /* cannot fold into pf_pdesc directly, unknown storage size outside pf.c */ 128 union pf_headers { 129 struct tcphdr tcp; 130 struct udphdr udp; 131 struct icmp icmp; 132 #ifdef INET6 133 struct icmp6_hdr icmp6; 134 struct mld_hdr mld; 135 struct nd_neighbor_solicit nd_ns; 136 #endif /* INET6 */ 137 }; 138 139 140 struct pool pf_src_tree_pl, pf_rule_pl; 141 struct pool pf_state_pl, pf_state_key_pl, pf_state_item_pl; 142 struct pool pf_altq_pl, pf_rule_item_pl, pf_sn_item_pl; 143 144 void pf_init_threshold(struct pf_threshold *, u_int32_t, 145 u_int32_t); 146 void pf_add_threshold(struct pf_threshold *); 147 int pf_check_threshold(struct pf_threshold *); 148 149 void pf_change_ap(struct pf_addr *, u_int16_t *, 150 u_int16_t *, struct pf_addr *, u_int16_t, 151 u_int8_t, sa_family_t); 152 int pf_modulate_sack(struct mbuf *, int, struct pf_pdesc *, 153 struct tcphdr *, struct pf_state_peer *); 154 #ifdef INET6 155 void pf_change_a6(struct pf_addr *, u_int16_t *, 156 struct pf_addr *, u_int8_t); 157 #endif /* INET6 */ 158 int pf_icmp_mapping(struct pf_pdesc *, u_int8_t, int *, 159 int *, u_int16_t *, u_int16_t *); 160 void pf_change_icmp(struct pf_addr *, u_int16_t *, 161 struct pf_addr *, struct pf_addr *, u_int16_t, 162 u_int16_t *, u_int16_t *, u_int16_t *, 163 u_int8_t, sa_family_t); 164 void pf_send_tcp(const struct pf_rule *, sa_family_t, 165 const struct pf_addr *, const struct pf_addr *, 166 u_int16_t, u_int16_t, u_int32_t, u_int32_t, 167 u_int8_t, u_int16_t, u_int16_t, u_int8_t, int, 168 u_int16_t, u_int, struct ether_header *, 169 struct ifnet *); 170 void pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t, 171 sa_family_t, struct pf_rule *, u_int); 172 void pf_detach_state(struct pf_state *); 173 void pf_state_key_detach(struct pf_state *, int); 174 u_int32_t pf_tcp_iss(struct pf_pdesc *); 175 
void pf_rule_to_actions(struct pf_rule *, 176 struct pf_rule_actions *); 177 int pf_test_rule(struct pf_rule **, struct pf_state **, 178 int, struct pfi_kif *, struct mbuf *, int, 179 struct pf_pdesc *, struct pf_rule **, 180 struct pf_ruleset **, int); 181 static __inline int pf_create_state(struct pf_rule *, struct pf_rule *, 182 struct pf_rule *, struct pf_pdesc *, 183 struct pf_state_key **, struct pf_state_key **, 184 struct mbuf *, int, int *, struct pfi_kif *, 185 struct pf_state **, int, struct pf_rule_slist *, 186 struct pf_rule_actions *, struct pf_src_node *[]); 187 int pf_state_key_setup(struct pf_pdesc *, struct 188 pf_state_key **, struct pf_state_key **, int); 189 int pf_tcp_track_full(struct pf_state_peer *, 190 struct pf_state_peer *, struct pf_state **, 191 struct pfi_kif *, struct mbuf *, int, 192 struct pf_pdesc *, u_short *, int *); 193 int pf_tcp_track_sloppy(struct pf_state_peer *, 194 struct pf_state_peer *, struct pf_state **, 195 struct pf_pdesc *, u_short *); 196 int pf_test_state_tcp(struct pf_state **, int, 197 struct pfi_kif *, struct mbuf *, int, 198 struct pf_pdesc *, u_short *); 199 int pf_test_state_udp(struct pf_state **, int, 200 struct pfi_kif *, struct mbuf *, int, 201 struct pf_pdesc *); 202 int pf_icmp_state_lookup(struct pf_state_key_cmp *, 203 struct pf_pdesc *, struct pf_state **, 204 struct mbuf *, int, struct pfi_kif *, u_int16_t, 205 u_int16_t, int, int *, int, int); 206 int pf_test_state_icmp(struct pf_state **, int, 207 struct pfi_kif *, struct mbuf *, int, 208 struct pf_pdesc *, u_short *); 209 int pf_test_state_other(struct pf_state **, int, 210 struct pfi_kif *, struct mbuf *, struct pf_pdesc *); 211 void pf_route(struct mbuf **, struct pf_rule *, int, 212 struct ifnet *, struct pf_state *); 213 void pf_route6(struct mbuf **, struct pf_rule *, int, 214 struct ifnet *, struct pf_state *); 215 u_int8_t pf_get_wscale(struct mbuf *, int, u_int16_t, 216 sa_family_t); 217 u_int16_t pf_get_mss(struct mbuf *, int, 
u_int16_t, 218 sa_family_t); 219 u_int16_t pf_calc_mss(struct pf_addr *, sa_family_t, int, 220 u_int16_t); 221 void pf_set_rt_ifp(struct pf_state *, 222 struct pf_addr *); 223 int pf_check_proto_cksum(struct mbuf *, int, int, 224 u_int8_t, sa_family_t); 225 struct pf_divert *pf_get_divert(struct mbuf *); 226 int pf_walk_option6(struct mbuf *, int, int, u_int32_t *, 227 u_short *); 228 int pf_walk_header6(struct mbuf *, struct ip6_hdr *, int *, 229 int *, int *, u_int8_t *, u_int32_t *, u_short *); 230 void pf_print_state_parts(struct pf_state *, 231 struct pf_state_key *, struct pf_state_key *); 232 int pf_addr_wrap_neq(struct pf_addr_wrap *, 233 struct pf_addr_wrap *); 234 int pf_compare_state_keys(struct pf_state_key *, 235 struct pf_state_key *, struct pfi_kif *, u_int); 236 struct pf_state *pf_find_state(struct pfi_kif *, 237 struct pf_state_key_cmp *, u_int, struct mbuf *); 238 int pf_src_connlimit(struct pf_state **); 239 int pf_check_congestion(struct ifqueue *); 240 int pf_match_rcvif(struct mbuf *, struct pf_rule *); 241 void pf_counters_inc(int, int, 242 struct pf_pdesc *, struct pfi_kif *, 243 struct pf_state *, struct pf_rule *, 244 struct pf_rule *); 245 246 extern struct pool pfr_ktable_pl; 247 extern struct pool pfr_kentry_pl; 248 249 struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = { 250 { &pf_state_pl, PFSTATE_HIWAT, PFSTATE_HIWAT }, 251 { &pf_src_tree_pl, PFSNODE_HIWAT, PFSNODE_HIWAT }, 252 { &pf_frent_pl, PFFRAG_FRENT_HIWAT, PFFRAG_FRENT_HIWAT }, 253 { &pfr_ktable_pl, PFR_KTABLE_HIWAT, PFR_KTABLE_HIWAT }, 254 { &pfr_kentry_pl, PFR_KENTRY_HIWAT, PFR_KENTRY_HIWAT } 255 }; 256 257 enum { PF_ICMP_MULTI_NONE, PF_ICMP_MULTI_SOLICITED, PF_ICMP_MULTI_LINK }; 258 259 260 #define STATE_LOOKUP(i, k, d, s, m) \ 261 do { \ 262 s = pf_find_state(i, k, d, m); \ 263 if (s == NULL || (s)->timeout == PFTM_PURGE) \ 264 return (PF_DROP); \ 265 if (d == PF_OUT && \ 266 (((s)->rule.ptr->rt == PF_ROUTETO && \ 267 (s)->rule.ptr->direction == PF_OUT) || \ 268 
((s)->rule.ptr->rt == PF_REPLYTO && \ 269 (s)->rule.ptr->direction == PF_IN)) && \ 270 (s)->rt_kif != NULL && \ 271 (s)->rt_kif != i) \ 272 return (PF_PASS); \ 273 } while (0) 274 275 #define BOUND_IFACE(r, k) \ 276 ((r)->rule_flag & PFRULE_IFBOUND) ? (k) : pfi_all 277 278 #define STATE_INC_COUNTERS(s) \ 279 do { \ 280 struct pf_rule_item *mrm; \ 281 s->rule.ptr->states_cur++; \ 282 s->rule.ptr->states_tot++; \ 283 if (s->anchor.ptr != NULL) { \ 284 s->anchor.ptr->states_cur++; \ 285 s->anchor.ptr->states_tot++; \ 286 } \ 287 SLIST_FOREACH(mrm, &s->match_rules, entry) \ 288 mrm->r->states_cur++; \ 289 } while (0) 290 291 #define STATE_DEC_COUNTERS(s) \ 292 do { \ 293 struct pf_rule_item *mrm; \ 294 if (s->anchor.ptr != NULL) \ 295 s->anchor.ptr->states_cur--; \ 296 s->rule.ptr->states_cur--; \ 297 SLIST_FOREACH(mrm, &s->match_rules, entry) \ 298 mrm->r->states_cur--; \ 299 } while (0) 300 301 static __inline int pf_src_compare(struct pf_src_node *, struct pf_src_node *); 302 static __inline int pf_state_compare_key(struct pf_state_key *, 303 struct pf_state_key *); 304 static __inline int pf_state_compare_id(struct pf_state *, 305 struct pf_state *); 306 307 struct pf_src_tree tree_src_tracking; 308 309 struct pf_state_tree_id tree_id; 310 struct pf_state_queue state_list; 311 312 RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare); 313 RB_GENERATE(pf_state_tree, pf_state_key, entry, pf_state_compare_key); 314 RB_GENERATE(pf_state_tree_id, pf_state, 315 entry_id, pf_state_compare_id); 316 317 __inline int 318 pf_addr_compare(struct pf_addr *a, struct pf_addr *b, sa_family_t af) 319 { 320 switch (af) { 321 #ifdef INET 322 case AF_INET: 323 if (a->addr32[0] > b->addr32[0]) 324 return (1); 325 if (a->addr32[0] < b->addr32[0]) 326 return (-1); 327 break; 328 #endif /* INET */ 329 #ifdef INET6 330 case AF_INET6: 331 if (a->addr32[3] > b->addr32[3]) 332 return (1); 333 if (a->addr32[3] < b->addr32[3]) 334 return (-1); 335 if (a->addr32[2] > b->addr32[2]) 336 
return (1); 337 if (a->addr32[2] < b->addr32[2]) 338 return (-1); 339 if (a->addr32[1] > b->addr32[1]) 340 return (1); 341 if (a->addr32[1] < b->addr32[1]) 342 return (-1); 343 if (a->addr32[0] > b->addr32[0]) 344 return (1); 345 if (a->addr32[0] < b->addr32[0]) 346 return (-1); 347 break; 348 #endif /* INET6 */ 349 } 350 return (0); 351 } 352 353 static __inline int 354 pf_src_compare(struct pf_src_node *a, struct pf_src_node *b) 355 { 356 int diff; 357 358 if (a->rule.ptr > b->rule.ptr) 359 return (1); 360 if (a->rule.ptr < b->rule.ptr) 361 return (-1); 362 if ((diff = a->type - b->type) != 0) 363 return (diff); 364 if ((diff = a->af - b->af) != 0) 365 return (diff); 366 if ((diff = pf_addr_compare(&a->addr, &b->addr, a->af)) != 0) 367 return (diff); 368 return (0); 369 } 370 371 #ifdef INET6 372 void 373 pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af) 374 { 375 switch (af) { 376 #ifdef INET 377 case AF_INET: 378 dst->addr32[0] = src->addr32[0]; 379 break; 380 #endif /* INET */ 381 case AF_INET6: 382 dst->addr32[0] = src->addr32[0]; 383 dst->addr32[1] = src->addr32[1]; 384 dst->addr32[2] = src->addr32[2]; 385 dst->addr32[3] = src->addr32[3]; 386 break; 387 } 388 } 389 #endif /* INET6 */ 390 391 void 392 pf_init_threshold(struct pf_threshold *threshold, 393 u_int32_t limit, u_int32_t seconds) 394 { 395 threshold->limit = limit * PF_THRESHOLD_MULT; 396 threshold->seconds = seconds; 397 threshold->count = 0; 398 threshold->last = time_second; 399 } 400 401 void 402 pf_add_threshold(struct pf_threshold *threshold) 403 { 404 u_int32_t t = time_second, diff = t - threshold->last; 405 406 if (diff >= threshold->seconds) 407 threshold->count = 0; 408 else 409 threshold->count -= threshold->count * diff / 410 threshold->seconds; 411 threshold->count += PF_THRESHOLD_MULT; 412 threshold->last = t; 413 } 414 415 int 416 pf_check_threshold(struct pf_threshold *threshold) 417 { 418 return (threshold->count > threshold->limit); 419 } 420 421 int 422 
pf_src_connlimit(struct pf_state **state) 423 { 424 int bad = 0; 425 struct pf_src_node *sn; 426 427 if ((sn = pf_get_src_node((*state), PF_SN_NONE)) == NULL) 428 return (0); 429 430 sn->conn++; 431 (*state)->src.tcp_est = 1; 432 pf_add_threshold(&sn->conn_rate); 433 434 if ((*state)->rule.ptr->max_src_conn && 435 (*state)->rule.ptr->max_src_conn < sn->conn) { 436 pf_status.lcounters[LCNT_SRCCONN]++; 437 bad++; 438 } 439 440 if ((*state)->rule.ptr->max_src_conn_rate.limit && 441 pf_check_threshold(&sn->conn_rate)) { 442 pf_status.lcounters[LCNT_SRCCONNRATE]++; 443 bad++; 444 } 445 446 if (!bad) 447 return (0); 448 449 if ((*state)->rule.ptr->overload_tbl) { 450 struct pfr_addr p; 451 u_int32_t killed = 0; 452 453 pf_status.lcounters[LCNT_OVERLOAD_TABLE]++; 454 if (pf_status.debug >= LOG_NOTICE) { 455 log(LOG_NOTICE, 456 "pf: pf_src_connlimit: blocking address "); 457 pf_print_host(&sn->addr, 0, 458 (*state)->key[PF_SK_WIRE]->af); 459 } 460 461 bzero(&p, sizeof(p)); 462 p.pfra_af = (*state)->key[PF_SK_WIRE]->af; 463 switch ((*state)->key[PF_SK_WIRE]->af) { 464 #ifdef INET 465 case AF_INET: 466 p.pfra_net = 32; 467 p.pfra_ip4addr = sn->addr.v4; 468 break; 469 #endif /* INET */ 470 #ifdef INET6 471 case AF_INET6: 472 p.pfra_net = 128; 473 p.pfra_ip6addr = sn->addr.v6; 474 break; 475 #endif /* INET6 */ 476 } 477 478 pfr_insert_kentry((*state)->rule.ptr->overload_tbl, 479 &p, time_second); 480 481 /* kill existing states if that's required. */ 482 if ((*state)->rule.ptr->flush) { 483 struct pf_state_key *sk; 484 struct pf_state *st; 485 486 pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++; 487 RB_FOREACH(st, pf_state_tree_id, &tree_id) { 488 sk = st->key[PF_SK_WIRE]; 489 /* 490 * Kill states from this source. 
(Only those 491 * from the same rule if PF_FLUSH_GLOBAL is not 492 * set) 493 */ 494 if (sk->af == 495 (*state)->key[PF_SK_WIRE]->af && 496 (((*state)->direction == PF_OUT && 497 PF_AEQ(&sn->addr, &sk->addr[1], sk->af)) || 498 ((*state)->direction == PF_IN && 499 PF_AEQ(&sn->addr, &sk->addr[0], sk->af))) && 500 ((*state)->rule.ptr->flush & 501 PF_FLUSH_GLOBAL || 502 (*state)->rule.ptr == st->rule.ptr)) { 503 st->timeout = PFTM_PURGE; 504 st->src.state = st->dst.state = 505 TCPS_CLOSED; 506 killed++; 507 } 508 } 509 if (pf_status.debug >= LOG_NOTICE) 510 addlog(", %u states killed", killed); 511 } 512 if (pf_status.debug >= LOG_NOTICE) 513 addlog("\n"); 514 } 515 516 /* kill this state */ 517 (*state)->timeout = PFTM_PURGE; 518 (*state)->src.state = (*state)->dst.state = TCPS_CLOSED; 519 return (1); 520 } 521 522 int 523 pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule, 524 enum pf_sn_types type, sa_family_t af, struct pf_addr *src, 525 struct pf_addr *raddr, int global) 526 { 527 struct pf_src_node k; 528 529 if (*sn == NULL) { 530 k.af = af; 531 k.type = type; 532 PF_ACPY(&k.addr, src, af); 533 if (global) 534 k.rule.ptr = NULL; 535 else 536 k.rule.ptr = rule; 537 pf_status.scounters[SCNT_SRC_NODE_SEARCH]++; 538 *sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k); 539 } 540 if (*sn == NULL) { 541 if (!rule->max_src_nodes || 542 rule->src_nodes < rule->max_src_nodes) 543 (*sn) = pool_get(&pf_src_tree_pl, PR_NOWAIT | PR_ZERO); 544 else 545 pf_status.lcounters[LCNT_SRCNODES]++; 546 if ((*sn) == NULL) 547 return (-1); 548 549 pf_init_threshold(&(*sn)->conn_rate, 550 rule->max_src_conn_rate.limit, 551 rule->max_src_conn_rate.seconds); 552 553 (*sn)->type = type; 554 (*sn)->af = af; 555 if (global) 556 (*sn)->rule.ptr = NULL; 557 else 558 (*sn)->rule.ptr = rule; 559 PF_ACPY(&(*sn)->addr, src, af); 560 if (raddr) 561 PF_ACPY(&(*sn)->raddr, raddr, af); 562 if (RB_INSERT(pf_src_tree, 563 &tree_src_tracking, *sn) != NULL) { 564 if (pf_status.debug >= 
LOG_NOTICE) { 565 log(LOG_NOTICE, 566 "pf: src_tree insert failed: "); 567 pf_print_host(&(*sn)->addr, 0, af); 568 addlog("\n"); 569 } 570 pool_put(&pf_src_tree_pl, *sn); 571 return (-1); 572 } 573 (*sn)->creation = time_second; 574 if ((*sn)->rule.ptr != NULL) 575 (*sn)->rule.ptr->src_nodes++; 576 pf_status.scounters[SCNT_SRC_NODE_INSERT]++; 577 pf_status.src_nodes++; 578 } else { 579 if (rule->max_src_states && 580 (*sn)->states >= rule->max_src_states) { 581 pf_status.lcounters[LCNT_SRCSTATES]++; 582 return (-1); 583 } 584 } 585 return (0); 586 } 587 588 void 589 pf_remove_src_node(struct pf_src_node *sn) 590 { 591 if (sn->states > 0 || sn->expire > time_second) 592 return; 593 594 if (sn->rule.ptr != NULL) { 595 sn->rule.ptr->src_nodes--; 596 if (sn->rule.ptr->states_cur <= 0 && 597 sn->rule.ptr->src_nodes <= 0) 598 pf_rm_rule(NULL, sn->rule.ptr); 599 RB_REMOVE(pf_src_tree, &tree_src_tracking, sn); 600 pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; 601 pf_status.src_nodes--; 602 pool_put(&pf_src_tree_pl, sn); 603 } 604 } 605 606 struct pf_src_node * 607 pf_get_src_node(struct pf_state *s, enum pf_sn_types type) 608 { 609 struct pf_sn_item *sni; 610 611 SLIST_FOREACH(sni, &s->src_nodes, next) 612 if (sni->sn->type == type) 613 return (sni->sn); 614 return (NULL); 615 } 616 617 void 618 pf_state_rm_src_node(struct pf_state *s, struct pf_src_node *sn) 619 { 620 struct pf_sn_item *sni, *snin, *snip = NULL; 621 622 for (sni = SLIST_FIRST(&s->src_nodes); sni; sni = snin) { 623 snin = SLIST_NEXT(sni, next); 624 if (sni->sn == sn) { 625 if (snip) 626 SLIST_REMOVE_NEXT(&s->src_nodes, snip, next); 627 else 628 SLIST_REMOVE_HEAD(&s->src_nodes, next); 629 pool_put(&pf_sn_item_pl, sni); 630 sn->states--; 631 } 632 snip = sni; 633 } 634 } 635 636 /* state table stuff */ 637 638 static __inline int 639 pf_state_compare_key(struct pf_state_key *a, struct pf_state_key *b) 640 { 641 int diff; 642 643 if ((diff = a->proto - b->proto) != 0) 644 return (diff); 645 if ((diff = a->af 
- b->af) != 0) 646 return (diff); 647 if ((diff = pf_addr_compare(&a->addr[0], &b->addr[0], a->af)) != 0) 648 return (diff); 649 if ((diff = pf_addr_compare(&a->addr[1], &b->addr[1], a->af)) != 0) 650 return (diff); 651 if ((diff = a->port[0] - b->port[0]) != 0) 652 return (diff); 653 if ((diff = a->port[1] - b->port[1]) != 0) 654 return (diff); 655 if ((diff = a->rdomain - b->rdomain) != 0) 656 return (diff); 657 return (0); 658 } 659 660 static __inline int 661 pf_state_compare_id(struct pf_state *a, struct pf_state *b) 662 { 663 if (a->id > b->id) 664 return (1); 665 if (a->id < b->id) 666 return (-1); 667 if (a->creatorid > b->creatorid) 668 return (1); 669 if (a->creatorid < b->creatorid) 670 return (-1); 671 672 return (0); 673 } 674 675 int 676 pf_state_key_attach(struct pf_state_key *sk, struct pf_state *s, int idx) 677 { 678 struct pf_state_item *si; 679 struct pf_state_key *cur; 680 struct pf_state *olds = NULL; 681 682 KASSERT(s->key[idx] == NULL); 683 if ((cur = RB_INSERT(pf_state_tree, &pf_statetbl, sk)) != NULL) { 684 /* key exists. check for same kif, if none, add to key */ 685 TAILQ_FOREACH(si, &cur->states, entry) 686 if (si->s->kif == s->kif && 687 si->s->direction == s->direction) { 688 if (sk->proto == IPPROTO_TCP && 689 si->s->src.state >= TCPS_FIN_WAIT_2 && 690 si->s->dst.state >= TCPS_FIN_WAIT_2) { 691 si->s->src.state = si->s->dst.state = 692 TCPS_CLOSED; 693 /* unlink late or sks can go away */ 694 olds = si->s; 695 } else { 696 if (pf_status.debug >= LOG_NOTICE) { 697 log(LOG_NOTICE, 698 "pf: %s key attach " 699 "failed on %s: ", 700 (idx == PF_SK_WIRE) ? 701 "wire" : "stack", 702 s->kif->pfik_name); 703 pf_print_state_parts(s, 704 (idx == PF_SK_WIRE) ? 705 sk : NULL, 706 (idx == PF_SK_STACK) ? 707 sk : NULL); 708 addlog(", existing: "); 709 pf_print_state_parts(si->s, 710 (idx == PF_SK_WIRE) ? 711 sk : NULL, 712 (idx == PF_SK_STACK) ? 
713 sk : NULL); 714 addlog("\n"); 715 } 716 pool_put(&pf_state_key_pl, sk); 717 return (-1); /* collision! */ 718 } 719 } 720 pool_put(&pf_state_key_pl, sk); 721 s->key[idx] = cur; 722 } else 723 s->key[idx] = sk; 724 725 if ((si = pool_get(&pf_state_item_pl, PR_NOWAIT)) == NULL) { 726 pf_state_key_detach(s, idx); 727 return (-1); 728 } 729 si->s = s; 730 731 /* list is sorted, if-bound states before floating */ 732 if (s->kif == pfi_all) 733 TAILQ_INSERT_TAIL(&s->key[idx]->states, si, entry); 734 else 735 TAILQ_INSERT_HEAD(&s->key[idx]->states, si, entry); 736 737 if (olds) 738 pf_unlink_state(olds); 739 740 return (0); 741 } 742 743 void 744 pf_detach_state(struct pf_state *s) 745 { 746 if (s->key[PF_SK_WIRE] == s->key[PF_SK_STACK]) 747 s->key[PF_SK_WIRE] = NULL; 748 749 if (s->key[PF_SK_STACK] != NULL) 750 pf_state_key_detach(s, PF_SK_STACK); 751 752 if (s->key[PF_SK_WIRE] != NULL) 753 pf_state_key_detach(s, PF_SK_WIRE); 754 } 755 756 void 757 pf_state_key_detach(struct pf_state *s, int idx) 758 { 759 struct pf_state_item *si; 760 761 if (s->key[idx] == NULL) 762 return; 763 764 si = TAILQ_FIRST(&s->key[idx]->states); 765 while (si && si->s != s) 766 si = TAILQ_NEXT(si, entry); 767 768 if (si) { 769 TAILQ_REMOVE(&s->key[idx]->states, si, entry); 770 pool_put(&pf_state_item_pl, si); 771 } 772 773 if (TAILQ_EMPTY(&s->key[idx]->states)) { 774 RB_REMOVE(pf_state_tree, &pf_statetbl, s->key[idx]); 775 if (s->key[idx]->reverse) 776 s->key[idx]->reverse->reverse = NULL; 777 if (s->key[idx]->inp) 778 s->key[idx]->inp->inp_pf_sk = NULL; 779 pool_put(&pf_state_key_pl, s->key[idx]); 780 } 781 s->key[idx] = NULL; 782 } 783 784 struct pf_state_key * 785 pf_alloc_state_key(int pool_flags) 786 { 787 struct pf_state_key *sk; 788 789 if ((sk = pool_get(&pf_state_key_pl, pool_flags)) == NULL) 790 return (NULL); 791 TAILQ_INIT(&sk->states); 792 793 return (sk); 794 } 795 796 int 797 pf_state_key_setup(struct pf_pdesc *pd, struct pf_state_key **skw, 798 struct pf_state_key **sks, 
int rtableid) 799 { 800 /* if returning error we MUST pool_put state keys ourselves */ 801 struct pf_state_key *sk1, *sk2; 802 u_int wrdom = pd->rdomain; 803 804 if ((sk1 = pf_alloc_state_key(PR_NOWAIT | PR_ZERO)) == NULL) 805 return (ENOMEM); 806 807 PF_ACPY(&sk1->addr[pd->sidx], pd->src, pd->af); 808 PF_ACPY(&sk1->addr[pd->didx], pd->dst, pd->af); 809 sk1->port[pd->sidx] = pd->osport; 810 sk1->port[pd->didx] = pd->odport; 811 sk1->proto = pd->proto; 812 sk1->af = pd->af; 813 sk1->rdomain = pd->rdomain; 814 if (rtableid >= 0) 815 wrdom = rtable_l2(rtableid); 816 817 if (PF_ANEQ(&pd->nsaddr, pd->src, pd->af) || 818 PF_ANEQ(&pd->ndaddr, pd->dst, pd->af) || 819 pd->nsport != pd->osport || pd->ndport != pd->odport || 820 wrdom != pd->rdomain) { /* NAT */ 821 if ((sk2 = pf_alloc_state_key(PR_NOWAIT | PR_ZERO)) == NULL) { 822 pool_put(&pf_state_key_pl, sk1); 823 return (ENOMEM); 824 } 825 PF_ACPY(&sk2->addr[pd->sidx], &pd->nsaddr, pd->af); 826 PF_ACPY(&sk2->addr[pd->didx], &pd->ndaddr, pd->af); 827 sk2->port[pd->sidx] = pd->nsport; 828 sk2->port[pd->didx] = pd->ndport; 829 sk2->proto = pd->proto; 830 sk2->af = pd->af; 831 sk2->rdomain = wrdom; 832 } else 833 sk2 = sk1; 834 835 if (pd->dir == PF_IN) { 836 *skw = sk1; 837 *sks = sk2; 838 } else { 839 *sks = sk1; 840 *skw = sk2; 841 } 842 843 if (pf_status.debug >= LOG_DEBUG) { 844 log(LOG_DEBUG, "pf: key setup: "); 845 pf_print_state_parts(NULL, *skw, *sks); 846 addlog("\n"); 847 } 848 849 return (0); 850 } 851 852 int 853 pf_state_insert(struct pfi_kif *kif, struct pf_state_key *skw, 854 struct pf_state_key *sks, struct pf_state *s) 855 { 856 splsoftassert(IPL_SOFTNET); 857 858 s->kif = kif; 859 if (skw == sks) { 860 if (pf_state_key_attach(skw, s, PF_SK_WIRE)) 861 return (-1); 862 s->key[PF_SK_STACK] = s->key[PF_SK_WIRE]; 863 } else { 864 if (pf_state_key_attach(skw, s, PF_SK_WIRE)) { 865 pool_put(&pf_state_key_pl, sks); 866 return (-1); 867 } 868 if (pf_state_key_attach(sks, s, PF_SK_STACK)) { 869 
pf_state_key_detach(s, PF_SK_WIRE); 870 return (-1); 871 } 872 } 873 874 if (s->id == 0 && s->creatorid == 0) { 875 s->id = htobe64(pf_status.stateid++); 876 s->creatorid = pf_status.hostid; 877 } 878 if (RB_INSERT(pf_state_tree_id, &tree_id, s) != NULL) { 879 if (pf_status.debug >= LOG_NOTICE) { 880 log(LOG_NOTICE, "pf: state insert failed: " 881 "id: %016llx creatorid: %08x", 882 betoh64(s->id), ntohl(s->creatorid)); 883 addlog("\n"); 884 } 885 pf_detach_state(s); 886 return (-1); 887 } 888 TAILQ_INSERT_TAIL(&state_list, s, entry_list); 889 pf_status.fcounters[FCNT_STATE_INSERT]++; 890 pf_status.states++; 891 pfi_kif_ref(kif, PFI_KIF_REF_STATE); 892 #if NPFSYNC > 0 893 pfsync_insert_state(s); 894 #endif 895 return (0); 896 } 897 898 struct pf_state * 899 pf_find_state_byid(struct pf_state_cmp *key) 900 { 901 pf_status.fcounters[FCNT_STATE_SEARCH]++; 902 903 return (RB_FIND(pf_state_tree_id, &tree_id, (struct pf_state *)key)); 904 } 905 906 int 907 pf_compare_state_keys(struct pf_state_key *a, struct pf_state_key *b, 908 struct pfi_kif *kif, u_int dir) 909 { 910 /* a (from hdr) and b (new) must be exact opposites of each other */ 911 if (a->af == b->af && a->proto == b->proto && 912 PF_AEQ(&a->addr[0], &b->addr[1], a->af) && 913 PF_AEQ(&a->addr[1], &b->addr[0], a->af) && 914 a->port[0] == b->port[1] && 915 a->port[1] == b->port[0] && a->rdomain == b->rdomain) 916 return (0); 917 else { 918 /* mismatch. must not happen. */ 919 if (pf_status.debug >= LOG_ERR) { 920 log(LOG_ERR, 921 "pf: state key linking mismatch! dir=%s, " 922 "if=%s, stored af=%u, a0: ", 923 dir == PF_OUT ? 
"OUT" : "IN", 924 kif->pfik_name, a->af); 925 pf_print_host(&a->addr[0], a->port[0], a->af); 926 addlog(", a1: "); 927 pf_print_host(&a->addr[1], a->port[1], a->af); 928 addlog(", proto=%u", a->proto); 929 addlog(", found af=%u, a0: ", b->af); 930 pf_print_host(&b->addr[0], b->port[0], b->af); 931 addlog(", a1: "); 932 pf_print_host(&b->addr[1], b->port[1], b->af); 933 addlog(", proto=%u", b->proto); 934 addlog("\n"); 935 } 936 return (-1); 937 } 938 } 939 940 struct pf_state * 941 pf_find_state(struct pfi_kif *kif, struct pf_state_key_cmp *key, u_int dir, 942 struct mbuf *m) 943 { 944 struct pf_state_key *sk; 945 struct pf_state_item *si; 946 947 pf_status.fcounters[FCNT_STATE_SEARCH]++; 948 if (pf_status.debug >= LOG_DEBUG) { 949 log(LOG_DEBUG, "pf: key search, if=%s: ", kif->pfik_name); 950 pf_print_state_parts(NULL, (struct pf_state_key *)key, NULL); 951 addlog("\n"); 952 } 953 954 if (dir == PF_OUT && m->m_pkthdr.pf.statekey && 955 ((struct pf_state_key *)m->m_pkthdr.pf.statekey)->reverse) 956 sk = ((struct pf_state_key *)m->m_pkthdr.pf.statekey)->reverse; 957 else { 958 if ((sk = RB_FIND(pf_state_tree, &pf_statetbl, 959 (struct pf_state_key *)key)) == NULL) 960 return (NULL); 961 if (dir == PF_OUT && m->m_pkthdr.pf.statekey && 962 pf_compare_state_keys(m->m_pkthdr.pf.statekey, sk, 963 kif, dir) == 0) { 964 ((struct pf_state_key *) 965 m->m_pkthdr.pf.statekey)->reverse = sk; 966 sk->reverse = m->m_pkthdr.pf.statekey; 967 } 968 } 969 970 if (dir == PF_OUT) 971 m->m_pkthdr.pf.statekey = NULL; 972 973 /* list is sorted, if-bound states before floating ones */ 974 TAILQ_FOREACH(si, &sk->states, entry) 975 if ((si->s->kif == pfi_all || si->s->kif == kif) && 976 sk == (dir == PF_IN ? 
si->s->key[PF_SK_WIRE] : 977 si->s->key[PF_SK_STACK])) 978 return (si->s); 979 980 return (NULL); 981 } 982 983 struct pf_state * 984 pf_find_state_all(struct pf_state_key_cmp *key, u_int dir, int *more) 985 { 986 struct pf_state_key *sk; 987 struct pf_state_item *si, *ret = NULL; 988 989 pf_status.fcounters[FCNT_STATE_SEARCH]++; 990 991 sk = RB_FIND(pf_state_tree, &pf_statetbl, (struct pf_state_key *)key); 992 993 if (sk != NULL) { 994 TAILQ_FOREACH(si, &sk->states, entry) 995 if (dir == PF_INOUT || 996 (sk == (dir == PF_IN ? si->s->key[PF_SK_WIRE] : 997 si->s->key[PF_SK_STACK]))) { 998 if (more == NULL) 999 return (si->s); 1000 1001 if (ret) 1002 (*more)++; 1003 else 1004 ret = si; 1005 } 1006 } 1007 return (ret ? ret->s : NULL); 1008 } 1009 1010 /* END state table stuff */ 1011 1012 void 1013 pf_purge_thread(void *v) 1014 { 1015 int nloops = 0, s; 1016 1017 for (;;) { 1018 tsleep(pf_purge_thread, PWAIT, "pftm", 1 * hz); 1019 1020 s = splsoftnet(); 1021 1022 /* process a fraction of the state table every second */ 1023 pf_purge_expired_states(1 + (pf_status.states 1024 / pf_default_rule.timeout[PFTM_INTERVAL])); 1025 1026 /* purge other expired types every PFTM_INTERVAL seconds */ 1027 if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) { 1028 pf_purge_expired_fragments(); 1029 pf_purge_expired_src_nodes(0); 1030 nloops = 0; 1031 } 1032 1033 splx(s); 1034 } 1035 } 1036 1037 u_int32_t 1038 pf_state_expires(const struct pf_state *state) 1039 { 1040 u_int32_t timeout; 1041 u_int32_t start; 1042 u_int32_t end; 1043 u_int32_t states; 1044 1045 /* handle all PFTM_* > PFTM_MAX here */ 1046 if (state->timeout == PFTM_PURGE) 1047 return (time_second); 1048 if (state->timeout == PFTM_UNTIL_PACKET) 1049 return (0); 1050 KASSERT(state->timeout != PFTM_UNLINKED); 1051 KASSERT(state->timeout < PFTM_MAX); 1052 timeout = state->rule.ptr->timeout[state->timeout]; 1053 if (!timeout) 1054 timeout = pf_default_rule.timeout[state->timeout]; 1055 start = 
state->rule.ptr->timeout[PFTM_ADAPTIVE_START]; 1056 if (start) { 1057 end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END]; 1058 states = state->rule.ptr->states_cur; 1059 } else { 1060 start = pf_default_rule.timeout[PFTM_ADAPTIVE_START]; 1061 end = pf_default_rule.timeout[PFTM_ADAPTIVE_END]; 1062 states = pf_status.states; 1063 } 1064 if (end && states > start && start < end) { 1065 if (states < end) 1066 return (state->expire + timeout * (end - states) / 1067 (end - start)); 1068 else 1069 return (time_second); 1070 } 1071 return (state->expire + timeout); 1072 } 1073 1074 void 1075 pf_purge_expired_src_nodes(int waslocked) 1076 { 1077 struct pf_src_node *cur, *next; 1078 int locked = waslocked; 1079 1080 for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) { 1081 next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur); 1082 1083 if (cur->states <= 0 && cur->expire <= time_second) { 1084 if (! locked) { 1085 rw_enter_write(&pf_consistency_lock); 1086 next = RB_NEXT(pf_src_tree, 1087 &tree_src_tracking, cur); 1088 locked = 1; 1089 } 1090 pf_remove_src_node(cur); 1091 } 1092 } 1093 1094 if (locked && !waslocked) 1095 rw_exit_write(&pf_consistency_lock); 1096 } 1097 1098 void 1099 pf_src_tree_remove_state(struct pf_state *s) 1100 { 1101 u_int32_t timeout; 1102 struct pf_sn_item *sni; 1103 1104 while ((sni = SLIST_FIRST(&s->src_nodes)) != NULL) { 1105 SLIST_REMOVE_HEAD(&s->src_nodes, next); 1106 if (s->src.tcp_est) 1107 --sni->sn->conn; 1108 if (--sni->sn->states <= 0) { 1109 timeout = s->rule.ptr->timeout[PFTM_SRC_NODE]; 1110 if (!timeout) 1111 timeout = 1112 pf_default_rule.timeout[PFTM_SRC_NODE]; 1113 sni->sn->expire = time_second + timeout; 1114 } 1115 pool_put(&pf_sn_item_pl, sni); 1116 } 1117 } 1118 1119 /* callers should be at splsoftnet */ 1120 void 1121 pf_unlink_state(struct pf_state *cur) 1122 { 1123 splsoftassert(IPL_SOFTNET); 1124 1125 /* handle load balancing related tasks */ 1126 pf_postprocess_addr(cur); 1127 1128 if (cur->src.state == 
PF_TCPS_PROXY_DST) { 1129 pf_send_tcp(cur->rule.ptr, cur->key[PF_SK_WIRE]->af, 1130 &cur->key[PF_SK_WIRE]->addr[1], 1131 &cur->key[PF_SK_WIRE]->addr[0], 1132 cur->key[PF_SK_WIRE]->port[1], 1133 cur->key[PF_SK_WIRE]->port[0], 1134 cur->src.seqhi, cur->src.seqlo + 1, 1135 TH_RST|TH_ACK, 0, 0, 0, 1, cur->tag, 1136 cur->key[PF_SK_WIRE]->rdomain, NULL, NULL); 1137 } 1138 RB_REMOVE(pf_state_tree_id, &tree_id, cur); 1139 #if NPFLOW > 0 1140 if (cur->state_flags & PFSTATE_PFLOW) 1141 export_pflow(cur); 1142 #endif 1143 #if NPFSYNC > 0 1144 pfsync_delete_state(cur); 1145 #endif 1146 cur->timeout = PFTM_UNLINKED; 1147 pf_src_tree_remove_state(cur); 1148 pf_detach_state(cur); 1149 } 1150 1151 /* callers should be at splsoftnet and hold the 1152 * write_lock on pf_consistency_lock */ 1153 void 1154 pf_free_state(struct pf_state *cur) 1155 { 1156 struct pf_rule_item *ri; 1157 1158 splsoftassert(IPL_SOFTNET); 1159 1160 #if NPFSYNC > 0 1161 if (pfsync_state_in_use(cur)) 1162 return; 1163 #endif 1164 KASSERT(cur->timeout == PFTM_UNLINKED); 1165 if (--cur->rule.ptr->states_cur <= 0 && 1166 cur->rule.ptr->src_nodes <= 0) 1167 pf_rm_rule(NULL, cur->rule.ptr); 1168 if (cur->anchor.ptr != NULL) 1169 if (--cur->anchor.ptr->states_cur <= 0) 1170 pf_rm_rule(NULL, cur->anchor.ptr); 1171 while ((ri = SLIST_FIRST(&cur->match_rules))) { 1172 SLIST_REMOVE_HEAD(&cur->match_rules, entry); 1173 if (--ri->r->states_cur <= 0 && 1174 ri->r->src_nodes <= 0) 1175 pf_rm_rule(NULL, ri->r); 1176 pool_put(&pf_rule_item_pl, ri); 1177 } 1178 pf_normalize_tcp_cleanup(cur); 1179 pfi_kif_unref(cur->kif, PFI_KIF_REF_STATE); 1180 TAILQ_REMOVE(&state_list, cur, entry_list); 1181 if (cur->tag) 1182 pf_tag_unref(cur->tag); 1183 pool_put(&pf_state_pl, cur); 1184 pf_status.fcounters[FCNT_STATE_REMOVALS]++; 1185 pf_status.states--; 1186 } 1187 1188 void 1189 pf_purge_expired_states(u_int32_t maxcheck) 1190 { 1191 static struct pf_state *cur = NULL; 1192 struct pf_state *next; 1193 int locked = 0; 1194 1195 while 
(maxcheck--) { 1196 /* wrap to start of list when we hit the end */ 1197 if (cur == NULL) { 1198 cur = TAILQ_FIRST(&state_list); 1199 if (cur == NULL) 1200 break; /* list empty */ 1201 } 1202 1203 /* get next state, as cur may get deleted */ 1204 next = TAILQ_NEXT(cur, entry_list); 1205 1206 if (cur->timeout == PFTM_UNLINKED) { 1207 /* free unlinked state */ 1208 if (! locked) { 1209 rw_enter_write(&pf_consistency_lock); 1210 locked = 1; 1211 } 1212 pf_free_state(cur); 1213 } else if (pf_state_expires(cur) <= time_second) { 1214 /* unlink and free expired state */ 1215 pf_unlink_state(cur); 1216 if (! locked) { 1217 rw_enter_write(&pf_consistency_lock); 1218 locked = 1; 1219 } 1220 pf_free_state(cur); 1221 } 1222 cur = next; 1223 } 1224 1225 if (locked) 1226 rw_exit_write(&pf_consistency_lock); 1227 } 1228 1229 int 1230 pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw) 1231 { 1232 if (aw->type != PF_ADDR_TABLE) 1233 return (0); 1234 if ((aw->p.tbl = pfr_attach_table(rs, aw->v.tblname, 1)) == NULL) 1235 return (1); 1236 return (0); 1237 } 1238 1239 void 1240 pf_tbladdr_remove(struct pf_addr_wrap *aw) 1241 { 1242 if (aw->type != PF_ADDR_TABLE || aw->p.tbl == NULL) 1243 return; 1244 pfr_detach_table(aw->p.tbl); 1245 aw->p.tbl = NULL; 1246 } 1247 1248 void 1249 pf_tbladdr_copyout(struct pf_addr_wrap *aw) 1250 { 1251 struct pfr_ktable *kt = aw->p.tbl; 1252 1253 if (aw->type != PF_ADDR_TABLE || kt == NULL) 1254 return; 1255 if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) 1256 kt = kt->pfrkt_root; 1257 aw->p.tbl = NULL; 1258 aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ? 
1259 kt->pfrkt_cnt : -1; 1260 } 1261 1262 void 1263 pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af) 1264 { 1265 switch (af) { 1266 #ifdef INET 1267 case AF_INET: { 1268 u_int32_t a = ntohl(addr->addr32[0]); 1269 addlog("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255, 1270 (a>>8)&255, a&255); 1271 if (p) { 1272 p = ntohs(p); 1273 addlog(":%u", p); 1274 } 1275 break; 1276 } 1277 #endif /* INET */ 1278 #ifdef INET6 1279 case AF_INET6: { 1280 u_int16_t b; 1281 u_int8_t i, curstart, curend, maxstart, maxend; 1282 curstart = curend = maxstart = maxend = 255; 1283 for (i = 0; i < 8; i++) { 1284 if (!addr->addr16[i]) { 1285 if (curstart == 255) 1286 curstart = i; 1287 curend = i; 1288 } else { 1289 if ((curend - curstart) > 1290 (maxend - maxstart)) { 1291 maxstart = curstart; 1292 maxend = curend; 1293 } 1294 curstart = curend = 255; 1295 } 1296 } 1297 if ((curend - curstart) > 1298 (maxend - maxstart)) { 1299 maxstart = curstart; 1300 maxend = curend; 1301 } 1302 for (i = 0; i < 8; i++) { 1303 if (i >= maxstart && i <= maxend) { 1304 if (i == 0) 1305 addlog(":"); 1306 if (i == maxend) 1307 addlog(":"); 1308 } else { 1309 b = ntohs(addr->addr16[i]); 1310 addlog("%x", b); 1311 if (i < 7) 1312 addlog(":"); 1313 } 1314 } 1315 if (p) { 1316 p = ntohs(p); 1317 addlog("[%u]", p); 1318 } 1319 break; 1320 } 1321 #endif /* INET6 */ 1322 } 1323 } 1324 1325 void 1326 pf_print_state(struct pf_state *s) 1327 { 1328 pf_print_state_parts(s, NULL, NULL); 1329 } 1330 1331 void 1332 pf_print_state_parts(struct pf_state *s, 1333 struct pf_state_key *skwp, struct pf_state_key *sksp) 1334 { 1335 struct pf_state_key *skw, *sks; 1336 u_int8_t proto, dir; 1337 1338 /* Do our best to fill these, but they're skipped if NULL */ 1339 skw = skwp ? skwp : (s ? s->key[PF_SK_WIRE] : NULL); 1340 sks = sksp ? sksp : (s ? s->key[PF_SK_STACK] : NULL); 1341 proto = skw ? skw->proto : (sks ? sks->proto : 0); 1342 dir = s ? 
s->direction : 0; 1343 1344 switch (proto) { 1345 case IPPROTO_IPV4: 1346 addlog("IPv4"); 1347 break; 1348 case IPPROTO_IPV6: 1349 addlog("IPv6"); 1350 break; 1351 case IPPROTO_TCP: 1352 addlog("TCP"); 1353 break; 1354 case IPPROTO_UDP: 1355 addlog("UDP"); 1356 break; 1357 case IPPROTO_ICMP: 1358 addlog("ICMP"); 1359 break; 1360 case IPPROTO_ICMPV6: 1361 addlog("ICMPv6"); 1362 break; 1363 default: 1364 addlog("%u", proto); 1365 break; 1366 } 1367 switch (dir) { 1368 case PF_IN: 1369 addlog(" in"); 1370 break; 1371 case PF_OUT: 1372 addlog(" out"); 1373 break; 1374 } 1375 if (skw) { 1376 addlog(" wire: (%d) ", skw->rdomain); 1377 pf_print_host(&skw->addr[0], skw->port[0], skw->af); 1378 addlog(" "); 1379 pf_print_host(&skw->addr[1], skw->port[1], skw->af); 1380 } 1381 if (sks) { 1382 addlog(" stack: (%d) ", sks->rdomain); 1383 if (sks != skw) { 1384 pf_print_host(&sks->addr[0], sks->port[0], sks->af); 1385 addlog(" "); 1386 pf_print_host(&sks->addr[1], sks->port[1], sks->af); 1387 } else 1388 addlog("-"); 1389 } 1390 if (s) { 1391 if (proto == IPPROTO_TCP) { 1392 addlog(" [lo=%u high=%u win=%u modulator=%u", 1393 s->src.seqlo, s->src.seqhi, 1394 s->src.max_win, s->src.seqdiff); 1395 if (s->src.wscale && s->dst.wscale) 1396 addlog(" wscale=%u", 1397 s->src.wscale & PF_WSCALE_MASK); 1398 addlog("]"); 1399 addlog(" [lo=%u high=%u win=%u modulator=%u", 1400 s->dst.seqlo, s->dst.seqhi, 1401 s->dst.max_win, s->dst.seqdiff); 1402 if (s->src.wscale && s->dst.wscale) 1403 addlog(" wscale=%u", 1404 s->dst.wscale & PF_WSCALE_MASK); 1405 addlog("]"); 1406 } 1407 addlog(" %u:%u", s->src.state, s->dst.state); 1408 if (s->rule.ptr) 1409 addlog(" @%d", s->rule.ptr->nr); 1410 } 1411 } 1412 1413 void 1414 pf_print_flags(u_int8_t f) 1415 { 1416 if (f) 1417 addlog(" "); 1418 if (f & TH_FIN) 1419 addlog("F"); 1420 if (f & TH_SYN) 1421 addlog("S"); 1422 if (f & TH_RST) 1423 addlog("R"); 1424 if (f & TH_PUSH) 1425 addlog("P"); 1426 if (f & TH_ACK) 1427 addlog("A"); 1428 if (f & TH_URG) 
1429 addlog("U"); 1430 if (f & TH_ECE) 1431 addlog("E"); 1432 if (f & TH_CWR) 1433 addlog("W"); 1434 } 1435 1436 #define PF_SET_SKIP_STEPS(i) \ 1437 do { \ 1438 while (head[i] != cur) { \ 1439 head[i]->skip[i].ptr = cur; \ 1440 head[i] = TAILQ_NEXT(head[i], entries); \ 1441 } \ 1442 } while (0) 1443 1444 void 1445 pf_calc_skip_steps(struct pf_rulequeue *rules) 1446 { 1447 struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT]; 1448 int i; 1449 1450 cur = TAILQ_FIRST(rules); 1451 prev = cur; 1452 for (i = 0; i < PF_SKIP_COUNT; ++i) 1453 head[i] = cur; 1454 while (cur != NULL) { 1455 if (cur->kif != prev->kif || cur->ifnot != prev->ifnot) 1456 PF_SET_SKIP_STEPS(PF_SKIP_IFP); 1457 if (cur->direction != prev->direction) 1458 PF_SET_SKIP_STEPS(PF_SKIP_DIR); 1459 if (cur->onrdomain != prev->onrdomain || 1460 cur->ifnot != prev->ifnot) 1461 PF_SET_SKIP_STEPS(PF_SKIP_RDOM); 1462 if (cur->af != prev->af) 1463 PF_SET_SKIP_STEPS(PF_SKIP_AF); 1464 if (cur->proto != prev->proto) 1465 PF_SET_SKIP_STEPS(PF_SKIP_PROTO); 1466 if (cur->src.neg != prev->src.neg || 1467 pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr)) 1468 PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR); 1469 if (cur->dst.neg != prev->dst.neg || 1470 pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr)) 1471 PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR); 1472 if (cur->src.port[0] != prev->src.port[0] || 1473 cur->src.port[1] != prev->src.port[1] || 1474 cur->src.port_op != prev->src.port_op) 1475 PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT); 1476 if (cur->dst.port[0] != prev->dst.port[0] || 1477 cur->dst.port[1] != prev->dst.port[1] || 1478 cur->dst.port_op != prev->dst.port_op) 1479 PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT); 1480 1481 prev = cur; 1482 cur = TAILQ_NEXT(cur, entries); 1483 } 1484 for (i = 0; i < PF_SKIP_COUNT; ++i) 1485 PF_SET_SKIP_STEPS(i); 1486 } 1487 1488 int 1489 pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2) 1490 { 1491 if (aw1->type != aw2->type) 1492 return (1); 1493 switch (aw1->type) { 1494 case PF_ADDR_ADDRMASK: 
1495 case PF_ADDR_RANGE: 1496 if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, AF_INET6)) 1497 return (1); 1498 if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, AF_INET6)) 1499 return (1); 1500 return (0); 1501 case PF_ADDR_DYNIFTL: 1502 return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt); 1503 case PF_ADDR_NONE: 1504 case PF_ADDR_NOROUTE: 1505 case PF_ADDR_URPFFAILED: 1506 return (0); 1507 case PF_ADDR_TABLE: 1508 return (aw1->p.tbl != aw2->p.tbl); 1509 case PF_ADDR_RTLABEL: 1510 return (aw1->v.rtlabel != aw2->v.rtlabel); 1511 default: 1512 addlog("invalid address type: %d\n", aw1->type); 1513 return (1); 1514 } 1515 } 1516 1517 u_int16_t 1518 pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp) 1519 { 1520 u_int32_t l; 1521 1522 if (udp && !cksum) 1523 return (0x0000); 1524 l = cksum + old - new; 1525 l = (l >> 16) + (l & 65535); 1526 l = l & 65535; 1527 if (udp && !l) 1528 return (0xFFFF); 1529 return (l); 1530 } 1531 1532 void 1533 pf_change_ap(struct pf_addr *a, u_int16_t *p, u_int16_t *pc, 1534 struct pf_addr *an, u_int16_t pn, u_int8_t u, sa_family_t af) 1535 { 1536 struct pf_addr ao; 1537 u_int16_t po = *p; 1538 1539 PF_ACPY(&ao, a, af); 1540 PF_ACPY(a, an, af); 1541 *p = pn; 1542 1543 switch (af) { 1544 #ifdef INET 1545 case AF_INET: 1546 *pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc, 1547 ao.addr16[0], an->addr16[0], u), 1548 ao.addr16[1], an->addr16[1], u), 1549 po, pn, u); 1550 break; 1551 #endif /* INET */ 1552 #ifdef INET6 1553 case AF_INET6: 1554 *pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( 1555 pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( 1556 pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc, 1557 ao.addr16[0], an->addr16[0], u), 1558 ao.addr16[1], an->addr16[1], u), 1559 ao.addr16[2], an->addr16[2], u), 1560 ao.addr16[3], an->addr16[3], u), 1561 ao.addr16[4], an->addr16[4], u), 1562 ao.addr16[5], an->addr16[5], u), 1563 ao.addr16[6], an->addr16[6], u), 1564 ao.addr16[7], an->addr16[7], u), 1565 po, pn, u); 1566 break; 
1567 #endif /* INET6 */ 1568 } 1569 } 1570 1571 1572 /* Changes a u_int32_t. Uses a void * so there are no align restrictions */ 1573 void 1574 pf_change_a(void *a, u_int16_t *c, u_int32_t an, u_int8_t u) 1575 { 1576 u_int32_t ao; 1577 1578 memcpy(&ao, a, sizeof(ao)); 1579 memcpy(a, &an, sizeof(u_int32_t)); 1580 if (c != NULL) 1581 *c = pf_cksum_fixup(pf_cksum_fixup(*c, ao / 65536, an / 65536, 1582 u), ao % 65536, an % 65536, u); 1583 } 1584 1585 #ifdef INET6 1586 void 1587 pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u) 1588 { 1589 struct pf_addr ao; 1590 1591 PF_ACPY(&ao, a, AF_INET6); 1592 PF_ACPY(a, an, AF_INET6); 1593 1594 if (c) 1595 *c = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( 1596 pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( 1597 pf_cksum_fixup(pf_cksum_fixup(*c, 1598 ao.addr16[0], an->addr16[0], u), 1599 ao.addr16[1], an->addr16[1], u), 1600 ao.addr16[2], an->addr16[2], u), 1601 ao.addr16[3], an->addr16[3], u), 1602 ao.addr16[4], an->addr16[4], u), 1603 ao.addr16[5], an->addr16[5], u), 1604 ao.addr16[6], an->addr16[6], u), 1605 ao.addr16[7], an->addr16[7], u); 1606 } 1607 #endif /* INET6 */ 1608 1609 int 1610 pf_icmp_mapping(struct pf_pdesc *pd, u_int8_t type, 1611 int *icmp_dir, int *multi, u_int16_t *virtual_id, u_int16_t *virtual_type) 1612 { 1613 /* 1614 * ICMP types marked with PF_OUT are typically responses to 1615 * PF_IN, and will match states in the opposite direction. 1616 * PF_IN ICMP types need to match a state with that type. 
1617 */ 1618 *icmp_dir = PF_OUT; 1619 *multi = PF_ICMP_MULTI_LINK; 1620 1621 /* Queries (and responses) */ 1622 switch (pd->af) { 1623 #ifdef INET 1624 case AF_INET: 1625 switch (type) { 1626 case ICMP_ECHO: 1627 *icmp_dir = PF_IN; 1628 case ICMP_ECHOREPLY: 1629 *virtual_type = ICMP_ECHO; 1630 *virtual_id = pd->hdr.icmp->icmp_id; 1631 break; 1632 1633 case ICMP_TSTAMP: 1634 *icmp_dir = PF_IN; 1635 case ICMP_TSTAMPREPLY: 1636 *virtual_type = ICMP_TSTAMP; 1637 *virtual_id = pd->hdr.icmp->icmp_id; 1638 break; 1639 1640 case ICMP_IREQ: 1641 *icmp_dir = PF_IN; 1642 case ICMP_IREQREPLY: 1643 *virtual_type = ICMP_IREQ; 1644 *virtual_id = pd->hdr.icmp->icmp_id; 1645 break; 1646 1647 case ICMP_MASKREQ: 1648 *icmp_dir = PF_IN; 1649 case ICMP_MASKREPLY: 1650 *virtual_type = ICMP_MASKREQ; 1651 *virtual_id = pd->hdr.icmp->icmp_id; 1652 break; 1653 1654 case ICMP_IPV6_WHEREAREYOU: 1655 *icmp_dir = PF_IN; 1656 case ICMP_IPV6_IAMHERE: 1657 *virtual_type = ICMP_IPV6_WHEREAREYOU; 1658 *virtual_id = 0; /* Nothing sane to match on! */ 1659 break; 1660 1661 case ICMP_MOBILE_REGREQUEST: 1662 *icmp_dir = PF_IN; 1663 case ICMP_MOBILE_REGREPLY: 1664 *virtual_type = ICMP_MOBILE_REGREQUEST; 1665 *virtual_id = 0; /* Nothing sane to match on! */ 1666 break; 1667 1668 case ICMP_ROUTERSOLICIT: 1669 *icmp_dir = PF_IN; 1670 case ICMP_ROUTERADVERT: 1671 *virtual_type = ICMP_ROUTERSOLICIT; 1672 *virtual_id = 0; /* Nothing sane to match on! */ 1673 break; 1674 1675 /* These ICMP types map to other connections */ 1676 case ICMP_UNREACH: 1677 case ICMP_SOURCEQUENCH: 1678 case ICMP_REDIRECT: 1679 case ICMP_TIMXCEED: 1680 case ICMP_PARAMPROB: 1681 /* These will not be used, but set them anyway */ 1682 *icmp_dir = PF_IN; 1683 *virtual_type = type; 1684 *virtual_id = 0; 1685 HTONS(*virtual_type); 1686 return (1); /* These types match to another state */ 1687 1688 /* 1689 * All remaining ICMP types get their own states, 1690 * and will only match in one direction. 
1691 */ 1692 default: 1693 *icmp_dir = PF_IN; 1694 *virtual_type = type; 1695 *virtual_id = 0; 1696 break; 1697 } 1698 break; 1699 #endif /* INET */ 1700 #ifdef INET6 1701 case AF_INET6: 1702 switch (type) { 1703 case ICMP6_ECHO_REQUEST: 1704 *icmp_dir = PF_IN; 1705 case ICMP6_ECHO_REPLY: 1706 *virtual_type = ICMP6_ECHO_REQUEST; 1707 *virtual_id = pd->hdr.icmp6->icmp6_id; 1708 break; 1709 1710 case MLD_LISTENER_QUERY: 1711 *icmp_dir = PF_IN; 1712 case MLD_LISTENER_REPORT: { 1713 struct mld_hdr *mld = (void *)pd->hdr.icmp6; 1714 1715 *virtual_type = MLD_LISTENER_QUERY; 1716 /* generate fake id for these messages */ 1717 *virtual_id = (mld->mld_addr.s6_addr32[0] ^ 1718 mld->mld_addr.s6_addr32[1] ^ 1719 mld->mld_addr.s6_addr32[2] ^ 1720 mld->mld_addr.s6_addr32[3]) & 0xffff; 1721 break; 1722 } 1723 1724 /* 1725 * ICMP6_FQDN and ICMP6_NI query/reply are the same type as 1726 * ICMP6_WRU 1727 */ 1728 case ICMP6_WRUREQUEST: 1729 *icmp_dir = PF_IN; 1730 case ICMP6_WRUREPLY: 1731 *virtual_type = ICMP6_WRUREQUEST; 1732 *virtual_id = 0; /* Nothing sane to match on! */ 1733 break; 1734 1735 case MLD_MTRACE: 1736 *icmp_dir = PF_IN; 1737 case MLD_MTRACE_RESP: 1738 *virtual_type = MLD_MTRACE; 1739 *virtual_id = 0; /* Nothing sane to match on! */ 1740 break; 1741 1742 case ND_NEIGHBOR_SOLICIT: 1743 *icmp_dir = PF_IN; 1744 case ND_NEIGHBOR_ADVERT: { 1745 struct nd_neighbor_solicit *nd = (void *)pd->hdr.icmp6; 1746 1747 *virtual_type = ND_NEIGHBOR_SOLICIT; 1748 *multi = PF_ICMP_MULTI_SOLICITED; 1749 /* generate fake id for these messages */ 1750 *virtual_id = (nd->nd_ns_target.s6_addr32[0] ^ 1751 nd->nd_ns_target.s6_addr32[1] ^ 1752 nd->nd_ns_target.s6_addr32[2] ^ 1753 nd->nd_ns_target.s6_addr32[3]) & 0xffff; 1754 break; 1755 } 1756 1757 /* 1758 * These ICMP types map to other connections. 1759 * ND_REDIRECT can't be in this list because the triggering 1760 * packet header is optional. 
1761 */ 1762 case ICMP6_DST_UNREACH: 1763 case ICMP6_PACKET_TOO_BIG: 1764 case ICMP6_TIME_EXCEEDED: 1765 case ICMP6_PARAM_PROB: 1766 /* These will not be used, but set them anyway */ 1767 *icmp_dir = PF_IN; 1768 *virtual_type = type; 1769 *virtual_id = 0; 1770 HTONS(*virtual_type); 1771 return (1); /* These types match to another state */ 1772 /* 1773 * All remaining ICMP6 types get their own states, 1774 * and will only match in one direction. 1775 */ 1776 default: 1777 *icmp_dir = PF_IN; 1778 *virtual_type = type; 1779 *virtual_id = 0; 1780 break; 1781 } 1782 break; 1783 #endif /* INET6 */ 1784 default: 1785 *icmp_dir = PF_IN; 1786 *virtual_type = type; 1787 *virtual_id = 0; 1788 break; 1789 } 1790 HTONS(*virtual_type); 1791 return (0); /* These types match to their own state */ 1792 } 1793 1794 void 1795 pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa, 1796 struct pf_addr *na, u_int16_t np, u_int16_t *pc, u_int16_t *h2c, 1797 u_int16_t *ic, u_int8_t u, sa_family_t af) 1798 { 1799 struct pf_addr oia, ooa; 1800 1801 PF_ACPY(&oia, ia, af); 1802 if (oa) 1803 PF_ACPY(&ooa, oa, af); 1804 1805 /* Change inner protocol port, fix inner protocol checksum. */ 1806 if (ip != NULL) { 1807 u_int16_t oip = *ip; 1808 u_int32_t opc; 1809 1810 if (pc != NULL) 1811 opc = *pc; 1812 *ip = np; 1813 if (pc != NULL) 1814 *pc = pf_cksum_fixup(*pc, oip, *ip, u); 1815 *ic = pf_cksum_fixup(*ic, oip, *ip, 0); 1816 if (pc != NULL) 1817 *ic = pf_cksum_fixup(*ic, opc, *pc, 0); 1818 } 1819 /* Change inner ip address, fix inner ip and icmp checksums. 
*/ 1820 PF_ACPY(ia, na, af); 1821 switch (af) { 1822 #ifdef INET 1823 case AF_INET: { 1824 u_int32_t oh2c = *h2c; 1825 1826 /* XXX just in_cksum() */ 1827 *h2c = pf_cksum_fixup(pf_cksum_fixup(*h2c, 1828 oia.addr16[0], ia->addr16[0], 0), 1829 oia.addr16[1], ia->addr16[1], 0); 1830 *ic = pf_cksum_fixup(pf_cksum_fixup(*ic, 1831 oia.addr16[0], ia->addr16[0], 0), 1832 oia.addr16[1], ia->addr16[1], 0); 1833 *ic = pf_cksum_fixup(*ic, oh2c, *h2c, 0); 1834 break; 1835 } 1836 #endif /* INET */ 1837 #ifdef INET6 1838 case AF_INET6: 1839 *ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( 1840 pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( 1841 pf_cksum_fixup(pf_cksum_fixup(*ic, 1842 oia.addr16[0], ia->addr16[0], u), 1843 oia.addr16[1], ia->addr16[1], u), 1844 oia.addr16[2], ia->addr16[2], u), 1845 oia.addr16[3], ia->addr16[3], u), 1846 oia.addr16[4], ia->addr16[4], u), 1847 oia.addr16[5], ia->addr16[5], u), 1848 oia.addr16[6], ia->addr16[6], u), 1849 oia.addr16[7], ia->addr16[7], u); 1850 break; 1851 #endif /* INET6 */ 1852 } 1853 /* Outer ip address, fix outer icmpv6 checksum, if necessary. 
*/ 1854 if (oa) { 1855 PF_ACPY(oa, na, af); 1856 #ifdef INET6 1857 if (af == AF_INET6) 1858 *ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( 1859 pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( 1860 pf_cksum_fixup(pf_cksum_fixup(*ic, 1861 ooa.addr16[0], oa->addr16[0], u), 1862 ooa.addr16[1], oa->addr16[1], u), 1863 ooa.addr16[2], oa->addr16[2], u), 1864 ooa.addr16[3], oa->addr16[3], u), 1865 ooa.addr16[4], oa->addr16[4], u), 1866 ooa.addr16[5], oa->addr16[5], u), 1867 ooa.addr16[6], oa->addr16[6], u), 1868 ooa.addr16[7], oa->addr16[7], u); 1869 #endif /* INET6 */ 1870 } 1871 } 1872 1873 1874 /* 1875 * Need to modulate the sequence numbers in the TCP SACK option 1876 * (credits to Krzysztof Pfaff for report and patch) 1877 */ 1878 int 1879 pf_modulate_sack(struct mbuf *m, int off, struct pf_pdesc *pd, 1880 struct tcphdr *th, struct pf_state_peer *dst) 1881 { 1882 int hlen = (th->th_off << 2) - sizeof(*th), thoptlen = hlen; 1883 u_int8_t opts[MAX_TCPOPTLEN], *opt = opts; 1884 int copyback = 0, i, olen; 1885 struct sackblk sack; 1886 1887 #define TCPOLEN_SACKLEN (TCPOLEN_SACK + 2) 1888 if (hlen < TCPOLEN_SACKLEN || 1889 !pf_pull_hdr(m, off + sizeof(*th), opts, hlen, NULL, NULL, pd->af)) 1890 return 0; 1891 1892 while (hlen >= TCPOLEN_SACKLEN) { 1893 olen = opt[1]; 1894 switch (*opt) { 1895 case TCPOPT_EOL: /* FALLTHROUGH */ 1896 case TCPOPT_NOP: 1897 opt++; 1898 hlen--; 1899 break; 1900 case TCPOPT_SACK: 1901 if (olen > hlen) 1902 olen = hlen; 1903 if (olen >= TCPOLEN_SACKLEN) { 1904 for (i = 2; i + TCPOLEN_SACK <= olen; 1905 i += TCPOLEN_SACK) { 1906 memcpy(&sack, &opt[i], sizeof(sack)); 1907 pf_change_a(&sack.start, &th->th_sum, 1908 htonl(ntohl(sack.start) - 1909 dst->seqdiff), 0); 1910 pf_change_a(&sack.end, &th->th_sum, 1911 htonl(ntohl(sack.end) - 1912 dst->seqdiff), 0); 1913 memcpy(&opt[i], &sack, sizeof(sack)); 1914 } 1915 copyback = 1; 1916 } 1917 /* FALLTHROUGH */ 1918 default: 1919 if (olen < 2) 1920 olen = 2; 1921 hlen -= olen; 1922 opt += olen; 1923 } 
1924 } 1925 1926 if (copyback) 1927 m_copyback(m, off + sizeof(*th), thoptlen, opts, M_NOWAIT); 1928 return (copyback); 1929 } 1930 1931 void 1932 pf_send_tcp(const struct pf_rule *r, sa_family_t af, 1933 const struct pf_addr *saddr, const struct pf_addr *daddr, 1934 u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack, 1935 u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag, 1936 u_int16_t rtag, u_int rdom, struct ether_header *eh, struct ifnet *ifp) 1937 { 1938 struct mbuf *m; 1939 int len, tlen; 1940 #ifdef INET 1941 struct ip *h; 1942 #endif /* INET */ 1943 #ifdef INET6 1944 struct ip6_hdr *h6; 1945 #endif /* INET6 */ 1946 struct tcphdr *th; 1947 char *opt; 1948 1949 /* maximum segment size tcp option */ 1950 tlen = sizeof(struct tcphdr); 1951 if (mss) 1952 tlen += 4; 1953 1954 switch (af) { 1955 #ifdef INET 1956 case AF_INET: 1957 len = sizeof(struct ip) + tlen; 1958 break; 1959 #endif /* INET */ 1960 #ifdef INET6 1961 case AF_INET6: 1962 len = sizeof(struct ip6_hdr) + tlen; 1963 break; 1964 #endif /* INET6 */ 1965 } 1966 1967 /* create outgoing mbuf */ 1968 m = m_gethdr(M_DONTWAIT, MT_HEADER); 1969 if (m == NULL) 1970 return; 1971 if (tag) 1972 m->m_pkthdr.pf.flags |= PF_TAG_GENERATED; 1973 m->m_pkthdr.pf.tag = rtag; 1974 m->m_pkthdr.rdomain = rdom; 1975 if (r && r->prio[0] != PF_PRIO_NOTSET) 1976 m->m_pkthdr.pf.prio = r->prio[0]; 1977 1978 #ifdef ALTQ 1979 if (r != NULL && r->qid) { 1980 m->m_pkthdr.pf.qid = r->qid; 1981 /* add hints for ecn */ 1982 m->m_pkthdr.pf.hdr = mtod(m, struct ip *); 1983 } 1984 #endif /* ALTQ */ 1985 m->m_data += max_linkhdr; 1986 m->m_pkthdr.len = m->m_len = len; 1987 m->m_pkthdr.rcvif = NULL; 1988 bzero(m->m_data, len); 1989 switch (af) { 1990 #ifdef INET 1991 case AF_INET: 1992 h = mtod(m, struct ip *); 1993 1994 /* IP header fields included in the TCP checksum */ 1995 h->ip_p = IPPROTO_TCP; 1996 h->ip_len = htons(tlen); 1997 h->ip_src.s_addr = saddr->v4.s_addr; 1998 h->ip_dst.s_addr = daddr->v4.s_addr; 
1999 2000 th = (struct tcphdr *)((caddr_t)h + sizeof(struct ip)); 2001 break; 2002 #endif /* INET */ 2003 #ifdef INET6 2004 case AF_INET6: 2005 h6 = mtod(m, struct ip6_hdr *); 2006 2007 /* IP header fields included in the TCP checksum */ 2008 h6->ip6_nxt = IPPROTO_TCP; 2009 h6->ip6_plen = htons(tlen); 2010 memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr)); 2011 memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr)); 2012 2013 th = (struct tcphdr *)((caddr_t)h6 + sizeof(struct ip6_hdr)); 2014 break; 2015 #endif /* INET6 */ 2016 } 2017 2018 /* TCP header */ 2019 th->th_sport = sport; 2020 th->th_dport = dport; 2021 th->th_seq = htonl(seq); 2022 th->th_ack = htonl(ack); 2023 th->th_off = tlen >> 2; 2024 th->th_flags = flags; 2025 th->th_win = htons(win); 2026 2027 if (mss) { 2028 opt = (char *)(th + 1); 2029 opt[0] = TCPOPT_MAXSEG; 2030 opt[1] = 4; 2031 HTONS(mss); 2032 bcopy((caddr_t)&mss, (caddr_t)(opt + 2), 2); 2033 } 2034 2035 switch (af) { 2036 #ifdef INET 2037 case AF_INET: 2038 /* TCP checksum */ 2039 th->th_sum = in_cksum(m, len); 2040 2041 /* Finish the IP header */ 2042 h->ip_v = 4; 2043 h->ip_hl = sizeof(*h) >> 2; 2044 h->ip_tos = IPTOS_LOWDELAY; 2045 h->ip_len = htons(len); 2046 h->ip_off = htons(ip_mtudisc ? IP_DF : 0); 2047 h->ip_ttl = ttl ? 
ttl : ip_defttl; 2048 h->ip_sum = 0; 2049 if (eh == NULL) { 2050 ip_output(m, (void *)NULL, (void *)NULL, 0, 2051 (void *)NULL, (void *)NULL); 2052 } else { 2053 struct route ro; 2054 struct rtentry rt; 2055 struct ether_header *e = (void *)ro.ro_dst.sa_data; 2056 2057 if (ifp == NULL) { 2058 m_freem(m); 2059 return; 2060 } 2061 rt.rt_ifp = ifp; 2062 ro.ro_rt = &rt; 2063 ro.ro_dst.sa_len = sizeof(ro.ro_dst); 2064 ro.ro_dst.sa_family = pseudo_AF_HDRCMPLT; 2065 bcopy(eh->ether_dhost, e->ether_shost, ETHER_ADDR_LEN); 2066 bcopy(eh->ether_shost, e->ether_dhost, ETHER_ADDR_LEN); 2067 e->ether_type = eh->ether_type; 2068 ip_output(m, (void *)NULL, &ro, IP_ROUTETOETHER, 2069 (void *)NULL, (void *)NULL); 2070 } 2071 break; 2072 #endif /* INET */ 2073 #ifdef INET6 2074 case AF_INET6: 2075 /* TCP checksum */ 2076 th->th_sum = in6_cksum(m, IPPROTO_TCP, 2077 sizeof(struct ip6_hdr), tlen); 2078 2079 h6->ip6_vfc |= IPV6_VERSION; 2080 h6->ip6_hlim = IPV6_DEFHLIM; 2081 2082 ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL); 2083 break; 2084 #endif /* INET6 */ 2085 } 2086 } 2087 2088 void 2089 pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af, 2090 struct pf_rule *r, u_int rdomain) 2091 { 2092 struct mbuf *m0; 2093 2094 if ((m0 = m_copy(m, 0, M_COPYALL)) == NULL) 2095 return; 2096 2097 m0->m_pkthdr.pf.flags |= PF_TAG_GENERATED; 2098 m0->m_pkthdr.rdomain = rdomain; 2099 if (r && r->prio[0] != PF_PRIO_NOTSET) 2100 m0->m_pkthdr.pf.prio = r->prio[0]; 2101 2102 #ifdef ALTQ 2103 if (r->qid) { 2104 m0->m_pkthdr.pf.qid = r->qid; 2105 /* add hints for ecn */ 2106 m0->m_pkthdr.pf.hdr = mtod(m0, struct ip *); 2107 } 2108 #endif /* ALTQ */ 2109 2110 switch (af) { 2111 #ifdef INET 2112 case AF_INET: 2113 icmp_error(m0, type, code, 0, 0); 2114 break; 2115 #endif /* INET */ 2116 #ifdef INET6 2117 case AF_INET6: 2118 icmp6_error(m0, type, code, 0); 2119 break; 2120 #endif /* INET6 */ 2121 } 2122 } 2123 2124 /* 2125 * Return 1 if the addresses a and b match (with mask m), 
 * otherwise return 0.
 * If n is 0, they match if they are equal. If n is != 0, they match if they
 * are different.
 */
int
pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m,
    struct pf_addr *b, sa_family_t af)
{
	int	match = 0;

	switch (af) {
#ifdef INET
	case AF_INET:
		/* single 32-bit compare under the mask */
		if ((a->addr32[0] & m->addr32[0]) ==
		    (b->addr32[0] & m->addr32[0]))
			match++;
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		/* all four 32-bit words must match under the mask */
		if (((a->addr32[0] & m->addr32[0]) ==
		     (b->addr32[0] & m->addr32[0])) &&
		    ((a->addr32[1] & m->addr32[1]) ==
		     (b->addr32[1] & m->addr32[1])) &&
		    ((a->addr32[2] & m->addr32[2]) ==
		     (b->addr32[2] & m->addr32[2])) &&
		    ((a->addr32[3] & m->addr32[3]) ==
		     (b->addr32[3] & m->addr32[3])))
			match++;
		break;
#endif /* INET6 */
	}
	/* n acts as a negation flag on the comparison result */
	if (match) {
		if (n)
			return (0);
		else
			return (1);
	} else {
		if (n)
			return (1);
		else
			return (0);
	}
}

/*
 * Return 1 if b <= a <= e, otherwise return 0.
 * Addresses are compared in host byte order, word by word for IPv6.
 */
int
pf_match_addr_range(struct pf_addr *b, struct pf_addr *e,
    struct pf_addr *a, sa_family_t af)
{
	switch (af) {
#ifdef INET
	case AF_INET:
		if ((ntohl(a->addr32[0]) < ntohl(b->addr32[0])) ||
		    (ntohl(a->addr32[0]) > ntohl(e->addr32[0])))
			return (0);
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6: {
		int	i;

		/* check a >= b */
		for (i = 0; i < 4; ++i)
			if (ntohl(a->addr32[i]) > ntohl(b->addr32[i]))
				break;
			else if (ntohl(a->addr32[i]) < ntohl(b->addr32[i]))
				return (0);
		/* check a <= e */
		for (i = 0; i < 4; ++i)
			if (ntohl(a->addr32[i]) < ntohl(e->addr32[i]))
				break;
			else if (ntohl(a->addr32[i]) > ntohl(e->addr32[i]))
				return (0);
		break;
	}
#endif /* INET6 */
	}
	return (1);
}

/*
 * Evaluate the rule comparison operator 'op' against value p, with
 * operands a1 (and a2 for the range operators).  Returns 1 on match.
 */
int
pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p)
{
	switch (op) {
	case PF_OP_IRG:		/* inclusive range, endpoints excluded */
		return ((p > a1) && (p < a2));
	case PF_OP_XRG:		/* outside of range */
		return ((p < a1) || (p > a2));
	case PF_OP_RRG:		/* range, endpoints included */
		return ((p >= a1) && (p <= a2));
	case PF_OP_EQ:
		return (p == a1);
	case PF_OP_NE:
		return (p != a1);
	case PF_OP_LT:
		return (p < a1);
	case PF_OP_LE:
		return (p <= a1);
	case PF_OP_GT:
		return (p > a1);
	case PF_OP_GE:
		return (p >= a1);
	}
	return (0); /* never reached */
}

/*
 * Port comparison: operands arrive in network byte order; convert the
 * local copies to host order before handing off to pf_match().
 */
int
pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p)
{
	NTOHS(a1);
	NTOHS(a2);
	NTOHS(p);
	return (pf_match(op, a1, a2, p));
}

/*
 * UID_MAX marks "no/unknown user"; only the EQ/NE operators may match
 * against it, everything else fails outright.
 */
int
pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u)
{
	if (u == UID_MAX && op != PF_OP_EQ && op != PF_OP_NE)
		return (0);
	return (pf_match(op, a1, a2, u));
}

/*
 * Same convention as pf_match_uid(): GID_MAX means "no/unknown group".
 */
int
pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g)
{
	if (g == GID_MAX && op != PF_OP_EQ && op != PF_OP_NE)
		return (0);
	return (pf_match(op, a1, a2, g));
}

/*
 * Match the rule's tag against the packet's tag.  *tag is lazily
 * initialized from the mbuf on first call (cached across rules).
 */
int
pf_match_tag(struct mbuf *m, struct pf_rule *r, int *tag)
{
	if (*tag == -1)
		*tag = m->m_pkthdr.pf.tag;

	return ((!r->match_tag_not && r->match_tag == *tag) ||
	    (r->match_tag_not && r->match_tag != *tag));
}

/*
 * Match the rule's "received-on" interface against the interface the
 * packet actually arrived on.  carp(4) interfaces are resolved to the
 * kif of their parent device.  Returns 0 (no match) if the receive
 * interface or its kif is unknown.
 */
int
pf_match_rcvif(struct mbuf *m, struct pf_rule *r)
{
	struct ifnet *ifp = m->m_pkthdr.rcvif;
	struct pfi_kif *kif;

	if (ifp == NULL)
		return (0);

	if (ifp->if_type == IFT_CARP && ifp->if_carpdev)
		kif = (struct pfi_kif *)ifp->if_carpdev->if_pf_kif;
	else
		kif = (struct pfi_kif *)ifp->if_pf_kif;

	if (kif == NULL) {
		DPFPRINTF(LOG_ERR,
		    "pf_test_via: kif == NULL, @%d via %s",
		    r->nr, r->rcv_ifname);
		return (0);
	}

	return (pfi_kif_match(r->rcv_kif, kif));
}

/*
 * Apply the rule's tag and routing domain to the packet header.
 * Values <= 0 / < 0 respectively mean "leave unchanged".
 */
void
pf_tag_packet(struct mbuf *m, int tag, int rtableid)
{
	if (tag > 0)
		m->m_pkthdr.pf.tag = tag;
	if (rtableid >= 0)
		m->m_pkthdr.rdomain = rtableid;
}

/*
 * Descend into the anchor attached to rule *r: push a frame onto the
 * global pf_anchor_stack and point *rs/*r at the anchor's ruleset and
 * its first rule.  For wildcard anchors ("name/*") iteration starts at
 * the alphabetically first child anchor.  On stack overflow the anchor
 * is skipped (evaluation continues with the next rule).
 */
void
pf_step_into_anchor(int *depth, struct pf_ruleset **rs,
    struct pf_rule **r, struct pf_rule **a, int *match)
{
	struct pf_anchor_stackframe	*f;

	(*r)->anchor->match = 0;
	if (match)
		*match = 0;
	if (*depth >= sizeof(pf_anchor_stack) /
	    sizeof(pf_anchor_stack[0])) {
		log(LOG_ERR, "pf_step_into_anchor: stack overflow\n");
		*r = TAILQ_NEXT(*r, entries);
		return;
	} else if (*depth == 0 && a != NULL)
		*a = *r;	/* remember the outermost anchor rule */
	f = pf_anchor_stack + (*depth)++;
	f->rs = *rs;
	f->r = *r;
	if ((*r)->anchor_wildcard) {
		f->parent = &(*r)->anchor->children;
		if ((f->child = RB_MIN(pf_anchor_node, f->parent)) ==
		    NULL) {
			*r = NULL;	/* wildcard anchor has no children */
			return;
		}
		*rs = &f->child->ruleset;
	} else {
		f->parent = NULL;
		f->child = NULL;
		*rs = &(*r)->anchor->ruleset;
	}
	*r = TAILQ_FIRST((*rs)->rules.active.ptr);
}

2337 int 2338 pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs, 2339 struct pf_rule **r, struct pf_rule **a, int *match) 2340 { 2341 struct pf_anchor_stackframe *f; 2342 int quick = 0; 2343 2344 do { 2345 if (*depth <= 0) 2346 break; 2347 f = pf_anchor_stack + *depth - 1; 2348 if (f->parent != NULL && f->child != NULL) { 2349 if (f->child->match || 2350 (match != NULL && *match)) { 2351 f->r->anchor->match = 1; 2352 *match = 0; 2353 } 2354 f->child = RB_NEXT(pf_anchor_node, f->parent, f->child); 2355 if (f->child != NULL) { 2356 *rs = &f->child->ruleset; 2357 *r = TAILQ_FIRST((*rs)->rules.active.ptr); 2358 if (*r == NULL) 2359 continue; 2360 else 2361 break; 2362 } 2363 } 2364 (*depth)--; 2365 if (*depth == 0 && a != NULL) 2366 *a = NULL; 2367 *rs = f->rs; 2368 if (f->r->anchor->match || (match != NULL && *match)) 2369 quick = f->r->quick; 2370 *r = TAILQ_NEXT(f->r, entries); 2371 } while (*r == NULL); 2372 2373 return (quick); 2374 } 2375 2376 #ifdef INET6 2377 void 2378 pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr, 2379 struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af) 2380 { 2381 switch (af) { 2382 #ifdef INET 2383 case AF_INET: 2384 naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) | 2385 ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]); 2386 break; 2387 #endif /* INET */ 2388 case AF_INET6: 2389 naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) | 2390 ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]); 2391 naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) | 2392 ((rmask->addr32[1] ^ 0xffffffff ) & saddr->addr32[1]); 2393 naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) | 2394 ((rmask->addr32[2] ^ 0xffffffff ) & saddr->addr32[2]); 2395 naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) | 2396 ((rmask->addr32[3] ^ 0xffffffff ) & saddr->addr32[3]); 2397 break; 2398 } 2399 } 2400 2401 void 2402 pf_addr_inc(struct pf_addr *addr, sa_family_t af) 2403 { 2404 switch (af) { 2405 #ifdef INET 
2406 case AF_INET: 2407 addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1); 2408 break; 2409 #endif /* INET */ 2410 case AF_INET6: 2411 if (addr->addr32[3] == 0xffffffff) { 2412 addr->addr32[3] = 0; 2413 if (addr->addr32[2] == 0xffffffff) { 2414 addr->addr32[2] = 0; 2415 if (addr->addr32[1] == 0xffffffff) { 2416 addr->addr32[1] = 0; 2417 addr->addr32[0] = 2418 htonl(ntohl(addr->addr32[0]) + 1); 2419 } else 2420 addr->addr32[1] = 2421 htonl(ntohl(addr->addr32[1]) + 1); 2422 } else 2423 addr->addr32[2] = 2424 htonl(ntohl(addr->addr32[2]) + 1); 2425 } else 2426 addr->addr32[3] = 2427 htonl(ntohl(addr->addr32[3]) + 1); 2428 break; 2429 } 2430 } 2431 #endif /* INET6 */ 2432 2433 int 2434 pf_socket_lookup(int direction, struct pf_pdesc *pd) 2435 { 2436 struct pf_addr *saddr, *daddr; 2437 u_int16_t sport, dport; 2438 struct inpcbtable *tb; 2439 struct inpcb *inp; 2440 2441 if (pd == NULL) 2442 return (-1); 2443 pd->lookup.uid = UID_MAX; 2444 pd->lookup.gid = GID_MAX; 2445 pd->lookup.pid = NO_PID; 2446 switch (pd->proto) { 2447 case IPPROTO_TCP: 2448 if (pd->hdr.tcp == NULL) 2449 return (-1); 2450 sport = pd->hdr.tcp->th_sport; 2451 dport = pd->hdr.tcp->th_dport; 2452 tb = &tcbtable; 2453 break; 2454 case IPPROTO_UDP: 2455 if (pd->hdr.udp == NULL) 2456 return (-1); 2457 sport = pd->hdr.udp->uh_sport; 2458 dport = pd->hdr.udp->uh_dport; 2459 tb = &udbtable; 2460 break; 2461 default: 2462 return (-1); 2463 } 2464 if (direction == PF_IN) { 2465 saddr = pd->src; 2466 daddr = pd->dst; 2467 } else { 2468 u_int16_t p; 2469 2470 p = sport; 2471 sport = dport; 2472 dport = p; 2473 saddr = pd->dst; 2474 daddr = pd->src; 2475 } 2476 switch (pd->af) { 2477 #ifdef INET 2478 case AF_INET: 2479 /* 2480 * Fails when rtable is changed while evaluating the ruleset 2481 * The socket looked up will not match the one hit in the end. 
2482 */ 2483 inp = in_pcbhashlookup(tb, saddr->v4, sport, daddr->v4, dport, 2484 pd->rdomain); 2485 if (inp == NULL) { 2486 inp = in_pcblookup_listen(tb, daddr->v4, dport, 0, 2487 NULL, pd->rdomain); 2488 if (inp == NULL) 2489 return (-1); 2490 } 2491 break; 2492 #endif /* INET */ 2493 #ifdef INET6 2494 case AF_INET6: 2495 inp = in6_pcbhashlookup(tb, &saddr->v6, sport, &daddr->v6, 2496 dport); 2497 if (inp == NULL) { 2498 inp = in6_pcblookup_listen(tb, &daddr->v6, dport, 0, 2499 NULL); 2500 if (inp == NULL) 2501 return (-1); 2502 } 2503 break; 2504 #endif /* INET6 */ 2505 2506 default: 2507 return (-1); 2508 } 2509 pd->lookup.uid = inp->inp_socket->so_euid; 2510 pd->lookup.gid = inp->inp_socket->so_egid; 2511 pd->lookup.pid = inp->inp_socket->so_cpid; 2512 return (1); 2513 } 2514 2515 u_int8_t 2516 pf_get_wscale(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af) 2517 { 2518 int hlen; 2519 u_int8_t hdr[60]; 2520 u_int8_t *opt, optlen; 2521 u_int8_t wscale = 0; 2522 2523 hlen = th_off << 2; /* hlen <= sizeof(hdr) */ 2524 if (hlen <= sizeof(struct tcphdr)) 2525 return (0); 2526 if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af)) 2527 return (0); 2528 opt = hdr + sizeof(struct tcphdr); 2529 hlen -= sizeof(struct tcphdr); 2530 while (hlen >= 3) { 2531 switch (*opt) { 2532 case TCPOPT_EOL: 2533 case TCPOPT_NOP: 2534 ++opt; 2535 --hlen; 2536 break; 2537 case TCPOPT_WINDOW: 2538 wscale = opt[2]; 2539 if (wscale > TCP_MAX_WINSHIFT) 2540 wscale = TCP_MAX_WINSHIFT; 2541 wscale |= PF_WSCALE_FLAG; 2542 /* FALLTHROUGH */ 2543 default: 2544 optlen = opt[1]; 2545 if (optlen < 2) 2546 optlen = 2; 2547 hlen -= optlen; 2548 opt += optlen; 2549 break; 2550 } 2551 } 2552 return (wscale); 2553 } 2554 2555 u_int16_t 2556 pf_get_mss(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af) 2557 { 2558 int hlen; 2559 u_int8_t hdr[60]; 2560 u_int8_t *opt, optlen; 2561 u_int16_t mss = tcp_mssdflt; 2562 2563 hlen = th_off << 2; /* hlen <= sizeof(hdr) */ 2564 if (hlen <= 
sizeof(struct tcphdr)) 2565 return (0); 2566 if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af)) 2567 return (0); 2568 opt = hdr + sizeof(struct tcphdr); 2569 hlen -= sizeof(struct tcphdr); 2570 while (hlen >= TCPOLEN_MAXSEG) { 2571 switch (*opt) { 2572 case TCPOPT_EOL: 2573 case TCPOPT_NOP: 2574 ++opt; 2575 --hlen; 2576 break; 2577 case TCPOPT_MAXSEG: 2578 bcopy((caddr_t)(opt + 2), (caddr_t)&mss, 2); 2579 NTOHS(mss); 2580 /* FALLTHROUGH */ 2581 default: 2582 optlen = opt[1]; 2583 if (optlen < 2) 2584 optlen = 2; 2585 hlen -= optlen; 2586 opt += optlen; 2587 break; 2588 } 2589 } 2590 return (mss); 2591 } 2592 2593 u_int16_t 2594 pf_calc_mss(struct pf_addr *addr, sa_family_t af, int rtableid, u_int16_t offer) 2595 { 2596 #ifdef INET 2597 struct sockaddr_in *dst; 2598 struct route ro; 2599 #endif /* INET */ 2600 #ifdef INET6 2601 struct sockaddr_in6 *dst6; 2602 struct route_in6 ro6; 2603 #endif /* INET6 */ 2604 struct rtentry *rt = NULL; 2605 int hlen; 2606 u_int16_t mss = tcp_mssdflt; 2607 2608 switch (af) { 2609 #ifdef INET 2610 case AF_INET: 2611 hlen = sizeof(struct ip); 2612 bzero(&ro, sizeof(ro)); 2613 dst = (struct sockaddr_in *)&ro.ro_dst; 2614 dst->sin_family = AF_INET; 2615 dst->sin_len = sizeof(*dst); 2616 dst->sin_addr = addr->v4; 2617 ro.ro_tableid = rtableid; 2618 rtalloc_noclone(&ro); 2619 rt = ro.ro_rt; 2620 break; 2621 #endif /* INET */ 2622 #ifdef INET6 2623 case AF_INET6: 2624 hlen = sizeof(struct ip6_hdr); 2625 bzero(&ro6, sizeof(ro6)); 2626 dst6 = (struct sockaddr_in6 *)&ro6.ro_dst; 2627 dst6->sin6_family = AF_INET6; 2628 dst6->sin6_len = sizeof(*dst6); 2629 dst6->sin6_addr = addr->v6; 2630 ro6.ro_tableid = rtableid; 2631 rtalloc_noclone((struct route *)&ro6); 2632 rt = ro6.ro_rt; 2633 break; 2634 #endif /* INET6 */ 2635 } 2636 2637 if (rt && rt->rt_ifp) { 2638 mss = rt->rt_ifp->if_mtu - hlen - sizeof(struct tcphdr); 2639 mss = max(tcp_mssdflt, mss); 2640 RTFREE(rt); 2641 } 2642 mss = min(mss, offer); 2643 mss = max(mss, 64); /* sanity - at 
least max opt space */ 2644 return (mss); 2645 } 2646 2647 void 2648 pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr) 2649 { 2650 struct pf_rule *r = s->rule.ptr; 2651 struct pf_src_node *sn = NULL; 2652 2653 s->rt_kif = NULL; 2654 if (!r->rt) 2655 return; 2656 switch (s->key[PF_SK_WIRE]->af) { 2657 #ifdef INET 2658 case AF_INET: 2659 pf_map_addr(AF_INET, r, saddr, &s->rt_addr, NULL, &sn, 2660 &r->route, PF_SN_ROUTE); 2661 s->rt_kif = r->route.kif; 2662 s->natrule.ptr = r; 2663 break; 2664 #endif /* INET */ 2665 #ifdef INET6 2666 case AF_INET6: 2667 pf_map_addr(AF_INET6, r, saddr, &s->rt_addr, NULL, &sn, 2668 &r->route, PF_SN_ROUTE); 2669 s->rt_kif = r->route.kif; 2670 s->natrule.ptr = r; 2671 break; 2672 #endif /* INET6 */ 2673 } 2674 } 2675 2676 u_int32_t 2677 pf_tcp_iss(struct pf_pdesc *pd) 2678 { 2679 MD5_CTX ctx; 2680 u_int32_t digest[4]; 2681 2682 if (pf_tcp_secret_init == 0) { 2683 arc4random_buf(pf_tcp_secret, sizeof(pf_tcp_secret)); 2684 MD5Init(&pf_tcp_secret_ctx); 2685 MD5Update(&pf_tcp_secret_ctx, pf_tcp_secret, 2686 sizeof(pf_tcp_secret)); 2687 pf_tcp_secret_init = 1; 2688 } 2689 ctx = pf_tcp_secret_ctx; 2690 2691 MD5Update(&ctx, (char *)&pd->hdr.tcp->th_sport, sizeof(u_short)); 2692 MD5Update(&ctx, (char *)&pd->hdr.tcp->th_dport, sizeof(u_short)); 2693 if (pd->af == AF_INET6) { 2694 MD5Update(&ctx, (char *)&pd->src->v6, sizeof(struct in6_addr)); 2695 MD5Update(&ctx, (char *)&pd->dst->v6, sizeof(struct in6_addr)); 2696 } else { 2697 MD5Update(&ctx, (char *)&pd->src->v4, sizeof(struct in_addr)); 2698 MD5Update(&ctx, (char *)&pd->dst->v4, sizeof(struct in_addr)); 2699 } 2700 MD5Final((u_char *)digest, &ctx); 2701 pf_tcp_iss_off += 4096; 2702 return (digest[0] + tcp_iss + pf_tcp_iss_off); 2703 } 2704 2705 void 2706 pf_rule_to_actions(struct pf_rule *r, struct pf_rule_actions *a) 2707 { 2708 if (r->qid) 2709 a->qid = r->qid; 2710 if (r->pqid) 2711 a->pqid = r->pqid; 2712 if (r->rtableid >= 0) 2713 a->rtableid = r->rtableid; 2714 a->log |= r->log; 
2715 if (r->scrub_flags & PFSTATE_SETTOS) 2716 a->set_tos = r->set_tos; 2717 if (r->min_ttl) 2718 a->min_ttl = r->min_ttl; 2719 if (r->max_mss) 2720 a->max_mss = r->max_mss; 2721 a->flags |= (r->scrub_flags & (PFSTATE_NODF|PFSTATE_RANDOMID| 2722 PFSTATE_SETTOS|PFSTATE_SCRUB_TCP)); 2723 if (r->prio[0] != PF_PRIO_NOTSET) 2724 a->prio[0] = r->prio[0]; 2725 if (r->prio[1] != PF_PRIO_NOTSET) 2726 a->prio[1] = r->prio[1]; 2727 } 2728 2729 #define PF_TEST_ATTRIB(t, a) \ 2730 do { \ 2731 if (t) { \ 2732 r = a; \ 2733 goto nextrule; \ 2734 } \ 2735 } while (0) 2736 2737 int 2738 pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, 2739 struct pfi_kif *kif, struct mbuf *m, int off, 2740 struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm, 2741 int hdrlen) 2742 { 2743 struct pf_rule *lastr = NULL; 2744 sa_family_t af = pd->af; 2745 struct pf_rule *r; 2746 struct pf_rule *nr = NULL; 2747 struct pf_rule *a = NULL; 2748 struct pf_ruleset *ruleset = NULL; 2749 struct pf_rule_slist rules; 2750 struct pf_rule_item *ri; 2751 struct pf_src_node *sns[PF_SN_MAX]; 2752 struct tcphdr *th = pd->hdr.tcp; 2753 struct pf_state_key *skw = NULL, *sks = NULL; 2754 struct pf_rule_actions act; 2755 struct ifqueue *ifq = &ipintrq; 2756 u_short reason; 2757 int rewrite = 0; 2758 int tag = -1; 2759 int asd = 0; 2760 int match = 0; 2761 int state_icmp = 0, icmp_dir, multi; 2762 u_int16_t virtual_type, virtual_id; 2763 u_int8_t icmptype = 0, icmpcode = 0; 2764 2765 PF_ACPY(&pd->nsaddr, pd->src, pd->af); 2766 PF_ACPY(&pd->ndaddr, pd->dst, pd->af); 2767 2768 bzero(&act, sizeof(act)); 2769 act.prio[0] = act.prio[1] = PF_PRIO_NOTSET; 2770 bzero(sns, sizeof(sns)); 2771 act.rtableid = pd->rdomain; 2772 SLIST_INIT(&rules); 2773 2774 #ifdef INET6 2775 if (af == AF_INET6) 2776 ifq = &ip6intrq; 2777 #endif 2778 2779 if (direction == PF_IN && pf_check_congestion(ifq)) { 2780 REASON_SET(&reason, PFRES_CONGEST); 2781 return (PF_DROP); 2782 } 2783 2784 switch (pd->virtual_proto) { 
	/* seed the (pre-translation) port pair per protocol */
	case IPPROTO_TCP:
		pd->nsport = th->th_sport;
		pd->ndport = th->th_dport;
		break;
	case IPPROTO_UDP:
		pd->nsport = pd->hdr.udp->uh_sport;
		pd->ndport = pd->hdr.udp->uh_dport;
		break;
#ifdef INET
	case IPPROTO_ICMP:
		/* ICMP id/type are mapped into virtual "ports" for matching */
		icmptype = pd->hdr.icmp->icmp_type;
		icmpcode = pd->hdr.icmp->icmp_code;
		state_icmp = pf_icmp_mapping(pd, icmptype,
		    &icmp_dir, &multi, &virtual_id, &virtual_type);
		if (icmp_dir == PF_IN) {
			pd->nsport = virtual_id;
			pd->ndport = virtual_type;
		} else {
			pd->nsport = virtual_type;
			pd->ndport = virtual_id;
		}
		break;
#endif /* INET */
#ifdef INET6
	case IPPROTO_ICMPV6:
		icmptype = pd->hdr.icmp6->icmp6_type;
		icmpcode = pd->hdr.icmp6->icmp6_code;
		state_icmp = pf_icmp_mapping(pd, icmptype,
		    &icmp_dir, &multi, &virtual_id, &virtual_type);
		if (icmp_dir == PF_IN) {
			pd->nsport = virtual_id;
			pd->ndport = virtual_type;
		} else {
			pd->nsport = virtual_type;
			pd->ndport = virtual_id;
		}
		break;
#endif /* INET6 */
	default:
		pd->nsport = pd->ndport = 0;
		break;
	}

	pd->osport = pd->nsport;
	pd->odport = pd->ndport;

	/*
	 * Walk the active ruleset.  PF_TEST_ATTRIB jumps to "nextrule"
	 * (possibly via a skip-step shortcut) whenever a criterion fails.
	 */
	r = TAILQ_FIRST(pf_main_ruleset.rules.active.ptr);
	while (r != NULL) {
		r->evaluations++;
		PF_TEST_ATTRIB((pfi_kif_match(r->kif, kif) == r->ifnot),
			r->skip[PF_SKIP_IFP].ptr);
		PF_TEST_ATTRIB((r->direction && r->direction != direction),
			r->skip[PF_SKIP_DIR].ptr);
		PF_TEST_ATTRIB((r->onrdomain >= 0 &&
		    (r->onrdomain == pd->rdomain) == r->ifnot),
			r->skip[PF_SKIP_RDOM].ptr);
		PF_TEST_ATTRIB((r->af && r->af != af),
			r->skip[PF_SKIP_AF].ptr);
		PF_TEST_ATTRIB((r->proto && r->proto != pd->proto),
			r->skip[PF_SKIP_PROTO].ptr);
		PF_TEST_ATTRIB((PF_MISMATCHAW(&r->src.addr, &pd->nsaddr, af,
		    r->src.neg, kif, act.rtableid)),
			r->skip[PF_SKIP_SRC_ADDR].ptr);
		PF_TEST_ATTRIB((PF_MISMATCHAW(&r->dst.addr, &pd->ndaddr, af,
		    r->dst.neg, NULL, act.rtableid)),
			r->skip[PF_SKIP_DST_ADDR].ptr);

		switch (pd->virtual_proto) {
		case PF_VPROTO_FRAGMENT:
			/* tcp/udp only. port_op always 0 in other cases */
			PF_TEST_ATTRIB((r->src.port_op || r->dst.port_op),
				TAILQ_NEXT(r, entries));
			PF_TEST_ATTRIB((pd->proto == IPPROTO_TCP && r->flagset),
				TAILQ_NEXT(r, entries));
			/* icmp only. type/code always 0 in other cases */
			PF_TEST_ATTRIB((r->type || r->code),
				TAILQ_NEXT(r, entries));
			/* tcp/udp only. {uid|gid}.op always 0 in other cases */
			PF_TEST_ATTRIB((r->gid.op || r->uid.op),
				TAILQ_NEXT(r, entries));
			break;

		case IPPROTO_TCP:
			PF_TEST_ATTRIB(((r->flagset & th->th_flags) != r->flags),
				TAILQ_NEXT(r, entries));
			PF_TEST_ATTRIB((r->os_fingerprint != PF_OSFP_ANY &&
			    !pf_osfp_match(pf_osfp_fingerprint(pd, m, off, th),
			    r->os_fingerprint)),
				TAILQ_NEXT(r, entries));
			/* FALLTHROUGH: port/uid/gid checks shared with UDP */

		case IPPROTO_UDP:
			/* tcp/udp only. port_op always 0 in other cases */
			PF_TEST_ATTRIB((r->src.port_op &&
			    !pf_match_port(r->src.port_op, r->src.port[0],
			    r->src.port[1], pd->nsport)),
				r->skip[PF_SKIP_SRC_PORT].ptr);
			PF_TEST_ATTRIB((r->dst.port_op &&
			    !pf_match_port(r->dst.port_op, r->dst.port[0],
			    r->dst.port[1], pd->ndport)),
				r->skip[PF_SKIP_DST_PORT].ptr);
			/*
			 * tcp/udp only. uid.op always 0 in other cases.
			 * The socket lookup is performed lazily, at most once.
			 */
			PF_TEST_ATTRIB((r->uid.op && (pd->lookup.done ||
			    (pd->lookup.done =
			    pf_socket_lookup(direction, pd), 1)) &&
			    !pf_match_uid(r->uid.op, r->uid.uid[0],
			    r->uid.uid[1], pd->lookup.uid)),
				TAILQ_NEXT(r, entries));
			/* tcp/udp only. gid.op always 0 in other cases */
			PF_TEST_ATTRIB((r->gid.op && (pd->lookup.done ||
			    (pd->lookup.done =
			    pf_socket_lookup(direction, pd), 1)) &&
			    !pf_match_gid(r->gid.op, r->gid.gid[0],
			    r->gid.gid[1], pd->lookup.gid)),
				TAILQ_NEXT(r, entries));
			break;

		case IPPROTO_ICMP:
		case IPPROTO_ICMPV6:
			/* icmp only. type always 0 in other cases */
			PF_TEST_ATTRIB((r->type && r->type != icmptype + 1),
				TAILQ_NEXT(r, entries));
			/* icmp only. code always 0 in other cases */
			PF_TEST_ATTRIB((r->code && r->code != icmpcode + 1),
				TAILQ_NEXT(r, entries));
			break;

		default:
			break;
		}

		PF_TEST_ATTRIB((r->rule_flag & PFRULE_FRAGMENT &&
		    pd->virtual_proto != PF_VPROTO_FRAGMENT),
			TAILQ_NEXT(r, entries));
		PF_TEST_ATTRIB((r->tos && !(r->tos == pd->tos)),
			TAILQ_NEXT(r, entries));
		PF_TEST_ATTRIB((r->prob &&
		    r->prob <= arc4random_uniform(UINT_MAX - 1) + 1),
			TAILQ_NEXT(r, entries));
		PF_TEST_ATTRIB((r->match_tag && !pf_match_tag(m, r, &tag)),
			TAILQ_NEXT(r, entries));
		PF_TEST_ATTRIB((r->rcv_kif && !pf_match_rcvif(m, r)),
			TAILQ_NEXT(r, entries));

		/* rule matched */
		if (r->tag)
			tag = r->tag;
		if (r->anchor == NULL) {
			lastr = r;
			if (r->action == PF_MATCH) {
				/* non-terminal: record and keep evaluating */
				if ((ri = pool_get(&pf_rule_item_pl,
				    PR_NOWAIT)) == NULL) {
					REASON_SET(&reason, PFRES_MEMORY);
					goto cleanup;
				}
				ri->r = r;
				/* order is irrelevant */
				SLIST_INSERT_HEAD(&rules, ri, entry);
				pf_rule_to_actions(r, &act);
				if (pf_get_transaddr(r, pd, sns, &nr) == -1) {
					REASON_SET(&reason, PFRES_MEMORY);
					goto cleanup;
				}
				if (r->log || act.log & PF_LOG_MATCHES)
					PFLOG_PACKET(kif, m, direction,
					    reason, r, a, ruleset, pd);
			} else {
				/* terminal pass/block: remember as last match */
				match = 1;
				*rm = r;
				*am = a;
				*rsm = ruleset;
				if (act.log & PF_LOG_MATCHES)
					PFLOG_PACKET(kif, m, direction,
					    reason, r, a, ruleset, pd);
			}

			if ((*rm)->quick)
				break;
			r = TAILQ_NEXT(r, entries);
		} else
			pf_step_into_anchor(&asd, &ruleset,
			    &r, &a, &match);

 nextrule:
		if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
		    &r, &a, &match))
			break;
	}
	r = *rm;
	a = *am;
	ruleset = *rsm;

	/* apply actions for last matching pass/block rule */
	pf_rule_to_actions(r, &act);
	if (pf_get_transaddr(r, pd, sns, &nr) == -1) {
		REASON_SET(&reason, PFRES_MEMORY);
		goto cleanup;
	}
	REASON_SET(&reason, PFRES_MATCH);

	if (r->log || act.log & PF_LOG_MATCHES)
		PFLOG_PACKET(kif, m, direction, reason, r, a, ruleset, pd);

	/* block ... return-rst / return-icmp handling */
	if (pd->virtual_proto != PF_VPROTO_FRAGMENT &&
	    (r->action == PF_DROP) &&
	    ((r->rule_flag & PFRULE_RETURNRST) ||
	    (r->rule_flag & PFRULE_RETURNICMP) ||
	    (r->rule_flag & PFRULE_RETURN))) {
		if (pd->proto == IPPROTO_TCP &&
		    ((r->rule_flag & PFRULE_RETURNRST) ||
		    (r->rule_flag & PFRULE_RETURN)) &&
		    !(th->th_flags & TH_RST)) {
			u_int32_t	 ack = ntohl(th->th_seq) + pd->p_len;
			int		 len = 0;
			struct ip	*h4;
			struct ip6_hdr	*h6;

			switch (af) {
			case AF_INET:
				h4 = mtod(m, struct ip *);
				len = ntohs(h4->ip_len) - off;
				break;
			case AF_INET6:
				h6 = mtod(m, struct ip6_hdr *);
				len = ntohs(h6->ip6_plen) - (off - sizeof(*h6));
				break;
			}

			/* only answer packets with a valid TCP checksum */
			if (pf_check_proto_cksum(m, off, len, IPPROTO_TCP, af))
				REASON_SET(&reason, PFRES_PROTCKSUM);
			else {
				if (th->th_flags & TH_SYN)
					ack++;
				if (th->th_flags & TH_FIN)
					ack++;
				pf_send_tcp(r, af, pd->dst,
				    pd->src, th->th_dport, th->th_sport,
				    ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0,
				    r->return_ttl, 1, 0, pd->rdomain,
				    pd->eh, kif->pfik_ifp);
			}
		} else if ((pd->proto != IPPROTO_ICMP ||
		    ICMP_INFOTYPE(icmptype)) && af == AF_INET &&
		    r->return_icmp)
			pf_send_icmp(m, r->return_icmp >> 8,
			    r->return_icmp & 255, af, r, pd->rdomain);
		else if ((pd->proto != IPPROTO_ICMPV6 ||
		    (icmptype >= ICMP6_ECHO_REQUEST &&
		    icmptype != ND_REDIRECT)) && af == AF_INET6 &&
		    r->return_icmp6)
			pf_send_icmp(m, r->return_icmp6 >> 8,
			    r->return_icmp6 & 255, af, r, pd->rdomain);
	}

	if (r->action == PF_DROP)
		goto cleanup;

	pf_tag_packet(m, tag, act.rtableid);
	if (act.rtableid >= 0 &&
	    rtable_l2(act.rtableid) != pd->rdomain)
		pd->destchg = 1;

	if (r->action == PF_PASS && pd->badopts && ! r->allow_opts) {
		REASON_SET(&reason, PFRES_IPOPTIONS);
		pd->pflog |= PF_LOG_FORCE;
		DPFPRINTF(LOG_NOTICE, "dropping packet with "
		    "ip/ipv6 options in pf_test_rule()");
		goto cleanup;
	}

	if (pd->virtual_proto != PF_VPROTO_FRAGMENT
	    && !state_icmp && r->keep_state) {
		int action;

		if (r->rule_flag & PFRULE_SRCTRACK &&
		    pf_insert_src_node(&sns[PF_SN_NONE], r, PF_SN_NONE, pd->af,
		    pd->src, NULL, 0) != 0) {
			REASON_SET(&reason, PFRES_SRCLIMIT);
			goto cleanup;
		}

		/* pf_create_state() takes ownership of the match-rule list */
		action = pf_create_state(r, a, nr, pd, &skw, &sks, m, off,
		    &rewrite, kif, sm, tag, &rules, &act, sns);

		if (action != PF_PASS)
			return (action);
		if (sks != skw) {
			/* NAT happened: rewrite the packet accordingly */
			struct pf_state_key	*sk;

			if (pd->dir == PF_IN)
				sk = sks;
			else
				sk = skw;
			rewrite += pf_translate(pd,
			    &sk->addr[pd->sidx], sk->port[pd->sidx],
			    &sk->addr[pd->didx], sk->port[pd->didx],
			    virtual_type, icmp_dir);
		}
	} else {
		/* stateless: discard the collected match-rule items */
		while ((ri = SLIST_FIRST(&rules))) {
			SLIST_REMOVE_HEAD(&rules, entry);
			pool_put(&pf_rule_item_pl, ri);
		}
	}

	/* copy back packet headers if we performed NAT operations */
	if (rewrite && hdrlen)
		m_copyback(m, off, hdrlen, pd->hdr.any, M_NOWAIT);

#if NPFSYNC > 0
	if (*sm != NULL && !ISSET((*sm)->state_flags, PFSTATE_NOSYNC) &&
	    direction == PF_OUT && pfsync_up()) {
		/*
		 * We want the state created, but we dont
		 * want to send this in case a partner
		 * firewall has to know about it to allow
		 * replies through it.
		 */
		if (pfsync_defer(*sm, m))
			return (PF_DEFER);
	}
#endif

	if (r->rule_flag & PFRULE_ONCE)
		pf_purge_rule(ruleset, r);

	return (PF_PASS);

cleanup:
	while ((ri = SLIST_FIRST(&rules))) {
		SLIST_REMOVE_HEAD(&rules, entry);
		pool_put(&pf_rule_item_pl, ri);
	}

	return (PF_DROP);
}

/*
 * Create a state entry for a packet that matched a stateful rule.
 * Returns PF_PASS (state inserted, *sm set), PF_SYNPROXY_DROP for the
 * synproxy handshake, or PF_DROP on limits/allocation/insert failure.
 */
static __inline int
pf_create_state(struct pf_rule *r, struct pf_rule *a, struct pf_rule *nr,
    struct pf_pdesc *pd, struct pf_state_key **skw, struct pf_state_key **sks,
    struct mbuf *m, int off, int *rewrite, struct pfi_kif *kif,
    struct pf_state **sm, int tag, struct pf_rule_slist *rules,
    struct pf_rule_actions *act, struct pf_src_node *sns[PF_SN_MAX])
{
	struct pf_state	*s = NULL;
	struct tcphdr	*th = pd->hdr.tcp;
	u_int16_t	 mss = tcp_mssdflt;
	u_short		 reason;
	u_int		 i;

	/* check maximums */
	if (r->max_states && (r->states_cur >= r->max_states)) {
		pf_status.lcounters[LCNT_STATES]++;
		REASON_SET(&reason, PFRES_MAXSTATES);
		return (PF_DROP);
	}

	s = pool_get(&pf_state_pl, PR_NOWAIT | PR_ZERO);
	if (s == NULL) {
		REASON_SET(&reason, PFRES_MEMORY);
		goto csfailed;
	}
	s->rule.ptr = r;
	s->anchor.ptr = a;
	s->natrule.ptr = nr;
	bcopy(rules, &s->match_rules, sizeof(s->match_rules));
	STATE_INC_COUNTERS(s);
	if (r->allow_opts)
		s->state_flags |= PFSTATE_ALLOWOPTS;
	if (r->rule_flag & PFRULE_STATESLOPPY)
		s->state_flags |= PFSTATE_SLOPPY;
	if (r->rule_flag & PFRULE_PFLOW)
		s->state_flags |= PFSTATE_PFLOW;
	s->log = act->log & PF_LOG_ALL;
	s->qid = act->qid;
	s->pqid = act->pqid;
	s->rtableid[pd->didx] = act->rtableid;
	s->rtableid[pd->sidx] = -1;	/* return
traffic is routed normally */
	s->min_ttl = act->min_ttl;
	s->set_tos = act->set_tos;
	s->max_mss = act->max_mss;
	s->state_flags |= act->flags;
	s->sync_state = PFSYNC_S_NONE;
	s->prio[0] = act->prio[0];
	s->prio[1] = act->prio[1];
	/* per-protocol initial peer states and timeouts */
	switch (pd->proto) {
	case IPPROTO_TCP:
		s->src.seqlo = ntohl(th->th_seq);
		s->src.seqhi = s->src.seqlo + pd->p_len + 1;
		if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN &&
		    r->keep_state == PF_STATE_MODULATE) {
			/* Generate sequence number modulator */
			if ((s->src.seqdiff = pf_tcp_iss(pd) - s->src.seqlo) ==
			    0)
				s->src.seqdiff = 1;
			pf_change_a(&th->th_seq, &th->th_sum,
			    htonl(s->src.seqlo + s->src.seqdiff), 0);
			*rewrite = 1;
		} else
			s->src.seqdiff = 0;
		if (th->th_flags & TH_SYN) {
			s->src.seqhi++;
			s->src.wscale = pf_get_wscale(m, off,
			    th->th_off, pd->af);
		}
		s->src.max_win = MAX(ntohs(th->th_win), 1);
		if (s->src.wscale & PF_WSCALE_MASK) {
			/* Remove scale factor from initial window */
			int win = s->src.max_win;
			win += 1 << (s->src.wscale & PF_WSCALE_MASK);
			s->src.max_win = (win - 1) >>
			    (s->src.wscale & PF_WSCALE_MASK);
		}
		if (th->th_flags & TH_FIN)
			s->src.seqhi++;
		s->dst.seqhi = 1;
		s->dst.max_win = 1;
		s->src.state = TCPS_SYN_SENT;
		s->dst.state = TCPS_CLOSED;
		s->timeout = PFTM_TCP_FIRST_PACKET;
		break;
	case IPPROTO_UDP:
		s->src.state = PFUDPS_SINGLE;
		s->dst.state = PFUDPS_NO_TRAFFIC;
		s->timeout = PFTM_UDP_FIRST_PACKET;
		break;
	case IPPROTO_ICMP:
#ifdef INET6
	case IPPROTO_ICMPV6:
#endif
		s->timeout = PFTM_ICMP_FIRST_PACKET;
		break;
	default:
		s->src.state = PFOTHERS_SINGLE;
		s->dst.state = PFOTHERS_NO_TRAFFIC;
		s->timeout = PFTM_OTHER_FIRST_PACKET;
	}

	s->creation = time_second;
	s->expire = time_second;

	if (pd->proto == IPPROTO_TCP) {
		if (s->state_flags & PFSTATE_SCRUB_TCP &&
		    pf_normalize_tcp_init(m, off, pd, th, &s->src, &s->dst)) {
			REASON_SET(&reason, PFRES_MEMORY);
			goto csfailed;
		}
		if (s->state_flags & PFSTATE_SCRUB_TCP && s->src.scrub &&
		    pf_normalize_tcp_stateful(m, off, pd, &reason, th, s,
		    &s->src, &s->dst, rewrite)) {
			/* This really shouldn't happen!!! */
			DPFPRINTF(LOG_ERR,
			    "pf_normalize_tcp_stateful failed on first pkt");
			goto csfailed;
		}
	}
	s->direction = pd->dir;

	if (pf_state_key_setup(pd, skw, sks, act->rtableid)) {
		REASON_SET(&reason, PFRES_MEMORY);
		goto csfailed;
	}

	if (pf_state_insert(BOUND_IFACE(r, kif), *skw, *sks, s)) {
		/* collision with an existing state; undo key attachment */
		pf_state_key_detach(s, PF_SK_STACK);
		pf_state_key_detach(s, PF_SK_WIRE);
		*sks = *skw = NULL;
		REASON_SET(&reason, PFRES_STATEINS);
		goto csfailed;
	} else
		*sm = s;

	/* attach src nodes late, otherwise cleanup on error nontrivial */
	for (i = 0; i < PF_SN_MAX; i++)
		if (sns[i] != NULL) {
			struct pf_sn_item	*sni;

			sni = pool_get(&pf_sn_item_pl, PR_NOWAIT);
			if (sni == NULL) {
				REASON_SET(&reason, PFRES_MEMORY);
				pf_src_tree_remove_state(s);
				STATE_DEC_COUNTERS(s);
				pool_put(&pf_state_pl, s);
				return (PF_DROP);
			}
			sni->sn = sns[i];
			SLIST_INSERT_HEAD(&s->src_nodes, sni, next);
			sni->sn->states++;
		}

	pf_set_rt_ifp(s, pd->src);	/* needs s->state_key set */
	if (tag > 0) {
		pf_tag_ref(tag);
		s->tag = tag;
	}
	if (pd->proto == IPPROTO_TCP && (th->th_flags & (TH_SYN|TH_ACK)) ==
	    TH_SYN && r->keep_state == PF_STATE_SYNPROXY) {
		/* synproxy: answer the SYN ourselves, drop the original */
		int rtid = pd->rdomain;
		if (act->rtableid >= 0)
			rtid = act->rtableid;
		s->src.state = PF_TCPS_PROXY_SRC;
		s->src.seqhi = htonl(arc4random());
		/* Find mss option */
		mss = pf_get_mss(m, off, th->th_off, pd->af);
		mss = pf_calc_mss(pd->src, pd->af, rtid, mss);
		mss = pf_calc_mss(pd->dst, pd->af, rtid, mss);
		s->src.mss = mss;
		pf_send_tcp(r, pd->af, pd->dst, pd->src, th->th_dport,
		    th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1,
		    TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, pd->rdomain,
		    NULL, NULL);
		REASON_SET(&reason, PFRES_SYNPROXY);
		return (PF_SYNPROXY_DROP);
	}

	return (PF_PASS);

csfailed:
	for (i = 0; i < PF_SN_MAX; i++)
		if (sns[i] != NULL)
			pf_remove_src_node(sns[i]);
	if (s) {
		pf_normalize_tcp_cleanup(s);	/* safe even w/o init */
		pf_src_tree_remove_state(s);
		STATE_DEC_COUNTERS(s);
		pool_put(&pf_state_pl, s);
	}

	return (PF_DROP);
}

/*
 * Rewrite the packet's addresses/ports to the given translated values
 * and update checksums.  Returns nonzero when anything was changed so
 * the caller knows to copy the headers back into the mbuf.
 */
int
pf_translate(struct pf_pdesc *pd, struct pf_addr *saddr, u_int16_t sport,
    struct pf_addr *daddr, u_int16_t dport, u_int16_t virtual_type,
    int icmp_dir)
{
	/*
	 * when called from bpf_mtap_pflog, there are extra constraints:
	 * -mbuf is faked, m_data is the bpf buffer
	 * -pd is not fully set up
	 */
	int	rewrite = 0;

	if (PF_ANEQ(daddr, pd->dst, pd->af))
		pd->destchg = 1;

	switch (pd->proto) {
	case IPPROTO_TCP:
		if (PF_ANEQ(saddr, pd->src, pd->af) || *pd->sport != sport) {
			pf_change_ap(pd->src, pd->sport, &pd->hdr.tcp->th_sum,
			    saddr, sport, 0, pd->af);
			rewrite = 1;
		}
		if (PF_ANEQ(daddr, pd->dst, pd->af) || *pd->dport != dport) {
			pf_change_ap(pd->dst, pd->dport, &pd->hdr.tcp->th_sum,
			    daddr, dport, 0, pd->af);
			rewrite = 1;
		}
		break;

	case IPPROTO_UDP:
		if (PF_ANEQ(saddr, pd->src, pd->af) || *pd->sport != sport) {
			pf_change_ap(pd->src, pd->sport, &pd->hdr.udp->uh_sum,
			    saddr, sport, 1, pd->af);
			rewrite = 1;
		}
		if (PF_ANEQ(daddr, pd->dst, pd->af) || *pd->dport != dport) {
			pf_change_ap(pd->dst, pd->dport, &pd->hdr.udp->uh_sum,
			    daddr, dport, 1, pd->af);
			rewrite = 1;
		}
		break;

#ifdef INET
	case IPPROTO_ICMP:
		/* pf_translate() is also used when logging invalid packets */
		if (pd->af != AF_INET)
			return (0);

		if (PF_ANEQ(saddr, pd->src, pd->af)) {
			pf_change_a(&pd->src->v4.s_addr, NULL,
			    saddr->v4.s_addr, 0);
			rewrite = 1;
		}
		if (PF_ANEQ(daddr, pd->dst, pd->af)) {
			pf_change_a(&pd->dst->v4.s_addr, NULL,
			    daddr->v4.s_addr, 0);
			rewrite = 1;
		}
		if (virtual_type == htons(ICMP_ECHO)) {
			/* echo id is carried in the virtual port */
			u_int16_t icmpid = (icmp_dir == PF_IN) ? sport : dport;

			if (icmpid != pd->hdr.icmp->icmp_id) {
				pd->hdr.icmp->icmp_cksum = pf_cksum_fixup(
				    pd->hdr.icmp->icmp_cksum,
				    pd->hdr.icmp->icmp_id, icmpid, 0);
				pd->hdr.icmp->icmp_id = icmpid;
				rewrite = 1;
			}
		}
		break;
#endif /* INET */

#ifdef INET6
	case IPPROTO_ICMPV6:
		/* pf_translate() is also used when logging invalid packets */
		if (pd->af != AF_INET6)
			return (0);

		if (PF_ANEQ(saddr, pd->src, pd->af)) {
			pf_change_a6(pd->src, &pd->hdr.icmp6->icmp6_cksum,
			    saddr, 0);
			rewrite = 1;
		}
		if (PF_ANEQ(daddr, pd->dst, pd->af)) {
			pf_change_a6(pd->dst, &pd->hdr.icmp6->icmp6_cksum,
			    daddr, 0);
			rewrite = 1;
		}
		break;
#endif /* INET6 */

	default:
		/* other protocols: translate addresses only, no checksum */
		switch (pd->af) {
#ifdef INET
		case AF_INET:
			if (PF_ANEQ(saddr, pd->src, pd->af)) {
				pf_change_a(&pd->src->v4.s_addr, NULL,
				    saddr->v4.s_addr, 0);
				rewrite = 1;
			}
			if (PF_ANEQ(daddr, pd->dst, pd->af)) {
				pf_change_a(&pd->dst->v4.s_addr, NULL,
				    daddr->v4.s_addr, 0);
				rewrite = 1;
			}
			break;
#endif /* INET */
#ifdef INET6
		case AF_INET6:
			if (PF_ANEQ(saddr, pd->src, pd->af)) {
				pf_change_a6(pd->src, NULL, saddr, 0);
				rewrite = 1;
			}
			if (PF_ANEQ(daddr, pd->dst, pd->af)) {
				pf_change_a6(pd->dst, NULL, daddr, 0);
				rewrite = 1;
			}
			break;
#endif /* INET6 */
		}
	}
	return
(rewrite);
}

/*
 * Full TCP state tracking for an established pf state.
 *
 * Validates the segment against the peer's sequence window using the
 * algorithm from Guido van Rooij's paper (referenced below), applies
 * deferred sequence-number modulation, and advances the per-peer TCP
 * state machine and state timeouts.
 *
 * Returns PF_PASS to let the packet through, or PF_DROP with *reason
 * set.  Sets *copyback when the TCP header in 'th' was rewritten and
 * must be copied back into the mbuf by the caller.
 */
int
pf_tcp_track_full(struct pf_state_peer *src, struct pf_state_peer *dst,
    struct pf_state **state, struct pfi_kif *kif, struct mbuf *m, int off,
    struct pf_pdesc *pd, u_short *reason, int *copyback)
{
	struct tcphdr		*th = pd->hdr.tcp;
	u_int16_t		 win = ntohs(th->th_win);
	u_int32_t		 ack, end, seq, orig_seq;
	u_int8_t		 sws, dws;
	int			 ackskew;

	/*
	 * Window scale factors only apply once both sides have advertised
	 * them; a SYN's window is never scaled.
	 */
	if (src->wscale && dst->wscale && !(th->th_flags & TH_SYN)) {
		sws = src->wscale & PF_WSCALE_MASK;
		dws = dst->wscale & PF_WSCALE_MASK;
	} else
		sws = dws = 0;

	/*
	 * Sequence tracking algorithm from Guido van Rooij's paper:
	 *   http://www.madison-gurkha.com/publications/tcp_filtering/
	 *      tcp_filtering.ps
	 */

	orig_seq = seq = ntohl(th->th_seq);
	if (src->seqlo == 0) {
		/* First packet from this end. Set its state */

		if (((*state)->state_flags & PFSTATE_SCRUB_TCP || dst->scrub) &&
		    src->scrub == NULL) {
			if (pf_normalize_tcp_init(m, off, pd, th, src, dst)) {
				REASON_SET(reason, PFRES_MEMORY);
				return (PF_DROP);
			}
		}

		/* Deferred generation of sequence number modulator */
		if (dst->seqdiff && !src->seqdiff) {
			/* use random iss for the TCP server */
			while ((src->seqdiff = arc4random() - seq) == 0)
				;
			ack = ntohl(th->th_ack) - dst->seqdiff;
			pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
			    src->seqdiff), 0);
			pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
			*copyback = 1;
		} else {
			ack = ntohl(th->th_ack);
		}

		/* 'end' is the sequence number after the last octet sent. */
		end = seq + pd->p_len;
		if (th->th_flags & TH_SYN) {
			end++;
			if (dst->wscale & PF_WSCALE_FLAG) {
				src->wscale = pf_get_wscale(m, off, th->th_off,
				    pd->af);
				if (src->wscale & PF_WSCALE_FLAG) {
					/* Remove scale factor from initial
					 * window */
					sws = src->wscale & PF_WSCALE_MASK;
					win = ((u_int32_t)win + (1 << sws) - 1)
					    >> sws;
					dws = dst->wscale & PF_WSCALE_MASK;
				} else {
					/* fixup other window */
					dst->max_win <<= dst->wscale &
					    PF_WSCALE_MASK;
					/* in case of a retrans SYN|ACK */
					dst->wscale = 0;
				}
			}
		}
		if (th->th_flags & TH_FIN)
			end++;

		src->seqlo = seq;
		if (src->state < TCPS_SYN_SENT)
			src->state = TCPS_SYN_SENT;

		/*
		 * May need to slide the window (seqhi may have been set by
		 * the crappy stack check or if we picked up the connection
		 * after establishment)
		 */
		if (src->seqhi == 1 ||
		    SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi))
			src->seqhi = end + MAX(1, dst->max_win << dws);
		if (win > src->max_win)
			src->max_win = win;

	} else {
		ack = ntohl(th->th_ack) - dst->seqdiff;
		if (src->seqdiff) {
			/* Modulate sequence numbers */
			pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
			    src->seqdiff), 0);
			pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
			*copyback = 1;
		}
		end = seq + pd->p_len;
		if (th->th_flags & TH_SYN)
			end++;
		if (th->th_flags & TH_FIN)
			end++;
	}

	if ((th->th_flags & TH_ACK) == 0) {
		/* Let it pass through the ack skew check */
		ack = dst->seqlo;
	} else if ((ack == 0 &&
	    (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) ||
	    /* broken tcp stacks do not set ack */
	    (dst->state < TCPS_SYN_SENT)) {
		/*
		 * Many stacks (ours included) will set the ACK number in an
		 * FIN|ACK if the SYN times out -- no sequence to ACK.
		 */
		ack = dst->seqlo;
	}

	if (seq == end) {
		/* Ease sequencing restrictions on no data packets */
		seq = src->seqlo;
		end = seq;
	}

	/* How far behind the peer's highest-seen sequence this ACK is. */
	ackskew = dst->seqlo - ack;


	/*
	 * Need to demodulate the sequence numbers in any TCP SACK options
	 * (Selective ACK). We could optionally validate the SACK values
	 * against the current ACK window, either forwards or backwards, but
	 * I'm not confident that SACK has been implemented properly
	 * everywhere. It wouldn't surprise me if several stacks accidently
	 * SACK too far backwards of previously ACKed data. There really aren't
	 * any security implications of bad SACKing unless the target stack
	 * doesn't validate the option length correctly. Someone trying to
	 * spoof into a TCP connection won't bother blindly sending SACK
	 * options anyway.
	 */
	if (dst->seqdiff && (th->th_off << 2) > sizeof(struct tcphdr)) {
		if (pf_modulate_sack(m, off, pd, th, dst))
			*copyback = 1;
	}


#define MAXACKWINDOW (0xffff + 1500)	/* 1500 is an arbitrary fudge factor */
	if (SEQ_GEQ(src->seqhi, end) &&
	    /* Last octet inside other's window space */
	    SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) &&
	    /* Retrans: not more than one window back */
	    (ackskew >= -MAXACKWINDOW) &&
	    /* Acking not more than one reassembled fragment backwards */
	    (ackskew <= (MAXACKWINDOW << sws)) &&
	    /* Acking not more than one window forward */
	    ((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo ||
	    (orig_seq == src->seqlo + 1) || (orig_seq + 1 == src->seqlo))) {
	    /* Require an exact/+1 sequence match on resets when possible */

		if (dst->scrub || src->scrub) {
			if (pf_normalize_tcp_stateful(m, off, pd, reason, th,
			    *state, src, dst, copyback))
				return (PF_DROP);
		}

		/* update max window */
		if (src->max_win < win)
			src->max_win = win;
		/* synchronize sequencing */
		if (SEQ_GT(end, src->seqlo))
			src->seqlo = end;
		/* slide the window of what the other end can send */
		if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
			dst->seqhi = ack + MAX((win << sws), 1);

		/* update states */
		if (th->th_flags & TH_SYN)
			if (src->state < TCPS_SYN_SENT)
				src->state = TCPS_SYN_SENT;
		if (th->th_flags & TH_FIN)
			if (src->state < TCPS_CLOSING)
				src->state = TCPS_CLOSING;
		if (th->th_flags & TH_ACK) {
			if (dst->state == TCPS_SYN_SENT) {
				dst->state = TCPS_ESTABLISHED;
				if (src->state == TCPS_ESTABLISHED &&
				    !SLIST_EMPTY(&(*state)->src_nodes) &&
				    pf_src_connlimit(state)) {
					REASON_SET(reason, PFRES_SRCLIMIT);
					return (PF_DROP);
				}
			} else if (dst->state == TCPS_CLOSING)
				dst->state = TCPS_FIN_WAIT_2;
		}
		if (th->th_flags & TH_RST)
			src->state = dst->state = TCPS_TIME_WAIT;

		/* update expire time */
		(*state)->expire = time_second;
		if (src->state >= TCPS_FIN_WAIT_2 &&
		    dst->state >= TCPS_FIN_WAIT_2)
			(*state)->timeout = PFTM_TCP_CLOSED;
		else if (src->state >= TCPS_CLOSING &&
		    dst->state >= TCPS_CLOSING)
			(*state)->timeout = PFTM_TCP_FIN_WAIT;
		else if (src->state < TCPS_ESTABLISHED ||
		    dst->state < TCPS_ESTABLISHED)
			(*state)->timeout = PFTM_TCP_OPENING;
		else if (src->state >= TCPS_CLOSING ||
		    dst->state >= TCPS_CLOSING)
			(*state)->timeout = PFTM_TCP_CLOSING;
		else
			(*state)->timeout = PFTM_TCP_ESTABLISHED;

		/* Fall through to PASS packet */
	} else if ((dst->state < TCPS_SYN_SENT ||
	    dst->state >= TCPS_FIN_WAIT_2 ||
	    src->state >= TCPS_FIN_WAIT_2) &&
	    SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) &&
	    /* Within a window forward of the originating packet */
	    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) {
	    /* Within a window backward of the originating packet */

		/*
		 * This currently handles three situations:
		 *  1) Stupid stacks will shotgun SYNs before their peer
		 *     replies.
		 *  2) When PF catches an already established stream (the
		 *     firewall rebooted, the state table was flushed, routes
		 *     changed...)
		 *  3) Packets get funky immediately after the connection
		 *     closes (this should catch Solaris spurious ACK|FINs
		 *     that web servers like to spew after a close)
		 *
		 * This must be a little more careful than the above code
		 * since packet floods will also be caught here. We don't
		 * update the TTL here to mitigate the damage of a packet
		 * flood and so the same code can handle awkward establishment
		 * and a loosened connection close.
		 * In the establishment case, a correct peer response will
		 * validate the connection, go through the normal state code
		 * and keep updating the state TTL.
		 */

		if (pf_status.debug >= LOG_NOTICE) {
			log(LOG_NOTICE, "pf: loose state match: ");
			pf_print_state(*state);
			pf_print_flags(th->th_flags);
			addlog(" seq=%u (%u) ack=%u len=%u ackskew=%d "
			    "pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack,
			    pd->p_len, ackskew, (*state)->packets[0],
			    (*state)->packets[1],
			    pd->dir == PF_IN ? "in" : "out",
			    pd->dir == (*state)->direction ? "fwd" : "rev");
		}

		if (dst->scrub || src->scrub) {
			if (pf_normalize_tcp_stateful(m, off, pd, reason, th,
			    *state, src, dst, copyback))
				return (PF_DROP);
		}

		/* update max window */
		if (src->max_win < win)
			src->max_win = win;
		/* synchronize sequencing */
		if (SEQ_GT(end, src->seqlo))
			src->seqlo = end;
		/* slide the window of what the other end can send */
		if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
			dst->seqhi = ack + MAX((win << sws), 1);

		/*
		 * Cannot set dst->seqhi here since this could be a shotgunned
		 * SYN and not an already established connection.
		 */
		if (th->th_flags & TH_FIN)
			if (src->state < TCPS_CLOSING)
				src->state = TCPS_CLOSING;
		if (th->th_flags & TH_RST)
			src->state = dst->state = TCPS_TIME_WAIT;

		/* Fall through to PASS packet */
	} else {
		if ((*state)->dst.state == TCPS_SYN_SENT &&
		    (*state)->src.state == TCPS_SYN_SENT) {
			/* Send RST for state mismatches during handshake */
			if (!(th->th_flags & TH_RST))
				pf_send_tcp((*state)->rule.ptr, pd->af,
				    pd->dst, pd->src, th->th_dport,
				    th->th_sport, ntohl(th->th_ack), 0,
				    TH_RST, 0, 0,
				    (*state)->rule.ptr->return_ttl, 1, 0,
				    pd->rdomain, pd->eh, kif->pfik_ifp);
			src->seqlo = 0;
			src->seqhi = 1;
			src->max_win = 1;
		} else if (pf_status.debug >= LOG_NOTICE) {
			log(LOG_NOTICE, "pf: BAD state: ");
			pf_print_state(*state);
			pf_print_flags(th->th_flags);
			addlog(" seq=%u (%u) ack=%u len=%u ackskew=%d "
			    "pkts=%llu:%llu dir=%s,%s\n",
			    seq, orig_seq, ack, pd->p_len, ackskew,
			    (*state)->packets[0], (*state)->packets[1],
			    pd->dir == PF_IN ? "in" : "out",
			    pd->dir == (*state)->direction ? "fwd" : "rev");
			addlog("pf: State failure on: %c %c %c %c | %c %c\n",
			    SEQ_GEQ(src->seqhi, end) ? ' ' : '1',
			    SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ?
			    ' ': '2',
			    (ackskew >= -MAXACKWINDOW) ? ' ' : '3',
			    (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4',
			    SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) ?' ' :'5',
			    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' ' :'6');
		}
		REASON_SET(reason, PFRES_BADSTATE);
		return (PF_DROP);
	}

	return (PF_PASS);
}

/*
 * Sloppy TCP state tracking: drives the per-peer state machine from the
 * TCP flags alone, with no sequence-window validation.  Used for states
 * created with PFSTATE_SLOPPY, e.g. when only one half of the connection
 * is visible to pf.  Returns PF_PASS or PF_DROP with *reason set.
 */
int
pf_tcp_track_sloppy(struct pf_state_peer *src, struct pf_state_peer *dst,
    struct pf_state **state, struct pf_pdesc *pd, u_short *reason)
{
	struct tcphdr		*th = pd->hdr.tcp;

	if (th->th_flags & TH_SYN)
		if (src->state < TCPS_SYN_SENT)
			src->state = TCPS_SYN_SENT;
	if (th->th_flags & TH_FIN)
		if (src->state < TCPS_CLOSING)
			src->state = TCPS_CLOSING;
	if (th->th_flags & TH_ACK) {
		if (dst->state == TCPS_SYN_SENT) {
			dst->state = TCPS_ESTABLISHED;
			if (src->state == TCPS_ESTABLISHED &&
			    !SLIST_EMPTY(&(*state)->src_nodes) &&
			    pf_src_connlimit(state)) {
				REASON_SET(reason, PFRES_SRCLIMIT);
				return (PF_DROP);
			}
		} else if (dst->state == TCPS_CLOSING) {
			dst->state = TCPS_FIN_WAIT_2;
		} else if (src->state == TCPS_SYN_SENT &&
		    dst->state < TCPS_SYN_SENT) {
			/*
			 * Handle a special sloppy case where we only see one
			 * half of the connection. If there is a ACK after
			 * the initial SYN without ever seeing a packet from
			 * the destination, set the connection to established.
			 */
			dst->state = src->state = TCPS_ESTABLISHED;
			if (!SLIST_EMPTY(&(*state)->src_nodes) &&
			    pf_src_connlimit(state)) {
				REASON_SET(reason, PFRES_SRCLIMIT);
				return (PF_DROP);
			}
		} else if (src->state == TCPS_CLOSING &&
		    dst->state == TCPS_ESTABLISHED &&
		    dst->seqlo == 0) {
			/*
			 * Handle the closing of half connections where we
			 * don't see the full bidirectional FIN/ACK+ACK
			 * handshake.
3807 */ 3808 dst->state = TCPS_CLOSING; 3809 } 3810 } 3811 if (th->th_flags & TH_RST) 3812 src->state = dst->state = TCPS_TIME_WAIT; 3813 3814 /* update expire time */ 3815 (*state)->expire = time_second; 3816 if (src->state >= TCPS_FIN_WAIT_2 && 3817 dst->state >= TCPS_FIN_WAIT_2) 3818 (*state)->timeout = PFTM_TCP_CLOSED; 3819 else if (src->state >= TCPS_CLOSING && 3820 dst->state >= TCPS_CLOSING) 3821 (*state)->timeout = PFTM_TCP_FIN_WAIT; 3822 else if (src->state < TCPS_ESTABLISHED || 3823 dst->state < TCPS_ESTABLISHED) 3824 (*state)->timeout = PFTM_TCP_OPENING; 3825 else if (src->state >= TCPS_CLOSING || 3826 dst->state >= TCPS_CLOSING) 3827 (*state)->timeout = PFTM_TCP_CLOSING; 3828 else 3829 (*state)->timeout = PFTM_TCP_ESTABLISHED; 3830 3831 return (PF_PASS); 3832 } 3833 3834 int 3835 pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, 3836 struct mbuf *m, int off, struct pf_pdesc *pd, 3837 u_short *reason) 3838 { 3839 struct pf_state_key_cmp key; 3840 struct tcphdr *th = pd->hdr.tcp; 3841 int copyback = 0; 3842 struct pf_state_peer *src, *dst; 3843 struct pf_state_key *sk; 3844 3845 key.af = pd->af; 3846 key.proto = IPPROTO_TCP; 3847 key.rdomain = pd->rdomain; 3848 if (direction == PF_IN) { /* wire side, straight */ 3849 PF_ACPY(&key.addr[0], pd->src, key.af); 3850 PF_ACPY(&key.addr[1], pd->dst, key.af); 3851 key.port[0] = th->th_sport; 3852 key.port[1] = th->th_dport; 3853 } else { /* stack side, reverse */ 3854 PF_ACPY(&key.addr[1], pd->src, key.af); 3855 PF_ACPY(&key.addr[0], pd->dst, key.af); 3856 key.port[1] = th->th_sport; 3857 key.port[0] = th->th_dport; 3858 } 3859 3860 STATE_LOOKUP(kif, &key, direction, *state, m); 3861 3862 if (direction == (*state)->direction) { 3863 src = &(*state)->src; 3864 dst = &(*state)->dst; 3865 } else { 3866 src = &(*state)->dst; 3867 dst = &(*state)->src; 3868 } 3869 3870 sk = (*state)->key[pd->didx]; 3871 3872 if ((*state)->src.state == PF_TCPS_PROXY_SRC) { 3873 if (direction != 
(*state)->direction) { 3874 REASON_SET(reason, PFRES_SYNPROXY); 3875 return (PF_SYNPROXY_DROP); 3876 } 3877 if (th->th_flags & TH_SYN) { 3878 if (ntohl(th->th_seq) != (*state)->src.seqlo) { 3879 REASON_SET(reason, PFRES_SYNPROXY); 3880 return (PF_DROP); 3881 } 3882 pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst, 3883 pd->src, th->th_dport, th->th_sport, 3884 (*state)->src.seqhi, ntohl(th->th_seq) + 1, 3885 TH_SYN|TH_ACK, 0, (*state)->src.mss, 0, 1, 3886 0, pd->rdomain, NULL, NULL); 3887 REASON_SET(reason, PFRES_SYNPROXY); 3888 return (PF_SYNPROXY_DROP); 3889 } else if (!(th->th_flags & TH_ACK) || 3890 (ntohl(th->th_ack) != (*state)->src.seqhi + 1) || 3891 (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) { 3892 REASON_SET(reason, PFRES_SYNPROXY); 3893 return (PF_DROP); 3894 } else if (!SLIST_EMPTY(&(*state)->src_nodes) && 3895 pf_src_connlimit(state)) { 3896 REASON_SET(reason, PFRES_SRCLIMIT); 3897 return (PF_DROP); 3898 } else 3899 (*state)->src.state = PF_TCPS_PROXY_DST; 3900 } 3901 if ((*state)->src.state == PF_TCPS_PROXY_DST) { 3902 if (direction == (*state)->direction) { 3903 if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) || 3904 (ntohl(th->th_ack) != (*state)->src.seqhi + 1) || 3905 (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) { 3906 REASON_SET(reason, PFRES_SYNPROXY); 3907 return (PF_DROP); 3908 } 3909 (*state)->src.max_win = MAX(ntohs(th->th_win), 1); 3910 if ((*state)->dst.seqhi == 1) 3911 (*state)->dst.seqhi = htonl(arc4random()); 3912 pf_send_tcp((*state)->rule.ptr, pd->af, 3913 &sk->addr[pd->sidx], &sk->addr[pd->didx], 3914 sk->port[pd->sidx], sk->port[pd->didx], 3915 (*state)->dst.seqhi, 0, TH_SYN, 0, 3916 (*state)->src.mss, 0, 0, (*state)->tag, 3917 sk->rdomain, NULL, NULL); 3918 REASON_SET(reason, PFRES_SYNPROXY); 3919 return (PF_SYNPROXY_DROP); 3920 } else if (((th->th_flags & (TH_SYN|TH_ACK)) != 3921 (TH_SYN|TH_ACK)) || 3922 (ntohl(th->th_ack) != (*state)->dst.seqhi + 1)) { 3923 REASON_SET(reason, PFRES_SYNPROXY); 3924 return (PF_DROP); 3925 } 
else { 3926 (*state)->dst.max_win = MAX(ntohs(th->th_win), 1); 3927 (*state)->dst.seqlo = ntohl(th->th_seq); 3928 pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst, 3929 pd->src, th->th_dport, th->th_sport, 3930 ntohl(th->th_ack), ntohl(th->th_seq) + 1, 3931 TH_ACK, (*state)->src.max_win, 0, 0, 0, 3932 (*state)->tag, pd->rdomain, NULL, NULL); 3933 pf_send_tcp((*state)->rule.ptr, pd->af, 3934 &sk->addr[pd->sidx], &sk->addr[pd->didx], 3935 sk->port[pd->sidx], sk->port[pd->didx], 3936 (*state)->src.seqhi + 1, (*state)->src.seqlo + 1, 3937 TH_ACK, (*state)->dst.max_win, 0, 0, 1, 3938 0, sk->rdomain, NULL, NULL); 3939 (*state)->src.seqdiff = (*state)->dst.seqhi - 3940 (*state)->src.seqlo; 3941 (*state)->dst.seqdiff = (*state)->src.seqhi - 3942 (*state)->dst.seqlo; 3943 (*state)->src.seqhi = (*state)->src.seqlo + 3944 (*state)->dst.max_win; 3945 (*state)->dst.seqhi = (*state)->dst.seqlo + 3946 (*state)->src.max_win; 3947 (*state)->src.wscale = (*state)->dst.wscale = 0; 3948 (*state)->src.state = (*state)->dst.state = 3949 TCPS_ESTABLISHED; 3950 REASON_SET(reason, PFRES_SYNPROXY); 3951 return (PF_SYNPROXY_DROP); 3952 } 3953 } 3954 3955 if (((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN) && 3956 dst->state >= TCPS_FIN_WAIT_2 && 3957 src->state >= TCPS_FIN_WAIT_2) { 3958 if (pf_status.debug >= LOG_NOTICE) { 3959 log(LOG_NOTICE, "pf: state reuse "); 3960 pf_print_state(*state); 3961 pf_print_flags(th->th_flags); 3962 addlog("\n"); 3963 } 3964 /* XXX make sure it's the same direction ?? 
*/ 3965 (*state)->src.state = (*state)->dst.state = TCPS_CLOSED; 3966 pf_unlink_state(*state); 3967 *state = NULL; 3968 return (PF_DROP); 3969 } 3970 3971 if ((*state)->state_flags & PFSTATE_SLOPPY) { 3972 if (pf_tcp_track_sloppy(src, dst, state, pd, reason) == PF_DROP) 3973 return (PF_DROP); 3974 } else { 3975 if (pf_tcp_track_full(src, dst, state, kif, m, off, pd, reason, 3976 ©back) == PF_DROP) 3977 return (PF_DROP); 3978 } 3979 3980 /* translate source/destination address, if necessary */ 3981 if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { 3982 struct pf_state_key *nk = (*state)->key[pd->didx]; 3983 3984 if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af) || 3985 nk->port[pd->sidx] != th->th_sport) 3986 pf_change_ap(pd->src, &th->th_sport, &th->th_sum, 3987 &nk->addr[pd->sidx], nk->port[pd->sidx], 0, pd->af); 3988 if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af) || 3989 pd->rdomain != nk->rdomain) 3990 pd->destchg = 1; 3991 if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af) || 3992 nk->port[pd->didx] != th->th_dport) 3993 pf_change_ap(pd->dst, &th->th_dport, &th->th_sum, 3994 &nk->addr[pd->didx], nk->port[pd->didx], 0, pd->af); 3995 m->m_pkthdr.rdomain = nk->rdomain; 3996 copyback = 1; 3997 } 3998 3999 /* Copyback sequence modulation or stateful scrub changes if needed */ 4000 if (copyback) 4001 m_copyback(m, off, sizeof(*th), th, M_NOWAIT); 4002 4003 return (PF_PASS); 4004 } 4005 4006 int 4007 pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif, 4008 struct mbuf *m, int off, struct pf_pdesc *pd) 4009 { 4010 struct pf_state_peer *src, *dst; 4011 struct pf_state_key_cmp key; 4012 struct udphdr *uh = pd->hdr.udp; 4013 4014 key.af = pd->af; 4015 key.proto = IPPROTO_UDP; 4016 key.rdomain = pd->rdomain; 4017 if (direction == PF_IN) { /* wire side, straight */ 4018 PF_ACPY(&key.addr[0], pd->src, key.af); 4019 PF_ACPY(&key.addr[1], pd->dst, key.af); 4020 key.port[0] = uh->uh_sport; 4021 key.port[1] = uh->uh_dport; 4022 } else { 
					/* stack side, reverse */
		PF_ACPY(&key.addr[1], pd->src, key.af);
		PF_ACPY(&key.addr[0], pd->dst, key.af);
		key.port[1] = uh->uh_sport;
		key.port[0] = uh->uh_dport;
	}

	STATE_LOOKUP(kif, &key, direction, *state, m);

	/* 'src' is the peer the packet came from, 'dst' the other side. */
	if (direction == (*state)->direction) {
		src = &(*state)->src;
		dst = &(*state)->dst;
	} else {
		src = &(*state)->dst;
		dst = &(*state)->src;
	}

	/* update states */
	if (src->state < PFUDPS_SINGLE)
		src->state = PFUDPS_SINGLE;
	if (dst->state == PFUDPS_SINGLE)
		dst->state = PFUDPS_MULTIPLE;

	/* update expire time */
	(*state)->expire = time_second;
	if (src->state == PFUDPS_MULTIPLE && dst->state == PFUDPS_MULTIPLE)
		(*state)->timeout = PFTM_UDP_MULTIPLE;
	else
		(*state)->timeout = PFTM_UDP_SINGLE;

	/* translate source/destination address, if necessary */
	if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
		struct pf_state_key *nk = (*state)->key[pd->didx];

		if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af) ||
		    nk->port[pd->sidx] != uh->uh_sport)
			pf_change_ap(pd->src, &uh->uh_sport, &uh->uh_sum,
			    &nk->addr[pd->sidx], nk->port[pd->sidx], 1, pd->af);
		if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af) ||
		    pd->rdomain != nk->rdomain)
			pd->destchg = 1;
		if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af) ||
		    nk->port[pd->didx] != uh->uh_dport)
			pf_change_ap(pd->dst, &uh->uh_dport, &uh->uh_sum,
			    &nk->addr[pd->didx], nk->port[pd->didx], 1, pd->af);
		m->m_pkthdr.rdomain = nk->rdomain;
		m_copyback(m, off, sizeof(*uh), uh, M_NOWAIT);
	}

	return (PF_PASS);
}

/*
 * Build an ICMP state key from the packet descriptor and look it up.
 *
 * The ICMP id and (virtual) type are stored in the key's port slots;
 * which slot gets the id depends on icmp_dir, and *iidx records that
 * index for the caller's later id rewriting.  For IPv6 multicast cases
 * (neighbor solicitation etc.) the source address slot is filled with
 * the computed multicast address instead of pd->src.
 *
 * Returns -1 on success with *state set, or PF_DROP when the message
 * flows in the wrong direction for the matching rule (also returns via
 * STATE_LOOKUP when no state matches).
 */
int
pf_icmp_state_lookup(struct pf_state_key_cmp *key, struct pf_pdesc *pd,
    struct pf_state **state, struct mbuf *m, int direction, struct pfi_kif *kif,
    u_int16_t icmpid, u_int16_t type, int icmp_dir, int *iidx, int multi,
    int inner)
{
	key->af = pd->af;
	key->proto = pd->proto;
	key->rdomain = pd->rdomain;
	if (icmp_dir == PF_IN) {
		*iidx = pd->sidx;
		key->port[pd->sidx] = icmpid;
		key->port[pd->didx] = type;
	} else {
		*iidx = pd->didx;
		key->port[pd->sidx] = type;
		key->port[pd->didx] = icmpid;
	}
	if (pd->af == AF_INET6 && multi != PF_ICMP_MULTI_NONE) {
		switch (multi) {
		case PF_ICMP_MULTI_SOLICITED:
			/* solicited-node multicast of the source address */
			key->addr[pd->sidx].addr32[0] = IPV6_ADDR_INT32_MLL;
			key->addr[pd->sidx].addr32[1] = 0;
			key->addr[pd->sidx].addr32[2] = IPV6_ADDR_INT32_ONE;
			key->addr[pd->sidx].addr32[3] = pd->src->addr32[3];
			key->addr[pd->sidx].addr8[12] = 0xff;
			break;
		case PF_ICMP_MULTI_LINK:
			/* all-nodes link-local multicast */
			key->addr[pd->sidx].addr32[0] = IPV6_ADDR_INT32_MLL;
			key->addr[pd->sidx].addr32[1] = 0;
			key->addr[pd->sidx].addr32[2] = 0;
			key->addr[pd->sidx].addr32[3] = IPV6_ADDR_INT32_ONE;
			break;
		}
	} else
		PF_ACPY(&key->addr[pd->sidx], pd->src, key->af);
	PF_ACPY(&key->addr[pd->didx], pd->dst, key->af);

	STATE_LOOKUP(kif, key, direction, *state, m);

	/* Is this ICMP message flowing in right direction? */
	if ((*state)->rule.ptr->type &&
	    (((!inner && (*state)->direction == direction) ||
	    (inner && (*state)->direction != direction)) ?
	    PF_IN : PF_OUT) != icmp_dir) {
		if (pf_status.debug >= LOG_NOTICE) {
			log(LOG_NOTICE,
			    "pf: icmp type %d in wrong direction (%d): ",
			    ntohs(type), icmp_dir);
			pf_print_state(*state);
			addlog("\n");
		}
		return (PF_DROP);
	}
	return (-1);
}

/*
 * Test an incoming ICMP/ICMPv6 packet against the state table.
 *
 * Query/reply messages (per pf_icmp_mapping()) are matched against an
 * ICMP state directly.  Error messages carry the header of the packet
 * that triggered them; that inner header is pulled out and matched
 * against the corresponding TCP/UDP/ICMP/other state, and both outer
 * and inner headers are rewritten when NAT applies.  Returns PF_PASS
 * or PF_DROP with *reason set.
 */
int
pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
    struct mbuf *m, int off, struct pf_pdesc *pd, u_short *reason)
{
	struct pf_addr	*saddr = pd->src, *daddr = pd->dst;
	u_int16_t	 icmpid, *icmpsum, virtual_id, virtual_type;
	u_int8_t	 icmptype;
	int		 icmp_dir, iidx, ret, multi;
	struct pf_state_key_cmp key;

	switch (pd->proto) {
#ifdef INET
	case IPPROTO_ICMP:
		icmptype = pd->hdr.icmp->icmp_type;
		icmpid = pd->hdr.icmp->icmp_id;
		icmpsum = &pd->hdr.icmp->icmp_cksum;
		break;
#endif /* INET */
#ifdef INET6
	case IPPROTO_ICMPV6:
		icmptype = pd->hdr.icmp6->icmp6_type;
		icmpid = pd->hdr.icmp6->icmp6_id;
		icmpsum = &pd->hdr.icmp6->icmp6_cksum;
		break;
#endif /* INET6 */
	}

	if (pf_icmp_mapping(pd, icmptype, &icmp_dir, &multi,
	    &virtual_id, &virtual_type) == 0) {
		/*
		 * ICMP query/reply message not related to a TCP/UDP packet.
		 * Search for an ICMP state.
4163 */ 4164 ret = pf_icmp_state_lookup(&key, pd, state, m, direction, 4165 kif, virtual_id, virtual_type, icmp_dir, &iidx, 4166 PF_ICMP_MULTI_NONE, 0); 4167 if (ret >= 0) { 4168 if (ret == PF_DROP && pd->af == AF_INET6 && 4169 icmp_dir == PF_OUT) { 4170 ret = pf_icmp_state_lookup(&key, pd, state, m, 4171 direction, kif, virtual_id, virtual_type, 4172 icmp_dir, &iidx, multi, 0); 4173 if (ret >= 0) 4174 return (ret); 4175 } else 4176 return (ret); 4177 } 4178 4179 (*state)->expire = time_second; 4180 (*state)->timeout = PFTM_ICMP_ERROR_REPLY; 4181 4182 /* translate source/destination address, if necessary */ 4183 if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { 4184 struct pf_state_key *nk = (*state)->key[pd->didx]; 4185 4186 if (pd->rdomain != nk->rdomain) 4187 pd->destchg = 1; 4188 m->m_pkthdr.rdomain = nk->rdomain; 4189 4190 switch (pd->af) { 4191 #ifdef INET 4192 case AF_INET: 4193 if (PF_ANEQ(pd->src, 4194 &nk->addr[pd->sidx], AF_INET)) 4195 pf_change_a(&saddr->v4.s_addr, NULL, 4196 nk->addr[pd->sidx].v4.s_addr, 0); 4197 4198 if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], 4199 AF_INET)) { 4200 pf_change_a(&daddr->v4.s_addr, NULL, 4201 nk->addr[pd->didx].v4.s_addr, 0); 4202 pd->destchg = 1; 4203 } 4204 4205 if (nk->port[iidx] != 4206 pd->hdr.icmp->icmp_id) { 4207 pd->hdr.icmp->icmp_cksum = 4208 pf_cksum_fixup( 4209 pd->hdr.icmp->icmp_cksum, 4210 pd->hdr.icmp->icmp_id, 4211 nk->port[iidx], 0); 4212 pd->hdr.icmp->icmp_id = nk->port[iidx]; 4213 } 4214 4215 m_copyback(m, off, ICMP_MINLEN, 4216 pd->hdr.icmp, M_NOWAIT); 4217 break; 4218 #endif /* INET */ 4219 #ifdef INET6 4220 case AF_INET6: 4221 if (PF_ANEQ(pd->src, 4222 &nk->addr[pd->sidx], AF_INET6)) 4223 pf_change_a6(saddr, 4224 &pd->hdr.icmp6->icmp6_cksum, 4225 &nk->addr[pd->sidx], 0); 4226 4227 if (PF_ANEQ(pd->dst, 4228 &nk->addr[pd->didx], AF_INET6)) { 4229 pf_change_a6(daddr, 4230 &pd->hdr.icmp6->icmp6_cksum, 4231 &nk->addr[pd->didx], 0); 4232 pd->destchg = 1; 4233 } 4234 4235 m_copyback(m, off, 4236 
sizeof(struct icmp6_hdr), 4237 pd->hdr.icmp6, M_NOWAIT); 4238 break; 4239 #endif /* INET6 */ 4240 } 4241 } 4242 return (PF_PASS); 4243 4244 } else { 4245 /* 4246 * ICMP error message in response to a TCP/UDP packet. 4247 * Extract the inner TCP/UDP header and search for that state. 4248 */ 4249 4250 struct pf_pdesc pd2; 4251 #ifdef INET 4252 struct ip h2; 4253 #endif /* INET */ 4254 #ifdef INET6 4255 struct ip6_hdr h2_6; 4256 int fragoff2, extoff2; 4257 u_int32_t jumbolen; 4258 #endif /* INET6 */ 4259 int ipoff2; 4260 int off2; 4261 4262 pd2.af = pd->af; 4263 pd2.rdomain = pd->rdomain; 4264 /* Payload packet is from the opposite direction. */ 4265 pd2.sidx = (direction == PF_IN) ? 1 : 0; 4266 pd2.didx = (direction == PF_IN) ? 0 : 1; 4267 switch (pd->af) { 4268 #ifdef INET 4269 case AF_INET: 4270 /* offset of h2 in mbuf chain */ 4271 ipoff2 = off + ICMP_MINLEN; 4272 4273 if (!pf_pull_hdr(m, ipoff2, &h2, sizeof(h2), 4274 NULL, reason, pd2.af)) { 4275 DPFPRINTF(LOG_NOTICE, 4276 "ICMP error message too short (ip)"); 4277 return (PF_DROP); 4278 } 4279 /* 4280 * ICMP error messages don't refer to non-first 4281 * fragments 4282 */ 4283 if (h2.ip_off & htons(IP_OFFMASK)) { 4284 REASON_SET(reason, PFRES_FRAG); 4285 return (PF_DROP); 4286 } 4287 4288 /* offset of protocol header that follows h2 */ 4289 off2 = ipoff2 + (h2.ip_hl << 2); 4290 4291 pd2.proto = h2.ip_p; 4292 pd2.src = (struct pf_addr *)&h2.ip_src; 4293 pd2.dst = (struct pf_addr *)&h2.ip_dst; 4294 pd2.ip_sum = &h2.ip_sum; 4295 break; 4296 #endif /* INET */ 4297 #ifdef INET6 4298 case AF_INET6: 4299 ipoff2 = off + sizeof(struct icmp6_hdr); 4300 4301 if (!pf_pull_hdr(m, ipoff2, &h2_6, sizeof(h2_6), 4302 NULL, reason, pd2.af)) { 4303 DPFPRINTF(LOG_NOTICE, 4304 "ICMP error message too short (ip6)"); 4305 return (PF_DROP); 4306 } 4307 4308 off2 = ipoff2; 4309 if (pf_walk_header6(m, &h2_6, &off2, &extoff2, 4310 &fragoff2, &pd2.proto, &jumbolen, reason) 4311 != PF_PASS) 4312 return (PF_DROP); 4313 4314 pd2.src = (struct 
pf_addr *)&h2_6.ip6_src; 4315 pd2.dst = (struct pf_addr *)&h2_6.ip6_dst; 4316 pd2.ip_sum = NULL; 4317 break; 4318 #endif /* INET6 */ 4319 } 4320 4321 switch (pd2.proto) { 4322 case IPPROTO_TCP: { 4323 struct tcphdr th; 4324 u_int32_t seq; 4325 struct pf_state_peer *src, *dst; 4326 u_int8_t dws; 4327 int copyback = 0; 4328 4329 /* 4330 * Only the first 8 bytes of the TCP header can be 4331 * expected. Don't access any TCP header fields after 4332 * th_seq, an ackskew test is not possible. 4333 */ 4334 if (!pf_pull_hdr(m, off2, &th, 8, NULL, reason, 4335 pd2.af)) { 4336 DPFPRINTF(LOG_NOTICE, 4337 "ICMP error message too short (tcp)"); 4338 return (PF_DROP); 4339 } 4340 4341 key.af = pd2.af; 4342 key.proto = IPPROTO_TCP; 4343 key.rdomain = pd2.rdomain; 4344 PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); 4345 PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); 4346 key.port[pd2.sidx] = th.th_sport; 4347 key.port[pd2.didx] = th.th_dport; 4348 4349 STATE_LOOKUP(kif, &key, direction, *state, m); 4350 4351 if (direction == (*state)->direction) { 4352 src = &(*state)->dst; 4353 dst = &(*state)->src; 4354 } else { 4355 src = &(*state)->src; 4356 dst = &(*state)->dst; 4357 } 4358 4359 if (src->wscale && dst->wscale) 4360 dws = dst->wscale & PF_WSCALE_MASK; 4361 else 4362 dws = 0; 4363 4364 /* Demodulate sequence number */ 4365 seq = ntohl(th.th_seq) - src->seqdiff; 4366 if (src->seqdiff) { 4367 pf_change_a(&th.th_seq, icmpsum, 4368 htonl(seq), 0); 4369 copyback = 1; 4370 } 4371 4372 if (!((*state)->state_flags & PFSTATE_SLOPPY) && 4373 (!SEQ_GEQ(src->seqhi, seq) || 4374 !SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)))) { 4375 if (pf_status.debug >= LOG_NOTICE) { 4376 log(LOG_NOTICE, 4377 "pf: BAD ICMP %d:%d ", 4378 icmptype, pd->hdr.icmp->icmp_code); 4379 pf_print_host(pd->src, 0, pd->af); 4380 addlog(" -> "); 4381 pf_print_host(pd->dst, 0, pd->af); 4382 addlog(" state: "); 4383 pf_print_state(*state); 4384 addlog(" seq=%u\n", seq); 4385 } 4386 REASON_SET(reason, 
PFRES_BADSTATE); 4387 return (PF_DROP); 4388 } else { 4389 if (pf_status.debug >= LOG_DEBUG) { 4390 log(LOG_DEBUG, 4391 "pf: OK ICMP %d:%d ", 4392 icmptype, pd->hdr.icmp->icmp_code); 4393 pf_print_host(pd->src, 0, pd->af); 4394 addlog(" -> "); 4395 pf_print_host(pd->dst, 0, pd->af); 4396 addlog(" state: "); 4397 pf_print_state(*state); 4398 addlog(" seq=%u\n", seq); 4399 } 4400 } 4401 4402 /* translate source/destination address, if necessary */ 4403 if ((*state)->key[PF_SK_WIRE] != 4404 (*state)->key[PF_SK_STACK]) { 4405 struct pf_state_key *nk = 4406 (*state)->key[pd->didx]; 4407 4408 if (PF_ANEQ(pd2.src, 4409 &nk->addr[pd2.sidx], pd2.af) || 4410 nk->port[pd2.sidx] != th.th_sport) 4411 pf_change_icmp(pd2.src, &th.th_sport, 4412 daddr, &nk->addr[pd2.sidx], 4413 nk->port[pd2.sidx], NULL, 4414 pd2.ip_sum, icmpsum, 0, pd2.af); 4415 4416 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], 4417 pd2.af) || pd2.rdomain != nk->rdomain) 4418 pd->destchg = 1; 4419 m->m_pkthdr.rdomain = nk->rdomain; 4420 4421 if (PF_ANEQ(pd2.dst, 4422 &nk->addr[pd2.didx], pd2.af) || 4423 nk->port[pd2.didx] != th.th_dport) 4424 pf_change_icmp(pd2.dst, &th.th_dport, 4425 saddr, &nk->addr[pd2.didx], 4426 nk->port[pd2.didx], NULL, 4427 pd2.ip_sum, icmpsum, 0, pd2.af); 4428 copyback = 1; 4429 } 4430 4431 if (copyback) { 4432 switch (pd2.af) { 4433 #ifdef INET 4434 case AF_INET: 4435 m_copyback(m, off, ICMP_MINLEN, 4436 pd->hdr.icmp, M_NOWAIT); 4437 m_copyback(m, ipoff2, sizeof(h2), 4438 &h2, M_NOWAIT); 4439 break; 4440 #endif /* INET */ 4441 #ifdef INET6 4442 case AF_INET6: 4443 m_copyback(m, off, 4444 sizeof(struct icmp6_hdr), 4445 pd->hdr.icmp6, M_NOWAIT); 4446 m_copyback(m, ipoff2, sizeof(h2_6), 4447 &h2_6, M_NOWAIT); 4448 break; 4449 #endif /* INET6 */ 4450 } 4451 m_copyback(m, off2, 8, &th, M_NOWAIT); 4452 } 4453 4454 return (PF_PASS); 4455 break; 4456 } 4457 case IPPROTO_UDP: { 4458 struct udphdr uh; 4459 4460 if (!pf_pull_hdr(m, off2, &uh, sizeof(uh), 4461 NULL, reason, pd2.af)) { 4462 
DPFPRINTF(LOG_NOTICE, 4463 "ICMP error message too short (udp)"); 4464 return (PF_DROP); 4465 } 4466 4467 key.af = pd2.af; 4468 key.proto = IPPROTO_UDP; 4469 key.rdomain = pd2.rdomain; 4470 PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); 4471 PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); 4472 key.port[pd2.sidx] = uh.uh_sport; 4473 key.port[pd2.didx] = uh.uh_dport; 4474 4475 STATE_LOOKUP(kif, &key, direction, *state, m); 4476 4477 /* translate source/destination address, if necessary */ 4478 if ((*state)->key[PF_SK_WIRE] != 4479 (*state)->key[PF_SK_STACK]) { 4480 struct pf_state_key *nk = 4481 (*state)->key[pd->didx]; 4482 4483 if (PF_ANEQ(pd2.src, 4484 &nk->addr[pd2.sidx], pd2.af) || 4485 nk->port[pd2.sidx] != uh.uh_sport) 4486 pf_change_icmp(pd2.src, &uh.uh_sport, 4487 daddr, &nk->addr[pd2.sidx], 4488 nk->port[pd2.sidx], &uh.uh_sum, 4489 pd2.ip_sum, icmpsum, 1, pd2.af); 4490 4491 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], 4492 pd2.af) || pd2.rdomain != nk->rdomain) 4493 pd->destchg = 1; 4494 m->m_pkthdr.rdomain = nk->rdomain; 4495 4496 if (PF_ANEQ(pd2.dst, 4497 &nk->addr[pd2.didx], pd2.af) || 4498 nk->port[pd2.didx] != uh.uh_dport) 4499 pf_change_icmp(pd2.dst, &uh.uh_dport, 4500 saddr, &nk->addr[pd2.didx], 4501 nk->port[pd2.didx], &uh.uh_sum, 4502 pd2.ip_sum, icmpsum, 1, pd2.af); 4503 4504 switch (pd2.af) { 4505 #ifdef INET 4506 case AF_INET: 4507 m_copyback(m, off, ICMP_MINLEN, 4508 pd->hdr.icmp, M_NOWAIT); 4509 m_copyback(m, ipoff2, sizeof(h2), &h2, 4510 M_NOWAIT); 4511 break; 4512 #endif /* INET */ 4513 #ifdef INET6 4514 case AF_INET6: 4515 m_copyback(m, off, 4516 sizeof(struct icmp6_hdr), 4517 pd->hdr.icmp6, M_NOWAIT); 4518 m_copyback(m, ipoff2, sizeof(h2_6), 4519 &h2_6, M_NOWAIT); 4520 break; 4521 #endif /* INET6 */ 4522 } 4523 m_copyback(m, off2, sizeof(uh), &uh, M_NOWAIT); 4524 } 4525 return (PF_PASS); 4526 break; 4527 } 4528 #ifdef INET 4529 case IPPROTO_ICMP: { 4530 struct icmp iih; 4531 4532 if (pd2.af != AF_INET) { 4533 REASON_SET(reason, PFRES_NORM); 
4534 return (PF_DROP); 4535 } 4536 4537 if (!pf_pull_hdr(m, off2, &iih, ICMP_MINLEN, 4538 NULL, reason, pd2.af)) { 4539 DPFPRINTF(LOG_NOTICE, 4540 "ICMP error message too short (icmp)"); 4541 return (PF_DROP); 4542 } 4543 4544 pd2.hdr.icmp = &iih; 4545 pf_icmp_mapping(&pd2, iih.icmp_type, 4546 &icmp_dir, &multi, &virtual_id, &virtual_type); 4547 4548 ret = pf_icmp_state_lookup(&key, &pd2, state, m, 4549 direction, kif, virtual_id, virtual_type, 4550 icmp_dir, &iidx, PF_ICMP_MULTI_NONE, 1); 4551 if (ret >= 0) 4552 return (ret); 4553 4554 /* translate source/destination address, if necessary */ 4555 if ((*state)->key[PF_SK_WIRE] != 4556 (*state)->key[PF_SK_STACK]) { 4557 struct pf_state_key *nk = 4558 (*state)->key[pd->didx]; 4559 4560 if (PF_ANEQ(pd2.src, 4561 &nk->addr[pd2.sidx], pd2.af) || 4562 (virtual_type == htons(ICMP_ECHO) && 4563 nk->port[iidx] != iih.icmp_id)) 4564 pf_change_icmp(pd2.src, 4565 (virtual_type == htons(ICMP_ECHO)) ? 4566 &iih.icmp_id : NULL, 4567 daddr, &nk->addr[pd2.sidx], 4568 (virtual_type == htons(ICMP_ECHO)) ? 
4569 nk->port[iidx] : 0, NULL, 4570 pd2.ip_sum, icmpsum, 0, AF_INET); 4571 4572 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], 4573 pd2.af) || pd2.rdomain != nk->rdomain) 4574 pd->destchg = 1; 4575 m->m_pkthdr.rdomain = nk->rdomain; 4576 4577 if (PF_ANEQ(pd2.dst, 4578 &nk->addr[pd2.didx], pd2.af)) 4579 pf_change_icmp(pd2.dst, NULL, saddr, 4580 &nk->addr[pd2.didx], 0, NULL, 4581 pd2.ip_sum, icmpsum, 0, AF_INET); 4582 4583 m_copyback(m, off, ICMP_MINLEN, pd->hdr.icmp, 4584 M_NOWAIT); 4585 m_copyback(m, ipoff2, sizeof(h2), &h2, 4586 M_NOWAIT); 4587 m_copyback(m, off2, ICMP_MINLEN, &iih, 4588 M_NOWAIT); 4589 } 4590 return (PF_PASS); 4591 break; 4592 } 4593 #endif /* INET */ 4594 #ifdef INET6 4595 case IPPROTO_ICMPV6: { 4596 struct icmp6_hdr iih; 4597 4598 if (pd2.af != AF_INET6) { 4599 REASON_SET(reason, PFRES_NORM); 4600 return (PF_DROP); 4601 } 4602 4603 if (!pf_pull_hdr(m, off2, &iih, 4604 sizeof(struct icmp6_hdr), NULL, reason, pd2.af)) { 4605 DPFPRINTF(LOG_NOTICE, 4606 "ICMP error message too short (icmp6)"); 4607 return (PF_DROP); 4608 } 4609 4610 pd2.hdr.icmp6 = &iih; 4611 pf_icmp_mapping(&pd2, iih.icmp6_type, 4612 &icmp_dir, &multi, &virtual_id, &virtual_type); 4613 ret = pf_icmp_state_lookup(&key, &pd2, state, m, 4614 direction, kif, virtual_id, virtual_type, 4615 icmp_dir, &iidx, PF_ICMP_MULTI_NONE, 1); 4616 if (ret >= 0) { 4617 if (ret == PF_DROP && pd->af == AF_INET6 && 4618 icmp_dir == PF_OUT) { 4619 ret = pf_icmp_state_lookup(&key, pd, 4620 state, m, direction, kif, 4621 virtual_id, virtual_type, 4622 icmp_dir, &iidx, multi, 1); 4623 if (ret >= 0) 4624 return (ret); 4625 } else 4626 return (ret); 4627 } 4628 4629 /* translate source/destination address, if necessary */ 4630 if ((*state)->key[PF_SK_WIRE] != 4631 (*state)->key[PF_SK_STACK]) { 4632 struct pf_state_key *nk = 4633 (*state)->key[pd->didx]; 4634 4635 if (PF_ANEQ(pd2.src, 4636 &nk->addr[pd2.sidx], pd2.af) || 4637 ((virtual_type == 4638 htons(ICMP6_ECHO_REQUEST)) && 4639 nk->port[pd2.sidx] != 
iih.icmp6_id)) 4640 pf_change_icmp(pd2.src, 4641 (virtual_type == 4642 htons(ICMP6_ECHO_REQUEST)) 4643 ? &iih.icmp6_id : NULL, 4644 daddr, &nk->addr[pd2.sidx], 4645 (virtual_type == 4646 htons(ICMP6_ECHO_REQUEST)) 4647 ? nk->port[iidx] : 0, NULL, 4648 pd2.ip_sum, icmpsum, 0, AF_INET6); 4649 4650 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], 4651 pd2.af) || pd2.rdomain != nk->rdomain) 4652 pd->destchg = 1; 4653 m->m_pkthdr.rdomain = nk->rdomain; 4654 4655 if (PF_ANEQ(pd2.dst, 4656 &nk->addr[pd2.didx], pd2.af)) 4657 pf_change_icmp(pd2.dst, NULL, saddr, 4658 &nk->addr[pd2.didx], 0, NULL, 4659 pd2.ip_sum, icmpsum, 0, AF_INET6); 4660 4661 m_copyback(m, off, sizeof(struct icmp6_hdr), 4662 pd->hdr.icmp6, M_NOWAIT); 4663 m_copyback(m, ipoff2, sizeof(h2_6), &h2_6, 4664 M_NOWAIT); 4665 m_copyback(m, off2, sizeof(struct icmp6_hdr), 4666 &iih, M_NOWAIT); 4667 } 4668 return (PF_PASS); 4669 break; 4670 } 4671 #endif /* INET6 */ 4672 default: { 4673 key.af = pd2.af; 4674 key.proto = pd2.proto; 4675 key.rdomain = pd2.rdomain; 4676 PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); 4677 PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); 4678 key.port[0] = key.port[1] = 0; 4679 4680 STATE_LOOKUP(kif, &key, direction, *state, m); 4681 4682 /* translate source/destination address, if necessary */ 4683 if ((*state)->key[PF_SK_WIRE] != 4684 (*state)->key[PF_SK_STACK]) { 4685 struct pf_state_key *nk = 4686 (*state)->key[pd->didx]; 4687 4688 if (PF_ANEQ(pd2.src, 4689 &nk->addr[pd2.sidx], pd2.af)) 4690 pf_change_icmp(pd2.src, NULL, daddr, 4691 &nk->addr[pd2.sidx], 0, NULL, 4692 pd2.ip_sum, icmpsum, 0, pd2.af); 4693 4694 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], 4695 pd2.af) || pd2.rdomain != nk->rdomain) 4696 pd->destchg = 1; 4697 m->m_pkthdr.rdomain = nk->rdomain; 4698 4699 if (PF_ANEQ(pd2.dst, 4700 &nk->addr[pd2.didx], pd2.af)) 4701 pf_change_icmp(pd2.dst, NULL, saddr, 4702 &nk->addr[pd2.didx], 0, NULL, 4703 pd2.ip_sum, icmpsum, 0, pd2.af); 4704 4705 switch (pd2.af) { 4706 #ifdef INET 4707 case 
AF_INET:
				/* write the rewritten ICMP + inner IPv4
				 * headers back into the mbuf */
				m_copyback(m, off, ICMP_MINLEN,
				    pd->hdr.icmp, M_NOWAIT);
				m_copyback(m, ipoff2, sizeof(h2), &h2,
				    M_NOWAIT);
				break;
#endif /* INET */
#ifdef INET6
			case AF_INET6:
				/* same write-back for the IPv6 case */
				m_copyback(m, off,
				    sizeof(struct icmp6_hdr),
				    pd->hdr.icmp6, M_NOWAIT);
				m_copyback(m, ipoff2, sizeof(h2_6),
				    &h2_6, M_NOWAIT);
				break;
#endif /* INET6 */
			}
		}
		return (PF_PASS);
		break;
	}
	}
}

/*
 * State tracking for protocols other than TCP/UDP/ICMP(v6).
 *
 * Looks up the state entry matching the packet (address pair + proto +
 * rdomain, ports forced to 0), advances the simple SINGLE/MULTIPLE
 * two-peer state machine, refreshes the expiry timer, and applies the
 * NAT address translation recorded in the state keys when the wire and
 * stack keys differ.
 *
 * Returns PF_PASS on a successful match; STATE_LOOKUP() returns
 * PF_DROP from this function when no state is found.
 */
int
pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif,
    struct mbuf *m, struct pf_pdesc *pd)
{
	struct pf_state_peer	*src, *dst;
	struct pf_state_key_cmp	 key;

	/* build lookup key; ports are always 0 for "other" protocols */
	key.af = pd->af;
	key.proto = pd->proto;
	key.rdomain = pd->rdomain;
	if (direction == PF_IN)	{
		PF_ACPY(&key.addr[0], pd->src, key.af);
		PF_ACPY(&key.addr[1], pd->dst, key.af);
		key.port[0] = key.port[1] = 0;
	} else {
		PF_ACPY(&key.addr[1], pd->src, key.af);
		PF_ACPY(&key.addr[0], pd->dst, key.af);
		key.port[1] = key.port[0] = 0;
	}

	STATE_LOOKUP(kif, &key, direction, *state, m);

	/* pick src/dst peers relative to the direction the state was created */
	if (direction == (*state)->direction) {
		src = &(*state)->src;
		dst = &(*state)->dst;
	} else {
		src = &(*state)->dst;
		dst = &(*state)->src;
	}

	/* update states */
	if (src->state < PFOTHERS_SINGLE)
		src->state = PFOTHERS_SINGLE;
	if (dst->state == PFOTHERS_SINGLE)
		dst->state = PFOTHERS_MULTIPLE;

	/* update expire time */
	(*state)->expire = time_second;
	if (src->state == PFOTHERS_MULTIPLE && dst->state == PFOTHERS_MULTIPLE)
		(*state)->timeout = PFTM_OTHER_MULTIPLE;
	else
		(*state)->timeout = PFTM_OTHER_SINGLE;

	/* translate source/destination address, if necessary */
	if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
		/* the key indexed by didx holds the post-translation addrs */
		struct pf_state_key *nk = (*state)->key[pd->didx];

		KASSERT(nk);
		KASSERT(pd);
		KASSERT(pd->src);
		KASSERT(pd->dst);

		switch (pd->af) {
#ifdef INET
		case AF_INET:
			/* pf_change_a() also fixes up the IPv4 header cksum */
			if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], AF_INET))
				pf_change_a(&pd->src->v4.s_addr, NULL,
				    nk->addr[pd->sidx].v4.s_addr,
				    0);
			if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], AF_INET)) {
				pf_change_a(&pd->dst->v4.s_addr, NULL,
				    nk->addr[pd->didx].v4.s_addr,
				    0);
				pd->destchg = 1;
			}
			break;
#endif /* INET */
#ifdef INET6
		case AF_INET6:
			/* no header checksum in IPv6; plain copy suffices */
			if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], AF_INET6))
				PF_ACPY(pd->src, &nk->addr[pd->sidx], pd->af);

			if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], AF_INET6)) {
				PF_ACPY(pd->dst, &nk->addr[pd->didx], pd->af);
				pd->destchg = 1;
			}
			break;
#endif /* INET6 */
		}
		if (pd->rdomain != nk->rdomain)
			pd->destchg = 1;

		m->m_pkthdr.rdomain = nk->rdomain;
	}
	return (PF_PASS);
}

/*
 * ipoff and off are measured from the start of the mbuf chain.
 * h must be at "ipoff" on the mbuf chain.
 */
/*
 * Copy len bytes of protocol header at offset off out of the mbuf chain
 * into caller-provided storage p.
 *
 * For IPv4, non-first fragments are rejected: if the header would lie
 * entirely beyond the fragment, *actionp is set to PF_PASS (let the
 * fragment through), otherwise PF_DROP/PFRES_FRAG.  Truncated packets
 * set PF_DROP/PFRES_SHORT.  Returns p on success, NULL on failure.
 */
void *
pf_pull_hdr(struct mbuf *m, int off, void *p, int len,
    u_short *actionp, u_short *reasonp, sa_family_t af)
{
	switch (af) {
#ifdef INET
	case AF_INET: {
		struct ip	*h = mtod(m, struct ip *);
		u_int16_t	 fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;

		if (fragoff) {
			/* non-first fragment: header is not in this packet */
			if (fragoff >= len)
				ACTION_SET(actionp, PF_PASS);
			else {
				ACTION_SET(actionp, PF_DROP);
				REASON_SET(reasonp, PFRES_FRAG);
			}
			return (NULL);
		}
		if (m->m_pkthdr.len < off + len ||
		    ntohs(h->ip_len) < off + len) {
			ACTION_SET(actionp, PF_DROP);
			REASON_SET(reasonp, PFRES_SHORT);
			return (NULL);
		}
		break;
	}
#endif /* INET */
#ifdef INET6
	case AF_INET6: {
		struct ip6_hdr	*h = mtod(m, struct ip6_hdr *);

		if (m->m_pkthdr.len < off + len ||
		    (ntohs(h->ip6_plen) + sizeof(struct ip6_hdr)) <
		    (unsigned)(off + len)) {
			ACTION_SET(actionp, PF_DROP);
			REASON_SET(reasonp, PFRES_SHORT);
			return (NULL);
		}
		break;
	}
#endif /* INET6 */
	}
	m_copydata(m, off, len, p);
	return (p);
}

/*
 * Check whether addr is routable, and when an input interface kif is
 * given, perform a uRPF check: the route (or, with multipath enabled,
 * any of the multipath routes) back to addr must point at kif.
 * Returns 1 if the check passes, 0 otherwise.  IPsec enc interfaces and
 * scope-embedded IPv6 addresses are always accepted.
 */
int
pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif,
    int rtableid)
{
	struct sockaddr_in	*dst;
	int			 ret = 1;
	int			 check_mpath;
	extern int		 ipmultipath;
#ifdef INET6
	extern int		 ip6_multipath;
	struct sockaddr_in6	*dst6;
	struct route_in6	 ro;
#else
	struct route		 ro;
#endif
	struct radix_node	*rn;
	struct rtentry		*rt;
	struct ifnet		*ifp;

	check_mpath = 0;
	bzero(&ro, sizeof(ro));
	ro.ro_tableid = rtableid;
	switch (af) {
	case AF_INET:
		dst = satosin(&ro.ro_dst);
		dst->sin_family = AF_INET;
		dst->sin_len = sizeof(*dst);
		dst->sin_addr = addr->v4;
		if (ipmultipath)
			check_mpath = 1;
		break;
#ifdef INET6
	case AF_INET6:
		/*
		 * Skip check for addresses with embedded interface scope,
		 * as they would always match anyway.
		 */
		if (IN6_IS_SCOPE_EMBED(&addr->v6))
			goto out;
		dst6 = (struct sockaddr_in6 *)&ro.ro_dst;
		dst6->sin6_family = AF_INET6;
		dst6->sin6_len = sizeof(*dst6);
		dst6->sin6_addr = addr->v6;
		if (ip6_multipath)
			check_mpath = 1;
		break;
#endif /* INET6 */
	default:
		return (0);
	}

	/* Skip checks for ipsec interfaces */
	if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC)
		goto out;

	rtalloc_noclone((struct route *)&ro);

	if (ro.ro_rt != NULL) {
		/* No interface given, this is a no-route check */
		if (kif == NULL)
			goto out;

		if (kif->pfik_ifp == NULL) {
			ret = 0;
			goto out;
		}

		/* Perform uRPF check if passed input interface */
		ret = 0;
		rn = (struct radix_node *)ro.ro_rt;
		do {
			rt = (struct rtentry *)rn;
			/* carp interfaces route via their carpdev */
			if (rt->rt_ifp->if_type == IFT_CARP)
				ifp = rt->rt_ifp->if_carpdev;
			else
				ifp = rt->rt_ifp;

			if (kif->pfik_ifp == ifp)
				ret = 1;
			rn = rn_mpath_next(rn, 0);
		} while (check_mpath == 1 && rn != NULL && ret == 0);
	} else
		ret = 0;
out:
	if (ro.ro_rt != NULL)
		RTFREE(ro.ro_rt);
	return (ret);
}

/*
 * Return 1 if the route to addr in rtableid carries the route label
 * referenced by aw, 0 otherwise (including no-route and unknown af).
 */
int
pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw,
    int rtableid)
{
	struct sockaddr_in	*dst;
#ifdef INET6
	struct sockaddr_in6	*dst6;
	struct route_in6	 ro;
#else
	struct route		 ro;
#endif
	int			 ret = 0;

	bzero(&ro, sizeof(ro));
	ro.ro_tableid = rtableid;
	switch (af) {
	case AF_INET:
		dst = satosin(&ro.ro_dst);
		dst->sin_family = AF_INET;
		dst->sin_len = sizeof(*dst);
		dst->sin_addr = addr->v4;
		break;
#ifdef INET6
	case AF_INET6:
		dst6 = (struct sockaddr_in6 *)&ro.ro_dst;
		dst6->sin6_family = AF_INET6;
		dst6->sin6_len = sizeof(*dst6);
		dst6->sin6_addr = addr->v6;
		break;
#endif /* INET6 */
	default:
		return (0);
	}

	rtalloc_noclone((struct route *)&ro);

	if (ro.ro_rt != NULL) {
		if (ro.ro_rt->rt_labelid == aw->v.rtlabel)
			ret = 1;
		RTFREE(ro.ro_rt);
	}

	return (ret);
}

#ifdef INET
/*
 * Output an IPv4 packet according to a route-to / reply-to / dup-to
 * rule.  For PF_DUPTO the packet is duplicated and the original left in
 * *m; otherwise *m is consumed (set to NULL in "done").  The packet is
 * re-run through pf_test() when it leaves on a different interface,
 * then sent with (*ifp->if_output)(), fragmenting if needed.
 */
void
pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
    struct pf_state *s)
{
	struct mbuf		*m0, *m1;
	struct route		 iproute;
	struct route		*ro = NULL;
	struct sockaddr_in	*dst;
	struct ip		*ip;
	struct ifnet		*ifp = NULL;
	struct pf_addr		 naddr;
	struct pf_src_node	*sn = NULL;
	int			 error = 0;
#ifdef IPSEC
	struct m_tag		*mtag;
#endif /* IPSEC */

	if (m == NULL || *m == NULL || r == NULL ||
	    (dir != PF_IN && dir != PF_OUT) || oifp == NULL)
		panic("pf_route: invalid parameters");

	/* routing-loop protection: give up after 4 passes */
	if ((*m)->m_pkthdr.pf.routed++ > 3) {
		m0 = *m;
		*m = NULL;
		goto bad;
	}

	if (r->rt == PF_DUPTO) {
		/* duplicate; the original continues on its normal path */
		if ((m0 = m_copym2(*m, 0, M_COPYALL, M_NOWAIT)) == NULL)
			return;
	} else {
		/* reply-to only applies against the rule direction */
		if ((r->rt == PF_REPLYTO) == (r->direction == dir))
			return;
		m0 = *m;
	}

	if (m0->m_len < sizeof(struct ip)) {
		DPFPRINTF(LOG_ERR,
		    "pf_route: m0->m_len < sizeof(struct ip)");
		goto bad;
	}

	ip = mtod(m0, struct ip *);

	ro = &iproute;
	bzero((caddr_t)ro, sizeof(*ro));
	dst = satosin(&ro->ro_dst);
	dst->sin_family = AF_INET;
	dst->sin_len = sizeof(*dst);
	dst->sin_addr = ip->ip_dst;
	ro->ro_tableid = m0->m_pkthdr.rdomain;

	if (!r->rt) {
		/* no route-to target: normal FIB lookup */
		rtalloc(ro);
		if (ro->ro_rt == 0) {
			ipstat.ips_noroute++;
			goto bad;
		}

		ifp = ro->ro_rt->rt_ifp;
		ro->ro_rt->rt_use++;

		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
			dst = satosin(ro->ro_rt->rt_gateway);

		m0->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
	} else {
		if (s == NULL) {
			/* stateless: pick a gateway from the rule's pool */
			if (pf_map_addr(AF_INET, r,
			    (struct pf_addr *)&ip->ip_src,
			    &naddr, NULL, &sn, &r->route, PF_SN_ROUTE)) {
				DPFPRINTF(LOG_ERR,
				    "pf_route: pf_map_addr() failed.");
				goto bad;
			}

			if (!PF_AZERO(&naddr, AF_INET))
				dst->sin_addr.s_addr = naddr.v4.s_addr;
			ifp = r->route.kif ?
			    r->route.kif->pfik_ifp : NULL;
		} else {
			/* use the gateway cached in the state */
			if (!PF_AZERO(&s->rt_addr, AF_INET))
				dst->sin_addr.s_addr =
				    s->rt_addr.v4.s_addr;
			ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
		}
	}
	if (ifp == NULL)
		goto bad;


	if (oifp != ifp) {
		/* leaving on another interface: re-run the filter */
		if (pf_test(AF_INET, PF_OUT, ifp, &m0, NULL) != PF_PASS)
			goto bad;
		else if (m0 == NULL)
			goto done;
		if (m0->m_len < sizeof(struct ip)) {
			DPFPRINTF(LOG_ERR,
			    "pf_route: m0->m_len < sizeof(struct ip)");
			goto bad;
		}
		ip = mtod(m0, struct ip *);
	}

	/* Copied from ip_output. */
#ifdef IPSEC
	/*
	 * If deferred crypto processing is needed, check that the
	 * interface supports it.
	 */
	if ((mtag = m_tag_find(m0, PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED, NULL))
	    != NULL && (ifp->if_capabilities & IFCAP_IPSEC) == 0) {
		/* Notify IPsec to do its own crypto. */
		ipsp_skipcrypto_unmark((struct tdb_ident *)(mtag + 1));
		goto bad;
	}
#endif /* IPSEC */

	in_proto_cksum_out(m0, ifp);

	if (ntohs(ip->ip_len) <= ifp->if_mtu) {
		/* fits in one frame: finalize IP checksum and send */
		ip->ip_sum = 0;
		if (ifp->if_capabilities & IFCAP_CSUM_IPv4) {
			m0->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT;
			ipstat.ips_outhwcsum++;
		} else
			ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
		/* Update relevant hardware checksum stats for TCP/UDP */
		if (m0->m_pkthdr.csum_flags & M_TCP_CSUM_OUT)
			tcpstat.tcps_outhwcsum++;
		else if (m0->m_pkthdr.csum_flags & M_UDP_CSUM_OUT)
			udpstat.udps_outhwcsum++;
		error = (*ifp->if_output)(ifp, m0, sintosa(dst), NULL);
		goto done;
	}

	/*
	 * Too large for interface; fragment if possible.
	 * Must be able to put at least 8 bytes per fragment.
	 */
	if (ip->ip_off & htons(IP_DF)) {
		ipstat.ips_cantfrag++;
		if (r->rt != PF_DUPTO) {
			icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0,
			    ifp->if_mtu);
			goto done;
		} else
			goto bad;
	}

	m1 = m0;
	error = ip_fragment(m0, ifp, ifp->if_mtu);
	if (error) {
		/* ip_fragment() disposed of the chain on error */
		m0 = NULL;
		goto bad;
	}

	/* send (or free, after the first error) each fragment */
	for (m0 = m1; m0; m0 = m1) {
		m1 = m0->m_nextpkt;
		m0->m_nextpkt = 0;
		if (error == 0)
			error = (*ifp->if_output)(ifp, m0, sintosa(dst),
			    NULL);
		else
			m_freem(m0);
	}

	if (error == 0)
		ipstat.ips_fragmented++;

done:
	if (r->rt != PF_DUPTO)
		*m = NULL;
	if (ro == &iproute && ro->ro_rt)
		RTFREE(ro->ro_rt);
	return;

bad:
	m_freem(m0);
	goto done;
}
#endif /* INET */

#ifdef INET6
/*
 * IPv6 counterpart of pf_route(): output a packet according to a
 * route-to / reply-to / dup-to rule.  Without an explicit route target
 * the packet is simply handed to ip6_output(); oversized packets
 * trigger ICMP6_PACKET_TOO_BIG instead of fragmentation.
 */
void
pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
    struct pf_state *s)
{
	struct mbuf		*m0;
	struct route_in6	 ip6route;
	struct route_in6	*ro;
	struct sockaddr_in6	*dst;
	struct ip6_hdr		*ip6;
	struct ifnet		*ifp = NULL;
	struct pf_addr		 naddr;
	struct pf_src_node	*sn = NULL;

	if (m == NULL || *m == NULL || r == NULL ||
	    (dir != PF_IN && dir != PF_OUT) || oifp == NULL)
		panic("pf_route6: invalid parameters");

	/* routing-loop protection: give up after 4 passes */
	if ((*m)->m_pkthdr.pf.routed++ > 3) {
		m0 = *m;
		*m = NULL;
		goto bad;
	}

	if (r->rt == PF_DUPTO) {
		/* duplicate; the original continues on its normal path */
		if ((m0 = m_copym2(*m, 0, M_COPYALL, M_NOWAIT)) == NULL)
			return;
	} else {
		/* reply-to only applies against the rule direction */
		if ((r->rt == PF_REPLYTO) == (r->direction == dir))
			return;
		m0 = *m;
	}

	if (m0->m_len < sizeof(struct ip6_hdr)) {
		DPFPRINTF(LOG_ERR,
		    "pf_route6: m0->m_len < sizeof(struct ip6_hdr)");
		goto bad;
	}
	ip6 = mtod(m0, struct ip6_hdr *);

	ro = &ip6route;
	bzero((caddr_t)ro, sizeof(*ro));
	dst = (struct sockaddr_in6 *)&ro->ro_dst;
	dst->sin6_family = AF_INET6;
	dst->sin6_len = sizeof(*dst);
	dst->sin6_addr = ip6->ip6_dst;

	if (!r->rt) {
		/* no route-to target: let the stack route it normally */
		m0->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
		ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL);
		return;
	}

	if (s == NULL) {
		/* stateless: pick a gateway from the rule's pool */
		if (pf_map_addr(AF_INET6, r, (struct pf_addr *)&ip6->ip6_src,
		    &naddr, NULL, &sn, &r->route, PF_SN_ROUTE)) {
			DPFPRINTF(LOG_ERR,
			    "pf_route6: pf_map_addr() failed.");
			goto bad;
		}
		if (!PF_AZERO(&naddr, AF_INET6))
			PF_ACPY((struct pf_addr *)&dst->sin6_addr,
			    &naddr, AF_INET6);
		ifp = r->route.kif ? r->route.kif->pfik_ifp : NULL;
	} else {
		/* use the gateway cached in the state */
		if (!PF_AZERO(&s->rt_addr, AF_INET6))
			PF_ACPY((struct pf_addr *)&dst->sin6_addr,
			    &s->rt_addr, AF_INET6);
		ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
	}
	if (ifp == NULL)
		goto bad;

	if (oifp != ifp) {
		/* leaving on another interface: re-run the filter */
		if (pf_test(AF_INET6, PF_OUT, ifp, &m0, NULL) != PF_PASS)
			goto bad;
		else if (m0 == NULL)
			goto done;
		if (m0->m_len < sizeof(struct ip6_hdr)) {
			DPFPRINTF(LOG_ERR,
			    "pf_route6: m0->m_len < sizeof(struct ip6_hdr)");
			goto bad;
		}
		ip6 = mtod(m0, struct ip6_hdr *);
	}

	/*
	 * If the packet is too large for the outgoing interface,
	 * send back an icmp6 error.
	 */
	if (IN6_IS_SCOPE_EMBED(&dst->sin6_addr))
		dst->sin6_addr.s6_addr16[1] = htons(ifp->if_index);
	if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) {
		nd6_output(ifp, ifp, m0, dst, NULL);
	} else {
		in6_ifstat_inc(ifp, ifs6_in_toobig);
		if (r->rt != PF_DUPTO)
			icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
		else
			goto bad;
	}

done:
	if (r->rt != PF_DUPTO)
		*m = NULL;
	return;

bad:
	m_freem(m0);
	goto done;
}
#endif /* INET6 */


/*
 * check protocol (tcp/udp/icmp/icmp6) checksum and set mbuf flag
 * off is the offset where the protocol header starts
 * len is the total length of protocol header plus payload
 * returns 0 when the checksum is valid, otherwise returns 1.
 * if the _OUT flag is set the checksum isn't done yet, consider these ok
 */
int
pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p,
    sa_family_t af)
{
	u_int16_t flag_ok, flag_bad, flag_out;
	u_int16_t sum;

	/* map the protocol to its mbuf checksum status flags */
	switch (p) {
	case IPPROTO_TCP:
		flag_ok = M_TCP_CSUM_IN_OK;
		flag_out = M_TCP_CSUM_OUT;
		flag_bad = M_TCP_CSUM_IN_BAD;
		break;
	case IPPROTO_UDP:
		flag_ok = M_UDP_CSUM_IN_OK;
		flag_out = M_UDP_CSUM_OUT;
		flag_bad = M_UDP_CSUM_IN_BAD;
		break;
	case IPPROTO_ICMP:
#ifdef INET6
	case IPPROTO_ICMPV6:
#endif /* INET6 */
		/* no hardware offload flags for ICMP; always verify */
		flag_ok = flag_out = flag_bad = 0;
		break;
	default:
		return (1);
	}
	/* trust a previous (hardware or software) verification result */
	if (m->m_pkthdr.csum_flags & (flag_ok | flag_out))
		return (0);
	if (m->m_pkthdr.csum_flags & flag_bad)
		return (1);
	if (off < sizeof(struct ip) || len < sizeof(struct udphdr))
		return (1);
	if (m->m_pkthdr.len < off + len)
		return (1);
	switch (af) {
#ifdef INET
	case AF_INET:
		if (p == IPPROTO_ICMP) {
			/*
			 * ICMP has no pseudo-header: temporarily advance
			 * m_data/m_len to checksum just the ICMP part,
			 * then restore the mbuf.
			 */
			if (m->m_len < off)
				return (1);
			m->m_data += off;
			m->m_len -= off;
			sum = in_cksum(m, len);
			m->m_data -= off;
			m->m_len += off;
		} else {
			if (m->m_len < sizeof(struct ip))
				return (1);
			sum = in4_cksum(m, p, off, len);
		}
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		if (m->m_len < sizeof(struct ip6_hdr))
			return (1);
		sum = in6_cksum(m, p, off, len);
		break;
#endif /* INET6 */
	default:
		return (1);
	}
	if (sum) {
		/* remember the bad result and bump protocol statistics */
		m->m_pkthdr.csum_flags |= flag_bad;
		switch (p) {
		case IPPROTO_TCP:
			tcpstat.tcps_rcvbadsum++;
			break;
		case IPPROTO_UDP:
			udpstat.udps_badsum++;
			break;
		case IPPROTO_ICMP:
			icmpstat.icps_checksum++;
			break;
#ifdef INET6
		case IPPROTO_ICMPV6:
			icmp6stat.icp6s_checksum++;
			break;
#endif /* INET6 */
		}
		return (1);
	}
	m->m_pkthdr.csum_flags |= flag_ok;
	return (0);
}

/*
 * Return the divert info tag attached to the mbuf, or NULL if the
 * packet carries none.
 */
struct pf_divert *
pf_find_divert(struct mbuf *m)
{
	struct m_tag	*mtag;

	if ((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL)
		return (NULL);

	return ((struct pf_divert *)(mtag + 1));
}

/*
 * Like pf_find_divert(), but allocate and attach a zeroed divert tag
 * when the mbuf has none yet.  Returns NULL only on allocation failure.
 */
struct pf_divert *
pf_get_divert(struct mbuf *m)
{
	struct m_tag	*mtag;

	if ((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL) {
		mtag = m_tag_get(PACKET_TAG_PF_DIVERT, sizeof(struct pf_divert),
		    M_NOWAIT);
		if (mtag == NULL)
			return (NULL);
		bzero(mtag + 1, sizeof(struct pf_divert));
		m_tag_prepend(m, mtag);
	}

	return ((struct pf_divert *)(mtag + 1));
}

/*
 * Walk the options inside an IPv6 hop-by-hop options header spanning
 * [off, end), validating option lengths and extracting the jumbo
 * payload length into *jumbolen.  Returns PF_PASS or PF_DROP (with
 * *reason set).
 */
int
pf_walk_option6(struct mbuf *m, int off, int end, u_int32_t *jumbolen,
    u_short *reason)
{
	struct ip6_opt		 opt;
	struct ip6_opt_jumbo	 jumbo;
	struct ip6_hdr		*h = mtod(m, struct ip6_hdr *);

	while (off < end) {
		if (!pf_pull_hdr(m, off, &opt.ip6o_type, sizeof(opt.ip6o_type),
		    NULL, reason, AF_INET6)) {
			DPFPRINTF(LOG_NOTICE, "IPv6 short opt type");
			return (PF_DROP);
		}
		if (opt.ip6o_type == IP6OPT_PAD1) {
			/* PAD1 is a lone byte without a length field */
			off++;
			continue;
		}
		if (!pf_pull_hdr(m, off, &opt, sizeof(opt),
		    NULL, reason, AF_INET6)) {
			DPFPRINTF(LOG_NOTICE, "IPv6 short opt");
			return (PF_DROP);
		}
		if (off + sizeof(opt) + opt.ip6o_len > end) {
			DPFPRINTF(LOG_NOTICE, "IPv6 long opt");
			REASON_SET(reason, PFRES_IPOPTIONS);
			return (PF_DROP);
		}
		switch (opt.ip6o_type) {
		case IP6OPT_JUMBO:
			/* at most one jumbo option, and only with plen 0 */
			if (*jumbolen != 0) {
				DPFPRINTF(LOG_NOTICE, "IPv6 multiple jumbo");
				REASON_SET(reason, PFRES_IPOPTIONS);
				return (PF_DROP);
			}
			if (ntohs(h->ip6_plen) != 0) {
				DPFPRINTF(LOG_NOTICE, "IPv6 bad jumbo plen");
				REASON_SET(reason, PFRES_IPOPTIONS);
				return (PF_DROP);
			}
			if
			    (!pf_pull_hdr(m, off, &jumbo, sizeof(jumbo),
			    NULL, reason, AF_INET6)) {
				DPFPRINTF(LOG_NOTICE, "IPv6 short jumbo");
				return (PF_DROP);
			}
			/* jumbo length is unaligned; extract via memcpy */
			memcpy(jumbolen, jumbo.ip6oj_jumbo_len,
			    sizeof(*jumbolen));
			*jumbolen = ntohl(*jumbolen);
			if (*jumbolen < IPV6_MAXPACKET) {
				DPFPRINTF(LOG_NOTICE, "IPv6 short jumbolen");
				REASON_SET(reason, PFRES_IPOPTIONS);
				return (PF_DROP);
			}
			break;
		default:
			break;
		}
		off += sizeof(opt) + opt.ip6o_len;
	}

	return (PF_PASS);
}

/*
 * Walk the IPv6 extension header chain starting after the fixed
 * header.  On return *off points at the upper-layer header, *nxt holds
 * its protocol, *extoff the last extension header before any fragment
 * header, *fragoff the fragment header offset (0 if none) and
 * *jumbolen the jumbo payload length (0 if none).  Returns PF_PASS or
 * PF_DROP with *reason set.
 */
int
pf_walk_header6(struct mbuf *m, struct ip6_hdr *h, int *off, int *extoff,
    int *fragoff, u_int8_t *nxt, u_int32_t *jumbolen, u_short *reason)
{
	struct ip6_frag		 frag;
	struct ip6_ext		 ext;
	struct ip6_rthdr	 rthdr;
	int			 rthdr_cnt = 0;

	*off += sizeof(struct ip6_hdr);
	*extoff = *fragoff = 0;
	*nxt = h->ip6_nxt;
	*jumbolen = 0;
	for (;;) {
		switch (*nxt) {
		case IPPROTO_FRAGMENT:
			if (*fragoff != 0) {
				DPFPRINTF(LOG_NOTICE, "IPv6 multiple fragment");
				REASON_SET(reason, PFRES_FRAG);
				return (PF_DROP);
			}
			/* jumbo payload packets cannot be fragmented */
			if (*jumbolen != 0) {
				DPFPRINTF(LOG_NOTICE, "IPv6 fragmented jumbo");
				REASON_SET(reason, PFRES_FRAG);
				return (PF_DROP);
			}
			if (!pf_pull_hdr(m, *off, &frag, sizeof(frag),
			    NULL, reason, AF_INET6)) {
				DPFPRINTF(LOG_NOTICE, "IPv6 short fragment");
				return (PF_DROP);
			}
			*fragoff = *off;
			/* stop walking over non initial fragments */
			if ((frag.ip6f_offlg & IP6F_OFF_MASK) != 0)
				return (PF_PASS);
			*off += sizeof(frag);
			*nxt = frag.ip6f_nxt;
			break;
		case IPPROTO_ROUTING:
			if (rthdr_cnt++) {
				DPFPRINTF(LOG_NOTICE, "IPv6 multiple rthdr");
				REASON_SET(reason, PFRES_IPOPTIONS);
				return (PF_DROP);
			}
			if (!pf_pull_hdr(m, *off, &rthdr, sizeof(rthdr),
			    NULL, reason, AF_INET6)) {
				DPFPRINTF(LOG_NOTICE, "IPv6 short rthdr");
				/* fragments may be short */
				if (*fragoff != 0) {
					*off = *fragoff;
					*nxt = IPPROTO_FRAGMENT;
					return (PF_PASS);
				}
				return (PF_DROP);
			}
			/* type 0 routing headers are deprecated/dangerous */
			if (rthdr.ip6r_type == IPV6_RTHDR_TYPE_0) {
				DPFPRINTF(LOG_NOTICE, "IPv6 rthdr0");
				REASON_SET(reason, PFRES_IPOPTIONS);
				return (PF_DROP);
			}
			/* FALLTHROUGH */
		case IPPROTO_AH:
		case IPPROTO_HOPOPTS:
		case IPPROTO_DSTOPTS:
			if (!pf_pull_hdr(m, *off, &ext, sizeof(ext),
			    NULL, reason, AF_INET6)) {
				DPFPRINTF(LOG_NOTICE, "IPv6 short exthdr");
				/* fragments may be short */
				if (*fragoff != 0) {
					*off = *fragoff;
					*nxt = IPPROTO_FRAGMENT;
					return (PF_PASS);
				}
				return (PF_DROP);
			}
			/* reassembly needs the ext header before the frag */
			if (*fragoff == 0)
				*extoff = *off;
			if (*nxt == IPPROTO_HOPOPTS && *fragoff == 0) {
				if (pf_walk_option6(m, *off + sizeof(ext),
				    *off + (ext.ip6e_len + 1) * 8, jumbolen,
				    reason) != PF_PASS)
					return (PF_DROP);
				if (ntohs(h->ip6_plen) == 0 && *jumbolen != 0) {
					DPFPRINTF(LOG_NOTICE,
					    "IPv6 missing jumbo");
					REASON_SET(reason, PFRES_IPOPTIONS);
					return (PF_DROP);
				}
			}
			/* AH length is counted in 4-byte units, others in 8 */
			if (*nxt == IPPROTO_AH)
				*off += (ext.ip6e_len + 2) * 4;
			else
				*off += (ext.ip6e_len + 1) * 8;
			*nxt = ext.ip6e_nxt;
			break;
		case IPPROTO_TCP:
		case IPPROTO_UDP:
		case IPPROTO_ICMPV6:
			/* fragments may be short, ignore inner header then */
			if (*fragoff != 0 && ntohs(h->ip6_plen) < *off +
			    (*nxt == IPPROTO_TCP ? sizeof(struct tcphdr) :
			    *nxt == IPPROTO_UDP ?
			    sizeof(struct udphdr) :
			    sizeof(struct icmp6_hdr))) {
				*off = *fragoff;
				*nxt = IPPROTO_FRAGMENT;
			}
			/* FALLTHROUGH */
		default:
			return (PF_PASS);
		}
	}
}

/*
 * Fill in the packet descriptor pd for the packet in *m0: validate the
 * IP header, trigger reassembly of fragments, and pull the transport
 * header.  Returns 0 on success; -1 when the caller must stop
 * processing, in which case *action/*reason carry the verdict
 * (fragments that survive normalization are filtered here directly via
 * pf_test_rule()).
 */
int
pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0,
    u_short *action, u_short *reason, struct pfi_kif *kif, struct pf_rule **a,
    struct pf_rule **r, struct pf_state **s, struct pf_ruleset **ruleset,
    int *off, int *hdrlen)
{
	struct mbuf *m = *m0;

	if (pd->hdr.any == NULL)
		panic("pf_setup_pdesc: no storage for headers provided");

	*hdrlen = 0;
	pd->af = af;
	switch (af) {
#ifdef INET
	case AF_INET: {
		struct ip *h;

		/* Check for illegal packets */
		if (m->m_pkthdr.len < (int)sizeof(struct ip)) {
			*action = PF_DROP;
			REASON_SET(reason, PFRES_SHORT);
			return (-1);
		}

		h = mtod(m, struct ip *);
		*off = h->ip_hl << 2;

		if (*off < (int)sizeof(struct ip) ||
		    *off > ntohs(h->ip_len) ||
		    m->m_pkthdr.len < ntohs(h->ip_len)) {
			*action = PF_DROP;
			REASON_SET(reason, PFRES_SHORT);
			return (-1);
		}

		/* packet reassembly */
		if (h->ip_off & htons(IP_MF | IP_OFFMASK) &&
		    pf_normalize_ip(m0, dir, reason) != PF_PASS) {
			*action = PF_DROP;
			return (-1);
		}
		m = *m0;
		if (m == NULL) {
			/* packet sits in reassembly queue, no error */
			*action = PF_PASS;
			return (-1);
		}
		/* refetch header, recalc offset and update pd */
		h = mtod(m, struct ip *);
		*off = h->ip_hl << 2;

		pd->src = (struct pf_addr *)&h->ip_src;
		pd->dst = (struct pf_addr *)&h->ip_dst;
		pd->sport = pd->dport = NULL;
		pd->ip_sum = &h->ip_sum;
		pd->proto_sum = NULL;
		pd->virtual_proto = pd->proto = h->ip_p;
		pd->dir = dir;
		pd->sidx = (dir == PF_IN) ? 0 : 1;
		pd->didx = (dir == PF_IN) ?
		    1 : 0;
		pd->tos = h->ip_tos;
		pd->tot_len = ntohs(h->ip_len);
		pd->rdomain = rtable_l2(m->m_pkthdr.rdomain);
		if (h->ip_hl > 5)	/* has options */
			pd->badopts++;

		if (h->ip_off & htons(IP_MF | IP_OFFMASK)) {
			/*
			 * handle fragments that aren't reassembled by
			 * normalization
			 */
			pd->virtual_proto = PF_VPROTO_FRAGMENT;
			if (kif == NULL || r == NULL)	/* pflog */
				*action = PF_DROP;
			else
				*action = pf_test_rule(r, s, dir, kif,
				    m, *off, pd, a, ruleset, *hdrlen);
			if (*action != PF_PASS)
				REASON_SET(reason, PFRES_FRAG);
			return (-1);
		}
		break;
	}
#endif
#ifdef INET6
	case AF_INET6: {
		struct ip6_hdr *h;
		int extoff, fragoff;
		u_int32_t jumbolen;
		u_int8_t nxt;

		/* Check for illegal packets */
		if (m->m_pkthdr.len < (int)sizeof(struct ip6_hdr)) {
			*action = PF_DROP;
			REASON_SET(reason, PFRES_SHORT);
			return (-1);
		}

		h = mtod(m, struct ip6_hdr *);
		*off = 0;

		if (m->m_pkthdr.len <
		    sizeof(struct ip6_hdr) + ntohs(h->ip6_plen)) {
			*action = PF_DROP;
			REASON_SET(reason, PFRES_SHORT);
			return (-1);
		}

		if (pf_walk_header6(m, h, off, &extoff, &fragoff, &nxt,
		    &jumbolen, reason) != PF_PASS) {
			*action = PF_DROP;
			return (-1);
		}

		if (pf_status.reass && fragoff != 0) {
			/* packet reassembly */
			if (pf_normalize_ip6(m0, dir, fragoff, extoff, reason)
			    != PF_PASS) {
				*action = PF_DROP;
				return (-1);
			}
			m = *m0;
			if (m == NULL) {
				/* packet sits in reassembly queue, no error */
				*action = PF_PASS;
				return (-1);
			}

			/* refetch header, recalc offset, then update pd */
			h = mtod(m, struct ip6_hdr *);
			*off = 0;

			if (pf_walk_header6(m, h, off, &extoff, &fragoff, &nxt,
			    &jumbolen, reason) != PF_PASS) {
				*action = PF_DROP;
				return (-1);
			}
		}

#if 1
		/*
		 * we do not support jumbogram yet. if we keep going, zero
		 * ip6_plen will do something bad, so drop the packet for now.
		 */
		if (jumbolen != 0) {
			*action = PF_DROP;
			REASON_SET(reason, PFRES_NORM);
			return (-1);
		}
#endif

		pd->src = (struct pf_addr *)&h->ip6_src;
		pd->dst = (struct pf_addr *)&h->ip6_dst;
		pd->sport = pd->dport = NULL;
		pd->ip_sum = NULL;
		pd->proto_sum = NULL;
		pd->dir = dir;
		pd->sidx = (dir == PF_IN) ? 0 : 1;
		pd->didx = (dir == PF_IN) ? 1 : 0;
		pd->tos = 0;
		pd->tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
		pd->virtual_proto = pd->proto = nxt;

		if (fragoff != 0) {
			/*
			 * handle fragments that aren't reassembled by
			 * normalization
			 */
			pd->virtual_proto = PF_VPROTO_FRAGMENT;
			if (kif == NULL || r == NULL)	/* pflog */
				*action = PF_DROP;
			else
				*action = pf_test_rule(r, s, dir, kif,
				    m, *off, pd, a, ruleset, *hdrlen);
			if (*action != PF_PASS)
				REASON_SET(reason, PFRES_FRAG);
			return (-1);
		}
		break;
	}
#endif
	default:
		panic("pf_setup_pdesc called with illegal af %u", af);

	}

	/* pull the transport header and record the port pointers */
	switch (pd->proto) {
	case IPPROTO_TCP: {
		struct tcphdr *th = pd->hdr.tcp;

		if (!pf_pull_hdr(m, *off, th, sizeof(*th), action, reason, af))
			return (-1);
		*hdrlen = sizeof(*th);
		pd->p_len = pd->tot_len - *off - (th->th_off << 2);
		pd->sport = &th->th_sport;
		pd->dport = &th->th_dport;
		break;
	}
	case IPPROTO_UDP: {
		struct udphdr *uh = pd->hdr.udp;

		if (!pf_pull_hdr(m, *off, uh, sizeof(*uh), action, reason, af))
			return (-1);
		*hdrlen = sizeof(*uh);
		/* sanity-check the UDP length against the packet */
		if (uh->uh_dport == 0 ||
		    ntohs(uh->uh_ulen) > m->m_pkthdr.len - *off ||
		    ntohs(uh->uh_ulen) < sizeof(struct udphdr)) {
			*action = PF_DROP;
			REASON_SET(reason, PFRES_SHORT);
			return (-1);
		}
		pd->sport = &uh->uh_sport;
		pd->dport = &uh->uh_dport;
		break;
	}
	case IPPROTO_ICMP: {
		if (!pf_pull_hdr(m, *off, pd->hdr.icmp, ICMP_MINLEN,
		    action, reason, af))
			return (-1);
		*hdrlen = ICMP_MINLEN;
		break;
	}
#ifdef INET6
	case IPPROTO_ICMPV6: {
		size_t icmp_hlen = sizeof(struct icmp6_hdr);

		if (!pf_pull_hdr(m, *off, pd->hdr.icmp6, icmp_hlen,
		    action, reason, af))
			return (-1);
		/* ICMP headers we look further into to match state */
		switch (pd->hdr.icmp6->icmp6_type) {
		case MLD_LISTENER_QUERY:
		case MLD_LISTENER_REPORT:
			icmp_hlen = sizeof(struct mld_hdr);
			break;
		case ND_NEIGHBOR_SOLICIT:
		case ND_NEIGHBOR_ADVERT:
			icmp_hlen = sizeof(struct nd_neighbor_solicit);
			break;
		}
		/* re-pull with the larger, type-specific length */
		if (icmp_hlen > sizeof(struct icmp6_hdr) &&
		    !pf_pull_hdr(m, *off, pd->hdr.icmp6, icmp_hlen,
		    action, reason, af))
			return (-1);
		*hdrlen = icmp_hlen;
		break;
	}
#endif /* INET6 */
	}
	return (0);
}

/*
 * Bump interface, rule, anchor, state, source-node and table counters
 * for a filtered packet.  Rule/state counters are only touched when the
 * packet passed or matched an explicit block rule.
 */
void
pf_counters_inc(int dir, int action, struct pf_pdesc *pd,
    struct pfi_kif *kif, struct pf_state *s,
    struct pf_rule *r, struct pf_rule *a)
{
	int dirndx;
	kif->pfik_bytes[pd->af == AF_INET6][dir == PF_OUT][action != PF_PASS]
	    += pd->tot_len;
	kif->pfik_packets[pd->af == AF_INET6][dir == PF_OUT][action != PF_PASS]++;

	if (action == PF_PASS || r->action == PF_DROP) {
		dirndx = (dir == PF_OUT);
		r->packets[dirndx]++;
		r->bytes[dirndx] += pd->tot_len;
		if (a != NULL) {
			a->packets[dirndx]++;
			a->bytes[dirndx] += pd->tot_len;
		}
		if (s != NULL) {
			struct pf_rule_item	*ri;
			struct pf_sn_item	*sni;

			SLIST_FOREACH(sni, &s->src_nodes, next) {
				sni->sn->packets[dirndx]++;
				sni->sn->bytes[dirndx] += pd->tot_len;
			}
			/* state counters index by the state's own direction */
			dirndx = (dir == s->direction) ? 0 : 1;
			s->packets[dirndx]++;
			s->bytes[dirndx] += pd->tot_len;

			SLIST_FOREACH(ri, &s->match_rules, entry) {
				ri->r->packets[dirndx]++;
				ri->r->bytes[dirndx] += pd->tot_len;
			}
		}
		if (r->src.addr.type == PF_ADDR_TABLE)
			pfr_update_stats(r->src.addr.p.tbl,
			    (s == NULL) ? pd->src :
			    &s->key[(s->direction == PF_IN)]->
			    addr[(s->direction == PF_OUT)],
			    pd->af, pd->tot_len, dir == PF_OUT,
			    r->action == PF_PASS, r->src.neg);
		if (r->dst.addr.type == PF_ADDR_TABLE)
			pfr_update_stats(r->dst.addr.p.tbl,
			    (s == NULL) ? pd->dst :
			    &s->key[(s->direction == PF_IN)]->
			    addr[(s->direction == PF_IN)],
			    pd->af, pd->tot_len, dir == PF_OUT,
			    r->action == PF_PASS, r->dst.neg);
	}
}

int
pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0,
    struct ether_header *eh)
{
	struct pfi_kif		*kif;
	u_short			 action, reason = 0;
	struct mbuf		*m = *m0;
	struct pf_rule		*a = NULL, *r = &pf_default_rule;
	struct pf_state		*s = NULL;
	struct pf_ruleset	*ruleset = NULL;
	struct pf_pdesc		 pd;
	union pf_headers	 hdrs;
	int			 off, hdrlen;
	int			 dir = (fwdir == PF_FWD) ?
PF_OUT : fwdir; 5921 u_int32_t qid, pqid = 0; 5922 5923 if (!pf_status.running) 5924 return (PF_PASS); 5925 5926 memset(&pd, 0, sizeof(pd)); 5927 pd.hdr.any = &hdrs; 5928 if (ifp->if_type == IFT_CARP && ifp->if_carpdev) 5929 kif = (struct pfi_kif *)ifp->if_carpdev->if_pf_kif; 5930 else 5931 kif = (struct pfi_kif *)ifp->if_pf_kif; 5932 5933 if (kif == NULL) { 5934 DPFPRINTF(LOG_ERR, 5935 "pf_test: kif == NULL, if_xname %s", ifp->if_xname); 5936 return (PF_DROP); 5937 } 5938 if (kif->pfik_flags & PFI_IFLAG_SKIP) 5939 return (PF_PASS); 5940 5941 #ifdef DIAGNOSTIC 5942 if ((m->m_flags & M_PKTHDR) == 0) 5943 panic("non-M_PKTHDR is passed to pf_test"); 5944 #endif /* DIAGNOSTIC */ 5945 5946 if (m->m_pkthdr.pf.flags & PF_TAG_GENERATED) 5947 return (PF_PASS); 5948 5949 if (m->m_pkthdr.pf.flags & PF_TAG_DIVERTED_PACKET) 5950 return (PF_PASS); 5951 5952 if (m->m_pkthdr.pf.flags & PF_TAG_REFRAGMENTED) { 5953 m->m_pkthdr.pf.flags &= ~PF_TAG_REFRAGMENTED; 5954 return (PF_PASS); 5955 } 5956 5957 if (pf_setup_pdesc(af, dir, &pd, m0, &action, &reason, kif, &a, &r, &s, 5958 &ruleset, &off, &hdrlen) == -1) { 5959 if (action == PF_PASS) 5960 return (PF_PASS); 5961 m = *m0; 5962 pd.pflog |= PF_LOG_FORCE; 5963 goto done; 5964 } 5965 pd.eh = eh; 5966 m = *m0; /* pf_setup_pdesc -> pf_normalize messes with m0 */ 5967 5968 switch (pd.proto) { 5969 5970 case IPPROTO_TCP: { 5971 if ((pd.hdr.tcp->th_flags & TH_ACK) && pd.p_len == 0) 5972 pqid = 1; 5973 action = pf_normalize_tcp(dir, m, off, &pd); 5974 if (action == PF_DROP) 5975 goto done; 5976 action = pf_test_state_tcp(&s, dir, kif, m, off, &pd, 5977 &reason); 5978 if (action == PF_PASS) { 5979 #if NPFSYNC > 0 5980 pfsync_update_state(s); 5981 #endif /* NPFSYNC */ 5982 r = s->rule.ptr; 5983 a = s->anchor.ptr; 5984 pd.pflog |= s->log; 5985 } else if (s == NULL) 5986 action = pf_test_rule(&r, &s, dir, kif, 5987 m, off, &pd, &a, &ruleset, hdrlen); 5988 5989 if (s) { 5990 if (s->max_mss) 5991 pf_normalize_mss(m, off, &pd, s->max_mss); 5992 } 
else if (r->max_mss) 5993 pf_normalize_mss(m, off, &pd, r->max_mss); 5994 5995 break; 5996 } 5997 5998 case IPPROTO_UDP: { 5999 action = pf_test_state_udp(&s, dir, kif, m, off, &pd); 6000 if (action == PF_PASS) { 6001 #if NPFSYNC > 0 6002 pfsync_update_state(s); 6003 #endif /* NPFSYNC */ 6004 r = s->rule.ptr; 6005 a = s->anchor.ptr; 6006 pd.pflog |= s->log; 6007 } else if (s == NULL) 6008 action = pf_test_rule(&r, &s, dir, kif, 6009 m, off, &pd, &a, &ruleset, hdrlen); 6010 break; 6011 } 6012 6013 case IPPROTO_ICMP: { 6014 if (af != AF_INET) { 6015 action = PF_DROP; 6016 REASON_SET(&reason, PFRES_NORM); 6017 DPFPRINTF(LOG_NOTICE, 6018 "dropping IPv6 packet with ICMPv4 payload"); 6019 goto done; 6020 } 6021 action = pf_test_state_icmp(&s, dir, kif, m, off, &pd, 6022 &reason); 6023 if (action == PF_PASS) { 6024 #if NPFSYNC > 0 6025 pfsync_update_state(s); 6026 #endif /* NPFSYNC */ 6027 r = s->rule.ptr; 6028 a = s->anchor.ptr; 6029 pd.pflog |= s->log; 6030 } else if (s == NULL) 6031 action = pf_test_rule(&r, &s, dir, kif, 6032 m, off, &pd, &a, &ruleset, hdrlen); 6033 break; 6034 } 6035 6036 case IPPROTO_ICMPV6: { 6037 if (af != AF_INET6) { 6038 action = PF_DROP; 6039 REASON_SET(&reason, PFRES_NORM); 6040 DPFPRINTF(LOG_NOTICE, 6041 "dropping IPv4 packet with ICMPv6 payload"); 6042 goto done; 6043 } 6044 action = pf_test_state_icmp(&s, dir, kif, m, off, &pd, 6045 &reason); 6046 if (action == PF_PASS) { 6047 #if NPFSYNC > 0 6048 pfsync_update_state(s); 6049 #endif /* NPFSYNC */ 6050 r = s->rule.ptr; 6051 a = s->anchor.ptr; 6052 pd.pflog |= s->log; 6053 } else if (s == NULL) 6054 action = pf_test_rule(&r, &s, dir, kif, 6055 m, off, &pd, &a, &ruleset, hdrlen); 6056 break; 6057 } 6058 6059 default: 6060 action = pf_test_state_other(&s, dir, kif, m, &pd); 6061 if (action == PF_PASS) { 6062 #if NPFSYNC > 0 6063 pfsync_update_state(s); 6064 #endif /* NPFSYNC */ 6065 r = s->rule.ptr; 6066 a = s->anchor.ptr; 6067 pd.pflog |= s->log; 6068 } else if (s == NULL) 6069 action = 
pf_test_rule(&r, &s, dir, kif, m, off, 6070 &pd, &a, &ruleset, hdrlen); 6071 break; 6072 } 6073 6074 done: 6075 if (action != PF_DROP) { 6076 if (s) { 6077 /* The non-state case is handled in pf_test_rule() */ 6078 if (action == PF_PASS && pd.badopts && 6079 !(s->state_flags & PFSTATE_ALLOWOPTS)) { 6080 action = PF_DROP; 6081 REASON_SET(&reason, PFRES_IPOPTIONS); 6082 pd.pflog |= PF_LOG_FORCE; 6083 DPFPRINTF(LOG_NOTICE, "dropping packet with " 6084 "ip/ipv6 options in pf_test()"); 6085 } 6086 6087 pf_scrub(m, s->state_flags, pd.af, s->min_ttl, 6088 s->set_tos); 6089 pf_tag_packet(m, s->tag, s->rtableid[pd.didx]); 6090 if (pqid || (pd.tos & IPTOS_LOWDELAY)) { 6091 qid = s->pqid; 6092 if (s->prio[1] != PF_PRIO_NOTSET) 6093 m->m_pkthdr.pf.prio = s->prio[1]; 6094 } else { 6095 qid = s->qid; 6096 if (s->prio[0] != PF_PRIO_NOTSET) 6097 m->m_pkthdr.pf.prio = s->prio[0]; 6098 } 6099 } else { 6100 pf_scrub(m, r->scrub_flags, pd.af, r->min_ttl, 6101 r->set_tos); 6102 if (pqid || (pd.tos & IPTOS_LOWDELAY)) { 6103 qid = r->pqid; 6104 if (r->prio[1] != PF_PRIO_NOTSET) 6105 m->m_pkthdr.pf.prio = r->prio[1]; 6106 } else { 6107 qid = r->qid; 6108 if (r->prio[0] != PF_PRIO_NOTSET) 6109 m->m_pkthdr.pf.prio = r->prio[0]; 6110 } 6111 } 6112 } 6113 6114 if (dir == PF_IN && s && s->key[PF_SK_STACK]) 6115 m->m_pkthdr.pf.statekey = s->key[PF_SK_STACK]; 6116 6117 #ifdef ALTQ 6118 if (action == PF_PASS && qid) { 6119 m->m_pkthdr.pf.qid = qid; 6120 m->m_pkthdr.pf.hdr = mtod(m, caddr_t); /* hints for ecn */ 6121 } 6122 #endif /* ALTQ */ 6123 6124 /* 6125 * connections redirected to loopback should not match sockets 6126 * bound specifically to loopback due to security implications, 6127 * see tcp_input() and in_pcblookup_listen(). 
6128 */ 6129 if (pd.destchg) 6130 if ((af == AF_INET && (ntohl(pd.dst->v4.s_addr) >> 6131 IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) || (af == AF_INET6 && 6132 IN6_IS_ADDR_LOOPBACK(&pd.dst->v6))) 6133 m->m_pkthdr.pf.flags |= PF_TAG_TRANSLATE_LOCALHOST; 6134 /* We need to redo the route lookup on outgoing routes. */ 6135 if (pd.destchg && dir == PF_OUT) 6136 m->m_pkthdr.pf.flags |= PF_TAG_REROUTE; 6137 6138 if (dir == PF_IN && action == PF_PASS && r->divert.port) { 6139 struct pf_divert *divert; 6140 6141 if ((divert = pf_get_divert(m))) { 6142 m->m_pkthdr.pf.flags |= PF_TAG_DIVERTED; 6143 divert->port = r->divert.port; 6144 divert->rdomain = pd.rdomain; 6145 divert->addr = r->divert.addr; 6146 } 6147 } 6148 6149 if (action == PF_PASS && r->divert_packet.port) { 6150 struct pf_divert *divert; 6151 6152 if ((divert = pf_get_divert(m))) 6153 divert->port = r->divert_packet.port; 6154 6155 action = PF_DIVERT; 6156 } 6157 6158 if (pd.pflog) { 6159 struct pf_rule_item *ri; 6160 6161 if (pd.pflog & PF_LOG_FORCE || r->log & PF_LOG_ALL) 6162 PFLOG_PACKET(kif, m, dir, reason, r, a, ruleset, &pd); 6163 if (s) { 6164 SLIST_FOREACH(ri, &s->match_rules, entry) 6165 if (ri->r->log & PF_LOG_ALL) 6166 PFLOG_PACKET(kif, m, dir, reason, 6167 ri->r, a, ruleset, &pd); 6168 } 6169 } 6170 6171 pf_counters_inc(dir, action, &pd, kif, s, r, a); 6172 6173 switch (action) { 6174 case PF_SYNPROXY_DROP: 6175 m_freem(*m0); 6176 case PF_DEFER: 6177 *m0 = NULL; 6178 action = PF_PASS; 6179 break; 6180 case PF_DIVERT: 6181 if (af == AF_INET) 6182 divert_packet(m, dir); 6183 #ifdef INET6 6184 if (af == AF_INET6) 6185 divert6_packet(m, dir); 6186 #endif 6187 *m0 = NULL; 6188 action = PF_PASS; 6189 break; 6190 default: 6191 /* pf_route can free the mbuf causing *m0 to become NULL */ 6192 if (r->rt) { 6193 if (af == AF_INET) 6194 pf_route(m0, r, dir, kif->pfik_ifp, s); 6195 #ifdef INET6 6196 if (af == AF_INET6) 6197 pf_route6(m0, r, dir, kif->pfik_ifp, s); 6198 #endif 6199 } 6200 break; 6201 } 6202 6203 
#ifdef INET6 6204 /* if reassembled packet passed, create new fragments */ 6205 if (pf_status.reass && action == PF_PASS && *m0 && fwdir == PF_FWD) { 6206 struct m_tag *mtag; 6207 if ((mtag = m_tag_find(m, PACKET_TAG_PF_REASSEMBLED, NULL))) 6208 action = pf_refragment6(m0, mtag, fwdir); 6209 } 6210 #endif 6211 6212 return (action); 6213 } 6214 6215 int 6216 pf_check_congestion(struct ifqueue *ifq) 6217 { 6218 if (ifq->ifq_congestion) 6219 return (1); 6220 else 6221 return (0); 6222 } 6223 6224 /* 6225 * must be called whenever any addressing information such as 6226 * address, port, protocol has changed 6227 */ 6228 void 6229 pf_pkt_addr_changed(struct mbuf *m) 6230 { 6231 m->m_pkthdr.pf.statekey = NULL; 6232 } 6233