1 /* $OpenBSD: pf.c,v 1.600 2008/06/26 03:56:20 mcbride Exp $ */ 2 3 /* 4 * Copyright (c) 2001 Daniel Hartmeier 5 * Copyright (c) 2002 - 2008 Henning Brauer 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * - Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * - Redistributions in binary form must reproduce the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer in the documentation and/or other materials provided 17 * with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 22 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 23 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 25 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 29 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 * 32 * Effort sponsored in part by the Defense Advanced Research Projects 33 * Agency (DARPA) and Air Force Research Laboratory, Air Force 34 * Materiel Command, USAF, under agreement number F30602-01-2-0537. 
35 * 36 */ 37 38 #include "bpfilter.h" 39 #include "pflog.h" 40 #include "pfsync.h" 41 42 #include <sys/param.h> 43 #include <sys/systm.h> 44 #include <sys/mbuf.h> 45 #include <sys/filio.h> 46 #include <sys/socket.h> 47 #include <sys/socketvar.h> 48 #include <sys/kernel.h> 49 #include <sys/time.h> 50 #include <sys/pool.h> 51 #include <sys/proc.h> 52 #include <sys/rwlock.h> 53 54 #include <crypto/md5.h> 55 56 #include <net/if.h> 57 #include <net/if_types.h> 58 #include <net/bpf.h> 59 #include <net/route.h> 60 #include <net/radix_mpath.h> 61 62 #include <netinet/in.h> 63 #include <netinet/in_var.h> 64 #include <netinet/in_systm.h> 65 #include <netinet/ip.h> 66 #include <netinet/ip_var.h> 67 #include <netinet/tcp.h> 68 #include <netinet/tcp_seq.h> 69 #include <netinet/udp.h> 70 #include <netinet/ip_icmp.h> 71 #include <netinet/in_pcb.h> 72 #include <netinet/tcp_timer.h> 73 #include <netinet/tcp_var.h> 74 #include <netinet/udp_var.h> 75 #include <netinet/icmp_var.h> 76 #include <netinet/if_ether.h> 77 78 #include <dev/rndvar.h> 79 #include <net/pfvar.h> 80 #include <net/if_pflog.h> 81 82 #if NPFSYNC > 0 83 #include <net/if_pfsync.h> 84 #endif /* NPFSYNC > 0 */ 85 86 #ifdef INET6 87 #include <netinet/ip6.h> 88 #include <netinet/in_pcb.h> 89 #include <netinet/icmp6.h> 90 #include <netinet6/nd6.h> 91 #endif /* INET6 */ 92 93 94 #define DPFPRINTF(n, x) if (pf_status.debug >= (n)) printf x 95 96 /* 97 * Global variables 98 */ 99 100 /* state tables */ 101 struct pf_state_tree pf_statetbl; 102 103 struct pf_altqqueue pf_altqs[2]; 104 struct pf_palist pf_pabuf; 105 struct pf_altqqueue *pf_altqs_active; 106 struct pf_altqqueue *pf_altqs_inactive; 107 struct pf_status pf_status; 108 109 u_int32_t ticket_altqs_active; 110 u_int32_t ticket_altqs_inactive; 111 int altqs_inactive_open; 112 u_int32_t ticket_pabuf; 113 114 MD5_CTX pf_tcp_secret_ctx; 115 u_char pf_tcp_secret[16]; 116 int pf_tcp_secret_init; 117 int pf_tcp_iss_off; 118 119 struct pf_anchor_stackframe { 120 struct 
pf_ruleset *rs; 121 struct pf_rule *r; 122 struct pf_anchor_node *parent; 123 struct pf_anchor *child; 124 } pf_anchor_stack[64]; 125 126 struct pool pf_src_tree_pl, pf_rule_pl, pf_pooladdr_pl; 127 struct pool pf_state_pl, pf_state_key_pl, pf_state_item_pl; 128 struct pool pf_altq_pl; 129 130 void pf_print_host(struct pf_addr *, u_int16_t, u_int8_t); 131 132 void pf_init_threshold(struct pf_threshold *, u_int32_t, 133 u_int32_t); 134 void pf_add_threshold(struct pf_threshold *); 135 int pf_check_threshold(struct pf_threshold *); 136 137 void pf_change_ap(struct pf_addr *, u_int16_t *, 138 u_int16_t *, u_int16_t *, struct pf_addr *, 139 u_int16_t, u_int8_t, sa_family_t); 140 int pf_modulate_sack(struct mbuf *, int, struct pf_pdesc *, 141 struct tcphdr *, struct pf_state_peer *); 142 #ifdef INET6 143 void pf_change_a6(struct pf_addr *, u_int16_t *, 144 struct pf_addr *, u_int8_t); 145 #endif /* INET6 */ 146 void pf_change_icmp(struct pf_addr *, u_int16_t *, 147 struct pf_addr *, struct pf_addr *, u_int16_t, 148 u_int16_t *, u_int16_t *, u_int16_t *, 149 u_int16_t *, u_int8_t, sa_family_t); 150 void pf_send_tcp(const struct pf_rule *, sa_family_t, 151 const struct pf_addr *, const struct pf_addr *, 152 u_int16_t, u_int16_t, u_int32_t, u_int32_t, 153 u_int8_t, u_int16_t, u_int16_t, u_int8_t, int, 154 u_int16_t, struct ether_header *, struct ifnet *); 155 void pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t, 156 sa_family_t, struct pf_rule *); 157 struct pf_rule *pf_match_translation(struct pf_pdesc *, struct mbuf *, 158 int, int, struct pfi_kif *, 159 struct pf_addr *, u_int16_t, struct pf_addr *, 160 u_int16_t, int); 161 struct pf_rule *pf_get_translation(struct pf_pdesc *, struct mbuf *, 162 int, int, struct pfi_kif *, struct pf_src_node **, 163 struct pf_state_key **, struct pf_state_key **, 164 struct pf_state_key **, struct pf_state_key **, 165 struct pf_addr *, struct pf_addr *, 166 u_int16_t, u_int16_t); 167 void pf_detach_state(struct pf_state *, int); 168 
struct pf_state_key *pf_state_key_insert(struct pf_state_key *, 169 struct pf_state *); 170 int pf_state_key_setup(struct pf_pdesc *, struct pf_rule *, 171 struct pf_state_key **, struct pf_state_key **, 172 struct pf_state_key **, struct pf_state_key **, 173 struct pf_addr *, struct pf_addr *, 174 u_int16_t, u_int16_t); 175 void pf_state_key_detach(struct pf_state_key *, 176 struct pf_state *, int); 177 u_int32_t pf_tcp_iss(struct pf_pdesc *); 178 int pf_test_rule(struct pf_rule **, struct pf_state **, 179 int, struct pfi_kif *, struct mbuf *, int, 180 void *, struct pf_pdesc *, struct pf_rule **, 181 struct pf_ruleset **, struct ifqueue *); 182 static __inline int pf_create_state(struct pf_rule *, struct pf_rule *, 183 struct pf_rule *, struct pf_pdesc *, 184 struct pf_src_node *, struct pf_state_key *, 185 struct pf_state_key *, struct pf_state_key *, 186 struct pf_state_key *, struct mbuf *, int, 187 u_int16_t, u_int16_t, int *, struct pfi_kif *, 188 struct pf_state **, int, u_int16_t, u_int16_t, 189 int); 190 int pf_test_fragment(struct pf_rule **, int, 191 struct pfi_kif *, struct mbuf *, void *, 192 struct pf_pdesc *, struct pf_rule **, 193 struct pf_ruleset **); 194 int pf_tcp_track_full(struct pf_state_peer *, 195 struct pf_state_peer *, struct pf_state **, 196 struct pfi_kif *, struct mbuf *, int, 197 struct pf_pdesc *, u_short *, int *); 198 int pf_tcp_track_sloppy(struct pf_state_peer *, 199 struct pf_state_peer *, struct pf_state **, 200 struct pf_pdesc *, u_short *); 201 int pf_test_state_tcp(struct pf_state **, int, 202 struct pfi_kif *, struct mbuf *, int, 203 void *, struct pf_pdesc *, u_short *); 204 int pf_test_state_udp(struct pf_state **, int, 205 struct pfi_kif *, struct mbuf *, int, 206 void *, struct pf_pdesc *); 207 int pf_test_state_icmp(struct pf_state **, int, 208 struct pfi_kif *, struct mbuf *, int, 209 void *, struct pf_pdesc *, u_short *); 210 int pf_test_state_other(struct pf_state **, int, 211 struct pfi_kif *, struct mbuf *, 
struct pf_pdesc *); 212 void pf_step_into_anchor(int *, struct pf_ruleset **, int, 213 struct pf_rule **, struct pf_rule **, int *); 214 int pf_step_out_of_anchor(int *, struct pf_ruleset **, 215 int, struct pf_rule **, struct pf_rule **, 216 int *); 217 void pf_hash(struct pf_addr *, struct pf_addr *, 218 struct pf_poolhashkey *, sa_family_t); 219 int pf_map_addr(u_int8_t, struct pf_rule *, 220 struct pf_addr *, struct pf_addr *, 221 struct pf_addr *, struct pf_src_node **); 222 int pf_get_sport(sa_family_t, u_int8_t, struct pf_rule *, 223 struct pf_addr *, struct pf_addr *, u_int16_t, 224 struct pf_addr *, u_int16_t*, u_int16_t, u_int16_t, 225 struct pf_src_node **); 226 void pf_route(struct mbuf **, struct pf_rule *, int, 227 struct ifnet *, struct pf_state *, 228 struct pf_pdesc *); 229 void pf_route6(struct mbuf **, struct pf_rule *, int, 230 struct ifnet *, struct pf_state *, 231 struct pf_pdesc *); 232 int pf_socket_lookup(int, struct pf_pdesc *); 233 u_int8_t pf_get_wscale(struct mbuf *, int, u_int16_t, 234 sa_family_t); 235 u_int16_t pf_get_mss(struct mbuf *, int, u_int16_t, 236 sa_family_t); 237 u_int16_t pf_calc_mss(struct pf_addr *, sa_family_t, 238 u_int16_t); 239 void pf_set_rt_ifp(struct pf_state *, 240 struct pf_addr *); 241 int pf_check_proto_cksum(struct mbuf *, int, int, 242 u_int8_t, sa_family_t); 243 struct pf_divert *pf_get_divert(struct mbuf *); 244 void pf_print_state_parts(struct pf_state *, 245 struct pf_state_key *, struct pf_state_key *); 246 int pf_addr_wrap_neq(struct pf_addr_wrap *, 247 struct pf_addr_wrap *); 248 struct pf_state *pf_find_state(struct pfi_kif *, 249 struct pf_state_key_cmp *, u_int, struct mbuf *); 250 int pf_src_connlimit(struct pf_state **); 251 void pf_keyins_err(struct pf_state *, struct pf_state_key *, 252 struct pf_state_key *, char *, u_int8_t); 253 int pf_check_congestion(struct ifqueue *); 254 255 extern struct pool pfr_ktable_pl; 256 extern struct pool pfr_kentry_pl; 257 258 struct pf_pool_limit 
pf_pool_limits[PF_LIMIT_MAX] = { 259 { &pf_state_pl, PFSTATE_HIWAT }, 260 { &pf_src_tree_pl, PFSNODE_HIWAT }, 261 { &pf_frent_pl, PFFRAG_FRENT_HIWAT }, 262 { &pfr_ktable_pl, PFR_KTABLE_HIWAT }, 263 { &pfr_kentry_pl, PFR_KENTRY_HIWAT } 264 }; 265 266 #define STATE_LOOKUP(i, k, d, s, m) \ 267 do { \ 268 s = pf_find_state(i, k, d, m); \ 269 if (s == NULL || (s)->timeout == PFTM_PURGE) \ 270 return (PF_DROP); \ 271 if (d == PF_OUT && \ 272 (((s)->rule.ptr->rt == PF_ROUTETO && \ 273 (s)->rule.ptr->direction == PF_OUT) || \ 274 ((s)->rule.ptr->rt == PF_REPLYTO && \ 275 (s)->rule.ptr->direction == PF_IN)) && \ 276 (s)->rt_kif != NULL && \ 277 (s)->rt_kif != i) \ 278 return (PF_PASS); \ 279 } while (0) 280 281 #define BOUND_IFACE(r, k) \ 282 ((r)->rule_flag & PFRULE_IFBOUND) ? (k) : pfi_all 283 284 #define STATE_INC_COUNTERS(s) \ 285 do { \ 286 s->rule.ptr->states_cur++; \ 287 s->rule.ptr->states_tot++; \ 288 if (s->anchor.ptr != NULL) { \ 289 s->anchor.ptr->states_cur++; \ 290 s->anchor.ptr->states_tot++; \ 291 } \ 292 if (s->nat_rule.ptr != NULL) { \ 293 s->nat_rule.ptr->states_cur++; \ 294 s->nat_rule.ptr->states_tot++; \ 295 } \ 296 } while (0) 297 298 #define STATE_DEC_COUNTERS(s) \ 299 do { \ 300 if (s->nat_rule.ptr != NULL) \ 301 s->nat_rule.ptr->states_cur--; \ 302 if (s->anchor.ptr != NULL) \ 303 s->anchor.ptr->states_cur--; \ 304 s->rule.ptr->states_cur--; \ 305 } while (0) 306 307 static __inline int pf_src_compare(struct pf_src_node *, struct pf_src_node *); 308 static __inline int pf_state_compare_key(struct pf_state_key *, 309 struct pf_state_key *); 310 static __inline int pf_state_compare_id(struct pf_state *, 311 struct pf_state *); 312 313 struct pf_src_tree tree_src_tracking; 314 315 struct pf_state_tree_id tree_id; 316 struct pf_state_queue state_list; 317 318 RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare); 319 RB_GENERATE(pf_state_tree, pf_state_key, entry, pf_state_compare_key); 320 RB_GENERATE(pf_state_tree_id, pf_state, 321 entry_id, 
pf_state_compare_id); 322 323 #define PF_DT_SKIP_STATETREE 0x01 324 325 static __inline int 326 pf_src_compare(struct pf_src_node *a, struct pf_src_node *b) 327 { 328 int diff; 329 330 if (a->rule.ptr > b->rule.ptr) 331 return (1); 332 if (a->rule.ptr < b->rule.ptr) 333 return (-1); 334 if ((diff = a->af - b->af) != 0) 335 return (diff); 336 switch (a->af) { 337 #ifdef INET 338 case AF_INET: 339 if (a->addr.addr32[0] > b->addr.addr32[0]) 340 return (1); 341 if (a->addr.addr32[0] < b->addr.addr32[0]) 342 return (-1); 343 break; 344 #endif /* INET */ 345 #ifdef INET6 346 case AF_INET6: 347 if (a->addr.addr32[3] > b->addr.addr32[3]) 348 return (1); 349 if (a->addr.addr32[3] < b->addr.addr32[3]) 350 return (-1); 351 if (a->addr.addr32[2] > b->addr.addr32[2]) 352 return (1); 353 if (a->addr.addr32[2] < b->addr.addr32[2]) 354 return (-1); 355 if (a->addr.addr32[1] > b->addr.addr32[1]) 356 return (1); 357 if (a->addr.addr32[1] < b->addr.addr32[1]) 358 return (-1); 359 if (a->addr.addr32[0] > b->addr.addr32[0]) 360 return (1); 361 if (a->addr.addr32[0] < b->addr.addr32[0]) 362 return (-1); 363 break; 364 #endif /* INET6 */ 365 } 366 return (0); 367 } 368 369 #ifdef INET6 370 void 371 pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af) 372 { 373 switch (af) { 374 #ifdef INET 375 case AF_INET: 376 dst->addr32[0] = src->addr32[0]; 377 break; 378 #endif /* INET */ 379 case AF_INET6: 380 dst->addr32[0] = src->addr32[0]; 381 dst->addr32[1] = src->addr32[1]; 382 dst->addr32[2] = src->addr32[2]; 383 dst->addr32[3] = src->addr32[3]; 384 break; 385 } 386 } 387 #endif /* INET6 */ 388 389 void 390 pf_init_threshold(struct pf_threshold *threshold, 391 u_int32_t limit, u_int32_t seconds) 392 { 393 threshold->limit = limit * PF_THRESHOLD_MULT; 394 threshold->seconds = seconds; 395 threshold->count = 0; 396 threshold->last = time_second; 397 } 398 399 void 400 pf_add_threshold(struct pf_threshold *threshold) 401 { 402 u_int32_t t = time_second, diff = t - threshold->last; 
403 404 if (diff >= threshold->seconds) 405 threshold->count = 0; 406 else 407 threshold->count -= threshold->count * diff / 408 threshold->seconds; 409 threshold->count += PF_THRESHOLD_MULT; 410 threshold->last = t; 411 } 412 413 int 414 pf_check_threshold(struct pf_threshold *threshold) 415 { 416 return (threshold->count > threshold->limit); 417 } 418 419 int 420 pf_src_connlimit(struct pf_state **state) 421 { 422 int bad = 0; 423 424 (*state)->src_node->conn++; 425 (*state)->src.tcp_est = 1; 426 pf_add_threshold(&(*state)->src_node->conn_rate); 427 428 if ((*state)->rule.ptr->max_src_conn && 429 (*state)->rule.ptr->max_src_conn < 430 (*state)->src_node->conn) { 431 pf_status.lcounters[LCNT_SRCCONN]++; 432 bad++; 433 } 434 435 if ((*state)->rule.ptr->max_src_conn_rate.limit && 436 pf_check_threshold(&(*state)->src_node->conn_rate)) { 437 pf_status.lcounters[LCNT_SRCCONNRATE]++; 438 bad++; 439 } 440 441 if (!bad) 442 return (0); 443 444 if ((*state)->rule.ptr->overload_tbl) { 445 struct pfr_addr p; 446 u_int32_t killed = 0; 447 448 pf_status.lcounters[LCNT_OVERLOAD_TABLE]++; 449 if (pf_status.debug >= PF_DEBUG_MISC) { 450 printf("pf_src_connlimit: blocking address "); 451 pf_print_host(&(*state)->src_node->addr, 0, 452 (*state)->key[PF_SK_WIRE]->af); 453 } 454 455 bzero(&p, sizeof(p)); 456 p.pfra_af = (*state)->key[PF_SK_WIRE]->af; 457 switch ((*state)->key[PF_SK_WIRE]->af) { 458 #ifdef INET 459 case AF_INET: 460 p.pfra_net = 32; 461 p.pfra_ip4addr = (*state)->src_node->addr.v4; 462 break; 463 #endif /* INET */ 464 #ifdef INET6 465 case AF_INET6: 466 p.pfra_net = 128; 467 p.pfra_ip6addr = (*state)->src_node->addr.v6; 468 break; 469 #endif /* INET6 */ 470 } 471 472 pfr_insert_kentry((*state)->rule.ptr->overload_tbl, 473 &p, time_second); 474 475 /* kill existing states if that's required. 
*/ 476 if ((*state)->rule.ptr->flush) { 477 struct pf_state_key *sk; 478 struct pf_state *st; 479 480 pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++; 481 RB_FOREACH(st, pf_state_tree_id, &tree_id) { 482 sk = st->key[PF_SK_WIRE]; 483 /* 484 * Kill states from this source. (Only those 485 * from the same rule if PF_FLUSH_GLOBAL is not 486 * set) 487 */ 488 if (sk->af == 489 (*state)->key[PF_SK_WIRE]->af && 490 (((*state)->direction == PF_OUT && 491 PF_AEQ(&(*state)->src_node->addr, 492 &sk->addr[0], sk->af)) || 493 ((*state)->direction == PF_IN && 494 PF_AEQ(&(*state)->src_node->addr, 495 &sk->addr[1], sk->af))) && 496 ((*state)->rule.ptr->flush & 497 PF_FLUSH_GLOBAL || 498 (*state)->rule.ptr == st->rule.ptr)) { 499 st->timeout = PFTM_PURGE; 500 st->src.state = st->dst.state = 501 TCPS_CLOSED; 502 killed++; 503 } 504 } 505 if (pf_status.debug >= PF_DEBUG_MISC) 506 printf(", %u states killed", killed); 507 } 508 if (pf_status.debug >= PF_DEBUG_MISC) 509 printf("\n"); 510 } 511 512 /* kill this state */ 513 (*state)->timeout = PFTM_PURGE; 514 (*state)->src.state = (*state)->dst.state = TCPS_CLOSED; 515 return (1); 516 } 517 518 int 519 pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule, 520 struct pf_addr *src, sa_family_t af) 521 { 522 struct pf_src_node k; 523 524 if (*sn == NULL) { 525 k.af = af; 526 PF_ACPY(&k.addr, src, af); 527 if (rule->rule_flag & PFRULE_RULESRCTRACK || 528 rule->rpool.opts & PF_POOL_STICKYADDR) 529 k.rule.ptr = rule; 530 else 531 k.rule.ptr = NULL; 532 pf_status.scounters[SCNT_SRC_NODE_SEARCH]++; 533 *sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k); 534 } 535 if (*sn == NULL) { 536 if (!rule->max_src_nodes || 537 rule->src_nodes < rule->max_src_nodes) 538 (*sn) = pool_get(&pf_src_tree_pl, PR_NOWAIT | PR_ZERO); 539 else 540 pf_status.lcounters[LCNT_SRCNODES]++; 541 if ((*sn) == NULL) 542 return (-1); 543 544 pf_init_threshold(&(*sn)->conn_rate, 545 rule->max_src_conn_rate.limit, 546 rule->max_src_conn_rate.seconds); 547 548 
(*sn)->af = af; 549 if (rule->rule_flag & PFRULE_RULESRCTRACK || 550 rule->rpool.opts & PF_POOL_STICKYADDR) 551 (*sn)->rule.ptr = rule; 552 else 553 (*sn)->rule.ptr = NULL; 554 PF_ACPY(&(*sn)->addr, src, af); 555 if (RB_INSERT(pf_src_tree, 556 &tree_src_tracking, *sn) != NULL) { 557 if (pf_status.debug >= PF_DEBUG_MISC) { 558 printf("pf: src_tree insert failed: "); 559 pf_print_host(&(*sn)->addr, 0, af); 560 printf("\n"); 561 } 562 pool_put(&pf_src_tree_pl, *sn); 563 return (-1); 564 } 565 (*sn)->creation = time_second; 566 (*sn)->ruletype = rule->action; 567 if ((*sn)->rule.ptr != NULL) 568 (*sn)->rule.ptr->src_nodes++; 569 pf_status.scounters[SCNT_SRC_NODE_INSERT]++; 570 pf_status.src_nodes++; 571 } else { 572 if (rule->max_src_states && 573 (*sn)->states >= rule->max_src_states) { 574 pf_status.lcounters[LCNT_SRCSTATES]++; 575 return (-1); 576 } 577 } 578 return (0); 579 } 580 581 void 582 pf_keyins_err(struct pf_state *s, struct pf_state_key *skw, 583 struct pf_state_key *sks, char *side, u_int8_t direction) 584 { 585 if (pf_status.debug >= PF_DEBUG_MISC) { 586 printf("pf: %s key insert failed: ", side, s->kif->pfik_name); 587 pf_print_state_parts(s, skw, sks); 588 printf("\n"); 589 } 590 } 591 592 /* state table stuff */ 593 594 static __inline int 595 pf_state_compare_key(struct pf_state_key *a, struct pf_state_key *b) 596 { 597 int diff; 598 599 if ((diff = a->proto - b->proto) != 0) 600 return (diff); 601 if ((diff = a->af - b->af) != 0) 602 return (diff); 603 switch (a->af) { 604 #ifdef INET 605 case AF_INET: 606 if (a->addr[0].addr32[0] > b->addr[0].addr32[0]) 607 return (1); 608 if (a->addr[0].addr32[0] < b->addr[0].addr32[0]) 609 return (-1); 610 if (a->addr[1].addr32[0] > b->addr[1].addr32[0]) 611 return (1); 612 if (a->addr[1].addr32[0] < b->addr[1].addr32[0]) 613 return (-1); 614 break; 615 #endif /* INET */ 616 #ifdef INET6 617 case AF_INET6: 618 if (a->addr[0].addr32[3] > b->addr[0].addr32[3]) 619 return (1); 620 if (a->addr[0].addr32[3] < 
b->addr[0].addr32[3]) 621 return (-1); 622 if (a->addr[1].addr32[3] > b->addr[1].addr32[3]) 623 return (1); 624 if (a->addr[1].addr32[3] < b->addr[1].addr32[3]) 625 return (-1); 626 if (a->addr[0].addr32[2] > b->addr[0].addr32[2]) 627 return (1); 628 if (a->addr[0].addr32[2] < b->addr[0].addr32[2]) 629 return (-1); 630 if (a->addr[1].addr32[2] > b->addr[1].addr32[2]) 631 return (1); 632 if (a->addr[1].addr32[2] < b->addr[1].addr32[2]) 633 return (-1); 634 if (a->addr[0].addr32[1] > b->addr[0].addr32[1]) 635 return (1); 636 if (a->addr[0].addr32[1] < b->addr[0].addr32[1]) 637 return (-1); 638 if (a->addr[1].addr32[1] > b->addr[1].addr32[1]) 639 return (1); 640 if (a->addr[1].addr32[1] < b->addr[1].addr32[1]) 641 return (-1); 642 if (a->addr[0].addr32[0] > b->addr[0].addr32[0]) 643 return (1); 644 if (a->addr[0].addr32[0] < b->addr[0].addr32[0]) 645 return (-1); 646 if (a->addr[1].addr32[0] > b->addr[1].addr32[0]) 647 return (1); 648 if (a->addr[1].addr32[0] < b->addr[1].addr32[0]) 649 return (-1); 650 break; 651 #endif /* INET6 */ 652 } 653 654 if ((diff = a->port[0] - b->port[0]) != 0) 655 return (diff); 656 if ((diff = a->port[1] - b->port[1]) != 0) 657 return (diff); 658 659 return (0); 660 } 661 662 static __inline int 663 pf_state_compare_id(struct pf_state *a, struct pf_state *b) 664 { 665 if (a->id > b->id) 666 return (1); 667 if (a->id < b->id) 668 return (-1); 669 if (a->creatorid > b->creatorid) 670 return (1); 671 if (a->creatorid < b->creatorid) 672 return (-1); 673 674 return (0); 675 } 676 677 void 678 pf_attach_state(struct pf_state_key *sk, struct pf_state *s, int tail, 679 int where) 680 { 681 struct pf_state_item *si; 682 683 if (where == PF_SK_WIRE || where == PF_SK_BOTH) 684 s->key[PF_SK_WIRE] = sk; 685 if (where == PF_SK_STACK || where == PF_SK_BOTH) 686 s->key[PF_SK_STACK] = sk; 687 688 si = pool_get(&pf_state_item_pl, PR_NOWAIT); 689 si->s = s; 690 691 /* list is sorted, if-bound states before floating */ 692 if (tail) 693 
TAILQ_INSERT_TAIL(&sk->states, si, entry); 694 else 695 TAILQ_INSERT_HEAD(&sk->states, si, entry); 696 } 697 698 void 699 pf_detach_state(struct pf_state *s, int flags) 700 { 701 if (s->key[PF_SK_WIRE] == s->key[PF_SK_STACK]) 702 s->key[PF_SK_WIRE] = NULL; 703 704 if (s->key[PF_SK_STACK] != NULL) { 705 pf_state_key_detach(s->key[PF_SK_STACK], s, flags); 706 s->key[PF_SK_STACK] = NULL; 707 } 708 709 if (s->key[PF_SK_WIRE] != NULL) { 710 pf_state_key_detach(s->key[PF_SK_WIRE], s, flags); 711 s->key[PF_SK_WIRE] = NULL; 712 } 713 } 714 715 void 716 pf_state_key_detach(struct pf_state_key *sk, struct pf_state *s, int flags) 717 { 718 struct pf_state_item *si; 719 720 for (si = TAILQ_FIRST(&sk->states); si->s != s; 721 si = TAILQ_NEXT(si, entry)); 722 723 TAILQ_REMOVE(&sk->states, si, entry); 724 pool_put(&pf_state_item_pl, si); 725 726 if (TAILQ_EMPTY(&sk->states)) { 727 if (!(flags & PF_DT_SKIP_STATETREE)) 728 RB_REMOVE(pf_state_tree, &pf_statetbl, sk); 729 if (sk->reverse) 730 sk->reverse->reverse = NULL; 731 pool_put(&pf_state_key_pl, sk); 732 } 733 } 734 735 struct pf_state_key * 736 pf_alloc_state_key(void) 737 { 738 struct pf_state_key *sk; 739 740 if ((sk = pool_get(&pf_state_key_pl, PR_NOWAIT | PR_ZERO)) == NULL) 741 return (NULL); 742 TAILQ_INIT(&sk->states); 743 744 return (sk); 745 } 746 747 struct pf_state_key * 748 pf_state_key_insert(struct pf_state_key *sk, struct pf_state *s) 749 { 750 struct pf_state_key *cur; 751 struct pf_state_item *si; 752 753 if (sk && (cur = RB_INSERT(pf_state_tree, &pf_statetbl, sk)) != NULL) { 754 /* key exists. check for same kif, if none, add to key */ 755 TAILQ_FOREACH(si, &cur->states, entry) 756 if (si->s->kif == s->kif && 757 si->s->direction == s->direction) { 758 /* collision! 
*/ 759 pf_detach_state(s, PF_DT_SKIP_STATETREE); 760 return (NULL); 761 } 762 pf_detach_state(s, PF_DT_SKIP_STATETREE); 763 return (cur); 764 } 765 return (sk); 766 } 767 768 769 int 770 pf_state_key_setup(struct pf_pdesc *pd, struct pf_rule *nr, 771 struct pf_state_key **skw, struct pf_state_key **sks, 772 struct pf_state_key **skp, struct pf_state_key **nkp, 773 struct pf_addr *saddr, struct pf_addr *daddr, 774 u_int16_t sport, u_int16_t dport) 775 { 776 KASSERT((*skp == NULL && *nkp == NULL)); 777 778 if ((*skp = pf_alloc_state_key()) == NULL) 779 return (ENOMEM); 780 781 PF_ACPY(&(*skp)->addr[pd->sidx], saddr, pd->af); 782 PF_ACPY(&(*skp)->addr[pd->didx], daddr, pd->af); 783 (*skp)->port[pd->sidx] = sport; 784 (*skp)->port[pd->didx] = dport; 785 (*skp)->proto = pd->proto; 786 (*skp)->af = pd->af; 787 788 if (nr != NULL) { 789 if ((*nkp = pf_alloc_state_key()) == NULL) 790 return (ENOMEM); /* cleanup handled in pf_test_rule() */ 791 792 /* XXX maybe just bcopy and TAILQ_INIT(&(*nkp)->states) */ 793 PF_ACPY(&(*nkp)->addr[0], &(*skp)->addr[0], pd->af); 794 PF_ACPY(&(*nkp)->addr[1], &(*skp)->addr[1], pd->af); 795 (*nkp)->port[0] = (*skp)->port[0]; 796 (*nkp)->port[1] = (*skp)->port[1]; 797 (*nkp)->proto = pd->proto; 798 (*nkp)->af = pd->af; 799 } else 800 *nkp = *skp; 801 802 if (pd->dir == PF_IN) { 803 *skw = *skp; 804 *sks = *nkp; 805 } else { 806 *sks = *skp; 807 *skw = *nkp; 808 } 809 return (0); 810 } 811 812 813 int 814 pf_state_insert(struct pfi_kif *kif, struct pf_state_key *skw, 815 struct pf_state_key *sks, struct pf_state *s) 816 { 817 struct pf_state_key *nskw, *nsks; 818 819 s->kif = kif; 820 821 KASSERT((sks != NULL)); 822 KASSERT((skw != NULL)); 823 824 if ((nskw = pf_state_key_insert(skw, s)) == NULL) { 825 pf_keyins_err(s, skw, sks, "wire", s->direction); 826 return (-1); 827 } 828 829 if (skw == sks) { 830 pf_attach_state(nskw, s, kif == pfi_all ? 
1 : 0, PF_SK_BOTH); 831 } else { 832 if ((nsks = pf_state_key_insert(sks, s)) == NULL) { 833 pf_keyins_err(s, skw, sks, "stack", s->direction); 834 return (-1); 835 } 836 pf_attach_state(nskw, s, kif == pfi_all ? 1 : 0, PF_SK_WIRE); 837 pf_attach_state(nsks, s, kif == pfi_all ? 1 : 0, PF_SK_STACK); 838 } 839 840 if (s->id == 0 && s->creatorid == 0) { 841 s->id = htobe64(pf_status.stateid++); 842 s->creatorid = pf_status.hostid; 843 } 844 if (RB_INSERT(pf_state_tree_id, &tree_id, s) != NULL) { 845 if (pf_status.debug >= PF_DEBUG_MISC) { 846 printf("pf: state insert failed: " 847 "id: %016llx creatorid: %08x", 848 betoh64(s->id), ntohl(s->creatorid)); 849 if (s->sync_flags & PFSTATE_FROMSYNC) 850 printf(" (from sync)"); 851 printf("\n"); 852 } 853 pf_detach_state(s, 0); 854 return (-1); 855 } 856 TAILQ_INSERT_TAIL(&state_list, s, entry_list); 857 pf_status.fcounters[FCNT_STATE_INSERT]++; 858 pf_status.states++; 859 pfi_kif_ref(kif, PFI_KIF_REF_STATE); 860 #if NPFSYNC 861 pfsync_insert_state(s); 862 #endif 863 return (0); 864 } 865 866 struct pf_state * 867 pf_find_state_byid(struct pf_state_cmp *key) 868 { 869 pf_status.fcounters[FCNT_STATE_SEARCH]++; 870 871 return (RB_FIND(pf_state_tree_id, &tree_id, (struct pf_state *)key)); 872 } 873 874 struct pf_state * 875 pf_find_state(struct pfi_kif *kif, struct pf_state_key_cmp *key, u_int dir, 876 struct mbuf *m) 877 { 878 struct pf_state_key *sk; 879 struct pf_state_item *si; 880 881 pf_status.fcounters[FCNT_STATE_SEARCH]++; 882 883 if (dir == PF_OUT && m->m_pkthdr.pf.statekey && 884 ((struct pf_state_key *)m->m_pkthdr.pf.statekey)->reverse) 885 sk = ((struct pf_state_key *)m->m_pkthdr.pf.statekey)->reverse; 886 else { 887 if ((sk = RB_FIND(pf_state_tree, &pf_statetbl, 888 (struct pf_state_key *)key)) == NULL) 889 return (NULL); 890 if (dir == PF_OUT && m->m_pkthdr.pf.statekey) { 891 ((struct pf_state_key *) 892 m->m_pkthdr.pf.statekey)->reverse = sk; 893 sk->reverse = m->m_pkthdr.pf.statekey; 894 } 895 } 896 897 if (dir 
== PF_OUT) 898 m->m_pkthdr.pf.statekey = NULL; 899 900 /* list is sorted, if-bound states before floating ones */ 901 TAILQ_FOREACH(si, &sk->states, entry) 902 if ((si->s->kif == pfi_all || si->s->kif == kif) && 903 sk == (dir == PF_IN ? si->s->key[PF_SK_WIRE] : 904 si->s->key[PF_SK_STACK])) 905 return (si->s); 906 907 return (NULL); 908 } 909 910 struct pf_state * 911 pf_find_state_all(struct pf_state_key_cmp *key, u_int dir, int *more) 912 { 913 struct pf_state_key *sk; 914 struct pf_state_item *si, *ret = NULL; 915 916 pf_status.fcounters[FCNT_STATE_SEARCH]++; 917 918 sk = RB_FIND(pf_state_tree, &pf_statetbl, (struct pf_state_key *)key); 919 920 if (sk != NULL) { 921 TAILQ_FOREACH(si, &sk->states, entry) 922 if (dir == PF_INOUT || 923 (sk == (dir == PF_IN ? si->s->key[PF_SK_WIRE] : 924 si->s->key[PF_SK_STACK]))) { 925 if (more == NULL) 926 return (si->s); 927 928 if (ret) 929 (*more)++; 930 else 931 ret = si; 932 } 933 } 934 return (ret ? ret->s : NULL); 935 } 936 937 /* END state table stuff */ 938 939 940 void 941 pf_purge_thread(void *v) 942 { 943 int nloops = 0, s; 944 945 for (;;) { 946 tsleep(pf_purge_thread, PWAIT, "pftm", 1 * hz); 947 948 s = splsoftnet(); 949 950 /* process a fraction of the state table every second */ 951 pf_purge_expired_states(1 + (pf_status.states 952 / pf_default_rule.timeout[PFTM_INTERVAL])); 953 954 /* purge other expired types every PFTM_INTERVAL seconds */ 955 if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) { 956 pf_purge_expired_fragments(); 957 pf_purge_expired_src_nodes(0); 958 nloops = 0; 959 } 960 961 splx(s); 962 } 963 } 964 965 u_int32_t 966 pf_state_expires(const struct pf_state *state) 967 { 968 u_int32_t timeout; 969 u_int32_t start; 970 u_int32_t end; 971 u_int32_t states; 972 973 /* handle all PFTM_* > PFTM_MAX here */ 974 if (state->timeout == PFTM_PURGE) 975 return (time_second); 976 if (state->timeout == PFTM_UNTIL_PACKET) 977 return (0); 978 KASSERT(state->timeout != PFTM_UNLINKED); 979 
KASSERT(state->timeout < PFTM_MAX); 980 timeout = state->rule.ptr->timeout[state->timeout]; 981 if (!timeout) 982 timeout = pf_default_rule.timeout[state->timeout]; 983 start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START]; 984 if (start) { 985 end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END]; 986 states = state->rule.ptr->states_cur; 987 } else { 988 start = pf_default_rule.timeout[PFTM_ADAPTIVE_START]; 989 end = pf_default_rule.timeout[PFTM_ADAPTIVE_END]; 990 states = pf_status.states; 991 } 992 if (end && states > start && start < end) { 993 if (states < end) 994 return (state->expire + timeout * (end - states) / 995 (end - start)); 996 else 997 return (time_second); 998 } 999 return (state->expire + timeout); 1000 } 1001 1002 void 1003 pf_purge_expired_src_nodes(int waslocked) 1004 { 1005 struct pf_src_node *cur, *next; 1006 int locked = waslocked; 1007 1008 for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) { 1009 next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur); 1010 1011 if (cur->states <= 0 && cur->expire <= time_second) { 1012 if (! 
locked) { 1013 rw_enter_write(&pf_consistency_lock); 1014 next = RB_NEXT(pf_src_tree, 1015 &tree_src_tracking, cur); 1016 locked = 1; 1017 } 1018 if (cur->rule.ptr != NULL) { 1019 cur->rule.ptr->src_nodes--; 1020 if (cur->rule.ptr->states_cur <= 0 && 1021 cur->rule.ptr->max_src_nodes <= 0) 1022 pf_rm_rule(NULL, cur->rule.ptr); 1023 } 1024 RB_REMOVE(pf_src_tree, &tree_src_tracking, cur); 1025 pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; 1026 pf_status.src_nodes--; 1027 pool_put(&pf_src_tree_pl, cur); 1028 } 1029 } 1030 1031 if (locked && !waslocked) 1032 rw_exit_write(&pf_consistency_lock); 1033 } 1034 1035 void 1036 pf_src_tree_remove_state(struct pf_state *s) 1037 { 1038 u_int32_t timeout; 1039 1040 if (s->src_node != NULL) { 1041 if (s->src.tcp_est) 1042 --s->src_node->conn; 1043 if (--s->src_node->states <= 0) { 1044 timeout = s->rule.ptr->timeout[PFTM_SRC_NODE]; 1045 if (!timeout) 1046 timeout = 1047 pf_default_rule.timeout[PFTM_SRC_NODE]; 1048 s->src_node->expire = time_second + timeout; 1049 } 1050 } 1051 if (s->nat_src_node != s->src_node && s->nat_src_node != NULL) { 1052 if (--s->nat_src_node->states <= 0) { 1053 timeout = s->rule.ptr->timeout[PFTM_SRC_NODE]; 1054 if (!timeout) 1055 timeout = 1056 pf_default_rule.timeout[PFTM_SRC_NODE]; 1057 s->nat_src_node->expire = time_second + timeout; 1058 } 1059 } 1060 s->src_node = s->nat_src_node = NULL; 1061 } 1062 1063 /* callers should be at splsoftnet */ 1064 void 1065 pf_unlink_state(struct pf_state *cur) 1066 { 1067 if (cur->src.state == PF_TCPS_PROXY_DST) { 1068 /* XXX wire key the right one? 
	 */
		pf_send_tcp(cur->rule.ptr, cur->key[PF_SK_WIRE]->af,
		    &cur->key[PF_SK_WIRE]->addr[1],
		    &cur->key[PF_SK_WIRE]->addr[0],
		    cur->key[PF_SK_WIRE]->port[1],
		    cur->key[PF_SK_WIRE]->port[0],
		    cur->src.seqhi, cur->src.seqlo + 1,
		    TH_RST|TH_ACK, 0, 0, 0, 1, cur->tag, NULL, NULL);
	}
	RB_REMOVE(pf_state_tree_id, &tree_id, cur);
#if NPFSYNC
	/* notify pfsync peers, but only for states this host created */
	if (cur->creatorid == pf_status.hostid)
		pfsync_delete_state(cur);
#endif
	/* mark unlinked; actual free happens in pf_free_state() */
	cur->timeout = PFTM_UNLINKED;
	pf_src_tree_remove_state(cur);
	pf_detach_state(cur, 0);
}

/* callers should be at splsoftnet and hold the
 * write_lock on pf_consistency_lock */
void
pf_free_state(struct pf_state *cur)
{
#if NPFSYNC
	/*
	 * Defer freeing while a pfsync bulk update still references this
	 * state; it stays on state_list and is revisited on a later pass.
	 */
	if (pfsyncif != NULL &&
	    (pfsyncif->sc_bulk_send_next == cur ||
	    pfsyncif->sc_bulk_terminator == cur))
		return;
#endif
	KASSERT(cur->timeout == PFTM_UNLINKED);
	/* drop rule references; rules left unreferenced are reclaimed */
	if (--cur->rule.ptr->states_cur <= 0 &&
	    cur->rule.ptr->src_nodes <= 0)
		pf_rm_rule(NULL, cur->rule.ptr);
	if (cur->nat_rule.ptr != NULL)
		if (--cur->nat_rule.ptr->states_cur <= 0 &&
			cur->nat_rule.ptr->src_nodes <= 0)
			pf_rm_rule(NULL, cur->nat_rule.ptr);
	if (cur->anchor.ptr != NULL)
		if (--cur->anchor.ptr->states_cur <= 0)
			pf_rm_rule(NULL, cur->anchor.ptr);
	pf_normalize_tcp_cleanup(cur);
	pfi_kif_unref(cur->kif, PFI_KIF_REF_STATE);
	TAILQ_REMOVE(&state_list, cur, entry_list);
	if (cur->tag)
		pf_tag_unref(cur->tag);
	pool_put(&pf_state_pl, cur);
	pf_status.fcounters[FCNT_STATE_REMOVALS]++;
	pf_status.states--;
}

/*
 * Examine up to maxcheck states for expiry.  The cursor is static so
 * successive calls resume where the previous scan stopped and the whole
 * list is eventually covered.
 */
void
pf_purge_expired_states(u_int32_t maxcheck)
{
	static struct pf_state	*cur = NULL;
	struct pf_state		*next;
	int			 locked = 0;

	while (maxcheck--) {
		/* wrap to start of list when we hit the end */
		if (cur == NULL) {
			cur = TAILQ_FIRST(&state_list);
			if (cur == NULL)
				break;	/* list empty */
		}

		/* get next state, as cur may get deleted */
		next = TAILQ_NEXT(cur, entry_list);

		if (cur->timeout == PFTM_UNLINKED) {
			/* free unlinked state */
			if (! locked) {
				/* lock lazily, only once freeing is needed */
				rw_enter_write(&pf_consistency_lock);
				locked = 1;
			}
			pf_free_state(cur);
		} else if (pf_state_expires(cur) <= time_second) {
			/* unlink and free expired state */
			pf_unlink_state(cur);
			if (! locked) {
				rw_enter_write(&pf_consistency_lock);
				locked = 1;
			}
			pf_free_state(cur);
		}
		cur = next;
	}

	if (locked)
		rw_exit_write(&pf_consistency_lock);
}

/* resolve a rule's table reference; returns 0 on success, 1 on failure */
int
pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw)
{
	if (aw->type != PF_ADDR_TABLE)
		return (0);
	if ((aw->p.tbl = pfr_attach_table(rs, aw->v.tblname)) == NULL)
		return (1);
	return (0);
}

/* drop a rule's table reference */
void
pf_tbladdr_remove(struct pf_addr_wrap *aw)
{
	if (aw->type != PF_ADDR_TABLE || aw->p.tbl == NULL)
		return;
	pfr_detach_table(aw->p.tbl);
	aw->p.tbl = NULL;
}

/* replace the kernel table pointer with the address count for copyout */
void
pf_tbladdr_copyout(struct pf_addr_wrap *aw)
{
	struct pfr_ktable *kt = aw->p.tbl;

	if (aw->type != PF_ADDR_TABLE || kt == NULL)
		return;
	/* an inactive table reports the count of its active root */
	if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL)
		kt = kt->pfrkt_root;
	aw->p.tbl = NULL;
	aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ?
1190 kt->pfrkt_cnt : -1; 1191 } 1192 1193 void 1194 pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af) 1195 { 1196 switch (af) { 1197 #ifdef INET 1198 case AF_INET: { 1199 u_int32_t a = ntohl(addr->addr32[0]); 1200 printf("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255, 1201 (a>>8)&255, a&255); 1202 if (p) { 1203 p = ntohs(p); 1204 printf(":%u", p); 1205 } 1206 break; 1207 } 1208 #endif /* INET */ 1209 #ifdef INET6 1210 case AF_INET6: { 1211 u_int16_t b; 1212 u_int8_t i, curstart = 255, curend = 0, 1213 maxstart = 0, maxend = 0; 1214 for (i = 0; i < 8; i++) { 1215 if (!addr->addr16[i]) { 1216 if (curstart == 255) 1217 curstart = i; 1218 else 1219 curend = i; 1220 } else { 1221 if (curstart) { 1222 if ((curend - curstart) > 1223 (maxend - maxstart)) { 1224 maxstart = curstart; 1225 maxend = curend; 1226 curstart = 255; 1227 } 1228 } 1229 } 1230 } 1231 for (i = 0; i < 8; i++) { 1232 if (i >= maxstart && i <= maxend) { 1233 if (maxend != 7) { 1234 if (i == maxstart) 1235 printf(":"); 1236 } else { 1237 if (i == maxend) 1238 printf(":"); 1239 } 1240 } else { 1241 b = ntohs(addr->addr16[i]); 1242 printf("%x", b); 1243 if (i < 7) 1244 printf(":"); 1245 } 1246 } 1247 if (p) { 1248 p = ntohs(p); 1249 printf("[%u]", p); 1250 } 1251 break; 1252 } 1253 #endif /* INET6 */ 1254 } 1255 } 1256 1257 void 1258 pf_print_state(struct pf_state *s) 1259 { 1260 pf_print_state_parts(s, NULL, NULL); 1261 } 1262 1263 void 1264 pf_print_state_parts(struct pf_state *s, 1265 struct pf_state_key *skwp, struct pf_state_key *sksp) 1266 { 1267 struct pf_state_key *skw, *sks; 1268 u_int8_t proto, dir; 1269 1270 /* Do our best to fill these, but they're skipped if NULL */ 1271 skw = skwp ? skwp : (s ? s->key[PF_SK_WIRE] : NULL); 1272 sks = sksp ? sksp : (s ? s->key[PF_SK_STACK] : NULL); 1273 proto = skw ? skw->proto : (sks ? sks->proto : 0); 1274 dir = s ? 
s->direction : 0; 1275 1276 switch (proto) { 1277 case IPPROTO_TCP: 1278 printf("TCP"); 1279 break; 1280 case IPPROTO_UDP: 1281 printf("UDP"); 1282 break; 1283 case IPPROTO_ICMP: 1284 printf("ICMP"); 1285 break; 1286 case IPPROTO_ICMPV6: 1287 printf("ICMPV6"); 1288 break; 1289 default: 1290 printf("%u", skw->proto); 1291 break; 1292 } 1293 switch (dir) { 1294 case PF_IN: 1295 printf(" in"); 1296 break; 1297 case PF_OUT: 1298 printf(" out"); 1299 break; 1300 } 1301 if (skw) { 1302 printf(" wire: "); 1303 pf_print_host(&skw->addr[0], skw->port[0], skw->af); 1304 printf(" "); 1305 pf_print_host(&skw->addr[1], skw->port[1], skw->af); 1306 } 1307 if (sks) { 1308 printf(" stack: "); 1309 if (sks != skw) { 1310 pf_print_host(&sks->addr[0], sks->port[0], sks->af); 1311 printf(" "); 1312 pf_print_host(&sks->addr[1], sks->port[1], sks->af); 1313 } else 1314 printf("-"); 1315 } 1316 if (s) { 1317 if (proto == IPPROTO_TCP) { 1318 printf(" [lo=%u high=%u win=%u modulator=%u", 1319 s->src.seqlo, s->src.seqhi, 1320 s->src.max_win, s->src.seqdiff); 1321 if (s->src.wscale && s->dst.wscale) 1322 printf(" wscale=%u", 1323 s->src.wscale & PF_WSCALE_MASK); 1324 printf("]"); 1325 printf(" [lo=%u high=%u win=%u modulator=%u", 1326 s->dst.seqlo, s->dst.seqhi, 1327 s->dst.max_win, s->dst.seqdiff); 1328 if (s->src.wscale && s->dst.wscale) 1329 printf(" wscale=%u", 1330 s->dst.wscale & PF_WSCALE_MASK); 1331 printf("]"); 1332 } 1333 printf(" %u:%u", s->src.state, s->dst.state); 1334 } 1335 } 1336 1337 void 1338 pf_print_flags(u_int8_t f) 1339 { 1340 if (f) 1341 printf(" "); 1342 if (f & TH_FIN) 1343 printf("F"); 1344 if (f & TH_SYN) 1345 printf("S"); 1346 if (f & TH_RST) 1347 printf("R"); 1348 if (f & TH_PUSH) 1349 printf("P"); 1350 if (f & TH_ACK) 1351 printf("A"); 1352 if (f & TH_URG) 1353 printf("U"); 1354 if (f & TH_ECE) 1355 printf("E"); 1356 if (f & TH_CWR) 1357 printf("W"); 1358 } 1359 1360 #define PF_SET_SKIP_STEPS(i) \ 1361 do { \ 1362 while (head[i] != cur) { \ 1363 
head[i]->skip[i].ptr = cur; \ 1364 head[i] = TAILQ_NEXT(head[i], entries); \ 1365 } \ 1366 } while (0) 1367 1368 void 1369 pf_calc_skip_steps(struct pf_rulequeue *rules) 1370 { 1371 struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT]; 1372 int i; 1373 1374 cur = TAILQ_FIRST(rules); 1375 prev = cur; 1376 for (i = 0; i < PF_SKIP_COUNT; ++i) 1377 head[i] = cur; 1378 while (cur != NULL) { 1379 1380 if (cur->kif != prev->kif || cur->ifnot != prev->ifnot) 1381 PF_SET_SKIP_STEPS(PF_SKIP_IFP); 1382 if (cur->direction != prev->direction) 1383 PF_SET_SKIP_STEPS(PF_SKIP_DIR); 1384 if (cur->af != prev->af) 1385 PF_SET_SKIP_STEPS(PF_SKIP_AF); 1386 if (cur->proto != prev->proto) 1387 PF_SET_SKIP_STEPS(PF_SKIP_PROTO); 1388 if (cur->src.neg != prev->src.neg || 1389 pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr)) 1390 PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR); 1391 if (cur->src.port[0] != prev->src.port[0] || 1392 cur->src.port[1] != prev->src.port[1] || 1393 cur->src.port_op != prev->src.port_op) 1394 PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT); 1395 if (cur->dst.neg != prev->dst.neg || 1396 pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr)) 1397 PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR); 1398 if (cur->dst.port[0] != prev->dst.port[0] || 1399 cur->dst.port[1] != prev->dst.port[1] || 1400 cur->dst.port_op != prev->dst.port_op) 1401 PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT); 1402 1403 prev = cur; 1404 cur = TAILQ_NEXT(cur, entries); 1405 } 1406 for (i = 0; i < PF_SKIP_COUNT; ++i) 1407 PF_SET_SKIP_STEPS(i); 1408 } 1409 1410 int 1411 pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2) 1412 { 1413 if (aw1->type != aw2->type) 1414 return (1); 1415 switch (aw1->type) { 1416 case PF_ADDR_ADDRMASK: 1417 case PF_ADDR_RANGE: 1418 if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, 0)) 1419 return (1); 1420 if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, 0)) 1421 return (1); 1422 return (0); 1423 case PF_ADDR_DYNIFTL: 1424 return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt); 1425 case PF_ADDR_NOROUTE: 1426 case 
PF_ADDR_URPFFAILED: 1427 return (0); 1428 case PF_ADDR_TABLE: 1429 return (aw1->p.tbl != aw2->p.tbl); 1430 case PF_ADDR_RTLABEL: 1431 return (aw1->v.rtlabel != aw2->v.rtlabel); 1432 default: 1433 printf("invalid address type: %d\n", aw1->type); 1434 return (1); 1435 } 1436 } 1437 1438 u_int16_t 1439 pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp) 1440 { 1441 u_int32_t l; 1442 1443 if (udp && !cksum) 1444 return (0x0000); 1445 l = cksum + old - new; 1446 l = (l >> 16) + (l & 65535); 1447 l = l & 65535; 1448 if (udp && !l) 1449 return (0xFFFF); 1450 return (l); 1451 } 1452 1453 void 1454 pf_change_ap(struct pf_addr *a, u_int16_t *p, u_int16_t *ic, u_int16_t *pc, 1455 struct pf_addr *an, u_int16_t pn, u_int8_t u, sa_family_t af) 1456 { 1457 struct pf_addr ao; 1458 u_int16_t po = *p; 1459 1460 PF_ACPY(&ao, a, af); 1461 PF_ACPY(a, an, af); 1462 1463 *p = pn; 1464 1465 switch (af) { 1466 #ifdef INET 1467 case AF_INET: 1468 *ic = pf_cksum_fixup(pf_cksum_fixup(*ic, 1469 ao.addr16[0], an->addr16[0], 0), 1470 ao.addr16[1], an->addr16[1], 0); 1471 *p = pn; 1472 *pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc, 1473 ao.addr16[0], an->addr16[0], u), 1474 ao.addr16[1], an->addr16[1], u), 1475 po, pn, u); 1476 break; 1477 #endif /* INET */ 1478 #ifdef INET6 1479 case AF_INET6: 1480 *pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( 1481 pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( 1482 pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc, 1483 ao.addr16[0], an->addr16[0], u), 1484 ao.addr16[1], an->addr16[1], u), 1485 ao.addr16[2], an->addr16[2], u), 1486 ao.addr16[3], an->addr16[3], u), 1487 ao.addr16[4], an->addr16[4], u), 1488 ao.addr16[5], an->addr16[5], u), 1489 ao.addr16[6], an->addr16[6], u), 1490 ao.addr16[7], an->addr16[7], u), 1491 po, pn, u); 1492 break; 1493 #endif /* INET6 */ 1494 } 1495 } 1496 1497 1498 /* Changes a u_int32_t. 
   Uses a void * so there are no align restrictions */
void
pf_change_a(void *a, u_int16_t *c, u_int32_t an, u_int8_t u)
{
	u_int32_t	ao;

	/* memcpy because 'a' may be unaligned */
	memcpy(&ao, a, sizeof(ao));
	memcpy(a, &an, sizeof(u_int32_t));
	/* fold both 16-bit halves of the old/new value into checksum c */
	*c = pf_cksum_fixup(pf_cksum_fixup(*c, ao / 65536, an / 65536, u),
	    ao % 65536, an % 65536, u);
}

#ifdef INET6
/* change an IPv6 address a -> an, fixing up checksum c */
void
pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u)
{
	struct pf_addr	ao;

	PF_ACPY(&ao, a, AF_INET6);
	PF_ACPY(a, an, AF_INET6);

	*c = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
	    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
	    pf_cksum_fixup(pf_cksum_fixup(*c,
	    ao.addr16[0], an->addr16[0], u),
	    ao.addr16[1], an->addr16[1], u),
	    ao.addr16[2], an->addr16[2], u),
	    ao.addr16[3], an->addr16[3], u),
	    ao.addr16[4], an->addr16[4], u),
	    ao.addr16[5], an->addr16[5], u),
	    ao.addr16[6], an->addr16[6], u),
	    ao.addr16[7], an->addr16[7], u);
}
#endif /* INET6 */

/*
 * Rewrite the addresses/port embedded in an ICMP error and fix every
 * affected checksum: ia/ip are the inner (quoted) address/port, oa the
 * outer address (may be NULL), pc the inner protocol checksum, h2c the
 * inner IP checksum, ic the ICMP checksum, hc the outer IP checksum.
 */
void
pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa,
    struct pf_addr *na, u_int16_t np, u_int16_t *pc, u_int16_t *h2c,
    u_int16_t *ic, u_int16_t *hc, u_int8_t u, sa_family_t af)
{
	struct pf_addr	oia, ooa;

	PF_ACPY(&oia, ia, af);
	if (oa)
		PF_ACPY(&ooa, oa, af);

	/* Change inner protocol port, fix inner protocol checksum. */
	if (ip != NULL) {
		u_int16_t	oip = *ip;
		u_int32_t	opc;

		if (pc != NULL)
			opc = *pc;
		*ip = np;
		if (pc != NULL)
			*pc = pf_cksum_fixup(*pc, oip, *ip, u);
		/* the ICMP checksum covers the quoted packet too */
		*ic = pf_cksum_fixup(*ic, oip, *ip, 0);
		if (pc != NULL)
			*ic = pf_cksum_fixup(*ic, opc, *pc, 0);
	}
	/* Change inner ip address, fix inner ip and icmp checksums. */
	PF_ACPY(ia, na, af);
	switch (af) {
#ifdef INET
	case AF_INET: {
		u_int32_t	 oh2c = *h2c;

		*h2c = pf_cksum_fixup(pf_cksum_fixup(*h2c,
		    oia.addr16[0], ia->addr16[0], 0),
		    oia.addr16[1], ia->addr16[1], 0);
		*ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
		    oia.addr16[0], ia->addr16[0], 0),
		    oia.addr16[1], ia->addr16[1], 0);
		*ic = pf_cksum_fixup(*ic, oh2c, *h2c, 0);
		break;
	}
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
		    pf_cksum_fixup(pf_cksum_fixup(*ic,
		    oia.addr16[0], ia->addr16[0], u),
		    oia.addr16[1], ia->addr16[1], u),
		    oia.addr16[2], ia->addr16[2], u),
		    oia.addr16[3], ia->addr16[3], u),
		    oia.addr16[4], ia->addr16[4], u),
		    oia.addr16[5], ia->addr16[5], u),
		    oia.addr16[6], ia->addr16[6], u),
		    oia.addr16[7], ia->addr16[7], u);
		break;
#endif /* INET6 */
	}
	/* Outer ip address, fix outer ip or icmpv6 checksum, if necessary.
	 */
	if (oa) {
		PF_ACPY(oa, na, af);
		switch (af) {
#ifdef INET
		case AF_INET:
			*hc = pf_cksum_fixup(pf_cksum_fixup(*hc,
			    ooa.addr16[0], oa->addr16[0], 0),
			    ooa.addr16[1], oa->addr16[1], 0);
			break;
#endif /* INET */
#ifdef INET6
		case AF_INET6:
			*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
			    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
			    pf_cksum_fixup(pf_cksum_fixup(*ic,
			    ooa.addr16[0], oa->addr16[0], u),
			    ooa.addr16[1], oa->addr16[1], u),
			    ooa.addr16[2], oa->addr16[2], u),
			    ooa.addr16[3], oa->addr16[3], u),
			    ooa.addr16[4], oa->addr16[4], u),
			    ooa.addr16[5], oa->addr16[5], u),
			    ooa.addr16[6], oa->addr16[6], u),
			    ooa.addr16[7], oa->addr16[7], u);
			break;
#endif /* INET6 */
		}
	}
}


/*
 * Need to modulate the sequence numbers in the TCP SACK option
 * (credits to Krzysztof Pfaff for report and patch)
 */
int
pf_modulate_sack(struct mbuf *m, int off, struct pf_pdesc *pd,
    struct tcphdr *th, struct pf_state_peer *dst)
{
	/* hlen: length of the TCP options area */
	int hlen = (th->th_off << 2) - sizeof(*th), thoptlen = hlen;
	u_int8_t opts[MAX_TCPOPTLEN], *opt = opts;
	int copyback = 0, i, olen;
	struct sackblk sack;

#define TCPOLEN_SACKLEN	(TCPOLEN_SACK + 2)
	if (hlen < TCPOLEN_SACKLEN ||
	    !pf_pull_hdr(m, off + sizeof(*th), opts, hlen, NULL, NULL, pd->af))
		return 0;

	/* walk the option list looking for SACK blocks */
	while (hlen >= TCPOLEN_SACKLEN) {
		olen = opt[1];
		switch (*opt) {
		case TCPOPT_EOL:	/* FALLTHROUGH */
		case TCPOPT_NOP:
			opt++;
			hlen--;
			break;
		case TCPOPT_SACK:
			if (olen > hlen)
				olen = hlen;
			if (olen >= TCPOLEN_SACKLEN) {
				/* shift each edge by the state's seqdiff */
				for (i = 2; i + TCPOLEN_SACK <= olen;
				    i += TCPOLEN_SACK) {
					memcpy(&sack, &opt[i], sizeof(sack));
					pf_change_a(&sack.start, &th->th_sum,
					    htonl(ntohl(sack.start) -
					    dst->seqdiff), 0);
					pf_change_a(&sack.end, &th->th_sum,
htonl(ntohl(sack.end) - 1660 dst->seqdiff), 0); 1661 memcpy(&opt[i], &sack, sizeof(sack)); 1662 } 1663 copyback = 1; 1664 } 1665 /* FALLTHROUGH */ 1666 default: 1667 if (olen < 2) 1668 olen = 2; 1669 hlen -= olen; 1670 opt += olen; 1671 } 1672 } 1673 1674 if (copyback) 1675 m_copyback(m, off + sizeof(*th), thoptlen, opts); 1676 return (copyback); 1677 } 1678 1679 void 1680 pf_send_tcp(const struct pf_rule *r, sa_family_t af, 1681 const struct pf_addr *saddr, const struct pf_addr *daddr, 1682 u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack, 1683 u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag, 1684 u_int16_t rtag, struct ether_header *eh, struct ifnet *ifp) 1685 { 1686 struct mbuf *m; 1687 int len, tlen; 1688 #ifdef INET 1689 struct ip *h; 1690 #endif /* INET */ 1691 #ifdef INET6 1692 struct ip6_hdr *h6; 1693 #endif /* INET6 */ 1694 struct tcphdr *th; 1695 char *opt; 1696 1697 /* maximum segment size tcp option */ 1698 tlen = sizeof(struct tcphdr); 1699 if (mss) 1700 tlen += 4; 1701 1702 switch (af) { 1703 #ifdef INET 1704 case AF_INET: 1705 len = sizeof(struct ip) + tlen; 1706 break; 1707 #endif /* INET */ 1708 #ifdef INET6 1709 case AF_INET6: 1710 len = sizeof(struct ip6_hdr) + tlen; 1711 break; 1712 #endif /* INET6 */ 1713 } 1714 1715 /* create outgoing mbuf */ 1716 m = m_gethdr(M_DONTWAIT, MT_HEADER); 1717 if (m == NULL) 1718 return; 1719 if (tag) 1720 m->m_pkthdr.pf.flags |= PF_TAG_GENERATED; 1721 m->m_pkthdr.pf.tag = rtag; 1722 1723 if (r != NULL && r->rtableid >= 0) 1724 m->m_pkthdr.pf.rtableid = r->rtableid; 1725 1726 #ifdef ALTQ 1727 if (r != NULL && r->qid) { 1728 m->m_pkthdr.pf.qid = r->qid; 1729 /* add hints for ecn */ 1730 m->m_pkthdr.pf.hdr = mtod(m, struct ip *); 1731 } 1732 #endif /* ALTQ */ 1733 m->m_data += max_linkhdr; 1734 m->m_pkthdr.len = m->m_len = len; 1735 m->m_pkthdr.rcvif = NULL; 1736 bzero(m->m_data, len); 1737 switch (af) { 1738 #ifdef INET 1739 case AF_INET: 1740 h = mtod(m, struct ip *); 1741 1742 /* 
IP header fields included in the TCP checksum */ 1743 h->ip_p = IPPROTO_TCP; 1744 h->ip_len = htons(tlen); 1745 h->ip_src.s_addr = saddr->v4.s_addr; 1746 h->ip_dst.s_addr = daddr->v4.s_addr; 1747 1748 th = (struct tcphdr *)((caddr_t)h + sizeof(struct ip)); 1749 break; 1750 #endif /* INET */ 1751 #ifdef INET6 1752 case AF_INET6: 1753 h6 = mtod(m, struct ip6_hdr *); 1754 1755 /* IP header fields included in the TCP checksum */ 1756 h6->ip6_nxt = IPPROTO_TCP; 1757 h6->ip6_plen = htons(tlen); 1758 memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr)); 1759 memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr)); 1760 1761 th = (struct tcphdr *)((caddr_t)h6 + sizeof(struct ip6_hdr)); 1762 break; 1763 #endif /* INET6 */ 1764 } 1765 1766 /* TCP header */ 1767 th->th_sport = sport; 1768 th->th_dport = dport; 1769 th->th_seq = htonl(seq); 1770 th->th_ack = htonl(ack); 1771 th->th_off = tlen >> 2; 1772 th->th_flags = flags; 1773 th->th_win = htons(win); 1774 1775 if (mss) { 1776 opt = (char *)(th + 1); 1777 opt[0] = TCPOPT_MAXSEG; 1778 opt[1] = 4; 1779 HTONS(mss); 1780 bcopy((caddr_t)&mss, (caddr_t)(opt + 2), 2); 1781 } 1782 1783 switch (af) { 1784 #ifdef INET 1785 case AF_INET: 1786 /* TCP checksum */ 1787 th->th_sum = in_cksum(m, len); 1788 1789 /* Finish the IP header */ 1790 h->ip_v = 4; 1791 h->ip_hl = sizeof(*h) >> 2; 1792 h->ip_tos = IPTOS_LOWDELAY; 1793 h->ip_len = htons(len); 1794 h->ip_off = htons(ip_mtudisc ? IP_DF : 0); 1795 h->ip_ttl = ttl ? 
ttl : ip_defttl; 1796 h->ip_sum = 0; 1797 if (eh == NULL) { 1798 ip_output(m, (void *)NULL, (void *)NULL, 0, 1799 (void *)NULL, (void *)NULL); 1800 } else { 1801 struct route ro; 1802 struct rtentry rt; 1803 struct ether_header *e = (void *)ro.ro_dst.sa_data; 1804 1805 if (ifp == NULL) { 1806 m_freem(m); 1807 return; 1808 } 1809 rt.rt_ifp = ifp; 1810 ro.ro_rt = &rt; 1811 ro.ro_dst.sa_len = sizeof(ro.ro_dst); 1812 ro.ro_dst.sa_family = pseudo_AF_HDRCMPLT; 1813 bcopy(eh->ether_dhost, e->ether_shost, ETHER_ADDR_LEN); 1814 bcopy(eh->ether_shost, e->ether_dhost, ETHER_ADDR_LEN); 1815 e->ether_type = eh->ether_type; 1816 ip_output(m, (void *)NULL, &ro, IP_ROUTETOETHER, 1817 (void *)NULL, (void *)NULL); 1818 } 1819 break; 1820 #endif /* INET */ 1821 #ifdef INET6 1822 case AF_INET6: 1823 /* TCP checksum */ 1824 th->th_sum = in6_cksum(m, IPPROTO_TCP, 1825 sizeof(struct ip6_hdr), tlen); 1826 1827 h6->ip6_vfc |= IPV6_VERSION; 1828 h6->ip6_hlim = IPV6_DEFHLIM; 1829 1830 ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL); 1831 break; 1832 #endif /* INET6 */ 1833 } 1834 } 1835 1836 void 1837 pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af, 1838 struct pf_rule *r) 1839 { 1840 struct mbuf *m0; 1841 1842 m0 = m_copy(m, 0, M_COPYALL); 1843 m0->m_pkthdr.pf.flags |= PF_TAG_GENERATED; 1844 1845 if (r->rtableid >= 0) 1846 m0->m_pkthdr.pf.rtableid = r->rtableid; 1847 1848 #ifdef ALTQ 1849 if (r->qid) { 1850 m0->m_pkthdr.pf.qid = r->qid; 1851 /* add hints for ecn */ 1852 m0->m_pkthdr.pf.hdr = mtod(m0, struct ip *); 1853 } 1854 #endif /* ALTQ */ 1855 1856 switch (af) { 1857 #ifdef INET 1858 case AF_INET: 1859 icmp_error(m0, type, code, 0, 0); 1860 break; 1861 #endif /* INET */ 1862 #ifdef INET6 1863 case AF_INET6: 1864 icmp6_error(m0, type, code, 0); 1865 break; 1866 #endif /* INET6 */ 1867 } 1868 } 1869 1870 /* 1871 * Return 1 if the addresses a and b match (with mask m), otherwise return 0. 1872 * If n is 0, they match if they are equal. 
   If n is != 0, they match if they
 * are different.
 */
int
pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m,
    struct pf_addr *b, sa_family_t af)
{
	int	match = 0;

	switch (af) {
#ifdef INET
	case AF_INET:
		if ((a->addr32[0] & m->addr32[0]) ==
		    (b->addr32[0] & m->addr32[0]))
			match++;
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		if (((a->addr32[0] & m->addr32[0]) ==
		    (b->addr32[0] & m->addr32[0])) &&
		    ((a->addr32[1] & m->addr32[1]) ==
		    (b->addr32[1] & m->addr32[1])) &&
		    ((a->addr32[2] & m->addr32[2]) ==
		    (b->addr32[2] & m->addr32[2])) &&
		    ((a->addr32[3] & m->addr32[3]) ==
		    (b->addr32[3] & m->addr32[3])))
			match++;
		break;
#endif /* INET6 */
	}
	/* n inverts the sense of the comparison */
	if (match) {
		if (n)
			return (0);
		else
			return (1);
	} else {
		if (n)
			return (1);
		else
			return (0);
	}
}

/*
 * Return 1 if b <= a <= e, otherwise return 0.
 */
int
pf_match_addr_range(struct pf_addr *b, struct pf_addr *e,
    struct pf_addr *a, sa_family_t af)
{
	switch (af) {
#ifdef INET
	case AF_INET:
		if ((a->addr32[0] < b->addr32[0]) ||
		    (a->addr32[0] > e->addr32[0]))
			return (0);
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6: {
		int	i;

		/* check a >= b */
		for (i = 0; i < 4; ++i)
			if (a->addr32[i] > b->addr32[i])
				break;
			else if (a->addr32[i] < b->addr32[i])
				return (0);
		/* check a <= e */
		for (i = 0; i < 4; ++i)
			if (a->addr32[i] < e->addr32[i])
				break;
			else if (a->addr32[i] > e->addr32[i])
				return (0);
		break;
	}
#endif /* INET6 */
	}
	return (1);
}

/* evaluate one of the pf comparison operators against p */
int
pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p)
{
	switch (op) {
	case PF_OP_IRG:
		return ((p > a1) && (p < a2));
	case PF_OP_XRG:
		return ((p < a1) || (p > a2));
	case PF_OP_RRG:
		return ((p >= a1) && (p <= a2));
	case PF_OP_EQ:
		return (p == a1);
	case PF_OP_NE:
		return (p != a1);
	case PF_OP_LT:
		return (p < a1);
	case PF_OP_LE:
		return (p <= a1);
	case PF_OP_GT:
		return (p > a1);
	case PF_OP_GE:
		return (p >= a1);
	}
	return (0); /* never reached */
}

/* note: converts the port arguments to host order in place */
int
pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p)
{
	NTOHS(a1);
	NTOHS(a2);
	NTOHS(p);
	return (pf_match(op, a1, a2, p));
}

/* UID_MAX is special-cased: only explicit EQ/NE operators can match it */
int
pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u)
{
	if (u == UID_MAX && op != PF_OP_EQ && op != PF_OP_NE)
		return (0);
	return (pf_match(op, a1, a2, u));
}

/* GID_MAX is special-cased: only explicit EQ/NE operators can match it */
int
pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g)
{
	if (g == GID_MAX && op != PF_OP_EQ && op != PF_OP_NE)
		return (0);
	return (pf_match(op, a1, a2, g));
}

int
pf_match_tag(struct mbuf *m,
struct pf_rule *r, int *tag) 2007 { 2008 if (*tag == -1) 2009 *tag = m->m_pkthdr.pf.tag; 2010 2011 return ((!r->match_tag_not && r->match_tag == *tag) || 2012 (r->match_tag_not && r->match_tag != *tag)); 2013 } 2014 2015 int 2016 pf_tag_packet(struct mbuf *m, int tag, int rtableid) 2017 { 2018 if (tag <= 0 && rtableid < 0) 2019 return (0); 2020 2021 if (tag > 0) 2022 m->m_pkthdr.pf.tag = tag; 2023 if (rtableid >= 0) 2024 m->m_pkthdr.pf.rtableid = rtableid; 2025 2026 return (0); 2027 } 2028 2029 void 2030 pf_step_into_anchor(int *depth, struct pf_ruleset **rs, int n, 2031 struct pf_rule **r, struct pf_rule **a, int *match) 2032 { 2033 struct pf_anchor_stackframe *f; 2034 2035 (*r)->anchor->match = 0; 2036 if (match) 2037 *match = 0; 2038 if (*depth >= sizeof(pf_anchor_stack) / 2039 sizeof(pf_anchor_stack[0])) { 2040 printf("pf_step_into_anchor: stack overflow\n"); 2041 *r = TAILQ_NEXT(*r, entries); 2042 return; 2043 } else if (*depth == 0 && a != NULL) 2044 *a = *r; 2045 f = pf_anchor_stack + (*depth)++; 2046 f->rs = *rs; 2047 f->r = *r; 2048 if ((*r)->anchor_wildcard) { 2049 f->parent = &(*r)->anchor->children; 2050 if ((f->child = RB_MIN(pf_anchor_node, f->parent)) == 2051 NULL) { 2052 *r = NULL; 2053 return; 2054 } 2055 *rs = &f->child->ruleset; 2056 } else { 2057 f->parent = NULL; 2058 f->child = NULL; 2059 *rs = &(*r)->anchor->ruleset; 2060 } 2061 *r = TAILQ_FIRST((*rs)->rules[n].active.ptr); 2062 } 2063 2064 int 2065 pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs, int n, 2066 struct pf_rule **r, struct pf_rule **a, int *match) 2067 { 2068 struct pf_anchor_stackframe *f; 2069 int quick = 0; 2070 2071 do { 2072 if (*depth <= 0) 2073 break; 2074 f = pf_anchor_stack + *depth - 1; 2075 if (f->parent != NULL && f->child != NULL) { 2076 if (f->child->match || 2077 (match != NULL && *match)) { 2078 f->r->anchor->match = 1; 2079 *match = 0; 2080 } 2081 f->child = RB_NEXT(pf_anchor_node, f->parent, f->child); 2082 if (f->child != NULL) { 2083 *rs = 
&f->child->ruleset; 2084 *r = TAILQ_FIRST((*rs)->rules[n].active.ptr); 2085 if (*r == NULL) 2086 continue; 2087 else 2088 break; 2089 } 2090 } 2091 (*depth)--; 2092 if (*depth == 0 && a != NULL) 2093 *a = NULL; 2094 *rs = f->rs; 2095 if (f->r->anchor->match || (match != NULL && *match)) 2096 quick = f->r->quick; 2097 *r = TAILQ_NEXT(f->r, entries); 2098 } while (*r == NULL); 2099 2100 return (quick); 2101 } 2102 2103 #ifdef INET6 2104 void 2105 pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr, 2106 struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af) 2107 { 2108 switch (af) { 2109 #ifdef INET 2110 case AF_INET: 2111 naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) | 2112 ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]); 2113 break; 2114 #endif /* INET */ 2115 case AF_INET6: 2116 naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) | 2117 ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]); 2118 naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) | 2119 ((rmask->addr32[1] ^ 0xffffffff ) & saddr->addr32[1]); 2120 naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) | 2121 ((rmask->addr32[2] ^ 0xffffffff ) & saddr->addr32[2]); 2122 naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) | 2123 ((rmask->addr32[3] ^ 0xffffffff ) & saddr->addr32[3]); 2124 break; 2125 } 2126 } 2127 2128 void 2129 pf_addr_inc(struct pf_addr *addr, sa_family_t af) 2130 { 2131 switch (af) { 2132 #ifdef INET 2133 case AF_INET: 2134 addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1); 2135 break; 2136 #endif /* INET */ 2137 case AF_INET6: 2138 if (addr->addr32[3] == 0xffffffff) { 2139 addr->addr32[3] = 0; 2140 if (addr->addr32[2] == 0xffffffff) { 2141 addr->addr32[2] = 0; 2142 if (addr->addr32[1] == 0xffffffff) { 2143 addr->addr32[1] = 0; 2144 addr->addr32[0] = 2145 htonl(ntohl(addr->addr32[0]) + 1); 2146 } else 2147 addr->addr32[1] = 2148 htonl(ntohl(addr->addr32[1]) + 1); 2149 } else 2150 addr->addr32[2] = 2151 htonl(ntohl(addr->addr32[2]) + 1); 
		} else
			addr->addr32[3] =
			    htonl(ntohl(addr->addr32[3]) + 1);
		break;
	}
}
#endif /* INET6 */

#define mix(a,b,c) \
	do {					\
		a -= b; a -= c; a ^= (c >> 13);	\
		b -= c; b -= a; b ^= (a << 8);	\
		c -= a; c -= b; c ^= (b >> 13);	\
		a -= b; a -= c; a ^= (c >> 12);	\
		b -= c; b -= a; b ^= (a << 16);	\
		c -= a; c -= b; c ^= (b >> 5);	\
		a -= b; a -= c; a ^= (c >> 3);	\
		b -= c; b -= a; b ^= (a << 10);	\
		c -= a; c -= b; c ^= (b >> 15);	\
	} while (0)

/*
 * hash function based on bridge_hash in if_bridge.c
 */
void
pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
    struct pf_poolhashkey *key, sa_family_t af)
{
	/* 0x9e3779b9: golden-ratio constant used as the initial mix value */
	u_int32_t	a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0];

	switch (af) {
#ifdef INET
	case AF_INET:
		a += inaddr->addr32[0];
		b += key->key32[1];
		mix(a, b, c);
		hash->addr32[0] = c + key->key32[2];
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		a += inaddr->addr32[0];
		b += inaddr->addr32[2];
		mix(a, b, c);
		hash->addr32[0] = c;
		a += inaddr->addr32[1];
		b += inaddr->addr32[3];
		c += key->key32[1];
		mix(a, b, c);
		hash->addr32[1] = c;
		a += inaddr->addr32[2];
		b += inaddr->addr32[1];
		c += key->key32[2];
		mix(a, b, c);
		hash->addr32[2] = c;
		a += inaddr->addr32[3];
		b += inaddr->addr32[0];
		c += key->key32[3];
		mix(a, b, c);
		hash->addr32[3] = c;
		break;
#endif /* INET6 */
	}
}

/*
 * Select an address from the rule's address pool according to the pool
 * options; the result is written to naddr.  Returns 0 on success, 1 on
 * failure (e.g. no-route or an unsupported pool/table combination).
 */
int
pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
    struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sn)
{
	unsigned char		 hash[16];
	struct pf_pool		*rpool = &r->rpool;
	struct pf_addr		*raddr = &rpool->cur->addr.v.a.addr;
	struct pf_addr		*rmask = &rpool->cur->addr.v.a.mask;
	struct pf_pooladdr	*acur = rpool->cur;
	struct pf_src_node	 k;

	/*
	 * Sticky address: if a source node already maps this source to a
	 * pool address, reuse that mapping instead of picking a new one.
	 */
	if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR &&
	    (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
		k.af = af;
		PF_ACPY(&k.addr, saddr, af);
		if (r->rule_flag & PFRULE_RULESRCTRACK ||
		    r->rpool.opts & PF_POOL_STICKYADDR)
			k.rule.ptr = r;
		else
			k.rule.ptr = NULL;
		pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
		*sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
		if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, af)) {
			PF_ACPY(naddr, &(*sn)->raddr, af);
			if (pf_status.debug >= PF_DEBUG_MISC) {
				printf("pf_map_addr: src tracking maps ");
				pf_print_host(&k.addr, 0, af);
				printf(" to ");
				pf_print_host(naddr, 0, af);
				printf("\n");
			}
			return (0);
		}
	}

	if (rpool->cur->addr.type == PF_ADDR_NOROUTE)
		return (1);
	if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
		/* interface-derived pool: use the current dynamic address */
		switch (af) {
#ifdef INET
		case AF_INET:
			if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 &&
			    (rpool->opts & PF_POOL_TYPEMASK) !=
			    PF_POOL_ROUNDROBIN)
				return (1);
			raddr = &rpool->cur->addr.p.dyn->pfid_addr4;
			rmask = &rpool->cur->addr.p.dyn->pfid_mask4;
			break;
#endif /* INET */
#ifdef INET6
		case AF_INET6:
			if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 &&
			    (rpool->opts & PF_POOL_TYPEMASK) !=
			    PF_POOL_ROUNDROBIN)
				return (1);
			raddr = &rpool->cur->addr.p.dyn->pfid_addr6;
			rmask = &rpool->cur->addr.p.dyn->pfid_mask6;
			break;
#endif /* INET6 */
		}
	} else if (rpool->cur->addr.type == PF_ADDR_TABLE) {
		if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN)
			return (1); /* unsupported */
	} else {
		raddr = &rpool->cur->addr.v.a.addr;
		rmask = &rpool->cur->addr.v.a.mask;
	}

	switch (rpool->opts & PF_POOL_TYPEMASK) {
	case PF_POOL_NONE:
		PF_ACPY(naddr, raddr, af);
		break;
	case PF_POOL_BITMASK:
		PF_POOLMASK(naddr, raddr, rmask, saddr, af);
		break;
	case PF_POOL_RANDOM:
		if (init_addr != NULL && PF_AZERO(init_addr, af)) {
			switch (af) {
#ifdef INET
			case AF_INET:
				rpool->counter.addr32[0] = htonl(arc4random());
				break;
#endif /* INET */
#ifdef INET6
			case AF_INET6:
				/*
				 * Randomize only the words the mask leaves
				 * variable; stop at the first fixed word.
				 */
				if (rmask->addr32[3] != 0xffffffff)
					rpool->counter.addr32[3] =
					    htonl(arc4random());
				else
					break;
				if (rmask->addr32[2] != 0xffffffff)
					rpool->counter.addr32[2] =
					    htonl(arc4random());
				else
					break;
				if (rmask->addr32[1] != 0xffffffff)
					rpool->counter.addr32[1] =
					    htonl(arc4random());
				else
					break;
				if (rmask->addr32[0] != 0xffffffff)
					rpool->counter.addr32[0] =
					    htonl(arc4random());
				break;
#endif /* INET6 */
			}
			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
			PF_ACPY(init_addr, naddr, af);

		} else {
			/* subsequent attempt: step sequentially */
			PF_AINC(&rpool->counter, af);
			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
		}
		break;
	case PF_POOL_SRCHASH:
		pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af);
		PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af);
		break;
	case PF_POOL_ROUNDROBIN:
		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
			if (!pfr_pool_get(rpool->cur->addr.p.tbl,
			    &rpool->tblidx, &rpool->counter,
			    &raddr, &rmask, af))
				goto get_addr;
		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
			if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
			    &rpool->tblidx, &rpool->counter,
			    &raddr, &rmask, af))
				goto get_addr;
		} else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
			goto get_addr;

	try_next:
		/* advance to the next pool entry, wrapping at the end */
		if ((rpool->cur = TAILQ_NEXT(rpool->cur, entries)) == NULL)
			rpool->cur = TAILQ_FIRST(&rpool->list);
		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
			rpool->tblidx = -1;
			if (pfr_pool_get(rpool->cur->addr.p.tbl,
			    &rpool->tblidx, &rpool->counter,
			    &raddr, &rmask, af)) {
				/* table contains no address of type 'af' */
				if (rpool->cur != acur)
					goto try_next;
				return (1);
			}
		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
			rpool->tblidx = -1;
			if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
			    &rpool->tblidx, &rpool->counter,
			    &raddr, &rmask, af)) {
				/* table contains no address of type 'af' */
				if (rpool->cur != acur)
					goto try_next;
				return (1);
			}
		} else {
			raddr = &rpool->cur->addr.v.a.addr;
			rmask = &rpool->cur->addr.v.a.mask;
			PF_ACPY(&rpool->counter, raddr, af);
		}

	get_addr:
		PF_ACPY(naddr, &rpool->counter, af);
		if (init_addr != NULL && PF_AZERO(init_addr, af))
			PF_ACPY(init_addr, naddr, af);
		PF_AINC(&rpool->counter, af);
		break;
	}
	/* remember the mapping in the source node for sticky-address */
	if (*sn != NULL)
		PF_ACPY(&(*sn)->raddr, naddr, af);

	if (pf_status.debug >= PF_DEBUG_MISC &&
	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
		printf("pf_map_addr: selected address ");
		pf_print_host(naddr, 0, af);
		printf("\n");
	}

	return (0);
}

/*
 * Pick a translation address via pf_map_addr() and search for a source
 * port in [low, high] that yields an unused state key.  Returns 0 with
 * *nport set on success, 1 if no address/port combination is free.
 */
int
pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r,
    struct pf_addr *saddr, struct pf_addr *daddr, u_int16_t dport,
    struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high,
    struct pf_src_node **sn)
{
	struct pf_state_key_cmp	key;
	struct pf_addr		init_addr;
	u_int16_t		cut;

	bzero(&init_addr, sizeof(init_addr));
	if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn))
		return (1);

	if (proto == IPPROTO_ICMP) {
		/* ICMP "ports" are query ids: use the full range */
		low = 1;
		high = 65535;
	}

	do {
		key.af = af;
		key.proto = proto;
		PF_ACPY(&key.addr[1], daddr, key.af);
		PF_ACPY(&key.addr[0], naddr, key.af);
		key.port[1] = dport;

		/*
		 * port search; start random, step;
		 * similar 2 portloop in in_pcbbind
		 */
		if (!(proto == IPPROTO_TCP || proto ==
		    IPPROTO_UDP ||
		    proto == IPPROTO_ICMP)) {
			/* protocol has no ports: probe the key as-is */
			key.port[0] = dport;
			if (pf_find_state_all(&key, PF_IN, NULL) == NULL)
				return (0);
		} else if (low == 0 && high == 0) {
			/* caller fixed the port ("static-port") */
			key.port[0] = *nport;
			if (pf_find_state_all(&key, PF_IN, NULL) == NULL)
				return (0);
		} else if (low == high) {
			key.port[0] = htons(low);
			if (pf_find_state_all(&key, PF_IN, NULL) == NULL) {
				*nport = htons(low);
				return (0);
			}
		} else {
			u_int16_t tmp;

			if (low > high) {
				tmp = low;
				low = high;
				high = tmp;
			}
			/* low < high */
			cut = htonl(arc4random()) % (1 + high - low) + low;
			/* low <= cut <= high */
			for (tmp = cut; tmp <= high; ++(tmp)) {
				key.port[0] = htons(tmp);
				if (pf_find_state_all(&key, PF_IN, NULL) ==
				    NULL) {
					*nport = htons(tmp);
					return (0);
				}
			}
			/*
			 * NOTE(review): if low == 0, "tmp >= low" is always
			 * true for the unsigned tmp and it wraps past 0 --
			 * relies on callers passing low >= 1; verify.
			 */
			for (tmp = cut - 1; tmp >= low; --(tmp)) {
				key.port[0] = htons(tmp);
				if (pf_find_state_all(&key, PF_IN, NULL) ==
				    NULL) {
					*nport = htons(tmp);
					return (0);
				}
			}
		}

		/*
		 * No free port at this address: random/round-robin pools
		 * move on to the next pool address and retry.
		 */
		switch (r->rpool.opts & PF_POOL_TYPEMASK) {
		case PF_POOL_RANDOM:
		case PF_POOL_ROUNDROBIN:
			if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn))
				return (1);
			break;
		case PF_POOL_NONE:
		case PF_POOL_SRCHASH:
		case PF_POOL_BITMASK:
		default:
			return (1);
		}
	} while (!
	    PF_AEQ(&init_addr, naddr, af) );
	return (1);					/* none available */
}

/*
 * Walk the translation ruleset rs_num (NAT/BINAT/RDR) and return the
 * first rule matching the packet, or NULL.  "no nat/rdr/binat" matches
 * are also reported as NULL by the caller-visible tail below.
 */
struct pf_rule *
pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off,
    int direction, struct pfi_kif *kif, struct pf_addr *saddr, u_int16_t sport,
    struct pf_addr *daddr, u_int16_t dport, int rs_num)
{
	struct pf_rule		*r, *rm = NULL;
	struct pf_ruleset	*ruleset = NULL;
	int			 tag = -1;
	int			 rtableid = -1;
	int			 asd = 0;

	r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr);
	while (r && rm == NULL) {
		struct pf_rule_addr	*src = NULL, *dst = NULL;
		struct pf_addr_wrap	*xdst = NULL;

		if (r->action == PF_BINAT && direction == PF_IN) {
			/* inbound binat: match against the reverse mapping */
			src = &r->dst;
			if (r->rpool.cur != NULL)
				xdst = &r->rpool.cur->addr;
		} else {
			src = &r->src;
			dst = &r->dst;
		}

		r->evaluations++;
		if (pfi_kif_match(r->kif, kif) == r->ifnot)
			r = r->skip[PF_SKIP_IFP].ptr;
		else if (r->direction && r->direction != direction)
			r = r->skip[PF_SKIP_DIR].ptr;
		else if (r->af && r->af != pd->af)
			r = r->skip[PF_SKIP_AF].ptr;
		else if (r->proto && r->proto != pd->proto)
			r = r->skip[PF_SKIP_PROTO].ptr;
		else if (PF_MISMATCHAW(&src->addr, saddr, pd->af,
		    src->neg, kif))
			r = r->skip[src == &r->src ? PF_SKIP_SRC_ADDR :
			    PF_SKIP_DST_ADDR].ptr;
		else if (src->port_op && !pf_match_port(src->port_op,
		    src->port[0], src->port[1], sport))
			r = r->skip[src == &r->src ?
			    PF_SKIP_SRC_PORT :
			    PF_SKIP_DST_PORT].ptr;
		else if (dst != NULL &&
		    PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->neg, NULL))
			r = r->skip[PF_SKIP_DST_ADDR].ptr;
		else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af,
		    0, NULL))
			r = TAILQ_NEXT(r, entries);
		else if (dst != NULL && dst->port_op &&
		    !pf_match_port(dst->port_op, dst->port[0],
		    dst->port[1], dport))
			r = r->skip[PF_SKIP_DST_PORT].ptr;
		else if (r->match_tag && !pf_match_tag(m, r, &tag))
			r = TAILQ_NEXT(r, entries);
		else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto !=
		    IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd, m,
		    off, pd->hdr.tcp), r->os_fingerprint)))
			r = TAILQ_NEXT(r, entries);
		else {
			if (r->tag)
				tag = r->tag;
			if (r->rtableid >= 0)
				rtableid = r->rtableid;
			if (r->anchor == NULL) {
				rm = r;
			} else
				pf_step_into_anchor(&asd, &ruleset, rs_num,
				    &r, NULL, NULL);
		}
		if (r == NULL)
			pf_step_out_of_anchor(&asd, &ruleset, rs_num, &r,
			    NULL, NULL);
	}
	if (pf_tag_packet(m, tag, rtableid))
		return (NULL);
	/* a matching "no ..." rule means: no translation */
	if (rm != NULL && (rm->action == PF_NONAT ||
	    rm->action == PF_NORDR || rm->action == PF_NOBINAT))
		return (NULL);
	return (rm);
}

/*
 * Find the applicable BINAT/NAT/RDR rule for the packet and, if one
 * matches, set up the wire/stack state keys and compute the translated
 * address and port.  Returns the translation rule, or NULL.
 */
struct pf_rule *
pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction,
    struct pfi_kif *kif, struct pf_src_node **sn,
    struct pf_state_key **skw, struct pf_state_key **sks,
    struct pf_state_key **skp, struct pf_state_key **nkp,
    struct pf_addr *saddr, struct pf_addr *daddr,
    u_int16_t sport, u_int16_t dport)
{
	struct pf_rule	*r = NULL;

	if (direction == PF_OUT) {
		/* outbound: binat takes precedence over nat */
		r = pf_match_translation(pd, m, off, direction, kif, saddr,
		    sport, daddr, dport, PF_RULESET_BINAT);
		if (r == NULL)
			r = pf_match_translation(pd, m, off, direction, kif,
			    saddr, sport, daddr, dport, PF_RULESET_NAT);
	} else {
		/* inbound: rdr takes precedence over binat */
		r = pf_match_translation(pd, m, off, direction, kif, saddr,
		    sport, daddr, dport, PF_RULESET_RDR);
		if (r == NULL)
			r = pf_match_translation(pd, m, off, direction, kif,
			    saddr, sport, daddr, dport, PF_RULESET_BINAT);
	}

	if (r != NULL) {
		struct pf_addr	*naddr;
		u_int16_t	*nport;

		if (pf_state_key_setup(pd, r, skw, sks, skp, nkp,
		    saddr, daddr, sport, dport))
			return r;

		/* XXX We only modify one side for now. */
		naddr = &(*nkp)->addr[1];
		nport = &(*nkp)->port[1];

		switch (r->action) {
		case PF_NONAT:
		case PF_NOBINAT:
		case PF_NORDR:
			return (NULL);
		case PF_NAT:
			if (pf_get_sport(pd->af, pd->proto, r, saddr,
			    daddr, dport, naddr, nport, r->rpool.proxy_port[0],
			    r->rpool.proxy_port[1], sn)) {
				DPFPRINTF(PF_DEBUG_MISC,
				    ("pf: NAT proxy port allocation "
				    "(%u-%u) failed\n",
				    r->rpool.proxy_port[0],
				    r->rpool.proxy_port[1]));
				return (NULL);
			}
			break;
		case PF_BINAT:
			switch (direction) {
			case PF_OUT:
				if (r->rpool.cur->addr.type == PF_ADDR_DYNIFTL){
					switch (pd->af) {
#ifdef INET
					case AF_INET:
						if (r->rpool.cur->addr.p.dyn->
						    pfid_acnt4 < 1)
							return (NULL);
						PF_POOLMASK(naddr,
						    &r->rpool.cur->addr.p.dyn->
						    pfid_addr4,
						    &r->rpool.cur->addr.p.dyn->
						    pfid_mask4,
						    saddr, AF_INET);
						break;
#endif /* INET */
#ifdef INET6
					case AF_INET6:
						if (r->rpool.cur->addr.p.dyn->
						    pfid_acnt6 < 1)
							return (NULL);
						PF_POOLMASK(naddr,
						    &r->rpool.cur->addr.p.dyn->
						    pfid_addr6,
						    &r->rpool.cur->addr.p.dyn->
						    pfid_mask6,
						    saddr, AF_INET6);
						break;
#endif /* INET6 */
					}
				} else
					PF_POOLMASK(naddr,
					    &r->rpool.cur->addr.v.a.addr,
					    &r->rpool.cur->addr.v.a.mask,
					    saddr, pd->af);
				break;
			case PF_IN:
				/* reverse direction: translate using r->src */
				if (r->src.addr.type == PF_ADDR_DYNIFTL) {
					switch (pd->af) {
#ifdef
 INET
					case AF_INET:
						if (r->src.addr.p.dyn->
						    pfid_acnt4 < 1)
							return (NULL);
						PF_POOLMASK(naddr,
						    &r->src.addr.p.dyn->
						    pfid_addr4,
						    &r->src.addr.p.dyn->
						    pfid_mask4,
						    daddr, AF_INET);
						break;
#endif /* INET */
#ifdef INET6
					case AF_INET6:
						if (r->src.addr.p.dyn->
						    pfid_acnt6 < 1)
							return (NULL);
						PF_POOLMASK(naddr,
						    &r->src.addr.p.dyn->
						    pfid_addr6,
						    &r->src.addr.p.dyn->
						    pfid_mask6,
						    daddr, AF_INET6);
						break;
#endif /* INET6 */
					}
				} else
					PF_POOLMASK(naddr,
					    &r->src.addr.v.a.addr,
					    &r->src.addr.v.a.mask, daddr,
					    pd->af);
				break;
			}
			break;
		case PF_RDR: {
			if (pf_map_addr(pd->af, r, saddr, naddr, NULL, sn))
				return (NULL);
			if ((r->rpool.opts & PF_POOL_TYPEMASK) ==
			    PF_POOL_BITMASK)
				PF_POOLMASK(naddr, naddr,
				    &r->rpool.cur->addr.v.a.mask, daddr,
				    pd->af);

			if (r->rpool.proxy_port[1]) {
				u_int32_t	tmp_nport;

				/* map dport into the configured port range */
				tmp_nport = ((ntohs(dport) -
				    ntohs(r->dst.port[0])) %
				    (r->rpool.proxy_port[1] -
				    r->rpool.proxy_port[0] + 1)) +
				    r->rpool.proxy_port[0];

				/* wrap around if necessary */
				if (tmp_nport > 65535)
					tmp_nport -= 65535;
				*nport = htons((u_int16_t)tmp_nport);
			} else if (r->rpool.proxy_port[0])
				*nport = htons(r->rpool.proxy_port[0]);
			break;
		}
		default:
			return (NULL);
		}
	}

	return (r);
}

/*
 * Find the local socket the packet belongs to and record its effective
 * uid/gid and pid in pd->lookup.  Returns 1 on success, -1 if no
 * matching socket (or unsupported protocol) is found.
 */
int
pf_socket_lookup(int direction, struct pf_pdesc *pd)
{
	struct pf_addr		*saddr, *daddr;
	u_int16_t		 sport, dport;
	struct inpcbtable	*tb;
	struct inpcb		*inp;

	if (pd == NULL)
		return (-1);
	pd->lookup.uid = UID_MAX;
	pd->lookup.gid = GID_MAX;
	pd->lookup.pid = NO_PID;
	switch (pd->proto) {
	case IPPROTO_TCP:
		if (pd->hdr.tcp == NULL)
			return (-1);
		sport = pd->hdr.tcp->th_sport;
		dport = pd->hdr.tcp->th_dport;
		tb
 = &tcbtable;
		break;
	case IPPROTO_UDP:
		if (pd->hdr.udp == NULL)
			return (-1);
		sport = pd->hdr.udp->uh_sport;
		dport = pd->hdr.udp->uh_dport;
		tb = &udbtable;
		break;
	default:
		return (-1);
	}
	if (direction == PF_IN) {
		saddr = pd->src;
		daddr = pd->dst;
	} else {
		u_int16_t	p;

		/* outbound: the local socket is the packet's source side */
		p = sport;
		sport = dport;
		dport = p;
		saddr = pd->dst;
		daddr = pd->src;
	}
	switch (pd->af) {
#ifdef INET
	case AF_INET:
		inp = in_pcbhashlookup(tb, saddr->v4, sport, daddr->v4, dport);
		if (inp == NULL) {
			/* no connected PCB: fall back to a listening socket */
			inp = in_pcblookup_listen(tb, daddr->v4, dport, 0,
			    NULL);
			if (inp == NULL)
				return (-1);
		}
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		inp = in6_pcbhashlookup(tb, &saddr->v6, sport, &daddr->v6,
		    dport);
		if (inp == NULL) {
			inp = in6_pcblookup_listen(tb, &daddr->v6, dport, 0,
			    NULL);
			if (inp == NULL)
				return (-1);
		}
		break;
#endif /* INET6 */

	default:
		return (-1);
	}
	pd->lookup.uid = inp->inp_socket->so_euid;
	pd->lookup.gid = inp->inp_socket->so_egid;
	pd->lookup.pid = inp->inp_socket->so_cpid;
	return (1);
}

/*
 * Parse the TCP options of the header at 'off' and return the
 * advertised window scale, with PF_WSCALE_FLAG set when the option was
 * present; 0 if absent or the header cannot be pulled.
 */
u_int8_t
pf_get_wscale(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
{
	int		 hlen;
	u_int8_t	 hdr[60];
	u_int8_t	*opt, optlen;
	u_int8_t	 wscale = 0;

	hlen = th_off << 2;		/* hlen <= sizeof(hdr) */
	if (hlen <= sizeof(struct tcphdr))
		return (0);
	if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af))
		return (0);
	opt = hdr + sizeof(struct tcphdr);
	hlen -= sizeof(struct tcphdr);
	while (hlen >= 3) {
		switch (*opt) {
		case TCPOPT_EOL:
		case TCPOPT_NOP:
			/* single-byte options */
			++opt;
			--hlen;
			break;
		case TCPOPT_WINDOW:
			wscale = opt[2];
			if (wscale > TCP_MAX_WINSHIFT)
				wscale = TCP_MAX_WINSHIFT;
			wscale |= PF_WSCALE_FLAG;
			/* FALLTHROUGH */
		default:
			/* skip option by its length byte (min 2) */
			optlen = opt[1];
			if (optlen < 2)
				optlen = 2;
			hlen -= optlen;
			opt += optlen;
			break;
		}
	}
	return (wscale);
}

/*
 * Parse the TCP options and return the advertised MSS, or tcp_mssdflt
 * when no MSS option is present; 0 if the header cannot be pulled.
 */
u_int16_t
pf_get_mss(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
{
	int		 hlen;
	u_int8_t	 hdr[60];
	u_int8_t	*opt, optlen;
	u_int16_t	 mss = tcp_mssdflt;

	hlen = th_off << 2;	/* hlen <= sizeof(hdr) */
	if (hlen <= sizeof(struct tcphdr))
		return (0);
	if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af))
		return (0);
	opt = hdr + sizeof(struct tcphdr);
	hlen -= sizeof(struct tcphdr);
	while (hlen >= TCPOLEN_MAXSEG) {
		switch (*opt) {
		case TCPOPT_EOL:
		case TCPOPT_NOP:
			++opt;
			--hlen;
			break;
		case TCPOPT_MAXSEG:
			bcopy((caddr_t)(opt + 2), (caddr_t)&mss, 2);
			NTOHS(mss);
			/* FALLTHROUGH */
		default:
			optlen = opt[1];
			if (optlen < 2)
				optlen = 2;
			hlen -= optlen;
			opt += optlen;
			break;
		}
	}
	return (mss);
}

/*
 * Compute the MSS to use toward 'addr': route MTU minus IP and TCP
 * headers, clamped by the peer's offer and floored at 64.
 */
u_int16_t
pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer)
{
#ifdef INET
	struct sockaddr_in	*dst;
	struct route		 ro;
#endif /* INET */
#ifdef INET6
	struct sockaddr_in6	*dst6;
	struct route_in6	 ro6;
#endif /* INET6 */
	struct rtentry		*rt = NULL;
	int			 hlen;
	u_int16_t		 mss = tcp_mssdflt;

	switch (af) {
#ifdef INET
	case AF_INET:
		hlen = sizeof(struct ip);
		bzero(&ro, sizeof(ro));
		dst = (struct sockaddr_in *)&ro.ro_dst;
		dst->sin_family = AF_INET;
		dst->sin_len = sizeof(*dst);
		dst->sin_addr = addr->v4;
		rtalloc_noclone(&ro, NO_CLONING);
		rt = ro.ro_rt;
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		hlen = sizeof(struct ip6_hdr);
		bzero(&ro6, sizeof(ro6));
		dst6 = (struct sockaddr_in6 *)&ro6.ro_dst;
		dst6->sin6_family = AF_INET6;
		dst6->sin6_len = sizeof(*dst6);
		dst6->sin6_addr = addr->v6;
		rtalloc_noclone((struct route *)&ro6, NO_CLONING);
		rt = ro6.ro_rt;
		break;
#endif /* INET6 */
	}

	if (rt && rt->rt_ifp) {
		mss = rt->rt_ifp->if_mtu - hlen - sizeof(struct tcphdr);
		mss = max(tcp_mssdflt, mss);
		RTFREE(rt);
	}
	mss = min(mss, offer);
	mss = max(mss, 64);		/* sanity - at least max opt space */
	return (mss);
}

/*
 * For route-to/reply-to rules, resolve the outgoing interface (rt_kif)
 * and gateway address (rt_addr) for a freshly created state.
 */
void
pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr)
{
	struct pf_rule *r = s->rule.ptr;

	s->rt_kif = NULL;
	if (!r->rt || r->rt == PF_FASTROUTE)
		return;
	switch (s->key[PF_SK_WIRE]->af) {
#ifdef INET
	case AF_INET:
		pf_map_addr(AF_INET, r, saddr, &s->rt_addr, NULL,
		    &s->nat_src_node);
		s->rt_kif = r->rpool.cur->kif;
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		pf_map_addr(AF_INET6, r, saddr, &s->rt_addr, NULL,
		    &s->nat_src_node);
		s->rt_kif = r->rpool.cur->kif;
		break;
#endif /* INET6 */
	}
}

/*
 * Generate an initial sequence number for modulated states: MD5 over the
 * connection 4-tuple keyed with a lazily initialized random secret.
 */
u_int32_t
pf_tcp_iss(struct pf_pdesc *pd)
{
	MD5_CTX ctx;
	u_int32_t digest[4];

	if (pf_tcp_secret_init == 0) {
		/* first use: key the hash context with fresh randomness */
		arc4random_buf(pf_tcp_secret, sizeof(pf_tcp_secret));
		MD5Init(&pf_tcp_secret_ctx);
		MD5Update(&pf_tcp_secret_ctx, pf_tcp_secret,
		    sizeof(pf_tcp_secret));
		pf_tcp_secret_init = 1;
	}
	/* start from the pre-keyed context; never modify the original */
	ctx = pf_tcp_secret_ctx;

	MD5Update(&ctx, (char *)&pd->hdr.tcp->th_sport, sizeof(u_short));
	MD5Update(&ctx, (char *)&pd->hdr.tcp->th_dport, sizeof(u_short));
	if (pd->af == AF_INET6) {
		MD5Update(&ctx, (char *)&pd->src->v6, sizeof(struct in6_addr));
		MD5Update(&ctx, (char *)&pd->dst->v6, sizeof(struct in6_addr));
	} else {
		MD5Update(&ctx, (char *)&pd->src->v4, sizeof(struct in_addr));
		MD5Update(&ctx, (char *)&pd->dst->v4, sizeof(struct in_addr));
	}
	MD5Final((u_char *)digest, &ctx);
	pf_tcp_iss_off += 4096;
	return (digest[0] + tcp_iss + pf_tcp_iss_off);
}

/*
 * Main per-packet rule test: apply any BINAT/NAT/RDR translation, walk
 * the filter ruleset, optionally answer dropped packets with RST/ICMP,
 * and create state when the matching rule keeps state.
 */
int
pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
    struct pfi_kif *kif, struct mbuf *m, int off, void *h,
    struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm,
    struct ifqueue *ifq)
{
	struct pf_rule		*nr = NULL;
	struct pf_addr		*saddr = pd->src, *daddr = pd->dst;
	sa_family_t		 af = pd->af;
	struct pf_rule		*r, *a = NULL;
	struct pf_ruleset	*ruleset = NULL;
	struct pf_src_node	*nsn = NULL;
	struct tcphdr		*th = pd->hdr.tcp;
	struct pf_state_key	*skw = NULL, *sks = NULL;
	struct pf_state_key	*sk = NULL, *nk = NULL;
	u_short			 reason;
	int			 rewrite = 0, hdrlen = 0;
	int			 tag = -1, rtableid = -1;
	int			 asd = 0;
	int			 match = 0;
	int			 state_icmp = 0;
	u_int16_t		 sport, dport;
	u_int16_t		 nport = 0, bport = 0;
	u_int16_t		 bproto_sum = 0, bip_sum;
	u_int8_t		 icmptype = 0, icmpcode = 0;

	if (direction == PF_IN && pf_check_congestion(ifq)) {
		REASON_SET(&reason, PFRES_CONGEST);
		return (PF_DROP);
	}

	/* extract ports / ICMP type+code for rule matching */
	switch (pd->proto) {
	case IPPROTO_TCP:
		sport = th->th_sport;
		dport = th->th_dport;
		hdrlen = sizeof(*th);
		break;
	case IPPROTO_UDP:
		sport = pd->hdr.udp->uh_sport;
		dport = pd->hdr.udp->uh_dport;
		hdrlen = sizeof(*pd->hdr.udp);
		break;
#ifdef INET
	case IPPROTO_ICMP:
		if (pd->af != AF_INET)
			break;
		sport = dport = pd->hdr.icmp->icmp_id;
		hdrlen = sizeof(*pd->hdr.icmp);
		icmptype = pd->hdr.icmp->icmp_type;
		icmpcode = pd->hdr.icmp->icmp_code;

		/* ICMP error messages are handled statelessly */
		if (icmptype == ICMP_UNREACH ||
		    icmptype == ICMP_SOURCEQUENCH ||
		    icmptype == ICMP_REDIRECT ||
		    icmptype == ICMP_TIMXCEED ||
		    icmptype == ICMP_PARAMPROB)
			state_icmp++;
		break;
#endif /* INET */
#ifdef INET6
	case IPPROTO_ICMPV6:
		if (af != AF_INET6)
			break;
		sport = dport = pd->hdr.icmp6->icmp6_id;
		hdrlen = sizeof(*pd->hdr.icmp6);
		icmptype = pd->hdr.icmp6->icmp6_type;
		icmpcode = pd->hdr.icmp6->icmp6_code;

		if (icmptype == ICMP6_DST_UNREACH ||
		    icmptype == ICMP6_PACKET_TOO_BIG ||
		    icmptype == ICMP6_TIME_EXCEEDED ||
		    icmptype == ICMP6_PARAM_PROB)
			state_icmp++;
		break;
#endif /* INET6 */
	default:
		sport = dport = hdrlen = 0;
		break;
	}

	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);

	bport = nport = sport;
	/* check packet for BINAT/NAT/RDR */
	if ((nr = pf_get_translation(pd, m, off, direction, kif, &nsn,
	    &skw, &sks, &sk, &nk, saddr, daddr, sport, dport)) != NULL) {
		if (nk == NULL || sk == NULL) {
			REASON_SET(&reason, PFRES_MEMORY);
			goto cleanup;
		}

		/* save the pre-translation checksum for undo-on-drop */
		if (pd->ip_sum)
			bip_sum = *pd->ip_sum;

		switch (pd->proto) {
		case IPPROTO_TCP:
			bproto_sum = th->th_sum;
			pd->proto_sum = &th->th_sum;

			if (PF_ANEQ(saddr, &nk->addr[pd->sidx], af) ||
			    nk->port[pd->sidx] != sport) {
				pf_change_ap(saddr, &th->th_sport, pd->ip_sum,
				    &th->th_sum, &nk->addr[pd->sidx],
				    nk->port[pd->sidx], 0, af);
				pd->sport = &th->th_sport;
				sport = th->th_sport;
			}

			if (PF_ANEQ(daddr, &nk->addr[pd->didx], af) ||
			    nk->port[pd->didx] != dport) {
				pf_change_ap(daddr, &th->th_dport, pd->ip_sum,
				    &th->th_sum, &nk->addr[pd->didx],
				    nk->port[pd->didx], 0, af);
				dport = th->th_dport;
				pd->dport = &th->th_dport;
			}
			rewrite++;
			break;
		case IPPROTO_UDP:
			bproto_sum = pd->hdr.udp->uh_sum;
			pd->proto_sum = &pd->hdr.udp->uh_sum;

			if (PF_ANEQ(saddr, &nk->addr[pd->sidx], af) ||
			    nk->port[pd->sidx] != sport) {
				pf_change_ap(saddr, &pd->hdr.udp->uh_sport,
				    pd->ip_sum, &pd->hdr.udp->uh_sum,
				    &nk->addr[pd->sidx],
				    nk->port[pd->sidx], 1, af);
				sport = pd->hdr.udp->uh_sport;
pd->sport = &pd->hdr.udp->uh_sport; 3126 } 3127 3128 if (PF_ANEQ(daddr, &nk->addr[pd->didx], af) || 3129 nk->port[pd->didx] != dport) { 3130 pf_change_ap(daddr, &pd->hdr.udp->uh_dport, 3131 pd->ip_sum, &pd->hdr.udp->uh_sum, 3132 &nk->addr[pd->didx], 3133 nk->port[pd->didx], 1, af); 3134 dport = pd->hdr.udp->uh_dport; 3135 pd->dport = &pd->hdr.udp->uh_dport; 3136 } 3137 rewrite++; 3138 break; 3139 #ifdef INET 3140 case IPPROTO_ICMP: 3141 nk->port[0] = nk->port[1]; 3142 if (PF_ANEQ(saddr, &nk->addr[pd->sidx], AF_INET)) 3143 pf_change_a(&saddr->v4.s_addr, pd->ip_sum, 3144 nk->addr[pd->sidx].v4.s_addr, 0); 3145 3146 if (PF_ANEQ(daddr, &nk->addr[pd->didx], AF_INET)) 3147 pf_change_a(&daddr->v4.s_addr, pd->ip_sum, 3148 nk->addr[pd->didx].v4.s_addr, 0); 3149 3150 if (nk->port[1] != pd->hdr.icmp->icmp_id) { 3151 pd->hdr.icmp->icmp_cksum = pf_cksum_fixup( 3152 pd->hdr.icmp->icmp_cksum, sport, 3153 nk->port[1], 0); 3154 pd->hdr.icmp->icmp_id = nk->port[1]; 3155 pd->sport = &pd->hdr.icmp->icmp_id; 3156 } 3157 m_copyback(m, off, ICMP_MINLEN, pd->hdr.icmp); 3158 break; 3159 #endif /* INET */ 3160 #ifdef INET6 3161 case IPPROTO_ICMPV6: 3162 nk->port[0] = nk->port[1]; 3163 if (PF_ANEQ(saddr, &nk->addr[pd->sidx], AF_INET6)) 3164 pf_change_a6(saddr, &pd->hdr.icmp6->icmp6_cksum, 3165 &nk->addr[pd->sidx], 0); 3166 3167 if (PF_ANEQ(daddr, &nk->addr[pd->didx], AF_INET6)) 3168 pf_change_a6(daddr, &pd->hdr.icmp6->icmp6_cksum, 3169 &nk->addr[pd->didx], 0); 3170 rewrite++; 3171 break; 3172 #endif /* INET */ 3173 default: 3174 switch (af) { 3175 #ifdef INET 3176 case AF_INET: 3177 if (PF_ANEQ(saddr, 3178 &nk->addr[pd->sidx], AF_INET)) 3179 pf_change_a(&saddr->v4.s_addr, 3180 pd->ip_sum, 3181 nk->addr[pd->didx].v4.s_addr, 0); 3182 3183 if (PF_ANEQ(daddr, 3184 &nk->addr[pd->didx], AF_INET)) 3185 pf_change_a(&daddr->v4.s_addr, 3186 pd->ip_sum, 3187 nk->addr[pd->didx].v4.s_addr, 0); 3188 break; 3189 #endif /* INET */ 3190 #ifdef INET6 3191 case AF_INET6: 3192 if (PF_ANEQ(saddr, 3193 
&nk->addr[pd->sidx], AF_INET6)) 3194 PF_ACPY(saddr, &nk->addr[pd->sidx], af); 3195 3196 if (PF_ANEQ(daddr, 3197 &nk->addr[pd->didx], AF_INET6)) 3198 PF_ACPY(saddr, &nk->addr[pd->didx], af); 3199 break; 3200 #endif /* INET */ 3201 } 3202 break; 3203 } 3204 if (nr->natpass) 3205 r = NULL; 3206 pd->nat_rule = nr; 3207 } 3208 3209 while (r != NULL) { 3210 r->evaluations++; 3211 if (pfi_kif_match(r->kif, kif) == r->ifnot) 3212 r = r->skip[PF_SKIP_IFP].ptr; 3213 else if (r->direction && r->direction != direction) 3214 r = r->skip[PF_SKIP_DIR].ptr; 3215 else if (r->af && r->af != af) 3216 r = r->skip[PF_SKIP_AF].ptr; 3217 else if (r->proto && r->proto != pd->proto) 3218 r = r->skip[PF_SKIP_PROTO].ptr; 3219 else if (PF_MISMATCHAW(&r->src.addr, saddr, af, 3220 r->src.neg, kif)) 3221 r = r->skip[PF_SKIP_SRC_ADDR].ptr; 3222 /* tcp/udp only. port_op always 0 in other cases */ 3223 else if (r->src.port_op && !pf_match_port(r->src.port_op, 3224 r->src.port[0], r->src.port[1], sport)) 3225 r = r->skip[PF_SKIP_SRC_PORT].ptr; 3226 else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, 3227 r->dst.neg, NULL)) 3228 r = r->skip[PF_SKIP_DST_ADDR].ptr; 3229 /* tcp/udp only. port_op always 0 in other cases */ 3230 else if (r->dst.port_op && !pf_match_port(r->dst.port_op, 3231 r->dst.port[0], r->dst.port[1], dport)) 3232 r = r->skip[PF_SKIP_DST_PORT].ptr; 3233 /* icmp only. type always 0 in other cases */ 3234 else if (r->type && r->type != icmptype + 1) 3235 r = TAILQ_NEXT(r, entries); 3236 /* icmp only. type always 0 in other cases */ 3237 else if (r->code && r->code != icmpcode + 1) 3238 r = TAILQ_NEXT(r, entries); 3239 else if (r->tos && !(r->tos == pd->tos)) 3240 r = TAILQ_NEXT(r, entries); 3241 else if (r->rule_flag & PFRULE_FRAGMENT) 3242 r = TAILQ_NEXT(r, entries); 3243 else if (pd->proto == IPPROTO_TCP && 3244 (r->flagset & th->th_flags) != r->flags) 3245 r = TAILQ_NEXT(r, entries); 3246 /* tcp/udp only. 
uid.op always 0 in other cases */ 3247 else if (r->uid.op && (pd->lookup.done || (pd->lookup.done = 3248 pf_socket_lookup(direction, pd), 1)) && 3249 !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1], 3250 pd->lookup.uid)) 3251 r = TAILQ_NEXT(r, entries); 3252 /* tcp/udp only. gid.op always 0 in other cases */ 3253 else if (r->gid.op && (pd->lookup.done || (pd->lookup.done = 3254 pf_socket_lookup(direction, pd), 1)) && 3255 !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1], 3256 pd->lookup.gid)) 3257 r = TAILQ_NEXT(r, entries); 3258 else if (r->prob && r->prob <= 3259 (arc4random() % (UINT_MAX - 1) + 1)) 3260 r = TAILQ_NEXT(r, entries); 3261 else if (r->match_tag && !pf_match_tag(m, r, &tag)) 3262 r = TAILQ_NEXT(r, entries); 3263 else if (r->os_fingerprint != PF_OSFP_ANY && 3264 (pd->proto != IPPROTO_TCP || !pf_osfp_match( 3265 pf_osfp_fingerprint(pd, m, off, th), 3266 r->os_fingerprint))) 3267 r = TAILQ_NEXT(r, entries); 3268 else { 3269 if (r->tag) 3270 tag = r->tag; 3271 if (r->rtableid >= 0) 3272 rtableid = r->rtableid; 3273 if (r->anchor == NULL) { 3274 match = 1; 3275 *rm = r; 3276 *am = a; 3277 *rsm = ruleset; 3278 if ((*rm)->quick) 3279 break; 3280 r = TAILQ_NEXT(r, entries); 3281 } else 3282 pf_step_into_anchor(&asd, &ruleset, 3283 PF_RULESET_FILTER, &r, &a, &match); 3284 } 3285 if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset, 3286 PF_RULESET_FILTER, &r, &a, &match)) 3287 break; 3288 } 3289 r = *rm; 3290 a = *am; 3291 ruleset = *rsm; 3292 3293 REASON_SET(&reason, PFRES_MATCH); 3294 3295 if (r->log || (nr != NULL && nr->log)) { 3296 if (rewrite) 3297 m_copyback(m, off, hdrlen, pd->hdr.any); 3298 PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? 
r : nr, 3299 a, ruleset, pd); 3300 } 3301 3302 if ((r->action == PF_DROP) && 3303 ((r->rule_flag & PFRULE_RETURNRST) || 3304 (r->rule_flag & PFRULE_RETURNICMP) || 3305 (r->rule_flag & PFRULE_RETURN))) { 3306 /* undo NAT changes, if they have taken place */ 3307 if (nr != NULL) { 3308 PF_ACPY(saddr, &sk->addr[pd->sidx], af); 3309 PF_ACPY(daddr, &sk->addr[pd->didx], af); 3310 if (pd->sport) 3311 *pd->sport = sk->port[pd->sidx]; 3312 if (pd->dport) 3313 *pd->dport = sk->port[pd->didx]; 3314 if (pd->proto_sum) 3315 *pd->proto_sum = bproto_sum; 3316 if (pd->ip_sum) 3317 *pd->ip_sum = bip_sum; 3318 m_copyback(m, off, hdrlen, pd->hdr.any); 3319 } 3320 if (pd->proto == IPPROTO_TCP && 3321 ((r->rule_flag & PFRULE_RETURNRST) || 3322 (r->rule_flag & PFRULE_RETURN)) && 3323 !(th->th_flags & TH_RST)) { 3324 u_int32_t ack = ntohl(th->th_seq) + pd->p_len; 3325 int len = 0; 3326 struct ip *h4; 3327 struct ip6_hdr *h6; 3328 3329 switch (af) { 3330 case AF_INET: 3331 h4 = mtod(m, struct ip *); 3332 len = ntohs(h4->ip_len) - off; 3333 break; 3334 case AF_INET6: 3335 h6 = mtod(m, struct ip6_hdr *); 3336 len = ntohs(h6->ip6_plen) - (off - sizeof(*h6)); 3337 break; 3338 } 3339 3340 if (pf_check_proto_cksum(m, off, len, IPPROTO_TCP, af)) 3341 REASON_SET(&reason, PFRES_PROTCKSUM); 3342 else { 3343 if (th->th_flags & TH_SYN) 3344 ack++; 3345 if (th->th_flags & TH_FIN) 3346 ack++; 3347 pf_send_tcp(r, af, pd->dst, 3348 pd->src, th->th_dport, th->th_sport, 3349 ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0, 3350 r->return_ttl, 1, 0, pd->eh, kif->pfik_ifp); 3351 } 3352 } else if (pd->proto != IPPROTO_ICMP && af == AF_INET && 3353 r->return_icmp) 3354 pf_send_icmp(m, r->return_icmp >> 8, 3355 r->return_icmp & 255, af, r); 3356 else if (pd->proto != IPPROTO_ICMPV6 && af == AF_INET6 && 3357 r->return_icmp6) 3358 pf_send_icmp(m, r->return_icmp6 >> 8, 3359 r->return_icmp6 & 255, af, r); 3360 } 3361 3362 if (r->action == PF_DROP) 3363 return (PF_DROP); 3364 3365 if (pf_tag_packet(m, tag, rtableid)) { 
		REASON_SET(&reason, PFRES_MEMORY);
		return (PF_DROP);
	}

	/*
	 * Create state unless this is a stateless ICMP query/reply or the
	 * rule neither keeps state nor translates (NAT always needs state).
	 */
	if (!state_icmp && (r->keep_state || nr != NULL ||
	    (pd->flags & PFDESC_TCP_NORM))) {
		int action;
		action = pf_create_state(r, nr, a, pd, nsn, skw, sks, nk, sk, m,
		    off, sport, dport, &rewrite, kif, sm, tag, bproto_sum,
		    bip_sum, hdrlen);
		if (action == PF_DROP)
			goto cleanup;
		if (action != PF_PASS)
			return (action);
	}

	/* copy back packet headers if we performed NAT operations */
	if (rewrite)
		m_copyback(m, off, hdrlen, pd->hdr.any);

	return (PF_PASS);

cleanup:
	/* state keys not consumed by pf_create_state() must be freed here */
	if (sk != NULL)
		pool_put(&pf_state_key_pl, sk);
	if (nk != NULL)
		pool_put(&pf_state_key_pl, nk);
	return (PF_DROP);
}

/*
 * pf_create_state: allocate and insert a new state entry for a packet that
 * matched a keep-state (or NAT) rule in pf_test_rule().
 *
 * Sets up per-protocol peer tracking (TCP sequence windows, UDP/ICMP/other
 * timeouts), attaches source-tracking nodes, runs the TCP normalizer, and
 * inserts the wire/stack state keys.  On the synproxy path it answers the
 * initial SYN itself and returns PF_SYNPROXY_DROP.
 *
 * Returns PF_PASS on success (*sm set to the new state), PF_SYNPROXY_DROP
 * for the synproxy handshake, or PF_DROP on failure.  On the csfailed path
 * the state keys are NOT freed here; the caller's cleanup label does that.
 */
static __inline int
pf_create_state(struct pf_rule *r, struct pf_rule *nr, struct pf_rule *a,
    struct pf_pdesc *pd, struct pf_src_node *nsn, struct pf_state_key *skw,
    struct pf_state_key *sks, struct pf_state_key *nk, struct pf_state_key *sk,
    struct mbuf *m, int off, u_int16_t sport, u_int16_t dport, int *rewrite,
    struct pfi_kif *kif, struct pf_state **sm, int tag, u_int16_t bproto_sum,
    u_int16_t bip_sum, int hdrlen)
{
	struct pf_state		*s = NULL;
	struct pf_src_node	*sn = NULL;
	struct tcphdr		*th = pd->hdr.tcp;	/* only valid for TCP */
	u_int16_t		 mss = tcp_mssdflt;
	u_short			 reason;

	/* check maximums */
	if (r->max_states && (r->states_cur >= r->max_states)) {
		pf_status.lcounters[LCNT_STATES]++;
		REASON_SET(&reason, PFRES_MAXSTATES);
		return (PF_DROP);
	}
	/* src node for filter rule */
	if ((r->rule_flag & PFRULE_SRCTRACK ||
	    r->rpool.opts & PF_POOL_STICKYADDR) &&
	    pf_insert_src_node(&sn, r, pd->src, pd->af) != 0) {
		REASON_SET(&reason, PFRES_SRCLIMIT);
		goto csfailed;
	}
	/* src node for translation rule */
	if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
	    pf_insert_src_node(&nsn, nr, &sk->addr[pd->sidx], pd->af)) {
		REASON_SET(&reason, PFRES_SRCLIMIT);
		goto csfailed;
	}
	s = pool_get(&pf_state_pl, PR_NOWAIT | PR_ZERO);
	if (s == NULL) {
		REASON_SET(&reason, PFRES_MEMORY);
		goto csfailed;
	}
	s->rule.ptr = r;
	s->nat_rule.ptr = nr;
	s->anchor.ptr = a;
	STATE_INC_COUNTERS(s);
	if (r->allow_opts)
		s->state_flags |= PFSTATE_ALLOWOPTS;
	if (r->rule_flag & PFRULE_STATESLOPPY)
		s->state_flags |= PFSTATE_SLOPPY;
	s->log = r->log & PF_LOG_ALL;
	if (nr != NULL)
		s->log |= nr->log & PF_LOG_ALL;
	/* per-protocol initial peer states and first-packet timeouts */
	switch (pd->proto) {
	case IPPROTO_TCP:
		s->src.seqlo = ntohl(th->th_seq);
		s->src.seqhi = s->src.seqlo + pd->p_len + 1;
		if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN &&
		    r->keep_state == PF_STATE_MODULATE) {
			/* Generate sequence number modulator */
			if ((s->src.seqdiff = pf_tcp_iss(pd) - s->src.seqlo) ==
			    0)
				s->src.seqdiff = 1;
			pf_change_a(&th->th_seq, &th->th_sum,
			    htonl(s->src.seqlo + s->src.seqdiff), 0);
			*rewrite = 1;
		} else
			s->src.seqdiff = 0;
		if (th->th_flags & TH_SYN) {
			s->src.seqhi++;
			s->src.wscale = pf_get_wscale(m, off,
			    th->th_off, pd->af);
		}
		s->src.max_win = MAX(ntohs(th->th_win), 1);
		if (s->src.wscale & PF_WSCALE_MASK) {
			/* Remove scale factor from initial window */
			int win = s->src.max_win;
			win += 1 << (s->src.wscale & PF_WSCALE_MASK);
			s->src.max_win = (win - 1) >>
			    (s->src.wscale & PF_WSCALE_MASK);
		}
		if (th->th_flags & TH_FIN)
			s->src.seqhi++;
		s->dst.seqhi = 1;
		s->dst.max_win = 1;
		s->src.state = TCPS_SYN_SENT;
		s->dst.state = TCPS_CLOSED;
		s->timeout = PFTM_TCP_FIRST_PACKET;
		break;
	case IPPROTO_UDP:
		s->src.state = PFUDPS_SINGLE;
		s->dst.state = PFUDPS_NO_TRAFFIC;
		s->timeout = PFTM_UDP_FIRST_PACKET;
		break;
	case IPPROTO_ICMP:
#ifdef INET6
	case IPPROTO_ICMPV6:
#endif
		s->timeout = PFTM_ICMP_FIRST_PACKET;
		break;
	default:
		s->src.state = PFOTHERS_SINGLE;
		s->dst.state = PFOTHERS_NO_TRAFFIC;
		s->timeout = PFTM_OTHER_FIRST_PACKET;
	}

	s->creation = time_second;
	s->expire = time_second;

	if (sn != NULL) {
		s->src_node = sn;
		s->src_node->states++;
	}
	if (nsn != NULL) {
		/* XXX We only modify one side for now. */
		PF_ACPY(&nsn->raddr, &nk->addr[1], pd->af);
		s->nat_src_node = nsn;
		s->nat_src_node->states++;
	}
	if (pd->proto == IPPROTO_TCP) {
		if ((pd->flags & PFDESC_TCP_NORM) && pf_normalize_tcp_init(m,
		    off, pd, th, &s->src, &s->dst)) {
			REASON_SET(&reason, PFRES_MEMORY);
			pf_src_tree_remove_state(s);
			STATE_DEC_COUNTERS(s);
			pool_put(&pf_state_pl, s);
			return (PF_DROP);
		}
		if ((pd->flags & PFDESC_TCP_NORM) && s->src.scrub &&
		    pf_normalize_tcp_stateful(m, off, pd, &reason, th, s,
		    &s->src, &s->dst, rewrite)) {
			/* This really shouldn't happen!!! */
			DPFPRINTF(PF_DEBUG_URGENT,
			    ("pf_normalize_tcp_stateful failed on first pkt"));
			pf_normalize_tcp_cleanup(s);
			pf_src_tree_remove_state(s);
			STATE_DEC_COUNTERS(s);
			pool_put(&pf_state_pl, s);
			return (PF_DROP);
		}
	}
	s->direction = pd->dir;

	/* build wire/stack keys if the NAT path has not done so already */
	if (sk == NULL && pf_state_key_setup(pd, nr, &skw, &sks, &sk, &nk,
	    pd->src, pd->dst, sport, dport))
		goto csfailed;

	if (pf_state_insert(BOUND_IFACE(r, kif), skw, sks, s)) {
		if (pd->proto == IPPROTO_TCP)
			pf_normalize_tcp_cleanup(s);
		REASON_SET(&reason, PFRES_STATEINS);
		pf_src_tree_remove_state(s);
		STATE_DEC_COUNTERS(s);
		pool_put(&pf_state_pl, s);
		return (PF_DROP);
	} else
		*sm = s;

	pf_set_rt_ifp(s, pd->src);	/* needs s->state_key set */
	if (tag > 0) {
		pf_tag_ref(tag);
		s->tag = tag;
	}
	if (pd->proto == IPPROTO_TCP && (th->th_flags & (TH_SYN|TH_ACK)) ==
	    TH_SYN && r->keep_state == PF_STATE_SYNPROXY) {
		s->src.state = PF_TCPS_PROXY_SRC;
		/* undo NAT changes, if they have taken place */
		if (nr != NULL) {
			PF_ACPY(pd->src, &sk->addr[pd->sidx], pd->af);
			PF_ACPY(pd->dst, &sk->addr[pd->didx], pd->af);
			if (pd->sport)
				*pd->sport = sk->port[pd->sidx];
			if (pd->dport)
				*pd->dport = sk->port[pd->didx];
			if (pd->proto_sum)
				*pd->proto_sum = bproto_sum;
			if (pd->ip_sum)
				*pd->ip_sum = bip_sum;
			m_copyback(m, off, hdrlen, pd->hdr.any);
		}
		s->src.seqhi = htonl(arc4random());
		/* Find mss option */
		mss = pf_get_mss(m, off, th->th_off, pd->af);
		mss = pf_calc_mss(pd->src, pd->af, mss);
		mss = pf_calc_mss(pd->dst, pd->af, mss);
		s->src.mss = mss;
		/* answer the SYN ourselves; client must complete handshake */
		pf_send_tcp(r, pd->af, pd->dst, pd->src, th->th_dport,
		    th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1,
		    TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, pd->eh, kif->pfik_ifp);
		REASON_SET(&reason, PFRES_SYNPROXY);
		return (PF_SYNPROXY_DROP);
	}

	return (PF_PASS);

csfailed:
	/* drop src nodes we created that never got a state attached */
	if (sn != NULL && sn->states == 0 && sn->expire == 0) {
		RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
		pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
		pf_status.src_nodes--;
		pool_put(&pf_src_tree_pl, sn);
	}
	if (nsn != sn && nsn != NULL && nsn->states == 0 && nsn->expire == 0) {
		RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
		pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
		pf_status.src_nodes--;
		pool_put(&pf_src_tree_pl, nsn);
	}
	return (PF_DROP);
}

/*
 * pf_test_fragment: match a non-reassembled IP fragment against the filter
 * ruleset.  Port, flag, ICMP-type and OS-fingerprint criteria cannot be
 * evaluated on a fragment (no transport header), so rules using them are
 * skipped.  Returns PF_PASS or PF_DROP; *rm/*am/*rsm report the matching
 * rule, anchor rule and ruleset for logging.
 */
int
pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif,
    struct mbuf *m, void *h, struct pf_pdesc *pd, struct pf_rule **am,
    struct pf_ruleset **rsm)
{
	struct pf_rule		*r, *a = NULL;
	struct pf_ruleset	*ruleset = NULL;
	sa_family_t		 af = pd->af;
	u_short			 reason;
	int			 tag = -1;
	int			 asd = 0;
	int			 match = 0;

	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
	while (r != NULL) {
		r->evaluations++;
		if (pfi_kif_match(r->kif, kif) == r->ifnot)
			r = r->skip[PF_SKIP_IFP].ptr;
		else if (r->direction && r->direction != direction)
			r = r->skip[PF_SKIP_DIR].ptr;
		else if (r->af && r->af != af)
			r = r->skip[PF_SKIP_AF].ptr;
		else if (r->proto && r->proto != pd->proto)
			r = r->skip[PF_SKIP_PROTO].ptr;
		else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
		    r->src.neg, kif))
			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
		else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
		    r->dst.neg, NULL))
			r = r->skip[PF_SKIP_DST_ADDR].ptr;
		else if (r->tos && !(r->tos == pd->tos))
			r = TAILQ_NEXT(r, entries);
		else if (r->os_fingerprint != PF_OSFP_ANY)
			r = TAILQ_NEXT(r, entries);
		/* transport-layer criteria are unavailable on fragments */
		else if (pd->proto == IPPROTO_UDP &&
		    (r->src.port_op || r->dst.port_op))
			r = TAILQ_NEXT(r, entries);
		else if (pd->proto == IPPROTO_TCP &&
		    (r->src.port_op || r->dst.port_op || r->flagset))
			r = TAILQ_NEXT(r, entries);
		else if ((pd->proto == IPPROTO_ICMP ||
		    pd->proto == IPPROTO_ICMPV6) &&
		    (r->type || r->code))
			r = TAILQ_NEXT(r, entries);
		else if (r->prob && r->prob <=
		    (arc4random() % (UINT_MAX - 1) + 1))
			r = TAILQ_NEXT(r, entries);
		else if (r->match_tag && !pf_match_tag(m, r, &tag))
			r = TAILQ_NEXT(r, entries);
		else {
			if (r->anchor == NULL) {
				match = 1;
				*rm = r;
				*am = a;
				*rsm = ruleset;
				if ((*rm)->quick)
					break;
				r = TAILQ_NEXT(r, entries);
			} else
				pf_step_into_anchor(&asd, &ruleset,
				    PF_RULESET_FILTER, &r, &a, &match);
		}
		if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
		    PF_RULESET_FILTER, &r, &a, &match))
			break;
	}
	r = *rm;
	a = *am;
	ruleset = *rsm;

	REASON_SET(&reason, PFRES_MATCH);

	if (r->log)
		PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset,
		    pd);

	if (r->action != PF_PASS)
		return (PF_DROP);

	if (pf_tag_packet(m, tag, -1)) {
		REASON_SET(&reason, PFRES_MEMORY);
		return (PF_DROP);
	}

	return (PF_PASS);
}

/*
 * pf_tcp_track_full: full TCP sequence-window tracking for an established
 * state (van Rooij algorithm).  Validates the segment against both peers'
 * windows, applies sequence modulation / stateful scrub, advances the TCP
 * state machine and state timeouts.  Sets *copyback when the TCP header was
 * modified and must be written back to the mbuf.  Returns PF_PASS or
 * PF_DROP (with *reason set).
 */
int
pf_tcp_track_full(struct pf_state_peer *src, struct pf_state_peer *dst,
    struct pf_state **state, struct pfi_kif *kif, struct mbuf *m, int off,
    struct pf_pdesc *pd, u_short *reason, int *copyback)
{
	struct tcphdr		*th = pd->hdr.tcp;
	u_int16_t		 win = ntohs(th->th_win);
	u_int32_t		 ack, end, seq, orig_seq;
	u_int8_t		 sws, dws;
	int			 ackskew;

	/* window scale only valid once negotiated (not on SYN itself) */
	if (src->wscale && dst->wscale && !(th->th_flags & TH_SYN)) {
		sws = src->wscale & PF_WSCALE_MASK;
		dws = dst->wscale & PF_WSCALE_MASK;
	} else
		sws = dws = 0;

	/*
	 * Sequence tracking algorithm from Guido van Rooij's paper:
	 * http://www.madison-gurkha.com/publications/tcp_filtering/
	 * tcp_filtering.ps
	 */

	orig_seq = seq = ntohl(th->th_seq);
	if
 (src->seqlo == 0) {
		/* First packet from this end. Set its state */

		if ((pd->flags & PFDESC_TCP_NORM || dst->scrub) &&
		    src->scrub == NULL) {
			if (pf_normalize_tcp_init(m, off, pd, th, src, dst)) {
				REASON_SET(reason, PFRES_MEMORY);
				return (PF_DROP);
			}
		}

		/* Deferred generation of sequence number modulator */
		if (dst->seqdiff && !src->seqdiff) {
			/* use random iss for the TCP server */
			while ((src->seqdiff = arc4random() - seq) == 0)
				;
			ack = ntohl(th->th_ack) - dst->seqdiff;
			pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
			    src->seqdiff), 0);
			pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
			*copyback = 1;
		} else {
			ack = ntohl(th->th_ack);
		}

		end = seq + pd->p_len;
		if (th->th_flags & TH_SYN) {
			end++;
			if (dst->wscale & PF_WSCALE_FLAG) {
				src->wscale = pf_get_wscale(m, off, th->th_off,
				    pd->af);
				if (src->wscale & PF_WSCALE_FLAG) {
					/* Remove scale factor from initial
					 * window */
					sws = src->wscale & PF_WSCALE_MASK;
					win = ((u_int32_t)win + (1 << sws) - 1)
					    >> sws;
					dws = dst->wscale & PF_WSCALE_MASK;
				} else {
					/* fixup other window */
					dst->max_win <<= dst->wscale &
					    PF_WSCALE_MASK;
					/* in case of a retrans SYN|ACK */
					dst->wscale = 0;
				}
			}
		}
		if (th->th_flags & TH_FIN)
			end++;

		src->seqlo = seq;
		if (src->state < TCPS_SYN_SENT)
			src->state = TCPS_SYN_SENT;

		/*
		 * May need to slide the window (seqhi may have been set by
		 * the crappy stack check or if we picked up the connection
		 * after establishment)
		 */
		if (src->seqhi == 1 ||
		    SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi))
			src->seqhi = end + MAX(1, dst->max_win << dws);
		if (win > src->max_win)
			src->max_win = win;

	} else {
		ack = ntohl(th->th_ack) - dst->seqdiff;
		if (src->seqdiff) {
			/* Modulate sequence numbers */
			pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
			    src->seqdiff), 0);
			pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
			*copyback = 1;
		}
		end = seq + pd->p_len;
		if (th->th_flags & TH_SYN)
			end++;
		if (th->th_flags & TH_FIN)
			end++;
	}

	if ((th->th_flags & TH_ACK) == 0) {
		/* Let it pass through the ack skew check */
		ack = dst->seqlo;
	} else if ((ack == 0 &&
	    (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) ||
	    /* broken tcp stacks do not set ack */
	    (dst->state < TCPS_SYN_SENT)) {
		/*
		 * Many stacks (ours included) will set the ACK number in an
		 * FIN|ACK if the SYN times out -- no sequence to ACK.
		 */
		ack = dst->seqlo;
	}

	if (seq == end) {
		/* Ease sequencing restrictions on no data packets */
		seq = src->seqlo;
		end = seq;
	}

	ackskew = dst->seqlo - ack;

	/*
	 * Need to demodulate the sequence numbers in any TCP SACK options
	 * (Selective ACK). We could optionally validate the SACK values
	 * against the current ACK window, either forwards or backwards, but
	 * I'm not confident that SACK has been implemented properly
	 * everywhere. It wouldn't surprise me if several stacks accidently
	 * SACK too far backwards of previously ACKed data. There really aren't
	 * any security implications of bad SACKing unless the target stack
	 * doesn't validate the option length correctly. Someone trying to
	 * spoof into a TCP connection won't bother blindly sending SACK
	 * options anyway.
	 */
	if (dst->seqdiff && (th->th_off << 2) > sizeof(struct tcphdr)) {
		if (pf_modulate_sack(m, off, pd, th, dst))
			*copyback = 1;
	}

#define MAXACKWINDOW (0xffff + 1500)	/* 1500 is an arbitrary fudge factor */
	if (SEQ_GEQ(src->seqhi, end) &&
	    /* Last octet inside other's window space */
	    SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) &&
	    /* Retrans: not more than one window back */
	    (ackskew >= -MAXACKWINDOW) &&
	    /* Acking not more than one reassembled fragment backwards */
	    (ackskew <= (MAXACKWINDOW << sws)) &&
	    /* Acking not more than one window forward */
	    ((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo ||
	    (orig_seq == src->seqlo + 1) || (orig_seq + 1 == src->seqlo) ||
	    (pd->flags & PFDESC_IP_REAS) == 0)) {
		/* Require an exact/+1 sequence match on resets when possible */

		if (dst->scrub || src->scrub) {
			if (pf_normalize_tcp_stateful(m, off, pd, reason, th,
			    *state, src, dst, copyback))
				return (PF_DROP);
		}

		/* update max window */
		if (src->max_win < win)
			src->max_win = win;
		/* synchronize sequencing */
		if (SEQ_GT(end, src->seqlo))
			src->seqlo = end;
		/* slide the window of what the other end can send */
		if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
			dst->seqhi = ack + MAX((win << sws), 1);

		/* update states */
		if (th->th_flags & TH_SYN)
			if (src->state < TCPS_SYN_SENT)
				src->state = TCPS_SYN_SENT;
		if (th->th_flags & TH_FIN)
			if (src->state < TCPS_CLOSING)
				src->state = TCPS_CLOSING;
		if (th->th_flags & TH_ACK) {
			if (dst->state == TCPS_SYN_SENT) {
				dst->state = TCPS_ESTABLISHED;
				if (src->state == TCPS_ESTABLISHED &&
				    (*state)->src_node != NULL &&
				    pf_src_connlimit(state)) {
					REASON_SET(reason, PFRES_SRCLIMIT);
					return (PF_DROP);
				}
			} else if (dst->state == TCPS_CLOSING)
				dst->state = TCPS_FIN_WAIT_2;
		}
		if (th->th_flags & TH_RST)
			src->state = dst->state = TCPS_TIME_WAIT;

		/* update expire time */
		(*state)->expire = time_second;
		if (src->state >= TCPS_FIN_WAIT_2 &&
		    dst->state >= TCPS_FIN_WAIT_2)
			(*state)->timeout = PFTM_TCP_CLOSED;
		else if (src->state >= TCPS_CLOSING &&
		    dst->state >= TCPS_CLOSING)
			(*state)->timeout = PFTM_TCP_FIN_WAIT;
		else if (src->state < TCPS_ESTABLISHED ||
		    dst->state < TCPS_ESTABLISHED)
			(*state)->timeout = PFTM_TCP_OPENING;
		else if (src->state >= TCPS_CLOSING ||
		    dst->state >= TCPS_CLOSING)
			(*state)->timeout = PFTM_TCP_CLOSING;
		else
			(*state)->timeout = PFTM_TCP_ESTABLISHED;

		/* Fall through to PASS packet */

	} else if ((dst->state < TCPS_SYN_SENT ||
	    dst->state >= TCPS_FIN_WAIT_2 ||
	    src->state >= TCPS_FIN_WAIT_2) &&
	    SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) &&
	    /* Within a window forward of the originating packet */
	    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) {
		/* Within a window backward of the originating packet */

		/*
		 * This currently handles three situations:
		 *  1) Stupid stacks will shotgun SYNs before their peer
		 *     replies.
		 *  2) When PF catches an already established stream (the
		 *     firewall rebooted, the state table was flushed, routes
		 *     changed...)
		 *  3) Packets get funky immediately after the connection
		 *     closes (this should catch Solaris spurious ACK|FINs
		 *     that web servers like to spew after a close)
		 *
		 * This must be a little more careful than the above code
		 * since packet floods will also be caught here. We don't
		 * update the TTL here to mitigate the damage of a packet
		 * flood and so the same code can handle awkward establishment
		 * and a loosened connection close.
		 * In the establishment case, a correct peer response will
		 * validate the connection, go through the normal state code
		 * and keep updating the state TTL.
		 */

		if (pf_status.debug >= PF_DEBUG_MISC) {
			printf("pf: loose state match: ");
			pf_print_state(*state);
			pf_print_flags(th->th_flags);
			printf(" seq=%u (%u) ack=%u len=%u ackskew=%d "
			    "pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack,
			    pd->p_len, ackskew, (*state)->packets[0],
			    (*state)->packets[1],
			    pd->dir == PF_IN ? "in" : "out",
			    pd->dir == (*state)->direction ? "fwd" : "rev");
		}

		if (dst->scrub || src->scrub) {
			if (pf_normalize_tcp_stateful(m, off, pd, reason, th,
			    *state, src, dst, copyback))
				return (PF_DROP);
		}

		/* update max window */
		if (src->max_win < win)
			src->max_win = win;
		/* synchronize sequencing */
		if (SEQ_GT(end, src->seqlo))
			src->seqlo = end;
		/* slide the window of what the other end can send */
		if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
			dst->seqhi = ack + MAX((win << sws), 1);

		/*
		 * Cannot set dst->seqhi here since this could be a shotgunned
		 * SYN and not an already established connection.
		 */

		if (th->th_flags & TH_FIN)
			if (src->state < TCPS_CLOSING)
				src->state = TCPS_CLOSING;
		if (th->th_flags & TH_RST)
			src->state = dst->state = TCPS_TIME_WAIT;

		/* Fall through to PASS packet */

	} else {
		if ((*state)->dst.state == TCPS_SYN_SENT &&
		    (*state)->src.state == TCPS_SYN_SENT) {
			/* Send RST for state mismatches during handshake */
			if (!(th->th_flags & TH_RST))
				pf_send_tcp((*state)->rule.ptr, pd->af,
				    pd->dst, pd->src, th->th_dport,
				    th->th_sport, ntohl(th->th_ack), 0,
				    TH_RST, 0, 0,
				    (*state)->rule.ptr->return_ttl, 1, 0,
				    pd->eh, kif->pfik_ifp);
			src->seqlo = 0;
			src->seqhi = 1;
			src->max_win = 1;
		} else if (pf_status.debug >= PF_DEBUG_MISC) {
			printf("pf: BAD state: ");
			pf_print_state(*state);
			pf_print_flags(th->th_flags);
			printf(" seq=%u (%u) ack=%u len=%u ackskew=%d "
			    "pkts=%llu:%llu dir=%s,%s\n",
			    seq, orig_seq, ack, pd->p_len, ackskew,
			    (*state)->packets[0], (*state)->packets[1],
			    pd->dir == PF_IN ? "in" : "out",
			    pd->dir == (*state)->direction ? "fwd" : "rev");
			printf("pf: State failure on: %c %c %c %c | %c %c\n",
			    SEQ_GEQ(src->seqhi, end) ? ' ' : '1',
			    SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ?
			    ' ': '2',
			    (ackskew >= -MAXACKWINDOW) ? ' ' : '3',
			    (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4',
			    SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) ?' ' :'5',
			    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' ' :'6');
		}
		REASON_SET(reason, PFRES_BADSTATE);
		return (PF_DROP);
	}

	return (PF_PASS);
}

/*
 * pf_tcp_track_sloppy: relaxed TCP tracking for PFSTATE_SLOPPY states
 * (asymmetric routing where only one direction is seen).  Advances the
 * peer state machine from flags alone, without sequence-window checks,
 * and refreshes the state timeout.  Returns PF_PASS or PF_DROP.
 */
int
pf_tcp_track_sloppy(struct pf_state_peer *src, struct pf_state_peer *dst,
    struct pf_state **state, struct pf_pdesc *pd, u_short *reason)
{
	struct tcphdr		*th = pd->hdr.tcp;

	if (th->th_flags & TH_SYN)
		if (src->state < TCPS_SYN_SENT)
			src->state = TCPS_SYN_SENT;
	if (th->th_flags & TH_FIN)
		if (src->state < TCPS_CLOSING)
			src->state = TCPS_CLOSING;
	if (th->th_flags & TH_ACK) {
		if (dst->state == TCPS_SYN_SENT) {
			dst->state = TCPS_ESTABLISHED;
			if (src->state == TCPS_ESTABLISHED &&
			    (*state)->src_node != NULL &&
			    pf_src_connlimit(state)) {
				REASON_SET(reason, PFRES_SRCLIMIT);
				return (PF_DROP);
			}
		} else if (dst->state == TCPS_CLOSING) {
			dst->state = TCPS_FIN_WAIT_2;
		} else if (src->state == TCPS_SYN_SENT &&
		    dst->state < TCPS_SYN_SENT) {
			/*
			 * Handle a special sloppy case where we only see one
			 * half of the connection. If there is a ACK after
			 * the initial SYN without ever seeing a packet from
			 * the destination, set the connection to established.
			 */
			dst->state = src->state = TCPS_ESTABLISHED;
			if ((*state)->src_node != NULL &&
			    pf_src_connlimit(state)) {
				REASON_SET(reason, PFRES_SRCLIMIT);
				return (PF_DROP);
			}
		} else if (src->state == TCPS_CLOSING &&
		    dst->state == TCPS_ESTABLISHED &&
		    dst->seqlo == 0) {
			/*
			 * Handle the closing of half connections where we
			 * don't see the full bidirectional FIN/ACK+ACK
			 * handshake.
			 */
			dst->state = TCPS_CLOSING;
		}
	}
	if (th->th_flags & TH_RST)
		src->state = dst->state = TCPS_TIME_WAIT;

	/* update expire time */
	(*state)->expire = time_second;
	if (src->state >= TCPS_FIN_WAIT_2 &&
	    dst->state >= TCPS_FIN_WAIT_2)
		(*state)->timeout = PFTM_TCP_CLOSED;
	else if (src->state >= TCPS_CLOSING &&
	    dst->state >= TCPS_CLOSING)
		(*state)->timeout = PFTM_TCP_FIN_WAIT;
	else if (src->state < TCPS_ESTABLISHED ||
	    dst->state < TCPS_ESTABLISHED)
		(*state)->timeout = PFTM_TCP_OPENING;
	else if (src->state >= TCPS_CLOSING ||
	    dst->state >= TCPS_CLOSING)
		(*state)->timeout = PFTM_TCP_CLOSING;
	else
		(*state)->timeout = PFTM_TCP_ESTABLISHED;

	return (PF_PASS);
}

/*
 * pf_test_state_tcp: look up and update state for a TCP packet.
 * Handles the synproxy handshake phases (PF_TCPS_PROXY_SRC/DST), detects
 * state reuse (new SYN on a closed state), dispatches to full or sloppy
 * tracking, and applies any NAT translation to the header.  Returns
 * PF_PASS, PF_DROP or PF_SYNPROXY_DROP; *state may be cleared on reuse.
 */
int
pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
    struct mbuf *m, int off, void *h, struct pf_pdesc *pd,
    u_short *reason)
{
	struct pf_state_key_cmp	 key;
	struct tcphdr		*th = pd->hdr.tcp;
	int			 copyback = 0;
	struct pf_state_peer	*src, *dst;
	struct pf_state_key	*sk;

	key.af = pd->af;
	key.proto = IPPROTO_TCP;
	if (direction == PF_IN)	{	/* wire side, straight */
		PF_ACPY(&key.addr[0], pd->src, key.af);
		PF_ACPY(&key.addr[1], pd->dst, key.af);
		key.port[0] = th->th_sport;
		key.port[1] = th->th_dport;
	} else {			/* stack side, reverse */
		PF_ACPY(&key.addr[1], pd->src, key.af);
		PF_ACPY(&key.addr[0], pd->dst, key.af);
		key.port[1] = th->th_sport;
		key.port[0] = th->th_dport;
	}

	STATE_LOOKUP(kif, &key, direction, *state, m);

	if (direction == (*state)->direction) {
		src = &(*state)->src;
		dst = &(*state)->dst;
	} else {
		src = &(*state)->dst;
		dst = &(*state)->src;
	}

	sk = (*state)->key[pd->didx];

	if ((*state)->src.state == PF_TCPS_PROXY_SRC) {
		if (direction != (*state)->direction) {
			REASON_SET(reason, PFRES_SYNPROXY);
			return (PF_SYNPROXY_DROP);
		}
		if (th->th_flags & TH_SYN) {
			if (ntohl(th->th_seq) != (*state)->src.seqlo) {
				REASON_SET(reason, PFRES_SYNPROXY);
				return (PF_DROP);
			}
			/* retransmitted SYN: re-send our SYN|ACK */
			pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
			    pd->src, th->th_dport, th->th_sport,
			    (*state)->src.seqhi, ntohl(th->th_seq) + 1,
			    TH_SYN|TH_ACK, 0, (*state)->src.mss, 0, 1,
			    0, NULL, NULL);
			REASON_SET(reason, PFRES_SYNPROXY);
			return (PF_SYNPROXY_DROP);
		} else if (!(th->th_flags & TH_ACK) ||
		    (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
		    (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) {
			REASON_SET(reason, PFRES_SYNPROXY);
			return (PF_DROP);
		} else if ((*state)->src_node != NULL &&
		    pf_src_connlimit(state)) {
			REASON_SET(reason, PFRES_SRCLIMIT);
			return (PF_DROP);
		} else
			/* client handshake complete; now dial the server */
			(*state)->src.state = PF_TCPS_PROXY_DST;
	}
	if ((*state)->src.state == PF_TCPS_PROXY_DST) {
		if (direction == (*state)->direction) {
			if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) ||
			    (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
			    (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) {
				REASON_SET(reason, PFRES_SYNPROXY);
				return (PF_DROP);
			}
			(*state)->src.max_win = MAX(ntohs(th->th_win), 1);
			if ((*state)->dst.seqhi == 1)
				(*state)->dst.seqhi = htonl(arc4random());
			/* send our own SYN to the real destination */
			pf_send_tcp((*state)->rule.ptr, pd->af,
			    &sk->addr[pd->sidx], &sk->addr[pd->didx],
			    sk->port[pd->sidx], sk->port[pd->didx],
			    (*state)->dst.seqhi, 0, TH_SYN, 0,
			    (*state)->src.mss, 0, 0, (*state)->tag, NULL, NULL);
			REASON_SET(reason, PFRES_SYNPROXY);
			return (PF_SYNPROXY_DROP);
		} else if (((th->th_flags & (TH_SYN|TH_ACK)) !=
		    (TH_SYN|TH_ACK)) ||
		    (ntohl(th->th_ack) != (*state)->dst.seqhi + 1)) {
			REASON_SET(reason, PFRES_SYNPROXY);
			return (PF_DROP);
		} else {
			/*
			 * Server answered; complete both handshakes and
			 * splice the two connections with seq modulation.
			 */
			(*state)->dst.max_win = MAX(ntohs(th->th_win), 1);
			(*state)->dst.seqlo = ntohl(th->th_seq);
			pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
			    pd->src, th->th_dport, th->th_sport,
			    ntohl(th->th_ack), ntohl(th->th_seq) + 1,
			    TH_ACK, (*state)->src.max_win, 0, 0, 0,
			    (*state)->tag, NULL, NULL);
			pf_send_tcp((*state)->rule.ptr, pd->af,
			    &sk->addr[pd->sidx], &sk->addr[pd->didx],
			    sk->port[pd->sidx], sk->port[pd->didx],
			    (*state)->src.seqhi + 1, (*state)->src.seqlo + 1,
			    TH_ACK, (*state)->dst.max_win, 0, 0, 1,
			    0, NULL, NULL);
			(*state)->src.seqdiff = (*state)->dst.seqhi -
			    (*state)->src.seqlo;
			(*state)->dst.seqdiff = (*state)->src.seqhi -
			    (*state)->dst.seqlo;
			(*state)->src.seqhi = (*state)->src.seqlo +
			    (*state)->dst.max_win;
			(*state)->dst.seqhi = (*state)->dst.seqlo +
			    (*state)->src.max_win;
			(*state)->src.wscale = (*state)->dst.wscale = 0;
			(*state)->src.state = (*state)->dst.state =
			    TCPS_ESTABLISHED;
			REASON_SET(reason, PFRES_SYNPROXY);
			return (PF_SYNPROXY_DROP);
		}
	}

	/* new SYN on a fully closed state: tear it down so a fresh
	 * ruleset evaluation can create a new state for the connection */
	if (((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN) &&
	    dst->state >= TCPS_FIN_WAIT_2 &&
	    src->state >= TCPS_FIN_WAIT_2) {
		if (pf_status.debug >= PF_DEBUG_MISC) {
			printf("pf: state reuse ");
			pf_print_state(*state);
			pf_print_flags(th->th_flags);
			printf("\n");
		}
		/* XXX make sure it's the same direction ?? */
		(*state)->src.state = (*state)->dst.state = TCPS_CLOSED;
		pf_unlink_state(*state);
		*state = NULL;
		return (PF_DROP);
	}

	if ((*state)->state_flags & PFSTATE_SLOPPY) {
		if (pf_tcp_track_sloppy(src, dst, state, pd, reason) == PF_DROP)
			return (PF_DROP);
	} else {
		if (pf_tcp_track_full(src, dst, state, kif, m, off, pd, reason,
		    &copyback) == PF_DROP)
			return (PF_DROP);
	}

	/* translate source/destination address, if necessary */
	if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
		struct pf_state_key *nk = (*state)->key[pd->didx];

		if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af) ||
		    nk->port[pd->sidx] != th->th_sport)
			pf_change_ap(pd->src, &th->th_sport, pd->ip_sum,
			    &th->th_sum, &nk->addr[pd->sidx],
			    nk->port[pd->sidx], 0, pd->af);

		if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af) ||
		    nk->port[pd->didx] != th->th_dport)
			pf_change_ap(pd->dst, &th->th_dport, pd->ip_sum,
			    &th->th_sum, &nk->addr[pd->didx],
			    nk->port[pd->didx], 0, pd->af);
		copyback = 1;
	}

	/* Copyback sequence modulation or stateful scrub changes if needed */
	if (copyback)
		m_copyback(m, off, sizeof(*th), th);

	return (PF_PASS);
}

/*
 * pf_test_state_udp: look up and update state for a UDP packet.
 * Promotes the peer states (SINGLE -> MULTIPLE once both sides have
 * spoken), refreshes the timeout, and applies NAT translation.
 * Returns PF_PASS or, via STATE_LOOKUP, PF_DROP when no state matches.
 */
int
pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif,
    struct mbuf *m, int off, void *h, struct pf_pdesc *pd)
{
	struct pf_state_peer	*src, *dst;
	struct pf_state_key_cmp	 key;
	struct udphdr		*uh = pd->hdr.udp;

	key.af = pd->af;
	key.proto = IPPROTO_UDP;
	if (direction == PF_IN)	{	/* wire side, straight */
		PF_ACPY(&key.addr[0], pd->src, key.af);
		PF_ACPY(&key.addr[1], pd->dst, key.af);
		key.port[0] = uh->uh_sport;
		key.port[1] = uh->uh_dport;
	} else {			/* stack side, reverse */
		PF_ACPY(&key.addr[1], pd->src, key.af);
		PF_ACPY(&key.addr[0], pd->dst, key.af);
		key.port[1] = uh->uh_sport;
		key.port[0] = uh->uh_dport;
	}

	STATE_LOOKUP(kif, &key, direction, *state, m);

	if (direction == (*state)->direction) {
		src = &(*state)->src;
		dst = &(*state)->dst;
	} else {
		src = &(*state)->dst;
		dst = &(*state)->src;
	}

	/* update states */
	if (src->state < PFUDPS_SINGLE)
		src->state = PFUDPS_SINGLE;
	if (dst->state == PFUDPS_SINGLE)
		dst->state = PFUDPS_MULTIPLE;

	/* update expire time */
	(*state)->expire = time_second;
	if (src->state == PFUDPS_MULTIPLE && dst->state == PFUDPS_MULTIPLE)
		(*state)->timeout = PFTM_UDP_MULTIPLE;
	else
		(*state)->timeout = PFTM_UDP_SINGLE;

	/* translate source/destination address, if necessary */
	if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
		struct pf_state_key *nk = (*state)->key[pd->didx];

		if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af) ||
		    nk->port[pd->sidx] != uh->uh_sport)
			pf_change_ap(pd->src, &uh->uh_sport, pd->ip_sum,
			    &uh->uh_sum, &nk->addr[pd->sidx],
			    nk->port[pd->sidx], 1, pd->af);

		if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af) ||
		    nk->port[pd->didx] != uh->uh_dport)
			pf_change_ap(pd->dst, &uh->uh_dport, pd->ip_sum,
			    &uh->uh_sum, &nk->addr[pd->didx],
			    nk->port[pd->didx], 1, pd->af);
		m_copyback(m, off, sizeof(*uh), uh);
	}

	return (PF_PASS);
}

/*
 * pf_test_state_icmp: look up and update state for an ICMP/ICMPv6 packet.
 * Queries/replies are matched against an ICMP state keyed on the icmp id;
 * error messages are matched against the state of the embedded TCP/UDP/
 * ICMP payload packet (continues past this view).
 */
int
pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
    struct mbuf *m, int off, void *h, struct pf_pdesc *pd, u_short *reason)
{
	struct pf_addr	*saddr = pd->src, *daddr = pd->dst;
	u_int16_t	 icmpid, *icmpsum;
	u_int8_t	 icmptype;
	int		 state_icmp = 0;	/* nonzero: ICMP error message */
	struct pf_state_key_cmp key;

	switch (pd->proto) {
#ifdef INET
	case IPPROTO_ICMP:
		icmptype = pd->hdr.icmp->icmp_type;
		icmpid = pd->hdr.icmp->icmp_id;
		icmpsum = &pd->hdr.icmp->icmp_cksum;

4341 if (icmptype == ICMP_UNREACH || 4342 icmptype == ICMP_SOURCEQUENCH || 4343 icmptype == ICMP_REDIRECT || 4344 icmptype == ICMP_TIMXCEED || 4345 icmptype == ICMP_PARAMPROB) 4346 state_icmp++; 4347 break; 4348 #endif /* INET */ 4349 #ifdef INET6 4350 case IPPROTO_ICMPV6: 4351 icmptype = pd->hdr.icmp6->icmp6_type; 4352 icmpid = pd->hdr.icmp6->icmp6_id; 4353 icmpsum = &pd->hdr.icmp6->icmp6_cksum; 4354 4355 if (icmptype == ICMP6_DST_UNREACH || 4356 icmptype == ICMP6_PACKET_TOO_BIG || 4357 icmptype == ICMP6_TIME_EXCEEDED || 4358 icmptype == ICMP6_PARAM_PROB) 4359 state_icmp++; 4360 break; 4361 #endif /* INET6 */ 4362 } 4363 4364 if (!state_icmp) { 4365 4366 /* 4367 * ICMP query/reply message not related to a TCP/UDP packet. 4368 * Search for an ICMP state. 4369 */ 4370 key.af = pd->af; 4371 key.proto = pd->proto; 4372 key.port[0] = key.port[1] = icmpid; 4373 if (direction == PF_IN) { /* wire side, straight */ 4374 PF_ACPY(&key.addr[0], pd->src, key.af); 4375 PF_ACPY(&key.addr[1], pd->dst, key.af); 4376 } else { /* stack side, reverse */ 4377 PF_ACPY(&key.addr[1], pd->src, key.af); 4378 PF_ACPY(&key.addr[0], pd->dst, key.af); 4379 } 4380 4381 STATE_LOOKUP(kif, &key, direction, *state, m); 4382 4383 (*state)->expire = time_second; 4384 (*state)->timeout = PFTM_ICMP_ERROR_REPLY; 4385 4386 /* translate source/destination address, if necessary */ 4387 if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { 4388 struct pf_state_key *nk = (*state)->key[pd->didx]; 4389 4390 switch (pd->af) { 4391 #ifdef INET 4392 case AF_INET: 4393 if (PF_ANEQ(pd->src, 4394 &nk->addr[pd->sidx], AF_INET)) 4395 pf_change_a(&saddr->v4.s_addr, 4396 pd->ip_sum, 4397 nk->addr[pd->sidx].v4.s_addr, 0); 4398 4399 if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], 4400 AF_INET)) 4401 pf_change_a(&daddr->v4.s_addr, 4402 pd->ip_sum, 4403 nk->addr[pd->didx].v4.s_addr, 0); 4404 4405 if (nk->port[0] != 4406 pd->hdr.icmp->icmp_id) { 4407 pd->hdr.icmp->icmp_cksum = 4408 pf_cksum_fixup( 4409 
pd->hdr.icmp->icmp_cksum, icmpid, 4410 nk->port[pd->sidx], 0); 4411 pd->hdr.icmp->icmp_id = 4412 nk->port[pd->sidx]; 4413 } 4414 4415 m_copyback(m, off, ICMP_MINLEN, 4416 pd->hdr.icmp); 4417 break; 4418 #endif /* INET */ 4419 #ifdef INET6 4420 case AF_INET6: 4421 if (PF_ANEQ(pd->src, 4422 &nk->addr[pd->sidx], AF_INET6)) 4423 pf_change_a6(saddr, 4424 &pd->hdr.icmp6->icmp6_cksum, 4425 &nk->addr[pd->sidx], 0); 4426 4427 if (PF_ANEQ(pd->dst, 4428 &nk->addr[pd->didx], AF_INET6)) 4429 pf_change_a6(daddr, 4430 &pd->hdr.icmp6->icmp6_cksum, 4431 &nk->addr[pd->didx], 0); 4432 4433 m_copyback(m, off, 4434 sizeof(struct icmp6_hdr), 4435 pd->hdr.icmp6); 4436 break; 4437 #endif /* INET6 */ 4438 } 4439 } 4440 return (PF_PASS); 4441 4442 } else { 4443 /* 4444 * ICMP error message in response to a TCP/UDP packet. 4445 * Extract the inner TCP/UDP header and search for that state. 4446 */ 4447 4448 struct pf_pdesc pd2; 4449 #ifdef INET 4450 struct ip h2; 4451 #endif /* INET */ 4452 #ifdef INET6 4453 struct ip6_hdr h2_6; 4454 int terminal = 0; 4455 #endif /* INET6 */ 4456 int ipoff2; 4457 int off2; 4458 4459 pd2.af = pd->af; 4460 /* Payload packet is from the opposite direction. */ 4461 pd2.sidx = (direction == PF_IN) ? 1 : 0; 4462 pd2.didx = (direction == PF_IN) ? 
0 : 1; 4463 switch (pd->af) { 4464 #ifdef INET 4465 case AF_INET: 4466 /* offset of h2 in mbuf chain */ 4467 ipoff2 = off + ICMP_MINLEN; 4468 4469 if (!pf_pull_hdr(m, ipoff2, &h2, sizeof(h2), 4470 NULL, reason, pd2.af)) { 4471 DPFPRINTF(PF_DEBUG_MISC, 4472 ("pf: ICMP error message too short " 4473 "(ip)\n")); 4474 return (PF_DROP); 4475 } 4476 /* 4477 * ICMP error messages don't refer to non-first 4478 * fragments 4479 */ 4480 if (h2.ip_off & htons(IP_OFFMASK)) { 4481 REASON_SET(reason, PFRES_FRAG); 4482 return (PF_DROP); 4483 } 4484 4485 /* offset of protocol header that follows h2 */ 4486 off2 = ipoff2 + (h2.ip_hl << 2); 4487 4488 pd2.proto = h2.ip_p; 4489 pd2.src = (struct pf_addr *)&h2.ip_src; 4490 pd2.dst = (struct pf_addr *)&h2.ip_dst; 4491 pd2.ip_sum = &h2.ip_sum; 4492 break; 4493 #endif /* INET */ 4494 #ifdef INET6 4495 case AF_INET6: 4496 ipoff2 = off + sizeof(struct icmp6_hdr); 4497 4498 if (!pf_pull_hdr(m, ipoff2, &h2_6, sizeof(h2_6), 4499 NULL, reason, pd2.af)) { 4500 DPFPRINTF(PF_DEBUG_MISC, 4501 ("pf: ICMP error message too short " 4502 "(ip6)\n")); 4503 return (PF_DROP); 4504 } 4505 pd2.proto = h2_6.ip6_nxt; 4506 pd2.src = (struct pf_addr *)&h2_6.ip6_src; 4507 pd2.dst = (struct pf_addr *)&h2_6.ip6_dst; 4508 pd2.ip_sum = NULL; 4509 off2 = ipoff2 + sizeof(h2_6); 4510 do { 4511 switch (pd2.proto) { 4512 case IPPROTO_FRAGMENT: 4513 /* 4514 * ICMPv6 error messages for 4515 * non-first fragments 4516 */ 4517 REASON_SET(reason, PFRES_FRAG); 4518 return (PF_DROP); 4519 case IPPROTO_AH: 4520 case IPPROTO_HOPOPTS: 4521 case IPPROTO_ROUTING: 4522 case IPPROTO_DSTOPTS: { 4523 /* get next header and header length */ 4524 struct ip6_ext opt6; 4525 4526 if (!pf_pull_hdr(m, off2, &opt6, 4527 sizeof(opt6), NULL, reason, 4528 pd2.af)) { 4529 DPFPRINTF(PF_DEBUG_MISC, 4530 ("pf: ICMPv6 short opt\n")); 4531 return (PF_DROP); 4532 } 4533 if (pd2.proto == IPPROTO_AH) 4534 off2 += (opt6.ip6e_len + 2) * 4; 4535 else 4536 off2 += (opt6.ip6e_len + 1) * 8; 4537 pd2.proto = 
opt6.ip6e_nxt; 4538 /* goto the next header */ 4539 break; 4540 } 4541 default: 4542 terminal++; 4543 break; 4544 } 4545 } while (!terminal); 4546 break; 4547 #endif /* INET6 */ 4548 } 4549 4550 switch (pd2.proto) { 4551 case IPPROTO_TCP: { 4552 struct tcphdr th; 4553 u_int32_t seq; 4554 struct pf_state_peer *src, *dst; 4555 u_int8_t dws; 4556 int copyback = 0; 4557 4558 /* 4559 * Only the first 8 bytes of the TCP header can be 4560 * expected. Don't access any TCP header fields after 4561 * th_seq, an ackskew test is not possible. 4562 */ 4563 if (!pf_pull_hdr(m, off2, &th, 8, NULL, reason, 4564 pd2.af)) { 4565 DPFPRINTF(PF_DEBUG_MISC, 4566 ("pf: ICMP error message too short " 4567 "(tcp)\n")); 4568 return (PF_DROP); 4569 } 4570 4571 key.af = pd2.af; 4572 key.proto = IPPROTO_TCP; 4573 PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); 4574 PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); 4575 key.port[pd2.sidx] = th.th_sport; 4576 key.port[pd2.didx] = th.th_dport; 4577 4578 STATE_LOOKUP(kif, &key, direction, *state, m); 4579 4580 if (direction == (*state)->direction) { 4581 src = &(*state)->dst; 4582 dst = &(*state)->src; 4583 } else { 4584 src = &(*state)->src; 4585 dst = &(*state)->dst; 4586 } 4587 4588 if (src->wscale && dst->wscale) 4589 dws = dst->wscale & PF_WSCALE_MASK; 4590 else 4591 dws = 0; 4592 4593 /* Demodulate sequence number */ 4594 seq = ntohl(th.th_seq) - src->seqdiff; 4595 if (src->seqdiff) { 4596 pf_change_a(&th.th_seq, icmpsum, 4597 htonl(seq), 0); 4598 copyback = 1; 4599 } 4600 4601 if (!((*state)->state_flags & PFSTATE_SLOPPY) && 4602 (!SEQ_GEQ(src->seqhi, seq) || 4603 !SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)))) { 4604 if (pf_status.debug >= PF_DEBUG_MISC) { 4605 printf("pf: BAD ICMP %d:%d ", 4606 icmptype, pd->hdr.icmp->icmp_code); 4607 pf_print_host(pd->src, 0, pd->af); 4608 printf(" -> "); 4609 pf_print_host(pd->dst, 0, pd->af); 4610 printf(" state: "); 4611 pf_print_state(*state); 4612 printf(" seq=%u\n", seq); 4613 } 4614 
REASON_SET(reason, PFRES_BADSTATE); 4615 return (PF_DROP); 4616 } else { 4617 if (pf_status.debug >= PF_DEBUG_MISC) { 4618 printf("pf: OK ICMP %d:%d ", 4619 icmptype, pd->hdr.icmp->icmp_code); 4620 pf_print_host(pd->src, 0, pd->af); 4621 printf(" -> "); 4622 pf_print_host(pd->dst, 0, pd->af); 4623 printf(" state: "); 4624 pf_print_state(*state); 4625 printf(" seq=%u\n", seq); 4626 } 4627 } 4628 4629 /* translate source/destination address, if necessary */ 4630 if ((*state)->key[PF_SK_WIRE] != 4631 (*state)->key[PF_SK_STACK]) { 4632 struct pf_state_key *nk = 4633 (*state)->key[pd->didx]; 4634 4635 if (PF_ANEQ(pd2.src, 4636 &nk->addr[pd2.sidx], pd2.af) || 4637 nk->port[pd2.sidx] != th.th_sport) 4638 pf_change_icmp(pd2.src, &th.th_sport, 4639 daddr, &nk->addr[pd2.sidx], 4640 nk->port[pd2.sidx], NULL, 4641 pd2.ip_sum, icmpsum, 4642 pd->ip_sum, 0, pd2.af); 4643 4644 if (PF_ANEQ(pd2.dst, 4645 &nk->addr[pd2.didx], pd2.af) || 4646 nk->port[pd2.didx] != th.th_dport) 4647 pf_change_icmp(pd2.dst, &th.th_dport, 4648 NULL, /* XXX Inbound NAT? 
*/ 4649 &nk->addr[pd2.didx], 4650 nk->port[pd2.didx], NULL, 4651 pd2.ip_sum, icmpsum, 4652 pd->ip_sum, 0, pd2.af); 4653 copyback = 1; 4654 } 4655 4656 if (copyback) { 4657 switch (pd2.af) { 4658 #ifdef INET 4659 case AF_INET: 4660 m_copyback(m, off, ICMP_MINLEN, 4661 pd->hdr.icmp); 4662 m_copyback(m, ipoff2, sizeof(h2), 4663 &h2); 4664 break; 4665 #endif /* INET */ 4666 #ifdef INET6 4667 case AF_INET6: 4668 m_copyback(m, off, 4669 sizeof(struct icmp6_hdr), 4670 pd->hdr.icmp6); 4671 m_copyback(m, ipoff2, sizeof(h2_6), 4672 &h2_6); 4673 break; 4674 #endif /* INET6 */ 4675 } 4676 m_copyback(m, off2, 8, &th); 4677 } 4678 4679 return (PF_PASS); 4680 break; 4681 } 4682 case IPPROTO_UDP: { 4683 struct udphdr uh; 4684 4685 if (!pf_pull_hdr(m, off2, &uh, sizeof(uh), 4686 NULL, reason, pd2.af)) { 4687 DPFPRINTF(PF_DEBUG_MISC, 4688 ("pf: ICMP error message too short " 4689 "(udp)\n")); 4690 return (PF_DROP); 4691 } 4692 4693 key.af = pd2.af; 4694 key.proto = IPPROTO_UDP; 4695 PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); 4696 PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); 4697 key.port[pd2.sidx] = uh.uh_sport; 4698 key.port[pd2.didx] = uh.uh_dport; 4699 4700 STATE_LOOKUP(kif, &key, direction, *state, m); 4701 4702 /* translate source/destination address, if necessary */ 4703 if ((*state)->key[PF_SK_WIRE] != 4704 (*state)->key[PF_SK_STACK]) { 4705 struct pf_state_key *nk = 4706 (*state)->key[pd->didx]; 4707 4708 if (PF_ANEQ(pd2.src, 4709 &nk->addr[pd2.sidx], pd2.af) || 4710 nk->port[pd2.sidx] != uh.uh_sport) 4711 pf_change_icmp(pd2.src, &uh.uh_sport, 4712 daddr, &nk->addr[pd2.sidx], 4713 nk->port[pd2.sidx], &uh.uh_sum, 4714 pd2.ip_sum, icmpsum, 4715 pd->ip_sum, 1, pd2.af); 4716 4717 if (PF_ANEQ(pd2.dst, 4718 &nk->addr[pd2.didx], pd2.af) || 4719 nk->port[pd2.didx] != uh.uh_dport) 4720 pf_change_icmp(pd2.dst, &uh.uh_dport, 4721 NULL, /* XXX Inbound NAT? 
*/ 4722 &nk->addr[pd2.didx], 4723 nk->port[pd2.didx], &uh.uh_sum, 4724 pd2.ip_sum, icmpsum, 4725 pd->ip_sum, 1, pd2.af); 4726 4727 switch (pd2.af) { 4728 #ifdef INET 4729 case AF_INET: 4730 m_copyback(m, off, ICMP_MINLEN, 4731 pd->hdr.icmp); 4732 m_copyback(m, ipoff2, sizeof(h2), &h2); 4733 break; 4734 #endif /* INET */ 4735 #ifdef INET6 4736 case AF_INET6: 4737 m_copyback(m, off, 4738 sizeof(struct icmp6_hdr), 4739 pd->hdr.icmp6); 4740 m_copyback(m, ipoff2, sizeof(h2_6), 4741 &h2_6); 4742 break; 4743 #endif /* INET6 */ 4744 } 4745 m_copyback(m, off2, sizeof(uh), &uh); 4746 } 4747 return (PF_PASS); 4748 break; 4749 } 4750 #ifdef INET 4751 case IPPROTO_ICMP: { 4752 struct icmp iih; 4753 4754 if (!pf_pull_hdr(m, off2, &iih, ICMP_MINLEN, 4755 NULL, reason, pd2.af)) { 4756 DPFPRINTF(PF_DEBUG_MISC, 4757 ("pf: ICMP error message too short i" 4758 "(icmp)\n")); 4759 return (PF_DROP); 4760 } 4761 4762 key.af = pd2.af; 4763 key.proto = IPPROTO_ICMP; 4764 PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); 4765 PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); 4766 key.port[0] = key.port[1] = iih.icmp_id; 4767 4768 STATE_LOOKUP(kif, &key, direction, *state, m); 4769 4770 /* translate source/destination address, if necessary */ 4771 if ((*state)->key[PF_SK_WIRE] != 4772 (*state)->key[PF_SK_STACK]) { 4773 struct pf_state_key *nk = 4774 (*state)->key[pd->didx]; 4775 4776 if (PF_ANEQ(pd2.src, 4777 &nk->addr[pd2.sidx], pd2.af) || 4778 nk->port[pd2.sidx] != iih.icmp_id) 4779 pf_change_icmp(pd2.src, &iih.icmp_id, 4780 daddr, &nk->addr[pd2.sidx], 4781 nk->port[pd2.sidx], NULL, 4782 pd2.ip_sum, icmpsum, 4783 pd->ip_sum, 0, AF_INET); 4784 4785 if (PF_ANEQ(pd2.dst, 4786 &nk->addr[pd2.didx], pd2.af) || 4787 nk->port[pd2.didx] != iih.icmp_id) 4788 pf_change_icmp(pd2.dst, &iih.icmp_id, 4789 NULL, /* XXX Inbound NAT? 
*/ 4790 &nk->addr[pd2.didx], 4791 nk->port[pd2.didx], NULL, 4792 pd2.ip_sum, icmpsum, 4793 pd->ip_sum, 0, AF_INET); 4794 4795 m_copyback(m, off, ICMP_MINLEN, pd->hdr.icmp); 4796 m_copyback(m, ipoff2, sizeof(h2), &h2); 4797 m_copyback(m, off2, ICMP_MINLEN, &iih); 4798 } 4799 return (PF_PASS); 4800 break; 4801 } 4802 #endif /* INET */ 4803 #ifdef INET6 4804 case IPPROTO_ICMPV6: { 4805 struct icmp6_hdr iih; 4806 4807 if (!pf_pull_hdr(m, off2, &iih, 4808 sizeof(struct icmp6_hdr), NULL, reason, pd2.af)) { 4809 DPFPRINTF(PF_DEBUG_MISC, 4810 ("pf: ICMP error message too short " 4811 "(icmp6)\n")); 4812 return (PF_DROP); 4813 } 4814 4815 key.af = pd2.af; 4816 key.proto = IPPROTO_ICMPV6; 4817 PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); 4818 PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); 4819 key.port[0] = key.port[1] = iih.icmp6_id; 4820 4821 STATE_LOOKUP(kif, &key, direction, *state, m); 4822 4823 /* translate source/destination address, if necessary */ 4824 if ((*state)->key[PF_SK_WIRE] != 4825 (*state)->key[PF_SK_STACK]) { 4826 struct pf_state_key *nk = 4827 (*state)->key[pd->didx]; 4828 4829 if (PF_ANEQ(pd2.src, 4830 &nk->addr[pd2.sidx], pd2.af) || 4831 nk->port[pd2.sidx] != iih.icmp6_id) 4832 pf_change_icmp(pd2.src, &iih.icmp6_id, 4833 daddr, &nk->addr[pd2.sidx], 4834 nk->port[pd2.sidx], NULL, 4835 pd2.ip_sum, icmpsum, 4836 pd->ip_sum, 0, AF_INET6); 4837 4838 if (PF_ANEQ(pd2.dst, 4839 &nk->addr[pd2.didx], pd2.af) || 4840 nk->port[pd2.didx] != iih.icmp6_id) 4841 pf_change_icmp(pd2.dst, &iih.icmp6_id, 4842 NULL, /* XXX Inbound NAT? 
*/ 4843 &nk->addr[pd2.didx], 4844 nk->port[pd2.didx], NULL, 4845 pd2.ip_sum, icmpsum, 4846 pd->ip_sum, 0, AF_INET6); 4847 4848 m_copyback(m, off, sizeof(struct icmp6_hdr), 4849 pd->hdr.icmp6); 4850 m_copyback(m, ipoff2, sizeof(h2_6), &h2_6); 4851 m_copyback(m, off2, sizeof(struct icmp6_hdr), 4852 &iih); 4853 } 4854 return (PF_PASS); 4855 break; 4856 } 4857 #endif /* INET6 */ 4858 default: { 4859 key.af = pd2.af; 4860 key.proto = pd2.proto; 4861 PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); 4862 PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); 4863 key.port[0] = key.port[1] = 0; 4864 4865 STATE_LOOKUP(kif, &key, direction, *state, m); 4866 4867 /* translate source/destination address, if necessary */ 4868 if ((*state)->key[PF_SK_WIRE] != 4869 (*state)->key[PF_SK_STACK]) { 4870 struct pf_state_key *nk = 4871 (*state)->key[pd->didx]; 4872 4873 if (PF_ANEQ(pd2.src, 4874 &nk->addr[pd2.sidx], pd2.af)) 4875 pf_change_icmp(pd2.src, NULL, daddr, 4876 &nk->addr[pd2.sidx], 0, NULL, 4877 pd2.ip_sum, icmpsum, 4878 pd->ip_sum, 0, pd2.af); 4879 4880 if (PF_ANEQ(pd2.dst, 4881 &nk->addr[pd2.didx], pd2.af)) 4882 pf_change_icmp(pd2.src, NULL, 4883 NULL, /* XXX Inbound NAT? 
*/ 4884 &nk->addr[pd2.didx], 0, NULL, 4885 pd2.ip_sum, icmpsum, 4886 pd->ip_sum, 0, pd2.af); 4887 4888 switch (pd2.af) { 4889 #ifdef INET 4890 case AF_INET: 4891 m_copyback(m, off, ICMP_MINLEN, 4892 pd->hdr.icmp); 4893 m_copyback(m, ipoff2, sizeof(h2), &h2); 4894 break; 4895 #endif /* INET */ 4896 #ifdef INET6 4897 case AF_INET6: 4898 m_copyback(m, off, 4899 sizeof(struct icmp6_hdr), 4900 pd->hdr.icmp6); 4901 m_copyback(m, ipoff2, sizeof(h2_6), 4902 &h2_6); 4903 break; 4904 #endif /* INET6 */ 4905 } 4906 } 4907 return (PF_PASS); 4908 break; 4909 } 4910 } 4911 } 4912 } 4913 4914 int 4915 pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif, 4916 struct mbuf *m, struct pf_pdesc *pd) 4917 { 4918 struct pf_state_peer *src, *dst; 4919 struct pf_state_key_cmp key; 4920 4921 key.af = pd->af; 4922 key.proto = pd->proto; 4923 if (direction == PF_IN) { 4924 PF_ACPY(&key.addr[0], pd->src, key.af); 4925 PF_ACPY(&key.addr[1], pd->dst, key.af); 4926 key.port[0] = key.port[1] = 0; 4927 } else { 4928 PF_ACPY(&key.addr[1], pd->src, key.af); 4929 PF_ACPY(&key.addr[0], pd->dst, key.af); 4930 key.port[1] = key.port[0] = 0; 4931 } 4932 4933 STATE_LOOKUP(kif, &key, direction, *state, m); 4934 4935 if (direction == (*state)->direction) { 4936 src = &(*state)->src; 4937 dst = &(*state)->dst; 4938 } else { 4939 src = &(*state)->dst; 4940 dst = &(*state)->src; 4941 } 4942 4943 /* update states */ 4944 if (src->state < PFOTHERS_SINGLE) 4945 src->state = PFOTHERS_SINGLE; 4946 if (dst->state == PFOTHERS_SINGLE) 4947 dst->state = PFOTHERS_MULTIPLE; 4948 4949 /* update expire time */ 4950 (*state)->expire = time_second; 4951 if (src->state == PFOTHERS_MULTIPLE && dst->state == PFOTHERS_MULTIPLE) 4952 (*state)->timeout = PFTM_OTHER_MULTIPLE; 4953 else 4954 (*state)->timeout = PFTM_OTHER_SINGLE; 4955 4956 /* translate source/destination address, if necessary */ 4957 if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { 4958 struct pf_state_key *nk = 
(*state)->key[pd->didx]; 4959 4960 switch (pd->af) { 4961 #ifdef INET 4962 case AF_INET: 4963 if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], AF_INET)) 4964 pf_change_a(&pd->src->v4.s_addr, 4965 pd->ip_sum, 4966 nk->addr[pd->sidx].v4.s_addr, 4967 0); 4968 4969 4970 if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], AF_INET)) 4971 pf_change_a(&pd->dst->v4.s_addr, 4972 pd->ip_sum, 4973 nk->addr[pd->didx].v4.s_addr, 4974 0); 4975 4976 break; 4977 #endif /* INET */ 4978 #ifdef INET6 4979 case AF_INET6: 4980 if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], AF_INET)) 4981 PF_ACPY(pd->src, &nk->addr[pd->sidx], pd->af); 4982 4983 if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], AF_INET)) 4984 PF_ACPY(pd->dst, &nk->addr[pd->didx], pd->af); 4985 #endif /* INET6 */ 4986 } 4987 } 4988 return (PF_PASS); 4989 } 4990 4991 /* 4992 * ipoff and off are measured from the start of the mbuf chain. 4993 * h must be at "ipoff" on the mbuf chain. 4994 */ 4995 void * 4996 pf_pull_hdr(struct mbuf *m, int off, void *p, int len, 4997 u_short *actionp, u_short *reasonp, sa_family_t af) 4998 { 4999 switch (af) { 5000 #ifdef INET 5001 case AF_INET: { 5002 struct ip *h = mtod(m, struct ip *); 5003 u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3; 5004 5005 if (fragoff) { 5006 if (fragoff >= len) 5007 ACTION_SET(actionp, PF_PASS); 5008 else { 5009 ACTION_SET(actionp, PF_DROP); 5010 REASON_SET(reasonp, PFRES_FRAG); 5011 } 5012 return (NULL); 5013 } 5014 if (m->m_pkthdr.len < off + len || 5015 ntohs(h->ip_len) < off + len) { 5016 ACTION_SET(actionp, PF_DROP); 5017 REASON_SET(reasonp, PFRES_SHORT); 5018 return (NULL); 5019 } 5020 break; 5021 } 5022 #endif /* INET */ 5023 #ifdef INET6 5024 case AF_INET6: { 5025 struct ip6_hdr *h = mtod(m, struct ip6_hdr *); 5026 5027 if (m->m_pkthdr.len < off + len || 5028 (ntohs(h->ip6_plen) + sizeof(struct ip6_hdr)) < 5029 (unsigned)(off + len)) { 5030 ACTION_SET(actionp, PF_DROP); 5031 REASON_SET(reasonp, PFRES_SHORT); 5032 return (NULL); 5033 } 5034 break; 5035 } 5036 #endif /* INET6 
*/ 5037 } 5038 m_copydata(m, off, len, p); 5039 return (p); 5040 } 5041 5042 int 5043 pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif) 5044 { 5045 struct sockaddr_in *dst; 5046 int ret = 1; 5047 int check_mpath; 5048 extern int ipmultipath; 5049 #ifdef INET6 5050 extern int ip6_multipath; 5051 struct sockaddr_in6 *dst6; 5052 struct route_in6 ro; 5053 #else 5054 struct route ro; 5055 #endif 5056 struct radix_node *rn; 5057 struct rtentry *rt; 5058 struct ifnet *ifp; 5059 5060 check_mpath = 0; 5061 bzero(&ro, sizeof(ro)); 5062 switch (af) { 5063 case AF_INET: 5064 dst = satosin(&ro.ro_dst); 5065 dst->sin_family = AF_INET; 5066 dst->sin_len = sizeof(*dst); 5067 dst->sin_addr = addr->v4; 5068 if (ipmultipath) 5069 check_mpath = 1; 5070 break; 5071 #ifdef INET6 5072 case AF_INET6: 5073 dst6 = (struct sockaddr_in6 *)&ro.ro_dst; 5074 dst6->sin6_family = AF_INET6; 5075 dst6->sin6_len = sizeof(*dst6); 5076 dst6->sin6_addr = addr->v6; 5077 if (ip6_multipath) 5078 check_mpath = 1; 5079 break; 5080 #endif /* INET6 */ 5081 default: 5082 return (0); 5083 } 5084 5085 /* Skip checks for ipsec interfaces */ 5086 if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC) 5087 goto out; 5088 5089 rtalloc_noclone((struct route *)&ro, NO_CLONING); 5090 5091 if (ro.ro_rt != NULL) { 5092 /* No interface given, this is a no-route check */ 5093 if (kif == NULL) 5094 goto out; 5095 5096 if (kif->pfik_ifp == NULL) { 5097 ret = 0; 5098 goto out; 5099 } 5100 5101 /* Perform uRPF check if passed input interface */ 5102 ret = 0; 5103 rn = (struct radix_node *)ro.ro_rt; 5104 do { 5105 rt = (struct rtentry *)rn; 5106 if (rt->rt_ifp->if_type == IFT_CARP) 5107 ifp = rt->rt_ifp->if_carpdev; 5108 else 5109 ifp = rt->rt_ifp; 5110 5111 if (kif->pfik_ifp == ifp) 5112 ret = 1; 5113 rn = rn_mpath_next(rn); 5114 } while (check_mpath == 1 && rn != NULL && ret == 0); 5115 } else 5116 ret = 0; 5117 out: 5118 if (ro.ro_rt != NULL) 5119 RTFREE(ro.ro_rt); 5120 return (ret); 5121 } 5122 5123 int 
pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw)
{
	/*
	 * Look up the route to addr and report whether its route label
	 * matches the label id stored in aw.  Returns 1 on match, 0 on
	 * no match, no route, or unsupported address family.
	 */
	struct sockaddr_in	*dst;
#ifdef INET6
	struct sockaddr_in6	*dst6;
	struct route_in6	 ro;
#else
	struct route		 ro;
#endif
	int			 ret = 0;

	bzero(&ro, sizeof(ro));
	switch (af) {
	case AF_INET:
		dst = satosin(&ro.ro_dst);
		dst->sin_family = AF_INET;
		dst->sin_len = sizeof(*dst);
		dst->sin_addr = addr->v4;
		break;
#ifdef INET6
	case AF_INET6:
		dst6 = (struct sockaddr_in6 *)&ro.ro_dst;
		dst6->sin6_family = AF_INET6;
		dst6->sin6_len = sizeof(*dst6);
		dst6->sin6_addr = addr->v6;
		break;
#endif /* INET6 */
	default:
		return (0);
	}

	/* non-cloning lookup; we only need to inspect the route */
	rtalloc_noclone((struct route *)&ro, NO_CLONING);

	if (ro.ro_rt != NULL) {
		if (ro.ro_rt->rt_labelid == aw->v.rtlabel)
			ret = 1;
		RTFREE(ro.ro_rt);
	}

	return (ret);
}

#ifdef INET
/*
 * Route an IPv4 packet according to a route-to/reply-to/dup-to rule.
 * For PF_DUPTO a copy of the packet is routed and the original is left
 * to continue; otherwise the original mbuf is consumed (*m is zeroed
 * on the done path).  On any error path the mbuf is freed ("bad").
 */
void
pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
    struct pf_state *s, struct pf_pdesc *pd)
{
	struct mbuf		*m0, *m1;
	struct route		 iproute;
	struct route		*ro = NULL;
	struct sockaddr_in	*dst;
	struct ip		*ip;
	struct ifnet		*ifp = NULL;
	struct pf_addr		 naddr;
	struct pf_src_node	*sn = NULL;
	int			 error = 0;
#ifdef IPSEC
	struct m_tag		*mtag;
#endif /* IPSEC */

	if (m == NULL || *m == NULL || r == NULL ||
	    (dir != PF_IN && dir != PF_OUT) || oifp == NULL)
		panic("pf_route: invalid parameters");

	/* routing-loop guard: give up after a few re-routes */
	if ((*m)->m_pkthdr.pf.routed++ > 3) {
		m0 = *m;
		*m = NULL;
		goto bad;
	}

	if (r->rt == PF_DUPTO) {
		/* dup-to: route a copy, leave the original untouched */
		if ((m0 = m_copym2(*m, 0, M_COPYALL, M_NOWAIT)) == NULL)
			return;
	} else {
		/* reply-to only applies against the rule's direction */
		if ((r->rt == PF_REPLYTO) == (r->direction == dir))
			return;
		m0 = *m;
	}

	if (m0->m_len < sizeof(struct ip)) {
		DPFPRINTF(PF_DEBUG_URGENT,
		    ("pf_route: m0->m_len < sizeof(struct ip)\n"));
		goto bad;
	}

	ip = mtod(m0, struct ip *);

	ro = &iproute;
	bzero((caddr_t)ro, sizeof(*ro));
	dst = satosin(&ro->ro_dst);
	dst->sin_family = AF_INET;
	dst->sin_len = sizeof(*dst);
	dst->sin_addr = ip->ip_dst;

	if (r->rt == PF_FASTROUTE) {
		/* plain routing-table lookup, no pool address */
		rtalloc(ro);
		if (ro->ro_rt == 0) {
			ipstat.ips_noroute++;
			goto bad;
		}

		ifp = ro->ro_rt->rt_ifp;
		ro->ro_rt->rt_use++;

		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
			dst = satosin(ro->ro_rt->rt_gateway);
	} else {
		if (TAILQ_EMPTY(&r->rpool.list)) {
			DPFPRINTF(PF_DEBUG_URGENT,
			    ("pf_route: TAILQ_EMPTY(&r->rpool.list)\n"));
			goto bad;
		}
		if (s == NULL) {
			/* stateless: pick a gateway from the rule's pool */
			pf_map_addr(AF_INET, r, (struct pf_addr *)&ip->ip_src,
			    &naddr, NULL, &sn);
			if (!PF_AZERO(&naddr, AF_INET))
				dst->sin_addr.s_addr = naddr.v4.s_addr;
			ifp = r->rpool.cur->kif ?
			    r->rpool.cur->kif->pfik_ifp : NULL;
		} else {
			/* stateful: reuse the gateway bound to the state */
			if (!PF_AZERO(&s->rt_addr, AF_INET))
				dst->sin_addr.s_addr =
				    s->rt_addr.v4.s_addr;
			ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
		}
	}
	if (ifp == NULL)
		goto bad;

	/* re-run pf for the packet when it leaves on a new interface */
	if (oifp != ifp) {
		if (pf_test(PF_OUT, ifp, &m0, NULL) != PF_PASS)
			goto bad;
		else if (m0 == NULL)
			goto done;
		if (m0->m_len < sizeof(struct ip)) {
			DPFPRINTF(PF_DEBUG_URGENT,
			    ("pf_route: m0->m_len < sizeof(struct ip)\n"));
			goto bad;
		}
		ip = mtod(m0, struct ip *);
	}

	/* Copied from ip_output. */
#ifdef IPSEC
	/*
	 * If deferred crypto processing is needed, check that the
	 * interface supports it.
	 */
	if ((mtag = m_tag_find(m0, PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED, NULL))
	    != NULL && (ifp->if_capabilities & IFCAP_IPSEC) == 0) {
		/* Notify IPsec to do its own crypto. */
		ipsp_skipcrypto_unmark((struct tdb_ident *)(mtag + 1));
		goto bad;
	}
#endif /* IPSEC */

	/* Catch routing changes wrt. hardware checksumming for TCP or UDP. */
	if (m0->m_pkthdr.csum_flags & M_TCPV4_CSUM_OUT) {
		if (!(ifp->if_capabilities & IFCAP_CSUM_TCPv4) ||
		    ifp->if_bridge != NULL) {
			in_delayed_cksum(m0);
			m0->m_pkthdr.csum_flags &= ~M_TCPV4_CSUM_OUT; /* Clr */
		}
	} else if (m0->m_pkthdr.csum_flags & M_UDPV4_CSUM_OUT) {
		if (!(ifp->if_capabilities & IFCAP_CSUM_UDPv4) ||
		    ifp->if_bridge != NULL) {
			in_delayed_cksum(m0);
			m0->m_pkthdr.csum_flags &= ~M_UDPV4_CSUM_OUT; /* Clr */
		}
	}

	if (ntohs(ip->ip_len) <= ifp->if_mtu) {
		/* fits: fix up the IP checksum (hw or sw) and send */
		if ((ifp->if_capabilities & IFCAP_CSUM_IPv4) &&
		    ifp->if_bridge == NULL) {
			m0->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT;
			ipstat.ips_outhwcsum++;
		} else {
			ip->ip_sum = 0;
			ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
		}
		/* Update relevant hardware checksum stats for TCP/UDP */
		if (m0->m_pkthdr.csum_flags & M_TCPV4_CSUM_OUT)
			tcpstat.tcps_outhwcsum++;
		else if (m0->m_pkthdr.csum_flags & M_UDPV4_CSUM_OUT)
			udpstat.udps_outhwcsum++;
		error = (*ifp->if_output)(ifp, m0, sintosa(dst), NULL);
		goto done;
	}

	/*
	 * Too large for interface; fragment if possible.
	 * Must be able to put at least 8 bytes per fragment.
	 */
	if (ip->ip_off & htons(IP_DF)) {
		/* DF set: can't fragment, report back with ICMP */
		ipstat.ips_cantfrag++;
		if (r->rt != PF_DUPTO) {
			icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0,
			    ifp->if_mtu);
			goto done;
		} else
			goto bad;
	}

	m1 = m0;
	error = ip_fragment(m0, ifp, ifp->if_mtu);
	if (error) {
		m0 = NULL;	/* ip_fragment disposed of the chain */
		goto bad;
	}

	/* send each fragment; after the first error, free the rest */
	for (m0 = m1; m0; m0 = m1) {
		m1 = m0->m_nextpkt;
		m0->m_nextpkt = 0;
		if (error == 0)
			error = (*ifp->if_output)(ifp, m0, sintosa(dst),
			    NULL);
		else
			m_freem(m0);
	}

	if (error == 0)
		ipstat.ips_fragmented++;

done:
	if (r->rt != PF_DUPTO)
		*m = NULL;	/* original consumed (except for dup-to) */
	if (ro == &iproute && ro->ro_rt)
		RTFREE(ro->ro_rt);
	return;

bad:
	m_freem(m0);
	goto done;
}
#endif /* INET */

#ifdef INET6
/*
 * IPv6 counterpart of pf_route(): route a packet according to a
 * route-to/reply-to/dup-to rule.  Same mbuf ownership contract as
 * pf_route(): dup-to routes a copy, otherwise the original is consumed.
 * No fragmentation here — oversized packets get an ICMP6 too-big error.
 */
void
pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
    struct pf_state *s, struct pf_pdesc *pd)
{
	struct mbuf		*m0;
	struct route_in6	 ip6route;
	struct route_in6	*ro;
	struct sockaddr_in6	*dst;
	struct ip6_hdr		*ip6;
	struct ifnet		*ifp = NULL;
	struct pf_addr		 naddr;
	struct pf_src_node	*sn = NULL;
	int			 error = 0;

	if (m == NULL || *m == NULL || r == NULL ||
	    (dir != PF_IN && dir != PF_OUT) || oifp == NULL)
		panic("pf_route6: invalid parameters");

	/* routing-loop guard */
	if ((*m)->m_pkthdr.pf.routed++ > 3) {
		m0 = *m;
		*m = NULL;
		goto bad;
	}

	if (r->rt == PF_DUPTO) {
		/* dup-to: route a copy, leave the original untouched */
		if ((m0 = m_copym2(*m, 0, M_COPYALL, M_NOWAIT)) == NULL)
			return;
	} else {
		if ((r->rt == PF_REPLYTO) == (r->direction == dir))
			return;
		m0 = *m;
	}

	if (m0->m_len < sizeof(struct ip6_hdr)) {
		DPFPRINTF(PF_DEBUG_URGENT,
		    ("pf_route6: m0->m_len < sizeof(struct ip6_hdr)\n"));
		goto bad;
	}
	ip6 = mtod(m0, struct ip6_hdr *);

	ro = &ip6route;
	bzero((caddr_t)ro, sizeof(*ro));
	dst = (struct sockaddr_in6 *)&ro->ro_dst;
	dst->sin6_family = AF_INET6;
	dst->sin6_len = sizeof(*dst);
	dst->sin6_addr = ip6->ip6_dst;

	/* Cheat. XXX why only in the v6 case??? */
	if (r->rt == PF_FASTROUTE) {
		/* mark so pf_test6 skips the re-injected packet */
		m0->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
		ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL);
		return;
	}

	if (TAILQ_EMPTY(&r->rpool.list)) {
		DPFPRINTF(PF_DEBUG_URGENT,
		    ("pf_route6: TAILQ_EMPTY(&r->rpool.list)\n"));
		goto bad;
	}
	if (s == NULL) {
		/* stateless: pick a gateway from the rule's pool */
		pf_map_addr(AF_INET6, r, (struct pf_addr *)&ip6->ip6_src,
		    &naddr, NULL, &sn);
		if (!PF_AZERO(&naddr, AF_INET6))
			PF_ACPY((struct pf_addr *)&dst->sin6_addr,
			    &naddr, AF_INET6);
		ifp = r->rpool.cur->kif ? r->rpool.cur->kif->pfik_ifp : NULL;
	} else {
		/* stateful: reuse the gateway bound to the state */
		if (!PF_AZERO(&s->rt_addr, AF_INET6))
			PF_ACPY((struct pf_addr *)&dst->sin6_addr,
			    &s->rt_addr, AF_INET6);
		ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
	}
	if (ifp == NULL)
		goto bad;

	/* re-run pf for the packet when it leaves on a new interface */
	if (oifp != ifp) {
		if (pf_test6(PF_OUT, ifp, &m0, NULL) != PF_PASS)
			goto bad;
		else if (m0 == NULL)
			goto done;
		if (m0->m_len < sizeof(struct ip6_hdr)) {
			DPFPRINTF(PF_DEBUG_URGENT,
			    ("pf_route6: m0->m_len < sizeof(struct ip6_hdr)\n"));
			goto bad;
		}
		ip6 = mtod(m0, struct ip6_hdr *);
	}

	/*
	 * If the packet is too large for the outgoing interface,
	 * send back an icmp6 error.
	 */
	if (IN6_IS_SCOPE_EMBED(&dst->sin6_addr))
		dst->sin6_addr.s6_addr16[1] = htons(ifp->if_index);
	if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) {
		error = nd6_output(ifp, ifp, m0, dst, NULL);
	} else {
		in6_ifstat_inc(ifp, ifs6_in_toobig);
		if (r->rt != PF_DUPTO)
			icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
		else
			goto bad;
	}

done:
	if (r->rt != PF_DUPTO)
		*m = NULL;	/* original consumed (except for dup-to) */
	return;

bad:
	m_freem(m0);
	goto done;
}
#endif /* INET6 */


/*
 * check protocol (tcp/udp/icmp/icmp6) checksum and set mbuf flag
 * off is the offset where the protocol header starts
 * len is the total length of protocol header plus payload
 * returns 0 when the checksum is valid, otherwise returns 1.
 */
int
pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p,
    sa_family_t af)
{
	u_int16_t flag_ok, flag_bad;
	u_int16_t sum;

	switch (p) {
	case IPPROTO_TCP:
		flag_ok = M_TCP_CSUM_IN_OK;
		flag_bad = M_TCP_CSUM_IN_BAD;
		break;
	case IPPROTO_UDP:
		flag_ok = M_UDP_CSUM_IN_OK;
		flag_bad = M_UDP_CSUM_IN_BAD;
		break;
	case IPPROTO_ICMP:
#ifdef INET6
	case IPPROTO_ICMPV6:
#endif /* INET6 */
		/* no hardware-offload flags exist for ICMP */
		flag_ok = flag_bad = 0;
		break;
	default:
		return (1);
	}
	/* trust a verdict already recorded by hardware offload */
	if (m->m_pkthdr.csum_flags & flag_ok)
		return (0);
	if (m->m_pkthdr.csum_flags & flag_bad)
		return (1);
	if (off < sizeof(struct ip) || len < sizeof(struct udphdr))
		return (1);
	if (m->m_pkthdr.len < off + len)
		return (1);
	switch (af) {
#ifdef INET
	case AF_INET:
		if (p == IPPROTO_ICMP) {
			/*
			 * ICMPv4 has no pseudo-header; temporarily
			 * advance the mbuf to checksum only the payload.
			 */
			if (m->m_len < off)
				return (1);
			m->m_data += off;
			m->m_len -= off;
			sum = in_cksum(m, len);
			m->m_data -= off;
			m->m_len += off;
		} else {
			if (m->m_len < sizeof(struct ip))
				return (1);
			sum = in4_cksum(m, p, off, len);
		}
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		if (m->m_len < sizeof(struct ip6_hdr))
			return (1);
		sum = in6_cksum(m, p, off, len);
		break;
#endif /* INET6 */
	default:
		return (1);
	}
	if (sum) {
		/* bad checksum: remember the verdict and bump stats */
		m->m_pkthdr.csum_flags |= flag_bad;
		switch (p) {
		case IPPROTO_TCP:
			tcpstat.tcps_rcvbadsum++;
			break;
		case IPPROTO_UDP:
			udpstat.udps_badsum++;
			break;
		case IPPROTO_ICMP:
			icmpstat.icps_checksum++;
			break;
#ifdef INET6
		case IPPROTO_ICMPV6:
			icmp6stat.icp6s_checksum++;
			break;
#endif /* INET6 */
		}
		return (1);
	}
	m->m_pkthdr.csum_flags |= flag_ok;
	return (0);
}

/*
 * Return the pf_divert tag attached to m, or NULL if none is present.
 */
struct pf_divert *
pf_find_divert(struct mbuf *m)
{
	struct m_tag	*mtag;

	if ((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL)
		return (NULL);

	return ((struct pf_divert *)(mtag + 1));
}

/*
 * Return the pf_divert tag attached to m, attaching a zeroed one first
 * if none exists.  Returns NULL only if tag allocation fails.
 */
struct pf_divert *
pf_get_divert(struct mbuf *m)
{
	struct m_tag	*mtag;

	if ((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL) {
		mtag = m_tag_get(PACKET_TAG_PF_DIVERT, sizeof(struct pf_divert),
		    M_NOWAIT);
		if (mtag == NULL)
			return (NULL);
		bzero(mtag + 1, sizeof(struct pf_divert));
		m_tag_prepend(m, mtag);
	}

	return ((struct pf_divert *)(mtag + 1));
}

#ifdef INET
/*
 * Main IPv4 entry point of the packet filter: test a packet arriving
 * (PF_IN) or leaving (PF_OUT) on ifp and return PF_PASS/PF_DROP.
 * Performs normalization/reassembly, then dispatches per protocol to
 * the state-test functions, falling back to rule evaluation when no
 * state matches.
 */
int
pf_test(int dir, struct ifnet *ifp, struct mbuf **m0,
    struct ether_header *eh)
{
	struct pfi_kif		*kif;
	u_short			 action, reason = 0, log = 0;
	struct mbuf		*m = *m0;
	struct ip		*h;
	struct pf_rule		*a = NULL, *r = &pf_default_rule, *tr, *nr;
	struct pf_state		*s = NULL;
	struct pf_ruleset	*ruleset = NULL;
	struct pf_pdesc		 pd;
	int			 off, dirndx, pqid = 0;

	if (!pf_status.running)
		return (PF_PASS);

	memset(&pd, 0, sizeof(pd));
	/* carp interfaces share the pf kif of their underlying device */
	if (ifp->if_type == IFT_CARP && ifp->if_carpdev)
		kif = (struct pfi_kif *)ifp->if_carpdev->if_pf_kif;
	else
		kif = (struct pfi_kif *)ifp->if_pf_kif;

	if (kif == NULL) {
		DPFPRINTF(PF_DEBUG_URGENT,
		    ("pf_test: kif == NULL, if_xname %s\n", ifp->if_xname));
		return (PF_DROP);
	}
	if (kif->pfik_flags & PFI_IFLAG_SKIP)
		return (PF_PASS);

#ifdef DIAGNOSTIC
	if ((m->m_flags & M_PKTHDR) == 0)
		panic("non-M_PKTHDR is passed to pf_test");
#endif /* DIAGNOSTIC */

	if (m->m_pkthdr.len < (int)sizeof(*h)) {
		action = PF_DROP;
		REASON_SET(&reason, PFRES_SHORT);
		log = 1;
		goto done;
	}

	/* packets generated by pf itself (e.g. route-to) pass untouched */
	if (m->m_pkthdr.pf.flags & PF_TAG_GENERATED)
		return (PF_PASS);

	/* We do IP header normalization and packet reassembly here */
	if (pf_normalize_ip(m0, dir, kif, &reason, &pd) != PF_PASS) {
		action = PF_DROP;
		goto done;
	}
	m = *m0;	/* pf_normalize messes with m0 */
	h = mtod(m, struct ip *);

	off = h->ip_hl << 2;
	if (off < (int)sizeof(*h)) {
		action = PF_DROP;
		REASON_SET(&reason, PFRES_SHORT);
		log = 1;
		goto done;
	}

	/* fill in the packet descriptor used by all helper functions */
	pd.src = (struct pf_addr *)&h->ip_src;
	pd.dst = (struct pf_addr *)&h->ip_dst;
	pd.sport = pd.dport = NULL;
	pd.ip_sum = &h->ip_sum;
	pd.proto_sum = NULL;
	pd.proto = h->ip_p;
	pd.dir = dir;
	pd.sidx = (dir == PF_IN) ? 0 : 1;
	pd.didx = (dir == PF_IN) ? 1 : 0;
	pd.af = AF_INET;
	pd.tos = h->ip_tos;
	pd.tot_len = ntohs(h->ip_len);
	pd.eh = eh;

	/* handle fragments that didn't get reassembled by normalization */
	if (h->ip_off & htons(IP_MF | IP_OFFMASK)) {
		action = pf_test_fragment(&r, dir, kif, m, h,
		    &pd, &a, &ruleset);
		goto done;
	}

	switch (h->ip_p) {

	case IPPROTO_TCP: {
		struct tcphdr	th;

		pd.hdr.tcp = &th;
		if (!pf_pull_hdr(m, off, &th, sizeof(th),
		    &action, &reason, AF_INET)) {
			log = action != PF_PASS;
			goto done;
		}
		pd.p_len = pd.tot_len - off - (th.th_off << 2);
		/* pure ACKs may be assigned to the priority queue */
		if ((th.th_flags & TH_ACK) && pd.p_len == 0)
			pqid = 1;
		action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd);
		if (action == PF_DROP)
			goto done;
		action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd,
		    &reason);
		if (action == PF_PASS) {
#if NPFSYNC
			pfsync_update_state(s);
#endif /* NPFSYNC */
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL)
			action = pf_test_rule(&r, &s, dir, kif,
			    m, off, h, &pd, &a, &ruleset, &ipintrq);
		break;
	}

	case IPPROTO_UDP: {
		struct udphdr	uh;

		pd.hdr.udp = &uh;
		if (!pf_pull_hdr(m, off, &uh, sizeof(uh),
		    &action, &reason, AF_INET)) {
			log = action != PF_PASS;
			goto done;
		}
		/* sanity-check the UDP length field against the mbuf */
		if (uh.uh_dport == 0 ||
		    ntohs(uh.uh_ulen) > m->m_pkthdr.len - off ||
		    ntohs(uh.uh_ulen) < sizeof(struct udphdr)) {
			action = PF_DROP;
			REASON_SET(&reason, PFRES_SHORT);
			goto done;
		}
		action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd);
		if (action == PF_PASS) {
#if NPFSYNC
			pfsync_update_state(s);
#endif /* NPFSYNC */
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL)
			action = pf_test_rule(&r, &s, dir, kif,
			    m, off, h, &pd, &a, &ruleset, &ipintrq);
		break;
	}

	case
IPPROTO_ICMP: { 5743 struct icmp ih; 5744 5745 pd.hdr.icmp = &ih; 5746 if (!pf_pull_hdr(m, off, &ih, ICMP_MINLEN, 5747 &action, &reason, AF_INET)) { 5748 log = action != PF_PASS; 5749 goto done; 5750 } 5751 action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd, 5752 &reason); 5753 if (action == PF_PASS) { 5754 #if NPFSYNC 5755 pfsync_update_state(s); 5756 #endif /* NPFSYNC */ 5757 r = s->rule.ptr; 5758 a = s->anchor.ptr; 5759 log = s->log; 5760 } else if (s == NULL) 5761 action = pf_test_rule(&r, &s, dir, kif, 5762 m, off, h, &pd, &a, &ruleset, &ipintrq); 5763 break; 5764 } 5765 5766 default: 5767 action = pf_test_state_other(&s, dir, kif, m, &pd); 5768 if (action == PF_PASS) { 5769 #if NPFSYNC 5770 pfsync_update_state(s); 5771 #endif /* NPFSYNC */ 5772 r = s->rule.ptr; 5773 a = s->anchor.ptr; 5774 log = s->log; 5775 } else if (s == NULL) 5776 action = pf_test_rule(&r, &s, dir, kif, m, off, h, 5777 &pd, &a, &ruleset, &ipintrq); 5778 break; 5779 } 5780 5781 done: 5782 if (action == PF_PASS && h->ip_hl > 5 && 5783 !((s && s->state_flags & PFSTATE_ALLOWOPTS) || r->allow_opts)) { 5784 action = PF_DROP; 5785 REASON_SET(&reason, PFRES_IPOPTIONS); 5786 log = 1; 5787 DPFPRINTF(PF_DEBUG_MISC, 5788 ("pf: dropping packet with ip options\n")); 5789 } 5790 5791 if ((s && s->tag) || r->rtableid) 5792 pf_tag_packet(m, s ? s->tag : 0, r->rtableid); 5793 5794 if (dir == PF_IN && s && s->key[PF_SK_STACK]) 5795 m->m_pkthdr.pf.statekey = s->key[PF_SK_STACK]; 5796 5797 #ifdef ALTQ 5798 if (action == PF_PASS && r->qid) { 5799 if (pqid || (pd.tos & IPTOS_LOWDELAY)) 5800 m->m_pkthdr.pf.qid = r->pqid; 5801 else 5802 m->m_pkthdr.pf.qid = r->qid; 5803 /* add hints for ecn */ 5804 m->m_pkthdr.pf.hdr = h; 5805 } 5806 #endif /* ALTQ */ 5807 5808 /* 5809 * connections redirected to loopback should not match sockets 5810 * bound specifically to loopback due to security implications, 5811 * see tcp_input() and in_pcblookup_listen(). 
5812 */ 5813 if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP || 5814 pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL && 5815 (s->nat_rule.ptr->action == PF_RDR || 5816 s->nat_rule.ptr->action == PF_BINAT) && 5817 (ntohl(pd.dst->v4.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) 5818 m->m_pkthdr.pf.flags |= PF_TAG_TRANSLATE_LOCALHOST; 5819 5820 if (dir == PF_IN && action == PF_PASS && r->divert.port) { 5821 struct pf_divert *divert; 5822 5823 if ((divert = pf_get_divert(m))) { 5824 m->m_pkthdr.pf.flags |= PF_TAG_DIVERTED; 5825 divert->port = r->divert.port; 5826 divert->addr.ipv4 = r->divert.addr.v4; 5827 } 5828 } 5829 5830 if (log) { 5831 struct pf_rule *lr; 5832 5833 if (s != NULL && s->nat_rule.ptr != NULL && 5834 s->nat_rule.ptr->log & PF_LOG_ALL) 5835 lr = s->nat_rule.ptr; 5836 else 5837 lr = r; 5838 PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, lr, a, ruleset, 5839 &pd); 5840 } 5841 5842 kif->pfik_bytes[0][dir == PF_OUT][action != PF_PASS] += pd.tot_len; 5843 kif->pfik_packets[0][dir == PF_OUT][action != PF_PASS]++; 5844 5845 if (action == PF_PASS || r->action == PF_DROP) { 5846 dirndx = (dir == PF_OUT); 5847 r->packets[dirndx]++; 5848 r->bytes[dirndx] += pd.tot_len; 5849 if (a != NULL) { 5850 a->packets[dirndx]++; 5851 a->bytes[dirndx] += pd.tot_len; 5852 } 5853 if (s != NULL) { 5854 if (s->nat_rule.ptr != NULL) { 5855 s->nat_rule.ptr->packets[dirndx]++; 5856 s->nat_rule.ptr->bytes[dirndx] += pd.tot_len; 5857 } 5858 if (s->src_node != NULL) { 5859 s->src_node->packets[dirndx]++; 5860 s->src_node->bytes[dirndx] += pd.tot_len; 5861 } 5862 if (s->nat_src_node != NULL) { 5863 s->nat_src_node->packets[dirndx]++; 5864 s->nat_src_node->bytes[dirndx] += pd.tot_len; 5865 } 5866 dirndx = (dir == s->direction) ? 0 : 1; 5867 s->packets[dirndx]++; 5868 s->bytes[dirndx] += pd.tot_len; 5869 } 5870 tr = r; 5871 nr = (s != NULL) ? 
s->nat_rule.ptr : pd.nat_rule; 5872 if (nr != NULL && r == &pf_default_rule) 5873 tr = nr; 5874 if (tr->src.addr.type == PF_ADDR_TABLE) 5875 pfr_update_stats(tr->src.addr.p.tbl, 5876 (s == NULL) ? pd.src : 5877 &s->key[(s->direction == PF_IN)]-> 5878 addr[(s->direction == PF_OUT)], 5879 pd.af, pd.tot_len, dir == PF_OUT, 5880 r->action == PF_PASS, tr->src.neg); 5881 if (tr->dst.addr.type == PF_ADDR_TABLE) 5882 pfr_update_stats(tr->dst.addr.p.tbl, 5883 (s == NULL) ? pd.dst : 5884 &s->key[(s->direction == PF_IN)]-> 5885 addr[(s->direction == PF_IN)], 5886 pd.af, pd.tot_len, dir == PF_OUT, 5887 r->action == PF_PASS, tr->dst.neg); 5888 } 5889 5890 5891 if (action == PF_SYNPROXY_DROP) { 5892 m_freem(*m0); 5893 *m0 = NULL; 5894 action = PF_PASS; 5895 } else if (r->rt) 5896 /* pf_route can free the mbuf causing *m0 to become NULL */ 5897 pf_route(m0, r, dir, kif->pfik_ifp, s, &pd); 5898 5899 return (action); 5900 } 5901 #endif /* INET */ 5902 5903 #ifdef INET6 5904 int 5905 pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, 5906 struct ether_header *eh) 5907 { 5908 struct pfi_kif *kif; 5909 u_short action, reason = 0, log = 0; 5910 struct mbuf *m = *m0, *n = NULL; 5911 struct ip6_hdr *h; 5912 struct pf_rule *a = NULL, *r = &pf_default_rule, *tr, *nr; 5913 struct pf_state *s = NULL; 5914 struct pf_ruleset *ruleset = NULL; 5915 struct pf_pdesc pd; 5916 int off, terminal = 0, dirndx, rh_cnt = 0; 5917 5918 if (!pf_status.running) 5919 return (PF_PASS); 5920 5921 memset(&pd, 0, sizeof(pd)); 5922 if (ifp->if_type == IFT_CARP && ifp->if_carpdev) 5923 kif = (struct pfi_kif *)ifp->if_carpdev->if_pf_kif; 5924 else 5925 kif = (struct pfi_kif *)ifp->if_pf_kif; 5926 5927 if (kif == NULL) { 5928 DPFPRINTF(PF_DEBUG_URGENT, 5929 ("pf_test6: kif == NULL, if_xname %s\n", ifp->if_xname)); 5930 return (PF_DROP); 5931 } 5932 if (kif->pfik_flags & PFI_IFLAG_SKIP) 5933 return (PF_PASS); 5934 5935 #ifdef DIAGNOSTIC 5936 if ((m->m_flags & M_PKTHDR) == 0) 5937 panic("non-M_PKTHDR is passed to 
pf_test6"); 5938 #endif /* DIAGNOSTIC */ 5939 5940 if (m->m_pkthdr.len < (int)sizeof(*h)) { 5941 action = PF_DROP; 5942 REASON_SET(&reason, PFRES_SHORT); 5943 log = 1; 5944 goto done; 5945 } 5946 5947 if (m->m_pkthdr.pf.flags & PF_TAG_GENERATED) 5948 return (PF_PASS); 5949 5950 /* We do IP header normalization and packet reassembly here */ 5951 if (pf_normalize_ip6(m0, dir, kif, &reason, &pd) != PF_PASS) { 5952 action = PF_DROP; 5953 goto done; 5954 } 5955 m = *m0; /* pf_normalize messes with m0 */ 5956 h = mtod(m, struct ip6_hdr *); 5957 5958 #if 1 5959 /* 5960 * we do not support jumbogram yet. if we keep going, zero ip6_plen 5961 * will do something bad, so drop the packet for now. 5962 */ 5963 if (htons(h->ip6_plen) == 0) { 5964 action = PF_DROP; 5965 REASON_SET(&reason, PFRES_NORM); /*XXX*/ 5966 goto done; 5967 } 5968 #endif 5969 5970 pd.src = (struct pf_addr *)&h->ip6_src; 5971 pd.dst = (struct pf_addr *)&h->ip6_dst; 5972 pd.sport = pd.dport = NULL; 5973 pd.ip_sum = NULL; 5974 pd.proto_sum = NULL; 5975 pd.dir = dir; 5976 pd.sidx = (dir == PF_IN) ? 0 : 1; 5977 pd.didx = (dir == PF_IN) ? 
1 : 0; 5978 pd.af = AF_INET6; 5979 pd.tos = 0; 5980 pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr); 5981 pd.eh = eh; 5982 5983 off = ((caddr_t)h - m->m_data) + sizeof(struct ip6_hdr); 5984 pd.proto = h->ip6_nxt; 5985 do { 5986 switch (pd.proto) { 5987 case IPPROTO_FRAGMENT: 5988 action = pf_test_fragment(&r, dir, kif, m, h, 5989 &pd, &a, &ruleset); 5990 if (action == PF_DROP) 5991 REASON_SET(&reason, PFRES_FRAG); 5992 goto done; 5993 case IPPROTO_ROUTING: { 5994 struct ip6_rthdr rthdr; 5995 5996 if (rh_cnt++) { 5997 DPFPRINTF(PF_DEBUG_MISC, 5998 ("pf: IPv6 more than one rthdr\n")); 5999 action = PF_DROP; 6000 REASON_SET(&reason, PFRES_IPOPTIONS); 6001 log = 1; 6002 goto done; 6003 } 6004 if (!pf_pull_hdr(m, off, &rthdr, sizeof(rthdr), NULL, 6005 &reason, pd.af)) { 6006 DPFPRINTF(PF_DEBUG_MISC, 6007 ("pf: IPv6 short rthdr\n")); 6008 action = PF_DROP; 6009 REASON_SET(&reason, PFRES_SHORT); 6010 log = 1; 6011 goto done; 6012 } 6013 if (rthdr.ip6r_type == IPV6_RTHDR_TYPE_0) { 6014 DPFPRINTF(PF_DEBUG_MISC, 6015 ("pf: IPv6 rthdr0\n")); 6016 action = PF_DROP; 6017 REASON_SET(&reason, PFRES_IPOPTIONS); 6018 log = 1; 6019 goto done; 6020 } 6021 /* FALLTHROUGH */ 6022 } 6023 case IPPROTO_AH: 6024 case IPPROTO_HOPOPTS: 6025 case IPPROTO_DSTOPTS: { 6026 /* get next header and header length */ 6027 struct ip6_ext opt6; 6028 6029 if (!pf_pull_hdr(m, off, &opt6, sizeof(opt6), 6030 NULL, &reason, pd.af)) { 6031 DPFPRINTF(PF_DEBUG_MISC, 6032 ("pf: IPv6 short opt\n")); 6033 action = PF_DROP; 6034 log = 1; 6035 goto done; 6036 } 6037 if (pd.proto == IPPROTO_AH) 6038 off += (opt6.ip6e_len + 2) * 4; 6039 else 6040 off += (opt6.ip6e_len + 1) * 8; 6041 pd.proto = opt6.ip6e_nxt; 6042 /* goto the next header */ 6043 break; 6044 } 6045 default: 6046 terminal++; 6047 break; 6048 } 6049 } while (!terminal); 6050 6051 /* if there's no routing header, use unmodified mbuf for checksumming */ 6052 if (!n) 6053 n = m; 6054 6055 switch (pd.proto) { 6056 6057 case IPPROTO_TCP: { 6058 
struct tcphdr th; 6059 6060 pd.hdr.tcp = &th; 6061 if (!pf_pull_hdr(m, off, &th, sizeof(th), 6062 &action, &reason, AF_INET6)) { 6063 log = action != PF_PASS; 6064 goto done; 6065 } 6066 pd.p_len = pd.tot_len - off - (th.th_off << 2); 6067 action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd); 6068 if (action == PF_DROP) 6069 goto done; 6070 action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd, 6071 &reason); 6072 if (action == PF_PASS) { 6073 #if NPFSYNC 6074 pfsync_update_state(s); 6075 #endif /* NPFSYNC */ 6076 r = s->rule.ptr; 6077 a = s->anchor.ptr; 6078 log = s->log; 6079 } else if (s == NULL) 6080 action = pf_test_rule(&r, &s, dir, kif, 6081 m, off, h, &pd, &a, &ruleset, &ip6intrq); 6082 break; 6083 } 6084 6085 case IPPROTO_UDP: { 6086 struct udphdr uh; 6087 6088 pd.hdr.udp = &uh; 6089 if (!pf_pull_hdr(m, off, &uh, sizeof(uh), 6090 &action, &reason, AF_INET6)) { 6091 log = action != PF_PASS; 6092 goto done; 6093 } 6094 if (uh.uh_dport == 0 || 6095 ntohs(uh.uh_ulen) > m->m_pkthdr.len - off || 6096 ntohs(uh.uh_ulen) < sizeof(struct udphdr)) { 6097 action = PF_DROP; 6098 REASON_SET(&reason, PFRES_SHORT); 6099 goto done; 6100 } 6101 action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd); 6102 if (action == PF_PASS) { 6103 #if NPFSYNC 6104 pfsync_update_state(s); 6105 #endif /* NPFSYNC */ 6106 r = s->rule.ptr; 6107 a = s->anchor.ptr; 6108 log = s->log; 6109 } else if (s == NULL) 6110 action = pf_test_rule(&r, &s, dir, kif, 6111 m, off, h, &pd, &a, &ruleset, &ip6intrq); 6112 break; 6113 } 6114 6115 case IPPROTO_ICMPV6: { 6116 struct icmp6_hdr ih; 6117 6118 pd.hdr.icmp6 = &ih; 6119 if (!pf_pull_hdr(m, off, &ih, sizeof(ih), 6120 &action, &reason, AF_INET6)) { 6121 log = action != PF_PASS; 6122 goto done; 6123 } 6124 action = pf_test_state_icmp(&s, dir, kif, 6125 m, off, h, &pd, &reason); 6126 if (action == PF_PASS) { 6127 #if NPFSYNC 6128 pfsync_update_state(s); 6129 #endif /* NPFSYNC */ 6130 r = s->rule.ptr; 6131 a = s->anchor.ptr; 6132 log = s->log; 6133 } 
else if (s == NULL) 6134 action = pf_test_rule(&r, &s, dir, kif, 6135 m, off, h, &pd, &a, &ruleset, &ip6intrq); 6136 break; 6137 } 6138 6139 default: 6140 action = pf_test_state_other(&s, dir, kif, m, &pd); 6141 if (action == PF_PASS) { 6142 #if NPFSYNC 6143 pfsync_update_state(s); 6144 #endif /* NPFSYNC */ 6145 r = s->rule.ptr; 6146 a = s->anchor.ptr; 6147 log = s->log; 6148 } else if (s == NULL) 6149 action = pf_test_rule(&r, &s, dir, kif, m, off, h, 6150 &pd, &a, &ruleset, &ip6intrq); 6151 break; 6152 } 6153 6154 done: 6155 if (n != m) { 6156 m_freem(n); 6157 n = NULL; 6158 } 6159 6160 /* handle dangerous IPv6 extension headers. */ 6161 if (action == PF_PASS && rh_cnt && 6162 !((s && s->state_flags & PFSTATE_ALLOWOPTS) || r->allow_opts)) { 6163 action = PF_DROP; 6164 REASON_SET(&reason, PFRES_IPOPTIONS); 6165 log = 1; 6166 DPFPRINTF(PF_DEBUG_MISC, 6167 ("pf: dropping packet with dangerous v6 headers\n")); 6168 } 6169 6170 if ((s && s->tag) || r->rtableid) 6171 pf_tag_packet(m, s ? s->tag : 0, r->rtableid); 6172 6173 if (dir == PF_IN && s && s->key[PF_SK_STACK]) 6174 m->m_pkthdr.pf.statekey = s->key[PF_SK_STACK]; 6175 6176 #ifdef ALTQ 6177 if (action == PF_PASS && r->qid) { 6178 if (pd.tos & IPTOS_LOWDELAY) 6179 m->m_pkthdr.pf.qid = r->pqid; 6180 else 6181 m->m_pkthdr.pf.qid = r->qid; 6182 /* add hints for ecn */ 6183 m->m_pkthdr.pf.hdr = h; 6184 } 6185 #endif /* ALTQ */ 6186 6187 if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP || 6188 pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL && 6189 (s->nat_rule.ptr->action == PF_RDR || 6190 s->nat_rule.ptr->action == PF_BINAT) && 6191 IN6_IS_ADDR_LOOPBACK(&pd.dst->v6)) 6192 m->m_pkthdr.pf.flags |= PF_TAG_TRANSLATE_LOCALHOST; 6193 6194 if (dir == PF_IN && action == PF_PASS && r->divert.port) { 6195 struct pf_divert *divert; 6196 6197 if ((divert = pf_get_divert(m))) { 6198 m->m_pkthdr.pf.flags |= PF_TAG_DIVERTED; 6199 divert->port = r->divert.port; 6200 divert->addr.ipv6 = 
r->divert.addr.v6; 6201 } 6202 } 6203 6204 if (log) { 6205 struct pf_rule *lr; 6206 6207 if (s != NULL && s->nat_rule.ptr != NULL && 6208 s->nat_rule.ptr->log & PF_LOG_ALL) 6209 lr = s->nat_rule.ptr; 6210 else 6211 lr = r; 6212 PFLOG_PACKET(kif, h, m, AF_INET6, dir, reason, lr, a, ruleset, 6213 &pd); 6214 } 6215 6216 kif->pfik_bytes[1][dir == PF_OUT][action != PF_PASS] += pd.tot_len; 6217 kif->pfik_packets[1][dir == PF_OUT][action != PF_PASS]++; 6218 6219 if (action == PF_PASS || r->action == PF_DROP) { 6220 dirndx = (dir == PF_OUT); 6221 r->packets[dirndx]++; 6222 r->bytes[dirndx] += pd.tot_len; 6223 if (a != NULL) { 6224 a->packets[dirndx]++; 6225 a->bytes[dirndx] += pd.tot_len; 6226 } 6227 if (s != NULL) { 6228 if (s->nat_rule.ptr != NULL) { 6229 s->nat_rule.ptr->packets[dirndx]++; 6230 s->nat_rule.ptr->bytes[dirndx] += pd.tot_len; 6231 } 6232 if (s->src_node != NULL) { 6233 s->src_node->packets[dirndx]++; 6234 s->src_node->bytes[dirndx] += pd.tot_len; 6235 } 6236 if (s->nat_src_node != NULL) { 6237 s->nat_src_node->packets[dirndx]++; 6238 s->nat_src_node->bytes[dirndx] += pd.tot_len; 6239 } 6240 dirndx = (dir == s->direction) ? 0 : 1; 6241 s->packets[dirndx]++; 6242 s->bytes[dirndx] += pd.tot_len; 6243 } 6244 tr = r; 6245 nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule; 6246 if (nr != NULL && r == &pf_default_rule) 6247 tr = nr; 6248 if (tr->src.addr.type == PF_ADDR_TABLE) 6249 pfr_update_stats(tr->src.addr.p.tbl, 6250 (s == NULL) ? pd.src : 6251 &s->key[(s->direction == PF_IN)]->addr[0], 6252 pd.af, pd.tot_len, dir == PF_OUT, 6253 r->action == PF_PASS, tr->src.neg); 6254 if (tr->dst.addr.type == PF_ADDR_TABLE) 6255 pfr_update_stats(tr->dst.addr.p.tbl, 6256 (s == NULL) ? 
pd.dst : 6257 &s->key[(s->direction == PF_IN)]->addr[1], 6258 pd.af, pd.tot_len, dir == PF_OUT, 6259 r->action == PF_PASS, tr->dst.neg); 6260 } 6261 6262 6263 if (action == PF_SYNPROXY_DROP) { 6264 m_freem(*m0); 6265 *m0 = NULL; 6266 action = PF_PASS; 6267 } else if (r->rt) 6268 /* pf_route6 can free the mbuf causing *m0 to become NULL */ 6269 pf_route6(m0, r, dir, kif->pfik_ifp, s, &pd); 6270 6271 return (action); 6272 } 6273 #endif /* INET6 */ 6274 6275 int 6276 pf_check_congestion(struct ifqueue *ifq) 6277 { 6278 if (ifq->ifq_congestion) 6279 return (1); 6280 else 6281 return (0); 6282 } 6283