/*	$OpenBSD: pf.c,v 1.979 2016/07/18 13:17:44 bluhm Exp $ */

/*
 * Copyright (c) 2001 Daniel Hartmeier
 * Copyright (c) 2002 - 2013 Henning Brauer <henning@openbsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *    - Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *    - Redistributions in binary form must reproduce the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer in the documentation and/or other materials provided
 *      with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Effort sponsored in part by the Defense Advanced Research Projects
 * Agency (DARPA) and Air Force Research Laboratory, Air Force
 * Materiel Command, USAF, under agreement number F30602-01-2-0537.
 *
 */

#include "bpfilter.h"
#include "carp.h"
#include "pflog.h"
#include "pfsync.h"
#include "pflow.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/filio.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/kernel.h>
#include <sys/time.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/syslog.h>

#include <crypto/sha2.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_types.h>
#include <net/route.h>

#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_seq.h>
#include <netinet/udp.h>
#include <netinet/ip_icmp.h>
#include <netinet/in_pcb.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_fsm.h>
#include <netinet/udp_var.h>
#include <netinet/icmp_var.h>
#include <netinet/ip_divert.h>

#include <net/pfvar.h>

#if NPFLOG > 0
#include <net/if_pflog.h>
#endif	/* NPFLOG > 0 */

#if NPFLOW > 0
#include <net/if_pflow.h>
#endif	/* NPFLOW > 0 */

#if NPFSYNC > 0
#include <net/if_pfsync.h>
#endif	/* NPFSYNC > 0 */

#ifdef INET6
#include <netinet6/in6_var.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet/icmp6.h>
#include <netinet6/nd6.h>
#include <netinet6/ip6_divert.h>
#endif /* INET6 */

#ifdef DDB
#include <machine/db_machdep.h>
#include <ddb/db_interface.h>
#endif

/*
 * Global variables
 */
struct pf_state_tree	 pf_statetbl;
struct pf_queuehead	 pf_queues[2];
struct pf_queuehead	*pf_queues_active;
struct pf_queuehead	*pf_queues_inactive;

struct pf_status	 pf_status;

SHA2_CTX		 pf_tcp_secret_ctx;
u_char			 pf_tcp_secret[16];
int			 pf_tcp_secret_init;
int			 pf_tcp_iss_off;

struct pf_anchor_stackframe {
	struct pf_ruleset	*rs;
	struct pf_rule		*r;
	struct pf_anchor_node	*parent;
	struct pf_anchor	*child;
} pf_anchor_stack[64];

/*
 * Cannot fold into pf_pdesc directly, unknown storage size outside pf.c.
 * Keep in sync with union pf_headers in pflog_bpfcopy() in if_pflog.c.
 */
union pf_headers {
	struct tcphdr		tcp;
	struct udphdr		udp;
	struct icmp		icmp;
#ifdef INET6
	struct icmp6_hdr	icmp6;
	struct mld_hdr		mld;
	struct nd_neighbor_solicit nd_ns;
#endif /* INET6 */
};


struct pool		 pf_src_tree_pl, pf_rule_pl, pf_queue_pl;
struct pool		 pf_state_pl, pf_state_key_pl, pf_state_item_pl;
struct pool		 pf_rule_item_pl, pf_sn_item_pl;

void			 pf_init_threshold(struct pf_threshold *, u_int32_t,
			    u_int32_t);
void			 pf_add_threshold(struct pf_threshold *);
int			 pf_check_threshold(struct pf_threshold *);

void			 pf_change_ap(struct pf_pdesc *, struct pf_addr *,
			    u_int16_t *, struct pf_addr *, u_int16_t);
int			 pf_modulate_sack(struct pf_pdesc *,
			    struct pf_state_peer *);
int			 pf_icmp_mapping(struct pf_pdesc *, u_int8_t, int *,
			    u_int16_t *, u_int16_t *);
void			 pf_change_icmp(struct pf_pdesc *, struct pf_addr *,
			    u_int16_t *, struct pf_addr *, struct pf_addr *,
			    u_int16_t);
int			 pf_change_icmp_af(struct mbuf *, int,
			    struct pf_pdesc *, struct pf_pdesc *,
			    struct pf_addr *, struct pf_addr *, sa_family_t,
			    sa_family_t);
int			 pf_translate_icmp_af(int, void *);
void			 pf_send_tcp(const struct pf_rule *, sa_family_t,
			    const struct pf_addr *, const struct pf_addr *,
			    u_int16_t, u_int16_t, u_int32_t, u_int32_t,
			    u_int8_t, u_int16_t, u_int16_t, u_int8_t, int,
			    u_int16_t, u_int);
void			 pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t,
			    sa_family_t, struct pf_rule *, u_int);
void			 pf_detach_state(struct pf_state *);
void			 pf_state_key_detach(struct pf_state *, int);
u_int32_t		 pf_tcp_iss(struct pf_pdesc *);
void			 pf_rule_to_actions(struct pf_rule *,
			    struct pf_rule_actions *);
int			 pf_test_rule(struct pf_pdesc *, struct pf_rule **,
			    struct pf_state **, struct pf_rule **,
			    struct pf_ruleset **, u_short *);
static __inline int	 pf_create_state(struct pf_pdesc *, struct pf_rule *,
			    struct pf_rule *, struct pf_rule *,
			    struct pf_state_key **, struct pf_state_key **,
			    int *, struct pf_state **, int,
			    struct pf_rule_slist *, struct pf_rule_actions *,
			    struct pf_src_node *[]);
static __inline int	 pf_state_key_addr_setup(struct pf_pdesc *, void *,
			    int, struct pf_addr *, int, struct pf_addr *,
			    int, int);
int			 pf_state_key_setup(struct pf_pdesc *, struct
			    pf_state_key **, struct pf_state_key **, int);
int			 pf_tcp_track_full(struct pf_pdesc *,
			    struct pf_state_peer *, struct pf_state_peer *,
			    struct pf_state **, u_short *, int *);
int			 pf_tcp_track_sloppy(struct pf_pdesc *,
			    struct pf_state_peer *, struct pf_state_peer *,
			    struct pf_state **, u_short *);
static __inline int	 pf_synproxy(struct pf_pdesc *, struct pf_state **,
			    u_short *);
int			 pf_test_state(struct pf_pdesc *, struct pf_state **,
			    u_short *);
int			 pf_icmp_state_lookup(struct pf_pdesc *,
			    struct pf_state_key_cmp *, struct pf_state **,
			    u_int16_t, u_int16_t, int, int *, int, int);
int			 pf_test_state_icmp(struct pf_pdesc *,
			    struct pf_state **, u_short *);
u_int8_t		 pf_get_wscale(struct pf_pdesc *);
u_int16_t		 pf_get_mss(struct pf_pdesc *);
u_int16_t		 pf_calc_mss(struct pf_addr *, sa_family_t, int,
			    u_int16_t);
static __inline int	 pf_set_rt_ifp(struct pf_state *, struct pf_addr *,
			    sa_family_t);
struct pf_divert	*pf_get_divert(struct mbuf *);
int			 pf_walk_option6(struct pf_pdesc *, struct ip6_hdr *,
			    int, int, u_short *);
int			 pf_walk_header6(struct pf_pdesc *, struct ip6_hdr *,
			    u_short *);
void			 pf_print_state_parts(struct pf_state *,
			    struct pf_state_key *, struct pf_state_key *);
int			 pf_addr_wrap_neq(struct pf_addr_wrap *,
			    struct pf_addr_wrap *);
int			 pf_compare_state_keys(struct pf_state_key *,
			    struct pf_state_key *, struct pfi_kif *, u_int);
struct pf_state		*pf_find_state(struct pfi_kif *,
			    struct pf_state_key_cmp *, u_int, struct mbuf *);
int			 pf_src_connlimit(struct pf_state **);
int			 pf_match_rcvif(struct mbuf *, struct pf_rule *);
void			 pf_step_into_anchor(int *, struct pf_ruleset **,
			    struct pf_rule **, struct pf_rule **);
int			 pf_step_out_of_anchor(int *, struct pf_ruleset **,
			    struct pf_rule **, struct pf_rule **,
			    int *);
void			 pf_counters_inc(int, struct pf_pdesc *,
			    struct pf_state *, struct pf_rule *,
			    struct pf_rule *);
void			 pf_state_key_link(struct pf_state_key *,
			    struct pf_state_key *);
void			 pf_inpcb_unlink_state_key(struct inpcb *);
void			 pf_state_key_unlink_reverse(struct pf_state_key *);

#if NPFLOG > 0
void			 pf_log_matches(struct pf_pdesc *, struct pf_rule *,
			    struct pf_rule *, struct pf_ruleset *,
			    struct pf_rule_slist *);
#endif	/* NPFLOG > 0 */

extern struct pool pfr_ktable_pl;
extern struct pool pfr_kentry_pl;

struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = {
	{ &pf_state_pl, PFSTATE_HIWAT, PFSTATE_HIWAT },
	{ &pf_src_tree_pl, PFSNODE_HIWAT, PFSNODE_HIWAT },
	{ &pf_frent_pl, PFFRAG_FRENT_HIWAT, PFFRAG_FRENT_HIWAT },
	{ &pfr_ktable_pl, PFR_KTABLE_HIWAT, PFR_KTABLE_HIWAT },
	{ &pfr_kentry_pl, PFR_KENTRY_HIWAT, PFR_KENTRY_HIWAT }
};

#define STATE_LOOKUP(i, k, d, s, m)					\
	do {								\
		s = pf_find_state(i, k, d, m);				\
		if (s == NULL || (s)->timeout == PFTM_PURGE)		\
			return (PF_DROP);				\
		if (d == PF_OUT &&					\
		    (((s)->rule.ptr->rt == PF_ROUTETO &&		\
		    (s)->rule.ptr->direction == PF_OUT) ||		\
		    ((s)->rule.ptr->rt == PF_REPLYTO &&			\
		    (s)->rule.ptr->direction == PF_IN)) &&		\
		    (s)->rt_kif != NULL &&				\
		    (s)->rt_kif != i)					\
			return (PF_PASS);				\
	} while (0)

#define BOUND_IFACE(r, k) \
	((r)->rule_flag & PFRULE_IFBOUND) ? (k) : pfi_all
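/*
 * (Explanatory note, derived from the macros above.)  STATE_LOOKUP
 * short-circuits to PF_PASS when an outbound packet hits a
 * route-to/reply-to state whose chosen outgoing interface differs from
 * the current one: the packet was already filtered on its first pass
 * and is merely leaving via (s)->rt_kif.  BOUND_IFACE picks the kif a
 * new state is bound to: the packet's interface for if-bound rules,
 * the pfi_all wildcard for floating states.
 */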
#define STATE_INC_COUNTERS(s)					\
	do {							\
		struct pf_rule_item *mrm;			\
		s->rule.ptr->states_cur++;			\
		s->rule.ptr->states_tot++;			\
		if (s->anchor.ptr != NULL) {			\
			s->anchor.ptr->states_cur++;		\
			s->anchor.ptr->states_tot++;		\
		}						\
		SLIST_FOREACH(mrm, &s->match_rules, entry)	\
			mrm->r->states_cur++;			\
	} while (0)

static __inline int pf_src_compare(struct pf_src_node *, struct pf_src_node *);
static __inline int pf_state_compare_key(struct pf_state_key *,
	struct pf_state_key *);
static __inline int pf_state_compare_id(struct pf_state *,
	struct pf_state *);

struct pf_src_tree tree_src_tracking;

struct pf_state_tree_id tree_id;
struct pf_state_queue state_list;

RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare);
RB_GENERATE(pf_state_tree, pf_state_key, entry, pf_state_compare_key);
RB_GENERATE(pf_state_tree_id, pf_state,
    entry_id, pf_state_compare_id);

__inline int
pf_addr_compare(struct pf_addr *a, struct pf_addr *b, sa_family_t af)
{
	switch (af) {
	case AF_INET:
		if (a->addr32[0] > b->addr32[0])
			return (1);
		if (a->addr32[0] < b->addr32[0])
			return (-1);
		break;
#ifdef INET6
	case AF_INET6:
		if (a->addr32[3] > b->addr32[3])
			return (1);
		if (a->addr32[3] < b->addr32[3])
			return (-1);
		if (a->addr32[2] > b->addr32[2])
			return (1);
		if (a->addr32[2] < b->addr32[2])
			return (-1);
		if (a->addr32[1] > b->addr32[1])
			return (1);
		if (a->addr32[1] < b->addr32[1])
			return (-1);
		if (a->addr32[0] > b->addr32[0])
			return (1);
		if (a->addr32[0] < b->addr32[0])
			return (-1);
		break;
#endif /* INET6 */
	}
	return (0);
}

static __inline int
pf_src_compare(struct pf_src_node *a, struct pf_src_node *b)
{
	int	diff;

	if (a->rule.ptr > b->rule.ptr)
		return (1);
	if (a->rule.ptr < b->rule.ptr)
		return (-1);
	if ((diff = a->type - b->type) != 0)
		return (diff);
	if ((diff = a->af - b->af) != 0)
		return (diff);
	if ((diff = pf_addr_compare(&a->addr, &b->addr, a->af)) != 0)
		return (diff);
	return (0);
}

void
pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af)
{
	switch (af) {
	case AF_INET:
		dst->addr32[0] = src->addr32[0];
		break;
#ifdef INET6
	case AF_INET6:
		dst->addr32[0] = src->addr32[0];
		dst->addr32[1] = src->addr32[1];
		dst->addr32[2] = src->addr32[2];
		dst->addr32[3] = src->addr32[3];
		break;
#endif /* INET6 */
	default:
		unhandled_af(af);
	}
}

void
pf_init_threshold(struct pf_threshold *threshold,
    u_int32_t limit, u_int32_t seconds)
{
	threshold->limit = limit * PF_THRESHOLD_MULT;
	threshold->seconds = seconds;
	threshold->count = 0;
	threshold->last = time_uptime;
}

void
pf_add_threshold(struct pf_threshold *threshold)
{
	u_int32_t t = time_uptime, diff = t - threshold->last;

	if (diff >= threshold->seconds)
		threshold->count = 0;
	else
		threshold->count -= threshold->count * diff /
		    threshold->seconds;
	threshold->count += PF_THRESHOLD_MULT;
	threshold->last = t;
}

int
pf_check_threshold(struct pf_threshold *threshold)
{
	return (threshold->count > threshold->limit);
}
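/*
 * (Worked example of the arithmetic above.)  The threshold counter is
 * fixed-point: limit and count are both scaled by PF_THRESHOLD_MULT
 * (1000 in pfvar.h) so the linear decay in pf_add_threshold() keeps
 * fractional precision in integer math.  For a "5/30" rate (limit 5,
 * seconds 30), limit becomes 5000 and each hit adds 1000; a hit
 * arriving 6 seconds after the previous one first decays count by
 * count * 6 / 30, i.e. one fifth.  Only when the decayed count exceeds
 * 5000 does pf_check_threshold() report the rate as exceeded.
 */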
int
pf_src_connlimit(struct pf_state **state)
{
	int			 bad = 0;
	struct pf_src_node	*sn;

	if ((sn = pf_get_src_node((*state), PF_SN_NONE)) == NULL)
		return (0);

	sn->conn++;
	(*state)->src.tcp_est = 1;
	pf_add_threshold(&sn->conn_rate);

	if ((*state)->rule.ptr->max_src_conn &&
	    (*state)->rule.ptr->max_src_conn < sn->conn) {
		pf_status.lcounters[LCNT_SRCCONN]++;
		bad++;
	}

	if ((*state)->rule.ptr->max_src_conn_rate.limit &&
	    pf_check_threshold(&sn->conn_rate)) {
		pf_status.lcounters[LCNT_SRCCONNRATE]++;
		bad++;
	}

	if (!bad)
		return (0);

	if ((*state)->rule.ptr->overload_tbl) {
		struct pfr_addr p;
		u_int32_t	killed = 0;

		pf_status.lcounters[LCNT_OVERLOAD_TABLE]++;
		if (pf_status.debug >= LOG_NOTICE) {
			log(LOG_NOTICE,
			    "pf: pf_src_connlimit: blocking address ");
			pf_print_host(&sn->addr, 0,
			    (*state)->key[PF_SK_WIRE]->af);
		}

		bzero(&p, sizeof(p));
		p.pfra_af = (*state)->key[PF_SK_WIRE]->af;
		switch ((*state)->key[PF_SK_WIRE]->af) {
		case AF_INET:
			p.pfra_net = 32;
			p.pfra_ip4addr = sn->addr.v4;
			break;
#ifdef INET6
		case AF_INET6:
			p.pfra_net = 128;
			p.pfra_ip6addr = sn->addr.v6;
			break;
#endif /* INET6 */
		}

		pfr_insert_kentry((*state)->rule.ptr->overload_tbl,
		    &p, time_second);

		/* kill existing states if that's required. */
		if ((*state)->rule.ptr->flush) {
			struct pf_state_key *sk;
			struct pf_state *st;

			pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++;
			RB_FOREACH(st, pf_state_tree_id, &tree_id) {
				sk = st->key[PF_SK_WIRE];
				/*
				 * Kill states from this source. (Only those
				 * from the same rule if PF_FLUSH_GLOBAL is not
				 * set)
				 */
				if (sk->af ==
				    (*state)->key[PF_SK_WIRE]->af &&
				    (((*state)->direction == PF_OUT &&
				    PF_AEQ(&sn->addr, &sk->addr[1], sk->af)) ||
				    ((*state)->direction == PF_IN &&
				    PF_AEQ(&sn->addr, &sk->addr[0], sk->af))) &&
				    ((*state)->rule.ptr->flush &
				    PF_FLUSH_GLOBAL ||
				    (*state)->rule.ptr == st->rule.ptr)) {
					st->timeout = PFTM_PURGE;
					st->src.state = st->dst.state =
					    TCPS_CLOSED;
					killed++;
				}
			}
			if (pf_status.debug >= LOG_NOTICE)
				addlog(", %u states killed", killed);
		}
		if (pf_status.debug >= LOG_NOTICE)
			addlog("\n");
	}

	/* kill this state */
	(*state)->timeout = PFTM_PURGE;
	(*state)->src.state = (*state)->dst.state = TCPS_CLOSED;
	return (1);
}
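/*
 * pf_src_connlimit() above is what a ruleset like the following ends
 * up exercising (a sketch of common pf.conf usage, not taken from this
 * file):
 *
 *	table <bruteforce> persist
 *	block quick from <bruteforce>
 *	pass in on egress proto tcp to port ssh keep state \
 *	    (max-src-conn 10, max-src-conn-rate 5/30, \
 *	    overload <bruteforce> flush global)
 *
 * When either limit trips, the offending source is inserted into the
 * overload table and, with "flush global" set, every state from that
 * address is torn down regardless of which rule created it.
 */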
addlog("\n"); 549 } 550 pool_put(&pf_src_tree_pl, *sn); 551 return (-1); 552 } 553 (*sn)->creation = time_uptime; 554 (*sn)->rule.ptr->src_nodes++; 555 pf_status.scounters[SCNT_SRC_NODE_INSERT]++; 556 pf_status.src_nodes++; 557 } else { 558 if (rule->max_src_states && 559 (*sn)->states >= rule->max_src_states) { 560 pf_status.lcounters[LCNT_SRCSTATES]++; 561 return (-1); 562 } 563 } 564 return (0); 565 } 566 567 void 568 pf_remove_src_node(struct pf_src_node *sn) 569 { 570 if (sn->states > 0 || sn->expire > time_uptime) 571 return; 572 573 sn->rule.ptr->src_nodes--; 574 if (sn->rule.ptr->states_cur == 0 && 575 sn->rule.ptr->src_nodes == 0) 576 pf_rm_rule(NULL, sn->rule.ptr); 577 RB_REMOVE(pf_src_tree, &tree_src_tracking, sn); 578 pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; 579 pf_status.src_nodes--; 580 pool_put(&pf_src_tree_pl, sn); 581 } 582 583 struct pf_src_node * 584 pf_get_src_node(struct pf_state *s, enum pf_sn_types type) 585 { 586 struct pf_sn_item *sni; 587 588 SLIST_FOREACH(sni, &s->src_nodes, next) 589 if (sni->sn->type == type) 590 return (sni->sn); 591 return (NULL); 592 } 593 594 void 595 pf_state_rm_src_node(struct pf_state *s, struct pf_src_node *sn) 596 { 597 struct pf_sn_item *sni, *snin, *snip = NULL; 598 599 for (sni = SLIST_FIRST(&s->src_nodes); sni; sni = snin) { 600 snin = SLIST_NEXT(sni, next); 601 if (sni->sn == sn) { 602 if (snip) 603 SLIST_REMOVE_AFTER(snip, next); 604 else 605 SLIST_REMOVE_HEAD(&s->src_nodes, next); 606 pool_put(&pf_sn_item_pl, sni); 607 sni = NULL; 608 sn->states--; 609 } 610 if (sni != NULL) 611 snip = sni; 612 } 613 } 614 615 /* state table stuff */ 616 617 static __inline int 618 pf_state_compare_key(struct pf_state_key *a, struct pf_state_key *b) 619 { 620 int diff; 621 622 if ((diff = a->proto - b->proto) != 0) 623 return (diff); 624 if ((diff = a->af - b->af) != 0) 625 return (diff); 626 if ((diff = pf_addr_compare(&a->addr[0], &b->addr[0], a->af)) != 0) 627 return (diff); 628 if ((diff = pf_addr_compare(&a->addr[1], &b->addr[1], a->af)) != 0) 629 return (diff); 630 if ((diff = a->port[0] - b->port[0]) != 0) 631 return (diff); 632 if ((diff = a->port[1] - b->port[1]) != 0) 633 return (diff); 634 if ((diff = a->rdomain - b->rdomain) != 0) 635 return (diff); 636 return (0); 637 } 638 639 static __inline int 640 pf_state_compare_id(struct pf_state *a, struct pf_state *b) 641 { 642 if (a->id > b->id) 643 return (1); 644 if (a->id < b->id) 645 return (-1); 646 if (a->creatorid > b->creatorid) 647 return (1); 648 if (a->creatorid < b->creatorid) 649 return (-1); 650 651 return (0); 652 } 653 654 int 655 pf_state_key_attach(struct pf_state_key *sk, struct pf_state *s, int idx) 656 { 657 struct pf_state_item *si; 658 struct pf_state_key *cur; 659 struct pf_state *olds = NULL; 660 661 KASSERT(s->key[idx] == NULL); 662 if ((cur = RB_INSERT(pf_state_tree, &pf_statetbl, sk)) != NULL) { 663 /* key exists. check for same kif, if none, add to key */ 664 TAILQ_FOREACH(si, &cur->states, entry) 665 if (si->s->kif == s->kif && 666 ((si->s->key[PF_SK_WIRE]->af == sk->af && 667 si->s->direction == s->direction) || 668 (si->s->key[PF_SK_WIRE]->af != 669 si->s->key[PF_SK_STACK]->af && 670 sk->af == si->s->key[PF_SK_STACK]->af && 671 si->s->direction != s->direction))) { 672 int reuse = 0; 673 674 if (sk->proto == IPPROTO_TCP && 675 si->s->src.state >= TCPS_FIN_WAIT_2 && 676 si->s->dst.state >= TCPS_FIN_WAIT_2) 677 reuse = 1; 678 if (pf_status.debug >= LOG_NOTICE) { 679 log(LOG_NOTICE, 680 "pf: %s key attach %s on %s: ", 681 (idx == PF_SK_WIRE) ? 
682 "wire" : "stack", 683 reuse ? "reuse" : "failed", 684 s->kif->pfik_name); 685 pf_print_state_parts(s, 686 (idx == PF_SK_WIRE) ? sk : NULL, 687 (idx == PF_SK_STACK) ? sk : NULL); 688 addlog(", existing: "); 689 pf_print_state_parts(si->s, 690 (idx == PF_SK_WIRE) ? sk : NULL, 691 (idx == PF_SK_STACK) ? sk : NULL); 692 addlog("\n"); 693 } 694 if (reuse) { 695 si->s->src.state = si->s->dst.state = 696 TCPS_CLOSED; 697 /* remove late or sks can go away */ 698 olds = si->s; 699 } else { 700 pool_put(&pf_state_key_pl, sk); 701 return (-1); /* collision! */ 702 } 703 } 704 pool_put(&pf_state_key_pl, sk); 705 s->key[idx] = cur; 706 } else 707 s->key[idx] = sk; 708 709 if ((si = pool_get(&pf_state_item_pl, PR_NOWAIT)) == NULL) { 710 pf_state_key_detach(s, idx); 711 return (-1); 712 } 713 si->s = s; 714 715 /* list is sorted, if-bound states before floating */ 716 if (s->kif == pfi_all) 717 TAILQ_INSERT_TAIL(&s->key[idx]->states, si, entry); 718 else 719 TAILQ_INSERT_HEAD(&s->key[idx]->states, si, entry); 720 721 if (olds) 722 pf_remove_state(olds); 723 724 return (0); 725 } 726 727 void 728 pf_detach_state(struct pf_state *s) 729 { 730 if (s->key[PF_SK_WIRE] == s->key[PF_SK_STACK]) 731 s->key[PF_SK_WIRE] = NULL; 732 733 if (s->key[PF_SK_STACK] != NULL) 734 pf_state_key_detach(s, PF_SK_STACK); 735 736 if (s->key[PF_SK_WIRE] != NULL) 737 pf_state_key_detach(s, PF_SK_WIRE); 738 } 739 740 void 741 pf_state_key_detach(struct pf_state *s, int idx) 742 { 743 struct pf_state_item *si; 744 struct pf_state_key *sk; 745 746 if (s->key[idx] == NULL) 747 return; 748 749 si = TAILQ_FIRST(&s->key[idx]->states); 750 while (si && si->s != s) 751 si = TAILQ_NEXT(si, entry); 752 753 if (si) { 754 TAILQ_REMOVE(&s->key[idx]->states, si, entry); 755 pool_put(&pf_state_item_pl, si); 756 } 757 758 sk = s->key[idx]; 759 s->key[idx] = NULL; 760 if (TAILQ_EMPTY(&sk->states)) { 761 RB_REMOVE(pf_state_tree, &pf_statetbl, sk); 762 sk->removed = 1; 763 pf_state_key_unlink_reverse(sk); 764 pf_inpcb_unlink_state_key(sk->inp); 765 pf_state_key_unref(sk); 766 } 767 } 768 769 struct pf_state_key * 770 pf_alloc_state_key(int pool_flags) 771 { 772 struct pf_state_key *sk; 773 774 if ((sk = pool_get(&pf_state_key_pl, pool_flags)) == NULL) 775 return (NULL); 776 TAILQ_INIT(&sk->states); 777 778 return (sk); 779 } 780 781 static __inline int 782 pf_state_key_addr_setup(struct pf_pdesc *pd, void *arg, int sidx, 783 struct pf_addr *saddr, int didx, struct pf_addr *daddr, int af, int multi) 784 { 785 struct pf_state_key_cmp *key = arg; 786 #ifdef INET6 787 struct nd_neighbor_solicit *nd; 788 struct pf_addr *target; 789 790 if (af == AF_INET || pd->proto != IPPROTO_ICMPV6) 791 goto copy; 792 793 switch (pd->hdr.icmp6->icmp6_type) { 794 case ND_NEIGHBOR_SOLICIT: 795 if (multi) 796 return (-1); 797 nd = (void *)pd->hdr.icmp6; 798 target = (struct pf_addr *)&nd->nd_ns_target; 799 daddr = target; 800 break; 801 case ND_NEIGHBOR_ADVERT: 802 if (multi) 803 return (-1); 804 nd = (void *)pd->hdr.icmp6; 805 target = (struct pf_addr *)&nd->nd_ns_target; 806 saddr = target; 807 if (IN6_IS_ADDR_MULTICAST(&pd->dst->v6)) { 808 key->addr[didx].addr32[0] = 0; 809 key->addr[didx].addr32[1] = 0; 810 key->addr[didx].addr32[2] = 0; 811 key->addr[didx].addr32[3] = 0; 812 daddr = NULL; /* overwritten */ 813 } 814 break; 815 default: 816 if (multi) { 817 key->addr[sidx].addr32[0] = __IPV6_ADDR_INT32_MLL; 818 key->addr[sidx].addr32[1] = 0; 819 key->addr[sidx].addr32[2] = 0; 820 key->addr[sidx].addr32[3] = __IPV6_ADDR_INT32_ONE; 821 saddr = NULL; /* overwritten */ 
		}
	}
 copy:
#endif /* INET6 */
	if (saddr)
		PF_ACPY(&key->addr[sidx], saddr, af);
	if (daddr)
		PF_ACPY(&key->addr[didx], daddr, af);

	return (0);
}

int
pf_state_key_setup(struct pf_pdesc *pd, struct pf_state_key **skw,
    struct pf_state_key **sks, int rtableid)
{
	/* if returning error we MUST pool_put state keys ourselves */
	struct pf_state_key *sk1, *sk2;
	u_int wrdom = pd->rdomain;
	int afto = pd->af != pd->naf;

	if ((sk1 = pf_alloc_state_key(PR_NOWAIT | PR_ZERO)) == NULL)
		return (ENOMEM);

	pf_state_key_addr_setup(pd, sk1, pd->sidx, pd->src, pd->didx, pd->dst,
	    pd->af, 0);
	sk1->port[pd->sidx] = pd->osport;
	sk1->port[pd->didx] = pd->odport;
	sk1->proto = pd->proto;
	sk1->af = pd->af;
	sk1->rdomain = pd->rdomain;
	PF_REF_INIT(sk1->refcnt);
	sk1->removed = 0;
	if (rtableid >= 0)
		wrdom = rtable_l2(rtableid);

	if (PF_ANEQ(&pd->nsaddr, pd->src, pd->af) ||
	    PF_ANEQ(&pd->ndaddr, pd->dst, pd->af) ||
	    pd->nsport != pd->osport || pd->ndport != pd->odport ||
	    wrdom != pd->rdomain || afto) {	/* NAT/NAT64 */
		if ((sk2 = pf_alloc_state_key(PR_NOWAIT | PR_ZERO)) == NULL) {
			pool_put(&pf_state_key_pl, sk1);
			return (ENOMEM);
		}
		pf_state_key_addr_setup(pd, sk2, afto ? pd->didx : pd->sidx,
		    &pd->nsaddr, afto ? pd->sidx : pd->didx, &pd->ndaddr,
		    pd->naf, 0);
		sk2->port[afto ? pd->didx : pd->sidx] = pd->nsport;
		sk2->port[afto ? pd->sidx : pd->didx] = pd->ndport;
		if (afto) {
			switch (pd->proto) {
			case IPPROTO_ICMP:
				sk2->proto = IPPROTO_ICMPV6;
				break;
			case IPPROTO_ICMPV6:
				sk2->proto = IPPROTO_ICMP;
				break;
			default:
				sk2->proto = pd->proto;
			}
		} else
			sk2->proto = pd->proto;
		sk2->af = pd->naf;
		sk2->rdomain = wrdom;
		PF_REF_INIT(sk2->refcnt);
		sk2->removed = 0;
	} else
		sk2 = sk1;

	if (pd->dir == PF_IN) {
		*skw = sk1;
		*sks = sk2;
	} else {
		*sks = sk1;
		*skw = sk2;
	}

	if (pf_status.debug >= LOG_DEBUG) {
		log(LOG_DEBUG, "pf: key setup: ");
		pf_print_state_parts(NULL, *skw, *sks);
		addlog("\n");
	}

	return (0);
}

int
pf_state_insert(struct pfi_kif *kif, struct pf_state_key **skw,
    struct pf_state_key **sks, struct pf_state *s)
{
	splsoftassert(IPL_SOFTNET);

	s->kif = kif;
	if (*skw == *sks) {
		if (pf_state_key_attach(*skw, s, PF_SK_WIRE))
			return (-1);
		*skw = *sks = s->key[PF_SK_WIRE];
		s->key[PF_SK_STACK] = s->key[PF_SK_WIRE];
	} else {
		if (pf_state_key_attach(*skw, s, PF_SK_WIRE)) {
			pool_put(&pf_state_key_pl, *sks);
			return (-1);
		}
		*skw = s->key[PF_SK_WIRE];
		if (pf_state_key_attach(*sks, s, PF_SK_STACK)) {
			pf_state_key_detach(s, PF_SK_WIRE);
			return (-1);
		}
		*sks = s->key[PF_SK_STACK];
	}

	if (s->id == 0 && s->creatorid == 0) {
		s->id = htobe64(pf_status.stateid++);
		s->creatorid = pf_status.hostid;
	}
	if (RB_INSERT(pf_state_tree_id, &tree_id, s) != NULL) {
		if (pf_status.debug >= LOG_NOTICE) {
			log(LOG_NOTICE, "pf: state insert failed: "
			    "id: %016llx creatorid: %08x",
			    betoh64(s->id), ntohl(s->creatorid));
			addlog("\n");
		}
		pf_detach_state(s);
		return (-1);
	}
	TAILQ_INSERT_TAIL(&state_list, s, entry_list);
	pf_status.fcounters[FCNT_STATE_INSERT]++;
	pf_status.states++;
	pfi_kif_ref(kif, PFI_KIF_REF_STATE);
#if NPFSYNC > 0
	pfsync_insert_state(s);
#endif	/* NPFSYNC > 0 */
	return (0);
}
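/*
 * Each state carries two keys: PF_SK_WIRE describes the packet as it
 * appears on the wire, PF_SK_STACK as the local stack sees it.
 * Without any translation the two collapse into a single shared key
 * (sk2 == sk1 in pf_state_key_setup() above); NAT, af-translation or a
 * routing-domain switch makes them diverge.
 */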
struct pf_state *
pf_find_state_byid(struct pf_state_cmp *key)
{
	pf_status.fcounters[FCNT_STATE_SEARCH]++;

	return (RB_FIND(pf_state_tree_id, &tree_id, (struct pf_state *)key));
}

int
pf_compare_state_keys(struct pf_state_key *a, struct pf_state_key *b,
    struct pfi_kif *kif, u_int dir)
{
	/* a (from hdr) and b (new) must be exact opposites of each other */
	if (a->af == b->af && a->proto == b->proto &&
	    PF_AEQ(&a->addr[0], &b->addr[1], a->af) &&
	    PF_AEQ(&a->addr[1], &b->addr[0], a->af) &&
	    a->port[0] == b->port[1] &&
	    a->port[1] == b->port[0] && a->rdomain == b->rdomain)
		return (0);
	else {
		/* mismatch. must not happen. */
		if (pf_status.debug >= LOG_ERR) {
			log(LOG_ERR,
			    "pf: state key linking mismatch! dir=%s, "
			    "if=%s, stored af=%u, a0: ",
			    dir == PF_OUT ? "OUT" : "IN",
			    kif->pfik_name, a->af);
			pf_print_host(&a->addr[0], a->port[0], a->af);
			addlog(", a1: ");
			pf_print_host(&a->addr[1], a->port[1], a->af);
			addlog(", proto=%u", a->proto);
			addlog(", found af=%u, a0: ", b->af);
			pf_print_host(&b->addr[0], b->port[0], b->af);
			addlog(", a1: ");
			pf_print_host(&b->addr[1], b->port[1], b->af);
			addlog(", proto=%u", b->proto);
			addlog("\n");
		}
		return (-1);
	}
}
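/*
 * pf_find_state() below tries two shortcuts before falling back to the
 * RB-tree lookup: a forwarded packet may still carry the state key
 * from its inbound pass (m_pkthdr.pf.statekey, with a cached ->reverse
 * link), and a locally generated packet may reach the state through
 * its socket's inpcb.  pf_compare_state_keys() above sanity-checks
 * such a cached link: the header key and the cached key must be exact
 * mirror images of each other.
 */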
struct pf_state *
pf_find_state(struct pfi_kif *kif, struct pf_state_key_cmp *key, u_int dir,
    struct mbuf *m)
{
	struct pf_state_key	*sk, *pkt_sk, *inp_sk;
	struct pf_state_item	*si;

	pf_status.fcounters[FCNT_STATE_SEARCH]++;
	if (pf_status.debug >= LOG_DEBUG) {
		log(LOG_DEBUG, "pf: key search, if=%s: ", kif->pfik_name);
		pf_print_state_parts(NULL, (struct pf_state_key *)key, NULL);
		addlog("\n");
	}

	inp_sk = NULL;
	pkt_sk = NULL;
	sk = NULL;
	if (dir == PF_OUT) {
		/* first if block deals with outbound forwarded packet */
		pkt_sk = m->m_pkthdr.pf.statekey;
		if (pf_state_key_isvalid(pkt_sk) &&
		    pf_state_key_isvalid(pkt_sk->reverse)) {
			sk = pkt_sk->reverse;
		} else {
			pf_pkt_unlink_state_key(m);
			pkt_sk = NULL;
		}

		if (pkt_sk == NULL) {
			/* here we deal with local outbound packet */
			if (m->m_pkthdr.pf.inp != NULL) {
				inp_sk = m->m_pkthdr.pf.inp->inp_pf_sk;
				if (pf_state_key_isvalid(inp_sk))
					sk = inp_sk;
				else
					pf_inpcb_unlink_state_key(
					    m->m_pkthdr.pf.inp);
			}
		}
	}

	if (sk == NULL) {
		if ((sk = RB_FIND(pf_state_tree, &pf_statetbl,
		    (struct pf_state_key *)key)) == NULL)
			return (NULL);
		if (dir == PF_OUT && pkt_sk &&
		    pf_compare_state_keys(pkt_sk, sk, kif, dir) == 0)
			pf_state_key_link(sk, pkt_sk);
		else if (dir == PF_OUT)
			pf_inp_link(m, m->m_pkthdr.pf.inp);
	}

	/* remove firewall data from outbound packet */
	if (dir == PF_OUT)
		pf_pkt_addr_changed(m);

	/* list is sorted, if-bound states before floating ones */
	TAILQ_FOREACH(si, &sk->states, entry)
		if ((si->s->kif == pfi_all || si->s->kif == kif) &&
		    ((si->s->key[PF_SK_WIRE]->af == si->s->key[PF_SK_STACK]->af
		    && sk == (dir == PF_IN ? si->s->key[PF_SK_WIRE] :
		    si->s->key[PF_SK_STACK])) ||
		    (si->s->key[PF_SK_WIRE]->af != si->s->key[PF_SK_STACK]->af
		    && dir == PF_IN && (sk == si->s->key[PF_SK_STACK] ||
		    sk == si->s->key[PF_SK_WIRE]))))
			return (si->s);

	return (NULL);
}

struct pf_state *
pf_find_state_all(struct pf_state_key_cmp *key, u_int dir, int *more)
{
	struct pf_state_key	*sk;
	struct pf_state_item	*si, *ret = NULL;

	pf_status.fcounters[FCNT_STATE_SEARCH]++;

	sk = RB_FIND(pf_state_tree, &pf_statetbl, (struct pf_state_key *)key);

	if (sk != NULL) {
		TAILQ_FOREACH(si, &sk->states, entry)
			if (dir == PF_INOUT ||
			    (sk == (dir == PF_IN ? si->s->key[PF_SK_WIRE] :
			    si->s->key[PF_SK_STACK]))) {
				if (more == NULL)
					return (si->s);

				if (ret)
					(*more)++;
				else
					ret = si;
			}
	}
	return (ret ? ret->s : NULL);
}

void
pf_state_export(struct pfsync_state *sp, struct pf_state *st)
{
	int32_t expire;

	bzero(sp, sizeof(struct pfsync_state));

	/* copy from state key */
	sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0];
	sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1];
	sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0];
	sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1];
	sp->key[PF_SK_WIRE].rdomain = htons(st->key[PF_SK_WIRE]->rdomain);
	sp->key[PF_SK_WIRE].af = st->key[PF_SK_WIRE]->af;
	sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0];
	sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1];
	sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0];
	sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1];
	sp->key[PF_SK_STACK].rdomain = htons(st->key[PF_SK_STACK]->rdomain);
	sp->key[PF_SK_STACK].af = st->key[PF_SK_STACK]->af;
	sp->rtableid[PF_SK_WIRE] = htonl(st->rtableid[PF_SK_WIRE]);
	sp->rtableid[PF_SK_STACK] = htonl(st->rtableid[PF_SK_STACK]);
	sp->proto = st->key[PF_SK_WIRE]->proto;
	sp->af = st->key[PF_SK_WIRE]->af;

	/* copy from state */
	strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname));
	memcpy(&sp->rt_addr, &st->rt_addr, sizeof(sp->rt_addr));
	sp->creation = htonl(time_uptime - st->creation);
	expire = pf_state_expires(st);
	if (expire <= time_uptime)
		sp->expire = htonl(0);
	else
		sp->expire = htonl(expire - time_uptime);

	sp->direction = st->direction;
#if NPFLOG > 0
	sp->log = st->log;
#endif	/* NPFLOG > 0 */
	sp->timeout = st->timeout;
	sp->state_flags = htons(st->state_flags);
	if (!SLIST_EMPTY(&st->src_nodes))
		sp->sync_flags |= PFSYNC_FLAG_SRCNODE;

	sp->id = st->id;
	sp->creatorid = st->creatorid;
	pf_state_peer_hton(&st->src, &sp->src);
	pf_state_peer_hton(&st->dst, &sp->dst);

	if (st->rule.ptr == NULL)
		sp->rule = htonl(-1);
	else
		sp->rule = htonl(st->rule.ptr->nr);
	if (st->anchor.ptr == NULL)
		sp->anchor = htonl(-1);
	else
		sp->anchor = htonl(st->anchor.ptr->nr);
	sp->nat_rule = htonl(-1);	/* left for compat, nat_rule is gone */

	pf_state_counter_hton(st->packets[0], sp->packets[0]);
	pf_state_counter_hton(st->packets[1], sp->packets[1]);
	pf_state_counter_hton(st->bytes[0], sp->bytes[0]);
	pf_state_counter_hton(st->bytes[1], sp->bytes[1]);

	sp->max_mss = htons(st->max_mss);
	sp->min_ttl = st->min_ttl;
	sp->set_tos = st->set_tos;
	sp->set_prio[0] = st->set_prio[0];
	sp->set_prio[1] = st->set_prio[1];
}

/* END state table stuff */

void
pf_purge_thread(void *v)
{
	int nloops = 0, s;

	for (;;) {
		tsleep(pf_purge_thread, PWAIT, "pftm", 1 * hz);

		s = splsoftnet();

		/* process a fraction of the state table every second */
		pf_purge_expired_states(1 + (pf_status.states
		    / pf_default_rule.timeout[PFTM_INTERVAL]));

		/* purge other expired types every PFTM_INTERVAL seconds */
		if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
			pf_purge_expired_fragments();
			pf_purge_expired_src_nodes(0);
			nloops = 0;
		}

		splx(s);
	}
}

int32_t
pf_state_expires(const struct pf_state *state)
{
	int32_t		timeout;
	u_int32_t	start;
	u_int32_t	end;
	u_int32_t	states;

	/* handle all PFTM_* > PFTM_MAX here */
	if (state->timeout == PFTM_PURGE)
		return (0);

	KASSERT(state->timeout != PFTM_UNLINKED);
	KASSERT(state->timeout < PFTM_MAX);

	timeout = state->rule.ptr->timeout[state->timeout];
	if (!timeout)
		timeout = pf_default_rule.timeout[state->timeout];

	start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START];
	if (start) {
		end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END];
		states = state->rule.ptr->states_cur;
	} else {
		start = pf_default_rule.timeout[PFTM_ADAPTIVE_START];
		end = pf_default_rule.timeout[PFTM_ADAPTIVE_END];
		states = pf_status.states;
	}
	if (end && states > start && start < end) {
		if (states >= end)
			return (0);

		timeout = timeout * (end - states) / (end - start);
	}

	return (state->expire + timeout);
}
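/*
 * (Worked example of the adaptive scaling above, using the 6000/12000
 * adaptive.start/adaptive.end defaults documented in pf.conf(5).)
 * With a base timeout of 3600 seconds, a table holding 9000 states
 * halves the timeout: 3600 * (12000 - 9000) / (12000 - 6000) = 1800.
 * At 12000 states or more pf_state_expires() returns 0, so idle states
 * expire immediately.
 */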
void
pf_purge_expired_src_nodes(int waslocked)
{
	struct pf_src_node	*cur, *next;
	int			 locked = waslocked;

	for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) {
		next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);

		if (cur->states == 0 && cur->expire <= time_uptime) {
			if (!locked) {
				rw_enter_write(&pf_consistency_lock);
				next = RB_NEXT(pf_src_tree,
				    &tree_src_tracking, cur);
				locked = 1;
			}
			pf_remove_src_node(cur);
		}
	}

	if (locked && !waslocked)
		rw_exit_write(&pf_consistency_lock);
}

void
pf_src_tree_remove_state(struct pf_state *s)
{
	u_int32_t		 timeout;
	struct pf_sn_item	*sni;

	while ((sni = SLIST_FIRST(&s->src_nodes)) != NULL) {
		SLIST_REMOVE_HEAD(&s->src_nodes, next);
		if (s->src.tcp_est)
			--sni->sn->conn;
		if (--sni->sn->states == 0) {
			timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
			if (!timeout)
				timeout =
				    pf_default_rule.timeout[PFTM_SRC_NODE];
			sni->sn->expire = time_uptime + timeout;
		}
		pool_put(&pf_sn_item_pl, sni);
	}
}

/* callers should be at splsoftnet */
void
pf_remove_state(struct pf_state *cur)
{
	splsoftassert(IPL_SOFTNET);

	/* handle load balancing related tasks */
	pf_postprocess_addr(cur);

	if (cur->src.state == PF_TCPS_PROXY_DST) {
		pf_send_tcp(cur->rule.ptr, cur->key[PF_SK_WIRE]->af,
		    &cur->key[PF_SK_WIRE]->addr[1],
		    &cur->key[PF_SK_WIRE]->addr[0],
		    cur->key[PF_SK_WIRE]->port[1],
		    cur->key[PF_SK_WIRE]->port[0],
		    cur->src.seqhi, cur->src.seqlo + 1,
		    TH_RST|TH_ACK, 0, 0, 0, 1, cur->tag,
		    cur->key[PF_SK_WIRE]->rdomain);
	}
	RB_REMOVE(pf_state_tree_id, &tree_id, cur);
#if NPFLOW > 0
	if (cur->state_flags & PFSTATE_PFLOW)
		export_pflow(cur);
#endif	/* NPFLOW > 0 */
#if NPFSYNC > 0
	pfsync_delete_state(cur);
#endif	/* NPFSYNC > 0 */
	cur->timeout = PFTM_UNLINKED;
	pf_src_tree_remove_state(cur);
	pf_detach_state(cur);
}

void
pf_remove_divert_state(struct pf_state_key *sk)
{
	struct pf_state_item	*si;

	TAILQ_FOREACH(si, &sk->states, entry) {
		if (sk == si->s->key[PF_SK_STACK] && si->s->rule.ptr &&
		    si->s->rule.ptr->divert.port) {
			pf_remove_state(si->s);
			break;
		}
	}
}

/* callers should be at splsoftnet and hold the
 * write_lock on pf_consistency_lock */
void
pf_free_state(struct pf_state *cur)
{
	struct pf_rule_item *ri;

	splsoftassert(IPL_SOFTNET);

#if NPFSYNC > 0
	if (pfsync_state_in_use(cur))
		return;
#endif	/* NPFSYNC > 0 */
	KASSERT(cur->timeout == PFTM_UNLINKED);
	if (--cur->rule.ptr->states_cur == 0 &&
	    cur->rule.ptr->src_nodes == 0)
		pf_rm_rule(NULL, cur->rule.ptr);
	if (cur->anchor.ptr != NULL)
		if (--cur->anchor.ptr->states_cur == 0)
			pf_rm_rule(NULL, cur->anchor.ptr);
	while ((ri = SLIST_FIRST(&cur->match_rules))) {
		SLIST_REMOVE_HEAD(&cur->match_rules, entry);
		if (--ri->r->states_cur == 0 &&
		    ri->r->src_nodes == 0)
			pf_rm_rule(NULL, ri->r);
		pool_put(&pf_rule_item_pl, ri);
	}
	pf_normalize_tcp_cleanup(cur);
	pfi_kif_unref(cur->kif, PFI_KIF_REF_STATE);
	TAILQ_REMOVE(&state_list, cur, entry_list);
	if (cur->tag)
		pf_tag_unref(cur->tag);
	pool_put(&pf_state_pl, cur);
	pf_status.fcounters[FCNT_STATE_REMOVALS]++;
	pf_status.states--;
}
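/*
 * Teardown is two-phase: pf_remove_state() unlinks a state and marks
 * it PFTM_UNLINKED, pf_free_state() later returns it to the pool once
 * pfsync no longer references it.  pf_purge_expired_states() below
 * walks a fraction of the state list per run and performs both phases,
 * taking the consistency lock only for the free part.
 */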
void
pf_purge_expired_states(u_int32_t maxcheck)
{
	static struct pf_state	*cur = NULL;
	struct pf_state		*next;
	int			 locked = 0;

	while (maxcheck--) {
		/* wrap to start of list when we hit the end */
		if (cur == NULL) {
			cur = TAILQ_FIRST(&state_list);
			if (cur == NULL)
				break;	/* list empty */
		}

		/* get next state, as cur may get deleted */
		next = TAILQ_NEXT(cur, entry_list);

		if (cur->timeout == PFTM_UNLINKED) {
			/* free removed state */
			if (!locked) {
				rw_enter_write(&pf_consistency_lock);
				locked = 1;
			}
			pf_free_state(cur);
		} else if (pf_state_expires(cur) <= time_uptime) {
			/* remove and free expired state */
			pf_remove_state(cur);
			if (!locked) {
				rw_enter_write(&pf_consistency_lock);
				locked = 1;
			}
			pf_free_state(cur);
		}
		cur = next;
	}

	if (locked)
		rw_exit_write(&pf_consistency_lock);
}

int
pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw)
{
	if (aw->type != PF_ADDR_TABLE)
		return (0);
	if ((aw->p.tbl = pfr_attach_table(rs, aw->v.tblname, 1)) == NULL)
		return (1);
	return (0);
}

void
pf_tbladdr_remove(struct pf_addr_wrap *aw)
{
	if (aw->type != PF_ADDR_TABLE || aw->p.tbl == NULL)
		return;
	pfr_detach_table(aw->p.tbl);
	aw->p.tbl = NULL;
}

void
pf_tbladdr_copyout(struct pf_addr_wrap *aw)
{
	struct pfr_ktable *kt = aw->p.tbl;

	if (aw->type != PF_ADDR_TABLE || kt == NULL)
		return;
	if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL)
		kt = kt->pfrkt_root;
	aw->p.tbl = NULL;
	aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ?
	    kt->pfrkt_cnt : -1;
}

void
pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af)
{
	switch (af) {
	case AF_INET: {
		u_int32_t a = ntohl(addr->addr32[0]);
		addlog("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255,
		    (a>>8)&255, a&255);
		if (p) {
			p = ntohs(p);
			addlog(":%u", p);
		}
		break;
	}
#ifdef INET6
	case AF_INET6: {
		u_int16_t b;
		u_int8_t i, curstart, curend, maxstart, maxend;
		curstart = curend = maxstart = maxend = 255;
		for (i = 0; i < 8; i++) {
			if (!addr->addr16[i]) {
				if (curstart == 255)
					curstart = i;
				curend = i;
			} else {
				if ((curend - curstart) >
				    (maxend - maxstart)) {
					maxstart = curstart;
					maxend = curend;
				}
				curstart = curend = 255;
			}
		}
		if ((curend - curstart) >
		    (maxend - maxstart)) {
			maxstart = curstart;
			maxend = curend;
		}
		for (i = 0; i < 8; i++) {
			if (i >= maxstart && i <= maxend) {
				if (i == 0)
					addlog(":");
				if (i == maxend)
					addlog(":");
			} else {
				b = ntohs(addr->addr16[i]);
				addlog("%x", b);
				if (i < 7)
					addlog(":");
			}
		}
		if (p) {
			p = ntohs(p);
			addlog("[%u]", p);
		}
		break;
	}
#endif /* INET6 */
	}
}
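/*
 * (Example of the output format above.)  The AF_INET6 branch mimics
 * the usual "::" notation: it scans for the longest run of zero 16-bit
 * groups and prints it as an empty gap, so fe80:0:0:0:0:0:0:1 comes
 * out as "fe80::1".  A port, if given, is appended as "[port]" rather
 * than ":port" to avoid clashing with the address's own colons.
 */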
void
pf_print_state(struct pf_state *s)
{
	pf_print_state_parts(s, NULL, NULL);
}

void
pf_print_state_parts(struct pf_state *s,
    struct pf_state_key *skwp, struct pf_state_key *sksp)
{
	struct pf_state_key *skw, *sks;
	u_int8_t proto, dir;

	/* Do our best to fill these, but they're skipped if NULL */
	skw = skwp ? skwp : (s ? s->key[PF_SK_WIRE] : NULL);
	sks = sksp ? sksp : (s ? s->key[PF_SK_STACK] : NULL);
	proto = skw ? skw->proto : (sks ? sks->proto : 0);
	dir = s ? s->direction : 0;

	switch (proto) {
	case IPPROTO_IPV4:
		addlog("IPv4");
		break;
	case IPPROTO_IPV6:
		addlog("IPv6");
		break;
	case IPPROTO_TCP:
		addlog("TCP");
		break;
	case IPPROTO_UDP:
		addlog("UDP");
		break;
	case IPPROTO_ICMP:
		addlog("ICMP");
		break;
	case IPPROTO_ICMPV6:
		addlog("ICMPv6");
		break;
	default:
		addlog("%u", proto);
		break;
	}
	switch (dir) {
	case PF_IN:
		addlog(" in");
		break;
	case PF_OUT:
		addlog(" out");
		break;
	}
	if (skw) {
		addlog(" wire: (%d) ", skw->rdomain);
		pf_print_host(&skw->addr[0], skw->port[0], skw->af);
		addlog(" ");
		pf_print_host(&skw->addr[1], skw->port[1], skw->af);
	}
	if (sks) {
		addlog(" stack: (%d) ", sks->rdomain);
		if (sks != skw) {
			pf_print_host(&sks->addr[0], sks->port[0], sks->af);
			addlog(" ");
			pf_print_host(&sks->addr[1], sks->port[1], sks->af);
		} else
			addlog("-");
	}
	if (s) {
		if (proto == IPPROTO_TCP) {
			addlog(" [lo=%u high=%u win=%u modulator=%u",
			    s->src.seqlo, s->src.seqhi,
			    s->src.max_win, s->src.seqdiff);
			if (s->src.wscale && s->dst.wscale)
				addlog(" wscale=%u",
				    s->src.wscale & PF_WSCALE_MASK);
			addlog("]");
			addlog(" [lo=%u high=%u win=%u modulator=%u",
			    s->dst.seqlo, s->dst.seqhi,
			    s->dst.max_win, s->dst.seqdiff);
			if (s->src.wscale && s->dst.wscale)
				addlog(" wscale=%u",
				    s->dst.wscale & PF_WSCALE_MASK);
			addlog("]");
		}
		addlog(" %u:%u", s->src.state, s->dst.state);
		if (s->rule.ptr)
			addlog(" @%d", s->rule.ptr->nr);
	}
}

void
pf_print_flags(u_int8_t f)
{
	if (f)
		addlog(" ");
	if (f & TH_FIN)
		addlog("F");
	if (f & TH_SYN)
		addlog("S");
	if (f & TH_RST)
		addlog("R");
	if (f & TH_PUSH)
		addlog("P");
	if (f & TH_ACK)
		addlog("A");
	if (f & TH_URG)
		addlog("U");
	if (f & TH_ECE)
		addlog("E");
	if (f & TH_CWR)
		addlog("W");
}

#define	PF_SET_SKIP_STEPS(i)					\
	do {							\
		while (head[i] != cur) {			\
			head[i]->skip[i].ptr = cur;		\
			head[i] = TAILQ_NEXT(head[i], entries);	\
		}						\
	} while (0)
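/*
 * Skip steps are pf's classic ruleset-evaluation shortcut: for each of
 * the PF_SKIP_COUNT criteria, every rule points at the next rule that
 * differs in that criterion.  If, say, rules 1-50 all say "on em0" and
 * a packet arrives on em1, the interface check fails once at rule 1
 * and skip[PF_SKIP_IFP] jumps straight to rule 51 instead of testing
 * the other 49 rules.  pf_calc_skip_steps() below recomputes those
 * pointers when a ruleset is loaded.
 */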
void
pf_calc_skip_steps(struct pf_rulequeue *rules)
{
	struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT];
	int i;

	cur = TAILQ_FIRST(rules);
	prev = cur;
	for (i = 0; i < PF_SKIP_COUNT; ++i)
		head[i] = cur;
	while (cur != NULL) {
		if (cur->kif != prev->kif || cur->ifnot != prev->ifnot)
			PF_SET_SKIP_STEPS(PF_SKIP_IFP);
		if (cur->direction != prev->direction)
			PF_SET_SKIP_STEPS(PF_SKIP_DIR);
		if (cur->onrdomain != prev->onrdomain ||
		    cur->ifnot != prev->ifnot)
			PF_SET_SKIP_STEPS(PF_SKIP_RDOM);
		if (cur->af != prev->af)
			PF_SET_SKIP_STEPS(PF_SKIP_AF);
		if (cur->proto != prev->proto)
			PF_SET_SKIP_STEPS(PF_SKIP_PROTO);
		if (cur->src.neg != prev->src.neg ||
		    pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr))
			PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR);
		if (cur->dst.neg != prev->dst.neg ||
		    pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr))
			PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR);
		if (cur->src.port[0] != prev->src.port[0] ||
		    cur->src.port[1] != prev->src.port[1] ||
		    cur->src.port_op != prev->src.port_op)
			PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT);
		if (cur->dst.port[0] != prev->dst.port[0] ||
		    cur->dst.port[1] != prev->dst.port[1] ||
		    cur->dst.port_op != prev->dst.port_op)
			PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);

		prev = cur;
		cur = TAILQ_NEXT(cur, entries);
	}
	for (i = 0; i < PF_SKIP_COUNT; ++i)
		PF_SET_SKIP_STEPS(i);
}

int
pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2)
{
	if (aw1->type != aw2->type)
		return (1);
	switch (aw1->type) {
	case PF_ADDR_ADDRMASK:
	case PF_ADDR_RANGE:
		if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, AF_INET6))
			return (1);
		if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, AF_INET6))
			return (1);
		return (0);
	case PF_ADDR_DYNIFTL:
		return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt);
	case PF_ADDR_NONE:
	case PF_ADDR_NOROUTE:
	case PF_ADDR_URPFFAILED:
		return (0);
	case PF_ADDR_TABLE:
		return (aw1->p.tbl != aw2->p.tbl);
	case PF_ADDR_RTLABEL:
		return (aw1->v.rtlabel != aw2->v.rtlabel);
	default:
		addlog("invalid address type: %d\n", aw1->type);
		return (1);
	}
}

void
pf_change_ap(struct pf_pdesc *pd, struct pf_addr *a, u_int16_t *p,
    struct pf_addr *an, u_int16_t pn)
{
	if (pd->csum_status == PF_CSUM_UNKNOWN)
		pf_check_proto_cksum(pd, pd->off, pd->tot_len - pd->off,
		    pd->proto, pd->af);
	if (pd->af == pd->naf)
		PF_ACPY(a, an, pd->naf);
	if (p != NULL)
		*p = pn;
}

/* Changes a u_int32_t.  Uses a void * so there are no align restrictions */
void
pf_change_a(struct pf_pdesc *pd, void *a, u_int32_t an)
{
	if (pd->csum_status == PF_CSUM_UNKNOWN)
		pf_check_proto_cksum(pd, pd->off, pd->tot_len - pd->off,
		    pd->proto, pd->af);
	memcpy(a, &an, sizeof(u_int32_t));
}

int
pf_icmp_mapping(struct pf_pdesc *pd, u_int8_t type, int *icmp_dir,
    u_int16_t *virtual_id, u_int16_t *virtual_type)
{
	/*
	 * ICMP types marked with PF_OUT are typically responses to
	 * PF_IN, and will match states in the opposite direction.
	 * PF_IN ICMP types need to match a state with that type.
	 */
	*icmp_dir = PF_OUT;

	/* Queries (and responses) */
	switch (pd->af) {
	case AF_INET:
		switch (type) {
		case ICMP_ECHO:
			*icmp_dir = PF_IN;
			/* FALLTHROUGH */
		case ICMP_ECHOREPLY:
			*virtual_type = ICMP_ECHO;
			*virtual_id = pd->hdr.icmp->icmp_id;
			break;

		case ICMP_TSTAMP:
			*icmp_dir = PF_IN;
			/* FALLTHROUGH */
		case ICMP_TSTAMPREPLY:
			*virtual_type = ICMP_TSTAMP;
			*virtual_id = pd->hdr.icmp->icmp_id;
			break;

		case ICMP_IREQ:
			*icmp_dir = PF_IN;
			/* FALLTHROUGH */
		case ICMP_IREQREPLY:
			*virtual_type = ICMP_IREQ;
			*virtual_id = pd->hdr.icmp->icmp_id;
			break;

		case ICMP_MASKREQ:
			*icmp_dir = PF_IN;
			/* FALLTHROUGH */
		case ICMP_MASKREPLY:
			*virtual_type = ICMP_MASKREQ;
			*virtual_id = pd->hdr.icmp->icmp_id;
			break;

		case ICMP_IPV6_WHEREAREYOU:
			*icmp_dir = PF_IN;
			/* FALLTHROUGH */
		case ICMP_IPV6_IAMHERE:
			*virtual_type = ICMP_IPV6_WHEREAREYOU;
			*virtual_id = 0; /* Nothing sane to match on! */
			break;

		case ICMP_MOBILE_REGREQUEST:
			*icmp_dir = PF_IN;
			/* FALLTHROUGH */
		case ICMP_MOBILE_REGREPLY:
			*virtual_type = ICMP_MOBILE_REGREQUEST;
			*virtual_id = 0; /* Nothing sane to match on! */
			break;

		case ICMP_ROUTERSOLICIT:
			*icmp_dir = PF_IN;
			/* FALLTHROUGH */
		case ICMP_ROUTERADVERT:
			*virtual_type = ICMP_ROUTERSOLICIT;
			*virtual_id = 0; /* Nothing sane to match on! */
			break;

		/* These ICMP types map to other connections */
		case ICMP_UNREACH:
		case ICMP_SOURCEQUENCH:
		case ICMP_REDIRECT:
		case ICMP_TIMXCEED:
		case ICMP_PARAMPROB:
			/* These will not be used, but set them anyway */
			*icmp_dir = PF_IN;
			*virtual_type = htons(type);
			*virtual_id = 0;
			return (1);	/* These types match to another state */

		/*
		 * All remaining ICMP types get their own states,
		 * and will only match in one direction.
		 */
		default:
			*icmp_dir = PF_IN;
			*virtual_type = type;
			*virtual_id = 0;
			break;
		}
		break;
#ifdef INET6
	case AF_INET6:
		switch (type) {
		case ICMP6_ECHO_REQUEST:
			*icmp_dir = PF_IN;
			/* FALLTHROUGH */
		case ICMP6_ECHO_REPLY:
			*virtual_type = ICMP6_ECHO_REQUEST;
			*virtual_id = pd->hdr.icmp6->icmp6_id;
			break;

		case MLD_LISTENER_QUERY:
			*icmp_dir = PF_IN;
			/* FALLTHROUGH */
		case MLD_LISTENER_REPORT: {
			struct mld_hdr *mld = (void *)pd->hdr.icmp6;
			u_int32_t h;

			*virtual_type = MLD_LISTENER_QUERY;
			/* generate fake id for these messages */
			h = mld->mld_addr.s6_addr32[0] ^
			    mld->mld_addr.s6_addr32[1] ^
			    mld->mld_addr.s6_addr32[2] ^
			    mld->mld_addr.s6_addr32[3];
			*virtual_id = (h >> 16) ^ (h & 0xffff);
			break;
		}

		/*
		 * ICMP6_FQDN and ICMP6_NI query/reply are the same type as
		 * ICMP6_WRU
		 */
		case ICMP6_WRUREQUEST:
			*icmp_dir = PF_IN;
			/* FALLTHROUGH */
		case ICMP6_WRUREPLY:
			*virtual_type = ICMP6_WRUREQUEST;
			*virtual_id = 0; /* Nothing sane to match on! */
			break;

		case MLD_MTRACE:
			*icmp_dir = PF_IN;
			/* FALLTHROUGH */
		case MLD_MTRACE_RESP:
			*virtual_type = MLD_MTRACE;
			*virtual_id = 0; /* Nothing sane to match on! */
			break;

		case ND_NEIGHBOR_SOLICIT:
			*icmp_dir = PF_IN;
			/* FALLTHROUGH */
		case ND_NEIGHBOR_ADVERT: {
			struct nd_neighbor_solicit *nd = (void *)pd->hdr.icmp6;
			u_int32_t h;

			*virtual_type = ND_NEIGHBOR_SOLICIT;
			/* generate fake id for these messages */
			h = nd->nd_ns_target.s6_addr32[0] ^
			    nd->nd_ns_target.s6_addr32[1] ^
			    nd->nd_ns_target.s6_addr32[2] ^
			    nd->nd_ns_target.s6_addr32[3];
			*virtual_id = (h >> 16) ^ (h & 0xffff);
			break;
		}

		/*
		 * These ICMP types map to other connections.
		 * ND_REDIRECT can't be in this list because the triggering
		 * packet header is optional.
		 */
		case ICMP6_DST_UNREACH:
		case ICMP6_PACKET_TOO_BIG:
		case ICMP6_TIME_EXCEEDED:
		case ICMP6_PARAM_PROB:
			/* These will not be used, but set them anyway */
			*icmp_dir = PF_IN;
			*virtual_type = htons(type);
			*virtual_id = 0;
			return (1);	/* These types match to another state */
		/*
		 * All remaining ICMP6 types get their own states,
		 * and will only match in one direction.
		 */
		default:
			*icmp_dir = PF_IN;
			*virtual_type = type;
			*virtual_id = 0;
			break;
		}
		break;
#endif /* INET6 */
	}
	*virtual_type = htons(*virtual_type);
	return (0);  /* These types match to their own state */
}
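/*
 * pf_icmp_mapping() above collapses a query/reply pair onto one state:
 * the query direction is PF_IN, the reply PF_OUT, and both map to the
 * same (virtual_type, virtual_id) pair, e.g. an echo reply matches the
 * state keyed by ICMP_ECHO and the echo id.  Error messages such as
 * ICMP_UNREACH return 1 instead, meaning they must be matched against
 * the state of the connection quoted in their payload;
 * pf_change_icmp() below is the helper that rewrites that quoted
 * (inner) address and port when such a state was translated.
 */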
void
pf_change_icmp(struct pf_pdesc *pd, struct pf_addr *ia, u_int16_t *ip,
    struct pf_addr *oa, struct pf_addr *na, u_int16_t np)
{
	if (pd->csum_status == PF_CSUM_UNKNOWN)
		pf_check_proto_cksum(pd, pd->off, pd->tot_len - pd->off,
		    pd->proto, pd->af);

	/* Change inner protocol port */
	if (ip != NULL)
		*ip = np;

	/* Change inner ip address */
	PF_ACPY(ia, na, pd->af);

	/* Outer ip address, fix outer icmpv6 checksum, if necessary. */
	if (oa)
		PF_ACPY(oa, na, pd->af);
}

#if INET6
int
pf_translate_af(struct pf_pdesc *pd)
{
	struct mbuf		*mp;
	struct ip		*ip4;
	struct ip6_hdr		*ip6;
	struct icmp6_hdr	*icmp;
	int			 hlen;

	if (pd->csum_status == PF_CSUM_UNKNOWN)
		pf_check_proto_cksum(pd, pd->off, pd->tot_len - pd->off,
		    pd->proto, pd->af);

	hlen = pd->naf == AF_INET ? sizeof(*ip4) : sizeof(*ip6);

	/* trim the old header */
	m_adj(pd->m, pd->off);

	/* prepend a new one */
	if ((M_PREPEND(pd->m, hlen, M_DONTWAIT)) == NULL) {
		pd->m = NULL;
		return (-1);
	}

	switch (pd->naf) {
	case AF_INET:
		ip4 = mtod(pd->m, struct ip *);
		bzero(ip4, hlen);
		ip4->ip_v = IPVERSION;
		ip4->ip_hl = hlen >> 2;
		ip4->ip_tos = pd->tos;
		ip4->ip_len = htons(hlen + (pd->tot_len - pd->off));
		ip4->ip_id = htons(ip_randomid());
		ip4->ip_off = htons(IP_DF);
		ip4->ip_ttl = pd->ttl;
		ip4->ip_p = pd->proto;
		ip4->ip_src = pd->nsaddr.v4;
		ip4->ip_dst = pd->ndaddr.v4;
		break;
	case AF_INET6:
		ip6 = mtod(pd->m, struct ip6_hdr *);
		bzero(ip6, hlen);
		ip6->ip6_vfc = IPV6_VERSION;
		ip6->ip6_flow |= htonl((u_int32_t)pd->tos << 20);
		ip6->ip6_plen = htons(pd->tot_len - pd->off);
		ip6->ip6_nxt = pd->proto;
		if (!pd->ttl || pd->ttl > IPV6_DEFHLIM)
			ip6->ip6_hlim = IPV6_DEFHLIM;
		else
			ip6->ip6_hlim = pd->ttl;
		ip6->ip6_src = pd->nsaddr.v6;
		ip6->ip6_dst = pd->ndaddr.v6;
		break;
	default:
		return (-1);
	}

	/* recalculate icmp/icmp6 checksums */
	if (pd->proto == IPPROTO_ICMP || pd->proto == IPPROTO_ICMPV6) {
		int off;
		if ((mp = m_pulldown(pd->m, hlen, sizeof(*icmp), &off)) ==
		    NULL) {
			pd->m = NULL;
			return (-1);
		}
		icmp = (struct icmp6_hdr *)(mp->m_data + off);
		icmp->icmp6_cksum = 0;
		icmp->icmp6_cksum = pd->naf == AF_INET ?
		    in4_cksum(pd->m, 0, hlen, ntohs(ip4->ip_len) - hlen) :
		    in6_cksum(pd->m, IPPROTO_ICMPV6, hlen,
		    ntohs(ip6->ip6_plen));
	}

	return (0);
}
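/*
 * pf_change_icmp_af() below does for the quoted packet inside an ICMP
 * error what pf_translate_af() above does for the outer packet: it
 * splits the mbuf chain at the embedded ip/ip6 header, replaces that
 * header with one of the other address family, and glues the chain
 * back together, adjusting pd2->off and pd->tot_len by the size
 * difference (20 bytes between a plain, option-less IPv4 header and an
 * IPv6 header).
 */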
int
pf_change_icmp_af(struct mbuf *m, int off, struct pf_pdesc *pd,
    struct pf_pdesc *pd2, struct pf_addr *src, struct pf_addr *dst,
    sa_family_t af, sa_family_t naf)
{
	struct mbuf		*n = NULL;
	struct ip		*ip4;
	struct ip6_hdr		*ip6;
	int			 hlen, olen, mlen;

	if (pd->csum_status == PF_CSUM_UNKNOWN)
		pf_check_proto_cksum(pd, pd->off, pd->tot_len - pd->off,
		    pd->proto, pd->af);

	if (af == naf || (af != AF_INET && af != AF_INET6) ||
	    (naf != AF_INET && naf != AF_INET6))
		return (-1);

	/* split the mbuf chain on the inner ip/ip6 header boundary */
	if ((n = m_split(m, off, M_DONTWAIT)) == NULL)
		return (-1);

	/* old header */
	olen = pd2->off - off;
	/* new header */
	hlen = naf == AF_INET ? sizeof(*ip4) : sizeof(*ip6);

	/* trim old header */
	m_adj(n, olen);

	/* prepend a new one */
	if ((M_PREPEND(n, hlen, M_DONTWAIT)) == NULL)
		return (-1);

	/* translate inner ip/ip6 header */
	switch (naf) {
	case AF_INET:
		ip4 = mtod(n, struct ip *);
		bzero(ip4, sizeof(*ip4));
		ip4->ip_v = IPVERSION;
		ip4->ip_hl = sizeof(*ip4) >> 2;
		ip4->ip_len = htons(sizeof(*ip4) + pd2->tot_len - olen);
		ip4->ip_id = htons(ip_randomid());
		ip4->ip_off = htons(IP_DF);
		ip4->ip_ttl = pd2->ttl;
		if (pd2->proto == IPPROTO_ICMPV6)
			ip4->ip_p = IPPROTO_ICMP;
		else
			ip4->ip_p = pd2->proto;
		ip4->ip_src = src->v4;
		ip4->ip_dst = dst->v4;
		ip4->ip_sum = in_cksum(n, ip4->ip_hl << 2);
		break;
	case AF_INET6:
		ip6 = mtod(n, struct ip6_hdr *);
		bzero(ip6, sizeof(*ip6));
		ip6->ip6_vfc = IPV6_VERSION;
		ip6->ip6_plen = htons(pd2->tot_len - olen);
		if (pd2->proto == IPPROTO_ICMP)
			ip6->ip6_nxt = IPPROTO_ICMPV6;
		else
			ip6->ip6_nxt = pd2->proto;
		if (!pd2->ttl || pd2->ttl > IPV6_DEFHLIM)
			ip6->ip6_hlim = IPV6_DEFHLIM;
		else
			ip6->ip6_hlim = pd2->ttl;
		ip6->ip6_src = src->v6;
		ip6->ip6_dst = dst->v6;
		break;
	}

	/* adjust payload offset and total packet length */
	pd2->off += hlen - olen;
	pd->tot_len += hlen - olen;

	/* merge modified inner packet with the original header */
	mlen = n->m_pkthdr.len;
	m_cat(m, n);
	m->m_pkthdr.len += mlen;

	return (0);
}


#define PTR_IP(field)	(offsetof(struct ip, field))
#define PTR_IP6(field)	(offsetof(struct ip6_hdr, field))

int
pf_translate_icmp_af(int af, void *arg)
{
	struct icmp		*icmp4;
	struct icmp6_hdr	*icmp6;
	u_int32_t		 mtu;
	int32_t			 ptr = -1;
	u_int8_t		 type;
	u_int8_t		 code;

	switch (af) {
	case AF_INET:
		icmp6 = arg;
		type = icmp6->icmp6_type;
		code = icmp6->icmp6_code;
		mtu = ntohl(icmp6->icmp6_mtu);

		switch (type) {
		case ICMP6_ECHO_REQUEST:
			type = ICMP_ECHO;
			break;
		case ICMP6_ECHO_REPLY:
			type = ICMP_ECHOREPLY;
			break;
		case ICMP6_DST_UNREACH:
			type = ICMP_UNREACH;
			switch (code) {
			case ICMP6_DST_UNREACH_NOROUTE:
			case ICMP6_DST_UNREACH_BEYONDSCOPE:
			case ICMP6_DST_UNREACH_ADDR:
				code = ICMP_UNREACH_HOST;
				break;
			case ICMP6_DST_UNREACH_ADMIN:
				code = ICMP_UNREACH_HOST_PROHIB;
				break;
			case ICMP6_DST_UNREACH_NOPORT:
2120 				code = ICMP_UNREACH_PORT;
2121 				break;
2122 			default:
2123 				return (-1);
2124 			}
2125 			break;
2126 		case ICMP6_PACKET_TOO_BIG:
2127 			type = ICMP_UNREACH;
2128 			code = ICMP_UNREACH_NEEDFRAG;
2129 			mtu -= 20;
2130 			break;
2131 		case ICMP6_TIME_EXCEEDED:
2132 			type = ICMP_TIMXCEED;
2133 			break;
2134 		case ICMP6_PARAM_PROB:
2135 			switch (code) {
2136 			case ICMP6_PARAMPROB_HEADER:
2137 				type = ICMP_PARAMPROB;
2138 				code = ICMP_PARAMPROB_ERRATPTR;
2139 				ptr = ntohl(icmp6->icmp6_pptr);
2140
2141 				if (ptr == PTR_IP6(ip6_vfc))
2142 					; /* preserve */
2143 				else if (ptr == PTR_IP6(ip6_vfc) + 1)
2144 					ptr = PTR_IP(ip_tos);
2145 				else if (ptr == PTR_IP6(ip6_plen) ||
2146 				    ptr == PTR_IP6(ip6_plen) + 1)
2147 					ptr = PTR_IP(ip_len);
2148 				else if (ptr == PTR_IP6(ip6_nxt))
2149 					ptr = PTR_IP(ip_p);
2150 				else if (ptr == PTR_IP6(ip6_hlim))
2151 					ptr = PTR_IP(ip_ttl);
2152 				else if (ptr >= PTR_IP6(ip6_src) &&
2153 				    ptr < PTR_IP6(ip6_dst))
2154 					ptr = PTR_IP(ip_src);
2155 				else if (ptr >= PTR_IP6(ip6_dst) &&
2156 				    ptr < sizeof(struct ip6_hdr))
2157 					ptr = PTR_IP(ip_dst);
2158 				else {
2159 					return (-1);
2160 				}
2161 				break;
2162 			case ICMP6_PARAMPROB_NEXTHEADER:
2163 				type = ICMP_UNREACH;
2164 				code = ICMP_UNREACH_PROTOCOL;
2165 				break;
2166 			default:
2167 				return (-1);
2168 			}
2169 			break;
2170 		default:
2171 			return (-1);
2172 		}
2173 		icmp6->icmp6_type = type;
2174 		icmp6->icmp6_code = code;
2175 		/* aligns well with an icmpv4 nextmtu */
2176 		icmp6->icmp6_mtu = htonl(mtu);
2177 		/* icmpv4 pptr is a single byte, stored in the most significant byte */
2178 		if (ptr >= 0)
2179 			icmp6->icmp6_pptr = htonl(ptr << 24);
2180 		break;
2181 	case AF_INET6:
2182 		icmp4 = arg;
2183 		type = icmp4->icmp_type;
2184 		code = icmp4->icmp_code;
2185 		mtu = ntohs(icmp4->icmp_nextmtu);
2186
2187 		switch (type) {
2188 		case ICMP_ECHO:
2189 			type = ICMP6_ECHO_REQUEST;
2190 			break;
2191 		case ICMP_ECHOREPLY:
2192 			type = ICMP6_ECHO_REPLY;
2193 			break;
2194 		case ICMP_UNREACH:
2195 			type = ICMP6_DST_UNREACH;
2196 			switch (code) {
2197 			case ICMP_UNREACH_NET:
2198 			case ICMP_UNREACH_HOST:
2199 			case ICMP_UNREACH_NET_UNKNOWN:
2200 			case ICMP_UNREACH_HOST_UNKNOWN:
2201 			case ICMP_UNREACH_ISOLATED:
2202 			case ICMP_UNREACH_TOSNET:
2203 			case ICMP_UNREACH_TOSHOST:
2204 				code = ICMP6_DST_UNREACH_NOROUTE;
2205 				break;
2206 			case ICMP_UNREACH_PORT:
2207 				code = ICMP6_DST_UNREACH_NOPORT;
2208 				break;
2209 			case ICMP_UNREACH_NET_PROHIB:
2210 			case ICMP_UNREACH_HOST_PROHIB:
2211 			case ICMP_UNREACH_FILTER_PROHIB:
2212 			case ICMP_UNREACH_PRECEDENCE_CUTOFF:
2213 				code = ICMP6_DST_UNREACH_ADMIN;
2214 				break;
2215 			case ICMP_UNREACH_PROTOCOL:
2216 				type = ICMP6_PARAM_PROB;
2217 				code = ICMP6_PARAMPROB_NEXTHEADER;
2218 				ptr = offsetof(struct ip6_hdr, ip6_nxt);
2219 				break;
2220 			case ICMP_UNREACH_NEEDFRAG:
2221 				type = ICMP6_PACKET_TOO_BIG;
2222 				code = 0;
2223 				mtu += 20;
2224 				break;
2225 			default:
2226 				return (-1);
2227 			}
2228 			break;
2229 		case ICMP_TIMXCEED:
2230 			type = ICMP6_TIME_EXCEEDED;
2231 			break;
2232 		case ICMP_PARAMPROB:
2233 			type = ICMP6_PARAM_PROB;
2234 			switch (code) {
2235 			case ICMP_PARAMPROB_ERRATPTR:
2236 				code = ICMP6_PARAMPROB_HEADER;
2237 				break;
2238 			case ICMP_PARAMPROB_LENGTH:
2239 				code = ICMP6_PARAMPROB_HEADER;
2240 				break;
2241 			default:
2242 				return (-1);
2243 			}
2244
2245 			ptr = icmp4->icmp_pptr;
2246 			if (ptr == 0 || ptr == PTR_IP(ip_tos))
2247 				; /* preserve */
2248 			else if (ptr == PTR_IP(ip_len) ||
2249 			    ptr == PTR_IP(ip_len) + 1)
2250 				ptr = PTR_IP6(ip6_plen);
2251 			else if (ptr == PTR_IP(ip_ttl))
2252 				ptr = PTR_IP6(ip6_hlim);
2253 			else if (ptr == PTR_IP(ip_p))
2254 				ptr = PTR_IP6(ip6_nxt);
2255 			else if (ptr >= PTR_IP(ip_src) &&
2256 			    ptr < PTR_IP(ip_dst))
2257 ptr = PTR_IP6(ip6_src); 2258 else if (ptr >= PTR_IP(ip_dst) && 2259 ptr < sizeof(struct ip)) 2260 ptr = PTR_IP6(ip6_dst); 2261 else { 2262 return (-1); 2263 } 2264 break; 2265 default: 2266 return (-1); 2267 } 2268 icmp4->icmp_type = type; 2269 icmp4->icmp_code = code; 2270 icmp4->icmp_nextmtu = htons(mtu); 2271 if (ptr >= 0) 2272 icmp4->icmp_void = htonl(ptr); 2273 break; 2274 } 2275 2276 return (0); 2277 } 2278 #endif /* INET6 */ 2279 2280 /* 2281 * Need to modulate the sequence numbers in the TCP SACK option 2282 * (credits to Krzysztof Pfaff for report and patch) 2283 */ 2284 int 2285 pf_modulate_sack(struct pf_pdesc *pd, struct pf_state_peer *dst) 2286 { 2287 struct tcphdr *th = pd->hdr.tcp; 2288 int hlen = (th->th_off << 2) - sizeof(*th); 2289 int thoptlen = hlen; 2290 u_int8_t opts[MAX_TCPOPTLEN], *opt = opts; 2291 int copyback = 0, i, olen; 2292 struct sackblk sack; 2293 2294 #define TCPOLEN_SACKLEN (TCPOLEN_SACK + 2) 2295 if (hlen < TCPOLEN_SACKLEN || hlen > MAX_TCPOPTLEN || !pf_pull_hdr( 2296 pd->m, pd->off + sizeof(*th), opts, hlen, NULL, NULL, pd->af)) 2297 return 0; 2298 2299 while (hlen >= TCPOLEN_SACKLEN) { 2300 olen = opt[1]; 2301 switch (*opt) { 2302 case TCPOPT_EOL: /* FALLTHROUGH */ 2303 case TCPOPT_NOP: 2304 opt++; 2305 hlen--; 2306 break; 2307 case TCPOPT_SACK: 2308 if (olen > hlen) 2309 olen = hlen; 2310 if (olen >= TCPOLEN_SACKLEN) { 2311 for (i = 2; i + TCPOLEN_SACK <= olen; 2312 i += TCPOLEN_SACK) { 2313 memcpy(&sack, &opt[i], sizeof(sack)); 2314 pf_change_a(pd, &sack.start, 2315 htonl(ntohl(sack.start) - 2316 dst->seqdiff)); 2317 pf_change_a(pd, &sack.end, 2318 htonl(ntohl(sack.end) - 2319 dst->seqdiff)); 2320 memcpy(&opt[i], &sack, sizeof(sack)); 2321 } 2322 copyback = 1; 2323 } 2324 /* FALLTHROUGH */ 2325 default: 2326 if (olen < 2) 2327 olen = 2; 2328 hlen -= olen; 2329 opt += olen; 2330 } 2331 } 2332 2333 if (copyback) 2334 m_copyback(pd->m, pd->off + sizeof(*th), thoptlen, opts, 2335 M_NOWAIT); 2336 return (copyback); 2337 } 2338 2339 void 2340 pf_send_tcp(const struct pf_rule *r, sa_family_t af, 2341 const struct pf_addr *saddr, const struct pf_addr *daddr, 2342 u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack, 2343 u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag, 2344 u_int16_t rtag, u_int rdom) 2345 { 2346 struct mbuf *m; 2347 int len, tlen; 2348 struct ip *h; 2349 #ifdef INET6 2350 struct ip6_hdr *h6; 2351 #endif /* INET6 */ 2352 struct tcphdr *th; 2353 char *opt; 2354 2355 /* maximum segment size tcp option */ 2356 tlen = sizeof(struct tcphdr); 2357 if (mss) 2358 tlen += 4; 2359 2360 switch (af) { 2361 case AF_INET: 2362 len = sizeof(struct ip) + tlen; 2363 break; 2364 #ifdef INET6 2365 case AF_INET6: 2366 len = sizeof(struct ip6_hdr) + tlen; 2367 break; 2368 #endif /* INET6 */ 2369 default: 2370 unhandled_af(af); 2371 } 2372 2373 /* create outgoing mbuf */ 2374 m = m_gethdr(M_DONTWAIT, MT_HEADER); 2375 if (m == NULL) 2376 return; 2377 if (tag) 2378 m->m_pkthdr.pf.flags |= PF_TAG_GENERATED; 2379 m->m_pkthdr.pf.tag = rtag; 2380 m->m_pkthdr.ph_rtableid = rdom; 2381 if (r && (r->scrub_flags & PFSTATE_SETPRIO)) 2382 m->m_pkthdr.pf.prio = r->set_prio[0]; 2383 if (r && r->qid) 2384 m->m_pkthdr.pf.qid = r->qid; 2385 m->m_data += max_linkhdr; 2386 m->m_pkthdr.len = m->m_len = len; 2387 m->m_pkthdr.ph_ifidx = 0; 2388 m->m_pkthdr.csum_flags |= M_TCP_CSUM_OUT; 2389 bzero(m->m_data, len); 2390 switch (af) { 2391 case AF_INET: 2392 h = mtod(m, struct ip *); 2393 h->ip_p = IPPROTO_TCP; 2394 h->ip_len = htons(tlen); 2395 h->ip_v = 
4; 2396 h->ip_hl = sizeof(*h) >> 2; 2397 h->ip_tos = IPTOS_LOWDELAY; 2398 h->ip_len = htons(len); 2399 h->ip_off = htons(ip_mtudisc ? IP_DF : 0); 2400 h->ip_ttl = ttl ? ttl : ip_defttl; 2401 h->ip_sum = 0; 2402 h->ip_src.s_addr = saddr->v4.s_addr; 2403 h->ip_dst.s_addr = daddr->v4.s_addr; 2404 2405 th = (struct tcphdr *)((caddr_t)h + sizeof(struct ip)); 2406 break; 2407 #ifdef INET6 2408 case AF_INET6: 2409 h6 = mtod(m, struct ip6_hdr *); 2410 h6->ip6_nxt = IPPROTO_TCP; 2411 h6->ip6_plen = htons(tlen); 2412 h6->ip6_vfc |= IPV6_VERSION; 2413 h6->ip6_hlim = IPV6_DEFHLIM; 2414 memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr)); 2415 memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr)); 2416 2417 th = (struct tcphdr *)((caddr_t)h6 + sizeof(struct ip6_hdr)); 2418 break; 2419 #endif /* INET6 */ 2420 default: 2421 unhandled_af(af); 2422 } 2423 2424 /* TCP header */ 2425 th->th_sport = sport; 2426 th->th_dport = dport; 2427 th->th_seq = htonl(seq); 2428 th->th_ack = htonl(ack); 2429 th->th_off = tlen >> 2; 2430 th->th_flags = flags; 2431 th->th_win = htons(win); 2432 2433 if (mss) { 2434 opt = (char *)(th + 1); 2435 opt[0] = TCPOPT_MAXSEG; 2436 opt[1] = 4; 2437 mss = htons(mss); 2438 memcpy((opt + 2), &mss, 2); 2439 } 2440 2441 switch (af) { 2442 case AF_INET: 2443 ip_send(m); 2444 break; 2445 #ifdef INET6 2446 case AF_INET6: 2447 ip6_send(m); 2448 break; 2449 #endif /* INET6 */ 2450 } 2451 } 2452 2453 void 2454 pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af, 2455 struct pf_rule *r, u_int rdomain) 2456 { 2457 struct mbuf *m0; 2458 2459 if ((m0 = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) 2460 return; 2461 2462 m0->m_pkthdr.pf.flags |= PF_TAG_GENERATED; 2463 m0->m_pkthdr.ph_rtableid = rdomain; 2464 if (r && (r->scrub_flags & PFSTATE_SETPRIO)) 2465 m0->m_pkthdr.pf.prio = r->set_prio[0]; 2466 if (r && r->qid) 2467 m0->m_pkthdr.pf.qid = r->qid; 2468 2469 switch (af) { 2470 case AF_INET: 2471 icmp_error(m0, type, code, 0, 0); 2472 break; 2473 #ifdef INET6 2474 case AF_INET6: 2475 icmp6_error(m0, type, code, 0); 2476 break; 2477 #endif /* INET6 */ 2478 } 2479 } 2480 2481 /* 2482 * Return 1 if the addresses a and b match (with mask m), otherwise return 0. 2483 * If n is 0, they match if they are equal. If n is != 0, they match if they 2484 * are different. 2485 */ 2486 int 2487 pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m, 2488 struct pf_addr *b, sa_family_t af) 2489 { 2490 int match = 0; 2491 2492 switch (af) { 2493 case AF_INET: 2494 if ((a->addr32[0] & m->addr32[0]) == 2495 (b->addr32[0] & m->addr32[0])) 2496 match++; 2497 break; 2498 #ifdef INET6 2499 case AF_INET6: 2500 if (((a->addr32[0] & m->addr32[0]) == 2501 (b->addr32[0] & m->addr32[0])) && 2502 ((a->addr32[1] & m->addr32[1]) == 2503 (b->addr32[1] & m->addr32[1])) && 2504 ((a->addr32[2] & m->addr32[2]) == 2505 (b->addr32[2] & m->addr32[2])) && 2506 ((a->addr32[3] & m->addr32[3]) == 2507 (b->addr32[3] & m->addr32[3]))) 2508 match++; 2509 break; 2510 #endif /* INET6 */ 2511 } 2512 if (match) { 2513 if (n) 2514 return (0); 2515 else 2516 return (1); 2517 } else { 2518 if (n) 2519 return (1); 2520 else 2521 return (0); 2522 } 2523 } 2524 2525 /* 2526 * Return 1 if b <= a <= e, otherwise return 0. 
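 * Addresses are compared in host byte order; for IPv6 the four
 * 32-bit words are compared most significant word first.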
2527  */
2528 int
2529 pf_match_addr_range(struct pf_addr *b, struct pf_addr *e,
2530     struct pf_addr *a, sa_family_t af)
2531 {
2532 	switch (af) {
2533 	case AF_INET:
2534 		if ((ntohl(a->addr32[0]) < ntohl(b->addr32[0])) ||
2535 		    (ntohl(a->addr32[0]) > ntohl(e->addr32[0])))
2536 			return (0);
2537 		break;
2538 #ifdef INET6
2539 	case AF_INET6: {
2540 		int i;
2541
2542 		/* check a >= b */
2543 		for (i = 0; i < 4; ++i)
2544 			if (ntohl(a->addr32[i]) > ntohl(b->addr32[i]))
2545 				break;
2546 			else if (ntohl(a->addr32[i]) < ntohl(b->addr32[i]))
2547 				return (0);
2548 		/* check a <= e */
2549 		for (i = 0; i < 4; ++i)
2550 			if (ntohl(a->addr32[i]) < ntohl(e->addr32[i]))
2551 				break;
2552 			else if (ntohl(a->addr32[i]) > ntohl(e->addr32[i]))
2553 				return (0);
2554 		break;
2555 	}
2556 #endif /* INET6 */
2557 	}
2558 	return (1);
2559 }
2560
2561 int
2562 pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p)
2563 {
2564 	switch (op) {
2565 	case PF_OP_IRG:
2566 		return ((p > a1) && (p < a2));
2567 	case PF_OP_XRG:
2568 		return ((p < a1) || (p > a2));
2569 	case PF_OP_RRG:
2570 		return ((p >= a1) && (p <= a2));
2571 	case PF_OP_EQ:
2572 		return (p == a1);
2573 	case PF_OP_NE:
2574 		return (p != a1);
2575 	case PF_OP_LT:
2576 		return (p < a1);
2577 	case PF_OP_LE:
2578 		return (p <= a1);
2579 	case PF_OP_GT:
2580 		return (p > a1);
2581 	case PF_OP_GE:
2582 		return (p >= a1);
2583 	}
2584 	return (0); /* never reached */
2585 }
2586
2587 int
2588 pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p)
2589 {
2590 	return (pf_match(op, ntohs(a1), ntohs(a2), ntohs(p)));
2591 }
2592
2593 int
2594 pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u)
2595 {
2596 	if (u == UID_MAX && op != PF_OP_EQ && op != PF_OP_NE)
2597 		return (0);
2598 	return (pf_match(op, a1, a2, u));
2599 }
2600
2601 int
2602 pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g)
2603 {
2604 	if (g == GID_MAX && op != PF_OP_EQ && op != PF_OP_NE)
2605 		return (0);
2606 	return (pf_match(op, a1, a2, g));
2607 }
2608
2609 int
2610 pf_match_tag(struct mbuf *m, struct pf_rule *r, int *tag)
2611 {
2612 	if (*tag == -1)
2613 		*tag = m->m_pkthdr.pf.tag;
2614
2615 	return ((!r->match_tag_not && r->match_tag == *tag) ||
2616 	    (r->match_tag_not && r->match_tag != *tag));
2617 }
2618
2619 int
2620 pf_match_rcvif(struct mbuf *m, struct pf_rule *r)
2621 {
2622 	struct ifnet *ifp;
2623 	struct pfi_kif *kif;
2624
2625 	ifp = if_get(m->m_pkthdr.ph_ifidx);
2626 	if (ifp == NULL)
2627 		return (0);
2628
2629 #if NCARP > 0
2630 	if (ifp->if_type == IFT_CARP && ifp->if_carpdev)
2631 		kif = (struct pfi_kif *)ifp->if_carpdev->if_pf_kif;
2632 	else
2633 #endif /* NCARP */
2634 	kif = (struct pfi_kif *)ifp->if_pf_kif;
2635
2636 	if_put(ifp);
2637
2638 	if (kif == NULL) {
2639 		DPFPRINTF(LOG_ERR,
2640 		    "pf_match_rcvif: kif == NULL, @%d via %s",
2641 		    r->nr, r->rcv_ifname);
2642 		return (0);
2643 	}
2644
2645 	return (pfi_kif_match(r->rcv_kif, kif));
2646 }
2647
2648 void
2649 pf_tag_packet(struct mbuf *m, int tag, int rtableid)
2650 {
2651 	if (tag > 0)
2652 		m->m_pkthdr.pf.tag = tag;
2653 	if (rtableid >= 0)
2654 		m->m_pkthdr.ph_rtableid = (u_int)rtableid;
2655 }
2656
2657 void
2658 pf_step_into_anchor(int *depth, struct pf_ruleset **rs,
2659     struct pf_rule **r, struct pf_rule **a)
2660 {
2661 	struct pf_anchor_stackframe *f;
2662
2663 	if (*depth >= sizeof(pf_anchor_stack) /
2664 	    sizeof(pf_anchor_stack[0])) {
2665 		log(LOG_ERR, "pf_step_into_anchor: stack overflow\n");
2666 		*r = TAILQ_NEXT(*r, entries);
2667 		return;
2668 	} else if (a != NULL)
2669 		*a = *r;
2670 	f = pf_anchor_stack + (*depth)++;
2671 	f->rs = *rs;
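	/*
	 * Save this frame's position so pf_step_out_of_anchor()
	 * can restore the ruleset and continue with the rule
	 * following this anchor.
	 */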
f->r = *r; 2673 if ((*r)->anchor_wildcard) { 2674 f->parent = &(*r)->anchor->children; 2675 if ((f->child = RB_MIN(pf_anchor_node, f->parent)) == NULL) { 2676 *r = NULL; 2677 return; 2678 } 2679 *rs = &f->child->ruleset; 2680 } else { 2681 f->parent = NULL; 2682 f->child = NULL; 2683 *rs = &(*r)->anchor->ruleset; 2684 } 2685 *r = TAILQ_FIRST((*rs)->rules.active.ptr); 2686 } 2687 2688 int 2689 pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs, 2690 struct pf_rule **r, struct pf_rule **a, int *match) 2691 { 2692 struct pf_anchor_stackframe *f; 2693 int quick = 0; 2694 2695 do { 2696 if (*depth <= 0) 2697 break; 2698 f = pf_anchor_stack + *depth - 1; 2699 if (f->parent != NULL && f->child != NULL) { 2700 f->child = RB_NEXT(pf_anchor_node, f->parent, f->child); 2701 if (f->child != NULL) { 2702 *rs = &f->child->ruleset; 2703 *r = TAILQ_FIRST((*rs)->rules.active.ptr); 2704 if (*r == NULL) 2705 continue; 2706 else 2707 break; 2708 } 2709 } 2710 (*depth)--; 2711 if (*depth == 0 && a != NULL) 2712 *a = NULL; 2713 else if (a != NULL) 2714 *a = f->r; 2715 *rs = f->rs; 2716 if (*match > *depth) { 2717 *match = *depth; 2718 if (f->r->quick) 2719 quick = 1; 2720 } 2721 *r = TAILQ_NEXT(f->r, entries); 2722 } while (*r == NULL); 2723 2724 return (quick); 2725 } 2726 2727 void 2728 pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr, 2729 struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af) 2730 { 2731 switch (af) { 2732 case AF_INET: 2733 naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) | 2734 ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]); 2735 break; 2736 #ifdef INET6 2737 case AF_INET6: 2738 naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) | 2739 ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]); 2740 naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) | 2741 ((rmask->addr32[1] ^ 0xffffffff ) & saddr->addr32[1]); 2742 naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) | 2743 ((rmask->addr32[2] ^ 0xffffffff ) & saddr->addr32[2]); 2744 naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) | 2745 ((rmask->addr32[3] ^ 0xffffffff ) & saddr->addr32[3]); 2746 break; 2747 #endif /* INET6 */ 2748 default: 2749 unhandled_af(af); 2750 } 2751 } 2752 2753 void 2754 pf_addr_inc(struct pf_addr *addr, sa_family_t af) 2755 { 2756 switch (af) { 2757 case AF_INET: 2758 addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1); 2759 break; 2760 #ifdef INET6 2761 case AF_INET6: 2762 if (addr->addr32[3] == 0xffffffff) { 2763 addr->addr32[3] = 0; 2764 if (addr->addr32[2] == 0xffffffff) { 2765 addr->addr32[2] = 0; 2766 if (addr->addr32[1] == 0xffffffff) { 2767 addr->addr32[1] = 0; 2768 addr->addr32[0] = 2769 htonl(ntohl(addr->addr32[0]) + 1); 2770 } else 2771 addr->addr32[1] = 2772 htonl(ntohl(addr->addr32[1]) + 1); 2773 } else 2774 addr->addr32[2] = 2775 htonl(ntohl(addr->addr32[2]) + 1); 2776 } else 2777 addr->addr32[3] = 2778 htonl(ntohl(addr->addr32[3]) + 1); 2779 break; 2780 #endif /* INET6 */ 2781 default: 2782 unhandled_af(af); 2783 } 2784 } 2785 2786 int 2787 pf_socket_lookup(struct pf_pdesc *pd) 2788 { 2789 struct pf_addr *saddr, *daddr; 2790 u_int16_t sport, dport; 2791 struct inpcbtable *tb; 2792 struct inpcb *inp; 2793 2794 if (pd == NULL) 2795 return (-1); 2796 pd->lookup.uid = UID_MAX; 2797 pd->lookup.gid = GID_MAX; 2798 pd->lookup.pid = NO_PID; 2799 switch (pd->proto) { 2800 case IPPROTO_TCP: 2801 if (pd->hdr.tcp == NULL) 2802 return (-1); 2803 sport = pd->hdr.tcp->th_sport; 2804 dport = pd->hdr.tcp->th_dport; 2805 tb = &tcbtable; 2806 break; 2807 case 
IPPROTO_UDP: 2808 if (pd->hdr.udp == NULL) 2809 return (-1); 2810 sport = pd->hdr.udp->uh_sport; 2811 dport = pd->hdr.udp->uh_dport; 2812 tb = &udbtable; 2813 break; 2814 default: 2815 return (-1); 2816 } 2817 if (pd->dir == PF_IN) { 2818 saddr = pd->src; 2819 daddr = pd->dst; 2820 } else { 2821 u_int16_t p; 2822 2823 p = sport; 2824 sport = dport; 2825 dport = p; 2826 saddr = pd->dst; 2827 daddr = pd->src; 2828 } 2829 switch (pd->af) { 2830 case AF_INET: 2831 /* 2832 * Fails when rtable is changed while evaluating the ruleset 2833 * The socket looked up will not match the one hit in the end. 2834 */ 2835 inp = in_pcbhashlookup(tb, saddr->v4, sport, daddr->v4, dport, 2836 pd->rdomain); 2837 if (inp == NULL) { 2838 inp = in_pcblookup_listen(tb, daddr->v4, dport, 0, 2839 NULL, pd->rdomain); 2840 if (inp == NULL) 2841 return (-1); 2842 } 2843 break; 2844 #ifdef INET6 2845 case AF_INET6: 2846 inp = in6_pcbhashlookup(tb, &saddr->v6, sport, &daddr->v6, 2847 dport, pd->rdomain); 2848 if (inp == NULL) { 2849 inp = in6_pcblookup_listen(tb, &daddr->v6, dport, 0, 2850 NULL, pd->rdomain); 2851 if (inp == NULL) 2852 return (-1); 2853 } 2854 break; 2855 #endif /* INET6 */ 2856 default: 2857 unhandled_af(pd->af); 2858 } 2859 pd->lookup.uid = inp->inp_socket->so_euid; 2860 pd->lookup.gid = inp->inp_socket->so_egid; 2861 pd->lookup.pid = inp->inp_socket->so_cpid; 2862 return (1); 2863 } 2864 2865 u_int8_t 2866 pf_get_wscale(struct pf_pdesc *pd) 2867 { 2868 struct tcphdr *th = pd->hdr.tcp; 2869 int hlen; 2870 u_int8_t hdr[60]; 2871 u_int8_t *opt, optlen; 2872 u_int8_t wscale = 0; 2873 2874 hlen = th->th_off << 2; /* hlen <= sizeof(hdr) */ 2875 if (hlen <= sizeof(struct tcphdr)) 2876 return (0); 2877 if (!pf_pull_hdr(pd->m, pd->off, hdr, hlen, NULL, NULL, pd->af)) 2878 return (0); 2879 opt = hdr + sizeof(struct tcphdr); 2880 hlen -= sizeof(struct tcphdr); 2881 while (hlen >= 3) { 2882 switch (*opt) { 2883 case TCPOPT_EOL: 2884 case TCPOPT_NOP: 2885 ++opt; 2886 --hlen; 2887 break; 2888 case TCPOPT_WINDOW: 2889 wscale = opt[2]; 2890 if (wscale > TCP_MAX_WINSHIFT) 2891 wscale = TCP_MAX_WINSHIFT; 2892 wscale |= PF_WSCALE_FLAG; 2893 /* FALLTHROUGH */ 2894 default: 2895 optlen = opt[1]; 2896 if (optlen < 2) 2897 optlen = 2; 2898 hlen -= optlen; 2899 opt += optlen; 2900 break; 2901 } 2902 } 2903 return (wscale); 2904 } 2905 2906 u_int16_t 2907 pf_get_mss(struct pf_pdesc *pd) 2908 { 2909 struct tcphdr *th = pd->hdr.tcp; 2910 int hlen; 2911 u_int8_t hdr[60]; 2912 u_int8_t *opt, optlen; 2913 u_int16_t mss = tcp_mssdflt; 2914 2915 hlen = th->th_off << 2; /* hlen <= sizeof(hdr) */ 2916 if (hlen <= sizeof(struct tcphdr)) 2917 return (0); 2918 if (!pf_pull_hdr(pd->m, pd->off, hdr, hlen, NULL, NULL, pd->af)) 2919 return (0); 2920 opt = hdr + sizeof(struct tcphdr); 2921 hlen -= sizeof(struct tcphdr); 2922 while (hlen >= TCPOLEN_MAXSEG) { 2923 switch (*opt) { 2924 case TCPOPT_EOL: 2925 case TCPOPT_NOP: 2926 ++opt; 2927 --hlen; 2928 break; 2929 case TCPOPT_MAXSEG: 2930 memcpy(&mss, (opt + 2), 2); 2931 mss = ntohs(mss); 2932 /* FALLTHROUGH */ 2933 default: 2934 optlen = opt[1]; 2935 if (optlen < 2) 2936 optlen = 2; 2937 hlen -= optlen; 2938 opt += optlen; 2939 break; 2940 } 2941 } 2942 return (mss); 2943 } 2944 2945 u_int16_t 2946 pf_calc_mss(struct pf_addr *addr, sa_family_t af, int rtableid, u_int16_t offer) 2947 { 2948 struct ifnet *ifp; 2949 struct sockaddr_in *dst; 2950 #ifdef INET6 2951 struct sockaddr_in6 *dst6; 2952 #endif /* INET6 */ 2953 struct rtentry *rt = NULL; 2954 struct sockaddr_storage ss; 2955 int hlen; 2956 
u_int16_t mss = tcp_mssdflt; 2957 2958 memset(&ss, 0, sizeof(ss)); 2959 2960 switch (af) { 2961 case AF_INET: 2962 hlen = sizeof(struct ip); 2963 dst = (struct sockaddr_in *)&ss; 2964 dst->sin_family = AF_INET; 2965 dst->sin_len = sizeof(*dst); 2966 dst->sin_addr = addr->v4; 2967 rt = rtalloc(sintosa(dst), 0, rtableid); 2968 break; 2969 #ifdef INET6 2970 case AF_INET6: 2971 hlen = sizeof(struct ip6_hdr); 2972 dst6 = (struct sockaddr_in6 *)&ss; 2973 dst6->sin6_family = AF_INET6; 2974 dst6->sin6_len = sizeof(*dst6); 2975 dst6->sin6_addr = addr->v6; 2976 rt = rtalloc(sin6tosa(dst6), 0, rtableid); 2977 break; 2978 #endif /* INET6 */ 2979 } 2980 2981 if (rt != NULL && (ifp = if_get(rt->rt_ifidx)) != NULL) { 2982 mss = ifp->if_mtu - hlen - sizeof(struct tcphdr); 2983 mss = max(tcp_mssdflt, mss); 2984 if_put(ifp); 2985 } 2986 rtfree(rt); 2987 mss = min(mss, offer); 2988 mss = max(mss, 64); /* sanity - at least max opt space */ 2989 return (mss); 2990 } 2991 2992 static __inline int 2993 pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr, sa_family_t af) 2994 { 2995 struct pf_rule *r = s->rule.ptr; 2996 struct pf_src_node *sns[PF_SN_MAX]; 2997 int rv; 2998 2999 s->rt_kif = NULL; 3000 if (!r->rt) 3001 return (0); 3002 3003 bzero(sns, sizeof(sns)); 3004 switch (af) { 3005 case AF_INET: 3006 rv = pf_map_addr(AF_INET, r, saddr, &s->rt_addr, NULL, sns, 3007 &r->route, PF_SN_ROUTE); 3008 break; 3009 #ifdef INET6 3010 case AF_INET6: 3011 rv = pf_map_addr(AF_INET6, r, saddr, &s->rt_addr, NULL, sns, 3012 &r->route, PF_SN_ROUTE); 3013 break; 3014 #endif /* INET6 */ 3015 default: 3016 rv = 1; 3017 } 3018 3019 if (rv == 0) { 3020 s->rt_kif = r->route.kif; 3021 s->natrule.ptr = r; 3022 } 3023 3024 return (rv); 3025 } 3026 3027 u_int32_t 3028 pf_tcp_iss(struct pf_pdesc *pd) 3029 { 3030 SHA2_CTX ctx; 3031 union { 3032 uint8_t bytes[SHA512_DIGEST_LENGTH]; 3033 uint32_t words[1]; 3034 } digest; 3035 3036 if (pf_tcp_secret_init == 0) { 3037 arc4random_buf(pf_tcp_secret, sizeof(pf_tcp_secret)); 3038 SHA512Init(&pf_tcp_secret_ctx); 3039 SHA512Update(&pf_tcp_secret_ctx, pf_tcp_secret, 3040 sizeof(pf_tcp_secret)); 3041 pf_tcp_secret_init = 1; 3042 } 3043 ctx = pf_tcp_secret_ctx; 3044 3045 SHA512Update(&ctx, &pd->rdomain, sizeof(pd->rdomain)); 3046 SHA512Update(&ctx, &pd->hdr.tcp->th_sport, sizeof(u_short)); 3047 SHA512Update(&ctx, &pd->hdr.tcp->th_dport, sizeof(u_short)); 3048 switch (pd->af) { 3049 case AF_INET: 3050 SHA512Update(&ctx, &pd->src->v4, sizeof(struct in_addr)); 3051 SHA512Update(&ctx, &pd->dst->v4, sizeof(struct in_addr)); 3052 break; 3053 #ifdef INET6 3054 case AF_INET6: 3055 SHA512Update(&ctx, &pd->src->v6, sizeof(struct in6_addr)); 3056 SHA512Update(&ctx, &pd->dst->v6, sizeof(struct in6_addr)); 3057 break; 3058 #endif /* INET6 */ 3059 } 3060 SHA512Final(digest.bytes, &ctx); 3061 pf_tcp_iss_off += 4096; 3062 return (digest.words[0] + tcp_iss + pf_tcp_iss_off); 3063 } 3064 3065 void 3066 pf_rule_to_actions(struct pf_rule *r, struct pf_rule_actions *a) 3067 { 3068 if (r->qid) 3069 a->qid = r->qid; 3070 if (r->pqid) 3071 a->pqid = r->pqid; 3072 if (r->rtableid >= 0) 3073 a->rtableid = r->rtableid; 3074 #if NPFLOG > 0 3075 a->log |= r->log; 3076 #endif /* NPFLOG > 0 */ 3077 if (r->scrub_flags & PFSTATE_SETTOS) 3078 a->set_tos = r->set_tos; 3079 if (r->min_ttl) 3080 a->min_ttl = r->min_ttl; 3081 if (r->max_mss) 3082 a->max_mss = r->max_mss; 3083 a->flags |= (r->scrub_flags & (PFSTATE_NODF|PFSTATE_RANDOMID| 3084 PFSTATE_SETTOS|PFSTATE_SCRUB_TCP|PFSTATE_SETPRIO)); 3085 if (r->scrub_flags & 
PFSTATE_SETPRIO) { 3086 a->set_prio[0] = r->set_prio[0]; 3087 a->set_prio[1] = r->set_prio[1]; 3088 } 3089 } 3090 3091 #define PF_TEST_ATTRIB(t, a) \ 3092 do { \ 3093 if (t) { \ 3094 r = a; \ 3095 goto nextrule; \ 3096 } \ 3097 } while (0) 3098 3099 int 3100 pf_test_rule(struct pf_pdesc *pd, struct pf_rule **rm, struct pf_state **sm, 3101 struct pf_rule **am, struct pf_ruleset **rsm, u_short *reason) 3102 { 3103 struct pf_rule *r; 3104 struct pf_rule *nr = NULL; 3105 struct pf_rule *a = NULL; 3106 struct pf_ruleset *arsm = NULL; 3107 struct pf_ruleset *aruleset = NULL; 3108 struct pf_ruleset *ruleset = NULL; 3109 struct pf_rule_slist rules; 3110 struct pf_rule_item *ri; 3111 struct pf_src_node *sns[PF_SN_MAX]; 3112 struct tcphdr *th = pd->hdr.tcp; 3113 struct pf_state_key *skw = NULL, *sks = NULL; 3114 struct pf_rule_actions act; 3115 int rewrite = 0; 3116 int tag = -1; 3117 int asd = 0; 3118 int match = 0; 3119 int state_icmp = 0, icmp_dir = 0; 3120 u_int16_t virtual_type, virtual_id; 3121 u_int8_t icmptype = 0, icmpcode = 0; 3122 int action = PF_DROP; 3123 3124 bzero(&act, sizeof(act)); 3125 bzero(sns, sizeof(sns)); 3126 act.rtableid = pd->rdomain; 3127 SLIST_INIT(&rules); 3128 3129 if (pd->dir == PF_IN && if_congested()) { 3130 REASON_SET(reason, PFRES_CONGEST); 3131 return (PF_DROP); 3132 } 3133 3134 switch (pd->virtual_proto) { 3135 case IPPROTO_ICMP: 3136 icmptype = pd->hdr.icmp->icmp_type; 3137 icmpcode = pd->hdr.icmp->icmp_code; 3138 state_icmp = pf_icmp_mapping(pd, icmptype, 3139 &icmp_dir, &virtual_id, &virtual_type); 3140 if (icmp_dir == PF_IN) { 3141 pd->osport = pd->nsport = virtual_id; 3142 pd->odport = pd->ndport = virtual_type; 3143 } else { 3144 pd->osport = pd->nsport = virtual_type; 3145 pd->odport = pd->ndport = virtual_id; 3146 } 3147 break; 3148 #ifdef INET6 3149 case IPPROTO_ICMPV6: 3150 icmptype = pd->hdr.icmp6->icmp6_type; 3151 icmpcode = pd->hdr.icmp6->icmp6_code; 3152 state_icmp = pf_icmp_mapping(pd, icmptype, 3153 &icmp_dir, &virtual_id, &virtual_type); 3154 if (icmp_dir == PF_IN) { 3155 pd->osport = pd->nsport = virtual_id; 3156 pd->odport = pd->ndport = virtual_type; 3157 } else { 3158 pd->osport = pd->nsport = virtual_type; 3159 pd->odport = pd->ndport = virtual_id; 3160 } 3161 break; 3162 #endif /* INET6 */ 3163 } 3164 3165 ruleset = &pf_main_ruleset; 3166 r = TAILQ_FIRST(pf_main_ruleset.rules.active.ptr); 3167 while (r != NULL) { 3168 r->evaluations++; 3169 PF_TEST_ATTRIB((pfi_kif_match(r->kif, pd->kif) == r->ifnot), 3170 r->skip[PF_SKIP_IFP].ptr); 3171 PF_TEST_ATTRIB((r->direction && r->direction != pd->dir), 3172 r->skip[PF_SKIP_DIR].ptr); 3173 PF_TEST_ATTRIB((r->onrdomain >= 0 && 3174 (r->onrdomain == pd->rdomain) == r->ifnot), 3175 r->skip[PF_SKIP_RDOM].ptr); 3176 PF_TEST_ATTRIB((r->af && r->af != pd->af), 3177 r->skip[PF_SKIP_AF].ptr); 3178 PF_TEST_ATTRIB((r->proto && r->proto != pd->proto), 3179 r->skip[PF_SKIP_PROTO].ptr); 3180 PF_TEST_ATTRIB((PF_MISMATCHAW(&r->src.addr, &pd->nsaddr, 3181 pd->naf, r->src.neg, pd->kif, act.rtableid)), 3182 r->skip[PF_SKIP_SRC_ADDR].ptr); 3183 PF_TEST_ATTRIB((PF_MISMATCHAW(&r->dst.addr, &pd->ndaddr, pd->af, 3184 r->dst.neg, NULL, act.rtableid)), 3185 r->skip[PF_SKIP_DST_ADDR].ptr); 3186 3187 switch (pd->virtual_proto) { 3188 case PF_VPROTO_FRAGMENT: 3189 /* tcp/udp only. port_op always 0 in other cases */ 3190 PF_TEST_ATTRIB((r->src.port_op || r->dst.port_op), 3191 TAILQ_NEXT(r, entries)); 3192 PF_TEST_ATTRIB((pd->proto == IPPROTO_TCP && r->flagset), 3193 TAILQ_NEXT(r, entries)); 3194 /* icmp only. 
type/code always 0 in other cases */ 3195 PF_TEST_ATTRIB((r->type || r->code), 3196 TAILQ_NEXT(r, entries)); 3197 /* tcp/udp only. {uid|gid}.op always 0 in other cases */ 3198 PF_TEST_ATTRIB((r->gid.op || r->uid.op), 3199 TAILQ_NEXT(r, entries)); 3200 break; 3201 3202 case IPPROTO_TCP: 3203 PF_TEST_ATTRIB(((r->flagset & th->th_flags) != 3204 r->flags), 3205 TAILQ_NEXT(r, entries)); 3206 PF_TEST_ATTRIB((r->os_fingerprint != PF_OSFP_ANY && 3207 !pf_osfp_match(pf_osfp_fingerprint(pd), 3208 r->os_fingerprint)), 3209 TAILQ_NEXT(r, entries)); 3210 /* FALLTHROUGH */ 3211 3212 case IPPROTO_UDP: 3213 /* tcp/udp only. port_op always 0 in other cases */ 3214 PF_TEST_ATTRIB((r->src.port_op && 3215 !pf_match_port(r->src.port_op, r->src.port[0], 3216 r->src.port[1], pd->nsport)), 3217 r->skip[PF_SKIP_SRC_PORT].ptr); 3218 PF_TEST_ATTRIB((r->dst.port_op && 3219 !pf_match_port(r->dst.port_op, r->dst.port[0], 3220 r->dst.port[1], pd->ndport)), 3221 r->skip[PF_SKIP_DST_PORT].ptr); 3222 /* tcp/udp only. uid.op always 0 in other cases */ 3223 PF_TEST_ATTRIB((r->uid.op && (pd->lookup.done || 3224 (pd->lookup.done = 3225 pf_socket_lookup(pd), 1)) && 3226 !pf_match_uid(r->uid.op, r->uid.uid[0], 3227 r->uid.uid[1], pd->lookup.uid)), 3228 TAILQ_NEXT(r, entries)); 3229 /* tcp/udp only. gid.op always 0 in other cases */ 3230 PF_TEST_ATTRIB((r->gid.op && (pd->lookup.done || 3231 (pd->lookup.done = 3232 pf_socket_lookup(pd), 1)) && 3233 !pf_match_gid(r->gid.op, r->gid.gid[0], 3234 r->gid.gid[1], pd->lookup.gid)), 3235 TAILQ_NEXT(r, entries)); 3236 break; 3237 3238 case IPPROTO_ICMP: 3239 case IPPROTO_ICMPV6: 3240 /* icmp only. type always 0 in other cases */ 3241 PF_TEST_ATTRIB((r->type && r->type != icmptype + 1), 3242 TAILQ_NEXT(r, entries)); 3243 /* icmp only. type always 0 in other cases */ 3244 PF_TEST_ATTRIB((r->code && r->code != icmpcode + 1), 3245 TAILQ_NEXT(r, entries)); 3246 /* icmp only. don't create states on replies */ 3247 PF_TEST_ATTRIB((r->keep_state && !state_icmp && 3248 (r->rule_flag & PFRULE_STATESLOPPY) == 0 && 3249 icmp_dir != PF_IN), 3250 TAILQ_NEXT(r, entries)); 3251 break; 3252 3253 default: 3254 break; 3255 } 3256 3257 PF_TEST_ATTRIB((r->rule_flag & PFRULE_FRAGMENT && 3258 pd->virtual_proto != PF_VPROTO_FRAGMENT), 3259 TAILQ_NEXT(r, entries)); 3260 PF_TEST_ATTRIB((r->tos && !(r->tos == pd->tos)), 3261 TAILQ_NEXT(r, entries)); 3262 PF_TEST_ATTRIB((r->prob && 3263 r->prob <= arc4random_uniform(UINT_MAX - 1) + 1), 3264 TAILQ_NEXT(r, entries)); 3265 PF_TEST_ATTRIB((r->match_tag && !pf_match_tag(pd->m, r, &tag)), 3266 TAILQ_NEXT(r, entries)); 3267 PF_TEST_ATTRIB((r->rcv_kif && pf_match_rcvif(pd->m, r) == 3268 r->rcvifnot), 3269 TAILQ_NEXT(r, entries)); 3270 PF_TEST_ATTRIB((r->prio && 3271 (r->prio == PF_PRIO_ZERO ? 
0 : r->prio) != pd->m->m_pkthdr.pf.prio), 3272 TAILQ_NEXT(r, entries)); 3273 3274 /* FALLTHROUGH */ 3275 if (r->tag) 3276 tag = r->tag; 3277 if (r->anchor == NULL) { 3278 if (r->action == PF_MATCH) { 3279 if ((ri = pool_get(&pf_rule_item_pl, 3280 PR_NOWAIT)) == NULL) { 3281 REASON_SET(reason, PFRES_MEMORY); 3282 goto cleanup; 3283 } 3284 ri->r = r; 3285 /* order is irrelevant */ 3286 SLIST_INSERT_HEAD(&rules, ri, entry); 3287 pf_rule_to_actions(r, &act); 3288 if (r->rule_flag & PFRULE_AFTO) 3289 pd->naf = r->naf; 3290 if (pf_get_transaddr(r, pd, sns, &nr) == -1) { 3291 REASON_SET(reason, PFRES_TRANSLATE); 3292 goto cleanup; 3293 } 3294 #if NPFLOG > 0 3295 if (r->log) { 3296 REASON_SET(reason, PFRES_MATCH); 3297 PFLOG_PACKET(pd, *reason, r, a, ruleset, 3298 NULL); 3299 } 3300 #endif /* NPFLOG > 0 */ 3301 } else { 3302 match = asd; 3303 *rm = r; 3304 *am = a; 3305 *rsm = ruleset; 3306 arsm = aruleset; 3307 } 3308 3309 #if NPFLOG > 0 3310 if (act.log & PF_LOG_MATCHES) 3311 pf_log_matches(pd, r, a, ruleset, &rules); 3312 #endif /* NPFLOG > 0 */ 3313 3314 if (r->quick) 3315 break; 3316 r = TAILQ_NEXT(r, entries); 3317 } else { 3318 aruleset = ruleset; 3319 pf_step_into_anchor(&asd, &ruleset, &r, &a); 3320 } 3321 3322 nextrule: 3323 if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset, 3324 &r, &a, &match)) 3325 break; 3326 } 3327 r = *rm; /* matching rule */ 3328 a = *am; /* rule that defines an anchor containing 'r' */ 3329 ruleset = *rsm; /* ruleset of the anchor defined by the rule 'a' */ 3330 aruleset = arsm;/* ruleset of the 'a' rule itself */ 3331 3332 /* apply actions for last matching pass/block rule */ 3333 pf_rule_to_actions(r, &act); 3334 if (r->rule_flag & PFRULE_AFTO) 3335 pd->naf = r->naf; 3336 if (pf_get_transaddr(r, pd, sns, &nr) == -1) { 3337 REASON_SET(reason, PFRES_TRANSLATE); 3338 goto cleanup; 3339 } 3340 REASON_SET(reason, PFRES_MATCH); 3341 3342 #if NPFLOG > 0 3343 if (r->log) 3344 PFLOG_PACKET(pd, *reason, r, a, ruleset, NULL); 3345 if (act.log & PF_LOG_MATCHES) 3346 pf_log_matches(pd, r, a, ruleset, &rules); 3347 #endif /* NPFLOG > 0 */ 3348 3349 if (pd->virtual_proto != PF_VPROTO_FRAGMENT && 3350 (r->action == PF_DROP) && 3351 ((r->rule_flag & PFRULE_RETURNRST) || 3352 (r->rule_flag & PFRULE_RETURNICMP) || 3353 (r->rule_flag & PFRULE_RETURN))) { 3354 if (pd->proto == IPPROTO_TCP && 3355 ((r->rule_flag & PFRULE_RETURNRST) || 3356 (r->rule_flag & PFRULE_RETURN)) && 3357 !(th->th_flags & TH_RST)) { 3358 u_int32_t ack = ntohl(th->th_seq) + pd->p_len; 3359 3360 if (pf_check_proto_cksum(pd, pd->off, 3361 pd->tot_len - pd->off, IPPROTO_TCP, pd->af)) 3362 REASON_SET(reason, PFRES_PROTCKSUM); 3363 else { 3364 if (th->th_flags & TH_SYN) 3365 ack++; 3366 if (th->th_flags & TH_FIN) 3367 ack++; 3368 pf_send_tcp(r, pd->af, pd->dst, 3369 pd->src, th->th_dport, th->th_sport, 3370 ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0, 3371 r->return_ttl, 1, 0, pd->rdomain); 3372 } 3373 } else if ((pd->proto != IPPROTO_ICMP || 3374 ICMP_INFOTYPE(icmptype)) && pd->af == AF_INET && 3375 r->return_icmp) 3376 pf_send_icmp(pd->m, r->return_icmp >> 8, 3377 r->return_icmp & 255, pd->af, r, pd->rdomain); 3378 else if ((pd->proto != IPPROTO_ICMPV6 || 3379 (icmptype >= ICMP6_ECHO_REQUEST && 3380 icmptype != ND_REDIRECT)) && pd->af == AF_INET6 && 3381 r->return_icmp6) 3382 pf_send_icmp(pd->m, r->return_icmp6 >> 8, 3383 r->return_icmp6 & 255, pd->af, r, pd->rdomain); 3384 } 3385 3386 if (r->action == PF_DROP) 3387 goto cleanup; 3388 3389 pf_tag_packet(pd->m, tag, act.rtableid); 3390 if (act.rtableid >= 0 && 
3391 	    rtable_l2(act.rtableid) != pd->rdomain)
3392 		pd->destchg = 1;
3393
3394 	if (r->action == PF_PASS && pd->badopts && !r->allow_opts) {
3395 		REASON_SET(reason, PFRES_IPOPTIONS);
3396 #if NPFLOG > 0
3397 		pd->pflog |= PF_LOG_FORCE;
3398 #endif /* NPFLOG > 0 */
3399 		DPFPRINTF(LOG_NOTICE, "dropping packet with "
3400 		    "ip/ipv6 options in pf_test_rule()");
3401 		goto cleanup;
3402 	}
3403
3404 	action = PF_PASS;
3405
3406 	if (pd->virtual_proto != PF_VPROTO_FRAGMENT
3407 	    && !state_icmp && r->keep_state) {
3408
3409 		if (r->rule_flag & PFRULE_SRCTRACK &&
3410 		    pf_insert_src_node(&sns[PF_SN_NONE], r, PF_SN_NONE, pd->af,
3411 		    pd->src, NULL) != 0) {
3412 			REASON_SET(reason, PFRES_SRCLIMIT);
3413 			goto cleanup;
3414 		}
3415
3416 		if (r->max_states && (r->states_cur >= r->max_states)) {
3417 			pf_status.lcounters[LCNT_STATES]++;
3418 			REASON_SET(reason, PFRES_MAXSTATES);
3419 			goto cleanup;
3420 		}
3421
3422 		action = pf_create_state(pd, r, a, nr, &skw, &sks, &rewrite,
3423 		    sm, tag, &rules, &act, sns);
3424
3425 		if (action != PF_PASS)
3426 			goto cleanup;
3427 		if (sks != skw) {
3428 			struct pf_state_key *sk;
3429
3430 			if (pd->dir == PF_IN)
3431 				sk = sks;
3432 			else
3433 				sk = skw;
3434 			rewrite += pf_translate(pd,
3435 			    &sk->addr[pd->af == pd->naf ? pd->sidx : pd->didx],
3436 			    sk->port[pd->af == pd->naf ? pd->sidx : pd->didx],
3437 			    &sk->addr[pd->af == pd->naf ? pd->didx : pd->sidx],
3438 			    sk->port[pd->af == pd->naf ? pd->didx : pd->sidx],
3439 			    virtual_type, icmp_dir);
3440 		}
3441
3442 #ifdef INET6
3443 		if (rewrite && skw->af != sks->af)
3444 			action = PF_AFRT;
3445 #endif /* INET6 */
3446
3447 	} else {
3448 		while ((ri = SLIST_FIRST(&rules))) {
3449 			SLIST_REMOVE_HEAD(&rules, entry);
3450 			pool_put(&pf_rule_item_pl, ri);
3451 		}
3452 	}
3453
3454 	/* copy back packet headers if needed */
3455 	if (rewrite && pd->hdrlen) {
3456 		pf_cksum(pd, pd->m);
3457 		m_copyback(pd->m, pd->off, pd->hdrlen, pd->hdr.any, M_NOWAIT);
3458 	}
3459
3460 #if NPFSYNC > 0
3461 	if (*sm != NULL && !ISSET((*sm)->state_flags, PFSTATE_NOSYNC) &&
3462 	    pd->dir == PF_OUT && pfsync_up()) {
3463 		/*
3464 		 * We want the state created, but we don't
3465 		 * want to send this in case a partner
3466 		 * firewall has to know about it to allow
3467 		 * replies through it.
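		 * pfsync_defer() holds the packet back and the
		 * PF_DEFER return tells our caller not to transmit
		 * it now; pfsync releases it once the state update
		 * has been sent to the peer (or a short timeout
		 * fires).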
3468 */ 3469 if (pfsync_defer(*sm, pd->m)) 3470 return (PF_DEFER); 3471 } 3472 #endif /* NPFSYNC > 0 */ 3473 3474 if (r->rule_flag & PFRULE_ONCE) 3475 pf_purge_rule(ruleset, r, aruleset, a); 3476 3477 return (action); 3478 3479 cleanup: 3480 while ((ri = SLIST_FIRST(&rules))) { 3481 SLIST_REMOVE_HEAD(&rules, entry); 3482 pool_put(&pf_rule_item_pl, ri); 3483 } 3484 3485 return (action); 3486 } 3487 3488 static __inline int 3489 pf_create_state(struct pf_pdesc *pd, struct pf_rule *r, struct pf_rule *a, 3490 struct pf_rule *nr, struct pf_state_key **skw, struct pf_state_key **sks, 3491 int *rewrite, struct pf_state **sm, int tag, struct pf_rule_slist *rules, 3492 struct pf_rule_actions *act, struct pf_src_node *sns[PF_SN_MAX]) 3493 { 3494 struct pf_state *s = NULL; 3495 struct tcphdr *th = pd->hdr.tcp; 3496 u_int16_t mss = tcp_mssdflt; 3497 u_short reason; 3498 u_int i; 3499 3500 s = pool_get(&pf_state_pl, PR_NOWAIT | PR_ZERO); 3501 if (s == NULL) { 3502 REASON_SET(&reason, PFRES_MEMORY); 3503 goto csfailed; 3504 } 3505 s->rule.ptr = r; 3506 s->anchor.ptr = a; 3507 s->natrule.ptr = nr; 3508 if (r->allow_opts) 3509 s->state_flags |= PFSTATE_ALLOWOPTS; 3510 if (r->rule_flag & PFRULE_STATESLOPPY) 3511 s->state_flags |= PFSTATE_SLOPPY; 3512 if (r->rule_flag & PFRULE_PFLOW) 3513 s->state_flags |= PFSTATE_PFLOW; 3514 #if NPFLOG > 0 3515 s->log = act->log & PF_LOG_ALL; 3516 #endif /* NPFLOG > 0 */ 3517 s->qid = act->qid; 3518 s->pqid = act->pqid; 3519 s->rtableid[pd->didx] = act->rtableid; 3520 s->rtableid[pd->sidx] = -1; /* return traffic is routed normally */ 3521 s->min_ttl = act->min_ttl; 3522 s->set_tos = act->set_tos; 3523 s->max_mss = act->max_mss; 3524 s->state_flags |= act->flags; 3525 #if NPFSYNC > 0 3526 s->sync_state = PFSYNC_S_NONE; 3527 #endif /* NPFSYNC > 0 */ 3528 s->set_prio[0] = act->set_prio[0]; 3529 s->set_prio[1] = act->set_prio[1]; 3530 SLIST_INIT(&s->src_nodes); 3531 3532 switch (pd->proto) { 3533 case IPPROTO_TCP: 3534 s->src.seqlo = ntohl(th->th_seq); 3535 s->src.seqhi = s->src.seqlo + pd->p_len + 1; 3536 if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN && 3537 r->keep_state == PF_STATE_MODULATE) { 3538 /* Generate sequence number modulator */ 3539 if ((s->src.seqdiff = pf_tcp_iss(pd) - s->src.seqlo) == 3540 0) 3541 s->src.seqdiff = 1; 3542 pf_change_a(pd, &th->th_seq, 3543 htonl(s->src.seqlo + s->src.seqdiff)); 3544 *rewrite = 1; 3545 } else 3546 s->src.seqdiff = 0; 3547 if (th->th_flags & TH_SYN) { 3548 s->src.seqhi++; 3549 s->src.wscale = pf_get_wscale(pd); 3550 } 3551 s->src.max_win = MAX(ntohs(th->th_win), 1); 3552 if (s->src.wscale & PF_WSCALE_MASK) { 3553 /* Remove scale factor from initial window */ 3554 int win = s->src.max_win; 3555 win += 1 << (s->src.wscale & PF_WSCALE_MASK); 3556 s->src.max_win = (win - 1) >> 3557 (s->src.wscale & PF_WSCALE_MASK); 3558 } 3559 if (th->th_flags & TH_FIN) 3560 s->src.seqhi++; 3561 s->dst.seqhi = 1; 3562 s->dst.max_win = 1; 3563 s->src.state = TCPS_SYN_SENT; 3564 s->dst.state = TCPS_CLOSED; 3565 s->timeout = PFTM_TCP_FIRST_PACKET; 3566 break; 3567 case IPPROTO_UDP: 3568 s->src.state = PFUDPS_SINGLE; 3569 s->dst.state = PFUDPS_NO_TRAFFIC; 3570 s->timeout = PFTM_UDP_FIRST_PACKET; 3571 break; 3572 case IPPROTO_ICMP: 3573 #ifdef INET6 3574 case IPPROTO_ICMPV6: 3575 #endif /* INET6 */ 3576 s->timeout = PFTM_ICMP_FIRST_PACKET; 3577 break; 3578 default: 3579 s->src.state = PFOTHERS_SINGLE; 3580 s->dst.state = PFOTHERS_NO_TRAFFIC; 3581 s->timeout = PFTM_OTHER_FIRST_PACKET; 3582 } 3583 3584 s->creation = time_uptime; 3585 s->expire = 
time_uptime; 3586 3587 if (pd->proto == IPPROTO_TCP) { 3588 if (s->state_flags & PFSTATE_SCRUB_TCP && 3589 pf_normalize_tcp_init(pd, &s->src)) { 3590 REASON_SET(&reason, PFRES_MEMORY); 3591 goto csfailed; 3592 } 3593 if (s->state_flags & PFSTATE_SCRUB_TCP && s->src.scrub && 3594 pf_normalize_tcp_stateful(pd, &reason, s, &s->src, &s->dst, 3595 rewrite)) { 3596 /* This really shouldn't happen!!! */ 3597 DPFPRINTF(LOG_ERR, 3598 "pf_normalize_tcp_stateful failed on first pkt"); 3599 goto csfailed; 3600 } 3601 } 3602 s->direction = pd->dir; 3603 3604 if (pf_state_key_setup(pd, skw, sks, act->rtableid)) { 3605 REASON_SET(&reason, PFRES_MEMORY); 3606 goto csfailed; 3607 } 3608 3609 for (i = 0; i < PF_SN_MAX; i++) 3610 if (sns[i] != NULL) { 3611 struct pf_sn_item *sni; 3612 3613 sni = pool_get(&pf_sn_item_pl, PR_NOWAIT); 3614 if (sni == NULL) { 3615 REASON_SET(&reason, PFRES_MEMORY); 3616 goto csfailed; 3617 } 3618 sni->sn = sns[i]; 3619 SLIST_INSERT_HEAD(&s->src_nodes, sni, next); 3620 sni->sn->states++; 3621 } 3622 3623 if (pf_set_rt_ifp(s, pd->src, (*skw)->af) != 0) { 3624 REASON_SET(&reason, PFRES_NOROUTE); 3625 goto csfailed; 3626 } 3627 3628 if (pf_state_insert(BOUND_IFACE(r, pd->kif), skw, sks, s)) { 3629 pf_detach_state(s); 3630 *sks = *skw = NULL; 3631 REASON_SET(&reason, PFRES_STATEINS); 3632 goto csfailed; 3633 } else 3634 *sm = s; 3635 3636 /* 3637 * Make state responsible for rules it binds here. 3638 */ 3639 memcpy(&s->match_rules, rules, sizeof(s->match_rules)); 3640 bzero(rules, sizeof(*rules)); 3641 STATE_INC_COUNTERS(s); 3642 3643 if (tag > 0) { 3644 pf_tag_ref(tag); 3645 s->tag = tag; 3646 } 3647 if (pd->proto == IPPROTO_TCP && (th->th_flags & (TH_SYN|TH_ACK)) == 3648 TH_SYN && r->keep_state == PF_STATE_SYNPROXY) { 3649 int rtid = pd->rdomain; 3650 if (act->rtableid >= 0) 3651 rtid = act->rtableid; 3652 s->src.state = PF_TCPS_PROXY_SRC; 3653 s->src.seqhi = arc4random(); 3654 /* Find mss option */ 3655 mss = pf_get_mss(pd); 3656 mss = pf_calc_mss(pd->src, pd->af, rtid, mss); 3657 mss = pf_calc_mss(pd->dst, pd->af, rtid, mss); 3658 s->src.mss = mss; 3659 pf_send_tcp(r, pd->af, pd->dst, pd->src, th->th_dport, 3660 th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1, 3661 TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, pd->rdomain); 3662 REASON_SET(&reason, PFRES_SYNPROXY); 3663 return (PF_SYNPROXY_DROP); 3664 } 3665 3666 return (PF_PASS); 3667 3668 csfailed: 3669 if (s) { 3670 pf_normalize_tcp_cleanup(s); /* safe even w/o init */ 3671 pf_src_tree_remove_state(s); 3672 pool_put(&pf_state_pl, s); 3673 } 3674 3675 for (i = 0; i < PF_SN_MAX; i++) 3676 if (sns[i] != NULL) 3677 pf_remove_src_node(sns[i]); 3678 3679 return (PF_DROP); 3680 } 3681 3682 int 3683 pf_translate(struct pf_pdesc *pd, struct pf_addr *saddr, u_int16_t sport, 3684 struct pf_addr *daddr, u_int16_t dport, u_int16_t virtual_type, 3685 int icmp_dir) 3686 { 3687 /* 3688 * when called from bpf_mtap_pflog, there are extra constraints: 3689 * -mbuf is faked, m_data is the bpf buffer 3690 * -pd is not fully set up 3691 */ 3692 int rewrite = 0; 3693 int afto = pd->af != pd->naf; 3694 3695 if (afto || PF_ANEQ(daddr, pd->dst, pd->af)) 3696 pd->destchg = 1; 3697 3698 switch (pd->proto) { 3699 case IPPROTO_TCP: 3700 if (afto || PF_ANEQ(saddr, pd->src, pd->af) || 3701 *pd->sport != sport) { 3702 pf_change_ap(pd, pd->src, pd->sport, saddr, sport); 3703 rewrite = 1; 3704 } 3705 if (afto || PF_ANEQ(daddr, pd->dst, pd->af) || 3706 *pd->dport != dport) { 3707 pf_change_ap(pd, pd->dst, pd->dport, daddr, dport); 3708 rewrite = 1; 3709 } 3710 break; 
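	/* UDP: same address/port rewrite as the TCP case above */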
3711 3712 case IPPROTO_UDP: 3713 if (afto || PF_ANEQ(saddr, pd->src, pd->af) || 3714 *pd->sport != sport) { 3715 pf_change_ap(pd, pd->src, pd->sport, saddr, sport); 3716 rewrite = 1; 3717 } 3718 if (afto || PF_ANEQ(daddr, pd->dst, pd->af) || 3719 *pd->dport != dport) { 3720 pf_change_ap(pd, pd->dst, pd->dport, daddr, dport); 3721 rewrite = 1; 3722 } 3723 break; 3724 3725 case IPPROTO_ICMP: 3726 /* pf_translate() is also used when logging invalid packets */ 3727 if (pd->af != AF_INET) 3728 return (0); 3729 3730 if (afto) { 3731 #ifdef INET6 3732 if (pf_translate_icmp_af(AF_INET6, pd->hdr.icmp)) 3733 return (0); 3734 pd->proto = IPPROTO_ICMPV6; 3735 rewrite = 1; 3736 #endif /* INET6 */ 3737 } else { 3738 if (PF_ANEQ(saddr, pd->src, pd->af)) { 3739 pf_change_a(pd, &pd->src->v4.s_addr, 3740 saddr->v4.s_addr); 3741 rewrite = 1; 3742 } 3743 if (PF_ANEQ(daddr, pd->dst, pd->af)) { 3744 pf_change_a(pd, &pd->dst->v4.s_addr, 3745 daddr->v4.s_addr); 3746 rewrite = 1; 3747 } 3748 } 3749 if (virtual_type == htons(ICMP_ECHO)) { 3750 u_int16_t icmpid = (icmp_dir == PF_IN) ? sport : dport; 3751 3752 if (icmpid != pd->hdr.icmp->icmp_id) { 3753 if (pd->csum_status == PF_CSUM_UNKNOWN) 3754 pf_check_proto_cksum(pd, pd->off, 3755 pd->tot_len - pd->off, pd->proto, 3756 pd->af); 3757 pd->hdr.icmp->icmp_id = icmpid; 3758 rewrite = 1; 3759 } 3760 } 3761 break; 3762 3763 #ifdef INET6 3764 case IPPROTO_ICMPV6: 3765 /* pf_translate() is also used when logging invalid packets */ 3766 if (pd->af != AF_INET6) 3767 return (0); 3768 3769 if (afto) { 3770 /* ip_sum will be recalculated in pf_translate_af */ 3771 if (pf_translate_icmp_af(AF_INET, pd->hdr.icmp6)) 3772 return (0); 3773 pd->proto = IPPROTO_ICMP; 3774 rewrite = 1; 3775 } else { 3776 if (PF_ANEQ(saddr, pd->src, pd->af)) { 3777 pf_change_ap(pd, pd->src, NULL, saddr, 0); 3778 rewrite = 1; 3779 } 3780 if (PF_ANEQ(daddr, pd->dst, pd->af)) { 3781 pf_change_ap(pd, pd->dst, NULL, daddr, 0); 3782 rewrite = 1; 3783 } 3784 } 3785 if (virtual_type == htons(ICMP6_ECHO_REQUEST)) { 3786 u_int16_t icmpid = (icmp_dir == PF_IN) ? 
sport : dport; 3787 3788 if (icmpid != pd->hdr.icmp6->icmp6_id) { 3789 if (pd->csum_status == PF_CSUM_UNKNOWN) 3790 pf_check_proto_cksum(pd, pd->off, 3791 pd->tot_len - pd->off, pd->proto, 3792 pd->af); 3793 pd->hdr.icmp6->icmp6_id = icmpid; 3794 rewrite = 1; 3795 } 3796 } 3797 break; 3798 #endif /* INET6 */ 3799 3800 default: 3801 switch (pd->af) { 3802 case AF_INET: 3803 if (!afto && PF_ANEQ(saddr, pd->src, pd->af)) { 3804 pf_change_a(pd, &pd->src->v4.s_addr, 3805 saddr->v4.s_addr); 3806 rewrite = 1; 3807 } 3808 if (!afto && PF_ANEQ(daddr, pd->dst, pd->af)) { 3809 pf_change_a(pd, &pd->dst->v4.s_addr, 3810 daddr->v4.s_addr); 3811 rewrite = 1; 3812 } 3813 break; 3814 #ifdef INET6 3815 case AF_INET6: 3816 if (!afto && PF_ANEQ(saddr, pd->src, pd->af)) { 3817 pf_change_ap(pd, pd->src, NULL, saddr, 0); 3818 rewrite = 1; 3819 } 3820 if (!afto && PF_ANEQ(daddr, pd->dst, pd->af)) { 3821 pf_change_ap(pd, pd->dst, NULL, daddr, 0); 3822 rewrite = 1; 3823 } 3824 break; 3825 #endif /* INET6 */ 3826 } 3827 } 3828 return (rewrite); 3829 } 3830 3831 int 3832 pf_tcp_track_full(struct pf_pdesc *pd, struct pf_state_peer *src, 3833 struct pf_state_peer *dst, struct pf_state **state, u_short *reason, 3834 int *copyback) 3835 { 3836 struct tcphdr *th = pd->hdr.tcp; 3837 u_int16_t win = ntohs(th->th_win); 3838 u_int32_t ack, end, data_end, seq, orig_seq; 3839 u_int8_t sws, dws; 3840 int ackskew; 3841 3842 if (src->wscale && dst->wscale && !(th->th_flags & TH_SYN)) { 3843 sws = src->wscale & PF_WSCALE_MASK; 3844 dws = dst->wscale & PF_WSCALE_MASK; 3845 } else 3846 sws = dws = 0; 3847 3848 /* 3849 * Sequence tracking algorithm from Guido van Rooij's paper: 3850 * http://www.madison-gurkha.com/publications/tcp_filtering/ 3851 * tcp_filtering.ps 3852 */ 3853 3854 orig_seq = seq = ntohl(th->th_seq); 3855 if (src->seqlo == 0) { 3856 /* First packet from this end. 
Set its state */
3857
3858 		if (((*state)->state_flags & PFSTATE_SCRUB_TCP || dst->scrub) &&
3859 		    src->scrub == NULL) {
3860 			if (pf_normalize_tcp_init(pd, src)) {
3861 				REASON_SET(reason, PFRES_MEMORY);
3862 				return (PF_DROP);
3863 			}
3864 		}
3865
3866 		/* Deferred generation of sequence number modulator */
3867 		if (dst->seqdiff && !src->seqdiff) {
3868 			/* use random iss for the TCP server */
3869 			while ((src->seqdiff = arc4random() - seq) == 0)
3870 				continue;
3871 			ack = ntohl(th->th_ack) - dst->seqdiff;
3872 			pf_change_a(pd, &th->th_seq, htonl(seq + src->seqdiff));
3873 			pf_change_a(pd, &th->th_ack, htonl(ack));
3874 			*copyback = 1;
3875 		} else {
3876 			ack = ntohl(th->th_ack);
3877 		}
3878
3879 		end = seq + pd->p_len;
3880 		if (th->th_flags & TH_SYN) {
3881 			end++;
3882 			if (dst->wscale & PF_WSCALE_FLAG) {
3883 				src->wscale = pf_get_wscale(pd);
3884 				if (src->wscale & PF_WSCALE_FLAG) {
3885 					/* Remove scale factor from initial
3886 					 * window */
3887 					sws = src->wscale & PF_WSCALE_MASK;
3888 					win = ((u_int32_t)win + (1 << sws) - 1)
3889 					    >> sws;
3890 					dws = dst->wscale & PF_WSCALE_MASK;
3891 				} else {
3892 					/* fixup other window */
3893 					dst->max_win = MIN(TCP_MAXWIN,
3894 					    (u_int32_t)dst->max_win <<
3895 					    (dst->wscale & PF_WSCALE_MASK));
3896 					/* in case of a retrans SYN|ACK */
3897 					dst->wscale = 0;
3898 				}
3899 			}
3900 		}
3901 		data_end = end;
3902 		if (th->th_flags & TH_FIN)
3903 			end++;
3904
3905 		src->seqlo = seq;
3906 		if (src->state < TCPS_SYN_SENT)
3907 			src->state = TCPS_SYN_SENT;
3908
3909 		/*
3910 		 * May need to slide the window (seqhi may have been set by
3911 		 * the crappy stack check or if we picked up the connection
3912 		 * after establishment)
3913 		 */
3914 		if (src->seqhi == 1 ||
3915 		    SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi))
3916 			src->seqhi = end + MAX(1, dst->max_win << dws);
3917 		if (win > src->max_win)
3918 			src->max_win = win;
3919
3920 	} else {
3921 		ack = ntohl(th->th_ack) - dst->seqdiff;
3922 		if (src->seqdiff) {
3923 			/* Modulate sequence numbers */
3924 			pf_change_a(pd, &th->th_seq, htonl(seq + src->seqdiff));
3925 			pf_change_a(pd, &th->th_ack, htonl(ack));
3926 			*copyback = 1;
3927 		}
3928 		end = seq + pd->p_len;
3929 		if (th->th_flags & TH_SYN)
3930 			end++;
3931 		data_end = end;
3932 		if (th->th_flags & TH_FIN)
3933 			end++;
3934 	}
3935
3936 	if ((th->th_flags & TH_ACK) == 0) {
3937 		/* Let it pass through the ack skew check */
3938 		ack = dst->seqlo;
3939 	} else if ((ack == 0 &&
3940 	    (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) ||
3941 	    /* broken tcp stacks do not set ack */
3942 	    (dst->state < TCPS_SYN_SENT)) {
3943 		/*
3944 		 * Many stacks (ours included) will set the ACK number in a
3945 		 * FIN|ACK if the SYN times out -- no sequence to ACK.
3946 		 */
3947 		ack = dst->seqlo;
3948 	}
3949
3950 	if (seq == end) {
3951 		/* Ease sequencing restrictions on no data packets */
3952 		seq = src->seqlo;
3953 		data_end = end = seq;
3954 	}
3955
3956 	ackskew = dst->seqlo - ack;
3957
3958
3959 	/*
3960 	 * Need to demodulate the sequence numbers in any TCP SACK options
3961 	 * (Selective ACK). We could optionally validate the SACK values
3962 	 * against the current ACK window, either forwards or backwards, but
3963 	 * I'm not confident that SACK has been implemented properly
3964 	 * everywhere. It wouldn't surprise me if several stacks accidentally
3965 	 * SACK too far backwards of previously ACKed data. There really aren't
3966 	 * any security implications of bad SACKing unless the target stack
3967 	 * doesn't validate the option length correctly.
Someone trying to 3968 * spoof into a TCP connection won't bother blindly sending SACK 3969 * options anyway. 3970 */ 3971 if (dst->seqdiff && (th->th_off << 2) > sizeof(struct tcphdr)) { 3972 if (pf_modulate_sack(pd, dst)) 3973 *copyback = 1; 3974 } 3975 3976 3977 #define MAXACKWINDOW (0xffff + 1500) /* 1500 is an arbitrary fudge factor */ 3978 if (SEQ_GEQ(src->seqhi, data_end) && 3979 /* Last octet inside other's window space */ 3980 SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) && 3981 /* Retrans: not more than one window back */ 3982 (ackskew >= -MAXACKWINDOW) && 3983 /* Acking not more than one reassembled fragment backwards */ 3984 (ackskew <= (MAXACKWINDOW << sws)) && 3985 /* Acking not more than one window forward */ 3986 ((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo || 3987 (orig_seq == src->seqlo + 1) || (orig_seq + 1 == src->seqlo))) { 3988 /* Require an exact/+1 sequence match on resets when possible */ 3989 3990 if (dst->scrub || src->scrub) { 3991 if (pf_normalize_tcp_stateful(pd, reason, *state, src, 3992 dst, copyback)) 3993 return (PF_DROP); 3994 } 3995 3996 /* update max window */ 3997 if (src->max_win < win) 3998 src->max_win = win; 3999 /* synchronize sequencing */ 4000 if (SEQ_GT(end, src->seqlo)) 4001 src->seqlo = end; 4002 /* slide the window of what the other end can send */ 4003 if (SEQ_GEQ(ack + (win << sws), dst->seqhi)) 4004 dst->seqhi = ack + MAX((win << sws), 1); 4005 4006 /* update states */ 4007 if (th->th_flags & TH_SYN) 4008 if (src->state < TCPS_SYN_SENT) 4009 src->state = TCPS_SYN_SENT; 4010 if (th->th_flags & TH_FIN) 4011 if (src->state < TCPS_CLOSING) 4012 src->state = TCPS_CLOSING; 4013 if (th->th_flags & TH_ACK) { 4014 if (dst->state == TCPS_SYN_SENT) { 4015 dst->state = TCPS_ESTABLISHED; 4016 if (src->state == TCPS_ESTABLISHED && 4017 !SLIST_EMPTY(&(*state)->src_nodes) && 4018 pf_src_connlimit(state)) { 4019 REASON_SET(reason, PFRES_SRCLIMIT); 4020 return (PF_DROP); 4021 } 4022 } else if (dst->state == TCPS_CLOSING) 4023 dst->state = TCPS_FIN_WAIT_2; 4024 } 4025 if (th->th_flags & TH_RST) 4026 src->state = dst->state = TCPS_TIME_WAIT; 4027 4028 /* update expire time */ 4029 (*state)->expire = time_uptime; 4030 if (src->state >= TCPS_FIN_WAIT_2 && 4031 dst->state >= TCPS_FIN_WAIT_2) 4032 (*state)->timeout = PFTM_TCP_CLOSED; 4033 else if (src->state >= TCPS_CLOSING && 4034 dst->state >= TCPS_CLOSING) 4035 (*state)->timeout = PFTM_TCP_FIN_WAIT; 4036 else if (src->state < TCPS_ESTABLISHED || 4037 dst->state < TCPS_ESTABLISHED) 4038 (*state)->timeout = PFTM_TCP_OPENING; 4039 else if (src->state >= TCPS_CLOSING || 4040 dst->state >= TCPS_CLOSING) 4041 (*state)->timeout = PFTM_TCP_CLOSING; 4042 else 4043 (*state)->timeout = PFTM_TCP_ESTABLISHED; 4044 4045 /* Fall through to PASS packet */ 4046 } else if ((dst->state < TCPS_SYN_SENT || 4047 dst->state >= TCPS_FIN_WAIT_2 || 4048 src->state >= TCPS_FIN_WAIT_2) && 4049 SEQ_GEQ(src->seqhi + MAXACKWINDOW, data_end) && 4050 /* Within a window forward of the originating packet */ 4051 SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) { 4052 /* Within a window backward of the originating packet */ 4053 4054 /* 4055 * This currently handles three situations: 4056 * 1) Stupid stacks will shotgun SYNs before their peer 4057 * replies. 4058 * 2) When PF catches an already established stream (the 4059 * firewall rebooted, the state table was flushed, routes 4060 * changed...) 
4061 * 3) Packets get funky immediately after the connection 4062 * closes (this should catch Solaris spurious ACK|FINs 4063 * that web servers like to spew after a close) 4064 * 4065 * This must be a little more careful than the above code 4066 * since packet floods will also be caught here. We don't 4067 * update the TTL here to mitigate the damage of a packet 4068 * flood and so the same code can handle awkward establishment 4069 * and a loosened connection close. 4070 * In the establishment case, a correct peer response will 4071 * validate the connection, go through the normal state code 4072 * and keep updating the state TTL. 4073 */ 4074 4075 if (pf_status.debug >= LOG_NOTICE) { 4076 log(LOG_NOTICE, "pf: loose state match: "); 4077 pf_print_state(*state); 4078 pf_print_flags(th->th_flags); 4079 addlog(" seq=%u (%u) ack=%u len=%u ackskew=%d " 4080 "pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack, 4081 pd->p_len, ackskew, (*state)->packets[0], 4082 (*state)->packets[1], 4083 pd->dir == PF_IN ? "in" : "out", 4084 pd->dir == (*state)->direction ? "fwd" : "rev"); 4085 } 4086 4087 if (dst->scrub || src->scrub) { 4088 if (pf_normalize_tcp_stateful(pd, reason, *state, src, 4089 dst, copyback)) 4090 return (PF_DROP); 4091 } 4092 4093 /* update max window */ 4094 if (src->max_win < win) 4095 src->max_win = win; 4096 /* synchronize sequencing */ 4097 if (SEQ_GT(end, src->seqlo)) 4098 src->seqlo = end; 4099 /* slide the window of what the other end can send */ 4100 if (SEQ_GEQ(ack + (win << sws), dst->seqhi)) 4101 dst->seqhi = ack + MAX((win << sws), 1); 4102 4103 /* 4104 * Cannot set dst->seqhi here since this could be a shotgunned 4105 * SYN and not an already established connection. 4106 */ 4107 if (th->th_flags & TH_FIN) 4108 if (src->state < TCPS_CLOSING) 4109 src->state = TCPS_CLOSING; 4110 if (th->th_flags & TH_RST) 4111 src->state = dst->state = TCPS_TIME_WAIT; 4112 4113 /* Fall through to PASS packet */ 4114 } else { 4115 if ((*state)->dst.state == TCPS_SYN_SENT && 4116 (*state)->src.state == TCPS_SYN_SENT) { 4117 /* Send RST for state mismatches during handshake */ 4118 if (!(th->th_flags & TH_RST)) 4119 pf_send_tcp((*state)->rule.ptr, pd->af, 4120 pd->dst, pd->src, th->th_dport, 4121 th->th_sport, ntohl(th->th_ack), 0, 4122 TH_RST, 0, 0, 4123 (*state)->rule.ptr->return_ttl, 1, 0, 4124 pd->rdomain); 4125 src->seqlo = 0; 4126 src->seqhi = 1; 4127 src->max_win = 1; 4128 } else if (pf_status.debug >= LOG_NOTICE) { 4129 log(LOG_NOTICE, "pf: BAD state: "); 4130 pf_print_state(*state); 4131 pf_print_flags(th->th_flags); 4132 addlog(" seq=%u (%u) ack=%u len=%u ackskew=%d " 4133 "pkts=%llu:%llu dir=%s,%s\n", 4134 seq, orig_seq, ack, pd->p_len, ackskew, 4135 (*state)->packets[0], (*state)->packets[1], 4136 pd->dir == PF_IN ? "in" : "out", 4137 pd->dir == (*state)->direction ? "fwd" : "rev"); 4138 addlog("pf: State failure on: %c %c %c %c | %c %c\n", 4139 SEQ_GEQ(src->seqhi, data_end) ? ' ' : '1', 4140 SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ? 4141 ' ': '2', 4142 (ackskew >= -MAXACKWINDOW) ? ' ' : '3', 4143 (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4', 4144 SEQ_GEQ(src->seqhi + MAXACKWINDOW, data_end) ? 4145 ' ' :'5', 4146 SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' 
' :'6'); 4147 } 4148 REASON_SET(reason, PFRES_BADSTATE); 4149 return (PF_DROP); 4150 } 4151 4152 return (PF_PASS); 4153 } 4154 4155 int 4156 pf_tcp_track_sloppy(struct pf_pdesc *pd, struct pf_state_peer *src, 4157 struct pf_state_peer *dst, struct pf_state **state, u_short *reason) 4158 { 4159 struct tcphdr *th = pd->hdr.tcp; 4160 4161 if (th->th_flags & TH_SYN) 4162 if (src->state < TCPS_SYN_SENT) 4163 src->state = TCPS_SYN_SENT; 4164 if (th->th_flags & TH_FIN) 4165 if (src->state < TCPS_CLOSING) 4166 src->state = TCPS_CLOSING; 4167 if (th->th_flags & TH_ACK) { 4168 if (dst->state == TCPS_SYN_SENT) { 4169 dst->state = TCPS_ESTABLISHED; 4170 if (src->state == TCPS_ESTABLISHED && 4171 !SLIST_EMPTY(&(*state)->src_nodes) && 4172 pf_src_connlimit(state)) { 4173 REASON_SET(reason, PFRES_SRCLIMIT); 4174 return (PF_DROP); 4175 } 4176 } else if (dst->state == TCPS_CLOSING) { 4177 dst->state = TCPS_FIN_WAIT_2; 4178 } else if (src->state == TCPS_SYN_SENT && 4179 dst->state < TCPS_SYN_SENT) { 4180 /* 4181 * Handle a special sloppy case where we only see one 4182 * half of the connection. If there is an ACK after 4183 * the initial SYN without ever seeing a packet from 4184 * the destination, set the connection to established. 4185 */ 4186 dst->state = src->state = TCPS_ESTABLISHED; 4187 if (!SLIST_EMPTY(&(*state)->src_nodes) && 4188 pf_src_connlimit(state)) { 4189 REASON_SET(reason, PFRES_SRCLIMIT); 4190 return (PF_DROP); 4191 } 4192 } else if (src->state == TCPS_CLOSING && 4193 dst->state == TCPS_ESTABLISHED && 4194 dst->seqlo == 0) { 4195 /* 4196 * Handle the closing of half connections where we 4197 * don't see the full bidirectional FIN/ACK+ACK 4198 * handshake. 4199 */ 4200 dst->state = TCPS_CLOSING; 4201 } 4202 } 4203 if (th->th_flags & TH_RST) 4204 src->state = dst->state = TCPS_TIME_WAIT; 4205 4206 /* update expire time */ 4207 (*state)->expire = time_uptime; 4208 if (src->state >= TCPS_FIN_WAIT_2 && 4209 dst->state >= TCPS_FIN_WAIT_2) 4210 (*state)->timeout = PFTM_TCP_CLOSED; 4211 else if (src->state >= TCPS_CLOSING && 4212 dst->state >= TCPS_CLOSING) 4213 (*state)->timeout = PFTM_TCP_FIN_WAIT; 4214 else if (src->state < TCPS_ESTABLISHED || 4215 dst->state < TCPS_ESTABLISHED) 4216 (*state)->timeout = PFTM_TCP_OPENING; 4217 else if (src->state >= TCPS_CLOSING || 4218 dst->state >= TCPS_CLOSING) 4219 (*state)->timeout = PFTM_TCP_CLOSING; 4220 else 4221 (*state)->timeout = PFTM_TCP_ESTABLISHED; 4222 4223 return (PF_PASS); 4224 } 4225 4226 static __inline int 4227 pf_synproxy(struct pf_pdesc *pd, struct pf_state **state, u_short *reason) 4228 { 4229 struct pf_state_key *sk = (*state)->key[pd->didx]; 4230 4231 if ((*state)->src.state == PF_TCPS_PROXY_SRC) { 4232 struct tcphdr *th = pd->hdr.tcp; 4233 4234 if (pd->dir != (*state)->direction) { 4235 REASON_SET(reason, PFRES_SYNPROXY); 4236 return (PF_SYNPROXY_DROP); 4237 } 4238 if (th->th_flags & TH_SYN) { 4239 if (ntohl(th->th_seq) != (*state)->src.seqlo) { 4240 REASON_SET(reason, PFRES_SYNPROXY); 4241 return (PF_DROP); 4242 } 4243 pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst, 4244 pd->src, th->th_dport, th->th_sport, 4245 (*state)->src.seqhi, ntohl(th->th_seq) + 1, 4246 TH_SYN|TH_ACK, 0, (*state)->src.mss, 0, 1, 4247 0, pd->rdomain); 4248 REASON_SET(reason, PFRES_SYNPROXY); 4249 return (PF_SYNPROXY_DROP); 4250 } else if ((th->th_flags & (TH_ACK|TH_RST|TH_FIN)) != TH_ACK || 4251 (ntohl(th->th_ack) != (*state)->src.seqhi + 1) || 4252 (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) { 4253 REASON_SET(reason, PFRES_SYNPROXY); 4254 return (PF_DROP);
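/* A worked example with made-up numbers: if the client's SYN carried ISN 999 (saved in src.seqlo) and the proxy replied with its own ISN src.seqhi = 5000, only a pure ACK with th_seq == 1000 and th_ack == 5001 completes the proxy handshake; anything else was rejected above, before any connection to the real destination is attempted. */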
4255 } else if (!SLIST_EMPTY(&(*state)->src_nodes) && 4256 pf_src_connlimit(state)) { 4257 REASON_SET(reason, PFRES_SRCLIMIT); 4258 return (PF_DROP); 4259 } else 4260 (*state)->src.state = PF_TCPS_PROXY_DST; 4261 } 4262 if ((*state)->src.state == PF_TCPS_PROXY_DST) { 4263 struct tcphdr *th = pd->hdr.tcp; 4264 4265 if (pd->dir == (*state)->direction) { 4266 if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) || 4267 (ntohl(th->th_ack) != (*state)->src.seqhi + 1) || 4268 (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) { 4269 REASON_SET(reason, PFRES_SYNPROXY); 4270 return (PF_DROP); 4271 } 4272 (*state)->src.max_win = MAX(ntohs(th->th_win), 1); 4273 if ((*state)->dst.seqhi == 1) 4274 (*state)->dst.seqhi = arc4random(); 4275 pf_send_tcp((*state)->rule.ptr, pd->af, 4276 &sk->addr[pd->sidx], &sk->addr[pd->didx], 4277 sk->port[pd->sidx], sk->port[pd->didx], 4278 (*state)->dst.seqhi, 0, TH_SYN, 0, 4279 (*state)->src.mss, 0, 0, (*state)->tag, 4280 sk->rdomain); 4281 REASON_SET(reason, PFRES_SYNPROXY); 4282 return (PF_SYNPROXY_DROP); 4283 } else if (((th->th_flags & (TH_SYN|TH_ACK)) != 4284 (TH_SYN|TH_ACK)) || 4285 (ntohl(th->th_ack) != (*state)->dst.seqhi + 1)) { 4286 REASON_SET(reason, PFRES_SYNPROXY); 4287 return (PF_DROP); 4288 } else { 4289 (*state)->dst.max_win = MAX(ntohs(th->th_win), 1); 4290 (*state)->dst.seqlo = ntohl(th->th_seq); 4291 pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst, 4292 pd->src, th->th_dport, th->th_sport, 4293 ntohl(th->th_ack), ntohl(th->th_seq) + 1, 4294 TH_ACK, (*state)->src.max_win, 0, 0, 0, 4295 (*state)->tag, pd->rdomain); 4296 pf_send_tcp((*state)->rule.ptr, pd->af, 4297 &sk->addr[pd->sidx], &sk->addr[pd->didx], 4298 sk->port[pd->sidx], sk->port[pd->didx], 4299 (*state)->src.seqhi + 1, (*state)->src.seqlo + 1, 4300 TH_ACK, (*state)->dst.max_win, 0, 0, 1, 4301 0, sk->rdomain); 4302 (*state)->src.seqdiff = (*state)->dst.seqhi - 4303 (*state)->src.seqlo; 4304 (*state)->dst.seqdiff = (*state)->src.seqhi - 4305 (*state)->dst.seqlo; 4306 (*state)->src.seqhi = (*state)->src.seqlo + 4307 (*state)->dst.max_win; 4308 (*state)->dst.seqhi = (*state)->dst.seqlo + 4309 (*state)->src.max_win; 4310 (*state)->src.wscale = (*state)->dst.wscale = 0; 4311 (*state)->src.state = (*state)->dst.state = 4312 TCPS_ESTABLISHED; 4313 REASON_SET(reason, PFRES_SYNPROXY); 4314 return (PF_SYNPROXY_DROP); 4315 } 4316 } 4317 return (PF_PASS); 4318 } 4319 4320 int 4321 pf_test_state(struct pf_pdesc *pd, struct pf_state **state, u_short *reason) 4322 { 4323 struct pf_state_key_cmp key; 4324 int copyback = 0; 4325 struct pf_state_peer *src, *dst; 4326 int action = PF_PASS; 4327 struct inpcb *inp; 4328 4329 key.af = pd->af; 4330 key.proto = pd->virtual_proto; 4331 key.rdomain = pd->rdomain; 4332 PF_ACPY(&key.addr[pd->sidx], pd->src, key.af); 4333 PF_ACPY(&key.addr[pd->didx], pd->dst, key.af); 4334 key.port[pd->sidx] = pd->osport; 4335 key.port[pd->didx] = pd->odport; 4336 inp = pd->m->m_pkthdr.pf.inp; 4337 4338 STATE_LOOKUP(pd->kif, &key, pd->dir, *state, pd->m); 4339 4340 if (pd->dir == (*state)->direction) { 4341 src = &(*state)->src; 4342 dst = &(*state)->dst; 4343 } else { 4344 src = &(*state)->dst; 4345 dst = &(*state)->src; 4346 } 4347 4348 switch (pd->virtual_proto) { 4349 case IPPROTO_TCP: 4350 if ((action = pf_synproxy(pd, state, reason)) != PF_PASS) 4351 return (action); 4352 if (((pd->hdr.tcp->th_flags & (TH_SYN|TH_ACK)) == TH_SYN) && 4353 dst->state >= TCPS_FIN_WAIT_2 && 4354 src->state >= TCPS_FIN_WAIT_2) { 4355 if (pf_status.debug >= LOG_NOTICE) { 4356 log(LOG_NOTICE, "pf: state reuse "); 
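/* A fresh SYN on a fully closed state: tear the old state down below and drop this packet; the SYN's retransmission is then evaluated against the ruleset and may create a new state. */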
4357 pf_print_state(*state); 4358 pf_print_flags(pd->hdr.tcp->th_flags); 4359 addlog("\n"); 4360 } 4361 /* XXX make sure it's the same direction ?? */ 4362 (*state)->src.state = (*state)->dst.state = TCPS_CLOSED; 4363 pf_remove_state(*state); 4364 *state = NULL; 4365 pd->m->m_pkthdr.pf.inp = inp; 4366 return (PF_DROP); 4367 } 4368 4369 if ((*state)->state_flags & PFSTATE_SLOPPY) { 4370 if (pf_tcp_track_sloppy(pd, src, dst, state, reason) == 4371 PF_DROP) 4372 return (PF_DROP); 4373 } else { 4374 int ret; 4375 4376 if (PF_REVERSED_KEY((*state)->key, pd->af)) 4377 ret = pf_tcp_track_full(pd, dst, src, state, 4378 reason, &copyback); 4379 else 4380 ret = pf_tcp_track_full(pd, src, dst, state, 4381 reason, &copyback); 4382 if (ret == PF_DROP) 4383 return (PF_DROP); 4384 } 4385 break; 4386 case IPPROTO_UDP: 4387 /* update states */ 4388 if (src->state < PFUDPS_SINGLE) 4389 src->state = PFUDPS_SINGLE; 4390 if (dst->state == PFUDPS_SINGLE) 4391 dst->state = PFUDPS_MULTIPLE; 4392 4393 /* update expire time */ 4394 (*state)->expire = time_uptime; 4395 if (src->state == PFUDPS_MULTIPLE && 4396 dst->state == PFUDPS_MULTIPLE) 4397 (*state)->timeout = PFTM_UDP_MULTIPLE; 4398 else 4399 (*state)->timeout = PFTM_UDP_SINGLE; 4400 break; 4401 default: 4402 /* update states */ 4403 if (src->state < PFOTHERS_SINGLE) 4404 src->state = PFOTHERS_SINGLE; 4405 if (dst->state == PFOTHERS_SINGLE) 4406 dst->state = PFOTHERS_MULTIPLE; 4407 4408 /* update expire time */ 4409 (*state)->expire = time_uptime; 4410 if (src->state == PFOTHERS_MULTIPLE && 4411 dst->state == PFOTHERS_MULTIPLE) 4412 (*state)->timeout = PFTM_OTHER_MULTIPLE; 4413 else 4414 (*state)->timeout = PFTM_OTHER_SINGLE; 4415 break; 4416 } 4417 4418 /* translate source/destination address, if necessary */ 4419 if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { 4420 struct pf_state_key *nk; 4421 int afto, sidx, didx; 4422 4423 if (PF_REVERSED_KEY((*state)->key, pd->af)) 4424 nk = (*state)->key[pd->sidx]; 4425 else 4426 nk = (*state)->key[pd->didx]; 4427 4428 afto = pd->af != nk->af; 4429 sidx = afto ? pd->didx : pd->sidx; 4430 didx = afto ?
pd->sidx : pd->didx; 4431 4432 #ifdef INET6 4433 if (afto) { 4434 PF_ACPY(&pd->nsaddr, &nk->addr[sidx], nk->af); 4435 PF_ACPY(&pd->ndaddr, &nk->addr[didx], nk->af); 4436 pd->naf = nk->af; 4437 action = PF_AFRT; 4438 } 4439 #endif /* INET6 */ 4440 4441 if (afto || PF_ANEQ(pd->src, &nk->addr[sidx], pd->af) || 4442 nk->port[sidx] != pd->osport) 4443 pf_change_ap(pd, pd->src, pd->sport, 4444 &nk->addr[sidx], nk->port[sidx]); 4445 4446 if (afto || PF_ANEQ(pd->dst, &nk->addr[didx], pd->af) || 4447 pd->rdomain != nk->rdomain) 4448 pd->destchg = 1; 4449 4450 if (afto || PF_ANEQ(pd->dst, &nk->addr[didx], pd->af) || 4451 nk->port[didx] != pd->odport) 4452 pf_change_ap(pd, pd->dst, pd->dport, 4453 &nk->addr[didx], nk->port[didx]); 4454 4455 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 4456 copyback = 1; 4457 } 4458 4459 if (copyback && pd->hdrlen > 0) { 4460 pf_cksum(pd, pd->m); 4461 m_copyback(pd->m, pd->off, pd->hdrlen, pd->hdr.any, M_NOWAIT); 4462 } 4463 4464 return (action); 4465 } 4466 4467 int 4468 pf_icmp_state_lookup(struct pf_pdesc *pd, struct pf_state_key_cmp *key, 4469 struct pf_state **state, u_int16_t icmpid, u_int16_t type, 4470 int icmp_dir, int *iidx, int multi, int inner) 4471 { 4472 int direction; 4473 4474 key->af = pd->af; 4475 key->proto = pd->proto; 4476 key->rdomain = pd->rdomain; 4477 if (icmp_dir == PF_IN) { 4478 *iidx = pd->sidx; 4479 key->port[pd->sidx] = icmpid; 4480 key->port[pd->didx] = type; 4481 } else { 4482 *iidx = pd->didx; 4483 key->port[pd->sidx] = type; 4484 key->port[pd->didx] = icmpid; 4485 } 4486 4487 if (pf_state_key_addr_setup(pd, key, pd->sidx, pd->src, pd->didx, 4488 pd->dst, pd->af, multi)) 4489 return (PF_DROP); 4490 4491 STATE_LOOKUP(pd->kif, key, pd->dir, *state, pd->m); 4492 4493 if ((*state)->state_flags & PFSTATE_SLOPPY) 4494 return (-1); 4495 4496 /* Is this ICMP message flowing in right direction? */ 4497 if ((*state)->key[PF_SK_WIRE]->af != (*state)->key[PF_SK_STACK]->af) 4498 direction = (pd->af == (*state)->key[PF_SK_WIRE]->af) ? 4499 PF_IN : PF_OUT; 4500 else 4501 direction = (*state)->direction; 4502 if ((((!inner && direction == pd->dir) || 4503 (inner && direction != pd->dir)) ? 4504 PF_IN : PF_OUT) != icmp_dir) { 4505 if (pf_status.debug >= LOG_NOTICE) { 4506 log(LOG_NOTICE, 4507 "pf: icmp type %d in wrong direction (%d): ", 4508 ntohs(type), icmp_dir); 4509 pf_print_state(*state); 4510 addlog("\n"); 4511 } 4512 return (PF_DROP); 4513 } 4514 return (-1); 4515 } 4516 4517 int 4518 pf_test_state_icmp(struct pf_pdesc *pd, struct pf_state **state, 4519 u_short *reason) 4520 { 4521 struct pf_addr *saddr = pd->src, *daddr = pd->dst; 4522 u_int16_t virtual_id, virtual_type; 4523 u_int8_t icmptype; 4524 int icmp_dir, iidx, ret, copyback = 0; 4525 4526 struct pf_state_key_cmp key; 4527 4528 switch (pd->proto) { 4529 case IPPROTO_ICMP: 4530 icmptype = pd->hdr.icmp->icmp_type; 4531 break; 4532 #ifdef INET6 4533 case IPPROTO_ICMPV6: 4534 icmptype = pd->hdr.icmp6->icmp6_type; 4535 break; 4536 #endif /* INET6 */ 4537 default: 4538 panic("unhandled proto %d", pd->proto); 4539 } 4540 4541 if (pf_icmp_mapping(pd, icmptype, &icmp_dir, &virtual_id, 4542 &virtual_type) == 0) { 4543 /* 4544 * ICMP query/reply message not related to a TCP/UDP packet. 4545 * Search for an ICMP state. 4546 */ 4547 ret = pf_icmp_state_lookup(pd, &key, state, 4548 virtual_id, virtual_type, icmp_dir, &iidx, 4549 0, 0); 4550 /* IPv6? 
try matching a multicast address */ 4551 if (ret == PF_DROP && pd->af == AF_INET6 && icmp_dir == PF_OUT) 4552 ret = pf_icmp_state_lookup(pd, &key, state, virtual_id, 4553 virtual_type, icmp_dir, &iidx, 1, 0); 4554 if (ret >= 0) 4555 return (ret); 4556 4557 (*state)->expire = time_uptime; 4558 (*state)->timeout = PFTM_ICMP_ERROR_REPLY; 4559 4560 /* translate source/destination address, if necessary */ 4561 if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { 4562 struct pf_state_key *nk; 4563 int afto, sidx, didx; 4564 4565 if (PF_REVERSED_KEY((*state)->key, pd->af)) 4566 nk = (*state)->key[pd->sidx]; 4567 else 4568 nk = (*state)->key[pd->didx]; 4569 4570 afto = pd->af != nk->af; 4571 sidx = afto ? pd->didx : pd->sidx; 4572 didx = afto ? pd->sidx : pd->didx; 4573 iidx = afto ? !iidx : iidx; 4574 4575 if (pd->rdomain != nk->rdomain) 4576 pd->destchg = 1; 4577 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 4578 4579 switch (pd->af) { 4580 case AF_INET: 4581 #ifdef INET6 4582 if (afto) { 4583 if (pf_translate_icmp_af(AF_INET6, 4584 pd->hdr.icmp)) 4585 return (PF_DROP); 4586 pd->proto = IPPROTO_ICMPV6; 4587 } 4588 #endif /* INET6 */ 4589 if (!afto && PF_ANEQ(pd->src, 4590 &nk->addr[sidx], AF_INET)) 4591 pf_change_a(pd, &saddr->v4.s_addr, 4592 nk->addr[sidx].v4.s_addr); 4593 4594 if (!afto && PF_ANEQ(pd->dst, 4595 &nk->addr[didx], AF_INET)) { 4596 pf_change_a(pd, &daddr->v4.s_addr, 4597 nk->addr[didx].v4.s_addr); 4598 pd->destchg = 1; 4599 } 4600 4601 if (nk->port[iidx] != pd->hdr.icmp->icmp_id) { 4602 if (pd->csum_status == PF_CSUM_UNKNOWN) 4603 pf_check_proto_cksum(pd, 4604 pd->off, pd->tot_len - 4605 pd->off, pd->proto, pd->af); 4606 pd->hdr.icmp->icmp_id = nk->port[iidx]; 4607 } 4608 4609 m_copyback(pd->m, pd->off, ICMP_MINLEN, 4610 pd->hdr.icmp, M_NOWAIT); 4611 copyback = 1; 4612 break; 4613 #ifdef INET6 4614 case AF_INET6: 4615 if (afto) { 4616 if (pf_translate_icmp_af(AF_INET, 4617 pd->hdr.icmp6)) 4618 return (PF_DROP); 4619 pd->proto = IPPROTO_ICMP; 4620 } 4621 if (!afto && PF_ANEQ(pd->src, 4622 &nk->addr[sidx], AF_INET6)) 4623 pf_change_ap(pd, saddr, NULL, 4624 &nk->addr[sidx], 0); 4625 4626 if (!afto && PF_ANEQ(pd->dst, 4627 &nk->addr[didx], AF_INET6)) { 4628 pf_change_ap(pd, daddr, NULL, 4629 &nk->addr[didx], 0); 4630 pd->destchg = 1; 4631 } 4632 4633 if (nk->port[iidx] != pd->hdr.icmp6->icmp6_id) { 4634 if (pd->csum_status == PF_CSUM_UNKNOWN) 4635 pf_check_proto_cksum(pd, 4636 pd->off, pd->tot_len - 4637 pd->off, pd->proto, pd->af); 4638 pd->hdr.icmp6->icmp6_id = 4639 nk->port[iidx]; 4640 } 4641 4642 m_copyback(pd->m, pd->off, 4643 sizeof(struct icmp6_hdr), pd->hdr.icmp6, 4644 M_NOWAIT); 4645 copyback = 1; 4646 break; 4647 #endif /* INET6 */ 4648 } 4649 #ifdef INET6 4650 if (afto) { 4651 PF_ACPY(&pd->nsaddr, &nk->addr[sidx], nk->af); 4652 PF_ACPY(&pd->ndaddr, &nk->addr[didx], nk->af); 4653 pd->naf = nk->af; 4654 return (PF_AFRT); 4655 } 4656 #endif /* INET6 */ 4657 } 4658 } else { 4659 /* 4660 * ICMP error message in response to a TCP/UDP packet. 4661 * Extract the inner TCP/UDP header and search for that state. 4662 */ 4663 struct pf_pdesc pd2; 4664 struct ip h2; 4665 #ifdef INET6 4666 struct ip6_hdr h2_6; 4667 #endif /* INET6 */ 4668 int ipoff2; 4669 4670 /* Initialize pd2 fields valid for both packets with pd. */ 4671 bzero(&pd2, sizeof(pd2)); 4672 pd2.af = pd->af; 4673 pd2.dir = pd->dir; 4674 pd2.kif = pd->kif; 4675 pd2.m = pd->m; 4676 pd2.rdomain = pd->rdomain; 4677 /* Payload packet is from the opposite direction. */ 4678 pd2.sidx = (pd2.dir == PF_IN) ? 
1 : 0; 4679 pd2.didx = (pd2.dir == PF_IN) ? 0 : 1; 4680 switch (pd->af) { 4681 case AF_INET: 4682 /* offset of h2 in mbuf chain */ 4683 ipoff2 = pd->off + ICMP_MINLEN; 4684 4685 if (!pf_pull_hdr(pd2.m, ipoff2, &h2, sizeof(h2), 4686 NULL, reason, pd2.af)) { 4687 DPFPRINTF(LOG_NOTICE, 4688 "ICMP error message too short (ip)"); 4689 return (PF_DROP); 4690 } 4691 /* 4692 * ICMP error messages don't refer to non-first 4693 * fragments 4694 */ 4695 if (h2.ip_off & htons(IP_OFFMASK)) { 4696 REASON_SET(reason, PFRES_FRAG); 4697 return (PF_DROP); 4698 } 4699 4700 /* offset of protocol header that follows h2 */ 4701 pd2.off = ipoff2 + (h2.ip_hl << 2); 4702 4703 pd2.proto = h2.ip_p; 4704 pd2.tot_len = ntohs(h2.ip_len); 4705 pd2.src = (struct pf_addr *)&h2.ip_src; 4706 pd2.dst = (struct pf_addr *)&h2.ip_dst; 4707 break; 4708 #ifdef INET6 4709 case AF_INET6: 4710 ipoff2 = pd->off + sizeof(struct icmp6_hdr); 4711 4712 if (!pf_pull_hdr(pd2.m, ipoff2, &h2_6, sizeof(h2_6), 4713 NULL, reason, pd2.af)) { 4714 DPFPRINTF(LOG_NOTICE, 4715 "ICMP error message too short (ip6)"); 4716 return (PF_DROP); 4717 } 4718 4719 pd2.off = ipoff2; 4720 if (pf_walk_header6(&pd2, &h2_6, reason) != PF_PASS) 4721 return (PF_DROP); 4722 4723 pd2.tot_len = ntohs(h2_6.ip6_plen) + 4724 sizeof(struct ip6_hdr); 4725 pd2.src = (struct pf_addr *)&h2_6.ip6_src; 4726 pd2.dst = (struct pf_addr *)&h2_6.ip6_dst; 4727 break; 4728 #endif /* INET6 */ 4729 default: 4730 unhandled_af(pd->af); 4731 } 4732 4733 switch (pd2.proto) { 4734 case IPPROTO_TCP: { 4735 struct tcphdr th; 4736 u_int32_t seq; 4737 struct pf_state_peer *src, *dst; 4738 u_int8_t dws; 4739 4740 /* 4741 * Only the first 8 bytes of the TCP header can be 4742 * expected. Don't access any TCP header fields after 4743 * th_seq, an ackskew test is not possible. 
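* (RFC 792 only guarantees that the IP header plus the first 8 octets of the offending datagram are quoted, which covers the TCP ports and th_seq but not th_ack.)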
4744 */ 4745 if (!pf_pull_hdr(pd2.m, pd2.off, &th, 8, NULL, reason, 4746 pd2.af)) { 4747 DPFPRINTF(LOG_NOTICE, 4748 "ICMP error message too short (tcp)"); 4749 return (PF_DROP); 4750 } 4751 4752 key.af = pd2.af; 4753 key.proto = IPPROTO_TCP; 4754 key.rdomain = pd2.rdomain; 4755 PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); 4756 PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); 4757 key.port[pd2.sidx] = th.th_sport; 4758 key.port[pd2.didx] = th.th_dport; 4759 4760 STATE_LOOKUP(pd2.kif, &key, pd2.dir, *state, pd2.m); 4761 4762 if (pd2.dir == (*state)->direction) { 4763 if (PF_REVERSED_KEY((*state)->key, pd->af)) { 4764 src = &(*state)->src; 4765 dst = &(*state)->dst; 4766 } else { 4767 src = &(*state)->dst; 4768 dst = &(*state)->src; 4769 } 4770 } else { 4771 if (PF_REVERSED_KEY((*state)->key, pd->af)) { 4772 src = &(*state)->dst; 4773 dst = &(*state)->src; 4774 } else { 4775 src = &(*state)->src; 4776 dst = &(*state)->dst; 4777 } 4778 } 4779 4780 if (src->wscale && dst->wscale) 4781 dws = dst->wscale & PF_WSCALE_MASK; 4782 else 4783 dws = 0; 4784 4785 /* Demodulate sequence number */ 4786 seq = ntohl(th.th_seq) - src->seqdiff; 4787 if (src->seqdiff) { 4788 pf_change_a(pd, &th.th_seq, htonl(seq)); 4789 copyback = 1; 4790 } 4791 4792 if (!((*state)->state_flags & PFSTATE_SLOPPY) && 4793 (!SEQ_GEQ(src->seqhi, seq) || !SEQ_GEQ(seq, 4794 src->seqlo - (dst->max_win << dws)))) { 4795 if (pf_status.debug >= LOG_NOTICE) { 4796 log(LOG_NOTICE, 4797 "pf: BAD ICMP %d:%d ", 4798 icmptype, pd->hdr.icmp->icmp_code); 4799 pf_print_host(pd->src, 0, pd->af); 4800 addlog(" -> "); 4801 pf_print_host(pd->dst, 0, pd->af); 4802 addlog(" state: "); 4803 pf_print_state(*state); 4804 addlog(" seq=%u\n", seq); 4805 } 4806 REASON_SET(reason, PFRES_BADSTATE); 4807 return (PF_DROP); 4808 } else { 4809 if (pf_status.debug >= LOG_DEBUG) { 4810 log(LOG_DEBUG, 4811 "pf: OK ICMP %d:%d ", 4812 icmptype, pd->hdr.icmp->icmp_code); 4813 pf_print_host(pd->src, 0, pd->af); 4814 addlog(" -> "); 4815 pf_print_host(pd->dst, 0, pd->af); 4816 addlog(" state: "); 4817 pf_print_state(*state); 4818 addlog(" seq=%u\n", seq); 4819 } 4820 } 4821 4822 /* translate source/destination address, if necessary */ 4823 if ((*state)->key[PF_SK_WIRE] != 4824 (*state)->key[PF_SK_STACK]) { 4825 struct pf_state_key *nk; 4826 int afto, sidx, didx; 4827 4828 if (PF_REVERSED_KEY((*state)->key, pd->af)) 4829 nk = (*state)->key[pd->sidx]; 4830 else 4831 nk = (*state)->key[pd->didx]; 4832 4833 afto = pd->af != nk->af; 4834 sidx = afto ? pd2.didx : pd2.sidx; 4835 didx = afto ? 
pd2.sidx : pd2.didx; 4836 4837 #ifdef INET6 4838 if (afto) { 4839 if (pf_translate_icmp_af(nk->af, 4840 pd->hdr.icmp)) 4841 return (PF_DROP); 4842 m_copyback(pd->m, pd->off, 4843 sizeof(struct icmp6_hdr), 4844 pd->hdr.icmp6, M_NOWAIT); 4845 if (nk->af == AF_INET) 4846 pd->proto = IPPROTO_ICMP; 4847 else 4848 pd->proto = IPPROTO_ICMPV6; 4849 pd->m->m_pkthdr.ph_rtableid = 4850 nk->rdomain; 4851 pd->destchg = 1; 4852 PF_ACPY(&pd->nsaddr, 4853 &nk->addr[pd2.sidx], nk->af); 4854 PF_ACPY(&pd->ndaddr, 4855 &nk->addr[pd2.didx], nk->af); 4856 pd->naf = nk->af; 4857 4858 if (pf_change_icmp_af(pd->m, ipoff2, 4859 pd, &pd2, &nk->addr[sidx], 4860 &nk->addr[didx], pd->af, nk->af)) 4861 return (PF_DROP); 4862 pf_change_ap(pd, pd2.src, &th.th_sport, 4863 &nk->addr[pd2.sidx], 4864 nk->port[sidx]); 4865 pf_change_ap(pd, pd2.dst, &th.th_dport, 4866 &nk->addr[pd2.didx], 4867 nk->port[didx]); 4868 m_copyback(pd2.m, pd2.off, 8, &th, 4869 M_NOWAIT); 4870 return (PF_AFRT); 4871 } 4872 #endif /* INET6 */ 4873 if (PF_ANEQ(pd2.src, 4874 &nk->addr[pd2.sidx], pd2.af) || 4875 nk->port[pd2.sidx] != th.th_sport) 4876 pf_change_icmp(pd, pd2.src, 4877 &th.th_sport, daddr, 4878 &nk->addr[pd2.sidx], 4879 nk->port[pd2.sidx]); 4880 4881 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], 4882 pd2.af) || pd2.rdomain != nk->rdomain) 4883 pd->destchg = 1; 4884 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 4885 4886 if (PF_ANEQ(pd2.dst, 4887 &nk->addr[pd2.didx], pd2.af) || 4888 nk->port[pd2.didx] != th.th_dport) 4889 pf_change_icmp(pd, pd2.dst, 4890 &th.th_dport, saddr, 4891 &nk->addr[pd2.didx], 4892 nk->port[pd2.didx]); 4893 copyback = 1; 4894 } 4895 4896 if (copyback) { 4897 switch (pd2.af) { 4898 case AF_INET: 4899 m_copyback(pd->m, pd->off, ICMP_MINLEN, 4900 pd->hdr.icmp, M_NOWAIT); 4901 m_copyback(pd2.m, ipoff2, sizeof(h2), 4902 &h2, M_NOWAIT); 4903 break; 4904 #ifdef INET6 4905 case AF_INET6: 4906 m_copyback(pd->m, pd->off, 4907 sizeof(struct icmp6_hdr), 4908 pd->hdr.icmp6, M_NOWAIT); 4909 m_copyback(pd2.m, ipoff2, sizeof(h2_6), 4910 &h2_6, M_NOWAIT); 4911 break; 4912 #endif /* INET6 */ 4913 } 4914 m_copyback(pd2.m, pd2.off, 8, &th, M_NOWAIT); 4915 } 4916 break; 4917 } 4918 case IPPROTO_UDP: { 4919 struct udphdr uh; 4920 4921 if (!pf_pull_hdr(pd2.m, pd2.off, &uh, sizeof(uh), 4922 NULL, reason, pd2.af)) { 4923 DPFPRINTF(LOG_NOTICE, 4924 "ICMP error message too short (udp)"); 4925 return (PF_DROP); 4926 } 4927 4928 key.af = pd2.af; 4929 key.proto = IPPROTO_UDP; 4930 key.rdomain = pd2.rdomain; 4931 PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); 4932 PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); 4933 key.port[pd2.sidx] = uh.uh_sport; 4934 key.port[pd2.didx] = uh.uh_dport; 4935 4936 STATE_LOOKUP(pd2.kif, &key, pd2.dir, *state, pd2.m); 4937 4938 /* translate source/destination address, if necessary */ 4939 if ((*state)->key[PF_SK_WIRE] != 4940 (*state)->key[PF_SK_STACK]) { 4941 struct pf_state_key *nk; 4942 int afto, sidx, didx; 4943 4944 if (PF_REVERSED_KEY((*state)->key, pd->af)) 4945 nk = (*state)->key[pd->sidx]; 4946 else 4947 nk = (*state)->key[pd->didx]; 4948 4949 afto = pd->af != nk->af; 4950 sidx = afto ? pd2.didx : pd2.sidx; 4951 didx = afto ? 
pd2.sidx : pd2.didx; 4952 4953 #ifdef INET6 4954 if (afto) { 4955 if (pf_translate_icmp_af(nk->af, 4956 pd->hdr.icmp)) 4957 return (PF_DROP); 4958 m_copyback(pd->m, pd->off, 4959 sizeof(struct icmp6_hdr), 4960 pd->hdr.icmp6, M_NOWAIT); 4961 if (nk->af == AF_INET) 4962 pd->proto = IPPROTO_ICMP; 4963 else 4964 pd->proto = IPPROTO_ICMPV6; 4965 pd->m->m_pkthdr.ph_rtableid = 4966 nk->rdomain; 4967 pd->destchg = 1; 4968 PF_ACPY(&pd->nsaddr, 4969 &nk->addr[pd2.sidx], nk->af); 4970 PF_ACPY(&pd->ndaddr, 4971 &nk->addr[pd2.didx], nk->af); 4972 pd->naf = nk->af; 4973 4974 if (pf_change_icmp_af(pd->m, ipoff2, 4975 pd, &pd2, &nk->addr[sidx], 4976 &nk->addr[didx], pd->af, nk->af)) 4977 return (PF_DROP); 4978 pf_change_ap(pd, pd2.src, &uh.uh_sport, 4979 &nk->addr[pd2.sidx], 4980 nk->port[sidx]); 4981 pf_change_ap(pd, pd2.dst, &uh.uh_dport, 4982 &nk->addr[pd2.didx], 4983 nk->port[didx]); 4984 m_copyback(pd2.m, pd2.off, sizeof(uh), 4985 &uh, M_NOWAIT); 4986 return (PF_AFRT); 4987 } 4988 #endif /* INET6 */ 4989 4990 if (PF_ANEQ(pd2.src, 4991 &nk->addr[pd2.sidx], pd2.af) || 4992 nk->port[pd2.sidx] != uh.uh_sport) 4993 pf_change_icmp(pd, pd2.src, 4994 &uh.uh_sport, daddr, 4995 &nk->addr[pd2.sidx], 4996 nk->port[pd2.sidx]); 4997 4998 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], 4999 pd2.af) || pd2.rdomain != nk->rdomain) 5000 pd->destchg = 1; 5001 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 5002 5003 if (PF_ANEQ(pd2.dst, 5004 &nk->addr[pd2.didx], pd2.af) || 5005 nk->port[pd2.didx] != uh.uh_dport) 5006 pf_change_icmp(pd, pd2.dst, 5007 &uh.uh_dport, saddr, 5008 &nk->addr[pd2.didx], 5009 nk->port[pd2.didx]); 5010 5011 switch (pd2.af) { 5012 case AF_INET: 5013 m_copyback(pd->m, pd->off, ICMP_MINLEN, 5014 pd->hdr.icmp, M_NOWAIT); 5015 m_copyback(pd2.m, ipoff2, sizeof(h2), 5016 &h2, M_NOWAIT); 5017 break; 5018 #ifdef INET6 5019 case AF_INET6: 5020 m_copyback(pd->m, pd->off, 5021 sizeof(struct icmp6_hdr), 5022 pd->hdr.icmp6, M_NOWAIT); 5023 m_copyback(pd2.m, ipoff2, sizeof(h2_6), 5024 &h2_6, M_NOWAIT); 5025 break; 5026 #endif /* INET6 */ 5027 } 5028 uh.uh_sum = 0; 5029 m_copyback(pd2.m, pd2.off, sizeof(uh), &uh, 5030 M_NOWAIT); 5031 copyback = 1; 5032 } 5033 break; 5034 } 5035 case IPPROTO_ICMP: { 5036 struct icmp iih; 5037 5038 if (pd2.af != AF_INET) { 5039 REASON_SET(reason, PFRES_NORM); 5040 return (PF_DROP); 5041 } 5042 5043 if (!pf_pull_hdr(pd2.m, pd2.off, &iih, ICMP_MINLEN, 5044 NULL, reason, pd2.af)) { 5045 DPFPRINTF(LOG_NOTICE, 5046 "ICMP error message too short (icmp)"); 5047 return (PF_DROP); 5048 } 5049 5050 pd2.hdr.icmp = &iih; 5051 pf_icmp_mapping(&pd2, iih.icmp_type, 5052 &icmp_dir, &virtual_id, &virtual_type); 5053 5054 ret = pf_icmp_state_lookup(&pd2, &key, state, 5055 virtual_id, virtual_type, icmp_dir, &iidx, 0, 1); 5056 if (ret >= 0) 5057 return (ret); 5058 5059 /* translate source/destination address, if necessary */ 5060 if ((*state)->key[PF_SK_WIRE] != 5061 (*state)->key[PF_SK_STACK]) { 5062 struct pf_state_key *nk; 5063 int afto, sidx, didx; 5064 5065 if (PF_REVERSED_KEY((*state)->key, pd->af)) 5066 nk = (*state)->key[pd->sidx]; 5067 else 5068 nk = (*state)->key[pd->didx]; 5069 5070 afto = pd->af != nk->af; 5071 sidx = afto ? pd2.didx : pd2.sidx; 5072 didx = afto ? pd2.sidx : pd2.didx; 5073 iidx = afto ? 
!iidx : iidx; 5074 5075 #ifdef INET6 5076 if (afto) { 5077 if (nk->af != AF_INET6) 5078 return (PF_DROP); 5079 if (pf_translate_icmp_af(nk->af, 5080 pd->hdr.icmp)) 5081 return (PF_DROP); 5082 m_copyback(pd->m, pd->off, 5083 sizeof(struct icmp6_hdr), 5084 pd->hdr.icmp6, M_NOWAIT); 5085 if (pf_change_icmp_af(pd->m, ipoff2, 5086 pd, &pd2, &nk->addr[sidx], 5087 &nk->addr[didx], pd->af, nk->af)) 5088 return (PF_DROP); 5089 pd->proto = IPPROTO_ICMPV6; 5090 if (pf_translate_icmp_af(nk->af, &iih)) 5091 return (PF_DROP); 5092 if (virtual_type == htons(ICMP_ECHO) && 5093 nk->port[iidx] != iih.icmp_id) 5094 iih.icmp_id = nk->port[iidx]; 5095 m_copyback(pd2.m, pd2.off, ICMP_MINLEN, 5096 &iih, M_NOWAIT); 5097 pd->m->m_pkthdr.ph_rtableid = 5098 nk->rdomain; 5099 pd->destchg = 1; 5100 PF_ACPY(&pd->nsaddr, 5101 &nk->addr[pd2.sidx], nk->af); 5102 PF_ACPY(&pd->ndaddr, 5103 &nk->addr[pd2.didx], nk->af); 5104 pd->naf = nk->af; 5105 return (PF_AFRT); 5106 } 5107 #endif /* INET6 */ 5108 5109 if (PF_ANEQ(pd2.src, 5110 &nk->addr[pd2.sidx], pd2.af) || 5111 (virtual_type == htons(ICMP_ECHO) && 5112 nk->port[iidx] != iih.icmp_id)) 5113 pf_change_icmp(pd, pd2.src, 5114 (virtual_type == htons(ICMP_ECHO)) ? 5115 &iih.icmp_id : NULL, 5116 daddr, &nk->addr[pd2.sidx], 5117 (virtual_type == htons(ICMP_ECHO)) ? 5118 nk->port[iidx] : 0); 5119 5120 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], 5121 pd2.af) || pd2.rdomain != nk->rdomain) 5122 pd->destchg = 1; 5123 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 5124 5125 if (PF_ANEQ(pd2.dst, 5126 &nk->addr[pd2.didx], pd2.af)) 5127 pf_change_icmp(pd, pd2.dst, NULL, 5128 saddr, &nk->addr[pd2.didx], 0); 5129 5130 m_copyback(pd->m, pd->off, ICMP_MINLEN, 5131 pd->hdr.icmp, M_NOWAIT); 5132 m_copyback(pd2.m, ipoff2, sizeof(h2), &h2, 5133 M_NOWAIT); 5134 m_copyback(pd2.m, pd2.off, ICMP_MINLEN, &iih, 5135 M_NOWAIT); 5136 copyback = 1; 5137 } 5138 break; 5139 } 5140 #ifdef INET6 5141 case IPPROTO_ICMPV6: { 5142 struct icmp6_hdr iih; 5143 5144 if (pd2.af != AF_INET6) { 5145 REASON_SET(reason, PFRES_NORM); 5146 return (PF_DROP); 5147 } 5148 5149 if (!pf_pull_hdr(pd2.m, pd2.off, &iih, 5150 sizeof(struct icmp6_hdr), NULL, reason, pd2.af)) { 5151 DPFPRINTF(LOG_NOTICE, 5152 "ICMP error message too short (icmp6)"); 5153 return (PF_DROP); 5154 } 5155 5156 pd2.hdr.icmp6 = &iih; 5157 pf_icmp_mapping(&pd2, iih.icmp6_type, 5158 &icmp_dir, &virtual_id, &virtual_type); 5159 ret = pf_icmp_state_lookup(&pd2, &key, state, 5160 virtual_id, virtual_type, icmp_dir, &iidx, 0, 1); 5161 /* IPv6? try matching a multicast address */ 5162 if (ret == PF_DROP && pd2.af == AF_INET6 && 5163 icmp_dir == PF_OUT) 5164 ret = pf_icmp_state_lookup(&pd2, &key, state, 5165 virtual_id, virtual_type, icmp_dir, &iidx, 5166 1, 1); 5167 if (ret >= 0) 5168 return (ret); 5169 5170 /* translate source/destination address, if necessary */ 5171 if ((*state)->key[PF_SK_WIRE] != 5172 (*state)->key[PF_SK_STACK]) { 5173 struct pf_state_key *nk; 5174 int afto, sidx, didx; 5175 5176 if (PF_REVERSED_KEY((*state)->key, pd->af)) 5177 nk = (*state)->key[pd->sidx]; 5178 else 5179 nk = (*state)->key[pd->didx]; 5180 5181 afto = pd->af != nk->af; 5182 sidx = afto ? pd2.didx : pd2.sidx; 5183 didx = afto ? pd2.sidx : pd2.didx; 5184 iidx = afto ? 
!iidx : iidx; 5185 5186 if (afto) { 5187 if (nk->af != AF_INET) 5188 return (PF_DROP); 5189 if (pf_translate_icmp_af(nk->af, 5190 pd->hdr.icmp)) 5191 return (PF_DROP); 5192 m_copyback(pd->m, pd->off, 5193 sizeof(struct icmp6_hdr), 5194 pd->hdr.icmp6, M_NOWAIT); 5195 if (pf_change_icmp_af(pd->m, ipoff2, 5196 pd, &pd2, &nk->addr[sidx], 5197 &nk->addr[didx], pd->af, nk->af)) 5198 return (PF_DROP); 5199 pd->proto = IPPROTO_ICMP; 5200 if (pf_translate_icmp_af(nk->af, &iih)) 5201 return (PF_DROP); 5202 if (virtual_type == 5203 htons(ICMP6_ECHO_REQUEST) && 5204 nk->port[iidx] != iih.icmp6_id) 5205 iih.icmp6_id = nk->port[iidx]; 5206 m_copyback(pd2.m, pd2.off, 5207 sizeof(struct icmp6_hdr), &iih, 5208 M_NOWAIT); 5209 pd->m->m_pkthdr.ph_rtableid = 5210 nk->rdomain; 5211 pd->destchg = 1; 5212 PF_ACPY(&pd->nsaddr, 5213 &nk->addr[pd2.sidx], nk->af); 5214 PF_ACPY(&pd->ndaddr, 5215 &nk->addr[pd2.didx], nk->af); 5216 pd->naf = nk->af; 5217 return (PF_AFRT); 5218 } 5219 5220 if (PF_ANEQ(pd2.src, 5221 &nk->addr[pd2.sidx], pd2.af) || 5222 ((virtual_type == 5223 htons(ICMP6_ECHO_REQUEST)) && 5224 nk->port[pd2.sidx] != iih.icmp6_id)) 5225 pf_change_icmp(pd, pd2.src, 5226 (virtual_type == 5227 htons(ICMP6_ECHO_REQUEST)) 5228 ? &iih.icmp6_id : NULL, 5229 daddr, &nk->addr[pd2.sidx], 5230 (virtual_type == 5231 htons(ICMP6_ECHO_REQUEST)) 5232 ? nk->port[iidx] : 0); 5233 5234 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], 5235 pd2.af) || pd2.rdomain != nk->rdomain) 5236 pd->destchg = 1; 5237 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 5238 5239 if (PF_ANEQ(pd2.dst, 5240 &nk->addr[pd2.didx], pd2.af)) 5241 pf_change_icmp(pd, pd2.dst, NULL, 5242 saddr, &nk->addr[pd2.didx], 0); 5243 5244 m_copyback(pd->m, pd->off, 5245 sizeof(struct icmp6_hdr), pd->hdr.icmp6, 5246 M_NOWAIT); 5247 m_copyback(pd2.m, ipoff2, sizeof(h2_6), &h2_6, 5248 M_NOWAIT); 5249 m_copyback(pd2.m, pd2.off, 5250 sizeof(struct icmp6_hdr), &iih, M_NOWAIT); 5251 copyback = 1; 5252 } 5253 break; 5254 } 5255 #endif /* INET6 */ 5256 default: { 5257 key.af = pd2.af; 5258 key.proto = pd2.proto; 5259 key.rdomain = pd2.rdomain; 5260 PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); 5261 PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); 5262 key.port[0] = key.port[1] = 0; 5263 5264 STATE_LOOKUP(pd2.kif, &key, pd2.dir, *state, pd2.m); 5265 5266 /* translate source/destination address, if necessary */ 5267 if ((*state)->key[PF_SK_WIRE] != 5268 (*state)->key[PF_SK_STACK]) { 5269 struct pf_state_key *nk = 5270 (*state)->key[pd->didx]; 5271 5272 if (PF_ANEQ(pd2.src, 5273 &nk->addr[pd2.sidx], pd2.af)) 5274 pf_change_icmp(pd, pd2.src, NULL, 5275 daddr, &nk->addr[pd2.sidx], 0); 5276 5277 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], 5278 pd2.af) || pd2.rdomain != nk->rdomain) 5279 pd->destchg = 1; 5280 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 5281 5282 if (PF_ANEQ(pd2.dst, 5283 &nk->addr[pd2.didx], pd2.af)) 5284 pf_change_icmp(pd, pd2.dst, NULL, 5285 saddr, &nk->addr[pd2.didx], 0); 5286 5287 switch (pd2.af) { 5288 case AF_INET: 5289 m_copyback(pd->m, pd->off, ICMP_MINLEN, 5290 pd->hdr.icmp, M_NOWAIT); 5291 m_copyback(pd2.m, ipoff2, sizeof(h2), 5292 &h2, M_NOWAIT); 5293 break; 5294 #ifdef INET6 5295 case AF_INET6: 5296 m_copyback(pd->m, pd->off, 5297 sizeof(struct icmp6_hdr), 5298 pd->hdr.icmp6, M_NOWAIT); 5299 m_copyback(pd2.m, ipoff2, sizeof(h2_6), 5300 &h2_6, M_NOWAIT); 5301 break; 5302 #endif /* INET6 */ 5303 } 5304 copyback = 1; 5305 } 5306 break; 5307 } 5308 } 5309 } 5310 if (copyback) { 5311 pf_cksum(pd, pd->m); 5312 m_copyback(pd->m, pd->off, pd->hdrlen, pd->hdr.any, M_NOWAIT); 
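/* The header rewrites above went to the local copies in pd->hdr; recompute the checksums and write the outer header back to the mbuf once, after all translations are done. */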
5313 } 5314 5315 return (PF_PASS); 5316 } 5317 5318 /* 5319 * off is measured from the start of the mbuf chain; len bytes 5320 * starting at that offset are copied into p. 5321 */ 5322 void * 5323 pf_pull_hdr(struct mbuf *m, int off, void *p, int len, 5324 u_short *actionp, u_short *reasonp, sa_family_t af) 5325 { 5326 switch (af) { 5327 case AF_INET: { 5328 struct ip *h = mtod(m, struct ip *); 5329 u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3; 5330 5331 if (fragoff) { 5332 if (fragoff >= len) 5333 ACTION_SET(actionp, PF_PASS); 5334 else { 5335 ACTION_SET(actionp, PF_DROP); 5336 REASON_SET(reasonp, PFRES_FRAG); 5337 } 5338 return (NULL); 5339 } 5340 if (m->m_pkthdr.len < off + len || 5341 ntohs(h->ip_len) < off + len) { 5342 ACTION_SET(actionp, PF_DROP); 5343 REASON_SET(reasonp, PFRES_SHORT); 5344 return (NULL); 5345 } 5346 break; 5347 } 5348 #ifdef INET6 5349 case AF_INET6: { 5350 struct ip6_hdr *h = mtod(m, struct ip6_hdr *); 5351 5352 if (m->m_pkthdr.len < off + len || 5353 (ntohs(h->ip6_plen) + sizeof(struct ip6_hdr)) < 5354 (unsigned)(off + len)) { 5355 ACTION_SET(actionp, PF_DROP); 5356 REASON_SET(reasonp, PFRES_SHORT); 5357 return (NULL); 5358 } 5359 break; 5360 } 5361 #endif /* INET6 */ 5362 } 5363 m_copydata(m, off, len, p); 5364 return (p); 5365 } 5366 5367 int 5368 pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif, 5369 int rtableid) 5370 { 5371 struct sockaddr_storage ss; 5372 struct sockaddr_in *dst; 5373 int ret = 1; 5374 int check_mpath; 5375 #ifdef INET6 5376 struct sockaddr_in6 *dst6; 5377 #endif /* INET6 */ 5378 struct rtentry *rt, *rt0 = NULL; 5379 5380 check_mpath = 0; 5381 memset(&ss, 0, sizeof(ss)); 5382 switch (af) { 5383 case AF_INET: 5384 dst = (struct sockaddr_in *)&ss; 5385 dst->sin_family = AF_INET; 5386 dst->sin_len = sizeof(*dst); 5387 dst->sin_addr = addr->v4; 5388 if (ipmultipath) 5389 check_mpath = 1; 5390 break; 5391 #ifdef INET6 5392 case AF_INET6: 5393 /* 5394 * Skip check for addresses with embedded interface scope, 5395 * as they would always match anyway.
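* (KAME-derived stacks embed the interface index in the second 16-bit word of such addresses, so a kernel-internal route lookup on them is always expected to succeed.)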
5396 */ 5397 if (IN6_IS_SCOPE_EMBED(&addr->v6)) 5398 goto out; 5399 dst6 = (struct sockaddr_in6 *)&ss; 5400 dst6->sin6_family = AF_INET6; 5401 dst6->sin6_len = sizeof(*dst6); 5402 dst6->sin6_addr = addr->v6; 5403 if (ip6_multipath) 5404 check_mpath = 1; 5405 break; 5406 #endif /* INET6 */ 5407 } 5408 5409 /* Skip checks for ipsec interfaces */ 5410 if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC) 5411 goto out; 5412 5413 rt0 = rtalloc((struct sockaddr *)&ss, 0, rtableid); 5414 if (rt0 != NULL) { 5415 /* No interface given, this is a no-route check */ 5416 if (kif == NULL) 5417 goto out; 5418 5419 if (kif->pfik_ifp == NULL) { 5420 ret = 0; 5421 goto out; 5422 } 5423 5424 /* Perform uRPF check if passed input interface */ 5425 ret = 0; 5426 rt = rt0; 5427 do { 5428 if (rt->rt_ifidx == kif->pfik_ifp->if_index) { 5429 ret = 1; 5430 #if NCARP > 0 5431 } else { 5432 struct ifnet *ifp; 5433 5434 ifp = if_get(rt->rt_ifidx); 5435 if (ifp != NULL && ifp->if_type == IFT_CARP && 5436 ifp->if_carpdev == kif->pfik_ifp) 5437 ret = 1; 5438 if_put(ifp); 5439 #endif /* NCARP */ 5440 } 5441 5442 #ifndef SMALL_KERNEL 5443 rt = rtable_mpath_next(rt); 5444 #else 5445 rt = NULL; 5446 #endif /* SMALL_KERNEL */ 5447 } while (check_mpath == 1 && rt != NULL && ret == 0); 5448 } else 5449 ret = 0; 5450 out: 5451 rtfree(rt0); 5452 return (ret); 5453 } 5454 5455 int 5456 pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw, 5457 int rtableid) 5458 { 5459 struct sockaddr_storage ss; 5460 struct sockaddr_in *dst; 5461 #ifdef INET6 5462 struct sockaddr_in6 *dst6; 5463 #endif /* INET6 */ 5464 struct rtentry *rt; 5465 int ret = 0; 5466 5467 memset(&ss, 0, sizeof(ss)); 5468 switch (af) { 5469 case AF_INET: 5470 dst = (struct sockaddr_in *)&ss; 5471 dst->sin_family = AF_INET; 5472 dst->sin_len = sizeof(*dst); 5473 dst->sin_addr = addr->v4; 5474 break; 5475 #ifdef INET6 5476 case AF_INET6: 5477 dst6 = (struct sockaddr_in6 *)&ss; 5478 dst6->sin6_family = AF_INET6; 5479 dst6->sin6_len = sizeof(*dst6); 5480 dst6->sin6_addr = addr->v6; 5481 break; 5482 #endif /* INET6 */ 5483 } 5484 5485 rt = rtalloc((struct sockaddr *)&ss, RT_RESOLVE, rtableid); 5486 if (rt != NULL) { 5487 if (rt->rt_labelid == aw->v.rtlabel) 5488 ret = 1; 5489 rtfree(rt); 5490 } 5491 5492 return (ret); 5493 } 5494 5495 void 5496 pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, 5497 struct pf_state *s) 5498 { 5499 struct mbuf *m0, *m1; 5500 struct sockaddr_in *dst, sin; 5501 struct rtentry *rt = NULL; 5502 struct ip *ip; 5503 struct ifnet *ifp = NULL; 5504 struct pf_addr naddr; 5505 struct pf_src_node *sns[PF_SN_MAX]; 5506 int error = 0; 5507 unsigned int rtableid; 5508 5509 if (m == NULL || *m == NULL || r == NULL || 5510 (dir != PF_IN && dir != PF_OUT) || oifp == NULL) 5511 panic("pf_route: invalid parameters"); 5512 5513 if ((*m)->m_pkthdr.pf.routed++ > 3) { 5514 m0 = *m; 5515 *m = NULL; 5516 goto bad; 5517 } 5518 5519 if (r->rt == PF_DUPTO) { 5520 if ((m0 = m_dup_pkt(*m, max_linkhdr, M_NOWAIT)) == NULL) 5521 return; 5522 } else { 5523 if ((r->rt == PF_REPLYTO) == (r->direction == dir)) 5524 return; 5525 m0 = *m; 5526 } 5527 5528 if (m0->m_len < sizeof(struct ip)) { 5529 DPFPRINTF(LOG_ERR, 5530 "pf_route: m0->m_len < sizeof(struct ip)"); 5531 goto bad; 5532 } 5533 5534 ip = mtod(m0, struct ip *); 5535 5536 memset(&sin, 0, sizeof(sin)); 5537 dst = &sin; 5538 dst->sin_family = AF_INET; 5539 dst->sin_len = sizeof(*dst); 5540 dst->sin_addr = ip->ip_dst; 5541 rtableid = m0->m_pkthdr.ph_rtableid; 5542 5543 if 
(!r->rt) { 5544 rt = rtalloc(sintosa(dst), RT_RESOLVE, rtableid); 5545 if (rt == NULL) { 5546 ipstat.ips_noroute++; 5547 goto bad; 5548 } 5549 5550 ifp = if_get(rt->rt_ifidx); 5551 5552 if (rt->rt_flags & RTF_GATEWAY) 5553 dst = satosin(rt->rt_gateway); 5554 5555 m0->m_pkthdr.pf.flags |= PF_TAG_GENERATED; 5556 } else { 5557 if (s == NULL) { 5558 bzero(sns, sizeof(sns)); 5559 if (pf_map_addr(AF_INET, r, 5560 (struct pf_addr *)&ip->ip_src, 5561 &naddr, NULL, sns, &r->route, PF_SN_ROUTE)) { 5562 DPFPRINTF(LOG_ERR, 5563 "pf_route: pf_map_addr() failed."); 5564 goto bad; 5565 } 5566 5567 if (!PF_AZERO(&naddr, AF_INET)) 5568 dst->sin_addr.s_addr = naddr.v4.s_addr; 5569 ifp = r->route.kif ? 5570 r->route.kif->pfik_ifp : NULL; 5571 } else { 5572 if (!PF_AZERO(&s->rt_addr, AF_INET)) 5573 dst->sin_addr.s_addr = 5574 s->rt_addr.v4.s_addr; 5575 ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL; 5576 } 5577 5578 rt = rtalloc(sintosa(dst), RT_RESOLVE, rtableid); 5579 if (rt == NULL) { 5580 ipstat.ips_noroute++; 5581 goto bad; 5582 } 5583 } 5584 if (ifp == NULL) 5585 goto bad; 5586 5587 5588 if (oifp != ifp) { 5589 if (pf_test(AF_INET, PF_OUT, ifp, &m0) != PF_PASS) 5590 goto bad; 5591 else if (m0 == NULL) 5592 goto done; 5593 if (m0->m_len < sizeof(struct ip)) { 5594 DPFPRINTF(LOG_ERR, 5595 "pf_route: m0->m_len < sizeof(struct ip)"); 5596 goto bad; 5597 } 5598 ip = mtod(m0, struct ip *); 5599 } 5600 5601 in_proto_cksum_out(m0, ifp); 5602 5603 if (ntohs(ip->ip_len) <= ifp->if_mtu) { 5604 ip->ip_sum = 0; 5605 if (ifp->if_capabilities & IFCAP_CSUM_IPv4) 5606 m0->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT; 5607 else { 5608 ipstat.ips_outswcsum++; 5609 ip->ip_sum = in_cksum(m0, ip->ip_hl << 2); 5610 } 5611 error = ifp->if_output(ifp, m0, sintosa(dst), rt); 5612 goto done; 5613 } 5614 5615 /* 5616 * Too large for interface; fragment if possible. 5617 * Must be able to put at least 8 bytes per fragment. 
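* (Fragment offsets are expressed in units of 8 octets, so every fragment except the last must carry a multiple of 8 payload bytes.)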
5618 */ 5619 if (ip->ip_off & htons(IP_DF)) { 5620 ipstat.ips_cantfrag++; 5621 if (r->rt != PF_DUPTO) { 5622 icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0, 5623 ifp->if_mtu); 5624 goto done; 5625 } else 5626 goto bad; 5627 } 5628 5629 m1 = m0; 5630 error = ip_fragment(m0, ifp, ifp->if_mtu); 5631 if (error) { 5632 m0 = NULL; 5633 goto bad; 5634 } 5635 5636 for (m0 = m1; m0; m0 = m1) { 5637 m1 = m0->m_nextpkt; 5638 m0->m_nextpkt = 0; 5639 if (error == 0) 5640 error = ifp->if_output(ifp, m0, sintosa(dst), rt); 5641 else 5642 m_freem(m0); 5643 } 5644 5645 if (error == 0) 5646 ipstat.ips_fragmented++; 5647 5648 done: 5649 if (r->rt != PF_DUPTO) 5650 *m = NULL; 5651 if (!r->rt) 5652 if_put(ifp); 5653 rtfree(rt); 5654 return; 5655 5656 bad: 5657 m_freem(m0); 5658 goto done; 5659 } 5660 5661 #ifdef INET6 5662 void 5663 pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, 5664 struct pf_state *s) 5665 { 5666 struct mbuf *m0; 5667 struct sockaddr_in6 *dst, sin6; 5668 struct rtentry *rt = NULL; 5669 struct ip6_hdr *ip6; 5670 struct ifnet *ifp = NULL; 5671 struct pf_addr naddr; 5672 struct pf_src_node *sns[PF_SN_MAX]; 5673 struct m_tag *mtag; 5674 unsigned int rtableid; 5675 5676 if (m == NULL || *m == NULL || r == NULL || 5677 (dir != PF_IN && dir != PF_OUT) || oifp == NULL) 5678 panic("pf_route6: invalid parameters"); 5679 5680 if ((*m)->m_pkthdr.pf.routed++ > 3) { 5681 m0 = *m; 5682 *m = NULL; 5683 goto bad; 5684 } 5685 5686 if (r->rt == PF_DUPTO) { 5687 if ((m0 = m_dup_pkt(*m, max_linkhdr, M_NOWAIT)) == NULL) 5688 return; 5689 } else { 5690 if ((r->rt == PF_REPLYTO) == (r->direction == dir)) 5691 return; 5692 m0 = *m; 5693 } 5694 5695 if (m0->m_len < sizeof(struct ip6_hdr)) { 5696 DPFPRINTF(LOG_ERR, 5697 "pf_route6: m0->m_len < sizeof(struct ip6_hdr)"); 5698 goto bad; 5699 } 5700 ip6 = mtod(m0, struct ip6_hdr *); 5701 5702 memset(&sin6, 0, sizeof(sin6)); 5703 dst = &sin6; 5704 dst->sin6_family = AF_INET6; 5705 dst->sin6_len = sizeof(*dst); 5706 dst->sin6_addr = ip6->ip6_dst; 5707 rtableid = m0->m_pkthdr.ph_rtableid; 5708 5709 if (!r->rt) { 5710 m0->m_pkthdr.pf.flags |= PF_TAG_GENERATED; 5711 ip6_output(m0, NULL, NULL, 0, NULL, NULL); 5712 return; 5713 } 5714 5715 if (s == NULL) { 5716 bzero(sns, sizeof(sns)); 5717 if (pf_map_addr(AF_INET6, r, (struct pf_addr *)&ip6->ip6_src, 5718 &naddr, NULL, sns, &r->route, PF_SN_ROUTE)) { 5719 DPFPRINTF(LOG_ERR, 5720 "pf_route6: pf_map_addr() failed."); 5721 goto bad; 5722 } 5723 if (!PF_AZERO(&naddr, AF_INET6)) 5724 PF_ACPY((struct pf_addr *)&dst->sin6_addr, 5725 &naddr, AF_INET6); 5726 ifp = r->route.kif ? r->route.kif->pfik_ifp : NULL; 5727 } else { 5728 if (!PF_AZERO(&s->rt_addr, AF_INET6)) 5729 PF_ACPY((struct pf_addr *)&dst->sin6_addr, 5730 &s->rt_addr, AF_INET6); 5731 ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL; 5732 } 5733 if (ifp == NULL) 5734 goto bad; 5735 5736 if (oifp != ifp) { 5737 if (pf_test(AF_INET6, PF_OUT, ifp, &m0) != PF_PASS) 5738 goto bad; 5739 else if (m0 == NULL) 5740 goto done; 5741 if (m0->m_len < sizeof(struct ip6_hdr)) { 5742 DPFPRINTF(LOG_ERR, 5743 "pf_route6: m0->m_len < sizeof(struct ip6_hdr)"); 5744 goto bad; 5745 } 5746 } 5747 5748 in6_proto_cksum_out(m0, ifp); 5749 5750 if (IN6_IS_SCOPE_EMBED(&dst->sin6_addr)) 5751 dst->sin6_addr.s6_addr16[1] = htons(ifp->if_index); 5752 5753 /* 5754 * If packet has been reassembled by PF earlier, we have to 5755 * use pf_refragment6() here to turn it back to fragments. 
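* (The fragments were reassembled so pf could inspect the complete packet; pf_refragment6() splits it up again so that it leaves the box as fragments rather than as one oversized packet.)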
5756 */ 5757 if ((mtag = m_tag_find(m0, PACKET_TAG_PF_REASSEMBLED, NULL))) { 5758 (void) pf_refragment6(&m0, mtag, dst, ifp); 5759 } else if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) { 5760 rt = rtalloc(sin6tosa(dst), RT_RESOLVE, rtableid); 5761 if (rt == NULL) { 5762 ip6stat.ip6s_noroute++; 5763 goto bad; 5764 } 5765 ifp->if_output(ifp, m0, sin6tosa(dst), rt); 5766 rtfree(rt); 5767 } else { 5768 icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu); 5769 } 5770 5771 done: 5772 if (r->rt != PF_DUPTO) 5773 *m = NULL; 5774 return; 5775 5776 bad: 5777 m_freem(m0); 5778 goto done; 5779 } 5780 #endif /* INET6 */ 5781 5782 5783 /* 5784 * check protocol (tcp/udp/icmp/icmp6) checksum and set mbuf flag 5785 * off is the offset where the protocol header starts 5786 * len is the total length of protocol header plus payload 5787 * returns 0 when the checksum is valid, otherwise returns 1. 5788 * if the _OUT flag is set the checksum isn't done yet, consider these ok 5789 */ 5790 int 5791 pf_check_proto_cksum(struct pf_pdesc *pd, int off, int len, u_int8_t p, 5792 sa_family_t af) 5793 { 5794 u_int16_t flag_ok, flag_bad, flag_out; 5795 u_int16_t sum; 5796 5797 if (pd->csum_status == PF_CSUM_OK) 5798 return (0); 5799 if (pd->csum_status == PF_CSUM_BAD) 5800 return (1); 5801 5802 switch (p) { 5803 case IPPROTO_TCP: 5804 flag_ok = M_TCP_CSUM_IN_OK; 5805 flag_out = M_TCP_CSUM_OUT; 5806 flag_bad = M_TCP_CSUM_IN_BAD; 5807 break; 5808 case IPPROTO_UDP: 5809 flag_ok = M_UDP_CSUM_IN_OK; 5810 flag_out = M_UDP_CSUM_OUT; 5811 flag_bad = M_UDP_CSUM_IN_BAD; 5812 break; 5813 case IPPROTO_ICMP: 5814 #ifdef INET6 5815 case IPPROTO_ICMPV6: 5816 #endif /* INET6 */ 5817 flag_ok = M_ICMP_CSUM_IN_OK; 5818 flag_out = M_ICMP_CSUM_OUT; 5819 flag_bad = M_ICMP_CSUM_IN_BAD; 5820 break; 5821 default: 5822 return (1); 5823 } 5824 if (pd->m->m_pkthdr.csum_flags & (flag_ok | flag_out)) { 5825 pd->csum_status = PF_CSUM_OK; 5826 return (0); 5827 } 5828 if (pd->m->m_pkthdr.csum_flags & flag_bad || off < sizeof(struct ip) || 5829 pd->m->m_pkthdr.len < off + len) { 5830 pd->csum_status = PF_CSUM_BAD; 5831 return (1); 5832 } 5833 5834 /* need to do it in software */ 5835 if (p == IPPROTO_TCP) 5836 tcpstat.tcps_inswcsum++; 5837 else if (p == IPPROTO_UDP) 5838 udpstat.udps_inswcsum++; 5839 5840 switch (af) { 5841 case AF_INET: 5842 if (pd->m->m_len < sizeof(struct ip)) { 5843 pd->csum_status = PF_CSUM_BAD; 5844 return (1); 5845 } 5846 sum = in4_cksum(pd->m, (p == IPPROTO_ICMP ? 
0 : p), off, len); 5847 break; 5848 #ifdef INET6 5849 case AF_INET6: 5850 if (pd->m->m_len < sizeof(struct ip6_hdr)) { 5851 pd->csum_status = PF_CSUM_BAD; 5852 return (1); 5853 } 5854 sum = in6_cksum(pd->m, p, off, len); 5855 break; 5856 #endif /* INET6 */ 5857 default: 5858 unhandled_af(af); 5859 } 5860 if (sum) { 5861 switch (p) { 5862 case IPPROTO_TCP: 5863 tcpstat.tcps_rcvbadsum++; 5864 break; 5865 case IPPROTO_UDP: 5866 udpstat.udps_badsum++; 5867 break; 5868 case IPPROTO_ICMP: 5869 icmpstat.icps_checksum++; 5870 break; 5871 #ifdef INET6 5872 case IPPROTO_ICMPV6: 5873 icmp6stat.icp6s_checksum++; 5874 break; 5875 #endif /* INET6 */ 5876 } 5877 pd->m->m_pkthdr.csum_flags |= flag_bad; 5878 pd->csum_status = PF_CSUM_BAD; 5879 return (1); 5880 } 5881 pd->m->m_pkthdr.csum_flags |= flag_ok; 5882 pd->csum_status = PF_CSUM_OK; 5883 return (0); 5884 } 5885 5886 struct pf_divert * 5887 pf_find_divert(struct mbuf *m) 5888 { 5889 struct m_tag *mtag; 5890 5891 if ((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL) 5892 return (NULL); 5893 5894 return ((struct pf_divert *)(mtag + 1)); 5895 } 5896 5897 struct pf_divert * 5898 pf_get_divert(struct mbuf *m) 5899 { 5900 struct m_tag *mtag; 5901 5902 if ((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL) { 5903 mtag = m_tag_get(PACKET_TAG_PF_DIVERT, sizeof(struct pf_divert), 5904 M_NOWAIT); 5905 if (mtag == NULL) 5906 return (NULL); 5907 bzero(mtag + 1, sizeof(struct pf_divert)); 5908 m_tag_prepend(m, mtag); 5909 } 5910 5911 return ((struct pf_divert *)(mtag + 1)); 5912 } 5913 5914 #ifdef INET6 5915 int 5916 pf_walk_option6(struct pf_pdesc *pd, struct ip6_hdr *h, int off, int end, 5917 u_short *reason) 5918 { 5919 struct ip6_opt opt; 5920 struct ip6_opt_jumbo jumbo; 5921 5922 while (off < end) { 5923 if (!pf_pull_hdr(pd->m, off, &opt.ip6o_type, 5924 sizeof(opt.ip6o_type), NULL, reason, AF_INET6)) { 5925 DPFPRINTF(LOG_NOTICE, "IPv6 short opt type"); 5926 return (PF_DROP); 5927 } 5928 if (opt.ip6o_type == IP6OPT_PAD1) { 5929 off++; 5930 continue; 5931 } 5932 if (!pf_pull_hdr(pd->m, off, &opt, sizeof(opt), 5933 NULL, reason, AF_INET6)) { 5934 DPFPRINTF(LOG_NOTICE, "IPv6 short opt"); 5935 return (PF_DROP); 5936 } 5937 if (off + sizeof(opt) + opt.ip6o_len > end) { 5938 DPFPRINTF(LOG_NOTICE, "IPv6 long opt"); 5939 REASON_SET(reason, PFRES_IPOPTIONS); 5940 return (PF_DROP); 5941 } 5942 switch (opt.ip6o_type) { 5943 case IP6OPT_JUMBO: 5944 if (pd->jumbolen != 0) { 5945 DPFPRINTF(LOG_NOTICE, "IPv6 multiple jumbo"); 5946 REASON_SET(reason, PFRES_IPOPTIONS); 5947 return (PF_DROP); 5948 } 5949 if (ntohs(h->ip6_plen) != 0) { 5950 DPFPRINTF(LOG_NOTICE, "IPv6 bad jumbo plen"); 5951 REASON_SET(reason, PFRES_IPOPTIONS); 5952 return (PF_DROP); 5953 } 5954 if (!pf_pull_hdr(pd->m, off, &jumbo, sizeof(jumbo), 5955 NULL, reason, AF_INET6)) { 5956 DPFPRINTF(LOG_NOTICE, "IPv6 short jumbo"); 5957 return (PF_DROP); 5958 } 5959 memcpy(&pd->jumbolen, jumbo.ip6oj_jumbo_len, 5960 sizeof(pd->jumbolen)); 5961 pd->jumbolen = ntohl(pd->jumbolen); 5962 if (pd->jumbolen < IPV6_MAXPACKET) { 5963 DPFPRINTF(LOG_NOTICE, "IPv6 short jumbolen"); 5964 REASON_SET(reason, PFRES_IPOPTIONS); 5965 return (PF_DROP); 5966 } 5967 break; 5968 default: 5969 break; 5970 } 5971 off += sizeof(opt) + opt.ip6o_len; 5972 } 5973 5974 return (PF_PASS); 5975 } 5976 5977 int 5978 pf_walk_header6(struct pf_pdesc *pd, struct ip6_hdr *h, u_short *reason) 5979 { 5980 struct ip6_frag frag; 5981 struct ip6_ext ext; 5982 struct ip6_rthdr rthdr; 5983 u_int32_t end; 5984 int fraghdr_cnt = 0, rthdr_cnt = 0; 
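/* Walk the extension header chain: record where the fragment header and the last extension header sit (pd->fragoff, pd->extoff), pick up a jumbo payload length from hop-by-hop options, and stop at the first upper-layer header. Duplicate fragment or routing headers, or a type 0 routing header, cause a drop. */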
5985 5986 pd->off += sizeof(struct ip6_hdr); 5987 end = pd->off + ntohs(h->ip6_plen); 5988 pd->fragoff = pd->extoff = pd->jumbolen = 0; 5989 pd->proto = h->ip6_nxt; 5990 for (;;) { 5991 switch (pd->proto) { 5992 case IPPROTO_FRAGMENT: 5993 if (fraghdr_cnt++) { 5994 DPFPRINTF(LOG_NOTICE, "IPv6 multiple fragment"); 5995 REASON_SET(reason, PFRES_FRAG); 5996 return (PF_DROP); 5997 } 5998 /* jumbo payload packets cannot be fragmented */ 5999 if (pd->jumbolen != 0) { 6000 DPFPRINTF(LOG_NOTICE, "IPv6 fragmented jumbo"); 6001 REASON_SET(reason, PFRES_FRAG); 6002 return (PF_DROP); 6003 } 6004 if (!pf_pull_hdr(pd->m, pd->off, &frag, sizeof(frag), 6005 NULL, reason, AF_INET6)) { 6006 DPFPRINTF(LOG_NOTICE, "IPv6 short fragment"); 6007 return (PF_DROP); 6008 } 6009 /* stop walking over non initial fragments */ 6010 if (ntohs((frag.ip6f_offlg & IP6F_OFF_MASK)) != 0) { 6011 pd->fragoff = pd->off; 6012 return (PF_PASS); 6013 } 6014 /* RFC6946: reassemble only non atomic fragments */ 6015 if (frag.ip6f_offlg & IP6F_MORE_FRAG) 6016 pd->fragoff = pd->off; 6017 pd->off += sizeof(frag); 6018 pd->proto = frag.ip6f_nxt; 6019 break; 6020 case IPPROTO_ROUTING: 6021 if (rthdr_cnt++) { 6022 DPFPRINTF(LOG_NOTICE, "IPv6 multiple rthdr"); 6023 REASON_SET(reason, PFRES_IPOPTIONS); 6024 return (PF_DROP); 6025 } 6026 /* fragments may be short */ 6027 if (pd->fragoff != 0 && end < pd->off + sizeof(rthdr)) { 6028 pd->off = pd->fragoff; 6029 pd->proto = IPPROTO_FRAGMENT; 6030 return (PF_PASS); 6031 } 6032 if (!pf_pull_hdr(pd->m, pd->off, &rthdr, sizeof(rthdr), 6033 NULL, reason, AF_INET6)) { 6034 DPFPRINTF(LOG_NOTICE, "IPv6 short rthdr"); 6035 return (PF_DROP); 6036 } 6037 if (rthdr.ip6r_type == IPV6_RTHDR_TYPE_0) { 6038 DPFPRINTF(LOG_NOTICE, "IPv6 rthdr0"); 6039 REASON_SET(reason, PFRES_IPOPTIONS); 6040 return (PF_DROP); 6041 } 6042 /* FALLTHROUGH */ 6043 case IPPROTO_AH: 6044 case IPPROTO_HOPOPTS: 6045 case IPPROTO_DSTOPTS: 6046 /* fragments may be short */ 6047 if (pd->fragoff != 0 && end < pd->off + sizeof(ext)) { 6048 pd->off = pd->fragoff; 6049 pd->proto = IPPROTO_FRAGMENT; 6050 return (PF_PASS); 6051 } 6052 if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext), 6053 NULL, reason, AF_INET6)) { 6054 DPFPRINTF(LOG_NOTICE, "IPv6 short exthdr"); 6055 return (PF_DROP); 6056 } 6057 /* reassembly needs the ext header before the frag */ 6058 if (pd->fragoff == 0) 6059 pd->extoff = pd->off; 6060 if (pd->proto == IPPROTO_HOPOPTS && pd->fragoff == 0) { 6061 if (pf_walk_option6(pd, h, 6062 pd->off + sizeof(ext), 6063 pd->off + (ext.ip6e_len + 1) * 8, reason) 6064 != PF_PASS) 6065 return (PF_DROP); 6066 if (ntohs(h->ip6_plen) == 0 && 6067 pd->jumbolen != 0) { 6068 DPFPRINTF(LOG_NOTICE, 6069 "IPv6 missing jumbo"); 6070 REASON_SET(reason, PFRES_IPOPTIONS); 6071 return (PF_DROP); 6072 } 6073 } 6074 if (pd->proto == IPPROTO_AH) 6075 pd->off += (ext.ip6e_len + 2) * 4; 6076 else 6077 pd->off += (ext.ip6e_len + 1) * 8; 6078 pd->proto = ext.ip6e_nxt; 6079 break; 6080 case IPPROTO_TCP: 6081 case IPPROTO_UDP: 6082 case IPPROTO_ICMPV6: 6083 /* fragments may be short, ignore inner header then */ 6084 if (pd->fragoff != 0 && end < pd->off + 6085 (pd->proto == IPPROTO_TCP ? sizeof(struct tcphdr) : 6086 pd->proto == IPPROTO_UDP ? 
sizeof(struct udphdr) : 6087 sizeof(struct icmp6_hdr))) { 6088 pd->off = pd->fragoff; 6089 pd->proto = IPPROTO_FRAGMENT; 6090 } 6091 /* FALLTHROUGH */ 6092 default: 6093 return (PF_PASS); 6094 } 6095 } 6096 } 6097 #endif /* INET6 */ 6098 6099 int 6100 pf_setup_pdesc(struct pf_pdesc *pd, void *pdhdrs, sa_family_t af, int dir, 6101 struct pfi_kif *kif, struct mbuf *m, u_short *reason) 6102 { 6103 bzero(pd, sizeof(*pd)); 6104 pd->hdr.any = pdhdrs; 6105 pd->dir = dir; 6106 pd->kif = kif; /* kif is NULL when called by pflog */ 6107 pd->m = m; 6108 pd->sidx = (dir == PF_IN) ? 0 : 1; 6109 pd->didx = (dir == PF_IN) ? 1 : 0; 6110 pd->af = pd->naf = af; 6111 pd->rdomain = rtable_l2(pd->m->m_pkthdr.ph_rtableid); 6112 6113 switch (pd->af) { 6114 case AF_INET: { 6115 struct ip *h; 6116 6117 /* Check for illegal packets */ 6118 if (pd->m->m_pkthdr.len < (int)sizeof(struct ip)) { 6119 REASON_SET(reason, PFRES_SHORT); 6120 return (PF_DROP); 6121 } 6122 6123 h = mtod(pd->m, struct ip *); 6124 pd->off = h->ip_hl << 2; 6125 6126 if (pd->off < sizeof(struct ip) || 6127 pd->off > ntohs(h->ip_len) || 6128 pd->m->m_pkthdr.len < ntohs(h->ip_len)) { 6129 REASON_SET(reason, PFRES_SHORT); 6130 return (PF_DROP); 6131 } 6132 6133 pd->src = (struct pf_addr *)&h->ip_src; 6134 pd->dst = (struct pf_addr *)&h->ip_dst; 6135 pd->virtual_proto = pd->proto = h->ip_p; 6136 pd->tot_len = ntohs(h->ip_len); 6137 pd->tos = h->ip_tos & ~IPTOS_ECN_MASK; 6138 pd->ttl = h->ip_ttl; 6139 if (h->ip_hl > 5) /* has options */ 6140 pd->badopts++; 6141 6142 if (h->ip_off & htons(IP_MF | IP_OFFMASK)) 6143 pd->virtual_proto = PF_VPROTO_FRAGMENT; 6144 6145 break; 6146 } 6147 #ifdef INET6 6148 case AF_INET6: { 6149 struct ip6_hdr *h; 6150 6151 /* Check for illegal packets */ 6152 if (pd->m->m_pkthdr.len < (int)sizeof(struct ip6_hdr)) { 6153 REASON_SET(reason, PFRES_SHORT); 6154 return (PF_DROP); 6155 } 6156 6157 h = mtod(pd->m, struct ip6_hdr *); 6158 pd->off = 0; 6159 6160 if (pd->m->m_pkthdr.len < 6161 sizeof(struct ip6_hdr) + ntohs(h->ip6_plen)) { 6162 REASON_SET(reason, PFRES_SHORT); 6163 return (PF_DROP); 6164 } 6165 6166 if (pf_walk_header6(pd, h, reason) != PF_PASS) 6167 return (PF_DROP); 6168 6169 #if 1 6170 /* 6171 * we do not support jumbogram yet. if we keep going, zero 6172 * ip6_plen will do something bad, so drop the packet for now. 
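* (A jumbogram, per RFC 2675, sets ip6_plen to 0 and carries the real payload length, up to 4 GB, in a hop-by-hop option; pf_walk_option6() has already extracted it into pd->jumbolen.)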
6173 */ 6174 if (pd->jumbolen != 0) { 6175 REASON_SET(reason, PFRES_NORM); 6176 return (PF_DROP); 6177 } 6178 #endif /* 1 */ 6179 6180 pd->src = (struct pf_addr *)&h->ip6_src; 6181 pd->dst = (struct pf_addr *)&h->ip6_dst; 6182 pd->virtual_proto = pd->proto; 6183 pd->tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr); 6184 pd->tos = (ntohl(h->ip6_flow) & 0x0fc00000) >> 20; 6185 pd->ttl = h->ip6_hlim; 6186 6187 if (pd->fragoff != 0) 6188 pd->virtual_proto = PF_VPROTO_FRAGMENT; 6189 6190 break; 6191 } 6192 #endif /* INET6 */ 6193 default: 6194 panic("pf_setup_pdesc called with illegal af %u", pd->af); 6195 6196 } 6197 6198 PF_ACPY(&pd->nsaddr, pd->src, pd->af); 6199 PF_ACPY(&pd->ndaddr, pd->dst, pd->af); 6200 6201 switch (pd->virtual_proto) { 6202 case IPPROTO_TCP: { 6203 struct tcphdr *th = pd->hdr.tcp; 6204 6205 if (!pf_pull_hdr(pd->m, pd->off, th, sizeof(*th), 6206 NULL, reason, pd->af)) 6207 return (PF_DROP); 6208 pd->hdrlen = sizeof(*th); 6209 if (pd->off + (th->th_off << 2) > pd->tot_len || 6210 (th->th_off << 2) < sizeof(struct tcphdr)) { 6211 REASON_SET(reason, PFRES_SHORT); 6212 return (PF_DROP); 6213 } 6214 pd->p_len = pd->tot_len - pd->off - (th->th_off << 2); 6215 pd->sport = &th->th_sport; 6216 pd->dport = &th->th_dport; 6217 pd->pcksum = &th->th_sum; 6218 break; 6219 } 6220 case IPPROTO_UDP: { 6221 struct udphdr *uh = pd->hdr.udp; 6222 6223 if (!pf_pull_hdr(pd->m, pd->off, uh, sizeof(*uh), 6224 NULL, reason, pd->af)) 6225 return (PF_DROP); 6226 pd->hdrlen = sizeof(*uh); 6227 if (uh->uh_dport == 0 || 6228 pd->off + ntohs(uh->uh_ulen) > pd->tot_len || 6229 ntohs(uh->uh_ulen) < sizeof(struct udphdr)) { 6230 REASON_SET(reason, PFRES_SHORT); 6231 return (PF_DROP); 6232 } 6233 pd->sport = &uh->uh_sport; 6234 pd->dport = &uh->uh_dport; 6235 pd->pcksum = &uh->uh_sum; 6236 break; 6237 } 6238 case IPPROTO_ICMP: { 6239 if (!pf_pull_hdr(pd->m, pd->off, pd->hdr.icmp, ICMP_MINLEN, 6240 NULL, reason, pd->af)) 6241 return (PF_DROP); 6242 pd->hdrlen = ICMP_MINLEN; 6243 if (pd->off + pd->hdrlen > pd->tot_len) { 6244 REASON_SET(reason, PFRES_SHORT); 6245 return (PF_DROP); 6246 } 6247 pd->pcksum = &pd->hdr.icmp->icmp_cksum; 6248 break; 6249 } 6250 #ifdef INET6 6251 case IPPROTO_ICMPV6: { 6252 size_t icmp_hlen = sizeof(struct icmp6_hdr); 6253 6254 if (!pf_pull_hdr(pd->m, pd->off, pd->hdr.icmp6, icmp_hlen, 6255 NULL, reason, pd->af)) 6256 return (PF_DROP); 6257 /* ICMP headers we look further into to match state */ 6258 switch (pd->hdr.icmp6->icmp6_type) { 6259 case MLD_LISTENER_QUERY: 6260 case MLD_LISTENER_REPORT: 6261 icmp_hlen = sizeof(struct mld_hdr); 6262 break; 6263 case ND_NEIGHBOR_SOLICIT: 6264 case ND_NEIGHBOR_ADVERT: 6265 icmp_hlen = sizeof(struct nd_neighbor_solicit); 6266 break; 6267 } 6268 if (icmp_hlen > sizeof(struct icmp6_hdr) && 6269 !pf_pull_hdr(pd->m, pd->off, pd->hdr.icmp6, icmp_hlen, 6270 NULL, reason, pd->af)) 6271 return (PF_DROP); 6272 pd->hdrlen = icmp_hlen; 6273 if (pd->off + pd->hdrlen > pd->tot_len) { 6274 REASON_SET(reason, PFRES_SHORT); 6275 return (PF_DROP); 6276 } 6277 break; 6278 } 6279 #endif /* INET6 */ 6280 } 6281 6282 if (pd->sport) 6283 pd->osport = pd->nsport = *pd->sport; 6284 if (pd->dport) 6285 pd->odport = pd->ndport = *pd->dport; 6286 6287 return (PF_PASS); 6288 } 6289 6290 void 6291 pf_counters_inc(int action, struct pf_pdesc *pd, struct pf_state *s, 6292 struct pf_rule *r, struct pf_rule *a) 6293 { 6294 int dirndx; 6295 pd->kif->pfik_bytes[pd->af == AF_INET6][pd->dir == PF_OUT] 6296 [action != PF_PASS] += pd->tot_len; 6297 pd->kif->pfik_packets[pd->af 
void
pf_counters_inc(int action, struct pf_pdesc *pd, struct pf_state *s,
    struct pf_rule *r, struct pf_rule *a)
{
	int	dirndx;

	pd->kif->pfik_bytes[pd->af == AF_INET6][pd->dir == PF_OUT]
	    [action != PF_PASS] += pd->tot_len;
	pd->kif->pfik_packets[pd->af == AF_INET6][pd->dir == PF_OUT]
	    [action != PF_PASS]++;

	if (action == PF_PASS || action == PF_AFRT || r->action == PF_DROP) {
		dirndx = (pd->dir == PF_OUT);
		r->packets[dirndx]++;
		r->bytes[dirndx] += pd->tot_len;
		if (a != NULL) {
			a->packets[dirndx]++;
			a->bytes[dirndx] += pd->tot_len;
		}
		if (s != NULL) {
			struct pf_rule_item	*ri;
			struct pf_sn_item	*sni;

			SLIST_FOREACH(sni, &s->src_nodes, next) {
				sni->sn->packets[dirndx]++;
				sni->sn->bytes[dirndx] += pd->tot_len;
			}
			dirndx = (pd->dir == s->direction) ? 0 : 1;
			s->packets[dirndx]++;
			s->bytes[dirndx] += pd->tot_len;

			SLIST_FOREACH(ri, &s->match_rules, entry) {
				ri->r->packets[dirndx]++;
				ri->r->bytes[dirndx] += pd->tot_len;

				if (ri->r->src.addr.type == PF_ADDR_TABLE)
					pfr_update_stats(ri->r->src.addr.p.tbl,
					    &s->key[(s->direction == PF_IN)]->
					    addr[(s->direction == PF_OUT)],
					    pd, ri->r->action, ri->r->src.neg);
				if (ri->r->dst.addr.type == PF_ADDR_TABLE)
					pfr_update_stats(ri->r->dst.addr.p.tbl,
					    &s->key[(s->direction == PF_IN)]->
					    addr[(s->direction == PF_IN)],
					    pd, ri->r->action, ri->r->dst.neg);
			}
		}
		if (r->src.addr.type == PF_ADDR_TABLE)
			pfr_update_stats(r->src.addr.p.tbl,
			    (s == NULL) ? pd->src :
			    &s->key[(s->direction == PF_IN)]->
			    addr[(s->direction == PF_OUT)],
			    pd, r->action, r->src.neg);
		if (r->dst.addr.type == PF_ADDR_TABLE)
			pfr_update_stats(r->dst.addr.p.tbl,
			    (s == NULL) ? pd->dst :
			    &s->key[(s->direction == PF_IN)]->
			    addr[(s->direction == PF_IN)],
			    pd, r->action, r->dst.neg);
	}
}

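/*
 * Main per-packet entry point of the packet filter, invoked from the
 * IPv4 and IPv6 input/output paths.  Forwarded traffic (fwdir ==
 * PF_FWD) is filtered as outgoing.  A rough calling sketch (the
 * exact call sites live in the stack, not here):
 *
 *	if (pf_test(AF_INET, PF_IN, ifp, &m) != PF_PASS || m == NULL)
 *		return;		packet dropped or consumed by pf
 *
 * Whenever pf consumes the packet (drop, synproxy, divert, address
 * family translation, reassembly hold), *m0 is set to NULL, so
 * callers must check both the return value and the mbuf pointer.
 */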
int
pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0)
{
	struct pfi_kif		*kif;
	u_short			 action, reason = 0;
	struct pf_rule		*a = NULL, *r = &pf_default_rule;
	struct pf_state		*s = NULL;
	struct pf_ruleset	*ruleset = NULL;
	struct pf_pdesc		 pd;
	union pf_headers	 pdhdrs;
	int			 dir = (fwdir == PF_FWD) ? PF_OUT : fwdir;
	u_int32_t		 qid, pqid = 0;

	if (!pf_status.running)
		return (PF_PASS);

#if NCARP > 0
	if (ifp->if_type == IFT_CARP && ifp->if_carpdev)
		kif = (struct pfi_kif *)ifp->if_carpdev->if_pf_kif;
	else
#endif /* NCARP */
		kif = (struct pfi_kif *)ifp->if_pf_kif;

	if (kif == NULL) {
		DPFPRINTF(LOG_ERR,
		    "pf_test: kif == NULL, if_xname %s", ifp->if_xname);
		return (PF_DROP);
	}
	if (kif->pfik_flags & PFI_IFLAG_SKIP)
		return (PF_PASS);

#ifdef DIAGNOSTIC
	if (((*m0)->m_flags & M_PKTHDR) == 0)
		panic("non-M_PKTHDR is passed to pf_test");
#endif /* DIAGNOSTIC */

	if ((*m0)->m_pkthdr.pf.flags & PF_TAG_GENERATED)
		return (PF_PASS);

	if ((*m0)->m_pkthdr.pf.flags & PF_TAG_DIVERTED_PACKET)
		return (PF_PASS);

	if ((*m0)->m_pkthdr.pf.flags & PF_TAG_REFRAGMENTED) {
		(*m0)->m_pkthdr.pf.flags &= ~PF_TAG_REFRAGMENTED;
		return (PF_PASS);
	}

	action = pf_setup_pdesc(&pd, &pdhdrs, af, dir, kif, *m0, &reason);
	if (action != PF_PASS) {
#if NPFLOG > 0
		pd.pflog |= PF_LOG_FORCE;
#endif /* NPFLOG > 0 */
		goto done;
	}

	/* packet normalization and reassembly */
	switch (pd.af) {
	case AF_INET:
		action = pf_normalize_ip(&pd, &reason);
		break;
#ifdef INET6
	case AF_INET6:
		action = pf_normalize_ip6(&pd, &reason);
		break;
#endif /* INET6 */
	}
	*m0 = pd.m;
	/* if packet sits in reassembly queue, return without error */
	if (pd.m == NULL)
		return (PF_PASS);
	if (action != PF_PASS) {
#if NPFLOG > 0
		pd.pflog |= PF_LOG_FORCE;
#endif /* NPFLOG > 0 */
		goto done;
	}

	/* if packet has been reassembled, update packet description */
	if (pf_status.reass && pd.virtual_proto == PF_VPROTO_FRAGMENT) {
		action = pf_setup_pdesc(&pd, &pdhdrs, af, dir, kif, *m0,
		    &reason);
		if (action != PF_PASS) {
#if NPFLOG > 0
			pd.pflog |= PF_LOG_FORCE;
#endif /* NPFLOG > 0 */
			goto done;
		}
	}
	pd.m->m_pkthdr.pf.flags |= PF_TAG_PROCESSED;

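	/*
	 * Dispatch on the virtual protocol: fragments which normalization
	 * did not reassemble can only match rules, ICMP and ICMPv6 have
	 * their own state lookup in pf_test_state_icmp(), and everything
	 * else takes the generic pf_test_state() path.
	 */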
	switch (pd.virtual_proto) {

	case PF_VPROTO_FRAGMENT: {
		/*
		 * handle fragments that aren't reassembled by
		 * normalization
		 */
		action = pf_test_rule(&pd, &r, &s, &a, &ruleset, &reason);
		if (action != PF_PASS)
			REASON_SET(&reason, PFRES_FRAG);
		break;
	}

	case IPPROTO_ICMP: {
		if (pd.af != AF_INET) {
			action = PF_DROP;
			REASON_SET(&reason, PFRES_NORM);
			DPFPRINTF(LOG_NOTICE,
			    "dropping IPv6 packet with ICMPv4 payload");
			goto done;
		}
		action = pf_test_state_icmp(&pd, &s, &reason);
		if (action == PF_PASS || action == PF_AFRT) {
#if NPFSYNC > 0
			pfsync_update_state(s);
#endif /* NPFSYNC > 0 */
			r = s->rule.ptr;
			a = s->anchor.ptr;
#if NPFLOG > 0
			pd.pflog |= s->log;
#endif /* NPFLOG > 0 */
		} else if (s == NULL)
			action = pf_test_rule(&pd, &r, &s, &a, &ruleset,
			    &reason);
		break;
	}

#ifdef INET6
	case IPPROTO_ICMPV6: {
		if (pd.af != AF_INET6) {
			action = PF_DROP;
			REASON_SET(&reason, PFRES_NORM);
			DPFPRINTF(LOG_NOTICE,
			    "dropping IPv4 packet with ICMPv6 payload");
			goto done;
		}
		action = pf_test_state_icmp(&pd, &s, &reason);
		if (action == PF_PASS || action == PF_AFRT) {
#if NPFSYNC > 0
			pfsync_update_state(s);
#endif /* NPFSYNC > 0 */
			r = s->rule.ptr;
			a = s->anchor.ptr;
#if NPFLOG > 0
			pd.pflog |= s->log;
#endif /* NPFLOG > 0 */
		} else if (s == NULL)
			action = pf_test_rule(&pd, &r, &s, &a, &ruleset,
			    &reason);
		break;
	}
#endif /* INET6 */

	default:
		if (pd.virtual_proto == IPPROTO_TCP) {
			if ((pd.hdr.tcp->th_flags & TH_ACK) && pd.p_len == 0)
				pqid = 1;
			action = pf_normalize_tcp(&pd);
			if (action == PF_DROP)
				goto done;
		}
		action = pf_test_state(&pd, &s, &reason);
		if (action == PF_PASS || action == PF_AFRT) {
#if NPFSYNC > 0
			pfsync_update_state(s);
#endif /* NPFSYNC > 0 */
			r = s->rule.ptr;
			a = s->anchor.ptr;
#if NPFLOG > 0
			pd.pflog |= s->log;
#endif /* NPFLOG > 0 */
		} else if (s == NULL)
			action = pf_test_rule(&pd, &r, &s, &a, &ruleset,
			    &reason);

		if (pd.virtual_proto == IPPROTO_TCP) {
			if (s) {
				if (s->max_mss)
					pf_normalize_mss(&pd, s->max_mss);
			} else if (r->max_mss)
				pf_normalize_mss(&pd, r->max_mss);
		}

		break;
	}

done:
	if (action != PF_DROP) {
		if (s) {
			/* The non-state case is handled in pf_test_rule() */
			if (action == PF_PASS && pd.badopts &&
			    !(s->state_flags & PFSTATE_ALLOWOPTS)) {
				action = PF_DROP;
				REASON_SET(&reason, PFRES_IPOPTIONS);
#if NPFLOG > 0
				pd.pflog |= PF_LOG_FORCE;
#endif /* NPFLOG > 0 */
				DPFPRINTF(LOG_NOTICE, "dropping packet with "
				    "ip/ipv6 options in pf_test()");
			}

			pf_scrub(pd.m, s->state_flags, pd.af, s->min_ttl,
			    s->set_tos);
			pf_tag_packet(pd.m, s->tag, s->rtableid[pd.didx]);
			if (pqid || (pd.tos & IPTOS_LOWDELAY)) {
				qid = s->pqid;
				if (s->state_flags & PFSTATE_SETPRIO)
					pd.m->m_pkthdr.pf.prio = s->set_prio[1];
			} else {
				qid = s->qid;
				if (s->state_flags & PFSTATE_SETPRIO)
					pd.m->m_pkthdr.pf.prio = s->set_prio[0];
			}
		} else {
			pf_scrub(pd.m, r->scrub_flags, pd.af, r->min_ttl,
			    r->set_tos);
			if (pqid || (pd.tos & IPTOS_LOWDELAY)) {
				qid = r->pqid;
				if (r->scrub_flags & PFSTATE_SETPRIO)
					pd.m->m_pkthdr.pf.prio = r->set_prio[1];
			} else {
				qid = r->qid;
				if (r->scrub_flags & PFSTATE_SETPRIO)
					pd.m->m_pkthdr.pf.prio = r->set_prio[0];
			}
		}
	}

	if (action == PF_PASS && qid)
		pd.m->m_pkthdr.pf.qid = qid;
	if (pd.dir == PF_IN && s && s->key[PF_SK_STACK]) {
		/*
		 * Check below fires whenever caller forgets to call
		 * pf_pkt_addr_changed().  This might happen when we
		 * deal with IP tunnels.
		 */
		if (pd.m->m_pkthdr.pf.statekey != NULL) {
#ifdef DDB
			m_print(pd.m, printf);
#endif
			panic("incoming mbuf already has a statekey");
		}
		pd.m->m_pkthdr.pf.statekey =
		    pf_state_key_ref(s->key[PF_SK_STACK]);
	}
	if (pd.dir == PF_OUT &&
	    pd.m->m_pkthdr.pf.inp && !pd.m->m_pkthdr.pf.inp->inp_pf_sk &&
	    s && s->key[PF_SK_STACK] && !s->key[PF_SK_STACK]->inp) {
		pd.m->m_pkthdr.pf.inp->inp_pf_sk =
		    pf_state_key_ref(s->key[PF_SK_STACK]);
		s->key[PF_SK_STACK]->inp = pd.m->m_pkthdr.pf.inp;
	}

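	/*
	 * Deriving the flow ID from the 64-bit state ID gives every
	 * packet of a connection the same value; link layers and
	 * multiqueue drivers may use it to keep a flow on one queue.
	 */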
	if (s) {
		pd.m->m_pkthdr.ph_flowid = M_FLOWID_VALID |
		    (M_FLOWID_MASK & bemtoh64(&s->id));
	}

	/*
	 * connections redirected to loopback should not match sockets
	 * bound specifically to loopback due to security implications,
	 * see tcp_input() and in_pcblookup_listen().
	 */
	if (pd.destchg)
		if ((pd.af == AF_INET && (ntohl(pd.dst->v4.s_addr) >>
		    IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) ||
		    (pd.af == AF_INET6 && IN6_IS_ADDR_LOOPBACK(&pd.dst->v6)))
			pd.m->m_pkthdr.pf.flags |= PF_TAG_TRANSLATE_LOCALHOST;
	/* We need to redo the route lookup on outgoing routes. */
	if (pd.destchg && pd.dir == PF_OUT)
		pd.m->m_pkthdr.pf.flags |= PF_TAG_REROUTE;

	if (pd.dir == PF_IN && action == PF_PASS && r->divert.port) {
		struct pf_divert	*divert;

		if ((divert = pf_get_divert(pd.m))) {
			pd.m->m_pkthdr.pf.flags |= PF_TAG_DIVERTED;
			divert->port = r->divert.port;
			divert->rdomain = pd.rdomain;
			divert->addr = r->divert.addr;
		}
	}

	if (action == PF_PASS && r->divert_packet.port)
		action = PF_DIVERT;

#if NPFLOG > 0
	if (pd.pflog) {
		struct pf_rule_item	*ri;

		if (pd.pflog & PF_LOG_FORCE || r->log & PF_LOG_ALL)
			PFLOG_PACKET(&pd, reason, r, a, ruleset, NULL);
		if (s) {
			SLIST_FOREACH(ri, &s->match_rules, entry)
				if (ri->r->log & PF_LOG_ALL)
					PFLOG_PACKET(&pd, reason, ri->r, a,
					    ruleset, NULL);
		}
	}
#endif /* NPFLOG > 0 */

	pf_counters_inc(action, &pd, s, r, a);

	switch (action) {
	case PF_SYNPROXY_DROP:
		m_freem(*m0);
		/* FALLTHROUGH */
	case PF_DEFER:
		*m0 = NULL;
		action = PF_PASS;
		break;
	case PF_DIVERT:
		switch (pd.af) {
		case AF_INET:
			if (!divert_packet(pd.m, pd.dir, r->divert_packet.port))
				*m0 = NULL;
			break;
#ifdef INET6
		case AF_INET6:
			if (!divert6_packet(pd.m, pd.dir,
			    r->divert_packet.port))
				*m0 = NULL;
			break;
#endif /* INET6 */
		}
		action = PF_PASS;
		break;
#ifdef INET6
	case PF_AFRT:
		if (pf_translate_af(&pd)) {
			if (!pd.m)
				*m0 = NULL;
			action = PF_DROP;
			break;
		}
		if (pd.naf == AF_INET)
			pf_route(&pd.m, r, dir, kif->pfik_ifp, s);
		if (pd.naf == AF_INET6)
			pf_route6(&pd.m, r, dir, kif->pfik_ifp, s);
		*m0 = NULL;
		action = PF_PASS;
		break;
#endif /* INET6 */
	case PF_DROP:
		m_freem(*m0);
		*m0 = NULL;
		break;
	default:
		/* pf_route can free the mbuf causing *m0 to become NULL */
		if (r->rt) {
			switch (pd.af) {
			case AF_INET:
				pf_route(m0, r, pd.dir, pd.kif->pfik_ifp, s);
				break;
#ifdef INET6
			case AF_INET6:
				pf_route6(m0, r, pd.dir, pd.kif->pfik_ifp, s);
				break;
#endif /* INET6 */
			}
		}
		break;
	}

#ifdef INET6
	/* if reassembled packet passed, create new fragments */
	if (pf_status.reass && action == PF_PASS && *m0 && fwdir == PF_FWD) {
		struct m_tag	*mtag;

		if ((mtag = m_tag_find(*m0, PACKET_TAG_PF_REASSEMBLED, NULL)))
			action = pf_refragment6(m0, mtag, NULL, NULL);
	}
#endif /* INET6 */
	if (s && action != PF_DROP) {
		if (!s->if_index_in && dir == PF_IN)
			s->if_index_in = ifp->if_index;
		else if (!s->if_index_out && dir == PF_OUT)
			s->if_index_out = ifp->if_index;
	}

	return (action);
}

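/*
 * Finalize the transport checksum of a possibly rewritten packet.
 * Rather than computing the sum here, the checksum field is zeroed
 * and the matching M_*_CSUM_OUT flag is set, leaving the work to the
 * output path or to checksum-offloading hardware.  For instance,
 * after pf has rewritten a NATed TCP packet, calling
 * pf_cksum(pd, pd->m) marks it for recomputation on the way out.
 */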
void
pf_cksum(struct pf_pdesc *pd, struct mbuf *m)
{
	if (pd->csum_status != PF_CSUM_OK)
		return; /* don't fix broken cksums */

	switch (pd->proto) {
	case IPPROTO_TCP:
		pd->hdr.tcp->th_sum = 0;
		m->m_pkthdr.csum_flags |= M_TCP_CSUM_OUT;
		break;
	case IPPROTO_UDP:
		pd->hdr.udp->uh_sum = 0;
		m->m_pkthdr.csum_flags |= M_UDP_CSUM_OUT;
		break;
	case IPPROTO_ICMP:
		pd->hdr.icmp->icmp_cksum = 0;
		m->m_pkthdr.csum_flags |= M_ICMP_CSUM_OUT;
		break;
#ifdef INET6
	case IPPROTO_ICMPV6:
		pd->hdr.icmp6->icmp6_cksum = 0;
		m->m_pkthdr.csum_flags |= M_ICMP_CSUM_OUT;
		break;
#endif /* INET6 */
	default:
		/* nothing */
		break;
	}
}

int
pf_ouraddr(struct mbuf *m)
{
	struct pf_state_key	*sk;

	if (m->m_pkthdr.pf.flags & PF_TAG_DIVERTED)
		return (1);

	sk = m->m_pkthdr.pf.statekey;
	if (sk != NULL) {
		if (sk->inp != NULL)
			return (1);

		/* If we have linked state keys it is certainly forwarded. */
		if (sk->reverse != NULL)
			return (0);
	}

	return (-1);
}

/*
 * must be called whenever any addressing information such as
 * address, port, protocol has changed
 */
void
pf_pkt_addr_changed(struct mbuf *m)
{
	pf_pkt_unlink_state_key(m);
	m->m_pkthdr.pf.inp = NULL;
}

struct inpcb *
pf_inp_lookup(struct mbuf *m)
{
	struct inpcb		*inp = NULL;
	struct pf_state_key	*sk = m->m_pkthdr.pf.statekey;

	if (!pf_state_key_isvalid(sk))
		pf_pkt_unlink_state_key(m);
	else
		inp = m->m_pkthdr.pf.statekey->inp;

	if (inp && inp->inp_pf_sk)
		KASSERT(m->m_pkthdr.pf.statekey == inp->inp_pf_sk);

	return (inp);
}

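/*
 * Tie the state key cached in the mbuf to the PCB the stack resolved
 * the packet to, so that subsequent lookups can go straight from one
 * to the other.  Once linked (or if the key went stale), the mbuf's
 * reference to the state key is dropped.
 */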
void
pf_inp_link(struct mbuf *m, struct inpcb *inp)
{
	struct pf_state_key	*sk = m->m_pkthdr.pf.statekey;

	if (!pf_state_key_isvalid(sk)) {
		pf_pkt_unlink_state_key(m);
		return;
	}

	/*
	 * we don't need to grab PF-lock here. At worst case we link inp to
	 * state, which might be just being marked as deleted by another
	 * thread.
	 */
	if (inp && !sk->inp && !inp->inp_pf_sk) {
		sk->inp = inp;
		inp->inp_pf_sk = pf_state_key_ref(sk);
	}

	/* The statekey has finished finding the inp; it is no longer needed. */
	pf_pkt_unlink_state_key(m);
}

void
pf_inp_unlink(struct inpcb *inp)
{
	if (inp->inp_pf_sk) {
		inp->inp_pf_sk->inp = NULL;
		pf_inpcb_unlink_state_key(inp);
	}
}

void
pf_state_key_link(struct pf_state_key *sk, struct pf_state_key *pkt_sk)
{
	/*
	 * Assert will not fire as long as we are called by pf_find_state()
	 */
	KASSERT((pkt_sk->reverse == NULL) && (sk->reverse == NULL));
	pkt_sk->reverse = pf_state_key_ref(sk);
	sk->reverse = pf_state_key_ref(pkt_sk);
}

#if NPFLOG > 0
void
pf_log_matches(struct pf_pdesc *pd, struct pf_rule *rm, struct pf_rule *am,
    struct pf_ruleset *ruleset, struct pf_rule_slist *matchrules)
{
	struct pf_rule_item	*ri;

	/* if this is the log(matches) rule, packet has been logged already */
	if (rm->log & PF_LOG_MATCHES)
		return;

	SLIST_FOREACH(ri, matchrules, entry)
		if (ri->r->log & PF_LOG_MATCHES)
			PFLOG_PACKET(pd, PFRES_MATCH, rm, am, ruleset, ri->r);
}
#endif /* NPFLOG > 0 */

/*
 * State key reference counting: every pf_state_key_ref() must be
 * balanced by a pf_state_key_unref().  The unlink helpers below drop
 * the reference held by the mbuf, the PCB or the reverse key; the
 * final unref returns the key to its pool once it has been removed
 * from the state table.
 */
struct pf_state_key *
pf_state_key_ref(struct pf_state_key *sk)
{
	if (sk != NULL)
		PF_REF_TAKE(sk->refcnt);

	return (sk);
}

void
pf_state_key_unref(struct pf_state_key *sk)
{
	if ((sk != NULL) && PF_REF_RELE(sk->refcnt)) {
		/* state key must be removed from tree */
		KASSERT(!pf_state_key_isvalid(sk));
		/* state key must be unlinked from reverse key */
		KASSERT(sk->reverse == NULL);
		/* state key must be unlinked from socket */
		KASSERT((sk->inp == NULL) || (sk->inp->inp_pf_sk == NULL));
		sk->inp = NULL;
		pool_put(&pf_state_key_pl, sk);
	}
}

int
pf_state_key_isvalid(struct pf_state_key *sk)
{
	return ((sk != NULL) && (sk->removed == 0));
}

void
pf_pkt_unlink_state_key(struct mbuf *m)
{
	pf_state_key_unref(m->m_pkthdr.pf.statekey);
	m->m_pkthdr.pf.statekey = NULL;
}

void
pf_pkt_state_key_ref(struct mbuf *m)
{
	pf_state_key_ref(m->m_pkthdr.pf.statekey);
}

void
pf_inpcb_unlink_state_key(struct inpcb *inp)
{
	if (inp != NULL) {
		pf_state_key_unref(inp->inp_pf_sk);
		inp->inp_pf_sk = NULL;
	}
}

void
pf_state_key_unlink_reverse(struct pf_state_key *sk)
{
	if ((sk != NULL) && (sk->reverse != NULL)) {
		pf_state_key_unref(sk->reverse->reverse);
		sk->reverse->reverse = NULL;
		pf_state_key_unref(sk->reverse);
		sk->reverse = NULL;
	}
}