/*	$OpenBSD: pf.c,v 1.984 2016/09/07 09:36:49 mpi Exp $ */

/*
 * Copyright (c) 2001 Daniel Hartmeier
 * Copyright (c) 2002 - 2013 Henning Brauer <henning@openbsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *    - Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *    - Redistributions in binary form must reproduce the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer in the documentation and/or other materials provided
 *      with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Effort sponsored in part by the Defense Advanced Research Projects
 * Agency (DARPA) and Air Force Research Laboratory, Air Force
 * Materiel Command, USAF, under agreement number F30602-01-2-0537.
 *
 */

#include "bpfilter.h"
#include "carp.h"
#include "pflog.h"
#include "pfsync.h"
#include "pflow.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/filio.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/kernel.h>
#include <sys/time.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/syslog.h>

#include <crypto/sha2.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_types.h>
#include <net/route.h>

#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_seq.h>
#include <netinet/udp.h>
#include <netinet/ip_icmp.h>
#include <netinet/in_pcb.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_fsm.h>
#include <netinet/udp_var.h>
#include <netinet/icmp_var.h>
#include <netinet/ip_divert.h>

#include <net/pfvar.h>

#if NPFLOG > 0
#include <net/if_pflog.h>
#endif	/* NPFLOG > 0 */

#if NPFLOW > 0
#include <net/if_pflow.h>
#endif	/* NPFLOW > 0 */

#if NPFSYNC > 0
#include <net/if_pfsync.h>
#endif	/* NPFSYNC > 0 */

#ifdef INET6
#include <netinet6/in6_var.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet/icmp6.h>
#include <netinet6/nd6.h>
#include <netinet6/ip6_divert.h>
#endif /* INET6 */

#ifdef DDB
#include <machine/db_machdep.h>
#include <ddb/db_interface.h>
#endif

/*
 * Global variables
 */
struct pf_state_tree	 pf_statetbl;
struct pf_queuehead	 pf_queues[2];
struct pf_queuehead	*pf_queues_active;
struct pf_queuehead	*pf_queues_inactive;

struct pf_status	 pf_status;

SHA2_CTX		 pf_tcp_secret_ctx;
u_char			 pf_tcp_secret[16];
int			 pf_tcp_secret_init;
int			 pf_tcp_iss_off;

struct pf_anchor_stackframe {
	struct pf_ruleset	*rs;
	struct pf_rule		*r;
	struct pf_anchor_node	*parent;
	struct pf_anchor	*child;
} pf_anchor_stack[64];

/*
 * Cannot fold into pf_pdesc directly, unknown storage size outside pf.c.
 * Keep in sync with union pf_headers in pflog_bpfcopy() in if_pflog.c.
 */
union pf_headers {
	struct tcphdr		tcp;
	struct udphdr		udp;
	struct icmp		icmp;
#ifdef INET6
	struct icmp6_hdr	icmp6;
	struct mld_hdr		mld;
	struct nd_neighbor_solicit nd_ns;
#endif /* INET6 */
};


struct pool		 pf_src_tree_pl, pf_rule_pl, pf_queue_pl;
struct pool		 pf_state_pl, pf_state_key_pl, pf_state_item_pl;
struct pool		 pf_rule_item_pl, pf_sn_item_pl;

void			 pf_init_threshold(struct pf_threshold *, u_int32_t,
			    u_int32_t);
void			 pf_add_threshold(struct pf_threshold *);
int			 pf_check_threshold(struct pf_threshold *);
int			 pf_check_tcp_cksum(struct mbuf *, int, int,
			    sa_family_t);
static __inline void	 pf_cksum_fixup(u_int16_t *, u_int16_t, u_int16_t,
			    u_int8_t);
void			 pf_cksum_fixup_a(u_int16_t *, const struct pf_addr *,
			    const struct pf_addr *, sa_family_t, u_int8_t);
int			 pf_modulate_sack(struct pf_pdesc *,
			    struct pf_state_peer *);
int			 pf_icmp_mapping(struct pf_pdesc *, u_int8_t, int *,
			    u_int16_t *, u_int16_t *);
int			 pf_change_icmp_af(struct mbuf *, int,
			    struct pf_pdesc *, struct pf_pdesc *,
			    struct pf_addr *, struct pf_addr *, sa_family_t,
			    sa_family_t);
int			 pf_translate_a(struct pf_pdesc *, struct pf_addr *,
			    struct pf_addr *);
void			 pf_translate_icmp(struct pf_pdesc *, struct pf_addr *,
			    u_int16_t *, struct pf_addr *, struct pf_addr *,
			    u_int16_t);
int			 pf_translate_icmp_af(struct pf_pdesc*, int, void *);
void			 pf_send_tcp(const struct pf_rule *, sa_family_t,
			    const struct pf_addr *, const struct pf_addr *,
			    u_int16_t, u_int16_t, u_int32_t, u_int32_t,
			    u_int8_t, u_int16_t, u_int16_t, u_int8_t, int,
			    u_int16_t, u_int);
void			 pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t,
			    sa_family_t, struct pf_rule *, u_int);
void			 pf_detach_state(struct pf_state *);
void			 pf_state_key_detach(struct pf_state *, int);
u_int32_t		 pf_tcp_iss(struct pf_pdesc *);
void			 pf_rule_to_actions(struct pf_rule *,
			    struct pf_rule_actions *);
int			 pf_test_rule(struct pf_pdesc *, struct pf_rule **,
			    struct pf_state **, struct pf_rule **,
			    struct pf_ruleset **, u_short *);
static __inline int	 pf_create_state(struct pf_pdesc *, struct pf_rule *,
			    struct pf_rule *, struct pf_rule *,
			    struct pf_state_key **, struct pf_state_key **,
			    int *, struct pf_state **, int,
			    struct pf_rule_slist *, struct pf_rule_actions *,
			    struct pf_src_node *[]);
static __inline int	 pf_state_key_addr_setup(struct pf_pdesc *, void *,
			    int, struct pf_addr *, int, struct pf_addr *,
			    int, int);
int			 pf_state_key_setup(struct pf_pdesc *, struct
			    pf_state_key **, struct pf_state_key **, int);
int			 pf_tcp_track_full(struct pf_pdesc *,
			    struct pf_state_peer *, struct pf_state_peer *,
			    struct pf_state **, u_short *, int *);
int			 pf_tcp_track_sloppy(struct pf_pdesc *,
			    struct pf_state_peer *, struct pf_state_peer *,
			    struct pf_state **, u_short *);
static __inline int	 pf_synproxy(struct pf_pdesc *, struct pf_state **,
			    u_short *);
int			 pf_test_state(struct pf_pdesc *, struct pf_state **,
			    u_short *);
int			 pf_icmp_state_lookup(struct pf_pdesc *,
			    struct pf_state_key_cmp *, struct pf_state **,
			    u_int16_t, u_int16_t, int, int *, int, int);
int			 pf_test_state_icmp(struct pf_pdesc *,
			    struct pf_state **, u_short *);
u_int8_t		 pf_get_wscale(struct pf_pdesc *);
u_int16_t		 pf_get_mss(struct pf_pdesc *);
u_int16_t		 pf_calc_mss(struct pf_addr *, sa_family_t, int,
			    u_int16_t);
static __inline int	 pf_set_rt_ifp(struct pf_state *, struct pf_addr *,
			    sa_family_t);
struct pf_divert	*pf_get_divert(struct mbuf *);
int			 pf_walk_option6(struct pf_pdesc *, struct ip6_hdr *,
			    int, int, u_short *);
int			 pf_walk_header6(struct pf_pdesc *, struct ip6_hdr *,
			    u_short *);
void			 pf_print_state_parts(struct pf_state *,
			    struct pf_state_key *, struct pf_state_key *);
int			 pf_addr_wrap_neq(struct pf_addr_wrap *,
			    struct pf_addr_wrap *);
int			 pf_compare_state_keys(struct pf_state_key *,
			    struct pf_state_key *, struct pfi_kif *, u_int);
struct pf_state		*pf_find_state(struct pfi_kif *,
			    struct pf_state_key_cmp *, u_int, struct mbuf *);
int			 pf_src_connlimit(struct pf_state **);
int			 pf_match_rcvif(struct mbuf *, struct pf_rule *);
void			 pf_step_into_anchor(int *, struct pf_ruleset **,
			    struct pf_rule **, struct pf_rule **);
int			 pf_step_out_of_anchor(int *, struct pf_ruleset **,
			    struct pf_rule **, struct pf_rule **,
			    int *);
void			 pf_counters_inc(int, struct pf_pdesc *,
			    struct pf_state *, struct pf_rule *,
			    struct pf_rule *);
void			 pf_state_key_link(struct pf_state_key *,
			    struct pf_state_key *);
void			 pf_inpcb_unlink_state_key(struct inpcb *);
void			 pf_state_key_unlink_reverse(struct pf_state_key *);

#if NPFLOG > 0
void			 pf_log_matches(struct pf_pdesc *, struct pf_rule *,
			    struct pf_rule *, struct pf_ruleset *,
			    struct pf_rule_slist *);
#endif	/* NPFLOG > 0 */

extern struct pool pfr_ktable_pl;
extern struct pool pfr_kentry_pl;

struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = {
	{ &pf_state_pl, PFSTATE_HIWAT, PFSTATE_HIWAT },
	{ &pf_src_tree_pl, PFSNODE_HIWAT, PFSNODE_HIWAT },
	{ &pf_frent_pl, PFFRAG_FRENT_HIWAT, PFFRAG_FRENT_HIWAT },
	{ &pfr_ktable_pl, PFR_KTABLE_HIWAT, PFR_KTABLE_HIWAT },
	{ &pfr_kentry_pl, PFR_KENTRY_HIWAT, PFR_KENTRY_HIWAT }
};

#define STATE_LOOKUP(i, k, d, s, m)					\
	do {								\
		s = pf_find_state(i, k, d, m);				\
		if (s == NULL || (s)->timeout == PFTM_PURGE)		\
			return (PF_DROP);				\
		if (d == PF_OUT &&					\
		    (((s)->rule.ptr->rt == PF_ROUTETO &&		\
		    (s)->rule.ptr->direction == PF_OUT) ||		\
		    ((s)->rule.ptr->rt == PF_REPLYTO &&			\
		    (s)->rule.ptr->direction == PF_IN)) &&		\
		    (s)->rt_kif != NULL &&				\
		    (s)->rt_kif != i)					\
			return (PF_PASS);				\
	} while (0)

#define BOUND_IFACE(r, k) \
	((r)->rule_flag & PFRULE_IFBOUND) ? (k) : pfi_all

#define STATE_INC_COUNTERS(s)					\
	do {							\
		struct pf_rule_item *mrm;			\
		s->rule.ptr->states_cur++;			\
		s->rule.ptr->states_tot++;			\
		if (s->anchor.ptr != NULL) {			\
			s->anchor.ptr->states_cur++;		\
			s->anchor.ptr->states_tot++;		\
		}						\
		SLIST_FOREACH(mrm, &s->match_rules, entry)	\
			mrm->r->states_cur++;			\
	} while (0)

static __inline int pf_src_compare(struct pf_src_node *, struct pf_src_node *);
static __inline int pf_state_compare_key(struct pf_state_key *,
	struct pf_state_key *);
static __inline int pf_state_compare_id(struct pf_state *,
	struct pf_state *);
static __inline void pf_cksum_uncover(u_int16_t *, u_int16_t, u_int8_t);
static __inline void pf_cksum_cover(u_int16_t *, u_int16_t, u_int8_t);

struct pf_src_tree tree_src_tracking;

struct pf_state_tree_id tree_id;
struct pf_state_queue state_list;

RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare);
RB_GENERATE(pf_state_tree, pf_state_key, entry, pf_state_compare_key);
RB_GENERATE(pf_state_tree_id, pf_state,
    entry_id, pf_state_compare_id);

SLIST_HEAD(pf_rule_gcl, pf_rule)	pf_rule_gcl =
	SLIST_HEAD_INITIALIZER(pf_rule_gcl);

__inline int
pf_addr_compare(struct pf_addr *a, struct pf_addr *b, sa_family_t af)
{
	switch (af) {
	case AF_INET:
		if (a->addr32[0] > b->addr32[0])
			return (1);
		if (a->addr32[0] < b->addr32[0])
			return (-1);
		break;
#ifdef INET6
	case AF_INET6:
		if (a->addr32[3] > b->addr32[3])
			return (1);
		if (a->addr32[3] < b->addr32[3])
			return (-1);
		if (a->addr32[2] > b->addr32[2])
			return (1);
		if (a->addr32[2] < b->addr32[2])
			return (-1);
		if (a->addr32[1] > b->addr32[1])
			return (1);
		if (a->addr32[1] < b->addr32[1])
			return (-1);
		if (a->addr32[0] > b->addr32[0])
			return (1);
		if (a->addr32[0] < b->addr32[0])
			return (-1);
		break;
#endif /* INET6 */
	}
	return (0);
}

static __inline int
pf_src_compare(struct pf_src_node *a, struct pf_src_node *b)
{
	int	diff;

	if (a->rule.ptr > b->rule.ptr)
		return (1);
	if (a->rule.ptr < b->rule.ptr)
		return (-1);
	if ((diff = a->type - b->type) != 0)
		return (diff);
	if ((diff = a->af - b->af) != 0)
		return (diff);
	if ((diff = pf_addr_compare(&a->addr, &b->addr, a->af)) != 0)
		return (diff);
	return (0);
}

void
pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af)
{
	switch (af) {
	case AF_INET:
		dst->addr32[0] = src->addr32[0];
		break;
#ifdef INET6
	case AF_INET6:
		dst->addr32[0] = src->addr32[0];
		dst->addr32[1] = src->addr32[1];
		dst->addr32[2] = src->addr32[2];
		dst->addr32[3] = src->addr32[3];
		break;
#endif /* INET6 */
	default:
		unhandled_af(af);
	}
}

void
pf_init_threshold(struct pf_threshold *threshold,
    u_int32_t limit, u_int32_t seconds)
{
	threshold->limit = limit * PF_THRESHOLD_MULT;
	threshold->seconds = seconds;
	threshold->count = 0;
	threshold->last = time_uptime;
}

void
pf_add_threshold(struct pf_threshold *threshold)
{
	u_int32_t t = time_uptime, diff = t - threshold->last;

	if (diff >= threshold->seconds)
		threshold->count = 0;
	else
		threshold->count -= threshold->count * diff /
		    threshold->seconds;
	threshold->count += PF_THRESHOLD_MULT;
	threshold->last = t;
}
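/*
 * Worked example (illustrative, not part of the original sources; assumes
 * PF_THRESHOLD_MULT is 1000 as defined in pfvar.h): a rule with
 * "max-src-conn-rate 10/5" yields limit = 10 * 1000 = 10000 and
 * seconds = 5.  If count is 6000 and the previous event was 2 seconds
 * ago, the linear decay above subtracts 6000 * 2 / 5 = 2400 before the
 * new event adds 1000, leaving count = 4600.  pf_check_threshold()
 * below only trips once count exceeds limit.
 */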
int
pf_check_threshold(struct pf_threshold *threshold)
{
	return (threshold->count > threshold->limit);
}

int
pf_src_connlimit(struct pf_state **state)
{
	int			 bad = 0;
	struct pf_src_node	*sn;

	if ((sn = pf_get_src_node((*state), PF_SN_NONE)) == NULL)
		return (0);

	sn->conn++;
	(*state)->src.tcp_est = 1;
	pf_add_threshold(&sn->conn_rate);

	if ((*state)->rule.ptr->max_src_conn &&
	    (*state)->rule.ptr->max_src_conn < sn->conn) {
		pf_status.lcounters[LCNT_SRCCONN]++;
		bad++;
	}

	if ((*state)->rule.ptr->max_src_conn_rate.limit &&
	    pf_check_threshold(&sn->conn_rate)) {
		pf_status.lcounters[LCNT_SRCCONNRATE]++;
		bad++;
	}

	if (!bad)
		return (0);

	if ((*state)->rule.ptr->overload_tbl) {
		struct pfr_addr p;
		u_int32_t	killed = 0;

		pf_status.lcounters[LCNT_OVERLOAD_TABLE]++;
		if (pf_status.debug >= LOG_NOTICE) {
			log(LOG_NOTICE,
			    "pf: pf_src_connlimit: blocking address ");
			pf_print_host(&sn->addr, 0,
			    (*state)->key[PF_SK_WIRE]->af);
		}

		bzero(&p, sizeof(p));
		p.pfra_af = (*state)->key[PF_SK_WIRE]->af;
		switch ((*state)->key[PF_SK_WIRE]->af) {
		case AF_INET:
			p.pfra_net = 32;
			p.pfra_ip4addr = sn->addr.v4;
			break;
#ifdef INET6
		case AF_INET6:
			p.pfra_net = 128;
			p.pfra_ip6addr = sn->addr.v6;
			break;
#endif /* INET6 */
		}

		pfr_insert_kentry((*state)->rule.ptr->overload_tbl,
		    &p, time_second);

		/* kill existing states if that's required. */
		if ((*state)->rule.ptr->flush) {
			struct pf_state_key *sk;
			struct pf_state *st;

			pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++;
			RB_FOREACH(st, pf_state_tree_id, &tree_id) {
				sk = st->key[PF_SK_WIRE];
				/*
				 * Kill states from this source.  (Only those
				 * from the same rule if PF_FLUSH_GLOBAL is not
				 * set)
				 */
				if (sk->af ==
				    (*state)->key[PF_SK_WIRE]->af &&
				    (((*state)->direction == PF_OUT &&
				    PF_AEQ(&sn->addr, &sk->addr[1], sk->af)) ||
				    ((*state)->direction == PF_IN &&
				    PF_AEQ(&sn->addr, &sk->addr[0], sk->af))) &&
				    ((*state)->rule.ptr->flush &
				    PF_FLUSH_GLOBAL ||
				    (*state)->rule.ptr == st->rule.ptr)) {
					st->timeout = PFTM_PURGE;
					st->src.state = st->dst.state =
					    TCPS_CLOSED;
					killed++;
				}
			}
			if (pf_status.debug >= LOG_NOTICE)
				addlog(", %u states killed", killed);
		}
		if (pf_status.debug >= LOG_NOTICE)
			addlog("\n");
	}

	/* kill this state */
	(*state)->timeout = PFTM_PURGE;
	(*state)->src.state = (*state)->dst.state = TCPS_CLOSED;
	return (1);
}

int
pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule,
    enum pf_sn_types type, sa_family_t af, struct pf_addr *src,
    struct pf_addr *raddr)
{
	struct pf_src_node	k;

	if (*sn == NULL) {
		k.af = af;
		k.type = type;
		PF_ACPY(&k.addr, src, af);
		k.rule.ptr = rule;
		pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
		*sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
	}
	if (*sn == NULL) {
		if (!rule->max_src_nodes ||
		    rule->src_nodes < rule->max_src_nodes)
			(*sn) = pool_get(&pf_src_tree_pl, PR_NOWAIT | PR_ZERO);
		else
			pf_status.lcounters[LCNT_SRCNODES]++;
		if ((*sn) == NULL)
			return (-1);

		pf_init_threshold(&(*sn)->conn_rate,
		    rule->max_src_conn_rate.limit,
		    rule->max_src_conn_rate.seconds);

		(*sn)->type = type;
		(*sn)->af = af;
		(*sn)->rule.ptr = rule;
		PF_ACPY(&(*sn)->addr, src, af);
		if (raddr)
			PF_ACPY(&(*sn)->raddr, raddr, af);
		if (RB_INSERT(pf_src_tree,
		    &tree_src_tracking, *sn) != NULL) {
			if (pf_status.debug >= LOG_NOTICE) {
				log(LOG_NOTICE,
				    "pf: src_tree insert failed: ");
				pf_print_host(&(*sn)->addr, 0, af);
				addlog("\n");
			}
			pool_put(&pf_src_tree_pl, *sn);
			return (-1);
		}
		(*sn)->creation = time_uptime;
		(*sn)->rule.ptr->src_nodes++;
		pf_status.scounters[SCNT_SRC_NODE_INSERT]++;
		pf_status.src_nodes++;
	} else {
		if (rule->max_src_states &&
		    (*sn)->states >= rule->max_src_states) {
			pf_status.lcounters[LCNT_SRCSTATES]++;
			return (-1);
		}
	}
	return (0);
}

void
pf_remove_src_node(struct pf_src_node *sn)
{
	if (sn->states > 0 || sn->expire > time_uptime)
		return;

	sn->rule.ptr->src_nodes--;
	if (sn->rule.ptr->states_cur == 0 &&
	    sn->rule.ptr->src_nodes == 0)
		pf_rm_rule(NULL, sn->rule.ptr);
	RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
	pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
	pf_status.src_nodes--;
	pool_put(&pf_src_tree_pl, sn);
}

struct pf_src_node *
pf_get_src_node(struct pf_state *s, enum pf_sn_types type)
{
	struct pf_sn_item	*sni;

	SLIST_FOREACH(sni, &s->src_nodes, next)
		if (sni->sn->type == type)
			return (sni->sn);
	return (NULL);
}

void
pf_state_rm_src_node(struct pf_state *s, struct pf_src_node *sn)
{
	struct pf_sn_item	*sni, *snin, *snip = NULL;

	for (sni = SLIST_FIRST(&s->src_nodes); sni; sni = snin) {
		snin = SLIST_NEXT(sni, next);
		if (sni->sn == sn) {
			if (snip)
				SLIST_REMOVE_AFTER(snip, next);
			else
				SLIST_REMOVE_HEAD(&s->src_nodes, next);
			pool_put(&pf_sn_item_pl, sni);
			sni = NULL;
			sn->states--;
		}
		if (sni != NULL)
			snip = sni;
	}
}

/* state table stuff */

static __inline int
pf_state_compare_key(struct pf_state_key *a, struct pf_state_key *b)
{
	int	diff;

	if ((diff = a->proto - b->proto) != 0)
		return (diff);
	if ((diff = a->af - b->af) != 0)
		return (diff);
	if ((diff = pf_addr_compare(&a->addr[0], &b->addr[0], a->af)) != 0)
		return (diff);
	if ((diff = pf_addr_compare(&a->addr[1], &b->addr[1], a->af)) != 0)
		return (diff);
	if ((diff = a->port[0] - b->port[0]) != 0)
		return (diff);
	if ((diff = a->port[1] - b->port[1]) != 0)
		return (diff);
	if ((diff = a->rdomain - b->rdomain) != 0)
		return (diff);
	return (0);
}

static __inline int
pf_state_compare_id(struct pf_state *a, struct pf_state *b)
{
	if (a->id > b->id)
		return (1);
	if (a->id < b->id)
		return (-1);
	if (a->creatorid > b->creatorid)
		return (1);
	if (a->creatorid < b->creatorid)
		return (-1);

	return (0);
}

int
pf_state_key_attach(struct pf_state_key *sk, struct pf_state *s, int idx)
{
	struct pf_state_item	*si;
	struct pf_state_key	*cur;
	struct pf_state		*olds = NULL;

	KASSERT(s->key[idx] == NULL);
	if ((cur = RB_INSERT(pf_state_tree, &pf_statetbl, sk)) != NULL) {
		/* key exists. check for same kif, if none, add to key */
		TAILQ_FOREACH(si, &cur->states, entry)
			if (si->s->kif == s->kif &&
			    ((si->s->key[PF_SK_WIRE]->af == sk->af &&
			    si->s->direction == s->direction) ||
			    (si->s->key[PF_SK_WIRE]->af !=
			    si->s->key[PF_SK_STACK]->af &&
			    sk->af == si->s->key[PF_SK_STACK]->af &&
			    si->s->direction != s->direction))) {
				int reuse = 0;

				if (sk->proto == IPPROTO_TCP &&
				    si->s->src.state >= TCPS_FIN_WAIT_2 &&
				    si->s->dst.state >= TCPS_FIN_WAIT_2)
					reuse = 1;
				if (pf_status.debug >= LOG_NOTICE) {
					log(LOG_NOTICE,
					    "pf: %s key attach %s on %s: ",
					    (idx == PF_SK_WIRE) ?
					    "wire" : "stack",
					    reuse ? "reuse" : "failed",
					    s->kif->pfik_name);
					pf_print_state_parts(s,
					    (idx == PF_SK_WIRE) ? sk : NULL,
					    (idx == PF_SK_STACK) ? sk : NULL);
					addlog(", existing: ");
					pf_print_state_parts(si->s,
					    (idx == PF_SK_WIRE) ? sk : NULL,
					    (idx == PF_SK_STACK) ? sk : NULL);
					addlog("\n");
				}
				if (reuse) {
					si->s->src.state = si->s->dst.state =
					    TCPS_CLOSED;
					/* remove late or sks can go away */
					olds = si->s;
				} else {
					pool_put(&pf_state_key_pl, sk);
					return (-1);	/* collision! */
				}
			}
		pool_put(&pf_state_key_pl, sk);
		s->key[idx] = cur;
	} else
		s->key[idx] = sk;

	if ((si = pool_get(&pf_state_item_pl, PR_NOWAIT)) == NULL) {
		pf_state_key_detach(s, idx);
		return (-1);
	}
	si->s = s;

	/* list is sorted, if-bound states before floating */
	if (s->kif == pfi_all)
		TAILQ_INSERT_TAIL(&s->key[idx]->states, si, entry);
	else
		TAILQ_INSERT_HEAD(&s->key[idx]->states, si, entry);

	if (olds)
		pf_remove_state(olds);

	return (0);
}

void
pf_detach_state(struct pf_state *s)
{
	if (s->key[PF_SK_WIRE] == s->key[PF_SK_STACK])
		s->key[PF_SK_WIRE] = NULL;

	if (s->key[PF_SK_STACK] != NULL)
		pf_state_key_detach(s, PF_SK_STACK);

	if (s->key[PF_SK_WIRE] != NULL)
		pf_state_key_detach(s, PF_SK_WIRE);
}

void
pf_state_key_detach(struct pf_state *s, int idx)
{
	struct pf_state_item	*si;
	struct pf_state_key	*sk;

	if (s->key[idx] == NULL)
		return;

	si = TAILQ_FIRST(&s->key[idx]->states);
	while (si && si->s != s)
		si = TAILQ_NEXT(si, entry);

	if (si) {
		TAILQ_REMOVE(&s->key[idx]->states, si, entry);
		pool_put(&pf_state_item_pl, si);
	}

	sk = s->key[idx];
	s->key[idx] = NULL;
	if (TAILQ_EMPTY(&sk->states)) {
		RB_REMOVE(pf_state_tree, &pf_statetbl, sk);
		sk->removed = 1;
		pf_state_key_unlink_reverse(sk);
		pf_inpcb_unlink_state_key(sk->inp);
		pf_state_key_unref(sk);
	}
}

struct pf_state_key *
pf_alloc_state_key(int pool_flags)
{
	struct pf_state_key	*sk;

	if ((sk = pool_get(&pf_state_key_pl, pool_flags)) == NULL)
		return (NULL);
	TAILQ_INIT(&sk->states);

	return (sk);
}

static __inline int
pf_state_key_addr_setup(struct pf_pdesc *pd, void *arg, int sidx,
    struct pf_addr *saddr, int didx, struct pf_addr *daddr, int af, int multi)
{
	struct pf_state_key_cmp *key = arg;
#ifdef INET6
	struct nd_neighbor_solicit *nd;
	struct pf_addr *target;

	if (af == AF_INET || pd->proto != IPPROTO_ICMPV6)
		goto copy;

	switch (pd->hdr.icmp6->icmp6_type) {
	case ND_NEIGHBOR_SOLICIT:
		if (multi)
			return (-1);
		nd = (void *)pd->hdr.icmp6;
		target = (struct pf_addr *)&nd->nd_ns_target;
		daddr = target;
		break;
	case ND_NEIGHBOR_ADVERT:
		if (multi)
			return (-1);
		nd = (void *)pd->hdr.icmp6;
		target = (struct pf_addr *)&nd->nd_ns_target;
		saddr = target;
		if (IN6_IS_ADDR_MULTICAST(&pd->dst->v6)) {
			key->addr[didx].addr32[0] = 0;
			key->addr[didx].addr32[1] = 0;
			key->addr[didx].addr32[2] = 0;
			key->addr[didx].addr32[3] = 0;
			daddr = NULL; /* overwritten */
		}
		break;
	default:
		if (multi) {
			key->addr[sidx].addr32[0] = __IPV6_ADDR_INT32_MLL;
			key->addr[sidx].addr32[1] = 0;
			key->addr[sidx].addr32[2] = 0;
			key->addr[sidx].addr32[3] = __IPV6_ADDR_INT32_ONE;
			saddr = NULL; /* overwritten */
		}
	}
 copy:
#endif /* INET6 */
	if (saddr)
		PF_ACPY(&key->addr[sidx], saddr, af);
	if (daddr)
		PF_ACPY(&key->addr[didx], daddr, af);

	return (0);
}

int
pf_state_key_setup(struct pf_pdesc *pd, struct pf_state_key **skw,
    struct pf_state_key **sks, int rtableid)
{
	/* if returning error we MUST pool_put state keys ourselves */
	struct pf_state_key *sk1, *sk2;
	u_int wrdom = pd->rdomain;
	int afto = pd->af != pd->naf;

	if ((sk1 = pf_alloc_state_key(PR_NOWAIT | PR_ZERO)) == NULL)
		return (ENOMEM);

	pf_state_key_addr_setup(pd, sk1, pd->sidx, pd->src, pd->didx, pd->dst,
	    pd->af, 0);
	sk1->port[pd->sidx] = pd->osport;
	sk1->port[pd->didx] = pd->odport;
	sk1->proto = pd->proto;
	sk1->af = pd->af;
	sk1->rdomain = pd->rdomain;
	PF_REF_INIT(sk1->refcnt);
	sk1->removed = 0;
	if (rtableid >= 0)
		wrdom = rtable_l2(rtableid);

	if (PF_ANEQ(&pd->nsaddr, pd->src, pd->af) ||
	    PF_ANEQ(&pd->ndaddr, pd->dst, pd->af) ||
	    pd->nsport != pd->osport || pd->ndport != pd->odport ||
	    wrdom != pd->rdomain || afto) {	/* NAT/NAT64 */
		if ((sk2 = pf_alloc_state_key(PR_NOWAIT | PR_ZERO)) == NULL) {
			pool_put(&pf_state_key_pl, sk1);
			return (ENOMEM);
		}
		pf_state_key_addr_setup(pd, sk2, afto ? pd->didx : pd->sidx,
		    &pd->nsaddr, afto ? pd->sidx : pd->didx, &pd->ndaddr,
		    pd->naf, 0);
		sk2->port[afto ? pd->didx : pd->sidx] = pd->nsport;
		sk2->port[afto ? pd->sidx : pd->didx] = pd->ndport;
		if (afto) {
			switch (pd->proto) {
			case IPPROTO_ICMP:
				sk2->proto = IPPROTO_ICMPV6;
				break;
			case IPPROTO_ICMPV6:
				sk2->proto = IPPROTO_ICMP;
				break;
			default:
				sk2->proto = pd->proto;
			}
		} else
			sk2->proto = pd->proto;
		sk2->af = pd->naf;
		sk2->rdomain = wrdom;
		PF_REF_INIT(sk2->refcnt);
		sk2->removed = 0;
	} else
		sk2 = sk1;

	if (pd->dir == PF_IN) {
		*skw = sk1;
		*sks = sk2;
	} else {
		*sks = sk1;
		*skw = sk2;
	}

	if (pf_status.debug >= LOG_DEBUG) {
		log(LOG_DEBUG, "pf: key setup: ");
		pf_print_state_parts(NULL, *skw, *sks);
		addlog("\n");
	}

	return (0);
}
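/*
 * Illustrative example (addresses invented, not from the original
 * sources): for an inbound connection redirected by "pass in ...
 * rdr-to 10.0.0.5 port 8080", a packet from 198.51.100.7:1234 to
 * 203.0.113.5:80 yields two keys above: sk1, built from the packet as
 * seen on the wire (203.0.113.5:80), becomes *skw, while sk2, built
 * from the translated addresses (10.0.0.5:8080), becomes *sks, the key
 * the local stack will match against.  Without any translation both
 * pointers share a single key (sk2 == sk1).
 */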
int
pf_state_insert(struct pfi_kif *kif, struct pf_state_key **skw,
    struct pf_state_key **sks, struct pf_state *s)
{
	splsoftassert(IPL_SOFTNET);

	s->kif = kif;
	if (*skw == *sks) {
		if (pf_state_key_attach(*skw, s, PF_SK_WIRE))
			return (-1);
		*skw = *sks = s->key[PF_SK_WIRE];
		s->key[PF_SK_STACK] = s->key[PF_SK_WIRE];
	} else {
		if (pf_state_key_attach(*skw, s, PF_SK_WIRE)) {
			pool_put(&pf_state_key_pl, *sks);
			return (-1);
		}
		*skw = s->key[PF_SK_WIRE];
		if (pf_state_key_attach(*sks, s, PF_SK_STACK)) {
			pf_state_key_detach(s, PF_SK_WIRE);
			return (-1);
		}
		*sks = s->key[PF_SK_STACK];
	}

	if (s->id == 0 && s->creatorid == 0) {
		s->id = htobe64(pf_status.stateid++);
		s->creatorid = pf_status.hostid;
	}
	if (RB_INSERT(pf_state_tree_id, &tree_id, s) != NULL) {
		if (pf_status.debug >= LOG_NOTICE) {
			log(LOG_NOTICE, "pf: state insert failed: "
			    "id: %016llx creatorid: %08x",
			    betoh64(s->id), ntohl(s->creatorid));
			addlog("\n");
		}
		pf_detach_state(s);
		return (-1);
	}
	TAILQ_INSERT_TAIL(&state_list, s, entry_list);
	pf_status.fcounters[FCNT_STATE_INSERT]++;
	pf_status.states++;
	pfi_kif_ref(kif, PFI_KIF_REF_STATE);
#if NPFSYNC > 0
	pfsync_insert_state(s);
#endif	/* NPFSYNC > 0 */
	return (0);
}

struct pf_state *
pf_find_state_byid(struct pf_state_cmp *key)
{
	pf_status.fcounters[FCNT_STATE_SEARCH]++;

	return (RB_FIND(pf_state_tree_id, &tree_id, (struct pf_state *)key));
}

int
pf_compare_state_keys(struct pf_state_key *a, struct pf_state_key *b,
    struct pfi_kif *kif, u_int dir)
{
	/* a (from hdr) and b (new) must be exact opposites of each other */
	if (a->af == b->af && a->proto == b->proto &&
	    PF_AEQ(&a->addr[0], &b->addr[1], a->af) &&
	    PF_AEQ(&a->addr[1], &b->addr[0], a->af) &&
	    a->port[0] == b->port[1] &&
	    a->port[1] == b->port[0] && a->rdomain == b->rdomain)
		return (0);
	else {
		/* mismatch. must not happen. */
		if (pf_status.debug >= LOG_ERR) {
			log(LOG_ERR,
			    "pf: state key linking mismatch! dir=%s, "
			    "if=%s, stored af=%u, a0: ",
			    dir == PF_OUT ? "OUT" : "IN",
			    kif->pfik_name, a->af);
			pf_print_host(&a->addr[0], a->port[0], a->af);
			addlog(", a1: ");
			pf_print_host(&a->addr[1], a->port[1], a->af);
			addlog(", proto=%u", a->proto);
			addlog(", found af=%u, a0: ", b->af);
			pf_print_host(&b->addr[0], b->port[0], b->af);
			addlog(", a1: ");
			pf_print_host(&b->addr[1], b->port[1], b->af);
			addlog(", proto=%u", b->proto);
			addlog("\n");
		}
		return (-1);
	}
}

struct pf_state *
pf_find_state(struct pfi_kif *kif, struct pf_state_key_cmp *key, u_int dir,
    struct mbuf *m)
{
	struct pf_state_key	*sk, *pkt_sk, *inp_sk;
	struct pf_state_item	*si;

	pf_status.fcounters[FCNT_STATE_SEARCH]++;
	if (pf_status.debug >= LOG_DEBUG) {
		log(LOG_DEBUG, "pf: key search, if=%s: ", kif->pfik_name);
		pf_print_state_parts(NULL, (struct pf_state_key *)key, NULL);
		addlog("\n");
	}

	inp_sk = NULL;
	pkt_sk = NULL;
	sk = NULL;
	if (dir == PF_OUT) {
		/* first if block deals with outbound forwarded packet */
		pkt_sk = m->m_pkthdr.pf.statekey;
		if (pf_state_key_isvalid(pkt_sk) &&
		    pf_state_key_isvalid(pkt_sk->reverse)) {
			sk = pkt_sk->reverse;
		} else {
			pf_pkt_unlink_state_key(m);
			pkt_sk = NULL;
		}

		if (pkt_sk == NULL) {
			/* here we deal with local outbound packet */
			if (m->m_pkthdr.pf.inp != NULL) {
				inp_sk = m->m_pkthdr.pf.inp->inp_pf_sk;
				if (pf_state_key_isvalid(inp_sk))
					sk = inp_sk;
				else
					pf_inpcb_unlink_state_key(
					    m->m_pkthdr.pf.inp);
			}
		}
	}

	if (sk == NULL) {
		if ((sk = RB_FIND(pf_state_tree, &pf_statetbl,
		    (struct pf_state_key *)key)) == NULL)
			return (NULL);
		if (dir == PF_OUT && pkt_sk &&
		    pf_compare_state_keys(pkt_sk, sk, kif, dir) == 0)
			pf_state_key_link(sk, pkt_sk);
		else if (dir == PF_OUT)
			pf_inp_link(m, m->m_pkthdr.pf.inp);
	}

	/* remove firewall data from outbound packet */
	if (dir == PF_OUT)
		pf_pkt_addr_changed(m);

	/* list is sorted, if-bound states before floating ones */
	TAILQ_FOREACH(si, &sk->states, entry)
		if ((si->s->kif == pfi_all || si->s->kif == kif) &&
		    ((si->s->key[PF_SK_WIRE]->af == si->s->key[PF_SK_STACK]->af
		    && sk == (dir == PF_IN ? si->s->key[PF_SK_WIRE] :
		    si->s->key[PF_SK_STACK])) ||
		    (si->s->key[PF_SK_WIRE]->af != si->s->key[PF_SK_STACK]->af
		    && dir == PF_IN && (sk == si->s->key[PF_SK_STACK] ||
		    sk == si->s->key[PF_SK_WIRE]))))
			return (si->s);

	return (NULL);
}

struct pf_state *
pf_find_state_all(struct pf_state_key_cmp *key, u_int dir, int *more)
{
	struct pf_state_key	*sk;
	struct pf_state_item	*si, *ret = NULL;

	pf_status.fcounters[FCNT_STATE_SEARCH]++;

	sk = RB_FIND(pf_state_tree, &pf_statetbl, (struct pf_state_key *)key);

	if (sk != NULL) {
		TAILQ_FOREACH(si, &sk->states, entry)
			if (dir == PF_INOUT ||
			    (sk == (dir == PF_IN ? si->s->key[PF_SK_WIRE] :
			    si->s->key[PF_SK_STACK]))) {
				if (more == NULL)
					return (si->s);

				if (ret)
					(*more)++;
				else
					ret = si;
			}
	}
	return (ret ? ret->s : NULL);
}

void
pf_state_export(struct pfsync_state *sp, struct pf_state *st)
{
	int32_t expire;

	bzero(sp, sizeof(struct pfsync_state));

	/* copy from state key */
	sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0];
	sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1];
	sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0];
	sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1];
	sp->key[PF_SK_WIRE].rdomain = htons(st->key[PF_SK_WIRE]->rdomain);
	sp->key[PF_SK_WIRE].af = st->key[PF_SK_WIRE]->af;
	sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0];
	sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1];
	sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0];
	sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1];
	sp->key[PF_SK_STACK].rdomain = htons(st->key[PF_SK_STACK]->rdomain);
	sp->key[PF_SK_STACK].af = st->key[PF_SK_STACK]->af;
	sp->rtableid[PF_SK_WIRE] = htonl(st->rtableid[PF_SK_WIRE]);
	sp->rtableid[PF_SK_STACK] = htonl(st->rtableid[PF_SK_STACK]);
	sp->proto = st->key[PF_SK_WIRE]->proto;
	sp->af = st->key[PF_SK_WIRE]->af;

	/* copy from state */
	strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname));
	memcpy(&sp->rt_addr, &st->rt_addr, sizeof(sp->rt_addr));
	sp->creation = htonl(time_uptime - st->creation);
	expire = pf_state_expires(st);
	if (expire <= time_uptime)
		sp->expire = htonl(0);
	else
		sp->expire = htonl(expire - time_uptime);

	sp->direction = st->direction;
#if NPFLOG > 0
	sp->log = st->log;
#endif	/* NPFLOG > 0 */
	sp->timeout = st->timeout;
	sp->state_flags = htons(st->state_flags);
	if (!SLIST_EMPTY(&st->src_nodes))
		sp->sync_flags |= PFSYNC_FLAG_SRCNODE;

	sp->id = st->id;
	sp->creatorid = st->creatorid;
	pf_state_peer_hton(&st->src, &sp->src);
	pf_state_peer_hton(&st->dst, &sp->dst);

	if (st->rule.ptr == NULL)
		sp->rule = htonl(-1);
	else
		sp->rule = htonl(st->rule.ptr->nr);
	if (st->anchor.ptr == NULL)
		sp->anchor = htonl(-1);
	else
		sp->anchor = htonl(st->anchor.ptr->nr);
	sp->nat_rule = htonl(-1);	/* left for compat, nat_rule is gone */

	pf_state_counter_hton(st->packets[0], sp->packets[0]);
	pf_state_counter_hton(st->packets[1], sp->packets[1]);
	pf_state_counter_hton(st->bytes[0], sp->bytes[0]);
	pf_state_counter_hton(st->bytes[1], sp->bytes[1]);

	sp->max_mss = htons(st->max_mss);
	sp->min_ttl = st->min_ttl;
	sp->set_tos = st->set_tos;
	sp->set_prio[0] = st->set_prio[0];
	sp->set_prio[1] = st->set_prio[1];
}

/* END state table stuff */

void
pf_purge_expired_rules(int locked)
{
	struct pf_rule	*r;

	if (SLIST_EMPTY(&pf_rule_gcl))
		return;

	if (!locked)
		rw_enter_write(&pf_consistency_lock);
	else
		rw_assert_wrlock(&pf_consistency_lock);

	while ((r = SLIST_FIRST(&pf_rule_gcl)) != NULL) {
		SLIST_REMOVE(&pf_rule_gcl, r, pf_rule, gcle);
		KASSERT(r->rule_flag & PFRULE_EXPIRED);
		pf_purge_rule(r);
	}

	if (!locked)
		rw_exit_write(&pf_consistency_lock);
}

void
pf_purge_thread(void *v)
{
	int nloops = 0, s;

	for (;;) {
		tsleep(pf_purge_thread, PWAIT, "pftm", 1 * hz);

		s = splsoftnet();

		/* process a fraction of the state table every second */
		pf_purge_expired_states(1 + (pf_status.states
		    / pf_default_rule.timeout[PFTM_INTERVAL]));

		/* purge other expired types every PFTM_INTERVAL seconds */
		if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
			pf_purge_expired_fragments();
			pf_purge_expired_src_nodes(0);
			pf_purge_expired_rules(0);
			nloops = 0;
		}

		splx(s);
	}
}

int32_t
pf_state_expires(const struct pf_state *state)
{
	int32_t		timeout;
	u_int32_t	start;
	u_int32_t	end;
	u_int32_t	states;

	/* handle all PFTM_* > PFTM_MAX here */
	if (state->timeout == PFTM_PURGE)
		return (0);

	KASSERT(state->timeout != PFTM_UNLINKED);
	KASSERT(state->timeout < PFTM_MAX);

	timeout = state->rule.ptr->timeout[state->timeout];
	if (!timeout)
		timeout = pf_default_rule.timeout[state->timeout];

	start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START];
	if (start) {
		end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END];
		states = state->rule.ptr->states_cur;
	} else {
		start = pf_default_rule.timeout[PFTM_ADAPTIVE_START];
		end = pf_default_rule.timeout[PFTM_ADAPTIVE_END];
		states = pf_status.states;
	}
	if (end && states > start && start < end) {
		if (states >= end)
			return (0);

		timeout = timeout * (end - states) / (end - start);
	}

	return (state->expire + timeout);
}
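/*
 * Worked example (illustrative, not part of the original sources): with
 * "set timeout { adaptive.start 6000, adaptive.end 12000 }" and a
 * tcp.established timeout of 86400 seconds, 9000 tracked states scale
 * the timeout above to 86400 * (12000 - 9000) / (12000 - 6000) = 43200
 * seconds; at 12000 states or more, every state is reported as already
 * expired.
 */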
void
pf_purge_expired_src_nodes(int waslocked)
{
	struct pf_src_node	*cur, *next;
	int			 locked = waslocked;

	for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) {
		next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);

		if (cur->states == 0 && cur->expire <= time_uptime) {
			if (! locked) {
				rw_enter_write(&pf_consistency_lock);
				next = RB_NEXT(pf_src_tree,
				    &tree_src_tracking, cur);
				locked = 1;
			}
			pf_remove_src_node(cur);
		}
	}

	if (locked && !waslocked)
		rw_exit_write(&pf_consistency_lock);
}

void
pf_src_tree_remove_state(struct pf_state *s)
{
	u_int32_t		 timeout;
	struct pf_sn_item	*sni;

	while ((sni = SLIST_FIRST(&s->src_nodes)) != NULL) {
		SLIST_REMOVE_HEAD(&s->src_nodes, next);
		if (s->src.tcp_est)
			--sni->sn->conn;
		if (--sni->sn->states == 0) {
			timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
			if (!timeout)
				timeout =
				    pf_default_rule.timeout[PFTM_SRC_NODE];
			sni->sn->expire = time_uptime + timeout;
		}
		pool_put(&pf_sn_item_pl, sni);
	}
}

/* callers should be at splsoftnet */
void
pf_remove_state(struct pf_state *cur)
{
	splsoftassert(IPL_SOFTNET);

	/* handle load balancing related tasks */
	pf_postprocess_addr(cur);

	if (cur->src.state == PF_TCPS_PROXY_DST) {
		pf_send_tcp(cur->rule.ptr, cur->key[PF_SK_WIRE]->af,
		    &cur->key[PF_SK_WIRE]->addr[1],
		    &cur->key[PF_SK_WIRE]->addr[0],
		    cur->key[PF_SK_WIRE]->port[1],
		    cur->key[PF_SK_WIRE]->port[0],
		    cur->src.seqhi, cur->src.seqlo + 1,
		    TH_RST|TH_ACK, 0, 0, 0, 1, cur->tag,
		    cur->key[PF_SK_WIRE]->rdomain);
	}
	RB_REMOVE(pf_state_tree_id, &tree_id, cur);
#if NPFLOW > 0
	if (cur->state_flags & PFSTATE_PFLOW)
		export_pflow(cur);
#endif	/* NPFLOW > 0 */
#if NPFSYNC > 0
	pfsync_delete_state(cur);
#endif	/* NPFSYNC > 0 */
	cur->timeout = PFTM_UNLINKED;
	pf_src_tree_remove_state(cur);
	pf_detach_state(cur);
}

void
pf_remove_divert_state(struct pf_state_key *sk)
{
	struct pf_state_item	*si;

	TAILQ_FOREACH(si, &sk->states, entry) {
		if (sk == si->s->key[PF_SK_STACK] && si->s->rule.ptr &&
		    si->s->rule.ptr->divert.port) {
			pf_remove_state(si->s);
			break;
		}
	}
}

/* callers should be at splsoftnet and hold the
 * write_lock on pf_consistency_lock */
void
pf_free_state(struct pf_state *cur)
{
	struct pf_rule_item *ri;

	splsoftassert(IPL_SOFTNET);

#if NPFSYNC > 0
	if (pfsync_state_in_use(cur))
		return;
#endif	/* NPFSYNC > 0 */
	KASSERT(cur->timeout == PFTM_UNLINKED);
	if (--cur->rule.ptr->states_cur == 0 &&
	    cur->rule.ptr->src_nodes == 0)
		pf_rm_rule(NULL, cur->rule.ptr);
	if (cur->anchor.ptr != NULL)
		if (--cur->anchor.ptr->states_cur == 0)
			pf_rm_rule(NULL, cur->anchor.ptr);
	while ((ri = SLIST_FIRST(&cur->match_rules))) {
		SLIST_REMOVE_HEAD(&cur->match_rules, entry);
		if (--ri->r->states_cur == 0 &&
		    ri->r->src_nodes == 0)
			pf_rm_rule(NULL, ri->r);
		pool_put(&pf_rule_item_pl, ri);
	}
	pf_normalize_tcp_cleanup(cur);
	pfi_kif_unref(cur->kif, PFI_KIF_REF_STATE);
	TAILQ_REMOVE(&state_list, cur, entry_list);
	if (cur->tag)
		pf_tag_unref(cur->tag);
	pool_put(&pf_state_pl, cur);
	pf_status.fcounters[FCNT_STATE_REMOVALS]++;
	pf_status.states--;
}

void
pf_purge_expired_states(u_int32_t maxcheck)
{
	static struct pf_state	*cur = NULL;
	struct pf_state		*next;
	int			 locked = 0;

	while (maxcheck--) {
		/* wrap to start of list when we hit the end */
		if (cur == NULL) {
			cur = TAILQ_FIRST(&state_list);
			if (cur == NULL)
				break;	/* list empty */
		}

		/* get next state, as cur may get deleted */
		next = TAILQ_NEXT(cur, entry_list);

		if (cur->timeout == PFTM_UNLINKED) {
			/* free removed state */
			if (! locked) {
				rw_enter_write(&pf_consistency_lock);
				locked = 1;
			}
			pf_free_state(cur);
		} else if (pf_state_expires(cur) <= time_uptime) {
			/* remove and free expired state */
			pf_remove_state(cur);
			if (! locked) {
				rw_enter_write(&pf_consistency_lock);
				locked = 1;
			}
			pf_free_state(cur);
		}
		cur = next;
	}

	if (locked)
		rw_exit_write(&pf_consistency_lock);
}

int
pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw)
{
	if (aw->type != PF_ADDR_TABLE)
		return (0);
	if ((aw->p.tbl = pfr_attach_table(rs, aw->v.tblname, 1)) == NULL)
		return (1);
	return (0);
}

void
pf_tbladdr_remove(struct pf_addr_wrap *aw)
{
	if (aw->type != PF_ADDR_TABLE || aw->p.tbl == NULL)
		return;
	pfr_detach_table(aw->p.tbl);
	aw->p.tbl = NULL;
}

void
pf_tbladdr_copyout(struct pf_addr_wrap *aw)
{
	struct pfr_ktable	*kt = aw->p.tbl;

	if (aw->type != PF_ADDR_TABLE || kt == NULL)
		return;
	if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL)
		kt = kt->pfrkt_root;
	aw->p.tbl = NULL;
	aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ?
	    kt->pfrkt_cnt : -1;
}

void
pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af)
{
	switch (af) {
	case AF_INET: {
		u_int32_t a = ntohl(addr->addr32[0]);
		addlog("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255,
		    (a>>8)&255, a&255);
		if (p) {
			p = ntohs(p);
			addlog(":%u", p);
		}
		break;
	}
#ifdef INET6
	case AF_INET6: {
		u_int16_t b;
		u_int8_t i, curstart, curend, maxstart, maxend;
		curstart = curend = maxstart = maxend = 255;
		for (i = 0; i < 8; i++) {
			if (!addr->addr16[i]) {
				if (curstart == 255)
					curstart = i;
				curend = i;
			} else {
				if ((curend - curstart) >
				    (maxend - maxstart)) {
					maxstart = curstart;
					maxend = curend;
				}
				curstart = curend = 255;
			}
		}
		if ((curend - curstart) >
		    (maxend - maxstart)) {
			maxstart = curstart;
			maxend = curend;
		}
		for (i = 0; i < 8; i++) {
			if (i >= maxstart && i <= maxend) {
				if (i == 0)
					addlog(":");
				if (i == maxend)
					addlog(":");
			} else {
				b = ntohs(addr->addr16[i]);
				addlog("%x", b);
				if (i < 7)
					addlog(":");
			}
		}
		if (p) {
			p = ntohs(p);
			addlog("[%u]", p);
		}
		break;
	}
#endif /* INET6 */
	}
}
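/*
 * Example (illustrative, not part of the original sources): for
 * 2001:db8:0:0:1:0:0:1 the loops above pick the leftmost longest run
 * of zero words (words 2-3) and emit "2001:db8::1:0:0:1"; an IPv6
 * port, if given, is printed as "[port]".
 */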
void
pf_print_state(struct pf_state *s)
{
	pf_print_state_parts(s, NULL, NULL);
}

void
pf_print_state_parts(struct pf_state *s,
    struct pf_state_key *skwp, struct pf_state_key *sksp)
{
	struct pf_state_key *skw, *sks;
	u_int8_t proto, dir;

	/* Do our best to fill these, but they're skipped if NULL */
	skw = skwp ? skwp : (s ? s->key[PF_SK_WIRE] : NULL);
	sks = sksp ? sksp : (s ? s->key[PF_SK_STACK] : NULL);
	proto = skw ? skw->proto : (sks ? sks->proto : 0);
	dir = s ? s->direction : 0;

	switch (proto) {
	case IPPROTO_IPV4:
		addlog("IPv4");
		break;
	case IPPROTO_IPV6:
		addlog("IPv6");
		break;
	case IPPROTO_TCP:
		addlog("TCP");
		break;
	case IPPROTO_UDP:
		addlog("UDP");
		break;
	case IPPROTO_ICMP:
		addlog("ICMP");
		break;
	case IPPROTO_ICMPV6:
		addlog("ICMPv6");
		break;
	default:
		addlog("%u", proto);
		break;
	}
	switch (dir) {
	case PF_IN:
		addlog(" in");
		break;
	case PF_OUT:
		addlog(" out");
		break;
	}
	if (skw) {
		addlog(" wire: (%d) ", skw->rdomain);
		pf_print_host(&skw->addr[0], skw->port[0], skw->af);
		addlog(" ");
		pf_print_host(&skw->addr[1], skw->port[1], skw->af);
	}
	if (sks) {
		addlog(" stack: (%d) ", sks->rdomain);
		if (sks != skw) {
			pf_print_host(&sks->addr[0], sks->port[0], sks->af);
			addlog(" ");
			pf_print_host(&sks->addr[1], sks->port[1], sks->af);
		} else
			addlog("-");
	}
	if (s) {
		if (proto == IPPROTO_TCP) {
			addlog(" [lo=%u high=%u win=%u modulator=%u",
			    s->src.seqlo, s->src.seqhi,
			    s->src.max_win, s->src.seqdiff);
			if (s->src.wscale && s->dst.wscale)
				addlog(" wscale=%u",
				    s->src.wscale & PF_WSCALE_MASK);
			addlog("]");
			addlog(" [lo=%u high=%u win=%u modulator=%u",
			    s->dst.seqlo, s->dst.seqhi,
			    s->dst.max_win, s->dst.seqdiff);
			if (s->src.wscale && s->dst.wscale)
				addlog(" wscale=%u",
				    s->dst.wscale & PF_WSCALE_MASK);
			addlog("]");
		}
		addlog(" %u:%u", s->src.state, s->dst.state);
		if (s->rule.ptr)
			addlog(" @%d", s->rule.ptr->nr);
	}
}

void
pf_print_flags(u_int8_t f)
{
	if (f)
		addlog(" ");
	if (f & TH_FIN)
		addlog("F");
	if (f & TH_SYN)
		addlog("S");
	if (f & TH_RST)
		addlog("R");
	if (f & TH_PUSH)
		addlog("P");
	if (f & TH_ACK)
		addlog("A");
	if (f & TH_URG)
		addlog("U");
	if (f & TH_ECE)
		addlog("E");
	if (f & TH_CWR)
		addlog("W");
}

#define	PF_SET_SKIP_STEPS(i)					\
	do {							\
		while (head[i] != cur) {			\
			head[i]->skip[i].ptr = cur;		\
			head[i] = TAILQ_NEXT(head[i], entries);	\
		}						\
	} while (0)

void
pf_calc_skip_steps(struct pf_rulequeue *rules)
{
	struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT];
	int i;

	cur = TAILQ_FIRST(rules);
	prev = cur;
	for (i = 0; i < PF_SKIP_COUNT; ++i)
		head[i] = cur;
	while (cur != NULL) {
		if (cur->kif != prev->kif || cur->ifnot != prev->ifnot)
			PF_SET_SKIP_STEPS(PF_SKIP_IFP);
		if (cur->direction != prev->direction)
			PF_SET_SKIP_STEPS(PF_SKIP_DIR);
		if (cur->onrdomain != prev->onrdomain ||
		    cur->ifnot != prev->ifnot)
			PF_SET_SKIP_STEPS(PF_SKIP_RDOM);
		if (cur->af != prev->af)
			PF_SET_SKIP_STEPS(PF_SKIP_AF);
		if (cur->proto != prev->proto)
			PF_SET_SKIP_STEPS(PF_SKIP_PROTO);
		if (cur->src.neg != prev->src.neg ||
		    pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr))
			PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR);
		if (cur->dst.neg != prev->dst.neg ||
		    pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr))
			PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR);
		if (cur->src.port[0] != prev->src.port[0] ||
		    cur->src.port[1] != prev->src.port[1] ||
		    cur->src.port_op != prev->src.port_op)
			PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT);
		if (cur->dst.port[0] != prev->dst.port[0] ||
		    cur->dst.port[1] != prev->dst.port[1] ||
		    cur->dst.port_op != prev->dst.port_op)
			PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);

		prev = cur;
		cur = TAILQ_NEXT(cur, entries);
	}
	for (i = 0; i < PF_SKIP_COUNT; ++i)
		PF_SET_SKIP_STEPS(i);
}
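/*
 * Example (illustrative, not part of the original sources): in a block
 * of rules identical except for their destination port, every rule in
 * the block shares skip steps for interface, direction, address family,
 * protocol and addresses, so a packet that fails the interface test
 * jumps past the whole block instead of re-evaluating each rule.
 */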
int
pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2)
{
	if (aw1->type != aw2->type)
		return (1);
	switch (aw1->type) {
	case PF_ADDR_ADDRMASK:
	case PF_ADDR_RANGE:
		if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, AF_INET6))
			return (1);
		if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, AF_INET6))
			return (1);
		return (0);
	case PF_ADDR_DYNIFTL:
		return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt);
	case PF_ADDR_NONE:
	case PF_ADDR_NOROUTE:
	case PF_ADDR_URPFFAILED:
		return (0);
	case PF_ADDR_TABLE:
		return (aw1->p.tbl != aw2->p.tbl);
	case PF_ADDR_RTLABEL:
		return (aw1->v.rtlabel != aw2->v.rtlabel);
	default:
		addlog("invalid address type: %d\n", aw1->type);
		return (1);
	}
}

/* This algorithm computes 'a + b - c' in ones-complement using a trick to
 * emulate at most one ones-complement subtraction. This thereby limits net
 * carries/borrows to at most one, eliminating a reduction step and saving one
 * each of +, >>, & and ~.
 *
 * def. x mod y = x - (x//y)*y		for integer x,y
 * def. sum = x mod 2^16
 * def. accumulator = (x >> 16) mod 2^16
 *
 * The trick works as follows: subtracting exactly one u_int16_t from the
 * u_int32_t x incurs at most one underflow, wrapping its upper 16-bits, the
 * accumulator, to 2^16 - 1. Adding this to the 16-bit sum preserves the
 * ones-complement borrow:
 *
 *	(sum + accumulator) mod 2^16
 * =	{ assume underflow: accumulator := 2^16 - 1 }
 *	(sum + 2^16 - 1) mod 2^16
 * =	{ mod }
 *	(sum - 1) mod 2^16
 *
 * Although this breaks for sum = 0, giving 0xffff, which is ones-complement's
 * other zero, not -1, that cannot occur: the 16-bit sum cannot be underflown
 * to zero as that requires subtraction of at least 2^16, which exceeds a
 * single u_int16_t's range.
 *
 * We use the following theorem to derive the implementation:
 *
 * th.	(x + (y mod z)) mod z = (x + y) mod z		(0)
 * proof.
 *	(x + (y mod z)) mod z
 * =	{ def mod }
 *	(x + y - (y//z)*z) mod z
 * =	{ (a + b*c) mod c = a mod c }
 *	(x + y) mod z			[end of proof]
 *
 * ... and thereby obtain:
 *
 *	(sum + accumulator) mod 2^16
 * =	{ def. accumulator, def. sum }
 *	(x mod 2^16 + (x >> 16) mod 2^16) mod 2^16
 * =	{ (0), twice }
 *	(x + (x >> 16)) mod 2^16
 * =	{ x mod 2^n = x & (2^n - 1) }
 *	(x + (x >> 16)) & 0xffff
 *
 * Note: this serves also as a reduction step for at most one add (as the
 * trailing mod 2^16 prevents further reductions by destroying carries).
 */
static __inline void
pf_cksum_fixup(u_int16_t *cksum, u_int16_t was, u_int16_t now,
    u_int8_t proto)
{
	u_int32_t x;
	const int udp = proto == IPPROTO_UDP;

	x = *cksum + was - now;
	x = (x + (x >> 16)) & 0xffff;

	/* optimise: eliminate a branch when not udp */
	if (udp && *cksum == 0x0000)
		return;
	if (udp && x == 0x0000)
		x = 0xffff;

	*cksum = (u_int16_t)(x);
}

/* pre: coverage(cksum) is superset of coverage(covered_cksum) */
static __inline void
pf_cksum_uncover(u_int16_t *cksum, u_int16_t covered_cksum, u_int8_t proto)
{
	pf_cksum_fixup(cksum, ~covered_cksum, 0x0, proto);
}

/* pre: disjoint(coverage(cksum), coverage(uncovered_cksum)) */
static __inline void
pf_cksum_cover(u_int16_t *cksum, u_int16_t uncovered_cksum, u_int8_t proto)
{
	pf_cksum_fixup(cksum, 0x0, ~uncovered_cksum, proto);
}
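/*
 * Worked example (illustrative, not part of the original sources):
 * patching a field from was = 0x1234 to now = 0x1235 with
 * *cksum == 0xabcd gives x = 0xabcd + 0x1234 - 0x1235 = 0xabcc; no
 * borrow occurred, so the single reduction step leaves 0xabcc and the
 * checksum drops by exactly the field's increase.  Had the subtraction
 * underflowed, the wrapped accumulator (0xffff) folded back into the
 * low word would supply the ones-complement borrow derived above.
 */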
/* pre: *a is 16-bit aligned within its packet
 *
 * This algorithm emulates 16-bit ones-complement sums on a twos-complement
 * machine by conserving ones-complement's otherwise discarded carries in the
 * upper bits of x. These accumulated carries when added to the lower 16-bits
 * over at least zero 'reduction' steps then complete the ones-complement sum.
 *
 * def. sum = x mod 2^16
 * def. accumulator = (x >> 16)
 *
 * At most two reduction steps
 *
 *	x := sum + accumulator
 * =	{ def sum, def accumulator }
 *	x := x mod 2^16 + (x >> 16)
 * =	{ x mod 2^n = x & (2^n - 1) }
 *	x := (x & 0xffff) + (x >> 16)
 *
 * are necessary to incorporate the accumulated carries (at most one per add)
 * i.e. to reduce x < 2^16 from at most 16 carries in the upper 16 bits.
 *
 * The function is also invariant over the endian of the host. Why?
 *
 * Define the unary transpose operator ~ on a bitstring in python slice
 * notation as lambda m: m[P:] + m[:P] , for some constant pivot P.
 *
 * th. ~ distributes over ones-complement addition, denoted by +_1, i.e.
 *
 *	~m +_1 ~n = ~(m +_1 n)    (for all bitstrings m,n of equal length)
 *
 * proof. Regard the bitstrings in m +_1 n as split at P, forming at most two
 * 'half-adds'. Under ones-complement addition, each half-add carries to the
 * other, so the sum of each half-add is unaffected by their relative
 * order. Therefore:
 *
 *	~m +_1 ~n
 * =	{ half-adds invariant under transposition }
 *	~s
 * =	{ substitute }
 *	~(m +_1 n)		[end of proof]
 *
 * th. Summing two in-memory ones-complement 16-bit variables m,n on a machine
 * with the converse endian does not alter the result.
 *
 * proof.
 *	{ converse machine endian: load/store transposes, P := 8 }
 *	~(~m +_1 ~n)
 * =	{ ~ over +_1 }
 *	~~m +_1 ~~n
 * =	{ ~ is an involution }
 *	 m +_1 n		[end of proof]
 *
 */
#define NEG(x) ((u_int16_t)~(x))
void
pf_cksum_fixup_a(u_int16_t *cksum, const struct pf_addr *a,
    const struct pf_addr *an, sa_family_t af, u_int8_t proto)
{
	u_int32_t	 x;
	const u_int16_t	*n = an->addr16;
	const u_int16_t	*o = a->addr16;
	const int	 udp = proto == IPPROTO_UDP;

	switch (af) {
	case AF_INET:
		x = *cksum + o[0] + NEG(n[0]) + o[1] + NEG(n[1]);
		break;
#ifdef INET6
	case AF_INET6:
		x = *cksum + o[0] + NEG(n[0]) + o[1] + NEG(n[1]) +\
			     o[2] + NEG(n[2]) + o[3] + NEG(n[3]) +\
			     o[4] + NEG(n[4]) + o[5] + NEG(n[5]) +\
			     o[6] + NEG(n[6]) + o[7] + NEG(n[7]);
		break;
#endif /* INET6 */
	default:
		unhandled_af(af);
	}

	x = (x & 0xffff) + (x >> 16);
	x = (x & 0xffff) + (x >> 16);

	/* optimise: eliminate a branch when not udp */
	if (udp && *cksum == 0x0000)
		return;
	if (udp && x == 0x0000)
		x = 0xffff;

	*cksum = (u_int16_t)(x);
}

int
pf_patch_8(struct pf_pdesc *pd, u_int8_t *f, u_int8_t v, bool hi)
{
	int	rewrite = 0;

	if (*f != v) {
		u_int16_t old = htons(hi ? (*f << 8) : *f);
		u_int16_t new = htons(hi ? ( v << 8) :  v);

		pf_cksum_fixup(pd->pcksum, old, new, pd->proto);
		*f = v;
		rewrite = 1;
	}

	return (rewrite);
}

/* pre: *f is 16-bit aligned within its packet */
int
pf_patch_16(struct pf_pdesc *pd, u_int16_t *f, u_int16_t v)
{
	int	rewrite = 0;

	if (*f != v) {
		pf_cksum_fixup(pd->pcksum, *f, v, pd->proto);
		*f = v;
		rewrite = 1;
	}

	return (rewrite);
}

int
pf_patch_16_unaligned(struct pf_pdesc *pd, void *f, u_int16_t v, bool hi)
{
	int		 rewrite = 0;
	u_int8_t	*fb = (u_int8_t*)f;
	u_int8_t	*vb = (u_int8_t*)&v;

	if (hi && ALIGNED_POINTER(f, u_int16_t)) {
		return (pf_patch_16(pd, f, v)); /* optimise */
	}

	rewrite += pf_patch_8(pd, fb++, *vb++, hi);
	rewrite += pf_patch_8(pd, fb++, *vb++,!hi);

	return (rewrite);
}

/* pre: *f is 16-bit aligned within its packet */
/* pre: pd->proto != IPPROTO_UDP */
int
pf_patch_32(struct pf_pdesc *pd, u_int32_t *f, u_int32_t v)
{
	int	rewrite = 0;
	u_int16_t *pc = pd->pcksum;
	u_int8_t proto = pd->proto;

	/* optimise: inline udp fixup code is unused; let compiler scrub it */
	if (proto == IPPROTO_UDP)
		panic("pf_patch_32: udp");

	/* optimise: skip *f != v guard; true for all use-cases */
	pf_cksum_fixup(pc, *f / (1 << 16), v / (1 << 16), proto);
	pf_cksum_fixup(pc, *f % (1 << 16), v % (1 << 16), proto);

	*f = v;
	rewrite = 1;

	return (rewrite);
}

int
pf_patch_32_unaligned(struct pf_pdesc *pd, void *f, u_int32_t v, bool hi)
{
	int		 rewrite = 0;
	u_int8_t	*fb = (u_int8_t*)f;
	u_int8_t	*vb = (u_int8_t*)&v;

	if (hi && ALIGNED_POINTER(f, u_int32_t)) {
		return (pf_patch_32(pd, f, v)); /* optimise */
	}

	rewrite += pf_patch_8(pd, fb++, *vb++, hi);
	rewrite += pf_patch_8(pd, fb++, *vb++,!hi);
	rewrite += pf_patch_8(pd, fb++, *vb++, hi);
	rewrite += pf_patch_8(pd, fb++, *vb++,!hi);

	return (rewrite);
}
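/*
 * Usage sketch (illustrative, not part of the original sources): given
 * a struct tcphdr *th mapped into the packet, rewriting the source port
 * during translation is a single checksum-neutral patch:
 *
 *	pf_patch_16(pd, &th->th_sport, htons(54321));
 *
 * pf_cksum_fixup() folds the old value out of, and the new value into,
 * *pd->pcksum, so the transport checksum stays valid without a full
 * recomputation.
 */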
1984 int 1985 pf_icmp_mapping(struct pf_pdesc *pd, u_int8_t type, int *icmp_dir, 1986 u_int16_t *virtual_id, u_int16_t *virtual_type) 1987 { 1988 /* 1989 * ICMP types marked with PF_OUT are typically responses to 1990 * PF_IN, and will match states in the opposite direction. 1991 * PF_IN ICMP types need to match a state with that type. 1992 */ 1993 *icmp_dir = PF_OUT; 1994 1995 /* Queries (and responses) */ 1996 switch (pd->af) { 1997 case AF_INET: 1998 switch (type) { 1999 case ICMP_ECHO: 2000 *icmp_dir = PF_IN; 2001 /* FALLTHROUGH */ 2002 case ICMP_ECHOREPLY: 2003 *virtual_type = ICMP_ECHO; 2004 *virtual_id = pd->hdr.icmp->icmp_id; 2005 break; 2006 2007 case ICMP_TSTAMP: 2008 *icmp_dir = PF_IN; 2009 /* FALLTHROUGH */ 2010 case ICMP_TSTAMPREPLY: 2011 *virtual_type = ICMP_TSTAMP; 2012 *virtual_id = pd->hdr.icmp->icmp_id; 2013 break; 2014 2015 case ICMP_IREQ: 2016 *icmp_dir = PF_IN; 2017 /* FALLTHROUGH */ 2018 case ICMP_IREQREPLY: 2019 *virtual_type = ICMP_IREQ; 2020 *virtual_id = pd->hdr.icmp->icmp_id; 2021 break; 2022 2023 case ICMP_MASKREQ: 2024 *icmp_dir = PF_IN; 2025 /* FALLTHROUGH */ 2026 case ICMP_MASKREPLY: 2027 *virtual_type = ICMP_MASKREQ; 2028 *virtual_id = pd->hdr.icmp->icmp_id; 2029 break; 2030 2031 case ICMP_IPV6_WHEREAREYOU: 2032 *icmp_dir = PF_IN; 2033 /* FALLTHROUGH */ 2034 case ICMP_IPV6_IAMHERE: 2035 *virtual_type = ICMP_IPV6_WHEREAREYOU; 2036 *virtual_id = 0; /* Nothing sane to match on! */ 2037 break; 2038 2039 case ICMP_MOBILE_REGREQUEST: 2040 *icmp_dir = PF_IN; 2041 /* FALLTHROUGH */ 2042 case ICMP_MOBILE_REGREPLY: 2043 *virtual_type = ICMP_MOBILE_REGREQUEST; 2044 *virtual_id = 0; /* Nothing sane to match on! */ 2045 break; 2046 2047 case ICMP_ROUTERSOLICIT: 2048 *icmp_dir = PF_IN; 2049 /* FALLTHROUGH */ 2050 case ICMP_ROUTERADVERT: 2051 *virtual_type = ICMP_ROUTERSOLICIT; 2052 *virtual_id = 0; /* Nothing sane to match on! */ 2053 break; 2054 2055 /* These ICMP types map to other connections */ 2056 case ICMP_UNREACH: 2057 case ICMP_SOURCEQUENCH: 2058 case ICMP_REDIRECT: 2059 case ICMP_TIMXCEED: 2060 case ICMP_PARAMPROB: 2061 /* These will not be used, but set them anyway */ 2062 *icmp_dir = PF_IN; 2063 *virtual_type = htons(type); 2064 *virtual_id = 0; 2065 return (1); /* These types match to another state */ 2066 2067 /* 2068 * All remaining ICMP types get their own states, 2069 * and will only match in one direction. 2070 */ 2071 default: 2072 *icmp_dir = PF_IN; 2073 *virtual_type = type; 2074 *virtual_id = 0; 2075 break; 2076 } 2077 break; 2078 #ifdef INET6 2079 case AF_INET6: 2080 switch (type) { 2081 case ICMP6_ECHO_REQUEST: 2082 *icmp_dir = PF_IN; 2083 /* FALLTHROUGH */ 2084 case ICMP6_ECHO_REPLY: 2085 *virtual_type = ICMP6_ECHO_REQUEST; 2086 *virtual_id = pd->hdr.icmp6->icmp6_id; 2087 break; 2088 2089 case MLD_LISTENER_QUERY: 2090 *icmp_dir = PF_IN; 2091 /* FALLTHROUGH */ 2092 case MLD_LISTENER_REPORT: { 2093 struct mld_hdr *mld = (void *)pd->hdr.icmp6; 2094 u_int32_t h; 2095 2096 *virtual_type = MLD_LISTENER_QUERY; 2097 /* generate fake id for these messages */ 2098 h = mld->mld_addr.s6_addr32[0] ^ 2099 mld->mld_addr.s6_addr32[1] ^ 2100 mld->mld_addr.s6_addr32[2] ^ 2101 mld->mld_addr.s6_addr32[3]; 2102 *virtual_id = (h >> 16) ^ (h & 0xffff); 2103 break; 2104 } 2105 2106 /* 2107 * ICMP6_FQDN and ICMP6_NI query/reply are the same type as 2108 * ICMP6_WRU 2109 */ 2110 case ICMP6_WRUREQUEST: 2111 *icmp_dir = PF_IN; 2112 /* FALLTHROUGH */ 2113 case ICMP6_WRUREPLY: 2114 *virtual_type = ICMP6_WRUREQUEST; 2115 *virtual_id = 0; /* Nothing sane to match on! 
*/
2116 break;
2117
2118 case MLD_MTRACE:
2119 *icmp_dir = PF_IN;
2120 /* FALLTHROUGH */
2121 case MLD_MTRACE_RESP:
2122 *virtual_type = MLD_MTRACE;
2123 *virtual_id = 0; /* Nothing sane to match on! */
2124 break;
2125
2126 case ND_NEIGHBOR_SOLICIT:
2127 *icmp_dir = PF_IN;
2128 /* FALLTHROUGH */
2129 case ND_NEIGHBOR_ADVERT: {
2130 struct nd_neighbor_solicit *nd = (void *)pd->hdr.icmp6;
2131 u_int32_t h;
2132
2133 *virtual_type = ND_NEIGHBOR_SOLICIT;
2134 /* generate fake id for these messages */
2135 h = nd->nd_ns_target.s6_addr32[0] ^
2136 nd->nd_ns_target.s6_addr32[1] ^
2137 nd->nd_ns_target.s6_addr32[2] ^
2138 nd->nd_ns_target.s6_addr32[3];
2139 *virtual_id = (h >> 16) ^ (h & 0xffff);
2140 break;
2141 }
2142
2143 /*
2144 * These ICMP types map to other connections.
2145 * ND_REDIRECT can't be in this list because the triggering
2146 * packet header is optional.
2147 */
2148 case ICMP6_DST_UNREACH:
2149 case ICMP6_PACKET_TOO_BIG:
2150 case ICMP6_TIME_EXCEEDED:
2151 case ICMP6_PARAM_PROB:
2152 /* These will not be used, but set them anyway */
2153 *icmp_dir = PF_IN;
2154 *virtual_type = htons(type);
2155 *virtual_id = 0;
2156 return (1); /* These types match to another state */
2157 /*
2158 * All remaining ICMP6 types get their own states,
2159 * and will only match in one direction.
2160 */
2161 default:
2162 *icmp_dir = PF_IN;
2163 *virtual_type = type;
2164 *virtual_id = 0;
2165 break;
2166 }
2167 break;
2168 #endif /* INET6 */
2169 }
2170 *virtual_type = htons(*virtual_type);
2171 return (0); /* These types match to their own state */
2172 }
2173
2174 void
2175 pf_translate_icmp(struct pf_pdesc *pd, struct pf_addr *qa, u_int16_t *qp,
2176 struct pf_addr *oa, struct pf_addr *na, u_int16_t np)
2177 {
2178 /* note: doesn't trouble to fixup quoted checksums, if any */
2179
2180 /* change quoted protocol port */
2181 if (qp != NULL)
2182 pf_patch_16(pd, qp, np);
2183
2184 /* change quoted ip address */
2185 pf_cksum_fixup_a(pd->pcksum, qa, na, pd->af, pd->proto);
2186 PF_ACPY(qa, na, pd->af);
2187
2188 /* change network-header's ip address */
2189 if (oa)
2190 pf_translate_a(pd, oa, na);
2191 }
2192
2193 /* pre: *a is 16-bit aligned within its packet */
2194 /* *a is a network header src/dst address */
2195 int
2196 pf_translate_a(struct pf_pdesc *pd, struct pf_addr *a, struct pf_addr *an)
2197 {
2198 int rewrite = 0;
2199
2200 /* warning: !PF_ANEQ != PF_AEQ */
2201 if (!PF_ANEQ(a, an, pd->af))
2202 return (0);
2203
2204 /* fixup transport pseudo-header, if any */
2205 switch (pd->proto) {
2206 case IPPROTO_TCP: /* FALLTHROUGH */
2207 case IPPROTO_UDP: /* FALLTHROUGH */
2208 case IPPROTO_ICMPV6:
2209 pf_cksum_fixup_a(pd->pcksum, a, an, pd->af, pd->proto);
2210 break;
2211 default:
2212 break; /* assume no pseudo-header */
2213 }
2214
2215 PF_ACPY(a, an, pd->af);
2216 rewrite = 1;
2217
2218 return (rewrite);
2219 }
2220
2221 #ifdef INET6
2222 int
2223 pf_translate_af(struct pf_pdesc *pd)
2224 {
2225 static const struct pf_addr zero;
2226 struct ip *ip4;
2227 struct ip6_hdr *ip6;
2228 int copyback = 0;
2229 u_int hlen, ohlen, dlen;
2230 u_int16_t *pc;
2231 u_int8_t af_proto, naf_proto;
2232
2233 hlen = (pd->naf == AF_INET) ?
sizeof(*ip4) : sizeof(*ip6); 2234 ohlen = pd->off; 2235 dlen = pd->tot_len - pd->off; 2236 pc = pd->pcksum; 2237 2238 af_proto = naf_proto = pd->proto; 2239 if (naf_proto == IPPROTO_ICMP) 2240 af_proto = IPPROTO_ICMPV6; 2241 if (naf_proto == IPPROTO_ICMPV6) 2242 af_proto = IPPROTO_ICMP; 2243 2244 /* uncover stale pseudo-header */ 2245 switch (af_proto) { 2246 case IPPROTO_ICMPV6: 2247 /* optimise: unchanged for TCP/UDP */ 2248 pf_cksum_fixup(pc, htons(af_proto), 0x0, af_proto); 2249 pf_cksum_fixup(pc, htons(dlen), 0x0, af_proto); 2250 /* FALLTHROUGH */ 2251 case IPPROTO_UDP: /* FALLTHROUGH */ 2252 case IPPROTO_TCP: 2253 pf_cksum_fixup_a(pc, pd->src, &zero, pd->af, af_proto); 2254 pf_cksum_fixup_a(pc, pd->dst, &zero, pd->af, af_proto); 2255 copyback = 1; 2256 break; 2257 default: 2258 break; /* assume no pseudo-header */ 2259 } 2260 2261 /* replace the network header */ 2262 m_adj(pd->m, pd->off); 2263 pd->src = NULL; 2264 pd->dst = NULL; 2265 2266 if ((M_PREPEND(pd->m, hlen, M_DONTWAIT)) == NULL) { 2267 pd->m = NULL; 2268 return (-1); 2269 } 2270 2271 pd->off = hlen; 2272 pd->tot_len += hlen - ohlen; 2273 2274 switch (pd->naf) { 2275 case AF_INET: 2276 ip4 = mtod(pd->m, struct ip *); 2277 bzero(ip4, hlen); 2278 ip4->ip_v = IPVERSION; 2279 ip4->ip_hl = hlen >> 2; 2280 ip4->ip_tos = pd->tos; 2281 ip4->ip_len = htons(hlen + dlen); 2282 ip4->ip_id = htons(ip_randomid()); 2283 ip4->ip_off = htons(IP_DF); 2284 ip4->ip_ttl = pd->ttl; 2285 ip4->ip_p = pd->proto; 2286 ip4->ip_src = pd->nsaddr.v4; 2287 ip4->ip_dst = pd->ndaddr.v4; 2288 break; 2289 case AF_INET6: 2290 ip6 = mtod(pd->m, struct ip6_hdr *); 2291 bzero(ip6, hlen); 2292 ip6->ip6_vfc = IPV6_VERSION; 2293 ip6->ip6_flow |= htonl((u_int32_t)pd->tos << 20); 2294 ip6->ip6_plen = htons(dlen); 2295 ip6->ip6_nxt = pd->proto; 2296 if (!pd->ttl || pd->ttl > IPV6_DEFHLIM) 2297 ip6->ip6_hlim = IPV6_DEFHLIM; 2298 else 2299 ip6->ip6_hlim = pd->ttl; 2300 ip6->ip6_src = pd->nsaddr.v6; 2301 ip6->ip6_dst = pd->ndaddr.v6; 2302 break; 2303 default: 2304 unhandled_af(pd->naf); 2305 } 2306 2307 /* UDP over IPv6 must be checksummed per rfc2460 p27 */ 2308 if (naf_proto == IPPROTO_UDP && *pc == 0x0000 && 2309 pd->naf == AF_INET6) { 2310 pd->m->m_pkthdr.csum_flags |= M_UDP_CSUM_OUT; 2311 } 2312 2313 /* cover fresh pseudo-header */ 2314 switch (naf_proto) { 2315 case IPPROTO_ICMPV6: 2316 /* optimise: unchanged for TCP/UDP */ 2317 pf_cksum_fixup(pc, 0x0, htons(naf_proto), naf_proto); 2318 pf_cksum_fixup(pc, 0x0, htons(dlen), naf_proto); 2319 /* FALLTHROUGH */ 2320 case IPPROTO_UDP: /* FALLTHROUGH */ 2321 case IPPROTO_TCP: 2322 pf_cksum_fixup_a(pc, &zero, &pd->nsaddr, pd->naf, naf_proto); 2323 pf_cksum_fixup_a(pc, &zero, &pd->ndaddr, pd->naf, naf_proto); 2324 copyback = 1; 2325 break; 2326 default: 2327 break; /* assume no pseudo-header */ 2328 } 2329 2330 /* flush pd->pcksum */ 2331 if (copyback) 2332 m_copyback(pd->m, pd->off, pd->hdrlen, pd->hdr.any, M_NOWAIT); 2333 2334 return (0); 2335 } 2336 2337 int 2338 pf_change_icmp_af(struct mbuf *m, int ipoff2, struct pf_pdesc *pd, 2339 struct pf_pdesc *pd2, struct pf_addr *src, struct pf_addr *dst, 2340 sa_family_t af, sa_family_t naf) 2341 { 2342 struct mbuf *n = NULL; 2343 struct ip *ip4; 2344 struct ip6_hdr *ip6; 2345 u_int hlen, ohlen, d; 2346 2347 if (af == naf || (af != AF_INET && af != AF_INET6) || 2348 (naf != AF_INET && naf != AF_INET6)) 2349 return (-1); 2350 2351 /* split the mbuf chain on the quoted ip/ip6 header boundary */ 2352 if ((n = m_split(m, ipoff2, M_DONTWAIT)) == NULL) 2353 return (-1); 2354 2355 /* 
new quoted header */
2356 hlen = naf == AF_INET ? sizeof(*ip4) : sizeof(*ip6);
2357 /* old quoted header */
2358 ohlen = pd2->off - ipoff2;
2359
2360 /* trim old quoted header */
2361 pf_cksum_uncover(pd->pcksum, in_cksum(n, ohlen), pd->proto);
2362 m_adj(n, ohlen);
2363
2364 /* prepend a new, translated, quoted header */
2365 if ((M_PREPEND(n, hlen, M_DONTWAIT)) == NULL)
2366 return (-1);
2367
2368 switch (naf) {
2369 case AF_INET:
2370 ip4 = mtod(n, struct ip *);
2371 bzero(ip4, sizeof(*ip4));
2372 ip4->ip_v = IPVERSION;
2373 ip4->ip_hl = sizeof(*ip4) >> 2;
2374 ip4->ip_len = htons(sizeof(*ip4) + pd2->tot_len - ohlen);
2375 ip4->ip_id = htons(ip_randomid());
2376 ip4->ip_off = htons(IP_DF);
2377 ip4->ip_ttl = pd2->ttl;
2378 if (pd2->proto == IPPROTO_ICMPV6)
2379 ip4->ip_p = IPPROTO_ICMP;
2380 else
2381 ip4->ip_p = pd2->proto;
2382 ip4->ip_src = src->v4;
2383 ip4->ip_dst = dst->v4;
2384 ip4->ip_sum = in_cksum(n, ip4->ip_hl << 2);
2385 break;
2386 case AF_INET6:
2387 ip6 = mtod(n, struct ip6_hdr *);
2388 bzero(ip6, sizeof(*ip6));
2389 ip6->ip6_vfc = IPV6_VERSION;
2390 ip6->ip6_plen = htons(pd2->tot_len - ohlen);
2391 if (pd2->proto == IPPROTO_ICMP)
2392 ip6->ip6_nxt = IPPROTO_ICMPV6;
2393 else
2394 ip6->ip6_nxt = pd2->proto;
2395 if (!pd2->ttl || pd2->ttl > IPV6_DEFHLIM)
2396 ip6->ip6_hlim = IPV6_DEFHLIM;
2397 else
2398 ip6->ip6_hlim = pd2->ttl;
2399 ip6->ip6_src = src->v6;
2400 ip6->ip6_dst = dst->v6;
2401 break;
2402 }
2403
2404 /* cover new quoted header */
2405 /* optimise: any new AF_INET header of ours sums to zero */
2406 if (naf != AF_INET) {
2407 pf_cksum_cover(pd->pcksum, in_cksum(n, hlen), pd->proto);
2408 }
2409
2410 /* reattach modified quoted packet to outer header */
2411 {
2412 int nlen = n->m_pkthdr.len;
2413 m_cat(m, n);
2414 m->m_pkthdr.len += nlen;
2415 }
2416
2417 /* account for altered length */
2418 d = hlen - ohlen;
2419
2420 if (pd->proto == IPPROTO_ICMPV6) {
2421 /* fixup pseudo-header */
2422 int dlen = pd->tot_len - pd->off;
2423 pf_cksum_fixup(pd->pcksum,
2424 htons(dlen), htons(dlen + d), pd->proto);
2425 }
2426
2427 pd->tot_len += d;
2428 pd2->tot_len += d;
2429 pd2->off += d;
2430
2431 /* note: not bothering to update network headers as
2432 these are due for rewrite by pf_translate_af() */
2433
2434 return (0);
2435 }
2436
2437
2438 #define PTR_IP(field) (offsetof(struct ip, field))
2439 #define PTR_IP6(field) (offsetof(struct ip6_hdr, field))
2440
2441 int
2442 pf_translate_icmp_af(struct pf_pdesc *pd, int af, void *arg)
2443 {
2444 struct icmp *icmp4;
2445 struct icmp6_hdr *icmp6;
2446 u_int32_t mtu;
2447 int32_t ptr = -1;
2448 u_int8_t type;
2449 u_int8_t code;
2450
2451 switch (af) {
2452 case AF_INET:
2453 icmp6 = arg;
2454 type = icmp6->icmp6_type;
2455 code = icmp6->icmp6_code;
2456 mtu = ntohl(icmp6->icmp6_mtu);
2457
2458 switch (type) {
2459 case ICMP6_ECHO_REQUEST:
2460 type = ICMP_ECHO;
2461 break;
2462 case ICMP6_ECHO_REPLY:
2463 type = ICMP_ECHOREPLY;
2464 break;
2465 case ICMP6_DST_UNREACH:
2466 type = ICMP_UNREACH;
2467 switch (code) {
2468 case ICMP6_DST_UNREACH_NOROUTE:
2469 case ICMP6_DST_UNREACH_BEYONDSCOPE:
2470 case ICMP6_DST_UNREACH_ADDR:
2471 code = ICMP_UNREACH_HOST;
2472 break;
2473 case ICMP6_DST_UNREACH_ADMIN:
2474 code = ICMP_UNREACH_HOST_PROHIB;
2475 break;
2476 case ICMP6_DST_UNREACH_NOPORT:
2477 code = ICMP_UNREACH_PORT;
2478 break;
2479 default:
2480 return (-1);
2481 }
2482 break;
2483 case ICMP6_PACKET_TOO_BIG:
2484 type = ICMP_UNREACH;
2485 code = ICMP_UNREACH_NEEDFRAG;
2486 mtu -= 20;
2487 break;
2488 case ICMP6_TIME_EXCEEDED:
2489 type = ICMP_TIMXCEED;
2490 break;
2491 case ICMP6_PARAM_PROB:
2492 switch (code) {
2493 case ICMP6_PARAMPROB_HEADER:
2494 type = ICMP_PARAMPROB;
2495 code = ICMP_PARAMPROB_ERRATPTR;
2496 ptr = ntohl(icmp6->icmp6_pptr);
2497
2498 if (ptr == PTR_IP6(ip6_vfc))
2499 ; /* preserve */
2500 else if (ptr == PTR_IP6(ip6_vfc) + 1)
2501 ptr = PTR_IP(ip_tos);
2502 else if (ptr == PTR_IP6(ip6_plen) ||
2503 ptr == PTR_IP6(ip6_plen) + 1)
2504 ptr = PTR_IP(ip_len);
2505 else if (ptr == PTR_IP6(ip6_nxt))
2506 ptr = PTR_IP(ip_p);
2507 else if (ptr == PTR_IP6(ip6_hlim))
2508 ptr = PTR_IP(ip_ttl);
2509 else if (ptr >= PTR_IP6(ip6_src) &&
2510 ptr < PTR_IP6(ip6_dst))
2511 ptr = PTR_IP(ip_src);
2512 else if (ptr >= PTR_IP6(ip6_dst) &&
2513 ptr < sizeof(struct ip6_hdr))
2514 ptr = PTR_IP(ip_dst);
2515 else {
2516 return (-1);
2517 }
2518 break;
2519 case ICMP6_PARAMPROB_NEXTHEADER:
2520 type = ICMP_UNREACH;
2521 code = ICMP_UNREACH_PROTOCOL;
2522 break;
2523 default:
2524 return (-1);
2525 }
2526 break;
2527 default:
2528 return (-1);
2529 }
2530
2531 pf_patch_8(pd, &icmp6->icmp6_type, type, PF_HI);
2532 pf_patch_8(pd, &icmp6->icmp6_code, code, PF_LO);
2533
2534 /* aligns well with an icmpv4 nextmtu */
2535 pf_patch_32(pd, &icmp6->icmp6_mtu, htonl(mtu));
2536
2537 /* icmpv4 pptr occupies only the most significant byte */
2538 if (ptr >= 0)
2539 pf_patch_32(pd, &icmp6->icmp6_pptr, htonl(ptr << 24));
2540 break;
2541 case AF_INET6:
2542 icmp4 = arg;
2543 type = icmp4->icmp_type;
2544 code = icmp4->icmp_code;
2545 mtu = ntohs(icmp4->icmp_nextmtu);
2546
2547 switch (type) {
2548 case ICMP_ECHO:
2549 type = ICMP6_ECHO_REQUEST;
2550 break;
2551 case ICMP_ECHOREPLY:
2552 type = ICMP6_ECHO_REPLY;
2553 break;
2554 case ICMP_UNREACH:
2555 type = ICMP6_DST_UNREACH;
2556 switch (code) {
2557 case ICMP_UNREACH_NET:
2558 case ICMP_UNREACH_HOST:
2559 case ICMP_UNREACH_NET_UNKNOWN:
2560 case ICMP_UNREACH_HOST_UNKNOWN:
2561 case ICMP_UNREACH_ISOLATED:
2562 case ICMP_UNREACH_TOSNET:
2563 case ICMP_UNREACH_TOSHOST:
2564 code = ICMP6_DST_UNREACH_NOROUTE;
2565 break;
2566 case ICMP_UNREACH_PORT:
2567 code = ICMP6_DST_UNREACH_NOPORT;
2568 break;
2569 case ICMP_UNREACH_NET_PROHIB:
2570 case ICMP_UNREACH_HOST_PROHIB:
2571 case ICMP_UNREACH_FILTER_PROHIB:
2572 case ICMP_UNREACH_PRECEDENCE_CUTOFF:
2573 code = ICMP6_DST_UNREACH_ADMIN;
2574 break;
2575 case ICMP_UNREACH_PROTOCOL:
2576 type = ICMP6_PARAM_PROB;
2577 code = ICMP6_PARAMPROB_NEXTHEADER;
2578 ptr = offsetof(struct ip6_hdr, ip6_nxt);
2579 break;
2580 case ICMP_UNREACH_NEEDFRAG:
2581 type = ICMP6_PACKET_TOO_BIG;
2582 code = 0;
2583 mtu += 20;
2584 break;
2585 default:
2586 return (-1);
2587 }
2588 break;
2589 case ICMP_TIMXCEED:
2590 type = ICMP6_TIME_EXCEEDED;
2591 break;
2592 case ICMP_PARAMPROB:
2593 type = ICMP6_PARAM_PROB;
2594 switch (code) {
2595 case ICMP_PARAMPROB_ERRATPTR:
2596 code = ICMP6_PARAMPROB_HEADER;
2597 break;
2598 case ICMP_PARAMPROB_LENGTH:
2599 code = ICMP6_PARAMPROB_HEADER;
2600 break;
2601 default:
2602 return (-1);
2603 }
2604
2605 ptr = icmp4->icmp_pptr;
2606 if (ptr == 0 || ptr == PTR_IP(ip_tos))
2607 ; /* preserve */
2608 else if (ptr == PTR_IP(ip_len) ||
2609 ptr == PTR_IP(ip_len) + 1)
2610 ptr = PTR_IP6(ip6_plen);
2611 else if (ptr == PTR_IP(ip_ttl))
2612 ptr = PTR_IP6(ip6_hlim);
2613 else if (ptr == PTR_IP(ip_p))
2614 ptr = PTR_IP6(ip6_nxt);
2615 else if (ptr >= PTR_IP(ip_src) &&
2616 ptr < PTR_IP(ip_dst))
2617 ptr = PTR_IP6(ip6_src);
2618 else if (ptr >= PTR_IP(ip_dst) &&
2619 ptr < sizeof(struct ip))
2620 ptr = PTR_IP6(ip6_dst);
2621 else { 2622 return (-1); 2623 } 2624 break; 2625 default: 2626 return (-1); 2627 } 2628 2629 pf_patch_8(pd, &icmp4->icmp_type, type, PF_HI); 2630 pf_patch_8(pd, &icmp4->icmp_code, code, PF_LO); 2631 pf_patch_16(pd, &icmp4->icmp_nextmtu, htons(mtu)); 2632 if (ptr >= 0) 2633 pf_patch_32(pd, &icmp4->icmp_void, htonl(ptr)); 2634 break; 2635 } 2636 2637 return (0); 2638 } 2639 #endif /* INET6 */ 2640 2641 /* 2642 * Need to modulate the sequence numbers in the TCP SACK option 2643 * (credits to Krzysztof Pfaff for report and patch) 2644 */ 2645 int 2646 pf_modulate_sack(struct pf_pdesc *pd, struct pf_state_peer *dst) 2647 { 2648 struct tcphdr *th = pd->hdr.tcp; 2649 int hlen = (th->th_off << 2) - sizeof(*th); 2650 int thoptlen = hlen; 2651 u_int8_t opts[MAX_TCPOPTLEN], *opt = opts; 2652 int copyback = 0, i, olen; 2653 struct sackblk sack; 2654 2655 #define TCPOLEN_SACKLEN (TCPOLEN_SACK + 2) 2656 if (hlen < TCPOLEN_SACKLEN || hlen > MAX_TCPOPTLEN || !pf_pull_hdr( 2657 pd->m, pd->off + sizeof(*th), opts, hlen, NULL, NULL, pd->af)) 2658 return 0; 2659 2660 while (hlen >= TCPOLEN_SACKLEN) { 2661 olen = opt[1]; 2662 switch (*opt) { 2663 case TCPOPT_EOL: /* FALLTHROUGH */ 2664 case TCPOPT_NOP: 2665 opt++; 2666 hlen--; 2667 break; 2668 case TCPOPT_SACK: 2669 if (olen > hlen) 2670 olen = hlen; 2671 if (olen >= TCPOLEN_SACKLEN) { 2672 for (i = 2; i + TCPOLEN_SACK <= olen; 2673 i += TCPOLEN_SACK) { 2674 size_t startoff = (opt + i) - opts; 2675 memcpy(&sack, &opt[i], sizeof(sack)); 2676 pf_patch_32_unaligned(pd, &sack.start, 2677 htonl(ntohl(sack.start) - 2678 dst->seqdiff), 2679 PF_ALGNMNT(startoff)); 2680 pf_patch_32_unaligned(pd, &sack.end, 2681 htonl(ntohl(sack.end) - 2682 dst->seqdiff), 2683 PF_ALGNMNT(startoff + 2684 sizeof(sack.start))); 2685 memcpy(&opt[i], &sack, sizeof(sack)); 2686 } 2687 copyback = 1; 2688 } 2689 /* FALLTHROUGH */ 2690 default: 2691 if (olen < 2) 2692 olen = 2; 2693 hlen -= olen; 2694 opt += olen; 2695 } 2696 } 2697 2698 if (copyback) 2699 m_copyback(pd->m, pd->off + sizeof(*th), thoptlen, opts, 2700 M_NOWAIT); 2701 return (copyback); 2702 } 2703 2704 void 2705 pf_send_tcp(const struct pf_rule *r, sa_family_t af, 2706 const struct pf_addr *saddr, const struct pf_addr *daddr, 2707 u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack, 2708 u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag, 2709 u_int16_t rtag, u_int rdom) 2710 { 2711 struct mbuf *m; 2712 int len, tlen; 2713 struct ip *h; 2714 #ifdef INET6 2715 struct ip6_hdr *h6; 2716 #endif /* INET6 */ 2717 struct tcphdr *th; 2718 char *opt; 2719 2720 /* maximum segment size tcp option */ 2721 tlen = sizeof(struct tcphdr); 2722 if (mss) 2723 tlen += 4; 2724 2725 switch (af) { 2726 case AF_INET: 2727 len = sizeof(struct ip) + tlen; 2728 break; 2729 #ifdef INET6 2730 case AF_INET6: 2731 len = sizeof(struct ip6_hdr) + tlen; 2732 break; 2733 #endif /* INET6 */ 2734 default: 2735 unhandled_af(af); 2736 } 2737 2738 /* create outgoing mbuf */ 2739 m = m_gethdr(M_DONTWAIT, MT_HEADER); 2740 if (m == NULL) 2741 return; 2742 if (tag) 2743 m->m_pkthdr.pf.flags |= PF_TAG_GENERATED; 2744 m->m_pkthdr.pf.tag = rtag; 2745 m->m_pkthdr.ph_rtableid = rdom; 2746 if (r && (r->scrub_flags & PFSTATE_SETPRIO)) 2747 m->m_pkthdr.pf.prio = r->set_prio[0]; 2748 if (r && r->qid) 2749 m->m_pkthdr.pf.qid = r->qid; 2750 m->m_data += max_linkhdr; 2751 m->m_pkthdr.len = m->m_len = len; 2752 m->m_pkthdr.ph_ifidx = 0; 2753 m->m_pkthdr.csum_flags |= M_TCP_CSUM_OUT; 2754 bzero(m->m_data, len); 2755 switch (af) { 2756 case AF_INET: 2757 
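/* build the IPv4 header; ip_sum stays zero here and is filled in on output, while the TCP checksum is covered by M_TCP_CSUM_OUT set above */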
h = mtod(m, struct ip *);
2758 h->ip_p = IPPROTO_TCP;
2760 h->ip_v = 4;
2761 h->ip_hl = sizeof(*h) >> 2;
2762 h->ip_tos = IPTOS_LOWDELAY;
2763 h->ip_len = htons(len);
2764 h->ip_off = htons(ip_mtudisc ? IP_DF : 0);
2765 h->ip_ttl = ttl ? ttl : ip_defttl;
2766 h->ip_sum = 0;
2767 h->ip_src.s_addr = saddr->v4.s_addr;
2768 h->ip_dst.s_addr = daddr->v4.s_addr;
2769
2770 th = (struct tcphdr *)((caddr_t)h + sizeof(struct ip));
2771 break;
2772 #ifdef INET6
2773 case AF_INET6:
2774 h6 = mtod(m, struct ip6_hdr *);
2775 h6->ip6_nxt = IPPROTO_TCP;
2776 h6->ip6_plen = htons(tlen);
2777 h6->ip6_vfc |= IPV6_VERSION;
2778 h6->ip6_hlim = IPV6_DEFHLIM;
2779 memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr));
2780 memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr));
2781
2782 th = (struct tcphdr *)((caddr_t)h6 + sizeof(struct ip6_hdr));
2783 break;
2784 #endif /* INET6 */
2785 default:
2786 unhandled_af(af);
2787 }
2788
2789 /* TCP header */
2790 th->th_sport = sport;
2791 th->th_dport = dport;
2792 th->th_seq = htonl(seq);
2793 th->th_ack = htonl(ack);
2794 th->th_off = tlen >> 2;
2795 th->th_flags = flags;
2796 th->th_win = htons(win);
2797
2798 if (mss) {
2799 opt = (char *)(th + 1);
2800 opt[0] = TCPOPT_MAXSEG;
2801 opt[1] = 4;
2802 mss = htons(mss);
2803 memcpy((opt + 2), &mss, 2);
2804 }
2805
2806 switch (af) {
2807 case AF_INET:
2808 ip_send(m);
2809 break;
2810 #ifdef INET6
2811 case AF_INET6:
2812 ip6_send(m);
2813 break;
2814 #endif /* INET6 */
2815 }
2816 }
2817
2818 void
2819 pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af,
2820 struct pf_rule *r, u_int rdomain)
2821 {
2822 struct mbuf *m0;
2823
2824 if ((m0 = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL)
2825 return;
2826
2827 m0->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
2828 m0->m_pkthdr.ph_rtableid = rdomain;
2829 if (r && (r->scrub_flags & PFSTATE_SETPRIO))
2830 m0->m_pkthdr.pf.prio = r->set_prio[0];
2831 if (r && r->qid)
2832 m0->m_pkthdr.pf.qid = r->qid;
2833
2834 switch (af) {
2835 case AF_INET:
2836 icmp_error(m0, type, code, 0, 0);
2837 break;
2838 #ifdef INET6
2839 case AF_INET6:
2840 icmp6_error(m0, type, code, 0);
2841 break;
2842 #endif /* INET6 */
2843 }
2844 }
2845
2846 /*
2847 * Return ((n == 0) == (a == b [with mask m]))
2848 * Note: n != 0 => returns (a != b [with mask m])
2849 */
2850 int
2851 pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m,
2852 struct pf_addr *b, sa_family_t af)
2853 {
2854 switch (af) {
2855 case AF_INET:
2856 if ((a->addr32[0] & m->addr32[0]) ==
2857 (b->addr32[0] & m->addr32[0]))
2858 return (n == 0);
2859 break;
2860 #ifdef INET6
2861 case AF_INET6:
2862 if (((a->addr32[0] & m->addr32[0]) ==
2863 (b->addr32[0] & m->addr32[0])) &&
2864 ((a->addr32[1] & m->addr32[1]) ==
2865 (b->addr32[1] & m->addr32[1])) &&
2866 ((a->addr32[2] & m->addr32[2]) ==
2867 (b->addr32[2] & m->addr32[2])) &&
2868 ((a->addr32[3] & m->addr32[3]) ==
2869 (b->addr32[3] & m->addr32[3])))
2870 return (n == 0);
2871 break;
2872 #endif /* INET6 */
2873 }
2874
2875 return (n != 0);
2876 }
2877
2878 /*
2879 * Return 1 if b <= a <= e, otherwise return 0.
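* e.g. with af == AF_INET, b = 10.0.0.1 and e = 10.0.0.9, a = 10.0.0.5
* returns 1 and a = 10.0.0.10 returns 0. Words are compared in host byte
* order, most significant word first, so the AF_INET6 case below amounts
* to a lexicographic comparison of the four 32-bit words.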
2880 */ 2881 int 2882 pf_match_addr_range(struct pf_addr *b, struct pf_addr *e, 2883 struct pf_addr *a, sa_family_t af) 2884 { 2885 switch (af) { 2886 case AF_INET: 2887 if ((ntohl(a->addr32[0]) < ntohl(b->addr32[0])) || 2888 (ntohl(a->addr32[0]) > ntohl(e->addr32[0]))) 2889 return (0); 2890 break; 2891 #ifdef INET6 2892 case AF_INET6: { 2893 int i; 2894 2895 /* check a >= b */ 2896 for (i = 0; i < 4; ++i) 2897 if (ntohl(a->addr32[i]) > ntohl(b->addr32[i])) 2898 break; 2899 else if (ntohl(a->addr32[i]) < ntohl(b->addr32[i])) 2900 return (0); 2901 /* check a <= e */ 2902 for (i = 0; i < 4; ++i) 2903 if (ntohl(a->addr32[i]) < ntohl(e->addr32[i])) 2904 break; 2905 else if (ntohl(a->addr32[i]) > ntohl(e->addr32[i])) 2906 return (0); 2907 break; 2908 } 2909 #endif /* INET6 */ 2910 } 2911 return (1); 2912 } 2913 2914 int 2915 pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p) 2916 { 2917 switch (op) { 2918 case PF_OP_IRG: 2919 return ((p > a1) && (p < a2)); 2920 case PF_OP_XRG: 2921 return ((p < a1) || (p > a2)); 2922 case PF_OP_RRG: 2923 return ((p >= a1) && (p <= a2)); 2924 case PF_OP_EQ: 2925 return (p == a1); 2926 case PF_OP_NE: 2927 return (p != a1); 2928 case PF_OP_LT: 2929 return (p < a1); 2930 case PF_OP_LE: 2931 return (p <= a1); 2932 case PF_OP_GT: 2933 return (p > a1); 2934 case PF_OP_GE: 2935 return (p >= a1); 2936 } 2937 return (0); /* never reached */ 2938 } 2939 2940 int 2941 pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p) 2942 { 2943 return (pf_match(op, ntohs(a1), ntohs(a2), ntohs(p))); 2944 } 2945 2946 int 2947 pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u) 2948 { 2949 if (u == UID_MAX && op != PF_OP_EQ && op != PF_OP_NE) 2950 return (0); 2951 return (pf_match(op, a1, a2, u)); 2952 } 2953 2954 int 2955 pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g) 2956 { 2957 if (g == GID_MAX && op != PF_OP_EQ && op != PF_OP_NE) 2958 return (0); 2959 return (pf_match(op, a1, a2, g)); 2960 } 2961 2962 int 2963 pf_match_tag(struct mbuf *m, struct pf_rule *r, int *tag) 2964 { 2965 if (*tag == -1) 2966 *tag = m->m_pkthdr.pf.tag; 2967 2968 return ((!r->match_tag_not && r->match_tag == *tag) || 2969 (r->match_tag_not && r->match_tag != *tag)); 2970 } 2971 2972 int 2973 pf_match_rcvif(struct mbuf *m, struct pf_rule *r) 2974 { 2975 struct ifnet *ifp; 2976 struct pfi_kif *kif; 2977 2978 ifp = if_get(m->m_pkthdr.ph_ifidx); 2979 if (ifp == NULL) 2980 return (0); 2981 2982 #if NCARP > 0 2983 if (ifp->if_type == IFT_CARP && ifp->if_carpdev) 2984 kif = (struct pfi_kif *)ifp->if_carpdev->if_pf_kif; 2985 else 2986 #endif /* NCARP */ 2987 kif = (struct pfi_kif *)ifp->if_pf_kif; 2988 2989 if_put(ifp); 2990 2991 if (kif == NULL) { 2992 DPFPRINTF(LOG_ERR, 2993 "pf_test_via: kif == NULL, @%d via %s", 2994 r->nr, r->rcv_ifname); 2995 return (0); 2996 } 2997 2998 return (pfi_kif_match(r->rcv_kif, kif)); 2999 } 3000 3001 void 3002 pf_tag_packet(struct mbuf *m, int tag, int rtableid) 3003 { 3004 if (tag > 0) 3005 m->m_pkthdr.pf.tag = tag; 3006 if (rtableid >= 0) 3007 m->m_pkthdr.ph_rtableid = (u_int)rtableid; 3008 } 3009 3010 void 3011 pf_step_into_anchor(int *depth, struct pf_ruleset **rs, 3012 struct pf_rule **r, struct pf_rule **a) 3013 { 3014 struct pf_anchor_stackframe *f; 3015 3016 if (*depth >= sizeof(pf_anchor_stack) / 3017 sizeof(pf_anchor_stack[0])) { 3018 log(LOG_ERR, "pf_step_into_anchor: stack overflow\n"); 3019 *r = TAILQ_NEXT(*r, entries); 3020 return; 3021 } else if (a != NULL) 3022 *a = *r; 3023 f = pf_anchor_stack + (*depth)++; 3024 f->rs = *rs; 3025 
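/* the frame records the current position, which pf_step_out_of_anchor() restores to resume the walk */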
f->r = *r; 3026 if ((*r)->anchor_wildcard) { 3027 f->parent = &(*r)->anchor->children; 3028 if ((f->child = RB_MIN(pf_anchor_node, f->parent)) == NULL) { 3029 *r = NULL; 3030 return; 3031 } 3032 *rs = &f->child->ruleset; 3033 } else { 3034 f->parent = NULL; 3035 f->child = NULL; 3036 *rs = &(*r)->anchor->ruleset; 3037 } 3038 *r = TAILQ_FIRST((*rs)->rules.active.ptr); 3039 } 3040 3041 int 3042 pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs, 3043 struct pf_rule **r, struct pf_rule **a, int *match) 3044 { 3045 struct pf_anchor_stackframe *f; 3046 int quick = 0; 3047 3048 do { 3049 if (*depth <= 0) 3050 break; 3051 f = pf_anchor_stack + *depth - 1; 3052 if (f->parent != NULL && f->child != NULL) { 3053 f->child = RB_NEXT(pf_anchor_node, f->parent, f->child); 3054 if (f->child != NULL) { 3055 *rs = &f->child->ruleset; 3056 *r = TAILQ_FIRST((*rs)->rules.active.ptr); 3057 if (*r == NULL) 3058 continue; 3059 else 3060 break; 3061 } 3062 } 3063 (*depth)--; 3064 if (*depth == 0 && a != NULL) 3065 *a = NULL; 3066 else if (a != NULL) 3067 *a = f->r; 3068 *rs = f->rs; 3069 if (*match > *depth) { 3070 *match = *depth; 3071 if (f->r->quick) 3072 quick = 1; 3073 } 3074 *r = TAILQ_NEXT(f->r, entries); 3075 } while (*r == NULL); 3076 3077 return (quick); 3078 } 3079 3080 void 3081 pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr, 3082 struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af) 3083 { 3084 switch (af) { 3085 case AF_INET: 3086 naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) | 3087 ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]); 3088 break; 3089 #ifdef INET6 3090 case AF_INET6: 3091 naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) | 3092 ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]); 3093 naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) | 3094 ((rmask->addr32[1] ^ 0xffffffff ) & saddr->addr32[1]); 3095 naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) | 3096 ((rmask->addr32[2] ^ 0xffffffff ) & saddr->addr32[2]); 3097 naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) | 3098 ((rmask->addr32[3] ^ 0xffffffff ) & saddr->addr32[3]); 3099 break; 3100 #endif /* INET6 */ 3101 default: 3102 unhandled_af(af); 3103 } 3104 } 3105 3106 void 3107 pf_addr_inc(struct pf_addr *addr, sa_family_t af) 3108 { 3109 switch (af) { 3110 case AF_INET: 3111 addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1); 3112 break; 3113 #ifdef INET6 3114 case AF_INET6: 3115 if (addr->addr32[3] == 0xffffffff) { 3116 addr->addr32[3] = 0; 3117 if (addr->addr32[2] == 0xffffffff) { 3118 addr->addr32[2] = 0; 3119 if (addr->addr32[1] == 0xffffffff) { 3120 addr->addr32[1] = 0; 3121 addr->addr32[0] = 3122 htonl(ntohl(addr->addr32[0]) + 1); 3123 } else 3124 addr->addr32[1] = 3125 htonl(ntohl(addr->addr32[1]) + 1); 3126 } else 3127 addr->addr32[2] = 3128 htonl(ntohl(addr->addr32[2]) + 1); 3129 } else 3130 addr->addr32[3] = 3131 htonl(ntohl(addr->addr32[3]) + 1); 3132 break; 3133 #endif /* INET6 */ 3134 default: 3135 unhandled_af(af); 3136 } 3137 } 3138 3139 int 3140 pf_socket_lookup(struct pf_pdesc *pd) 3141 { 3142 struct pf_addr *saddr, *daddr; 3143 u_int16_t sport, dport; 3144 struct inpcbtable *tb; 3145 struct inpcb *inp; 3146 3147 if (pd == NULL) 3148 return (-1); 3149 pd->lookup.uid = UID_MAX; 3150 pd->lookup.gid = GID_MAX; 3151 pd->lookup.pid = NO_PID; 3152 switch (pd->proto) { 3153 case IPPROTO_TCP: 3154 if (pd->hdr.tcp == NULL) 3155 return (-1); 3156 sport = pd->hdr.tcp->th_sport; 3157 dport = pd->hdr.tcp->th_dport; 3158 tb = &tcbtable; 3159 break; 3160 case 
IPPROTO_UDP: 3161 if (pd->hdr.udp == NULL) 3162 return (-1); 3163 sport = pd->hdr.udp->uh_sport; 3164 dport = pd->hdr.udp->uh_dport; 3165 tb = &udbtable; 3166 break; 3167 default: 3168 return (-1); 3169 } 3170 if (pd->dir == PF_IN) { 3171 saddr = pd->src; 3172 daddr = pd->dst; 3173 } else { 3174 u_int16_t p; 3175 3176 p = sport; 3177 sport = dport; 3178 dport = p; 3179 saddr = pd->dst; 3180 daddr = pd->src; 3181 } 3182 switch (pd->af) { 3183 case AF_INET: 3184 /* 3185 * Fails when rtable is changed while evaluating the ruleset 3186 * The socket looked up will not match the one hit in the end. 3187 */ 3188 inp = in_pcbhashlookup(tb, saddr->v4, sport, daddr->v4, dport, 3189 pd->rdomain); 3190 if (inp == NULL) { 3191 inp = in_pcblookup_listen(tb, daddr->v4, dport, 0, 3192 NULL, pd->rdomain); 3193 if (inp == NULL) 3194 return (-1); 3195 } 3196 break; 3197 #ifdef INET6 3198 case AF_INET6: 3199 inp = in6_pcbhashlookup(tb, &saddr->v6, sport, &daddr->v6, 3200 dport, pd->rdomain); 3201 if (inp == NULL) { 3202 inp = in6_pcblookup_listen(tb, &daddr->v6, dport, 0, 3203 NULL, pd->rdomain); 3204 if (inp == NULL) 3205 return (-1); 3206 } 3207 break; 3208 #endif /* INET6 */ 3209 default: 3210 unhandled_af(pd->af); 3211 } 3212 pd->lookup.uid = inp->inp_socket->so_euid; 3213 pd->lookup.gid = inp->inp_socket->so_egid; 3214 pd->lookup.pid = inp->inp_socket->so_cpid; 3215 return (1); 3216 } 3217 3218 u_int8_t 3219 pf_get_wscale(struct pf_pdesc *pd) 3220 { 3221 struct tcphdr *th = pd->hdr.tcp; 3222 int hlen; 3223 u_int8_t hdr[60]; 3224 u_int8_t *opt, optlen; 3225 u_int8_t wscale = 0; 3226 3227 hlen = th->th_off << 2; /* hlen <= sizeof(hdr) */ 3228 if (hlen <= sizeof(struct tcphdr)) 3229 return (0); 3230 if (!pf_pull_hdr(pd->m, pd->off, hdr, hlen, NULL, NULL, pd->af)) 3231 return (0); 3232 opt = hdr + sizeof(struct tcphdr); 3233 hlen -= sizeof(struct tcphdr); 3234 while (hlen >= 3) { 3235 switch (*opt) { 3236 case TCPOPT_EOL: 3237 case TCPOPT_NOP: 3238 ++opt; 3239 --hlen; 3240 break; 3241 case TCPOPT_WINDOW: 3242 wscale = opt[2]; 3243 if (wscale > TCP_MAX_WINSHIFT) 3244 wscale = TCP_MAX_WINSHIFT; 3245 wscale |= PF_WSCALE_FLAG; 3246 /* FALLTHROUGH */ 3247 default: 3248 optlen = opt[1]; 3249 if (optlen < 2) 3250 optlen = 2; 3251 hlen -= optlen; 3252 opt += optlen; 3253 break; 3254 } 3255 } 3256 return (wscale); 3257 } 3258 3259 u_int16_t 3260 pf_get_mss(struct pf_pdesc *pd) 3261 { 3262 struct tcphdr *th = pd->hdr.tcp; 3263 int hlen; 3264 u_int8_t hdr[60]; 3265 u_int8_t *opt, optlen; 3266 u_int16_t mss = tcp_mssdflt; 3267 3268 hlen = th->th_off << 2; /* hlen <= sizeof(hdr) */ 3269 if (hlen <= sizeof(struct tcphdr)) 3270 return (0); 3271 if (!pf_pull_hdr(pd->m, pd->off, hdr, hlen, NULL, NULL, pd->af)) 3272 return (0); 3273 opt = hdr + sizeof(struct tcphdr); 3274 hlen -= sizeof(struct tcphdr); 3275 while (hlen >= TCPOLEN_MAXSEG) { 3276 switch (*opt) { 3277 case TCPOPT_EOL: 3278 case TCPOPT_NOP: 3279 ++opt; 3280 --hlen; 3281 break; 3282 case TCPOPT_MAXSEG: 3283 memcpy(&mss, (opt + 2), 2); 3284 mss = ntohs(mss); 3285 /* FALLTHROUGH */ 3286 default: 3287 optlen = opt[1]; 3288 if (optlen < 2) 3289 optlen = 2; 3290 hlen -= optlen; 3291 opt += optlen; 3292 break; 3293 } 3294 } 3295 return (mss); 3296 } 3297 3298 u_int16_t 3299 pf_calc_mss(struct pf_addr *addr, sa_family_t af, int rtableid, u_int16_t offer) 3300 { 3301 struct ifnet *ifp; 3302 struct sockaddr_in *dst; 3303 #ifdef INET6 3304 struct sockaddr_in6 *dst6; 3305 #endif /* INET6 */ 3306 struct rtentry *rt = NULL; 3307 struct sockaddr_storage ss; 3308 int hlen; 3309 
u_int16_t mss = tcp_mssdflt; 3310 3311 memset(&ss, 0, sizeof(ss)); 3312 3313 switch (af) { 3314 case AF_INET: 3315 hlen = sizeof(struct ip); 3316 dst = (struct sockaddr_in *)&ss; 3317 dst->sin_family = AF_INET; 3318 dst->sin_len = sizeof(*dst); 3319 dst->sin_addr = addr->v4; 3320 rt = rtalloc(sintosa(dst), 0, rtableid); 3321 break; 3322 #ifdef INET6 3323 case AF_INET6: 3324 hlen = sizeof(struct ip6_hdr); 3325 dst6 = (struct sockaddr_in6 *)&ss; 3326 dst6->sin6_family = AF_INET6; 3327 dst6->sin6_len = sizeof(*dst6); 3328 dst6->sin6_addr = addr->v6; 3329 rt = rtalloc(sin6tosa(dst6), 0, rtableid); 3330 break; 3331 #endif /* INET6 */ 3332 } 3333 3334 if (rt != NULL && (ifp = if_get(rt->rt_ifidx)) != NULL) { 3335 mss = ifp->if_mtu - hlen - sizeof(struct tcphdr); 3336 mss = max(tcp_mssdflt, mss); 3337 if_put(ifp); 3338 } 3339 rtfree(rt); 3340 mss = min(mss, offer); 3341 mss = max(mss, 64); /* sanity - at least max opt space */ 3342 return (mss); 3343 } 3344 3345 static __inline int 3346 pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr, sa_family_t af) 3347 { 3348 struct pf_rule *r = s->rule.ptr; 3349 struct pf_src_node *sns[PF_SN_MAX]; 3350 int rv; 3351 3352 s->rt_kif = NULL; 3353 if (!r->rt) 3354 return (0); 3355 3356 bzero(sns, sizeof(sns)); 3357 switch (af) { 3358 case AF_INET: 3359 rv = pf_map_addr(AF_INET, r, saddr, &s->rt_addr, NULL, sns, 3360 &r->route, PF_SN_ROUTE); 3361 break; 3362 #ifdef INET6 3363 case AF_INET6: 3364 rv = pf_map_addr(AF_INET6, r, saddr, &s->rt_addr, NULL, sns, 3365 &r->route, PF_SN_ROUTE); 3366 break; 3367 #endif /* INET6 */ 3368 default: 3369 rv = 1; 3370 } 3371 3372 if (rv == 0) { 3373 s->rt_kif = r->route.kif; 3374 s->natrule.ptr = r; 3375 } 3376 3377 return (rv); 3378 } 3379 3380 u_int32_t 3381 pf_tcp_iss(struct pf_pdesc *pd) 3382 { 3383 SHA2_CTX ctx; 3384 union { 3385 uint8_t bytes[SHA512_DIGEST_LENGTH]; 3386 uint32_t words[1]; 3387 } digest; 3388 3389 if (pf_tcp_secret_init == 0) { 3390 arc4random_buf(pf_tcp_secret, sizeof(pf_tcp_secret)); 3391 SHA512Init(&pf_tcp_secret_ctx); 3392 SHA512Update(&pf_tcp_secret_ctx, pf_tcp_secret, 3393 sizeof(pf_tcp_secret)); 3394 pf_tcp_secret_init = 1; 3395 } 3396 ctx = pf_tcp_secret_ctx; 3397 3398 SHA512Update(&ctx, &pd->rdomain, sizeof(pd->rdomain)); 3399 SHA512Update(&ctx, &pd->hdr.tcp->th_sport, sizeof(u_short)); 3400 SHA512Update(&ctx, &pd->hdr.tcp->th_dport, sizeof(u_short)); 3401 switch (pd->af) { 3402 case AF_INET: 3403 SHA512Update(&ctx, &pd->src->v4, sizeof(struct in_addr)); 3404 SHA512Update(&ctx, &pd->dst->v4, sizeof(struct in_addr)); 3405 break; 3406 #ifdef INET6 3407 case AF_INET6: 3408 SHA512Update(&ctx, &pd->src->v6, sizeof(struct in6_addr)); 3409 SHA512Update(&ctx, &pd->dst->v6, sizeof(struct in6_addr)); 3410 break; 3411 #endif /* INET6 */ 3412 } 3413 SHA512Final(digest.bytes, &ctx); 3414 pf_tcp_iss_off += 4096; 3415 return (digest.words[0] + tcp_iss + pf_tcp_iss_off); 3416 } 3417 3418 void 3419 pf_rule_to_actions(struct pf_rule *r, struct pf_rule_actions *a) 3420 { 3421 if (r->qid) 3422 a->qid = r->qid; 3423 if (r->pqid) 3424 a->pqid = r->pqid; 3425 if (r->rtableid >= 0) 3426 a->rtableid = r->rtableid; 3427 #if NPFLOG > 0 3428 a->log |= r->log; 3429 #endif /* NPFLOG > 0 */ 3430 if (r->scrub_flags & PFSTATE_SETTOS) 3431 a->set_tos = r->set_tos; 3432 if (r->min_ttl) 3433 a->min_ttl = r->min_ttl; 3434 if (r->max_mss) 3435 a->max_mss = r->max_mss; 3436 a->flags |= (r->scrub_flags & (PFSTATE_NODF|PFSTATE_RANDOMID| 3437 PFSTATE_SETTOS|PFSTATE_SCRUB_TCP|PFSTATE_SETPRIO)); 3438 if (r->scrub_flags & 
PFSTATE_SETPRIO) { 3439 a->set_prio[0] = r->set_prio[0]; 3440 a->set_prio[1] = r->set_prio[1]; 3441 } 3442 } 3443 3444 #define PF_TEST_ATTRIB(t, a) \ 3445 do { \ 3446 if (t) { \ 3447 r = a; \ 3448 goto nextrule; \ 3449 } \ 3450 } while (0) 3451 3452 int 3453 pf_test_rule(struct pf_pdesc *pd, struct pf_rule **rm, struct pf_state **sm, 3454 struct pf_rule **am, struct pf_ruleset **rsm, u_short *reason) 3455 { 3456 struct pf_rule *r; 3457 struct pf_rule *nr = NULL; 3458 struct pf_rule *a = NULL; 3459 struct pf_ruleset *arsm = NULL; 3460 struct pf_ruleset *aruleset = NULL; 3461 struct pf_ruleset *ruleset = NULL; 3462 struct pf_rule_slist rules; 3463 struct pf_rule_item *ri; 3464 struct pf_src_node *sns[PF_SN_MAX]; 3465 struct tcphdr *th = pd->hdr.tcp; 3466 struct pf_state_key *skw = NULL, *sks = NULL; 3467 struct pf_rule_actions act; 3468 int rewrite = 0; 3469 int tag = -1; 3470 int asd = 0; 3471 int match = 0; 3472 int state_icmp = 0, icmp_dir = 0; 3473 u_int16_t virtual_type, virtual_id; 3474 u_int8_t icmptype = 0, icmpcode = 0; 3475 int action = PF_DROP; 3476 3477 bzero(&act, sizeof(act)); 3478 bzero(sns, sizeof(sns)); 3479 act.rtableid = pd->rdomain; 3480 SLIST_INIT(&rules); 3481 3482 if (pd->dir == PF_IN && if_congested()) { 3483 REASON_SET(reason, PFRES_CONGEST); 3484 return (PF_DROP); 3485 } 3486 3487 switch (pd->virtual_proto) { 3488 case IPPROTO_ICMP: 3489 icmptype = pd->hdr.icmp->icmp_type; 3490 icmpcode = pd->hdr.icmp->icmp_code; 3491 state_icmp = pf_icmp_mapping(pd, icmptype, 3492 &icmp_dir, &virtual_id, &virtual_type); 3493 if (icmp_dir == PF_IN) { 3494 pd->osport = pd->nsport = virtual_id; 3495 pd->odport = pd->ndport = virtual_type; 3496 } else { 3497 pd->osport = pd->nsport = virtual_type; 3498 pd->odport = pd->ndport = virtual_id; 3499 } 3500 break; 3501 #ifdef INET6 3502 case IPPROTO_ICMPV6: 3503 icmptype = pd->hdr.icmp6->icmp6_type; 3504 icmpcode = pd->hdr.icmp6->icmp6_code; 3505 state_icmp = pf_icmp_mapping(pd, icmptype, 3506 &icmp_dir, &virtual_id, &virtual_type); 3507 if (icmp_dir == PF_IN) { 3508 pd->osport = pd->nsport = virtual_id; 3509 pd->odport = pd->ndport = virtual_type; 3510 } else { 3511 pd->osport = pd->nsport = virtual_type; 3512 pd->odport = pd->ndport = virtual_id; 3513 } 3514 break; 3515 #endif /* INET6 */ 3516 } 3517 3518 ruleset = &pf_main_ruleset; 3519 r = TAILQ_FIRST(pf_main_ruleset.rules.active.ptr); 3520 while (r != NULL) { 3521 if (r->rule_flag & PFRULE_EXPIRED) { 3522 r = TAILQ_NEXT(r, entries); 3523 goto nextrule; 3524 } 3525 r->evaluations++; 3526 PF_TEST_ATTRIB((pfi_kif_match(r->kif, pd->kif) == r->ifnot), 3527 r->skip[PF_SKIP_IFP].ptr); 3528 PF_TEST_ATTRIB((r->direction && r->direction != pd->dir), 3529 r->skip[PF_SKIP_DIR].ptr); 3530 PF_TEST_ATTRIB((r->onrdomain >= 0 && 3531 (r->onrdomain == pd->rdomain) == r->ifnot), 3532 r->skip[PF_SKIP_RDOM].ptr); 3533 PF_TEST_ATTRIB((r->af && r->af != pd->af), 3534 r->skip[PF_SKIP_AF].ptr); 3535 PF_TEST_ATTRIB((r->proto && r->proto != pd->proto), 3536 r->skip[PF_SKIP_PROTO].ptr); 3537 PF_TEST_ATTRIB((PF_MISMATCHAW(&r->src.addr, &pd->nsaddr, 3538 pd->naf, r->src.neg, pd->kif, act.rtableid)), 3539 r->skip[PF_SKIP_SRC_ADDR].ptr); 3540 PF_TEST_ATTRIB((PF_MISMATCHAW(&r->dst.addr, &pd->ndaddr, pd->af, 3541 r->dst.neg, NULL, act.rtableid)), 3542 r->skip[PF_SKIP_DST_ADDR].ptr); 3543 3544 switch (pd->virtual_proto) { 3545 case PF_VPROTO_FRAGMENT: 3546 /* tcp/udp only. 
port_op always 0 in other cases */ 3547 PF_TEST_ATTRIB((r->src.port_op || r->dst.port_op), 3548 TAILQ_NEXT(r, entries)); 3549 PF_TEST_ATTRIB((pd->proto == IPPROTO_TCP && r->flagset), 3550 TAILQ_NEXT(r, entries)); 3551 /* icmp only. type/code always 0 in other cases */ 3552 PF_TEST_ATTRIB((r->type || r->code), 3553 TAILQ_NEXT(r, entries)); 3554 /* tcp/udp only. {uid|gid}.op always 0 in other cases */ 3555 PF_TEST_ATTRIB((r->gid.op || r->uid.op), 3556 TAILQ_NEXT(r, entries)); 3557 break; 3558 3559 case IPPROTO_TCP: 3560 PF_TEST_ATTRIB(((r->flagset & th->th_flags) != 3561 r->flags), 3562 TAILQ_NEXT(r, entries)); 3563 PF_TEST_ATTRIB((r->os_fingerprint != PF_OSFP_ANY && 3564 !pf_osfp_match(pf_osfp_fingerprint(pd), 3565 r->os_fingerprint)), 3566 TAILQ_NEXT(r, entries)); 3567 /* FALLTHROUGH */ 3568 3569 case IPPROTO_UDP: 3570 /* tcp/udp only. port_op always 0 in other cases */ 3571 PF_TEST_ATTRIB((r->src.port_op && 3572 !pf_match_port(r->src.port_op, r->src.port[0], 3573 r->src.port[1], pd->nsport)), 3574 r->skip[PF_SKIP_SRC_PORT].ptr); 3575 PF_TEST_ATTRIB((r->dst.port_op && 3576 !pf_match_port(r->dst.port_op, r->dst.port[0], 3577 r->dst.port[1], pd->ndport)), 3578 r->skip[PF_SKIP_DST_PORT].ptr); 3579 /* tcp/udp only. uid.op always 0 in other cases */ 3580 PF_TEST_ATTRIB((r->uid.op && (pd->lookup.done || 3581 (pd->lookup.done = 3582 pf_socket_lookup(pd), 1)) && 3583 !pf_match_uid(r->uid.op, r->uid.uid[0], 3584 r->uid.uid[1], pd->lookup.uid)), 3585 TAILQ_NEXT(r, entries)); 3586 /* tcp/udp only. gid.op always 0 in other cases */ 3587 PF_TEST_ATTRIB((r->gid.op && (pd->lookup.done || 3588 (pd->lookup.done = 3589 pf_socket_lookup(pd), 1)) && 3590 !pf_match_gid(r->gid.op, r->gid.gid[0], 3591 r->gid.gid[1], pd->lookup.gid)), 3592 TAILQ_NEXT(r, entries)); 3593 break; 3594 3595 case IPPROTO_ICMP: 3596 case IPPROTO_ICMPV6: 3597 /* icmp only. type always 0 in other cases */ 3598 PF_TEST_ATTRIB((r->type && r->type != icmptype + 1), 3599 TAILQ_NEXT(r, entries)); 3600 /* icmp only. type always 0 in other cases */ 3601 PF_TEST_ATTRIB((r->code && r->code != icmpcode + 1), 3602 TAILQ_NEXT(r, entries)); 3603 /* icmp only. don't create states on replies */ 3604 PF_TEST_ATTRIB((r->keep_state && !state_icmp && 3605 (r->rule_flag & PFRULE_STATESLOPPY) == 0 && 3606 icmp_dir != PF_IN), 3607 TAILQ_NEXT(r, entries)); 3608 break; 3609 3610 default: 3611 break; 3612 } 3613 3614 PF_TEST_ATTRIB((r->rule_flag & PFRULE_FRAGMENT && 3615 pd->virtual_proto != PF_VPROTO_FRAGMENT), 3616 TAILQ_NEXT(r, entries)); 3617 PF_TEST_ATTRIB((r->tos && !(r->tos == pd->tos)), 3618 TAILQ_NEXT(r, entries)); 3619 PF_TEST_ATTRIB((r->prob && 3620 r->prob <= arc4random_uniform(UINT_MAX - 1) + 1), 3621 TAILQ_NEXT(r, entries)); 3622 PF_TEST_ATTRIB((r->match_tag && !pf_match_tag(pd->m, r, &tag)), 3623 TAILQ_NEXT(r, entries)); 3624 PF_TEST_ATTRIB((r->rcv_kif && pf_match_rcvif(pd->m, r) == 3625 r->rcvifnot), 3626 TAILQ_NEXT(r, entries)); 3627 PF_TEST_ATTRIB((r->prio && 3628 (r->prio == PF_PRIO_ZERO ? 
0 : r->prio) != pd->m->m_pkthdr.pf.prio),
3629 TAILQ_NEXT(r, entries));
3630
3631 /* all attribute tests passed; the rule matches this packet */
3632 if (r->tag)
3633 tag = r->tag;
3634 if (r->anchor == NULL) {
3635 if (r->action == PF_MATCH) {
3636 if ((ri = pool_get(&pf_rule_item_pl,
3637 PR_NOWAIT)) == NULL) {
3638 REASON_SET(reason, PFRES_MEMORY);
3639 goto cleanup;
3640 }
3641 ri->r = r;
3642 /* order is irrelevant */
3643 SLIST_INSERT_HEAD(&rules, ri, entry);
3644 pf_rule_to_actions(r, &act);
3645 if (r->rule_flag & PFRULE_AFTO)
3646 pd->naf = r->naf;
3647 if (pf_get_transaddr(r, pd, sns, &nr) == -1) {
3648 REASON_SET(reason, PFRES_TRANSLATE);
3649 goto cleanup;
3650 }
3651 #if NPFLOG > 0
3652 if (r->log) {
3653 REASON_SET(reason, PFRES_MATCH);
3654 PFLOG_PACKET(pd, *reason, r, a, ruleset,
3655 NULL);
3656 }
3657 #endif /* NPFLOG > 0 */
3658 } else {
3659 match = asd;
3660 *rm = r;
3661 *am = a;
3662 *rsm = ruleset;
3663 arsm = aruleset;
3664 }
3665
3666 #if NPFLOG > 0
3667 if (act.log & PF_LOG_MATCHES)
3668 pf_log_matches(pd, r, a, ruleset, &rules);
3669 #endif /* NPFLOG > 0 */
3670
3671 if (r->quick)
3672 break;
3673 r = TAILQ_NEXT(r, entries);
3674 } else {
3675 aruleset = ruleset;
3676 pf_step_into_anchor(&asd, &ruleset, &r, &a);
3677 }
3678
3679 nextrule:
3680 if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
3681 &r, &a, &match))
3682 break;
3683 }
3684 r = *rm; /* matching rule */
3685 a = *am; /* rule that defines an anchor containing 'r' */
3686 ruleset = *rsm; /* ruleset of the anchor defined by the rule 'a' */
3687 aruleset = arsm; /* ruleset of the 'a' rule itself */
3688
3689 /* apply actions for last matching pass/block rule */
3690 pf_rule_to_actions(r, &act);
3691 if (r->rule_flag & PFRULE_AFTO)
3692 pd->naf = r->naf;
3693 if (pf_get_transaddr(r, pd, sns, &nr) == -1) {
3694 REASON_SET(reason, PFRES_TRANSLATE);
3695 goto cleanup;
3696 }
3697 REASON_SET(reason, PFRES_MATCH);
3698
3699 #if NPFLOG > 0
3700 if (r->log)
3701 PFLOG_PACKET(pd, *reason, r, a, ruleset, NULL);
3702 if (act.log & PF_LOG_MATCHES)
3703 pf_log_matches(pd, r, a, ruleset, &rules);
3704 #endif /* NPFLOG > 0 */
3705
3706 if (pd->virtual_proto != PF_VPROTO_FRAGMENT &&
3707 (r->action == PF_DROP) &&
3708 ((r->rule_flag & PFRULE_RETURNRST) ||
3709 (r->rule_flag & PFRULE_RETURNICMP) ||
3710 (r->rule_flag & PFRULE_RETURN))) {
3711 if (pd->proto == IPPROTO_TCP &&
3712 ((r->rule_flag & PFRULE_RETURNRST) ||
3713 (r->rule_flag & PFRULE_RETURN)) &&
3714 !(th->th_flags & TH_RST)) {
3715 u_int32_t ack = ntohl(th->th_seq) + pd->p_len;
3716
3717 if (pf_check_tcp_cksum(pd->m, pd->off,
3718 pd->tot_len - pd->off, pd->af))
3719 REASON_SET(reason, PFRES_PROTCKSUM);
3720 else {
3721 if (th->th_flags & TH_SYN)
3722 ack++;
3723 if (th->th_flags & TH_FIN)
3724 ack++;
3725 pf_send_tcp(r, pd->af, pd->dst,
3726 pd->src, th->th_dport, th->th_sport,
3727 ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0,
3728 r->return_ttl, 1, 0, pd->rdomain);
3729 }
3730 } else if ((pd->proto != IPPROTO_ICMP ||
3731 ICMP_INFOTYPE(icmptype)) && pd->af == AF_INET &&
3732 r->return_icmp)
3733 pf_send_icmp(pd->m, r->return_icmp >> 8,
3734 r->return_icmp & 255, pd->af, r, pd->rdomain);
3735 else if ((pd->proto != IPPROTO_ICMPV6 ||
3736 (icmptype >= ICMP6_ECHO_REQUEST &&
3737 icmptype != ND_REDIRECT)) && pd->af == AF_INET6 &&
3738 r->return_icmp6)
3739 pf_send_icmp(pd->m, r->return_icmp6 >> 8,
3740 r->return_icmp6 & 255, pd->af, r, pd->rdomain);
3741 }
3742
3743 if (r->action == PF_DROP)
3744 goto cleanup;
3745
3746 pf_tag_packet(pd->m, tag, act.rtableid);
3747 if (act.rtableid >= 0 &&
3748 rtable_l2(act.rtableid) != pd->rdomain)
3749 pd->destchg = 1;
3750
3751 if (r->action == PF_PASS && pd->badopts && !r->allow_opts) {
3752 REASON_SET(reason, PFRES_IPOPTIONS);
3753 #if NPFLOG > 0
3754 pd->pflog |= PF_LOG_FORCE;
3755 #endif /* NPFLOG > 0 */
3756 DPFPRINTF(LOG_NOTICE, "dropping packet with "
3757 "ip/ipv6 options in pf_test_rule()");
3758 goto cleanup;
3759 }
3760
3761 action = PF_PASS;
3762
3763 if (pd->virtual_proto != PF_VPROTO_FRAGMENT
3764 && !state_icmp && r->keep_state) {
3765
3766 if (r->rule_flag & PFRULE_SRCTRACK &&
3767 pf_insert_src_node(&sns[PF_SN_NONE], r, PF_SN_NONE, pd->af,
3768 pd->src, NULL) != 0) {
3769 REASON_SET(reason, PFRES_SRCLIMIT);
3770 goto cleanup;
3771 }
3772
3773 if (r->max_states && (r->states_cur >= r->max_states)) {
3774 pf_status.lcounters[LCNT_STATES]++;
3775 REASON_SET(reason, PFRES_MAXSTATES);
3776 goto cleanup;
3777 }
3778
3779 action = pf_create_state(pd, r, a, nr, &skw, &sks, &rewrite,
3780 sm, tag, &rules, &act, sns);
3781
3782 if (action != PF_PASS)
3783 goto cleanup;
3784 if (sks != skw) {
3785 struct pf_state_key *sk;
3786
3787 if (pd->dir == PF_IN)
3788 sk = sks;
3789 else
3790 sk = skw;
3791 rewrite += pf_translate(pd,
3792 &sk->addr[pd->af == pd->naf ? pd->sidx : pd->didx],
3793 sk->port[pd->af == pd->naf ? pd->sidx : pd->didx],
3794 &sk->addr[pd->af == pd->naf ? pd->didx : pd->sidx],
3795 sk->port[pd->af == pd->naf ? pd->didx : pd->sidx],
3796 virtual_type, icmp_dir);
3797 }
3798
3799 #ifdef INET6
3800 if (rewrite && skw->af != sks->af)
3801 action = PF_AFRT;
3802 #endif /* INET6 */
3803
3804 } else {
3805 while ((ri = SLIST_FIRST(&rules))) {
3806 SLIST_REMOVE_HEAD(&rules, entry);
3807 pool_put(&pf_rule_item_pl, ri);
3808 }
3809 }
3810
3811 /* copy back packet headers if needed */
3812 if (rewrite && pd->hdrlen) {
3813 m_copyback(pd->m, pd->off, pd->hdrlen, pd->hdr.any, M_NOWAIT);
3814 }
3815
3816 #if NPFSYNC > 0
3817 if (*sm != NULL && !ISSET((*sm)->state_flags, PFSTATE_NOSYNC) &&
3818 pd->dir == PF_OUT && pfsync_up()) {
3819 /*
3820 * We want the state created, but we don't
3821 * want to send this in case a partner
3822 * firewall has to know about it to allow
3823 * replies through it.
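* pfsync_defer() holds the packet back until the peer acknowledges
* the state insert or a short timeout fires, and only then releases it.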
3824 */ 3825 if (pfsync_defer(*sm, pd->m)) 3826 return (PF_DEFER); 3827 } 3828 #endif /* NPFSYNC > 0 */ 3829 3830 if (r->rule_flag & PFRULE_ONCE) { 3831 if ((a != NULL) && TAILQ_EMPTY(a->ruleset->rules.active.ptr)) { 3832 a->rule_flag |= PFRULE_EXPIRED; 3833 a->exptime = time_second; 3834 SLIST_INSERT_HEAD(&pf_rule_gcl, a, gcle); 3835 } 3836 3837 r->rule_flag |= PFRULE_EXPIRED; 3838 r->exptime = time_second; 3839 SLIST_INSERT_HEAD(&pf_rule_gcl, r, gcle); 3840 } 3841 3842 return (action); 3843 3844 cleanup: 3845 while ((ri = SLIST_FIRST(&rules))) { 3846 SLIST_REMOVE_HEAD(&rules, entry); 3847 pool_put(&pf_rule_item_pl, ri); 3848 } 3849 3850 return (action); 3851 } 3852 3853 static __inline int 3854 pf_create_state(struct pf_pdesc *pd, struct pf_rule *r, struct pf_rule *a, 3855 struct pf_rule *nr, struct pf_state_key **skw, struct pf_state_key **sks, 3856 int *rewrite, struct pf_state **sm, int tag, struct pf_rule_slist *rules, 3857 struct pf_rule_actions *act, struct pf_src_node *sns[PF_SN_MAX]) 3858 { 3859 struct pf_state *s = NULL; 3860 struct tcphdr *th = pd->hdr.tcp; 3861 u_int16_t mss = tcp_mssdflt; 3862 u_short reason; 3863 u_int i; 3864 3865 s = pool_get(&pf_state_pl, PR_NOWAIT | PR_ZERO); 3866 if (s == NULL) { 3867 REASON_SET(&reason, PFRES_MEMORY); 3868 goto csfailed; 3869 } 3870 s->rule.ptr = r; 3871 s->anchor.ptr = a; 3872 s->natrule.ptr = nr; 3873 if (r->allow_opts) 3874 s->state_flags |= PFSTATE_ALLOWOPTS; 3875 if (r->rule_flag & PFRULE_STATESLOPPY) 3876 s->state_flags |= PFSTATE_SLOPPY; 3877 if (r->rule_flag & PFRULE_PFLOW) 3878 s->state_flags |= PFSTATE_PFLOW; 3879 #if NPFLOG > 0 3880 s->log = act->log & PF_LOG_ALL; 3881 #endif /* NPFLOG > 0 */ 3882 s->qid = act->qid; 3883 s->pqid = act->pqid; 3884 s->rtableid[pd->didx] = act->rtableid; 3885 s->rtableid[pd->sidx] = -1; /* return traffic is routed normally */ 3886 s->min_ttl = act->min_ttl; 3887 s->set_tos = act->set_tos; 3888 s->max_mss = act->max_mss; 3889 s->state_flags |= act->flags; 3890 #if NPFSYNC > 0 3891 s->sync_state = PFSYNC_S_NONE; 3892 #endif /* NPFSYNC > 0 */ 3893 s->set_prio[0] = act->set_prio[0]; 3894 s->set_prio[1] = act->set_prio[1]; 3895 SLIST_INIT(&s->src_nodes); 3896 3897 switch (pd->proto) { 3898 case IPPROTO_TCP: 3899 s->src.seqlo = ntohl(th->th_seq); 3900 s->src.seqhi = s->src.seqlo + pd->p_len + 1; 3901 if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN && 3902 r->keep_state == PF_STATE_MODULATE) { 3903 /* Generate sequence number modulator */ 3904 if ((s->src.seqdiff = pf_tcp_iss(pd) - s->src.seqlo) == 3905 0) 3906 s->src.seqdiff = 1; 3907 pf_patch_32(pd, 3908 &th->th_seq, htonl(s->src.seqlo + s->src.seqdiff)); 3909 *rewrite = 1; 3910 } else 3911 s->src.seqdiff = 0; 3912 if (th->th_flags & TH_SYN) { 3913 s->src.seqhi++; 3914 s->src.wscale = pf_get_wscale(pd); 3915 } 3916 s->src.max_win = MAX(ntohs(th->th_win), 1); 3917 if (s->src.wscale & PF_WSCALE_MASK) { 3918 /* Remove scale factor from initial window */ 3919 int win = s->src.max_win; 3920 win += 1 << (s->src.wscale & PF_WSCALE_MASK); 3921 s->src.max_win = (win - 1) >> 3922 (s->src.wscale & PF_WSCALE_MASK); 3923 } 3924 if (th->th_flags & TH_FIN) 3925 s->src.seqhi++; 3926 s->dst.seqhi = 1; 3927 s->dst.max_win = 1; 3928 s->src.state = TCPS_SYN_SENT; 3929 s->dst.state = TCPS_CLOSED; 3930 s->timeout = PFTM_TCP_FIRST_PACKET; 3931 break; 3932 case IPPROTO_UDP: 3933 s->src.state = PFUDPS_SINGLE; 3934 s->dst.state = PFUDPS_NO_TRAFFIC; 3935 s->timeout = PFTM_UDP_FIRST_PACKET; 3936 break; 3937 case IPPROTO_ICMP: 3938 #ifdef INET6 3939 case IPPROTO_ICMPV6: 3940 
#endif /* INET6 */ 3941 s->timeout = PFTM_ICMP_FIRST_PACKET; 3942 break; 3943 default: 3944 s->src.state = PFOTHERS_SINGLE; 3945 s->dst.state = PFOTHERS_NO_TRAFFIC; 3946 s->timeout = PFTM_OTHER_FIRST_PACKET; 3947 } 3948 3949 s->creation = time_uptime; 3950 s->expire = time_uptime; 3951 3952 if (pd->proto == IPPROTO_TCP) { 3953 if (s->state_flags & PFSTATE_SCRUB_TCP && 3954 pf_normalize_tcp_init(pd, &s->src)) { 3955 REASON_SET(&reason, PFRES_MEMORY); 3956 goto csfailed; 3957 } 3958 if (s->state_flags & PFSTATE_SCRUB_TCP && s->src.scrub && 3959 pf_normalize_tcp_stateful(pd, &reason, s, &s->src, &s->dst, 3960 rewrite)) { 3961 /* This really shouldn't happen!!! */ 3962 DPFPRINTF(LOG_ERR, 3963 "pf_normalize_tcp_stateful failed on first pkt"); 3964 goto csfailed; 3965 } 3966 } 3967 s->direction = pd->dir; 3968 3969 if (pf_state_key_setup(pd, skw, sks, act->rtableid)) { 3970 REASON_SET(&reason, PFRES_MEMORY); 3971 goto csfailed; 3972 } 3973 3974 for (i = 0; i < PF_SN_MAX; i++) 3975 if (sns[i] != NULL) { 3976 struct pf_sn_item *sni; 3977 3978 sni = pool_get(&pf_sn_item_pl, PR_NOWAIT); 3979 if (sni == NULL) { 3980 REASON_SET(&reason, PFRES_MEMORY); 3981 goto csfailed; 3982 } 3983 sni->sn = sns[i]; 3984 SLIST_INSERT_HEAD(&s->src_nodes, sni, next); 3985 sni->sn->states++; 3986 } 3987 3988 if (pf_set_rt_ifp(s, pd->src, (*skw)->af) != 0) { 3989 REASON_SET(&reason, PFRES_NOROUTE); 3990 goto csfailed; 3991 } 3992 3993 if (pf_state_insert(BOUND_IFACE(r, pd->kif), skw, sks, s)) { 3994 pf_detach_state(s); 3995 *sks = *skw = NULL; 3996 REASON_SET(&reason, PFRES_STATEINS); 3997 goto csfailed; 3998 } else 3999 *sm = s; 4000 4001 /* 4002 * Make state responsible for rules it binds here. 4003 */ 4004 memcpy(&s->match_rules, rules, sizeof(s->match_rules)); 4005 bzero(rules, sizeof(*rules)); 4006 STATE_INC_COUNTERS(s); 4007 4008 if (tag > 0) { 4009 pf_tag_ref(tag); 4010 s->tag = tag; 4011 } 4012 if (pd->proto == IPPROTO_TCP && (th->th_flags & (TH_SYN|TH_ACK)) == 4013 TH_SYN && r->keep_state == PF_STATE_SYNPROXY) { 4014 int rtid = pd->rdomain; 4015 if (act->rtableid >= 0) 4016 rtid = act->rtableid; 4017 s->src.state = PF_TCPS_PROXY_SRC; 4018 s->src.seqhi = arc4random(); 4019 /* Find mss option */ 4020 mss = pf_get_mss(pd); 4021 mss = pf_calc_mss(pd->src, pd->af, rtid, mss); 4022 mss = pf_calc_mss(pd->dst, pd->af, rtid, mss); 4023 s->src.mss = mss; 4024 pf_send_tcp(r, pd->af, pd->dst, pd->src, th->th_dport, 4025 th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1, 4026 TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, pd->rdomain); 4027 REASON_SET(&reason, PFRES_SYNPROXY); 4028 return (PF_SYNPROXY_DROP); 4029 } 4030 4031 return (PF_PASS); 4032 4033 csfailed: 4034 if (s) { 4035 pf_normalize_tcp_cleanup(s); /* safe even w/o init */ 4036 pf_src_tree_remove_state(s); 4037 pool_put(&pf_state_pl, s); 4038 } 4039 4040 for (i = 0; i < PF_SN_MAX; i++) 4041 if (sns[i] != NULL) 4042 pf_remove_src_node(sns[i]); 4043 4044 return (PF_DROP); 4045 } 4046 4047 int 4048 pf_translate(struct pf_pdesc *pd, struct pf_addr *saddr, u_int16_t sport, 4049 struct pf_addr *daddr, u_int16_t dport, u_int16_t virtual_type, 4050 int icmp_dir) 4051 { 4052 /* 4053 * when called from bpf_mtap_pflog, there are extra constraints: 4054 * -mbuf is faked, m_data is the bpf buffer 4055 * -pd is not fully set up 4056 */ 4057 int rewrite = 0; 4058 int afto = pd->af != pd->naf; 4059 4060 if (afto || PF_ANEQ(daddr, pd->dst, pd->af)) 4061 pd->destchg = 1; 4062 4063 switch (pd->proto) { 4064 case IPPROTO_TCP: /* FALLTHROUGH */ 4065 case IPPROTO_UDP: 4066 rewrite += 
pf_patch_16(pd, pd->sport, sport); 4067 rewrite += pf_patch_16(pd, pd->dport, dport); 4068 break; 4069 4070 case IPPROTO_ICMP: 4071 /* pf_translate() is also used when logging invalid packets */ 4072 if (pd->af != AF_INET) 4073 return (0); 4074 4075 if (afto) { 4076 #ifdef INET6 4077 if (pf_translate_icmp_af(pd, AF_INET6, pd->hdr.icmp)) 4078 return (0); 4079 pd->proto = IPPROTO_ICMPV6; 4080 rewrite = 1; 4081 #endif /* INET6 */ 4082 } 4083 if (virtual_type == htons(ICMP_ECHO)) { 4084 u_int16_t icmpid = (icmp_dir == PF_IN) ? sport : dport; 4085 rewrite += pf_patch_16(pd, 4086 &pd->hdr.icmp->icmp_id, icmpid); 4087 } 4088 break; 4089 4090 #ifdef INET6 4091 case IPPROTO_ICMPV6: 4092 /* pf_translate() is also used when logging invalid packets */ 4093 if (pd->af != AF_INET6) 4094 return (0); 4095 4096 if (afto) { 4097 if (pf_translate_icmp_af(pd, AF_INET, pd->hdr.icmp6)) 4098 return (0); 4099 pd->proto = IPPROTO_ICMP; 4100 rewrite = 1; 4101 } 4102 if (virtual_type == htons(ICMP6_ECHO_REQUEST)) { 4103 u_int16_t icmpid = (icmp_dir == PF_IN) ? sport : dport; 4104 rewrite += pf_patch_16(pd, 4105 &pd->hdr.icmp6->icmp6_id, icmpid); 4106 } 4107 break; 4108 #endif /* INET6 */ 4109 } 4110 4111 if (!afto) { 4112 rewrite += pf_translate_a(pd, pd->src, saddr); 4113 rewrite += pf_translate_a(pd, pd->dst, daddr); 4114 } 4115 4116 return (rewrite); 4117 } 4118 4119 int 4120 pf_tcp_track_full(struct pf_pdesc *pd, struct pf_state_peer *src, 4121 struct pf_state_peer *dst, struct pf_state **state, u_short *reason, 4122 int *copyback) 4123 { 4124 struct tcphdr *th = pd->hdr.tcp; 4125 u_int16_t win = ntohs(th->th_win); 4126 u_int32_t ack, end, data_end, seq, orig_seq; 4127 u_int8_t sws, dws; 4128 int ackskew; 4129 4130 if (src->wscale && dst->wscale && !(th->th_flags & TH_SYN)) { 4131 sws = src->wscale & PF_WSCALE_MASK; 4132 dws = dst->wscale & PF_WSCALE_MASK; 4133 } else 4134 sws = dws = 0; 4135 4136 /* 4137 * Sequence tracking algorithm from Guido van Rooij's paper: 4138 * http://www.madison-gurkha.com/publications/tcp_filtering/ 4139 * tcp_filtering.ps 4140 */ 4141 4142 orig_seq = seq = ntohl(th->th_seq); 4143 if (src->seqlo == 0) { 4144 /* First packet from this end. 
Set its state */ 4145 4146 if (((*state)->state_flags & PFSTATE_SCRUB_TCP || dst->scrub) && 4147 src->scrub == NULL) { 4148 if (pf_normalize_tcp_init(pd, src)) { 4149 REASON_SET(reason, PFRES_MEMORY); 4150 return (PF_DROP); 4151 } 4152 } 4153 4154 /* Deferred generation of sequence number modulator */ 4155 if (dst->seqdiff && !src->seqdiff) { 4156 /* use random iss for the TCP server */ 4157 while ((src->seqdiff = arc4random() - seq) == 0) 4158 continue; 4159 ack = ntohl(th->th_ack) - dst->seqdiff; 4160 pf_patch_32(pd, &th->th_seq, htonl(seq + src->seqdiff)); 4161 pf_patch_32(pd, &th->th_ack, htonl(ack)); 4162 *copyback = 1; 4163 } else { 4164 ack = ntohl(th->th_ack); 4165 } 4166 4167 end = seq + pd->p_len; 4168 if (th->th_flags & TH_SYN) { 4169 end++; 4170 if (dst->wscale & PF_WSCALE_FLAG) { 4171 src->wscale = pf_get_wscale(pd); 4172 if (src->wscale & PF_WSCALE_FLAG) { 4173 /* Remove scale factor from initial 4174 * window */ 4175 sws = src->wscale & PF_WSCALE_MASK; 4176 win = ((u_int32_t)win + (1 << sws) - 1) 4177 >> sws; 4178 dws = dst->wscale & PF_WSCALE_MASK; 4179 } else { 4180 /* fixup other window */ 4181 dst->max_win = MIN(TCP_MAXWIN, 4182 (u_int32_t)dst->max_win << 4183 (dst->wscale & PF_WSCALE_MASK)); 4184 /* in case of a retrans SYN|ACK */ 4185 dst->wscale = 0; 4186 } 4187 } 4188 } 4189 data_end = end; 4190 if (th->th_flags & TH_FIN) 4191 end++; 4192 4193 src->seqlo = seq; 4194 if (src->state < TCPS_SYN_SENT) 4195 src->state = TCPS_SYN_SENT; 4196 4197 /* 4198 * May need to slide the window (seqhi may have been set by 4199 * the crappy stack check or if we picked up the connection 4200 * after establishment) 4201 */ 4202 if (src->seqhi == 1 || 4203 SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi)) 4204 src->seqhi = end + MAX(1, dst->max_win << dws); 4205 if (win > src->max_win) 4206 src->max_win = win; 4207 4208 } else { 4209 ack = ntohl(th->th_ack) - dst->seqdiff; 4210 if (src->seqdiff) { 4211 /* Modulate sequence numbers */ 4212 pf_patch_32(pd, &th->th_seq, htonl(seq + src->seqdiff)); 4213 pf_patch_32(pd, &th->th_ack, htonl(ack)); 4214 *copyback = 1; 4215 } 4216 end = seq + pd->p_len; 4217 if (th->th_flags & TH_SYN) 4218 end++; 4219 data_end = end; 4220 if (th->th_flags & TH_FIN) 4221 end++; 4222 } 4223 4224 if ((th->th_flags & TH_ACK) == 0) { 4225 /* Let it pass through the ack skew check */ 4226 ack = dst->seqlo; 4227 } else if ((ack == 0 && 4228 (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) || 4229 /* broken tcp stacks do not set ack */ 4230 (dst->state < TCPS_SYN_SENT)) { 4231 /* 4232 * Many stacks (ours included) will set the ACK number in an 4233 * FIN|ACK if the SYN times out -- no sequence to ACK. 4234 */ 4235 ack = dst->seqlo; 4236 } 4237 4238 if (seq == end) { 4239 /* Ease sequencing restrictions on no data packets */ 4240 seq = src->seqlo; 4241 data_end = end = seq; 4242 } 4243 4244 ackskew = dst->seqlo - ack; 4245 4246 4247 /* 4248 * Need to demodulate the sequence numbers in any TCP SACK options 4249 * (Selective ACK). We could optionally validate the SACK values 4250 * against the current ACK window, either forwards or backwards, but 4251 * I'm not confident that SACK has been implemented properly 4252 * everywhere. It wouldn't surprise me if several stacks accidentally 4253 * SACK too far backwards of previously ACKed data. There really aren't 4254 * any security implications of bad SACKing unless the target stack 4255 * doesn't validate the option length correctly. 
Someone trying to 4256 * spoof into a TCP connection won't bother blindly sending SACK 4257 * options anyway. 4258 */ 4259 if (dst->seqdiff && (th->th_off << 2) > sizeof(struct tcphdr)) { 4260 if (pf_modulate_sack(pd, dst)) 4261 *copyback = 1; 4262 } 4263 4264 4265 #define MAXACKWINDOW (0xffff + 1500) /* 1500 is an arbitrary fudge factor */ 4266 if (SEQ_GEQ(src->seqhi, data_end) && 4267 /* Last octet inside other's window space */ 4268 SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) && 4269 /* Retrans: not more than one window back */ 4270 (ackskew >= -MAXACKWINDOW) && 4271 /* Acking not more than one reassembled fragment backwards */ 4272 (ackskew <= (MAXACKWINDOW << sws)) && 4273 /* Acking not more than one window forward */ 4274 ((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo || 4275 (orig_seq == src->seqlo + 1) || (orig_seq + 1 == src->seqlo))) { 4276 /* Require an exact/+1 sequence match on resets when possible */ 4277 4278 if (dst->scrub || src->scrub) { 4279 if (pf_normalize_tcp_stateful(pd, reason, *state, src, 4280 dst, copyback)) 4281 return (PF_DROP); 4282 } 4283 4284 /* update max window */ 4285 if (src->max_win < win) 4286 src->max_win = win; 4287 /* synchronize sequencing */ 4288 if (SEQ_GT(end, src->seqlo)) 4289 src->seqlo = end; 4290 /* slide the window of what the other end can send */ 4291 if (SEQ_GEQ(ack + (win << sws), dst->seqhi)) 4292 dst->seqhi = ack + MAX((win << sws), 1); 4293 4294 /* update states */ 4295 if (th->th_flags & TH_SYN) 4296 if (src->state < TCPS_SYN_SENT) 4297 src->state = TCPS_SYN_SENT; 4298 if (th->th_flags & TH_FIN) 4299 if (src->state < TCPS_CLOSING) 4300 src->state = TCPS_CLOSING; 4301 if (th->th_flags & TH_ACK) { 4302 if (dst->state == TCPS_SYN_SENT) { 4303 dst->state = TCPS_ESTABLISHED; 4304 if (src->state == TCPS_ESTABLISHED && 4305 !SLIST_EMPTY(&(*state)->src_nodes) && 4306 pf_src_connlimit(state)) { 4307 REASON_SET(reason, PFRES_SRCLIMIT); 4308 return (PF_DROP); 4309 } 4310 } else if (dst->state == TCPS_CLOSING) 4311 dst->state = TCPS_FIN_WAIT_2; 4312 } 4313 if (th->th_flags & TH_RST) 4314 src->state = dst->state = TCPS_TIME_WAIT; 4315 4316 /* update expire time */ 4317 (*state)->expire = time_uptime; 4318 if (src->state >= TCPS_FIN_WAIT_2 && 4319 dst->state >= TCPS_FIN_WAIT_2) 4320 (*state)->timeout = PFTM_TCP_CLOSED; 4321 else if (src->state >= TCPS_CLOSING && 4322 dst->state >= TCPS_CLOSING) 4323 (*state)->timeout = PFTM_TCP_FIN_WAIT; 4324 else if (src->state < TCPS_ESTABLISHED || 4325 dst->state < TCPS_ESTABLISHED) 4326 (*state)->timeout = PFTM_TCP_OPENING; 4327 else if (src->state >= TCPS_CLOSING || 4328 dst->state >= TCPS_CLOSING) 4329 (*state)->timeout = PFTM_TCP_CLOSING; 4330 else 4331 (*state)->timeout = PFTM_TCP_ESTABLISHED; 4332 4333 /* Fall through to PASS packet */ 4334 } else if ((dst->state < TCPS_SYN_SENT || 4335 dst->state >= TCPS_FIN_WAIT_2 || 4336 src->state >= TCPS_FIN_WAIT_2) && 4337 SEQ_GEQ(src->seqhi + MAXACKWINDOW, data_end) && 4338 /* Within a window forward of the originating packet */ 4339 SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) { 4340 /* Within a window backward of the originating packet */ 4341 4342 /* 4343 * This currently handles three situations: 4344 * 1) Stupid stacks will shotgun SYNs before their peer 4345 * replies. 4346 * 2) When PF catches an already established stream (the 4347 * firewall rebooted, the state table was flushed, routes 4348 * changed...) 
4349 * 3) Packets get funky immediately after the connection 4350 * closes (this should catch Solaris spurious ACK|FINs 4351 * that web servers like to spew after a close) 4352 * 4353 * This must be a little more careful than the above code 4354 * since packet floods will also be caught here. We don't 4355 * update the TTL here to mitigate the damage of a packet 4356 * flood and so the same code can handle awkward establishment 4357 * and a loosened connection close. 4358 * In the establishment case, a correct peer response will 4359 * validate the connection, go through the normal state code 4360 * and keep updating the state TTL. 4361 */ 4362 4363 if (pf_status.debug >= LOG_NOTICE) { 4364 log(LOG_NOTICE, "pf: loose state match: "); 4365 pf_print_state(*state); 4366 pf_print_flags(th->th_flags); 4367 addlog(" seq=%u (%u) ack=%u len=%u ackskew=%d " 4368 "pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack, 4369 pd->p_len, ackskew, (*state)->packets[0], 4370 (*state)->packets[1], 4371 pd->dir == PF_IN ? "in" : "out", 4372 pd->dir == (*state)->direction ? "fwd" : "rev"); 4373 } 4374 4375 if (dst->scrub || src->scrub) { 4376 if (pf_normalize_tcp_stateful(pd, reason, *state, src, 4377 dst, copyback)) 4378 return (PF_DROP); 4379 } 4380 4381 /* update max window */ 4382 if (src->max_win < win) 4383 src->max_win = win; 4384 /* synchronize sequencing */ 4385 if (SEQ_GT(end, src->seqlo)) 4386 src->seqlo = end; 4387 /* slide the window of what the other end can send */ 4388 if (SEQ_GEQ(ack + (win << sws), dst->seqhi)) 4389 dst->seqhi = ack + MAX((win << sws), 1); 4390 4391 /* 4392 * Cannot set dst->seqhi here since this could be a shotgunned 4393 * SYN and not an already established connection. 4394 */ 4395 if (th->th_flags & TH_FIN) 4396 if (src->state < TCPS_CLOSING) 4397 src->state = TCPS_CLOSING; 4398 if (th->th_flags & TH_RST) 4399 src->state = dst->state = TCPS_TIME_WAIT; 4400 4401 /* Fall through to PASS packet */ 4402 } else { 4403 if ((*state)->dst.state == TCPS_SYN_SENT && 4404 (*state)->src.state == TCPS_SYN_SENT) { 4405 /* Send RST for state mismatches during handshake */ 4406 if (!(th->th_flags & TH_RST)) 4407 pf_send_tcp((*state)->rule.ptr, pd->af, 4408 pd->dst, pd->src, th->th_dport, 4409 th->th_sport, ntohl(th->th_ack), 0, 4410 TH_RST, 0, 0, 4411 (*state)->rule.ptr->return_ttl, 1, 0, 4412 pd->rdomain); 4413 src->seqlo = 0; 4414 src->seqhi = 1; 4415 src->max_win = 1; 4416 } else if (pf_status.debug >= LOG_NOTICE) { 4417 log(LOG_NOTICE, "pf: BAD state: "); 4418 pf_print_state(*state); 4419 pf_print_flags(th->th_flags); 4420 addlog(" seq=%u (%u) ack=%u len=%u ackskew=%d " 4421 "pkts=%llu:%llu dir=%s,%s\n", 4422 seq, orig_seq, ack, pd->p_len, ackskew, 4423 (*state)->packets[0], (*state)->packets[1], 4424 pd->dir == PF_IN ? "in" : "out", 4425 pd->dir == (*state)->direction ? "fwd" : "rev"); 4426 addlog("pf: State failure on: %c %c %c %c | %c %c\n", 4427 SEQ_GEQ(src->seqhi, data_end) ? ' ' : '1', 4428 SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ? 4429 ' ': '2', 4430 (ackskew >= -MAXACKWINDOW) ? ' ' : '3', 4431 (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4', 4432 SEQ_GEQ(src->seqhi + MAXACKWINDOW, data_end) ? 4433 ' ' :'5', 4434 SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' 
' :'6'); 4435 } 4436 REASON_SET(reason, PFRES_BADSTATE); 4437 return (PF_DROP); 4438 } 4439 4440 return (PF_PASS); 4441 } 4442 4443 int 4444 pf_tcp_track_sloppy(struct pf_pdesc *pd, struct pf_state_peer *src, 4445 struct pf_state_peer *dst, struct pf_state **state, u_short *reason) 4446 { 4447 struct tcphdr *th = pd->hdr.tcp; 4448 4449 if (th->th_flags & TH_SYN) 4450 if (src->state < TCPS_SYN_SENT) 4451 src->state = TCPS_SYN_SENT; 4452 if (th->th_flags & TH_FIN) 4453 if (src->state < TCPS_CLOSING) 4454 src->state = TCPS_CLOSING; 4455 if (th->th_flags & TH_ACK) { 4456 if (dst->state == TCPS_SYN_SENT) { 4457 dst->state = TCPS_ESTABLISHED; 4458 if (src->state == TCPS_ESTABLISHED && 4459 !SLIST_EMPTY(&(*state)->src_nodes) && 4460 pf_src_connlimit(state)) { 4461 REASON_SET(reason, PFRES_SRCLIMIT); 4462 return (PF_DROP); 4463 } 4464 } else if (dst->state == TCPS_CLOSING) { 4465 dst->state = TCPS_FIN_WAIT_2; 4466 } else if (src->state == TCPS_SYN_SENT && 4467 dst->state < TCPS_SYN_SENT) { 4468 /* 4469 * Handle a special sloppy case where we only see one 4470 * half of the connection. If there is an ACK after 4471 * the initial SYN without ever seeing a packet from 4472 * the destination, set the connection to established. 4473 */ 4474 dst->state = src->state = TCPS_ESTABLISHED; 4475 if (!SLIST_EMPTY(&(*state)->src_nodes) && 4476 pf_src_connlimit(state)) { 4477 REASON_SET(reason, PFRES_SRCLIMIT); 4478 return (PF_DROP); 4479 } 4480 } else if (src->state == TCPS_CLOSING && 4481 dst->state == TCPS_ESTABLISHED && 4482 dst->seqlo == 0) { 4483 /* 4484 * Handle the closing of half connections where we 4485 * don't see the full bidirectional FIN/ACK+ACK 4486 * handshake. 4487 */ 4488 dst->state = TCPS_CLOSING; 4489 } 4490 } 4491 if (th->th_flags & TH_RST) 4492 src->state = dst->state = TCPS_TIME_WAIT; 4493 4494 /* update expire time */ 4495 (*state)->expire = time_uptime; 4496 if (src->state >= TCPS_FIN_WAIT_2 && 4497 dst->state >= TCPS_FIN_WAIT_2) 4498 (*state)->timeout = PFTM_TCP_CLOSED; 4499 else if (src->state >= TCPS_CLOSING && 4500 dst->state >= TCPS_CLOSING) 4501 (*state)->timeout = PFTM_TCP_FIN_WAIT; 4502 else if (src->state < TCPS_ESTABLISHED || 4503 dst->state < TCPS_ESTABLISHED) 4504 (*state)->timeout = PFTM_TCP_OPENING; 4505 else if (src->state >= TCPS_CLOSING || 4506 dst->state >= TCPS_CLOSING) 4507 (*state)->timeout = PFTM_TCP_CLOSING; 4508 else 4509 (*state)->timeout = PFTM_TCP_ESTABLISHED; 4510 4511 return (PF_PASS); 4512 } 4513 4514 static __inline int 4515 pf_synproxy(struct pf_pdesc *pd, struct pf_state **state, u_short *reason) 4516 { 4517 struct pf_state_key *sk = (*state)->key[pd->didx]; 4518 4519 if ((*state)->src.state == PF_TCPS_PROXY_SRC) { 4520 struct tcphdr *th = pd->hdr.tcp; 4521 4522 if (pd->dir != (*state)->direction) { 4523 REASON_SET(reason, PFRES_SYNPROXY); 4524 return (PF_SYNPROXY_DROP); 4525 } 4526 if (th->th_flags & TH_SYN) { 4527 if (ntohl(th->th_seq) != (*state)->src.seqlo) { 4528 REASON_SET(reason, PFRES_SYNPROXY); 4529 return (PF_DROP); 4530 } 4531 pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst, 4532 pd->src, th->th_dport, th->th_sport, 4533 (*state)->src.seqhi, ntohl(th->th_seq) + 1, 4534 TH_SYN|TH_ACK, 0, (*state)->src.mss, 0, 1, 4535 0, pd->rdomain); 4536 REASON_SET(reason, PFRES_SYNPROXY); 4537 return (PF_SYNPROXY_DROP); 4538 } else if ((th->th_flags & (TH_ACK|TH_RST|TH_FIN)) != TH_ACK || 4539 (ntohl(th->th_ack) != (*state)->src.seqhi + 1) || 4540 (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) { 4541 REASON_SET(reason, PFRES_SYNPROXY); 4542 return (PF_DROP); 
4543 } else if (!SLIST_EMPTY(&(*state)->src_nodes) && 4544 pf_src_connlimit(state)) { 4545 REASON_SET(reason, PFRES_SRCLIMIT); 4546 return (PF_DROP); 4547 } else 4548 (*state)->src.state = PF_TCPS_PROXY_DST; 4549 } 4550 if ((*state)->src.state == PF_TCPS_PROXY_DST) { 4551 struct tcphdr *th = pd->hdr.tcp; 4552 4553 if (pd->dir == (*state)->direction) { 4554 if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) || 4555 (ntohl(th->th_ack) != (*state)->src.seqhi + 1) || 4556 (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) { 4557 REASON_SET(reason, PFRES_SYNPROXY); 4558 return (PF_DROP); 4559 } 4560 (*state)->src.max_win = MAX(ntohs(th->th_win), 1); 4561 if ((*state)->dst.seqhi == 1) 4562 (*state)->dst.seqhi = arc4random(); 4563 pf_send_tcp((*state)->rule.ptr, pd->af, 4564 &sk->addr[pd->sidx], &sk->addr[pd->didx], 4565 sk->port[pd->sidx], sk->port[pd->didx], 4566 (*state)->dst.seqhi, 0, TH_SYN, 0, 4567 (*state)->src.mss, 0, 0, (*state)->tag, 4568 sk->rdomain); 4569 REASON_SET(reason, PFRES_SYNPROXY); 4570 return (PF_SYNPROXY_DROP); 4571 } else if (((th->th_flags & (TH_SYN|TH_ACK)) != 4572 (TH_SYN|TH_ACK)) || 4573 (ntohl(th->th_ack) != (*state)->dst.seqhi + 1)) { 4574 REASON_SET(reason, PFRES_SYNPROXY); 4575 return (PF_DROP); 4576 } else { 4577 (*state)->dst.max_win = MAX(ntohs(th->th_win), 1); 4578 (*state)->dst.seqlo = ntohl(th->th_seq); 4579 pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst, 4580 pd->src, th->th_dport, th->th_sport, 4581 ntohl(th->th_ack), ntohl(th->th_seq) + 1, 4582 TH_ACK, (*state)->src.max_win, 0, 0, 0, 4583 (*state)->tag, pd->rdomain); 4584 pf_send_tcp((*state)->rule.ptr, pd->af, 4585 &sk->addr[pd->sidx], &sk->addr[pd->didx], 4586 sk->port[pd->sidx], sk->port[pd->didx], 4587 (*state)->src.seqhi + 1, (*state)->src.seqlo + 1, 4588 TH_ACK, (*state)->dst.max_win, 0, 0, 1, 4589 0, sk->rdomain); 4590 (*state)->src.seqdiff = (*state)->dst.seqhi - 4591 (*state)->src.seqlo; 4592 (*state)->dst.seqdiff = (*state)->src.seqhi - 4593 (*state)->dst.seqlo; 4594 (*state)->src.seqhi = (*state)->src.seqlo + 4595 (*state)->dst.max_win; 4596 (*state)->dst.seqhi = (*state)->dst.seqlo + 4597 (*state)->src.max_win; 4598 (*state)->src.wscale = (*state)->dst.wscale = 0; 4599 (*state)->src.state = (*state)->dst.state = 4600 TCPS_ESTABLISHED; 4601 REASON_SET(reason, PFRES_SYNPROXY); 4602 return (PF_SYNPROXY_DROP); 4603 } 4604 } 4605 return (PF_PASS); 4606 } 4607 4608 int 4609 pf_test_state(struct pf_pdesc *pd, struct pf_state **state, u_short *reason) 4610 { 4611 struct pf_state_key_cmp key; 4612 int copyback = 0; 4613 struct pf_state_peer *src, *dst; 4614 int action = PF_PASS; 4615 struct inpcb *inp; 4616 4617 key.af = pd->af; 4618 key.proto = pd->virtual_proto; 4619 key.rdomain = pd->rdomain; 4620 PF_ACPY(&key.addr[pd->sidx], pd->src, key.af); 4621 PF_ACPY(&key.addr[pd->didx], pd->dst, key.af); 4622 key.port[pd->sidx] = pd->osport; 4623 key.port[pd->didx] = pd->odport; 4624 inp = pd->m->m_pkthdr.pf.inp; 4625 4626 STATE_LOOKUP(pd->kif, &key, pd->dir, *state, pd->m); 4627 4628 if (pd->dir == (*state)->direction) { 4629 src = &(*state)->src; 4630 dst = &(*state)->dst; 4631 } else { 4632 src = &(*state)->dst; 4633 dst = &(*state)->src; 4634 } 4635 4636 switch (pd->virtual_proto) { 4637 case IPPROTO_TCP: 4638 if ((action = pf_synproxy(pd, state, reason)) != PF_PASS) 4639 return (action); 4640 if (((pd->hdr.tcp->th_flags & (TH_SYN|TH_ACK)) == TH_SYN) && 4641 dst->state >= TCPS_FIN_WAIT_2 && 4642 src->state >= TCPS_FIN_WAIT_2) { 4643 if (pf_status.debug >= LOG_NOTICE) { 4644 log(LOG_NOTICE, "pf: state reuse "); 
4645 pf_print_state(*state); 4646 pf_print_flags(pd->hdr.tcp->th_flags); 4647 addlog("\n"); 4648 } 4649 /* XXX make sure it's the same direction ?? */ 4650 (*state)->src.state = (*state)->dst.state = TCPS_CLOSED; 4651 pf_remove_state(*state); 4652 *state = NULL; 4653 pd->m->m_pkthdr.pf.inp = inp; 4654 return (PF_DROP); 4655 } 4656 4657 if ((*state)->state_flags & PFSTATE_SLOPPY) { 4658 if (pf_tcp_track_sloppy(pd, src, dst, state, reason) == 4659 PF_DROP) 4660 return (PF_DROP); 4661 } else { 4662 int ret; 4663 4664 if (PF_REVERSED_KEY((*state)->key, pd->af)) 4665 ret = pf_tcp_track_full(pd, dst, src, state, 4666 reason, &copyback); 4667 else 4668 ret = pf_tcp_track_full(pd, src, dst, state, 4669 reason, &copyback); 4670 if (ret == PF_DROP) 4671 return (PF_DROP); 4672 } 4673 break; 4674 case IPPROTO_UDP: 4675 /* update states */ 4676 if (src->state < PFUDPS_SINGLE) 4677 src->state = PFUDPS_SINGLE; 4678 if (dst->state == PFUDPS_SINGLE) 4679 dst->state = PFUDPS_MULTIPLE; 4680 4681 /* update expire time */ 4682 (*state)->expire = time_uptime; 4683 if (src->state == PFUDPS_MULTIPLE && 4684 dst->state == PFUDPS_MULTIPLE) 4685 (*state)->timeout = PFTM_UDP_MULTIPLE; 4686 else 4687 (*state)->timeout = PFTM_UDP_SINGLE; 4688 break; 4689 default: 4690 /* update states */ 4691 if (src->state < PFOTHERS_SINGLE) 4692 src->state = PFOTHERS_SINGLE; 4693 if (dst->state == PFOTHERS_SINGLE) 4694 dst->state = PFOTHERS_MULTIPLE; 4695 4696 /* update expire time */ 4697 (*state)->expire = time_uptime; 4698 if (src->state == PFOTHERS_MULTIPLE && 4699 dst->state == PFOTHERS_MULTIPLE) 4700 (*state)->timeout = PFTM_OTHER_MULTIPLE; 4701 else 4702 (*state)->timeout = PFTM_OTHER_SINGLE; 4703 break; 4704 } 4705 4706 /* translate source/destination address, if necessary */ 4707 if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { 4708 struct pf_state_key *nk; 4709 int afto, sidx, didx; 4710 4711 if (PF_REVERSED_KEY((*state)->key, pd->af)) 4712 nk = (*state)->key[pd->sidx]; 4713 else 4714 nk = (*state)->key[pd->didx]; 4715 4716 afto = pd->af != nk->af; 4717 sidx = afto ? pd->didx : pd->sidx; 4718 didx = afto ? 
pd->sidx : pd->didx; 4719 4720 #ifdef INET6 4721 if (afto) { 4722 PF_ACPY(&pd->nsaddr, &nk->addr[sidx], nk->af); 4723 PF_ACPY(&pd->ndaddr, &nk->addr[didx], nk->af); 4724 pd->naf = nk->af; 4725 action = PF_AFRT; 4726 } 4727 #endif /* INET6 */ 4728 4729 if (!afto) 4730 pf_translate_a(pd, pd->src, &nk->addr[sidx]); 4731 4732 if (pd->sport != NULL) 4733 pf_patch_16(pd, pd->sport, nk->port[sidx]); 4734 4735 if (afto || PF_ANEQ(pd->dst, &nk->addr[didx], pd->af) || 4736 pd->rdomain != nk->rdomain) 4737 pd->destchg = 1; 4738 4739 if (!afto) 4740 pf_translate_a(pd, pd->dst, &nk->addr[didx]); 4741 4742 if (pd->dport != NULL) 4743 pf_patch_16(pd, pd->dport, nk->port[didx]); 4744 4745 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 4746 copyback = 1; 4747 } 4748 4749 if (copyback && pd->hdrlen > 0) { 4750 m_copyback(pd->m, pd->off, pd->hdrlen, pd->hdr.any, M_NOWAIT); 4751 } 4752 4753 return (action); 4754 } 4755 4756 int 4757 pf_icmp_state_lookup(struct pf_pdesc *pd, struct pf_state_key_cmp *key, 4758 struct pf_state **state, u_int16_t icmpid, u_int16_t type, 4759 int icmp_dir, int *iidx, int multi, int inner) 4760 { 4761 int direction; 4762 4763 key->af = pd->af; 4764 key->proto = pd->proto; 4765 key->rdomain = pd->rdomain; 4766 if (icmp_dir == PF_IN) { 4767 *iidx = pd->sidx; 4768 key->port[pd->sidx] = icmpid; 4769 key->port[pd->didx] = type; 4770 } else { 4771 *iidx = pd->didx; 4772 key->port[pd->sidx] = type; 4773 key->port[pd->didx] = icmpid; 4774 } 4775 4776 if (pf_state_key_addr_setup(pd, key, pd->sidx, pd->src, pd->didx, 4777 pd->dst, pd->af, multi)) 4778 return (PF_DROP); 4779 4780 STATE_LOOKUP(pd->kif, key, pd->dir, *state, pd->m); 4781 4782 if ((*state)->state_flags & PFSTATE_SLOPPY) 4783 return (-1); 4784 4785 /* Is this ICMP message flowing in the right direction? */ 4786 if ((*state)->key[PF_SK_WIRE]->af != (*state)->key[PF_SK_STACK]->af) 4787 direction = (pd->af == (*state)->key[PF_SK_WIRE]->af) ? 4788 PF_IN : PF_OUT; 4789 else 4790 direction = (*state)->direction; 4791 if ((((!inner && direction == pd->dir) || 4792 (inner && direction != pd->dir)) ? 4793 PF_IN : PF_OUT) != icmp_dir) { 4794 if (pf_status.debug >= LOG_NOTICE) { 4795 log(LOG_NOTICE, 4796 "pf: icmp type %d in wrong direction (%d): ", 4797 ntohs(type), icmp_dir); 4798 pf_print_state(*state); 4799 addlog("\n"); 4800 } 4801 return (PF_DROP); 4802 } 4803 return (-1); 4804 } 4805 4806 int 4807 pf_test_state_icmp(struct pf_pdesc *pd, struct pf_state **state, 4808 u_short *reason) 4809 { 4810 u_int16_t virtual_id, virtual_type; 4811 u_int8_t icmptype; 4812 int icmp_dir, iidx, ret, copyback = 0; 4813 4814 struct pf_state_key_cmp key; 4815 4816 switch (pd->proto) { 4817 case IPPROTO_ICMP: 4818 icmptype = pd->hdr.icmp->icmp_type; 4819 break; 4820 #ifdef INET6 4821 case IPPROTO_ICMPV6: 4822 icmptype = pd->hdr.icmp6->icmp6_type; 4823 break; 4824 #endif /* INET6 */ 4825 default: 4826 panic("unhandled proto %d", pd->proto); 4827 } 4828 4829 if (pf_icmp_mapping(pd, icmptype, &icmp_dir, &virtual_id, 4830 &virtual_type) == 0) { 4831 /* 4832 * ICMP query/reply message not related to a TCP/UDP packet. 4833 * Search for an ICMP state. 4834 */ 4835 ret = pf_icmp_state_lookup(pd, &key, state, 4836 virtual_id, virtual_type, icmp_dir, &iidx, 4837 0, 0); 4838 /* IPv6? 
try matching a multicast address */ 4839 if (ret == PF_DROP && pd->af == AF_INET6 && icmp_dir == PF_OUT) 4840 ret = pf_icmp_state_lookup(pd, &key, state, virtual_id, 4841 virtual_type, icmp_dir, &iidx, 1, 0); 4842 if (ret >= 0) 4843 return (ret); 4844 4845 (*state)->expire = time_uptime; 4846 (*state)->timeout = PFTM_ICMP_ERROR_REPLY; 4847 4848 /* translate source/destination address, if necessary */ 4849 if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { 4850 struct pf_state_key *nk; 4851 int afto, sidx, didx; 4852 4853 if (PF_REVERSED_KEY((*state)->key, pd->af)) 4854 nk = (*state)->key[pd->sidx]; 4855 else 4856 nk = (*state)->key[pd->didx]; 4857 4858 afto = pd->af != nk->af; 4859 sidx = afto ? pd->didx : pd->sidx; 4860 didx = afto ? pd->sidx : pd->didx; 4861 iidx = afto ? !iidx : iidx; 4862 #ifdef INET6 4863 if (afto) { 4864 PF_ACPY(&pd->nsaddr, &nk->addr[sidx], nk->af); 4865 PF_ACPY(&pd->ndaddr, &nk->addr[didx], nk->af); 4866 pd->naf = nk->af; 4867 } 4868 #endif /* INET6 */ 4869 if (!afto) { 4870 pf_translate_a(pd, pd->src, &nk->addr[sidx]); 4871 pf_translate_a(pd, pd->dst, &nk->addr[didx]); 4872 } 4873 4874 if (pd->rdomain != nk->rdomain) 4875 pd->destchg = 1; 4876 if (!afto && PF_ANEQ(pd->dst, 4877 &nk->addr[didx], pd->af)) 4878 pd->destchg = 1; 4879 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 4880 4881 switch (pd->af) { 4882 case AF_INET: 4883 #ifdef INET6 4884 if (afto) { 4885 if (pf_translate_icmp_af(pd, AF_INET6, 4886 pd->hdr.icmp)) 4887 return (PF_DROP); 4888 pd->proto = IPPROTO_ICMPV6; 4889 } 4890 #endif /* INET6 */ 4891 pf_patch_16(pd, 4892 &pd->hdr.icmp->icmp_id, nk->port[iidx]); 4893 4894 m_copyback(pd->m, pd->off, ICMP_MINLEN, 4895 pd->hdr.icmp, M_NOWAIT); 4896 copyback = 1; 4897 break; 4898 #ifdef INET6 4899 case AF_INET6: 4900 if (afto) { 4901 if (pf_translate_icmp_af(pd, AF_INET, 4902 pd->hdr.icmp6)) 4903 return (PF_DROP); 4904 pd->proto = IPPROTO_ICMP; 4905 } 4906 4907 pf_patch_16(pd, 4908 &pd->hdr.icmp6->icmp6_id, nk->port[iidx]); 4909 4910 m_copyback(pd->m, pd->off, 4911 sizeof(struct icmp6_hdr), pd->hdr.icmp6, 4912 M_NOWAIT); 4913 copyback = 1; 4914 break; 4915 #endif /* INET6 */ 4916 } 4917 #ifdef INET6 4918 if (afto) 4919 return (PF_AFRT); 4920 #endif /* INET6 */ 4921 } 4922 } else { 4923 /* 4924 * ICMP error message in response to a TCP/UDP packet. 4925 * Extract the inner TCP/UDP header and search for that state. 4926 */ 4927 struct pf_pdesc pd2; 4928 struct ip h2; 4929 #ifdef INET6 4930 struct ip6_hdr h2_6; 4931 #endif /* INET6 */ 4932 int ipoff2; 4933 4934 /* Initialize pd2 fields valid for both packets with pd. */ 4935 bzero(&pd2, sizeof(pd2)); 4936 pd2.af = pd->af; 4937 pd2.dir = pd->dir; 4938 pd2.kif = pd->kif; 4939 pd2.m = pd->m; 4940 pd2.rdomain = pd->rdomain; 4941 /* Payload packet is from the opposite direction. */ 4942 pd2.sidx = (pd2.dir == PF_IN) ? 1 : 0; 4943 pd2.didx = (pd2.dir == PF_IN) ? 
0 : 1; 4944 switch (pd->af) { 4945 case AF_INET: 4946 /* offset of h2 in mbuf chain */ 4947 ipoff2 = pd->off + ICMP_MINLEN; 4948 4949 if (!pf_pull_hdr(pd2.m, ipoff2, &h2, sizeof(h2), 4950 NULL, reason, pd2.af)) { 4951 DPFPRINTF(LOG_NOTICE, 4952 "ICMP error message too short (ip)"); 4953 return (PF_DROP); 4954 } 4955 /* 4956 * ICMP error messages don't refer to non-first 4957 * fragments 4958 */ 4959 if (h2.ip_off & htons(IP_OFFMASK)) { 4960 REASON_SET(reason, PFRES_FRAG); 4961 return (PF_DROP); 4962 } 4963 4964 /* offset of protocol header that follows h2 */ 4965 pd2.off = ipoff2 + (h2.ip_hl << 2); 4966 4967 pd2.proto = h2.ip_p; 4968 pd2.tot_len = ntohs(h2.ip_len); 4969 pd2.src = (struct pf_addr *)&h2.ip_src; 4970 pd2.dst = (struct pf_addr *)&h2.ip_dst; 4971 break; 4972 #ifdef INET6 4973 case AF_INET6: 4974 ipoff2 = pd->off + sizeof(struct icmp6_hdr); 4975 4976 if (!pf_pull_hdr(pd2.m, ipoff2, &h2_6, sizeof(h2_6), 4977 NULL, reason, pd2.af)) { 4978 DPFPRINTF(LOG_NOTICE, 4979 "ICMP error message too short (ip6)"); 4980 return (PF_DROP); 4981 } 4982 4983 pd2.off = ipoff2; 4984 if (pf_walk_header6(&pd2, &h2_6, reason) != PF_PASS) 4985 return (PF_DROP); 4986 4987 pd2.tot_len = ntohs(h2_6.ip6_plen) + 4988 sizeof(struct ip6_hdr); 4989 pd2.src = (struct pf_addr *)&h2_6.ip6_src; 4990 pd2.dst = (struct pf_addr *)&h2_6.ip6_dst; 4991 break; 4992 #endif /* INET6 */ 4993 default: 4994 unhandled_af(pd->af); 4995 } 4996 4997 switch (pd2.proto) { 4998 case IPPROTO_TCP: { 4999 struct tcphdr th; 5000 u_int32_t seq; 5001 struct pf_state_peer *src, *dst; 5002 u_int8_t dws; 5003 5004 /* 5005 * Only the first 8 bytes of the TCP header can be 5006 * expected. Don't access any TCP header fields after 5007 * th_seq, an ackskew test is not possible. 5008 */ 5009 if (!pf_pull_hdr(pd2.m, pd2.off, &th, 8, NULL, reason, 5010 pd2.af)) { 5011 DPFPRINTF(LOG_NOTICE, 5012 "ICMP error message too short (tcp)"); 5013 return (PF_DROP); 5014 } 5015 5016 key.af = pd2.af; 5017 key.proto = IPPROTO_TCP; 5018 key.rdomain = pd2.rdomain; 5019 PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); 5020 PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); 5021 key.port[pd2.sidx] = th.th_sport; 5022 key.port[pd2.didx] = th.th_dport; 5023 5024 STATE_LOOKUP(pd2.kif, &key, pd2.dir, *state, pd2.m); 5025 5026 if (pd2.dir == (*state)->direction) { 5027 if (PF_REVERSED_KEY((*state)->key, pd->af)) { 5028 src = &(*state)->src; 5029 dst = &(*state)->dst; 5030 } else { 5031 src = &(*state)->dst; 5032 dst = &(*state)->src; 5033 } 5034 } else { 5035 if (PF_REVERSED_KEY((*state)->key, pd->af)) { 5036 src = &(*state)->dst; 5037 dst = &(*state)->src; 5038 } else { 5039 src = &(*state)->src; 5040 dst = &(*state)->dst; 5041 } 5042 } 5043 5044 if (src->wscale && dst->wscale) 5045 dws = dst->wscale & PF_WSCALE_MASK; 5046 else 5047 dws = 0; 5048 5049 /* Demodulate sequence number */ 5050 seq = ntohl(th.th_seq) - src->seqdiff; 5051 if (src->seqdiff) { 5052 pf_patch_32(pd, &th.th_seq, htonl(seq)); 5053 copyback = 1; 5054 } 5055 5056 if (!((*state)->state_flags & PFSTATE_SLOPPY) && 5057 (!SEQ_GEQ(src->seqhi, seq) || !SEQ_GEQ(seq, 5058 src->seqlo - (dst->max_win << dws)))) { 5059 if (pf_status.debug >= LOG_NOTICE) { 5060 log(LOG_NOTICE, 5061 "pf: BAD ICMP %d:%d ", 5062 icmptype, pd->hdr.icmp->icmp_code); 5063 pf_print_host(pd->src, 0, pd->af); 5064 addlog(" -> "); 5065 pf_print_host(pd->dst, 0, pd->af); 5066 addlog(" state: "); 5067 pf_print_state(*state); 5068 addlog(" seq=%u\n", seq); 5069 } 5070 REASON_SET(reason, PFRES_BADSTATE); 5071 return (PF_DROP); 5072 } else { 5073 
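/* quoted seq lies inside the allowed window: legitimate ICMP error, log at debug only */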
if (pf_status.debug >= LOG_DEBUG) { 5074 log(LOG_DEBUG, 5075 "pf: OK ICMP %d:%d ", 5076 icmptype, pd->hdr.icmp->icmp_code); 5077 pf_print_host(pd->src, 0, pd->af); 5078 addlog(" -> "); 5079 pf_print_host(pd->dst, 0, pd->af); 5080 addlog(" state: "); 5081 pf_print_state(*state); 5082 addlog(" seq=%u\n", seq); 5083 } 5084 } 5085 5086 /* translate source/destination address, if necessary */ 5087 if ((*state)->key[PF_SK_WIRE] != 5088 (*state)->key[PF_SK_STACK]) { 5089 struct pf_state_key *nk; 5090 int afto, sidx, didx; 5091 5092 if (PF_REVERSED_KEY((*state)->key, pd->af)) 5093 nk = (*state)->key[pd->sidx]; 5094 else 5095 nk = (*state)->key[pd->didx]; 5096 5097 afto = pd->af != nk->af; 5098 sidx = afto ? pd2.didx : pd2.sidx; 5099 didx = afto ? pd2.sidx : pd2.didx; 5100 5101 #ifdef INET6 5102 if (afto) { 5103 if (pf_translate_icmp_af(pd, nk->af, 5104 pd->hdr.icmp)) 5105 return (PF_DROP); 5106 m_copyback(pd->m, pd->off, 5107 sizeof(struct icmp6_hdr), 5108 pd->hdr.icmp6, M_NOWAIT); 5109 if (nk->af == AF_INET) 5110 pd->proto = IPPROTO_ICMP; 5111 else 5112 pd->proto = IPPROTO_ICMPV6; 5113 pd->m->m_pkthdr.ph_rtableid = 5114 nk->rdomain; 5115 pd->destchg = 1; 5116 PF_ACPY(&pd->nsaddr, 5117 &nk->addr[pd2.sidx], nk->af); 5118 PF_ACPY(&pd->ndaddr, 5119 &nk->addr[pd2.didx], nk->af); 5120 pd->naf = nk->af; 5121 5122 if (pf_change_icmp_af(pd->m, ipoff2, 5123 pd, &pd2, &nk->addr[sidx], 5124 &nk->addr[didx], pd->af, nk->af)) 5125 return (PF_DROP); 5126 5127 pf_patch_16(pd, 5128 &th.th_sport, nk->port[sidx]); 5129 pf_patch_16(pd, 5130 &th.th_dport, nk->port[didx]); 5131 5132 m_copyback(pd2.m, pd2.off, 8, &th, 5133 M_NOWAIT); 5134 return (PF_AFRT); 5135 } 5136 #endif /* INET6 */ 5137 if (PF_ANEQ(pd2.src, 5138 &nk->addr[pd2.sidx], pd2.af) || 5139 nk->port[pd2.sidx] != th.th_sport) 5140 pf_translate_icmp(pd, pd2.src, 5141 &th.th_sport, pd->dst, 5142 &nk->addr[pd2.sidx], 5143 nk->port[pd2.sidx]); 5144 5145 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], 5146 pd2.af) || pd2.rdomain != nk->rdomain) 5147 pd->destchg = 1; 5148 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 5149 5150 if (PF_ANEQ(pd2.dst, 5151 &nk->addr[pd2.didx], pd2.af) || 5152 nk->port[pd2.didx] != th.th_dport) 5153 pf_translate_icmp(pd, pd2.dst, 5154 &th.th_dport, pd->src, 5155 &nk->addr[pd2.didx], 5156 nk->port[pd2.didx]); 5157 copyback = 1; 5158 } 5159 5160 if (copyback) { 5161 switch (pd2.af) { 5162 case AF_INET: 5163 m_copyback(pd->m, pd->off, ICMP_MINLEN, 5164 pd->hdr.icmp, M_NOWAIT); 5165 m_copyback(pd2.m, ipoff2, sizeof(h2), 5166 &h2, M_NOWAIT); 5167 break; 5168 #ifdef INET6 5169 case AF_INET6: 5170 m_copyback(pd->m, pd->off, 5171 sizeof(struct icmp6_hdr), 5172 pd->hdr.icmp6, M_NOWAIT); 5173 m_copyback(pd2.m, ipoff2, sizeof(h2_6), 5174 &h2_6, M_NOWAIT); 5175 break; 5176 #endif /* INET6 */ 5177 } 5178 m_copyback(pd2.m, pd2.off, 8, &th, M_NOWAIT); 5179 } 5180 break; 5181 } 5182 case IPPROTO_UDP: { 5183 struct udphdr uh; 5184 5185 if (!pf_pull_hdr(pd2.m, pd2.off, &uh, sizeof(uh), 5186 NULL, reason, pd2.af)) { 5187 DPFPRINTF(LOG_NOTICE, 5188 "ICMP error message too short (udp)"); 5189 return (PF_DROP); 5190 } 5191 5192 key.af = pd2.af; 5193 key.proto = IPPROTO_UDP; 5194 key.rdomain = pd2.rdomain; 5195 PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); 5196 PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); 5197 key.port[pd2.sidx] = uh.uh_sport; 5198 key.port[pd2.didx] = uh.uh_dport; 5199 5200 STATE_LOOKUP(pd2.kif, &key, pd2.dir, *state, pd2.m); 5201 5202 /* translate source/destination address, if necessary */ 5203 if ((*state)->key[PF_SK_WIRE] != 5204 
(*state)->key[PF_SK_STACK]) { 5205 struct pf_state_key *nk; 5206 int afto, sidx, didx; 5207 5208 if (PF_REVERSED_KEY((*state)->key, pd->af)) 5209 nk = (*state)->key[pd->sidx]; 5210 else 5211 nk = (*state)->key[pd->didx]; 5212 5213 afto = pd->af != nk->af; 5214 sidx = afto ? pd2.didx : pd2.sidx; 5215 didx = afto ? pd2.sidx : pd2.didx; 5216 5217 #ifdef INET6 5218 if (afto) { 5219 if (pf_translate_icmp_af(pd, nk->af, 5220 pd->hdr.icmp)) 5221 return (PF_DROP); 5222 m_copyback(pd->m, pd->off, 5223 sizeof(struct icmp6_hdr), 5224 pd->hdr.icmp6, M_NOWAIT); 5225 if (nk->af == AF_INET) 5226 pd->proto = IPPROTO_ICMP; 5227 else 5228 pd->proto = IPPROTO_ICMPV6; 5229 pd->m->m_pkthdr.ph_rtableid = 5230 nk->rdomain; 5231 pd->destchg = 1; 5232 PF_ACPY(&pd->nsaddr, 5233 &nk->addr[pd2.sidx], nk->af); 5234 PF_ACPY(&pd->ndaddr, 5235 &nk->addr[pd2.didx], nk->af); 5236 pd->naf = nk->af; 5237 5238 if (pf_change_icmp_af(pd->m, ipoff2, 5239 pd, &pd2, &nk->addr[sidx], 5240 &nk->addr[didx], pd->af, nk->af)) 5241 return (PF_DROP); 5242 5243 pf_patch_16(pd, 5244 &uh.uh_sport, nk->port[sidx]); 5245 pf_patch_16(pd, 5246 &uh.uh_dport, nk->port[didx]); 5247 5248 m_copyback(pd2.m, pd2.off, sizeof(uh), 5249 &uh, M_NOWAIT); 5250 return (PF_AFRT); 5251 } 5252 #endif /* INET6 */ 5253 5254 if (PF_ANEQ(pd2.src, 5255 &nk->addr[pd2.sidx], pd2.af) || 5256 nk->port[pd2.sidx] != uh.uh_sport) 5257 pf_translate_icmp(pd, pd2.src, 5258 &uh.uh_sport, pd->dst, 5259 &nk->addr[pd2.sidx], 5260 nk->port[pd2.sidx]); 5261 5262 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], 5263 pd2.af) || pd2.rdomain != nk->rdomain) 5264 pd->destchg = 1; 5265 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 5266 5267 if (PF_ANEQ(pd2.dst, 5268 &nk->addr[pd2.didx], pd2.af) || 5269 nk->port[pd2.didx] != uh.uh_dport) 5270 pf_translate_icmp(pd, pd2.dst, 5271 &uh.uh_dport, pd->src, 5272 &nk->addr[pd2.didx], 5273 nk->port[pd2.didx]); 5274 5275 switch (pd2.af) { 5276 case AF_INET: 5277 m_copyback(pd->m, pd->off, ICMP_MINLEN, 5278 pd->hdr.icmp, M_NOWAIT); 5279 m_copyback(pd2.m, ipoff2, sizeof(h2), 5280 &h2, M_NOWAIT); 5281 break; 5282 #ifdef INET6 5283 case AF_INET6: 5284 m_copyback(pd->m, pd->off, 5285 sizeof(struct icmp6_hdr), 5286 pd->hdr.icmp6, M_NOWAIT); 5287 m_copyback(pd2.m, ipoff2, sizeof(h2_6), 5288 &h2_6, M_NOWAIT); 5289 break; 5290 #endif /* INET6 */ 5291 } 5292 /* Avoid recomputing quoted UDP checksum. 5293 * note: udp6 0 csum invalid per rfc2460 p27. 
5294 * but presumed nothing cares in this context */ 5295 pf_patch_16(pd, &uh.uh_sum, 0); 5296 m_copyback(pd2.m, pd2.off, sizeof(uh), &uh, 5297 M_NOWAIT); 5298 copyback = 1; 5299 } 5300 break; 5301 } 5302 case IPPROTO_ICMP: { 5303 struct icmp iih; 5304 5305 if (pd2.af != AF_INET) { 5306 REASON_SET(reason, PFRES_NORM); 5307 return (PF_DROP); 5308 } 5309 5310 if (!pf_pull_hdr(pd2.m, pd2.off, &iih, ICMP_MINLEN, 5311 NULL, reason, pd2.af)) { 5312 DPFPRINTF(LOG_NOTICE, 5313 "ICMP error message too short (icmp)"); 5314 return (PF_DROP); 5315 } 5316 5317 pd2.hdr.icmp = &iih; 5318 pf_icmp_mapping(&pd2, iih.icmp_type, 5319 &icmp_dir, &virtual_id, &virtual_type); 5320 5321 ret = pf_icmp_state_lookup(&pd2, &key, state, 5322 virtual_id, virtual_type, icmp_dir, &iidx, 0, 1); 5323 if (ret >= 0) 5324 return (ret); 5325 5326 /* translate source/destination address, if necessary */ 5327 if ((*state)->key[PF_SK_WIRE] != 5328 (*state)->key[PF_SK_STACK]) { 5329 struct pf_state_key *nk; 5330 int afto, sidx, didx; 5331 5332 if (PF_REVERSED_KEY((*state)->key, pd->af)) 5333 nk = (*state)->key[pd->sidx]; 5334 else 5335 nk = (*state)->key[pd->didx]; 5336 5337 afto = pd->af != nk->af; 5338 sidx = afto ? pd2.didx : pd2.sidx; 5339 didx = afto ? pd2.sidx : pd2.didx; 5340 iidx = afto ? !iidx : iidx; 5341 5342 #ifdef INET6 5343 if (afto) { 5344 if (nk->af != AF_INET6) 5345 return (PF_DROP); 5346 if (pf_translate_icmp_af(pd, nk->af, 5347 pd->hdr.icmp)) 5348 return (PF_DROP); 5349 m_copyback(pd->m, pd->off, 5350 sizeof(struct icmp6_hdr), 5351 pd->hdr.icmp6, M_NOWAIT); 5352 if (pf_change_icmp_af(pd->m, ipoff2, 5353 pd, &pd2, &nk->addr[sidx], 5354 &nk->addr[didx], pd->af, nk->af)) 5355 return (PF_DROP); 5356 pd->proto = IPPROTO_ICMPV6; 5357 if (pf_translate_icmp_af(pd, 5358 nk->af, &iih)) 5359 return (PF_DROP); 5360 if (virtual_type == htons(ICMP_ECHO)) 5361 pf_patch_16(pd, &iih.icmp_id, 5362 nk->port[iidx]); 5363 m_copyback(pd2.m, pd2.off, ICMP_MINLEN, 5364 &iih, M_NOWAIT); 5365 pd->m->m_pkthdr.ph_rtableid = 5366 nk->rdomain; 5367 pd->destchg = 1; 5368 PF_ACPY(&pd->nsaddr, 5369 &nk->addr[pd2.sidx], nk->af); 5370 PF_ACPY(&pd->ndaddr, 5371 &nk->addr[pd2.didx], nk->af); 5372 pd->naf = nk->af; 5373 return (PF_AFRT); 5374 } 5375 #endif /* INET6 */ 5376 5377 if (PF_ANEQ(pd2.src, 5378 &nk->addr[pd2.sidx], pd2.af) || 5379 (virtual_type == htons(ICMP_ECHO) && 5380 nk->port[iidx] != iih.icmp_id)) 5381 pf_translate_icmp(pd, pd2.src, 5382 (virtual_type == htons(ICMP_ECHO)) ? 5383 &iih.icmp_id : NULL, 5384 pd->dst, &nk->addr[pd2.sidx], 5385 (virtual_type == htons(ICMP_ECHO)) ? 
5386 nk->port[iidx] : 0); 5387 5388 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], 5389 pd2.af) || pd2.rdomain != nk->rdomain) 5390 pd->destchg = 1; 5391 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 5392 5393 if (PF_ANEQ(pd2.dst, 5394 &nk->addr[pd2.didx], pd2.af)) 5395 pf_translate_icmp(pd, pd2.dst, NULL, 5396 pd->src, &nk->addr[pd2.didx], 0); 5397 5398 m_copyback(pd->m, pd->off, ICMP_MINLEN, 5399 pd->hdr.icmp, M_NOWAIT); 5400 m_copyback(pd2.m, ipoff2, sizeof(h2), &h2, 5401 M_NOWAIT); 5402 m_copyback(pd2.m, pd2.off, ICMP_MINLEN, &iih, 5403 M_NOWAIT); 5404 copyback = 1; 5405 } 5406 break; 5407 } 5408 #ifdef INET6 5409 case IPPROTO_ICMPV6: { 5410 struct icmp6_hdr iih; 5411 5412 if (pd2.af != AF_INET6) { 5413 REASON_SET(reason, PFRES_NORM); 5414 return (PF_DROP); 5415 } 5416 5417 if (!pf_pull_hdr(pd2.m, pd2.off, &iih, 5418 sizeof(struct icmp6_hdr), NULL, reason, pd2.af)) { 5419 DPFPRINTF(LOG_NOTICE, 5420 "ICMP error message too short (icmp6)"); 5421 return (PF_DROP); 5422 } 5423 5424 pd2.hdr.icmp6 = &iih; 5425 pf_icmp_mapping(&pd2, iih.icmp6_type, 5426 &icmp_dir, &virtual_id, &virtual_type); 5427 ret = pf_icmp_state_lookup(&pd2, &key, state, 5428 virtual_id, virtual_type, icmp_dir, &iidx, 0, 1); 5429 /* IPv6? try matching a multicast address */ 5430 if (ret == PF_DROP && pd2.af == AF_INET6 && 5431 icmp_dir == PF_OUT) 5432 ret = pf_icmp_state_lookup(&pd2, &key, state, 5433 virtual_id, virtual_type, icmp_dir, &iidx, 5434 1, 1); 5435 if (ret >= 0) 5436 return (ret); 5437 5438 /* translate source/destination address, if necessary */ 5439 if ((*state)->key[PF_SK_WIRE] != 5440 (*state)->key[PF_SK_STACK]) { 5441 struct pf_state_key *nk; 5442 int afto, sidx, didx; 5443 5444 if (PF_REVERSED_KEY((*state)->key, pd->af)) 5445 nk = (*state)->key[pd->sidx]; 5446 else 5447 nk = (*state)->key[pd->didx]; 5448 5449 afto = pd->af != nk->af; 5450 sidx = afto ? pd2.didx : pd2.sidx; 5451 didx = afto ? pd2.sidx : pd2.didx; 5452 iidx = afto ? !iidx : iidx; 5453 5454 if (afto) { 5455 if (nk->af != AF_INET) 5456 return (PF_DROP); 5457 if (pf_translate_icmp_af(pd, nk->af, 5458 pd->hdr.icmp)) 5459 return (PF_DROP); 5460 m_copyback(pd->m, pd->off, 5461 sizeof(struct icmp6_hdr), 5462 pd->hdr.icmp6, M_NOWAIT); 5463 if (pf_change_icmp_af(pd->m, ipoff2, 5464 pd, &pd2, &nk->addr[sidx], 5465 &nk->addr[didx], pd->af, nk->af)) 5466 return (PF_DROP); 5467 pd->proto = IPPROTO_ICMP; 5468 if (pf_translate_icmp_af(pd, 5469 nk->af, &iih)) 5470 return (PF_DROP); 5471 if (virtual_type == 5472 htons(ICMP6_ECHO_REQUEST)) 5473 pf_patch_16(pd, &iih.icmp6_id, 5474 nk->port[iidx]); 5475 m_copyback(pd2.m, pd2.off, 5476 sizeof(struct icmp6_hdr), &iih, 5477 M_NOWAIT); 5478 pd->m->m_pkthdr.ph_rtableid = 5479 nk->rdomain; 5480 pd->destchg = 1; 5481 PF_ACPY(&pd->nsaddr, 5482 &nk->addr[pd2.sidx], nk->af); 5483 PF_ACPY(&pd->ndaddr, 5484 &nk->addr[pd2.didx], nk->af); 5485 pd->naf = nk->af; 5486 return (PF_AFRT); 5487 } 5488 5489 if (PF_ANEQ(pd2.src, 5490 &nk->addr[pd2.sidx], pd2.af) || 5491 ((virtual_type == 5492 htons(ICMP6_ECHO_REQUEST)) && 5493 nk->port[pd2.sidx] != iih.icmp6_id)) 5494 pf_translate_icmp(pd, pd2.src, 5495 (virtual_type == 5496 htons(ICMP6_ECHO_REQUEST)) 5497 ? &iih.icmp6_id : NULL, 5498 pd->dst, &nk->addr[pd2.sidx], 5499 (virtual_type == 5500 htons(ICMP6_ECHO_REQUEST)) 5501 ? 
nk->port[iidx] : 0); 5502 5503 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], 5504 pd2.af) || pd2.rdomain != nk->rdomain) 5505 pd->destchg = 1; 5506 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 5507 5508 if (PF_ANEQ(pd2.dst, 5509 &nk->addr[pd2.didx], pd2.af)) 5510 pf_translate_icmp(pd, pd2.dst, NULL, 5511 pd->src, &nk->addr[pd2.didx], 0); 5512 5513 m_copyback(pd->m, pd->off, 5514 sizeof(struct icmp6_hdr), pd->hdr.icmp6, 5515 M_NOWAIT); 5516 m_copyback(pd2.m, ipoff2, sizeof(h2_6), &h2_6, 5517 M_NOWAIT); 5518 m_copyback(pd2.m, pd2.off, 5519 sizeof(struct icmp6_hdr), &iih, M_NOWAIT); 5520 copyback = 1; 5521 } 5522 break; 5523 } 5524 #endif /* INET6 */ 5525 default: { 5526 key.af = pd2.af; 5527 key.proto = pd2.proto; 5528 key.rdomain = pd2.rdomain; 5529 PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); 5530 PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); 5531 key.port[0] = key.port[1] = 0; 5532 5533 STATE_LOOKUP(pd2.kif, &key, pd2.dir, *state, pd2.m); 5534 5535 /* translate source/destination address, if necessary */ 5536 if ((*state)->key[PF_SK_WIRE] != 5537 (*state)->key[PF_SK_STACK]) { 5538 struct pf_state_key *nk = 5539 (*state)->key[pd->didx]; 5540 5541 if (PF_ANEQ(pd2.src, 5542 &nk->addr[pd2.sidx], pd2.af)) 5543 pf_translate_icmp(pd, pd2.src, NULL, 5544 pd->dst, &nk->addr[pd2.sidx], 0); 5545 5546 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], 5547 pd2.af) || pd2.rdomain != nk->rdomain) 5548 pd->destchg = 1; 5549 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 5550 5551 if (PF_ANEQ(pd2.dst, 5552 &nk->addr[pd2.didx], pd2.af)) 5553 pf_translate_icmp(pd, pd2.dst, NULL, 5554 pd->src, &nk->addr[pd2.didx], 0); 5555 5556 switch (pd2.af) { 5557 case AF_INET: 5558 m_copyback(pd->m, pd->off, ICMP_MINLEN, 5559 pd->hdr.icmp, M_NOWAIT); 5560 m_copyback(pd2.m, ipoff2, sizeof(h2), 5561 &h2, M_NOWAIT); 5562 break; 5563 #ifdef INET6 5564 case AF_INET6: 5565 m_copyback(pd->m, pd->off, 5566 sizeof(struct icmp6_hdr), 5567 pd->hdr.icmp6, M_NOWAIT); 5568 m_copyback(pd2.m, ipoff2, sizeof(h2_6), 5569 &h2_6, M_NOWAIT); 5570 break; 5571 #endif /* INET6 */ 5572 } 5573 copyback = 1; 5574 } 5575 break; 5576 } 5577 } 5578 } 5579 if (copyback) { 5580 m_copyback(pd->m, pd->off, pd->hdrlen, pd->hdr.any, M_NOWAIT); 5581 } 5582 5583 return (PF_PASS); 5584 } 5585 5586 /* 5587 * ipoff and off are measured from the start of the mbuf chain. 5588 * h must be at "ipoff" on the mbuf chain. 
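 * On failure NULL is returned and *actionp/*reasonp say why: an IPv4 non-first fragment cannot contain the header, so it passes if its offset is at or past len and drops if it would overlap the header; truncated packets drop with PFRES_SHORT.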
5589 */ 5590 void * 5591 pf_pull_hdr(struct mbuf *m, int off, void *p, int len, 5592 u_short *actionp, u_short *reasonp, sa_family_t af) 5593 { 5594 switch (af) { 5595 case AF_INET: { 5596 struct ip *h = mtod(m, struct ip *); 5597 u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3; 5598 5599 if (fragoff) { 5600 if (fragoff >= len) 5601 ACTION_SET(actionp, PF_PASS); 5602 else { 5603 ACTION_SET(actionp, PF_DROP); 5604 REASON_SET(reasonp, PFRES_FRAG); 5605 } 5606 return (NULL); 5607 } 5608 if (m->m_pkthdr.len < off + len || 5609 ntohs(h->ip_len) < off + len) { 5610 ACTION_SET(actionp, PF_DROP); 5611 REASON_SET(reasonp, PFRES_SHORT); 5612 return (NULL); 5613 } 5614 break; 5615 } 5616 #ifdef INET6 5617 case AF_INET6: { 5618 struct ip6_hdr *h = mtod(m, struct ip6_hdr *); 5619 5620 if (m->m_pkthdr.len < off + len || 5621 (ntohs(h->ip6_plen) + sizeof(struct ip6_hdr)) < 5622 (unsigned)(off + len)) { 5623 ACTION_SET(actionp, PF_DROP); 5624 REASON_SET(reasonp, PFRES_SHORT); 5625 return (NULL); 5626 } 5627 break; 5628 } 5629 #endif /* INET6 */ 5630 } 5631 m_copydata(m, off, len, p); 5632 return (p); 5633 } 5634 5635 int 5636 pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif, 5637 int rtableid) 5638 { 5639 struct sockaddr_storage ss; 5640 struct sockaddr_in *dst; 5641 int ret = 1; 5642 int check_mpath; 5643 #ifdef INET6 5644 struct sockaddr_in6 *dst6; 5645 #endif /* INET6 */ 5646 struct rtentry *rt = NULL; 5647 5648 check_mpath = 0; 5649 memset(&ss, 0, sizeof(ss)); 5650 switch (af) { 5651 case AF_INET: 5652 dst = (struct sockaddr_in *)&ss; 5653 dst->sin_family = AF_INET; 5654 dst->sin_len = sizeof(*dst); 5655 dst->sin_addr = addr->v4; 5656 if (ipmultipath) 5657 check_mpath = 1; 5658 break; 5659 #ifdef INET6 5660 case AF_INET6: 5661 /* 5662 * Skip check for addresses with embedded interface scope, 5663 * as they would always match anyway. 
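 * (The embedded scope names the interface in the address itself, so a route lookup would add no information here.)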
5664 */ 5665 if (IN6_IS_SCOPE_EMBED(&addr->v6)) 5666 goto out; 5667 dst6 = (struct sockaddr_in6 *)&ss; 5668 dst6->sin6_family = AF_INET6; 5669 dst6->sin6_len = sizeof(*dst6); 5670 dst6->sin6_addr = addr->v6; 5671 if (ip6_multipath) 5672 check_mpath = 1; 5673 break; 5674 #endif /* INET6 */ 5675 } 5676 5677 /* Skip checks for ipsec interfaces */ 5678 if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC) 5679 goto out; 5680 5681 rt = rtalloc((struct sockaddr *)&ss, 0, rtableid); 5682 if (rt != NULL) { 5683 /* No interface given, this is a no-route check */ 5684 if (kif == NULL) 5685 goto out; 5686 5687 if (kif->pfik_ifp == NULL) { 5688 ret = 0; 5689 goto out; 5690 } 5691 5692 /* Perform uRPF check if passed input interface */ 5693 ret = 0; 5694 do { 5695 if (rt->rt_ifidx == kif->pfik_ifp->if_index) { 5696 ret = 1; 5697 #if NCARP > 0 5698 } else { 5699 struct ifnet *ifp; 5700 5701 ifp = if_get(rt->rt_ifidx); 5702 if (ifp != NULL && ifp->if_type == IFT_CARP && 5703 ifp->if_carpdev == kif->pfik_ifp) 5704 ret = 1; 5705 if_put(ifp); 5706 #endif /* NCARP */ 5707 } 5708 5709 rt = rtable_iterate(rt); 5710 } while (check_mpath == 1 && rt != NULL && ret == 0); 5711 } else 5712 ret = 0; 5713 out: 5714 rtfree(rt); 5715 return (ret); 5716 } 5717 5718 int 5719 pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw, 5720 int rtableid) 5721 { 5722 struct sockaddr_storage ss; 5723 struct sockaddr_in *dst; 5724 #ifdef INET6 5725 struct sockaddr_in6 *dst6; 5726 #endif /* INET6 */ 5727 struct rtentry *rt; 5728 int ret = 0; 5729 5730 memset(&ss, 0, sizeof(ss)); 5731 switch (af) { 5732 case AF_INET: 5733 dst = (struct sockaddr_in *)&ss; 5734 dst->sin_family = AF_INET; 5735 dst->sin_len = sizeof(*dst); 5736 dst->sin_addr = addr->v4; 5737 break; 5738 #ifdef INET6 5739 case AF_INET6: 5740 dst6 = (struct sockaddr_in6 *)&ss; 5741 dst6->sin6_family = AF_INET6; 5742 dst6->sin6_len = sizeof(*dst6); 5743 dst6->sin6_addr = addr->v6; 5744 break; 5745 #endif /* INET6 */ 5746 } 5747 5748 rt = rtalloc((struct sockaddr *)&ss, RT_RESOLVE, rtableid); 5749 if (rt != NULL) { 5750 if (rt->rt_labelid == aw->v.rtlabel) 5751 ret = 1; 5752 rtfree(rt); 5753 } 5754 5755 return (ret); 5756 } 5757 5758 void 5759 pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, 5760 struct pf_state *s) 5761 { 5762 struct mbuf *m0, *m1; 5763 struct sockaddr_in *dst, sin; 5764 struct rtentry *rt = NULL; 5765 struct ip *ip; 5766 struct ifnet *ifp = NULL; 5767 struct pf_addr naddr; 5768 struct pf_src_node *sns[PF_SN_MAX]; 5769 int error = 0; 5770 unsigned int rtableid; 5771 5772 if (m == NULL || *m == NULL || r == NULL || 5773 (dir != PF_IN && dir != PF_OUT) || oifp == NULL) 5774 panic("pf_route: invalid parameters"); 5775 5776 if ((*m)->m_pkthdr.pf.routed++ > 3) { 5777 m0 = *m; 5778 *m = NULL; 5779 goto bad; 5780 } 5781 5782 if (r->rt == PF_DUPTO) { 5783 if ((m0 = m_dup_pkt(*m, max_linkhdr, M_NOWAIT)) == NULL) 5784 return; 5785 } else { 5786 if ((r->rt == PF_REPLYTO) == (r->direction == dir)) 5787 return; 5788 m0 = *m; 5789 } 5790 5791 if (m0->m_len < sizeof(struct ip)) { 5792 DPFPRINTF(LOG_ERR, 5793 "pf_route: m0->m_len < sizeof(struct ip)"); 5794 goto bad; 5795 } 5796 5797 ip = mtod(m0, struct ip *); 5798 5799 memset(&sin, 0, sizeof(sin)); 5800 dst = &sin; 5801 dst->sin_family = AF_INET; 5802 dst->sin_len = sizeof(*dst); 5803 dst->sin_addr = ip->ip_dst; 5804 rtableid = m0->m_pkthdr.ph_rtableid; 5805 5806 if (!r->rt) { 5807 rt = rtalloc(sintosa(dst), RT_RESOLVE, rtableid); 5808 if (rt == NULL) { 5809 
ipstat.ips_noroute++; 5810 goto bad; 5811 } 5812 5813 ifp = if_get(rt->rt_ifidx); 5814 5815 if (rt->rt_flags & RTF_GATEWAY) 5816 dst = satosin(rt->rt_gateway); 5817 5818 m0->m_pkthdr.pf.flags |= PF_TAG_GENERATED; 5819 } else { 5820 if (s == NULL) { 5821 bzero(sns, sizeof(sns)); 5822 if (pf_map_addr(AF_INET, r, 5823 (struct pf_addr *)&ip->ip_src, 5824 &naddr, NULL, sns, &r->route, PF_SN_ROUTE)) { 5825 DPFPRINTF(LOG_ERR, 5826 "pf_route: pf_map_addr() failed."); 5827 goto bad; 5828 } 5829 5830 if (!PF_AZERO(&naddr, AF_INET)) 5831 dst->sin_addr.s_addr = naddr.v4.s_addr; 5832 ifp = r->route.kif ? 5833 r->route.kif->pfik_ifp : NULL; 5834 } else { 5835 if (!PF_AZERO(&s->rt_addr, AF_INET)) 5836 dst->sin_addr.s_addr = 5837 s->rt_addr.v4.s_addr; 5838 ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL; 5839 } 5840 5841 rt = rtalloc(sintosa(dst), RT_RESOLVE, rtableid); 5842 if (rt == NULL) { 5843 ipstat.ips_noroute++; 5844 goto bad; 5845 } 5846 } 5847 if (ifp == NULL) 5848 goto bad; 5849 5850 5851 if (oifp != ifp) { 5852 if (pf_test(AF_INET, PF_OUT, ifp, &m0) != PF_PASS) 5853 goto bad; 5854 else if (m0 == NULL) 5855 goto done; 5856 if (m0->m_len < sizeof(struct ip)) { 5857 DPFPRINTF(LOG_ERR, 5858 "pf_route: m0->m_len < sizeof(struct ip)"); 5859 goto bad; 5860 } 5861 ip = mtod(m0, struct ip *); 5862 } 5863 5864 in_proto_cksum_out(m0, ifp); 5865 5866 if (ntohs(ip->ip_len) <= ifp->if_mtu) { 5867 ip->ip_sum = 0; 5868 if (ifp->if_capabilities & IFCAP_CSUM_IPv4) 5869 m0->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT; 5870 else { 5871 ipstat.ips_outswcsum++; 5872 ip->ip_sum = in_cksum(m0, ip->ip_hl << 2); 5873 } 5874 error = ifp->if_output(ifp, m0, sintosa(dst), rt); 5875 goto done; 5876 } 5877 5878 /* 5879 * Too large for interface; fragment if possible. 5880 * Must be able to put at least 8 bytes per fragment. 
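 * With IP_DF set we must not fragment: send ICMP_UNREACH_NEEDFRAG carrying the interface MTU so the sender's path MTU discovery can adjust; dup-to copies are silently dropped instead.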
5881 */ 5882 if (ip->ip_off & htons(IP_DF)) { 5883 ipstat.ips_cantfrag++; 5884 if (r->rt != PF_DUPTO) { 5885 icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0, 5886 ifp->if_mtu); 5887 goto done; 5888 } else 5889 goto bad; 5890 } 5891 5892 m1 = m0; 5893 error = ip_fragment(m0, ifp, ifp->if_mtu); 5894 if (error) { 5895 m0 = NULL; 5896 goto bad; 5897 } 5898 5899 for (m0 = m1; m0; m0 = m1) { 5900 m1 = m0->m_nextpkt; 5901 m0->m_nextpkt = 0; 5902 if (error == 0) 5903 error = ifp->if_output(ifp, m0, sintosa(dst), rt); 5904 else 5905 m_freem(m0); 5906 } 5907 5908 if (error == 0) 5909 ipstat.ips_fragmented++; 5910 5911 done: 5912 if (r->rt != PF_DUPTO) 5913 *m = NULL; 5914 if (!r->rt) 5915 if_put(ifp); 5916 rtfree(rt); 5917 return; 5918 5919 bad: 5920 m_freem(m0); 5921 goto done; 5922 } 5923 5924 #ifdef INET6 5925 void 5926 pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, 5927 struct pf_state *s) 5928 { 5929 struct mbuf *m0; 5930 struct sockaddr_in6 *dst, sin6; 5931 struct rtentry *rt = NULL; 5932 struct ip6_hdr *ip6; 5933 struct ifnet *ifp = NULL; 5934 struct pf_addr naddr; 5935 struct pf_src_node *sns[PF_SN_MAX]; 5936 struct m_tag *mtag; 5937 unsigned int rtableid; 5938 5939 if (m == NULL || *m == NULL || r == NULL || 5940 (dir != PF_IN && dir != PF_OUT) || oifp == NULL) 5941 panic("pf_route6: invalid parameters"); 5942 5943 if ((*m)->m_pkthdr.pf.routed++ > 3) { 5944 m0 = *m; 5945 *m = NULL; 5946 goto bad; 5947 } 5948 5949 if (r->rt == PF_DUPTO) { 5950 if ((m0 = m_dup_pkt(*m, max_linkhdr, M_NOWAIT)) == NULL) 5951 return; 5952 } else { 5953 if ((r->rt == PF_REPLYTO) == (r->direction == dir)) 5954 return; 5955 m0 = *m; 5956 } 5957 5958 if (m0->m_len < sizeof(struct ip6_hdr)) { 5959 DPFPRINTF(LOG_ERR, 5960 "pf_route6: m0->m_len < sizeof(struct ip6_hdr)"); 5961 goto bad; 5962 } 5963 ip6 = mtod(m0, struct ip6_hdr *); 5964 5965 memset(&sin6, 0, sizeof(sin6)); 5966 dst = &sin6; 5967 dst->sin6_family = AF_INET6; 5968 dst->sin6_len = sizeof(*dst); 5969 dst->sin6_addr = ip6->ip6_dst; 5970 rtableid = m0->m_pkthdr.ph_rtableid; 5971 5972 if (!r->rt) { 5973 m0->m_pkthdr.pf.flags |= PF_TAG_GENERATED; 5974 ip6_output(m0, NULL, NULL, 0, NULL, NULL); 5975 return; 5976 } 5977 5978 if (s == NULL) { 5979 bzero(sns, sizeof(sns)); 5980 if (pf_map_addr(AF_INET6, r, (struct pf_addr *)&ip6->ip6_src, 5981 &naddr, NULL, sns, &r->route, PF_SN_ROUTE)) { 5982 DPFPRINTF(LOG_ERR, 5983 "pf_route6: pf_map_addr() failed."); 5984 goto bad; 5985 } 5986 if (!PF_AZERO(&naddr, AF_INET6)) 5987 PF_ACPY((struct pf_addr *)&dst->sin6_addr, 5988 &naddr, AF_INET6); 5989 ifp = r->route.kif ? r->route.kif->pfik_ifp : NULL; 5990 } else { 5991 if (!PF_AZERO(&s->rt_addr, AF_INET6)) 5992 PF_ACPY((struct pf_addr *)&dst->sin6_addr, 5993 &s->rt_addr, AF_INET6); 5994 ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL; 5995 } 5996 if (ifp == NULL) 5997 goto bad; 5998 5999 if (oifp != ifp) { 6000 if (pf_test(AF_INET6, PF_OUT, ifp, &m0) != PF_PASS) 6001 goto bad; 6002 else if (m0 == NULL) 6003 goto done; 6004 if (m0->m_len < sizeof(struct ip6_hdr)) { 6005 DPFPRINTF(LOG_ERR, 6006 "pf_route6: m0->m_len < sizeof(struct ip6_hdr)"); 6007 goto bad; 6008 } 6009 } 6010 6011 in6_proto_cksum_out(m0, ifp); 6012 6013 if (IN6_IS_SCOPE_EMBED(&dst->sin6_addr)) 6014 dst->sin6_addr.s6_addr16[1] = htons(ifp->if_index); 6015 6016 /* 6017 * If packet has been reassembled by PF earlier, we have to 6018 * use pf_refragment6() here to turn it back to fragments. 
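 * (IPv6 forbids fragmentation in transit, so the reassembled packet is split back into fragments instead of being forwarded whole.)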
6019 */ 6020 if ((mtag = m_tag_find(m0, PACKET_TAG_PF_REASSEMBLED, NULL))) { 6021 (void) pf_refragment6(&m0, mtag, dst, ifp); 6022 } else if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) { 6023 rt = rtalloc(sin6tosa(dst), RT_RESOLVE, rtableid); 6024 if (rt == NULL) { 6025 ip6stat.ip6s_noroute++; 6026 goto bad; 6027 } 6028 ifp->if_output(ifp, m0, sin6tosa(dst), rt); 6029 rtfree(rt); 6030 } else { 6031 icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu); 6032 } 6033 6034 done: 6035 if (r->rt != PF_DUPTO) 6036 *m = NULL; 6037 return; 6038 6039 bad: 6040 m_freem(m0); 6041 goto done; 6042 } 6043 #endif /* INET6 */ 6044 6045 6046 /* 6047 * check TCP checksum and set mbuf flag 6048 * off is the offset where the protocol header starts 6049 * len is the total length of protocol header plus payload 6050 * returns 0 when the checksum is valid, otherwise returns 1. 6051 * if the _OUT flag is set the checksum isn't done yet, consider these ok 6052 */ 6053 int 6054 pf_check_tcp_cksum(struct mbuf *m, int off, int len, sa_family_t af) 6055 { 6056 u_int16_t sum; 6057 6058 if (m->m_pkthdr.csum_flags & 6059 (M_TCP_CSUM_IN_OK | M_TCP_CSUM_OUT)) { 6060 return (0); 6061 } 6062 if (m->m_pkthdr.csum_flags & M_TCP_CSUM_IN_BAD || 6063 off < sizeof(struct ip) || 6064 m->m_pkthdr.len < off + len) { 6065 return (1); 6066 } 6067 6068 /* need to do it in software */ 6069 tcpstat.tcps_inswcsum++; 6070 6071 switch (af) { 6072 case AF_INET: 6073 if (m->m_len < sizeof(struct ip)) 6074 return (1); 6075 6076 sum = in4_cksum(m, IPPROTO_TCP, off, len); 6077 break; 6078 #ifdef INET6 6079 case AF_INET6: 6080 if (m->m_len < sizeof(struct ip6_hdr)) 6081 return (1); 6082 6083 sum = in6_cksum(m, IPPROTO_TCP, off, len); 6084 break; 6085 #endif /* INET6 */ 6086 default: 6087 unhandled_af(af); 6088 } 6089 if (sum) { 6090 tcpstat.tcps_rcvbadsum++; 6091 m->m_pkthdr.csum_flags |= M_TCP_CSUM_IN_BAD; 6092 return (1); 6093 } 6094 6095 m->m_pkthdr.csum_flags |= M_TCP_CSUM_IN_OK; 6096 return (0); 6097 } 6098 6099 struct pf_divert * 6100 pf_find_divert(struct mbuf *m) 6101 { 6102 struct m_tag *mtag; 6103 6104 if ((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL) 6105 return (NULL); 6106 6107 return ((struct pf_divert *)(mtag + 1)); 6108 } 6109 6110 struct pf_divert * 6111 pf_get_divert(struct mbuf *m) 6112 { 6113 struct m_tag *mtag; 6114 6115 if ((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL) { 6116 mtag = m_tag_get(PACKET_TAG_PF_DIVERT, sizeof(struct pf_divert), 6117 M_NOWAIT); 6118 if (mtag == NULL) 6119 return (NULL); 6120 bzero(mtag + 1, sizeof(struct pf_divert)); 6121 m_tag_prepend(m, mtag); 6122 } 6123 6124 return ((struct pf_divert *)(mtag + 1)); 6125 } 6126 6127 #ifdef INET6 6128 int 6129 pf_walk_option6(struct pf_pdesc *pd, struct ip6_hdr *h, int off, int end, 6130 u_short *reason) 6131 { 6132 struct ip6_opt opt; 6133 struct ip6_opt_jumbo jumbo; 6134 6135 while (off < end) { 6136 if (!pf_pull_hdr(pd->m, off, &opt.ip6o_type, 6137 sizeof(opt.ip6o_type), NULL, reason, AF_INET6)) { 6138 DPFPRINTF(LOG_NOTICE, "IPv6 short opt type"); 6139 return (PF_DROP); 6140 } 6141 if (opt.ip6o_type == IP6OPT_PAD1) { 6142 off++; 6143 continue; 6144 } 6145 if (!pf_pull_hdr(pd->m, off, &opt, sizeof(opt), 6146 NULL, reason, AF_INET6)) { 6147 DPFPRINTF(LOG_NOTICE, "IPv6 short opt"); 6148 return (PF_DROP); 6149 } 6150 if (off + sizeof(opt) + opt.ip6o_len > end) { 6151 DPFPRINTF(LOG_NOTICE, "IPv6 long opt"); 6152 REASON_SET(reason, PFRES_IPOPTIONS); 6153 return (PF_DROP); 6154 } 6155 switch (opt.ip6o_type) { 6156 case IP6OPT_JUMBO: 6157 if 
int
pf_walk_option6(struct pf_pdesc *pd, struct ip6_hdr *h, int off, int end,
    u_short *reason)
{
	struct ip6_opt		 opt;
	struct ip6_opt_jumbo	 jumbo;

	while (off < end) {
		if (!pf_pull_hdr(pd->m, off, &opt.ip6o_type,
		    sizeof(opt.ip6o_type), NULL, reason, AF_INET6)) {
			DPFPRINTF(LOG_NOTICE, "IPv6 short opt type");
			return (PF_DROP);
		}
		if (opt.ip6o_type == IP6OPT_PAD1) {
			off++;
			continue;
		}
		if (!pf_pull_hdr(pd->m, off, &opt, sizeof(opt),
		    NULL, reason, AF_INET6)) {
			DPFPRINTF(LOG_NOTICE, "IPv6 short opt");
			return (PF_DROP);
		}
		if (off + sizeof(opt) + opt.ip6o_len > end) {
			DPFPRINTF(LOG_NOTICE, "IPv6 long opt");
			REASON_SET(reason, PFRES_IPOPTIONS);
			return (PF_DROP);
		}
		switch (opt.ip6o_type) {
		case IP6OPT_JUMBO:
			if (pd->jumbolen != 0) {
				DPFPRINTF(LOG_NOTICE, "IPv6 multiple jumbo");
				REASON_SET(reason, PFRES_IPOPTIONS);
				return (PF_DROP);
			}
			if (ntohs(h->ip6_plen) != 0) {
				DPFPRINTF(LOG_NOTICE, "IPv6 bad jumbo plen");
				REASON_SET(reason, PFRES_IPOPTIONS);
				return (PF_DROP);
			}
			if (!pf_pull_hdr(pd->m, off, &jumbo, sizeof(jumbo),
			    NULL, reason, AF_INET6)) {
				DPFPRINTF(LOG_NOTICE, "IPv6 short jumbo");
				return (PF_DROP);
			}
			memcpy(&pd->jumbolen, jumbo.ip6oj_jumbo_len,
			    sizeof(pd->jumbolen));
			pd->jumbolen = ntohl(pd->jumbolen);
			if (pd->jumbolen < IPV6_MAXPACKET) {
				DPFPRINTF(LOG_NOTICE, "IPv6 short jumbolen");
				REASON_SET(reason, PFRES_IPOPTIONS);
				return (PF_DROP);
			}
			break;
		default:
			break;
		}
		off += sizeof(opt) + opt.ip6o_len;
	}

	return (PF_PASS);
}

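/*
 * Walk the chain of IPv6 extension headers, starting behind the fixed
 * header, and leave pd->off pointing at the upper layer protocol.
 * Note the asymmetric length encodings: for most extension headers
 * ip6e_len counts 8-octet units not including the first 8 octets, so a
 * routing header with ip6e_len == 2 occupies (2 + 1) * 8 = 24 bytes,
 * while for AH it counts 4-octet units not including the first two, so
 * an AH with ip6e_len == 4 occupies (4 + 2) * 4 = 24 bytes.
 */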
int
pf_walk_header6(struct pf_pdesc *pd, struct ip6_hdr *h, u_short *reason)
{
	struct ip6_frag		 frag;
	struct ip6_ext		 ext;
	struct ip6_rthdr	 rthdr;
	u_int32_t		 end;
	int			 fraghdr_cnt = 0, rthdr_cnt = 0;

	pd->off += sizeof(struct ip6_hdr);
	end = pd->off + ntohs(h->ip6_plen);
	pd->fragoff = pd->extoff = pd->jumbolen = 0;
	pd->proto = h->ip6_nxt;
	for (;;) {
		switch (pd->proto) {
		case IPPROTO_FRAGMENT:
			if (fraghdr_cnt++) {
				DPFPRINTF(LOG_NOTICE, "IPv6 multiple fragment");
				REASON_SET(reason, PFRES_FRAG);
				return (PF_DROP);
			}
			/* jumbo payload packets cannot be fragmented */
			if (pd->jumbolen != 0) {
				DPFPRINTF(LOG_NOTICE, "IPv6 fragmented jumbo");
				REASON_SET(reason, PFRES_FRAG);
				return (PF_DROP);
			}
			if (!pf_pull_hdr(pd->m, pd->off, &frag, sizeof(frag),
			    NULL, reason, AF_INET6)) {
				DPFPRINTF(LOG_NOTICE, "IPv6 short fragment");
				return (PF_DROP);
			}
			/* stop walking over non-initial fragments */
			if (ntohs((frag.ip6f_offlg & IP6F_OFF_MASK)) != 0) {
				pd->fragoff = pd->off;
				return (PF_PASS);
			}
			/* RFC 6946: reassemble only non-atomic fragments */
			if (frag.ip6f_offlg & IP6F_MORE_FRAG)
				pd->fragoff = pd->off;
			pd->off += sizeof(frag);
			pd->proto = frag.ip6f_nxt;
			break;
		case IPPROTO_ROUTING:
			if (rthdr_cnt++) {
				DPFPRINTF(LOG_NOTICE, "IPv6 multiple rthdr");
				REASON_SET(reason, PFRES_IPOPTIONS);
				return (PF_DROP);
			}
			/* fragments may be short */
			if (pd->fragoff != 0 && end < pd->off + sizeof(rthdr)) {
				pd->off = pd->fragoff;
				pd->proto = IPPROTO_FRAGMENT;
				return (PF_PASS);
			}
			if (!pf_pull_hdr(pd->m, pd->off, &rthdr, sizeof(rthdr),
			    NULL, reason, AF_INET6)) {
				DPFPRINTF(LOG_NOTICE, "IPv6 short rthdr");
				return (PF_DROP);
			}
			if (rthdr.ip6r_type == IPV6_RTHDR_TYPE_0) {
				DPFPRINTF(LOG_NOTICE, "IPv6 rthdr0");
				REASON_SET(reason, PFRES_IPOPTIONS);
				return (PF_DROP);
			}
			/* FALLTHROUGH */
		case IPPROTO_AH:
		case IPPROTO_HOPOPTS:
		case IPPROTO_DSTOPTS:
			/* fragments may be short */
			if (pd->fragoff != 0 && end < pd->off + sizeof(ext)) {
				pd->off = pd->fragoff;
				pd->proto = IPPROTO_FRAGMENT;
				return (PF_PASS);
			}
			if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext),
			    NULL, reason, AF_INET6)) {
				DPFPRINTF(LOG_NOTICE, "IPv6 short exthdr");
				return (PF_DROP);
			}
			/* reassembly needs the ext header before the frag */
			if (pd->fragoff == 0)
				pd->extoff = pd->off;
			if (pd->proto == IPPROTO_HOPOPTS && pd->fragoff == 0) {
				if (pf_walk_option6(pd, h,
				    pd->off + sizeof(ext),
				    pd->off + (ext.ip6e_len + 1) * 8, reason)
				    != PF_PASS)
					return (PF_DROP);
				if (ntohs(h->ip6_plen) == 0 &&
				    pd->jumbolen != 0) {
					DPFPRINTF(LOG_NOTICE,
					    "IPv6 missing jumbo");
					REASON_SET(reason, PFRES_IPOPTIONS);
					return (PF_DROP);
				}
			}
			if (pd->proto == IPPROTO_AH)
				pd->off += (ext.ip6e_len + 2) * 4;
			else
				pd->off += (ext.ip6e_len + 1) * 8;
			pd->proto = ext.ip6e_nxt;
			break;
		case IPPROTO_TCP:
		case IPPROTO_UDP:
		case IPPROTO_ICMPV6:
			/* fragments may be short, ignore inner header then */
			if (pd->fragoff != 0 && end < pd->off +
			    (pd->proto == IPPROTO_TCP ? sizeof(struct tcphdr) :
			    pd->proto == IPPROTO_UDP ? sizeof(struct udphdr) :
			    sizeof(struct icmp6_hdr))) {
				pd->off = pd->fragoff;
				pd->proto = IPPROTO_FRAGMENT;
			}
			/* FALLTHROUGH */
		default:
			return (PF_PASS);
		}
	}
}
#endif /* INET6 */

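/*
 * Fill in the packet description pd from the mbuf: validate the
 * network header, walk extension headers for IPv6, and pull the
 * transport header (TCP, UDP, ICMP or ICMPv6) into the caller's
 * header storage pointed to by pdhdrs.  Returns PF_PASS on success
 * and PF_DROP with *reason set when the packet is malformed.
 */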
int
pf_setup_pdesc(struct pf_pdesc *pd, void *pdhdrs, sa_family_t af, int dir,
    struct pfi_kif *kif, struct mbuf *m, u_short *reason)
{
	bzero(pd, sizeof(*pd));
	pd->hdr.any = pdhdrs;
	pd->dir = dir;
	pd->kif = kif;		/* kif is NULL when called by pflog */
	pd->m = m;
	pd->sidx = (dir == PF_IN) ? 0 : 1;
	pd->didx = (dir == PF_IN) ? 1 : 0;
	pd->af = pd->naf = af;
	pd->rdomain = rtable_l2(pd->m->m_pkthdr.ph_rtableid);

	switch (pd->af) {
	case AF_INET: {
		struct ip	*h;

		/* Check for illegal packets */
		if (pd->m->m_pkthdr.len < (int)sizeof(struct ip)) {
			REASON_SET(reason, PFRES_SHORT);
			return (PF_DROP);
		}

		h = mtod(pd->m, struct ip *);
		pd->off = h->ip_hl << 2;

		if (pd->off < sizeof(struct ip) ||
		    pd->off > ntohs(h->ip_len) ||
		    pd->m->m_pkthdr.len < ntohs(h->ip_len)) {
			REASON_SET(reason, PFRES_SHORT);
			return (PF_DROP);
		}

		pd->src = (struct pf_addr *)&h->ip_src;
		pd->dst = (struct pf_addr *)&h->ip_dst;
		pd->virtual_proto = pd->proto = h->ip_p;
		pd->tot_len = ntohs(h->ip_len);
		pd->tos = h->ip_tos & ~IPTOS_ECN_MASK;
		pd->ttl = h->ip_ttl;
		if (h->ip_hl > 5)	/* has options */
			pd->badopts++;

		if (h->ip_off & htons(IP_MF | IP_OFFMASK))
			pd->virtual_proto = PF_VPROTO_FRAGMENT;

		break;
	}
#ifdef INET6
	case AF_INET6: {
		struct ip6_hdr	*h;

		/* Check for illegal packets */
		if (pd->m->m_pkthdr.len < (int)sizeof(struct ip6_hdr)) {
			REASON_SET(reason, PFRES_SHORT);
			return (PF_DROP);
		}

		h = mtod(pd->m, struct ip6_hdr *);
		pd->off = 0;

		if (pd->m->m_pkthdr.len <
		    sizeof(struct ip6_hdr) + ntohs(h->ip6_plen)) {
			REASON_SET(reason, PFRES_SHORT);
			return (PF_DROP);
		}

		if (pf_walk_header6(pd, h, reason) != PF_PASS)
			return (PF_DROP);

#if 1
		/*
		 * We do not support jumbograms yet.  If we keep going,
		 * a zero ip6_plen will do something bad, so drop the
		 * packet for now.
		 */
		if (pd->jumbolen != 0) {
			REASON_SET(reason, PFRES_NORM);
			return (PF_DROP);
		}
#endif /* 1 */

		pd->src = (struct pf_addr *)&h->ip6_src;
		pd->dst = (struct pf_addr *)&h->ip6_dst;
		pd->virtual_proto = pd->proto;
		pd->tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
		pd->tos = (ntohl(h->ip6_flow) & 0x0fc00000) >> 20;
		pd->ttl = h->ip6_hlim;

		if (pd->fragoff != 0)
			pd->virtual_proto = PF_VPROTO_FRAGMENT;

		break;
	}
#endif /* INET6 */
	default:
		panic("pf_setup_pdesc called with illegal af %u", pd->af);

	}

	PF_ACPY(&pd->nsaddr, pd->src, pd->af);
	PF_ACPY(&pd->ndaddr, pd->dst, pd->af);

	switch (pd->virtual_proto) {
	case IPPROTO_TCP: {
		struct tcphdr	*th = pd->hdr.tcp;

		if (!pf_pull_hdr(pd->m, pd->off, th, sizeof(*th),
		    NULL, reason, pd->af))
			return (PF_DROP);
		pd->hdrlen = sizeof(*th);
		if (pd->off + (th->th_off << 2) > pd->tot_len ||
		    (th->th_off << 2) < sizeof(struct tcphdr)) {
			REASON_SET(reason, PFRES_SHORT);
			return (PF_DROP);
		}
		pd->p_len = pd->tot_len - pd->off - (th->th_off << 2);
		pd->sport = &th->th_sport;
		pd->dport = &th->th_dport;
		pd->pcksum = &th->th_sum;
		break;
	}
	case IPPROTO_UDP: {
		struct udphdr	*uh = pd->hdr.udp;

		if (!pf_pull_hdr(pd->m, pd->off, uh, sizeof(*uh),
		    NULL, reason, pd->af))
			return (PF_DROP);
		pd->hdrlen = sizeof(*uh);
		if (uh->uh_dport == 0 ||
		    pd->off + ntohs(uh->uh_ulen) > pd->tot_len ||
		    ntohs(uh->uh_ulen) < sizeof(struct udphdr)) {
			REASON_SET(reason, PFRES_SHORT);
			return (PF_DROP);
		}
		pd->sport = &uh->uh_sport;
		pd->dport = &uh->uh_dport;
		pd->pcksum = &uh->uh_sum;
		break;
	}
	case IPPROTO_ICMP: {
		if (!pf_pull_hdr(pd->m, pd->off, pd->hdr.icmp, ICMP_MINLEN,
		    NULL, reason, pd->af))
			return (PF_DROP);
		pd->hdrlen = ICMP_MINLEN;
		if (pd->off + pd->hdrlen > pd->tot_len) {
			REASON_SET(reason, PFRES_SHORT);
			return (PF_DROP);
		}
		pd->pcksum = &pd->hdr.icmp->icmp_cksum;
		break;
	}
#ifdef INET6
	case IPPROTO_ICMPV6: {
		size_t	icmp_hlen = sizeof(struct icmp6_hdr);

		if (!pf_pull_hdr(pd->m, pd->off, pd->hdr.icmp6, icmp_hlen,
		    NULL, reason, pd->af))
			return (PF_DROP);
		/* ICMP headers we look further into to match state */
		switch (pd->hdr.icmp6->icmp6_type) {
		case MLD_LISTENER_QUERY:
		case MLD_LISTENER_REPORT:
			icmp_hlen = sizeof(struct mld_hdr);
			break;
		case ND_NEIGHBOR_SOLICIT:
		case ND_NEIGHBOR_ADVERT:
			icmp_hlen = sizeof(struct nd_neighbor_solicit);
			break;
		}
		if (icmp_hlen > sizeof(struct icmp6_hdr) &&
		    !pf_pull_hdr(pd->m, pd->off, pd->hdr.icmp6, icmp_hlen,
		    NULL, reason, pd->af))
			return (PF_DROP);
		pd->hdrlen = icmp_hlen;
		if (pd->off + pd->hdrlen > pd->tot_len) {
			REASON_SET(reason, PFRES_SHORT);
			return (PF_DROP);
		}
		pd->pcksum = &pd->hdr.icmp6->icmp6_cksum;
		break;
	}
#endif /* INET6 */
	}

	if (pd->sport)
		pd->osport = pd->nsport = *pd->sport;
	if (pd->dport)
		pd->odport = pd->ndport = *pd->dport;

	return (PF_PASS);
}

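/*
 * Update the per-interface, per-rule, per-state and table statistics
 * after a filtering decision.  Counters are indexed by address family,
 * direction and pass/block outcome.
 */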
void
pf_counters_inc(int action, struct pf_pdesc *pd, struct pf_state *s,
    struct pf_rule *r, struct pf_rule *a)
{
	int dirndx;

	pd->kif->pfik_bytes[pd->af == AF_INET6][pd->dir == PF_OUT]
	    [action != PF_PASS] += pd->tot_len;
	pd->kif->pfik_packets[pd->af == AF_INET6][pd->dir == PF_OUT]
	    [action != PF_PASS]++;

	if (action == PF_PASS || action == PF_AFRT || r->action == PF_DROP) {
		dirndx = (pd->dir == PF_OUT);
		r->packets[dirndx]++;
		r->bytes[dirndx] += pd->tot_len;
		if (a != NULL) {
			a->packets[dirndx]++;
			a->bytes[dirndx] += pd->tot_len;
		}
		if (s != NULL) {
			struct pf_rule_item	*ri;
			struct pf_sn_item	*sni;

			SLIST_FOREACH(sni, &s->src_nodes, next) {
				sni->sn->packets[dirndx]++;
				sni->sn->bytes[dirndx] += pd->tot_len;
			}
			dirndx = (pd->dir == s->direction) ? 0 : 1;
			s->packets[dirndx]++;
			s->bytes[dirndx] += pd->tot_len;

			SLIST_FOREACH(ri, &s->match_rules, entry) {
				ri->r->packets[dirndx]++;
				ri->r->bytes[dirndx] += pd->tot_len;

				if (ri->r->src.addr.type == PF_ADDR_TABLE)
					pfr_update_stats(ri->r->src.addr.p.tbl,
					    &s->key[(s->direction == PF_IN)]->
					    addr[(s->direction == PF_OUT)],
					    pd, ri->r->action, ri->r->src.neg);
				if (ri->r->dst.addr.type == PF_ADDR_TABLE)
					pfr_update_stats(ri->r->dst.addr.p.tbl,
					    &s->key[(s->direction == PF_IN)]->
					    addr[(s->direction == PF_IN)],
					    pd, ri->r->action, ri->r->dst.neg);
			}
		}
		if (r->src.addr.type == PF_ADDR_TABLE)
			pfr_update_stats(r->src.addr.p.tbl,
			    (s == NULL) ? pd->src :
			    &s->key[(s->direction == PF_IN)]->
			    addr[(s->direction == PF_OUT)],
			    pd, r->action, r->src.neg);
		if (r->dst.addr.type == PF_ADDR_TABLE)
			pfr_update_stats(r->dst.addr.p.tbl,
			    (s == NULL) ? pd->dst :
			    &s->key[(s->direction == PF_IN)]->
			    addr[(s->direction == PF_IN)],
			    pd, r->action, r->dst.neg);
	}
}

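/*
 * pf_test() is the main entry point for packet filtering.  Below is a
 * minimal sketch of how a caller is expected to drive it; the function
 * name input_hook() is hypothetical, the real call sites live in the
 * ip_input()/ip6_input() and output paths.  The caller must be
 * prepared for *mp to be taken away: after reassembly, duplication or
 * a verdict other than PF_PASS the mbuf pointer may come back NULL.
 */
#if 0
int
input_hook(struct ifnet *ifp, struct mbuf **mp)	/* hypothetical */
{
	if (pf_test(AF_INET, PF_IN, ifp, mp) != PF_PASS) {
		m_freem(*mp);	/* m_freem(NULL) is a no-op */
		*mp = NULL;
		return (EACCES);
	}
	/* *mp == NULL here means PF consumed or queued the packet. */
	return (0);
}
#endif
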
int
pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0)
{
	struct pfi_kif		*kif;
	u_short			 action, reason = 0;
	struct pf_rule		*a = NULL, *r = &pf_default_rule;
	struct pf_state		*s = NULL;
	struct pf_ruleset	*ruleset = NULL;
	struct pf_pdesc		 pd;
	union pf_headers	 pdhdrs;
	int			 dir = (fwdir == PF_FWD) ? PF_OUT : fwdir;
	u_int32_t		 qid, pqid = 0;

	if (!pf_status.running)
		return (PF_PASS);

#if NCARP > 0
	if (ifp->if_type == IFT_CARP && ifp->if_carpdev)
		kif = (struct pfi_kif *)ifp->if_carpdev->if_pf_kif;
	else
#endif /* NCARP */
		kif = (struct pfi_kif *)ifp->if_pf_kif;

	if (kif == NULL) {
		DPFPRINTF(LOG_ERR,
		    "pf_test: kif == NULL, if_xname %s", ifp->if_xname);
		return (PF_DROP);
	}
	if (kif->pfik_flags & PFI_IFLAG_SKIP)
		return (PF_PASS);

#ifdef DIAGNOSTIC
	if (((*m0)->m_flags & M_PKTHDR) == 0)
		panic("non-M_PKTHDR is passed to pf_test");
#endif /* DIAGNOSTIC */

	if ((*m0)->m_pkthdr.pf.flags & PF_TAG_GENERATED)
		return (PF_PASS);

	if ((*m0)->m_pkthdr.pf.flags & PF_TAG_DIVERTED_PACKET)
		return (PF_PASS);

	if ((*m0)->m_pkthdr.pf.flags & PF_TAG_REFRAGMENTED) {
		(*m0)->m_pkthdr.pf.flags &= ~PF_TAG_REFRAGMENTED;
		return (PF_PASS);
	}

	action = pf_setup_pdesc(&pd, &pdhdrs, af, dir, kif, *m0, &reason);
	if (action != PF_PASS) {
#if NPFLOG > 0
		pd.pflog |= PF_LOG_FORCE;
#endif /* NPFLOG > 0 */
		goto done;
	}

	/* packet normalization and reassembly */
	switch (pd.af) {
	case AF_INET:
		action = pf_normalize_ip(&pd, &reason);
		break;
#ifdef INET6
	case AF_INET6:
		action = pf_normalize_ip6(&pd, &reason);
		break;
#endif /* INET6 */
	}
	*m0 = pd.m;
	/* if the packet sits in the reassembly queue, return without error */
	if (pd.m == NULL)
		return (PF_PASS);
	if (action != PF_PASS) {
#if NPFLOG > 0
		pd.pflog |= PF_LOG_FORCE;
#endif /* NPFLOG > 0 */
		goto done;
	}

	/* if the packet has been reassembled, update the packet description */
	if (pf_status.reass && pd.virtual_proto == PF_VPROTO_FRAGMENT) {
		action = pf_setup_pdesc(&pd, &pdhdrs, af, dir, kif, *m0,
		    &reason);
		if (action != PF_PASS) {
#if NPFLOG > 0
			pd.pflog |= PF_LOG_FORCE;
#endif /* NPFLOG > 0 */
			goto done;
		}
	}
	pd.m->m_pkthdr.pf.flags |= PF_TAG_PROCESSED;

	switch (pd.virtual_proto) {

	case PF_VPROTO_FRAGMENT: {
		/*
		 * handle fragments that aren't reassembled by
		 * normalization
		 */
		action = pf_test_rule(&pd, &r, &s, &a, &ruleset, &reason);
		if (action != PF_PASS)
			REASON_SET(&reason, PFRES_FRAG);
		break;
	}

	case IPPROTO_ICMP: {
		if (pd.af != AF_INET) {
			action = PF_DROP;
			REASON_SET(&reason, PFRES_NORM);
			DPFPRINTF(LOG_NOTICE,
			    "dropping IPv6 packet with ICMPv4 payload");
			goto done;
		}
		action = pf_test_state_icmp(&pd, &s, &reason);
		if (action == PF_PASS || action == PF_AFRT) {
#if NPFSYNC > 0
			pfsync_update_state(s);
#endif /* NPFSYNC > 0 */
			r = s->rule.ptr;
			a = s->anchor.ptr;
#if NPFLOG > 0
			pd.pflog |= s->log;
#endif /* NPFLOG > 0 */
		} else if (s == NULL)
			action = pf_test_rule(&pd, &r, &s, &a, &ruleset,
			    &reason);
		break;
	}

#ifdef INET6
	case IPPROTO_ICMPV6: {
		if (pd.af != AF_INET6) {
			action = PF_DROP;
			REASON_SET(&reason, PFRES_NORM);
			DPFPRINTF(LOG_NOTICE,
			    "dropping IPv4 packet with ICMPv6 payload");
			goto done;
		}
		action = pf_test_state_icmp(&pd, &s, &reason);
		if (action == PF_PASS || action == PF_AFRT) {
#if NPFSYNC > 0
			pfsync_update_state(s);
#endif /* NPFSYNC > 0 */
			r = s->rule.ptr;
			a = s->anchor.ptr;
#if NPFLOG > 0
			pd.pflog |= s->log;
#endif /* NPFLOG > 0 */
		} else if (s == NULL)
			action = pf_test_rule(&pd, &r, &s, &a, &ruleset,
			    &reason);
		break;
	}
#endif /* INET6 */

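	/*
	 * All other protocols, TCP and UDP in particular, take the
	 * generic state tracking path: normalize TCP first, then look
	 * up an existing state and fall back to a full ruleset
	 * evaluation only when no state matches.
	 */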
	default:
		if (pd.virtual_proto == IPPROTO_TCP) {
			if ((pd.hdr.tcp->th_flags & TH_ACK) && pd.p_len == 0)
				pqid = 1;
			action = pf_normalize_tcp(&pd);
			if (action == PF_DROP)
				goto done;
		}
		action = pf_test_state(&pd, &s, &reason);
		if (action == PF_PASS || action == PF_AFRT) {
#if NPFSYNC > 0
			pfsync_update_state(s);
#endif /* NPFSYNC > 0 */
			r = s->rule.ptr;
			a = s->anchor.ptr;
#if NPFLOG > 0
			pd.pflog |= s->log;
#endif /* NPFLOG > 0 */
		} else if (s == NULL)
			action = pf_test_rule(&pd, &r, &s, &a, &ruleset,
			    &reason);

		if (pd.virtual_proto == IPPROTO_TCP) {
			if (s) {
				if (s->max_mss)
					pf_normalize_mss(&pd, s->max_mss);
			} else if (r->max_mss)
				pf_normalize_mss(&pd, r->max_mss);
		}

		break;
	}

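	/*
	 * Post-processing: enforce the IP option policy, apply scrub
	 * actions, pick queue and priority, link the state key to the
	 * mbuf and socket, and handle divert, logging and counters
	 * before the final verdict is acted upon.
	 */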
done:
	if (action != PF_DROP) {
		if (s) {
			/* The non-state case is handled in pf_test_rule() */
			if (action == PF_PASS && pd.badopts &&
			    !(s->state_flags & PFSTATE_ALLOWOPTS)) {
				action = PF_DROP;
				REASON_SET(&reason, PFRES_IPOPTIONS);
#if NPFLOG > 0
				pd.pflog |= PF_LOG_FORCE;
#endif /* NPFLOG > 0 */
				DPFPRINTF(LOG_NOTICE, "dropping packet with "
				    "ip/ipv6 options in pf_test()");
			}

			pf_scrub(pd.m, s->state_flags, pd.af, s->min_ttl,
			    s->set_tos);
			pf_tag_packet(pd.m, s->tag, s->rtableid[pd.didx]);
			if (pqid || (pd.tos & IPTOS_LOWDELAY)) {
				qid = s->pqid;
				if (s->state_flags & PFSTATE_SETPRIO)
					pd.m->m_pkthdr.pf.prio = s->set_prio[1];
			} else {
				qid = s->qid;
				if (s->state_flags & PFSTATE_SETPRIO)
					pd.m->m_pkthdr.pf.prio = s->set_prio[0];
			}
		} else {
			pf_scrub(pd.m, r->scrub_flags, pd.af, r->min_ttl,
			    r->set_tos);
			if (pqid || (pd.tos & IPTOS_LOWDELAY)) {
				qid = r->pqid;
				if (r->scrub_flags & PFSTATE_SETPRIO)
					pd.m->m_pkthdr.pf.prio = r->set_prio[1];
			} else {
				qid = r->qid;
				if (r->scrub_flags & PFSTATE_SETPRIO)
					pd.m->m_pkthdr.pf.prio = r->set_prio[0];
			}
		}
	}

	if (action == PF_PASS && qid)
		pd.m->m_pkthdr.pf.qid = qid;
	if (pd.dir == PF_IN && s && s->key[PF_SK_STACK]) {
		/*
		 * The check below fires whenever the caller forgets to
		 * call pf_pkt_addr_changed().  This might happen when
		 * we deal with IP tunnels.
		 */
		if (pd.m->m_pkthdr.pf.statekey != NULL) {
#ifdef DDB
			m_print(pd.m, printf);
#endif
			panic("incoming mbuf already has a statekey");
		}
		pd.m->m_pkthdr.pf.statekey =
		    pf_state_key_ref(s->key[PF_SK_STACK]);
	}
	if (pd.dir == PF_OUT &&
	    pd.m->m_pkthdr.pf.inp && !pd.m->m_pkthdr.pf.inp->inp_pf_sk &&
	    s && s->key[PF_SK_STACK] && !s->key[PF_SK_STACK]->inp) {
		pd.m->m_pkthdr.pf.inp->inp_pf_sk =
		    pf_state_key_ref(s->key[PF_SK_STACK]);
		s->key[PF_SK_STACK]->inp = pd.m->m_pkthdr.pf.inp;
	}

	if (s) {
		pd.m->m_pkthdr.ph_flowid = M_FLOWID_VALID |
		    (M_FLOWID_MASK & bemtoh64(&s->id));
	}

	/*
	 * connections redirected to loopback should not match sockets
	 * bound specifically to loopback due to security implications,
	 * see tcp_input() and in_pcblookup_listen().
	 */
	if (pd.destchg)
		if ((pd.af == AF_INET && (ntohl(pd.dst->v4.s_addr) >>
		    IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) ||
		    (pd.af == AF_INET6 && IN6_IS_ADDR_LOOPBACK(&pd.dst->v6)))
			pd.m->m_pkthdr.pf.flags |= PF_TAG_TRANSLATE_LOCALHOST;
	/* We need to redo the route lookup on outgoing routes. */
	if (pd.destchg && pd.dir == PF_OUT)
		pd.m->m_pkthdr.pf.flags |= PF_TAG_REROUTE;

	if (pd.dir == PF_IN && action == PF_PASS && r->divert.port) {
		struct pf_divert *divert;

		if ((divert = pf_get_divert(pd.m))) {
			pd.m->m_pkthdr.pf.flags |= PF_TAG_DIVERTED;
			divert->port = r->divert.port;
			divert->rdomain = pd.rdomain;
			divert->addr = r->divert.addr;
		}
	}

	if (action == PF_PASS && r->divert_packet.port)
		action = PF_DIVERT;

#if NPFLOG > 0
	if (pd.pflog) {
		struct pf_rule_item	*ri;

		if (pd.pflog & PF_LOG_FORCE || r->log & PF_LOG_ALL)
			PFLOG_PACKET(&pd, reason, r, a, ruleset, NULL);
		if (s) {
			SLIST_FOREACH(ri, &s->match_rules, entry)
				if (ri->r->log & PF_LOG_ALL)
					PFLOG_PACKET(&pd, reason, ri->r, a,
					    ruleset, NULL);
		}
	}
#endif /* NPFLOG > 0 */

	pf_counters_inc(action, &pd, s, r, a);

	switch (action) {
	case PF_SYNPROXY_DROP:
		m_freem(*m0);
		/* FALLTHROUGH */
	case PF_DEFER:
		*m0 = NULL;
		action = PF_PASS;
		break;
	case PF_DIVERT:
		switch (pd.af) {
		case AF_INET:
			if (!divert_packet(pd.m, pd.dir, r->divert_packet.port))
				*m0 = NULL;
			break;
#ifdef INET6
		case AF_INET6:
			if (!divert6_packet(pd.m, pd.dir,
			    r->divert_packet.port))
				*m0 = NULL;
			break;
#endif /* INET6 */
		}
		action = PF_PASS;
		break;
#ifdef INET6
	case PF_AFRT:
		if (pf_translate_af(&pd)) {
			if (!pd.m)
				*m0 = NULL;
			action = PF_DROP;
			break;
		}
		if (pd.naf == AF_INET)
			pf_route(&pd.m, r, dir, kif->pfik_ifp, s);
		if (pd.naf == AF_INET6)
			pf_route6(&pd.m, r, dir, kif->pfik_ifp, s);
		*m0 = NULL;
		action = PF_PASS;
		break;
#endif /* INET6 */
	case PF_DROP:
		m_freem(*m0);
		*m0 = NULL;
		break;
	default:
		/* pf_route() can free the mbuf causing *m0 to become NULL */
		if (r->rt) {
			switch (pd.af) {
			case AF_INET:
				pf_route(m0, r, pd.dir, pd.kif->pfik_ifp, s);
				break;
#ifdef INET6
			case AF_INET6:
				pf_route6(m0, r, pd.dir, pd.kif->pfik_ifp, s);
				break;
#endif /* INET6 */
			}
		}
		break;
	}

#ifdef INET6
	/* if the reassembled packet passed, create new fragments */
	if (pf_status.reass && action == PF_PASS && *m0 && fwdir == PF_FWD) {
		struct m_tag	*mtag;

		if ((mtag = m_tag_find(*m0, PACKET_TAG_PF_REASSEMBLED, NULL)))
			action = pf_refragment6(m0, mtag, NULL, NULL);
	}
#endif /* INET6 */
	if (s && action != PF_DROP) {
		if (!s->if_index_in && dir == PF_IN)
			s->if_index_in = ifp->if_index;
		else if (!s->if_index_out && dir == PF_OUT)
			s->if_index_out = ifp->if_index;
	}

	return (action);
}

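/*
 * Decide whether a packet is destined for the local host: returns 1
 * when it is certainly local (diverted, or its state key is linked to
 * a socket), 0 when linked state keys show it is certainly forwarded,
 * and -1 when PF cannot tell.
 */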
int
pf_ouraddr(struct mbuf *m)
{
	struct pf_state_key	*sk;

	if (m->m_pkthdr.pf.flags & PF_TAG_DIVERTED)
		return (1);

	sk = m->m_pkthdr.pf.statekey;
	if (sk != NULL) {
		if (sk->inp != NULL)
			return (1);

		/* If we have linked state keys it is certainly forwarded. */
		if (sk->reverse != NULL)
			return (0);
	}

	return (-1);
}

/*
 * must be called whenever any addressing information such as
 * address, port or protocol has changed
 */
void
pf_pkt_addr_changed(struct mbuf *m)
{
	pf_pkt_unlink_state_key(m);
	m->m_pkthdr.pf.inp = NULL;
}

struct inpcb *
pf_inp_lookup(struct mbuf *m)
{
	struct inpcb		*inp = NULL;
	struct pf_state_key	*sk = m->m_pkthdr.pf.statekey;

	if (!pf_state_key_isvalid(sk))
		pf_pkt_unlink_state_key(m);
	else
		inp = m->m_pkthdr.pf.statekey->inp;

	if (inp && inp->inp_pf_sk)
		KASSERT(m->m_pkthdr.pf.statekey == inp->inp_pf_sk);

	return (inp);
}

void
pf_inp_link(struct mbuf *m, struct inpcb *inp)
{
	struct pf_state_key	*sk = m->m_pkthdr.pf.statekey;

	if (!pf_state_key_isvalid(sk)) {
		pf_pkt_unlink_state_key(m);
		return;
	}

	/*
	 * we don't need to grab the PF lock here.  In the worst case
	 * we link the inp to a state which is just being marked as
	 * deleted by another thread.
	 */
	if (inp && !sk->inp && !inp->inp_pf_sk) {
		sk->inp = inp;
		inp->inp_pf_sk = pf_state_key_ref(sk);
	}
	/* The statekey has finished finding the inp, it is no longer needed. */
	pf_pkt_unlink_state_key(m);
}

void
pf_inp_unlink(struct inpcb *inp)
{
	if (inp->inp_pf_sk) {
		inp->inp_pf_sk->inp = NULL;
		pf_inpcb_unlink_state_key(inp);
	}
}

void
pf_state_key_link(struct pf_state_key *sk, struct pf_state_key *pkt_sk)
{
	/*
	 * Assert will not fire as long as we are called by pf_find_state()
	 */
	KASSERT((pkt_sk->reverse == NULL) && (sk->reverse == NULL));
	pkt_sk->reverse = pf_state_key_ref(sk);
	sk->reverse = pf_state_key_ref(pkt_sk);
}

#if NPFLOG > 0
void
pf_log_matches(struct pf_pdesc *pd, struct pf_rule *rm, struct pf_rule *am,
    struct pf_ruleset *ruleset, struct pf_rule_slist *matchrules)
{
	struct pf_rule_item	*ri;

	/* if this is the log(matches) rule, the packet has been logged already */
	if (rm->log & PF_LOG_MATCHES)
		return;

	SLIST_FOREACH(ri, matchrules, entry)
		if (ri->r->log & PF_LOG_MATCHES)
			PFLOG_PACKET(pd, PFRES_MATCH, rm, am, ruleset, ri->r);
}
#endif /* NPFLOG > 0 */

struct pf_state_key *
pf_state_key_ref(struct pf_state_key *sk)
{
	if (sk != NULL)
		PF_REF_TAKE(sk->refcnt);

	return (sk);
}

void
pf_state_key_unref(struct pf_state_key *sk)
{
	if ((sk != NULL) && PF_REF_RELE(sk->refcnt)) {
		/* state key must be removed from tree */
		KASSERT(!pf_state_key_isvalid(sk));
		/* state key must be unlinked from reverse key */
		KASSERT(sk->reverse == NULL);
		/* state key must be unlinked from socket */
		KASSERT((sk->inp == NULL) || (sk->inp->inp_pf_sk == NULL));
		sk->inp = NULL;
		pool_put(&pf_state_key_pl, sk);
	}
}

int
pf_state_key_isvalid(struct pf_state_key *sk)
{
	return ((sk != NULL) && (sk->removed == 0));
}

void
pf_pkt_unlink_state_key(struct mbuf *m)
{
	pf_state_key_unref(m->m_pkthdr.pf.statekey);
	m->m_pkthdr.pf.statekey = NULL;
}

void
pf_pkt_state_key_ref(struct mbuf *m)
{
	pf_state_key_ref(m->m_pkthdr.pf.statekey);
}

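/*
 * The remaining two helpers drop the state key reference held by a
 * PCB and break a pair of mutually linked forward/reverse state keys.
 * Both tolerate NULL and already-unlinked arguments.
 */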
void
pf_inpcb_unlink_state_key(struct inpcb *inp)
{
	if (inp != NULL) {
		pf_state_key_unref(inp->inp_pf_sk);
		inp->inp_pf_sk = NULL;
	}
}

void
pf_state_key_unlink_reverse(struct pf_state_key *sk)
{
	if ((sk != NULL) && (sk->reverse != NULL)) {
		pf_state_key_unref(sk->reverse->reverse);
		sk->reverse->reverse = NULL;
		pf_state_key_unref(sk->reverse);
		sk->reverse = NULL;
	}
}