1 /* $OpenBSD: pf.c,v 1.1094 2020/07/24 18:17:15 mvs Exp $ */ 2 3 /* 4 * Copyright (c) 2001 Daniel Hartmeier 5 * Copyright (c) 2002 - 2013 Henning Brauer <henning@openbsd.org> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * - Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * - Redistributions in binary form must reproduce the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer in the documentation and/or other materials provided 17 * with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 22 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 23 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 25 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 29 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 * 32 * Effort sponsored in part by the Defense Advanced Research Projects 33 * Agency (DARPA) and Air Force Research Laboratory, Air Force 34 * Materiel Command, USAF, under agreement number F30602-01-2-0537. 
35 * 36 */ 37 38 #include "bpfilter.h" 39 #include "carp.h" 40 #include "pflog.h" 41 #include "pfsync.h" 42 #include "pflow.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/mbuf.h> 47 #include <sys/filio.h> 48 #include <sys/socket.h> 49 #include <sys/socketvar.h> 50 #include <sys/kernel.h> 51 #include <sys/time.h> 52 #include <sys/pool.h> 53 #include <sys/proc.h> 54 #include <sys/rwlock.h> 55 #include <sys/syslog.h> 56 57 #include <crypto/sha2.h> 58 59 #include <net/if.h> 60 #include <net/if_var.h> 61 #include <net/if_types.h> 62 #include <net/route.h> 63 64 #include <netinet/in.h> 65 #include <netinet/in_var.h> 66 #include <netinet/ip.h> 67 #include <netinet/in_pcb.h> 68 #include <netinet/ip_var.h> 69 #include <netinet/ip_icmp.h> 70 #include <netinet/icmp_var.h> 71 #include <netinet/tcp.h> 72 #include <netinet/tcp_seq.h> 73 #include <netinet/tcp_timer.h> 74 #include <netinet/tcp_var.h> 75 #include <netinet/tcp_fsm.h> 76 #include <netinet/udp.h> 77 #include <netinet/udp_var.h> 78 #include <netinet/ip_divert.h> 79 80 #ifdef INET6 81 #include <netinet6/in6_var.h> 82 #include <netinet/ip6.h> 83 #include <netinet6/ip6_var.h> 84 #include <netinet/icmp6.h> 85 #include <netinet6/nd6.h> 86 #include <netinet6/ip6_divert.h> 87 #endif /* INET6 */ 88 89 #include <net/pfvar.h> 90 #include <net/pfvar_priv.h> 91 92 #if NPFLOG > 0 93 #include <net/if_pflog.h> 94 #endif /* NPFLOG > 0 */ 95 96 #if NPFLOW > 0 97 #include <net/if_pflow.h> 98 #endif /* NPFLOW > 0 */ 99 100 #if NPFSYNC > 0 101 #include <net/if_pfsync.h> 102 #endif /* NPFSYNC > 0 */ 103 104 #ifdef DDB 105 #include <machine/db_machdep.h> 106 #include <ddb/db_interface.h> 107 #endif 108 109 /* 110 * Global variables 111 */ 112 struct pf_state_tree pf_statetbl; 113 struct pf_queuehead pf_queues[2]; 114 struct pf_queuehead *pf_queues_active; 115 struct pf_queuehead *pf_queues_inactive; 116 117 struct pf_status pf_status; 118 119 int pf_hdr_limit = 20; /* arbitrary limit, tune in ddb */ 120 121 
/* Secret material for RFC 6528-style TCP ISN generation (see pf_tcp_iss). */
SHA2_CTX	 pf_tcp_secret_ctx;
u_char		 pf_tcp_secret[16];
int		 pf_tcp_secret_init;
int		 pf_tcp_iss_off;

/* Periodic purge machinery: timeout re-arms a task on the net taskq. */
int		 pf_npurge;
struct task	 pf_purge_task = TASK_INITIALIZER(pf_purge, &pf_npurge);
struct timeout	 pf_purge_to = TIMEOUT_INITIALIZER(pf_purge_timeout, NULL);

/* Outcome of evaluating a rule/anchor during pf_match_rule(). */
enum pf_test_status {
	PF_TEST_FAIL = -1,
	PF_TEST_OK,
	PF_TEST_QUICK
};

/*
 * Per-packet rule evaluation context, threaded through pf_match_rule()
 * and pf_step_into_anchor() instead of a long parameter list.
 */
struct pf_test_ctx {
	enum pf_test_status	  test_status;
	struct pf_pdesc		 *pd;
	struct pf_rule_actions	  act;
	u_int8_t		  icmpcode;
	u_int8_t		  icmptype;
	int			  icmp_dir;
	int			  state_icmp;
	int			  tag;
	u_short			  reason;
	struct pf_rule_item	 *ri;
	struct pf_src_node	 *sns[PF_SN_MAX];
	struct pf_rule_slist	  rules;
	struct pf_rule		 *nr;
	struct pf_rule		**rm;
	struct pf_rule		 *a;
	struct pf_rule		**am;
	struct pf_ruleset	**rsm;
	struct pf_ruleset	 *arsm;
	struct pf_ruleset	 *aruleset;
	struct tcphdr		 *th;
	int			  depth;	/* anchor recursion depth */
};

#define	PF_ANCHOR_STACK_MAX	64

/* Backing pools for pf's dynamically allocated objects. */
struct pool		 pf_src_tree_pl, pf_rule_pl, pf_queue_pl;
struct pool		 pf_state_pl, pf_state_key_pl, pf_state_item_pl;
struct pool		 pf_rule_item_pl, pf_sn_item_pl, pf_pktdelay_pl;

/* Forward declarations for this file's internal helpers. */
void			 pf_add_threshold(struct pf_threshold *);
int			 pf_check_threshold(struct pf_threshold *);
int			 pf_check_tcp_cksum(struct mbuf *, int, int,
			    sa_family_t);
static __inline void	 pf_cksum_fixup(u_int16_t *, u_int16_t, u_int16_t,
			    u_int8_t);
void			 pf_cksum_fixup_a(u_int16_t *, const struct pf_addr *,
			    const struct pf_addr *, sa_family_t, u_int8_t);
int			 pf_modulate_sack(struct pf_pdesc *,
			    struct pf_state_peer *);
int			 pf_icmp_mapping(struct pf_pdesc *, u_int8_t, int *,
			    u_int16_t *, u_int16_t *);
int			 pf_change_icmp_af(struct mbuf *, int,
			    struct pf_pdesc *, struct pf_pdesc *,
			    struct pf_addr *, struct pf_addr *, sa_family_t,
			    sa_family_t);
int			 pf_translate_a(struct pf_pdesc *, struct pf_addr *,
			    struct pf_addr *);
void			 pf_translate_icmp(struct
				  PFFRAG_FRENT_HIWAT },
	{ &pfr_ktable_pl,	PFR_KTABLE_HIWAT,	PFR_KTABLE_HIWAT },
	{ &pfr_kentry_pl,	PFR_KENTRY_HIWAT,	PFR_KENTRY_HIWAT },
	{ &pf_pktdelay_pl,	PF_PKTDELAY_MAXPKTS,	PF_PKTDELAY_MAXPKTS }
};

/* Interface a new state is bound to: rule's kif if if-bound, else any. */
#define BOUND_IFACE(r, k) \
	((r)->rule_flag & PFRULE_IFBOUND) ? (k) : pfi_all

/* Bump state counters on the matching rule, anchor and match-list rules. */
#define STATE_INC_COUNTERS(s)					\
	do {							\
		struct pf_rule_item *mrm;			\
		s->rule.ptr->states_cur++;			\
		s->rule.ptr->states_tot++;			\
		if (s->anchor.ptr != NULL) {			\
			s->anchor.ptr->states_cur++;		\
			s->anchor.ptr->states_tot++;		\
		}						\
		SLIST_FOREACH(mrm, &s->match_rules, entry)	\
			mrm->r->states_cur++;			\
	} while (0)

static __inline int pf_src_compare(struct pf_src_node *, struct pf_src_node *);
static __inline int pf_state_compare_key(struct pf_state_key *,
	struct pf_state_key *);
static __inline int pf_state_compare_id(struct pf_state *,
	struct pf_state *);
#ifdef INET6
static __inline void pf_cksum_uncover(u_int16_t *, u_int16_t, u_int8_t);
static __inline void pf_cksum_cover(u_int16_t *, u_int16_t, u_int8_t);
#endif /* INET6 */
static __inline void pf_set_protostate(struct pf_state *, int, u_int8_t);

/* RB tree of source-tracking nodes (sticky-address / max-src-* limits). */
struct pf_src_tree tree_src_tracking;

/* States indexed by (id, creatorid), plus the insertion-ordered list. */
struct pf_state_tree_id tree_id;
struct pf_state_queue state_list;

RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare);
RB_GENERATE(pf_state_tree, pf_state_key, entry, pf_state_compare_key);
RB_GENERATE(pf_state_tree_id, pf_state,
    entry_id, pf_state_compare_id);

/* Rules flagged PFRULE_EXPIRED, queued for garbage collection. */
SLIST_HEAD(pf_rule_gcl, pf_rule)	pf_rule_gcl =
	SLIST_HEAD_INITIALIZER(pf_rule_gcl);

/*
 * Three-way comparison of two addresses of family af for tree ordering.
 * Returns -1/0/1.  For IPv6 the 32-bit words are compared starting at
 * addr32[3]; this gives a stable total order for the RB trees (it is not
 * meant to be numeric address order).
 */
__inline int
pf_addr_compare(struct pf_addr *a, struct pf_addr *b, sa_family_t af)
{
	switch (af) {
	case AF_INET:
		if (a->addr32[0] > b->addr32[0])
			return (1);
		if (a->addr32[0] < b->addr32[0])
			return (-1);
		break;
#ifdef INET6
	case AF_INET6:
		if (a->addr32[3] > b->addr32[3])
			return (1);
		if (a->addr32[3] < b->addr32[3])
			return (-1);
		if (a->addr32[2] > b->addr32[2])
			return (1);
		if (a->addr32[2] < b->addr32[2])
			return (-1);
		if (a->addr32[1] > b->addr32[1])
			return (1);
		if (a->addr32[1] < b->addr32[1])
			return (-1);
		if (a->addr32[0] > b->addr32[0])
			return (1);
		if (a->addr32[0] < b->addr32[0])
			return (-1);
		break;
#endif /* INET6 */
	}
	return (0);
}

/* Ordering for tree_src_tracking: by rule, type, af, then address. */
static __inline int
pf_src_compare(struct pf_src_node *a, struct pf_src_node *b)
{
	int	diff;

	if (a->rule.ptr > b->rule.ptr)
		return (1);
	if (a->rule.ptr < b->rule.ptr)
		return (-1);
	if ((diff = a->type - b->type) != 0)
		return (diff);
	if ((diff = a->af - b->af) != 0)
		return (diff);
	if ((diff = pf_addr_compare(&a->addr, &b->addr, a->af)) != 0)
		return (diff);
	return (0);
}

/*
 * Set the protocol state of one or both peers of a state.  For TCP states
 * created by this host, also maintain pf_status.states_halfopen: decrement
 * it when the source peer transitions from half-open (pre-established,
 * not closed) to established or closed.
 */
static __inline void
pf_set_protostate(struct pf_state *s, int which, u_int8_t newstate)
{
	if (which == PF_PEER_DST || which == PF_PEER_BOTH)
		s->dst.state = newstate;
	if (which == PF_PEER_DST)
		return;

	if (s->src.state == newstate)
		return;
	if (s->creatorid == pf_status.hostid && s->key[PF_SK_STACK] != NULL &&
	    s->key[PF_SK_STACK]->proto == IPPROTO_TCP &&
	    !(TCPS_HAVEESTABLISHED(s->src.state) ||
	    s->src.state == TCPS_CLOSED) &&
	    (TCPS_HAVEESTABLISHED(newstate) || newstate == TCPS_CLOSED))
		pf_status.states_halfopen--;

	s->src.state = newstate;
}

/* Copy a pf_addr of the given family; panics on an unknown family. */
void
pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af)
{
	switch (af) {
	case AF_INET:
		dst->addr32[0] = src->addr32[0];
		break;
#ifdef INET6
	case AF_INET6:
		dst->addr32[0] = src->addr32[0];
		dst->addr32[1] = src->addr32[1];
		dst->addr32[2] = src->addr32[2];
		dst->addr32[3] = src->addr32[3];
		break;
#endif /* INET6 */
	default:
		unhandled_af(af);
	}
}

/*
 * Initialize a rate threshold of `limit' events per `seconds'.  Counts
 * are scaled by PF_THRESHOLD_MULT so the decay below can use integer math.
 */
void
pf_init_threshold(struct pf_threshold *threshold,
    u_int32_t limit, u_int32_t seconds)
{
	threshold->limit = limit * PF_THRESHOLD_MULT;
	threshold->seconds = seconds;
	threshold->count = 0;
	threshold->last = getuptime();
}

/*
 * Record one event: linearly decay the scaled count over the elapsed
 * time (full reset once a whole window has passed), then add one event.
 */
void
pf_add_threshold(struct pf_threshold *threshold)
{
	u_int32_t t = getuptime(), diff = t - threshold->last;

	if (diff >= threshold->seconds)
		threshold->count = 0;
	else
		threshold->count -= threshold->count * diff /
		    threshold->seconds;
	threshold->count += PF_THRESHOLD_MULT;
	threshold->last = t;
}

/* Return non-zero when the configured rate limit has been exceeded. */
int
pf_check_threshold(struct pf_threshold *threshold)
{
	return (threshold->count > threshold->limit);
}

/*
 * Enforce the rule's max-src-conn / max-src-conn-rate limits against the
 * state's source-tracking node.  On violation, optionally insert the
 * offending source into the overload table and flush its other states,
 * then kill this state.  Returns 1 if the state was killed, 0 otherwise.
 */
int
pf_src_connlimit(struct pf_state **state)
{
	int			 bad = 0;
	struct pf_src_node	*sn;

	if ((sn = pf_get_src_node((*state), PF_SN_NONE)) == NULL)
		return (0);

	sn->conn++;
	(*state)->src.tcp_est = 1;
	pf_add_threshold(&sn->conn_rate);

	if ((*state)->rule.ptr->max_src_conn &&
	    (*state)->rule.ptr->max_src_conn < sn->conn) {
		pf_status.lcounters[LCNT_SRCCONN]++;
		bad++;
	}

	if ((*state)->rule.ptr->max_src_conn_rate.limit &&
	    pf_check_threshold(&sn->conn_rate)) {
		pf_status.lcounters[LCNT_SRCCONNRATE]++;
		bad++;
	}

	if (!bad)
		return (0);

	if ((*state)->rule.ptr->overload_tbl) {
		struct pfr_addr p;
		u_int32_t	killed = 0;

		pf_status.lcounters[LCNT_OVERLOAD_TABLE]++;
		if (pf_status.debug >= LOG_NOTICE) {
			log(LOG_NOTICE,
			    "pf: pf_src_connlimit: blocking address ");
			pf_print_host(&sn->addr, 0,
			    (*state)->key[PF_SK_WIRE]->af);
		}

		memset(&p, 0, sizeof(p));
		p.pfra_af = (*state)->key[PF_SK_WIRE]->af;
		switch ((*state)->key[PF_SK_WIRE]->af) {
		case AF_INET:
			p.pfra_net = 32;
			p.pfra_ip4addr = sn->addr.v4;
			break;
#ifdef INET6
		case AF_INET6:
			p.pfra_net = 128;
			p.pfra_ip6addr = sn->addr.v6;
			break;
#endif /* INET6 */
		}

		pfr_insert_kentry((*state)->rule.ptr->overload_tbl,
		    &p, gettime());

		/* kill existing states if that's required. */
		if ((*state)->rule.ptr->flush) {
			struct pf_state_key *sk;
			struct pf_state *st;

			pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++;
			RB_FOREACH(st, pf_state_tree_id, &tree_id) {
				sk = st->key[PF_SK_WIRE];
				/*
				 * Kill states from this source. (Only those
				 * from the same rule if PF_FLUSH_GLOBAL is not
				 * set)
				 */
				if (sk->af ==
				    (*state)->key[PF_SK_WIRE]->af &&
				    (((*state)->direction == PF_OUT &&
				    PF_AEQ(&sn->addr, &sk->addr[1], sk->af)) ||
				    ((*state)->direction == PF_IN &&
				    PF_AEQ(&sn->addr, &sk->addr[0], sk->af))) &&
				    ((*state)->rule.ptr->flush &
				    PF_FLUSH_GLOBAL ||
				    (*state)->rule.ptr == st->rule.ptr)) {
					/* expire on next purge pass */
					st->timeout = PFTM_PURGE;
					pf_set_protostate(st, PF_PEER_BOTH,
					    TCPS_CLOSED);
					killed++;
				}
			}
			if (pf_status.debug >= LOG_NOTICE)
				addlog(", %u states killed", killed);
		}
		if (pf_status.debug >= LOG_NOTICE)
			addlog("\n");
	}

	/* kill this state */
	(*state)->timeout = PFTM_PURGE;
	pf_set_protostate(*state, PF_PEER_BOTH, TCPS_CLOSED);
	return (1);
}

/*
 * Find or create the source-tracking node for (rule, type, af, src) and
 * leave it in *sn.  Creation honours the rule's max-src-nodes limit; an
 * existing node is rejected when max-src-states is already reached.
 * Returns 0 on success, -1 on failure (limit hit or allocation/insert
 * failure).
 */
int
pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule,
    enum pf_sn_types type, sa_family_t af, struct pf_addr *src,
    struct pf_addr *raddr, struct pfi_kif *kif)
{
	struct pf_src_node	k;

	if (*sn == NULL) {
		/* look up an existing node first */
		k.af = af;
		k.type = type;
		pf_addrcpy(&k.addr, src, af);
		k.rule.ptr = rule;
		pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
		*sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
	}
	if (*sn == NULL) {
		if (!rule->max_src_nodes ||
		    rule->src_nodes < rule->max_src_nodes)
			(*sn) = pool_get(&pf_src_tree_pl, PR_NOWAIT | PR_ZERO);
		else
			pf_status.lcounters[LCNT_SRCNODES]++;
		if ((*sn) == NULL)
			return (-1);

		pf_init_threshold(&(*sn)->conn_rate,
		    rule->max_src_conn_rate.limit,
		    rule->max_src_conn_rate.seconds);

		(*sn)->type = type;
		(*sn)->af = af;
		(*sn)->rule.ptr = rule;
		pf_addrcpy(&(*sn)->addr, src, af);
		if (raddr)
			pf_addrcpy(&(*sn)->raddr, raddr, af);
		if (RB_INSERT(pf_src_tree,
		    &tree_src_tracking, *sn) != NULL) {
			if (pf_status.debug >= LOG_NOTICE) {
				log(LOG_NOTICE,
				    "pf: src_tree insert failed: ");
				pf_print_host(&(*sn)->addr, 0, af);
				addlog("\n");
			}
			pool_put(&pf_src_tree_pl, *sn);
			return (-1);
		}
		(*sn)->creation = getuptime();
		(*sn)->rule.ptr->src_nodes++;
		if (kif != NULL) {
			(*sn)->kif = kif;
			pfi_kif_ref(kif, PFI_KIF_REF_SRCNODE);
		}
		pf_status.scounters[SCNT_SRC_NODE_INSERT]++;
		pf_status.src_nodes++;
	} else {
		if (rule->max_src_states &&
		    (*sn)->states >= rule->max_src_states) {
			pf_status.lcounters[LCNT_SRCSTATES]++;
			return (-1);
		}
	}
	return (0);
}

/*
 * Free a source node that is no longer in use (no states and expired).
 * May also garbage-collect its rule when that was only kept alive by
 * this node.
 */
void
pf_remove_src_node(struct pf_src_node *sn)
{
	if (sn->states > 0 || sn->expire > getuptime())
		return;

	sn->rule.ptr->src_nodes--;
	if (sn->rule.ptr->states_cur == 0 &&
	    sn->rule.ptr->src_nodes == 0)
		pf_rm_rule(NULL, sn->rule.ptr);
	RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
	pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
	pf_status.src_nodes--;
	pfi_kif_unref(sn->kif, PFI_KIF_REF_SRCNODE);
	pool_put(&pf_src_tree_pl, sn);
}

/* Return the state's source node of the given type, or NULL. */
struct pf_src_node *
pf_get_src_node(struct pf_state *s, enum pf_sn_types type)
{
	struct pf_sn_item	*sni;

	SLIST_FOREACH(sni, &s->src_nodes, next)
		if (sni->sn->type == type)
			return (sni->sn);
	return (NULL);
}

/*
 * Unlink every reference to source node sn from state s, dropping the
 * node's state count for each removed item.
 */
void
pf_state_rm_src_node(struct pf_state *s, struct pf_src_node *sn)
{
	struct pf_sn_item	*sni, *snin, *snip = NULL;

	for (sni = SLIST_FIRST(&s->src_nodes); sni; sni = snin) {
		snin = SLIST_NEXT(sni, next);
		if (sni->sn == sn) {
			if (snip)
				SLIST_REMOVE_AFTER(snip, next);
			else
				SLIST_REMOVE_HEAD(&s->src_nodes, next);
			pool_put(&pf_sn_item_pl, sni);
			sni = NULL;
			sn->states--;
		}
		/* snip trails the last surviving item for REMOVE_AFTER */
		if (sni != NULL)
			snip = sni;
	}
}

/* state table stuff */

/*
 * Ordering for the global state-key RB tree: proto, af, both addresses,
 * both ports, routing domain.
 */
static __inline int
pf_state_compare_key(struct pf_state_key *a, struct pf_state_key *b)
{
	int	diff;

	if ((diff = a->proto - b->proto) != 0)
		return (diff);
	if ((diff = a->af - b->af) != 0)
		return (diff);
	if ((diff = pf_addr_compare(&a->addr[0], &b->addr[0], a->af)) != 0)
		return (diff);
	if ((diff = pf_addr_compare(&a->addr[1], &b->addr[1], a->af)) != 0)
		return (diff);
	if ((diff = a->port[0] - b->port[0]) != 0)
		return (diff);
	if ((diff = a->port[1] - b->port[1]) != 0)
		return (diff);
	if ((diff = a->rdomain - b->rdomain) != 0)
		return (diff);
	return (0);
}

/* Ordering for tree_id: by state id, then creator host id. */
static __inline int
pf_state_compare_id(struct pf_state *a, struct pf_state *b)
{
	if (a->id > b->id)
		return (1);
	if (a->id < b->id)
		return (-1);
	if (a->creatorid > b->creatorid)
		return (1);
	if (a->creatorid < b->creatorid)
		return (-1);

	return (0);
}

/*
 * Attach state key sk to state s at slot idx (PF_SK_WIRE or PF_SK_STACK),
 * inserting it into the global key tree.  If an equal key already exists,
 * the state joins that key unless a conflicting state is found on it: a
 * fully closed TCP conflict is recycled (removed after we are linked, or
 * its keys could go away), anything else is a collision and fails.
 * On any failure sk has been consumed (returned to its pool); returns -1.
 */
int
pf_state_key_attach(struct pf_state_key *sk, struct pf_state *s, int idx)
{
	struct pf_state_item	*si;
	struct pf_state_key     *cur;
	struct pf_state		*olds = NULL;

	KASSERT(s->key[idx] == NULL);
	if ((cur = RB_INSERT(pf_state_tree, &pf_statetbl, sk)) != NULL) {
		/* key exists. check for same kif, if none, add to key */
		TAILQ_FOREACH(si, &cur->states, entry)
			if (si->s->kif == s->kif &&
			    ((si->s->key[PF_SK_WIRE]->af == sk->af &&
			    si->s->direction == s->direction) ||
			    (si->s->key[PF_SK_WIRE]->af !=
			    si->s->key[PF_SK_STACK]->af &&
			    sk->af == si->s->key[PF_SK_STACK]->af &&
			    si->s->direction != s->direction))) {
				int reuse = 0;

				if (sk->proto == IPPROTO_TCP &&
				    si->s->src.state >= TCPS_FIN_WAIT_2 &&
				    si->s->dst.state >= TCPS_FIN_WAIT_2)
					reuse = 1;
				if (pf_status.debug >= LOG_NOTICE) {
					log(LOG_NOTICE,
					    "pf: %s key attach %s on %s: ",
					    (idx == PF_SK_WIRE) ?
					    "wire" : "stack",
					    reuse ? "reuse" : "failed",
					    s->kif->pfik_name);
					pf_print_state_parts(s,
					    (idx == PF_SK_WIRE) ? sk : NULL,
					    (idx == PF_SK_STACK) ? sk : NULL);
					addlog(", existing: ");
					pf_print_state_parts(si->s,
					    (idx == PF_SK_WIRE) ? sk : NULL,
					    (idx == PF_SK_STACK) ? sk : NULL);
					addlog("\n");
				}
				if (reuse) {
					pf_set_protostate(si->s, PF_PEER_BOTH,
					    TCPS_CLOSED);
					/* remove late or sks can go away */
					olds = si->s;
				} else {
					pool_put(&pf_state_key_pl, sk);
					return (-1);	/* collision! */
				}
			}
		/* the pre-existing key is used; ours is redundant */
		pool_put(&pf_state_key_pl, sk);
		s->key[idx] = cur;
	} else
		s->key[idx] = sk;

	if ((si = pool_get(&pf_state_item_pl, PR_NOWAIT)) == NULL) {
		pf_state_key_detach(s, idx);
		return (-1);
	}
	si->s = s;

	/* list is sorted, if-bound states before floating */
	if (s->kif == pfi_all)
		TAILQ_INSERT_TAIL(&s->key[idx]->states, si, entry);
	else
		TAILQ_INSERT_HEAD(&s->key[idx]->states, si, entry);

	if (olds)
		pf_remove_state(olds);

	return (0);
}

/* Detach both keys from a state; handle the shared-key (no NAT) case. */
void
pf_detach_state(struct pf_state *s)
{
	if (s->key[PF_SK_WIRE] == s->key[PF_SK_STACK])
		s->key[PF_SK_WIRE] = NULL;

	if (s->key[PF_SK_STACK] != NULL)
		pf_state_key_detach(s, PF_SK_STACK);

	if (s->key[PF_SK_WIRE] != NULL)
		pf_state_key_detach(s, PF_SK_WIRE);
}

/*
 * Remove state s from its key at slot idx.  The last state to leave a
 * key removes the key from the tree, severs its reverse/inpcb links and
 * drops the tree's reference.
 */
void
pf_state_key_detach(struct pf_state *s, int idx)
{
	struct pf_state_item	*si;
	struct pf_state_key	*sk;

	if (s->key[idx] == NULL)
		return;

	si = TAILQ_FIRST(&s->key[idx]->states);
	while (si && si->s != s)
	    si = TAILQ_NEXT(si, entry);

	if (si) {
		TAILQ_REMOVE(&s->key[idx]->states, si, entry);
		pool_put(&pf_state_item_pl, si);
	}

	sk = s->key[idx];
	s->key[idx] = NULL;
	if (TAILQ_EMPTY(&sk->states)) {
		RB_REMOVE(pf_state_tree, &pf_statetbl, sk);
		sk->removed = 1;
		pf_state_key_unlink_reverse(sk);
		pf_state_key_unlink_inpcb(sk);
		pf_state_key_unref(sk);
	}
}

/* Allocate a state key with an initialized (empty) state list. */
struct pf_state_key *
pf_alloc_state_key(int pool_flags)
{
	struct pf_state_key	*sk;

	if ((sk = pool_get(&pf_state_key_pl, pool_flags)) == NULL)
		return (NULL);
	TAILQ_INIT(&sk->states);

	return (sk);
}

/*
 * Fill in the address half of a state key (arg points at a
 * pf_state_key_cmp).  For ICMPv6 neighbor discovery the key is built
 * on the ND target address rather than the packet addresses; multicast
 * variants get a wildcarded peer.  Returns -1 when a multi(cast) form
 * is not acceptable for the ND type, else 0.
 */
static __inline int
pf_state_key_addr_setup(struct pf_pdesc *pd, void *arg, int sidx,
    struct pf_addr *saddr, int didx, struct pf_addr *daddr, int af, int multi)
{
	struct pf_state_key_cmp *key = arg;
#ifdef INET6
	struct
	pf_addr *target;

	if (af == AF_INET || pd->proto != IPPROTO_ICMPV6)
		goto copy;

	switch (pd->hdr.icmp6.icmp6_type) {
	case ND_NEIGHBOR_SOLICIT:
		if (multi)
			return (-1);
		/* key on the solicited target, not the (multicast) dst */
		target = (struct pf_addr *)&pd->hdr.nd_ns.nd_ns_target;
		daddr = target;
		break;
	case ND_NEIGHBOR_ADVERT:
		if (multi)
			return (-1);
		target = (struct pf_addr *)&pd->hdr.nd_ns.nd_ns_target;
		saddr = target;
		if (IN6_IS_ADDR_MULTICAST(&pd->dst->v6)) {
			key->addr[didx].addr32[0] = 0;
			key->addr[didx].addr32[1] = 0;
			key->addr[didx].addr32[2] = 0;
			key->addr[didx].addr32[3] = 0;
			daddr = NULL; /* overwritten */
		}
		break;
	default:
		if (multi) {
			/* wildcard source: link-local all-nodes style */
			key->addr[sidx].addr32[0] = __IPV6_ADDR_INT32_MLL;
			key->addr[sidx].addr32[1] = 0;
			key->addr[sidx].addr32[2] = 0;
			key->addr[sidx].addr32[3] = __IPV6_ADDR_INT32_ONE;
			saddr = NULL; /* overwritten */
		}
	}
 copy:
#endif /* INET6 */
	if (saddr)
		pf_addrcpy(&key->addr[sidx], saddr, af);
	if (daddr)
		pf_addrcpy(&key->addr[didx], daddr, af);

	return (0);
}

/*
 * Build the wire- and stack-side state keys for pd.  When no translation
 * is in effect both pointers share one key; NAT/NAT64 gets a second key
 * with the translated addresses/ports (indices swapped for an af change).
 * On error all allocated keys are freed here; returns 0 or ENOMEM.
 */
int
pf_state_key_setup(struct pf_pdesc *pd, struct pf_state_key **skw,
    struct pf_state_key **sks, int rtableid)
{
	/* if returning error we MUST pool_put state keys ourselves */
	struct pf_state_key *sk1, *sk2;
	u_int wrdom = pd->rdomain;
	int afto = pd->af != pd->naf;

	if ((sk1 = pf_alloc_state_key(PR_NOWAIT | PR_ZERO)) == NULL)
		return (ENOMEM);

	pf_state_key_addr_setup(pd, sk1, pd->sidx, pd->src, pd->didx, pd->dst,
	    pd->af, 0);
	sk1->port[pd->sidx] = pd->osport;
	sk1->port[pd->didx] = pd->odport;
	sk1->proto = pd->proto;
	sk1->af = pd->af;
	sk1->rdomain = pd->rdomain;
	PF_REF_INIT(sk1->refcnt);
	sk1->removed = 0;
	if (rtableid >= 0)
		wrdom = rtable_l2(rtableid);

	if (PF_ANEQ(&pd->nsaddr, pd->src, pd->af) ||
	    PF_ANEQ(&pd->ndaddr, pd->dst, pd->af) ||
	    pd->nsport != pd->osport || pd->ndport != pd->odport ||
	    wrdom != pd->rdomain || afto) {	/* NAT/NAT64 */
		if ((sk2 = pf_alloc_state_key(PR_NOWAIT | PR_ZERO)) == NULL) {
			pool_put(&pf_state_key_pl, sk1);
			return (ENOMEM);
		}
		pf_state_key_addr_setup(pd, sk2, afto ? pd->didx : pd->sidx,
		    &pd->nsaddr, afto ? pd->sidx : pd->didx, &pd->ndaddr,
		    pd->naf, 0);
		sk2->port[afto ? pd->didx : pd->sidx] = pd->nsport;
		sk2->port[afto ? pd->sidx : pd->didx] = pd->ndport;
		if (afto) {
			/* NAT64 swaps the ICMP flavour too */
			switch (pd->proto) {
			case IPPROTO_ICMP:
				sk2->proto = IPPROTO_ICMPV6;
				break;
			case IPPROTO_ICMPV6:
				sk2->proto = IPPROTO_ICMP;
				break;
			default:
				sk2->proto = pd->proto;
			}
		} else
			sk2->proto = pd->proto;
		sk2->af = pd->naf;
		sk2->rdomain = wrdom;
		PF_REF_INIT(sk2->refcnt);
		sk2->removed = 0;
	} else
		sk2 = sk1;

	if (pd->dir == PF_IN) {
		*skw = sk1;
		*sks = sk2;
	} else {
		*sks = sk1;
		*skw = sk2;
	}

	if (pf_status.debug >= LOG_DEBUG) {
		log(LOG_DEBUG, "pf: key setup: ");
		pf_print_state_parts(NULL, *skw, *sks);
		addlog("\n");
	}

	return (0);
}

/*
 * Insert a fully built state into the state table under the pf state
 * write lock: attach both keys (or the single shared key), assign an
 * id/creatorid if none, and link into tree_id and state_list.  Announces
 * the new state to pfsync.  Returns -1 on any attach/insert failure.
 */
int
pf_state_insert(struct pfi_kif *kif, struct pf_state_key **skw,
    struct pf_state_key **sks, struct pf_state *s)
{
	PF_ASSERT_LOCKED();

	s->kif = kif;
	PF_STATE_ENTER_WRITE();
	if (*skw == *sks) {
		if (pf_state_key_attach(*skw, s, PF_SK_WIRE)) {
			PF_STATE_EXIT_WRITE();
			return (-1);
		}
		*skw = *sks = s->key[PF_SK_WIRE];
		s->key[PF_SK_STACK] = s->key[PF_SK_WIRE];
	} else {
		if (pf_state_key_attach(*skw, s, PF_SK_WIRE)) {
			pool_put(&pf_state_key_pl, *sks);
			PF_STATE_EXIT_WRITE();
			return (-1);
		}
		*skw = s->key[PF_SK_WIRE];
		if (pf_state_key_attach(*sks, s, PF_SK_STACK)) {
			pf_state_key_detach(s, PF_SK_WIRE);
			PF_STATE_EXIT_WRITE();
			return (-1);
		}
		*sks = s->key[PF_SK_STACK];
	}

	if (s->id == 0 && s->creatorid == 0) {
		s->id =
		    htobe64(pf_status.stateid++);
		s->creatorid = pf_status.hostid;
	}
	if (RB_INSERT(pf_state_tree_id, &tree_id, s) != NULL) {
		if (pf_status.debug >= LOG_NOTICE) {
			log(LOG_NOTICE, "pf: state insert failed: "
			    "id: %016llx creatorid: %08x",
			    betoh64(s->id), ntohl(s->creatorid));
			addlog("\n");
		}
		pf_detach_state(s);
		PF_STATE_EXIT_WRITE();
		return (-1);
	}
	TAILQ_INSERT_TAIL(&state_list, s, entry_list);
	pf_status.fcounters[FCNT_STATE_INSERT]++;
	pf_status.states++;
	pfi_kif_ref(kif, PFI_KIF_REF_STATE);
	PF_STATE_EXIT_WRITE();
#if NPFSYNC > 0
	pfsync_insert_state(s);
#endif	/* NPFSYNC > 0 */
	return (0);
}

/* Look up a state by (id, creatorid) in tree_id. */
struct pf_state *
pf_find_state_byid(struct pf_state_cmp *key)
{
	pf_status.fcounters[FCNT_STATE_SEARCH]++;

	return (RB_FIND(pf_state_tree_id, &tree_id, (struct pf_state *)key));
}

/*
 * Sanity check before caching a reverse-key link: a (the key cached on
 * the packet) and b (the freshly found key) must be exact mirror images.
 * Returns 0 when they match, -1 (with loud logging) otherwise.
 */
int
pf_compare_state_keys(struct pf_state_key *a, struct pf_state_key *b,
    struct pfi_kif *kif, u_int dir)
{
	/* a (from hdr) and b (new) must be exact opposites of each other */
	if (a->af == b->af && a->proto == b->proto &&
	    PF_AEQ(&a->addr[0], &b->addr[1], a->af) &&
	    PF_AEQ(&a->addr[1], &b->addr[0], a->af) &&
	    a->port[0] == b->port[1] &&
	    a->port[1] == b->port[0] && a->rdomain == b->rdomain)
		return (0);
	else {
		/* mismatch. must not happen. */
		if (pf_status.debug >= LOG_ERR) {
			log(LOG_ERR,
			    "pf: state key linking mismatch! dir=%s, "
			    "if=%s, stored af=%u, a0: ",
			    dir == PF_OUT ? "OUT" : "IN",
			    kif->pfik_name, a->af);
			pf_print_host(&a->addr[0], a->port[0], a->af);
			addlog(", a1: ");
			pf_print_host(&a->addr[1], a->port[1], a->af);
			addlog(", proto=%u", a->proto);
			addlog(", found af=%u, a0: ", b->af);
			pf_print_host(&b->addr[0], b->port[0], b->af);
			addlog(", a1: ");
			pf_print_host(&b->addr[1], b->port[1], b->af);
			addlog(", proto=%u", b->proto);
			addlog("\n");
		}
		return (-1);
	}
}

/*
 * Find the state matching key for the packet described by pd.  Outbound
 * packets may resolve the key via the mbuf's cached state key (forwarded
 * traffic) or the socket's inpcb link (local traffic) before falling back
 * to the tree; successful tree hits re-establish those caches.  Returns
 * PF_MATCH with *state set, PF_PASS for route-to/reply-to traffic that
 * must not be re-filtered here, or PF_DROP.
 */
int
pf_find_state(struct pf_pdesc *pd, struct pf_state_key_cmp *key,
    struct pf_state **state)
{
	struct pf_state_key	*sk, *pkt_sk, *inp_sk;
	struct pf_state_item	*si;
	struct pf_state		*s = NULL;

	pf_status.fcounters[FCNT_STATE_SEARCH]++;
	if (pf_status.debug >= LOG_DEBUG) {
		log(LOG_DEBUG, "pf: key search, %s on %s: ",
		    pd->dir == PF_OUT ? "out" : "in", pd->kif->pfik_name);
		pf_print_state_parts(NULL, (struct pf_state_key *)key, NULL);
		addlog("\n");
	}

	inp_sk = NULL;
	pkt_sk = NULL;
	sk = NULL;
	if (pd->dir == PF_OUT) {
		/* first if block deals with outbound forwarded packet */
		pkt_sk = pd->m->m_pkthdr.pf.statekey;

		if (!pf_state_key_isvalid(pkt_sk)) {
			pf_mbuf_unlink_state_key(pd->m);
			pkt_sk = NULL;
		}

		if (pkt_sk && pf_state_key_isvalid(pkt_sk->reverse))
			sk = pkt_sk->reverse;

		if (pkt_sk == NULL) {
			/* here we deal with local outbound packet */
			if (pd->m->m_pkthdr.pf.inp != NULL) {
				inp_sk = pd->m->m_pkthdr.pf.inp->inp_pf_sk;
				if (pf_state_key_isvalid(inp_sk))
					sk = inp_sk;
				else
					pf_inpcb_unlink_state_key(
					    pd->m->m_pkthdr.pf.inp);
			}
		}
	}

	if (sk == NULL) {
		if ((sk = RB_FIND(pf_state_tree, &pf_statetbl,
		    (struct pf_state_key *)key)) == NULL)
			return (PF_DROP);
		if (pd->dir == PF_OUT && pkt_sk &&
		    pf_compare_state_keys(pkt_sk, sk, pd->kif, pd->dir) == 0)
			pf_state_key_link_reverse(sk, pkt_sk);
		else if (pd->dir == PF_OUT && pd->m->m_pkthdr.pf.inp &&
		    !pd->m->m_pkthdr.pf.inp->inp_pf_sk && !sk->inp)
			pf_state_key_link_inpcb(sk, pd->m->m_pkthdr.pf.inp);
	}

	/* remove firewall data from outbound packet */
	if (pd->dir == PF_OUT)
		pf_pkt_addr_changed(pd->m);

	/* list is sorted, if-bound states before floating ones */
	TAILQ_FOREACH(si, &sk->states, entry)
		if ((si->s->kif == pfi_all || si->s->kif == pd->kif) &&
		    ((si->s->key[PF_SK_WIRE]->af == si->s->key[PF_SK_STACK]->af
		    && sk == (pd->dir == PF_IN ? si->s->key[PF_SK_WIRE] :
		    si->s->key[PF_SK_STACK])) ||
		    (si->s->key[PF_SK_WIRE]->af != si->s->key[PF_SK_STACK]->af
		    && pd->dir == PF_IN && (sk == si->s->key[PF_SK_STACK] ||
		    sk == si->s->key[PF_SK_WIRE])))) {
			s = si->s;
			break;
		}

	if (s == NULL || s->timeout == PFTM_PURGE)
		return (PF_DROP);

	if (s->rule.ptr->pktrate.limit && pd->dir == s->direction) {
		pf_add_threshold(&s->rule.ptr->pktrate);
		if (pf_check_threshold(&s->rule.ptr->pktrate))
			return (PF_DROP);
	}

	*state = s;
	if (pd->dir == PF_OUT && s->rt_kif != NULL && s->rt_kif != pd->kif &&
	    ((s->rule.ptr->rt == PF_ROUTETO &&
	    s->rule.ptr->direction == PF_OUT) ||
	    (s->rule.ptr->rt == PF_REPLYTO &&
	    s->rule.ptr->direction == PF_IN)))
		return (PF_PASS);

	return (PF_MATCH);
}

/*
 * Find a/any state matching key in direction dir (PF_INOUT matches
 * either).  With more != NULL, returns the first match and counts the
 * remaining matches into *more.
 */
struct pf_state *
pf_find_state_all(struct pf_state_key_cmp *key, u_int dir, int *more)
{
	struct pf_state_key	*sk;
	struct pf_state_item	*si, *ret = NULL;

	pf_status.fcounters[FCNT_STATE_SEARCH]++;

	sk = RB_FIND(pf_state_tree, &pf_statetbl, (struct pf_state_key *)key);

	if (sk != NULL) {
		TAILQ_FOREACH(si, &sk->states, entry)
			if (dir == PF_INOUT ||
			    (sk == (dir == PF_IN ?
			    si->s->key[PF_SK_WIRE] :
			    si->s->key[PF_SK_STACK]))) {
				if (more == NULL)
					return (si->s);

				if (ret)
					(*more)++;
				else
					ret = si;
			}
	}
	return (ret ? ret->s : NULL);
}

/*
 * Serialize state 'st' into the wire/pfsync representation 'sp'.
 * Multi-byte fields are converted to network byte order; relative times
 * (creation age, remaining expiry) are computed against getuptime().
 */
void
pf_state_export(struct pfsync_state *sp, struct pf_state *st)
{
	int32_t expire;

	memset(sp, 0, sizeof(struct pfsync_state));

	/* copy from state key */
	sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0];
	sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1];
	sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0];
	sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1];
	sp->key[PF_SK_WIRE].rdomain = htons(st->key[PF_SK_WIRE]->rdomain);
	sp->key[PF_SK_WIRE].af = st->key[PF_SK_WIRE]->af;
	sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0];
	sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1];
	sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0];
	sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1];
	sp->key[PF_SK_STACK].rdomain = htons(st->key[PF_SK_STACK]->rdomain);
	sp->key[PF_SK_STACK].af = st->key[PF_SK_STACK]->af;
	sp->rtableid[PF_SK_WIRE] = htonl(st->rtableid[PF_SK_WIRE]);
	sp->rtableid[PF_SK_STACK] = htonl(st->rtableid[PF_SK_STACK]);
	sp->proto = st->key[PF_SK_WIRE]->proto;
	sp->af = st->key[PF_SK_WIRE]->af;

	/* copy from state */
	strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname));
	memcpy(&sp->rt_addr, &st->rt_addr, sizeof(sp->rt_addr));
	sp->creation = htonl(getuptime() - st->creation);
	expire = pf_state_expires(st);
	if (expire <= getuptime())
		sp->expire = htonl(0);
	else
		sp->expire = htonl(expire - getuptime());

	sp->direction = st->direction;
#if NPFLOG > 0
	sp->log = st->log;
#endif	/* NPFLOG > 0 */
	sp->timeout = st->timeout;
	sp->state_flags = htons(st->state_flags);
	if (!SLIST_EMPTY(&st->src_nodes))
		sp->sync_flags |= PFSYNC_FLAG_SRCNODE;

	sp->id = st->id;
	sp->creatorid = st->creatorid;
	pf_state_peer_hton(&st->src, &sp->src);
	pf_state_peer_hton(&st->dst, &sp->dst);

	if (st->rule.ptr == NULL)
		sp->rule = htonl(-1);
	else
		sp->rule = htonl(st->rule.ptr->nr);
	if (st->anchor.ptr == NULL)
		sp->anchor = htonl(-1);
	else
		sp->anchor = htonl(st->anchor.ptr->nr);
	sp->nat_rule = htonl(-1);	/* left for compat, nat_rule is gone */

	pf_state_counter_hton(st->packets[0], sp->packets[0]);
	pf_state_counter_hton(st->packets[1], sp->packets[1]);
	pf_state_counter_hton(st->bytes[0], sp->bytes[0]);
	pf_state_counter_hton(st->bytes[1], sp->bytes[1]);

	sp->max_mss = htons(st->max_mss);
	sp->min_ttl = st->min_ttl;
	sp->set_tos = st->set_tos;
	sp->set_prio[0] = st->set_prio[0];
	sp->set_prio[1] = st->set_prio[1];
}

/* END state table stuff */

/*
 * Remove every rule queued on pf_rule_gcl (rules flagged PFRULE_EXPIRED).
 * Caller must hold the pf lock.
 */
void
pf_purge_expired_rules(void)
{
	struct pf_rule	*r;

	PF_ASSERT_LOCKED();

	if (SLIST_EMPTY(&pf_rule_gcl))
		return;

	while ((r = SLIST_FIRST(&pf_rule_gcl)) != NULL) {
		SLIST_REMOVE(&pf_rule_gcl, r, pf_rule, gcle);
		KASSERT(r->rule_flag & PFRULE_EXPIRED);
		pf_purge_rule(r);
	}
}

/* timeout(9) handler: defer the purge work to the network taskq */
void
pf_purge_timeout(void *unused)
{
	task_add(net_tq(0), &pf_purge_task);
}

/*
 * Periodic purge task, rescheduled once per second.  Walks a fraction of
 * the state table each run; src nodes, rules and fragments are purged only
 * every PFTM_INTERVAL runs.  *xnloops counts runs between full purges.
 */
void
pf_purge(void *xnloops)
{
	int *nloops = xnloops;

	KERNEL_LOCK();
	NET_LOCK();

	/*
	 * process a fraction of the state table every second
	 * Note:
	 * we no longer need PF_LOCK() here, because
	 * pf_purge_expired_states() uses pf_state_lock to maintain
	 * consistency.
	 */
	pf_purge_expired_states(1 + (pf_status.states
	    / pf_default_rule.timeout[PFTM_INTERVAL]));

	PF_LOCK();
	/* purge other expired types every PFTM_INTERVAL seconds */
	if (++(*nloops) >= pf_default_rule.timeout[PFTM_INTERVAL]) {
		pf_purge_expired_src_nodes();
		pf_purge_expired_rules();
	}
	PF_UNLOCK();

	/*
	 * Fragments don't require PF_LOCK(), they use their own lock.
	 */
	if ((*nloops) >= pf_default_rule.timeout[PFTM_INTERVAL]) {
		pf_purge_expired_fragments();
		*nloops = 0;
	}
	NET_UNLOCK();
	KERNEL_UNLOCK();

	timeout_add_sec(&pf_purge_to, 1);
}

/*
 * Compute the absolute uptime at which 'state' expires, applying adaptive
 * timeout scaling: between the adaptive start/end thresholds the timeout
 * shrinks linearly with the state count, and at or above 'end' the state
 * expires immediately (returns 0).
 */
int32_t
pf_state_expires(const struct pf_state *state)
{
	u_int32_t	timeout;
	u_int32_t	start;
	u_int32_t	end;
	u_int32_t	states;

	/* handle all PFTM_* > PFTM_MAX here */
	if (state->timeout == PFTM_PURGE)
		return (0);

	KASSERT(state->timeout != PFTM_UNLINKED);
	KASSERT(state->timeout < PFTM_MAX);

	/* per-rule timeout, falling back to the global default */
	timeout = state->rule.ptr->timeout[state->timeout];
	if (!timeout)
		timeout = pf_default_rule.timeout[state->timeout];

	start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START];
	if (start) {
		end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END];
		states = state->rule.ptr->states_cur;
	} else {
		start = pf_default_rule.timeout[PFTM_ADAPTIVE_START];
		end = pf_default_rule.timeout[PFTM_ADAPTIVE_END];
		states = pf_status.states;
	}
	if (end && states > start && start < end) {
		if (states >= end)
			return (0);

		/* 64-bit intermediate avoids overflow of timeout*(end-states) */
		timeout = (u_int64_t)timeout * (end - states) / (end - start);
	}

	return (state->expire + timeout);
}

/*
 * Remove src nodes that no longer track any state and whose expiry has
 * passed.  Caller must hold the pf lock.
 */
void
pf_purge_expired_src_nodes(void)
{
	struct pf_src_node	*cur, *next;

	PF_ASSERT_LOCKED();

	for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) {
		next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);

		if (cur->states == 0 && cur->expire <= getuptime()) {
			/*
			 * NOTE(review): this recomputes the same successor
			 * already fetched above — looks redundant; confirm
			 * before removing.
			 */
			next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);
			pf_remove_src_node(cur);
		}
	}
}

/*
 * Detach state 's' from all of its src nodes, dropping connection and
 * state counts; a src node whose state count reaches zero is given a
 * PFTM_SRC_NODE expiry so the purge task can reap it later.
 */
void
pf_src_tree_remove_state(struct pf_state *s)
{
	u_int32_t		timeout;
	struct pf_sn_item	*sni;

	while ((sni = SLIST_FIRST(&s->src_nodes)) != NULL) {
		SLIST_REMOVE_HEAD(&s->src_nodes, next);
		if (s->src.tcp_est)
			--sni->sn->conn;
		if (--sni->sn->states == 0) {
			timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
			if (!timeout)
				timeout =
				    pf_default_rule.timeout[PFTM_SRC_NODE];
			sni->sn->expire = getuptime() + timeout;
		}
		pool_put(&pf_sn_item_pl, sni);
	}
}

/*
 * Unlink state 'cur' from the state tables and mark it PFTM_UNLINKED.
 * Sends an RST to tear down half-open TCP proxy handshakes, notifies
 * pflow/pfsync, and detaches src nodes and state keys.  The state itself
 * is freed later by pf_free_state().  Caller must hold the pf lock.
 */
void
pf_remove_state(struct pf_state *cur)
{
	PF_ASSERT_LOCKED();

	/* handle load balancing related tasks */
	pf_postprocess_addr(cur);

	if (cur->src.state == PF_TCPS_PROXY_DST) {
		pf_send_tcp(cur->rule.ptr, cur->key[PF_SK_WIRE]->af,
		    &cur->key[PF_SK_WIRE]->addr[1],
		    &cur->key[PF_SK_WIRE]->addr[0],
		    cur->key[PF_SK_WIRE]->port[1],
		    cur->key[PF_SK_WIRE]->port[0],
		    cur->src.seqhi, cur->src.seqlo + 1,
		    TH_RST|TH_ACK, 0, 0, 0, 1, cur->tag,
		    cur->key[PF_SK_WIRE]->rdomain);
	}
	if (cur->key[PF_SK_STACK]->proto == IPPROTO_TCP)
		pf_set_protostate(cur, PF_PEER_BOTH, TCPS_CLOSED);

	RB_REMOVE(pf_state_tree_id, &tree_id, cur);
#if NPFLOW > 0
	if (cur->state_flags & PFSTATE_PFLOW)
		export_pflow(cur);
#endif	/* NPFLOW > 0 */
#if NPFSYNC > 0
	pfsync_delete_state(cur);
#endif	/* NPFSYNC > 0 */
	cur->timeout = PFTM_UNLINKED;
	pf_src_tree_remove_state(cur);
	pf_detach_state(cur);
}

/*
 * Remove the first state on key 'sk' that belongs to a divert-to or
 * divert-reply rule (matched on the stack-side key).
 */
void
pf_remove_divert_state(struct pf_state_key *sk)
{
	struct pf_state_item	*si;

	TAILQ_FOREACH(si, &sk->states, entry) {
		if (sk == si->s->key[PF_SK_STACK] && si->s->rule.ptr &&
		    (si->s->rule.ptr->divert.type == PF_DIVERT_TO ||
		    si->s->rule.ptr->divert.type == PF_DIVERT_REPLY)) {
			pf_remove_state(si->s);
			break;
		}
	}
}

/*
 * Release a previously unlinked state: drop rule/anchor/match-rule
 * references (freeing rules whose last reference this was), clean up
 * TCP normalization data, drop the kif and tag references, and give up
 * the state's reference.  Skipped while pfsync still holds the state.
 * Caller must hold the pf lock.
 */
void
pf_free_state(struct pf_state *cur)
{
	struct pf_rule_item	*ri;

	PF_ASSERT_LOCKED();

#if NPFSYNC > 0
	if (pfsync_state_in_use(cur))
		return;
#endif	/* NPFSYNC > 0 */
	KASSERT(cur->timeout == PFTM_UNLINKED);
	if (--cur->rule.ptr->states_cur == 0 &&
	    cur->rule.ptr->src_nodes == 0)
		pf_rm_rule(NULL, cur->rule.ptr);
	if (cur->anchor.ptr != NULL)
		if (--cur->anchor.ptr->states_cur == 0)
			pf_rm_rule(NULL, cur->anchor.ptr);
	while ((ri = SLIST_FIRST(&cur->match_rules))) {
		SLIST_REMOVE_HEAD(&cur->match_rules, entry);
		if (--ri->r->states_cur == 0 &&
		    ri->r->src_nodes == 0)
			pf_rm_rule(NULL, ri->r);
		pool_put(&pf_rule_item_pl, ri);
	}
	pf_normalize_tcp_cleanup(cur);
	pfi_kif_unref(cur->kif, PFI_KIF_REF_STATE);
	TAILQ_REMOVE(&state_list, cur, entry_list);
	if (cur->tag)
		pf_tag_unref(cur->tag);
	pf_state_unref(cur);
	pf_status.fcounters[FCNT_STATE_REMOVALS]++;
	pf_status.states--;
}

/*
 * Scan up to 'maxcheck' states for expiry.  The scan position is kept in
 * a static cursor so successive calls resume where the last one stopped,
 * wrapping at the end of the list.  Expired states are first collected on
 * a local list under the read lock, then removed/freed under the write
 * lock.  Caller must NOT hold the pf lock.
 */
void
pf_purge_expired_states(u_int32_t maxcheck)
{
	static struct pf_state	*cur = NULL;
	struct pf_state		*next;
	SLIST_HEAD(pf_state_gcl, pf_state) gcl;

	PF_ASSERT_UNLOCKED();
	SLIST_INIT(&gcl);

	PF_STATE_ENTER_READ();
	while (maxcheck--) {
		/* wrap to start of list when we hit the end */
		if (cur == NULL) {
			cur = pf_state_ref(TAILQ_FIRST(&state_list));
			if (cur == NULL)
				break;	/* list empty */
		}

		/* get next state, as cur may get deleted */
		next = TAILQ_NEXT(cur, entry_list);

		if ((cur->timeout == PFTM_UNLINKED) ||
		    (pf_state_expires(cur) <= getuptime()))
			/* the gc list inherits our reference on cur */
			SLIST_INSERT_HEAD(&gcl, cur, gc_list);
		else
			pf_state_unref(cur);

		cur = pf_state_ref(next);

		if (cur == NULL)
			break;
	}
	PF_STATE_EXIT_READ();

	PF_LOCK();
	PF_STATE_ENTER_WRITE();
	while ((next = SLIST_FIRST(&gcl)) != NULL) {
		SLIST_REMOVE_HEAD(&gcl, gc_list);
		if (next->timeout == PFTM_UNLINKED)
			pf_free_state(next);
		else {
			pf_remove_state(next);
			pf_free_state(next);
		}

		pf_state_unref(next);
	}
	PF_STATE_EXIT_WRITE();
	PF_UNLOCK();
}

/*
 * Resolve a table address wrapper against ruleset 'rs'.
 * Returns 0 on success (or when 'aw' is not a table), 1 on failure.
 */
int
pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw)
{
	if (aw->type != PF_ADDR_TABLE)
		return (0);
	if ((aw->p.tbl = pfr_attach_table(rs, aw->v.tblname, 1)) == NULL)
		return (1);
	return (0);
}

/* Drop the table reference held by 'aw', if any. */
void
pf_tbladdr_remove(struct pf_addr_wrap *aw)
{
	if (aw->type != PF_ADDR_TABLE || aw->p.tbl == NULL)
		return;
	pfr_detach_table(aw->p.tbl);
	aw->p.tbl = NULL;
}

/*
 * For userland export: replace the kernel table pointer in 'aw' with the
 * table's address count (-1 if the table is inactive).  Falls back to the
 * root table when the attached one is inactive.
 */
void
pf_tbladdr_copyout(struct pf_addr_wrap *aw)
{
	struct pfr_ktable	*kt = aw->p.tbl;

	if (aw->type != PF_ADDR_TABLE || kt == NULL)
		return;
	if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL)
		kt = kt->pfrkt_root;
	aw->p.tbl = NULL;
	aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ?
	    kt->pfrkt_cnt : -1;
}

/*
 * Log an address (and optional network-order port 'p') in the usual
 * textual form for family 'af' via addlog().  IPv6 output compresses the
 * longest run of zero groups, like inet_ntop.
 */
void
pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af)
{
	switch (af) {
	case AF_INET: {
		u_int32_t a = ntohl(addr->addr32[0]);
		addlog("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255,
		    (a>>8)&255, a&255);
		if (p) {
			p = ntohs(p);
			addlog(":%u", p);
		}
		break;
	}
#ifdef INET6
	case AF_INET6: {
		u_int16_t b;
		u_int8_t i, curstart, curend, maxstart, maxend;
		/* 255 == "no zero run open/seen yet" */
		curstart = curend = maxstart = maxend = 255;
		for (i = 0; i < 8; i++) {
			if (!addr->addr16[i]) {
				if (curstart == 255)
					curstart = i;
				curend = i;
			} else {
				if ((curend - curstart) >
				    (maxend - maxstart)) {
					maxstart = curstart;
					maxend = curend;
				}
				curstart = curend = 255;
			}
		}
		/* a zero run may extend to the last group */
		if ((curend - curstart) >
		    (maxend - maxstart)) {
			maxstart = curstart;
			maxend = curend;
		}
		for (i = 0; i < 8; i++) {
			if (i >= maxstart && i <= maxend) {
				if (i == 0)
					addlog(":");
				if (i == maxend)
					addlog(":");
			} else {
				b = ntohs(addr->addr16[i]);
				addlog("%x", b);
				if (i < 7)
					addlog(":");
			}
		}
		if (p) {
			p = ntohs(p);
			addlog("[%u]", p);
		}
		break;
	}
#endif /* INET6 */
	}
}

/* Convenience wrapper: log state 's' with its own keys. */
void
pf_print_state(struct pf_state *s)
{
	pf_print_state_parts(s, NULL, NULL);
}

/*
 * Log a state and/or its wire/stack keys via addlog().  Any of the three
 * arguments may be NULL; missing key pointers are taken from 's' when
 * possible, and NULL parts are simply skipped.
 */
void
pf_print_state_parts(struct pf_state *s,
    struct pf_state_key *skwp, struct pf_state_key *sksp)
{
	struct pf_state_key	*skw, *sks;
	u_int8_t		 proto, dir;

	/* Do our best to fill these, but they're skipped if NULL */
	skw = skwp ? skwp : (s ? s->key[PF_SK_WIRE] : NULL);
	sks = sksp ? sksp : (s ? s->key[PF_SK_STACK] : NULL);
	proto = skw ? skw->proto : (sks ? sks->proto : 0);
	dir = s ? s->direction : 0;

	switch (proto) {
	case IPPROTO_IPV4:
		addlog("IPv4");
		break;
	case IPPROTO_IPV6:
		addlog("IPv6");
		break;
	case IPPROTO_TCP:
		addlog("TCP");
		break;
	case IPPROTO_UDP:
		addlog("UDP");
		break;
	case IPPROTO_ICMP:
		addlog("ICMP");
		break;
	case IPPROTO_ICMPV6:
		addlog("ICMPv6");
		break;
	default:
		addlog("%u", proto);
		break;
	}
	switch (dir) {
	case PF_IN:
		addlog(" in");
		break;
	case PF_OUT:
		addlog(" out");
		break;
	}
	if (skw) {
		addlog(" wire: (%d) ", skw->rdomain);
		pf_print_host(&skw->addr[0], skw->port[0], skw->af);
		addlog(" ");
		pf_print_host(&skw->addr[1], skw->port[1], skw->af);
	}
	if (sks) {
		addlog(" stack: (%d) ", sks->rdomain);
		if (sks != skw) {
			pf_print_host(&sks->addr[0], sks->port[0], sks->af);
			addlog(" ");
			pf_print_host(&sks->addr[1], sks->port[1], sks->af);
		} else
			addlog("-");
	}
	if (s) {
		if (proto == IPPROTO_TCP) {
			addlog(" [lo=%u high=%u win=%u modulator=%u",
			    s->src.seqlo, s->src.seqhi,
			    s->src.max_win, s->src.seqdiff);
			if (s->src.wscale && s->dst.wscale)
				addlog(" wscale=%u",
				    s->src.wscale & PF_WSCALE_MASK);
			addlog("]");
			addlog(" [lo=%u high=%u win=%u modulator=%u",
			    s->dst.seqlo, s->dst.seqhi,
			    s->dst.max_win, s->dst.seqdiff);
			if (s->src.wscale && s->dst.wscale)
				addlog(" wscale=%u",
				    s->dst.wscale & PF_WSCALE_MASK);
			addlog("]");
		}
		addlog(" %u:%u", s->src.state, s->dst.state);
		if (s->rule.ptr)
			addlog(" @%d", s->rule.ptr->nr);
	}
}

/* Log TCP flags in tcpdump-style single-letter form, e.g. " SA". */
void
pf_print_flags(u_int8_t f)
{
	if (f)
		addlog(" ");
	if (f & TH_FIN)
		addlog("F");
	if (f & TH_SYN)
		addlog("S");
	if (f & TH_RST)
		addlog("R");
	if (f & TH_PUSH)
		addlog("P");
	if (f & TH_ACK)
		addlog("A");
	if (f & TH_URG)
		addlog("U");
	if (f & TH_ECE)
		addlog("E");
	if (f & TH_CWR)
		addlog("W");
}

/*
 * Advance head[i] to 'cur', pointing each passed rule's skip[i] at 'cur'.
 * Used by pf_calc_skip_steps() below.
 */
#define	PF_SET_SKIP_STEPS(i)					\
	do {							\
		while (head[i] != cur) {			\
			head[i]->skip[i].ptr = cur;		\
			head[i] = TAILQ_NEXT(head[i], entries);	\
		}						\
	} while (0)

/*
 * Precompute the skip-step pointers for a rule queue: for each criterion
 * (interface, direction, af, proto, addresses, ports, ...) every rule
 * points at the next rule that differs in that criterion, letting the
 * rule evaluator skip runs of rules that cannot match.
 */
void
pf_calc_skip_steps(struct pf_rulequeue *rules)
{
	struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT];
	int i;

	cur = TAILQ_FIRST(rules);
	prev = cur;
	for (i = 0; i < PF_SKIP_COUNT; ++i)
		head[i] = cur;
	while (cur != NULL) {
		if (cur->kif != prev->kif || cur->ifnot != prev->ifnot)
			PF_SET_SKIP_STEPS(PF_SKIP_IFP);
		if (cur->direction != prev->direction)
			PF_SET_SKIP_STEPS(PF_SKIP_DIR);
		if (cur->onrdomain != prev->onrdomain ||
		    cur->ifnot != prev->ifnot)
			PF_SET_SKIP_STEPS(PF_SKIP_RDOM);
		if (cur->af != prev->af)
			PF_SET_SKIP_STEPS(PF_SKIP_AF);
		if (cur->proto != prev->proto)
			PF_SET_SKIP_STEPS(PF_SKIP_PROTO);
		if (cur->src.neg != prev->src.neg ||
		    pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr))
			PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR);
		if (cur->dst.neg != prev->dst.neg ||
		    pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr))
			PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR);
		if (cur->src.port[0] != prev->src.port[0] ||
		    cur->src.port[1] != prev->src.port[1] ||
		    cur->src.port_op != prev->src.port_op)
			PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT);
		if (cur->dst.port[0] != prev->dst.port[0] ||
		    cur->dst.port[1] != prev->dst.port[1] ||
		    cur->dst.port_op != prev->dst.port_op)
			PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);

		prev = cur;
		cur = TAILQ_NEXT(cur, entries);
	}
	/* flush the remaining tails of every skip list */
	for (i = 0; i < PF_SKIP_COUNT; ++i)
		PF_SET_SKIP_STEPS(i);
}

/*
 * Compare two address wrappers; returns non-zero when they differ
 * (i.e. this is an inequality test, 0 means "equal").
 */
int
pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2)
{
	if (aw1->type != aw2->type)
		return (1);
	switch (aw1->type) {
	case PF_ADDR_ADDRMASK:
	case PF_ADDR_RANGE:
		if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, AF_INET6))
			return (1);
		if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, AF_INET6))
			return (1);
		return (0);
	case PF_ADDR_DYNIFTL:
		return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt);
	case PF_ADDR_NONE:
	case PF_ADDR_NOROUTE:
	case PF_ADDR_URPFFAILED:
		return (0);
	case PF_ADDR_TABLE:
		return (aw1->p.tbl != aw2->p.tbl);
	case PF_ADDR_RTLABEL:
		return (aw1->v.rtlabel != aw2->v.rtlabel);
	default:
		addlog("invalid address type: %d\n", aw1->type);
		return (1);
	}
}

/* This algorithm computes 'a + b - c' in ones-complement using a trick to
 * emulate at most one ones-complement subtraction. This thereby limits net
 * carries/borrows to at most one, eliminating a reduction step and saving one
 * each of +, >>, & and ~.
 *
 * def. x mod y = x - (x//y)*y    for integer x,y
 * def. sum = x mod 2^16
 * def. accumulator = (x >> 16) mod 2^16
 *
 * The trick works as follows: subtracting exactly one u_int16_t from the
 * u_int32_t x incurs at most one underflow, wrapping its upper 16-bits, the
 * accumulator, to 2^16 - 1. Adding this to the 16-bit sum preserves the
 * ones-complement borrow:
 *
 *  (sum + accumulator) mod 2^16
 * = { assume underflow: accumulator := 2^16 - 1 }
 *  (sum + 2^16 - 1) mod 2^16
 * = { mod }
 *  (sum - 1) mod 2^16
 *
 * Although this breaks for sum = 0, giving 0xffff, which is ones-complement's
 * other zero, not -1, that cannot occur: the 16-bit sum cannot be underflown
 * to zero as that requires subtraction of at least 2^16, which exceeds a
 * single u_int16_t's range.
 *
 * We use the following theorem to derive the implementation:
 *
 * th. (x + (y mod z)) mod z = (x + y) mod z    (0)
 * proof.
 *  (x + (y mod z)) mod z
 * = { def mod }
 *  (x + y - (y//z)*z) mod z
 * = { (a + b*c) mod c = a mod c }
 *  (x + y) mod z			[end of proof]
 *
 * ... and thereby obtain:
 *
 *  (sum + accumulator) mod 2^16
 * = { def. accumulator, def. sum }
 *  (x mod 2^16 + (x >> 16) mod 2^16) mod 2^16
 * = { (0), twice }
 *  (x + (x >> 16)) mod 2^16
 * = { x mod 2^n = x & (2^n - 1) }
 *  (x + (x >> 16)) & 0xffff
 *
 * Note: this serves also as a reduction step for at most one add (as the
 * trailing mod 2^16 prevents further reductions by destroying carries).
 */
/*
 * Incrementally adjust *cksum after a 16-bit field changed from 'was'
 * to 'now' (RFC 1624 style).  For UDP a checksum of 0 means "none" and
 * is preserved, and a computed 0 is stored as 0xffff.
 */
static __inline void
pf_cksum_fixup(u_int16_t *cksum, u_int16_t was, u_int16_t now,
    u_int8_t proto)
{
	u_int32_t x;
	const int udp = proto == IPPROTO_UDP;

	x = *cksum + was - now;
	x = (x + (x >> 16)) & 0xffff;

	/* optimise: eliminate a branch when not udp */
	if (udp && *cksum == 0x0000)
		return;
	if (udp && x == 0x0000)
		x = 0xffff;

	*cksum = (u_int16_t)(x);
}

#ifdef INET6
/* pre: coverage(cksum) is superset of coverage(covered_cksum) */
static __inline void
pf_cksum_uncover(u_int16_t *cksum, u_int16_t covered_cksum, u_int8_t proto)
{
	pf_cksum_fixup(cksum, ~covered_cksum, 0x0, proto);
}

/* pre: disjoint(coverage(cksum), coverage(uncovered_cksum)) */
static __inline void
pf_cksum_cover(u_int16_t *cksum, u_int16_t uncovered_cksum, u_int8_t proto)
{
	pf_cksum_fixup(cksum, 0x0, ~uncovered_cksum, proto);
}
#endif /* INET6 */

/* pre: *a is 16-bit aligned within its packet
 *
 * This algorithm emulates 16-bit ones-complement sums on a twos-complement
 * machine by conserving ones-complement's otherwise discarded carries in the
 * upper bits of x. These accumulated carries when added to the lower 16-bits
 * over at least zero 'reduction' steps then complete the ones-complement sum.
 *
 * def. sum = x mod 2^16
 * def. accumulator = (x >> 16)
 *
 * At most two reduction steps
 *
 *  x := sum + accumulator
 * = { def sum, def accumulator }
 *  x := x mod 2^16 + (x >> 16)
 * = { x mod 2^n = x & (2^n - 1) }
 *  x := (x & 0xffff) + (x >> 16)
 *
 * are necessary to incorporate the accumulated carries (at most one per add)
 * i.e. to reduce x < 2^16 from at most 16 carries in the upper 16 bits.
 *
 * The function is also invariant over the endian of the host. Why?
 *
 * Define the unary transpose operator ~ on a bitstring in python slice
 * notation as lambda m: m[P:] + m[:P] , for some constant pivot P.
 *
 * th. ~ distributes over ones-complement addition, denoted by +_1, i.e.
 *
 *     ~m +_1 ~n = ~(m +_1 n)    (for all bitstrings m,n of equal length)
 *
 * proof. Regard the bitstrings in m +_1 n as split at P, forming at most two
 * 'half-adds'. Under ones-complement addition, each half-add carries to the
 * other, so the sum of each half-add is unaffected by their relative
 * order. Therefore:
 *
 *     ~m +_1 ~n
 *    = { half-adds invariant under transposition }
 *     ~s
 *    = { substitute }
 *     ~(m +_1 n)                [end of proof]
 *
 * th. Summing two in-memory ones-complement 16-bit variables m,n on a machine
 * with the converse endian does not alter the result.
 *
 * proof.
 *    { converse machine endian: load/store transposes, P := 8 }
 *     ~(~m +_1 ~n)
 *    = { ~ over +_1 }
 *     ~~m +_1 ~~n
 *    = { ~ is an involution }
 *      m +_1 n                  [end of proof]
 *
 */
#define NEG(x) ((u_int16_t)~(x))
/*
 * Adjust *cksum for a full address change from 'a' to 'an' under family
 * 'af': add the old address words and subtract (via NEG) the new ones,
 * then reduce.  Same UDP zero-checksum handling as pf_cksum_fixup().
 */
void
pf_cksum_fixup_a(u_int16_t *cksum, const struct pf_addr *a,
    const struct pf_addr *an, sa_family_t af, u_int8_t proto)
{
	u_int32_t	 x;
	const u_int16_t	*n = an->addr16;
	const u_int16_t	*o = a->addr16;
	const int	 udp = proto == IPPROTO_UDP;

	switch (af) {
	case AF_INET:
		x = *cksum + o[0] + NEG(n[0]) + o[1] + NEG(n[1]);
		break;
#ifdef INET6
	case AF_INET6:
		x = *cksum + o[0] + NEG(n[0]) + o[1] + NEG(n[1]) +\
			     o[2] + NEG(n[2]) + o[3] + NEG(n[3]) +\
			     o[4] + NEG(n[4]) + o[5] + NEG(n[5]) +\
			     o[6] + NEG(n[6]) + o[7] + NEG(n[7]);
		break;
#endif /* INET6 */
	default:
		unhandled_af(af);
	}

	x = (x & 0xffff) + (x >> 16);
	x = (x & 0xffff) + (x >> 16);

	/* optimise: eliminate a branch when not udp */
	if (udp && *cksum == 0x0000)
		return;
	if (udp && x == 0x0000)
		x = 0xffff;

	*cksum = (u_int16_t)(x);
}

/*
 * Patch a single byte in the packet, fixing up the transport checksum.
 * 'hi' says whether *f is the high byte of its 16-bit checksum word.
 * Returns 1 when the packet was modified, 0 otherwise.
 */
int
pf_patch_8(struct pf_pdesc *pd, u_int8_t *f, u_int8_t v, bool hi)
{
	int	rewrite = 0;

	if (*f != v) {
		u_int16_t old = htons(hi ? (*f << 8) : *f);
		u_int16_t new = htons(hi ?
		    ( v << 8) : v);

		pf_cksum_fixup(pd->pcksum, old, new, pd->proto);
		*f = v;
		rewrite = 1;
	}

	return (rewrite);
}

/* pre: *f is 16-bit aligned within its packet */
/*
 * Patch a 16-bit field in the packet, fixing up the transport checksum.
 * Returns 1 when the packet was modified, 0 otherwise.
 */
int
pf_patch_16(struct pf_pdesc *pd, u_int16_t *f, u_int16_t v)
{
	int	rewrite = 0;

	if (*f != v) {
		pf_cksum_fixup(pd->pcksum, *f, v, pd->proto);
		*f = v;
		rewrite = 1;
	}

	return (rewrite);
}

/*
 * As pf_patch_16() but for a possibly unaligned field: falls back to two
 * byte patches.  'hi' says whether the field starts on the high byte of
 * a checksum word.
 */
int
pf_patch_16_unaligned(struct pf_pdesc *pd, void *f, u_int16_t v, bool hi)
{
	int		rewrite = 0;
	u_int8_t	*fb = (u_int8_t*)f;
	u_int8_t	*vb = (u_int8_t*)&v;

	if (hi && ALIGNED_POINTER(f, u_int16_t)) {
		return (pf_patch_16(pd, f, v)); /* optimise */
	}

	rewrite += pf_patch_8(pd, fb++, *vb++, hi);
	rewrite += pf_patch_8(pd, fb++, *vb++,!hi);

	return (rewrite);
}

/* pre: *f is 16-bit aligned within its packet */
/* pre: pd->proto != IPPROTO_UDP */
/*
 * Patch a 32-bit field as two 16-bit checksum fixups.  Always rewrites
 * (callers only use it when the value changes), hence no *f != v guard.
 */
int
pf_patch_32(struct pf_pdesc *pd, u_int32_t *f, u_int32_t v)
{
	int		rewrite = 0;
	u_int16_t	*pc = pd->pcksum;
	u_int8_t	 proto = pd->proto;

	/* optimise: inline udp fixup code is unused; let compiler scrub it */
	if (proto == IPPROTO_UDP)
		panic("%s: udp", __func__);

	/* optimise: skip *f != v guard; true for all use-cases */
	pf_cksum_fixup(pc, *f / (1 << 16), v / (1 << 16), proto);
	pf_cksum_fixup(pc, *f % (1 << 16), v % (1 << 16), proto);

	*f = v;
	rewrite = 1;

	return (rewrite);
}

/*
 * As pf_patch_32() but for a possibly unaligned field: falls back to four
 * byte patches, alternating the hi/lo byte position.
 */
int
pf_patch_32_unaligned(struct pf_pdesc *pd, void *f, u_int32_t v, bool hi)
{
	int		rewrite = 0;
	u_int8_t	*fb = (u_int8_t*)f;
	u_int8_t	*vb = (u_int8_t*)&v;

	if (hi && ALIGNED_POINTER(f, u_int32_t)) {
		return (pf_patch_32(pd, f, v)); /* optimise */
	}

	rewrite += pf_patch_8(pd, fb++, *vb++, hi);
	rewrite += pf_patch_8(pd, fb++, *vb++,!hi);
	rewrite += pf_patch_8(pd, fb++, *vb++, hi);
	rewrite += pf_patch_8(pd, fb++, *vb++,!hi);

	return (rewrite);
}

/*
 * Map an ICMP/ICMPv6 type to the virtual type/id used as the state key
 * port pair, and report the lookup direction in *icmp_dir.  Returns 0
 * when the message keys its own state, 1 when it is an error message
 * that must be matched against the state of the packet it quotes.
 * *virtual_type is returned in network byte order.
 */
int
pf_icmp_mapping(struct pf_pdesc *pd, u_int8_t type, int *icmp_dir,
    u_int16_t *virtual_id, u_int16_t *virtual_type)
{
	/*
	 * ICMP types marked with PF_OUT are typically responses to
	 * PF_IN, and will match states in the opposite direction.
	 * PF_IN ICMP types need to match a state with that type.
	 */
	*icmp_dir = PF_OUT;

	/* Queries (and responses) */
	switch (pd->af) {
	case AF_INET:
		switch (type) {
		case ICMP_ECHO:
			*icmp_dir = PF_IN;
			/* FALLTHROUGH */
		case ICMP_ECHOREPLY:
			*virtual_type = ICMP_ECHO;
			*virtual_id = pd->hdr.icmp.icmp_id;
			break;

		case ICMP_TSTAMP:
			*icmp_dir = PF_IN;
			/* FALLTHROUGH */
		case ICMP_TSTAMPREPLY:
			*virtual_type = ICMP_TSTAMP;
			*virtual_id = pd->hdr.icmp.icmp_id;
			break;

		case ICMP_IREQ:
			*icmp_dir = PF_IN;
			/* FALLTHROUGH */
		case ICMP_IREQREPLY:
			*virtual_type = ICMP_IREQ;
			*virtual_id = pd->hdr.icmp.icmp_id;
			break;

		case ICMP_MASKREQ:
			*icmp_dir = PF_IN;
			/* FALLTHROUGH */
		case ICMP_MASKREPLY:
			*virtual_type = ICMP_MASKREQ;
			*virtual_id = pd->hdr.icmp.icmp_id;
			break;

		case ICMP_IPV6_WHEREAREYOU:
			*icmp_dir = PF_IN;
			/* FALLTHROUGH */
		case ICMP_IPV6_IAMHERE:
			*virtual_type = ICMP_IPV6_WHEREAREYOU;
			*virtual_id = 0; /* Nothing sane to match on! */
			break;

		case ICMP_MOBILE_REGREQUEST:
			*icmp_dir = PF_IN;
			/* FALLTHROUGH */
		case ICMP_MOBILE_REGREPLY:
			*virtual_type = ICMP_MOBILE_REGREQUEST;
			*virtual_id = 0; /* Nothing sane to match on! */
			break;

		case ICMP_ROUTERSOLICIT:
			*icmp_dir = PF_IN;
			/* FALLTHROUGH */
		case ICMP_ROUTERADVERT:
			*virtual_type = ICMP_ROUTERSOLICIT;
			*virtual_id = 0; /* Nothing sane to match on! */
			break;

		/* These ICMP types map to other connections */
		case ICMP_UNREACH:
		case ICMP_SOURCEQUENCH:
		case ICMP_REDIRECT:
		case ICMP_TIMXCEED:
		case ICMP_PARAMPROB:
			/* These will not be used, but set them anyway */
			*icmp_dir = PF_IN;
			*virtual_type = htons(type);
			*virtual_id = 0;
			return (1);  /* These types match to another state */

		/*
		 * All remaining ICMP types get their own states,
		 * and will only match in one direction.
		 */
		default:
			*icmp_dir = PF_IN;
			*virtual_type = type;
			*virtual_id = 0;
			break;
		}
		break;
#ifdef INET6
	case AF_INET6:
		switch (type) {
		case ICMP6_ECHO_REQUEST:
			*icmp_dir = PF_IN;
			/* FALLTHROUGH */
		case ICMP6_ECHO_REPLY:
			*virtual_type = ICMP6_ECHO_REQUEST;
			*virtual_id = pd->hdr.icmp6.icmp6_id;
			break;

		case MLD_LISTENER_QUERY:
		case MLD_LISTENER_REPORT: {
			struct mld_hdr *mld = &pd->hdr.mld;
			u_int32_t h;

			/*
			 * Listener Report can be sent by clients
			 * without an associated Listener Query.
			 * In addition to that, when Report is sent as a
			 * reply to a Query its source and destination
			 * address are different.
			 */
			*icmp_dir = PF_IN;
			*virtual_type = MLD_LISTENER_QUERY;
			/* generate fake id for these messages */
			h = mld->mld_addr.s6_addr32[0] ^
			    mld->mld_addr.s6_addr32[1] ^
			    mld->mld_addr.s6_addr32[2] ^
			    mld->mld_addr.s6_addr32[3];
			*virtual_id = (h >> 16) ^ (h & 0xffff);
			break;
		}

		/*
		 * ICMP6_FQDN and ICMP6_NI query/reply are the same type as
		 * ICMP6_WRU
		 */
		case ICMP6_WRUREQUEST:
			*icmp_dir = PF_IN;
			/* FALLTHROUGH */
		case ICMP6_WRUREPLY:
			*virtual_type = ICMP6_WRUREQUEST;
			*virtual_id = 0; /* Nothing sane to match on! */
			break;

		case MLD_MTRACE:
			*icmp_dir = PF_IN;
			/* FALLTHROUGH */
		case MLD_MTRACE_RESP:
			*virtual_type = MLD_MTRACE;
			*virtual_id = 0; /* Nothing sane to match on! */
			break;

		case ND_NEIGHBOR_SOLICIT:
			*icmp_dir = PF_IN;
			/* FALLTHROUGH */
		case ND_NEIGHBOR_ADVERT: {
			struct nd_neighbor_solicit *nd = &pd->hdr.nd_ns;
			u_int32_t h;

			*virtual_type = ND_NEIGHBOR_SOLICIT;
			/* generate fake id for these messages */
			h = nd->nd_ns_target.s6_addr32[0] ^
			    nd->nd_ns_target.s6_addr32[1] ^
			    nd->nd_ns_target.s6_addr32[2] ^
			    nd->nd_ns_target.s6_addr32[3];
			*virtual_id = (h >> 16) ^ (h & 0xffff);
			break;
		}

		/*
		 * These ICMP types map to other connections.
		 * ND_REDIRECT can't be in this list because the triggering
		 * packet header is optional.
		 */
		case ICMP6_DST_UNREACH:
		case ICMP6_PACKET_TOO_BIG:
		case ICMP6_TIME_EXCEEDED:
		case ICMP6_PARAM_PROB:
			/* These will not be used, but set them anyway */
			*icmp_dir = PF_IN;
			*virtual_type = htons(type);
			*virtual_id = 0;
			return (1);  /* These types match to another state */
		/*
		 * All remaining ICMP6 types get their own states,
		 * and will only match in one direction.
		 */
		default:
			*icmp_dir = PF_IN;
			*virtual_type = type;
			*virtual_id = 0;
			break;
		}
		break;
#endif /* INET6 */
	}
	*virtual_type = htons(*virtual_type);
	return (0);  /* These types match to their own state */
}

/*
 * Rewrite the addresses (and optional port) quoted inside an ICMP error
 * payload, and optionally the outer network header address 'oa', keeping
 * the outer checksum (pd->pcksum) consistent.
 */
void
pf_translate_icmp(struct pf_pdesc *pd, struct pf_addr *qa, u_int16_t *qp,
    struct pf_addr *oa, struct pf_addr *na, u_int16_t np)
{
	/* note: doesn't trouble to fixup quoted checksums, if any */

	/* change quoted protocol port */
	if (qp != NULL)
		pf_patch_16(pd, qp, np);

	/* change quoted ip address */
	pf_cksum_fixup_a(pd->pcksum, qa, na, pd->af, pd->proto);
	pf_addrcpy(qa, na, pd->af);

	/* change network-header's ip address */
	if (oa)
		pf_translate_a(pd, oa, na);
}

/* pre: *a is 16-bit aligned within its packet */
/* *a is a network header src/dst address */
/*
 * Replace a network-header address with 'an', fixing up the transport
 * pseudo-header checksum for protocols that cover it.  Returns 1 when
 * the packet was modified, 0 when the addresses were already equal.
 */
int
pf_translate_a(struct pf_pdesc *pd, struct pf_addr *a, struct pf_addr *an)
{
	int	rewrite = 0;

	/* warning: !PF_ANEQ != PF_AEQ */
	if (!PF_ANEQ(a, an, pd->af))
		return (0);

	/* fixup transport pseudo-header, if any */
	switch (pd->proto) {
	case IPPROTO_TCP:	/* FALLTHROUGH */
	case IPPROTO_UDP:	/* FALLTHROUGH */
	case IPPROTO_ICMPV6:
		pf_cksum_fixup_a(pd->pcksum, a, an, pd->af, pd->proto);
		break;
	default:
		break;	/* assume no pseudo-header */
	}

	pf_addrcpy(a, an, pd->af);
	rewrite = 1;

	return (rewrite);
}

/*
 * NOTE(review): the rest of this file guards IPv6 code with "#ifdef
 * INET6"; "#if INET6" only works while INET6 is defined to a nonzero
 * value — confirm against the kernel option definition.
 */
#if INET6
/* pf_translate_af() may change pd->m, adjust local copies after calling */
/*
 * af-to translation: swap the packet's network header between IPv4 and
 * IPv6 (pd->af -> pd->naf), rebuilding the header from pd's fields and
 * incrementally moving the transport checksum from the old pseudo-header
 * to the new one.  Returns 0 on success, -1 when mbuf allocation fails
 * (pd->m is then NULL).
 */
int
pf_translate_af(struct pf_pdesc *pd)
{
	static const struct pf_addr	 zero;
	struct ip			*ip4;
	struct ip6_hdr			*ip6;
	int				 copyback = 0;
	u_int				 hlen, ohlen, dlen;
	u_int16_t			*pc;
	u_int8_t			 af_proto, naf_proto;

	hlen = (pd->naf == AF_INET) ? sizeof(*ip4) : sizeof(*ip6);
	ohlen = pd->off;
	dlen = pd->tot_len - pd->off;
	pc = pd->pcksum;

	/* ICMP <-> ICMPv6 swap protocol numbers; TCP/UDP keep theirs */
	af_proto = naf_proto = pd->proto;
	if (naf_proto == IPPROTO_ICMP)
		af_proto = IPPROTO_ICMPV6;
	if (naf_proto == IPPROTO_ICMPV6)
		af_proto = IPPROTO_ICMP;

	/* uncover stale pseudo-header */
	switch (af_proto) {
	case IPPROTO_ICMPV6:
		/* optimise: unchanged for TCP/UDP */
		pf_cksum_fixup(pc, htons(af_proto), 0x0, af_proto);
		pf_cksum_fixup(pc, htons(dlen), 0x0, af_proto);
		/* FALLTHROUGH */
	case IPPROTO_UDP:	/* FALLTHROUGH */
	case IPPROTO_TCP:
		pf_cksum_fixup_a(pc, pd->src, &zero, pd->af, af_proto);
		pf_cksum_fixup_a(pc, pd->dst, &zero, pd->af, af_proto);
		copyback = 1;
		break;
	default:
		break;	/* assume no pseudo-header */
	}

	/* replace the network header */
	m_adj(pd->m, pd->off);
	pd->src = NULL;
	pd->dst = NULL;

	if ((M_PREPEND(pd->m, hlen, M_DONTWAIT)) == NULL) {
		pd->m = NULL;
		return (-1);
	}

	pd->off = hlen;
	pd->tot_len += hlen - ohlen;

	switch (pd->naf) {
	case AF_INET:
		ip4 = mtod(pd->m, struct ip *);
		memset(ip4, 0, hlen);
		ip4->ip_v = IPVERSION;
		ip4->ip_hl = hlen >> 2;
		ip4->ip_tos = pd->tos;
		ip4->ip_len = htons(hlen + dlen);
		ip4->ip_id = htons(ip_randomid());
		ip4->ip_off = htons(IP_DF);
		ip4->ip_ttl = pd->ttl;
		ip4->ip_p = pd->proto;
		ip4->ip_src = pd->nsaddr.v4;
		ip4->ip_dst = pd->ndaddr.v4;
		break;
	case AF_INET6:
		ip6 = mtod(pd->m, struct ip6_hdr *);
		memset(ip6, 0, hlen);
		ip6->ip6_vfc = IPV6_VERSION;
		ip6->ip6_flow |= htonl((u_int32_t)pd->tos << 20);
		ip6->ip6_plen = htons(dlen);
		ip6->ip6_nxt = pd->proto;
		if (!pd->ttl || pd->ttl > IPV6_DEFHLIM)
			ip6->ip6_hlim = IPV6_DEFHLIM;
		else
			ip6->ip6_hlim = pd->ttl;
		ip6->ip6_src = pd->nsaddr.v6;
		ip6->ip6_dst = pd->ndaddr.v6;
		break;
	default:
		unhandled_af(pd->naf);
	}

	/* UDP over IPv6 must be checksummed per rfc2460 p27 */
	if (naf_proto == IPPROTO_UDP && *pc == 0x0000 &&
	    pd->naf == AF_INET6) {
		pd->m->m_pkthdr.csum_flags |= M_UDP_CSUM_OUT;
	}

	/* cover fresh pseudo-header */
	switch (naf_proto) {
	case IPPROTO_ICMPV6:
		/* optimise: unchanged for TCP/UDP */
		pf_cksum_fixup(pc, 0x0, htons(naf_proto), naf_proto);
		pf_cksum_fixup(pc, 0x0, htons(dlen), naf_proto);
		/* FALLTHROUGH */
	case IPPROTO_UDP:	/* FALLTHROUGH */
	case IPPROTO_TCP:
		pf_cksum_fixup_a(pc, &zero, &pd->nsaddr, pd->naf, naf_proto);
		pf_cksum_fixup_a(pc, &zero, &pd->ndaddr, pd->naf, naf_proto);
		copyback = 1;
		break;
	default:
		break;	/* assume no pseudo-header */
	}

	/* flush pd->pcksum */
	if (copyback)
		m_copyback(pd->m, pd->off, pd->hdrlen, &pd->hdr, M_NOWAIT);

	return (0);
}

/*
 * af-to translation of the ip/ip6 header quoted inside an ICMP error:
 * splits the mbuf chain at the quoted header, rebuilds it in the new
 * family, and rejoins.  Returns -1 on invalid families or mbuf failure.
 */
int
pf_change_icmp_af(struct mbuf *m, int ipoff2, struct pf_pdesc *pd,
    struct pf_pdesc *pd2, struct pf_addr *src, struct pf_addr *dst,
    sa_family_t af, sa_family_t naf)
{
	struct mbuf		*n = NULL;
	struct ip		*ip4;
	struct ip6_hdr		*ip6;
	u_int			 hlen, ohlen, dlen;
	int			 d;

	if (af == naf || (af != AF_INET && af != AF_INET6) ||
	    (naf != AF_INET && naf != AF_INET6))
		return (-1);

	/* split the mbuf chain on the quoted ip/ip6 header boundary */
	if ((n = m_split(m, ipoff2, M_DONTWAIT)) == NULL)
		return (-1);

	/* new quoted header */
	hlen = naf == AF_INET ?
	    sizeof(*ip4) : sizeof(*ip6);
	/* old quoted header */
	ohlen = pd2->off - ipoff2;

	/* trim old quoted header */
	pf_cksum_uncover(pd->pcksum, in_cksum(n, ohlen), pd->proto);
	m_adj(n, ohlen);

	/* prepend a new, translated, quoted header */
	if ((M_PREPEND(n, hlen, M_DONTWAIT)) == NULL)
		return (-1);

	switch (naf) {
	case AF_INET:
		ip4 = mtod(n, struct ip *);
		memset(ip4, 0, sizeof(*ip4));
		ip4->ip_v = IPVERSION;
		ip4->ip_hl = sizeof(*ip4) >> 2;
		ip4->ip_len = htons(sizeof(*ip4) + pd2->tot_len - ohlen);
		ip4->ip_id = htons(ip_randomid());
		ip4->ip_off = htons(IP_DF);
		ip4->ip_ttl = pd2->ttl;
		/* quoted protocol is translated along with the header */
		if (pd2->proto == IPPROTO_ICMPV6)
			ip4->ip_p = IPPROTO_ICMP;
		else
			ip4->ip_p = pd2->proto;
		ip4->ip_src = src->v4;
		ip4->ip_dst = dst->v4;
		ip4->ip_sum = in_cksum(n, ip4->ip_hl << 2);
		break;
	case AF_INET6:
		ip6 = mtod(n, struct ip6_hdr *);
		memset(ip6, 0, sizeof(*ip6));
		ip6->ip6_vfc = IPV6_VERSION;
		ip6->ip6_plen = htons(pd2->tot_len - ohlen);
		if (pd2->proto == IPPROTO_ICMP)
			ip6->ip6_nxt = IPPROTO_ICMPV6;
		else
			ip6->ip6_nxt = pd2->proto;
		if (!pd2->ttl || pd2->ttl > IPV6_DEFHLIM)
			ip6->ip6_hlim = IPV6_DEFHLIM;
		else
			ip6->ip6_hlim = pd2->ttl;
		ip6->ip6_src = src->v6;
		ip6->ip6_dst = dst->v6;
		break;
	}

	/* cover new quoted header */
	/* optimise: any new AF_INET header of ours sums to zero */
	if (naf != AF_INET) {
		pf_cksum_cover(pd->pcksum, in_cksum(n, hlen), pd->proto);
	}

	/* reattach modified quoted packet to outer header */
	{
		int nlen = n->m_pkthdr.len;
		m_cat(m, n);
		m->m_pkthdr.len += nlen;
	}

	/* account for altered length */
	d = hlen - ohlen;

	if (pd->proto == IPPROTO_ICMPV6) {
		/* fixup pseudo-header */
		dlen = pd->tot_len - pd->off;
		pf_cksum_fixup(pd->pcksum,
		    htons(dlen), htons(dlen + d), pd->proto);
	}

	pd->tot_len += d;
	pd2->tot_len += d;
	pd2->off += d;

	/* note: not bothering to update network headers as
	   these due for rewrite by pf_translate_af() */

	return (0);
}


/* byte offsets of fields within the v4/v6 network headers */
#define PTR_IP(field)	(offsetof(struct ip, field))
#define PTR_IP6(field)	(offsetof(struct ip6_hdr, field))

/*
 * Translate an ICMP <-> ICMPv6 header in place: type, code, nextmtu and
 * the param-problem pointer.  'af' is the family being translated FROM
 * IPv6 into (AF_INET: arg is an icmp6_hdr being turned into icmp, and
 * vice versa).  Returns -1 for types/codes with no counterpart.
 */
int
pf_translate_icmp_af(struct pf_pdesc *pd, int af, void *arg)
{
	struct icmp		*icmp4;
	struct icmp6_hdr	*icmp6;
	u_int32_t		 mtu;
	int32_t			 ptr = -1;
	u_int8_t		 type;
	u_int8_t		 code;

	switch (af) {
	case AF_INET:
		icmp6 = arg;
		type = icmp6->icmp6_type;
		code = icmp6->icmp6_code;
		mtu = ntohl(icmp6->icmp6_mtu);

		switch (type) {
		case ICMP6_ECHO_REQUEST:
			type = ICMP_ECHO;
			break;
		case ICMP6_ECHO_REPLY:
			type = ICMP_ECHOREPLY;
			break;
		case ICMP6_DST_UNREACH:
			type = ICMP_UNREACH;
			switch (code) {
			case ICMP6_DST_UNREACH_NOROUTE:
			case ICMP6_DST_UNREACH_BEYONDSCOPE:
			case ICMP6_DST_UNREACH_ADDR:
				code = ICMP_UNREACH_HOST;
				break;
			case ICMP6_DST_UNREACH_ADMIN:
				code = ICMP_UNREACH_HOST_PROHIB;
				break;
			case ICMP6_DST_UNREACH_NOPORT:
				code = ICMP_UNREACH_PORT;
				break;
			default:
				return (-1);
			}
			break;
		case ICMP6_PACKET_TOO_BIG:
			type = ICMP_UNREACH;
			code = ICMP_UNREACH_NEEDFRAG;
			/* 20 = sizeof(struct ip6_hdr) - sizeof(struct ip) */
			mtu -= 20;
			break;
		case ICMP6_TIME_EXCEEDED:
			type = ICMP_TIMXCEED;
			break;
		case ICMP6_PARAM_PROB:
			switch (code) {
			case ICMP6_PARAMPROB_HEADER:
				type = ICMP_PARAMPROB;
				code = ICMP_PARAMPROB_ERRATPTR;
				ptr = ntohl(icmp6->icmp6_pptr);

				/* map the v6 field offset to its v4 twin */
				if (ptr == PTR_IP6(ip6_vfc))
					; /* preserve */
				else if (ptr == PTR_IP6(ip6_vfc) + 1)
					ptr = PTR_IP(ip_tos);
				else if (ptr == PTR_IP6(ip6_plen) ||
				    ptr == PTR_IP6(ip6_plen) + 1)
					ptr = PTR_IP(ip_len);
				else if (ptr == PTR_IP6(ip6_nxt))
					ptr = PTR_IP(ip_p);
				else if (ptr == PTR_IP6(ip6_hlim))
					ptr = PTR_IP(ip_ttl);
				else if (ptr >= PTR_IP6(ip6_src) &&
				    ptr < PTR_IP6(ip6_dst))
					ptr = PTR_IP(ip_src);
				else if (ptr >= PTR_IP6(ip6_dst) &&
				    ptr < sizeof(struct ip6_hdr))
					ptr = PTR_IP(ip_dst);
				else {
					return (-1);
				}
				break;
			case ICMP6_PARAMPROB_NEXTHEADER:
				type = ICMP_UNREACH;
				code = ICMP_UNREACH_PROTOCOL;
				break;
			default:
				return (-1);
			}
			break;
		default:
			return (-1);
		}

		pf_patch_8(pd, &icmp6->icmp6_type, type, PF_HI);
		pf_patch_8(pd, &icmp6->icmp6_code, code, PF_LO);

		/* aligns well with a icmpv4 nextmtu */
		pf_patch_32(pd, &icmp6->icmp6_mtu, htonl(mtu));

		/* icmpv4 pptr is a one most significant byte */
		if (ptr >= 0)
			pf_patch_32(pd, &icmp6->icmp6_pptr, htonl(ptr << 24));
		break;
	case AF_INET6:
		icmp4 = arg;
		type = icmp4->icmp_type;
		code = icmp4->icmp_code;
		mtu = ntohs(icmp4->icmp_nextmtu);

		switch (type) {
		case ICMP_ECHO:
			type = ICMP6_ECHO_REQUEST;
			break;
		case ICMP_ECHOREPLY:
			type = ICMP6_ECHO_REPLY;
			break;
		case ICMP_UNREACH:
			type = ICMP6_DST_UNREACH;
			switch (code) {
			case ICMP_UNREACH_NET:
			case ICMP_UNREACH_HOST:
			case ICMP_UNREACH_NET_UNKNOWN:
			case ICMP_UNREACH_HOST_UNKNOWN:
			case ICMP_UNREACH_ISOLATED:
			case ICMP_UNREACH_TOSNET:
			case ICMP_UNREACH_TOSHOST:
				code = ICMP6_DST_UNREACH_NOROUTE;
				break;
			case ICMP_UNREACH_PORT:
				code = ICMP6_DST_UNREACH_NOPORT;
				break;
			case ICMP_UNREACH_NET_PROHIB:
			case ICMP_UNREACH_HOST_PROHIB:
			case ICMP_UNREACH_FILTER_PROHIB:
			case ICMP_UNREACH_PRECEDENCE_CUTOFF:
				code = ICMP6_DST_UNREACH_ADMIN;
				break;
			case ICMP_UNREACH_PROTOCOL:
				type = ICMP6_PARAM_PROB;
				code = ICMP6_PARAMPROB_NEXTHEADER;
				ptr = offsetof(struct ip6_hdr, ip6_nxt);
				break;
			case ICMP_UNREACH_NEEDFRAG:
				type = ICMP6_PACKET_TOO_BIG;
				code = 0;
				/* v6 header is 20 bytes larger than v4 */
				mtu += 20;
				break;
			default:
				return (-1);
			}
			break;
		case ICMP_TIMXCEED:
			type = ICMP6_TIME_EXCEEDED;
			break;
		case ICMP_PARAMPROB:
			type = ICMP6_PARAM_PROB;
			switch (code) {
			case ICMP_PARAMPROB_ERRATPTR:
				code = ICMP6_PARAMPROB_HEADER;
				break;
			case ICMP_PARAMPROB_LENGTH:
				code = ICMP6_PARAMPROB_HEADER;
				break;
			default:
				return (-1);
			}

			ptr = icmp4->icmp_pptr;
			/* map the v4 field offset to its v6 twin */
			if (ptr == 0 || ptr == PTR_IP(ip_tos))
				; /* preserve */
			else if (ptr == PTR_IP(ip_len) ||
			    ptr == PTR_IP(ip_len) + 1)
				ptr = PTR_IP6(ip6_plen);
			else if (ptr == PTR_IP(ip_ttl))
				ptr = PTR_IP6(ip6_hlim);
			else if (ptr == PTR_IP(ip_p))
				ptr = PTR_IP6(ip6_nxt);
			else if (ptr >= PTR_IP(ip_src) &&
			    ptr < PTR_IP(ip_dst))
				ptr = PTR_IP6(ip6_src);
			else if (ptr >= PTR_IP(ip_dst) &&
			    ptr < sizeof(struct ip))
				ptr = PTR_IP6(ip6_dst);
			else {
				return (-1);
			}
			break;
		default:
			return (-1);
		}

		pf_patch_8(pd, &icmp4->icmp_type, type, PF_HI);
		pf_patch_8(pd, &icmp4->icmp_code, code, PF_LO);
		pf_patch_16(pd, &icmp4->icmp_nextmtu, htons(mtu));
		if (ptr >= 0)
			pf_patch_32(pd, &icmp4->icmp_void, htonl(ptr));
		break;
	}

	return (0);
}
#endif /* INET6 */

/*
 * Need to modulate the sequence numbers in the TCP SACK option
 * (credits to Krzysztof Pfaff for report and patch)
 */
int
pf_modulate_sack(struct pf_pdesc *pd, struct pf_state_peer *dst)
{
	struct sackblk	 sack;
	int		 copyback = 0, i;
	int		 olen, optsoff;
	u_int8_t	 opts[MAX_TCPOPTLEN], *opt, *eoh;

	olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr);
	optsoff = pd->off + sizeof(struct tcphdr);
	/* a SACK option needs at least type+len plus one 8-byte block */
#define	TCPOLEN_MINSACK	(TCPOLEN_SACK + 2)
	if (olen <
TCPOLEN_MINSACK || 2741 !pf_pull_hdr(pd->m, optsoff, opts, olen, NULL, NULL, pd->af)) 2742 return (0); 2743 2744 eoh = opts + olen; 2745 opt = opts; 2746 while ((opt = pf_find_tcpopt(opt, opts, olen, 2747 TCPOPT_SACK, TCPOLEN_MINSACK)) != NULL) 2748 { 2749 size_t safelen = MIN(opt[1], (eoh - opt)); 2750 for (i = 2; i + TCPOLEN_SACK <= safelen; i += TCPOLEN_SACK) { 2751 size_t startoff = (opt + i) - opts; 2752 memcpy(&sack, &opt[i], sizeof(sack)); 2753 pf_patch_32_unaligned(pd, &sack.start, 2754 htonl(ntohl(sack.start) - dst->seqdiff), 2755 PF_ALGNMNT(startoff)); 2756 pf_patch_32_unaligned(pd, &sack.end, 2757 htonl(ntohl(sack.end) - dst->seqdiff), 2758 PF_ALGNMNT(startoff + sizeof(sack.start))); 2759 memcpy(&opt[i], &sack, sizeof(sack)); 2760 } 2761 copyback = 1; 2762 opt += opt[1]; 2763 } 2764 2765 if (copyback) 2766 m_copyback(pd->m, optsoff, olen, opts, M_NOWAIT); 2767 return (copyback); 2768 } 2769 2770 struct mbuf * 2771 pf_build_tcp(const struct pf_rule *r, sa_family_t af, 2772 const struct pf_addr *saddr, const struct pf_addr *daddr, 2773 u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack, 2774 u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag, 2775 u_int16_t rtag, u_int sack, u_int rdom) 2776 { 2777 struct mbuf *m; 2778 int len, tlen; 2779 struct ip *h; 2780 #ifdef INET6 2781 struct ip6_hdr *h6; 2782 #endif /* INET6 */ 2783 struct tcphdr *th; 2784 char *opt; 2785 2786 /* maximum segment size tcp option */ 2787 tlen = sizeof(struct tcphdr); 2788 if (mss) 2789 tlen += 4; 2790 if (sack) 2791 tlen += 2; 2792 2793 switch (af) { 2794 case AF_INET: 2795 len = sizeof(struct ip) + tlen; 2796 break; 2797 #ifdef INET6 2798 case AF_INET6: 2799 len = sizeof(struct ip6_hdr) + tlen; 2800 break; 2801 #endif /* INET6 */ 2802 default: 2803 unhandled_af(af); 2804 } 2805 2806 /* create outgoing mbuf */ 2807 m = m_gethdr(M_DONTWAIT, MT_HEADER); 2808 if (m == NULL) 2809 return (NULL); 2810 if (tag) 2811 m->m_pkthdr.pf.flags |= PF_TAG_GENERATED; 2812 
m->m_pkthdr.pf.tag = rtag; 2813 m->m_pkthdr.ph_rtableid = rdom; 2814 if (r && (r->scrub_flags & PFSTATE_SETPRIO)) 2815 m->m_pkthdr.pf.prio = r->set_prio[0]; 2816 if (r && r->qid) 2817 m->m_pkthdr.pf.qid = r->qid; 2818 m->m_data += max_linkhdr; 2819 m->m_pkthdr.len = m->m_len = len; 2820 m->m_pkthdr.ph_ifidx = 0; 2821 m->m_pkthdr.csum_flags |= M_TCP_CSUM_OUT; 2822 memset(m->m_data, 0, len); 2823 switch (af) { 2824 case AF_INET: 2825 h = mtod(m, struct ip *); 2826 h->ip_p = IPPROTO_TCP; 2827 h->ip_len = htons(tlen); 2828 h->ip_v = 4; 2829 h->ip_hl = sizeof(*h) >> 2; 2830 h->ip_tos = IPTOS_LOWDELAY; 2831 h->ip_len = htons(len); 2832 h->ip_off = htons(ip_mtudisc ? IP_DF : 0); 2833 h->ip_ttl = ttl ? ttl : ip_defttl; 2834 h->ip_sum = 0; 2835 h->ip_src.s_addr = saddr->v4.s_addr; 2836 h->ip_dst.s_addr = daddr->v4.s_addr; 2837 2838 th = (struct tcphdr *)((caddr_t)h + sizeof(struct ip)); 2839 break; 2840 #ifdef INET6 2841 case AF_INET6: 2842 h6 = mtod(m, struct ip6_hdr *); 2843 h6->ip6_nxt = IPPROTO_TCP; 2844 h6->ip6_plen = htons(tlen); 2845 h6->ip6_vfc |= IPV6_VERSION; 2846 h6->ip6_hlim = IPV6_DEFHLIM; 2847 memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr)); 2848 memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr)); 2849 2850 th = (struct tcphdr *)((caddr_t)h6 + sizeof(struct ip6_hdr)); 2851 break; 2852 #endif /* INET6 */ 2853 default: 2854 unhandled_af(af); 2855 } 2856 2857 /* TCP header */ 2858 th->th_sport = sport; 2859 th->th_dport = dport; 2860 th->th_seq = htonl(seq); 2861 th->th_ack = htonl(ack); 2862 th->th_off = tlen >> 2; 2863 th->th_flags = flags; 2864 th->th_win = htons(win); 2865 2866 opt = (char *)(th + 1); 2867 if (mss) { 2868 opt[0] = TCPOPT_MAXSEG; 2869 opt[1] = 4; 2870 mss = htons(mss); 2871 memcpy((opt + 2), &mss, 2); 2872 opt += 4; 2873 } 2874 if (sack) { 2875 opt[0] = TCPOPT_SACK_PERMITTED; 2876 opt[1] = 2; 2877 opt += 2; 2878 } 2879 2880 return (m); 2881 } 2882 2883 void 2884 pf_send_tcp(const struct pf_rule *r, sa_family_t af, 2885 const 
struct pf_addr *saddr, const struct pf_addr *daddr, 2886 u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack, 2887 u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag, 2888 u_int16_t rtag, u_int rdom) 2889 { 2890 struct mbuf *m; 2891 2892 if ((m = pf_build_tcp(r, af, saddr, daddr, sport, dport, seq, ack, 2893 flags, win, mss, ttl, tag, rtag, 0, rdom)) == NULL) 2894 return; 2895 2896 switch (af) { 2897 case AF_INET: 2898 ip_send(m); 2899 break; 2900 #ifdef INET6 2901 case AF_INET6: 2902 ip6_send(m); 2903 break; 2904 #endif /* INET6 */ 2905 } 2906 } 2907 2908 static void 2909 pf_send_challenge_ack(struct pf_pdesc *pd, struct pf_state *s, 2910 struct pf_state_peer *src, struct pf_state_peer *dst) 2911 { 2912 /* 2913 * We are sending challenge ACK as a response to SYN packet, which 2914 * matches existing state (modulo TCP window check). Therefore packet 2915 * must be sent on behalf of destination. 2916 * 2917 * We expect sender to remain either silent, or send RST packet 2918 * so both, firewall and remote peer, can purge dead state from 2919 * memory. 
	 */
	pf_send_tcp(s->rule.ptr, pd->af, pd->dst, pd->src,
	    pd->hdr.tcp.th_dport, pd->hdr.tcp.th_sport, dst->seqlo,
	    src->seqlo, TH_ACK, 0, 0, s->rule.ptr->return_ttl, 1, 0,
	    pd->rdomain);
}

/*
 * Copy the offending packet and hand it to icmp_error()/icmp6_error()
 * to emit an ICMP error on behalf of a blocking rule.
 */
void
pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, int param,
    sa_family_t af, struct pf_rule *r, u_int rdomain)
{
	struct mbuf	*m0;

	if ((m0 = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL)
		return;

	m0->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
	m0->m_pkthdr.ph_rtableid = rdomain;
	if (r && (r->scrub_flags & PFSTATE_SETPRIO))
		m0->m_pkthdr.pf.prio = r->set_prio[0];
	if (r && r->qid)
		m0->m_pkthdr.pf.qid = r->qid;

	switch (af) {
	case AF_INET:
		icmp_error(m0, type, code, 0, param);
		break;
#ifdef INET6
	case AF_INET6:
		icmp6_error(m0, type, code, param);
		break;
#endif /* INET6 */
	}
}

/*
 * Return ((n = 0) == (a = b [with mask m]))
 * Note: n != 0 => returns (a != b [with mask m])
 */
int
pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m,
    struct pf_addr *b, sa_family_t af)
{
	switch (af) {
	case AF_INET:
		if ((a->addr32[0] & m->addr32[0]) ==
		    (b->addr32[0] & m->addr32[0]))
			return (n == 0);
		break;
#ifdef INET6
	case AF_INET6:
		if (((a->addr32[0] & m->addr32[0]) ==
		    (b->addr32[0] & m->addr32[0])) &&
		    ((a->addr32[1] & m->addr32[1]) ==
		    (b->addr32[1] & m->addr32[1])) &&
		    ((a->addr32[2] & m->addr32[2]) ==
		    (b->addr32[2] & m->addr32[2])) &&
		    ((a->addr32[3] & m->addr32[3]) ==
		    (b->addr32[3] & m->addr32[3])))
			return (n == 0);
		break;
#endif /* INET6 */
	}

	return (n != 0);
}

/*
 * Return 1 if b <= a <= e, otherwise return 0.
 */
int
pf_match_addr_range(struct pf_addr *b, struct pf_addr *e,
    struct pf_addr *a, sa_family_t af)
{
	switch (af) {
	case AF_INET:
		if ((ntohl(a->addr32[0]) < ntohl(b->addr32[0])) ||
		    (ntohl(a->addr32[0]) > ntohl(e->addr32[0])))
			return (0);
		break;
#ifdef INET6
	case AF_INET6: {
		int	i;

		/* check a >= b */
		for (i = 0; i < 4; ++i)
			if (ntohl(a->addr32[i]) > ntohl(b->addr32[i]))
				break;
			else if (ntohl(a->addr32[i]) < ntohl(b->addr32[i]))
				return (0);
		/* check a <= e */
		for (i = 0; i < 4; ++i)
			if (ntohl(a->addr32[i]) < ntohl(e->addr32[i]))
				break;
			else if (ntohl(a->addr32[i]) > ntohl(e->addr32[i]))
				return (0);
		break;
	}
#endif /* INET6 */
	}
	return (1);
}

/* evaluate a ruleset comparison operator (=, !=, <, ranges, ...) */
int
pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p)
{
	switch (op) {
	case PF_OP_IRG:
		return ((p > a1) && (p < a2));
	case PF_OP_XRG:
		return ((p < a1) || (p > a2));
	case PF_OP_RRG:
		return ((p >= a1) && (p <= a2));
	case PF_OP_EQ:
		return (p == a1);
	case PF_OP_NE:
		return (p != a1);
	case PF_OP_LT:
		return (p < a1);
	case PF_OP_LE:
		return (p <= a1);
	case PF_OP_GT:
		return (p > a1);
	case PF_OP_GE:
		return (p >= a1);
	}
	return (0); /* never reached */
}

/* ports arrive in network byte order; compare in host order */
int
pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p)
{
	return (pf_match(op, ntohs(a1), ntohs(a2), ntohs(p)));
}

/* uid -1 == UID_MAX means "unknown": only = / != may match it */
int
pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u)
{
	if (u == -1 && op != PF_OP_EQ && op != PF_OP_NE)
		return (0);
	return (pf_match(op, a1, a2, u));
}

/* gid -1 == GID_MAX means "unknown": only = / != may match it */
int
pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g)
{
	if (g == -1 && op != PF_OP_EQ && op != PF_OP_NE)
		return (0);
	return (pf_match(op, a1, a2, g));
}

/* lazily fetch the packet tag, then match it against the rule */
int
pf_match_tag(struct mbuf *m, struct pf_rule *r, int *tag)
{
	if (*tag == -1)
		*tag = m->m_pkthdr.pf.tag;

	return ((!r->match_tag_not && r->match_tag == *tag) ||
	    (r->match_tag_not && r->match_tag != *tag));
}

/*
 * Match the rule's received-on interface against the packet's input
 * interface; carp interfaces are resolved to their underlying device.
 */
int
pf_match_rcvif(struct mbuf *m, struct pf_rule *r)
{
	struct ifnet	*ifp, *ifp0;
	struct pfi_kif	*kif;

	ifp = if_get(m->m_pkthdr.ph_ifidx);
	if (ifp == NULL)
		return (0);

#if NCARP > 0
	if (ifp->if_type == IFT_CARP &&
	    (ifp0 = if_get(ifp->if_carpdevidx)) != NULL) {
		kif = (struct pfi_kif *)ifp0->if_pf_kif;
		if_put(ifp0);
	} else
#endif /* NCARP */
		kif = (struct pfi_kif *)ifp->if_pf_kif;

	if_put(ifp);

	if (kif == NULL) {
		DPFPRINTF(LOG_ERR,
		    "%s: kif == NULL, @%d via %s", __func__,
		    r->nr, r->rcv_ifname);
		return (0);
	}

	return (pfi_kif_match(r->rcv_kif, kif));
}

/* stamp tag and routing table onto the packet header */
void
pf_tag_packet(struct mbuf *m, int tag, int rtableid)
{
	if (tag > 0)
		m->m_pkthdr.pf.tag = tag;
	if (rtableid >= 0)
		m->m_pkthdr.ph_rtableid = (u_int)rtableid;
}

/*
 * Recurse into an anchor's ruleset(s); wildcard anchors walk every
 * child.  Depth is bounded by PF_ANCHOR_STACK_MAX.
 */
enum pf_test_status
pf_step_into_anchor(struct pf_test_ctx *ctx, struct pf_rule *r)
{
	int	rv;

	if (ctx->depth >= PF_ANCHOR_STACK_MAX) {
		log(LOG_ERR, "pf_step_into_anchor: stack overflow\n");
		return (PF_TEST_FAIL);
	}

	ctx->depth++;

	if (r->anchor_wildcard) {
		struct pf_anchor *child;
		rv = PF_TEST_OK;
		RB_FOREACH(child, pf_anchor_node, &r->anchor->children) {
			rv = pf_match_rule(ctx, &child->ruleset);
			if ((rv == PF_TEST_QUICK) || (rv == PF_TEST_FAIL)) {
				/*
				 * we either hit a rule with quick action
				 * (more likely), or hit some runtime
				 * error (e.g. pool_get() failure).
				 */
				break;
			}
		}
	} else {
		rv = pf_match_rule(ctx, &r->anchor->ruleset);
		/*
		 * Unless errors occured, stop iff any rule matched
		 * within quick anchors.
		 *
		 * NOTE(review): r->quick is compared against the enum value
		 * PF_TEST_QUICK here — presumably both are 1; confirm
		 * against pfvar.h before touching this.
		 */
		if (rv != PF_TEST_FAIL && r->quick == PF_TEST_QUICK &&
		    *ctx->am == r)
			rv = PF_TEST_QUICK;
	}

	ctx->depth--;

	return (rv);
}

/*
 * naddr = (raddr & rmask) | (saddr & ~rmask): splice the pool address
 * into the masked part, keep the source address in the host part.
 */
void
pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr,
    struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af)
{
	switch (af) {
	case AF_INET:
		naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
		    ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]);
		break;
#ifdef INET6
	case AF_INET6:
		naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
		    ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]);
		naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) |
		    ((rmask->addr32[1] ^ 0xffffffff ) & saddr->addr32[1]);
		naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) |
		    ((rmask->addr32[2] ^ 0xffffffff ) & saddr->addr32[2]);
		naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) |
		    ((rmask->addr32[3] ^ 0xffffffff ) & saddr->addr32[3]);
		break;
#endif /* INET6 */
	default:
		unhandled_af(af);
	}
}

/* increment an address in host byte order, with carry for v6 */
void
pf_addr_inc(struct pf_addr *addr, sa_family_t af)
{
	switch (af) {
	case AF_INET:
		addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1);
		break;
#ifdef INET6
	case AF_INET6:
		if (addr->addr32[3] == 0xffffffff) {
			addr->addr32[3] = 0;
			if (addr->addr32[2] == 0xffffffff) {
				addr->addr32[2] = 0;
				if (addr->addr32[1] == 0xffffffff) {
					addr->addr32[1] = 0;
					addr->addr32[0] =
					    htonl(ntohl(addr->addr32[0]) + 1);
				} else
					addr->addr32[1] =
					    htonl(ntohl(addr->addr32[1]) + 1);
			} else
				addr->addr32[2] =
				    htonl(ntohl(addr->addr32[2]) + 1);
		} else
			addr->addr32[3] =
			    htonl(ntohl(addr->addr32[3]) + 1);
		break;
#endif /* INET6 */
	default:
		unhandled_af(af);
	}
}

/*
 * Find the local PCB owning this packet and record its euid/egid/pid in
 * pd->lookup.  Returns 1 on success, -1 when no socket (or an unhandled
 * protocol) is found.
 */
int
pf_socket_lookup(struct pf_pdesc *pd)
{
	struct pf_addr		*saddr, *daddr;
	u_int16_t		 sport, dport;
	struct inpcbtable	*tb;
	struct inpcb		*inp;

	pd->lookup.uid = -1;
	pd->lookup.gid = -1;
	pd->lookup.pid = NO_PID;
	switch (pd->virtual_proto) {
	case IPPROTO_TCP:
		sport = pd->hdr.tcp.th_sport;
		dport = pd->hdr.tcp.th_dport;
		PF_ASSERT_LOCKED();
		NET_ASSERT_LOCKED();
		tb = &tcbtable;
		break;
	case IPPROTO_UDP:
		sport = pd->hdr.udp.uh_sport;
		dport = pd->hdr.udp.uh_dport;
		PF_ASSERT_LOCKED();
		NET_ASSERT_LOCKED();
		tb = &udbtable;
		break;
	default:
		return (-1);
	}
	/* outbound packets: the local end is the source, so swap */
	if (pd->dir == PF_IN) {
		saddr = pd->src;
		daddr = pd->dst;
	} else {
		u_int16_t	p;

		p = sport;
		sport = dport;
		dport = p;
		saddr = pd->dst;
		daddr = pd->src;
	}
	switch (pd->af) {
	case AF_INET:
		/*
		 * Fails when rtable is changed while evaluating the ruleset
		 * The socket looked up will not match the one hit in the end.
		 */
		inp = in_pcbhashlookup(tb, saddr->v4, sport, daddr->v4, dport,
		    pd->rdomain);
		if (inp == NULL) {
			/* fall back to a listening socket */
			inp = in_pcblookup_listen(tb, daddr->v4, dport,
			    NULL, pd->rdomain);
			if (inp == NULL)
				return (-1);
		}
		break;
#ifdef INET6
	case AF_INET6:
		inp = in6_pcbhashlookup(tb, &saddr->v6, sport, &daddr->v6,
		    dport, pd->rdomain);
		if (inp == NULL) {
			inp = in6_pcblookup_listen(tb, &daddr->v6, dport,
			    NULL, pd->rdomain);
			if (inp == NULL)
				return (-1);
		}
		break;
#endif /* INET6 */
	default:
		unhandled_af(pd->af);
	}
	pd->lookup.uid = inp->inp_socket->so_euid;
	pd->lookup.gid = inp->inp_socket->so_egid;
	pd->lookup.pid = inp->inp_socket->so_cpid;
	return (1);
}

/* post: r  => (r[0] == type /\ r[1] >= min_typelen >= 2  "validity"
 *              /\ (eoh - r) >= min_typelen >= 2  "safety" )
 *
 * warning: r + r[1] may exceed opts bounds for r[1] > min_typelen
 */
u_int8_t*
pf_find_tcpopt(u_int8_t *opt, u_int8_t *opts, size_t hlen, u_int8_t type,
    u_int8_t min_typelen)
{
	u_int8_t *eoh = opts + hlen;

	if (min_typelen < 2)
		return (NULL);

	while ((eoh - opt) >= min_typelen) {
		switch (*opt) {
		case TCPOPT_EOL:
			/* FALLTHROUGH - Workaround the failure of some
			   systems to NOP-pad their bzero'd option buffers,
			   producing spurious EOLs */
		case TCPOPT_NOP:
			opt++;
			continue;
		default:
			if (opt[0] == type &&
			    opt[1] >= min_typelen)
				return (opt);
		}

		opt += MAX(opt[1], 2); /* evade infinite loops */
	}

	return (NULL);
}

/*
 * Extract the window-scale option from a TCP SYN; PF_WSCALE_FLAG marks
 * "option present".  Returns 0 when absent or unreadable.
 */
u_int8_t
pf_get_wscale(struct pf_pdesc *pd)
{
	int		 olen;
	u_int8_t	 opts[MAX_TCPOPTLEN], *opt;
	u_int8_t	 wscale = 0;

	olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr);
	if (olen < TCPOLEN_WINDOW || !pf_pull_hdr(pd->m,
	    pd->off + sizeof(struct tcphdr), opts, olen, NULL, NULL, pd->af))
		return (0);

	opt = opts;
	while ((opt = pf_find_tcpopt(opt, opts, olen,
	    TCPOPT_WINDOW, TCPOLEN_WINDOW)) != NULL) {
		wscale = opt[2];
		wscale = MIN(wscale, TCP_MAX_WINSHIFT);
		wscale |= PF_WSCALE_FLAG;

		opt += opt[1];
	}

	return (wscale);
}

/*
 * Extract the MSS option from a TCP SYN; returns 0 when the options
 * cannot be read (note: default tcp_mssdflt only when options parse but
 * contain no MAXSEG).
 */
u_int16_t
pf_get_mss(struct pf_pdesc *pd)
{
	int		 olen;
	u_int8_t	 opts[MAX_TCPOPTLEN], *opt;
	u_int16_t	 mss = tcp_mssdflt;

	olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr);
	if (olen < TCPOLEN_MAXSEG || !pf_pull_hdr(pd->m,
	    pd->off + sizeof(struct tcphdr), opts, olen, NULL, NULL, pd->af))
		return (0);

	opt = opts;
	while ((opt = pf_find_tcpopt(opt, opts, olen,
	    TCPOPT_MAXSEG, TCPOLEN_MAXSEG)) != NULL) {
		memcpy(&mss, (opt + 2), 2);
		mss = ntohs(mss);

		opt += opt[1];
	}
	return (mss);
}

/*
 * Derive an MSS for 'addr' from the route's interface MTU, clamped by
 * the peer's offer and floored at 64.
 */
u_int16_t
pf_calc_mss(struct pf_addr *addr, sa_family_t af, int rtableid, u_int16_t offer)
{
	struct ifnet		*ifp;
	struct sockaddr_in	*dst;
#ifdef INET6
	struct sockaddr_in6	*dst6;
#endif /* INET6 */
	struct rtentry		*rt = NULL;
	struct sockaddr_storage	 ss;
	int			 hlen;
	u_int16_t		 mss = tcp_mssdflt;

	memset(&ss, 0, sizeof(ss));

	switch (af) {
	case AF_INET:
		hlen = sizeof(struct ip);
		dst = (struct sockaddr_in *)&ss;
		dst->sin_family = AF_INET;
		dst->sin_len = sizeof(*dst);
		dst->sin_addr = addr->v4;
		rt = rtalloc(sintosa(dst), 0, rtableid);
		break;
#ifdef INET6
	case AF_INET6:
		hlen = sizeof(struct ip6_hdr);
		dst6 = (struct sockaddr_in6 *)&ss;
		dst6->sin6_family = AF_INET6;
		dst6->sin6_len = sizeof(*dst6);
		dst6->sin6_addr = addr->v6;
		rt = rtalloc(sin6tosa(dst6), 0, rtableid);
		break;
#endif /* INET6 */
	}

	if (rt != NULL && (ifp = if_get(rt->rt_ifidx)) != NULL) {
		mss = ifp->if_mtu - hlen - sizeof(struct tcphdr);
		mss = max(tcp_mssdflt, mss);
		if_put(ifp);
	}
	rtfree(rt);
	mss = min(mss, offer);
	mss = max(mss, 64);		/* sanity - at least max opt space */
	return (mss);
}

/*
 * Resolve the route-to target for a new state via pf_map_addr();
 * records the outgoing kif and marks the rule as the nat rule.
 */
static __inline int
pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr, sa_family_t af,
    struct pf_src_node **sns)
{
	struct pf_rule *r = s->rule.ptr;
	int	rv;

	s->rt_kif = NULL;
	if (!r->rt)
		return (0);

	switch (af) {
	case AF_INET:
		rv = pf_map_addr(AF_INET, r, saddr, &s->rt_addr, NULL, sns,
		    &r->route, PF_SN_ROUTE);
		break;
#ifdef INET6
	case AF_INET6:
		rv = pf_map_addr(AF_INET6, r, saddr, &s->rt_addr, NULL, sns,
		    &r->route, PF_SN_ROUTE);
		break;
#endif /* INET6 */
	default:
		rv = 1;
	}

	if (rv == 0) {
		s->rt_kif = r->route.kif;
		s->natrule.ptr = r;
	}

	return (rv);
}

/*
 * Generate an unpredictable initial sequence number: SHA-512 over a
 * boot-time secret plus the connection 4-tuple and rdomain.
 */
u_int32_t
pf_tcp_iss(struct pf_pdesc *pd)
{
	SHA2_CTX ctx;
	union {
		uint8_t bytes[SHA512_DIGEST_LENGTH];
		uint32_t words[1];
	} digest;

	/* lazily initialise the per-boot secret */
	if (pf_tcp_secret_init == 0) {
		arc4random_buf(pf_tcp_secret, sizeof(pf_tcp_secret));
		SHA512Init(&pf_tcp_secret_ctx);
		SHA512Update(&pf_tcp_secret_ctx, pf_tcp_secret,
		    sizeof(pf_tcp_secret));
		pf_tcp_secret_init = 1;
	}
	ctx = pf_tcp_secret_ctx;

	SHA512Update(&ctx, &pd->rdomain, sizeof(pd->rdomain));
	SHA512Update(&ctx, &pd->hdr.tcp.th_sport, sizeof(u_short));
	SHA512Update(&ctx, &pd->hdr.tcp.th_dport, sizeof(u_short));
	switch (pd->af) {
	case AF_INET:
		SHA512Update(&ctx, &pd->src->v4, sizeof(struct in_addr));
		SHA512Update(&ctx, &pd->dst->v4, sizeof(struct in_addr));
		break;
#ifdef INET6
	case AF_INET6:
		SHA512Update(&ctx, &pd->src->v6, sizeof(struct in6_addr));
		SHA512Update(&ctx, &pd->dst->v6, sizeof(struct in6_addr));
		break;
#endif /* INET6 */
	}
	SHA512Final(digest.bytes, &ctx);
	pf_tcp_iss_off += 4096;
	return (digest.words[0] + tcp_iss + pf_tcp_iss_off);
}

/*
 * Fold a matching rule's packet-mangling actions into the accumulated
 * action set 'a' (queueing, rtable, tos/ttl/mss scrubbing, prio, delay).
 */
void
pf_rule_to_actions(struct pf_rule *r, struct pf_rule_actions *a)
{
	if (r->qid)
		a->qid = r->qid;
	if (r->pqid)
		a->pqid = r->pqid;
	if (r->rtableid >= 0)
		a->rtableid = r->rtableid;
#if NPFLOG > 0
	a->log |= r->log;
#endif	/* NPFLOG > 0 */
	if (r->scrub_flags & PFSTATE_SETTOS)
		a->set_tos = r->set_tos;
	if (r->min_ttl)
		a->min_ttl = r->min_ttl;
	if (r->max_mss)
		a->max_mss = r->max_mss;
	a->flags |= (r->scrub_flags & (PFSTATE_NODF|PFSTATE_RANDOMID|
	    PFSTATE_SETTOS|PFSTATE_SCRUB_TCP|PFSTATE_SETPRIO));
	if (r->scrub_flags & PFSTATE_SETPRIO) {
		a->set_prio[0] = r->set_prio[0];
		a->set_prio[1] = r->set_prio[1];
	}
	if (r->rule_flag & PFRULE_SETDELAY)
		a->delay = r->delay;
}

/*
 * If condition 't' fails, skip to rule 'a' and restart the loop;
 * relies on 'r' and the enclosing while() in pf_match_rule().
 */
#define PF_TEST_ATTRIB(t, a)			\
	if (t) {				\
		r = a;				\
		continue;			\
	} else do {				\
	} while (0)

enum pf_test_status
pf_match_rule(struct pf_test_ctx *ctx, struct pf_ruleset *ruleset)
{
	struct pf_rule		*r;
	struct pf_rule		*save_a;
	struct pf_ruleset	*save_aruleset;

	r = TAILQ_FIRST(ruleset->rules.active.ptr);
	while (r != NULL) {
		r->evaluations++;
		/* cheap attributes first, using precomputed skip lists */
		PF_TEST_ATTRIB(
		    (pfi_kif_match(r->kif, ctx->pd->kif) == r->ifnot),
		    r->skip[PF_SKIP_IFP].ptr);
		PF_TEST_ATTRIB((r->direction && r->direction != ctx->pd->dir),
		    r->skip[PF_SKIP_DIR].ptr);
		PF_TEST_ATTRIB((r->onrdomain >= 0 &&
		    (r->onrdomain == ctx->pd->rdomain) == r->ifnot),
		    r->skip[PF_SKIP_RDOM].ptr);
		PF_TEST_ATTRIB((r->af && r->af != ctx->pd->af),
		    r->skip[PF_SKIP_AF].ptr);
		PF_TEST_ATTRIB((r->proto && r->proto != ctx->pd->proto),
		    r->skip[PF_SKIP_PROTO].ptr);
		PF_TEST_ATTRIB((PF_MISMATCHAW(&r->src.addr, &ctx->pd->nsaddr,
		    ctx->pd->naf, r->src.neg, ctx->pd->kif,
            ctx->act.rtableid)),
            r->skip[PF_SKIP_SRC_ADDR].ptr);
        PF_TEST_ATTRIB((PF_MISMATCHAW(&r->dst.addr, &ctx->pd->ndaddr,
            ctx->pd->af, r->dst.neg, NULL, ctx->act.rtableid)),
            r->skip[PF_SKIP_DST_ADDR].ptr);

        switch (ctx->pd->virtual_proto) {
        case PF_VPROTO_FRAGMENT:
            /* tcp/udp only. port_op always 0 in other cases */
            PF_TEST_ATTRIB((r->src.port_op || r->dst.port_op),
                TAILQ_NEXT(r, entries));
            PF_TEST_ATTRIB((ctx->pd->proto == IPPROTO_TCP &&
                r->flagset),
                TAILQ_NEXT(r, entries));
            /* icmp only. type/code always 0 in other cases */
            PF_TEST_ATTRIB((r->type || r->code),
                TAILQ_NEXT(r, entries));
            /* tcp/udp only. {uid|gid}.op always 0 in other cases */
            PF_TEST_ATTRIB((r->gid.op || r->uid.op),
                TAILQ_NEXT(r, entries));
            break;

        case IPPROTO_TCP:
            PF_TEST_ATTRIB(((r->flagset & ctx->th->th_flags) !=
                r->flags),
                TAILQ_NEXT(r, entries));
            PF_TEST_ATTRIB((r->os_fingerprint != PF_OSFP_ANY &&
                !pf_osfp_match(pf_osfp_fingerprint(ctx->pd),
                r->os_fingerprint)),
                TAILQ_NEXT(r, entries));
            /* FALLTHROUGH */

        case IPPROTO_UDP:
            /* tcp/udp only. port_op always 0 in other cases */
            PF_TEST_ATTRIB((r->src.port_op &&
                !pf_match_port(r->src.port_op, r->src.port[0],
                r->src.port[1], ctx->pd->nsport)),
                r->skip[PF_SKIP_SRC_PORT].ptr);
            PF_TEST_ATTRIB((r->dst.port_op &&
                !pf_match_port(r->dst.port_op, r->dst.port[0],
                r->dst.port[1], ctx->pd->ndport)),
                r->skip[PF_SKIP_DST_PORT].ptr);
            /*
             * tcp/udp only. uid.op always 0 in other cases.
             * The socket lookup is performed lazily, at most once
             * per packet (lookup.done), via the comma expression.
             */
            PF_TEST_ATTRIB((r->uid.op && (ctx->pd->lookup.done ||
                (ctx->pd->lookup.done =
                pf_socket_lookup(ctx->pd), 1)) &&
                !pf_match_uid(r->uid.op, r->uid.uid[0],
                r->uid.uid[1], ctx->pd->lookup.uid)),
                TAILQ_NEXT(r, entries));
            /* tcp/udp only. gid.op always 0 in other cases */
            PF_TEST_ATTRIB((r->gid.op && (ctx->pd->lookup.done ||
                (ctx->pd->lookup.done =
                pf_socket_lookup(ctx->pd), 1)) &&
                !pf_match_gid(r->gid.op, r->gid.gid[0],
                r->gid.gid[1], ctx->pd->lookup.gid)),
                TAILQ_NEXT(r, entries));
            break;

        case IPPROTO_ICMP:
        case IPPROTO_ICMPV6:
            /* icmp only. type always 0 in other cases */
            PF_TEST_ATTRIB((r->type &&
                r->type != ctx->icmptype + 1),
                TAILQ_NEXT(r, entries));
            /* icmp only. type always 0 in other cases */
            PF_TEST_ATTRIB((r->code &&
                r->code != ctx->icmpcode + 1),
                TAILQ_NEXT(r, entries));
            /* icmp only. don't create states on replies */
            PF_TEST_ATTRIB((r->keep_state && !ctx->state_icmp &&
                (r->rule_flag & PFRULE_STATESLOPPY) == 0 &&
                ctx->icmp_dir != PF_IN),
                TAILQ_NEXT(r, entries));
            break;

        default:
            break;
        }

        PF_TEST_ATTRIB((r->rule_flag & PFRULE_FRAGMENT &&
            ctx->pd->virtual_proto != PF_VPROTO_FRAGMENT),
            TAILQ_NEXT(r, entries));
        PF_TEST_ATTRIB((r->tos && !(r->tos == ctx->pd->tos)),
            TAILQ_NEXT(r, entries));
        PF_TEST_ATTRIB((r->prob &&
            r->prob <= arc4random_uniform(UINT_MAX - 1) + 1),
            TAILQ_NEXT(r, entries));
        PF_TEST_ATTRIB((r->match_tag &&
            !pf_match_tag(ctx->pd->m, r, &ctx->tag)),
            TAILQ_NEXT(r, entries));
        PF_TEST_ATTRIB((r->rcv_kif && pf_match_rcvif(ctx->pd->m, r) ==
            r->rcvifnot),
            TAILQ_NEXT(r, entries));
        PF_TEST_ATTRIB((r->prio &&
            (r->prio == PF_PRIO_ZERO ? 0 : r->prio) !=
            ctx->pd->m->m_pkthdr.pf.prio),
            TAILQ_NEXT(r, entries));

        /*
         * must be last!  pf_add_threshold() mutates the rate
         * counter, so it may only run once all other attributes
         * have matched.
         */
        if (r->pktrate.limit) {
            pf_add_threshold(&r->pktrate);
            PF_TEST_ATTRIB((pf_check_threshold(&r->pktrate)),
                TAILQ_NEXT(r, entries));
        }

        /* FALLTHROUGH: every attribute of r matched the packet */
        if (r->tag)
            ctx->tag = r->tag;
        if (r->anchor == NULL) {
            if (r->action == PF_MATCH) {
                /* match rule: record it and fold in its actions */
                if ((ctx->ri = pool_get(&pf_rule_item_pl,
                    PR_NOWAIT)) == NULL) {
                    REASON_SET(&ctx->reason, PFRES_MEMORY);
                    ctx->test_status = PF_TEST_FAIL;
                    break;
                }
                ctx->ri->r = r;
                /* order is irrelevant */
                SLIST_INSERT_HEAD(&ctx->rules, ctx->ri, entry);
                ctx->ri = NULL;
                pf_rule_to_actions(r, &ctx->act);
                if (r->rule_flag & PFRULE_AFTO)
                    ctx->pd->naf = r->naf;
                if (pf_get_transaddr(r, ctx->pd, ctx->sns,
                    &ctx->nr) == -1) {
                    REASON_SET(&ctx->reason,
                        PFRES_TRANSLATE);
                    ctx->test_status = PF_TEST_FAIL;
                    break;
                }
#if NPFLOG > 0
                if (r->log) {
                    REASON_SET(&ctx->reason, PFRES_MATCH);
                    pflog_packet(ctx->pd, ctx->reason, r,
                        ctx->a, ruleset, NULL);
                }
#endif /* NPFLOG > 0 */
            } else {
                /*
                 * found matching r
                 */
                *ctx->rm = r;
                /*
                 * anchor, with ruleset, where r belongs to
                 */
                *ctx->am = ctx->a;
                /*
                 * ruleset where r belongs to
                 */
                *ctx->rsm = ruleset;
                /*
                 * ruleset, where anchor belongs to.
                 */
                ctx->arsm = ctx->aruleset;
            }

#if NPFLOG > 0
            if (ctx->act.log & PF_LOG_MATCHES)
                pf_log_matches(ctx->pd, r, ctx->a, ruleset,
                    &ctx->rules);
#endif /* NPFLOG > 0 */

            if (r->quick) {
                ctx->test_status = PF_TEST_QUICK;
                break;
            }
        } else {
            save_a = ctx->a;
            save_aruleset = ctx->aruleset;
            ctx->a = r;            /* remember anchor */
            ctx->aruleset = ruleset;    /* and its ruleset */
            /*
             * Note: we don't need to restore if we are not going
             * to continue with ruleset evaluation.
             */
            if (pf_step_into_anchor(ctx, r) != PF_TEST_OK)
                break;
            ctx->a = save_a;
            ctx->aruleset = save_aruleset;
        }
        r = TAILQ_NEXT(r, entries);
    }

    return (ctx->test_status);
}

/*
 * Top-level rule evaluation for one packet: run it through the main
 * ruleset, apply the resulting actions (logging, TCP RST / ICMP
 * unreachable returns for blocked packets, tagging, rtable change)
 * and, for pass rules with keep-state, create a state entry via
 * pf_create_state().  Returns a PF_* verdict (PF_PASS, PF_DROP,
 * PF_DEFER, PF_AFRT, ...).
 */
int
pf_test_rule(struct pf_pdesc *pd, struct pf_rule **rm, struct pf_state **sm,
    struct pf_rule **am, struct pf_ruleset **rsm, u_short *reason)
{
    struct pf_rule *r = NULL;
    struct pf_rule *a = NULL;
    struct pf_ruleset *ruleset = NULL;
    struct pf_state_key *skw = NULL, *sks = NULL;
    int rewrite = 0;
    u_int16_t virtual_type, virtual_id;
    int action = PF_DROP;
    struct pf_test_ctx ctx;
    int rv;

    memset(&ctx, 0, sizeof(ctx));
    ctx.pd = pd;
    ctx.rm = rm;
    ctx.am = am;
    ctx.rsm = rsm;
    ctx.th = &pd->hdr.tcp;
    ctx.act.rtableid = pd->rdomain;
    ctx.tag = -1;
    SLIST_INIT(&ctx.rules);

    /* shed inbound load when the stack reports congestion */
    if (pd->dir == PF_IN && if_congested()) {
        REASON_SET(&ctx.reason, PFRES_CONGEST);
        return (PF_DROP);
    }

    /*
     * ICMP queries carry no ports; pf_icmp_mapping() derives virtual
     * id/type values which are stored in the port fields so the
     * generic port matching code can be reused.
     */
    switch (pd->virtual_proto) {
    case IPPROTO_ICMP:
        ctx.icmptype = pd->hdr.icmp.icmp_type;
        ctx.icmpcode = pd->hdr.icmp.icmp_code;
        ctx.state_icmp = pf_icmp_mapping(pd, ctx.icmptype,
            &ctx.icmp_dir, &virtual_id, &virtual_type);
        if (ctx.icmp_dir == PF_IN) {
            pd->osport = pd->nsport = virtual_id;
            pd->odport = pd->ndport = virtual_type;
        } else {
            pd->osport = pd->nsport = virtual_type;
            pd->odport = pd->ndport = virtual_id;
        }
        break;
#ifdef INET6
    case IPPROTO_ICMPV6:
        ctx.icmptype = pd->hdr.icmp6.icmp6_type;
        ctx.icmpcode = pd->hdr.icmp6.icmp6_code;
        ctx.state_icmp = pf_icmp_mapping(pd, ctx.icmptype,
            &ctx.icmp_dir, &virtual_id, &virtual_type);
        if (ctx.icmp_dir == PF_IN) {
            pd->osport = pd->nsport = virtual_id;
            pd->odport = pd->ndport = virtual_type;
        } else {
            pd->osport = pd->nsport = virtual_type;
            pd->odport = pd->ndport = virtual_id;
        }
        break;
#endif /* INET6 */
    }

    ruleset = &pf_main_ruleset;
    rv = pf_match_rule(&ctx, ruleset);
    if (rv == PF_TEST_FAIL) {
        /*
         * Reason has been set in pf_match_rule() already.
         */
        goto cleanup;
    }

    r = *ctx.rm;    /* matching rule */
    a = *ctx.am;    /* rule that defines an anchor containing 'r' */
    ruleset = *ctx.rsm;    /* ruleset of the anchor defined by the rule 'a' */
    ctx.aruleset = ctx.arsm;    /* ruleset of the 'a' rule itself */

    /* apply actions for last matching pass/block rule */
    pf_rule_to_actions(r, &ctx.act);
    if (r->rule_flag & PFRULE_AFTO)
        pd->naf = r->naf;
    if (pf_get_transaddr(r, pd, ctx.sns, &ctx.nr) == -1) {
        REASON_SET(&ctx.reason, PFRES_TRANSLATE);
        goto cleanup;
    }
    REASON_SET(&ctx.reason, PFRES_MATCH);

#if NPFLOG > 0
    if (r->log)
        pflog_packet(pd, ctx.reason, r, a, ruleset, NULL);
    if (ctx.act.log & PF_LOG_MATCHES)
        pf_log_matches(pd, r, a, ruleset, &ctx.rules);
#endif /* NPFLOG > 0 */

    /*
     * For blocking rules with return-rst/return-icmp/return, answer
     * the sender: TCP gets a RST (after checksum verification), IP
     * and IPv6 get the configured ICMP error.
     */
    if (pd->virtual_proto != PF_VPROTO_FRAGMENT &&
        (r->action == PF_DROP) &&
        ((r->rule_flag & PFRULE_RETURNRST) ||
        (r->rule_flag & PFRULE_RETURNICMP) ||
        (r->rule_flag & PFRULE_RETURN))) {
        if (pd->proto == IPPROTO_TCP &&
            ((r->rule_flag & PFRULE_RETURNRST) ||
            (r->rule_flag & PFRULE_RETURN)) &&
            !(ctx.th->th_flags & TH_RST)) {
            u_int32_t ack =
                ntohl(ctx.th->th_seq) + pd->p_len;

            if (pf_check_tcp_cksum(pd->m, pd->off,
                pd->tot_len - pd->off, pd->af))
                REASON_SET(&ctx.reason, PFRES_PROTCKSUM);
            else {
                /* SYN and FIN each consume one sequence number */
                if (ctx.th->th_flags & TH_SYN)
                    ack++;
                if (ctx.th->th_flags & TH_FIN)
                    ack++;
                pf_send_tcp(r, pd->af, pd->dst,
                    pd->src, ctx.th->th_dport,
                    ctx.th->th_sport, ntohl(ctx.th->th_ack),
                    ack, TH_RST|TH_ACK, 0, 0, r->return_ttl,
                    1, 0, pd->rdomain);
            }
        } else if ((pd->proto != IPPROTO_ICMP ||
            ICMP_INFOTYPE(ctx.icmptype)) && pd->af == AF_INET &&
            r->return_icmp)
            pf_send_icmp(pd->m, r->return_icmp >> 8,
                r->return_icmp & 255, 0, pd->af, r, pd->rdomain);
        else if ((pd->proto != IPPROTO_ICMPV6 ||
            (ctx.icmptype >= ICMP6_ECHO_REQUEST &&
            ctx.icmptype != ND_REDIRECT)) && pd->af == AF_INET6 &&
            r->return_icmp6)
            pf_send_icmp(pd->m, r->return_icmp6 >> 8,
                r->return_icmp6 & 255, 0, pd->af, r, pd->rdomain);
    }

    if (r->action == PF_DROP)
        goto cleanup;

    /*
     * If an expired "once" rule has not been purged, drop any new matching
     * packets.
     */
    if (r->rule_flag & PFRULE_EXPIRED)
        goto cleanup;

    pf_tag_packet(pd->m, ctx.tag, ctx.act.rtableid);
    if (ctx.act.rtableid >= 0 &&
        rtable_l2(ctx.act.rtableid) != pd->rdomain)
        pd->destchg = 1;

    if (r->action == PF_PASS && pd->badopts && ! r->allow_opts) {
        REASON_SET(&ctx.reason, PFRES_IPOPTIONS);
#if NPFLOG > 0
        pd->pflog |= PF_LOG_FORCE;
#endif /* NPFLOG > 0 */
        DPFPRINTF(LOG_NOTICE, "dropping packet with "
            "ip/ipv6 options in pf_test_rule()");
        goto cleanup;
    }

    action = PF_PASS;

    if (pd->virtual_proto != PF_VPROTO_FRAGMENT
        && !ctx.state_icmp && r->keep_state) {

        if (r->rule_flag & PFRULE_SRCTRACK &&
            pf_insert_src_node(&ctx.sns[PF_SN_NONE], r, PF_SN_NONE,
            pd->af, pd->src, NULL, NULL) != 0) {
            REASON_SET(&ctx.reason, PFRES_SRCLIMIT);
            goto cleanup;
        }

        if (r->max_states && (r->states_cur >= r->max_states)) {
            pf_status.lcounters[LCNT_STATES]++;
            REASON_SET(&ctx.reason, PFRES_MAXSTATES);
            goto cleanup;
        }

        action = pf_create_state(pd, r, a, ctx.nr, &skw, &sks,
            &rewrite, sm, ctx.tag, &ctx.rules, &ctx.act, ctx.sns);

        if (action != PF_PASS)
            goto cleanup;
        if (sks != skw) {
            /*
             * Translation in effect (wire and stack keys differ):
             * rewrite the headers with the key that corresponds
             * to the packet's direction.
             */
            struct pf_state_key *sk;

            if (pd->dir == PF_IN)
                sk = sks;
            else
                sk = skw;
            rewrite += pf_translate(pd,
                &sk->addr[pd->af == pd->naf ? pd->sidx : pd->didx],
                sk->port[pd->af == pd->naf ? pd->sidx : pd->didx],
                &sk->addr[pd->af == pd->naf ? pd->didx : pd->sidx],
                sk->port[pd->af == pd->naf ? pd->didx : pd->sidx],
                virtual_type, ctx.icmp_dir);
        }

#ifdef INET6
        if (rewrite && skw->af != sks->af)
            action = PF_AFRT;    /* address family translation */
#endif /* INET6 */

    } else {
        /* stateless match: discard the collected match-rule items */
        while ((ctx.ri = SLIST_FIRST(&ctx.rules))) {
            SLIST_REMOVE_HEAD(&ctx.rules, entry);
            pool_put(&pf_rule_item_pl, ctx.ri);
        }
    }

    /* copy back packet headers if needed */
    if (rewrite && pd->hdrlen) {
        m_copyback(pd->m, pd->off, pd->hdrlen, &pd->hdr, M_NOWAIT);
    }

#if NPFSYNC > 0
    if (*sm != NULL && !ISSET((*sm)->state_flags, PFSTATE_NOSYNC) &&
        pd->dir == PF_OUT && pfsync_up()) {
        /*
         * We want the state created, but we dont
         * want to send this in case a partner
         * firewall has to know about it to allow
         * replies through it.
         */
        if (pfsync_defer(*sm, pd->m))
            return (PF_DEFER);
    }
#endif /* NPFSYNC > 0 */

    if (r->rule_flag & PFRULE_ONCE) {
        u_int32_t rule_flag;

        /*
         * Use atomic_cas() to determine a clear winner, which will
         * insert an expired rule to gcl.
         */
        rule_flag = r->rule_flag;
        if (((rule_flag & PFRULE_EXPIRED) == 0) &&
            atomic_cas_uint(&r->rule_flag, rule_flag,
            rule_flag | PFRULE_EXPIRED) == rule_flag) {
            r->exptime = gettime();
            SLIST_INSERT_HEAD(&pf_rule_gcl, r, gcle);
        }
    }

    return (action);

cleanup:
    while ((ctx.ri = SLIST_FIRST(&ctx.rules))) {
        SLIST_REMOVE_HEAD(&ctx.rules, entry);
        pool_put(&pf_rule_item_pl, ctx.ri);
    }

    return (action);
}

/*
 * Allocate and initialize a state entry for the matched rule 'r' and
 * insert it into the state table.  TCP states get their sequence
 * windows seeded (and optionally a sequence modulator); synproxy
 * rules answer the initial SYN with a SYN|ACK and return
 * PF_SYNPROXY_DROP.  On any failure all resources acquired here are
 * released (csfailed) and PF_DROP is returned.
 */
static __inline int
pf_create_state(struct pf_pdesc *pd, struct pf_rule *r, struct pf_rule *a,
    struct pf_rule *nr, struct pf_state_key **skw, struct pf_state_key **sks,
    int *rewrite, struct pf_state **sm, int tag, struct pf_rule_slist *rules,
    struct pf_rule_actions *act, struct pf_src_node *sns[PF_SN_MAX])
{
    struct pf_state *s = NULL;
    struct tcphdr *th = &pd->hdr.tcp;
    u_int16_t mss = tcp_mssdflt;
    u_short reason;
    u_int i;

    s = pool_get(&pf_state_pl, PR_NOWAIT | PR_ZERO);
    if (s == NULL) {
        REASON_SET(&reason, PFRES_MEMORY);
        goto csfailed;
    }
    s->rule.ptr = r;
    s->anchor.ptr = a;
    s->natrule.ptr = nr;
    if (r->allow_opts)
        s->state_flags |= PFSTATE_ALLOWOPTS;
    if (r->rule_flag & PFRULE_STATESLOPPY)
        s->state_flags |= PFSTATE_SLOPPY;
    if (r->rule_flag & PFRULE_PFLOW)
        s->state_flags |= PFSTATE_PFLOW;
#if NPFLOG > 0
    s->log = act->log & PF_LOG_ALL;
#endif /* NPFLOG > 0 */
    s->qid = act->qid;
    s->pqid = act->pqid;
    s->rtableid[pd->didx] = act->rtableid;
    s->rtableid[pd->sidx] = -1;    /* return traffic is routed normally */
    s->min_ttl = act->min_ttl;
    s->set_tos = act->set_tos;
    s->max_mss = act->max_mss;
    s->state_flags |= act->flags;
#if NPFSYNC > 0
    s->sync_state = PFSYNC_S_NONE;
#endif /* NPFSYNC > 0 */
    s->set_prio[0] = act->set_prio[0];
    s->set_prio[1] = act->set_prio[1];
    s->delay =
act->delay;
    SLIST_INIT(&s->src_nodes);
    /*
     * must initialize refcnt, before pf_state_insert() gets called.
     * pf_state_inserts() grabs reference for pfsync!
     */
    refcnt_init(&s->refcnt);

    /* protocol-specific peer state and first-packet timeout */
    switch (pd->proto) {
    case IPPROTO_TCP:
        s->src.seqlo = ntohl(th->th_seq);
        s->src.seqhi = s->src.seqlo + pd->p_len + 1;
        if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN &&
            r->keep_state == PF_STATE_MODULATE) {
            /* Generate sequence number modulator */
            if ((s->src.seqdiff = pf_tcp_iss(pd) - s->src.seqlo) ==
                0)
                s->src.seqdiff = 1;
            pf_patch_32(pd,
                &th->th_seq, htonl(s->src.seqlo + s->src.seqdiff));
            *rewrite = 1;
        } else
            s->src.seqdiff = 0;
        if (th->th_flags & TH_SYN) {
            s->src.seqhi++;
            s->src.wscale = pf_get_wscale(pd);
        }
        s->src.max_win = MAX(ntohs(th->th_win), 1);
        if (s->src.wscale & PF_WSCALE_MASK) {
            /* Remove scale factor from initial window */
            int win = s->src.max_win;
            win += 1 << (s->src.wscale & PF_WSCALE_MASK);
            s->src.max_win = (win - 1) >>
                (s->src.wscale & PF_WSCALE_MASK);
        }
        if (th->th_flags & TH_FIN)
            s->src.seqhi++;
        s->dst.seqhi = 1;
        s->dst.max_win = 1;
        pf_set_protostate(s, PF_PEER_SRC, TCPS_SYN_SENT);
        pf_set_protostate(s, PF_PEER_DST, TCPS_CLOSED);
        s->timeout = PFTM_TCP_FIRST_PACKET;
        pf_status.states_halfopen++;
        break;
    case IPPROTO_UDP:
        pf_set_protostate(s, PF_PEER_SRC, PFUDPS_SINGLE);
        pf_set_protostate(s, PF_PEER_DST, PFUDPS_NO_TRAFFIC);
        s->timeout = PFTM_UDP_FIRST_PACKET;
        break;
    case IPPROTO_ICMP:
#ifdef INET6
    case IPPROTO_ICMPV6:
#endif /* INET6 */
        s->timeout = PFTM_ICMP_FIRST_PACKET;
        break;
    default:
        pf_set_protostate(s, PF_PEER_SRC, PFOTHERS_SINGLE);
        pf_set_protostate(s, PF_PEER_DST, PFOTHERS_NO_TRAFFIC);
        s->timeout = PFTM_OTHER_FIRST_PACKET;
    }

    s->creation = getuptime();
    s->expire = getuptime();

    if (pd->proto == IPPROTO_TCP) {
        if (s->state_flags & PFSTATE_SCRUB_TCP &&
            pf_normalize_tcp_init(pd, &s->src)) {
            REASON_SET(&reason, PFRES_MEMORY);
            goto csfailed;
        }
        if (s->state_flags & PFSTATE_SCRUB_TCP && s->src.scrub &&
            pf_normalize_tcp_stateful(pd, &reason, s, &s->src, &s->dst,
            rewrite)) {
            /* This really shouldn't happen!!! */
            DPFPRINTF(LOG_ERR,
                "%s: tcp normalize failed on first pkt", __func__);
            goto csfailed;
        }
    }
    s->direction = pd->dir;

    if (pf_state_key_setup(pd, skw, sks, act->rtableid)) {
        REASON_SET(&reason, PFRES_MEMORY);
        goto csfailed;
    }

    if (pf_set_rt_ifp(s, pd->src, (*skw)->af, sns) != 0) {
        REASON_SET(&reason, PFRES_NOROUTE);
        goto csfailed;
    }

    /* link the state to every source node that tracked it */
    for (i = 0; i < PF_SN_MAX; i++)
        if (sns[i] != NULL) {
            struct pf_sn_item *sni;

            sni = pool_get(&pf_sn_item_pl, PR_NOWAIT);
            if (sni == NULL) {
                REASON_SET(&reason, PFRES_MEMORY);
                goto csfailed;
            }
            sni->sn = sns[i];
            SLIST_INSERT_HEAD(&s->src_nodes, sni, next);
            sni->sn->states++;
        }

    if (pf_state_insert(BOUND_IFACE(r, pd->kif), skw, sks, s)) {
        pf_detach_state(s);
        *sks = *skw = NULL;
        REASON_SET(&reason, PFRES_STATEINS);
        goto csfailed;
    } else
        *sm = s;

    /*
     * Make state responsible for rules it binds here.
     */
    memcpy(&s->match_rules, rules, sizeof(s->match_rules));
    memset(rules, 0, sizeof(*rules));
    STATE_INC_COUNTERS(s);

    if (tag > 0) {
        pf_tag_ref(tag);
        s->tag = tag;
    }
    if (pd->proto == IPPROTO_TCP && (th->th_flags & (TH_SYN|TH_ACK)) ==
        TH_SYN && r->keep_state == PF_STATE_SYNPROXY) {
        /* synproxy: answer the SYN ourselves, drop the original */
        int rtid = pd->rdomain;
        if (act->rtableid >= 0)
            rtid = act->rtableid;
        pf_set_protostate(s, PF_PEER_SRC, PF_TCPS_PROXY_SRC);
        s->src.seqhi = arc4random();
        /* Find mss option */
        mss = pf_get_mss(pd);
        mss = pf_calc_mss(pd->src, pd->af, rtid, mss);
        mss = pf_calc_mss(pd->dst, pd->af, rtid, mss);
        s->src.mss = mss;
        pf_send_tcp(r, pd->af, pd->dst, pd->src, th->th_dport,
            th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1,
            TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, pd->rdomain);
        REASON_SET(&reason, PFRES_SYNPROXY);
        return (PF_SYNPROXY_DROP);
    }

    return (PF_PASS);

csfailed:
    if (s) {
        pf_normalize_tcp_cleanup(s);    /* safe even w/o init */
        pf_src_tree_remove_state(s);
        pool_put(&pf_state_pl, s);
    }

    for (i = 0; i < PF_SN_MAX; i++)
        if (sns[i] != NULL)
            pf_remove_src_node(sns[i]);

    return (PF_DROP);
}

/*
 * Rewrite the packet's addresses, ports and ICMP ids to the given
 * translated values (pf_patch_16/pf_translate_a also fix up the
 * checksums in pd).  Returns non-zero when any field was changed,
 * i.e. the caller must copy the header back into the mbuf.
 */
int
pf_translate(struct pf_pdesc *pd, struct pf_addr *saddr, u_int16_t sport,
    struct pf_addr *daddr, u_int16_t dport, u_int16_t virtual_type,
    int icmp_dir)
{
    /*
     * when called from bpf_mtap_pflog, there are extra constraints:
     * -mbuf is faked, m_data is the bpf buffer
     * -pd is not fully set up
     */
    int rewrite = 0;
    int afto = pd->af != pd->naf;    /* address family translation? */

    if (afto || PF_ANEQ(daddr, pd->dst, pd->af))
        pd->destchg = 1;

    switch (pd->proto) {
    case IPPROTO_TCP:    /* FALLTHROUGH */
    case IPPROTO_UDP:
        rewrite += pf_patch_16(pd, pd->sport, sport);
        rewrite += pf_patch_16(pd, pd->dport, dport);
        break;

    case
IPPROTO_ICMP:
        /* pf_translate() is also used when logging invalid packets */
        if (pd->af != AF_INET)
            return (0);

        if (afto) {
#ifdef INET6
            if (pf_translate_icmp_af(pd, AF_INET6, &pd->hdr.icmp))
                return (0);
            pd->proto = IPPROTO_ICMPV6;
            rewrite = 1;
#endif /* INET6 */
        }
        if (virtual_type == htons(ICMP_ECHO)) {
            /* echo id plays the role of the port for NAT */
            u_int16_t icmpid = (icmp_dir == PF_IN) ? sport : dport;
            rewrite += pf_patch_16(pd,
                &pd->hdr.icmp.icmp_id, icmpid);
        }
        break;

#ifdef INET6
    case IPPROTO_ICMPV6:
        /* pf_translate() is also used when logging invalid packets */
        if (pd->af != AF_INET6)
            return (0);

        if (afto) {
            if (pf_translate_icmp_af(pd, AF_INET, &pd->hdr.icmp6))
                return (0);
            pd->proto = IPPROTO_ICMP;
            rewrite = 1;
        }
        if (virtual_type == htons(ICMP6_ECHO_REQUEST)) {
            u_int16_t icmpid = (icmp_dir == PF_IN) ? sport : dport;
            rewrite += pf_patch_16(pd,
                &pd->hdr.icmp6.icmp6_id, icmpid);
        }
        break;
#endif /* INET6 */
    }

    if (!afto) {
        rewrite += pf_translate_a(pd, pd->src, saddr);
        rewrite += pf_translate_a(pd, pd->dst, daddr);
    }

    return (rewrite);
}

/*
 * Full TCP state tracking: validate the packet's sequence and ack
 * numbers against the tracked windows (algorithm after Guido van
 * Rooij's paper, referenced below) and advance the per-peer TCP FSM.
 * 'reverse' flips which peer counts as the source.  Returns PF_PASS
 * or PF_DROP; sets *copyback when the header was modified (sequence
 * number modulation, SACK demodulation).
 */
int
pf_tcp_track_full(struct pf_pdesc *pd, struct pf_state **state, u_short *reason,
    int *copyback, int reverse)
{
    struct tcphdr *th = &pd->hdr.tcp;
    struct pf_state_peer *src, *dst;
    u_int16_t win = ntohs(th->th_win);
    u_int32_t ack, end, data_end, seq, orig_seq;
    u_int8_t sws, dws, psrc, pdst;
    int ackskew;

    /* orient src/dst peers relative to the packet's direction */
    if ((pd->dir == (*state)->direction && !reverse) ||
        (pd->dir != (*state)->direction && reverse)) {
        src = &(*state)->src;
        dst = &(*state)->dst;
        psrc = PF_PEER_SRC;
        pdst = PF_PEER_DST;
    } else {
        src = &(*state)->dst;
        dst = &(*state)->src;
        psrc = PF_PEER_DST;
        pdst = PF_PEER_SRC;
    }

    if (src->wscale && dst->wscale &&
        !(th->th_flags & TH_SYN)) {
        /* window scaling is only in effect once negotiated */
        sws = src->wscale & PF_WSCALE_MASK;
        dws = dst->wscale & PF_WSCALE_MASK;
    } else
        sws = dws = 0;

    /*
     * Sequence tracking algorithm from Guido van Rooij's paper:
     *   http://www.madison-gurkha.com/publications/tcp_filtering/
     *   tcp_filtering.ps
     */

    orig_seq = seq = ntohl(th->th_seq);
    if (src->seqlo == 0) {
        /* First packet from this end. Set its state */

        if (((*state)->state_flags & PFSTATE_SCRUB_TCP || dst->scrub) &&
            src->scrub == NULL) {
            if (pf_normalize_tcp_init(pd, src)) {
                REASON_SET(reason, PFRES_MEMORY);
                return (PF_DROP);
            }
        }

        /* Deferred generation of sequence number modulator */
        if (dst->seqdiff && !src->seqdiff) {
            /* use random iss for the TCP server */
            while ((src->seqdiff = arc4random() - seq) == 0)
                continue;
            ack = ntohl(th->th_ack) - dst->seqdiff;
            pf_patch_32(pd, &th->th_seq, htonl(seq + src->seqdiff));
            pf_patch_32(pd, &th->th_ack, htonl(ack));
            *copyback = 1;
        } else {
            ack = ntohl(th->th_ack);
        }

        end = seq + pd->p_len;
        if (th->th_flags & TH_SYN) {
            end++;
            if (dst->wscale & PF_WSCALE_FLAG) {
                src->wscale = pf_get_wscale(pd);
                if (src->wscale & PF_WSCALE_FLAG) {
                    /* Remove scale factor from initial
                     * window */
                    sws = src->wscale & PF_WSCALE_MASK;
                    win = ((u_int32_t)win + (1 << sws) - 1)
                        >> sws;
                    dws = dst->wscale & PF_WSCALE_MASK;
                } else {
                    /* fixup other window */
                    dst->max_win = MIN(TCP_MAXWIN,
                        (u_int32_t)dst->max_win <<
                        (dst->wscale & PF_WSCALE_MASK));
                    /* in case of a retrans SYN|ACK */
                    dst->wscale = 0;
                }
            }
        }
        data_end = end;
        if (th->th_flags & TH_FIN)
            end++;

        src->seqlo = seq;
        if (src->state < TCPS_SYN_SENT)
            pf_set_protostate(*state, psrc, TCPS_SYN_SENT);

        /*
         * May need to slide the window (seqhi may have been set by
         * the crappy stack check or if we picked up the connection
         * after establishment)
         */
        if (src->seqhi == 1 ||
            SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi))
            src->seqhi = end + MAX(1, dst->max_win << dws);
        if (win > src->max_win)
            src->max_win = win;

    } else {
        /* not the first packet: just (de)modulate if needed */
        ack = ntohl(th->th_ack) - dst->seqdiff;
        if (src->seqdiff) {
            /* Modulate sequence numbers */
            pf_patch_32(pd, &th->th_seq, htonl(seq + src->seqdiff));
            pf_patch_32(pd, &th->th_ack, htonl(ack));
            *copyback = 1;
        }
        end = seq + pd->p_len;
        if (th->th_flags & TH_SYN)
            end++;
        data_end = end;
        if (th->th_flags & TH_FIN)
            end++;
    }

    if ((th->th_flags & TH_ACK) == 0) {
        /* Let it pass through the ack skew check */
        ack = dst->seqlo;
    } else if ((ack == 0 &&
        (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) ||
        /* broken tcp stacks do not set ack */
        (dst->state < TCPS_SYN_SENT)) {
        /*
         * Many stacks (ours included) will set the ACK number in an
         * FIN|ACK if the SYN times out -- no sequence to ACK.
         */
        ack = dst->seqlo;
    }

    if (seq == end) {
        /* Ease sequencing restrictions on no data packets */
        seq = src->seqlo;
        data_end = end = seq;
    }

    ackskew = dst->seqlo - ack;

    /*
     * Need to demodulate the sequence numbers in any TCP SACK options
     * (Selective ACK). We could optionally validate the SACK values
     * against the current ACK window, either forwards or backwards, but
     * I'm not confident that SACK has been implemented properly
     * everywhere. It wouldn't surprise me if several stacks accidently
     * SACK too far backwards of previously ACKed data. There really aren't
     * any security implications of bad SACKing unless the target stack
     * doesn't validate the option length correctly. Someone trying to
     * spoof into a TCP connection won't bother blindly sending SACK
     * options anyway.
     */
    if (dst->seqdiff && (th->th_off << 2) > sizeof(struct tcphdr)) {
        if (pf_modulate_sack(pd, dst))
            *copyback = 1;
    }

#define MAXACKWINDOW (0xffff + 1500)    /* 1500 is an arbitrary fudge factor */
    if (SEQ_GEQ(src->seqhi, data_end) &&
        /* Last octet inside other's window space */
        SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) &&
        /* Retrans: not more than one window back */
        (ackskew >= -MAXACKWINDOW) &&
        /* Acking not more than one reassembled fragment backwards */
        (ackskew <= (MAXACKWINDOW << sws)) &&
        /* Acking not more than one window forward */
        ((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo ||
        (orig_seq == src->seqlo + 1) || (orig_seq + 1 == src->seqlo))) {
        /* Require an exact/+1 sequence match on resets when possible */

        if (dst->scrub || src->scrub) {
            if (pf_normalize_tcp_stateful(pd, reason, *state, src,
                dst, copyback))
                return (PF_DROP);
        }

        /* update max window */
        if (src->max_win < win)
            src->max_win = win;
        /* synchronize sequencing */
        if (SEQ_GT(end, src->seqlo))
            src->seqlo = end;
        /* slide the window of what the other end can send */
        if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
            dst->seqhi = ack + MAX((win << sws), 1);

        /* update states */
        if (th->th_flags & TH_SYN)
            if (src->state < TCPS_SYN_SENT)
                pf_set_protostate(*state, psrc, TCPS_SYN_SENT);
        if (th->th_flags & TH_FIN)
            if (src->state < TCPS_CLOSING)
                pf_set_protostate(*state, psrc, TCPS_CLOSING);
        if (th->th_flags & TH_ACK) {
            if (dst->state == TCPS_SYN_SENT) {
                pf_set_protostate(*state, pdst,
                    TCPS_ESTABLISHED);
                if (src->state == TCPS_ESTABLISHED &&
                    !SLIST_EMPTY(&(*state)->src_nodes) &&
                    pf_src_connlimit(state)) {
                    REASON_SET(reason, PFRES_SRCLIMIT);
                    return (PF_DROP);
                }
            } else if (dst->state == TCPS_CLOSING)
                pf_set_protostate(*state, pdst,
                    TCPS_FIN_WAIT_2);
        }
        if (th->th_flags & TH_RST)
            pf_set_protostate(*state, PF_PEER_BOTH, TCPS_TIME_WAIT);

        /* update expire time */
        (*state)->expire = getuptime();
        if (src->state >= TCPS_FIN_WAIT_2 &&
            dst->state >= TCPS_FIN_WAIT_2)
            (*state)->timeout = PFTM_TCP_CLOSED;
        else if (src->state >= TCPS_CLOSING &&
            dst->state >= TCPS_CLOSING)
            (*state)->timeout = PFTM_TCP_FIN_WAIT;
        else if (src->state < TCPS_ESTABLISHED ||
            dst->state < TCPS_ESTABLISHED)
            (*state)->timeout = PFTM_TCP_OPENING;
        else if (src->state >= TCPS_CLOSING ||
            dst->state >= TCPS_CLOSING)
            (*state)->timeout = PFTM_TCP_CLOSING;
        else
            (*state)->timeout = PFTM_TCP_ESTABLISHED;

        /* Fall through to PASS packet */
    } else if ((dst->state < TCPS_SYN_SENT ||
        dst->state >= TCPS_FIN_WAIT_2 ||
        src->state >= TCPS_FIN_WAIT_2) &&
        SEQ_GEQ(src->seqhi + MAXACKWINDOW, data_end) &&
        /* Within a window forward of the originating packet */
        SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) {
        /* Within a window backward of the originating packet */

        /*
         * This currently handles three situations:
         *  1) Stupid stacks will shotgun SYNs before their peer
         *     replies.
         *  2) When PF catches an already established stream (the
         *     firewall rebooted, the state table was flushed, routes
         *     changed...)
         *  3) Packets get funky immediately after the connection
         *     closes (this should catch Solaris spurious ACK|FINs
         *     that web servers like to spew after a close)
         *
         * This must be a little more careful than the above code
         * since packet floods will also be caught here. We don't
         * update the TTL here to mitigate the damage of a packet
         * flood and so the same code can handle awkward establishment
         * and a loosened connection close.
         * In the establishment case, a correct peer response will
         * validate the connection, go through the normal state code
         * and keep updating the state TTL.
         */

        if (pf_status.debug >= LOG_NOTICE) {
            log(LOG_NOTICE, "pf: loose state match: ");
            pf_print_state(*state);
            pf_print_flags(th->th_flags);
            addlog(" seq=%u (%u) ack=%u len=%u ackskew=%d "
                "pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack,
                pd->p_len, ackskew, (*state)->packets[0],
                (*state)->packets[1],
                pd->dir == PF_IN ? "in" : "out",
                pd->dir == (*state)->direction ? "fwd" : "rev");
        }

        if (dst->scrub || src->scrub) {
            if (pf_normalize_tcp_stateful(pd, reason, *state, src,
                dst, copyback))
                return (PF_DROP);
        }

        /* update max window */
        if (src->max_win < win)
            src->max_win = win;
        /* synchronize sequencing */
        if (SEQ_GT(end, src->seqlo))
            src->seqlo = end;
        /* slide the window of what the other end can send */
        if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
            dst->seqhi = ack + MAX((win << sws), 1);

        /*
         * Cannot set dst->seqhi here since this could be a shotgunned
         * SYN and not an already established connection.
         */
        if (th->th_flags & TH_FIN)
            if (src->state < TCPS_CLOSING)
                pf_set_protostate(*state, psrc, TCPS_CLOSING);
        if (th->th_flags & TH_RST)
            pf_set_protostate(*state, PF_PEER_BOTH, TCPS_TIME_WAIT);

        /* Fall through to PASS packet */
    } else {
        /* sequence check failed: bad state */
        if ((*state)->dst.state == TCPS_SYN_SENT &&
            (*state)->src.state == TCPS_SYN_SENT) {
            /* Send RST for state mismatches during handshake */
            if (!(th->th_flags & TH_RST))
                pf_send_tcp((*state)->rule.ptr, pd->af,
                    pd->dst, pd->src, th->th_dport,
                    th->th_sport, ntohl(th->th_ack), 0,
                    TH_RST, 0, 0,
                    (*state)->rule.ptr->return_ttl, 1, 0,
                    pd->rdomain);
            src->seqlo = 0;
            src->seqhi = 1;
            src->max_win = 1;
        } else if (pf_status.debug >= LOG_NOTICE) {
            log(LOG_NOTICE, "pf: BAD state: ");
            pf_print_state(*state);
            pf_print_flags(th->th_flags);
            addlog(" seq=%u (%u) ack=%u len=%u ackskew=%d "
                "pkts=%llu:%llu dir=%s,%s\n",
                seq, orig_seq, ack, pd->p_len, ackskew,
                (*state)->packets[0], (*state)->packets[1],
                pd->dir == PF_IN ? "in" : "out",
                pd->dir == (*state)->direction ? "fwd" : "rev");
            /* digits mark which of the six window checks failed */
            addlog("pf: State failure on: %c %c %c %c | %c %c\n",
                SEQ_GEQ(src->seqhi, data_end) ? ' ' : '1',
                SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ?
                ' ': '2',
                (ackskew >= -MAXACKWINDOW) ? ' ' : '3',
                (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4',
                SEQ_GEQ(src->seqhi + MAXACKWINDOW, data_end) ?
                ' ' :'5',
                SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?'
' :'6');
		}
		REASON_SET(reason, PFRES_BADSTATE);
		return (PF_DROP);
	}

	return (PF_PASS);
}

/*
 * Loose TCP tracking for states created with the "sloppy" option.
 * Only the observed TCP flags are used to advance the two peer protocol
 * states; no sequence-window validation is done, so tracking also works
 * when just one direction of the connection is visible to pf.
 * Returns PF_PASS, or PF_DROP when a source-node connection limit hits.
 */
int
pf_tcp_track_sloppy(struct pf_pdesc *pd, struct pf_state **state,
    u_short *reason)
{
	struct tcphdr		*th = &pd->hdr.tcp;
	struct pf_state_peer	*src, *dst;
	u_int8_t		 psrc, pdst;

	/* src/dst peers are relative to the direction this packet travels. */
	if (pd->dir == (*state)->direction) {
		src = &(*state)->src;
		dst = &(*state)->dst;
		psrc = PF_PEER_SRC;
		pdst = PF_PEER_DST;
	} else {
		src = &(*state)->dst;
		dst = &(*state)->src;
		psrc = PF_PEER_DST;
		pdst = PF_PEER_SRC;
	}

	if (th->th_flags & TH_SYN)
		if (src->state < TCPS_SYN_SENT)
			pf_set_protostate(*state, psrc, TCPS_SYN_SENT);
	if (th->th_flags & TH_FIN)
		if (src->state < TCPS_CLOSING)
			pf_set_protostate(*state, psrc, TCPS_CLOSING);
	if (th->th_flags & TH_ACK) {
		if (dst->state == TCPS_SYN_SENT) {
			pf_set_protostate(*state, pdst, TCPS_ESTABLISHED);
			if (src->state == TCPS_ESTABLISHED &&
			    !SLIST_EMPTY(&(*state)->src_nodes) &&
			    pf_src_connlimit(state)) {
				REASON_SET(reason, PFRES_SRCLIMIT);
				return (PF_DROP);
			}
		} else if (dst->state == TCPS_CLOSING) {
			pf_set_protostate(*state, pdst, TCPS_FIN_WAIT_2);
		} else if (src->state == TCPS_SYN_SENT &&
		    dst->state < TCPS_SYN_SENT) {
			/*
			 * Handle a special sloppy case where we only see one
			 * half of the connection. If there is a ACK after
			 * the initial SYN without ever seeing a packet from
			 * the destination, set the connection to established.
			 */
			pf_set_protostate(*state, PF_PEER_BOTH,
			    TCPS_ESTABLISHED);
			if (!SLIST_EMPTY(&(*state)->src_nodes) &&
			    pf_src_connlimit(state)) {
				REASON_SET(reason, PFRES_SRCLIMIT);
				return (PF_DROP);
			}
		} else if (src->state == TCPS_CLOSING &&
		    dst->state == TCPS_ESTABLISHED &&
		    dst->seqlo == 0) {
			/*
			 * Handle the closing of half connections where we
			 * don't see the full bidirectional FIN/ACK+ACK
			 * handshake.
			 */
			pf_set_protostate(*state, pdst, TCPS_CLOSING);
		}
	}
	if (th->th_flags & TH_RST)
		pf_set_protostate(*state, PF_PEER_BOTH, TCPS_TIME_WAIT);

	/* update expire time */
	(*state)->expire = getuptime();
	/* Pick the state timeout from the more advanced of the two peers. */
	if (src->state >= TCPS_FIN_WAIT_2 &&
	    dst->state >= TCPS_FIN_WAIT_2)
		(*state)->timeout = PFTM_TCP_CLOSED;
	else if (src->state >= TCPS_CLOSING &&
	    dst->state >= TCPS_CLOSING)
		(*state)->timeout = PFTM_TCP_FIN_WAIT;
	else if (src->state < TCPS_ESTABLISHED ||
	    dst->state < TCPS_ESTABLISHED)
		(*state)->timeout = PFTM_TCP_OPENING;
	else if (src->state >= TCPS_CLOSING ||
	    dst->state >= TCPS_CLOSING)
		(*state)->timeout = PFTM_TCP_CLOSING;
	else
		(*state)->timeout = PFTM_TCP_ESTABLISHED;

	return (PF_PASS);
}

/*
 * Implement the "synproxy" option: pf first completes the TCP handshake
 * with the client itself (state PF_TCPS_PROXY_SRC), then opens the
 * connection to the server (PF_TCPS_PROXY_DST) and finally stitches the
 * two half-connections together via sequence number offsets (seqdiff).
 * Returns PF_PASS, PF_DROP, or PF_SYNPROXY_DROP for packets pf answers
 * itself.
 */
static __inline int
pf_synproxy(struct pf_pdesc *pd, struct pf_state **state, u_short *reason)
{
	struct pf_state_key	*sk = (*state)->key[pd->didx];

	if ((*state)->src.state == PF_TCPS_PROXY_SRC) {
		struct tcphdr	*th = &pd->hdr.tcp;

		if (pd->dir != (*state)->direction) {
			REASON_SET(reason, PFRES_SYNPROXY);
			return (PF_SYNPROXY_DROP);
		}
		if (th->th_flags & TH_SYN) {
			if (ntohl(th->th_seq) != (*state)->src.seqlo) {
				REASON_SET(reason, PFRES_SYNPROXY);
				return (PF_DROP);
			}
			/* Answer the client's SYN with our own SYN|ACK. */
			pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
			    pd->src, th->th_dport, th->th_sport,
			    (*state)->src.seqhi, ntohl(th->th_seq) + 1,
TH_SYN|TH_ACK, 0, (*state)->src.mss, 0, 1,
			    0, pd->rdomain);
			REASON_SET(reason, PFRES_SYNPROXY);
			return (PF_SYNPROXY_DROP);
		} else if ((th->th_flags & (TH_ACK|TH_RST|TH_FIN)) != TH_ACK ||
		    (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
		    (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) {
			REASON_SET(reason, PFRES_SYNPROXY);
			return (PF_DROP);
		} else if (!SLIST_EMPTY(&(*state)->src_nodes) &&
		    pf_src_connlimit(state)) {
			REASON_SET(reason, PFRES_SRCLIMIT);
			return (PF_DROP);
		} else
			/* Client handshake complete; talk to the server. */
			pf_set_protostate(*state, PF_PEER_SRC,
			    PF_TCPS_PROXY_DST);
	}
	if ((*state)->src.state == PF_TCPS_PROXY_DST) {
		struct tcphdr	*th = &pd->hdr.tcp;

		if (pd->dir == (*state)->direction) {
			/* Expect the client's final handshake ACK. */
			if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) ||
			    (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
			    (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) {
				REASON_SET(reason, PFRES_SYNPROXY);
				return (PF_DROP);
			}
			(*state)->src.max_win = MAX(ntohs(th->th_win), 1);
			if ((*state)->dst.seqhi == 1)
				(*state)->dst.seqhi = arc4random();
			/* Now open our own SYN towards the server. */
			pf_send_tcp((*state)->rule.ptr, pd->af,
			    &sk->addr[pd->sidx], &sk->addr[pd->didx],
			    sk->port[pd->sidx], sk->port[pd->didx],
			    (*state)->dst.seqhi, 0, TH_SYN, 0,
			    (*state)->src.mss, 0, 0, (*state)->tag,
			    sk->rdomain);
			REASON_SET(reason, PFRES_SYNPROXY);
			return (PF_SYNPROXY_DROP);
		} else if (((th->th_flags & (TH_SYN|TH_ACK)) !=
		    (TH_SYN|TH_ACK)) ||
		    (ntohl(th->th_ack) != (*state)->dst.seqhi + 1)) {
			REASON_SET(reason, PFRES_SYNPROXY);
			return (PF_DROP);
		} else {
			(*state)->dst.max_win = MAX(ntohs(th->th_win), 1);
			(*state)->dst.seqlo = ntohl(th->th_seq);
			/* ACK the server's SYN|ACK ... */
			pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
			    pd->src, th->th_dport, th->th_sport,
			    ntohl(th->th_ack), ntohl(th->th_seq) + 1,
			    TH_ACK, (*state)->src.max_win, 0, 0, 0,
			    (*state)->tag, pd->rdomain);
			/* ... and complete the client-side handshake. */
			pf_send_tcp((*state)->rule.ptr, pd->af,
			    &sk->addr[pd->sidx], &sk->addr[pd->didx],
			    sk->port[pd->sidx], sk->port[pd->didx],
			    (*state)->src.seqhi + 1, (*state)->src.seqlo + 1,
			    TH_ACK, (*state)->dst.max_win, 0, 0, 1,
			    0, sk->rdomain);
			/*
			 * Record the sequence offsets needed to splice the
			 * proxied client and server half-connections.
			 */
			(*state)->src.seqdiff = (*state)->dst.seqhi -
			    (*state)->src.seqlo;
			(*state)->dst.seqdiff = (*state)->src.seqhi -
			    (*state)->dst.seqlo;
			(*state)->src.seqhi = (*state)->src.seqlo +
			    (*state)->dst.max_win;
			(*state)->dst.seqhi = (*state)->dst.seqlo +
			    (*state)->src.max_win;
			(*state)->src.wscale = (*state)->dst.wscale = 0;
			pf_set_protostate(*state, PF_PEER_BOTH,
			    TCPS_ESTABLISHED);
			REASON_SET(reason, PFRES_SYNPROXY);
			return (PF_SYNPROXY_DROP);
		}
	}
	return (PF_PASS);
}

/*
 * Match the packet against an existing state entry and run the
 * per-protocol state machine (TCP full/sloppy tracking, UDP, other).
 * On a match the packet is translated in place when the state's wire
 * and stack keys differ (NAT and address family translation).
 * Returns PF_PASS, PF_DROP, PF_SYNPROXY_DROP, PF_AFRT, or the result
 * of pf_find_state() when no state matched.
 */
int
pf_test_state(struct pf_pdesc *pd, struct pf_state **state, u_short *reason,
    int syncookie)
{
	struct pf_state_key_cmp	 key;
	int			 copyback = 0;
	struct pf_state_peer	*src, *dst;
	int			 action;
	struct inpcb		*inp;
	u_int8_t		 psrc, pdst;

	key.af = pd->af;
	key.proto = pd->virtual_proto;
	key.rdomain = pd->rdomain;
	pf_addrcpy(&key.addr[pd->sidx], pd->src, key.af);
	pf_addrcpy(&key.addr[pd->didx], pd->dst, key.af);
	key.port[pd->sidx] = pd->osport;
	key.port[pd->didx] = pd->odport;
	inp = pd->m->m_pkthdr.pf.inp;

	action = pf_find_state(pd, &key, state);
	if (action != PF_MATCH)
		return (action);

	action = PF_PASS;
	/* src/dst peers are relative to the direction this packet travels. */
	if (pd->dir == (*state)->direction) {
		src = &(*state)->src;
		dst = &(*state)->dst;
		psrc = PF_PEER_SRC;
		pdst = PF_PEER_DST;
	} else {
		src = &(*state)->dst;
		dst = &(*state)->src;
		psrc = PF_PEER_DST;
		pdst = PF_PEER_SRC;
	}

	switch (pd->virtual_proto) {
	case IPPROTO_TCP:
		if (syncookie) {
			/* Syncookie validated: continue like synproxy. */
			pf_set_protostate(*state, PF_PEER_SRC,
			    PF_TCPS_PROXY_DST);
			(*state)->dst.seqhi =
ntohl(pd->hdr.tcp.th_ack) - 1;
		}
		if ((action = pf_synproxy(pd, state, reason)) != PF_PASS)
			return (action);
		if ((pd->hdr.tcp.th_flags & (TH_SYN|TH_ACK)) == TH_SYN) {

			/* A fresh SYN against an existing state. */
			if (dst->state >= TCPS_FIN_WAIT_2 &&
			    src->state >= TCPS_FIN_WAIT_2) {
				if (pf_status.debug >= LOG_NOTICE) {
					log(LOG_NOTICE, "pf: state reuse ");
					pf_print_state(*state);
					pf_print_flags(pd->hdr.tcp.th_flags);
					addlog("\n");
				}
				/* XXX make sure it's the same direction ?? */
				(*state)->timeout = PFTM_PURGE;
				*state = NULL;
				pf_mbuf_link_inpcb(pd->m, inp);
				return (PF_DROP);
			} else if (dst->state >= TCPS_ESTABLISHED &&
			    src->state >= TCPS_ESTABLISHED) {
				/*
				 * SYN matches existing state???
				 * Typically happens when sender boots up after
				 * sudden panic. Certain protocols (NFSv3) are
				 * always using same port numbers. Challenge
				 * ACK enables all parties (firewall and peers)
				 * to get in sync again.
				 */
				pf_send_challenge_ack(pd, *state, src, dst);
				return (PF_DROP);
			}
		}

		if ((*state)->state_flags & PFSTATE_SLOPPY) {
			if (pf_tcp_track_sloppy(pd, state, reason) == PF_DROP)
				return (PF_DROP);
		} else {
			if (pf_tcp_track_full(pd, state, reason, &copyback,
			    PF_REVERSED_KEY((*state)->key, pd->af)) == PF_DROP)
				return (PF_DROP);
		}
		break;
	case IPPROTO_UDP:
		/* update states */
		if (src->state < PFUDPS_SINGLE)
			pf_set_protostate(*state, psrc, PFUDPS_SINGLE);
		if (dst->state == PFUDPS_SINGLE)
			pf_set_protostate(*state, pdst, PFUDPS_MULTIPLE);

		/* update expire time */
		(*state)->expire = getuptime();
		if (src->state == PFUDPS_MULTIPLE &&
		    dst->state == PFUDPS_MULTIPLE)
			(*state)->timeout = PFTM_UDP_MULTIPLE;
		else
			(*state)->timeout = PFTM_UDP_SINGLE;
		break;
	default:
		/* update states */
		if (src->state < PFOTHERS_SINGLE)
			pf_set_protostate(*state, psrc,
			    PFOTHERS_SINGLE);
		if (dst->state == PFOTHERS_SINGLE)
			pf_set_protostate(*state, pdst, PFOTHERS_MULTIPLE);

		/* update expire time */
		(*state)->expire = getuptime();
		if (src->state == PFOTHERS_MULTIPLE &&
		    dst->state == PFOTHERS_MULTIPLE)
			(*state)->timeout = PFTM_OTHER_MULTIPLE;
		else
			(*state)->timeout = PFTM_OTHER_SINGLE;
		break;
	}

	/* translate source/destination address, if necessary */
	if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
		struct pf_state_key	*nk;
		int			 afto, sidx, didx;

		if (PF_REVERSED_KEY((*state)->key, pd->af))
			nk = (*state)->key[pd->sidx];
		else
			nk = (*state)->key[pd->didx];

		/* With af translation, source and destination swap sides. */
		afto = pd->af != nk->af;
		sidx = afto ? pd->didx : pd->sidx;
		didx = afto ? pd->sidx : pd->didx;

#ifdef INET6
		if (afto) {
			pf_addrcpy(&pd->nsaddr, &nk->addr[sidx], nk->af);
			pf_addrcpy(&pd->ndaddr, &nk->addr[didx], nk->af);
			pd->naf = nk->af;
			action = PF_AFRT;
		}
#endif /* INET6 */

		if (!afto)
			pf_translate_a(pd, pd->src, &nk->addr[sidx]);

		if (pd->sport != NULL)
			pf_patch_16(pd, pd->sport, nk->port[sidx]);

		if (afto || PF_ANEQ(pd->dst, &nk->addr[didx], pd->af) ||
		    pd->rdomain != nk->rdomain)
			pd->destchg = 1;

		if (!afto)
			pf_translate_a(pd, pd->dst, &nk->addr[didx]);

		if (pd->dport != NULL)
			pf_patch_16(pd, pd->dport, nk->port[didx]);

		pd->m->m_pkthdr.ph_rtableid = nk->rdomain;
		copyback = 1;
	}

	/* Write the modified protocol header back into the mbuf. */
	if (copyback && pd->hdrlen > 0) {
		m_copyback(pd->m, pd->off, pd->hdrlen, &pd->hdr, M_NOWAIT);
	}

	return (action);
}

/*
 * Build a state key for an ICMP query/reply and look up the matching
 * state.  Also checks that the ICMP message flows in the direction
 * expected for its type.  Returns -1 on success (state found and
 * plausible) or a PF_* action on failure; *iidx is set to the side
 * that carries the ICMP id.
 */
int
pf_icmp_state_lookup(struct pf_pdesc *pd, struct pf_state_key_cmp *key,
    struct pf_state **state, u_int16_t icmpid, u_int16_t type,
    int icmp_dir, int *iidx, int multi, int inner)
{
	int direction, action;

	key->af = pd->af;
key->proto = pd->proto; 4966 key->rdomain = pd->rdomain; 4967 if (icmp_dir == PF_IN) { 4968 *iidx = pd->sidx; 4969 key->port[pd->sidx] = icmpid; 4970 key->port[pd->didx] = type; 4971 } else { 4972 *iidx = pd->didx; 4973 key->port[pd->sidx] = type; 4974 key->port[pd->didx] = icmpid; 4975 } 4976 4977 if (pf_state_key_addr_setup(pd, key, pd->sidx, pd->src, pd->didx, 4978 pd->dst, pd->af, multi)) 4979 return (PF_DROP); 4980 4981 action = pf_find_state(pd, key, state); 4982 if (action != PF_MATCH) 4983 return (action); 4984 4985 if ((*state)->state_flags & PFSTATE_SLOPPY) 4986 return (-1); 4987 4988 /* Is this ICMP message flowing in right direction? */ 4989 if ((*state)->key[PF_SK_WIRE]->af != (*state)->key[PF_SK_STACK]->af) 4990 direction = (pd->af == (*state)->key[PF_SK_WIRE]->af) ? 4991 PF_IN : PF_OUT; 4992 else 4993 direction = (*state)->direction; 4994 if ((((!inner && direction == pd->dir) || 4995 (inner && direction != pd->dir)) ? 4996 PF_IN : PF_OUT) != icmp_dir) { 4997 if (pf_status.debug >= LOG_NOTICE) { 4998 log(LOG_NOTICE, 4999 "pf: icmp type %d in wrong direction (%d): ", 5000 ntohs(type), icmp_dir); 5001 pf_print_state(*state); 5002 addlog("\n"); 5003 } 5004 return (PF_DROP); 5005 } 5006 return (-1); 5007 } 5008 5009 int 5010 pf_test_state_icmp(struct pf_pdesc *pd, struct pf_state **state, 5011 u_short *reason) 5012 { 5013 u_int16_t virtual_id, virtual_type; 5014 u_int8_t icmptype, icmpcode; 5015 int icmp_dir, iidx, ret, copyback = 0; 5016 5017 struct pf_state_key_cmp key; 5018 5019 switch (pd->proto) { 5020 case IPPROTO_ICMP: 5021 icmptype = pd->hdr.icmp.icmp_type; 5022 icmpcode = pd->hdr.icmp.icmp_code; 5023 break; 5024 #ifdef INET6 5025 case IPPROTO_ICMPV6: 5026 icmptype = pd->hdr.icmp6.icmp6_type; 5027 icmpcode = pd->hdr.icmp6.icmp6_code; 5028 break; 5029 #endif /* INET6 */ 5030 default: 5031 panic("unhandled proto %d", pd->proto); 5032 } 5033 5034 if (pf_icmp_mapping(pd, icmptype, &icmp_dir, &virtual_id, 5035 &virtual_type) == 0) { 5036 /* 5037 * 
* ICMP query/reply message not related to a TCP/UDP packet.
		 * Search for an ICMP state.
		 */
		ret = pf_icmp_state_lookup(pd, &key, state,
		    virtual_id, virtual_type, icmp_dir, &iidx,
		    0, 0);
		/* IPv6? try matching a multicast address */
		if (ret == PF_DROP && pd->af == AF_INET6 && icmp_dir == PF_OUT)
			ret = pf_icmp_state_lookup(pd, &key, state, virtual_id,
			    virtual_type, icmp_dir, &iidx, 1, 0);
		if (ret >= 0)
			return (ret);

		(*state)->expire = getuptime();
		(*state)->timeout = PFTM_ICMP_ERROR_REPLY;

		/* translate source/destination address, if necessary */
		if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
			struct pf_state_key	*nk;
			int			 afto, sidx, didx;

			if (PF_REVERSED_KEY((*state)->key, pd->af))
				nk = (*state)->key[pd->sidx];
			else
				nk = (*state)->key[pd->didx];

			/* On af translation the two sides swap. */
			afto = pd->af != nk->af;
			sidx = afto ? pd->didx : pd->sidx;
			didx = afto ? pd->sidx : pd->didx;
			iidx = afto ? !iidx : iidx;
#ifdef INET6
			if (afto) {
				pf_addrcpy(&pd->nsaddr, &nk->addr[sidx],
				    nk->af);
				pf_addrcpy(&pd->ndaddr, &nk->addr[didx],
				    nk->af);
				pd->naf = nk->af;
			}
#endif /* INET6 */
			if (!afto) {
				pf_translate_a(pd, pd->src, &nk->addr[sidx]);
				pf_translate_a(pd, pd->dst, &nk->addr[didx]);
			}

			if (pd->rdomain != nk->rdomain)
				pd->destchg = 1;
			if (!afto && PF_ANEQ(pd->dst,
			    &nk->addr[didx], pd->af))
				pd->destchg = 1;
			pd->m->m_pkthdr.ph_rtableid = nk->rdomain;

			switch (pd->af) {
			case AF_INET:
#ifdef INET6
				if (afto) {
					if (pf_translate_icmp_af(pd, AF_INET6,
					    &pd->hdr.icmp))
						return (PF_DROP);
					pd->proto = IPPROTO_ICMPV6;
				}
#endif /* INET6 */
				/* Rewrite the ICMP id to the NATed one. */
				pf_patch_16(pd,
				    &pd->hdr.icmp.icmp_id, nk->port[iidx]);

				m_copyback(pd->m, pd->off, ICMP_MINLEN,
				    &pd->hdr.icmp, M_NOWAIT);
				copyback = 1;
				break;
#ifdef INET6
			case AF_INET6:
				if (afto) {
					if (pf_translate_icmp_af(pd, AF_INET,
					    &pd->hdr.icmp6))
						return (PF_DROP);
					pd->proto = IPPROTO_ICMP;
				}

				pf_patch_16(pd,
				    &pd->hdr.icmp6.icmp6_id, nk->port[iidx]);

				m_copyback(pd->m, pd->off,
				    sizeof(struct icmp6_hdr), &pd->hdr.icmp6,
				    M_NOWAIT);
				copyback = 1;
				break;
#endif /* INET6 */
			}
#ifdef INET6
			if (afto)
				return (PF_AFRT);
#endif /* INET6 */
		}
	} else {
		/*
		 * ICMP error message in response to a TCP/UDP packet.
		 * Extract the inner TCP/UDP header and search for that state.
		 */
		struct pf_pdesc	 pd2;
		struct ip	 h2;
#ifdef INET6
		struct ip6_hdr	 h2_6;
#endif /* INET6 */
		int		 ipoff2;

		/* Initialize pd2 fields valid for both packets with pd.
*/
		memset(&pd2, 0, sizeof(pd2));
		pd2.af = pd->af;
		pd2.dir = pd->dir;
		pd2.kif = pd->kif;
		pd2.m = pd->m;
		pd2.rdomain = pd->rdomain;
		/* Payload packet is from the opposite direction. */
		pd2.sidx = (pd2.dir == PF_IN) ? 1 : 0;
		pd2.didx = (pd2.dir == PF_IN) ? 0 : 1;
		switch (pd->af) {
		case AF_INET:
			/* offset of h2 in mbuf chain */
			ipoff2 = pd->off + ICMP_MINLEN;

			if (!pf_pull_hdr(pd2.m, ipoff2, &h2, sizeof(h2),
			    NULL, reason, pd2.af)) {
				DPFPRINTF(LOG_NOTICE,
				    "ICMP error message too short (ip)");
				return (PF_DROP);
			}
			/*
			 * ICMP error messages don't refer to non-first
			 * fragments
			 */
			if (h2.ip_off & htons(IP_OFFMASK)) {
				REASON_SET(reason, PFRES_FRAG);
				return (PF_DROP);
			}

			/* offset of protocol header that follows h2 */
			pd2.off = ipoff2;
			if (pf_walk_header(&pd2, &h2, reason) != PF_PASS)
				return (PF_DROP);

			pd2.tot_len = ntohs(h2.ip_len);
			pd2.src = (struct pf_addr *)&h2.ip_src;
			pd2.dst = (struct pf_addr *)&h2.ip_dst;
			break;
#ifdef INET6
		case AF_INET6:
			ipoff2 = pd->off + sizeof(struct icmp6_hdr);

			if (!pf_pull_hdr(pd2.m, ipoff2, &h2_6, sizeof(h2_6),
			    NULL, reason, pd2.af)) {
				DPFPRINTF(LOG_NOTICE,
				    "ICMP error message too short (ip6)");
				return (PF_DROP);
			}

			pd2.off = ipoff2;
			if (pf_walk_header6(&pd2, &h2_6, reason) != PF_PASS)
				return (PF_DROP);

			pd2.tot_len = ntohs(h2_6.ip6_plen) +
			    sizeof(struct ip6_hdr);
			pd2.src = (struct pf_addr *)&h2_6.ip6_src;
			pd2.dst = (struct pf_addr *)&h2_6.ip6_dst;
			break;
#endif /* INET6 */
		default:
			unhandled_af(pd->af);
		}

		/* The outer destination must equal the quoted inner source. */
		if (PF_ANEQ(pd->dst, pd2.src, pd->af)) {
			if (pf_status.debug >= LOG_NOTICE) {
				log(LOG_NOTICE,
				    "pf: BAD ICMP %d:%d outer dst: ",
				    icmptype, icmpcode);
				pf_print_host(pd->src, 0, pd->af);
				addlog(" -> ");
				pf_print_host(pd->dst, 0, pd->af);
				addlog(" inner src: ");
				pf_print_host(pd2.src, 0, pd2.af);
				addlog(" -> ");
				pf_print_host(pd2.dst, 0, pd2.af);
				addlog("\n");
			}
			REASON_SET(reason, PFRES_BADSTATE);
			return (PF_DROP);
		}

		switch (pd2.proto) {
		case IPPROTO_TCP: {
			struct tcphdr		*th = &pd2.hdr.tcp;
			u_int32_t		 seq;
			struct pf_state_peer	*src, *dst;
			u_int8_t		 dws;
			int			 action;

			/*
			 * Only the first 8 bytes of the TCP header can be
			 * expected. Don't access any TCP header fields after
			 * th_seq, an ackskew test is not possible.
			 */
			if (!pf_pull_hdr(pd2.m, pd2.off, th, 8, NULL, reason,
			    pd2.af)) {
				DPFPRINTF(LOG_NOTICE,
				    "ICMP error message too short (tcp)");
				return (PF_DROP);
			}

			key.af = pd2.af;
			key.proto = IPPROTO_TCP;
			key.rdomain = pd2.rdomain;
			pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af);
			pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af);
			key.port[pd2.sidx] = th->th_sport;
			key.port[pd2.didx] = th->th_dport;

			action = pf_find_state(&pd2, &key, state);
			if (action != PF_MATCH)
				return (action);

			/* Pick the peer that sent the quoted packet. */
			if (pd2.dir == (*state)->direction) {
				if (PF_REVERSED_KEY((*state)->key, pd->af)) {
					src = &(*state)->src;
					dst = &(*state)->dst;
				} else {
					src = &(*state)->dst;
					dst = &(*state)->src;
				}
			} else {
				if (PF_REVERSED_KEY((*state)->key, pd->af)) {
					src = &(*state)->dst;
					dst = &(*state)->src;
				} else {
					src = &(*state)->src;
					dst = &(*state)->dst;
				}
			}

			if (src->wscale && dst->wscale)
				dws = dst->wscale & PF_WSCALE_MASK;
			else
				dws = 0;

			/* Demodulate sequence number */
			seq = ntohl(th->th_seq) - src->seqdiff;
			if (src->seqdiff) {
				pf_patch_32(pd, &th->th_seq, htonl(seq));
				copyback = 1;
			}

			if (!((*state)->state_flags & PFSTATE_SLOPPY) &&
(!SEQ_GEQ(src->seqhi, seq) || !SEQ_GEQ(seq,
			    src->seqlo - (dst->max_win << dws)))) {
				/* Quoted sequence number outside the window. */
				if (pf_status.debug >= LOG_NOTICE) {
					log(LOG_NOTICE,
					    "pf: BAD ICMP %d:%d ",
					    icmptype, icmpcode);
					pf_print_host(pd->src, 0, pd->af);
					addlog(" -> ");
					pf_print_host(pd->dst, 0, pd->af);
					addlog(" state: ");
					pf_print_state(*state);
					addlog(" seq=%u\n", seq);
				}
				REASON_SET(reason, PFRES_BADSTATE);
				return (PF_DROP);
			} else {
				if (pf_status.debug >= LOG_DEBUG) {
					log(LOG_DEBUG,
					    "pf: OK ICMP %d:%d ",
					    icmptype, icmpcode);
					pf_print_host(pd->src, 0, pd->af);
					addlog(" -> ");
					pf_print_host(pd->dst, 0, pd->af);
					addlog(" state: ");
					pf_print_state(*state);
					addlog(" seq=%u\n", seq);
				}
			}

			/* translate source/destination address, if necessary */
			if ((*state)->key[PF_SK_WIRE] !=
			    (*state)->key[PF_SK_STACK]) {
				struct pf_state_key	*nk;
				int			 afto, sidx, didx;

				if (PF_REVERSED_KEY((*state)->key, pd->af))
					nk = (*state)->key[pd->sidx];
				else
					nk = (*state)->key[pd->didx];

				afto = pd->af != nk->af;
				sidx = afto ? pd2.didx : pd2.sidx;
				didx = afto ? pd2.sidx : pd2.didx;

#ifdef INET6
				if (afto) {
					if (pf_translate_icmp_af(pd, nk->af,
					    &pd->hdr.icmp))
						return (PF_DROP);
					m_copyback(pd->m, pd->off,
					    sizeof(struct icmp6_hdr),
					    &pd->hdr.icmp6, M_NOWAIT);
					if (pf_change_icmp_af(pd->m, ipoff2,
					    pd, &pd2, &nk->addr[sidx],
					    &nk->addr[didx], pd->af, nk->af))
						return (PF_DROP);
					if (nk->af == AF_INET)
						pd->proto = IPPROTO_ICMP;
					else
						pd->proto = IPPROTO_ICMPV6;
					pd->m->m_pkthdr.ph_rtableid =
					    nk->rdomain;
					pd->destchg = 1;
					pf_addrcpy(&pd->nsaddr,
					    &nk->addr[pd2.sidx], nk->af);
					pf_addrcpy(&pd->ndaddr,
					    &nk->addr[pd2.didx], nk->af);
					pd->naf = nk->af;

					pf_patch_16(pd,
					    &th->th_sport, nk->port[sidx]);
					pf_patch_16(pd,
					    &th->th_dport, nk->port[didx]);

					m_copyback(pd2.m, pd2.off, 8, th,
					    M_NOWAIT);
					return (PF_AFRT);
				}
#endif /* INET6 */
				/* Translate the quoted TCP header too. */
				if (PF_ANEQ(pd2.src,
				    &nk->addr[pd2.sidx], pd2.af) ||
				    nk->port[pd2.sidx] != th->th_sport)
					pf_translate_icmp(pd, pd2.src,
					    &th->th_sport, pd->dst,
					    &nk->addr[pd2.sidx],
					    nk->port[pd2.sidx]);

				if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx],
				    pd2.af) || pd2.rdomain != nk->rdomain)
					pd->destchg = 1;
				pd->m->m_pkthdr.ph_rtableid = nk->rdomain;

				if (PF_ANEQ(pd2.dst,
				    &nk->addr[pd2.didx], pd2.af) ||
				    nk->port[pd2.didx] != th->th_dport)
					pf_translate_icmp(pd, pd2.dst,
					    &th->th_dport, pd->src,
					    &nk->addr[pd2.didx],
					    nk->port[pd2.didx]);
				copyback = 1;
			}

			if (copyback) {
				switch (pd2.af) {
				case AF_INET:
					m_copyback(pd->m, pd->off, ICMP_MINLEN,
					    &pd->hdr.icmp, M_NOWAIT);
					m_copyback(pd2.m, ipoff2, sizeof(h2),
					    &h2, M_NOWAIT);
					break;
#ifdef INET6
				case AF_INET6:
					m_copyback(pd->m, pd->off,
					    sizeof(struct icmp6_hdr),
					    &pd->hdr.icmp6, M_NOWAIT);
					m_copyback(pd2.m, ipoff2, sizeof(h2_6),
					    &h2_6,
M_NOWAIT);
					break;
#endif /* INET6 */
				}
				m_copyback(pd2.m, pd2.off, 8, th, M_NOWAIT);
			}
			break;
		}
		case IPPROTO_UDP: {
			struct udphdr	*uh = &pd2.hdr.udp;
			int		 action;

			if (!pf_pull_hdr(pd2.m, pd2.off, uh, sizeof(*uh),
			    NULL, reason, pd2.af)) {
				DPFPRINTF(LOG_NOTICE,
				    "ICMP error message too short (udp)");
				return (PF_DROP);
			}

			key.af = pd2.af;
			key.proto = IPPROTO_UDP;
			key.rdomain = pd2.rdomain;
			pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af);
			pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af);
			key.port[pd2.sidx] = uh->uh_sport;
			key.port[pd2.didx] = uh->uh_dport;

			action = pf_find_state(&pd2, &key, state);
			if (action != PF_MATCH)
				return (action);

			/* translate source/destination address, if necessary */
			if ((*state)->key[PF_SK_WIRE] !=
			    (*state)->key[PF_SK_STACK]) {
				struct pf_state_key	*nk;
				int			 afto, sidx, didx;

				if (PF_REVERSED_KEY((*state)->key, pd->af))
					nk = (*state)->key[pd->sidx];
				else
					nk = (*state)->key[pd->didx];

				afto = pd->af != nk->af;
				sidx = afto ? pd2.didx : pd2.sidx;
				didx = afto ? pd2.sidx : pd2.didx;

#ifdef INET6
				if (afto) {
					if (pf_translate_icmp_af(pd, nk->af,
					    &pd->hdr.icmp))
						return (PF_DROP);
					m_copyback(pd->m, pd->off,
					    sizeof(struct icmp6_hdr),
					    &pd->hdr.icmp6, M_NOWAIT);
					if (pf_change_icmp_af(pd->m, ipoff2,
					    pd, &pd2, &nk->addr[sidx],
					    &nk->addr[didx], pd->af, nk->af))
						return (PF_DROP);
					if (nk->af == AF_INET)
						pd->proto = IPPROTO_ICMP;
					else
						pd->proto = IPPROTO_ICMPV6;
					pd->m->m_pkthdr.ph_rtableid =
					    nk->rdomain;
					pd->destchg = 1;
					pf_addrcpy(&pd->nsaddr,
					    &nk->addr[pd2.sidx], nk->af);
					pf_addrcpy(&pd->ndaddr,
					    &nk->addr[pd2.didx], nk->af);
					pd->naf = nk->af;

					pf_patch_16(pd,
					    &uh->uh_sport, nk->port[sidx]);
					pf_patch_16(pd,
					    &uh->uh_dport, nk->port[didx]);

					m_copyback(pd2.m, pd2.off, sizeof(*uh),
					    uh, M_NOWAIT);
					return (PF_AFRT);
				}
#endif /* INET6 */

				/* Translate the quoted UDP header too. */
				if (PF_ANEQ(pd2.src,
				    &nk->addr[pd2.sidx], pd2.af) ||
				    nk->port[pd2.sidx] != uh->uh_sport)
					pf_translate_icmp(pd, pd2.src,
					    &uh->uh_sport, pd->dst,
					    &nk->addr[pd2.sidx],
					    nk->port[pd2.sidx]);

				if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx],
				    pd2.af) || pd2.rdomain != nk->rdomain)
					pd->destchg = 1;
				pd->m->m_pkthdr.ph_rtableid = nk->rdomain;

				if (PF_ANEQ(pd2.dst,
				    &nk->addr[pd2.didx], pd2.af) ||
				    nk->port[pd2.didx] != uh->uh_dport)
					pf_translate_icmp(pd, pd2.dst,
					    &uh->uh_dport, pd->src,
					    &nk->addr[pd2.didx],
					    nk->port[pd2.didx]);

				switch (pd2.af) {
				case AF_INET:
					m_copyback(pd->m, pd->off, ICMP_MINLEN,
					    &pd->hdr.icmp, M_NOWAIT);
					m_copyback(pd2.m, ipoff2, sizeof(h2),
					    &h2, M_NOWAIT);
					break;
#ifdef INET6
				case AF_INET6:
					m_copyback(pd->m, pd->off,
					    sizeof(struct icmp6_hdr),
					    &pd->hdr.icmp6, M_NOWAIT);
					m_copyback(pd2.m, ipoff2, sizeof(h2_6),
					    &h2_6, M_NOWAIT);
					break;
#endif /*
INET6 */
				}
				/* Avoid recomputing quoted UDP checksum.
				 * note: udp6 0 csum invalid per rfc2460 p27.
				 * but presumed nothing cares in this context */
				pf_patch_16(pd, &uh->uh_sum, 0);
				m_copyback(pd2.m, pd2.off, sizeof(*uh), uh,
				    M_NOWAIT);
				copyback = 1;
			}
			break;
		}
		case IPPROTO_ICMP: {
			struct icmp	*iih = &pd2.hdr.icmp;

			/* An ICMPv4 header can only be quoted inside IPv4. */
			if (pd2.af != AF_INET) {
				REASON_SET(reason, PFRES_NORM);
				return (PF_DROP);
			}

			if (!pf_pull_hdr(pd2.m, pd2.off, iih, ICMP_MINLEN,
			    NULL, reason, pd2.af)) {
				DPFPRINTF(LOG_NOTICE,
				    "ICMP error message too short (icmp)");
				return (PF_DROP);
			}

			pf_icmp_mapping(&pd2, iih->icmp_type,
			    &icmp_dir, &virtual_id, &virtual_type);

			ret = pf_icmp_state_lookup(&pd2, &key, state,
			    virtual_id, virtual_type, icmp_dir, &iidx, 0, 1);
			if (ret >= 0)
				return (ret);

			/* translate source/destination address, if necessary */
			if ((*state)->key[PF_SK_WIRE] !=
			    (*state)->key[PF_SK_STACK]) {
				struct pf_state_key	*nk;
				int			 afto, sidx, didx;

				if (PF_REVERSED_KEY((*state)->key, pd->af))
					nk = (*state)->key[pd->sidx];
				else
					nk = (*state)->key[pd->didx];

				afto = pd->af != nk->af;
				sidx = afto ? pd2.didx : pd2.sidx;
				didx = afto ? pd2.sidx : pd2.didx;
				iidx = afto ? !iidx : iidx;

#ifdef INET6
				if (afto) {
					if (nk->af != AF_INET6)
						return (PF_DROP);
					if (pf_translate_icmp_af(pd, nk->af,
					    &pd->hdr.icmp))
						return (PF_DROP);
					m_copyback(pd->m, pd->off,
					    sizeof(struct icmp6_hdr),
					    &pd->hdr.icmp6, M_NOWAIT);
					if (pf_change_icmp_af(pd->m, ipoff2,
					    pd, &pd2, &nk->addr[sidx],
					    &nk->addr[didx], pd->af, nk->af))
						return (PF_DROP);
					pd->proto = IPPROTO_ICMPV6;
					if (pf_translate_icmp_af(pd,
					    nk->af, iih))
						return (PF_DROP);
					if (virtual_type == htons(ICMP_ECHO))
						pf_patch_16(pd, &iih->icmp_id,
						    nk->port[iidx]);
					m_copyback(pd2.m, pd2.off, ICMP_MINLEN,
					    iih, M_NOWAIT);
					pd->m->m_pkthdr.ph_rtableid =
					    nk->rdomain;
					pd->destchg = 1;
					pf_addrcpy(&pd->nsaddr,
					    &nk->addr[pd2.sidx], nk->af);
					pf_addrcpy(&pd->ndaddr,
					    &nk->addr[pd2.didx], nk->af);
					pd->naf = nk->af;
					return (PF_AFRT);
				}
#endif /* INET6 */

				/* Only echo requests carry a NATable id. */
				if (PF_ANEQ(pd2.src,
				    &nk->addr[pd2.sidx], pd2.af) ||
				    (virtual_type == htons(ICMP_ECHO) &&
				    nk->port[iidx] != iih->icmp_id))
					pf_translate_icmp(pd, pd2.src,
					    (virtual_type == htons(ICMP_ECHO)) ?
					    &iih->icmp_id : NULL,
					    pd->dst, &nk->addr[pd2.sidx],
					    (virtual_type == htons(ICMP_ECHO)) ?
nk->port[iidx] : 0);

				if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx],
				    pd2.af) || pd2.rdomain != nk->rdomain)
					pd->destchg = 1;
				pd->m->m_pkthdr.ph_rtableid = nk->rdomain;

				if (PF_ANEQ(pd2.dst,
				    &nk->addr[pd2.didx], pd2.af))
					pf_translate_icmp(pd, pd2.dst, NULL,
					    pd->src, &nk->addr[pd2.didx], 0);

				m_copyback(pd->m, pd->off, ICMP_MINLEN,
				    &pd->hdr.icmp, M_NOWAIT);
				m_copyback(pd2.m, ipoff2, sizeof(h2), &h2,
				    M_NOWAIT);
				m_copyback(pd2.m, pd2.off, ICMP_MINLEN, iih,
				    M_NOWAIT);
				copyback = 1;
			}
			break;
		}
#ifdef INET6
		case IPPROTO_ICMPV6: {
			struct icmp6_hdr	*iih = &pd2.hdr.icmp6;

			/* An ICMPv6 header can only be quoted inside IPv6. */
			if (pd2.af != AF_INET6) {
				REASON_SET(reason, PFRES_NORM);
				return (PF_DROP);
			}

			if (!pf_pull_hdr(pd2.m, pd2.off, iih,
			    sizeof(struct icmp6_hdr), NULL, reason, pd2.af)) {
				DPFPRINTF(LOG_NOTICE,
				    "ICMP error message too short (icmp6)");
				return (PF_DROP);
			}

			pf_icmp_mapping(&pd2, iih->icmp6_type,
			    &icmp_dir, &virtual_id, &virtual_type);
			ret = pf_icmp_state_lookup(&pd2, &key, state,
			    virtual_id, virtual_type, icmp_dir, &iidx, 0, 1);
			/* IPv6? try matching a multicast address */
			if (ret == PF_DROP && pd2.af == AF_INET6 &&
			    icmp_dir == PF_OUT)
				ret = pf_icmp_state_lookup(&pd2, &key, state,
				    virtual_id, virtual_type, icmp_dir, &iidx,
				    1, 1);
			if (ret >= 0)
				return (ret);

			/* translate source/destination address, if necessary */
			if ((*state)->key[PF_SK_WIRE] !=
			    (*state)->key[PF_SK_STACK]) {
				struct pf_state_key	*nk;
				int			 afto, sidx, didx;

				if (PF_REVERSED_KEY((*state)->key, pd->af))
					nk = (*state)->key[pd->sidx];
				else
					nk = (*state)->key[pd->didx];

				afto = pd->af != nk->af;
				sidx = afto ? pd2.didx : pd2.sidx;
				didx = afto ? pd2.sidx : pd2.didx;
				iidx = afto ? !iidx : iidx;

				if (afto) {
					if (nk->af != AF_INET)
						return (PF_DROP);
					if (pf_translate_icmp_af(pd, nk->af,
					    &pd->hdr.icmp))
						return (PF_DROP);
					m_copyback(pd->m, pd->off,
					    sizeof(struct icmp6_hdr),
					    &pd->hdr.icmp6, M_NOWAIT);
					if (pf_change_icmp_af(pd->m, ipoff2,
					    pd, &pd2, &nk->addr[sidx],
					    &nk->addr[didx], pd->af, nk->af))
						return (PF_DROP);
					pd->proto = IPPROTO_ICMP;
					if (pf_translate_icmp_af(pd,
					    nk->af, iih))
						return (PF_DROP);
					if (virtual_type ==
					    htons(ICMP6_ECHO_REQUEST))
						pf_patch_16(pd, &iih->icmp6_id,
						    nk->port[iidx]);
					m_copyback(pd2.m, pd2.off,
					    sizeof(struct icmp6_hdr), iih,
					    M_NOWAIT);
					pd->m->m_pkthdr.ph_rtableid =
					    nk->rdomain;
					pd->destchg = 1;
					pf_addrcpy(&pd->nsaddr,
					    &nk->addr[pd2.sidx], nk->af);
					pf_addrcpy(&pd->ndaddr,
					    &nk->addr[pd2.didx], nk->af);
					pd->naf = nk->af;
					return (PF_AFRT);
				}

				/* Only echo requests carry a NATable id. */
				if (PF_ANEQ(pd2.src,
				    &nk->addr[pd2.sidx], pd2.af) ||
				    ((virtual_type ==
				    htons(ICMP6_ECHO_REQUEST)) &&
				    nk->port[pd2.sidx] != iih->icmp6_id))
					pf_translate_icmp(pd, pd2.src,
					    (virtual_type ==
					    htons(ICMP6_ECHO_REQUEST))
					    ? &iih->icmp6_id : NULL,
					    pd->dst, &nk->addr[pd2.sidx],
					    (virtual_type ==
					    htons(ICMP6_ECHO_REQUEST))
					    ?
nk->port[iidx] : 0); 5730 5731 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], 5732 pd2.af) || pd2.rdomain != nk->rdomain) 5733 pd->destchg = 1; 5734 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 5735 5736 if (PF_ANEQ(pd2.dst, 5737 &nk->addr[pd2.didx], pd2.af)) 5738 pf_translate_icmp(pd, pd2.dst, NULL, 5739 pd->src, &nk->addr[pd2.didx], 0); 5740 5741 m_copyback(pd->m, pd->off, 5742 sizeof(struct icmp6_hdr), &pd->hdr.icmp6, 5743 M_NOWAIT); 5744 m_copyback(pd2.m, ipoff2, sizeof(h2_6), &h2_6, 5745 M_NOWAIT); 5746 m_copyback(pd2.m, pd2.off, 5747 sizeof(struct icmp6_hdr), iih, M_NOWAIT); 5748 copyback = 1; 5749 } 5750 break; 5751 } 5752 #endif /* INET6 */ 5753 default: { 5754 int action; 5755 5756 key.af = pd2.af; 5757 key.proto = pd2.proto; 5758 key.rdomain = pd2.rdomain; 5759 pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af); 5760 pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af); 5761 key.port[0] = key.port[1] = 0; 5762 5763 action = pf_find_state(&pd2, &key, state); 5764 if (action != PF_MATCH) 5765 return (action); 5766 5767 /* translate source/destination address, if necessary */ 5768 if ((*state)->key[PF_SK_WIRE] != 5769 (*state)->key[PF_SK_STACK]) { 5770 struct pf_state_key *nk = 5771 (*state)->key[pd->didx]; 5772 5773 if (PF_ANEQ(pd2.src, 5774 &nk->addr[pd2.sidx], pd2.af)) 5775 pf_translate_icmp(pd, pd2.src, NULL, 5776 pd->dst, &nk->addr[pd2.sidx], 0); 5777 5778 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], 5779 pd2.af) || pd2.rdomain != nk->rdomain) 5780 pd->destchg = 1; 5781 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 5782 5783 if (PF_ANEQ(pd2.dst, 5784 &nk->addr[pd2.didx], pd2.af)) 5785 pf_translate_icmp(pd, pd2.dst, NULL, 5786 pd->src, &nk->addr[pd2.didx], 0); 5787 5788 switch (pd2.af) { 5789 case AF_INET: 5790 m_copyback(pd->m, pd->off, ICMP_MINLEN, 5791 &pd->hdr.icmp, M_NOWAIT); 5792 m_copyback(pd2.m, ipoff2, sizeof(h2), 5793 &h2, M_NOWAIT); 5794 break; 5795 #ifdef INET6 5796 case AF_INET6: 5797 m_copyback(pd->m, pd->off, 5798 sizeof(struct icmp6_hdr), 5799 
&pd->hdr.icmp6, M_NOWAIT);
				m_copyback(pd2.m, ipoff2, sizeof(h2_6),
				    &h2_6, M_NOWAIT);
				break;
#endif /* INET6 */
			}
			copyback = 1;
		}
		break;
	}
	}
	}
	/* write any rewritten headers back into the mbuf chain */
	if (copyback) {
		m_copyback(pd->m, pd->off, pd->hdrlen, &pd->hdr, M_NOWAIT);
	}

	return (PF_PASS);
}

/*
 * pf_pull_hdr() -- copy "len" bytes of header at offset "off" from the
 * mbuf chain into the caller supplied buffer "p".
 * ipoff and off are measured from the start of the mbuf chain.
 * h must be at "ipoff" on the mbuf chain.
 * Returns p on success; NULL on failure with *actionp/*reasonp set.
 */
void *
pf_pull_hdr(struct mbuf *m, int off, void *p, int len,
    u_short *actionp, u_short *reasonp, sa_family_t af)
{
	int iplen = 0;

	switch (af) {
	case AF_INET: {
		struct ip *h = mtod(m, struct ip *);
		u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;

		/* a non-initial fragment cannot carry the header we want */
		if (fragoff) {
			if (fragoff >= len)
				ACTION_SET(actionp, PF_PASS);
			else {
				ACTION_SET(actionp, PF_DROP);
				REASON_SET(reasonp, PFRES_FRAG);
			}
			return (NULL);
		}
		iplen = ntohs(h->ip_len);
		break;
	}
#ifdef INET6
	case AF_INET6: {
		struct ip6_hdr *h = mtod(m, struct ip6_hdr *);

		iplen = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
		break;
	}
#endif /* INET6 */
	}
	/* both the mbuf chain and the declared IP length must cover it */
	if (m->m_pkthdr.len < off + len || iplen < off + len) {
		ACTION_SET(actionp, PF_DROP);
		REASON_SET(reasonp, PFRES_SHORT);
		return (NULL);
	}
	m_copydata(m, off, len, p);
	return (p);
}

/*
 * pf_routable() -- route-based reachability / reverse-path check.
 * With kif == NULL this is a plain no-route check; otherwise a uRPF
 * check against the given input interface, honouring multipath routes
 * and CARP parent devices.  Returns 1 if acceptable, 0 otherwise.
 */
int
pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif,
    int rtableid)
{
	struct sockaddr_storage ss;
	struct sockaddr_in *dst;
	int ret = 1;
	int check_mpath;
#ifdef INET6
	struct sockaddr_in6 *dst6;
#endif /* INET6 */
	struct rtentry *rt = NULL;

	check_mpath = 0;
	memset(&ss, 0, sizeof(ss));
	switch (af) {
	case AF_INET:
		dst = (struct sockaddr_in *)&ss;
		dst->sin_family = AF_INET;
		dst->sin_len = sizeof(*dst);
		dst->sin_addr = addr->v4;
		if (ipmultipath)
			check_mpath = 1;
		break;
#ifdef INET6
	case AF_INET6:
		/*
		 * Skip check for addresses with embedded interface scope,
		 * as they would always match anyway.
		 */
		if (IN6_IS_SCOPE_EMBED(&addr->v6))
			goto out;
		dst6 = (struct sockaddr_in6 *)&ss;
		dst6->sin6_family = AF_INET6;
		dst6->sin6_len = sizeof(*dst6);
		dst6->sin6_addr = addr->v6;
		if (ip6_multipath)
			check_mpath = 1;
		break;
#endif /* INET6 */
	}

	/* Skip checks for ipsec interfaces */
	if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC)
		goto out;

	rt = rtalloc(sstosa(&ss), 0, rtableid);
	if (rt != NULL) {
		/* No interface given, this is a no-route check */
		if (kif == NULL)
			goto out;

		if (kif->pfik_ifp == NULL) {
			ret = 0;
			goto out;
		}

		/* Perform uRPF check if passed input interface */
		ret = 0;
		do {
			if (rt->rt_ifidx == kif->pfik_ifp->if_index) {
				ret = 1;
#if NCARP > 0
			} else {
				struct ifnet *ifp;

				/* a CARP interface matches via its parent */
				ifp = if_get(rt->rt_ifidx);
				if (ifp != NULL && ifp->if_type == IFT_CARP &&
				    ifp->if_carpdevidx ==
				    kif->pfik_ifp->if_index)
					ret = 1;
				if_put(ifp);
#endif /* NCARP */
			}

			rt = rtable_iterate(rt);
		} while (check_mpath == 1 && rt != NULL && ret == 0);
	} else
		ret = 0;
out:
	rtfree(rt);
	return (ret);
}

/*
 * pf_rtlabel_match() -- check whether the route for "addr" carries the
 * route label configured in "aw".  Returns 1 on match, 0 otherwise.
 */
int
pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw,
    int rtableid)
{
	struct sockaddr_storage ss;
	struct sockaddr_in *dst;
#ifdef INET6
	struct sockaddr_in6 *dst6;
#endif /* INET6 */
	struct rtentry *rt;
	int ret = 0;

	memset(&ss, 0, sizeof(ss));
	switch (af) {
	case AF_INET:
		dst = (struct sockaddr_in *)&ss;
		dst->sin_family = AF_INET;
		dst->sin_len = sizeof(*dst);
		dst->sin_addr = addr->v4;
		break;
#ifdef INET6
	case AF_INET6:
		dst6 = (struct sockaddr_in6 *)&ss;
		dst6->sin6_family = AF_INET6;
		dst6->sin6_len = sizeof(*dst6);
		dst6->sin6_addr = addr->v6;
		break;
#endif /* INET6 */
	}

	rt = rtalloc(sstosa(&ss), RT_RESOLVE, rtableid);
	if (rt != NULL) {
		if (rt->rt_labelid == aw->v.rtlabel)
			ret = 1;
		rtfree(rt);
	}

	return (ret);
}

/*
 * pf_route() -- IPv4 route-to / reply-to / dup-to packet dispatch.
 * pf_route() may change pd->m, adjust local copies after calling.
 */
void
pf_route(struct pf_pdesc *pd, struct pf_rule *r, struct pf_state *s)
{
	struct mbuf *m0, *m1;
	struct sockaddr_in *dst, sin;
	struct rtentry *rt = NULL;
	struct ip *ip;
	struct ifnet *ifp = NULL;
	struct pf_addr naddr;
	struct pf_src_node *sns[PF_SN_MAX];
	int error = 0;
	unsigned int rtableid;

	/* loop protection: give up after a few re-routing passes */
	if (pd->m->m_pkthdr.pf.routed++ > 3) {
		m_freem(pd->m);
		pd->m = NULL;
		return;
	}

	if (r->rt == PF_DUPTO) {
		/* dup-to keeps the original packet flowing; work on a copy */
		if ((m0 = m_dup_pkt(pd->m, max_linkhdr, M_NOWAIT)) == NULL)
			return;
	} else {
		if ((r->rt == PF_REPLYTO) == (r->direction == pd->dir))
			return;
		m0 = pd->m;
	}

	if (m0->m_len < sizeof(struct ip)) {
		DPFPRINTF(LOG_ERR,
		    "%s: m0->m_len < sizeof(struct ip)", __func__);
		goto bad;
	}

	ip = mtod(m0, struct ip *);

	memset(&sin, 0, sizeof(sin));
	dst = &sin;
	dst->sin_family = AF_INET;
	dst->sin_len = sizeof(*dst);
	dst->sin_addr = ip->ip_dst;
	rtableid = m0->m_pkthdr.ph_rtableid;

	if (pd->dir == PF_IN) {
		/* acting as a router here: decrement and check the TTL */
		if (ip->ip_ttl <= IPTTLDEC) {
			if (r->rt != PF_DUPTO)
				pf_send_icmp(m0, ICMP_TIMXCEED,
				    ICMP_TIMXCEED_INTRANS, 0,
				    pd->af, r, pd->rdomain);
			goto bad;
		}
		ip->ip_ttl -= IPTTLDEC;
	}

	if (s == NULL) {
		/* stateless: pick the next-hop from the rule's pool */
		memset(sns, 0, sizeof(sns));
		if (pf_map_addr(AF_INET, r,
		    (struct pf_addr *)&ip->ip_src,
		    &naddr, NULL, sns, &r->route, PF_SN_ROUTE)) {
			DPFPRINTF(LOG_ERR,
			    "%s: pf_map_addr() failed", __func__);
			goto bad;
		}

		if (!PF_AZERO(&naddr, AF_INET))
			dst->sin_addr.s_addr = naddr.v4.s_addr;
		ifp = r->route.kif ?
		    r->route.kif->pfik_ifp : NULL;
	} else {
		/* stateful: next-hop was chosen at state creation time */
		if (!PF_AZERO(&s->rt_addr, AF_INET))
			dst->sin_addr.s_addr =
			    s->rt_addr.v4.s_addr;
		ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
	}
	if (ifp == NULL)
		goto bad;

	/* outgoing interface changed: re-run the ruleset for it */
	if (pd->kif->pfik_ifp != ifp) {
		if (pf_test(AF_INET, PF_OUT, ifp, &m0) != PF_PASS)
			goto bad;
		else if (m0 == NULL)
			goto done;
		if (m0->m_len < sizeof(struct ip)) {
			DPFPRINTF(LOG_ERR,
			    "%s: m0->m_len < sizeof(struct ip)", __func__);
			goto bad;
		}
		ip = mtod(m0, struct ip *);
	}

	rt = rtalloc(sintosa(dst), RT_RESOLVE, rtableid);
	if (!rtisvalid(rt)) {
		ipstat_inc(ips_noroute);
		goto bad;
	}
	/* A locally generated packet may have invalid source address. */
	if ((ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET &&
	    (ifp->if_flags & IFF_LOOPBACK) == 0)
		ip->ip_src = ifatoia(rt->rt_ifa)->ia_addr.sin_addr;

	in_proto_cksum_out(m0, ifp);

	if (ntohs(ip->ip_len) <= ifp->if_mtu) {
		/* fits in one frame: fix the IP checksum and send */
		ip->ip_sum = 0;
		if (ifp->if_capabilities & IFCAP_CSUM_IPv4)
			m0->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT;
		else {
			ipstat_inc(ips_outswcsum);
			ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
		}
		error = ifp->if_output(ifp, m0, sintosa(dst), rt);
		goto done;
	}

	/*
	 * Too large for interface; fragment if possible.
	 * Must be able to put at least 8 bytes per fragment.
6105 */ 6106 if (ip->ip_off & htons(IP_DF)) { 6107 ipstat_inc(ips_cantfrag); 6108 if (r->rt != PF_DUPTO) 6109 pf_send_icmp(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 6110 ifp->if_mtu, pd->af, r, pd->rdomain); 6111 goto bad; 6112 } 6113 6114 m1 = m0; 6115 error = ip_fragment(m0, ifp, ifp->if_mtu); 6116 if (error) { 6117 m0 = NULL; 6118 goto bad; 6119 } 6120 6121 for (m0 = m1; m0; m0 = m1) { 6122 m1 = m0->m_nextpkt; 6123 m0->m_nextpkt = 0; 6124 if (error == 0) 6125 error = ifp->if_output(ifp, m0, sintosa(dst), rt); 6126 else 6127 m_freem(m0); 6128 } 6129 6130 if (error == 0) 6131 ipstat_inc(ips_fragmented); 6132 6133 done: 6134 if (r->rt != PF_DUPTO) 6135 pd->m = NULL; 6136 rtfree(rt); 6137 return; 6138 6139 bad: 6140 m_freem(m0); 6141 goto done; 6142 } 6143 6144 #ifdef INET6 6145 /* pf_route6() may change pd->m, adjust local copies after calling */ 6146 void 6147 pf_route6(struct pf_pdesc *pd, struct pf_rule *r, struct pf_state *s) 6148 { 6149 struct mbuf *m0; 6150 struct sockaddr_in6 *dst, sin6; 6151 struct rtentry *rt = NULL; 6152 struct ip6_hdr *ip6; 6153 struct ifnet *ifp = NULL; 6154 struct pf_addr naddr; 6155 struct pf_src_node *sns[PF_SN_MAX]; 6156 struct m_tag *mtag; 6157 unsigned int rtableid; 6158 6159 if (pd->m->m_pkthdr.pf.routed++ > 3) { 6160 m_freem(pd->m); 6161 pd->m = NULL; 6162 return; 6163 } 6164 6165 if (r->rt == PF_DUPTO) { 6166 if ((m0 = m_dup_pkt(pd->m, max_linkhdr, M_NOWAIT)) == NULL) 6167 return; 6168 } else { 6169 if ((r->rt == PF_REPLYTO) == (r->direction == pd->dir)) 6170 return; 6171 m0 = pd->m; 6172 } 6173 6174 if (m0->m_len < sizeof(struct ip6_hdr)) { 6175 DPFPRINTF(LOG_ERR, 6176 "%s: m0->m_len < sizeof(struct ip6_hdr)", __func__); 6177 goto bad; 6178 } 6179 ip6 = mtod(m0, struct ip6_hdr *); 6180 6181 memset(&sin6, 0, sizeof(sin6)); 6182 dst = &sin6; 6183 dst->sin6_family = AF_INET6; 6184 dst->sin6_len = sizeof(*dst); 6185 dst->sin6_addr = ip6->ip6_dst; 6186 rtableid = m0->m_pkthdr.ph_rtableid; 6187 6188 if (pd->dir == PF_IN) { 6189 if 
(ip6->ip6_hlim <= IPV6_HLIMDEC) { 6190 if (r->rt != PF_DUPTO) 6191 pf_send_icmp(m0, ICMP6_TIME_EXCEEDED, 6192 ICMP6_TIME_EXCEED_TRANSIT, 0, 6193 pd->af, r, pd->rdomain); 6194 goto bad; 6195 } 6196 ip6->ip6_hlim -= IPV6_HLIMDEC; 6197 } 6198 6199 if (s == NULL) { 6200 memset(sns, 0, sizeof(sns)); 6201 if (pf_map_addr(AF_INET6, r, (struct pf_addr *)&ip6->ip6_src, 6202 &naddr, NULL, sns, &r->route, PF_SN_ROUTE)) { 6203 DPFPRINTF(LOG_ERR, 6204 "%s: pf_map_addr() failed", __func__); 6205 goto bad; 6206 } 6207 if (!PF_AZERO(&naddr, AF_INET6)) 6208 pf_addrcpy((struct pf_addr *)&dst->sin6_addr, 6209 &naddr, AF_INET6); 6210 ifp = r->route.kif ? r->route.kif->pfik_ifp : NULL; 6211 } else { 6212 if (!PF_AZERO(&s->rt_addr, AF_INET6)) 6213 pf_addrcpy((struct pf_addr *)&dst->sin6_addr, 6214 &s->rt_addr, AF_INET6); 6215 ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL; 6216 } 6217 if (ifp == NULL) 6218 goto bad; 6219 6220 if (pd->kif->pfik_ifp != ifp) { 6221 if (pf_test(AF_INET6, PF_OUT, ifp, &m0) != PF_PASS) 6222 goto bad; 6223 else if (m0 == NULL) 6224 goto done; 6225 if (m0->m_len < sizeof(struct ip6_hdr)) { 6226 DPFPRINTF(LOG_ERR, 6227 "%s: m0->m_len < sizeof(struct ip6_hdr)", __func__); 6228 goto bad; 6229 } 6230 } 6231 6232 if (IN6_IS_SCOPE_EMBED(&dst->sin6_addr)) 6233 dst->sin6_addr.s6_addr16[1] = htons(ifp->if_index); 6234 rt = rtalloc(sin6tosa(dst), RT_RESOLVE, rtableid); 6235 if (!rtisvalid(rt)) { 6236 ip6stat_inc(ip6s_noroute); 6237 goto bad; 6238 } 6239 /* A locally generated packet may have invalid source address. */ 6240 if (IN6_IS_ADDR_LOOPBACK(&ip6->ip6_src) && 6241 (ifp->if_flags & IFF_LOOPBACK) == 0) 6242 ip6->ip6_src = ifatoia6(rt->rt_ifa)->ia_addr.sin6_addr; 6243 6244 in6_proto_cksum_out(m0, ifp); 6245 6246 /* 6247 * If packet has been reassembled by PF earlier, we have to 6248 * use pf_refragment6() here to turn it back to fragments. 
	 */
	if ((mtag = m_tag_find(m0, PACKET_TAG_PF_REASSEMBLED, NULL))) {
		(void) pf_refragment6(&m0, mtag, dst, ifp, rt);
	} else if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) {
		ifp->if_output(ifp, m0, sin6tosa(dst), rt);
	} else {
		/* IPv6 routers do not fragment; tell the sender */
		ip6stat_inc(ip6s_cantfrag);
		if (r->rt != PF_DUPTO)
			pf_send_icmp(m0, ICMP6_PACKET_TOO_BIG, 0,
			    ifp->if_mtu, pd->af, r, pd->rdomain);
		goto bad;
	}

done:
	if (r->rt != PF_DUPTO)
		pd->m = NULL;
	rtfree(rt);
	return;

bad:
	m_freem(m0);
	goto done;
}
#endif /* INET6 */


/*
 * check TCP checksum and set mbuf flag
 * off is the offset where the protocol header starts
 * len is the total length of protocol header plus payload
 * returns 0 when the checksum is valid, otherwise returns 1.
 * if the _OUT flag is set the checksum isn't done yet, consider these ok
 */
int
pf_check_tcp_cksum(struct mbuf *m, int off, int len, sa_family_t af)
{
	u_int16_t sum;

	/* hardware already verified it, or checksum not yet computed */
	if (m->m_pkthdr.csum_flags &
	    (M_TCP_CSUM_IN_OK | M_TCP_CSUM_OUT)) {
		return (0);
	}
	if (m->m_pkthdr.csum_flags & M_TCP_CSUM_IN_BAD ||
	    off < sizeof(struct ip) ||
	    m->m_pkthdr.len < off + len) {
		return (1);
	}

	/* need to do it in software */
	tcpstat_inc(tcps_inswcsum);

	switch (af) {
	case AF_INET:
		if (m->m_len < sizeof(struct ip))
			return (1);

		sum = in4_cksum(m, IPPROTO_TCP, off, len);
		break;
#ifdef INET6
	case AF_INET6:
		if (m->m_len < sizeof(struct ip6_hdr))
			return (1);

		sum = in6_cksum(m, IPPROTO_TCP, off, len);
		break;
#endif /* INET6 */
	default:
		unhandled_af(af);
	}
	if (sum) {
		/* cache the verdict so it is computed only once */
		tcpstat_inc(tcps_rcvbadsum);
		m->m_pkthdr.csum_flags |= M_TCP_CSUM_IN_BAD;
		return (1);
	}

	m->m_pkthdr.csum_flags |= M_TCP_CSUM_IN_OK;
	return (0);
}

/*
 * pf_find_divert() -- return the divert tag attached to "m", or NULL
 * if the mbuf carries none.
 */
struct pf_divert *
pf_find_divert(struct mbuf *m)
{
	struct m_tag *mtag;

	if ((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL)
		return (NULL);

	return ((struct pf_divert *)(mtag + 1));
}

/*
 * pf_get_divert() -- like pf_find_divert(), but attach a zeroed divert
 * tag first if none is present.  Returns NULL only on allocation failure.
 */
struct pf_divert *
pf_get_divert(struct mbuf *m)
{
	struct m_tag *mtag;

	if ((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL) {
		mtag = m_tag_get(PACKET_TAG_PF_DIVERT, sizeof(struct pf_divert),
		    M_NOWAIT);
		if (mtag == NULL)
			return (NULL);
		memset(mtag + 1, 0, sizeof(struct pf_divert));
		m_tag_prepend(m, mtag);
	}

	return ((struct pf_divert *)(mtag + 1));
}

/*
 * pf_walk_header() -- sanity-walk the IPv4 header (and any chain of AH
 * headers) and advance pd->off/pd->proto to the transport header.
 */
int
pf_walk_header(struct pf_pdesc *pd, struct ip *h, u_short *reason)
{
	struct ip6_ext ext;
	u_int32_t hlen, end;
	int hdr_cnt;

	hlen = h->ip_hl << 2;
	if (hlen < sizeof(struct ip) || hlen > ntohs(h->ip_len)) {
		REASON_SET(reason, PFRES_SHORT);
		return (PF_DROP);
	}
	/* the presence of any IPv4 option counts as "bad options" */
	if (hlen != sizeof(struct ip))
		pd->badopts++;
	end = pd->off + ntohs(h->ip_len);
	pd->off += hlen;
	pd->proto = h->ip_p;
	/* stop walking over non initial fragments */
	if ((h->ip_off & htons(IP_OFFMASK)) != 0)
		return (PF_PASS);

	for (hdr_cnt = 0; hdr_cnt < pf_hdr_limit; hdr_cnt++) {
		switch (pd->proto) {
		case IPPROTO_AH:
			/* fragments may be short */
			if ((h->ip_off & htons(IP_MF | IP_OFFMASK)) != 0 &&
			    end < pd->off + sizeof(ext))
				return (PF_PASS);
			if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext),
			    NULL, reason, AF_INET)) {
				DPFPRINTF(LOG_NOTICE, "IP short exthdr");
				return (PF_DROP);
			}
			/* AH length field counts in 4-byte units, less 2 */
			pd->off += (ext.ip6e_len + 2) * 4;
			pd->proto = ext.ip6e_nxt;
			break;
		default:
			return (PF_PASS);
		}
	}
	DPFPRINTF(LOG_NOTICE, "IPv4 nested authentication header limit");
	REASON_SET(reason, PFRES_IPOPTIONS);
	return (PF_DROP);
}

#ifdef INET6
/*
 * pf_walk_option6() -- parse one hop-by-hop options block, recording a
 * jumbo payload option (if any) in pd->jumbolen.
 */
int
pf_walk_option6(struct pf_pdesc *pd, struct ip6_hdr *h, int off, int end,
    u_short *reason)
{
	struct ip6_opt opt;
	struct ip6_opt_jumbo jumbo;

	/* walk the option TLVs between "off" and "end" */
	while (off < end) {
		if (!pf_pull_hdr(pd->m, off, &opt.ip6o_type,
		    sizeof(opt.ip6o_type), NULL, reason, AF_INET6)) {
			DPFPRINTF(LOG_NOTICE, "IPv6 short opt type");
			return (PF_DROP);
		}
		if (opt.ip6o_type == IP6OPT_PAD1) {
			off++;
			continue;
		}
		if (!pf_pull_hdr(pd->m, off, &opt, sizeof(opt),
		    NULL, reason, AF_INET6)) {
			DPFPRINTF(LOG_NOTICE, "IPv6 short opt");
			return (PF_DROP);
		}
		if (off + sizeof(opt) + opt.ip6o_len > end) {
			DPFPRINTF(LOG_NOTICE, "IPv6 long opt");
			REASON_SET(reason, PFRES_IPOPTIONS);
			return (PF_DROP);
		}
		switch (opt.ip6o_type) {
		case IP6OPT_JUMBO:
			/* at most one jumbo option, and only if plen == 0 */
			if (pd->jumbolen != 0) {
				DPFPRINTF(LOG_NOTICE, "IPv6 multiple jumbo");
				REASON_SET(reason, PFRES_IPOPTIONS);
				return (PF_DROP);
			}
			if (ntohs(h->ip6_plen) != 0) {
				DPFPRINTF(LOG_NOTICE, "IPv6 bad jumbo plen");
				REASON_SET(reason, PFRES_IPOPTIONS);
				return (PF_DROP);
			}
			if (!pf_pull_hdr(pd->m, off, &jumbo, sizeof(jumbo),
			    NULL, reason, AF_INET6)) {
				DPFPRINTF(LOG_NOTICE, "IPv6 short jumbo");
				return (PF_DROP);
			}
			/* jumbo length is unaligned; copy before swapping */
			memcpy(&pd->jumbolen, jumbo.ip6oj_jumbo_len,
			    sizeof(pd->jumbolen));
			pd->jumbolen = ntohl(pd->jumbolen);
			if (pd->jumbolen < IPV6_MAXPACKET) {
				DPFPRINTF(LOG_NOTICE, "IPv6 short jumbolen");
				REASON_SET(reason, PFRES_IPOPTIONS);
				return (PF_DROP);
			}
			break;
		default:
			break;
		}
		off += sizeof(opt) + opt.ip6o_len;
	}

	return (PF_PASS);
}

/*
 * pf_walk_header6() -- walk and validate the IPv6 extension header
 * chain, advancing pd->off/pd->proto to the upper-layer header and
 * recording fragment/extension offsets in pd.
 */
int
pf_walk_header6(struct pf_pdesc *pd, struct ip6_hdr *h, u_short *reason)
{
	struct ip6_frag frag;
	struct ip6_ext ext;
	struct ip6_rthdr rthdr;
	u_int32_t end;
	int hdr_cnt, fraghdr_cnt = 0, rthdr_cnt = 0;

	pd->off += sizeof(struct ip6_hdr);
	end = pd->off + ntohs(h->ip6_plen);
	pd->fragoff = pd->extoff = pd->jumbolen = 0;
	pd->proto = h->ip6_nxt;

	for (hdr_cnt = 0; hdr_cnt < pf_hdr_limit; hdr_cnt++) {
		switch (pd->proto) {
		case IPPROTO_ROUTING:
		case IPPROTO_HOPOPTS:
		case IPPROTO_DSTOPTS:
			pd->badopts++;
			break;
		}
		switch (pd->proto) {
		case IPPROTO_FRAGMENT:
			if (fraghdr_cnt++) {
				DPFPRINTF(LOG_NOTICE, "IPv6 multiple fragment");
				REASON_SET(reason, PFRES_FRAG);
				return (PF_DROP);
			}
			/* jumbo payload packets cannot be fragmented */
			if (pd->jumbolen != 0) {
				DPFPRINTF(LOG_NOTICE, "IPv6 fragmented jumbo");
				REASON_SET(reason, PFRES_FRAG);
				return (PF_DROP);
			}
			if (!pf_pull_hdr(pd->m, pd->off, &frag, sizeof(frag),
			    NULL, reason, AF_INET6)) {
				DPFPRINTF(LOG_NOTICE, "IPv6 short fragment");
				return (PF_DROP);
			}
			/* stop walking over non initial fragments */
			if (ntohs((frag.ip6f_offlg & IP6F_OFF_MASK)) != 0) {
				pd->fragoff = pd->off;
				return (PF_PASS);
			}
			/* RFC6946: reassemble only non atomic fragments */
			if (frag.ip6f_offlg & IP6F_MORE_FRAG)
				pd->fragoff = pd->off;
			pd->off += sizeof(frag);
			pd->proto = frag.ip6f_nxt;
			break;
		case IPPROTO_ROUTING:
			if (rthdr_cnt++) {
				DPFPRINTF(LOG_NOTICE, "IPv6 multiple rthdr");
				REASON_SET(reason, PFRES_IPOPTIONS);
				return (PF_DROP);
			}
			/* fragments may be short */
			if (pd->fragoff != 0 && end < pd->off + sizeof(rthdr)) {
				pd->off = pd->fragoff;
				pd->proto = IPPROTO_FRAGMENT;
				return (PF_PASS);
			}
			if (!pf_pull_hdr(pd->m, pd->off, &rthdr, sizeof(rthdr),
			    NULL, reason, AF_INET6)) {
				DPFPRINTF(LOG_NOTICE, "IPv6 short rthdr");
				return (PF_DROP);
			}
			/* type 0 routing headers are a known security risk */
			if (rthdr.ip6r_type == IPV6_RTHDR_TYPE_0) {
				DPFPRINTF(LOG_NOTICE, "IPv6 rthdr0");
				REASON_SET(reason, PFRES_IPOPTIONS);
				return (PF_DROP);
			}
			/* FALLTHROUGH */
		case IPPROTO_HOPOPTS:
			/* RFC2460 4.1: Hop-by-Hop only after IPv6 header */
			if (pd->proto == IPPROTO_HOPOPTS && hdr_cnt > 0) {
				DPFPRINTF(LOG_NOTICE, "IPv6 hopopts not first");
				REASON_SET(reason, PFRES_IPOPTIONS);
				return (PF_DROP);
			}
			/* FALLTHROUGH */
		case IPPROTO_AH:
		case IPPROTO_DSTOPTS:
			/* fragments may be short */
			if (pd->fragoff != 0 && end < pd->off + sizeof(ext)) {
				pd->off = pd->fragoff;
				pd->proto = IPPROTO_FRAGMENT;
				return (PF_PASS);
			}
			if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext),
			    NULL, reason, AF_INET6)) {
				DPFPRINTF(LOG_NOTICE, "IPv6 short exthdr");
				return (PF_DROP);
			}
			/* reassembly needs the ext header before the frag */
			if (pd->fragoff == 0)
				pd->extoff = pd->off;
			if (pd->proto == IPPROTO_HOPOPTS && pd->fragoff == 0) {
				if (pf_walk_option6(pd, h,
				    pd->off + sizeof(ext),
				    pd->off + (ext.ip6e_len + 1) * 8, reason)
				    != PF_PASS)
					return (PF_DROP);
				if (ntohs(h->ip6_plen) == 0 &&
				    pd->jumbolen != 0) {
					DPFPRINTF(LOG_NOTICE,
					    "IPv6 missing jumbo");
					REASON_SET(reason, PFRES_IPOPTIONS);
					return (PF_DROP);
				}
			}
			/* AH lengths count in 4-byte units, others in 8 */
			if (pd->proto == IPPROTO_AH)
				pd->off += (ext.ip6e_len + 2) * 4;
			else
				pd->off += (ext.ip6e_len + 1) * 8;
			pd->proto = ext.ip6e_nxt;
			break;
		case IPPROTO_TCP:
		case IPPROTO_UDP:
		case IPPROTO_ICMPV6:
			/* fragments may be short, ignore inner header then */
			if (pd->fragoff != 0 && end < pd->off +
			    (pd->proto == IPPROTO_TCP ? sizeof(struct tcphdr) :
			    pd->proto == IPPROTO_UDP ? sizeof(struct udphdr) :
			    sizeof(struct icmp6_hdr))) {
				pd->off = pd->fragoff;
				pd->proto = IPPROTO_FRAGMENT;
			}
			/* FALLTHROUGH */
		default:
			return (PF_PASS);
		}
	}
	DPFPRINTF(LOG_NOTICE, "IPv6 nested extension header limit");
	REASON_SET(reason, PFRES_IPOPTIONS);
	return (PF_DROP);
}
#endif /* INET6 */

/*
 * pf_setup_pdesc() -- initialize the packet descriptor "pd" from mbuf
 * "m": direction, address family, addresses, header offsets and the
 * transport header itself.  Returns PF_PASS or PF_DROP (*reason set).
 */
int
pf_setup_pdesc(struct pf_pdesc *pd, sa_family_t af, int dir,
    struct pfi_kif *kif, struct mbuf *m, u_short *reason)
{
	memset(pd, 0, sizeof(*pd));
	pd->dir = dir;
	pd->kif = kif;		/* kif is NULL when called by pflog */
	pd->m = m;
	pd->sidx = (dir == PF_IN) ? 0 : 1;
	pd->didx = (dir == PF_IN) ? 1 : 0;
	pd->af = pd->naf = af;
	pd->rdomain = rtable_l2(pd->m->m_pkthdr.ph_rtableid);

	switch (pd->af) {
	case AF_INET: {
		struct ip *h;

		/* Check for illegal packets */
		if (pd->m->m_pkthdr.len < (int)sizeof(struct ip)) {
			REASON_SET(reason, PFRES_SHORT);
			return (PF_DROP);
		}

		h = mtod(pd->m, struct ip *);
		if (pd->m->m_pkthdr.len < ntohs(h->ip_len)) {
			REASON_SET(reason, PFRES_SHORT);
			return (PF_DROP);
		}

		if (pf_walk_header(pd, h, reason) != PF_PASS)
			return (PF_DROP);

		pd->src = (struct pf_addr *)&h->ip_src;
		pd->dst = (struct pf_addr *)&h->ip_dst;
		pd->tot_len = ntohs(h->ip_len);
		pd->tos = h->ip_tos & ~IPTOS_ECN_MASK;
		pd->ttl = h->ip_ttl;
		/* fragments match as a virtual protocol of their own */
		pd->virtual_proto = (h->ip_off & htons(IP_MF | IP_OFFMASK)) ?
		    PF_VPROTO_FRAGMENT : pd->proto;

		break;
	}
#ifdef INET6
	case AF_INET6: {
		struct ip6_hdr *h;

		/* Check for illegal packets */
		if (pd->m->m_pkthdr.len < (int)sizeof(struct ip6_hdr)) {
			REASON_SET(reason, PFRES_SHORT);
			return (PF_DROP);
		}

		h = mtod(pd->m, struct ip6_hdr *);
		if (pd->m->m_pkthdr.len <
		    sizeof(struct ip6_hdr) + ntohs(h->ip6_plen)) {
			REASON_SET(reason, PFRES_SHORT);
			return (PF_DROP);
		}

		if (pf_walk_header6(pd, h, reason) != PF_PASS)
			return (PF_DROP);

#if 1
		/*
		 * we do not support jumbogram yet. if we keep going, zero
		 * ip6_plen will do something bad, so drop the packet for now.
		 */
		if (pd->jumbolen != 0) {
			REASON_SET(reason, PFRES_NORM);
			return (PF_DROP);
		}
#endif /* 1 */

		pd->src = (struct pf_addr *)&h->ip6_src;
		pd->dst = (struct pf_addr *)&h->ip6_dst;
		pd->tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
		/* extract the traffic-class bits from the flow word */
		pd->tos = (ntohl(h->ip6_flow) & 0x0fc00000) >> 20;
		pd->ttl = h->ip6_hlim;
		/* fragments match as a virtual protocol of their own */
		pd->virtual_proto = (pd->fragoff != 0) ?
		    PF_VPROTO_FRAGMENT : pd->proto;

		break;
	}
#endif /* INET6 */
	default:
		panic("pf_setup_pdesc called with illegal af %u", pd->af);

	}

	pf_addrcpy(&pd->nsaddr, pd->src, pd->af);
	pf_addrcpy(&pd->ndaddr, pd->dst, pd->af);

	switch (pd->virtual_proto) {
	case IPPROTO_TCP: {
		struct tcphdr *th = &pd->hdr.tcp;

		if (!pf_pull_hdr(pd->m, pd->off, th, sizeof(*th),
		    NULL, reason, pd->af))
			return (PF_DROP);
		pd->hdrlen = sizeof(*th);
		/* the data offset must lie within the packet */
		if (pd->off + (th->th_off << 2) > pd->tot_len ||
		    (th->th_off << 2) < sizeof(struct tcphdr)) {
			REASON_SET(reason, PFRES_SHORT);
			return (PF_DROP);
		}
		pd->p_len = pd->tot_len - pd->off - (th->th_off << 2);
		pd->sport = &th->th_sport;
		pd->dport = &th->th_dport;
		pd->pcksum = &th->th_sum;
		break;
	}
	case IPPROTO_UDP: {
		struct udphdr *uh = &pd->hdr.udp;

		if (!pf_pull_hdr(pd->m, pd->off, uh, sizeof(*uh),
		    NULL, reason, pd->af))
			return (PF_DROP);
		pd->hdrlen = sizeof(*uh);
		/* the UDP length field must be sane and fit the packet */
		if (uh->uh_dport == 0 ||
		    pd->off + ntohs(uh->uh_ulen) > pd->tot_len ||
		    ntohs(uh->uh_ulen) < sizeof(struct udphdr)) {
			REASON_SET(reason, PFRES_SHORT);
			return (PF_DROP);
		}
		pd->sport = &uh->uh_sport;
		pd->dport = &uh->uh_dport;
		pd->pcksum = &uh->uh_sum;
		break;
	}
	case IPPROTO_ICMP: {
		if (!pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp, ICMP_MINLEN,
		    NULL, reason, pd->af))
			return (PF_DROP);
		pd->hdrlen = ICMP_MINLEN;
		if (pd->off + pd->hdrlen > pd->tot_len) {
			REASON_SET(reason, PFRES_SHORT);
			return (PF_DROP);
		}
		pd->pcksum = &pd->hdr.icmp.icmp_cksum;
		break;
	}
#ifdef INET6
	case IPPROTO_ICMPV6: {
		size_t icmp_hlen = sizeof(struct icmp6_hdr);

		if (!pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp6, icmp_hlen,
		    NULL, reason, pd->af))
			return (PF_DROP);
		/* ICMP headers we look further into to
match state */
		switch (pd->hdr.icmp6.icmp6_type) {
		case MLD_LISTENER_QUERY:
		case MLD_LISTENER_REPORT:
			icmp_hlen = sizeof(struct mld_hdr);
			break;
		case ND_NEIGHBOR_SOLICIT:
		case ND_NEIGHBOR_ADVERT:
			icmp_hlen = sizeof(struct nd_neighbor_solicit);
			/* FALLTHROUGH */
		case ND_ROUTER_SOLICIT:
		case ND_ROUTER_ADVERT:
		case ND_REDIRECT:
			/* neighbor-discovery messages must arrive ttl 255 */
			if (pd->ttl != 255) {
				REASON_SET(reason, PFRES_NORM);
				return (PF_DROP);
			}
			break;
		}
		/* re-pull if the specific ICMPv6 type needs a larger header */
		if (icmp_hlen > sizeof(struct icmp6_hdr) &&
		    !pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp6, icmp_hlen,
		    NULL, reason, pd->af))
			return (PF_DROP);
		pd->hdrlen = icmp_hlen;
		if (pd->off + pd->hdrlen > pd->tot_len) {
			REASON_SET(reason, PFRES_SHORT);
			return (PF_DROP);
		}
		pd->pcksum = &pd->hdr.icmp6.icmp6_cksum;
		break;
	}
#endif /* INET6 */
	}

	if (pd->sport)
		pd->osport = pd->nsport = *pd->sport;
	if (pd->dport)
		pd->odport = pd->ndport = *pd->dport;

	return (PF_PASS);
}

/*
 * pf_counters_inc() -- charge the packet described by "pd" to the
 * interface, rule, anchor, state, source-node and table counters.
 */
void
pf_counters_inc(int action, struct pf_pdesc *pd, struct pf_state *s,
    struct pf_rule *r, struct pf_rule *a)
{
	int dirndx;
	pd->kif->pfik_bytes[pd->af == AF_INET6][pd->dir == PF_OUT]
	    [action != PF_PASS] += pd->tot_len;
	pd->kif->pfik_packets[pd->af == AF_INET6][pd->dir == PF_OUT]
	    [action != PF_PASS]++;

	if (action == PF_PASS || action == PF_AFRT || r->action == PF_DROP) {
		dirndx = (pd->dir == PF_OUT);
		r->packets[dirndx]++;
		r->bytes[dirndx] += pd->tot_len;
		if (a != NULL) {
			a->packets[dirndx]++;
			a->bytes[dirndx] += pd->tot_len;
		}
		if (s != NULL) {
			struct pf_rule_item *ri;
			struct pf_sn_item *sni;

			SLIST_FOREACH(sni, &s->src_nodes, next) {
				sni->sn->packets[dirndx]++;
				sni->sn->bytes[dirndx] += pd->tot_len;
			}
			/* state counters are indexed by state direction */
			dirndx = (pd->dir == s->direction) ? 0 : 1;
			s->packets[dirndx]++;
			s->bytes[dirndx] += pd->tot_len;

			SLIST_FOREACH(ri, &s->match_rules, entry) {
				ri->r->packets[dirndx]++;
				ri->r->bytes[dirndx] += pd->tot_len;

				if (ri->r->src.addr.type == PF_ADDR_TABLE)
					pfr_update_stats(ri->r->src.addr.p.tbl,
					    &s->key[(s->direction == PF_IN)]->
					    addr[(s->direction == PF_OUT)],
					    pd, ri->r->action, ri->r->src.neg);
				if (ri->r->dst.addr.type == PF_ADDR_TABLE)
					pfr_update_stats(ri->r->dst.addr.p.tbl,
					    &s->key[(s->direction == PF_IN)]->
					    addr[(s->direction == PF_IN)],
					    pd, ri->r->action, ri->r->dst.neg);
			}
		}
		if (r->src.addr.type == PF_ADDR_TABLE)
			pfr_update_stats(r->src.addr.p.tbl,
			    (s == NULL) ? pd->src :
			    &s->key[(s->direction == PF_IN)]->
			    addr[(s->direction == PF_OUT)],
			    pd, r->action, r->src.neg);
		if (r->dst.addr.type == PF_ADDR_TABLE)
			pfr_update_stats(r->dst.addr.p.tbl,
			    (s == NULL) ? pd->dst :
			    &s->key[(s->direction == PF_IN)]->
			    addr[(s->direction == PF_IN)],
			    pd, r->action, r->dst.neg);
	}
}

/*
 * pf_test() -- main packet filter entry point, called for every packet
 * entering or leaving an interface.
 */
int
pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0)
{
	struct ifnet *ifp0;
	struct pfi_kif *kif;
	u_short action, reason = 0;
	struct pf_rule *a = NULL, *r = &pf_default_rule;
	struct pf_state *s = NULL;
	struct pf_ruleset *ruleset = NULL;
	struct pf_pdesc pd;
	/* forwarded packets are filtered as outbound */
	int dir = (fwdir == PF_FWD) ?
PF_OUT : fwdir; 6866 u_int32_t qid, pqid = 0; 6867 int have_pf_lock = 0; 6868 6869 if (!pf_status.running) 6870 return (PF_PASS); 6871 6872 #if NCARP > 0 6873 if (ifp->if_type == IFT_CARP && 6874 (ifp0 = if_get(ifp->if_carpdevidx)) != NULL) { 6875 kif = (struct pfi_kif *)ifp0->if_pf_kif; 6876 if_put(ifp0); 6877 } else 6878 #endif /* NCARP */ 6879 kif = (struct pfi_kif *)ifp->if_pf_kif; 6880 6881 if (kif == NULL) { 6882 DPFPRINTF(LOG_ERR, 6883 "%s: kif == NULL, if_xname %s", __func__, ifp->if_xname); 6884 return (PF_DROP); 6885 } 6886 if (kif->pfik_flags & PFI_IFLAG_SKIP) 6887 return (PF_PASS); 6888 6889 #ifdef DIAGNOSTIC 6890 if (((*m0)->m_flags & M_PKTHDR) == 0) 6891 panic("non-M_PKTHDR is passed to pf_test"); 6892 #endif /* DIAGNOSTIC */ 6893 6894 if ((*m0)->m_pkthdr.pf.flags & PF_TAG_GENERATED) 6895 return (PF_PASS); 6896 6897 if ((*m0)->m_pkthdr.pf.flags & PF_TAG_DIVERTED_PACKET) 6898 return (PF_PASS); 6899 6900 if ((*m0)->m_pkthdr.pf.flags & PF_TAG_REFRAGMENTED) { 6901 (*m0)->m_pkthdr.pf.flags &= ~PF_TAG_REFRAGMENTED; 6902 return (PF_PASS); 6903 } 6904 6905 action = pf_setup_pdesc(&pd, af, dir, kif, *m0, &reason); 6906 if (action != PF_PASS) { 6907 #if NPFLOG > 0 6908 pd.pflog |= PF_LOG_FORCE; 6909 #endif /* NPFLOG > 0 */ 6910 goto done; 6911 } 6912 6913 /* packet normalization and reassembly */ 6914 switch (pd.af) { 6915 case AF_INET: 6916 action = pf_normalize_ip(&pd, &reason); 6917 break; 6918 #ifdef INET6 6919 case AF_INET6: 6920 action = pf_normalize_ip6(&pd, &reason); 6921 break; 6922 #endif /* INET6 */ 6923 } 6924 *m0 = pd.m; 6925 /* if packet sits in reassembly queue, return without error */ 6926 if (pd.m == NULL) 6927 return PF_PASS; 6928 6929 if (action != PF_PASS) { 6930 #if NPFLOG > 0 6931 pd.pflog |= PF_LOG_FORCE; 6932 #endif /* NPFLOG > 0 */ 6933 goto done; 6934 } 6935 6936 /* if packet has been reassembled, update packet description */ 6937 if (pf_status.reass && pd.virtual_proto == PF_VPROTO_FRAGMENT) { 6938 action = pf_setup_pdesc(&pd, af, 
dir, kif, pd.m, &reason); 6939 if (action != PF_PASS) { 6940 #if NPFLOG > 0 6941 pd.pflog |= PF_LOG_FORCE; 6942 #endif /* NPFLOG > 0 */ 6943 goto done; 6944 } 6945 } 6946 pd.m->m_pkthdr.pf.flags |= PF_TAG_PROCESSED; 6947 6948 /* 6949 * Avoid pcb-lookups from the forwarding path. They should never 6950 * match and would cause MP locking problems. 6951 */ 6952 if (fwdir == PF_FWD) { 6953 pd.lookup.done = -1; 6954 pd.lookup.uid = -1; 6955 pd.lookup.gid = -1; 6956 pd.lookup.pid = NO_PID; 6957 } 6958 6959 switch (pd.virtual_proto) { 6960 6961 case PF_VPROTO_FRAGMENT: { 6962 /* 6963 * handle fragments that aren't reassembled by 6964 * normalization 6965 */ 6966 PF_LOCK(); 6967 have_pf_lock = 1; 6968 action = pf_test_rule(&pd, &r, &s, &a, &ruleset, &reason); 6969 s = pf_state_ref(s); 6970 if (action != PF_PASS) 6971 REASON_SET(&reason, PFRES_FRAG); 6972 break; 6973 } 6974 6975 case IPPROTO_ICMP: { 6976 if (pd.af != AF_INET) { 6977 action = PF_DROP; 6978 REASON_SET(&reason, PFRES_NORM); 6979 DPFPRINTF(LOG_NOTICE, 6980 "dropping IPv6 packet with ICMPv4 payload"); 6981 break; 6982 } 6983 PF_STATE_ENTER_READ(); 6984 action = pf_test_state_icmp(&pd, &s, &reason); 6985 s = pf_state_ref(s); 6986 PF_STATE_EXIT_READ(); 6987 if (action == PF_PASS || action == PF_AFRT) { 6988 #if NPFSYNC > 0 6989 pfsync_update_state(s, &have_pf_lock); 6990 #endif /* NPFSYNC > 0 */ 6991 r = s->rule.ptr; 6992 a = s->anchor.ptr; 6993 #if NPFLOG > 0 6994 pd.pflog |= s->log; 6995 #endif /* NPFLOG > 0 */ 6996 } else if (s == NULL) { 6997 PF_LOCK(); 6998 have_pf_lock = 1; 6999 action = pf_test_rule(&pd, &r, &s, &a, &ruleset, 7000 &reason); 7001 s = pf_state_ref(s); 7002 } 7003 break; 7004 } 7005 7006 #ifdef INET6 7007 case IPPROTO_ICMPV6: { 7008 if (pd.af != AF_INET6) { 7009 action = PF_DROP; 7010 REASON_SET(&reason, PFRES_NORM); 7011 DPFPRINTF(LOG_NOTICE, 7012 "dropping IPv4 packet with ICMPv6 payload"); 7013 break; 7014 } 7015 PF_STATE_ENTER_READ(); 7016 action = pf_test_state_icmp(&pd, &s, &reason); 
7017 s = pf_state_ref(s); 7018 PF_STATE_EXIT_READ(); 7019 if (action == PF_PASS || action == PF_AFRT) { 7020 #if NPFSYNC > 0 7021 pfsync_update_state(s, &have_pf_lock); 7022 #endif /* NPFSYNC > 0 */ 7023 r = s->rule.ptr; 7024 a = s->anchor.ptr; 7025 #if NPFLOG > 0 7026 pd.pflog |= s->log; 7027 #endif /* NPFLOG > 0 */ 7028 } else if (s == NULL) { 7029 PF_LOCK(); 7030 have_pf_lock = 1; 7031 action = pf_test_rule(&pd, &r, &s, &a, &ruleset, 7032 &reason); 7033 s = pf_state_ref(s); 7034 } 7035 break; 7036 } 7037 #endif /* INET6 */ 7038 7039 default: 7040 if (pd.virtual_proto == IPPROTO_TCP) { 7041 if (pd.dir == PF_IN && (pd.hdr.tcp.th_flags & 7042 (TH_SYN|TH_ACK)) == TH_SYN && 7043 pf_synflood_check(&pd)) { 7044 PF_LOCK(); 7045 have_pf_lock = 1; 7046 pf_syncookie_send(&pd); 7047 action = PF_DROP; 7048 break; 7049 } 7050 if ((pd.hdr.tcp.th_flags & TH_ACK) && pd.p_len == 0) 7051 pqid = 1; 7052 action = pf_normalize_tcp(&pd); 7053 if (action == PF_DROP) 7054 break; 7055 } 7056 PF_STATE_ENTER_READ(); 7057 action = pf_test_state(&pd, &s, &reason, 0); 7058 s = pf_state_ref(s); 7059 PF_STATE_EXIT_READ(); 7060 if (s == NULL && action != PF_PASS && action != PF_AFRT && 7061 pd.dir == PF_IN && pd.virtual_proto == IPPROTO_TCP && 7062 (pd.hdr.tcp.th_flags & (TH_SYN|TH_ACK|TH_RST)) == TH_ACK && 7063 pf_syncookie_validate(&pd)) { 7064 struct mbuf *msyn; 7065 msyn = pf_syncookie_recreate_syn(&pd); 7066 if (msyn) { 7067 action = pf_test(af, fwdir, ifp, &msyn); 7068 m_freem(msyn); 7069 if (action == PF_PASS || action == PF_AFRT) { 7070 PF_STATE_ENTER_READ(); 7071 pf_test_state(&pd, &s, &reason, 1); 7072 s = pf_state_ref(s); 7073 PF_STATE_EXIT_READ(); 7074 if (s == NULL) 7075 return (PF_DROP); 7076 s->src.seqhi = 7077 ntohl(pd.hdr.tcp.th_ack) - 1; 7078 s->src.seqlo = 7079 ntohl(pd.hdr.tcp.th_seq) - 1; 7080 pf_set_protostate(s, PF_PEER_SRC, 7081 PF_TCPS_PROXY_DST); 7082 PF_LOCK(); 7083 have_pf_lock = 1; 7084 action = pf_synproxy(&pd, &s, &reason); 7085 if (action != PF_PASS) { 7086 
PF_UNLOCK(); 7087 pf_state_unref(s); 7088 return (action); 7089 } 7090 } 7091 } else 7092 action = PF_DROP; 7093 } 7094 7095 if (action == PF_PASS || action == PF_AFRT) { 7096 #if NPFSYNC > 0 7097 pfsync_update_state(s, &have_pf_lock); 7098 #endif /* NPFSYNC > 0 */ 7099 r = s->rule.ptr; 7100 a = s->anchor.ptr; 7101 #if NPFLOG > 0 7102 pd.pflog |= s->log; 7103 #endif /* NPFLOG > 0 */ 7104 } else if (s == NULL) { 7105 PF_LOCK(); 7106 have_pf_lock = 1; 7107 action = pf_test_rule(&pd, &r, &s, &a, &ruleset, 7108 &reason); 7109 s = pf_state_ref(s); 7110 } 7111 7112 if (pd.virtual_proto == IPPROTO_TCP) { 7113 if (s) { 7114 if (s->max_mss) 7115 pf_normalize_mss(&pd, s->max_mss); 7116 } else if (r->max_mss) 7117 pf_normalize_mss(&pd, r->max_mss); 7118 } 7119 7120 break; 7121 } 7122 7123 if (have_pf_lock != 0) 7124 PF_UNLOCK(); 7125 7126 /* 7127 * At the moment, we rely on NET_LOCK() to prevent removal of items 7128 * we've collected above ('r', 'anchor' and 'ruleset'). They'll have 7129 * to be refcounted when NET_LOCK() is gone. 
7130 */ 7131 7132 done: 7133 if (action != PF_DROP) { 7134 if (s) { 7135 /* The non-state case is handled in pf_test_rule() */ 7136 if (action == PF_PASS && pd.badopts && 7137 !(s->state_flags & PFSTATE_ALLOWOPTS)) { 7138 action = PF_DROP; 7139 REASON_SET(&reason, PFRES_IPOPTIONS); 7140 #if NPFLOG > 0 7141 pd.pflog |= PF_LOG_FORCE; 7142 #endif /* NPFLOG > 0 */ 7143 DPFPRINTF(LOG_NOTICE, "dropping packet with " 7144 "ip/ipv6 options in pf_test()"); 7145 } 7146 7147 pf_scrub(pd.m, s->state_flags, pd.af, s->min_ttl, 7148 s->set_tos); 7149 pf_tag_packet(pd.m, s->tag, s->rtableid[pd.didx]); 7150 if (pqid || (pd.tos & IPTOS_LOWDELAY)) { 7151 qid = s->pqid; 7152 if (s->state_flags & PFSTATE_SETPRIO) 7153 pd.m->m_pkthdr.pf.prio = s->set_prio[1]; 7154 } else { 7155 qid = s->qid; 7156 if (s->state_flags & PFSTATE_SETPRIO) 7157 pd.m->m_pkthdr.pf.prio = s->set_prio[0]; 7158 } 7159 pd.m->m_pkthdr.pf.delay = s->delay; 7160 } else { 7161 pf_scrub(pd.m, r->scrub_flags, pd.af, r->min_ttl, 7162 r->set_tos); 7163 if (pqid || (pd.tos & IPTOS_LOWDELAY)) { 7164 qid = r->pqid; 7165 if (r->scrub_flags & PFSTATE_SETPRIO) 7166 pd.m->m_pkthdr.pf.prio = r->set_prio[1]; 7167 } else { 7168 qid = r->qid; 7169 if (r->scrub_flags & PFSTATE_SETPRIO) 7170 pd.m->m_pkthdr.pf.prio = r->set_prio[0]; 7171 } 7172 pd.m->m_pkthdr.pf.delay = r->delay; 7173 } 7174 } 7175 7176 if (action == PF_PASS && qid) 7177 pd.m->m_pkthdr.pf.qid = qid; 7178 if (pd.dir == PF_IN && s && s->key[PF_SK_STACK]) 7179 pf_mbuf_link_state_key(pd.m, s->key[PF_SK_STACK]); 7180 if (pd.dir == PF_OUT && 7181 pd.m->m_pkthdr.pf.inp && !pd.m->m_pkthdr.pf.inp->inp_pf_sk && 7182 s && s->key[PF_SK_STACK] && !s->key[PF_SK_STACK]->inp) 7183 pf_state_key_link_inpcb(s->key[PF_SK_STACK], 7184 pd.m->m_pkthdr.pf.inp); 7185 7186 if (s && (pd.m->m_pkthdr.csum_flags & M_FLOWID) == 0) 7187 pd.m->m_pkthdr.ph_flowid = bemtoh64(&s->id); 7188 7189 /* 7190 * connections redirected to loopback should not match sockets 7191 * bound specifically to loopback due 
to security implications, 7192 * see in_pcblookup_listen(). 7193 */ 7194 if (pd.destchg) 7195 if ((pd.af == AF_INET && (ntohl(pd.dst->v4.s_addr) >> 7196 IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) || 7197 (pd.af == AF_INET6 && IN6_IS_ADDR_LOOPBACK(&pd.dst->v6))) 7198 pd.m->m_pkthdr.pf.flags |= PF_TAG_TRANSLATE_LOCALHOST; 7199 /* We need to redo the route lookup on outgoing routes. */ 7200 if (pd.destchg && pd.dir == PF_OUT) 7201 pd.m->m_pkthdr.pf.flags |= PF_TAG_REROUTE; 7202 7203 if (pd.dir == PF_IN && action == PF_PASS && 7204 (r->divert.type == PF_DIVERT_TO || 7205 r->divert.type == PF_DIVERT_REPLY)) { 7206 struct pf_divert *divert; 7207 7208 if ((divert = pf_get_divert(pd.m))) { 7209 pd.m->m_pkthdr.pf.flags |= PF_TAG_DIVERTED; 7210 divert->addr = r->divert.addr; 7211 divert->port = r->divert.port; 7212 divert->rdomain = pd.rdomain; 7213 divert->type = r->divert.type; 7214 } 7215 } 7216 7217 if (action == PF_PASS && r->divert.type == PF_DIVERT_PACKET) 7218 action = PF_DIVERT; 7219 7220 #if NPFLOG > 0 7221 if (pd.pflog) { 7222 struct pf_rule_item *ri; 7223 7224 if (pd.pflog & PF_LOG_FORCE || r->log & PF_LOG_ALL) 7225 pflog_packet(&pd, reason, r, a, ruleset, NULL); 7226 if (s) { 7227 SLIST_FOREACH(ri, &s->match_rules, entry) 7228 if (ri->r->log & PF_LOG_ALL) 7229 pflog_packet(&pd, reason, ri->r, a, 7230 ruleset, NULL); 7231 } 7232 } 7233 #endif /* NPFLOG > 0 */ 7234 7235 pf_counters_inc(action, &pd, s, r, a); 7236 7237 switch (action) { 7238 case PF_SYNPROXY_DROP: 7239 m_freem(pd.m); 7240 /* FALLTHROUGH */ 7241 case PF_DEFER: 7242 pd.m = NULL; 7243 action = PF_PASS; 7244 break; 7245 case PF_DIVERT: 7246 switch (pd.af) { 7247 case AF_INET: 7248 if (!divert_packet(pd.m, pd.dir, r->divert.port)) 7249 pd.m = NULL; 7250 break; 7251 #ifdef INET6 7252 case AF_INET6: 7253 if (!divert6_packet(pd.m, pd.dir, r->divert.port)) 7254 pd.m = NULL; 7255 break; 7256 #endif /* INET6 */ 7257 } 7258 action = PF_PASS; 7259 break; 7260 #ifdef INET6 7261 case PF_AFRT: 7262 if 
(pf_translate_af(&pd)) { 7263 action = PF_DROP; 7264 break; 7265 } 7266 pd.m->m_pkthdr.pf.flags |= PF_TAG_GENERATED; 7267 switch (pd.naf) { 7268 case AF_INET: 7269 if (pd.dir == PF_IN) 7270 ip_forward(pd.m, ifp, NULL, 1); 7271 else 7272 ip_output(pd.m, NULL, NULL, 0, NULL, NULL, 0); 7273 break; 7274 case AF_INET6: 7275 if (pd.dir == PF_IN) 7276 ip6_forward(pd.m, NULL, 1); 7277 else 7278 ip6_output(pd.m, NULL, NULL, 0, NULL, NULL); 7279 break; 7280 } 7281 pd.m = NULL; 7282 action = PF_PASS; 7283 break; 7284 #endif /* INET6 */ 7285 case PF_DROP: 7286 m_freem(pd.m); 7287 pd.m = NULL; 7288 break; 7289 default: 7290 if (r->rt) { 7291 switch (pd.af) { 7292 case AF_INET: 7293 pf_route(&pd, r, s); 7294 break; 7295 #ifdef INET6 7296 case AF_INET6: 7297 pf_route6(&pd, r, s); 7298 break; 7299 #endif /* INET6 */ 7300 } 7301 } 7302 break; 7303 } 7304 7305 #ifdef INET6 7306 /* if reassembled packet passed, create new fragments */ 7307 if (pf_status.reass && action == PF_PASS && pd.m && fwdir == PF_FWD && 7308 pd.af == AF_INET6) { 7309 struct m_tag *mtag; 7310 7311 if ((mtag = m_tag_find(pd.m, PACKET_TAG_PF_REASSEMBLED, NULL))) 7312 action = pf_refragment6(&pd.m, mtag, NULL, NULL, NULL); 7313 } 7314 #endif /* INET6 */ 7315 if (s && action != PF_DROP) { 7316 if (!s->if_index_in && dir == PF_IN) 7317 s->if_index_in = ifp->if_index; 7318 else if (!s->if_index_out && dir == PF_OUT) 7319 s->if_index_out = ifp->if_index; 7320 } 7321 7322 *m0 = pd.m; 7323 7324 pf_state_unref(s); 7325 7326 return (action); 7327 } 7328 7329 int 7330 pf_ouraddr(struct mbuf *m) 7331 { 7332 struct pf_state_key *sk; 7333 7334 if (m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) 7335 return (1); 7336 7337 sk = m->m_pkthdr.pf.statekey; 7338 if (sk != NULL) { 7339 if (sk->inp != NULL) 7340 return (1); 7341 } 7342 7343 return (-1); 7344 } 7345 7346 /* 7347 * must be called whenever any addressing information such as 7348 * address, port, protocol has changed 7349 */ 7350 void 7351 pf_pkt_addr_changed(struct mbuf *m) 7352 
{
	/* drop the cached state key and pcb; they no longer match */
	pf_mbuf_unlink_state_key(m);
	pf_mbuf_unlink_inpcb(m);
}

/*
 * Return the inpcb linked to the mbuf's cached state key, or NULL.
 * A state key that has become invalid is unlinked from the mbuf here.
 */
struct inpcb *
pf_inp_lookup(struct mbuf *m)
{
	struct inpcb		*inp = NULL;
	struct pf_state_key	*sk = m->m_pkthdr.pf.statekey;

	if (!pf_state_key_isvalid(sk))
		pf_mbuf_unlink_state_key(m);
	else
		inp = m->m_pkthdr.pf.statekey->inp;

	if (inp && inp->inp_pf_sk)
		KASSERT(m->m_pkthdr.pf.statekey == inp->inp_pf_sk);

	return (inp);
}

/*
 * Link the given inpcb to the mbuf's cached state key (if both sides are
 * still unlinked), then release the mbuf's state key reference.
 */
void
pf_inp_link(struct mbuf *m, struct inpcb *inp)
{
	struct pf_state_key	*sk = m->m_pkthdr.pf.statekey;

	if (!pf_state_key_isvalid(sk)) {
		pf_mbuf_unlink_state_key(m);
		return;
	}

	/*
	 * we don't need to grab PF-lock here. At worst case we link inp to
	 * state, which might be just being marked as deleted by another
	 * thread.
	 */
	if (inp && !sk->inp && !inp->inp_pf_sk)
		pf_state_key_link_inpcb(sk, inp);

	/* The statekey has finished finding the inp, it is no longer needed. */
	pf_mbuf_unlink_state_key(m);
}

/* Break the inpcb's link to its state key (e.g. on socket teardown). */
void
pf_inp_unlink(struct inpcb *inp)
{
	pf_inpcb_unlink_state_key(inp);
}

/* Cross-link two state keys as each other's reverse, taking a ref each way. */
void
pf_state_key_link_reverse(struct pf_state_key *sk, struct pf_state_key *skrev)
{
	/* Note that sk and skrev may be equal, then we refcount twice. */
	KASSERT(sk->reverse == NULL);
	KASSERT(skrev->reverse == NULL);
	sk->reverse = pf_state_key_ref(skrev);
	skrev->reverse = pf_state_key_ref(sk);
}

#if NPFLOG > 0
/*
 * Log the packet once for every rule on the match list that carries
 * log(matches), unless the final rule itself has it (already logged).
 */
void
pf_log_matches(struct pf_pdesc *pd, struct pf_rule *rm, struct pf_rule *am,
    struct pf_ruleset *ruleset, struct pf_rule_slist *matchrules)
{
	struct pf_rule_item	*ri;

	/* if this is the log(matches) rule, packet has been logged already */
	if (rm->log & PF_LOG_MATCHES)
		return;

	SLIST_FOREACH(ri, matchrules, entry)
		if (ri->r->log & PF_LOG_MATCHES)
			pflog_packet(pd, PFRES_MATCH, rm, am, ruleset, ri->r);
}
#endif	/* NPFLOG > 0 */

/* Take a reference on a state key; NULL-safe, returns its argument. */
struct pf_state_key *
pf_state_key_ref(struct pf_state_key *sk)
{
	if (sk != NULL)
		PF_REF_TAKE(sk->refcnt);

	return (sk);
}

/* Release a state key reference; frees the key when the last ref drops. */
void
pf_state_key_unref(struct pf_state_key *sk)
{
	if (PF_REF_RELE(sk->refcnt)) {
		/* state key must be removed from tree */
		KASSERT(!pf_state_key_isvalid(sk));
		/* state key must be unlinked from reverse key */
		KASSERT(sk->reverse == NULL);
		/* state key must be unlinked from socket */
		KASSERT(sk->inp == NULL);
		pool_put(&pf_state_key_pl, sk);
	}
}

/* A state key is valid when it exists and was not removed from the tree. */
int
pf_state_key_isvalid(struct pf_state_key *sk)
{
	return ((sk != NULL) && (sk->removed == 0));
}

/* Cache a referenced state key on the mbuf; mbuf must not hold one yet. */
void
pf_mbuf_link_state_key(struct mbuf *m, struct pf_state_key *sk)
{
	KASSERT(m->m_pkthdr.pf.statekey == NULL);
	m->m_pkthdr.pf.statekey = pf_state_key_ref(sk);
}

/* Drop the mbuf's cached state key reference, if any. */
void
pf_mbuf_unlink_state_key(struct mbuf *m)
{
	struct pf_state_key	*sk = m->m_pkthdr.pf.statekey;

	if (sk != NULL) {
		m->m_pkthdr.pf.statekey = NULL;
		pf_state_key_unref(sk);
	}
}

/* Cache a referenced inpcb on the mbuf; mbuf must not hold one yet. */
void
pf_mbuf_link_inpcb(struct mbuf *m, struct inpcb *inp)
{
	KASSERT(m->m_pkthdr.pf.inp == NULL);
	m->m_pkthdr.pf.inp =
in_pcbref(inp); 7481 } 7482 7483 void 7484 pf_mbuf_unlink_inpcb(struct mbuf *m) 7485 { 7486 struct inpcb *inp = m->m_pkthdr.pf.inp; 7487 7488 if (inp != NULL) { 7489 m->m_pkthdr.pf.inp = NULL; 7490 in_pcbunref(inp); 7491 } 7492 } 7493 7494 void 7495 pf_state_key_link_inpcb(struct pf_state_key *sk, struct inpcb *inp) 7496 { 7497 KASSERT(sk->inp == NULL); 7498 sk->inp = in_pcbref(inp); 7499 KASSERT(inp->inp_pf_sk == NULL); 7500 inp->inp_pf_sk = pf_state_key_ref(sk); 7501 } 7502 7503 void 7504 pf_inpcb_unlink_state_key(struct inpcb *inp) 7505 { 7506 struct pf_state_key *sk = inp->inp_pf_sk; 7507 7508 if (sk != NULL) { 7509 KASSERT(sk->inp == inp); 7510 sk->inp = NULL; 7511 inp->inp_pf_sk = NULL; 7512 pf_state_key_unref(sk); 7513 in_pcbunref(inp); 7514 } 7515 } 7516 7517 void 7518 pf_state_key_unlink_inpcb(struct pf_state_key *sk) 7519 { 7520 struct inpcb *inp = sk->inp; 7521 7522 if (inp != NULL) { 7523 KASSERT(inp->inp_pf_sk == sk); 7524 sk->inp = NULL; 7525 inp->inp_pf_sk = NULL; 7526 pf_state_key_unref(sk); 7527 in_pcbunref(inp); 7528 } 7529 } 7530 7531 void 7532 pf_state_key_unlink_reverse(struct pf_state_key *sk) 7533 { 7534 struct pf_state_key *skrev = sk->reverse; 7535 7536 /* Note that sk and skrev may be equal, then we unref twice. 
*/ 7537 if (skrev != NULL) { 7538 KASSERT(skrev->reverse == sk); 7539 sk->reverse = NULL; 7540 skrev->reverse = NULL; 7541 pf_state_key_unref(skrev); 7542 pf_state_key_unref(sk); 7543 } 7544 } 7545 7546 struct pf_state * 7547 pf_state_ref(struct pf_state *s) 7548 { 7549 if (s != NULL) 7550 PF_REF_TAKE(s->refcnt); 7551 return (s); 7552 } 7553 7554 void 7555 pf_state_unref(struct pf_state *s) 7556 { 7557 if ((s != NULL) && PF_REF_RELE(s->refcnt)) { 7558 /* never inserted or removed */ 7559 #if NPFSYNC > 0 7560 KASSERT((TAILQ_NEXT(s, sync_list) == NULL) || 7561 ((TAILQ_NEXT(s, sync_list) == _Q_INVALID) && 7562 (s->sync_state == PFSYNC_S_NONE))); 7563 #endif /* NPFSYNC */ 7564 KASSERT((TAILQ_NEXT(s, entry_list) == NULL) || 7565 (TAILQ_NEXT(s, entry_list) == _Q_INVALID)); 7566 KASSERT((s->key[PF_SK_WIRE] == NULL) && 7567 (s->key[PF_SK_STACK] == NULL)); 7568 7569 pool_put(&pf_state_pl, s); 7570 } 7571 } 7572 7573 int 7574 pf_delay_pkt(struct mbuf *m, u_int ifidx) 7575 { 7576 struct pf_pktdelay *pdy; 7577 7578 if ((pdy = pool_get(&pf_pktdelay_pl, PR_NOWAIT)) == NULL) { 7579 m_freem(m); 7580 return (ENOBUFS); 7581 } 7582 pdy->ifidx = ifidx; 7583 pdy->m = m; 7584 timeout_set(&pdy->to, pf_pktenqueue_delayed, pdy); 7585 timeout_add_msec(&pdy->to, m->m_pkthdr.pf.delay); 7586 m->m_pkthdr.pf.delay = 0; 7587 return (0); 7588 } 7589 7590 void 7591 pf_pktenqueue_delayed(void *arg) 7592 { 7593 struct pf_pktdelay *pdy = arg; 7594 struct ifnet *ifp; 7595 7596 ifp = if_get(pdy->ifidx); 7597 if (ifp != NULL) { 7598 if_enqueue(ifp, pdy->m); 7599 if_put(ifp); 7600 } else 7601 m_freem(pdy->m); 7602 7603 pool_put(&pf_pktdelay_pl, pdy); 7604 } 7605