1 /* $OpenBSD: pf.c,v 1.1093 2020/06/24 22:03:42 cheloha Exp $ */ 2 3 /* 4 * Copyright (c) 2001 Daniel Hartmeier 5 * Copyright (c) 2002 - 2013 Henning Brauer <henning@openbsd.org> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * - Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * - Redistributions in binary form must reproduce the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer in the documentation and/or other materials provided 17 * with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 22 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 23 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 25 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 29 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 * 32 * Effort sponsored in part by the Defense Advanced Research Projects 33 * Agency (DARPA) and Air Force Research Laboratory, Air Force 34 * Materiel Command, USAF, under agreement number F30602-01-2-0537. 
 *
 */

/* Kernel option headers: define the N* device counts tested below. */
#include "bpfilter.h"
#include "carp.h"
#include "pflog.h"
#include "pfsync.h"
#include "pflow.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/filio.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/kernel.h>
#include <sys/time.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/syslog.h>

#include <crypto/sha2.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_types.h>
#include <net/route.h>

#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
#include <netinet/in_pcb.h>
#include <netinet/ip_var.h>
#include <netinet/ip_icmp.h>
#include <netinet/icmp_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_fsm.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>
#include <netinet/ip_divert.h>

#ifdef INET6
#include <netinet6/in6_var.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet/icmp6.h>
#include <netinet6/nd6.h>
#include <netinet6/ip6_divert.h>
#endif /* INET6 */

#include <net/pfvar.h>
#include <net/pfvar_priv.h>

#if NPFLOG > 0
#include <net/if_pflog.h>
#endif /* NPFLOG > 0 */

#if NPFLOW > 0
#include <net/if_pflow.h>
#endif /* NPFLOW > 0 */

#if NPFSYNC > 0
#include <net/if_pfsync.h>
#endif /* NPFSYNC > 0 */

#ifdef DDB
#include <machine/db_machdep.h>
#include <ddb/db_interface.h>
#endif

/*
 * Global variables
 */
/* Main state table, keyed by pf_state_key (see pf_state_compare_key). */
struct pf_state_tree	 pf_statetbl;
/*
 * Two queue definition sets; active/inactive pointers presumably select
 * the committed vs. in-transaction set — confirm against pf_ioctl.c.
 */
struct pf_queuehead	 pf_queues[2];
struct pf_queuehead	*pf_queues_active;
struct pf_queuehead	*pf_queues_inactive;

/* Global pf status/counters, exported to userland via ioctl. */
struct pf_status	 pf_status;

int			 pf_hdr_limit = 20;  /* arbitrary limit, tune in ddb */
/* Secret material for the hashed TCP ISS generator (pf_tcp_iss). */
SHA2_CTX	 pf_tcp_secret_ctx;
u_char		 pf_tcp_secret[16];
int		 pf_tcp_secret_init;
int		 pf_tcp_iss_off;

/* Periodic purge machinery: timeout re-arms a task on the net taskq. */
int		 pf_npurge;
struct task	 pf_purge_task = TASK_INITIALIZER(pf_purge, &pf_npurge);
struct timeout	 pf_purge_to = TIMEOUT_INITIALIZER(pf_purge_timeout, NULL);

enum pf_test_status {
	PF_TEST_FAIL = -1,
	PF_TEST_OK,
	PF_TEST_QUICK
};

/* Per-packet rule evaluation context, threaded through pf_match_rule(). */
struct pf_test_ctx {
	enum pf_test_status	  test_status;
	struct pf_pdesc		 *pd;
	struct pf_rule_actions	  act;
	u_int8_t		  icmpcode;
	u_int8_t		  icmptype;
	int			  icmp_dir;
	int			  state_icmp;
	int			  tag;
	u_short			  reason;
	struct pf_rule_item	 *ri;
	struct pf_src_node	 *sns[PF_SN_MAX];
	struct pf_rule_slist	  rules;
	struct pf_rule		 *nr;
	struct pf_rule		**rm;
	struct pf_rule		 *a;
	struct pf_rule		**am;
	struct pf_ruleset	**rsm;
	struct pf_ruleset	 *arsm;
	struct pf_ruleset	 *aruleset;
	struct tcphdr		 *th;
	int			  depth;	/* anchor recursion depth */
};

#define	PF_ANCHOR_STACK_MAX	64

struct pool		 pf_src_tree_pl, pf_rule_pl, pf_queue_pl;
struct pool		 pf_state_pl, pf_state_key_pl, pf_state_item_pl;
struct pool		 pf_rule_item_pl, pf_sn_item_pl, pf_pktdelay_pl;

/* Prototypes for file-local helpers. */
void			 pf_add_threshold(struct pf_threshold *);
int			 pf_check_threshold(struct pf_threshold *);
int			 pf_check_tcp_cksum(struct mbuf *, int, int,
			    sa_family_t);
static __inline void	 pf_cksum_fixup(u_int16_t *, u_int16_t, u_int16_t,
			    u_int8_t);
void			 pf_cksum_fixup_a(u_int16_t *, const struct pf_addr *,
			    const struct pf_addr *, sa_family_t, u_int8_t);
int			 pf_modulate_sack(struct pf_pdesc *,
			    struct pf_state_peer *);
int			 pf_icmp_mapping(struct pf_pdesc *, u_int8_t, int *,
			    u_int16_t *, u_int16_t *);
int			 pf_change_icmp_af(struct mbuf *, int,
			    struct pf_pdesc *, struct pf_pdesc *,
			    struct pf_addr *, struct pf_addr *, sa_family_t,
			    sa_family_t);
int			 pf_translate_a(struct pf_pdesc *, struct pf_addr *,
			    struct pf_addr *);
void			 pf_translate_icmp(struct pf_pdesc *, struct pf_addr *,
			    u_int16_t *, struct pf_addr *, struct pf_addr *,
			    u_int16_t);
int			 pf_translate_icmp_af(struct pf_pdesc*, int, void *);
void			 pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t, int,
			    sa_family_t, struct pf_rule *, u_int);
void			 pf_detach_state(struct pf_state *);
void			 pf_state_key_detach(struct pf_state *, int);
u_int32_t		 pf_tcp_iss(struct pf_pdesc *);
void			 pf_rule_to_actions(struct pf_rule *,
			    struct pf_rule_actions *);
int			 pf_test_rule(struct pf_pdesc *, struct pf_rule **,
			    struct pf_state **, struct pf_rule **,
			    struct pf_ruleset **, u_short *);
static __inline int	 pf_create_state(struct pf_pdesc *, struct pf_rule *,
			    struct pf_rule *, struct pf_rule *,
			    struct pf_state_key **, struct pf_state_key **,
			    int *, struct pf_state **, int,
			    struct pf_rule_slist *, struct pf_rule_actions *,
			    struct pf_src_node *[]);
static __inline int	 pf_state_key_addr_setup(struct pf_pdesc *, void *,
			    int, struct pf_addr *, int, struct pf_addr *,
			    int, int);
int			 pf_state_key_setup(struct pf_pdesc *, struct
			    pf_state_key **, struct pf_state_key **, int);
int			 pf_tcp_track_full(struct pf_pdesc *,
			    struct pf_state **, u_short *, int *, int);
int			 pf_tcp_track_sloppy(struct pf_pdesc *,
			    struct pf_state **, u_short *);
static __inline int	 pf_synproxy(struct pf_pdesc *, struct pf_state **,
			    u_short *);
int			 pf_test_state(struct pf_pdesc *, struct pf_state **,
			    u_short *, int);
int			 pf_icmp_state_lookup(struct pf_pdesc *,
			    struct pf_state_key_cmp *, struct pf_state **,
			    u_int16_t, u_int16_t, int, int *, int, int);
int			 pf_test_state_icmp(struct pf_pdesc *,
			    struct pf_state **, u_short *);
u_int16_t		 pf_calc_mss(struct pf_addr *, sa_family_t, int,
			    u_int16_t);
static __inline int	 pf_set_rt_ifp(struct pf_state *, struct pf_addr *,
			    sa_family_t, struct pf_src_node **);
struct pf_divert	*pf_get_divert(struct mbuf *);
int			 pf_walk_header(struct pf_pdesc *, struct ip *,
			    u_short *);
int			 pf_walk_option6(struct pf_pdesc *, struct ip6_hdr *,
			    int, int, u_short *);
int			 pf_walk_header6(struct pf_pdesc *, struct ip6_hdr *,
			    u_short *);
void			 pf_print_state_parts(struct pf_state *,
			    struct pf_state_key *, struct pf_state_key *);
int			 pf_addr_wrap_neq(struct pf_addr_wrap *,
			    struct pf_addr_wrap *);
int			 pf_compare_state_keys(struct pf_state_key *,
			    struct pf_state_key *, struct pfi_kif *, u_int);
int			 pf_find_state(struct pf_pdesc *,
			    struct pf_state_key_cmp *, struct pf_state **);
int			 pf_src_connlimit(struct pf_state **);
int			 pf_match_rcvif(struct mbuf *, struct pf_rule *);
int			 pf_step_into_anchor(struct pf_test_ctx *,
			    struct pf_rule *);
int			 pf_match_rule(struct pf_test_ctx *,
			    struct pf_ruleset *);
void			 pf_counters_inc(int, struct pf_pdesc *,
			    struct pf_state *, struct pf_rule *,
			    struct pf_rule *);

int			 pf_state_key_isvalid(struct pf_state_key *);
struct pf_state_key	*pf_state_key_ref(struct pf_state_key *);
void			 pf_state_key_unref(struct pf_state_key *);
void			 pf_state_key_link_reverse(struct pf_state_key *,
			    struct pf_state_key *);
void			 pf_state_key_unlink_reverse(struct pf_state_key *);
void			 pf_state_key_link_inpcb(struct pf_state_key *,
			    struct inpcb *);
void			 pf_state_key_unlink_inpcb(struct pf_state_key *);
void			 pf_inpcb_unlink_state_key(struct inpcb *);
void			 pf_pktenqueue_delayed(void *);

#if NPFLOG > 0
void			 pf_log_matches(struct pf_pdesc *, struct pf_rule *,
			    struct pf_rule *, struct pf_ruleset *,
			    struct pf_rule_slist *);
#endif	/* NPFLOG > 0 */

extern struct pool pfr_ktable_pl;
extern struct pool pfr_kentry_pl;

/* Per-pool hard limits, indexed by PF_LIMIT_* (settable via pfctl). */
struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = {
	{ &pf_state_pl, PFSTATE_HIWAT, PFSTATE_HIWAT },
	{ &pf_src_tree_pl, PFSNODE_HIWAT, PFSNODE_HIWAT },
	{ &pf_frent_pl, PFFRAG_FRENT_HIWAT, PFFRAG_FRENT_HIWAT },
	{ &pfr_ktable_pl, PFR_KTABLE_HIWAT, PFR_KTABLE_HIWAT },
	{ &pfr_kentry_pl, PFR_KENTRY_HIWAT, PFR_KENTRY_HIWAT },
	{ &pf_pktdelay_pl, PF_PKTDELAY_MAXPKTS, PF_PKTDELAY_MAXPKTS }
};

/* If-bound rules pin the state to the creating kif, else float on pfi_all. */
#define BOUND_IFACE(r, k) \
	((r)->rule_flag & PFRULE_IFBOUND) ? (k) : pfi_all

/* Bump state counters on the matching rule, anchor and all match rules. */
#define STATE_INC_COUNTERS(s)					\
	do {							\
		struct pf_rule_item *mrm;			\
		s->rule.ptr->states_cur++;			\
		s->rule.ptr->states_tot++;			\
		if (s->anchor.ptr != NULL) {			\
			s->anchor.ptr->states_cur++;		\
			s->anchor.ptr->states_tot++;		\
		}						\
		SLIST_FOREACH(mrm, &s->match_rules, entry)	\
			mrm->r->states_cur++;			\
	} while (0)

static __inline int pf_src_compare(struct pf_src_node *, struct pf_src_node *);
static __inline int pf_state_compare_key(struct pf_state_key *,
	struct pf_state_key *);
static __inline int pf_state_compare_id(struct pf_state *,
	struct pf_state *);
#ifdef INET6
static __inline void pf_cksum_uncover(u_int16_t *, u_int16_t, u_int8_t);
static __inline void pf_cksum_cover(u_int16_t *, u_int16_t, u_int8_t);
#endif /* INET6 */
static __inline void pf_set_protostate(struct pf_state *, int, u_int8_t);

/* Source-tracking nodes, keyed per pf_src_compare(). */
struct pf_src_tree tree_src_tracking;

/* States by (id, creatorid) and the insertion-ordered state list. */
struct pf_state_tree_id tree_id;
struct pf_state_queue state_list;

RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare);
RB_GENERATE(pf_state_tree, pf_state_key, entry, pf_state_compare_key);
RB_GENERATE(pf_state_tree_id, pf_state,
    entry_id, pf_state_compare_id);

/* Rules flagged PFRULE_EXPIRED, queued for garbage collection. */
SLIST_HEAD(pf_rule_gcl, pf_rule) pf_rule_gcl =
	SLIST_HEAD_INITIALIZER(pf_rule_gcl);

/*
 * Three-way compare of two addresses of family af; returns -1/0/1.
 * For IPv6 the 32-bit words are compared from addr32[3] down to
 * addr32[0] — a consistent total order for the RB-trees, not a
 * numeric address ordering.  Unknown families compare equal.
 */
__inline int
pf_addr_compare(struct pf_addr *a, struct pf_addr *b, sa_family_t af)
{
	switch (af) {
	case AF_INET:
		if (a->addr32[0] > b->addr32[0])
			return (1);
		if (a->addr32[0] < b->addr32[0])
			return (-1);
		break;
#ifdef INET6
	case AF_INET6:
		if (a->addr32[3] > b->addr32[3])
			return (1);
		if (a->addr32[3] < b->addr32[3])
			return (-1);
		if (a->addr32[2] > b->addr32[2])
			return (1);
		if (a->addr32[2] < b->addr32[2])
			return (-1);
		if (a->addr32[1] > b->addr32[1])
			return (1);
		if (a->addr32[1] < b->addr32[1])
			return (-1);
		if (a->addr32[0] > b->addr32[0])
			return (1);
		if (a->addr32[0] < b->addr32[0])
			return (-1);
		break;
#endif /* INET6 */
	}
	return (0);
}

/*
 * RB comparator for tree_src_tracking: order by rule pointer, then
 * type, address family and finally the address itself.
 */
static __inline int
pf_src_compare(struct pf_src_node *a, struct pf_src_node *b)
{
	int	diff;

	if (a->rule.ptr > b->rule.ptr)
		return (1);
	if (a->rule.ptr < b->rule.ptr)
		return (-1);
	if ((diff = a->type - b->type) != 0)
		return (diff);
	if ((diff = a->af - b->af) != 0)
		return (diff);
	if ((diff = pf_addr_compare(&a->addr, &b->addr, a->af)) != 0)
		return (diff);
	return (0);
}

/*
 * Set the protocol state of one or both peers of a state.  For locally
 * created TCP states (creatorid == our hostid) the states_halfopen
 * counter is decremented when the source peer leaves the half-open
 * range (neither established nor closed) for established/closed.
 */
static __inline void
pf_set_protostate(struct pf_state *s, int which, u_int8_t newstate)
{
	if (which == PF_PEER_DST || which == PF_PEER_BOTH)
		s->dst.state = newstate;
	if (which == PF_PEER_DST)
		return;

	if (s->src.state == newstate)
		return;
	if (s->creatorid == pf_status.hostid && s->key[PF_SK_STACK] != NULL &&
	    s->key[PF_SK_STACK]->proto == IPPROTO_TCP &&
	    !(TCPS_HAVEESTABLISHED(s->src.state) ||
	    s->src.state == TCPS_CLOSED) &&
	    (TCPS_HAVEESTABLISHED(newstate) || newstate == TCPS_CLOSED))
		pf_status.states_halfopen--;

	s->src.state = newstate;
}

/* Copy an address of family af from src to dst; panic on unknown af. */
void
pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af)
{
	switch (af) {
	case AF_INET:
		dst->addr32[0] = src->addr32[0];
		break;
#ifdef INET6
	case AF_INET6:
		dst->addr32[0] = src->addr32[0];
		dst->addr32[1] = src->addr32[1];
		dst->addr32[2] = src->addr32[2];
		dst->addr32[3] = src->addr32[3];
		break;
#endif /* INET6 */
	default:
		unhandled_af(af);
	}
}

/*
 * Initialize a rate threshold of "limit per seconds".  The count is
 * kept scaled by PF_THRESHOLD_MULT so pf_add_threshold() can decay it
 * with integer arithmetic.
 */
void
pf_init_threshold(struct pf_threshold *threshold,
    u_int32_t limit, u_int32_t seconds)
{
	threshold->limit = limit * PF_THRESHOLD_MULT;
	threshold->seconds = seconds;
	threshold->count = 0;
	threshold->last = getuptime();
}

/*
 * Account one event: linearly decay the scaled count by the time
 * elapsed since the last event (reset entirely after a full period),
 * then add one scaled event.
 */
void
pf_add_threshold(struct pf_threshold *threshold)
{
	u_int32_t t = getuptime(), diff = t - threshold->last;

	if (diff >= threshold->seconds)
		threshold->count = 0;
	else
		threshold->count -= threshold->count * diff /
		    threshold->seconds;
	threshold->count += PF_THRESHOLD_MULT;
	threshold->last = t;
}

/* Return nonzero when the decayed event count exceeds the limit. */
int
pf_check_threshold(struct pf_threshold *threshold)
{
	return (threshold->count > threshold->limit);
}

/*
 * Enforce max-src-conn / max-src-conn-rate on a state's source node.
 * Returns 0 if within limits.  On violation: optionally inserts the
 * source address into the rule's overload table, optionally flushes
 * matching states (all rules if PF_FLUSH_GLOBAL), marks this state for
 * purge and returns 1.
 */
int
pf_src_connlimit(struct pf_state **state)
{
	int			 bad = 0;
	struct pf_src_node	*sn;

	if ((sn = pf_get_src_node((*state), PF_SN_NONE)) == NULL)
		return (0);

	sn->conn++;
	(*state)->src.tcp_est = 1;
	pf_add_threshold(&sn->conn_rate);

	if ((*state)->rule.ptr->max_src_conn &&
	    (*state)->rule.ptr->max_src_conn < sn->conn) {
		pf_status.lcounters[LCNT_SRCCONN]++;
		bad++;
	}

	if ((*state)->rule.ptr->max_src_conn_rate.limit &&
	    pf_check_threshold(&sn->conn_rate)) {
		pf_status.lcounters[LCNT_SRCCONNRATE]++;
		bad++;
	}

	if (!bad)
		return (0);

	if ((*state)->rule.ptr->overload_tbl) {
		struct pfr_addr p;
		u_int32_t	killed = 0;

		pf_status.lcounters[LCNT_OVERLOAD_TABLE]++;
		if (pf_status.debug >= LOG_NOTICE) {
			log(LOG_NOTICE,
			    "pf: pf_src_connlimit: blocking address ");
			pf_print_host(&sn->addr, 0,
			    (*state)->key[PF_SK_WIRE]->af);
		}

		memset(&p, 0, sizeof(p));
		p.pfra_af = (*state)->key[PF_SK_WIRE]->af;
		switch ((*state)->key[PF_SK_WIRE]->af) {
		case AF_INET:
			p.pfra_net = 32;
			p.pfra_ip4addr = sn->addr.v4;
			break;
#ifdef INET6
		case AF_INET6:
			p.pfra_net = 128;
			p.pfra_ip6addr = sn->addr.v6;
			break;
#endif /* INET6 */
		}

		pfr_insert_kentry((*state)->rule.ptr->overload_tbl,
		    &p, gettime());

		/* kill existing states if that's required. */
		if ((*state)->rule.ptr->flush) {
			struct pf_state_key *sk;
			struct pf_state *st;

			pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++;
			RB_FOREACH(st, pf_state_tree_id, &tree_id) {
				sk = st->key[PF_SK_WIRE];
				/*
				 * Kill states from this source. (Only those
				 * from the same rule if PF_FLUSH_GLOBAL is not
				 * set)
				 */
				if (sk->af ==
				    (*state)->key[PF_SK_WIRE]->af &&
				    (((*state)->direction == PF_OUT &&
				    PF_AEQ(&sn->addr, &sk->addr[1], sk->af)) ||
				    ((*state)->direction == PF_IN &&
				    PF_AEQ(&sn->addr, &sk->addr[0], sk->af))) &&
				    ((*state)->rule.ptr->flush &
				    PF_FLUSH_GLOBAL ||
				    (*state)->rule.ptr == st->rule.ptr)) {
					st->timeout = PFTM_PURGE;
					pf_set_protostate(st, PF_PEER_BOTH,
					    TCPS_CLOSED);
					killed++;
				}
			}
			if (pf_status.debug >= LOG_NOTICE)
				addlog(", %u states killed", killed);
		}
		if (pf_status.debug >= LOG_NOTICE)
			addlog("\n");
	}

	/* kill this state */
	(*state)->timeout = PFTM_PURGE;
	pf_set_protostate(*state, PF_PEER_BOTH, TCPS_CLOSED);
	return (1);
}

/*
 * Look up (or create) the source-tracking node for (rule, type, src).
 * On entry *sn may already point at a node; otherwise the tree is
 * searched first.  Returns 0 on success, -1 when the node pool is
 * exhausted, a limit (max-src-nodes/max-src-states) is hit, or the
 * tree insert collides.
 */
int
pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule,
    enum pf_sn_types type, sa_family_t af, struct pf_addr *src,
    struct pf_addr *raddr, struct pfi_kif *kif)
{
	struct pf_src_node	k;

	if (*sn == NULL) {
		k.af = af;
		k.type = type;
		pf_addrcpy(&k.addr, src, af);
		k.rule.ptr = rule;
		pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
		*sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
	}
	if (*sn == NULL) {
		/* not found: allocate a fresh node, subject to limits */
		if (!rule->max_src_nodes ||
		    rule->src_nodes < rule->max_src_nodes)
			(*sn) = pool_get(&pf_src_tree_pl, PR_NOWAIT | PR_ZERO);
		else
			pf_status.lcounters[LCNT_SRCNODES]++;
		if ((*sn) == NULL)
			return (-1);

		pf_init_threshold(&(*sn)->conn_rate,
		    rule->max_src_conn_rate.limit,
		    rule->max_src_conn_rate.seconds);

		(*sn)->type = type;
		(*sn)->af = af;
		(*sn)->rule.ptr = rule;
		pf_addrcpy(&(*sn)->addr, src, af);
		if (raddr)
			pf_addrcpy(&(*sn)->raddr, raddr, af);
		if (RB_INSERT(pf_src_tree,
		    &tree_src_tracking, *sn) != NULL) {
			if (pf_status.debug >= LOG_NOTICE) {
				log(LOG_NOTICE,
				    "pf: src_tree insert failed: ");
				pf_print_host(&(*sn)->addr, 0, af);
				addlog("\n");
			}
			pool_put(&pf_src_tree_pl, *sn);
			return (-1);
		}
		(*sn)->creation = getuptime();
		(*sn)->rule.ptr->src_nodes++;
		if (kif != NULL) {
			(*sn)->kif = kif;
			pfi_kif_ref(kif, PFI_KIF_REF_SRCNODE);
		}
		pf_status.scounters[SCNT_SRC_NODE_INSERT]++;
		pf_status.src_nodes++;
	} else {
		/* existing node: only check the per-node state limit */
		if (rule->max_src_states &&
		    (*sn)->states >= rule->max_src_states) {
			pf_status.lcounters[LCNT_SRCSTATES]++;
			return (-1);
		}
	}
	return (0);
}

/*
 * Free a source node once it has no states and has expired; may also
 * garbage-collect its rule when that rule has no states or nodes left.
 */
void
pf_remove_src_node(struct pf_src_node *sn)
{
	if (sn->states > 0 || sn->expire > getuptime())
		return;

	sn->rule.ptr->src_nodes--;
	if (sn->rule.ptr->states_cur == 0 &&
	    sn->rule.ptr->src_nodes == 0)
		pf_rm_rule(NULL, sn->rule.ptr);
	RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
	pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
	pf_status.src_nodes--;
	pfi_kif_unref(sn->kif, PFI_KIF_REF_SRCNODE);
	pool_put(&pf_src_tree_pl, sn);
}

/* Return the state's source node of the given type, or NULL. */
struct pf_src_node *
pf_get_src_node(struct pf_state *s, enum pf_sn_types type)
{
	struct pf_sn_item	*sni;

	SLIST_FOREACH(sni, &s->src_nodes, next)
		if (sni->sn->type == type)
			return (sni->sn);
	return (NULL);
}

/*
 * Unlink every item referencing sn from the state's src_nodes list,
 * dropping sn's state refcount for each item removed.
 */
void
pf_state_rm_src_node(struct pf_state *s, struct pf_src_node *sn)
{
	struct pf_sn_item	*sni, *snin, *snip = NULL;

	for (sni = SLIST_FIRST(&s->src_nodes); sni; sni = snin) {
		snin = SLIST_NEXT(sni, next);
		if (sni->sn == sn) {
			if (snip)
				SLIST_REMOVE_AFTER(snip, next);
			else
				SLIST_REMOVE_HEAD(&s->src_nodes, next);
			pool_put(&pf_sn_item_pl, sni);
			sni = NULL;
			sn->states--;
		}
		/* only advance the trailing pointer past surviving items */
		if (sni != NULL)
			snip = sni;
	}
}

/* state table stuff */

/*
 * RB comparator for pf_statetbl: proto, af, both addresses, both
 * ports, then routing domain.
 */
static __inline int
pf_state_compare_key(struct pf_state_key *a, struct pf_state_key *b)
{
	int	diff;

	if ((diff = a->proto - b->proto) != 0)
		return (diff);
	if ((diff = a->af - b->af) != 0)
		return (diff);
	if ((diff = pf_addr_compare(&a->addr[0], &b->addr[0], a->af)) != 0)
		return (diff);
	if ((diff = pf_addr_compare(&a->addr[1], &b->addr[1], a->af)) != 0)
		return (diff);
	if ((diff = a->port[0] - b->port[0]) != 0)
		return (diff);
	if ((diff = a->port[1] - b->port[1]) != 0)
		return (diff);
	if ((diff = a->rdomain - b->rdomain) != 0)
		return (diff);
	return (0);
}

/* RB comparator for tree_id: order by (id, creatorid). */
static __inline int
pf_state_compare_id(struct pf_state *a, struct pf_state *b)
{
	if (a->id > b->id)
		return (1);
	if (a->id < b->id)
		return (-1);
	if (a->creatorid > b->creatorid)
		return (1);
	if (a->creatorid < b->creatorid)
		return (-1);

	return (0);
}

/*
 * Attach state key sk to state s at slot idx (PF_SK_WIRE/PF_SK_STACK).
 * If an identical key already exists in pf_statetbl, the existing key
 * is shared and sk is freed; a conflicting state on the same kif
 * either causes a collision (-1) or, for a TCP pair past FIN_WAIT_2,
 * is marked for reuse and removed after the new state is linked in.
 * Returns 0 on success, -1 on collision or item-pool exhaustion (sk is
 * always consumed — either freed here or owned by the state).
 */
int
pf_state_key_attach(struct pf_state_key *sk, struct pf_state *s, int idx)
{
	struct pf_state_item	*si;
	struct pf_state_key	*cur;
	struct pf_state		*olds = NULL;

	KASSERT(s->key[idx] == NULL);
	if ((cur = RB_INSERT(pf_state_tree, &pf_statetbl, sk)) != NULL) {
		/* key exists. check for same kif, if none, add to key */
		TAILQ_FOREACH(si, &cur->states, entry)
			if (si->s->kif == s->kif &&
			    ((si->s->key[PF_SK_WIRE]->af == sk->af &&
			    si->s->direction == s->direction) ||
			    (si->s->key[PF_SK_WIRE]->af !=
			    si->s->key[PF_SK_STACK]->af &&
			    sk->af == si->s->key[PF_SK_STACK]->af &&
			    si->s->direction != s->direction))) {
				int reuse = 0;

				if (sk->proto == IPPROTO_TCP &&
				    si->s->src.state >= TCPS_FIN_WAIT_2 &&
				    si->s->dst.state >= TCPS_FIN_WAIT_2)
					reuse = 1;
				if (pf_status.debug >= LOG_NOTICE) {
					log(LOG_NOTICE,
					    "pf: %s key attach %s on %s: ",
					    (idx == PF_SK_WIRE) ?
					    "wire" : "stack",
					    reuse ? "reuse" : "failed",
					    s->kif->pfik_name);
					pf_print_state_parts(s,
					    (idx == PF_SK_WIRE) ? sk : NULL,
					    (idx == PF_SK_STACK) ? sk : NULL);
					addlog(", existing: ");
					pf_print_state_parts(si->s,
					    (idx == PF_SK_WIRE) ? sk : NULL,
					    (idx == PF_SK_STACK) ? sk : NULL);
					addlog("\n");
				}
				if (reuse) {
					pf_set_protostate(si->s, PF_PEER_BOTH,
					    TCPS_CLOSED);
					/* remove late or sks can go away */
					olds = si->s;
				} else {
					pool_put(&pf_state_key_pl, sk);
					return (-1);	/* collision! */
				}
			}
		pool_put(&pf_state_key_pl, sk);
		s->key[idx] = cur;
	} else
		s->key[idx] = sk;

	if ((si = pool_get(&pf_state_item_pl, PR_NOWAIT)) == NULL) {
		pf_state_key_detach(s, idx);
		return (-1);
	}
	si->s = s;

	/* list is sorted, if-bound states before floating */
	if (s->kif == pfi_all)
		TAILQ_INSERT_TAIL(&s->key[idx]->states, si, entry);
	else
		TAILQ_INSERT_HEAD(&s->key[idx]->states, si, entry);

	if (olds)
		pf_remove_state(olds);

	return (0);
}

/*
 * Detach both state keys from a state; when wire and stack share one
 * key, only the stack slot is detached.
 */
void
pf_detach_state(struct pf_state *s)
{
	if (s->key[PF_SK_WIRE] == s->key[PF_SK_STACK])
		s->key[PF_SK_WIRE] = NULL;

	if (s->key[PF_SK_STACK] != NULL)
		pf_state_key_detach(s, PF_SK_STACK);

	if (s->key[PF_SK_WIRE] != NULL)
		pf_state_key_detach(s, PF_SK_WIRE);
}

/*
 * Unlink state s from its key at slot idx.  When the key's state list
 * becomes empty, the key is removed from pf_statetbl, marked removed,
 * unlinked from reverse-key/inpcb caches, and its reference dropped.
 */
void
pf_state_key_detach(struct pf_state *s, int idx)
{
	struct pf_state_item	*si;
	struct pf_state_key	*sk;

	if (s->key[idx] == NULL)
		return;

	si = TAILQ_FIRST(&s->key[idx]->states);
	while (si && si->s != s)
		si = TAILQ_NEXT(si, entry);

	if (si) {
		TAILQ_REMOVE(&s->key[idx]->states, si, entry);
		pool_put(&pf_state_item_pl, si);
	}

	sk = s->key[idx];
	s->key[idx] = NULL;
	if (TAILQ_EMPTY(&sk->states)) {
		RB_REMOVE(pf_state_tree, &pf_statetbl, sk);
		sk->removed = 1;
		pf_state_key_unlink_reverse(sk);
		pf_state_key_unlink_inpcb(sk);
		pf_state_key_unref(sk);
	}
}

/* Allocate a state key from the pool and init its state list. */
struct pf_state_key *
pf_alloc_state_key(int pool_flags)
{
	struct pf_state_key	*sk;

	if ((sk = pool_get(&pf_state_key_pl, pool_flags)) == NULL)
		return (NULL);
	TAILQ_INIT(&sk->states);

	return (sk);
}

/*
 * Fill the address halves of a state key (arg).  For ICMPv6 neighbor
 * discovery the ND target address replaces one endpoint so solicit
 * and advert match the same state; for other multicast ICMPv6 the
 * source is rewritten to the link-local all-nodes form.  Returns -1
 * when an ND message arrives with multi set (no state wanted), else 0.
 */
static __inline int
pf_state_key_addr_setup(struct pf_pdesc *pd, void *arg, int sidx,
    struct pf_addr *saddr, int didx, struct pf_addr *daddr, int af, int multi)
{
	struct pf_state_key_cmp *key = arg;
#ifdef INET6
	struct pf_addr *target;

	if (af == AF_INET || pd->proto != IPPROTO_ICMPV6)
		goto copy;

	switch (pd->hdr.icmp6.icmp6_type) {
	case ND_NEIGHBOR_SOLICIT:
		if (multi)
			return (-1);
		target = (struct pf_addr *)&pd->hdr.nd_ns.nd_ns_target;
		daddr = target;
		break;
	case ND_NEIGHBOR_ADVERT:
		if (multi)
			return (-1);
		target = (struct pf_addr *)&pd->hdr.nd_ns.nd_ns_target;
		saddr = target;
		if (IN6_IS_ADDR_MULTICAST(&pd->dst->v6)) {
			key->addr[didx].addr32[0] = 0;
			key->addr[didx].addr32[1] = 0;
			key->addr[didx].addr32[2] = 0;
			key->addr[didx].addr32[3] = 0;
			daddr = NULL; /* overwritten */
		}
		break;
	default:
		if (multi) {
			key->addr[sidx].addr32[0] = __IPV6_ADDR_INT32_MLL;
			key->addr[sidx].addr32[1] = 0;
			key->addr[sidx].addr32[2] = 0;
			key->addr[sidx].addr32[3] = __IPV6_ADDR_INT32_ONE;
			saddr = NULL; /* overwritten */
		}
	}
 copy:
#endif /* INET6 */
	if (saddr)
		pf_addrcpy(&key->addr[sidx], saddr, af);
	if (daddr)
		pf_addrcpy(&key->addr[didx], daddr, af);

	return (0);
}

/*
 * Build the wire- and stack-side state keys for pd.  A second key is
 * allocated only when NAT/NAT64 changed address, port, routing domain
 * or address family; otherwise both slots share sk1.  Returns 0 or
 * ENOMEM (all allocated keys are freed on error).
 */
int
pf_state_key_setup(struct pf_pdesc *pd, struct pf_state_key **skw,
    struct pf_state_key **sks, int rtableid)
{
	/* if returning error we MUST pool_put state keys ourselves */
	struct pf_state_key *sk1, *sk2;
	u_int wrdom = pd->rdomain;
	int afto = pd->af != pd->naf;

	if ((sk1 = pf_alloc_state_key(PR_NOWAIT | PR_ZERO)) == NULL)
		return (ENOMEM);

	pf_state_key_addr_setup(pd, sk1, pd->sidx, pd->src, pd->didx, pd->dst,
	    pd->af, 0);
	sk1->port[pd->sidx] = pd->osport;
	sk1->port[pd->didx] = pd->odport;
	sk1->proto = pd->proto;
	sk1->af = pd->af;
	sk1->rdomain = pd->rdomain;
	PF_REF_INIT(sk1->refcnt);
	sk1->removed = 0;
	if (rtableid >= 0)
		wrdom = rtable_l2(rtableid);

	if (PF_ANEQ(&pd->nsaddr, pd->src, pd->af) ||
	    PF_ANEQ(&pd->ndaddr, pd->dst, pd->af) ||
	    pd->nsport != pd->osport || pd->ndport != pd->odport ||
	    wrdom != pd->rdomain || afto) {	/* NAT/NAT64 */
		if ((sk2 = pf_alloc_state_key(PR_NOWAIT | PR_ZERO)) == NULL) {
			pool_put(&pf_state_key_pl, sk1);
			return (ENOMEM);
		}
		/* NAT64 swaps the src/dst indexes on the translated side */
		pf_state_key_addr_setup(pd, sk2, afto ? pd->didx : pd->sidx,
		    &pd->nsaddr, afto ? pd->sidx : pd->didx, &pd->ndaddr,
		    pd->naf, 0);
		sk2->port[afto ? pd->didx : pd->sidx] = pd->nsport;
		sk2->port[afto ? pd->sidx : pd->didx] = pd->ndport;
		if (afto) {
			switch (pd->proto) {
			case IPPROTO_ICMP:
				sk2->proto = IPPROTO_ICMPV6;
				break;
			case IPPROTO_ICMPV6:
				sk2->proto = IPPROTO_ICMP;
				break;
			default:
				sk2->proto = pd->proto;
			}
		} else
			sk2->proto = pd->proto;
		sk2->af = pd->naf;
		sk2->rdomain = wrdom;
		PF_REF_INIT(sk2->refcnt);
		sk2->removed = 0;
	} else
		sk2 = sk1;

	if (pd->dir == PF_IN) {
		*skw = sk1;
		*sks = sk2;
	} else {
		*sks = sk1;
		*skw = sk2;
	}

	if (pf_status.debug >= LOG_DEBUG) {
		log(LOG_DEBUG, "pf: key setup: ");
		pf_print_state_parts(NULL, *skw, *sks);
		addlog("\n");
	}

	return (0);
}

/*
 * Insert a fully built state into the state tables: attach both keys,
 * assign id/creatorid if unset, insert into tree_id and state_list,
 * and announce it over pfsync.  On failure the keys are consumed and
 * -1 returned; *skw/*sks are updated to the (possibly shared) keys
 * actually attached.
 */
int
pf_state_insert(struct pfi_kif *kif, struct pf_state_key **skw,
    struct pf_state_key **sks, struct pf_state *s)
{
	PF_ASSERT_LOCKED();

	s->kif = kif;
	PF_STATE_ENTER_WRITE();
	if (*skw == *sks) {
		if (pf_state_key_attach(*skw, s, PF_SK_WIRE)) {
			PF_STATE_EXIT_WRITE();
			return (-1);
		}
		*skw = *sks = s->key[PF_SK_WIRE];
		s->key[PF_SK_STACK] = s->key[PF_SK_WIRE];
	} else {
		if (pf_state_key_attach(*skw, s, PF_SK_WIRE)) {
			pool_put(&pf_state_key_pl, *sks);
			PF_STATE_EXIT_WRITE();
			return (-1);
		}
		*skw = s->key[PF_SK_WIRE];
		if (pf_state_key_attach(*sks, s, PF_SK_STACK)) {
			pf_state_key_detach(s, PF_SK_WIRE);
			PF_STATE_EXIT_WRITE();
			return (-1);
		}
		*sks = s->key[PF_SK_STACK];
	}

	if (s->id == 0 && s->creatorid == 0) {
		s->id = htobe64(pf_status.stateid++);
		s->creatorid = pf_status.hostid;
	}
	if (RB_INSERT(pf_state_tree_id, &tree_id, s) != NULL) {
		if (pf_status.debug >= LOG_NOTICE) {
			log(LOG_NOTICE, "pf: state insert failed: "
			    "id: %016llx creatorid: %08x",
			    betoh64(s->id), ntohl(s->creatorid));
			addlog("\n");
		}
		pf_detach_state(s);
		PF_STATE_EXIT_WRITE();
		return (-1);
	}
	TAILQ_INSERT_TAIL(&state_list, s, entry_list);
	pf_status.fcounters[FCNT_STATE_INSERT]++;
	pf_status.states++;
	pfi_kif_ref(kif, PFI_KIF_REF_STATE);
	PF_STATE_EXIT_WRITE();
#if NPFSYNC > 0
	pfsync_insert_state(s);
#endif	/* NPFSYNC > 0 */
	return (0);
}

/* Look up a state by (id, creatorid); NULL when not found. */
struct pf_state *
pf_find_state_byid(struct pf_state_cmp *key)
{
	pf_status.fcounters[FCNT_STATE_SEARCH]++;

	return (RB_FIND(pf_state_tree_id, &tree_id, (struct pf_state *)key));
}

/*
 * Verify that key b is the exact mirror of key a (addresses and ports
 * swapped, same af/proto/rdomain).  Returns 0 on match, -1 (with a
 * loud log at debug >= LOG_ERR) on mismatch — used before linking
 * forward/reverse keys.
 */
int
pf_compare_state_keys(struct pf_state_key *a, struct pf_state_key *b,
    struct pfi_kif *kif, u_int dir)
{
	/* a (from hdr) and b (new) must be exact opposites of each other */
	if (a->af == b->af && a->proto == b->proto &&
	    PF_AEQ(&a->addr[0], &b->addr[1], a->af) &&
	    PF_AEQ(&a->addr[1], &b->addr[0], a->af) &&
	    a->port[0] == b->port[1] &&
	    a->port[1] == b->port[0] && a->rdomain == b->rdomain)
		return (0);
	else {
		/* mismatch. must not happen. */
		if (pf_status.debug >= LOG_ERR) {
			log(LOG_ERR,
			    "pf: state key linking mismatch! dir=%s, "
			    "if=%s, stored af=%u, a0: ",
			    dir == PF_OUT ? "OUT" : "IN",
			    kif->pfik_name, a->af);
			pf_print_host(&a->addr[0], a->port[0], a->af);
			addlog(", a1: ");
			pf_print_host(&a->addr[1], a->port[1], a->af);
			addlog(", proto=%u", a->proto);
			addlog(", found af=%u, a0: ", b->af);
			pf_print_host(&b->addr[0], b->port[0], b->af);
			addlog(", a1: ");
			pf_print_host(&b->addr[1], b->port[1], b->af);
			addlog(", proto=%u", b->proto);
			addlog("\n");
		}
		return (-1);
	}
}

/*
 * Find the state for pd/key.  Outbound packets first try the cached
 * key on the mbuf (forwarded traffic) or the inpcb (local traffic)
 * before falling back to an RB lookup, and on a tree hit the
 * reverse-key / inpcb caches are primed for next time.  Returns
 * PF_MATCH with *state set, PF_PASS for route-to/reply-to traffic
 * leaving on another interface, or PF_DROP (not found, purged, or
 * over the rule's pktrate limit).
 */
int
pf_find_state(struct pf_pdesc *pd, struct pf_state_key_cmp *key,
    struct pf_state **state)
{
	struct pf_state_key	*sk, *pkt_sk, *inp_sk;
	struct pf_state_item	*si;
	struct pf_state		*s = NULL;

	pf_status.fcounters[FCNT_STATE_SEARCH]++;
	if (pf_status.debug >= LOG_DEBUG) {
		log(LOG_DEBUG, "pf: key search, %s on %s: ",
		    pd->dir == PF_OUT ? "out" : "in", pd->kif->pfik_name);
		pf_print_state_parts(NULL, (struct pf_state_key *)key, NULL);
		addlog("\n");
	}

	inp_sk = NULL;
	pkt_sk = NULL;
	sk = NULL;
	if (pd->dir == PF_OUT) {
		/* first if block deals with outbound forwarded packet */
		pkt_sk = pd->m->m_pkthdr.pf.statekey;

		if (!pf_state_key_isvalid(pkt_sk)) {
			pf_mbuf_unlink_state_key(pd->m);
			pkt_sk = NULL;
		}

		if (pkt_sk && pf_state_key_isvalid(pkt_sk->reverse))
			sk = pkt_sk->reverse;

		if (pkt_sk == NULL) {
			/* here we deal with local outbound packet */
			if (pd->m->m_pkthdr.pf.inp != NULL) {
				inp_sk = pd->m->m_pkthdr.pf.inp->inp_pf_sk;
				if (pf_state_key_isvalid(inp_sk))
					sk = inp_sk;
				else
					pf_inpcb_unlink_state_key(
					    pd->m->m_pkthdr.pf.inp);
			}
		}
	}

	if (sk == NULL) {
		if ((sk = RB_FIND(pf_state_tree, &pf_statetbl,
		    (struct pf_state_key *)key)) == NULL)
			return (PF_DROP);
		if (pd->dir == PF_OUT && pkt_sk &&
		    pf_compare_state_keys(pkt_sk, sk, pd->kif, pd->dir) == 0)
			pf_state_key_link_reverse(sk, pkt_sk);
		else if (pd->dir == PF_OUT && pd->m->m_pkthdr.pf.inp &&
		    !pd->m->m_pkthdr.pf.inp->inp_pf_sk && !sk->inp)
			pf_state_key_link_inpcb(sk, pd->m->m_pkthdr.pf.inp);
	}

	/* remove firewall data from outbound packet */
	if (pd->dir == PF_OUT)
		pf_pkt_addr_changed(pd->m);

	/* list is sorted, if-bound states before floating ones */
	TAILQ_FOREACH(si, &sk->states, entry)
		if ((si->s->kif == pfi_all || si->s->kif == pd->kif) &&
		    ((si->s->key[PF_SK_WIRE]->af == si->s->key[PF_SK_STACK]->af
		    && sk == (pd->dir == PF_IN ? si->s->key[PF_SK_WIRE] :
		    si->s->key[PF_SK_STACK])) ||
		    (si->s->key[PF_SK_WIRE]->af != si->s->key[PF_SK_STACK]->af
		    && pd->dir == PF_IN && (sk == si->s->key[PF_SK_STACK] ||
		    sk == si->s->key[PF_SK_WIRE])))) {
			s = si->s;
			break;
		}

	if (s == NULL || s->timeout == PFTM_PURGE)
		return (PF_DROP);

	if (s->rule.ptr->pktrate.limit && pd->dir == s->direction) {
		pf_add_threshold(&s->rule.ptr->pktrate);
		if (pf_check_threshold(&s->rule.ptr->pktrate))
			return (PF_DROP);
	}

	*state = s;
	if (pd->dir == PF_OUT && s->rt_kif != NULL && s->rt_kif != pd->kif &&
	    ((s->rule.ptr->rt == PF_ROUTETO &&
	    s->rule.ptr->direction == PF_OUT) ||
	    (s->rule.ptr->rt == PF_REPLYTO &&
	    s->rule.ptr->direction == PF_IN)))
		return (PF_PASS);

	return (PF_MATCH);
}

/*
 * Find any state matching key in direction dir (or PF_INOUT).  With
 * more == NULL the first match is returned; otherwise *more counts
 * additional matches beyond the first.
 */
struct pf_state *
pf_find_state_all(struct pf_state_key_cmp *key, u_int dir, int *more)
{
	struct pf_state_key	*sk;
	struct pf_state_item	*si, *ret = NULL;

	pf_status.fcounters[FCNT_STATE_SEARCH]++;

	sk = RB_FIND(pf_state_tree, &pf_statetbl, (struct pf_state_key *)key);

	if (sk != NULL) {
		TAILQ_FOREACH(si, &sk->states, entry)
			if (dir == PF_INOUT ||
			    (sk == (dir == PF_IN ? si->s->key[PF_SK_WIRE] :
			    si->s->key[PF_SK_STACK]))) {
				if (more == NULL)
					return (si->s);

				if (ret)
					(*more)++;
				else
					ret = si;
			}
	}
	return (ret ? ret->s : NULL);
}

/*
 * Serialize state st into sp for pfsync/userland: multi-byte fields
 * are converted to network byte order, times to remaining seconds.
 */
void
pf_state_export(struct pfsync_state *sp, struct pf_state *st)
{
	int32_t expire;

	memset(sp, 0, sizeof(struct pfsync_state));

	/* copy from state key */
	sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0];
	sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1];
	sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0];
	sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1];
	sp->key[PF_SK_WIRE].rdomain = htons(st->key[PF_SK_WIRE]->rdomain);
	sp->key[PF_SK_WIRE].af = st->key[PF_SK_WIRE]->af;
	sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0];
	sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1];
	sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0];
	sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1];
	sp->key[PF_SK_STACK].rdomain = htons(st->key[PF_SK_STACK]->rdomain);
	sp->key[PF_SK_STACK].af = st->key[PF_SK_STACK]->af;
	sp->rtableid[PF_SK_WIRE] = htonl(st->rtableid[PF_SK_WIRE]);
	sp->rtableid[PF_SK_STACK] = htonl(st->rtableid[PF_SK_STACK]);
	sp->proto = st->key[PF_SK_WIRE]->proto;
	sp->af = st->key[PF_SK_WIRE]->af;

	/* copy from state */
	strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname));
	memcpy(&sp->rt_addr, &st->rt_addr, sizeof(sp->rt_addr));
	sp->creation = htonl(getuptime() - st->creation);
	expire = pf_state_expires(st);
	if (expire <= getuptime())
		sp->expire = htonl(0);
	else
		sp->expire = htonl(expire - getuptime());

	sp->direction = st->direction;
#if NPFLOG > 0
	sp->log = st->log;
#endif	/* NPFLOG > 0 */
	sp->timeout = st->timeout;
	sp->state_flags = htons(st->state_flags);
	if (!SLIST_EMPTY(&st->src_nodes))
		sp->sync_flags |= PFSYNC_FLAG_SRCNODE;

	sp->id = st->id;
	sp->creatorid = st->creatorid;
	pf_state_peer_hton(&st->src, &sp->src);
	pf_state_peer_hton(&st->dst, &sp->dst);

	if (st->rule.ptr == NULL)
		sp->rule = htonl(-1);
	else
		sp->rule = htonl(st->rule.ptr->nr);
	if (st->anchor.ptr == NULL)
		sp->anchor = htonl(-1);
	else
		sp->anchor = htonl(st->anchor.ptr->nr);
	sp->nat_rule = htonl(-1);	/* left for compat, nat_rule is gone */

	pf_state_counter_hton(st->packets[0], sp->packets[0]);
	pf_state_counter_hton(st->packets[1], sp->packets[1]);
	pf_state_counter_hton(st->bytes[0], sp->bytes[0]);
	pf_state_counter_hton(st->bytes[1], sp->bytes[1]);

	sp->max_mss = htons(st->max_mss);
	sp->min_ttl = st->min_ttl;
	sp->set_tos = st->set_tos;
	sp->set_prio[0] = st->set_prio[0];
	sp->set_prio[1] = st->set_prio[1];
}

/* END state table stuff */

/* Free all rules queued on pf_rule_gcl (each must be PFRULE_EXPIRED). */
void
pf_purge_expired_rules(void)
{
	struct pf_rule	*r;

	PF_ASSERT_LOCKED();

	if (SLIST_EMPTY(&pf_rule_gcl))
		return;

	while ((r = SLIST_FIRST(&pf_rule_gcl)) != NULL) {
		SLIST_REMOVE(&pf_rule_gcl, r, pf_rule, gcle);
		KASSERT(r->rule_flag & PFRULE_EXPIRED);
		pf_purge_rule(r);
	}
}

/* Timeout handler: defer the actual purge work to the net taskq. */
void
pf_purge_timeout(void *unused)
{
	task_add(net_tq(0), &pf_purge_task);
}

void
pf_purge(void *xnloops)
{
	int *nloops = xnloops;

	KERNEL_LOCK();
	NET_LOCK();

	/*
	 * process a fraction of the state table every second
	 * Note:
	 * 	we no longer need PF_LOCK() here, because
	 * 	pf_purge_expired_states() uses pf_state_lock to maintain
	 * 	consistency.
1272 */ 1273 pf_purge_expired_states(1 + (pf_status.states 1274 / pf_default_rule.timeout[PFTM_INTERVAL])); 1275 1276 PF_LOCK(); 1277 /* purge other expired types every PFTM_INTERVAL seconds */ 1278 if (++(*nloops) >= pf_default_rule.timeout[PFTM_INTERVAL]) { 1279 pf_purge_expired_src_nodes(); 1280 pf_purge_expired_rules(); 1281 } 1282 PF_UNLOCK(); 1283 1284 /* 1285 * Fragments don't require PF_LOCK(), they use their own lock. 1286 */ 1287 if ((*nloops) >= pf_default_rule.timeout[PFTM_INTERVAL]) { 1288 pf_purge_expired_fragments(); 1289 *nloops = 0; 1290 } 1291 NET_UNLOCK(); 1292 KERNEL_UNLOCK(); 1293 1294 timeout_add_sec(&pf_purge_to, 1); 1295 } 1296 1297 int32_t 1298 pf_state_expires(const struct pf_state *state) 1299 { 1300 u_int32_t timeout; 1301 u_int32_t start; 1302 u_int32_t end; 1303 u_int32_t states; 1304 1305 /* handle all PFTM_* > PFTM_MAX here */ 1306 if (state->timeout == PFTM_PURGE) 1307 return (0); 1308 1309 KASSERT(state->timeout != PFTM_UNLINKED); 1310 KASSERT(state->timeout < PFTM_MAX); 1311 1312 timeout = state->rule.ptr->timeout[state->timeout]; 1313 if (!timeout) 1314 timeout = pf_default_rule.timeout[state->timeout]; 1315 1316 start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START]; 1317 if (start) { 1318 end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END]; 1319 states = state->rule.ptr->states_cur; 1320 } else { 1321 start = pf_default_rule.timeout[PFTM_ADAPTIVE_START]; 1322 end = pf_default_rule.timeout[PFTM_ADAPTIVE_END]; 1323 states = pf_status.states; 1324 } 1325 if (end && states > start && start < end) { 1326 if (states >= end) 1327 return (0); 1328 1329 timeout = (u_int64_t)timeout * (end - states) / (end - start); 1330 } 1331 1332 return (state->expire + timeout); 1333 } 1334 1335 void 1336 pf_purge_expired_src_nodes(void) 1337 { 1338 struct pf_src_node *cur, *next; 1339 1340 PF_ASSERT_LOCKED(); 1341 1342 for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) { 1343 next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur); 1344 
1345 if (cur->states == 0 && cur->expire <= getuptime()) { 1346 next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur); 1347 pf_remove_src_node(cur); 1348 } 1349 } 1350 } 1351 1352 void 1353 pf_src_tree_remove_state(struct pf_state *s) 1354 { 1355 u_int32_t timeout; 1356 struct pf_sn_item *sni; 1357 1358 while ((sni = SLIST_FIRST(&s->src_nodes)) != NULL) { 1359 SLIST_REMOVE_HEAD(&s->src_nodes, next); 1360 if (s->src.tcp_est) 1361 --sni->sn->conn; 1362 if (--sni->sn->states == 0) { 1363 timeout = s->rule.ptr->timeout[PFTM_SRC_NODE]; 1364 if (!timeout) 1365 timeout = 1366 pf_default_rule.timeout[PFTM_SRC_NODE]; 1367 sni->sn->expire = getuptime() + timeout; 1368 } 1369 pool_put(&pf_sn_item_pl, sni); 1370 } 1371 } 1372 1373 void 1374 pf_remove_state(struct pf_state *cur) 1375 { 1376 PF_ASSERT_LOCKED(); 1377 1378 /* handle load balancing related tasks */ 1379 pf_postprocess_addr(cur); 1380 1381 if (cur->src.state == PF_TCPS_PROXY_DST) { 1382 pf_send_tcp(cur->rule.ptr, cur->key[PF_SK_WIRE]->af, 1383 &cur->key[PF_SK_WIRE]->addr[1], 1384 &cur->key[PF_SK_WIRE]->addr[0], 1385 cur->key[PF_SK_WIRE]->port[1], 1386 cur->key[PF_SK_WIRE]->port[0], 1387 cur->src.seqhi, cur->src.seqlo + 1, 1388 TH_RST|TH_ACK, 0, 0, 0, 1, cur->tag, 1389 cur->key[PF_SK_WIRE]->rdomain); 1390 } 1391 if (cur->key[PF_SK_STACK]->proto == IPPROTO_TCP) 1392 pf_set_protostate(cur, PF_PEER_BOTH, TCPS_CLOSED); 1393 1394 RB_REMOVE(pf_state_tree_id, &tree_id, cur); 1395 #if NPFLOW > 0 1396 if (cur->state_flags & PFSTATE_PFLOW) 1397 export_pflow(cur); 1398 #endif /* NPFLOW > 0 */ 1399 #if NPFSYNC > 0 1400 pfsync_delete_state(cur); 1401 #endif /* NPFSYNC > 0 */ 1402 cur->timeout = PFTM_UNLINKED; 1403 pf_src_tree_remove_state(cur); 1404 pf_detach_state(cur); 1405 } 1406 1407 void 1408 pf_remove_divert_state(struct pf_state_key *sk) 1409 { 1410 struct pf_state_item *si; 1411 1412 TAILQ_FOREACH(si, &sk->states, entry) { 1413 if (sk == si->s->key[PF_SK_STACK] && si->s->rule.ptr && 1414 (si->s->rule.ptr->divert.type == 
/*
 * Release a previously unlinked state: drop the rule/anchor/match-rule
 * reference counts (freeing rules that hit zero), release the kif and
 * tag, and drop the list reference.  Must be called with the pf lock;
 * the state must already be PFTM_UNLINKED.
 */
void
pf_free_state(struct pf_state *cur)
{
	struct pf_rule_item *ri;

	PF_ASSERT_LOCKED();

#if NPFSYNC > 0
	/* defer while pfsync still holds the state */
	if (pfsync_state_in_use(cur))
		return;
#endif	/* NPFSYNC > 0 */
	KASSERT(cur->timeout == PFTM_UNLINKED);
	if (--cur->rule.ptr->states_cur == 0 &&
	    cur->rule.ptr->src_nodes == 0)
		pf_rm_rule(NULL, cur->rule.ptr);
	if (cur->anchor.ptr != NULL)
		if (--cur->anchor.ptr->states_cur == 0)
			pf_rm_rule(NULL, cur->anchor.ptr);
	while ((ri = SLIST_FIRST(&cur->match_rules))) {
		SLIST_REMOVE_HEAD(&cur->match_rules, entry);
		if (--ri->r->states_cur == 0 &&
		    ri->r->src_nodes == 0)
			pf_rm_rule(NULL, ri->r);
		pool_put(&pf_rule_item_pl, ri);
	}
	pf_normalize_tcp_cleanup(cur);
	pfi_kif_unref(cur->kif, PFI_KIF_REF_STATE);
	TAILQ_REMOVE(&state_list, cur, entry_list);
	if (cur->tag)
		pf_tag_unref(cur->tag);
	pf_state_unref(cur);
	pf_status.fcounters[FCNT_STATE_REMOVALS]++;
	pf_status.states--;
}

/*
 * Scan up to maxcheck states for expiry.  The scan position persists
 * across calls (static cur) so successive invocations walk the whole
 * table incrementally.  Expired states are collected under the read
 * lock and then removed/freed under the write lock.
 */
void
pf_purge_expired_states(u_int32_t maxcheck)
{
	/* cur keeps the scan position between invocations */
	static struct pf_state	*cur = NULL;
	struct pf_state		*next;
	SLIST_HEAD(pf_state_gcl, pf_state) gcl;

	PF_ASSERT_UNLOCKED();
	SLIST_INIT(&gcl);

	PF_STATE_ENTER_READ();
	while (maxcheck--) {
		/* wrap to start of list when we hit the end */
		if (cur == NULL) {
			cur = pf_state_ref(TAILQ_FIRST(&state_list));
			if (cur == NULL)
				break;	/* list empty */
		}

		/* get next state, as cur may get deleted */
		next = TAILQ_NEXT(cur, entry_list);

		/* keep the reference for expired states (consumed below) */
		if ((cur->timeout == PFTM_UNLINKED) ||
		    (pf_state_expires(cur) <= getuptime()))
			SLIST_INSERT_HEAD(&gcl, cur, gc_list);
		else
			pf_state_unref(cur);

		cur = pf_state_ref(next);

		if (cur == NULL)
			break;
	}
	PF_STATE_EXIT_READ();

	PF_LOCK();
	PF_STATE_ENTER_WRITE();
	while ((next = SLIST_FIRST(&gcl)) != NULL) {
		SLIST_REMOVE_HEAD(&gcl, gc_list);
		if (next->timeout == PFTM_UNLINKED)
			pf_free_state(next);
		else {
			pf_remove_state(next);
			pf_free_state(next);
		}

		pf_state_unref(next);
	}
	PF_STATE_EXIT_WRITE();
	PF_UNLOCK();
}

/*
 * Attach the table referenced by an address wrapper to the ruleset.
 * Returns 0 on success (or when no table is involved), 1 on failure.
 */
int
pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw)
{
	if (aw->type != PF_ADDR_TABLE)
		return (0);
	if ((aw->p.tbl = pfr_attach_table(rs, aw->v.tblname, 1)) == NULL)
		return (1);
	return (0);
}

/* Drop the table reference held by an address wrapper, if any. */
void
pf_tbladdr_remove(struct pf_addr_wrap *aw)
{
	if (aw->type != PF_ADDR_TABLE || aw->p.tbl == NULL)
		return;
	pfr_detach_table(aw->p.tbl);
	aw->p.tbl = NULL;
}

/*
 * Replace the kernel table pointer with the address count that
 * userland expects (-1 when the table is inactive).
 */
void
pf_tbladdr_copyout(struct pf_addr_wrap *aw)
{
	struct pfr_ktable *kt = aw->p.tbl;

	if (aw->type != PF_ADDR_TABLE || kt == NULL)
		return;
	/* inactive attachment points refer to the root table */
	if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL)
		kt = kt->pfrkt_root;
	aw->p.tbl = NULL;
	aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ?
	    kt->pfrkt_cnt : -1;
}
/*
 * Log an address (and optional network-order port) via addlog().
 * IPv4 prints dotted-quad with ":port"; IPv6 prints the compressed
 * form with "[port]".
 */
void
pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af)
{
	switch (af) {
	case AF_INET: {
		u_int32_t a = ntohl(addr->addr32[0]);
		addlog("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255,
		    (a>>8)&255, a&255);
		if (p) {
			p = ntohs(p);
			addlog(":%u", p);
		}
		break;
	}
#ifdef INET6
	case AF_INET6: {
		u_int16_t b;
		u_int8_t i, curstart, curend, maxstart, maxend;
		/* 255 == "no zero run in progress" sentinel */
		curstart = curend = maxstart = maxend = 255;
		/* find the longest run of zero 16-bit groups for "::" */
		for (i = 0; i < 8; i++) {
			if (!addr->addr16[i]) {
				if (curstart == 255)
					curstart = i;
				curend = i;
			} else {
				if ((curend - curstart) >
				    (maxend - maxstart)) {
					maxstart = curstart;
					maxend = curend;
				}
				curstart = curend = 255;
			}
		}
		/* a run ending at group 7 is only closed out here */
		if ((curend - curstart) >
		    (maxend - maxstart)) {
			maxstart = curstart;
			maxend = curend;
		}
		for (i = 0; i < 8; i++) {
			if (i >= maxstart && i <= maxend) {
				if (i == 0)
					addlog(":");
				if (i == maxend)
					addlog(":");
			} else {
				b = ntohs(addr->addr16[i]);
				addlog("%x", b);
				if (i < 7)
					addlog(":");
			}
		}
		if (p) {
			p = ntohs(p);
			addlog("[%u]", p);
		}
		break;
	}
#endif /* INET6 */
	}
}

/* Convenience wrapper: print both keys of a full state. */
void
pf_print_state(struct pf_state *s)
{
	pf_print_state_parts(s, NULL, NULL);
}

/*
 * Log a state (or bare state keys): protocol, direction, wire and
 * stack keys, and for full states the TCP sequencing windows, peer
 * states and rule number.  Any of the arguments may be NULL.
 */
void
pf_print_state_parts(struct pf_state *s,
    struct pf_state_key *skwp, struct pf_state_key *sksp)
{
	struct pf_state_key *skw, *sks;
	u_int8_t proto, dir;

	/* Do our best to fill these, but they're skipped if NULL */
	skw = skwp ? skwp : (s ? s->key[PF_SK_WIRE] : NULL);
	sks = sksp ? sksp : (s ? s->key[PF_SK_STACK] : NULL);
	proto = skw ? skw->proto : (sks ? sks->proto : 0);
	dir = s ? s->direction : 0;

	switch (proto) {
	case IPPROTO_IPV4:
		addlog("IPv4");
		break;
	case IPPROTO_IPV6:
		addlog("IPv6");
		break;
	case IPPROTO_TCP:
		addlog("TCP");
		break;
	case IPPROTO_UDP:
		addlog("UDP");
		break;
	case IPPROTO_ICMP:
		addlog("ICMP");
		break;
	case IPPROTO_ICMPV6:
		addlog("ICMPv6");
		break;
	default:
		addlog("%u", proto);
		break;
	}
	switch (dir) {
	case PF_IN:
		addlog(" in");
		break;
	case PF_OUT:
		addlog(" out");
		break;
	}
	if (skw) {
		addlog(" wire: (%d) ", skw->rdomain);
		pf_print_host(&skw->addr[0], skw->port[0], skw->af);
		addlog(" ");
		pf_print_host(&skw->addr[1], skw->port[1], skw->af);
	}
	if (sks) {
		addlog(" stack: (%d) ", sks->rdomain);
		if (sks != skw) {
			pf_print_host(&sks->addr[0], sks->port[0], sks->af);
			addlog(" ");
			pf_print_host(&sks->addr[1], sks->port[1], sks->af);
		} else
			addlog("-");
	}
	if (s) {
		if (proto == IPPROTO_TCP) {
			addlog(" [lo=%u high=%u win=%u modulator=%u",
			    s->src.seqlo, s->src.seqhi,
			    s->src.max_win, s->src.seqdiff);
			if (s->src.wscale && s->dst.wscale)
				addlog(" wscale=%u",
				    s->src.wscale & PF_WSCALE_MASK);
			addlog("]");
			addlog(" [lo=%u high=%u win=%u modulator=%u",
			    s->dst.seqlo, s->dst.seqhi,
			    s->dst.max_win, s->dst.seqdiff);
			if (s->src.wscale && s->dst.wscale)
				addlog(" wscale=%u",
				    s->dst.wscale & PF_WSCALE_MASK);
			addlog("]");
		}
		addlog(" %u:%u", s->src.state, s->dst.state);
		if (s->rule.ptr)
			addlog(" @%d", s->rule.ptr->nr);
	}
}

/* Log TCP flags as the usual one-letter abbreviations (FSRPAUEW). */
void
pf_print_flags(u_int8_t f)
{
	if (f)
		addlog(" ");
	if (f & TH_FIN)
		addlog("F");
	if (f & TH_SYN)
		addlog("S");
	if (f & TH_RST)
		addlog("R");
	if (f & TH_PUSH)
		addlog("P");
	if (f & TH_ACK)
		addlog("A");
	if (f & TH_URG)
		addlog("U");
	if (f & TH_ECE)
		addlog("E");
	if (f & TH_CWR)
		addlog("W");
}
/*
 * Advance head[i] forward to cur, pointing every rule passed over at
 * cur as its skip target for criterion i.
 */
#define PF_SET_SKIP_STEPS(i)					\
	do {							\
		while (head[i] != cur) {			\
			head[i]->skip[i].ptr = cur;		\
			head[i] = TAILQ_NEXT(head[i], entries);	\
		}						\
	} while (0)

/*
 * Precompute the skip-step pointers for a rule queue: for each match
 * criterion, each rule points at the next rule that differs in that
 * criterion, letting evaluation skip runs of rules that cannot match.
 */
void
pf_calc_skip_steps(struct pf_rulequeue *rules)
{
	struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT];
	int i;

	cur = TAILQ_FIRST(rules);
	prev = cur;
	for (i = 0; i < PF_SKIP_COUNT; ++i)
		head[i] = cur;
	while (cur != NULL) {
		if (cur->kif != prev->kif || cur->ifnot != prev->ifnot)
			PF_SET_SKIP_STEPS(PF_SKIP_IFP);
		if (cur->direction != prev->direction)
			PF_SET_SKIP_STEPS(PF_SKIP_DIR);
		if (cur->onrdomain != prev->onrdomain ||
		    cur->ifnot != prev->ifnot)
			PF_SET_SKIP_STEPS(PF_SKIP_RDOM);
		if (cur->af != prev->af)
			PF_SET_SKIP_STEPS(PF_SKIP_AF);
		if (cur->proto != prev->proto)
			PF_SET_SKIP_STEPS(PF_SKIP_PROTO);
		if (cur->src.neg != prev->src.neg ||
		    pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr))
			PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR);
		if (cur->dst.neg != prev->dst.neg ||
		    pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr))
			PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR);
		if (cur->src.port[0] != prev->src.port[0] ||
		    cur->src.port[1] != prev->src.port[1] ||
		    cur->src.port_op != prev->src.port_op)
			PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT);
		if (cur->dst.port[0] != prev->dst.port[0] ||
		    cur->dst.port[1] != prev->dst.port[1] ||
		    cur->dst.port_op != prev->dst.port_op)
			PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);

		prev = cur;
		cur = TAILQ_NEXT(cur, entries);
	}
	/* flush the remaining tails so every rule has a skip target */
	for (i = 0; i < PF_SKIP_COUNT; ++i)
		PF_SET_SKIP_STEPS(i);
}

/*
 * Compare two address wrappers; returns non-zero when they differ
 * (used only for skip-step equality, hence AF_INET6-width compares).
 */
int
pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2)
{
	if (aw1->type != aw2->type)
		return (1);
	switch (aw1->type) {
	case PF_ADDR_ADDRMASK:
	case PF_ADDR_RANGE:
		if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, AF_INET6))
			return (1);
		if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, AF_INET6))
			return (1);
		return (0);
	case PF_ADDR_DYNIFTL:
		return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt);
	case PF_ADDR_NONE:
	case PF_ADDR_NOROUTE:
	case PF_ADDR_URPFFAILED:
		return (0);
	case PF_ADDR_TABLE:
		return (aw1->p.tbl != aw2->p.tbl);
	case PF_ADDR_RTLABEL:
		return (aw1->v.rtlabel != aw2->v.rtlabel);
	default:
		addlog("invalid address type: %d\n", aw1->type);
		return (1);
	}
}

/* This algorithm computes 'a + b - c' in ones-complement using a trick to
 * emulate at most one ones-complement subtraction. This thereby limits net
 * carries/borrows to at most one, eliminating a reduction step and saving one
 * each of +, >>, & and ~.
 *
 * def. x mod y = x - (x//y)*y		for integer x,y
 * def. sum = x mod 2^16
 * def. accumulator = (x >> 16) mod 2^16
 *
 * The trick works as follows: subtracting exactly one u_int16_t from the
 * u_int32_t x incurs at most one underflow, wrapping its upper 16-bits, the
 * accumulator, to 2^16 - 1. Adding this to the 16-bit sum preserves the
 * ones-complement borrow:
 *
 *	(sum + accumulator) mod 2^16
 * =	{ assume underflow: accumulator := 2^16 - 1 }
 *	(sum + 2^16 - 1) mod 2^16
 * =	{ mod }
 *	(sum - 1) mod 2^16
 *
 * Although this breaks for sum = 0, giving 0xffff, which is ones-complement's
 * other zero, not -1, that cannot occur: the 16-bit sum cannot be underflown
 * to zero as that requires subtraction of at least 2^16, which exceeds a
 * single u_int16_t's range.
 *
 * We use the following theorem to derive the implementation:
 *
 * th.	(x + (y mod z)) mod z = (x + y) mod z	(0)
 * proof.
 *	(x + (y mod z)) mod z
 * =	{ def mod }
 *	(x + y - (y//z)*z) mod z
 * =	{ (a + b*c) mod c = a mod c }
 *	(x + y) mod z			[end of proof]
 *
 * ... and thereby obtain:
 *
 *	(sum + accumulator) mod 2^16
 * =	{ def. accumulator, def. sum }
 *	(x mod 2^16 + (x >> 16) mod 2^16) mod 2^16
 * =	{ (0), twice }
 *	(x + (x >> 16)) mod 2^16
 * =	{ x mod 2^n = x & (2^n - 1) }
 *	(x + (x >> 16)) & 0xffff
 *
 * Note: this serves also as a reduction step for at most one add (as the
 * trailing mod 2^16 prevents further reductions by destroying carries).
 */
static __inline void
pf_cksum_fixup(u_int16_t *cksum, u_int16_t was, u_int16_t now,
    u_int8_t proto)
{
	u_int32_t x;
	const int udp = proto == IPPROTO_UDP;

	x = *cksum + was - now;
	x = (x + (x >> 16)) & 0xffff;

	/* optimise: eliminate a branch when not udp */
	/* 0x0000 means "no checksum" for UDP; never turn it on or off */
	if (udp && *cksum == 0x0000)
		return;
	if (udp && x == 0x0000)
		x = 0xffff;

	*cksum = (u_int16_t)(x);
}

/* pre: coverage(cksum) is superset of coverage(covered_cksum) */
static __inline void
pf_cksum_uncover(u_int16_t *cksum, u_int16_t covered_cksum, u_int8_t proto)
{
	pf_cksum_fixup(cksum, ~covered_cksum, 0x0, proto);
}

/* pre: disjoint(coverage(cksum), coverage(uncovered_cksum)) */
static __inline void
pf_cksum_cover(u_int16_t *cksum, u_int16_t uncovered_cksum, u_int8_t proto)
{
	pf_cksum_fixup(cksum, 0x0, ~uncovered_cksum, proto);
}

/* pre: *a is 16-bit aligned within its packet
 *
 * This algorithm emulates 16-bit ones-complement sums on a twos-complement
 * machine by conserving ones-complement's otherwise discarded carries in the
 * upper bits of x. These accumulated carries when added to the lower 16-bits
 * over at least zero 'reduction' steps then complete the ones-complement sum.
 *
 * def. sum = x mod 2^16
 * def. accumulator = (x >> 16)
 *
 * At most two reduction steps
 *
 *	x := sum + accumulator
 * =	{ def sum, def accumulator }
 *	x := x mod 2^16 + (x >> 16)
 * =	{ x mod 2^n = x & (2^n - 1) }
 *	x := (x & 0xffff) + (x >> 16)
 *
 * are necessary to incorporate the accumulated carries (at most one per add)
 * i.e. to reduce x < 2^16 from at most 16 carries in the upper 16 bits.
 *
 * The function is also invariant over the endian of the host. Why?
 *
 * Define the unary transpose operator ~ on a bitstring in python slice
 * notation as lambda m: m[P:] + m[:P] , for some constant pivot P.
 *
 * th. ~ distributes over ones-complement addition, denoted by +_1, i.e.
 *
 *	~m +_1 ~n = ~(m +_1 n)	(for all bitstrings m,n of equal length)
 *
 * proof. Regard the bitstrings in m +_1 n as split at P, forming at most two
 * 'half-adds'. Under ones-complement addition, each half-add carries to the
 * other, so the sum of each half-add is unaffected by their relative order.
 * Therefore:
 *
 *	~m +_1 ~n
 * =	{ half-adds invariant under transposition }
 *	~s
 * =	{ substitute }
 *	~(m +_1 n)		[end of proof]
 *
 * th. Summing two in-memory ones-complement 16-bit variables m,n on a machine
 * with the converse endian does not alter the result.
 *
 * proof.
 *	{ converse machine endian: load/store transposes, P := 8 }
 *	~(~m +_1 ~n)
 * =	{ ~ over +_1 }
 *	~~m +_1 ~~n
 * =	{ ~ is an involution }
 *	m +_1 n			[end of proof]
 *
 */
#define NEG(x) ((u_int16_t)~(x))
void
pf_cksum_fixup_a(u_int16_t *cksum, const struct pf_addr *a,
    const struct pf_addr *an, sa_family_t af, u_int8_t proto)
{
	u_int32_t	 x;
	const u_int16_t	*n = an->addr16;
	const u_int16_t	*o = a->addr16;
	const int	 udp = proto == IPPROTO_UDP;

	switch (af) {
	case AF_INET:
		x = *cksum + o[0] + NEG(n[0]) + o[1] + NEG(n[1]);
		break;
#ifdef INET6
	case AF_INET6:
		x = *cksum + o[0] + NEG(n[0]) + o[1] + NEG(n[1]) +\
			     o[2] + NEG(n[2]) + o[3] + NEG(n[3]) +\
			     o[4] + NEG(n[4]) + o[5] + NEG(n[5]) +\
			     o[6] + NEG(n[6]) + o[7] + NEG(n[7]);
		break;
#endif /* INET6 */
	default:
		unhandled_af(af);
	}

	x = (x & 0xffff) + (x >> 16);
	x = (x & 0xffff) + (x >> 16);

	/* optimise: eliminate a branch when not udp */
	/* 0x0000 means "no checksum" for UDP; never turn it on or off */
	if (udp && *cksum == 0x0000)
		return;
	if (udp && x == 0x0000)
		x = 0xffff;

	*cksum = (u_int16_t)(x);
}

/*
 * Patch a single byte in the packet and fix up the transport checksum.
 * hi says whether the byte occupies the high (even) or low (odd)
 * position of its 16-bit checksum word.  Returns 1 if a change was
 * made (caller must copy the header back), else 0.
 */
int
pf_patch_8(struct pf_pdesc *pd, u_int8_t *f, u_int8_t v, bool hi)
{
	int	rewrite = 0;

	if (*f != v) {
		u_int16_t old = htons(hi ? (*f << 8) : *f);
		u_int16_t new = htons(hi ? ( v << 8) :  v);

		pf_cksum_fixup(pd->pcksum, old, new, pd->proto);
		*f = v;
		rewrite = 1;
	}

	return (rewrite);
}
( v << 8) : v); 1977 1978 pf_cksum_fixup(pd->pcksum, old, new, pd->proto); 1979 *f = v; 1980 rewrite = 1; 1981 } 1982 1983 return (rewrite); 1984 } 1985 1986 /* pre: *f is 16-bit aligned within its packet */ 1987 int 1988 pf_patch_16(struct pf_pdesc *pd, u_int16_t *f, u_int16_t v) 1989 { 1990 int rewrite = 0; 1991 1992 if (*f != v) { 1993 pf_cksum_fixup(pd->pcksum, *f, v, pd->proto); 1994 *f = v; 1995 rewrite = 1; 1996 } 1997 1998 return (rewrite); 1999 } 2000 2001 int 2002 pf_patch_16_unaligned(struct pf_pdesc *pd, void *f, u_int16_t v, bool hi) 2003 { 2004 int rewrite = 0; 2005 u_int8_t *fb = (u_int8_t*)f; 2006 u_int8_t *vb = (u_int8_t*)&v; 2007 2008 if (hi && ALIGNED_POINTER(f, u_int16_t)) { 2009 return (pf_patch_16(pd, f, v)); /* optimise */ 2010 } 2011 2012 rewrite += pf_patch_8(pd, fb++, *vb++, hi); 2013 rewrite += pf_patch_8(pd, fb++, *vb++,!hi); 2014 2015 return (rewrite); 2016 } 2017 2018 /* pre: *f is 16-bit aligned within its packet */ 2019 /* pre: pd->proto != IPPROTO_UDP */ 2020 int 2021 pf_patch_32(struct pf_pdesc *pd, u_int32_t *f, u_int32_t v) 2022 { 2023 int rewrite = 0; 2024 u_int16_t *pc = pd->pcksum; 2025 u_int8_t proto = pd->proto; 2026 2027 /* optimise: inline udp fixup code is unused; let compiler scrub it */ 2028 if (proto == IPPROTO_UDP) 2029 panic("%s: udp", __func__); 2030 2031 /* optimise: skip *f != v guard; true for all use-cases */ 2032 pf_cksum_fixup(pc, *f / (1 << 16), v / (1 << 16), proto); 2033 pf_cksum_fixup(pc, *f % (1 << 16), v % (1 << 16), proto); 2034 2035 *f = v; 2036 rewrite = 1; 2037 2038 return (rewrite); 2039 } 2040 2041 int 2042 pf_patch_32_unaligned(struct pf_pdesc *pd, void *f, u_int32_t v, bool hi) 2043 { 2044 int rewrite = 0; 2045 u_int8_t *fb = (u_int8_t*)f; 2046 u_int8_t *vb = (u_int8_t*)&v; 2047 2048 if (hi && ALIGNED_POINTER(f, u_int32_t)) { 2049 return (pf_patch_32(pd, f, v)); /* optimise */ 2050 } 2051 2052 rewrite += pf_patch_8(pd, fb++, *vb++, hi); 2053 rewrite += pf_patch_8(pd, fb++, *vb++,!hi); 2054 
/*
 * Classify an ICMP/ICMPv6 type for state matching.
 *
 * Sets *icmp_dir (PF_IN for queries that create/match their own state,
 * PF_OUT for responses matching a state created in the opposite
 * direction), *virtual_type (the query type representing the pair, in
 * network byte order on return) and *virtual_id (the query id, or 0
 * when the type carries nothing usable).
 *
 * Returns 0 when the type matches its own state, 1 for error types
 * (unreach, time exceeded, ...) that match the state of the connection
 * quoted inside the ICMP payload.
 */
int
pf_icmp_mapping(struct pf_pdesc *pd, u_int8_t type, int *icmp_dir,
    u_int16_t *virtual_id, u_int16_t *virtual_type)
{
	/*
	 * ICMP types marked with PF_OUT are typically responses to
	 * PF_IN, and will match states in the opposite direction.
	 * PF_IN ICMP types need to match a state with that type.
	 */
	*icmp_dir = PF_OUT;

	/* Queries (and responses) */
	switch (pd->af) {
	case AF_INET:
		switch (type) {
		case ICMP_ECHO:
			*icmp_dir = PF_IN;
			/* FALLTHROUGH */
		case ICMP_ECHOREPLY:
			*virtual_type = ICMP_ECHO;
			*virtual_id = pd->hdr.icmp.icmp_id;
			break;

		case ICMP_TSTAMP:
			*icmp_dir = PF_IN;
			/* FALLTHROUGH */
		case ICMP_TSTAMPREPLY:
			*virtual_type = ICMP_TSTAMP;
			*virtual_id = pd->hdr.icmp.icmp_id;
			break;

		case ICMP_IREQ:
			*icmp_dir = PF_IN;
			/* FALLTHROUGH */
		case ICMP_IREQREPLY:
			*virtual_type = ICMP_IREQ;
			*virtual_id = pd->hdr.icmp.icmp_id;
			break;

		case ICMP_MASKREQ:
			*icmp_dir = PF_IN;
			/* FALLTHROUGH */
		case ICMP_MASKREPLY:
			*virtual_type = ICMP_MASKREQ;
			*virtual_id = pd->hdr.icmp.icmp_id;
			break;

		case ICMP_IPV6_WHEREAREYOU:
			*icmp_dir = PF_IN;
			/* FALLTHROUGH */
		case ICMP_IPV6_IAMHERE:
			*virtual_type = ICMP_IPV6_WHEREAREYOU;
			*virtual_id = 0; /* Nothing sane to match on! */
			break;

		case ICMP_MOBILE_REGREQUEST:
			*icmp_dir = PF_IN;
			/* FALLTHROUGH */
		case ICMP_MOBILE_REGREPLY:
			*virtual_type = ICMP_MOBILE_REGREQUEST;
			*virtual_id = 0; /* Nothing sane to match on! */
			break;

		case ICMP_ROUTERSOLICIT:
			*icmp_dir = PF_IN;
			/* FALLTHROUGH */
		case ICMP_ROUTERADVERT:
			*virtual_type = ICMP_ROUTERSOLICIT;
			*virtual_id = 0; /* Nothing sane to match on! */
			break;

		/* These ICMP types map to other connections */
		case ICMP_UNREACH:
		case ICMP_SOURCEQUENCH:
		case ICMP_REDIRECT:
		case ICMP_TIMXCEED:
		case ICMP_PARAMPROB:
			/* These will not be used, but set them anyway */
			*icmp_dir = PF_IN;
			*virtual_type = htons(type);
			*virtual_id = 0;
			return (1);  /* These types match to another state */

		/*
		 * All remaining ICMP types get their own states,
		 * and will only match in one direction.
		 */
		default:
			*icmp_dir = PF_IN;
			*virtual_type = type;
			*virtual_id = 0;
			break;
		}
		break;
#ifdef INET6
	case AF_INET6:
		switch (type) {
		case ICMP6_ECHO_REQUEST:
			*icmp_dir = PF_IN;
			/* FALLTHROUGH */
		case ICMP6_ECHO_REPLY:
			*virtual_type = ICMP6_ECHO_REQUEST;
			*virtual_id = pd->hdr.icmp6.icmp6_id;
			break;

		case MLD_LISTENER_QUERY:
		case MLD_LISTENER_REPORT: {
			struct mld_hdr *mld = &pd->hdr.mld;
			u_int32_t h;

			/*
			 * Listener Report can be sent by clients
			 * without an associated Listener Query.
			 * In addition to that, when Report is sent as a
			 * reply to a Query its source and destination
			 * address are different.
			 */
			*icmp_dir = PF_IN;
			*virtual_type = MLD_LISTENER_QUERY;
			/* generate fake id for these messages */
			h = mld->mld_addr.s6_addr32[0] ^
			    mld->mld_addr.s6_addr32[1] ^
			    mld->mld_addr.s6_addr32[2] ^
			    mld->mld_addr.s6_addr32[3];
			*virtual_id = (h >> 16) ^ (h & 0xffff);
			break;
		}

		/*
		 * ICMP6_FQDN and ICMP6_NI query/reply are the same type as
		 * ICMP6_WRU
		 */
		case ICMP6_WRUREQUEST:
			*icmp_dir = PF_IN;
			/* FALLTHROUGH */
		case ICMP6_WRUREPLY:
			*virtual_type = ICMP6_WRUREQUEST;
			*virtual_id = 0; /* Nothing sane to match on! */
			break;

		case MLD_MTRACE:
			*icmp_dir = PF_IN;
			/* FALLTHROUGH */
		case MLD_MTRACE_RESP:
			*virtual_type = MLD_MTRACE;
			*virtual_id = 0; /* Nothing sane to match on! */
			break;

		case ND_NEIGHBOR_SOLICIT:
			*icmp_dir = PF_IN;
			/* FALLTHROUGH */
		case ND_NEIGHBOR_ADVERT: {
			struct nd_neighbor_solicit *nd = &pd->hdr.nd_ns;
			u_int32_t h;

			*virtual_type = ND_NEIGHBOR_SOLICIT;
			/* generate fake id for these messages */
			h = nd->nd_ns_target.s6_addr32[0] ^
			    nd->nd_ns_target.s6_addr32[1] ^
			    nd->nd_ns_target.s6_addr32[2] ^
			    nd->nd_ns_target.s6_addr32[3];
			*virtual_id = (h >> 16) ^ (h & 0xffff);
			break;
		}

		/*
		 * These ICMP types map to other connections.
		 * ND_REDIRECT can't be in this list because the triggering
		 * packet header is optional.
		 */
		case ICMP6_DST_UNREACH:
		case ICMP6_PACKET_TOO_BIG:
		case ICMP6_TIME_EXCEEDED:
		case ICMP6_PARAM_PROB:
			/* These will not be used, but set them anyway */
			*icmp_dir = PF_IN;
			*virtual_type = htons(type);
			*virtual_id = 0;
			return (1);  /* These types match to another state */
		/*
		 * All remaining ICMP6 types get their own states,
		 * and will only match in one direction.
		 */
		default:
			*icmp_dir = PF_IN;
			*virtual_type = type;
			*virtual_id = 0;
			break;
		}
		break;
#endif /* INET6 */
	}
	*virtual_type = htons(*virtual_type);
	return (0);  /* These types match to their own state */
}
2242 */ 2243 default: 2244 *icmp_dir = PF_IN; 2245 *virtual_type = type; 2246 *virtual_id = 0; 2247 break; 2248 } 2249 break; 2250 #endif /* INET6 */ 2251 } 2252 *virtual_type = htons(*virtual_type); 2253 return (0); /* These types match to their own state */ 2254 } 2255 2256 void 2257 pf_translate_icmp(struct pf_pdesc *pd, struct pf_addr *qa, u_int16_t *qp, 2258 struct pf_addr *oa, struct pf_addr *na, u_int16_t np) 2259 { 2260 /* note: doesn't trouble to fixup quoted checksums, if any */ 2261 2262 /* change quoted protocol port */ 2263 if (qp != NULL) 2264 pf_patch_16(pd, qp, np); 2265 2266 /* change quoted ip address */ 2267 pf_cksum_fixup_a(pd->pcksum, qa, na, pd->af, pd->proto); 2268 pf_addrcpy(qa, na, pd->af); 2269 2270 /* change network-header's ip address */ 2271 if (oa) 2272 pf_translate_a(pd, oa, na); 2273 } 2274 2275 /* pre: *a is 16-bit aligned within its packet */ 2276 /* *a is a network header src/dst address */ 2277 int 2278 pf_translate_a(struct pf_pdesc *pd, struct pf_addr *a, struct pf_addr *an) 2279 { 2280 int rewrite = 0; 2281 2282 /* warning: !PF_ANEQ != PF_AEQ */ 2283 if (!PF_ANEQ(a, an, pd->af)) 2284 return (0); 2285 2286 /* fixup transport pseudo-header, if any */ 2287 switch (pd->proto) { 2288 case IPPROTO_TCP: /* FALLTHROUGH */ 2289 case IPPROTO_UDP: /* FALLTHROUGH */ 2290 case IPPROTO_ICMPV6: 2291 pf_cksum_fixup_a(pd->pcksum, a, an, pd->af, pd->proto); 2292 break; 2293 default: 2294 break; /* assume no pseudo-header */ 2295 } 2296 2297 pf_addrcpy(a, an, pd->af); 2298 rewrite = 1; 2299 2300 return (rewrite); 2301 } 2302 2303 #if INET6 2304 /* pf_translate_af() may change pd->m, adjust local copies after calling */ 2305 int 2306 pf_translate_af(struct pf_pdesc *pd) 2307 { 2308 static const struct pf_addr zero; 2309 struct ip *ip4; 2310 struct ip6_hdr *ip6; 2311 int copyback = 0; 2312 u_int hlen, ohlen, dlen; 2313 u_int16_t *pc; 2314 u_int8_t af_proto, naf_proto; 2315 2316 hlen = (pd->naf == AF_INET) ? 
sizeof(*ip4) : sizeof(*ip6); 2317 ohlen = pd->off; 2318 dlen = pd->tot_len - pd->off; 2319 pc = pd->pcksum; 2320 2321 af_proto = naf_proto = pd->proto; 2322 if (naf_proto == IPPROTO_ICMP) 2323 af_proto = IPPROTO_ICMPV6; 2324 if (naf_proto == IPPROTO_ICMPV6) 2325 af_proto = IPPROTO_ICMP; 2326 2327 /* uncover stale pseudo-header */ 2328 switch (af_proto) { 2329 case IPPROTO_ICMPV6: 2330 /* optimise: unchanged for TCP/UDP */ 2331 pf_cksum_fixup(pc, htons(af_proto), 0x0, af_proto); 2332 pf_cksum_fixup(pc, htons(dlen), 0x0, af_proto); 2333 /* FALLTHROUGH */ 2334 case IPPROTO_UDP: /* FALLTHROUGH */ 2335 case IPPROTO_TCP: 2336 pf_cksum_fixup_a(pc, pd->src, &zero, pd->af, af_proto); 2337 pf_cksum_fixup_a(pc, pd->dst, &zero, pd->af, af_proto); 2338 copyback = 1; 2339 break; 2340 default: 2341 break; /* assume no pseudo-header */ 2342 } 2343 2344 /* replace the network header */ 2345 m_adj(pd->m, pd->off); 2346 pd->src = NULL; 2347 pd->dst = NULL; 2348 2349 if ((M_PREPEND(pd->m, hlen, M_DONTWAIT)) == NULL) { 2350 pd->m = NULL; 2351 return (-1); 2352 } 2353 2354 pd->off = hlen; 2355 pd->tot_len += hlen - ohlen; 2356 2357 switch (pd->naf) { 2358 case AF_INET: 2359 ip4 = mtod(pd->m, struct ip *); 2360 memset(ip4, 0, hlen); 2361 ip4->ip_v = IPVERSION; 2362 ip4->ip_hl = hlen >> 2; 2363 ip4->ip_tos = pd->tos; 2364 ip4->ip_len = htons(hlen + dlen); 2365 ip4->ip_id = htons(ip_randomid()); 2366 ip4->ip_off = htons(IP_DF); 2367 ip4->ip_ttl = pd->ttl; 2368 ip4->ip_p = pd->proto; 2369 ip4->ip_src = pd->nsaddr.v4; 2370 ip4->ip_dst = pd->ndaddr.v4; 2371 break; 2372 case AF_INET6: 2373 ip6 = mtod(pd->m, struct ip6_hdr *); 2374 memset(ip6, 0, hlen); 2375 ip6->ip6_vfc = IPV6_VERSION; 2376 ip6->ip6_flow |= htonl((u_int32_t)pd->tos << 20); 2377 ip6->ip6_plen = htons(dlen); 2378 ip6->ip6_nxt = pd->proto; 2379 if (!pd->ttl || pd->ttl > IPV6_DEFHLIM) 2380 ip6->ip6_hlim = IPV6_DEFHLIM; 2381 else 2382 ip6->ip6_hlim = pd->ttl; 2383 ip6->ip6_src = pd->nsaddr.v6; 2384 ip6->ip6_dst = pd->ndaddr.v6; 
2385 break; 2386 default: 2387 unhandled_af(pd->naf); 2388 } 2389 2390 /* UDP over IPv6 must be checksummed per rfc2460 p27 */ 2391 if (naf_proto == IPPROTO_UDP && *pc == 0x0000 && 2392 pd->naf == AF_INET6) { 2393 pd->m->m_pkthdr.csum_flags |= M_UDP_CSUM_OUT; 2394 } 2395 2396 /* cover fresh pseudo-header */ 2397 switch (naf_proto) { 2398 case IPPROTO_ICMPV6: 2399 /* optimise: unchanged for TCP/UDP */ 2400 pf_cksum_fixup(pc, 0x0, htons(naf_proto), naf_proto); 2401 pf_cksum_fixup(pc, 0x0, htons(dlen), naf_proto); 2402 /* FALLTHROUGH */ 2403 case IPPROTO_UDP: /* FALLTHROUGH */ 2404 case IPPROTO_TCP: 2405 pf_cksum_fixup_a(pc, &zero, &pd->nsaddr, pd->naf, naf_proto); 2406 pf_cksum_fixup_a(pc, &zero, &pd->ndaddr, pd->naf, naf_proto); 2407 copyback = 1; 2408 break; 2409 default: 2410 break; /* assume no pseudo-header */ 2411 } 2412 2413 /* flush pd->pcksum */ 2414 if (copyback) 2415 m_copyback(pd->m, pd->off, pd->hdrlen, &pd->hdr, M_NOWAIT); 2416 2417 return (0); 2418 } 2419 2420 int 2421 pf_change_icmp_af(struct mbuf *m, int ipoff2, struct pf_pdesc *pd, 2422 struct pf_pdesc *pd2, struct pf_addr *src, struct pf_addr *dst, 2423 sa_family_t af, sa_family_t naf) 2424 { 2425 struct mbuf *n = NULL; 2426 struct ip *ip4; 2427 struct ip6_hdr *ip6; 2428 u_int hlen, ohlen, dlen; 2429 int d; 2430 2431 if (af == naf || (af != AF_INET && af != AF_INET6) || 2432 (naf != AF_INET && naf != AF_INET6)) 2433 return (-1); 2434 2435 /* split the mbuf chain on the quoted ip/ip6 header boundary */ 2436 if ((n = m_split(m, ipoff2, M_DONTWAIT)) == NULL) 2437 return (-1); 2438 2439 /* new quoted header */ 2440 hlen = naf == AF_INET ? 
sizeof(*ip4) : sizeof(*ip6); 2441 /* old quoted header */ 2442 ohlen = pd2->off - ipoff2; 2443 2444 /* trim old quoted header */ 2445 pf_cksum_uncover(pd->pcksum, in_cksum(n, ohlen), pd->proto); 2446 m_adj(n, ohlen); 2447 2448 /* prepend a new, translated, quoted header */ 2449 if ((M_PREPEND(n, hlen, M_DONTWAIT)) == NULL) 2450 return (-1); 2451 2452 switch (naf) { 2453 case AF_INET: 2454 ip4 = mtod(n, struct ip *); 2455 memset(ip4, 0, sizeof(*ip4)); 2456 ip4->ip_v = IPVERSION; 2457 ip4->ip_hl = sizeof(*ip4) >> 2; 2458 ip4->ip_len = htons(sizeof(*ip4) + pd2->tot_len - ohlen); 2459 ip4->ip_id = htons(ip_randomid()); 2460 ip4->ip_off = htons(IP_DF); 2461 ip4->ip_ttl = pd2->ttl; 2462 if (pd2->proto == IPPROTO_ICMPV6) 2463 ip4->ip_p = IPPROTO_ICMP; 2464 else 2465 ip4->ip_p = pd2->proto; 2466 ip4->ip_src = src->v4; 2467 ip4->ip_dst = dst->v4; 2468 ip4->ip_sum = in_cksum(n, ip4->ip_hl << 2); 2469 break; 2470 case AF_INET6: 2471 ip6 = mtod(n, struct ip6_hdr *); 2472 memset(ip6, 0, sizeof(*ip6)); 2473 ip6->ip6_vfc = IPV6_VERSION; 2474 ip6->ip6_plen = htons(pd2->tot_len - ohlen); 2475 if (pd2->proto == IPPROTO_ICMP) 2476 ip6->ip6_nxt = IPPROTO_ICMPV6; 2477 else 2478 ip6->ip6_nxt = pd2->proto; 2479 if (!pd2->ttl || pd2->ttl > IPV6_DEFHLIM) 2480 ip6->ip6_hlim = IPV6_DEFHLIM; 2481 else 2482 ip6->ip6_hlim = pd2->ttl; 2483 ip6->ip6_src = src->v6; 2484 ip6->ip6_dst = dst->v6; 2485 break; 2486 } 2487 2488 /* cover new quoted header */ 2489 /* optimise: any new AF_INET header of ours sums to zero */ 2490 if (naf != AF_INET) { 2491 pf_cksum_cover(pd->pcksum, in_cksum(n, hlen), pd->proto); 2492 } 2493 2494 /* reattach modified quoted packet to outer header */ 2495 { 2496 int nlen = n->m_pkthdr.len; 2497 m_cat(m, n); 2498 m->m_pkthdr.len += nlen; 2499 } 2500 2501 /* account for altered length */ 2502 d = hlen - ohlen; 2503 2504 if (pd->proto == IPPROTO_ICMPV6) { 2505 /* fixup pseudo-header */ 2506 dlen = pd->tot_len - pd->off; 2507 pf_cksum_fixup(pd->pcksum, 2508 htons(dlen), 
htons(dlen + d), pd->proto); 2509 } 2510 2511 pd->tot_len += d; 2512 pd2->tot_len += d; 2513 pd2->off += d; 2514 2515 /* note: not bothering to update network headers as 2516 these due for rewrite by pf_translate_af() */ 2517 2518 return (0); 2519 } 2520 2521 2522 #define PTR_IP(field) (offsetof(struct ip, field)) 2523 #define PTR_IP6(field) (offsetof(struct ip6_hdr, field)) 2524 2525 int 2526 pf_translate_icmp_af(struct pf_pdesc *pd, int af, void *arg) 2527 { 2528 struct icmp *icmp4; 2529 struct icmp6_hdr *icmp6; 2530 u_int32_t mtu; 2531 int32_t ptr = -1; 2532 u_int8_t type; 2533 u_int8_t code; 2534 2535 switch (af) { 2536 case AF_INET: 2537 icmp6 = arg; 2538 type = icmp6->icmp6_type; 2539 code = icmp6->icmp6_code; 2540 mtu = ntohl(icmp6->icmp6_mtu); 2541 2542 switch (type) { 2543 case ICMP6_ECHO_REQUEST: 2544 type = ICMP_ECHO; 2545 break; 2546 case ICMP6_ECHO_REPLY: 2547 type = ICMP_ECHOREPLY; 2548 break; 2549 case ICMP6_DST_UNREACH: 2550 type = ICMP_UNREACH; 2551 switch (code) { 2552 case ICMP6_DST_UNREACH_NOROUTE: 2553 case ICMP6_DST_UNREACH_BEYONDSCOPE: 2554 case ICMP6_DST_UNREACH_ADDR: 2555 code = ICMP_UNREACH_HOST; 2556 break; 2557 case ICMP6_DST_UNREACH_ADMIN: 2558 code = ICMP_UNREACH_HOST_PROHIB; 2559 break; 2560 case ICMP6_DST_UNREACH_NOPORT: 2561 code = ICMP_UNREACH_PORT; 2562 break; 2563 default: 2564 return (-1); 2565 } 2566 break; 2567 case ICMP6_PACKET_TOO_BIG: 2568 type = ICMP_UNREACH; 2569 code = ICMP_UNREACH_NEEDFRAG; 2570 mtu -= 20; 2571 break; 2572 case ICMP6_TIME_EXCEEDED: 2573 type = ICMP_TIMXCEED; 2574 break; 2575 case ICMP6_PARAM_PROB: 2576 switch (code) { 2577 case ICMP6_PARAMPROB_HEADER: 2578 type = ICMP_PARAMPROB; 2579 code = ICMP_PARAMPROB_ERRATPTR; 2580 ptr = ntohl(icmp6->icmp6_pptr); 2581 2582 if (ptr == PTR_IP6(ip6_vfc)) 2583 ; /* preserve */ 2584 else if (ptr == PTR_IP6(ip6_vfc) + 1) 2585 ptr = PTR_IP(ip_tos); 2586 else if (ptr == PTR_IP6(ip6_plen) || 2587 ptr == PTR_IP6(ip6_plen) + 1) 2588 ptr = PTR_IP(ip_len); 2589 else if (ptr == 
PTR_IP6(ip6_nxt)) 2590 ptr = PTR_IP(ip_p); 2591 else if (ptr == PTR_IP6(ip6_hlim)) 2592 ptr = PTR_IP(ip_ttl); 2593 else if (ptr >= PTR_IP6(ip6_src) && 2594 ptr < PTR_IP6(ip6_dst)) 2595 ptr = PTR_IP(ip_src); 2596 else if (ptr >= PTR_IP6(ip6_dst) && 2597 ptr < sizeof(struct ip6_hdr)) 2598 ptr = PTR_IP(ip_dst); 2599 else { 2600 return (-1); 2601 } 2602 break; 2603 case ICMP6_PARAMPROB_NEXTHEADER: 2604 type = ICMP_UNREACH; 2605 code = ICMP_UNREACH_PROTOCOL; 2606 break; 2607 default: 2608 return (-1); 2609 } 2610 break; 2611 default: 2612 return (-1); 2613 } 2614 2615 pf_patch_8(pd, &icmp6->icmp6_type, type, PF_HI); 2616 pf_patch_8(pd, &icmp6->icmp6_code, code, PF_LO); 2617 2618 /* aligns well with a icmpv4 nextmtu */ 2619 pf_patch_32(pd, &icmp6->icmp6_mtu, htonl(mtu)); 2620 2621 /* icmpv4 pptr is a one most significant byte */ 2622 if (ptr >= 0) 2623 pf_patch_32(pd, &icmp6->icmp6_pptr, htonl(ptr << 24)); 2624 break; 2625 case AF_INET6: 2626 icmp4 = arg; 2627 type = icmp4->icmp_type; 2628 code = icmp4->icmp_code; 2629 mtu = ntohs(icmp4->icmp_nextmtu); 2630 2631 switch (type) { 2632 case ICMP_ECHO: 2633 type = ICMP6_ECHO_REQUEST; 2634 break; 2635 case ICMP_ECHOREPLY: 2636 type = ICMP6_ECHO_REPLY; 2637 break; 2638 case ICMP_UNREACH: 2639 type = ICMP6_DST_UNREACH; 2640 switch (code) { 2641 case ICMP_UNREACH_NET: 2642 case ICMP_UNREACH_HOST: 2643 case ICMP_UNREACH_NET_UNKNOWN: 2644 case ICMP_UNREACH_HOST_UNKNOWN: 2645 case ICMP_UNREACH_ISOLATED: 2646 case ICMP_UNREACH_TOSNET: 2647 case ICMP_UNREACH_TOSHOST: 2648 code = ICMP6_DST_UNREACH_NOROUTE; 2649 break; 2650 case ICMP_UNREACH_PORT: 2651 code = ICMP6_DST_UNREACH_NOPORT; 2652 break; 2653 case ICMP_UNREACH_NET_PROHIB: 2654 case ICMP_UNREACH_HOST_PROHIB: 2655 case ICMP_UNREACH_FILTER_PROHIB: 2656 case ICMP_UNREACH_PRECEDENCE_CUTOFF: 2657 code = ICMP6_DST_UNREACH_ADMIN; 2658 break; 2659 case ICMP_UNREACH_PROTOCOL: 2660 type = ICMP6_PARAM_PROB; 2661 code = ICMP6_PARAMPROB_NEXTHEADER; 2662 ptr = offsetof(struct ip6_hdr, 
ip6_nxt); 2663 break; 2664 case ICMP_UNREACH_NEEDFRAG: 2665 type = ICMP6_PACKET_TOO_BIG; 2666 code = 0; 2667 mtu += 20; 2668 break; 2669 default: 2670 return (-1); 2671 } 2672 break; 2673 case ICMP_TIMXCEED: 2674 type = ICMP6_TIME_EXCEEDED; 2675 break; 2676 case ICMP_PARAMPROB: 2677 type = ICMP6_PARAM_PROB; 2678 switch (code) { 2679 case ICMP_PARAMPROB_ERRATPTR: 2680 code = ICMP6_PARAMPROB_HEADER; 2681 break; 2682 case ICMP_PARAMPROB_LENGTH: 2683 code = ICMP6_PARAMPROB_HEADER; 2684 break; 2685 default: 2686 return (-1); 2687 } 2688 2689 ptr = icmp4->icmp_pptr; 2690 if (ptr == 0 || ptr == PTR_IP(ip_tos)) 2691 ; /* preserve */ 2692 else if (ptr == PTR_IP(ip_len) || 2693 ptr == PTR_IP(ip_len) + 1) 2694 ptr = PTR_IP6(ip6_plen); 2695 else if (ptr == PTR_IP(ip_ttl)) 2696 ptr = PTR_IP6(ip6_hlim); 2697 else if (ptr == PTR_IP(ip_p)) 2698 ptr = PTR_IP6(ip6_nxt); 2699 else if (ptr >= PTR_IP(ip_src) && 2700 ptr < PTR_IP(ip_dst)) 2701 ptr = PTR_IP6(ip6_src); 2702 else if (ptr >= PTR_IP(ip_dst) && 2703 ptr < sizeof(struct ip)) 2704 ptr = PTR_IP6(ip6_dst); 2705 else { 2706 return (-1); 2707 } 2708 break; 2709 default: 2710 return (-1); 2711 } 2712 2713 pf_patch_8(pd, &icmp4->icmp_type, type, PF_HI); 2714 pf_patch_8(pd, &icmp4->icmp_code, code, PF_LO); 2715 pf_patch_16(pd, &icmp4->icmp_nextmtu, htons(mtu)); 2716 if (ptr >= 0) 2717 pf_patch_32(pd, &icmp4->icmp_void, htonl(ptr)); 2718 break; 2719 } 2720 2721 return (0); 2722 } 2723 #endif /* INET6 */ 2724 2725 /* 2726 * Need to modulate the sequence numbers in the TCP SACK option 2727 * (credits to Krzysztof Pfaff for report and patch) 2728 */ 2729 int 2730 pf_modulate_sack(struct pf_pdesc *pd, struct pf_state_peer *dst) 2731 { 2732 struct sackblk sack; 2733 int copyback = 0, i; 2734 int olen, optsoff; 2735 u_int8_t opts[MAX_TCPOPTLEN], *opt, *eoh; 2736 2737 olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr); 2738 optsoff = pd->off + sizeof(struct tcphdr); 2739 #define TCPOLEN_MINSACK (TCPOLEN_SACK + 2) 2740 if (olen < 
TCPOLEN_MINSACK || 2741 !pf_pull_hdr(pd->m, optsoff, opts, olen, NULL, NULL, pd->af)) 2742 return (0); 2743 2744 eoh = opts + olen; 2745 opt = opts; 2746 while ((opt = pf_find_tcpopt(opt, opts, olen, 2747 TCPOPT_SACK, TCPOLEN_MINSACK)) != NULL) 2748 { 2749 size_t safelen = MIN(opt[1], (eoh - opt)); 2750 for (i = 2; i + TCPOLEN_SACK <= safelen; i += TCPOLEN_SACK) { 2751 size_t startoff = (opt + i) - opts; 2752 memcpy(&sack, &opt[i], sizeof(sack)); 2753 pf_patch_32_unaligned(pd, &sack.start, 2754 htonl(ntohl(sack.start) - dst->seqdiff), 2755 PF_ALGNMNT(startoff)); 2756 pf_patch_32_unaligned(pd, &sack.end, 2757 htonl(ntohl(sack.end) - dst->seqdiff), 2758 PF_ALGNMNT(startoff + sizeof(sack.start))); 2759 memcpy(&opt[i], &sack, sizeof(sack)); 2760 } 2761 copyback = 1; 2762 opt += opt[1]; 2763 } 2764 2765 if (copyback) 2766 m_copyback(pd->m, optsoff, olen, opts, M_NOWAIT); 2767 return (copyback); 2768 } 2769 2770 struct mbuf * 2771 pf_build_tcp(const struct pf_rule *r, sa_family_t af, 2772 const struct pf_addr *saddr, const struct pf_addr *daddr, 2773 u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack, 2774 u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag, 2775 u_int16_t rtag, u_int sack, u_int rdom) 2776 { 2777 struct mbuf *m; 2778 int len, tlen; 2779 struct ip *h; 2780 #ifdef INET6 2781 struct ip6_hdr *h6; 2782 #endif /* INET6 */ 2783 struct tcphdr *th; 2784 char *opt; 2785 2786 /* maximum segment size tcp option */ 2787 tlen = sizeof(struct tcphdr); 2788 if (mss) 2789 tlen += 4; 2790 if (sack) 2791 tlen += 2; 2792 2793 switch (af) { 2794 case AF_INET: 2795 len = sizeof(struct ip) + tlen; 2796 break; 2797 #ifdef INET6 2798 case AF_INET6: 2799 len = sizeof(struct ip6_hdr) + tlen; 2800 break; 2801 #endif /* INET6 */ 2802 default: 2803 unhandled_af(af); 2804 } 2805 2806 /* create outgoing mbuf */ 2807 m = m_gethdr(M_DONTWAIT, MT_HEADER); 2808 if (m == NULL) 2809 return (NULL); 2810 if (tag) 2811 m->m_pkthdr.pf.flags |= PF_TAG_GENERATED; 2812 
m->m_pkthdr.pf.tag = rtag; 2813 m->m_pkthdr.ph_rtableid = rdom; 2814 if (r && (r->scrub_flags & PFSTATE_SETPRIO)) 2815 m->m_pkthdr.pf.prio = r->set_prio[0]; 2816 if (r && r->qid) 2817 m->m_pkthdr.pf.qid = r->qid; 2818 m->m_data += max_linkhdr; 2819 m->m_pkthdr.len = m->m_len = len; 2820 m->m_pkthdr.ph_ifidx = 0; 2821 m->m_pkthdr.csum_flags |= M_TCP_CSUM_OUT; 2822 memset(m->m_data, 0, len); 2823 switch (af) { 2824 case AF_INET: 2825 h = mtod(m, struct ip *); 2826 h->ip_p = IPPROTO_TCP; 2827 h->ip_len = htons(tlen); 2828 h->ip_v = 4; 2829 h->ip_hl = sizeof(*h) >> 2; 2830 h->ip_tos = IPTOS_LOWDELAY; 2831 h->ip_len = htons(len); 2832 h->ip_off = htons(ip_mtudisc ? IP_DF : 0); 2833 h->ip_ttl = ttl ? ttl : ip_defttl; 2834 h->ip_sum = 0; 2835 h->ip_src.s_addr = saddr->v4.s_addr; 2836 h->ip_dst.s_addr = daddr->v4.s_addr; 2837 2838 th = (struct tcphdr *)((caddr_t)h + sizeof(struct ip)); 2839 break; 2840 #ifdef INET6 2841 case AF_INET6: 2842 h6 = mtod(m, struct ip6_hdr *); 2843 h6->ip6_nxt = IPPROTO_TCP; 2844 h6->ip6_plen = htons(tlen); 2845 h6->ip6_vfc |= IPV6_VERSION; 2846 h6->ip6_hlim = IPV6_DEFHLIM; 2847 memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr)); 2848 memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr)); 2849 2850 th = (struct tcphdr *)((caddr_t)h6 + sizeof(struct ip6_hdr)); 2851 break; 2852 #endif /* INET6 */ 2853 default: 2854 unhandled_af(af); 2855 } 2856 2857 /* TCP header */ 2858 th->th_sport = sport; 2859 th->th_dport = dport; 2860 th->th_seq = htonl(seq); 2861 th->th_ack = htonl(ack); 2862 th->th_off = tlen >> 2; 2863 th->th_flags = flags; 2864 th->th_win = htons(win); 2865 2866 opt = (char *)(th + 1); 2867 if (mss) { 2868 opt[0] = TCPOPT_MAXSEG; 2869 opt[1] = 4; 2870 mss = htons(mss); 2871 memcpy((opt + 2), &mss, 2); 2872 opt += 4; 2873 } 2874 if (sack) { 2875 opt[0] = TCPOPT_SACK_PERMITTED; 2876 opt[1] = 2; 2877 opt += 2; 2878 } 2879 2880 return (m); 2881 } 2882 2883 void 2884 pf_send_tcp(const struct pf_rule *r, sa_family_t af, 2885 const 
struct pf_addr *saddr, const struct pf_addr *daddr, 2886 u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack, 2887 u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag, 2888 u_int16_t rtag, u_int rdom) 2889 { 2890 struct mbuf *m; 2891 2892 if ((m = pf_build_tcp(r, af, saddr, daddr, sport, dport, seq, ack, 2893 flags, win, mss, ttl, tag, rtag, 0, rdom)) == NULL) 2894 return; 2895 2896 switch (af) { 2897 case AF_INET: 2898 ip_send(m); 2899 break; 2900 #ifdef INET6 2901 case AF_INET6: 2902 ip6_send(m); 2903 break; 2904 #endif /* INET6 */ 2905 } 2906 } 2907 2908 static void 2909 pf_send_challenge_ack(struct pf_pdesc *pd, struct pf_state *s, 2910 struct pf_state_peer *src, struct pf_state_peer *dst) 2911 { 2912 /* 2913 * We are sending challenge ACK as a response to SYN packet, which 2914 * matches existing state (modulo TCP window check). Therefore packet 2915 * must be sent on behalf of destination. 2916 * 2917 * We expect sender to remain either silent, or send RST packet 2918 * so both, firewall and remote peer, can purge dead state from 2919 * memory. 
2920 */ 2921 pf_send_tcp(s->rule.ptr, pd->af, pd->dst, pd->src, 2922 pd->hdr.tcp.th_dport, pd->hdr.tcp.th_sport, dst->seqlo, 2923 src->seqlo, TH_ACK, 0, 0, s->rule.ptr->return_ttl, 1, 0, 2924 pd->rdomain); 2925 } 2926 2927 void 2928 pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, int param, 2929 sa_family_t af, struct pf_rule *r, u_int rdomain) 2930 { 2931 struct mbuf *m0; 2932 2933 if ((m0 = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) 2934 return; 2935 2936 m0->m_pkthdr.pf.flags |= PF_TAG_GENERATED; 2937 m0->m_pkthdr.ph_rtableid = rdomain; 2938 if (r && (r->scrub_flags & PFSTATE_SETPRIO)) 2939 m0->m_pkthdr.pf.prio = r->set_prio[0]; 2940 if (r && r->qid) 2941 m0->m_pkthdr.pf.qid = r->qid; 2942 2943 switch (af) { 2944 case AF_INET: 2945 icmp_error(m0, type, code, 0, param); 2946 break; 2947 #ifdef INET6 2948 case AF_INET6: 2949 icmp6_error(m0, type, code, param); 2950 break; 2951 #endif /* INET6 */ 2952 } 2953 } 2954 2955 /* 2956 * Return ((n = 0) == (a = b [with mask m])) 2957 * Note: n != 0 => returns (a != b [with mask m]) 2958 */ 2959 int 2960 pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m, 2961 struct pf_addr *b, sa_family_t af) 2962 { 2963 switch (af) { 2964 case AF_INET: 2965 if ((a->addr32[0] & m->addr32[0]) == 2966 (b->addr32[0] & m->addr32[0])) 2967 return (n == 0); 2968 break; 2969 #ifdef INET6 2970 case AF_INET6: 2971 if (((a->addr32[0] & m->addr32[0]) == 2972 (b->addr32[0] & m->addr32[0])) && 2973 ((a->addr32[1] & m->addr32[1]) == 2974 (b->addr32[1] & m->addr32[1])) && 2975 ((a->addr32[2] & m->addr32[2]) == 2976 (b->addr32[2] & m->addr32[2])) && 2977 ((a->addr32[3] & m->addr32[3]) == 2978 (b->addr32[3] & m->addr32[3]))) 2979 return (n == 0); 2980 break; 2981 #endif /* INET6 */ 2982 } 2983 2984 return (n != 0); 2985 } 2986 2987 /* 2988 * Return 1 if b <= a <= e, otherwise return 0. 
2989 */ 2990 int 2991 pf_match_addr_range(struct pf_addr *b, struct pf_addr *e, 2992 struct pf_addr *a, sa_family_t af) 2993 { 2994 switch (af) { 2995 case AF_INET: 2996 if ((ntohl(a->addr32[0]) < ntohl(b->addr32[0])) || 2997 (ntohl(a->addr32[0]) > ntohl(e->addr32[0]))) 2998 return (0); 2999 break; 3000 #ifdef INET6 3001 case AF_INET6: { 3002 int i; 3003 3004 /* check a >= b */ 3005 for (i = 0; i < 4; ++i) 3006 if (ntohl(a->addr32[i]) > ntohl(b->addr32[i])) 3007 break; 3008 else if (ntohl(a->addr32[i]) < ntohl(b->addr32[i])) 3009 return (0); 3010 /* check a <= e */ 3011 for (i = 0; i < 4; ++i) 3012 if (ntohl(a->addr32[i]) < ntohl(e->addr32[i])) 3013 break; 3014 else if (ntohl(a->addr32[i]) > ntohl(e->addr32[i])) 3015 return (0); 3016 break; 3017 } 3018 #endif /* INET6 */ 3019 } 3020 return (1); 3021 } 3022 3023 int 3024 pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p) 3025 { 3026 switch (op) { 3027 case PF_OP_IRG: 3028 return ((p > a1) && (p < a2)); 3029 case PF_OP_XRG: 3030 return ((p < a1) || (p > a2)); 3031 case PF_OP_RRG: 3032 return ((p >= a1) && (p <= a2)); 3033 case PF_OP_EQ: 3034 return (p == a1); 3035 case PF_OP_NE: 3036 return (p != a1); 3037 case PF_OP_LT: 3038 return (p < a1); 3039 case PF_OP_LE: 3040 return (p <= a1); 3041 case PF_OP_GT: 3042 return (p > a1); 3043 case PF_OP_GE: 3044 return (p >= a1); 3045 } 3046 return (0); /* never reached */ 3047 } 3048 3049 int 3050 pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p) 3051 { 3052 return (pf_match(op, ntohs(a1), ntohs(a2), ntohs(p))); 3053 } 3054 3055 int 3056 pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u) 3057 { 3058 if (u == -1 && op != PF_OP_EQ && op != PF_OP_NE) 3059 return (0); 3060 return (pf_match(op, a1, a2, u)); 3061 } 3062 3063 int 3064 pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g) 3065 { 3066 if (g == -1 && op != PF_OP_EQ && op != PF_OP_NE) 3067 return (0); 3068 return (pf_match(op, a1, a2, g)); 3069 } 3070 3071 int 3072 pf_match_tag(struct 
mbuf *m, struct pf_rule *r, int *tag) 3073 { 3074 if (*tag == -1) 3075 *tag = m->m_pkthdr.pf.tag; 3076 3077 return ((!r->match_tag_not && r->match_tag == *tag) || 3078 (r->match_tag_not && r->match_tag != *tag)); 3079 } 3080 3081 int 3082 pf_match_rcvif(struct mbuf *m, struct pf_rule *r) 3083 { 3084 struct ifnet *ifp; 3085 struct pfi_kif *kif; 3086 3087 ifp = if_get(m->m_pkthdr.ph_ifidx); 3088 if (ifp == NULL) 3089 return (0); 3090 3091 #if NCARP > 0 3092 if (ifp->if_type == IFT_CARP && ifp->if_carpdev) 3093 kif = (struct pfi_kif *)ifp->if_carpdev->if_pf_kif; 3094 else 3095 #endif /* NCARP */ 3096 kif = (struct pfi_kif *)ifp->if_pf_kif; 3097 3098 if_put(ifp); 3099 3100 if (kif == NULL) { 3101 DPFPRINTF(LOG_ERR, 3102 "%s: kif == NULL, @%d via %s", __func__, 3103 r->nr, r->rcv_ifname); 3104 return (0); 3105 } 3106 3107 return (pfi_kif_match(r->rcv_kif, kif)); 3108 } 3109 3110 void 3111 pf_tag_packet(struct mbuf *m, int tag, int rtableid) 3112 { 3113 if (tag > 0) 3114 m->m_pkthdr.pf.tag = tag; 3115 if (rtableid >= 0) 3116 m->m_pkthdr.ph_rtableid = (u_int)rtableid; 3117 } 3118 3119 enum pf_test_status 3120 pf_step_into_anchor(struct pf_test_ctx *ctx, struct pf_rule *r) 3121 { 3122 int rv; 3123 3124 if (ctx->depth >= PF_ANCHOR_STACK_MAX) { 3125 log(LOG_ERR, "pf_step_into_anchor: stack overflow\n"); 3126 return (PF_TEST_FAIL); 3127 } 3128 3129 ctx->depth++; 3130 3131 if (r->anchor_wildcard) { 3132 struct pf_anchor *child; 3133 rv = PF_TEST_OK; 3134 RB_FOREACH(child, pf_anchor_node, &r->anchor->children) { 3135 rv = pf_match_rule(ctx, &child->ruleset); 3136 if ((rv == PF_TEST_QUICK) || (rv == PF_TEST_FAIL)) { 3137 /* 3138 * we either hit a rule with quick action 3139 * (more likely), or hit some runtime 3140 * error (e.g. pool_get() failure). 3141 */ 3142 break; 3143 } 3144 } 3145 } else { 3146 rv = pf_match_rule(ctx, &r->anchor->ruleset); 3147 /* 3148 * Unless errors occured, stop iff any rule matched 3149 * within quick anchors. 
3150 */ 3151 if (rv != PF_TEST_FAIL && r->quick == PF_TEST_QUICK && 3152 *ctx->am == r) 3153 rv = PF_TEST_QUICK; 3154 } 3155 3156 ctx->depth--; 3157 3158 return (rv); 3159 } 3160 3161 void 3162 pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr, 3163 struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af) 3164 { 3165 switch (af) { 3166 case AF_INET: 3167 naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) | 3168 ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]); 3169 break; 3170 #ifdef INET6 3171 case AF_INET6: 3172 naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) | 3173 ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]); 3174 naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) | 3175 ((rmask->addr32[1] ^ 0xffffffff ) & saddr->addr32[1]); 3176 naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) | 3177 ((rmask->addr32[2] ^ 0xffffffff ) & saddr->addr32[2]); 3178 naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) | 3179 ((rmask->addr32[3] ^ 0xffffffff ) & saddr->addr32[3]); 3180 break; 3181 #endif /* INET6 */ 3182 default: 3183 unhandled_af(af); 3184 } 3185 } 3186 3187 void 3188 pf_addr_inc(struct pf_addr *addr, sa_family_t af) 3189 { 3190 switch (af) { 3191 case AF_INET: 3192 addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1); 3193 break; 3194 #ifdef INET6 3195 case AF_INET6: 3196 if (addr->addr32[3] == 0xffffffff) { 3197 addr->addr32[3] = 0; 3198 if (addr->addr32[2] == 0xffffffff) { 3199 addr->addr32[2] = 0; 3200 if (addr->addr32[1] == 0xffffffff) { 3201 addr->addr32[1] = 0; 3202 addr->addr32[0] = 3203 htonl(ntohl(addr->addr32[0]) + 1); 3204 } else 3205 addr->addr32[1] = 3206 htonl(ntohl(addr->addr32[1]) + 1); 3207 } else 3208 addr->addr32[2] = 3209 htonl(ntohl(addr->addr32[2]) + 1); 3210 } else 3211 addr->addr32[3] = 3212 htonl(ntohl(addr->addr32[3]) + 1); 3213 break; 3214 #endif /* INET6 */ 3215 default: 3216 unhandled_af(af); 3217 } 3218 } 3219 3220 int 3221 pf_socket_lookup(struct pf_pdesc *pd) 3222 { 3223 struct 
pf_addr *saddr, *daddr; 3224 u_int16_t sport, dport; 3225 struct inpcbtable *tb; 3226 struct inpcb *inp; 3227 3228 pd->lookup.uid = -1; 3229 pd->lookup.gid = -1; 3230 pd->lookup.pid = NO_PID; 3231 switch (pd->virtual_proto) { 3232 case IPPROTO_TCP: 3233 sport = pd->hdr.tcp.th_sport; 3234 dport = pd->hdr.tcp.th_dport; 3235 PF_ASSERT_LOCKED(); 3236 NET_ASSERT_LOCKED(); 3237 tb = &tcbtable; 3238 break; 3239 case IPPROTO_UDP: 3240 sport = pd->hdr.udp.uh_sport; 3241 dport = pd->hdr.udp.uh_dport; 3242 PF_ASSERT_LOCKED(); 3243 NET_ASSERT_LOCKED(); 3244 tb = &udbtable; 3245 break; 3246 default: 3247 return (-1); 3248 } 3249 if (pd->dir == PF_IN) { 3250 saddr = pd->src; 3251 daddr = pd->dst; 3252 } else { 3253 u_int16_t p; 3254 3255 p = sport; 3256 sport = dport; 3257 dport = p; 3258 saddr = pd->dst; 3259 daddr = pd->src; 3260 } 3261 switch (pd->af) { 3262 case AF_INET: 3263 /* 3264 * Fails when rtable is changed while evaluating the ruleset 3265 * The socket looked up will not match the one hit in the end. 
3266 */ 3267 inp = in_pcbhashlookup(tb, saddr->v4, sport, daddr->v4, dport, 3268 pd->rdomain); 3269 if (inp == NULL) { 3270 inp = in_pcblookup_listen(tb, daddr->v4, dport, 3271 NULL, pd->rdomain); 3272 if (inp == NULL) 3273 return (-1); 3274 } 3275 break; 3276 #ifdef INET6 3277 case AF_INET6: 3278 inp = in6_pcbhashlookup(tb, &saddr->v6, sport, &daddr->v6, 3279 dport, pd->rdomain); 3280 if (inp == NULL) { 3281 inp = in6_pcblookup_listen(tb, &daddr->v6, dport, 3282 NULL, pd->rdomain); 3283 if (inp == NULL) 3284 return (-1); 3285 } 3286 break; 3287 #endif /* INET6 */ 3288 default: 3289 unhandled_af(pd->af); 3290 } 3291 pd->lookup.uid = inp->inp_socket->so_euid; 3292 pd->lookup.gid = inp->inp_socket->so_egid; 3293 pd->lookup.pid = inp->inp_socket->so_cpid; 3294 return (1); 3295 } 3296 3297 /* post: r => (r[0] == type /\ r[1] >= min_typelen >= 2 "validity" 3298 * /\ (eoh - r) >= min_typelen >= 2 "safety" ) 3299 * 3300 * warning: r + r[1] may exceed opts bounds for r[1] > min_typelen 3301 */ 3302 u_int8_t* 3303 pf_find_tcpopt(u_int8_t *opt, u_int8_t *opts, size_t hlen, u_int8_t type, 3304 u_int8_t min_typelen) 3305 { 3306 u_int8_t *eoh = opts + hlen; 3307 3308 if (min_typelen < 2) 3309 return (NULL); 3310 3311 while ((eoh - opt) >= min_typelen) { 3312 switch (*opt) { 3313 case TCPOPT_EOL: 3314 /* FALLTHROUGH - Workaround the failure of some 3315 systems to NOP-pad their bzero'd option buffers, 3316 producing spurious EOLs */ 3317 case TCPOPT_NOP: 3318 opt++; 3319 continue; 3320 default: 3321 if (opt[0] == type && 3322 opt[1] >= min_typelen) 3323 return (opt); 3324 } 3325 3326 opt += MAX(opt[1], 2); /* evade infinite loops */ 3327 } 3328 3329 return (NULL); 3330 } 3331 3332 u_int8_t 3333 pf_get_wscale(struct pf_pdesc *pd) 3334 { 3335 int olen; 3336 u_int8_t opts[MAX_TCPOPTLEN], *opt; 3337 u_int8_t wscale = 0; 3338 3339 olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr); 3340 if (olen < TCPOLEN_WINDOW || !pf_pull_hdr(pd->m, 3341 pd->off + sizeof(struct tcphdr), opts, 
olen, NULL, NULL, pd->af)) 3342 return (0); 3343 3344 opt = opts; 3345 while ((opt = pf_find_tcpopt(opt, opts, olen, 3346 TCPOPT_WINDOW, TCPOLEN_WINDOW)) != NULL) { 3347 wscale = opt[2]; 3348 wscale = MIN(wscale, TCP_MAX_WINSHIFT); 3349 wscale |= PF_WSCALE_FLAG; 3350 3351 opt += opt[1]; 3352 } 3353 3354 return (wscale); 3355 } 3356 3357 u_int16_t 3358 pf_get_mss(struct pf_pdesc *pd) 3359 { 3360 int olen; 3361 u_int8_t opts[MAX_TCPOPTLEN], *opt; 3362 u_int16_t mss = tcp_mssdflt; 3363 3364 olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr); 3365 if (olen < TCPOLEN_MAXSEG || !pf_pull_hdr(pd->m, 3366 pd->off + sizeof(struct tcphdr), opts, olen, NULL, NULL, pd->af)) 3367 return (0); 3368 3369 opt = opts; 3370 while ((opt = pf_find_tcpopt(opt, opts, olen, 3371 TCPOPT_MAXSEG, TCPOLEN_MAXSEG)) != NULL) { 3372 memcpy(&mss, (opt + 2), 2); 3373 mss = ntohs(mss); 3374 3375 opt += opt[1]; 3376 } 3377 return (mss); 3378 } 3379 3380 u_int16_t 3381 pf_calc_mss(struct pf_addr *addr, sa_family_t af, int rtableid, u_int16_t offer) 3382 { 3383 struct ifnet *ifp; 3384 struct sockaddr_in *dst; 3385 #ifdef INET6 3386 struct sockaddr_in6 *dst6; 3387 #endif /* INET6 */ 3388 struct rtentry *rt = NULL; 3389 struct sockaddr_storage ss; 3390 int hlen; 3391 u_int16_t mss = tcp_mssdflt; 3392 3393 memset(&ss, 0, sizeof(ss)); 3394 3395 switch (af) { 3396 case AF_INET: 3397 hlen = sizeof(struct ip); 3398 dst = (struct sockaddr_in *)&ss; 3399 dst->sin_family = AF_INET; 3400 dst->sin_len = sizeof(*dst); 3401 dst->sin_addr = addr->v4; 3402 rt = rtalloc(sintosa(dst), 0, rtableid); 3403 break; 3404 #ifdef INET6 3405 case AF_INET6: 3406 hlen = sizeof(struct ip6_hdr); 3407 dst6 = (struct sockaddr_in6 *)&ss; 3408 dst6->sin6_family = AF_INET6; 3409 dst6->sin6_len = sizeof(*dst6); 3410 dst6->sin6_addr = addr->v6; 3411 rt = rtalloc(sin6tosa(dst6), 0, rtableid); 3412 break; 3413 #endif /* INET6 */ 3414 } 3415 3416 if (rt != NULL && (ifp = if_get(rt->rt_ifidx)) != NULL) { 3417 mss = ifp->if_mtu - hlen - 
sizeof(struct tcphdr); 3418 mss = max(tcp_mssdflt, mss); 3419 if_put(ifp); 3420 } 3421 rtfree(rt); 3422 mss = min(mss, offer); 3423 mss = max(mss, 64); /* sanity - at least max opt space */ 3424 return (mss); 3425 } 3426 3427 static __inline int 3428 pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr, sa_family_t af, 3429 struct pf_src_node **sns) 3430 { 3431 struct pf_rule *r = s->rule.ptr; 3432 int rv; 3433 3434 s->rt_kif = NULL; 3435 if (!r->rt) 3436 return (0); 3437 3438 switch (af) { 3439 case AF_INET: 3440 rv = pf_map_addr(AF_INET, r, saddr, &s->rt_addr, NULL, sns, 3441 &r->route, PF_SN_ROUTE); 3442 break; 3443 #ifdef INET6 3444 case AF_INET6: 3445 rv = pf_map_addr(AF_INET6, r, saddr, &s->rt_addr, NULL, sns, 3446 &r->route, PF_SN_ROUTE); 3447 break; 3448 #endif /* INET6 */ 3449 default: 3450 rv = 1; 3451 } 3452 3453 if (rv == 0) { 3454 s->rt_kif = r->route.kif; 3455 s->natrule.ptr = r; 3456 } 3457 3458 return (rv); 3459 } 3460 3461 u_int32_t 3462 pf_tcp_iss(struct pf_pdesc *pd) 3463 { 3464 SHA2_CTX ctx; 3465 union { 3466 uint8_t bytes[SHA512_DIGEST_LENGTH]; 3467 uint32_t words[1]; 3468 } digest; 3469 3470 if (pf_tcp_secret_init == 0) { 3471 arc4random_buf(pf_tcp_secret, sizeof(pf_tcp_secret)); 3472 SHA512Init(&pf_tcp_secret_ctx); 3473 SHA512Update(&pf_tcp_secret_ctx, pf_tcp_secret, 3474 sizeof(pf_tcp_secret)); 3475 pf_tcp_secret_init = 1; 3476 } 3477 ctx = pf_tcp_secret_ctx; 3478 3479 SHA512Update(&ctx, &pd->rdomain, sizeof(pd->rdomain)); 3480 SHA512Update(&ctx, &pd->hdr.tcp.th_sport, sizeof(u_short)); 3481 SHA512Update(&ctx, &pd->hdr.tcp.th_dport, sizeof(u_short)); 3482 switch (pd->af) { 3483 case AF_INET: 3484 SHA512Update(&ctx, &pd->src->v4, sizeof(struct in_addr)); 3485 SHA512Update(&ctx, &pd->dst->v4, sizeof(struct in_addr)); 3486 break; 3487 #ifdef INET6 3488 case AF_INET6: 3489 SHA512Update(&ctx, &pd->src->v6, sizeof(struct in6_addr)); 3490 SHA512Update(&ctx, &pd->dst->v6, sizeof(struct in6_addr)); 3491 break; 3492 #endif /* INET6 */ 3493 } 3494 
SHA512Final(digest.bytes, &ctx); 3495 pf_tcp_iss_off += 4096; 3496 return (digest.words[0] + tcp_iss + pf_tcp_iss_off); 3497 } 3498 3499 void 3500 pf_rule_to_actions(struct pf_rule *r, struct pf_rule_actions *a) 3501 { 3502 if (r->qid) 3503 a->qid = r->qid; 3504 if (r->pqid) 3505 a->pqid = r->pqid; 3506 if (r->rtableid >= 0) 3507 a->rtableid = r->rtableid; 3508 #if NPFLOG > 0 3509 a->log |= r->log; 3510 #endif /* NPFLOG > 0 */ 3511 if (r->scrub_flags & PFSTATE_SETTOS) 3512 a->set_tos = r->set_tos; 3513 if (r->min_ttl) 3514 a->min_ttl = r->min_ttl; 3515 if (r->max_mss) 3516 a->max_mss = r->max_mss; 3517 a->flags |= (r->scrub_flags & (PFSTATE_NODF|PFSTATE_RANDOMID| 3518 PFSTATE_SETTOS|PFSTATE_SCRUB_TCP|PFSTATE_SETPRIO)); 3519 if (r->scrub_flags & PFSTATE_SETPRIO) { 3520 a->set_prio[0] = r->set_prio[0]; 3521 a->set_prio[1] = r->set_prio[1]; 3522 } 3523 if (r->rule_flag & PFRULE_SETDELAY) 3524 a->delay = r->delay; 3525 } 3526 3527 #define PF_TEST_ATTRIB(t, a) \ 3528 if (t) { \ 3529 r = a; \ 3530 continue; \ 3531 } else do { \ 3532 } while (0) 3533 3534 enum pf_test_status 3535 pf_match_rule(struct pf_test_ctx *ctx, struct pf_ruleset *ruleset) 3536 { 3537 struct pf_rule *r; 3538 struct pf_rule *save_a; 3539 struct pf_ruleset *save_aruleset; 3540 3541 r = TAILQ_FIRST(ruleset->rules.active.ptr); 3542 while (r != NULL) { 3543 r->evaluations++; 3544 PF_TEST_ATTRIB( 3545 (pfi_kif_match(r->kif, ctx->pd->kif) == r->ifnot), 3546 r->skip[PF_SKIP_IFP].ptr); 3547 PF_TEST_ATTRIB((r->direction && r->direction != ctx->pd->dir), 3548 r->skip[PF_SKIP_DIR].ptr); 3549 PF_TEST_ATTRIB((r->onrdomain >= 0 && 3550 (r->onrdomain == ctx->pd->rdomain) == r->ifnot), 3551 r->skip[PF_SKIP_RDOM].ptr); 3552 PF_TEST_ATTRIB((r->af && r->af != ctx->pd->af), 3553 r->skip[PF_SKIP_AF].ptr); 3554 PF_TEST_ATTRIB((r->proto && r->proto != ctx->pd->proto), 3555 r->skip[PF_SKIP_PROTO].ptr); 3556 PF_TEST_ATTRIB((PF_MISMATCHAW(&r->src.addr, &ctx->pd->nsaddr, 3557 ctx->pd->naf, r->src.neg, ctx->pd->kif, 3558 
		    ctx->act.rtableid)),
			r->skip[PF_SKIP_SRC_ADDR].ptr);
		PF_TEST_ATTRIB((PF_MISMATCHAW(&r->dst.addr, &ctx->pd->ndaddr,
		    ctx->pd->af, r->dst.neg, NULL, ctx->act.rtableid)),
			r->skip[PF_SKIP_DST_ADDR].ptr);

		/* per-protocol attribute tests */
		switch (ctx->pd->virtual_proto) {
		case PF_VPROTO_FRAGMENT:
			/* tcp/udp only. port_op always 0 in other cases */
			PF_TEST_ATTRIB((r->src.port_op || r->dst.port_op),
				TAILQ_NEXT(r, entries));
			PF_TEST_ATTRIB((ctx->pd->proto == IPPROTO_TCP &&
			    r->flagset),
				TAILQ_NEXT(r, entries));
			/* icmp only. type/code always 0 in other cases */
			PF_TEST_ATTRIB((r->type || r->code),
				TAILQ_NEXT(r, entries));
			/* tcp/udp only. {uid|gid}.op always 0 in other cases */
			PF_TEST_ATTRIB((r->gid.op || r->uid.op),
				TAILQ_NEXT(r, entries));
			break;

		case IPPROTO_TCP:
			PF_TEST_ATTRIB(((r->flagset & ctx->th->th_flags) !=
			    r->flags),
				TAILQ_NEXT(r, entries));
			PF_TEST_ATTRIB((r->os_fingerprint != PF_OSFP_ANY &&
			    !pf_osfp_match(pf_osfp_fingerprint(ctx->pd),
			    r->os_fingerprint)),
				TAILQ_NEXT(r, entries));
			/* FALLTHROUGH */

		case IPPROTO_UDP:
			/* tcp/udp only. port_op always 0 in other cases */
			PF_TEST_ATTRIB((r->src.port_op &&
			    !pf_match_port(r->src.port_op, r->src.port[0],
			    r->src.port[1], ctx->pd->nsport)),
				r->skip[PF_SKIP_SRC_PORT].ptr);
			PF_TEST_ATTRIB((r->dst.port_op &&
			    !pf_match_port(r->dst.port_op, r->dst.port[0],
			    r->dst.port[1], ctx->pd->ndport)),
				r->skip[PF_SKIP_DST_PORT].ptr);
			/*
			 * tcp/udp only. uid.op always 0 in other cases.
			 * The socket lookup is done lazily, at most once,
			 * via the comma expression below.
			 */
			PF_TEST_ATTRIB((r->uid.op && (ctx->pd->lookup.done ||
			    (ctx->pd->lookup.done =
			    pf_socket_lookup(ctx->pd), 1)) &&
			    !pf_match_uid(r->uid.op, r->uid.uid[0],
			    r->uid.uid[1], ctx->pd->lookup.uid)),
				TAILQ_NEXT(r, entries));
			/* tcp/udp only. gid.op always 0 in other cases */
			PF_TEST_ATTRIB((r->gid.op && (ctx->pd->lookup.done ||
			    (ctx->pd->lookup.done =
			    pf_socket_lookup(ctx->pd), 1)) &&
			    !pf_match_gid(r->gid.op, r->gid.gid[0],
			    r->gid.gid[1], ctx->pd->lookup.gid)),
				TAILQ_NEXT(r, entries));
			break;

		case IPPROTO_ICMP:
		case IPPROTO_ICMPV6:
			/* icmp only. type always 0 in other cases */
			PF_TEST_ATTRIB((r->type &&
			    r->type != ctx->icmptype + 1),
				TAILQ_NEXT(r, entries));
			/* icmp only. code always 0 in other cases */
			PF_TEST_ATTRIB((r->code &&
			    r->code != ctx->icmpcode + 1),
				TAILQ_NEXT(r, entries));
			/* icmp only. don't create states on replies */
			PF_TEST_ATTRIB((r->keep_state && !ctx->state_icmp &&
			    (r->rule_flag & PFRULE_STATESLOPPY) == 0 &&
			    ctx->icmp_dir != PF_IN),
				TAILQ_NEXT(r, entries));
			break;

		default:
			break;
		}

		/* protocol-independent attribute tests */
		PF_TEST_ATTRIB((r->rule_flag & PFRULE_FRAGMENT &&
		    ctx->pd->virtual_proto != PF_VPROTO_FRAGMENT),
			TAILQ_NEXT(r, entries));
		PF_TEST_ATTRIB((r->tos && !(r->tos == ctx->pd->tos)),
			TAILQ_NEXT(r, entries));
		PF_TEST_ATTRIB((r->prob &&
		    r->prob <= arc4random_uniform(UINT_MAX - 1) + 1),
			TAILQ_NEXT(r, entries));
		PF_TEST_ATTRIB((r->match_tag &&
		    !pf_match_tag(ctx->pd->m, r, &ctx->tag)),
			TAILQ_NEXT(r, entries));
		PF_TEST_ATTRIB((r->rcv_kif && pf_match_rcvif(ctx->pd->m, r) ==
		    r->rcvifnot),
			TAILQ_NEXT(r, entries));
		PF_TEST_ATTRIB((r->prio &&
		    (r->prio == PF_PRIO_ZERO ? 0 : r->prio) !=
		    ctx->pd->m->m_pkthdr.pf.prio),
			TAILQ_NEXT(r, entries));

		/* must be last!  pf_add_threshold() has a side effect */
		if (r->pktrate.limit) {
			pf_add_threshold(&r->pktrate);
			PF_TEST_ATTRIB((pf_check_threshold(&r->pktrate)),
				TAILQ_NEXT(r, entries));
		}

		/* FALLTHROUGH */
		if (r->tag)
			ctx->tag = r->tag;
		if (r->anchor == NULL) {
			if (r->action == PF_MATCH) {
				/* "match" rule: record it and keep walking */
				if ((ctx->ri = pool_get(&pf_rule_item_pl,
				    PR_NOWAIT)) == NULL) {
					REASON_SET(&ctx->reason, PFRES_MEMORY);
					ctx->test_status = PF_TEST_FAIL;
					break;
				}
				ctx->ri->r = r;
				/* order is irrelevant */
				SLIST_INSERT_HEAD(&ctx->rules, ctx->ri, entry);
				ctx->ri = NULL;
				pf_rule_to_actions(r, &ctx->act);
				if (r->rule_flag & PFRULE_AFTO)
					ctx->pd->naf = r->naf;
				if (pf_get_transaddr(r, ctx->pd, ctx->sns,
				    &ctx->nr) == -1) {
					REASON_SET(&ctx->reason,
					    PFRES_TRANSLATE);
					ctx->test_status = PF_TEST_FAIL;
					break;
				}
#if NPFLOG > 0
				if (r->log) {
					REASON_SET(&ctx->reason, PFRES_MATCH);
					pflog_packet(ctx->pd, ctx->reason, r,
					    ctx->a, ruleset, NULL);
				}
#endif	/* NPFLOG > 0 */
			} else {
				/*
				 * found matching r
				 */
				*ctx->rm = r;
				/*
				 * anchor, with ruleset, where r belongs to
				 */
				*ctx->am = ctx->a;
				/*
				 * ruleset where r belongs to
				 */
				*ctx->rsm = ruleset;
				/*
				 * ruleset, where anchor belongs to.
				 */
				ctx->arsm = ctx->aruleset;
			}

#if NPFLOG > 0
			if (ctx->act.log & PF_LOG_MATCHES)
				pf_log_matches(ctx->pd, r, ctx->a, ruleset,
				    &ctx->rules);
#endif	/* NPFLOG > 0 */

			if (r->quick) {
				/* quick rule ends the walk immediately */
				ctx->test_status = PF_TEST_QUICK;
				break;
			}
		} else {
			save_a = ctx->a;
			save_aruleset = ctx->aruleset;
			ctx->a = r;		/* remember anchor */
			ctx->aruleset = ruleset;	/* and its ruleset */
			/*
			 * Note: we don't need to restore if we are not going
			 * to continue with ruleset evaluation.
			 */
			if (pf_step_into_anchor(ctx, r) != PF_TEST_OK)
				break;
			ctx->a = save_a;
			ctx->aruleset = save_aruleset;
		}
		r = TAILQ_NEXT(r, entries);
	}

	return (ctx->test_status);
}

/*
 * Run the main ruleset against the packet described by 'pd', apply the
 * matching rule's actions (return-rst/return-icmp, translation, state
 * creation, once-rule expiry) and return the verdict (PF_PASS, PF_DROP,
 * PF_DEFER, PF_AFRT, PF_SYNPROXY_DROP).  On match, *rm/*am/*rsm are set
 * to the matching rule, its anchor rule, and its ruleset respectively.
 */
int
pf_test_rule(struct pf_pdesc *pd, struct pf_rule **rm, struct pf_state **sm,
    struct pf_rule **am, struct pf_ruleset **rsm, u_short *reason)
{
	struct pf_rule		*r = NULL;
	struct pf_rule		*a = NULL;
	struct pf_ruleset	*ruleset = NULL;
	struct pf_state_key	*skw = NULL, *sks = NULL;
	int			 rewrite = 0;
	/*
	 * NOTE(review): virtual_type/virtual_id are only assigned in the
	 * ICMP/ICMPv6 cases below; for other protocols they are passed to
	 * pf_translate() unset (unused there for TCP/UDP) — confirm.
	 */
	u_int16_t		 virtual_type, virtual_id;
	int			 action = PF_DROP;
	struct pf_test_ctx	 ctx;
	int			 rv;

	memset(&ctx, 0, sizeof(ctx));
	ctx.pd = pd;
	ctx.rm = rm;
	ctx.am = am;
	ctx.rsm = rsm;
	ctx.th = &pd->hdr.tcp;
	ctx.act.rtableid = pd->rdomain;
	ctx.tag = -1;
	SLIST_INIT(&ctx.rules);

	/* shed inbound load when interface queues are congested */
	if (pd->dir == PF_IN && if_congested()) {
		REASON_SET(&ctx.reason, PFRES_CONGEST);
		return (PF_DROP);
	}

	switch (pd->virtual_proto) {
	case IPPROTO_ICMP:
		ctx.icmptype = pd->hdr.icmp.icmp_type;
		ctx.icmpcode = pd->hdr.icmp.icmp_code;
		ctx.state_icmp = pf_icmp_mapping(pd, ctx.icmptype,
		    &ctx.icmp_dir, &virtual_id, &virtual_type);
		if (ctx.icmp_dir == PF_IN) {
			pd->osport = pd->nsport = virtual_id;
			pd->odport = pd->ndport = virtual_type;
		} else {
			pd->osport = pd->nsport = virtual_type;
			pd->odport = pd->ndport = virtual_id;
		}
		break;
#ifdef INET6
	case IPPROTO_ICMPV6:
		ctx.icmptype = pd->hdr.icmp6.icmp6_type;
		ctx.icmpcode = pd->hdr.icmp6.icmp6_code;
		ctx.state_icmp = pf_icmp_mapping(pd, ctx.icmptype,
		    &ctx.icmp_dir, &virtual_id, &virtual_type);
		if (ctx.icmp_dir == PF_IN) {
			pd->osport = pd->nsport = virtual_id;
			pd->odport = pd->ndport = virtual_type;
		} else {
			pd->osport = pd->nsport = virtual_type;
			pd->odport = pd->ndport = virtual_id;
		}
		break;
#endif /* INET6 */
	}

	ruleset = &pf_main_ruleset;
	rv = pf_match_rule(&ctx, ruleset);
	if (rv == PF_TEST_FAIL) {
		/*
		 * Reason has been set in pf_match_rule() already.
		 */
		goto cleanup;
	}

	r = *ctx.rm;	/* matching rule */
	a = *ctx.am;	/* rule that defines an anchor containing 'r' */
	ruleset = *ctx.rsm;	/* ruleset of the anchor defined by the rule 'a' */
	ctx.aruleset = ctx.arsm;	/* ruleset of the 'a' rule itself */

	/* apply actions for last matching pass/block rule */
	pf_rule_to_actions(r, &ctx.act);
	if (r->rule_flag & PFRULE_AFTO)
		pd->naf = r->naf;
	if (pf_get_transaddr(r, pd, ctx.sns, &ctx.nr) == -1) {
		REASON_SET(&ctx.reason, PFRES_TRANSLATE);
		goto cleanup;
	}
	REASON_SET(&ctx.reason, PFRES_MATCH);

#if NPFLOG > 0
	if (r->log)
		pflog_packet(pd, ctx.reason, r, a, ruleset, NULL);
	if (ctx.act.log & PF_LOG_MATCHES)
		pf_log_matches(pd, r, a, ruleset, &ctx.rules);
#endif	/* NPFLOG > 0 */

	/* on block rules with a return option, answer the sender */
	if (pd->virtual_proto != PF_VPROTO_FRAGMENT &&
	    (r->action == PF_DROP) &&
	    ((r->rule_flag & PFRULE_RETURNRST) ||
	    (r->rule_flag & PFRULE_RETURNICMP) ||
	    (r->rule_flag & PFRULE_RETURN))) {
		if (pd->proto == IPPROTO_TCP &&
		    ((r->rule_flag & PFRULE_RETURNRST) ||
		    (r->rule_flag & PFRULE_RETURN)) &&
		    !(ctx.th->th_flags & TH_RST)) {
			u_int32_t	 ack =
			    ntohl(ctx.th->th_seq) + pd->p_len;

			/* only RST packets with a valid TCP checksum */
			if (pf_check_tcp_cksum(pd->m, pd->off,
			    pd->tot_len - pd->off, pd->af))
				REASON_SET(&ctx.reason, PFRES_PROTCKSUM);
			else {
				if (ctx.th->th_flags & TH_SYN)
					ack++;
				if (ctx.th->th_flags & TH_FIN)
					ack++;
				pf_send_tcp(r, pd->af, pd->dst,
				    pd->src, ctx.th->th_dport,
				    ctx.th->th_sport, ntohl(ctx.th->th_ack),
				    ack, TH_RST|TH_ACK, 0, 0, r->return_ttl,
				    1, 0, pd->rdomain);
			}
		} else if ((pd->proto != IPPROTO_ICMP ||
		    ICMP_INFOTYPE(ctx.icmptype)) && pd->af == AF_INET &&
		    r->return_icmp)
			pf_send_icmp(pd->m, r->return_icmp >> 8,
			    r->return_icmp & 255, 0, pd->af, r, pd->rdomain);
		else if ((pd->proto != IPPROTO_ICMPV6 ||
		    (ctx.icmptype >= ICMP6_ECHO_REQUEST &&
		    ctx.icmptype != ND_REDIRECT)) && pd->af == AF_INET6 &&
		    r->return_icmp6)
			pf_send_icmp(pd->m, r->return_icmp6 >> 8,
			    r->return_icmp6 & 255, 0, pd->af, r, pd->rdomain);
	}

	if (r->action == PF_DROP)
		goto cleanup;

	/*
	 * If an expired "once" rule has not been purged, drop any new matching
	 * packets.
	 */
	if (r->rule_flag & PFRULE_EXPIRED)
		goto cleanup;

	pf_tag_packet(pd->m, ctx.tag, ctx.act.rtableid);
	if (ctx.act.rtableid >= 0 &&
	    rtable_l2(ctx.act.rtableid) != pd->rdomain)
		pd->destchg = 1;

	if (r->action == PF_PASS && pd->badopts && ! r->allow_opts) {
		REASON_SET(&ctx.reason, PFRES_IPOPTIONS);
#if NPFLOG > 0
		pd->pflog |= PF_LOG_FORCE;
#endif	/* NPFLOG > 0 */
		DPFPRINTF(LOG_NOTICE, "dropping packet with "
		    "ip/ipv6 options in pf_test_rule()");
		goto cleanup;
	}

	action = PF_PASS;

	if (pd->virtual_proto != PF_VPROTO_FRAGMENT
	    && !ctx.state_icmp && r->keep_state) {

		if (r->rule_flag & PFRULE_SRCTRACK &&
		    pf_insert_src_node(&ctx.sns[PF_SN_NONE], r, PF_SN_NONE,
		    pd->af, pd->src, NULL, NULL) != 0) {
			REASON_SET(&ctx.reason, PFRES_SRCLIMIT);
			goto cleanup;
		}

		if (r->max_states && (r->states_cur >= r->max_states)) {
			pf_status.lcounters[LCNT_STATES]++;
			REASON_SET(&ctx.reason, PFRES_MAXSTATES);
			goto cleanup;
		}

		action = pf_create_state(pd, r, a, ctx.nr, &skw, &sks,
		    &rewrite, sm, ctx.tag, &ctx.rules, &ctx.act, ctx.sns);

		if (action != PF_PASS)
			goto cleanup;
		if (sks != skw) {
			/*
			 * translation was applied: rewrite the packet with
			 * the state key matching the packet's direction
			 */
			struct pf_state_key	*sk;

			if (pd->dir == PF_IN)
				sk = sks;
			else
				sk = skw;
			rewrite += pf_translate(pd,
			    &sk->addr[pd->af == pd->naf ? pd->sidx : pd->didx],
			    sk->port[pd->af == pd->naf ? pd->sidx : pd->didx],
			    &sk->addr[pd->af == pd->naf ? pd->didx : pd->sidx],
			    sk->port[pd->af == pd->naf ? pd->didx : pd->sidx],
			    virtual_type, ctx.icmp_dir);
		}

#ifdef INET6
		/* differing key address families means af-to translation */
		if (rewrite && skw->af != sks->af)
			action = PF_AFRT;
#endif /* INET6 */

	} else {
		/* stateless pass: the collected match-rule items are unused */
		while ((ctx.ri = SLIST_FIRST(&ctx.rules))) {
			SLIST_REMOVE_HEAD(&ctx.rules, entry);
			pool_put(&pf_rule_item_pl, ctx.ri);
		}
	}

	/* copy back packet headers if needed */
	if (rewrite && pd->hdrlen) {
		m_copyback(pd->m, pd->off, pd->hdrlen, &pd->hdr, M_NOWAIT);
	}

#if NPFSYNC > 0
	if (*sm != NULL && !ISSET((*sm)->state_flags, PFSTATE_NOSYNC) &&
	    pd->dir == PF_OUT && pfsync_up()) {
		/*
		 * We want the state created, but we dont
		 * want to send this in case a partner
		 * firewall has to know about it to allow
		 * replies through it.
		 */
		if (pfsync_defer(*sm, pd->m))
			return (PF_DEFER);
	}
#endif	/* NPFSYNC > 0 */

	if (r->rule_flag & PFRULE_ONCE) {
		u_int32_t	rule_flag;

		/*
		 * Use atomic_cas() to determine a clear winner, which will
		 * insert an expired rule to gcl.
		 */
		rule_flag = r->rule_flag;
		if (((rule_flag & PFRULE_EXPIRED) == 0) &&
		    atomic_cas_uint(&r->rule_flag, rule_flag,
		    rule_flag | PFRULE_EXPIRED) == rule_flag) {
			r->exptime = gettime();
			SLIST_INSERT_HEAD(&pf_rule_gcl, r, gcle);
		}
	}

	return (action);

cleanup:
	/* release any match-rule items collected before the failure */
	while ((ctx.ri = SLIST_FIRST(&ctx.rules))) {
		SLIST_REMOVE_HEAD(&ctx.rules, entry);
		pool_put(&pf_rule_item_pl, ctx.ri);
	}

	return (action);
}

/*
 * Allocate and initialize a new state for a packet matching a keep-state
 * rule, set up its state keys and protocol tracking peers, attach source
 * nodes, and insert it into the state table.  Returns PF_PASS on
 * success (with *sm set), PF_SYNPROXY_DROP when synproxy answered the
 * initial SYN itself, or PF_DROP on failure (all resources released).
 */
static __inline int
pf_create_state(struct pf_pdesc *pd, struct pf_rule *r, struct pf_rule *a,
    struct pf_rule *nr, struct pf_state_key **skw, struct pf_state_key **sks,
    int *rewrite, struct pf_state **sm, int tag, struct pf_rule_slist *rules,
    struct pf_rule_actions *act, struct pf_src_node *sns[PF_SN_MAX])
{
	struct pf_state		*s = NULL;
	struct tcphdr		*th = &pd->hdr.tcp;
	u_int16_t		 mss = tcp_mssdflt;
	u_short			 reason;
	u_int			 i;

	s = pool_get(&pf_state_pl, PR_NOWAIT | PR_ZERO);
	if (s == NULL) {
		REASON_SET(&reason, PFRES_MEMORY);
		goto csfailed;
	}
	s->rule.ptr = r;
	s->anchor.ptr = a;
	s->natrule.ptr = nr;
	if (r->allow_opts)
		s->state_flags |= PFSTATE_ALLOWOPTS;
	if (r->rule_flag & PFRULE_STATESLOPPY)
		s->state_flags |= PFSTATE_SLOPPY;
	if (r->rule_flag & PFRULE_PFLOW)
		s->state_flags |= PFSTATE_PFLOW;
#if NPFLOG > 0
	s->log = act->log & PF_LOG_ALL;
#endif	/* NPFLOG > 0 */
	s->qid = act->qid;
	s->pqid = act->pqid;
	s->rtableid[pd->didx] = act->rtableid;
	s->rtableid[pd->sidx] = -1;	/* return traffic is routed normally */
	s->min_ttl = act->min_ttl;
	s->set_tos = act->set_tos;
	s->max_mss = act->max_mss;
	s->state_flags |= act->flags;
#if NPFSYNC > 0
	s->sync_state = PFSYNC_S_NONE;
#endif	/* NPFSYNC > 0 */
	s->set_prio[0] = act->set_prio[0];
	s->set_prio[1] = act->set_prio[1];
	s->delay =
	    act->delay;
	SLIST_INIT(&s->src_nodes);
	/*
	 * must initialize refcnt, before pf_state_insert() gets called.
	 * pf_state_insert() grabs reference for pfsync!
	 */
	refcnt_init(&s->refcnt);

	/* per-protocol tracking setup */
	switch (pd->proto) {
	case IPPROTO_TCP:
		s->src.seqlo = ntohl(th->th_seq);
		s->src.seqhi = s->src.seqlo + pd->p_len + 1;
		if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN &&
		    r->keep_state == PF_STATE_MODULATE) {
			/* Generate sequence number modulator */
			if ((s->src.seqdiff = pf_tcp_iss(pd) - s->src.seqlo) ==
			    0)
				s->src.seqdiff = 1;
			pf_patch_32(pd,
			    &th->th_seq, htonl(s->src.seqlo + s->src.seqdiff));
			*rewrite = 1;
		} else
			s->src.seqdiff = 0;
		if (th->th_flags & TH_SYN) {
			s->src.seqhi++;
			s->src.wscale = pf_get_wscale(pd);
		}
		s->src.max_win = MAX(ntohs(th->th_win), 1);
		if (s->src.wscale & PF_WSCALE_MASK) {
			/* Remove scale factor from initial window */
			int win = s->src.max_win;
			win += 1 << (s->src.wscale & PF_WSCALE_MASK);
			s->src.max_win = (win - 1) >>
			    (s->src.wscale & PF_WSCALE_MASK);
		}
		if (th->th_flags & TH_FIN)
			s->src.seqhi++;
		s->dst.seqhi = 1;
		s->dst.max_win = 1;
		pf_set_protostate(s, PF_PEER_SRC, TCPS_SYN_SENT);
		pf_set_protostate(s, PF_PEER_DST, TCPS_CLOSED);
		s->timeout = PFTM_TCP_FIRST_PACKET;
		pf_status.states_halfopen++;
		break;
	case IPPROTO_UDP:
		pf_set_protostate(s, PF_PEER_SRC, PFUDPS_SINGLE);
		pf_set_protostate(s, PF_PEER_DST, PFUDPS_NO_TRAFFIC);
		s->timeout = PFTM_UDP_FIRST_PACKET;
		break;
	case IPPROTO_ICMP:
#ifdef INET6
	case IPPROTO_ICMPV6:
#endif /* INET6 */
		s->timeout = PFTM_ICMP_FIRST_PACKET;
		break;
	default:
		pf_set_protostate(s, PF_PEER_SRC, PFOTHERS_SINGLE);
		pf_set_protostate(s, PF_PEER_DST, PFOTHERS_NO_TRAFFIC);
		s->timeout = PFTM_OTHER_FIRST_PACKET;
	}

	s->creation = getuptime();
	s->expire = getuptime();

	if (pd->proto == IPPROTO_TCP) {
		if (s->state_flags & PFSTATE_SCRUB_TCP &&
		    pf_normalize_tcp_init(pd, &s->src)) {
			REASON_SET(&reason, PFRES_MEMORY);
			goto csfailed;
		}
		if (s->state_flags & PFSTATE_SCRUB_TCP && s->src.scrub &&
		    pf_normalize_tcp_stateful(pd, &reason, s, &s->src, &s->dst,
		    rewrite)) {
			/* This really shouldn't happen!!! */
			DPFPRINTF(LOG_ERR,
			    "%s: tcp normalize failed on first pkt", __func__);
			goto csfailed;
		}
	}
	s->direction = pd->dir;

	if (pf_state_key_setup(pd, skw, sks, act->rtableid)) {
		REASON_SET(&reason, PFRES_MEMORY);
		goto csfailed;
	}

	if (pf_set_rt_ifp(s, pd->src, (*skw)->af, sns) != 0) {
		REASON_SET(&reason, PFRES_NOROUTE);
		goto csfailed;
	}

	/* link the state to each of its tracking source nodes */
	for (i = 0; i < PF_SN_MAX; i++)
		if (sns[i] != NULL) {
			struct pf_sn_item	*sni;

			sni = pool_get(&pf_sn_item_pl, PR_NOWAIT);
			if (sni == NULL) {
				REASON_SET(&reason, PFRES_MEMORY);
				goto csfailed;
			}
			sni->sn = sns[i];
			SLIST_INSERT_HEAD(&s->src_nodes, sni, next);
			sni->sn->states++;
		}

	if (pf_state_insert(BOUND_IFACE(r, pd->kif), skw, sks, s)) {
		pf_detach_state(s);
		*sks = *skw = NULL;
		REASON_SET(&reason, PFRES_STATEINS);
		goto csfailed;
	} else
		*sm = s;

	/*
	 * Make state responsible for rules it binds here.
	 */
	memcpy(&s->match_rules, rules, sizeof(s->match_rules));
	memset(rules, 0, sizeof(*rules));
	STATE_INC_COUNTERS(s);

	if (tag > 0) {
		pf_tag_ref(tag);
		s->tag = tag;
	}
	if (pd->proto == IPPROTO_TCP && (th->th_flags & (TH_SYN|TH_ACK)) ==
	    TH_SYN && r->keep_state == PF_STATE_SYNPROXY) {
		/* synproxy: answer the SYN ourselves, drop the original */
		int rtid = pd->rdomain;
		if (act->rtableid >= 0)
			rtid = act->rtableid;
		pf_set_protostate(s, PF_PEER_SRC, PF_TCPS_PROXY_SRC);
		s->src.seqhi = arc4random();
		/* Find mss option */
		mss = pf_get_mss(pd);
		mss = pf_calc_mss(pd->src, pd->af, rtid, mss);
		mss = pf_calc_mss(pd->dst, pd->af, rtid, mss);
		s->src.mss = mss;
		pf_send_tcp(r, pd->af, pd->dst, pd->src, th->th_dport,
		    th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1,
		    TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, pd->rdomain);
		REASON_SET(&reason, PFRES_SYNPROXY);
		return (PF_SYNPROXY_DROP);
	}

	return (PF_PASS);

csfailed:
	if (s) {
		pf_normalize_tcp_cleanup(s);	/* safe even w/o init */
		pf_src_tree_remove_state(s);
		pool_put(&pf_state_pl, s);
	}

	for (i = 0; i < PF_SN_MAX; i++)
		if (sns[i] != NULL)
			pf_remove_src_node(sns[i]);

	return (PF_DROP);
}

/*
 * Rewrite the packet in 'pd' to the given translated addresses and
 * ports, patching checksums incrementally via pf_patch_*().  Also
 * handles af-to ICMP<->ICMPv6 header conversion.  Returns non-zero if
 * anything in the header was changed (caller must m_copyback()).
 */
int
pf_translate(struct pf_pdesc *pd, struct pf_addr *saddr, u_int16_t sport,
    struct pf_addr *daddr, u_int16_t dport, u_int16_t virtual_type,
    int icmp_dir)
{
	/*
	 * when called from bpf_mtap_pflog, there are extra constraints:
	 * -mbuf is faked, m_data is the bpf buffer
	 * -pd is not fully set up
	 */
	int	rewrite = 0;
	int	afto = pd->af != pd->naf;

	if (afto || PF_ANEQ(daddr, pd->dst, pd->af))
		pd->destchg = 1;

	switch (pd->proto) {
	case IPPROTO_TCP:	/* FALLTHROUGH */
	case IPPROTO_UDP:
		rewrite += pf_patch_16(pd, pd->sport, sport);
		rewrite += pf_patch_16(pd, pd->dport, dport);
		break;

	case
	IPPROTO_ICMP:
		/* pf_translate() is also used when logging invalid packets */
		if (pd->af != AF_INET)
			return (0);

		if (afto) {
#ifdef INET6
			if (pf_translate_icmp_af(pd, AF_INET6, &pd->hdr.icmp))
				return (0);
			pd->proto = IPPROTO_ICMPV6;
			rewrite = 1;
#endif /* INET6 */
		}
		if (virtual_type == htons(ICMP_ECHO)) {
			/* the echo id plays the role of a port number */
			u_int16_t icmpid = (icmp_dir == PF_IN) ? sport : dport;
			rewrite += pf_patch_16(pd,
			    &pd->hdr.icmp.icmp_id, icmpid);
		}
		break;

#ifdef INET6
	case IPPROTO_ICMPV6:
		/* pf_translate() is also used when logging invalid packets */
		if (pd->af != AF_INET6)
			return (0);

		if (afto) {
			if (pf_translate_icmp_af(pd, AF_INET, &pd->hdr.icmp6))
				return (0);
			pd->proto = IPPROTO_ICMP;
			rewrite = 1;
		}
		if (virtual_type == htons(ICMP6_ECHO_REQUEST)) {
			u_int16_t icmpid = (icmp_dir == PF_IN) ? sport : dport;
			rewrite += pf_patch_16(pd,
			    &pd->hdr.icmp6.icmp6_id, icmpid);
		}
		break;
#endif /* INET6 */
	}

	/* af-to rewrites the addresses itself during header conversion */
	if (!afto) {
		rewrite += pf_translate_a(pd, pd->src, saddr);
		rewrite += pf_translate_a(pd, pd->dst, daddr);
	}

	return (rewrite);
}

/*
 * Full stateful TCP tracking: validate the packet's sequence/ack
 * numbers against the tracked windows, advance the per-peer TCP state
 * machines, and refresh the state timeout.  'reverse' flips which peer
 * is treated as the source (used for ICMP error payloads).  Returns
 * PF_PASS or PF_DROP; *copyback is set when headers were modulated.
 */
int
pf_tcp_track_full(struct pf_pdesc *pd, struct pf_state **state, u_short *reason,
    int *copyback, int reverse)
{
	struct tcphdr		*th = &pd->hdr.tcp;
	struct pf_state_peer	*src, *dst;
	u_int16_t		 win = ntohs(th->th_win);
	u_int32_t		 ack, end, data_end, seq, orig_seq;
	u_int8_t		 sws, dws, psrc, pdst;
	int			 ackskew;

	/* map the packet's direction onto the state's src/dst peers */
	if ((pd->dir == (*state)->direction && !reverse) ||
	    (pd->dir != (*state)->direction && reverse)) {
		src = &(*state)->src;
		dst = &(*state)->dst;
		psrc = PF_PEER_SRC;
		pdst = PF_PEER_DST;
	} else {
		src = &(*state)->dst;
		dst = &(*state)->src;
		psrc = PF_PEER_DST;
		pdst = PF_PEER_SRC;
	}

	/* window scaling only applies once both sides negotiated it */
	if (src->wscale && dst->wscale &&
	    !(th->th_flags & TH_SYN)) {
		sws = src->wscale & PF_WSCALE_MASK;
		dws = dst->wscale & PF_WSCALE_MASK;
	} else
		sws = dws = 0;

	/*
	 * Sequence tracking algorithm from Guido van Rooij's paper:
	 *   http://www.madison-gurkha.com/publications/tcp_filtering/
	 *	tcp_filtering.ps
	 */

	orig_seq = seq = ntohl(th->th_seq);
	if (src->seqlo == 0) {
		/* First packet from this end. Set its state */

		if (((*state)->state_flags & PFSTATE_SCRUB_TCP || dst->scrub) &&
		    src->scrub == NULL) {
			if (pf_normalize_tcp_init(pd, src)) {
				REASON_SET(reason, PFRES_MEMORY);
				return (PF_DROP);
			}
		}

		/* Deferred generation of sequence number modulator */
		if (dst->seqdiff && !src->seqdiff) {
			/* use random iss for the TCP server */
			while ((src->seqdiff = arc4random() - seq) == 0)
				continue;
			ack = ntohl(th->th_ack) - dst->seqdiff;
			pf_patch_32(pd, &th->th_seq, htonl(seq + src->seqdiff));
			pf_patch_32(pd, &th->th_ack, htonl(ack));
			*copyback = 1;
		} else {
			ack = ntohl(th->th_ack);
		}

		end = seq + pd->p_len;
		if (th->th_flags & TH_SYN) {
			end++;
			if (dst->wscale & PF_WSCALE_FLAG) {
				src->wscale = pf_get_wscale(pd);
				if (src->wscale & PF_WSCALE_FLAG) {
					/* Remove scale factor from initial
					 * window */
					sws = src->wscale & PF_WSCALE_MASK;
					win = ((u_int32_t)win + (1 << sws) - 1)
					    >> sws;
					dws = dst->wscale & PF_WSCALE_MASK;
				} else {
					/* fixup other window */
					dst->max_win = MIN(TCP_MAXWIN,
					    (u_int32_t)dst->max_win <<
					    (dst->wscale & PF_WSCALE_MASK));
					/* in case of a retrans SYN|ACK */
					dst->wscale = 0;
				}
			}
		}
		data_end = end;
		if (th->th_flags & TH_FIN)
			end++;

		src->seqlo = seq;
		if (src->state < TCPS_SYN_SENT)
			pf_set_protostate(*state, psrc, TCPS_SYN_SENT);

		/*
		 * May need to slide the window (seqhi may have been set by
		 * the crappy stack check or if we picked up the connection
		 * after establishment)
		 */
		if (src->seqhi == 1 ||
		    SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi))
			src->seqhi = end + MAX(1, dst->max_win << dws);
		if (win > src->max_win)
			src->max_win = win;

	} else {
		ack = ntohl(th->th_ack) - dst->seqdiff;
		if (src->seqdiff) {
			/* Modulate sequence numbers */
			pf_patch_32(pd, &th->th_seq, htonl(seq + src->seqdiff));
			pf_patch_32(pd, &th->th_ack, htonl(ack));
			*copyback = 1;
		}
		end = seq + pd->p_len;
		if (th->th_flags & TH_SYN)
			end++;
		data_end = end;
		if (th->th_flags & TH_FIN)
			end++;
	}

	if ((th->th_flags & TH_ACK) == 0) {
		/* Let it pass through the ack skew check */
		ack = dst->seqlo;
	} else if ((ack == 0 &&
	    (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) ||
	    /* broken tcp stacks do not set ack */
	    (dst->state < TCPS_SYN_SENT)) {
		/*
		 * Many stacks (ours included) will set the ACK number in an
		 * FIN|ACK if the SYN times out -- no sequence to ACK.
		 */
		ack = dst->seqlo;
	}

	if (seq == end) {
		/* Ease sequencing restrictions on no data packets */
		seq = src->seqlo;
		data_end = end = seq;
	}

	ackskew = dst->seqlo - ack;

	/*
	 * Need to demodulate the sequence numbers in any TCP SACK options
	 * (Selective ACK). We could optionally validate the SACK values
	 * against the current ACK window, either forwards or backwards, but
	 * I'm not confident that SACK has been implemented properly
	 * everywhere. It wouldn't surprise me if several stacks accidentally
	 * SACK too far backwards of previously ACKed data. There really aren't
	 * any security implications of bad SACKing unless the target stack
	 * doesn't validate the option length correctly. Someone trying to
	 * spoof into a TCP connection won't bother blindly sending SACK
	 * options anyway.
	 */
	if (dst->seqdiff && (th->th_off << 2) > sizeof(struct tcphdr)) {
		if (pf_modulate_sack(pd, dst))
			*copyback = 1;
	}

#define MAXACKWINDOW (0xffff + 1500)	/* 1500 is an arbitrary fudge factor */
	if (SEQ_GEQ(src->seqhi, data_end) &&
	    /* Last octet inside other's window space */
	    SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) &&
	    /* Retrans: not more than one window back */
	    (ackskew >= -MAXACKWINDOW) &&
	    /* Acking not more than one reassembled fragment backwards */
	    (ackskew <= (MAXACKWINDOW << sws)) &&
	    /* Acking not more than one window forward */
	    ((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo ||
	    (orig_seq == src->seqlo + 1) || (orig_seq + 1 == src->seqlo))) {
		/* Require an exact/+1 sequence match on resets when possible */

		if (dst->scrub || src->scrub) {
			if (pf_normalize_tcp_stateful(pd, reason, *state, src,
			    dst, copyback))
				return (PF_DROP);
		}

		/* update max window */
		if (src->max_win < win)
			src->max_win = win;
		/* synchronize sequencing */
		if (SEQ_GT(end, src->seqlo))
			src->seqlo = end;
		/* slide the window of what the other end can send */
		if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
			dst->seqhi = ack + MAX((win << sws), 1);

		/* update states */
		if (th->th_flags & TH_SYN)
			if (src->state < TCPS_SYN_SENT)
				pf_set_protostate(*state, psrc, TCPS_SYN_SENT);
		if (th->th_flags & TH_FIN)
			if (src->state < TCPS_CLOSING)
				pf_set_protostate(*state, psrc, TCPS_CLOSING);
		if (th->th_flags & TH_ACK) {
			if (dst->state == TCPS_SYN_SENT) {
				pf_set_protostate(*state, pdst,
				    TCPS_ESTABLISHED);
				if (src->state == TCPS_ESTABLISHED &&
				    !SLIST_EMPTY(&(*state)->src_nodes) &&
				    pf_src_connlimit(state)) {
					REASON_SET(reason, PFRES_SRCLIMIT);
					return (PF_DROP);
				}
			} else if (dst->state == TCPS_CLOSING)
				pf_set_protostate(*state, pdst,
				    TCPS_FIN_WAIT_2);
		}
		if (th->th_flags & TH_RST)
			pf_set_protostate(*state, PF_PEER_BOTH, TCPS_TIME_WAIT);

		/* update expire time */
		(*state)->expire = getuptime();
		if (src->state >= TCPS_FIN_WAIT_2 &&
		    dst->state >= TCPS_FIN_WAIT_2)
			(*state)->timeout = PFTM_TCP_CLOSED;
		else if (src->state >= TCPS_CLOSING &&
		    dst->state >= TCPS_CLOSING)
			(*state)->timeout = PFTM_TCP_FIN_WAIT;
		else if (src->state < TCPS_ESTABLISHED ||
		    dst->state < TCPS_ESTABLISHED)
			(*state)->timeout = PFTM_TCP_OPENING;
		else if (src->state >= TCPS_CLOSING ||
		    dst->state >= TCPS_CLOSING)
			(*state)->timeout = PFTM_TCP_CLOSING;
		else
			(*state)->timeout = PFTM_TCP_ESTABLISHED;

		/* Fall through to PASS packet */
	} else if ((dst->state < TCPS_SYN_SENT ||
	    dst->state >= TCPS_FIN_WAIT_2 ||
	    src->state >= TCPS_FIN_WAIT_2) &&
	    SEQ_GEQ(src->seqhi + MAXACKWINDOW, data_end) &&
	    /* Within a window forward of the originating packet */
	    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) {
		/* Within a window backward of the originating packet */

		/*
		 * This currently handles three situations:
		 *  1) Stupid stacks will shotgun SYNs before their peer
		 *     replies.
		 *  2) When PF catches an already established stream (the
		 *     firewall rebooted, the state table was flushed, routes
		 *     changed...)
		 *  3) Packets get funky immediately after the connection
		 *     closes (this should catch Solaris spurious ACK|FINs
		 *     that web servers like to spew after a close)
		 *
		 * This must be a little more careful than the above code
		 * since packet floods will also be caught here. We don't
		 * update the TTL here to mitigate the damage of a packet
		 * flood and so the same code can handle awkward establishment
		 * and a loosened connection close.
		 * In the establishment case, a correct peer response will
		 * validate the connection, go through the normal state code
		 * and keep updating the state TTL.
		 */

		if (pf_status.debug >= LOG_NOTICE) {
			log(LOG_NOTICE, "pf: loose state match: ");
			pf_print_state(*state);
			pf_print_flags(th->th_flags);
			addlog(" seq=%u (%u) ack=%u len=%u ackskew=%d "
			    "pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack,
			    pd->p_len, ackskew, (*state)->packets[0],
			    (*state)->packets[1],
			    pd->dir == PF_IN ? "in" : "out",
			    pd->dir == (*state)->direction ? "fwd" : "rev");
		}

		if (dst->scrub || src->scrub) {
			if (pf_normalize_tcp_stateful(pd, reason, *state, src,
			    dst, copyback))
				return (PF_DROP);
		}

		/* update max window */
		if (src->max_win < win)
			src->max_win = win;
		/* synchronize sequencing */
		if (SEQ_GT(end, src->seqlo))
			src->seqlo = end;
		/* slide the window of what the other end can send */
		if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
			dst->seqhi = ack + MAX((win << sws), 1);

		/*
		 * Cannot set dst->seqhi here since this could be a shotgunned
		 * SYN and not an already established connection.
		 */
		if (th->th_flags & TH_FIN)
			if (src->state < TCPS_CLOSING)
				pf_set_protostate(*state, psrc, TCPS_CLOSING);
		if (th->th_flags & TH_RST)
			pf_set_protostate(*state, PF_PEER_BOTH, TCPS_TIME_WAIT);

		/* Fall through to PASS packet */
	} else {
		if ((*state)->dst.state == TCPS_SYN_SENT &&
		    (*state)->src.state == TCPS_SYN_SENT) {
			/* Send RST for state mismatches during handshake */
			if (!(th->th_flags & TH_RST))
				pf_send_tcp((*state)->rule.ptr, pd->af,
				    pd->dst, pd->src, th->th_dport,
				    th->th_sport, ntohl(th->th_ack), 0,
				    TH_RST, 0, 0,
				    (*state)->rule.ptr->return_ttl, 1, 0,
				    pd->rdomain);
			src->seqlo = 0;
			src->seqhi = 1;
			src->max_win = 1;
		} else if (pf_status.debug >= LOG_NOTICE) {
			log(LOG_NOTICE, "pf: BAD state: ");
			pf_print_state(*state);
			pf_print_flags(th->th_flags);
			addlog(" seq=%u (%u) ack=%u len=%u ackskew=%d "
			    "pkts=%llu:%llu dir=%s,%s\n",
			    seq, orig_seq, ack, pd->p_len, ackskew,
			    (*state)->packets[0], (*state)->packets[1],
			    pd->dir == PF_IN ? "in" : "out",
			    pd->dir == (*state)->direction ? "fwd" : "rev");
			addlog("pf: State failure on: %c %c %c %c | %c %c\n",
			    SEQ_GEQ(src->seqhi, data_end) ? ' ' : '1',
			    SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ?
			    ' ': '2',
			    (ackskew >= -MAXACKWINDOW) ? ' ' : '3',
			    (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4',
			    SEQ_GEQ(src->seqhi + MAXACKWINDOW, data_end) ?
			    ' ' :'5',
			    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?'
' :'6');
		}
		REASON_SET(reason, PFRES_BADSTATE);
		return (PF_DROP);
	}

	return (PF_PASS);
}

/*
 * Sloppy TCP state tracking: drive the pf TCP state machine from the
 * observed TCP flags only, with no sequence-number window validation.
 * Used for states that carry PFSTATE_SLOPPY (see pf_test_state()),
 * e.g. when only one direction of a connection crosses this firewall.
 * Returns PF_PASS, or PF_DROP when a source-node connection limit is
 * exceeded.
 */
int
pf_tcp_track_sloppy(struct pf_pdesc *pd, struct pf_state **state,
    u_short *reason)
{
	struct tcphdr		*th = &pd->hdr.tcp;
	struct pf_state_peer	*src, *dst;
	u_int8_t		 psrc, pdst;

	/* Select the peer views relative to the packet's direction. */
	if (pd->dir == (*state)->direction) {
		src = &(*state)->src;
		dst = &(*state)->dst;
		psrc = PF_PEER_SRC;
		pdst = PF_PEER_DST;
	} else {
		src = &(*state)->dst;
		dst = &(*state)->src;
		psrc = PF_PEER_DST;
		pdst = PF_PEER_SRC;
	}

	/* Peer states only ever advance; never move a peer backwards. */
	if (th->th_flags & TH_SYN)
		if (src->state < TCPS_SYN_SENT)
			pf_set_protostate(*state, psrc, TCPS_SYN_SENT);
	if (th->th_flags & TH_FIN)
		if (src->state < TCPS_CLOSING)
			pf_set_protostate(*state, psrc, TCPS_CLOSING);
	if (th->th_flags & TH_ACK) {
		if (dst->state == TCPS_SYN_SENT) {
			pf_set_protostate(*state, pdst, TCPS_ESTABLISHED);
			if (src->state == TCPS_ESTABLISHED &&
			    !SLIST_EMPTY(&(*state)->src_nodes) &&
			    pf_src_connlimit(state)) {
				REASON_SET(reason, PFRES_SRCLIMIT);
				return (PF_DROP);
			}
		} else if (dst->state == TCPS_CLOSING) {
			pf_set_protostate(*state, pdst, TCPS_FIN_WAIT_2);
		} else if (src->state == TCPS_SYN_SENT &&
		    dst->state < TCPS_SYN_SENT) {
			/*
			 * Handle a special sloppy case where we only see one
			 * half of the connection. If there is a ACK after
			 * the initial SYN without ever seeing a packet from
			 * the destination, set the connection to established.
			 */
			pf_set_protostate(*state, PF_PEER_BOTH,
			    TCPS_ESTABLISHED);
			if (!SLIST_EMPTY(&(*state)->src_nodes) &&
			    pf_src_connlimit(state)) {
				REASON_SET(reason, PFRES_SRCLIMIT);
				return (PF_DROP);
			}
		} else if (src->state == TCPS_CLOSING &&
		    dst->state == TCPS_ESTABLISHED &&
		    dst->seqlo == 0) {
			/*
			 * Handle the closing of half connections where we
			 * don't see the full bidirectional FIN/ACK+ACK
			 * handshake.
			 */
			pf_set_protostate(*state, pdst, TCPS_CLOSING);
		}
	}
	if (th->th_flags & TH_RST)
		pf_set_protostate(*state, PF_PEER_BOTH, TCPS_TIME_WAIT);

	/* update expire time */
	(*state)->expire = getuptime();
	/* Derive the state timeout from the combined peer states. */
	if (src->state >= TCPS_FIN_WAIT_2 &&
	    dst->state >= TCPS_FIN_WAIT_2)
		(*state)->timeout = PFTM_TCP_CLOSED;
	else if (src->state >= TCPS_CLOSING &&
	    dst->state >= TCPS_CLOSING)
		(*state)->timeout = PFTM_TCP_FIN_WAIT;
	else if (src->state < TCPS_ESTABLISHED ||
	    dst->state < TCPS_ESTABLISHED)
		(*state)->timeout = PFTM_TCP_OPENING;
	else if (src->state >= TCPS_CLOSING ||
	    dst->state >= TCPS_CLOSING)
		(*state)->timeout = PFTM_TCP_CLOSING;
	else
		(*state)->timeout = PFTM_TCP_ESTABLISHED;

	return (PF_PASS);
}

/*
 * TCP SYN proxy.  While src.state is PF_TCPS_PROXY_SRC, pf completes
 * the three-way handshake with the initiator itself; once that is
 * done (PF_TCPS_PROXY_DST) it replays the handshake towards the real
 * destination before letting traffic through.  Returns PF_PASS,
 * PF_DROP, or PF_SYNPROXY_DROP when pf has answered (or swallowed)
 * the packet itself.
 */
static __inline int
pf_synproxy(struct pf_pdesc *pd, struct pf_state **state, u_short *reason)
{
	struct pf_state_key	*sk = (*state)->key[pd->didx];

	if ((*state)->src.state == PF_TCPS_PROXY_SRC) {
		struct tcphdr	*th = &pd->hdr.tcp;

		if (pd->dir != (*state)->direction) {
			REASON_SET(reason, PFRES_SYNPROXY);
			return (PF_SYNPROXY_DROP);
		}
		if (th->th_flags & TH_SYN) {
			if (ntohl(th->th_seq) != (*state)->src.seqlo) {
				REASON_SET(reason, PFRES_SYNPROXY);
				return (PF_DROP);
			}
			/* Answer the initiator's SYN with our own SYN|ACK. */
			pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
			    pd->src, th->th_dport, th->th_sport,
			    (*state)->src.seqhi, ntohl(th->th_seq) + 1,
			    TH_SYN|TH_ACK, 0, (*state)->src.mss, 0, 1,
			    0, pd->rdomain);
			REASON_SET(reason, PFRES_SYNPROXY);
			return (PF_SYNPROXY_DROP);
		} else if ((th->th_flags & (TH_ACK|TH_RST|TH_FIN)) != TH_ACK ||
		    (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
		    (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) {
			REASON_SET(reason, PFRES_SYNPROXY);
			return (PF_DROP);
		} else if (!SLIST_EMPTY(&(*state)->src_nodes) &&
		    pf_src_connlimit(state)) {
			REASON_SET(reason, PFRES_SRCLIMIT);
			return (PF_DROP);
		} else
			/* Initiator handshake complete; proxy to the peer. */
			pf_set_protostate(*state, PF_PEER_SRC,
			    PF_TCPS_PROXY_DST);
	}
	if ((*state)->src.state == PF_TCPS_PROXY_DST) {
		struct tcphdr	*th = &pd->hdr.tcp;

		if (pd->dir == (*state)->direction) {
			if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) ||
			    (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
			    (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) {
				REASON_SET(reason, PFRES_SYNPROXY);
				return (PF_DROP);
			}
			(*state)->src.max_win = MAX(ntohs(th->th_win), 1);
			if ((*state)->dst.seqhi == 1)
				(*state)->dst.seqhi = arc4random();
			/* Client side is done; now SYN to the real peer. */
			pf_send_tcp((*state)->rule.ptr, pd->af,
			    &sk->addr[pd->sidx], &sk->addr[pd->didx],
			    sk->port[pd->sidx], sk->port[pd->didx],
			    (*state)->dst.seqhi, 0, TH_SYN, 0,
			    (*state)->src.mss, 0, 0, (*state)->tag,
			    sk->rdomain);
			REASON_SET(reason, PFRES_SYNPROXY);
			return (PF_SYNPROXY_DROP);
		} else if (((th->th_flags & (TH_SYN|TH_ACK)) !=
		    (TH_SYN|TH_ACK)) ||
		    (ntohl(th->th_ack) != (*state)->dst.seqhi + 1)) {
			REASON_SET(reason, PFRES_SYNPROXY);
			return (PF_DROP);
		} else {
			/*
			 * The peer answered with SYN|ACK: ACK both sides
			 * and record seqdiff values that translate between
			 * the two handshakes' sequence spaces.
			 */
			(*state)->dst.max_win = MAX(ntohs(th->th_win), 1);
			(*state)->dst.seqlo = ntohl(th->th_seq);
			pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
			    pd->src, th->th_dport, th->th_sport,
			    ntohl(th->th_ack), ntohl(th->th_seq) + 1,
			    TH_ACK, (*state)->src.max_win, 0, 0, 0,
			    (*state)->tag, pd->rdomain);
			pf_send_tcp((*state)->rule.ptr, pd->af,
			    &sk->addr[pd->sidx], &sk->addr[pd->didx],
			    sk->port[pd->sidx], sk->port[pd->didx],
			    (*state)->src.seqhi + 1, (*state)->src.seqlo + 1,
			    TH_ACK, (*state)->dst.max_win, 0, 0, 1,
			    0, sk->rdomain);
			(*state)->src.seqdiff = (*state)->dst.seqhi -
			    (*state)->src.seqlo;
			(*state)->dst.seqdiff = (*state)->src.seqhi -
			    (*state)->dst.seqlo;
			(*state)->src.seqhi = (*state)->src.seqlo +
			    (*state)->dst.max_win;
			(*state)->dst.seqhi = (*state)->dst.seqlo +
			    (*state)->src.max_win;
			(*state)->src.wscale = (*state)->dst.wscale = 0;
			pf_set_protostate(*state, PF_PEER_BOTH,
			    TCPS_ESTABLISHED);
			REASON_SET(reason, PFRES_SYNPROXY);
			return (PF_SYNPROXY_DROP);
		}
	}
	return (PF_PASS);
}

/*
 * Per-packet state tracking entry point.  Look up the state matching
 * pd's state key and run the per-protocol tracker (TCP full or
 * sloppy, UDP, other).  Any NAT recorded in the state keys is applied
 * to the packet and the rewritten header is copied back to the mbuf.
 * Returns PF_PASS, PF_DROP, PF_AFRT, PF_SYNPROXY_DROP, or the
 * pf_find_state() result when the lookup does not match.
 */
int
pf_test_state(struct pf_pdesc *pd, struct pf_state **state, u_short *reason,
    int syncookie)
{
	struct pf_state_key_cmp	 key;
	int			 copyback = 0;
	struct pf_state_peer	*src, *dst;
	int			 action;
	struct inpcb		*inp;
	u_int8_t		 psrc, pdst;

	key.af = pd->af;
	key.proto = pd->virtual_proto;
	key.rdomain = pd->rdomain;
	pf_addrcpy(&key.addr[pd->sidx], pd->src, key.af);
	pf_addrcpy(&key.addr[pd->didx], pd->dst, key.af);
	key.port[pd->sidx] = pd->osport;
	key.port[pd->didx] = pd->odport;
	inp = pd->m->m_pkthdr.pf.inp;

	action = pf_find_state(pd, &key, state);
	if (action != PF_MATCH)
		return (action);

	action = PF_PASS;
	/* Select the peer views relative to the packet's direction. */
	if (pd->dir == (*state)->direction) {
		src = &(*state)->src;
		dst = &(*state)->dst;
		psrc = PF_PEER_SRC;
		pdst = PF_PEER_DST;
	} else {
		src = &(*state)->dst;
		dst = &(*state)->src;
		psrc = PF_PEER_DST;
		pdst = PF_PEER_SRC;
	}

	switch (pd->virtual_proto) {
	case IPPROTO_TCP:
		if (syncookie) {
			/*
			 * Syncookie was validated upstream: resume the
			 * syn proxy handshake towards the destination.
			 */
			pf_set_protostate(*state, PF_PEER_SRC,
			    PF_TCPS_PROXY_DST);
			(*state)->dst.seqhi =
			    ntohl(pd->hdr.tcp.th_ack) - 1;
		}
		if ((action = pf_synproxy(pd, state, reason)) != PF_PASS)
			return (action);
		/* A fresh SYN against an existing state needs extra care. */
		if ((pd->hdr.tcp.th_flags & (TH_SYN|TH_ACK)) == TH_SYN) {

			if (dst->state >= TCPS_FIN_WAIT_2 &&
			    src->state >= TCPS_FIN_WAIT_2) {
				if (pf_status.debug >= LOG_NOTICE) {
					log(LOG_NOTICE, "pf: state reuse ");
					pf_print_state(*state);
					pf_print_flags(pd->hdr.tcp.th_flags);
					addlog("\n");
				}
				/* XXX make sure it's the same direction ?? */
				(*state)->timeout = PFTM_PURGE;
				*state = NULL;
				pf_mbuf_link_inpcb(pd->m, inp);
				return (PF_DROP);
			} else if (dst->state >= TCPS_ESTABLISHED &&
			    src->state >= TCPS_ESTABLISHED) {
				/*
				 * SYN matches existing state???
				 * Typically happens when sender boots up after
				 * sudden panic. Certain protocols (NFSv3) are
				 * always using same port numbers. Challenge
				 * ACK enables all parties (firewall and peers)
				 * to get in sync again.
				 */
				pf_send_challenge_ack(pd, *state, src, dst);
				return (PF_DROP);
			}
		}

		/* Run the configured TCP tracker for this state. */
		if ((*state)->state_flags & PFSTATE_SLOPPY) {
			if (pf_tcp_track_sloppy(pd, state, reason) == PF_DROP)
				return (PF_DROP);
		} else {
			if (pf_tcp_track_full(pd, state, reason, &copyback,
			    PF_REVERSED_KEY((*state)->key, pd->af)) == PF_DROP)
				return (PF_DROP);
		}
		break;
	case IPPROTO_UDP:
		/* update states */
		if (src->state < PFUDPS_SINGLE)
			pf_set_protostate(*state, psrc, PFUDPS_SINGLE);
		if (dst->state == PFUDPS_SINGLE)
			pf_set_protostate(*state, pdst, PFUDPS_MULTIPLE);

		/* update expire time */
		(*state)->expire = getuptime();
		if (src->state == PFUDPS_MULTIPLE &&
		    dst->state == PFUDPS_MULTIPLE)
			(*state)->timeout = PFTM_UDP_MULTIPLE;
		else
			(*state)->timeout = PFTM_UDP_SINGLE;
		break;
	default:
		/* update states */
		if (src->state < PFOTHERS_SINGLE)
			pf_set_protostate(*state, psrc,
			    PFOTHERS_SINGLE);
		if (dst->state == PFOTHERS_SINGLE)
			pf_set_protostate(*state, pdst, PFOTHERS_MULTIPLE);

		/* update expire time */
		(*state)->expire = getuptime();
		if (src->state == PFOTHERS_MULTIPLE &&
		    dst->state == PFOTHERS_MULTIPLE)
			(*state)->timeout = PFTM_OTHER_MULTIPLE;
		else
			(*state)->timeout = PFTM_OTHER_SINGLE;
		break;
	}

	/* translate source/destination address, if necessary */
	if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
		struct pf_state_key	*nk;
		int			 afto, sidx, didx;

		if (PF_REVERSED_KEY((*state)->key, pd->af))
			nk = (*state)->key[pd->sidx];
		else
			nk = (*state)->key[pd->didx];

		/* af-to: state translates to a different address family */
		afto = pd->af != nk->af;
		sidx = afto ? pd->didx : pd->sidx;
		didx = afto ? pd->sidx : pd->didx;

#ifdef INET6
		if (afto) {
			pf_addrcpy(&pd->nsaddr, &nk->addr[sidx], nk->af);
			pf_addrcpy(&pd->ndaddr, &nk->addr[didx], nk->af);
			pd->naf = nk->af;
			action = PF_AFRT;
		}
#endif /* INET6 */

		if (!afto)
			pf_translate_a(pd, pd->src, &nk->addr[sidx]);

		if (pd->sport != NULL)
			pf_patch_16(pd, pd->sport, nk->port[sidx]);

		if (afto || PF_ANEQ(pd->dst, &nk->addr[didx], pd->af) ||
		    pd->rdomain != nk->rdomain)
			pd->destchg = 1;

		if (!afto)
			pf_translate_a(pd, pd->dst, &nk->addr[didx]);

		if (pd->dport != NULL)
			pf_patch_16(pd, pd->dport, nk->port[didx]);

		pd->m->m_pkthdr.ph_rtableid = nk->rdomain;
		copyback = 1;
	}

	if (copyback && pd->hdrlen > 0) {
		m_copyback(pd->m, pd->off, pd->hdrlen, &pd->hdr, M_NOWAIT);
	}

	return (action);
}

/*
 * Build a state key for an ICMP query/reply (id in one port slot,
 * virtual type in the other) and look up the matching state.
 * Returns a pf action (e.g. PF_DROP) when the lookup fails or the
 * message flows in the wrong direction for its state, or -1 when a
 * usable state was found and *state is set.
 */
int
pf_icmp_state_lookup(struct pf_pdesc *pd, struct pf_state_key_cmp *key,
    struct pf_state **state, u_int16_t icmpid, u_int16_t type,
    int icmp_dir, int *iidx, int multi, int inner)
{
	int direction, action;

	key->af = pd->af;
	key->proto = pd->proto;
	key->rdomain = pd->rdomain;
	/* *iidx remembers which port slot carries the ICMP id. */
	if (icmp_dir == PF_IN) {
		*iidx = pd->sidx;
		key->port[pd->sidx] = icmpid;
		key->port[pd->didx] = type;
	} else {
		*iidx = pd->didx;
		key->port[pd->sidx] = type;
		key->port[pd->didx] = icmpid;
	}

	if (pf_state_key_addr_setup(pd, key, pd->sidx, pd->src, pd->didx,
	    pd->dst, pd->af, multi))
		return (PF_DROP);

	action = pf_find_state(pd, key, state);
	if (action != PF_MATCH)
		return (action);

	if ((*state)->state_flags & PFSTATE_SLOPPY)
		return (-1);

	/* Is this ICMP message flowing in right direction? */
	if ((*state)->key[PF_SK_WIRE]->af != (*state)->key[PF_SK_STACK]->af)
		direction = (pd->af == (*state)->key[PF_SK_WIRE]->af) ?
		    PF_IN : PF_OUT;
	else
		direction = (*state)->direction;
	if ((((!inner && direction == pd->dir) ||
	    (inner && direction != pd->dir)) ?
	    PF_IN : PF_OUT) != icmp_dir) {
		if (pf_status.debug >= LOG_NOTICE) {
			log(LOG_NOTICE,
			    "pf: icmp type %d in wrong direction (%d): ",
			    ntohs(type), icmp_dir);
			pf_print_state(*state);
			addlog("\n");
		}
		return (PF_DROP);
	}
	return (-1);
}

/*
 * State tracking for ICMP and ICMPv6.  Queries and replies are
 * matched against their own ICMP state; error messages are matched
 * against the state of the quoted (embedded) TCP/UDP/ICMP packet.
 * Applies any NAT recorded in the state keys to both the outer
 * message and the quoted header.
 */
int
pf_test_state_icmp(struct pf_pdesc *pd, struct pf_state **state,
    u_short *reason)
{
	u_int16_t	 virtual_id, virtual_type;
	u_int8_t	 icmptype, icmpcode;
	int		 icmp_dir, iidx, ret, copyback = 0;

	struct pf_state_key_cmp key;

	switch (pd->proto) {
	case IPPROTO_ICMP:
		icmptype = pd->hdr.icmp.icmp_type;
		icmpcode = pd->hdr.icmp.icmp_code;
		break;
#ifdef INET6
	case IPPROTO_ICMPV6:
		icmptype = pd->hdr.icmp6.icmp6_type;
		icmpcode = pd->hdr.icmp6.icmp6_code;
		break;
#endif /* INET6 */
	default:
		panic("unhandled proto %d", pd->proto);
	}

	if (pf_icmp_mapping(pd, icmptype, &icmp_dir, &virtual_id,
	    &virtual_type) == 0) {
		/*
		 * ICMP query/reply message not related to a TCP/UDP packet.
		 * Search for an ICMP state.
		 */
		ret = pf_icmp_state_lookup(pd, &key, state,
		    virtual_id, virtual_type, icmp_dir, &iidx,
		    0, 0);
		/* IPv6? try matching a multicast address */
		if (ret == PF_DROP && pd->af == AF_INET6 && icmp_dir == PF_OUT)
			ret = pf_icmp_state_lookup(pd, &key, state, virtual_id,
			    virtual_type, icmp_dir, &iidx, 1, 0);
		if (ret >= 0)
			return (ret);

		(*state)->expire = getuptime();
		(*state)->timeout = PFTM_ICMP_ERROR_REPLY;

		/* translate source/destination address, if necessary */
		if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
			struct pf_state_key	*nk;
			int			 afto, sidx, didx;

			if (PF_REVERSED_KEY((*state)->key, pd->af))
				nk = (*state)->key[pd->sidx];
			else
				nk = (*state)->key[pd->didx];

			/* af-to: translate to the other address family. */
			afto = pd->af != nk->af;
			sidx = afto ? pd->didx : pd->sidx;
			didx = afto ? pd->sidx : pd->didx;
			iidx = afto ?
			    !iidx : iidx;
#ifdef INET6
			if (afto) {
				pf_addrcpy(&pd->nsaddr, &nk->addr[sidx],
				    nk->af);
				pf_addrcpy(&pd->ndaddr, &nk->addr[didx],
				    nk->af);
				pd->naf = nk->af;
			}
#endif /* INET6 */
			/* Same af: plain NAT rewrite of both addresses. */
			if (!afto) {
				pf_translate_a(pd, pd->src, &nk->addr[sidx]);
				pf_translate_a(pd, pd->dst, &nk->addr[didx]);
			}

			if (pd->rdomain != nk->rdomain)
				pd->destchg = 1;
			if (!afto && PF_ANEQ(pd->dst,
			    &nk->addr[didx], pd->af))
				pd->destchg = 1;
			pd->m->m_pkthdr.ph_rtableid = nk->rdomain;

			/* Patch the query id and write the header back. */
			switch (pd->af) {
			case AF_INET:
#ifdef INET6
				if (afto) {
					if (pf_translate_icmp_af(pd, AF_INET6,
					    &pd->hdr.icmp))
						return (PF_DROP);
					pd->proto = IPPROTO_ICMPV6;
				}
#endif /* INET6 */
				pf_patch_16(pd,
				    &pd->hdr.icmp.icmp_id, nk->port[iidx]);

				m_copyback(pd->m, pd->off, ICMP_MINLEN,
				    &pd->hdr.icmp, M_NOWAIT);
				copyback = 1;
				break;
#ifdef INET6
			case AF_INET6:
				if (afto) {
					if (pf_translate_icmp_af(pd, AF_INET,
					    &pd->hdr.icmp6))
						return (PF_DROP);
					pd->proto = IPPROTO_ICMP;
				}

				pf_patch_16(pd,
				    &pd->hdr.icmp6.icmp6_id, nk->port[iidx]);

				m_copyback(pd->m, pd->off,
				    sizeof(struct icmp6_hdr), &pd->hdr.icmp6,
				    M_NOWAIT);
				copyback = 1;
				break;
#endif /* INET6 */
			}
#ifdef INET6
			if (afto)
				return (PF_AFRT);
#endif /* INET6 */
		}
	} else {
		/*
		 * ICMP error message in response to a TCP/UDP packet.
		 * Extract the inner TCP/UDP header and search for that state.
		 */
		struct pf_pdesc	 pd2;
		struct ip	 h2;
#ifdef INET6
		struct ip6_hdr	 h2_6;
#endif /* INET6 */
		int		 ipoff2;

		/* Initialize pd2 fields valid for both packets with pd. */
		memset(&pd2, 0, sizeof(pd2));
		pd2.af = pd->af;
		pd2.dir = pd->dir;
		pd2.kif = pd->kif;
		pd2.m = pd->m;
		pd2.rdomain = pd->rdomain;
		/* Payload packet is from the opposite direction. */
		pd2.sidx = (pd2.dir == PF_IN) ? 1 : 0;
		pd2.didx = (pd2.dir == PF_IN) ? 0 : 1;
		switch (pd->af) {
		case AF_INET:
			/* offset of h2 in mbuf chain */
			ipoff2 = pd->off + ICMP_MINLEN;

			if (!pf_pull_hdr(pd2.m, ipoff2, &h2, sizeof(h2),
			    NULL, reason, pd2.af)) {
				DPFPRINTF(LOG_NOTICE,
				    "ICMP error message too short (ip)");
				return (PF_DROP);
			}
			/*
			 * ICMP error messages don't refer to non-first
			 * fragments
			 */
			if (h2.ip_off & htons(IP_OFFMASK)) {
				REASON_SET(reason, PFRES_FRAG);
				return (PF_DROP);
			}

			/* offset of protocol header that follows h2 */
			pd2.off = ipoff2;
			if (pf_walk_header(&pd2, &h2, reason) != PF_PASS)
				return (PF_DROP);

			pd2.tot_len = ntohs(h2.ip_len);
			pd2.src = (struct pf_addr *)&h2.ip_src;
			pd2.dst = (struct pf_addr *)&h2.ip_dst;
			break;
#ifdef INET6
		case AF_INET6:
			ipoff2 = pd->off + sizeof(struct icmp6_hdr);

			if (!pf_pull_hdr(pd2.m, ipoff2, &h2_6, sizeof(h2_6),
			    NULL, reason, pd2.af)) {
				DPFPRINTF(LOG_NOTICE,
				    "ICMP error message too short (ip6)");
				return (PF_DROP);
			}

			pd2.off = ipoff2;
			if (pf_walk_header6(&pd2, &h2_6, reason) != PF_PASS)
				return (PF_DROP);

			pd2.tot_len = ntohs(h2_6.ip6_plen) +
			    sizeof(struct ip6_hdr);
			pd2.src = (struct pf_addr *)&h2_6.ip6_src;
			pd2.dst = (struct pf_addr *)&h2_6.ip6_dst;
			break;
#endif /* INET6 */
		default:
			unhandled_af(pd->af);
		}

		/* The outer dst must be the quoted packet's source. */
		if (PF_ANEQ(pd->dst, pd2.src, pd->af)) {
			if (pf_status.debug >= LOG_NOTICE) {
				log(LOG_NOTICE,
				    "pf: BAD ICMP %d:%d outer dst: ",
				    icmptype, icmpcode);
				pf_print_host(pd->src, 0, pd->af);
				addlog(" -> ");
				pf_print_host(pd->dst, 0, pd->af);
				addlog(" inner src: ");
				pf_print_host(pd2.src, 0, pd2.af);
				addlog(" -> ");
				pf_print_host(pd2.dst, 0, pd2.af);
				addlog("\n");
			}
			REASON_SET(reason, PFRES_BADSTATE);
			return (PF_DROP);
		}

		/* Look up the state matching the quoted inner packet. */
		switch (pd2.proto) {
		case IPPROTO_TCP: {
			struct tcphdr		*th = &pd2.hdr.tcp;
			u_int32_t		 seq;
			struct pf_state_peer	*src, *dst;
			u_int8_t		 dws;
			int			 action;

			/*
			 * Only the first 8 bytes of the TCP header can be
			 * expected. Don't access any TCP header fields after
			 * th_seq, an ackskew test is not possible.
			 */
			if (!pf_pull_hdr(pd2.m, pd2.off, th, 8, NULL, reason,
			    pd2.af)) {
				DPFPRINTF(LOG_NOTICE,
				    "ICMP error message too short (tcp)");
				return (PF_DROP);
			}

			key.af = pd2.af;
			key.proto = IPPROTO_TCP;
			key.rdomain = pd2.rdomain;
			pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af);
			pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af);
			key.port[pd2.sidx] = th->th_sport;
			key.port[pd2.didx] = th->th_dport;

			action = pf_find_state(&pd2, &key, state);
			if (action != PF_MATCH)
				return (action);

			/*
			 * Select peer views; the quoted packet flows opposite
			 * to the outer ICMP message, and reversed state keys
			 * flip the mapping once more.
			 */
			if (pd2.dir == (*state)->direction) {
				if (PF_REVERSED_KEY((*state)->key, pd->af)) {
					src = &(*state)->src;
					dst = &(*state)->dst;
				} else {
					src = &(*state)->dst;
					dst = &(*state)->src;
				}
			} else {
				if (PF_REVERSED_KEY((*state)->key, pd->af)) {
					src = &(*state)->dst;
					dst = &(*state)->src;
				} else {
					src = &(*state)->src;
					dst = &(*state)->dst;
				}
			}

			if (src->wscale && dst->wscale)
				dws = dst->wscale & PF_WSCALE_MASK;
			else
				dws = 0;

			/* Demodulate sequence number */
			seq = ntohl(th->th_seq) - src->seqdiff;
			if (src->seqdiff) {
				pf_patch_32(pd, &th->th_seq, htonl(seq));
				copyback = 1;
			}

			/* Quoted sequence number must fit the window. */
			if (!((*state)->state_flags & PFSTATE_SLOPPY) &&
			    (!SEQ_GEQ(src->seqhi, seq) || !SEQ_GEQ(seq,
			    src->seqlo - (dst->max_win << dws)))) {
				if (pf_status.debug >= LOG_NOTICE) {
					log(LOG_NOTICE,
					    "pf: BAD ICMP %d:%d ",
					    icmptype, icmpcode);
					pf_print_host(pd->src, 0, pd->af);
					addlog(" -> ");
					pf_print_host(pd->dst, 0, pd->af);
					addlog(" state: ");
					pf_print_state(*state);
					addlog(" seq=%u\n", seq);
				}
				REASON_SET(reason, PFRES_BADSTATE);
				return (PF_DROP);
			} else {
				if (pf_status.debug >= LOG_DEBUG) {
					log(LOG_DEBUG,
					    "pf: OK ICMP %d:%d ",
					    icmptype, icmpcode);
					pf_print_host(pd->src, 0, pd->af);
					addlog(" -> ");
					pf_print_host(pd->dst, 0, pd->af);
					addlog(" state: ");
					pf_print_state(*state);
					addlog(" seq=%u\n", seq);
				}
			}

			/* translate source/destination address, if necessary */
			if ((*state)->key[PF_SK_WIRE] !=
			    (*state)->key[PF_SK_STACK]) {
				struct pf_state_key	*nk;
				int			 afto, sidx, didx;

				if (PF_REVERSED_KEY((*state)->key, pd->af))
					nk = (*state)->key[pd->sidx];
				else
					nk = (*state)->key[pd->didx];

				afto = pd->af != nk->af;
				sidx = afto ? pd2.didx : pd2.sidx;
				didx = afto ?
				    pd2.sidx : pd2.didx;

#ifdef INET6
				/*
				 * af-to: rewrite the outer ICMP header and
				 * the quoted headers into the other family.
				 */
				if (afto) {
					if (pf_translate_icmp_af(pd, nk->af,
					    &pd->hdr.icmp))
						return (PF_DROP);
					m_copyback(pd->m, pd->off,
					    sizeof(struct icmp6_hdr),
					    &pd->hdr.icmp6, M_NOWAIT);
					if (pf_change_icmp_af(pd->m, ipoff2,
					    pd, &pd2, &nk->addr[sidx],
					    &nk->addr[didx], pd->af, nk->af))
						return (PF_DROP);
					if (nk->af == AF_INET)
						pd->proto = IPPROTO_ICMP;
					else
						pd->proto = IPPROTO_ICMPV6;
					pd->m->m_pkthdr.ph_rtableid =
					    nk->rdomain;
					pd->destchg = 1;
					pf_addrcpy(&pd->nsaddr,
					    &nk->addr[pd2.sidx], nk->af);
					pf_addrcpy(&pd->ndaddr,
					    &nk->addr[pd2.didx], nk->af);
					pd->naf = nk->af;

					pf_patch_16(pd,
					    &th->th_sport, nk->port[sidx]);
					pf_patch_16(pd,
					    &th->th_dport, nk->port[didx]);

					m_copyback(pd2.m, pd2.off, 8, th,
					    M_NOWAIT);
					return (PF_AFRT);
				}
#endif /* INET6 */
				if (PF_ANEQ(pd2.src,
				    &nk->addr[pd2.sidx], pd2.af) ||
				    nk->port[pd2.sidx] != th->th_sport)
					pf_translate_icmp(pd, pd2.src,
					    &th->th_sport, pd->dst,
					    &nk->addr[pd2.sidx],
					    nk->port[pd2.sidx]);

				if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx],
				    pd2.af) || pd2.rdomain != nk->rdomain)
					pd->destchg = 1;
				pd->m->m_pkthdr.ph_rtableid = nk->rdomain;

				if (PF_ANEQ(pd2.dst,
				    &nk->addr[pd2.didx], pd2.af) ||
				    nk->port[pd2.didx] != th->th_dport)
					pf_translate_icmp(pd, pd2.dst,
					    &th->th_dport, pd->src,
					    &nk->addr[pd2.didx],
					    nk->port[pd2.didx]);
				copyback = 1;
			}

			if (copyback) {
				switch (pd2.af) {
				case AF_INET:
					m_copyback(pd->m, pd->off, ICMP_MINLEN,
					    &pd->hdr.icmp, M_NOWAIT);
					m_copyback(pd2.m, ipoff2, sizeof(h2),
					    &h2, M_NOWAIT);
					break;
#ifdef INET6
				case AF_INET6:
					m_copyback(pd->m, pd->off,
					    sizeof(struct icmp6_hdr),
					    &pd->hdr.icmp6, M_NOWAIT);
					m_copyback(pd2.m, ipoff2, sizeof(h2_6),
					    &h2_6,
					    M_NOWAIT);
					break;
#endif /* INET6 */
				}
				m_copyback(pd2.m, pd2.off, 8, th, M_NOWAIT);
			}
			break;
		}
		case IPPROTO_UDP: {
			struct udphdr	*uh = &pd2.hdr.udp;
			int		 action;

			if (!pf_pull_hdr(pd2.m, pd2.off, uh, sizeof(*uh),
			    NULL, reason, pd2.af)) {
				DPFPRINTF(LOG_NOTICE,
				    "ICMP error message too short (udp)");
				return (PF_DROP);
			}

			key.af = pd2.af;
			key.proto = IPPROTO_UDP;
			key.rdomain = pd2.rdomain;
			pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af);
			pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af);
			key.port[pd2.sidx] = uh->uh_sport;
			key.port[pd2.didx] = uh->uh_dport;

			action = pf_find_state(&pd2, &key, state);
			if (action != PF_MATCH)
				return (action);

			/* translate source/destination address, if necessary */
			if ((*state)->key[PF_SK_WIRE] !=
			    (*state)->key[PF_SK_STACK]) {
				struct pf_state_key	*nk;
				int			 afto, sidx, didx;

				if (PF_REVERSED_KEY((*state)->key, pd->af))
					nk = (*state)->key[pd->sidx];
				else
					nk = (*state)->key[pd->didx];

				afto = pd->af != nk->af;
				sidx = afto ? pd2.didx : pd2.sidx;
				didx = afto ?
				    pd2.sidx : pd2.didx;

#ifdef INET6
				/*
				 * af-to: rewrite the outer ICMP header and
				 * the quoted headers into the other family.
				 */
				if (afto) {
					if (pf_translate_icmp_af(pd, nk->af,
					    &pd->hdr.icmp))
						return (PF_DROP);
					m_copyback(pd->m, pd->off,
					    sizeof(struct icmp6_hdr),
					    &pd->hdr.icmp6, M_NOWAIT);
					if (pf_change_icmp_af(pd->m, ipoff2,
					    pd, &pd2, &nk->addr[sidx],
					    &nk->addr[didx], pd->af, nk->af))
						return (PF_DROP);
					if (nk->af == AF_INET)
						pd->proto = IPPROTO_ICMP;
					else
						pd->proto = IPPROTO_ICMPV6;
					pd->m->m_pkthdr.ph_rtableid =
					    nk->rdomain;
					pd->destchg = 1;
					pf_addrcpy(&pd->nsaddr,
					    &nk->addr[pd2.sidx], nk->af);
					pf_addrcpy(&pd->ndaddr,
					    &nk->addr[pd2.didx], nk->af);
					pd->naf = nk->af;

					pf_patch_16(pd,
					    &uh->uh_sport, nk->port[sidx]);
					pf_patch_16(pd,
					    &uh->uh_dport, nk->port[didx]);

					m_copyback(pd2.m, pd2.off, sizeof(*uh),
					    uh, M_NOWAIT);
					return (PF_AFRT);
				}
#endif /* INET6 */

				if (PF_ANEQ(pd2.src,
				    &nk->addr[pd2.sidx], pd2.af) ||
				    nk->port[pd2.sidx] != uh->uh_sport)
					pf_translate_icmp(pd, pd2.src,
					    &uh->uh_sport, pd->dst,
					    &nk->addr[pd2.sidx],
					    nk->port[pd2.sidx]);

				if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx],
				    pd2.af) || pd2.rdomain != nk->rdomain)
					pd->destchg = 1;
				pd->m->m_pkthdr.ph_rtableid = nk->rdomain;

				if (PF_ANEQ(pd2.dst,
				    &nk->addr[pd2.didx], pd2.af) ||
				    nk->port[pd2.didx] != uh->uh_dport)
					pf_translate_icmp(pd, pd2.dst,
					    &uh->uh_dport, pd->src,
					    &nk->addr[pd2.didx],
					    nk->port[pd2.didx]);

				switch (pd2.af) {
				case AF_INET:
					m_copyback(pd->m, pd->off, ICMP_MINLEN,
					    &pd->hdr.icmp, M_NOWAIT);
					m_copyback(pd2.m, ipoff2, sizeof(h2),
					    &h2, M_NOWAIT);
					break;
#ifdef INET6
				case AF_INET6:
					m_copyback(pd->m, pd->off,
					    sizeof(struct icmp6_hdr),
					    &pd->hdr.icmp6, M_NOWAIT);
					m_copyback(pd2.m, ipoff2, sizeof(h2_6),
					    &h2_6, M_NOWAIT);
					break;
#endif /* INET6 */
				}
				/* Avoid recomputing quoted UDP checksum.
				 * note: udp6 0 csum invalid per rfc2460 p27.
				 * but presumed nothing cares in this context */
				pf_patch_16(pd, &uh->uh_sum, 0);
				m_copyback(pd2.m, pd2.off, sizeof(*uh), uh,
				    M_NOWAIT);
				copyback = 1;
			}
			break;
		}
		/* ICMP error quoting an ICMP query: match the query state. */
		case IPPROTO_ICMP: {
			struct icmp	*iih = &pd2.hdr.icmp;

			if (pd2.af != AF_INET) {
				REASON_SET(reason, PFRES_NORM);
				return (PF_DROP);
			}

			if (!pf_pull_hdr(pd2.m, pd2.off, iih, ICMP_MINLEN,
			    NULL, reason, pd2.af)) {
				DPFPRINTF(LOG_NOTICE,
				    "ICMP error message too short (icmp)");
				return (PF_DROP);
			}

			pf_icmp_mapping(&pd2, iih->icmp_type,
			    &icmp_dir, &virtual_id, &virtual_type);

			ret = pf_icmp_state_lookup(&pd2, &key, state,
			    virtual_id, virtual_type, icmp_dir, &iidx, 0, 1);
			if (ret >= 0)
				return (ret);

			/* translate source/destination address, if necessary */
			if ((*state)->key[PF_SK_WIRE] !=
			    (*state)->key[PF_SK_STACK]) {
				struct pf_state_key	*nk;
				int			 afto, sidx, didx;

				if (PF_REVERSED_KEY((*state)->key, pd->af))
					nk = (*state)->key[pd->sidx];
				else
					nk = (*state)->key[pd->didx];

				afto = pd->af != nk->af;
				sidx = afto ? pd2.didx : pd2.sidx;
				didx = afto ? pd2.sidx : pd2.didx;
				iidx = afto ?
				    !iidx : iidx;

#ifdef INET6
				/*
				 * af-to: convert both the outer error and
				 * the quoted ICMP query to IPv6.
				 */
				if (afto) {
					if (nk->af != AF_INET6)
						return (PF_DROP);
					if (pf_translate_icmp_af(pd, nk->af,
					    &pd->hdr.icmp))
						return (PF_DROP);
					m_copyback(pd->m, pd->off,
					    sizeof(struct icmp6_hdr),
					    &pd->hdr.icmp6, M_NOWAIT);
					if (pf_change_icmp_af(pd->m, ipoff2,
					    pd, &pd2, &nk->addr[sidx],
					    &nk->addr[didx], pd->af, nk->af))
						return (PF_DROP);
					pd->proto = IPPROTO_ICMPV6;
					if (pf_translate_icmp_af(pd,
					    nk->af, iih))
						return (PF_DROP);
					if (virtual_type == htons(ICMP_ECHO))
						pf_patch_16(pd, &iih->icmp_id,
						    nk->port[iidx]);
					m_copyback(pd2.m, pd2.off, ICMP_MINLEN,
					    iih, M_NOWAIT);
					pd->m->m_pkthdr.ph_rtableid =
					    nk->rdomain;
					pd->destchg = 1;
					pf_addrcpy(&pd->nsaddr,
					    &nk->addr[pd2.sidx], nk->af);
					pf_addrcpy(&pd->ndaddr,
					    &nk->addr[pd2.didx], nk->af);
					pd->naf = nk->af;
					return (PF_AFRT);
				}
#endif /* INET6 */

				/* Echo queries carry an id worth rewriting. */
				if (PF_ANEQ(pd2.src,
				    &nk->addr[pd2.sidx], pd2.af) ||
				    (virtual_type == htons(ICMP_ECHO) &&
				    nk->port[iidx] != iih->icmp_id))
					pf_translate_icmp(pd, pd2.src,
					    (virtual_type == htons(ICMP_ECHO)) ?
					    &iih->icmp_id : NULL,
					    pd->dst, &nk->addr[pd2.sidx],
					    (virtual_type == htons(ICMP_ECHO)) ?
					    nk->port[iidx] : 0);

				if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx],
				    pd2.af) || pd2.rdomain != nk->rdomain)
					pd->destchg = 1;
				pd->m->m_pkthdr.ph_rtableid = nk->rdomain;

				if (PF_ANEQ(pd2.dst,
				    &nk->addr[pd2.didx], pd2.af))
					pf_translate_icmp(pd, pd2.dst, NULL,
					    pd->src, &nk->addr[pd2.didx], 0);

				m_copyback(pd->m, pd->off, ICMP_MINLEN,
				    &pd->hdr.icmp, M_NOWAIT);
				m_copyback(pd2.m, ipoff2, sizeof(h2), &h2,
				    M_NOWAIT);
				m_copyback(pd2.m, pd2.off, ICMP_MINLEN, iih,
				    M_NOWAIT);
				copyback = 1;
			}
			break;
		}
#ifdef INET6
		case IPPROTO_ICMPV6: {
			struct icmp6_hdr	*iih = &pd2.hdr.icmp6;

			if (pd2.af != AF_INET6) {
				REASON_SET(reason, PFRES_NORM);
				return (PF_DROP);
			}

			if (!pf_pull_hdr(pd2.m, pd2.off, iih,
			    sizeof(struct icmp6_hdr), NULL, reason, pd2.af)) {
				DPFPRINTF(LOG_NOTICE,
				    "ICMP error message too short (icmp6)");
				return (PF_DROP);
			}

			pf_icmp_mapping(&pd2, iih->icmp6_type,
			    &icmp_dir, &virtual_id, &virtual_type);
			ret = pf_icmp_state_lookup(&pd2, &key, state,
			    virtual_id, virtual_type, icmp_dir, &iidx, 0, 1);
			/* IPv6? try matching a multicast address */
			if (ret == PF_DROP && pd2.af == AF_INET6 &&
			    icmp_dir == PF_OUT)
				ret = pf_icmp_state_lookup(&pd2, &key, state,
				    virtual_id, virtual_type, icmp_dir, &iidx,
				    1, 1);
			if (ret >= 0)
				return (ret);

			/* translate source/destination address, if necessary */
			if ((*state)->key[PF_SK_WIRE] !=
			    (*state)->key[PF_SK_STACK]) {
				struct pf_state_key	*nk;
				int			 afto, sidx, didx;

				if (PF_REVERSED_KEY((*state)->key, pd->af))
					nk = (*state)->key[pd->sidx];
				else
					nk = (*state)->key[pd->didx];

				afto = pd->af != nk->af;
				sidx = afto ? pd2.didx : pd2.sidx;
				didx = afto ? pd2.sidx : pd2.didx;
				iidx = afto ?
				    !iidx : iidx;

				/*
				 * af-to: convert both the outer error and
				 * the quoted ICMPv6 query to IPv4.
				 */
				if (afto) {
					if (nk->af != AF_INET)
						return (PF_DROP);
					if (pf_translate_icmp_af(pd, nk->af,
					    &pd->hdr.icmp))
						return (PF_DROP);
					m_copyback(pd->m, pd->off,
					    sizeof(struct icmp6_hdr),
					    &pd->hdr.icmp6, M_NOWAIT);
					if (pf_change_icmp_af(pd->m, ipoff2,
					    pd, &pd2, &nk->addr[sidx],
					    &nk->addr[didx], pd->af, nk->af))
						return (PF_DROP);
					pd->proto = IPPROTO_ICMP;
					if (pf_translate_icmp_af(pd,
					    nk->af, iih))
						return (PF_DROP);
					if (virtual_type ==
					    htons(ICMP6_ECHO_REQUEST))
						pf_patch_16(pd, &iih->icmp6_id,
						    nk->port[iidx]);
					m_copyback(pd2.m, pd2.off,
					    sizeof(struct icmp6_hdr), iih,
					    M_NOWAIT);
					pd->m->m_pkthdr.ph_rtableid =
					    nk->rdomain;
					pd->destchg = 1;
					pf_addrcpy(&pd->nsaddr,
					    &nk->addr[pd2.sidx], nk->af);
					pf_addrcpy(&pd->ndaddr,
					    &nk->addr[pd2.didx], nk->af);
					pd->naf = nk->af;
					return (PF_AFRT);
				}

				/* Echo requests carry an id worth rewriting. */
				if (PF_ANEQ(pd2.src,
				    &nk->addr[pd2.sidx], pd2.af) ||
				    ((virtual_type ==
				    htons(ICMP6_ECHO_REQUEST)) &&
				    nk->port[pd2.sidx] != iih->icmp6_id))
					pf_translate_icmp(pd, pd2.src,
					    (virtual_type ==
					    htons(ICMP6_ECHO_REQUEST))
					    ? &iih->icmp6_id : NULL,
					    pd->dst, &nk->addr[pd2.sidx],
					    (virtual_type ==
					    htons(ICMP6_ECHO_REQUEST))
					    ? nk->port[iidx] : 0);

				if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx],
				    pd2.af) || pd2.rdomain != nk->rdomain)
					pd->destchg = 1;
				pd->m->m_pkthdr.ph_rtableid = nk->rdomain;

				if (PF_ANEQ(pd2.dst,
				    &nk->addr[pd2.didx], pd2.af))
					pf_translate_icmp(pd, pd2.dst, NULL,
					    pd->src, &nk->addr[pd2.didx], 0);

				m_copyback(pd->m, pd->off,
				    sizeof(struct icmp6_hdr), &pd->hdr.icmp6,
				    M_NOWAIT);
				m_copyback(pd2.m, ipoff2, sizeof(h2_6), &h2_6,
				    M_NOWAIT);
				m_copyback(pd2.m, pd2.off,
				    sizeof(struct icmp6_hdr), iih, M_NOWAIT);
				copyback = 1;
			}
			break;
		}
#endif /* INET6 */
		/* Other protocols: match the quoted packet on addresses only. */
		default: {
			int	action;

			key.af = pd2.af;
			key.proto = pd2.proto;
			key.rdomain = pd2.rdomain;
			pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af);
			pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af);
			key.port[0] = key.port[1] = 0;

			action = pf_find_state(&pd2, &key, state);
			if (action != PF_MATCH)
				return (action);

			/* translate source/destination address, if necessary */
			if ((*state)->key[PF_SK_WIRE] !=
			    (*state)->key[PF_SK_STACK]) {
				struct pf_state_key	*nk =
				    (*state)->key[pd->didx];

				if (PF_ANEQ(pd2.src,
				    &nk->addr[pd2.sidx], pd2.af))
					pf_translate_icmp(pd, pd2.src, NULL,
					    pd->dst, &nk->addr[pd2.sidx], 0);

				if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx],
				    pd2.af) || pd2.rdomain != nk->rdomain)
					pd->destchg = 1;
				pd->m->m_pkthdr.ph_rtableid = nk->rdomain;

				if (PF_ANEQ(pd2.dst,
				    &nk->addr[pd2.didx], pd2.af))
					pf_translate_icmp(pd, pd2.dst, NULL,
					    pd->src, &nk->addr[pd2.didx], 0);

				switch (pd2.af) {
				case AF_INET:
					m_copyback(pd->m, pd->off, ICMP_MINLEN,
					    &pd->hdr.icmp, M_NOWAIT);
					m_copyback(pd2.m, ipoff2, sizeof(h2),
					    &h2, M_NOWAIT);
					break;
#ifdef INET6
				case AF_INET6:
					m_copyback(pd->m, pd->off,
					    sizeof(struct icmp6_hdr),
				    &pd->hdr.icmp6, M_NOWAIT);
				m_copyback(pd2.m, ipoff2, sizeof(h2_6),
				    &h2_6, M_NOWAIT);
				break;
#endif /* INET6 */
			}
			copyback = 1;
		}
		break;
	}
	}
	}
	/* write any header modifications back into the mbuf chain */
	if (copyback) {
		m_copyback(pd->m, pd->off, pd->hdrlen, &pd->hdr, M_NOWAIT);
	}

	return (PF_PASS);
}

/*
 * ipoff and off are measured from the start of the mbuf chain.
 * h must be at "ipoff" on the mbuf chain.
 * Copies len bytes at off into p; returns p on success, or NULL with
 * *actionp/*reasonp set when the data is unavailable (fragment or short
 * packet).
 */
void *
pf_pull_hdr(struct mbuf *m, int off, void *p, int len,
    u_short *actionp, u_short *reasonp, sa_family_t af)
{
	int iplen = 0;

	switch (af) {
	case AF_INET: {
		struct ip *h = mtod(m, struct ip *);
		u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;

		/* non-initial fragment: header bytes are not in this packet */
		if (fragoff) {
			if (fragoff >= len)
				ACTION_SET(actionp, PF_PASS);
			else {
				ACTION_SET(actionp, PF_DROP);
				REASON_SET(reasonp, PFRES_FRAG);
			}
			return (NULL);
		}
		iplen = ntohs(h->ip_len);
		break;
	}
#ifdef INET6
	case AF_INET6: {
		struct ip6_hdr *h = mtod(m, struct ip6_hdr *);

		iplen = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
		break;
	}
#endif /* INET6 */
	}
	/* both the mbuf chain and the IP length must cover [off, off+len) */
	if (m->m_pkthdr.len < off + len || iplen < off + len) {
		ACTION_SET(actionp, PF_DROP);
		REASON_SET(reasonp, PFRES_SHORT);
		return (NULL);
	}
	m_copydata(m, off, len, p);
	return (p);
}

/*
 * Check whether addr is routable in rtableid; when kif is given, also
 * perform a uRPF check (optionally across multipath routes) that the
 * route points back at kif (or a carp interface on top of it).
 * Returns 1 when the check passes, 0 otherwise.
 */
int
pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif,
    int rtableid)
{
	struct sockaddr_storage ss;
	struct sockaddr_in *dst;
	int ret = 1;
	int check_mpath;
#ifdef INET6
	struct sockaddr_in6 *dst6;
#endif /* INET6 */
	struct rtentry *rt = NULL;

	check_mpath = 0;
	memset(&ss, 0, sizeof(ss));
	switch (af) {
	case AF_INET:
		dst = (struct sockaddr_in *)&ss;
		dst->sin_family = AF_INET;
		dst->sin_len = sizeof(*dst);
		dst->sin_addr = addr->v4;
		if (ipmultipath)
			check_mpath = 1;
		break;
#ifdef INET6
	case AF_INET6:
		/*
		 * Skip check for addresses with embedded interface scope,
		 * as they would always match anyway.
		 */
		if (IN6_IS_SCOPE_EMBED(&addr->v6))
			goto out;
		dst6 = (struct sockaddr_in6 *)&ss;
		dst6->sin6_family = AF_INET6;
		dst6->sin6_len = sizeof(*dst6);
		dst6->sin6_addr = addr->v6;
		if (ip6_multipath)
			check_mpath = 1;
		break;
#endif /* INET6 */
	}

	/* Skip checks for ipsec interfaces */
	if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC)
		goto out;

	rt = rtalloc(sstosa(&ss), 0, rtableid);
	if (rt != NULL) {
		/* No interface given, this is a no-route check */
		if (kif == NULL)
			goto out;

		if (kif->pfik_ifp == NULL) {
			ret = 0;
			goto out;
		}

		/* Perform uRPF check if passed input interface */
		ret = 0;
		do {
			if (rt->rt_ifidx == kif->pfik_ifp->if_index) {
				ret = 1;
#if NCARP > 0
			} else {
				/* accept carp interfaces layered on kif */
				struct ifnet *ifp;

				ifp = if_get(rt->rt_ifidx);
				if (ifp != NULL && ifp->if_type == IFT_CARP &&
				    ifp->if_carpdev == kif->pfik_ifp)
					ret = 1;
				if_put(ifp);
#endif /* NCARP */
			}

			rt = rtable_iterate(rt);
		} while (check_mpath == 1 && rt != NULL && ret == 0);
	} else
		ret = 0;
out:
	rtfree(rt);
	return (ret);
}

/*
 * Return 1 if the route to addr in rtableid carries the route label
 * aw->v.rtlabel, 0 otherwise.
 */
int
pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw,
    int rtableid)
{
	struct sockaddr_storage ss;
	struct sockaddr_in *dst;
#ifdef INET6
	struct sockaddr_in6 *dst6;
#endif /* INET6 */
	struct rtentry *rt;
	int ret = 0;

	memset(&ss, 0, sizeof(ss));
	switch (af) {
	case AF_INET:
		dst = (struct sockaddr_in *)&ss;
		dst->sin_family = AF_INET;
		dst->sin_len = sizeof(*dst);
		dst->sin_addr = addr->v4;
		break;
#ifdef INET6
	case AF_INET6:
		dst6 = (struct sockaddr_in6 *)&ss;
		dst6->sin6_family = AF_INET6;
		dst6->sin6_len = sizeof(*dst6);
		dst6->sin6_addr = addr->v6;
		break;
#endif /* INET6 */
	}

	rt = rtalloc(sstosa(&ss), RT_RESOLVE, rtableid);
	if (rt != NULL) {
		if (rt->rt_labelid == aw->v.rtlabel)
			ret = 1;
		rtfree(rt);
	}

	return (ret);
}

/* pf_route() may change pd->m, adjust local copies after calling */
void
pf_route(struct pf_pdesc *pd, struct pf_rule *r, struct pf_state *s)
{
	struct mbuf *m0, *m1;
	struct sockaddr_in *dst, sin;
	struct rtentry *rt = NULL;
	struct ip *ip;
	struct ifnet *ifp = NULL;
	struct pf_addr naddr;
	struct pf_src_node *sns[PF_SN_MAX];
	int error = 0;
	unsigned int rtableid;

	/* guard against route-to loops */
	if (pd->m->m_pkthdr.pf.routed++ > 3) {
		m_freem(pd->m);
		pd->m = NULL;
		return;
	}

	if (r->rt == PF_DUPTO) {
		/* dup-to keeps the original packet on its normal path */
		if ((m0 = m_dup_pkt(pd->m, max_linkhdr, M_NOWAIT)) == NULL)
			return;
	} else {
		if ((r->rt == PF_REPLYTO) == (r->direction == pd->dir))
			return;
		m0 = pd->m;
	}

	if (m0->m_len < sizeof(struct ip)) {
		DPFPRINTF(LOG_ERR,
		    "%s: m0->m_len < sizeof(struct ip)", __func__);
		goto bad;
	}

	ip = mtod(m0, struct ip *);

	memset(&sin, 0, sizeof(sin));
	dst = &sin;
	dst->sin_family = AF_INET;
	dst->sin_len = sizeof(*dst);
	dst->sin_addr = ip->ip_dst;
	rtableid = m0->m_pkthdr.ph_rtableid;

	if (pd->dir == PF_IN) {
		if (ip->ip_ttl <= IPTTLDEC) {
			if (r->rt != PF_DUPTO)
				pf_send_icmp(m0, ICMP_TIMXCEED,
				    ICMP_TIMXCEED_INTRANS, 0,
				    pd->af, r, pd->rdomain);
			goto bad;
		}
		ip->ip_ttl -= IPTTLDEC;
	}

	if (s == NULL) {
		/* stateless: pick the route-to address now */
		memset(sns, 0, sizeof(sns));
		if (pf_map_addr(AF_INET, r,
		    (struct pf_addr *)&ip->ip_src,
		    &naddr, NULL, sns, &r->route, PF_SN_ROUTE)) {
			DPFPRINTF(LOG_ERR,
			    "%s: pf_map_addr() failed", __func__);
			goto bad;
		}

		if (!PF_AZERO(&naddr, AF_INET))
			dst->sin_addr.s_addr = naddr.v4.s_addr;
		ifp = r->route.kif ?
		    r->route.kif->pfik_ifp : NULL;
	} else {
		/* stateful: reuse the address chosen at state creation */
		if (!PF_AZERO(&s->rt_addr, AF_INET))
			dst->sin_addr.s_addr =
			    s->rt_addr.v4.s_addr;
		ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
	}
	if (ifp == NULL)
		goto bad;

	/* re-run pf on the outbound interface if it changed */
	if (pd->kif->pfik_ifp != ifp) {
		if (pf_test(AF_INET, PF_OUT, ifp, &m0) != PF_PASS)
			goto bad;
		else if (m0 == NULL)
			goto done;
		if (m0->m_len < sizeof(struct ip)) {
			DPFPRINTF(LOG_ERR,
			    "%s: m0->m_len < sizeof(struct ip)", __func__);
			goto bad;
		}
		ip = mtod(m0, struct ip *);
	}

	rt = rtalloc(sintosa(dst), RT_RESOLVE, rtableid);
	if (!rtisvalid(rt)) {
		ipstat_inc(ips_noroute);
		goto bad;
	}
	/* A locally generated packet may have invalid source address. */
	if ((ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET &&
	    (ifp->if_flags & IFF_LOOPBACK) == 0)
		ip->ip_src = ifatoia(rt->rt_ifa)->ia_addr.sin_addr;

	in_proto_cksum_out(m0, ifp);

	if (ntohs(ip->ip_len) <= ifp->if_mtu) {
		ip->ip_sum = 0;
		if (ifp->if_capabilities & IFCAP_CSUM_IPv4)
			m0->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT;
		else {
			ipstat_inc(ips_outswcsum);
			ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
		}
		error = ifp->if_output(ifp, m0, sintosa(dst), rt);
		goto done;
	}

	/*
	 * Too large for interface; fragment if possible.
	 * Must be able to put at least 8 bytes per fragment.
	 */
	if (ip->ip_off & htons(IP_DF)) {
		ipstat_inc(ips_cantfrag);
		if (r->rt != PF_DUPTO)
			pf_send_icmp(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG,
			    ifp->if_mtu, pd->af, r, pd->rdomain);
		goto bad;
	}

	m1 = m0;
	error = ip_fragment(m0, ifp, ifp->if_mtu);
	if (error) {
		/* ip_fragment() consumed m0 on failure */
		m0 = NULL;
		goto bad;
	}

	/* send the fragment chain; free remaining fragments on error */
	for (m0 = m1; m0; m0 = m1) {
		m1 = m0->m_nextpkt;
		m0->m_nextpkt = 0;
		if (error == 0)
			error = ifp->if_output(ifp, m0, sintosa(dst), rt);
		else
			m_freem(m0);
	}

	if (error == 0)
		ipstat_inc(ips_fragmented);

done:
	if (r->rt != PF_DUPTO)
		pd->m = NULL;
	rtfree(rt);
	return;

bad:
	m_freem(m0);
	goto done;
}

#ifdef INET6
/* pf_route6() may change pd->m, adjust local copies after calling */
void
pf_route6(struct pf_pdesc *pd, struct pf_rule *r, struct pf_state *s)
{
	struct mbuf *m0;
	struct sockaddr_in6 *dst, sin6;
	struct rtentry *rt = NULL;
	struct ip6_hdr *ip6;
	struct ifnet *ifp = NULL;
	struct pf_addr naddr;
	struct pf_src_node *sns[PF_SN_MAX];
	struct m_tag *mtag;
	unsigned int rtableid;

	/* guard against route-to loops */
	if (pd->m->m_pkthdr.pf.routed++ > 3) {
		m_freem(pd->m);
		pd->m = NULL;
		return;
	}

	if (r->rt == PF_DUPTO) {
		/* dup-to keeps the original packet on its normal path */
		if ((m0 = m_dup_pkt(pd->m, max_linkhdr, M_NOWAIT)) == NULL)
			return;
	} else {
		if ((r->rt == PF_REPLYTO) == (r->direction == pd->dir))
			return;
		m0 = pd->m;
	}

	if (m0->m_len < sizeof(struct ip6_hdr)) {
		DPFPRINTF(LOG_ERR,
		    "%s: m0->m_len < sizeof(struct ip6_hdr)", __func__);
		goto bad;
	}
	ip6 = mtod(m0, struct ip6_hdr *);

	memset(&sin6, 0, sizeof(sin6));
	dst = &sin6;
	dst->sin6_family = AF_INET6;
	dst->sin6_len = sizeof(*dst);
	dst->sin6_addr = ip6->ip6_dst;
	rtableid = m0->m_pkthdr.ph_rtableid;

	if (pd->dir == PF_IN) {
		if (ip6->ip6_hlim <= IPV6_HLIMDEC) {
			if (r->rt != PF_DUPTO)
				pf_send_icmp(m0, ICMP6_TIME_EXCEEDED,
				    ICMP6_TIME_EXCEED_TRANSIT, 0,
				    pd->af, r, pd->rdomain);
			goto bad;
		}
		ip6->ip6_hlim -= IPV6_HLIMDEC;
	}

	if (s == NULL) {
		/* stateless: pick the route-to address now */
		memset(sns, 0, sizeof(sns));
		if (pf_map_addr(AF_INET6, r, (struct pf_addr *)&ip6->ip6_src,
		    &naddr, NULL, sns, &r->route, PF_SN_ROUTE)) {
			DPFPRINTF(LOG_ERR,
			    "%s: pf_map_addr() failed", __func__);
			goto bad;
		}
		if (!PF_AZERO(&naddr, AF_INET6))
			pf_addrcpy((struct pf_addr *)&dst->sin6_addr,
			    &naddr, AF_INET6);
		ifp = r->route.kif ? r->route.kif->pfik_ifp : NULL;
	} else {
		/* stateful: reuse the address chosen at state creation */
		if (!PF_AZERO(&s->rt_addr, AF_INET6))
			pf_addrcpy((struct pf_addr *)&dst->sin6_addr,
			    &s->rt_addr, AF_INET6);
		ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
	}
	if (ifp == NULL)
		goto bad;

	/* re-run pf on the outbound interface if it changed */
	if (pd->kif->pfik_ifp != ifp) {
		if (pf_test(AF_INET6, PF_OUT, ifp, &m0) != PF_PASS)
			goto bad;
		else if (m0 == NULL)
			goto done;
		if (m0->m_len < sizeof(struct ip6_hdr)) {
			DPFPRINTF(LOG_ERR,
			    "%s: m0->m_len < sizeof(struct ip6_hdr)", __func__);
			goto bad;
		}
	}

	if (IN6_IS_SCOPE_EMBED(&dst->sin6_addr))
		dst->sin6_addr.s6_addr16[1] = htons(ifp->if_index);
	rt = rtalloc(sin6tosa(dst), RT_RESOLVE, rtableid);
	if (!rtisvalid(rt)) {
		ip6stat_inc(ip6s_noroute);
		goto bad;
	}
	/* A locally generated packet may have invalid source address. */
	if (IN6_IS_ADDR_LOOPBACK(&ip6->ip6_src) &&
	    (ifp->if_flags & IFF_LOOPBACK) == 0)
		ip6->ip6_src = ifatoia6(rt->rt_ifa)->ia_addr.sin6_addr;

	in6_proto_cksum_out(m0, ifp);

	/*
	 * If packet has been reassembled by PF earlier, we have to
	 * use pf_refragment6() here to turn it back to fragments.
	 */
	if ((mtag = m_tag_find(m0, PACKET_TAG_PF_REASSEMBLED, NULL))) {
		(void) pf_refragment6(&m0, mtag, dst, ifp, rt);
	} else if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) {
		ifp->if_output(ifp, m0, sin6tosa(dst), rt);
	} else {
		/* IPv6 routers never fragment; signal the sender instead */
		ip6stat_inc(ip6s_cantfrag);
		if (r->rt != PF_DUPTO)
			pf_send_icmp(m0, ICMP6_PACKET_TOO_BIG, 0,
			    ifp->if_mtu, pd->af, r, pd->rdomain);
		goto bad;
	}

done:
	if (r->rt != PF_DUPTO)
		pd->m = NULL;
	rtfree(rt);
	return;

bad:
	m_freem(m0);
	goto done;
}
#endif /* INET6 */


/*
 * check TCP checksum and set mbuf flag
 * off is the offset where the protocol header starts
 * len is the total length of protocol header plus payload
 * returns 0 when the checksum is valid, otherwise returns 1.
 * if the _OUT flag is set the checksum isn't done yet, consider these ok
 */
int
pf_check_tcp_cksum(struct mbuf *m, int off, int len, sa_family_t af)
{
	u_int16_t sum;

	/* already verified (or will be computed) by hardware/stack */
	if (m->m_pkthdr.csum_flags &
	    (M_TCP_CSUM_IN_OK | M_TCP_CSUM_OUT)) {
		return (0);
	}
	if (m->m_pkthdr.csum_flags & M_TCP_CSUM_IN_BAD ||
	    off < sizeof(struct ip) ||
	    m->m_pkthdr.len < off + len) {
		return (1);
	}

	/* need to do it in software */
	tcpstat_inc(tcps_inswcsum);

	switch (af) {
	case AF_INET:
		if (m->m_len < sizeof(struct ip))
			return (1);

		sum = in4_cksum(m, IPPROTO_TCP, off, len);
		break;
#ifdef INET6
	case AF_INET6:
		if (m->m_len < sizeof(struct ip6_hdr))
			return (1);

		sum = in6_cksum(m, IPPROTO_TCP, off, len);
		break;
#endif /* INET6 */
	default:
		unhandled_af(af);
	}
	if (sum) {
		tcpstat_inc(tcps_rcvbadsum);
		m->m_pkthdr.csum_flags |= M_TCP_CSUM_IN_BAD;
		return (1);
	}

	/* cache the result so a later check is free */
	m->m_pkthdr.csum_flags |= M_TCP_CSUM_IN_OK;
	return (0);
}

/*
 * Return the divert tag payload attached to m, or NULL when the mbuf
 * carries no PACKET_TAG_PF_DIVERT tag.
 */
struct pf_divert *
pf_find_divert(struct mbuf *m)
{
	struct m_tag *mtag;

	if ((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL)
		return (NULL);

	return ((struct pf_divert *)(mtag + 1));
}

/*
 * Like pf_find_divert(), but allocate and prepend a zeroed divert tag
 * when none exists yet.  Returns NULL only on allocation failure.
 */
struct pf_divert *
pf_get_divert(struct mbuf *m)
{
	struct m_tag *mtag;

	if ((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL) {
		mtag = m_tag_get(PACKET_TAG_PF_DIVERT, sizeof(struct pf_divert),
		    M_NOWAIT);
		if (mtag == NULL)
			return (NULL);
		memset(mtag + 1, 0, sizeof(struct pf_divert));
		m_tag_prepend(m, mtag);
	}

	return ((struct pf_divert *)(mtag + 1));
}

/*
 * Walk the IPv4 header (and any chain of AH headers behind it),
 * advancing pd->off and pd->proto to the upper-layer protocol.
 * Counts IP options in pd->badopts.  Returns PF_PASS or PF_DROP
 * with *reason set.
 */
int
pf_walk_header(struct pf_pdesc *pd, struct ip *h, u_short *reason)
{
	struct ip6_ext ext;
	u_int32_t hlen, end;
	int hdr_cnt;

	hlen = h->ip_hl << 2;
	if (hlen < sizeof(struct ip) || hlen > ntohs(h->ip_len)) {
		REASON_SET(reason, PFRES_SHORT);
		return (PF_DROP);
	}
	if (hlen != sizeof(struct ip))
		pd->badopts++;
	end = pd->off + ntohs(h->ip_len);
	pd->off += hlen;
	pd->proto = h->ip_p;
	/* stop walking over non initial fragments */
	if ((h->ip_off & htons(IP_OFFMASK)) != 0)
		return (PF_PASS);

	/* pf_hdr_limit bounds the number of nested AH headers */
	for (hdr_cnt = 0; hdr_cnt < pf_hdr_limit; hdr_cnt++) {
		switch (pd->proto) {
		case IPPROTO_AH:
			/* fragments may be short */
			if ((h->ip_off & htons(IP_MF | IP_OFFMASK)) != 0 &&
			    end < pd->off + sizeof(ext))
				return (PF_PASS);
			if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext),
			    NULL, reason, AF_INET)) {
				DPFPRINTF(LOG_NOTICE, "IP short exthdr");
				return (PF_DROP);
			}
			/* AH length is in 4-byte units, excluding first 2 */
			pd->off += (ext.ip6e_len + 2) * 4;
			pd->proto = ext.ip6e_nxt;
			break;
		default:
			return (PF_PASS);
		}
	}
	DPFPRINTF(LOG_NOTICE, "IPv4 nested authentication header limit");
	REASON_SET(reason, PFRES_IPOPTIONS);
	return (PF_DROP);
}

#ifdef INET6
int
pf_walk_option6(struct pf_pdesc *pd, struct ip6_hdr *h, int off, int end,
    u_short *reason)
{
	/*
	 * Walk the hop-by-hop options between off and end, validating the
	 * jumbo payload option (stored into pd->jumbolen).  Returns
	 * PF_PASS or PF_DROP with *reason set.
	 */
	struct ip6_opt opt;
	struct ip6_opt_jumbo jumbo;

	while (off < end) {
		if (!pf_pull_hdr(pd->m, off, &opt.ip6o_type,
		    sizeof(opt.ip6o_type), NULL, reason, AF_INET6)) {
			DPFPRINTF(LOG_NOTICE, "IPv6 short opt type");
			return (PF_DROP);
		}
		/* Pad1 is a lone byte with no length field */
		if (opt.ip6o_type == IP6OPT_PAD1) {
			off++;
			continue;
		}
		if (!pf_pull_hdr(pd->m, off, &opt, sizeof(opt),
		    NULL, reason, AF_INET6)) {
			DPFPRINTF(LOG_NOTICE, "IPv6 short opt");
			return (PF_DROP);
		}
		if (off + sizeof(opt) + opt.ip6o_len > end) {
			DPFPRINTF(LOG_NOTICE, "IPv6 long opt");
			REASON_SET(reason, PFRES_IPOPTIONS);
			return (PF_DROP);
		}
		switch (opt.ip6o_type) {
		case IP6OPT_JUMBO:
			if (pd->jumbolen != 0) {
				DPFPRINTF(LOG_NOTICE, "IPv6 multiple jumbo");
				REASON_SET(reason, PFRES_IPOPTIONS);
				return (PF_DROP);
			}
			/* jumbograms require a zero ip6_plen */
			if (ntohs(h->ip6_plen) != 0) {
				DPFPRINTF(LOG_NOTICE, "IPv6 bad jumbo plen");
				REASON_SET(reason, PFRES_IPOPTIONS);
				return (PF_DROP);
			}
			if (!pf_pull_hdr(pd->m, off, &jumbo, sizeof(jumbo),
			    NULL, reason, AF_INET6)) {
				DPFPRINTF(LOG_NOTICE, "IPv6 short jumbo");
				return (PF_DROP);
			}
			memcpy(&pd->jumbolen, jumbo.ip6oj_jumbo_len,
			    sizeof(pd->jumbolen));
			pd->jumbolen = ntohl(pd->jumbolen);
			if (pd->jumbolen < IPV6_MAXPACKET) {
				DPFPRINTF(LOG_NOTICE, "IPv6 short jumbolen");
				REASON_SET(reason, PFRES_IPOPTIONS);
				return (PF_DROP);
			}
			break;
		default:
			break;
		}
		off += sizeof(opt) + opt.ip6o_len;
	}

	return (PF_PASS);
}

/*
 * Walk the IPv6 extension header chain, advancing pd->off and pd->proto
 * to the upper-layer protocol and recording pd->fragoff/pd->extoff/
 * pd->jumbolen.  Counts option headers in pd->badopts.  Returns PF_PASS
 * or PF_DROP with *reason set.
 */
int
pf_walk_header6(struct pf_pdesc *pd, struct ip6_hdr *h, u_short *reason)
{
	struct ip6_frag frag;
	struct ip6_ext ext;
	struct ip6_rthdr rthdr;
	u_int32_t end;
	int hdr_cnt, fraghdr_cnt = 0, rthdr_cnt = 0;

	pd->off += sizeof(struct ip6_hdr);
	end = pd->off + ntohs(h->ip6_plen);
	pd->fragoff = pd->extoff = pd->jumbolen = 0;
	pd->proto = h->ip6_nxt;

	/* pf_hdr_limit bounds the number of nested extension headers */
	for (hdr_cnt = 0; hdr_cnt < pf_hdr_limit; hdr_cnt++) {
		switch (pd->proto) {
		case IPPROTO_ROUTING:
		case IPPROTO_HOPOPTS:
		case IPPROTO_DSTOPTS:
			pd->badopts++;
			break;
		}
		switch (pd->proto) {
		case IPPROTO_FRAGMENT:
			if (fraghdr_cnt++) {
				DPFPRINTF(LOG_NOTICE, "IPv6 multiple fragment");
				REASON_SET(reason, PFRES_FRAG);
				return (PF_DROP);
			}
			/* jumbo payload packets cannot be fragmented */
			if (pd->jumbolen != 0) {
				DPFPRINTF(LOG_NOTICE, "IPv6 fragmented jumbo");
				REASON_SET(reason, PFRES_FRAG);
				return (PF_DROP);
			}
			if (!pf_pull_hdr(pd->m, pd->off, &frag, sizeof(frag),
			    NULL, reason, AF_INET6)) {
				DPFPRINTF(LOG_NOTICE, "IPv6 short fragment");
				return (PF_DROP);
			}
			/* stop walking over non initial fragments */
			if (ntohs((frag.ip6f_offlg & IP6F_OFF_MASK)) != 0) {
				pd->fragoff = pd->off;
				return (PF_PASS);
			}
			/* RFC6946:  reassemble only non atomic fragments */
			if (frag.ip6f_offlg & IP6F_MORE_FRAG)
				pd->fragoff = pd->off;
			pd->off += sizeof(frag);
			pd->proto = frag.ip6f_nxt;
			break;
		case IPPROTO_ROUTING:
			if (rthdr_cnt++) {
				DPFPRINTF(LOG_NOTICE, "IPv6 multiple rthdr");
				REASON_SET(reason, PFRES_IPOPTIONS);
				return (PF_DROP);
			}
			/* fragments may be short */
			if (pd->fragoff != 0 && end < pd->off + sizeof(rthdr)) {
				pd->off = pd->fragoff;
				pd->proto = IPPROTO_FRAGMENT;
				return (PF_PASS);
			}
			if (!pf_pull_hdr(pd->m, pd->off, &rthdr, sizeof(rthdr),
			    NULL, reason, AF_INET6)) {
				DPFPRINTF(LOG_NOTICE, "IPv6 short rthdr");
				return (PF_DROP);
			}
			/* type 0 routing headers are deprecated (RFC 5095) */
			if (rthdr.ip6r_type == IPV6_RTHDR_TYPE_0) {
				DPFPRINTF(LOG_NOTICE, "IPv6 rthdr0");
				REASON_SET(reason, PFRES_IPOPTIONS);
				return (PF_DROP);
			}
			/* FALLTHROUGH */
		case IPPROTO_HOPOPTS:
			/* RFC2460 4.1:  Hop-by-Hop only after IPv6 header */
			if (pd->proto == IPPROTO_HOPOPTS && hdr_cnt > 0) {
				DPFPRINTF(LOG_NOTICE, "IPv6 hopopts not first");
				REASON_SET(reason, PFRES_IPOPTIONS);
				return (PF_DROP);
			}
			/* FALLTHROUGH */
		case IPPROTO_AH:
		case IPPROTO_DSTOPTS:
			/* fragments may be short */
			if (pd->fragoff != 0 && end < pd->off + sizeof(ext)) {
				pd->off = pd->fragoff;
				pd->proto = IPPROTO_FRAGMENT;
				return (PF_PASS);
			}
			if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext),
			    NULL, reason, AF_INET6)) {
				DPFPRINTF(LOG_NOTICE, "IPv6 short exthdr");
				return (PF_DROP);
			}
			/* reassembly needs the ext header before the frag */
			if (pd->fragoff == 0)
				pd->extoff = pd->off;
			if (pd->proto == IPPROTO_HOPOPTS && pd->fragoff == 0) {
				if (pf_walk_option6(pd, h,
				    pd->off + sizeof(ext),
				    pd->off + (ext.ip6e_len + 1) * 8, reason)
				    != PF_PASS)
					return (PF_DROP);
				if (ntohs(h->ip6_plen) == 0 &&
				    pd->jumbolen != 0) {
					DPFPRINTF(LOG_NOTICE,
					    "IPv6 missing jumbo");
					REASON_SET(reason, PFRES_IPOPTIONS);
					return (PF_DROP);
				}
			}
			/* AH counts in 4-byte units, others in 8-byte units */
			if (pd->proto == IPPROTO_AH)
				pd->off += (ext.ip6e_len + 2) * 4;
			else
				pd->off += (ext.ip6e_len + 1) * 8;
			pd->proto = ext.ip6e_nxt;
			break;
		case IPPROTO_TCP:
		case IPPROTO_UDP:
		case IPPROTO_ICMPV6:
			/* fragments may be short, ignore inner header then */
			if (pd->fragoff != 0 && end < pd->off +
			    (pd->proto == IPPROTO_TCP ? sizeof(struct tcphdr) :
			    pd->proto == IPPROTO_UDP ? sizeof(struct udphdr) :
			    sizeof(struct icmp6_hdr))) {
				pd->off = pd->fragoff;
				pd->proto = IPPROTO_FRAGMENT;
			}
			/* FALLTHROUGH */
		default:
			return (PF_PASS);
		}
	}
	DPFPRINTF(LOG_NOTICE, "IPv6 nested extension header limit");
	REASON_SET(reason, PFRES_IPOPTIONS);
	return (PF_DROP);
}
#endif /* INET6 */

/*
 * Initialize the packet descriptor pd for mbuf m: validate lengths,
 * walk the header chain and extract per-protocol fields used by the
 * rest of pf.  Returns PF_PASS or PF_DROP with *reason set.
 */
int
pf_setup_pdesc(struct pf_pdesc *pd, sa_family_t af, int dir,
    struct pfi_kif *kif, struct mbuf *m, u_short *reason)
{
	memset(pd, 0, sizeof(*pd));
	pd->dir = dir;
	pd->kif = kif;		/* kif is NULL when called by pflog */
	pd->m = m;
	pd->sidx = (dir == PF_IN) ? 0 : 1;
	pd->didx = (dir == PF_IN) ? 1 : 0;
	pd->af = pd->naf = af;
	pd->rdomain = rtable_l2(pd->m->m_pkthdr.ph_rtableid);

	switch (pd->af) {
	case AF_INET: {
		struct ip *h;

		/* Check for illegal packets */
		if (pd->m->m_pkthdr.len < (int)sizeof(struct ip)) {
			REASON_SET(reason, PFRES_SHORT);
			return (PF_DROP);
		}

		h = mtod(pd->m, struct ip *);
		if (pd->m->m_pkthdr.len < ntohs(h->ip_len)) {
			REASON_SET(reason, PFRES_SHORT);
			return (PF_DROP);
		}

		if (pf_walk_header(pd, h, reason) != PF_PASS)
			return (PF_DROP);

		pd->src = (struct pf_addr *)&h->ip_src;
		pd->dst = (struct pf_addr *)&h->ip_dst;
		pd->tot_len = ntohs(h->ip_len);
		pd->tos = h->ip_tos & ~IPTOS_ECN_MASK;
		pd->ttl = h->ip_ttl;
		pd->virtual_proto = (h->ip_off & htons(IP_MF | IP_OFFMASK)) ?
		    PF_VPROTO_FRAGMENT : pd->proto;

		break;
	}
#ifdef INET6
	case AF_INET6: {
		struct ip6_hdr *h;

		/* Check for illegal packets */
		if (pd->m->m_pkthdr.len < (int)sizeof(struct ip6_hdr)) {
			REASON_SET(reason, PFRES_SHORT);
			return (PF_DROP);
		}

		h = mtod(pd->m, struct ip6_hdr *);
		if (pd->m->m_pkthdr.len <
		    sizeof(struct ip6_hdr) + ntohs(h->ip6_plen)) {
			REASON_SET(reason, PFRES_SHORT);
			return (PF_DROP);
		}

		if (pf_walk_header6(pd, h, reason) != PF_PASS)
			return (PF_DROP);

#if 1
		/*
		 * we do not support jumbogram yet.  if we keep going, zero
		 * ip6_plen will do something bad, so drop the packet for now.
		 */
		if (pd->jumbolen != 0) {
			REASON_SET(reason, PFRES_NORM);
			return (PF_DROP);
		}
#endif /* 1 */

		pd->src = (struct pf_addr *)&h->ip6_src;
		pd->dst = (struct pf_addr *)&h->ip6_dst;
		pd->tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
		/* traffic class bits, minus ECN, from the flow word */
		pd->tos = (ntohl(h->ip6_flow) & 0x0fc00000) >> 20;
		pd->ttl = h->ip6_hlim;
		pd->virtual_proto = (pd->fragoff != 0) ?
		    PF_VPROTO_FRAGMENT : pd->proto;

		break;
	}
#endif /* INET6 */
	default:
		panic("pf_setup_pdesc called with illegal af %u", pd->af);

	}

	pf_addrcpy(&pd->nsaddr, pd->src, pd->af);
	pf_addrcpy(&pd->ndaddr, pd->dst, pd->af);

	switch (pd->virtual_proto) {
	case IPPROTO_TCP: {
		struct tcphdr *th = &pd->hdr.tcp;

		if (!pf_pull_hdr(pd->m, pd->off, th, sizeof(*th),
		    NULL, reason, pd->af))
			return (PF_DROP);
		pd->hdrlen = sizeof(*th);
		/* th_off is in 32-bit words */
		if (pd->off + (th->th_off << 2) > pd->tot_len ||
		    (th->th_off << 2) < sizeof(struct tcphdr)) {
			REASON_SET(reason, PFRES_SHORT);
			return (PF_DROP);
		}
		pd->p_len = pd->tot_len - pd->off - (th->th_off << 2);
		pd->sport = &th->th_sport;
		pd->dport = &th->th_dport;
		pd->pcksum = &th->th_sum;
		break;
	}
	case IPPROTO_UDP: {
		struct udphdr *uh = &pd->hdr.udp;

		if (!pf_pull_hdr(pd->m, pd->off, uh, sizeof(*uh),
		    NULL, reason, pd->af))
			return (PF_DROP);
		pd->hdrlen = sizeof(*uh);
		if (uh->uh_dport == 0 ||
		    pd->off + ntohs(uh->uh_ulen) > pd->tot_len ||
		    ntohs(uh->uh_ulen) < sizeof(struct udphdr)) {
			REASON_SET(reason, PFRES_SHORT);
			return (PF_DROP);
		}
		pd->sport = &uh->uh_sport;
		pd->dport = &uh->uh_dport;
		pd->pcksum = &uh->uh_sum;
		break;
	}
	case IPPROTO_ICMP: {
		if (!pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp, ICMP_MINLEN,
		    NULL, reason, pd->af))
			return (PF_DROP);
		pd->hdrlen = ICMP_MINLEN;
		if (pd->off + pd->hdrlen > pd->tot_len) {
			REASON_SET(reason, PFRES_SHORT);
			return (PF_DROP);
		}
		pd->pcksum = &pd->hdr.icmp.icmp_cksum;
		break;
	}
#ifdef INET6
	case IPPROTO_ICMPV6: {
		size_t icmp_hlen = sizeof(struct icmp6_hdr);

		if (!pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp6, icmp_hlen,
		    NULL, reason, pd->af))
			return (PF_DROP);
		/* ICMP headers we look further into to match state */
		switch (pd->hdr.icmp6.icmp6_type) {
		case MLD_LISTENER_QUERY:
		case MLD_LISTENER_REPORT:
			icmp_hlen = sizeof(struct mld_hdr);
			break;
		case ND_NEIGHBOR_SOLICIT:
		case ND_NEIGHBOR_ADVERT:
			icmp_hlen = sizeof(struct nd_neighbor_solicit);
			/* FALLTHROUGH */
		case ND_ROUTER_SOLICIT:
		case ND_ROUTER_ADVERT:
		case ND_REDIRECT:
			/* ND packets must arrive with an unchanged hop limit */
			if (pd->ttl != 255) {
				REASON_SET(reason, PFRES_NORM);
				return (PF_DROP);
			}
			break;
		}
		/* re-pull with the larger type-specific length if needed */
		if (icmp_hlen > sizeof(struct icmp6_hdr) &&
		    !pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp6, icmp_hlen,
		    NULL, reason, pd->af))
			return (PF_DROP);
		pd->hdrlen = icmp_hlen;
		if (pd->off + pd->hdrlen > pd->tot_len) {
			REASON_SET(reason, PFRES_SHORT);
			return (PF_DROP);
		}
		pd->pcksum = &pd->hdr.icmp6.icmp6_cksum;
		break;
	}
#endif /* INET6 */
	}

	if (pd->sport)
		pd->osport = pd->nsport = *pd->sport;
	if (pd->dport)
		pd->odport = pd->ndport = *pd->dport;

	return (PF_PASS);
}

/*
 * Update interface, rule, anchor, state and source-node packet/byte
 * counters (and table stats) after a filtering decision.
 */
void
pf_counters_inc(int action, struct pf_pdesc *pd, struct pf_state *s,
    struct pf_rule *r, struct pf_rule *a)
{
	int dirndx;
	pd->kif->pfik_bytes[pd->af == AF_INET6][pd->dir == PF_OUT]
	    [action != PF_PASS] += pd->tot_len;
	pd->kif->pfik_packets[pd->af == AF_INET6][pd->dir == PF_OUT]
	    [action != PF_PASS]++;

	if (action == PF_PASS || action == PF_AFRT || r->action == PF_DROP) {
		dirndx = (pd->dir == PF_OUT);
		r->packets[dirndx]++;
		r->bytes[dirndx] += pd->tot_len;
		if (a != NULL) {
			a->packets[dirndx]++;
			a->bytes[dirndx] += pd->tot_len;
		}
		if (s != NULL) {
			struct pf_rule_item *ri;
			struct pf_sn_item *sni;

			SLIST_FOREACH(sni, &s->src_nodes, next) {
				sni->sn->packets[dirndx]++;
				sni->sn->bytes[dirndx] += pd->tot_len;
			}
			/* state counters are indexed by state direction */
			dirndx = (pd->dir == s->direction) ? 0 : 1;
			s->packets[dirndx]++;
			s->bytes[dirndx] += pd->tot_len;

			SLIST_FOREACH(ri, &s->match_rules, entry) {
				ri->r->packets[dirndx]++;
				ri->r->bytes[dirndx] += pd->tot_len;

				if (ri->r->src.addr.type == PF_ADDR_TABLE)
					pfr_update_stats(ri->r->src.addr.p.tbl,
					    &s->key[(s->direction == PF_IN)]->
						addr[(s->direction == PF_OUT)],
					    pd, ri->r->action, ri->r->src.neg);
				if (ri->r->dst.addr.type == PF_ADDR_TABLE)
					pfr_update_stats(ri->r->dst.addr.p.tbl,
					    &s->key[(s->direction == PF_IN)]->
						addr[(s->direction == PF_IN)],
					    pd, ri->r->action, ri->r->dst.neg);
			}
		}
		if (r->src.addr.type == PF_ADDR_TABLE)
			pfr_update_stats(r->src.addr.p.tbl,
			    (s == NULL) ? pd->src :
			    &s->key[(s->direction == PF_IN)]->
				addr[(s->direction == PF_OUT)],
			    pd, r->action, r->src.neg);
		if (r->dst.addr.type == PF_ADDR_TABLE)
			pfr_update_stats(r->dst.addr.p.tbl,
			    (s == NULL) ? pd->dst :
			    &s->key[(s->direction == PF_IN)]->
				addr[(s->direction == PF_IN)],
			    pd, r->action, r->dst.neg);
	}
}

int
pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0)
{
	struct pfi_kif *kif;
	u_short action, reason = 0;
	struct pf_rule *a = NULL, *r = &pf_default_rule;
	struct pf_state *s = NULL;
	struct pf_ruleset *ruleset = NULL;
	struct pf_pdesc pd;
	int dir = (fwdir == PF_FWD) ?
PF_OUT : fwdir; 6862 u_int32_t qid, pqid = 0; 6863 int have_pf_lock = 0; 6864 6865 if (!pf_status.running) 6866 return (PF_PASS); 6867 6868 #if NCARP > 0 6869 if (ifp->if_type == IFT_CARP && ifp->if_carpdev) 6870 kif = (struct pfi_kif *)ifp->if_carpdev->if_pf_kif; 6871 else 6872 #endif /* NCARP */ 6873 kif = (struct pfi_kif *)ifp->if_pf_kif; 6874 6875 if (kif == NULL) { 6876 DPFPRINTF(LOG_ERR, 6877 "%s: kif == NULL, if_xname %s", __func__, ifp->if_xname); 6878 return (PF_DROP); 6879 } 6880 if (kif->pfik_flags & PFI_IFLAG_SKIP) 6881 return (PF_PASS); 6882 6883 #ifdef DIAGNOSTIC 6884 if (((*m0)->m_flags & M_PKTHDR) == 0) 6885 panic("non-M_PKTHDR is passed to pf_test"); 6886 #endif /* DIAGNOSTIC */ 6887 6888 if ((*m0)->m_pkthdr.pf.flags & PF_TAG_GENERATED) 6889 return (PF_PASS); 6890 6891 if ((*m0)->m_pkthdr.pf.flags & PF_TAG_DIVERTED_PACKET) 6892 return (PF_PASS); 6893 6894 if ((*m0)->m_pkthdr.pf.flags & PF_TAG_REFRAGMENTED) { 6895 (*m0)->m_pkthdr.pf.flags &= ~PF_TAG_REFRAGMENTED; 6896 return (PF_PASS); 6897 } 6898 6899 action = pf_setup_pdesc(&pd, af, dir, kif, *m0, &reason); 6900 if (action != PF_PASS) { 6901 #if NPFLOG > 0 6902 pd.pflog |= PF_LOG_FORCE; 6903 #endif /* NPFLOG > 0 */ 6904 goto done; 6905 } 6906 6907 /* packet normalization and reassembly */ 6908 switch (pd.af) { 6909 case AF_INET: 6910 action = pf_normalize_ip(&pd, &reason); 6911 break; 6912 #ifdef INET6 6913 case AF_INET6: 6914 action = pf_normalize_ip6(&pd, &reason); 6915 break; 6916 #endif /* INET6 */ 6917 } 6918 *m0 = pd.m; 6919 /* if packet sits in reassembly queue, return without error */ 6920 if (pd.m == NULL) 6921 return PF_PASS; 6922 6923 if (action != PF_PASS) { 6924 #if NPFLOG > 0 6925 pd.pflog |= PF_LOG_FORCE; 6926 #endif /* NPFLOG > 0 */ 6927 goto done; 6928 } 6929 6930 /* if packet has been reassembled, update packet description */ 6931 if (pf_status.reass && pd.virtual_proto == PF_VPROTO_FRAGMENT) { 6932 action = pf_setup_pdesc(&pd, af, dir, kif, pd.m, &reason); 6933 if (action != 
PF_PASS) { 6934 #if NPFLOG > 0 6935 pd.pflog |= PF_LOG_FORCE; 6936 #endif /* NPFLOG > 0 */ 6937 goto done; 6938 } 6939 } 6940 pd.m->m_pkthdr.pf.flags |= PF_TAG_PROCESSED; 6941 6942 /* 6943 * Avoid pcb-lookups from the forwarding path. They should never 6944 * match and would cause MP locking problems. 6945 */ 6946 if (fwdir == PF_FWD) { 6947 pd.lookup.done = -1; 6948 pd.lookup.uid = -1; 6949 pd.lookup.gid = -1; 6950 pd.lookup.pid = NO_PID; 6951 } 6952 6953 switch (pd.virtual_proto) { 6954 6955 case PF_VPROTO_FRAGMENT: { 6956 /* 6957 * handle fragments that aren't reassembled by 6958 * normalization 6959 */ 6960 PF_LOCK(); 6961 have_pf_lock = 1; 6962 action = pf_test_rule(&pd, &r, &s, &a, &ruleset, &reason); 6963 s = pf_state_ref(s); 6964 if (action != PF_PASS) 6965 REASON_SET(&reason, PFRES_FRAG); 6966 break; 6967 } 6968 6969 case IPPROTO_ICMP: { 6970 if (pd.af != AF_INET) { 6971 action = PF_DROP; 6972 REASON_SET(&reason, PFRES_NORM); 6973 DPFPRINTF(LOG_NOTICE, 6974 "dropping IPv6 packet with ICMPv4 payload"); 6975 break; 6976 } 6977 PF_STATE_ENTER_READ(); 6978 action = pf_test_state_icmp(&pd, &s, &reason); 6979 s = pf_state_ref(s); 6980 PF_STATE_EXIT_READ(); 6981 if (action == PF_PASS || action == PF_AFRT) { 6982 #if NPFSYNC > 0 6983 pfsync_update_state(s, &have_pf_lock); 6984 #endif /* NPFSYNC > 0 */ 6985 r = s->rule.ptr; 6986 a = s->anchor.ptr; 6987 #if NPFLOG > 0 6988 pd.pflog |= s->log; 6989 #endif /* NPFLOG > 0 */ 6990 } else if (s == NULL) { 6991 PF_LOCK(); 6992 have_pf_lock = 1; 6993 action = pf_test_rule(&pd, &r, &s, &a, &ruleset, 6994 &reason); 6995 s = pf_state_ref(s); 6996 } 6997 break; 6998 } 6999 7000 #ifdef INET6 7001 case IPPROTO_ICMPV6: { 7002 if (pd.af != AF_INET6) { 7003 action = PF_DROP; 7004 REASON_SET(&reason, PFRES_NORM); 7005 DPFPRINTF(LOG_NOTICE, 7006 "dropping IPv4 packet with ICMPv6 payload"); 7007 break; 7008 } 7009 PF_STATE_ENTER_READ(); 7010 action = pf_test_state_icmp(&pd, &s, &reason); 7011 s = pf_state_ref(s); 7012 
PF_STATE_EXIT_READ(); 7013 if (action == PF_PASS || action == PF_AFRT) { 7014 #if NPFSYNC > 0 7015 pfsync_update_state(s, &have_pf_lock); 7016 #endif /* NPFSYNC > 0 */ 7017 r = s->rule.ptr; 7018 a = s->anchor.ptr; 7019 #if NPFLOG > 0 7020 pd.pflog |= s->log; 7021 #endif /* NPFLOG > 0 */ 7022 } else if (s == NULL) { 7023 PF_LOCK(); 7024 have_pf_lock = 1; 7025 action = pf_test_rule(&pd, &r, &s, &a, &ruleset, 7026 &reason); 7027 s = pf_state_ref(s); 7028 } 7029 break; 7030 } 7031 #endif /* INET6 */ 7032 7033 default: 7034 if (pd.virtual_proto == IPPROTO_TCP) { 7035 if (pd.dir == PF_IN && (pd.hdr.tcp.th_flags & 7036 (TH_SYN|TH_ACK)) == TH_SYN && 7037 pf_synflood_check(&pd)) { 7038 PF_LOCK(); 7039 have_pf_lock = 1; 7040 pf_syncookie_send(&pd); 7041 action = PF_DROP; 7042 break; 7043 } 7044 if ((pd.hdr.tcp.th_flags & TH_ACK) && pd.p_len == 0) 7045 pqid = 1; 7046 action = pf_normalize_tcp(&pd); 7047 if (action == PF_DROP) 7048 break; 7049 } 7050 PF_STATE_ENTER_READ(); 7051 action = pf_test_state(&pd, &s, &reason, 0); 7052 s = pf_state_ref(s); 7053 PF_STATE_EXIT_READ(); 7054 if (s == NULL && action != PF_PASS && action != PF_AFRT && 7055 pd.dir == PF_IN && pd.virtual_proto == IPPROTO_TCP && 7056 (pd.hdr.tcp.th_flags & (TH_SYN|TH_ACK|TH_RST)) == TH_ACK && 7057 pf_syncookie_validate(&pd)) { 7058 struct mbuf *msyn; 7059 msyn = pf_syncookie_recreate_syn(&pd); 7060 if (msyn) { 7061 action = pf_test(af, fwdir, ifp, &msyn); 7062 m_freem(msyn); 7063 if (action == PF_PASS || action == PF_AFRT) { 7064 PF_STATE_ENTER_READ(); 7065 pf_test_state(&pd, &s, &reason, 1); 7066 s = pf_state_ref(s); 7067 PF_STATE_EXIT_READ(); 7068 if (s == NULL) 7069 return (PF_DROP); 7070 s->src.seqhi = 7071 ntohl(pd.hdr.tcp.th_ack) - 1; 7072 s->src.seqlo = 7073 ntohl(pd.hdr.tcp.th_seq) - 1; 7074 pf_set_protostate(s, PF_PEER_SRC, 7075 PF_TCPS_PROXY_DST); 7076 PF_LOCK(); 7077 have_pf_lock = 1; 7078 action = pf_synproxy(&pd, &s, &reason); 7079 if (action != PF_PASS) { 7080 PF_UNLOCK(); 7081 pf_state_unref(s); 
7082 return (action); 7083 } 7084 } 7085 } else 7086 action = PF_DROP; 7087 } 7088 7089 if (action == PF_PASS || action == PF_AFRT) { 7090 #if NPFSYNC > 0 7091 pfsync_update_state(s, &have_pf_lock); 7092 #endif /* NPFSYNC > 0 */ 7093 r = s->rule.ptr; 7094 a = s->anchor.ptr; 7095 #if NPFLOG > 0 7096 pd.pflog |= s->log; 7097 #endif /* NPFLOG > 0 */ 7098 } else if (s == NULL) { 7099 PF_LOCK(); 7100 have_pf_lock = 1; 7101 action = pf_test_rule(&pd, &r, &s, &a, &ruleset, 7102 &reason); 7103 s = pf_state_ref(s); 7104 } 7105 7106 if (pd.virtual_proto == IPPROTO_TCP) { 7107 if (s) { 7108 if (s->max_mss) 7109 pf_normalize_mss(&pd, s->max_mss); 7110 } else if (r->max_mss) 7111 pf_normalize_mss(&pd, r->max_mss); 7112 } 7113 7114 break; 7115 } 7116 7117 if (have_pf_lock != 0) 7118 PF_UNLOCK(); 7119 7120 /* 7121 * At the moment, we rely on NET_LOCK() to prevent removal of items 7122 * we've collected above ('r', 'anchor' and 'ruleset'). They'll have 7123 * to be refcounted when NET_LOCK() is gone. 7124 */ 7125 7126 done: 7127 if (action != PF_DROP) { 7128 if (s) { 7129 /* The non-state case is handled in pf_test_rule() */ 7130 if (action == PF_PASS && pd.badopts && 7131 !(s->state_flags & PFSTATE_ALLOWOPTS)) { 7132 action = PF_DROP; 7133 REASON_SET(&reason, PFRES_IPOPTIONS); 7134 #if NPFLOG > 0 7135 pd.pflog |= PF_LOG_FORCE; 7136 #endif /* NPFLOG > 0 */ 7137 DPFPRINTF(LOG_NOTICE, "dropping packet with " 7138 "ip/ipv6 options in pf_test()"); 7139 } 7140 7141 pf_scrub(pd.m, s->state_flags, pd.af, s->min_ttl, 7142 s->set_tos); 7143 pf_tag_packet(pd.m, s->tag, s->rtableid[pd.didx]); 7144 if (pqid || (pd.tos & IPTOS_LOWDELAY)) { 7145 qid = s->pqid; 7146 if (s->state_flags & PFSTATE_SETPRIO) 7147 pd.m->m_pkthdr.pf.prio = s->set_prio[1]; 7148 } else { 7149 qid = s->qid; 7150 if (s->state_flags & PFSTATE_SETPRIO) 7151 pd.m->m_pkthdr.pf.prio = s->set_prio[0]; 7152 } 7153 pd.m->m_pkthdr.pf.delay = s->delay; 7154 } else { 7155 pf_scrub(pd.m, r->scrub_flags, pd.af, r->min_ttl, 7156 
r->set_tos); 7157 if (pqid || (pd.tos & IPTOS_LOWDELAY)) { 7158 qid = r->pqid; 7159 if (r->scrub_flags & PFSTATE_SETPRIO) 7160 pd.m->m_pkthdr.pf.prio = r->set_prio[1]; 7161 } else { 7162 qid = r->qid; 7163 if (r->scrub_flags & PFSTATE_SETPRIO) 7164 pd.m->m_pkthdr.pf.prio = r->set_prio[0]; 7165 } 7166 pd.m->m_pkthdr.pf.delay = r->delay; 7167 } 7168 } 7169 7170 if (action == PF_PASS && qid) 7171 pd.m->m_pkthdr.pf.qid = qid; 7172 if (pd.dir == PF_IN && s && s->key[PF_SK_STACK]) 7173 pf_mbuf_link_state_key(pd.m, s->key[PF_SK_STACK]); 7174 if (pd.dir == PF_OUT && 7175 pd.m->m_pkthdr.pf.inp && !pd.m->m_pkthdr.pf.inp->inp_pf_sk && 7176 s && s->key[PF_SK_STACK] && !s->key[PF_SK_STACK]->inp) 7177 pf_state_key_link_inpcb(s->key[PF_SK_STACK], 7178 pd.m->m_pkthdr.pf.inp); 7179 7180 if (s && (pd.m->m_pkthdr.csum_flags & M_FLOWID) == 0) 7181 pd.m->m_pkthdr.ph_flowid = bemtoh64(&s->id); 7182 7183 /* 7184 * connections redirected to loopback should not match sockets 7185 * bound specifically to loopback due to security implications, 7186 * see in_pcblookup_listen(). 7187 */ 7188 if (pd.destchg) 7189 if ((pd.af == AF_INET && (ntohl(pd.dst->v4.s_addr) >> 7190 IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) || 7191 (pd.af == AF_INET6 && IN6_IS_ADDR_LOOPBACK(&pd.dst->v6))) 7192 pd.m->m_pkthdr.pf.flags |= PF_TAG_TRANSLATE_LOCALHOST; 7193 /* We need to redo the route lookup on outgoing routes. 
*/ 7194 if (pd.destchg && pd.dir == PF_OUT) 7195 pd.m->m_pkthdr.pf.flags |= PF_TAG_REROUTE; 7196 7197 if (pd.dir == PF_IN && action == PF_PASS && 7198 (r->divert.type == PF_DIVERT_TO || 7199 r->divert.type == PF_DIVERT_REPLY)) { 7200 struct pf_divert *divert; 7201 7202 if ((divert = pf_get_divert(pd.m))) { 7203 pd.m->m_pkthdr.pf.flags |= PF_TAG_DIVERTED; 7204 divert->addr = r->divert.addr; 7205 divert->port = r->divert.port; 7206 divert->rdomain = pd.rdomain; 7207 divert->type = r->divert.type; 7208 } 7209 } 7210 7211 if (action == PF_PASS && r->divert.type == PF_DIVERT_PACKET) 7212 action = PF_DIVERT; 7213 7214 #if NPFLOG > 0 7215 if (pd.pflog) { 7216 struct pf_rule_item *ri; 7217 7218 if (pd.pflog & PF_LOG_FORCE || r->log & PF_LOG_ALL) 7219 pflog_packet(&pd, reason, r, a, ruleset, NULL); 7220 if (s) { 7221 SLIST_FOREACH(ri, &s->match_rules, entry) 7222 if (ri->r->log & PF_LOG_ALL) 7223 pflog_packet(&pd, reason, ri->r, a, 7224 ruleset, NULL); 7225 } 7226 } 7227 #endif /* NPFLOG > 0 */ 7228 7229 pf_counters_inc(action, &pd, s, r, a); 7230 7231 switch (action) { 7232 case PF_SYNPROXY_DROP: 7233 m_freem(pd.m); 7234 /* FALLTHROUGH */ 7235 case PF_DEFER: 7236 pd.m = NULL; 7237 action = PF_PASS; 7238 break; 7239 case PF_DIVERT: 7240 switch (pd.af) { 7241 case AF_INET: 7242 if (!divert_packet(pd.m, pd.dir, r->divert.port)) 7243 pd.m = NULL; 7244 break; 7245 #ifdef INET6 7246 case AF_INET6: 7247 if (!divert6_packet(pd.m, pd.dir, r->divert.port)) 7248 pd.m = NULL; 7249 break; 7250 #endif /* INET6 */ 7251 } 7252 action = PF_PASS; 7253 break; 7254 #ifdef INET6 7255 case PF_AFRT: 7256 if (pf_translate_af(&pd)) { 7257 action = PF_DROP; 7258 break; 7259 } 7260 pd.m->m_pkthdr.pf.flags |= PF_TAG_GENERATED; 7261 switch (pd.naf) { 7262 case AF_INET: 7263 if (pd.dir == PF_IN) 7264 ip_forward(pd.m, ifp, NULL, 1); 7265 else 7266 ip_output(pd.m, NULL, NULL, 0, NULL, NULL, 0); 7267 break; 7268 case AF_INET6: 7269 if (pd.dir == PF_IN) 7270 ip6_forward(pd.m, NULL, 1); 7271 else 7272 
ip6_output(pd.m, NULL, NULL, 0, NULL, NULL); 7273 break; 7274 } 7275 pd.m = NULL; 7276 action = PF_PASS; 7277 break; 7278 #endif /* INET6 */ 7279 case PF_DROP: 7280 m_freem(pd.m); 7281 pd.m = NULL; 7282 break; 7283 default: 7284 if (r->rt) { 7285 switch (pd.af) { 7286 case AF_INET: 7287 pf_route(&pd, r, s); 7288 break; 7289 #ifdef INET6 7290 case AF_INET6: 7291 pf_route6(&pd, r, s); 7292 break; 7293 #endif /* INET6 */ 7294 } 7295 } 7296 break; 7297 } 7298 7299 #ifdef INET6 7300 /* if reassembled packet passed, create new fragments */ 7301 if (pf_status.reass && action == PF_PASS && pd.m && fwdir == PF_FWD && 7302 pd.af == AF_INET6) { 7303 struct m_tag *mtag; 7304 7305 if ((mtag = m_tag_find(pd.m, PACKET_TAG_PF_REASSEMBLED, NULL))) 7306 action = pf_refragment6(&pd.m, mtag, NULL, NULL, NULL); 7307 } 7308 #endif /* INET6 */ 7309 if (s && action != PF_DROP) { 7310 if (!s->if_index_in && dir == PF_IN) 7311 s->if_index_in = ifp->if_index; 7312 else if (!s->if_index_out && dir == PF_OUT) 7313 s->if_index_out = ifp->if_index; 7314 } 7315 7316 *m0 = pd.m; 7317 7318 pf_state_unref(s); 7319 7320 return (action); 7321 } 7322 7323 int 7324 pf_ouraddr(struct mbuf *m) 7325 { 7326 struct pf_state_key *sk; 7327 7328 if (m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) 7329 return (1); 7330 7331 sk = m->m_pkthdr.pf.statekey; 7332 if (sk != NULL) { 7333 if (sk->inp != NULL) 7334 return (1); 7335 } 7336 7337 return (-1); 7338 } 7339 7340 /* 7341 * must be called whenever any addressing information such as 7342 * address, port, protocol has changed 7343 */ 7344 void 7345 pf_pkt_addr_changed(struct mbuf *m) 7346 { 7347 pf_mbuf_unlink_state_key(m); 7348 pf_mbuf_unlink_inpcb(m); 7349 } 7350 7351 struct inpcb * 7352 pf_inp_lookup(struct mbuf *m) 7353 { 7354 struct inpcb *inp = NULL; 7355 struct pf_state_key *sk = m->m_pkthdr.pf.statekey; 7356 7357 if (!pf_state_key_isvalid(sk)) 7358 pf_mbuf_unlink_state_key(m); 7359 else 7360 inp = m->m_pkthdr.pf.statekey->inp; 7361 7362 if (inp && inp->inp_pf_sk) 
7363 KASSERT(m->m_pkthdr.pf.statekey == inp->inp_pf_sk); 7364 7365 return (inp); 7366 } 7367 7368 void 7369 pf_inp_link(struct mbuf *m, struct inpcb *inp) 7370 { 7371 struct pf_state_key *sk = m->m_pkthdr.pf.statekey; 7372 7373 if (!pf_state_key_isvalid(sk)) { 7374 pf_mbuf_unlink_state_key(m); 7375 return; 7376 } 7377 7378 /* 7379 * we don't need to grab PF-lock here. At worst case we link inp to 7380 * state, which might be just being marked as deleted by another 7381 * thread. 7382 */ 7383 if (inp && !sk->inp && !inp->inp_pf_sk) 7384 pf_state_key_link_inpcb(sk, inp); 7385 7386 /* The statekey has finished finding the inp, it is no longer needed. */ 7387 pf_mbuf_unlink_state_key(m); 7388 } 7389 7390 void 7391 pf_inp_unlink(struct inpcb *inp) 7392 { 7393 pf_inpcb_unlink_state_key(inp); 7394 } 7395 7396 void 7397 pf_state_key_link_reverse(struct pf_state_key *sk, struct pf_state_key *skrev) 7398 { 7399 /* Note that sk and skrev may be equal, then we refcount twice. */ 7400 KASSERT(sk->reverse == NULL); 7401 KASSERT(skrev->reverse == NULL); 7402 sk->reverse = pf_state_key_ref(skrev); 7403 skrev->reverse = pf_state_key_ref(sk); 7404 } 7405 7406 #if NPFLOG > 0 7407 void 7408 pf_log_matches(struct pf_pdesc *pd, struct pf_rule *rm, struct pf_rule *am, 7409 struct pf_ruleset *ruleset, struct pf_rule_slist *matchrules) 7410 { 7411 struct pf_rule_item *ri; 7412 7413 /* if this is the log(matches) rule, packet has been logged already */ 7414 if (rm->log & PF_LOG_MATCHES) 7415 return; 7416 7417 SLIST_FOREACH(ri, matchrules, entry) 7418 if (ri->r->log & PF_LOG_MATCHES) 7419 pflog_packet(pd, PFRES_MATCH, rm, am, ruleset, ri->r); 7420 } 7421 #endif /* NPFLOG > 0 */ 7422 7423 struct pf_state_key * 7424 pf_state_key_ref(struct pf_state_key *sk) 7425 { 7426 if (sk != NULL) 7427 PF_REF_TAKE(sk->refcnt); 7428 7429 return (sk); 7430 } 7431 7432 void 7433 pf_state_key_unref(struct pf_state_key *sk) 7434 { 7435 if (PF_REF_RELE(sk->refcnt)) { 7436 /* state key must be removed from tree 
*/ 7437 KASSERT(!pf_state_key_isvalid(sk)); 7438 /* state key must be unlinked from reverse key */ 7439 KASSERT(sk->reverse == NULL); 7440 /* state key must be unlinked from socket */ 7441 KASSERT(sk->inp == NULL); 7442 pool_put(&pf_state_key_pl, sk); 7443 } 7444 } 7445 7446 int 7447 pf_state_key_isvalid(struct pf_state_key *sk) 7448 { 7449 return ((sk != NULL) && (sk->removed == 0)); 7450 } 7451 7452 void 7453 pf_mbuf_link_state_key(struct mbuf *m, struct pf_state_key *sk) 7454 { 7455 KASSERT(m->m_pkthdr.pf.statekey == NULL); 7456 m->m_pkthdr.pf.statekey = pf_state_key_ref(sk); 7457 } 7458 7459 void 7460 pf_mbuf_unlink_state_key(struct mbuf *m) 7461 { 7462 struct pf_state_key *sk = m->m_pkthdr.pf.statekey; 7463 7464 if (sk != NULL) { 7465 m->m_pkthdr.pf.statekey = NULL; 7466 pf_state_key_unref(sk); 7467 } 7468 } 7469 7470 void 7471 pf_mbuf_link_inpcb(struct mbuf *m, struct inpcb *inp) 7472 { 7473 KASSERT(m->m_pkthdr.pf.inp == NULL); 7474 m->m_pkthdr.pf.inp = in_pcbref(inp); 7475 } 7476 7477 void 7478 pf_mbuf_unlink_inpcb(struct mbuf *m) 7479 { 7480 struct inpcb *inp = m->m_pkthdr.pf.inp; 7481 7482 if (inp != NULL) { 7483 m->m_pkthdr.pf.inp = NULL; 7484 in_pcbunref(inp); 7485 } 7486 } 7487 7488 void 7489 pf_state_key_link_inpcb(struct pf_state_key *sk, struct inpcb *inp) 7490 { 7491 KASSERT(sk->inp == NULL); 7492 sk->inp = in_pcbref(inp); 7493 KASSERT(inp->inp_pf_sk == NULL); 7494 inp->inp_pf_sk = pf_state_key_ref(sk); 7495 } 7496 7497 void 7498 pf_inpcb_unlink_state_key(struct inpcb *inp) 7499 { 7500 struct pf_state_key *sk = inp->inp_pf_sk; 7501 7502 if (sk != NULL) { 7503 KASSERT(sk->inp == inp); 7504 sk->inp = NULL; 7505 inp->inp_pf_sk = NULL; 7506 pf_state_key_unref(sk); 7507 in_pcbunref(inp); 7508 } 7509 } 7510 7511 void 7512 pf_state_key_unlink_inpcb(struct pf_state_key *sk) 7513 { 7514 struct inpcb *inp = sk->inp; 7515 7516 if (inp != NULL) { 7517 KASSERT(inp->inp_pf_sk == sk); 7518 sk->inp = NULL; 7519 inp->inp_pf_sk = NULL; 7520 pf_state_key_unref(sk); 
7521 in_pcbunref(inp); 7522 } 7523 } 7524 7525 void 7526 pf_state_key_unlink_reverse(struct pf_state_key *sk) 7527 { 7528 struct pf_state_key *skrev = sk->reverse; 7529 7530 /* Note that sk and skrev may be equal, then we unref twice. */ 7531 if (skrev != NULL) { 7532 KASSERT(skrev->reverse == sk); 7533 sk->reverse = NULL; 7534 skrev->reverse = NULL; 7535 pf_state_key_unref(skrev); 7536 pf_state_key_unref(sk); 7537 } 7538 } 7539 7540 struct pf_state * 7541 pf_state_ref(struct pf_state *s) 7542 { 7543 if (s != NULL) 7544 PF_REF_TAKE(s->refcnt); 7545 return (s); 7546 } 7547 7548 void 7549 pf_state_unref(struct pf_state *s) 7550 { 7551 if ((s != NULL) && PF_REF_RELE(s->refcnt)) { 7552 /* never inserted or removed */ 7553 #if NPFSYNC > 0 7554 KASSERT((TAILQ_NEXT(s, sync_list) == NULL) || 7555 ((TAILQ_NEXT(s, sync_list) == _Q_INVALID) && 7556 (s->sync_state == PFSYNC_S_NONE))); 7557 #endif /* NPFSYNC */ 7558 KASSERT((TAILQ_NEXT(s, entry_list) == NULL) || 7559 (TAILQ_NEXT(s, entry_list) == _Q_INVALID)); 7560 KASSERT((s->key[PF_SK_WIRE] == NULL) && 7561 (s->key[PF_SK_STACK] == NULL)); 7562 7563 pool_put(&pf_state_pl, s); 7564 } 7565 } 7566 7567 int 7568 pf_delay_pkt(struct mbuf *m, u_int ifidx) 7569 { 7570 struct pf_pktdelay *pdy; 7571 7572 if ((pdy = pool_get(&pf_pktdelay_pl, PR_NOWAIT)) == NULL) { 7573 m_freem(m); 7574 return (ENOBUFS); 7575 } 7576 pdy->ifidx = ifidx; 7577 pdy->m = m; 7578 timeout_set(&pdy->to, pf_pktenqueue_delayed, pdy); 7579 timeout_add_msec(&pdy->to, m->m_pkthdr.pf.delay); 7580 m->m_pkthdr.pf.delay = 0; 7581 return (0); 7582 } 7583 7584 void 7585 pf_pktenqueue_delayed(void *arg) 7586 { 7587 struct pf_pktdelay *pdy = arg; 7588 struct ifnet *ifp; 7589 7590 ifp = if_get(pdy->ifidx); 7591 if (ifp != NULL) { 7592 if_enqueue(ifp, pdy->m); 7593 if_put(ifp); 7594 } else 7595 m_freem(pdy->m); 7596 7597 pool_put(&pf_pktdelay_pl, pdy); 7598 } 7599