1 /* $OpenBSD: pf.c,v 1.1097 2021/01/04 12:48:27 bluhm Exp $ */ 2 3 /* 4 * Copyright (c) 2001 Daniel Hartmeier 5 * Copyright (c) 2002 - 2013 Henning Brauer <henning@openbsd.org> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * - Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * - Redistributions in binary form must reproduce the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer in the documentation and/or other materials provided 17 * with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 22 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 23 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 25 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 29 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 * 32 * Effort sponsored in part by the Defense Advanced Research Projects 33 * Agency (DARPA) and Air Force Research Laboratory, Air Force 34 * Materiel Command, USAF, under agreement number F30602-01-2-0537. 
35 * 36 */ 37 38 #include "bpfilter.h" 39 #include "carp.h" 40 #include "pflog.h" 41 #include "pfsync.h" 42 #include "pflow.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/mbuf.h> 47 #include <sys/filio.h> 48 #include <sys/socket.h> 49 #include <sys/socketvar.h> 50 #include <sys/kernel.h> 51 #include <sys/time.h> 52 #include <sys/pool.h> 53 #include <sys/proc.h> 54 #include <sys/rwlock.h> 55 #include <sys/syslog.h> 56 57 #include <crypto/sha2.h> 58 59 #include <net/if.h> 60 #include <net/if_var.h> 61 #include <net/if_types.h> 62 #include <net/route.h> 63 64 #include <netinet/in.h> 65 #include <netinet/in_var.h> 66 #include <netinet/ip.h> 67 #include <netinet/in_pcb.h> 68 #include <netinet/ip_var.h> 69 #include <netinet/ip_icmp.h> 70 #include <netinet/icmp_var.h> 71 #include <netinet/tcp.h> 72 #include <netinet/tcp_seq.h> 73 #include <netinet/tcp_timer.h> 74 #include <netinet/tcp_var.h> 75 #include <netinet/tcp_fsm.h> 76 #include <netinet/udp.h> 77 #include <netinet/udp_var.h> 78 #include <netinet/ip_divert.h> 79 80 #ifdef INET6 81 #include <netinet6/in6_var.h> 82 #include <netinet/ip6.h> 83 #include <netinet6/ip6_var.h> 84 #include <netinet/icmp6.h> 85 #include <netinet6/nd6.h> 86 #include <netinet6/ip6_divert.h> 87 #endif /* INET6 */ 88 89 #include <net/pfvar.h> 90 #include <net/pfvar_priv.h> 91 92 #if NPFLOG > 0 93 #include <net/if_pflog.h> 94 #endif /* NPFLOG > 0 */ 95 96 #if NPFLOW > 0 97 #include <net/if_pflow.h> 98 #endif /* NPFLOW > 0 */ 99 100 #if NPFSYNC > 0 101 #include <net/if_pfsync.h> 102 #endif /* NPFSYNC > 0 */ 103 104 #ifdef DDB 105 #include <machine/db_machdep.h> 106 #include <ddb/db_interface.h> 107 #endif 108 109 /* 110 * Global variables 111 */ 112 struct pf_state_tree pf_statetbl; 113 struct pf_queuehead pf_queues[2]; 114 struct pf_queuehead *pf_queues_active; 115 struct pf_queuehead *pf_queues_inactive; 116 117 struct pf_status pf_status; 118 119 int pf_hdr_limit = 20; /* arbitrary limit, tune in ddb */ 120 121 
SHA2_CTX pf_tcp_secret_ctx; 122 u_char pf_tcp_secret[16]; 123 int pf_tcp_secret_init; 124 int pf_tcp_iss_off; 125 126 int pf_npurge; 127 struct task pf_purge_task = TASK_INITIALIZER(pf_purge, &pf_npurge); 128 struct timeout pf_purge_to = TIMEOUT_INITIALIZER(pf_purge_timeout, NULL); 129 130 enum pf_test_status { 131 PF_TEST_FAIL = -1, 132 PF_TEST_OK, 133 PF_TEST_QUICK 134 }; 135 136 struct pf_test_ctx { 137 enum pf_test_status test_status; 138 struct pf_pdesc *pd; 139 struct pf_rule_actions act; 140 u_int8_t icmpcode; 141 u_int8_t icmptype; 142 int icmp_dir; 143 int state_icmp; 144 int tag; 145 u_short reason; 146 struct pf_rule_item *ri; 147 struct pf_src_node *sns[PF_SN_MAX]; 148 struct pf_rule_slist rules; 149 struct pf_rule *nr; 150 struct pf_rule **rm; 151 struct pf_rule *a; 152 struct pf_rule **am; 153 struct pf_ruleset **rsm; 154 struct pf_ruleset *arsm; 155 struct pf_ruleset *aruleset; 156 struct tcphdr *th; 157 int depth; 158 }; 159 160 #define PF_ANCHOR_STACK_MAX 64 161 162 struct pool pf_src_tree_pl, pf_rule_pl, pf_queue_pl; 163 struct pool pf_state_pl, pf_state_key_pl, pf_state_item_pl; 164 struct pool pf_rule_item_pl, pf_sn_item_pl, pf_pktdelay_pl; 165 166 void pf_add_threshold(struct pf_threshold *); 167 int pf_check_threshold(struct pf_threshold *); 168 int pf_check_tcp_cksum(struct mbuf *, int, int, 169 sa_family_t); 170 static __inline void pf_cksum_fixup(u_int16_t *, u_int16_t, u_int16_t, 171 u_int8_t); 172 void pf_cksum_fixup_a(u_int16_t *, const struct pf_addr *, 173 const struct pf_addr *, sa_family_t, u_int8_t); 174 int pf_modulate_sack(struct pf_pdesc *, 175 struct pf_state_peer *); 176 int pf_icmp_mapping(struct pf_pdesc *, u_int8_t, int *, 177 u_int16_t *, u_int16_t *); 178 int pf_change_icmp_af(struct mbuf *, int, 179 struct pf_pdesc *, struct pf_pdesc *, 180 struct pf_addr *, struct pf_addr *, sa_family_t, 181 sa_family_t); 182 int pf_translate_a(struct pf_pdesc *, struct pf_addr *, 183 struct pf_addr *); 184 void pf_translate_icmp(struct 
pf_pdesc *, struct pf_addr *, 185 u_int16_t *, struct pf_addr *, struct pf_addr *, 186 u_int16_t); 187 int pf_translate_icmp_af(struct pf_pdesc*, int, void *); 188 void pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t, int, 189 sa_family_t, struct pf_rule *, u_int); 190 void pf_detach_state(struct pf_state *); 191 void pf_state_key_detach(struct pf_state *, int); 192 u_int32_t pf_tcp_iss(struct pf_pdesc *); 193 void pf_rule_to_actions(struct pf_rule *, 194 struct pf_rule_actions *); 195 int pf_test_rule(struct pf_pdesc *, struct pf_rule **, 196 struct pf_state **, struct pf_rule **, 197 struct pf_ruleset **, u_short *); 198 static __inline int pf_create_state(struct pf_pdesc *, struct pf_rule *, 199 struct pf_rule *, struct pf_rule *, 200 struct pf_state_key **, struct pf_state_key **, 201 int *, struct pf_state **, int, 202 struct pf_rule_slist *, struct pf_rule_actions *, 203 struct pf_src_node *[]); 204 static __inline int pf_state_key_addr_setup(struct pf_pdesc *, void *, 205 int, struct pf_addr *, int, struct pf_addr *, 206 int, int); 207 int pf_state_key_setup(struct pf_pdesc *, struct 208 pf_state_key **, struct pf_state_key **, int); 209 int pf_tcp_track_full(struct pf_pdesc *, 210 struct pf_state **, u_short *, int *, int); 211 int pf_tcp_track_sloppy(struct pf_pdesc *, 212 struct pf_state **, u_short *); 213 static __inline int pf_synproxy(struct pf_pdesc *, struct pf_state **, 214 u_short *); 215 int pf_test_state(struct pf_pdesc *, struct pf_state **, 216 u_short *, int); 217 int pf_icmp_state_lookup(struct pf_pdesc *, 218 struct pf_state_key_cmp *, struct pf_state **, 219 u_int16_t, u_int16_t, int, int *, int, int); 220 int pf_test_state_icmp(struct pf_pdesc *, 221 struct pf_state **, u_short *); 222 u_int16_t pf_calc_mss(struct pf_addr *, sa_family_t, int, 223 u_int16_t); 224 static __inline int pf_set_rt_ifp(struct pf_state *, struct pf_addr *, 225 sa_family_t, struct pf_src_node **); 226 struct pf_divert *pf_get_divert(struct mbuf *); 227 int 
pf_walk_header(struct pf_pdesc *, struct ip *, 228 u_short *); 229 int pf_walk_option6(struct pf_pdesc *, struct ip6_hdr *, 230 int, int, u_short *); 231 int pf_walk_header6(struct pf_pdesc *, struct ip6_hdr *, 232 u_short *); 233 void pf_print_state_parts(struct pf_state *, 234 struct pf_state_key *, struct pf_state_key *); 235 int pf_addr_wrap_neq(struct pf_addr_wrap *, 236 struct pf_addr_wrap *); 237 int pf_compare_state_keys(struct pf_state_key *, 238 struct pf_state_key *, struct pfi_kif *, u_int); 239 int pf_find_state(struct pf_pdesc *, 240 struct pf_state_key_cmp *, struct pf_state **); 241 int pf_src_connlimit(struct pf_state **); 242 int pf_match_rcvif(struct mbuf *, struct pf_rule *); 243 int pf_step_into_anchor(struct pf_test_ctx *, 244 struct pf_rule *); 245 int pf_match_rule(struct pf_test_ctx *, 246 struct pf_ruleset *); 247 void pf_counters_inc(int, struct pf_pdesc *, 248 struct pf_state *, struct pf_rule *, 249 struct pf_rule *); 250 251 int pf_state_key_isvalid(struct pf_state_key *); 252 struct pf_state_key *pf_state_key_ref(struct pf_state_key *); 253 void pf_state_key_unref(struct pf_state_key *); 254 void pf_state_key_link_reverse(struct pf_state_key *, 255 struct pf_state_key *); 256 void pf_state_key_unlink_reverse(struct pf_state_key *); 257 void pf_state_key_link_inpcb(struct pf_state_key *, 258 struct inpcb *); 259 void pf_state_key_unlink_inpcb(struct pf_state_key *); 260 void pf_inpcb_unlink_state_key(struct inpcb *); 261 void pf_pktenqueue_delayed(void *); 262 263 #if NPFLOG > 0 264 void pf_log_matches(struct pf_pdesc *, struct pf_rule *, 265 struct pf_rule *, struct pf_ruleset *, 266 struct pf_rule_slist *); 267 #endif /* NPFLOG > 0 */ 268 269 extern struct pool pfr_ktable_pl; 270 extern struct pool pfr_kentry_pl; 271 272 struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = { 273 { &pf_state_pl, PFSTATE_HIWAT, PFSTATE_HIWAT }, 274 { &pf_src_tree_pl, PFSNODE_HIWAT, PFSNODE_HIWAT }, 275 { &pf_frent_pl, PFFRAG_FRENT_HIWAT, 
PFFRAG_FRENT_HIWAT }, 276 { &pfr_ktable_pl, PFR_KTABLE_HIWAT, PFR_KTABLE_HIWAT }, 277 { &pfr_kentry_pl, PFR_KENTRY_HIWAT, PFR_KENTRY_HIWAT }, 278 { &pf_pktdelay_pl, PF_PKTDELAY_MAXPKTS, PF_PKTDELAY_MAXPKTS } 279 }; 280 281 #define BOUND_IFACE(r, k) \ 282 ((r)->rule_flag & PFRULE_IFBOUND) ? (k) : pfi_all 283 284 #define STATE_INC_COUNTERS(s) \ 285 do { \ 286 struct pf_rule_item *mrm; \ 287 s->rule.ptr->states_cur++; \ 288 s->rule.ptr->states_tot++; \ 289 if (s->anchor.ptr != NULL) { \ 290 s->anchor.ptr->states_cur++; \ 291 s->anchor.ptr->states_tot++; \ 292 } \ 293 SLIST_FOREACH(mrm, &s->match_rules, entry) \ 294 mrm->r->states_cur++; \ 295 } while (0) 296 297 static __inline int pf_src_compare(struct pf_src_node *, struct pf_src_node *); 298 static __inline int pf_state_compare_key(struct pf_state_key *, 299 struct pf_state_key *); 300 static __inline int pf_state_compare_id(struct pf_state *, 301 struct pf_state *); 302 #ifdef INET6 303 static __inline void pf_cksum_uncover(u_int16_t *, u_int16_t, u_int8_t); 304 static __inline void pf_cksum_cover(u_int16_t *, u_int16_t, u_int8_t); 305 #endif /* INET6 */ 306 static __inline void pf_set_protostate(struct pf_state *, int, u_int8_t); 307 308 struct pf_src_tree tree_src_tracking; 309 310 struct pf_state_tree_id tree_id; 311 struct pf_state_queue state_list; 312 313 RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare); 314 RB_GENERATE(pf_state_tree, pf_state_key, entry, pf_state_compare_key); 315 RB_GENERATE(pf_state_tree_id, pf_state, 316 entry_id, pf_state_compare_id); 317 318 SLIST_HEAD(pf_rule_gcl, pf_rule) pf_rule_gcl = 319 SLIST_HEAD_INITIALIZER(pf_rule_gcl); 320 321 __inline int 322 pf_addr_compare(struct pf_addr *a, struct pf_addr *b, sa_family_t af) 323 { 324 switch (af) { 325 case AF_INET: 326 if (a->addr32[0] > b->addr32[0]) 327 return (1); 328 if (a->addr32[0] < b->addr32[0]) 329 return (-1); 330 break; 331 #ifdef INET6 332 case AF_INET6: 333 if (a->addr32[3] > b->addr32[3]) 334 return (1); 335 if 
(a->addr32[3] < b->addr32[3]) 336 return (-1); 337 if (a->addr32[2] > b->addr32[2]) 338 return (1); 339 if (a->addr32[2] < b->addr32[2]) 340 return (-1); 341 if (a->addr32[1] > b->addr32[1]) 342 return (1); 343 if (a->addr32[1] < b->addr32[1]) 344 return (-1); 345 if (a->addr32[0] > b->addr32[0]) 346 return (1); 347 if (a->addr32[0] < b->addr32[0]) 348 return (-1); 349 break; 350 #endif /* INET6 */ 351 } 352 return (0); 353 } 354 355 static __inline int 356 pf_src_compare(struct pf_src_node *a, struct pf_src_node *b) 357 { 358 int diff; 359 360 if (a->rule.ptr > b->rule.ptr) 361 return (1); 362 if (a->rule.ptr < b->rule.ptr) 363 return (-1); 364 if ((diff = a->type - b->type) != 0) 365 return (diff); 366 if ((diff = a->af - b->af) != 0) 367 return (diff); 368 if ((diff = pf_addr_compare(&a->addr, &b->addr, a->af)) != 0) 369 return (diff); 370 return (0); 371 } 372 373 static __inline void 374 pf_set_protostate(struct pf_state *s, int which, u_int8_t newstate) 375 { 376 if (which == PF_PEER_DST || which == PF_PEER_BOTH) 377 s->dst.state = newstate; 378 if (which == PF_PEER_DST) 379 return; 380 381 if (s->src.state == newstate) 382 return; 383 if (s->creatorid == pf_status.hostid && s->key[PF_SK_STACK] != NULL && 384 s->key[PF_SK_STACK]->proto == IPPROTO_TCP && 385 !(TCPS_HAVEESTABLISHED(s->src.state) || 386 s->src.state == TCPS_CLOSED) && 387 (TCPS_HAVEESTABLISHED(newstate) || newstate == TCPS_CLOSED)) 388 pf_status.states_halfopen--; 389 390 s->src.state = newstate; 391 } 392 393 void 394 pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af) 395 { 396 switch (af) { 397 case AF_INET: 398 dst->addr32[0] = src->addr32[0]; 399 break; 400 #ifdef INET6 401 case AF_INET6: 402 dst->addr32[0] = src->addr32[0]; 403 dst->addr32[1] = src->addr32[1]; 404 dst->addr32[2] = src->addr32[2]; 405 dst->addr32[3] = src->addr32[3]; 406 break; 407 #endif /* INET6 */ 408 default: 409 unhandled_af(af); 410 } 411 } 412 413 void 414 pf_init_threshold(struct pf_threshold 
*threshold, 415 u_int32_t limit, u_int32_t seconds) 416 { 417 threshold->limit = limit * PF_THRESHOLD_MULT; 418 threshold->seconds = seconds; 419 threshold->count = 0; 420 threshold->last = getuptime(); 421 } 422 423 void 424 pf_add_threshold(struct pf_threshold *threshold) 425 { 426 u_int32_t t = getuptime(), diff = t - threshold->last; 427 428 if (diff >= threshold->seconds) 429 threshold->count = 0; 430 else 431 threshold->count -= threshold->count * diff / 432 threshold->seconds; 433 threshold->count += PF_THRESHOLD_MULT; 434 threshold->last = t; 435 } 436 437 int 438 pf_check_threshold(struct pf_threshold *threshold) 439 { 440 return (threshold->count > threshold->limit); 441 } 442 443 int 444 pf_src_connlimit(struct pf_state **state) 445 { 446 int bad = 0; 447 struct pf_src_node *sn; 448 449 if ((sn = pf_get_src_node((*state), PF_SN_NONE)) == NULL) 450 return (0); 451 452 sn->conn++; 453 (*state)->src.tcp_est = 1; 454 pf_add_threshold(&sn->conn_rate); 455 456 if ((*state)->rule.ptr->max_src_conn && 457 (*state)->rule.ptr->max_src_conn < sn->conn) { 458 pf_status.lcounters[LCNT_SRCCONN]++; 459 bad++; 460 } 461 462 if ((*state)->rule.ptr->max_src_conn_rate.limit && 463 pf_check_threshold(&sn->conn_rate)) { 464 pf_status.lcounters[LCNT_SRCCONNRATE]++; 465 bad++; 466 } 467 468 if (!bad) 469 return (0); 470 471 if ((*state)->rule.ptr->overload_tbl) { 472 struct pfr_addr p; 473 u_int32_t killed = 0; 474 475 pf_status.lcounters[LCNT_OVERLOAD_TABLE]++; 476 if (pf_status.debug >= LOG_NOTICE) { 477 log(LOG_NOTICE, 478 "pf: pf_src_connlimit: blocking address "); 479 pf_print_host(&sn->addr, 0, 480 (*state)->key[PF_SK_WIRE]->af); 481 } 482 483 memset(&p, 0, sizeof(p)); 484 p.pfra_af = (*state)->key[PF_SK_WIRE]->af; 485 switch ((*state)->key[PF_SK_WIRE]->af) { 486 case AF_INET: 487 p.pfra_net = 32; 488 p.pfra_ip4addr = sn->addr.v4; 489 break; 490 #ifdef INET6 491 case AF_INET6: 492 p.pfra_net = 128; 493 p.pfra_ip6addr = sn->addr.v6; 494 break; 495 #endif /* INET6 */ 496 } 
497 498 pfr_insert_kentry((*state)->rule.ptr->overload_tbl, 499 &p, gettime()); 500 501 /* kill existing states if that's required. */ 502 if ((*state)->rule.ptr->flush) { 503 struct pf_state_key *sk; 504 struct pf_state *st; 505 506 pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++; 507 RB_FOREACH(st, pf_state_tree_id, &tree_id) { 508 sk = st->key[PF_SK_WIRE]; 509 /* 510 * Kill states from this source. (Only those 511 * from the same rule if PF_FLUSH_GLOBAL is not 512 * set) 513 */ 514 if (sk->af == 515 (*state)->key[PF_SK_WIRE]->af && 516 (((*state)->direction == PF_OUT && 517 PF_AEQ(&sn->addr, &sk->addr[1], sk->af)) || 518 ((*state)->direction == PF_IN && 519 PF_AEQ(&sn->addr, &sk->addr[0], sk->af))) && 520 ((*state)->rule.ptr->flush & 521 PF_FLUSH_GLOBAL || 522 (*state)->rule.ptr == st->rule.ptr)) { 523 st->timeout = PFTM_PURGE; 524 pf_set_protostate(st, PF_PEER_BOTH, 525 TCPS_CLOSED); 526 killed++; 527 } 528 } 529 if (pf_status.debug >= LOG_NOTICE) 530 addlog(", %u states killed", killed); 531 } 532 if (pf_status.debug >= LOG_NOTICE) 533 addlog("\n"); 534 } 535 536 /* kill this state */ 537 (*state)->timeout = PFTM_PURGE; 538 pf_set_protostate(*state, PF_PEER_BOTH, TCPS_CLOSED); 539 return (1); 540 } 541 542 int 543 pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule, 544 enum pf_sn_types type, sa_family_t af, struct pf_addr *src, 545 struct pf_addr *raddr, struct pfi_kif *kif) 546 { 547 struct pf_src_node k; 548 549 if (*sn == NULL) { 550 k.af = af; 551 k.type = type; 552 pf_addrcpy(&k.addr, src, af); 553 k.rule.ptr = rule; 554 pf_status.scounters[SCNT_SRC_NODE_SEARCH]++; 555 *sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k); 556 } 557 if (*sn == NULL) { 558 if (!rule->max_src_nodes || 559 rule->src_nodes < rule->max_src_nodes) 560 (*sn) = pool_get(&pf_src_tree_pl, PR_NOWAIT | PR_ZERO); 561 else 562 pf_status.lcounters[LCNT_SRCNODES]++; 563 if ((*sn) == NULL) 564 return (-1); 565 566 pf_init_threshold(&(*sn)->conn_rate, 567 
rule->max_src_conn_rate.limit, 568 rule->max_src_conn_rate.seconds); 569 570 (*sn)->type = type; 571 (*sn)->af = af; 572 (*sn)->rule.ptr = rule; 573 pf_addrcpy(&(*sn)->addr, src, af); 574 if (raddr) 575 pf_addrcpy(&(*sn)->raddr, raddr, af); 576 if (RB_INSERT(pf_src_tree, 577 &tree_src_tracking, *sn) != NULL) { 578 if (pf_status.debug >= LOG_NOTICE) { 579 log(LOG_NOTICE, 580 "pf: src_tree insert failed: "); 581 pf_print_host(&(*sn)->addr, 0, af); 582 addlog("\n"); 583 } 584 pool_put(&pf_src_tree_pl, *sn); 585 return (-1); 586 } 587 (*sn)->creation = getuptime(); 588 (*sn)->rule.ptr->src_nodes++; 589 if (kif != NULL) { 590 (*sn)->kif = kif; 591 pfi_kif_ref(kif, PFI_KIF_REF_SRCNODE); 592 } 593 pf_status.scounters[SCNT_SRC_NODE_INSERT]++; 594 pf_status.src_nodes++; 595 } else { 596 if (rule->max_src_states && 597 (*sn)->states >= rule->max_src_states) { 598 pf_status.lcounters[LCNT_SRCSTATES]++; 599 return (-1); 600 } 601 } 602 return (0); 603 } 604 605 void 606 pf_remove_src_node(struct pf_src_node *sn) 607 { 608 if (sn->states > 0 || sn->expire > getuptime()) 609 return; 610 611 sn->rule.ptr->src_nodes--; 612 if (sn->rule.ptr->states_cur == 0 && 613 sn->rule.ptr->src_nodes == 0) 614 pf_rm_rule(NULL, sn->rule.ptr); 615 RB_REMOVE(pf_src_tree, &tree_src_tracking, sn); 616 pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; 617 pf_status.src_nodes--; 618 pfi_kif_unref(sn->kif, PFI_KIF_REF_SRCNODE); 619 pool_put(&pf_src_tree_pl, sn); 620 } 621 622 struct pf_src_node * 623 pf_get_src_node(struct pf_state *s, enum pf_sn_types type) 624 { 625 struct pf_sn_item *sni; 626 627 SLIST_FOREACH(sni, &s->src_nodes, next) 628 if (sni->sn->type == type) 629 return (sni->sn); 630 return (NULL); 631 } 632 633 void 634 pf_state_rm_src_node(struct pf_state *s, struct pf_src_node *sn) 635 { 636 struct pf_sn_item *sni, *snin, *snip = NULL; 637 638 for (sni = SLIST_FIRST(&s->src_nodes); sni; sni = snin) { 639 snin = SLIST_NEXT(sni, next); 640 if (sni->sn == sn) { 641 if (snip) 642 
SLIST_REMOVE_AFTER(snip, next); 643 else 644 SLIST_REMOVE_HEAD(&s->src_nodes, next); 645 pool_put(&pf_sn_item_pl, sni); 646 sni = NULL; 647 sn->states--; 648 } 649 if (sni != NULL) 650 snip = sni; 651 } 652 } 653 654 /* state table stuff */ 655 656 static __inline int 657 pf_state_compare_key(struct pf_state_key *a, struct pf_state_key *b) 658 { 659 int diff; 660 661 if ((diff = a->proto - b->proto) != 0) 662 return (diff); 663 if ((diff = a->af - b->af) != 0) 664 return (diff); 665 if ((diff = pf_addr_compare(&a->addr[0], &b->addr[0], a->af)) != 0) 666 return (diff); 667 if ((diff = pf_addr_compare(&a->addr[1], &b->addr[1], a->af)) != 0) 668 return (diff); 669 if ((diff = a->port[0] - b->port[0]) != 0) 670 return (diff); 671 if ((diff = a->port[1] - b->port[1]) != 0) 672 return (diff); 673 if ((diff = a->rdomain - b->rdomain) != 0) 674 return (diff); 675 return (0); 676 } 677 678 static __inline int 679 pf_state_compare_id(struct pf_state *a, struct pf_state *b) 680 { 681 if (a->id > b->id) 682 return (1); 683 if (a->id < b->id) 684 return (-1); 685 if (a->creatorid > b->creatorid) 686 return (1); 687 if (a->creatorid < b->creatorid) 688 return (-1); 689 690 return (0); 691 } 692 693 int 694 pf_state_key_attach(struct pf_state_key *sk, struct pf_state *s, int idx) 695 { 696 struct pf_state_item *si; 697 struct pf_state_key *cur; 698 struct pf_state *olds = NULL; 699 700 KASSERT(s->key[idx] == NULL); 701 if ((cur = RB_INSERT(pf_state_tree, &pf_statetbl, sk)) != NULL) { 702 /* key exists. 
check for same kif, if none, add to key */ 703 TAILQ_FOREACH(si, &cur->states, entry) 704 if (si->s->kif == s->kif && 705 ((si->s->key[PF_SK_WIRE]->af == sk->af && 706 si->s->direction == s->direction) || 707 (si->s->key[PF_SK_WIRE]->af != 708 si->s->key[PF_SK_STACK]->af && 709 sk->af == si->s->key[PF_SK_STACK]->af && 710 si->s->direction != s->direction))) { 711 int reuse = 0; 712 713 if (sk->proto == IPPROTO_TCP && 714 si->s->src.state >= TCPS_FIN_WAIT_2 && 715 si->s->dst.state >= TCPS_FIN_WAIT_2) 716 reuse = 1; 717 if (pf_status.debug >= LOG_NOTICE) { 718 log(LOG_NOTICE, 719 "pf: %s key attach %s on %s: ", 720 (idx == PF_SK_WIRE) ? 721 "wire" : "stack", 722 reuse ? "reuse" : "failed", 723 s->kif->pfik_name); 724 pf_print_state_parts(s, 725 (idx == PF_SK_WIRE) ? sk : NULL, 726 (idx == PF_SK_STACK) ? sk : NULL); 727 addlog(", existing: "); 728 pf_print_state_parts(si->s, 729 (idx == PF_SK_WIRE) ? sk : NULL, 730 (idx == PF_SK_STACK) ? sk : NULL); 731 addlog("\n"); 732 } 733 if (reuse) { 734 pf_set_protostate(si->s, PF_PEER_BOTH, 735 TCPS_CLOSED); 736 /* remove late or sks can go away */ 737 olds = si->s; 738 } else { 739 pool_put(&pf_state_key_pl, sk); 740 return (-1); /* collision! 
*/ 741 } 742 } 743 pool_put(&pf_state_key_pl, sk); 744 s->key[idx] = cur; 745 } else 746 s->key[idx] = sk; 747 748 if ((si = pool_get(&pf_state_item_pl, PR_NOWAIT)) == NULL) { 749 pf_state_key_detach(s, idx); 750 return (-1); 751 } 752 si->s = s; 753 754 /* list is sorted, if-bound states before floating */ 755 if (s->kif == pfi_all) 756 TAILQ_INSERT_TAIL(&s->key[idx]->states, si, entry); 757 else 758 TAILQ_INSERT_HEAD(&s->key[idx]->states, si, entry); 759 760 if (olds) 761 pf_remove_state(olds); 762 763 return (0); 764 } 765 766 void 767 pf_detach_state(struct pf_state *s) 768 { 769 if (s->key[PF_SK_WIRE] == s->key[PF_SK_STACK]) 770 s->key[PF_SK_WIRE] = NULL; 771 772 if (s->key[PF_SK_STACK] != NULL) 773 pf_state_key_detach(s, PF_SK_STACK); 774 775 if (s->key[PF_SK_WIRE] != NULL) 776 pf_state_key_detach(s, PF_SK_WIRE); 777 } 778 779 void 780 pf_state_key_detach(struct pf_state *s, int idx) 781 { 782 struct pf_state_item *si; 783 struct pf_state_key *sk; 784 785 if (s->key[idx] == NULL) 786 return; 787 788 si = TAILQ_FIRST(&s->key[idx]->states); 789 while (si && si->s != s) 790 si = TAILQ_NEXT(si, entry); 791 792 if (si) { 793 TAILQ_REMOVE(&s->key[idx]->states, si, entry); 794 pool_put(&pf_state_item_pl, si); 795 } 796 797 sk = s->key[idx]; 798 s->key[idx] = NULL; 799 if (TAILQ_EMPTY(&sk->states)) { 800 RB_REMOVE(pf_state_tree, &pf_statetbl, sk); 801 sk->removed = 1; 802 pf_state_key_unlink_reverse(sk); 803 pf_state_key_unlink_inpcb(sk); 804 pf_state_key_unref(sk); 805 } 806 } 807 808 struct pf_state_key * 809 pf_alloc_state_key(int pool_flags) 810 { 811 struct pf_state_key *sk; 812 813 if ((sk = pool_get(&pf_state_key_pl, pool_flags)) == NULL) 814 return (NULL); 815 TAILQ_INIT(&sk->states); 816 817 return (sk); 818 } 819 820 static __inline int 821 pf_state_key_addr_setup(struct pf_pdesc *pd, void *arg, int sidx, 822 struct pf_addr *saddr, int didx, struct pf_addr *daddr, int af, int multi) 823 { 824 struct pf_state_key_cmp *key = arg; 825 #ifdef INET6 826 struct 
pf_addr *target; 827 828 if (af == AF_INET || pd->proto != IPPROTO_ICMPV6) 829 goto copy; 830 831 switch (pd->hdr.icmp6.icmp6_type) { 832 case ND_NEIGHBOR_SOLICIT: 833 if (multi) 834 return (-1); 835 target = (struct pf_addr *)&pd->hdr.nd_ns.nd_ns_target; 836 daddr = target; 837 break; 838 case ND_NEIGHBOR_ADVERT: 839 if (multi) 840 return (-1); 841 target = (struct pf_addr *)&pd->hdr.nd_ns.nd_ns_target; 842 saddr = target; 843 if (IN6_IS_ADDR_MULTICAST(&pd->dst->v6)) { 844 key->addr[didx].addr32[0] = 0; 845 key->addr[didx].addr32[1] = 0; 846 key->addr[didx].addr32[2] = 0; 847 key->addr[didx].addr32[3] = 0; 848 daddr = NULL; /* overwritten */ 849 } 850 break; 851 default: 852 if (multi) { 853 key->addr[sidx].addr32[0] = __IPV6_ADDR_INT32_MLL; 854 key->addr[sidx].addr32[1] = 0; 855 key->addr[sidx].addr32[2] = 0; 856 key->addr[sidx].addr32[3] = __IPV6_ADDR_INT32_ONE; 857 saddr = NULL; /* overwritten */ 858 } 859 } 860 copy: 861 #endif /* INET6 */ 862 if (saddr) 863 pf_addrcpy(&key->addr[sidx], saddr, af); 864 if (daddr) 865 pf_addrcpy(&key->addr[didx], daddr, af); 866 867 return (0); 868 } 869 870 int 871 pf_state_key_setup(struct pf_pdesc *pd, struct pf_state_key **skw, 872 struct pf_state_key **sks, int rtableid) 873 { 874 /* if returning error we MUST pool_put state keys ourselves */ 875 struct pf_state_key *sk1, *sk2; 876 u_int wrdom = pd->rdomain; 877 int afto = pd->af != pd->naf; 878 879 if ((sk1 = pf_alloc_state_key(PR_NOWAIT | PR_ZERO)) == NULL) 880 return (ENOMEM); 881 882 pf_state_key_addr_setup(pd, sk1, pd->sidx, pd->src, pd->didx, pd->dst, 883 pd->af, 0); 884 sk1->port[pd->sidx] = pd->osport; 885 sk1->port[pd->didx] = pd->odport; 886 sk1->proto = pd->proto; 887 sk1->af = pd->af; 888 sk1->rdomain = pd->rdomain; 889 PF_REF_INIT(sk1->refcnt); 890 sk1->removed = 0; 891 if (rtableid >= 0) 892 wrdom = rtable_l2(rtableid); 893 894 if (PF_ANEQ(&pd->nsaddr, pd->src, pd->af) || 895 PF_ANEQ(&pd->ndaddr, pd->dst, pd->af) || 896 pd->nsport != pd->osport || pd->ndport 
!= pd->odport || 897 wrdom != pd->rdomain || afto) { /* NAT/NAT64 */ 898 if ((sk2 = pf_alloc_state_key(PR_NOWAIT | PR_ZERO)) == NULL) { 899 pool_put(&pf_state_key_pl, sk1); 900 return (ENOMEM); 901 } 902 pf_state_key_addr_setup(pd, sk2, afto ? pd->didx : pd->sidx, 903 &pd->nsaddr, afto ? pd->sidx : pd->didx, &pd->ndaddr, 904 pd->naf, 0); 905 sk2->port[afto ? pd->didx : pd->sidx] = pd->nsport; 906 sk2->port[afto ? pd->sidx : pd->didx] = pd->ndport; 907 if (afto) { 908 switch (pd->proto) { 909 case IPPROTO_ICMP: 910 sk2->proto = IPPROTO_ICMPV6; 911 break; 912 case IPPROTO_ICMPV6: 913 sk2->proto = IPPROTO_ICMP; 914 break; 915 default: 916 sk2->proto = pd->proto; 917 } 918 } else 919 sk2->proto = pd->proto; 920 sk2->af = pd->naf; 921 sk2->rdomain = wrdom; 922 PF_REF_INIT(sk2->refcnt); 923 sk2->removed = 0; 924 } else 925 sk2 = sk1; 926 927 if (pd->dir == PF_IN) { 928 *skw = sk1; 929 *sks = sk2; 930 } else { 931 *sks = sk1; 932 *skw = sk2; 933 } 934 935 if (pf_status.debug >= LOG_DEBUG) { 936 log(LOG_DEBUG, "pf: key setup: "); 937 pf_print_state_parts(NULL, *skw, *sks); 938 addlog("\n"); 939 } 940 941 return (0); 942 } 943 944 int 945 pf_state_insert(struct pfi_kif *kif, struct pf_state_key **skw, 946 struct pf_state_key **sks, struct pf_state *s) 947 { 948 PF_ASSERT_LOCKED(); 949 950 s->kif = kif; 951 PF_STATE_ENTER_WRITE(); 952 if (*skw == *sks) { 953 if (pf_state_key_attach(*skw, s, PF_SK_WIRE)) { 954 PF_STATE_EXIT_WRITE(); 955 return (-1); 956 } 957 *skw = *sks = s->key[PF_SK_WIRE]; 958 s->key[PF_SK_STACK] = s->key[PF_SK_WIRE]; 959 } else { 960 if (pf_state_key_attach(*skw, s, PF_SK_WIRE)) { 961 pool_put(&pf_state_key_pl, *sks); 962 PF_STATE_EXIT_WRITE(); 963 return (-1); 964 } 965 *skw = s->key[PF_SK_WIRE]; 966 if (pf_state_key_attach(*sks, s, PF_SK_STACK)) { 967 pf_state_key_detach(s, PF_SK_WIRE); 968 PF_STATE_EXIT_WRITE(); 969 return (-1); 970 } 971 *sks = s->key[PF_SK_STACK]; 972 } 973 974 if (s->id == 0 && s->creatorid == 0) { 975 s->id = 
htobe64(pf_status.stateid++); 976 s->creatorid = pf_status.hostid; 977 } 978 if (RB_INSERT(pf_state_tree_id, &tree_id, s) != NULL) { 979 if (pf_status.debug >= LOG_NOTICE) { 980 log(LOG_NOTICE, "pf: state insert failed: " 981 "id: %016llx creatorid: %08x", 982 betoh64(s->id), ntohl(s->creatorid)); 983 addlog("\n"); 984 } 985 pf_detach_state(s); 986 PF_STATE_EXIT_WRITE(); 987 return (-1); 988 } 989 TAILQ_INSERT_TAIL(&state_list, s, entry_list); 990 pf_status.fcounters[FCNT_STATE_INSERT]++; 991 pf_status.states++; 992 pfi_kif_ref(kif, PFI_KIF_REF_STATE); 993 PF_STATE_EXIT_WRITE(); 994 #if NPFSYNC > 0 995 pfsync_insert_state(s); 996 #endif /* NPFSYNC > 0 */ 997 return (0); 998 } 999 1000 struct pf_state * 1001 pf_find_state_byid(struct pf_state_cmp *key) 1002 { 1003 pf_status.fcounters[FCNT_STATE_SEARCH]++; 1004 1005 return (RB_FIND(pf_state_tree_id, &tree_id, (struct pf_state *)key)); 1006 } 1007 1008 int 1009 pf_compare_state_keys(struct pf_state_key *a, struct pf_state_key *b, 1010 struct pfi_kif *kif, u_int dir) 1011 { 1012 /* a (from hdr) and b (new) must be exact opposites of each other */ 1013 if (a->af == b->af && a->proto == b->proto && 1014 PF_AEQ(&a->addr[0], &b->addr[1], a->af) && 1015 PF_AEQ(&a->addr[1], &b->addr[0], a->af) && 1016 a->port[0] == b->port[1] && 1017 a->port[1] == b->port[0] && a->rdomain == b->rdomain) 1018 return (0); 1019 else { 1020 /* mismatch. must not happen. */ 1021 if (pf_status.debug >= LOG_ERR) { 1022 log(LOG_ERR, 1023 "pf: state key linking mismatch! dir=%s, " 1024 "if=%s, stored af=%u, a0: ", 1025 dir == PF_OUT ? 
"OUT" : "IN", 1026 kif->pfik_name, a->af); 1027 pf_print_host(&a->addr[0], a->port[0], a->af); 1028 addlog(", a1: "); 1029 pf_print_host(&a->addr[1], a->port[1], a->af); 1030 addlog(", proto=%u", a->proto); 1031 addlog(", found af=%u, a0: ", b->af); 1032 pf_print_host(&b->addr[0], b->port[0], b->af); 1033 addlog(", a1: "); 1034 pf_print_host(&b->addr[1], b->port[1], b->af); 1035 addlog(", proto=%u", b->proto); 1036 addlog("\n"); 1037 } 1038 return (-1); 1039 } 1040 } 1041 1042 int 1043 pf_find_state(struct pf_pdesc *pd, struct pf_state_key_cmp *key, 1044 struct pf_state **state) 1045 { 1046 struct pf_state_key *sk, *pkt_sk, *inp_sk; 1047 struct pf_state_item *si; 1048 struct pf_state *s = NULL; 1049 1050 pf_status.fcounters[FCNT_STATE_SEARCH]++; 1051 if (pf_status.debug >= LOG_DEBUG) { 1052 log(LOG_DEBUG, "pf: key search, %s on %s: ", 1053 pd->dir == PF_OUT ? "out" : "in", pd->kif->pfik_name); 1054 pf_print_state_parts(NULL, (struct pf_state_key *)key, NULL); 1055 addlog("\n"); 1056 } 1057 1058 inp_sk = NULL; 1059 pkt_sk = NULL; 1060 sk = NULL; 1061 if (pd->dir == PF_OUT) { 1062 /* first if block deals with outbound forwarded packet */ 1063 pkt_sk = pd->m->m_pkthdr.pf.statekey; 1064 1065 if (!pf_state_key_isvalid(pkt_sk)) { 1066 pf_mbuf_unlink_state_key(pd->m); 1067 pkt_sk = NULL; 1068 } 1069 1070 if (pkt_sk && pf_state_key_isvalid(pkt_sk->reverse)) 1071 sk = pkt_sk->reverse; 1072 1073 if (pkt_sk == NULL) { 1074 /* here we deal with local outbound packet */ 1075 if (pd->m->m_pkthdr.pf.inp != NULL) { 1076 inp_sk = pd->m->m_pkthdr.pf.inp->inp_pf_sk; 1077 if (pf_state_key_isvalid(inp_sk)) 1078 sk = inp_sk; 1079 else 1080 pf_inpcb_unlink_state_key( 1081 pd->m->m_pkthdr.pf.inp); 1082 } 1083 } 1084 } 1085 1086 if (sk == NULL) { 1087 if ((sk = RB_FIND(pf_state_tree, &pf_statetbl, 1088 (struct pf_state_key *)key)) == NULL) 1089 return (PF_DROP); 1090 if (pd->dir == PF_OUT && pkt_sk && 1091 pf_compare_state_keys(pkt_sk, sk, pd->kif, pd->dir) == 0) 1092 
pf_state_key_link_reverse(sk, pkt_sk); 1093 else if (pd->dir == PF_OUT && pd->m->m_pkthdr.pf.inp && 1094 !pd->m->m_pkthdr.pf.inp->inp_pf_sk && !sk->inp) 1095 pf_state_key_link_inpcb(sk, pd->m->m_pkthdr.pf.inp); 1096 } 1097 1098 /* remove firewall data from outbound packet */ 1099 if (pd->dir == PF_OUT) 1100 pf_pkt_addr_changed(pd->m); 1101 1102 /* list is sorted, if-bound states before floating ones */ 1103 TAILQ_FOREACH(si, &sk->states, entry) 1104 if ((si->s->kif == pfi_all || si->s->kif == pd->kif) && 1105 ((si->s->key[PF_SK_WIRE]->af == si->s->key[PF_SK_STACK]->af 1106 && sk == (pd->dir == PF_IN ? si->s->key[PF_SK_WIRE] : 1107 si->s->key[PF_SK_STACK])) || 1108 (si->s->key[PF_SK_WIRE]->af != si->s->key[PF_SK_STACK]->af 1109 && pd->dir == PF_IN && (sk == si->s->key[PF_SK_STACK] || 1110 sk == si->s->key[PF_SK_WIRE])))) { 1111 s = si->s; 1112 break; 1113 } 1114 1115 if (s == NULL || s->timeout == PFTM_PURGE) 1116 return (PF_DROP); 1117 1118 if (s->rule.ptr->pktrate.limit && pd->dir == s->direction) { 1119 pf_add_threshold(&s->rule.ptr->pktrate); 1120 if (pf_check_threshold(&s->rule.ptr->pktrate)) 1121 return (PF_DROP); 1122 } 1123 1124 *state = s; 1125 if (pd->dir == PF_OUT && s->rt_kif != NULL && s->rt_kif != pd->kif && 1126 ((s->rule.ptr->rt == PF_ROUTETO && 1127 s->rule.ptr->direction == PF_OUT) || 1128 (s->rule.ptr->rt == PF_REPLYTO && 1129 s->rule.ptr->direction == PF_IN))) 1130 return (PF_PASS); 1131 1132 return (PF_MATCH); 1133 } 1134 1135 struct pf_state * 1136 pf_find_state_all(struct pf_state_key_cmp *key, u_int dir, int *more) 1137 { 1138 struct pf_state_key *sk; 1139 struct pf_state_item *si, *ret = NULL; 1140 1141 pf_status.fcounters[FCNT_STATE_SEARCH]++; 1142 1143 sk = RB_FIND(pf_state_tree, &pf_statetbl, (struct pf_state_key *)key); 1144 1145 if (sk != NULL) { 1146 TAILQ_FOREACH(si, &sk->states, entry) 1147 if (dir == PF_INOUT || 1148 (sk == (dir == PF_IN ? 
si->s->key[PF_SK_WIRE] : 1149 si->s->key[PF_SK_STACK]))) { 1150 if (more == NULL) 1151 return (si->s); 1152 1153 if (ret) 1154 (*more)++; 1155 else 1156 ret = si; 1157 } 1158 } 1159 return (ret ? ret->s : NULL); 1160 } 1161 1162 void 1163 pf_state_export(struct pfsync_state *sp, struct pf_state *st) 1164 { 1165 int32_t expire; 1166 1167 memset(sp, 0, sizeof(struct pfsync_state)); 1168 1169 /* copy from state key */ 1170 sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0]; 1171 sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1]; 1172 sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0]; 1173 sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1]; 1174 sp->key[PF_SK_WIRE].rdomain = htons(st->key[PF_SK_WIRE]->rdomain); 1175 sp->key[PF_SK_WIRE].af = st->key[PF_SK_WIRE]->af; 1176 sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0]; 1177 sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1]; 1178 sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0]; 1179 sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1]; 1180 sp->key[PF_SK_STACK].rdomain = htons(st->key[PF_SK_STACK]->rdomain); 1181 sp->key[PF_SK_STACK].af = st->key[PF_SK_STACK]->af; 1182 sp->rtableid[PF_SK_WIRE] = htonl(st->rtableid[PF_SK_WIRE]); 1183 sp->rtableid[PF_SK_STACK] = htonl(st->rtableid[PF_SK_STACK]); 1184 sp->proto = st->key[PF_SK_WIRE]->proto; 1185 sp->af = st->key[PF_SK_WIRE]->af; 1186 1187 /* copy from state */ 1188 strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname)); 1189 sp->rt_addr = st->rt_addr; 1190 sp->creation = htonl(getuptime() - st->creation); 1191 expire = pf_state_expires(st); 1192 if (expire <= getuptime()) 1193 sp->expire = htonl(0); 1194 else 1195 sp->expire = htonl(expire - getuptime()); 1196 1197 sp->direction = st->direction; 1198 #if NPFLOG > 0 1199 sp->log = st->log; 1200 #endif /* NPFLOG > 0 */ 1201 sp->timeout = st->timeout; 1202 sp->state_flags = htons(st->state_flags); 1203 if (!SLIST_EMPTY(&st->src_nodes)) 1204 
sp->sync_flags |= PFSYNC_FLAG_SRCNODE; 1205 1206 sp->id = st->id; 1207 sp->creatorid = st->creatorid; 1208 pf_state_peer_hton(&st->src, &sp->src); 1209 pf_state_peer_hton(&st->dst, &sp->dst); 1210 1211 if (st->rule.ptr == NULL) 1212 sp->rule = htonl(-1); 1213 else 1214 sp->rule = htonl(st->rule.ptr->nr); 1215 if (st->anchor.ptr == NULL) 1216 sp->anchor = htonl(-1); 1217 else 1218 sp->anchor = htonl(st->anchor.ptr->nr); 1219 sp->nat_rule = htonl(-1); /* left for compat, nat_rule is gone */ 1220 1221 pf_state_counter_hton(st->packets[0], sp->packets[0]); 1222 pf_state_counter_hton(st->packets[1], sp->packets[1]); 1223 pf_state_counter_hton(st->bytes[0], sp->bytes[0]); 1224 pf_state_counter_hton(st->bytes[1], sp->bytes[1]); 1225 1226 sp->max_mss = htons(st->max_mss); 1227 sp->min_ttl = st->min_ttl; 1228 sp->set_tos = st->set_tos; 1229 sp->set_prio[0] = st->set_prio[0]; 1230 sp->set_prio[1] = st->set_prio[1]; 1231 } 1232 1233 /* END state table stuff */ 1234 1235 void 1236 pf_purge_expired_rules(void) 1237 { 1238 struct pf_rule *r; 1239 1240 PF_ASSERT_LOCKED(); 1241 1242 if (SLIST_EMPTY(&pf_rule_gcl)) 1243 return; 1244 1245 while ((r = SLIST_FIRST(&pf_rule_gcl)) != NULL) { 1246 SLIST_REMOVE(&pf_rule_gcl, r, pf_rule, gcle); 1247 KASSERT(r->rule_flag & PFRULE_EXPIRED); 1248 pf_purge_rule(r); 1249 } 1250 } 1251 1252 void 1253 pf_purge_timeout(void *unused) 1254 { 1255 task_add(net_tq(0), &pf_purge_task); 1256 } 1257 1258 void 1259 pf_purge(void *xnloops) 1260 { 1261 int *nloops = xnloops; 1262 1263 KERNEL_LOCK(); 1264 NET_LOCK(); 1265 1266 /* 1267 * process a fraction of the state table every second 1268 * Note: 1269 * we no longer need PF_LOCK() here, because 1270 * pf_purge_expired_states() uses pf_state_lock to maintain 1271 * consistency. 
1272 */ 1273 pf_purge_expired_states(1 + (pf_status.states 1274 / pf_default_rule.timeout[PFTM_INTERVAL])); 1275 1276 PF_LOCK(); 1277 /* purge other expired types every PFTM_INTERVAL seconds */ 1278 if (++(*nloops) >= pf_default_rule.timeout[PFTM_INTERVAL]) { 1279 pf_purge_expired_src_nodes(); 1280 pf_purge_expired_rules(); 1281 } 1282 PF_UNLOCK(); 1283 1284 /* 1285 * Fragments don't require PF_LOCK(), they use their own lock. 1286 */ 1287 if ((*nloops) >= pf_default_rule.timeout[PFTM_INTERVAL]) { 1288 pf_purge_expired_fragments(); 1289 *nloops = 0; 1290 } 1291 NET_UNLOCK(); 1292 KERNEL_UNLOCK(); 1293 1294 timeout_add_sec(&pf_purge_to, 1); 1295 } 1296 1297 int32_t 1298 pf_state_expires(const struct pf_state *state) 1299 { 1300 u_int32_t timeout; 1301 u_int32_t start; 1302 u_int32_t end; 1303 u_int32_t states; 1304 1305 /* handle all PFTM_* > PFTM_MAX here */ 1306 if (state->timeout == PFTM_PURGE) 1307 return (0); 1308 1309 KASSERT(state->timeout != PFTM_UNLINKED); 1310 KASSERT(state->timeout < PFTM_MAX); 1311 1312 timeout = state->rule.ptr->timeout[state->timeout]; 1313 if (!timeout) 1314 timeout = pf_default_rule.timeout[state->timeout]; 1315 1316 start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START]; 1317 if (start) { 1318 end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END]; 1319 states = state->rule.ptr->states_cur; 1320 } else { 1321 start = pf_default_rule.timeout[PFTM_ADAPTIVE_START]; 1322 end = pf_default_rule.timeout[PFTM_ADAPTIVE_END]; 1323 states = pf_status.states; 1324 } 1325 if (end && states > start && start < end) { 1326 if (states >= end) 1327 return (0); 1328 1329 timeout = (u_int64_t)timeout * (end - states) / (end - start); 1330 } 1331 1332 return (state->expire + timeout); 1333 } 1334 1335 void 1336 pf_purge_expired_src_nodes(void) 1337 { 1338 struct pf_src_node *cur, *next; 1339 1340 PF_ASSERT_LOCKED(); 1341 1342 for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) { 1343 next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur); 1344 
1345 if (cur->states == 0 && cur->expire <= getuptime()) { 1346 next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur); 1347 pf_remove_src_node(cur); 1348 } 1349 } 1350 } 1351 1352 void 1353 pf_src_tree_remove_state(struct pf_state *s) 1354 { 1355 u_int32_t timeout; 1356 struct pf_sn_item *sni; 1357 1358 while ((sni = SLIST_FIRST(&s->src_nodes)) != NULL) { 1359 SLIST_REMOVE_HEAD(&s->src_nodes, next); 1360 if (s->src.tcp_est) 1361 --sni->sn->conn; 1362 if (--sni->sn->states == 0) { 1363 timeout = s->rule.ptr->timeout[PFTM_SRC_NODE]; 1364 if (!timeout) 1365 timeout = 1366 pf_default_rule.timeout[PFTM_SRC_NODE]; 1367 sni->sn->expire = getuptime() + timeout; 1368 } 1369 pool_put(&pf_sn_item_pl, sni); 1370 } 1371 } 1372 1373 void 1374 pf_remove_state(struct pf_state *cur) 1375 { 1376 PF_ASSERT_LOCKED(); 1377 1378 /* handle load balancing related tasks */ 1379 pf_postprocess_addr(cur); 1380 1381 if (cur->src.state == PF_TCPS_PROXY_DST) { 1382 pf_send_tcp(cur->rule.ptr, cur->key[PF_SK_WIRE]->af, 1383 &cur->key[PF_SK_WIRE]->addr[1], 1384 &cur->key[PF_SK_WIRE]->addr[0], 1385 cur->key[PF_SK_WIRE]->port[1], 1386 cur->key[PF_SK_WIRE]->port[0], 1387 cur->src.seqhi, cur->src.seqlo + 1, 1388 TH_RST|TH_ACK, 0, 0, 0, 1, cur->tag, 1389 cur->key[PF_SK_WIRE]->rdomain); 1390 } 1391 if (cur->key[PF_SK_STACK]->proto == IPPROTO_TCP) 1392 pf_set_protostate(cur, PF_PEER_BOTH, TCPS_CLOSED); 1393 1394 RB_REMOVE(pf_state_tree_id, &tree_id, cur); 1395 #if NPFLOW > 0 1396 if (cur->state_flags & PFSTATE_PFLOW) 1397 export_pflow(cur); 1398 #endif /* NPFLOW > 0 */ 1399 #if NPFSYNC > 0 1400 pfsync_delete_state(cur); 1401 #endif /* NPFSYNC > 0 */ 1402 cur->timeout = PFTM_UNLINKED; 1403 pf_src_tree_remove_state(cur); 1404 pf_detach_state(cur); 1405 } 1406 1407 void 1408 pf_remove_divert_state(struct pf_state_key *sk) 1409 { 1410 struct pf_state_item *si; 1411 1412 TAILQ_FOREACH(si, &sk->states, entry) { 1413 if (sk == si->s->key[PF_SK_STACK] && si->s->rule.ptr && 1414 (si->s->rule.ptr->divert.type == 
PF_DIVERT_TO || 1415 si->s->rule.ptr->divert.type == PF_DIVERT_REPLY)) { 1416 pf_remove_state(si->s); 1417 break; 1418 } 1419 } 1420 } 1421 1422 void 1423 pf_free_state(struct pf_state *cur) 1424 { 1425 struct pf_rule_item *ri; 1426 1427 PF_ASSERT_LOCKED(); 1428 1429 #if NPFSYNC > 0 1430 if (pfsync_state_in_use(cur)) 1431 return; 1432 #endif /* NPFSYNC > 0 */ 1433 KASSERT(cur->timeout == PFTM_UNLINKED); 1434 if (--cur->rule.ptr->states_cur == 0 && 1435 cur->rule.ptr->src_nodes == 0) 1436 pf_rm_rule(NULL, cur->rule.ptr); 1437 if (cur->anchor.ptr != NULL) 1438 if (--cur->anchor.ptr->states_cur == 0) 1439 pf_rm_rule(NULL, cur->anchor.ptr); 1440 while ((ri = SLIST_FIRST(&cur->match_rules))) { 1441 SLIST_REMOVE_HEAD(&cur->match_rules, entry); 1442 if (--ri->r->states_cur == 0 && 1443 ri->r->src_nodes == 0) 1444 pf_rm_rule(NULL, ri->r); 1445 pool_put(&pf_rule_item_pl, ri); 1446 } 1447 pf_normalize_tcp_cleanup(cur); 1448 pfi_kif_unref(cur->kif, PFI_KIF_REF_STATE); 1449 TAILQ_REMOVE(&state_list, cur, entry_list); 1450 if (cur->tag) 1451 pf_tag_unref(cur->tag); 1452 pf_state_unref(cur); 1453 pf_status.fcounters[FCNT_STATE_REMOVALS]++; 1454 pf_status.states--; 1455 } 1456 1457 void 1458 pf_purge_expired_states(u_int32_t maxcheck) 1459 { 1460 static struct pf_state *cur = NULL; 1461 struct pf_state *next; 1462 SLIST_HEAD(pf_state_gcl, pf_state) gcl; 1463 1464 PF_ASSERT_UNLOCKED(); 1465 SLIST_INIT(&gcl); 1466 1467 PF_STATE_ENTER_READ(); 1468 while (maxcheck--) { 1469 /* wrap to start of list when we hit the end */ 1470 if (cur == NULL) { 1471 cur = pf_state_ref(TAILQ_FIRST(&state_list)); 1472 if (cur == NULL) 1473 break; /* list empty */ 1474 } 1475 1476 /* get next state, as cur may get deleted */ 1477 next = TAILQ_NEXT(cur, entry_list); 1478 1479 if ((cur->timeout == PFTM_UNLINKED) || 1480 (pf_state_expires(cur) <= getuptime())) 1481 SLIST_INSERT_HEAD(&gcl, cur, gc_list); 1482 else 1483 pf_state_unref(cur); 1484 1485 cur = pf_state_ref(next); 1486 1487 if (cur == NULL) 1488 
break; 1489 } 1490 PF_STATE_EXIT_READ(); 1491 1492 PF_LOCK(); 1493 PF_STATE_ENTER_WRITE(); 1494 while ((next = SLIST_FIRST(&gcl)) != NULL) { 1495 SLIST_REMOVE_HEAD(&gcl, gc_list); 1496 if (next->timeout == PFTM_UNLINKED) 1497 pf_free_state(next); 1498 else { 1499 pf_remove_state(next); 1500 pf_free_state(next); 1501 } 1502 1503 pf_state_unref(next); 1504 } 1505 PF_STATE_EXIT_WRITE(); 1506 PF_UNLOCK(); 1507 } 1508 1509 int 1510 pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw) 1511 { 1512 if (aw->type != PF_ADDR_TABLE) 1513 return (0); 1514 if ((aw->p.tbl = pfr_attach_table(rs, aw->v.tblname, 1)) == NULL) 1515 return (1); 1516 return (0); 1517 } 1518 1519 void 1520 pf_tbladdr_remove(struct pf_addr_wrap *aw) 1521 { 1522 if (aw->type != PF_ADDR_TABLE || aw->p.tbl == NULL) 1523 return; 1524 pfr_detach_table(aw->p.tbl); 1525 aw->p.tbl = NULL; 1526 } 1527 1528 void 1529 pf_tbladdr_copyout(struct pf_addr_wrap *aw) 1530 { 1531 struct pfr_ktable *kt = aw->p.tbl; 1532 1533 if (aw->type != PF_ADDR_TABLE || kt == NULL) 1534 return; 1535 if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) 1536 kt = kt->pfrkt_root; 1537 aw->p.tbl = NULL; 1538 aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ? 
1539 kt->pfrkt_cnt : -1; 1540 } 1541 1542 void 1543 pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af) 1544 { 1545 switch (af) { 1546 case AF_INET: { 1547 u_int32_t a = ntohl(addr->addr32[0]); 1548 addlog("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255, 1549 (a>>8)&255, a&255); 1550 if (p) { 1551 p = ntohs(p); 1552 addlog(":%u", p); 1553 } 1554 break; 1555 } 1556 #ifdef INET6 1557 case AF_INET6: { 1558 u_int16_t b; 1559 u_int8_t i, curstart, curend, maxstart, maxend; 1560 curstart = curend = maxstart = maxend = 255; 1561 for (i = 0; i < 8; i++) { 1562 if (!addr->addr16[i]) { 1563 if (curstart == 255) 1564 curstart = i; 1565 curend = i; 1566 } else { 1567 if ((curend - curstart) > 1568 (maxend - maxstart)) { 1569 maxstart = curstart; 1570 maxend = curend; 1571 } 1572 curstart = curend = 255; 1573 } 1574 } 1575 if ((curend - curstart) > 1576 (maxend - maxstart)) { 1577 maxstart = curstart; 1578 maxend = curend; 1579 } 1580 for (i = 0; i < 8; i++) { 1581 if (i >= maxstart && i <= maxend) { 1582 if (i == 0) 1583 addlog(":"); 1584 if (i == maxend) 1585 addlog(":"); 1586 } else { 1587 b = ntohs(addr->addr16[i]); 1588 addlog("%x", b); 1589 if (i < 7) 1590 addlog(":"); 1591 } 1592 } 1593 if (p) { 1594 p = ntohs(p); 1595 addlog("[%u]", p); 1596 } 1597 break; 1598 } 1599 #endif /* INET6 */ 1600 } 1601 } 1602 1603 void 1604 pf_print_state(struct pf_state *s) 1605 { 1606 pf_print_state_parts(s, NULL, NULL); 1607 } 1608 1609 void 1610 pf_print_state_parts(struct pf_state *s, 1611 struct pf_state_key *skwp, struct pf_state_key *sksp) 1612 { 1613 struct pf_state_key *skw, *sks; 1614 u_int8_t proto, dir; 1615 1616 /* Do our best to fill these, but they're skipped if NULL */ 1617 skw = skwp ? skwp : (s ? s->key[PF_SK_WIRE] : NULL); 1618 sks = sksp ? sksp : (s ? s->key[PF_SK_STACK] : NULL); 1619 proto = skw ? skw->proto : (sks ? sks->proto : 0); 1620 dir = s ? 
s->direction : 0; 1621 1622 switch (proto) { 1623 case IPPROTO_IPV4: 1624 addlog("IPv4"); 1625 break; 1626 case IPPROTO_IPV6: 1627 addlog("IPv6"); 1628 break; 1629 case IPPROTO_TCP: 1630 addlog("TCP"); 1631 break; 1632 case IPPROTO_UDP: 1633 addlog("UDP"); 1634 break; 1635 case IPPROTO_ICMP: 1636 addlog("ICMP"); 1637 break; 1638 case IPPROTO_ICMPV6: 1639 addlog("ICMPv6"); 1640 break; 1641 default: 1642 addlog("%u", proto); 1643 break; 1644 } 1645 switch (dir) { 1646 case PF_IN: 1647 addlog(" in"); 1648 break; 1649 case PF_OUT: 1650 addlog(" out"); 1651 break; 1652 } 1653 if (skw) { 1654 addlog(" wire: (%d) ", skw->rdomain); 1655 pf_print_host(&skw->addr[0], skw->port[0], skw->af); 1656 addlog(" "); 1657 pf_print_host(&skw->addr[1], skw->port[1], skw->af); 1658 } 1659 if (sks) { 1660 addlog(" stack: (%d) ", sks->rdomain); 1661 if (sks != skw) { 1662 pf_print_host(&sks->addr[0], sks->port[0], sks->af); 1663 addlog(" "); 1664 pf_print_host(&sks->addr[1], sks->port[1], sks->af); 1665 } else 1666 addlog("-"); 1667 } 1668 if (s) { 1669 if (proto == IPPROTO_TCP) { 1670 addlog(" [lo=%u high=%u win=%u modulator=%u", 1671 s->src.seqlo, s->src.seqhi, 1672 s->src.max_win, s->src.seqdiff); 1673 if (s->src.wscale && s->dst.wscale) 1674 addlog(" wscale=%u", 1675 s->src.wscale & PF_WSCALE_MASK); 1676 addlog("]"); 1677 addlog(" [lo=%u high=%u win=%u modulator=%u", 1678 s->dst.seqlo, s->dst.seqhi, 1679 s->dst.max_win, s->dst.seqdiff); 1680 if (s->src.wscale && s->dst.wscale) 1681 addlog(" wscale=%u", 1682 s->dst.wscale & PF_WSCALE_MASK); 1683 addlog("]"); 1684 } 1685 addlog(" %u:%u", s->src.state, s->dst.state); 1686 if (s->rule.ptr) 1687 addlog(" @%d", s->rule.ptr->nr); 1688 } 1689 } 1690 1691 void 1692 pf_print_flags(u_int8_t f) 1693 { 1694 if (f) 1695 addlog(" "); 1696 if (f & TH_FIN) 1697 addlog("F"); 1698 if (f & TH_SYN) 1699 addlog("S"); 1700 if (f & TH_RST) 1701 addlog("R"); 1702 if (f & TH_PUSH) 1703 addlog("P"); 1704 if (f & TH_ACK) 1705 addlog("A"); 1706 if (f & TH_URG) 
1707 addlog("U"); 1708 if (f & TH_ECE) 1709 addlog("E"); 1710 if (f & TH_CWR) 1711 addlog("W"); 1712 } 1713 1714 #define PF_SET_SKIP_STEPS(i) \ 1715 do { \ 1716 while (head[i] != cur) { \ 1717 head[i]->skip[i].ptr = cur; \ 1718 head[i] = TAILQ_NEXT(head[i], entries); \ 1719 } \ 1720 } while (0) 1721 1722 void 1723 pf_calc_skip_steps(struct pf_rulequeue *rules) 1724 { 1725 struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT]; 1726 int i; 1727 1728 cur = TAILQ_FIRST(rules); 1729 prev = cur; 1730 for (i = 0; i < PF_SKIP_COUNT; ++i) 1731 head[i] = cur; 1732 while (cur != NULL) { 1733 if (cur->kif != prev->kif || cur->ifnot != prev->ifnot) 1734 PF_SET_SKIP_STEPS(PF_SKIP_IFP); 1735 if (cur->direction != prev->direction) 1736 PF_SET_SKIP_STEPS(PF_SKIP_DIR); 1737 if (cur->onrdomain != prev->onrdomain || 1738 cur->ifnot != prev->ifnot) 1739 PF_SET_SKIP_STEPS(PF_SKIP_RDOM); 1740 if (cur->af != prev->af) 1741 PF_SET_SKIP_STEPS(PF_SKIP_AF); 1742 if (cur->proto != prev->proto) 1743 PF_SET_SKIP_STEPS(PF_SKIP_PROTO); 1744 if (cur->src.neg != prev->src.neg || 1745 pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr)) 1746 PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR); 1747 if (cur->dst.neg != prev->dst.neg || 1748 pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr)) 1749 PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR); 1750 if (cur->src.port[0] != prev->src.port[0] || 1751 cur->src.port[1] != prev->src.port[1] || 1752 cur->src.port_op != prev->src.port_op) 1753 PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT); 1754 if (cur->dst.port[0] != prev->dst.port[0] || 1755 cur->dst.port[1] != prev->dst.port[1] || 1756 cur->dst.port_op != prev->dst.port_op) 1757 PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT); 1758 1759 prev = cur; 1760 cur = TAILQ_NEXT(cur, entries); 1761 } 1762 for (i = 0; i < PF_SKIP_COUNT; ++i) 1763 PF_SET_SKIP_STEPS(i); 1764 } 1765 1766 int 1767 pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2) 1768 { 1769 if (aw1->type != aw2->type) 1770 return (1); 1771 switch (aw1->type) { 1772 case PF_ADDR_ADDRMASK: 
1773 case PF_ADDR_RANGE: 1774 if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, AF_INET6)) 1775 return (1); 1776 if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, AF_INET6)) 1777 return (1); 1778 return (0); 1779 case PF_ADDR_DYNIFTL: 1780 return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt); 1781 case PF_ADDR_NONE: 1782 case PF_ADDR_NOROUTE: 1783 case PF_ADDR_URPFFAILED: 1784 return (0); 1785 case PF_ADDR_TABLE: 1786 return (aw1->p.tbl != aw2->p.tbl); 1787 case PF_ADDR_RTLABEL: 1788 return (aw1->v.rtlabel != aw2->v.rtlabel); 1789 default: 1790 addlog("invalid address type: %d\n", aw1->type); 1791 return (1); 1792 } 1793 } 1794 1795 /* This algorithm computes 'a + b - c' in ones-complement using a trick to 1796 * emulate at most one ones-complement subtraction. This thereby limits net 1797 * carries/borrows to at most one, eliminating a reduction step and saving one 1798 * each of +, >>, & and ~. 1799 * 1800 * def. x mod y = x - (x//y)*y for integer x,y 1801 * def. sum = x mod 2^16 1802 * def. accumulator = (x >> 16) mod 2^16 1803 * 1804 * The trick works as follows: subtracting exactly one u_int16_t from the 1805 * u_int32_t x incurs at most one underflow, wrapping its upper 16-bits, the 1806 * accumulator, to 2^16 - 1. Adding this to the 16-bit sum preserves the 1807 * ones-complement borrow: 1808 * 1809 * (sum + accumulator) mod 2^16 1810 * = { assume underflow: accumulator := 2^16 - 1 } 1811 * (sum + 2^16 - 1) mod 2^16 1812 * = { mod } 1813 * (sum - 1) mod 2^16 1814 * 1815 * Although this breaks for sum = 0, giving 0xffff, which is ones-complement's 1816 * other zero, not -1, that cannot occur: the 16-bit sum cannot be underflown 1817 * to zero as that requires subtraction of at least 2^16, which exceeds a 1818 * single u_int16_t's range. 1819 * 1820 * We use the following theorem to derive the implementation: 1821 * 1822 * th. (x + (y mod z)) mod z = (x + y) mod z (0) 1823 * proof. 
1824 * (x + (y mod z)) mod z 1825 * = { def mod } 1826 * (x + y - (y//z)*z) mod z 1827 * = { (a + b*c) mod c = a mod c } 1828 * (x + y) mod z [end of proof] 1829 * 1830 * ... and thereby obtain: 1831 * 1832 * (sum + accumulator) mod 2^16 1833 * = { def. accumulator, def. sum } 1834 * (x mod 2^16 + (x >> 16) mod 2^16) mod 2^16 1835 * = { (0), twice } 1836 * (x + (x >> 16)) mod 2^16 1837 * = { x mod 2^n = x & (2^n - 1) } 1838 * (x + (x >> 16)) & 0xffff 1839 * 1840 * Note: this serves also as a reduction step for at most one add (as the 1841 * trailing mod 2^16 prevents further reductions by destroying carries). 1842 */ 1843 static __inline void 1844 pf_cksum_fixup(u_int16_t *cksum, u_int16_t was, u_int16_t now, 1845 u_int8_t proto) 1846 { 1847 u_int32_t x; 1848 const int udp = proto == IPPROTO_UDP; 1849 1850 x = *cksum + was - now; 1851 x = (x + (x >> 16)) & 0xffff; 1852 1853 /* optimise: eliminate a branch when not udp */ 1854 if (udp && *cksum == 0x0000) 1855 return; 1856 if (udp && x == 0x0000) 1857 x = 0xffff; 1858 1859 *cksum = (u_int16_t)(x); 1860 } 1861 1862 #ifdef INET6 1863 /* pre: coverage(cksum) is superset of coverage(covered_cksum) */ 1864 static __inline void 1865 pf_cksum_uncover(u_int16_t *cksum, u_int16_t covered_cksum, u_int8_t proto) 1866 { 1867 pf_cksum_fixup(cksum, ~covered_cksum, 0x0, proto); 1868 } 1869 1870 /* pre: disjoint(coverage(cksum), coverage(uncovered_cksum)) */ 1871 static __inline void 1872 pf_cksum_cover(u_int16_t *cksum, u_int16_t uncovered_cksum, u_int8_t proto) 1873 { 1874 pf_cksum_fixup(cksum, 0x0, ~uncovered_cksum, proto); 1875 } 1876 #endif /* INET6 */ 1877 1878 /* pre: *a is 16-bit aligned within its packet 1879 * 1880 * This algorithm emulates 16-bit ones-complement sums on a twos-complement 1881 * machine by conserving ones-complement's otherwise discarded carries in the 1882 * upper bits of x. 
These accumulated carries when added to the lower 16-bits 1883 * over at least zero 'reduction' steps then complete the ones-complement sum. 1884 * 1885 * def. sum = x mod 2^16 1886 * def. accumulator = (x >> 16) 1887 * 1888 * At most two reduction steps 1889 * 1890 * x := sum + accumulator 1891 * = { def sum, def accumulator } 1892 * x := x mod 2^16 + (x >> 16) 1893 * = { x mod 2^n = x & (2^n - 1) } 1894 * x := (x & 0xffff) + (x >> 16) 1895 * 1896 * are necessary to incorporate the accumulated carries (at most one per add) 1897 * i.e. to reduce x < 2^16 from at most 16 carries in the upper 16 bits. 1898 * 1899 * The function is also invariant over the endian of the host. Why? 1900 * 1901 * Define the unary transpose operator ~ on a bitstring in python slice 1902 * notation as lambda m: m[P:] + m[:P] , for some constant pivot P. 1903 * 1904 * th. ~ distributes over ones-complement addition, denoted by +_1, i.e. 1905 * 1906 * ~m +_1 ~n = ~(m +_1 n) (for all bitstrings m,n of equal length) 1907 * 1908 * proof. Regard the bitstrings in m +_1 n as split at P, forming at most two 1909 * 'half-adds'. Under ones-complement addition, each half-add carries to the 1910 * other, so the sum of each half-add is unaffected by their relative 1911 * order. Therefore: 1912 * 1913 * ~m +_1 ~n 1914 * = { half-adds invariant under transposition } 1915 * ~s 1916 * = { substitute } 1917 * ~(m +_1 n) [end of proof] 1918 * 1919 * th. Summing two in-memory ones-complement 16-bit variables m,n on a machine 1920 * with the converse endian does not alter the result. 1921 * 1922 * proof. 
1923 * { converse machine endian: load/store transposes, P := 8 } 1924 * ~(~m +_1 ~n) 1925 * = { ~ over +_1 } 1926 * ~~m +_1 ~~n 1927 * = { ~ is an involution } 1928 * m +_1 n [end of proof] 1929 * 1930 */ 1931 #define NEG(x) ((u_int16_t)~(x)) 1932 void 1933 pf_cksum_fixup_a(u_int16_t *cksum, const struct pf_addr *a, 1934 const struct pf_addr *an, sa_family_t af, u_int8_t proto) 1935 { 1936 u_int32_t x; 1937 const u_int16_t *n = an->addr16; 1938 const u_int16_t *o = a->addr16; 1939 const int udp = proto == IPPROTO_UDP; 1940 1941 switch (af) { 1942 case AF_INET: 1943 x = *cksum + o[0] + NEG(n[0]) + o[1] + NEG(n[1]); 1944 break; 1945 #ifdef INET6 1946 case AF_INET6: 1947 x = *cksum + o[0] + NEG(n[0]) + o[1] + NEG(n[1]) +\ 1948 o[2] + NEG(n[2]) + o[3] + NEG(n[3]) +\ 1949 o[4] + NEG(n[4]) + o[5] + NEG(n[5]) +\ 1950 o[6] + NEG(n[6]) + o[7] + NEG(n[7]); 1951 break; 1952 #endif /* INET6 */ 1953 default: 1954 unhandled_af(af); 1955 } 1956 1957 x = (x & 0xffff) + (x >> 16); 1958 x = (x & 0xffff) + (x >> 16); 1959 1960 /* optimise: eliminate a branch when not udp */ 1961 if (udp && *cksum == 0x0000) 1962 return; 1963 if (udp && x == 0x0000) 1964 x = 0xffff; 1965 1966 *cksum = (u_int16_t)(x); 1967 } 1968 1969 int 1970 pf_patch_8(struct pf_pdesc *pd, u_int8_t *f, u_int8_t v, bool hi) 1971 { 1972 int rewrite = 0; 1973 1974 if (*f != v) { 1975 u_int16_t old = htons(hi ? (*f << 8) : *f); 1976 u_int16_t new = htons(hi ? 
( v << 8) : v); 1977 1978 pf_cksum_fixup(pd->pcksum, old, new, pd->proto); 1979 *f = v; 1980 rewrite = 1; 1981 } 1982 1983 return (rewrite); 1984 } 1985 1986 /* pre: *f is 16-bit aligned within its packet */ 1987 int 1988 pf_patch_16(struct pf_pdesc *pd, u_int16_t *f, u_int16_t v) 1989 { 1990 int rewrite = 0; 1991 1992 if (*f != v) { 1993 pf_cksum_fixup(pd->pcksum, *f, v, pd->proto); 1994 *f = v; 1995 rewrite = 1; 1996 } 1997 1998 return (rewrite); 1999 } 2000 2001 int 2002 pf_patch_16_unaligned(struct pf_pdesc *pd, void *f, u_int16_t v, bool hi) 2003 { 2004 int rewrite = 0; 2005 u_int8_t *fb = (u_int8_t*)f; 2006 u_int8_t *vb = (u_int8_t*)&v; 2007 2008 if (hi && ALIGNED_POINTER(f, u_int16_t)) { 2009 return (pf_patch_16(pd, f, v)); /* optimise */ 2010 } 2011 2012 rewrite += pf_patch_8(pd, fb++, *vb++, hi); 2013 rewrite += pf_patch_8(pd, fb++, *vb++,!hi); 2014 2015 return (rewrite); 2016 } 2017 2018 /* pre: *f is 16-bit aligned within its packet */ 2019 /* pre: pd->proto != IPPROTO_UDP */ 2020 int 2021 pf_patch_32(struct pf_pdesc *pd, u_int32_t *f, u_int32_t v) 2022 { 2023 int rewrite = 0; 2024 u_int16_t *pc = pd->pcksum; 2025 u_int8_t proto = pd->proto; 2026 2027 /* optimise: inline udp fixup code is unused; let compiler scrub it */ 2028 if (proto == IPPROTO_UDP) 2029 panic("%s: udp", __func__); 2030 2031 /* optimise: skip *f != v guard; true for all use-cases */ 2032 pf_cksum_fixup(pc, *f / (1 << 16), v / (1 << 16), proto); 2033 pf_cksum_fixup(pc, *f % (1 << 16), v % (1 << 16), proto); 2034 2035 *f = v; 2036 rewrite = 1; 2037 2038 return (rewrite); 2039 } 2040 2041 int 2042 pf_patch_32_unaligned(struct pf_pdesc *pd, void *f, u_int32_t v, bool hi) 2043 { 2044 int rewrite = 0; 2045 u_int8_t *fb = (u_int8_t*)f; 2046 u_int8_t *vb = (u_int8_t*)&v; 2047 2048 if (hi && ALIGNED_POINTER(f, u_int32_t)) { 2049 return (pf_patch_32(pd, f, v)); /* optimise */ 2050 } 2051 2052 rewrite += pf_patch_8(pd, fb++, *vb++, hi); 2053 rewrite += pf_patch_8(pd, fb++, *vb++,!hi); 2054 
rewrite += pf_patch_8(pd, fb++, *vb++, hi); 2055 rewrite += pf_patch_8(pd, fb++, *vb++,!hi); 2056 2057 return (rewrite); 2058 } 2059 2060 int 2061 pf_icmp_mapping(struct pf_pdesc *pd, u_int8_t type, int *icmp_dir, 2062 u_int16_t *virtual_id, u_int16_t *virtual_type) 2063 { 2064 /* 2065 * ICMP types marked with PF_OUT are typically responses to 2066 * PF_IN, and will match states in the opposite direction. 2067 * PF_IN ICMP types need to match a state with that type. 2068 */ 2069 *icmp_dir = PF_OUT; 2070 2071 /* Queries (and responses) */ 2072 switch (pd->af) { 2073 case AF_INET: 2074 switch (type) { 2075 case ICMP_ECHO: 2076 *icmp_dir = PF_IN; 2077 /* FALLTHROUGH */ 2078 case ICMP_ECHOREPLY: 2079 *virtual_type = ICMP_ECHO; 2080 *virtual_id = pd->hdr.icmp.icmp_id; 2081 break; 2082 2083 case ICMP_TSTAMP: 2084 *icmp_dir = PF_IN; 2085 /* FALLTHROUGH */ 2086 case ICMP_TSTAMPREPLY: 2087 *virtual_type = ICMP_TSTAMP; 2088 *virtual_id = pd->hdr.icmp.icmp_id; 2089 break; 2090 2091 case ICMP_IREQ: 2092 *icmp_dir = PF_IN; 2093 /* FALLTHROUGH */ 2094 case ICMP_IREQREPLY: 2095 *virtual_type = ICMP_IREQ; 2096 *virtual_id = pd->hdr.icmp.icmp_id; 2097 break; 2098 2099 case ICMP_MASKREQ: 2100 *icmp_dir = PF_IN; 2101 /* FALLTHROUGH */ 2102 case ICMP_MASKREPLY: 2103 *virtual_type = ICMP_MASKREQ; 2104 *virtual_id = pd->hdr.icmp.icmp_id; 2105 break; 2106 2107 case ICMP_IPV6_WHEREAREYOU: 2108 *icmp_dir = PF_IN; 2109 /* FALLTHROUGH */ 2110 case ICMP_IPV6_IAMHERE: 2111 *virtual_type = ICMP_IPV6_WHEREAREYOU; 2112 *virtual_id = 0; /* Nothing sane to match on! */ 2113 break; 2114 2115 case ICMP_MOBILE_REGREQUEST: 2116 *icmp_dir = PF_IN; 2117 /* FALLTHROUGH */ 2118 case ICMP_MOBILE_REGREPLY: 2119 *virtual_type = ICMP_MOBILE_REGREQUEST; 2120 *virtual_id = 0; /* Nothing sane to match on! 
*/ 2121 break; 2122 2123 case ICMP_ROUTERSOLICIT: 2124 *icmp_dir = PF_IN; 2125 /* FALLTHROUGH */ 2126 case ICMP_ROUTERADVERT: 2127 *virtual_type = ICMP_ROUTERSOLICIT; 2128 *virtual_id = 0; /* Nothing sane to match on! */ 2129 break; 2130 2131 /* These ICMP types map to other connections */ 2132 case ICMP_UNREACH: 2133 case ICMP_SOURCEQUENCH: 2134 case ICMP_REDIRECT: 2135 case ICMP_TIMXCEED: 2136 case ICMP_PARAMPROB: 2137 /* These will not be used, but set them anyway */ 2138 *icmp_dir = PF_IN; 2139 *virtual_type = htons(type); 2140 *virtual_id = 0; 2141 return (1); /* These types match to another state */ 2142 2143 /* 2144 * All remaining ICMP types get their own states, 2145 * and will only match in one direction. 2146 */ 2147 default: 2148 *icmp_dir = PF_IN; 2149 *virtual_type = type; 2150 *virtual_id = 0; 2151 break; 2152 } 2153 break; 2154 #ifdef INET6 2155 case AF_INET6: 2156 switch (type) { 2157 case ICMP6_ECHO_REQUEST: 2158 *icmp_dir = PF_IN; 2159 /* FALLTHROUGH */ 2160 case ICMP6_ECHO_REPLY: 2161 *virtual_type = ICMP6_ECHO_REQUEST; 2162 *virtual_id = pd->hdr.icmp6.icmp6_id; 2163 break; 2164 2165 case MLD_LISTENER_QUERY: 2166 case MLD_LISTENER_REPORT: { 2167 struct mld_hdr *mld = &pd->hdr.mld; 2168 u_int32_t h; 2169 2170 /* 2171 * Listener Report can be sent by clients 2172 * without an associated Listener Query. 2173 * In addition to that, when Report is sent as a 2174 * reply to a Query its source and destination 2175 * address are different. 
2176 */ 2177 *icmp_dir = PF_IN; 2178 *virtual_type = MLD_LISTENER_QUERY; 2179 /* generate fake id for these messages */ 2180 h = mld->mld_addr.s6_addr32[0] ^ 2181 mld->mld_addr.s6_addr32[1] ^ 2182 mld->mld_addr.s6_addr32[2] ^ 2183 mld->mld_addr.s6_addr32[3]; 2184 *virtual_id = (h >> 16) ^ (h & 0xffff); 2185 break; 2186 } 2187 2188 /* 2189 * ICMP6_FQDN and ICMP6_NI query/reply are the same type as 2190 * ICMP6_WRU 2191 */ 2192 case ICMP6_WRUREQUEST: 2193 *icmp_dir = PF_IN; 2194 /* FALLTHROUGH */ 2195 case ICMP6_WRUREPLY: 2196 *virtual_type = ICMP6_WRUREQUEST; 2197 *virtual_id = 0; /* Nothing sane to match on! */ 2198 break; 2199 2200 case MLD_MTRACE: 2201 *icmp_dir = PF_IN; 2202 /* FALLTHROUGH */ 2203 case MLD_MTRACE_RESP: 2204 *virtual_type = MLD_MTRACE; 2205 *virtual_id = 0; /* Nothing sane to match on! */ 2206 break; 2207 2208 case ND_NEIGHBOR_SOLICIT: 2209 *icmp_dir = PF_IN; 2210 /* FALLTHROUGH */ 2211 case ND_NEIGHBOR_ADVERT: { 2212 struct nd_neighbor_solicit *nd = &pd->hdr.nd_ns; 2213 u_int32_t h; 2214 2215 *virtual_type = ND_NEIGHBOR_SOLICIT; 2216 /* generate fake id for these messages */ 2217 h = nd->nd_ns_target.s6_addr32[0] ^ 2218 nd->nd_ns_target.s6_addr32[1] ^ 2219 nd->nd_ns_target.s6_addr32[2] ^ 2220 nd->nd_ns_target.s6_addr32[3]; 2221 *virtual_id = (h >> 16) ^ (h & 0xffff); 2222 break; 2223 } 2224 2225 /* 2226 * These ICMP types map to other connections. 2227 * ND_REDIRECT can't be in this list because the triggering 2228 * packet header is optional. 2229 */ 2230 case ICMP6_DST_UNREACH: 2231 case ICMP6_PACKET_TOO_BIG: 2232 case ICMP6_TIME_EXCEEDED: 2233 case ICMP6_PARAM_PROB: 2234 /* These will not be used, but set them anyway */ 2235 *icmp_dir = PF_IN; 2236 *virtual_type = htons(type); 2237 *virtual_id = 0; 2238 return (1); /* These types match to another state */ 2239 /* 2240 * All remaining ICMP6 types get their own states, 2241 * and will only match in one direction. 
2242 */ 2243 default: 2244 *icmp_dir = PF_IN; 2245 *virtual_type = type; 2246 *virtual_id = 0; 2247 break; 2248 } 2249 break; 2250 #endif /* INET6 */ 2251 } 2252 *virtual_type = htons(*virtual_type); 2253 return (0); /* These types match to their own state */ 2254 } 2255 2256 void 2257 pf_translate_icmp(struct pf_pdesc *pd, struct pf_addr *qa, u_int16_t *qp, 2258 struct pf_addr *oa, struct pf_addr *na, u_int16_t np) 2259 { 2260 /* note: doesn't trouble to fixup quoted checksums, if any */ 2261 2262 /* change quoted protocol port */ 2263 if (qp != NULL) 2264 pf_patch_16(pd, qp, np); 2265 2266 /* change quoted ip address */ 2267 pf_cksum_fixup_a(pd->pcksum, qa, na, pd->af, pd->proto); 2268 pf_addrcpy(qa, na, pd->af); 2269 2270 /* change network-header's ip address */ 2271 if (oa) 2272 pf_translate_a(pd, oa, na); 2273 } 2274 2275 /* pre: *a is 16-bit aligned within its packet */ 2276 /* *a is a network header src/dst address */ 2277 int 2278 pf_translate_a(struct pf_pdesc *pd, struct pf_addr *a, struct pf_addr *an) 2279 { 2280 int rewrite = 0; 2281 2282 /* warning: !PF_ANEQ != PF_AEQ */ 2283 if (!PF_ANEQ(a, an, pd->af)) 2284 return (0); 2285 2286 /* fixup transport pseudo-header, if any */ 2287 switch (pd->proto) { 2288 case IPPROTO_TCP: /* FALLTHROUGH */ 2289 case IPPROTO_UDP: /* FALLTHROUGH */ 2290 case IPPROTO_ICMPV6: 2291 pf_cksum_fixup_a(pd->pcksum, a, an, pd->af, pd->proto); 2292 break; 2293 default: 2294 break; /* assume no pseudo-header */ 2295 } 2296 2297 pf_addrcpy(a, an, pd->af); 2298 rewrite = 1; 2299 2300 return (rewrite); 2301 } 2302 2303 #if INET6 2304 /* pf_translate_af() may change pd->m, adjust local copies after calling */ 2305 int 2306 pf_translate_af(struct pf_pdesc *pd) 2307 { 2308 static const struct pf_addr zero; 2309 struct ip *ip4; 2310 struct ip6_hdr *ip6; 2311 int copyback = 0; 2312 u_int hlen, ohlen, dlen; 2313 u_int16_t *pc; 2314 u_int8_t af_proto, naf_proto; 2315 2316 hlen = (pd->naf == AF_INET) ? 
sizeof(*ip4) : sizeof(*ip6); 2317 ohlen = pd->off; 2318 dlen = pd->tot_len - pd->off; 2319 pc = pd->pcksum; 2320 2321 af_proto = naf_proto = pd->proto; 2322 if (naf_proto == IPPROTO_ICMP) 2323 af_proto = IPPROTO_ICMPV6; 2324 if (naf_proto == IPPROTO_ICMPV6) 2325 af_proto = IPPROTO_ICMP; 2326 2327 /* uncover stale pseudo-header */ 2328 switch (af_proto) { 2329 case IPPROTO_ICMPV6: 2330 /* optimise: unchanged for TCP/UDP */ 2331 pf_cksum_fixup(pc, htons(af_proto), 0x0, af_proto); 2332 pf_cksum_fixup(pc, htons(dlen), 0x0, af_proto); 2333 /* FALLTHROUGH */ 2334 case IPPROTO_UDP: /* FALLTHROUGH */ 2335 case IPPROTO_TCP: 2336 pf_cksum_fixup_a(pc, pd->src, &zero, pd->af, af_proto); 2337 pf_cksum_fixup_a(pc, pd->dst, &zero, pd->af, af_proto); 2338 copyback = 1; 2339 break; 2340 default: 2341 break; /* assume no pseudo-header */ 2342 } 2343 2344 /* replace the network header */ 2345 m_adj(pd->m, pd->off); 2346 pd->src = NULL; 2347 pd->dst = NULL; 2348 2349 if ((M_PREPEND(pd->m, hlen, M_DONTWAIT)) == NULL) { 2350 pd->m = NULL; 2351 return (-1); 2352 } 2353 2354 pd->off = hlen; 2355 pd->tot_len += hlen - ohlen; 2356 2357 switch (pd->naf) { 2358 case AF_INET: 2359 ip4 = mtod(pd->m, struct ip *); 2360 memset(ip4, 0, hlen); 2361 ip4->ip_v = IPVERSION; 2362 ip4->ip_hl = hlen >> 2; 2363 ip4->ip_tos = pd->tos; 2364 ip4->ip_len = htons(hlen + dlen); 2365 ip4->ip_id = htons(ip_randomid()); 2366 ip4->ip_off = htons(IP_DF); 2367 ip4->ip_ttl = pd->ttl; 2368 ip4->ip_p = pd->proto; 2369 ip4->ip_src = pd->nsaddr.v4; 2370 ip4->ip_dst = pd->ndaddr.v4; 2371 break; 2372 case AF_INET6: 2373 ip6 = mtod(pd->m, struct ip6_hdr *); 2374 memset(ip6, 0, hlen); 2375 ip6->ip6_vfc = IPV6_VERSION; 2376 ip6->ip6_flow |= htonl((u_int32_t)pd->tos << 20); 2377 ip6->ip6_plen = htons(dlen); 2378 ip6->ip6_nxt = pd->proto; 2379 if (!pd->ttl || pd->ttl > IPV6_DEFHLIM) 2380 ip6->ip6_hlim = IPV6_DEFHLIM; 2381 else 2382 ip6->ip6_hlim = pd->ttl; 2383 ip6->ip6_src = pd->nsaddr.v6; 2384 ip6->ip6_dst = pd->ndaddr.v6; 
2385 break; 2386 default: 2387 unhandled_af(pd->naf); 2388 } 2389 2390 /* UDP over IPv6 must be checksummed per rfc2460 p27 */ 2391 if (naf_proto == IPPROTO_UDP && *pc == 0x0000 && 2392 pd->naf == AF_INET6) { 2393 pd->m->m_pkthdr.csum_flags |= M_UDP_CSUM_OUT; 2394 } 2395 2396 /* cover fresh pseudo-header */ 2397 switch (naf_proto) { 2398 case IPPROTO_ICMPV6: 2399 /* optimise: unchanged for TCP/UDP */ 2400 pf_cksum_fixup(pc, 0x0, htons(naf_proto), naf_proto); 2401 pf_cksum_fixup(pc, 0x0, htons(dlen), naf_proto); 2402 /* FALLTHROUGH */ 2403 case IPPROTO_UDP: /* FALLTHROUGH */ 2404 case IPPROTO_TCP: 2405 pf_cksum_fixup_a(pc, &zero, &pd->nsaddr, pd->naf, naf_proto); 2406 pf_cksum_fixup_a(pc, &zero, &pd->ndaddr, pd->naf, naf_proto); 2407 copyback = 1; 2408 break; 2409 default: 2410 break; /* assume no pseudo-header */ 2411 } 2412 2413 /* flush pd->pcksum */ 2414 if (copyback) 2415 m_copyback(pd->m, pd->off, pd->hdrlen, &pd->hdr, M_NOWAIT); 2416 2417 return (0); 2418 } 2419 2420 int 2421 pf_change_icmp_af(struct mbuf *m, int ipoff2, struct pf_pdesc *pd, 2422 struct pf_pdesc *pd2, struct pf_addr *src, struct pf_addr *dst, 2423 sa_family_t af, sa_family_t naf) 2424 { 2425 struct mbuf *n = NULL; 2426 struct ip *ip4; 2427 struct ip6_hdr *ip6; 2428 u_int hlen, ohlen, dlen; 2429 int d; 2430 2431 if (af == naf || (af != AF_INET && af != AF_INET6) || 2432 (naf != AF_INET && naf != AF_INET6)) 2433 return (-1); 2434 2435 /* split the mbuf chain on the quoted ip/ip6 header boundary */ 2436 if ((n = m_split(m, ipoff2, M_DONTWAIT)) == NULL) 2437 return (-1); 2438 2439 /* new quoted header */ 2440 hlen = naf == AF_INET ? 
sizeof(*ip4) : sizeof(*ip6); 2441 /* old quoted header */ 2442 ohlen = pd2->off - ipoff2; 2443 2444 /* trim old quoted header */ 2445 pf_cksum_uncover(pd->pcksum, in_cksum(n, ohlen), pd->proto); 2446 m_adj(n, ohlen); 2447 2448 /* prepend a new, translated, quoted header */ 2449 if ((M_PREPEND(n, hlen, M_DONTWAIT)) == NULL) 2450 return (-1); 2451 2452 switch (naf) { 2453 case AF_INET: 2454 ip4 = mtod(n, struct ip *); 2455 memset(ip4, 0, sizeof(*ip4)); 2456 ip4->ip_v = IPVERSION; 2457 ip4->ip_hl = sizeof(*ip4) >> 2; 2458 ip4->ip_len = htons(sizeof(*ip4) + pd2->tot_len - ohlen); 2459 ip4->ip_id = htons(ip_randomid()); 2460 ip4->ip_off = htons(IP_DF); 2461 ip4->ip_ttl = pd2->ttl; 2462 if (pd2->proto == IPPROTO_ICMPV6) 2463 ip4->ip_p = IPPROTO_ICMP; 2464 else 2465 ip4->ip_p = pd2->proto; 2466 ip4->ip_src = src->v4; 2467 ip4->ip_dst = dst->v4; 2468 ip4->ip_sum = in_cksum(n, ip4->ip_hl << 2); 2469 break; 2470 case AF_INET6: 2471 ip6 = mtod(n, struct ip6_hdr *); 2472 memset(ip6, 0, sizeof(*ip6)); 2473 ip6->ip6_vfc = IPV6_VERSION; 2474 ip6->ip6_plen = htons(pd2->tot_len - ohlen); 2475 if (pd2->proto == IPPROTO_ICMP) 2476 ip6->ip6_nxt = IPPROTO_ICMPV6; 2477 else 2478 ip6->ip6_nxt = pd2->proto; 2479 if (!pd2->ttl || pd2->ttl > IPV6_DEFHLIM) 2480 ip6->ip6_hlim = IPV6_DEFHLIM; 2481 else 2482 ip6->ip6_hlim = pd2->ttl; 2483 ip6->ip6_src = src->v6; 2484 ip6->ip6_dst = dst->v6; 2485 break; 2486 } 2487 2488 /* cover new quoted header */ 2489 /* optimise: any new AF_INET header of ours sums to zero */ 2490 if (naf != AF_INET) { 2491 pf_cksum_cover(pd->pcksum, in_cksum(n, hlen), pd->proto); 2492 } 2493 2494 /* reattach modified quoted packet to outer header */ 2495 { 2496 int nlen = n->m_pkthdr.len; 2497 m_cat(m, n); 2498 m->m_pkthdr.len += nlen; 2499 } 2500 2501 /* account for altered length */ 2502 d = hlen - ohlen; 2503 2504 if (pd->proto == IPPROTO_ICMPV6) { 2505 /* fixup pseudo-header */ 2506 dlen = pd->tot_len - pd->off; 2507 pf_cksum_fixup(pd->pcksum, 2508 htons(dlen), 
htons(dlen + d), pd->proto); 2509 } 2510 2511 pd->tot_len += d; 2512 pd2->tot_len += d; 2513 pd2->off += d; 2514 2515 /* note: not bothering to update network headers as 2516 these due for rewrite by pf_translate_af() */ 2517 2518 return (0); 2519 } 2520 2521 2522 #define PTR_IP(field) (offsetof(struct ip, field)) 2523 #define PTR_IP6(field) (offsetof(struct ip6_hdr, field)) 2524 2525 int 2526 pf_translate_icmp_af(struct pf_pdesc *pd, int af, void *arg) 2527 { 2528 struct icmp *icmp4; 2529 struct icmp6_hdr *icmp6; 2530 u_int32_t mtu; 2531 int32_t ptr = -1; 2532 u_int8_t type; 2533 u_int8_t code; 2534 2535 switch (af) { 2536 case AF_INET: 2537 icmp6 = arg; 2538 type = icmp6->icmp6_type; 2539 code = icmp6->icmp6_code; 2540 mtu = ntohl(icmp6->icmp6_mtu); 2541 2542 switch (type) { 2543 case ICMP6_ECHO_REQUEST: 2544 type = ICMP_ECHO; 2545 break; 2546 case ICMP6_ECHO_REPLY: 2547 type = ICMP_ECHOREPLY; 2548 break; 2549 case ICMP6_DST_UNREACH: 2550 type = ICMP_UNREACH; 2551 switch (code) { 2552 case ICMP6_DST_UNREACH_NOROUTE: 2553 case ICMP6_DST_UNREACH_BEYONDSCOPE: 2554 case ICMP6_DST_UNREACH_ADDR: 2555 code = ICMP_UNREACH_HOST; 2556 break; 2557 case ICMP6_DST_UNREACH_ADMIN: 2558 code = ICMP_UNREACH_HOST_PROHIB; 2559 break; 2560 case ICMP6_DST_UNREACH_NOPORT: 2561 code = ICMP_UNREACH_PORT; 2562 break; 2563 default: 2564 return (-1); 2565 } 2566 break; 2567 case ICMP6_PACKET_TOO_BIG: 2568 type = ICMP_UNREACH; 2569 code = ICMP_UNREACH_NEEDFRAG; 2570 mtu -= 20; 2571 break; 2572 case ICMP6_TIME_EXCEEDED: 2573 type = ICMP_TIMXCEED; 2574 break; 2575 case ICMP6_PARAM_PROB: 2576 switch (code) { 2577 case ICMP6_PARAMPROB_HEADER: 2578 type = ICMP_PARAMPROB; 2579 code = ICMP_PARAMPROB_ERRATPTR; 2580 ptr = ntohl(icmp6->icmp6_pptr); 2581 2582 if (ptr == PTR_IP6(ip6_vfc)) 2583 ; /* preserve */ 2584 else if (ptr == PTR_IP6(ip6_vfc) + 1) 2585 ptr = PTR_IP(ip_tos); 2586 else if (ptr == PTR_IP6(ip6_plen) || 2587 ptr == PTR_IP6(ip6_plen) + 1) 2588 ptr = PTR_IP(ip_len); 2589 else if (ptr == 
PTR_IP6(ip6_nxt)) 2590 ptr = PTR_IP(ip_p); 2591 else if (ptr == PTR_IP6(ip6_hlim)) 2592 ptr = PTR_IP(ip_ttl); 2593 else if (ptr >= PTR_IP6(ip6_src) && 2594 ptr < PTR_IP6(ip6_dst)) 2595 ptr = PTR_IP(ip_src); 2596 else if (ptr >= PTR_IP6(ip6_dst) && 2597 ptr < sizeof(struct ip6_hdr)) 2598 ptr = PTR_IP(ip_dst); 2599 else { 2600 return (-1); 2601 } 2602 break; 2603 case ICMP6_PARAMPROB_NEXTHEADER: 2604 type = ICMP_UNREACH; 2605 code = ICMP_UNREACH_PROTOCOL; 2606 break; 2607 default: 2608 return (-1); 2609 } 2610 break; 2611 default: 2612 return (-1); 2613 } 2614 2615 pf_patch_8(pd, &icmp6->icmp6_type, type, PF_HI); 2616 pf_patch_8(pd, &icmp6->icmp6_code, code, PF_LO); 2617 2618 /* aligns well with a icmpv4 nextmtu */ 2619 pf_patch_32(pd, &icmp6->icmp6_mtu, htonl(mtu)); 2620 2621 /* icmpv4 pptr is a one most significant byte */ 2622 if (ptr >= 0) 2623 pf_patch_32(pd, &icmp6->icmp6_pptr, htonl(ptr << 24)); 2624 break; 2625 case AF_INET6: 2626 icmp4 = arg; 2627 type = icmp4->icmp_type; 2628 code = icmp4->icmp_code; 2629 mtu = ntohs(icmp4->icmp_nextmtu); 2630 2631 switch (type) { 2632 case ICMP_ECHO: 2633 type = ICMP6_ECHO_REQUEST; 2634 break; 2635 case ICMP_ECHOREPLY: 2636 type = ICMP6_ECHO_REPLY; 2637 break; 2638 case ICMP_UNREACH: 2639 type = ICMP6_DST_UNREACH; 2640 switch (code) { 2641 case ICMP_UNREACH_NET: 2642 case ICMP_UNREACH_HOST: 2643 case ICMP_UNREACH_NET_UNKNOWN: 2644 case ICMP_UNREACH_HOST_UNKNOWN: 2645 case ICMP_UNREACH_ISOLATED: 2646 case ICMP_UNREACH_TOSNET: 2647 case ICMP_UNREACH_TOSHOST: 2648 code = ICMP6_DST_UNREACH_NOROUTE; 2649 break; 2650 case ICMP_UNREACH_PORT: 2651 code = ICMP6_DST_UNREACH_NOPORT; 2652 break; 2653 case ICMP_UNREACH_NET_PROHIB: 2654 case ICMP_UNREACH_HOST_PROHIB: 2655 case ICMP_UNREACH_FILTER_PROHIB: 2656 case ICMP_UNREACH_PRECEDENCE_CUTOFF: 2657 code = ICMP6_DST_UNREACH_ADMIN; 2658 break; 2659 case ICMP_UNREACH_PROTOCOL: 2660 type = ICMP6_PARAM_PROB; 2661 code = ICMP6_PARAMPROB_NEXTHEADER; 2662 ptr = offsetof(struct ip6_hdr, 
ip6_nxt); 2663 break; 2664 case ICMP_UNREACH_NEEDFRAG: 2665 type = ICMP6_PACKET_TOO_BIG; 2666 code = 0; 2667 mtu += 20; 2668 break; 2669 default: 2670 return (-1); 2671 } 2672 break; 2673 case ICMP_TIMXCEED: 2674 type = ICMP6_TIME_EXCEEDED; 2675 break; 2676 case ICMP_PARAMPROB: 2677 type = ICMP6_PARAM_PROB; 2678 switch (code) { 2679 case ICMP_PARAMPROB_ERRATPTR: 2680 code = ICMP6_PARAMPROB_HEADER; 2681 break; 2682 case ICMP_PARAMPROB_LENGTH: 2683 code = ICMP6_PARAMPROB_HEADER; 2684 break; 2685 default: 2686 return (-1); 2687 } 2688 2689 ptr = icmp4->icmp_pptr; 2690 if (ptr == 0 || ptr == PTR_IP(ip_tos)) 2691 ; /* preserve */ 2692 else if (ptr == PTR_IP(ip_len) || 2693 ptr == PTR_IP(ip_len) + 1) 2694 ptr = PTR_IP6(ip6_plen); 2695 else if (ptr == PTR_IP(ip_ttl)) 2696 ptr = PTR_IP6(ip6_hlim); 2697 else if (ptr == PTR_IP(ip_p)) 2698 ptr = PTR_IP6(ip6_nxt); 2699 else if (ptr >= PTR_IP(ip_src) && 2700 ptr < PTR_IP(ip_dst)) 2701 ptr = PTR_IP6(ip6_src); 2702 else if (ptr >= PTR_IP(ip_dst) && 2703 ptr < sizeof(struct ip)) 2704 ptr = PTR_IP6(ip6_dst); 2705 else { 2706 return (-1); 2707 } 2708 break; 2709 default: 2710 return (-1); 2711 } 2712 2713 pf_patch_8(pd, &icmp4->icmp_type, type, PF_HI); 2714 pf_patch_8(pd, &icmp4->icmp_code, code, PF_LO); 2715 pf_patch_16(pd, &icmp4->icmp_nextmtu, htons(mtu)); 2716 if (ptr >= 0) 2717 pf_patch_32(pd, &icmp4->icmp_void, htonl(ptr)); 2718 break; 2719 } 2720 2721 return (0); 2722 } 2723 #endif /* INET6 */ 2724 2725 /* 2726 * Need to modulate the sequence numbers in the TCP SACK option 2727 * (credits to Krzysztof Pfaff for report and patch) 2728 */ 2729 int 2730 pf_modulate_sack(struct pf_pdesc *pd, struct pf_state_peer *dst) 2731 { 2732 struct sackblk sack; 2733 int copyback = 0, i; 2734 int olen, optsoff; 2735 u_int8_t opts[MAX_TCPOPTLEN], *opt, *eoh; 2736 2737 olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr); 2738 optsoff = pd->off + sizeof(struct tcphdr); 2739 #define TCPOLEN_MINSACK (TCPOLEN_SACK + 2) 2740 if (olen < 
TCPOLEN_MINSACK || 2741 !pf_pull_hdr(pd->m, optsoff, opts, olen, NULL, NULL, pd->af)) 2742 return (0); 2743 2744 eoh = opts + olen; 2745 opt = opts; 2746 while ((opt = pf_find_tcpopt(opt, opts, olen, 2747 TCPOPT_SACK, TCPOLEN_MINSACK)) != NULL) 2748 { 2749 size_t safelen = MIN(opt[1], (eoh - opt)); 2750 for (i = 2; i + TCPOLEN_SACK <= safelen; i += TCPOLEN_SACK) { 2751 size_t startoff = (opt + i) - opts; 2752 memcpy(&sack, &opt[i], sizeof(sack)); 2753 pf_patch_32_unaligned(pd, &sack.start, 2754 htonl(ntohl(sack.start) - dst->seqdiff), 2755 PF_ALGNMNT(startoff)); 2756 pf_patch_32_unaligned(pd, &sack.end, 2757 htonl(ntohl(sack.end) - dst->seqdiff), 2758 PF_ALGNMNT(startoff + sizeof(sack.start))); 2759 memcpy(&opt[i], &sack, sizeof(sack)); 2760 } 2761 copyback = 1; 2762 opt += opt[1]; 2763 } 2764 2765 if (copyback) 2766 m_copyback(pd->m, optsoff, olen, opts, M_NOWAIT); 2767 return (copyback); 2768 } 2769 2770 struct mbuf * 2771 pf_build_tcp(const struct pf_rule *r, sa_family_t af, 2772 const struct pf_addr *saddr, const struct pf_addr *daddr, 2773 u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack, 2774 u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag, 2775 u_int16_t rtag, u_int sack, u_int rdom) 2776 { 2777 struct mbuf *m; 2778 int len, tlen; 2779 struct ip *h; 2780 #ifdef INET6 2781 struct ip6_hdr *h6; 2782 #endif /* INET6 */ 2783 struct tcphdr *th; 2784 char *opt; 2785 2786 /* maximum segment size tcp option */ 2787 tlen = sizeof(struct tcphdr); 2788 if (mss) 2789 tlen += 4; 2790 if (sack) 2791 tlen += 2; 2792 2793 switch (af) { 2794 case AF_INET: 2795 len = sizeof(struct ip) + tlen; 2796 break; 2797 #ifdef INET6 2798 case AF_INET6: 2799 len = sizeof(struct ip6_hdr) + tlen; 2800 break; 2801 #endif /* INET6 */ 2802 default: 2803 unhandled_af(af); 2804 } 2805 2806 /* create outgoing mbuf */ 2807 m = m_gethdr(M_DONTWAIT, MT_HEADER); 2808 if (m == NULL) 2809 return (NULL); 2810 if (tag) 2811 m->m_pkthdr.pf.flags |= PF_TAG_GENERATED; 2812 
m->m_pkthdr.pf.tag = rtag; 2813 m->m_pkthdr.ph_rtableid = rdom; 2814 if (r && (r->scrub_flags & PFSTATE_SETPRIO)) 2815 m->m_pkthdr.pf.prio = r->set_prio[0]; 2816 if (r && r->qid) 2817 m->m_pkthdr.pf.qid = r->qid; 2818 m->m_data += max_linkhdr; 2819 m->m_pkthdr.len = m->m_len = len; 2820 m->m_pkthdr.ph_ifidx = 0; 2821 m->m_pkthdr.csum_flags |= M_TCP_CSUM_OUT; 2822 memset(m->m_data, 0, len); 2823 switch (af) { 2824 case AF_INET: 2825 h = mtod(m, struct ip *); 2826 h->ip_p = IPPROTO_TCP; 2827 h->ip_len = htons(tlen); 2828 h->ip_v = 4; 2829 h->ip_hl = sizeof(*h) >> 2; 2830 h->ip_tos = IPTOS_LOWDELAY; 2831 h->ip_len = htons(len); 2832 h->ip_off = htons(ip_mtudisc ? IP_DF : 0); 2833 h->ip_ttl = ttl ? ttl : ip_defttl; 2834 h->ip_sum = 0; 2835 h->ip_src.s_addr = saddr->v4.s_addr; 2836 h->ip_dst.s_addr = daddr->v4.s_addr; 2837 2838 th = (struct tcphdr *)((caddr_t)h + sizeof(struct ip)); 2839 break; 2840 #ifdef INET6 2841 case AF_INET6: 2842 h6 = mtod(m, struct ip6_hdr *); 2843 h6->ip6_nxt = IPPROTO_TCP; 2844 h6->ip6_plen = htons(tlen); 2845 h6->ip6_vfc |= IPV6_VERSION; 2846 h6->ip6_hlim = IPV6_DEFHLIM; 2847 memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr)); 2848 memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr)); 2849 2850 th = (struct tcphdr *)((caddr_t)h6 + sizeof(struct ip6_hdr)); 2851 break; 2852 #endif /* INET6 */ 2853 default: 2854 unhandled_af(af); 2855 } 2856 2857 /* TCP header */ 2858 th->th_sport = sport; 2859 th->th_dport = dport; 2860 th->th_seq = htonl(seq); 2861 th->th_ack = htonl(ack); 2862 th->th_off = tlen >> 2; 2863 th->th_flags = flags; 2864 th->th_win = htons(win); 2865 2866 opt = (char *)(th + 1); 2867 if (mss) { 2868 opt[0] = TCPOPT_MAXSEG; 2869 opt[1] = 4; 2870 mss = htons(mss); 2871 memcpy((opt + 2), &mss, 2); 2872 opt += 4; 2873 } 2874 if (sack) { 2875 opt[0] = TCPOPT_SACK_PERMITTED; 2876 opt[1] = 2; 2877 opt += 2; 2878 } 2879 2880 return (m); 2881 } 2882 2883 void 2884 pf_send_tcp(const struct pf_rule *r, sa_family_t af, 2885 const 
struct pf_addr *saddr, const struct pf_addr *daddr, 2886 u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack, 2887 u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag, 2888 u_int16_t rtag, u_int rdom) 2889 { 2890 struct mbuf *m; 2891 2892 if ((m = pf_build_tcp(r, af, saddr, daddr, sport, dport, seq, ack, 2893 flags, win, mss, ttl, tag, rtag, 0, rdom)) == NULL) 2894 return; 2895 2896 switch (af) { 2897 case AF_INET: 2898 ip_send(m); 2899 break; 2900 #ifdef INET6 2901 case AF_INET6: 2902 ip6_send(m); 2903 break; 2904 #endif /* INET6 */ 2905 } 2906 } 2907 2908 static void 2909 pf_send_challenge_ack(struct pf_pdesc *pd, struct pf_state *s, 2910 struct pf_state_peer *src, struct pf_state_peer *dst) 2911 { 2912 /* 2913 * We are sending challenge ACK as a response to SYN packet, which 2914 * matches existing state (modulo TCP window check). Therefore packet 2915 * must be sent on behalf of destination. 2916 * 2917 * We expect sender to remain either silent, or send RST packet 2918 * so both, firewall and remote peer, can purge dead state from 2919 * memory. 
2920 */ 2921 pf_send_tcp(s->rule.ptr, pd->af, pd->dst, pd->src, 2922 pd->hdr.tcp.th_dport, pd->hdr.tcp.th_sport, dst->seqlo, 2923 src->seqlo, TH_ACK, 0, 0, s->rule.ptr->return_ttl, 1, 0, 2924 pd->rdomain); 2925 } 2926 2927 void 2928 pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, int param, 2929 sa_family_t af, struct pf_rule *r, u_int rdomain) 2930 { 2931 struct mbuf *m0; 2932 2933 if ((m0 = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) 2934 return; 2935 2936 m0->m_pkthdr.pf.flags |= PF_TAG_GENERATED; 2937 m0->m_pkthdr.ph_rtableid = rdomain; 2938 if (r && (r->scrub_flags & PFSTATE_SETPRIO)) 2939 m0->m_pkthdr.pf.prio = r->set_prio[0]; 2940 if (r && r->qid) 2941 m0->m_pkthdr.pf.qid = r->qid; 2942 2943 switch (af) { 2944 case AF_INET: 2945 icmp_error(m0, type, code, 0, param); 2946 break; 2947 #ifdef INET6 2948 case AF_INET6: 2949 icmp6_error(m0, type, code, param); 2950 break; 2951 #endif /* INET6 */ 2952 } 2953 } 2954 2955 /* 2956 * Return ((n = 0) == (a = b [with mask m])) 2957 * Note: n != 0 => returns (a != b [with mask m]) 2958 */ 2959 int 2960 pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m, 2961 struct pf_addr *b, sa_family_t af) 2962 { 2963 switch (af) { 2964 case AF_INET: 2965 if ((a->addr32[0] & m->addr32[0]) == 2966 (b->addr32[0] & m->addr32[0])) 2967 return (n == 0); 2968 break; 2969 #ifdef INET6 2970 case AF_INET6: 2971 if (((a->addr32[0] & m->addr32[0]) == 2972 (b->addr32[0] & m->addr32[0])) && 2973 ((a->addr32[1] & m->addr32[1]) == 2974 (b->addr32[1] & m->addr32[1])) && 2975 ((a->addr32[2] & m->addr32[2]) == 2976 (b->addr32[2] & m->addr32[2])) && 2977 ((a->addr32[3] & m->addr32[3]) == 2978 (b->addr32[3] & m->addr32[3]))) 2979 return (n == 0); 2980 break; 2981 #endif /* INET6 */ 2982 } 2983 2984 return (n != 0); 2985 } 2986 2987 /* 2988 * Return 1 if b <= a <= e, otherwise return 0. 
2989 */ 2990 int 2991 pf_match_addr_range(struct pf_addr *b, struct pf_addr *e, 2992 struct pf_addr *a, sa_family_t af) 2993 { 2994 switch (af) { 2995 case AF_INET: 2996 if ((ntohl(a->addr32[0]) < ntohl(b->addr32[0])) || 2997 (ntohl(a->addr32[0]) > ntohl(e->addr32[0]))) 2998 return (0); 2999 break; 3000 #ifdef INET6 3001 case AF_INET6: { 3002 int i; 3003 3004 /* check a >= b */ 3005 for (i = 0; i < 4; ++i) 3006 if (ntohl(a->addr32[i]) > ntohl(b->addr32[i])) 3007 break; 3008 else if (ntohl(a->addr32[i]) < ntohl(b->addr32[i])) 3009 return (0); 3010 /* check a <= e */ 3011 for (i = 0; i < 4; ++i) 3012 if (ntohl(a->addr32[i]) < ntohl(e->addr32[i])) 3013 break; 3014 else if (ntohl(a->addr32[i]) > ntohl(e->addr32[i])) 3015 return (0); 3016 break; 3017 } 3018 #endif /* INET6 */ 3019 } 3020 return (1); 3021 } 3022 3023 int 3024 pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p) 3025 { 3026 switch (op) { 3027 case PF_OP_IRG: 3028 return ((p > a1) && (p < a2)); 3029 case PF_OP_XRG: 3030 return ((p < a1) || (p > a2)); 3031 case PF_OP_RRG: 3032 return ((p >= a1) && (p <= a2)); 3033 case PF_OP_EQ: 3034 return (p == a1); 3035 case PF_OP_NE: 3036 return (p != a1); 3037 case PF_OP_LT: 3038 return (p < a1); 3039 case PF_OP_LE: 3040 return (p <= a1); 3041 case PF_OP_GT: 3042 return (p > a1); 3043 case PF_OP_GE: 3044 return (p >= a1); 3045 } 3046 return (0); /* never reached */ 3047 } 3048 3049 int 3050 pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p) 3051 { 3052 return (pf_match(op, ntohs(a1), ntohs(a2), ntohs(p))); 3053 } 3054 3055 int 3056 pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u) 3057 { 3058 if (u == -1 && op != PF_OP_EQ && op != PF_OP_NE) 3059 return (0); 3060 return (pf_match(op, a1, a2, u)); 3061 } 3062 3063 int 3064 pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g) 3065 { 3066 if (g == -1 && op != PF_OP_EQ && op != PF_OP_NE) 3067 return (0); 3068 return (pf_match(op, a1, a2, g)); 3069 } 3070 3071 int 3072 pf_match_tag(struct 
mbuf *m, struct pf_rule *r, int *tag) 3073 { 3074 if (*tag == -1) 3075 *tag = m->m_pkthdr.pf.tag; 3076 3077 return ((!r->match_tag_not && r->match_tag == *tag) || 3078 (r->match_tag_not && r->match_tag != *tag)); 3079 } 3080 3081 int 3082 pf_match_rcvif(struct mbuf *m, struct pf_rule *r) 3083 { 3084 struct ifnet *ifp, *ifp0; 3085 struct pfi_kif *kif; 3086 3087 ifp = if_get(m->m_pkthdr.ph_ifidx); 3088 if (ifp == NULL) 3089 return (0); 3090 3091 #if NCARP > 0 3092 if (ifp->if_type == IFT_CARP && 3093 (ifp0 = if_get(ifp->if_carpdevidx)) != NULL) { 3094 kif = (struct pfi_kif *)ifp0->if_pf_kif; 3095 if_put(ifp0); 3096 } else 3097 #endif /* NCARP */ 3098 kif = (struct pfi_kif *)ifp->if_pf_kif; 3099 3100 if_put(ifp); 3101 3102 if (kif == NULL) { 3103 DPFPRINTF(LOG_ERR, 3104 "%s: kif == NULL, @%d via %s", __func__, 3105 r->nr, r->rcv_ifname); 3106 return (0); 3107 } 3108 3109 return (pfi_kif_match(r->rcv_kif, kif)); 3110 } 3111 3112 void 3113 pf_tag_packet(struct mbuf *m, int tag, int rtableid) 3114 { 3115 if (tag > 0) 3116 m->m_pkthdr.pf.tag = tag; 3117 if (rtableid >= 0) 3118 m->m_pkthdr.ph_rtableid = (u_int)rtableid; 3119 } 3120 3121 enum pf_test_status 3122 pf_step_into_anchor(struct pf_test_ctx *ctx, struct pf_rule *r) 3123 { 3124 int rv; 3125 3126 if (ctx->depth >= PF_ANCHOR_STACK_MAX) { 3127 log(LOG_ERR, "pf_step_into_anchor: stack overflow\n"); 3128 return (PF_TEST_FAIL); 3129 } 3130 3131 ctx->depth++; 3132 3133 if (r->anchor_wildcard) { 3134 struct pf_anchor *child; 3135 rv = PF_TEST_OK; 3136 RB_FOREACH(child, pf_anchor_node, &r->anchor->children) { 3137 rv = pf_match_rule(ctx, &child->ruleset); 3138 if ((rv == PF_TEST_QUICK) || (rv == PF_TEST_FAIL)) { 3139 /* 3140 * we either hit a rule with quick action 3141 * (more likely), or hit some runtime 3142 * error (e.g. pool_get() failure). 
3143 */ 3144 break; 3145 } 3146 } 3147 } else { 3148 rv = pf_match_rule(ctx, &r->anchor->ruleset); 3149 /* 3150 * Unless errors occured, stop iff any rule matched 3151 * within quick anchors. 3152 */ 3153 if (rv != PF_TEST_FAIL && r->quick == PF_TEST_QUICK && 3154 *ctx->am == r) 3155 rv = PF_TEST_QUICK; 3156 } 3157 3158 ctx->depth--; 3159 3160 return (rv); 3161 } 3162 3163 void 3164 pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr, 3165 struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af) 3166 { 3167 switch (af) { 3168 case AF_INET: 3169 naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) | 3170 ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]); 3171 break; 3172 #ifdef INET6 3173 case AF_INET6: 3174 naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) | 3175 ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]); 3176 naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) | 3177 ((rmask->addr32[1] ^ 0xffffffff ) & saddr->addr32[1]); 3178 naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) | 3179 ((rmask->addr32[2] ^ 0xffffffff ) & saddr->addr32[2]); 3180 naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) | 3181 ((rmask->addr32[3] ^ 0xffffffff ) & saddr->addr32[3]); 3182 break; 3183 #endif /* INET6 */ 3184 default: 3185 unhandled_af(af); 3186 } 3187 } 3188 3189 void 3190 pf_addr_inc(struct pf_addr *addr, sa_family_t af) 3191 { 3192 switch (af) { 3193 case AF_INET: 3194 addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1); 3195 break; 3196 #ifdef INET6 3197 case AF_INET6: 3198 if (addr->addr32[3] == 0xffffffff) { 3199 addr->addr32[3] = 0; 3200 if (addr->addr32[2] == 0xffffffff) { 3201 addr->addr32[2] = 0; 3202 if (addr->addr32[1] == 0xffffffff) { 3203 addr->addr32[1] = 0; 3204 addr->addr32[0] = 3205 htonl(ntohl(addr->addr32[0]) + 1); 3206 } else 3207 addr->addr32[1] = 3208 htonl(ntohl(addr->addr32[1]) + 1); 3209 } else 3210 addr->addr32[2] = 3211 htonl(ntohl(addr->addr32[2]) + 1); 3212 } else 3213 addr->addr32[3] = 3214 
htonl(ntohl(addr->addr32[3]) + 1); 3215 break; 3216 #endif /* INET6 */ 3217 default: 3218 unhandled_af(af); 3219 } 3220 } 3221 3222 int 3223 pf_socket_lookup(struct pf_pdesc *pd) 3224 { 3225 struct pf_addr *saddr, *daddr; 3226 u_int16_t sport, dport; 3227 struct inpcbtable *tb; 3228 struct inpcb *inp; 3229 3230 pd->lookup.uid = -1; 3231 pd->lookup.gid = -1; 3232 pd->lookup.pid = NO_PID; 3233 switch (pd->virtual_proto) { 3234 case IPPROTO_TCP: 3235 sport = pd->hdr.tcp.th_sport; 3236 dport = pd->hdr.tcp.th_dport; 3237 PF_ASSERT_LOCKED(); 3238 NET_ASSERT_LOCKED(); 3239 tb = &tcbtable; 3240 break; 3241 case IPPROTO_UDP: 3242 sport = pd->hdr.udp.uh_sport; 3243 dport = pd->hdr.udp.uh_dport; 3244 PF_ASSERT_LOCKED(); 3245 NET_ASSERT_LOCKED(); 3246 tb = &udbtable; 3247 break; 3248 default: 3249 return (-1); 3250 } 3251 if (pd->dir == PF_IN) { 3252 saddr = pd->src; 3253 daddr = pd->dst; 3254 } else { 3255 u_int16_t p; 3256 3257 p = sport; 3258 sport = dport; 3259 dport = p; 3260 saddr = pd->dst; 3261 daddr = pd->src; 3262 } 3263 switch (pd->af) { 3264 case AF_INET: 3265 /* 3266 * Fails when rtable is changed while evaluating the ruleset 3267 * The socket looked up will not match the one hit in the end. 
3268 */ 3269 inp = in_pcbhashlookup(tb, saddr->v4, sport, daddr->v4, dport, 3270 pd->rdomain); 3271 if (inp == NULL) { 3272 inp = in_pcblookup_listen(tb, daddr->v4, dport, 3273 NULL, pd->rdomain); 3274 if (inp == NULL) 3275 return (-1); 3276 } 3277 break; 3278 #ifdef INET6 3279 case AF_INET6: 3280 inp = in6_pcbhashlookup(tb, &saddr->v6, sport, &daddr->v6, 3281 dport, pd->rdomain); 3282 if (inp == NULL) { 3283 inp = in6_pcblookup_listen(tb, &daddr->v6, dport, 3284 NULL, pd->rdomain); 3285 if (inp == NULL) 3286 return (-1); 3287 } 3288 break; 3289 #endif /* INET6 */ 3290 default: 3291 unhandled_af(pd->af); 3292 } 3293 pd->lookup.uid = inp->inp_socket->so_euid; 3294 pd->lookup.gid = inp->inp_socket->so_egid; 3295 pd->lookup.pid = inp->inp_socket->so_cpid; 3296 return (1); 3297 } 3298 3299 /* post: r => (r[0] == type /\ r[1] >= min_typelen >= 2 "validity" 3300 * /\ (eoh - r) >= min_typelen >= 2 "safety" ) 3301 * 3302 * warning: r + r[1] may exceed opts bounds for r[1] > min_typelen 3303 */ 3304 u_int8_t* 3305 pf_find_tcpopt(u_int8_t *opt, u_int8_t *opts, size_t hlen, u_int8_t type, 3306 u_int8_t min_typelen) 3307 { 3308 u_int8_t *eoh = opts + hlen; 3309 3310 if (min_typelen < 2) 3311 return (NULL); 3312 3313 while ((eoh - opt) >= min_typelen) { 3314 switch (*opt) { 3315 case TCPOPT_EOL: 3316 /* FALLTHROUGH - Workaround the failure of some 3317 systems to NOP-pad their bzero'd option buffers, 3318 producing spurious EOLs */ 3319 case TCPOPT_NOP: 3320 opt++; 3321 continue; 3322 default: 3323 if (opt[0] == type && 3324 opt[1] >= min_typelen) 3325 return (opt); 3326 } 3327 3328 opt += MAX(opt[1], 2); /* evade infinite loops */ 3329 } 3330 3331 return (NULL); 3332 } 3333 3334 u_int8_t 3335 pf_get_wscale(struct pf_pdesc *pd) 3336 { 3337 int olen; 3338 u_int8_t opts[MAX_TCPOPTLEN], *opt; 3339 u_int8_t wscale = 0; 3340 3341 olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr); 3342 if (olen < TCPOLEN_WINDOW || !pf_pull_hdr(pd->m, 3343 pd->off + sizeof(struct tcphdr), opts, 
olen, NULL, NULL, pd->af)) 3344 return (0); 3345 3346 opt = opts; 3347 while ((opt = pf_find_tcpopt(opt, opts, olen, 3348 TCPOPT_WINDOW, TCPOLEN_WINDOW)) != NULL) { 3349 wscale = opt[2]; 3350 wscale = MIN(wscale, TCP_MAX_WINSHIFT); 3351 wscale |= PF_WSCALE_FLAG; 3352 3353 opt += opt[1]; 3354 } 3355 3356 return (wscale); 3357 } 3358 3359 u_int16_t 3360 pf_get_mss(struct pf_pdesc *pd) 3361 { 3362 int olen; 3363 u_int8_t opts[MAX_TCPOPTLEN], *opt; 3364 u_int16_t mss = tcp_mssdflt; 3365 3366 olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr); 3367 if (olen < TCPOLEN_MAXSEG || !pf_pull_hdr(pd->m, 3368 pd->off + sizeof(struct tcphdr), opts, olen, NULL, NULL, pd->af)) 3369 return (0); 3370 3371 opt = opts; 3372 while ((opt = pf_find_tcpopt(opt, opts, olen, 3373 TCPOPT_MAXSEG, TCPOLEN_MAXSEG)) != NULL) { 3374 memcpy(&mss, (opt + 2), 2); 3375 mss = ntohs(mss); 3376 3377 opt += opt[1]; 3378 } 3379 return (mss); 3380 } 3381 3382 u_int16_t 3383 pf_calc_mss(struct pf_addr *addr, sa_family_t af, int rtableid, u_int16_t offer) 3384 { 3385 struct ifnet *ifp; 3386 struct sockaddr_in *dst; 3387 #ifdef INET6 3388 struct sockaddr_in6 *dst6; 3389 #endif /* INET6 */ 3390 struct rtentry *rt = NULL; 3391 struct sockaddr_storage ss; 3392 int hlen; 3393 u_int16_t mss = tcp_mssdflt; 3394 3395 memset(&ss, 0, sizeof(ss)); 3396 3397 switch (af) { 3398 case AF_INET: 3399 hlen = sizeof(struct ip); 3400 dst = (struct sockaddr_in *)&ss; 3401 dst->sin_family = AF_INET; 3402 dst->sin_len = sizeof(*dst); 3403 dst->sin_addr = addr->v4; 3404 rt = rtalloc(sintosa(dst), 0, rtableid); 3405 break; 3406 #ifdef INET6 3407 case AF_INET6: 3408 hlen = sizeof(struct ip6_hdr); 3409 dst6 = (struct sockaddr_in6 *)&ss; 3410 dst6->sin6_family = AF_INET6; 3411 dst6->sin6_len = sizeof(*dst6); 3412 dst6->sin6_addr = addr->v6; 3413 rt = rtalloc(sin6tosa(dst6), 0, rtableid); 3414 break; 3415 #endif /* INET6 */ 3416 } 3417 3418 if (rt != NULL && (ifp = if_get(rt->rt_ifidx)) != NULL) { 3419 mss = ifp->if_mtu - hlen - 
sizeof(struct tcphdr); 3420 mss = max(tcp_mssdflt, mss); 3421 if_put(ifp); 3422 } 3423 rtfree(rt); 3424 mss = min(mss, offer); 3425 mss = max(mss, 64); /* sanity - at least max opt space */ 3426 return (mss); 3427 } 3428 3429 static __inline int 3430 pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr, sa_family_t af, 3431 struct pf_src_node **sns) 3432 { 3433 struct pf_rule *r = s->rule.ptr; 3434 int rv; 3435 3436 s->rt_kif = NULL; 3437 if (!r->rt) 3438 return (0); 3439 3440 rv = pf_map_addr(af, r, saddr, &s->rt_addr, NULL, sns, 3441 &r->route, PF_SN_ROUTE); 3442 if (rv == 0) { 3443 s->rt_kif = r->route.kif; 3444 s->natrule.ptr = r; 3445 } 3446 3447 return (rv); 3448 } 3449 3450 u_int32_t 3451 pf_tcp_iss(struct pf_pdesc *pd) 3452 { 3453 SHA2_CTX ctx; 3454 union { 3455 uint8_t bytes[SHA512_DIGEST_LENGTH]; 3456 uint32_t words[1]; 3457 } digest; 3458 3459 if (pf_tcp_secret_init == 0) { 3460 arc4random_buf(pf_tcp_secret, sizeof(pf_tcp_secret)); 3461 SHA512Init(&pf_tcp_secret_ctx); 3462 SHA512Update(&pf_tcp_secret_ctx, pf_tcp_secret, 3463 sizeof(pf_tcp_secret)); 3464 pf_tcp_secret_init = 1; 3465 } 3466 ctx = pf_tcp_secret_ctx; 3467 3468 SHA512Update(&ctx, &pd->rdomain, sizeof(pd->rdomain)); 3469 SHA512Update(&ctx, &pd->hdr.tcp.th_sport, sizeof(u_short)); 3470 SHA512Update(&ctx, &pd->hdr.tcp.th_dport, sizeof(u_short)); 3471 switch (pd->af) { 3472 case AF_INET: 3473 SHA512Update(&ctx, &pd->src->v4, sizeof(struct in_addr)); 3474 SHA512Update(&ctx, &pd->dst->v4, sizeof(struct in_addr)); 3475 break; 3476 #ifdef INET6 3477 case AF_INET6: 3478 SHA512Update(&ctx, &pd->src->v6, sizeof(struct in6_addr)); 3479 SHA512Update(&ctx, &pd->dst->v6, sizeof(struct in6_addr)); 3480 break; 3481 #endif /* INET6 */ 3482 } 3483 SHA512Final(digest.bytes, &ctx); 3484 pf_tcp_iss_off += 4096; 3485 return (digest.words[0] + tcp_iss + pf_tcp_iss_off); 3486 } 3487 3488 void 3489 pf_rule_to_actions(struct pf_rule *r, struct pf_rule_actions *a) 3490 { 3491 if (r->qid) 3492 a->qid = r->qid; 3493 if 
(r->pqid) 3494 a->pqid = r->pqid; 3495 if (r->rtableid >= 0) 3496 a->rtableid = r->rtableid; 3497 #if NPFLOG > 0 3498 a->log |= r->log; 3499 #endif /* NPFLOG > 0 */ 3500 if (r->scrub_flags & PFSTATE_SETTOS) 3501 a->set_tos = r->set_tos; 3502 if (r->min_ttl) 3503 a->min_ttl = r->min_ttl; 3504 if (r->max_mss) 3505 a->max_mss = r->max_mss; 3506 a->flags |= (r->scrub_flags & (PFSTATE_NODF|PFSTATE_RANDOMID| 3507 PFSTATE_SETTOS|PFSTATE_SCRUB_TCP|PFSTATE_SETPRIO)); 3508 if (r->scrub_flags & PFSTATE_SETPRIO) { 3509 a->set_prio[0] = r->set_prio[0]; 3510 a->set_prio[1] = r->set_prio[1]; 3511 } 3512 if (r->rule_flag & PFRULE_SETDELAY) 3513 a->delay = r->delay; 3514 } 3515 3516 #define PF_TEST_ATTRIB(t, a) \ 3517 if (t) { \ 3518 r = a; \ 3519 continue; \ 3520 } else do { \ 3521 } while (0) 3522 3523 enum pf_test_status 3524 pf_match_rule(struct pf_test_ctx *ctx, struct pf_ruleset *ruleset) 3525 { 3526 struct pf_rule *r; 3527 struct pf_rule *save_a; 3528 struct pf_ruleset *save_aruleset; 3529 3530 r = TAILQ_FIRST(ruleset->rules.active.ptr); 3531 while (r != NULL) { 3532 r->evaluations++; 3533 PF_TEST_ATTRIB( 3534 (pfi_kif_match(r->kif, ctx->pd->kif) == r->ifnot), 3535 r->skip[PF_SKIP_IFP].ptr); 3536 PF_TEST_ATTRIB((r->direction && r->direction != ctx->pd->dir), 3537 r->skip[PF_SKIP_DIR].ptr); 3538 PF_TEST_ATTRIB((r->onrdomain >= 0 && 3539 (r->onrdomain == ctx->pd->rdomain) == r->ifnot), 3540 r->skip[PF_SKIP_RDOM].ptr); 3541 PF_TEST_ATTRIB((r->af && r->af != ctx->pd->af), 3542 r->skip[PF_SKIP_AF].ptr); 3543 PF_TEST_ATTRIB((r->proto && r->proto != ctx->pd->proto), 3544 r->skip[PF_SKIP_PROTO].ptr); 3545 PF_TEST_ATTRIB((PF_MISMATCHAW(&r->src.addr, &ctx->pd->nsaddr, 3546 ctx->pd->naf, r->src.neg, ctx->pd->kif, 3547 ctx->act.rtableid)), 3548 r->skip[PF_SKIP_SRC_ADDR].ptr); 3549 PF_TEST_ATTRIB((PF_MISMATCHAW(&r->dst.addr, &ctx->pd->ndaddr, 3550 ctx->pd->af, r->dst.neg, NULL, ctx->act.rtableid)), 3551 r->skip[PF_SKIP_DST_ADDR].ptr); 3552 3553 switch (ctx->pd->virtual_proto) { 3554 case 
PF_VPROTO_FRAGMENT: 3555 /* tcp/udp only. port_op always 0 in other cases */ 3556 PF_TEST_ATTRIB((r->src.port_op || r->dst.port_op), 3557 TAILQ_NEXT(r, entries)); 3558 PF_TEST_ATTRIB((ctx->pd->proto == IPPROTO_TCP && 3559 r->flagset), 3560 TAILQ_NEXT(r, entries)); 3561 /* icmp only. type/code always 0 in other cases */ 3562 PF_TEST_ATTRIB((r->type || r->code), 3563 TAILQ_NEXT(r, entries)); 3564 /* tcp/udp only. {uid|gid}.op always 0 in other cases */ 3565 PF_TEST_ATTRIB((r->gid.op || r->uid.op), 3566 TAILQ_NEXT(r, entries)); 3567 break; 3568 3569 case IPPROTO_TCP: 3570 PF_TEST_ATTRIB(((r->flagset & ctx->th->th_flags) != 3571 r->flags), 3572 TAILQ_NEXT(r, entries)); 3573 PF_TEST_ATTRIB((r->os_fingerprint != PF_OSFP_ANY && 3574 !pf_osfp_match(pf_osfp_fingerprint(ctx->pd), 3575 r->os_fingerprint)), 3576 TAILQ_NEXT(r, entries)); 3577 /* FALLTHROUGH */ 3578 3579 case IPPROTO_UDP: 3580 /* tcp/udp only. port_op always 0 in other cases */ 3581 PF_TEST_ATTRIB((r->src.port_op && 3582 !pf_match_port(r->src.port_op, r->src.port[0], 3583 r->src.port[1], ctx->pd->nsport)), 3584 r->skip[PF_SKIP_SRC_PORT].ptr); 3585 PF_TEST_ATTRIB((r->dst.port_op && 3586 !pf_match_port(r->dst.port_op, r->dst.port[0], 3587 r->dst.port[1], ctx->pd->ndport)), 3588 r->skip[PF_SKIP_DST_PORT].ptr); 3589 /* tcp/udp only. uid.op always 0 in other cases */ 3590 PF_TEST_ATTRIB((r->uid.op && (ctx->pd->lookup.done || 3591 (ctx->pd->lookup.done = 3592 pf_socket_lookup(ctx->pd), 1)) && 3593 !pf_match_uid(r->uid.op, r->uid.uid[0], 3594 r->uid.uid[1], ctx->pd->lookup.uid)), 3595 TAILQ_NEXT(r, entries)); 3596 /* tcp/udp only. gid.op always 0 in other cases */ 3597 PF_TEST_ATTRIB((r->gid.op && (ctx->pd->lookup.done || 3598 (ctx->pd->lookup.done = 3599 pf_socket_lookup(ctx->pd), 1)) && 3600 !pf_match_gid(r->gid.op, r->gid.gid[0], 3601 r->gid.gid[1], ctx->pd->lookup.gid)), 3602 TAILQ_NEXT(r, entries)); 3603 break; 3604 3605 case IPPROTO_ICMP: 3606 case IPPROTO_ICMPV6: 3607 /* icmp only. 
type always 0 in other cases */ 3608 PF_TEST_ATTRIB((r->type && 3609 r->type != ctx->icmptype + 1), 3610 TAILQ_NEXT(r, entries)); 3611 /* icmp only. type always 0 in other cases */ 3612 PF_TEST_ATTRIB((r->code && 3613 r->code != ctx->icmpcode + 1), 3614 TAILQ_NEXT(r, entries)); 3615 /* icmp only. don't create states on replies */ 3616 PF_TEST_ATTRIB((r->keep_state && !ctx->state_icmp && 3617 (r->rule_flag & PFRULE_STATESLOPPY) == 0 && 3618 ctx->icmp_dir != PF_IN), 3619 TAILQ_NEXT(r, entries)); 3620 break; 3621 3622 default: 3623 break; 3624 } 3625 3626 PF_TEST_ATTRIB((r->rule_flag & PFRULE_FRAGMENT && 3627 ctx->pd->virtual_proto != PF_VPROTO_FRAGMENT), 3628 TAILQ_NEXT(r, entries)); 3629 PF_TEST_ATTRIB((r->tos && !(r->tos == ctx->pd->tos)), 3630 TAILQ_NEXT(r, entries)); 3631 PF_TEST_ATTRIB((r->prob && 3632 r->prob <= arc4random_uniform(UINT_MAX - 1) + 1), 3633 TAILQ_NEXT(r, entries)); 3634 PF_TEST_ATTRIB((r->match_tag && 3635 !pf_match_tag(ctx->pd->m, r, &ctx->tag)), 3636 TAILQ_NEXT(r, entries)); 3637 PF_TEST_ATTRIB((r->rcv_kif && pf_match_rcvif(ctx->pd->m, r) == 3638 r->rcvifnot), 3639 TAILQ_NEXT(r, entries)); 3640 PF_TEST_ATTRIB((r->prio && 3641 (r->prio == PF_PRIO_ZERO ? 0 : r->prio) != 3642 ctx->pd->m->m_pkthdr.pf.prio), 3643 TAILQ_NEXT(r, entries)); 3644 3645 /* must be last! 
*/ 3646 if (r->pktrate.limit) { 3647 pf_add_threshold(&r->pktrate); 3648 PF_TEST_ATTRIB((pf_check_threshold(&r->pktrate)), 3649 TAILQ_NEXT(r, entries)); 3650 } 3651 3652 /* FALLTHROUGH */ 3653 if (r->tag) 3654 ctx->tag = r->tag; 3655 if (r->anchor == NULL) { 3656 if (r->action == PF_MATCH) { 3657 if ((ctx->ri = pool_get(&pf_rule_item_pl, 3658 PR_NOWAIT)) == NULL) { 3659 REASON_SET(&ctx->reason, PFRES_MEMORY); 3660 ctx->test_status = PF_TEST_FAIL; 3661 break; 3662 } 3663 ctx->ri->r = r; 3664 /* order is irrelevant */ 3665 SLIST_INSERT_HEAD(&ctx->rules, ctx->ri, entry); 3666 ctx->ri = NULL; 3667 pf_rule_to_actions(r, &ctx->act); 3668 if (r->rule_flag & PFRULE_AFTO) 3669 ctx->pd->naf = r->naf; 3670 if (pf_get_transaddr(r, ctx->pd, ctx->sns, 3671 &ctx->nr) == -1) { 3672 REASON_SET(&ctx->reason, 3673 PFRES_TRANSLATE); 3674 ctx->test_status = PF_TEST_FAIL; 3675 break; 3676 } 3677 #if NPFLOG > 0 3678 if (r->log) { 3679 REASON_SET(&ctx->reason, PFRES_MATCH); 3680 pflog_packet(ctx->pd, ctx->reason, r, 3681 ctx->a, ruleset, NULL); 3682 } 3683 #endif /* NPFLOG > 0 */ 3684 } else { 3685 /* 3686 * found matching r 3687 */ 3688 *ctx->rm = r; 3689 /* 3690 * anchor, with ruleset, where r belongs to 3691 */ 3692 *ctx->am = ctx->a; 3693 /* 3694 * ruleset where r belongs to 3695 */ 3696 *ctx->rsm = ruleset; 3697 /* 3698 * ruleset, where anchor belongs to. 3699 */ 3700 ctx->arsm = ctx->aruleset; 3701 } 3702 3703 #if NPFLOG > 0 3704 if (ctx->act.log & PF_LOG_MATCHES) 3705 pf_log_matches(ctx->pd, r, ctx->a, ruleset, 3706 &ctx->rules); 3707 #endif /* NPFLOG > 0 */ 3708 3709 if (r->quick) { 3710 ctx->test_status = PF_TEST_QUICK; 3711 break; 3712 } 3713 } else { 3714 save_a = ctx->a; 3715 save_aruleset = ctx->aruleset; 3716 ctx->a = r; /* remember anchor */ 3717 ctx->aruleset = ruleset; /* and its ruleset */ 3718 /* 3719 * Note: we don't need to restore if we are not going 3720 * to continue with ruleset evaluation. 
3721 */ 3722 if (pf_step_into_anchor(ctx, r) != PF_TEST_OK) 3723 break; 3724 ctx->a = save_a; 3725 ctx->aruleset = save_aruleset; 3726 } 3727 r = TAILQ_NEXT(r, entries); 3728 } 3729 3730 return (ctx->test_status); 3731 } 3732 3733 int 3734 pf_test_rule(struct pf_pdesc *pd, struct pf_rule **rm, struct pf_state **sm, 3735 struct pf_rule **am, struct pf_ruleset **rsm, u_short *reason) 3736 { 3737 struct pf_rule *r = NULL; 3738 struct pf_rule *a = NULL; 3739 struct pf_ruleset *ruleset = NULL; 3740 struct pf_state_key *skw = NULL, *sks = NULL; 3741 int rewrite = 0; 3742 u_int16_t virtual_type, virtual_id; 3743 int action = PF_DROP; 3744 struct pf_test_ctx ctx; 3745 int rv; 3746 3747 memset(&ctx, 0, sizeof(ctx)); 3748 ctx.pd = pd; 3749 ctx.rm = rm; 3750 ctx.am = am; 3751 ctx.rsm = rsm; 3752 ctx.th = &pd->hdr.tcp; 3753 ctx.act.rtableid = pd->rdomain; 3754 ctx.tag = -1; 3755 SLIST_INIT(&ctx.rules); 3756 3757 if (pd->dir == PF_IN && if_congested()) { 3758 REASON_SET(&ctx.reason, PFRES_CONGEST); 3759 return (PF_DROP); 3760 } 3761 3762 switch (pd->virtual_proto) { 3763 case IPPROTO_ICMP: 3764 ctx.icmptype = pd->hdr.icmp.icmp_type; 3765 ctx.icmpcode = pd->hdr.icmp.icmp_code; 3766 ctx.state_icmp = pf_icmp_mapping(pd, ctx.icmptype, 3767 &ctx.icmp_dir, &virtual_id, &virtual_type); 3768 if (ctx.icmp_dir == PF_IN) { 3769 pd->osport = pd->nsport = virtual_id; 3770 pd->odport = pd->ndport = virtual_type; 3771 } else { 3772 pd->osport = pd->nsport = virtual_type; 3773 pd->odport = pd->ndport = virtual_id; 3774 } 3775 break; 3776 #ifdef INET6 3777 case IPPROTO_ICMPV6: 3778 ctx.icmptype = pd->hdr.icmp6.icmp6_type; 3779 ctx.icmpcode = pd->hdr.icmp6.icmp6_code; 3780 ctx.state_icmp = pf_icmp_mapping(pd, ctx.icmptype, 3781 &ctx.icmp_dir, &virtual_id, &virtual_type); 3782 if (ctx.icmp_dir == PF_IN) { 3783 pd->osport = pd->nsport = virtual_id; 3784 pd->odport = pd->ndport = virtual_type; 3785 } else { 3786 pd->osport = pd->nsport = virtual_type; 3787 pd->odport = pd->ndport = virtual_id; 3788 
} 3789 break; 3790 #endif /* INET6 */ 3791 } 3792 3793 ruleset = &pf_main_ruleset; 3794 rv = pf_match_rule(&ctx, ruleset); 3795 if (rv == PF_TEST_FAIL) { 3796 /* 3797 * Reason has been set in pf_match_rule() already. 3798 */ 3799 goto cleanup; 3800 } 3801 3802 r = *ctx.rm; /* matching rule */ 3803 a = *ctx.am; /* rule that defines an anchor containing 'r' */ 3804 ruleset = *ctx.rsm;/* ruleset of the anchor defined by the rule 'a' */ 3805 ctx.aruleset = ctx.arsm;/* ruleset of the 'a' rule itself */ 3806 3807 /* apply actions for last matching pass/block rule */ 3808 pf_rule_to_actions(r, &ctx.act); 3809 if (r->rule_flag & PFRULE_AFTO) 3810 pd->naf = r->naf; 3811 if (pf_get_transaddr(r, pd, ctx.sns, &ctx.nr) == -1) { 3812 REASON_SET(&ctx.reason, PFRES_TRANSLATE); 3813 goto cleanup; 3814 } 3815 REASON_SET(&ctx.reason, PFRES_MATCH); 3816 3817 #if NPFLOG > 0 3818 if (r->log) 3819 pflog_packet(pd, ctx.reason, r, a, ruleset, NULL); 3820 if (ctx.act.log & PF_LOG_MATCHES) 3821 pf_log_matches(pd, r, a, ruleset, &ctx.rules); 3822 #endif /* NPFLOG > 0 */ 3823 3824 if (pd->virtual_proto != PF_VPROTO_FRAGMENT && 3825 (r->action == PF_DROP) && 3826 ((r->rule_flag & PFRULE_RETURNRST) || 3827 (r->rule_flag & PFRULE_RETURNICMP) || 3828 (r->rule_flag & PFRULE_RETURN))) { 3829 if (pd->proto == IPPROTO_TCP && 3830 ((r->rule_flag & PFRULE_RETURNRST) || 3831 (r->rule_flag & PFRULE_RETURN)) && 3832 !(ctx.th->th_flags & TH_RST)) { 3833 u_int32_t ack = 3834 ntohl(ctx.th->th_seq) + pd->p_len; 3835 3836 if (pf_check_tcp_cksum(pd->m, pd->off, 3837 pd->tot_len - pd->off, pd->af)) 3838 REASON_SET(&ctx.reason, PFRES_PROTCKSUM); 3839 else { 3840 if (ctx.th->th_flags & TH_SYN) 3841 ack++; 3842 if (ctx.th->th_flags & TH_FIN) 3843 ack++; 3844 pf_send_tcp(r, pd->af, pd->dst, 3845 pd->src, ctx.th->th_dport, 3846 ctx.th->th_sport, ntohl(ctx.th->th_ack), 3847 ack, TH_RST|TH_ACK, 0, 0, r->return_ttl, 3848 1, 0, pd->rdomain); 3849 } 3850 } else if ((pd->proto != IPPROTO_ICMP || 3851 
ICMP_INFOTYPE(ctx.icmptype)) && pd->af == AF_INET && 3852 r->return_icmp) 3853 pf_send_icmp(pd->m, r->return_icmp >> 8, 3854 r->return_icmp & 255, 0, pd->af, r, pd->rdomain); 3855 else if ((pd->proto != IPPROTO_ICMPV6 || 3856 (ctx.icmptype >= ICMP6_ECHO_REQUEST && 3857 ctx.icmptype != ND_REDIRECT)) && pd->af == AF_INET6 && 3858 r->return_icmp6) 3859 pf_send_icmp(pd->m, r->return_icmp6 >> 8, 3860 r->return_icmp6 & 255, 0, pd->af, r, pd->rdomain); 3861 } 3862 3863 if (r->action == PF_DROP) 3864 goto cleanup; 3865 3866 /* 3867 * If an expired "once" rule has not been purged, drop any new matching 3868 * packets. 3869 */ 3870 if (r->rule_flag & PFRULE_EXPIRED) 3871 goto cleanup; 3872 3873 pf_tag_packet(pd->m, ctx.tag, ctx.act.rtableid); 3874 if (ctx.act.rtableid >= 0 && 3875 rtable_l2(ctx.act.rtableid) != pd->rdomain) 3876 pd->destchg = 1; 3877 3878 if (r->action == PF_PASS && pd->badopts && ! r->allow_opts) { 3879 REASON_SET(&ctx.reason, PFRES_IPOPTIONS); 3880 #if NPFLOG > 0 3881 pd->pflog |= PF_LOG_FORCE; 3882 #endif /* NPFLOG > 0 */ 3883 DPFPRINTF(LOG_NOTICE, "dropping packet with " 3884 "ip/ipv6 options in pf_test_rule()"); 3885 goto cleanup; 3886 } 3887 3888 action = PF_PASS; 3889 3890 if (pd->virtual_proto != PF_VPROTO_FRAGMENT 3891 && !ctx.state_icmp && r->keep_state) { 3892 3893 if (r->rule_flag & PFRULE_SRCTRACK && 3894 pf_insert_src_node(&ctx.sns[PF_SN_NONE], r, PF_SN_NONE, 3895 pd->af, pd->src, NULL, NULL) != 0) { 3896 REASON_SET(&ctx.reason, PFRES_SRCLIMIT); 3897 goto cleanup; 3898 } 3899 3900 if (r->max_states && (r->states_cur >= r->max_states)) { 3901 pf_status.lcounters[LCNT_STATES]++; 3902 REASON_SET(&ctx.reason, PFRES_MAXSTATES); 3903 goto cleanup; 3904 } 3905 3906 action = pf_create_state(pd, r, a, ctx.nr, &skw, &sks, 3907 &rewrite, sm, ctx.tag, &ctx.rules, &ctx.act, ctx.sns); 3908 3909 if (action != PF_PASS) 3910 goto cleanup; 3911 if (sks != skw) { 3912 struct pf_state_key *sk; 3913 3914 if (pd->dir == PF_IN) 3915 sk = sks; 3916 else 3917 sk = skw; 
3918 rewrite += pf_translate(pd, 3919 &sk->addr[pd->af == pd->naf ? pd->sidx : pd->didx], 3920 sk->port[pd->af == pd->naf ? pd->sidx : pd->didx], 3921 &sk->addr[pd->af == pd->naf ? pd->didx : pd->sidx], 3922 sk->port[pd->af == pd->naf ? pd->didx : pd->sidx], 3923 virtual_type, ctx.icmp_dir); 3924 } 3925 3926 #ifdef INET6 3927 if (rewrite && skw->af != sks->af) 3928 action = PF_AFRT; 3929 #endif /* INET6 */ 3930 3931 } else { 3932 while ((ctx.ri = SLIST_FIRST(&ctx.rules))) { 3933 SLIST_REMOVE_HEAD(&ctx.rules, entry); 3934 pool_put(&pf_rule_item_pl, ctx.ri); 3935 } 3936 } 3937 3938 /* copy back packet headers if needed */ 3939 if (rewrite && pd->hdrlen) { 3940 m_copyback(pd->m, pd->off, pd->hdrlen, &pd->hdr, M_NOWAIT); 3941 } 3942 3943 #if NPFSYNC > 0 3944 if (*sm != NULL && !ISSET((*sm)->state_flags, PFSTATE_NOSYNC) && 3945 pd->dir == PF_OUT && pfsync_up()) { 3946 /* 3947 * We want the state created, but we dont 3948 * want to send this in case a partner 3949 * firewall has to know about it to allow 3950 * replies through it. 3951 */ 3952 if (pfsync_defer(*sm, pd->m)) 3953 return (PF_DEFER); 3954 } 3955 #endif /* NPFSYNC > 0 */ 3956 3957 if (r->rule_flag & PFRULE_ONCE) { 3958 u_int32_t rule_flag; 3959 3960 /* 3961 * Use atomic_cas() to determine a clear winner, which will 3962 * insert an expired rule to gcl. 
3963 */ 3964 rule_flag = r->rule_flag; 3965 if (((rule_flag & PFRULE_EXPIRED) == 0) && 3966 atomic_cas_uint(&r->rule_flag, rule_flag, 3967 rule_flag | PFRULE_EXPIRED) == rule_flag) { 3968 r->exptime = gettime(); 3969 SLIST_INSERT_HEAD(&pf_rule_gcl, r, gcle); 3970 } 3971 } 3972 3973 return (action); 3974 3975 cleanup: 3976 while ((ctx.ri = SLIST_FIRST(&ctx.rules))) { 3977 SLIST_REMOVE_HEAD(&ctx.rules, entry); 3978 pool_put(&pf_rule_item_pl, ctx.ri); 3979 } 3980 3981 return (action); 3982 } 3983 3984 static __inline int 3985 pf_create_state(struct pf_pdesc *pd, struct pf_rule *r, struct pf_rule *a, 3986 struct pf_rule *nr, struct pf_state_key **skw, struct pf_state_key **sks, 3987 int *rewrite, struct pf_state **sm, int tag, struct pf_rule_slist *rules, 3988 struct pf_rule_actions *act, struct pf_src_node *sns[PF_SN_MAX]) 3989 { 3990 struct pf_state *s = NULL; 3991 struct tcphdr *th = &pd->hdr.tcp; 3992 u_int16_t mss = tcp_mssdflt; 3993 u_short reason; 3994 u_int i; 3995 3996 s = pool_get(&pf_state_pl, PR_NOWAIT | PR_ZERO); 3997 if (s == NULL) { 3998 REASON_SET(&reason, PFRES_MEMORY); 3999 goto csfailed; 4000 } 4001 s->rule.ptr = r; 4002 s->anchor.ptr = a; 4003 s->natrule.ptr = nr; 4004 if (r->allow_opts) 4005 s->state_flags |= PFSTATE_ALLOWOPTS; 4006 if (r->rule_flag & PFRULE_STATESLOPPY) 4007 s->state_flags |= PFSTATE_SLOPPY; 4008 if (r->rule_flag & PFRULE_PFLOW) 4009 s->state_flags |= PFSTATE_PFLOW; 4010 #if NPFLOG > 0 4011 s->log = act->log & PF_LOG_ALL; 4012 #endif /* NPFLOG > 0 */ 4013 s->qid = act->qid; 4014 s->pqid = act->pqid; 4015 s->rtableid[pd->didx] = act->rtableid; 4016 s->rtableid[pd->sidx] = -1; /* return traffic is routed normally */ 4017 s->min_ttl = act->min_ttl; 4018 s->set_tos = act->set_tos; 4019 s->max_mss = act->max_mss; 4020 s->state_flags |= act->flags; 4021 #if NPFSYNC > 0 4022 s->sync_state = PFSYNC_S_NONE; 4023 #endif /* NPFSYNC > 0 */ 4024 s->set_prio[0] = act->set_prio[0]; 4025 s->set_prio[1] = act->set_prio[1]; 4026 s->delay = 
act->delay; 4027 SLIST_INIT(&s->src_nodes); 4028 /* 4029 * must initialize refcnt, before pf_state_insert() gets called. 4030 * pf_state_inserts() grabs reference for pfsync! 4031 */ 4032 refcnt_init(&s->refcnt); 4033 4034 switch (pd->proto) { 4035 case IPPROTO_TCP: 4036 s->src.seqlo = ntohl(th->th_seq); 4037 s->src.seqhi = s->src.seqlo + pd->p_len + 1; 4038 if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN && 4039 r->keep_state == PF_STATE_MODULATE) { 4040 /* Generate sequence number modulator */ 4041 if ((s->src.seqdiff = pf_tcp_iss(pd) - s->src.seqlo) == 4042 0) 4043 s->src.seqdiff = 1; 4044 pf_patch_32(pd, 4045 &th->th_seq, htonl(s->src.seqlo + s->src.seqdiff)); 4046 *rewrite = 1; 4047 } else 4048 s->src.seqdiff = 0; 4049 if (th->th_flags & TH_SYN) { 4050 s->src.seqhi++; 4051 s->src.wscale = pf_get_wscale(pd); 4052 } 4053 s->src.max_win = MAX(ntohs(th->th_win), 1); 4054 if (s->src.wscale & PF_WSCALE_MASK) { 4055 /* Remove scale factor from initial window */ 4056 int win = s->src.max_win; 4057 win += 1 << (s->src.wscale & PF_WSCALE_MASK); 4058 s->src.max_win = (win - 1) >> 4059 (s->src.wscale & PF_WSCALE_MASK); 4060 } 4061 if (th->th_flags & TH_FIN) 4062 s->src.seqhi++; 4063 s->dst.seqhi = 1; 4064 s->dst.max_win = 1; 4065 pf_set_protostate(s, PF_PEER_SRC, TCPS_SYN_SENT); 4066 pf_set_protostate(s, PF_PEER_DST, TCPS_CLOSED); 4067 s->timeout = PFTM_TCP_FIRST_PACKET; 4068 pf_status.states_halfopen++; 4069 break; 4070 case IPPROTO_UDP: 4071 pf_set_protostate(s, PF_PEER_SRC, PFUDPS_SINGLE); 4072 pf_set_protostate(s, PF_PEER_DST, PFUDPS_NO_TRAFFIC); 4073 s->timeout = PFTM_UDP_FIRST_PACKET; 4074 break; 4075 case IPPROTO_ICMP: 4076 #ifdef INET6 4077 case IPPROTO_ICMPV6: 4078 #endif /* INET6 */ 4079 s->timeout = PFTM_ICMP_FIRST_PACKET; 4080 break; 4081 default: 4082 pf_set_protostate(s, PF_PEER_SRC, PFOTHERS_SINGLE); 4083 pf_set_protostate(s, PF_PEER_DST, PFOTHERS_NO_TRAFFIC); 4084 s->timeout = PFTM_OTHER_FIRST_PACKET; 4085 } 4086 4087 s->creation = getuptime(); 4088 
s->expire = getuptime(); 4089 4090 if (pd->proto == IPPROTO_TCP) { 4091 if (s->state_flags & PFSTATE_SCRUB_TCP && 4092 pf_normalize_tcp_init(pd, &s->src)) { 4093 REASON_SET(&reason, PFRES_MEMORY); 4094 goto csfailed; 4095 } 4096 if (s->state_flags & PFSTATE_SCRUB_TCP && s->src.scrub && 4097 pf_normalize_tcp_stateful(pd, &reason, s, &s->src, &s->dst, 4098 rewrite)) { 4099 /* This really shouldn't happen!!! */ 4100 DPFPRINTF(LOG_ERR, 4101 "%s: tcp normalize failed on first pkt", __func__); 4102 goto csfailed; 4103 } 4104 } 4105 s->direction = pd->dir; 4106 4107 if (pf_state_key_setup(pd, skw, sks, act->rtableid)) { 4108 REASON_SET(&reason, PFRES_MEMORY); 4109 goto csfailed; 4110 } 4111 4112 if (pf_set_rt_ifp(s, pd->src, (*skw)->af, sns) != 0) { 4113 REASON_SET(&reason, PFRES_NOROUTE); 4114 goto csfailed; 4115 } 4116 4117 for (i = 0; i < PF_SN_MAX; i++) 4118 if (sns[i] != NULL) { 4119 struct pf_sn_item *sni; 4120 4121 sni = pool_get(&pf_sn_item_pl, PR_NOWAIT); 4122 if (sni == NULL) { 4123 REASON_SET(&reason, PFRES_MEMORY); 4124 goto csfailed; 4125 } 4126 sni->sn = sns[i]; 4127 SLIST_INSERT_HEAD(&s->src_nodes, sni, next); 4128 sni->sn->states++; 4129 } 4130 4131 if (pf_state_insert(BOUND_IFACE(r, pd->kif), skw, sks, s)) { 4132 pf_detach_state(s); 4133 *sks = *skw = NULL; 4134 REASON_SET(&reason, PFRES_STATEINS); 4135 goto csfailed; 4136 } else 4137 *sm = s; 4138 4139 /* 4140 * Make state responsible for rules it binds here. 
4141 */ 4142 memcpy(&s->match_rules, rules, sizeof(s->match_rules)); 4143 memset(rules, 0, sizeof(*rules)); 4144 STATE_INC_COUNTERS(s); 4145 4146 if (tag > 0) { 4147 pf_tag_ref(tag); 4148 s->tag = tag; 4149 } 4150 if (pd->proto == IPPROTO_TCP && (th->th_flags & (TH_SYN|TH_ACK)) == 4151 TH_SYN && r->keep_state == PF_STATE_SYNPROXY && pd->dir == PF_IN) { 4152 int rtid = pd->rdomain; 4153 if (act->rtableid >= 0) 4154 rtid = act->rtableid; 4155 pf_set_protostate(s, PF_PEER_SRC, PF_TCPS_PROXY_SRC); 4156 s->src.seqhi = arc4random(); 4157 /* Find mss option */ 4158 mss = pf_get_mss(pd); 4159 mss = pf_calc_mss(pd->src, pd->af, rtid, mss); 4160 mss = pf_calc_mss(pd->dst, pd->af, rtid, mss); 4161 s->src.mss = mss; 4162 pf_send_tcp(r, pd->af, pd->dst, pd->src, th->th_dport, 4163 th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1, 4164 TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, pd->rdomain); 4165 REASON_SET(&reason, PFRES_SYNPROXY); 4166 return (PF_SYNPROXY_DROP); 4167 } 4168 4169 return (PF_PASS); 4170 4171 csfailed: 4172 if (s) { 4173 pf_normalize_tcp_cleanup(s); /* safe even w/o init */ 4174 pf_src_tree_remove_state(s); 4175 pool_put(&pf_state_pl, s); 4176 } 4177 4178 for (i = 0; i < PF_SN_MAX; i++) 4179 if (sns[i] != NULL) 4180 pf_remove_src_node(sns[i]); 4181 4182 return (PF_DROP); 4183 } 4184

/*
 * Rewrite the packet described by pd to the supplied addresses and ports.
 *
 * TCP and UDP get their ports patched.  ICMP and ICMPv6 echo packets
 * (virtual_type equal to htons(ICMP_ECHO) or htons(ICMP6_ECHO_REQUEST))
 * get their ICMP id patched; the id is taken from sport when icmp_dir is
 * PF_IN and from dport otherwise.  When pd->af differs from pd->naf the
 * ICMP header is converted to the other family and the addresses are not
 * touched here.
 *
 * Returns the accumulated result of the patch helpers, or 0 when the ICMP
 * protocol does not fit pd->af or the family conversion fails.
 *
 * When called from bpf_mtap_pflog, there are extra constraints:
 *  - the mbuf is faked, m_data is the bpf buffer
 *  - pd is not fully set up
 */
int
pf_translate(struct pf_pdesc *pd, struct pf_addr *saddr, u_int16_t sport,
    struct pf_addr *daddr, u_int16_t dport, u_int16_t virtual_type,
    int icmp_dir)
{
	const int	 is_afto = (pd->af != pd->naf);
	const u_int16_t	 echo_id = (icmp_dir == PF_IN) ? sport : dport;
	int		 changed = 0;

	if (is_afto || PF_ANEQ(daddr, pd->dst, pd->af))
		pd->destchg = 1;

	switch (pd->proto) {
	case IPPROTO_TCP:
	case IPPROTO_UDP:
		changed += pf_patch_16(pd, pd->sport, sport);
		changed += pf_patch_16(pd, pd->dport, dport);
		break;

	case IPPROTO_ICMP:
		/* pf_translate() is also used when logging invalid packets */
		if (pd->af != AF_INET)
			return (0);

#ifdef INET6
		if (is_afto) {
			if (pf_translate_icmp_af(pd, AF_INET6, &pd->hdr.icmp))
				return (0);
			pd->proto = IPPROTO_ICMPV6;
			changed = 1;
		}
#endif /* INET6 */
		if (virtual_type == htons(ICMP_ECHO))
			changed += pf_patch_16(pd, &pd->hdr.icmp.icmp_id,
			    echo_id);
		break;

#ifdef INET6
	case IPPROTO_ICMPV6:
		/* pf_translate() is also used when logging invalid packets */
		if (pd->af != AF_INET6)
			return (0);

		if (is_afto) {
			if (pf_translate_icmp_af(pd, AF_INET, &pd->hdr.icmp6))
				return (0);
			pd->proto = IPPROTO_ICMP;
			changed = 1;
		}
		if (virtual_type == htons(ICMP6_ECHO_REQUEST))
			changed += pf_patch_16(pd, &pd->hdr.icmp6.icmp6_id,
			    echo_id);
		break;
#endif /* INET6 */
	}

	/* addresses are only rewritten here when the family is unchanged */
	if (is_afto)
		return (changed);

	changed += pf_translate_a(pd, pd->src, saddr);
	changed += pf_translate_a(pd, pd->dst, daddr);

	return (changed);
}

4257 int 4258 pf_tcp_track_full(struct pf_pdesc *pd, struct pf_state **state, u_short *reason, 4259 int *copyback, int reverse) 4260 { 4261 struct tcphdr *th = &pd->hdr.tcp; 4262 struct pf_state_peer *src, *dst; 4263 u_int16_t win = ntohs(th->th_win); 4264 u_int32_t ack, end, data_end, seq, orig_seq; 4265 u_int8_t sws, dws, psrc, pdst; 4266 int ackskew; 4267 4268 if ((pd->dir == (*state)->direction && !reverse) || 4269 (pd->dir != (*state)->direction && reverse)) { 4270 src = &(*state)->src; 4271 dst = &(*state)->dst; 4272 psrc = PF_PEER_SRC; 4273 pdst = PF_PEER_DST; 4274 } else { 4275 src = &(*state)->dst; 4276 dst = &(*state)->src; 4277 psrc = PF_PEER_DST; 4278 pdst = PF_PEER_SRC; 4279 } 4280 4281 if (src->wscale && dst->wscale
&& !(th->th_flags & TH_SYN)) { 4282 sws = src->wscale & PF_WSCALE_MASK; 4283 dws = dst->wscale & PF_WSCALE_MASK; 4284 } else 4285 sws = dws = 0; 4286 4287 /* 4288 * Sequence tracking algorithm from Guido van Rooij's paper: 4289 * http://www.madison-gurkha.com/publications/tcp_filtering/ 4290 * tcp_filtering.ps 4291 */ 4292 4293 orig_seq = seq = ntohl(th->th_seq); 4294 if (src->seqlo == 0) { 4295 /* First packet from this end. Set its state */ 4296 4297 if (((*state)->state_flags & PFSTATE_SCRUB_TCP || dst->scrub) && 4298 src->scrub == NULL) { 4299 if (pf_normalize_tcp_init(pd, src)) { 4300 REASON_SET(reason, PFRES_MEMORY); 4301 return (PF_DROP); 4302 } 4303 } 4304 4305 /* Deferred generation of sequence number modulator */ 4306 if (dst->seqdiff && !src->seqdiff) { 4307 /* use random iss for the TCP server */ 4308 while ((src->seqdiff = arc4random() - seq) == 0) 4309 continue; 4310 ack = ntohl(th->th_ack) - dst->seqdiff; 4311 pf_patch_32(pd, &th->th_seq, htonl(seq + src->seqdiff)); 4312 pf_patch_32(pd, &th->th_ack, htonl(ack)); 4313 *copyback = 1; 4314 } else { 4315 ack = ntohl(th->th_ack); 4316 } 4317 4318 end = seq + pd->p_len; 4319 if (th->th_flags & TH_SYN) { 4320 end++; 4321 if (dst->wscale & PF_WSCALE_FLAG) { 4322 src->wscale = pf_get_wscale(pd); 4323 if (src->wscale & PF_WSCALE_FLAG) { 4324 /* Remove scale factor from initial 4325 * window */ 4326 sws = src->wscale & PF_WSCALE_MASK; 4327 win = ((u_int32_t)win + (1 << sws) - 1) 4328 >> sws; 4329 dws = dst->wscale & PF_WSCALE_MASK; 4330 } else { 4331 /* fixup other window */ 4332 dst->max_win = MIN(TCP_MAXWIN, 4333 (u_int32_t)dst->max_win << 4334 (dst->wscale & PF_WSCALE_MASK)); 4335 /* in case of a retrans SYN|ACK */ 4336 dst->wscale = 0; 4337 } 4338 } 4339 } 4340 data_end = end; 4341 if (th->th_flags & TH_FIN) 4342 end++; 4343 4344 src->seqlo = seq; 4345 if (src->state < TCPS_SYN_SENT) 4346 pf_set_protostate(*state, psrc, TCPS_SYN_SENT); 4347 4348 /* 4349 * May need to slide the window (seqhi may have been 
set by 4350 * the crappy stack check or if we picked up the connection 4351 * after establishment) 4352 */ 4353 if (src->seqhi == 1 || 4354 SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi)) 4355 src->seqhi = end + MAX(1, dst->max_win << dws); 4356 if (win > src->max_win) 4357 src->max_win = win; 4358 4359 } else { 4360 ack = ntohl(th->th_ack) - dst->seqdiff; 4361 if (src->seqdiff) { 4362 /* Modulate sequence numbers */ 4363 pf_patch_32(pd, &th->th_seq, htonl(seq + src->seqdiff)); 4364 pf_patch_32(pd, &th->th_ack, htonl(ack)); 4365 *copyback = 1; 4366 } 4367 end = seq + pd->p_len; 4368 if (th->th_flags & TH_SYN) 4369 end++; 4370 data_end = end; 4371 if (th->th_flags & TH_FIN) 4372 end++; 4373 } 4374 4375 if ((th->th_flags & TH_ACK) == 0) { 4376 /* Let it pass through the ack skew check */ 4377 ack = dst->seqlo; 4378 } else if ((ack == 0 && 4379 (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) || 4380 /* broken tcp stacks do not set ack */ 4381 (dst->state < TCPS_SYN_SENT)) { 4382 /* 4383 * Many stacks (ours included) will set the ACK number in an 4384 * FIN|ACK if the SYN times out -- no sequence to ACK. 4385 */ 4386 ack = dst->seqlo; 4387 } 4388 4389 if (seq == end) { 4390 /* Ease sequencing restrictions on no data packets */ 4391 seq = src->seqlo; 4392 data_end = end = seq; 4393 } 4394 4395 ackskew = dst->seqlo - ack; 4396 4397 4398 /* 4399 * Need to demodulate the sequence numbers in any TCP SACK options 4400 * (Selective ACK). We could optionally validate the SACK values 4401 * against the current ACK window, either forwards or backwards, but 4402 * I'm not confident that SACK has been implemented properly 4403 * everywhere. It wouldn't surprise me if several stacks accidently 4404 * SACK too far backwards of previously ACKed data. There really aren't 4405 * any security implications of bad SACKing unless the target stack 4406 * doesn't validate the option length correctly. 
Someone trying to 4407 * spoof into a TCP connection won't bother blindly sending SACK 4408 * options anyway. 4409 */ 4410 if (dst->seqdiff && (th->th_off << 2) > sizeof(struct tcphdr)) { 4411 if (pf_modulate_sack(pd, dst)) 4412 *copyback = 1; 4413 } 4414 4415 4416 #define MAXACKWINDOW (0xffff + 1500) /* 1500 is an arbitrary fudge factor */ 4417 if (SEQ_GEQ(src->seqhi, data_end) && 4418 /* Last octet inside other's window space */ 4419 SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) && 4420 /* Retrans: not more than one window back */ 4421 (ackskew >= -MAXACKWINDOW) && 4422 /* Acking not more than one reassembled fragment backwards */ 4423 (ackskew <= (MAXACKWINDOW << sws)) && 4424 /* Acking not more than one window forward */ 4425 ((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo || 4426 (orig_seq == src->seqlo + 1) || (orig_seq + 1 == src->seqlo))) { 4427 /* Require an exact/+1 sequence match on resets when possible */ 4428 4429 if (dst->scrub || src->scrub) { 4430 if (pf_normalize_tcp_stateful(pd, reason, *state, src, 4431 dst, copyback)) 4432 return (PF_DROP); 4433 } 4434 4435 /* update max window */ 4436 if (src->max_win < win) 4437 src->max_win = win; 4438 /* synchronize sequencing */ 4439 if (SEQ_GT(end, src->seqlo)) 4440 src->seqlo = end; 4441 /* slide the window of what the other end can send */ 4442 if (SEQ_GEQ(ack + (win << sws), dst->seqhi)) 4443 dst->seqhi = ack + MAX((win << sws), 1); 4444 4445 /* update states */ 4446 if (th->th_flags & TH_SYN) 4447 if (src->state < TCPS_SYN_SENT) 4448 pf_set_protostate(*state, psrc, TCPS_SYN_SENT); 4449 if (th->th_flags & TH_FIN) 4450 if (src->state < TCPS_CLOSING) 4451 pf_set_protostate(*state, psrc, TCPS_CLOSING); 4452 if (th->th_flags & TH_ACK) { 4453 if (dst->state == TCPS_SYN_SENT) { 4454 pf_set_protostate(*state, pdst, 4455 TCPS_ESTABLISHED); 4456 if (src->state == TCPS_ESTABLISHED && 4457 !SLIST_EMPTY(&(*state)->src_nodes) && 4458 pf_src_connlimit(state)) { 4459 REASON_SET(reason, PFRES_SRCLIMIT); 4460 
return (PF_DROP); 4461 } 4462 } else if (dst->state == TCPS_CLOSING) 4463 pf_set_protostate(*state, pdst, 4464 TCPS_FIN_WAIT_2); 4465 } 4466 if (th->th_flags & TH_RST) 4467 pf_set_protostate(*state, PF_PEER_BOTH, TCPS_TIME_WAIT); 4468 4469 /* update expire time */ 4470 (*state)->expire = getuptime(); 4471 if (src->state >= TCPS_FIN_WAIT_2 && 4472 dst->state >= TCPS_FIN_WAIT_2) 4473 (*state)->timeout = PFTM_TCP_CLOSED; 4474 else if (src->state >= TCPS_CLOSING && 4475 dst->state >= TCPS_CLOSING) 4476 (*state)->timeout = PFTM_TCP_FIN_WAIT; 4477 else if (src->state < TCPS_ESTABLISHED || 4478 dst->state < TCPS_ESTABLISHED) 4479 (*state)->timeout = PFTM_TCP_OPENING; 4480 else if (src->state >= TCPS_CLOSING || 4481 dst->state >= TCPS_CLOSING) 4482 (*state)->timeout = PFTM_TCP_CLOSING; 4483 else 4484 (*state)->timeout = PFTM_TCP_ESTABLISHED; 4485 4486 /* Fall through to PASS packet */ 4487 } else if ((dst->state < TCPS_SYN_SENT || 4488 dst->state >= TCPS_FIN_WAIT_2 || 4489 src->state >= TCPS_FIN_WAIT_2) && 4490 SEQ_GEQ(src->seqhi + MAXACKWINDOW, data_end) && 4491 /* Within a window forward of the originating packet */ 4492 SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) { 4493 /* Within a window backward of the originating packet */ 4494 4495 /* 4496 * This currently handles three situations: 4497 * 1) Stupid stacks will shotgun SYNs before their peer 4498 * replies. 4499 * 2) When PF catches an already established stream (the 4500 * firewall rebooted, the state table was flushed, routes 4501 * changed...) 4502 * 3) Packets get funky immediately after the connection 4503 * closes (this should catch Solaris spurious ACK|FINs 4504 * that web servers like to spew after a close) 4505 * 4506 * This must be a little more careful than the above code 4507 * since packet floods will also be caught here. We don't 4508 * update the TTL here to mitigate the damage of a packet 4509 * flood and so the same code can handle awkward establishment 4510 * and a loosened connection close. 
4511 * In the establishment case, a correct peer response will 4512 * validate the connection, go through the normal state code 4513 * and keep updating the state TTL. 4514 */ 4515 4516 if (pf_status.debug >= LOG_NOTICE) { 4517 log(LOG_NOTICE, "pf: loose state match: "); 4518 pf_print_state(*state); 4519 pf_print_flags(th->th_flags); 4520 addlog(" seq=%u (%u) ack=%u len=%u ackskew=%d " 4521 "pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack, 4522 pd->p_len, ackskew, (*state)->packets[0], 4523 (*state)->packets[1], 4524 pd->dir == PF_IN ? "in" : "out", 4525 pd->dir == (*state)->direction ? "fwd" : "rev"); 4526 } 4527 4528 if (dst->scrub || src->scrub) { 4529 if (pf_normalize_tcp_stateful(pd, reason, *state, src, 4530 dst, copyback)) 4531 return (PF_DROP); 4532 } 4533 4534 /* update max window */ 4535 if (src->max_win < win) 4536 src->max_win = win; 4537 /* synchronize sequencing */ 4538 if (SEQ_GT(end, src->seqlo)) 4539 src->seqlo = end; 4540 /* slide the window of what the other end can send */ 4541 if (SEQ_GEQ(ack + (win << sws), dst->seqhi)) 4542 dst->seqhi = ack + MAX((win << sws), 1); 4543 4544 /* 4545 * Cannot set dst->seqhi here since this could be a shotgunned 4546 * SYN and not an already established connection. 
4547 */ 4548 if (th->th_flags & TH_FIN) 4549 if (src->state < TCPS_CLOSING) 4550 pf_set_protostate(*state, psrc, TCPS_CLOSING); 4551 if (th->th_flags & TH_RST) 4552 pf_set_protostate(*state, PF_PEER_BOTH, TCPS_TIME_WAIT); 4553 4554 /* Fall through to PASS packet */ 4555 } else { 4556 if ((*state)->dst.state == TCPS_SYN_SENT && 4557 (*state)->src.state == TCPS_SYN_SENT) { 4558 /* Send RST for state mismatches during handshake */ 4559 if (!(th->th_flags & TH_RST)) 4560 pf_send_tcp((*state)->rule.ptr, pd->af, 4561 pd->dst, pd->src, th->th_dport, 4562 th->th_sport, ntohl(th->th_ack), 0, 4563 TH_RST, 0, 0, 4564 (*state)->rule.ptr->return_ttl, 1, 0, 4565 pd->rdomain); 4566 src->seqlo = 0; 4567 src->seqhi = 1; 4568 src->max_win = 1; 4569 } else if (pf_status.debug >= LOG_NOTICE) { 4570 log(LOG_NOTICE, "pf: BAD state: "); 4571 pf_print_state(*state); 4572 pf_print_flags(th->th_flags); 4573 addlog(" seq=%u (%u) ack=%u len=%u ackskew=%d " 4574 "pkts=%llu:%llu dir=%s,%s\n", 4575 seq, orig_seq, ack, pd->p_len, ackskew, 4576 (*state)->packets[0], (*state)->packets[1], 4577 pd->dir == PF_IN ? "in" : "out", 4578 pd->dir == (*state)->direction ? "fwd" : "rev"); 4579 addlog("pf: State failure on: %c %c %c %c | %c %c\n", 4580 SEQ_GEQ(src->seqhi, data_end) ? ' ' : '1', 4581 SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ? 4582 ' ': '2', 4583 (ackskew >= -MAXACKWINDOW) ? ' ' : '3', 4584 (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4', 4585 SEQ_GEQ(src->seqhi + MAXACKWINDOW, data_end) ? 4586 ' ' :'5', 4587 SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' 
' :'6'); 4588 } 4589 REASON_SET(reason, PFRES_BADSTATE); 4590 return (PF_DROP); 4591 } 4592 4593 return (PF_PASS); 4594 } 4595

/*
 * Sloppy TCP state tracking: advance the peer states from the TCP flags
 * alone, without looking at sequence numbers, then refresh the state's
 * expiry and pick its timeout class.
 *
 * Returns PF_PASS, or PF_DROP with *reason set to PFRES_SRCLIMIT when
 * pf_src_connlimit() reports a violation as the connection becomes
 * established.
 */
int
pf_tcp_track_sloppy(struct pf_pdesc *pd, struct pf_state **state,
    u_short *reason)
{
	struct tcphdr		*th = &pd->hdr.tcp;
	const int		 forward = (pd->dir == (*state)->direction);
	struct pf_state_peer	*src, *dst;
	u_int8_t		 psrc, pdst;
	int			 lo, hi;

	/* "src" is whichever peer sent this packet */
	src = forward ? &(*state)->src : &(*state)->dst;
	dst = forward ? &(*state)->dst : &(*state)->src;
	psrc = forward ? PF_PEER_SRC : PF_PEER_DST;
	pdst = forward ? PF_PEER_DST : PF_PEER_SRC;

	if ((th->th_flags & TH_SYN) && src->state < TCPS_SYN_SENT)
		pf_set_protostate(*state, psrc, TCPS_SYN_SENT);
	if ((th->th_flags & TH_FIN) && src->state < TCPS_CLOSING)
		pf_set_protostate(*state, psrc, TCPS_CLOSING);

	if (th->th_flags & TH_ACK) {
		int	 established = 0;

		if (dst->state == TCPS_SYN_SENT) {
			pf_set_protostate(*state, pdst, TCPS_ESTABLISHED);
			established = (src->state == TCPS_ESTABLISHED);
		} else if (dst->state == TCPS_CLOSING) {
			pf_set_protostate(*state, pdst, TCPS_FIN_WAIT_2);
		} else if (src->state == TCPS_SYN_SENT &&
		    dst->state < TCPS_SYN_SENT) {
			/*
			 * Handle a special sloppy case where we only see one
			 * half of the connection.  If there is an ACK after
			 * the initial SYN without ever seeing a packet from
			 * the destination, set the connection to established.
			 */
			pf_set_protostate(*state, PF_PEER_BOTH,
			    TCPS_ESTABLISHED);
			established = 1;
		} else if (src->state == TCPS_CLOSING &&
		    dst->state == TCPS_ESTABLISHED &&
		    dst->seqlo == 0) {
			/*
			 * Handle the closing of half connections where we
			 * don't see the full bidirectional FIN/ACK+ACK
			 * handshake.
			 */
			pf_set_protostate(*state, pdst, TCPS_CLOSING);
		}

		/* connection just became established: enforce source limits */
		if (established &&
		    !SLIST_EMPTY(&(*state)->src_nodes) &&
		    pf_src_connlimit(state)) {
			REASON_SET(reason, PFRES_SRCLIMIT);
			return (PF_DROP);
		}
	}

	if (th->th_flags & TH_RST)
		pf_set_protostate(*state, PF_PEER_BOTH, TCPS_TIME_WAIT);

	/* update expire time */
	(*state)->expire = getuptime();

	/*
	 * The timeout class depends only on the less and the more advanced
	 * of the two peer states.
	 */
	lo = MIN(src->state, dst->state);
	hi = MAX(src->state, dst->state);
	if (lo >= TCPS_FIN_WAIT_2)
		(*state)->timeout = PFTM_TCP_CLOSED;
	else if (lo >= TCPS_CLOSING)
		(*state)->timeout = PFTM_TCP_FIN_WAIT;
	else if (lo < TCPS_ESTABLISHED)
		(*state)->timeout = PFTM_TCP_OPENING;
	else if (hi >= TCPS_CLOSING)
		(*state)->timeout = PFTM_TCP_CLOSING;
	else
		(*state)->timeout = PFTM_TCP_ESTABLISHED;

	return (PF_PASS);
}

4682 static __inline int 4683 pf_synproxy(struct pf_pdesc *pd, struct pf_state **state, u_short *reason) 4684 { 4685 struct pf_state_key *sk = (*state)->key[pd->didx]; 4686 4687 if ((*state)->src.state == PF_TCPS_PROXY_SRC) { 4688 struct tcphdr *th = &pd->hdr.tcp; 4689 4690 if (pd->dir != (*state)->direction) { 4691 REASON_SET(reason, PFRES_SYNPROXY); 4692 return (PF_SYNPROXY_DROP); 4693 } 4694 if (th->th_flags & TH_SYN) { 4695 if (ntohl(th->th_seq) != (*state)->src.seqlo) { 4696 REASON_SET(reason, PFRES_SYNPROXY); 4697 return (PF_DROP); 4698 } 4699 pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst, 4700 pd->src, th->th_dport, th->th_sport, 4701 (*state)->src.seqhi, ntohl(th->th_seq) + 1, 4702
TH_SYN|TH_ACK, 0, (*state)->src.mss, 0, 1, 4703 0, pd->rdomain); 4704 REASON_SET(reason, PFRES_SYNPROXY); 4705 return (PF_SYNPROXY_DROP); 4706 } else if ((th->th_flags & (TH_ACK|TH_RST|TH_FIN)) != TH_ACK || 4707 (ntohl(th->th_ack) != (*state)->src.seqhi + 1) || 4708 (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) { 4709 REASON_SET(reason, PFRES_SYNPROXY); 4710 return (PF_DROP); 4711 } else if (!SLIST_EMPTY(&(*state)->src_nodes) && 4712 pf_src_connlimit(state)) { 4713 REASON_SET(reason, PFRES_SRCLIMIT); 4714 return (PF_DROP); 4715 } else 4716 pf_set_protostate(*state, PF_PEER_SRC, 4717 PF_TCPS_PROXY_DST); 4718 } 4719 if ((*state)->src.state == PF_TCPS_PROXY_DST) { 4720 struct tcphdr *th = &pd->hdr.tcp; 4721 4722 if (pd->dir == (*state)->direction) { 4723 if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) || 4724 (ntohl(th->th_ack) != (*state)->src.seqhi + 1) || 4725 (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) { 4726 REASON_SET(reason, PFRES_SYNPROXY); 4727 return (PF_DROP); 4728 } 4729 (*state)->src.max_win = MAX(ntohs(th->th_win), 1); 4730 if ((*state)->dst.seqhi == 1) 4731 (*state)->dst.seqhi = arc4random(); 4732 pf_send_tcp((*state)->rule.ptr, pd->af, 4733 &sk->addr[pd->sidx], &sk->addr[pd->didx], 4734 sk->port[pd->sidx], sk->port[pd->didx], 4735 (*state)->dst.seqhi, 0, TH_SYN, 0, 4736 (*state)->src.mss, 0, 0, (*state)->tag, 4737 sk->rdomain); 4738 REASON_SET(reason, PFRES_SYNPROXY); 4739 return (PF_SYNPROXY_DROP); 4740 } else if (((th->th_flags & (TH_SYN|TH_ACK)) != 4741 (TH_SYN|TH_ACK)) || 4742 (ntohl(th->th_ack) != (*state)->dst.seqhi + 1)) { 4743 REASON_SET(reason, PFRES_SYNPROXY); 4744 return (PF_DROP); 4745 } else { 4746 (*state)->dst.max_win = MAX(ntohs(th->th_win), 1); 4747 (*state)->dst.seqlo = ntohl(th->th_seq); 4748 pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst, 4749 pd->src, th->th_dport, th->th_sport, 4750 ntohl(th->th_ack), ntohl(th->th_seq) + 1, 4751 TH_ACK, (*state)->src.max_win, 0, 0, 0, 4752 (*state)->tag, pd->rdomain); 4753 
pf_send_tcp((*state)->rule.ptr, pd->af, 4754 &sk->addr[pd->sidx], &sk->addr[pd->didx], 4755 sk->port[pd->sidx], sk->port[pd->didx], 4756 (*state)->src.seqhi + 1, (*state)->src.seqlo + 1, 4757 TH_ACK, (*state)->dst.max_win, 0, 0, 1, 4758 0, sk->rdomain); 4759 (*state)->src.seqdiff = (*state)->dst.seqhi - 4760 (*state)->src.seqlo; 4761 (*state)->dst.seqdiff = (*state)->src.seqhi - 4762 (*state)->dst.seqlo; 4763 (*state)->src.seqhi = (*state)->src.seqlo + 4764 (*state)->dst.max_win; 4765 (*state)->dst.seqhi = (*state)->dst.seqlo + 4766 (*state)->src.max_win; 4767 (*state)->src.wscale = (*state)->dst.wscale = 0; 4768 pf_set_protostate(*state, PF_PEER_BOTH, 4769 TCPS_ESTABLISHED); 4770 REASON_SET(reason, PFRES_SYNPROXY); 4771 return (PF_SYNPROXY_DROP); 4772 } 4773 } 4774 return (PF_PASS); 4775 } 4776 4777 int 4778 pf_test_state(struct pf_pdesc *pd, struct pf_state **state, u_short *reason, 4779 int syncookie) 4780 { 4781 struct pf_state_key_cmp key; 4782 int copyback = 0; 4783 struct pf_state_peer *src, *dst; 4784 int action; 4785 struct inpcb *inp; 4786 u_int8_t psrc, pdst; 4787 4788 key.af = pd->af; 4789 key.proto = pd->virtual_proto; 4790 key.rdomain = pd->rdomain; 4791 pf_addrcpy(&key.addr[pd->sidx], pd->src, key.af); 4792 pf_addrcpy(&key.addr[pd->didx], pd->dst, key.af); 4793 key.port[pd->sidx] = pd->osport; 4794 key.port[pd->didx] = pd->odport; 4795 inp = pd->m->m_pkthdr.pf.inp; 4796 4797 action = pf_find_state(pd, &key, state); 4798 if (action != PF_MATCH) 4799 return (action); 4800 4801 action = PF_PASS; 4802 if (pd->dir == (*state)->direction) { 4803 src = &(*state)->src; 4804 dst = &(*state)->dst; 4805 psrc = PF_PEER_SRC; 4806 pdst = PF_PEER_DST; 4807 } else { 4808 src = &(*state)->dst; 4809 dst = &(*state)->src; 4810 psrc = PF_PEER_DST; 4811 pdst = PF_PEER_SRC; 4812 } 4813 4814 switch (pd->virtual_proto) { 4815 case IPPROTO_TCP: 4816 if (syncookie) { 4817 pf_set_protostate(*state, PF_PEER_SRC, 4818 PF_TCPS_PROXY_DST); 4819 (*state)->dst.seqhi = 
ntohl(pd->hdr.tcp.th_ack) - 1; 4820 } 4821 if ((action = pf_synproxy(pd, state, reason)) != PF_PASS) 4822 return (action); 4823 if ((pd->hdr.tcp.th_flags & (TH_SYN|TH_ACK)) == TH_SYN) { 4824 4825 if (dst->state >= TCPS_FIN_WAIT_2 && 4826 src->state >= TCPS_FIN_WAIT_2) { 4827 if (pf_status.debug >= LOG_NOTICE) { 4828 log(LOG_NOTICE, "pf: state reuse "); 4829 pf_print_state(*state); 4830 pf_print_flags(pd->hdr.tcp.th_flags); 4831 addlog("\n"); 4832 } 4833 /* XXX make sure it's the same direction ?? */ 4834 (*state)->timeout = PFTM_PURGE; 4835 *state = NULL; 4836 pf_mbuf_link_inpcb(pd->m, inp); 4837 return (PF_DROP); 4838 } else if (dst->state >= TCPS_ESTABLISHED && 4839 src->state >= TCPS_ESTABLISHED) { 4840 /* 4841 * SYN matches existing state??? 4842 * Typically happens when sender boots up after 4843 * sudden panic. Certain protocols (NFSv3) are 4844 * always using same port numbers. Challenge 4845 * ACK enables all parties (firewall and peers) 4846 * to get in sync again. 4847 */ 4848 pf_send_challenge_ack(pd, *state, src, dst); 4849 return (PF_DROP); 4850 } 4851 } 4852 4853 if ((*state)->state_flags & PFSTATE_SLOPPY) { 4854 if (pf_tcp_track_sloppy(pd, state, reason) == PF_DROP) 4855 return (PF_DROP); 4856 } else { 4857 if (pf_tcp_track_full(pd, state, reason, ©back, 4858 PF_REVERSED_KEY((*state)->key, pd->af)) == PF_DROP) 4859 return (PF_DROP); 4860 } 4861 break; 4862 case IPPROTO_UDP: 4863 /* update states */ 4864 if (src->state < PFUDPS_SINGLE) 4865 pf_set_protostate(*state, psrc, PFUDPS_SINGLE); 4866 if (dst->state == PFUDPS_SINGLE) 4867 pf_set_protostate(*state, pdst, PFUDPS_MULTIPLE); 4868 4869 /* update expire time */ 4870 (*state)->expire = getuptime(); 4871 if (src->state == PFUDPS_MULTIPLE && 4872 dst->state == PFUDPS_MULTIPLE) 4873 (*state)->timeout = PFTM_UDP_MULTIPLE; 4874 else 4875 (*state)->timeout = PFTM_UDP_SINGLE; 4876 break; 4877 default: 4878 /* update states */ 4879 if (src->state < PFOTHERS_SINGLE) 4880 pf_set_protostate(*state, psrc, 
PFOTHERS_SINGLE); 4881 if (dst->state == PFOTHERS_SINGLE) 4882 pf_set_protostate(*state, pdst, PFOTHERS_MULTIPLE); 4883 4884 /* update expire time */ 4885 (*state)->expire = getuptime(); 4886 if (src->state == PFOTHERS_MULTIPLE && 4887 dst->state == PFOTHERS_MULTIPLE) 4888 (*state)->timeout = PFTM_OTHER_MULTIPLE; 4889 else 4890 (*state)->timeout = PFTM_OTHER_SINGLE; 4891 break; 4892 } 4893 4894 /* translate source/destination address, if necessary */ 4895 if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { 4896 struct pf_state_key *nk; 4897 int afto, sidx, didx; 4898 4899 if (PF_REVERSED_KEY((*state)->key, pd->af)) 4900 nk = (*state)->key[pd->sidx]; 4901 else 4902 nk = (*state)->key[pd->didx]; 4903 4904 afto = pd->af != nk->af; 4905 sidx = afto ? pd->didx : pd->sidx; 4906 didx = afto ? pd->sidx : pd->didx; 4907 4908 #ifdef INET6 4909 if (afto) { 4910 pf_addrcpy(&pd->nsaddr, &nk->addr[sidx], nk->af); 4911 pf_addrcpy(&pd->ndaddr, &nk->addr[didx], nk->af); 4912 pd->naf = nk->af; 4913 action = PF_AFRT; 4914 } 4915 #endif /* INET6 */ 4916 4917 if (!afto) 4918 pf_translate_a(pd, pd->src, &nk->addr[sidx]); 4919 4920 if (pd->sport != NULL) 4921 pf_patch_16(pd, pd->sport, nk->port[sidx]); 4922 4923 if (afto || PF_ANEQ(pd->dst, &nk->addr[didx], pd->af) || 4924 pd->rdomain != nk->rdomain) 4925 pd->destchg = 1; 4926 4927 if (!afto) 4928 pf_translate_a(pd, pd->dst, &nk->addr[didx]); 4929 4930 if (pd->dport != NULL) 4931 pf_patch_16(pd, pd->dport, nk->port[didx]); 4932 4933 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 4934 copyback = 1; 4935 } 4936 4937 if (copyback && pd->hdrlen > 0) { 4938 m_copyback(pd->m, pd->off, pd->hdrlen, &pd->hdr, M_NOWAIT); 4939 } 4940 4941 return (action); 4942 } 4943 4944 int 4945 pf_icmp_state_lookup(struct pf_pdesc *pd, struct pf_state_key_cmp *key, 4946 struct pf_state **state, u_int16_t icmpid, u_int16_t type, 4947 int icmp_dir, int *iidx, int multi, int inner) 4948 { 4949 int direction, action; 4950 4951 key->af = pd->af; 4952 
key->proto = pd->proto;
	key->rdomain = pd->rdomain;
	if (icmp_dir == PF_IN) {
		*iidx = pd->sidx;
		key->port[pd->sidx] = icmpid;
		key->port[pd->didx] = type;
	} else {
		*iidx = pd->didx;
		key->port[pd->sidx] = type;
		key->port[pd->didx] = icmpid;
	}

	if (pf_state_key_addr_setup(pd, key, pd->sidx, pd->src, pd->didx,
	    pd->dst, pd->af, multi))
		return (PF_DROP);

	action = pf_find_state(pd, key, state);
	if (action != PF_MATCH)
		return (action);

	/* Sloppy states skip the direction check below. */
	if ((*state)->state_flags & PFSTATE_SLOPPY)
		return (-1);

	/* Is this ICMP message flowing in right direction? */
	if ((*state)->key[PF_SK_WIRE]->af != (*state)->key[PF_SK_STACK]->af)
		direction = (pd->af == (*state)->key[PF_SK_WIRE]->af) ?
		    PF_IN : PF_OUT;
	else
		direction = (*state)->direction;
	if ((((!inner && direction == pd->dir) ||
	    (inner && direction != pd->dir)) ?
	    PF_IN : PF_OUT) != icmp_dir) {
		if (pf_status.debug >= LOG_NOTICE) {
			log(LOG_NOTICE,
			    "pf: icmp type %d in wrong direction (%d): ",
			    ntohs(type), icmp_dir);
			pf_print_state(*state);
			addlog("\n");
		}
		return (PF_DROP);
	}
	/* -1: state found, caller continues processing. */
	return (-1);
}

/*
 * Match an ICMP or ICMPv6 packet against the state table.
 *
 * When pf_icmp_mapping() returns 0 the packet is handled as a
 * query/reply and looked up as an ICMP state of its own.  Otherwise it
 * is handled as an error message: the quoted inner IP header and the
 * start of the inner TCP/UDP/ICMP/other header are pulled from the mbuf
 * and the state of that inner packet is looked up instead.  In both
 * cases, if the state's wire and stack keys differ, the addresses,
 * ports and ICMP ids are translated and the modified headers are
 * written back with m_copyback().
 *
 * Returns PF_PASS, PF_DROP, PF_AFRT (address family translation), or
 * the non-match result of pf_find_state() / pf_icmp_state_lookup().
 * Panics on a pd->proto other than ICMP/ICMPv6.
 */
int
pf_test_state_icmp(struct pf_pdesc *pd, struct pf_state **state,
    u_short *reason)
{
	u_int16_t	 virtual_id, virtual_type;
	u_int8_t	 icmptype, icmpcode;
	int		 icmp_dir, iidx, ret, copyback = 0;

	struct pf_state_key_cmp key;

	switch (pd->proto) {
	case IPPROTO_ICMP:
		icmptype = pd->hdr.icmp.icmp_type;
		icmpcode = pd->hdr.icmp.icmp_code;
		break;
#ifdef INET6
	case IPPROTO_ICMPV6:
		icmptype = pd->hdr.icmp6.icmp6_type;
		icmpcode = pd->hdr.icmp6.icmp6_code;
		break;
#endif /* INET6 */
	default:
		panic("unhandled proto %d", pd->proto);
	}

	if (pf_icmp_mapping(pd, icmptype, &icmp_dir, &virtual_id,
	    &virtual_type) == 0) {
		/*
		 * ICMP query/reply message not related to a TCP/UDP packet.
		 * Search for an ICMP state.
		 */
		ret = pf_icmp_state_lookup(pd, &key, state,
		    virtual_id, virtual_type, icmp_dir, &iidx,
		    0, 0);
		/* IPv6? try matching a multicast address */
		if (ret == PF_DROP && pd->af == AF_INET6 && icmp_dir == PF_OUT)
			ret = pf_icmp_state_lookup(pd, &key, state, virtual_id,
			    virtual_type, icmp_dir, &iidx, 1, 0);
		/* Anything other than -1 is a final verdict from the lookup. */
		if (ret >= 0)
			return (ret);

		(*state)->expire = getuptime();
		(*state)->timeout = PFTM_ICMP_ERROR_REPLY;

		/* translate source/destination address, if necessary */
		if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
			struct pf_state_key	*nk;
			int			 afto, sidx, didx;

			if (PF_REVERSED_KEY((*state)->key, pd->af))
				nk = (*state)->key[pd->sidx];
			else
				nk = (*state)->key[pd->didx];

			/* With af translation all key indices are swapped. */
			afto = pd->af != nk->af;
			sidx = afto ? pd->didx : pd->sidx;
			didx = afto ? pd->sidx : pd->didx;
			iidx = afto ? !iidx : iidx;
#ifdef INET6
			if (afto) {
				pf_addrcpy(&pd->nsaddr, &nk->addr[sidx],
				    nk->af);
				pf_addrcpy(&pd->ndaddr, &nk->addr[didx],
				    nk->af);
				pd->naf = nk->af;
			}
#endif /* INET6 */
			if (!afto) {
				pf_translate_a(pd, pd->src, &nk->addr[sidx]);
				pf_translate_a(pd, pd->dst, &nk->addr[didx]);
			}

			if (pd->rdomain != nk->rdomain)
				pd->destchg = 1;
			if (!afto && PF_ANEQ(pd->dst,
				&nk->addr[didx], pd->af))
				pd->destchg = 1;
			pd->m->m_pkthdr.ph_rtableid = nk->rdomain;

			switch (pd->af) {
			case AF_INET:
#ifdef INET6
				if (afto) {
					if (pf_translate_icmp_af(pd, AF_INET6,
					    &pd->hdr.icmp))
						return (PF_DROP);
					pd->proto = IPPROTO_ICMPV6;
				}
#endif /* INET6 */
				pf_patch_16(pd,
				    &pd->hdr.icmp.icmp_id, nk->port[iidx]);

				m_copyback(pd->m, pd->off, ICMP_MINLEN,
				    &pd->hdr.icmp, M_NOWAIT);
				copyback = 1;
				break;
#ifdef INET6
			case AF_INET6:
				if (afto) {
					if (pf_translate_icmp_af(pd, AF_INET,
					    &pd->hdr.icmp6))
						return (PF_DROP);
					pd->proto = IPPROTO_ICMP;
				}

				pf_patch_16(pd,
				    &pd->hdr.icmp6.icmp6_id, nk->port[iidx]);

				m_copyback(pd->m, pd->off,
				    sizeof(struct icmp6_hdr), &pd->hdr.icmp6,
				    M_NOWAIT);
				copyback = 1;
				break;
#endif /* INET6 */
			}
#ifdef INET6
			if (afto)
				return (PF_AFRT);
#endif /* INET6 */
		}
	} else {
		/*
		 * ICMP error message in response to a TCP/UDP packet.
		 * Extract the inner TCP/UDP header and search for that state.
		 */
		struct pf_pdesc	 pd2;
		struct ip	 h2;
#ifdef INET6
		struct ip6_hdr	 h2_6;
#endif /* INET6 */
		int		 ipoff2;

		/* Initialize pd2 fields valid for both packets with pd. */
		memset(&pd2, 0, sizeof(pd2));
		pd2.af = pd->af;
		pd2.dir = pd->dir;
		pd2.kif = pd->kif;
		pd2.m = pd->m;
		pd2.rdomain = pd->rdomain;
		/* Payload packet is from the opposite direction. */
		pd2.sidx = (pd2.dir == PF_IN) ? 1 : 0;
		pd2.didx = (pd2.dir == PF_IN) ? 0 : 1;
		switch (pd->af) {
		case AF_INET:
			/* offset of h2 in mbuf chain */
			ipoff2 = pd->off + ICMP_MINLEN;

			if (!pf_pull_hdr(pd2.m, ipoff2, &h2, sizeof(h2),
			    NULL, reason, pd2.af)) {
				DPFPRINTF(LOG_NOTICE,
				    "ICMP error message too short (ip)");
				return (PF_DROP);
			}
			/*
			 * ICMP error messages don't refer to non-first
			 * fragments
			 */
			if (h2.ip_off & htons(IP_OFFMASK)) {
				REASON_SET(reason, PFRES_FRAG);
				return (PF_DROP);
			}

			/* offset of protocol header that follows h2 */
			pd2.off = ipoff2;
			if (pf_walk_header(&pd2, &h2, reason) != PF_PASS)
				return (PF_DROP);

			pd2.tot_len = ntohs(h2.ip_len);
			pd2.src = (struct pf_addr *)&h2.ip_src;
			pd2.dst = (struct pf_addr *)&h2.ip_dst;
			break;
#ifdef INET6
		case AF_INET6:
			/* offset of h2_6 in mbuf chain */
			ipoff2 = pd->off + sizeof(struct icmp6_hdr);

			if (!pf_pull_hdr(pd2.m, ipoff2, &h2_6, sizeof(h2_6),
			    NULL, reason, pd2.af)) {
				DPFPRINTF(LOG_NOTICE,
				    "ICMP error message too short (ip6)");
				return (PF_DROP);
			}

			pd2.off = ipoff2;
			if (pf_walk_header6(&pd2, &h2_6, reason) != PF_PASS)
				return (PF_DROP);

			pd2.tot_len = ntohs(h2_6.ip6_plen) +
			    sizeof(struct ip6_hdr);
			pd2.src = (struct pf_addr *)&h2_6.ip6_src;
			pd2.dst = (struct pf_addr *)&h2_6.ip6_dst;
			break;
#endif /* INET6 */
		default:
			unhandled_af(pd->af);
		}

		/*
		 * The error must be addressed to the sender of the quoted
		 * packet; otherwise it is rejected as a bad state.
		 */
		if (PF_ANEQ(pd->dst, pd2.src, pd->af)) {
			if (pf_status.debug >= LOG_NOTICE) {
				log(LOG_NOTICE,
				    "pf: BAD ICMP %d:%d outer dst: ",
				    icmptype, icmpcode);
				pf_print_host(pd->src, 0, pd->af);
				addlog(" -> ");
				pf_print_host(pd->dst, 0, pd->af);
				addlog(" inner src: ");
				pf_print_host(pd2.src, 0, pd2.af);
				addlog(" -> ");
				pf_print_host(pd2.dst, 0, pd2.af);
				addlog("\n");
			}
			REASON_SET(reason, PFRES_BADSTATE);
			return (PF_DROP);
		}

		switch (pd2.proto) {
		case IPPROTO_TCP: {
			struct tcphdr		*th = &pd2.hdr.tcp;
			u_int32_t		 seq;
			struct pf_state_peer	*src, *dst;
			u_int8_t		 dws;
			int			 action;

			/*
			 * Only the first 8 bytes of the TCP header can be
			 * expected. Don't access any TCP header fields after
			 * th_seq, an ackskew test is not possible.
			 */
			if (!pf_pull_hdr(pd2.m, pd2.off, th, 8, NULL, reason,
			    pd2.af)) {
				DPFPRINTF(LOG_NOTICE,
				    "ICMP error message too short (tcp)");
				return (PF_DROP);
			}

			key.af = pd2.af;
			key.proto = IPPROTO_TCP;
			key.rdomain = pd2.rdomain;
			pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af);
			pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af);
			key.port[pd2.sidx] = th->th_sport;
			key.port[pd2.didx] = th->th_dport;

			action = pf_find_state(&pd2, &key, state);
			if (action != PF_MATCH)
				return (action);

			/*
			 * Pick the peers as seen by the quoted packet: the
			 * choice depends on both the state direction and
			 * whether the key is reversed for this af.
			 */
			if (pd2.dir == (*state)->direction) {
				if (PF_REVERSED_KEY((*state)->key, pd->af)) {
					src = &(*state)->src;
					dst = &(*state)->dst;
				} else {
					src = &(*state)->dst;
					dst = &(*state)->src;
				}
			} else {
				if (PF_REVERSED_KEY((*state)->key, pd->af)) {
					src = &(*state)->dst;
					dst = &(*state)->src;
				} else {
					src = &(*state)->src;
					dst = &(*state)->dst;
				}
			}

			/* Window scaling applies only if both peers set it. */
			if (src->wscale && dst->wscale)
				dws = dst->wscale & PF_WSCALE_MASK;
			else
				dws = 0;

			/* Demodulate sequence number */
			seq = ntohl(th->th_seq) - src->seqdiff;
			if (src->seqdiff) {
				pf_patch_32(pd, &th->th_seq, htonl(seq));
				copyback = 1;
			}

			/* Non-sloppy states: quoted seq must be in window. */
			if (!((*state)->state_flags & PFSTATE_SLOPPY) &&
			    (!SEQ_GEQ(src->seqhi, seq) || !SEQ_GEQ(seq,
			    src->seqlo - (dst->max_win << dws)))) {
				if (pf_status.debug >= LOG_NOTICE) {
					log(LOG_NOTICE,
					    "pf: BAD ICMP %d:%d ",
					    icmptype, icmpcode);
					pf_print_host(pd->src, 0, pd->af);
					addlog(" -> ");
					pf_print_host(pd->dst, 0, pd->af);
					addlog(" state: ");
					pf_print_state(*state);
					addlog(" seq=%u\n", seq);
				}
				REASON_SET(reason, PFRES_BADSTATE);
				return (PF_DROP);
			} else {
				if (pf_status.debug >= LOG_DEBUG) {
					log(LOG_DEBUG,
					    "pf: OK ICMP %d:%d ",
					    icmptype, icmpcode);
					pf_print_host(pd->src, 0, pd->af);
					addlog(" -> ");
					pf_print_host(pd->dst, 0, pd->af);
					addlog(" state: ");
					pf_print_state(*state);
					addlog(" seq=%u\n", seq);
				}
			}

			/* translate source/destination address, if necessary */
			if ((*state)->key[PF_SK_WIRE] !=
			    (*state)->key[PF_SK_STACK]) {
				struct pf_state_key	*nk;
				int			 afto, sidx, didx;

				if (PF_REVERSED_KEY((*state)->key, pd->af))
					nk = (*state)->key[pd->sidx];
				else
					nk = (*state)->key[pd->didx];

				afto = pd->af != nk->af;
				sidx = afto ? pd2.didx : pd2.sidx;
				didx = afto ? pd2.sidx : pd2.didx;

#ifdef INET6
				if (afto) {
					if (pf_translate_icmp_af(pd, nk->af,
					    &pd->hdr.icmp))
						return (PF_DROP);
					m_copyback(pd->m, pd->off,
					    sizeof(struct icmp6_hdr),
					    &pd->hdr.icmp6, M_NOWAIT);
					if (pf_change_icmp_af(pd->m, ipoff2,
					    pd, &pd2, &nk->addr[sidx],
					    &nk->addr[didx], pd->af, nk->af))
						return (PF_DROP);
					if (nk->af == AF_INET)
						pd->proto = IPPROTO_ICMP;
					else
						pd->proto = IPPROTO_ICMPV6;
					pd->m->m_pkthdr.ph_rtableid =
					    nk->rdomain;
					pd->destchg = 1;
					pf_addrcpy(&pd->nsaddr,
					    &nk->addr[pd2.sidx], nk->af);
					pf_addrcpy(&pd->ndaddr,
					    &nk->addr[pd2.didx], nk->af);
					pd->naf = nk->af;

					pf_patch_16(pd,
					    &th->th_sport, nk->port[sidx]);
					pf_patch_16(pd,
					    &th->th_dport, nk->port[didx]);

					m_copyback(pd2.m, pd2.off, 8, th,
					    M_NOWAIT);
					return (PF_AFRT);
				}
#endif /* INET6 */
				if (PF_ANEQ(pd2.src,
				    &nk->addr[pd2.sidx], pd2.af) ||
				    nk->port[pd2.sidx] != th->th_sport)
					pf_translate_icmp(pd, pd2.src,
					    &th->th_sport, pd->dst,
					    &nk->addr[pd2.sidx],
					    nk->port[pd2.sidx]);

				if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx],
				    pd2.af) || pd2.rdomain != nk->rdomain)
					pd->destchg = 1;
				pd->m->m_pkthdr.ph_rtableid = nk->rdomain;

				if (PF_ANEQ(pd2.dst,
				    &nk->addr[pd2.didx], pd2.af) ||
				    nk->port[pd2.didx] != th->th_dport)
					pf_translate_icmp(pd, pd2.dst,
					    &th->th_dport, pd->src,
					    &nk->addr[pd2.didx],
					    nk->port[pd2.didx]);
				copyback = 1;
			}

			/* Write back outer ICMP, quoted IP and quoted TCP. */
			if (copyback) {
				switch (pd2.af) {
				case AF_INET:
					m_copyback(pd->m, pd->off, ICMP_MINLEN,
					    &pd->hdr.icmp, M_NOWAIT);
					m_copyback(pd2.m, ipoff2, sizeof(h2),
					    &h2, M_NOWAIT);
					break;
#ifdef INET6
				case AF_INET6:
					m_copyback(pd->m, pd->off,
					    sizeof(struct icmp6_hdr),
					    &pd->hdr.icmp6, M_NOWAIT);
					m_copyback(pd2.m, ipoff2, sizeof(h2_6),
					    &h2_6, M_NOWAIT);
					break;
#endif /* INET6 */
				}
				m_copyback(pd2.m, pd2.off, 8, th, M_NOWAIT);
			}
			break;
		}
		case IPPROTO_UDP: {
			struct udphdr	*uh = &pd2.hdr.udp;
			int		 action;

			if (!pf_pull_hdr(pd2.m, pd2.off, uh, sizeof(*uh),
			    NULL, reason, pd2.af)) {
				DPFPRINTF(LOG_NOTICE,
				    "ICMP error message too short (udp)");
				return (PF_DROP);
			}

			key.af = pd2.af;
			key.proto = IPPROTO_UDP;
			key.rdomain = pd2.rdomain;
			pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af);
			pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af);
			key.port[pd2.sidx] = uh->uh_sport;
			key.port[pd2.didx] = uh->uh_dport;

			action = pf_find_state(&pd2, &key, state);
			if (action != PF_MATCH)
				return (action);

			/* translate source/destination address, if necessary */
			if ((*state)->key[PF_SK_WIRE] !=
			    (*state)->key[PF_SK_STACK]) {
				struct pf_state_key	*nk;
				int			 afto, sidx, didx;

				if (PF_REVERSED_KEY((*state)->key, pd->af))
					nk = (*state)->key[pd->sidx];
				else
					nk = (*state)->key[pd->didx];

				afto = pd->af != nk->af;
				sidx = afto ? pd2.didx : pd2.sidx;
				didx = afto ? pd2.sidx : pd2.didx;

#ifdef INET6
				if (afto) {
					if (pf_translate_icmp_af(pd, nk->af,
					    &pd->hdr.icmp))
						return (PF_DROP);
					m_copyback(pd->m, pd->off,
					    sizeof(struct icmp6_hdr),
					    &pd->hdr.icmp6, M_NOWAIT);
					if (pf_change_icmp_af(pd->m, ipoff2,
					    pd, &pd2, &nk->addr[sidx],
					    &nk->addr[didx], pd->af, nk->af))
						return (PF_DROP);
					if (nk->af == AF_INET)
						pd->proto = IPPROTO_ICMP;
					else
						pd->proto = IPPROTO_ICMPV6;
					pd->m->m_pkthdr.ph_rtableid =
					    nk->rdomain;
					pd->destchg = 1;
					pf_addrcpy(&pd->nsaddr,
					    &nk->addr[pd2.sidx], nk->af);
					pf_addrcpy(&pd->ndaddr,
					    &nk->addr[pd2.didx], nk->af);
					pd->naf = nk->af;

					pf_patch_16(pd,
					    &uh->uh_sport, nk->port[sidx]);
					pf_patch_16(pd,
					    &uh->uh_dport, nk->port[didx]);

					m_copyback(pd2.m, pd2.off, sizeof(*uh),
					    uh, M_NOWAIT);
					return (PF_AFRT);
				}
#endif /* INET6 */

				if (PF_ANEQ(pd2.src,
				    &nk->addr[pd2.sidx], pd2.af) ||
				    nk->port[pd2.sidx] != uh->uh_sport)
					pf_translate_icmp(pd, pd2.src,
					    &uh->uh_sport, pd->dst,
					    &nk->addr[pd2.sidx],
					    nk->port[pd2.sidx]);

				if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx],
				    pd2.af) || pd2.rdomain != nk->rdomain)
					pd->destchg = 1;
				pd->m->m_pkthdr.ph_rtableid = nk->rdomain;

				if (PF_ANEQ(pd2.dst,
				    &nk->addr[pd2.didx], pd2.af) ||
				    nk->port[pd2.didx] != uh->uh_dport)
					pf_translate_icmp(pd, pd2.dst,
					    &uh->uh_dport, pd->src,
					    &nk->addr[pd2.didx],
					    nk->port[pd2.didx]);

				switch (pd2.af) {
				case AF_INET:
					m_copyback(pd->m, pd->off, ICMP_MINLEN,
					    &pd->hdr.icmp, M_NOWAIT);
					m_copyback(pd2.m, ipoff2, sizeof(h2),
					    &h2, M_NOWAIT);
					break;
#ifdef INET6
				case AF_INET6:
					m_copyback(pd->m, pd->off,
					    sizeof(struct icmp6_hdr),
					    &pd->hdr.icmp6, M_NOWAIT);
					m_copyback(pd2.m, ipoff2, sizeof(h2_6),
					    &h2_6, M_NOWAIT);
					break;
#endif /* INET6 */
				}
				/* Avoid recomputing quoted UDP checksum.
				 * note: udp6 0 csum invalid per rfc2460 p27.
				 * but presumed nothing cares in this context */
				pf_patch_16(pd, &uh->uh_sum, 0);
				m_copyback(pd2.m, pd2.off, sizeof(*uh), uh,
				    M_NOWAIT);
				copyback = 1;
			}
			break;
		}
		case IPPROTO_ICMP: {
			struct icmp	*iih = &pd2.hdr.icmp;

			/* A quoted ICMPv4 packet is only valid inside IPv4. */
			if (pd2.af != AF_INET) {
				REASON_SET(reason, PFRES_NORM);
				return (PF_DROP);
			}

			if (!pf_pull_hdr(pd2.m, pd2.off, iih, ICMP_MINLEN,
			    NULL, reason, pd2.af)) {
				DPFPRINTF(LOG_NOTICE,
				    "ICMP error message too short (icmp)");
				return (PF_DROP);
			}

			pf_icmp_mapping(&pd2, iih->icmp_type,
			    &icmp_dir, &virtual_id, &virtual_type);

			ret = pf_icmp_state_lookup(&pd2, &key, state,
			    virtual_id, virtual_type, icmp_dir, &iidx, 0, 1);
			if (ret >= 0)
				return (ret);

			/* translate source/destination address, if necessary */
			if ((*state)->key[PF_SK_WIRE] !=
			    (*state)->key[PF_SK_STACK]) {
				struct pf_state_key	*nk;
				int			 afto, sidx, didx;

				if (PF_REVERSED_KEY((*state)->key, pd->af))
					nk = (*state)->key[pd->sidx];
				else
					nk = (*state)->key[pd->didx];

				afto = pd->af != nk->af;
				sidx = afto ? pd2.didx : pd2.sidx;
				didx = afto ? pd2.sidx : pd2.didx;
				iidx = afto ? !iidx : iidx;

#ifdef INET6
				if (afto) {
					if (nk->af != AF_INET6)
						return (PF_DROP);
					if (pf_translate_icmp_af(pd, nk->af,
					    &pd->hdr.icmp))
						return (PF_DROP);
					m_copyback(pd->m, pd->off,
					    sizeof(struct icmp6_hdr),
					    &pd->hdr.icmp6, M_NOWAIT);
					if (pf_change_icmp_af(pd->m, ipoff2,
					    pd, &pd2, &nk->addr[sidx],
					    &nk->addr[didx], pd->af, nk->af))
						return (PF_DROP);
					pd->proto = IPPROTO_ICMPV6;
					if (pf_translate_icmp_af(pd,
						nk->af, iih))
						return (PF_DROP);
					if (virtual_type == htons(ICMP_ECHO))
						pf_patch_16(pd, &iih->icmp_id,
						    nk->port[iidx]);
					m_copyback(pd2.m, pd2.off, ICMP_MINLEN,
					    iih, M_NOWAIT);
					pd->m->m_pkthdr.ph_rtableid =
					    nk->rdomain;
					pd->destchg = 1;
					pf_addrcpy(&pd->nsaddr,
					    &nk->addr[pd2.sidx], nk->af);
					pf_addrcpy(&pd->ndaddr,
					    &nk->addr[pd2.didx], nk->af);
					pd->naf = nk->af;
					return (PF_AFRT);
				}
#endif /* INET6 */

				/* The id is only translated for echo. */
				if (PF_ANEQ(pd2.src,
				    &nk->addr[pd2.sidx], pd2.af) ||
				    (virtual_type == htons(ICMP_ECHO) &&
				    nk->port[iidx] != iih->icmp_id))
					pf_translate_icmp(pd, pd2.src,
					    (virtual_type == htons(ICMP_ECHO)) ?
					    &iih->icmp_id : NULL,
					    pd->dst, &nk->addr[pd2.sidx],
					    (virtual_type == htons(ICMP_ECHO)) ?
					    nk->port[iidx] : 0);

				if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx],
				    pd2.af) || pd2.rdomain != nk->rdomain)
					pd->destchg = 1;
				pd->m->m_pkthdr.ph_rtableid = nk->rdomain;

				if (PF_ANEQ(pd2.dst,
				    &nk->addr[pd2.didx], pd2.af))
					pf_translate_icmp(pd, pd2.dst, NULL,
					    pd->src, &nk->addr[pd2.didx], 0);

				m_copyback(pd->m, pd->off, ICMP_MINLEN,
				    &pd->hdr.icmp, M_NOWAIT);
				m_copyback(pd2.m, ipoff2, sizeof(h2), &h2,
				    M_NOWAIT);
				m_copyback(pd2.m, pd2.off, ICMP_MINLEN, iih,
				    M_NOWAIT);
				copyback = 1;
			}
			break;
		}
#ifdef INET6
		case IPPROTO_ICMPV6: {
			struct icmp6_hdr	*iih = &pd2.hdr.icmp6;

			/* A quoted ICMPv6 packet is only valid inside IPv6. */
			if (pd2.af != AF_INET6) {
				REASON_SET(reason, PFRES_NORM);
				return (PF_DROP);
			}

			if (!pf_pull_hdr(pd2.m, pd2.off, iih,
			    sizeof(struct icmp6_hdr), NULL, reason, pd2.af)) {
				DPFPRINTF(LOG_NOTICE,
				    "ICMP error message too short (icmp6)");
				return (PF_DROP);
			}

			pf_icmp_mapping(&pd2, iih->icmp6_type,
			    &icmp_dir, &virtual_id, &virtual_type);
			ret = pf_icmp_state_lookup(&pd2, &key, state,
			    virtual_id, virtual_type, icmp_dir, &iidx, 0, 1);
			/* IPv6? try matching a multicast address */
			if (ret == PF_DROP && pd2.af == AF_INET6 &&
			    icmp_dir == PF_OUT)
				ret = pf_icmp_state_lookup(&pd2, &key, state,
				    virtual_id, virtual_type, icmp_dir, &iidx,
				    1, 1);
			if (ret >= 0)
				return (ret);

			/* translate source/destination address, if necessary */
			if ((*state)->key[PF_SK_WIRE] !=
			    (*state)->key[PF_SK_STACK]) {
				struct pf_state_key	*nk;
				int			 afto, sidx, didx;

				if (PF_REVERSED_KEY((*state)->key, pd->af))
					nk = (*state)->key[pd->sidx];
				else
					nk = (*state)->key[pd->didx];

				afto = pd->af != nk->af;
				sidx = afto ? pd2.didx : pd2.sidx;
				didx = afto ? pd2.sidx : pd2.didx;
				iidx = afto ? !iidx : iidx;

				if (afto) {
					if (nk->af != AF_INET)
						return (PF_DROP);
					if (pf_translate_icmp_af(pd, nk->af,
					    &pd->hdr.icmp))
						return (PF_DROP);
					m_copyback(pd->m, pd->off,
					    sizeof(struct icmp6_hdr),
					    &pd->hdr.icmp6, M_NOWAIT);
					if (pf_change_icmp_af(pd->m, ipoff2,
					    pd, &pd2, &nk->addr[sidx],
					    &nk->addr[didx], pd->af, nk->af))
						return (PF_DROP);
					pd->proto = IPPROTO_ICMP;
					if (pf_translate_icmp_af(pd,
						nk->af, iih))
						return (PF_DROP);
					if (virtual_type ==
					    htons(ICMP6_ECHO_REQUEST))
						pf_patch_16(pd, &iih->icmp6_id,
						    nk->port[iidx]);
					m_copyback(pd2.m, pd2.off,
					    sizeof(struct icmp6_hdr), iih,
					    M_NOWAIT);
					pd->m->m_pkthdr.ph_rtableid =
					    nk->rdomain;
					pd->destchg = 1;
					pf_addrcpy(&pd->nsaddr,
					    &nk->addr[pd2.sidx], nk->af);
					pf_addrcpy(&pd->ndaddr,
					    &nk->addr[pd2.didx], nk->af);
					pd->naf = nk->af;
					return (PF_AFRT);
				}

				/*
				 * NOTE(review): the id comparison here uses
				 * nk->port[pd2.sidx] while the replacement
				 * value passed below, and the IPv4 ICMP case,
				 * use nk->port[iidx] -- confirm which index
				 * is intended.
				 */
				if (PF_ANEQ(pd2.src,
				    &nk->addr[pd2.sidx], pd2.af) ||
				    ((virtual_type ==
				    htons(ICMP6_ECHO_REQUEST)) &&
				    nk->port[pd2.sidx] != iih->icmp6_id))
					pf_translate_icmp(pd, pd2.src,
					    (virtual_type ==
					    htons(ICMP6_ECHO_REQUEST))
					    ? &iih->icmp6_id : NULL,
					    pd->dst, &nk->addr[pd2.sidx],
					    (virtual_type ==
					    htons(ICMP6_ECHO_REQUEST))
					    ? nk->port[iidx] : 0);

				if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx],
				    pd2.af) || pd2.rdomain != nk->rdomain)
					pd->destchg = 1;
				pd->m->m_pkthdr.ph_rtableid = nk->rdomain;

				if (PF_ANEQ(pd2.dst,
				    &nk->addr[pd2.didx], pd2.af))
					pf_translate_icmp(pd, pd2.dst, NULL,
					    pd->src, &nk->addr[pd2.didx], 0);

				m_copyback(pd->m, pd->off,
				    sizeof(struct icmp6_hdr), &pd->hdr.icmp6,
				    M_NOWAIT);
				m_copyback(pd2.m, ipoff2, sizeof(h2_6), &h2_6,
				    M_NOWAIT);
				m_copyback(pd2.m, pd2.off,
				    sizeof(struct icmp6_hdr), iih, M_NOWAIT);
				copyback = 1;
			}
			break;
		}
#endif /* INET6 */
		default: {
			int	action;

			/* Other protocols: key on addresses only, ports 0. */
			key.af = pd2.af;
			key.proto = pd2.proto;
			key.rdomain = pd2.rdomain;
			pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af);
			pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af);
			key.port[0] = key.port[1] = 0;

			action = pf_find_state(&pd2, &key, state);
			if (action != PF_MATCH)
				return (action);

			/* translate source/destination address, if necessary */
			if ((*state)->key[PF_SK_WIRE] !=
			    (*state)->key[PF_SK_STACK]) {
				struct pf_state_key *nk =
				    (*state)->key[pd->didx];

				if (PF_ANEQ(pd2.src,
				    &nk->addr[pd2.sidx], pd2.af))
					pf_translate_icmp(pd, pd2.src, NULL,
					    pd->dst, &nk->addr[pd2.sidx], 0);

				if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx],
				    pd2.af) || pd2.rdomain != nk->rdomain)
					pd->destchg = 1;
				pd->m->m_pkthdr.ph_rtableid = nk->rdomain;

				if (PF_ANEQ(pd2.dst,
				    &nk->addr[pd2.didx], pd2.af))
					pf_translate_icmp(pd, pd2.dst, NULL,
					    pd->src, &nk->addr[pd2.didx], 0);

				switch (pd2.af) {
				case AF_INET:
					m_copyback(pd->m, pd->off, ICMP_MINLEN,
					    &pd->hdr.icmp, M_NOWAIT);
					m_copyback(pd2.m, ipoff2, sizeof(h2),
					    &h2, M_NOWAIT);
					break;
#ifdef INET6
				case AF_INET6:
					m_copyback(pd->m, pd->off,
					    sizeof(struct icmp6_hdr),
&pd->hdr.icmp6, M_NOWAIT); 5787 m_copyback(pd2.m, ipoff2, sizeof(h2_6), 5788 &h2_6, M_NOWAIT); 5789 break; 5790 #endif /* INET6 */ 5791 } 5792 copyback = 1; 5793 } 5794 break; 5795 } 5796 } 5797 } 5798 if (copyback) { 5799 m_copyback(pd->m, pd->off, pd->hdrlen, &pd->hdr, M_NOWAIT); 5800 } 5801 5802 return (PF_PASS); 5803 } 5804 5805 /* 5806 * ipoff and off are measured from the start of the mbuf chain. 5807 * h must be at "ipoff" on the mbuf chain. 5808 */ 5809 void * 5810 pf_pull_hdr(struct mbuf *m, int off, void *p, int len, 5811 u_short *actionp, u_short *reasonp, sa_family_t af) 5812 { 5813 int iplen = 0; 5814 5815 switch (af) { 5816 case AF_INET: { 5817 struct ip *h = mtod(m, struct ip *); 5818 u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3; 5819 5820 if (fragoff) { 5821 if (fragoff >= len) 5822 ACTION_SET(actionp, PF_PASS); 5823 else { 5824 ACTION_SET(actionp, PF_DROP); 5825 REASON_SET(reasonp, PFRES_FRAG); 5826 } 5827 return (NULL); 5828 } 5829 iplen = ntohs(h->ip_len); 5830 break; 5831 } 5832 #ifdef INET6 5833 case AF_INET6: { 5834 struct ip6_hdr *h = mtod(m, struct ip6_hdr *); 5835 5836 iplen = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr); 5837 break; 5838 } 5839 #endif /* INET6 */ 5840 } 5841 if (m->m_pkthdr.len < off + len || iplen < off + len) { 5842 ACTION_SET(actionp, PF_DROP); 5843 REASON_SET(reasonp, PFRES_SHORT); 5844 return (NULL); 5845 } 5846 m_copydata(m, off, len, p); 5847 return (p); 5848 } 5849 5850 int 5851 pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif, 5852 int rtableid) 5853 { 5854 struct sockaddr_storage ss; 5855 struct sockaddr_in *dst; 5856 int ret = 1; 5857 int check_mpath; 5858 #ifdef INET6 5859 struct sockaddr_in6 *dst6; 5860 #endif /* INET6 */ 5861 struct rtentry *rt = NULL; 5862 5863 check_mpath = 0; 5864 memset(&ss, 0, sizeof(ss)); 5865 switch (af) { 5866 case AF_INET: 5867 dst = (struct sockaddr_in *)&ss; 5868 dst->sin_family = AF_INET; 5869 dst->sin_len = sizeof(*dst); 5870 dst->sin_addr = 
addr->v4; 5871 if (ipmultipath) 5872 check_mpath = 1; 5873 break; 5874 #ifdef INET6 5875 case AF_INET6: 5876 /* 5877 * Skip check for addresses with embedded interface scope, 5878 * as they would always match anyway. 5879 */ 5880 if (IN6_IS_SCOPE_EMBED(&addr->v6)) 5881 goto out; 5882 dst6 = (struct sockaddr_in6 *)&ss; 5883 dst6->sin6_family = AF_INET6; 5884 dst6->sin6_len = sizeof(*dst6); 5885 dst6->sin6_addr = addr->v6; 5886 if (ip6_multipath) 5887 check_mpath = 1; 5888 break; 5889 #endif /* INET6 */ 5890 } 5891 5892 /* Skip checks for ipsec interfaces */ 5893 if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC) 5894 goto out; 5895 5896 rt = rtalloc(sstosa(&ss), 0, rtableid); 5897 if (rt != NULL) { 5898 /* No interface given, this is a no-route check */ 5899 if (kif == NULL) 5900 goto out; 5901 5902 if (kif->pfik_ifp == NULL) { 5903 ret = 0; 5904 goto out; 5905 } 5906 5907 /* Perform uRPF check if passed input interface */ 5908 ret = 0; 5909 do { 5910 if (rt->rt_ifidx == kif->pfik_ifp->if_index) { 5911 ret = 1; 5912 #if NCARP > 0 5913 } else { 5914 struct ifnet *ifp; 5915 5916 ifp = if_get(rt->rt_ifidx); 5917 if (ifp != NULL && ifp->if_type == IFT_CARP && 5918 ifp->if_carpdevidx == 5919 kif->pfik_ifp->if_index) 5920 ret = 1; 5921 if_put(ifp); 5922 #endif /* NCARP */ 5923 } 5924 5925 rt = rtable_iterate(rt); 5926 } while (check_mpath == 1 && rt != NULL && ret == 0); 5927 } else 5928 ret = 0; 5929 out: 5930 rtfree(rt); 5931 return (ret); 5932 } 5933 5934 int 5935 pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw, 5936 int rtableid) 5937 { 5938 struct sockaddr_storage ss; 5939 struct sockaddr_in *dst; 5940 #ifdef INET6 5941 struct sockaddr_in6 *dst6; 5942 #endif /* INET6 */ 5943 struct rtentry *rt; 5944 int ret = 0; 5945 5946 memset(&ss, 0, sizeof(ss)); 5947 switch (af) { 5948 case AF_INET: 5949 dst = (struct sockaddr_in *)&ss; 5950 dst->sin_family = AF_INET; 5951 dst->sin_len = sizeof(*dst); 5952 dst->sin_addr = addr->v4; 5953 break; 
5954 #ifdef INET6 5955 case AF_INET6: 5956 dst6 = (struct sockaddr_in6 *)&ss; 5957 dst6->sin6_family = AF_INET6; 5958 dst6->sin6_len = sizeof(*dst6); 5959 dst6->sin6_addr = addr->v6; 5960 break; 5961 #endif /* INET6 */ 5962 } 5963 5964 rt = rtalloc(sstosa(&ss), RT_RESOLVE, rtableid); 5965 if (rt != NULL) { 5966 if (rt->rt_labelid == aw->v.rtlabel) 5967 ret = 1; 5968 rtfree(rt); 5969 } 5970 5971 return (ret); 5972 } 5973 5974 /* pf_route() may change pd->m, adjust local copies after calling */ 5975 void 5976 pf_route(struct pf_pdesc *pd, struct pf_rule *r, struct pf_state *s) 5977 { 5978 struct mbuf *m0, *m1; 5979 struct sockaddr_in *dst, sin; 5980 struct rtentry *rt = NULL; 5981 struct ip *ip; 5982 struct ifnet *ifp = NULL; 5983 struct pf_addr naddr; 5984 struct pf_src_node *sns[PF_SN_MAX]; 5985 int error = 0; 5986 unsigned int rtableid; 5987 5988 if (pd->m->m_pkthdr.pf.routed++ > 3) { 5989 m_freem(pd->m); 5990 pd->m = NULL; 5991 return; 5992 } 5993 5994 if (r->rt == PF_DUPTO) { 5995 if ((m0 = m_dup_pkt(pd->m, max_linkhdr, M_NOWAIT)) == NULL) 5996 return; 5997 } else { 5998 if ((r->rt == PF_REPLYTO) == (r->direction == pd->dir)) 5999 return; 6000 m0 = pd->m; 6001 } 6002 6003 if (m0->m_len < sizeof(struct ip)) { 6004 DPFPRINTF(LOG_ERR, 6005 "%s: m0->m_len < sizeof(struct ip)", __func__); 6006 goto bad; 6007 } 6008 6009 ip = mtod(m0, struct ip *); 6010 6011 memset(&sin, 0, sizeof(sin)); 6012 dst = &sin; 6013 dst->sin_family = AF_INET; 6014 dst->sin_len = sizeof(*dst); 6015 dst->sin_addr = ip->ip_dst; 6016 rtableid = m0->m_pkthdr.ph_rtableid; 6017 6018 if (pd->dir == PF_IN) { 6019 if (ip->ip_ttl <= IPTTLDEC) { 6020 if (r->rt != PF_DUPTO) 6021 pf_send_icmp(m0, ICMP_TIMXCEED, 6022 ICMP_TIMXCEED_INTRANS, 0, 6023 pd->af, r, pd->rdomain); 6024 goto bad; 6025 } 6026 ip->ip_ttl -= IPTTLDEC; 6027 } 6028 6029 if (s == NULL) { 6030 memset(sns, 0, sizeof(sns)); 6031 if (pf_map_addr(AF_INET, r, 6032 (struct pf_addr *)&ip->ip_src, 6033 &naddr, NULL, sns, &r->route, PF_SN_ROUTE)) { 
6034 DPFPRINTF(LOG_ERR, 6035 "%s: pf_map_addr() failed", __func__); 6036 goto bad; 6037 } 6038 6039 if (!PF_AZERO(&naddr, AF_INET)) 6040 dst->sin_addr.s_addr = naddr.v4.s_addr; 6041 ifp = r->route.kif ? 6042 r->route.kif->pfik_ifp : NULL; 6043 } else { 6044 if (!PF_AZERO(&s->rt_addr, AF_INET)) 6045 dst->sin_addr.s_addr = 6046 s->rt_addr.v4.s_addr; 6047 ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL; 6048 } 6049 if (ifp == NULL) 6050 goto bad; 6051 6052 if (pd->kif->pfik_ifp != ifp) { 6053 if (pf_test(AF_INET, PF_OUT, ifp, &m0) != PF_PASS) 6054 goto bad; 6055 else if (m0 == NULL) 6056 goto done; 6057 if (m0->m_len < sizeof(struct ip)) { 6058 DPFPRINTF(LOG_ERR, 6059 "%s: m0->m_len < sizeof(struct ip)", __func__); 6060 goto bad; 6061 } 6062 ip = mtod(m0, struct ip *); 6063 } 6064 6065 rt = rtalloc(sintosa(dst), RT_RESOLVE, rtableid); 6066 if (!rtisvalid(rt)) { 6067 ipstat_inc(ips_noroute); 6068 goto bad; 6069 } 6070 /* A locally generated packet may have invalid source address. */ 6071 if ((ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET && 6072 (ifp->if_flags & IFF_LOOPBACK) == 0) 6073 ip->ip_src = ifatoia(rt->rt_ifa)->ia_addr.sin_addr; 6074 6075 in_proto_cksum_out(m0, ifp); 6076 6077 if (ntohs(ip->ip_len) <= ifp->if_mtu) { 6078 ip->ip_sum = 0; 6079 if (ifp->if_capabilities & IFCAP_CSUM_IPv4) 6080 m0->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT; 6081 else { 6082 ipstat_inc(ips_outswcsum); 6083 ip->ip_sum = in_cksum(m0, ip->ip_hl << 2); 6084 } 6085 error = ifp->if_output(ifp, m0, sintosa(dst), rt); 6086 goto done; 6087 } 6088 6089 /* 6090 * Too large for interface; fragment if possible. 6091 * Must be able to put at least 8 bytes per fragment. 
6092 */ 6093 if (ip->ip_off & htons(IP_DF)) { 6094 ipstat_inc(ips_cantfrag); 6095 if (r->rt != PF_DUPTO) 6096 pf_send_icmp(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 6097 ifp->if_mtu, pd->af, r, pd->rdomain); 6098 goto bad; 6099 } 6100 6101 m1 = m0; 6102 error = ip_fragment(m0, ifp, ifp->if_mtu); 6103 if (error) { 6104 m0 = NULL; 6105 goto bad; 6106 } 6107 6108 for (m0 = m1; m0; m0 = m1) { 6109 m1 = m0->m_nextpkt; 6110 m0->m_nextpkt = 0; 6111 if (error == 0) 6112 error = ifp->if_output(ifp, m0, sintosa(dst), rt); 6113 else 6114 m_freem(m0); 6115 } 6116 6117 if (error == 0) 6118 ipstat_inc(ips_fragmented); 6119 6120 done: 6121 if (r->rt != PF_DUPTO) 6122 pd->m = NULL; 6123 rtfree(rt); 6124 return; 6125 6126 bad: 6127 m_freem(m0); 6128 goto done; 6129 } 6130 6131 #ifdef INET6 6132 /* pf_route6() may change pd->m, adjust local copies after calling */ 6133 void 6134 pf_route6(struct pf_pdesc *pd, struct pf_rule *r, struct pf_state *s) 6135 { 6136 struct mbuf *m0; 6137 struct sockaddr_in6 *dst, sin6; 6138 struct rtentry *rt = NULL; 6139 struct ip6_hdr *ip6; 6140 struct ifnet *ifp = NULL; 6141 struct pf_addr naddr; 6142 struct pf_src_node *sns[PF_SN_MAX]; 6143 struct m_tag *mtag; 6144 unsigned int rtableid; 6145 6146 if (pd->m->m_pkthdr.pf.routed++ > 3) { 6147 m_freem(pd->m); 6148 pd->m = NULL; 6149 return; 6150 } 6151 6152 if (r->rt == PF_DUPTO) { 6153 if ((m0 = m_dup_pkt(pd->m, max_linkhdr, M_NOWAIT)) == NULL) 6154 return; 6155 } else { 6156 if ((r->rt == PF_REPLYTO) == (r->direction == pd->dir)) 6157 return; 6158 m0 = pd->m; 6159 } 6160 6161 if (m0->m_len < sizeof(struct ip6_hdr)) { 6162 DPFPRINTF(LOG_ERR, 6163 "%s: m0->m_len < sizeof(struct ip6_hdr)", __func__); 6164 goto bad; 6165 } 6166 ip6 = mtod(m0, struct ip6_hdr *); 6167 6168 memset(&sin6, 0, sizeof(sin6)); 6169 dst = &sin6; 6170 dst->sin6_family = AF_INET6; 6171 dst->sin6_len = sizeof(*dst); 6172 dst->sin6_addr = ip6->ip6_dst; 6173 rtableid = m0->m_pkthdr.ph_rtableid; 6174 6175 if (pd->dir == PF_IN) { 6176 if 
(ip6->ip6_hlim <= IPV6_HLIMDEC) { 6177 if (r->rt != PF_DUPTO) 6178 pf_send_icmp(m0, ICMP6_TIME_EXCEEDED, 6179 ICMP6_TIME_EXCEED_TRANSIT, 0, 6180 pd->af, r, pd->rdomain); 6181 goto bad; 6182 } 6183 ip6->ip6_hlim -= IPV6_HLIMDEC; 6184 } 6185 6186 if (s == NULL) { 6187 memset(sns, 0, sizeof(sns)); 6188 if (pf_map_addr(AF_INET6, r, (struct pf_addr *)&ip6->ip6_src, 6189 &naddr, NULL, sns, &r->route, PF_SN_ROUTE)) { 6190 DPFPRINTF(LOG_ERR, 6191 "%s: pf_map_addr() failed", __func__); 6192 goto bad; 6193 } 6194 if (!PF_AZERO(&naddr, AF_INET6)) 6195 pf_addrcpy((struct pf_addr *)&dst->sin6_addr, 6196 &naddr, AF_INET6); 6197 ifp = r->route.kif ? r->route.kif->pfik_ifp : NULL; 6198 } else { 6199 if (!PF_AZERO(&s->rt_addr, AF_INET6)) 6200 pf_addrcpy((struct pf_addr *)&dst->sin6_addr, 6201 &s->rt_addr, AF_INET6); 6202 ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL; 6203 } 6204 if (ifp == NULL) 6205 goto bad; 6206 6207 if (pd->kif->pfik_ifp != ifp) { 6208 if (pf_test(AF_INET6, PF_OUT, ifp, &m0) != PF_PASS) 6209 goto bad; 6210 else if (m0 == NULL) 6211 goto done; 6212 if (m0->m_len < sizeof(struct ip6_hdr)) { 6213 DPFPRINTF(LOG_ERR, 6214 "%s: m0->m_len < sizeof(struct ip6_hdr)", __func__); 6215 goto bad; 6216 } 6217 } 6218 6219 if (IN6_IS_SCOPE_EMBED(&dst->sin6_addr)) 6220 dst->sin6_addr.s6_addr16[1] = htons(ifp->if_index); 6221 rt = rtalloc(sin6tosa(dst), RT_RESOLVE, rtableid); 6222 if (!rtisvalid(rt)) { 6223 ip6stat_inc(ip6s_noroute); 6224 goto bad; 6225 } 6226 /* A locally generated packet may have invalid source address. */ 6227 if (IN6_IS_ADDR_LOOPBACK(&ip6->ip6_src) && 6228 (ifp->if_flags & IFF_LOOPBACK) == 0) 6229 ip6->ip6_src = ifatoia6(rt->rt_ifa)->ia_addr.sin6_addr; 6230 6231 in6_proto_cksum_out(m0, ifp); 6232 6233 /* 6234 * If packet has been reassembled by PF earlier, we have to 6235 * use pf_refragment6() here to turn it back to fragments. 
6236 */ 6237 if ((mtag = m_tag_find(m0, PACKET_TAG_PF_REASSEMBLED, NULL))) { 6238 (void) pf_refragment6(&m0, mtag, dst, ifp, rt); 6239 } else if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) { 6240 ifp->if_output(ifp, m0, sin6tosa(dst), rt); 6241 } else { 6242 ip6stat_inc(ip6s_cantfrag); 6243 if (r->rt != PF_DUPTO) 6244 pf_send_icmp(m0, ICMP6_PACKET_TOO_BIG, 0, 6245 ifp->if_mtu, pd->af, r, pd->rdomain); 6246 goto bad; 6247 } 6248 6249 done: 6250 if (r->rt != PF_DUPTO) 6251 pd->m = NULL; 6252 rtfree(rt); 6253 return; 6254 6255 bad: 6256 m_freem(m0); 6257 goto done; 6258 } 6259 #endif /* INET6 */ 6260 6261 /* 6262 * check TCP checksum and set mbuf flag 6263 * off is the offset where the protocol header starts 6264 * len is the total length of protocol header plus payload 6265 * returns 0 when the checksum is valid, otherwise returns 1. 6266 * if the _OUT flag is set the checksum isn't done yet, consider these ok 6267 */ 6268 int 6269 pf_check_tcp_cksum(struct mbuf *m, int off, int len, sa_family_t af) 6270 { 6271 u_int16_t sum; 6272 6273 if (m->m_pkthdr.csum_flags & 6274 (M_TCP_CSUM_IN_OK | M_TCP_CSUM_OUT)) { 6275 return (0); 6276 } 6277 if (m->m_pkthdr.csum_flags & M_TCP_CSUM_IN_BAD || 6278 off < sizeof(struct ip) || 6279 m->m_pkthdr.len < off + len) { 6280 return (1); 6281 } 6282 6283 /* need to do it in software */ 6284 tcpstat_inc(tcps_inswcsum); 6285 6286 switch (af) { 6287 case AF_INET: 6288 if (m->m_len < sizeof(struct ip)) 6289 return (1); 6290 6291 sum = in4_cksum(m, IPPROTO_TCP, off, len); 6292 break; 6293 #ifdef INET6 6294 case AF_INET6: 6295 if (m->m_len < sizeof(struct ip6_hdr)) 6296 return (1); 6297 6298 sum = in6_cksum(m, IPPROTO_TCP, off, len); 6299 break; 6300 #endif /* INET6 */ 6301 default: 6302 unhandled_af(af); 6303 } 6304 if (sum) { 6305 tcpstat_inc(tcps_rcvbadsum); 6306 m->m_pkthdr.csum_flags |= M_TCP_CSUM_IN_BAD; 6307 return (1); 6308 } 6309 6310 m->m_pkthdr.csum_flags |= M_TCP_CSUM_IN_OK; 6311 return (0); 6312 } 6313 6314 struct pf_divert * 6315 
pf_find_divert(struct mbuf *m) 6316 { 6317 struct m_tag *mtag; 6318 6319 if ((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL) 6320 return (NULL); 6321 6322 return ((struct pf_divert *)(mtag + 1)); 6323 } 6324 6325 struct pf_divert * 6326 pf_get_divert(struct mbuf *m) 6327 { 6328 struct m_tag *mtag; 6329 6330 if ((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL) { 6331 mtag = m_tag_get(PACKET_TAG_PF_DIVERT, sizeof(struct pf_divert), 6332 M_NOWAIT); 6333 if (mtag == NULL) 6334 return (NULL); 6335 memset(mtag + 1, 0, sizeof(struct pf_divert)); 6336 m_tag_prepend(m, mtag); 6337 } 6338 6339 return ((struct pf_divert *)(mtag + 1)); 6340 } 6341 6342 int 6343 pf_walk_header(struct pf_pdesc *pd, struct ip *h, u_short *reason) 6344 { 6345 struct ip6_ext ext; 6346 u_int32_t hlen, end; 6347 int hdr_cnt; 6348 6349 hlen = h->ip_hl << 2; 6350 if (hlen < sizeof(struct ip) || hlen > ntohs(h->ip_len)) { 6351 REASON_SET(reason, PFRES_SHORT); 6352 return (PF_DROP); 6353 } 6354 if (hlen != sizeof(struct ip)) 6355 pd->badopts++; 6356 end = pd->off + ntohs(h->ip_len); 6357 pd->off += hlen; 6358 pd->proto = h->ip_p; 6359 /* stop walking over non initial fragments */ 6360 if ((h->ip_off & htons(IP_OFFMASK)) != 0) 6361 return (PF_PASS); 6362 6363 for (hdr_cnt = 0; hdr_cnt < pf_hdr_limit; hdr_cnt++) { 6364 switch (pd->proto) { 6365 case IPPROTO_AH: 6366 /* fragments may be short */ 6367 if ((h->ip_off & htons(IP_MF | IP_OFFMASK)) != 0 && 6368 end < pd->off + sizeof(ext)) 6369 return (PF_PASS); 6370 if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext), 6371 NULL, reason, AF_INET)) { 6372 DPFPRINTF(LOG_NOTICE, "IP short exthdr"); 6373 return (PF_DROP); 6374 } 6375 pd->off += (ext.ip6e_len + 2) * 4; 6376 pd->proto = ext.ip6e_nxt; 6377 break; 6378 default: 6379 return (PF_PASS); 6380 } 6381 } 6382 DPFPRINTF(LOG_NOTICE, "IPv4 nested authentication header limit"); 6383 REASON_SET(reason, PFRES_IPOPTIONS); 6384 return (PF_DROP); 6385 } 6386 6387 #ifdef INET6 6388 int 6389 
/*
 * Walk the IPv6 extension header chain that follows the fixed header h.
 *
 * On PF_PASS pd->off and pd->proto describe the header at which the walk
 * stopped; pd->fragoff, pd->extoff and pd->jumbolen are reset on entry and
 * filled in while walking, and pd->badopts is bumped for every routing,
 * hop-by-hop or destination options header seen.  When a fragment is too
 * short for the header being looked at, pd->off and pd->proto are rewound
 * to the fragment header.
 *
 * Returns PF_DROP (with *reason set either here or by pf_pull_hdr()) on
 * malformed chains or when more than pf_hdr_limit headers are nested.
 */
int
pf_walk_header6(struct pf_pdesc *pd, struct ip6_hdr *h, u_short *reason)
{
	struct ip6_frag		 frag;
	struct ip6_ext		 ext;
	struct ip6_rthdr	 rthdr;
	u_int32_t		 end;
	int			 hdr_cnt, fraghdr_cnt = 0, rthdr_cnt = 0;

	pd->off += sizeof(struct ip6_hdr);
	/* end of payload as announced by the fixed header */
	end = pd->off + ntohs(h->ip6_plen);
	pd->fragoff = pd->extoff = pd->jumbolen = 0;
	pd->proto = h->ip6_nxt;

	for (hdr_cnt = 0; hdr_cnt < pf_hdr_limit; hdr_cnt++) {
		/* first pass: only count headers that carry options */
		switch (pd->proto) {
		case IPPROTO_ROUTING:
		case IPPROTO_HOPOPTS:
		case IPPROTO_DSTOPTS:
			pd->badopts++;
			break;
		}
		/* second pass: validate and step over the header */
		switch (pd->proto) {
		case IPPROTO_FRAGMENT:
			if (fraghdr_cnt++) {
				DPFPRINTF(LOG_NOTICE, "IPv6 multiple fragment");
				REASON_SET(reason, PFRES_FRAG);
				return (PF_DROP);
			}
			/* jumbo payload packets cannot be fragmented */
			if (pd->jumbolen != 0) {
				DPFPRINTF(LOG_NOTICE, "IPv6 fragmented jumbo");
				REASON_SET(reason, PFRES_FRAG);
				return (PF_DROP);
			}
			if (!pf_pull_hdr(pd->m, pd->off, &frag, sizeof(frag),
			    NULL, reason, AF_INET6)) {
				DPFPRINTF(LOG_NOTICE, "IPv6 short fragment");
				return (PF_DROP);
			}
			/* stop walking over non initial fragments */
			if (ntohs((frag.ip6f_offlg & IP6F_OFF_MASK)) != 0) {
				pd->fragoff = pd->off;
				return (PF_PASS);
			}
			/* RFC6946: reassemble only non atomic fragments */
			if (frag.ip6f_offlg & IP6F_MORE_FRAG)
				pd->fragoff = pd->off;
			pd->off += sizeof(frag);
			pd->proto = frag.ip6f_nxt;
			break;
		case IPPROTO_ROUTING:
			if (rthdr_cnt++) {
				DPFPRINTF(LOG_NOTICE, "IPv6 multiple rthdr");
				REASON_SET(reason, PFRES_IPOPTIONS);
				return (PF_DROP);
			}
			/* fragments may be short */
			if (pd->fragoff != 0 && end < pd->off + sizeof(rthdr)) {
				pd->off = pd->fragoff;
				pd->proto = IPPROTO_FRAGMENT;
				return (PF_PASS);
			}
			if (!pf_pull_hdr(pd->m, pd->off, &rthdr, sizeof(rthdr),
			    NULL, reason, AF_INET6)) {
				DPFPRINTF(LOG_NOTICE, "IPv6 short rthdr");
				return (PF_DROP);
			}
			/* type 0 routing headers are always rejected */
			if (rthdr.ip6r_type == IPV6_RTHDR_TYPE_0) {
				DPFPRINTF(LOG_NOTICE, "IPv6 rthdr0");
				REASON_SET(reason, PFRES_IPOPTIONS);
				return (PF_DROP);
			}
			/* FALLTHROUGH */
		case IPPROTO_HOPOPTS:
			/* RFC2460 4.1: Hop-by-Hop only after IPv6 header */
			if (pd->proto == IPPROTO_HOPOPTS && hdr_cnt > 0) {
				DPFPRINTF(LOG_NOTICE, "IPv6 hopopts not first");
				REASON_SET(reason, PFRES_IPOPTIONS);
				return (PF_DROP);
			}
			/* FALLTHROUGH */
		case IPPROTO_AH:
		case IPPROTO_DSTOPTS:
			/* fragments may be short */
			if (pd->fragoff != 0 && end < pd->off + sizeof(ext)) {
				pd->off = pd->fragoff;
				pd->proto = IPPROTO_FRAGMENT;
				return (PF_PASS);
			}
			if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext),
			    NULL, reason, AF_INET6)) {
				DPFPRINTF(LOG_NOTICE, "IPv6 short exthdr");
				return (PF_DROP);
			}
			/* reassembly needs the ext header before the frag */
			if (pd->fragoff == 0)
				pd->extoff = pd->off;
			/* options are only parsed for an unfragmented hop-by-hop */
			if (pd->proto == IPPROTO_HOPOPTS && pd->fragoff == 0) {
				if (pf_walk_option6(pd, h,
				    pd->off + sizeof(ext),
				    pd->off + (ext.ip6e_len + 1) * 8, reason)
				    != PF_PASS)
					return (PF_DROP);
				/*
				 * NOTE(review): this fires when a jumbo length
				 * WAS recorded (jumbolen != 0) although the log
				 * text says "missing" -- confirm the intended
				 * polarity of the jumbolen test.
				 */
				if (ntohs(h->ip6_plen) == 0 &&
				    pd->jumbolen != 0) {
					DPFPRINTF(LOG_NOTICE,
					    "IPv6 missing jumbo");
					REASON_SET(reason, PFRES_IPOPTIONS);
					return (PF_DROP);
				}
			}
			/* AH counts its length in 4-byte units, the rest in 8 */
			if (pd->proto == IPPROTO_AH)
				pd->off += (ext.ip6e_len + 2) * 4;
			else
				pd->off += (ext.ip6e_len + 1) * 8;
			pd->proto = ext.ip6e_nxt;
			break;
		case IPPROTO_TCP:
		case IPPROTO_UDP:
		case IPPROTO_ICMPV6:
			/* fragments may be short, ignore inner header then */
			if (pd->fragoff != 0 && end < pd->off +
			    (pd->proto == IPPROTO_TCP ? sizeof(struct tcphdr) :
			    pd->proto == IPPROTO_UDP ? sizeof(struct udphdr) :
			    sizeof(struct icmp6_hdr))) {
				pd->off = pd->fragoff;
				pd->proto = IPPROTO_FRAGMENT;
			}
			/* FALLTHROUGH */
		default:
			return (PF_PASS);
		}
	}
	DPFPRINTF(LOG_NOTICE, "IPv6 nested extension header limit");
	REASON_SET(reason, PFRES_IPOPTIONS);
	return (PF_DROP);
}
6628 PF_VPROTO_FRAGMENT : pd->proto; 6629 6630 break; 6631 } 6632 #ifdef INET6 6633 case AF_INET6: { 6634 struct ip6_hdr *h; 6635 6636 /* Check for illegal packets */ 6637 if (pd->m->m_pkthdr.len < (int)sizeof(struct ip6_hdr)) { 6638 REASON_SET(reason, PFRES_SHORT); 6639 return (PF_DROP); 6640 } 6641 6642 h = mtod(pd->m, struct ip6_hdr *); 6643 if (pd->m->m_pkthdr.len < 6644 sizeof(struct ip6_hdr) + ntohs(h->ip6_plen)) { 6645 REASON_SET(reason, PFRES_SHORT); 6646 return (PF_DROP); 6647 } 6648 6649 if (pf_walk_header6(pd, h, reason) != PF_PASS) 6650 return (PF_DROP); 6651 6652 #if 1 6653 /* 6654 * we do not support jumbogram yet. if we keep going, zero 6655 * ip6_plen will do something bad, so drop the packet for now. 6656 */ 6657 if (pd->jumbolen != 0) { 6658 REASON_SET(reason, PFRES_NORM); 6659 return (PF_DROP); 6660 } 6661 #endif /* 1 */ 6662 6663 pd->src = (struct pf_addr *)&h->ip6_src; 6664 pd->dst = (struct pf_addr *)&h->ip6_dst; 6665 pd->tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr); 6666 pd->tos = (ntohl(h->ip6_flow) & 0x0fc00000) >> 20; 6667 pd->ttl = h->ip6_hlim; 6668 pd->virtual_proto = (pd->fragoff != 0) ? 
6669 PF_VPROTO_FRAGMENT : pd->proto; 6670 6671 break; 6672 } 6673 #endif /* INET6 */ 6674 default: 6675 panic("pf_setup_pdesc called with illegal af %u", pd->af); 6676 6677 } 6678 6679 pf_addrcpy(&pd->nsaddr, pd->src, pd->af); 6680 pf_addrcpy(&pd->ndaddr, pd->dst, pd->af); 6681 6682 switch (pd->virtual_proto) { 6683 case IPPROTO_TCP: { 6684 struct tcphdr *th = &pd->hdr.tcp; 6685 6686 if (!pf_pull_hdr(pd->m, pd->off, th, sizeof(*th), 6687 NULL, reason, pd->af)) 6688 return (PF_DROP); 6689 pd->hdrlen = sizeof(*th); 6690 if (pd->off + (th->th_off << 2) > pd->tot_len || 6691 (th->th_off << 2) < sizeof(struct tcphdr)) { 6692 REASON_SET(reason, PFRES_SHORT); 6693 return (PF_DROP); 6694 } 6695 pd->p_len = pd->tot_len - pd->off - (th->th_off << 2); 6696 pd->sport = &th->th_sport; 6697 pd->dport = &th->th_dport; 6698 pd->pcksum = &th->th_sum; 6699 break; 6700 } 6701 case IPPROTO_UDP: { 6702 struct udphdr *uh = &pd->hdr.udp; 6703 6704 if (!pf_pull_hdr(pd->m, pd->off, uh, sizeof(*uh), 6705 NULL, reason, pd->af)) 6706 return (PF_DROP); 6707 pd->hdrlen = sizeof(*uh); 6708 if (uh->uh_dport == 0 || 6709 pd->off + ntohs(uh->uh_ulen) > pd->tot_len || 6710 ntohs(uh->uh_ulen) < sizeof(struct udphdr)) { 6711 REASON_SET(reason, PFRES_SHORT); 6712 return (PF_DROP); 6713 } 6714 pd->sport = &uh->uh_sport; 6715 pd->dport = &uh->uh_dport; 6716 pd->pcksum = &uh->uh_sum; 6717 break; 6718 } 6719 case IPPROTO_ICMP: { 6720 if (!pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp, ICMP_MINLEN, 6721 NULL, reason, pd->af)) 6722 return (PF_DROP); 6723 pd->hdrlen = ICMP_MINLEN; 6724 if (pd->off + pd->hdrlen > pd->tot_len) { 6725 REASON_SET(reason, PFRES_SHORT); 6726 return (PF_DROP); 6727 } 6728 pd->pcksum = &pd->hdr.icmp.icmp_cksum; 6729 break; 6730 } 6731 #ifdef INET6 6732 case IPPROTO_ICMPV6: { 6733 size_t icmp_hlen = sizeof(struct icmp6_hdr); 6734 6735 if (!pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp6, icmp_hlen, 6736 NULL, reason, pd->af)) 6737 return (PF_DROP); 6738 /* ICMP headers we look further into to 
match state */ 6739 switch (pd->hdr.icmp6.icmp6_type) { 6740 case MLD_LISTENER_QUERY: 6741 case MLD_LISTENER_REPORT: 6742 icmp_hlen = sizeof(struct mld_hdr); 6743 break; 6744 case ND_NEIGHBOR_SOLICIT: 6745 case ND_NEIGHBOR_ADVERT: 6746 icmp_hlen = sizeof(struct nd_neighbor_solicit); 6747 /* FALLTHROUGH */ 6748 case ND_ROUTER_SOLICIT: 6749 case ND_ROUTER_ADVERT: 6750 case ND_REDIRECT: 6751 if (pd->ttl != 255) { 6752 REASON_SET(reason, PFRES_NORM); 6753 return (PF_DROP); 6754 } 6755 break; 6756 } 6757 if (icmp_hlen > sizeof(struct icmp6_hdr) && 6758 !pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp6, icmp_hlen, 6759 NULL, reason, pd->af)) 6760 return (PF_DROP); 6761 pd->hdrlen = icmp_hlen; 6762 if (pd->off + pd->hdrlen > pd->tot_len) { 6763 REASON_SET(reason, PFRES_SHORT); 6764 return (PF_DROP); 6765 } 6766 pd->pcksum = &pd->hdr.icmp6.icmp6_cksum; 6767 break; 6768 } 6769 #endif /* INET6 */ 6770 } 6771 6772 if (pd->sport) 6773 pd->osport = pd->nsport = *pd->sport; 6774 if (pd->dport) 6775 pd->odport = pd->ndport = *pd->dport; 6776 6777 return (PF_PASS); 6778 } 6779 6780 void 6781 pf_counters_inc(int action, struct pf_pdesc *pd, struct pf_state *s, 6782 struct pf_rule *r, struct pf_rule *a) 6783 { 6784 int dirndx; 6785 pd->kif->pfik_bytes[pd->af == AF_INET6][pd->dir == PF_OUT] 6786 [action != PF_PASS] += pd->tot_len; 6787 pd->kif->pfik_packets[pd->af == AF_INET6][pd->dir == PF_OUT] 6788 [action != PF_PASS]++; 6789 6790 if (action == PF_PASS || action == PF_AFRT || r->action == PF_DROP) { 6791 dirndx = (pd->dir == PF_OUT); 6792 r->packets[dirndx]++; 6793 r->bytes[dirndx] += pd->tot_len; 6794 if (a != NULL) { 6795 a->packets[dirndx]++; 6796 a->bytes[dirndx] += pd->tot_len; 6797 } 6798 if (s != NULL) { 6799 struct pf_rule_item *ri; 6800 struct pf_sn_item *sni; 6801 6802 SLIST_FOREACH(sni, &s->src_nodes, next) { 6803 sni->sn->packets[dirndx]++; 6804 sni->sn->bytes[dirndx] += pd->tot_len; 6805 } 6806 dirndx = (pd->dir == s->direction) ? 
0 : 1; 6807 s->packets[dirndx]++; 6808 s->bytes[dirndx] += pd->tot_len; 6809 6810 SLIST_FOREACH(ri, &s->match_rules, entry) { 6811 ri->r->packets[dirndx]++; 6812 ri->r->bytes[dirndx] += pd->tot_len; 6813 6814 if (ri->r->src.addr.type == PF_ADDR_TABLE) 6815 pfr_update_stats(ri->r->src.addr.p.tbl, 6816 &s->key[(s->direction == PF_IN)]-> 6817 addr[(s->direction == PF_OUT)], 6818 pd, ri->r->action, ri->r->src.neg); 6819 if (ri->r->dst.addr.type == PF_ADDR_TABLE) 6820 pfr_update_stats(ri->r->dst.addr.p.tbl, 6821 &s->key[(s->direction == PF_IN)]-> 6822 addr[(s->direction == PF_IN)], 6823 pd, ri->r->action, ri->r->dst.neg); 6824 } 6825 } 6826 if (r->src.addr.type == PF_ADDR_TABLE) 6827 pfr_update_stats(r->src.addr.p.tbl, 6828 (s == NULL) ? pd->src : 6829 &s->key[(s->direction == PF_IN)]-> 6830 addr[(s->direction == PF_OUT)], 6831 pd, r->action, r->src.neg); 6832 if (r->dst.addr.type == PF_ADDR_TABLE) 6833 pfr_update_stats(r->dst.addr.p.tbl, 6834 (s == NULL) ? pd->dst : 6835 &s->key[(s->direction == PF_IN)]-> 6836 addr[(s->direction == PF_IN)], 6837 pd, r->action, r->dst.neg); 6838 } 6839 } 6840 6841 int 6842 pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0) 6843 { 6844 struct ifnet *ifp0; 6845 struct pfi_kif *kif; 6846 u_short action, reason = 0; 6847 struct pf_rule *a = NULL, *r = &pf_default_rule; 6848 struct pf_state *s = NULL; 6849 struct pf_ruleset *ruleset = NULL; 6850 struct pf_pdesc pd; 6851 int dir = (fwdir == PF_FWD) ? 
PF_OUT : fwdir; 6852 u_int32_t qid, pqid = 0; 6853 int have_pf_lock = 0; 6854 6855 if (!pf_status.running) 6856 return (PF_PASS); 6857 6858 #if NCARP > 0 6859 if (ifp->if_type == IFT_CARP && 6860 (ifp0 = if_get(ifp->if_carpdevidx)) != NULL) { 6861 kif = (struct pfi_kif *)ifp0->if_pf_kif; 6862 if_put(ifp0); 6863 } else 6864 #endif /* NCARP */ 6865 kif = (struct pfi_kif *)ifp->if_pf_kif; 6866 6867 if (kif == NULL) { 6868 DPFPRINTF(LOG_ERR, 6869 "%s: kif == NULL, if_xname %s", __func__, ifp->if_xname); 6870 return (PF_DROP); 6871 } 6872 if (kif->pfik_flags & PFI_IFLAG_SKIP) 6873 return (PF_PASS); 6874 6875 #ifdef DIAGNOSTIC 6876 if (((*m0)->m_flags & M_PKTHDR) == 0) 6877 panic("non-M_PKTHDR is passed to pf_test"); 6878 #endif /* DIAGNOSTIC */ 6879 6880 if ((*m0)->m_pkthdr.pf.flags & PF_TAG_GENERATED) 6881 return (PF_PASS); 6882 6883 if ((*m0)->m_pkthdr.pf.flags & PF_TAG_DIVERTED_PACKET) 6884 return (PF_PASS); 6885 6886 if ((*m0)->m_pkthdr.pf.flags & PF_TAG_REFRAGMENTED) { 6887 (*m0)->m_pkthdr.pf.flags &= ~PF_TAG_REFRAGMENTED; 6888 return (PF_PASS); 6889 } 6890 6891 action = pf_setup_pdesc(&pd, af, dir, kif, *m0, &reason); 6892 if (action != PF_PASS) { 6893 #if NPFLOG > 0 6894 pd.pflog |= PF_LOG_FORCE; 6895 #endif /* NPFLOG > 0 */ 6896 goto done; 6897 } 6898 6899 /* packet normalization and reassembly */ 6900 switch (pd.af) { 6901 case AF_INET: 6902 action = pf_normalize_ip(&pd, &reason); 6903 break; 6904 #ifdef INET6 6905 case AF_INET6: 6906 action = pf_normalize_ip6(&pd, &reason); 6907 break; 6908 #endif /* INET6 */ 6909 } 6910 *m0 = pd.m; 6911 /* if packet sits in reassembly queue, return without error */ 6912 if (pd.m == NULL) 6913 return PF_PASS; 6914 6915 if (action != PF_PASS) { 6916 #if NPFLOG > 0 6917 pd.pflog |= PF_LOG_FORCE; 6918 #endif /* NPFLOG > 0 */ 6919 goto done; 6920 } 6921 6922 /* if packet has been reassembled, update packet description */ 6923 if (pf_status.reass && pd.virtual_proto == PF_VPROTO_FRAGMENT) { 6924 action = pf_setup_pdesc(&pd, af, 
dir, kif, pd.m, &reason); 6925 if (action != PF_PASS) { 6926 #if NPFLOG > 0 6927 pd.pflog |= PF_LOG_FORCE; 6928 #endif /* NPFLOG > 0 */ 6929 goto done; 6930 } 6931 } 6932 pd.m->m_pkthdr.pf.flags |= PF_TAG_PROCESSED; 6933 6934 /* 6935 * Avoid pcb-lookups from the forwarding path. They should never 6936 * match and would cause MP locking problems. 6937 */ 6938 if (fwdir == PF_FWD) { 6939 pd.lookup.done = -1; 6940 pd.lookup.uid = -1; 6941 pd.lookup.gid = -1; 6942 pd.lookup.pid = NO_PID; 6943 } 6944 6945 switch (pd.virtual_proto) { 6946 6947 case PF_VPROTO_FRAGMENT: { 6948 /* 6949 * handle fragments that aren't reassembled by 6950 * normalization 6951 */ 6952 PF_LOCK(); 6953 have_pf_lock = 1; 6954 action = pf_test_rule(&pd, &r, &s, &a, &ruleset, &reason); 6955 s = pf_state_ref(s); 6956 if (action != PF_PASS) 6957 REASON_SET(&reason, PFRES_FRAG); 6958 break; 6959 } 6960 6961 case IPPROTO_ICMP: { 6962 if (pd.af != AF_INET) { 6963 action = PF_DROP; 6964 REASON_SET(&reason, PFRES_NORM); 6965 DPFPRINTF(LOG_NOTICE, 6966 "dropping IPv6 packet with ICMPv4 payload"); 6967 break; 6968 } 6969 PF_STATE_ENTER_READ(); 6970 action = pf_test_state_icmp(&pd, &s, &reason); 6971 s = pf_state_ref(s); 6972 PF_STATE_EXIT_READ(); 6973 if (action == PF_PASS || action == PF_AFRT) { 6974 #if NPFSYNC > 0 6975 pfsync_update_state(s, &have_pf_lock); 6976 #endif /* NPFSYNC > 0 */ 6977 r = s->rule.ptr; 6978 a = s->anchor.ptr; 6979 #if NPFLOG > 0 6980 pd.pflog |= s->log; 6981 #endif /* NPFLOG > 0 */ 6982 } else if (s == NULL) { 6983 PF_LOCK(); 6984 have_pf_lock = 1; 6985 action = pf_test_rule(&pd, &r, &s, &a, &ruleset, 6986 &reason); 6987 s = pf_state_ref(s); 6988 } 6989 break; 6990 } 6991 6992 #ifdef INET6 6993 case IPPROTO_ICMPV6: { 6994 if (pd.af != AF_INET6) { 6995 action = PF_DROP; 6996 REASON_SET(&reason, PFRES_NORM); 6997 DPFPRINTF(LOG_NOTICE, 6998 "dropping IPv4 packet with ICMPv6 payload"); 6999 break; 7000 } 7001 PF_STATE_ENTER_READ(); 7002 action = pf_test_state_icmp(&pd, &s, &reason); 
7003 s = pf_state_ref(s); 7004 PF_STATE_EXIT_READ(); 7005 if (action == PF_PASS || action == PF_AFRT) { 7006 #if NPFSYNC > 0 7007 pfsync_update_state(s, &have_pf_lock); 7008 #endif /* NPFSYNC > 0 */ 7009 r = s->rule.ptr; 7010 a = s->anchor.ptr; 7011 #if NPFLOG > 0 7012 pd.pflog |= s->log; 7013 #endif /* NPFLOG > 0 */ 7014 } else if (s == NULL) { 7015 PF_LOCK(); 7016 have_pf_lock = 1; 7017 action = pf_test_rule(&pd, &r, &s, &a, &ruleset, 7018 &reason); 7019 s = pf_state_ref(s); 7020 } 7021 break; 7022 } 7023 #endif /* INET6 */ 7024 7025 default: 7026 if (pd.virtual_proto == IPPROTO_TCP) { 7027 if (pd.dir == PF_IN && (pd.hdr.tcp.th_flags & 7028 (TH_SYN|TH_ACK)) == TH_SYN && 7029 pf_synflood_check(&pd)) { 7030 PF_LOCK(); 7031 have_pf_lock = 1; 7032 pf_syncookie_send(&pd); 7033 action = PF_DROP; 7034 break; 7035 } 7036 if ((pd.hdr.tcp.th_flags & TH_ACK) && pd.p_len == 0) 7037 pqid = 1; 7038 action = pf_normalize_tcp(&pd); 7039 if (action == PF_DROP) 7040 break; 7041 } 7042 PF_STATE_ENTER_READ(); 7043 action = pf_test_state(&pd, &s, &reason, 0); 7044 s = pf_state_ref(s); 7045 PF_STATE_EXIT_READ(); 7046 if (s == NULL && action != PF_PASS && action != PF_AFRT && 7047 pd.dir == PF_IN && pd.virtual_proto == IPPROTO_TCP && 7048 (pd.hdr.tcp.th_flags & (TH_SYN|TH_ACK|TH_RST)) == TH_ACK && 7049 pf_syncookie_validate(&pd)) { 7050 struct mbuf *msyn; 7051 msyn = pf_syncookie_recreate_syn(&pd); 7052 if (msyn) { 7053 action = pf_test(af, fwdir, ifp, &msyn); 7054 m_freem(msyn); 7055 if (action == PF_PASS || action == PF_AFRT) { 7056 PF_STATE_ENTER_READ(); 7057 pf_test_state(&pd, &s, &reason, 1); 7058 s = pf_state_ref(s); 7059 PF_STATE_EXIT_READ(); 7060 if (s == NULL) 7061 return (PF_DROP); 7062 s->src.seqhi = 7063 ntohl(pd.hdr.tcp.th_ack) - 1; 7064 s->src.seqlo = 7065 ntohl(pd.hdr.tcp.th_seq) - 1; 7066 pf_set_protostate(s, PF_PEER_SRC, 7067 PF_TCPS_PROXY_DST); 7068 PF_LOCK(); 7069 have_pf_lock = 1; 7070 action = pf_synproxy(&pd, &s, &reason); 7071 if (action != PF_PASS) { 7072 
PF_UNLOCK(); 7073 pf_state_unref(s); 7074 return (action); 7075 } 7076 } 7077 } else 7078 action = PF_DROP; 7079 } 7080 7081 if (action == PF_PASS || action == PF_AFRT) { 7082 #if NPFSYNC > 0 7083 pfsync_update_state(s, &have_pf_lock); 7084 #endif /* NPFSYNC > 0 */ 7085 r = s->rule.ptr; 7086 a = s->anchor.ptr; 7087 #if NPFLOG > 0 7088 pd.pflog |= s->log; 7089 #endif /* NPFLOG > 0 */ 7090 } else if (s == NULL) { 7091 PF_LOCK(); 7092 have_pf_lock = 1; 7093 action = pf_test_rule(&pd, &r, &s, &a, &ruleset, 7094 &reason); 7095 s = pf_state_ref(s); 7096 } 7097 7098 if (pd.virtual_proto == IPPROTO_TCP) { 7099 if (s) { 7100 if (s->max_mss) 7101 pf_normalize_mss(&pd, s->max_mss); 7102 } else if (r->max_mss) 7103 pf_normalize_mss(&pd, r->max_mss); 7104 } 7105 7106 break; 7107 } 7108 7109 if (have_pf_lock != 0) 7110 PF_UNLOCK(); 7111 7112 /* 7113 * At the moment, we rely on NET_LOCK() to prevent removal of items 7114 * we've collected above ('r', 'anchor' and 'ruleset'). They'll have 7115 * to be refcounted when NET_LOCK() is gone. 
7116 */ 7117 7118 done: 7119 if (action != PF_DROP) { 7120 if (s) { 7121 /* The non-state case is handled in pf_test_rule() */ 7122 if (action == PF_PASS && pd.badopts && 7123 !(s->state_flags & PFSTATE_ALLOWOPTS)) { 7124 action = PF_DROP; 7125 REASON_SET(&reason, PFRES_IPOPTIONS); 7126 #if NPFLOG > 0 7127 pd.pflog |= PF_LOG_FORCE; 7128 #endif /* NPFLOG > 0 */ 7129 DPFPRINTF(LOG_NOTICE, "dropping packet with " 7130 "ip/ipv6 options in pf_test()"); 7131 } 7132 7133 pf_scrub(pd.m, s->state_flags, pd.af, s->min_ttl, 7134 s->set_tos); 7135 pf_tag_packet(pd.m, s->tag, s->rtableid[pd.didx]); 7136 if (pqid || (pd.tos & IPTOS_LOWDELAY)) { 7137 qid = s->pqid; 7138 if (s->state_flags & PFSTATE_SETPRIO) 7139 pd.m->m_pkthdr.pf.prio = s->set_prio[1]; 7140 } else { 7141 qid = s->qid; 7142 if (s->state_flags & PFSTATE_SETPRIO) 7143 pd.m->m_pkthdr.pf.prio = s->set_prio[0]; 7144 } 7145 pd.m->m_pkthdr.pf.delay = s->delay; 7146 } else { 7147 pf_scrub(pd.m, r->scrub_flags, pd.af, r->min_ttl, 7148 r->set_tos); 7149 if (pqid || (pd.tos & IPTOS_LOWDELAY)) { 7150 qid = r->pqid; 7151 if (r->scrub_flags & PFSTATE_SETPRIO) 7152 pd.m->m_pkthdr.pf.prio = r->set_prio[1]; 7153 } else { 7154 qid = r->qid; 7155 if (r->scrub_flags & PFSTATE_SETPRIO) 7156 pd.m->m_pkthdr.pf.prio = r->set_prio[0]; 7157 } 7158 pd.m->m_pkthdr.pf.delay = r->delay; 7159 } 7160 } 7161 7162 if (action == PF_PASS && qid) 7163 pd.m->m_pkthdr.pf.qid = qid; 7164 if (pd.dir == PF_IN && s && s->key[PF_SK_STACK]) 7165 pf_mbuf_link_state_key(pd.m, s->key[PF_SK_STACK]); 7166 if (pd.dir == PF_OUT && 7167 pd.m->m_pkthdr.pf.inp && !pd.m->m_pkthdr.pf.inp->inp_pf_sk && 7168 s && s->key[PF_SK_STACK] && !s->key[PF_SK_STACK]->inp) 7169 pf_state_key_link_inpcb(s->key[PF_SK_STACK], 7170 pd.m->m_pkthdr.pf.inp); 7171 7172 if (s != NULL && !ISSET(pd.m->m_pkthdr.csum_flags, M_FLOWID)) { 7173 pd.m->m_pkthdr.ph_flowid = bemtoh64(&s->id); 7174 SET(pd.m->m_pkthdr.csum_flags, M_FLOWID); 7175 } 7176 7177 /* 7178 * connections redirected to loopback 
should not match sockets 7179 * bound specifically to loopback due to security implications, 7180 * see in_pcblookup_listen(). 7181 */ 7182 if (pd.destchg) 7183 if ((pd.af == AF_INET && (ntohl(pd.dst->v4.s_addr) >> 7184 IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) || 7185 (pd.af == AF_INET6 && IN6_IS_ADDR_LOOPBACK(&pd.dst->v6))) 7186 pd.m->m_pkthdr.pf.flags |= PF_TAG_TRANSLATE_LOCALHOST; 7187 /* We need to redo the route lookup on outgoing routes. */ 7188 if (pd.destchg && pd.dir == PF_OUT) 7189 pd.m->m_pkthdr.pf.flags |= PF_TAG_REROUTE; 7190 7191 if (pd.dir == PF_IN && action == PF_PASS && 7192 (r->divert.type == PF_DIVERT_TO || 7193 r->divert.type == PF_DIVERT_REPLY)) { 7194 struct pf_divert *divert; 7195 7196 if ((divert = pf_get_divert(pd.m))) { 7197 pd.m->m_pkthdr.pf.flags |= PF_TAG_DIVERTED; 7198 divert->addr = r->divert.addr; 7199 divert->port = r->divert.port; 7200 divert->rdomain = pd.rdomain; 7201 divert->type = r->divert.type; 7202 } 7203 } 7204 7205 if (action == PF_PASS && r->divert.type == PF_DIVERT_PACKET) 7206 action = PF_DIVERT; 7207 7208 #if NPFLOG > 0 7209 if (pd.pflog) { 7210 struct pf_rule_item *ri; 7211 7212 if (pd.pflog & PF_LOG_FORCE || r->log & PF_LOG_ALL) 7213 pflog_packet(&pd, reason, r, a, ruleset, NULL); 7214 if (s) { 7215 SLIST_FOREACH(ri, &s->match_rules, entry) 7216 if (ri->r->log & PF_LOG_ALL) 7217 pflog_packet(&pd, reason, ri->r, a, 7218 ruleset, NULL); 7219 } 7220 } 7221 #endif /* NPFLOG > 0 */ 7222 7223 pf_counters_inc(action, &pd, s, r, a); 7224 7225 switch (action) { 7226 case PF_SYNPROXY_DROP: 7227 m_freem(pd.m); 7228 /* FALLTHROUGH */ 7229 case PF_DEFER: 7230 pd.m = NULL; 7231 action = PF_PASS; 7232 break; 7233 case PF_DIVERT: 7234 switch (pd.af) { 7235 case AF_INET: 7236 if (!divert_packet(pd.m, pd.dir, r->divert.port)) 7237 pd.m = NULL; 7238 break; 7239 #ifdef INET6 7240 case AF_INET6: 7241 if (!divert6_packet(pd.m, pd.dir, r->divert.port)) 7242 pd.m = NULL; 7243 break; 7244 #endif /* INET6 */ 7245 } 7246 action = PF_PASS; 7247 
break; 7248 #ifdef INET6 7249 case PF_AFRT: 7250 if (pf_translate_af(&pd)) { 7251 action = PF_DROP; 7252 break; 7253 } 7254 pd.m->m_pkthdr.pf.flags |= PF_TAG_GENERATED; 7255 switch (pd.naf) { 7256 case AF_INET: 7257 if (pd.dir == PF_IN) 7258 ip_forward(pd.m, ifp, NULL, 1); 7259 else 7260 ip_output(pd.m, NULL, NULL, 0, NULL, NULL, 0); 7261 break; 7262 case AF_INET6: 7263 if (pd.dir == PF_IN) 7264 ip6_forward(pd.m, NULL, 1); 7265 else 7266 ip6_output(pd.m, NULL, NULL, 0, NULL, NULL); 7267 break; 7268 } 7269 pd.m = NULL; 7270 action = PF_PASS; 7271 break; 7272 #endif /* INET6 */ 7273 case PF_DROP: 7274 m_freem(pd.m); 7275 pd.m = NULL; 7276 break; 7277 default: 7278 if (r->rt) { 7279 switch (pd.af) { 7280 case AF_INET: 7281 pf_route(&pd, r, s); 7282 break; 7283 #ifdef INET6 7284 case AF_INET6: 7285 pf_route6(&pd, r, s); 7286 break; 7287 #endif /* INET6 */ 7288 } 7289 } 7290 break; 7291 } 7292 7293 #ifdef INET6 7294 /* if reassembled packet passed, create new fragments */ 7295 if (pf_status.reass && action == PF_PASS && pd.m && fwdir == PF_FWD && 7296 pd.af == AF_INET6) { 7297 struct m_tag *mtag; 7298 7299 if ((mtag = m_tag_find(pd.m, PACKET_TAG_PF_REASSEMBLED, NULL))) 7300 action = pf_refragment6(&pd.m, mtag, NULL, NULL, NULL); 7301 } 7302 #endif /* INET6 */ 7303 if (s && action != PF_DROP) { 7304 if (!s->if_index_in && dir == PF_IN) 7305 s->if_index_in = ifp->if_index; 7306 else if (!s->if_index_out && dir == PF_OUT) 7307 s->if_index_out = ifp->if_index; 7308 } 7309 7310 *m0 = pd.m; 7311 7312 pf_state_unref(s); 7313 7314 return (action); 7315 } 7316 7317 int 7318 pf_ouraddr(struct mbuf *m) 7319 { 7320 struct pf_state_key *sk; 7321 7322 if (m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) 7323 return (1); 7324 7325 sk = m->m_pkthdr.pf.statekey; 7326 if (sk != NULL) { 7327 if (sk->inp != NULL) 7328 return (1); 7329 } 7330 7331 return (-1); 7332 } 7333 7334 /* 7335 * must be called whenever any addressing information such as 7336 * address, port, protocol has changed 7337 */ 7338 
void 7339 pf_pkt_addr_changed(struct mbuf *m) 7340 { 7341 pf_mbuf_unlink_state_key(m); 7342 pf_mbuf_unlink_inpcb(m); 7343 } 7344 7345 struct inpcb * 7346 pf_inp_lookup(struct mbuf *m) 7347 { 7348 struct inpcb *inp = NULL; 7349 struct pf_state_key *sk = m->m_pkthdr.pf.statekey; 7350 7351 if (!pf_state_key_isvalid(sk)) 7352 pf_mbuf_unlink_state_key(m); 7353 else 7354 inp = m->m_pkthdr.pf.statekey->inp; 7355 7356 if (inp && inp->inp_pf_sk) 7357 KASSERT(m->m_pkthdr.pf.statekey == inp->inp_pf_sk); 7358 7359 return (inp); 7360 } 7361 7362 void 7363 pf_inp_link(struct mbuf *m, struct inpcb *inp) 7364 { 7365 struct pf_state_key *sk = m->m_pkthdr.pf.statekey; 7366 7367 if (!pf_state_key_isvalid(sk)) { 7368 pf_mbuf_unlink_state_key(m); 7369 return; 7370 } 7371 7372 /* 7373 * we don't need to grab PF-lock here. At worst case we link inp to 7374 * state, which might be just being marked as deleted by another 7375 * thread. 7376 */ 7377 if (inp && !sk->inp && !inp->inp_pf_sk) 7378 pf_state_key_link_inpcb(sk, inp); 7379 7380 /* The statekey has finished finding the inp, it is no longer needed. */ 7381 pf_mbuf_unlink_state_key(m); 7382 } 7383 7384 void 7385 pf_inp_unlink(struct inpcb *inp) 7386 { 7387 pf_inpcb_unlink_state_key(inp); 7388 } 7389 7390 void 7391 pf_state_key_link_reverse(struct pf_state_key *sk, struct pf_state_key *skrev) 7392 { 7393 /* Note that sk and skrev may be equal, then we refcount twice. 
*/ 7394 KASSERT(sk->reverse == NULL); 7395 KASSERT(skrev->reverse == NULL); 7396 sk->reverse = pf_state_key_ref(skrev); 7397 skrev->reverse = pf_state_key_ref(sk); 7398 } 7399 7400 #if NPFLOG > 0 7401 void 7402 pf_log_matches(struct pf_pdesc *pd, struct pf_rule *rm, struct pf_rule *am, 7403 struct pf_ruleset *ruleset, struct pf_rule_slist *matchrules) 7404 { 7405 struct pf_rule_item *ri; 7406 7407 /* if this is the log(matches) rule, packet has been logged already */ 7408 if (rm->log & PF_LOG_MATCHES) 7409 return; 7410 7411 SLIST_FOREACH(ri, matchrules, entry) 7412 if (ri->r->log & PF_LOG_MATCHES) 7413 pflog_packet(pd, PFRES_MATCH, rm, am, ruleset, ri->r); 7414 } 7415 #endif /* NPFLOG > 0 */ 7416 7417 struct pf_state_key * 7418 pf_state_key_ref(struct pf_state_key *sk) 7419 { 7420 if (sk != NULL) 7421 PF_REF_TAKE(sk->refcnt); 7422 7423 return (sk); 7424 } 7425 7426 void 7427 pf_state_key_unref(struct pf_state_key *sk) 7428 { 7429 if (PF_REF_RELE(sk->refcnt)) { 7430 /* state key must be removed from tree */ 7431 KASSERT(!pf_state_key_isvalid(sk)); 7432 /* state key must be unlinked from reverse key */ 7433 KASSERT(sk->reverse == NULL); 7434 /* state key must be unlinked from socket */ 7435 KASSERT(sk->inp == NULL); 7436 pool_put(&pf_state_key_pl, sk); 7437 } 7438 } 7439 7440 int 7441 pf_state_key_isvalid(struct pf_state_key *sk) 7442 { 7443 return ((sk != NULL) && (sk->removed == 0)); 7444 } 7445 7446 void 7447 pf_mbuf_link_state_key(struct mbuf *m, struct pf_state_key *sk) 7448 { 7449 KASSERT(m->m_pkthdr.pf.statekey == NULL); 7450 m->m_pkthdr.pf.statekey = pf_state_key_ref(sk); 7451 } 7452 7453 void 7454 pf_mbuf_unlink_state_key(struct mbuf *m) 7455 { 7456 struct pf_state_key *sk = m->m_pkthdr.pf.statekey; 7457 7458 if (sk != NULL) { 7459 m->m_pkthdr.pf.statekey = NULL; 7460 pf_state_key_unref(sk); 7461 } 7462 } 7463 7464 void 7465 pf_mbuf_link_inpcb(struct mbuf *m, struct inpcb *inp) 7466 { 7467 KASSERT(m->m_pkthdr.pf.inp == NULL); 7468 m->m_pkthdr.pf.inp = 
in_pcbref(inp); 7469 } 7470 7471 void 7472 pf_mbuf_unlink_inpcb(struct mbuf *m) 7473 { 7474 struct inpcb *inp = m->m_pkthdr.pf.inp; 7475 7476 if (inp != NULL) { 7477 m->m_pkthdr.pf.inp = NULL; 7478 in_pcbunref(inp); 7479 } 7480 } 7481 7482 void 7483 pf_state_key_link_inpcb(struct pf_state_key *sk, struct inpcb *inp) 7484 { 7485 KASSERT(sk->inp == NULL); 7486 sk->inp = in_pcbref(inp); 7487 KASSERT(inp->inp_pf_sk == NULL); 7488 inp->inp_pf_sk = pf_state_key_ref(sk); 7489 } 7490 7491 void 7492 pf_inpcb_unlink_state_key(struct inpcb *inp) 7493 { 7494 struct pf_state_key *sk = inp->inp_pf_sk; 7495 7496 if (sk != NULL) { 7497 KASSERT(sk->inp == inp); 7498 sk->inp = NULL; 7499 inp->inp_pf_sk = NULL; 7500 pf_state_key_unref(sk); 7501 in_pcbunref(inp); 7502 } 7503 } 7504 7505 void 7506 pf_state_key_unlink_inpcb(struct pf_state_key *sk) 7507 { 7508 struct inpcb *inp = sk->inp; 7509 7510 if (inp != NULL) { 7511 KASSERT(inp->inp_pf_sk == sk); 7512 sk->inp = NULL; 7513 inp->inp_pf_sk = NULL; 7514 pf_state_key_unref(sk); 7515 in_pcbunref(inp); 7516 } 7517 } 7518 7519 void 7520 pf_state_key_unlink_reverse(struct pf_state_key *sk) 7521 { 7522 struct pf_state_key *skrev = sk->reverse; 7523 7524 /* Note that sk and skrev may be equal, then we unref twice. 
*/ 7525 if (skrev != NULL) { 7526 KASSERT(skrev->reverse == sk); 7527 sk->reverse = NULL; 7528 skrev->reverse = NULL; 7529 pf_state_key_unref(skrev); 7530 pf_state_key_unref(sk); 7531 } 7532 } 7533 7534 struct pf_state * 7535 pf_state_ref(struct pf_state *s) 7536 { 7537 if (s != NULL) 7538 PF_REF_TAKE(s->refcnt); 7539 return (s); 7540 } 7541 7542 void 7543 pf_state_unref(struct pf_state *s) 7544 { 7545 if ((s != NULL) && PF_REF_RELE(s->refcnt)) { 7546 /* never inserted or removed */ 7547 #if NPFSYNC > 0 7548 KASSERT((TAILQ_NEXT(s, sync_list) == NULL) || 7549 ((TAILQ_NEXT(s, sync_list) == _Q_INVALID) && 7550 (s->sync_state == PFSYNC_S_NONE))); 7551 #endif /* NPFSYNC */ 7552 KASSERT((TAILQ_NEXT(s, entry_list) == NULL) || 7553 (TAILQ_NEXT(s, entry_list) == _Q_INVALID)); 7554 KASSERT((s->key[PF_SK_WIRE] == NULL) && 7555 (s->key[PF_SK_STACK] == NULL)); 7556 7557 pool_put(&pf_state_pl, s); 7558 } 7559 } 7560 7561 int 7562 pf_delay_pkt(struct mbuf *m, u_int ifidx) 7563 { 7564 struct pf_pktdelay *pdy; 7565 7566 if ((pdy = pool_get(&pf_pktdelay_pl, PR_NOWAIT)) == NULL) { 7567 m_freem(m); 7568 return (ENOBUFS); 7569 } 7570 pdy->ifidx = ifidx; 7571 pdy->m = m; 7572 timeout_set(&pdy->to, pf_pktenqueue_delayed, pdy); 7573 timeout_add_msec(&pdy->to, m->m_pkthdr.pf.delay); 7574 m->m_pkthdr.pf.delay = 0; 7575 return (0); 7576 } 7577 7578 void 7579 pf_pktenqueue_delayed(void *arg) 7580 { 7581 struct pf_pktdelay *pdy = arg; 7582 struct ifnet *ifp; 7583 7584 ifp = if_get(pdy->ifidx); 7585 if (ifp != NULL) { 7586 if_enqueue(ifp, pdy->m); 7587 if_put(ifp); 7588 } else 7589 m_freem(pdy->m); 7590 7591 pool_put(&pf_pktdelay_pl, pdy); 7592 } 7593