/*	$OpenBSD: pf.c,v 1.1181 2023/06/05 08:37:27 sashan Exp $ */

/*
 * Copyright (c) 2001 Daniel Hartmeier
 * Copyright (c) 2002 - 2013 Henning Brauer <henning@openbsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *    - Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *    - Redistributions in binary form must reproduce the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer in the documentation and/or other materials provided
 *      with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Effort sponsored in part by the Defense Advanced Research Projects
 * Agency (DARPA) and Air Force Research Laboratory, Air Force
 * Materiel Command, USAF, under agreement number F30602-01-2-0537.
 *
 */

#include "bpfilter.h"
#include "carp.h"
#include "pflog.h"
#include "pfsync.h"
#include "pflow.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/filio.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/kernel.h>
#include <sys/time.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/syslog.h>

#include <crypto/sha2.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_types.h>
#include <net/route.h>
#include <net/toeplitz.h>

#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
#include <netinet/in_pcb.h>
#include <netinet/ip_var.h>
#include <netinet/ip_icmp.h>
#include <netinet/icmp_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_fsm.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>
#include <netinet/ip_divert.h>

#ifdef INET6
#include <netinet6/in6_var.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet/icmp6.h>
#include <netinet6/nd6.h>
#include <netinet6/ip6_divert.h>
#endif /* INET6 */

#include <net/pfvar.h>
#include <net/pfvar_priv.h>

#if NPFLOG > 0
#include <net/if_pflog.h>
#endif /* NPFLOG > 0 */

#if NPFLOW > 0
#include <net/if_pflow.h>
#endif /* NPFLOW > 0 */

#if NPFSYNC > 0
#include <net/if_pfsync.h>
#else
struct pfsync_deferral;
#endif /* NPFSYNC > 0 */

/*
 * Global variables
 */
struct pf_state_tree	 pf_statetbl;
struct pf_queuehead	 pf_queues[2];
struct pf_queuehead	*pf_queues_active;
struct pf_queuehead	*pf_queues_inactive;

struct pf_status	 pf_status;

int			 pf_hdr_limit = 20; /* arbitrary limit, tune in ddb */

SHA2_CTX		 pf_tcp_secret_ctx;
u_char			 pf_tcp_secret[16];
int			 pf_tcp_secret_init;
int			 pf_tcp_iss_off;

int		 pf_npurge;
struct task	 pf_purge_task = TASK_INITIALIZER(pf_purge, &pf_npurge);
struct timeout	 pf_purge_to = TIMEOUT_INITIALIZER(pf_purge_timeout, NULL);

enum pf_test_status {
	PF_TEST_FAIL = -1,
	PF_TEST_OK,
	PF_TEST_QUICK
};

struct pf_test_ctx {
	struct pf_pdesc		 *pd;
	struct pf_rule_actions	  act;
	u_int8_t		  icmpcode;
	u_int8_t		  icmptype;
	int			  icmp_dir;
	int			  state_icmp;
	int			  tag;
	u_short			  reason;
	struct pf_rule_item	 *ri;
	struct pf_src_node	 *sns[PF_SN_MAX];
	struct pf_rule_slist	  rules;
	struct pf_rule		 *nr;
	struct pf_rule		**rm;
	struct pf_rule		 *a;
	struct pf_rule		**am;
	struct pf_ruleset	**rsm;
	struct pf_ruleset	 *arsm;
	struct pf_ruleset	 *aruleset;
	struct tcphdr		 *th;
};

struct pool		 pf_src_tree_pl, pf_rule_pl, pf_queue_pl;
struct pool		 pf_state_pl, pf_state_key_pl, pf_state_item_pl;
struct pool		 pf_rule_item_pl, pf_sn_item_pl, pf_pktdelay_pl;

void			 pf_add_threshold(struct pf_threshold *);
int			 pf_check_threshold(struct pf_threshold *);
int			 pf_check_tcp_cksum(struct mbuf *, int, int,
			    sa_family_t);
__inline void		 pf_cksum_fixup(u_int16_t *, u_int16_t, u_int16_t,
			    u_int8_t);
void			 pf_cksum_fixup_a(u_int16_t *, const struct pf_addr *,
			    const struct pf_addr *, sa_family_t, u_int8_t);
int			 pf_modulate_sack(struct pf_pdesc *,
			    struct pf_state_peer *);
int			 pf_icmp_mapping(struct pf_pdesc *, u_int8_t, int *,
			    u_int16_t *, u_int16_t *);
int			 pf_change_icmp_af(struct mbuf *, int,
			    struct pf_pdesc *, struct pf_pdesc *,
			    struct pf_addr *, struct pf_addr *, sa_family_t,
			    sa_family_t);
int			 pf_translate_a(struct pf_pdesc *, struct pf_addr *,
			    struct pf_addr *);
void			 pf_translate_icmp(struct pf_pdesc *, struct pf_addr *,
			    u_int16_t *, struct pf_addr *, struct pf_addr *,
			    u_int16_t);
int			 pf_translate_icmp_af(struct pf_pdesc*, int, void *);
void			 pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t, int,
			    sa_family_t, struct pf_rule *, u_int);
void			 pf_detach_state(struct pf_state *);
struct pf_state_key	*pf_state_key_attach(struct pf_state_key *,
			    struct pf_state *, int);
void			 pf_state_key_detach(struct pf_state *, int);
u_int32_t		 pf_tcp_iss(struct pf_pdesc *);
void			 pf_rule_to_actions(struct pf_rule *,
			    struct pf_rule_actions *);
int			 pf_test_rule(struct pf_pdesc *, struct pf_rule **,
			    struct pf_state **, struct pf_rule **,
			    struct pf_ruleset **, u_short *,
			    struct pfsync_deferral **);
static __inline int	 pf_create_state(struct pf_pdesc *, struct pf_rule *,
			    struct pf_rule *, struct pf_rule *,
			    struct pf_state_key **, struct pf_state_key **,
			    int *, struct pf_state **, int,
			    struct pf_rule_slist *, struct pf_rule_actions *,
			    struct pf_src_node **);
static __inline int	 pf_state_key_addr_setup(struct pf_pdesc *, void *,
			    int, struct pf_addr *, int, struct pf_addr *,
			    int, int);
int			 pf_state_key_setup(struct pf_pdesc *, struct
			    pf_state_key **, struct pf_state_key **, int);
int			 pf_tcp_track_full(struct pf_pdesc *,
			    struct pf_state **, u_short *, int *, int);
int			 pf_tcp_track_sloppy(struct pf_pdesc *,
			    struct pf_state **, u_short *);
static __inline int	 pf_synproxy(struct pf_pdesc *, struct pf_state **,
			    u_short *);
int			 pf_test_state(struct pf_pdesc *, struct pf_state **,
			    u_short *);
int			 pf_icmp_state_lookup(struct pf_pdesc *,
			    struct pf_state_key_cmp *, struct pf_state **,
			    u_int16_t, u_int16_t, int, int *, int, int);
int			 pf_test_state_icmp(struct pf_pdesc *,
			    struct pf_state **, u_short *);
u_int16_t		 pf_calc_mss(struct pf_addr *, sa_family_t, int,
			    u_int16_t);
static __inline int	 pf_set_rt_ifp(struct pf_state *, struct pf_addr *,
			    sa_family_t, struct pf_src_node **);
struct pf_divert	*pf_get_divert(struct mbuf *);
int			 pf_walk_option(struct pf_pdesc *, struct ip *,
			    int, int, u_short *);
int			 pf_walk_header(struct pf_pdesc *, struct ip *,
			    u_short *);
int			 pf_walk_option6(struct pf_pdesc *, struct ip6_hdr *,
			    int, int, u_short *);
int			 pf_walk_header6(struct pf_pdesc *, struct ip6_hdr *,
			    u_short *);
void			 pf_print_state_parts(struct pf_state *,
			    struct pf_state_key *, struct pf_state_key *);
int			 pf_addr_wrap_neq(struct pf_addr_wrap *,
			    struct pf_addr_wrap *);
int			 pf_compare_state_keys(struct pf_state_key *,
			    struct pf_state_key *, struct pfi_kif *, u_int);
u_int16_t		 pf_pkt_hash(sa_family_t, uint8_t,
			    const struct pf_addr *, const struct pf_addr *,
			    uint16_t, uint16_t);
int			 pf_find_state(struct pf_pdesc *,
			    struct pf_state_key_cmp *, struct pf_state **);
int			 pf_src_connlimit(struct pf_state **);
int			 pf_match_rcvif(struct mbuf *, struct pf_rule *);
int			 pf_step_into_anchor(struct pf_test_ctx *,
			    struct pf_rule *);
int			 pf_match_rule(struct pf_test_ctx *,
			    struct pf_ruleset *);
void			 pf_counters_inc(int, struct pf_pdesc *,
			    struct pf_state *, struct pf_rule *,
			    struct pf_rule *);

int			 pf_state_key_isvalid(struct pf_state_key *);
struct pf_state_key	*pf_state_key_ref(struct pf_state_key *);
void			 pf_state_key_unref(struct pf_state_key *);
void			 pf_state_key_link_reverse(struct pf_state_key *,
			    struct pf_state_key *);
void			 pf_state_key_unlink_reverse(struct pf_state_key *);
void			 pf_state_key_link_inpcb(struct pf_state_key *,
			    struct inpcb *);
void			 pf_state_key_unlink_inpcb(struct pf_state_key *);
void			 pf_inpcb_unlink_state_key(struct inpcb *);
void			 pf_pktenqueue_delayed(void *);
int32_t			 pf_state_expires(const struct pf_state *, uint8_t);

#if NPFLOG > 0
void			 pf_log_matches(struct pf_pdesc *, struct pf_rule *,
			    struct pf_rule *, struct pf_ruleset *,
			    struct pf_rule_slist *);
#endif /* NPFLOG > 0 */

extern struct pool pfr_ktable_pl;
extern struct pool pfr_kentry_pl;

struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = {
	{ &pf_state_pl,		PFSTATE_HIWAT,		PFSTATE_HIWAT },
	{ &pf_src_tree_pl,	PFSNODE_HIWAT,		PFSNODE_HIWAT },
	{ &pf_frent_pl,		PFFRAG_FRENT_HIWAT,	PFFRAG_FRENT_HIWAT },
	{ &pfr_ktable_pl,	PFR_KTABLE_HIWAT,	PFR_KTABLE_HIWAT },
	{ &pfr_kentry_pl,	PFR_KENTRY_HIWAT,	PFR_KENTRY_HIWAT },
	{ &pf_pktdelay_pl,	PF_PKTDELAY_MAXPKTS,	PF_PKTDELAY_MAXPKTS },
	{ &pf_anchor_pl,	PF_ANCHOR_HIWAT,	PF_ANCHOR_HIWAT }
};
#define BOUND_IFACE(r, k) \
	((r)->rule_flag & PFRULE_IFBOUND) ? (k) : pfi_all

#define STATE_INC_COUNTERS(s)					\
	do {							\
		struct pf_rule_item *mrm;			\
		s->rule.ptr->states_cur++;			\
		s->rule.ptr->states_tot++;			\
		if (s->anchor.ptr != NULL) {			\
			s->anchor.ptr->states_cur++;		\
			s->anchor.ptr->states_tot++;		\
		}						\
		SLIST_FOREACH(mrm, &s->match_rules, entry)	\
			mrm->r->states_cur++;			\
	} while (0)

static __inline int pf_src_compare(struct pf_src_node *, struct pf_src_node *);
static inline int pf_state_compare_key(const struct pf_state_key *,
	const struct pf_state_key *);
static inline int pf_state_compare_id(const struct pf_state *,
	const struct pf_state *);
#ifdef INET6
static __inline void pf_cksum_uncover(u_int16_t *, u_int16_t, u_int8_t);
static __inline void pf_cksum_cover(u_int16_t *, u_int16_t, u_int8_t);
#endif /* INET6 */
static __inline void pf_set_protostate(struct pf_state *, int, u_int8_t);

struct pf_src_tree tree_src_tracking;

struct pf_state_tree_id tree_id;
struct pf_state_list pf_state_list = PF_STATE_LIST_INITIALIZER(pf_state_list);

RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare);
RBT_GENERATE(pf_state_tree, pf_state_key, sk_entry, pf_state_compare_key);
RBT_GENERATE(pf_state_tree_id, pf_state, entry_id, pf_state_compare_id);

int
pf_addr_compare(const struct pf_addr *a, const struct pf_addr *b,
    sa_family_t af)
{
	switch (af) {
	case AF_INET:
		if (a->addr32[0] > b->addr32[0])
			return (1);
		if (a->addr32[0] < b->addr32[0])
			return (-1);
		break;
#ifdef INET6
	case AF_INET6:
		if (a->addr32[3] > b->addr32[3])
			return (1);
		if (a->addr32[3] < b->addr32[3])
			return (-1);
		if (a->addr32[2] > b->addr32[2])
			return (1);
		if (a->addr32[2] < b->addr32[2])
			return (-1);
		if (a->addr32[1] > b->addr32[1])
			return (1);
		if (a->addr32[1] < b->addr32[1])
			return (-1);
		if (a->addr32[0] > b->addr32[0])
			return (1);
		if (a->addr32[0] < b->addr32[0])
			return (-1);
		break;
#endif /* INET6 */
	}
	return (0);
}

static __inline int
pf_src_compare(struct pf_src_node *a, struct pf_src_node *b)
{
	int	diff;

	if (a->rule.ptr > b->rule.ptr)
		return (1);
	if (a->rule.ptr < b->rule.ptr)
		return (-1);
	if ((diff = a->type - b->type) != 0)
		return (diff);
	if ((diff = a->af - b->af) != 0)
		return (diff);
	if ((diff = pf_addr_compare(&a->addr, &b->addr, a->af)) != 0)
		return (diff);
	return (0);
}

static __inline void
pf_set_protostate(struct pf_state *st, int which, u_int8_t newstate)
{
	if (which == PF_PEER_DST || which == PF_PEER_BOTH)
		st->dst.state = newstate;
	if (which == PF_PEER_DST)
		return;

	if (st->src.state == newstate)
		return;
	if (st->creatorid == pf_status.hostid &&
	    st->key[PF_SK_STACK]->proto == IPPROTO_TCP &&
	    !(TCPS_HAVEESTABLISHED(st->src.state) ||
	    st->src.state == TCPS_CLOSED) &&
	    (TCPS_HAVEESTABLISHED(newstate) || newstate == TCPS_CLOSED))
		pf_status.states_halfopen--;

	st->src.state = newstate;
}

void
pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af)
{
	switch (af) {
	case AF_INET:
		dst->addr32[0] = src->addr32[0];
		break;
#ifdef INET6
	case AF_INET6:
		dst->addr32[0] = src->addr32[0];
		dst->addr32[1] = src->addr32[1];
		dst->addr32[2] = src->addr32[2];
		dst->addr32[3] = src->addr32[3];
		break;
#endif /* INET6 */
	default:
		unhandled_af(af);
	}
}

void
pf_init_threshold(struct pf_threshold *threshold,
    u_int32_t limit, u_int32_t seconds)
{
	threshold->limit = limit * PF_THRESHOLD_MULT;
	threshold->seconds = seconds;
	threshold->count = 0;
	threshold->last = getuptime();
}

void
pf_add_threshold(struct pf_threshold *threshold)
{
	u_int32_t t = getuptime(), diff = t - threshold->last;

	if (diff >= threshold->seconds)
		threshold->count = 0;
	else
		threshold->count -= threshold->count * diff /
		    threshold->seconds;
	threshold->count += PF_THRESHOLD_MULT;
	threshold->last = t;
}

int
pf_check_threshold(struct pf_threshold *threshold)
{
	return (threshold->count > threshold->limit);
}

void
pf_state_list_insert(struct pf_state_list *pfs, struct pf_state *st)
{
	/*
	 * we can always put states on the end of the list.
	 *
	 * things reading the list should take a read lock, then
	 * the mutex, get the head and tail pointers, release the
	 * mutex, and then they can iterate between the head and tail.
	 */

	pf_state_ref(st); /* get a ref for the list */

	mtx_enter(&pfs->pfs_mtx);
	TAILQ_INSERT_TAIL(&pfs->pfs_list, st, entry_list);
	mtx_leave(&pfs->pfs_mtx);
}

void
pf_state_list_remove(struct pf_state_list *pfs, struct pf_state *st)
{
	/* states can only be removed when the write lock is held */
	rw_assert_wrlock(&pfs->pfs_rwl);

	mtx_enter(&pfs->pfs_mtx);
	TAILQ_REMOVE(&pfs->pfs_list, st, entry_list);
	mtx_leave(&pfs->pfs_mtx);

	pf_state_unref(st); /* list no longer references the state */
}
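/*
 * Illustrative sketch (not part of the original code): a reader
 * following the protocol described in pf_state_list_insert() above.
 * It snapshots head/tail under the mutex, then walks that stable
 * span without holding the mutex; pf_purge_expired_states() below
 * is the real consumer of this pattern.
 */
#if 0
void
pf_state_list_walk_sketch(struct pf_state_list *pfs)
{
	struct pf_state *head, *tail, *st;

	rw_enter_read(&pfs->pfs_rwl);	/* keeps removals away */

	mtx_enter(&pfs->pfs_mtx);	/* snapshot a stable view */
	head = TAILQ_FIRST(&pfs->pfs_list);
	tail = TAILQ_LAST(&pfs->pfs_list, pf_state_queue);
	mtx_leave(&pfs->pfs_mtx);

	/* iterate between head and tail; inserts only happen after tail */
	for (st = head; st != NULL; st = TAILQ_NEXT(st, entry_list)) {
		/* ... examine st ... */
		if (st == tail)
			break;
	}

	rw_exit_read(&pfs->pfs_rwl);
}
#endif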
int
pf_src_connlimit(struct pf_state **stp)
{
	int			 bad = 0;
	struct pf_src_node	*sn;

	if ((sn = pf_get_src_node((*stp), PF_SN_NONE)) == NULL)
		return (0);

	sn->conn++;
	(*stp)->src.tcp_est = 1;
	pf_add_threshold(&sn->conn_rate);

	if ((*stp)->rule.ptr->max_src_conn &&
	    (*stp)->rule.ptr->max_src_conn < sn->conn) {
		pf_status.lcounters[LCNT_SRCCONN]++;
		bad++;
	}

	if ((*stp)->rule.ptr->max_src_conn_rate.limit &&
	    pf_check_threshold(&sn->conn_rate)) {
		pf_status.lcounters[LCNT_SRCCONNRATE]++;
		bad++;
	}

	if (!bad)
		return (0);

	if ((*stp)->rule.ptr->overload_tbl) {
		struct pfr_addr p;
		u_int32_t	killed = 0;

		pf_status.lcounters[LCNT_OVERLOAD_TABLE]++;
		if (pf_status.debug >= LOG_NOTICE) {
			log(LOG_NOTICE,
			    "pf: pf_src_connlimit: blocking address ");
			pf_print_host(&sn->addr, 0,
			    (*stp)->key[PF_SK_WIRE]->af);
		}

		memset(&p, 0, sizeof(p));
		p.pfra_af = (*stp)->key[PF_SK_WIRE]->af;
		switch ((*stp)->key[PF_SK_WIRE]->af) {
		case AF_INET:
			p.pfra_net = 32;
			p.pfra_ip4addr = sn->addr.v4;
			break;
#ifdef INET6
		case AF_INET6:
			p.pfra_net = 128;
			p.pfra_ip6addr = sn->addr.v6;
			break;
#endif /* INET6 */
		}

		pfr_insert_kentry((*stp)->rule.ptr->overload_tbl,
		    &p, gettime());

		/* kill existing states if that's required. */
		if ((*stp)->rule.ptr->flush) {
			struct pf_state_key *sk;
			struct pf_state *st;

			pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++;
			RBT_FOREACH(st, pf_state_tree_id, &tree_id) {
				sk = st->key[PF_SK_WIRE];
				/*
				 * Kill states from this source. (Only those
				 * from the same rule if PF_FLUSH_GLOBAL is not
				 * set)
				 */
				if (sk->af ==
				    (*stp)->key[PF_SK_WIRE]->af &&
				    (((*stp)->direction == PF_OUT &&
				    PF_AEQ(&sn->addr, &sk->addr[1], sk->af)) ||
				    ((*stp)->direction == PF_IN &&
				    PF_AEQ(&sn->addr, &sk->addr[0], sk->af))) &&
				    ((*stp)->rule.ptr->flush &
				    PF_FLUSH_GLOBAL ||
				    (*stp)->rule.ptr == st->rule.ptr)) {
					st->timeout = PFTM_PURGE;
					pf_set_protostate(st, PF_PEER_BOTH,
					    TCPS_CLOSED);
					killed++;
				}
			}
			if (pf_status.debug >= LOG_NOTICE)
				addlog(", %u states killed", killed);
		}
		if (pf_status.debug >= LOG_NOTICE)
			addlog("\n");
	}

	/* kill this state */
	(*stp)->timeout = PFTM_PURGE;
	pf_set_protostate(*stp, PF_PEER_BOTH, TCPS_CLOSED);
	return (1);
}
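/*
 * For reference (not from the original source): the limits checked
 * above are driven by pf.conf state options, e.g.
 *
 *	table <bruteforce> persist
 *	block quick from <bruteforce>
 *	pass in proto tcp to any port ssh \
 *	    keep state (max-src-conn 10, max-src-conn-rate 5/30, \
 *	    overload <bruteforce> flush global)
 *
 * max-src-conn feeds sn->conn, max-src-conn-rate feeds the conn_rate
 * threshold, overload names the table filled via pfr_insert_kentry(),
 * and "flush global" sets PF_FLUSH_GLOBAL.
 */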
int
pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule,
    enum pf_sn_types type, sa_family_t af, struct pf_addr *src,
    struct pf_addr *raddr, struct pfi_kif *kif)
{
	struct pf_src_node	k;

	if (*sn == NULL) {
		k.af = af;
		k.type = type;
		pf_addrcpy(&k.addr, src, af);
		k.rule.ptr = rule;
		pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
		*sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
	}
	if (*sn == NULL) {
		if (!rule->max_src_nodes ||
		    rule->src_nodes < rule->max_src_nodes)
			(*sn) = pool_get(&pf_src_tree_pl, PR_NOWAIT | PR_ZERO);
		else
			pf_status.lcounters[LCNT_SRCNODES]++;
		if ((*sn) == NULL)
			return (-1);

		pf_init_threshold(&(*sn)->conn_rate,
		    rule->max_src_conn_rate.limit,
		    rule->max_src_conn_rate.seconds);

		(*sn)->type = type;
		(*sn)->af = af;
		(*sn)->rule.ptr = rule;
		pf_addrcpy(&(*sn)->addr, src, af);
		if (raddr)
			pf_addrcpy(&(*sn)->raddr, raddr, af);
		if (RB_INSERT(pf_src_tree,
		    &tree_src_tracking, *sn) != NULL) {
			if (pf_status.debug >= LOG_NOTICE) {
				log(LOG_NOTICE,
				    "pf: src_tree insert failed: ");
				pf_print_host(&(*sn)->addr, 0, af);
				addlog("\n");
			}
			pool_put(&pf_src_tree_pl, *sn);
			return (-1);
		}
		(*sn)->creation = getuptime();
		(*sn)->rule.ptr->src_nodes++;
		if (kif != NULL) {
			(*sn)->kif = kif;
			pfi_kif_ref(kif, PFI_KIF_REF_SRCNODE);
		}
		pf_status.scounters[SCNT_SRC_NODE_INSERT]++;
		pf_status.src_nodes++;
	} else {
		if (rule->max_src_states &&
		    (*sn)->states >= rule->max_src_states) {
			pf_status.lcounters[LCNT_SRCSTATES]++;
			return (-1);
		}
	}
	return (0);
}

void
pf_remove_src_node(struct pf_src_node *sn)
{
	if (sn->states > 0 || sn->expire > getuptime())
		return;

	sn->rule.ptr->src_nodes--;
	if (sn->rule.ptr->states_cur == 0 &&
	    sn->rule.ptr->src_nodes == 0)
		pf_rm_rule(NULL, sn->rule.ptr);
	RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
	pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
	pf_status.src_nodes--;
	pfi_kif_unref(sn->kif, PFI_KIF_REF_SRCNODE);
	pool_put(&pf_src_tree_pl, sn);
}

struct pf_src_node *
pf_get_src_node(struct pf_state *st, enum pf_sn_types type)
{
	struct pf_sn_item	*sni;

	SLIST_FOREACH(sni, &st->src_nodes, next)
		if (sni->sn->type == type)
			return (sni->sn);
	return (NULL);
}

void
pf_state_rm_src_node(struct pf_state *st, struct pf_src_node *sn)
{
	struct pf_sn_item	*sni, *snin, *snip = NULL;

	for (sni = SLIST_FIRST(&st->src_nodes); sni; sni = snin) {
		snin = SLIST_NEXT(sni, next);
		if (sni->sn == sn) {
			if (snip)
				SLIST_REMOVE_AFTER(snip, next);
			else
				SLIST_REMOVE_HEAD(&st->src_nodes, next);
			pool_put(&pf_sn_item_pl, sni);
			sni = NULL;
			sn->states--;
		}
		if (sni != NULL)
			snip = sni;
	}
}

/* state table stuff */

static inline int
pf_state_compare_key(const struct pf_state_key *a,
    const struct pf_state_key *b)
{
	int	diff;

	if ((diff = a->hash - b->hash) != 0)
		return (diff);
	if ((diff = a->proto - b->proto) != 0)
		return (diff);
	if ((diff = a->af - b->af) != 0)
		return (diff);
	if ((diff = pf_addr_compare(&a->addr[0], &b->addr[0], a->af)) != 0)
		return (diff);
	if ((diff = pf_addr_compare(&a->addr[1], &b->addr[1], a->af)) != 0)
		return (diff);
	if ((diff = a->port[0] - b->port[0]) != 0)
		return (diff);
	if ((diff = a->port[1] - b->port[1]) != 0)
		return (diff);
	if ((diff = a->rdomain - b->rdomain) != 0)
		return (diff);
	return (0);
}

static inline int
pf_state_compare_id(const struct pf_state *a, const struct pf_state *b)
{
	if (a->id > b->id)
		return (1);
	if (a->id < b->id)
		return (-1);
	if (a->creatorid > b->creatorid)
		return (1);
	if (a->creatorid < b->creatorid)
		return (-1);

	return (0);
}
/*
 * on failure, pf_state_key_attach() releases the pf_state_key
 * reference and returns NULL.
 */
struct pf_state_key *
pf_state_key_attach(struct pf_state_key *sk, struct pf_state *st, int idx)
{
	struct pf_state_item	*si;
	struct pf_state_key	*cur;
	struct pf_state		*oldst = NULL;

	PF_ASSERT_LOCKED();

	KASSERT(st->key[idx] == NULL);
	sk->sk_removed = 0;
	cur = RBT_INSERT(pf_state_tree, &pf_statetbl, sk);
	if (cur != NULL) {
		sk->sk_removed = 1;
		/* key exists. check for same kif, if none, add to key */
		TAILQ_FOREACH(si, &cur->sk_states, si_entry) {
			struct pf_state *sist = si->si_st;
			if (sist->kif == st->kif &&
			    ((sist->key[PF_SK_WIRE]->af == sk->af &&
			    sist->direction == st->direction) ||
			    (sist->key[PF_SK_WIRE]->af !=
			    sist->key[PF_SK_STACK]->af &&
			    sk->af == sist->key[PF_SK_STACK]->af &&
			    sist->direction != st->direction))) {
				int reuse = 0;

				if (sk->proto == IPPROTO_TCP &&
				    sist->src.state >= TCPS_FIN_WAIT_2 &&
				    sist->dst.state >= TCPS_FIN_WAIT_2)
					reuse = 1;
				if (pf_status.debug >= LOG_NOTICE) {
					log(LOG_NOTICE,
					    "pf: %s key attach %s on %s: ",
					    (idx == PF_SK_WIRE) ?
					    "wire" : "stack",
					    reuse ? "reuse" : "failed",
					    st->kif->pfik_name);
					pf_print_state_parts(st,
					    (idx == PF_SK_WIRE) ? sk : NULL,
					    (idx == PF_SK_STACK) ? sk : NULL);
					addlog(", existing: ");
					pf_print_state_parts(sist,
					    (idx == PF_SK_WIRE) ? sk : NULL,
					    (idx == PF_SK_STACK) ? sk : NULL);
					addlog("\n");
				}
				if (reuse) {
					pf_set_protostate(sist, PF_PEER_BOTH,
					    TCPS_CLOSED);
					/* remove late or sks can go away */
					oldst = sist;
				} else {
					pf_state_key_unref(sk);
					return (NULL);	/* collision! */
				}
			}
		}

		/* reuse the existing state key */
		pf_state_key_unref(sk);
		sk = cur;
	}

	if ((si = pool_get(&pf_state_item_pl, PR_NOWAIT)) == NULL) {
		if (TAILQ_EMPTY(&sk->sk_states)) {
			KASSERT(cur == NULL);
			RBT_REMOVE(pf_state_tree, &pf_statetbl, sk);
			sk->sk_removed = 1;
			pf_state_key_unref(sk);
		}

		return (NULL);
	}

	st->key[idx] = pf_state_key_ref(sk); /* give a ref to state */
	si->si_st = pf_state_ref(st);

	/* list is sorted, if-bound states before floating */
	if (st->kif == pfi_all)
		TAILQ_INSERT_TAIL(&sk->sk_states, si, si_entry);
	else
		TAILQ_INSERT_HEAD(&sk->sk_states, si, si_entry);

	if (oldst)
		pf_remove_state(oldst);

	/* caller owns the pf_state ref, which owns a pf_state_key ref now */
	return (sk);
}

void
pf_detach_state(struct pf_state *st)
{
	KASSERT(st->key[PF_SK_WIRE] != NULL);
	pf_state_key_detach(st, PF_SK_WIRE);

	KASSERT(st->key[PF_SK_STACK] != NULL);
	if (st->key[PF_SK_STACK] != st->key[PF_SK_WIRE])
		pf_state_key_detach(st, PF_SK_STACK);
}

void
pf_state_key_detach(struct pf_state *st, int idx)
{
	struct pf_state_item	*si;
	struct pf_state_key	*sk;

	PF_ASSERT_LOCKED();

	sk = st->key[idx];
	if (sk == NULL)
		return;

	TAILQ_FOREACH(si, &sk->sk_states, si_entry) {
		if (si->si_st == st)
			break;
	}
	if (si == NULL)
		return;

	TAILQ_REMOVE(&sk->sk_states, si, si_entry);
	pool_put(&pf_state_item_pl, si);

	if (TAILQ_EMPTY(&sk->sk_states)) {
		RBT_REMOVE(pf_state_tree, &pf_statetbl, sk);
		sk->sk_removed = 1;
		pf_state_key_unlink_reverse(sk);
		pf_state_key_unlink_inpcb(sk);
		pf_state_key_unref(sk);
	}

	pf_state_unref(st);
}

struct pf_state_key *
pf_alloc_state_key(int pool_flags)
{
	struct pf_state_key	*sk;

	if ((sk = pool_get(&pf_state_key_pl, pool_flags)) == NULL)
		return (NULL);

	PF_REF_INIT(sk->sk_refcnt);
	TAILQ_INIT(&sk->sk_states);
	sk->sk_removed = 1;

	return (sk);
}

static __inline int
pf_state_key_addr_setup(struct pf_pdesc *pd, void *arg, int sidx,
    struct pf_addr *saddr, int didx, struct pf_addr *daddr, int af, int multi)
{
	struct pf_state_key_cmp *key = arg;
#ifdef INET6
	struct pf_addr *target;

	if (af == AF_INET || pd->proto != IPPROTO_ICMPV6)
		goto copy;

	switch (pd->hdr.icmp6.icmp6_type) {
	case ND_NEIGHBOR_SOLICIT:
		if (multi)
			return (-1);
		target = (struct pf_addr *)&pd->hdr.nd_ns.nd_ns_target;
		daddr = target;
		break;
	case ND_NEIGHBOR_ADVERT:
		if (multi)
			return (-1);
		target = (struct pf_addr *)&pd->hdr.nd_ns.nd_ns_target;
		saddr = target;
		if (IN6_IS_ADDR_MULTICAST(&pd->dst->v6)) {
			key->addr[didx].addr32[0] = 0;
			key->addr[didx].addr32[1] = 0;
			key->addr[didx].addr32[2] = 0;
			key->addr[didx].addr32[3] = 0;
			daddr = NULL; /* overwritten */
		}
		break;
	default:
		if (multi) {
			key->addr[sidx].addr32[0] = __IPV6_ADDR_INT32_MLL;
			key->addr[sidx].addr32[1] = 0;
			key->addr[sidx].addr32[2] = 0;
			key->addr[sidx].addr32[3] = __IPV6_ADDR_INT32_ONE;
			saddr = NULL; /* overwritten */
		}
	}
 copy:
#endif /* INET6 */
	if (saddr)
		pf_addrcpy(&key->addr[sidx], saddr, af);
	if (daddr)
		pf_addrcpy(&key->addr[didx], daddr, af);

	return (0);
}
int
pf_state_key_setup(struct pf_pdesc *pd, struct pf_state_key **skw,
    struct pf_state_key **sks, int rtableid)
{
	/* if returning error we MUST pool_put state keys ourselves */
	struct pf_state_key *sk1, *sk2;
	u_int wrdom = pd->rdomain;
	int afto = pd->af != pd->naf;

	if ((sk1 = pf_alloc_state_key(PR_NOWAIT | PR_ZERO)) == NULL)
		return (ENOMEM);

	pf_state_key_addr_setup(pd, sk1, pd->sidx, pd->src, pd->didx, pd->dst,
	    pd->af, 0);
	sk1->port[pd->sidx] = pd->osport;
	sk1->port[pd->didx] = pd->odport;
	sk1->proto = pd->proto;
	sk1->af = pd->af;
	sk1->rdomain = pd->rdomain;
	sk1->hash = pf_pkt_hash(sk1->af, sk1->proto,
	    &sk1->addr[0], &sk1->addr[1], sk1->port[0], sk1->port[1]);
	if (rtableid >= 0)
		wrdom = rtable_l2(rtableid);

	if (PF_ANEQ(&pd->nsaddr, pd->src, pd->af) ||
	    PF_ANEQ(&pd->ndaddr, pd->dst, pd->af) ||
	    pd->nsport != pd->osport || pd->ndport != pd->odport ||
	    wrdom != pd->rdomain || afto) {	/* NAT/NAT64 */
		if ((sk2 = pf_alloc_state_key(PR_NOWAIT | PR_ZERO)) == NULL) {
			pf_state_key_unref(sk1);
			return (ENOMEM);
		}
		pf_state_key_addr_setup(pd, sk2, afto ? pd->didx : pd->sidx,
		    &pd->nsaddr, afto ? pd->sidx : pd->didx, &pd->ndaddr,
		    pd->naf, 0);
		sk2->port[afto ? pd->didx : pd->sidx] = pd->nsport;
		sk2->port[afto ? pd->sidx : pd->didx] = pd->ndport;
		if (afto) {
			switch (pd->proto) {
			case IPPROTO_ICMP:
				sk2->proto = IPPROTO_ICMPV6;
				break;
			case IPPROTO_ICMPV6:
				sk2->proto = IPPROTO_ICMP;
				break;
			default:
				sk2->proto = pd->proto;
			}
		} else
			sk2->proto = pd->proto;
		sk2->af = pd->naf;
		sk2->rdomain = wrdom;
		sk2->hash = pf_pkt_hash(sk2->af, sk2->proto,
		    &sk2->addr[0], &sk2->addr[1], sk2->port[0], sk2->port[1]);
	} else
		sk2 = pf_state_key_ref(sk1);

	if (pd->dir == PF_IN) {
		*skw = sk1;
		*sks = sk2;
	} else {
		*sks = sk1;
		*skw = sk2;
	}

	if (pf_status.debug >= LOG_DEBUG) {
		log(LOG_DEBUG, "pf: key setup: ");
		pf_print_state_parts(NULL, *skw, *sks);
		addlog("\n");
	}

	return (0);
}
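/*
 * Worked example (illustration only, private/documentation addresses):
 * an outbound TCP connection from 10.0.0.5:34567 to 203.0.113.7:80
 * hitting a "nat-to 198.51.100.2" rule that picks source port 61000.
 * pf_state_key_setup() builds sk1 from the pre-NAT header and sk2 from
 * the translated one, and since pd->dir == PF_OUT assigns:
 *
 *	*sks (stack side) = { 10.0.0.5:34567 -> 203.0.113.7:80 }
 *	*skw (wire side)  = { 198.51.100.2:61000 -> 203.0.113.7:80 }
 *
 * With no translation at all, the PF_ANEQ/port/rdomain tests are all
 * false and both pointers share a single refcounted key.
 */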
/*
 * pf_state_insert() does the following:
 * - links the pf_state up with pf_state_key(s).
 * - inserts the pf_state_keys into pf_state_tree.
 * - inserts the pf_state into pf_state_tree_id.
 * - tells pfsync about the state.
 *
 * pf_state_insert() owns the references to the pf_state_key structs
 * it is given. on failure to insert, these references are released.
 * on success, the caller owns a pf_state reference that allows it
 * to access the state keys.
 */

int
pf_state_insert(struct pfi_kif *kif, struct pf_state_key **skwp,
    struct pf_state_key **sksp, struct pf_state *st)
{
	struct pf_state_key *skw = *skwp;
	struct pf_state_key *sks = *sksp;
	int same = (skw == sks);

	PF_ASSERT_LOCKED();

	st->kif = kif;
	PF_STATE_ENTER_WRITE();

	skw = pf_state_key_attach(skw, st, PF_SK_WIRE);
	if (skw == NULL) {
		pf_state_key_unref(sks);
		PF_STATE_EXIT_WRITE();
		return (-1);
	}

	if (same) {
		/* pf_state_key_attach might have swapped skw */
		pf_state_key_unref(sks);
		st->key[PF_SK_STACK] = sks = pf_state_key_ref(skw);
	} else if (pf_state_key_attach(sks, st, PF_SK_STACK) == NULL) {
		pf_state_key_detach(st, PF_SK_WIRE);
		PF_STATE_EXIT_WRITE();
		return (-1);
	}

	if (st->id == 0 && st->creatorid == 0) {
		st->id = htobe64(pf_status.stateid++);
		st->creatorid = pf_status.hostid;
	}
	if (RBT_INSERT(pf_state_tree_id, &tree_id, st) != NULL) {
		if (pf_status.debug >= LOG_NOTICE) {
			log(LOG_NOTICE, "pf: state insert failed: "
			    "id: %016llx creatorid: %08x",
			    betoh64(st->id), ntohl(st->creatorid));
			addlog("\n");
		}
		pf_detach_state(st);
		PF_STATE_EXIT_WRITE();
		return (-1);
	}
	pf_state_list_insert(&pf_state_list, st);
	pf_status.fcounters[FCNT_STATE_INSERT]++;
	pf_status.states++;
	pfi_kif_ref(kif, PFI_KIF_REF_STATE);
#if NPFSYNC > 0
	pfsync_insert_state(st);
#endif /* NPFSYNC > 0 */
	PF_STATE_EXIT_WRITE();

	*skwp = skw;
	*sksp = sks;

	return (0);
}
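/*
 * Illustrative sketch (not part of the original code) of the ownership
 * contract described above; pd, kif, st and rtableid stand in for a
 * caller's context.  The caller hands both key references to
 * pf_state_insert() and must not release them again on failure.
 */
#if 0
	struct pf_state_key *skw = NULL, *sks = NULL;

	if (pf_state_key_setup(pd, &skw, &sks, rtableid) != 0)
		return (PF_DROP);	/* setup released anything it made */
	if (pf_state_insert(kif, &skw, &sks, st) != 0)
		return (PF_DROP);	/* key refs already released */
	/* success: st->key[] now pins the (possibly swapped) keys */
#endif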
"out" : "in", pd->kif->pfik_name); 1132 pf_print_state_parts(NULL, (struct pf_state_key *)key, NULL); 1133 addlog("\n"); 1134 } 1135 1136 inp_sk = NULL; 1137 pkt_sk = NULL; 1138 sk = NULL; 1139 if (pd->dir == PF_OUT) { 1140 /* first if block deals with outbound forwarded packet */ 1141 pkt_sk = pd->m->m_pkthdr.pf.statekey; 1142 1143 if (!pf_state_key_isvalid(pkt_sk)) { 1144 pf_mbuf_unlink_state_key(pd->m); 1145 pkt_sk = NULL; 1146 } 1147 1148 if (pkt_sk && pf_state_key_isvalid(pkt_sk->sk_reverse)) 1149 sk = pkt_sk->sk_reverse; 1150 1151 if (pkt_sk == NULL) { 1152 /* here we deal with local outbound packet */ 1153 if (pd->m->m_pkthdr.pf.inp != NULL) { 1154 inp_sk = pd->m->m_pkthdr.pf.inp->inp_pf_sk; 1155 if (pf_state_key_isvalid(inp_sk)) 1156 sk = inp_sk; 1157 else 1158 pf_inpcb_unlink_state_key( 1159 pd->m->m_pkthdr.pf.inp); 1160 } 1161 } 1162 } 1163 1164 if (sk == NULL) { 1165 if ((sk = RBT_FIND(pf_state_tree, &pf_statetbl, 1166 (struct pf_state_key *)key)) == NULL) 1167 return (PF_DROP); 1168 if (pd->dir == PF_OUT && pkt_sk && 1169 pf_compare_state_keys(pkt_sk, sk, pd->kif, pd->dir) == 0) 1170 pf_state_key_link_reverse(sk, pkt_sk); 1171 else if (pd->dir == PF_OUT && pd->m->m_pkthdr.pf.inp && 1172 !pd->m->m_pkthdr.pf.inp->inp_pf_sk && !sk->sk_inp) 1173 pf_state_key_link_inpcb(sk, pd->m->m_pkthdr.pf.inp); 1174 } 1175 1176 /* remove firewall data from outbound packet */ 1177 if (pd->dir == PF_OUT) 1178 pf_pkt_addr_changed(pd->m); 1179 1180 /* list is sorted, if-bound states before floating ones */ 1181 TAILQ_FOREACH(si, &sk->sk_states, si_entry) { 1182 struct pf_state *sist = si->si_st; 1183 if (sist->timeout != PFTM_PURGE && 1184 (sist->kif == pfi_all || sist->kif == pd->kif) && 1185 ((sist->key[PF_SK_WIRE]->af == sist->key[PF_SK_STACK]->af && 1186 sk == (pd->dir == PF_IN ? sist->key[PF_SK_WIRE] : 1187 sist->key[PF_SK_STACK])) || 1188 (sist->key[PF_SK_WIRE]->af != sist->key[PF_SK_STACK]->af 1189 && pd->dir == PF_IN && (sk == sist->key[PF_SK_STACK] || 1190 sk == sist->key[PF_SK_WIRE])))) { 1191 st = sist; 1192 break; 1193 } 1194 } 1195 1196 if (st == NULL) 1197 return (PF_DROP); 1198 if (ISSET(st->state_flags, PFSTATE_INP_UNLINKED)) 1199 return (PF_DROP); 1200 1201 if (st->rule.ptr->pktrate.limit && pd->dir == st->direction) { 1202 pf_add_threshold(&st->rule.ptr->pktrate); 1203 if (pf_check_threshold(&st->rule.ptr->pktrate)) 1204 return (PF_DROP); 1205 } 1206 1207 *stp = st; 1208 1209 return (PF_MATCH); 1210 } 1211 1212 struct pf_state * 1213 pf_find_state_all(struct pf_state_key_cmp *key, u_int dir, int *more) 1214 { 1215 struct pf_state_key *sk; 1216 struct pf_state_item *si, *ret = NULL; 1217 1218 pf_status.fcounters[FCNT_STATE_SEARCH]++; 1219 1220 sk = RBT_FIND(pf_state_tree, &pf_statetbl, (struct pf_state_key *)key); 1221 1222 if (sk != NULL) { 1223 TAILQ_FOREACH(si, &sk->sk_states, si_entry) { 1224 struct pf_state *sist = si->si_st; 1225 if (dir == PF_INOUT || 1226 (sk == (dir == PF_IN ? sist->key[PF_SK_WIRE] : 1227 sist->key[PF_SK_STACK]))) { 1228 if (more == NULL) 1229 return (sist); 1230 1231 if (ret) 1232 (*more)++; 1233 else 1234 ret = si; 1235 } 1236 } 1237 } 1238 return (ret ? 
struct pf_state *
pf_find_state_all(struct pf_state_key_cmp *key, u_int dir, int *more)
{
	struct pf_state_key	*sk;
	struct pf_state_item	*si, *ret = NULL;

	pf_status.fcounters[FCNT_STATE_SEARCH]++;

	sk = RBT_FIND(pf_state_tree, &pf_statetbl, (struct pf_state_key *)key);

	if (sk != NULL) {
		TAILQ_FOREACH(si, &sk->sk_states, si_entry) {
			struct pf_state *sist = si->si_st;
			if (dir == PF_INOUT ||
			    (sk == (dir == PF_IN ? sist->key[PF_SK_WIRE] :
			    sist->key[PF_SK_STACK]))) {
				if (more == NULL)
					return (sist);

				if (ret)
					(*more)++;
				else
					ret = si;
			}
		}
	}
	return (ret ? ret->si_st : NULL);
}

void
pf_state_peer_hton(const struct pf_state_peer *s, struct pfsync_state_peer *d)
{
	d->seqlo = htonl(s->seqlo);
	d->seqhi = htonl(s->seqhi);
	d->seqdiff = htonl(s->seqdiff);
	d->max_win = htons(s->max_win);
	d->mss = htons(s->mss);
	d->state = s->state;
	d->wscale = s->wscale;
	if (s->scrub) {
		d->scrub.pfss_flags =
		    htons(s->scrub->pfss_flags & PFSS_TIMESTAMP);
		d->scrub.pfss_ttl = (s)->scrub->pfss_ttl;
		d->scrub.pfss_ts_mod = htonl((s)->scrub->pfss_ts_mod);
		d->scrub.scrub_flag = PFSYNC_SCRUB_FLAG_VALID;
	}
}

void
pf_state_peer_ntoh(const struct pfsync_state_peer *s, struct pf_state_peer *d)
{
	d->seqlo = ntohl(s->seqlo);
	d->seqhi = ntohl(s->seqhi);
	d->seqdiff = ntohl(s->seqdiff);
	d->max_win = ntohs(s->max_win);
	d->mss = ntohs(s->mss);
	d->state = s->state;
	d->wscale = s->wscale;
	if (s->scrub.scrub_flag == PFSYNC_SCRUB_FLAG_VALID &&
	    d->scrub != NULL) {
		d->scrub->pfss_flags =
		    ntohs(s->scrub.pfss_flags) & PFSS_TIMESTAMP;
		d->scrub->pfss_ttl = s->scrub.pfss_ttl;
		d->scrub->pfss_ts_mod = ntohl(s->scrub.pfss_ts_mod);
	}
}

void
pf_state_export(struct pfsync_state *sp, struct pf_state *st)
{
	int32_t expire;

	memset(sp, 0, sizeof(struct pfsync_state));

	/* copy from state key */
	sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0];
	sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1];
	sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0];
	sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1];
	sp->key[PF_SK_WIRE].rdomain = htons(st->key[PF_SK_WIRE]->rdomain);
	sp->key[PF_SK_WIRE].af = st->key[PF_SK_WIRE]->af;
	sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0];
	sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1];
	sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0];
	sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1];
	sp->key[PF_SK_STACK].rdomain = htons(st->key[PF_SK_STACK]->rdomain);
	sp->key[PF_SK_STACK].af = st->key[PF_SK_STACK]->af;
	sp->rtableid[PF_SK_WIRE] = htonl(st->rtableid[PF_SK_WIRE]);
	sp->rtableid[PF_SK_STACK] = htonl(st->rtableid[PF_SK_STACK]);
	sp->proto = st->key[PF_SK_WIRE]->proto;
	sp->af = st->key[PF_SK_WIRE]->af;

	/* copy from state */
	strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname));
	sp->rt = st->rt;
	sp->rt_addr = st->rt_addr;
	sp->creation = htonl(getuptime() - st->creation);
	expire = pf_state_expires(st, st->timeout);
	if (expire <= getuptime())
		sp->expire = htonl(0);
	else
		sp->expire = htonl(expire - getuptime());

	sp->direction = st->direction;
#if NPFLOG > 0
	sp->log = st->log;
#endif /* NPFLOG > 0 */
	sp->timeout = st->timeout;
	sp->state_flags = htons(st->state_flags);
	if (!SLIST_EMPTY(&st->src_nodes))
		sp->sync_flags |= PFSYNC_FLAG_SRCNODE;

	sp->id = st->id;
	sp->creatorid = st->creatorid;
	pf_state_peer_hton(&st->src, &sp->src);
	pf_state_peer_hton(&st->dst, &sp->dst);

	if (st->rule.ptr == NULL)
		sp->rule = htonl(-1);
	else
		sp->rule = htonl(st->rule.ptr->nr);
	if (st->anchor.ptr == NULL)
		sp->anchor = htonl(-1);
	else
		sp->anchor = htonl(st->anchor.ptr->nr);
	sp->nat_rule = htonl(-1);	/* left for compat, nat_rule is gone */
	pf_state_counter_hton(st->packets[0], sp->packets[0]);
	pf_state_counter_hton(st->packets[1], sp->packets[1]);
	pf_state_counter_hton(st->bytes[0], sp->bytes[0]);
	pf_state_counter_hton(st->bytes[1], sp->bytes[1]);

	sp->max_mss = htons(st->max_mss);
	sp->min_ttl = st->min_ttl;
	sp->set_tos = st->set_tos;
	sp->set_prio[0] = st->set_prio[0];
	sp->set_prio[1] = st->set_prio[1];
}

int
pf_state_alloc_scrub_memory(const struct pfsync_state_peer *s,
    struct pf_state_peer *d)
{
	if (s->scrub.scrub_flag && d->scrub == NULL)
		return (pf_normalize_tcp_alloc(d));

	return (0);
}

#if NPFSYNC > 0
int
pf_state_import(const struct pfsync_state *sp, int flags)
{
	struct pf_state		*st = NULL;
	struct pf_state_key	*skw = NULL, *sks = NULL;
	struct pf_rule		*r = NULL;
	struct pfi_kif		*kif;
	int			 pool_flags;
	int			 error = ENOMEM;
	int			 n = 0;

	PF_ASSERT_LOCKED();

	if (sp->creatorid == 0) {
		DPFPRINTF(LOG_NOTICE, "%s: invalid creator id: %08x", __func__,
		    ntohl(sp->creatorid));
		return (EINVAL);
	}

	if ((kif = pfi_kif_get(sp->ifname, NULL)) == NULL) {
		DPFPRINTF(LOG_NOTICE, "%s: unknown interface: %s", __func__,
		    sp->ifname);
		if (flags & PFSYNC_SI_IOCTL)
			return (EINVAL);
		return (0);	/* skip this state */
	}

	if (sp->af == 0)
		return (0);	/* skip this state */

	/*
	 * If the ruleset checksums match or the state is coming from the
	 * ioctl, it's safe to associate the state with the rule of that
	 * number.
	 */
	if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) &&
	    (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) &&
	    ntohl(sp->rule) < pf_main_ruleset.rules.active.rcount) {
		TAILQ_FOREACH(r, pf_main_ruleset.rules.active.ptr, entries)
			if (ntohl(sp->rule) == n++)
				break;
	} else
		r = &pf_default_rule;

	if ((r->max_states && r->states_cur >= r->max_states))
		goto cleanup;

	if (flags & PFSYNC_SI_IOCTL)
		pool_flags = PR_WAITOK | PR_LIMITFAIL | PR_ZERO;
	else
		pool_flags = PR_NOWAIT | PR_LIMITFAIL | PR_ZERO;

	if ((st = pool_get(&pf_state_pl, pool_flags)) == NULL)
		goto cleanup;

	if ((skw = pf_alloc_state_key(pool_flags)) == NULL)
		goto cleanup;

	if ((sp->key[PF_SK_WIRE].af &&
	    (sp->key[PF_SK_WIRE].af != sp->key[PF_SK_STACK].af)) ||
	    PF_ANEQ(&sp->key[PF_SK_WIRE].addr[0],
	    &sp->key[PF_SK_STACK].addr[0], sp->af) ||
	    PF_ANEQ(&sp->key[PF_SK_WIRE].addr[1],
	    &sp->key[PF_SK_STACK].addr[1], sp->af) ||
	    sp->key[PF_SK_WIRE].port[0] != sp->key[PF_SK_STACK].port[0] ||
	    sp->key[PF_SK_WIRE].port[1] != sp->key[PF_SK_STACK].port[1] ||
	    sp->key[PF_SK_WIRE].rdomain != sp->key[PF_SK_STACK].rdomain) {
		if ((sks = pf_alloc_state_key(pool_flags)) == NULL)
			goto cleanup;
	} else
		sks = pf_state_key_ref(skw);

	/* allocate memory for scrub info */
	if (pf_state_alloc_scrub_memory(&sp->src, &st->src) ||
	    pf_state_alloc_scrub_memory(&sp->dst, &st->dst))
		goto cleanup;

	/* copy to state key(s) */
	skw->addr[0] = sp->key[PF_SK_WIRE].addr[0];
	skw->addr[1] = sp->key[PF_SK_WIRE].addr[1];
	skw->port[0] = sp->key[PF_SK_WIRE].port[0];
	skw->port[1] = sp->key[PF_SK_WIRE].port[1];
	skw->rdomain = ntohs(sp->key[PF_SK_WIRE].rdomain);
	skw->proto = sp->proto;
	if (!(skw->af = sp->key[PF_SK_WIRE].af))
		skw->af = sp->af;
	skw->hash = pf_pkt_hash(skw->af, skw->proto,
	    &skw->addr[0], &skw->addr[1], skw->port[0], skw->port[1]);

	if (sks != skw) {
		sks->addr[0] = sp->key[PF_SK_STACK].addr[0];
		sks->addr[1] = sp->key[PF_SK_STACK].addr[1];
		sks->port[0] = sp->key[PF_SK_STACK].port[0];
		sks->port[1] = sp->key[PF_SK_STACK].port[1];
		sks->rdomain = ntohs(sp->key[PF_SK_STACK].rdomain);
		if (!(sks->af = sp->key[PF_SK_STACK].af))
			sks->af = sp->af;
		if (sks->af != skw->af) {
			switch (sp->proto) {
			case IPPROTO_ICMP:
				sks->proto = IPPROTO_ICMPV6;
				break;
			case IPPROTO_ICMPV6:
				sks->proto = IPPROTO_ICMP;
				break;
			default:
				sks->proto = sp->proto;
			}
		} else
			sks->proto = sp->proto;

		if (((sks->af != AF_INET) && (sks->af != AF_INET6)) ||
		    ((skw->af != AF_INET) && (skw->af != AF_INET6))) {
			error = EINVAL;
			goto cleanup;
		}

		sks->hash = pf_pkt_hash(sks->af, sks->proto,
		    &sks->addr[0], &sks->addr[1], sks->port[0], sks->port[1]);

	} else if ((sks->af != AF_INET) && (sks->af != AF_INET6)) {
		error = EINVAL;
		goto cleanup;
	}
	st->rtableid[PF_SK_WIRE] = ntohl(sp->rtableid[PF_SK_WIRE]);
	st->rtableid[PF_SK_STACK] = ntohl(sp->rtableid[PF_SK_STACK]);

	/* copy to state */
	st->rt_addr = sp->rt_addr;
	st->rt = sp->rt;
	st->creation = getuptime() - ntohl(sp->creation);
	st->expire = getuptime();
	if (ntohl(sp->expire)) {
		u_int32_t timeout;

		timeout = r->timeout[sp->timeout];
		if (!timeout)
			timeout = pf_default_rule.timeout[sp->timeout];

		/* sp->expire may have been adaptively scaled by export. */
		st->expire -= timeout - ntohl(sp->expire);
	}

	st->direction = sp->direction;
	st->log = sp->log;
	st->timeout = sp->timeout;
	st->state_flags = ntohs(sp->state_flags);
	st->max_mss = ntohs(sp->max_mss);
	st->min_ttl = sp->min_ttl;
	st->set_tos = sp->set_tos;
	st->set_prio[0] = sp->set_prio[0];
	st->set_prio[1] = sp->set_prio[1];

	st->id = sp->id;
	st->creatorid = sp->creatorid;
	pf_state_peer_ntoh(&sp->src, &st->src);
	pf_state_peer_ntoh(&sp->dst, &st->dst);

	st->rule.ptr = r;
	st->anchor.ptr = NULL;

	st->pfsync_time = getuptime();
	st->sync_state = PFSYNC_S_NONE;

	PF_REF_INIT(st->refcnt);
	mtx_init(&st->mtx, IPL_NET);

	/* XXX when we have anchors, use STATE_INC_COUNTERS */
	r->states_cur++;
	r->states_tot++;

#if NPFSYNC > 0
	if (!ISSET(flags, PFSYNC_SI_IOCTL))
		SET(st->state_flags, PFSTATE_NOSYNC);
#endif
	/*
	 * We just set the PFSTATE_NOSYNC bit, which prevents
	 * pfsync_insert_state() from inserting the state into pfsync.
	 */
	if (pf_state_insert(kif, &skw, &sks, st) != 0) {
		/* XXX when we have anchors, use STATE_DEC_COUNTERS */
		r->states_cur--;
		error = EEXIST;
		goto cleanup_state;
	}

#if NPFSYNC > 0
	if (!ISSET(flags, PFSYNC_SI_IOCTL)) {
		CLR(st->state_flags, PFSTATE_NOSYNC);
		if (ISSET(st->state_flags, PFSTATE_ACK))
			pfsync_iack(st);
	}
	CLR(st->state_flags, PFSTATE_ACK);
#endif

	return (0);

 cleanup:
	if (skw != NULL)
		pf_state_key_unref(skw);
	if (sks != NULL)
		pf_state_key_unref(sks);

 cleanup_state:	/* pf_state_insert frees the state keys */
	if (st) {
		if (st->dst.scrub)
			pool_put(&pf_state_scrub_pl, st->dst.scrub);
		if (st->src.scrub)
			pool_put(&pf_state_scrub_pl, st->src.scrub);
		pool_put(&pf_state_pl, st);
	}
	return (error);
}
#endif /* NPFSYNC > 0 */

/* END state table stuff */

void
pf_purge_timeout(void *unused)
{
	/* XXX move to systqmp to avoid KERNEL_LOCK */
	task_add(systq, &pf_purge_task);
}

void
pf_purge(void *xnloops)
{
	int *nloops = xnloops;

	/*
	 * process a fraction of the state table every second
	 * Note:
	 *	we no longer need PF_LOCK() here, because
	 *	pf_purge_expired_states() uses pf_state_lock to maintain
	 *	consistency.
	 */
	if (pf_default_rule.timeout[PFTM_INTERVAL] > 0)
		pf_purge_expired_states(1 + (pf_status.states
		    / pf_default_rule.timeout[PFTM_INTERVAL]));

	NET_LOCK();

	PF_LOCK();
	/* purge other expired types every PFTM_INTERVAL seconds */
	if (++(*nloops) >= pf_default_rule.timeout[PFTM_INTERVAL])
		pf_purge_expired_src_nodes();
	PF_UNLOCK();

	/*
	 * Fragments don't require PF_LOCK(), they use their own lock.
	 */
	if ((*nloops) >= pf_default_rule.timeout[PFTM_INTERVAL]) {
		pf_purge_expired_fragments();
		*nloops = 0;
	}
	NET_UNLOCK();

	timeout_add_sec(&pf_purge_to, 1);
}
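/*
 * Example (illustration only): with the default "set timeout interval 10"
 * and 50000 states, each one-second tick above checks
 * 1 + 50000 / 10 = 5001 list entries, so the whole state table is
 * scanned roughly once per interval; src nodes and fragments are only
 * purged on every PFTM_INTERVAL-th tick.
 */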
int32_t
pf_state_expires(const struct pf_state *st, uint8_t stimeout)
{
	u_int32_t	timeout;
	u_int32_t	start;
	u_int32_t	end;
	u_int32_t	states;

	/*
	 * pf_state_expires is used by the state purge task to
	 * decide if a state is a candidate for cleanup, and by the
	 * pfsync state export code to populate an expiry time.
	 *
	 * this function may be called by the state purge task while
	 * the state is being modified. avoid inconsistent reads of
	 * state->timeout by having the caller do the read (and any
	 * checks it needs to do on the same variable) and then pass
	 * their view of the timeout in here for this function to use.
	 * the only consequence of using a stale timeout value is
	 * that the state won't be a candidate for purging until the
	 * next pass of the purge task.
	 */

	/* handle all PFTM_* >= PFTM_MAX here */
	if (stimeout >= PFTM_MAX)
		return (0);

	KASSERT(stimeout < PFTM_MAX);

	timeout = st->rule.ptr->timeout[stimeout];
	if (!timeout)
		timeout = pf_default_rule.timeout[stimeout];

	start = st->rule.ptr->timeout[PFTM_ADAPTIVE_START];
	if (start) {
		end = st->rule.ptr->timeout[PFTM_ADAPTIVE_END];
		states = st->rule.ptr->states_cur;
	} else {
		start = pf_default_rule.timeout[PFTM_ADAPTIVE_START];
		end = pf_default_rule.timeout[PFTM_ADAPTIVE_END];
		states = pf_status.states;
	}
	if (end && states > start && start < end) {
		if (states >= end)
			return (0);

		timeout = (u_int64_t)timeout * (end - states) / (end - start);
	}

	return (st->expire + timeout);
}
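/*
 * Worked example (illustration only): with adaptive.start 6000,
 * adaptive.end 12000 and a tcp.established timeout of 86400, a state
 * table holding 9000 states scales the timeout to
 *
 *	86400 * (12000 - 9000) / (12000 - 6000) = 43200 seconds,
 *
 * i.e. half-way between start and end cuts the timeout in half; at
 * 12000 states and beyond, pf_state_expires() returns 0 and the state
 * is immediately a purge candidate.
 */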
void
pf_purge_expired_src_nodes(void)
{
	struct pf_src_node	*cur, *next;

	PF_ASSERT_LOCKED();

	for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) {
		next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);

		if (cur->states == 0 && cur->expire <= getuptime()) {
			next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);
			pf_remove_src_node(cur);
		}
	}
}

void
pf_src_tree_remove_state(struct pf_state *st)
{
	u_int32_t		 timeout;
	struct pf_sn_item	*sni;

	while ((sni = SLIST_FIRST(&st->src_nodes)) != NULL) {
		SLIST_REMOVE_HEAD(&st->src_nodes, next);
		if (st->src.tcp_est)
			--sni->sn->conn;
		if (--sni->sn->states == 0) {
			timeout = st->rule.ptr->timeout[PFTM_SRC_NODE];
			if (!timeout)
				timeout =
				    pf_default_rule.timeout[PFTM_SRC_NODE];
			sni->sn->expire = getuptime() + timeout;
		}
		pool_put(&pf_sn_item_pl, sni);
	}
}

void
pf_remove_state(struct pf_state *st)
{
	PF_ASSERT_LOCKED();

	if (st->timeout == PFTM_UNLINKED)
		return;

	/* handle load balancing related tasks */
	pf_postprocess_addr(st);

	if (st->src.state == PF_TCPS_PROXY_DST) {
		pf_send_tcp(st->rule.ptr, st->key[PF_SK_WIRE]->af,
		    &st->key[PF_SK_WIRE]->addr[1],
		    &st->key[PF_SK_WIRE]->addr[0],
		    st->key[PF_SK_WIRE]->port[1],
		    st->key[PF_SK_WIRE]->port[0],
		    st->src.seqhi, st->src.seqlo + 1,
		    TH_RST|TH_ACK, 0, 0, 0, 1, st->tag,
		    st->key[PF_SK_WIRE]->rdomain);
	}
	if (st->key[PF_SK_STACK]->proto == IPPROTO_TCP)
		pf_set_protostate(st, PF_PEER_BOTH, TCPS_CLOSED);

	RBT_REMOVE(pf_state_tree_id, &tree_id, st);
#if NPFLOW > 0
	if (st->state_flags & PFSTATE_PFLOW)
		export_pflow(st);
#endif /* NPFLOW > 0 */
#if NPFSYNC > 0
	pfsync_delete_state(st);
#endif /* NPFSYNC > 0 */
	st->timeout = PFTM_UNLINKED;
	pf_src_tree_remove_state(st);
	pf_detach_state(st);
}

void
pf_remove_divert_state(struct pf_state_key *sk)
{
	struct pf_state_item	*si;

	PF_ASSERT_UNLOCKED();

	PF_LOCK();
	PF_STATE_ENTER_WRITE();
	TAILQ_FOREACH(si, &sk->sk_states, si_entry) {
		struct pf_state *sist = si->si_st;
		if (sk == sist->key[PF_SK_STACK] && sist->rule.ptr &&
		    (sist->rule.ptr->divert.type == PF_DIVERT_TO ||
		    sist->rule.ptr->divert.type == PF_DIVERT_REPLY)) {
			if (sist->key[PF_SK_STACK]->proto == IPPROTO_TCP &&
			    sist->key[PF_SK_WIRE] != sist->key[PF_SK_STACK]) {
				/*
				 * If the local address is translated, keep
				 * the state for "tcp.closed" seconds to
				 * prevent its source port from being reused.
				 */
				if (sist->src.state < TCPS_FIN_WAIT_2 ||
				    sist->dst.state < TCPS_FIN_WAIT_2) {
					pf_set_protostate(sist, PF_PEER_BOTH,
					    TCPS_TIME_WAIT);
					sist->timeout = PFTM_TCP_CLOSED;
					sist->expire = getuptime();
				}
				sist->state_flags |= PFSTATE_INP_UNLINKED;
			} else
				pf_remove_state(sist);
			break;
		}
	}
	PF_STATE_EXIT_WRITE();
	PF_UNLOCK();
}

void
pf_free_state(struct pf_state *st)
{
	struct pf_rule_item	*ri;

	PF_ASSERT_LOCKED();

#if NPFSYNC > 0
	if (pfsync_state_in_use(st))
		return;
#endif /* NPFSYNC > 0 */
	KASSERT(st->timeout == PFTM_UNLINKED);
	if (--st->rule.ptr->states_cur == 0 &&
	    st->rule.ptr->src_nodes == 0)
		pf_rm_rule(NULL, st->rule.ptr);
	if (st->anchor.ptr != NULL)
		if (--st->anchor.ptr->states_cur == 0)
			pf_rm_rule(NULL, st->anchor.ptr);
	while ((ri = SLIST_FIRST(&st->match_rules))) {
		SLIST_REMOVE_HEAD(&st->match_rules, entry);
		if (--ri->r->states_cur == 0 &&
		    ri->r->src_nodes == 0)
			pf_rm_rule(NULL, ri->r);
		pool_put(&pf_rule_item_pl, ri);
	}
	pf_normalize_tcp_cleanup(st);
	pfi_kif_unref(st->kif, PFI_KIF_REF_STATE);
	pf_state_list_remove(&pf_state_list, st);
	if (st->tag)
		pf_tag_unref(st->tag);
	pf_state_unref(st);
	pf_status.fcounters[FCNT_STATE_REMOVALS]++;
	pf_status.states--;
}

void
pf_purge_expired_states(u_int32_t maxcheck)
{
	/*
	 * this task/thread/context/whatever is the only thing that
	 * removes states from the pf_state_list, so the cur reference
	 * it holds between calls is guaranteed to still be in the
	 * list.
	 */
	static struct pf_state	*cur = NULL;

	struct pf_state		*head, *tail;
	struct pf_state		*st;
	SLIST_HEAD(pf_state_gcl, pf_state) gcl = SLIST_HEAD_INITIALIZER(gcl);
	time_t			 now;

	PF_ASSERT_UNLOCKED();

	rw_enter_read(&pf_state_list.pfs_rwl);

	mtx_enter(&pf_state_list.pfs_mtx);
	head = TAILQ_FIRST(&pf_state_list.pfs_list);
	tail = TAILQ_LAST(&pf_state_list.pfs_list, pf_state_queue);
	mtx_leave(&pf_state_list.pfs_mtx);

	if (head == NULL) {
		/* the list is empty */
		rw_exit_read(&pf_state_list.pfs_rwl);
		return;
	}

	/* (re)start at the front of the list */
	if (cur == NULL)
		cur = head;

	now = getuptime();

	do {
		uint8_t stimeout = cur->timeout;

		if ((stimeout == PFTM_UNLINKED) ||
		    (pf_state_expires(cur, stimeout) <= now)) {
			st = pf_state_ref(cur);
			SLIST_INSERT_HEAD(&gcl, st, gc_list);
		}

		/* don't iterate past the end of our view of the list */
		if (cur == tail) {
			cur = NULL;
			break;
		}

		cur = TAILQ_NEXT(cur, entry_list);
	} while (maxcheck--);

	rw_exit_read(&pf_state_list.pfs_rwl);

	if (SLIST_EMPTY(&gcl))
		return;

	NET_LOCK();
	rw_enter_write(&pf_state_list.pfs_rwl);
	PF_LOCK();
	PF_STATE_ENTER_WRITE();
	SLIST_FOREACH(st, &gcl, gc_list) {
		if (st->timeout != PFTM_UNLINKED)
			pf_remove_state(st);

		pf_free_state(st);
	}
	PF_STATE_EXIT_WRITE();
	PF_UNLOCK();
	rw_exit_write(&pf_state_list.pfs_rwl);
	NET_UNLOCK();

	while ((st = SLIST_FIRST(&gcl)) != NULL) {
		SLIST_REMOVE_HEAD(&gcl, gc_list);
		pf_state_unref(st);
	}
}
int
pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw, int wait)
{
	if (aw->type != PF_ADDR_TABLE)
		return (0);
	if ((aw->p.tbl = pfr_attach_table(rs, aw->v.tblname, wait)) == NULL)
		return (1);
	return (0);
}

void
pf_tbladdr_remove(struct pf_addr_wrap *aw)
{
	if (aw->type != PF_ADDR_TABLE || aw->p.tbl == NULL)
		return;
	pfr_detach_table(aw->p.tbl);
	aw->p.tbl = NULL;
}

void
pf_tbladdr_copyout(struct pf_addr_wrap *aw)
{
	struct pfr_ktable *kt = aw->p.tbl;

	if (aw->type != PF_ADDR_TABLE || kt == NULL)
		return;
	if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL)
		kt = kt->pfrkt_root;
	aw->p.tbl = NULL;
	aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ?
	    kt->pfrkt_cnt : -1;
}

void
pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af)
{
	switch (af) {
	case AF_INET: {
		u_int32_t a = ntohl(addr->addr32[0]);
		addlog("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255,
		    (a>>8)&255, a&255);
		if (p) {
			p = ntohs(p);
			addlog(":%u", p);
		}
		break;
	}
#ifdef INET6
	case AF_INET6: {
		u_int16_t b;
		u_int8_t i, curstart, curend, maxstart, maxend;
		curstart = curend = maxstart = maxend = 255;
		for (i = 0; i < 8; i++) {
			if (!addr->addr16[i]) {
				if (curstart == 255)
					curstart = i;
				curend = i;
			} else {
				if ((curend - curstart) >
				    (maxend - maxstart)) {
					maxstart = curstart;
					maxend = curend;
				}
				curstart = curend = 255;
			}
		}
		if ((curend - curstart) >
		    (maxend - maxstart)) {
			maxstart = curstart;
			maxend = curend;
		}
		for (i = 0; i < 8; i++) {
			if (i >= maxstart && i <= maxend) {
				if (i == 0)
					addlog(":");
				if (i == maxend)
					addlog(":");
			} else {
				b = ntohs(addr->addr16[i]);
				addlog("%x", b);
				if (i < 7)
					addlog(":");
			}
		}
		if (p) {
			p = ntohs(p);
			addlog("[%u]", p);
		}
		break;
	}
#endif /* INET6 */
	}
}
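/*
 * Example (illustration only): for the IPv6 address 2001:db8:0:0:0:0:0:1
 * the loop above finds the longest run of zero groups (i = 2..6),
 * prints "2001:db8", emits a lone ":" at the end of the run and
 * finishes with "1", yielding "2001:db8::1".  A port, if given, is
 * appended as "[port]".
 */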
st->direction : 0; 2015 2016 switch (proto) { 2017 case IPPROTO_IPV4: 2018 addlog("IPv4"); 2019 break; 2020 case IPPROTO_IPV6: 2021 addlog("IPv6"); 2022 break; 2023 case IPPROTO_TCP: 2024 addlog("TCP"); 2025 break; 2026 case IPPROTO_UDP: 2027 addlog("UDP"); 2028 break; 2029 case IPPROTO_ICMP: 2030 addlog("ICMP"); 2031 break; 2032 case IPPROTO_ICMPV6: 2033 addlog("ICMPv6"); 2034 break; 2035 default: 2036 addlog("%u", proto); 2037 break; 2038 } 2039 switch (dir) { 2040 case PF_IN: 2041 addlog(" in"); 2042 break; 2043 case PF_OUT: 2044 addlog(" out"); 2045 break; 2046 } 2047 if (skw) { 2048 addlog(" wire: (%d) ", skw->rdomain); 2049 pf_print_host(&skw->addr[0], skw->port[0], skw->af); 2050 addlog(" "); 2051 pf_print_host(&skw->addr[1], skw->port[1], skw->af); 2052 } 2053 if (sks) { 2054 addlog(" stack: (%d) ", sks->rdomain); 2055 if (sks != skw) { 2056 pf_print_host(&sks->addr[0], sks->port[0], sks->af); 2057 addlog(" "); 2058 pf_print_host(&sks->addr[1], sks->port[1], sks->af); 2059 } else 2060 addlog("-"); 2061 } 2062 if (st) { 2063 if (proto == IPPROTO_TCP) { 2064 addlog(" [lo=%u high=%u win=%u modulator=%u", 2065 st->src.seqlo, st->src.seqhi, 2066 st->src.max_win, st->src.seqdiff); 2067 if (st->src.wscale && st->dst.wscale) 2068 addlog(" wscale=%u", 2069 st->src.wscale & PF_WSCALE_MASK); 2070 addlog("]"); 2071 addlog(" [lo=%u high=%u win=%u modulator=%u", 2072 st->dst.seqlo, st->dst.seqhi, 2073 st->dst.max_win, st->dst.seqdiff); 2074 if (st->src.wscale && st->dst.wscale) 2075 addlog(" wscale=%u", 2076 st->dst.wscale & PF_WSCALE_MASK); 2077 addlog("]"); 2078 } 2079 addlog(" %u:%u", st->src.state, st->dst.state); 2080 if (st->rule.ptr) 2081 addlog(" @%d", st->rule.ptr->nr); 2082 } 2083 } 2084 2085 void 2086 pf_print_flags(u_int8_t f) 2087 { 2088 if (f) 2089 addlog(" "); 2090 if (f & TH_FIN) 2091 addlog("F"); 2092 if (f & TH_SYN) 2093 addlog("S"); 2094 if (f & TH_RST) 2095 addlog("R"); 2096 if (f & TH_PUSH) 2097 addlog("P"); 2098 if (f & TH_ACK) 2099 addlog("A"); 2100 if (f & TH_URG) 2101 addlog("U"); 2102 if (f & TH_ECE) 2103 addlog("E"); 2104 if (f & TH_CWR) 2105 addlog("W"); 2106 } 2107 2108 #define PF_SET_SKIP_STEPS(i) \ 2109 do { \ 2110 while (head[i] != cur) { \ 2111 head[i]->skip[i].ptr = cur; \ 2112 head[i] = TAILQ_NEXT(head[i], entries); \ 2113 } \ 2114 } while (0) 2115 2116 void 2117 pf_calc_skip_steps(struct pf_rulequeue *rules) 2118 { 2119 struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT]; 2120 int i; 2121 2122 cur = TAILQ_FIRST(rules); 2123 prev = cur; 2124 for (i = 0; i < PF_SKIP_COUNT; ++i) 2125 head[i] = cur; 2126 while (cur != NULL) { 2127 if (cur->kif != prev->kif || cur->ifnot != prev->ifnot) 2128 PF_SET_SKIP_STEPS(PF_SKIP_IFP); 2129 if (cur->direction != prev->direction) 2130 PF_SET_SKIP_STEPS(PF_SKIP_DIR); 2131 if (cur->onrdomain != prev->onrdomain || 2132 cur->ifnot != prev->ifnot) 2133 PF_SET_SKIP_STEPS(PF_SKIP_RDOM); 2134 if (cur->af != prev->af) 2135 PF_SET_SKIP_STEPS(PF_SKIP_AF); 2136 if (cur->proto != prev->proto) 2137 PF_SET_SKIP_STEPS(PF_SKIP_PROTO); 2138 if (cur->src.neg != prev->src.neg || 2139 pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr)) 2140 PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR); 2141 if (cur->dst.neg != prev->dst.neg || 2142 pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr)) 2143 PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR); 2144 if (cur->src.port[0] != prev->src.port[0] || 2145 cur->src.port[1] != prev->src.port[1] || 2146 cur->src.port_op != prev->src.port_op) 2147 PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT); 2148 if (cur->dst.port[0] != prev->dst.port[0] || 2149 
cur->dst.port[1] != prev->dst.port[1] || 2150 cur->dst.port_op != prev->dst.port_op) 2151 PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT); 2152 2153 prev = cur; 2154 cur = TAILQ_NEXT(cur, entries); 2155 } 2156 for (i = 0; i < PF_SKIP_COUNT; ++i) 2157 PF_SET_SKIP_STEPS(i); 2158 } 2159 2160 int 2161 pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2) 2162 { 2163 if (aw1->type != aw2->type) 2164 return (1); 2165 switch (aw1->type) { 2166 case PF_ADDR_ADDRMASK: 2167 case PF_ADDR_RANGE: 2168 if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, AF_INET6)) 2169 return (1); 2170 if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, AF_INET6)) 2171 return (1); 2172 return (0); 2173 case PF_ADDR_DYNIFTL: 2174 return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt); 2175 case PF_ADDR_NONE: 2176 case PF_ADDR_NOROUTE: 2177 case PF_ADDR_URPFFAILED: 2178 return (0); 2179 case PF_ADDR_TABLE: 2180 return (aw1->p.tbl != aw2->p.tbl); 2181 case PF_ADDR_RTLABEL: 2182 return (aw1->v.rtlabel != aw2->v.rtlabel); 2183 default: 2184 addlog("invalid address type: %d\n", aw1->type); 2185 return (1); 2186 } 2187 } 2188 2189 /* This algorithm computes 'a + b - c' in ones-complement using a trick to 2190 * emulate at most one ones-complement subtraction. This thereby limits net 2191 * carries/borrows to at most one, eliminating a reduction step and saving one 2192 * each of +, >>, & and ~. 2193 * 2194 * def. x mod y = x - (x//y)*y for integer x,y 2195 * def. sum = x mod 2^16 2196 * def. accumulator = (x >> 16) mod 2^16 2197 * 2198 * The trick works as follows: subtracting exactly one u_int16_t from the 2199 * u_int32_t x incurs at most one underflow, wrapping its upper 16-bits, the 2200 * accumulator, to 2^16 - 1. Adding this to the 16-bit sum preserves the 2201 * ones-complement borrow: 2202 * 2203 * (sum + accumulator) mod 2^16 2204 * = { assume underflow: accumulator := 2^16 - 1 } 2205 * (sum + 2^16 - 1) mod 2^16 2206 * = { mod } 2207 * (sum - 1) mod 2^16 2208 * 2209 * Although this breaks for sum = 0, giving 0xffff, which is ones-complement's 2210 * other zero, not -1, that cannot occur: the 16-bit sum cannot be underflown 2211 * to zero as that requires subtraction of at least 2^16, which exceeds a 2212 * single u_int16_t's range. 2213 * 2214 * We use the following theorem to derive the implementation: 2215 * 2216 * th. (x + (y mod z)) mod z = (x + y) mod z (0) 2217 * proof. 2218 * (x + (y mod z)) mod z 2219 * = { def mod } 2220 * (x + y - (y//z)*z) mod z 2221 * = { (a + b*c) mod c = a mod c } 2222 * (x + y) mod z [end of proof] 2223 * 2224 * ... and thereby obtain: 2225 * 2226 * (sum + accumulator) mod 2^16 2227 * = { def. accumulator, def. sum } 2228 * (x mod 2^16 + (x >> 16) mod 2^16) mod 2^16 2229 * = { (0), twice } 2230 * (x + (x >> 16)) mod 2^16 2231 * = { x mod 2^n = x & (2^n - 1) } 2232 * (x + (x >> 16)) & 0xffff 2233 * 2234 * Note: this serves also as a reduction step for at most one add (as the 2235 * trailing mod 2^16 prevents further reductions by destroying carries). 
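 *
 * A short worked example (editor's illustration of the claim above): take
 * *cksum = 0x0000, was = 0x0001 and now = 0x0003.  Then
 *
 *	x = 0x0000 + 0x0001 - 0x0003 = 0xfffffffe   (u_int32_t underflow)
 *
 * so sum = 0xfffe and accumulator = 0xffff = 2^16 - 1, and
 *
 *	(x + (x >> 16)) & 0xffff = 0xfffd = (sum - 1) mod 2^16
 *
 * i.e. the single ones-complement borrow has been applied exactly once,
 * as claimed.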
2236 */ 2237 __inline void 2238 pf_cksum_fixup(u_int16_t *cksum, u_int16_t was, u_int16_t now, 2239 u_int8_t proto) 2240 { 2241 u_int32_t x; 2242 const int udp = proto == IPPROTO_UDP; 2243 2244 x = *cksum + was - now; 2245 x = (x + (x >> 16)) & 0xffff; 2246 2247 /* optimise: eliminate a branch when not udp */ 2248 if (udp && *cksum == 0x0000) 2249 return; 2250 if (udp && x == 0x0000) 2251 x = 0xffff; 2252 2253 *cksum = (u_int16_t)(x); 2254 } 2255 2256 #ifdef INET6 2257 /* pre: coverage(cksum) is superset of coverage(covered_cksum) */ 2258 static __inline void 2259 pf_cksum_uncover(u_int16_t *cksum, u_int16_t covered_cksum, u_int8_t proto) 2260 { 2261 pf_cksum_fixup(cksum, ~covered_cksum, 0x0, proto); 2262 } 2263 2264 /* pre: disjoint(coverage(cksum), coverage(uncovered_cksum)) */ 2265 static __inline void 2266 pf_cksum_cover(u_int16_t *cksum, u_int16_t uncovered_cksum, u_int8_t proto) 2267 { 2268 pf_cksum_fixup(cksum, 0x0, ~uncovered_cksum, proto); 2269 } 2270 #endif /* INET6 */ 2271 2272 /* pre: *a is 16-bit aligned within its packet 2273 * 2274 * This algorithm emulates 16-bit ones-complement sums on a twos-complement 2275 * machine by conserving ones-complement's otherwise discarded carries in the 2276 * upper bits of x. These accumulated carries when added to the lower 16-bits 2277 * over at least zero 'reduction' steps then complete the ones-complement sum. 2278 * 2279 * def. sum = x mod 2^16 2280 * def. accumulator = (x >> 16) 2281 * 2282 * At most two reduction steps 2283 * 2284 * x := sum + accumulator 2285 * = { def sum, def accumulator } 2286 * x := x mod 2^16 + (x >> 16) 2287 * = { x mod 2^n = x & (2^n - 1) } 2288 * x := (x & 0xffff) + (x >> 16) 2289 * 2290 * are necessary to incorporate the accumulated carries (at most one per add) 2291 * i.e. to reduce x < 2^16 from at most 16 carries in the upper 16 bits. 2292 * 2293 * The function is also invariant over the endian of the host. Why? 2294 * 2295 * Define the unary transpose operator ~ on a bitstring in python slice 2296 * notation as lambda m: m[P:] + m[:P] , for some constant pivot P. 2297 * 2298 * th. ~ distributes over ones-complement addition, denoted by +_1, i.e. 2299 * 2300 * ~m +_1 ~n = ~(m +_1 n) (for all bitstrings m,n of equal length) 2301 * 2302 * proof. Regard the bitstrings in m +_1 n as split at P, forming at most two 2303 * 'half-adds'. Under ones-complement addition, each half-add carries to the 2304 * other, so the sum of each half-add is unaffected by their relative 2305 * order. Therefore: 2306 * 2307 * ~m +_1 ~n 2308 * = { half-adds invariant under transposition } 2309 * ~s 2310 * = { substitute } 2311 * ~(m +_1 n) [end of proof] 2312 * 2313 * th. Summing two in-memory ones-complement 16-bit variables m,n on a machine 2314 * with the converse endian does not alter the result. 2315 * 2316 * proof. 
2317 * { converse machine endian: load/store transposes, P := 8 } 2318 * ~(~m +_1 ~n) 2319 * = { ~ over +_1 } 2320 * ~~m +_1 ~~n 2321 * = { ~ is an involution } 2322 * m +_1 n [end of proof] 2323 * 2324 */ 2325 #define NEG(x) ((u_int16_t)~(x)) 2326 void 2327 pf_cksum_fixup_a(u_int16_t *cksum, const struct pf_addr *a, 2328 const struct pf_addr *an, sa_family_t af, u_int8_t proto) 2329 { 2330 u_int32_t x; 2331 const u_int16_t *n = an->addr16; 2332 const u_int16_t *o = a->addr16; 2333 const int udp = proto == IPPROTO_UDP; 2334 2335 switch (af) { 2336 case AF_INET: 2337 x = *cksum + o[0] + NEG(n[0]) + o[1] + NEG(n[1]); 2338 break; 2339 #ifdef INET6 2340 case AF_INET6: 2341 x = *cksum + o[0] + NEG(n[0]) + o[1] + NEG(n[1]) +\ 2342 o[2] + NEG(n[2]) + o[3] + NEG(n[3]) +\ 2343 o[4] + NEG(n[4]) + o[5] + NEG(n[5]) +\ 2344 o[6] + NEG(n[6]) + o[7] + NEG(n[7]); 2345 break; 2346 #endif /* INET6 */ 2347 default: 2348 unhandled_af(af); 2349 } 2350 2351 x = (x & 0xffff) + (x >> 16); 2352 x = (x & 0xffff) + (x >> 16); 2353 2354 /* optimise: eliminate a branch when not udp */ 2355 if (udp && *cksum == 0x0000) 2356 return; 2357 if (udp && x == 0x0000) 2358 x = 0xffff; 2359 2360 *cksum = (u_int16_t)(x); 2361 } 2362 2363 int 2364 pf_patch_8(struct pf_pdesc *pd, u_int8_t *f, u_int8_t v, bool hi) 2365 { 2366 int rewrite = 0; 2367 2368 if (*f != v) { 2369 u_int16_t old = htons(hi ? (*f << 8) : *f); 2370 u_int16_t new = htons(hi ? ( v << 8) : v); 2371 2372 pf_cksum_fixup(pd->pcksum, old, new, pd->proto); 2373 *f = v; 2374 rewrite = 1; 2375 } 2376 2377 return (rewrite); 2378 } 2379 2380 /* pre: *f is 16-bit aligned within its packet */ 2381 int 2382 pf_patch_16(struct pf_pdesc *pd, u_int16_t *f, u_int16_t v) 2383 { 2384 int rewrite = 0; 2385 2386 if (*f != v) { 2387 pf_cksum_fixup(pd->pcksum, *f, v, pd->proto); 2388 *f = v; 2389 rewrite = 1; 2390 } 2391 2392 return (rewrite); 2393 } 2394 2395 int 2396 pf_patch_16_unaligned(struct pf_pdesc *pd, void *f, u_int16_t v, bool hi) 2397 { 2398 int rewrite = 0; 2399 u_int8_t *fb = (u_int8_t*)f; 2400 u_int8_t *vb = (u_int8_t*)&v; 2401 2402 if (hi && ALIGNED_POINTER(f, u_int16_t)) { 2403 return (pf_patch_16(pd, f, v)); /* optimise */ 2404 } 2405 2406 rewrite += pf_patch_8(pd, fb++, *vb++, hi); 2407 rewrite += pf_patch_8(pd, fb++, *vb++,!hi); 2408 2409 return (rewrite); 2410 } 2411 2412 /* pre: *f is 16-bit aligned within its packet */ 2413 /* pre: pd->proto != IPPROTO_UDP */ 2414 int 2415 pf_patch_32(struct pf_pdesc *pd, u_int32_t *f, u_int32_t v) 2416 { 2417 int rewrite = 0; 2418 u_int16_t *pc = pd->pcksum; 2419 u_int8_t proto = pd->proto; 2420 2421 /* optimise: inline udp fixup code is unused; let compiler scrub it */ 2422 if (proto == IPPROTO_UDP) 2423 panic("%s: udp", __func__); 2424 2425 /* optimise: skip *f != v guard; true for all use-cases */ 2426 pf_cksum_fixup(pc, *f / (1 << 16), v / (1 << 16), proto); 2427 pf_cksum_fixup(pc, *f % (1 << 16), v % (1 << 16), proto); 2428 2429 *f = v; 2430 rewrite = 1; 2431 2432 return (rewrite); 2433 } 2434 2435 int 2436 pf_patch_32_unaligned(struct pf_pdesc *pd, void *f, u_int32_t v, bool hi) 2437 { 2438 int rewrite = 0; 2439 u_int8_t *fb = (u_int8_t*)f; 2440 u_int8_t *vb = (u_int8_t*)&v; 2441 2442 if (hi && ALIGNED_POINTER(f, u_int32_t)) { 2443 return (pf_patch_32(pd, f, v)); /* optimise */ 2444 } 2445 2446 rewrite += pf_patch_8(pd, fb++, *vb++, hi); 2447 rewrite += pf_patch_8(pd, fb++, *vb++,!hi); 2448 rewrite += pf_patch_8(pd, fb++, *vb++, hi); 2449 rewrite += pf_patch_8(pd, fb++, *vb++,!hi); 2450 2451 return (rewrite); 2452 } 2453 
2454 int 2455 pf_icmp_mapping(struct pf_pdesc *pd, u_int8_t type, int *icmp_dir, 2456 u_int16_t *virtual_id, u_int16_t *virtual_type) 2457 { 2458 /* 2459 * ICMP types marked with PF_OUT are typically responses to 2460 * PF_IN, and will match states in the opposite direction. 2461 * PF_IN ICMP types need to match a state with that type. 2462 */ 2463 *icmp_dir = PF_OUT; 2464 2465 /* Queries (and responses) */ 2466 switch (pd->af) { 2467 case AF_INET: 2468 switch (type) { 2469 case ICMP_ECHO: 2470 *icmp_dir = PF_IN; 2471 /* FALLTHROUGH */ 2472 case ICMP_ECHOREPLY: 2473 *virtual_type = ICMP_ECHO; 2474 *virtual_id = pd->hdr.icmp.icmp_id; 2475 break; 2476 2477 case ICMP_TSTAMP: 2478 *icmp_dir = PF_IN; 2479 /* FALLTHROUGH */ 2480 case ICMP_TSTAMPREPLY: 2481 *virtual_type = ICMP_TSTAMP; 2482 *virtual_id = pd->hdr.icmp.icmp_id; 2483 break; 2484 2485 case ICMP_IREQ: 2486 *icmp_dir = PF_IN; 2487 /* FALLTHROUGH */ 2488 case ICMP_IREQREPLY: 2489 *virtual_type = ICMP_IREQ; 2490 *virtual_id = pd->hdr.icmp.icmp_id; 2491 break; 2492 2493 case ICMP_MASKREQ: 2494 *icmp_dir = PF_IN; 2495 /* FALLTHROUGH */ 2496 case ICMP_MASKREPLY: 2497 *virtual_type = ICMP_MASKREQ; 2498 *virtual_id = pd->hdr.icmp.icmp_id; 2499 break; 2500 2501 case ICMP_IPV6_WHEREAREYOU: 2502 *icmp_dir = PF_IN; 2503 /* FALLTHROUGH */ 2504 case ICMP_IPV6_IAMHERE: 2505 *virtual_type = ICMP_IPV6_WHEREAREYOU; 2506 *virtual_id = 0; /* Nothing sane to match on! */ 2507 break; 2508 2509 case ICMP_MOBILE_REGREQUEST: 2510 *icmp_dir = PF_IN; 2511 /* FALLTHROUGH */ 2512 case ICMP_MOBILE_REGREPLY: 2513 *virtual_type = ICMP_MOBILE_REGREQUEST; 2514 *virtual_id = 0; /* Nothing sane to match on! */ 2515 break; 2516 2517 case ICMP_ROUTERSOLICIT: 2518 *icmp_dir = PF_IN; 2519 /* FALLTHROUGH */ 2520 case ICMP_ROUTERADVERT: 2521 *virtual_type = ICMP_ROUTERSOLICIT; 2522 *virtual_id = 0; /* Nothing sane to match on! */ 2523 break; 2524 2525 /* These ICMP types map to other connections */ 2526 case ICMP_UNREACH: 2527 case ICMP_SOURCEQUENCH: 2528 case ICMP_REDIRECT: 2529 case ICMP_TIMXCEED: 2530 case ICMP_PARAMPROB: 2531 /* These will not be used, but set them anyway */ 2532 *icmp_dir = PF_IN; 2533 *virtual_type = htons(type); 2534 *virtual_id = 0; 2535 return (1); /* These types match to another state */ 2536 2537 /* 2538 * All remaining ICMP types get their own states, 2539 * and will only match in one direction. 2540 */ 2541 default: 2542 *icmp_dir = PF_IN; 2543 *virtual_type = type; 2544 *virtual_id = 0; 2545 break; 2546 } 2547 break; 2548 #ifdef INET6 2549 case AF_INET6: 2550 switch (type) { 2551 case ICMP6_ECHO_REQUEST: 2552 *icmp_dir = PF_IN; 2553 /* FALLTHROUGH */ 2554 case ICMP6_ECHO_REPLY: 2555 *virtual_type = ICMP6_ECHO_REQUEST; 2556 *virtual_id = pd->hdr.icmp6.icmp6_id; 2557 break; 2558 2559 case MLD_LISTENER_QUERY: 2560 case MLD_LISTENER_REPORT: { 2561 struct mld_hdr *mld = &pd->hdr.mld; 2562 u_int32_t h; 2563 2564 /* 2565 * Listener Report can be sent by clients 2566 * without an associated Listener Query. 2567 * In addition to that, when Report is sent as a 2568 * reply to a Query its source and destination 2569 * address are different. 
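 *
 * Matching therefore cannot key on the addresses; instead the code
 * below folds the multicast group address (mld_addr) into a 16-bit
 * hash and uses it as the virtual id, so a Query and a Report for
 * the same group still map to the same state.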
2570 */ 2571 *icmp_dir = PF_IN; 2572 *virtual_type = MLD_LISTENER_QUERY; 2573 /* generate fake id for these messages */ 2574 h = mld->mld_addr.s6_addr32[0] ^ 2575 mld->mld_addr.s6_addr32[1] ^ 2576 mld->mld_addr.s6_addr32[2] ^ 2577 mld->mld_addr.s6_addr32[3]; 2578 *virtual_id = (h >> 16) ^ (h & 0xffff); 2579 break; 2580 } 2581 2582 /* 2583 * ICMP6_FQDN and ICMP6_NI query/reply are the same type as 2584 * ICMP6_WRU 2585 */ 2586 case ICMP6_WRUREQUEST: 2587 *icmp_dir = PF_IN; 2588 /* FALLTHROUGH */ 2589 case ICMP6_WRUREPLY: 2590 *virtual_type = ICMP6_WRUREQUEST; 2591 *virtual_id = 0; /* Nothing sane to match on! */ 2592 break; 2593 2594 case MLD_MTRACE: 2595 *icmp_dir = PF_IN; 2596 /* FALLTHROUGH */ 2597 case MLD_MTRACE_RESP: 2598 *virtual_type = MLD_MTRACE; 2599 *virtual_id = 0; /* Nothing sane to match on! */ 2600 break; 2601 2602 case ND_NEIGHBOR_SOLICIT: 2603 *icmp_dir = PF_IN; 2604 /* FALLTHROUGH */ 2605 case ND_NEIGHBOR_ADVERT: { 2606 struct nd_neighbor_solicit *nd = &pd->hdr.nd_ns; 2607 u_int32_t h; 2608 2609 *virtual_type = ND_NEIGHBOR_SOLICIT; 2610 /* generate fake id for these messages */ 2611 h = nd->nd_ns_target.s6_addr32[0] ^ 2612 nd->nd_ns_target.s6_addr32[1] ^ 2613 nd->nd_ns_target.s6_addr32[2] ^ 2614 nd->nd_ns_target.s6_addr32[3]; 2615 *virtual_id = (h >> 16) ^ (h & 0xffff); 2616 /* 2617 * The extra work here deals with the 'keep state' option 2618 * on a pass rule for an unsolicited advertisement. By 2619 * returning 1 (state_icmp = 1) we override 'keep 2620 * state' to 'no state', so no state is created for 2621 * unsolicited advertisements. Nobody expects an answer 2622 * to an unsolicited advertisement, so this is safe. 2623 */ 2624 if (type == ND_NEIGHBOR_ADVERT) { 2625 *virtual_type = htons(*virtual_type); 2626 return (1); 2627 } 2628 break; 2629 } 2630 2631 /* 2632 * These ICMP types map to other connections. 2633 * ND_REDIRECT can't be in this list because the triggering 2634 * packet header is optional. 2635 */ 2636 case ICMP6_DST_UNREACH: 2637 case ICMP6_PACKET_TOO_BIG: 2638 case ICMP6_TIME_EXCEEDED: 2639 case ICMP6_PARAM_PROB: 2640 /* These will not be used, but set them anyway */ 2641 *icmp_dir = PF_IN; 2642 *virtual_type = htons(type); 2643 *virtual_id = 0; 2644 return (1); /* These types match to another state */ 2645 /* 2646 * All remaining ICMP6 types get their own states, 2647 * and will only match in one direction.
2648 */ 2649 default: 2650 *icmp_dir = PF_IN; 2651 *virtual_type = type; 2652 *virtual_id = 0; 2653 break; 2654 } 2655 break; 2656 #endif /* INET6 */ 2657 } 2658 *virtual_type = htons(*virtual_type); 2659 return (0); /* These types match to their own state */ 2660 } 2661 2662 void 2663 pf_translate_icmp(struct pf_pdesc *pd, struct pf_addr *qa, u_int16_t *qp, 2664 struct pf_addr *oa, struct pf_addr *na, u_int16_t np) 2665 { 2666 /* note: doesn't trouble to fixup quoted checksums, if any */ 2667 2668 /* change quoted protocol port */ 2669 if (qp != NULL) 2670 pf_patch_16(pd, qp, np); 2671 2672 /* change quoted ip address */ 2673 pf_cksum_fixup_a(pd->pcksum, qa, na, pd->af, pd->proto); 2674 pf_addrcpy(qa, na, pd->af); 2675 2676 /* change network-header's ip address */ 2677 if (oa) 2678 pf_translate_a(pd, oa, na); 2679 } 2680 2681 /* pre: *a is 16-bit aligned within its packet */ 2682 /* *a is a network header src/dst address */ 2683 int 2684 pf_translate_a(struct pf_pdesc *pd, struct pf_addr *a, struct pf_addr *an) 2685 { 2686 int rewrite = 0; 2687 2688 /* warning: !PF_ANEQ != PF_AEQ */ 2689 if (!PF_ANEQ(a, an, pd->af)) 2690 return (0); 2691 2692 /* fixup transport pseudo-header, if any */ 2693 switch (pd->proto) { 2694 case IPPROTO_TCP: /* FALLTHROUGH */ 2695 case IPPROTO_UDP: /* FALLTHROUGH */ 2696 case IPPROTO_ICMPV6: 2697 pf_cksum_fixup_a(pd->pcksum, a, an, pd->af, pd->proto); 2698 break; 2699 default: 2700 break; /* assume no pseudo-header */ 2701 } 2702 2703 pf_addrcpy(a, an, pd->af); 2704 rewrite = 1; 2705 2706 return (rewrite); 2707 } 2708 2709 #ifdef INET6 2710 /* pf_translate_af() may change pd->m, adjust local copies after calling */ 2711 int 2712 pf_translate_af(struct pf_pdesc *pd) 2713 { 2714 static const struct pf_addr zero; 2715 struct ip *ip4; 2716 struct ip6_hdr *ip6; 2717 int copyback = 0; 2718 u_int hlen, ohlen, dlen; 2719 u_int16_t *pc; 2720 u_int8_t af_proto, naf_proto; 2721 2722 hlen = (pd->naf == AF_INET) ? 
sizeof(*ip4) : sizeof(*ip6); 2723 ohlen = pd->off; 2724 dlen = pd->tot_len - pd->off; 2725 pc = pd->pcksum; 2726 2727 af_proto = naf_proto = pd->proto; 2728 if (naf_proto == IPPROTO_ICMP) 2729 af_proto = IPPROTO_ICMPV6; 2730 if (naf_proto == IPPROTO_ICMPV6) 2731 af_proto = IPPROTO_ICMP; 2732 2733 /* uncover stale pseudo-header */ 2734 switch (af_proto) { 2735 case IPPROTO_ICMPV6: 2736 /* optimise: unchanged for TCP/UDP */ 2737 pf_cksum_fixup(pc, htons(af_proto), 0x0, af_proto); 2738 pf_cksum_fixup(pc, htons(dlen), 0x0, af_proto); 2739 /* FALLTHROUGH */ 2740 case IPPROTO_UDP: /* FALLTHROUGH */ 2741 case IPPROTO_TCP: 2742 pf_cksum_fixup_a(pc, pd->src, &zero, pd->af, af_proto); 2743 pf_cksum_fixup_a(pc, pd->dst, &zero, pd->af, af_proto); 2744 copyback = 1; 2745 break; 2746 default: 2747 break; /* assume no pseudo-header */ 2748 } 2749 2750 /* replace the network header */ 2751 m_adj(pd->m, pd->off); 2752 pd->src = NULL; 2753 pd->dst = NULL; 2754 2755 if ((M_PREPEND(pd->m, hlen, M_DONTWAIT)) == NULL) { 2756 pd->m = NULL; 2757 return (-1); 2758 } 2759 2760 pd->off = hlen; 2761 pd->tot_len += hlen - ohlen; 2762 2763 switch (pd->naf) { 2764 case AF_INET: 2765 ip4 = mtod(pd->m, struct ip *); 2766 memset(ip4, 0, hlen); 2767 ip4->ip_v = IPVERSION; 2768 ip4->ip_hl = hlen >> 2; 2769 ip4->ip_tos = pd->tos; 2770 ip4->ip_len = htons(hlen + dlen); 2771 ip4->ip_id = htons(ip_randomid()); 2772 ip4->ip_off = htons(IP_DF); 2773 ip4->ip_ttl = pd->ttl; 2774 ip4->ip_p = pd->proto; 2775 ip4->ip_src = pd->nsaddr.v4; 2776 ip4->ip_dst = pd->ndaddr.v4; 2777 break; 2778 case AF_INET6: 2779 ip6 = mtod(pd->m, struct ip6_hdr *); 2780 memset(ip6, 0, hlen); 2781 ip6->ip6_vfc = IPV6_VERSION; 2782 ip6->ip6_flow |= htonl((u_int32_t)pd->tos << 20); 2783 ip6->ip6_plen = htons(dlen); 2784 ip6->ip6_nxt = pd->proto; 2785 if (!pd->ttl || pd->ttl > IPV6_DEFHLIM) 2786 ip6->ip6_hlim = IPV6_DEFHLIM; 2787 else 2788 ip6->ip6_hlim = pd->ttl; 2789 ip6->ip6_src = pd->nsaddr.v6; 2790 ip6->ip6_dst = pd->ndaddr.v6; 2791 break; 2792 default: 2793 unhandled_af(pd->naf); 2794 } 2795 2796 /* UDP over IPv6 must be checksummed per rfc2460 p27 */ 2797 if (naf_proto == IPPROTO_UDP && *pc == 0x0000 && 2798 pd->naf == AF_INET6) { 2799 pd->m->m_pkthdr.csum_flags |= M_UDP_CSUM_OUT; 2800 } 2801 2802 /* cover fresh pseudo-header */ 2803 switch (naf_proto) { 2804 case IPPROTO_ICMPV6: 2805 /* optimise: unchanged for TCP/UDP */ 2806 pf_cksum_fixup(pc, 0x0, htons(naf_proto), naf_proto); 2807 pf_cksum_fixup(pc, 0x0, htons(dlen), naf_proto); 2808 /* FALLTHROUGH */ 2809 case IPPROTO_UDP: /* FALLTHROUGH */ 2810 case IPPROTO_TCP: 2811 pf_cksum_fixup_a(pc, &zero, &pd->nsaddr, pd->naf, naf_proto); 2812 pf_cksum_fixup_a(pc, &zero, &pd->ndaddr, pd->naf, naf_proto); 2813 copyback = 1; 2814 break; 2815 default: 2816 break; /* assume no pseudo-header */ 2817 } 2818 2819 /* flush pd->pcksum */ 2820 if (copyback) 2821 m_copyback(pd->m, pd->off, pd->hdrlen, &pd->hdr, M_NOWAIT); 2822 2823 return (0); 2824 } 2825 2826 int 2827 pf_change_icmp_af(struct mbuf *m, int ipoff2, struct pf_pdesc *pd, 2828 struct pf_pdesc *pd2, struct pf_addr *src, struct pf_addr *dst, 2829 sa_family_t af, sa_family_t naf) 2830 { 2831 struct mbuf *n = NULL; 2832 struct ip *ip4; 2833 struct ip6_hdr *ip6; 2834 u_int hlen, ohlen, dlen; 2835 int d; 2836 2837 if (af == naf || (af != AF_INET && af != AF_INET6) || 2838 (naf != AF_INET && naf != AF_INET6)) 2839 return (-1); 2840 2841 /* split the mbuf chain on the quoted ip/ip6 header boundary */ 2842 if ((n = m_split(m, ipoff2, M_DONTWAIT)) == NULL) 2843 
return (-1); 2844 2845 /* new quoted header */ 2846 hlen = naf == AF_INET ? sizeof(*ip4) : sizeof(*ip6); 2847 /* old quoted header */ 2848 ohlen = pd2->off - ipoff2; 2849 2850 /* trim old quoted header */ 2851 pf_cksum_uncover(pd->pcksum, in_cksum(n, ohlen), pd->proto); 2852 m_adj(n, ohlen); 2853 2854 /* prepend a new, translated, quoted header */ 2855 if ((M_PREPEND(n, hlen, M_DONTWAIT)) == NULL) 2856 return (-1); 2857 2858 switch (naf) { 2859 case AF_INET: 2860 ip4 = mtod(n, struct ip *); 2861 memset(ip4, 0, sizeof(*ip4)); 2862 ip4->ip_v = IPVERSION; 2863 ip4->ip_hl = sizeof(*ip4) >> 2; 2864 ip4->ip_len = htons(sizeof(*ip4) + pd2->tot_len - ohlen); 2865 ip4->ip_id = htons(ip_randomid()); 2866 ip4->ip_off = htons(IP_DF); 2867 ip4->ip_ttl = pd2->ttl; 2868 if (pd2->proto == IPPROTO_ICMPV6) 2869 ip4->ip_p = IPPROTO_ICMP; 2870 else 2871 ip4->ip_p = pd2->proto; 2872 ip4->ip_src = src->v4; 2873 ip4->ip_dst = dst->v4; 2874 in_hdr_cksum_out(n, NULL); 2875 break; 2876 case AF_INET6: 2877 ip6 = mtod(n, struct ip6_hdr *); 2878 memset(ip6, 0, sizeof(*ip6)); 2879 ip6->ip6_vfc = IPV6_VERSION; 2880 ip6->ip6_plen = htons(pd2->tot_len - ohlen); 2881 if (pd2->proto == IPPROTO_ICMP) 2882 ip6->ip6_nxt = IPPROTO_ICMPV6; 2883 else 2884 ip6->ip6_nxt = pd2->proto; 2885 if (!pd2->ttl || pd2->ttl > IPV6_DEFHLIM) 2886 ip6->ip6_hlim = IPV6_DEFHLIM; 2887 else 2888 ip6->ip6_hlim = pd2->ttl; 2889 ip6->ip6_src = src->v6; 2890 ip6->ip6_dst = dst->v6; 2891 break; 2892 } 2893 2894 /* cover new quoted header */ 2895 /* optimise: any new AF_INET header of ours sums to zero */ 2896 if (naf != AF_INET) { 2897 pf_cksum_cover(pd->pcksum, in_cksum(n, hlen), pd->proto); 2898 } 2899 2900 /* reattach modified quoted packet to outer header */ 2901 { 2902 int nlen = n->m_pkthdr.len; 2903 m_cat(m, n); 2904 m->m_pkthdr.len += nlen; 2905 } 2906 2907 /* account for altered length */ 2908 d = hlen - ohlen; 2909 2910 if (pd->proto == IPPROTO_ICMPV6) { 2911 /* fixup pseudo-header */ 2912 dlen = pd->tot_len - pd->off; 2913 pf_cksum_fixup(pd->pcksum, 2914 htons(dlen), htons(dlen + d), pd->proto); 2915 } 2916 2917 pd->tot_len += d; 2918 pd2->tot_len += d; 2919 pd2->off += d; 2920 2921 /* note: not bothering to update network headers as 2922 these due for rewrite by pf_translate_af() */ 2923 2924 return (0); 2925 } 2926 2927 2928 #define PTR_IP(field) (offsetof(struct ip, field)) 2929 #define PTR_IP6(field) (offsetof(struct ip6_hdr, field)) 2930 2931 int 2932 pf_translate_icmp_af(struct pf_pdesc *pd, int af, void *arg) 2933 { 2934 struct icmp *icmp4; 2935 struct icmp6_hdr *icmp6; 2936 u_int32_t mtu; 2937 int32_t ptr = -1; 2938 u_int8_t type; 2939 u_int8_t code; 2940 2941 switch (af) { 2942 case AF_INET: 2943 icmp6 = arg; 2944 type = icmp6->icmp6_type; 2945 code = icmp6->icmp6_code; 2946 mtu = ntohl(icmp6->icmp6_mtu); 2947 2948 switch (type) { 2949 case ICMP6_ECHO_REQUEST: 2950 type = ICMP_ECHO; 2951 break; 2952 case ICMP6_ECHO_REPLY: 2953 type = ICMP_ECHOREPLY; 2954 break; 2955 case ICMP6_DST_UNREACH: 2956 type = ICMP_UNREACH; 2957 switch (code) { 2958 case ICMP6_DST_UNREACH_NOROUTE: 2959 case ICMP6_DST_UNREACH_BEYONDSCOPE: 2960 case ICMP6_DST_UNREACH_ADDR: 2961 code = ICMP_UNREACH_HOST; 2962 break; 2963 case ICMP6_DST_UNREACH_ADMIN: 2964 code = ICMP_UNREACH_HOST_PROHIB; 2965 break; 2966 case ICMP6_DST_UNREACH_NOPORT: 2967 code = ICMP_UNREACH_PORT; 2968 break; 2969 default: 2970 return (-1); 2971 } 2972 break; 2973 case ICMP6_PACKET_TOO_BIG: 2974 type = ICMP_UNREACH; 2975 code = ICMP_UNREACH_NEEDFRAG; 2976 mtu -= 20; 2977 break; 2978 case 
ICMP6_TIME_EXCEEDED: 2979 type = ICMP_TIMXCEED; 2980 break; 2981 case ICMP6_PARAM_PROB: 2982 switch (code) { 2983 case ICMP6_PARAMPROB_HEADER: 2984 type = ICMP_PARAMPROB; 2985 code = ICMP_PARAMPROB_ERRATPTR; 2986 ptr = ntohl(icmp6->icmp6_pptr); 2987 2988 if (ptr == PTR_IP6(ip6_vfc)) 2989 ; /* preserve */ 2990 else if (ptr == PTR_IP6(ip6_vfc) + 1) 2991 ptr = PTR_IP(ip_tos); 2992 else if (ptr == PTR_IP6(ip6_plen) || 2993 ptr == PTR_IP6(ip6_plen) + 1) 2994 ptr = PTR_IP(ip_len); 2995 else if (ptr == PTR_IP6(ip6_nxt)) 2996 ptr = PTR_IP(ip_p); 2997 else if (ptr == PTR_IP6(ip6_hlim)) 2998 ptr = PTR_IP(ip_ttl); 2999 else if (ptr >= PTR_IP6(ip6_src) && 3000 ptr < PTR_IP6(ip6_dst)) 3001 ptr = PTR_IP(ip_src); 3002 else if (ptr >= PTR_IP6(ip6_dst) && 3003 ptr < sizeof(struct ip6_hdr)) 3004 ptr = PTR_IP(ip_dst); 3005 else { 3006 return (-1); 3007 } 3008 break; 3009 case ICMP6_PARAMPROB_NEXTHEADER: 3010 type = ICMP_UNREACH; 3011 code = ICMP_UNREACH_PROTOCOL; 3012 break; 3013 default: 3014 return (-1); 3015 } 3016 break; 3017 default: 3018 return (-1); 3019 } 3020 3021 pf_patch_8(pd, &icmp6->icmp6_type, type, PF_HI); 3022 pf_patch_8(pd, &icmp6->icmp6_code, code, PF_LO); 3023 3024 /* aligns well with an icmpv4 nextmtu */ 3025 pf_patch_32(pd, &icmp6->icmp6_mtu, htonl(mtu)); 3026 3027 /* the icmpv4 pptr is a single byte, carried in the most significant byte of the field */ 3028 if (ptr >= 0) 3029 pf_patch_32(pd, &icmp6->icmp6_pptr, htonl(ptr << 24)); 3030 break; 3031 case AF_INET6: 3032 icmp4 = arg; 3033 type = icmp4->icmp_type; 3034 code = icmp4->icmp_code; 3035 mtu = ntohs(icmp4->icmp_nextmtu); 3036 3037 switch (type) { 3038 case ICMP_ECHO: 3039 type = ICMP6_ECHO_REQUEST; 3040 break; 3041 case ICMP_ECHOREPLY: 3042 type = ICMP6_ECHO_REPLY; 3043 break; 3044 case ICMP_UNREACH: 3045 type = ICMP6_DST_UNREACH; 3046 switch (code) { 3047 case ICMP_UNREACH_NET: 3048 case ICMP_UNREACH_HOST: 3049 case ICMP_UNREACH_NET_UNKNOWN: 3050 case ICMP_UNREACH_HOST_UNKNOWN: 3051 case ICMP_UNREACH_ISOLATED: 3052 case ICMP_UNREACH_TOSNET: 3053 case ICMP_UNREACH_TOSHOST: 3054 code = ICMP6_DST_UNREACH_NOROUTE; 3055 break; 3056 case ICMP_UNREACH_PORT: 3057 code = ICMP6_DST_UNREACH_NOPORT; 3058 break; 3059 case ICMP_UNREACH_NET_PROHIB: 3060 case ICMP_UNREACH_HOST_PROHIB: 3061 case ICMP_UNREACH_FILTER_PROHIB: 3062 case ICMP_UNREACH_PRECEDENCE_CUTOFF: 3063 code = ICMP6_DST_UNREACH_ADMIN; 3064 break; 3065 case ICMP_UNREACH_PROTOCOL: 3066 type = ICMP6_PARAM_PROB; 3067 code = ICMP6_PARAMPROB_NEXTHEADER; 3068 ptr = offsetof(struct ip6_hdr, ip6_nxt); 3069 break; 3070 case ICMP_UNREACH_NEEDFRAG: 3071 type = ICMP6_PACKET_TOO_BIG; 3072 code = 0; 3073 mtu += 20; 3074 break; 3075 default: 3076 return (-1); 3077 } 3078 break; 3079 case ICMP_TIMXCEED: 3080 type = ICMP6_TIME_EXCEEDED; 3081 break; 3082 case ICMP_PARAMPROB: 3083 type = ICMP6_PARAM_PROB; 3084 switch (code) { 3085 case ICMP_PARAMPROB_ERRATPTR: 3086 code = ICMP6_PARAMPROB_HEADER; 3087 break; 3088 case ICMP_PARAMPROB_LENGTH: 3089 code = ICMP6_PARAMPROB_HEADER; 3090 break; 3091 default: 3092 return (-1); 3093 } 3094 3095 ptr = icmp4->icmp_pptr; 3096 if (ptr == 0 || ptr == PTR_IP(ip_tos)) 3097 ; /* preserve */ 3098 else if (ptr == PTR_IP(ip_len) || 3099 ptr == PTR_IP(ip_len) + 1) 3100 ptr = PTR_IP6(ip6_plen); 3101 else if (ptr == PTR_IP(ip_ttl)) 3102 ptr = PTR_IP6(ip6_hlim); 3103 else if (ptr == PTR_IP(ip_p)) 3104 ptr = PTR_IP6(ip6_nxt); 3105 else if (ptr >= PTR_IP(ip_src) && 3106 ptr < PTR_IP(ip_dst)) 3107 ptr = PTR_IP6(ip6_src); 3108 else if (ptr >= PTR_IP(ip_dst) && 3109 ptr < sizeof(struct ip)) 3110 ptr = PTR_IP6(ip6_dst);
3111 else { 3112 return (-1); 3113 } 3114 break; 3115 default: 3116 return (-1); 3117 } 3118 3119 pf_patch_8(pd, &icmp4->icmp_type, type, PF_HI); 3120 pf_patch_8(pd, &icmp4->icmp_code, code, PF_LO); 3121 pf_patch_16(pd, &icmp4->icmp_nextmtu, htons(mtu)); 3122 if (ptr >= 0) 3123 pf_patch_32(pd, &icmp4->icmp_void, htonl(ptr)); 3124 break; 3125 } 3126 3127 return (0); 3128 } 3129 #endif /* INET6 */ 3130 3131 /* 3132 * Need to modulate the sequence numbers in the TCP SACK option 3133 * (credits to Krzysztof Pfaff for report and patch) 3134 */ 3135 int 3136 pf_modulate_sack(struct pf_pdesc *pd, struct pf_state_peer *dst) 3137 { 3138 struct sackblk sack; 3139 int copyback = 0, i; 3140 int olen, optsoff; 3141 u_int8_t opts[MAX_TCPOPTLEN], *opt, *eoh; 3142 3143 olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr); 3144 optsoff = pd->off + sizeof(struct tcphdr); 3145 #define TCPOLEN_MINSACK (TCPOLEN_SACK + 2) 3146 if (olen < TCPOLEN_MINSACK || 3147 !pf_pull_hdr(pd->m, optsoff, opts, olen, NULL, NULL, pd->af)) 3148 return (0); 3149 3150 eoh = opts + olen; 3151 opt = opts; 3152 while ((opt = pf_find_tcpopt(opt, opts, olen, 3153 TCPOPT_SACK, TCPOLEN_MINSACK)) != NULL) 3154 { 3155 size_t safelen = MIN(opt[1], (eoh - opt)); 3156 for (i = 2; i + TCPOLEN_SACK <= safelen; i += TCPOLEN_SACK) { 3157 size_t startoff = (opt + i) - opts; 3158 memcpy(&sack, &opt[i], sizeof(sack)); 3159 pf_patch_32_unaligned(pd, &sack.start, 3160 htonl(ntohl(sack.start) - dst->seqdiff), 3161 PF_ALGNMNT(startoff)); 3162 pf_patch_32_unaligned(pd, &sack.end, 3163 htonl(ntohl(sack.end) - dst->seqdiff), 3164 PF_ALGNMNT(startoff + sizeof(sack.start))); 3165 memcpy(&opt[i], &sack, sizeof(sack)); 3166 } 3167 copyback = 1; 3168 opt += opt[1]; 3169 } 3170 3171 if (copyback) 3172 m_copyback(pd->m, optsoff, olen, opts, M_NOWAIT); 3173 return (copyback); 3174 } 3175 3176 struct mbuf * 3177 pf_build_tcp(const struct pf_rule *r, sa_family_t af, 3178 const struct pf_addr *saddr, const struct pf_addr *daddr, 3179 u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack, 3180 u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag, 3181 u_int16_t rtag, u_int sack, u_int rdom) 3182 { 3183 struct mbuf *m; 3184 int len, tlen; 3185 struct ip *h; 3186 #ifdef INET6 3187 struct ip6_hdr *h6; 3188 #endif /* INET6 */ 3189 struct tcphdr *th; 3190 char *opt; 3191 3192 /* maximum segment size tcp option */ 3193 tlen = sizeof(struct tcphdr); 3194 if (mss) 3195 tlen += 4; 3196 if (sack) 3197 tlen += 2; 3198 3199 switch (af) { 3200 case AF_INET: 3201 len = sizeof(struct ip) + tlen; 3202 break; 3203 #ifdef INET6 3204 case AF_INET6: 3205 len = sizeof(struct ip6_hdr) + tlen; 3206 break; 3207 #endif /* INET6 */ 3208 default: 3209 unhandled_af(af); 3210 } 3211 3212 /* create outgoing mbuf */ 3213 m = m_gethdr(M_DONTWAIT, MT_HEADER); 3214 if (m == NULL) 3215 return (NULL); 3216 if (tag) 3217 m->m_pkthdr.pf.flags |= PF_TAG_GENERATED; 3218 m->m_pkthdr.pf.tag = rtag; 3219 m->m_pkthdr.ph_rtableid = rdom; 3220 if (r && (r->scrub_flags & PFSTATE_SETPRIO)) 3221 m->m_pkthdr.pf.prio = r->set_prio[0]; 3222 if (r && r->qid) 3223 m->m_pkthdr.pf.qid = r->qid; 3224 m->m_data += max_linkhdr; 3225 m->m_pkthdr.len = m->m_len = len; 3226 m->m_pkthdr.ph_ifidx = 0; 3227 m->m_pkthdr.csum_flags |= M_TCP_CSUM_OUT; 3228 memset(m->m_data, 0, len); 3229 switch (af) { 3230 case AF_INET: 3231 h = mtod(m, struct ip *); 3232 h->ip_p = IPPROTO_TCP; 3233 h->ip_len = htons(tlen); 3234 h->ip_v = 4; 3235 h->ip_hl = sizeof(*h) >> 2; 3236 h->ip_tos = IPTOS_LOWDELAY; 3237 h->ip_len = 
htons(len); 3238 h->ip_off = htons(ip_mtudisc ? IP_DF : 0); 3239 h->ip_ttl = ttl ? ttl : ip_defttl; 3240 h->ip_sum = 0; 3241 h->ip_src.s_addr = saddr->v4.s_addr; 3242 h->ip_dst.s_addr = daddr->v4.s_addr; 3243 3244 th = (struct tcphdr *)((caddr_t)h + sizeof(struct ip)); 3245 break; 3246 #ifdef INET6 3247 case AF_INET6: 3248 h6 = mtod(m, struct ip6_hdr *); 3249 h6->ip6_nxt = IPPROTO_TCP; 3250 h6->ip6_plen = htons(tlen); 3251 h6->ip6_vfc |= IPV6_VERSION; 3252 h6->ip6_hlim = IPV6_DEFHLIM; 3253 memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr)); 3254 memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr)); 3255 3256 th = (struct tcphdr *)((caddr_t)h6 + sizeof(struct ip6_hdr)); 3257 break; 3258 #endif /* INET6 */ 3259 default: 3260 unhandled_af(af); 3261 } 3262 3263 /* TCP header */ 3264 th->th_sport = sport; 3265 th->th_dport = dport; 3266 th->th_seq = htonl(seq); 3267 th->th_ack = htonl(ack); 3268 th->th_off = tlen >> 2; 3269 th->th_flags = flags; 3270 th->th_win = htons(win); 3271 3272 opt = (char *)(th + 1); 3273 if (mss) { 3274 opt[0] = TCPOPT_MAXSEG; 3275 opt[1] = 4; 3276 mss = htons(mss); 3277 memcpy((opt + 2), &mss, 2); 3278 opt += 4; 3279 } 3280 if (sack) { 3281 opt[0] = TCPOPT_SACK_PERMITTED; 3282 opt[1] = 2; 3283 opt += 2; 3284 } 3285 3286 return (m); 3287 } 3288 3289 void 3290 pf_send_tcp(const struct pf_rule *r, sa_family_t af, 3291 const struct pf_addr *saddr, const struct pf_addr *daddr, 3292 u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack, 3293 u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag, 3294 u_int16_t rtag, u_int rdom) 3295 { 3296 struct mbuf *m; 3297 3298 if ((m = pf_build_tcp(r, af, saddr, daddr, sport, dport, seq, ack, 3299 flags, win, mss, ttl, tag, rtag, 0, rdom)) == NULL) 3300 return; 3301 3302 switch (af) { 3303 case AF_INET: 3304 ip_send(m); 3305 break; 3306 #ifdef INET6 3307 case AF_INET6: 3308 ip6_send(m); 3309 break; 3310 #endif /* INET6 */ 3311 } 3312 } 3313 3314 static void 3315 pf_send_challenge_ack(struct pf_pdesc *pd, struct pf_state *st, 3316 struct pf_state_peer *src, struct pf_state_peer *dst) 3317 { 3318 /* 3319 * We send a challenge ACK in response to a SYN packet which 3320 * matches an existing state (modulo the TCP window check), so the 3321 * packet must be sent on behalf of the destination. 3322 * 3323 * We expect the sender either to remain silent or to send an RST, 3324 * so that both the firewall and the remote peer can purge the 3325 * dead state from memory.
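 *
 * (Editor's note: this is the "challenge ACK" behaviour in the spirit
 * of RFC 5961; rather than trusting a SYN that overlaps an established
 * state, we answer for the peer and let a genuine RST from the sender
 * settle the question.)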
3326 */ 3327 pf_send_tcp(st->rule.ptr, pd->af, pd->dst, pd->src, 3328 pd->hdr.tcp.th_dport, pd->hdr.tcp.th_sport, dst->seqlo, 3329 src->seqlo, TH_ACK, 0, 0, st->rule.ptr->return_ttl, 1, 0, 3330 pd->rdomain); 3331 } 3332 3333 void 3334 pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, int param, 3335 sa_family_t af, struct pf_rule *r, u_int rdomain) 3336 { 3337 struct mbuf *m0; 3338 3339 if ((m0 = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) 3340 return; 3341 3342 m0->m_pkthdr.pf.flags |= PF_TAG_GENERATED; 3343 m0->m_pkthdr.ph_rtableid = rdomain; 3344 if (r && (r->scrub_flags & PFSTATE_SETPRIO)) 3345 m0->m_pkthdr.pf.prio = r->set_prio[0]; 3346 if (r && r->qid) 3347 m0->m_pkthdr.pf.qid = r->qid; 3348 3349 switch (af) { 3350 case AF_INET: 3351 icmp_error(m0, type, code, 0, param); 3352 break; 3353 #ifdef INET6 3354 case AF_INET6: 3355 icmp6_error(m0, type, code, param); 3356 break; 3357 #endif /* INET6 */ 3358 } 3359 } 3360 3361 /* 3362 * Return ((n == 0) == (a == b [with mask m])) 3363 * i.e. with n != 0 this returns (a != b [with mask m]) 3364 */ 3365 int 3366 pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m, 3367 struct pf_addr *b, sa_family_t af) 3368 { 3369 switch (af) { 3370 case AF_INET: 3371 if ((a->addr32[0] & m->addr32[0]) == 3372 (b->addr32[0] & m->addr32[0])) 3373 return (n == 0); 3374 break; 3375 #ifdef INET6 3376 case AF_INET6: 3377 if (((a->addr32[0] & m->addr32[0]) == 3378 (b->addr32[0] & m->addr32[0])) && 3379 ((a->addr32[1] & m->addr32[1]) == 3380 (b->addr32[1] & m->addr32[1])) && 3381 ((a->addr32[2] & m->addr32[2]) == 3382 (b->addr32[2] & m->addr32[2])) && 3383 ((a->addr32[3] & m->addr32[3]) == 3384 (b->addr32[3] & m->addr32[3]))) 3385 return (n == 0); 3386 break; 3387 #endif /* INET6 */ 3388 } 3389 3390 return (n != 0); 3391 } 3392 3393 /* 3394 * Return 1 if b <= a <= e, otherwise return 0.
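 *
 * Editor's note: the IPv6 case compares the four 32-bit words most
 * significant first, so for example with b = 2001:db8::1 and
 * e = 2001:db8::ff, any a that shares the first three words and has
 * its last word in [1, 0xff] is inside the range.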
3395 */ 3396 int 3397 pf_match_addr_range(struct pf_addr *b, struct pf_addr *e, 3398 struct pf_addr *a, sa_family_t af) 3399 { 3400 switch (af) { 3401 case AF_INET: 3402 if ((ntohl(a->addr32[0]) < ntohl(b->addr32[0])) || 3403 (ntohl(a->addr32[0]) > ntohl(e->addr32[0]))) 3404 return (0); 3405 break; 3406 #ifdef INET6 3407 case AF_INET6: { 3408 int i; 3409 3410 /* check a >= b */ 3411 for (i = 0; i < 4; ++i) 3412 if (ntohl(a->addr32[i]) > ntohl(b->addr32[i])) 3413 break; 3414 else if (ntohl(a->addr32[i]) < ntohl(b->addr32[i])) 3415 return (0); 3416 /* check a <= e */ 3417 for (i = 0; i < 4; ++i) 3418 if (ntohl(a->addr32[i]) < ntohl(e->addr32[i])) 3419 break; 3420 else if (ntohl(a->addr32[i]) > ntohl(e->addr32[i])) 3421 return (0); 3422 break; 3423 } 3424 #endif /* INET6 */ 3425 } 3426 return (1); 3427 } 3428 3429 int 3430 pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p) 3431 { 3432 switch (op) { 3433 case PF_OP_IRG: 3434 return ((p > a1) && (p < a2)); 3435 case PF_OP_XRG: 3436 return ((p < a1) || (p > a2)); 3437 case PF_OP_RRG: 3438 return ((p >= a1) && (p <= a2)); 3439 case PF_OP_EQ: 3440 return (p == a1); 3441 case PF_OP_NE: 3442 return (p != a1); 3443 case PF_OP_LT: 3444 return (p < a1); 3445 case PF_OP_LE: 3446 return (p <= a1); 3447 case PF_OP_GT: 3448 return (p > a1); 3449 case PF_OP_GE: 3450 return (p >= a1); 3451 } 3452 return (0); /* never reached */ 3453 } 3454 3455 int 3456 pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p) 3457 { 3458 return (pf_match(op, ntohs(a1), ntohs(a2), ntohs(p))); 3459 } 3460 3461 int 3462 pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u) 3463 { 3464 if (u == -1 && op != PF_OP_EQ && op != PF_OP_NE) 3465 return (0); 3466 return (pf_match(op, a1, a2, u)); 3467 } 3468 3469 int 3470 pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g) 3471 { 3472 if (g == -1 && op != PF_OP_EQ && op != PF_OP_NE) 3473 return (0); 3474 return (pf_match(op, a1, a2, g)); 3475 } 3476 3477 int 3478 pf_match_tag(struct mbuf *m, struct pf_rule *r, int *tag) 3479 { 3480 if (*tag == -1) 3481 *tag = m->m_pkthdr.pf.tag; 3482 3483 return ((!r->match_tag_not && r->match_tag == *tag) || 3484 (r->match_tag_not && r->match_tag != *tag)); 3485 } 3486 3487 int 3488 pf_match_rcvif(struct mbuf *m, struct pf_rule *r) 3489 { 3490 struct ifnet *ifp; 3491 #if NCARP > 0 3492 struct ifnet *ifp0; 3493 #endif 3494 struct pfi_kif *kif; 3495 3496 ifp = if_get(m->m_pkthdr.ph_ifidx); 3497 if (ifp == NULL) 3498 return (0); 3499 3500 #if NCARP > 0 3501 if (ifp->if_type == IFT_CARP && 3502 (ifp0 = if_get(ifp->if_carpdevidx)) != NULL) { 3503 kif = (struct pfi_kif *)ifp0->if_pf_kif; 3504 if_put(ifp0); 3505 } else 3506 #endif /* NCARP */ 3507 kif = (struct pfi_kif *)ifp->if_pf_kif; 3508 3509 if_put(ifp); 3510 3511 if (kif == NULL) { 3512 DPFPRINTF(LOG_ERR, 3513 "%s: kif == NULL, @%d via %s", __func__, 3514 r->nr, r->rcv_ifname); 3515 return (0); 3516 } 3517 3518 return (pfi_kif_match(r->rcv_kif, kif)); 3519 } 3520 3521 void 3522 pf_tag_packet(struct mbuf *m, int tag, int rtableid) 3523 { 3524 if (tag > 0) 3525 m->m_pkthdr.pf.tag = tag; 3526 if (rtableid >= 0) 3527 m->m_pkthdr.ph_rtableid = (u_int)rtableid; 3528 } 3529 3530 void 3531 pf_anchor_stack_init(void) 3532 { 3533 struct pf_anchor_stackframe *stack; 3534 3535 stack = (struct pf_anchor_stackframe *)cpumem_enter(pf_anchor_stack); 3536 stack[PF_ANCHOR_STACK_MAX].sf_stack_top = &stack[0]; 3537 cpumem_leave(pf_anchor_stack, stack); 3538 } 3539 3540 int 3541 pf_anchor_stack_is_full(struct pf_anchor_stackframe *sf) 3542 { 3543 
struct pf_anchor_stackframe *stack; 3544 int rv; 3545 3546 stack = (struct pf_anchor_stackframe *)cpumem_enter(pf_anchor_stack); 3547 rv = (sf == &stack[PF_ANCHOR_STACK_MAX]); 3548 cpumem_leave(pf_anchor_stack, stack); 3549 3550 return (rv); 3551 } 3552 3553 int 3554 pf_anchor_stack_is_empty(struct pf_anchor_stackframe *sf) 3555 { 3556 struct pf_anchor_stackframe *stack; 3557 int rv; 3558 3559 stack = (struct pf_anchor_stackframe *)cpumem_enter(pf_anchor_stack); 3560 rv = (sf == &stack[0]); 3561 cpumem_leave(pf_anchor_stack, stack); 3562 3563 return (rv); 3564 } 3565 3566 struct pf_anchor_stackframe * 3567 pf_anchor_stack_top(void) 3568 { 3569 struct pf_anchor_stackframe *stack; 3570 struct pf_anchor_stackframe *top_sf; 3571 3572 stack = (struct pf_anchor_stackframe *)cpumem_enter(pf_anchor_stack); 3573 top_sf = stack[PF_ANCHOR_STACK_MAX].sf_stack_top; 3574 cpumem_leave(pf_anchor_stack, stack); 3575 3576 return (top_sf); 3577 } 3578 3579 int 3580 pf_anchor_stack_push(struct pf_ruleset *rs, struct pf_rule *r, 3581 struct pf_anchor *child, int jump_target) 3582 { 3583 struct pf_anchor_stackframe *stack; 3584 struct pf_anchor_stackframe *top_sf = pf_anchor_stack_top(); 3585 3586 top_sf++; 3587 if (pf_anchor_stack_is_full(top_sf)) 3588 return (-1); 3589 3590 top_sf->sf_rs = rs; 3591 top_sf->sf_r = r; 3592 top_sf->sf_child = child; 3593 top_sf->sf_jump_target = jump_target; 3594 3595 stack = (struct pf_anchor_stackframe *)cpumem_enter(pf_anchor_stack); 3596 3597 if ((top_sf <= &stack[0]) || (top_sf >= &stack[PF_ANCHOR_STACK_MAX])) 3598 panic("%s: top frame outside of anchor stack range", __func__); 3599 3600 stack[PF_ANCHOR_STACK_MAX].sf_stack_top = top_sf; 3601 cpumem_leave(pf_anchor_stack, stack); 3602 3603 return (0); 3604 } 3605 3606 int 3607 pf_anchor_stack_pop(struct pf_ruleset **rs, struct pf_rule **r, 3608 struct pf_anchor **child, int *jump_target) 3609 { 3610 struct pf_anchor_stackframe *top_sf = pf_anchor_stack_top(); 3611 struct pf_anchor_stackframe *stack; 3612 int on_top; 3613 3614 stack = (struct pf_anchor_stackframe *)cpumem_enter(pf_anchor_stack); 3615 if (pf_anchor_stack_is_empty(top_sf)) { 3616 on_top = -1; 3617 } else { 3618 if ((top_sf <= &stack[0]) || 3619 (top_sf >= &stack[PF_ANCHOR_STACK_MAX])) 3620 panic("%s: top frame outside of anchor stack range", 3621 __func__); 3622 3623 *rs = top_sf->sf_rs; 3624 *r = top_sf->sf_r; 3625 *child = top_sf->sf_child; 3626 *jump_target = top_sf->sf_jump_target; 3627 top_sf--; 3628 stack[PF_ANCHOR_STACK_MAX].sf_stack_top = top_sf; 3629 on_top = 0; 3630 } 3631 cpumem_leave(pf_anchor_stack, stack); 3632 3633 return (on_top); 3634 } 3635 3636 void 3637 pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr, 3638 struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af) 3639 { 3640 switch (af) { 3641 case AF_INET: 3642 naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) | 3643 ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]); 3644 break; 3645 #ifdef INET6 3646 case AF_INET6: 3647 naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) | 3648 ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]); 3649 naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) | 3650 ((rmask->addr32[1] ^ 0xffffffff ) & saddr->addr32[1]); 3651 naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) | 3652 ((rmask->addr32[2] ^ 0xffffffff ) & saddr->addr32[2]); 3653 naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) | 3654 ((rmask->addr32[3] ^ 0xffffffff ) & saddr->addr32[3]); 3655 break; 3656 #endif /* INET6 */ 3657 default: 3658 
unhandled_af(af); 3659 } 3660 } 3661 3662 void 3663 pf_addr_inc(struct pf_addr *addr, sa_family_t af) 3664 { 3665 switch (af) { 3666 case AF_INET: 3667 addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1); 3668 break; 3669 #ifdef INET6 3670 case AF_INET6: 3671 if (addr->addr32[3] == 0xffffffff) { 3672 addr->addr32[3] = 0; 3673 if (addr->addr32[2] == 0xffffffff) { 3674 addr->addr32[2] = 0; 3675 if (addr->addr32[1] == 0xffffffff) { 3676 addr->addr32[1] = 0; 3677 addr->addr32[0] = 3678 htonl(ntohl(addr->addr32[0]) + 1); 3679 } else 3680 addr->addr32[1] = 3681 htonl(ntohl(addr->addr32[1]) + 1); 3682 } else 3683 addr->addr32[2] = 3684 htonl(ntohl(addr->addr32[2]) + 1); 3685 } else 3686 addr->addr32[3] = 3687 htonl(ntohl(addr->addr32[3]) + 1); 3688 break; 3689 #endif /* INET6 */ 3690 default: 3691 unhandled_af(af); 3692 } 3693 } 3694 3695 int 3696 pf_socket_lookup(struct pf_pdesc *pd) 3697 { 3698 struct pf_addr *saddr, *daddr; 3699 u_int16_t sport, dport; 3700 struct inpcbtable *tb; 3701 struct inpcb *inp; 3702 3703 pd->lookup.uid = -1; 3704 pd->lookup.gid = -1; 3705 pd->lookup.pid = NO_PID; 3706 switch (pd->virtual_proto) { 3707 case IPPROTO_TCP: 3708 sport = pd->hdr.tcp.th_sport; 3709 dport = pd->hdr.tcp.th_dport; 3710 PF_ASSERT_LOCKED(); 3711 NET_ASSERT_LOCKED(); 3712 tb = &tcbtable; 3713 break; 3714 case IPPROTO_UDP: 3715 sport = pd->hdr.udp.uh_sport; 3716 dport = pd->hdr.udp.uh_dport; 3717 PF_ASSERT_LOCKED(); 3718 NET_ASSERT_LOCKED(); 3719 tb = &udbtable; 3720 break; 3721 default: 3722 return (-1); 3723 } 3724 if (pd->dir == PF_IN) { 3725 saddr = pd->src; 3726 daddr = pd->dst; 3727 } else { 3728 u_int16_t p; 3729 3730 p = sport; 3731 sport = dport; 3732 dport = p; 3733 saddr = pd->dst; 3734 daddr = pd->src; 3735 } 3736 switch (pd->af) { 3737 case AF_INET: 3738 /* 3739 * This fails when the rtable is changed while the ruleset is 3740 * being evaluated; the socket looked up here will not match the one hit in the end.
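 *
 * Editor's note: as in the stack's own demultiplexing, a fully
 * specified PCB is looked up first, and only then do we fall back
 * to a listening socket bound to the destination address and port.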
3741 */ 3742 inp = in_pcblookup(tb, saddr->v4, sport, daddr->v4, dport, 3743 pd->rdomain); 3744 if (inp == NULL) { 3745 inp = in_pcblookup_listen(tb, daddr->v4, dport, 3746 NULL, pd->rdomain); 3747 if (inp == NULL) 3748 return (-1); 3749 } 3750 break; 3751 #ifdef INET6 3752 case AF_INET6: 3753 inp = in6_pcblookup(tb, &saddr->v6, sport, &daddr->v6, 3754 dport, pd->rdomain); 3755 if (inp == NULL) { 3756 inp = in6_pcblookup_listen(tb, &daddr->v6, dport, 3757 NULL, pd->rdomain); 3758 if (inp == NULL) 3759 return (-1); 3760 } 3761 break; 3762 #endif /* INET6 */ 3763 default: 3764 unhandled_af(pd->af); 3765 } 3766 pd->lookup.uid = inp->inp_socket->so_euid; 3767 pd->lookup.gid = inp->inp_socket->so_egid; 3768 pd->lookup.pid = inp->inp_socket->so_cpid; 3769 in_pcbunref(inp); 3770 return (1); 3771 } 3772 3773 /* post: r => (r[0] == type /\ r[1] >= min_typelen >= 2 "validity" 3774 * /\ (eoh - r) >= min_typelen >= 2 "safety" ) 3775 * 3776 * warning: r + r[1] may exceed opts bounds for r[1] > min_typelen 3777 */ 3778 u_int8_t* 3779 pf_find_tcpopt(u_int8_t *opt, u_int8_t *opts, size_t hlen, u_int8_t type, 3780 u_int8_t min_typelen) 3781 { 3782 u_int8_t *eoh = opts + hlen; 3783 3784 if (min_typelen < 2) 3785 return (NULL); 3786 3787 while ((eoh - opt) >= min_typelen) { 3788 switch (*opt) { 3789 case TCPOPT_EOL: 3790 /* FALLTHROUGH - Workaround the failure of some 3791 systems to NOP-pad their bzero'd option buffers, 3792 producing spurious EOLs */ 3793 case TCPOPT_NOP: 3794 opt++; 3795 continue; 3796 default: 3797 if (opt[0] == type && 3798 opt[1] >= min_typelen) 3799 return (opt); 3800 } 3801 3802 opt += MAX(opt[1], 2); /* evade infinite loops */ 3803 } 3804 3805 return (NULL); 3806 } 3807 3808 u_int8_t 3809 pf_get_wscale(struct pf_pdesc *pd) 3810 { 3811 int olen; 3812 u_int8_t opts[MAX_TCPOPTLEN], *opt; 3813 u_int8_t wscale = 0; 3814 3815 olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr); 3816 if (olen < TCPOLEN_WINDOW || !pf_pull_hdr(pd->m, 3817 pd->off + sizeof(struct tcphdr), opts, olen, NULL, NULL, pd->af)) 3818 return (0); 3819 3820 opt = opts; 3821 while ((opt = pf_find_tcpopt(opt, opts, olen, 3822 TCPOPT_WINDOW, TCPOLEN_WINDOW)) != NULL) { 3823 wscale = opt[2]; 3824 wscale = MIN(wscale, TCP_MAX_WINSHIFT); 3825 wscale |= PF_WSCALE_FLAG; 3826 3827 opt += opt[1]; 3828 } 3829 3830 return (wscale); 3831 } 3832 3833 u_int16_t 3834 pf_get_mss(struct pf_pdesc *pd) 3835 { 3836 int olen; 3837 u_int8_t opts[MAX_TCPOPTLEN], *opt; 3838 u_int16_t mss = tcp_mssdflt; 3839 3840 olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr); 3841 if (olen < TCPOLEN_MAXSEG || !pf_pull_hdr(pd->m, 3842 pd->off + sizeof(struct tcphdr), opts, olen, NULL, NULL, pd->af)) 3843 return (0); 3844 3845 opt = opts; 3846 while ((opt = pf_find_tcpopt(opt, opts, olen, 3847 TCPOPT_MAXSEG, TCPOLEN_MAXSEG)) != NULL) { 3848 memcpy(&mss, (opt + 2), 2); 3849 mss = ntohs(mss); 3850 3851 opt += opt[1]; 3852 } 3853 return (mss); 3854 } 3855 3856 u_int16_t 3857 pf_calc_mss(struct pf_addr *addr, sa_family_t af, int rtableid, u_int16_t offer) 3858 { 3859 struct ifnet *ifp; 3860 struct sockaddr_in *dst; 3861 #ifdef INET6 3862 struct sockaddr_in6 *dst6; 3863 #endif /* INET6 */ 3864 struct rtentry *rt = NULL; 3865 struct sockaddr_storage ss; 3866 int hlen; 3867 u_int16_t mss = tcp_mssdflt; 3868 3869 memset(&ss, 0, sizeof(ss)); 3870 3871 switch (af) { 3872 case AF_INET: 3873 hlen = sizeof(struct ip); 3874 dst = (struct sockaddr_in *)&ss; 3875 dst->sin_family = AF_INET; 3876 dst->sin_len = sizeof(*dst); 3877 dst->sin_addr = addr->v4; 3878 rt = 
rtalloc(sintosa(dst), 0, rtableid); 3879 break; 3880 #ifdef INET6 3881 case AF_INET6: 3882 hlen = sizeof(struct ip6_hdr); 3883 dst6 = (struct sockaddr_in6 *)&ss; 3884 dst6->sin6_family = AF_INET6; 3885 dst6->sin6_len = sizeof(*dst6); 3886 dst6->sin6_addr = addr->v6; 3887 rt = rtalloc(sin6tosa(dst6), 0, rtableid); 3888 break; 3889 #endif /* INET6 */ 3890 } 3891 3892 if (rt != NULL && (ifp = if_get(rt->rt_ifidx)) != NULL) { 3893 mss = ifp->if_mtu - hlen - sizeof(struct tcphdr); 3894 mss = max(tcp_mssdflt, mss); 3895 if_put(ifp); 3896 } 3897 rtfree(rt); 3898 mss = min(mss, offer); 3899 mss = max(mss, 64); /* sanity - at least max opt space */ 3900 return (mss); 3901 } 3902 3903 static __inline int 3904 pf_set_rt_ifp(struct pf_state *st, struct pf_addr *saddr, sa_family_t af, 3905 struct pf_src_node **sns) 3906 { 3907 struct pf_rule *r = st->rule.ptr; 3908 int rv; 3909 3910 if (!r->rt) 3911 return (0); 3912 3913 rv = pf_map_addr(af, r, saddr, &st->rt_addr, NULL, sns, 3914 &r->route, PF_SN_ROUTE); 3915 if (rv == 0) 3916 st->rt = r->rt; 3917 3918 return (rv); 3919 } 3920 3921 u_int32_t 3922 pf_tcp_iss(struct pf_pdesc *pd) 3923 { 3924 SHA2_CTX ctx; 3925 union { 3926 uint8_t bytes[SHA512_DIGEST_LENGTH]; 3927 uint32_t words[1]; 3928 } digest; 3929 3930 if (pf_tcp_secret_init == 0) { 3931 arc4random_buf(pf_tcp_secret, sizeof(pf_tcp_secret)); 3932 SHA512Init(&pf_tcp_secret_ctx); 3933 SHA512Update(&pf_tcp_secret_ctx, pf_tcp_secret, 3934 sizeof(pf_tcp_secret)); 3935 pf_tcp_secret_init = 1; 3936 } 3937 ctx = pf_tcp_secret_ctx; 3938 3939 SHA512Update(&ctx, &pd->rdomain, sizeof(pd->rdomain)); 3940 SHA512Update(&ctx, &pd->hdr.tcp.th_sport, sizeof(u_short)); 3941 SHA512Update(&ctx, &pd->hdr.tcp.th_dport, sizeof(u_short)); 3942 switch (pd->af) { 3943 case AF_INET: 3944 SHA512Update(&ctx, &pd->src->v4, sizeof(struct in_addr)); 3945 SHA512Update(&ctx, &pd->dst->v4, sizeof(struct in_addr)); 3946 break; 3947 #ifdef INET6 3948 case AF_INET6: 3949 SHA512Update(&ctx, &pd->src->v6, sizeof(struct in6_addr)); 3950 SHA512Update(&ctx, &pd->dst->v6, sizeof(struct in6_addr)); 3951 break; 3952 #endif /* INET6 */ 3953 } 3954 SHA512Final(digest.bytes, &ctx); 3955 pf_tcp_iss_off += 4096; 3956 return (digest.words[0] + READ_ONCE(tcp_iss) + pf_tcp_iss_off); 3957 } 3958 3959 void 3960 pf_rule_to_actions(struct pf_rule *r, struct pf_rule_actions *a) 3961 { 3962 if (r->qid) 3963 a->qid = r->qid; 3964 if (r->pqid) 3965 a->pqid = r->pqid; 3966 if (r->rtableid >= 0) 3967 a->rtableid = r->rtableid; 3968 #if NPFLOG > 0 3969 a->log |= r->log; 3970 #endif /* NPFLOG > 0 */ 3971 if (r->scrub_flags & PFSTATE_SETTOS) 3972 a->set_tos = r->set_tos; 3973 if (r->min_ttl) 3974 a->min_ttl = r->min_ttl; 3975 if (r->max_mss) 3976 a->max_mss = r->max_mss; 3977 a->flags |= (r->scrub_flags & (PFSTATE_NODF|PFSTATE_RANDOMID| 3978 PFSTATE_SETTOS|PFSTATE_SCRUB_TCP|PFSTATE_SETPRIO)); 3979 if (r->scrub_flags & PFSTATE_SETPRIO) { 3980 a->set_prio[0] = r->set_prio[0]; 3981 a->set_prio[1] = r->set_prio[1]; 3982 } 3983 if (r->rule_flag & PFRULE_SETDELAY) 3984 a->delay = r->delay; 3985 } 3986 3987 #define PF_TEST_ATTRIB(t, a) \ 3988 if (t) { \ 3989 r = a; \ 3990 continue; \ 3991 } else do { \ 3992 } while (0) 3993 3994 enum pf_test_status 3995 pf_match_rule(struct pf_test_ctx *ctx, struct pf_ruleset *ruleset) 3996 { 3997 struct pf_rule *r; 3998 struct pf_anchor *child = NULL; 3999 int target; 4000 4001 pf_anchor_stack_init(); 4002 enter_ruleset: 4003 r = TAILQ_FIRST(ruleset->rules.active.ptr); 4004 while (r != NULL) { 4005 PF_TEST_ATTRIB(r->rule_flag & 
PFRULE_EXPIRED, 4006 TAILQ_NEXT(r, entries)); 4007 r->evaluations++; 4008 PF_TEST_ATTRIB( 4009 (pfi_kif_match(r->kif, ctx->pd->kif) == r->ifnot), 4010 r->skip[PF_SKIP_IFP].ptr); 4011 PF_TEST_ATTRIB((r->direction && r->direction != ctx->pd->dir), 4012 r->skip[PF_SKIP_DIR].ptr); 4013 PF_TEST_ATTRIB((r->onrdomain >= 0 && 4014 (r->onrdomain == ctx->pd->rdomain) == r->ifnot), 4015 r->skip[PF_SKIP_RDOM].ptr); 4016 PF_TEST_ATTRIB((r->af && r->af != ctx->pd->af), 4017 r->skip[PF_SKIP_AF].ptr); 4018 PF_TEST_ATTRIB((r->proto && r->proto != ctx->pd->proto), 4019 r->skip[PF_SKIP_PROTO].ptr); 4020 PF_TEST_ATTRIB((PF_MISMATCHAW(&r->src.addr, &ctx->pd->nsaddr, 4021 ctx->pd->naf, r->src.neg, ctx->pd->kif, 4022 ctx->act.rtableid)), 4023 r->skip[PF_SKIP_SRC_ADDR].ptr); 4024 PF_TEST_ATTRIB((PF_MISMATCHAW(&r->dst.addr, &ctx->pd->ndaddr, 4025 ctx->pd->af, r->dst.neg, NULL, ctx->act.rtableid)), 4026 r->skip[PF_SKIP_DST_ADDR].ptr); 4027 4028 switch (ctx->pd->virtual_proto) { 4029 case PF_VPROTO_FRAGMENT: 4030 /* tcp/udp only. port_op always 0 in other cases */ 4031 PF_TEST_ATTRIB((r->src.port_op || r->dst.port_op), 4032 TAILQ_NEXT(r, entries)); 4033 PF_TEST_ATTRIB((ctx->pd->proto == IPPROTO_TCP && 4034 r->flagset), 4035 TAILQ_NEXT(r, entries)); 4036 /* icmp only. type/code always 0 in other cases */ 4037 PF_TEST_ATTRIB((r->type || r->code), 4038 TAILQ_NEXT(r, entries)); 4039 /* tcp/udp only. {uid|gid}.op always 0 in other cases */ 4040 PF_TEST_ATTRIB((r->gid.op || r->uid.op), 4041 TAILQ_NEXT(r, entries)); 4042 break; 4043 4044 case IPPROTO_TCP: 4045 PF_TEST_ATTRIB(((r->flagset & ctx->th->th_flags) != 4046 r->flags), 4047 TAILQ_NEXT(r, entries)); 4048 PF_TEST_ATTRIB((r->os_fingerprint != PF_OSFP_ANY && 4049 !pf_osfp_match(pf_osfp_fingerprint(ctx->pd), 4050 r->os_fingerprint)), 4051 TAILQ_NEXT(r, entries)); 4052 /* FALLTHROUGH */ 4053 4054 case IPPROTO_UDP: 4055 /* tcp/udp only. port_op always 0 in other cases */ 4056 PF_TEST_ATTRIB((r->src.port_op && 4057 !pf_match_port(r->src.port_op, r->src.port[0], 4058 r->src.port[1], ctx->pd->nsport)), 4059 r->skip[PF_SKIP_SRC_PORT].ptr); 4060 PF_TEST_ATTRIB((r->dst.port_op && 4061 !pf_match_port(r->dst.port_op, r->dst.port[0], 4062 r->dst.port[1], ctx->pd->ndport)), 4063 r->skip[PF_SKIP_DST_PORT].ptr); 4064 /* tcp/udp only. uid.op always 0 in other cases */ 4065 PF_TEST_ATTRIB((r->uid.op && (ctx->pd->lookup.done || 4066 (ctx->pd->lookup.done = 4067 pf_socket_lookup(ctx->pd), 1)) && 4068 !pf_match_uid(r->uid.op, r->uid.uid[0], 4069 r->uid.uid[1], ctx->pd->lookup.uid)), 4070 TAILQ_NEXT(r, entries)); 4071 /* tcp/udp only. gid.op always 0 in other cases */ 4072 PF_TEST_ATTRIB((r->gid.op && (ctx->pd->lookup.done || 4073 (ctx->pd->lookup.done = 4074 pf_socket_lookup(ctx->pd), 1)) && 4075 !pf_match_gid(r->gid.op, r->gid.gid[0], 4076 r->gid.gid[1], ctx->pd->lookup.gid)), 4077 TAILQ_NEXT(r, entries)); 4078 break; 4079 4080 case IPPROTO_ICMP: 4081 /* icmp only. type always 0 in other cases */ 4082 PF_TEST_ATTRIB((r->type && 4083 r->type != ctx->icmptype + 1), 4084 TAILQ_NEXT(r, entries)); 4085 /* icmp only. type always 0 in other cases */ 4086 PF_TEST_ATTRIB((r->code && 4087 r->code != ctx->icmpcode + 1), 4088 TAILQ_NEXT(r, entries)); 4089 /* icmp only. don't create states on replies */ 4090 PF_TEST_ATTRIB((r->keep_state && !ctx->state_icmp && 4091 (r->rule_flag & PFRULE_STATESLOPPY) == 0 && 4092 ctx->icmp_dir != PF_IN), 4093 TAILQ_NEXT(r, entries)); 4094 break; 4095 4096 case IPPROTO_ICMPV6: 4097 /* icmp only. 
type always 0 in other cases */ 4098 PF_TEST_ATTRIB((r->type && 4099 r->type != ctx->icmptype + 1), 4100 TAILQ_NEXT(r, entries)); 4101 /* icmp only. code always 0 in other cases */ 4102 PF_TEST_ATTRIB((r->code && 4103 r->code != ctx->icmpcode + 1), 4104 TAILQ_NEXT(r, entries)); 4105 /* icmp only. don't create states on replies */ 4106 PF_TEST_ATTRIB((r->keep_state && !ctx->state_icmp && 4107 (r->rule_flag & PFRULE_STATESLOPPY) == 0 && 4108 ctx->icmp_dir != PF_IN && 4109 ctx->icmptype != ND_NEIGHBOR_ADVERT), 4110 TAILQ_NEXT(r, entries)); 4111 break; 4112 4113 default: 4114 break; 4115 } 4116 4117 PF_TEST_ATTRIB((r->rule_flag & PFRULE_FRAGMENT && 4118 ctx->pd->virtual_proto != PF_VPROTO_FRAGMENT), 4119 TAILQ_NEXT(r, entries)); 4120 PF_TEST_ATTRIB((r->tos && !(r->tos == ctx->pd->tos)), 4121 TAILQ_NEXT(r, entries)); 4122 PF_TEST_ATTRIB((r->prob && 4123 r->prob <= arc4random_uniform(UINT_MAX - 1) + 1), 4124 TAILQ_NEXT(r, entries)); 4125 PF_TEST_ATTRIB((r->match_tag && 4126 !pf_match_tag(ctx->pd->m, r, &ctx->tag)), 4127 TAILQ_NEXT(r, entries)); 4128 PF_TEST_ATTRIB((r->rcv_kif && pf_match_rcvif(ctx->pd->m, r) == 4129 r->rcvifnot), 4130 TAILQ_NEXT(r, entries)); 4131 PF_TEST_ATTRIB((r->prio && 4132 (r->prio == PF_PRIO_ZERO ? 0 : r->prio) != 4133 ctx->pd->m->m_pkthdr.pf.prio), 4134 TAILQ_NEXT(r, entries)); 4135 4136 /* must be last! */ 4137 if (r->pktrate.limit) { 4138 pf_add_threshold(&r->pktrate); 4139 PF_TEST_ATTRIB((pf_check_threshold(&r->pktrate)), 4140 TAILQ_NEXT(r, entries)); 4141 } 4142 4143 /* FALLTHROUGH */ 4144 if (r->tag) 4145 ctx->tag = r->tag; 4146 if (r->anchor == NULL) { 4147 4148 if (r->rule_flag & PFRULE_ONCE) { 4149 u_int32_t rule_flag; 4150 4151 rule_flag = r->rule_flag; 4152 if (((rule_flag & PFRULE_EXPIRED) == 0) && 4153 atomic_cas_uint(&r->rule_flag, rule_flag, 4154 rule_flag | PFRULE_EXPIRED) == rule_flag) { 4155 r->exptime = gettime(); 4156 } else { 4157 r = TAILQ_NEXT(r, entries); 4158 continue; 4159 } 4160 } 4161 4162 if (r->action == PF_MATCH) { 4163 if ((ctx->ri = pool_get(&pf_rule_item_pl, 4164 PR_NOWAIT)) == NULL) { 4165 REASON_SET(&ctx->reason, PFRES_MEMORY); 4166 return (PF_TEST_FAIL); 4167 } 4168 ctx->ri->r = r; 4169 /* order is irrelevant */ 4170 SLIST_INSERT_HEAD(&ctx->rules, ctx->ri, entry); 4171 ctx->ri = NULL; 4172 pf_rule_to_actions(r, &ctx->act); 4173 if (r->rule_flag & PFRULE_AFTO) 4174 ctx->pd->naf = r->naf; 4175 if (pf_get_transaddr(r, ctx->pd, ctx->sns, 4176 &ctx->nr) == -1) { 4177 REASON_SET(&ctx->reason, 4178 PFRES_TRANSLATE); 4179 return (PF_TEST_FAIL); 4180 } 4181 #if NPFLOG > 0 4182 if (r->log) { 4183 REASON_SET(&ctx->reason, PFRES_MATCH); 4184 pflog_packet(ctx->pd, ctx->reason, r, 4185 ctx->a, ruleset, NULL); 4186 } 4187 #endif /* NPFLOG > 0 */ 4188 } else { 4189 /* 4190 * found matching r 4191 */ 4192 *ctx->rm = r; 4193 /* 4194 * anchor, with its ruleset, that r belongs to 4195 */ 4196 *ctx->am = ctx->a; 4197 /* 4198 * ruleset that r belongs to 4199 */ 4200 *ctx->rsm = ruleset; 4201 /* 4202 * ruleset that the anchor belongs to. 
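 * Together these saved pointers let pf_test_rule() apply the
 * actions of the final matching pass/block rule once the
 * recursive anchor walk has returned.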
4203 */ 4204 ctx->arsm = ctx->aruleset; 4205 } 4206 4207 #if NPFLOG > 0 4208 if (ctx->act.log & PF_LOG_MATCHES) 4209 pf_log_matches(ctx->pd, r, ctx->a, ruleset, 4210 &ctx->rules); 4211 #endif /* NPFLOG > 0 */ 4212 4213 if (r->quick) 4214 return (PF_TEST_QUICK); 4215 } else { 4216 ctx->a = r; 4217 ctx->aruleset = &r->anchor->ruleset; 4218 if (r->anchor_wildcard) { 4219 RB_FOREACH(child, pf_anchor_node, 4220 &r->anchor->children) { 4221 if (pf_anchor_stack_push(ruleset, r, child, 4222 PF_NEXT_CHILD) != 0) 4223 return (PF_TEST_FAIL); 4224 4225 ruleset = &child->ruleset; 4226 goto enter_ruleset; 4227 next_child: 4228 continue; /* with RB_FOREACH() */ 4229 } 4230 } else { 4231 if (pf_anchor_stack_push(ruleset, r, child, 4232 PF_NEXT_RULE) != 0) 4233 return (PF_TEST_FAIL); 4234 4235 ruleset = &r->anchor->ruleset; 4236 child = NULL; 4237 goto enter_ruleset; 4238 next_rule: 4239 ; 4240 } 4241 } 4242 r = TAILQ_NEXT(r, entries); 4243 } 4244 4245 if (pf_anchor_stack_pop(&ruleset, &r, &child, &target) == 0) { 4246 /* stop if any rule matched within quick anchors. */ 4247 if (r->quick == PF_TEST_QUICK && *ctx->am == r) 4248 return (PF_TEST_QUICK); 4249 4250 switch (target) { 4251 case PF_NEXT_CHILD: 4252 goto next_child; 4253 case PF_NEXT_RULE: 4254 goto next_rule; 4255 default: 4256 panic("%s: unknown jump target", __func__); 4257 } 4258 } 4259 4260 return (PF_TEST_OK); 4261 } 4262 4263 int 4264 pf_test_rule(struct pf_pdesc *pd, struct pf_rule **rm, struct pf_state **sm, 4265 struct pf_rule **am, struct pf_ruleset **rsm, u_short *reason, 4266 struct pfsync_deferral **pdeferral) 4267 { 4268 struct pf_rule *r = NULL; 4269 struct pf_rule *a = NULL; 4270 struct pf_ruleset *ruleset = NULL; 4271 struct pf_state_key *skw = NULL, *sks = NULL; 4272 int rewrite = 0; 4273 u_int16_t virtual_type, virtual_id; 4274 int action = PF_DROP; 4275 struct pf_test_ctx ctx; 4276 int rv; 4277 4278 PF_ASSERT_LOCKED(); 4279 4280 memset(&ctx, 0, sizeof(ctx)); 4281 ctx.pd = pd; 4282 ctx.rm = rm; 4283 ctx.am = am; 4284 ctx.rsm = rsm; 4285 ctx.th = &pd->hdr.tcp; 4286 ctx.act.rtableid = pd->rdomain; 4287 ctx.tag = -1; 4288 SLIST_INIT(&ctx.rules); 4289 4290 if (pd->dir == PF_IN && if_congested()) { 4291 REASON_SET(&ctx.reason, PFRES_CONGEST); 4292 return (PF_DROP); 4293 } 4294 4295 switch (pd->virtual_proto) { 4296 case IPPROTO_ICMP: 4297 ctx.icmptype = pd->hdr.icmp.icmp_type; 4298 ctx.icmpcode = pd->hdr.icmp.icmp_code; 4299 ctx.state_icmp = pf_icmp_mapping(pd, ctx.icmptype, 4300 &ctx.icmp_dir, &virtual_id, &virtual_type); 4301 if (ctx.icmp_dir == PF_IN) { 4302 pd->osport = pd->nsport = virtual_id; 4303 pd->odport = pd->ndport = virtual_type; 4304 } else { 4305 pd->osport = pd->nsport = virtual_type; 4306 pd->odport = pd->ndport = virtual_id; 4307 } 4308 break; 4309 #ifdef INET6 4310 case IPPROTO_ICMPV6: 4311 ctx.icmptype = pd->hdr.icmp6.icmp6_type; 4312 ctx.icmpcode = pd->hdr.icmp6.icmp6_code; 4313 ctx.state_icmp = pf_icmp_mapping(pd, ctx.icmptype, 4314 &ctx.icmp_dir, &virtual_id, &virtual_type); 4315 if (ctx.icmp_dir == PF_IN) { 4316 pd->osport = pd->nsport = virtual_id; 4317 pd->odport = pd->ndport = virtual_type; 4318 } else { 4319 pd->osport = pd->nsport = virtual_type; 4320 pd->odport = pd->ndport = virtual_id; 4321 } 4322 break; 4323 #endif /* INET6 */ 4324 } 4325 4326 ruleset = &pf_main_ruleset; 4327 rv = pf_match_rule(&ctx, ruleset); 4328 if (rv == PF_TEST_FAIL) { 4329 /* 4330 * Reason has been set in pf_match_rule() already. 
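 * (PF_TEST_FAIL only results from internal failures, e.g. a
 * failed pool_get() or pf_get_transaddr(), so all we can do
 * here is drop via cleanup.)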
4331 */ 4332 goto cleanup; 4333 } 4334 4335 r = *ctx.rm; /* matching rule */ 4336 a = *ctx.am; /* rule that defines an anchor containing 'r' */ 4337 ruleset = *ctx.rsm;/* ruleset of the anchor defined by the rule 'a' */ 4338 ctx.aruleset = ctx.arsm;/* ruleset of the 'a' rule itself */ 4339 4340 /* apply actions for last matching pass/block rule */ 4341 pf_rule_to_actions(r, &ctx.act); 4342 if (r->rule_flag & PFRULE_AFTO) 4343 pd->naf = r->naf; 4344 if (pf_get_transaddr(r, pd, ctx.sns, &ctx.nr) == -1) { 4345 REASON_SET(&ctx.reason, PFRES_TRANSLATE); 4346 goto cleanup; 4347 } 4348 REASON_SET(&ctx.reason, PFRES_MATCH); 4349 4350 #if NPFLOG > 0 4351 if (r->log) 4352 pflog_packet(pd, ctx.reason, r, a, ruleset, NULL); 4353 if (ctx.act.log & PF_LOG_MATCHES) 4354 pf_log_matches(pd, r, a, ruleset, &ctx.rules); 4355 #endif /* NPFLOG > 0 */ 4356 4357 if (pd->virtual_proto != PF_VPROTO_FRAGMENT && 4358 (r->action == PF_DROP) && 4359 ((r->rule_flag & PFRULE_RETURNRST) || 4360 (r->rule_flag & PFRULE_RETURNICMP) || 4361 (r->rule_flag & PFRULE_RETURN))) { 4362 if (pd->proto == IPPROTO_TCP && 4363 ((r->rule_flag & PFRULE_RETURNRST) || 4364 (r->rule_flag & PFRULE_RETURN)) && 4365 !(ctx.th->th_flags & TH_RST)) { 4366 u_int32_t ack = 4367 ntohl(ctx.th->th_seq) + pd->p_len; 4368 4369 if (pf_check_tcp_cksum(pd->m, pd->off, 4370 pd->tot_len - pd->off, pd->af)) 4371 REASON_SET(&ctx.reason, PFRES_PROTCKSUM); 4372 else { 4373 if (ctx.th->th_flags & TH_SYN) 4374 ack++; 4375 if (ctx.th->th_flags & TH_FIN) 4376 ack++; 4377 pf_send_tcp(r, pd->af, pd->dst, 4378 pd->src, ctx.th->th_dport, 4379 ctx.th->th_sport, ntohl(ctx.th->th_ack), 4380 ack, TH_RST|TH_ACK, 0, 0, r->return_ttl, 4381 1, 0, pd->rdomain); 4382 } 4383 } else if ((pd->proto != IPPROTO_ICMP || 4384 ICMP_INFOTYPE(ctx.icmptype)) && pd->af == AF_INET && 4385 r->return_icmp) 4386 pf_send_icmp(pd->m, r->return_icmp >> 8, 4387 r->return_icmp & 255, 0, pd->af, r, pd->rdomain); 4388 else if ((pd->proto != IPPROTO_ICMPV6 || 4389 (ctx.icmptype >= ICMP6_ECHO_REQUEST && 4390 ctx.icmptype != ND_REDIRECT)) && pd->af == AF_INET6 && 4391 r->return_icmp6) 4392 pf_send_icmp(pd->m, r->return_icmp6 >> 8, 4393 r->return_icmp6 & 255, 0, pd->af, r, pd->rdomain); 4394 } 4395 4396 if (r->action == PF_DROP) 4397 goto cleanup; 4398 4399 pf_tag_packet(pd->m, ctx.tag, ctx.act.rtableid); 4400 if (ctx.act.rtableid >= 0 && 4401 rtable_l2(ctx.act.rtableid) != pd->rdomain) 4402 pd->destchg = 1; 4403 4404 if (r->action == PF_PASS && pd->badopts != 0 && ! 
r->allow_opts) { 4405 REASON_SET(&ctx.reason, PFRES_IPOPTIONS); 4406 #if NPFLOG > 0 4407 pd->pflog |= PF_LOG_FORCE; 4408 #endif /* NPFLOG > 0 */ 4409 DPFPRINTF(LOG_NOTICE, "dropping packet with " 4410 "ip/ipv6 options in pf_test_rule()"); 4411 goto cleanup; 4412 } 4413 4414 action = PF_PASS; 4415 4416 if (pd->virtual_proto != PF_VPROTO_FRAGMENT 4417 && !ctx.state_icmp && r->keep_state) { 4418 4419 if (r->rule_flag & PFRULE_SRCTRACK && 4420 pf_insert_src_node(&ctx.sns[PF_SN_NONE], r, PF_SN_NONE, 4421 pd->af, pd->src, NULL, NULL) != 0) { 4422 REASON_SET(&ctx.reason, PFRES_SRCLIMIT); 4423 goto cleanup; 4424 } 4425 4426 if (r->max_states && (r->states_cur >= r->max_states)) { 4427 pf_status.lcounters[LCNT_STATES]++; 4428 REASON_SET(&ctx.reason, PFRES_MAXSTATES); 4429 goto cleanup; 4430 } 4431 4432 action = pf_create_state(pd, r, a, ctx.nr, &skw, &sks, 4433 &rewrite, sm, ctx.tag, &ctx.rules, &ctx.act, ctx.sns); 4434 4435 if (action != PF_PASS) 4436 goto cleanup; 4437 if (sks != skw) { 4438 struct pf_state_key *sk; 4439 4440 if (pd->dir == PF_IN) 4441 sk = sks; 4442 else 4443 sk = skw; 4444 rewrite += pf_translate(pd, 4445 &sk->addr[pd->af == pd->naf ? pd->sidx : pd->didx], 4446 sk->port[pd->af == pd->naf ? pd->sidx : pd->didx], 4447 &sk->addr[pd->af == pd->naf ? pd->didx : pd->sidx], 4448 sk->port[pd->af == pd->naf ? pd->didx : pd->sidx], 4449 virtual_type, ctx.icmp_dir); 4450 } 4451 4452 #ifdef INET6 4453 if (rewrite && skw->af != sks->af) 4454 action = PF_AFRT; 4455 #endif /* INET6 */ 4456 4457 } else { 4458 while ((ctx.ri = SLIST_FIRST(&ctx.rules))) { 4459 SLIST_REMOVE_HEAD(&ctx.rules, entry); 4460 pool_put(&pf_rule_item_pl, ctx.ri); 4461 } 4462 } 4463 4464 /* copy back packet headers if needed */ 4465 if (rewrite && pd->hdrlen) { 4466 m_copyback(pd->m, pd->off, pd->hdrlen, &pd->hdr, M_NOWAIT); 4467 } 4468 4469 #if NPFSYNC > 0 4470 if (*sm != NULL && !ISSET((*sm)->state_flags, PFSTATE_NOSYNC) && 4471 pd->dir == PF_OUT && pfsync_is_up()) { 4472 /* 4473 * We want the state created, but we don't 4474 * want to send this in case a partner 4475 * firewall has to know about it to allow 4476 * replies through it. 
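 * pfsync_defer() holds the initial packet until the state
 * update has been sent to the partner (or a short timeout
 * fires), hence the PF_DEFER return below.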
4477 */ 4478 if (pfsync_defer(*sm, pd->m, pdeferral)) 4479 return (PF_DEFER); 4480 } 4481 #endif /* NPFSYNC > 0 */ 4482 4483 return (action); 4484 4485 cleanup: 4486 while ((ctx.ri = SLIST_FIRST(&ctx.rules))) { 4487 SLIST_REMOVE_HEAD(&ctx.rules, entry); 4488 pool_put(&pf_rule_item_pl, ctx.ri); 4489 } 4490 4491 return (action); 4492 } 4493 4494 static __inline int 4495 pf_create_state(struct pf_pdesc *pd, struct pf_rule *r, struct pf_rule *a, 4496 struct pf_rule *nr, struct pf_state_key **skw, struct pf_state_key **sks, 4497 int *rewrite, struct pf_state **sm, int tag, struct pf_rule_slist *rules, 4498 struct pf_rule_actions *act, struct pf_src_node *sns[PF_SN_MAX]) 4499 { 4500 struct pf_state *st = NULL; 4501 struct tcphdr *th = &pd->hdr.tcp; 4502 u_int16_t mss = tcp_mssdflt; 4503 u_short reason; 4504 u_int i; 4505 4506 st = pool_get(&pf_state_pl, PR_NOWAIT | PR_ZERO); 4507 if (st == NULL) { 4508 REASON_SET(&reason, PFRES_MEMORY); 4509 goto csfailed; 4510 } 4511 st->rule.ptr = r; 4512 st->anchor.ptr = a; 4513 st->natrule.ptr = nr; 4514 if (r->allow_opts) 4515 st->state_flags |= PFSTATE_ALLOWOPTS; 4516 if (r->rule_flag & PFRULE_STATESLOPPY) 4517 st->state_flags |= PFSTATE_SLOPPY; 4518 if (r->rule_flag & PFRULE_PFLOW) 4519 st->state_flags |= PFSTATE_PFLOW; 4520 #if NPFLOG > 0 4521 st->log = act->log & PF_LOG_ALL; 4522 #endif /* NPFLOG > 0 */ 4523 st->qid = act->qid; 4524 st->pqid = act->pqid; 4525 st->rtableid[pd->didx] = act->rtableid; 4526 st->rtableid[pd->sidx] = -1; /* return traffic is routed normally */ 4527 st->min_ttl = act->min_ttl; 4528 st->set_tos = act->set_tos; 4529 st->max_mss = act->max_mss; 4530 st->state_flags |= act->flags; 4531 #if NPFSYNC > 0 4532 st->sync_state = PFSYNC_S_NONE; 4533 #endif /* NPFSYNC > 0 */ 4534 st->set_prio[0] = act->set_prio[0]; 4535 st->set_prio[1] = act->set_prio[1]; 4536 st->delay = act->delay; 4537 SLIST_INIT(&st->src_nodes); 4538 /* 4539 * must initialize refcnt, before pf_state_insert() gets called. 4540 * pf_state_insert() grabs reference for pfsync! 
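 * (The reference created here is the state table's own;
 * pfsync's reference is taken on top of it at insert time.)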
4541 */ 4542 PF_REF_INIT(st->refcnt); 4543 mtx_init(&st->mtx, IPL_NET); 4544 4545 switch (pd->proto) { 4546 case IPPROTO_TCP: 4547 st->src.seqlo = ntohl(th->th_seq); 4548 st->src.seqhi = st->src.seqlo + pd->p_len + 1; 4549 if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN && 4550 r->keep_state == PF_STATE_MODULATE) { 4551 /* Generate sequence number modulator */ 4552 st->src.seqdiff = pf_tcp_iss(pd) - st->src.seqlo; 4553 if (st->src.seqdiff == 0) 4554 st->src.seqdiff = 1; 4555 pf_patch_32(pd, &th->th_seq, 4556 htonl(st->src.seqlo + st->src.seqdiff)); 4557 *rewrite = 1; 4558 } else 4559 st->src.seqdiff = 0; 4560 if (th->th_flags & TH_SYN) { 4561 st->src.seqhi++; 4562 st->src.wscale = pf_get_wscale(pd); 4563 } 4564 st->src.max_win = MAX(ntohs(th->th_win), 1); 4565 if (st->src.wscale & PF_WSCALE_MASK) { 4566 /* Remove scale factor from initial window */ 4567 int win = st->src.max_win; 4568 win += 1 << (st->src.wscale & PF_WSCALE_MASK); 4569 st->src.max_win = (win - 1) >> 4570 (st->src.wscale & PF_WSCALE_MASK); 4571 } 4572 if (th->th_flags & TH_FIN) 4573 st->src.seqhi++; 4574 st->dst.seqhi = 1; 4575 st->dst.max_win = 1; 4576 pf_set_protostate(st, PF_PEER_SRC, TCPS_SYN_SENT); 4577 pf_set_protostate(st, PF_PEER_DST, TCPS_CLOSED); 4578 st->timeout = PFTM_TCP_FIRST_PACKET; 4579 pf_status.states_halfopen++; 4580 break; 4581 case IPPROTO_UDP: 4582 pf_set_protostate(st, PF_PEER_SRC, PFUDPS_SINGLE); 4583 pf_set_protostate(st, PF_PEER_DST, PFUDPS_NO_TRAFFIC); 4584 st->timeout = PFTM_UDP_FIRST_PACKET; 4585 break; 4586 case IPPROTO_ICMP: 4587 #ifdef INET6 4588 case IPPROTO_ICMPV6: 4589 #endif /* INET6 */ 4590 st->timeout = PFTM_ICMP_FIRST_PACKET; 4591 break; 4592 default: 4593 pf_set_protostate(st, PF_PEER_SRC, PFOTHERS_SINGLE); 4594 pf_set_protostate(st, PF_PEER_DST, PFOTHERS_NO_TRAFFIC); 4595 st->timeout = PFTM_OTHER_FIRST_PACKET; 4596 } 4597 4598 st->creation = getuptime(); 4599 st->expire = getuptime(); 4600 4601 if (pd->proto == IPPROTO_TCP) { 4602 if (st->state_flags & PFSTATE_SCRUB_TCP && 4603 pf_normalize_tcp_init(pd, &st->src)) { 4604 REASON_SET(&reason, PFRES_MEMORY); 4605 goto csfailed; 4606 } 4607 if (st->state_flags & PFSTATE_SCRUB_TCP && st->src.scrub && 4608 pf_normalize_tcp_stateful(pd, &reason, st, 4609 &st->src, &st->dst, rewrite)) { 4610 /* This really shouldn't happen!!! */ 4611 DPFPRINTF(LOG_ERR, 4612 "%s: tcp normalize failed on first pkt", __func__); 4613 goto csfailed; 4614 } 4615 } 4616 st->direction = pd->dir; 4617 4618 if (pf_state_key_setup(pd, skw, sks, act->rtableid)) { 4619 REASON_SET(&reason, PFRES_MEMORY); 4620 goto csfailed; 4621 } 4622 4623 if (pf_set_rt_ifp(st, pd->src, (*skw)->af, sns) != 0) { 4624 REASON_SET(&reason, PFRES_NOROUTE); 4625 goto csfailed; 4626 } 4627 4628 for (i = 0; i < PF_SN_MAX; i++) 4629 if (sns[i] != NULL) { 4630 struct pf_sn_item *sni; 4631 4632 sni = pool_get(&pf_sn_item_pl, PR_NOWAIT); 4633 if (sni == NULL) { 4634 REASON_SET(&reason, PFRES_MEMORY); 4635 goto csfailed; 4636 } 4637 sni->sn = sns[i]; 4638 SLIST_INSERT_HEAD(&st->src_nodes, sni, next); 4639 sni->sn->states++; 4640 } 4641 4642 if (pf_state_insert(BOUND_IFACE(r, pd->kif), skw, sks, st)) { 4643 *sks = *skw = NULL; 4644 REASON_SET(&reason, PFRES_STATEINS); 4645 goto csfailed; 4646 } else 4647 *sm = st; 4648 4649 /* 4650 * Make state responsible for rules it binds here. 
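 * The list is moved, not copied: zeroing 'rules' below hands
 * ownership of the rule items to the state, so they are
 * released when the state is, not by our caller.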
4651 */ 4652 memcpy(&st->match_rules, rules, sizeof(st->match_rules)); 4653 memset(rules, 0, sizeof(*rules)); 4654 STATE_INC_COUNTERS(st); 4655 4656 if (tag > 0) { 4657 pf_tag_ref(tag); 4658 st->tag = tag; 4659 } 4660 if (pd->proto == IPPROTO_TCP && (th->th_flags & (TH_SYN|TH_ACK)) == 4661 TH_SYN && r->keep_state == PF_STATE_SYNPROXY && pd->dir == PF_IN) { 4662 int rtid = pd->rdomain; 4663 if (act->rtableid >= 0) 4664 rtid = act->rtableid; 4665 pf_set_protostate(st, PF_PEER_SRC, PF_TCPS_PROXY_SRC); 4666 st->src.seqhi = arc4random(); 4667 /* Find mss option */ 4668 mss = pf_get_mss(pd); 4669 mss = pf_calc_mss(pd->src, pd->af, rtid, mss); 4670 mss = pf_calc_mss(pd->dst, pd->af, rtid, mss); 4671 st->src.mss = mss; 4672 pf_send_tcp(r, pd->af, pd->dst, pd->src, th->th_dport, 4673 th->th_sport, st->src.seqhi, ntohl(th->th_seq) + 1, 4674 TH_SYN|TH_ACK, 0, st->src.mss, 0, 1, 0, pd->rdomain); 4675 REASON_SET(&reason, PFRES_SYNPROXY); 4676 return (PF_SYNPROXY_DROP); 4677 } 4678 4679 return (PF_PASS); 4680 4681 csfailed: 4682 if (st) { 4683 pf_normalize_tcp_cleanup(st); /* safe even w/o init */ 4684 pf_src_tree_remove_state(st); 4685 pool_put(&pf_state_pl, st); 4686 } 4687 4688 for (i = 0; i < PF_SN_MAX; i++) 4689 if (sns[i] != NULL) 4690 pf_remove_src_node(sns[i]); 4691 4692 return (PF_DROP); 4693 } 4694 4695 int 4696 pf_translate(struct pf_pdesc *pd, struct pf_addr *saddr, u_int16_t sport, 4697 struct pf_addr *daddr, u_int16_t dport, u_int16_t virtual_type, 4698 int icmp_dir) 4699 { 4700 int rewrite = 0; 4701 int afto = pd->af != pd->naf; 4702 4703 if (afto || PF_ANEQ(daddr, pd->dst, pd->af)) 4704 pd->destchg = 1; 4705 4706 switch (pd->proto) { 4707 case IPPROTO_TCP: /* FALLTHROUGH */ 4708 case IPPROTO_UDP: 4709 rewrite += pf_patch_16(pd, pd->sport, sport); 4710 rewrite += pf_patch_16(pd, pd->dport, dport); 4711 break; 4712 4713 case IPPROTO_ICMP: 4714 if (pd->af != AF_INET) 4715 return (0); 4716 4717 #ifdef INET6 4718 if (afto) { 4719 if (pf_translate_icmp_af(pd, AF_INET6, &pd->hdr.icmp)) 4720 return (0); 4721 pd->proto = IPPROTO_ICMPV6; 4722 rewrite = 1; 4723 } 4724 #endif /* INET6 */ 4725 if (virtual_type == htons(ICMP_ECHO)) { 4726 u_int16_t icmpid = (icmp_dir == PF_IN) ? sport : dport; 4727 rewrite += pf_patch_16(pd, 4728 &pd->hdr.icmp.icmp_id, icmpid); 4729 } 4730 break; 4731 4732 #ifdef INET6 4733 case IPPROTO_ICMPV6: 4734 if (pd->af != AF_INET6) 4735 return (0); 4736 4737 if (afto) { 4738 if (pf_translate_icmp_af(pd, AF_INET, &pd->hdr.icmp6)) 4739 return (0); 4740 pd->proto = IPPROTO_ICMP; 4741 rewrite = 1; 4742 } 4743 if (virtual_type == htons(ICMP6_ECHO_REQUEST)) { 4744 u_int16_t icmpid = (icmp_dir == PF_IN) ? 
sport : dport; 4745 rewrite += pf_patch_16(pd, 4746 &pd->hdr.icmp6.icmp6_id, icmpid); 4747 } 4748 break; 4749 #endif /* INET6 */ 4750 } 4751 4752 if (!afto) { 4753 rewrite += pf_translate_a(pd, pd->src, saddr); 4754 rewrite += pf_translate_a(pd, pd->dst, daddr); 4755 } 4756 4757 return (rewrite); 4758 } 4759 4760 int 4761 pf_tcp_track_full(struct pf_pdesc *pd, struct pf_state **stp, u_short *reason, 4762 int *copyback, int reverse) 4763 { 4764 struct tcphdr *th = &pd->hdr.tcp; 4765 struct pf_state_peer *src, *dst; 4766 u_int16_t win = ntohs(th->th_win); 4767 u_int32_t ack, end, data_end, seq, orig_seq; 4768 u_int8_t sws, dws, psrc, pdst; 4769 int ackskew; 4770 4771 if ((pd->dir == (*stp)->direction && !reverse) || 4772 (pd->dir != (*stp)->direction && reverse)) { 4773 src = &(*stp)->src; 4774 dst = &(*stp)->dst; 4775 psrc = PF_PEER_SRC; 4776 pdst = PF_PEER_DST; 4777 } else { 4778 src = &(*stp)->dst; 4779 dst = &(*stp)->src; 4780 psrc = PF_PEER_DST; 4781 pdst = PF_PEER_SRC; 4782 } 4783 4784 if (src->wscale && dst->wscale && !(th->th_flags & TH_SYN)) { 4785 sws = src->wscale & PF_WSCALE_MASK; 4786 dws = dst->wscale & PF_WSCALE_MASK; 4787 } else 4788 sws = dws = 0; 4789 4790 /* 4791 * Sequence tracking algorithm from Guido van Rooij's paper: 4792 * http://www.madison-gurkha.com/publications/tcp_filtering/ 4793 * tcp_filtering.ps 4794 */ 4795 4796 orig_seq = seq = ntohl(th->th_seq); 4797 if (src->seqlo == 0) { 4798 /* First packet from this end. Set its state */ 4799 4800 if (((*stp)->state_flags & PFSTATE_SCRUB_TCP || dst->scrub) && 4801 src->scrub == NULL) { 4802 if (pf_normalize_tcp_init(pd, src)) { 4803 REASON_SET(reason, PFRES_MEMORY); 4804 return (PF_DROP); 4805 } 4806 } 4807 4808 /* Deferred generation of sequence number modulator */ 4809 if (dst->seqdiff && !src->seqdiff) { 4810 /* use random iss for the TCP server */ 4811 while ((src->seqdiff = arc4random() - seq) == 0) 4812 continue; 4813 ack = ntohl(th->th_ack) - dst->seqdiff; 4814 pf_patch_32(pd, &th->th_seq, htonl(seq + src->seqdiff)); 4815 pf_patch_32(pd, &th->th_ack, htonl(ack)); 4816 *copyback = 1; 4817 } else { 4818 ack = ntohl(th->th_ack); 4819 } 4820 4821 end = seq + pd->p_len; 4822 if (th->th_flags & TH_SYN) { 4823 end++; 4824 if (dst->wscale & PF_WSCALE_FLAG) { 4825 src->wscale = pf_get_wscale(pd); 4826 if (src->wscale & PF_WSCALE_FLAG) { 4827 /* Remove scale factor from initial 4828 * window */ 4829 sws = src->wscale & PF_WSCALE_MASK; 4830 win = ((u_int32_t)win + (1 << sws) - 1) 4831 >> sws; 4832 dws = dst->wscale & PF_WSCALE_MASK; 4833 } else { 4834 /* fixup other window */ 4835 dst->max_win = MIN(TCP_MAXWIN, 4836 (u_int32_t)dst->max_win << 4837 (dst->wscale & PF_WSCALE_MASK)); 4838 /* in case of a retrans SYN|ACK */ 4839 dst->wscale = 0; 4840 } 4841 } 4842 } 4843 data_end = end; 4844 if (th->th_flags & TH_FIN) 4845 end++; 4846 4847 src->seqlo = seq; 4848 if (src->state < TCPS_SYN_SENT) 4849 pf_set_protostate(*stp, psrc, TCPS_SYN_SENT); 4850 4851 /* 4852 * May need to slide the window (seqhi may have been set by 4853 * the crappy stack check or if we picked up the connection 4854 * after establishment) 4855 */ 4856 if (src->seqhi == 1 || 4857 SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi)) 4858 src->seqhi = end + MAX(1, dst->max_win << dws); 4859 if (win > src->max_win) 4860 src->max_win = win; 4861 4862 } else { 4863 ack = ntohl(th->th_ack) - dst->seqdiff; 4864 if (src->seqdiff) { 4865 /* Modulate sequence numbers */ 4866 pf_patch_32(pd, &th->th_seq, htonl(seq + src->seqdiff)); 4867 pf_patch_32(pd, &th->th_ack, 
htonl(ack)); 4868 *copyback = 1; 4869 } 4870 end = seq + pd->p_len; 4871 if (th->th_flags & TH_SYN) 4872 end++; 4873 data_end = end; 4874 if (th->th_flags & TH_FIN) 4875 end++; 4876 } 4877 4878 if ((th->th_flags & TH_ACK) == 0) { 4879 /* Let it pass through the ack skew check */ 4880 ack = dst->seqlo; 4881 } else if ((ack == 0 && 4882 (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) || 4883 /* broken tcp stacks do not set ack */ 4884 (dst->state < TCPS_SYN_SENT)) { 4885 /* 4886 * Many stacks (ours included) will set the ACK number in a 4887 * FIN|ACK if the SYN times out -- no sequence to ACK. 4888 */ 4889 ack = dst->seqlo; 4890 } 4891 4892 if (seq == end) { 4893 /* Ease sequencing restrictions on no data packets */ 4894 seq = src->seqlo; 4895 data_end = end = seq; 4896 } 4897 4898 ackskew = dst->seqlo - ack; 4899 4900 4901 /* 4902 * Need to demodulate the sequence numbers in any TCP SACK options 4903 * (Selective ACK). We could optionally validate the SACK values 4904 * against the current ACK window, either forwards or backwards, but 4905 * I'm not confident that SACK has been implemented properly 4906 * everywhere. It wouldn't surprise me if several stacks accidentally 4907 * SACK too far backwards of previously ACKed data. There really aren't 4908 * any security implications of bad SACKing unless the target stack 4909 * doesn't validate the option length correctly. Someone trying to 4910 * spoof into a TCP connection won't bother blindly sending SACK 4911 * options anyway. 4912 */ 4913 if (dst->seqdiff && (th->th_off << 2) > sizeof(struct tcphdr)) { 4914 if (pf_modulate_sack(pd, dst)) 4915 *copyback = 1; 4916 } 4917 4918 4919 #define MAXACKWINDOW (0xffff + 1500) /* 1500 is an arbitrary fudge factor */ 4920 if (SEQ_GEQ(src->seqhi, data_end) && 4921 /* Last octet inside other's window space */ 4922 SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) && 4923 /* Retrans: not more than one window back */ 4924 (ackskew >= -MAXACKWINDOW) && 4925 /* Acking not more than one reassembled fragment backwards */ 4926 (ackskew <= (MAXACKWINDOW << sws)) && 4927 /* Acking not more than one window forward */ 4928 ((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo || 4929 (orig_seq == src->seqlo + 1) || (orig_seq + 1 == src->seqlo))) { 4930 /* Require an exact/+1 sequence match on resets when possible */ 4931 4932 if (dst->scrub || src->scrub) { 4933 if (pf_normalize_tcp_stateful(pd, reason, *stp, src, 4934 dst, copyback)) 4935 return (PF_DROP); 4936 } 4937 4938 /* update max window */ 4939 if (src->max_win < win) 4940 src->max_win = win; 4941 /* synchronize sequencing */ 4942 if (SEQ_GT(end, src->seqlo)) 4943 src->seqlo = end; 4944 /* slide the window of what the other end can send */ 4945 if (SEQ_GEQ(ack + (win << sws), dst->seqhi)) 4946 dst->seqhi = ack + MAX((win << sws), 1); 4947 4948 /* update states */ 4949 if (th->th_flags & TH_SYN) 4950 if (src->state < TCPS_SYN_SENT) 4951 pf_set_protostate(*stp, psrc, TCPS_SYN_SENT); 4952 if (th->th_flags & TH_FIN) 4953 if (src->state < TCPS_CLOSING) 4954 pf_set_protostate(*stp, psrc, TCPS_CLOSING); 4955 if (th->th_flags & TH_ACK) { 4956 if (dst->state == TCPS_SYN_SENT) { 4957 pf_set_protostate(*stp, pdst, 4958 TCPS_ESTABLISHED); 4959 if (src->state == TCPS_ESTABLISHED && 4960 !SLIST_EMPTY(&(*stp)->src_nodes) && 4961 pf_src_connlimit(stp)) { 4962 REASON_SET(reason, PFRES_SRCLIMIT); 4963 return (PF_DROP); 4964 } 4965 } else if (dst->state == TCPS_CLOSING) 4966 pf_set_protostate(*stp, pdst, 4967 TCPS_FIN_WAIT_2); 4968 } 4969 if (th->th_flags & TH_RST) 4970 
pf_set_protostate(*stp, PF_PEER_BOTH, TCPS_TIME_WAIT); 4971 4972 /* update expire time */ 4973 (*stp)->expire = getuptime(); 4974 if (src->state >= TCPS_FIN_WAIT_2 && 4975 dst->state >= TCPS_FIN_WAIT_2) 4976 (*stp)->timeout = PFTM_TCP_CLOSED; 4977 else if (src->state >= TCPS_CLOSING && 4978 dst->state >= TCPS_CLOSING) 4979 (*stp)->timeout = PFTM_TCP_FIN_WAIT; 4980 else if (src->state < TCPS_ESTABLISHED || 4981 dst->state < TCPS_ESTABLISHED) 4982 (*stp)->timeout = PFTM_TCP_OPENING; 4983 else if (src->state >= TCPS_CLOSING || 4984 dst->state >= TCPS_CLOSING) 4985 (*stp)->timeout = PFTM_TCP_CLOSING; 4986 else 4987 (*stp)->timeout = PFTM_TCP_ESTABLISHED; 4988 4989 /* Fall through to PASS packet */ 4990 } else if ((dst->state < TCPS_SYN_SENT || 4991 dst->state >= TCPS_FIN_WAIT_2 || 4992 src->state >= TCPS_FIN_WAIT_2) && 4993 SEQ_GEQ(src->seqhi + MAXACKWINDOW, data_end) && 4994 /* Within a window forward of the originating packet */ 4995 SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) { 4996 /* Within a window backward of the originating packet */ 4997 4998 /* 4999 * This currently handles three situations: 5000 * 1) Stupid stacks will shotgun SYNs before their peer 5001 * replies. 5002 * 2) When PF catches an already established stream (the 5003 * firewall rebooted, the state table was flushed, routes 5004 * changed...) 5005 * 3) Packets get funky immediately after the connection 5006 * closes (this should catch Solaris spurious ACK|FINs 5007 * that web servers like to spew after a close) 5008 * 5009 * This must be a little more careful than the above code 5010 * since packet floods will also be caught here. We don't 5011 * update the TTL here to mitigate the damage of a packet 5012 * flood and so the same code can handle awkward establishment 5013 * and a loosened connection close. 5014 * In the establishment case, a correct peer response will 5015 * validate the connection, go through the normal state code 5016 * and keep updating the state TTL. 5017 */ 5018 5019 if (pf_status.debug >= LOG_NOTICE) { 5020 log(LOG_NOTICE, "pf: loose state match: "); 5021 pf_print_state(*stp); 5022 pf_print_flags(th->th_flags); 5023 addlog(" seq=%u (%u) ack=%u len=%u ackskew=%d " 5024 "pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack, 5025 pd->p_len, ackskew, (*stp)->packets[0], 5026 (*stp)->packets[1], 5027 pd->dir == PF_IN ? "in" : "out", 5028 pd->dir == (*stp)->direction ? "fwd" : "rev"); 5029 } 5030 5031 if (dst->scrub || src->scrub) { 5032 if (pf_normalize_tcp_stateful(pd, reason, *stp, src, 5033 dst, copyback)) 5034 return (PF_DROP); 5035 } 5036 5037 /* update max window */ 5038 if (src->max_win < win) 5039 src->max_win = win; 5040 /* synchronize sequencing */ 5041 if (SEQ_GT(end, src->seqlo)) 5042 src->seqlo = end; 5043 /* slide the window of what the other end can send */ 5044 if (SEQ_GEQ(ack + (win << sws), dst->seqhi)) 5045 dst->seqhi = ack + MAX((win << sws), 1); 5046 5047 /* 5048 * Cannot set dst->seqhi here since this could be a shotgunned 5049 * SYN and not an already established connection. 
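 * A genuine reply from dst will pass the strict checks above
 * and maintain the window from there on.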
5050 */ 5051 if (th->th_flags & TH_FIN) 5052 if (src->state < TCPS_CLOSING) 5053 pf_set_protostate(*stp, psrc, TCPS_CLOSING); 5054 if (th->th_flags & TH_RST) 5055 pf_set_protostate(*stp, PF_PEER_BOTH, TCPS_TIME_WAIT); 5056 5057 /* Fall through to PASS packet */ 5058 } else { 5059 if ((*stp)->dst.state == TCPS_SYN_SENT && 5060 (*stp)->src.state == TCPS_SYN_SENT) { 5061 /* Send RST for state mismatches during handshake */ 5062 if (!(th->th_flags & TH_RST)) 5063 pf_send_tcp((*stp)->rule.ptr, pd->af, 5064 pd->dst, pd->src, th->th_dport, 5065 th->th_sport, ntohl(th->th_ack), 0, 5066 TH_RST, 0, 0, 5067 (*stp)->rule.ptr->return_ttl, 1, 0, 5068 pd->rdomain); 5069 src->seqlo = 0; 5070 src->seqhi = 1; 5071 src->max_win = 1; 5072 } else if (pf_status.debug >= LOG_NOTICE) { 5073 log(LOG_NOTICE, "pf: BAD state: "); 5074 pf_print_state(*stp); 5075 pf_print_flags(th->th_flags); 5076 addlog(" seq=%u (%u) ack=%u len=%u ackskew=%d " 5077 "pkts=%llu:%llu dir=%s,%s\n", 5078 seq, orig_seq, ack, pd->p_len, ackskew, 5079 (*stp)->packets[0], (*stp)->packets[1], 5080 pd->dir == PF_IN ? "in" : "out", 5081 pd->dir == (*stp)->direction ? "fwd" : "rev"); 5082 addlog("pf: State failure on: %c %c %c %c | %c %c\n", 5083 SEQ_GEQ(src->seqhi, data_end) ? ' ' : '1', 5084 SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ? 5085 ' ': '2', 5086 (ackskew >= -MAXACKWINDOW) ? ' ' : '3', 5087 (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4', 5088 SEQ_GEQ(src->seqhi + MAXACKWINDOW, data_end) ? 5089 ' ' :'5', 5090 SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' ' :'6'); 5091 } 5092 REASON_SET(reason, PFRES_BADSTATE); 5093 return (PF_DROP); 5094 } 5095 5096 return (PF_PASS); 5097 } 5098 5099 int 5100 pf_tcp_track_sloppy(struct pf_pdesc *pd, struct pf_state **stp, 5101 u_short *reason) 5102 { 5103 struct tcphdr *th = &pd->hdr.tcp; 5104 struct pf_state_peer *src, *dst; 5105 u_int8_t psrc, pdst; 5106 5107 if (pd->dir == (*stp)->direction) { 5108 src = &(*stp)->src; 5109 dst = &(*stp)->dst; 5110 psrc = PF_PEER_SRC; 5111 pdst = PF_PEER_DST; 5112 } else { 5113 src = &(*stp)->dst; 5114 dst = &(*stp)->src; 5115 psrc = PF_PEER_DST; 5116 pdst = PF_PEER_SRC; 5117 } 5118 5119 if (th->th_flags & TH_SYN) 5120 if (src->state < TCPS_SYN_SENT) 5121 pf_set_protostate(*stp, psrc, TCPS_SYN_SENT); 5122 if (th->th_flags & TH_FIN) 5123 if (src->state < TCPS_CLOSING) 5124 pf_set_protostate(*stp, psrc, TCPS_CLOSING); 5125 if (th->th_flags & TH_ACK) { 5126 if (dst->state == TCPS_SYN_SENT) { 5127 pf_set_protostate(*stp, pdst, TCPS_ESTABLISHED); 5128 if (src->state == TCPS_ESTABLISHED && 5129 !SLIST_EMPTY(&(*stp)->src_nodes) && 5130 pf_src_connlimit(stp)) { 5131 REASON_SET(reason, PFRES_SRCLIMIT); 5132 return (PF_DROP); 5133 } 5134 } else if (dst->state == TCPS_CLOSING) { 5135 pf_set_protostate(*stp, pdst, TCPS_FIN_WAIT_2); 5136 } else if (src->state == TCPS_SYN_SENT && 5137 dst->state < TCPS_SYN_SENT) { 5138 /* 5139 * Handle a special sloppy case where we only see one 5140 * half of the connection. If there is an ACK after 5141 * the initial SYN without ever seeing a packet from 5142 * the destination, set the connection to established. 
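 * This typically means asymmetric routing: the reply path
 * bypasses this firewall, so dst's packets are never seen.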
5143 */ 5144 pf_set_protostate(*stp, PF_PEER_BOTH, 5145 TCPS_ESTABLISHED); 5146 if (!SLIST_EMPTY(&(*stp)->src_nodes) && 5147 pf_src_connlimit(stp)) { 5148 REASON_SET(reason, PFRES_SRCLIMIT); 5149 return (PF_DROP); 5150 } 5151 } else if (src->state == TCPS_CLOSING && 5152 dst->state == TCPS_ESTABLISHED && 5153 dst->seqlo == 0) { 5154 /* 5155 * Handle the closing of half connections where we 5156 * don't see the full bidirectional FIN/ACK+ACK 5157 * handshake. 5158 */ 5159 pf_set_protostate(*stp, pdst, TCPS_CLOSING); 5160 } 5161 } 5162 if (th->th_flags & TH_RST) 5163 pf_set_protostate(*stp, PF_PEER_BOTH, TCPS_TIME_WAIT); 5164 5165 /* update expire time */ 5166 (*stp)->expire = getuptime(); 5167 if (src->state >= TCPS_FIN_WAIT_2 && 5168 dst->state >= TCPS_FIN_WAIT_2) 5169 (*stp)->timeout = PFTM_TCP_CLOSED; 5170 else if (src->state >= TCPS_CLOSING && 5171 dst->state >= TCPS_CLOSING) 5172 (*stp)->timeout = PFTM_TCP_FIN_WAIT; 5173 else if (src->state < TCPS_ESTABLISHED || 5174 dst->state < TCPS_ESTABLISHED) 5175 (*stp)->timeout = PFTM_TCP_OPENING; 5176 else if (src->state >= TCPS_CLOSING || 5177 dst->state >= TCPS_CLOSING) 5178 (*stp)->timeout = PFTM_TCP_CLOSING; 5179 else 5180 (*stp)->timeout = PFTM_TCP_ESTABLISHED; 5181 5182 return (PF_PASS); 5183 } 5184 5185 static __inline int 5186 pf_synproxy(struct pf_pdesc *pd, struct pf_state **stp, u_short *reason) 5187 { 5188 struct pf_state_key *sk = (*stp)->key[pd->didx]; 5189 5190 if ((*stp)->src.state == PF_TCPS_PROXY_SRC) { 5191 struct tcphdr *th = &pd->hdr.tcp; 5192 5193 if (pd->dir != (*stp)->direction) { 5194 REASON_SET(reason, PFRES_SYNPROXY); 5195 return (PF_SYNPROXY_DROP); 5196 } 5197 if (th->th_flags & TH_SYN) { 5198 if (ntohl(th->th_seq) != (*stp)->src.seqlo) { 5199 REASON_SET(reason, PFRES_SYNPROXY); 5200 return (PF_DROP); 5201 } 5202 pf_send_tcp((*stp)->rule.ptr, pd->af, pd->dst, 5203 pd->src, th->th_dport, th->th_sport, 5204 (*stp)->src.seqhi, ntohl(th->th_seq) + 1, 5205 TH_SYN|TH_ACK, 0, (*stp)->src.mss, 0, 1, 5206 0, pd->rdomain); 5207 REASON_SET(reason, PFRES_SYNPROXY); 5208 return (PF_SYNPROXY_DROP); 5209 } else if ((th->th_flags & (TH_ACK|TH_RST|TH_FIN)) != TH_ACK || 5210 (ntohl(th->th_ack) != (*stp)->src.seqhi + 1) || 5211 (ntohl(th->th_seq) != (*stp)->src.seqlo + 1)) { 5212 REASON_SET(reason, PFRES_SYNPROXY); 5213 return (PF_DROP); 5214 } else if (!SLIST_EMPTY(&(*stp)->src_nodes) && 5215 pf_src_connlimit(stp)) { 5216 REASON_SET(reason, PFRES_SRCLIMIT); 5217 return (PF_DROP); 5218 } else 5219 pf_set_protostate(*stp, PF_PEER_SRC, 5220 PF_TCPS_PROXY_DST); 5221 } 5222 if ((*stp)->src.state == PF_TCPS_PROXY_DST) { 5223 struct tcphdr *th = &pd->hdr.tcp; 5224 5225 if (pd->dir == (*stp)->direction) { 5226 if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) || 5227 (ntohl(th->th_ack) != (*stp)->src.seqhi + 1) || 5228 (ntohl(th->th_seq) != (*stp)->src.seqlo + 1)) { 5229 REASON_SET(reason, PFRES_SYNPROXY); 5230 return (PF_DROP); 5231 } 5232 (*stp)->src.max_win = MAX(ntohs(th->th_win), 1); 5233 if ((*stp)->dst.seqhi == 1) 5234 (*stp)->dst.seqhi = arc4random(); 5235 pf_send_tcp((*stp)->rule.ptr, pd->af, 5236 &sk->addr[pd->sidx], &sk->addr[pd->didx], 5237 sk->port[pd->sidx], sk->port[pd->didx], 5238 (*stp)->dst.seqhi, 0, TH_SYN, 0, 5239 (*stp)->src.mss, 0, 0, (*stp)->tag, 5240 sk->rdomain); 5241 REASON_SET(reason, PFRES_SYNPROXY); 5242 return (PF_SYNPROXY_DROP); 5243 } else if (((th->th_flags & (TH_SYN|TH_ACK)) != 5244 (TH_SYN|TH_ACK)) || 5245 (ntohl(th->th_ack) != (*stp)->dst.seqhi + 1)) { 5246 REASON_SET(reason, PFRES_SYNPROXY); 5247 return 
(PF_DROP); 5248 } else { 5249 (*stp)->dst.max_win = MAX(ntohs(th->th_win), 1); 5250 (*stp)->dst.seqlo = ntohl(th->th_seq); 5251 pf_send_tcp((*stp)->rule.ptr, pd->af, pd->dst, 5252 pd->src, th->th_dport, th->th_sport, 5253 ntohl(th->th_ack), ntohl(th->th_seq) + 1, 5254 TH_ACK, (*stp)->src.max_win, 0, 0, 0, 5255 (*stp)->tag, pd->rdomain); 5256 pf_send_tcp((*stp)->rule.ptr, pd->af, 5257 &sk->addr[pd->sidx], &sk->addr[pd->didx], 5258 sk->port[pd->sidx], sk->port[pd->didx], 5259 (*stp)->src.seqhi + 1, (*stp)->src.seqlo + 1, 5260 TH_ACK, (*stp)->dst.max_win, 0, 0, 1, 5261 0, sk->rdomain); 5262 (*stp)->src.seqdiff = (*stp)->dst.seqhi - 5263 (*stp)->src.seqlo; 5264 (*stp)->dst.seqdiff = (*stp)->src.seqhi - 5265 (*stp)->dst.seqlo; 5266 (*stp)->src.seqhi = (*stp)->src.seqlo + 5267 (*stp)->dst.max_win; 5268 (*stp)->dst.seqhi = (*stp)->dst.seqlo + 5269 (*stp)->src.max_win; 5270 (*stp)->src.wscale = (*stp)->dst.wscale = 0; 5271 pf_set_protostate(*stp, PF_PEER_BOTH, 5272 TCPS_ESTABLISHED); 5273 REASON_SET(reason, PFRES_SYNPROXY); 5274 return (PF_SYNPROXY_DROP); 5275 } 5276 } 5277 return (PF_PASS); 5278 } 5279 5280 int 5281 pf_test_state(struct pf_pdesc *pd, struct pf_state **stp, u_short *reason) 5282 { 5283 int copyback = 0; 5284 struct pf_state_peer *src, *dst; 5285 int action; 5286 struct inpcb *inp = pd->m->m_pkthdr.pf.inp; 5287 u_int8_t psrc, pdst; 5288 5289 action = PF_PASS; 5290 if (pd->dir == (*stp)->direction) { 5291 src = &(*stp)->src; 5292 dst = &(*stp)->dst; 5293 psrc = PF_PEER_SRC; 5294 pdst = PF_PEER_DST; 5295 } else { 5296 src = &(*stp)->dst; 5297 dst = &(*stp)->src; 5298 psrc = PF_PEER_DST; 5299 pdst = PF_PEER_SRC; 5300 } 5301 5302 switch (pd->virtual_proto) { 5303 case IPPROTO_TCP: 5304 if ((action = pf_synproxy(pd, stp, reason)) != PF_PASS) 5305 return (action); 5306 if ((pd->hdr.tcp.th_flags & (TH_SYN|TH_ACK)) == TH_SYN) { 5307 5308 if (dst->state >= TCPS_FIN_WAIT_2 && 5309 src->state >= TCPS_FIN_WAIT_2) { 5310 if (pf_status.debug >= LOG_NOTICE) { 5311 log(LOG_NOTICE, "pf: state reuse "); 5312 pf_print_state(*stp); 5313 pf_print_flags(pd->hdr.tcp.th_flags); 5314 addlog("\n"); 5315 } 5316 /* XXX make sure it's the same direction ?? */ 5317 (*stp)->timeout = PFTM_PURGE; 5318 pf_state_unref(*stp); 5319 *stp = NULL; 5320 pf_mbuf_link_inpcb(pd->m, inp); 5321 return (PF_DROP); 5322 } else if (dst->state >= TCPS_ESTABLISHED && 5323 src->state >= TCPS_ESTABLISHED) { 5324 /* 5325 * SYN matches existing state??? 5326 * Typically happens when the sender boots up after 5327 * a sudden panic. Certain protocols (NFSv3) 5328 * always use the same port numbers. Challenge 5329 * ACK enables all parties (firewall and peers) 5330 * to get in sync again. 
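 * The challenge ACK carries the sequence numbers we expect;
 * an endpoint that really rebooted answers with an RST, which
 * the TH_RST handling above moves to TCPS_TIME_WAIT so the
 * retransmitted SYN can set up a fresh state.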
5331 */ 5332 pf_send_challenge_ack(pd, *stp, src, dst); 5333 return (PF_DROP); 5334 } 5335 } 5336 5337 if ((*stp)->state_flags & PFSTATE_SLOPPY) { 5338 if (pf_tcp_track_sloppy(pd, stp, reason) == PF_DROP) 5339 return (PF_DROP); 5340 } else { 5341 if (pf_tcp_track_full(pd, stp, reason, &copyback, 5342 PF_REVERSED_KEY((*stp)->key, pd->af)) == PF_DROP) 5343 return (PF_DROP); 5344 } 5345 break; 5346 case IPPROTO_UDP: 5347 /* update states */ 5348 if (src->state < PFUDPS_SINGLE) 5349 pf_set_protostate(*stp, psrc, PFUDPS_SINGLE); 5350 if (dst->state == PFUDPS_SINGLE) 5351 pf_set_protostate(*stp, pdst, PFUDPS_MULTIPLE); 5352 5353 /* update expire time */ 5354 (*stp)->expire = getuptime(); 5355 if (src->state == PFUDPS_MULTIPLE && 5356 dst->state == PFUDPS_MULTIPLE) 5357 (*stp)->timeout = PFTM_UDP_MULTIPLE; 5358 else 5359 (*stp)->timeout = PFTM_UDP_SINGLE; 5360 break; 5361 default: 5362 /* update states */ 5363 if (src->state < PFOTHERS_SINGLE) 5364 pf_set_protostate(*stp, psrc, PFOTHERS_SINGLE); 5365 if (dst->state == PFOTHERS_SINGLE) 5366 pf_set_protostate(*stp, pdst, PFOTHERS_MULTIPLE); 5367 5368 /* update expire time */ 5369 (*stp)->expire = getuptime(); 5370 if (src->state == PFOTHERS_MULTIPLE && 5371 dst->state == PFOTHERS_MULTIPLE) 5372 (*stp)->timeout = PFTM_OTHER_MULTIPLE; 5373 else 5374 (*stp)->timeout = PFTM_OTHER_SINGLE; 5375 break; 5376 } 5377 5378 /* translate source/destination address, if necessary */ 5379 if ((*stp)->key[PF_SK_WIRE] != (*stp)->key[PF_SK_STACK]) { 5380 struct pf_state_key *nk; 5381 int afto, sidx, didx; 5382 5383 if (PF_REVERSED_KEY((*stp)->key, pd->af)) 5384 nk = (*stp)->key[pd->sidx]; 5385 else 5386 nk = (*stp)->key[pd->didx]; 5387 5388 afto = pd->af != nk->af; 5389 sidx = afto ? pd->didx : pd->sidx; 5390 didx = afto ? pd->sidx : pd->didx; 5391 5392 #ifdef INET6 5393 if (afto) { 5394 pf_addrcpy(&pd->nsaddr, &nk->addr[sidx], nk->af); 5395 pf_addrcpy(&pd->ndaddr, &nk->addr[didx], nk->af); 5396 pd->naf = nk->af; 5397 action = PF_AFRT; 5398 } 5399 #endif /* INET6 */ 5400 5401 if (!afto) 5402 pf_translate_a(pd, pd->src, &nk->addr[sidx]); 5403 5404 if (pd->sport != NULL) 5405 pf_patch_16(pd, pd->sport, nk->port[sidx]); 5406 5407 if (afto || PF_ANEQ(pd->dst, &nk->addr[didx], pd->af) || 5408 pd->rdomain != nk->rdomain) 5409 pd->destchg = 1; 5410 5411 if (!afto) 5412 pf_translate_a(pd, pd->dst, &nk->addr[didx]); 5413 5414 if (pd->dport != NULL) 5415 pf_patch_16(pd, pd->dport, nk->port[didx]); 5416 5417 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 5418 copyback = 1; 5419 } 5420 5421 if (copyback && pd->hdrlen > 0) { 5422 m_copyback(pd->m, pd->off, pd->hdrlen, &pd->hdr, M_NOWAIT); 5423 } 5424 5425 return (action); 5426 } 5427 5428 int 5429 pf_icmp_state_lookup(struct pf_pdesc *pd, struct pf_state_key_cmp *key, 5430 struct pf_state **stp, u_int16_t icmpid, u_int16_t type, 5431 int icmp_dir, int *iidx, int multi, int inner) 5432 { 5433 int direction, action; 5434 5435 key->af = pd->af; 5436 key->proto = pd->proto; 5437 key->rdomain = pd->rdomain; 5438 if (icmp_dir == PF_IN) { 5439 *iidx = pd->sidx; 5440 key->port[pd->sidx] = icmpid; 5441 key->port[pd->didx] = type; 5442 } else { 5443 *iidx = pd->didx; 5444 key->port[pd->sidx] = type; 5445 key->port[pd->didx] = icmpid; 5446 } 5447 5448 if (pf_state_key_addr_setup(pd, key, pd->sidx, pd->src, pd->didx, 5449 pd->dst, pd->af, multi)) 5450 return (PF_DROP); 5451 5452 key->hash = pf_pkt_hash(key->af, key->proto, 5453 &key->addr[0], &key->addr[1], 0, 0); 5454 5455 action = pf_find_state(pd, key, stp); 5456 if (action != PF_MATCH) 5457 
return (action); 5458 5459 if ((*stp)->state_flags & PFSTATE_SLOPPY) 5460 return (-1); 5461 5462 /* Is this ICMP message flowing in right direction? */ 5463 if ((*stp)->key[PF_SK_WIRE]->af != (*stp)->key[PF_SK_STACK]->af) 5464 direction = (pd->af == (*stp)->key[PF_SK_WIRE]->af) ? 5465 PF_IN : PF_OUT; 5466 else 5467 direction = (*stp)->direction; 5468 if ((((!inner && direction == pd->dir) || 5469 (inner && direction != pd->dir)) ? 5470 PF_IN : PF_OUT) != icmp_dir) { 5471 if (pf_status.debug >= LOG_NOTICE) { 5472 log(LOG_NOTICE, 5473 "pf: icmp type %d in wrong direction (%d): ", 5474 ntohs(type), icmp_dir); 5475 pf_print_state(*stp); 5476 addlog("\n"); 5477 } 5478 return (PF_DROP); 5479 } 5480 return (-1); 5481 } 5482 5483 int 5484 pf_test_state_icmp(struct pf_pdesc *pd, struct pf_state **stp, 5485 u_short *reason) 5486 { 5487 u_int16_t virtual_id, virtual_type; 5488 u_int8_t icmptype, icmpcode; 5489 int icmp_dir, iidx, ret, copyback = 0; 5490 5491 struct pf_state_key_cmp key; 5492 5493 switch (pd->proto) { 5494 case IPPROTO_ICMP: 5495 icmptype = pd->hdr.icmp.icmp_type; 5496 icmpcode = pd->hdr.icmp.icmp_code; 5497 break; 5498 #ifdef INET6 5499 case IPPROTO_ICMPV6: 5500 icmptype = pd->hdr.icmp6.icmp6_type; 5501 icmpcode = pd->hdr.icmp6.icmp6_code; 5502 break; 5503 #endif /* INET6 */ 5504 default: 5505 panic("unhandled proto %d", pd->proto); 5506 } 5507 5508 if (pf_icmp_mapping(pd, icmptype, &icmp_dir, &virtual_id, 5509 &virtual_type) == 0) { 5510 /* 5511 * ICMP query/reply message not related to a TCP/UDP packet. 5512 * Search for an ICMP state. 5513 */ 5514 ret = pf_icmp_state_lookup(pd, &key, stp, 5515 virtual_id, virtual_type, icmp_dir, &iidx, 5516 0, 0); 5517 /* IPv6? try matching a multicast address */ 5518 if (ret == PF_DROP && pd->af == AF_INET6 && icmp_dir == PF_OUT) 5519 ret = pf_icmp_state_lookup(pd, &key, stp, virtual_id, 5520 virtual_type, icmp_dir, &iidx, 1, 0); 5521 if (ret >= 0) 5522 return (ret); 5523 5524 (*stp)->expire = getuptime(); 5525 (*stp)->timeout = PFTM_ICMP_ERROR_REPLY; 5526 5527 /* translate source/destination address, if necessary */ 5528 if ((*stp)->key[PF_SK_WIRE] != (*stp)->key[PF_SK_STACK]) { 5529 struct pf_state_key *nk; 5530 int afto, sidx, didx; 5531 5532 if (PF_REVERSED_KEY((*stp)->key, pd->af)) 5533 nk = (*stp)->key[pd->sidx]; 5534 else 5535 nk = (*stp)->key[pd->didx]; 5536 5537 afto = pd->af != nk->af; 5538 sidx = afto ? pd->didx : pd->sidx; 5539 didx = afto ? pd->sidx : pd->didx; 5540 iidx = afto ? 
!iidx : iidx; 5541 #ifdef INET6 5542 if (afto) { 5543 pf_addrcpy(&pd->nsaddr, &nk->addr[sidx], 5544 nk->af); 5545 pf_addrcpy(&pd->ndaddr, &nk->addr[didx], 5546 nk->af); 5547 pd->naf = nk->af; 5548 } 5549 #endif /* INET6 */ 5550 if (!afto) { 5551 pf_translate_a(pd, pd->src, &nk->addr[sidx]); 5552 pf_translate_a(pd, pd->dst, &nk->addr[didx]); 5553 } 5554 5555 if (pd->rdomain != nk->rdomain) 5556 pd->destchg = 1; 5557 if (!afto && PF_ANEQ(pd->dst, 5558 &nk->addr[didx], pd->af)) 5559 pd->destchg = 1; 5560 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 5561 5562 switch (pd->af) { 5563 case AF_INET: 5564 #ifdef INET6 5565 if (afto) { 5566 if (pf_translate_icmp_af(pd, AF_INET6, 5567 &pd->hdr.icmp)) 5568 return (PF_DROP); 5569 pd->proto = IPPROTO_ICMPV6; 5570 } 5571 #endif /* INET6 */ 5572 pf_patch_16(pd, 5573 &pd->hdr.icmp.icmp_id, nk->port[iidx]); 5574 5575 m_copyback(pd->m, pd->off, ICMP_MINLEN, 5576 &pd->hdr.icmp, M_NOWAIT); 5577 copyback = 1; 5578 break; 5579 #ifdef INET6 5580 case AF_INET6: 5581 if (afto) { 5582 if (pf_translate_icmp_af(pd, AF_INET, 5583 &pd->hdr.icmp6)) 5584 return (PF_DROP); 5585 pd->proto = IPPROTO_ICMP; 5586 } 5587 5588 pf_patch_16(pd, 5589 &pd->hdr.icmp6.icmp6_id, nk->port[iidx]); 5590 5591 m_copyback(pd->m, pd->off, 5592 sizeof(struct icmp6_hdr), &pd->hdr.icmp6, 5593 M_NOWAIT); 5594 copyback = 1; 5595 break; 5596 #endif /* INET6 */ 5597 } 5598 #ifdef INET6 5599 if (afto) 5600 return (PF_AFRT); 5601 #endif /* INET6 */ 5602 } 5603 } else { 5604 /* 5605 * ICMP error message in response to a TCP/UDP packet. 5606 * Extract the inner TCP/UDP header and search for that state. 5607 */ 5608 struct pf_pdesc pd2; 5609 struct ip h2; 5610 #ifdef INET6 5611 struct ip6_hdr h2_6; 5612 #endif /* INET6 */ 5613 int ipoff2; 5614 5615 /* Initialize pd2 fields valid for both packets with pd. */ 5616 memset(&pd2, 0, sizeof(pd2)); 5617 pd2.af = pd->af; 5618 pd2.dir = pd->dir; 5619 pd2.kif = pd->kif; 5620 pd2.m = pd->m; 5621 pd2.rdomain = pd->rdomain; 5622 /* Payload packet is from the opposite direction. */ 5623 pd2.sidx = (pd2.dir == PF_IN) ? 1 : 0; 5624 pd2.didx = (pd2.dir == PF_IN) ? 
0 : 1; 5625 switch (pd->af) { 5626 case AF_INET: 5627 /* offset of h2 in mbuf chain */ 5628 ipoff2 = pd->off + ICMP_MINLEN; 5629 5630 if (!pf_pull_hdr(pd2.m, ipoff2, &h2, sizeof(h2), 5631 NULL, reason, pd2.af)) { 5632 DPFPRINTF(LOG_NOTICE, 5633 "ICMP error message too short (ip)"); 5634 return (PF_DROP); 5635 } 5636 /* 5637 * ICMP error messages don't refer to non-first 5638 * fragments 5639 */ 5640 if (h2.ip_off & htons(IP_OFFMASK)) { 5641 REASON_SET(reason, PFRES_FRAG); 5642 return (PF_DROP); 5643 } 5644 5645 /* offset of protocol header that follows h2 */ 5646 pd2.off = ipoff2; 5647 if (pf_walk_header(&pd2, &h2, reason) != PF_PASS) 5648 return (PF_DROP); 5649 5650 pd2.tot_len = ntohs(h2.ip_len); 5651 pd2.src = (struct pf_addr *)&h2.ip_src; 5652 pd2.dst = (struct pf_addr *)&h2.ip_dst; 5653 break; 5654 #ifdef INET6 5655 case AF_INET6: 5656 ipoff2 = pd->off + sizeof(struct icmp6_hdr); 5657 5658 if (!pf_pull_hdr(pd2.m, ipoff2, &h2_6, sizeof(h2_6), 5659 NULL, reason, pd2.af)) { 5660 DPFPRINTF(LOG_NOTICE, 5661 "ICMP error message too short (ip6)"); 5662 return (PF_DROP); 5663 } 5664 5665 pd2.off = ipoff2; 5666 if (pf_walk_header6(&pd2, &h2_6, reason) != PF_PASS) 5667 return (PF_DROP); 5668 5669 pd2.tot_len = ntohs(h2_6.ip6_plen) + 5670 sizeof(struct ip6_hdr); 5671 pd2.src = (struct pf_addr *)&h2_6.ip6_src; 5672 pd2.dst = (struct pf_addr *)&h2_6.ip6_dst; 5673 break; 5674 #endif /* INET6 */ 5675 default: 5676 unhandled_af(pd->af); 5677 } 5678 5679 if (PF_ANEQ(pd->dst, pd2.src, pd->af)) { 5680 if (pf_status.debug >= LOG_NOTICE) { 5681 log(LOG_NOTICE, 5682 "pf: BAD ICMP %d:%d outer dst: ", 5683 icmptype, icmpcode); 5684 pf_print_host(pd->src, 0, pd->af); 5685 addlog(" -> "); 5686 pf_print_host(pd->dst, 0, pd->af); 5687 addlog(" inner src: "); 5688 pf_print_host(pd2.src, 0, pd2.af); 5689 addlog(" -> "); 5690 pf_print_host(pd2.dst, 0, pd2.af); 5691 addlog("\n"); 5692 } 5693 REASON_SET(reason, PFRES_BADSTATE); 5694 return (PF_DROP); 5695 } 5696 5697 switch (pd2.proto) { 5698 case IPPROTO_TCP: { 5699 struct tcphdr *th = &pd2.hdr.tcp; 5700 u_int32_t seq; 5701 struct pf_state_peer *src, *dst; 5702 u_int8_t dws; 5703 int action; 5704 5705 /* 5706 * Only the first 8 bytes of the TCP header can be 5707 * expected. Don't access any TCP header fields after 5708 * th_seq, an ackskew test is not possible. 
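 * (RFC 792 only guarantees the IP header plus the first 8
 * octets of the offending datagram are quoted: enough for the
 * ports and th_seq, but not th_ack or the TCP flags.)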
5709 */ 5710 if (!pf_pull_hdr(pd2.m, pd2.off, th, 8, NULL, reason, 5711 pd2.af)) { 5712 DPFPRINTF(LOG_NOTICE, 5713 "ICMP error message too short (tcp)"); 5714 return (PF_DROP); 5715 } 5716 5717 key.af = pd2.af; 5718 key.proto = IPPROTO_TCP; 5719 key.rdomain = pd2.rdomain; 5720 pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af); 5721 pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af); 5722 key.port[pd2.sidx] = th->th_sport; 5723 key.port[pd2.didx] = th->th_dport; 5724 key.hash = pf_pkt_hash(pd2.af, pd2.proto, 5725 pd2.src, pd2.dst, th->th_sport, th->th_dport); 5726 5727 action = pf_find_state(&pd2, &key, stp); 5728 if (action != PF_MATCH) 5729 return (action); 5730 5731 if (pd2.dir == (*stp)->direction) { 5732 if (PF_REVERSED_KEY((*stp)->key, pd->af)) { 5733 src = &(*stp)->src; 5734 dst = &(*stp)->dst; 5735 } else { 5736 src = &(*stp)->dst; 5737 dst = &(*stp)->src; 5738 } 5739 } else { 5740 if (PF_REVERSED_KEY((*stp)->key, pd->af)) { 5741 src = &(*stp)->dst; 5742 dst = &(*stp)->src; 5743 } else { 5744 src = &(*stp)->src; 5745 dst = &(*stp)->dst; 5746 } 5747 } 5748 5749 if (src->wscale && dst->wscale) 5750 dws = dst->wscale & PF_WSCALE_MASK; 5751 else 5752 dws = 0; 5753 5754 /* Demodulate sequence number */ 5755 seq = ntohl(th->th_seq) - src->seqdiff; 5756 if (src->seqdiff) { 5757 pf_patch_32(pd, &th->th_seq, htonl(seq)); 5758 copyback = 1; 5759 } 5760 5761 if (!((*stp)->state_flags & PFSTATE_SLOPPY) && 5762 (!SEQ_GEQ(src->seqhi, seq) || !SEQ_GEQ(seq, 5763 src->seqlo - (dst->max_win << dws)))) { 5764 if (pf_status.debug >= LOG_NOTICE) { 5765 log(LOG_NOTICE, 5766 "pf: BAD ICMP %d:%d ", 5767 icmptype, icmpcode); 5768 pf_print_host(pd->src, 0, pd->af); 5769 addlog(" -> "); 5770 pf_print_host(pd->dst, 0, pd->af); 5771 addlog(" state: "); 5772 pf_print_state(*stp); 5773 addlog(" seq=%u\n", seq); 5774 } 5775 REASON_SET(reason, PFRES_BADSTATE); 5776 return (PF_DROP); 5777 } else { 5778 if (pf_status.debug >= LOG_DEBUG) { 5779 log(LOG_DEBUG, 5780 "pf: OK ICMP %d:%d ", 5781 icmptype, icmpcode); 5782 pf_print_host(pd->src, 0, pd->af); 5783 addlog(" -> "); 5784 pf_print_host(pd->dst, 0, pd->af); 5785 addlog(" state: "); 5786 pf_print_state(*stp); 5787 addlog(" seq=%u\n", seq); 5788 } 5789 } 5790 5791 /* translate source/destination address, if necessary */ 5792 if ((*stp)->key[PF_SK_WIRE] != 5793 (*stp)->key[PF_SK_STACK]) { 5794 struct pf_state_key *nk; 5795 int afto, sidx, didx; 5796 5797 if (PF_REVERSED_KEY((*stp)->key, pd->af)) 5798 nk = (*stp)->key[pd->sidx]; 5799 else 5800 nk = (*stp)->key[pd->didx]; 5801 5802 afto = pd->af != nk->af; 5803 sidx = afto ? pd2.didx : pd2.sidx; 5804 didx = afto ? 
pd2.sidx : pd2.didx; 5805 5806 #ifdef INET6 5807 if (afto) { 5808 if (pf_translate_icmp_af(pd, nk->af, 5809 &pd->hdr.icmp)) 5810 return (PF_DROP); 5811 m_copyback(pd->m, pd->off, 5812 sizeof(struct icmp6_hdr), 5813 &pd->hdr.icmp6, M_NOWAIT); 5814 if (pf_change_icmp_af(pd->m, ipoff2, 5815 pd, &pd2, &nk->addr[sidx], 5816 &nk->addr[didx], pd->af, nk->af)) 5817 return (PF_DROP); 5818 if (nk->af == AF_INET) 5819 pd->proto = IPPROTO_ICMP; 5820 else 5821 pd->proto = IPPROTO_ICMPV6; 5822 pd->m->m_pkthdr.ph_rtableid = 5823 nk->rdomain; 5824 pd->destchg = 1; 5825 pf_addrcpy(&pd->nsaddr, 5826 &nk->addr[pd2.sidx], nk->af); 5827 pf_addrcpy(&pd->ndaddr, 5828 &nk->addr[pd2.didx], nk->af); 5829 pd->naf = nk->af; 5830 5831 pf_patch_16(pd, 5832 &th->th_sport, nk->port[sidx]); 5833 pf_patch_16(pd, 5834 &th->th_dport, nk->port[didx]); 5835 5836 m_copyback(pd2.m, pd2.off, 8, th, 5837 M_NOWAIT); 5838 return (PF_AFRT); 5839 } 5840 #endif /* INET6 */ 5841 if (PF_ANEQ(pd2.src, 5842 &nk->addr[pd2.sidx], pd2.af) || 5843 nk->port[pd2.sidx] != th->th_sport) 5844 pf_translate_icmp(pd, pd2.src, 5845 &th->th_sport, pd->dst, 5846 &nk->addr[pd2.sidx], 5847 nk->port[pd2.sidx]); 5848 5849 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], 5850 pd2.af) || pd2.rdomain != nk->rdomain) 5851 pd->destchg = 1; 5852 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 5853 5854 if (PF_ANEQ(pd2.dst, 5855 &nk->addr[pd2.didx], pd2.af) || 5856 nk->port[pd2.didx] != th->th_dport) 5857 pf_translate_icmp(pd, pd2.dst, 5858 &th->th_dport, pd->src, 5859 &nk->addr[pd2.didx], 5860 nk->port[pd2.didx]); 5861 copyback = 1; 5862 } 5863 5864 if (copyback) { 5865 switch (pd2.af) { 5866 case AF_INET: 5867 m_copyback(pd->m, pd->off, ICMP_MINLEN, 5868 &pd->hdr.icmp, M_NOWAIT); 5869 m_copyback(pd2.m, ipoff2, sizeof(h2), 5870 &h2, M_NOWAIT); 5871 break; 5872 #ifdef INET6 5873 case AF_INET6: 5874 m_copyback(pd->m, pd->off, 5875 sizeof(struct icmp6_hdr), 5876 &pd->hdr.icmp6, M_NOWAIT); 5877 m_copyback(pd2.m, ipoff2, sizeof(h2_6), 5878 &h2_6, M_NOWAIT); 5879 break; 5880 #endif /* INET6 */ 5881 } 5882 m_copyback(pd2.m, pd2.off, 8, th, M_NOWAIT); 5883 } 5884 break; 5885 } 5886 case IPPROTO_UDP: { 5887 struct udphdr *uh = &pd2.hdr.udp; 5888 int action; 5889 5890 if (!pf_pull_hdr(pd2.m, pd2.off, uh, sizeof(*uh), 5891 NULL, reason, pd2.af)) { 5892 DPFPRINTF(LOG_NOTICE, 5893 "ICMP error message too short (udp)"); 5894 return (PF_DROP); 5895 } 5896 5897 key.af = pd2.af; 5898 key.proto = IPPROTO_UDP; 5899 key.rdomain = pd2.rdomain; 5900 pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af); 5901 pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af); 5902 key.port[pd2.sidx] = uh->uh_sport; 5903 key.port[pd2.didx] = uh->uh_dport; 5904 key.hash = pf_pkt_hash(pd2.af, pd2.proto, 5905 pd2.src, pd2.dst, uh->uh_sport, uh->uh_dport); 5906 5907 action = pf_find_state(&pd2, &key, stp); 5908 if (action != PF_MATCH) 5909 return (action); 5910 5911 /* translate source/destination address, if necessary */ 5912 if ((*stp)->key[PF_SK_WIRE] != 5913 (*stp)->key[PF_SK_STACK]) { 5914 struct pf_state_key *nk; 5915 int afto, sidx, didx; 5916 5917 if (PF_REVERSED_KEY((*stp)->key, pd->af)) 5918 nk = (*stp)->key[pd->sidx]; 5919 else 5920 nk = (*stp)->key[pd->didx]; 5921 5922 afto = pd->af != nk->af; 5923 sidx = afto ? pd2.didx : pd2.sidx; 5924 didx = afto ? 
pd2.sidx : pd2.didx; 5925 5926 #ifdef INET6 5927 if (afto) { 5928 if (pf_translate_icmp_af(pd, nk->af, 5929 &pd->hdr.icmp)) 5930 return (PF_DROP); 5931 m_copyback(pd->m, pd->off, 5932 sizeof(struct icmp6_hdr), 5933 &pd->hdr.icmp6, M_NOWAIT); 5934 if (pf_change_icmp_af(pd->m, ipoff2, 5935 pd, &pd2, &nk->addr[sidx], 5936 &nk->addr[didx], pd->af, nk->af)) 5937 return (PF_DROP); 5938 if (nk->af == AF_INET) 5939 pd->proto = IPPROTO_ICMP; 5940 else 5941 pd->proto = IPPROTO_ICMPV6; 5942 pd->m->m_pkthdr.ph_rtableid = 5943 nk->rdomain; 5944 pd->destchg = 1; 5945 pf_addrcpy(&pd->nsaddr, 5946 &nk->addr[pd2.sidx], nk->af); 5947 pf_addrcpy(&pd->ndaddr, 5948 &nk->addr[pd2.didx], nk->af); 5949 pd->naf = nk->af; 5950 5951 pf_patch_16(pd, 5952 &uh->uh_sport, nk->port[sidx]); 5953 pf_patch_16(pd, 5954 &uh->uh_dport, nk->port[didx]); 5955 5956 m_copyback(pd2.m, pd2.off, sizeof(*uh), 5957 uh, M_NOWAIT); 5958 return (PF_AFRT); 5959 } 5960 #endif /* INET6 */ 5961 5962 if (PF_ANEQ(pd2.src, 5963 &nk->addr[pd2.sidx], pd2.af) || 5964 nk->port[pd2.sidx] != uh->uh_sport) 5965 pf_translate_icmp(pd, pd2.src, 5966 &uh->uh_sport, pd->dst, 5967 &nk->addr[pd2.sidx], 5968 nk->port[pd2.sidx]); 5969 5970 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], 5971 pd2.af) || pd2.rdomain != nk->rdomain) 5972 pd->destchg = 1; 5973 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 5974 5975 if (PF_ANEQ(pd2.dst, 5976 &nk->addr[pd2.didx], pd2.af) || 5977 nk->port[pd2.didx] != uh->uh_dport) 5978 pf_translate_icmp(pd, pd2.dst, 5979 &uh->uh_dport, pd->src, 5980 &nk->addr[pd2.didx], 5981 nk->port[pd2.didx]); 5982 5983 switch (pd2.af) { 5984 case AF_INET: 5985 m_copyback(pd->m, pd->off, ICMP_MINLEN, 5986 &pd->hdr.icmp, M_NOWAIT); 5987 m_copyback(pd2.m, ipoff2, sizeof(h2), 5988 &h2, M_NOWAIT); 5989 break; 5990 #ifdef INET6 5991 case AF_INET6: 5992 m_copyback(pd->m, pd->off, 5993 sizeof(struct icmp6_hdr), 5994 &pd->hdr.icmp6, M_NOWAIT); 5995 m_copyback(pd2.m, ipoff2, sizeof(h2_6), 5996 &h2_6, M_NOWAIT); 5997 break; 5998 #endif /* INET6 */ 5999 } 6000 /* Avoid recomputing quoted UDP checksum. 6001 * note: udp6 0 csum invalid per rfc2460 p27. 6002 * but presumed nothing cares in this context */ 6003 pf_patch_16(pd, &uh->uh_sum, 0); 6004 m_copyback(pd2.m, pd2.off, sizeof(*uh), uh, 6005 M_NOWAIT); 6006 copyback = 1; 6007 } 6008 break; 6009 } 6010 case IPPROTO_ICMP: { 6011 struct icmp *iih = &pd2.hdr.icmp; 6012 6013 if (pd2.af != AF_INET) { 6014 REASON_SET(reason, PFRES_NORM); 6015 return (PF_DROP); 6016 } 6017 6018 if (!pf_pull_hdr(pd2.m, pd2.off, iih, ICMP_MINLEN, 6019 NULL, reason, pd2.af)) { 6020 DPFPRINTF(LOG_NOTICE, 6021 "ICMP error message too short (icmp)"); 6022 return (PF_DROP); 6023 } 6024 6025 pf_icmp_mapping(&pd2, iih->icmp_type, 6026 &icmp_dir, &virtual_id, &virtual_type); 6027 6028 ret = pf_icmp_state_lookup(&pd2, &key, stp, 6029 virtual_id, virtual_type, icmp_dir, &iidx, 0, 1); 6030 if (ret >= 0) 6031 return (ret); 6032 6033 /* translate source/destination address, if necessary */ 6034 if ((*stp)->key[PF_SK_WIRE] != 6035 (*stp)->key[PF_SK_STACK]) { 6036 struct pf_state_key *nk; 6037 int afto, sidx, didx; 6038 6039 if (PF_REVERSED_KEY((*stp)->key, pd->af)) 6040 nk = (*stp)->key[pd->sidx]; 6041 else 6042 nk = (*stp)->key[pd->didx]; 6043 6044 afto = pd->af != nk->af; 6045 sidx = afto ? pd2.didx : pd2.sidx; 6046 didx = afto ? pd2.sidx : pd2.didx; 6047 iidx = afto ? 
!iidx : iidx; 6048 6049 #ifdef INET6 6050 if (afto) { 6051 if (nk->af != AF_INET6) 6052 return (PF_DROP); 6053 if (pf_translate_icmp_af(pd, nk->af, 6054 &pd->hdr.icmp)) 6055 return (PF_DROP); 6056 m_copyback(pd->m, pd->off, 6057 sizeof(struct icmp6_hdr), 6058 &pd->hdr.icmp6, M_NOWAIT); 6059 if (pf_change_icmp_af(pd->m, ipoff2, 6060 pd, &pd2, &nk->addr[sidx], 6061 &nk->addr[didx], pd->af, nk->af)) 6062 return (PF_DROP); 6063 pd->proto = IPPROTO_ICMPV6; 6064 if (pf_translate_icmp_af(pd, 6065 nk->af, iih)) 6066 return (PF_DROP); 6067 if (virtual_type == htons(ICMP_ECHO)) 6068 pf_patch_16(pd, &iih->icmp_id, 6069 nk->port[iidx]); 6070 m_copyback(pd2.m, pd2.off, ICMP_MINLEN, 6071 iih, M_NOWAIT); 6072 pd->m->m_pkthdr.ph_rtableid = 6073 nk->rdomain; 6074 pd->destchg = 1; 6075 pf_addrcpy(&pd->nsaddr, 6076 &nk->addr[pd2.sidx], nk->af); 6077 pf_addrcpy(&pd->ndaddr, 6078 &nk->addr[pd2.didx], nk->af); 6079 pd->naf = nk->af; 6080 return (PF_AFRT); 6081 } 6082 #endif /* INET6 */ 6083 6084 if (PF_ANEQ(pd2.src, 6085 &nk->addr[pd2.sidx], pd2.af) || 6086 (virtual_type == htons(ICMP_ECHO) && 6087 nk->port[iidx] != iih->icmp_id)) 6088 pf_translate_icmp(pd, pd2.src, 6089 (virtual_type == htons(ICMP_ECHO)) ? 6090 &iih->icmp_id : NULL, 6091 pd->dst, &nk->addr[pd2.sidx], 6092 (virtual_type == htons(ICMP_ECHO)) ? 6093 nk->port[iidx] : 0); 6094 6095 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], 6096 pd2.af) || pd2.rdomain != nk->rdomain) 6097 pd->destchg = 1; 6098 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 6099 6100 if (PF_ANEQ(pd2.dst, 6101 &nk->addr[pd2.didx], pd2.af)) 6102 pf_translate_icmp(pd, pd2.dst, NULL, 6103 pd->src, &nk->addr[pd2.didx], 0); 6104 6105 m_copyback(pd->m, pd->off, ICMP_MINLEN, 6106 &pd->hdr.icmp, M_NOWAIT); 6107 m_copyback(pd2.m, ipoff2, sizeof(h2), &h2, 6108 M_NOWAIT); 6109 m_copyback(pd2.m, pd2.off, ICMP_MINLEN, iih, 6110 M_NOWAIT); 6111 copyback = 1; 6112 } 6113 break; 6114 } 6115 #ifdef INET6 6116 case IPPROTO_ICMPV6: { 6117 struct icmp6_hdr *iih = &pd2.hdr.icmp6; 6118 6119 if (pd2.af != AF_INET6) { 6120 REASON_SET(reason, PFRES_NORM); 6121 return (PF_DROP); 6122 } 6123 6124 if (!pf_pull_hdr(pd2.m, pd2.off, iih, 6125 sizeof(struct icmp6_hdr), NULL, reason, pd2.af)) { 6126 DPFPRINTF(LOG_NOTICE, 6127 "ICMP error message too short (icmp6)"); 6128 return (PF_DROP); 6129 } 6130 6131 pf_icmp_mapping(&pd2, iih->icmp6_type, 6132 &icmp_dir, &virtual_id, &virtual_type); 6133 ret = pf_icmp_state_lookup(&pd2, &key, stp, 6134 virtual_id, virtual_type, icmp_dir, &iidx, 0, 1); 6135 /* IPv6? try matching a multicast address */ 6136 if (ret == PF_DROP && pd2.af == AF_INET6 && 6137 icmp_dir == PF_OUT) 6138 ret = pf_icmp_state_lookup(&pd2, &key, stp, 6139 virtual_id, virtual_type, icmp_dir, &iidx, 6140 1, 1); 6141 if (ret >= 0) 6142 return (ret); 6143 6144 /* translate source/destination address, if necessary */ 6145 if ((*stp)->key[PF_SK_WIRE] != 6146 (*stp)->key[PF_SK_STACK]) { 6147 struct pf_state_key *nk; 6148 int afto, sidx, didx; 6149 6150 if (PF_REVERSED_KEY((*stp)->key, pd->af)) 6151 nk = (*stp)->key[pd->sidx]; 6152 else 6153 nk = (*stp)->key[pd->didx]; 6154 6155 afto = pd->af != nk->af; 6156 sidx = afto ? pd2.didx : pd2.sidx; 6157 didx = afto ? pd2.sidx : pd2.didx; 6158 iidx = afto ? 
!iidx : iidx; 6159 6160 if (afto) { 6161 if (nk->af != AF_INET) 6162 return (PF_DROP); 6163 if (pf_translate_icmp_af(pd, nk->af, 6164 &pd->hdr.icmp)) 6165 return (PF_DROP); 6166 m_copyback(pd->m, pd->off, 6167 sizeof(struct icmp6_hdr), 6168 &pd->hdr.icmp6, M_NOWAIT); 6169 if (pf_change_icmp_af(pd->m, ipoff2, 6170 pd, &pd2, &nk->addr[sidx], 6171 &nk->addr[didx], pd->af, nk->af)) 6172 return (PF_DROP); 6173 pd->proto = IPPROTO_ICMP; 6174 if (pf_translate_icmp_af(pd, 6175 nk->af, iih)) 6176 return (PF_DROP); 6177 if (virtual_type == 6178 htons(ICMP6_ECHO_REQUEST)) 6179 pf_patch_16(pd, &iih->icmp6_id, 6180 nk->port[iidx]); 6181 m_copyback(pd2.m, pd2.off, 6182 sizeof(struct icmp6_hdr), iih, 6183 M_NOWAIT); 6184 pd->m->m_pkthdr.ph_rtableid = 6185 nk->rdomain; 6186 pd->destchg = 1; 6187 pf_addrcpy(&pd->nsaddr, 6188 &nk->addr[pd2.sidx], nk->af); 6189 pf_addrcpy(&pd->ndaddr, 6190 &nk->addr[pd2.didx], nk->af); 6191 pd->naf = nk->af; 6192 return (PF_AFRT); 6193 } 6194 6195 if (PF_ANEQ(pd2.src, 6196 &nk->addr[pd2.sidx], pd2.af) || 6197 ((virtual_type == 6198 htons(ICMP6_ECHO_REQUEST)) && 6199 nk->port[pd2.sidx] != iih->icmp6_id)) 6200 pf_translate_icmp(pd, pd2.src, 6201 (virtual_type == 6202 htons(ICMP6_ECHO_REQUEST)) 6203 ? &iih->icmp6_id : NULL, 6204 pd->dst, &nk->addr[pd2.sidx], 6205 (virtual_type == 6206 htons(ICMP6_ECHO_REQUEST)) 6207 ? nk->port[iidx] : 0); 6208 6209 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], 6210 pd2.af) || pd2.rdomain != nk->rdomain) 6211 pd->destchg = 1; 6212 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 6213 6214 if (PF_ANEQ(pd2.dst, 6215 &nk->addr[pd2.didx], pd2.af)) 6216 pf_translate_icmp(pd, pd2.dst, NULL, 6217 pd->src, &nk->addr[pd2.didx], 0); 6218 6219 m_copyback(pd->m, pd->off, 6220 sizeof(struct icmp6_hdr), &pd->hdr.icmp6, 6221 M_NOWAIT); 6222 m_copyback(pd2.m, ipoff2, sizeof(h2_6), &h2_6, 6223 M_NOWAIT); 6224 m_copyback(pd2.m, pd2.off, 6225 sizeof(struct icmp6_hdr), iih, M_NOWAIT); 6226 copyback = 1; 6227 } 6228 break; 6229 } 6230 #endif /* INET6 */ 6231 default: { 6232 int action; 6233 6234 key.af = pd2.af; 6235 key.proto = pd2.proto; 6236 key.rdomain = pd2.rdomain; 6237 pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af); 6238 pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af); 6239 key.port[0] = key.port[1] = 0; 6240 key.hash = pf_pkt_hash(pd2.af, pd2.proto, 6241 pd2.src, pd2.dst, 0, 0); 6242 6243 action = pf_find_state(&pd2, &key, stp); 6244 if (action != PF_MATCH) 6245 return (action); 6246 6247 /* translate source/destination address, if necessary */ 6248 if ((*stp)->key[PF_SK_WIRE] != 6249 (*stp)->key[PF_SK_STACK]) { 6250 struct pf_state_key *nk = 6251 (*stp)->key[pd->didx]; 6252 6253 if (PF_ANEQ(pd2.src, 6254 &nk->addr[pd2.sidx], pd2.af)) 6255 pf_translate_icmp(pd, pd2.src, NULL, 6256 pd->dst, &nk->addr[pd2.sidx], 0); 6257 6258 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], 6259 pd2.af) || pd2.rdomain != nk->rdomain) 6260 pd->destchg = 1; 6261 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 6262 6263 if (PF_ANEQ(pd2.dst, 6264 &nk->addr[pd2.didx], pd2.af)) 6265 pf_translate_icmp(pd, pd2.dst, NULL, 6266 pd->src, &nk->addr[pd2.didx], 0); 6267 6268 switch (pd2.af) { 6269 case AF_INET: 6270 m_copyback(pd->m, pd->off, ICMP_MINLEN, 6271 &pd->hdr.icmp, M_NOWAIT); 6272 m_copyback(pd2.m, ipoff2, sizeof(h2), 6273 &h2, M_NOWAIT); 6274 break; 6275 #ifdef INET6 6276 case AF_INET6: 6277 m_copyback(pd->m, pd->off, 6278 sizeof(struct icmp6_hdr), 6279 &pd->hdr.icmp6, M_NOWAIT); 6280 m_copyback(pd2.m, ipoff2, sizeof(h2_6), 6281 &h2_6, M_NOWAIT); 6282 break; 6283 #endif /* INET6 */ 6284 } 6285 
copyback = 1; 6286 } 6287 break; 6288 } 6289 } 6290 } 6291 if (copyback) { 6292 m_copyback(pd->m, pd->off, pd->hdrlen, &pd->hdr, M_NOWAIT); 6293 } 6294 6295 return (PF_PASS); 6296 } 6297 6298 /* 6299 * ipoff and off are measured from the start of the mbuf chain. 6300 * h must be at "ipoff" on the mbuf chain. 6301 */ 6302 void * 6303 pf_pull_hdr(struct mbuf *m, int off, void *p, int len, 6304 u_short *actionp, u_short *reasonp, sa_family_t af) 6305 { 6306 int iplen = 0; 6307 6308 switch (af) { 6309 case AF_INET: { 6310 struct ip *h = mtod(m, struct ip *); 6311 u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3; 6312 6313 if (fragoff) { 6314 if (fragoff >= len) 6315 ACTION_SET(actionp, PF_PASS); 6316 else { 6317 ACTION_SET(actionp, PF_DROP); 6318 REASON_SET(reasonp, PFRES_FRAG); 6319 } 6320 return (NULL); 6321 } 6322 iplen = ntohs(h->ip_len); 6323 break; 6324 } 6325 #ifdef INET6 6326 case AF_INET6: { 6327 struct ip6_hdr *h = mtod(m, struct ip6_hdr *); 6328 6329 iplen = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr); 6330 break; 6331 } 6332 #endif /* INET6 */ 6333 } 6334 if (m->m_pkthdr.len < off + len || iplen < off + len) { 6335 ACTION_SET(actionp, PF_DROP); 6336 REASON_SET(reasonp, PFRES_SHORT); 6337 return (NULL); 6338 } 6339 m_copydata(m, off, len, p); 6340 return (p); 6341 } 6342 6343 int 6344 pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif, 6345 int rtableid) 6346 { 6347 struct sockaddr_storage ss; 6348 struct sockaddr_in *dst; 6349 int ret = 1; 6350 int check_mpath; 6351 #ifdef INET6 6352 struct sockaddr_in6 *dst6; 6353 #endif /* INET6 */ 6354 struct rtentry *rt = NULL; 6355 6356 check_mpath = 0; 6357 memset(&ss, 0, sizeof(ss)); 6358 switch (af) { 6359 case AF_INET: 6360 dst = (struct sockaddr_in *)&ss; 6361 dst->sin_family = AF_INET; 6362 dst->sin_len = sizeof(*dst); 6363 dst->sin_addr = addr->v4; 6364 if (ipmultipath) 6365 check_mpath = 1; 6366 break; 6367 #ifdef INET6 6368 case AF_INET6: 6369 /* 6370 * Skip check for addresses with embedded interface scope, 6371 * as they would always match anyway. 
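* (the embedded scope already ties such an address to a single
* interface, so a route lookup could not add information)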
6372 */ 6373 if (IN6_IS_SCOPE_EMBED(&addr->v6)) 6374 goto out; 6375 dst6 = (struct sockaddr_in6 *)&ss; 6376 dst6->sin6_family = AF_INET6; 6377 dst6->sin6_len = sizeof(*dst6); 6378 dst6->sin6_addr = addr->v6; 6379 if (ip6_multipath) 6380 check_mpath = 1; 6381 break; 6382 #endif /* INET6 */ 6383 } 6384 6385 /* Skip checks for ipsec interfaces */ 6386 if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC) 6387 goto out; 6388 6389 rt = rtalloc(sstosa(&ss), 0, rtableid); 6390 if (rt != NULL) { 6391 /* No interface given, this is a no-route check */ 6392 if (kif == NULL) 6393 goto out; 6394 6395 if (kif->pfik_ifp == NULL) { 6396 ret = 0; 6397 goto out; 6398 } 6399 6400 /* Perform uRPF check if passed input interface */ 6401 ret = 0; 6402 do { 6403 if (rt->rt_ifidx == kif->pfik_ifp->if_index) { 6404 ret = 1; 6405 #if NCARP > 0 6406 } else { 6407 struct ifnet *ifp; 6408 6409 ifp = if_get(rt->rt_ifidx); 6410 if (ifp != NULL && ifp->if_type == IFT_CARP && 6411 ifp->if_carpdevidx == 6412 kif->pfik_ifp->if_index) 6413 ret = 1; 6414 if_put(ifp); 6415 #endif /* NCARP */ 6416 } 6417 6418 rt = rtable_iterate(rt); 6419 } while (check_mpath == 1 && rt != NULL && ret == 0); 6420 } else 6421 ret = 0; 6422 out: 6423 rtfree(rt); 6424 return (ret); 6425 } 6426 6427 int 6428 pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw, 6429 int rtableid) 6430 { 6431 struct sockaddr_storage ss; 6432 struct sockaddr_in *dst; 6433 #ifdef INET6 6434 struct sockaddr_in6 *dst6; 6435 #endif /* INET6 */ 6436 struct rtentry *rt; 6437 int ret = 0; 6438 6439 memset(&ss, 0, sizeof(ss)); 6440 switch (af) { 6441 case AF_INET: 6442 dst = (struct sockaddr_in *)&ss; 6443 dst->sin_family = AF_INET; 6444 dst->sin_len = sizeof(*dst); 6445 dst->sin_addr = addr->v4; 6446 break; 6447 #ifdef INET6 6448 case AF_INET6: 6449 dst6 = (struct sockaddr_in6 *)&ss; 6450 dst6->sin6_family = AF_INET6; 6451 dst6->sin6_len = sizeof(*dst6); 6452 dst6->sin6_addr = addr->v6; 6453 break; 6454 #endif /* INET6 */ 6455 } 6456 6457 rt = rtalloc(sstosa(&ss), RT_RESOLVE, rtableid); 6458 if (rt != NULL) { 6459 if (rt->rt_labelid == aw->v.rtlabel) 6460 ret = 1; 6461 rtfree(rt); 6462 } 6463 6464 return (ret); 6465 } 6466 6467 /* pf_route() may change pd->m, adjust local copies after calling */ 6468 void 6469 pf_route(struct pf_pdesc *pd, struct pf_state *st) 6470 { 6471 struct mbuf *m0; 6472 struct mbuf_list ml; 6473 struct sockaddr_in *dst, sin; 6474 struct rtentry *rt = NULL; 6475 struct ip *ip; 6476 struct ifnet *ifp = NULL; 6477 unsigned int rtableid; 6478 6479 if (pd->m->m_pkthdr.pf.routed++ > 3) { 6480 m_freem(pd->m); 6481 pd->m = NULL; 6482 return; 6483 } 6484 6485 if (st->rt == PF_DUPTO) { 6486 if ((m0 = m_dup_pkt(pd->m, max_linkhdr, M_NOWAIT)) == NULL) 6487 return; 6488 } else { 6489 if ((st->rt == PF_REPLYTO) == (st->direction == pd->dir)) 6490 return; 6491 m0 = pd->m; 6492 pd->m = NULL; 6493 } 6494 6495 if (m0->m_len < sizeof(struct ip)) { 6496 DPFPRINTF(LOG_ERR, 6497 "%s: m0->m_len < sizeof(struct ip)", __func__); 6498 goto bad; 6499 } 6500 6501 ip = mtod(m0, struct ip *); 6502 6503 if (pd->dir == PF_IN) { 6504 if (ip->ip_ttl <= IPTTLDEC) { 6505 if (st->rt != PF_DUPTO) { 6506 pf_send_icmp(m0, ICMP_TIMXCEED, 6507 ICMP_TIMXCEED_INTRANS, 0, 6508 pd->af, st->rule.ptr, pd->rdomain); 6509 } 6510 goto bad; 6511 } 6512 ip->ip_ttl -= IPTTLDEC; 6513 } 6514 6515 memset(&sin, 0, sizeof(sin)); 6516 dst = &sin; 6517 dst->sin_family = AF_INET; 6518 dst->sin_len = sizeof(*dst); 6519 dst->sin_addr = st->rt_addr.v4; 6520 rtableid = 
m0->m_pkthdr.ph_rtableid; 6521 6522 rt = rtalloc_mpath(sintosa(dst), &ip->ip_src.s_addr, rtableid); 6523 if (!rtisvalid(rt)) { 6524 if (st->rt != PF_DUPTO) { 6525 pf_send_icmp(m0, ICMP_UNREACH, ICMP_UNREACH_HOST, 6526 0, pd->af, st->rule.ptr, pd->rdomain); 6527 } 6528 ipstat_inc(ips_noroute); 6529 goto bad; 6530 } 6531 6532 ifp = if_get(rt->rt_ifidx); 6533 if (ifp == NULL) 6534 goto bad; 6535 6536 /* A locally generated packet may have invalid source address. */ 6537 if ((ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET && 6538 (ifp->if_flags & IFF_LOOPBACK) == 0) 6539 ip->ip_src = ifatoia(rt->rt_ifa)->ia_addr.sin_addr; 6540 6541 if (st->rt != PF_DUPTO && pd->dir == PF_IN) { 6542 if (pf_test(AF_INET, PF_OUT, ifp, &m0) != PF_PASS) 6543 goto bad; 6544 else if (m0 == NULL) 6545 goto done; 6546 if (m0->m_len < sizeof(struct ip)) { 6547 DPFPRINTF(LOG_ERR, 6548 "%s: m0->m_len < sizeof(struct ip)", __func__); 6549 goto bad; 6550 } 6551 ip = mtod(m0, struct ip *); 6552 } 6553 6554 if (ntohs(ip->ip_len) <= ifp->if_mtu) { 6555 in_hdr_cksum_out(m0, ifp); 6556 in_proto_cksum_out(m0, ifp); 6557 ifp->if_output(ifp, m0, sintosa(dst), rt); 6558 goto done; 6559 } 6560 6561 if (tcp_if_output_tso(ifp, &m0, sintosa(dst), rt, 6562 IFCAP_TSOv4, ifp->if_mtu) || m0 == NULL) 6563 goto done; 6564 6565 /* 6566 * Too large for interface; fragment if possible. 6567 * Must be able to put at least 8 bytes per fragment. 6568 */ 6569 if (ip->ip_off & htons(IP_DF)) { 6570 ipstat_inc(ips_cantfrag); 6571 if (st->rt != PF_DUPTO) 6572 pf_send_icmp(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 6573 ifp->if_mtu, pd->af, st->rule.ptr, pd->rdomain); 6574 goto bad; 6575 } 6576 6577 if (ip_fragment(m0, &ml, ifp, ifp->if_mtu) || 6578 if_output_ml(ifp, &ml, sintosa(dst), rt)) 6579 goto done; 6580 ipstat_inc(ips_fragmented); 6581 6582 done: 6583 if_put(ifp); 6584 rtfree(rt); 6585 return; 6586 6587 bad: 6588 m_freem(m0); 6589 goto done; 6590 } 6591 6592 #ifdef INET6 6593 /* pf_route6() may change pd->m, adjust local copies after calling */ 6594 void 6595 pf_route6(struct pf_pdesc *pd, struct pf_state *st) 6596 { 6597 struct mbuf *m0; 6598 struct sockaddr_in6 *dst, sin6; 6599 struct rtentry *rt = NULL; 6600 struct ip6_hdr *ip6; 6601 struct ifnet *ifp = NULL; 6602 struct m_tag *mtag; 6603 unsigned int rtableid; 6604 6605 if (pd->m->m_pkthdr.pf.routed++ > 3) { 6606 m_freem(pd->m); 6607 pd->m = NULL; 6608 return; 6609 } 6610 6611 if (st->rt == PF_DUPTO) { 6612 if ((m0 = m_dup_pkt(pd->m, max_linkhdr, M_NOWAIT)) == NULL) 6613 return; 6614 } else { 6615 if ((st->rt == PF_REPLYTO) == (st->direction == pd->dir)) 6616 return; 6617 m0 = pd->m; 6618 pd->m = NULL; 6619 } 6620 6621 if (m0->m_len < sizeof(struct ip6_hdr)) { 6622 DPFPRINTF(LOG_ERR, 6623 "%s: m0->m_len < sizeof(struct ip6_hdr)", __func__); 6624 goto bad; 6625 } 6626 ip6 = mtod(m0, struct ip6_hdr *); 6627 6628 if (pd->dir == PF_IN) { 6629 if (ip6->ip6_hlim <= IPV6_HLIMDEC) { 6630 if (st->rt != PF_DUPTO) { 6631 pf_send_icmp(m0, ICMP6_TIME_EXCEEDED, 6632 ICMP6_TIME_EXCEED_TRANSIT, 0, 6633 pd->af, st->rule.ptr, pd->rdomain); 6634 } 6635 goto bad; 6636 } 6637 ip6->ip6_hlim -= IPV6_HLIMDEC; 6638 } 6639 6640 memset(&sin6, 0, sizeof(sin6)); 6641 dst = &sin6; 6642 dst->sin6_family = AF_INET6; 6643 dst->sin6_len = sizeof(*dst); 6644 dst->sin6_addr = st->rt_addr.v6; 6645 rtableid = m0->m_pkthdr.ph_rtableid; 6646 6647 rt = rtalloc_mpath(sin6tosa(dst), &ip6->ip6_src.s6_addr32[0], 6648 rtableid); 6649 if (!rtisvalid(rt)) { 6650 if (st->rt != PF_DUPTO) { 6651 pf_send_icmp(m0, 
ICMP6_DST_UNREACH, 6652 ICMP6_DST_UNREACH_NOROUTE, 0, 6653 pd->af, st->rule.ptr, pd->rdomain); 6654 } 6655 ip6stat_inc(ip6s_noroute); 6656 goto bad; 6657 } 6658 6659 ifp = if_get(rt->rt_ifidx); 6660 if (ifp == NULL) 6661 goto bad; 6662 6663 /* A locally generated packet may have invalid source address. */ 6664 if (IN6_IS_ADDR_LOOPBACK(&ip6->ip6_src) && 6665 (ifp->if_flags & IFF_LOOPBACK) == 0) 6666 ip6->ip6_src = ifatoia6(rt->rt_ifa)->ia_addr.sin6_addr; 6667 6668 if (st->rt != PF_DUPTO && pd->dir == PF_IN) { 6669 if (pf_test(AF_INET6, PF_OUT, ifp, &m0) != PF_PASS) 6670 goto bad; 6671 else if (m0 == NULL) 6672 goto done; 6673 if (m0->m_len < sizeof(struct ip6_hdr)) { 6674 DPFPRINTF(LOG_ERR, 6675 "%s: m0->m_len < sizeof(struct ip6_hdr)", __func__); 6676 goto bad; 6677 } 6678 } 6679 6680 /* 6681 * If packet has been reassembled by PF earlier, we have to 6682 * use pf_refragment6() here to turn it back to fragments. 6683 */ 6684 if ((mtag = m_tag_find(m0, PACKET_TAG_PF_REASSEMBLED, NULL))) { 6685 (void) pf_refragment6(&m0, mtag, dst, ifp, rt); 6686 goto done; 6687 } 6688 6689 if (m0->m_pkthdr.len <= ifp->if_mtu) { 6690 in6_proto_cksum_out(m0, ifp); 6691 ifp->if_output(ifp, m0, sin6tosa(dst), rt); 6692 goto done; 6693 } 6694 6695 if (tcp_if_output_tso(ifp, &m0, sin6tosa(dst), rt, 6696 IFCAP_TSOv6, ifp->if_mtu) || m0 == NULL) 6697 goto done; 6698 6699 ip6stat_inc(ip6s_cantfrag); 6700 if (st->rt != PF_DUPTO) 6701 pf_send_icmp(m0, ICMP6_PACKET_TOO_BIG, 0, 6702 ifp->if_mtu, pd->af, st->rule.ptr, pd->rdomain); 6703 goto bad; 6704 6705 done: 6706 if_put(ifp); 6707 rtfree(rt); 6708 return; 6709 6710 bad: 6711 m_freem(m0); 6712 goto done; 6713 } 6714 #endif /* INET6 */ 6715 6716 /* 6717 * check TCP checksum and set mbuf flag 6718 * off is the offset where the protocol header starts 6719 * len is the total length of protocol header plus payload 6720 * returns 0 when the checksum is valid, otherwise returns 1. 
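* the verdict is cached in the mbuf csum_flags, so the software
* checksum below runs at most once per packet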
6721 * if the _OUT flag is set the checksum isn't done yet, consider these ok 6722 */ 6723 int 6724 pf_check_tcp_cksum(struct mbuf *m, int off, int len, sa_family_t af) 6725 { 6726 u_int16_t sum; 6727 6728 if (m->m_pkthdr.csum_flags & 6729 (M_TCP_CSUM_IN_OK | M_TCP_CSUM_OUT)) { 6730 return (0); 6731 } 6732 if (m->m_pkthdr.csum_flags & M_TCP_CSUM_IN_BAD || 6733 off < sizeof(struct ip) || 6734 m->m_pkthdr.len < off + len) { 6735 return (1); 6736 } 6737 6738 /* need to do it in software */ 6739 tcpstat_inc(tcps_inswcsum); 6740 6741 switch (af) { 6742 case AF_INET: 6743 if (m->m_len < sizeof(struct ip)) 6744 return (1); 6745 6746 sum = in4_cksum(m, IPPROTO_TCP, off, len); 6747 break; 6748 #ifdef INET6 6749 case AF_INET6: 6750 if (m->m_len < sizeof(struct ip6_hdr)) 6751 return (1); 6752 6753 sum = in6_cksum(m, IPPROTO_TCP, off, len); 6754 break; 6755 #endif /* INET6 */ 6756 default: 6757 unhandled_af(af); 6758 } 6759 if (sum) { 6760 tcpstat_inc(tcps_rcvbadsum); 6761 m->m_pkthdr.csum_flags |= M_TCP_CSUM_IN_BAD; 6762 return (1); 6763 } 6764 6765 m->m_pkthdr.csum_flags |= M_TCP_CSUM_IN_OK; 6766 return (0); 6767 } 6768 6769 struct pf_divert * 6770 pf_find_divert(struct mbuf *m) 6771 { 6772 struct m_tag *mtag; 6773 6774 if ((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL) 6775 return (NULL); 6776 6777 return ((struct pf_divert *)(mtag + 1)); 6778 } 6779 6780 struct pf_divert * 6781 pf_get_divert(struct mbuf *m) 6782 { 6783 struct m_tag *mtag; 6784 6785 if ((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL) { 6786 mtag = m_tag_get(PACKET_TAG_PF_DIVERT, sizeof(struct pf_divert), 6787 M_NOWAIT); 6788 if (mtag == NULL) 6789 return (NULL); 6790 memset(mtag + 1, 0, sizeof(struct pf_divert)); 6791 m_tag_prepend(m, mtag); 6792 } 6793 6794 return ((struct pf_divert *)(mtag + 1)); 6795 } 6796 6797 int 6798 pf_walk_option(struct pf_pdesc *pd, struct ip *h, int off, int end, 6799 u_short *reason) 6800 { 6801 uint8_t type, length, opts[15 * 4 - sizeof(struct ip)]; 6802 6803 /* IP header in payload of ICMP packet may be too short */ 6804 if (pd->m->m_pkthdr.len < end) { 6805 DPFPRINTF(LOG_NOTICE, "IP option too short"); 6806 REASON_SET(reason, PFRES_SHORT); 6807 return (PF_DROP); 6808 } 6809 6810 KASSERT(end - off <= sizeof(opts)); 6811 m_copydata(pd->m, off, end - off, opts); 6812 end -= off; 6813 off = 0; 6814 6815 while (off < end) { 6816 type = opts[off]; 6817 if (type == IPOPT_EOL) 6818 break; 6819 if (type == IPOPT_NOP) { 6820 off++; 6821 continue; 6822 } 6823 if (off + 2 > end) { 6824 DPFPRINTF(LOG_NOTICE, "IP length opt"); 6825 REASON_SET(reason, PFRES_IPOPTIONS); 6826 return (PF_DROP); 6827 } 6828 length = opts[off + 1]; 6829 if (length < 2) { 6830 DPFPRINTF(LOG_NOTICE, "IP short opt"); 6831 REASON_SET(reason, PFRES_IPOPTIONS); 6832 return (PF_DROP); 6833 } 6834 if (off + length > end) { 6835 DPFPRINTF(LOG_NOTICE, "IP long opt"); 6836 REASON_SET(reason, PFRES_IPOPTIONS); 6837 return (PF_DROP); 6838 } 6839 switch (type) { 6840 case IPOPT_RA: 6841 SET(pd->badopts, PF_OPT_ROUTER_ALERT); 6842 break; 6843 default: 6844 SET(pd->badopts, PF_OPT_OTHER); 6845 break; 6846 } 6847 off += length; 6848 } 6849 6850 return (PF_PASS); 6851 } 6852 6853 int 6854 pf_walk_header(struct pf_pdesc *pd, struct ip *h, u_short *reason) 6855 { 6856 struct ip6_ext ext; 6857 u_int32_t hlen, end; 6858 int hdr_cnt; 6859 6860 hlen = h->ip_hl << 2; 6861 if (hlen < sizeof(struct ip) || hlen > ntohs(h->ip_len)) { 6862 REASON_SET(reason, PFRES_SHORT); 6863 return (PF_DROP); 6864 } 6865 if (hlen != sizeof(struct ip)) { 
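/*
* A header longer than the fixed 20 bytes carries IP options.
* Walk them so pd->badopts reflects router alert and other
* options for the allow-opts check in pf_test().
*/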
6866 if (pf_walk_option(pd, h, pd->off + sizeof(struct ip), 6867 pd->off + hlen, reason) != PF_PASS) 6868 return (PF_DROP); 6869 /* header options which contain only padding are fishy */ 6870 if (pd->badopts == 0) 6871 SET(pd->badopts, PF_OPT_OTHER); 6872 } 6873 end = pd->off + ntohs(h->ip_len); 6874 pd->off += hlen; 6875 pd->proto = h->ip_p; 6876 /* IGMP packets have router alert options, allow them */ 6877 if (pd->proto == IPPROTO_IGMP) { 6878 /* 6879 * According to RFC 1112 ttl must be set to 1 in all IGMP 6880 * packets sent to 224.0.0.1 6881 */ 6882 if ((h->ip_ttl != 1) && 6883 (h->ip_dst.s_addr == INADDR_ALLHOSTS_GROUP)) { 6884 DPFPRINTF(LOG_NOTICE, "Invalid IGMP"); 6885 REASON_SET(reason, PFRES_IPOPTIONS); 6886 return (PF_DROP); 6887 } 6888 CLR(pd->badopts, PF_OPT_ROUTER_ALERT); 6889 } 6890 /* stop walking over non initial fragments */ 6891 if ((h->ip_off & htons(IP_OFFMASK)) != 0) 6892 return (PF_PASS); 6893 6894 for (hdr_cnt = 0; hdr_cnt < pf_hdr_limit; hdr_cnt++) { 6895 switch (pd->proto) { 6896 case IPPROTO_AH: 6897 /* fragments may be short */ 6898 if ((h->ip_off & htons(IP_MF | IP_OFFMASK)) != 0 && 6899 end < pd->off + sizeof(ext)) 6900 return (PF_PASS); 6901 if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext), 6902 NULL, reason, AF_INET)) { 6903 DPFPRINTF(LOG_NOTICE, "IP short exthdr"); 6904 return (PF_DROP); 6905 } 6906 pd->off += (ext.ip6e_len + 2) * 4; 6907 pd->proto = ext.ip6e_nxt; 6908 break; 6909 default: 6910 return (PF_PASS); 6911 } 6912 } 6913 DPFPRINTF(LOG_NOTICE, "IPv4 nested authentication header limit"); 6914 REASON_SET(reason, PFRES_IPOPTIONS); 6915 return (PF_DROP); 6916 } 6917 6918 #ifdef INET6 6919 int 6920 pf_walk_option6(struct pf_pdesc *pd, struct ip6_hdr *h, int off, int end, 6921 u_short *reason) 6922 { 6923 struct ip6_opt opt; 6924 struct ip6_opt_jumbo jumbo; 6925 6926 while (off < end) { 6927 if (!pf_pull_hdr(pd->m, off, &opt.ip6o_type, 6928 sizeof(opt.ip6o_type), NULL, reason, AF_INET6)) { 6929 DPFPRINTF(LOG_NOTICE, "IPv6 short opt type"); 6930 return (PF_DROP); 6931 } 6932 if (opt.ip6o_type == IP6OPT_PAD1) { 6933 off++; 6934 continue; 6935 } 6936 if (!pf_pull_hdr(pd->m, off, &opt, sizeof(opt), 6937 NULL, reason, AF_INET6)) { 6938 DPFPRINTF(LOG_NOTICE, "IPv6 short opt"); 6939 return (PF_DROP); 6940 } 6941 if (off + sizeof(opt) + opt.ip6o_len > end) { 6942 DPFPRINTF(LOG_NOTICE, "IPv6 long opt"); 6943 REASON_SET(reason, PFRES_IPOPTIONS); 6944 return (PF_DROP); 6945 } 6946 switch (opt.ip6o_type) { 6947 case IP6OPT_PADN: 6948 break; 6949 case IP6OPT_JUMBO: 6950 SET(pd->badopts, PF_OPT_JUMBO); 6951 if (pd->jumbolen != 0) { 6952 DPFPRINTF(LOG_NOTICE, "IPv6 multiple jumbo"); 6953 REASON_SET(reason, PFRES_IPOPTIONS); 6954 return (PF_DROP); 6955 } 6956 if (ntohs(h->ip6_plen) != 0) { 6957 DPFPRINTF(LOG_NOTICE, "IPv6 bad jumbo plen"); 6958 REASON_SET(reason, PFRES_IPOPTIONS); 6959 return (PF_DROP); 6960 } 6961 if (!pf_pull_hdr(pd->m, off, &jumbo, sizeof(jumbo), 6962 NULL, reason, AF_INET6)) { 6963 DPFPRINTF(LOG_NOTICE, "IPv6 short jumbo"); 6964 return (PF_DROP); 6965 } 6966 memcpy(&pd->jumbolen, jumbo.ip6oj_jumbo_len, 6967 sizeof(pd->jumbolen)); 6968 pd->jumbolen = ntohl(pd->jumbolen); 6969 if (pd->jumbolen < IPV6_MAXPACKET) { 6970 DPFPRINTF(LOG_NOTICE, "IPv6 short jumbolen"); 6971 REASON_SET(reason, PFRES_IPOPTIONS); 6972 return (PF_DROP); 6973 } 6974 break; 6975 case IP6OPT_ROUTER_ALERT: 6976 SET(pd->badopts, PF_OPT_ROUTER_ALERT); 6977 break; 6978 default: 6979 SET(pd->badopts, PF_OPT_OTHER); 6980 break; 6981 } 6982 off += sizeof(opt) + opt.ip6o_len; 6983 } 6984
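/* the whole option area was parsed without a violation */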
6985 return (PF_PASS); 6986 } 6987 6988 int 6989 pf_walk_header6(struct pf_pdesc *pd, struct ip6_hdr *h, u_short *reason) 6990 { 6991 struct ip6_frag frag; 6992 struct ip6_ext ext; 6993 struct icmp6_hdr icmp6; 6994 struct ip6_rthdr rthdr; 6995 u_int32_t end; 6996 int hdr_cnt, fraghdr_cnt = 0, rthdr_cnt = 0; 6997 6998 pd->off += sizeof(struct ip6_hdr); 6999 end = pd->off + ntohs(h->ip6_plen); 7000 pd->fragoff = pd->extoff = pd->jumbolen = 0; 7001 pd->proto = h->ip6_nxt; 7002 7003 for (hdr_cnt = 0; hdr_cnt < pf_hdr_limit; hdr_cnt++) { 7004 switch (pd->proto) { 7005 case IPPROTO_ROUTING: 7006 case IPPROTO_DSTOPTS: 7007 SET(pd->badopts, PF_OPT_OTHER); 7008 break; 7009 case IPPROTO_HOPOPTS: 7010 if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext), 7011 NULL, reason, AF_INET6)) { 7012 DPFPRINTF(LOG_NOTICE, "IPv6 short exthdr"); 7013 return (PF_DROP); 7014 } 7015 if (pf_walk_option6(pd, h, pd->off + sizeof(ext), 7016 pd->off + (ext.ip6e_len + 1) * 8, reason) 7017 != PF_PASS) 7018 return (PF_DROP); 7019 /* option header which contains only padding is fishy */ 7020 if (pd->badopts == 0) 7021 SET(pd->badopts, PF_OPT_OTHER); 7022 break; 7023 } 7024 switch (pd->proto) { 7025 case IPPROTO_FRAGMENT: 7026 if (fraghdr_cnt++) { 7027 DPFPRINTF(LOG_NOTICE, "IPv6 multiple fragment"); 7028 REASON_SET(reason, PFRES_FRAG); 7029 return (PF_DROP); 7030 } 7031 /* jumbo payload packets cannot be fragmented */ 7032 if (pd->jumbolen != 0) { 7033 DPFPRINTF(LOG_NOTICE, "IPv6 fragmented jumbo"); 7034 REASON_SET(reason, PFRES_FRAG); 7035 return (PF_DROP); 7036 } 7037 if (!pf_pull_hdr(pd->m, pd->off, &frag, sizeof(frag), 7038 NULL, reason, AF_INET6)) { 7039 DPFPRINTF(LOG_NOTICE, "IPv6 short fragment"); 7040 return (PF_DROP); 7041 } 7042 /* stop walking over non initial fragments */ 7043 if (ntohs((frag.ip6f_offlg & IP6F_OFF_MASK)) != 0) { 7044 pd->fragoff = pd->off; 7045 return (PF_PASS); 7046 } 7047 /* RFC6946: reassemble only non atomic fragments */ 7048 if (frag.ip6f_offlg & IP6F_MORE_FRAG) 7049 pd->fragoff = pd->off; 7050 pd->off += sizeof(frag); 7051 pd->proto = frag.ip6f_nxt; 7052 break; 7053 case IPPROTO_ROUTING: 7054 if (rthdr_cnt++) { 7055 DPFPRINTF(LOG_NOTICE, "IPv6 multiple rthdr"); 7056 REASON_SET(reason, PFRES_IPOPTIONS); 7057 return (PF_DROP); 7058 } 7059 /* fragments may be short */ 7060 if (pd->fragoff != 0 && end < pd->off + sizeof(rthdr)) { 7061 pd->off = pd->fragoff; 7062 pd->proto = IPPROTO_FRAGMENT; 7063 return (PF_PASS); 7064 } 7065 if (!pf_pull_hdr(pd->m, pd->off, &rthdr, sizeof(rthdr), 7066 NULL, reason, AF_INET6)) { 7067 DPFPRINTF(LOG_NOTICE, "IPv6 short rthdr"); 7068 return (PF_DROP); 7069 } 7070 if (rthdr.ip6r_type == IPV6_RTHDR_TYPE_0) { 7071 DPFPRINTF(LOG_NOTICE, "IPv6 rthdr0"); 7072 REASON_SET(reason, PFRES_IPOPTIONS); 7073 return (PF_DROP); 7074 } 7075 /* FALLTHROUGH */ 7076 case IPPROTO_HOPOPTS: 7077 /* RFC2460 4.1: Hop-by-Hop only after IPv6 header */ 7078 if (pd->proto == IPPROTO_HOPOPTS && hdr_cnt > 0) { 7079 DPFPRINTF(LOG_NOTICE, "IPv6 hopopts not first"); 7080 REASON_SET(reason, PFRES_IPOPTIONS); 7081 return (PF_DROP); 7082 } 7083 /* FALLTHROUGH */ 7084 case IPPROTO_AH: 7085 case IPPROTO_DSTOPTS: 7086 /* fragments may be short */ 7087 if (pd->fragoff != 0 && end < pd->off + sizeof(ext)) { 7088 pd->off = pd->fragoff; 7089 pd->proto = IPPROTO_FRAGMENT; 7090 return (PF_PASS); 7091 } 7092 if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext), 7093 NULL, reason, AF_INET6)) { 7094 DPFPRINTF(LOG_NOTICE, "IPv6 short exthdr"); 7095 return (PF_DROP); 7096 } 7097 /* reassembly needs the ext header 
before the frag */ 7098 if (pd->fragoff == 0) 7099 pd->extoff = pd->off; 7100 if (pd->proto == IPPROTO_HOPOPTS && pd->fragoff == 0 && 7101 ntohs(h->ip6_plen) == 0 && pd->jumbolen != 0) { 7102 DPFPRINTF(LOG_NOTICE, "IPv6 missing jumbo"); 7103 REASON_SET(reason, PFRES_IPOPTIONS); 7104 return (PF_DROP); 7105 } 7106 if (pd->proto == IPPROTO_AH) 7107 pd->off += (ext.ip6e_len + 2) * 4; 7108 else 7109 pd->off += (ext.ip6e_len + 1) * 8; 7110 pd->proto = ext.ip6e_nxt; 7111 break; 7112 case IPPROTO_ICMPV6: 7113 /* fragments may be short, ignore inner header then */ 7114 if (pd->fragoff != 0 && end < pd->off + sizeof(icmp6)) { 7115 pd->off = pd->fragoff; 7116 pd->proto = IPPROTO_FRAGMENT; 7117 return (PF_PASS); 7118 } 7119 if (!pf_pull_hdr(pd->m, pd->off, &icmp6, sizeof(icmp6), 7120 NULL, reason, AF_INET6)) { 7121 DPFPRINTF(LOG_NOTICE, "IPv6 short icmp6hdr"); 7122 return (PF_DROP); 7123 } 7124 /* ICMP multicast packets have router alert options */ 7125 switch (icmp6.icmp6_type) { 7126 case MLD_LISTENER_QUERY: 7127 case MLD_LISTENER_REPORT: 7128 case MLD_LISTENER_DONE: 7129 case MLDV2_LISTENER_REPORT: 7130 /* 7131 * According to RFC 2710 all MLD messages are 7132 * sent with hop-limit (ttl) set to 1, and link 7133 * local source address. If either one is 7134 * missing then MLD message is invalid and 7135 * should be discarded. 7136 */ 7137 if ((h->ip6_hlim != 1) || 7138 !IN6_IS_ADDR_LINKLOCAL(&h->ip6_src)) { 7139 DPFPRINTF(LOG_NOTICE, "Invalid MLD"); 7140 REASON_SET(reason, PFRES_IPOPTIONS); 7141 return (PF_DROP); 7142 } 7143 CLR(pd->badopts, PF_OPT_ROUTER_ALERT); 7144 break; 7145 } 7146 return (PF_PASS); 7147 case IPPROTO_TCP: 7148 case IPPROTO_UDP: 7149 /* fragments may be short, ignore inner header then */ 7150 if (pd->fragoff != 0 && end < pd->off + 7151 (pd->proto == IPPROTO_TCP ? sizeof(struct tcphdr) : 7152 pd->proto == IPPROTO_UDP ? sizeof(struct udphdr) : 7153 sizeof(struct icmp6_hdr))) { 7154 pd->off = pd->fragoff; 7155 pd->proto = IPPROTO_FRAGMENT; 7156 } 7157 /* FALLTHROUGH */ 7158 default: 7159 return (PF_PASS); 7160 } 7161 } 7162 DPFPRINTF(LOG_NOTICE, "IPv6 nested extension header limit"); 7163 REASON_SET(reason, PFRES_IPOPTIONS); 7164 return (PF_DROP); 7165 } 7166 #endif /* INET6 */ 7167 7168 u_int16_t 7169 pf_pkt_hash(sa_family_t af, uint8_t proto, 7170 const struct pf_addr *src, const struct pf_addr *dst, 7171 uint16_t sport, uint16_t dport) 7172 { 7173 uint32_t hash; 7174 7175 hash = src->addr32[0] ^ dst->addr32[0]; 7176 #ifdef INET6 7177 if (af == AF_INET6) { 7178 hash ^= src->addr32[1] ^ dst->addr32[1]; 7179 hash ^= src->addr32[2] ^ dst->addr32[2]; 7180 hash ^= src->addr32[3] ^ dst->addr32[3]; 7181 } 7182 #endif 7183 7184 switch (proto) { 7185 case IPPROTO_TCP: 7186 case IPPROTO_UDP: 7187 hash ^= sport ^ dport; 7188 break; 7189 } 7190 7191 return stoeplitz_n32(hash); 7192 } 7193 7194 int 7195 pf_setup_pdesc(struct pf_pdesc *pd, sa_family_t af, int dir, 7196 struct pfi_kif *kif, struct mbuf *m, u_short *reason) 7197 { 7198 memset(pd, 0, sizeof(*pd)); 7199 pd->dir = dir; 7200 pd->kif = kif; /* kif is NULL when called by pflog */ 7201 pd->m = m; 7202 pd->sidx = (dir == PF_IN) ? 0 : 1; 7203 pd->didx = (dir == PF_IN) ? 
1 : 0; 7204 pd->af = pd->naf = af; 7205 pd->rdomain = rtable_l2(pd->m->m_pkthdr.ph_rtableid); 7206 7207 switch (pd->af) { 7208 case AF_INET: { 7209 struct ip *h; 7210 7211 /* Check for illegal packets */ 7212 if (pd->m->m_pkthdr.len < (int)sizeof(struct ip)) { 7213 REASON_SET(reason, PFRES_SHORT); 7214 return (PF_DROP); 7215 } 7216 7217 h = mtod(pd->m, struct ip *); 7218 if (pd->m->m_pkthdr.len < ntohs(h->ip_len)) { 7219 REASON_SET(reason, PFRES_SHORT); 7220 return (PF_DROP); 7221 } 7222 7223 if (pf_walk_header(pd, h, reason) != PF_PASS) 7224 return (PF_DROP); 7225 7226 pd->src = (struct pf_addr *)&h->ip_src; 7227 pd->dst = (struct pf_addr *)&h->ip_dst; 7228 pd->tot_len = ntohs(h->ip_len); 7229 pd->tos = h->ip_tos & ~IPTOS_ECN_MASK; 7230 pd->ttl = h->ip_ttl; 7231 pd->virtual_proto = (h->ip_off & htons(IP_MF | IP_OFFMASK)) ? 7232 PF_VPROTO_FRAGMENT : pd->proto; 7233 7234 break; 7235 } 7236 #ifdef INET6 7237 case AF_INET6: { 7238 struct ip6_hdr *h; 7239 7240 /* Check for illegal packets */ 7241 if (pd->m->m_pkthdr.len < (int)sizeof(struct ip6_hdr)) { 7242 REASON_SET(reason, PFRES_SHORT); 7243 return (PF_DROP); 7244 } 7245 7246 h = mtod(pd->m, struct ip6_hdr *); 7247 if (pd->m->m_pkthdr.len < 7248 sizeof(struct ip6_hdr) + ntohs(h->ip6_plen)) { 7249 REASON_SET(reason, PFRES_SHORT); 7250 return (PF_DROP); 7251 } 7252 7253 if (pf_walk_header6(pd, h, reason) != PF_PASS) 7254 return (PF_DROP); 7255 7256 #if 1 7257 /* 7258 * we do not support jumbogram yet. if we keep going, zero 7259 * ip6_plen will do something bad, so drop the packet for now. 7260 */ 7261 if (pd->jumbolen != 0) { 7262 REASON_SET(reason, PFRES_NORM); 7263 return (PF_DROP); 7264 } 7265 #endif /* 1 */ 7266 7267 pd->src = (struct pf_addr *)&h->ip6_src; 7268 pd->dst = (struct pf_addr *)&h->ip6_dst; 7269 pd->tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr); 7270 pd->tos = (ntohl(h->ip6_flow) & 0x0fc00000) >> 20; 7271 pd->ttl = h->ip6_hlim; 7272 pd->virtual_proto = (pd->fragoff != 0) ? 
7273 PF_VPROTO_FRAGMENT : pd->proto; 7274 7275 break; 7276 } 7277 #endif /* INET6 */ 7278 default: 7279 panic("pf_setup_pdesc called with illegal af %u", pd->af); 7280 7281 } 7282 7283 pf_addrcpy(&pd->nsaddr, pd->src, pd->af); 7284 pf_addrcpy(&pd->ndaddr, pd->dst, pd->af); 7285 7286 switch (pd->virtual_proto) { 7287 case IPPROTO_TCP: { 7288 struct tcphdr *th = &pd->hdr.tcp; 7289 7290 if (!pf_pull_hdr(pd->m, pd->off, th, sizeof(*th), 7291 NULL, reason, pd->af)) 7292 return (PF_DROP); 7293 pd->hdrlen = sizeof(*th); 7294 if (th->th_dport == 0 || 7295 pd->off + (th->th_off << 2) > pd->tot_len || 7296 (th->th_off << 2) < sizeof(struct tcphdr)) { 7297 REASON_SET(reason, PFRES_SHORT); 7298 return (PF_DROP); 7299 } 7300 pd->p_len = pd->tot_len - pd->off - (th->th_off << 2); 7301 pd->sport = &th->th_sport; 7302 pd->dport = &th->th_dport; 7303 pd->pcksum = &th->th_sum; 7304 break; 7305 } 7306 case IPPROTO_UDP: { 7307 struct udphdr *uh = &pd->hdr.udp; 7308 7309 if (!pf_pull_hdr(pd->m, pd->off, uh, sizeof(*uh), 7310 NULL, reason, pd->af)) 7311 return (PF_DROP); 7312 pd->hdrlen = sizeof(*uh); 7313 if (uh->uh_dport == 0 || 7314 pd->off + ntohs(uh->uh_ulen) > pd->tot_len || 7315 ntohs(uh->uh_ulen) < sizeof(struct udphdr)) { 7316 REASON_SET(reason, PFRES_SHORT); 7317 return (PF_DROP); 7318 } 7319 pd->sport = &uh->uh_sport; 7320 pd->dport = &uh->uh_dport; 7321 pd->pcksum = &uh->uh_sum; 7322 break; 7323 } 7324 case IPPROTO_ICMP: { 7325 if (!pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp, ICMP_MINLEN, 7326 NULL, reason, pd->af)) 7327 return (PF_DROP); 7328 pd->hdrlen = ICMP_MINLEN; 7329 if (pd->off + pd->hdrlen > pd->tot_len) { 7330 REASON_SET(reason, PFRES_SHORT); 7331 return (PF_DROP); 7332 } 7333 pd->pcksum = &pd->hdr.icmp.icmp_cksum; 7334 break; 7335 } 7336 #ifdef INET6 7337 case IPPROTO_ICMPV6: { 7338 size_t icmp_hlen = sizeof(struct icmp6_hdr); 7339 7340 if (!pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp6, icmp_hlen, 7341 NULL, reason, pd->af)) 7342 return (PF_DROP); 7343 /* ICMP headers we look further into to match state */ 7344 switch (pd->hdr.icmp6.icmp6_type) { 7345 case MLD_LISTENER_QUERY: 7346 case MLD_LISTENER_REPORT: 7347 icmp_hlen = sizeof(struct mld_hdr); 7348 break; 7349 case ND_NEIGHBOR_SOLICIT: 7350 case ND_NEIGHBOR_ADVERT: 7351 icmp_hlen = sizeof(struct nd_neighbor_solicit); 7352 /* FALLTHROUGH */ 7353 case ND_ROUTER_SOLICIT: 7354 case ND_ROUTER_ADVERT: 7355 case ND_REDIRECT: 7356 if (pd->ttl != 255) { 7357 REASON_SET(reason, PFRES_NORM); 7358 return (PF_DROP); 7359 } 7360 break; 7361 } 7362 if (icmp_hlen > sizeof(struct icmp6_hdr) && 7363 !pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp6, icmp_hlen, 7364 NULL, reason, pd->af)) 7365 return (PF_DROP); 7366 pd->hdrlen = icmp_hlen; 7367 if (pd->off + pd->hdrlen > pd->tot_len) { 7368 REASON_SET(reason, PFRES_SHORT); 7369 return (PF_DROP); 7370 } 7371 pd->pcksum = &pd->hdr.icmp6.icmp6_cksum; 7372 break; 7373 } 7374 #endif /* INET6 */ 7375 } 7376 7377 if (pd->sport) 7378 pd->osport = pd->nsport = *pd->sport; 7379 if (pd->dport) 7380 pd->odport = pd->ndport = *pd->dport; 7381 7382 pd->hash = pf_pkt_hash(pd->af, pd->proto, 7383 pd->src, pd->dst, pd->osport, pd->odport); 7384 7385 return (PF_PASS); 7386 } 7387 7388 void 7389 pf_counters_inc(int action, struct pf_pdesc *pd, struct pf_state *st, 7390 struct pf_rule *r, struct pf_rule *a) 7391 { 7392 int dirndx; 7393 pd->kif->pfik_bytes[pd->af == AF_INET6][pd->dir == PF_OUT] 7394 [action != PF_PASS] += pd->tot_len; 7395 pd->kif->pfik_packets[pd->af == AF_INET6][pd->dir == PF_OUT] 7396 [action != PF_PASS]++; 7397 
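/*
* Rule, anchor, state, source node and table counters below are
* only bumped for packets that passed, were translated to another
* address family, or matched an explicit drop rule.
*/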
7398 if (action == PF_PASS || action == PF_AFRT || r->action == PF_DROP) { 7399 dirndx = (pd->dir == PF_OUT); 7400 r->packets[dirndx]++; 7401 r->bytes[dirndx] += pd->tot_len; 7402 if (a != NULL) { 7403 a->packets[dirndx]++; 7404 a->bytes[dirndx] += pd->tot_len; 7405 } 7406 if (st != NULL) { 7407 struct pf_rule_item *ri; 7408 struct pf_sn_item *sni; 7409 7410 SLIST_FOREACH(sni, &st->src_nodes, next) { 7411 sni->sn->packets[dirndx]++; 7412 sni->sn->bytes[dirndx] += pd->tot_len; 7413 } 7414 dirndx = (pd->dir == st->direction) ? 0 : 1; 7415 st->packets[dirndx]++; 7416 st->bytes[dirndx] += pd->tot_len; 7417 7418 SLIST_FOREACH(ri, &st->match_rules, entry) { 7419 ri->r->packets[dirndx]++; 7420 ri->r->bytes[dirndx] += pd->tot_len; 7421 7422 if (ri->r->src.addr.type == PF_ADDR_TABLE) 7423 pfr_update_stats(ri->r->src.addr.p.tbl, 7424 &st->key[(st->direction == PF_IN)]-> 7425 addr[(st->direction == PF_OUT)], 7426 pd, ri->r->action, ri->r->src.neg); 7427 if (ri->r->dst.addr.type == PF_ADDR_TABLE) 7428 pfr_update_stats(ri->r->dst.addr.p.tbl, 7429 &st->key[(st->direction == PF_IN)]-> 7430 addr[(st->direction == PF_IN)], 7431 pd, ri->r->action, ri->r->dst.neg); 7432 } 7433 } 7434 if (r->src.addr.type == PF_ADDR_TABLE) 7435 pfr_update_stats(r->src.addr.p.tbl, 7436 (st == NULL) ? pd->src : 7437 &st->key[(st->direction == PF_IN)]-> 7438 addr[(st->direction == PF_OUT)], 7439 pd, r->action, r->src.neg); 7440 if (r->dst.addr.type == PF_ADDR_TABLE) 7441 pfr_update_stats(r->dst.addr.p.tbl, 7442 (st == NULL) ? pd->dst : 7443 &st->key[(st->direction == PF_IN)]-> 7444 addr[(st->direction == PF_IN)], 7445 pd, r->action, r->dst.neg); 7446 } 7447 } 7448 7449 int 7450 pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0) 7451 { 7452 #if NCARP > 0 7453 struct ifnet *ifp0; 7454 #endif 7455 struct pfi_kif *kif; 7456 u_short action, reason = 0; 7457 struct pf_rule *a = NULL, *r = &pf_default_rule; 7458 struct pf_state *st = NULL; 7459 struct pf_state_key_cmp key; 7460 struct pf_ruleset *ruleset = NULL; 7461 struct pf_pdesc pd; 7462 int dir = (fwdir == PF_FWD) ? 
PF_OUT : fwdir; 7463 u_int32_t qid, pqid = 0; 7464 int have_pf_lock = 0; 7465 struct pfsync_deferral *deferral = NULL; 7466 7467 if (!pf_status.running) 7468 return (PF_PASS); 7469 7470 #if NCARP > 0 7471 if (ifp->if_type == IFT_CARP && 7472 (ifp0 = if_get(ifp->if_carpdevidx)) != NULL) { 7473 kif = (struct pfi_kif *)ifp0->if_pf_kif; 7474 if_put(ifp0); 7475 } else 7476 #endif /* NCARP */ 7477 kif = (struct pfi_kif *)ifp->if_pf_kif; 7478 7479 if (kif == NULL) { 7480 DPFPRINTF(LOG_ERR, 7481 "%s: kif == NULL, if_xname %s", __func__, ifp->if_xname); 7482 return (PF_DROP); 7483 } 7484 if (kif->pfik_flags & PFI_IFLAG_SKIP) 7485 return (PF_PASS); 7486 7487 #ifdef DIAGNOSTIC 7488 if (((*m0)->m_flags & M_PKTHDR) == 0) 7489 panic("non-M_PKTHDR is passed to pf_test"); 7490 #endif /* DIAGNOSTIC */ 7491 7492 if ((*m0)->m_pkthdr.pf.flags & PF_TAG_GENERATED) 7493 return (PF_PASS); 7494 7495 if ((*m0)->m_pkthdr.pf.flags & PF_TAG_DIVERTED_PACKET) { 7496 (*m0)->m_pkthdr.pf.flags &= ~PF_TAG_DIVERTED_PACKET; 7497 return (PF_PASS); 7498 } 7499 7500 if ((*m0)->m_pkthdr.pf.flags & PF_TAG_REFRAGMENTED) { 7501 (*m0)->m_pkthdr.pf.flags &= ~PF_TAG_REFRAGMENTED; 7502 return (PF_PASS); 7503 } 7504 7505 action = pf_setup_pdesc(&pd, af, dir, kif, *m0, &reason); 7506 if (action != PF_PASS) { 7507 #if NPFLOG > 0 7508 pd.pflog |= PF_LOG_FORCE; 7509 #endif /* NPFLOG > 0 */ 7510 goto done; 7511 } 7512 7513 /* packet normalization and reassembly */ 7514 switch (pd.af) { 7515 case AF_INET: 7516 action = pf_normalize_ip(&pd, &reason); 7517 break; 7518 #ifdef INET6 7519 case AF_INET6: 7520 action = pf_normalize_ip6(&pd, &reason); 7521 break; 7522 #endif /* INET6 */ 7523 } 7524 *m0 = pd.m; 7525 /* if packet sits in reassembly queue, return without error */ 7526 if (pd.m == NULL) 7527 return PF_PASS; 7528 7529 if (action != PF_PASS) { 7530 #if NPFLOG > 0 7531 pd.pflog |= PF_LOG_FORCE; 7532 #endif /* NPFLOG > 0 */ 7533 goto done; 7534 } 7535 7536 /* if packet has been reassembled, update packet description */ 7537 if (pf_status.reass && pd.virtual_proto == PF_VPROTO_FRAGMENT) { 7538 action = pf_setup_pdesc(&pd, af, dir, kif, pd.m, &reason); 7539 if (action != PF_PASS) { 7540 #if NPFLOG > 0 7541 pd.pflog |= PF_LOG_FORCE; 7542 #endif /* NPFLOG > 0 */ 7543 goto done; 7544 } 7545 } 7546 pd.m->m_pkthdr.pf.flags |= PF_TAG_PROCESSED; 7547 7548 /* 7549 * Avoid pcb-lookups from the forwarding path. They should never 7550 * match and would cause MP locking problems. 
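* (a forwarded packet belongs to no local socket, so skipping
* the lookup loses nothing)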
7551 */ 7552 if (fwdir == PF_FWD) { 7553 pd.lookup.done = -1; 7554 pd.lookup.uid = -1; 7555 pd.lookup.gid = -1; 7556 pd.lookup.pid = NO_PID; 7557 } 7558 7559 switch (pd.virtual_proto) { 7560 7561 case PF_VPROTO_FRAGMENT: { 7562 /* 7563 * handle fragments that aren't reassembled by 7564 * normalization 7565 */ 7566 PF_LOCK(); 7567 have_pf_lock = 1; 7568 action = pf_test_rule(&pd, &r, &st, &a, &ruleset, &reason, 7569 &deferral); 7570 st = pf_state_ref(st); 7571 if (action != PF_PASS) 7572 REASON_SET(&reason, PFRES_FRAG); 7573 break; 7574 } 7575 7576 case IPPROTO_ICMP: { 7577 if (pd.af != AF_INET) { 7578 action = PF_DROP; 7579 REASON_SET(&reason, PFRES_NORM); 7580 DPFPRINTF(LOG_NOTICE, 7581 "dropping IPv6 packet with ICMPv4 payload"); 7582 break; 7583 } 7584 PF_STATE_ENTER_READ(); 7585 action = pf_test_state_icmp(&pd, &st, &reason); 7586 st = pf_state_ref(st); 7587 PF_STATE_EXIT_READ(); 7588 if (action == PF_PASS || action == PF_AFRT) { 7589 #if NPFSYNC > 0 7590 pfsync_update_state(st); 7591 #endif /* NPFSYNC > 0 */ 7592 r = st->rule.ptr; 7593 a = st->anchor.ptr; 7594 #if NPFLOG > 0 7595 pd.pflog |= st->log; 7596 #endif /* NPFLOG > 0 */ 7597 } else if (st == NULL) { 7598 PF_LOCK(); 7599 have_pf_lock = 1; 7600 action = pf_test_rule(&pd, &r, &st, &a, &ruleset, 7601 &reason, &deferral); 7602 st = pf_state_ref(st); 7603 } 7604 break; 7605 } 7606 7607 #ifdef INET6 7608 case IPPROTO_ICMPV6: { 7609 if (pd.af != AF_INET6) { 7610 action = PF_DROP; 7611 REASON_SET(&reason, PFRES_NORM); 7612 DPFPRINTF(LOG_NOTICE, 7613 "dropping IPv4 packet with ICMPv6 payload"); 7614 break; 7615 } 7616 PF_STATE_ENTER_READ(); 7617 action = pf_test_state_icmp(&pd, &st, &reason); 7618 st = pf_state_ref(st); 7619 PF_STATE_EXIT_READ(); 7620 if (action == PF_PASS || action == PF_AFRT) { 7621 #if NPFSYNC > 0 7622 pfsync_update_state(st); 7623 #endif /* NPFSYNC > 0 */ 7624 r = st->rule.ptr; 7625 a = st->anchor.ptr; 7626 #if NPFLOG > 0 7627 pd.pflog |= st->log; 7628 #endif /* NPFLOG > 0 */ 7629 } else if (st == NULL) { 7630 PF_LOCK(); 7631 have_pf_lock = 1; 7632 action = pf_test_rule(&pd, &r, &st, &a, &ruleset, 7633 &reason, &deferral); 7634 st = pf_state_ref(st); 7635 } 7636 break; 7637 } 7638 #endif /* INET6 */ 7639 7640 default: 7641 if (pd.virtual_proto == IPPROTO_TCP) { 7642 if (pd.dir == PF_IN && (pd.hdr.tcp.th_flags & 7643 (TH_SYN|TH_ACK)) == TH_SYN && 7644 pf_synflood_check(&pd)) { 7645 PF_LOCK(); 7646 have_pf_lock = 1; 7647 pf_syncookie_send(&pd); 7648 action = PF_DROP; 7649 break; 7650 } 7651 if ((pd.hdr.tcp.th_flags & TH_ACK) && pd.p_len == 0) 7652 pqid = 1; 7653 action = pf_normalize_tcp(&pd); 7654 if (action == PF_DROP) 7655 break; 7656 } 7657 7658 key.af = pd.af; 7659 key.proto = pd.virtual_proto; 7660 key.rdomain = pd.rdomain; 7661 pf_addrcpy(&key.addr[pd.sidx], pd.src, key.af); 7662 pf_addrcpy(&key.addr[pd.didx], pd.dst, key.af); 7663 key.port[pd.sidx] = pd.osport; 7664 key.port[pd.didx] = pd.odport; 7665 key.hash = pd.hash; 7666 7667 PF_STATE_ENTER_READ(); 7668 action = pf_find_state(&pd, &key, &st); 7669 st = pf_state_ref(st); 7670 PF_STATE_EXIT_READ(); 7671 7672 /* check for syncookies if tcp ack and no active state */ 7673 if (pd.dir == PF_IN && pd.virtual_proto == IPPROTO_TCP && 7674 (st == NULL || (st->src.state >= TCPS_FIN_WAIT_2 && 7675 st->dst.state >= TCPS_FIN_WAIT_2)) && 7676 (pd.hdr.tcp.th_flags & (TH_SYN|TH_ACK|TH_RST)) == TH_ACK && 7677 pf_syncookie_validate(&pd)) { 7678 struct mbuf *msyn = pf_syncookie_recreate_syn(&pd); 7679 if (msyn) { 7680 action = pf_test(af, fwdir, ifp, &msyn); 7681 
m_freem(msyn); 7682 if (action == PF_PASS || action == PF_AFRT) { 7683 PF_STATE_ENTER_READ(); 7684 pf_state_unref(st); 7685 action = pf_find_state(&pd, &key, &st); 7686 st = pf_state_ref(st); 7687 PF_STATE_EXIT_READ(); 7688 if (st == NULL) 7689 return (PF_DROP); 7690 st->src.seqhi = st->dst.seqhi = 7691 ntohl(pd.hdr.tcp.th_ack) - 1; 7692 st->src.seqlo = 7693 ntohl(pd.hdr.tcp.th_seq) - 1; 7694 pf_set_protostate(st, PF_PEER_SRC, 7695 PF_TCPS_PROXY_DST); 7696 } 7697 } else 7698 action = PF_DROP; 7699 } 7700 7701 if (action == PF_MATCH) 7702 action = pf_test_state(&pd, &st, &reason); 7703 7704 if (action == PF_PASS || action == PF_AFRT) { 7705 #if NPFSYNC > 0 7706 pfsync_update_state(st); 7707 #endif /* NPFSYNC > 0 */ 7708 r = st->rule.ptr; 7709 a = st->anchor.ptr; 7710 #if NPFLOG > 0 7711 pd.pflog |= st->log; 7712 #endif /* NPFLOG > 0 */ 7713 } else if (st == NULL) { 7714 PF_LOCK(); 7715 have_pf_lock = 1; 7716 action = pf_test_rule(&pd, &r, &st, &a, &ruleset, 7717 &reason, &deferral); 7718 st = pf_state_ref(st); 7719 } 7720 7721 if (pd.virtual_proto == IPPROTO_TCP) { 7722 if (st) { 7723 if (st->max_mss) 7724 pf_normalize_mss(&pd, st->max_mss); 7725 } else if (r->max_mss) 7726 pf_normalize_mss(&pd, r->max_mss); 7727 } 7728 7729 break; 7730 } 7731 7732 if (have_pf_lock != 0) 7733 PF_UNLOCK(); 7734 7735 /* 7736 * At the moment, we rely on NET_LOCK() to prevent removal of items 7737 * we've collected above ('r', 'anchor' and 'ruleset'). They'll have 7738 * to be refcounted when NET_LOCK() is gone. 7739 */ 7740 7741 done: 7742 if (action != PF_DROP) { 7743 if (st) { 7744 /* The non-state case is handled in pf_test_rule() */ 7745 if (action == PF_PASS && pd.badopts != 0 && 7746 !(st->state_flags & PFSTATE_ALLOWOPTS)) { 7747 action = PF_DROP; 7748 REASON_SET(&reason, PFRES_IPOPTIONS); 7749 #if NPFLOG > 0 7750 pd.pflog |= PF_LOG_FORCE; 7751 #endif /* NPFLOG > 0 */ 7752 DPFPRINTF(LOG_NOTICE, "dropping packet with " 7753 "ip/ipv6 options in pf_test()"); 7754 } 7755 7756 pf_scrub(pd.m, st->state_flags, pd.af, st->min_ttl, 7757 st->set_tos); 7758 pf_tag_packet(pd.m, st->tag, st->rtableid[pd.didx]); 7759 if (pqid || (pd.tos & IPTOS_LOWDELAY)) { 7760 qid = st->pqid; 7761 if (st->state_flags & PFSTATE_SETPRIO) { 7762 pd.m->m_pkthdr.pf.prio = 7763 st->set_prio[1]; 7764 } 7765 } else { 7766 qid = st->qid; 7767 if (st->state_flags & PFSTATE_SETPRIO) { 7768 pd.m->m_pkthdr.pf.prio = 7769 st->set_prio[0]; 7770 } 7771 } 7772 pd.m->m_pkthdr.pf.delay = st->delay; 7773 } else { 7774 pf_scrub(pd.m, r->scrub_flags, pd.af, r->min_ttl, 7775 r->set_tos); 7776 if (pqid || (pd.tos & IPTOS_LOWDELAY)) { 7777 qid = r->pqid; 7778 if (r->scrub_flags & PFSTATE_SETPRIO) 7779 pd.m->m_pkthdr.pf.prio = r->set_prio[1]; 7780 } else { 7781 qid = r->qid; 7782 if (r->scrub_flags & PFSTATE_SETPRIO) 7783 pd.m->m_pkthdr.pf.prio = r->set_prio[0]; 7784 } 7785 pd.m->m_pkthdr.pf.delay = r->delay; 7786 } 7787 } 7788 7789 if (action == PF_PASS && qid) 7790 pd.m->m_pkthdr.pf.qid = qid; 7791 if (pd.dir == PF_IN && st && st->key[PF_SK_STACK]) 7792 pf_mbuf_link_state_key(pd.m, st->key[PF_SK_STACK]); 7793 if (pd.dir == PF_OUT && 7794 pd.m->m_pkthdr.pf.inp && !pd.m->m_pkthdr.pf.inp->inp_pf_sk && 7795 st && st->key[PF_SK_STACK] && !st->key[PF_SK_STACK]->sk_inp) 7796 pf_state_key_link_inpcb(st->key[PF_SK_STACK], 7797 pd.m->m_pkthdr.pf.inp); 7798 7799 if (st != NULL && !ISSET(pd.m->m_pkthdr.csum_flags, M_FLOWID)) { 7800 pd.m->m_pkthdr.ph_flowid = st->key[PF_SK_WIRE]->hash; 7801 SET(pd.m->m_pkthdr.csum_flags, M_FLOWID); 7802 } 7803 7804 /* 7805 * 
connections redirected to loopback should not match sockets 7806 * bound specifically to loopback due to security implications, 7807 * see in_pcblookup_listen(). 7808 */ 7809 if (pd.destchg) 7810 if ((pd.af == AF_INET && (ntohl(pd.dst->v4.s_addr) >> 7811 IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) || 7812 (pd.af == AF_INET6 && IN6_IS_ADDR_LOOPBACK(&pd.dst->v6))) 7813 pd.m->m_pkthdr.pf.flags |= PF_TAG_TRANSLATE_LOCALHOST; 7814 /* We need to redo the route lookup on outgoing routes. */ 7815 if (pd.destchg && pd.dir == PF_OUT) 7816 pd.m->m_pkthdr.pf.flags |= PF_TAG_REROUTE; 7817 7818 if (pd.dir == PF_IN && action == PF_PASS && 7819 (r->divert.type == PF_DIVERT_TO || 7820 r->divert.type == PF_DIVERT_REPLY)) { 7821 struct pf_divert *divert; 7822 7823 if ((divert = pf_get_divert(pd.m))) { 7824 pd.m->m_pkthdr.pf.flags |= PF_TAG_DIVERTED; 7825 divert->addr = r->divert.addr; 7826 divert->port = r->divert.port; 7827 divert->rdomain = pd.rdomain; 7828 divert->type = r->divert.type; 7829 } 7830 } 7831 7832 if (action == PF_PASS && r->divert.type == PF_DIVERT_PACKET) 7833 action = PF_DIVERT; 7834 7835 #if NPFLOG > 0 7836 if (pd.pflog) { 7837 struct pf_rule_item *ri; 7838 7839 if (pd.pflog & PF_LOG_FORCE || r->log & PF_LOG_ALL) 7840 pflog_packet(&pd, reason, r, a, ruleset, NULL); 7841 if (st) { 7842 SLIST_FOREACH(ri, &st->match_rules, entry) 7843 if (ri->r->log & PF_LOG_ALL) 7844 pflog_packet(&pd, reason, ri->r, a, 7845 ruleset, NULL); 7846 } 7847 } 7848 #endif /* NPFLOG > 0 */ 7849 7850 pf_counters_inc(action, &pd, st, r, a); 7851 7852 switch (action) { 7853 case PF_SYNPROXY_DROP: 7854 m_freem(pd.m); 7855 /* FALLTHROUGH */ 7856 case PF_DEFER: 7857 #if NPFSYNC > 0 7858 /* 7859 * We no longer hold PF_LOCK() here, so we can dispatch 7860 * deferral if we are asked to do so. 
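* (pfsync defers the initial packet of a state until the peer
* acknowledges the state insertion or a timeout fires)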
7861 */ 7862 if (deferral != NULL) 7863 pfsync_undefer(deferral, 0); 7864 #endif /* NPFSYNC > 0 */ 7865 pd.m = NULL; 7866 action = PF_PASS; 7867 break; 7868 case PF_DIVERT: 7869 switch (pd.af) { 7870 case AF_INET: 7871 divert_packet(pd.m, pd.dir, r->divert.port); 7872 pd.m = NULL; 7873 break; 7874 #ifdef INET6 7875 case AF_INET6: 7876 divert6_packet(pd.m, pd.dir, r->divert.port); 7877 pd.m = NULL; 7878 break; 7879 #endif /* INET6 */ 7880 } 7881 action = PF_PASS; 7882 break; 7883 #ifdef INET6 7884 case PF_AFRT: 7885 if (pf_translate_af(&pd)) { 7886 action = PF_DROP; 7887 break; 7888 } 7889 pd.m->m_pkthdr.pf.flags |= PF_TAG_GENERATED; 7890 switch (pd.naf) { 7891 case AF_INET: 7892 if (pd.dir == PF_IN) { 7893 if (ipforwarding == 0) { 7894 ipstat_inc(ips_cantforward); 7895 action = PF_DROP; 7896 break; 7897 } 7898 ip_forward(pd.m, ifp, NULL, 1); 7899 } else 7900 ip_output(pd.m, NULL, NULL, 0, NULL, NULL, 0); 7901 break; 7902 case AF_INET6: 7903 if (pd.dir == PF_IN) { 7904 if (ip6_forwarding == 0) { 7905 ip6stat_inc(ip6s_cantforward); 7906 action = PF_DROP; 7907 break; 7908 } 7909 ip6_forward(pd.m, NULL, 1); 7910 } else 7911 ip6_output(pd.m, NULL, NULL, 0, NULL, NULL); 7912 break; 7913 } 7914 if (action != PF_DROP) { 7915 pd.m = NULL; 7916 action = PF_PASS; 7917 } 7918 break; 7919 #endif /* INET6 */ 7920 case PF_DROP: 7921 m_freem(pd.m); 7922 pd.m = NULL; 7923 break; 7924 default: 7925 if (st && st->rt) { 7926 switch (pd.af) { 7927 case AF_INET: 7928 pf_route(&pd, st); 7929 break; 7930 #ifdef INET6 7931 case AF_INET6: 7932 pf_route6(&pd, st); 7933 break; 7934 #endif /* INET6 */ 7935 } 7936 } 7937 break; 7938 } 7939 7940 #ifdef INET6 7941 /* if reassembled packet passed, create new fragments */ 7942 if (pf_status.reass && action == PF_PASS && pd.m && fwdir == PF_FWD && 7943 pd.af == AF_INET6) { 7944 struct m_tag *mtag; 7945 7946 if ((mtag = m_tag_find(pd.m, PACKET_TAG_PF_REASSEMBLED, NULL))) 7947 action = pf_refragment6(&pd.m, mtag, NULL, NULL, NULL); 7948 } 7949 #endif /* INET6 */ 7950 if (st && action != PF_DROP) { 7951 if (!st->if_index_in && dir == PF_IN) 7952 st->if_index_in = ifp->if_index; 7953 else if (!st->if_index_out && dir == PF_OUT) 7954 st->if_index_out = ifp->if_index; 7955 } 7956 7957 *m0 = pd.m; 7958 7959 pf_state_unref(st); 7960 7961 return (action); 7962 } 7963 7964 int 7965 pf_ouraddr(struct mbuf *m) 7966 { 7967 struct pf_state_key *sk; 7968 7969 if (m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) 7970 return (1); 7971 7972 sk = m->m_pkthdr.pf.statekey; 7973 if (sk != NULL) { 7974 if (sk->sk_inp != NULL) 7975 return (1); 7976 } 7977 7978 return (-1); 7979 } 7980 7981 /* 7982 * must be called whenever any addressing information such as 7983 * address, port, protocol has changed 7984 */ 7985 void 7986 pf_pkt_addr_changed(struct mbuf *m) 7987 { 7988 pf_mbuf_unlink_state_key(m); 7989 pf_mbuf_unlink_inpcb(m); 7990 } 7991 7992 struct inpcb * 7993 pf_inp_lookup(struct mbuf *m) 7994 { 7995 struct inpcb *inp = NULL; 7996 struct pf_state_key *sk = m->m_pkthdr.pf.statekey; 7997 7998 if (!pf_state_key_isvalid(sk)) 7999 pf_mbuf_unlink_state_key(m); 8000 else 8001 inp = m->m_pkthdr.pf.statekey->sk_inp; 8002 8003 if (inp && inp->inp_pf_sk) 8004 KASSERT(m->m_pkthdr.pf.statekey == inp->inp_pf_sk); 8005 8006 in_pcbref(inp); 8007 return (inp); 8008 } 8009 8010 void 8011 pf_inp_link(struct mbuf *m, struct inpcb *inp) 8012 { 8013 struct pf_state_key *sk = m->m_pkthdr.pf.statekey; 8014 8015 if (!pf_state_key_isvalid(sk)) { 8016 pf_mbuf_unlink_state_key(m); 8017 return; 8018 } 8019 8020 /* 8021 * we don't 
need to grab PF-lock here. At worst case we link inp to 8022 * state, which might be just being marked as deleted by another 8023 * thread. 8024 */ 8025 if (inp && !sk->sk_inp && !inp->inp_pf_sk) 8026 pf_state_key_link_inpcb(sk, inp); 8027 8028 /* The statekey has finished finding the inp, it is no longer needed. */ 8029 pf_mbuf_unlink_state_key(m); 8030 } 8031 8032 void 8033 pf_inp_unlink(struct inpcb *inp) 8034 { 8035 pf_inpcb_unlink_state_key(inp); 8036 } 8037 8038 void 8039 pf_state_key_link_reverse(struct pf_state_key *sk, struct pf_state_key *skrev) 8040 { 8041 struct pf_state_key *old_reverse; 8042 8043 old_reverse = atomic_cas_ptr(&sk->sk_reverse, NULL, skrev); 8044 if (old_reverse != NULL) 8045 KASSERT(old_reverse == skrev); 8046 else { 8047 pf_state_key_ref(skrev); 8048 8049 /* 8050 * NOTE: if sk == skrev, then KASSERT() below holds true, we 8051 * still want to grab a reference in such case, because 8052 * pf_state_key_unlink_reverse() does not check whether keys 8053 * are identical or not. 8054 */ 8055 old_reverse = atomic_cas_ptr(&skrev->sk_reverse, NULL, sk); 8056 if (old_reverse != NULL) 8057 KASSERT(old_reverse == sk); 8058 8059 pf_state_key_ref(sk); 8060 } 8061 } 8062 8063 #if NPFLOG > 0 8064 void 8065 pf_log_matches(struct pf_pdesc *pd, struct pf_rule *rm, struct pf_rule *am, 8066 struct pf_ruleset *ruleset, struct pf_rule_slist *matchrules) 8067 { 8068 struct pf_rule_item *ri; 8069 8070 /* if this is the log(matches) rule, packet has been logged already */ 8071 if (rm->log & PF_LOG_MATCHES) 8072 return; 8073 8074 SLIST_FOREACH(ri, matchrules, entry) 8075 if (ri->r->log & PF_LOG_MATCHES) 8076 pflog_packet(pd, PFRES_MATCH, rm, am, ruleset, ri->r); 8077 } 8078 #endif /* NPFLOG > 0 */ 8079 8080 struct pf_state_key * 8081 pf_state_key_ref(struct pf_state_key *sk) 8082 { 8083 if (sk != NULL) 8084 PF_REF_TAKE(sk->sk_refcnt); 8085 8086 return (sk); 8087 } 8088 8089 void 8090 pf_state_key_unref(struct pf_state_key *sk) 8091 { 8092 if (PF_REF_RELE(sk->sk_refcnt)) { 8093 /* state key must be removed from tree */ 8094 KASSERT(!pf_state_key_isvalid(sk)); 8095 /* state key must be unlinked from reverse key */ 8096 KASSERT(sk->sk_reverse == NULL); 8097 /* state key must be unlinked from socket */ 8098 KASSERT(sk->sk_inp == NULL); 8099 pool_put(&pf_state_key_pl, sk); 8100 } 8101 } 8102 8103 int 8104 pf_state_key_isvalid(struct pf_state_key *sk) 8105 { 8106 return ((sk != NULL) && (sk->sk_removed == 0)); 8107 } 8108 8109 void 8110 pf_mbuf_link_state_key(struct mbuf *m, struct pf_state_key *sk) 8111 { 8112 KASSERT(m->m_pkthdr.pf.statekey == NULL); 8113 m->m_pkthdr.pf.statekey = pf_state_key_ref(sk); 8114 } 8115 8116 void 8117 pf_mbuf_unlink_state_key(struct mbuf *m) 8118 { 8119 struct pf_state_key *sk = m->m_pkthdr.pf.statekey; 8120 8121 if (sk != NULL) { 8122 m->m_pkthdr.pf.statekey = NULL; 8123 pf_state_key_unref(sk); 8124 } 8125 } 8126 8127 void 8128 pf_mbuf_link_inpcb(struct mbuf *m, struct inpcb *inp) 8129 { 8130 KASSERT(m->m_pkthdr.pf.inp == NULL); 8131 m->m_pkthdr.pf.inp = in_pcbref(inp); 8132 } 8133 8134 void 8135 pf_mbuf_unlink_inpcb(struct mbuf *m) 8136 { 8137 struct inpcb *inp = m->m_pkthdr.pf.inp; 8138 8139 if (inp != NULL) { 8140 m->m_pkthdr.pf.inp = NULL; 8141 in_pcbunref(inp); 8142 } 8143 } 8144 8145 void 8146 pf_state_key_link_inpcb(struct pf_state_key *sk, struct inpcb *inp) 8147 { 8148 KASSERT(sk->sk_inp == NULL); 8149 sk->sk_inp = in_pcbref(inp); 8150 KASSERT(inp->inp_pf_sk == NULL); 8151 inp->inp_pf_sk = pf_state_key_ref(sk); 8152 } 8153 8154 void 8155 
pf_inpcb_unlink_state_key(struct inpcb *inp) 8156 { 8157 struct pf_state_key *sk = inp->inp_pf_sk; 8158 8159 if (sk != NULL) { 8160 KASSERT(sk->sk_inp == inp); 8161 sk->sk_inp = NULL; 8162 inp->inp_pf_sk = NULL; 8163 pf_state_key_unref(sk); 8164 in_pcbunref(inp); 8165 } 8166 } 8167 8168 void 8169 pf_state_key_unlink_inpcb(struct pf_state_key *sk) 8170 { 8171 struct inpcb *inp = sk->sk_inp; 8172 8173 if (inp != NULL) { 8174 KASSERT(inp->inp_pf_sk == sk); 8175 sk->sk_inp = NULL; 8176 inp->inp_pf_sk = NULL; 8177 pf_state_key_unref(sk); 8178 in_pcbunref(inp); 8179 } 8180 } 8181 8182 void 8183 pf_state_key_unlink_reverse(struct pf_state_key *sk) 8184 { 8185 struct pf_state_key *skrev = sk->sk_reverse; 8186 8187 /* Note that sk and skrev may be equal, then we unref twice. */ 8188 if (skrev != NULL) { 8189 KASSERT(skrev->sk_reverse == sk); 8190 sk->sk_reverse = NULL; 8191 skrev->sk_reverse = NULL; 8192 pf_state_key_unref(skrev); 8193 pf_state_key_unref(sk); 8194 } 8195 } 8196 8197 struct pf_state * 8198 pf_state_ref(struct pf_state *st) 8199 { 8200 if (st != NULL) 8201 PF_REF_TAKE(st->refcnt); 8202 return (st); 8203 } 8204 8205 void 8206 pf_state_unref(struct pf_state *st) 8207 { 8208 if ((st != NULL) && PF_REF_RELE(st->refcnt)) { 8209 /* never inserted or removed */ 8210 #if NPFSYNC > 0 8211 KASSERT((TAILQ_NEXT(st, sync_list) == NULL) || 8212 ((TAILQ_NEXT(st, sync_list) == _Q_INVALID) && 8213 (st->sync_state == PFSYNC_S_NONE))); 8214 #endif /* NPFSYNC */ 8215 KASSERT((TAILQ_NEXT(st, entry_list) == NULL) || 8216 (TAILQ_NEXT(st, entry_list) == _Q_INVALID)); 8217 8218 pf_state_key_unref(st->key[PF_SK_WIRE]); 8219 pf_state_key_unref(st->key[PF_SK_STACK]); 8220 8221 pool_put(&pf_state_pl, st); 8222 } 8223 } 8224 8225 int 8226 pf_delay_pkt(struct mbuf *m, u_int ifidx) 8227 { 8228 struct pf_pktdelay *pdy; 8229 8230 if ((pdy = pool_get(&pf_pktdelay_pl, PR_NOWAIT)) == NULL) { 8231 m_freem(m); 8232 return (ENOBUFS); 8233 } 8234 pdy->ifidx = ifidx; 8235 pdy->m = m; 8236 timeout_set(&pdy->to, pf_pktenqueue_delayed, pdy); 8237 timeout_add_msec(&pdy->to, m->m_pkthdr.pf.delay); 8238 m->m_pkthdr.pf.delay = 0; 8239 return (0); 8240 } 8241 8242 void 8243 pf_pktenqueue_delayed(void *arg) 8244 { 8245 struct pf_pktdelay *pdy = arg; 8246 struct ifnet *ifp; 8247 8248 ifp = if_get(pdy->ifidx); 8249 if (ifp != NULL) { 8250 if_enqueue(ifp, pdy->m); 8251 if_put(ifp); 8252 } else 8253 m_freem(pdy->m); 8254 8255 pool_put(&pf_pktdelay_pl, pdy); 8256 } 8257