/*	$OpenBSD: pf.c,v 1.1207 2024/12/26 10:15:27 bluhm Exp $ */

/*
 * Copyright (c) 2001 Daniel Hartmeier
 * Copyright (c) 2002 - 2013 Henning Brauer <henning@openbsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *    - Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *    - Redistributions in binary form must reproduce the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer in the documentation and/or other materials provided
 *      with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Effort sponsored in part by the Defense Advanced Research Projects
 * Agency (DARPA) and Air Force Research Laboratory, Air Force
 * Materiel Command, USAF, under agreement number F30602-01-2-0537.
 *
 */

#include "bpfilter.h"
#include "carp.h"
#include "pflog.h"
#include "pfsync.h"
#include "pflow.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/filio.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/kernel.h>
#include <sys/time.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/syslog.h>

#include <crypto/sha2.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_types.h>
#include <net/route.h>
#include <net/toeplitz.h>

#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
#include <netinet/in_pcb.h>
#include <netinet/ip_var.h>
#include <netinet/ip_icmp.h>
#include <netinet/icmp_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_fsm.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>
#include <netinet/ip_divert.h>

#ifdef INET6
#include <netinet6/in6_var.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet/icmp6.h>
#include <netinet6/nd6.h>
#include <netinet6/ip6_divert.h>
#endif /* INET6 */

#include <net/pfvar.h>
#include <net/pfvar_priv.h>

#if NPFLOG > 0
#include <net/if_pflog.h>
#endif /* NPFLOG > 0 */

#if NPFLOW > 0
#include <net/if_pflow.h>
#endif /* NPFLOW > 0 */

#if NPFSYNC > 0
#include <net/if_pfsync.h>
#endif /* NPFSYNC > 0 */

/*
 * Global variables
 */
struct pf_state_tree	 pf_statetbl;
struct pf_queuehead	 pf_queues[2];
struct pf_queuehead	*pf_queues_active;
struct pf_queuehead	*pf_queues_inactive;
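
/*
 * Note (added for clarity): like rulesets, queue definitions are kept in
 * two lists so that queue configuration can be loaded transactionally;
 * pfctl fills the inactive list and the commit step is expected to swap
 * the active and inactive pointers.
 */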

struct pf_status	 pf_status;

struct mutex		 pf_inp_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);

int			 pf_hdr_limit = 20; /* arbitrary limit, tune in ddb */

SHA2_CTX		 pf_tcp_secret_ctx;
u_char			 pf_tcp_secret[16];
int			 pf_tcp_secret_init;
int			 pf_tcp_iss_off;

enum pf_test_status {
	PF_TEST_FAIL = -1,
	PF_TEST_OK,
	PF_TEST_QUICK
};

struct pf_test_ctx {
	struct pf_pdesc		 *pd;
	struct pf_rule_actions	  act;
	u_int8_t		  icmpcode;
	u_int8_t		  icmptype;
	int			  icmp_dir;
	int			  state_icmp;
	int			  tag;
	u_short			  reason;
	struct pf_rule_item	 *ri;
	struct pf_src_node	 *sns[PF_SN_MAX];
	struct pf_rule_slist	  rules;
	struct pf_rule		 *nr;
	struct pf_rule		**rm;
	struct pf_rule		 *a;
	struct pf_rule		**am;
	struct pf_ruleset	**rsm;
	struct pf_ruleset	 *arsm;
	struct pf_ruleset	 *aruleset;
	struct tcphdr		 *th;
};

struct pool		 pf_src_tree_pl, pf_rule_pl, pf_queue_pl;
struct pool		 pf_state_pl, pf_state_key_pl, pf_state_item_pl;
struct pool		 pf_rule_item_pl, pf_sn_item_pl, pf_pktdelay_pl;

void			 pf_add_threshold(struct pf_threshold *);
int			 pf_check_threshold(struct pf_threshold *);
int			 pf_check_tcp_cksum(struct mbuf *, int, int,
			    sa_family_t);
__inline void		 pf_cksum_fixup(u_int16_t *, u_int16_t, u_int16_t,
			    u_int8_t);
void			 pf_cksum_fixup_a(u_int16_t *, const struct pf_addr *,
			    const struct pf_addr *, sa_family_t, u_int8_t);
int			 pf_modulate_sack(struct pf_pdesc *,
			    struct pf_state_peer *);
int			 pf_icmp_mapping(struct pf_pdesc *, u_int8_t, int *,
			    u_int16_t *, u_int16_t *);
int			 pf_change_icmp_af(struct mbuf *, int,
			    struct pf_pdesc *, struct pf_pdesc *,
			    struct pf_addr *, struct pf_addr *, sa_family_t,
			    sa_family_t);
int			 pf_translate_a(struct pf_pdesc *, struct pf_addr *,
			    struct pf_addr *);
void			 pf_translate_icmp(struct pf_pdesc *, struct pf_addr *,
			    u_int16_t *, struct pf_addr *, struct pf_addr *,
			    u_int16_t);
int			 pf_translate_icmp_af(struct pf_pdesc *, int, void *);
void			 pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t, int,
			    sa_family_t, struct pf_rule *, u_int);
void			 pf_detach_state(struct pf_state *);
struct pf_state_key	*pf_state_key_attach(struct pf_state_key *,
			    struct pf_state *, int);
void			 pf_state_key_detach(struct pf_state *, int);
u_int32_t		 pf_tcp_iss(struct pf_pdesc *);
void			 pf_rule_to_actions(struct pf_rule *,
			    struct pf_rule_actions *);
int			 pf_test_rule(struct pf_pdesc *, struct pf_rule **,
			    struct pf_state **, struct pf_rule **,
			    struct pf_ruleset **, u_short *);
static __inline int	 pf_create_state(struct pf_pdesc *, struct pf_rule *,
			    struct pf_rule *, struct pf_rule *,
			    struct pf_state_key **, struct pf_state_key **,
			    int *, struct pf_state **, int,
			    struct pf_rule_slist *, struct pf_rule_actions *,
			    struct pf_src_node **);
static __inline int	 pf_state_key_addr_setup(struct pf_pdesc *, void *,
			    int, struct pf_addr *, int, struct pf_addr *,
			    int, int);
int			 pf_state_key_setup(struct pf_pdesc *, struct
			    pf_state_key **, struct pf_state_key **, int);
int			 pf_tcp_track_full(struct pf_pdesc *,
			    struct pf_state **, u_short *, int *, int);
int			 pf_tcp_track_sloppy(struct pf_pdesc *,
			    struct pf_state **, u_short *);
static __inline int	 pf_synproxy(struct pf_pdesc *, struct pf_state **,
			    u_short *);
int			 pf_test_state(struct pf_pdesc *, struct pf_state **,
			    u_short *);
int			 pf_icmp_state_lookup(struct pf_pdesc *,
			    struct pf_state_key_cmp *, struct pf_state **,
			    u_int16_t, u_int16_t, int, int *, int, int);
int			 pf_test_state_icmp(struct pf_pdesc *,
			    struct pf_state **, u_short *);
u_int16_t		 pf_calc_mss(struct pf_addr *, sa_family_t, int,
			    uint16_t, uint16_t);
static __inline int	 pf_set_rt_ifp(struct pf_state *, struct pf_addr *,
			    sa_family_t, struct pf_src_node **);
struct pf_divert	*pf_get_divert(struct mbuf *);
int			 pf_walk_option(struct pf_pdesc *, struct ip *,
			    int, int, u_short *);
int			 pf_walk_header(struct pf_pdesc *, struct ip *,
			    u_short *);
int			 pf_walk_option6(struct pf_pdesc *, struct ip6_hdr *,
			    int, int, u_short *);
int			 pf_walk_header6(struct pf_pdesc *, struct ip6_hdr *,
			    u_short *);
void			 pf_print_state_parts(struct pf_state *,
			    struct pf_state_key *, struct pf_state_key *);
int			 pf_addr_wrap_neq(struct pf_addr_wrap *,
			    struct pf_addr_wrap *);
int			 pf_compare_state_keys(struct pf_state_key *,
			    struct pf_state_key *, struct pfi_kif *, u_int);
u_int16_t		 pf_pkt_hash(sa_family_t, uint8_t,
			    const struct pf_addr *, const struct pf_addr *,
			    uint16_t, uint16_t);
int			 pf_find_state(struct pf_pdesc *,
			    struct pf_state_key_cmp *, struct pf_state **);
int			 pf_src_connlimit(struct pf_state **);
int			 pf_match_rcvif(struct mbuf *, struct pf_rule *);
enum pf_test_status	 pf_match_rule(struct pf_test_ctx *,
			    struct pf_ruleset *);
void			 pf_counters_inc(int, struct pf_pdesc *,
			    struct pf_state *, struct pf_rule *,
			    struct pf_rule *);

int			 pf_state_insert(struct pfi_kif *,
			    struct pf_state_key **, struct pf_state_key **,
			    struct pf_state *);

int			 pf_state_key_isvalid(struct pf_state_key *);
struct pf_state_key	*pf_state_key_ref(struct pf_state_key *);
void			 pf_state_key_unref(struct pf_state_key *);
void			 pf_state_key_link_reverse(struct pf_state_key *,
			    struct pf_state_key *);
void			 pf_state_key_unlink_reverse(struct pf_state_key *);
void			 pf_state_key_link_inpcb(struct pf_state_key *,
			    struct inpcb *);
void			 pf_state_key_unlink_inpcb(struct pf_state_key *);
void			 pf_pktenqueue_delayed(void *);
int32_t			 pf_state_expires(const struct pf_state *, uint8_t);

#if NPFLOG > 0
void			 pf_log_matches(struct pf_pdesc *, struct pf_rule *,
			    struct pf_rule *, struct pf_ruleset *,
			    struct pf_rule_slist *);
#endif	/* NPFLOG > 0 */

extern struct pool pfr_ktable_pl;
extern struct pool pfr_kentry_pl;

struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = {
	{ &pf_state_pl,		PFSTATE_HIWAT,		PFSTATE_HIWAT },
	{ &pf_src_tree_pl,	PFSNODE_HIWAT,		PFSNODE_HIWAT },
	{ &pf_frent_pl,		PFFRAG_FRENT_HIWAT,	PFFRAG_FRENT_HIWAT },
	{ &pfr_ktable_pl,	PFR_KTABLE_HIWAT,	PFR_KTABLE_HIWAT },
	{ &pfr_kentry_pl,	PFR_KENTRY_HIWAT,	PFR_KENTRY_HIWAT },
	{ &pf_pktdelay_pl,	PF_PKTDELAY_MAXPKTS,	PF_PKTDELAY_MAXPKTS },
	{ &pf_anchor_pl,	PF_ANCHOR_HIWAT,	PF_ANCHOR_HIWAT }
};

#define BOUND_IFACE(r, k) \
	((r)->rule_flag & PFRULE_IFBOUND) ? (k) : pfi_all
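
/*
 * Illustration (added comment, not in the original source): BOUND_IFACE()
 * picks the kif a freshly created state is bound to.  Rules with
 * "keep state (if-bound)" carry PFRULE_IFBOUND, so the state is tied to
 * the interface it was created on; otherwise the state floats and is
 * matched on any interface via the pfi_all pseudo-interface, roughly:
 *
 *	st->kif = BOUND_IFACE(r, pd->kif);
 */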

#define STATE_INC_COUNTERS(s)					\
	do {							\
		struct pf_rule_item *mrm;			\
		s->rule.ptr->states_cur++;			\
		s->rule.ptr->states_tot++;			\
		if (s->anchor.ptr != NULL) {			\
			s->anchor.ptr->states_cur++;		\
			s->anchor.ptr->states_tot++;		\
		}						\
		SLIST_FOREACH(mrm, &s->match_rules, entry)	\
			mrm->r->states_cur++;			\
	} while (0)

static __inline int pf_src_compare(struct pf_src_node *, struct pf_src_node *);
static inline int pf_state_compare_key(const struct pf_state_key *,
	const struct pf_state_key *);
static inline int pf_state_compare_id(const struct pf_state *,
	const struct pf_state *);
#ifdef INET6
static __inline void pf_cksum_uncover(u_int16_t *, u_int16_t, u_int8_t);
static __inline void pf_cksum_cover(u_int16_t *, u_int16_t, u_int8_t);
#endif	/* INET6 */
static __inline void pf_set_protostate(struct pf_state *, int, u_int8_t);

struct pf_src_tree tree_src_tracking;

struct pf_state_tree_id tree_id;
struct pf_state_list pf_state_list = PF_STATE_LIST_INITIALIZER(pf_state_list);

RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare);
RBT_GENERATE(pf_state_tree, pf_state_key, sk_entry, pf_state_compare_key);
RBT_GENERATE(pf_state_tree_id, pf_state, entry_id, pf_state_compare_id);

int
pf_addr_compare(const struct pf_addr *a, const struct pf_addr *b,
    sa_family_t af)
{
	switch (af) {
	case AF_INET:
		if (a->addr32[0] > b->addr32[0])
			return (1);
		if (a->addr32[0] < b->addr32[0])
			return (-1);
		break;
#ifdef INET6
	case AF_INET6:
		if (a->addr32[3] > b->addr32[3])
			return (1);
		if (a->addr32[3] < b->addr32[3])
			return (-1);
		if (a->addr32[2] > b->addr32[2])
			return (1);
		if (a->addr32[2] < b->addr32[2])
			return (-1);
		if (a->addr32[1] > b->addr32[1])
			return (1);
		if (a->addr32[1] < b->addr32[1])
			return (-1);
		if (a->addr32[0] > b->addr32[0])
			return (1);
		if (a->addr32[0] < b->addr32[0])
			return (-1);
		break;
#endif	/* INET6 */
	}
	return (0);
}

static __inline int
pf_src_compare(struct pf_src_node *a, struct pf_src_node *b)
{
	int	diff;

	if (a->rule.ptr > b->rule.ptr)
		return (1);
	if (a->rule.ptr < b->rule.ptr)
		return (-1);
	if ((diff = a->type - b->type) != 0)
		return (diff);
	if ((diff = a->af - b->af) != 0)
		return (diff);
	if ((diff = pf_addr_compare(&a->addr, &b->addr, a->af)) != 0)
		return (diff);
	return (0);
}

static __inline void
pf_set_protostate(struct pf_state *st, int which, u_int8_t newstate)
{
	if (which == PF_PEER_DST || which == PF_PEER_BOTH)
		st->dst.state = newstate;
	if (which == PF_PEER_DST)
		return;

	if (st->src.state == newstate)
		return;
	if (st->creatorid == pf_status.hostid &&
	    st->key[PF_SK_STACK]->proto == IPPROTO_TCP &&
	    !(TCPS_HAVEESTABLISHED(st->src.state) ||
	    st->src.state == TCPS_CLOSED) &&
	    (TCPS_HAVEESTABLISHED(newstate) || newstate == TCPS_CLOSED))
		pf_status.states_halfopen--;

	st->src.state = newstate;
}

void
pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af)
{
	switch (af) {
	case AF_INET:
		dst->addr32[0] = src->addr32[0];
		break;
#ifdef INET6
	case AF_INET6:
		dst->addr32[0] = src->addr32[0];
		dst->addr32[1] = src->addr32[1];
		dst->addr32[2] = src->addr32[2];
		dst->addr32[3] = src->addr32[3];
		break;
#endif	/* INET6 */
	default:
		unhandled_af(af);
	}
}

void
pf_init_threshold(struct pf_threshold *threshold,
    u_int32_t limit, u_int32_t seconds)
{
	threshold->limit = limit * PF_THRESHOLD_MULT;
	threshold->seconds = seconds;
	threshold->count = 0;
	threshold->last = getuptime();
}

void
pf_add_threshold(struct pf_threshold *threshold)
{
	u_int32_t t = getuptime(), diff = t - threshold->last;

	if (diff >= threshold->seconds)
		threshold->count = 0;
	else
		threshold->count -= threshold->count * diff /
		    threshold->seconds;
	threshold->count += PF_THRESHOLD_MULT;
	threshold->last = t;
}

int
pf_check_threshold(struct pf_threshold *threshold)
{
	return (threshold->count > threshold->limit);
}

void
pf_state_list_insert(struct pf_state_list *pfs, struct pf_state *st)
{
	/*
	 * we can always put states on the end of the list.
	 *
	 * things reading the list should take a read lock, then
	 * the mutex, get the head and tail pointers, release the
	 * mutex, and then they can iterate between the head and tail.
	 */

	pf_state_ref(st); /* get a ref for the list */

	mtx_enter(&pfs->pfs_mtx);
	TAILQ_INSERT_TAIL(&pfs->pfs_list, st, entry_list);
	mtx_leave(&pfs->pfs_mtx);
}

void
pf_state_list_remove(struct pf_state_list *pfs, struct pf_state *st)
{
	/* states can only be removed when the write lock is held */
	rw_assert_wrlock(&pfs->pfs_rwl);

	mtx_enter(&pfs->pfs_mtx);
	TAILQ_REMOVE(&pfs->pfs_list, st, entry_list);
	mtx_leave(&pfs->pfs_mtx);

	pf_state_unref(st); /* list no longer references the state */
}

void
pf_update_state_timeout(struct pf_state *st, int to)
{
	mtx_enter(&st->mtx);
	if (st->timeout != PFTM_UNLINKED)
		st->timeout = to;
	mtx_leave(&st->mtx);
}

int
pf_src_connlimit(struct pf_state **stp)
{
	int			 bad = 0;
	struct pf_src_node	*sn;

	if ((sn = pf_get_src_node((*stp), PF_SN_NONE)) == NULL)
		return (0);

	sn->conn++;
	(*stp)->src.tcp_est = 1;
	pf_add_threshold(&sn->conn_rate);

	if ((*stp)->rule.ptr->max_src_conn &&
	    (*stp)->rule.ptr->max_src_conn < sn->conn) {
		pf_status.lcounters[LCNT_SRCCONN]++;
		bad++;
	}

	if ((*stp)->rule.ptr->max_src_conn_rate.limit &&
	    pf_check_threshold(&sn->conn_rate)) {
		pf_status.lcounters[LCNT_SRCCONNRATE]++;
		bad++;
	}

	if (!bad)
		return (0);

	if ((*stp)->rule.ptr->overload_tbl) {
		struct pfr_addr p;
		u_int32_t	killed = 0;

		pf_status.lcounters[LCNT_OVERLOAD_TABLE]++;
		if (pf_status.debug >= LOG_NOTICE) {
			log(LOG_NOTICE,
			    "pf: pf_src_connlimit: blocking address ");
			pf_print_host(&sn->addr, 0,
			    (*stp)->key[PF_SK_WIRE]->af);
		}

		memset(&p, 0, sizeof(p));
		p.pfra_af = (*stp)->key[PF_SK_WIRE]->af;
		switch ((*stp)->key[PF_SK_WIRE]->af) {
		case AF_INET:
			p.pfra_net = 32;
			p.pfra_ip4addr = sn->addr.v4;
			break;
#ifdef INET6
		case AF_INET6:
			p.pfra_net = 128;
			p.pfra_ip6addr = sn->addr.v6;
			break;
#endif	/* INET6 */
		}

		pfr_insert_kentry((*stp)->rule.ptr->overload_tbl,
		    &p, gettime());

		/* kill existing states if that's required. */
		if ((*stp)->rule.ptr->flush) {
			struct pf_state_key *sk;
			struct pf_state *st;

			pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++;
			RBT_FOREACH(st, pf_state_tree_id, &tree_id) {
				sk = st->key[PF_SK_WIRE];
				/*
				 * Kill states from this source. (Only those
				 * from the same rule if PF_FLUSH_GLOBAL is not
				 * set)
				 */
				if (sk->af ==
				    (*stp)->key[PF_SK_WIRE]->af &&
				    (((*stp)->direction == PF_OUT &&
				    PF_AEQ(&sn->addr, &sk->addr[1], sk->af)) ||
				    ((*stp)->direction == PF_IN &&
				    PF_AEQ(&sn->addr, &sk->addr[0], sk->af))) &&
				    ((*stp)->rule.ptr->flush &
				    PF_FLUSH_GLOBAL ||
				    (*stp)->rule.ptr == st->rule.ptr)) {
					pf_update_state_timeout(st, PFTM_PURGE);
					pf_set_protostate(st, PF_PEER_BOTH,
					    TCPS_CLOSED);
					killed++;
				}
			}
			if (pf_status.debug >= LOG_NOTICE)
				addlog(", %u states killed", killed);
		}
		if (pf_status.debug >= LOG_NOTICE)
			addlog("\n");
	}

	/* kill this state */
	pf_update_state_timeout(*stp, PFTM_PURGE);
	pf_set_protostate(*stp, PF_PEER_BOTH, TCPS_CLOSED);
	return (1);
}

int
pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule,
    enum pf_sn_types type, sa_family_t af, struct pf_addr *src,
    struct pf_addr *raddr, struct pfi_kif *kif)
{
	struct pf_src_node	k;

	if (*sn == NULL) {
		k.af = af;
		k.type = type;
		pf_addrcpy(&k.addr, src, af);
		k.rule.ptr = rule;
		pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
		*sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
	}
	if (*sn == NULL) {
		if (!rule->max_src_nodes ||
		    rule->src_nodes < rule->max_src_nodes)
			(*sn) = pool_get(&pf_src_tree_pl, PR_NOWAIT | PR_ZERO);
		else
			pf_status.lcounters[LCNT_SRCNODES]++;
		if ((*sn) == NULL)
			return (-1);

		pf_init_threshold(&(*sn)->conn_rate,
		    rule->max_src_conn_rate.limit,
		    rule->max_src_conn_rate.seconds);

		(*sn)->type = type;
		(*sn)->af = af;
		(*sn)->rule.ptr = rule;
		pf_addrcpy(&(*sn)->addr, src, af);
		if (raddr)
			pf_addrcpy(&(*sn)->raddr, raddr, af);
		if (RB_INSERT(pf_src_tree,
		    &tree_src_tracking, *sn) != NULL) {
			if (pf_status.debug >= LOG_NOTICE) {
				log(LOG_NOTICE,
				    "pf: src_tree insert failed: ");
				pf_print_host(&(*sn)->addr, 0, af);
				addlog("\n");
			}
			pool_put(&pf_src_tree_pl, *sn);
			return (-1);
		}
		(*sn)->creation = getuptime();
		(*sn)->rule.ptr->src_nodes++;
		if (kif != NULL) {
			(*sn)->kif = kif;
			pfi_kif_ref(kif, PFI_KIF_REF_SRCNODE);
		}
		pf_status.scounters[SCNT_SRC_NODE_INSERT]++;
		pf_status.src_nodes++;
	} else {
		if (rule->max_src_states &&
		    (*sn)->states >= rule->max_src_states) {
			pf_status.lcounters[LCNT_SRCSTATES]++;
			return (-1);
		}
	}
	return (0);
}

void
pf_remove_src_node(struct pf_src_node *sn)
{
	if (sn->states > 0 || sn->expire > getuptime())
		return;

	sn->rule.ptr->src_nodes--;
	if (sn->rule.ptr->states_cur == 0 &&
	    sn->rule.ptr->src_nodes == 0)
		pf_rm_rule(NULL, sn->rule.ptr);
	RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
	pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
	pf_status.src_nodes--;
	pfi_kif_unref(sn->kif, PFI_KIF_REF_SRCNODE);
	pool_put(&pf_src_tree_pl, sn);
}

struct pf_src_node *
pf_get_src_node(struct pf_state *st, enum pf_sn_types type)
{
	struct pf_sn_item	*sni;

	SLIST_FOREACH(sni, &st->src_nodes, next)
		if (sni->sn->type == type)
			return (sni->sn);
	return (NULL);
}

void
pf_state_rm_src_node(struct pf_state *st, struct pf_src_node *sn)
{
	struct pf_sn_item	*sni, *snin, *snip = NULL;

	for (sni = SLIST_FIRST(&st->src_nodes); sni; sni = snin) {
		snin = SLIST_NEXT(sni, next);
		if (sni->sn == sn) {
			if (snip)
				SLIST_REMOVE_AFTER(snip, next);
			else
				SLIST_REMOVE_HEAD(&st->src_nodes, next);
			pool_put(&pf_sn_item_pl, sni);
			sni = NULL;
			sn->states--;
		}
		if (sni != NULL)
			snip = sni;
	}
}

/* state table stuff */

static inline int
pf_state_compare_key(const struct pf_state_key *a,
    const struct pf_state_key *b)
{
	int	diff;

	if ((diff = a->hash - b->hash) != 0)
		return (diff);
	if ((diff = a->proto - b->proto) != 0)
		return (diff);
	if ((diff = a->af - b->af) != 0)
		return (diff);
	if ((diff = pf_addr_compare(&a->addr[0], &b->addr[0], a->af)) != 0)
		return (diff);
	if ((diff = pf_addr_compare(&a->addr[1], &b->addr[1], a->af)) != 0)
		return (diff);
	if ((diff = a->port[0] - b->port[0]) != 0)
		return (diff);
	if ((diff = a->port[1] - b->port[1]) != 0)
		return (diff);
	if ((diff = a->rdomain - b->rdomain) != 0)
		return (diff);
	return (0);
}

static inline int
pf_state_compare_id(const struct pf_state *a, const struct pf_state *b)
{
	if (a->id > b->id)
		return (1);
	if (a->id < b->id)
		return (-1);
	if (a->creatorid > b->creatorid)
		return (1);
	if (a->creatorid < b->creatorid)
		return (-1);

	return (0);
}

/*
 * on failure, pf_state_key_attach() releases the pf_state_key
 * reference and returns NULL.
 */
struct pf_state_key *
pf_state_key_attach(struct pf_state_key *sk, struct pf_state *st, int idx)
{
	struct pf_state_item	*si;
	struct pf_state_key	*cur;
	struct pf_state		*oldst = NULL;

	PF_ASSERT_LOCKED();

	KASSERT(st->key[idx] == NULL);
	sk->sk_removed = 0;
	cur = RBT_INSERT(pf_state_tree, &pf_statetbl, sk);
	if (cur != NULL) {
		sk->sk_removed = 1;
		/* key exists. check for same kif, if none, add to key */
		TAILQ_FOREACH(si, &cur->sk_states, si_entry) {
			struct pf_state *sist = si->si_st;
			if (sist->kif == st->kif &&
			    ((sist->key[PF_SK_WIRE]->af == sk->af &&
			    sist->direction == st->direction) ||
			    (sist->key[PF_SK_WIRE]->af !=
			    sist->key[PF_SK_STACK]->af &&
			    sk->af == sist->key[PF_SK_STACK]->af &&
			    sist->direction != st->direction))) {
				int reuse = 0;

				if (sk->proto == IPPROTO_TCP &&
				    sist->src.state >= TCPS_FIN_WAIT_2 &&
				    sist->dst.state >= TCPS_FIN_WAIT_2)
					reuse = 1;
				if (pf_status.debug >= LOG_NOTICE) {
					log(LOG_NOTICE,
					    "pf: %s key attach %s on %s: ",
					    (idx == PF_SK_WIRE) ?
					    "wire" : "stack",
					    reuse ? "reuse" : "failed",
					    st->kif->pfik_name);
					pf_print_state_parts(st,
					    (idx == PF_SK_WIRE) ? sk : NULL,
					    (idx == PF_SK_STACK) ? sk : NULL);
					addlog(", existing: ");
					pf_print_state_parts(sist,
					    (idx == PF_SK_WIRE) ? sk : NULL,
					    (idx == PF_SK_STACK) ? sk : NULL);
					addlog("\n");
				}
				if (reuse) {
					pf_set_protostate(sist, PF_PEER_BOTH,
					    TCPS_CLOSED);
					/* remove late or sks can go away */
					oldst = sist;
				} else {
					pf_state_key_unref(sk);
					return (NULL);	/* collision! */
				}
			}
		}

		/* reuse the existing state key */
		pf_state_key_unref(sk);
		sk = cur;
	}

	if ((si = pool_get(&pf_state_item_pl, PR_NOWAIT)) == NULL) {
		if (TAILQ_EMPTY(&sk->sk_states)) {
			KASSERT(cur == NULL);
			RBT_REMOVE(pf_state_tree, &pf_statetbl, sk);
			sk->sk_removed = 1;
			pf_state_key_unref(sk);
		}

		return (NULL);
	}

	st->key[idx] = pf_state_key_ref(sk); /* give a ref to state */
	si->si_st = pf_state_ref(st);

	/* list is sorted, if-bound states before floating */
	if (st->kif == pfi_all)
		TAILQ_INSERT_TAIL(&sk->sk_states, si, si_entry);
	else
		TAILQ_INSERT_HEAD(&sk->sk_states, si, si_entry);

	if (oldst)
		pf_remove_state(oldst);

	/* caller owns the pf_state ref, which owns a pf_state_key ref now */
	return (sk);
}

void
pf_detach_state(struct pf_state *st)
{
	KASSERT(st->key[PF_SK_WIRE] != NULL);
	pf_state_key_detach(st, PF_SK_WIRE);

	KASSERT(st->key[PF_SK_STACK] != NULL);
	if (st->key[PF_SK_STACK] != st->key[PF_SK_WIRE])
		pf_state_key_detach(st, PF_SK_STACK);
}

void
pf_state_key_detach(struct pf_state *st, int idx)
{
	struct pf_state_item	*si;
	struct pf_state_key	*sk;

	PF_ASSERT_LOCKED();

	sk = st->key[idx];
	if (sk == NULL)
		return;

	TAILQ_FOREACH(si, &sk->sk_states, si_entry) {
		if (si->si_st == st)
			break;
	}
	if (si == NULL)
		return;

	TAILQ_REMOVE(&sk->sk_states, si, si_entry);
	pool_put(&pf_state_item_pl, si);

	if (TAILQ_EMPTY(&sk->sk_states)) {
		RBT_REMOVE(pf_state_tree, &pf_statetbl, sk);
		sk->sk_removed = 1;
		pf_state_key_unlink_reverse(sk);
		pf_state_key_unlink_inpcb(sk);
		pf_state_key_unref(sk);
	}

	pf_state_unref(st);
}

struct pf_state_key *
pf_alloc_state_key(int pool_flags)
{
	struct pf_state_key	*sk;

	if ((sk = pool_get(&pf_state_key_pl, pool_flags)) == NULL)
		return (NULL);

	PF_REF_INIT(sk->sk_refcnt);
	TAILQ_INIT(&sk->sk_states);
	sk->sk_removed = 1;

	return (sk);
}

static __inline int
pf_state_key_addr_setup(struct pf_pdesc *pd, void *arg, int sidx,
    struct pf_addr *saddr, int didx, struct pf_addr *daddr, int af, int multi)
{
	struct pf_state_key_cmp *key = arg;
#ifdef INET6
	struct pf_addr *target;

	if (af == AF_INET || pd->proto != IPPROTO_ICMPV6)
		goto copy;

	switch (pd->hdr.icmp6.icmp6_type) {
	case ND_NEIGHBOR_SOLICIT:
		if (multi)
			return (-1);
		target = (struct pf_addr *)&pd->hdr.nd_ns.nd_ns_target;
		daddr = target;
		break;
	case ND_NEIGHBOR_ADVERT:
		if (multi)
			return (-1);
		target = (struct pf_addr *)&pd->hdr.nd_ns.nd_ns_target;
		saddr = target;
		if (IN6_IS_ADDR_MULTICAST(&pd->dst->v6)) {
			key->addr[didx].addr32[0] = 0;
			key->addr[didx].addr32[1] = 0;
			key->addr[didx].addr32[2] = 0;
			key->addr[didx].addr32[3] = 0;
			daddr = NULL; /* overwritten */
		}
		break;
	default:
		if (multi) {
			key->addr[sidx].addr32[0] = __IPV6_ADDR_INT32_MLL;
			key->addr[sidx].addr32[1] = 0;
			key->addr[sidx].addr32[2] = 0;
			key->addr[sidx].addr32[3] = __IPV6_ADDR_INT32_ONE;
			saddr = NULL; /* overwritten */
		}
	}
 copy:
#endif	/* INET6 */
	if (saddr)
		pf_addrcpy(&key->addr[sidx], saddr, af);
	if (daddr)
		pf_addrcpy(&key->addr[didx], daddr, af);

	return (0);
}
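
/*
 * Overview (added comment, not in the original source): each state carries
 * two keys.  PF_SK_WIRE describes the packet as seen on the wire,
 * PF_SK_STACK as seen by the local stack; without translation both refer
 * to the same key object.  For an inbound redirect such as
 * 203.0.113.1:80 -> 10.0.0.5:8080 (addresses purely illustrative) the wire
 * key holds the pre-translation pair and the stack key the post-translation
 * pair.  pf_state_key_setup() below builds sk1 from the original addresses
 * and sk2 from the translated ones whenever NAT, an rdomain change or
 * af translation applies.
 */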

int
pf_state_key_setup(struct pf_pdesc *pd, struct pf_state_key **skw,
    struct pf_state_key **sks, int rtableid)
{
	/* if returning error we MUST pool_put state keys ourselves */
	struct pf_state_key *sk1, *sk2;
	u_int wrdom = pd->rdomain;
	int afto = pd->af != pd->naf;

	if ((sk1 = pf_alloc_state_key(PR_NOWAIT | PR_ZERO)) == NULL)
		return (ENOMEM);

	pf_state_key_addr_setup(pd, sk1, pd->sidx, pd->src, pd->didx, pd->dst,
	    pd->af, 0);
	sk1->port[pd->sidx] = pd->osport;
	sk1->port[pd->didx] = pd->odport;
	sk1->proto = pd->proto;
	sk1->af = pd->af;
	sk1->rdomain = pd->rdomain;
	sk1->hash = pf_pkt_hash(sk1->af, sk1->proto,
	    &sk1->addr[0], &sk1->addr[1], sk1->port[0], sk1->port[1]);
	if (rtableid >= 0)
		wrdom = rtable_l2(rtableid);

	if (PF_ANEQ(&pd->nsaddr, pd->src, pd->af) ||
	    PF_ANEQ(&pd->ndaddr, pd->dst, pd->af) ||
	    pd->nsport != pd->osport || pd->ndport != pd->odport ||
	    wrdom != pd->rdomain || afto) {	/* NAT/NAT64 */
		if ((sk2 = pf_alloc_state_key(PR_NOWAIT | PR_ZERO)) == NULL) {
			pf_state_key_unref(sk1);
			return (ENOMEM);
		}
		pf_state_key_addr_setup(pd, sk2, afto ? pd->didx : pd->sidx,
		    &pd->nsaddr, afto ? pd->sidx : pd->didx, &pd->ndaddr,
		    pd->naf, 0);
		sk2->port[afto ? pd->didx : pd->sidx] = pd->nsport;
		sk2->port[afto ? pd->sidx : pd->didx] = pd->ndport;
		if (afto) {
			switch (pd->proto) {
			case IPPROTO_ICMP:
				sk2->proto = IPPROTO_ICMPV6;
				break;
			case IPPROTO_ICMPV6:
				sk2->proto = IPPROTO_ICMP;
				break;
			default:
				sk2->proto = pd->proto;
			}
		} else
			sk2->proto = pd->proto;
		sk2->af = pd->naf;
		sk2->rdomain = wrdom;
		sk2->hash = pf_pkt_hash(sk2->af, sk2->proto,
		    &sk2->addr[0], &sk2->addr[1], sk2->port[0], sk2->port[1]);
	} else
		sk2 = pf_state_key_ref(sk1);

	if (pd->dir == PF_IN) {
		*skw = sk1;
		*sks = sk2;
	} else {
		*sks = sk1;
		*skw = sk2;
	}

	if (pf_status.debug >= LOG_DEBUG) {
		log(LOG_DEBUG, "pf: key setup: ");
		pf_print_state_parts(NULL, *skw, *sks);
		addlog("\n");
	}

	return (0);
}

/*
 * pf_state_insert() does the following:
 * - links the pf_state up with pf_state_key(s).
 * - inserts the pf_state_keys into pf_state_tree.
 * - inserts the pf_state into pf_state_tree_id.
 * - tells pfsync about the state.
 *
 * pf_state_insert() owns the references to the pf_state_key structs
 * it is given. on failure to insert, these references are released.
 * on success, the caller owns a pf_state reference that allows it
 * to access the state keys.
 */

int
pf_state_insert(struct pfi_kif *kif, struct pf_state_key **skwp,
    struct pf_state_key **sksp, struct pf_state *st)
{
	struct pf_state_key *skw = *skwp;
	struct pf_state_key *sks = *sksp;
	int same = (skw == sks);

	PF_ASSERT_LOCKED();

	st->kif = kif;
	PF_STATE_ENTER_WRITE();

	skw = pf_state_key_attach(skw, st, PF_SK_WIRE);
	if (skw == NULL) {
		pf_state_key_unref(sks);
		PF_STATE_EXIT_WRITE();
		return (-1);
	}

	if (same) {
		/* pf_state_key_attach might have swapped skw */
		pf_state_key_unref(sks);
		st->key[PF_SK_STACK] = sks = pf_state_key_ref(skw);
	} else if (pf_state_key_attach(sks, st, PF_SK_STACK) == NULL) {
		pf_state_key_detach(st, PF_SK_WIRE);
		PF_STATE_EXIT_WRITE();
		return (-1);
	}

	if (st->id == 0 && st->creatorid == 0) {
		st->id = htobe64(pf_status.stateid++);
		st->creatorid = pf_status.hostid;
	}
	if (RBT_INSERT(pf_state_tree_id, &tree_id, st) != NULL) {
		if (pf_status.debug >= LOG_NOTICE) {
			log(LOG_NOTICE, "pf: state insert failed: "
			    "id: %016llx creatorid: %08x",
			    betoh64(st->id), ntohl(st->creatorid));
			addlog("\n");
		}
		pf_detach_state(st);
		PF_STATE_EXIT_WRITE();
		return (-1);
	}
	pf_state_list_insert(&pf_state_list, st);
	pf_status.fcounters[FCNT_STATE_INSERT]++;
	pf_status.states++;
	pfi_kif_ref(kif, PFI_KIF_REF_STATE);
	PF_STATE_EXIT_WRITE();

#if NPFSYNC > 0
	pfsync_insert_state(st);
#endif	/* NPFSYNC > 0 */

	*skwp = skw;
	*sksp = sks;

	return (0);
}

struct pf_state *
pf_find_state_byid(struct pf_state_cmp *key)
{
	pf_status.fcounters[FCNT_STATE_SEARCH]++;

	return (RBT_FIND(pf_state_tree_id, &tree_id, (struct pf_state *)key));
}

int
pf_compare_state_keys(struct pf_state_key *a, struct pf_state_key *b,
    struct pfi_kif *kif, u_int dir)
{
	/* a (from hdr) and b (new) must be exact opposites of each other */
	if (a->af == b->af && a->proto == b->proto &&
	    PF_AEQ(&a->addr[0], &b->addr[1], a->af) &&
	    PF_AEQ(&a->addr[1], &b->addr[0], a->af) &&
	    a->port[0] == b->port[1] &&
	    a->port[1] == b->port[0] && a->rdomain == b->rdomain)
		return (0);
	else {
		/* mismatch. must not happen. */
		if (pf_status.debug >= LOG_ERR) {
			log(LOG_ERR,
			    "pf: state key linking mismatch! dir=%s, "
			    "if=%s, stored af=%u, a0: ",
			    dir == PF_OUT ? "OUT" : "IN",
			    kif->pfik_name, a->af);
			pf_print_host(&a->addr[0], a->port[0], a->af);
			addlog(", a1: ");
			pf_print_host(&a->addr[1], a->port[1], a->af);
			addlog(", proto=%u", a->proto);
			addlog(", found af=%u, a0: ", b->af);
			pf_print_host(&b->addr[0], b->port[0], b->af);
			addlog(", a1: ");
			pf_print_host(&b->addr[1], b->port[1], b->af);
			addlog(", proto=%u", b->proto);
			addlog("\n");
		}
		return (-1);
	}
}

int
pf_find_state(struct pf_pdesc *pd, struct pf_state_key_cmp *key,
    struct pf_state **stp)
{
	struct pf_state_key	*sk, *pkt_sk;
	struct pf_state_item	*si;
	struct pf_state		*st = NULL;

	pf_status.fcounters[FCNT_STATE_SEARCH]++;
	if (pf_status.debug >= LOG_DEBUG) {
		log(LOG_DEBUG, "pf: key search, %s on %s: ",
		    pd->dir == PF_OUT ? "out" : "in", pd->kif->pfik_name);
"out" : "in", pd->kif->pfik_name); 1138 pf_print_state_parts(NULL, (struct pf_state_key *)key, NULL); 1139 addlog("\n"); 1140 } 1141 1142 pkt_sk = NULL; 1143 sk = NULL; 1144 if (pd->dir == PF_OUT) { 1145 /* first if block deals with outbound forwarded packet */ 1146 pkt_sk = pd->m->m_pkthdr.pf.statekey; 1147 1148 if (!pf_state_key_isvalid(pkt_sk)) { 1149 pf_mbuf_unlink_state_key(pd->m); 1150 pkt_sk = NULL; 1151 } 1152 1153 if (pkt_sk && pf_state_key_isvalid(pkt_sk->sk_reverse)) 1154 sk = pkt_sk->sk_reverse; 1155 1156 if (pkt_sk == NULL) { 1157 struct inpcb *inp = pd->m->m_pkthdr.pf.inp; 1158 1159 /* here we deal with local outbound packet */ 1160 if (inp != NULL) { 1161 struct pf_state_key *inp_sk; 1162 1163 mtx_enter(&pf_inp_mtx); 1164 inp_sk = inp->inp_pf_sk; 1165 if (pf_state_key_isvalid(inp_sk)) { 1166 sk = inp_sk; 1167 mtx_leave(&pf_inp_mtx); 1168 } else if (inp_sk != NULL) { 1169 KASSERT(inp_sk->sk_inp == inp); 1170 inp_sk->sk_inp = NULL; 1171 inp->inp_pf_sk = NULL; 1172 mtx_leave(&pf_inp_mtx); 1173 1174 pf_state_key_unref(inp_sk); 1175 in_pcbunref(inp); 1176 } else 1177 mtx_leave(&pf_inp_mtx); 1178 } 1179 } 1180 } 1181 1182 if (sk == NULL) { 1183 if ((sk = RBT_FIND(pf_state_tree, &pf_statetbl, 1184 (struct pf_state_key *)key)) == NULL) 1185 return (PF_DROP); 1186 if (pd->dir == PF_OUT && pkt_sk && 1187 pf_compare_state_keys(pkt_sk, sk, pd->kif, pd->dir) == 0) 1188 pf_state_key_link_reverse(sk, pkt_sk); 1189 else if (pd->dir == PF_OUT) 1190 pf_state_key_link_inpcb(sk, pd->m->m_pkthdr.pf.inp); 1191 } 1192 1193 /* remove firewall data from outbound packet */ 1194 if (pd->dir == PF_OUT) 1195 pf_pkt_addr_changed(pd->m); 1196 1197 /* list is sorted, if-bound states before floating ones */ 1198 TAILQ_FOREACH(si, &sk->sk_states, si_entry) { 1199 struct pf_state *sist = si->si_st; 1200 if (sist->timeout != PFTM_PURGE && 1201 (sist->kif == pfi_all || sist->kif == pd->kif) && 1202 ((sist->key[PF_SK_WIRE]->af == sist->key[PF_SK_STACK]->af && 1203 sk == (pd->dir == PF_IN ? sist->key[PF_SK_WIRE] : 1204 sist->key[PF_SK_STACK])) || 1205 (sist->key[PF_SK_WIRE]->af != sist->key[PF_SK_STACK]->af 1206 && pd->dir == PF_IN && (sk == sist->key[PF_SK_STACK] || 1207 sk == sist->key[PF_SK_WIRE])))) { 1208 st = sist; 1209 break; 1210 } 1211 } 1212 1213 if (st == NULL) 1214 return (PF_DROP); 1215 if (ISSET(st->state_flags, PFSTATE_INP_UNLINKED)) 1216 return (PF_DROP); 1217 1218 if (st->rule.ptr->pktrate.limit && pd->dir == st->direction) { 1219 pf_add_threshold(&st->rule.ptr->pktrate); 1220 if (pf_check_threshold(&st->rule.ptr->pktrate)) 1221 return (PF_DROP); 1222 } 1223 1224 *stp = st; 1225 1226 return (PF_MATCH); 1227 } 1228 1229 struct pf_state * 1230 pf_find_state_all(struct pf_state_key_cmp *key, u_int dir, int *more) 1231 { 1232 struct pf_state_key *sk; 1233 struct pf_state_item *si, *ret = NULL; 1234 1235 pf_status.fcounters[FCNT_STATE_SEARCH]++; 1236 1237 sk = RBT_FIND(pf_state_tree, &pf_statetbl, (struct pf_state_key *)key); 1238 1239 if (sk != NULL) { 1240 TAILQ_FOREACH(si, &sk->sk_states, si_entry) { 1241 struct pf_state *sist = si->si_st; 1242 if (dir == PF_INOUT || 1243 (sk == (dir == PF_IN ? sist->key[PF_SK_WIRE] : 1244 sist->key[PF_SK_STACK]))) { 1245 if (more == NULL) 1246 return (sist); 1247 1248 if (ret) 1249 (*more)++; 1250 else 1251 ret = si; 1252 } 1253 } 1254 } 1255 return (ret ? 

void
pf_state_peer_hton(const struct pf_state_peer *s, struct pfsync_state_peer *d)
{
	d->seqlo = htonl(s->seqlo);
	d->seqhi = htonl(s->seqhi);
	d->seqdiff = htonl(s->seqdiff);
	d->max_win = htons(s->max_win);
	d->mss = htons(s->mss);
	d->state = s->state;
	d->wscale = s->wscale;
	if (s->scrub) {
		d->scrub.pfss_flags =
		    htons(s->scrub->pfss_flags & PFSS_TIMESTAMP);
		d->scrub.pfss_ttl = (s)->scrub->pfss_ttl;
		d->scrub.pfss_ts_mod = htonl((s)->scrub->pfss_ts_mod);
		d->scrub.scrub_flag = PFSYNC_SCRUB_FLAG_VALID;
	}
}

void
pf_state_peer_ntoh(const struct pfsync_state_peer *s, struct pf_state_peer *d)
{
	d->seqlo = ntohl(s->seqlo);
	d->seqhi = ntohl(s->seqhi);
	d->seqdiff = ntohl(s->seqdiff);
	d->max_win = ntohs(s->max_win);
	d->mss = ntohs(s->mss);
	d->state = s->state;
	d->wscale = s->wscale;
	if (s->scrub.scrub_flag == PFSYNC_SCRUB_FLAG_VALID &&
	    d->scrub != NULL) {
		d->scrub->pfss_flags =
		    ntohs(s->scrub.pfss_flags) & PFSS_TIMESTAMP;
		d->scrub->pfss_ttl = s->scrub.pfss_ttl;
		d->scrub->pfss_ts_mod = ntohl(s->scrub.pfss_ts_mod);
	}
}

void
pf_state_export(struct pfsync_state *sp, struct pf_state *st)
{
	int32_t expire;

	memset(sp, 0, sizeof(struct pfsync_state));

	/* copy from state key */
	sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0];
	sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1];
	sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0];
	sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1];
	sp->key[PF_SK_WIRE].rdomain = htons(st->key[PF_SK_WIRE]->rdomain);
	sp->key[PF_SK_WIRE].af = st->key[PF_SK_WIRE]->af;
	sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0];
	sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1];
	sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0];
	sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1];
	sp->key[PF_SK_STACK].rdomain = htons(st->key[PF_SK_STACK]->rdomain);
	sp->key[PF_SK_STACK].af = st->key[PF_SK_STACK]->af;
	sp->rtableid[PF_SK_WIRE] = htonl(st->rtableid[PF_SK_WIRE]);
	sp->rtableid[PF_SK_STACK] = htonl(st->rtableid[PF_SK_STACK]);
	sp->proto = st->key[PF_SK_WIRE]->proto;
	sp->af = st->key[PF_SK_WIRE]->af;

	/* copy from state */
	strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname));
	sp->rt = st->rt;
	sp->rt_addr = st->rt_addr;
	sp->creation = htonl(getuptime() - st->creation);
	expire = pf_state_expires(st, st->timeout);
	if (expire <= getuptime())
		sp->expire = htonl(0);
	else
		sp->expire = htonl(expire - getuptime());

	sp->direction = st->direction;
#if NPFLOG > 0
	sp->log = st->log;
#endif	/* NPFLOG > 0 */
	sp->timeout = st->timeout;
	sp->state_flags = htons(st->state_flags);
	if (READ_ONCE(st->sync_defer) != NULL)
		sp->state_flags |= htons(PFSTATE_ACK);
	if (!SLIST_EMPTY(&st->src_nodes))
		sp->sync_flags |= PFSYNC_FLAG_SRCNODE;

	sp->id = st->id;
	sp->creatorid = st->creatorid;
	pf_state_peer_hton(&st->src, &sp->src);
	pf_state_peer_hton(&st->dst, &sp->dst);

	if (st->rule.ptr == NULL)
		sp->rule = htonl(-1);
	else
		sp->rule = htonl(st->rule.ptr->nr);
	if (st->anchor.ptr == NULL)
		sp->anchor = htonl(-1);
	else
		sp->anchor = htonl(st->anchor.ptr->nr);
	sp->nat_rule = htonl(-1);	/* left for compat, nat_rule is gone */

	pf_state_counter_hton(st->packets[0], sp->packets[0]);
	pf_state_counter_hton(st->packets[1], sp->packets[1]);
	pf_state_counter_hton(st->bytes[0], sp->bytes[0]);
	pf_state_counter_hton(st->bytes[1], sp->bytes[1]);

	sp->max_mss = htons(st->max_mss);
	sp->min_ttl = st->min_ttl;
	sp->set_tos = st->set_tos;
	sp->set_prio[0] = st->set_prio[0];
	sp->set_prio[1] = st->set_prio[1];
}

int
pf_state_alloc_scrub_memory(const struct pfsync_state_peer *s,
    struct pf_state_peer *d)
{
	if (s->scrub.scrub_flag && d->scrub == NULL)
		return (pf_normalize_tcp_alloc(d));

	return (0);
}

#if NPFSYNC > 0
int
pf_state_import(const struct pfsync_state *sp, int flags)
{
	struct pf_state	*st = NULL;
	struct pf_state_key *skw = NULL, *sks = NULL;
	struct pf_rule *r = NULL;
	struct pfi_kif	*kif;
	int pool_flags;
	int error = ENOMEM;
	int n = 0;

	PF_ASSERT_LOCKED();

	if (sp->creatorid == 0) {
		DPFPRINTF(LOG_NOTICE, "%s: invalid creator id: %08x", __func__,
		    ntohl(sp->creatorid));
		return (EINVAL);
	}

	if ((kif = pfi_kif_get(sp->ifname, NULL)) == NULL) {
		DPFPRINTF(LOG_NOTICE, "%s: unknown interface: %s", __func__,
		    sp->ifname);
		if (flags & PFSYNC_SI_IOCTL)
			return (EINVAL);
		return (0);	/* skip this state */
	}

	if (sp->af == 0)
		return (0);	/* skip this state */

	/*
	 * If the ruleset checksums match or the state is coming from the
	 * ioctl, it's safe to associate the state with the rule of that
	 * number.
	 */
	if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) &&
	    (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) &&
	    ntohl(sp->rule) < pf_main_ruleset.rules.active.rcount) {
		TAILQ_FOREACH(r, pf_main_ruleset.rules.active.ptr, entries)
			if (ntohl(sp->rule) == n++)
				break;
	} else
		r = &pf_default_rule;

	if ((r->max_states && r->states_cur >= r->max_states))
		goto cleanup;

	if (flags & PFSYNC_SI_IOCTL)
		pool_flags = PR_WAITOK | PR_LIMITFAIL | PR_ZERO;
	else
		pool_flags = PR_NOWAIT | PR_LIMITFAIL | PR_ZERO;

	if ((st = pool_get(&pf_state_pl, pool_flags)) == NULL)
		goto cleanup;

	if ((skw = pf_alloc_state_key(pool_flags)) == NULL)
		goto cleanup;

	if ((sp->key[PF_SK_WIRE].af &&
	    (sp->key[PF_SK_WIRE].af != sp->key[PF_SK_STACK].af)) ||
	    PF_ANEQ(&sp->key[PF_SK_WIRE].addr[0],
	    &sp->key[PF_SK_STACK].addr[0], sp->af) ||
	    PF_ANEQ(&sp->key[PF_SK_WIRE].addr[1],
	    &sp->key[PF_SK_STACK].addr[1], sp->af) ||
	    sp->key[PF_SK_WIRE].port[0] != sp->key[PF_SK_STACK].port[0] ||
	    sp->key[PF_SK_WIRE].port[1] != sp->key[PF_SK_STACK].port[1] ||
	    sp->key[PF_SK_WIRE].rdomain != sp->key[PF_SK_STACK].rdomain) {
		if ((sks = pf_alloc_state_key(pool_flags)) == NULL)
			goto cleanup;
	} else
		sks = pf_state_key_ref(skw);

	/* allocate memory for scrub info */
	if (pf_state_alloc_scrub_memory(&sp->src, &st->src) ||
	    pf_state_alloc_scrub_memory(&sp->dst, &st->dst))
		goto cleanup;

	/* copy to state key(s) */
	skw->addr[0] = sp->key[PF_SK_WIRE].addr[0];
	skw->addr[1] = sp->key[PF_SK_WIRE].addr[1];
	skw->port[0] = sp->key[PF_SK_WIRE].port[0];
	skw->port[1] = sp->key[PF_SK_WIRE].port[1];
	skw->rdomain = ntohs(sp->key[PF_SK_WIRE].rdomain);
	skw->proto = sp->proto;
	if (!(skw->af = sp->key[PF_SK_WIRE].af))
		skw->af = sp->af;
	skw->hash = pf_pkt_hash(skw->af, skw->proto,
	    &skw->addr[0], &skw->addr[1], skw->port[0], skw->port[1]);

	if (sks != skw) {
		sks->addr[0] = sp->key[PF_SK_STACK].addr[0];
		sks->addr[1] = sp->key[PF_SK_STACK].addr[1];
		sks->port[0] = sp->key[PF_SK_STACK].port[0];
		sks->port[1] = sp->key[PF_SK_STACK].port[1];
		sks->rdomain = ntohs(sp->key[PF_SK_STACK].rdomain);
		if (!(sks->af = sp->key[PF_SK_STACK].af))
			sks->af = sp->af;
		if (sks->af != skw->af) {
			switch (sp->proto) {
			case IPPROTO_ICMP:
				sks->proto = IPPROTO_ICMPV6;
				break;
			case IPPROTO_ICMPV6:
				sks->proto = IPPROTO_ICMP;
				break;
			default:
				sks->proto = sp->proto;
			}
		} else
			sks->proto = sp->proto;

		if (((sks->af != AF_INET) && (sks->af != AF_INET6)) ||
		    ((skw->af != AF_INET) && (skw->af != AF_INET6))) {
			error = EINVAL;
			goto cleanup;
		}

		sks->hash = pf_pkt_hash(sks->af, sks->proto,
		    &sks->addr[0], &sks->addr[1], sks->port[0], sks->port[1]);

	} else if ((sks->af != AF_INET) && (sks->af != AF_INET6)) {
		error = EINVAL;
		goto cleanup;
	}
	st->rtableid[PF_SK_WIRE] = ntohl(sp->rtableid[PF_SK_WIRE]);
	st->rtableid[PF_SK_STACK] = ntohl(sp->rtableid[PF_SK_STACK]);

	/* copy to state */
	st->rt_addr = sp->rt_addr;
	st->rt = sp->rt;
	st->creation = getuptime() - ntohl(sp->creation);
	st->expire = getuptime();
	if (ntohl(sp->expire)) {
		u_int32_t timeout;

		timeout = r->timeout[sp->timeout];
		if (!timeout)
			timeout = pf_default_rule.timeout[sp->timeout];

		/* sp->expire may have been adaptively scaled by export. */
		st->expire -= timeout - ntohl(sp->expire);
	}

	st->direction = sp->direction;
	st->log = sp->log;
	st->timeout = sp->timeout;
	st->state_flags = ntohs(sp->state_flags);
	st->max_mss = ntohs(sp->max_mss);
	st->min_ttl = sp->min_ttl;
	st->set_tos = sp->set_tos;
	st->set_prio[0] = sp->set_prio[0];
	st->set_prio[1] = sp->set_prio[1];

	st->id = sp->id;
	st->creatorid = sp->creatorid;
	pf_state_peer_ntoh(&sp->src, &st->src);
	pf_state_peer_ntoh(&sp->dst, &st->dst);

	st->rule.ptr = r;
	st->anchor.ptr = NULL;

	PF_REF_INIT(st->refcnt);
	mtx_init(&st->mtx, IPL_NET);

	/* XXX when we have anchors, use STATE_INC_COUNTERS */
	r->states_cur++;
	r->states_tot++;

	st->sync_state = PFSYNC_S_NONE;
	st->pfsync_time = getuptime();
#if NPFSYNC > 0
	pfsync_init_state(st, skw, sks, flags);
#endif

	if (pf_state_insert(kif, &skw, &sks, st) != 0) {
		/* XXX when we have anchors, use STATE_DEC_COUNTERS */
		r->states_cur--;
		error = EEXIST;
		goto cleanup_state;
	}

	return (0);

 cleanup:
	if (skw != NULL)
		pf_state_key_unref(skw);
	if (sks != NULL)
		pf_state_key_unref(sks);

 cleanup_state:	/* pf_state_insert frees the state keys */
	if (st) {
		if (st->dst.scrub)
			pool_put(&pf_state_scrub_pl, st->dst.scrub);
		if (st->src.scrub)
			pool_put(&pf_state_scrub_pl, st->src.scrub);
		pool_put(&pf_state_pl, st);
	}
	return (error);
}
#endif	/* NPFSYNC > 0 */

/* END state table stuff */

void		 pf_purge_states(void *);
struct task	 pf_purge_states_task =
		     TASK_INITIALIZER(pf_purge_states, NULL);

void		 pf_purge_states_tick(void *);
struct timeout	 pf_purge_states_to =
		     TIMEOUT_INITIALIZER(pf_purge_states_tick, NULL);

unsigned int	 pf_purge_expired_states(unsigned int, unsigned int);

/*
 * how many states to scan this interval.
 *
 * this is set when the timeout fires, and reduced by the task. the
 * task will reschedule itself until the limit is reduced to zero,
 * and then it adds the timeout again.
 */
unsigned int pf_purge_states_limit;

/*
 * limit how many states are processed with locks held per run of
 * the state purge task.
 */
unsigned int pf_purge_states_collect = 64;

void
pf_purge_states_tick(void *null)
{
	unsigned int limit = pf_status.states;
	unsigned int interval = pf_default_rule.timeout[PFTM_INTERVAL];

	if (limit == 0) {
		timeout_add_sec(&pf_purge_states_to, 1);
		return;
	}

	/*
	 * process a fraction of the state table every second
	 */

	if (interval > 1)
		limit /= interval;

	pf_purge_states_limit = limit;
	task_add(systqmp, &pf_purge_states_task);
}

void
pf_purge_states(void *null)
{
	unsigned int limit;
	unsigned int scanned;

	limit = pf_purge_states_limit;
	if (limit < pf_purge_states_collect)
		limit = pf_purge_states_collect;

	scanned = pf_purge_expired_states(limit, pf_purge_states_collect);
	if (scanned >= pf_purge_states_limit) {
		/* we've run out of states to scan this "interval" */
		timeout_add_sec(&pf_purge_states_to, 1);
		return;
	}

	pf_purge_states_limit -= scanned;
	task_add(systqmp, &pf_purge_states_task);
}

void		 pf_purge_tick(void *);
struct timeout	 pf_purge_to =
		     TIMEOUT_INITIALIZER(pf_purge_tick, NULL);

void		 pf_purge(void *);
struct task	 pf_purge_task =
		     TASK_INITIALIZER(pf_purge, NULL);

void
pf_purge_tick(void *null)
{
	task_add(systqmp, &pf_purge_task);
}

void
pf_purge(void *null)
{
	unsigned int interval = max(1, pf_default_rule.timeout[PFTM_INTERVAL]);

	PF_LOCK();

	pf_purge_expired_src_nodes();

	PF_UNLOCK();

	/*
	 * Fragments don't require PF_LOCK(), they use their own lock.
	 */
	pf_purge_expired_fragments();

	/* interpret the interval as idle time between runs */
	timeout_add_sec(&pf_purge_to, interval);
}
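
/*
 * Worked example (added comment; numbers are illustrative only): with
 * "set timeout { adaptive.start 6000, adaptive.end 12000 }", a base
 * timeout of 60 seconds and 9000 states, pf_state_expires() scales the
 * timeout linearly towards zero:
 *
 *	60 * (12000 - 9000) / (12000 - 6000) = 30 seconds
 *
 * and once the state count reaches adaptive.end the state expires
 * immediately.
 */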

int32_t
pf_state_expires(const struct pf_state *st, uint8_t stimeout)
{
	u_int32_t	timeout;
	u_int32_t	start;
	u_int32_t	end;
	u_int32_t	states;

	/*
	 * pf_state_expires is used by the state purge task to
	 * decide if a state is a candidate for cleanup, and by the
	 * pfsync state export code to populate an expiry time.
	 *
	 * this function may be called by the state purge task while
	 * the state is being modified. avoid inconsistent reads of
	 * state->timeout by having the caller do the read (and any
	 * checks it needs to do on the same variable) and then pass
	 * their view of the timeout in here for this function to use.
	 * the only consequence of using a stale timeout value is
	 * that the state won't be a candidate for purging until the
	 * next pass of the purge task.
	 */

	/* handle all PFTM_* >= PFTM_MAX here */
	if (stimeout >= PFTM_MAX)
		return (0);

	KASSERT(stimeout < PFTM_MAX);

	timeout = st->rule.ptr->timeout[stimeout];
	if (!timeout)
		timeout = pf_default_rule.timeout[stimeout];

	start = st->rule.ptr->timeout[PFTM_ADAPTIVE_START];
	if (start) {
		end = st->rule.ptr->timeout[PFTM_ADAPTIVE_END];
		states = st->rule.ptr->states_cur;
	} else {
		start = pf_default_rule.timeout[PFTM_ADAPTIVE_START];
		end = pf_default_rule.timeout[PFTM_ADAPTIVE_END];
		states = pf_status.states;
	}
	if (end && states > start && start < end) {
		if (states >= end)
			return (0);

		timeout = (u_int64_t)timeout * (end - states) / (end - start);
	}

	return (st->expire + timeout);
}

void
pf_purge_expired_src_nodes(void)
{
	struct pf_src_node	*cur, *next;

	PF_ASSERT_LOCKED();

	RB_FOREACH_SAFE(cur, pf_src_tree, &tree_src_tracking, next) {
		if (cur->states == 0 && cur->expire <= getuptime()) {
			pf_remove_src_node(cur);
		}
	}
}

void
pf_src_tree_remove_state(struct pf_state *st)
{
	u_int32_t		 timeout;
	struct pf_sn_item	*sni;

	while ((sni = SLIST_FIRST(&st->src_nodes)) != NULL) {
		SLIST_REMOVE_HEAD(&st->src_nodes, next);
		if (st->src.tcp_est)
			--sni->sn->conn;
		if (--sni->sn->states == 0) {
			timeout = st->rule.ptr->timeout[PFTM_SRC_NODE];
			if (!timeout)
				timeout =
				    pf_default_rule.timeout[PFTM_SRC_NODE];
			sni->sn->expire = getuptime() + timeout;
		}
		pool_put(&pf_sn_item_pl, sni);
	}
}

void
pf_remove_state(struct pf_state *st)
{
	PF_ASSERT_LOCKED();

	mtx_enter(&st->mtx);
	if (st->timeout == PFTM_UNLINKED) {
		mtx_leave(&st->mtx);
		return;
	}
	st->timeout = PFTM_UNLINKED;
	mtx_leave(&st->mtx);

	/* handle load balancing related tasks */
	pf_postprocess_addr(st);

	if (st->src.state == PF_TCPS_PROXY_DST) {
		pf_send_tcp(st->rule.ptr, st->key[PF_SK_WIRE]->af,
		    &st->key[PF_SK_WIRE]->addr[1],
		    &st->key[PF_SK_WIRE]->addr[0],
		    st->key[PF_SK_WIRE]->port[1],
		    st->key[PF_SK_WIRE]->port[0],
		    st->src.seqhi, st->src.seqlo + 1,
		    TH_RST|TH_ACK, 0, 0, 0, 1, st->tag,
		    st->key[PF_SK_WIRE]->rdomain);
	}
	if (st->key[PF_SK_STACK]->proto == IPPROTO_TCP)
		pf_set_protostate(st, PF_PEER_BOTH, TCPS_CLOSED);

	RBT_REMOVE(pf_state_tree_id, &tree_id, st);
#if NPFLOW > 0
	if (st->state_flags & PFSTATE_PFLOW)
		export_pflow(st);
#endif	/* NPFLOW > 0 */
#if NPFSYNC > 0
	pfsync_delete_state(st);
#endif	/* NPFSYNC > 0 */
	pf_src_tree_remove_state(st);
	pf_detach_state(st);
}

void
pf_remove_divert_state(struct inpcb *inp)
{
	struct pf_state_key	*sk;
	struct pf_state_item	*si;

	PF_ASSERT_UNLOCKED();

	if (READ_ONCE(inp->inp_pf_sk) == NULL)
		return;

	mtx_enter(&pf_inp_mtx);
	sk = pf_state_key_ref(inp->inp_pf_sk);
	mtx_leave(&pf_inp_mtx);
	if (sk == NULL)
		return;

	PF_LOCK();
	PF_STATE_ENTER_WRITE();
	TAILQ_FOREACH(si, &sk->sk_states, si_entry) {
		struct pf_state *sist = si->si_st;
		if (sk == sist->key[PF_SK_STACK] && sist->rule.ptr &&
		    (sist->rule.ptr->divert.type == PF_DIVERT_TO ||
		    sist->rule.ptr->divert.type == PF_DIVERT_REPLY)) {
			if (sist->key[PF_SK_STACK]->proto == IPPROTO_TCP &&
			    sist->key[PF_SK_WIRE] != sist->key[PF_SK_STACK]) {
				/*
				 * If the local address is translated, keep
				 * the state for "tcp.closed" seconds to
				 * prevent its source port from being reused.
				 */
				if (sist->src.state < TCPS_FIN_WAIT_2 ||
				    sist->dst.state < TCPS_FIN_WAIT_2) {
					pf_set_protostate(sist, PF_PEER_BOTH,
					    TCPS_TIME_WAIT);
					pf_update_state_timeout(sist,
					    PFTM_TCP_CLOSED);
					sist->expire = getuptime();
				}
				sist->state_flags |= PFSTATE_INP_UNLINKED;
			} else
				pf_remove_state(sist);
			break;
		}
	}
	PF_STATE_EXIT_WRITE();
	PF_UNLOCK();

	pf_state_key_unref(sk);
}

void
pf_free_state(struct pf_state *st)
{
	struct pf_rule_item	*ri;

	PF_ASSERT_LOCKED();

#if NPFSYNC > 0
	if (pfsync_state_in_use(st))
		return;
#endif	/* NPFSYNC > 0 */

	KASSERT(st->timeout == PFTM_UNLINKED);
	if (--st->rule.ptr->states_cur == 0 &&
	    st->rule.ptr->src_nodes == 0)
		pf_rm_rule(NULL, st->rule.ptr);
	if (st->anchor.ptr != NULL)
		if (--st->anchor.ptr->states_cur == 0)
			pf_rm_rule(NULL, st->anchor.ptr);
	while ((ri = SLIST_FIRST(&st->match_rules))) {
		SLIST_REMOVE_HEAD(&st->match_rules, entry);
		if (--ri->r->states_cur == 0 &&
		    ri->r->src_nodes == 0)
			pf_rm_rule(NULL, ri->r);
		pool_put(&pf_rule_item_pl, ri);
	}
	pf_normalize_tcp_cleanup(st);
	pfi_kif_unref(st->kif, PFI_KIF_REF_STATE);
	pf_state_list_remove(&pf_state_list, st);
	if (st->tag)
		pf_tag_unref(st->tag);
	pf_state_unref(st);
	pf_status.fcounters[FCNT_STATE_REMOVALS]++;
	pf_status.states--;
}

unsigned int
pf_purge_expired_states(const unsigned int limit, const unsigned int collect)
{
	/*
	 * this task/thread/context/whatever is the only thing that
	 * removes states from the pf_state_list, so the cur reference
	 * it holds between calls is guaranteed to still be in the
	 * list.
	 */
	static struct pf_state	*cur = NULL;

	struct pf_state		*head, *tail;
	struct pf_state		*st;
	SLIST_HEAD(pf_state_gcl, pf_state) gcl = SLIST_HEAD_INITIALIZER(gcl);
	time_t			 now;
	unsigned int		 scanned;
	unsigned int		 collected = 0;

	PF_ASSERT_UNLOCKED();

	rw_enter_read(&pf_state_list.pfs_rwl);

	mtx_enter(&pf_state_list.pfs_mtx);
	head = TAILQ_FIRST(&pf_state_list.pfs_list);
	tail = TAILQ_LAST(&pf_state_list.pfs_list, pf_state_queue);
	mtx_leave(&pf_state_list.pfs_mtx);

	if (head == NULL) {
		/* the list is empty */
		rw_exit_read(&pf_state_list.pfs_rwl);
		return (limit);
	}

	/* (re)start at the front of the list */
	if (cur == NULL)
		cur = head;

	now = getuptime();

	for (scanned = 0; scanned < limit; scanned++) {
		uint8_t stimeout = cur->timeout;
		unsigned int limited = 0;

		if ((stimeout == PFTM_UNLINKED) ||
		    (pf_state_expires(cur, stimeout) <= now)) {
			st = pf_state_ref(cur);
			SLIST_INSERT_HEAD(&gcl, st, gc_list);

			if (++collected >= collect)
				limited = 1;
		}

		/* don't iterate past the end of our view of the list */
		if (cur == tail) {
			cur = NULL;
			break;
		}

		cur = TAILQ_NEXT(cur, entry_list);

		/* don't spend too much time here. */
*/ 1961 if (ISSET(READ_ONCE(curcpu()->ci_schedstate.spc_schedflags), 1962 SPCF_SHOULDYIELD) || limited) 1963 break; 1964 } 1965 1966 rw_exit_read(&pf_state_list.pfs_rwl); 1967 1968 if (SLIST_EMPTY(&gcl)) 1969 return (scanned); 1970 1971 rw_enter_write(&pf_state_list.pfs_rwl); 1972 PF_LOCK(); 1973 PF_STATE_ENTER_WRITE(); 1974 SLIST_FOREACH(st, &gcl, gc_list) { 1975 if (st->timeout != PFTM_UNLINKED) 1976 pf_remove_state(st); 1977 1978 pf_free_state(st); 1979 } 1980 PF_STATE_EXIT_WRITE(); 1981 PF_UNLOCK(); 1982 rw_exit_write(&pf_state_list.pfs_rwl); 1983 1984 while ((st = SLIST_FIRST(&gcl)) != NULL) { 1985 SLIST_REMOVE_HEAD(&gcl, gc_list); 1986 pf_state_unref(st); 1987 } 1988 1989 return (scanned); 1990 } 1991 1992 int 1993 pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw, int wait) 1994 { 1995 if (aw->type != PF_ADDR_TABLE) 1996 return (0); 1997 if ((aw->p.tbl = pfr_attach_table(rs, aw->v.tblname, wait)) == NULL) 1998 return (1); 1999 return (0); 2000 } 2001 2002 void 2003 pf_tbladdr_remove(struct pf_addr_wrap *aw) 2004 { 2005 if (aw->type != PF_ADDR_TABLE || aw->p.tbl == NULL) 2006 return; 2007 pfr_detach_table(aw->p.tbl); 2008 aw->p.tbl = NULL; 2009 } 2010 2011 void 2012 pf_tbladdr_copyout(struct pf_addr_wrap *aw) 2013 { 2014 struct pfr_ktable *kt = aw->p.tbl; 2015 2016 if (aw->type != PF_ADDR_TABLE || kt == NULL) 2017 return; 2018 if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) 2019 kt = kt->pfrkt_root; 2020 aw->p.tbl = NULL; 2021 aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ? 2022 kt->pfrkt_cnt : -1; 2023 } 2024 2025 void 2026 pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af) 2027 { 2028 switch (af) { 2029 case AF_INET: { 2030 u_int32_t a = ntohl(addr->addr32[0]); 2031 addlog("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255, 2032 (a>>8)&255, a&255); 2033 if (p) { 2034 p = ntohs(p); 2035 addlog(":%u", p); 2036 } 2037 break; 2038 } 2039 #ifdef INET6 2040 case AF_INET6: { 2041 u_int16_t b; 2042 u_int8_t i, curstart, curend, maxstart, maxend; 2043 curstart = curend = maxstart = maxend = 255; 2044 for (i = 0; i < 8; i++) { 2045 if (!addr->addr16[i]) { 2046 if (curstart == 255) 2047 curstart = i; 2048 curend = i; 2049 } else { 2050 if ((curend - curstart) > 2051 (maxend - maxstart)) { 2052 maxstart = curstart; 2053 maxend = curend; 2054 } 2055 curstart = curend = 255; 2056 } 2057 } 2058 if ((curend - curstart) > 2059 (maxend - maxstart)) { 2060 maxstart = curstart; 2061 maxend = curend; 2062 } 2063 for (i = 0; i < 8; i++) { 2064 if (i >= maxstart && i <= maxend) { 2065 if (i == 0) 2066 addlog(":"); 2067 if (i == maxend) 2068 addlog(":"); 2069 } else { 2070 b = ntohs(addr->addr16[i]); 2071 addlog("%x", b); 2072 if (i < 7) 2073 addlog(":"); 2074 } 2075 } 2076 if (p) { 2077 p = ntohs(p); 2078 addlog("[%u]", p); 2079 } 2080 break; 2081 } 2082 #endif /* INET6 */ 2083 } 2084 } 2085 2086 void 2087 pf_print_state(struct pf_state *st) 2088 { 2089 pf_print_state_parts(st, NULL, NULL); 2090 } 2091 2092 void 2093 pf_print_state_parts(struct pf_state *st, 2094 struct pf_state_key *skwp, struct pf_state_key *sksp) 2095 { 2096 struct pf_state_key *skw, *sks; 2097 u_int8_t proto, dir; 2098 2099 /* Do our best to fill these, but they're skipped if NULL */ 2100 skw = skwp ? skwp : (st ? st->key[PF_SK_WIRE] : NULL); 2101 sks = sksp ? sksp : (st ? st->key[PF_SK_STACK] : NULL); 2102 proto = skw ? skw->proto : (sks ? sks->proto : 0); 2103 dir = st ? 
st->direction : 0; 2104 2105 switch (proto) { 2106 case IPPROTO_IPV4: 2107 addlog("IPv4"); 2108 break; 2109 case IPPROTO_IPV6: 2110 addlog("IPv6"); 2111 break; 2112 case IPPROTO_TCP: 2113 addlog("TCP"); 2114 break; 2115 case IPPROTO_UDP: 2116 addlog("UDP"); 2117 break; 2118 case IPPROTO_ICMP: 2119 addlog("ICMP"); 2120 break; 2121 case IPPROTO_ICMPV6: 2122 addlog("ICMPv6"); 2123 break; 2124 default: 2125 addlog("%u", proto); 2126 break; 2127 } 2128 switch (dir) { 2129 case PF_IN: 2130 addlog(" in"); 2131 break; 2132 case PF_OUT: 2133 addlog(" out"); 2134 break; 2135 } 2136 if (skw) { 2137 addlog(" wire: (%d) ", skw->rdomain); 2138 pf_print_host(&skw->addr[0], skw->port[0], skw->af); 2139 addlog(" "); 2140 pf_print_host(&skw->addr[1], skw->port[1], skw->af); 2141 } 2142 if (sks) { 2143 addlog(" stack: (%d) ", sks->rdomain); 2144 if (sks != skw) { 2145 pf_print_host(&sks->addr[0], sks->port[0], sks->af); 2146 addlog(" "); 2147 pf_print_host(&sks->addr[1], sks->port[1], sks->af); 2148 } else 2149 addlog("-"); 2150 } 2151 if (st) { 2152 if (proto == IPPROTO_TCP) { 2153 addlog(" [lo=%u high=%u win=%u modulator=%u", 2154 st->src.seqlo, st->src.seqhi, 2155 st->src.max_win, st->src.seqdiff); 2156 if (st->src.wscale && st->dst.wscale) 2157 addlog(" wscale=%u", 2158 st->src.wscale & PF_WSCALE_MASK); 2159 addlog("]"); 2160 addlog(" [lo=%u high=%u win=%u modulator=%u", 2161 st->dst.seqlo, st->dst.seqhi, 2162 st->dst.max_win, st->dst.seqdiff); 2163 if (st->src.wscale && st->dst.wscale) 2164 addlog(" wscale=%u", 2165 st->dst.wscale & PF_WSCALE_MASK); 2166 addlog("]"); 2167 } 2168 addlog(" %u:%u", st->src.state, st->dst.state); 2169 if (st->rule.ptr) 2170 addlog(" @%d", st->rule.ptr->nr); 2171 } 2172 } 2173 2174 void 2175 pf_print_flags(u_int8_t f) 2176 { 2177 if (f) 2178 addlog(" "); 2179 if (f & TH_FIN) 2180 addlog("F"); 2181 if (f & TH_SYN) 2182 addlog("S"); 2183 if (f & TH_RST) 2184 addlog("R"); 2185 if (f & TH_PUSH) 2186 addlog("P"); 2187 if (f & TH_ACK) 2188 addlog("A"); 2189 if (f & TH_URG) 2190 addlog("U"); 2191 if (f & TH_ECE) 2192 addlog("E"); 2193 if (f & TH_CWR) 2194 addlog("W"); 2195 } 2196 2197 #define PF_SET_SKIP_STEPS(i) \ 2198 do { \ 2199 while (head[i] != cur) { \ 2200 head[i]->skip[i].ptr = cur; \ 2201 head[i] = TAILQ_NEXT(head[i], entries); \ 2202 } \ 2203 } while (0) 2204 2205 void 2206 pf_calc_skip_steps(struct pf_rulequeue *rules) 2207 { 2208 struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT]; 2209 int i; 2210 2211 cur = TAILQ_FIRST(rules); 2212 prev = cur; 2213 for (i = 0; i < PF_SKIP_COUNT; ++i) 2214 head[i] = cur; 2215 while (cur != NULL) { 2216 if (cur->kif != prev->kif || cur->ifnot != prev->ifnot) 2217 PF_SET_SKIP_STEPS(PF_SKIP_IFP); 2218 if (cur->direction != prev->direction) 2219 PF_SET_SKIP_STEPS(PF_SKIP_DIR); 2220 if (cur->onrdomain != prev->onrdomain || 2221 cur->ifnot != prev->ifnot) 2222 PF_SET_SKIP_STEPS(PF_SKIP_RDOM); 2223 if (cur->af != prev->af) 2224 PF_SET_SKIP_STEPS(PF_SKIP_AF); 2225 if (cur->proto != prev->proto) 2226 PF_SET_SKIP_STEPS(PF_SKIP_PROTO); 2227 if (cur->src.neg != prev->src.neg || 2228 pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr)) 2229 PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR); 2230 if (cur->dst.neg != prev->dst.neg || 2231 pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr)) 2232 PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR); 2233 if (cur->src.port[0] != prev->src.port[0] || 2234 cur->src.port[1] != prev->src.port[1] || 2235 cur->src.port_op != prev->src.port_op) 2236 PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT); 2237 if (cur->dst.port[0] != prev->dst.port[0] || 2238 
cur->dst.port[1] != prev->dst.port[1] || 2239 cur->dst.port_op != prev->dst.port_op) 2240 PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT); 2241 2242 prev = cur; 2243 cur = TAILQ_NEXT(cur, entries); 2244 } 2245 for (i = 0; i < PF_SKIP_COUNT; ++i) 2246 PF_SET_SKIP_STEPS(i); 2247 } 2248 2249 int 2250 pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2) 2251 { 2252 if (aw1->type != aw2->type) 2253 return (1); 2254 switch (aw1->type) { 2255 case PF_ADDR_ADDRMASK: 2256 case PF_ADDR_RANGE: 2257 if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, AF_INET6)) 2258 return (1); 2259 if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, AF_INET6)) 2260 return (1); 2261 return (0); 2262 case PF_ADDR_DYNIFTL: 2263 return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt); 2264 case PF_ADDR_NONE: 2265 case PF_ADDR_NOROUTE: 2266 case PF_ADDR_URPFFAILED: 2267 return (0); 2268 case PF_ADDR_TABLE: 2269 return (aw1->p.tbl != aw2->p.tbl); 2270 case PF_ADDR_RTLABEL: 2271 return (aw1->v.rtlabel != aw2->v.rtlabel); 2272 default: 2273 addlog("invalid address type: %d\n", aw1->type); 2274 return (1); 2275 } 2276 } 2277 2278 /* This algorithm computes 'a + b - c' in ones-complement using a trick to 2279 * emulate at most one ones-complement subtraction. This thereby limits net 2280 * carries/borrows to at most one, eliminating a reduction step and saving one 2281 * each of +, >>, & and ~. 2282 * 2283 * def. x mod y = x - (x//y)*y for integer x,y 2284 * def. sum = x mod 2^16 2285 * def. accumulator = (x >> 16) mod 2^16 2286 * 2287 * The trick works as follows: subtracting exactly one u_int16_t from the 2288 * u_int32_t x incurs at most one underflow, wrapping its upper 16-bits, the 2289 * accumulator, to 2^16 - 1. Adding this to the 16-bit sum preserves the 2290 * ones-complement borrow: 2291 * 2292 * (sum + accumulator) mod 2^16 2293 * = { assume underflow: accumulator := 2^16 - 1 } 2294 * (sum + 2^16 - 1) mod 2^16 2295 * = { mod } 2296 * (sum - 1) mod 2^16 2297 * 2298 * Although this breaks for sum = 0, giving 0xffff, which is ones-complement's 2299 * other zero, not -1, that cannot occur: the 16-bit sum cannot be underflown 2300 * to zero as that requires subtraction of at least 2^16, which exceeds a 2301 * single u_int16_t's range. 2302 * 2303 * We use the following theorem to derive the implementation: 2304 * 2305 * th. (x + (y mod z)) mod z = (x + y) mod z (0) 2306 * proof. 2307 * (x + (y mod z)) mod z 2308 * = { def mod } 2309 * (x + y - (y//z)*z) mod z 2310 * = { (a + b*c) mod c = a mod c } 2311 * (x + y) mod z [end of proof] 2312 * 2313 * ... and thereby obtain: 2314 * 2315 * (sum + accumulator) mod 2^16 2316 * = { def. accumulator, def. sum } 2317 * (x mod 2^16 + (x >> 16) mod 2^16) mod 2^16 2318 * = { (0), twice } 2319 * (x + (x >> 16)) mod 2^16 2320 * = { x mod 2^n = x & (2^n - 1) } 2321 * (x + (x >> 16)) & 0xffff 2322 * 2323 * Note: this serves also as a reduction step for at most one add (as the 2324 * trailing mod 2^16 prevents further reductions by destroying carries). 
2325 */ 2326 __inline void 2327 pf_cksum_fixup(u_int16_t *cksum, u_int16_t was, u_int16_t now, 2328 u_int8_t proto) 2329 { 2330 u_int32_t x; 2331 const int udp = proto == IPPROTO_UDP; 2332 2333 x = *cksum + was - now; 2334 x = (x + (x >> 16)) & 0xffff; 2335 2336 /* optimise: eliminate a branch when not udp */ 2337 if (udp && *cksum == 0x0000) 2338 return; 2339 if (udp && x == 0x0000) 2340 x = 0xffff; 2341 2342 *cksum = (u_int16_t)(x); 2343 } 2344 2345 #ifdef INET6 2346 /* pre: coverage(cksum) is superset of coverage(covered_cksum) */ 2347 static __inline void 2348 pf_cksum_uncover(u_int16_t *cksum, u_int16_t covered_cksum, u_int8_t proto) 2349 { 2350 pf_cksum_fixup(cksum, ~covered_cksum, 0x0, proto); 2351 } 2352 2353 /* pre: disjoint(coverage(cksum), coverage(uncovered_cksum)) */ 2354 static __inline void 2355 pf_cksum_cover(u_int16_t *cksum, u_int16_t uncovered_cksum, u_int8_t proto) 2356 { 2357 pf_cksum_fixup(cksum, 0x0, ~uncovered_cksum, proto); 2358 } 2359 #endif /* INET6 */ 2360 2361 /* pre: *a is 16-bit aligned within its packet 2362 * 2363 * This algorithm emulates 16-bit ones-complement sums on a twos-complement 2364 * machine by conserving ones-complement's otherwise discarded carries in the 2365 * upper bits of x. These accumulated carries when added to the lower 16-bits 2366 * over at least zero 'reduction' steps then complete the ones-complement sum. 2367 * 2368 * def. sum = x mod 2^16 2369 * def. accumulator = (x >> 16) 2370 * 2371 * At most two reduction steps 2372 * 2373 * x := sum + accumulator 2374 * = { def sum, def accumulator } 2375 * x := x mod 2^16 + (x >> 16) 2376 * = { x mod 2^n = x & (2^n - 1) } 2377 * x := (x & 0xffff) + (x >> 16) 2378 * 2379 * are necessary to incorporate the accumulated carries (at most one per add) 2380 * i.e. to reduce x < 2^16 from at most 16 carries in the upper 16 bits. 2381 * 2382 * The function is also invariant over the endian of the host. Why? 2383 * 2384 * Define the unary transpose operator ~ on a bitstring in python slice 2385 * notation as lambda m: m[P:] + m[:P] , for some constant pivot P. 2386 * 2387 * th. ~ distributes over ones-complement addition, denoted by +_1, i.e. 2388 * 2389 * ~m +_1 ~n = ~(m +_1 n) (for all bitstrings m,n of equal length) 2390 * 2391 * proof. Regard the bitstrings in m +_1 n as split at P, forming at most two 2392 * 'half-adds'. Under ones-complement addition, each half-add carries to the 2393 * other, so the sum of each half-add is unaffected by their relative 2394 * order. Therefore: 2395 * 2396 * ~m +_1 ~n 2397 * = { half-adds invariant under transposition } 2398 * ~s 2399 * = { substitute } 2400 * ~(m +_1 n) [end of proof] 2401 * 2402 * th. Summing two in-memory ones-complement 16-bit variables m,n on a machine 2403 * with the converse endian does not alter the result. 2404 * 2405 * proof. 
2406 * { converse machine endian: load/store transposes, P := 8 } 2407 * ~(~m +_1 ~n) 2408 * = { ~ over +_1 } 2409 * ~~m +_1 ~~n 2410 * = { ~ is an involution } 2411 * m +_1 n [end of proof] 2412 * 2413 */ 2414 #define NEG(x) ((u_int16_t)~(x)) 2415 void 2416 pf_cksum_fixup_a(u_int16_t *cksum, const struct pf_addr *a, 2417 const struct pf_addr *an, sa_family_t af, u_int8_t proto) 2418 { 2419 u_int32_t x; 2420 const u_int16_t *n = an->addr16; 2421 const u_int16_t *o = a->addr16; 2422 const int udp = proto == IPPROTO_UDP; 2423 2424 switch (af) { 2425 case AF_INET: 2426 x = *cksum + o[0] + NEG(n[0]) + o[1] + NEG(n[1]); 2427 break; 2428 #ifdef INET6 2429 case AF_INET6: 2430 x = *cksum + o[0] + NEG(n[0]) + o[1] + NEG(n[1]) +\ 2431 o[2] + NEG(n[2]) + o[3] + NEG(n[3]) +\ 2432 o[4] + NEG(n[4]) + o[5] + NEG(n[5]) +\ 2433 o[6] + NEG(n[6]) + o[7] + NEG(n[7]); 2434 break; 2435 #endif /* INET6 */ 2436 default: 2437 unhandled_af(af); 2438 } 2439 2440 x = (x & 0xffff) + (x >> 16); 2441 x = (x & 0xffff) + (x >> 16); 2442 2443 /* optimise: eliminate a branch when not udp */ 2444 if (udp && *cksum == 0x0000) 2445 return; 2446 if (udp && x == 0x0000) 2447 x = 0xffff; 2448 2449 *cksum = (u_int16_t)(x); 2450 } 2451 2452 int 2453 pf_patch_8(struct pf_pdesc *pd, u_int8_t *f, u_int8_t v, bool hi) 2454 { 2455 int rewrite = 0; 2456 2457 if (*f != v) { 2458 u_int16_t old = htons(hi ? (*f << 8) : *f); 2459 u_int16_t new = htons(hi ? ( v << 8) : v); 2460 2461 pf_cksum_fixup(pd->pcksum, old, new, pd->proto); 2462 *f = v; 2463 rewrite = 1; 2464 } 2465 2466 return (rewrite); 2467 } 2468 2469 /* pre: *f is 16-bit aligned within its packet */ 2470 int 2471 pf_patch_16(struct pf_pdesc *pd, u_int16_t *f, u_int16_t v) 2472 { 2473 int rewrite = 0; 2474 2475 if (*f != v) { 2476 pf_cksum_fixup(pd->pcksum, *f, v, pd->proto); 2477 *f = v; 2478 rewrite = 1; 2479 } 2480 2481 return (rewrite); 2482 } 2483 2484 int 2485 pf_patch_16_unaligned(struct pf_pdesc *pd, void *f, u_int16_t v, bool hi) 2486 { 2487 int rewrite = 0; 2488 u_int8_t *fb = (u_int8_t*)f; 2489 u_int8_t *vb = (u_int8_t*)&v; 2490 2491 if (hi && ALIGNED_POINTER(f, u_int16_t)) { 2492 return (pf_patch_16(pd, f, v)); /* optimise */ 2493 } 2494 2495 rewrite += pf_patch_8(pd, fb++, *vb++, hi); 2496 rewrite += pf_patch_8(pd, fb++, *vb++, !hi); 2497 2498 return (rewrite); 2499 } 2500 2501 /* pre: *f is 16-bit aligned within its packet */ 2502 /* pre: pd->proto != IPPROTO_UDP */ 2503 int 2504 pf_patch_32(struct pf_pdesc *pd, u_int32_t *f, u_int32_t v) 2505 { 2506 int rewrite = 0; 2507 u_int16_t *pc = pd->pcksum; 2508 u_int8_t proto = pd->proto; 2509 2510 /* optimise: inline udp fixup code is unused; let compiler scrub it */ 2511 if (proto == IPPROTO_UDP) 2512 panic("%s: udp", __func__); 2513 2514 /* optimise: skip *f != v guard; true for all use-cases */ 2515 pf_cksum_fixup(pc, *f / (1 << 16), v / (1 << 16), proto); 2516 pf_cksum_fixup(pc, *f % (1 << 16), v % (1 << 16), proto); 2517 2518 *f = v; 2519 rewrite = 1; 2520 2521 return (rewrite); 2522 } 2523 2524 int 2525 pf_patch_32_unaligned(struct pf_pdesc *pd, void *f, u_int32_t v, bool hi) 2526 { 2527 int rewrite = 0; 2528 u_int8_t *fb = (u_int8_t*)f; 2529 u_int8_t *vb = (u_int8_t*)&v; 2530 2531 if (hi && ALIGNED_POINTER(f, u_int32_t)) { 2532 return (pf_patch_32(pd, f, v)); /* optimise */ 2533 } 2534 2535 rewrite += pf_patch_8(pd, fb++, *vb++, hi); 2536 rewrite += pf_patch_8(pd, fb++, *vb++, !hi); 2537 rewrite += pf_patch_8(pd, fb++, *vb++, hi); 2538 rewrite += pf_patch_8(pd, fb++, *vb++, !hi); 2539 2540 return (rewrite); 2541 } 2542 
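/*
 * Illustrative sketch, guarded out of the build and not part of pf itself:
 * a standalone userland rendering of the fixup rule derived above, as used
 * by pf_cksum_fixup() and the pf_patch_*() family.  The function name and
 * the sample values below are hypothetical and serve only to make the
 * ones-complement arithmetic concrete; the kernel version additionally
 * special-cases UDP's 0x0000 "no checksum" encoding.
 */
#if 0
#include <stdint.h>
#include <stdio.h>

static uint16_t
cksum_fixup_demo(uint16_t cksum, uint16_t was, uint16_t now)
{
	uint32_t x;

	/* 'a + b - c': at most one net borrow ... */
	x = (uint32_t)cksum + was - now;
	/* ... so a single fold reduces x below 2^16 */
	x = (x + (x >> 16)) & 0xffff;

	return ((uint16_t)x);
}

int
main(void)
{
	/*
	 * Rewrite a quoted port from 0x0050 to 0x1f90 in a header whose
	 * checksum field holds 0x1c46: 0x1c46 + 0x0050 - 0x1f90 wraps to
	 * 0xfffffd06, and one fold gives 0xfd05, the same value a full
	 * recomputation over the patched header yields.
	 */
	printf("%04x\n", cksum_fixup_demo(0x1c46, 0x0050, 0x1f90));
	return (0);
}
#endif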
2543 int 2544 pf_icmp_mapping(struct pf_pdesc *pd, u_int8_t type, int *icmp_dir, 2545 u_int16_t *virtual_id, u_int16_t *virtual_type) 2546 { 2547 /* 2548 * ICMP types marked with PF_OUT are typically responses to 2549 * PF_IN, and will match states in the opposite direction. 2550 * PF_IN ICMP types need to match a state with that type. 2551 */ 2552 *icmp_dir = PF_OUT; 2553 2554 /* Queries (and responses) */ 2555 switch (pd->af) { 2556 case AF_INET: 2557 switch (type) { 2558 case ICMP_ECHO: 2559 *icmp_dir = PF_IN; 2560 /* FALLTHROUGH */ 2561 case ICMP_ECHOREPLY: 2562 *virtual_type = ICMP_ECHO; 2563 *virtual_id = pd->hdr.icmp.icmp_id; 2564 break; 2565 2566 case ICMP_TSTAMP: 2567 *icmp_dir = PF_IN; 2568 /* FALLTHROUGH */ 2569 case ICMP_TSTAMPREPLY: 2570 *virtual_type = ICMP_TSTAMP; 2571 *virtual_id = pd->hdr.icmp.icmp_id; 2572 break; 2573 2574 case ICMP_IREQ: 2575 *icmp_dir = PF_IN; 2576 /* FALLTHROUGH */ 2577 case ICMP_IREQREPLY: 2578 *virtual_type = ICMP_IREQ; 2579 *virtual_id = pd->hdr.icmp.icmp_id; 2580 break; 2581 2582 case ICMP_MASKREQ: 2583 *icmp_dir = PF_IN; 2584 /* FALLTHROUGH */ 2585 case ICMP_MASKREPLY: 2586 *virtual_type = ICMP_MASKREQ; 2587 *virtual_id = pd->hdr.icmp.icmp_id; 2588 break; 2589 2590 case ICMP_IPV6_WHEREAREYOU: 2591 *icmp_dir = PF_IN; 2592 /* FALLTHROUGH */ 2593 case ICMP_IPV6_IAMHERE: 2594 *virtual_type = ICMP_IPV6_WHEREAREYOU; 2595 *virtual_id = 0; /* Nothing sane to match on! */ 2596 break; 2597 2598 case ICMP_MOBILE_REGREQUEST: 2599 *icmp_dir = PF_IN; 2600 /* FALLTHROUGH */ 2601 case ICMP_MOBILE_REGREPLY: 2602 *virtual_type = ICMP_MOBILE_REGREQUEST; 2603 *virtual_id = 0; /* Nothing sane to match on! */ 2604 break; 2605 2606 case ICMP_ROUTERSOLICIT: 2607 *icmp_dir = PF_IN; 2608 /* FALLTHROUGH */ 2609 case ICMP_ROUTERADVERT: 2610 *virtual_type = ICMP_ROUTERSOLICIT; 2611 *virtual_id = 0; /* Nothing sane to match on! */ 2612 break; 2613 2614 /* These ICMP types map to other connections */ 2615 case ICMP_UNREACH: 2616 case ICMP_SOURCEQUENCH: 2617 case ICMP_REDIRECT: 2618 case ICMP_TIMXCEED: 2619 case ICMP_PARAMPROB: 2620 /* These will not be used, but set them anyway */ 2621 *icmp_dir = PF_IN; 2622 *virtual_type = htons(type); 2623 *virtual_id = 0; 2624 return (1); /* These types match to another state */ 2625 2626 /* 2627 * All remaining ICMP types get their own states, 2628 * and will only match in one direction. 2629 */ 2630 default: 2631 *icmp_dir = PF_IN; 2632 *virtual_type = type; 2633 *virtual_id = 0; 2634 break; 2635 } 2636 break; 2637 #ifdef INET6 2638 case AF_INET6: 2639 switch (type) { 2640 case ICMP6_ECHO_REQUEST: 2641 *icmp_dir = PF_IN; 2642 /* FALLTHROUGH */ 2643 case ICMP6_ECHO_REPLY: 2644 *virtual_type = ICMP6_ECHO_REQUEST; 2645 *virtual_id = pd->hdr.icmp6.icmp6_id; 2646 break; 2647 2648 case MLD_LISTENER_QUERY: 2649 case MLD_LISTENER_REPORT: { 2650 struct mld_hdr *mld = &pd->hdr.mld; 2651 u_int32_t h; 2652 2653 /* 2654 * A Listener Report can be sent by clients 2655 * without an associated Listener Query. 2656 * In addition to that, when a Report is sent as a 2657 * reply to a Query, its source and destination 2658 * addresses are different. 2659 */ 2660 *icmp_dir = PF_IN; 2661 *virtual_type = MLD_LISTENER_QUERY; 2662 /* generate fake id for these messages */ 2663 h = mld->mld_addr.s6_addr32[0] ^ 2664 mld->mld_addr.s6_addr32[1] ^ 2665 mld->mld_addr.s6_addr32[2] ^ 2666 mld->mld_addr.s6_addr32[3]; 2667 *virtual_id = (h >> 16) ^ (h & 0xffff); 2668 break; 2669 } 2670 2671 /* 2672 * ICMP6_FQDN and ICMP6_NI query/reply are the same type as 2673 * ICMP6_WRU. 2674 */ 2675 case ICMP6_WRUREQUEST: 2676 *icmp_dir = PF_IN; 2677 /* FALLTHROUGH */ 2678 case ICMP6_WRUREPLY: 2679 *virtual_type = ICMP6_WRUREQUEST; 2680 *virtual_id = 0; /* Nothing sane to match on! */ 2681 break; 2682 2683 case MLD_MTRACE: 2684 *icmp_dir = PF_IN; 2685 /* FALLTHROUGH */ 2686 case MLD_MTRACE_RESP: 2687 *virtual_type = MLD_MTRACE; 2688 *virtual_id = 0; /* Nothing sane to match on! */ 2689 break; 2690 2691 case ND_NEIGHBOR_SOLICIT: 2692 *icmp_dir = PF_IN; 2693 /* FALLTHROUGH */ 2694 case ND_NEIGHBOR_ADVERT: { 2695 struct nd_neighbor_solicit *nd = &pd->hdr.nd_ns; 2696 u_int32_t h; 2697 2698 *virtual_type = ND_NEIGHBOR_SOLICIT; 2699 /* generate fake id for these messages */ 2700 h = nd->nd_ns_target.s6_addr32[0] ^ 2701 nd->nd_ns_target.s6_addr32[1] ^ 2702 nd->nd_ns_target.s6_addr32[2] ^ 2703 nd->nd_ns_target.s6_addr32[3]; 2704 *virtual_id = (h >> 16) ^ (h & 0xffff); 2705 /* 2706 * The extra work here deals with the 'keep state' option 2707 * on a pass rule for unsolicited advertisements. By 2708 * returning 1 (state_icmp = 1) we override 'keep 2709 * state' with 'no state', so no state is created for 2710 * unsolicited advertisements. Nobody expects an answer to 2711 * an unsolicited advertisement, so we should be good. 2712 */ 2713 if (type == ND_NEIGHBOR_ADVERT) { 2714 *virtual_type = htons(*virtual_type); 2715 return (1); 2716 } 2717 break; 2718 } 2719 2720 /* 2721 * These ICMP types map to other connections. 2722 * ND_REDIRECT can't be in this list because the triggering 2723 * packet header is optional. 2724 */ 2725 case ICMP6_DST_UNREACH: 2726 case ICMP6_PACKET_TOO_BIG: 2727 case ICMP6_TIME_EXCEEDED: 2728 case ICMP6_PARAM_PROB: 2729 /* These will not be used, but set them anyway */ 2730 *icmp_dir = PF_IN; 2731 *virtual_type = htons(type); 2732 *virtual_id = 0; 2733 return (1); /* These types match to another state */ 2734 /* 2735 * All remaining ICMP6 types get their own states, 2736 * and will only match in one direction. 
2737 */ 2738 default: 2739 *icmp_dir = PF_IN; 2740 *virtual_type = type; 2741 *virtual_id = 0; 2742 break; 2743 } 2744 break; 2745 #endif /* INET6 */ 2746 } 2747 *virtual_type = htons(*virtual_type); 2748 return (0); /* These types match to their own state */ 2749 } 2750 2751 void 2752 pf_translate_icmp(struct pf_pdesc *pd, struct pf_addr *qa, u_int16_t *qp, 2753 struct pf_addr *oa, struct pf_addr *na, u_int16_t np) 2754 { 2755 /* note: doesn't trouble to fixup quoted checksums, if any */ 2756 2757 /* change quoted protocol port */ 2758 if (qp != NULL) 2759 pf_patch_16(pd, qp, np); 2760 2761 /* change quoted ip address */ 2762 pf_cksum_fixup_a(pd->pcksum, qa, na, pd->af, pd->proto); 2763 pf_addrcpy(qa, na, pd->af); 2764 2765 /* change network-header's ip address */ 2766 if (oa) 2767 pf_translate_a(pd, oa, na); 2768 } 2769 2770 /* pre: *a is 16-bit aligned within its packet */ 2771 /* *a is a network header src/dst address */ 2772 int 2773 pf_translate_a(struct pf_pdesc *pd, struct pf_addr *a, struct pf_addr *an) 2774 { 2775 int rewrite = 0; 2776 2777 /* warning: !PF_ANEQ != PF_AEQ */ 2778 if (!PF_ANEQ(a, an, pd->af)) 2779 return (0); 2780 2781 /* fixup transport pseudo-header, if any */ 2782 switch (pd->proto) { 2783 case IPPROTO_TCP: /* FALLTHROUGH */ 2784 case IPPROTO_UDP: /* FALLTHROUGH */ 2785 case IPPROTO_ICMPV6: 2786 pf_cksum_fixup_a(pd->pcksum, a, an, pd->af, pd->proto); 2787 break; 2788 default: 2789 break; /* assume no pseudo-header */ 2790 } 2791 2792 pf_addrcpy(a, an, pd->af); 2793 rewrite = 1; 2794 2795 return (rewrite); 2796 } 2797 2798 #ifdef INET6 2799 /* pf_translate_af() may change pd->m, adjust local copies after calling */ 2800 int 2801 pf_translate_af(struct pf_pdesc *pd) 2802 { 2803 static const struct pf_addr zero; 2804 struct ip *ip4; 2805 struct ip6_hdr *ip6; 2806 int copyback = 0; 2807 u_int hlen, ohlen, dlen; 2808 u_int16_t *pc; 2809 u_int8_t af_proto, naf_proto; 2810 2811 hlen = (pd->naf == AF_INET) ? 
sizeof(*ip4) : sizeof(*ip6); 2812 ohlen = pd->off; 2813 dlen = pd->tot_len - pd->off; 2814 pc = pd->pcksum; 2815 2816 af_proto = naf_proto = pd->proto; 2817 if (naf_proto == IPPROTO_ICMP) 2818 af_proto = IPPROTO_ICMPV6; 2819 if (naf_proto == IPPROTO_ICMPV6) 2820 af_proto = IPPROTO_ICMP; 2821 2822 /* uncover stale pseudo-header */ 2823 switch (af_proto) { 2824 case IPPROTO_ICMPV6: 2825 /* optimise: unchanged for TCP/UDP */ 2826 pf_cksum_fixup(pc, htons(af_proto), 0x0, af_proto); 2827 pf_cksum_fixup(pc, htons(dlen), 0x0, af_proto); 2828 /* FALLTHROUGH */ 2829 case IPPROTO_UDP: /* FALLTHROUGH */ 2830 case IPPROTO_TCP: 2831 pf_cksum_fixup_a(pc, pd->src, &zero, pd->af, af_proto); 2832 pf_cksum_fixup_a(pc, pd->dst, &zero, pd->af, af_proto); 2833 copyback = 1; 2834 break; 2835 default: 2836 break; /* assume no pseudo-header */ 2837 } 2838 2839 /* replace the network header */ 2840 m_adj(pd->m, pd->off); 2841 pd->src = NULL; 2842 pd->dst = NULL; 2843 2844 if ((M_PREPEND(pd->m, hlen, M_DONTWAIT)) == NULL) { 2845 pd->m = NULL; 2846 return (-1); 2847 } 2848 2849 pd->off = hlen; 2850 pd->tot_len += hlen - ohlen; 2851 2852 switch (pd->naf) { 2853 case AF_INET: 2854 ip4 = mtod(pd->m, struct ip *); 2855 memset(ip4, 0, hlen); 2856 ip4->ip_v = IPVERSION; 2857 ip4->ip_hl = hlen >> 2; 2858 ip4->ip_tos = pd->tos; 2859 ip4->ip_len = htons(hlen + dlen); 2860 ip4->ip_id = htons(ip_randomid()); 2861 ip4->ip_off = htons(IP_DF); 2862 ip4->ip_ttl = pd->ttl; 2863 ip4->ip_p = pd->proto; 2864 ip4->ip_src = pd->nsaddr.v4; 2865 ip4->ip_dst = pd->ndaddr.v4; 2866 break; 2867 case AF_INET6: 2868 ip6 = mtod(pd->m, struct ip6_hdr *); 2869 memset(ip6, 0, hlen); 2870 ip6->ip6_vfc = IPV6_VERSION; 2871 ip6->ip6_flow |= htonl((u_int32_t)pd->tos << 20); 2872 ip6->ip6_plen = htons(dlen); 2873 ip6->ip6_nxt = pd->proto; 2874 if (!pd->ttl || pd->ttl > IPV6_DEFHLIM) 2875 ip6->ip6_hlim = IPV6_DEFHLIM; 2876 else 2877 ip6->ip6_hlim = pd->ttl; 2878 ip6->ip6_src = pd->nsaddr.v6; 2879 ip6->ip6_dst = pd->ndaddr.v6; 2880 break; 2881 default: 2882 unhandled_af(pd->naf); 2883 } 2884 2885 /* UDP over IPv6 must be checksummed per rfc2460 p27 */ 2886 if (naf_proto == IPPROTO_UDP && *pc == 0x0000 && 2887 pd->naf == AF_INET6) { 2888 pd->m->m_pkthdr.csum_flags |= M_UDP_CSUM_OUT; 2889 } 2890 2891 /* cover fresh pseudo-header */ 2892 switch (naf_proto) { 2893 case IPPROTO_ICMPV6: 2894 /* optimise: unchanged for TCP/UDP */ 2895 pf_cksum_fixup(pc, 0x0, htons(naf_proto), naf_proto); 2896 pf_cksum_fixup(pc, 0x0, htons(dlen), naf_proto); 2897 /* FALLTHROUGH */ 2898 case IPPROTO_UDP: /* FALLTHROUGH */ 2899 case IPPROTO_TCP: 2900 pf_cksum_fixup_a(pc, &zero, &pd->nsaddr, pd->naf, naf_proto); 2901 pf_cksum_fixup_a(pc, &zero, &pd->ndaddr, pd->naf, naf_proto); 2902 copyback = 1; 2903 break; 2904 default: 2905 break; /* assume no pseudo-header */ 2906 } 2907 2908 /* flush pd->pcksum */ 2909 if (copyback) 2910 m_copyback(pd->m, pd->off, pd->hdrlen, &pd->hdr, M_NOWAIT); 2911 2912 return (0); 2913 } 2914 2915 int 2916 pf_change_icmp_af(struct mbuf *m, int ipoff2, struct pf_pdesc *pd, 2917 struct pf_pdesc *pd2, struct pf_addr *src, struct pf_addr *dst, 2918 sa_family_t af, sa_family_t naf) 2919 { 2920 struct mbuf *n = NULL; 2921 struct ip *ip4; 2922 struct ip6_hdr *ip6; 2923 u_int hlen, ohlen, dlen; 2924 int d; 2925 2926 if (af == naf || (af != AF_INET && af != AF_INET6) || 2927 (naf != AF_INET && naf != AF_INET6)) 2928 return (-1); 2929 2930 /* split the mbuf chain on the quoted ip/ip6 header boundary */ 2931 if ((n = m_split(m, ipoff2, M_DONTWAIT)) == NULL) 2932 
return (-1); 2933 2934 /* new quoted header */ 2935 hlen = naf == AF_INET ? sizeof(*ip4) : sizeof(*ip6); 2936 /* old quoted header */ 2937 ohlen = pd2->off - ipoff2; 2938 2939 /* trim old quoted header */ 2940 pf_cksum_uncover(pd->pcksum, in_cksum(n, ohlen), pd->proto); 2941 m_adj(n, ohlen); 2942 2943 /* prepend a new, translated, quoted header */ 2944 if ((M_PREPEND(n, hlen, M_DONTWAIT)) == NULL) 2945 return (-1); 2946 2947 switch (naf) { 2948 case AF_INET: 2949 ip4 = mtod(n, struct ip *); 2950 memset(ip4, 0, sizeof(*ip4)); 2951 ip4->ip_v = IPVERSION; 2952 ip4->ip_hl = sizeof(*ip4) >> 2; 2953 ip4->ip_len = htons(sizeof(*ip4) + pd2->tot_len - ohlen); 2954 ip4->ip_id = htons(ip_randomid()); 2955 ip4->ip_off = htons(IP_DF); 2956 ip4->ip_ttl = pd2->ttl; 2957 if (pd2->proto == IPPROTO_ICMPV6) 2958 ip4->ip_p = IPPROTO_ICMP; 2959 else 2960 ip4->ip_p = pd2->proto; 2961 ip4->ip_src = src->v4; 2962 ip4->ip_dst = dst->v4; 2963 in_hdr_cksum_out(n, NULL); 2964 break; 2965 case AF_INET6: 2966 ip6 = mtod(n, struct ip6_hdr *); 2967 memset(ip6, 0, sizeof(*ip6)); 2968 ip6->ip6_vfc = IPV6_VERSION; 2969 ip6->ip6_plen = htons(pd2->tot_len - ohlen); 2970 if (pd2->proto == IPPROTO_ICMP) 2971 ip6->ip6_nxt = IPPROTO_ICMPV6; 2972 else 2973 ip6->ip6_nxt = pd2->proto; 2974 if (!pd2->ttl || pd2->ttl > IPV6_DEFHLIM) 2975 ip6->ip6_hlim = IPV6_DEFHLIM; 2976 else 2977 ip6->ip6_hlim = pd2->ttl; 2978 ip6->ip6_src = src->v6; 2979 ip6->ip6_dst = dst->v6; 2980 break; 2981 } 2982 2983 /* cover new quoted header */ 2984 /* optimise: any new AF_INET header of ours sums to zero */ 2985 if (naf != AF_INET) { 2986 pf_cksum_cover(pd->pcksum, in_cksum(n, hlen), pd->proto); 2987 } 2988 2989 /* reattach modified quoted packet to outer header */ 2990 { 2991 int nlen = n->m_pkthdr.len; 2992 m_cat(m, n); 2993 m->m_pkthdr.len += nlen; 2994 } 2995 2996 /* account for altered length */ 2997 d = hlen - ohlen; 2998 2999 if (pd->proto == IPPROTO_ICMPV6) { 3000 /* fixup pseudo-header */ 3001 dlen = pd->tot_len - pd->off; 3002 pf_cksum_fixup(pd->pcksum, 3003 htons(dlen), htons(dlen + d), pd->proto); 3004 } 3005 3006 pd->tot_len += d; 3007 pd2->tot_len += d; 3008 pd2->off += d; 3009 3010 /* note: not bothering to update network headers as 3011 these are due for rewrite by pf_translate_af() */ 3012 3013 return (0); 3014 } 3015 3016 3017 #define PTR_IP(field) (offsetof(struct ip, field)) 3018 #define PTR_IP6(field) (offsetof(struct ip6_hdr, field)) 3019 3020 int 3021 pf_translate_icmp_af(struct pf_pdesc *pd, int af, void *arg) 3022 { 3023 struct icmp *icmp4; 3024 struct icmp6_hdr *icmp6; 3025 u_int32_t mtu; 3026 int32_t ptr = -1; 3027 u_int8_t type; 3028 u_int8_t code; 3029 3030 switch (af) { 3031 case AF_INET: 3032 icmp6 = arg; 3033 type = icmp6->icmp6_type; 3034 code = icmp6->icmp6_code; 3035 mtu = ntohl(icmp6->icmp6_mtu); 3036 3037 switch (type) { 3038 case ICMP6_ECHO_REQUEST: 3039 type = ICMP_ECHO; 3040 break; 3041 case ICMP6_ECHO_REPLY: 3042 type = ICMP_ECHOREPLY; 3043 break; 3044 case ICMP6_DST_UNREACH: 3045 type = ICMP_UNREACH; 3046 switch (code) { 3047 case ICMP6_DST_UNREACH_NOROUTE: 3048 case ICMP6_DST_UNREACH_BEYONDSCOPE: 3049 case ICMP6_DST_UNREACH_ADDR: 3050 code = ICMP_UNREACH_HOST; 3051 break; 3052 case ICMP6_DST_UNREACH_ADMIN: 3053 code = ICMP_UNREACH_HOST_PROHIB; 3054 break; 3055 case ICMP6_DST_UNREACH_NOPORT: 3056 code = ICMP_UNREACH_PORT; 3057 break; 3058 default: 3059 return (-1); 3060 } 3061 break; 3062 case ICMP6_PACKET_TOO_BIG: 3063 type = ICMP_UNREACH; 3064 code = ICMP_UNREACH_NEEDFRAG; 3065 mtu -= 20; 3066 break; 3067 case ICMP6_TIME_EXCEEDED: 3068 type = ICMP_TIMXCEED; 3069 break; 3070 case ICMP6_PARAM_PROB: 3071 switch (code) { 3072 case ICMP6_PARAMPROB_HEADER: 3073 type = ICMP_PARAMPROB; 3074 code = ICMP_PARAMPROB_ERRATPTR; 3075 ptr = ntohl(icmp6->icmp6_pptr); 3076 3077 if (ptr == PTR_IP6(ip6_vfc)) 3078 ; /* preserve */ 3079 else if (ptr == PTR_IP6(ip6_vfc) + 1) 3080 ptr = PTR_IP(ip_tos); 3081 else if (ptr == PTR_IP6(ip6_plen) || 3082 ptr == PTR_IP6(ip6_plen) + 1) 3083 ptr = PTR_IP(ip_len); 3084 else if (ptr == PTR_IP6(ip6_nxt)) 3085 ptr = PTR_IP(ip_p); 3086 else if (ptr == PTR_IP6(ip6_hlim)) 3087 ptr = PTR_IP(ip_ttl); 3088 else if (ptr >= PTR_IP6(ip6_src) && 3089 ptr < PTR_IP6(ip6_dst)) 3090 ptr = PTR_IP(ip_src); 3091 else if (ptr >= PTR_IP6(ip6_dst) && 3092 ptr < sizeof(struct ip6_hdr)) 3093 ptr = PTR_IP(ip_dst); 3094 else { 3095 return (-1); 3096 } 3097 break; 3098 case ICMP6_PARAMPROB_NEXTHEADER: 3099 type = ICMP_UNREACH; 3100 code = ICMP_UNREACH_PROTOCOL; 3101 break; 3102 default: 3103 return (-1); 3104 } 3105 break; 3106 default: 3107 return (-1); 3108 } 3109 3110 pf_patch_8(pd, &icmp6->icmp6_type, type, PF_HI); 3111 pf_patch_8(pd, &icmp6->icmp6_code, code, PF_LO); 3112 3113 /* aligns well with an icmpv4 nextmtu */ 3114 pf_patch_32(pd, &icmp6->icmp6_mtu, htonl(mtu)); 3115 3116 /* icmpv4 pptr is a single byte, the most significant one */ 3117 if (ptr >= 0) 3118 pf_patch_32(pd, &icmp6->icmp6_pptr, htonl(ptr << 24)); 3119 break; 3120 case AF_INET6: 3121 icmp4 = arg; 3122 type = icmp4->icmp_type; 3123 code = icmp4->icmp_code; 3124 mtu = ntohs(icmp4->icmp_nextmtu); 3125 3126 switch (type) { 3127 case ICMP_ECHO: 3128 type = ICMP6_ECHO_REQUEST; 3129 break; 3130 case ICMP_ECHOREPLY: 3131 type = ICMP6_ECHO_REPLY; 3132 break; 3133 case ICMP_UNREACH: 3134 type = ICMP6_DST_UNREACH; 3135 switch (code) { 3136 case ICMP_UNREACH_NET: 3137 case ICMP_UNREACH_HOST: 3138 case ICMP_UNREACH_NET_UNKNOWN: 3139 case ICMP_UNREACH_HOST_UNKNOWN: 3140 case ICMP_UNREACH_ISOLATED: 3141 case ICMP_UNREACH_TOSNET: 3142 case ICMP_UNREACH_TOSHOST: 3143 code = ICMP6_DST_UNREACH_NOROUTE; 3144 break; 3145 case ICMP_UNREACH_PORT: 3146 code = ICMP6_DST_UNREACH_NOPORT; 3147 break; 3148 case ICMP_UNREACH_NET_PROHIB: 3149 case ICMP_UNREACH_HOST_PROHIB: 3150 case ICMP_UNREACH_FILTER_PROHIB: 3151 case ICMP_UNREACH_PRECEDENCE_CUTOFF: 3152 code = ICMP6_DST_UNREACH_ADMIN; 3153 break; 3154 case ICMP_UNREACH_PROTOCOL: 3155 type = ICMP6_PARAM_PROB; 3156 code = ICMP6_PARAMPROB_NEXTHEADER; 3157 ptr = offsetof(struct ip6_hdr, ip6_nxt); 3158 break; 3159 case ICMP_UNREACH_NEEDFRAG: 3160 type = ICMP6_PACKET_TOO_BIG; 3161 code = 0; 3162 mtu += 20; 3163 break; 3164 default: 3165 return (-1); 3166 } 3167 break; 3168 case ICMP_TIMXCEED: 3169 type = ICMP6_TIME_EXCEEDED; 3170 break; 3171 case ICMP_PARAMPROB: 3172 type = ICMP6_PARAM_PROB; 3173 switch (code) { 3174 case ICMP_PARAMPROB_ERRATPTR: 3175 code = ICMP6_PARAMPROB_HEADER; 3176 break; 3177 case ICMP_PARAMPROB_LENGTH: 3178 code = ICMP6_PARAMPROB_HEADER; 3179 break; 3180 default: 3181 return (-1); 3182 } 3183 3184 ptr = icmp4->icmp_pptr; 3185 if (ptr == 0 || ptr == PTR_IP(ip_tos)) 3186 ; /* preserve */ 3187 else if (ptr == PTR_IP(ip_len) || 3188 ptr == PTR_IP(ip_len) + 1) 3189 ptr = PTR_IP6(ip6_plen); 3190 else if (ptr == PTR_IP(ip_ttl)) 3191 ptr = PTR_IP6(ip6_hlim); 3192 else if (ptr == PTR_IP(ip_p)) 3193 ptr = PTR_IP6(ip6_nxt); 3194 else if (ptr >= PTR_IP(ip_src) && 3195 ptr < PTR_IP(ip_dst)) 3196 ptr = PTR_IP6(ip6_src); 3197 else if (ptr >= PTR_IP(ip_dst) && 3198 ptr < sizeof(struct ip)) 3199 ptr = PTR_IP6(ip6_dst); 
3200 else { 3201 return (-1); 3202 } 3203 break; 3204 default: 3205 return (-1); 3206 } 3207 3208 pf_patch_8(pd, &icmp4->icmp_type, type, PF_HI); 3209 pf_patch_8(pd, &icmp4->icmp_code, code, PF_LO); 3210 pf_patch_16(pd, &icmp4->icmp_nextmtu, htons(mtu)); 3211 if (ptr >= 0) 3212 pf_patch_32(pd, &icmp4->icmp_void, htonl(ptr)); 3213 break; 3214 } 3215 3216 return (0); 3217 } 3218 #endif /* INET6 */ 3219 3220 /* 3221 * Need to modulate the sequence numbers in the TCP SACK option 3222 * (credits to Krzysztof Pfaff for report and patch) 3223 */ 3224 int 3225 pf_modulate_sack(struct pf_pdesc *pd, struct pf_state_peer *dst) 3226 { 3227 struct sackblk sack; 3228 int copyback = 0, i; 3229 int olen, optsoff; 3230 u_int8_t opts[MAX_TCPOPTLEN], *opt, *eoh; 3231 3232 olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr); 3233 optsoff = pd->off + sizeof(struct tcphdr); 3234 #define TCPOLEN_MINSACK (TCPOLEN_SACK + 2) 3235 if (olen < TCPOLEN_MINSACK || 3236 !pf_pull_hdr(pd->m, optsoff, opts, olen, NULL, pd->af)) 3237 return (0); 3238 3239 eoh = opts + olen; 3240 opt = opts; 3241 while ((opt = pf_find_tcpopt(opt, opts, olen, 3242 TCPOPT_SACK, TCPOLEN_MINSACK)) != NULL) 3243 { 3244 size_t safelen = MIN(opt[1], (eoh - opt)); 3245 for (i = 2; i + TCPOLEN_SACK <= safelen; i += TCPOLEN_SACK) { 3246 size_t startoff = (opt + i) - opts; 3247 memcpy(&sack, &opt[i], sizeof(sack)); 3248 pf_patch_32_unaligned(pd, &sack.start, 3249 htonl(ntohl(sack.start) - dst->seqdiff), 3250 PF_ALGNMNT(startoff)); 3251 pf_patch_32_unaligned(pd, &sack.end, 3252 htonl(ntohl(sack.end) - dst->seqdiff), 3253 PF_ALGNMNT(startoff + sizeof(sack.start))); 3254 memcpy(&opt[i], &sack, sizeof(sack)); 3255 } 3256 copyback = 1; 3257 opt += opt[1]; 3258 } 3259 3260 if (copyback) 3261 m_copyback(pd->m, optsoff, olen, opts, M_NOWAIT); 3262 return (copyback); 3263 } 3264 3265 struct mbuf * 3266 pf_build_tcp(const struct pf_rule *r, sa_family_t af, 3267 const struct pf_addr *saddr, const struct pf_addr *daddr, 3268 u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack, 3269 u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag, 3270 u_int16_t rtag, u_int sack, u_int rdom) 3271 { 3272 struct mbuf *m; 3273 int len, tlen; 3274 struct ip *h; 3275 #ifdef INET6 3276 struct ip6_hdr *h6; 3277 #endif /* INET6 */ 3278 struct tcphdr *th; 3279 char *opt; 3280 3281 /* maximum segment size tcp option */ 3282 tlen = sizeof(struct tcphdr); 3283 if (mss) 3284 tlen += 4; 3285 if (sack) 3286 tlen += 2; 3287 3288 switch (af) { 3289 case AF_INET: 3290 len = sizeof(struct ip) + tlen; 3291 break; 3292 #ifdef INET6 3293 case AF_INET6: 3294 len = sizeof(struct ip6_hdr) + tlen; 3295 break; 3296 #endif /* INET6 */ 3297 default: 3298 unhandled_af(af); 3299 } 3300 3301 /* create outgoing mbuf */ 3302 m = m_gethdr(M_DONTWAIT, MT_HEADER); 3303 if (m == NULL) 3304 return (NULL); 3305 if (tag) 3306 m->m_pkthdr.pf.flags |= PF_TAG_GENERATED; 3307 m->m_pkthdr.pf.tag = rtag; 3308 m->m_pkthdr.ph_rtableid = rdom; 3309 if (r && (r->scrub_flags & PFSTATE_SETPRIO)) 3310 m->m_pkthdr.pf.prio = r->set_prio[0]; 3311 if (r && r->qid) 3312 m->m_pkthdr.pf.qid = r->qid; 3313 m->m_data += max_linkhdr; 3314 m->m_pkthdr.len = m->m_len = len; 3315 m->m_pkthdr.ph_ifidx = 0; 3316 m->m_pkthdr.csum_flags |= M_TCP_CSUM_OUT; 3317 memset(m->m_data, 0, len); 3318 switch (af) { 3319 case AF_INET: 3320 h = mtod(m, struct ip *); 3321 h->ip_p = IPPROTO_TCP; 3322 h->ip_len = htons(tlen); 3323 h->ip_v = 4; 3324 h->ip_hl = sizeof(*h) >> 2; 3325 h->ip_tos = IPTOS_LOWDELAY; 3326 h->ip_len = 
htons(len); 3327 h->ip_off = htons(ip_mtudisc ? IP_DF : 0); 3328 h->ip_ttl = ttl ? ttl : ip_defttl; 3329 h->ip_sum = 0; 3330 h->ip_src.s_addr = saddr->v4.s_addr; 3331 h->ip_dst.s_addr = daddr->v4.s_addr; 3332 3333 th = (struct tcphdr *)((caddr_t)h + sizeof(struct ip)); 3334 break; 3335 #ifdef INET6 3336 case AF_INET6: 3337 h6 = mtod(m, struct ip6_hdr *); 3338 h6->ip6_nxt = IPPROTO_TCP; 3339 h6->ip6_plen = htons(tlen); 3340 h6->ip6_vfc |= IPV6_VERSION; 3341 h6->ip6_hlim = IPV6_DEFHLIM; 3342 memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr)); 3343 memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr)); 3344 3345 th = (struct tcphdr *)((caddr_t)h6 + sizeof(struct ip6_hdr)); 3346 break; 3347 #endif /* INET6 */ 3348 default: 3349 unhandled_af(af); 3350 } 3351 3352 /* TCP header */ 3353 th->th_sport = sport; 3354 th->th_dport = dport; 3355 th->th_seq = htonl(seq); 3356 th->th_ack = htonl(ack); 3357 th->th_off = tlen >> 2; 3358 th->th_flags = flags; 3359 th->th_win = htons(win); 3360 3361 opt = (char *)(th + 1); 3362 if (mss) { 3363 opt[0] = TCPOPT_MAXSEG; 3364 opt[1] = 4; 3365 mss = htons(mss); 3366 memcpy((opt + 2), &mss, 2); 3367 opt += 4; 3368 } 3369 if (sack) { 3370 opt[0] = TCPOPT_SACK_PERMITTED; 3371 opt[1] = 2; 3372 opt += 2; 3373 } 3374 3375 return (m); 3376 } 3377 3378 void 3379 pf_send_tcp(const struct pf_rule *r, sa_family_t af, 3380 const struct pf_addr *saddr, const struct pf_addr *daddr, 3381 u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack, 3382 u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag, 3383 u_int16_t rtag, u_int rdom) 3384 { 3385 struct mbuf *m; 3386 3387 if ((m = pf_build_tcp(r, af, saddr, daddr, sport, dport, seq, ack, 3388 flags, win, mss, ttl, tag, rtag, 0, rdom)) == NULL) 3389 return; 3390 3391 switch (af) { 3392 case AF_INET: 3393 ip_send(m); 3394 break; 3395 #ifdef INET6 3396 case AF_INET6: 3397 ip6_send(m); 3398 break; 3399 #endif /* INET6 */ 3400 } 3401 } 3402 3403 static void 3404 pf_send_challenge_ack(struct pf_pdesc *pd, struct pf_state *st, 3405 struct pf_state_peer *src, struct pf_state_peer *dst) 3406 { 3407 /* 3408 * We are sending a challenge ACK as a response to a SYN packet that 3409 * matches an existing state (modulo the TCP window check). Therefore 3410 * the packet must be sent on behalf of the destination. 3411 * 3412 * We expect the sender to remain either silent or to send an RST 3413 * packet, so that both the firewall and the remote peer can purge 3414 * the dead state from memory. 3415 */ 3416 pf_send_tcp(st->rule.ptr, pd->af, pd->dst, pd->src, 3417 pd->hdr.tcp.th_dport, pd->hdr.tcp.th_sport, dst->seqlo, 3418 src->seqlo, TH_ACK, 0, 0, st->rule.ptr->return_ttl, 1, 0, 3419 pd->rdomain); 3420 } 3421 3422 void 3423 pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, int param, 3424 sa_family_t af, struct pf_rule *r, u_int rdomain) 3425 { 3426 struct mbuf *m0; 3427 3428 if ((m0 = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) 3429 return; 3430 3431 m0->m_pkthdr.pf.flags |= PF_TAG_GENERATED; 3432 m0->m_pkthdr.ph_rtableid = rdomain; 3433 if (r && (r->scrub_flags & PFSTATE_SETPRIO)) 3434 m0->m_pkthdr.pf.prio = r->set_prio[0]; 3435 if (r && r->qid) 3436 m0->m_pkthdr.pf.qid = r->qid; 3437 3438 switch (af) { 3439 case AF_INET: 3440 icmp_error(m0, type, code, 0, param); 3441 break; 3442 #ifdef INET6 3443 case AF_INET6: 3444 icmp6_error(m0, type, code, param); 3445 break; 3446 #endif /* INET6 */ 3447 } 3448 } 3449 3450 /* 3451 * Return ((n = 0) == (a = b [with mask m])) 3452 * Note: n != 0 => returns (a != b [with mask m]) 3453 */ 3454 int 3455 pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m, 3456 struct pf_addr *b, sa_family_t af) 3457 { 3458 switch (af) { 3459 case AF_INET: 3460 if ((a->addr32[0] & m->addr32[0]) == 3461 (b->addr32[0] & m->addr32[0])) 3462 return (n == 0); 3463 break; 3464 #ifdef INET6 3465 case AF_INET6: 3466 if (((a->addr32[0] & m->addr32[0]) == 3467 (b->addr32[0] & m->addr32[0])) && 3468 ((a->addr32[1] & m->addr32[1]) == 3469 (b->addr32[1] & m->addr32[1])) && 3470 ((a->addr32[2] & m->addr32[2]) == 3471 (b->addr32[2] & m->addr32[2])) && 3472 ((a->addr32[3] & m->addr32[3]) == 3473 (b->addr32[3] & m->addr32[3]))) 3474 return (n == 0); 3475 break; 3476 #endif /* INET6 */ 3477 } 3478 3479 return (n != 0); 3480 } 3481 3482 /* 3483 * Return 1 if b <= a <= e, otherwise return 0. 
3484 */ 3485 int 3486 pf_match_addr_range(struct pf_addr *b, struct pf_addr *e, 3487 struct pf_addr *a, sa_family_t af) 3488 { 3489 switch (af) { 3490 case AF_INET: 3491 if ((ntohl(a->addr32[0]) < ntohl(b->addr32[0])) || 3492 (ntohl(a->addr32[0]) > ntohl(e->addr32[0]))) 3493 return (0); 3494 break; 3495 #ifdef INET6 3496 case AF_INET6: { 3497 int i; 3498 3499 /* check a >= b */ 3500 for (i = 0; i < 4; ++i) 3501 if (ntohl(a->addr32[i]) > ntohl(b->addr32[i])) 3502 break; 3503 else if (ntohl(a->addr32[i]) < ntohl(b->addr32[i])) 3504 return (0); 3505 /* check a <= e */ 3506 for (i = 0; i < 4; ++i) 3507 if (ntohl(a->addr32[i]) < ntohl(e->addr32[i])) 3508 break; 3509 else if (ntohl(a->addr32[i]) > ntohl(e->addr32[i])) 3510 return (0); 3511 break; 3512 } 3513 #endif /* INET6 */ 3514 } 3515 return (1); 3516 } 3517 3518 int 3519 pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p) 3520 { 3521 switch (op) { 3522 case PF_OP_IRG: 3523 return ((p > a1) && (p < a2)); 3524 case PF_OP_XRG: 3525 return ((p < a1) || (p > a2)); 3526 case PF_OP_RRG: 3527 return ((p >= a1) && (p <= a2)); 3528 case PF_OP_EQ: 3529 return (p == a1); 3530 case PF_OP_NE: 3531 return (p != a1); 3532 case PF_OP_LT: 3533 return (p < a1); 3534 case PF_OP_LE: 3535 return (p <= a1); 3536 case PF_OP_GT: 3537 return (p > a1); 3538 case PF_OP_GE: 3539 return (p >= a1); 3540 } 3541 return (0); /* never reached */ 3542 } 3543 3544 int 3545 pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p) 3546 { 3547 return (pf_match(op, ntohs(a1), ntohs(a2), ntohs(p))); 3548 } 3549 3550 int 3551 pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u) 3552 { 3553 if (u == -1 && op != PF_OP_EQ && op != PF_OP_NE) 3554 return (0); 3555 return (pf_match(op, a1, a2, u)); 3556 } 3557 3558 int 3559 pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g) 3560 { 3561 if (g == -1 && op != PF_OP_EQ && op != PF_OP_NE) 3562 return (0); 3563 return (pf_match(op, a1, a2, g)); 3564 } 3565 3566 int 3567 pf_match_tag(struct mbuf *m, struct pf_rule *r, int *tag) 3568 { 3569 if (*tag == -1) 3570 *tag = m->m_pkthdr.pf.tag; 3571 3572 return ((!r->match_tag_not && r->match_tag == *tag) || 3573 (r->match_tag_not && r->match_tag != *tag)); 3574 } 3575 3576 int 3577 pf_match_rcvif(struct mbuf *m, struct pf_rule *r) 3578 { 3579 struct ifnet *ifp; 3580 #if NCARP > 0 3581 struct ifnet *ifp0; 3582 #endif 3583 struct pfi_kif *kif; 3584 3585 ifp = if_get(m->m_pkthdr.ph_ifidx); 3586 if (ifp == NULL) 3587 return (0); 3588 3589 #if NCARP > 0 3590 if (ifp->if_type == IFT_CARP && 3591 (ifp0 = if_get(ifp->if_carpdevidx)) != NULL) { 3592 kif = (struct pfi_kif *)ifp0->if_pf_kif; 3593 if_put(ifp0); 3594 } else 3595 #endif /* NCARP */ 3596 kif = (struct pfi_kif *)ifp->if_pf_kif; 3597 3598 if_put(ifp); 3599 3600 if (kif == NULL) { 3601 DPFPRINTF(LOG_ERR, 3602 "%s: kif == NULL, @%d via %s", __func__, 3603 r->nr, r->rcv_ifname); 3604 return (0); 3605 } 3606 3607 return (pfi_kif_match(r->rcv_kif, kif)); 3608 } 3609 3610 void 3611 pf_tag_packet(struct mbuf *m, int tag, int rtableid) 3612 { 3613 if (tag > 0) 3614 m->m_pkthdr.pf.tag = tag; 3615 if (rtableid >= 0) 3616 m->m_pkthdr.ph_rtableid = (u_int)rtableid; 3617 } 3618 3619 void 3620 pf_anchor_stack_init(void) 3621 { 3622 struct pf_anchor_stackframe *stack; 3623 3624 stack = (struct pf_anchor_stackframe *)cpumem_enter(pf_anchor_stack); 3625 stack[PF_ANCHOR_STACK_MAX].sf_stack_top = &stack[0]; 3626 cpumem_leave(pf_anchor_stack, stack); 3627 } 3628 3629 int 3630 pf_anchor_stack_is_full(struct pf_anchor_stackframe *sf) 3631 { 3632 
struct pf_anchor_stackframe *stack; 3633 int rv; 3634 3635 stack = (struct pf_anchor_stackframe *)cpumem_enter(pf_anchor_stack); 3636 rv = (sf == &stack[PF_ANCHOR_STACK_MAX]); 3637 cpumem_leave(pf_anchor_stack, stack); 3638 3639 return (rv); 3640 } 3641 3642 int 3643 pf_anchor_stack_is_empty(struct pf_anchor_stackframe *sf) 3644 { 3645 struct pf_anchor_stackframe *stack; 3646 int rv; 3647 3648 stack = (struct pf_anchor_stackframe *)cpumem_enter(pf_anchor_stack); 3649 rv = (sf == &stack[0]); 3650 cpumem_leave(pf_anchor_stack, stack); 3651 3652 return (rv); 3653 } 3654 3655 struct pf_anchor_stackframe * 3656 pf_anchor_stack_top(void) 3657 { 3658 struct pf_anchor_stackframe *stack; 3659 struct pf_anchor_stackframe *top_sf; 3660 3661 stack = (struct pf_anchor_stackframe *)cpumem_enter(pf_anchor_stack); 3662 top_sf = stack[PF_ANCHOR_STACK_MAX].sf_stack_top; 3663 cpumem_leave(pf_anchor_stack, stack); 3664 3665 return (top_sf); 3666 } 3667 3668 int 3669 pf_anchor_stack_push(struct pf_ruleset *rs, struct pf_rule *anchor, 3670 struct pf_rule *r, struct pf_anchor *child, int jump_target) 3671 { 3672 struct pf_anchor_stackframe *stack; 3673 struct pf_anchor_stackframe *top_sf = pf_anchor_stack_top(); 3674 3675 top_sf++; 3676 if (pf_anchor_stack_is_full(top_sf)) 3677 return (-1); 3678 3679 top_sf->sf_rs = rs; 3680 top_sf->sf_anchor = anchor; 3681 top_sf->sf_r = r; 3682 top_sf->sf_child = child; 3683 top_sf->sf_jump_target = jump_target; 3684 3685 stack = (struct pf_anchor_stackframe *)cpumem_enter(pf_anchor_stack); 3686 3687 if ((top_sf <= &stack[0]) || (top_sf >= &stack[PF_ANCHOR_STACK_MAX])) 3688 panic("%s: top frame outside of anchor stack range", __func__); 3689 3690 stack[PF_ANCHOR_STACK_MAX].sf_stack_top = top_sf; 3691 cpumem_leave(pf_anchor_stack, stack); 3692 3693 return (0); 3694 } 3695 3696 int 3697 pf_anchor_stack_pop(struct pf_ruleset **rs, struct pf_rule **anchor, 3698 struct pf_rule **r, struct pf_anchor **child, int *jump_target) 3699 { 3700 struct pf_anchor_stackframe *top_sf = pf_anchor_stack_top(); 3701 struct pf_anchor_stackframe *stack; 3702 int on_top; 3703 3704 stack = (struct pf_anchor_stackframe *)cpumem_enter(pf_anchor_stack); 3705 if (pf_anchor_stack_is_empty(top_sf)) { 3706 on_top = -1; 3707 } else { 3708 if ((top_sf <= &stack[0]) || 3709 (top_sf >= &stack[PF_ANCHOR_STACK_MAX])) 3710 panic("%s: top frame outside of anchor stack range", 3711 __func__); 3712 3713 *rs = top_sf->sf_rs; 3714 *anchor = top_sf->sf_anchor; 3715 *r = top_sf->sf_r; 3716 *child = top_sf->sf_child; 3717 *jump_target = top_sf->sf_jump_target; 3718 top_sf--; 3719 stack[PF_ANCHOR_STACK_MAX].sf_stack_top = top_sf; 3720 on_top = 0; 3721 } 3722 cpumem_leave(pf_anchor_stack, stack); 3723 3724 return (on_top); 3725 } 3726 3727 void 3728 pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr, 3729 struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af) 3730 { 3731 switch (af) { 3732 case AF_INET: 3733 naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) | 3734 ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]); 3735 break; 3736 #ifdef INET6 3737 case AF_INET6: 3738 naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) | 3739 ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]); 3740 naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) | 3741 ((rmask->addr32[1] ^ 0xffffffff ) & saddr->addr32[1]); 3742 naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) | 3743 ((rmask->addr32[2] ^ 0xffffffff ) & saddr->addr32[2]); 3744 naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) | 3745 
((rmask->addr32[3] ^ 0xffffffff ) & saddr->addr32[3]); 3746 break; 3747 #endif /* INET6 */ 3748 default: 3749 unhandled_af(af); 3750 } 3751 } 3752 3753 void 3754 pf_addr_inc(struct pf_addr *addr, sa_family_t af) 3755 { 3756 switch (af) { 3757 case AF_INET: 3758 addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1); 3759 break; 3760 #ifdef INET6 3761 case AF_INET6: 3762 if (addr->addr32[3] == 0xffffffff) { 3763 addr->addr32[3] = 0; 3764 if (addr->addr32[2] == 0xffffffff) { 3765 addr->addr32[2] = 0; 3766 if (addr->addr32[1] == 0xffffffff) { 3767 addr->addr32[1] = 0; 3768 addr->addr32[0] = 3769 htonl(ntohl(addr->addr32[0]) + 1); 3770 } else 3771 addr->addr32[1] = 3772 htonl(ntohl(addr->addr32[1]) + 1); 3773 } else 3774 addr->addr32[2] = 3775 htonl(ntohl(addr->addr32[2]) + 1); 3776 } else 3777 addr->addr32[3] = 3778 htonl(ntohl(addr->addr32[3]) + 1); 3779 break; 3780 #endif /* INET6 */ 3781 default: 3782 unhandled_af(af); 3783 } 3784 } 3785 3786 int 3787 pf_socket_lookup(struct pf_pdesc *pd) 3788 { 3789 struct pf_addr *saddr, *daddr; 3790 u_int16_t sport, dport; 3791 struct inpcbtable *table; 3792 struct inpcb *inp; 3793 3794 pd->lookup.uid = -1; 3795 pd->lookup.gid = -1; 3796 pd->lookup.pid = NO_PID; 3797 switch (pd->virtual_proto) { 3798 case IPPROTO_TCP: 3799 sport = pd->hdr.tcp.th_sport; 3800 dport = pd->hdr.tcp.th_dport; 3801 PF_ASSERT_LOCKED(); 3802 NET_ASSERT_LOCKED(); 3803 table = &tcbtable; 3804 break; 3805 case IPPROTO_UDP: 3806 sport = pd->hdr.udp.uh_sport; 3807 dport = pd->hdr.udp.uh_dport; 3808 PF_ASSERT_LOCKED(); 3809 NET_ASSERT_LOCKED(); 3810 table = &udbtable; 3811 break; 3812 default: 3813 return (-1); 3814 } 3815 if (pd->dir == PF_IN) { 3816 saddr = pd->src; 3817 daddr = pd->dst; 3818 } else { 3819 u_int16_t p; 3820 3821 p = sport; 3822 sport = dport; 3823 dport = p; 3824 saddr = pd->dst; 3825 daddr = pd->src; 3826 } 3827 switch (pd->af) { 3828 case AF_INET: 3829 /* 3830 * Fails when rtable is changed while evaluating the ruleset 3831 * The socket looked up will not match the one hit in the end. 
3832 */ 3833 inp = in_pcblookup(table, saddr->v4, sport, daddr->v4, dport, 3834 pd->rdomain); 3835 if (inp == NULL) { 3836 inp = in_pcblookup_listen(table, daddr->v4, dport, 3837 NULL, pd->rdomain); 3838 if (inp == NULL) 3839 return (-1); 3840 } 3841 break; 3842 #ifdef INET6 3843 case AF_INET6: 3844 if (pd->virtual_proto == IPPROTO_UDP) 3845 table = &udb6table; 3846 if (pd->virtual_proto == IPPROTO_TCP) 3847 table = &tcb6table; 3848 inp = in6_pcblookup(table, &saddr->v6, sport, &daddr->v6, 3849 dport, pd->rdomain); 3850 if (inp == NULL) { 3851 inp = in6_pcblookup_listen(table, &daddr->v6, dport, 3852 NULL, pd->rdomain); 3853 if (inp == NULL) 3854 return (-1); 3855 } 3856 break; 3857 #endif /* INET6 */ 3858 default: 3859 unhandled_af(pd->af); 3860 } 3861 pd->lookup.uid = inp->inp_socket->so_euid; 3862 pd->lookup.gid = inp->inp_socket->so_egid; 3863 pd->lookup.pid = inp->inp_socket->so_cpid; 3864 in_pcbunref(inp); 3865 return (1); 3866 } 3867 3868 /* post: r => (r[0] == type /\ r[1] >= min_typelen >= 2 "validity" 3869 * /\ (eoh - r) >= min_typelen >= 2 "safety" ) 3870 * 3871 * warning: r + r[1] may exceed opts bounds for r[1] > min_typelen 3872 */ 3873 u_int8_t* 3874 pf_find_tcpopt(u_int8_t *opt, u_int8_t *opts, size_t hlen, u_int8_t type, 3875 u_int8_t min_typelen) 3876 { 3877 u_int8_t *eoh = opts + hlen; 3878 3879 if (min_typelen < 2) 3880 return (NULL); 3881 3882 while ((eoh - opt) >= min_typelen) { 3883 switch (*opt) { 3884 case TCPOPT_EOL: 3885 /* FALLTHROUGH - Workaround the failure of some 3886 systems to NOP-pad their bzero'd option buffers, 3887 producing spurious EOLs */ 3888 case TCPOPT_NOP: 3889 opt++; 3890 continue; 3891 default: 3892 if (opt[0] == type && 3893 opt[1] >= min_typelen) 3894 return (opt); 3895 } 3896 3897 opt += MAX(opt[1], 2); /* evade infinite loops */ 3898 } 3899 3900 return (NULL); 3901 } 3902 3903 u_int8_t 3904 pf_get_wscale(struct pf_pdesc *pd) 3905 { 3906 int olen; 3907 u_int8_t opts[MAX_TCPOPTLEN], *opt; 3908 u_int8_t wscale = 0; 3909 3910 olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr); 3911 if (olen < TCPOLEN_WINDOW || !pf_pull_hdr(pd->m, 3912 pd->off + sizeof(struct tcphdr), opts, olen, NULL, pd->af)) 3913 return (0); 3914 3915 opt = opts; 3916 while ((opt = pf_find_tcpopt(opt, opts, olen, 3917 TCPOPT_WINDOW, TCPOLEN_WINDOW)) != NULL) { 3918 wscale = opt[2]; 3919 wscale = MIN(wscale, TCP_MAX_WINSHIFT); 3920 wscale |= PF_WSCALE_FLAG; 3921 3922 opt += opt[1]; 3923 } 3924 3925 return (wscale); 3926 } 3927 3928 u_int16_t 3929 pf_get_mss(struct pf_pdesc *pd, uint16_t mssdflt) 3930 { 3931 int olen; 3932 u_int8_t opts[MAX_TCPOPTLEN], *opt; 3933 u_int16_t mss; 3934 3935 olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr); 3936 if (olen < TCPOLEN_MAXSEG || !pf_pull_hdr(pd->m, 3937 pd->off + sizeof(struct tcphdr), opts, olen, NULL, pd->af)) 3938 return (0); 3939 3940 mss = mssdflt; 3941 opt = opts; 3942 while ((opt = pf_find_tcpopt(opt, opts, olen, 3943 TCPOPT_MAXSEG, TCPOLEN_MAXSEG)) != NULL) { 3944 memcpy(&mss, (opt + 2), 2); 3945 mss = ntohs(mss); 3946 3947 opt += opt[1]; 3948 } 3949 return (mss); 3950 } 3951 3952 u_int16_t 3953 pf_calc_mss(struct pf_addr *addr, sa_family_t af, int rtableid, uint16_t offer, 3954 uint16_t mssdflt) 3955 { 3956 struct ifnet *ifp; 3957 struct sockaddr_in *dst; 3958 #ifdef INET6 3959 struct sockaddr_in6 *dst6; 3960 #endif /* INET6 */ 3961 struct rtentry *rt = NULL; 3962 struct sockaddr_storage ss; 3963 int hlen, mss; 3964 3965 memset(&ss, 0, sizeof(ss)); 3966 3967 switch (af) { 3968 case AF_INET: 3969 hlen = 
sizeof(struct ip); 3970 dst = (struct sockaddr_in *)&ss; 3971 dst->sin_family = AF_INET; 3972 dst->sin_len = sizeof(*dst); 3973 dst->sin_addr = addr->v4; 3974 rt = rtalloc(sintosa(dst), 0, rtableid); 3975 break; 3976 #ifdef INET6 3977 case AF_INET6: 3978 hlen = sizeof(struct ip6_hdr); 3979 dst6 = (struct sockaddr_in6 *)&ss; 3980 dst6->sin6_family = AF_INET6; 3981 dst6->sin6_len = sizeof(*dst6); 3982 dst6->sin6_addr = addr->v6; 3983 rt = rtalloc(sin6tosa(dst6), 0, rtableid); 3984 break; 3985 #endif /* INET6 */ 3986 } 3987 3988 mss = mssdflt; 3989 if (rt != NULL && (ifp = if_get(rt->rt_ifidx)) != NULL) { 3990 mss = ifp->if_mtu - hlen - sizeof(struct tcphdr); 3991 mss = imax(mss, mssdflt); 3992 if_put(ifp); 3993 } 3994 rtfree(rt); 3995 mss = imin(mss, offer); 3996 mss = imax(mss, 64); /* sanity - at least max opt space */ 3997 return (mss); 3998 } 3999 4000 static __inline int 4001 pf_set_rt_ifp(struct pf_state *st, struct pf_addr *saddr, sa_family_t af, 4002 struct pf_src_node **sns) 4003 { 4004 struct pf_rule *r = st->rule.ptr; 4005 int rv; 4006 4007 if (!r->rt) 4008 return (0); 4009 4010 rv = pf_map_addr(af, r, saddr, &st->rt_addr, NULL, sns, 4011 &r->route, PF_SN_ROUTE); 4012 if (rv == 0) 4013 st->rt = r->rt; 4014 4015 return (rv); 4016 } 4017 4018 u_int32_t 4019 pf_tcp_iss(struct pf_pdesc *pd) 4020 { 4021 SHA2_CTX ctx; 4022 union { 4023 uint8_t bytes[SHA512_DIGEST_LENGTH]; 4024 uint32_t words[1]; 4025 } digest; 4026 4027 if (pf_tcp_secret_init == 0) { 4028 arc4random_buf(pf_tcp_secret, sizeof(pf_tcp_secret)); 4029 SHA512Init(&pf_tcp_secret_ctx); 4030 SHA512Update(&pf_tcp_secret_ctx, pf_tcp_secret, 4031 sizeof(pf_tcp_secret)); 4032 pf_tcp_secret_init = 1; 4033 } 4034 ctx = pf_tcp_secret_ctx; 4035 4036 SHA512Update(&ctx, &pd->rdomain, sizeof(pd->rdomain)); 4037 SHA512Update(&ctx, &pd->hdr.tcp.th_sport, sizeof(u_short)); 4038 SHA512Update(&ctx, &pd->hdr.tcp.th_dport, sizeof(u_short)); 4039 switch (pd->af) { 4040 case AF_INET: 4041 SHA512Update(&ctx, &pd->src->v4, sizeof(struct in_addr)); 4042 SHA512Update(&ctx, &pd->dst->v4, sizeof(struct in_addr)); 4043 break; 4044 #ifdef INET6 4045 case AF_INET6: 4046 SHA512Update(&ctx, &pd->src->v6, sizeof(struct in6_addr)); 4047 SHA512Update(&ctx, &pd->dst->v6, sizeof(struct in6_addr)); 4048 break; 4049 #endif /* INET6 */ 4050 } 4051 SHA512Final(digest.bytes, &ctx); 4052 pf_tcp_iss_off += 4096; 4053 return (digest.words[0] + READ_ONCE(tcp_iss) + pf_tcp_iss_off); 4054 } 4055 4056 void 4057 pf_rule_to_actions(struct pf_rule *r, struct pf_rule_actions *a) 4058 { 4059 if (r->qid) 4060 a->qid = r->qid; 4061 if (r->pqid) 4062 a->pqid = r->pqid; 4063 if (r->rtableid >= 0) 4064 a->rtableid = r->rtableid; 4065 #if NPFLOG > 0 4066 a->log |= r->log; 4067 #endif /* NPFLOG > 0 */ 4068 if (r->scrub_flags & PFSTATE_SETTOS) 4069 a->set_tos = r->set_tos; 4070 if (r->min_ttl) 4071 a->min_ttl = r->min_ttl; 4072 if (r->max_mss) 4073 a->max_mss = r->max_mss; 4074 a->flags |= (r->scrub_flags & (PFSTATE_NODF|PFSTATE_RANDOMID| 4075 PFSTATE_SETTOS|PFSTATE_SCRUB_TCP|PFSTATE_SETPRIO)); 4076 if (r->scrub_flags & PFSTATE_SETPRIO) { 4077 a->set_prio[0] = r->set_prio[0]; 4078 a->set_prio[1] = r->set_prio[1]; 4079 } 4080 if (r->rule_flag & PFRULE_SETDELAY) 4081 a->delay = r->delay; 4082 } 4083 4084 #define PF_TEST_ATTRIB(t, a) \ 4085 if (t) { \ 4086 r = a; \ 4087 continue; \ 4088 } else do { \ 4089 } while (0) 4090 4091 enum pf_test_status 4092 pf_match_rule(struct pf_test_ctx *ctx, struct pf_ruleset *ruleset) 4093 { 4094 struct pf_rule *r; 4095 struct pf_anchor *child = NULL; 4096 
int target; 4097 4098 pf_anchor_stack_init(); 4099 enter_ruleset: 4100 r = TAILQ_FIRST(ruleset->rules.active.ptr); 4101 while (r != NULL) { 4102 PF_TEST_ATTRIB(r->rule_flag & PFRULE_EXPIRED, 4103 TAILQ_NEXT(r, entries)); 4104 r->evaluations++; 4105 PF_TEST_ATTRIB( 4106 (pfi_kif_match(r->kif, ctx->pd->kif) == r->ifnot), 4107 r->skip[PF_SKIP_IFP].ptr); 4108 PF_TEST_ATTRIB((r->direction && r->direction != ctx->pd->dir), 4109 r->skip[PF_SKIP_DIR].ptr); 4110 PF_TEST_ATTRIB((r->onrdomain >= 0 && 4111 (r->onrdomain == ctx->pd->rdomain) == r->ifnot), 4112 r->skip[PF_SKIP_RDOM].ptr); 4113 PF_TEST_ATTRIB((r->af && r->af != ctx->pd->af), 4114 r->skip[PF_SKIP_AF].ptr); 4115 PF_TEST_ATTRIB((r->proto && r->proto != ctx->pd->proto), 4116 r->skip[PF_SKIP_PROTO].ptr); 4117 PF_TEST_ATTRIB((PF_MISMATCHAW(&r->src.addr, &ctx->pd->nsaddr, 4118 ctx->pd->naf, r->src.neg, ctx->pd->kif, 4119 ctx->act.rtableid)), 4120 r->skip[PF_SKIP_SRC_ADDR].ptr); 4121 PF_TEST_ATTRIB((PF_MISMATCHAW(&r->dst.addr, &ctx->pd->ndaddr, 4122 ctx->pd->af, r->dst.neg, NULL, ctx->act.rtableid)), 4123 r->skip[PF_SKIP_DST_ADDR].ptr); 4124 4125 switch (ctx->pd->virtual_proto) { 4126 case PF_VPROTO_FRAGMENT: 4127 /* tcp/udp only. port_op always 0 in other cases */ 4128 PF_TEST_ATTRIB((r->src.port_op || r->dst.port_op), 4129 TAILQ_NEXT(r, entries)); 4130 PF_TEST_ATTRIB((ctx->pd->proto == IPPROTO_TCP && 4131 r->flagset), 4132 TAILQ_NEXT(r, entries)); 4133 /* icmp only. type/code always 0 in other cases */ 4134 PF_TEST_ATTRIB((r->type || r->code), 4135 TAILQ_NEXT(r, entries)); 4136 /* tcp/udp only. {uid|gid}.op always 0 in other cases */ 4137 PF_TEST_ATTRIB((r->gid.op || r->uid.op), 4138 TAILQ_NEXT(r, entries)); 4139 break; 4140 4141 case IPPROTO_TCP: 4142 PF_TEST_ATTRIB(((r->flagset & ctx->th->th_flags) != 4143 r->flags), 4144 TAILQ_NEXT(r, entries)); 4145 PF_TEST_ATTRIB((r->os_fingerprint != PF_OSFP_ANY && 4146 !pf_osfp_match(pf_osfp_fingerprint(ctx->pd), 4147 r->os_fingerprint)), 4148 TAILQ_NEXT(r, entries)); 4149 /* FALLTHROUGH */ 4150 4151 case IPPROTO_UDP: 4152 /* tcp/udp only. port_op always 0 in other cases */ 4153 PF_TEST_ATTRIB((r->src.port_op && 4154 !pf_match_port(r->src.port_op, r->src.port[0], 4155 r->src.port[1], ctx->pd->nsport)), 4156 r->skip[PF_SKIP_SRC_PORT].ptr); 4157 PF_TEST_ATTRIB((r->dst.port_op && 4158 !pf_match_port(r->dst.port_op, r->dst.port[0], 4159 r->dst.port[1], ctx->pd->ndport)), 4160 r->skip[PF_SKIP_DST_PORT].ptr); 4161 /* tcp/udp only. uid.op always 0 in other cases */ 4162 PF_TEST_ATTRIB((r->uid.op && (ctx->pd->lookup.done || 4163 (ctx->pd->lookup.done = 4164 pf_socket_lookup(ctx->pd), 1)) && 4165 !pf_match_uid(r->uid.op, r->uid.uid[0], 4166 r->uid.uid[1], ctx->pd->lookup.uid)), 4167 TAILQ_NEXT(r, entries)); 4168 /* tcp/udp only. gid.op always 0 in other cases */ 4169 PF_TEST_ATTRIB((r->gid.op && (ctx->pd->lookup.done || 4170 (ctx->pd->lookup.done = 4171 pf_socket_lookup(ctx->pd), 1)) && 4172 !pf_match_gid(r->gid.op, r->gid.gid[0], 4173 r->gid.gid[1], ctx->pd->lookup.gid)), 4174 TAILQ_NEXT(r, entries)); 4175 break; 4176 4177 case IPPROTO_ICMP: 4178 /* icmp only. type always 0 in other cases */ 4179 PF_TEST_ATTRIB((r->type && 4180 r->type != ctx->icmptype + 1), 4181 TAILQ_NEXT(r, entries)); 4182 /* icmp only. code always 0 in other cases */ 4183 PF_TEST_ATTRIB((r->code && 4184 r->code != ctx->icmpcode + 1), 4185 TAILQ_NEXT(r, entries)); 4186 /* icmp only. 
don't create states on replies */ 4187 PF_TEST_ATTRIB((r->keep_state && !ctx->state_icmp && 4188 (r->rule_flag & PFRULE_STATESLOPPY) == 0 && 4189 ctx->icmp_dir != PF_IN), 4190 TAILQ_NEXT(r, entries)); 4191 break; 4192 4193 case IPPROTO_ICMPV6: 4194 /* icmp only. type always 0 in other cases */ 4195 PF_TEST_ATTRIB((r->type && 4196 r->type != ctx->icmptype + 1), 4197 TAILQ_NEXT(r, entries)); 4198 /* icmp only. code always 0 in other cases */ 4199 PF_TEST_ATTRIB((r->code && 4200 r->code != ctx->icmpcode + 1), 4201 TAILQ_NEXT(r, entries)); 4202 /* icmp only. don't create states on replies */ 4203 PF_TEST_ATTRIB((r->keep_state && !ctx->state_icmp && 4204 (r->rule_flag & PFRULE_STATESLOPPY) == 0 && 4205 ctx->icmp_dir != PF_IN && 4206 ctx->icmptype != ND_NEIGHBOR_ADVERT), 4207 TAILQ_NEXT(r, entries)); 4208 break; 4209 4210 default: 4211 break; 4212 } 4213 4214 PF_TEST_ATTRIB((r->rule_flag & PFRULE_FRAGMENT && 4215 ctx->pd->virtual_proto != PF_VPROTO_FRAGMENT), 4216 TAILQ_NEXT(r, entries)); 4217 PF_TEST_ATTRIB((r->tos && !(r->tos == ctx->pd->tos)), 4218 TAILQ_NEXT(r, entries)); 4219 PF_TEST_ATTRIB((r->prob && 4220 r->prob <= arc4random_uniform(UINT_MAX - 1) + 1), 4221 TAILQ_NEXT(r, entries)); 4222 PF_TEST_ATTRIB((r->match_tag && 4223 !pf_match_tag(ctx->pd->m, r, &ctx->tag)), 4224 TAILQ_NEXT(r, entries)); 4225 PF_TEST_ATTRIB((r->rcv_kif && pf_match_rcvif(ctx->pd->m, r) == 4226 r->rcvifnot), 4227 TAILQ_NEXT(r, entries)); 4228 PF_TEST_ATTRIB((r->prio && 4229 (r->prio == PF_PRIO_ZERO ? 0 : r->prio) != 4230 ctx->pd->m->m_pkthdr.pf.prio), 4231 TAILQ_NEXT(r, entries)); 4232 4233 /* must be last! */ 4234 if (r->pktrate.limit) { 4235 pf_add_threshold(&r->pktrate); 4236 PF_TEST_ATTRIB((pf_check_threshold(&r->pktrate)), 4237 TAILQ_NEXT(r, entries)); 4238 } 4239 4240 /* FALLTHROUGH */ 4241 if (r->tag) 4242 ctx->tag = r->tag; 4243 if (r->anchor == NULL) { 4244 4245 if (r->rule_flag & PFRULE_ONCE) { 4246 u_int32_t rule_flag; 4247 4248 rule_flag = r->rule_flag; 4249 if (((rule_flag & PFRULE_EXPIRED) == 0) && 4250 atomic_cas_uint(&r->rule_flag, rule_flag, 4251 rule_flag | PFRULE_EXPIRED) == rule_flag) { 4252 r->exptime = gettime(); 4253 } else { 4254 r = TAILQ_NEXT(r, entries); 4255 continue; 4256 } 4257 } 4258 4259 if (r->action == PF_MATCH) { 4260 if ((ctx->ri = pool_get(&pf_rule_item_pl, 4261 PR_NOWAIT)) == NULL) { 4262 REASON_SET(&ctx->reason, PFRES_MEMORY); 4263 return (PF_TEST_FAIL); 4264 } 4265 ctx->ri->r = r; 4266 /* order is irrelevant */ 4267 SLIST_INSERT_HEAD(&ctx->rules, ctx->ri, entry); 4268 ctx->ri = NULL; 4269 pf_rule_to_actions(r, &ctx->act); 4270 if (r->rule_flag & PFRULE_AFTO) 4271 ctx->pd->naf = r->naf; 4272 if (pf_get_transaddr(r, ctx->pd, ctx->sns, 4273 &ctx->nr) == -1) { 4274 REASON_SET(&ctx->reason, 4275 PFRES_TRANSLATE); 4276 return (PF_TEST_FAIL); 4277 } 4278 #if NPFLOG > 0 4279 if (r->log) { 4280 REASON_SET(&ctx->reason, PFRES_MATCH); 4281 pflog_packet(ctx->pd, ctx->reason, r, 4282 ctx->a, ruleset, NULL); 4283 } 4284 #endif /* NPFLOG > 0 */ 4285 } else { 4286 /* 4287 * found matching r 4288 */ 4289 *ctx->rm = r; 4290 /* 4291 * anchor, with its ruleset, that r belongs to 4292 */ 4293 *ctx->am = ctx->a; 4294 /* 4295 * ruleset that r belongs to 4296 */ 4297 *ctx->rsm = ruleset; 4298 /* 4299 * ruleset that the anchor belongs to. 
4300 */ 4301 ctx->arsm = ctx->aruleset; 4302 } 4303 4304 #if NPFLOG > 0 4305 if (ctx->act.log & PF_LOG_MATCHES) 4306 pf_log_matches(ctx->pd, r, ctx->a, ruleset, 4307 &ctx->rules); 4308 #endif /* NPFLOG > 0 */ 4309 4310 if (r->quick) 4311 return (PF_TEST_QUICK); 4312 } else { 4313 ctx->aruleset = &r->anchor->ruleset; 4314 if (r->anchor_wildcard) { 4315 RB_FOREACH(child, pf_anchor_node, 4316 &r->anchor->children) { 4317 if (pf_anchor_stack_push(ruleset, 4318 ctx->a, r, child, 4319 PF_NEXT_CHILD) != 0) 4320 return (PF_TEST_FAIL); 4321 4322 ctx->a = r; 4323 ruleset = &child->ruleset; 4324 goto enter_ruleset; 4325 next_child: 4326 continue; /* with RB_FOREACH() */ 4327 } 4328 } else { 4329 if (pf_anchor_stack_push(ruleset, ctx->a, 4330 r, child, PF_NEXT_RULE) != 0) 4331 return (PF_TEST_FAIL); 4332 4333 ctx->a = r; 4334 ruleset = &r->anchor->ruleset; 4335 child = NULL; 4336 goto enter_ruleset; 4337 next_rule: 4338 ; 4339 } 4340 } 4341 r = TAILQ_NEXT(r, entries); 4342 } 4343 4344 if (pf_anchor_stack_pop(&ruleset, &ctx->a, &r, &child, 4345 &target) == 0) { 4346 4347 /* stop if any rule matched within quick anchors. */ 4348 if (r->quick == PF_TEST_QUICK && *ctx->am == r) 4349 return (PF_TEST_QUICK); 4350 4351 switch (target) { 4352 case PF_NEXT_CHILD: 4353 goto next_child; 4354 case PF_NEXT_RULE: 4355 goto next_rule; 4356 default: 4357 panic("%s: unknown jump target", __func__); 4358 } 4359 } 4360 4361 return (PF_TEST_OK); 4362 } 4363 4364 int 4365 pf_test_rule(struct pf_pdesc *pd, struct pf_rule **rm, struct pf_state **sm, 4366 struct pf_rule **am, struct pf_ruleset **rsm, u_short *reason) 4367 { 4368 struct pf_rule *r = NULL; 4369 struct pf_rule *a = NULL; 4370 struct pf_ruleset *ruleset = NULL; 4371 struct pf_state_key *skw = NULL, *sks = NULL; 4372 int rewrite = 0; 4373 u_int16_t virtual_type, virtual_id; 4374 int action = PF_DROP; 4375 struct pf_test_ctx ctx; 4376 int rv; 4377 4378 PF_ASSERT_LOCKED(); 4379 4380 memset(&ctx, 0, sizeof(ctx)); 4381 ctx.pd = pd; 4382 ctx.rm = rm; 4383 ctx.am = am; 4384 ctx.rsm = rsm; 4385 ctx.th = &pd->hdr.tcp; 4386 ctx.act.rtableid = pd->rdomain; 4387 ctx.tag = -1; 4388 SLIST_INIT(&ctx.rules); 4389 4390 if (pd->dir == PF_IN && if_congested()) { 4391 REASON_SET(&ctx.reason, PFRES_CONGEST); 4392 return (PF_DROP); 4393 } 4394 4395 switch (pd->virtual_proto) { 4396 case IPPROTO_ICMP: 4397 ctx.icmptype = pd->hdr.icmp.icmp_type; 4398 ctx.icmpcode = pd->hdr.icmp.icmp_code; 4399 ctx.state_icmp = pf_icmp_mapping(pd, ctx.icmptype, 4400 &ctx.icmp_dir, &virtual_id, &virtual_type); 4401 if (ctx.icmp_dir == PF_IN) { 4402 pd->osport = pd->nsport = virtual_id; 4403 pd->odport = pd->ndport = virtual_type; 4404 } else { 4405 pd->osport = pd->nsport = virtual_type; 4406 pd->odport = pd->ndport = virtual_id; 4407 } 4408 break; 4409 #ifdef INET6 4410 case IPPROTO_ICMPV6: 4411 ctx.icmptype = pd->hdr.icmp6.icmp6_type; 4412 ctx.icmpcode = pd->hdr.icmp6.icmp6_code; 4413 ctx.state_icmp = pf_icmp_mapping(pd, ctx.icmptype, 4414 &ctx.icmp_dir, &virtual_id, &virtual_type); 4415 if (ctx.icmp_dir == PF_IN) { 4416 pd->osport = pd->nsport = virtual_id; 4417 pd->odport = pd->ndport = virtual_type; 4418 } else { 4419 pd->osport = pd->nsport = virtual_type; 4420 pd->odport = pd->ndport = virtual_id; 4421 } 4422 break; 4423 #endif /* INET6 */ 4424 } 4425 4426 ruleset = &pf_main_ruleset; 4427 rv = pf_match_rule(&ctx, ruleset); 4428 if (rv == PF_TEST_FAIL) { 4429 /* 4430 * Reason has been set in pf_match_rule() already. 
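 * Any match rules already collected in ctx.rules are released
 * in the cleanup path below.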
4431 */ 4432 goto cleanup; 4433 } 4434 4435 r = *ctx.rm; /* matching rule */ 4436 a = *ctx.am; /* rule that defines an anchor containing 'r' */ 4437 ruleset = *ctx.rsm;/* ruleset of the anchor defined by the rule 'a' */ 4438 ctx.aruleset = ctx.arsm;/* ruleset of the 'a' rule itself */ 4439 4440 /* apply actions for last matching pass/block rule */ 4441 pf_rule_to_actions(r, &ctx.act); 4442 if (r->rule_flag & PFRULE_AFTO) 4443 pd->naf = r->naf; 4444 if (pf_get_transaddr(r, pd, ctx.sns, &ctx.nr) == -1) { 4445 REASON_SET(&ctx.reason, PFRES_TRANSLATE); 4446 goto cleanup; 4447 } 4448 REASON_SET(&ctx.reason, PFRES_MATCH); 4449 4450 #if NPFLOG > 0 4451 if (r->log) 4452 pflog_packet(pd, ctx.reason, r, a, ruleset, NULL); 4453 if (ctx.act.log & PF_LOG_MATCHES) 4454 pf_log_matches(pd, r, a, ruleset, &ctx.rules); 4455 #endif /* NPFLOG > 0 */ 4456 4457 if (pd->virtual_proto != PF_VPROTO_FRAGMENT && 4458 (r->action == PF_DROP) && 4459 ((r->rule_flag & PFRULE_RETURNRST) || 4460 (r->rule_flag & PFRULE_RETURNICMP) || 4461 (r->rule_flag & PFRULE_RETURN))) { 4462 if (pd->proto == IPPROTO_TCP && 4463 ((r->rule_flag & PFRULE_RETURNRST) || 4464 (r->rule_flag & PFRULE_RETURN)) && 4465 !(ctx.th->th_flags & TH_RST)) { 4466 u_int32_t ack = 4467 ntohl(ctx.th->th_seq) + pd->p_len; 4468 4469 if (pf_check_tcp_cksum(pd->m, pd->off, 4470 pd->tot_len - pd->off, pd->af)) 4471 REASON_SET(&ctx.reason, PFRES_PROTCKSUM); 4472 else { 4473 if (ctx.th->th_flags & TH_SYN) 4474 ack++; 4475 if (ctx.th->th_flags & TH_FIN) 4476 ack++; 4477 pf_send_tcp(r, pd->af, pd->dst, 4478 pd->src, ctx.th->th_dport, 4479 ctx.th->th_sport, ntohl(ctx.th->th_ack), 4480 ack, TH_RST|TH_ACK, 0, 0, r->return_ttl, 4481 1, 0, pd->rdomain); 4482 } 4483 } else if ((pd->proto != IPPROTO_ICMP || 4484 ICMP_INFOTYPE(ctx.icmptype)) && pd->af == AF_INET && 4485 r->return_icmp) 4486 pf_send_icmp(pd->m, r->return_icmp >> 8, 4487 r->return_icmp & 255, 0, pd->af, r, pd->rdomain); 4488 else if ((pd->proto != IPPROTO_ICMPV6 || 4489 (ctx.icmptype >= ICMP6_ECHO_REQUEST && 4490 ctx.icmptype != ND_REDIRECT)) && pd->af == AF_INET6 && 4491 r->return_icmp6) 4492 pf_send_icmp(pd->m, r->return_icmp6 >> 8, 4493 r->return_icmp6 & 255, 0, pd->af, r, pd->rdomain); 4494 } 4495 4496 if (r->action == PF_DROP) 4497 goto cleanup; 4498 4499 pf_tag_packet(pd->m, ctx.tag, ctx.act.rtableid); 4500 if (ctx.act.rtableid >= 0 && 4501 rtable_l2(ctx.act.rtableid) != pd->rdomain) 4502 pd->destchg = 1; 4503 4504 if (r->action == PF_PASS && pd->badopts != 0 && ! 
r->allow_opts) { 4505 REASON_SET(&ctx.reason, PFRES_IPOPTIONS); 4506 #if NPFLOG > 0 4507 pd->pflog |= PF_LOG_FORCE; 4508 #endif /* NPFLOG > 0 */ 4509 DPFPRINTF(LOG_NOTICE, "dropping packet with " 4510 "ip/ipv6 options in pf_test_rule()"); 4511 goto cleanup; 4512 } 4513 4514 if (pd->virtual_proto != PF_VPROTO_FRAGMENT 4515 && !ctx.state_icmp && r->keep_state) { 4516 4517 if (r->rule_flag & PFRULE_SRCTRACK && 4518 pf_insert_src_node(&ctx.sns[PF_SN_NONE], r, PF_SN_NONE, 4519 pd->af, pd->src, NULL, NULL) != 0) { 4520 REASON_SET(&ctx.reason, PFRES_SRCLIMIT); 4521 goto cleanup; 4522 } 4523 4524 if (r->max_states && (r->states_cur >= r->max_states)) { 4525 pf_status.lcounters[LCNT_STATES]++; 4526 REASON_SET(&ctx.reason, PFRES_MAXSTATES); 4527 goto cleanup; 4528 } 4529 4530 action = pf_create_state(pd, r, a, ctx.nr, &skw, &sks, 4531 &rewrite, sm, ctx.tag, &ctx.rules, &ctx.act, ctx.sns); 4532 4533 if (action != PF_PASS) 4534 goto cleanup; 4535 if (sks != skw) { 4536 struct pf_state_key *sk; 4537 4538 if (pd->dir == PF_IN) 4539 sk = sks; 4540 else 4541 sk = skw; 4542 rewrite += pf_translate(pd, 4543 &sk->addr[pd->af == pd->naf ? pd->sidx : pd->didx], 4544 sk->port[pd->af == pd->naf ? pd->sidx : pd->didx], 4545 &sk->addr[pd->af == pd->naf ? pd->didx : pd->sidx], 4546 sk->port[pd->af == pd->naf ? pd->didx : pd->sidx], 4547 virtual_type, ctx.icmp_dir); 4548 } 4549 4550 #ifdef INET6 4551 if (rewrite && skw->af != sks->af) 4552 action = PF_AFRT; 4553 #endif /* INET6 */ 4554 4555 } else { 4556 action = PF_PASS; 4557 4558 while ((ctx.ri = SLIST_FIRST(&ctx.rules))) { 4559 SLIST_REMOVE_HEAD(&ctx.rules, entry); 4560 pool_put(&pf_rule_item_pl, ctx.ri); 4561 } 4562 } 4563 4564 /* copy back packet headers if needed */ 4565 if (rewrite && pd->hdrlen) { 4566 m_copyback(pd->m, pd->off, pd->hdrlen, &pd->hdr, M_NOWAIT); 4567 } 4568 4569 #if NPFSYNC > 0 4570 if (*sm != NULL && !ISSET((*sm)->state_flags, PFSTATE_NOSYNC) && 4571 pd->dir == PF_OUT && pfsync_is_up()) { 4572 /* 4573 * We want the state created, but we don't 4574 * want to send this in case a partner 4575 * firewall has to know about it to allow 4576 * replies through it. 
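 * pfsync_defer() holds the packet back until the peer has
 * acknowledged the state (or the defer timeout fires).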
4577 */ 4578 if (pfsync_defer(*sm, pd->m)) 4579 return (PF_DEFER); 4580 } 4581 #endif /* NPFSYNC > 0 */ 4582 4583 return (action); 4584 4585 cleanup: 4586 while ((ctx.ri = SLIST_FIRST(&ctx.rules))) { 4587 SLIST_REMOVE_HEAD(&ctx.rules, entry); 4588 pool_put(&pf_rule_item_pl, ctx.ri); 4589 } 4590 4591 return (action); 4592 } 4593 4594 static __inline int 4595 pf_create_state(struct pf_pdesc *pd, struct pf_rule *r, struct pf_rule *a, 4596 struct pf_rule *nr, struct pf_state_key **skw, struct pf_state_key **sks, 4597 int *rewrite, struct pf_state **sm, int tag, struct pf_rule_slist *rules, 4598 struct pf_rule_actions *act, struct pf_src_node *sns[PF_SN_MAX]) 4599 { 4600 struct pf_state *st = NULL; 4601 struct tcphdr *th = &pd->hdr.tcp; 4602 u_short reason; 4603 u_int i; 4604 4605 st = pool_get(&pf_state_pl, PR_NOWAIT | PR_ZERO); 4606 if (st == NULL) { 4607 REASON_SET(&reason, PFRES_MEMORY); 4608 goto csfailed; 4609 } 4610 st->rule.ptr = r; 4611 st->anchor.ptr = a; 4612 st->natrule.ptr = nr; 4613 if (r->allow_opts) 4614 st->state_flags |= PFSTATE_ALLOWOPTS; 4615 if (r->rule_flag & PFRULE_STATESLOPPY) 4616 st->state_flags |= PFSTATE_SLOPPY; 4617 if (r->rule_flag & PFRULE_PFLOW) 4618 st->state_flags |= PFSTATE_PFLOW; 4619 if (r->rule_flag & PFRULE_NOSYNC) 4620 st->state_flags |= PFSTATE_NOSYNC; 4621 #if NPFLOG > 0 4622 st->log = act->log & PF_LOG_ALL; 4623 #endif /* NPFLOG > 0 */ 4624 st->qid = act->qid; 4625 st->pqid = act->pqid; 4626 st->rtableid[pd->didx] = act->rtableid; 4627 st->rtableid[pd->sidx] = -1; /* return traffic is routed normally */ 4628 st->min_ttl = act->min_ttl; 4629 st->set_tos = act->set_tos; 4630 st->max_mss = act->max_mss; 4631 st->state_flags |= act->flags; 4632 #if NPFSYNC > 0 4633 st->sync_state = PFSYNC_S_NONE; 4634 #endif /* NPFSYNC > 0 */ 4635 st->set_prio[0] = act->set_prio[0]; 4636 st->set_prio[1] = act->set_prio[1]; 4637 st->delay = act->delay; 4638 SLIST_INIT(&st->src_nodes); 4639 4640 /* 4641 * must initialize refcnt before pf_state_insert() gets called. 4642 * pf_state_insert() grabs a reference for pfsync! 
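 * Initializing it any later would race with that reference.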
4643 */ 4644 PF_REF_INIT(st->refcnt); 4645 mtx_init(&st->mtx, IPL_NET); 4646 4647 switch (pd->proto) { 4648 case IPPROTO_TCP: 4649 st->src.seqlo = ntohl(th->th_seq); 4650 st->src.seqhi = st->src.seqlo + pd->p_len + 1; 4651 if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN && 4652 r->keep_state == PF_STATE_MODULATE) { 4653 /* Generate sequence number modulator */ 4654 st->src.seqdiff = pf_tcp_iss(pd) - st->src.seqlo; 4655 if (st->src.seqdiff == 0) 4656 st->src.seqdiff = 1; 4657 pf_patch_32(pd, &th->th_seq, 4658 htonl(st->src.seqlo + st->src.seqdiff)); 4659 *rewrite = 1; 4660 } else 4661 st->src.seqdiff = 0; 4662 if (th->th_flags & TH_SYN) { 4663 st->src.seqhi++; 4664 st->src.wscale = pf_get_wscale(pd); 4665 } 4666 st->src.max_win = MAX(ntohs(th->th_win), 1); 4667 if (st->src.wscale & PF_WSCALE_MASK) { 4668 /* Remove scale factor from initial window */ 4669 int win = st->src.max_win; 4670 win += 1 << (st->src.wscale & PF_WSCALE_MASK); 4671 st->src.max_win = (win - 1) >> 4672 (st->src.wscale & PF_WSCALE_MASK); 4673 } 4674 if (th->th_flags & TH_FIN) 4675 st->src.seqhi++; 4676 st->dst.seqhi = 1; 4677 st->dst.max_win = 1; 4678 pf_set_protostate(st, PF_PEER_SRC, TCPS_SYN_SENT); 4679 pf_set_protostate(st, PF_PEER_DST, TCPS_CLOSED); 4680 st->timeout = PFTM_TCP_FIRST_PACKET; 4681 pf_status.states_halfopen++; 4682 break; 4683 case IPPROTO_UDP: 4684 pf_set_protostate(st, PF_PEER_SRC, PFUDPS_SINGLE); 4685 pf_set_protostate(st, PF_PEER_DST, PFUDPS_NO_TRAFFIC); 4686 st->timeout = PFTM_UDP_FIRST_PACKET; 4687 break; 4688 case IPPROTO_ICMP: 4689 #ifdef INET6 4690 case IPPROTO_ICMPV6: 4691 #endif /* INET6 */ 4692 st->timeout = PFTM_ICMP_FIRST_PACKET; 4693 break; 4694 default: 4695 pf_set_protostate(st, PF_PEER_SRC, PFOTHERS_SINGLE); 4696 pf_set_protostate(st, PF_PEER_DST, PFOTHERS_NO_TRAFFIC); 4697 st->timeout = PFTM_OTHER_FIRST_PACKET; 4698 } 4699 4700 st->creation = getuptime(); 4701 st->expire = getuptime(); 4702 4703 if (pd->proto == IPPROTO_TCP) { 4704 if (st->state_flags & PFSTATE_SCRUB_TCP && 4705 pf_normalize_tcp_init(pd, &st->src)) { 4706 REASON_SET(&reason, PFRES_MEMORY); 4707 goto csfailed; 4708 } 4709 if (st->state_flags & PFSTATE_SCRUB_TCP && st->src.scrub && 4710 pf_normalize_tcp_stateful(pd, &reason, st, 4711 &st->src, &st->dst, rewrite)) { 4712 /* This really shouldn't happen!!! */ 4713 DPFPRINTF(LOG_ERR, 4714 "%s: tcp normalize failed on first pkt", __func__); 4715 goto csfailed; 4716 } 4717 } 4718 st->direction = pd->dir; 4719 4720 if (pf_state_key_setup(pd, skw, sks, act->rtableid)) { 4721 REASON_SET(&reason, PFRES_MEMORY); 4722 goto csfailed; 4723 } 4724 4725 if (pf_set_rt_ifp(st, pd->src, (*skw)->af, sns) != 0) { 4726 REASON_SET(&reason, PFRES_NOROUTE); 4727 goto csfailed; 4728 } 4729 4730 for (i = 0; i < PF_SN_MAX; i++) 4731 if (sns[i] != NULL) { 4732 struct pf_sn_item *sni; 4733 4734 sni = pool_get(&pf_sn_item_pl, PR_NOWAIT); 4735 if (sni == NULL) { 4736 REASON_SET(&reason, PFRES_MEMORY); 4737 goto csfailed; 4738 } 4739 sni->sn = sns[i]; 4740 SLIST_INSERT_HEAD(&st->src_nodes, sni, next); 4741 sni->sn->states++; 4742 } 4743 4744 #if NPFSYNC > 0 4745 pfsync_init_state(st, *skw, *sks, 0); 4746 #endif 4747 4748 if (pf_state_insert(BOUND_IFACE(r, pd->kif), skw, sks, st)) { 4749 *sks = *skw = NULL; 4750 REASON_SET(&reason, PFRES_STATEINS); 4751 goto csfailed; 4752 } else 4753 *sm = st; 4754 4755 /* 4756 * Make state responsible for rules it binds here. 
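 * The memcpy/memset pair below hands the match rule list over to
 * the state; zeroing the caller's list head keeps the cleanup path
 * in pf_test_rule() from freeing items the state now owns.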
4757 */ 4758 memcpy(&st->match_rules, rules, sizeof(st->match_rules)); 4759 memset(rules, 0, sizeof(*rules)); 4760 STATE_INC_COUNTERS(st); 4761 4762 if (tag > 0) { 4763 pf_tag_ref(tag); 4764 st->tag = tag; 4765 } 4766 if (pd->proto == IPPROTO_TCP && (th->th_flags & (TH_SYN|TH_ACK)) == 4767 TH_SYN && r->keep_state == PF_STATE_SYNPROXY && pd->dir == PF_IN) { 4768 int rtid; 4769 uint16_t mss, mssdflt; 4770 4771 rtid = (act->rtableid >= 0) ? act->rtableid : pd->rdomain; 4772 pf_set_protostate(st, PF_PEER_SRC, PF_TCPS_PROXY_SRC); 4773 st->src.seqhi = arc4random(); 4774 /* Find mss option */ 4775 mssdflt = atomic_load_int(&tcp_mssdflt); 4776 mss = pf_get_mss(pd, mssdflt); 4777 mss = pf_calc_mss(pd->src, pd->af, rtid, mss, mssdflt); 4778 mss = pf_calc_mss(pd->dst, pd->af, rtid, mss, mssdflt); 4779 st->src.mss = mss; 4780 pf_send_tcp(r, pd->af, pd->dst, pd->src, th->th_dport, 4781 th->th_sport, st->src.seqhi, ntohl(th->th_seq) + 1, 4782 TH_SYN|TH_ACK, 0, st->src.mss, 0, 1, 0, pd->rdomain); 4783 REASON_SET(&reason, PFRES_SYNPROXY); 4784 return (PF_SYNPROXY_DROP); 4785 } 4786 4787 return (PF_PASS); 4788 4789 csfailed: 4790 if (st) { 4791 pf_normalize_tcp_cleanup(st); /* safe even w/o init */ 4792 pf_src_tree_remove_state(st); 4793 pool_put(&pf_state_pl, st); 4794 } 4795 4796 for (i = 0; i < PF_SN_MAX; i++) 4797 if (sns[i] != NULL) 4798 pf_remove_src_node(sns[i]); 4799 4800 return (PF_DROP); 4801 } 4802 4803 int 4804 pf_translate(struct pf_pdesc *pd, struct pf_addr *saddr, u_int16_t sport, 4805 struct pf_addr *daddr, u_int16_t dport, u_int16_t virtual_type, 4806 int icmp_dir) 4807 { 4808 int rewrite = 0; 4809 int afto = pd->af != pd->naf; 4810 4811 if (afto || PF_ANEQ(daddr, pd->dst, pd->af)) 4812 pd->destchg = 1; 4813 4814 switch (pd->proto) { 4815 case IPPROTO_TCP: /* FALLTHROUGH */ 4816 case IPPROTO_UDP: 4817 rewrite += pf_patch_16(pd, pd->sport, sport); 4818 rewrite += pf_patch_16(pd, pd->dport, dport); 4819 break; 4820 4821 case IPPROTO_ICMP: 4822 if (pd->af != AF_INET) 4823 return (0); 4824 4825 #ifdef INET6 4826 if (afto) { 4827 if (pf_translate_icmp_af(pd, AF_INET6, &pd->hdr.icmp)) 4828 return (0); 4829 pd->proto = IPPROTO_ICMPV6; 4830 rewrite = 1; 4831 } 4832 #endif /* INET6 */ 4833 if (virtual_type == htons(ICMP_ECHO)) { 4834 u_int16_t icmpid = (icmp_dir == PF_IN) ? sport : dport; 4835 rewrite += pf_patch_16(pd, 4836 &pd->hdr.icmp.icmp_id, icmpid); 4837 } 4838 break; 4839 4840 #ifdef INET6 4841 case IPPROTO_ICMPV6: 4842 if (pd->af != AF_INET6) 4843 return (0); 4844 4845 if (afto) { 4846 if (pf_translate_icmp_af(pd, AF_INET, &pd->hdr.icmp6)) 4847 return (0); 4848 pd->proto = IPPROTO_ICMP; 4849 rewrite = 1; 4850 } 4851 if (virtual_type == htons(ICMP6_ECHO_REQUEST)) { 4852 u_int16_t icmpid = (icmp_dir == PF_IN) ? 
sport : dport; 4853 rewrite += pf_patch_16(pd, 4854 &pd->hdr.icmp6.icmp6_id, icmpid); 4855 } 4856 break; 4857 #endif /* INET6 */ 4858 } 4859 4860 if (!afto) { 4861 rewrite += pf_translate_a(pd, pd->src, saddr); 4862 rewrite += pf_translate_a(pd, pd->dst, daddr); 4863 } 4864 4865 return (rewrite); 4866 } 4867 4868 int 4869 pf_tcp_track_full(struct pf_pdesc *pd, struct pf_state **stp, u_short *reason, 4870 int *copyback, int reverse) 4871 { 4872 struct tcphdr *th = &pd->hdr.tcp; 4873 struct pf_state_peer *src, *dst; 4874 u_int16_t win = ntohs(th->th_win); 4875 u_int32_t ack, end, data_end, seq, orig_seq; 4876 u_int8_t sws, dws, psrc, pdst; 4877 int ackskew; 4878 4879 if ((pd->dir == (*stp)->direction && !reverse) || 4880 (pd->dir != (*stp)->direction && reverse)) { 4881 src = &(*stp)->src; 4882 dst = &(*stp)->dst; 4883 psrc = PF_PEER_SRC; 4884 pdst = PF_PEER_DST; 4885 } else { 4886 src = &(*stp)->dst; 4887 dst = &(*stp)->src; 4888 psrc = PF_PEER_DST; 4889 pdst = PF_PEER_SRC; 4890 } 4891 4892 if (src->wscale && dst->wscale && !(th->th_flags & TH_SYN)) { 4893 sws = src->wscale & PF_WSCALE_MASK; 4894 dws = dst->wscale & PF_WSCALE_MASK; 4895 } else 4896 sws = dws = 0; 4897 4898 /* 4899 * Sequence tracking algorithm from Guido van Rooij's paper: 4900 * http://www.madison-gurkha.com/publications/tcp_filtering/ 4901 * tcp_filtering.ps 4902 */ 4903 4904 orig_seq = seq = ntohl(th->th_seq); 4905 if (src->seqlo == 0) { 4906 /* First packet from this end. Set its state */ 4907 4908 if (((*stp)->state_flags & PFSTATE_SCRUB_TCP || dst->scrub) && 4909 src->scrub == NULL) { 4910 if (pf_normalize_tcp_init(pd, src)) { 4911 REASON_SET(reason, PFRES_MEMORY); 4912 return (PF_DROP); 4913 } 4914 } 4915 4916 /* Deferred generation of sequence number modulator */ 4917 if (dst->seqdiff && !src->seqdiff) { 4918 /* use random iss for the TCP server */ 4919 while ((src->seqdiff = arc4random() - seq) == 0) 4920 continue; 4921 ack = ntohl(th->th_ack) - dst->seqdiff; 4922 pf_patch_32(pd, &th->th_seq, htonl(seq + src->seqdiff)); 4923 pf_patch_32(pd, &th->th_ack, htonl(ack)); 4924 *copyback = 1; 4925 } else { 4926 ack = ntohl(th->th_ack); 4927 } 4928 4929 end = seq + pd->p_len; 4930 if (th->th_flags & TH_SYN) { 4931 end++; 4932 if (dst->wscale & PF_WSCALE_FLAG) { 4933 src->wscale = pf_get_wscale(pd); 4934 if (src->wscale & PF_WSCALE_FLAG) { 4935 /* Remove scale factor from initial 4936 * window */ 4937 sws = src->wscale & PF_WSCALE_MASK; 4938 win = ((u_int32_t)win + (1 << sws) - 1) 4939 >> sws; 4940 dws = dst->wscale & PF_WSCALE_MASK; 4941 } else { 4942 /* fixup other window */ 4943 dst->max_win = MIN(TCP_MAXWIN, 4944 (u_int32_t)dst->max_win << 4945 (dst->wscale & PF_WSCALE_MASK)); 4946 /* in case of a retrans SYN|ACK */ 4947 dst->wscale = 0; 4948 } 4949 } 4950 } 4951 data_end = end; 4952 if (th->th_flags & TH_FIN) 4953 end++; 4954 4955 src->seqlo = seq; 4956 if (src->state < TCPS_SYN_SENT) 4957 pf_set_protostate(*stp, psrc, TCPS_SYN_SENT); 4958 4959 /* 4960 * May need to slide the window (seqhi may have been set by 4961 * the crappy stack check or if we picked up the connection 4962 * after establishment) 4963 */ 4964 if (src->seqhi == 1 || 4965 SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi)) 4966 src->seqhi = end + MAX(1, dst->max_win << dws); 4967 if (win > src->max_win) 4968 src->max_win = win; 4969 4970 } else { 4971 ack = ntohl(th->th_ack) - dst->seqdiff; 4972 if (src->seqdiff) { 4973 /* Modulate sequence numbers */ 4974 pf_patch_32(pd, &th->th_seq, htonl(seq + src->seqdiff)); 4975 pf_patch_32(pd, &th->th_ack, 
htonl(ack)); 4976 *copyback = 1; 4977 } 4978 end = seq + pd->p_len; 4979 if (th->th_flags & TH_SYN) 4980 end++; 4981 data_end = end; 4982 if (th->th_flags & TH_FIN) 4983 end++; 4984 } 4985 4986 if ((th->th_flags & TH_ACK) == 0) { 4987 /* Let it pass through the ack skew check */ 4988 ack = dst->seqlo; 4989 } else if ((ack == 0 && 4990 (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) || 4991 /* broken tcp stacks do not set ack */ 4992 (dst->state < TCPS_SYN_SENT)) { 4993 /* 4994 * Many stacks (ours included) will set the ACK number in an 4995 * FIN|ACK if the SYN times out -- no sequence to ACK. 4996 */ 4997 ack = dst->seqlo; 4998 } 4999 5000 if (seq == end) { 5001 /* Ease sequencing restrictions on no data packets */ 5002 seq = src->seqlo; 5003 data_end = end = seq; 5004 } 5005 5006 ackskew = dst->seqlo - ack; 5007 5008 5009 /* 5010 * Need to demodulate the sequence numbers in any TCP SACK options 5011 * (Selective ACK). We could optionally validate the SACK values 5012 * against the current ACK window, either forwards or backwards, but 5013 * I'm not confident that SACK has been implemented properly 5014 * everywhere. It wouldn't surprise me if several stacks accidentally 5015 * SACK too far backwards of previously ACKed data. There really aren't 5016 * any security implications of bad SACKing unless the target stack 5017 * doesn't validate the option length correctly. Someone trying to 5018 * spoof into a TCP connection won't bother blindly sending SACK 5019 * options anyway. 5020 */ 5021 if (dst->seqdiff && (th->th_off << 2) > sizeof(struct tcphdr)) { 5022 if (pf_modulate_sack(pd, dst)) 5023 *copyback = 1; 5024 } 5025 5026 5027 #define MAXACKWINDOW (0xffff + 1500) /* 1500 is an arbitrary fudge factor */ 5028 if (SEQ_GEQ(src->seqhi, data_end) && 5029 /* Last octet inside other's window space */ 5030 SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) && 5031 /* Retrans: not more than one window back */ 5032 (ackskew >= -MAXACKWINDOW) && 5033 /* Acking not more than one reassembled fragment backwards */ 5034 (ackskew <= (MAXACKWINDOW << sws)) && 5035 /* Acking not more than one window forward */ 5036 ((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo || 5037 (orig_seq == src->seqlo + 1) || (orig_seq + 1 == src->seqlo))) { 5038 /* Require an exact/+1 sequence match on resets when possible */ 5039 5040 if (dst->scrub || src->scrub) { 5041 if (pf_normalize_tcp_stateful(pd, reason, *stp, src, 5042 dst, copyback)) 5043 return (PF_DROP); 5044 } 5045 5046 /* update max window */ 5047 if (src->max_win < win) 5048 src->max_win = win; 5049 /* synchronize sequencing */ 5050 if (SEQ_GT(end, src->seqlo)) 5051 src->seqlo = end; 5052 /* slide the window of what the other end can send */ 5053 if (SEQ_GEQ(ack + (win << sws), dst->seqhi)) 5054 dst->seqhi = ack + MAX((win << sws), 1); 5055 5056 /* update states */ 5057 if (th->th_flags & TH_SYN) 5058 if (src->state < TCPS_SYN_SENT) 5059 pf_set_protostate(*stp, psrc, TCPS_SYN_SENT); 5060 if (th->th_flags & TH_FIN) 5061 if (src->state < TCPS_CLOSING) 5062 pf_set_protostate(*stp, psrc, TCPS_CLOSING); 5063 if (th->th_flags & TH_ACK) { 5064 if (dst->state == TCPS_SYN_SENT) { 5065 pf_set_protostate(*stp, pdst, 5066 TCPS_ESTABLISHED); 5067 if (src->state == TCPS_ESTABLISHED && 5068 !SLIST_EMPTY(&(*stp)->src_nodes) && 5069 pf_src_connlimit(stp)) { 5070 REASON_SET(reason, PFRES_SRCLIMIT); 5071 return (PF_DROP); 5072 } 5073 } else if (dst->state == TCPS_CLOSING) 5074 pf_set_protostate(*stp, pdst, 5075 TCPS_FIN_WAIT_2); 5076 } 5077 if (th->th_flags & TH_RST) 
5078 pf_set_protostate(*stp, PF_PEER_BOTH, TCPS_TIME_WAIT); 5079 5080 /* update expire time */ 5081 (*stp)->expire = getuptime(); 5082 if (src->state >= TCPS_FIN_WAIT_2 && 5083 dst->state >= TCPS_FIN_WAIT_2) 5084 pf_update_state_timeout(*stp, PFTM_TCP_CLOSED); 5085 else if (src->state >= TCPS_CLOSING && 5086 dst->state >= TCPS_CLOSING) 5087 pf_update_state_timeout(*stp, PFTM_TCP_FIN_WAIT); 5088 else if (src->state < TCPS_ESTABLISHED || 5089 dst->state < TCPS_ESTABLISHED) 5090 pf_update_state_timeout(*stp, PFTM_TCP_OPENING); 5091 else if (src->state >= TCPS_CLOSING || 5092 dst->state >= TCPS_CLOSING) 5093 pf_update_state_timeout(*stp, PFTM_TCP_CLOSING); 5094 else 5095 pf_update_state_timeout(*stp, PFTM_TCP_ESTABLISHED); 5096 5097 /* Fall through to PASS packet */ 5098 } else if ((dst->state < TCPS_SYN_SENT || 5099 dst->state >= TCPS_FIN_WAIT_2 || 5100 src->state >= TCPS_FIN_WAIT_2) && 5101 SEQ_GEQ(src->seqhi + MAXACKWINDOW, data_end) && 5102 /* Within a window forward of the originating packet */ 5103 SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) { 5104 /* Within a window backward of the originating packet */ 5105 5106 /* 5107 * This currently handles three situations: 5108 * 1) Stupid stacks will shotgun SYNs before their peer 5109 * replies. 5110 * 2) When PF catches an already established stream (the 5111 * firewall rebooted, the state table was flushed, routes 5112 * changed...) 5113 * 3) Packets get funky immediately after the connection 5114 * closes (this should catch Solaris spurious ACK|FINs 5115 * that web servers like to spew after a close) 5116 * 5117 * This must be a little more careful than the above code 5118 * since packet floods will also be caught here. We don't 5119 * update the TTL here to mitigate the damage of a packet 5120 * flood and so the same code can handle awkward establishment 5121 * and a loosened connection close. 5122 * In the establishment case, a correct peer response will 5123 * validate the connection, go through the normal state code 5124 * and keep updating the state TTL. 5125 */ 5126 5127 if (pf_status.debug >= LOG_NOTICE) { 5128 log(LOG_NOTICE, "pf: loose state match: "); 5129 pf_print_state(*stp); 5130 pf_print_flags(th->th_flags); 5131 addlog(" seq=%u (%u) ack=%u len=%u ackskew=%d " 5132 "pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack, 5133 pd->p_len, ackskew, (*stp)->packets[0], 5134 (*stp)->packets[1], 5135 pd->dir == PF_IN ? "in" : "out", 5136 pd->dir == (*stp)->direction ? "fwd" : "rev"); 5137 } 5138 5139 if (dst->scrub || src->scrub) { 5140 if (pf_normalize_tcp_stateful(pd, reason, *stp, src, 5141 dst, copyback)) 5142 return (PF_DROP); 5143 } 5144 5145 /* update max window */ 5146 if (src->max_win < win) 5147 src->max_win = win; 5148 /* synchronize sequencing */ 5149 if (SEQ_GT(end, src->seqlo)) 5150 src->seqlo = end; 5151 /* slide the window of what the other end can send */ 5152 if (SEQ_GEQ(ack + (win << sws), dst->seqhi)) 5153 dst->seqhi = ack + MAX((win << sws), 1); 5154 5155 /* 5156 * Cannot set dst->seqhi here since this could be a shotgunned 5157 * SYN and not an already established connection. 
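 * (Situation 1 above; a correct peer response will validate the
 * connection and the normal state code will take over.)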
5158 */ 5159 if (th->th_flags & TH_FIN) 5160 if (src->state < TCPS_CLOSING) 5161 pf_set_protostate(*stp, psrc, TCPS_CLOSING); 5162 if (th->th_flags & TH_RST) 5163 pf_set_protostate(*stp, PF_PEER_BOTH, TCPS_TIME_WAIT); 5164 5165 /* Fall through to PASS packet */ 5166 } else { 5167 if ((*stp)->dst.state == TCPS_SYN_SENT && 5168 (*stp)->src.state == TCPS_SYN_SENT) { 5169 /* Send RST for state mismatches during handshake */ 5170 if (!(th->th_flags & TH_RST)) 5171 pf_send_tcp((*stp)->rule.ptr, pd->af, 5172 pd->dst, pd->src, th->th_dport, 5173 th->th_sport, ntohl(th->th_ack), 0, 5174 TH_RST, 0, 0, 5175 (*stp)->rule.ptr->return_ttl, 1, 0, 5176 pd->rdomain); 5177 src->seqlo = 0; 5178 src->seqhi = 1; 5179 src->max_win = 1; 5180 } else if (pf_status.debug >= LOG_NOTICE) { 5181 log(LOG_NOTICE, "pf: BAD state: "); 5182 pf_print_state(*stp); 5183 pf_print_flags(th->th_flags); 5184 addlog(" seq=%u (%u) ack=%u len=%u ackskew=%d " 5185 "pkts=%llu:%llu dir=%s,%s\n", 5186 seq, orig_seq, ack, pd->p_len, ackskew, 5187 (*stp)->packets[0], (*stp)->packets[1], 5188 pd->dir == PF_IN ? "in" : "out", 5189 pd->dir == (*stp)->direction ? "fwd" : "rev"); 5190 addlog("pf: State failure on: %c %c %c %c | %c %c\n", 5191 SEQ_GEQ(src->seqhi, data_end) ? ' ' : '1', 5192 SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ? 5193 ' ': '2', 5194 (ackskew >= -MAXACKWINDOW) ? ' ' : '3', 5195 (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4', 5196 SEQ_GEQ(src->seqhi + MAXACKWINDOW, data_end) ? 5197 ' ' :'5', 5198 SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' ' :'6'); 5199 } 5200 REASON_SET(reason, PFRES_BADSTATE); 5201 return (PF_DROP); 5202 } 5203 5204 return (PF_PASS); 5205 } 5206 5207 int 5208 pf_tcp_track_sloppy(struct pf_pdesc *pd, struct pf_state **stp, 5209 u_short *reason) 5210 { 5211 struct tcphdr *th = &pd->hdr.tcp; 5212 struct pf_state_peer *src, *dst; 5213 u_int8_t psrc, pdst; 5214 5215 if (pd->dir == (*stp)->direction) { 5216 src = &(*stp)->src; 5217 dst = &(*stp)->dst; 5218 psrc = PF_PEER_SRC; 5219 pdst = PF_PEER_DST; 5220 } else { 5221 src = &(*stp)->dst; 5222 dst = &(*stp)->src; 5223 psrc = PF_PEER_DST; 5224 pdst = PF_PEER_SRC; 5225 } 5226 5227 if (th->th_flags & TH_SYN) 5228 if (src->state < TCPS_SYN_SENT) 5229 pf_set_protostate(*stp, psrc, TCPS_SYN_SENT); 5230 if (th->th_flags & TH_FIN) 5231 if (src->state < TCPS_CLOSING) 5232 pf_set_protostate(*stp, psrc, TCPS_CLOSING); 5233 if (th->th_flags & TH_ACK) { 5234 if (dst->state == TCPS_SYN_SENT) { 5235 pf_set_protostate(*stp, pdst, TCPS_ESTABLISHED); 5236 if (src->state == TCPS_ESTABLISHED && 5237 !SLIST_EMPTY(&(*stp)->src_nodes) && 5238 pf_src_connlimit(stp)) { 5239 REASON_SET(reason, PFRES_SRCLIMIT); 5240 return (PF_DROP); 5241 } 5242 } else if (dst->state == TCPS_CLOSING) { 5243 pf_set_protostate(*stp, pdst, TCPS_FIN_WAIT_2); 5244 } else if (src->state == TCPS_SYN_SENT && 5245 dst->state < TCPS_SYN_SENT) { 5246 /* 5247 * Handle a special sloppy case where we only see one 5248 * half of the connection. If there is an ACK after 5249 * the initial SYN without ever seeing a packet from 5250 * the destination, set the connection to established. 
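 * Both peers are moved to ESTABLISHED since no further
 * transitions will ever be seen on the missing half.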
5251 */ 5252 pf_set_protostate(*stp, PF_PEER_BOTH, 5253 TCPS_ESTABLISHED); 5254 if (!SLIST_EMPTY(&(*stp)->src_nodes) && 5255 pf_src_connlimit(stp)) { 5256 REASON_SET(reason, PFRES_SRCLIMIT); 5257 return (PF_DROP); 5258 } 5259 } else if (src->state == TCPS_CLOSING && 5260 dst->state == TCPS_ESTABLISHED && 5261 dst->seqlo == 0) { 5262 /* 5263 * Handle the closing of half connections where we 5264 * don't see the full bidirectional FIN/ACK+ACK 5265 * handshake. 5266 */ 5267 pf_set_protostate(*stp, pdst, TCPS_CLOSING); 5268 } 5269 } 5270 if (th->th_flags & TH_RST) 5271 pf_set_protostate(*stp, PF_PEER_BOTH, TCPS_TIME_WAIT); 5272 5273 /* update expire time */ 5274 (*stp)->expire = getuptime(); 5275 if (src->state >= TCPS_FIN_WAIT_2 && 5276 dst->state >= TCPS_FIN_WAIT_2) 5277 pf_update_state_timeout(*stp, PFTM_TCP_CLOSED); 5278 else if (src->state >= TCPS_CLOSING && 5279 dst->state >= TCPS_CLOSING) 5280 pf_update_state_timeout(*stp, PFTM_TCP_FIN_WAIT); 5281 else if (src->state < TCPS_ESTABLISHED || 5282 dst->state < TCPS_ESTABLISHED) 5283 pf_update_state_timeout(*stp, PFTM_TCP_OPENING); 5284 else if (src->state >= TCPS_CLOSING || 5285 dst->state >= TCPS_CLOSING) 5286 pf_update_state_timeout(*stp, PFTM_TCP_CLOSING); 5287 else 5288 pf_update_state_timeout(*stp, PFTM_TCP_ESTABLISHED); 5289 5290 return (PF_PASS); 5291 } 5292 5293 static __inline int 5294 pf_synproxy(struct pf_pdesc *pd, struct pf_state **stp, u_short *reason) 5295 { 5296 struct pf_state_key *sk = (*stp)->key[pd->didx]; 5297 5298 if ((*stp)->src.state == PF_TCPS_PROXY_SRC) { 5299 struct tcphdr *th = &pd->hdr.tcp; 5300 5301 if (pd->dir != (*stp)->direction) { 5302 REASON_SET(reason, PFRES_SYNPROXY); 5303 return (PF_SYNPROXY_DROP); 5304 } 5305 if (th->th_flags & TH_SYN) { 5306 if (ntohl(th->th_seq) != (*stp)->src.seqlo) { 5307 REASON_SET(reason, PFRES_SYNPROXY); 5308 return (PF_DROP); 5309 } 5310 pf_send_tcp((*stp)->rule.ptr, pd->af, pd->dst, 5311 pd->src, th->th_dport, th->th_sport, 5312 (*stp)->src.seqhi, ntohl(th->th_seq) + 1, 5313 TH_SYN|TH_ACK, 0, (*stp)->src.mss, 0, 1, 5314 0, pd->rdomain); 5315 REASON_SET(reason, PFRES_SYNPROXY); 5316 return (PF_SYNPROXY_DROP); 5317 } else if ((th->th_flags & (TH_ACK|TH_RST|TH_FIN)) != TH_ACK || 5318 (ntohl(th->th_ack) != (*stp)->src.seqhi + 1) || 5319 (ntohl(th->th_seq) != (*stp)->src.seqlo + 1)) { 5320 REASON_SET(reason, PFRES_SYNPROXY); 5321 return (PF_DROP); 5322 } else if (!SLIST_EMPTY(&(*stp)->src_nodes) && 5323 pf_src_connlimit(stp)) { 5324 REASON_SET(reason, PFRES_SRCLIMIT); 5325 return (PF_DROP); 5326 } else 5327 pf_set_protostate(*stp, PF_PEER_SRC, 5328 PF_TCPS_PROXY_DST); 5329 } 5330 if ((*stp)->src.state == PF_TCPS_PROXY_DST) { 5331 struct tcphdr *th = &pd->hdr.tcp; 5332 5333 if (pd->dir == (*stp)->direction) { 5334 if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) || 5335 (ntohl(th->th_ack) != (*stp)->src.seqhi + 1) || 5336 (ntohl(th->th_seq) != (*stp)->src.seqlo + 1)) { 5337 REASON_SET(reason, PFRES_SYNPROXY); 5338 return (PF_DROP); 5339 } 5340 (*stp)->src.max_win = MAX(ntohs(th->th_win), 1); 5341 if ((*stp)->dst.seqhi == 1) 5342 (*stp)->dst.seqhi = arc4random(); 5343 pf_send_tcp((*stp)->rule.ptr, pd->af, 5344 &sk->addr[pd->sidx], &sk->addr[pd->didx], 5345 sk->port[pd->sidx], sk->port[pd->didx], 5346 (*stp)->dst.seqhi, 0, TH_SYN, 0, 5347 (*stp)->src.mss, 0, 0, (*stp)->tag, 5348 sk->rdomain); 5349 REASON_SET(reason, PFRES_SYNPROXY); 5350 return (PF_SYNPROXY_DROP); 5351 } else if (((th->th_flags & (TH_SYN|TH_ACK)) != 5352 (TH_SYN|TH_ACK)) || 5353 (ntohl(th->th_ack) != (*stp)->dst.seqhi 
+ 1)) { 5354 REASON_SET(reason, PFRES_SYNPROXY); 5355 return (PF_DROP); 5356 } else { 5357 (*stp)->dst.max_win = MAX(ntohs(th->th_win), 1); 5358 (*stp)->dst.seqlo = ntohl(th->th_seq); 5359 pf_send_tcp((*stp)->rule.ptr, pd->af, pd->dst, 5360 pd->src, th->th_dport, th->th_sport, 5361 ntohl(th->th_ack), ntohl(th->th_seq) + 1, 5362 TH_ACK, (*stp)->src.max_win, 0, 0, 0, 5363 (*stp)->tag, pd->rdomain); 5364 pf_send_tcp((*stp)->rule.ptr, pd->af, 5365 &sk->addr[pd->sidx], &sk->addr[pd->didx], 5366 sk->port[pd->sidx], sk->port[pd->didx], 5367 (*stp)->src.seqhi + 1, (*stp)->src.seqlo + 1, 5368 TH_ACK, (*stp)->dst.max_win, 0, 0, 1, 5369 0, sk->rdomain); 5370 (*stp)->src.seqdiff = (*stp)->dst.seqhi - 5371 (*stp)->src.seqlo; 5372 (*stp)->dst.seqdiff = (*stp)->src.seqhi - 5373 (*stp)->dst.seqlo; 5374 (*stp)->src.seqhi = (*stp)->src.seqlo + 5375 (*stp)->dst.max_win; 5376 (*stp)->dst.seqhi = (*stp)->dst.seqlo + 5377 (*stp)->src.max_win; 5378 (*stp)->src.wscale = (*stp)->dst.wscale = 0; 5379 pf_set_protostate(*stp, PF_PEER_BOTH, 5380 TCPS_ESTABLISHED); 5381 REASON_SET(reason, PFRES_SYNPROXY); 5382 return (PF_SYNPROXY_DROP); 5383 } 5384 } 5385 return (PF_PASS); 5386 } 5387 5388 int 5389 pf_test_state(struct pf_pdesc *pd, struct pf_state **stp, u_short *reason) 5390 { 5391 int copyback = 0; 5392 struct pf_state_peer *src, *dst; 5393 int action; 5394 struct inpcb *inp = pd->m->m_pkthdr.pf.inp; 5395 u_int8_t psrc, pdst; 5396 5397 action = PF_PASS; 5398 if (pd->dir == (*stp)->direction) { 5399 src = &(*stp)->src; 5400 dst = &(*stp)->dst; 5401 psrc = PF_PEER_SRC; 5402 pdst = PF_PEER_DST; 5403 } else { 5404 src = &(*stp)->dst; 5405 dst = &(*stp)->src; 5406 psrc = PF_PEER_DST; 5407 pdst = PF_PEER_SRC; 5408 } 5409 5410 switch (pd->virtual_proto) { 5411 case IPPROTO_TCP: 5412 if ((action = pf_synproxy(pd, stp, reason)) != PF_PASS) 5413 return (action); 5414 if ((pd->hdr.tcp.th_flags & (TH_SYN|TH_ACK)) == TH_SYN) { 5415 5416 if (dst->state >= TCPS_FIN_WAIT_2 && 5417 src->state >= TCPS_FIN_WAIT_2) { 5418 if (pf_status.debug >= LOG_NOTICE) { 5419 log(LOG_NOTICE, "pf: state reuse "); 5420 pf_print_state(*stp); 5421 pf_print_flags(pd->hdr.tcp.th_flags); 5422 addlog("\n"); 5423 } 5424 /* XXX make sure it's the same direction ?? */ 5425 pf_update_state_timeout(*stp, PFTM_PURGE); 5426 pf_state_unref(*stp); 5427 *stp = NULL; 5428 pf_mbuf_link_inpcb(pd->m, inp); 5429 return (PF_DROP); 5430 } else if (dst->state >= TCPS_ESTABLISHED && 5431 src->state >= TCPS_ESTABLISHED) { 5432 /* 5433 * SYN matches existing state??? 5434 * Typically happens when the sender boots up after a 5435 * sudden panic. Certain protocols (NFSv3) always 5436 * use the same port numbers. A challenge 5437 * ACK enables all parties (firewall and peers) 5438 * to get in sync again. 
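 * (This is the challenge ACK scheme described in RFC 5961.)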
5439 */ 5440 pf_send_challenge_ack(pd, *stp, src, dst); 5441 return (PF_DROP); 5442 } 5443 } 5444 5445 if ((*stp)->state_flags & PFSTATE_SLOPPY) { 5446 if (pf_tcp_track_sloppy(pd, stp, reason) == PF_DROP) 5447 return (PF_DROP); 5448 } else { 5449 if (pf_tcp_track_full(pd, stp, reason, &copyback, 5450 PF_REVERSED_KEY((*stp)->key, pd->af)) == PF_DROP) 5451 return (PF_DROP); 5452 } 5453 break; 5454 case IPPROTO_UDP: 5455 /* update states */ 5456 if (src->state < PFUDPS_SINGLE) 5457 pf_set_protostate(*stp, psrc, PFUDPS_SINGLE); 5458 if (dst->state == PFUDPS_SINGLE) 5459 pf_set_protostate(*stp, pdst, PFUDPS_MULTIPLE); 5460 5461 /* update expire time */ 5462 (*stp)->expire = getuptime(); 5463 if (src->state == PFUDPS_MULTIPLE && 5464 dst->state == PFUDPS_MULTIPLE) 5465 pf_update_state_timeout(*stp, PFTM_UDP_MULTIPLE); 5466 else 5467 pf_update_state_timeout(*stp, PFTM_UDP_SINGLE); 5468 break; 5469 default: 5470 /* update states */ 5471 if (src->state < PFOTHERS_SINGLE) 5472 pf_set_protostate(*stp, psrc, PFOTHERS_SINGLE); 5473 if (dst->state == PFOTHERS_SINGLE) 5474 pf_set_protostate(*stp, pdst, PFOTHERS_MULTIPLE); 5475 5476 /* update expire time */ 5477 (*stp)->expire = getuptime(); 5478 if (src->state == PFOTHERS_MULTIPLE && 5479 dst->state == PFOTHERS_MULTIPLE) 5480 pf_update_state_timeout(*stp, PFTM_OTHER_MULTIPLE); 5481 else 5482 pf_update_state_timeout(*stp, PFTM_OTHER_SINGLE); 5483 break; 5484 } 5485 5486 /* translate source/destination address, if necessary */ 5487 if ((*stp)->key[PF_SK_WIRE] != (*stp)->key[PF_SK_STACK]) { 5488 struct pf_state_key *nk; 5489 int afto, sidx, didx; 5490 5491 if (PF_REVERSED_KEY((*stp)->key, pd->af)) 5492 nk = (*stp)->key[pd->sidx]; 5493 else 5494 nk = (*stp)->key[pd->didx]; 5495 5496 afto = pd->af != nk->af; 5497 sidx = afto ? pd->didx : pd->sidx; 5498 didx = afto ? 
pd->sidx : pd->didx; 5499 5500 #ifdef INET6 5501 if (afto) { 5502 pf_addrcpy(&pd->nsaddr, &nk->addr[sidx], nk->af); 5503 pf_addrcpy(&pd->ndaddr, &nk->addr[didx], nk->af); 5504 pd->naf = nk->af; 5505 action = PF_AFRT; 5506 } 5507 #endif /* INET6 */ 5508 5509 if (!afto) 5510 pf_translate_a(pd, pd->src, &nk->addr[sidx]); 5511 5512 if (pd->sport != NULL) 5513 pf_patch_16(pd, pd->sport, nk->port[sidx]); 5514 5515 if (afto || PF_ANEQ(pd->dst, &nk->addr[didx], pd->af) || 5516 pd->rdomain != nk->rdomain) 5517 pd->destchg = 1; 5518 5519 if (!afto) 5520 pf_translate_a(pd, pd->dst, &nk->addr[didx]); 5521 5522 if (pd->dport != NULL) 5523 pf_patch_16(pd, pd->dport, nk->port[didx]); 5524 5525 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 5526 copyback = 1; 5527 } 5528 5529 if (copyback && pd->hdrlen > 0) { 5530 m_copyback(pd->m, pd->off, pd->hdrlen, &pd->hdr, M_NOWAIT); 5531 } 5532 5533 return (action); 5534 } 5535 5536 int 5537 pf_icmp_state_lookup(struct pf_pdesc *pd, struct pf_state_key_cmp *key, 5538 struct pf_state **stp, u_int16_t icmpid, u_int16_t type, 5539 int icmp_dir, int *iidx, int multi, int inner) 5540 { 5541 int direction, action; 5542 5543 key->af = pd->af; 5544 key->proto = pd->proto; 5545 key->rdomain = pd->rdomain; 5546 if (icmp_dir == PF_IN) { 5547 *iidx = pd->sidx; 5548 key->port[pd->sidx] = icmpid; 5549 key->port[pd->didx] = type; 5550 } else { 5551 *iidx = pd->didx; 5552 key->port[pd->sidx] = type; 5553 key->port[pd->didx] = icmpid; 5554 } 5555 5556 if (pf_state_key_addr_setup(pd, key, pd->sidx, pd->src, pd->didx, 5557 pd->dst, pd->af, multi)) 5558 return (PF_DROP); 5559 5560 key->hash = pf_pkt_hash(key->af, key->proto, 5561 &key->addr[0], &key->addr[1], 0, 0); 5562 5563 action = pf_find_state(pd, key, stp); 5564 if (action != PF_MATCH) 5565 return (action); 5566 5567 if ((*stp)->state_flags & PFSTATE_SLOPPY) 5568 return (-1); 5569 5570 /* Is this ICMP message flowing in right direction? */ 5571 if ((*stp)->key[PF_SK_WIRE]->af != (*stp)->key[PF_SK_STACK]->af) 5572 direction = (pd->af == (*stp)->key[PF_SK_WIRE]->af) ? 5573 PF_IN : PF_OUT; 5574 else 5575 direction = (*stp)->direction; 5576 if ((((!inner && direction == pd->dir) || 5577 (inner && direction != pd->dir)) ? 5578 PF_IN : PF_OUT) != icmp_dir) { 5579 if (pf_status.debug >= LOG_NOTICE) { 5580 log(LOG_NOTICE, 5581 "pf: icmp type %d in wrong direction (%d): ", 5582 ntohs(type), icmp_dir); 5583 pf_print_state(*stp); 5584 addlog("\n"); 5585 } 5586 return (PF_DROP); 5587 } 5588 return (-1); 5589 } 5590 5591 int 5592 pf_test_state_icmp(struct pf_pdesc *pd, struct pf_state **stp, 5593 u_short *reason) 5594 { 5595 u_int16_t virtual_id, virtual_type; 5596 u_int8_t icmptype, icmpcode; 5597 int icmp_dir, iidx, ret, copyback = 0; 5598 5599 struct pf_state_key_cmp key; 5600 5601 switch (pd->proto) { 5602 case IPPROTO_ICMP: 5603 icmptype = pd->hdr.icmp.icmp_type; 5604 icmpcode = pd->hdr.icmp.icmp_code; 5605 break; 5606 #ifdef INET6 5607 case IPPROTO_ICMPV6: 5608 icmptype = pd->hdr.icmp6.icmp6_type; 5609 icmpcode = pd->hdr.icmp6.icmp6_code; 5610 break; 5611 #endif /* INET6 */ 5612 default: 5613 panic("unhandled proto %d", pd->proto); 5614 } 5615 5616 if (pf_icmp_mapping(pd, icmptype, &icmp_dir, &virtual_id, 5617 &virtual_type) == 0) { 5618 /* 5619 * ICMP query/reply message not related to a TCP/UDP packet. 5620 * Search for an ICMP state. 5621 */ 5622 ret = pf_icmp_state_lookup(pd, &key, stp, 5623 virtual_id, virtual_type, icmp_dir, &iidx, 5624 0, 0); 5625 /* IPv6? 
try matching a multicast address */ 5626 if (ret == PF_DROP && pd->af == AF_INET6 && icmp_dir == PF_OUT) 5627 ret = pf_icmp_state_lookup(pd, &key, stp, virtual_id, 5628 virtual_type, icmp_dir, &iidx, 1, 0); 5629 if (ret >= 0) 5630 return (ret); 5631 5632 (*stp)->expire = getuptime(); 5633 pf_update_state_timeout(*stp, PFTM_ICMP_ERROR_REPLY); 5634 5635 /* translate source/destination address, if necessary */ 5636 if ((*stp)->key[PF_SK_WIRE] != (*stp)->key[PF_SK_STACK]) { 5637 struct pf_state_key *nk; 5638 int afto, sidx, didx; 5639 5640 if (PF_REVERSED_KEY((*stp)->key, pd->af)) 5641 nk = (*stp)->key[pd->sidx]; 5642 else 5643 nk = (*stp)->key[pd->didx]; 5644 5645 afto = pd->af != nk->af; 5646 sidx = afto ? pd->didx : pd->sidx; 5647 didx = afto ? pd->sidx : pd->didx; 5648 iidx = afto ? !iidx : iidx; 5649 #ifdef INET6 5650 if (afto) { 5651 pf_addrcpy(&pd->nsaddr, &nk->addr[sidx], 5652 nk->af); 5653 pf_addrcpy(&pd->ndaddr, &nk->addr[didx], 5654 nk->af); 5655 pd->naf = nk->af; 5656 } 5657 #endif /* INET6 */ 5658 if (!afto) { 5659 pf_translate_a(pd, pd->src, &nk->addr[sidx]); 5660 pf_translate_a(pd, pd->dst, &nk->addr[didx]); 5661 } 5662 5663 if (pd->rdomain != nk->rdomain) 5664 pd->destchg = 1; 5665 if (!afto && PF_ANEQ(pd->dst, 5666 &nk->addr[didx], pd->af)) 5667 pd->destchg = 1; 5668 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 5669 5670 switch (pd->af) { 5671 case AF_INET: 5672 #ifdef INET6 5673 if (afto) { 5674 if (pf_translate_icmp_af(pd, AF_INET6, 5675 &pd->hdr.icmp)) 5676 return (PF_DROP); 5677 pd->proto = IPPROTO_ICMPV6; 5678 } 5679 #endif /* INET6 */ 5680 pf_patch_16(pd, 5681 &pd->hdr.icmp.icmp_id, nk->port[iidx]); 5682 5683 m_copyback(pd->m, pd->off, ICMP_MINLEN, 5684 &pd->hdr.icmp, M_NOWAIT); 5685 copyback = 1; 5686 break; 5687 #ifdef INET6 5688 case AF_INET6: 5689 if (afto) { 5690 if (pf_translate_icmp_af(pd, AF_INET, 5691 &pd->hdr.icmp6)) 5692 return (PF_DROP); 5693 pd->proto = IPPROTO_ICMP; 5694 } 5695 5696 pf_patch_16(pd, 5697 &pd->hdr.icmp6.icmp6_id, nk->port[iidx]); 5698 5699 m_copyback(pd->m, pd->off, 5700 sizeof(struct icmp6_hdr), &pd->hdr.icmp6, 5701 M_NOWAIT); 5702 copyback = 1; 5703 break; 5704 #endif /* INET6 */ 5705 } 5706 #ifdef INET6 5707 if (afto) 5708 return (PF_AFRT); 5709 #endif /* INET6 */ 5710 } 5711 } else { 5712 /* 5713 * ICMP error message in response to a TCP/UDP packet. 5714 * Extract the inner TCP/UDP header and search for that state. 5715 */ 5716 struct pf_pdesc pd2; 5717 struct ip h2; 5718 #ifdef INET6 5719 struct ip6_hdr h2_6; 5720 #endif /* INET6 */ 5721 int ipoff2; 5722 5723 /* Initialize pd2 fields valid for both packets with pd. */ 5724 memset(&pd2, 0, sizeof(pd2)); 5725 pd2.af = pd->af; 5726 pd2.dir = pd->dir; 5727 pd2.kif = pd->kif; 5728 pd2.m = pd->m; 5729 pd2.rdomain = pd->rdomain; 5730 /* Payload packet is from the opposite direction. */ 5731 pd2.sidx = (pd2.dir == PF_IN) ? 1 : 0; 5732 pd2.didx = (pd2.dir == PF_IN) ? 
0 : 1; 5733 switch (pd->af) { 5734 case AF_INET: 5735 /* offset of h2 in mbuf chain */ 5736 ipoff2 = pd->off + ICMP_MINLEN; 5737 5738 if (!pf_pull_hdr(pd2.m, ipoff2, &h2, sizeof(h2), 5739 reason, pd2.af)) { 5740 DPFPRINTF(LOG_NOTICE, 5741 "ICMP error message too short (ip)"); 5742 return (PF_DROP); 5743 } 5744 /* 5745 * ICMP error messages don't refer to non-first 5746 * fragments 5747 */ 5748 if (h2.ip_off & htons(IP_OFFMASK)) { 5749 REASON_SET(reason, PFRES_FRAG); 5750 return (PF_DROP); 5751 } 5752 5753 /* offset of protocol header that follows h2 */ 5754 pd2.off = ipoff2; 5755 if (pf_walk_header(&pd2, &h2, reason) != PF_PASS) 5756 return (PF_DROP); 5757 5758 pd2.tot_len = ntohs(h2.ip_len); 5759 pd2.ttl = h2.ip_ttl; 5760 pd2.src = (struct pf_addr *)&h2.ip_src; 5761 pd2.dst = (struct pf_addr *)&h2.ip_dst; 5762 break; 5763 #ifdef INET6 5764 case AF_INET6: 5765 ipoff2 = pd->off + sizeof(struct icmp6_hdr); 5766 5767 if (!pf_pull_hdr(pd2.m, ipoff2, &h2_6, sizeof(h2_6), 5768 reason, pd2.af)) { 5769 DPFPRINTF(LOG_NOTICE, 5770 "ICMP error message too short (ip6)"); 5771 return (PF_DROP); 5772 } 5773 5774 pd2.off = ipoff2; 5775 if (pf_walk_header6(&pd2, &h2_6, reason) != PF_PASS) 5776 return (PF_DROP); 5777 5778 pd2.tot_len = ntohs(h2_6.ip6_plen) + 5779 sizeof(struct ip6_hdr); 5780 pd2.ttl = h2_6.ip6_hlim; 5781 pd2.src = (struct pf_addr *)&h2_6.ip6_src; 5782 pd2.dst = (struct pf_addr *)&h2_6.ip6_dst; 5783 break; 5784 #endif /* INET6 */ 5785 default: 5786 unhandled_af(pd->af); 5787 } 5788 5789 if (PF_ANEQ(pd->dst, pd2.src, pd->af)) { 5790 if (pf_status.debug >= LOG_NOTICE) { 5791 log(LOG_NOTICE, 5792 "pf: BAD ICMP %d:%d outer dst: ", 5793 icmptype, icmpcode); 5794 pf_print_host(pd->src, 0, pd->af); 5795 addlog(" -> "); 5796 pf_print_host(pd->dst, 0, pd->af); 5797 addlog(" inner src: "); 5798 pf_print_host(pd2.src, 0, pd2.af); 5799 addlog(" -> "); 5800 pf_print_host(pd2.dst, 0, pd2.af); 5801 addlog("\n"); 5802 } 5803 REASON_SET(reason, PFRES_BADSTATE); 5804 return (PF_DROP); 5805 } 5806 5807 switch (pd2.proto) { 5808 case IPPROTO_TCP: { 5809 struct tcphdr *th = &pd2.hdr.tcp; 5810 u_int32_t seq; 5811 struct pf_state_peer *src, *dst; 5812 u_int8_t dws; 5813 int action; 5814 5815 /* 5816 * Only the first 8 bytes of the TCP header can be 5817 * expected. Don't access any TCP header fields after 5818 * th_seq, an ackskew test is not possible. 
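 * (RFC 792 only guarantees the IP header plus the first 8 octets of
 * the offending datagram, i.e. just th_sport, th_dport and th_seq of
 * the quoted TCP header.)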
5819 */ 5820 if (!pf_pull_hdr(pd2.m, pd2.off, th, 8, reason, 5821 pd2.af)) { 5822 DPFPRINTF(LOG_NOTICE, 5823 "ICMP error message too short (tcp)"); 5824 return (PF_DROP); 5825 } 5826 5827 key.af = pd2.af; 5828 key.proto = IPPROTO_TCP; 5829 key.rdomain = pd2.rdomain; 5830 pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af); 5831 pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af); 5832 key.port[pd2.sidx] = th->th_sport; 5833 key.port[pd2.didx] = th->th_dport; 5834 key.hash = pf_pkt_hash(pd2.af, pd2.proto, 5835 pd2.src, pd2.dst, th->th_sport, th->th_dport); 5836 5837 action = pf_find_state(&pd2, &key, stp); 5838 if (action != PF_MATCH) 5839 return (action); 5840 5841 if (pd2.dir == (*stp)->direction) { 5842 if (PF_REVERSED_KEY((*stp)->key, pd->af)) { 5843 src = &(*stp)->src; 5844 dst = &(*stp)->dst; 5845 } else { 5846 src = &(*stp)->dst; 5847 dst = &(*stp)->src; 5848 } 5849 } else { 5850 if (PF_REVERSED_KEY((*stp)->key, pd->af)) { 5851 src = &(*stp)->dst; 5852 dst = &(*stp)->src; 5853 } else { 5854 src = &(*stp)->src; 5855 dst = &(*stp)->dst; 5856 } 5857 } 5858 5859 if (src->wscale && dst->wscale) 5860 dws = dst->wscale & PF_WSCALE_MASK; 5861 else 5862 dws = 0; 5863 5864 /* Demodulate sequence number */ 5865 seq = ntohl(th->th_seq) - src->seqdiff; 5866 if (src->seqdiff) { 5867 pf_patch_32(pd, &th->th_seq, htonl(seq)); 5868 copyback = 1; 5869 } 5870 5871 if (!((*stp)->state_flags & PFSTATE_SLOPPY) && 5872 (!SEQ_GEQ(src->seqhi, seq) || !SEQ_GEQ(seq, 5873 src->seqlo - (dst->max_win << dws)))) { 5874 if (pf_status.debug >= LOG_NOTICE) { 5875 log(LOG_NOTICE, 5876 "pf: BAD ICMP %d:%d ", 5877 icmptype, icmpcode); 5878 pf_print_host(pd->src, 0, pd->af); 5879 addlog(" -> "); 5880 pf_print_host(pd->dst, 0, pd->af); 5881 addlog(" state: "); 5882 pf_print_state(*stp); 5883 addlog(" seq=%u\n", seq); 5884 } 5885 REASON_SET(reason, PFRES_BADSTATE); 5886 return (PF_DROP); 5887 } else { 5888 if (pf_status.debug >= LOG_DEBUG) { 5889 log(LOG_DEBUG, 5890 "pf: OK ICMP %d:%d ", 5891 icmptype, icmpcode); 5892 pf_print_host(pd->src, 0, pd->af); 5893 addlog(" -> "); 5894 pf_print_host(pd->dst, 0, pd->af); 5895 addlog(" state: "); 5896 pf_print_state(*stp); 5897 addlog(" seq=%u\n", seq); 5898 } 5899 } 5900 5901 /* translate source/destination address, if necessary */ 5902 if ((*stp)->key[PF_SK_WIRE] != 5903 (*stp)->key[PF_SK_STACK]) { 5904 struct pf_state_key *nk; 5905 int afto, sidx, didx; 5906 5907 if (PF_REVERSED_KEY((*stp)->key, pd->af)) 5908 nk = (*stp)->key[pd->sidx]; 5909 else 5910 nk = (*stp)->key[pd->didx]; 5911 5912 afto = pd->af != nk->af; 5913 sidx = afto ? pd2.didx : pd2.sidx; 5914 didx = afto ? 
pd2.sidx : pd2.didx; 5915 5916 #ifdef INET6 5917 if (afto) { 5918 if (pf_translate_icmp_af(pd, nk->af, 5919 &pd->hdr.icmp)) 5920 return (PF_DROP); 5921 m_copyback(pd->m, pd->off, 5922 sizeof(struct icmp6_hdr), 5923 &pd->hdr.icmp6, M_NOWAIT); 5924 if (pf_change_icmp_af(pd->m, ipoff2, 5925 pd, &pd2, &nk->addr[sidx], 5926 &nk->addr[didx], pd->af, nk->af)) 5927 return (PF_DROP); 5928 if (nk->af == AF_INET) 5929 pd->proto = IPPROTO_ICMP; 5930 else 5931 pd->proto = IPPROTO_ICMPV6; 5932 pd->m->m_pkthdr.ph_rtableid = 5933 nk->rdomain; 5934 pd->destchg = 1; 5935 pf_addrcpy(&pd->nsaddr, 5936 &nk->addr[pd2.sidx], nk->af); 5937 pf_addrcpy(&pd->ndaddr, 5938 &nk->addr[pd2.didx], nk->af); 5939 pd->naf = nk->af; 5940 5941 pf_patch_16(pd, 5942 &th->th_sport, nk->port[sidx]); 5943 pf_patch_16(pd, 5944 &th->th_dport, nk->port[didx]); 5945 5946 m_copyback(pd2.m, pd2.off, 8, th, 5947 M_NOWAIT); 5948 return (PF_AFRT); 5949 } 5950 #endif /* INET6 */ 5951 if (PF_ANEQ(pd2.src, 5952 &nk->addr[pd2.sidx], pd2.af) || 5953 nk->port[pd2.sidx] != th->th_sport) 5954 pf_translate_icmp(pd, pd2.src, 5955 &th->th_sport, pd->dst, 5956 &nk->addr[pd2.sidx], 5957 nk->port[pd2.sidx]); 5958 5959 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], 5960 pd2.af) || pd2.rdomain != nk->rdomain) 5961 pd->destchg = 1; 5962 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 5963 5964 if (PF_ANEQ(pd2.dst, 5965 &nk->addr[pd2.didx], pd2.af) || 5966 nk->port[pd2.didx] != th->th_dport) 5967 pf_translate_icmp(pd, pd2.dst, 5968 &th->th_dport, pd->src, 5969 &nk->addr[pd2.didx], 5970 nk->port[pd2.didx]); 5971 copyback = 1; 5972 } 5973 5974 if (copyback) { 5975 switch (pd2.af) { 5976 case AF_INET: 5977 m_copyback(pd->m, pd->off, ICMP_MINLEN, 5978 &pd->hdr.icmp, M_NOWAIT); 5979 m_copyback(pd2.m, ipoff2, sizeof(h2), 5980 &h2, M_NOWAIT); 5981 break; 5982 #ifdef INET6 5983 case AF_INET6: 5984 m_copyback(pd->m, pd->off, 5985 sizeof(struct icmp6_hdr), 5986 &pd->hdr.icmp6, M_NOWAIT); 5987 m_copyback(pd2.m, ipoff2, sizeof(h2_6), 5988 &h2_6, M_NOWAIT); 5989 break; 5990 #endif /* INET6 */ 5991 } 5992 m_copyback(pd2.m, pd2.off, 8, th, M_NOWAIT); 5993 } 5994 break; 5995 } 5996 case IPPROTO_UDP: { 5997 struct udphdr *uh = &pd2.hdr.udp; 5998 int action; 5999 6000 if (!pf_pull_hdr(pd2.m, pd2.off, uh, sizeof(*uh), 6001 reason, pd2.af)) { 6002 DPFPRINTF(LOG_NOTICE, 6003 "ICMP error message too short (udp)"); 6004 return (PF_DROP); 6005 } 6006 6007 key.af = pd2.af; 6008 key.proto = IPPROTO_UDP; 6009 key.rdomain = pd2.rdomain; 6010 pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af); 6011 pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af); 6012 key.port[pd2.sidx] = uh->uh_sport; 6013 key.port[pd2.didx] = uh->uh_dport; 6014 key.hash = pf_pkt_hash(pd2.af, pd2.proto, 6015 pd2.src, pd2.dst, uh->uh_sport, uh->uh_dport); 6016 6017 action = pf_find_state(&pd2, &key, stp); 6018 if (action != PF_MATCH) 6019 return (action); 6020 6021 /* translate source/destination address, if necessary */ 6022 if ((*stp)->key[PF_SK_WIRE] != 6023 (*stp)->key[PF_SK_STACK]) { 6024 struct pf_state_key *nk; 6025 int afto, sidx, didx; 6026 6027 if (PF_REVERSED_KEY((*stp)->key, pd->af)) 6028 nk = (*stp)->key[pd->sidx]; 6029 else 6030 nk = (*stp)->key[pd->didx]; 6031 6032 afto = pd->af != nk->af; 6033 sidx = afto ? pd2.didx : pd2.sidx; 6034 didx = afto ? 
pd2.sidx : pd2.didx; 6035 6036 #ifdef INET6 6037 if (afto) { 6038 if (pf_translate_icmp_af(pd, nk->af, 6039 &pd->hdr.icmp)) 6040 return (PF_DROP); 6041 m_copyback(pd->m, pd->off, 6042 sizeof(struct icmp6_hdr), 6043 &pd->hdr.icmp6, M_NOWAIT); 6044 if (pf_change_icmp_af(pd->m, ipoff2, 6045 pd, &pd2, &nk->addr[sidx], 6046 &nk->addr[didx], pd->af, nk->af)) 6047 return (PF_DROP); 6048 if (nk->af == AF_INET) 6049 pd->proto = IPPROTO_ICMP; 6050 else 6051 pd->proto = IPPROTO_ICMPV6; 6052 pd->m->m_pkthdr.ph_rtableid = 6053 nk->rdomain; 6054 pd->destchg = 1; 6055 pf_addrcpy(&pd->nsaddr, 6056 &nk->addr[pd2.sidx], nk->af); 6057 pf_addrcpy(&pd->ndaddr, 6058 &nk->addr[pd2.didx], nk->af); 6059 pd->naf = nk->af; 6060 6061 pf_patch_16(pd, 6062 &uh->uh_sport, nk->port[sidx]); 6063 pf_patch_16(pd, 6064 &uh->uh_dport, nk->port[didx]); 6065 6066 m_copyback(pd2.m, pd2.off, sizeof(*uh), 6067 uh, M_NOWAIT); 6068 return (PF_AFRT); 6069 } 6070 #endif /* INET6 */ 6071 6072 if (PF_ANEQ(pd2.src, 6073 &nk->addr[pd2.sidx], pd2.af) || 6074 nk->port[pd2.sidx] != uh->uh_sport) 6075 pf_translate_icmp(pd, pd2.src, 6076 &uh->uh_sport, pd->dst, 6077 &nk->addr[pd2.sidx], 6078 nk->port[pd2.sidx]); 6079 6080 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], 6081 pd2.af) || pd2.rdomain != nk->rdomain) 6082 pd->destchg = 1; 6083 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 6084 6085 if (PF_ANEQ(pd2.dst, 6086 &nk->addr[pd2.didx], pd2.af) || 6087 nk->port[pd2.didx] != uh->uh_dport) 6088 pf_translate_icmp(pd, pd2.dst, 6089 &uh->uh_dport, pd->src, 6090 &nk->addr[pd2.didx], 6091 nk->port[pd2.didx]); 6092 6093 switch (pd2.af) { 6094 case AF_INET: 6095 m_copyback(pd->m, pd->off, ICMP_MINLEN, 6096 &pd->hdr.icmp, M_NOWAIT); 6097 m_copyback(pd2.m, ipoff2, sizeof(h2), 6098 &h2, M_NOWAIT); 6099 break; 6100 #ifdef INET6 6101 case AF_INET6: 6102 m_copyback(pd->m, pd->off, 6103 sizeof(struct icmp6_hdr), 6104 &pd->hdr.icmp6, M_NOWAIT); 6105 m_copyback(pd2.m, ipoff2, sizeof(h2_6), 6106 &h2_6, M_NOWAIT); 6107 break; 6108 #endif /* INET6 */ 6109 } 6110 /* Avoid recomputing the quoted UDP checksum. 6111 * Note: a zero UDP checksum is invalid for udp6 per RFC 2460, p. 27, 6112 * but presumably nothing cares in this context. */ 6113 pf_patch_16(pd, &uh->uh_sum, 0); 6114 m_copyback(pd2.m, pd2.off, sizeof(*uh), uh, 6115 M_NOWAIT); 6116 copyback = 1; 6117 } 6118 break; 6119 } 6120 case IPPROTO_ICMP: { 6121 struct icmp *iih = &pd2.hdr.icmp; 6122 6123 if (pd2.af != AF_INET) { 6124 REASON_SET(reason, PFRES_NORM); 6125 return (PF_DROP); 6126 } 6127 6128 if (!pf_pull_hdr(pd2.m, pd2.off, iih, ICMP_MINLEN, 6129 reason, pd2.af)) { 6130 DPFPRINTF(LOG_NOTICE, 6131 "ICMP error message too short (icmp)"); 6132 return (PF_DROP); 6133 } 6134 6135 pf_icmp_mapping(&pd2, iih->icmp_type, 6136 &icmp_dir, &virtual_id, &virtual_type); 6137 6138 ret = pf_icmp_state_lookup(&pd2, &key, stp, 6139 virtual_id, virtual_type, icmp_dir, &iidx, 0, 1); 6140 if (ret >= 0) 6141 return (ret); 6142 6143 /* translate source/destination address, if necessary */ 6144 if ((*stp)->key[PF_SK_WIRE] != 6145 (*stp)->key[PF_SK_STACK]) { 6146 struct pf_state_key *nk; 6147 int afto, sidx, didx; 6148 6149 if (PF_REVERSED_KEY((*stp)->key, pd->af)) 6150 nk = (*stp)->key[pd->sidx]; 6151 else 6152 nk = (*stp)->key[pd->didx]; 6153 6154 afto = pd->af != nk->af; 6155 sidx = afto ? pd2.didx : pd2.sidx; 6156 didx = afto ? pd2.sidx : pd2.didx; 6157 iidx = afto ?
!iidx : iidx; 6158 6159 #ifdef INET6 6160 if (afto) { 6161 if (nk->af != AF_INET6) 6162 return (PF_DROP); 6163 if (pf_translate_icmp_af(pd, nk->af, 6164 &pd->hdr.icmp)) 6165 return (PF_DROP); 6166 m_copyback(pd->m, pd->off, 6167 sizeof(struct icmp6_hdr), 6168 &pd->hdr.icmp6, M_NOWAIT); 6169 if (pf_change_icmp_af(pd->m, ipoff2, 6170 pd, &pd2, &nk->addr[sidx], 6171 &nk->addr[didx], pd->af, nk->af)) 6172 return (PF_DROP); 6173 pd->proto = IPPROTO_ICMPV6; 6174 if (pf_translate_icmp_af(pd, 6175 nk->af, iih)) 6176 return (PF_DROP); 6177 if (virtual_type == htons(ICMP_ECHO)) 6178 pf_patch_16(pd, &iih->icmp_id, 6179 nk->port[iidx]); 6180 m_copyback(pd2.m, pd2.off, ICMP_MINLEN, 6181 iih, M_NOWAIT); 6182 pd->m->m_pkthdr.ph_rtableid = 6183 nk->rdomain; 6184 pd->destchg = 1; 6185 pf_addrcpy(&pd->nsaddr, 6186 &nk->addr[pd2.sidx], nk->af); 6187 pf_addrcpy(&pd->ndaddr, 6188 &nk->addr[pd2.didx], nk->af); 6189 pd->naf = nk->af; 6190 return (PF_AFRT); 6191 } 6192 #endif /* INET6 */ 6193 6194 if (PF_ANEQ(pd2.src, 6195 &nk->addr[pd2.sidx], pd2.af) || 6196 (virtual_type == htons(ICMP_ECHO) && 6197 nk->port[iidx] != iih->icmp_id)) 6198 pf_translate_icmp(pd, pd2.src, 6199 (virtual_type == htons(ICMP_ECHO)) ? 6200 &iih->icmp_id : NULL, 6201 pd->dst, &nk->addr[pd2.sidx], 6202 (virtual_type == htons(ICMP_ECHO)) ? 6203 nk->port[iidx] : 0); 6204 6205 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], 6206 pd2.af) || pd2.rdomain != nk->rdomain) 6207 pd->destchg = 1; 6208 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 6209 6210 if (PF_ANEQ(pd2.dst, 6211 &nk->addr[pd2.didx], pd2.af)) 6212 pf_translate_icmp(pd, pd2.dst, NULL, 6213 pd->src, &nk->addr[pd2.didx], 0); 6214 6215 m_copyback(pd->m, pd->off, ICMP_MINLEN, 6216 &pd->hdr.icmp, M_NOWAIT); 6217 m_copyback(pd2.m, ipoff2, sizeof(h2), &h2, 6218 M_NOWAIT); 6219 m_copyback(pd2.m, pd2.off, ICMP_MINLEN, iih, 6220 M_NOWAIT); 6221 copyback = 1; 6222 } 6223 break; 6224 } 6225 #ifdef INET6 6226 case IPPROTO_ICMPV6: { 6227 struct icmp6_hdr *iih = &pd2.hdr.icmp6; 6228 6229 if (pd2.af != AF_INET6) { 6230 REASON_SET(reason, PFRES_NORM); 6231 return (PF_DROP); 6232 } 6233 6234 if (!pf_pull_hdr(pd2.m, pd2.off, iih, 6235 sizeof(struct icmp6_hdr), reason, pd2.af)) { 6236 DPFPRINTF(LOG_NOTICE, 6237 "ICMP error message too short (icmp6)"); 6238 return (PF_DROP); 6239 } 6240 6241 pf_icmp_mapping(&pd2, iih->icmp6_type, 6242 &icmp_dir, &virtual_id, &virtual_type); 6243 ret = pf_icmp_state_lookup(&pd2, &key, stp, 6244 virtual_id, virtual_type, icmp_dir, &iidx, 0, 1); 6245 /* IPv6? try matching a multicast address */ 6246 if (ret == PF_DROP && pd2.af == AF_INET6 && 6247 icmp_dir == PF_OUT) 6248 ret = pf_icmp_state_lookup(&pd2, &key, stp, 6249 virtual_id, virtual_type, icmp_dir, &iidx, 6250 1, 1); 6251 if (ret >= 0) 6252 return (ret); 6253 6254 /* translate source/destination address, if necessary */ 6255 if ((*stp)->key[PF_SK_WIRE] != 6256 (*stp)->key[PF_SK_STACK]) { 6257 struct pf_state_key *nk; 6258 int afto, sidx, didx; 6259 6260 if (PF_REVERSED_KEY((*stp)->key, pd->af)) 6261 nk = (*stp)->key[pd->sidx]; 6262 else 6263 nk = (*stp)->key[pd->didx]; 6264 6265 afto = pd->af != nk->af; 6266 sidx = afto ? pd2.didx : pd2.sidx; 6267 didx = afto ? pd2.sidx : pd2.didx; 6268 iidx = afto ? 
!iidx : iidx; 6269 6270 if (afto) { 6271 if (nk->af != AF_INET) 6272 return (PF_DROP); 6273 if (pf_translate_icmp_af(pd, nk->af, 6274 &pd->hdr.icmp)) 6275 return (PF_DROP); 6276 m_copyback(pd->m, pd->off, 6277 sizeof(struct icmp6_hdr), 6278 &pd->hdr.icmp6, M_NOWAIT); 6279 if (pf_change_icmp_af(pd->m, ipoff2, 6280 pd, &pd2, &nk->addr[sidx], 6281 &nk->addr[didx], pd->af, nk->af)) 6282 return (PF_DROP); 6283 pd->proto = IPPROTO_ICMP; 6284 if (pf_translate_icmp_af(pd, 6285 nk->af, iih)) 6286 return (PF_DROP); 6287 if (virtual_type == 6288 htons(ICMP6_ECHO_REQUEST)) 6289 pf_patch_16(pd, &iih->icmp6_id, 6290 nk->port[iidx]); 6291 m_copyback(pd2.m, pd2.off, 6292 sizeof(struct icmp6_hdr), iih, 6293 M_NOWAIT); 6294 pd->m->m_pkthdr.ph_rtableid = 6295 nk->rdomain; 6296 pd->destchg = 1; 6297 pf_addrcpy(&pd->nsaddr, 6298 &nk->addr[pd2.sidx], nk->af); 6299 pf_addrcpy(&pd->ndaddr, 6300 &nk->addr[pd2.didx], nk->af); 6301 pd->naf = nk->af; 6302 return (PF_AFRT); 6303 } 6304 6305 if (PF_ANEQ(pd2.src, 6306 &nk->addr[pd2.sidx], pd2.af) || 6307 ((virtual_type == 6308 htons(ICMP6_ECHO_REQUEST)) && 6309 nk->port[pd2.sidx] != iih->icmp6_id)) 6310 pf_translate_icmp(pd, pd2.src, 6311 (virtual_type == 6312 htons(ICMP6_ECHO_REQUEST)) 6313 ? &iih->icmp6_id : NULL, 6314 pd->dst, &nk->addr[pd2.sidx], 6315 (virtual_type == 6316 htons(ICMP6_ECHO_REQUEST)) 6317 ? nk->port[iidx] : 0); 6318 6319 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], 6320 pd2.af) || pd2.rdomain != nk->rdomain) 6321 pd->destchg = 1; 6322 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 6323 6324 if (PF_ANEQ(pd2.dst, 6325 &nk->addr[pd2.didx], pd2.af)) 6326 pf_translate_icmp(pd, pd2.dst, NULL, 6327 pd->src, &nk->addr[pd2.didx], 0); 6328 6329 m_copyback(pd->m, pd->off, 6330 sizeof(struct icmp6_hdr), &pd->hdr.icmp6, 6331 M_NOWAIT); 6332 m_copyback(pd2.m, ipoff2, sizeof(h2_6), &h2_6, 6333 M_NOWAIT); 6334 m_copyback(pd2.m, pd2.off, 6335 sizeof(struct icmp6_hdr), iih, M_NOWAIT); 6336 copyback = 1; 6337 } 6338 break; 6339 } 6340 #endif /* INET6 */ 6341 default: { 6342 int action; 6343 6344 key.af = pd2.af; 6345 key.proto = pd2.proto; 6346 key.rdomain = pd2.rdomain; 6347 pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af); 6348 pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af); 6349 key.port[0] = key.port[1] = 0; 6350 key.hash = pf_pkt_hash(pd2.af, pd2.proto, 6351 pd2.src, pd2.dst, 0, 0); 6352 6353 action = pf_find_state(&pd2, &key, stp); 6354 if (action != PF_MATCH) 6355 return (action); 6356 6357 /* translate source/destination address, if necessary */ 6358 if ((*stp)->key[PF_SK_WIRE] != 6359 (*stp)->key[PF_SK_STACK]) { 6360 struct pf_state_key *nk = 6361 (*stp)->key[pd->didx]; 6362 6363 if (PF_ANEQ(pd2.src, 6364 &nk->addr[pd2.sidx], pd2.af)) 6365 pf_translate_icmp(pd, pd2.src, NULL, 6366 pd->dst, &nk->addr[pd2.sidx], 0); 6367 6368 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], 6369 pd2.af) || pd2.rdomain != nk->rdomain) 6370 pd->destchg = 1; 6371 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 6372 6373 if (PF_ANEQ(pd2.dst, 6374 &nk->addr[pd2.didx], pd2.af)) 6375 pf_translate_icmp(pd, pd2.dst, NULL, 6376 pd->src, &nk->addr[pd2.didx], 0); 6377 6378 switch (pd2.af) { 6379 case AF_INET: 6380 m_copyback(pd->m, pd->off, ICMP_MINLEN, 6381 &pd->hdr.icmp, M_NOWAIT); 6382 m_copyback(pd2.m, ipoff2, sizeof(h2), 6383 &h2, M_NOWAIT); 6384 break; 6385 #ifdef INET6 6386 case AF_INET6: 6387 m_copyback(pd->m, pd->off, 6388 sizeof(struct icmp6_hdr), 6389 &pd->hdr.icmp6, M_NOWAIT); 6390 m_copyback(pd2.m, ipoff2, sizeof(h2_6), 6391 &h2_6, M_NOWAIT); 6392 break; 6393 #endif /* INET6 */ 6394 } 6395 
copyback = 1; 6396 } 6397 break; 6398 } 6399 } 6400 } 6401 if (copyback) { 6402 m_copyback(pd->m, pd->off, pd->hdrlen, &pd->hdr, M_NOWAIT); 6403 } 6404 6405 return (PF_PASS); 6406 } 6407 6408 /* 6409 * ipoff and off are measured from the start of the mbuf chain. 6410 * h must be at "ipoff" on the mbuf chain. 6411 */ 6412 void * 6413 pf_pull_hdr(struct mbuf *m, int off, void *p, int len, 6414 u_short *reasonp, sa_family_t af) 6415 { 6416 int iplen = 0; 6417 6418 switch (af) { 6419 case AF_INET: { 6420 struct ip *h = mtod(m, struct ip *); 6421 u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3; 6422 6423 if (fragoff) { 6424 REASON_SET(reasonp, PFRES_FRAG); 6425 return (NULL); 6426 } 6427 iplen = ntohs(h->ip_len); 6428 break; 6429 } 6430 #ifdef INET6 6431 case AF_INET6: { 6432 struct ip6_hdr *h = mtod(m, struct ip6_hdr *); 6433 6434 iplen = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr); 6435 break; 6436 } 6437 #endif /* INET6 */ 6438 } 6439 if (m->m_pkthdr.len < off + len || iplen < off + len) { 6440 REASON_SET(reasonp, PFRES_SHORT); 6441 return (NULL); 6442 } 6443 m_copydata(m, off, len, p); 6444 return (p); 6445 } 6446 6447 int 6448 pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif, 6449 int rtableid) 6450 { 6451 struct sockaddr_storage ss; 6452 struct sockaddr_in *dst; 6453 int ret = 1; 6454 int check_mpath; 6455 #ifdef INET6 6456 struct sockaddr_in6 *dst6; 6457 #endif /* INET6 */ 6458 struct rtentry *rt = NULL; 6459 6460 check_mpath = 0; 6461 memset(&ss, 0, sizeof(ss)); 6462 switch (af) { 6463 case AF_INET: 6464 dst = (struct sockaddr_in *)&ss; 6465 dst->sin_family = AF_INET; 6466 dst->sin_len = sizeof(*dst); 6467 dst->sin_addr = addr->v4; 6468 if (ipmultipath) 6469 check_mpath = 1; 6470 break; 6471 #ifdef INET6 6472 case AF_INET6: 6473 /* 6474 * Skip check for addresses with embedded interface scope, 6475 * as they would always match anyway. 
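* The kernel embeds the scope zone in link-local addresses, so the * route lookup below would succeed no matter which interface is * being checked.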
6476 */ 6477 if (IN6_IS_SCOPE_EMBED(&addr->v6)) 6478 goto out; 6479 dst6 = (struct sockaddr_in6 *)&ss; 6480 dst6->sin6_family = AF_INET6; 6481 dst6->sin6_len = sizeof(*dst6); 6482 dst6->sin6_addr = addr->v6; 6483 if (ip6_multipath) 6484 check_mpath = 1; 6485 break; 6486 #endif /* INET6 */ 6487 } 6488 6489 /* Skip checks for ipsec interfaces */ 6490 if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC) 6491 goto out; 6492 6493 rt = rtalloc(sstosa(&ss), 0, rtableid); 6494 if (rt != NULL) { 6495 /* No interface given, this is a no-route check */ 6496 if (kif == NULL) 6497 goto out; 6498 6499 if (kif->pfik_ifp == NULL) { 6500 ret = 0; 6501 goto out; 6502 } 6503 6504 /* Perform uRPF check if passed input interface */ 6505 ret = 0; 6506 do { 6507 if (rt->rt_ifidx == kif->pfik_ifp->if_index) { 6508 ret = 1; 6509 #if NCARP > 0 6510 } else { 6511 struct ifnet *ifp; 6512 6513 ifp = if_get(rt->rt_ifidx); 6514 if (ifp != NULL && ifp->if_type == IFT_CARP && 6515 ifp->if_carpdevidx == 6516 kif->pfik_ifp->if_index) 6517 ret = 1; 6518 if_put(ifp); 6519 #endif /* NCARP */ 6520 } 6521 6522 rt = rtable_iterate(rt); 6523 } while (check_mpath == 1 && rt != NULL && ret == 0); 6524 } else 6525 ret = 0; 6526 out: 6527 rtfree(rt); 6528 return (ret); 6529 } 6530 6531 int 6532 pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw, 6533 int rtableid) 6534 { 6535 struct sockaddr_storage ss; 6536 struct sockaddr_in *dst; 6537 #ifdef INET6 6538 struct sockaddr_in6 *dst6; 6539 #endif /* INET6 */ 6540 struct rtentry *rt; 6541 int ret = 0; 6542 6543 memset(&ss, 0, sizeof(ss)); 6544 switch (af) { 6545 case AF_INET: 6546 dst = (struct sockaddr_in *)&ss; 6547 dst->sin_family = AF_INET; 6548 dst->sin_len = sizeof(*dst); 6549 dst->sin_addr = addr->v4; 6550 break; 6551 #ifdef INET6 6552 case AF_INET6: 6553 dst6 = (struct sockaddr_in6 *)&ss; 6554 dst6->sin6_family = AF_INET6; 6555 dst6->sin6_len = sizeof(*dst6); 6556 dst6->sin6_addr = addr->v6; 6557 break; 6558 #endif /* INET6 */ 6559 } 6560 6561 rt = rtalloc(sstosa(&ss), RT_RESOLVE, rtableid); 6562 if (rt != NULL) { 6563 if (rt->rt_labelid == aw->v.rtlabel) 6564 ret = 1; 6565 rtfree(rt); 6566 } 6567 6568 return (ret); 6569 } 6570 6571 /* pf_route() may change pd->m, adjust local copies after calling */ 6572 void 6573 pf_route(struct pf_pdesc *pd, struct pf_state *st) 6574 { 6575 struct mbuf *m0; 6576 struct mbuf_list ml; 6577 struct sockaddr_in *dst, sin; 6578 struct rtentry *rt = NULL; 6579 struct ip *ip; 6580 struct ifnet *ifp = NULL; 6581 unsigned int rtableid; 6582 6583 if (pd->m->m_pkthdr.pf.routed++ > 3) { 6584 m_freem(pd->m); 6585 pd->m = NULL; 6586 return; 6587 } 6588 6589 if (st->rt == PF_DUPTO) { 6590 if ((m0 = m_dup_pkt(pd->m, max_linkhdr, M_NOWAIT)) == NULL) 6591 return; 6592 } else { 6593 if ((st->rt == PF_REPLYTO) == (st->direction == pd->dir)) 6594 return; 6595 m0 = pd->m; 6596 pd->m = NULL; 6597 } 6598 6599 if (m0->m_len < sizeof(struct ip)) { 6600 DPFPRINTF(LOG_ERR, 6601 "%s: m0->m_len < sizeof(struct ip)", __func__); 6602 goto bad; 6603 } 6604 6605 ip = mtod(m0, struct ip *); 6606 6607 if (pd->dir == PF_IN) { 6608 if (ip->ip_ttl <= IPTTLDEC) { 6609 if (st->rt != PF_DUPTO) { 6610 pf_send_icmp(m0, ICMP_TIMXCEED, 6611 ICMP_TIMXCEED_INTRANS, 0, 6612 pd->af, st->rule.ptr, pd->rdomain); 6613 } 6614 goto bad; 6615 } 6616 ip->ip_ttl -= IPTTLDEC; 6617 } 6618 6619 memset(&sin, 0, sizeof(sin)); 6620 dst = &sin; 6621 dst->sin_family = AF_INET; 6622 dst->sin_len = sizeof(*dst); 6623 dst->sin_addr = st->rt_addr.v4; 6624 rtableid = 
m0->m_pkthdr.ph_rtableid; 6625 6626 rt = rtalloc_mpath(sintosa(dst), &ip->ip_src.s_addr, rtableid); 6627 if (!rtisvalid(rt)) { 6628 if (st->rt != PF_DUPTO) { 6629 pf_send_icmp(m0, ICMP_UNREACH, ICMP_UNREACH_HOST, 6630 0, pd->af, st->rule.ptr, pd->rdomain); 6631 } 6632 ipstat_inc(ips_noroute); 6633 goto bad; 6634 } 6635 6636 ifp = if_get(rt->rt_ifidx); 6637 if (ifp == NULL) 6638 goto bad; 6639 6640 /* A locally generated packet may have invalid source address. */ 6641 if ((ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET && 6642 (ifp->if_flags & IFF_LOOPBACK) == 0) 6643 ip->ip_src = ifatoia(rt->rt_ifa)->ia_addr.sin_addr; 6644 6645 if (st->rt != PF_DUPTO && pd->dir == PF_IN) { 6646 if (pf_test(AF_INET, PF_OUT, ifp, &m0) != PF_PASS) 6647 goto bad; 6648 else if (m0 == NULL) 6649 goto done; 6650 if (m0->m_len < sizeof(struct ip)) { 6651 DPFPRINTF(LOG_ERR, 6652 "%s: m0->m_len < sizeof(struct ip)", __func__); 6653 goto bad; 6654 } 6655 ip = mtod(m0, struct ip *); 6656 } 6657 6658 if (if_output_tso(ifp, &m0, sintosa(dst), rt, ifp->if_mtu) || 6659 m0 == NULL) 6660 goto done; 6661 6662 /* 6663 * Too large for interface; fragment if possible. 6664 * Must be able to put at least 8 bytes per fragment. 6665 */ 6666 if (ip->ip_off & htons(IP_DF)) { 6667 ipstat_inc(ips_cantfrag); 6668 if (st->rt != PF_DUPTO) 6669 pf_send_icmp(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 6670 ifp->if_mtu, pd->af, st->rule.ptr, pd->rdomain); 6671 goto bad; 6672 } 6673 6674 if (ip_fragment(m0, &ml, ifp, ifp->if_mtu) || 6675 if_output_ml(ifp, &ml, sintosa(dst), rt)) 6676 goto done; 6677 ipstat_inc(ips_fragmented); 6678 6679 done: 6680 if_put(ifp); 6681 rtfree(rt); 6682 return; 6683 6684 bad: 6685 m_freem(m0); 6686 goto done; 6687 } 6688 6689 #ifdef INET6 6690 /* pf_route6() may change pd->m, adjust local copies after calling */ 6691 void 6692 pf_route6(struct pf_pdesc *pd, struct pf_state *st) 6693 { 6694 struct mbuf *m0; 6695 struct sockaddr_in6 *dst, sin6; 6696 struct rtentry *rt = NULL; 6697 struct ip6_hdr *ip6; 6698 struct ifnet *ifp = NULL; 6699 struct m_tag *mtag; 6700 unsigned int rtableid; 6701 6702 if (pd->m->m_pkthdr.pf.routed++ > 3) { 6703 m_freem(pd->m); 6704 pd->m = NULL; 6705 return; 6706 } 6707 6708 if (st->rt == PF_DUPTO) { 6709 if ((m0 = m_dup_pkt(pd->m, max_linkhdr, M_NOWAIT)) == NULL) 6710 return; 6711 } else { 6712 if ((st->rt == PF_REPLYTO) == (st->direction == pd->dir)) 6713 return; 6714 m0 = pd->m; 6715 pd->m = NULL; 6716 } 6717 6718 if (m0->m_len < sizeof(struct ip6_hdr)) { 6719 DPFPRINTF(LOG_ERR, 6720 "%s: m0->m_len < sizeof(struct ip6_hdr)", __func__); 6721 goto bad; 6722 } 6723 ip6 = mtod(m0, struct ip6_hdr *); 6724 6725 if (pd->dir == PF_IN) { 6726 if (ip6->ip6_hlim <= IPV6_HLIMDEC) { 6727 if (st->rt != PF_DUPTO) { 6728 pf_send_icmp(m0, ICMP6_TIME_EXCEEDED, 6729 ICMP6_TIME_EXCEED_TRANSIT, 0, 6730 pd->af, st->rule.ptr, pd->rdomain); 6731 } 6732 goto bad; 6733 } 6734 ip6->ip6_hlim -= IPV6_HLIMDEC; 6735 } 6736 6737 memset(&sin6, 0, sizeof(sin6)); 6738 dst = &sin6; 6739 dst->sin6_family = AF_INET6; 6740 dst->sin6_len = sizeof(*dst); 6741 dst->sin6_addr = st->rt_addr.v6; 6742 rtableid = m0->m_pkthdr.ph_rtableid; 6743 6744 rt = rtalloc_mpath(sin6tosa(dst), &ip6->ip6_src.s6_addr32[0], 6745 rtableid); 6746 if (!rtisvalid(rt)) { 6747 if (st->rt != PF_DUPTO) { 6748 pf_send_icmp(m0, ICMP6_DST_UNREACH, 6749 ICMP6_DST_UNREACH_NOROUTE, 0, 6750 pd->af, st->rule.ptr, pd->rdomain); 6751 } 6752 ip6stat_inc(ip6s_noroute); 6753 goto bad; 6754 } 6755 6756 ifp = if_get(rt->rt_ifidx); 6757 if (ifp == NULL) 
6758 goto bad; 6759 6760 /* A locally generated packet may have invalid source address. */ 6761 if (IN6_IS_ADDR_LOOPBACK(&ip6->ip6_src) && 6762 (ifp->if_flags & IFF_LOOPBACK) == 0) 6763 ip6->ip6_src = ifatoia6(rt->rt_ifa)->ia_addr.sin6_addr; 6764 6765 if (st->rt != PF_DUPTO && pd->dir == PF_IN) { 6766 if (pf_test(AF_INET6, PF_OUT, ifp, &m0) != PF_PASS) 6767 goto bad; 6768 else if (m0 == NULL) 6769 goto done; 6770 if (m0->m_len < sizeof(struct ip6_hdr)) { 6771 DPFPRINTF(LOG_ERR, 6772 "%s: m0->m_len < sizeof(struct ip6_hdr)", __func__); 6773 goto bad; 6774 } 6775 } 6776 6777 /* 6778 * If packet has been reassembled by PF earlier, we have to 6779 * use pf_refragment6() here to turn it back to fragments. 6780 */ 6781 if ((mtag = m_tag_find(m0, PACKET_TAG_PF_REASSEMBLED, NULL))) { 6782 (void) pf_refragment6(&m0, mtag, dst, ifp, rt); 6783 goto done; 6784 } 6785 6786 if (if_output_tso(ifp, &m0, sin6tosa(dst), rt, ifp->if_mtu) || 6787 m0 == NULL) 6788 goto done; 6789 6790 ip6stat_inc(ip6s_cantfrag); 6791 if (st->rt != PF_DUPTO) 6792 pf_send_icmp(m0, ICMP6_PACKET_TOO_BIG, 0, 6793 ifp->if_mtu, pd->af, st->rule.ptr, pd->rdomain); 6794 goto bad; 6795 6796 done: 6797 if_put(ifp); 6798 rtfree(rt); 6799 return; 6800 6801 bad: 6802 m_freem(m0); 6803 goto done; 6804 } 6805 #endif /* INET6 */ 6806 6807 /* 6808 * check TCP checksum and set mbuf flag 6809 * off is the offset where the protocol header starts 6810 * len is the total length of protocol header plus payload 6811 * returns 0 when the checksum is valid, otherwise returns 1. 6812 * if the _OUT flag is set the checksum isn't done yet, consider these ok 6813 */ 6814 int 6815 pf_check_tcp_cksum(struct mbuf *m, int off, int len, sa_family_t af) 6816 { 6817 u_int16_t sum; 6818 6819 if (m->m_pkthdr.csum_flags & 6820 (M_TCP_CSUM_IN_OK | M_TCP_CSUM_OUT)) { 6821 return (0); 6822 } 6823 if (m->m_pkthdr.csum_flags & M_TCP_CSUM_IN_BAD || 6824 off < sizeof(struct ip) || 6825 m->m_pkthdr.len < off + len) { 6826 return (1); 6827 } 6828 6829 /* need to do it in software */ 6830 tcpstat_inc(tcps_inswcsum); 6831 6832 switch (af) { 6833 case AF_INET: 6834 if (m->m_len < sizeof(struct ip)) 6835 return (1); 6836 6837 sum = in4_cksum(m, IPPROTO_TCP, off, len); 6838 break; 6839 #ifdef INET6 6840 case AF_INET6: 6841 if (m->m_len < sizeof(struct ip6_hdr)) 6842 return (1); 6843 6844 sum = in6_cksum(m, IPPROTO_TCP, off, len); 6845 break; 6846 #endif /* INET6 */ 6847 default: 6848 unhandled_af(af); 6849 } 6850 if (sum) { 6851 tcpstat_inc(tcps_rcvbadsum); 6852 m->m_pkthdr.csum_flags |= M_TCP_CSUM_IN_BAD; 6853 return (1); 6854 } 6855 6856 m->m_pkthdr.csum_flags |= M_TCP_CSUM_IN_OK; 6857 return (0); 6858 } 6859 6860 struct pf_divert * 6861 pf_find_divert(struct mbuf *m) 6862 { 6863 struct m_tag *mtag; 6864 6865 if ((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL) 6866 return (NULL); 6867 6868 return ((struct pf_divert *)(mtag + 1)); 6869 } 6870 6871 struct pf_divert * 6872 pf_get_divert(struct mbuf *m) 6873 { 6874 struct m_tag *mtag; 6875 6876 if ((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL) { 6877 mtag = m_tag_get(PACKET_TAG_PF_DIVERT, sizeof(struct pf_divert), 6878 M_NOWAIT); 6879 if (mtag == NULL) 6880 return (NULL); 6881 memset(mtag + 1, 0, sizeof(struct pf_divert)); 6882 m_tag_prepend(m, mtag); 6883 } 6884 6885 return ((struct pf_divert *)(mtag + 1)); 6886 } 6887 6888 int 6889 pf_walk_option(struct pf_pdesc *pd, struct ip *h, int off, int end, 6890 u_short *reason) 6891 { 6892 uint8_t type, length, opts[15 * 4 - sizeof(struct ip)]; 6893 6894 /* IP 
header in payload of ICMP packet may be too short */ 6895 if (pd->m->m_pkthdr.len < end) { 6896 DPFPRINTF(LOG_NOTICE, "IP option too short"); 6897 REASON_SET(reason, PFRES_SHORT); 6898 return (PF_DROP); 6899 } 6900 6901 KASSERT(end - off <= sizeof(opts)); 6902 m_copydata(pd->m, off, end - off, opts); 6903 end -= off; 6904 off = 0; 6905 6906 while (off < end) { 6907 type = opts[off]; 6908 if (type == IPOPT_EOL) 6909 break; 6910 if (type == IPOPT_NOP) { 6911 off++; 6912 continue; 6913 } 6914 if (off + 2 > end) { 6915 DPFPRINTF(LOG_NOTICE, "IP length opt"); 6916 REASON_SET(reason, PFRES_IPOPTIONS); 6917 return (PF_DROP); 6918 } 6919 length = opts[off + 1]; 6920 if (length < 2) { 6921 DPFPRINTF(LOG_NOTICE, "IP short opt"); 6922 REASON_SET(reason, PFRES_IPOPTIONS); 6923 return (PF_DROP); 6924 } 6925 if (off + length > end) { 6926 DPFPRINTF(LOG_NOTICE, "IP long opt"); 6927 REASON_SET(reason, PFRES_IPOPTIONS); 6928 return (PF_DROP); 6929 } 6930 switch (type) { 6931 case IPOPT_RA: 6932 SET(pd->badopts, PF_OPT_ROUTER_ALERT); 6933 break; 6934 default: 6935 SET(pd->badopts, PF_OPT_OTHER); 6936 break; 6937 } 6938 off += length; 6939 } 6940 6941 return (PF_PASS); 6942 } 6943 6944 int 6945 pf_walk_header(struct pf_pdesc *pd, struct ip *h, u_short *reason) 6946 { 6947 struct ip6_ext ext; 6948 u_int32_t hlen, end; 6949 int hdr_cnt; 6950 6951 hlen = h->ip_hl << 2; 6952 if (hlen < sizeof(struct ip) || hlen > ntohs(h->ip_len)) { 6953 REASON_SET(reason, PFRES_SHORT); 6954 return (PF_DROP); 6955 } 6956 if (hlen != sizeof(struct ip)) { 6957 if (pf_walk_option(pd, h, pd->off + sizeof(struct ip), 6958 pd->off + hlen, reason) != PF_PASS) 6959 return (PF_DROP); 6960 /* header options which contain only padding are fishy */ 6961 if (pd->badopts == 0) 6962 SET(pd->badopts, PF_OPT_OTHER); 6963 } 6964 end = pd->off + ntohs(h->ip_len); 6965 pd->off += hlen; 6966 pd->proto = h->ip_p; 6967 /* IGMP packets have router alert options; allow them */ 6968 if (pd->proto == IPPROTO_IGMP) { 6969 /* 6970 * According to RFC 1112, ttl must be set to 1 in all IGMP 6971 * packets sent to 224.0.0.1. 6972 */ 6973 if ((h->ip_ttl != 1) && 6974 (h->ip_dst.s_addr == INADDR_ALLHOSTS_GROUP)) { 6975 DPFPRINTF(LOG_NOTICE, "Invalid IGMP"); 6976 REASON_SET(reason, PFRES_IPOPTIONS); 6977 return (PF_DROP); 6978 } 6979 CLR(pd->badopts, PF_OPT_ROUTER_ALERT); 6980 } 6981 /* stop walking over non-initial fragments */ 6982 if ((h->ip_off & htons(IP_OFFMASK)) != 0) 6983 return (PF_PASS); 6984 6985 for (hdr_cnt = 0; hdr_cnt < pf_hdr_limit; hdr_cnt++) { 6986 switch (pd->proto) { 6987 case IPPROTO_AH: 6988 /* fragments may be short */ 6989 if ((h->ip_off & htons(IP_MF | IP_OFFMASK)) != 0 && 6990 end < pd->off + sizeof(ext)) 6991 return (PF_PASS); 6992 if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext), 6993 reason, AF_INET)) { 6994 DPFPRINTF(LOG_NOTICE, "IP short exthdr"); 6995 return (PF_DROP); 6996 } 6997 pd->off += (ext.ip6e_len + 2) * 4; 6998 pd->proto = ext.ip6e_nxt; 6999 break; 7000 default: 7001 return (PF_PASS); 7002 } 7003 } 7004 DPFPRINTF(LOG_NOTICE, "IPv4 nested authentication header limit"); 7005 REASON_SET(reason, PFRES_IPOPTIONS); 7006 return (PF_DROP); 7007 } 7008 7009 #ifdef INET6 7010 int 7011 pf_walk_option6(struct pf_pdesc *pd, struct ip6_hdr *h, int off, int end, 7012 u_short *reason) 7013 { 7014 struct ip6_opt opt; 7015 struct ip6_opt_jumbo jumbo; 7016 7017 while (off < end) { 7018 if (!pf_pull_hdr(pd->m, off, &opt.ip6o_type, 7019 sizeof(opt.ip6o_type), reason, AF_INET6)) { 7020 DPFPRINTF(LOG_NOTICE, "IPv6 short opt type"); 7021 return
(PF_DROP); 7022 } 7023 if (opt.ip6o_type == IP6OPT_PAD1) { 7024 off++; 7025 continue; 7026 } 7027 if (!pf_pull_hdr(pd->m, off, &opt, sizeof(opt), 7028 reason, AF_INET6)) { 7029 DPFPRINTF(LOG_NOTICE, "IPv6 short opt"); 7030 return (PF_DROP); 7031 } 7032 if (off + sizeof(opt) + opt.ip6o_len > end) { 7033 DPFPRINTF(LOG_NOTICE, "IPv6 long opt"); 7034 REASON_SET(reason, PFRES_IPOPTIONS); 7035 return (PF_DROP); 7036 } 7037 switch (opt.ip6o_type) { 7038 case IP6OPT_PADN: 7039 break; 7040 case IP6OPT_JUMBO: 7041 SET(pd->badopts, PF_OPT_JUMBO); 7042 if (pd->jumbolen != 0) { 7043 DPFPRINTF(LOG_NOTICE, "IPv6 multiple jumbo"); 7044 REASON_SET(reason, PFRES_IPOPTIONS); 7045 return (PF_DROP); 7046 } 7047 if (ntohs(h->ip6_plen) != 0) { 7048 DPFPRINTF(LOG_NOTICE, "IPv6 bad jumbo plen"); 7049 REASON_SET(reason, PFRES_IPOPTIONS); 7050 return (PF_DROP); 7051 } 7052 if (!pf_pull_hdr(pd->m, off, &jumbo, sizeof(jumbo), 7053 reason, AF_INET6)) { 7054 DPFPRINTF(LOG_NOTICE, "IPv6 short jumbo"); 7055 return (PF_DROP); 7056 } 7057 memcpy(&pd->jumbolen, jumbo.ip6oj_jumbo_len, 7058 sizeof(pd->jumbolen)); 7059 pd->jumbolen = ntohl(pd->jumbolen); 7060 if (pd->jumbolen < IPV6_MAXPACKET) { 7061 DPFPRINTF(LOG_NOTICE, "IPv6 short jumbolen"); 7062 REASON_SET(reason, PFRES_IPOPTIONS); 7063 return (PF_DROP); 7064 } 7065 break; 7066 case IP6OPT_ROUTER_ALERT: 7067 SET(pd->badopts, PF_OPT_ROUTER_ALERT); 7068 break; 7069 default: 7070 SET(pd->badopts, PF_OPT_OTHER); 7071 break; 7072 } 7073 off += sizeof(opt) + opt.ip6o_len; 7074 } 7075 7076 return (PF_PASS); 7077 } 7078 7079 int 7080 pf_walk_header6(struct pf_pdesc *pd, struct ip6_hdr *h, u_short *reason) 7081 { 7082 struct ip6_frag frag; 7083 struct ip6_ext ext; 7084 struct icmp6_hdr icmp6; 7085 struct ip6_rthdr rthdr; 7086 u_int32_t end; 7087 int hdr_cnt, fraghdr_cnt = 0, rthdr_cnt = 0; 7088 7089 pd->off += sizeof(struct ip6_hdr); 7090 end = pd->off + ntohs(h->ip6_plen); 7091 pd->fragoff = pd->extoff = pd->jumbolen = 0; 7092 pd->proto = h->ip6_nxt; 7093 7094 for (hdr_cnt = 0; hdr_cnt < pf_hdr_limit; hdr_cnt++) { 7095 switch (pd->proto) { 7096 case IPPROTO_ROUTING: 7097 case IPPROTO_DSTOPTS: 7098 SET(pd->badopts, PF_OPT_OTHER); 7099 break; 7100 case IPPROTO_HOPOPTS: 7101 if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext), 7102 reason, AF_INET6)) { 7103 DPFPRINTF(LOG_NOTICE, "IPv6 short exthdr"); 7104 return (PF_DROP); 7105 } 7106 if (pf_walk_option6(pd, h, pd->off + sizeof(ext), 7107 pd->off + (ext.ip6e_len + 1) * 8, reason) 7108 != PF_PASS) 7109 return (PF_DROP); 7110 /* option header which contains only padding is fishy */ 7111 if (pd->badopts == 0) 7112 SET(pd->badopts, PF_OPT_OTHER); 7113 break; 7114 } 7115 switch (pd->proto) { 7116 case IPPROTO_FRAGMENT: 7117 if (fraghdr_cnt++) { 7118 DPFPRINTF(LOG_NOTICE, "IPv6 multiple fragment"); 7119 REASON_SET(reason, PFRES_FRAG); 7120 return (PF_DROP); 7121 } 7122 /* jumbo payload packets cannot be fragmented */ 7123 if (pd->jumbolen != 0) { 7124 DPFPRINTF(LOG_NOTICE, "IPv6 fragmented jumbo"); 7125 REASON_SET(reason, PFRES_FRAG); 7126 return (PF_DROP); 7127 } 7128 if (!pf_pull_hdr(pd->m, pd->off, &frag, sizeof(frag), 7129 reason, AF_INET6)) { 7130 DPFPRINTF(LOG_NOTICE, "IPv6 short fragment"); 7131 return (PF_DROP); 7132 } 7133 /* stop walking over non initial fragments */ 7134 if (ntohs((frag.ip6f_offlg & IP6F_OFF_MASK)) != 0) { 7135 pd->fragoff = pd->off; 7136 return (PF_PASS); 7137 } 7138 /* RFC6946: reassemble only non atomic fragments */ 7139 if (frag.ip6f_offlg & IP6F_MORE_FRAG) 7140 pd->fragoff = pd->off; 7141 pd->off += 
sizeof(frag); 7142 pd->proto = frag.ip6f_nxt; 7143 break; 7144 case IPPROTO_ROUTING: 7145 if (rthdr_cnt++) { 7146 DPFPRINTF(LOG_NOTICE, "IPv6 multiple rthdr"); 7147 REASON_SET(reason, PFRES_IPOPTIONS); 7148 return (PF_DROP); 7149 } 7150 /* fragments may be short */ 7151 if (pd->fragoff != 0 && end < pd->off + sizeof(rthdr)) { 7152 pd->off = pd->fragoff; 7153 pd->proto = IPPROTO_FRAGMENT; 7154 return (PF_PASS); 7155 } 7156 if (!pf_pull_hdr(pd->m, pd->off, &rthdr, sizeof(rthdr), 7157 reason, AF_INET6)) { 7158 DPFPRINTF(LOG_NOTICE, "IPv6 short rthdr"); 7159 return (PF_DROP); 7160 } 7161 if (rthdr.ip6r_type == IPV6_RTHDR_TYPE_0) { 7162 DPFPRINTF(LOG_NOTICE, "IPv6 rthdr0"); 7163 REASON_SET(reason, PFRES_IPOPTIONS); 7164 return (PF_DROP); 7165 } 7166 /* FALLTHROUGH */ 7167 case IPPROTO_HOPOPTS: 7168 /* RFC2460 4.1: Hop-by-Hop only after IPv6 header */ 7169 if (pd->proto == IPPROTO_HOPOPTS && hdr_cnt > 0) { 7170 DPFPRINTF(LOG_NOTICE, "IPv6 hopopts not first"); 7171 REASON_SET(reason, PFRES_IPOPTIONS); 7172 return (PF_DROP); 7173 } 7174 /* FALLTHROUGH */ 7175 case IPPROTO_AH: 7176 case IPPROTO_DSTOPTS: 7177 /* fragments may be short */ 7178 if (pd->fragoff != 0 && end < pd->off + sizeof(ext)) { 7179 pd->off = pd->fragoff; 7180 pd->proto = IPPROTO_FRAGMENT; 7181 return (PF_PASS); 7182 } 7183 if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext), 7184 reason, AF_INET6)) { 7185 DPFPRINTF(LOG_NOTICE, "IPv6 short exthdr"); 7186 return (PF_DROP); 7187 } 7188 /* reassembly needs the ext header before the frag */ 7189 if (pd->fragoff == 0) 7190 pd->extoff = pd->off; 7191 if (pd->proto == IPPROTO_HOPOPTS && pd->fragoff == 0 && 7192 ntohs(h->ip6_plen) == 0 && pd->jumbolen != 0) { 7193 DPFPRINTF(LOG_NOTICE, "IPv6 missing jumbo"); 7194 REASON_SET(reason, PFRES_IPOPTIONS); 7195 return (PF_DROP); 7196 } 7197 if (pd->proto == IPPROTO_AH) 7198 pd->off += (ext.ip6e_len + 2) * 4; 7199 else 7200 pd->off += (ext.ip6e_len + 1) * 8; 7201 pd->proto = ext.ip6e_nxt; 7202 break; 7203 case IPPROTO_ICMPV6: 7204 /* fragments may be short, ignore inner header then */ 7205 if (pd->fragoff != 0 && end < pd->off + sizeof(icmp6)) { 7206 pd->off = pd->fragoff; 7207 pd->proto = IPPROTO_FRAGMENT; 7208 return (PF_PASS); 7209 } 7210 if (!pf_pull_hdr(pd->m, pd->off, &icmp6, sizeof(icmp6), 7211 reason, AF_INET6)) { 7212 DPFPRINTF(LOG_NOTICE, "IPv6 short icmp6hdr"); 7213 return (PF_DROP); 7214 } 7215 /* ICMP multicast packets have router alert options */ 7216 switch (icmp6.icmp6_type) { 7217 case MLD_LISTENER_QUERY: 7218 case MLD_LISTENER_REPORT: 7219 case MLD_LISTENER_DONE: 7220 case MLDV2_LISTENER_REPORT: 7221 /* 7222 * According to RFC 2710 all MLD messages are 7223 * sent with hop-limit (ttl) set to 1, and link 7224 * local source address. If either one is 7225 * missing then MLD message is invalid and 7226 * should be discarded. 7227 */ 7228 if ((h->ip6_hlim != 1) || 7229 !IN6_IS_ADDR_LINKLOCAL(&h->ip6_src)) { 7230 DPFPRINTF(LOG_NOTICE, "Invalid MLD"); 7231 REASON_SET(reason, PFRES_IPOPTIONS); 7232 return (PF_DROP); 7233 } 7234 CLR(pd->badopts, PF_OPT_ROUTER_ALERT); 7235 break; 7236 } 7237 return (PF_PASS); 7238 case IPPROTO_TCP: 7239 case IPPROTO_UDP: 7240 /* fragments may be short, ignore inner header then */ 7241 if (pd->fragoff != 0 && end < pd->off + 7242 (pd->proto == IPPROTO_TCP ? sizeof(struct tcphdr) : 7243 pd->proto == IPPROTO_UDP ? 
sizeof(struct udphdr) : 7244 sizeof(struct icmp6_hdr))) { 7245 pd->off = pd->fragoff; 7246 pd->proto = IPPROTO_FRAGMENT; 7247 } 7248 /* FALLTHROUGH */ 7249 default: 7250 return (PF_PASS); 7251 } 7252 } 7253 DPFPRINTF(LOG_NOTICE, "IPv6 nested extension header limit"); 7254 REASON_SET(reason, PFRES_IPOPTIONS); 7255 return (PF_DROP); 7256 } 7257 #endif /* INET6 */ 7258 7259 u_int16_t 7260 pf_pkt_hash(sa_family_t af, uint8_t proto, 7261 const struct pf_addr *src, const struct pf_addr *dst, 7262 uint16_t sport, uint16_t dport) 7263 { 7264 uint32_t hash; 7265 7266 hash = src->addr32[0] ^ dst->addr32[0]; 7267 #ifdef INET6 7268 if (af == AF_INET6) { 7269 hash ^= src->addr32[1] ^ dst->addr32[1]; 7270 hash ^= src->addr32[2] ^ dst->addr32[2]; 7271 hash ^= src->addr32[3] ^ dst->addr32[3]; 7272 } 7273 #endif 7274 7275 switch (proto) { 7276 case IPPROTO_TCP: 7277 case IPPROTO_UDP: 7278 hash ^= sport ^ dport; 7279 break; 7280 } 7281 7282 return stoeplitz_n32(hash); 7283 } 7284 7285 int 7286 pf_setup_pdesc(struct pf_pdesc *pd, sa_family_t af, int dir, 7287 struct pfi_kif *kif, struct mbuf *m, u_short *reason) 7288 { 7289 memset(pd, 0, sizeof(*pd)); 7290 pd->dir = dir; 7291 pd->kif = kif; /* kif is NULL when called by pflog */ 7292 pd->m = m; 7293 pd->sidx = (dir == PF_IN) ? 0 : 1; 7294 pd->didx = (dir == PF_IN) ? 1 : 0; 7295 pd->af = pd->naf = af; 7296 pd->rdomain = rtable_l2(pd->m->m_pkthdr.ph_rtableid); 7297 7298 switch (pd->af) { 7299 case AF_INET: { 7300 struct ip *h; 7301 7302 /* Check for illegal packets */ 7303 if (pd->m->m_pkthdr.len < (int)sizeof(struct ip)) { 7304 REASON_SET(reason, PFRES_SHORT); 7305 return (PF_DROP); 7306 } 7307 7308 h = mtod(pd->m, struct ip *); 7309 if (pd->m->m_pkthdr.len < ntohs(h->ip_len)) { 7310 REASON_SET(reason, PFRES_SHORT); 7311 return (PF_DROP); 7312 } 7313 7314 if (pf_walk_header(pd, h, reason) != PF_PASS) 7315 return (PF_DROP); 7316 7317 pd->src = (struct pf_addr *)&h->ip_src; 7318 pd->dst = (struct pf_addr *)&h->ip_dst; 7319 pd->tot_len = ntohs(h->ip_len); 7320 pd->tos = h->ip_tos & ~IPTOS_ECN_MASK; 7321 pd->ttl = h->ip_ttl; 7322 pd->virtual_proto = (h->ip_off & htons(IP_MF | IP_OFFMASK)) ? 7323 PF_VPROTO_FRAGMENT : pd->proto; 7324 7325 break; 7326 } 7327 #ifdef INET6 7328 case AF_INET6: { 7329 struct ip6_hdr *h; 7330 7331 /* Check for illegal packets */ 7332 if (pd->m->m_pkthdr.len < (int)sizeof(struct ip6_hdr)) { 7333 REASON_SET(reason, PFRES_SHORT); 7334 return (PF_DROP); 7335 } 7336 7337 h = mtod(pd->m, struct ip6_hdr *); 7338 if (pd->m->m_pkthdr.len < 7339 sizeof(struct ip6_hdr) + ntohs(h->ip6_plen)) { 7340 REASON_SET(reason, PFRES_SHORT); 7341 return (PF_DROP); 7342 } 7343 7344 if (pf_walk_header6(pd, h, reason) != PF_PASS) 7345 return (PF_DROP); 7346 7347 #if 1 7348 /* 7349 * we do not support jumbogram yet. if we keep going, zero 7350 * ip6_plen will do something bad, so drop the packet for now. 7351 */ 7352 if (pd->jumbolen != 0) { 7353 REASON_SET(reason, PFRES_NORM); 7354 return (PF_DROP); 7355 } 7356 #endif /* 1 */ 7357 7358 pd->src = (struct pf_addr *)&h->ip6_src; 7359 pd->dst = (struct pf_addr *)&h->ip6_dst; 7360 pd->tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr); 7361 pd->tos = (ntohl(h->ip6_flow) & 0x0fc00000) >> 20; 7362 pd->ttl = h->ip6_hlim; 7363 pd->virtual_proto = (pd->fragoff != 0) ? 
7364 PF_VPROTO_FRAGMENT : pd->proto; 7365 7366 break; 7367 } 7368 #endif /* INET6 */ 7369 default: 7370 panic("pf_setup_pdesc called with illegal af %u", pd->af); 7371 7372 } 7373 7374 pf_addrcpy(&pd->nsaddr, pd->src, pd->af); 7375 pf_addrcpy(&pd->ndaddr, pd->dst, pd->af); 7376 7377 switch (pd->virtual_proto) { 7378 case IPPROTO_TCP: { 7379 struct tcphdr *th = &pd->hdr.tcp; 7380 7381 if (!pf_pull_hdr(pd->m, pd->off, th, sizeof(*th), 7382 reason, pd->af)) 7383 return (PF_DROP); 7384 pd->hdrlen = sizeof(*th); 7385 if (th->th_dport == 0 || 7386 pd->off + (th->th_off << 2) > pd->tot_len || 7387 (th->th_off << 2) < sizeof(struct tcphdr)) { 7388 REASON_SET(reason, PFRES_SHORT); 7389 return (PF_DROP); 7390 } 7391 pd->p_len = pd->tot_len - pd->off - (th->th_off << 2); 7392 pd->sport = &th->th_sport; 7393 pd->dport = &th->th_dport; 7394 pd->pcksum = &th->th_sum; 7395 break; 7396 } 7397 case IPPROTO_UDP: { 7398 struct udphdr *uh = &pd->hdr.udp; 7399 7400 if (!pf_pull_hdr(pd->m, pd->off, uh, sizeof(*uh), 7401 reason, pd->af)) 7402 return (PF_DROP); 7403 pd->hdrlen = sizeof(*uh); 7404 if (uh->uh_dport == 0 || 7405 pd->off + ntohs(uh->uh_ulen) > pd->tot_len || 7406 ntohs(uh->uh_ulen) < sizeof(struct udphdr)) { 7407 REASON_SET(reason, PFRES_SHORT); 7408 return (PF_DROP); 7409 } 7410 pd->sport = &uh->uh_sport; 7411 pd->dport = &uh->uh_dport; 7412 pd->pcksum = &uh->uh_sum; 7413 break; 7414 } 7415 case IPPROTO_ICMP: { 7416 if (!pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp, ICMP_MINLEN, 7417 reason, pd->af)) 7418 return (PF_DROP); 7419 pd->hdrlen = ICMP_MINLEN; 7420 if (pd->off + pd->hdrlen > pd->tot_len) { 7421 REASON_SET(reason, PFRES_SHORT); 7422 return (PF_DROP); 7423 } 7424 pd->pcksum = &pd->hdr.icmp.icmp_cksum; 7425 break; 7426 } 7427 #ifdef INET6 7428 case IPPROTO_ICMPV6: { 7429 size_t icmp_hlen = sizeof(struct icmp6_hdr); 7430 7431 if (!pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp6, icmp_hlen, 7432 reason, pd->af)) 7433 return (PF_DROP); 7434 /* ICMP headers we look further into to match state */ 7435 switch (pd->hdr.icmp6.icmp6_type) { 7436 case MLD_LISTENER_QUERY: 7437 case MLD_LISTENER_REPORT: 7438 icmp_hlen = sizeof(struct mld_hdr); 7439 break; 7440 case ND_NEIGHBOR_SOLICIT: 7441 case ND_NEIGHBOR_ADVERT: 7442 icmp_hlen = sizeof(struct nd_neighbor_solicit); 7443 /* FALLTHROUGH */ 7444 case ND_ROUTER_SOLICIT: 7445 case ND_ROUTER_ADVERT: 7446 case ND_REDIRECT: 7447 if (pd->ttl != 255) { 7448 REASON_SET(reason, PFRES_NORM); 7449 return (PF_DROP); 7450 } 7451 break; 7452 } 7453 if (icmp_hlen > sizeof(struct icmp6_hdr) && 7454 !pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp6, icmp_hlen, 7455 reason, pd->af)) 7456 return (PF_DROP); 7457 pd->hdrlen = icmp_hlen; 7458 if (pd->off + pd->hdrlen > pd->tot_len) { 7459 REASON_SET(reason, PFRES_SHORT); 7460 return (PF_DROP); 7461 } 7462 pd->pcksum = &pd->hdr.icmp6.icmp6_cksum; 7463 break; 7464 } 7465 #endif /* INET6 */ 7466 } 7467 7468 if (pd->sport) 7469 pd->osport = pd->nsport = *pd->sport; 7470 if (pd->dport) 7471 pd->odport = pd->ndport = *pd->dport; 7472 7473 pd->hash = pf_pkt_hash(pd->af, pd->proto, 7474 pd->src, pd->dst, pd->osport, pd->odport); 7475 7476 return (PF_PASS); 7477 } 7478 7479 void 7480 pf_counters_inc(int action, struct pf_pdesc *pd, struct pf_state *st, 7481 struct pf_rule *r, struct pf_rule *a) 7482 { 7483 int dirndx; 7484 pd->kif->pfik_bytes[pd->af == AF_INET6][pd->dir == PF_OUT] 7485 [action != PF_PASS] += pd->tot_len; 7486 pd->kif->pfik_packets[pd->af == AF_INET6][pd->dir == PF_OUT] 7487 [action != PF_PASS]++; 7488 7489 if (action == PF_PASS || 
action == PF_AFRT || r->action == PF_DROP) { 7490 dirndx = (pd->dir == PF_OUT); 7491 r->packets[dirndx]++; 7492 r->bytes[dirndx] += pd->tot_len; 7493 if (a != NULL) { 7494 a->packets[dirndx]++; 7495 a->bytes[dirndx] += pd->tot_len; 7496 } 7497 if (st != NULL) { 7498 struct pf_rule_item *ri; 7499 struct pf_sn_item *sni; 7500 7501 SLIST_FOREACH(sni, &st->src_nodes, next) { 7502 sni->sn->packets[dirndx]++; 7503 sni->sn->bytes[dirndx] += pd->tot_len; 7504 } 7505 dirndx = (pd->dir == st->direction) ? 0 : 1; 7506 st->packets[dirndx]++; 7507 st->bytes[dirndx] += pd->tot_len; 7508 7509 SLIST_FOREACH(ri, &st->match_rules, entry) { 7510 ri->r->packets[dirndx]++; 7511 ri->r->bytes[dirndx] += pd->tot_len; 7512 7513 if (ri->r->src.addr.type == PF_ADDR_TABLE) 7514 pfr_update_stats(ri->r->src.addr.p.tbl, 7515 &st->key[(st->direction == PF_IN)]-> 7516 addr[(st->direction == PF_OUT)], 7517 pd, ri->r->action, ri->r->src.neg); 7518 if (ri->r->dst.addr.type == PF_ADDR_TABLE) 7519 pfr_update_stats(ri->r->dst.addr.p.tbl, 7520 &st->key[(st->direction == PF_IN)]-> 7521 addr[(st->direction == PF_IN)], 7522 pd, ri->r->action, ri->r->dst.neg); 7523 } 7524 } 7525 if (r->src.addr.type == PF_ADDR_TABLE) 7526 pfr_update_stats(r->src.addr.p.tbl, 7527 (st == NULL) ? pd->src : 7528 &st->key[(st->direction == PF_IN)]-> 7529 addr[(st->direction == PF_OUT)], 7530 pd, r->action, r->src.neg); 7531 if (r->dst.addr.type == PF_ADDR_TABLE) 7532 pfr_update_stats(r->dst.addr.p.tbl, 7533 (st == NULL) ? pd->dst : 7534 &st->key[(st->direction == PF_IN)]-> 7535 addr[(st->direction == PF_IN)], 7536 pd, r->action, r->dst.neg); 7537 } 7538 } 7539 7540 int 7541 pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0) 7542 { 7543 #if NCARP > 0 7544 struct ifnet *ifp0; 7545 #endif 7546 struct pfi_kif *kif; 7547 u_short action, reason = 0; 7548 struct pf_rule *a = NULL, *r = &pf_default_rule; 7549 struct pf_state *st = NULL; 7550 struct pf_state_key_cmp key; 7551 struct pf_ruleset *ruleset = NULL; 7552 struct pf_pdesc pd; 7553 int dir = (fwdir == PF_FWD) ? 
PF_OUT : fwdir; 7554 u_int32_t qid, pqid = 0; 7555 int have_pf_lock = 0; 7556 7557 if (!pf_status.running) 7558 return (PF_PASS); 7559 7560 #if NCARP > 0 7561 if (ifp->if_type == IFT_CARP && 7562 (ifp0 = if_get(ifp->if_carpdevidx)) != NULL) { 7563 kif = (struct pfi_kif *)ifp0->if_pf_kif; 7564 if_put(ifp0); 7565 } else 7566 #endif /* NCARP */ 7567 kif = (struct pfi_kif *)ifp->if_pf_kif; 7568 7569 if (kif == NULL) { 7570 DPFPRINTF(LOG_ERR, 7571 "%s: kif == NULL, if_xname %s", __func__, ifp->if_xname); 7572 return (PF_DROP); 7573 } 7574 if (kif->pfik_flags & PFI_IFLAG_SKIP) 7575 return (PF_PASS); 7576 7577 #ifdef DIAGNOSTIC 7578 if (((*m0)->m_flags & M_PKTHDR) == 0) 7579 panic("non-M_PKTHDR is passed to pf_test"); 7580 #endif /* DIAGNOSTIC */ 7581 7582 if ((*m0)->m_pkthdr.pf.flags & PF_TAG_GENERATED) 7583 return (PF_PASS); 7584 7585 if ((*m0)->m_pkthdr.pf.flags & PF_TAG_DIVERTED_PACKET) { 7586 (*m0)->m_pkthdr.pf.flags &= ~PF_TAG_DIVERTED_PACKET; 7587 return (PF_PASS); 7588 } 7589 7590 if ((*m0)->m_pkthdr.pf.flags & PF_TAG_REFRAGMENTED) { 7591 (*m0)->m_pkthdr.pf.flags &= ~PF_TAG_REFRAGMENTED; 7592 return (PF_PASS); 7593 } 7594 7595 action = pf_setup_pdesc(&pd, af, dir, kif, *m0, &reason); 7596 if (action != PF_PASS) { 7597 #if NPFLOG > 0 7598 pd.pflog |= PF_LOG_FORCE; 7599 #endif /* NPFLOG > 0 */ 7600 goto done; 7601 } 7602 7603 /* packet normalization and reassembly */ 7604 switch (pd.af) { 7605 case AF_INET: 7606 action = pf_normalize_ip(&pd, &reason); 7607 break; 7608 #ifdef INET6 7609 case AF_INET6: 7610 action = pf_normalize_ip6(&pd, &reason); 7611 break; 7612 #endif /* INET6 */ 7613 } 7614 *m0 = pd.m; 7615 /* if packet sits in reassembly queue, return without error */ 7616 if (pd.m == NULL) 7617 return PF_PASS; 7618 7619 if (action != PF_PASS) { 7620 #if NPFLOG > 0 7621 pd.pflog |= PF_LOG_FORCE; 7622 #endif /* NPFLOG > 0 */ 7623 goto done; 7624 } 7625 7626 /* if packet has been reassembled, update packet description */ 7627 if (pf_status.reass && pd.virtual_proto == PF_VPROTO_FRAGMENT) { 7628 action = pf_setup_pdesc(&pd, af, dir, kif, pd.m, &reason); 7629 if (action != PF_PASS) { 7630 #if NPFLOG > 0 7631 pd.pflog |= PF_LOG_FORCE; 7632 #endif /* NPFLOG > 0 */ 7633 goto done; 7634 } 7635 } 7636 pd.m->m_pkthdr.pf.flags |= PF_TAG_PROCESSED; 7637 7638 /* 7639 * Avoid pcb-lookups from the forwarding path. They should never 7640 * match and would cause MP locking problems. 
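* Setting lookup.done to -1 below makes the rule matching code * treat the socket lookup as already attempted and failed instead * of performing it.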
7641 */ 7642 if (fwdir == PF_FWD) { 7643 pd.lookup.done = -1; 7644 pd.lookup.uid = -1; 7645 pd.lookup.gid = -1; 7646 pd.lookup.pid = NO_PID; 7647 } 7648 7649 switch (pd.virtual_proto) { 7650 7651 case PF_VPROTO_FRAGMENT: { 7652 /* 7653 * handle fragments that aren't reassembled by 7654 * normalization 7655 */ 7656 PF_LOCK(); 7657 have_pf_lock = 1; 7658 action = pf_test_rule(&pd, &r, &st, &a, &ruleset, &reason); 7659 st = pf_state_ref(st); 7660 if (action != PF_PASS) 7661 REASON_SET(&reason, PFRES_FRAG); 7662 break; 7663 } 7664 7665 case IPPROTO_ICMP: { 7666 if (pd.af != AF_INET) { 7667 action = PF_DROP; 7668 REASON_SET(&reason, PFRES_NORM); 7669 DPFPRINTF(LOG_NOTICE, 7670 "dropping IPv6 packet with ICMPv4 payload"); 7671 break; 7672 } 7673 PF_STATE_ENTER_READ(); 7674 action = pf_test_state_icmp(&pd, &st, &reason); 7675 st = pf_state_ref(st); 7676 PF_STATE_EXIT_READ(); 7677 if (action == PF_PASS || action == PF_AFRT) { 7678 #if NPFSYNC > 0 7679 pfsync_update_state(st); 7680 #endif /* NPFSYNC > 0 */ 7681 r = st->rule.ptr; 7682 a = st->anchor.ptr; 7683 #if NPFLOG > 0 7684 pd.pflog |= st->log; 7685 #endif /* NPFLOG > 0 */ 7686 } else if (st == NULL) { 7687 PF_LOCK(); 7688 have_pf_lock = 1; 7689 action = pf_test_rule(&pd, &r, &st, &a, &ruleset, 7690 &reason); 7691 st = pf_state_ref(st); 7692 } 7693 break; 7694 } 7695 7696 #ifdef INET6 7697 case IPPROTO_ICMPV6: { 7698 if (pd.af != AF_INET6) { 7699 action = PF_DROP; 7700 REASON_SET(&reason, PFRES_NORM); 7701 DPFPRINTF(LOG_NOTICE, 7702 "dropping IPv4 packet with ICMPv6 payload"); 7703 break; 7704 } 7705 PF_STATE_ENTER_READ(); 7706 action = pf_test_state_icmp(&pd, &st, &reason); 7707 st = pf_state_ref(st); 7708 PF_STATE_EXIT_READ(); 7709 if (action == PF_PASS || action == PF_AFRT) { 7710 #if NPFSYNC > 0 7711 pfsync_update_state(st); 7712 #endif /* NPFSYNC > 0 */ 7713 r = st->rule.ptr; 7714 a = st->anchor.ptr; 7715 #if NPFLOG > 0 7716 pd.pflog |= st->log; 7717 #endif /* NPFLOG > 0 */ 7718 } else if (st == NULL) { 7719 PF_LOCK(); 7720 have_pf_lock = 1; 7721 action = pf_test_rule(&pd, &r, &st, &a, &ruleset, 7722 &reason); 7723 st = pf_state_ref(st); 7724 } 7725 break; 7726 } 7727 #endif /* INET6 */ 7728 7729 default: 7730 if (pd.virtual_proto == IPPROTO_TCP) { 7731 if (pd.dir == PF_IN && (pd.hdr.tcp.th_flags & 7732 (TH_SYN|TH_ACK)) == TH_SYN && 7733 pf_synflood_check(&pd)) { 7734 PF_LOCK(); 7735 have_pf_lock = 1; 7736 pf_syncookie_send(&pd); 7737 action = PF_DROP; 7738 break; 7739 } 7740 if ((pd.hdr.tcp.th_flags & TH_ACK) && pd.p_len == 0) 7741 pqid = 1; 7742 action = pf_normalize_tcp(&pd); 7743 if (action == PF_DROP) 7744 break; 7745 } 7746 7747 key.af = pd.af; 7748 key.proto = pd.virtual_proto; 7749 key.rdomain = pd.rdomain; 7750 pf_addrcpy(&key.addr[pd.sidx], pd.src, key.af); 7751 pf_addrcpy(&key.addr[pd.didx], pd.dst, key.af); 7752 key.port[pd.sidx] = pd.osport; 7753 key.port[pd.didx] = pd.odport; 7754 key.hash = pd.hash; 7755 7756 PF_STATE_ENTER_READ(); 7757 action = pf_find_state(&pd, &key, &st); 7758 st = pf_state_ref(st); 7759 PF_STATE_EXIT_READ(); 7760 7761 /* check for syncookies if tcp ack and no active state */ 7762 if (pd.dir == PF_IN && pd.virtual_proto == IPPROTO_TCP && 7763 (st == NULL || (st->src.state >= TCPS_FIN_WAIT_2 && 7764 st->dst.state >= TCPS_FIN_WAIT_2)) && 7765 (pd.hdr.tcp.th_flags & (TH_SYN|TH_ACK|TH_RST)) == TH_ACK && 7766 pf_syncookie_validate(&pd)) { 7767 struct mbuf *msyn = pf_syncookie_recreate_syn(&pd); 7768 if (msyn) { 7769 action = pf_test(af, fwdir, ifp, &msyn); 7770 m_freem(msyn); 7771 if (action == PF_PASS || 
action == PF_AFRT) { 7772 PF_STATE_ENTER_READ(); 7773 pf_state_unref(st); 7774 action = pf_find_state(&pd, &key, &st); 7775 st = pf_state_ref(st); 7776 PF_STATE_EXIT_READ(); 7777 if (st == NULL) 7778 return (PF_DROP); 7779 st->src.seqhi = st->dst.seqhi = 7780 ntohl(pd.hdr.tcp.th_ack) - 1; 7781 st->src.seqlo = 7782 ntohl(pd.hdr.tcp.th_seq) - 1; 7783 pf_set_protostate(st, PF_PEER_SRC, 7784 PF_TCPS_PROXY_DST); 7785 } 7786 } else 7787 action = PF_DROP; 7788 } 7789 7790 if (action == PF_MATCH) 7791 action = pf_test_state(&pd, &st, &reason); 7792 7793 if (action == PF_PASS || action == PF_AFRT) { 7794 #if NPFSYNC > 0 7795 pfsync_update_state(st); 7796 #endif /* NPFSYNC > 0 */ 7797 r = st->rule.ptr; 7798 a = st->anchor.ptr; 7799 #if NPFLOG > 0 7800 pd.pflog |= st->log; 7801 #endif /* NPFLOG > 0 */ 7802 } else if (st == NULL) { 7803 PF_LOCK(); 7804 have_pf_lock = 1; 7805 action = pf_test_rule(&pd, &r, &st, &a, &ruleset, 7806 &reason); 7807 st = pf_state_ref(st); 7808 } 7809 7810 if (pd.virtual_proto == IPPROTO_TCP) { 7811 if (st) { 7812 if (st->max_mss) 7813 pf_normalize_mss(&pd, st->max_mss); 7814 } else if (r->max_mss) 7815 pf_normalize_mss(&pd, r->max_mss); 7816 } 7817 7818 break; 7819 } 7820 7821 if (have_pf_lock != 0) 7822 PF_UNLOCK(); 7823 7824 /* 7825 * At the moment, we rely on NET_LOCK() to prevent removal of items 7826 * we've collected above ('r', 'anchor' and 'ruleset'). They'll have 7827 * to be refcounted when NET_LOCK() is gone. 7828 */ 7829 7830 done: 7831 if (action != PF_DROP) { 7832 if (st) { 7833 /* The non-state case is handled in pf_test_rule() */ 7834 if (action == PF_PASS && pd.badopts != 0 && 7835 !(st->state_flags & PFSTATE_ALLOWOPTS)) { 7836 action = PF_DROP; 7837 REASON_SET(&reason, PFRES_IPOPTIONS); 7838 #if NPFLOG > 0 7839 pd.pflog |= PF_LOG_FORCE; 7840 #endif /* NPFLOG > 0 */ 7841 DPFPRINTF(LOG_NOTICE, "dropping packet with " 7842 "ip/ipv6 options in pf_test()"); 7843 } 7844 7845 pf_scrub(pd.m, st->state_flags, pd.af, st->min_ttl, 7846 st->set_tos); 7847 pf_tag_packet(pd.m, st->tag, st->rtableid[pd.didx]); 7848 if (pqid || (pd.tos & IPTOS_LOWDELAY)) { 7849 qid = st->pqid; 7850 if (st->state_flags & PFSTATE_SETPRIO) { 7851 pd.m->m_pkthdr.pf.prio = 7852 st->set_prio[1]; 7853 } 7854 } else { 7855 qid = st->qid; 7856 if (st->state_flags & PFSTATE_SETPRIO) { 7857 pd.m->m_pkthdr.pf.prio = 7858 st->set_prio[0]; 7859 } 7860 } 7861 pd.m->m_pkthdr.pf.delay = st->delay; 7862 } else { 7863 pf_scrub(pd.m, r->scrub_flags, pd.af, r->min_ttl, 7864 r->set_tos); 7865 if (pqid || (pd.tos & IPTOS_LOWDELAY)) { 7866 qid = r->pqid; 7867 if (r->scrub_flags & PFSTATE_SETPRIO) 7868 pd.m->m_pkthdr.pf.prio = r->set_prio[1]; 7869 } else { 7870 qid = r->qid; 7871 if (r->scrub_flags & PFSTATE_SETPRIO) 7872 pd.m->m_pkthdr.pf.prio = r->set_prio[0]; 7873 } 7874 pd.m->m_pkthdr.pf.delay = r->delay; 7875 } 7876 } 7877 7878 if (action == PF_PASS && qid) 7879 pd.m->m_pkthdr.pf.qid = qid; 7880 if (pd.dir == PF_IN && st && st->key[PF_SK_STACK]) 7881 pf_mbuf_link_state_key(pd.m, st->key[PF_SK_STACK]); 7882 if (pd.dir == PF_OUT && st && st->key[PF_SK_STACK]) 7883 pf_state_key_link_inpcb(st->key[PF_SK_STACK], 7884 pd.m->m_pkthdr.pf.inp); 7885 7886 if (st != NULL && !ISSET(pd.m->m_pkthdr.csum_flags, M_FLOWID)) { 7887 pd.m->m_pkthdr.ph_flowid = st->key[PF_SK_WIRE]->hash; 7888 SET(pd.m->m_pkthdr.csum_flags, M_FLOWID); 7889 } 7890 7891 /* 7892 * connections redirected to loopback should not match sockets 7893 * bound specifically to loopback due to security implications, 7894 * see in_pcblookup_listen(). 
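* The PF_TAG_TRANSLATE_LOCALHOST flag set below marks such * redirected connections so the stack can tell them apart.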
7895 */ 7896 if (pd.destchg) 7897 if ((pd.af == AF_INET && (ntohl(pd.dst->v4.s_addr) >> 7898 IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) || 7899 (pd.af == AF_INET6 && IN6_IS_ADDR_LOOPBACK(&pd.dst->v6))) 7900 pd.m->m_pkthdr.pf.flags |= PF_TAG_TRANSLATE_LOCALHOST; 7901 /* We need to redo the route lookup on outgoing routes. */ 7902 if (pd.destchg && pd.dir == PF_OUT) 7903 pd.m->m_pkthdr.pf.flags |= PF_TAG_REROUTE; 7904 7905 if (pd.dir == PF_IN && action == PF_PASS && 7906 (r->divert.type == PF_DIVERT_TO || 7907 r->divert.type == PF_DIVERT_REPLY)) { 7908 struct pf_divert *divert; 7909 7910 if ((divert = pf_get_divert(pd.m))) { 7911 pd.m->m_pkthdr.pf.flags |= PF_TAG_DIVERTED; 7912 divert->addr = r->divert.addr; 7913 divert->port = r->divert.port; 7914 divert->rdomain = pd.rdomain; 7915 divert->type = r->divert.type; 7916 } 7917 } 7918 7919 if (action == PF_PASS && r->divert.type == PF_DIVERT_PACKET) 7920 action = PF_DIVERT; 7921 7922 #if NPFLOG > 0 7923 if (pd.pflog) { 7924 struct pf_rule_item *ri; 7925 7926 if (pd.pflog & PF_LOG_FORCE || r->log & PF_LOG_ALL) 7927 pflog_packet(&pd, reason, r, a, ruleset, NULL); 7928 if (st) { 7929 SLIST_FOREACH(ri, &st->match_rules, entry) 7930 if (ri->r->log & PF_LOG_ALL) 7931 pflog_packet(&pd, reason, ri->r, a, 7932 ruleset, NULL); 7933 } 7934 } 7935 #endif /* NPFLOG > 0 */ 7936 7937 pf_counters_inc(action, &pd, st, r, a); 7938 7939 switch (action) { 7940 case PF_SYNPROXY_DROP: 7941 m_freem(pd.m); 7942 /* FALLTHROUGH */ 7943 case PF_DEFER: 7944 pd.m = NULL; 7945 action = PF_PASS; 7946 break; 7947 case PF_DIVERT: 7948 switch (pd.af) { 7949 case AF_INET: 7950 divert_packet(pd.m, pd.dir, r->divert.port); 7951 pd.m = NULL; 7952 break; 7953 #ifdef INET6 7954 case AF_INET6: 7955 divert6_packet(pd.m, pd.dir, r->divert.port); 7956 pd.m = NULL; 7957 break; 7958 #endif /* INET6 */ 7959 } 7960 action = PF_PASS; 7961 break; 7962 #ifdef INET6 7963 case PF_AFRT: 7964 if (pf_translate_af(&pd)) { 7965 action = PF_DROP; 7966 goto out; 7967 } 7968 pd.m->m_pkthdr.pf.flags |= PF_TAG_GENERATED; 7969 switch (pd.naf) { 7970 case AF_INET: 7971 if (pd.dir == PF_IN) { 7972 int flags = IP_REDIRECT; 7973 7974 switch (atomic_load_int(&ip_forwarding)) { 7975 case 2: 7976 SET(flags, IP_FORWARDING_IPSEC); 7977 /* FALLTHROUGH */ 7978 case 1: 7979 SET(flags, IP_FORWARDING); 7980 break; 7981 default: 7982 ipstat_inc(ips_cantforward); 7983 action = PF_DROP; 7984 goto out; 7985 } 7986 if (atomic_load_int(&ip_directedbcast)) 7987 SET(flags, IP_ALLOWBROADCAST); 7988 ip_forward(pd.m, ifp, NULL, flags); 7989 } else 7990 ip_output(pd.m, NULL, NULL, 0, NULL, NULL, 0); 7991 break; 7992 case AF_INET6: 7993 if (pd.dir == PF_IN) { 7994 int flags = IPV6_REDIRECT; 7995 7996 switch (atomic_load_int(&ip6_forwarding)) { 7997 case 2: 7998 SET(flags, IPV6_FORWARDING_IPSEC); 7999 /* FALLTHROUGH */ 8000 case 1: 8001 SET(flags, IPV6_FORWARDING); 8002 break; 8003 default: 8004 ip6stat_inc(ip6s_cantforward); 8005 action = PF_DROP; 8006 goto out; 8007 } 8008 ip6_forward(pd.m, NULL, flags); 8009 } else 8010 ip6_output(pd.m, NULL, NULL, 0, NULL, NULL); 8011 break; 8012 } 8013 pd.m = NULL; 8014 action = PF_PASS; 8015 break; 8016 #endif /* INET6 */ 8017 case PF_DROP: 8018 m_freem(pd.m); 8019 pd.m = NULL; 8020 break; 8021 default: 8022 if (st && st->rt) { 8023 switch (pd.af) { 8024 case AF_INET: 8025 pf_route(&pd, st); 8026 break; 8027 #ifdef INET6 8028 case AF_INET6: 8029 pf_route6(&pd, st); 8030 break; 8031 #endif /* INET6 */ 8032 } 8033 } 8034 break; 8035 } 8036 8037 #ifdef INET6 8038 /* if reassembled packet passed, create 
new fragments */ 8039 if (pf_status.reass && action == PF_PASS && pd.m && fwdir == PF_FWD && 8040 pd.af == AF_INET6) { 8041 struct m_tag *mtag; 8042 8043 if ((mtag = m_tag_find(pd.m, PACKET_TAG_PF_REASSEMBLED, NULL))) 8044 action = pf_refragment6(&pd.m, mtag, NULL, NULL, NULL); 8045 } 8046 #endif /* INET6 */ 8047 if (st && action != PF_DROP) { 8048 if (!st->if_index_in && dir == PF_IN) 8049 st->if_index_in = ifp->if_index; 8050 else if (!st->if_index_out && dir == PF_OUT) 8051 st->if_index_out = ifp->if_index; 8052 } 8053 8054 out: 8055 *m0 = pd.m; 8056 8057 pf_state_unref(st); 8058 8059 return (action); 8060 } 8061 8062 int 8063 pf_ouraddr(struct mbuf *m) 8064 { 8065 struct pf_state_key *sk; 8066 8067 if (m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) 8068 return (1); 8069 8070 sk = m->m_pkthdr.pf.statekey; 8071 if (sk != NULL) { 8072 if (READ_ONCE(sk->sk_inp) != NULL) 8073 return (1); 8074 } 8075 8076 return (-1); 8077 } 8078 8079 /* 8080 * must be called whenever any addressing information such as 8081 * address, port or protocol has changed 8082 */ 8083 void 8084 pf_pkt_addr_changed(struct mbuf *m) 8085 { 8086 pf_mbuf_unlink_state_key(m); 8087 pf_mbuf_unlink_inpcb(m); 8088 } 8089 8090 struct inpcb * 8091 pf_inp_lookup(struct mbuf *m) 8092 { 8093 struct inpcb *inp = NULL; 8094 struct pf_state_key *sk = m->m_pkthdr.pf.statekey; 8095 8096 if (!pf_state_key_isvalid(sk)) 8097 pf_mbuf_unlink_state_key(m); 8098 else if (READ_ONCE(sk->sk_inp) != NULL) { 8099 mtx_enter(&pf_inp_mtx); 8100 inp = in_pcbref(sk->sk_inp); 8101 mtx_leave(&pf_inp_mtx); 8102 } 8103 8104 return (inp); 8105 } 8106 8107 void 8108 pf_inp_link(struct mbuf *m, struct inpcb *inp) 8109 { 8110 struct pf_state_key *sk = m->m_pkthdr.pf.statekey; 8111 8112 if (!pf_state_key_isvalid(sk)) { 8113 pf_mbuf_unlink_state_key(m); 8114 return; 8115 } 8116 8117 /* 8118 * We don't need to grab the PF lock here. In the worst case we link 8119 * the inp to a state which might just be getting marked as deleted by 8120 * another thread. 8121 */ 8122 pf_state_key_link_inpcb(sk, inp); 8123 8124 /* The state key has finished finding the inp; it is no longer needed. */ 8125 pf_mbuf_unlink_state_key(m); 8126 } 8127 8128 void 8129 pf_inp_unlink(struct inpcb *inp) 8130 { 8131 struct pf_state_key *sk; 8132 8133 if (READ_ONCE(inp->inp_pf_sk) == NULL) 8134 return; 8135 8136 mtx_enter(&pf_inp_mtx); 8137 sk = inp->inp_pf_sk; 8138 if (sk == NULL) { 8139 mtx_leave(&pf_inp_mtx); 8140 return; 8141 } 8142 KASSERT(sk->sk_inp == inp); 8143 sk->sk_inp = NULL; 8144 inp->inp_pf_sk = NULL; 8145 mtx_leave(&pf_inp_mtx); 8146 8147 pf_state_key_unref(sk); 8148 in_pcbunref(inp); 8149 } 8150 8151 void 8152 pf_state_key_link_reverse(struct pf_state_key *sk, struct pf_state_key *skrev) 8153 { 8154 struct pf_state_key *old_reverse; 8155 8156 old_reverse = atomic_cas_ptr(&sk->sk_reverse, NULL, skrev); 8157 if (old_reverse != NULL) 8158 KASSERT(old_reverse == skrev); 8159 else { 8160 pf_state_key_ref(skrev); 8161 8162 /* 8163 * NOTE: if sk == skrev, the KASSERT() below holds true; we 8164 * still want to grab a reference in that case, because 8165 * pf_state_key_unlink_reverse() does not check whether the keys 8166 * are identical.
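* Taking the reference unconditionally keeps the reference counting * symmetric with that unlink path.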
8167 */ 8168 old_reverse = atomic_cas_ptr(&skrev->sk_reverse, NULL, sk); 8169 if (old_reverse != NULL) 8170 KASSERT(old_reverse == sk); 8171 8172 pf_state_key_ref(sk); 8173 } 8174 } 8175 8176 #if NPFLOG > 0 8177 void 8178 pf_log_matches(struct pf_pdesc *pd, struct pf_rule *rm, struct pf_rule *am, 8179 struct pf_ruleset *ruleset, struct pf_rule_slist *matchrules) 8180 { 8181 struct pf_rule_item *ri; 8182 8183 /* if this is the log(matches) rule, packet has been logged already */ 8184 if (rm->log & PF_LOG_MATCHES) 8185 return; 8186 8187 SLIST_FOREACH(ri, matchrules, entry) 8188 if (ri->r->log & PF_LOG_MATCHES) 8189 pflog_packet(pd, PFRES_MATCH, rm, am, ruleset, ri->r); 8190 } 8191 #endif /* NPFLOG > 0 */ 8192 8193 struct pf_state_key * 8194 pf_state_key_ref(struct pf_state_key *sk) 8195 { 8196 if (sk != NULL) 8197 PF_REF_TAKE(sk->sk_refcnt); 8198 8199 return (sk); 8200 } 8201 8202 void 8203 pf_state_key_unref(struct pf_state_key *sk) 8204 { 8205 if (PF_REF_RELE(sk->sk_refcnt)) { 8206 /* state key must be removed from tree */ 8207 KASSERT(!pf_state_key_isvalid(sk)); 8208 /* state key must be unlinked from reverse key */ 8209 KASSERT(sk->sk_reverse == NULL); 8210 /* state key must be unlinked from socket */ 8211 KASSERT(sk->sk_inp == NULL); 8212 pool_put(&pf_state_key_pl, sk); 8213 } 8214 } 8215 8216 int 8217 pf_state_key_isvalid(struct pf_state_key *sk) 8218 { 8219 return ((sk != NULL) && (sk->sk_removed == 0)); 8220 } 8221 8222 void 8223 pf_mbuf_link_state_key(struct mbuf *m, struct pf_state_key *sk) 8224 { 8225 KASSERT(m->m_pkthdr.pf.statekey == NULL); 8226 m->m_pkthdr.pf.statekey = pf_state_key_ref(sk); 8227 } 8228 8229 void 8230 pf_mbuf_unlink_state_key(struct mbuf *m) 8231 { 8232 struct pf_state_key *sk = m->m_pkthdr.pf.statekey; 8233 8234 if (sk != NULL) { 8235 m->m_pkthdr.pf.statekey = NULL; 8236 pf_state_key_unref(sk); 8237 } 8238 } 8239 8240 void 8241 pf_mbuf_link_inpcb(struct mbuf *m, struct inpcb *inp) 8242 { 8243 KASSERT(m->m_pkthdr.pf.inp == NULL); 8244 m->m_pkthdr.pf.inp = in_pcbref(inp); 8245 } 8246 8247 void 8248 pf_mbuf_unlink_inpcb(struct mbuf *m) 8249 { 8250 struct inpcb *inp = m->m_pkthdr.pf.inp; 8251 8252 if (inp != NULL) { 8253 m->m_pkthdr.pf.inp = NULL; 8254 in_pcbunref(inp); 8255 } 8256 } 8257 8258 void 8259 pf_state_key_link_inpcb(struct pf_state_key *sk, struct inpcb *inp) 8260 { 8261 if (inp == NULL || READ_ONCE(sk->sk_inp) != NULL) 8262 return; 8263 8264 mtx_enter(&pf_inp_mtx); 8265 if (inp->inp_pf_sk != NULL || sk->sk_inp != NULL) { 8266 mtx_leave(&pf_inp_mtx); 8267 return; 8268 } 8269 sk->sk_inp = in_pcbref(inp); 8270 inp->inp_pf_sk = pf_state_key_ref(sk); 8271 mtx_leave(&pf_inp_mtx); 8272 } 8273 8274 void 8275 pf_state_key_unlink_inpcb(struct pf_state_key *sk) 8276 { 8277 struct inpcb *inp; 8278 8279 if (READ_ONCE(sk->sk_inp) == NULL) 8280 return; 8281 8282 mtx_enter(&pf_inp_mtx); 8283 inp = sk->sk_inp; 8284 if (inp == NULL) { 8285 mtx_leave(&pf_inp_mtx); 8286 return; 8287 } 8288 KASSERT(inp->inp_pf_sk == sk); 8289 sk->sk_inp = NULL; 8290 inp->inp_pf_sk = NULL; 8291 mtx_leave(&pf_inp_mtx); 8292 8293 pf_state_key_unref(sk); 8294 in_pcbunref(inp); 8295 } 8296 8297 void 8298 pf_state_key_unlink_reverse(struct pf_state_key *sk) 8299 { 8300 struct pf_state_key *skrev = sk->sk_reverse; 8301 8302 /* Note that sk and skrev may be equal, then we unref twice. 

void
pf_state_key_unlink_reverse(struct pf_state_key *sk)
{
	struct pf_state_key *skrev = sk->sk_reverse;

	/* Note that sk and skrev may be equal; in that case we unref twice. */
	if (skrev != NULL) {
		KASSERT(skrev->sk_reverse == sk);
		sk->sk_reverse = NULL;
		skrev->sk_reverse = NULL;
		pf_state_key_unref(skrev);
		pf_state_key_unref(sk);
	}
}

struct pf_state *
pf_state_ref(struct pf_state *st)
{
	if (st != NULL)
		PF_REF_TAKE(st->refcnt);
	return (st);
}

void
pf_state_unref(struct pf_state *st)
{
	if ((st != NULL) && PF_REF_RELE(st->refcnt)) {
		/* never inserted or removed */
#if NPFSYNC > 0
		KASSERT((TAILQ_NEXT(st, sync_list) == NULL) ||
		    ((TAILQ_NEXT(st, sync_list) == _Q_INVALID) &&
		    (st->sync_state >= PFSYNC_S_NONE)));
#endif	/* NPFSYNC */
		KASSERT((TAILQ_NEXT(st, entry_list) == NULL) ||
		    (TAILQ_NEXT(st, entry_list) == _Q_INVALID));

		pf_state_key_unref(st->key[PF_SK_WIRE]);
		pf_state_key_unref(st->key[PF_SK_STACK]);

		pool_put(&pf_state_pl, st);
	}
}

int
pf_delay_pkt(struct mbuf *m, u_int ifidx)
{
	struct pf_pktdelay *pdy;

	if ((pdy = pool_get(&pf_pktdelay_pl, PR_NOWAIT)) == NULL) {
		m_freem(m);
		return (ENOBUFS);
	}
	pdy->ifidx = ifidx;
	pdy->m = m;
	timeout_set(&pdy->to, pf_pktenqueue_delayed, pdy);
	timeout_add_msec(&pdy->to, m->m_pkthdr.pf.delay);
	m->m_pkthdr.pf.delay = 0;
	return (0);
}

void
pf_pktenqueue_delayed(void *arg)
{
	struct pf_pktdelay *pdy = arg;
	struct ifnet *ifp;

	ifp = if_get(pdy->ifidx);
	if (ifp != NULL) {
		if_enqueue(ifp, pdy->m);
		if_put(ifp);
	} else
		m_freem(pdy->m);

	pool_put(&pf_pktdelay_pl, pdy);
}
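
/*
 * A short sketch of the packet-delay path above, with a hypothetical
 * rule for illustration: a pf.conf rule using "set delay 100" leaves
 * 100 in m->m_pkthdr.pf.delay.  pf_delay_pkt() then parks the mbuf in
 * a pf_pktdelay and arms a 100 msec timeout; when it fires,
 * pf_pktenqueue_delayed() re-enqueues the mbuf on the interface it was
 * headed for, or frees it if that interface has disappeared.
 */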