/*	$OpenBSD: pf.c,v 1.1136 2022/07/20 09:33:11 mbuhl Exp $ */

/*
 * Copyright (c) 2001 Daniel Hartmeier
 * Copyright (c) 2002 - 2013 Henning Brauer <henning@openbsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *    - Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *    - Redistributions in binary form must reproduce the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer in the documentation and/or other materials provided
 *      with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Effort sponsored in part by the Defense Advanced Research Projects
 * Agency (DARPA) and Air Force Research Laboratory, Air Force
 * Materiel Command, USAF, under agreement number F30602-01-2-0537.
 *
 */

#include "bpfilter.h"
#include "carp.h"
#include "pflog.h"
#include "pfsync.h"
#include "pflow.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/filio.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/kernel.h>
#include <sys/time.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/syslog.h>

#include <crypto/sha2.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_types.h>
#include <net/route.h>

#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
#include <netinet/in_pcb.h>
#include <netinet/ip_var.h>
#include <netinet/ip_icmp.h>
#include <netinet/icmp_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_fsm.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>
#include <netinet/ip_divert.h>

#ifdef INET6
#include <netinet6/in6_var.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet/icmp6.h>
#include <netinet6/nd6.h>
#include <netinet6/ip6_divert.h>
#endif /* INET6 */

#include <net/pfvar.h>
#include <net/pfvar_priv.h>

#if NPFLOG > 0
#include <net/if_pflog.h>
#endif /* NPFLOG > 0 */

#if NPFLOW > 0
#include <net/if_pflow.h>
#endif /* NPFLOW > 0 */

#if NPFSYNC > 0
#include <net/if_pfsync.h>
#else
struct pfsync_deferral;
#endif /* NPFSYNC > 0 */

/*
 * Global variables
 */
struct pf_state_tree	 pf_statetbl;
struct pf_queuehead	 pf_queues[2];
struct pf_queuehead	*pf_queues_active;
struct pf_queuehead	*pf_queues_inactive;

struct pf_status	 pf_status;

int			 pf_hdr_limit = 20;	/* arbitrary limit, tune in ddb */

SHA2_CTX		 pf_tcp_secret_ctx;
u_char			 pf_tcp_secret[16];
int			 pf_tcp_secret_init;
int			 pf_tcp_iss_off;

int		 pf_npurge;
struct task	 pf_purge_task = TASK_INITIALIZER(pf_purge, &pf_npurge);
struct timeout	 pf_purge_to = TIMEOUT_INITIALIZER(pf_purge_timeout, NULL);

enum pf_test_status {
	PF_TEST_FAIL = -1,
	PF_TEST_OK,
	PF_TEST_QUICK
};

struct pf_test_ctx {
	enum pf_test_status	  test_status;
	struct pf_pdesc		 *pd;
	struct pf_rule_actions	  act;
	u_int8_t		  icmpcode;
	u_int8_t		  icmptype;
	int			  icmp_dir;
	int			  state_icmp;
	int			  tag;
	u_short			  reason;
	struct pf_rule_item	 *ri;
	struct pf_src_node	 *sns[PF_SN_MAX];
	struct pf_rule_slist	  rules;
	struct pf_rule		 *nr;
	struct pf_rule		**rm;
	struct pf_rule		 *a;
	struct pf_rule		**am;
	struct pf_ruleset	**rsm;
	struct pf_ruleset	 *arsm;
	struct pf_ruleset	 *aruleset;
	struct tcphdr		 *th;
	int			  depth;
};

#define	PF_ANCHOR_STACK_MAX	64

struct pool		 pf_src_tree_pl, pf_rule_pl, pf_queue_pl;
struct pool		 pf_state_pl, pf_state_key_pl, pf_state_item_pl;
struct pool		 pf_rule_item_pl, pf_sn_item_pl, pf_pktdelay_pl;

void			 pf_add_threshold(struct pf_threshold *);
int			 pf_check_threshold(struct pf_threshold *);
int			 pf_check_tcp_cksum(struct mbuf *, int, int,
			    sa_family_t);
static __inline void	 pf_cksum_fixup(u_int16_t *, u_int16_t, u_int16_t,
			    u_int8_t);
void			 pf_cksum_fixup_a(u_int16_t *, const struct pf_addr *,
			    const struct pf_addr *, sa_family_t, u_int8_t);
int			 pf_modulate_sack(struct pf_pdesc *,
			    struct pf_state_peer *);
int			 pf_icmp_mapping(struct pf_pdesc *, u_int8_t, int *,
			    u_int16_t *, u_int16_t *);
int			 pf_change_icmp_af(struct mbuf *, int,
			    struct pf_pdesc *, struct pf_pdesc *,
			    struct pf_addr *, struct pf_addr *, sa_family_t,
			    sa_family_t);
int			 pf_translate_a(struct pf_pdesc *, struct pf_addr *,
			    struct pf_addr *);
void			 pf_translate_icmp(struct pf_pdesc *, struct pf_addr *,
			    u_int16_t *, struct pf_addr *, struct pf_addr *,
			    u_int16_t);
int			 pf_translate_icmp_af(struct pf_pdesc*, int, void *);
void			 pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t, int,
			    sa_family_t, struct pf_rule *, u_int);
void			 pf_detach_state(struct pf_state *);
void			 pf_state_key_detach(struct pf_state *, int);
u_int32_t		 pf_tcp_iss(struct pf_pdesc *);
void			 pf_rule_to_actions(struct pf_rule *,
			    struct pf_rule_actions *);
int			 pf_test_rule(struct pf_pdesc *, struct pf_rule **,
			    struct pf_state **, struct pf_rule **,
			    struct pf_ruleset **, u_short *,
			    struct pfsync_deferral **);
static __inline int	 pf_create_state(struct pf_pdesc *, struct pf_rule *,
			    struct pf_rule *, struct pf_rule *,
			    struct pf_state_key **, struct pf_state_key **,
			    int *, struct pf_state **, int,
			    struct pf_rule_slist *, struct pf_rule_actions *,
			    struct pf_src_node *[]);
static __inline int	 pf_state_key_addr_setup(struct pf_pdesc *, void *,
			    int, struct pf_addr *, int, struct pf_addr *,
			    int, int);
int			 pf_state_key_setup(struct pf_pdesc *, struct
			    pf_state_key **, struct pf_state_key **, int);
int			 pf_tcp_track_full(struct pf_pdesc *,
			    struct pf_state **, u_short *, int *, int);
int			 pf_tcp_track_sloppy(struct pf_pdesc *,
			    struct pf_state **, u_short *);
static __inline int	 pf_synproxy(struct pf_pdesc *, struct pf_state **,
			    u_short *);
int			 pf_test_state(struct pf_pdesc *, struct pf_state **,
			    u_short *);
int			 pf_icmp_state_lookup(struct pf_pdesc *,
			    struct pf_state_key_cmp *, struct pf_state **,
			    u_int16_t, u_int16_t, int, int *, int, int);
int			 pf_test_state_icmp(struct pf_pdesc *,
			    struct pf_state **, u_short *);
u_int16_t		 pf_calc_mss(struct pf_addr *, sa_family_t, int,
			    u_int16_t);
static __inline int	 pf_set_rt_ifp(struct pf_state *, struct pf_addr *,
			    sa_family_t, struct pf_src_node **);
struct pf_divert	*pf_get_divert(struct mbuf *);
int			 pf_walk_option(struct pf_pdesc *, struct ip *,
			    int, int, u_short *);
int			 pf_walk_header(struct pf_pdesc *, struct ip *,
			    u_short *);
int			 pf_walk_option6(struct pf_pdesc *, struct ip6_hdr *,
			    int, int, u_short *);
int			 pf_walk_header6(struct pf_pdesc *, struct ip6_hdr *,
			    u_short *);
void			 pf_print_state_parts(struct pf_state *,
			    struct pf_state_key *, struct pf_state_key *);
int			 pf_addr_wrap_neq(struct pf_addr_wrap *,
			    struct pf_addr_wrap *);
int			 pf_compare_state_keys(struct pf_state_key *,
			    struct pf_state_key *, struct pfi_kif *, u_int);
int			 pf_find_state(struct pf_pdesc *,
			    struct pf_state_key_cmp *, struct pf_state **);
int			 pf_src_connlimit(struct pf_state **);
int			 pf_match_rcvif(struct mbuf *, struct pf_rule *);
int			 pf_step_into_anchor(struct pf_test_ctx *,
			    struct pf_rule *);
int			 pf_match_rule(struct pf_test_ctx *,
			    struct pf_ruleset *);
void			 pf_counters_inc(int, struct pf_pdesc *,
			    struct pf_state *, struct pf_rule *,
			    struct pf_rule *);

int			 pf_state_key_isvalid(struct pf_state_key *);
struct pf_state_key	*pf_state_key_ref(struct pf_state_key *);
void			 pf_state_key_unref(struct pf_state_key *);
void			 pf_state_key_link_reverse(struct pf_state_key *,
			    struct pf_state_key *);
void			 pf_state_key_unlink_reverse(struct pf_state_key *);
void			 pf_state_key_link_inpcb(struct pf_state_key *,
			    struct inpcb *);
void			 pf_state_key_unlink_inpcb(struct pf_state_key *);
void			 pf_inpcb_unlink_state_key(struct inpcb *);
void			 pf_pktenqueue_delayed(void *);
int32_t			 pf_state_expires(const struct pf_state *, uint8_t);

#if NPFLOG > 0
void			 pf_log_matches(struct pf_pdesc *, struct pf_rule *,
			    struct pf_rule *, struct pf_ruleset *,
			    struct pf_rule_slist *);
#endif	/* NPFLOG > 0 */

extern struct pool pfr_ktable_pl;
extern struct pool pfr_kentry_pl;

struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = {
	{ &pf_state_pl,		PFSTATE_HIWAT,		PFSTATE_HIWAT },
	{ &pf_src_tree_pl,	PFSNODE_HIWAT,		PFSNODE_HIWAT },
	{ &pf_frent_pl,		PFFRAG_FRENT_HIWAT,	PFFRAG_FRENT_HIWAT },
	{ &pfr_ktable_pl,	PFR_KTABLE_HIWAT,	PFR_KTABLE_HIWAT },
	{ &pfr_kentry_pl,	PFR_KENTRY_HIWAT,	PFR_KENTRY_HIWAT },
	{ &pf_pktdelay_pl,	PF_PKTDELAY_MAXPKTS,	PF_PKTDELAY_MAXPKTS },
	{ &pf_anchor_pl,	PF_ANCHOR_HIWAT,	PF_ANCHOR_HIWAT }
};

#define BOUND_IFACE(r, k) \
	((r)->rule_flag & PFRULE_IFBOUND) ? (k) : pfi_all

#define STATE_INC_COUNTERS(s)					\
	do {							\
		struct pf_rule_item *mrm;			\
		s->rule.ptr->states_cur++;			\
		s->rule.ptr->states_tot++;			\
		if (s->anchor.ptr != NULL) {			\
			s->anchor.ptr->states_cur++;		\
			s->anchor.ptr->states_tot++;		\
		}						\
		SLIST_FOREACH(mrm, &s->match_rules, entry)	\
			mrm->r->states_cur++;			\
	} while (0)

static __inline int pf_src_compare(struct pf_src_node *, struct pf_src_node *);
static __inline int pf_state_compare_key(struct pf_state_key *,
	struct pf_state_key *);
static __inline int pf_state_compare_id(struct pf_state *,
	struct pf_state *);
#ifdef INET6
static __inline void pf_cksum_uncover(u_int16_t *, u_int16_t, u_int8_t);
static __inline void pf_cksum_cover(u_int16_t *, u_int16_t, u_int8_t);
#endif /* INET6 */
static __inline void pf_set_protostate(struct pf_state *, int, u_int8_t);

struct pf_src_tree tree_src_tracking;

struct pf_state_tree_id tree_id;
struct pf_state_list pf_state_list = PF_STATE_LIST_INITIALIZER(pf_state_list);

RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare);
RB_GENERATE(pf_state_tree, pf_state_key, entry, pf_state_compare_key);
RB_GENERATE(pf_state_tree_id, pf_state,
    entry_id, pf_state_compare_id);

SLIST_HEAD(pf_rule_gcl, pf_rule)	pf_rule_gcl =
	SLIST_HEAD_INITIALIZER(pf_rule_gcl);

__inline int
pf_addr_compare(struct pf_addr *a, struct pf_addr *b, sa_family_t af)
{
	switch (af) {
	case AF_INET:
		if (a->addr32[0] > b->addr32[0])
			return (1);
		if (a->addr32[0] < b->addr32[0])
			return (-1);
		break;
#ifdef INET6
	case AF_INET6:
		if (a->addr32[3] > b->addr32[3])
			return (1);
		if (a->addr32[3] < b->addr32[3])
			return (-1);
		if (a->addr32[2] > b->addr32[2])
			return (1);
		if (a->addr32[2] < b->addr32[2])
			return (-1);
		if (a->addr32[1] > b->addr32[1])
			return (1);
		if (a->addr32[1] < b->addr32[1])
			return (-1);
		if (a->addr32[0] > b->addr32[0])
			return (1);
		if (a->addr32[0] < b->addr32[0])
			return (-1);
		break;
#endif /* INET6 */
	}
	return (0);
}

static __inline int
pf_src_compare(struct pf_src_node *a, struct pf_src_node *b)
{
	int	diff;

	if (a->rule.ptr > b->rule.ptr)
		return (1);
	if (a->rule.ptr < b->rule.ptr)
		return (-1);
	if ((diff = a->type - b->type) != 0)
		return (diff);
	if ((diff = a->af - b->af) != 0)
		return (diff);
	if ((diff = pf_addr_compare(&a->addr, &b->addr, a->af)) != 0)
		return (diff);
	return (0);
}

static __inline void
pf_set_protostate(struct pf_state *s, int which, u_int8_t newstate)
{
	if (which == PF_PEER_DST || which == PF_PEER_BOTH)
		s->dst.state = newstate;
	if (which == PF_PEER_DST)
		return;

	if (s->src.state == newstate)
		return;
	if (s->creatorid == pf_status.hostid && s->key[PF_SK_STACK] != NULL &&
	    s->key[PF_SK_STACK]->proto == IPPROTO_TCP &&
	    !(TCPS_HAVEESTABLISHED(s->src.state) ||
	    s->src.state == TCPS_CLOSED) &&
	    (TCPS_HAVEESTABLISHED(newstate) || newstate == TCPS_CLOSED))
		pf_status.states_halfopen--;

	s->src.state = newstate;
}

void
pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af)
{
	switch (af) {
	case AF_INET:
		dst->addr32[0] = src->addr32[0];
		break;
#ifdef INET6
	case AF_INET6:
		dst->addr32[0] = src->addr32[0];
		dst->addr32[1] = src->addr32[1];
		dst->addr32[2] = src->addr32[2];
		dst->addr32[3] = src->addr32[3];
		break;
#endif /* INET6 */
	default:
		unhandled_af(af);
	}
}

void
pf_init_threshold(struct pf_threshold *threshold,
    u_int32_t limit, u_int32_t seconds)
{
	threshold->limit = limit * PF_THRESHOLD_MULT;
	threshold->seconds = seconds;
	threshold->count = 0;
	threshold->last = getuptime();
}

void
pf_add_threshold(struct pf_threshold *threshold)
{
	u_int32_t t = getuptime(), diff = t - threshold->last;

	if (diff >= threshold->seconds)
		threshold->count = 0;
	else
		threshold->count -= threshold->count * diff /
		    threshold->seconds;
	threshold->count += PF_THRESHOLD_MULT;
	threshold->last = t;
}

int
pf_check_threshold(struct pf_threshold *threshold)
{
	return (threshold->count > threshold->limit);
}
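/*
 * Worked example for the threshold arithmetic above (illustrative
 * comment, not part of the original source; assumes PF_THRESHOLD_MULT
 * is 1000 as defined in pfvar.h).  For a rule with
 * "max-src-conn-rate 10/60", pf_init_threshold() stores
 * limit = 10 * 1000 and seconds = 60.  Each pf_add_threshold() call
 * first decays the count linearly over the elapsed time, then adds
 * one event:
 *
 *	count -= count * diff / 60;	(e.g. diff = 30 halves the count)
 *	count += 1000;
 *
 * pf_check_threshold() then reports the limit as exceeded once the
 * decayed count passes 10000, i.e. once more than roughly 10 events
 * fall within the sliding 60 second window.
 */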
void
pf_state_list_insert(struct pf_state_list *pfs, struct pf_state *st)
{
	/*
	 * we can always put states on the end of the list.
	 *
	 * things reading the list should take a read lock, then
	 * the mutex, get the head and tail pointers, release the
	 * mutex, and then they can iterate between the head and tail.
	 */

	pf_state_ref(st); /* get a ref for the list */

	mtx_enter(&pfs->pfs_mtx);
	TAILQ_INSERT_TAIL(&pfs->pfs_list, st, entry_list);
	mtx_leave(&pfs->pfs_mtx);
}

void
pf_state_list_remove(struct pf_state_list *pfs, struct pf_state *st)
{
	/* states can only be removed when the write lock is held */
	rw_assert_wrlock(&pfs->pfs_rwl);

	mtx_enter(&pfs->pfs_mtx);
	TAILQ_REMOVE(&pfs->pfs_list, st, entry_list);
	mtx_leave(&pfs->pfs_mtx);

	pf_state_unref(st); /* list no longer references the state */
}
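/*
 * Illustrative sketch (not part of the original source) of the reader
 * side of the locking protocol described in pf_state_list_insert()
 * above: take pfs_rwl as a reader, sample the head and tail pointers
 * under pfs_mtx, drop the mutex, then walk between the two without it.
 *
 *	rw_enter_read(&pfs->pfs_rwl);
 *	mtx_enter(&pfs->pfs_mtx);
 *	head = TAILQ_FIRST(&pfs->pfs_list);
 *	tail = TAILQ_LAST(&pfs->pfs_list, pf_state_queue);
 *	mtx_leave(&pfs->pfs_mtx);
 *
 *	for (st = head; st != NULL; st = TAILQ_NEXT(st, entry_list)) {
 *		(inspect st)
 *		if (st == tail)
 *			break;
 *	}
 *	rw_exit_read(&pfs->pfs_rwl);
 *
 * pf_purge_expired_states() below follows this pattern.
 */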
int
pf_src_connlimit(struct pf_state **state)
{
	int			 bad = 0;
	struct pf_src_node	*sn;

	if ((sn = pf_get_src_node((*state), PF_SN_NONE)) == NULL)
		return (0);

	sn->conn++;
	(*state)->src.tcp_est = 1;
	pf_add_threshold(&sn->conn_rate);

	if ((*state)->rule.ptr->max_src_conn &&
	    (*state)->rule.ptr->max_src_conn < sn->conn) {
		pf_status.lcounters[LCNT_SRCCONN]++;
		bad++;
	}

	if ((*state)->rule.ptr->max_src_conn_rate.limit &&
	    pf_check_threshold(&sn->conn_rate)) {
		pf_status.lcounters[LCNT_SRCCONNRATE]++;
		bad++;
	}

	if (!bad)
		return (0);

	if ((*state)->rule.ptr->overload_tbl) {
		struct pfr_addr p;
		u_int32_t	killed = 0;

		pf_status.lcounters[LCNT_OVERLOAD_TABLE]++;
		if (pf_status.debug >= LOG_NOTICE) {
			log(LOG_NOTICE,
			    "pf: pf_src_connlimit: blocking address ");
			pf_print_host(&sn->addr, 0,
			    (*state)->key[PF_SK_WIRE]->af);
		}

		memset(&p, 0, sizeof(p));
		p.pfra_af = (*state)->key[PF_SK_WIRE]->af;
		switch ((*state)->key[PF_SK_WIRE]->af) {
		case AF_INET:
			p.pfra_net = 32;
			p.pfra_ip4addr = sn->addr.v4;
			break;
#ifdef INET6
		case AF_INET6:
			p.pfra_net = 128;
			p.pfra_ip6addr = sn->addr.v6;
			break;
#endif /* INET6 */
		}

		pfr_insert_kentry((*state)->rule.ptr->overload_tbl,
		    &p, gettime());

		/* kill existing states if that's required. */
		if ((*state)->rule.ptr->flush) {
			struct pf_state_key *sk;
			struct pf_state *st;

			pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++;
			RB_FOREACH(st, pf_state_tree_id, &tree_id) {
				sk = st->key[PF_SK_WIRE];
				/*
				 * Kill states from this source. (Only those
				 * from the same rule if PF_FLUSH_GLOBAL is not
				 * set)
				 */
				if (sk->af ==
				    (*state)->key[PF_SK_WIRE]->af &&
				    (((*state)->direction == PF_OUT &&
				    PF_AEQ(&sn->addr, &sk->addr[1], sk->af)) ||
				    ((*state)->direction == PF_IN &&
				    PF_AEQ(&sn->addr, &sk->addr[0], sk->af))) &&
				    ((*state)->rule.ptr->flush &
				    PF_FLUSH_GLOBAL ||
				    (*state)->rule.ptr == st->rule.ptr)) {
					st->timeout = PFTM_PURGE;
					pf_set_protostate(st, PF_PEER_BOTH,
					    TCPS_CLOSED);
					killed++;
				}
			}
			if (pf_status.debug >= LOG_NOTICE)
				addlog(", %u states killed", killed);
		}
		if (pf_status.debug >= LOG_NOTICE)
			addlog("\n");
	}

	/* kill this state */
	(*state)->timeout = PFTM_PURGE;
	pf_set_protostate(*state, PF_PEER_BOTH, TCPS_CLOSED);
	return (1);
}

int
pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule,
    enum pf_sn_types type, sa_family_t af, struct pf_addr *src,
    struct pf_addr *raddr, struct pfi_kif *kif)
{
	struct pf_src_node	k;

	if (*sn == NULL) {
		k.af = af;
		k.type = type;
		pf_addrcpy(&k.addr, src, af);
		k.rule.ptr = rule;
		pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
		*sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
	}
	if (*sn == NULL) {
		if (!rule->max_src_nodes ||
		    rule->src_nodes < rule->max_src_nodes)
			(*sn) = pool_get(&pf_src_tree_pl, PR_NOWAIT | PR_ZERO);
		else
			pf_status.lcounters[LCNT_SRCNODES]++;
		if ((*sn) == NULL)
			return (-1);

		pf_init_threshold(&(*sn)->conn_rate,
		    rule->max_src_conn_rate.limit,
		    rule->max_src_conn_rate.seconds);

		(*sn)->type = type;
		(*sn)->af = af;
		(*sn)->rule.ptr = rule;
		pf_addrcpy(&(*sn)->addr, src, af);
		if (raddr)
			pf_addrcpy(&(*sn)->raddr, raddr, af);
		if (RB_INSERT(pf_src_tree,
		    &tree_src_tracking, *sn) != NULL) {
			if (pf_status.debug >= LOG_NOTICE) {
				log(LOG_NOTICE,
				    "pf: src_tree insert failed: ");
				pf_print_host(&(*sn)->addr, 0, af);
				addlog("\n");
			}
			pool_put(&pf_src_tree_pl, *sn);
			return (-1);
		}
		(*sn)->creation = getuptime();
		(*sn)->rule.ptr->src_nodes++;
		if (kif != NULL) {
			(*sn)->kif = kif;
			pfi_kif_ref(kif, PFI_KIF_REF_SRCNODE);
		}
		pf_status.scounters[SCNT_SRC_NODE_INSERT]++;
		pf_status.src_nodes++;
	} else {
		if (rule->max_src_states &&
		    (*sn)->states >= rule->max_src_states) {
			pf_status.lcounters[LCNT_SRCSTATES]++;
			return (-1);
		}
	}
	return (0);
}

void
pf_remove_src_node(struct pf_src_node *sn)
{
	if (sn->states > 0 || sn->expire > getuptime())
		return;

	sn->rule.ptr->src_nodes--;
	if (sn->rule.ptr->states_cur == 0 &&
	    sn->rule.ptr->src_nodes == 0)
		pf_rm_rule(NULL, sn->rule.ptr);
	RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
	pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
	pf_status.src_nodes--;
	pfi_kif_unref(sn->kif, PFI_KIF_REF_SRCNODE);
	pool_put(&pf_src_tree_pl, sn);
}

struct pf_src_node *
pf_get_src_node(struct pf_state *s, enum pf_sn_types type)
{
	struct pf_sn_item	*sni;

	SLIST_FOREACH(sni, &s->src_nodes, next)
		if (sni->sn->type == type)
			return (sni->sn);
	return (NULL);
}

void
pf_state_rm_src_node(struct pf_state *s, struct pf_src_node *sn)
{
	struct pf_sn_item	*sni, *snin, *snip = NULL;

	for (sni = SLIST_FIRST(&s->src_nodes); sni; sni = snin) {
		snin = SLIST_NEXT(sni, next);
		if (sni->sn == sn) {
			if (snip)
				SLIST_REMOVE_AFTER(snip, next);
			else
				SLIST_REMOVE_HEAD(&s->src_nodes, next);
			pool_put(&pf_sn_item_pl, sni);
			sni = NULL;
			sn->states--;
		}
		if (sni != NULL)
			snip = sni;
	}
}

/* state table stuff */

static __inline int
pf_state_compare_key(struct pf_state_key *a, struct pf_state_key *b)
{
	int	diff;

	if ((diff = a->proto - b->proto) != 0)
		return (diff);
	if ((diff = a->af - b->af) != 0)
		return (diff);
	if ((diff = pf_addr_compare(&a->addr[0], &b->addr[0], a->af)) != 0)
		return (diff);
	if ((diff = pf_addr_compare(&a->addr[1], &b->addr[1], a->af)) != 0)
		return (diff);
	if ((diff = a->port[0] - b->port[0]) != 0)
		return (diff);
	if ((diff = a->port[1] - b->port[1]) != 0)
		return (diff);
	if ((diff = a->rdomain - b->rdomain) != 0)
		return (diff);
	return (0);
}

static __inline int
pf_state_compare_id(struct pf_state *a, struct pf_state *b)
{
	if (a->id > b->id)
		return (1);
	if (a->id < b->id)
		return (-1);
	if (a->creatorid > b->creatorid)
		return (1);
	if (a->creatorid < b->creatorid)
		return (-1);

	return (0);
}

int
pf_state_key_attach(struct pf_state_key *sk, struct pf_state *s, int idx)
{
	struct pf_state_item	*si;
	struct pf_state_key	*cur;
	struct pf_state		*olds = NULL;

	KASSERT(s->key[idx] == NULL);
	if ((cur = RB_INSERT(pf_state_tree, &pf_statetbl, sk)) != NULL) {
		/* key exists. check for same kif, if none, add to key */
		TAILQ_FOREACH(si, &cur->states, entry)
			if (si->s->kif == s->kif &&
			    ((si->s->key[PF_SK_WIRE]->af == sk->af &&
			    si->s->direction == s->direction) ||
			    (si->s->key[PF_SK_WIRE]->af !=
			    si->s->key[PF_SK_STACK]->af &&
			    sk->af == si->s->key[PF_SK_STACK]->af &&
			    si->s->direction != s->direction))) {
				int reuse = 0;

				if (sk->proto == IPPROTO_TCP &&
				    si->s->src.state >= TCPS_FIN_WAIT_2 &&
				    si->s->dst.state >= TCPS_FIN_WAIT_2)
					reuse = 1;
				if (pf_status.debug >= LOG_NOTICE) {
					log(LOG_NOTICE,
					    "pf: %s key attach %s on %s: ",
					    (idx == PF_SK_WIRE) ?
					    "wire" : "stack",
					    reuse ? "reuse" : "failed",
					    s->kif->pfik_name);
					pf_print_state_parts(s,
					    (idx == PF_SK_WIRE) ? sk : NULL,
					    (idx == PF_SK_STACK) ? sk : NULL);
					addlog(", existing: ");
					pf_print_state_parts(si->s,
					    (idx == PF_SK_WIRE) ? sk : NULL,
					    (idx == PF_SK_STACK) ? sk : NULL);
					addlog("\n");
				}
				if (reuse) {
					pf_set_protostate(si->s, PF_PEER_BOTH,
					    TCPS_CLOSED);
					/* remove late or sks can go away */
					olds = si->s;
				} else {
					pool_put(&pf_state_key_pl, sk);
					return (-1);	/* collision! */
				}
			}
		pool_put(&pf_state_key_pl, sk);
		s->key[idx] = cur;
	} else
		s->key[idx] = sk;

	if ((si = pool_get(&pf_state_item_pl, PR_NOWAIT)) == NULL) {
		pf_state_key_detach(s, idx);
		return (-1);
	}
	si->s = s;

	/* list is sorted, if-bound states before floating */
	if (s->kif == pfi_all)
		TAILQ_INSERT_TAIL(&s->key[idx]->states, si, entry);
	else
		TAILQ_INSERT_HEAD(&s->key[idx]->states, si, entry);

	if (olds)
		pf_remove_state(olds);

	return (0);
}

void
pf_detach_state(struct pf_state *s)
{
	if (s->key[PF_SK_WIRE] == s->key[PF_SK_STACK])
		s->key[PF_SK_WIRE] = NULL;

	if (s->key[PF_SK_STACK] != NULL)
		pf_state_key_detach(s, PF_SK_STACK);

	if (s->key[PF_SK_WIRE] != NULL)
		pf_state_key_detach(s, PF_SK_WIRE);
}

void
pf_state_key_detach(struct pf_state *s, int idx)
{
	struct pf_state_item	*si;
	struct pf_state_key	*sk;

	if (s->key[idx] == NULL)
		return;

	si = TAILQ_FIRST(&s->key[idx]->states);
	while (si && si->s != s)
		si = TAILQ_NEXT(si, entry);

	if (si) {
		TAILQ_REMOVE(&s->key[idx]->states, si, entry);
		pool_put(&pf_state_item_pl, si);
	}

	sk = s->key[idx];
	s->key[idx] = NULL;
	if (TAILQ_EMPTY(&sk->states)) {
		RB_REMOVE(pf_state_tree, &pf_statetbl, sk);
		sk->removed = 1;
		pf_state_key_unlink_reverse(sk);
		pf_state_key_unlink_inpcb(sk);
		pf_state_key_unref(sk);
	}
}

struct pf_state_key *
pf_alloc_state_key(int pool_flags)
{
	struct pf_state_key	*sk;

	if ((sk = pool_get(&pf_state_key_pl, pool_flags)) == NULL)
		return (NULL);
	TAILQ_INIT(&sk->states);

	return (sk);
}

static __inline int
pf_state_key_addr_setup(struct pf_pdesc *pd, void *arg, int sidx,
    struct pf_addr *saddr, int didx, struct pf_addr *daddr, int af, int multi)
{
	struct pf_state_key_cmp *key = arg;
#ifdef INET6
	struct pf_addr *target;

	if (af == AF_INET || pd->proto != IPPROTO_ICMPV6)
		goto copy;

	switch (pd->hdr.icmp6.icmp6_type) {
	case ND_NEIGHBOR_SOLICIT:
		if (multi)
			return (-1);
		target = (struct pf_addr *)&pd->hdr.nd_ns.nd_ns_target;
		daddr = target;
		break;
	case ND_NEIGHBOR_ADVERT:
		if (multi)
			return (-1);
		target = (struct pf_addr *)&pd->hdr.nd_ns.nd_ns_target;
		saddr = target;
		if (IN6_IS_ADDR_MULTICAST(&pd->dst->v6)) {
			key->addr[didx].addr32[0] = 0;
			key->addr[didx].addr32[1] = 0;
			key->addr[didx].addr32[2] = 0;
			key->addr[didx].addr32[3] = 0;
			daddr = NULL; /* overwritten */
		}
		break;
	default:
		if (multi) {
			key->addr[sidx].addr32[0] = __IPV6_ADDR_INT32_MLL;
			key->addr[sidx].addr32[1] = 0;
			key->addr[sidx].addr32[2] = 0;
			key->addr[sidx].addr32[3] = __IPV6_ADDR_INT32_ONE;
			saddr = NULL; /* overwritten */
		}
	}
 copy:
#endif /* INET6 */
	if (saddr)
		pf_addrcpy(&key->addr[sidx], saddr, af);
	if (daddr)
		pf_addrcpy(&key->addr[didx], daddr, af);

	return (0);
}

int
pf_state_key_setup(struct pf_pdesc *pd, struct pf_state_key **skw,
    struct pf_state_key **sks, int rtableid)
{
	/* if returning error we MUST pool_put state keys ourselves */
	struct pf_state_key *sk1, *sk2;
	u_int wrdom = pd->rdomain;
	int afto = pd->af != pd->naf;

	if ((sk1 = pf_alloc_state_key(PR_NOWAIT | PR_ZERO)) == NULL)
		return (ENOMEM);

	pf_state_key_addr_setup(pd, sk1, pd->sidx, pd->src, pd->didx, pd->dst,
	    pd->af, 0);
	sk1->port[pd->sidx] = pd->osport;
	sk1->port[pd->didx] = pd->odport;
	sk1->proto = pd->proto;
	sk1->af = pd->af;
	sk1->rdomain = pd->rdomain;
	PF_REF_INIT(sk1->refcnt);
	sk1->removed = 0;
	if (rtableid >= 0)
		wrdom = rtable_l2(rtableid);

	if (PF_ANEQ(&pd->nsaddr, pd->src, pd->af) ||
	    PF_ANEQ(&pd->ndaddr, pd->dst, pd->af) ||
	    pd->nsport != pd->osport || pd->ndport != pd->odport ||
	    wrdom != pd->rdomain || afto) {	/* NAT/NAT64 */
		if ((sk2 = pf_alloc_state_key(PR_NOWAIT | PR_ZERO)) == NULL) {
			pool_put(&pf_state_key_pl, sk1);
			return (ENOMEM);
		}
		pf_state_key_addr_setup(pd, sk2, afto ? pd->didx : pd->sidx,
		    &pd->nsaddr, afto ? pd->sidx : pd->didx, &pd->ndaddr,
		    pd->naf, 0);
		sk2->port[afto ? pd->didx : pd->sidx] = pd->nsport;
		sk2->port[afto ? pd->sidx : pd->didx] = pd->ndport;
		if (afto) {
			switch (pd->proto) {
			case IPPROTO_ICMP:
				sk2->proto = IPPROTO_ICMPV6;
				break;
			case IPPROTO_ICMPV6:
				sk2->proto = IPPROTO_ICMP;
				break;
			default:
				sk2->proto = pd->proto;
			}
		} else
			sk2->proto = pd->proto;
		sk2->af = pd->naf;
		sk2->rdomain = wrdom;
		PF_REF_INIT(sk2->refcnt);
		sk2->removed = 0;
	} else
		sk2 = sk1;

	if (pd->dir == PF_IN) {
		*skw = sk1;
		*sks = sk2;
	} else {
		*sks = sk1;
		*skw = sk2;
	}

	if (pf_status.debug >= LOG_DEBUG) {
		log(LOG_DEBUG, "pf: key setup: ");
		pf_print_state_parts(NULL, *skw, *sks);
		addlog("\n");
	}

	return (0);
}

int
pf_state_insert(struct pfi_kif *kif, struct pf_state_key **skw,
    struct pf_state_key **sks, struct pf_state *s)
{
	PF_ASSERT_LOCKED();

	s->kif = kif;
	PF_STATE_ENTER_WRITE();
	if (*skw == *sks) {
		if (pf_state_key_attach(*skw, s, PF_SK_WIRE)) {
			PF_STATE_EXIT_WRITE();
			return (-1);
		}
		*skw = *sks = s->key[PF_SK_WIRE];
		s->key[PF_SK_STACK] = s->key[PF_SK_WIRE];
	} else {
		if (pf_state_key_attach(*skw, s, PF_SK_WIRE)) {
			pool_put(&pf_state_key_pl, *sks);
			PF_STATE_EXIT_WRITE();
			return (-1);
		}
		*skw = s->key[PF_SK_WIRE];
		if (pf_state_key_attach(*sks, s, PF_SK_STACK)) {
			pf_state_key_detach(s, PF_SK_WIRE);
			PF_STATE_EXIT_WRITE();
			return (-1);
		}
		*sks = s->key[PF_SK_STACK];
	}

	if (s->id == 0 && s->creatorid == 0) {
		s->id = htobe64(pf_status.stateid++);
		s->creatorid = pf_status.hostid;
	}
	if (RB_INSERT(pf_state_tree_id, &tree_id, s) != NULL) {
		if (pf_status.debug >= LOG_NOTICE) {
			log(LOG_NOTICE, "pf: state insert failed: "
			    "id: %016llx creatorid: %08x",
			    betoh64(s->id), ntohl(s->creatorid));
			addlog("\n");
		}
		pf_detach_state(s);
		PF_STATE_EXIT_WRITE();
		return (-1);
	}
	pf_state_list_insert(&pf_state_list, s);
	pf_status.fcounters[FCNT_STATE_INSERT]++;
	pf_status.states++;
	pfi_kif_ref(kif, PFI_KIF_REF_STATE);
	PF_STATE_EXIT_WRITE();
#if NPFSYNC > 0
	pfsync_insert_state(s);
#endif	/* NPFSYNC > 0 */
	return (0);
}

struct pf_state *
pf_find_state_byid(struct pf_state_cmp *key)
{
	pf_status.fcounters[FCNT_STATE_SEARCH]++;

	return (RB_FIND(pf_state_tree_id, &tree_id, (struct pf_state *)key));
}

int
pf_compare_state_keys(struct pf_state_key *a, struct pf_state_key *b,
    struct pfi_kif *kif, u_int dir)
{
	/* a (from hdr) and b (new) must be exact opposites of each other */
	if (a->af == b->af && a->proto == b->proto &&
	    PF_AEQ(&a->addr[0], &b->addr[1], a->af) &&
	    PF_AEQ(&a->addr[1], &b->addr[0], a->af) &&
	    a->port[0] == b->port[1] &&
	    a->port[1] == b->port[0] && a->rdomain == b->rdomain)
		return (0);
	else {
		/* mismatch. must not happen. */
		if (pf_status.debug >= LOG_ERR) {
			log(LOG_ERR,
			    "pf: state key linking mismatch! dir=%s, "
			    "if=%s, stored af=%u, a0: ",
			    dir == PF_OUT ? "OUT" : "IN",
			    kif->pfik_name, a->af);
			pf_print_host(&a->addr[0], a->port[0], a->af);
			addlog(", a1: ");
			pf_print_host(&a->addr[1], a->port[1], a->af);
			addlog(", proto=%u", a->proto);
			addlog(", found af=%u, a0: ", b->af);
			pf_print_host(&b->addr[0], b->port[0], b->af);
			addlog(", a1: ");
			pf_print_host(&b->addr[1], b->port[1], b->af);
			addlog(", proto=%u", b->proto);
			addlog("\n");
		}
		return (-1);
	}
}

int
pf_find_state(struct pf_pdesc *pd, struct pf_state_key_cmp *key,
    struct pf_state **state)
{
	struct pf_state_key	*sk, *pkt_sk, *inp_sk;
	struct pf_state_item	*si;
	struct pf_state		*s = NULL;

	pf_status.fcounters[FCNT_STATE_SEARCH]++;
	if (pf_status.debug >= LOG_DEBUG) {
		log(LOG_DEBUG, "pf: key search, %s on %s: ",
		    pd->dir == PF_OUT ? "out" : "in", pd->kif->pfik_name);
		pf_print_state_parts(NULL, (struct pf_state_key *)key, NULL);
		addlog("\n");
	}

	inp_sk = NULL;
	pkt_sk = NULL;
	sk = NULL;
	if (pd->dir == PF_OUT) {
		/* first if block deals with outbound forwarded packet */
		pkt_sk = pd->m->m_pkthdr.pf.statekey;

		if (!pf_state_key_isvalid(pkt_sk)) {
			pf_mbuf_unlink_state_key(pd->m);
			pkt_sk = NULL;
		}

		if (pkt_sk && pf_state_key_isvalid(pkt_sk->reverse))
			sk = pkt_sk->reverse;

		if (pkt_sk == NULL) {
			/* here we deal with local outbound packet */
			if (pd->m->m_pkthdr.pf.inp != NULL) {
				inp_sk = pd->m->m_pkthdr.pf.inp->inp_pf_sk;
				if (pf_state_key_isvalid(inp_sk))
					sk = inp_sk;
				else
					pf_inpcb_unlink_state_key(
					    pd->m->m_pkthdr.pf.inp);
			}
		}
	}

	if (sk == NULL) {
		if ((sk = RB_FIND(pf_state_tree, &pf_statetbl,
		    (struct pf_state_key *)key)) == NULL)
			return (PF_DROP);
		if (pd->dir == PF_OUT && pkt_sk &&
		    pf_compare_state_keys(pkt_sk, sk, pd->kif, pd->dir) == 0)
			pf_state_key_link_reverse(sk, pkt_sk);
		else if (pd->dir == PF_OUT && pd->m->m_pkthdr.pf.inp &&
		    !pd->m->m_pkthdr.pf.inp->inp_pf_sk && !sk->inp)
			pf_state_key_link_inpcb(sk, pd->m->m_pkthdr.pf.inp);
	}

	/* remove firewall data from outbound packet */
	if (pd->dir == PF_OUT)
		pf_pkt_addr_changed(pd->m);

	/* list is sorted, if-bound states before floating ones */
	TAILQ_FOREACH(si, &sk->states, entry)
		if (si->s->timeout != PFTM_PURGE &&
		    (si->s->kif == pfi_all || si->s->kif == pd->kif) &&
		    ((si->s->key[PF_SK_WIRE]->af == si->s->key[PF_SK_STACK]->af
		    && sk == (pd->dir == PF_IN ? si->s->key[PF_SK_WIRE] :
		    si->s->key[PF_SK_STACK])) ||
		    (si->s->key[PF_SK_WIRE]->af != si->s->key[PF_SK_STACK]->af
		    && pd->dir == PF_IN && (sk == si->s->key[PF_SK_STACK] ||
		    sk == si->s->key[PF_SK_WIRE])))) {
			s = si->s;
			break;
		}

	if (s == NULL)
		return (PF_DROP);

	if (s->rule.ptr->pktrate.limit && pd->dir == s->direction) {
		pf_add_threshold(&s->rule.ptr->pktrate);
		if (pf_check_threshold(&s->rule.ptr->pktrate))
			return (PF_DROP);
	}

	*state = s;

	return (PF_MATCH);
}

struct pf_state *
pf_find_state_all(struct pf_state_key_cmp *key, u_int dir, int *more)
{
	struct pf_state_key	*sk;
	struct pf_state_item	*si, *ret = NULL;

	pf_status.fcounters[FCNT_STATE_SEARCH]++;

	sk = RB_FIND(pf_state_tree, &pf_statetbl, (struct pf_state_key *)key);

	if (sk != NULL) {
		TAILQ_FOREACH(si, &sk->states, entry)
			if (dir == PF_INOUT ||
			    (sk == (dir == PF_IN ? si->s->key[PF_SK_WIRE] :
			    si->s->key[PF_SK_STACK]))) {
				if (more == NULL)
					return (si->s);

				if (ret)
					(*more)++;
				else
					ret = si;
			}
	}
	return (ret ? ret->s : NULL);
}

void
pf_state_export(struct pfsync_state *sp, struct pf_state *st)
{
	int32_t expire;

	memset(sp, 0, sizeof(struct pfsync_state));

	/* copy from state key */
	sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0];
	sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1];
	sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0];
	sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1];
	sp->key[PF_SK_WIRE].rdomain = htons(st->key[PF_SK_WIRE]->rdomain);
	sp->key[PF_SK_WIRE].af = st->key[PF_SK_WIRE]->af;
	sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0];
	sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1];
	sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0];
	sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1];
	sp->key[PF_SK_STACK].rdomain = htons(st->key[PF_SK_STACK]->rdomain);
	sp->key[PF_SK_STACK].af = st->key[PF_SK_STACK]->af;
	sp->rtableid[PF_SK_WIRE] = htonl(st->rtableid[PF_SK_WIRE]);
	sp->rtableid[PF_SK_STACK] = htonl(st->rtableid[PF_SK_STACK]);
	sp->proto = st->key[PF_SK_WIRE]->proto;
	sp->af = st->key[PF_SK_WIRE]->af;

	/* copy from state */
	strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname));
	sp->rt = st->rt;
	sp->rt_addr = st->rt_addr;
	sp->creation = htonl(getuptime() - st->creation);
	expire = pf_state_expires(st, st->timeout);
	if (expire <= getuptime())
		sp->expire = htonl(0);
	else
		sp->expire = htonl(expire - getuptime());

	sp->direction = st->direction;
#if NPFLOG > 0
	sp->log = st->log;
#endif	/* NPFLOG > 0 */
	sp->timeout = st->timeout;
	sp->state_flags = htons(st->state_flags);
	if (!SLIST_EMPTY(&st->src_nodes))
		sp->sync_flags |= PFSYNC_FLAG_SRCNODE;

	sp->id = st->id;
	sp->creatorid = st->creatorid;
	pf_state_peer_hton(&st->src, &sp->src);
	pf_state_peer_hton(&st->dst, &sp->dst);

	if (st->rule.ptr == NULL)
		sp->rule = htonl(-1);
	else
		sp->rule = htonl(st->rule.ptr->nr);
	if (st->anchor.ptr == NULL)
		sp->anchor = htonl(-1);
	else
		sp->anchor = htonl(st->anchor.ptr->nr);
	sp->nat_rule = htonl(-1);	/* left for compat, nat_rule is gone */

	pf_state_counter_hton(st->packets[0], sp->packets[0]);
	pf_state_counter_hton(st->packets[1], sp->packets[1]);
	pf_state_counter_hton(st->bytes[0], sp->bytes[0]);
	pf_state_counter_hton(st->bytes[1], sp->bytes[1]);

	sp->max_mss = htons(st->max_mss);
	sp->min_ttl = st->min_ttl;
	sp->set_tos = st->set_tos;
	sp->set_prio[0] = st->set_prio[0];
	sp->set_prio[1] = st->set_prio[1];
}

/* END state table stuff */

void
pf_purge_expired_rules(void)
{
	struct pf_rule	*r;

	PF_ASSERT_LOCKED();

	if (SLIST_EMPTY(&pf_rule_gcl))
		return;

	while ((r = SLIST_FIRST(&pf_rule_gcl)) != NULL) {
		SLIST_REMOVE(&pf_rule_gcl, r, pf_rule, gcle);
		KASSERT(r->rule_flag & PFRULE_EXPIRED);
		pf_purge_rule(r);
	}
}

void
pf_purge_timeout(void *unused)
{
	/* XXX move to systqmp to avoid KERNEL_LOCK */
	task_add(systq, &pf_purge_task);
}

void
pf_purge(void *xnloops)
{
	int *nloops = xnloops;

	/*
	 * process a fraction of the state table every second
	 * Note:
	 *	we no longer need PF_LOCK() here, because
	 *	pf_purge_expired_states() uses pf_state_lock to maintain
	 *	consistency.
	 */
	if (pf_default_rule.timeout[PFTM_INTERVAL] > 0)
		pf_purge_expired_states(1 + (pf_status.states
		    / pf_default_rule.timeout[PFTM_INTERVAL]));

	NET_LOCK();

	PF_LOCK();
	/* purge other expired types every PFTM_INTERVAL seconds */
	if (++(*nloops) >= pf_default_rule.timeout[PFTM_INTERVAL]) {
		pf_purge_expired_src_nodes();
		pf_purge_expired_rules();
	}
	PF_UNLOCK();

	/*
	 * Fragments don't require PF_LOCK(), they use their own lock.
	 */
	if ((*nloops) >= pf_default_rule.timeout[PFTM_INTERVAL]) {
		pf_purge_expired_fragments();
		*nloops = 0;
	}
	NET_UNLOCK();

	timeout_add_sec(&pf_purge_to, 1);
}

int32_t
pf_state_expires(const struct pf_state *state, uint8_t stimeout)
{
	u_int32_t	timeout;
	u_int32_t	start;
	u_int32_t	end;
	u_int32_t	states;

	/*
	 * pf_state_expires is used by the state purge task to
	 * decide if a state is a candidate for cleanup, and by the
	 * pfsync state export code to populate an expiry time.
	 *
	 * this function may be called by the state purge task while
	 * the state is being modified. avoid inconsistent reads of
	 * state->timeout by having the caller do the read (and any
	 * checks it needs to do on the same variable) and then pass
	 * their view of the timeout in here for this function to use.
	 * the only consequence of using a stale timeout value is
	 * that the state won't be a candidate for purging until the
	 * next pass of the purge task.
	 */

	/* handle all PFTM_* > PFTM_MAX here */
	if (stimeout == PFTM_PURGE)
		return (0);

	KASSERT(stimeout != PFTM_UNLINKED);
	KASSERT(stimeout < PFTM_MAX);

	timeout = state->rule.ptr->timeout[stimeout];
	if (!timeout)
		timeout = pf_default_rule.timeout[stimeout];

	start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START];
	if (start) {
		end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END];
		states = state->rule.ptr->states_cur;
	} else {
		start = pf_default_rule.timeout[PFTM_ADAPTIVE_START];
		end = pf_default_rule.timeout[PFTM_ADAPTIVE_END];
		states = pf_status.states;
	}
	if (end && states > start && start < end) {
		if (states >= end)
			return (0);

		timeout = (u_int64_t)timeout * (end - states) / (end - start);
	}

	return (state->expire + timeout);
}
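/*
 * Worked example for the adaptive scaling above (illustrative comment,
 * not part of the original source).  With adaptive.start 6000,
 * adaptive.end 12000 and a base tcp.established timeout of 86400
 * seconds, a table of 9000 states yields
 *
 *	timeout = 86400 * (12000 - 9000) / (12000 - 6000) = 43200
 *
 * so halfway between start and end the effective timeout is halved;
 * at or above adaptive.end the function returns 0 and the state
 * becomes an immediate purge candidate.
 */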
void
pf_purge_expired_src_nodes(void)
{
	struct pf_src_node	*cur, *next;

	PF_ASSERT_LOCKED();

	for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) {
		next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);

		if (cur->states == 0 && cur->expire <= getuptime()) {
			next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);
			pf_remove_src_node(cur);
		}
	}
}

void
pf_src_tree_remove_state(struct pf_state *s)
{
	u_int32_t		 timeout;
	struct pf_sn_item	*sni;

	while ((sni = SLIST_FIRST(&s->src_nodes)) != NULL) {
		SLIST_REMOVE_HEAD(&s->src_nodes, next);
		if (s->src.tcp_est)
			--sni->sn->conn;
		if (--sni->sn->states == 0) {
			timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
			if (!timeout)
				timeout =
				    pf_default_rule.timeout[PFTM_SRC_NODE];
			sni->sn->expire = getuptime() + timeout;
		}
		pool_put(&pf_sn_item_pl, sni);
	}
}

void
pf_remove_state(struct pf_state *cur)
{
	PF_ASSERT_LOCKED();

	/* handle load balancing related tasks */
	pf_postprocess_addr(cur);

	if (cur->src.state == PF_TCPS_PROXY_DST) {
		pf_send_tcp(cur->rule.ptr, cur->key[PF_SK_WIRE]->af,
		    &cur->key[PF_SK_WIRE]->addr[1],
		    &cur->key[PF_SK_WIRE]->addr[0],
		    cur->key[PF_SK_WIRE]->port[1],
		    cur->key[PF_SK_WIRE]->port[0],
		    cur->src.seqhi, cur->src.seqlo + 1,
		    TH_RST|TH_ACK, 0, 0, 0, 1, cur->tag,
		    cur->key[PF_SK_WIRE]->rdomain);
	}
	if (cur->key[PF_SK_STACK]->proto == IPPROTO_TCP)
		pf_set_protostate(cur, PF_PEER_BOTH, TCPS_CLOSED);

	RB_REMOVE(pf_state_tree_id, &tree_id, cur);
#if NPFLOW > 0
	if (cur->state_flags & PFSTATE_PFLOW)
		export_pflow(cur);
#endif	/* NPFLOW > 0 */
#if NPFSYNC > 0
	pfsync_delete_state(cur);
#endif	/* NPFSYNC > 0 */
	cur->timeout = PFTM_UNLINKED;
	pf_src_tree_remove_state(cur);
	pf_detach_state(cur);
}

void
pf_remove_divert_state(struct pf_state_key *sk)
{
	struct pf_state_item	*si;

	PF_ASSERT_UNLOCKED();

	PF_LOCK();
	PF_STATE_ENTER_WRITE();
	TAILQ_FOREACH(si, &sk->states, entry) {
		if (sk == si->s->key[PF_SK_STACK] && si->s->rule.ptr &&
		    (si->s->rule.ptr->divert.type == PF_DIVERT_TO ||
		    si->s->rule.ptr->divert.type == PF_DIVERT_REPLY)) {
			pf_remove_state(si->s);
			break;
		}
	}
	PF_STATE_EXIT_WRITE();
	PF_UNLOCK();
}

void
pf_free_state(struct pf_state *cur)
{
	struct pf_rule_item *ri;

	PF_ASSERT_LOCKED();

#if NPFSYNC > 0
	if (pfsync_state_in_use(cur))
		return;
#endif	/* NPFSYNC > 0 */
	KASSERT(cur->timeout == PFTM_UNLINKED);
	if (--cur->rule.ptr->states_cur == 0 &&
	    cur->rule.ptr->src_nodes == 0)
		pf_rm_rule(NULL, cur->rule.ptr);
	if (cur->anchor.ptr != NULL)
		if (--cur->anchor.ptr->states_cur == 0)
			pf_rm_rule(NULL, cur->anchor.ptr);
	while ((ri = SLIST_FIRST(&cur->match_rules))) {
		SLIST_REMOVE_HEAD(&cur->match_rules, entry);
		if (--ri->r->states_cur == 0 &&
		    ri->r->src_nodes == 0)
			pf_rm_rule(NULL, ri->r);
		pool_put(&pf_rule_item_pl, ri);
	}
	pf_normalize_tcp_cleanup(cur);
	pfi_kif_unref(cur->kif, PFI_KIF_REF_STATE);
	pf_state_list_remove(&pf_state_list, cur);
	if (cur->tag)
		pf_tag_unref(cur->tag);
	pf_state_unref(cur);
	pf_status.fcounters[FCNT_STATE_REMOVALS]++;
	pf_status.states--;
}

void
pf_purge_expired_states(u_int32_t maxcheck)
{
	/*
	 * this task/thread/context/whatever is the only thing that
	 * removes states from the pf_state_list, so the cur reference
	 * it holds between calls is guaranteed to still be in the
	 * list.
	 */
	static struct pf_state	*cur = NULL;

	struct pf_state		*head, *tail;
	struct pf_state		*st;
	SLIST_HEAD(pf_state_gcl, pf_state) gcl = SLIST_HEAD_INITIALIZER(gcl);
	time_t			 now;

	PF_ASSERT_UNLOCKED();

	rw_enter_read(&pf_state_list.pfs_rwl);

	mtx_enter(&pf_state_list.pfs_mtx);
	head = TAILQ_FIRST(&pf_state_list.pfs_list);
	tail = TAILQ_LAST(&pf_state_list.pfs_list, pf_state_queue);
	mtx_leave(&pf_state_list.pfs_mtx);

	if (head == NULL) {
		/* the list is empty */
		rw_exit_read(&pf_state_list.pfs_rwl);
		return;
	}

	/* (re)start at the front of the list */
	if (cur == NULL)
		cur = head;

	now = getuptime();

	do {
		uint8_t stimeout = cur->timeout;

		if ((stimeout == PFTM_UNLINKED) ||
		    (pf_state_expires(cur, stimeout) <= now)) {
			st = pf_state_ref(cur);
			SLIST_INSERT_HEAD(&gcl, st, gc_list);
		}

		/* don't iterate past the end of our view of the list */
		if (cur == tail) {
			cur = NULL;
			break;
		}

		cur = TAILQ_NEXT(cur, entry_list);
	} while (maxcheck--);

	rw_exit_read(&pf_state_list.pfs_rwl);

	if (SLIST_EMPTY(&gcl))
		return;

	NET_LOCK();
	rw_enter_write(&pf_state_list.pfs_rwl);
	PF_LOCK();
	PF_STATE_ENTER_WRITE();
	SLIST_FOREACH(st, &gcl, gc_list) {
		if (st->timeout != PFTM_UNLINKED)
			pf_remove_state(st);

		pf_free_state(st);
	}
	PF_STATE_EXIT_WRITE();
	PF_UNLOCK();
	rw_exit_write(&pf_state_list.pfs_rwl);
	NET_UNLOCK();

	while ((st = SLIST_FIRST(&gcl)) != NULL) {
		SLIST_REMOVE_HEAD(&gcl, gc_list);
		pf_state_unref(st);
	}
}

int
pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw, int wait)
{
	if (aw->type != PF_ADDR_TABLE)
		return (0);
	if ((aw->p.tbl = pfr_attach_table(rs, aw->v.tblname, wait)) == NULL)
		return (1);
	return (0);
}

void
pf_tbladdr_remove(struct pf_addr_wrap *aw)
{
	if (aw->type != PF_ADDR_TABLE || aw->p.tbl == NULL)
		return;
	pfr_detach_table(aw->p.tbl);
	aw->p.tbl = NULL;
}

void
pf_tbladdr_copyout(struct pf_addr_wrap *aw)
{
	struct pfr_ktable *kt = aw->p.tbl;

	if (aw->type != PF_ADDR_TABLE || kt == NULL)
		return;
	if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL)
		kt = kt->pfrkt_root;
	aw->p.tbl = NULL;
	aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ?
	    kt->pfrkt_cnt : -1;
}

void
pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af)
{
	switch (af) {
	case AF_INET: {
		u_int32_t a = ntohl(addr->addr32[0]);
		addlog("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255,
		    (a>>8)&255, a&255);
		if (p) {
			p = ntohs(p);
			addlog(":%u", p);
		}
		break;
	}
#ifdef INET6
	case AF_INET6: {
		u_int16_t b;
		u_int8_t i, curstart, curend, maxstart, maxend;
		curstart = curend = maxstart = maxend = 255;
		for (i = 0; i < 8; i++) {
			if (!addr->addr16[i]) {
				if (curstart == 255)
					curstart = i;
				curend = i;
			} else {
				if ((curend - curstart) >
				    (maxend - maxstart)) {
					maxstart = curstart;
					maxend = curend;
				}
				curstart = curend = 255;
			}
		}
		if ((curend - curstart) >
		    (maxend - maxstart)) {
			maxstart = curstart;
			maxend = curend;
		}
		for (i = 0; i < 8; i++) {
			if (i >= maxstart && i <= maxend) {
				if (i == 0)
					addlog(":");
				if (i == maxend)
					addlog(":");
			} else {
				b = ntohs(addr->addr16[i]);
				addlog("%x", b);
				if (i < 7)
					addlog(":");
			}
		}
		if (p) {
			p = ntohs(p);
			addlog("[%u]", p);
		}
		break;
	}
#endif /* INET6 */
	}
}

void
pf_print_state(struct pf_state *s)
{
	pf_print_state_parts(s, NULL, NULL);
}

void
pf_print_state_parts(struct pf_state *s,
    struct pf_state_key *skwp, struct pf_state_key *sksp)
{
	struct pf_state_key	*skw, *sks;
	u_int8_t		 proto, dir;

	/* Do our best to fill these, but they're skipped if NULL */
	skw = skwp ? skwp : (s ? s->key[PF_SK_WIRE] : NULL);
	sks = sksp ? sksp : (s ? s->key[PF_SK_STACK] : NULL);
	proto = skw ? skw->proto : (sks ? sks->proto : 0);
	dir = s ? s->direction : 0;

	switch (proto) {
	case IPPROTO_IPV4:
		addlog("IPv4");
		break;
	case IPPROTO_IPV6:
		addlog("IPv6");
		break;
	case IPPROTO_TCP:
		addlog("TCP");
		break;
	case IPPROTO_UDP:
		addlog("UDP");
		break;
	case IPPROTO_ICMP:
		addlog("ICMP");
		break;
	case IPPROTO_ICMPV6:
		addlog("ICMPv6");
		break;
	default:
		addlog("%u", proto);
		break;
	}
	switch (dir) {
	case PF_IN:
		addlog(" in");
		break;
	case PF_OUT:
		addlog(" out");
		break;
	}
	if (skw) {
		addlog(" wire: (%d) ", skw->rdomain);
		pf_print_host(&skw->addr[0], skw->port[0], skw->af);
		addlog(" ");
		pf_print_host(&skw->addr[1], skw->port[1], skw->af);
	}
	if (sks) {
		addlog(" stack: (%d) ", sks->rdomain);
		if (sks != skw) {
			pf_print_host(&sks->addr[0], sks->port[0], sks->af);
			addlog(" ");
			pf_print_host(&sks->addr[1], sks->port[1], sks->af);
		} else
			addlog("-");
	}
	if (s) {
		if (proto == IPPROTO_TCP) {
			addlog(" [lo=%u high=%u win=%u modulator=%u",
			    s->src.seqlo, s->src.seqhi,
			    s->src.max_win, s->src.seqdiff);
			if (s->src.wscale && s->dst.wscale)
				addlog(" wscale=%u",
				    s->src.wscale & PF_WSCALE_MASK);
			addlog("]");
			addlog(" [lo=%u high=%u win=%u modulator=%u",
			    s->dst.seqlo, s->dst.seqhi,
			    s->dst.max_win, s->dst.seqdiff);
			if (s->src.wscale && s->dst.wscale)
				addlog(" wscale=%u",
				    s->dst.wscale & PF_WSCALE_MASK);
			addlog("]");
		}
		addlog(" %u:%u", s->src.state, s->dst.state);
		if (s->rule.ptr)
			addlog(" @%d", s->rule.ptr->nr);
	}
}

void
pf_print_flags(u_int8_t f)
{
	if (f)
		addlog(" ");
	if (f & TH_FIN)
		addlog("F");
	if (f & TH_SYN)
		addlog("S");
	if (f & TH_RST)
		addlog("R");
	if (f & TH_PUSH)
		addlog("P");
	if (f & TH_ACK)
		addlog("A");
	if (f & TH_URG)
		addlog("U");
	if (f & TH_ECE)
		addlog("E");
	if (f & TH_CWR)
		addlog("W");
}

#define	PF_SET_SKIP_STEPS(i)					\
	do {							\
		while (head[i] != cur) {			\
			head[i]->skip[i].ptr = cur;		\
			head[i] = TAILQ_NEXT(head[i], entries);	\
		}						\
	} while (0)

void
pf_calc_skip_steps(struct pf_rulequeue *rules)
{
	struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT];
	int i;

	cur = TAILQ_FIRST(rules);
	prev = cur;
	for (i = 0; i < PF_SKIP_COUNT; ++i)
		head[i] = cur;
	while (cur != NULL) {
		if (cur->kif != prev->kif || cur->ifnot != prev->ifnot)
			PF_SET_SKIP_STEPS(PF_SKIP_IFP);
		if (cur->direction != prev->direction)
			PF_SET_SKIP_STEPS(PF_SKIP_DIR);
		if (cur->onrdomain != prev->onrdomain ||
		    cur->ifnot != prev->ifnot)
			PF_SET_SKIP_STEPS(PF_SKIP_RDOM);
		if (cur->af != prev->af)
			PF_SET_SKIP_STEPS(PF_SKIP_AF);
		if (cur->proto != prev->proto)
			PF_SET_SKIP_STEPS(PF_SKIP_PROTO);
		if (cur->src.neg != prev->src.neg ||
		    pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr))
			PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR);
		if (cur->dst.neg != prev->dst.neg ||
		    pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr))
			PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR);
		if (cur->src.port[0] != prev->src.port[0] ||
		    cur->src.port[1] != prev->src.port[1] ||
		    cur->src.port_op != prev->src.port_op)
			PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT);
		if (cur->dst.port[0] != prev->dst.port[0] ||
		    cur->dst.port[1] != prev->dst.port[1] ||
		    cur->dst.port_op != prev->dst.port_op)
			PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);

		prev = cur;
		cur = TAILQ_NEXT(cur, entries);
	}
	for (i = 0; i < PF_SKIP_COUNT; ++i)
		PF_SET_SKIP_STEPS(i);
}

int
pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2)
{
	if (aw1->type != aw2->type)
		return (1);
	switch (aw1->type) {
	case PF_ADDR_ADDRMASK:
	case PF_ADDR_RANGE:
		if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, AF_INET6))
			return (1);
		if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, AF_INET6))
			return (1);
		return (0);
	case PF_ADDR_DYNIFTL:
		return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt);
	case PF_ADDR_NONE:
	case PF_ADDR_NOROUTE:
	case PF_ADDR_URPFFAILED:
		return (0);
	case PF_ADDR_TABLE:
		return (aw1->p.tbl != aw2->p.tbl);
	case PF_ADDR_RTLABEL:
		return (aw1->v.rtlabel != aw2->v.rtlabel);
	default:
		addlog("invalid address type: %d\n", aw1->type);
		return (1);
	}
}

/* This algorithm computes 'a + b - c' in ones-complement using a trick to
 * emulate at most one ones-complement subtraction. This thereby limits net
 * carries/borrows to at most one, eliminating a reduction step and saving one
 * each of +, >>, & and ~.
 *
 * def. x mod y = x - (x//y)*y		for integer x,y
 * def. sum = x mod 2^16
 * def. accumulator = (x >> 16) mod 2^16
 *
 * The trick works as follows: subtracting exactly one u_int16_t from the
 * u_int32_t x incurs at most one underflow, wrapping its upper 16-bits, the
 * accumulator, to 2^16 - 1. Adding this to the 16-bit sum preserves the
 * ones-complement borrow:
 *
 *	(sum + accumulator) mod 2^16
 *	=	{ assume underflow: accumulator := 2^16 - 1 }
 *	(sum + 2^16 - 1) mod 2^16
 *	=	{ mod }
 *	(sum - 1) mod 2^16
 *
 * Although this breaks for sum = 0, giving 0xffff, which is ones-complement's
 * other zero, not -1, that cannot occur: the 16-bit sum cannot be underflown
 * to zero as that requires subtraction of at least 2^16, which exceeds a
 * single u_int16_t's range.
 *
 * We use the following theorem to derive the implementation:
 *
 * th.	(x + (y mod z)) mod z = (x + y) mod z		(0)
 * proof.
 *	(x + (y mod z)) mod z
 *	 =	{ def mod }
 *	(x + y - (y//z)*z) mod z
 *	 =	{ (a + b*c) mod c = a mod c }
 *	(x + y) mod z				[end of proof]
 *
 * ... and thereby obtain:
 *
 *	(sum + accumulator) mod 2^16
 *	 =	{ def. accumulator, def. sum }
 *	(x mod 2^16 + (x >> 16) mod 2^16) mod 2^16
 *	 =	{ (0), twice }
 *	(x + (x >> 16)) mod 2^16
 *	 =	{ x mod 2^n = x & (2^n - 1) }
 *	(x + (x >> 16)) & 0xffff
 *
 * Note: this serves also as a reduction step for at most one add (as the
 * trailing mod 2^16 prevents further reductions by destroying carries).
 */
static __inline void
pf_cksum_fixup(u_int16_t *cksum, u_int16_t was, u_int16_t now,
    u_int8_t proto)
{
	u_int32_t x;
	const int udp = proto == IPPROTO_UDP;

	x = *cksum + was - now;
	x = (x + (x >> 16)) & 0xffff;

	/* optimise: eliminate a branch when not udp */
	if (udp && *cksum == 0x0000)
		return;
	if (udp && x == 0x0000)
		x = 0xffff;

	*cksum = (u_int16_t)(x);
}
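/*
 * Worked example (illustrative comment, not part of the original
 * source): patching a 16-bit field from was = 0x1234 to now = 0xabcd
 * in a packet whose current checksum is 0x8765:
 *
 *	x = 0x8765 + 0x1234 - 0xabcd;		x == 0xffffedcc
 *	x = (x + (x >> 16)) & 0xffff;		x == 0xedcb
 *
 * the borrow from the subtraction wraps the accumulator to 0xffff
 * (2^16 - 1); folding it back into the low word subtracts exactly one,
 * preserving the ones-complement borrow as derived in the comment
 * above.
 */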
1921 */ 1922 static __inline void 1923 pf_cksum_fixup(u_int16_t *cksum, u_int16_t was, u_int16_t now, 1924 u_int8_t proto) 1925 { 1926 u_int32_t x; 1927 const int udp = proto == IPPROTO_UDP; 1928 1929 x = *cksum + was - now; 1930 x = (x + (x >> 16)) & 0xffff; 1931 1932 /* optimise: eliminate a branch when not udp */ 1933 if (udp && *cksum == 0x0000) 1934 return; 1935 if (udp && x == 0x0000) 1936 x = 0xffff; 1937 1938 *cksum = (u_int16_t)(x); 1939 } 1940 1941 #ifdef INET6 1942 /* pre: coverage(cksum) is superset of coverage(covered_cksum) */ 1943 static __inline void 1944 pf_cksum_uncover(u_int16_t *cksum, u_int16_t covered_cksum, u_int8_t proto) 1945 { 1946 pf_cksum_fixup(cksum, ~covered_cksum, 0x0, proto); 1947 } 1948 1949 /* pre: disjoint(coverage(cksum), coverage(uncovered_cksum)) */ 1950 static __inline void 1951 pf_cksum_cover(u_int16_t *cksum, u_int16_t uncovered_cksum, u_int8_t proto) 1952 { 1953 pf_cksum_fixup(cksum, 0x0, ~uncovered_cksum, proto); 1954 } 1955 #endif /* INET6 */ 1956 1957 /* pre: *a is 16-bit aligned within its packet 1958 * 1959 * This algorithm emulates 16-bit ones-complement sums on a twos-complement 1960 * machine by conserving ones-complement's otherwise discarded carries in the 1961 * upper bits of x. These accumulated carries when added to the lower 16-bits 1962 * over at least zero 'reduction' steps then complete the ones-complement sum. 1963 * 1964 * def. sum = x mod 2^16 1965 * def. accumulator = (x >> 16) 1966 * 1967 * At most two reduction steps 1968 * 1969 * x := sum + accumulator 1970 * = { def sum, def accumulator } 1971 * x := x mod 2^16 + (x >> 16) 1972 * = { x mod 2^n = x & (2^n - 1) } 1973 * x := (x & 0xffff) + (x >> 16) 1974 * 1975 * are necessary to incorporate the accumulated carries (at most one per add) 1976 * i.e. to reduce x < 2^16 from at most 16 carries in the upper 16 bits. 1977 * 1978 * The function is also invariant over the endian of the host. Why? 1979 * 1980 * Define the unary transpose operator ~ on a bitstring in python slice 1981 * notation as lambda m: m[P:] + m[:P] , for some constant pivot P. 1982 * 1983 * th. ~ distributes over ones-complement addition, denoted by +_1, i.e. 1984 * 1985 * ~m +_1 ~n = ~(m +_1 n) (for all bitstrings m,n of equal length) 1986 * 1987 * proof. Regard the bitstrings in m +_1 n as split at P, forming at most two 1988 * 'half-adds'. Under ones-complement addition, each half-add carries to the 1989 * other, so the sum of each half-add is unaffected by their relative 1990 * order. Therefore: 1991 * 1992 * ~m +_1 ~n 1993 * = { half-adds invariant under transposition } 1994 * ~s 1995 * = { substitute } 1996 * ~(m +_1 n) [end of proof] 1997 * 1998 * th. Summing two in-memory ones-complement 16-bit variables m,n on a machine 1999 * with the converse endian does not alter the result. 2000 * 2001 * proof. 
2002 * { converse machine endian: load/store transposes, P := 8 } 2003 * ~(~m +_1 ~n) 2004 * = { ~ over +_1 } 2005 * ~~m +_1 ~~n 2006 * = { ~ is an involution } 2007 * m +_1 n [end of proof] 2008 * 2009 */ 2010 #define NEG(x) ((u_int16_t)~(x)) 2011 void 2012 pf_cksum_fixup_a(u_int16_t *cksum, const struct pf_addr *a, 2013 const struct pf_addr *an, sa_family_t af, u_int8_t proto) 2014 { 2015 u_int32_t x; 2016 const u_int16_t *n = an->addr16; 2017 const u_int16_t *o = a->addr16; 2018 const int udp = proto == IPPROTO_UDP; 2019 2020 switch (af) { 2021 case AF_INET: 2022 x = *cksum + o[0] + NEG(n[0]) + o[1] + NEG(n[1]); 2023 break; 2024 #ifdef INET6 2025 case AF_INET6: 2026 x = *cksum + o[0] + NEG(n[0]) + o[1] + NEG(n[1]) +\ 2027 o[2] + NEG(n[2]) + o[3] + NEG(n[3]) +\ 2028 o[4] + NEG(n[4]) + o[5] + NEG(n[5]) +\ 2029 o[6] + NEG(n[6]) + o[7] + NEG(n[7]); 2030 break; 2031 #endif /* INET6 */ 2032 default: 2033 unhandled_af(af); 2034 } 2035 2036 x = (x & 0xffff) + (x >> 16); 2037 x = (x & 0xffff) + (x >> 16); 2038 2039 /* optimise: eliminate a branch when not udp */ 2040 if (udp && *cksum == 0x0000) 2041 return; 2042 if (udp && x == 0x0000) 2043 x = 0xffff; 2044 2045 *cksum = (u_int16_t)(x); 2046 } 2047 2048 int 2049 pf_patch_8(struct pf_pdesc *pd, u_int8_t *f, u_int8_t v, bool hi) 2050 { 2051 int rewrite = 0; 2052 2053 if (*f != v) { 2054 u_int16_t old = htons(hi ? (*f << 8) : *f); 2055 u_int16_t new = htons(hi ? ( v << 8) : v); 2056 2057 pf_cksum_fixup(pd->pcksum, old, new, pd->proto); 2058 *f = v; 2059 rewrite = 1; 2060 } 2061 2062 return (rewrite); 2063 } 2064 2065 /* pre: *f is 16-bit aligned within its packet */ 2066 int 2067 pf_patch_16(struct pf_pdesc *pd, u_int16_t *f, u_int16_t v) 2068 { 2069 int rewrite = 0; 2070 2071 if (*f != v) { 2072 pf_cksum_fixup(pd->pcksum, *f, v, pd->proto); 2073 *f = v; 2074 rewrite = 1; 2075 } 2076 2077 return (rewrite); 2078 } 2079 2080 int 2081 pf_patch_16_unaligned(struct pf_pdesc *pd, void *f, u_int16_t v, bool hi) 2082 { 2083 int rewrite = 0; 2084 u_int8_t *fb = (u_int8_t*)f; 2085 u_int8_t *vb = (u_int8_t*)&v; 2086 2087 if (hi && ALIGNED_POINTER(f, u_int16_t)) { 2088 return (pf_patch_16(pd, f, v)); /* optimise */ 2089 } 2090 2091 rewrite += pf_patch_8(pd, fb++, *vb++, hi); 2092 rewrite += pf_patch_8(pd, fb++, *vb++,!hi); 2093 2094 return (rewrite); 2095 } 2096 2097 /* pre: *f is 16-bit aligned within its packet */ 2098 /* pre: pd->proto != IPPROTO_UDP */ 2099 int 2100 pf_patch_32(struct pf_pdesc *pd, u_int32_t *f, u_int32_t v) 2101 { 2102 int rewrite = 0; 2103 u_int16_t *pc = pd->pcksum; 2104 u_int8_t proto = pd->proto; 2105 2106 /* optimise: inline udp fixup code is unused; let compiler scrub it */ 2107 if (proto == IPPROTO_UDP) 2108 panic("%s: udp", __func__); 2109 2110 /* optimise: skip *f != v guard; true for all use-cases */ 2111 pf_cksum_fixup(pc, *f / (1 << 16), v / (1 << 16), proto); 2112 pf_cksum_fixup(pc, *f % (1 << 16), v % (1 << 16), proto); 2113 2114 *f = v; 2115 rewrite = 1; 2116 2117 return (rewrite); 2118 } 2119 2120 int 2121 pf_patch_32_unaligned(struct pf_pdesc *pd, void *f, u_int32_t v, bool hi) 2122 { 2123 int rewrite = 0; 2124 u_int8_t *fb = (u_int8_t*)f; 2125 u_int8_t *vb = (u_int8_t*)&v; 2126 2127 if (hi && ALIGNED_POINTER(f, u_int32_t)) { 2128 return (pf_patch_32(pd, f, v)); /* optimise */ 2129 } 2130 2131 rewrite += pf_patch_8(pd, fb++, *vb++, hi); 2132 rewrite += pf_patch_8(pd, fb++, *vb++,!hi); 2133 rewrite += pf_patch_8(pd, fb++, *vb++, hi); 2134 rewrite += pf_patch_8(pd, fb++, *vb++,!hi); 2135 2136 return (rewrite); 2137 } 2138 
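/*
 * Usage sketch for the pf_patch_*() family (illustrative only, not a
 * call site in this file): rewrite a TCP destination port while keeping
 * th_sum valid, assuming pd describes a TCP packet, pd->pcksum points at
 * the checksum within pd->hdr, and the 16-bit alignment precondition
 * holds:
 *
 *	struct tcphdr *th = &pd->hdr.tcp;
 *	int rewrite = pf_patch_16(pd, &th->th_dport, htons(8080));
 *
 *	if (rewrite)
 *		m_copyback(pd->m, pd->off, sizeof(*th), th, M_NOWAIT);
 *
 * The patch functions only touch the local header copy and the checksum;
 * copying the header back into the mbuf is the caller's job, as done at
 * the end of pf_test_rule().
 */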
2139 int 2140 pf_icmp_mapping(struct pf_pdesc *pd, u_int8_t type, int *icmp_dir, 2141 u_int16_t *virtual_id, u_int16_t *virtual_type) 2142 { 2143 /* 2144 * ICMP types marked with PF_OUT are typically responses to 2145 * PF_IN, and will match states in the opposite direction. 2146 * PF_IN ICMP types need to match a state with that type. 2147 */ 2148 *icmp_dir = PF_OUT; 2149 2150 /* Queries (and responses) */ 2151 switch (pd->af) { 2152 case AF_INET: 2153 switch (type) { 2154 case ICMP_ECHO: 2155 *icmp_dir = PF_IN; 2156 /* FALLTHROUGH */ 2157 case ICMP_ECHOREPLY: 2158 *virtual_type = ICMP_ECHO; 2159 *virtual_id = pd->hdr.icmp.icmp_id; 2160 break; 2161 2162 case ICMP_TSTAMP: 2163 *icmp_dir = PF_IN; 2164 /* FALLTHROUGH */ 2165 case ICMP_TSTAMPREPLY: 2166 *virtual_type = ICMP_TSTAMP; 2167 *virtual_id = pd->hdr.icmp.icmp_id; 2168 break; 2169 2170 case ICMP_IREQ: 2171 *icmp_dir = PF_IN; 2172 /* FALLTHROUGH */ 2173 case ICMP_IREQREPLY: 2174 *virtual_type = ICMP_IREQ; 2175 *virtual_id = pd->hdr.icmp.icmp_id; 2176 break; 2177 2178 case ICMP_MASKREQ: 2179 *icmp_dir = PF_IN; 2180 /* FALLTHROUGH */ 2181 case ICMP_MASKREPLY: 2182 *virtual_type = ICMP_MASKREQ; 2183 *virtual_id = pd->hdr.icmp.icmp_id; 2184 break; 2185 2186 case ICMP_IPV6_WHEREAREYOU: 2187 *icmp_dir = PF_IN; 2188 /* FALLTHROUGH */ 2189 case ICMP_IPV6_IAMHERE: 2190 *virtual_type = ICMP_IPV6_WHEREAREYOU; 2191 *virtual_id = 0; /* Nothing sane to match on! */ 2192 break; 2193 2194 case ICMP_MOBILE_REGREQUEST: 2195 *icmp_dir = PF_IN; 2196 /* FALLTHROUGH */ 2197 case ICMP_MOBILE_REGREPLY: 2198 *virtual_type = ICMP_MOBILE_REGREQUEST; 2199 *virtual_id = 0; /* Nothing sane to match on! */ 2200 break; 2201 2202 case ICMP_ROUTERSOLICIT: 2203 *icmp_dir = PF_IN; 2204 /* FALLTHROUGH */ 2205 case ICMP_ROUTERADVERT: 2206 *virtual_type = ICMP_ROUTERSOLICIT; 2207 *virtual_id = 0; /* Nothing sane to match on! */ 2208 break; 2209 2210 /* These ICMP types map to other connections */ 2211 case ICMP_UNREACH: 2212 case ICMP_SOURCEQUENCH: 2213 case ICMP_REDIRECT: 2214 case ICMP_TIMXCEED: 2215 case ICMP_PARAMPROB: 2216 /* These will not be used, but set them anyway */ 2217 *icmp_dir = PF_IN; 2218 *virtual_type = htons(type); 2219 *virtual_id = 0; 2220 return (1); /* These types match to another state */ 2221 2222 /* 2223 * All remaining ICMP types get their own states, 2224 * and will only match in one direction. 2225 */ 2226 default: 2227 *icmp_dir = PF_IN; 2228 *virtual_type = type; 2229 *virtual_id = 0; 2230 break; 2231 } 2232 break; 2233 #ifdef INET6 2234 case AF_INET6: 2235 switch (type) { 2236 case ICMP6_ECHO_REQUEST: 2237 *icmp_dir = PF_IN; 2238 /* FALLTHROUGH */ 2239 case ICMP6_ECHO_REPLY: 2240 *virtual_type = ICMP6_ECHO_REQUEST; 2241 *virtual_id = pd->hdr.icmp6.icmp6_id; 2242 break; 2243 2244 case MLD_LISTENER_QUERY: 2245 case MLD_LISTENER_REPORT: { 2246 struct mld_hdr *mld = &pd->hdr.mld; 2247 u_int32_t h; 2248 2249 /* 2250 * Listener Report can be sent by clients 2251 * without an associated Listener Query. 2252 * In addition to that, when Report is sent as a 2253 * reply to a Query its source and destination 2254 * address are different. 
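 *
 * Hashing the multicast group address, which is identical in both
 * directions, into a fake id (below) still lets such a Query and
 * its Report map to the same state.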
2255 */ 2256 *icmp_dir = PF_IN; 2257 *virtual_type = MLD_LISTENER_QUERY; 2258 /* generate fake id for these messages */ 2259 h = mld->mld_addr.s6_addr32[0] ^ 2260 mld->mld_addr.s6_addr32[1] ^ 2261 mld->mld_addr.s6_addr32[2] ^ 2262 mld->mld_addr.s6_addr32[3]; 2263 *virtual_id = (h >> 16) ^ (h & 0xffff); 2264 break; 2265 } 2266 2267 /* 2268 * ICMP6_FQDN and ICMP6_NI query/reply are the same type as 2269 * ICMP6_WRU 2270 */ 2271 case ICMP6_WRUREQUEST: 2272 *icmp_dir = PF_IN; 2273 /* FALLTHROUGH */ 2274 case ICMP6_WRUREPLY: 2275 *virtual_type = ICMP6_WRUREQUEST; 2276 *virtual_id = 0; /* Nothing sane to match on! */ 2277 break; 2278 2279 case MLD_MTRACE: 2280 *icmp_dir = PF_IN; 2281 /* FALLTHROUGH */ 2282 case MLD_MTRACE_RESP: 2283 *virtual_type = MLD_MTRACE; 2284 *virtual_id = 0; /* Nothing sane to match on! */ 2285 break; 2286 2287 case ND_NEIGHBOR_SOLICIT: 2288 *icmp_dir = PF_IN; 2289 /* FALLTHROUGH */ 2290 case ND_NEIGHBOR_ADVERT: { 2291 struct nd_neighbor_solicit *nd = &pd->hdr.nd_ns; 2292 u_int32_t h; 2293 2294 *virtual_type = ND_NEIGHBOR_SOLICIT; 2295 /* generate fake id for these messages */ 2296 h = nd->nd_ns_target.s6_addr32[0] ^ 2297 nd->nd_ns_target.s6_addr32[1] ^ 2298 nd->nd_ns_target.s6_addr32[2] ^ 2299 nd->nd_ns_target.s6_addr32[3]; 2300 *virtual_id = (h >> 16) ^ (h & 0xffff); 2301 break; 2302 } 2303 2304 /* 2305 * These ICMP types map to other connections. 2306 * ND_REDIRECT can't be in this list because the triggering 2307 * packet header is optional. 2308 */ 2309 case ICMP6_DST_UNREACH: 2310 case ICMP6_PACKET_TOO_BIG: 2311 case ICMP6_TIME_EXCEEDED: 2312 case ICMP6_PARAM_PROB: 2313 /* These will not be used, but set them anyway */ 2314 *icmp_dir = PF_IN; 2315 *virtual_type = htons(type); 2316 *virtual_id = 0; 2317 return (1); /* These types match to another state */ 2318 /* 2319 * All remaining ICMP6 types get their own states, 2320 * and will only match in one direction. 
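 * (ND_REDIRECT, for example, takes this path, since its optional
 * quoted header keeps it out of the error-type list above.)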
2321 */ 2322 default: 2323 *icmp_dir = PF_IN; 2324 *virtual_type = type; 2325 *virtual_id = 0; 2326 break; 2327 } 2328 break; 2329 #endif /* INET6 */ 2330 } 2331 *virtual_type = htons(*virtual_type); 2332 return (0); /* These types match to their own state */ 2333 } 2334 2335 void 2336 pf_translate_icmp(struct pf_pdesc *pd, struct pf_addr *qa, u_int16_t *qp, 2337 struct pf_addr *oa, struct pf_addr *na, u_int16_t np) 2338 { 2339 /* note: doesn't trouble to fixup quoted checksums, if any */ 2340 2341 /* change quoted protocol port */ 2342 if (qp != NULL) 2343 pf_patch_16(pd, qp, np); 2344 2345 /* change quoted ip address */ 2346 pf_cksum_fixup_a(pd->pcksum, qa, na, pd->af, pd->proto); 2347 pf_addrcpy(qa, na, pd->af); 2348 2349 /* change network-header's ip address */ 2350 if (oa) 2351 pf_translate_a(pd, oa, na); 2352 } 2353 2354 /* pre: *a is 16-bit aligned within its packet */ 2355 /* *a is a network header src/dst address */ 2356 int 2357 pf_translate_a(struct pf_pdesc *pd, struct pf_addr *a, struct pf_addr *an) 2358 { 2359 int rewrite = 0; 2360 2361 /* warning: !PF_ANEQ != PF_AEQ */ 2362 if (!PF_ANEQ(a, an, pd->af)) 2363 return (0); 2364 2365 /* fixup transport pseudo-header, if any */ 2366 switch (pd->proto) { 2367 case IPPROTO_TCP: /* FALLTHROUGH */ 2368 case IPPROTO_UDP: /* FALLTHROUGH */ 2369 case IPPROTO_ICMPV6: 2370 pf_cksum_fixup_a(pd->pcksum, a, an, pd->af, pd->proto); 2371 break; 2372 default: 2373 break; /* assume no pseudo-header */ 2374 } 2375 2376 pf_addrcpy(a, an, pd->af); 2377 rewrite = 1; 2378 2379 return (rewrite); 2380 } 2381 2382 #ifdef INET6 2383 /* pf_translate_af() may change pd->m, adjust local copies after calling */ 2384 int 2385 pf_translate_af(struct pf_pdesc *pd) 2386 { 2387 static const struct pf_addr zero; 2388 struct ip *ip4; 2389 struct ip6_hdr *ip6; 2390 int copyback = 0; 2391 u_int hlen, ohlen, dlen; 2392 u_int16_t *pc; 2393 u_int8_t af_proto, naf_proto; 2394 2395 hlen = (pd->naf == AF_INET) ? 
sizeof(*ip4) : sizeof(*ip6); 2396 ohlen = pd->off; 2397 dlen = pd->tot_len - pd->off; 2398 pc = pd->pcksum; 2399 2400 af_proto = naf_proto = pd->proto; 2401 if (naf_proto == IPPROTO_ICMP) 2402 af_proto = IPPROTO_ICMPV6; 2403 if (naf_proto == IPPROTO_ICMPV6) 2404 af_proto = IPPROTO_ICMP; 2405 2406 /* uncover stale pseudo-header */ 2407 switch (af_proto) { 2408 case IPPROTO_ICMPV6: 2409 /* optimise: unchanged for TCP/UDP */ 2410 pf_cksum_fixup(pc, htons(af_proto), 0x0, af_proto); 2411 pf_cksum_fixup(pc, htons(dlen), 0x0, af_proto); 2412 /* FALLTHROUGH */ 2413 case IPPROTO_UDP: /* FALLTHROUGH */ 2414 case IPPROTO_TCP: 2415 pf_cksum_fixup_a(pc, pd->src, &zero, pd->af, af_proto); 2416 pf_cksum_fixup_a(pc, pd->dst, &zero, pd->af, af_proto); 2417 copyback = 1; 2418 break; 2419 default: 2420 break; /* assume no pseudo-header */ 2421 } 2422 2423 /* replace the network header */ 2424 m_adj(pd->m, pd->off); 2425 pd->src = NULL; 2426 pd->dst = NULL; 2427 2428 if ((M_PREPEND(pd->m, hlen, M_DONTWAIT)) == NULL) { 2429 pd->m = NULL; 2430 return (-1); 2431 } 2432 2433 pd->off = hlen; 2434 pd->tot_len += hlen - ohlen; 2435 2436 switch (pd->naf) { 2437 case AF_INET: 2438 ip4 = mtod(pd->m, struct ip *); 2439 memset(ip4, 0, hlen); 2440 ip4->ip_v = IPVERSION; 2441 ip4->ip_hl = hlen >> 2; 2442 ip4->ip_tos = pd->tos; 2443 ip4->ip_len = htons(hlen + dlen); 2444 ip4->ip_id = htons(ip_randomid()); 2445 ip4->ip_off = htons(IP_DF); 2446 ip4->ip_ttl = pd->ttl; 2447 ip4->ip_p = pd->proto; 2448 ip4->ip_src = pd->nsaddr.v4; 2449 ip4->ip_dst = pd->ndaddr.v4; 2450 break; 2451 case AF_INET6: 2452 ip6 = mtod(pd->m, struct ip6_hdr *); 2453 memset(ip6, 0, hlen); 2454 ip6->ip6_vfc = IPV6_VERSION; 2455 ip6->ip6_flow |= htonl((u_int32_t)pd->tos << 20); 2456 ip6->ip6_plen = htons(dlen); 2457 ip6->ip6_nxt = pd->proto; 2458 if (!pd->ttl || pd->ttl > IPV6_DEFHLIM) 2459 ip6->ip6_hlim = IPV6_DEFHLIM; 2460 else 2461 ip6->ip6_hlim = pd->ttl; 2462 ip6->ip6_src = pd->nsaddr.v6; 2463 ip6->ip6_dst = pd->ndaddr.v6; 2464 break; 2465 default: 2466 unhandled_af(pd->naf); 2467 } 2468 2469 /* UDP over IPv6 must be checksummed per rfc2460 p27 */ 2470 if (naf_proto == IPPROTO_UDP && *pc == 0x0000 && 2471 pd->naf == AF_INET6) { 2472 pd->m->m_pkthdr.csum_flags |= M_UDP_CSUM_OUT; 2473 } 2474 2475 /* cover fresh pseudo-header */ 2476 switch (naf_proto) { 2477 case IPPROTO_ICMPV6: 2478 /* optimise: unchanged for TCP/UDP */ 2479 pf_cksum_fixup(pc, 0x0, htons(naf_proto), naf_proto); 2480 pf_cksum_fixup(pc, 0x0, htons(dlen), naf_proto); 2481 /* FALLTHROUGH */ 2482 case IPPROTO_UDP: /* FALLTHROUGH */ 2483 case IPPROTO_TCP: 2484 pf_cksum_fixup_a(pc, &zero, &pd->nsaddr, pd->naf, naf_proto); 2485 pf_cksum_fixup_a(pc, &zero, &pd->ndaddr, pd->naf, naf_proto); 2486 copyback = 1; 2487 break; 2488 default: 2489 break; /* assume no pseudo-header */ 2490 } 2491 2492 /* flush pd->pcksum */ 2493 if (copyback) 2494 m_copyback(pd->m, pd->off, pd->hdrlen, &pd->hdr, M_NOWAIT); 2495 2496 return (0); 2497 } 2498 2499 int 2500 pf_change_icmp_af(struct mbuf *m, int ipoff2, struct pf_pdesc *pd, 2501 struct pf_pdesc *pd2, struct pf_addr *src, struct pf_addr *dst, 2502 sa_family_t af, sa_family_t naf) 2503 { 2504 struct mbuf *n = NULL; 2505 struct ip *ip4; 2506 struct ip6_hdr *ip6; 2507 u_int hlen, ohlen, dlen; 2508 int d; 2509 2510 if (af == naf || (af != AF_INET && af != AF_INET6) || 2511 (naf != AF_INET && naf != AF_INET6)) 2512 return (-1); 2513 2514 /* split the mbuf chain on the quoted ip/ip6 header boundary */ 2515 if ((n = m_split(m, ipoff2, M_DONTWAIT)) == NULL) 2516 
return (-1);
2517
2518 /* new quoted header */
2519 hlen = naf == AF_INET ? sizeof(*ip4) : sizeof(*ip6);
2520 /* old quoted header */
2521 ohlen = pd2->off - ipoff2;
2522
2523 /* trim old quoted header */
2524 pf_cksum_uncover(pd->pcksum, in_cksum(n, ohlen), pd->proto);
2525 m_adj(n, ohlen);
2526
2527 /* prepend a new, translated, quoted header */
2528 if ((M_PREPEND(n, hlen, M_DONTWAIT)) == NULL)
2529 return (-1);
2530
2531 switch (naf) {
2532 case AF_INET:
2533 ip4 = mtod(n, struct ip *);
2534 memset(ip4, 0, sizeof(*ip4));
2535 ip4->ip_v = IPVERSION;
2536 ip4->ip_hl = sizeof(*ip4) >> 2;
2537 ip4->ip_len = htons(sizeof(*ip4) + pd2->tot_len - ohlen);
2538 ip4->ip_id = htons(ip_randomid());
2539 ip4->ip_off = htons(IP_DF);
2540 ip4->ip_ttl = pd2->ttl;
2541 if (pd2->proto == IPPROTO_ICMPV6)
2542 ip4->ip_p = IPPROTO_ICMP;
2543 else
2544 ip4->ip_p = pd2->proto;
2545 ip4->ip_src = src->v4;
2546 ip4->ip_dst = dst->v4;
2547 ip4->ip_sum = in_cksum(n, ip4->ip_hl << 2);
2548 break;
2549 case AF_INET6:
2550 ip6 = mtod(n, struct ip6_hdr *);
2551 memset(ip6, 0, sizeof(*ip6));
2552 ip6->ip6_vfc = IPV6_VERSION;
2553 ip6->ip6_plen = htons(pd2->tot_len - ohlen);
2554 if (pd2->proto == IPPROTO_ICMP)
2555 ip6->ip6_nxt = IPPROTO_ICMPV6;
2556 else
2557 ip6->ip6_nxt = pd2->proto;
2558 if (!pd2->ttl || pd2->ttl > IPV6_DEFHLIM)
2559 ip6->ip6_hlim = IPV6_DEFHLIM;
2560 else
2561 ip6->ip6_hlim = pd2->ttl;
2562 ip6->ip6_src = src->v6;
2563 ip6->ip6_dst = dst->v6;
2564 break;
2565 }
2566
2567 /* cover new quoted header */
2568 /* optimise: any new AF_INET header of ours sums to zero */
2569 if (naf != AF_INET) {
2570 pf_cksum_cover(pd->pcksum, in_cksum(n, hlen), pd->proto);
2571 }
2572
2573 /* reattach modified quoted packet to outer header */
2574 {
2575 int nlen = n->m_pkthdr.len;
2576 m_cat(m, n);
2577 m->m_pkthdr.len += nlen;
2578 }
2579
2580 /* account for altered length */
2581 d = hlen - ohlen;
2582
2583 if (pd->proto == IPPROTO_ICMPV6) {
2584 /* fixup pseudo-header */
2585 dlen = pd->tot_len - pd->off;
2586 pf_cksum_fixup(pd->pcksum,
2587 htons(dlen), htons(dlen + d), pd->proto);
2588 }
2589
2590 pd->tot_len += d;
2591 pd2->tot_len += d;
2592 pd2->off += d;
2593
2594 /* note: not bothering to update the network headers, as these are
2595 due for rewrite by pf_translate_af() */
2596
2597 return (0);
2598 }
2599
2600
2601 #define PTR_IP(field) (offsetof(struct ip, field))
2602 #define PTR_IP6(field) (offsetof(struct ip6_hdr, field))
2603
2604 int
2605 pf_translate_icmp_af(struct pf_pdesc *pd, int af, void *arg)
2606 {
2607 struct icmp *icmp4;
2608 struct icmp6_hdr *icmp6;
2609 u_int32_t mtu;
2610 int32_t ptr = -1;
2611 u_int8_t type;
2612 u_int8_t code;
2613
2614 switch (af) {
2615 case AF_INET:
2616 icmp6 = arg;
2617 type = icmp6->icmp6_type;
2618 code = icmp6->icmp6_code;
2619 mtu = ntohl(icmp6->icmp6_mtu);
2620
2621 switch (type) {
2622 case ICMP6_ECHO_REQUEST:
2623 type = ICMP_ECHO;
2624 break;
2625 case ICMP6_ECHO_REPLY:
2626 type = ICMP_ECHOREPLY;
2627 break;
2628 case ICMP6_DST_UNREACH:
2629 type = ICMP_UNREACH;
2630 switch (code) {
2631 case ICMP6_DST_UNREACH_NOROUTE:
2632 case ICMP6_DST_UNREACH_BEYONDSCOPE:
2633 case ICMP6_DST_UNREACH_ADDR:
2634 code = ICMP_UNREACH_HOST;
2635 break;
2636 case ICMP6_DST_UNREACH_ADMIN:
2637 code = ICMP_UNREACH_HOST_PROHIB;
2638 break;
2639 case ICMP6_DST_UNREACH_NOPORT:
2640 code = ICMP_UNREACH_PORT;
2641 break;
2642 default:
2643 return (-1);
2644 }
2645 break;
2646 case ICMP6_PACKET_TOO_BIG:
2647 type = ICMP_UNREACH;
2648 code = ICMP_UNREACH_NEEDFRAG;
2649 mtu -= 20; /* the IPv6 header is 20 bytes larger than the IPv4 one */
2650 break;
2651 case ICMP6_TIME_EXCEEDED:
2652 type = ICMP_TIMXCEED;
2653 break;
2654 case ICMP6_PARAM_PROB:
2655 switch (code) {
2656 case ICMP6_PARAMPROB_HEADER:
2657 type = ICMP_PARAMPROB;
2658 code = ICMP_PARAMPROB_ERRATPTR;
2659 ptr = ntohl(icmp6->icmp6_pptr);
2660
2661 if (ptr == PTR_IP6(ip6_vfc))
2662 ; /* preserve */
2663 else if (ptr == PTR_IP6(ip6_vfc) + 1)
2664 ptr = PTR_IP(ip_tos);
2665 else if (ptr == PTR_IP6(ip6_plen) ||
2666 ptr == PTR_IP6(ip6_plen) + 1)
2667 ptr = PTR_IP(ip_len);
2668 else if (ptr == PTR_IP6(ip6_nxt))
2669 ptr = PTR_IP(ip_p);
2670 else if (ptr == PTR_IP6(ip6_hlim))
2671 ptr = PTR_IP(ip_ttl);
2672 else if (ptr >= PTR_IP6(ip6_src) &&
2673 ptr < PTR_IP6(ip6_dst))
2674 ptr = PTR_IP(ip_src);
2675 else if (ptr >= PTR_IP6(ip6_dst) &&
2676 ptr < sizeof(struct ip6_hdr))
2677 ptr = PTR_IP(ip_dst);
2678 else {
2679 return (-1);
2680 }
2681 break;
2682 case ICMP6_PARAMPROB_NEXTHEADER:
2683 type = ICMP_UNREACH;
2684 code = ICMP_UNREACH_PROTOCOL;
2685 break;
2686 default:
2687 return (-1);
2688 }
2689 break;
2690 default:
2691 return (-1);
2692 }
2693
2694 pf_patch_8(pd, &icmp6->icmp6_type, type, PF_HI);
2695 pf_patch_8(pd, &icmp6->icmp6_code, code, PF_LO);
2696
2697 /* aligns well with an icmpv4 nextmtu */
2698 pf_patch_32(pd, &icmp6->icmp6_mtu, htonl(mtu));
2699
2700 /* the icmpv4 pptr is a single byte, the most significant one */
2701 if (ptr >= 0)
2702 pf_patch_32(pd, &icmp6->icmp6_pptr, htonl(ptr << 24));
2703 break;
2704 case AF_INET6:
2705 icmp4 = arg;
2706 type = icmp4->icmp_type;
2707 code = icmp4->icmp_code;
2708 mtu = ntohs(icmp4->icmp_nextmtu);
2709
2710 switch (type) {
2711 case ICMP_ECHO:
2712 type = ICMP6_ECHO_REQUEST;
2713 break;
2714 case ICMP_ECHOREPLY:
2715 type = ICMP6_ECHO_REPLY;
2716 break;
2717 case ICMP_UNREACH:
2718 type = ICMP6_DST_UNREACH;
2719 switch (code) {
2720 case ICMP_UNREACH_NET:
2721 case ICMP_UNREACH_HOST:
2722 case ICMP_UNREACH_NET_UNKNOWN:
2723 case ICMP_UNREACH_HOST_UNKNOWN:
2724 case ICMP_UNREACH_ISOLATED:
2725 case ICMP_UNREACH_TOSNET:
2726 case ICMP_UNREACH_TOSHOST:
2727 code = ICMP6_DST_UNREACH_NOROUTE;
2728 break;
2729 case ICMP_UNREACH_PORT:
2730 code = ICMP6_DST_UNREACH_NOPORT;
2731 break;
2732 case ICMP_UNREACH_NET_PROHIB:
2733 case ICMP_UNREACH_HOST_PROHIB:
2734 case ICMP_UNREACH_FILTER_PROHIB:
2735 case ICMP_UNREACH_PRECEDENCE_CUTOFF:
2736 code = ICMP6_DST_UNREACH_ADMIN;
2737 break;
2738 case ICMP_UNREACH_PROTOCOL:
2739 type = ICMP6_PARAM_PROB;
2740 code = ICMP6_PARAMPROB_NEXTHEADER;
2741 ptr = offsetof(struct ip6_hdr, ip6_nxt);
2742 break;
2743 case ICMP_UNREACH_NEEDFRAG:
2744 type = ICMP6_PACKET_TOO_BIG;
2745 code = 0;
2746 mtu += 20; /* the IPv6 header is 20 bytes larger than the IPv4 one */
2747 break;
2748 default:
2749 return (-1);
2750 }
2751 break;
2752 case ICMP_TIMXCEED:
2753 type = ICMP6_TIME_EXCEEDED;
2754 break;
2755 case ICMP_PARAMPROB:
2756 type = ICMP6_PARAM_PROB;
2757 switch (code) {
2758 case ICMP_PARAMPROB_ERRATPTR:
2759 code = ICMP6_PARAMPROB_HEADER;
2760 break;
2761 case ICMP_PARAMPROB_LENGTH:
2762 code = ICMP6_PARAMPROB_HEADER;
2763 break;
2764 default:
2765 return (-1);
2766 }
2767
2768 ptr = icmp4->icmp_pptr;
2769 if (ptr == 0 || ptr == PTR_IP(ip_tos))
2770 ; /* preserve */
2771 else if (ptr == PTR_IP(ip_len) ||
2772 ptr == PTR_IP(ip_len) + 1)
2773 ptr = PTR_IP6(ip6_plen);
2774 else if (ptr == PTR_IP(ip_ttl))
2775 ptr = PTR_IP6(ip6_hlim);
2776 else if (ptr == PTR_IP(ip_p))
2777 ptr = PTR_IP6(ip6_nxt);
2778 else if (ptr >= PTR_IP(ip_src) &&
2779 ptr < PTR_IP(ip_dst))
2780 ptr = PTR_IP6(ip6_src);
2781 else if (ptr >= PTR_IP(ip_dst) &&
2782 ptr < sizeof(struct ip))
2783 ptr =
PTR_IP6(ip6_dst); 2784 else { 2785 return (-1); 2786 } 2787 break; 2788 default: 2789 return (-1); 2790 } 2791 2792 pf_patch_8(pd, &icmp4->icmp_type, type, PF_HI); 2793 pf_patch_8(pd, &icmp4->icmp_code, code, PF_LO); 2794 pf_patch_16(pd, &icmp4->icmp_nextmtu, htons(mtu)); 2795 if (ptr >= 0) 2796 pf_patch_32(pd, &icmp4->icmp_void, htonl(ptr)); 2797 break; 2798 } 2799 2800 return (0); 2801 } 2802 #endif /* INET6 */ 2803 2804 /* 2805 * Need to modulate the sequence numbers in the TCP SACK option 2806 * (credits to Krzysztof Pfaff for report and patch) 2807 */ 2808 int 2809 pf_modulate_sack(struct pf_pdesc *pd, struct pf_state_peer *dst) 2810 { 2811 struct sackblk sack; 2812 int copyback = 0, i; 2813 int olen, optsoff; 2814 u_int8_t opts[MAX_TCPOPTLEN], *opt, *eoh; 2815 2816 olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr); 2817 optsoff = pd->off + sizeof(struct tcphdr); 2818 #define TCPOLEN_MINSACK (TCPOLEN_SACK + 2) 2819 if (olen < TCPOLEN_MINSACK || 2820 !pf_pull_hdr(pd->m, optsoff, opts, olen, NULL, NULL, pd->af)) 2821 return (0); 2822 2823 eoh = opts + olen; 2824 opt = opts; 2825 while ((opt = pf_find_tcpopt(opt, opts, olen, 2826 TCPOPT_SACK, TCPOLEN_MINSACK)) != NULL) 2827 { 2828 size_t safelen = MIN(opt[1], (eoh - opt)); 2829 for (i = 2; i + TCPOLEN_SACK <= safelen; i += TCPOLEN_SACK) { 2830 size_t startoff = (opt + i) - opts; 2831 memcpy(&sack, &opt[i], sizeof(sack)); 2832 pf_patch_32_unaligned(pd, &sack.start, 2833 htonl(ntohl(sack.start) - dst->seqdiff), 2834 PF_ALGNMNT(startoff)); 2835 pf_patch_32_unaligned(pd, &sack.end, 2836 htonl(ntohl(sack.end) - dst->seqdiff), 2837 PF_ALGNMNT(startoff + sizeof(sack.start))); 2838 memcpy(&opt[i], &sack, sizeof(sack)); 2839 } 2840 copyback = 1; 2841 opt += opt[1]; 2842 } 2843 2844 if (copyback) 2845 m_copyback(pd->m, optsoff, olen, opts, M_NOWAIT); 2846 return (copyback); 2847 } 2848 2849 struct mbuf * 2850 pf_build_tcp(const struct pf_rule *r, sa_family_t af, 2851 const struct pf_addr *saddr, const struct pf_addr *daddr, 2852 u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack, 2853 u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag, 2854 u_int16_t rtag, u_int sack, u_int rdom) 2855 { 2856 struct mbuf *m; 2857 int len, tlen; 2858 struct ip *h; 2859 #ifdef INET6 2860 struct ip6_hdr *h6; 2861 #endif /* INET6 */ 2862 struct tcphdr *th; 2863 char *opt; 2864 2865 /* maximum segment size tcp option */ 2866 tlen = sizeof(struct tcphdr); 2867 if (mss) 2868 tlen += 4; 2869 if (sack) 2870 tlen += 2; 2871 2872 switch (af) { 2873 case AF_INET: 2874 len = sizeof(struct ip) + tlen; 2875 break; 2876 #ifdef INET6 2877 case AF_INET6: 2878 len = sizeof(struct ip6_hdr) + tlen; 2879 break; 2880 #endif /* INET6 */ 2881 default: 2882 unhandled_af(af); 2883 } 2884 2885 /* create outgoing mbuf */ 2886 m = m_gethdr(M_DONTWAIT, MT_HEADER); 2887 if (m == NULL) 2888 return (NULL); 2889 if (tag) 2890 m->m_pkthdr.pf.flags |= PF_TAG_GENERATED; 2891 m->m_pkthdr.pf.tag = rtag; 2892 m->m_pkthdr.ph_rtableid = rdom; 2893 if (r && (r->scrub_flags & PFSTATE_SETPRIO)) 2894 m->m_pkthdr.pf.prio = r->set_prio[0]; 2895 if (r && r->qid) 2896 m->m_pkthdr.pf.qid = r->qid; 2897 m->m_data += max_linkhdr; 2898 m->m_pkthdr.len = m->m_len = len; 2899 m->m_pkthdr.ph_ifidx = 0; 2900 m->m_pkthdr.csum_flags |= M_TCP_CSUM_OUT; 2901 memset(m->m_data, 0, len); 2902 switch (af) { 2903 case AF_INET: 2904 h = mtod(m, struct ip *); 2905 h->ip_p = IPPROTO_TCP; 2906 h->ip_len = htons(tlen); 2907 h->ip_v = 4; 2908 h->ip_hl = sizeof(*h) >> 2; 2909 h->ip_tos = IPTOS_LOWDELAY; 
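/* the full ip + tcp length set below supersedes the htons(tlen) above */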
2910 h->ip_len = htons(len);
2911 h->ip_off = htons(ip_mtudisc ? IP_DF : 0);
2912 h->ip_ttl = ttl ? ttl : ip_defttl;
2913 h->ip_sum = 0;
2914 h->ip_src.s_addr = saddr->v4.s_addr;
2915 h->ip_dst.s_addr = daddr->v4.s_addr;
2916
2917 th = (struct tcphdr *)((caddr_t)h + sizeof(struct ip));
2918 break;
2919 #ifdef INET6
2920 case AF_INET6:
2921 h6 = mtod(m, struct ip6_hdr *);
2922 h6->ip6_nxt = IPPROTO_TCP;
2923 h6->ip6_plen = htons(tlen);
2924 h6->ip6_vfc |= IPV6_VERSION;
2925 h6->ip6_hlim = IPV6_DEFHLIM;
2926 memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr));
2927 memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr));
2928
2929 th = (struct tcphdr *)((caddr_t)h6 + sizeof(struct ip6_hdr));
2930 break;
2931 #endif /* INET6 */
2932 default:
2933 unhandled_af(af);
2934 }
2935
2936 /* TCP header */
2937 th->th_sport = sport;
2938 th->th_dport = dport;
2939 th->th_seq = htonl(seq);
2940 th->th_ack = htonl(ack);
2941 th->th_off = tlen >> 2;
2942 th->th_flags = flags;
2943 th->th_win = htons(win);
2944
2945 opt = (char *)(th + 1);
2946 if (mss) {
2947 opt[0] = TCPOPT_MAXSEG;
2948 opt[1] = 4;
2949 mss = htons(mss);
2950 memcpy((opt + 2), &mss, 2);
2951 opt += 4;
2952 }
2953 if (sack) {
2954 opt[0] = TCPOPT_SACK_PERMITTED;
2955 opt[1] = 2;
2956 opt += 2;
2957 }
2958
2959 return (m);
2960 }
2961
2962 void
2963 pf_send_tcp(const struct pf_rule *r, sa_family_t af,
2964 const struct pf_addr *saddr, const struct pf_addr *daddr,
2965 u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack,
2966 u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag,
2967 u_int16_t rtag, u_int rdom)
2968 {
2969 struct mbuf *m;
2970
2971 if ((m = pf_build_tcp(r, af, saddr, daddr, sport, dport, seq, ack,
2972 flags, win, mss, ttl, tag, rtag, 0, rdom)) == NULL)
2973 return;
2974
2975 switch (af) {
2976 case AF_INET:
2977 ip_send(m);
2978 break;
2979 #ifdef INET6
2980 case AF_INET6:
2981 ip6_send(m);
2982 break;
2983 #endif /* INET6 */
2984 }
2985 }
2986
2987 static void
2988 pf_send_challenge_ack(struct pf_pdesc *pd, struct pf_state *s,
2989 struct pf_state_peer *src, struct pf_state_peer *dst)
2990 {
2991 /*
2992 * We are sending a challenge ACK (cf. RFC 5961) as a response to a
2993 * SYN packet which matches an existing state (modulo the TCP window
2994 * check). The packet must therefore be sent on behalf of the
2995 * destination.
2996 *
2997 * We expect the sender to either remain silent or send a RST packet,
2998 * so that both the firewall and the remote peer can purge the dead state from memory.
2999 */
3000 pf_send_tcp(s->rule.ptr, pd->af, pd->dst, pd->src,
3001 pd->hdr.tcp.th_dport, pd->hdr.tcp.th_sport, dst->seqlo,
3002 src->seqlo, TH_ACK, 0, 0, s->rule.ptr->return_ttl, 1, 0,
3003 pd->rdomain);
3004 }
3005
3006 void
3007 pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, int param,
3008 sa_family_t af, struct pf_rule *r, u_int rdomain)
3009 {
3010 struct mbuf *m0;
3011
3012 if ((m0 = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL)
3013 return;
3014
3015 m0->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
3016 m0->m_pkthdr.ph_rtableid = rdomain;
3017 if (r && (r->scrub_flags & PFSTATE_SETPRIO))
3018 m0->m_pkthdr.pf.prio = r->set_prio[0];
3019 if (r && r->qid)
3020 m0->m_pkthdr.pf.qid = r->qid;
3021
3022 switch (af) {
3023 case AF_INET:
3024 icmp_error(m0, type, code, 0, param);
3025 break;
3026 #ifdef INET6
3027 case AF_INET6:
3028 icmp6_error(m0, type, code, param);
3029 break;
3030 #endif /* INET6 */
3031 }
3032 }
3033
3034 /*
3035 * Return ((n == 0) == (a == b [with mask m]))
3036 * Note: n != 0 => returns (a != b [with mask m])
3037 */
3038 int
3039 pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m,
3040 struct pf_addr *b, sa_family_t af)
3041 {
3042 switch (af) {
3043 case AF_INET:
3044 if ((a->addr32[0] & m->addr32[0]) ==
3045 (b->addr32[0] & m->addr32[0]))
3046 return (n == 0);
3047 break;
3048 #ifdef INET6
3049 case AF_INET6:
3050 if (((a->addr32[0] & m->addr32[0]) ==
3051 (b->addr32[0] & m->addr32[0])) &&
3052 ((a->addr32[1] & m->addr32[1]) ==
3053 (b->addr32[1] & m->addr32[1])) &&
3054 ((a->addr32[2] & m->addr32[2]) ==
3055 (b->addr32[2] & m->addr32[2])) &&
3056 ((a->addr32[3] & m->addr32[3]) ==
3057 (b->addr32[3] & m->addr32[3])))
3058 return (n == 0);
3059 break;
3060 #endif /* INET6 */
3061 }
3062
3063 return (n != 0);
3064 }
3065
3066 /*
3067 * Return 1 if b <= a <= e, otherwise return 0.
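 * Words are compared in host byte order, e.g. for af = AF_INET,
 * b = 10.0.0.10 and e = 10.0.0.20, a = 10.0.0.15 matches while
 * a = 10.0.0.21 does not.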
3068 */ 3069 int 3070 pf_match_addr_range(struct pf_addr *b, struct pf_addr *e, 3071 struct pf_addr *a, sa_family_t af) 3072 { 3073 switch (af) { 3074 case AF_INET: 3075 if ((ntohl(a->addr32[0]) < ntohl(b->addr32[0])) || 3076 (ntohl(a->addr32[0]) > ntohl(e->addr32[0]))) 3077 return (0); 3078 break; 3079 #ifdef INET6 3080 case AF_INET6: { 3081 int i; 3082 3083 /* check a >= b */ 3084 for (i = 0; i < 4; ++i) 3085 if (ntohl(a->addr32[i]) > ntohl(b->addr32[i])) 3086 break; 3087 else if (ntohl(a->addr32[i]) < ntohl(b->addr32[i])) 3088 return (0); 3089 /* check a <= e */ 3090 for (i = 0; i < 4; ++i) 3091 if (ntohl(a->addr32[i]) < ntohl(e->addr32[i])) 3092 break; 3093 else if (ntohl(a->addr32[i]) > ntohl(e->addr32[i])) 3094 return (0); 3095 break; 3096 } 3097 #endif /* INET6 */ 3098 } 3099 return (1); 3100 } 3101 3102 int 3103 pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p) 3104 { 3105 switch (op) { 3106 case PF_OP_IRG: 3107 return ((p > a1) && (p < a2)); 3108 case PF_OP_XRG: 3109 return ((p < a1) || (p > a2)); 3110 case PF_OP_RRG: 3111 return ((p >= a1) && (p <= a2)); 3112 case PF_OP_EQ: 3113 return (p == a1); 3114 case PF_OP_NE: 3115 return (p != a1); 3116 case PF_OP_LT: 3117 return (p < a1); 3118 case PF_OP_LE: 3119 return (p <= a1); 3120 case PF_OP_GT: 3121 return (p > a1); 3122 case PF_OP_GE: 3123 return (p >= a1); 3124 } 3125 return (0); /* never reached */ 3126 } 3127 3128 int 3129 pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p) 3130 { 3131 return (pf_match(op, ntohs(a1), ntohs(a2), ntohs(p))); 3132 } 3133 3134 int 3135 pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u) 3136 { 3137 if (u == -1 && op != PF_OP_EQ && op != PF_OP_NE) 3138 return (0); 3139 return (pf_match(op, a1, a2, u)); 3140 } 3141 3142 int 3143 pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g) 3144 { 3145 if (g == -1 && op != PF_OP_EQ && op != PF_OP_NE) 3146 return (0); 3147 return (pf_match(op, a1, a2, g)); 3148 } 3149 3150 int 3151 pf_match_tag(struct mbuf *m, struct pf_rule *r, int *tag) 3152 { 3153 if (*tag == -1) 3154 *tag = m->m_pkthdr.pf.tag; 3155 3156 return ((!r->match_tag_not && r->match_tag == *tag) || 3157 (r->match_tag_not && r->match_tag != *tag)); 3158 } 3159 3160 int 3161 pf_match_rcvif(struct mbuf *m, struct pf_rule *r) 3162 { 3163 struct ifnet *ifp; 3164 #if NCARP > 0 3165 struct ifnet *ifp0; 3166 #endif 3167 struct pfi_kif *kif; 3168 3169 ifp = if_get(m->m_pkthdr.ph_ifidx); 3170 if (ifp == NULL) 3171 return (0); 3172 3173 #if NCARP > 0 3174 if (ifp->if_type == IFT_CARP && 3175 (ifp0 = if_get(ifp->if_carpdevidx)) != NULL) { 3176 kif = (struct pfi_kif *)ifp0->if_pf_kif; 3177 if_put(ifp0); 3178 } else 3179 #endif /* NCARP */ 3180 kif = (struct pfi_kif *)ifp->if_pf_kif; 3181 3182 if_put(ifp); 3183 3184 if (kif == NULL) { 3185 DPFPRINTF(LOG_ERR, 3186 "%s: kif == NULL, @%d via %s", __func__, 3187 r->nr, r->rcv_ifname); 3188 return (0); 3189 } 3190 3191 return (pfi_kif_match(r->rcv_kif, kif)); 3192 } 3193 3194 void 3195 pf_tag_packet(struct mbuf *m, int tag, int rtableid) 3196 { 3197 if (tag > 0) 3198 m->m_pkthdr.pf.tag = tag; 3199 if (rtableid >= 0) 3200 m->m_pkthdr.ph_rtableid = (u_int)rtableid; 3201 } 3202 3203 enum pf_test_status 3204 pf_step_into_anchor(struct pf_test_ctx *ctx, struct pf_rule *r) 3205 { 3206 int rv; 3207 3208 if (ctx->depth >= PF_ANCHOR_STACK_MAX) { 3209 log(LOG_ERR, "pf_step_into_anchor: stack overflow\n"); 3210 return (PF_TEST_FAIL); 3211 } 3212 3213 ctx->depth++; 3214 3215 if (r->anchor_wildcard) { 3216 struct pf_anchor *child; 3217 rv = 
PF_TEST_OK; 3218 RB_FOREACH(child, pf_anchor_node, &r->anchor->children) { 3219 rv = pf_match_rule(ctx, &child->ruleset); 3220 if ((rv == PF_TEST_QUICK) || (rv == PF_TEST_FAIL)) { 3221 /* 3222 * we either hit a rule with quick action 3223 * (more likely), or hit some runtime 3224 * error (e.g. pool_get() failure). 3225 */ 3226 break; 3227 } 3228 } 3229 } else { 3230 rv = pf_match_rule(ctx, &r->anchor->ruleset); 3231 /* 3232 * Unless errors occurred, stop iff any rule matched 3233 * within quick anchors. 3234 */ 3235 if (rv != PF_TEST_FAIL && r->quick == PF_TEST_QUICK && 3236 *ctx->am == r) 3237 rv = PF_TEST_QUICK; 3238 } 3239 3240 ctx->depth--; 3241 3242 return (rv); 3243 } 3244 3245 void 3246 pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr, 3247 struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af) 3248 { 3249 switch (af) { 3250 case AF_INET: 3251 naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) | 3252 ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]); 3253 break; 3254 #ifdef INET6 3255 case AF_INET6: 3256 naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) | 3257 ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]); 3258 naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) | 3259 ((rmask->addr32[1] ^ 0xffffffff ) & saddr->addr32[1]); 3260 naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) | 3261 ((rmask->addr32[2] ^ 0xffffffff ) & saddr->addr32[2]); 3262 naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) | 3263 ((rmask->addr32[3] ^ 0xffffffff ) & saddr->addr32[3]); 3264 break; 3265 #endif /* INET6 */ 3266 default: 3267 unhandled_af(af); 3268 } 3269 } 3270 3271 void 3272 pf_addr_inc(struct pf_addr *addr, sa_family_t af) 3273 { 3274 switch (af) { 3275 case AF_INET: 3276 addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1); 3277 break; 3278 #ifdef INET6 3279 case AF_INET6: 3280 if (addr->addr32[3] == 0xffffffff) { 3281 addr->addr32[3] = 0; 3282 if (addr->addr32[2] == 0xffffffff) { 3283 addr->addr32[2] = 0; 3284 if (addr->addr32[1] == 0xffffffff) { 3285 addr->addr32[1] = 0; 3286 addr->addr32[0] = 3287 htonl(ntohl(addr->addr32[0]) + 1); 3288 } else 3289 addr->addr32[1] = 3290 htonl(ntohl(addr->addr32[1]) + 1); 3291 } else 3292 addr->addr32[2] = 3293 htonl(ntohl(addr->addr32[2]) + 1); 3294 } else 3295 addr->addr32[3] = 3296 htonl(ntohl(addr->addr32[3]) + 1); 3297 break; 3298 #endif /* INET6 */ 3299 default: 3300 unhandled_af(af); 3301 } 3302 } 3303 3304 int 3305 pf_socket_lookup(struct pf_pdesc *pd) 3306 { 3307 struct pf_addr *saddr, *daddr; 3308 u_int16_t sport, dport; 3309 struct inpcbtable *tb; 3310 struct inpcb *inp; 3311 3312 pd->lookup.uid = -1; 3313 pd->lookup.gid = -1; 3314 pd->lookup.pid = NO_PID; 3315 switch (pd->virtual_proto) { 3316 case IPPROTO_TCP: 3317 sport = pd->hdr.tcp.th_sport; 3318 dport = pd->hdr.tcp.th_dport; 3319 PF_ASSERT_LOCKED(); 3320 NET_ASSERT_LOCKED(); 3321 tb = &tcbtable; 3322 break; 3323 case IPPROTO_UDP: 3324 sport = pd->hdr.udp.uh_sport; 3325 dport = pd->hdr.udp.uh_dport; 3326 PF_ASSERT_LOCKED(); 3327 NET_ASSERT_LOCKED(); 3328 tb = &udbtable; 3329 break; 3330 default: 3331 return (-1); 3332 } 3333 if (pd->dir == PF_IN) { 3334 saddr = pd->src; 3335 daddr = pd->dst; 3336 } else { 3337 u_int16_t p; 3338 3339 p = sport; 3340 sport = dport; 3341 dport = p; 3342 saddr = pd->dst; 3343 daddr = pd->src; 3344 } 3345 switch (pd->af) { 3346 case AF_INET: 3347 /* 3348 * Fails when rtable is changed while evaluating the ruleset 3349 * The socket looked up will not match the one hit in the end. 
3350 */ 3351 inp = in_pcbhashlookup(tb, saddr->v4, sport, daddr->v4, dport, 3352 pd->rdomain); 3353 if (inp == NULL) { 3354 inp = in_pcblookup_listen(tb, daddr->v4, dport, 3355 NULL, pd->rdomain); 3356 if (inp == NULL) 3357 return (-1); 3358 } 3359 break; 3360 #ifdef INET6 3361 case AF_INET6: 3362 inp = in6_pcbhashlookup(tb, &saddr->v6, sport, &daddr->v6, 3363 dport, pd->rdomain); 3364 if (inp == NULL) { 3365 inp = in6_pcblookup_listen(tb, &daddr->v6, dport, 3366 NULL, pd->rdomain); 3367 if (inp == NULL) 3368 return (-1); 3369 } 3370 break; 3371 #endif /* INET6 */ 3372 default: 3373 unhandled_af(pd->af); 3374 } 3375 pd->lookup.uid = inp->inp_socket->so_euid; 3376 pd->lookup.gid = inp->inp_socket->so_egid; 3377 pd->lookup.pid = inp->inp_socket->so_cpid; 3378 return (1); 3379 } 3380 3381 /* post: r => (r[0] == type /\ r[1] >= min_typelen >= 2 "validity" 3382 * /\ (eoh - r) >= min_typelen >= 2 "safety" ) 3383 * 3384 * warning: r + r[1] may exceed opts bounds for r[1] > min_typelen 3385 */ 3386 u_int8_t* 3387 pf_find_tcpopt(u_int8_t *opt, u_int8_t *opts, size_t hlen, u_int8_t type, 3388 u_int8_t min_typelen) 3389 { 3390 u_int8_t *eoh = opts + hlen; 3391 3392 if (min_typelen < 2) 3393 return (NULL); 3394 3395 while ((eoh - opt) >= min_typelen) { 3396 switch (*opt) { 3397 case TCPOPT_EOL: 3398 /* FALLTHROUGH - Workaround the failure of some 3399 systems to NOP-pad their bzero'd option buffers, 3400 producing spurious EOLs */ 3401 case TCPOPT_NOP: 3402 opt++; 3403 continue; 3404 default: 3405 if (opt[0] == type && 3406 opt[1] >= min_typelen) 3407 return (opt); 3408 } 3409 3410 opt += MAX(opt[1], 2); /* evade infinite loops */ 3411 } 3412 3413 return (NULL); 3414 } 3415 3416 u_int8_t 3417 pf_get_wscale(struct pf_pdesc *pd) 3418 { 3419 int olen; 3420 u_int8_t opts[MAX_TCPOPTLEN], *opt; 3421 u_int8_t wscale = 0; 3422 3423 olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr); 3424 if (olen < TCPOLEN_WINDOW || !pf_pull_hdr(pd->m, 3425 pd->off + sizeof(struct tcphdr), opts, olen, NULL, NULL, pd->af)) 3426 return (0); 3427 3428 opt = opts; 3429 while ((opt = pf_find_tcpopt(opt, opts, olen, 3430 TCPOPT_WINDOW, TCPOLEN_WINDOW)) != NULL) { 3431 wscale = opt[2]; 3432 wscale = MIN(wscale, TCP_MAX_WINSHIFT); 3433 wscale |= PF_WSCALE_FLAG; 3434 3435 opt += opt[1]; 3436 } 3437 3438 return (wscale); 3439 } 3440 3441 u_int16_t 3442 pf_get_mss(struct pf_pdesc *pd) 3443 { 3444 int olen; 3445 u_int8_t opts[MAX_TCPOPTLEN], *opt; 3446 u_int16_t mss = tcp_mssdflt; 3447 3448 olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr); 3449 if (olen < TCPOLEN_MAXSEG || !pf_pull_hdr(pd->m, 3450 pd->off + sizeof(struct tcphdr), opts, olen, NULL, NULL, pd->af)) 3451 return (0); 3452 3453 opt = opts; 3454 while ((opt = pf_find_tcpopt(opt, opts, olen, 3455 TCPOPT_MAXSEG, TCPOLEN_MAXSEG)) != NULL) { 3456 memcpy(&mss, (opt + 2), 2); 3457 mss = ntohs(mss); 3458 3459 opt += opt[1]; 3460 } 3461 return (mss); 3462 } 3463 3464 u_int16_t 3465 pf_calc_mss(struct pf_addr *addr, sa_family_t af, int rtableid, u_int16_t offer) 3466 { 3467 struct ifnet *ifp; 3468 struct sockaddr_in *dst; 3469 #ifdef INET6 3470 struct sockaddr_in6 *dst6; 3471 #endif /* INET6 */ 3472 struct rtentry *rt = NULL; 3473 struct sockaddr_storage ss; 3474 int hlen; 3475 u_int16_t mss = tcp_mssdflt; 3476 3477 memset(&ss, 0, sizeof(ss)); 3478 3479 switch (af) { 3480 case AF_INET: 3481 hlen = sizeof(struct ip); 3482 dst = (struct sockaddr_in *)&ss; 3483 dst->sin_family = AF_INET; 3484 dst->sin_len = sizeof(*dst); 3485 dst->sin_addr = addr->v4; 3486 rt = 
rtalloc(sintosa(dst), 0, rtableid); 3487 break; 3488 #ifdef INET6 3489 case AF_INET6: 3490 hlen = sizeof(struct ip6_hdr); 3491 dst6 = (struct sockaddr_in6 *)&ss; 3492 dst6->sin6_family = AF_INET6; 3493 dst6->sin6_len = sizeof(*dst6); 3494 dst6->sin6_addr = addr->v6; 3495 rt = rtalloc(sin6tosa(dst6), 0, rtableid); 3496 break; 3497 #endif /* INET6 */ 3498 } 3499 3500 if (rt != NULL && (ifp = if_get(rt->rt_ifidx)) != NULL) { 3501 mss = ifp->if_mtu - hlen - sizeof(struct tcphdr); 3502 mss = max(tcp_mssdflt, mss); 3503 if_put(ifp); 3504 } 3505 rtfree(rt); 3506 mss = min(mss, offer); 3507 mss = max(mss, 64); /* sanity - at least max opt space */ 3508 return (mss); 3509 } 3510 3511 static __inline int 3512 pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr, sa_family_t af, 3513 struct pf_src_node **sns) 3514 { 3515 struct pf_rule *r = s->rule.ptr; 3516 int rv; 3517 3518 if (!r->rt) 3519 return (0); 3520 3521 rv = pf_map_addr(af, r, saddr, &s->rt_addr, NULL, sns, 3522 &r->route, PF_SN_ROUTE); 3523 if (rv == 0) 3524 s->rt = r->rt; 3525 3526 return (rv); 3527 } 3528 3529 u_int32_t 3530 pf_tcp_iss(struct pf_pdesc *pd) 3531 { 3532 SHA2_CTX ctx; 3533 union { 3534 uint8_t bytes[SHA512_DIGEST_LENGTH]; 3535 uint32_t words[1]; 3536 } digest; 3537 3538 if (pf_tcp_secret_init == 0) { 3539 arc4random_buf(pf_tcp_secret, sizeof(pf_tcp_secret)); 3540 SHA512Init(&pf_tcp_secret_ctx); 3541 SHA512Update(&pf_tcp_secret_ctx, pf_tcp_secret, 3542 sizeof(pf_tcp_secret)); 3543 pf_tcp_secret_init = 1; 3544 } 3545 ctx = pf_tcp_secret_ctx; 3546 3547 SHA512Update(&ctx, &pd->rdomain, sizeof(pd->rdomain)); 3548 SHA512Update(&ctx, &pd->hdr.tcp.th_sport, sizeof(u_short)); 3549 SHA512Update(&ctx, &pd->hdr.tcp.th_dport, sizeof(u_short)); 3550 switch (pd->af) { 3551 case AF_INET: 3552 SHA512Update(&ctx, &pd->src->v4, sizeof(struct in_addr)); 3553 SHA512Update(&ctx, &pd->dst->v4, sizeof(struct in_addr)); 3554 break; 3555 #ifdef INET6 3556 case AF_INET6: 3557 SHA512Update(&ctx, &pd->src->v6, sizeof(struct in6_addr)); 3558 SHA512Update(&ctx, &pd->dst->v6, sizeof(struct in6_addr)); 3559 break; 3560 #endif /* INET6 */ 3561 } 3562 SHA512Final(digest.bytes, &ctx); 3563 pf_tcp_iss_off += 4096; 3564 return (digest.words[0] + tcp_iss + pf_tcp_iss_off); 3565 } 3566 3567 void 3568 pf_rule_to_actions(struct pf_rule *r, struct pf_rule_actions *a) 3569 { 3570 if (r->qid) 3571 a->qid = r->qid; 3572 if (r->pqid) 3573 a->pqid = r->pqid; 3574 if (r->rtableid >= 0) 3575 a->rtableid = r->rtableid; 3576 #if NPFLOG > 0 3577 a->log |= r->log; 3578 #endif /* NPFLOG > 0 */ 3579 if (r->scrub_flags & PFSTATE_SETTOS) 3580 a->set_tos = r->set_tos; 3581 if (r->min_ttl) 3582 a->min_ttl = r->min_ttl; 3583 if (r->max_mss) 3584 a->max_mss = r->max_mss; 3585 a->flags |= (r->scrub_flags & (PFSTATE_NODF|PFSTATE_RANDOMID| 3586 PFSTATE_SETTOS|PFSTATE_SCRUB_TCP|PFSTATE_SETPRIO)); 3587 if (r->scrub_flags & PFSTATE_SETPRIO) { 3588 a->set_prio[0] = r->set_prio[0]; 3589 a->set_prio[1] = r->set_prio[1]; 3590 } 3591 if (r->rule_flag & PFRULE_SETDELAY) 3592 a->delay = r->delay; 3593 } 3594 3595 #define PF_TEST_ATTRIB(t, a) \ 3596 if (t) { \ 3597 r = a; \ 3598 continue; \ 3599 } else do { \ 3600 } while (0) 3601 3602 enum pf_test_status 3603 pf_match_rule(struct pf_test_ctx *ctx, struct pf_ruleset *ruleset) 3604 { 3605 struct pf_rule *r; 3606 struct pf_rule *save_a; 3607 struct pf_ruleset *save_aruleset; 3608 3609 r = TAILQ_FIRST(ruleset->rules.active.ptr); 3610 while (r != NULL) { 3611 r->evaluations++; 3612 PF_TEST_ATTRIB( 3613 (pfi_kif_match(r->kif, ctx->pd->kif) == 
r->ifnot),
3614 r->skip[PF_SKIP_IFP].ptr);
3615 PF_TEST_ATTRIB((r->direction && r->direction != ctx->pd->dir),
3616 r->skip[PF_SKIP_DIR].ptr);
3617 PF_TEST_ATTRIB((r->onrdomain >= 0 &&
3618 (r->onrdomain == ctx->pd->rdomain) == r->ifnot),
3619 r->skip[PF_SKIP_RDOM].ptr);
3620 PF_TEST_ATTRIB((r->af && r->af != ctx->pd->af),
3621 r->skip[PF_SKIP_AF].ptr);
3622 PF_TEST_ATTRIB((r->proto && r->proto != ctx->pd->proto),
3623 r->skip[PF_SKIP_PROTO].ptr);
3624 PF_TEST_ATTRIB((PF_MISMATCHAW(&r->src.addr, &ctx->pd->nsaddr,
3625 ctx->pd->naf, r->src.neg, ctx->pd->kif,
3626 ctx->act.rtableid)),
3627 r->skip[PF_SKIP_SRC_ADDR].ptr);
3628 PF_TEST_ATTRIB((PF_MISMATCHAW(&r->dst.addr, &ctx->pd->ndaddr,
3629 ctx->pd->af, r->dst.neg, NULL, ctx->act.rtableid)),
3630 r->skip[PF_SKIP_DST_ADDR].ptr);
3631
3632 switch (ctx->pd->virtual_proto) {
3633 case PF_VPROTO_FRAGMENT:
3634 /* tcp/udp only. port_op always 0 in other cases */
3635 PF_TEST_ATTRIB((r->src.port_op || r->dst.port_op),
3636 TAILQ_NEXT(r, entries));
3637 PF_TEST_ATTRIB((ctx->pd->proto == IPPROTO_TCP &&
3638 r->flagset),
3639 TAILQ_NEXT(r, entries));
3640 /* icmp only. type/code always 0 in other cases */
3641 PF_TEST_ATTRIB((r->type || r->code),
3642 TAILQ_NEXT(r, entries));
3643 /* tcp/udp only. {uid|gid}.op always 0 in other cases */
3644 PF_TEST_ATTRIB((r->gid.op || r->uid.op),
3645 TAILQ_NEXT(r, entries));
3646 break;
3647
3648 case IPPROTO_TCP:
3649 PF_TEST_ATTRIB(((r->flagset & ctx->th->th_flags) !=
3650 r->flags),
3651 TAILQ_NEXT(r, entries));
3652 PF_TEST_ATTRIB((r->os_fingerprint != PF_OSFP_ANY &&
3653 !pf_osfp_match(pf_osfp_fingerprint(ctx->pd),
3654 r->os_fingerprint)),
3655 TAILQ_NEXT(r, entries));
3656 /* FALLTHROUGH */
3657
3658 case IPPROTO_UDP:
3659 /* tcp/udp only. port_op always 0 in other cases */
3660 PF_TEST_ATTRIB((r->src.port_op &&
3661 !pf_match_port(r->src.port_op, r->src.port[0],
3662 r->src.port[1], ctx->pd->nsport)),
3663 r->skip[PF_SKIP_SRC_PORT].ptr);
3664 PF_TEST_ATTRIB((r->dst.port_op &&
3665 !pf_match_port(r->dst.port_op, r->dst.port[0],
3666 r->dst.port[1], ctx->pd->ndport)),
3667 r->skip[PF_SKIP_DST_PORT].ptr);
3668 /* tcp/udp only. uid.op always 0 in other cases */
3669 PF_TEST_ATTRIB((r->uid.op && (ctx->pd->lookup.done ||
3670 (ctx->pd->lookup.done =
3671 pf_socket_lookup(ctx->pd), 1)) &&
3672 !pf_match_uid(r->uid.op, r->uid.uid[0],
3673 r->uid.uid[1], ctx->pd->lookup.uid)),
3674 TAILQ_NEXT(r, entries));
3675 /* tcp/udp only. gid.op always 0 in other cases */
3676 PF_TEST_ATTRIB((r->gid.op && (ctx->pd->lookup.done ||
3677 (ctx->pd->lookup.done =
3678 pf_socket_lookup(ctx->pd), 1)) &&
3679 !pf_match_gid(r->gid.op, r->gid.gid[0],
3680 r->gid.gid[1], ctx->pd->lookup.gid)),
3681 TAILQ_NEXT(r, entries));
3682 break;
3683
3684 case IPPROTO_ICMP:
3685 case IPPROTO_ICMPV6:
3686 /* icmp only. type always 0 in other cases */
3687 PF_TEST_ATTRIB((r->type &&
3688 r->type != ctx->icmptype + 1),
3689 TAILQ_NEXT(r, entries));
3690 /* icmp only. code always 0 in other cases */
3691 PF_TEST_ATTRIB((r->code &&
3692 r->code != ctx->icmpcode + 1),
3693 TAILQ_NEXT(r, entries));
3694 /* icmp only.
don't create states on replies */
3695 PF_TEST_ATTRIB((r->keep_state && !ctx->state_icmp &&
3696 (r->rule_flag & PFRULE_STATESLOPPY) == 0 &&
3697 ctx->icmp_dir != PF_IN),
3698 TAILQ_NEXT(r, entries));
3699 break;
3700
3701 default:
3702 break;
3703 }
3704
3705 PF_TEST_ATTRIB((r->rule_flag & PFRULE_FRAGMENT &&
3706 ctx->pd->virtual_proto != PF_VPROTO_FRAGMENT),
3707 TAILQ_NEXT(r, entries));
3708 PF_TEST_ATTRIB((r->tos && !(r->tos == ctx->pd->tos)),
3709 TAILQ_NEXT(r, entries));
3710 PF_TEST_ATTRIB((r->prob &&
3711 r->prob <= arc4random_uniform(UINT_MAX - 1) + 1),
3712 TAILQ_NEXT(r, entries));
3713 PF_TEST_ATTRIB((r->match_tag &&
3714 !pf_match_tag(ctx->pd->m, r, &ctx->tag)),
3715 TAILQ_NEXT(r, entries));
3716 PF_TEST_ATTRIB((r->rcv_kif && pf_match_rcvif(ctx->pd->m, r) ==
3717 r->rcvifnot),
3718 TAILQ_NEXT(r, entries));
3719 PF_TEST_ATTRIB((r->prio &&
3720 (r->prio == PF_PRIO_ZERO ? 0 : r->prio) !=
3721 ctx->pd->m->m_pkthdr.pf.prio),
3722 TAILQ_NEXT(r, entries));
3723
3724 /* must be last! */
3725 if (r->pktrate.limit) {
3726 pf_add_threshold(&r->pktrate);
3727 PF_TEST_ATTRIB((pf_check_threshold(&r->pktrate)),
3728 TAILQ_NEXT(r, entries));
3729 }
3730
3731 /* all attribute tests passed: the rule matches */
3732 if (r->tag)
3733 ctx->tag = r->tag;
3734 if (r->anchor == NULL) {
3735 if (r->action == PF_MATCH) {
3736 if ((ctx->ri = pool_get(&pf_rule_item_pl,
3737 PR_NOWAIT)) == NULL) {
3738 REASON_SET(&ctx->reason, PFRES_MEMORY);
3739 ctx->test_status = PF_TEST_FAIL;
3740 break;
3741 }
3742 ctx->ri->r = r;
3743 /* order is irrelevant */
3744 SLIST_INSERT_HEAD(&ctx->rules, ctx->ri, entry);
3745 ctx->ri = NULL;
3746 pf_rule_to_actions(r, &ctx->act);
3747 if (r->rule_flag & PFRULE_AFTO)
3748 ctx->pd->naf = r->naf;
3749 if (pf_get_transaddr(r, ctx->pd, ctx->sns,
3750 &ctx->nr) == -1) {
3751 REASON_SET(&ctx->reason,
3752 PFRES_TRANSLATE);
3753 ctx->test_status = PF_TEST_FAIL;
3754 break;
3755 }
3756 #if NPFLOG > 0
3757 if (r->log) {
3758 REASON_SET(&ctx->reason, PFRES_MATCH);
3759 pflog_packet(ctx->pd, ctx->reason, r,
3760 ctx->a, ruleset, NULL);
3761 }
3762 #endif /* NPFLOG > 0 */
3763 } else {
3764 /*
3765 * found the matching rule r
3766 */
3767 *ctx->rm = r;
3768 /*
3769 * the anchor rule, with its ruleset, that r belongs to
3770 */
3771 *ctx->am = ctx->a;
3772 /*
3773 * the ruleset that r belongs to
3774 */
3775 *ctx->rsm = ruleset;
3776 /*
3777 * the ruleset that the anchor belongs to
3778 */
3779 ctx->arsm = ctx->aruleset;
3780 }
3781
3782 #if NPFLOG > 0
3783 if (ctx->act.log & PF_LOG_MATCHES)
3784 pf_log_matches(ctx->pd, r, ctx->a, ruleset,
3785 &ctx->rules);
3786 #endif /* NPFLOG > 0 */
3787
3788 if (r->quick) {
3789 ctx->test_status = PF_TEST_QUICK;
3790 break;
3791 }
3792 } else {
3793 save_a = ctx->a;
3794 save_aruleset = ctx->aruleset;
3795 ctx->a = r; /* remember anchor */
3796 ctx->aruleset = ruleset; /* and its ruleset */
3797 /*
3798 * Note: we don't need to restore if we are not going
3799 * to continue with ruleset evaluation.
3800 */ 3801 if (pf_step_into_anchor(ctx, r) != PF_TEST_OK) 3802 break; 3803 ctx->a = save_a; 3804 ctx->aruleset = save_aruleset; 3805 } 3806 r = TAILQ_NEXT(r, entries); 3807 } 3808 3809 return (ctx->test_status); 3810 } 3811 3812 int 3813 pf_test_rule(struct pf_pdesc *pd, struct pf_rule **rm, struct pf_state **sm, 3814 struct pf_rule **am, struct pf_ruleset **rsm, u_short *reason, 3815 struct pfsync_deferral **pdeferral) 3816 { 3817 struct pf_rule *r = NULL; 3818 struct pf_rule *a = NULL; 3819 struct pf_ruleset *ruleset = NULL; 3820 struct pf_state_key *skw = NULL, *sks = NULL; 3821 int rewrite = 0; 3822 u_int16_t virtual_type, virtual_id; 3823 int action = PF_DROP; 3824 struct pf_test_ctx ctx; 3825 int rv; 3826 3827 memset(&ctx, 0, sizeof(ctx)); 3828 ctx.pd = pd; 3829 ctx.rm = rm; 3830 ctx.am = am; 3831 ctx.rsm = rsm; 3832 ctx.th = &pd->hdr.tcp; 3833 ctx.act.rtableid = pd->rdomain; 3834 ctx.tag = -1; 3835 SLIST_INIT(&ctx.rules); 3836 3837 if (pd->dir == PF_IN && if_congested()) { 3838 REASON_SET(&ctx.reason, PFRES_CONGEST); 3839 return (PF_DROP); 3840 } 3841 3842 switch (pd->virtual_proto) { 3843 case IPPROTO_ICMP: 3844 ctx.icmptype = pd->hdr.icmp.icmp_type; 3845 ctx.icmpcode = pd->hdr.icmp.icmp_code; 3846 ctx.state_icmp = pf_icmp_mapping(pd, ctx.icmptype, 3847 &ctx.icmp_dir, &virtual_id, &virtual_type); 3848 if (ctx.icmp_dir == PF_IN) { 3849 pd->osport = pd->nsport = virtual_id; 3850 pd->odport = pd->ndport = virtual_type; 3851 } else { 3852 pd->osport = pd->nsport = virtual_type; 3853 pd->odport = pd->ndport = virtual_id; 3854 } 3855 break; 3856 #ifdef INET6 3857 case IPPROTO_ICMPV6: 3858 ctx.icmptype = pd->hdr.icmp6.icmp6_type; 3859 ctx.icmpcode = pd->hdr.icmp6.icmp6_code; 3860 ctx.state_icmp = pf_icmp_mapping(pd, ctx.icmptype, 3861 &ctx.icmp_dir, &virtual_id, &virtual_type); 3862 if (ctx.icmp_dir == PF_IN) { 3863 pd->osport = pd->nsport = virtual_id; 3864 pd->odport = pd->ndport = virtual_type; 3865 } else { 3866 pd->osport = pd->nsport = virtual_type; 3867 pd->odport = pd->ndport = virtual_id; 3868 } 3869 break; 3870 #endif /* INET6 */ 3871 } 3872 3873 ruleset = &pf_main_ruleset; 3874 rv = pf_match_rule(&ctx, ruleset); 3875 if (rv == PF_TEST_FAIL) { 3876 /* 3877 * Reason has been set in pf_match_rule() already. 
3878 */ 3879 goto cleanup; 3880 } 3881 3882 r = *ctx.rm; /* matching rule */ 3883 a = *ctx.am; /* rule that defines an anchor containing 'r' */ 3884 ruleset = *ctx.rsm;/* ruleset of the anchor defined by the rule 'a' */ 3885 ctx.aruleset = ctx.arsm;/* ruleset of the 'a' rule itself */ 3886 3887 /* apply actions for last matching pass/block rule */ 3888 pf_rule_to_actions(r, &ctx.act); 3889 if (r->rule_flag & PFRULE_AFTO) 3890 pd->naf = r->naf; 3891 if (pf_get_transaddr(r, pd, ctx.sns, &ctx.nr) == -1) { 3892 REASON_SET(&ctx.reason, PFRES_TRANSLATE); 3893 goto cleanup; 3894 } 3895 REASON_SET(&ctx.reason, PFRES_MATCH); 3896 3897 #if NPFLOG > 0 3898 if (r->log) 3899 pflog_packet(pd, ctx.reason, r, a, ruleset, NULL); 3900 if (ctx.act.log & PF_LOG_MATCHES) 3901 pf_log_matches(pd, r, a, ruleset, &ctx.rules); 3902 #endif /* NPFLOG > 0 */ 3903 3904 if (pd->virtual_proto != PF_VPROTO_FRAGMENT && 3905 (r->action == PF_DROP) && 3906 ((r->rule_flag & PFRULE_RETURNRST) || 3907 (r->rule_flag & PFRULE_RETURNICMP) || 3908 (r->rule_flag & PFRULE_RETURN))) { 3909 if (pd->proto == IPPROTO_TCP && 3910 ((r->rule_flag & PFRULE_RETURNRST) || 3911 (r->rule_flag & PFRULE_RETURN)) && 3912 !(ctx.th->th_flags & TH_RST)) { 3913 u_int32_t ack = 3914 ntohl(ctx.th->th_seq) + pd->p_len; 3915 3916 if (pf_check_tcp_cksum(pd->m, pd->off, 3917 pd->tot_len - pd->off, pd->af)) 3918 REASON_SET(&ctx.reason, PFRES_PROTCKSUM); 3919 else { 3920 if (ctx.th->th_flags & TH_SYN) 3921 ack++; 3922 if (ctx.th->th_flags & TH_FIN) 3923 ack++; 3924 pf_send_tcp(r, pd->af, pd->dst, 3925 pd->src, ctx.th->th_dport, 3926 ctx.th->th_sport, ntohl(ctx.th->th_ack), 3927 ack, TH_RST|TH_ACK, 0, 0, r->return_ttl, 3928 1, 0, pd->rdomain); 3929 } 3930 } else if ((pd->proto != IPPROTO_ICMP || 3931 ICMP_INFOTYPE(ctx.icmptype)) && pd->af == AF_INET && 3932 r->return_icmp) 3933 pf_send_icmp(pd->m, r->return_icmp >> 8, 3934 r->return_icmp & 255, 0, pd->af, r, pd->rdomain); 3935 else if ((pd->proto != IPPROTO_ICMPV6 || 3936 (ctx.icmptype >= ICMP6_ECHO_REQUEST && 3937 ctx.icmptype != ND_REDIRECT)) && pd->af == AF_INET6 && 3938 r->return_icmp6) 3939 pf_send_icmp(pd->m, r->return_icmp6 >> 8, 3940 r->return_icmp6 & 255, 0, pd->af, r, pd->rdomain); 3941 } 3942 3943 if (r->action == PF_DROP) 3944 goto cleanup; 3945 3946 /* 3947 * If an expired "once" rule has not been purged, drop any new matching 3948 * packets. 3949 */ 3950 if (r->rule_flag & PFRULE_EXPIRED) 3951 goto cleanup; 3952 3953 pf_tag_packet(pd->m, ctx.tag, ctx.act.rtableid); 3954 if (ctx.act.rtableid >= 0 && 3955 rtable_l2(ctx.act.rtableid) != pd->rdomain) 3956 pd->destchg = 1; 3957 3958 if (r->action == PF_PASS && pd->badopts != 0 && ! 
r->allow_opts) {
3959 REASON_SET(&ctx.reason, PFRES_IPOPTIONS);
3960 #if NPFLOG > 0
3961 pd->pflog |= PF_LOG_FORCE;
3962 #endif /* NPFLOG > 0 */
3963 DPFPRINTF(LOG_NOTICE, "dropping packet with "
3964 "ip/ipv6 options in pf_test_rule()");
3965 goto cleanup;
3966 }
3967
3968 action = PF_PASS;
3969
3970 if (pd->virtual_proto != PF_VPROTO_FRAGMENT
3971 && !ctx.state_icmp && r->keep_state) {
3972
3973 if (r->rule_flag & PFRULE_SRCTRACK &&
3974 pf_insert_src_node(&ctx.sns[PF_SN_NONE], r, PF_SN_NONE,
3975 pd->af, pd->src, NULL, NULL) != 0) {
3976 REASON_SET(&ctx.reason, PFRES_SRCLIMIT);
3977 goto cleanup;
3978 }
3979
3980 if (r->max_states && (r->states_cur >= r->max_states)) {
3981 pf_status.lcounters[LCNT_STATES]++;
3982 REASON_SET(&ctx.reason, PFRES_MAXSTATES);
3983 goto cleanup;
3984 }
3985
3986 action = pf_create_state(pd, r, a, ctx.nr, &skw, &sks,
3987 &rewrite, sm, ctx.tag, &ctx.rules, &ctx.act, ctx.sns);
3988
3989 if (action != PF_PASS)
3990 goto cleanup;
3991 if (sks != skw) {
3992 struct pf_state_key *sk;
3993
3994 if (pd->dir == PF_IN)
3995 sk = sks;
3996 else
3997 sk = skw;
3998 rewrite += pf_translate(pd,
3999 &sk->addr[pd->af == pd->naf ? pd->sidx : pd->didx],
4000 sk->port[pd->af == pd->naf ? pd->sidx : pd->didx],
4001 &sk->addr[pd->af == pd->naf ? pd->didx : pd->sidx],
4002 sk->port[pd->af == pd->naf ? pd->didx : pd->sidx],
4003 virtual_type, ctx.icmp_dir);
4004 }
4005
4006 #ifdef INET6
4007 if (rewrite && skw->af != sks->af)
4008 action = PF_AFRT;
4009 #endif /* INET6 */
4010
4011 } else {
4012 while ((ctx.ri = SLIST_FIRST(&ctx.rules))) {
4013 SLIST_REMOVE_HEAD(&ctx.rules, entry);
4014 pool_put(&pf_rule_item_pl, ctx.ri);
4015 }
4016 }
4017
4018 /* copy back packet headers if needed */
4019 if (rewrite && pd->hdrlen) {
4020 m_copyback(pd->m, pd->off, pd->hdrlen, &pd->hdr, M_NOWAIT);
4021 }
4022
4023 if (r->rule_flag & PFRULE_ONCE) {
4024 u_int32_t rule_flag;
4025
4026 /*
4027 * Use atomic_cas() to determine a clear winner, which will
4028 * insert the expired rule into the garbage collection list (gcl).
4029 */
4030 rule_flag = r->rule_flag;
4031 if (((rule_flag & PFRULE_EXPIRED) == 0) &&
4032 atomic_cas_uint(&r->rule_flag, rule_flag,
4033 rule_flag | PFRULE_EXPIRED) == rule_flag) {
4034 r->exptime = gettime();
4035 SLIST_INSERT_HEAD(&pf_rule_gcl, r, gcle);
4036 }
4037 }
4038
4039 #if NPFSYNC > 0
4040 if (*sm != NULL && !ISSET((*sm)->state_flags, PFSTATE_NOSYNC) &&
4041 pd->dir == PF_OUT && pfsync_up()) {
4042 /*
4043 * We want the state created, but we don't want to send
4044 * the packet out yet, in case a partner firewall has to
4045 * learn about the state first in order to allow
4046 * replies through it.
4047 */
4048 if (pfsync_defer(*sm, pd->m, pdeferral))
4049 return (PF_DEFER);
4050 }
4051 #endif /* NPFSYNC > 0 */
4052
4053 return (action);
4054
4055 cleanup:
4056 while ((ctx.ri = SLIST_FIRST(&ctx.rules))) {
4057 SLIST_REMOVE_HEAD(&ctx.rules, entry);
4058 pool_put(&pf_rule_item_pl, ctx.ri);
4059 }
4060
4061 return (action);
4062 }
4063
4064 static __inline int
4065 pf_create_state(struct pf_pdesc *pd, struct pf_rule *r, struct pf_rule *a,
4066 struct pf_rule *nr, struct pf_state_key **skw, struct pf_state_key **sks,
4067 int *rewrite, struct pf_state **sm, int tag, struct pf_rule_slist *rules,
4068 struct pf_rule_actions *act, struct pf_src_node *sns[PF_SN_MAX])
4069 {
4070 struct pf_state *s = NULL;
4071 struct tcphdr *th = &pd->hdr.tcp;
4072 u_int16_t mss = tcp_mssdflt;
4073 u_short reason;
4074 u_int i;
4075
4076 s = pool_get(&pf_state_pl, PR_NOWAIT | PR_ZERO);
4077 if (s == NULL) {
4078 REASON_SET(&reason, PFRES_MEMORY);
4079 goto csfailed;
4080 }
4081 s->rule.ptr = r;
4082 s->anchor.ptr = a;
4083 s->natrule.ptr = nr;
4084 if (r->allow_opts)
4085 s->state_flags |= PFSTATE_ALLOWOPTS;
4086 if (r->rule_flag & PFRULE_STATESLOPPY)
4087 s->state_flags |= PFSTATE_SLOPPY;
4088 if (r->rule_flag & PFRULE_PFLOW)
4089 s->state_flags |= PFSTATE_PFLOW;
4090 #if NPFLOG > 0
4091 s->log = act->log & PF_LOG_ALL;
4092 #endif /* NPFLOG > 0 */
4093 s->qid = act->qid;
4094 s->pqid = act->pqid;
4095 s->rtableid[pd->didx] = act->rtableid;
4096 s->rtableid[pd->sidx] = -1; /* return traffic is routed normally */
4097 s->min_ttl = act->min_ttl;
4098 s->set_tos = act->set_tos;
4099 s->max_mss = act->max_mss;
4100 s->state_flags |= act->flags;
4101 #if NPFSYNC > 0
4102 s->sync_state = PFSYNC_S_NONE;
4103 #endif /* NPFSYNC > 0 */
4104 s->set_prio[0] = act->set_prio[0];
4105 s->set_prio[1] = act->set_prio[1];
4106 s->delay = act->delay;
4107 SLIST_INIT(&s->src_nodes);
4108 /*
4109 * We must initialize refcnt before pf_state_insert() gets called;
4110 * pf_state_insert() grabs a reference for pfsync!
4111 */ 4112 refcnt_init(&s->refcnt); 4113 4114 switch (pd->proto) { 4115 case IPPROTO_TCP: 4116 s->src.seqlo = ntohl(th->th_seq); 4117 s->src.seqhi = s->src.seqlo + pd->p_len + 1; 4118 if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN && 4119 r->keep_state == PF_STATE_MODULATE) { 4120 /* Generate sequence number modulator */ 4121 if ((s->src.seqdiff = pf_tcp_iss(pd) - s->src.seqlo) == 4122 0) 4123 s->src.seqdiff = 1; 4124 pf_patch_32(pd, 4125 &th->th_seq, htonl(s->src.seqlo + s->src.seqdiff)); 4126 *rewrite = 1; 4127 } else 4128 s->src.seqdiff = 0; 4129 if (th->th_flags & TH_SYN) { 4130 s->src.seqhi++; 4131 s->src.wscale = pf_get_wscale(pd); 4132 } 4133 s->src.max_win = MAX(ntohs(th->th_win), 1); 4134 if (s->src.wscale & PF_WSCALE_MASK) { 4135 /* Remove scale factor from initial window */ 4136 int win = s->src.max_win; 4137 win += 1 << (s->src.wscale & PF_WSCALE_MASK); 4138 s->src.max_win = (win - 1) >> 4139 (s->src.wscale & PF_WSCALE_MASK); 4140 } 4141 if (th->th_flags & TH_FIN) 4142 s->src.seqhi++; 4143 s->dst.seqhi = 1; 4144 s->dst.max_win = 1; 4145 pf_set_protostate(s, PF_PEER_SRC, TCPS_SYN_SENT); 4146 pf_set_protostate(s, PF_PEER_DST, TCPS_CLOSED); 4147 s->timeout = PFTM_TCP_FIRST_PACKET; 4148 pf_status.states_halfopen++; 4149 break; 4150 case IPPROTO_UDP: 4151 pf_set_protostate(s, PF_PEER_SRC, PFUDPS_SINGLE); 4152 pf_set_protostate(s, PF_PEER_DST, PFUDPS_NO_TRAFFIC); 4153 s->timeout = PFTM_UDP_FIRST_PACKET; 4154 break; 4155 case IPPROTO_ICMP: 4156 #ifdef INET6 4157 case IPPROTO_ICMPV6: 4158 #endif /* INET6 */ 4159 s->timeout = PFTM_ICMP_FIRST_PACKET; 4160 break; 4161 default: 4162 pf_set_protostate(s, PF_PEER_SRC, PFOTHERS_SINGLE); 4163 pf_set_protostate(s, PF_PEER_DST, PFOTHERS_NO_TRAFFIC); 4164 s->timeout = PFTM_OTHER_FIRST_PACKET; 4165 } 4166 4167 s->creation = getuptime(); 4168 s->expire = getuptime(); 4169 4170 if (pd->proto == IPPROTO_TCP) { 4171 if (s->state_flags & PFSTATE_SCRUB_TCP && 4172 pf_normalize_tcp_init(pd, &s->src)) { 4173 REASON_SET(&reason, PFRES_MEMORY); 4174 goto csfailed; 4175 } 4176 if (s->state_flags & PFSTATE_SCRUB_TCP && s->src.scrub && 4177 pf_normalize_tcp_stateful(pd, &reason, s, &s->src, &s->dst, 4178 rewrite)) { 4179 /* This really shouldn't happen!!! */ 4180 DPFPRINTF(LOG_ERR, 4181 "%s: tcp normalize failed on first pkt", __func__); 4182 goto csfailed; 4183 } 4184 } 4185 s->direction = pd->dir; 4186 4187 if (pf_state_key_setup(pd, skw, sks, act->rtableid)) { 4188 REASON_SET(&reason, PFRES_MEMORY); 4189 goto csfailed; 4190 } 4191 4192 if (pf_set_rt_ifp(s, pd->src, (*skw)->af, sns) != 0) { 4193 REASON_SET(&reason, PFRES_NOROUTE); 4194 goto csfailed; 4195 } 4196 4197 for (i = 0; i < PF_SN_MAX; i++) 4198 if (sns[i] != NULL) { 4199 struct pf_sn_item *sni; 4200 4201 sni = pool_get(&pf_sn_item_pl, PR_NOWAIT); 4202 if (sni == NULL) { 4203 REASON_SET(&reason, PFRES_MEMORY); 4204 goto csfailed; 4205 } 4206 sni->sn = sns[i]; 4207 SLIST_INSERT_HEAD(&s->src_nodes, sni, next); 4208 sni->sn->states++; 4209 } 4210 4211 if (pf_state_insert(BOUND_IFACE(r, pd->kif), skw, sks, s)) { 4212 pf_detach_state(s); 4213 *sks = *skw = NULL; 4214 REASON_SET(&reason, PFRES_STATEINS); 4215 goto csfailed; 4216 } else 4217 *sm = s; 4218 4219 /* 4220 * Make state responsible for rules it binds here. 
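* The list is handed over rather than duplicated: rules is zeroed below, so the caller's list head no longer references the items and cannot free them.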
4221 */ 4222 memcpy(&s->match_rules, rules, sizeof(s->match_rules)); 4223 memset(rules, 0, sizeof(*rules)); 4224 STATE_INC_COUNTERS(s); 4225 4226 if (tag > 0) { 4227 pf_tag_ref(tag); 4228 s->tag = tag; 4229 } 4230 if (pd->proto == IPPROTO_TCP && (th->th_flags & (TH_SYN|TH_ACK)) == 4231 TH_SYN && r->keep_state == PF_STATE_SYNPROXY && pd->dir == PF_IN) { 4232 int rtid = pd->rdomain; 4233 if (act->rtableid >= 0) 4234 rtid = act->rtableid; 4235 pf_set_protostate(s, PF_PEER_SRC, PF_TCPS_PROXY_SRC); 4236 s->src.seqhi = arc4random(); 4237 /* Find mss option */ 4238 mss = pf_get_mss(pd); 4239 mss = pf_calc_mss(pd->src, pd->af, rtid, mss); 4240 mss = pf_calc_mss(pd->dst, pd->af, rtid, mss); 4241 s->src.mss = mss; 4242 pf_send_tcp(r, pd->af, pd->dst, pd->src, th->th_dport, 4243 th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1, 4244 TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, pd->rdomain); 4245 REASON_SET(&reason, PFRES_SYNPROXY); 4246 return (PF_SYNPROXY_DROP); 4247 } 4248 4249 return (PF_PASS); 4250 4251 csfailed: 4252 if (s) { 4253 pf_normalize_tcp_cleanup(s); /* safe even w/o init */ 4254 pf_src_tree_remove_state(s); 4255 pool_put(&pf_state_pl, s); 4256 } 4257 4258 for (i = 0; i < PF_SN_MAX; i++) 4259 if (sns[i] != NULL) 4260 pf_remove_src_node(sns[i]); 4261 4262 return (PF_DROP); 4263 } 4264 4265 int 4266 pf_translate(struct pf_pdesc *pd, struct pf_addr *saddr, u_int16_t sport, 4267 struct pf_addr *daddr, u_int16_t dport, u_int16_t virtual_type, 4268 int icmp_dir) 4269 { 4270 int rewrite = 0; 4271 int afto = pd->af != pd->naf; 4272 4273 if (afto || PF_ANEQ(daddr, pd->dst, pd->af)) 4274 pd->destchg = 1; 4275 4276 switch (pd->proto) { 4277 case IPPROTO_TCP: /* FALLTHROUGH */ 4278 case IPPROTO_UDP: 4279 rewrite += pf_patch_16(pd, pd->sport, sport); 4280 rewrite += pf_patch_16(pd, pd->dport, dport); 4281 break; 4282 4283 case IPPROTO_ICMP: 4284 if (pd->af != AF_INET) 4285 return (0); 4286 4287 #ifdef INET6 4288 if (afto) { 4289 if (pf_translate_icmp_af(pd, AF_INET6, &pd->hdr.icmp)) 4290 return (0); 4291 pd->proto = IPPROTO_ICMPV6; 4292 rewrite = 1; 4293 } 4294 #endif /* INET6 */ 4295 if (virtual_type == htons(ICMP_ECHO)) { 4296 u_int16_t icmpid = (icmp_dir == PF_IN) ? sport : dport; 4297 rewrite += pf_patch_16(pd, 4298 &pd->hdr.icmp.icmp_id, icmpid); 4299 } 4300 break; 4301 4302 #ifdef INET6 4303 case IPPROTO_ICMPV6: 4304 if (pd->af != AF_INET6) 4305 return (0); 4306 4307 if (afto) { 4308 if (pf_translate_icmp_af(pd, AF_INET, &pd->hdr.icmp6)) 4309 return (0); 4310 pd->proto = IPPROTO_ICMP; 4311 rewrite = 1; 4312 } 4313 if (virtual_type == htons(ICMP6_ECHO_REQUEST)) { 4314 u_int16_t icmpid = (icmp_dir == PF_IN) ? 
sport : dport; 4315 rewrite += pf_patch_16(pd, 4316 &pd->hdr.icmp6.icmp6_id, icmpid); 4317 } 4318 break; 4319 #endif /* INET6 */ 4320 } 4321 4322 if (!afto) { 4323 rewrite += pf_translate_a(pd, pd->src, saddr); 4324 rewrite += pf_translate_a(pd, pd->dst, daddr); 4325 } 4326 4327 return (rewrite); 4328 } 4329 4330 int 4331 pf_tcp_track_full(struct pf_pdesc *pd, struct pf_state **state, u_short *reason, 4332 int *copyback, int reverse) 4333 { 4334 struct tcphdr *th = &pd->hdr.tcp; 4335 struct pf_state_peer *src, *dst; 4336 u_int16_t win = ntohs(th->th_win); 4337 u_int32_t ack, end, data_end, seq, orig_seq; 4338 u_int8_t sws, dws, psrc, pdst; 4339 int ackskew; 4340 4341 if ((pd->dir == (*state)->direction && !reverse) || 4342 (pd->dir != (*state)->direction && reverse)) { 4343 src = &(*state)->src; 4344 dst = &(*state)->dst; 4345 psrc = PF_PEER_SRC; 4346 pdst = PF_PEER_DST; 4347 } else { 4348 src = &(*state)->dst; 4349 dst = &(*state)->src; 4350 psrc = PF_PEER_DST; 4351 pdst = PF_PEER_SRC; 4352 } 4353 4354 if (src->wscale && dst->wscale && !(th->th_flags & TH_SYN)) { 4355 sws = src->wscale & PF_WSCALE_MASK; 4356 dws = dst->wscale & PF_WSCALE_MASK; 4357 } else 4358 sws = dws = 0; 4359 4360 /* 4361 * Sequence tracking algorithm from Guido van Rooij's paper: 4362 * http://www.madison-gurkha.com/publications/tcp_filtering/ 4363 * tcp_filtering.ps 4364 */ 4365 4366 orig_seq = seq = ntohl(th->th_seq); 4367 if (src->seqlo == 0) { 4368 /* First packet from this end. Set its state */ 4369 4370 if (((*state)->state_flags & PFSTATE_SCRUB_TCP || dst->scrub) && 4371 src->scrub == NULL) { 4372 if (pf_normalize_tcp_init(pd, src)) { 4373 REASON_SET(reason, PFRES_MEMORY); 4374 return (PF_DROP); 4375 } 4376 } 4377 4378 /* Deferred generation of sequence number modulator */ 4379 if (dst->seqdiff && !src->seqdiff) { 4380 /* use random iss for the TCP server */ 4381 while ((src->seqdiff = arc4random() - seq) == 0) 4382 continue; 4383 ack = ntohl(th->th_ack) - dst->seqdiff; 4384 pf_patch_32(pd, &th->th_seq, htonl(seq + src->seqdiff)); 4385 pf_patch_32(pd, &th->th_ack, htonl(ack)); 4386 *copyback = 1; 4387 } else { 4388 ack = ntohl(th->th_ack); 4389 } 4390 4391 end = seq + pd->p_len; 4392 if (th->th_flags & TH_SYN) { 4393 end++; 4394 if (dst->wscale & PF_WSCALE_FLAG) { 4395 src->wscale = pf_get_wscale(pd); 4396 if (src->wscale & PF_WSCALE_FLAG) { 4397 /* Remove scale factor from initial 4398 * window */ 4399 sws = src->wscale & PF_WSCALE_MASK; 4400 win = ((u_int32_t)win + (1 << sws) - 1) 4401 >> sws; 4402 dws = dst->wscale & PF_WSCALE_MASK; 4403 } else { 4404 /* fixup other window */ 4405 dst->max_win = MIN(TCP_MAXWIN, 4406 (u_int32_t)dst->max_win << 4407 (dst->wscale & PF_WSCALE_MASK)); 4408 /* in case of a retrans SYN|ACK */ 4409 dst->wscale = 0; 4410 } 4411 } 4412 } 4413 data_end = end; 4414 if (th->th_flags & TH_FIN) 4415 end++; 4416 4417 src->seqlo = seq; 4418 if (src->state < TCPS_SYN_SENT) 4419 pf_set_protostate(*state, psrc, TCPS_SYN_SENT); 4420 4421 /* 4422 * May need to slide the window (seqhi may have been set by 4423 * the crappy stack check or if we picked up the connection 4424 * after establishment) 4425 */ 4426 if (src->seqhi == 1 || 4427 SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi)) 4428 src->seqhi = end + MAX(1, dst->max_win << dws); 4429 if (win > src->max_win) 4430 src->max_win = win; 4431 4432 } else { 4433 ack = ntohl(th->th_ack) - dst->seqdiff; 4434 if (src->seqdiff) { 4435 /* Modulate sequence numbers */ 4436 pf_patch_32(pd, &th->th_seq, htonl(seq + src->seqdiff)); 4437 
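/* th_ack acks dst's modulated sequence space; patch in the demodulated value computed above */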
pf_patch_32(pd, &th->th_ack, htonl(ack)); 4438 *copyback = 1; 4439 } 4440 end = seq + pd->p_len; 4441 if (th->th_flags & TH_SYN) 4442 end++; 4443 data_end = end; 4444 if (th->th_flags & TH_FIN) 4445 end++; 4446 } 4447 4448 if ((th->th_flags & TH_ACK) == 0) { 4449 /* Let it pass through the ack skew check */ 4450 ack = dst->seqlo; 4451 } else if ((ack == 0 && 4452 (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) || 4453 /* broken tcp stacks do not set ack */ 4454 (dst->state < TCPS_SYN_SENT)) { 4455 /* 4456 * Many stacks (ours included) will set the ACK number in a 4457 * FIN|ACK if the SYN times out -- no sequence to ACK. 4458 */ 4459 ack = dst->seqlo; 4460 } 4461 4462 if (seq == end) { 4463 /* Ease sequencing restrictions on no-data packets */ 4464 seq = src->seqlo; 4465 data_end = end = seq; 4466 } 4467 4468 ackskew = dst->seqlo - ack; 4469 4470 4471 /* 4472 * Need to demodulate the sequence numbers in any TCP SACK options 4473 * (Selective ACK). We could optionally validate the SACK values 4474 * against the current ACK window, either forwards or backwards, but 4475 * I'm not confident that SACK has been implemented properly 4476 * everywhere. It wouldn't surprise me if several stacks accidentally 4477 * SACK too far backwards of previously ACKed data. There really aren't 4478 * any security implications of bad SACKing unless the target stack 4479 * doesn't validate the option length correctly. Someone trying to 4480 * spoof into a TCP connection won't bother blindly sending SACK 4481 * options anyway. 4482 */ 4483 if (dst->seqdiff && (th->th_off << 2) > sizeof(struct tcphdr)) { 4484 if (pf_modulate_sack(pd, dst)) 4485 *copyback = 1; 4486 } 4487 4488 4489 #define MAXACKWINDOW (0xffff + 1500) /* 1500 is an arbitrary fudge factor */ 4490 if (SEQ_GEQ(src->seqhi, data_end) && 4491 /* Last octet inside other's window space */ 4492 SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) && 4493 /* Retrans: not more than one window back */ 4494 (ackskew >= -MAXACKWINDOW) && 4495 /* Acking not more than one reassembled fragment backwards */ 4496 (ackskew <= (MAXACKWINDOW << sws)) && 4497 /* Acking not more than one window forward */ 4498 ((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo || 4499 (orig_seq == src->seqlo + 1) || (orig_seq + 1 == src->seqlo))) { 4500 /* Require an exact/+1 sequence match on resets when possible */ 4501 4502 if (dst->scrub || src->scrub) { 4503 if (pf_normalize_tcp_stateful(pd, reason, *state, src, 4504 dst, copyback)) 4505 return (PF_DROP); 4506 } 4507 4508 /* update max window */ 4509 if (src->max_win < win) 4510 src->max_win = win; 4511 /* synchronize sequencing */ 4512 if (SEQ_GT(end, src->seqlo)) 4513 src->seqlo = end; 4514 /* slide the window of what the other end can send */ 4515 if (SEQ_GEQ(ack + (win << sws), dst->seqhi)) 4516 dst->seqhi = ack + MAX((win << sws), 1); 4517 4518 /* update states */ 4519 if (th->th_flags & TH_SYN) 4520 if (src->state < TCPS_SYN_SENT) 4521 pf_set_protostate(*state, psrc, TCPS_SYN_SENT); 4522 if (th->th_flags & TH_FIN) 4523 if (src->state < TCPS_CLOSING) 4524 pf_set_protostate(*state, psrc, TCPS_CLOSING); 4525 if (th->th_flags & TH_ACK) { 4526 if (dst->state == TCPS_SYN_SENT) { 4527 pf_set_protostate(*state, pdst, 4528 TCPS_ESTABLISHED); 4529 if (src->state == TCPS_ESTABLISHED && 4530 !SLIST_EMPTY(&(*state)->src_nodes) && 4531 pf_src_connlimit(state)) { 4532 REASON_SET(reason, PFRES_SRCLIMIT); 4533 return (PF_DROP); 4534 } 4535 } else if (dst->state == TCPS_CLOSING) 4536 pf_set_protostate(*state, pdst, 4537 TCPS_FIN_WAIT_2);
4538 } 4539 if (th->th_flags & TH_RST) 4540 pf_set_protostate(*state, PF_PEER_BOTH, TCPS_TIME_WAIT); 4541 4542 /* update expire time */ 4543 (*state)->expire = getuptime(); 4544 if (src->state >= TCPS_FIN_WAIT_2 && 4545 dst->state >= TCPS_FIN_WAIT_2) 4546 (*state)->timeout = PFTM_TCP_CLOSED; 4547 else if (src->state >= TCPS_CLOSING && 4548 dst->state >= TCPS_CLOSING) 4549 (*state)->timeout = PFTM_TCP_FIN_WAIT; 4550 else if (src->state < TCPS_ESTABLISHED || 4551 dst->state < TCPS_ESTABLISHED) 4552 (*state)->timeout = PFTM_TCP_OPENING; 4553 else if (src->state >= TCPS_CLOSING || 4554 dst->state >= TCPS_CLOSING) 4555 (*state)->timeout = PFTM_TCP_CLOSING; 4556 else 4557 (*state)->timeout = PFTM_TCP_ESTABLISHED; 4558 4559 /* Fall through to PASS packet */ 4560 } else if ((dst->state < TCPS_SYN_SENT || 4561 dst->state >= TCPS_FIN_WAIT_2 || 4562 src->state >= TCPS_FIN_WAIT_2) && 4563 SEQ_GEQ(src->seqhi + MAXACKWINDOW, data_end) && 4564 /* Within a window forward of the originating packet */ 4565 SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) { 4566 /* Within a window backward of the originating packet */ 4567 4568 /* 4569 * This currently handles three situations: 4570 * 1) Stupid stacks will shotgun SYNs before their peer 4571 * replies. 4572 * 2) When PF catches an already established stream (the 4573 * firewall rebooted, the state table was flushed, routes 4574 * changed...) 4575 * 3) Packets get funky immediately after the connection 4576 * closes (this should catch Solaris spurious ACK|FINs 4577 * that web servers like to spew after a close) 4578 * 4579 * This must be a little more careful than the above code 4580 * since packet floods will also be caught here. We don't 4581 * update the TTL here to mitigate the damage of a packet 4582 * flood and so the same code can handle awkward establishment 4583 * and a loosened connection close. 4584 * In the establishment case, a correct peer response will 4585 * validate the connection, go through the normal state code 4586 * and keep updating the state TTL. 4587 */ 4588 4589 if (pf_status.debug >= LOG_NOTICE) { 4590 log(LOG_NOTICE, "pf: loose state match: "); 4591 pf_print_state(*state); 4592 pf_print_flags(th->th_flags); 4593 addlog(" seq=%u (%u) ack=%u len=%u ackskew=%d " 4594 "pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack, 4595 pd->p_len, ackskew, (*state)->packets[0], 4596 (*state)->packets[1], 4597 pd->dir == PF_IN ? "in" : "out", 4598 pd->dir == (*state)->direction ? "fwd" : "rev"); 4599 } 4600 4601 if (dst->scrub || src->scrub) { 4602 if (pf_normalize_tcp_stateful(pd, reason, *state, src, 4603 dst, copyback)) 4604 return (PF_DROP); 4605 } 4606 4607 /* update max window */ 4608 if (src->max_win < win) 4609 src->max_win = win; 4610 /* synchronize sequencing */ 4611 if (SEQ_GT(end, src->seqlo)) 4612 src->seqlo = end; 4613 /* slide the window of what the other end can send */ 4614 if (SEQ_GEQ(ack + (win << sws), dst->seqhi)) 4615 dst->seqhi = ack + MAX((win << sws), 1); 4616 4617 /* 4618 * Cannot set dst->seqhi here since this could be a shotgunned 4619 * SYN and not an already established connection. 
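* (for a shotgunned SYN the peer's window is still unknown, so a seqhi guess here could wrongly constrain the real handshake later.)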
4620 */ 4621 if (th->th_flags & TH_FIN) 4622 if (src->state < TCPS_CLOSING) 4623 pf_set_protostate(*state, psrc, TCPS_CLOSING); 4624 if (th->th_flags & TH_RST) 4625 pf_set_protostate(*state, PF_PEER_BOTH, TCPS_TIME_WAIT); 4626 4627 /* Fall through to PASS packet */ 4628 } else { 4629 if ((*state)->dst.state == TCPS_SYN_SENT && 4630 (*state)->src.state == TCPS_SYN_SENT) { 4631 /* Send RST for state mismatches during handshake */ 4632 if (!(th->th_flags & TH_RST)) 4633 pf_send_tcp((*state)->rule.ptr, pd->af, 4634 pd->dst, pd->src, th->th_dport, 4635 th->th_sport, ntohl(th->th_ack), 0, 4636 TH_RST, 0, 0, 4637 (*state)->rule.ptr->return_ttl, 1, 0, 4638 pd->rdomain); 4639 src->seqlo = 0; 4640 src->seqhi = 1; 4641 src->max_win = 1; 4642 } else if (pf_status.debug >= LOG_NOTICE) { 4643 log(LOG_NOTICE, "pf: BAD state: "); 4644 pf_print_state(*state); 4645 pf_print_flags(th->th_flags); 4646 addlog(" seq=%u (%u) ack=%u len=%u ackskew=%d " 4647 "pkts=%llu:%llu dir=%s,%s\n", 4648 seq, orig_seq, ack, pd->p_len, ackskew, 4649 (*state)->packets[0], (*state)->packets[1], 4650 pd->dir == PF_IN ? "in" : "out", 4651 pd->dir == (*state)->direction ? "fwd" : "rev"); 4652 addlog("pf: State failure on: %c %c %c %c | %c %c\n", 4653 SEQ_GEQ(src->seqhi, data_end) ? ' ' : '1', 4654 SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ? 4655 ' ' : '2', 4656 (ackskew >= -MAXACKWINDOW) ? ' ' : '3', 4657 (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4', 4658 SEQ_GEQ(src->seqhi + MAXACKWINDOW, data_end) ? 4659 ' ' : '5', 4660 SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ? ' ' : '6'); 4661 } 4662 REASON_SET(reason, PFRES_BADSTATE); 4663 return (PF_DROP); 4664 } 4665 4666 return (PF_PASS); 4667 } 4668 4669 int 4670 pf_tcp_track_sloppy(struct pf_pdesc *pd, struct pf_state **state, 4671 u_short *reason) 4672 { 4673 struct tcphdr *th = &pd->hdr.tcp; 4674 struct pf_state_peer *src, *dst; 4675 u_int8_t psrc, pdst; 4676 4677 if (pd->dir == (*state)->direction) { 4678 src = &(*state)->src; 4679 dst = &(*state)->dst; 4680 psrc = PF_PEER_SRC; 4681 pdst = PF_PEER_DST; 4682 } else { 4683 src = &(*state)->dst; 4684 dst = &(*state)->src; 4685 psrc = PF_PEER_DST; 4686 pdst = PF_PEER_SRC; 4687 } 4688 4689 if (th->th_flags & TH_SYN) 4690 if (src->state < TCPS_SYN_SENT) 4691 pf_set_protostate(*state, psrc, TCPS_SYN_SENT); 4692 if (th->th_flags & TH_FIN) 4693 if (src->state < TCPS_CLOSING) 4694 pf_set_protostate(*state, psrc, TCPS_CLOSING); 4695 if (th->th_flags & TH_ACK) { 4696 if (dst->state == TCPS_SYN_SENT) { 4697 pf_set_protostate(*state, pdst, TCPS_ESTABLISHED); 4698 if (src->state == TCPS_ESTABLISHED && 4699 !SLIST_EMPTY(&(*state)->src_nodes) && 4700 pf_src_connlimit(state)) { 4701 REASON_SET(reason, PFRES_SRCLIMIT); 4702 return (PF_DROP); 4703 } 4704 } else if (dst->state == TCPS_CLOSING) { 4705 pf_set_protostate(*state, pdst, TCPS_FIN_WAIT_2); 4706 } else if (src->state == TCPS_SYN_SENT && 4707 dst->state < TCPS_SYN_SENT) { 4708 /* 4709 * Handle a special sloppy case where we only see one 4710 * half of the connection. If there is an ACK after 4711 * the initial SYN without ever seeing a packet from 4712 * the destination, set the connection to established.
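* This is typically the asymmetric-routing case, where only one direction of the connection passes through this firewall.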
4713 */ 4714 pf_set_protostate(*state, PF_PEER_BOTH, 4715 TCPS_ESTABLISHED); 4716 if (!SLIST_EMPTY(&(*state)->src_nodes) && 4717 pf_src_connlimit(state)) { 4718 REASON_SET(reason, PFRES_SRCLIMIT); 4719 return (PF_DROP); 4720 } 4721 } else if (src->state == TCPS_CLOSING && 4722 dst->state == TCPS_ESTABLISHED && 4723 dst->seqlo == 0) { 4724 /* 4725 * Handle the closing of half connections where we 4726 * don't see the full bidirectional FIN/ACK+ACK 4727 * handshake. 4728 */ 4729 pf_set_protostate(*state, pdst, TCPS_CLOSING); 4730 } 4731 } 4732 if (th->th_flags & TH_RST) 4733 pf_set_protostate(*state, PF_PEER_BOTH, TCPS_TIME_WAIT); 4734 4735 /* update expire time */ 4736 (*state)->expire = getuptime(); 4737 if (src->state >= TCPS_FIN_WAIT_2 && 4738 dst->state >= TCPS_FIN_WAIT_2) 4739 (*state)->timeout = PFTM_TCP_CLOSED; 4740 else if (src->state >= TCPS_CLOSING && 4741 dst->state >= TCPS_CLOSING) 4742 (*state)->timeout = PFTM_TCP_FIN_WAIT; 4743 else if (src->state < TCPS_ESTABLISHED || 4744 dst->state < TCPS_ESTABLISHED) 4745 (*state)->timeout = PFTM_TCP_OPENING; 4746 else if (src->state >= TCPS_CLOSING || 4747 dst->state >= TCPS_CLOSING) 4748 (*state)->timeout = PFTM_TCP_CLOSING; 4749 else 4750 (*state)->timeout = PFTM_TCP_ESTABLISHED; 4751 4752 return (PF_PASS); 4753 } 4754 4755 static __inline int 4756 pf_synproxy(struct pf_pdesc *pd, struct pf_state **state, u_short *reason) 4757 { 4758 struct pf_state_key *sk = (*state)->key[pd->didx]; 4759 4760 if ((*state)->src.state == PF_TCPS_PROXY_SRC) { 4761 struct tcphdr *th = &pd->hdr.tcp; 4762 4763 if (pd->dir != (*state)->direction) { 4764 REASON_SET(reason, PFRES_SYNPROXY); 4765 return (PF_SYNPROXY_DROP); 4766 } 4767 if (th->th_flags & TH_SYN) { 4768 if (ntohl(th->th_seq) != (*state)->src.seqlo) { 4769 REASON_SET(reason, PFRES_SYNPROXY); 4770 return (PF_DROP); 4771 } 4772 pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst, 4773 pd->src, th->th_dport, th->th_sport, 4774 (*state)->src.seqhi, ntohl(th->th_seq) + 1, 4775 TH_SYN|TH_ACK, 0, (*state)->src.mss, 0, 1, 4776 0, pd->rdomain); 4777 REASON_SET(reason, PFRES_SYNPROXY); 4778 return (PF_SYNPROXY_DROP); 4779 } else if ((th->th_flags & (TH_ACK|TH_RST|TH_FIN)) != TH_ACK || 4780 (ntohl(th->th_ack) != (*state)->src.seqhi + 1) || 4781 (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) { 4782 REASON_SET(reason, PFRES_SYNPROXY); 4783 return (PF_DROP); 4784 } else if (!SLIST_EMPTY(&(*state)->src_nodes) && 4785 pf_src_connlimit(state)) { 4786 REASON_SET(reason, PFRES_SRCLIMIT); 4787 return (PF_DROP); 4788 } else 4789 pf_set_protostate(*state, PF_PEER_SRC, 4790 PF_TCPS_PROXY_DST); 4791 } 4792 if ((*state)->src.state == PF_TCPS_PROXY_DST) { 4793 struct tcphdr *th = &pd->hdr.tcp; 4794 4795 if (pd->dir == (*state)->direction) { 4796 if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) || 4797 (ntohl(th->th_ack) != (*state)->src.seqhi + 1) || 4798 (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) { 4799 REASON_SET(reason, PFRES_SYNPROXY); 4800 return (PF_DROP); 4801 } 4802 (*state)->src.max_win = MAX(ntohs(th->th_win), 1); 4803 if ((*state)->dst.seqhi == 1) 4804 (*state)->dst.seqhi = arc4random(); 4805 pf_send_tcp((*state)->rule.ptr, pd->af, 4806 &sk->addr[pd->sidx], &sk->addr[pd->didx], 4807 sk->port[pd->sidx], sk->port[pd->didx], 4808 (*state)->dst.seqhi, 0, TH_SYN, 0, 4809 (*state)->src.mss, 0, 0, (*state)->tag, 4810 sk->rdomain); 4811 REASON_SET(reason, PFRES_SYNPROXY); 4812 return (PF_SYNPROXY_DROP); 4813 } else if (((th->th_flags & (TH_SYN|TH_ACK)) != 4814 (TH_SYN|TH_ACK)) || 4815 (ntohl(th->th_ack) != 
(*state)->dst.seqhi + 1)) { 4816 REASON_SET(reason, PFRES_SYNPROXY); 4817 return (PF_DROP); 4818 } else { 4819 (*state)->dst.max_win = MAX(ntohs(th->th_win), 1); 4820 (*state)->dst.seqlo = ntohl(th->th_seq); 4821 pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst, 4822 pd->src, th->th_dport, th->th_sport, 4823 ntohl(th->th_ack), ntohl(th->th_seq) + 1, 4824 TH_ACK, (*state)->src.max_win, 0, 0, 0, 4825 (*state)->tag, pd->rdomain); 4826 pf_send_tcp((*state)->rule.ptr, pd->af, 4827 &sk->addr[pd->sidx], &sk->addr[pd->didx], 4828 sk->port[pd->sidx], sk->port[pd->didx], 4829 (*state)->src.seqhi + 1, (*state)->src.seqlo + 1, 4830 TH_ACK, (*state)->dst.max_win, 0, 0, 1, 4831 0, sk->rdomain); 4832 (*state)->src.seqdiff = (*state)->dst.seqhi - 4833 (*state)->src.seqlo; 4834 (*state)->dst.seqdiff = (*state)->src.seqhi - 4835 (*state)->dst.seqlo; 4836 (*state)->src.seqhi = (*state)->src.seqlo + 4837 (*state)->dst.max_win; 4838 (*state)->dst.seqhi = (*state)->dst.seqlo + 4839 (*state)->src.max_win; 4840 (*state)->src.wscale = (*state)->dst.wscale = 0; 4841 pf_set_protostate(*state, PF_PEER_BOTH, 4842 TCPS_ESTABLISHED); 4843 REASON_SET(reason, PFRES_SYNPROXY); 4844 return (PF_SYNPROXY_DROP); 4845 } 4846 } 4847 return (PF_PASS); 4848 } 4849 4850 int 4851 pf_test_state(struct pf_pdesc *pd, struct pf_state **state, u_short *reason) 4852 { 4853 int copyback = 0; 4854 struct pf_state_peer *src, *dst; 4855 int action; 4856 struct inpcb *inp = pd->m->m_pkthdr.pf.inp; 4857 u_int8_t psrc, pdst; 4858 4859 action = PF_PASS; 4860 if (pd->dir == (*state)->direction) { 4861 src = &(*state)->src; 4862 dst = &(*state)->dst; 4863 psrc = PF_PEER_SRC; 4864 pdst = PF_PEER_DST; 4865 } else { 4866 src = &(*state)->dst; 4867 dst = &(*state)->src; 4868 psrc = PF_PEER_DST; 4869 pdst = PF_PEER_SRC; 4870 } 4871 4872 switch (pd->virtual_proto) { 4873 case IPPROTO_TCP: 4874 if ((action = pf_synproxy(pd, state, reason)) != PF_PASS) 4875 return (action); 4876 if ((pd->hdr.tcp.th_flags & (TH_SYN|TH_ACK)) == TH_SYN) { 4877 4878 if (dst->state >= TCPS_FIN_WAIT_2 && 4879 src->state >= TCPS_FIN_WAIT_2) { 4880 if (pf_status.debug >= LOG_NOTICE) { 4881 log(LOG_NOTICE, "pf: state reuse "); 4882 pf_print_state(*state); 4883 pf_print_flags(pd->hdr.tcp.th_flags); 4884 addlog("\n"); 4885 } 4886 /* XXX make sure it's the same direction ?? */ 4887 (*state)->timeout = PFTM_PURGE; 4888 pf_state_unref(*state); 4889 *state = NULL; 4890 pf_mbuf_link_inpcb(pd->m, inp); 4891 return (PF_DROP); 4892 } else if (dst->state >= TCPS_ESTABLISHED && 4893 src->state >= TCPS_ESTABLISHED) { 4894 /* 4895 * SYN matches existing state??? 4896 * Typically happens when the sender reboots after 4897 * a sudden panic. Certain protocols (NFSv3) always 4898 * use the same port numbers. A challenge 4899 * ACK enables all parties (firewall and peers) 4900 * to get back in sync.
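* (cf. RFC 5961: a peer that holds no matching connection answers the challenge ACK with an RST, which tears the stale state down everywhere.)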
4901 */ 4902 pf_send_challenge_ack(pd, *state, src, dst); 4903 return (PF_DROP); 4904 } 4905 } 4906 4907 if ((*state)->state_flags & PFSTATE_SLOPPY) { 4908 if (pf_tcp_track_sloppy(pd, state, reason) == PF_DROP) 4909 return (PF_DROP); 4910 } else { 4911 if (pf_tcp_track_full(pd, state, reason, &copyback, 4912 PF_REVERSED_KEY((*state)->key, pd->af)) == PF_DROP) 4913 return (PF_DROP); 4914 } 4915 break; 4916 case IPPROTO_UDP: 4917 /* update states */ 4918 if (src->state < PFUDPS_SINGLE) 4919 pf_set_protostate(*state, psrc, PFUDPS_SINGLE); 4920 if (dst->state == PFUDPS_SINGLE) 4921 pf_set_protostate(*state, pdst, PFUDPS_MULTIPLE); 4922 4923 /* update expire time */ 4924 (*state)->expire = getuptime(); 4925 if (src->state == PFUDPS_MULTIPLE && 4926 dst->state == PFUDPS_MULTIPLE) 4927 (*state)->timeout = PFTM_UDP_MULTIPLE; 4928 else 4929 (*state)->timeout = PFTM_UDP_SINGLE; 4930 break; 4931 default: 4932 /* update states */ 4933 if (src->state < PFOTHERS_SINGLE) 4934 pf_set_protostate(*state, psrc, PFOTHERS_SINGLE); 4935 if (dst->state == PFOTHERS_SINGLE) 4936 pf_set_protostate(*state, pdst, PFOTHERS_MULTIPLE); 4937 4938 /* update expire time */ 4939 (*state)->expire = getuptime(); 4940 if (src->state == PFOTHERS_MULTIPLE && 4941 dst->state == PFOTHERS_MULTIPLE) 4942 (*state)->timeout = PFTM_OTHER_MULTIPLE; 4943 else 4944 (*state)->timeout = PFTM_OTHER_SINGLE; 4945 break; 4946 } 4947 4948 /* translate source/destination address, if necessary */ 4949 if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { 4950 struct pf_state_key *nk; 4951 int afto, sidx, didx; 4952 4953 if (PF_REVERSED_KEY((*state)->key, pd->af)) 4954 nk = (*state)->key[pd->sidx]; 4955 else 4956 nk = (*state)->key[pd->didx]; 4957 4958 afto = pd->af != nk->af; 4959 sidx = afto ? pd->didx : pd->sidx; 4960 didx = afto ?
pd->sidx : pd->didx; 4961 4962 #ifdef INET6 4963 if (afto) { 4964 pf_addrcpy(&pd->nsaddr, &nk->addr[sidx], nk->af); 4965 pf_addrcpy(&pd->ndaddr, &nk->addr[didx], nk->af); 4966 pd->naf = nk->af; 4967 action = PF_AFRT; 4968 } 4969 #endif /* INET6 */ 4970 4971 if (!afto) 4972 pf_translate_a(pd, pd->src, &nk->addr[sidx]); 4973 4974 if (pd->sport != NULL) 4975 pf_patch_16(pd, pd->sport, nk->port[sidx]); 4976 4977 if (afto || PF_ANEQ(pd->dst, &nk->addr[didx], pd->af) || 4978 pd->rdomain != nk->rdomain) 4979 pd->destchg = 1; 4980 4981 if (!afto) 4982 pf_translate_a(pd, pd->dst, &nk->addr[didx]); 4983 4984 if (pd->dport != NULL) 4985 pf_patch_16(pd, pd->dport, nk->port[didx]); 4986 4987 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 4988 copyback = 1; 4989 } 4990 4991 if (copyback && pd->hdrlen > 0) { 4992 m_copyback(pd->m, pd->off, pd->hdrlen, &pd->hdr, M_NOWAIT); 4993 } 4994 4995 return (action); 4996 } 4997 4998 int 4999 pf_icmp_state_lookup(struct pf_pdesc *pd, struct pf_state_key_cmp *key, 5000 struct pf_state **state, u_int16_t icmpid, u_int16_t type, 5001 int icmp_dir, int *iidx, int multi, int inner) 5002 { 5003 int direction, action; 5004 5005 key->af = pd->af; 5006 key->proto = pd->proto; 5007 key->rdomain = pd->rdomain; 5008 if (icmp_dir == PF_IN) { 5009 *iidx = pd->sidx; 5010 key->port[pd->sidx] = icmpid; 5011 key->port[pd->didx] = type; 5012 } else { 5013 *iidx = pd->didx; 5014 key->port[pd->sidx] = type; 5015 key->port[pd->didx] = icmpid; 5016 } 5017 5018 if (pf_state_key_addr_setup(pd, key, pd->sidx, pd->src, pd->didx, 5019 pd->dst, pd->af, multi)) 5020 return (PF_DROP); 5021 5022 action = pf_find_state(pd, key, state); 5023 if (action != PF_MATCH) 5024 return (action); 5025 5026 if ((*state)->state_flags & PFSTATE_SLOPPY) 5027 return (-1); 5028 5029 /* Is this ICMP message flowing in right direction? */ 5030 if ((*state)->key[PF_SK_WIRE]->af != (*state)->key[PF_SK_STACK]->af) 5031 direction = (pd->af == (*state)->key[PF_SK_WIRE]->af) ? 5032 PF_IN : PF_OUT; 5033 else 5034 direction = (*state)->direction; 5035 if ((((!inner && direction == pd->dir) || 5036 (inner && direction != pd->dir)) ? 5037 PF_IN : PF_OUT) != icmp_dir) { 5038 if (pf_status.debug >= LOG_NOTICE) { 5039 log(LOG_NOTICE, 5040 "pf: icmp type %d in wrong direction (%d): ", 5041 ntohs(type), icmp_dir); 5042 pf_print_state(*state); 5043 addlog("\n"); 5044 } 5045 return (PF_DROP); 5046 } 5047 return (-1); 5048 } 5049 5050 int 5051 pf_test_state_icmp(struct pf_pdesc *pd, struct pf_state **state, 5052 u_short *reason) 5053 { 5054 u_int16_t virtual_id, virtual_type; 5055 u_int8_t icmptype, icmpcode; 5056 int icmp_dir, iidx, ret, copyback = 0; 5057 5058 struct pf_state_key_cmp key; 5059 5060 switch (pd->proto) { 5061 case IPPROTO_ICMP: 5062 icmptype = pd->hdr.icmp.icmp_type; 5063 icmpcode = pd->hdr.icmp.icmp_code; 5064 break; 5065 #ifdef INET6 5066 case IPPROTO_ICMPV6: 5067 icmptype = pd->hdr.icmp6.icmp6_type; 5068 icmpcode = pd->hdr.icmp6.icmp6_code; 5069 break; 5070 #endif /* INET6 */ 5071 default: 5072 panic("unhandled proto %d", pd->proto); 5073 } 5074 5075 if (pf_icmp_mapping(pd, icmptype, &icmp_dir, &virtual_id, 5076 &virtual_type) == 0) { 5077 /* 5078 * ICMP query/reply message not related to a TCP/UDP packet. 5079 * Search for an ICMP state. 5080 */ 5081 ret = pf_icmp_state_lookup(pd, &key, state, 5082 virtual_id, virtual_type, icmp_dir, &iidx, 5083 0, 0); 5084 /* IPv6? 
try matching a multicast address */ 5085 if (ret == PF_DROP && pd->af == AF_INET6 && icmp_dir == PF_OUT) 5086 ret = pf_icmp_state_lookup(pd, &key, state, virtual_id, 5087 virtual_type, icmp_dir, &iidx, 1, 0); 5088 if (ret >= 0) 5089 return (ret); 5090 5091 (*state)->expire = getuptime(); 5092 (*state)->timeout = PFTM_ICMP_ERROR_REPLY; 5093 5094 /* translate source/destination address, if necessary */ 5095 if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { 5096 struct pf_state_key *nk; 5097 int afto, sidx, didx; 5098 5099 if (PF_REVERSED_KEY((*state)->key, pd->af)) 5100 nk = (*state)->key[pd->sidx]; 5101 else 5102 nk = (*state)->key[pd->didx]; 5103 5104 afto = pd->af != nk->af; 5105 sidx = afto ? pd->didx : pd->sidx; 5106 didx = afto ? pd->sidx : pd->didx; 5107 iidx = afto ? !iidx : iidx; 5108 #ifdef INET6 5109 if (afto) { 5110 pf_addrcpy(&pd->nsaddr, &nk->addr[sidx], 5111 nk->af); 5112 pf_addrcpy(&pd->ndaddr, &nk->addr[didx], 5113 nk->af); 5114 pd->naf = nk->af; 5115 } 5116 #endif /* INET6 */ 5117 if (!afto) { 5118 pf_translate_a(pd, pd->src, &nk->addr[sidx]); 5119 pf_translate_a(pd, pd->dst, &nk->addr[didx]); 5120 } 5121 5122 if (pd->rdomain != nk->rdomain) 5123 pd->destchg = 1; 5124 if (!afto && PF_ANEQ(pd->dst, 5125 &nk->addr[didx], pd->af)) 5126 pd->destchg = 1; 5127 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 5128 5129 switch (pd->af) { 5130 case AF_INET: 5131 #ifdef INET6 5132 if (afto) { 5133 if (pf_translate_icmp_af(pd, AF_INET6, 5134 &pd->hdr.icmp)) 5135 return (PF_DROP); 5136 pd->proto = IPPROTO_ICMPV6; 5137 } 5138 #endif /* INET6 */ 5139 pf_patch_16(pd, 5140 &pd->hdr.icmp.icmp_id, nk->port[iidx]); 5141 5142 m_copyback(pd->m, pd->off, ICMP_MINLEN, 5143 &pd->hdr.icmp, M_NOWAIT); 5144 copyback = 1; 5145 break; 5146 #ifdef INET6 5147 case AF_INET6: 5148 if (afto) { 5149 if (pf_translate_icmp_af(pd, AF_INET, 5150 &pd->hdr.icmp6)) 5151 return (PF_DROP); 5152 pd->proto = IPPROTO_ICMP; 5153 } 5154 5155 pf_patch_16(pd, 5156 &pd->hdr.icmp6.icmp6_id, nk->port[iidx]); 5157 5158 m_copyback(pd->m, pd->off, 5159 sizeof(struct icmp6_hdr), &pd->hdr.icmp6, 5160 M_NOWAIT); 5161 copyback = 1; 5162 break; 5163 #endif /* INET6 */ 5164 } 5165 #ifdef INET6 5166 if (afto) 5167 return (PF_AFRT); 5168 #endif /* INET6 */ 5169 } 5170 } else { 5171 /* 5172 * ICMP error message in response to a TCP/UDP packet. 5173 * Extract the inner TCP/UDP header and search for that state. 5174 */ 5175 struct pf_pdesc pd2; 5176 struct ip h2; 5177 #ifdef INET6 5178 struct ip6_hdr h2_6; 5179 #endif /* INET6 */ 5180 int ipoff2; 5181 5182 /* Initialize pd2 fields valid for both packets with pd. */ 5183 memset(&pd2, 0, sizeof(pd2)); 5184 pd2.af = pd->af; 5185 pd2.dir = pd->dir; 5186 pd2.kif = pd->kif; 5187 pd2.m = pd->m; 5188 pd2.rdomain = pd->rdomain; 5189 /* Payload packet is from the opposite direction. */ 5190 pd2.sidx = (pd2.dir == PF_IN) ? 1 : 0; 5191 pd2.didx = (pd2.dir == PF_IN) ? 
0 : 1; 5192 switch (pd->af) { 5193 case AF_INET: 5194 /* offset of h2 in mbuf chain */ 5195 ipoff2 = pd->off + ICMP_MINLEN; 5196 5197 if (!pf_pull_hdr(pd2.m, ipoff2, &h2, sizeof(h2), 5198 NULL, reason, pd2.af)) { 5199 DPFPRINTF(LOG_NOTICE, 5200 "ICMP error message too short (ip)"); 5201 return (PF_DROP); 5202 } 5203 /* 5204 * ICMP error messages don't refer to non-first 5205 * fragments 5206 */ 5207 if (h2.ip_off & htons(IP_OFFMASK)) { 5208 REASON_SET(reason, PFRES_FRAG); 5209 return (PF_DROP); 5210 } 5211 5212 /* offset of protocol header that follows h2 */ 5213 pd2.off = ipoff2; 5214 if (pf_walk_header(&pd2, &h2, reason) != PF_PASS) 5215 return (PF_DROP); 5216 5217 pd2.tot_len = ntohs(h2.ip_len); 5218 pd2.src = (struct pf_addr *)&h2.ip_src; 5219 pd2.dst = (struct pf_addr *)&h2.ip_dst; 5220 break; 5221 #ifdef INET6 5222 case AF_INET6: 5223 ipoff2 = pd->off + sizeof(struct icmp6_hdr); 5224 5225 if (!pf_pull_hdr(pd2.m, ipoff2, &h2_6, sizeof(h2_6), 5226 NULL, reason, pd2.af)) { 5227 DPFPRINTF(LOG_NOTICE, 5228 "ICMP error message too short (ip6)"); 5229 return (PF_DROP); 5230 } 5231 5232 pd2.off = ipoff2; 5233 if (pf_walk_header6(&pd2, &h2_6, reason) != PF_PASS) 5234 return (PF_DROP); 5235 5236 pd2.tot_len = ntohs(h2_6.ip6_plen) + 5237 sizeof(struct ip6_hdr); 5238 pd2.src = (struct pf_addr *)&h2_6.ip6_src; 5239 pd2.dst = (struct pf_addr *)&h2_6.ip6_dst; 5240 break; 5241 #endif /* INET6 */ 5242 default: 5243 unhandled_af(pd->af); 5244 } 5245 5246 if (PF_ANEQ(pd->dst, pd2.src, pd->af)) { 5247 if (pf_status.debug >= LOG_NOTICE) { 5248 log(LOG_NOTICE, 5249 "pf: BAD ICMP %d:%d outer dst: ", 5250 icmptype, icmpcode); 5251 pf_print_host(pd->src, 0, pd->af); 5252 addlog(" -> "); 5253 pf_print_host(pd->dst, 0, pd->af); 5254 addlog(" inner src: "); 5255 pf_print_host(pd2.src, 0, pd2.af); 5256 addlog(" -> "); 5257 pf_print_host(pd2.dst, 0, pd2.af); 5258 addlog("\n"); 5259 } 5260 REASON_SET(reason, PFRES_BADSTATE); 5261 return (PF_DROP); 5262 } 5263 5264 switch (pd2.proto) { 5265 case IPPROTO_TCP: { 5266 struct tcphdr *th = &pd2.hdr.tcp; 5267 u_int32_t seq; 5268 struct pf_state_peer *src, *dst; 5269 u_int8_t dws; 5270 int action; 5271 5272 /* 5273 * Only the first 8 bytes of the TCP header can be 5274 * expected. Don't access any TCP header fields after 5275 * th_seq, an ackskew test is not possible. 
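* (RFC 792 only guarantees the IP header plus the first 64 bits of the offending datagram, i.e. the ports and th_seq but not th_ack, so pf assumes that minimum.)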
5276 */ 5277 if (!pf_pull_hdr(pd2.m, pd2.off, th, 8, NULL, reason, 5278 pd2.af)) { 5279 DPFPRINTF(LOG_NOTICE, 5280 "ICMP error message too short (tcp)"); 5281 return (PF_DROP); 5282 } 5283 5284 key.af = pd2.af; 5285 key.proto = IPPROTO_TCP; 5286 key.rdomain = pd2.rdomain; 5287 pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af); 5288 pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af); 5289 key.port[pd2.sidx] = th->th_sport; 5290 key.port[pd2.didx] = th->th_dport; 5291 5292 action = pf_find_state(&pd2, &key, state); 5293 if (action != PF_MATCH) 5294 return (action); 5295 5296 if (pd2.dir == (*state)->direction) { 5297 if (PF_REVERSED_KEY((*state)->key, pd->af)) { 5298 src = &(*state)->src; 5299 dst = &(*state)->dst; 5300 } else { 5301 src = &(*state)->dst; 5302 dst = &(*state)->src; 5303 } 5304 } else { 5305 if (PF_REVERSED_KEY((*state)->key, pd->af)) { 5306 src = &(*state)->dst; 5307 dst = &(*state)->src; 5308 } else { 5309 src = &(*state)->src; 5310 dst = &(*state)->dst; 5311 } 5312 } 5313 5314 if (src->wscale && dst->wscale) 5315 dws = dst->wscale & PF_WSCALE_MASK; 5316 else 5317 dws = 0; 5318 5319 /* Demodulate sequence number */ 5320 seq = ntohl(th->th_seq) - src->seqdiff; 5321 if (src->seqdiff) { 5322 pf_patch_32(pd, &th->th_seq, htonl(seq)); 5323 copyback = 1; 5324 } 5325 5326 if (!((*state)->state_flags & PFSTATE_SLOPPY) && 5327 (!SEQ_GEQ(src->seqhi, seq) || !SEQ_GEQ(seq, 5328 src->seqlo - (dst->max_win << dws)))) { 5329 if (pf_status.debug >= LOG_NOTICE) { 5330 log(LOG_NOTICE, 5331 "pf: BAD ICMP %d:%d ", 5332 icmptype, icmpcode); 5333 pf_print_host(pd->src, 0, pd->af); 5334 addlog(" -> "); 5335 pf_print_host(pd->dst, 0, pd->af); 5336 addlog(" state: "); 5337 pf_print_state(*state); 5338 addlog(" seq=%u\n", seq); 5339 } 5340 REASON_SET(reason, PFRES_BADSTATE); 5341 return (PF_DROP); 5342 } else { 5343 if (pf_status.debug >= LOG_DEBUG) { 5344 log(LOG_DEBUG, 5345 "pf: OK ICMP %d:%d ", 5346 icmptype, icmpcode); 5347 pf_print_host(pd->src, 0, pd->af); 5348 addlog(" -> "); 5349 pf_print_host(pd->dst, 0, pd->af); 5350 addlog(" state: "); 5351 pf_print_state(*state); 5352 addlog(" seq=%u\n", seq); 5353 } 5354 } 5355 5356 /* translate source/destination address, if necessary */ 5357 if ((*state)->key[PF_SK_WIRE] != 5358 (*state)->key[PF_SK_STACK]) { 5359 struct pf_state_key *nk; 5360 int afto, sidx, didx; 5361 5362 if (PF_REVERSED_KEY((*state)->key, pd->af)) 5363 nk = (*state)->key[pd->sidx]; 5364 else 5365 nk = (*state)->key[pd->didx]; 5366 5367 afto = pd->af != nk->af; 5368 sidx = afto ? pd2.didx : pd2.sidx; 5369 didx = afto ? 
pd2.sidx : pd2.didx; 5370 5371 #ifdef INET6 5372 if (afto) { 5373 if (pf_translate_icmp_af(pd, nk->af, 5374 &pd->hdr.icmp)) 5375 return (PF_DROP); 5376 m_copyback(pd->m, pd->off, 5377 sizeof(struct icmp6_hdr), 5378 &pd->hdr.icmp6, M_NOWAIT); 5379 if (pf_change_icmp_af(pd->m, ipoff2, 5380 pd, &pd2, &nk->addr[sidx], 5381 &nk->addr[didx], pd->af, nk->af)) 5382 return (PF_DROP); 5383 if (nk->af == AF_INET) 5384 pd->proto = IPPROTO_ICMP; 5385 else 5386 pd->proto = IPPROTO_ICMPV6; 5387 pd->m->m_pkthdr.ph_rtableid = 5388 nk->rdomain; 5389 pd->destchg = 1; 5390 pf_addrcpy(&pd->nsaddr, 5391 &nk->addr[pd2.sidx], nk->af); 5392 pf_addrcpy(&pd->ndaddr, 5393 &nk->addr[pd2.didx], nk->af); 5394 pd->naf = nk->af; 5395 5396 pf_patch_16(pd, 5397 &th->th_sport, nk->port[sidx]); 5398 pf_patch_16(pd, 5399 &th->th_dport, nk->port[didx]); 5400 5401 m_copyback(pd2.m, pd2.off, 8, th, 5402 M_NOWAIT); 5403 return (PF_AFRT); 5404 } 5405 #endif /* INET6 */ 5406 if (PF_ANEQ(pd2.src, 5407 &nk->addr[pd2.sidx], pd2.af) || 5408 nk->port[pd2.sidx] != th->th_sport) 5409 pf_translate_icmp(pd, pd2.src, 5410 &th->th_sport, pd->dst, 5411 &nk->addr[pd2.sidx], 5412 nk->port[pd2.sidx]); 5413 5414 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], 5415 pd2.af) || pd2.rdomain != nk->rdomain) 5416 pd->destchg = 1; 5417 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 5418 5419 if (PF_ANEQ(pd2.dst, 5420 &nk->addr[pd2.didx], pd2.af) || 5421 nk->port[pd2.didx] != th->th_dport) 5422 pf_translate_icmp(pd, pd2.dst, 5423 &th->th_dport, pd->src, 5424 &nk->addr[pd2.didx], 5425 nk->port[pd2.didx]); 5426 copyback = 1; 5427 } 5428 5429 if (copyback) { 5430 switch (pd2.af) { 5431 case AF_INET: 5432 m_copyback(pd->m, pd->off, ICMP_MINLEN, 5433 &pd->hdr.icmp, M_NOWAIT); 5434 m_copyback(pd2.m, ipoff2, sizeof(h2), 5435 &h2, M_NOWAIT); 5436 break; 5437 #ifdef INET6 5438 case AF_INET6: 5439 m_copyback(pd->m, pd->off, 5440 sizeof(struct icmp6_hdr), 5441 &pd->hdr.icmp6, M_NOWAIT); 5442 m_copyback(pd2.m, ipoff2, sizeof(h2_6), 5443 &h2_6, M_NOWAIT); 5444 break; 5445 #endif /* INET6 */ 5446 } 5447 m_copyback(pd2.m, pd2.off, 8, th, M_NOWAIT); 5448 } 5449 break; 5450 } 5451 case IPPROTO_UDP: { 5452 struct udphdr *uh = &pd2.hdr.udp; 5453 int action; 5454 5455 if (!pf_pull_hdr(pd2.m, pd2.off, uh, sizeof(*uh), 5456 NULL, reason, pd2.af)) { 5457 DPFPRINTF(LOG_NOTICE, 5458 "ICMP error message too short (udp)"); 5459 return (PF_DROP); 5460 } 5461 5462 key.af = pd2.af; 5463 key.proto = IPPROTO_UDP; 5464 key.rdomain = pd2.rdomain; 5465 pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af); 5466 pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af); 5467 key.port[pd2.sidx] = uh->uh_sport; 5468 key.port[pd2.didx] = uh->uh_dport; 5469 5470 action = pf_find_state(&pd2, &key, state); 5471 if (action != PF_MATCH) 5472 return (action); 5473 5474 /* translate source/destination address, if necessary */ 5475 if ((*state)->key[PF_SK_WIRE] != 5476 (*state)->key[PF_SK_STACK]) { 5477 struct pf_state_key *nk; 5478 int afto, sidx, didx; 5479 5480 if (PF_REVERSED_KEY((*state)->key, pd->af)) 5481 nk = (*state)->key[pd->sidx]; 5482 else 5483 nk = (*state)->key[pd->didx]; 5484 5485 afto = pd->af != nk->af; 5486 sidx = afto ? pd2.didx : pd2.sidx; 5487 didx = afto ? 
pd2.sidx : pd2.didx; 5488 5489 #ifdef INET6 5490 if (afto) { 5491 if (pf_translate_icmp_af(pd, nk->af, 5492 &pd->hdr.icmp)) 5493 return (PF_DROP); 5494 m_copyback(pd->m, pd->off, 5495 sizeof(struct icmp6_hdr), 5496 &pd->hdr.icmp6, M_NOWAIT); 5497 if (pf_change_icmp_af(pd->m, ipoff2, 5498 pd, &pd2, &nk->addr[sidx], 5499 &nk->addr[didx], pd->af, nk->af)) 5500 return (PF_DROP); 5501 if (nk->af == AF_INET) 5502 pd->proto = IPPROTO_ICMP; 5503 else 5504 pd->proto = IPPROTO_ICMPV6; 5505 pd->m->m_pkthdr.ph_rtableid = 5506 nk->rdomain; 5507 pd->destchg = 1; 5508 pf_addrcpy(&pd->nsaddr, 5509 &nk->addr[pd2.sidx], nk->af); 5510 pf_addrcpy(&pd->ndaddr, 5511 &nk->addr[pd2.didx], nk->af); 5512 pd->naf = nk->af; 5513 5514 pf_patch_16(pd, 5515 &uh->uh_sport, nk->port[sidx]); 5516 pf_patch_16(pd, 5517 &uh->uh_dport, nk->port[didx]); 5518 5519 m_copyback(pd2.m, pd2.off, sizeof(*uh), 5520 uh, M_NOWAIT); 5521 return (PF_AFRT); 5522 } 5523 #endif /* INET6 */ 5524 5525 if (PF_ANEQ(pd2.src, 5526 &nk->addr[pd2.sidx], pd2.af) || 5527 nk->port[pd2.sidx] != uh->uh_sport) 5528 pf_translate_icmp(pd, pd2.src, 5529 &uh->uh_sport, pd->dst, 5530 &nk->addr[pd2.sidx], 5531 nk->port[pd2.sidx]); 5532 5533 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], 5534 pd2.af) || pd2.rdomain != nk->rdomain) 5535 pd->destchg = 1; 5536 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 5537 5538 if (PF_ANEQ(pd2.dst, 5539 &nk->addr[pd2.didx], pd2.af) || 5540 nk->port[pd2.didx] != uh->uh_dport) 5541 pf_translate_icmp(pd, pd2.dst, 5542 &uh->uh_dport, pd->src, 5543 &nk->addr[pd2.didx], 5544 nk->port[pd2.didx]); 5545 5546 switch (pd2.af) { 5547 case AF_INET: 5548 m_copyback(pd->m, pd->off, ICMP_MINLEN, 5549 &pd->hdr.icmp, M_NOWAIT); 5550 m_copyback(pd2.m, ipoff2, sizeof(h2), 5551 &h2, M_NOWAIT); 5552 break; 5553 #ifdef INET6 5554 case AF_INET6: 5555 m_copyback(pd->m, pd->off, 5556 sizeof(struct icmp6_hdr), 5557 &pd->hdr.icmp6, M_NOWAIT); 5558 m_copyback(pd2.m, ipoff2, sizeof(h2_6), 5559 &h2_6, M_NOWAIT); 5560 break; 5561 #endif /* INET6 */ 5562 } 5563 /* Avoid recomputing quoted UDP checksum. 5564 * note: udp6 0 csum invalid per rfc2460 p27. 5565 * but presumed nothing cares in this context */ 5566 pf_patch_16(pd, &uh->uh_sum, 0); 5567 m_copyback(pd2.m, pd2.off, sizeof(*uh), uh, 5568 M_NOWAIT); 5569 copyback = 1; 5570 } 5571 break; 5572 } 5573 case IPPROTO_ICMP: { 5574 struct icmp *iih = &pd2.hdr.icmp; 5575 5576 if (pd2.af != AF_INET) { 5577 REASON_SET(reason, PFRES_NORM); 5578 return (PF_DROP); 5579 } 5580 5581 if (!pf_pull_hdr(pd2.m, pd2.off, iih, ICMP_MINLEN, 5582 NULL, reason, pd2.af)) { 5583 DPFPRINTF(LOG_NOTICE, 5584 "ICMP error message too short (icmp)"); 5585 return (PF_DROP); 5586 } 5587 5588 pf_icmp_mapping(&pd2, iih->icmp_type, 5589 &icmp_dir, &virtual_id, &virtual_type); 5590 5591 ret = pf_icmp_state_lookup(&pd2, &key, state, 5592 virtual_id, virtual_type, icmp_dir, &iidx, 0, 1); 5593 if (ret >= 0) 5594 return (ret); 5595 5596 /* translate source/destination address, if necessary */ 5597 if ((*state)->key[PF_SK_WIRE] != 5598 (*state)->key[PF_SK_STACK]) { 5599 struct pf_state_key *nk; 5600 int afto, sidx, didx; 5601 5602 if (PF_REVERSED_KEY((*state)->key, pd->af)) 5603 nk = (*state)->key[pd->sidx]; 5604 else 5605 nk = (*state)->key[pd->didx]; 5606 5607 afto = pd->af != nk->af; 5608 sidx = afto ? pd2.didx : pd2.sidx; 5609 didx = afto ? pd2.sidx : pd2.didx; 5610 iidx = afto ? 
!iidx : iidx; 5611 5612 #ifdef INET6 5613 if (afto) { 5614 if (nk->af != AF_INET6) 5615 return (PF_DROP); 5616 if (pf_translate_icmp_af(pd, nk->af, 5617 &pd->hdr.icmp)) 5618 return (PF_DROP); 5619 m_copyback(pd->m, pd->off, 5620 sizeof(struct icmp6_hdr), 5621 &pd->hdr.icmp6, M_NOWAIT); 5622 if (pf_change_icmp_af(pd->m, ipoff2, 5623 pd, &pd2, &nk->addr[sidx], 5624 &nk->addr[didx], pd->af, nk->af)) 5625 return (PF_DROP); 5626 pd->proto = IPPROTO_ICMPV6; 5627 if (pf_translate_icmp_af(pd, 5628 nk->af, iih)) 5629 return (PF_DROP); 5630 if (virtual_type == htons(ICMP_ECHO)) 5631 pf_patch_16(pd, &iih->icmp_id, 5632 nk->port[iidx]); 5633 m_copyback(pd2.m, pd2.off, ICMP_MINLEN, 5634 iih, M_NOWAIT); 5635 pd->m->m_pkthdr.ph_rtableid = 5636 nk->rdomain; 5637 pd->destchg = 1; 5638 pf_addrcpy(&pd->nsaddr, 5639 &nk->addr[pd2.sidx], nk->af); 5640 pf_addrcpy(&pd->ndaddr, 5641 &nk->addr[pd2.didx], nk->af); 5642 pd->naf = nk->af; 5643 return (PF_AFRT); 5644 } 5645 #endif /* INET6 */ 5646 5647 if (PF_ANEQ(pd2.src, 5648 &nk->addr[pd2.sidx], pd2.af) || 5649 (virtual_type == htons(ICMP_ECHO) && 5650 nk->port[iidx] != iih->icmp_id)) 5651 pf_translate_icmp(pd, pd2.src, 5652 (virtual_type == htons(ICMP_ECHO)) ? 5653 &iih->icmp_id : NULL, 5654 pd->dst, &nk->addr[pd2.sidx], 5655 (virtual_type == htons(ICMP_ECHO)) ? 5656 nk->port[iidx] : 0); 5657 5658 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], 5659 pd2.af) || pd2.rdomain != nk->rdomain) 5660 pd->destchg = 1; 5661 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 5662 5663 if (PF_ANEQ(pd2.dst, 5664 &nk->addr[pd2.didx], pd2.af)) 5665 pf_translate_icmp(pd, pd2.dst, NULL, 5666 pd->src, &nk->addr[pd2.didx], 0); 5667 5668 m_copyback(pd->m, pd->off, ICMP_MINLEN, 5669 &pd->hdr.icmp, M_NOWAIT); 5670 m_copyback(pd2.m, ipoff2, sizeof(h2), &h2, 5671 M_NOWAIT); 5672 m_copyback(pd2.m, pd2.off, ICMP_MINLEN, iih, 5673 M_NOWAIT); 5674 copyback = 1; 5675 } 5676 break; 5677 } 5678 #ifdef INET6 5679 case IPPROTO_ICMPV6: { 5680 struct icmp6_hdr *iih = &pd2.hdr.icmp6; 5681 5682 if (pd2.af != AF_INET6) { 5683 REASON_SET(reason, PFRES_NORM); 5684 return (PF_DROP); 5685 } 5686 5687 if (!pf_pull_hdr(pd2.m, pd2.off, iih, 5688 sizeof(struct icmp6_hdr), NULL, reason, pd2.af)) { 5689 DPFPRINTF(LOG_NOTICE, 5690 "ICMP error message too short (icmp6)"); 5691 return (PF_DROP); 5692 } 5693 5694 pf_icmp_mapping(&pd2, iih->icmp6_type, 5695 &icmp_dir, &virtual_id, &virtual_type); 5696 ret = pf_icmp_state_lookup(&pd2, &key, state, 5697 virtual_id, virtual_type, icmp_dir, &iidx, 0, 1); 5698 /* IPv6? try matching a multicast address */ 5699 if (ret == PF_DROP && pd2.af == AF_INET6 && 5700 icmp_dir == PF_OUT) 5701 ret = pf_icmp_state_lookup(&pd2, &key, state, 5702 virtual_id, virtual_type, icmp_dir, &iidx, 5703 1, 1); 5704 if (ret >= 0) 5705 return (ret); 5706 5707 /* translate source/destination address, if necessary */ 5708 if ((*state)->key[PF_SK_WIRE] != 5709 (*state)->key[PF_SK_STACK]) { 5710 struct pf_state_key *nk; 5711 int afto, sidx, didx; 5712 5713 if (PF_REVERSED_KEY((*state)->key, pd->af)) 5714 nk = (*state)->key[pd->sidx]; 5715 else 5716 nk = (*state)->key[pd->didx]; 5717 5718 afto = pd->af != nk->af; 5719 sidx = afto ? pd2.didx : pd2.sidx; 5720 didx = afto ? pd2.sidx : pd2.didx; 5721 iidx = afto ? 
!iidx : iidx; 5722 5723 if (afto) { 5724 if (nk->af != AF_INET) 5725 return (PF_DROP); 5726 if (pf_translate_icmp_af(pd, nk->af, 5727 &pd->hdr.icmp)) 5728 return (PF_DROP); 5729 m_copyback(pd->m, pd->off, 5730 sizeof(struct icmp6_hdr), 5731 &pd->hdr.icmp6, M_NOWAIT); 5732 if (pf_change_icmp_af(pd->m, ipoff2, 5733 pd, &pd2, &nk->addr[sidx], 5734 &nk->addr[didx], pd->af, nk->af)) 5735 return (PF_DROP); 5736 pd->proto = IPPROTO_ICMP; 5737 if (pf_translate_icmp_af(pd, 5738 nk->af, iih)) 5739 return (PF_DROP); 5740 if (virtual_type == 5741 htons(ICMP6_ECHO_REQUEST)) 5742 pf_patch_16(pd, &iih->icmp6_id, 5743 nk->port[iidx]); 5744 m_copyback(pd2.m, pd2.off, 5745 sizeof(struct icmp6_hdr), iih, 5746 M_NOWAIT); 5747 pd->m->m_pkthdr.ph_rtableid = 5748 nk->rdomain; 5749 pd->destchg = 1; 5750 pf_addrcpy(&pd->nsaddr, 5751 &nk->addr[pd2.sidx], nk->af); 5752 pf_addrcpy(&pd->ndaddr, 5753 &nk->addr[pd2.didx], nk->af); 5754 pd->naf = nk->af; 5755 return (PF_AFRT); 5756 } 5757 5758 if (PF_ANEQ(pd2.src, 5759 &nk->addr[pd2.sidx], pd2.af) || 5760 ((virtual_type == 5761 htons(ICMP6_ECHO_REQUEST)) && 5762 nk->port[pd2.sidx] != iih->icmp6_id)) 5763 pf_translate_icmp(pd, pd2.src, 5764 (virtual_type == 5765 htons(ICMP6_ECHO_REQUEST)) 5766 ? &iih->icmp6_id : NULL, 5767 pd->dst, &nk->addr[pd2.sidx], 5768 (virtual_type == 5769 htons(ICMP6_ECHO_REQUEST)) 5770 ? nk->port[iidx] : 0); 5771 5772 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], 5773 pd2.af) || pd2.rdomain != nk->rdomain) 5774 pd->destchg = 1; 5775 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 5776 5777 if (PF_ANEQ(pd2.dst, 5778 &nk->addr[pd2.didx], pd2.af)) 5779 pf_translate_icmp(pd, pd2.dst, NULL, 5780 pd->src, &nk->addr[pd2.didx], 0); 5781 5782 m_copyback(pd->m, pd->off, 5783 sizeof(struct icmp6_hdr), &pd->hdr.icmp6, 5784 M_NOWAIT); 5785 m_copyback(pd2.m, ipoff2, sizeof(h2_6), &h2_6, 5786 M_NOWAIT); 5787 m_copyback(pd2.m, pd2.off, 5788 sizeof(struct icmp6_hdr), iih, M_NOWAIT); 5789 copyback = 1; 5790 } 5791 break; 5792 } 5793 #endif /* INET6 */ 5794 default: { 5795 int action; 5796 5797 key.af = pd2.af; 5798 key.proto = pd2.proto; 5799 key.rdomain = pd2.rdomain; 5800 pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af); 5801 pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af); 5802 key.port[0] = key.port[1] = 0; 5803 5804 action = pf_find_state(&pd2, &key, state); 5805 if (action != PF_MATCH) 5806 return (action); 5807 5808 /* translate source/destination address, if necessary */ 5809 if ((*state)->key[PF_SK_WIRE] != 5810 (*state)->key[PF_SK_STACK]) { 5811 struct pf_state_key *nk = 5812 (*state)->key[pd->didx]; 5813 5814 if (PF_ANEQ(pd2.src, 5815 &nk->addr[pd2.sidx], pd2.af)) 5816 pf_translate_icmp(pd, pd2.src, NULL, 5817 pd->dst, &nk->addr[pd2.sidx], 0); 5818 5819 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], 5820 pd2.af) || pd2.rdomain != nk->rdomain) 5821 pd->destchg = 1; 5822 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 5823 5824 if (PF_ANEQ(pd2.dst, 5825 &nk->addr[pd2.didx], pd2.af)) 5826 pf_translate_icmp(pd, pd2.dst, NULL, 5827 pd->src, &nk->addr[pd2.didx], 0); 5828 5829 switch (pd2.af) { 5830 case AF_INET: 5831 m_copyback(pd->m, pd->off, ICMP_MINLEN, 5832 &pd->hdr.icmp, M_NOWAIT); 5833 m_copyback(pd2.m, ipoff2, sizeof(h2), 5834 &h2, M_NOWAIT); 5835 break; 5836 #ifdef INET6 5837 case AF_INET6: 5838 m_copyback(pd->m, pd->off, 5839 sizeof(struct icmp6_hdr), 5840 &pd->hdr.icmp6, M_NOWAIT); 5841 m_copyback(pd2.m, ipoff2, sizeof(h2_6), 5842 &h2_6, M_NOWAIT); 5843 break; 5844 #endif /* INET6 */ 5845 } 5846 copyback = 1; 5847 } 5848 break; 5849 } 5850 } 5851 } 5852 if (copyback) 
{ 5853 m_copyback(pd->m, pd->off, pd->hdrlen, &pd->hdr, M_NOWAIT); 5854 } 5855 5856 return (PF_PASS); 5857 } 5858 5859 /* 5860 * off is measured from the start of the mbuf chain; pull len bytes of 5861 * header at off into p. Returns NULL and sets *actionp, *reasonp on failure. 5862 */ 5863 void * 5864 pf_pull_hdr(struct mbuf *m, int off, void *p, int len, 5865 u_short *actionp, u_short *reasonp, sa_family_t af) 5866 { 5867 int iplen = 0; 5868 5869 switch (af) { 5870 case AF_INET: { 5871 struct ip *h = mtod(m, struct ip *); 5872 u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3; 5873 5874 if (fragoff) { 5875 if (fragoff >= len) 5876 ACTION_SET(actionp, PF_PASS); 5877 else { 5878 ACTION_SET(actionp, PF_DROP); 5879 REASON_SET(reasonp, PFRES_FRAG); 5880 } 5881 return (NULL); 5882 } 5883 iplen = ntohs(h->ip_len); 5884 break; 5885 } 5886 #ifdef INET6 5887 case AF_INET6: { 5888 struct ip6_hdr *h = mtod(m, struct ip6_hdr *); 5889 5890 iplen = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr); 5891 break; 5892 } 5893 #endif /* INET6 */ 5894 } 5895 if (m->m_pkthdr.len < off + len || iplen < off + len) { 5896 ACTION_SET(actionp, PF_DROP); 5897 REASON_SET(reasonp, PFRES_SHORT); 5898 return (NULL); 5899 } 5900 m_copydata(m, off, len, p); 5901 return (p); 5902 } 5903 5904 int 5905 pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif, 5906 int rtableid) 5907 { 5908 struct sockaddr_storage ss; 5909 struct sockaddr_in *dst; 5910 int ret = 1; 5911 int check_mpath; 5912 #ifdef INET6 5913 struct sockaddr_in6 *dst6; 5914 #endif /* INET6 */ 5915 struct rtentry *rt = NULL; 5916 5917 check_mpath = 0; 5918 memset(&ss, 0, sizeof(ss)); 5919 switch (af) { 5920 case AF_INET: 5921 dst = (struct sockaddr_in *)&ss; 5922 dst->sin_family = AF_INET; 5923 dst->sin_len = sizeof(*dst); 5924 dst->sin_addr = addr->v4; 5925 if (ipmultipath) 5926 check_mpath = 1; 5927 break; 5928 #ifdef INET6 5929 case AF_INET6: 5930 /* 5931 * Skip check for addresses with embedded interface scope, 5932 * as they would always match anyway.
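* (a scope-embedded address is pinned to its interface, so a matching route for it always exists.)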
5933 */ 5934 if (IN6_IS_SCOPE_EMBED(&addr->v6)) 5935 goto out; 5936 dst6 = (struct sockaddr_in6 *)&ss; 5937 dst6->sin6_family = AF_INET6; 5938 dst6->sin6_len = sizeof(*dst6); 5939 dst6->sin6_addr = addr->v6; 5940 if (ip6_multipath) 5941 check_mpath = 1; 5942 break; 5943 #endif /* INET6 */ 5944 } 5945 5946 /* Skip checks for ipsec interfaces */ 5947 if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC) 5948 goto out; 5949 5950 rt = rtalloc(sstosa(&ss), 0, rtableid); 5951 if (rt != NULL) { 5952 /* No interface given, this is a no-route check */ 5953 if (kif == NULL) 5954 goto out; 5955 5956 if (kif->pfik_ifp == NULL) { 5957 ret = 0; 5958 goto out; 5959 } 5960 5961 /* Perform uRPF check if passed input interface */ 5962 ret = 0; 5963 do { 5964 if (rt->rt_ifidx == kif->pfik_ifp->if_index) { 5965 ret = 1; 5966 #if NCARP > 0 5967 } else { 5968 struct ifnet *ifp; 5969 5970 ifp = if_get(rt->rt_ifidx); 5971 if (ifp != NULL && ifp->if_type == IFT_CARP && 5972 ifp->if_carpdevidx == 5973 kif->pfik_ifp->if_index) 5974 ret = 1; 5975 if_put(ifp); 5976 #endif /* NCARP */ 5977 } 5978 5979 rt = rtable_iterate(rt); 5980 } while (check_mpath == 1 && rt != NULL && ret == 0); 5981 } else 5982 ret = 0; 5983 out: 5984 rtfree(rt); 5985 return (ret); 5986 } 5987 5988 int 5989 pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw, 5990 int rtableid) 5991 { 5992 struct sockaddr_storage ss; 5993 struct sockaddr_in *dst; 5994 #ifdef INET6 5995 struct sockaddr_in6 *dst6; 5996 #endif /* INET6 */ 5997 struct rtentry *rt; 5998 int ret = 0; 5999 6000 memset(&ss, 0, sizeof(ss)); 6001 switch (af) { 6002 case AF_INET: 6003 dst = (struct sockaddr_in *)&ss; 6004 dst->sin_family = AF_INET; 6005 dst->sin_len = sizeof(*dst); 6006 dst->sin_addr = addr->v4; 6007 break; 6008 #ifdef INET6 6009 case AF_INET6: 6010 dst6 = (struct sockaddr_in6 *)&ss; 6011 dst6->sin6_family = AF_INET6; 6012 dst6->sin6_len = sizeof(*dst6); 6013 dst6->sin6_addr = addr->v6; 6014 break; 6015 #endif /* INET6 */ 6016 } 6017 6018 rt = rtalloc(sstosa(&ss), RT_RESOLVE, rtableid); 6019 if (rt != NULL) { 6020 if (rt->rt_labelid == aw->v.rtlabel) 6021 ret = 1; 6022 rtfree(rt); 6023 } 6024 6025 return (ret); 6026 } 6027 6028 /* pf_route() may change pd->m, adjust local copies after calling */ 6029 void 6030 pf_route(struct pf_pdesc *pd, struct pf_state *s) 6031 { 6032 struct mbuf *m0; 6033 struct mbuf_list fml; 6034 struct sockaddr_in *dst, sin; 6035 struct rtentry *rt = NULL; 6036 struct ip *ip; 6037 struct ifnet *ifp = NULL; 6038 int error = 0; 6039 unsigned int rtableid; 6040 6041 if (pd->m->m_pkthdr.pf.routed++ > 3) { 6042 m_freem(pd->m); 6043 pd->m = NULL; 6044 return; 6045 } 6046 6047 if (s->rt == PF_DUPTO) { 6048 if ((m0 = m_dup_pkt(pd->m, max_linkhdr, M_NOWAIT)) == NULL) 6049 return; 6050 } else { 6051 if ((s->rt == PF_REPLYTO) == (s->direction == pd->dir)) 6052 return; 6053 m0 = pd->m; 6054 pd->m = NULL; 6055 } 6056 6057 if (m0->m_len < sizeof(struct ip)) { 6058 DPFPRINTF(LOG_ERR, 6059 "%s: m0->m_len < sizeof(struct ip)", __func__); 6060 goto bad; 6061 } 6062 6063 ip = mtod(m0, struct ip *); 6064 6065 if (pd->dir == PF_IN) { 6066 if (ip->ip_ttl <= IPTTLDEC) { 6067 if (s->rt != PF_DUPTO) { 6068 pf_send_icmp(m0, ICMP_TIMXCEED, 6069 ICMP_TIMXCEED_INTRANS, 0, 6070 pd->af, s->rule.ptr, pd->rdomain); 6071 } 6072 goto bad; 6073 } 6074 ip->ip_ttl -= IPTTLDEC; 6075 } 6076 6077 memset(&sin, 0, sizeof(sin)); 6078 dst = &sin; 6079 dst->sin_family = AF_INET; 6080 dst->sin_len = sizeof(*dst); 6081 dst->sin_addr = s->rt_addr.v4; 6082 rtableid = 
m0->m_pkthdr.ph_rtableid; 6083 6084 rt = rtalloc_mpath(sintosa(dst), &ip->ip_src.s_addr, rtableid); 6085 if (!rtisvalid(rt)) { 6086 if (s->rt != PF_DUPTO) { 6087 pf_send_icmp(m0, ICMP_UNREACH, ICMP_UNREACH_HOST, 6088 0, pd->af, s->rule.ptr, pd->rdomain); 6089 } 6090 ipstat_inc(ips_noroute); 6091 goto bad; 6092 } 6093 6094 ifp = if_get(rt->rt_ifidx); 6095 if (ifp == NULL) 6096 goto bad; 6097 6098 /* A locally generated packet may have invalid source address. */ 6099 if ((ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET && 6100 (ifp->if_flags & IFF_LOOPBACK) == 0) 6101 ip->ip_src = ifatoia(rt->rt_ifa)->ia_addr.sin_addr; 6102 6103 if (s->rt != PF_DUPTO && pd->dir == PF_IN) { 6104 if (pf_test(AF_INET, PF_OUT, ifp, &m0) != PF_PASS) 6105 goto bad; 6106 else if (m0 == NULL) 6107 goto done; 6108 if (m0->m_len < sizeof(struct ip)) { 6109 DPFPRINTF(LOG_ERR, 6110 "%s: m0->m_len < sizeof(struct ip)", __func__); 6111 goto bad; 6112 } 6113 ip = mtod(m0, struct ip *); 6114 } 6115 6116 in_proto_cksum_out(m0, ifp); 6117 6118 if (ntohs(ip->ip_len) <= ifp->if_mtu) { 6119 ip->ip_sum = 0; 6120 if (ifp->if_capabilities & IFCAP_CSUM_IPv4) 6121 m0->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT; 6122 else { 6123 ipstat_inc(ips_outswcsum); 6124 ip->ip_sum = in_cksum(m0, ip->ip_hl << 2); 6125 } 6126 error = ifp->if_output(ifp, m0, sintosa(dst), rt); 6127 goto done; 6128 } 6129 6130 /* 6131 * Too large for interface; fragment if possible. 6132 * Must be able to put at least 8 bytes per fragment. 6133 */ 6134 if (ip->ip_off & htons(IP_DF)) { 6135 ipstat_inc(ips_cantfrag); 6136 if (s->rt != PF_DUPTO) 6137 pf_send_icmp(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 6138 ifp->if_mtu, pd->af, s->rule.ptr, pd->rdomain); 6139 goto bad; 6140 } 6141 6142 error = ip_fragment(m0, &fml, ifp, ifp->if_mtu); 6143 if (error) 6144 goto done; 6145 6146 while ((m0 = ml_dequeue(&fml)) != NULL) { 6147 error = ifp->if_output(ifp, m0, sintosa(dst), rt); 6148 if (error) 6149 break; 6150 } 6151 if (error) 6152 ml_purge(&fml); 6153 else 6154 ipstat_inc(ips_fragmented); 6155 6156 done: 6157 if_put(ifp); 6158 rtfree(rt); 6159 return; 6160 6161 bad: 6162 m_freem(m0); 6163 goto done; 6164 } 6165 6166 #ifdef INET6 6167 /* pf_route6() may change pd->m, adjust local copies after calling */ 6168 void 6169 pf_route6(struct pf_pdesc *pd, struct pf_state *s) 6170 { 6171 struct mbuf *m0; 6172 struct sockaddr_in6 *dst, sin6; 6173 struct rtentry *rt = NULL; 6174 struct ip6_hdr *ip6; 6175 struct ifnet *ifp = NULL; 6176 struct m_tag *mtag; 6177 unsigned int rtableid; 6178 6179 if (pd->m->m_pkthdr.pf.routed++ > 3) { 6180 m_freem(pd->m); 6181 pd->m = NULL; 6182 return; 6183 } 6184 6185 if (s->rt == PF_DUPTO) { 6186 if ((m0 = m_dup_pkt(pd->m, max_linkhdr, M_NOWAIT)) == NULL) 6187 return; 6188 } else { 6189 if ((s->rt == PF_REPLYTO) == (s->direction == pd->dir)) 6190 return; 6191 m0 = pd->m; 6192 pd->m = NULL; 6193 } 6194 6195 if (m0->m_len < sizeof(struct ip6_hdr)) { 6196 DPFPRINTF(LOG_ERR, 6197 "%s: m0->m_len < sizeof(struct ip6_hdr)", __func__); 6198 goto bad; 6199 } 6200 ip6 = mtod(m0, struct ip6_hdr *); 6201 6202 if (pd->dir == PF_IN) { 6203 if (ip6->ip6_hlim <= IPV6_HLIMDEC) { 6204 if (s->rt != PF_DUPTO) { 6205 pf_send_icmp(m0, ICMP6_TIME_EXCEEDED, 6206 ICMP6_TIME_EXCEED_TRANSIT, 0, 6207 pd->af, s->rule.ptr, pd->rdomain); 6208 } 6209 goto bad; 6210 } 6211 ip6->ip6_hlim -= IPV6_HLIMDEC; 6212 } 6213 6214 memset(&sin6, 0, sizeof(sin6)); 6215 dst = &sin6; 6216 dst->sin6_family = AF_INET6; 6217 dst->sin6_len = sizeof(*dst); 6218 dst->sin6_addr = 
s->rt_addr.v6; 6219 rtableid = m0->m_pkthdr.ph_rtableid; 6220 6221 rt = rtalloc_mpath(sin6tosa(dst), &ip6->ip6_src.s6_addr32[0], 6222 rtableid); 6223 if (!rtisvalid(rt)) { 6224 if (s->rt != PF_DUPTO) { 6225 pf_send_icmp(m0, ICMP6_DST_UNREACH, 6226 ICMP6_DST_UNREACH_NOROUTE, 0, 6227 pd->af, s->rule.ptr, pd->rdomain); 6228 } 6229 ip6stat_inc(ip6s_noroute); 6230 goto bad; 6231 } 6232 6233 ifp = if_get(rt->rt_ifidx); 6234 if (ifp == NULL) 6235 goto bad; 6236 6237 /* A locally generated packet may have invalid source address. */ 6238 if (IN6_IS_ADDR_LOOPBACK(&ip6->ip6_src) && 6239 (ifp->if_flags & IFF_LOOPBACK) == 0) 6240 ip6->ip6_src = ifatoia6(rt->rt_ifa)->ia_addr.sin6_addr; 6241 6242 if (s->rt != PF_DUPTO && pd->dir == PF_IN) { 6243 if (pf_test(AF_INET6, PF_OUT, ifp, &m0) != PF_PASS) 6244 goto bad; 6245 else if (m0 == NULL) 6246 goto done; 6247 if (m0->m_len < sizeof(struct ip6_hdr)) { 6248 DPFPRINTF(LOG_ERR, 6249 "%s: m0->m_len < sizeof(struct ip6_hdr)", __func__); 6250 goto bad; 6251 } 6252 } 6253 6254 in6_proto_cksum_out(m0, ifp); 6255 6256 /* 6257 * If packet has been reassembled by PF earlier, we have to 6258 * use pf_refragment6() here to turn it back to fragments. 6259 */ 6260 if ((mtag = m_tag_find(m0, PACKET_TAG_PF_REASSEMBLED, NULL))) { 6261 (void) pf_refragment6(&m0, mtag, dst, ifp, rt); 6262 } else if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) { 6263 ifp->if_output(ifp, m0, sin6tosa(dst), rt); 6264 } else { 6265 ip6stat_inc(ip6s_cantfrag); 6266 if (s->rt != PF_DUPTO) 6267 pf_send_icmp(m0, ICMP6_PACKET_TOO_BIG, 0, 6268 ifp->if_mtu, pd->af, s->rule.ptr, pd->rdomain); 6269 goto bad; 6270 } 6271 6272 done: 6273 if_put(ifp); 6274 rtfree(rt); 6275 return; 6276 6277 bad: 6278 m_freem(m0); 6279 goto done; 6280 } 6281 #endif /* INET6 */ 6282 6283 /* 6284 * check TCP checksum and set mbuf flag 6285 * off is the offset where the protocol header starts 6286 * len is the total length of protocol header plus payload 6287 * returns 0 when the checksum is valid, otherwise returns 1. 
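 * The verdict is cached in m_pkthdr.csum_flags as M_TCP_CSUM_IN_OK or
 * M_TCP_CSUM_IN_BAD, so the software checksum runs at most once per packet.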
6288 * if the _OUT flag is set the checksum isn't done yet, consider these ok 6289 */ 6290 int 6291 pf_check_tcp_cksum(struct mbuf *m, int off, int len, sa_family_t af) 6292 { 6293 u_int16_t sum; 6294 6295 if (m->m_pkthdr.csum_flags & 6296 (M_TCP_CSUM_IN_OK | M_TCP_CSUM_OUT)) { 6297 return (0); 6298 } 6299 if (m->m_pkthdr.csum_flags & M_TCP_CSUM_IN_BAD || 6300 off < sizeof(struct ip) || 6301 m->m_pkthdr.len < off + len) { 6302 return (1); 6303 } 6304 6305 /* need to do it in software */ 6306 tcpstat_inc(tcps_inswcsum); 6307 6308 switch (af) { 6309 case AF_INET: 6310 if (m->m_len < sizeof(struct ip)) 6311 return (1); 6312 6313 sum = in4_cksum(m, IPPROTO_TCP, off, len); 6314 break; 6315 #ifdef INET6 6316 case AF_INET6: 6317 if (m->m_len < sizeof(struct ip6_hdr)) 6318 return (1); 6319 6320 sum = in6_cksum(m, IPPROTO_TCP, off, len); 6321 break; 6322 #endif /* INET6 */ 6323 default: 6324 unhandled_af(af); 6325 } 6326 if (sum) { 6327 tcpstat_inc(tcps_rcvbadsum); 6328 m->m_pkthdr.csum_flags |= M_TCP_CSUM_IN_BAD; 6329 return (1); 6330 } 6331 6332 m->m_pkthdr.csum_flags |= M_TCP_CSUM_IN_OK; 6333 return (0); 6334 } 6335 6336 struct pf_divert * 6337 pf_find_divert(struct mbuf *m) 6338 { 6339 struct m_tag *mtag; 6340 6341 if ((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL) 6342 return (NULL); 6343 6344 return ((struct pf_divert *)(mtag + 1)); 6345 } 6346 6347 struct pf_divert * 6348 pf_get_divert(struct mbuf *m) 6349 { 6350 struct m_tag *mtag; 6351 6352 if ((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL) { 6353 mtag = m_tag_get(PACKET_TAG_PF_DIVERT, sizeof(struct pf_divert), 6354 M_NOWAIT); 6355 if (mtag == NULL) 6356 return (NULL); 6357 memset(mtag + 1, 0, sizeof(struct pf_divert)); 6358 m_tag_prepend(m, mtag); 6359 } 6360 6361 return ((struct pf_divert *)(mtag + 1)); 6362 } 6363 6364 int 6365 pf_walk_option(struct pf_pdesc *pd, struct ip *h, int off, int end, 6366 u_short *reason) 6367 { 6368 uint8_t type, length, opts[15 * 4 - sizeof(struct ip)]; 6369 6370 /* IP header in payload of ICMP packet may be too short */ 6371 if (pd->m->m_pkthdr.len < end) { 6372 DPFPRINTF(LOG_NOTICE, "IP option too short"); 6373 REASON_SET(reason, PFRES_SHORT); 6374 return (PF_DROP); 6375 } 6376 6377 KASSERT(end - off <= sizeof(opts)); 6378 m_copydata(pd->m, off, end - off, opts); 6379 end -= off; 6380 off = 0; 6381 6382 while (off < end) { 6383 type = opts[off]; 6384 if (type == IPOPT_EOL) 6385 break; 6386 if (type == IPOPT_NOP) { 6387 off++; 6388 continue; 6389 } 6390 if (off + 2 > end) { 6391 DPFPRINTF(LOG_NOTICE, "IP length opt"); 6392 REASON_SET(reason, PFRES_IPOPTIONS); 6393 return (PF_DROP); 6394 } 6395 length = opts[off + 1]; 6396 if (length < 2) { 6397 DPFPRINTF(LOG_NOTICE, "IP short opt"); 6398 REASON_SET(reason, PFRES_IPOPTIONS); 6399 return (PF_DROP); 6400 } 6401 if (off + length > end) { 6402 DPFPRINTF(LOG_NOTICE, "IP long opt"); 6403 REASON_SET(reason, PFRES_IPOPTIONS); 6404 return (PF_DROP); 6405 } 6406 switch (type) { 6407 case IPOPT_RA: 6408 SET(pd->badopts, PF_OPT_ROUTER_ALERT); 6409 break; 6410 default: 6411 SET(pd->badopts, PF_OPT_OTHER); 6412 break; 6413 } 6414 off += length; 6415 } 6416 6417 return (PF_PASS); 6418 } 6419 6420 int 6421 pf_walk_header(struct pf_pdesc *pd, struct ip *h, u_short *reason) 6422 { 6423 struct ip6_ext ext; 6424 u_int32_t hlen, end; 6425 int hdr_cnt; 6426 6427 hlen = h->ip_hl << 2; 6428 if (hlen < sizeof(struct ip) || hlen > ntohs(h->ip_len)) { 6429 REASON_SET(reason, PFRES_SHORT); 6430 return (PF_DROP); 6431 } 6432 if (hlen != sizeof(struct ip)) { 
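		/*
		 * A header longer than the fixed 20 bytes carries IPv4
		 * options; walk them and classify each option into
		 * pd->badopts.
		 */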
6433 if (pf_walk_option(pd, h, pd->off + sizeof(struct ip), 6434 pd->off + hlen, reason) != PF_PASS) 6435 return (PF_DROP); 6436 /* header options which contain only padding is fishy */ 6437 if (pd->badopts == 0) 6438 SET(pd->badopts, PF_OPT_OTHER); 6439 } 6440 end = pd->off + ntohs(h->ip_len); 6441 pd->off += hlen; 6442 pd->proto = h->ip_p; 6443 /* IGMP packets have router alert options, allow them */ 6444 if (pd->proto == IPPROTO_IGMP) { 6445 /* According to RFC 1112 ttl must be set to 1. */ 6446 if ((h->ip_ttl != 1) || !IN_MULTICAST(h->ip_dst.s_addr)) { 6447 DPFPRINTF(LOG_NOTICE, "Invalid IGMP"); 6448 REASON_SET(reason, PFRES_IPOPTIONS); 6449 return (PF_DROP); 6450 } 6451 CLR(pd->badopts, PF_OPT_ROUTER_ALERT); 6452 } 6453 /* stop walking over non initial fragments */ 6454 if ((h->ip_off & htons(IP_OFFMASK)) != 0) 6455 return (PF_PASS); 6456 6457 for (hdr_cnt = 0; hdr_cnt < pf_hdr_limit; hdr_cnt++) { 6458 switch (pd->proto) { 6459 case IPPROTO_AH: 6460 /* fragments may be short */ 6461 if ((h->ip_off & htons(IP_MF | IP_OFFMASK)) != 0 && 6462 end < pd->off + sizeof(ext)) 6463 return (PF_PASS); 6464 if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext), 6465 NULL, reason, AF_INET)) { 6466 DPFPRINTF(LOG_NOTICE, "IP short exthdr"); 6467 return (PF_DROP); 6468 } 6469 pd->off += (ext.ip6e_len + 2) * 4; 6470 pd->proto = ext.ip6e_nxt; 6471 break; 6472 default: 6473 return (PF_PASS); 6474 } 6475 } 6476 DPFPRINTF(LOG_NOTICE, "IPv4 nested authentication header limit"); 6477 REASON_SET(reason, PFRES_IPOPTIONS); 6478 return (PF_DROP); 6479 } 6480 6481 #ifdef INET6 6482 int 6483 pf_walk_option6(struct pf_pdesc *pd, struct ip6_hdr *h, int off, int end, 6484 u_short *reason) 6485 { 6486 struct ip6_opt opt; 6487 struct ip6_opt_jumbo jumbo; 6488 6489 while (off < end) { 6490 if (!pf_pull_hdr(pd->m, off, &opt.ip6o_type, 6491 sizeof(opt.ip6o_type), NULL, reason, AF_INET6)) { 6492 DPFPRINTF(LOG_NOTICE, "IPv6 short opt type"); 6493 return (PF_DROP); 6494 } 6495 if (opt.ip6o_type == IP6OPT_PAD1) { 6496 off++; 6497 continue; 6498 } 6499 if (!pf_pull_hdr(pd->m, off, &opt, sizeof(opt), 6500 NULL, reason, AF_INET6)) { 6501 DPFPRINTF(LOG_NOTICE, "IPv6 short opt"); 6502 return (PF_DROP); 6503 } 6504 if (off + sizeof(opt) + opt.ip6o_len > end) { 6505 DPFPRINTF(LOG_NOTICE, "IPv6 long opt"); 6506 REASON_SET(reason, PFRES_IPOPTIONS); 6507 return (PF_DROP); 6508 } 6509 switch (opt.ip6o_type) { 6510 case IP6OPT_PADN: 6511 break; 6512 case IP6OPT_JUMBO: 6513 SET(pd->badopts, PF_OPT_JUMBO); 6514 if (pd->jumbolen != 0) { 6515 DPFPRINTF(LOG_NOTICE, "IPv6 multiple jumbo"); 6516 REASON_SET(reason, PFRES_IPOPTIONS); 6517 return (PF_DROP); 6518 } 6519 if (ntohs(h->ip6_plen) != 0) { 6520 DPFPRINTF(LOG_NOTICE, "IPv6 bad jumbo plen"); 6521 REASON_SET(reason, PFRES_IPOPTIONS); 6522 return (PF_DROP); 6523 } 6524 if (!pf_pull_hdr(pd->m, off, &jumbo, sizeof(jumbo), 6525 NULL, reason, AF_INET6)) { 6526 DPFPRINTF(LOG_NOTICE, "IPv6 short jumbo"); 6527 return (PF_DROP); 6528 } 6529 memcpy(&pd->jumbolen, jumbo.ip6oj_jumbo_len, 6530 sizeof(pd->jumbolen)); 6531 pd->jumbolen = ntohl(pd->jumbolen); 6532 if (pd->jumbolen < IPV6_MAXPACKET) { 6533 DPFPRINTF(LOG_NOTICE, "IPv6 short jumbolen"); 6534 REASON_SET(reason, PFRES_IPOPTIONS); 6535 return (PF_DROP); 6536 } 6537 break; 6538 case IP6OPT_ROUTER_ALERT: 6539 SET(pd->badopts, PF_OPT_ROUTER_ALERT); 6540 break; 6541 default: 6542 SET(pd->badopts, PF_OPT_OTHER); 6543 break; 6544 } 6545 off += sizeof(opt) + opt.ip6o_len; 6546 } 6547 6548 return (PF_PASS); 6549 } 6550 6551 int 6552 pf_walk_header6(struct 
pf_pdesc *pd, struct ip6_hdr *h, u_short *reason) 6553 { 6554 struct ip6_frag frag; 6555 struct ip6_ext ext; 6556 struct icmp6_hdr icmp6; 6557 struct ip6_rthdr rthdr; 6558 u_int32_t end; 6559 int hdr_cnt, fraghdr_cnt = 0, rthdr_cnt = 0; 6560 6561 pd->off += sizeof(struct ip6_hdr); 6562 end = pd->off + ntohs(h->ip6_plen); 6563 pd->fragoff = pd->extoff = pd->jumbolen = 0; 6564 pd->proto = h->ip6_nxt; 6565 6566 for (hdr_cnt = 0; hdr_cnt < pf_hdr_limit; hdr_cnt++) { 6567 switch (pd->proto) { 6568 case IPPROTO_ROUTING: 6569 case IPPROTO_DSTOPTS: 6570 SET(pd->badopts, PF_OPT_OTHER); 6571 break; 6572 case IPPROTO_HOPOPTS: 6573 if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext), 6574 NULL, reason, AF_INET6)) { 6575 DPFPRINTF(LOG_NOTICE, "IPv6 short exthdr"); 6576 return (PF_DROP); 6577 } 6578 if (pf_walk_option6(pd, h, pd->off + sizeof(ext), 6579 pd->off + (ext.ip6e_len + 1) * 8, reason) 6580 != PF_PASS) 6581 return (PF_DROP); 6582 /* option header which contains only padding is fishy */ 6583 if (pd->badopts == 0) 6584 SET(pd->badopts, PF_OPT_OTHER); 6585 break; 6586 } 6587 switch (pd->proto) { 6588 case IPPROTO_FRAGMENT: 6589 if (fraghdr_cnt++) { 6590 DPFPRINTF(LOG_NOTICE, "IPv6 multiple fragment"); 6591 REASON_SET(reason, PFRES_FRAG); 6592 return (PF_DROP); 6593 } 6594 /* jumbo payload packets cannot be fragmented */ 6595 if (pd->jumbolen != 0) { 6596 DPFPRINTF(LOG_NOTICE, "IPv6 fragmented jumbo"); 6597 REASON_SET(reason, PFRES_FRAG); 6598 return (PF_DROP); 6599 } 6600 if (!pf_pull_hdr(pd->m, pd->off, &frag, sizeof(frag), 6601 NULL, reason, AF_INET6)) { 6602 DPFPRINTF(LOG_NOTICE, "IPv6 short fragment"); 6603 return (PF_DROP); 6604 } 6605 /* stop walking over non initial fragments */ 6606 if (ntohs((frag.ip6f_offlg & IP6F_OFF_MASK)) != 0) { 6607 pd->fragoff = pd->off; 6608 return (PF_PASS); 6609 } 6610 /* RFC6946: reassemble only non atomic fragments */ 6611 if (frag.ip6f_offlg & IP6F_MORE_FRAG) 6612 pd->fragoff = pd->off; 6613 pd->off += sizeof(frag); 6614 pd->proto = frag.ip6f_nxt; 6615 break; 6616 case IPPROTO_ROUTING: 6617 if (rthdr_cnt++) { 6618 DPFPRINTF(LOG_NOTICE, "IPv6 multiple rthdr"); 6619 REASON_SET(reason, PFRES_IPOPTIONS); 6620 return (PF_DROP); 6621 } 6622 /* fragments may be short */ 6623 if (pd->fragoff != 0 && end < pd->off + sizeof(rthdr)) { 6624 pd->off = pd->fragoff; 6625 pd->proto = IPPROTO_FRAGMENT; 6626 return (PF_PASS); 6627 } 6628 if (!pf_pull_hdr(pd->m, pd->off, &rthdr, sizeof(rthdr), 6629 NULL, reason, AF_INET6)) { 6630 DPFPRINTF(LOG_NOTICE, "IPv6 short rthdr"); 6631 return (PF_DROP); 6632 } 6633 if (rthdr.ip6r_type == IPV6_RTHDR_TYPE_0) { 6634 DPFPRINTF(LOG_NOTICE, "IPv6 rthdr0"); 6635 REASON_SET(reason, PFRES_IPOPTIONS); 6636 return (PF_DROP); 6637 } 6638 /* FALLTHROUGH */ 6639 case IPPROTO_HOPOPTS: 6640 /* RFC2460 4.1: Hop-by-Hop only after IPv6 header */ 6641 if (pd->proto == IPPROTO_HOPOPTS && hdr_cnt > 0) { 6642 DPFPRINTF(LOG_NOTICE, "IPv6 hopopts not first"); 6643 REASON_SET(reason, PFRES_IPOPTIONS); 6644 return (PF_DROP); 6645 } 6646 /* FALLTHROUGH */ 6647 case IPPROTO_AH: 6648 case IPPROTO_DSTOPTS: 6649 /* fragments may be short */ 6650 if (pd->fragoff != 0 && end < pd->off + sizeof(ext)) { 6651 pd->off = pd->fragoff; 6652 pd->proto = IPPROTO_FRAGMENT; 6653 return (PF_PASS); 6654 } 6655 if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext), 6656 NULL, reason, AF_INET6)) { 6657 DPFPRINTF(LOG_NOTICE, "IPv6 short exthdr"); 6658 return (PF_DROP); 6659 } 6660 /* reassembly needs the ext header before the frag */ 6661 if (pd->fragoff == 0) 6662 pd->extoff = pd->off; 
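			/*
			 * RFC 2675: ip6_plen may only be zero on a jumbogram,
			 * and a jumbogram must carry a jumbo payload option
			 * in the hop-by-hop header walked above.
			 */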
6663			if (pd->proto == IPPROTO_HOPOPTS && pd->fragoff == 0 &&
6664			    ntohs(h->ip6_plen) == 0 && pd->jumbolen == 0) {
6665				DPFPRINTF(LOG_NOTICE, "IPv6 missing jumbo");
6666				REASON_SET(reason, PFRES_IPOPTIONS);
6667				return (PF_DROP);
6668			}
6669			if (pd->proto == IPPROTO_AH)
6670				pd->off += (ext.ip6e_len + 2) * 4;
6671			else
6672				pd->off += (ext.ip6e_len + 1) * 8;
6673			pd->proto = ext.ip6e_nxt;
6674			break;
6675		case IPPROTO_ICMPV6:
6676			/* fragments may be short, ignore inner header then */
6677			if (pd->fragoff != 0 && end < pd->off + sizeof(icmp6)) {
6678				pd->off = pd->fragoff;
6679				pd->proto = IPPROTO_FRAGMENT;
6680				return (PF_PASS);
6681			}
6682			if (!pf_pull_hdr(pd->m, pd->off, &icmp6, sizeof(icmp6),
6683			    NULL, reason, AF_INET6)) {
6684				DPFPRINTF(LOG_NOTICE, "IPv6 short icmp6hdr");
6685				return (PF_DROP);
6686			}
6687			/* ICMP multicast packets have router alert options */
6688			switch (icmp6.icmp6_type) {
6689			case MLD_LISTENER_QUERY:
6690			case MLD_LISTENER_REPORT:
6691			case MLD_LISTENER_DONE:
6692			case MLDV2_LISTENER_REPORT:
6693				/*
6694				 * According to RFC 2710 all MLD messages are
6695				 * sent with hop-limit (ttl) set to 1, and link
6696				 * local source address. If either one is
6697				 * missing then MLD message is invalid and
6698				 * should be discarded.
6699				 */
6700				if ((h->ip6_hlim != 1) ||
6701				    !IN6_IS_ADDR_LINKLOCAL(&h->ip6_src)) {
6702					DPFPRINTF(LOG_NOTICE, "Invalid MLD");
6703					REASON_SET(reason, PFRES_IPOPTIONS);
6704					return (PF_DROP);
6705				}
6706				CLR(pd->badopts, PF_OPT_ROUTER_ALERT);
6707				break;
6708			}
6709			return (PF_PASS);
6710		case IPPROTO_TCP:
6711		case IPPROTO_UDP:
6712			/* fragments may be short, ignore inner header then */
6713			if (pd->fragoff != 0 && end < pd->off +
6714			    (pd->proto == IPPROTO_TCP ? sizeof(struct tcphdr) :
6715			    pd->proto == IPPROTO_UDP ? sizeof(struct udphdr) :
6716			    sizeof(struct icmp6_hdr))) {
6717				pd->off = pd->fragoff;
6718				pd->proto = IPPROTO_FRAGMENT;
6719			}
6720			/* FALLTHROUGH */
6721		default:
6722			return (PF_PASS);
6723		}
6724	}
6725	DPFPRINTF(LOG_NOTICE, "IPv6 nested extension header limit");
6726	REASON_SET(reason, PFRES_IPOPTIONS);
6727	return (PF_DROP);
6728 }
6729 #endif /* INET6 */
6730
6731 int
6732 pf_setup_pdesc(struct pf_pdesc *pd, sa_family_t af, int dir,
6733     struct pfi_kif *kif, struct mbuf *m, u_short *reason)
6734 {
6735	memset(pd, 0, sizeof(*pd));
6736	pd->dir = dir;
6737	pd->kif = kif;		/* kif is NULL when called by pflog */
6738	pd->m = m;
6739	pd->sidx = (dir == PF_IN) ? 0 : 1;
6740	pd->didx = (dir == PF_IN) ? 1 : 0;
6741	pd->af = pd->naf = af;
6742	pd->rdomain = rtable_l2(pd->m->m_pkthdr.ph_rtableid);
6743
6744	switch (pd->af) {
6745	case AF_INET: {
6746		struct ip	*h;
6747
6748		/* Check for illegal packets */
6749		if (pd->m->m_pkthdr.len < (int)sizeof(struct ip)) {
6750			REASON_SET(reason, PFRES_SHORT);
6751			return (PF_DROP);
6752		}
6753
6754		h = mtod(pd->m, struct ip *);
6755		if (pd->m->m_pkthdr.len < ntohs(h->ip_len)) {
6756			REASON_SET(reason, PFRES_SHORT);
6757			return (PF_DROP);
6758		}
6759
6760		if (pf_walk_header(pd, h, reason) != PF_PASS)
6761			return (PF_DROP);
6762
6763		pd->src = (struct pf_addr *)&h->ip_src;
6764		pd->dst = (struct pf_addr *)&h->ip_dst;
6765		pd->tot_len = ntohs(h->ip_len);
6766		pd->tos = h->ip_tos & ~IPTOS_ECN_MASK;
6767		pd->ttl = h->ip_ttl;
6768		pd->virtual_proto = (h->ip_off & htons(IP_MF | IP_OFFMASK)) ?
6769 PF_VPROTO_FRAGMENT : pd->proto; 6770 6771 break; 6772 } 6773 #ifdef INET6 6774 case AF_INET6: { 6775 struct ip6_hdr *h; 6776 6777 /* Check for illegal packets */ 6778 if (pd->m->m_pkthdr.len < (int)sizeof(struct ip6_hdr)) { 6779 REASON_SET(reason, PFRES_SHORT); 6780 return (PF_DROP); 6781 } 6782 6783 h = mtod(pd->m, struct ip6_hdr *); 6784 if (pd->m->m_pkthdr.len < 6785 sizeof(struct ip6_hdr) + ntohs(h->ip6_plen)) { 6786 REASON_SET(reason, PFRES_SHORT); 6787 return (PF_DROP); 6788 } 6789 6790 if (pf_walk_header6(pd, h, reason) != PF_PASS) 6791 return (PF_DROP); 6792 6793 #if 1 6794 /* 6795 * we do not support jumbogram yet. if we keep going, zero 6796 * ip6_plen will do something bad, so drop the packet for now. 6797 */ 6798 if (pd->jumbolen != 0) { 6799 REASON_SET(reason, PFRES_NORM); 6800 return (PF_DROP); 6801 } 6802 #endif /* 1 */ 6803 6804 pd->src = (struct pf_addr *)&h->ip6_src; 6805 pd->dst = (struct pf_addr *)&h->ip6_dst; 6806 pd->tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr); 6807 pd->tos = (ntohl(h->ip6_flow) & 0x0fc00000) >> 20; 6808 pd->ttl = h->ip6_hlim; 6809 pd->virtual_proto = (pd->fragoff != 0) ? 6810 PF_VPROTO_FRAGMENT : pd->proto; 6811 6812 break; 6813 } 6814 #endif /* INET6 */ 6815 default: 6816 panic("pf_setup_pdesc called with illegal af %u", pd->af); 6817 6818 } 6819 6820 pf_addrcpy(&pd->nsaddr, pd->src, pd->af); 6821 pf_addrcpy(&pd->ndaddr, pd->dst, pd->af); 6822 6823 switch (pd->virtual_proto) { 6824 case IPPROTO_TCP: { 6825 struct tcphdr *th = &pd->hdr.tcp; 6826 6827 if (!pf_pull_hdr(pd->m, pd->off, th, sizeof(*th), 6828 NULL, reason, pd->af)) 6829 return (PF_DROP); 6830 pd->hdrlen = sizeof(*th); 6831 if (pd->off + (th->th_off << 2) > pd->tot_len || 6832 (th->th_off << 2) < sizeof(struct tcphdr)) { 6833 REASON_SET(reason, PFRES_SHORT); 6834 return (PF_DROP); 6835 } 6836 pd->p_len = pd->tot_len - pd->off - (th->th_off << 2); 6837 pd->sport = &th->th_sport; 6838 pd->dport = &th->th_dport; 6839 pd->pcksum = &th->th_sum; 6840 break; 6841 } 6842 case IPPROTO_UDP: { 6843 struct udphdr *uh = &pd->hdr.udp; 6844 6845 if (!pf_pull_hdr(pd->m, pd->off, uh, sizeof(*uh), 6846 NULL, reason, pd->af)) 6847 return (PF_DROP); 6848 pd->hdrlen = sizeof(*uh); 6849 if (uh->uh_dport == 0 || 6850 pd->off + ntohs(uh->uh_ulen) > pd->tot_len || 6851 ntohs(uh->uh_ulen) < sizeof(struct udphdr)) { 6852 REASON_SET(reason, PFRES_SHORT); 6853 return (PF_DROP); 6854 } 6855 pd->sport = &uh->uh_sport; 6856 pd->dport = &uh->uh_dport; 6857 pd->pcksum = &uh->uh_sum; 6858 break; 6859 } 6860 case IPPROTO_ICMP: { 6861 if (!pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp, ICMP_MINLEN, 6862 NULL, reason, pd->af)) 6863 return (PF_DROP); 6864 pd->hdrlen = ICMP_MINLEN; 6865 if (pd->off + pd->hdrlen > pd->tot_len) { 6866 REASON_SET(reason, PFRES_SHORT); 6867 return (PF_DROP); 6868 } 6869 pd->pcksum = &pd->hdr.icmp.icmp_cksum; 6870 break; 6871 } 6872 #ifdef INET6 6873 case IPPROTO_ICMPV6: { 6874 size_t icmp_hlen = sizeof(struct icmp6_hdr); 6875 6876 if (!pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp6, icmp_hlen, 6877 NULL, reason, pd->af)) 6878 return (PF_DROP); 6879 /* ICMP headers we look further into to match state */ 6880 switch (pd->hdr.icmp6.icmp6_type) { 6881 case MLD_LISTENER_QUERY: 6882 case MLD_LISTENER_REPORT: 6883 icmp_hlen = sizeof(struct mld_hdr); 6884 break; 6885 case ND_NEIGHBOR_SOLICIT: 6886 case ND_NEIGHBOR_ADVERT: 6887 icmp_hlen = sizeof(struct nd_neighbor_solicit); 6888 /* FALLTHROUGH */ 6889 case ND_ROUTER_SOLICIT: 6890 case ND_ROUTER_ADVERT: 6891 case ND_REDIRECT: 6892 if (pd->ttl != 255) 
{ 6893 REASON_SET(reason, PFRES_NORM); 6894 return (PF_DROP); 6895 } 6896 break; 6897 } 6898 if (icmp_hlen > sizeof(struct icmp6_hdr) && 6899 !pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp6, icmp_hlen, 6900 NULL, reason, pd->af)) 6901 return (PF_DROP); 6902 pd->hdrlen = icmp_hlen; 6903 if (pd->off + pd->hdrlen > pd->tot_len) { 6904 REASON_SET(reason, PFRES_SHORT); 6905 return (PF_DROP); 6906 } 6907 pd->pcksum = &pd->hdr.icmp6.icmp6_cksum; 6908 break; 6909 } 6910 #endif /* INET6 */ 6911 } 6912 6913 if (pd->sport) 6914 pd->osport = pd->nsport = *pd->sport; 6915 if (pd->dport) 6916 pd->odport = pd->ndport = *pd->dport; 6917 6918 return (PF_PASS); 6919 } 6920 6921 void 6922 pf_counters_inc(int action, struct pf_pdesc *pd, struct pf_state *s, 6923 struct pf_rule *r, struct pf_rule *a) 6924 { 6925 int dirndx; 6926 pd->kif->pfik_bytes[pd->af == AF_INET6][pd->dir == PF_OUT] 6927 [action != PF_PASS] += pd->tot_len; 6928 pd->kif->pfik_packets[pd->af == AF_INET6][pd->dir == PF_OUT] 6929 [action != PF_PASS]++; 6930 6931 if (action == PF_PASS || action == PF_AFRT || r->action == PF_DROP) { 6932 dirndx = (pd->dir == PF_OUT); 6933 r->packets[dirndx]++; 6934 r->bytes[dirndx] += pd->tot_len; 6935 if (a != NULL) { 6936 a->packets[dirndx]++; 6937 a->bytes[dirndx] += pd->tot_len; 6938 } 6939 if (s != NULL) { 6940 struct pf_rule_item *ri; 6941 struct pf_sn_item *sni; 6942 6943 SLIST_FOREACH(sni, &s->src_nodes, next) { 6944 sni->sn->packets[dirndx]++; 6945 sni->sn->bytes[dirndx] += pd->tot_len; 6946 } 6947 dirndx = (pd->dir == s->direction) ? 0 : 1; 6948 s->packets[dirndx]++; 6949 s->bytes[dirndx] += pd->tot_len; 6950 6951 SLIST_FOREACH(ri, &s->match_rules, entry) { 6952 ri->r->packets[dirndx]++; 6953 ri->r->bytes[dirndx] += pd->tot_len; 6954 6955 if (ri->r->src.addr.type == PF_ADDR_TABLE) 6956 pfr_update_stats(ri->r->src.addr.p.tbl, 6957 &s->key[(s->direction == PF_IN)]-> 6958 addr[(s->direction == PF_OUT)], 6959 pd, ri->r->action, ri->r->src.neg); 6960 if (ri->r->dst.addr.type == PF_ADDR_TABLE) 6961 pfr_update_stats(ri->r->dst.addr.p.tbl, 6962 &s->key[(s->direction == PF_IN)]-> 6963 addr[(s->direction == PF_IN)], 6964 pd, ri->r->action, ri->r->dst.neg); 6965 } 6966 } 6967 if (r->src.addr.type == PF_ADDR_TABLE) 6968 pfr_update_stats(r->src.addr.p.tbl, 6969 (s == NULL) ? pd->src : 6970 &s->key[(s->direction == PF_IN)]-> 6971 addr[(s->direction == PF_OUT)], 6972 pd, r->action, r->src.neg); 6973 if (r->dst.addr.type == PF_ADDR_TABLE) 6974 pfr_update_stats(r->dst.addr.p.tbl, 6975 (s == NULL) ? pd->dst : 6976 &s->key[(s->direction == PF_IN)]-> 6977 addr[(s->direction == PF_IN)], 6978 pd, r->action, r->dst.neg); 6979 } 6980 } 6981 6982 int 6983 pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0) 6984 { 6985 #if NCARP > 0 6986 struct ifnet *ifp0; 6987 #endif 6988 struct pfi_kif *kif; 6989 u_short action, reason = 0; 6990 struct pf_rule *a = NULL, *r = &pf_default_rule; 6991 struct pf_state *s = NULL; 6992 struct pf_state_key_cmp key; 6993 struct pf_ruleset *ruleset = NULL; 6994 struct pf_pdesc pd; 6995 int dir = (fwdir == PF_FWD) ? 
PF_OUT : fwdir; 6996 u_int32_t qid, pqid = 0; 6997 int have_pf_lock = 0; 6998 struct pfsync_deferral *deferral = NULL; 6999 7000 if (!pf_status.running) 7001 return (PF_PASS); 7002 7003 #if NCARP > 0 7004 if (ifp->if_type == IFT_CARP && 7005 (ifp0 = if_get(ifp->if_carpdevidx)) != NULL) { 7006 kif = (struct pfi_kif *)ifp0->if_pf_kif; 7007 if_put(ifp0); 7008 } else 7009 #endif /* NCARP */ 7010 kif = (struct pfi_kif *)ifp->if_pf_kif; 7011 7012 if (kif == NULL) { 7013 DPFPRINTF(LOG_ERR, 7014 "%s: kif == NULL, if_xname %s", __func__, ifp->if_xname); 7015 return (PF_DROP); 7016 } 7017 if (kif->pfik_flags & PFI_IFLAG_SKIP) 7018 return (PF_PASS); 7019 7020 #ifdef DIAGNOSTIC 7021 if (((*m0)->m_flags & M_PKTHDR) == 0) 7022 panic("non-M_PKTHDR is passed to pf_test"); 7023 #endif /* DIAGNOSTIC */ 7024 7025 if ((*m0)->m_pkthdr.pf.flags & PF_TAG_GENERATED) 7026 return (PF_PASS); 7027 7028 if ((*m0)->m_pkthdr.pf.flags & PF_TAG_DIVERTED_PACKET) { 7029 (*m0)->m_pkthdr.pf.flags &= ~PF_TAG_DIVERTED_PACKET; 7030 return (PF_PASS); 7031 } 7032 7033 if ((*m0)->m_pkthdr.pf.flags & PF_TAG_REFRAGMENTED) { 7034 (*m0)->m_pkthdr.pf.flags &= ~PF_TAG_REFRAGMENTED; 7035 return (PF_PASS); 7036 } 7037 7038 action = pf_setup_pdesc(&pd, af, dir, kif, *m0, &reason); 7039 if (action != PF_PASS) { 7040 #if NPFLOG > 0 7041 pd.pflog |= PF_LOG_FORCE; 7042 #endif /* NPFLOG > 0 */ 7043 goto done; 7044 } 7045 7046 /* packet normalization and reassembly */ 7047 switch (pd.af) { 7048 case AF_INET: 7049 action = pf_normalize_ip(&pd, &reason); 7050 break; 7051 #ifdef INET6 7052 case AF_INET6: 7053 action = pf_normalize_ip6(&pd, &reason); 7054 break; 7055 #endif /* INET6 */ 7056 } 7057 *m0 = pd.m; 7058 /* if packet sits in reassembly queue, return without error */ 7059 if (pd.m == NULL) 7060 return PF_PASS; 7061 7062 if (action != PF_PASS) { 7063 #if NPFLOG > 0 7064 pd.pflog |= PF_LOG_FORCE; 7065 #endif /* NPFLOG > 0 */ 7066 goto done; 7067 } 7068 7069 /* if packet has been reassembled, update packet description */ 7070 if (pf_status.reass && pd.virtual_proto == PF_VPROTO_FRAGMENT) { 7071 action = pf_setup_pdesc(&pd, af, dir, kif, pd.m, &reason); 7072 if (action != PF_PASS) { 7073 #if NPFLOG > 0 7074 pd.pflog |= PF_LOG_FORCE; 7075 #endif /* NPFLOG > 0 */ 7076 goto done; 7077 } 7078 } 7079 pd.m->m_pkthdr.pf.flags |= PF_TAG_PROCESSED; 7080 7081 /* 7082 * Avoid pcb-lookups from the forwarding path. They should never 7083 * match and would cause MP locking problems. 
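 * Pre-mark the lookup as already done with unknown credentials, so
 * rules matching on socket uid/gid cannot trigger a pcb lookup here.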
7084 */ 7085 if (fwdir == PF_FWD) { 7086 pd.lookup.done = -1; 7087 pd.lookup.uid = -1; 7088 pd.lookup.gid = -1; 7089 pd.lookup.pid = NO_PID; 7090 } 7091 7092 switch (pd.virtual_proto) { 7093 7094 case PF_VPROTO_FRAGMENT: { 7095 /* 7096 * handle fragments that aren't reassembled by 7097 * normalization 7098 */ 7099 PF_LOCK(); 7100 have_pf_lock = 1; 7101 action = pf_test_rule(&pd, &r, &s, &a, &ruleset, &reason, 7102 &deferral); 7103 s = pf_state_ref(s); 7104 if (action != PF_PASS) 7105 REASON_SET(&reason, PFRES_FRAG); 7106 break; 7107 } 7108 7109 case IPPROTO_ICMP: { 7110 if (pd.af != AF_INET) { 7111 action = PF_DROP; 7112 REASON_SET(&reason, PFRES_NORM); 7113 DPFPRINTF(LOG_NOTICE, 7114 "dropping IPv6 packet with ICMPv4 payload"); 7115 break; 7116 } 7117 PF_STATE_ENTER_READ(); 7118 action = pf_test_state_icmp(&pd, &s, &reason); 7119 s = pf_state_ref(s); 7120 PF_STATE_EXIT_READ(); 7121 if (action == PF_PASS || action == PF_AFRT) { 7122 #if NPFSYNC > 0 7123 pfsync_update_state(s); 7124 #endif /* NPFSYNC > 0 */ 7125 r = s->rule.ptr; 7126 a = s->anchor.ptr; 7127 #if NPFLOG > 0 7128 pd.pflog |= s->log; 7129 #endif /* NPFLOG > 0 */ 7130 } else if (s == NULL) { 7131 PF_LOCK(); 7132 have_pf_lock = 1; 7133 action = pf_test_rule(&pd, &r, &s, &a, &ruleset, 7134 &reason, &deferral); 7135 s = pf_state_ref(s); 7136 } 7137 break; 7138 } 7139 7140 #ifdef INET6 7141 case IPPROTO_ICMPV6: { 7142 if (pd.af != AF_INET6) { 7143 action = PF_DROP; 7144 REASON_SET(&reason, PFRES_NORM); 7145 DPFPRINTF(LOG_NOTICE, 7146 "dropping IPv4 packet with ICMPv6 payload"); 7147 break; 7148 } 7149 PF_STATE_ENTER_READ(); 7150 action = pf_test_state_icmp(&pd, &s, &reason); 7151 s = pf_state_ref(s); 7152 PF_STATE_EXIT_READ(); 7153 if (action == PF_PASS || action == PF_AFRT) { 7154 #if NPFSYNC > 0 7155 pfsync_update_state(s); 7156 #endif /* NPFSYNC > 0 */ 7157 r = s->rule.ptr; 7158 a = s->anchor.ptr; 7159 #if NPFLOG > 0 7160 pd.pflog |= s->log; 7161 #endif /* NPFLOG > 0 */ 7162 } else if (s == NULL) { 7163 PF_LOCK(); 7164 have_pf_lock = 1; 7165 action = pf_test_rule(&pd, &r, &s, &a, &ruleset, 7166 &reason, &deferral); 7167 s = pf_state_ref(s); 7168 } 7169 break; 7170 } 7171 #endif /* INET6 */ 7172 7173 default: 7174 if (pd.virtual_proto == IPPROTO_TCP) { 7175 if (pd.dir == PF_IN && (pd.hdr.tcp.th_flags & 7176 (TH_SYN|TH_ACK)) == TH_SYN && 7177 pf_synflood_check(&pd)) { 7178 PF_LOCK(); 7179 have_pf_lock = 1; 7180 pf_syncookie_send(&pd); 7181 action = PF_DROP; 7182 break; 7183 } 7184 if ((pd.hdr.tcp.th_flags & TH_ACK) && pd.p_len == 0) 7185 pqid = 1; 7186 action = pf_normalize_tcp(&pd); 7187 if (action == PF_DROP) 7188 break; 7189 } 7190 7191 key.af = pd.af; 7192 key.proto = pd.virtual_proto; 7193 key.rdomain = pd.rdomain; 7194 pf_addrcpy(&key.addr[pd.sidx], pd.src, key.af); 7195 pf_addrcpy(&key.addr[pd.didx], pd.dst, key.af); 7196 key.port[pd.sidx] = pd.osport; 7197 key.port[pd.didx] = pd.odport; 7198 7199 PF_STATE_ENTER_READ(); 7200 action = pf_find_state(&pd, &key, &s); 7201 s = pf_state_ref(s); 7202 PF_STATE_EXIT_READ(); 7203 7204 /* check for syncookies if tcp ack and no active state */ 7205 if (pd.dir == PF_IN && pd.virtual_proto == IPPROTO_TCP && 7206 (s == NULL || (s->src.state >= TCPS_FIN_WAIT_2 && 7207 s->dst.state >= TCPS_FIN_WAIT_2)) && 7208 (pd.hdr.tcp.th_flags & (TH_SYN|TH_ACK|TH_RST)) == TH_ACK && 7209 pf_syncookie_validate(&pd)) { 7210 struct mbuf *msyn = pf_syncookie_recreate_syn(&pd); 7211 if (msyn) { 7212 action = pf_test(af, fwdir, ifp, &msyn); 7213 m_freem(msyn); 7214 if (action == PF_PASS || action == PF_AFRT) { 
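					/*
					 * The recreated SYN has set up a
					 * state; find it and take over the
					 * sequence numbers from the cookie
					 * handshake below.
					 */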
7215 PF_STATE_ENTER_READ(); 7216 pf_state_unref(s); 7217 action = pf_find_state(&pd, &key, &s); 7218 s = pf_state_ref(s); 7219 PF_STATE_EXIT_READ(); 7220 if (s == NULL) 7221 return (PF_DROP); 7222 s->src.seqhi = s->dst.seqhi = 7223 ntohl(pd.hdr.tcp.th_ack) - 1; 7224 s->src.seqlo = 7225 ntohl(pd.hdr.tcp.th_seq) - 1; 7226 pf_set_protostate(s, PF_PEER_SRC, 7227 PF_TCPS_PROXY_DST); 7228 } 7229 } else 7230 action = PF_DROP; 7231 } 7232 7233 if (action == PF_MATCH) 7234 action = pf_test_state(&pd, &s, &reason); 7235 7236 if (action == PF_PASS || action == PF_AFRT) { 7237 #if NPFSYNC > 0 7238 pfsync_update_state(s); 7239 #endif /* NPFSYNC > 0 */ 7240 r = s->rule.ptr; 7241 a = s->anchor.ptr; 7242 #if NPFLOG > 0 7243 pd.pflog |= s->log; 7244 #endif /* NPFLOG > 0 */ 7245 } else if (s == NULL) { 7246 PF_LOCK(); 7247 have_pf_lock = 1; 7248 action = pf_test_rule(&pd, &r, &s, &a, &ruleset, 7249 &reason, &deferral); 7250 s = pf_state_ref(s); 7251 } 7252 7253 if (pd.virtual_proto == IPPROTO_TCP) { 7254 if (s) { 7255 if (s->max_mss) 7256 pf_normalize_mss(&pd, s->max_mss); 7257 } else if (r->max_mss) 7258 pf_normalize_mss(&pd, r->max_mss); 7259 } 7260 7261 break; 7262 } 7263 7264 if (have_pf_lock != 0) 7265 PF_UNLOCK(); 7266 7267 /* 7268 * At the moment, we rely on NET_LOCK() to prevent removal of items 7269 * we've collected above ('r', 'anchor' and 'ruleset'). They'll have 7270 * to be refcounted when NET_LOCK() is gone. 7271 */ 7272 7273 done: 7274 if (action != PF_DROP) { 7275 if (s) { 7276 /* The non-state case is handled in pf_test_rule() */ 7277 if (action == PF_PASS && pd.badopts != 0 && 7278 !(s->state_flags & PFSTATE_ALLOWOPTS)) { 7279 action = PF_DROP; 7280 REASON_SET(&reason, PFRES_IPOPTIONS); 7281 #if NPFLOG > 0 7282 pd.pflog |= PF_LOG_FORCE; 7283 #endif /* NPFLOG > 0 */ 7284 DPFPRINTF(LOG_NOTICE, "dropping packet with " 7285 "ip/ipv6 options in pf_test()"); 7286 } 7287 7288 pf_scrub(pd.m, s->state_flags, pd.af, s->min_ttl, 7289 s->set_tos); 7290 pf_tag_packet(pd.m, s->tag, s->rtableid[pd.didx]); 7291 if (pqid || (pd.tos & IPTOS_LOWDELAY)) { 7292 qid = s->pqid; 7293 if (s->state_flags & PFSTATE_SETPRIO) 7294 pd.m->m_pkthdr.pf.prio = s->set_prio[1]; 7295 } else { 7296 qid = s->qid; 7297 if (s->state_flags & PFSTATE_SETPRIO) 7298 pd.m->m_pkthdr.pf.prio = s->set_prio[0]; 7299 } 7300 pd.m->m_pkthdr.pf.delay = s->delay; 7301 } else { 7302 pf_scrub(pd.m, r->scrub_flags, pd.af, r->min_ttl, 7303 r->set_tos); 7304 if (pqid || (pd.tos & IPTOS_LOWDELAY)) { 7305 qid = r->pqid; 7306 if (r->scrub_flags & PFSTATE_SETPRIO) 7307 pd.m->m_pkthdr.pf.prio = r->set_prio[1]; 7308 } else { 7309 qid = r->qid; 7310 if (r->scrub_flags & PFSTATE_SETPRIO) 7311 pd.m->m_pkthdr.pf.prio = r->set_prio[0]; 7312 } 7313 pd.m->m_pkthdr.pf.delay = r->delay; 7314 } 7315 } 7316 7317 if (action == PF_PASS && qid) 7318 pd.m->m_pkthdr.pf.qid = qid; 7319 if (pd.dir == PF_IN && s && s->key[PF_SK_STACK]) 7320 pf_mbuf_link_state_key(pd.m, s->key[PF_SK_STACK]); 7321 if (pd.dir == PF_OUT && 7322 pd.m->m_pkthdr.pf.inp && !pd.m->m_pkthdr.pf.inp->inp_pf_sk && 7323 s && s->key[PF_SK_STACK] && !s->key[PF_SK_STACK]->inp) 7324 pf_state_key_link_inpcb(s->key[PF_SK_STACK], 7325 pd.m->m_pkthdr.pf.inp); 7326 7327 if (s != NULL && !ISSET(pd.m->m_pkthdr.csum_flags, M_FLOWID)) { 7328 pd.m->m_pkthdr.ph_flowid = bemtoh64(&s->id); 7329 SET(pd.m->m_pkthdr.csum_flags, M_FLOWID); 7330 } 7331 7332 /* 7333 * connections redirected to loopback should not match sockets 7334 * bound specifically to loopback due to security implications, 7335 * see 
in_pcblookup_listen(). 7336 */ 7337 if (pd.destchg) 7338 if ((pd.af == AF_INET && (ntohl(pd.dst->v4.s_addr) >> 7339 IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) || 7340 (pd.af == AF_INET6 && IN6_IS_ADDR_LOOPBACK(&pd.dst->v6))) 7341 pd.m->m_pkthdr.pf.flags |= PF_TAG_TRANSLATE_LOCALHOST; 7342 /* We need to redo the route lookup on outgoing routes. */ 7343 if (pd.destchg && pd.dir == PF_OUT) 7344 pd.m->m_pkthdr.pf.flags |= PF_TAG_REROUTE; 7345 7346 if (pd.dir == PF_IN && action == PF_PASS && 7347 (r->divert.type == PF_DIVERT_TO || 7348 r->divert.type == PF_DIVERT_REPLY)) { 7349 struct pf_divert *divert; 7350 7351 if ((divert = pf_get_divert(pd.m))) { 7352 pd.m->m_pkthdr.pf.flags |= PF_TAG_DIVERTED; 7353 divert->addr = r->divert.addr; 7354 divert->port = r->divert.port; 7355 divert->rdomain = pd.rdomain; 7356 divert->type = r->divert.type; 7357 } 7358 } 7359 7360 if (action == PF_PASS && r->divert.type == PF_DIVERT_PACKET) 7361 action = PF_DIVERT; 7362 7363 #if NPFLOG > 0 7364 if (pd.pflog) { 7365 struct pf_rule_item *ri; 7366 7367 if (pd.pflog & PF_LOG_FORCE || r->log & PF_LOG_ALL) 7368 pflog_packet(&pd, reason, r, a, ruleset, NULL); 7369 if (s) { 7370 SLIST_FOREACH(ri, &s->match_rules, entry) 7371 if (ri->r->log & PF_LOG_ALL) 7372 pflog_packet(&pd, reason, ri->r, a, 7373 ruleset, NULL); 7374 } 7375 } 7376 #endif /* NPFLOG > 0 */ 7377 7378 pf_counters_inc(action, &pd, s, r, a); 7379 7380 switch (action) { 7381 case PF_SYNPROXY_DROP: 7382 m_freem(pd.m); 7383 /* FALLTHROUGH */ 7384 case PF_DEFER: 7385 #if NPFSYNC > 0 7386 /* 7387 * We no longer hold PF_LOCK() here, so we can dispatch 7388 * deferral if we are asked to do so. 7389 */ 7390 if (deferral != NULL) 7391 pfsync_undefer(deferral, 0); 7392 #endif /* NPFSYNC > 0 */ 7393 pd.m = NULL; 7394 action = PF_PASS; 7395 break; 7396 case PF_DIVERT: 7397 switch (pd.af) { 7398 case AF_INET: 7399 divert_packet(pd.m, pd.dir, r->divert.port); 7400 pd.m = NULL; 7401 break; 7402 #ifdef INET6 7403 case AF_INET6: 7404 divert6_packet(pd.m, pd.dir, r->divert.port); 7405 pd.m = NULL; 7406 break; 7407 #endif /* INET6 */ 7408 } 7409 action = PF_PASS; 7410 break; 7411 #ifdef INET6 7412 case PF_AFRT: 7413 if (pf_translate_af(&pd)) { 7414 action = PF_DROP; 7415 break; 7416 } 7417 pd.m->m_pkthdr.pf.flags |= PF_TAG_GENERATED; 7418 switch (pd.naf) { 7419 case AF_INET: 7420 if (pd.dir == PF_IN) { 7421 if (ipforwarding == 0) { 7422 ipstat_inc(ips_cantforward); 7423 action = PF_DROP; 7424 break; 7425 } 7426 ip_forward(pd.m, ifp, NULL, 1); 7427 } else 7428 ip_output(pd.m, NULL, NULL, 0, NULL, NULL, 0); 7429 break; 7430 case AF_INET6: 7431 if (pd.dir == PF_IN) { 7432 if (ip6_forwarding == 0) { 7433 ip6stat_inc(ip6s_cantforward); 7434 action = PF_DROP; 7435 break; 7436 } 7437 ip6_forward(pd.m, NULL, 1); 7438 } else 7439 ip6_output(pd.m, NULL, NULL, 0, NULL, NULL); 7440 break; 7441 } 7442 if (action != PF_DROP) { 7443 pd.m = NULL; 7444 action = PF_PASS; 7445 } 7446 break; 7447 #endif /* INET6 */ 7448 case PF_DROP: 7449 m_freem(pd.m); 7450 pd.m = NULL; 7451 break; 7452 default: 7453 if (s && s->rt) { 7454 switch (pd.af) { 7455 case AF_INET: 7456 pf_route(&pd, s); 7457 break; 7458 #ifdef INET6 7459 case AF_INET6: 7460 pf_route6(&pd, s); 7461 break; 7462 #endif /* INET6 */ 7463 } 7464 } 7465 break; 7466 } 7467 7468 #ifdef INET6 7469 /* if reassembled packet passed, create new fragments */ 7470 if (pf_status.reass && action == PF_PASS && pd.m && fwdir == PF_FWD && 7471 pd.af == AF_INET6) { 7472 struct m_tag *mtag; 7473 7474 if ((mtag = m_tag_find(pd.m, PACKET_TAG_PF_REASSEMBLED, NULL))) 
7475 action = pf_refragment6(&pd.m, mtag, NULL, NULL, NULL); 7476 } 7477 #endif /* INET6 */ 7478 if (s && action != PF_DROP) { 7479 if (!s->if_index_in && dir == PF_IN) 7480 s->if_index_in = ifp->if_index; 7481 else if (!s->if_index_out && dir == PF_OUT) 7482 s->if_index_out = ifp->if_index; 7483 } 7484 7485 *m0 = pd.m; 7486 7487 pf_state_unref(s); 7488 7489 return (action); 7490 } 7491 7492 int 7493 pf_ouraddr(struct mbuf *m) 7494 { 7495 struct pf_state_key *sk; 7496 7497 if (m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) 7498 return (1); 7499 7500 sk = m->m_pkthdr.pf.statekey; 7501 if (sk != NULL) { 7502 if (sk->inp != NULL) 7503 return (1); 7504 } 7505 7506 return (-1); 7507 } 7508 7509 /* 7510 * must be called whenever any addressing information such as 7511 * address, port, protocol has changed 7512 */ 7513 void 7514 pf_pkt_addr_changed(struct mbuf *m) 7515 { 7516 pf_mbuf_unlink_state_key(m); 7517 pf_mbuf_unlink_inpcb(m); 7518 } 7519 7520 struct inpcb * 7521 pf_inp_lookup(struct mbuf *m) 7522 { 7523 struct inpcb *inp = NULL; 7524 struct pf_state_key *sk = m->m_pkthdr.pf.statekey; 7525 7526 if (!pf_state_key_isvalid(sk)) 7527 pf_mbuf_unlink_state_key(m); 7528 else 7529 inp = m->m_pkthdr.pf.statekey->inp; 7530 7531 if (inp && inp->inp_pf_sk) 7532 KASSERT(m->m_pkthdr.pf.statekey == inp->inp_pf_sk); 7533 7534 return (inp); 7535 } 7536 7537 void 7538 pf_inp_link(struct mbuf *m, struct inpcb *inp) 7539 { 7540 struct pf_state_key *sk = m->m_pkthdr.pf.statekey; 7541 7542 if (!pf_state_key_isvalid(sk)) { 7543 pf_mbuf_unlink_state_key(m); 7544 return; 7545 } 7546 7547 /* 7548 * we don't need to grab PF-lock here. At worst case we link inp to 7549 * state, which might be just being marked as deleted by another 7550 * thread. 7551 */ 7552 if (inp && !sk->inp && !inp->inp_pf_sk) 7553 pf_state_key_link_inpcb(sk, inp); 7554 7555 /* The statekey has finished finding the inp, it is no longer needed. */ 7556 pf_mbuf_unlink_state_key(m); 7557 } 7558 7559 void 7560 pf_inp_unlink(struct inpcb *inp) 7561 { 7562 pf_inpcb_unlink_state_key(inp); 7563 } 7564 7565 void 7566 pf_state_key_link_reverse(struct pf_state_key *sk, struct pf_state_key *skrev) 7567 { 7568 struct pf_state_key *old_reverse; 7569 7570 old_reverse = atomic_cas_ptr(&sk->reverse, NULL, skrev); 7571 if (old_reverse != NULL) 7572 KASSERT(old_reverse == skrev); 7573 else { 7574 pf_state_key_ref(skrev); 7575 7576 /* 7577 * NOTE: if sk == skrev, then KASSERT() below holds true, we 7578 * still want to grab a reference in such case, because 7579 * pf_state_key_unlink_reverse() does not check whether keys 7580 * are identical or not. 
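		 * Each side of the pair therefore holds its own reference,
		 * even when sk and skrev are the same key.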
7581 */ 7582 old_reverse = atomic_cas_ptr(&skrev->reverse, NULL, sk); 7583 if (old_reverse != NULL) 7584 KASSERT(old_reverse == sk); 7585 7586 pf_state_key_ref(sk); 7587 } 7588 } 7589 7590 #if NPFLOG > 0 7591 void 7592 pf_log_matches(struct pf_pdesc *pd, struct pf_rule *rm, struct pf_rule *am, 7593 struct pf_ruleset *ruleset, struct pf_rule_slist *matchrules) 7594 { 7595 struct pf_rule_item *ri; 7596 7597 /* if this is the log(matches) rule, packet has been logged already */ 7598 if (rm->log & PF_LOG_MATCHES) 7599 return; 7600 7601 SLIST_FOREACH(ri, matchrules, entry) 7602 if (ri->r->log & PF_LOG_MATCHES) 7603 pflog_packet(pd, PFRES_MATCH, rm, am, ruleset, ri->r); 7604 } 7605 #endif /* NPFLOG > 0 */ 7606 7607 struct pf_state_key * 7608 pf_state_key_ref(struct pf_state_key *sk) 7609 { 7610 if (sk != NULL) 7611 PF_REF_TAKE(sk->refcnt); 7612 7613 return (sk); 7614 } 7615 7616 void 7617 pf_state_key_unref(struct pf_state_key *sk) 7618 { 7619 if (PF_REF_RELE(sk->refcnt)) { 7620 /* state key must be removed from tree */ 7621 KASSERT(!pf_state_key_isvalid(sk)); 7622 /* state key must be unlinked from reverse key */ 7623 KASSERT(sk->reverse == NULL); 7624 /* state key must be unlinked from socket */ 7625 KASSERT(sk->inp == NULL); 7626 pool_put(&pf_state_key_pl, sk); 7627 } 7628 } 7629 7630 int 7631 pf_state_key_isvalid(struct pf_state_key *sk) 7632 { 7633 return ((sk != NULL) && (sk->removed == 0)); 7634 } 7635 7636 void 7637 pf_mbuf_link_state_key(struct mbuf *m, struct pf_state_key *sk) 7638 { 7639 KASSERT(m->m_pkthdr.pf.statekey == NULL); 7640 m->m_pkthdr.pf.statekey = pf_state_key_ref(sk); 7641 } 7642 7643 void 7644 pf_mbuf_unlink_state_key(struct mbuf *m) 7645 { 7646 struct pf_state_key *sk = m->m_pkthdr.pf.statekey; 7647 7648 if (sk != NULL) { 7649 m->m_pkthdr.pf.statekey = NULL; 7650 pf_state_key_unref(sk); 7651 } 7652 } 7653 7654 void 7655 pf_mbuf_link_inpcb(struct mbuf *m, struct inpcb *inp) 7656 { 7657 KASSERT(m->m_pkthdr.pf.inp == NULL); 7658 m->m_pkthdr.pf.inp = in_pcbref(inp); 7659 } 7660 7661 void 7662 pf_mbuf_unlink_inpcb(struct mbuf *m) 7663 { 7664 struct inpcb *inp = m->m_pkthdr.pf.inp; 7665 7666 if (inp != NULL) { 7667 m->m_pkthdr.pf.inp = NULL; 7668 in_pcbunref(inp); 7669 } 7670 } 7671 7672 void 7673 pf_state_key_link_inpcb(struct pf_state_key *sk, struct inpcb *inp) 7674 { 7675 KASSERT(sk->inp == NULL); 7676 sk->inp = in_pcbref(inp); 7677 KASSERT(inp->inp_pf_sk == NULL); 7678 inp->inp_pf_sk = pf_state_key_ref(sk); 7679 } 7680 7681 void 7682 pf_inpcb_unlink_state_key(struct inpcb *inp) 7683 { 7684 struct pf_state_key *sk = inp->inp_pf_sk; 7685 7686 if (sk != NULL) { 7687 KASSERT(sk->inp == inp); 7688 sk->inp = NULL; 7689 inp->inp_pf_sk = NULL; 7690 pf_state_key_unref(sk); 7691 in_pcbunref(inp); 7692 } 7693 } 7694 7695 void 7696 pf_state_key_unlink_inpcb(struct pf_state_key *sk) 7697 { 7698 struct inpcb *inp = sk->inp; 7699 7700 if (inp != NULL) { 7701 KASSERT(inp->inp_pf_sk == sk); 7702 sk->inp = NULL; 7703 inp->inp_pf_sk = NULL; 7704 pf_state_key_unref(sk); 7705 in_pcbunref(inp); 7706 } 7707 } 7708 7709 void 7710 pf_state_key_unlink_reverse(struct pf_state_key *sk) 7711 { 7712 struct pf_state_key *skrev = sk->reverse; 7713 7714 /* Note that sk and skrev may be equal, then we unref twice. 
*/ 7715 if (skrev != NULL) { 7716 KASSERT(skrev->reverse == sk); 7717 sk->reverse = NULL; 7718 skrev->reverse = NULL; 7719 pf_state_key_unref(skrev); 7720 pf_state_key_unref(sk); 7721 } 7722 } 7723 7724 struct pf_state * 7725 pf_state_ref(struct pf_state *s) 7726 { 7727 if (s != NULL) 7728 PF_REF_TAKE(s->refcnt); 7729 return (s); 7730 } 7731 7732 void 7733 pf_state_unref(struct pf_state *s) 7734 { 7735 if ((s != NULL) && PF_REF_RELE(s->refcnt)) { 7736 /* never inserted or removed */ 7737 #if NPFSYNC > 0 7738 KASSERT((TAILQ_NEXT(s, sync_list) == NULL) || 7739 ((TAILQ_NEXT(s, sync_list) == _Q_INVALID) && 7740 (s->sync_state == PFSYNC_S_NONE))); 7741 #endif /* NPFSYNC */ 7742 KASSERT((TAILQ_NEXT(s, entry_list) == NULL) || 7743 (TAILQ_NEXT(s, entry_list) == _Q_INVALID)); 7744 KASSERT((s->key[PF_SK_WIRE] == NULL) && 7745 (s->key[PF_SK_STACK] == NULL)); 7746 7747 pool_put(&pf_state_pl, s); 7748 } 7749 } 7750 7751 int 7752 pf_delay_pkt(struct mbuf *m, u_int ifidx) 7753 { 7754 struct pf_pktdelay *pdy; 7755 7756 if ((pdy = pool_get(&pf_pktdelay_pl, PR_NOWAIT)) == NULL) { 7757 m_freem(m); 7758 return (ENOBUFS); 7759 } 7760 pdy->ifidx = ifidx; 7761 pdy->m = m; 7762 timeout_set(&pdy->to, pf_pktenqueue_delayed, pdy); 7763 timeout_add_msec(&pdy->to, m->m_pkthdr.pf.delay); 7764 m->m_pkthdr.pf.delay = 0; 7765 return (0); 7766 } 7767 7768 void 7769 pf_pktenqueue_delayed(void *arg) 7770 { 7771 struct pf_pktdelay *pdy = arg; 7772 struct ifnet *ifp; 7773 7774 ifp = if_get(pdy->ifidx); 7775 if (ifp != NULL) { 7776 if_enqueue(ifp, pdy->m); 7777 if_put(ifp); 7778 } else 7779 m_freem(pdy->m); 7780 7781 pool_put(&pf_pktdelay_pl, pdy); 7782 } 7783