/*	$OpenBSD: pf.c,v 1.1126 2022/03/17 18:27:55 sthen Exp $ */

/*
 * Copyright (c) 2001 Daniel Hartmeier
 * Copyright (c) 2002 - 2013 Henning Brauer <henning@openbsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *    - Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *    - Redistributions in binary form must reproduce the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer in the documentation and/or other materials provided
 *      with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Effort sponsored in part by the Defense Advanced Research Projects
 * Agency (DARPA) and Air Force Research Laboratory, Air Force
 * Materiel Command, USAF, under agreement number F30602-01-2-0537.
 *
 */

#include "bpfilter.h"
#include "carp.h"
#include "pflog.h"
#include "pfsync.h"
#include "pflow.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/filio.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/kernel.h>
#include <sys/time.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/syslog.h>

#include <crypto/sha2.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_types.h>
#include <net/route.h>

#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
#include <netinet/in_pcb.h>
#include <netinet/ip_var.h>
#include <netinet/ip_icmp.h>
#include <netinet/icmp_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_fsm.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>
#include <netinet/ip_divert.h>

#ifdef INET6
#include <netinet6/in6_var.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet/icmp6.h>
#include <netinet6/nd6.h>
#include <netinet6/ip6_divert.h>
#endif /* INET6 */

#include <net/pfvar.h>
#include <net/pfvar_priv.h>

#if NPFLOG > 0
#include <net/if_pflog.h>
#endif /* NPFLOG > 0 */

#if NPFLOW > 0
#include <net/if_pflow.h>
#endif /* NPFLOW > 0 */

#if NPFSYNC > 0
#include <net/if_pfsync.h>
#else
struct pfsync_deferral;
#endif /* NPFSYNC > 0 */

#ifdef DDB
#include <machine/db_machdep.h>
#include <ddb/db_interface.h>
#endif

/*
 * Global variables
 */
struct pf_state_tree	 pf_statetbl;
struct pf_queuehead	 pf_queues[2];
struct pf_queuehead	*pf_queues_active;
struct pf_queuehead	*pf_queues_inactive;

struct pf_status	 pf_status;

int			 pf_hdr_limit = 20;  /* arbitrary limit, tune in ddb */

SHA2_CTX		 pf_tcp_secret_ctx;
u_char			 pf_tcp_secret[16];
int			 pf_tcp_secret_init;
int			 pf_tcp_iss_off;

int		 pf_npurge;
struct task	 pf_purge_task = TASK_INITIALIZER(pf_purge, &pf_npurge);
struct timeout	 pf_purge_to = TIMEOUT_INITIALIZER(pf_purge_timeout, NULL);

enum pf_test_status {
	PF_TEST_FAIL = -1,
	PF_TEST_OK,
	PF_TEST_QUICK
};

struct pf_test_ctx {
	enum pf_test_status	  test_status;
	struct pf_pdesc		 *pd;
	struct pf_rule_actions	  act;
	u_int8_t		  icmpcode;
	u_int8_t		  icmptype;
	int			  icmp_dir;
	int			  state_icmp;
	int			  tag;
	u_short			  reason;
	struct pf_rule_item	 *ri;
	struct pf_src_node	 *sns[PF_SN_MAX];
	struct pf_rule_slist	  rules;
	struct pf_rule		 *nr;
	struct pf_rule		**rm;
	struct pf_rule		 *a;
	struct pf_rule		**am;
	struct pf_ruleset	**rsm;
	struct pf_ruleset	 *arsm;
	struct pf_ruleset	 *aruleset;
	struct tcphdr		 *th;
	int			  depth;
};

#define	PF_ANCHOR_STACK_MAX	64

struct pool		 pf_src_tree_pl, pf_rule_pl, pf_queue_pl;
struct pool		 pf_state_pl, pf_state_key_pl, pf_state_item_pl;
struct pool		 pf_rule_item_pl, pf_sn_item_pl, pf_pktdelay_pl;

void			 pf_add_threshold(struct pf_threshold *);
int			 pf_check_threshold(struct pf_threshold *);
int			 pf_check_tcp_cksum(struct mbuf *, int, int,
			    sa_family_t);
static __inline void	 pf_cksum_fixup(u_int16_t *, u_int16_t, u_int16_t,
			    u_int8_t);
void			 pf_cksum_fixup_a(u_int16_t *, const struct pf_addr *,
			    const struct pf_addr *, sa_family_t, u_int8_t);
int			 pf_modulate_sack(struct pf_pdesc *,
			    struct pf_state_peer *);
int			 pf_icmp_mapping(struct pf_pdesc *, u_int8_t, int *,
			    u_int16_t *, u_int16_t *);
int			 pf_change_icmp_af(struct mbuf *, int,
			    struct pf_pdesc *, struct pf_pdesc *,
			    struct pf_addr *, struct pf_addr *, sa_family_t,
			    sa_family_t);
int			 pf_translate_a(struct pf_pdesc *, struct pf_addr *,
			    struct pf_addr *);
void			 pf_translate_icmp(struct pf_pdesc *, struct pf_addr *,
			    u_int16_t *, struct pf_addr *, struct pf_addr *,
			    u_int16_t);
int			 pf_translate_icmp_af(struct pf_pdesc*, int, void *);
void			 pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t, int,
			    sa_family_t, struct pf_rule *, u_int);
void			 pf_detach_state(struct pf_state *);
void			 pf_state_key_detach(struct pf_state *, int);
u_int32_t		 pf_tcp_iss(struct pf_pdesc *);
void			 pf_rule_to_actions(struct pf_rule *,
			    struct pf_rule_actions *);
int			 pf_test_rule(struct pf_pdesc *, struct pf_rule **,
			    struct pf_state **, struct pf_rule **,
			    struct pf_ruleset **, u_short *,
			    struct pfsync_deferral **);
static __inline int	 pf_create_state(struct pf_pdesc *, struct pf_rule *,
			    struct pf_rule *, struct pf_rule *,
			    struct pf_state_key **, struct pf_state_key **,
			    int *, struct pf_state **, int,
			    struct pf_rule_slist *, struct pf_rule_actions *,
			    struct pf_src_node *[]);
static __inline int	 pf_state_key_addr_setup(struct pf_pdesc *, void *,
			    int, struct pf_addr *, int, struct pf_addr *,
			    int, int);
int			 pf_state_key_setup(struct pf_pdesc *, struct
			    pf_state_key **, struct pf_state_key **, int);
int			 pf_tcp_track_full(struct pf_pdesc *,
			    struct pf_state **, u_short *, int *, int);
int			 pf_tcp_track_sloppy(struct pf_pdesc *,
			    struct pf_state **, u_short *);
static __inline int	 pf_synproxy(struct pf_pdesc *, struct pf_state **,
			    u_short *);
int			 pf_test_state(struct pf_pdesc *, struct pf_state **,
			    u_short *, int);
int			 pf_icmp_state_lookup(struct pf_pdesc *,
			    struct pf_state_key_cmp *, struct pf_state **,
			    u_int16_t, u_int16_t, int, int *, int, int);
int			 pf_test_state_icmp(struct pf_pdesc *,
			    struct pf_state **, u_short *);
u_int16_t		 pf_calc_mss(struct pf_addr *, sa_family_t, int,
			    u_int16_t);
static __inline int	 pf_set_rt_ifp(struct pf_state *, struct pf_addr *,
			    sa_family_t, struct pf_src_node **);
struct pf_divert	*pf_get_divert(struct mbuf *);
int			 pf_walk_header(struct pf_pdesc *, struct ip *,
			    u_short *);
int			 pf_walk_option6(struct pf_pdesc *, struct ip6_hdr *,
			    int, int, u_short *);
int			 pf_walk_header6(struct pf_pdesc *, struct ip6_hdr *,
			    u_short *);
void			 pf_print_state_parts(struct pf_state *,
			    struct pf_state_key *, struct pf_state_key *);
int			 pf_addr_wrap_neq(struct pf_addr_wrap *,
			    struct pf_addr_wrap *);
int			 pf_compare_state_keys(struct pf_state_key *,
			    struct pf_state_key *, struct pfi_kif *, u_int);
int			 pf_find_state(struct pf_pdesc *,
			    struct pf_state_key_cmp *, struct pf_state **);
int			 pf_src_connlimit(struct pf_state **);
int			 pf_match_rcvif(struct mbuf *, struct pf_rule *);
int			 pf_step_into_anchor(struct pf_test_ctx *,
			    struct pf_rule *);
int			 pf_match_rule(struct pf_test_ctx *,
			    struct pf_ruleset *);
void			 pf_counters_inc(int, struct pf_pdesc *,
			    struct pf_state *, struct pf_rule *,
			    struct pf_rule *);

int			 pf_state_key_isvalid(struct pf_state_key *);
struct pf_state_key	*pf_state_key_ref(struct pf_state_key *);
void			 pf_state_key_unref(struct pf_state_key *);
void			 pf_state_key_link_reverse(struct pf_state_key *,
			    struct pf_state_key *);
void			 pf_state_key_unlink_reverse(struct pf_state_key *);
void			 pf_state_key_link_inpcb(struct pf_state_key *,
			    struct inpcb *);
void			 pf_state_key_unlink_inpcb(struct pf_state_key *);
void			 pf_inpcb_unlink_state_key(struct inpcb *);
void			 pf_pktenqueue_delayed(void *);
int32_t			 pf_state_expires(const struct pf_state *, uint8_t);

#if NPFLOG > 0
void			 pf_log_matches(struct pf_pdesc *, struct pf_rule *,
			    struct pf_rule *, struct pf_ruleset *,
			    struct pf_rule_slist *);
#endif	/* NPFLOG > 0 */

extern struct pool pfr_ktable_pl;
extern struct pool pfr_kentry_pl;

struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = {
	{ &pf_state_pl, PFSTATE_HIWAT, PFSTATE_HIWAT },
	{ &pf_src_tree_pl, PFSNODE_HIWAT, PFSNODE_HIWAT },
	{ &pf_frent_pl, PFFRAG_FRENT_HIWAT, PFFRAG_FRENT_HIWAT },
	{ &pfr_ktable_pl, PFR_KTABLE_HIWAT, PFR_KTABLE_HIWAT },
	{ &pfr_kentry_pl, PFR_KENTRY_HIWAT, PFR_KENTRY_HIWAT },
	{ &pf_pktdelay_pl, PF_PKTDELAY_MAXPKTS, PF_PKTDELAY_MAXPKTS }
};

#define BOUND_IFACE(r, k) \
	((r)->rule_flag & PFRULE_IFBOUND) ? (k) : pfi_all

#define STATE_INC_COUNTERS(s)					\
	do {							\
		struct pf_rule_item *mrm;			\
		s->rule.ptr->states_cur++;			\
		s->rule.ptr->states_tot++;			\
		if (s->anchor.ptr != NULL) {			\
			s->anchor.ptr->states_cur++;		\
			s->anchor.ptr->states_tot++;		\
		}						\
		SLIST_FOREACH(mrm, &s->match_rules, entry)	\
			mrm->r->states_cur++;			\
	} while (0)

static __inline int pf_src_compare(struct pf_src_node *, struct pf_src_node *);
static __inline int pf_state_compare_key(struct pf_state_key *,
	struct pf_state_key *);
static __inline int pf_state_compare_id(struct pf_state *,
	struct pf_state *);
#ifdef INET6
static __inline void pf_cksum_uncover(u_int16_t *, u_int16_t, u_int8_t);
static __inline void pf_cksum_cover(u_int16_t *, u_int16_t, u_int8_t);
#endif /* INET6 */
static __inline void pf_set_protostate(struct pf_state *, int, u_int8_t);

struct pf_src_tree tree_src_tracking;

struct pf_state_tree_id tree_id;
struct pf_state_list pf_state_list = PF_STATE_LIST_INITIALIZER(pf_state_list);

RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare);
RB_GENERATE(pf_state_tree, pf_state_key, entry, pf_state_compare_key);
RB_GENERATE(pf_state_tree_id, pf_state,
    entry_id, pf_state_compare_id);

SLIST_HEAD(pf_rule_gcl, pf_rule)	pf_rule_gcl =
	SLIST_HEAD_INITIALIZER(pf_rule_gcl);

__inline int
pf_addr_compare(struct pf_addr *a, struct pf_addr *b, sa_family_t af)
{
	switch (af) {
	case AF_INET:
		if (a->addr32[0] > b->addr32[0])
			return (1);
		if (a->addr32[0] < b->addr32[0])
			return (-1);
		break;
#ifdef INET6
	case AF_INET6:
		if (a->addr32[3] > b->addr32[3])
			return (1);
		if (a->addr32[3] < b->addr32[3])
			return (-1);
		if (a->addr32[2] > b->addr32[2])
			return (1);
		if (a->addr32[2] < b->addr32[2])
			return (-1);
		if (a->addr32[1] > b->addr32[1])
			return (1);
		if (a->addr32[1] < b->addr32[1])
			return (-1);
		if (a->addr32[0] > b->addr32[0])
			return (1);
		if (a->addr32[0] < b->addr32[0])
			return (-1);
		break;
#endif /* INET6 */
	}
	return (0);
}

static __inline int
pf_src_compare(struct pf_src_node *a, struct pf_src_node *b)
{
	int	diff;

	if (a->rule.ptr > b->rule.ptr)
		return (1);
	if (a->rule.ptr < b->rule.ptr)
		return (-1);
	if ((diff = a->type - b->type) != 0)
		return (diff);
	if ((diff = a->af - b->af) != 0)
		return (diff);
	if ((diff = pf_addr_compare(&a->addr, &b->addr, a->af)) != 0)
		return (diff);
	return (0);
}

static __inline void
pf_set_protostate(struct pf_state *s, int which, u_int8_t newstate)
{
	if (which == PF_PEER_DST || which == PF_PEER_BOTH)
		s->dst.state = newstate;
	if (which == PF_PEER_DST)
		return;

	if (s->src.state == newstate)
		return;
	if (s->creatorid == pf_status.hostid && s->key[PF_SK_STACK] != NULL &&
	    s->key[PF_SK_STACK]->proto == IPPROTO_TCP &&
	    !(TCPS_HAVEESTABLISHED(s->src.state) ||
	    s->src.state == TCPS_CLOSED) &&
	    (TCPS_HAVEESTABLISHED(newstate) || newstate == TCPS_CLOSED))
		pf_status.states_halfopen--;

	s->src.state = newstate;
}

void
pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af)
{
	switch (af) {
	case AF_INET:
		dst->addr32[0] = src->addr32[0];
		break;
#ifdef INET6
	case AF_INET6:
		dst->addr32[0] = src->addr32[0];
		dst->addr32[1] = src->addr32[1];
		dst->addr32[2] = src->addr32[2];
		dst->addr32[3] = src->addr32[3];
		break;
#endif /* INET6 */
	default:
		unhandled_af(af);
	}
}

void
pf_init_threshold(struct pf_threshold *threshold,
    u_int32_t limit, u_int32_t seconds)
{
	threshold->limit = limit * PF_THRESHOLD_MULT;
	threshold->seconds = seconds;
	threshold->count = 0;
	threshold->last = getuptime();
}

void
pf_add_threshold(struct pf_threshold *threshold)
{
	u_int32_t t = getuptime(), diff = t - threshold->last;

	if (diff >= threshold->seconds)
		threshold->count = 0;
	else
		threshold->count -= threshold->count * diff /
		    threshold->seconds;
	threshold->count += PF_THRESHOLD_MULT;
	threshold->last = t;
}

int
pf_check_threshold(struct pf_threshold *threshold)
{
	return (threshold->count > threshold->limit);
}
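
/*
 * Illustrative note (not in the original source): with a rule like
 * "max-src-conn-rate 100/10", pf_init_threshold() stores
 * limit = 100 * PF_THRESHOLD_MULT and seconds = 10.  Each
 * pf_add_threshold() call first decays the fixed-point count in
 * proportion to the time elapsed since the last update (e.g. after
 * 5 of the 10 seconds, half of the accumulated count remains) and
 * then adds one PF_THRESHOLD_MULT unit; pf_check_threshold() simply
 * compares the scaled count against the scaled limit.
 */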

void
pf_state_list_insert(struct pf_state_list *pfs, struct pf_state *st)
{
	/*
	 * we can always put states on the end of the list.
	 *
	 * things reading the list should take a read lock, then
	 * the mutex, get the head and tail pointers, release the
	 * mutex, and then they can iterate between the head and tail.
	 */

	pf_state_ref(st); /* get a ref for the list */

	mtx_enter(&pfs->pfs_mtx);
	TAILQ_INSERT_TAIL(&pfs->pfs_list, st, entry_list);
	mtx_leave(&pfs->pfs_mtx);
}

void
pf_state_list_remove(struct pf_state_list *pfs, struct pf_state *st)
{
	/* states can only be removed when the write lock is held */
	rw_assert_wrlock(&pfs->pfs_rwl);

	mtx_enter(&pfs->pfs_mtx);
	TAILQ_REMOVE(&pfs->pfs_list, st, entry_list);
	mtx_leave(&pfs->pfs_mtx);

	pf_state_unref(st); /* list no longer references the state */
}
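
/*
 * Reader-side sketch of the protocol described above (assumed usage;
 * pf_purge_expired_states() below is the in-tree reader):
 *
 *	rw_enter_read(&pfs->pfs_rwl);
 *	mtx_enter(&pfs->pfs_mtx);
 *	head = TAILQ_FIRST(&pfs->pfs_list);
 *	tail = TAILQ_LAST(&pfs->pfs_list, pf_state_queue);
 *	mtx_leave(&pfs->pfs_mtx);
 *
 *	for (st = head; st != NULL; st = TAILQ_NEXT(st, entry_list)) {
 *		(examine st)
 *		if (st == tail)
 *			break;
 *	}
 *	rw_exit_read(&pfs->pfs_rwl);
 */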

int
pf_src_connlimit(struct pf_state **state)
{
	int			 bad = 0;
	struct pf_src_node	*sn;

	if ((sn = pf_get_src_node((*state), PF_SN_NONE)) == NULL)
		return (0);

	sn->conn++;
	(*state)->src.tcp_est = 1;
	pf_add_threshold(&sn->conn_rate);

	if ((*state)->rule.ptr->max_src_conn &&
	    (*state)->rule.ptr->max_src_conn < sn->conn) {
		pf_status.lcounters[LCNT_SRCCONN]++;
		bad++;
	}

	if ((*state)->rule.ptr->max_src_conn_rate.limit &&
	    pf_check_threshold(&sn->conn_rate)) {
		pf_status.lcounters[LCNT_SRCCONNRATE]++;
		bad++;
	}

	if (!bad)
		return (0);

	if ((*state)->rule.ptr->overload_tbl) {
		struct pfr_addr	 p;
		u_int32_t	 killed = 0;

		pf_status.lcounters[LCNT_OVERLOAD_TABLE]++;
		if (pf_status.debug >= LOG_NOTICE) {
			log(LOG_NOTICE,
			    "pf: pf_src_connlimit: blocking address ");
			pf_print_host(&sn->addr, 0,
			    (*state)->key[PF_SK_WIRE]->af);
		}

		memset(&p, 0, sizeof(p));
		p.pfra_af = (*state)->key[PF_SK_WIRE]->af;
		switch ((*state)->key[PF_SK_WIRE]->af) {
		case AF_INET:
			p.pfra_net = 32;
			p.pfra_ip4addr = sn->addr.v4;
			break;
#ifdef INET6
		case AF_INET6:
			p.pfra_net = 128;
			p.pfra_ip6addr = sn->addr.v6;
			break;
#endif /* INET6 */
		}

		pfr_insert_kentry((*state)->rule.ptr->overload_tbl,
		    &p, gettime());

		/* kill existing states if that's required. */
		if ((*state)->rule.ptr->flush) {
			struct pf_state_key	*sk;
			struct pf_state		*st;

			pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++;
			RB_FOREACH(st, pf_state_tree_id, &tree_id) {
				sk = st->key[PF_SK_WIRE];
				/*
				 * Kill states from this source.  (Only those
				 * from the same rule if PF_FLUSH_GLOBAL is not
				 * set)
				 */
				if (sk->af ==
				    (*state)->key[PF_SK_WIRE]->af &&
				    (((*state)->direction == PF_OUT &&
				    PF_AEQ(&sn->addr, &sk->addr[1], sk->af)) ||
				    ((*state)->direction == PF_IN &&
				    PF_AEQ(&sn->addr, &sk->addr[0], sk->af))) &&
				    ((*state)->rule.ptr->flush &
				    PF_FLUSH_GLOBAL ||
				    (*state)->rule.ptr == st->rule.ptr)) {
					st->timeout = PFTM_PURGE;
					pf_set_protostate(st, PF_PEER_BOTH,
					    TCPS_CLOSED);
					killed++;
				}
			}
			if (pf_status.debug >= LOG_NOTICE)
				addlog(", %u states killed", killed);
		}
		if (pf_status.debug >= LOG_NOTICE)
			addlog("\n");
	}

	/* kill this state */
	(*state)->timeout = PFTM_PURGE;
	pf_set_protostate(*state, PF_PEER_BOTH, TCPS_CLOSED);
	return (1);
}

int
pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule,
    enum pf_sn_types type, sa_family_t af, struct pf_addr *src,
    struct pf_addr *raddr, struct pfi_kif *kif)
{
	struct pf_src_node	k;

	if (*sn == NULL) {
		k.af = af;
		k.type = type;
		pf_addrcpy(&k.addr, src, af);
		k.rule.ptr = rule;
		pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
		*sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
	}
	if (*sn == NULL) {
		if (!rule->max_src_nodes ||
		    rule->src_nodes < rule->max_src_nodes)
			(*sn) = pool_get(&pf_src_tree_pl, PR_NOWAIT | PR_ZERO);
		else
			pf_status.lcounters[LCNT_SRCNODES]++;
		if ((*sn) == NULL)
			return (-1);

		pf_init_threshold(&(*sn)->conn_rate,
		    rule->max_src_conn_rate.limit,
		    rule->max_src_conn_rate.seconds);

		(*sn)->type = type;
		(*sn)->af = af;
		(*sn)->rule.ptr = rule;
		pf_addrcpy(&(*sn)->addr, src, af);
		if (raddr)
			pf_addrcpy(&(*sn)->raddr, raddr, af);
		if (RB_INSERT(pf_src_tree,
		    &tree_src_tracking, *sn) != NULL) {
			if (pf_status.debug >= LOG_NOTICE) {
				log(LOG_NOTICE,
				    "pf: src_tree insert failed: ");
				pf_print_host(&(*sn)->addr, 0, af);
				addlog("\n");
			}
			pool_put(&pf_src_tree_pl, *sn);
			return (-1);
		}
		(*sn)->creation = getuptime();
		(*sn)->rule.ptr->src_nodes++;
		if (kif != NULL) {
			(*sn)->kif = kif;
			pfi_kif_ref(kif, PFI_KIF_REF_SRCNODE);
		}
		pf_status.scounters[SCNT_SRC_NODE_INSERT]++;
		pf_status.src_nodes++;
	} else {
		if (rule->max_src_states &&
		    (*sn)->states >= rule->max_src_states) {
			pf_status.lcounters[LCNT_SRCSTATES]++;
			return (-1);
		}
	}
	return (0);
}

void
pf_remove_src_node(struct pf_src_node *sn)
{
	if (sn->states > 0 || sn->expire > getuptime())
		return;

	sn->rule.ptr->src_nodes--;
	if (sn->rule.ptr->states_cur == 0 &&
	    sn->rule.ptr->src_nodes == 0)
		pf_rm_rule(NULL, sn->rule.ptr);
	RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
	pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
	pf_status.src_nodes--;
	pfi_kif_unref(sn->kif, PFI_KIF_REF_SRCNODE);
	pool_put(&pf_src_tree_pl, sn);
}

struct pf_src_node *
pf_get_src_node(struct pf_state *s, enum pf_sn_types type)
{
	struct pf_sn_item	*sni;

	SLIST_FOREACH(sni, &s->src_nodes, next)
		if (sni->sn->type == type)
			return (sni->sn);
	return (NULL);
}

void
pf_state_rm_src_node(struct pf_state *s, struct pf_src_node *sn)
{
	struct pf_sn_item	*sni, *snin, *snip = NULL;

	for (sni = SLIST_FIRST(&s->src_nodes); sni; sni = snin) {
		snin = SLIST_NEXT(sni, next);
		if (sni->sn == sn) {
			if (snip)
				SLIST_REMOVE_AFTER(snip, next);
			else
				SLIST_REMOVE_HEAD(&s->src_nodes, next);
			pool_put(&pf_sn_item_pl, sni);
			sni = NULL;
			sn->states--;
		}
		if (sni != NULL)
			snip = sni;
	}
}

/* state table stuff */

static __inline int
pf_state_compare_key(struct pf_state_key *a, struct pf_state_key *b)
{
	int	diff;

	if ((diff = a->proto - b->proto) != 0)
		return (diff);
	if ((diff = a->af - b->af) != 0)
		return (diff);
	if ((diff = pf_addr_compare(&a->addr[0], &b->addr[0], a->af)) != 0)
		return (diff);
	if ((diff = pf_addr_compare(&a->addr[1], &b->addr[1], a->af)) != 0)
		return (diff);
	if ((diff = a->port[0] - b->port[0]) != 0)
		return (diff);
	if ((diff = a->port[1] - b->port[1]) != 0)
		return (diff);
	if ((diff = a->rdomain - b->rdomain) != 0)
		return (diff);
	return (0);
}

static __inline int
pf_state_compare_id(struct pf_state *a, struct pf_state *b)
{
	if (a->id > b->id)
		return (1);
	if (a->id < b->id)
		return (-1);
	if (a->creatorid > b->creatorid)
		return (1);
	if (a->creatorid < b->creatorid)
		return (-1);

	return (0);
}

int
pf_state_key_attach(struct pf_state_key *sk, struct pf_state *s, int idx)
{
	struct pf_state_item	*si;
	struct pf_state_key	*cur;
	struct pf_state		*olds = NULL;

	KASSERT(s->key[idx] == NULL);
	if ((cur = RB_INSERT(pf_state_tree, &pf_statetbl, sk)) != NULL) {
		/* key exists. check for same kif, if none, add to key */
		TAILQ_FOREACH(si, &cur->states, entry)
			if (si->s->kif == s->kif &&
			    ((si->s->key[PF_SK_WIRE]->af == sk->af &&
			    si->s->direction == s->direction) ||
			    (si->s->key[PF_SK_WIRE]->af !=
			    si->s->key[PF_SK_STACK]->af &&
			    sk->af == si->s->key[PF_SK_STACK]->af &&
			    si->s->direction != s->direction))) {
				int reuse = 0;

				if (sk->proto == IPPROTO_TCP &&
				    si->s->src.state >= TCPS_FIN_WAIT_2 &&
				    si->s->dst.state >= TCPS_FIN_WAIT_2)
					reuse = 1;
				if (pf_status.debug >= LOG_NOTICE) {
					log(LOG_NOTICE,
					    "pf: %s key attach %s on %s: ",
					    (idx == PF_SK_WIRE) ?
					    "wire" : "stack",
					    reuse ? "reuse" : "failed",
					    s->kif->pfik_name);
					pf_print_state_parts(s,
					    (idx == PF_SK_WIRE) ? sk : NULL,
					    (idx == PF_SK_STACK) ? sk : NULL);
					addlog(", existing: ");
					pf_print_state_parts(si->s,
					    (idx == PF_SK_WIRE) ? sk : NULL,
					    (idx == PF_SK_STACK) ? sk : NULL);
					addlog("\n");
				}
				if (reuse) {
					pf_set_protostate(si->s, PF_PEER_BOTH,
					    TCPS_CLOSED);
					/* remove late or sks can go away */
					olds = si->s;
				} else {
					pool_put(&pf_state_key_pl, sk);
					return (-1);	/* collision! */
				}
			}
		pool_put(&pf_state_key_pl, sk);
		s->key[idx] = cur;
	} else
		s->key[idx] = sk;

	if ((si = pool_get(&pf_state_item_pl, PR_NOWAIT)) == NULL) {
		pf_state_key_detach(s, idx);
		return (-1);
	}
	si->s = s;

	/* list is sorted, if-bound states before floating */
	if (s->kif == pfi_all)
		TAILQ_INSERT_TAIL(&s->key[idx]->states, si, entry);
	else
		TAILQ_INSERT_HEAD(&s->key[idx]->states, si, entry);

	if (olds)
		pf_remove_state(olds);

	return (0);
}

void
pf_detach_state(struct pf_state *s)
{
	if (s->key[PF_SK_WIRE] == s->key[PF_SK_STACK])
		s->key[PF_SK_WIRE] = NULL;

	if (s->key[PF_SK_STACK] != NULL)
		pf_state_key_detach(s, PF_SK_STACK);

	if (s->key[PF_SK_WIRE] != NULL)
		pf_state_key_detach(s, PF_SK_WIRE);
}

void
pf_state_key_detach(struct pf_state *s, int idx)
{
	struct pf_state_item	*si;
	struct pf_state_key	*sk;

	if (s->key[idx] == NULL)
		return;

	si = TAILQ_FIRST(&s->key[idx]->states);
	while (si && si->s != s)
		si = TAILQ_NEXT(si, entry);

	if (si) {
		TAILQ_REMOVE(&s->key[idx]->states, si, entry);
		pool_put(&pf_state_item_pl, si);
	}

	sk = s->key[idx];
	s->key[idx] = NULL;
	if (TAILQ_EMPTY(&sk->states)) {
		RB_REMOVE(pf_state_tree, &pf_statetbl, sk);
		sk->removed = 1;
		pf_state_key_unlink_reverse(sk);
		pf_state_key_unlink_inpcb(sk);
		pf_state_key_unref(sk);
	}
}

struct pf_state_key *
pf_alloc_state_key(int pool_flags)
{
	struct pf_state_key	*sk;

	if ((sk = pool_get(&pf_state_key_pl, pool_flags)) == NULL)
		return (NULL);
	TAILQ_INIT(&sk->states);

	return (sk);
}

static __inline int
pf_state_key_addr_setup(struct pf_pdesc *pd, void *arg, int sidx,
    struct pf_addr *saddr, int didx, struct pf_addr *daddr, int af, int multi)
{
	struct pf_state_key_cmp *key = arg;
#ifdef INET6
	struct pf_addr *target;

	if (af == AF_INET || pd->proto != IPPROTO_ICMPV6)
		goto copy;

	switch (pd->hdr.icmp6.icmp6_type) {
	case ND_NEIGHBOR_SOLICIT:
		if (multi)
			return (-1);
		target = (struct pf_addr *)&pd->hdr.nd_ns.nd_ns_target;
		daddr = target;
		break;
	case ND_NEIGHBOR_ADVERT:
		if (multi)
			return (-1);
		target = (struct pf_addr *)&pd->hdr.nd_ns.nd_ns_target;
		saddr = target;
		if (IN6_IS_ADDR_MULTICAST(&pd->dst->v6)) {
			key->addr[didx].addr32[0] = 0;
			key->addr[didx].addr32[1] = 0;
			key->addr[didx].addr32[2] = 0;
			key->addr[didx].addr32[3] = 0;
			daddr = NULL; /* overwritten */
		}
		break;
	default:
		if (multi) {
			key->addr[sidx].addr32[0] = __IPV6_ADDR_INT32_MLL;
			key->addr[sidx].addr32[1] = 0;
			key->addr[sidx].addr32[2] = 0;
			key->addr[sidx].addr32[3] = __IPV6_ADDR_INT32_ONE;
			saddr = NULL; /* overwritten */
		}
	}
 copy:
#endif /* INET6 */
	if (saddr)
		pf_addrcpy(&key->addr[sidx], saddr, af);
	if (daddr)
		pf_addrcpy(&key->addr[didx], daddr, af);

	return (0);
}

int
pf_state_key_setup(struct pf_pdesc *pd, struct pf_state_key **skw,
    struct pf_state_key **sks, int rtableid)
{
	/* if returning error we MUST pool_put state keys ourselves */
	struct pf_state_key *sk1, *sk2;
	u_int wrdom = pd->rdomain;
	int afto = pd->af != pd->naf;

	if ((sk1 = pf_alloc_state_key(PR_NOWAIT | PR_ZERO)) == NULL)
		return (ENOMEM);

	pf_state_key_addr_setup(pd, sk1, pd->sidx, pd->src, pd->didx, pd->dst,
	    pd->af, 0);
	sk1->port[pd->sidx] = pd->osport;
	sk1->port[pd->didx] = pd->odport;
	sk1->proto = pd->proto;
	sk1->af = pd->af;
	sk1->rdomain = pd->rdomain;
	PF_REF_INIT(sk1->refcnt);
	sk1->removed = 0;
	if (rtableid >= 0)
		wrdom = rtable_l2(rtableid);

	if (PF_ANEQ(&pd->nsaddr, pd->src, pd->af) ||
	    PF_ANEQ(&pd->ndaddr, pd->dst, pd->af) ||
	    pd->nsport != pd->osport || pd->ndport != pd->odport ||
	    wrdom != pd->rdomain || afto) {	/* NAT/NAT64 */
		if ((sk2 = pf_alloc_state_key(PR_NOWAIT | PR_ZERO)) == NULL) {
			pool_put(&pf_state_key_pl, sk1);
			return (ENOMEM);
		}
		pf_state_key_addr_setup(pd, sk2, afto ? pd->didx : pd->sidx,
		    &pd->nsaddr, afto ? pd->sidx : pd->didx, &pd->ndaddr,
		    pd->naf, 0);
		sk2->port[afto ? pd->didx : pd->sidx] = pd->nsport;
		sk2->port[afto ? pd->sidx : pd->didx] = pd->ndport;
		if (afto) {
			switch (pd->proto) {
			case IPPROTO_ICMP:
				sk2->proto = IPPROTO_ICMPV6;
				break;
			case IPPROTO_ICMPV6:
				sk2->proto = IPPROTO_ICMP;
				break;
			default:
				sk2->proto = pd->proto;
			}
		} else
			sk2->proto = pd->proto;
		sk2->af = pd->naf;
		sk2->rdomain = wrdom;
		PF_REF_INIT(sk2->refcnt);
		sk2->removed = 0;
	} else
		sk2 = sk1;

	if (pd->dir == PF_IN) {
		*skw = sk1;
		*sks = sk2;
	} else {
		*sks = sk1;
		*skw = sk2;
	}

	if (pf_status.debug >= LOG_DEBUG) {
		log(LOG_DEBUG, "pf: key setup: ");
		pf_print_state_parts(NULL, *skw, *sks);
		addlog("\n");
	}

	return (0);
}
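
/*
 * Illustrative example (not in the original source): for an outbound
 * connection 10.0.0.1:40000 -> 192.0.2.1:80 rewritten by nat-to to
 * 198.51.100.1:50000, sk1 above carries the pre-translation tuple and
 * becomes the stack-side key, while sk2 carries the translated tuple
 * and becomes the wire-side key.  Without any translation, sk2 == sk1
 * and a single key serves both roles.
 */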

int
pf_state_insert(struct pfi_kif *kif, struct pf_state_key **skw,
    struct pf_state_key **sks, struct pf_state *s)
{
	PF_ASSERT_LOCKED();

	s->kif = kif;
	PF_STATE_ENTER_WRITE();
	if (*skw == *sks) {
		if (pf_state_key_attach(*skw, s, PF_SK_WIRE)) {
			PF_STATE_EXIT_WRITE();
			return (-1);
		}
		*skw = *sks = s->key[PF_SK_WIRE];
		s->key[PF_SK_STACK] = s->key[PF_SK_WIRE];
	} else {
		if (pf_state_key_attach(*skw, s, PF_SK_WIRE)) {
			pool_put(&pf_state_key_pl, *sks);
			PF_STATE_EXIT_WRITE();
			return (-1);
		}
		*skw = s->key[PF_SK_WIRE];
		if (pf_state_key_attach(*sks, s, PF_SK_STACK)) {
			pf_state_key_detach(s, PF_SK_WIRE);
			PF_STATE_EXIT_WRITE();
			return (-1);
		}
		*sks = s->key[PF_SK_STACK];
	}

	if (s->id == 0 && s->creatorid == 0) {
		s->id = htobe64(pf_status.stateid++);
		s->creatorid = pf_status.hostid;
	}
	if (RB_INSERT(pf_state_tree_id, &tree_id, s) != NULL) {
		if (pf_status.debug >= LOG_NOTICE) {
			log(LOG_NOTICE, "pf: state insert failed: "
			    "id: %016llx creatorid: %08x",
			    betoh64(s->id), ntohl(s->creatorid));
			addlog("\n");
		}
		pf_detach_state(s);
		PF_STATE_EXIT_WRITE();
		return (-1);
	}
	pf_state_list_insert(&pf_state_list, s);
	pf_status.fcounters[FCNT_STATE_INSERT]++;
	pf_status.states++;
	pfi_kif_ref(kif, PFI_KIF_REF_STATE);
	PF_STATE_EXIT_WRITE();
#if NPFSYNC > 0
	pfsync_insert_state(s);
#endif	/* NPFSYNC > 0 */
	return (0);
}

struct pf_state *
pf_find_state_byid(struct pf_state_cmp *key)
{
	pf_status.fcounters[FCNT_STATE_SEARCH]++;

	return (RB_FIND(pf_state_tree_id, &tree_id, (struct pf_state *)key));
}

int
pf_compare_state_keys(struct pf_state_key *a, struct pf_state_key *b,
    struct pfi_kif *kif, u_int dir)
{
	/* a (from hdr) and b (new) must be exact opposites of each other */
	if (a->af == b->af && a->proto == b->proto &&
	    PF_AEQ(&a->addr[0], &b->addr[1], a->af) &&
	    PF_AEQ(&a->addr[1], &b->addr[0], a->af) &&
	    a->port[0] == b->port[1] &&
	    a->port[1] == b->port[0] && a->rdomain == b->rdomain)
		return (0);
	else {
		/* mismatch. must not happen. */
		if (pf_status.debug >= LOG_ERR) {
			log(LOG_ERR,
			    "pf: state key linking mismatch! dir=%s, "
			    "if=%s, stored af=%u, a0: ",
			    dir == PF_OUT ? "OUT" : "IN",
			    kif->pfik_name, a->af);
			pf_print_host(&a->addr[0], a->port[0], a->af);
			addlog(", a1: ");
			pf_print_host(&a->addr[1], a->port[1], a->af);
			addlog(", proto=%u", a->proto);
			addlog(", found af=%u, a0: ", b->af);
			pf_print_host(&b->addr[0], b->port[0], b->af);
			addlog(", a1: ");
			pf_print_host(&b->addr[1], b->port[1], b->af);
			addlog(", proto=%u", b->proto);
			addlog("\n");
		}
		return (-1);
	}
}

int
pf_find_state(struct pf_pdesc *pd, struct pf_state_key_cmp *key,
    struct pf_state **state)
{
	struct pf_state_key	*sk, *pkt_sk, *inp_sk;
	struct pf_state_item	*si;
	struct pf_state		*s = NULL;

	pf_status.fcounters[FCNT_STATE_SEARCH]++;
	if (pf_status.debug >= LOG_DEBUG) {
		log(LOG_DEBUG, "pf: key search, %s on %s: ",
		    pd->dir == PF_OUT ? "out" : "in", pd->kif->pfik_name);
		pf_print_state_parts(NULL, (struct pf_state_key *)key, NULL);
		addlog("\n");
	}

	inp_sk = NULL;
	pkt_sk = NULL;
	sk = NULL;
	if (pd->dir == PF_OUT) {
		/* first if block deals with outbound forwarded packet */
		pkt_sk = pd->m->m_pkthdr.pf.statekey;

		if (!pf_state_key_isvalid(pkt_sk)) {
			pf_mbuf_unlink_state_key(pd->m);
			pkt_sk = NULL;
		}

		if (pkt_sk && pf_state_key_isvalid(pkt_sk->reverse))
			sk = pkt_sk->reverse;

		if (pkt_sk == NULL) {
			/* here we deal with local outbound packet */
			if (pd->m->m_pkthdr.pf.inp != NULL) {
				inp_sk = pd->m->m_pkthdr.pf.inp->inp_pf_sk;
				if (pf_state_key_isvalid(inp_sk))
					sk = inp_sk;
				else
					pf_inpcb_unlink_state_key(
					    pd->m->m_pkthdr.pf.inp);
			}
		}
	}

	if (sk == NULL) {
		if ((sk = RB_FIND(pf_state_tree, &pf_statetbl,
		    (struct pf_state_key *)key)) == NULL)
			return (PF_DROP);
		if (pd->dir == PF_OUT && pkt_sk &&
		    pf_compare_state_keys(pkt_sk, sk, pd->kif, pd->dir) == 0)
			pf_state_key_link_reverse(sk, pkt_sk);
		else if (pd->dir == PF_OUT && pd->m->m_pkthdr.pf.inp &&
		    !pd->m->m_pkthdr.pf.inp->inp_pf_sk && !sk->inp)
			pf_state_key_link_inpcb(sk, pd->m->m_pkthdr.pf.inp);
	}

	/* remove firewall data from outbound packet */
	if (pd->dir == PF_OUT)
		pf_pkt_addr_changed(pd->m);

	/* list is sorted, if-bound states before floating ones */
	TAILQ_FOREACH(si, &sk->states, entry)
		if ((si->s->kif == pfi_all || si->s->kif == pd->kif) &&
		    ((si->s->key[PF_SK_WIRE]->af == si->s->key[PF_SK_STACK]->af
		    && sk == (pd->dir == PF_IN ? si->s->key[PF_SK_WIRE] :
		    si->s->key[PF_SK_STACK])) ||
		    (si->s->key[PF_SK_WIRE]->af != si->s->key[PF_SK_STACK]->af
		    && pd->dir == PF_IN && (sk == si->s->key[PF_SK_STACK] ||
		    sk == si->s->key[PF_SK_WIRE])))) {
			s = si->s;
			break;
		}

	if (s == NULL || s->timeout == PFTM_PURGE)
		return (PF_DROP);

	if (s->rule.ptr->pktrate.limit && pd->dir == s->direction) {
		pf_add_threshold(&s->rule.ptr->pktrate);
		if (pf_check_threshold(&s->rule.ptr->pktrate))
			return (PF_DROP);
	}

	*state = s;

	return (PF_MATCH);
}

struct pf_state *
pf_find_state_all(struct pf_state_key_cmp *key, u_int dir, int *more)
{
	struct pf_state_key	*sk;
	struct pf_state_item	*si, *ret = NULL;

	pf_status.fcounters[FCNT_STATE_SEARCH]++;

	sk = RB_FIND(pf_state_tree, &pf_statetbl, (struct pf_state_key *)key);

	if (sk != NULL) {
		TAILQ_FOREACH(si, &sk->states, entry)
			if (dir == PF_INOUT ||
			    (sk == (dir == PF_IN ? si->s->key[PF_SK_WIRE] :
			    si->s->key[PF_SK_STACK]))) {
				if (more == NULL)
					return (si->s);

				if (ret)
					(*more)++;
				else
					ret = si;
			}
	}
	return (ret ? ret->s : NULL);
}

void
pf_state_export(struct pfsync_state *sp, struct pf_state *st)
{
	int32_t expire;

	memset(sp, 0, sizeof(struct pfsync_state));

	/* copy from state key */
	sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0];
	sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1];
	sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0];
	sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1];
	sp->key[PF_SK_WIRE].rdomain = htons(st->key[PF_SK_WIRE]->rdomain);
	sp->key[PF_SK_WIRE].af = st->key[PF_SK_WIRE]->af;
	sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0];
	sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1];
	sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0];
	sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1];
	sp->key[PF_SK_STACK].rdomain = htons(st->key[PF_SK_STACK]->rdomain);
	sp->key[PF_SK_STACK].af = st->key[PF_SK_STACK]->af;
	sp->rtableid[PF_SK_WIRE] = htonl(st->rtableid[PF_SK_WIRE]);
	sp->rtableid[PF_SK_STACK] = htonl(st->rtableid[PF_SK_STACK]);
	sp->proto = st->key[PF_SK_WIRE]->proto;
	sp->af = st->key[PF_SK_WIRE]->af;

	/* copy from state */
	strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname));
	sp->rt = st->rt;
	sp->rt_addr = st->rt_addr;
	sp->creation = htonl(getuptime() - st->creation);
	expire = pf_state_expires(st, st->timeout);
	if (expire <= getuptime())
		sp->expire = htonl(0);
	else
		sp->expire = htonl(expire - getuptime());

	sp->direction = st->direction;
#if NPFLOG > 0
	sp->log = st->log;
#endif	/* NPFLOG > 0 */
	sp->timeout = st->timeout;
	sp->state_flags = htons(st->state_flags);
	if (!SLIST_EMPTY(&st->src_nodes))
		sp->sync_flags |= PFSYNC_FLAG_SRCNODE;

	sp->id = st->id;
	sp->creatorid = st->creatorid;
	pf_state_peer_hton(&st->src, &sp->src);
	pf_state_peer_hton(&st->dst, &sp->dst);

	if (st->rule.ptr == NULL)
		sp->rule = htonl(-1);
	else
		sp->rule = htonl(st->rule.ptr->nr);
	if (st->anchor.ptr == NULL)
		sp->anchor = htonl(-1);
	else
		sp->anchor = htonl(st->anchor.ptr->nr);
	sp->nat_rule = htonl(-1);	/* left for compat, nat_rule is gone */

	pf_state_counter_hton(st->packets[0], sp->packets[0]);
	pf_state_counter_hton(st->packets[1], sp->packets[1]);
	pf_state_counter_hton(st->bytes[0], sp->bytes[0]);
	pf_state_counter_hton(st->bytes[1], sp->bytes[1]);

	sp->max_mss = htons(st->max_mss);
	sp->min_ttl = st->min_ttl;
	sp->set_tos = st->set_tos;
	sp->set_prio[0] = st->set_prio[0];
	sp->set_prio[1] = st->set_prio[1];
}

/* END state table stuff */

void
pf_purge_expired_rules(void)
{
	struct pf_rule	*r;

	PF_ASSERT_LOCKED();

	if (SLIST_EMPTY(&pf_rule_gcl))
		return;

	while ((r = SLIST_FIRST(&pf_rule_gcl)) != NULL) {
		SLIST_REMOVE(&pf_rule_gcl, r, pf_rule, gcle);
		KASSERT(r->rule_flag & PFRULE_EXPIRED);
		pf_purge_rule(r);
	}
}

void
pf_purge_timeout(void *unused)
{
	/* XXX move to systqmp to avoid KERNEL_LOCK */
	task_add(systq, &pf_purge_task);
}

void
pf_purge(void *xnloops)
{
	int *nloops = xnloops;

	/*
	 * process a fraction of the state table every second
	 * Note:
	 *	we no longer need PF_LOCK() here, because
	 *	pf_purge_expired_states() uses pf_state_lock to maintain
	 *	consistency.
	 */
	if (pf_default_rule.timeout[PFTM_INTERVAL] > 0)
		pf_purge_expired_states(1 + (pf_status.states
		    / pf_default_rule.timeout[PFTM_INTERVAL]));

	NET_LOCK();

	PF_LOCK();
	/* purge other expired types every PFTM_INTERVAL seconds */
	if (++(*nloops) >= pf_default_rule.timeout[PFTM_INTERVAL]) {
		pf_purge_expired_src_nodes();
		pf_purge_expired_rules();
	}
	PF_UNLOCK();

	/*
	 * Fragments don't require PF_LOCK(), they use their own lock.
	 */
	if ((*nloops) >= pf_default_rule.timeout[PFTM_INTERVAL]) {
		pf_purge_expired_fragments();
		*nloops = 0;
	}
	NET_UNLOCK();

	timeout_add_sec(&pf_purge_to, 1);
}

int32_t
pf_state_expires(const struct pf_state *state, uint8_t stimeout)
{
	u_int32_t	timeout;
	u_int32_t	start;
	u_int32_t	end;
	u_int32_t	states;

	/*
	 * pf_state_expires is used by the state purge task to
	 * decide if a state is a candidate for cleanup, and by the
	 * pfsync state export code to populate an expiry time.
	 *
	 * this function may be called by the state purge task while
	 * the state is being modified. avoid inconsistent reads of
	 * state->timeout by having the caller do the read (and any
	 * checks it needs to do on the same variable) and then pass
	 * their view of the timeout in here for this function to use.
	 * the only consequence of using a stale timeout value is
	 * that the state won't be a candidate for purging until the
	 * next pass of the purge task.
	 */

	/* handle all PFTM_* > PFTM_MAX here */
	if (stimeout == PFTM_PURGE)
		return (0);

	KASSERT(stimeout != PFTM_UNLINKED);
	KASSERT(stimeout < PFTM_MAX);

	timeout = state->rule.ptr->timeout[stimeout];
	if (!timeout)
		timeout = pf_default_rule.timeout[stimeout];

	start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START];
	if (start) {
		end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END];
		states = state->rule.ptr->states_cur;
	} else {
		start = pf_default_rule.timeout[PFTM_ADAPTIVE_START];
		end = pf_default_rule.timeout[PFTM_ADAPTIVE_END];
		states = pf_status.states;
	}
	if (end && states > start && start < end) {
		if (states >= end)
			return (0);

		timeout = (u_int64_t)timeout * (end - states) / (end - start);
	}

	return (state->expire + timeout);
}
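
/*
 * Worked example (illustrative, not from the original source): with
 * "set timeout { adaptive.start 6000, adaptive.end 12000,
 * tcp.established 86400 }" and 9000 states, the scaling above yields
 * 86400 * (12000 - 9000) / (12000 - 6000) = 43200 seconds, i.e. half
 * the configured timeout; at or beyond adaptive.end the state expires
 * immediately.
 */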

void
pf_purge_expired_src_nodes(void)
{
	struct pf_src_node	*cur, *next;

	PF_ASSERT_LOCKED();

	for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) {
		next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);

		if (cur->states == 0 && cur->expire <= getuptime()) {
			next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);
			pf_remove_src_node(cur);
		}
	}
}

void
pf_src_tree_remove_state(struct pf_state *s)
{
	u_int32_t		 timeout;
	struct pf_sn_item	*sni;

	while ((sni = SLIST_FIRST(&s->src_nodes)) != NULL) {
		SLIST_REMOVE_HEAD(&s->src_nodes, next);
		if (s->src.tcp_est)
			--sni->sn->conn;
		if (--sni->sn->states == 0) {
			timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
			if (!timeout)
				timeout =
				    pf_default_rule.timeout[PFTM_SRC_NODE];
			sni->sn->expire = getuptime() + timeout;
		}
		pool_put(&pf_sn_item_pl, sni);
	}
}

void
pf_remove_state(struct pf_state *cur)
{
	PF_ASSERT_LOCKED();

	/* handle load balancing related tasks */
	pf_postprocess_addr(cur);

	if (cur->src.state == PF_TCPS_PROXY_DST) {
		pf_send_tcp(cur->rule.ptr, cur->key[PF_SK_WIRE]->af,
		    &cur->key[PF_SK_WIRE]->addr[1],
		    &cur->key[PF_SK_WIRE]->addr[0],
		    cur->key[PF_SK_WIRE]->port[1],
		    cur->key[PF_SK_WIRE]->port[0],
		    cur->src.seqhi, cur->src.seqlo + 1,
		    TH_RST|TH_ACK, 0, 0, 0, 1, cur->tag,
		    cur->key[PF_SK_WIRE]->rdomain);
	}
	if (cur->key[PF_SK_STACK]->proto == IPPROTO_TCP)
		pf_set_protostate(cur, PF_PEER_BOTH, TCPS_CLOSED);

	RB_REMOVE(pf_state_tree_id, &tree_id, cur);
#if NPFLOW > 0
	if (cur->state_flags & PFSTATE_PFLOW)
		export_pflow(cur);
#endif	/* NPFLOW > 0 */
#if NPFSYNC > 0
	pfsync_delete_state(cur);
#endif	/* NPFSYNC > 0 */
	cur->timeout = PFTM_UNLINKED;
	pf_src_tree_remove_state(cur);
	pf_detach_state(cur);
}

void
pf_remove_divert_state(struct pf_state_key *sk)
{
	struct pf_state_item	*si;

	PF_ASSERT_UNLOCKED();

	PF_LOCK();
	PF_STATE_ENTER_WRITE();
	TAILQ_FOREACH(si, &sk->states, entry) {
		if (sk == si->s->key[PF_SK_STACK] && si->s->rule.ptr &&
		    (si->s->rule.ptr->divert.type == PF_DIVERT_TO ||
		    si->s->rule.ptr->divert.type == PF_DIVERT_REPLY)) {
			pf_remove_state(si->s);
			break;
		}
	}
	PF_STATE_EXIT_WRITE();
	PF_UNLOCK();
}

void
pf_free_state(struct pf_state *cur)
{
	struct pf_rule_item *ri;

	PF_ASSERT_LOCKED();

#if NPFSYNC > 0
	if (pfsync_state_in_use(cur))
		return;
#endif	/* NPFSYNC > 0 */
	KASSERT(cur->timeout == PFTM_UNLINKED);
	if (--cur->rule.ptr->states_cur == 0 &&
	    cur->rule.ptr->src_nodes == 0)
		pf_rm_rule(NULL, cur->rule.ptr);
	if (cur->anchor.ptr != NULL)
		if (--cur->anchor.ptr->states_cur == 0)
			pf_rm_rule(NULL, cur->anchor.ptr);
	while ((ri = SLIST_FIRST(&cur->match_rules))) {
		SLIST_REMOVE_HEAD(&cur->match_rules, entry);
		if (--ri->r->states_cur == 0 &&
		    ri->r->src_nodes == 0)
			pf_rm_rule(NULL, ri->r);
		pool_put(&pf_rule_item_pl, ri);
	}
	pf_normalize_tcp_cleanup(cur);
	pfi_kif_unref(cur->kif, PFI_KIF_REF_STATE);
	pf_state_list_remove(&pf_state_list, cur);
	if (cur->tag)
		pf_tag_unref(cur->tag);
	pf_state_unref(cur);
	pf_status.fcounters[FCNT_STATE_REMOVALS]++;
	pf_status.states--;
}

void
pf_purge_expired_states(u_int32_t maxcheck)
{
	/*
	 * this task/thread/context/whatever is the only thing that
	 * removes states from the pf_state_list, so the cur reference
	 * it holds between calls is guaranteed to still be in the
	 * list.
	 */
	static struct pf_state	*cur = NULL;

	struct pf_state		*head, *tail;
	struct pf_state		*st;
	SLIST_HEAD(pf_state_gcl, pf_state) gcl = SLIST_HEAD_INITIALIZER(gcl);
	time_t			 now;

	PF_ASSERT_UNLOCKED();

	rw_enter_read(&pf_state_list.pfs_rwl);

	mtx_enter(&pf_state_list.pfs_mtx);
	head = TAILQ_FIRST(&pf_state_list.pfs_list);
	tail = TAILQ_LAST(&pf_state_list.pfs_list, pf_state_queue);
	mtx_leave(&pf_state_list.pfs_mtx);

	if (head == NULL) {
		/* the list is empty */
		rw_exit_read(&pf_state_list.pfs_rwl);
		return;
	}

	/* (re)start at the front of the list */
	if (cur == NULL)
		cur = head;

	now = getuptime();

	do {
		uint8_t stimeout = cur->timeout;

		if ((stimeout == PFTM_UNLINKED) ||
		    (pf_state_expires(cur, stimeout) <= now)) {
			st = pf_state_ref(cur);
			SLIST_INSERT_HEAD(&gcl, st, gc_list);
		}

		/* don't iterate past the end of our view of the list */
		if (cur == tail) {
			cur = NULL;
			break;
		}

		cur = TAILQ_NEXT(cur, entry_list);
	} while (maxcheck--);

	rw_exit_read(&pf_state_list.pfs_rwl);

	if (SLIST_EMPTY(&gcl))
		return;

	NET_LOCK();
	rw_enter_write(&pf_state_list.pfs_rwl);
	PF_LOCK();
	PF_STATE_ENTER_WRITE();
	SLIST_FOREACH(st, &gcl, gc_list) {
		if (st->timeout != PFTM_UNLINKED)
			pf_remove_state(st);

		pf_free_state(st);
	}
	PF_STATE_EXIT_WRITE();
	PF_UNLOCK();
	rw_exit_write(&pf_state_list.pfs_rwl);
	NET_UNLOCK();

	while ((st = SLIST_FIRST(&gcl)) != NULL) {
		SLIST_REMOVE_HEAD(&gcl, gc_list);
		pf_state_unref(st);
	}
}

int
pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw)
{
	if (aw->type != PF_ADDR_TABLE)
		return (0);
	if ((aw->p.tbl = pfr_attach_table(rs, aw->v.tblname, 1)) == NULL)
		return (1);
	return (0);
}

void
pf_tbladdr_remove(struct pf_addr_wrap *aw)
{
	if (aw->type != PF_ADDR_TABLE || aw->p.tbl == NULL)
		return;
	pfr_detach_table(aw->p.tbl);
	aw->p.tbl = NULL;
}

void
pf_tbladdr_copyout(struct pf_addr_wrap *aw)
{
	struct pfr_ktable *kt = aw->p.tbl;

	if (aw->type != PF_ADDR_TABLE || kt == NULL)
		return;
	if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL)
		kt = kt->pfrkt_root;
	aw->p.tbl = NULL;
	aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ?
	    kt->pfrkt_cnt : -1;
}

void
pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af)
{
	switch (af) {
	case AF_INET: {
		u_int32_t a = ntohl(addr->addr32[0]);
		addlog("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255,
		    (a>>8)&255, a&255);
		if (p) {
			p = ntohs(p);
			addlog(":%u", p);
		}
		break;
	}
#ifdef INET6
	case AF_INET6: {
		u_int16_t b;
		u_int8_t i, curstart, curend, maxstart, maxend;
		curstart = curend = maxstart = maxend = 255;
		for (i = 0; i < 8; i++) {
			if (!addr->addr16[i]) {
				if (curstart == 255)
					curstart = i;
				curend = i;
			} else {
				if ((curend - curstart) >
				    (maxend - maxstart)) {
					maxstart = curstart;
					maxend = curend;
				}
				curstart = curend = 255;
			}
		}
		if ((curend - curstart) >
		    (maxend - maxstart)) {
			maxstart = curstart;
			maxend = curend;
		}
		for (i = 0; i < 8; i++) {
			if (i >= maxstart && i <= maxend) {
				if (i == 0)
					addlog(":");
				if (i == maxend)
					addlog(":");
			} else {
				b = ntohs(addr->addr16[i]);
				addlog("%x", b);
				if (i < 7)
					addlog(":");
			}
		}
		if (p) {
			p = ntohs(p);
			addlog("[%u]", p);
		}
		break;
	}
#endif /* INET6 */
	}
}
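
/*
 * Example (illustrative): the longest run of zero groups above is
 * compressed to "::", so fe80:0:0:0:0:0:0:1 is logged as "fe80::1";
 * a port, if any, is appended as "[port]" for IPv6 and ":port" for
 * IPv4.
 */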

void
pf_print_state(struct pf_state *s)
{
	pf_print_state_parts(s, NULL, NULL);
}

void
pf_print_state_parts(struct pf_state *s,
    struct pf_state_key *skwp, struct pf_state_key *sksp)
{
	struct pf_state_key *skw, *sks;
	u_int8_t proto, dir;

	/* Do our best to fill these, but they're skipped if NULL */
	skw = skwp ? skwp : (s ? s->key[PF_SK_WIRE] : NULL);
	sks = sksp ? sksp : (s ? s->key[PF_SK_STACK] : NULL);
	proto = skw ? skw->proto : (sks ? sks->proto : 0);
	dir = s ? s->direction : 0;

	switch (proto) {
	case IPPROTO_IPV4:
		addlog("IPv4");
		break;
	case IPPROTO_IPV6:
		addlog("IPv6");
		break;
	case IPPROTO_TCP:
		addlog("TCP");
		break;
	case IPPROTO_UDP:
		addlog("UDP");
		break;
	case IPPROTO_ICMP:
		addlog("ICMP");
		break;
	case IPPROTO_ICMPV6:
		addlog("ICMPv6");
		break;
	default:
		addlog("%u", proto);
		break;
	}
	switch (dir) {
	case PF_IN:
		addlog(" in");
		break;
	case PF_OUT:
		addlog(" out");
		break;
	}
	if (skw) {
		addlog(" wire: (%d) ", skw->rdomain);
		pf_print_host(&skw->addr[0], skw->port[0], skw->af);
		addlog(" ");
		pf_print_host(&skw->addr[1], skw->port[1], skw->af);
	}
	if (sks) {
		addlog(" stack: (%d) ", sks->rdomain);
		if (sks != skw) {
			pf_print_host(&sks->addr[0], sks->port[0], sks->af);
			addlog(" ");
			pf_print_host(&sks->addr[1], sks->port[1], sks->af);
		} else
			addlog("-");
	}
	if (s) {
		if (proto == IPPROTO_TCP) {
			addlog(" [lo=%u high=%u win=%u modulator=%u",
			    s->src.seqlo, s->src.seqhi,
			    s->src.max_win, s->src.seqdiff);
			if (s->src.wscale && s->dst.wscale)
				addlog(" wscale=%u",
				    s->src.wscale & PF_WSCALE_MASK);
			addlog("]");
			addlog(" [lo=%u high=%u win=%u modulator=%u",
			    s->dst.seqlo, s->dst.seqhi,
			    s->dst.max_win, s->dst.seqdiff);
			if (s->src.wscale && s->dst.wscale)
				addlog(" wscale=%u",
				    s->dst.wscale & PF_WSCALE_MASK);
			addlog("]");
		}
		addlog(" %u:%u", s->src.state, s->dst.state);
		if (s->rule.ptr)
			addlog(" @%d", s->rule.ptr->nr);
	}
}

void
pf_print_flags(u_int8_t f)
{
	if (f)
		addlog(" ");
	if (f & TH_FIN)
		addlog("F");
	if (f & TH_SYN)
		addlog("S");
	if (f & TH_RST)
		addlog("R");
	if (f & TH_PUSH)
		addlog("P");
	if (f & TH_ACK)
		addlog("A");
	if (f & TH_URG)
		addlog("U");
	if (f & TH_ECE)
		addlog("E");
	if (f & TH_CWR)
		addlog("W");
}

#define	PF_SET_SKIP_STEPS(i)					\
	do {							\
		while (head[i] != cur) {			\
			head[i]->skip[i].ptr = cur;		\
			head[i] = TAILQ_NEXT(head[i], entries);	\
		}						\
	} while (0)

void
pf_calc_skip_steps(struct pf_rulequeue *rules)
{
	struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT];
	int i;

	cur = TAILQ_FIRST(rules);
	prev = cur;
	for (i = 0; i < PF_SKIP_COUNT; ++i)
		head[i] = cur;
	while (cur != NULL) {
		if (cur->kif != prev->kif || cur->ifnot != prev->ifnot)
			PF_SET_SKIP_STEPS(PF_SKIP_IFP);
		if (cur->direction != prev->direction)
			PF_SET_SKIP_STEPS(PF_SKIP_DIR);
		if (cur->onrdomain != prev->onrdomain ||
		    cur->ifnot != prev->ifnot)
			PF_SET_SKIP_STEPS(PF_SKIP_RDOM);
		if (cur->af != prev->af)
			PF_SET_SKIP_STEPS(PF_SKIP_AF);
		if (cur->proto != prev->proto)
			PF_SET_SKIP_STEPS(PF_SKIP_PROTO);
		if (cur->src.neg != prev->src.neg ||
		    pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr))
			PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR);
		if (cur->dst.neg != prev->dst.neg ||
		    pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr))
			PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR);
		if (cur->src.port[0] != prev->src.port[0] ||
		    cur->src.port[1] != prev->src.port[1] ||
		    cur->src.port_op != prev->src.port_op)
			PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT);
		if (cur->dst.port[0] != prev->dst.port[0] ||
		    cur->dst.port[1] != prev->dst.port[1] ||
		    cur->dst.port_op != prev->dst.port_op)
			PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);

		prev = cur;
		cur = TAILQ_NEXT(cur, entries);
	}
	for (i = 0; i < PF_SKIP_COUNT; ++i)
		PF_SET_SKIP_STEPS(i);
}

int
pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2)
{
	if (aw1->type != aw2->type)
		return (1);
	switch (aw1->type) {
	case PF_ADDR_ADDRMASK:
	case PF_ADDR_RANGE:
		if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, AF_INET6))
			return (1);
		if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, AF_INET6))
			return (1);
		return (0);
	case PF_ADDR_DYNIFTL:
		return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt);
	case PF_ADDR_NONE:
	case PF_ADDR_NOROUTE:
	case PF_ADDR_URPFFAILED:
		return (0);
	case PF_ADDR_TABLE:
		return (aw1->p.tbl != aw2->p.tbl);
	case PF_ADDR_RTLABEL:
		return (aw1->v.rtlabel != aw2->v.rtlabel);
	default:
		addlog("invalid address type: %d\n", aw1->type);
		return (1);
	}
}

/* This algorithm computes 'a + b - c' in ones-complement using a trick to
 * emulate at most one ones-complement subtraction. This thereby limits net
 * carries/borrows to at most one, eliminating a reduction step and saving one
 * each of +, >>, & and ~.
 *
 * def. x mod y = x - (x//y)*y for integer x,y
 * def. sum = x mod 2^16
 * def. accumulator = (x >> 16) mod 2^16
 *
 * The trick works as follows: subtracting exactly one u_int16_t from the
 * u_int32_t x incurs at most one underflow, wrapping its upper 16-bits, the
 * accumulator, to 2^16 - 1. Adding this to the 16-bit sum preserves the
 * ones-complement borrow:
 *
 *  (sum + accumulator) mod 2^16
 * = { assume underflow: accumulator := 2^16 - 1 }
 *  (sum + 2^16 - 1) mod 2^16
 * = { mod }
 *  (sum - 1) mod 2^16
 *
 * Although this breaks for sum = 0, giving 0xffff, which is ones-complement's
 * other zero, not -1, that cannot occur: the 16-bit sum cannot be underflown
 * to zero as that requires subtraction of at least 2^16, which exceeds a
 * single u_int16_t's range.
 *
 * We use the following theorem to derive the implementation:
 *
 * th. (x + (y mod z)) mod z = (x + y) mod z	(0)
 * proof.
 *  (x + (y mod z)) mod z
 * = { def mod }
 *  (x + y - (y//z)*z) mod z
 * = { (a + b*c) mod c = a mod c }
 *  (x + y) mod z			[end of proof]
 *
 * ... and thereby obtain:
 *
 *  (sum + accumulator) mod 2^16
 * = { def. accumulator, def. sum }
 *  (x mod 2^16 + (x >> 16) mod 2^16) mod 2^16
 * = { (0), twice }
 *  (x + (x >> 16)) mod 2^16
 * = { x mod 2^n = x & (2^n - 1) }
 *  (x + (x >> 16)) & 0xffff
 *
 * Note: this serves also as a reduction step for at most one add (as the
 * trailing mod 2^16 prevents further reductions by destroying carries).
 */
static __inline void
pf_cksum_fixup(u_int16_t *cksum, u_int16_t was, u_int16_t now,
    u_int8_t proto)
{
	u_int32_t x;
	const int udp = proto == IPPROTO_UDP;

	x = *cksum + was - now;
	x = (x + (x >> 16)) & 0xffff;

	/* optimise: eliminate a branch when not udp */
	if (udp && *cksum == 0x0000)
		return;
	if (udp && x == 0x0000)
		x = 0xffff;

	*cksum = (u_int16_t)(x);
}
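
/*
 * Worked example (illustrative, not from the original source):
 * patching a field from was = 0x1234 to now = 0x0001 with
 * *cksum = 0xfffe gives x = 0xfffe + 0x1234 - 0x0001 = 0x11231;
 * folding the carry, (0x1231 + 0x1) & 0xffff = 0x1232, the updated
 * checksum.  The udp guards exist because UDP alone uses 0x0000 to
 * mean "no checksum computed" (RFC 768).
 */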
1922 */ 1923 static __inline void 1924 pf_cksum_fixup(u_int16_t *cksum, u_int16_t was, u_int16_t now, 1925 u_int8_t proto) 1926 { 1927 u_int32_t x; 1928 const int udp = proto == IPPROTO_UDP; 1929 1930 x = *cksum + was - now; 1931 x = (x + (x >> 16)) & 0xffff; 1932 1933 /* optimise: eliminate a branch when not udp */ 1934 if (udp && *cksum == 0x0000) 1935 return; 1936 if (udp && x == 0x0000) 1937 x = 0xffff; 1938 1939 *cksum = (u_int16_t)(x); 1940 } 1941 1942 #ifdef INET6 1943 /* pre: coverage(cksum) is superset of coverage(covered_cksum) */ 1944 static __inline void 1945 pf_cksum_uncover(u_int16_t *cksum, u_int16_t covered_cksum, u_int8_t proto) 1946 { 1947 pf_cksum_fixup(cksum, ~covered_cksum, 0x0, proto); 1948 } 1949 1950 /* pre: disjoint(coverage(cksum), coverage(uncovered_cksum)) */ 1951 static __inline void 1952 pf_cksum_cover(u_int16_t *cksum, u_int16_t uncovered_cksum, u_int8_t proto) 1953 { 1954 pf_cksum_fixup(cksum, 0x0, ~uncovered_cksum, proto); 1955 } 1956 #endif /* INET6 */ 1957 1958 /* pre: *a is 16-bit aligned within its packet 1959 * 1960 * This algorithm emulates 16-bit ones-complement sums on a twos-complement 1961 * machine by conserving ones-complement's otherwise discarded carries in the 1962 * upper bits of x. These accumulated carries when added to the lower 16-bits 1963 * over at least zero 'reduction' steps then complete the ones-complement sum. 1964 * 1965 * def. sum = x mod 2^16 1966 * def. accumulator = (x >> 16) 1967 * 1968 * At most two reduction steps 1969 * 1970 * x := sum + accumulator 1971 * = { def sum, def accumulator } 1972 * x := x mod 2^16 + (x >> 16) 1973 * = { x mod 2^n = x & (2^n - 1) } 1974 * x := (x & 0xffff) + (x >> 16) 1975 * 1976 * are necessary to incorporate the accumulated carries (at most one per add) 1977 * i.e. to reduce x < 2^16 from at most 16 carries in the upper 16 bits. 1978 * 1979 * The function is also invariant over the endian of the host. Why? 1980 * 1981 * Define the unary transpose operator ~ on a bitstring in python slice 1982 * notation as lambda m: m[P:] + m[:P] , for some constant pivot P. 1983 * 1984 * th. ~ distributes over ones-complement addition, denoted by +_1, i.e. 1985 * 1986 * ~m +_1 ~n = ~(m +_1 n) (for all bitstrings m,n of equal length) 1987 * 1988 * proof. Regard the bitstrings in m +_1 n as split at P, forming at most two 1989 * 'half-adds'. Under ones-complement addition, each half-add carries to the 1990 * other, so the sum of each half-add is unaffected by their relative 1991 * order. Therefore: 1992 * 1993 * ~m +_1 ~n 1994 * = { half-adds invariant under transposition } 1995 * ~s 1996 * = { substitute } 1997 * ~(m +_1 n) [end of proof] 1998 * 1999 * th. Summing two in-memory ones-complement 16-bit variables m,n on a machine 2000 * with the converse endian does not alter the result. 2001 * 2002 * proof. 
2003 * { converse machine endianness: load/store transposes, P := 8 } 2004 * ~(~m +_1 ~n) 2005 * = { ~ over +_1 } 2006 * ~~m +_1 ~~n 2007 * = { ~ is an involution } 2008 * m +_1 n [end of proof] 2009 * 2010 */ 2011 #define NEG(x) ((u_int16_t)~(x)) 2012 void 2013 pf_cksum_fixup_a(u_int16_t *cksum, const struct pf_addr *a, 2014 const struct pf_addr *an, sa_family_t af, u_int8_t proto) 2015 { 2016 u_int32_t x; 2017 const u_int16_t *n = an->addr16; 2018 const u_int16_t *o = a->addr16; 2019 const int udp = proto == IPPROTO_UDP; 2020 2021 switch (af) { 2022 case AF_INET: 2023 x = *cksum + o[0] + NEG(n[0]) + o[1] + NEG(n[1]); 2024 break; 2025 #ifdef INET6 2026 case AF_INET6: 2027 x = *cksum + o[0] + NEG(n[0]) + o[1] + NEG(n[1]) + 2028 o[2] + NEG(n[2]) + o[3] + NEG(n[3]) + 2029 o[4] + NEG(n[4]) + o[5] + NEG(n[5]) + 2030 o[6] + NEG(n[6]) + o[7] + NEG(n[7]); 2031 break; 2032 #endif /* INET6 */ 2033 default: 2034 unhandled_af(af); 2035 } 2036 2037 x = (x & 0xffff) + (x >> 16); 2038 x = (x & 0xffff) + (x >> 16); 2039 2040 /* optimise: eliminate a branch when not udp */ 2041 if (udp && *cksum == 0x0000) 2042 return; 2043 if (udp && x == 0x0000) 2044 x = 0xffff; 2045 2046 *cksum = (u_int16_t)(x); 2047 } 2048 2049 int 2050 pf_patch_8(struct pf_pdesc *pd, u_int8_t *f, u_int8_t v, bool hi) 2051 { 2052 int rewrite = 0; 2053 2054 if (*f != v) { 2055 u_int16_t old = htons(hi ? (*f << 8) : *f); 2056 u_int16_t new = htons(hi ? ( v << 8) : v); 2057 2058 pf_cksum_fixup(pd->pcksum, old, new, pd->proto); 2059 *f = v; 2060 rewrite = 1; 2061 } 2062 2063 return (rewrite); 2064 } 2065 2066 /* pre: *f is 16-bit aligned within its packet */ 2067 int 2068 pf_patch_16(struct pf_pdesc *pd, u_int16_t *f, u_int16_t v) 2069 { 2070 int rewrite = 0; 2071 2072 if (*f != v) { 2073 pf_cksum_fixup(pd->pcksum, *f, v, pd->proto); 2074 *f = v; 2075 rewrite = 1; 2076 } 2077 2078 return (rewrite); 2079 } 2080 2081 int 2082 pf_patch_16_unaligned(struct pf_pdesc *pd, void *f, u_int16_t v, bool hi) 2083 { 2084 int rewrite = 0; 2085 u_int8_t *fb = (u_int8_t*)f; 2086 u_int8_t *vb = (u_int8_t*)&v; 2087 2088 if (hi && ALIGNED_POINTER(f, u_int16_t)) { 2089 return (pf_patch_16(pd, f, v)); /* optimise */ 2090 } 2091 2092 rewrite += pf_patch_8(pd, fb++, *vb++, hi); 2093 rewrite += pf_patch_8(pd, fb++, *vb++, !hi); 2094 2095 return (rewrite); 2096 } 2097 2098 /* pre: *f is 16-bit aligned within its packet */ 2099 /* pre: pd->proto != IPPROTO_UDP */ 2100 int 2101 pf_patch_32(struct pf_pdesc *pd, u_int32_t *f, u_int32_t v) 2102 { 2103 int rewrite = 0; 2104 u_int16_t *pc = pd->pcksum; 2105 u_int8_t proto = pd->proto; 2106 2107 /* optimise: inline udp fixup code is unused; let compiler scrub it */ 2108 if (proto == IPPROTO_UDP) 2109 panic("%s: udp", __func__); 2110 2111 /* optimise: skip *f != v guard; true for all use-cases */ 2112 pf_cksum_fixup(pc, *f / (1 << 16), v / (1 << 16), proto); 2113 pf_cksum_fixup(pc, *f % (1 << 16), v % (1 << 16), proto); 2114 2115 *f = v; 2116 rewrite = 1; 2117 2118 return (rewrite); 2119 } 2120 2121 int 2122 pf_patch_32_unaligned(struct pf_pdesc *pd, void *f, u_int32_t v, bool hi) 2123 { 2124 int rewrite = 0; 2125 u_int8_t *fb = (u_int8_t*)f; 2126 u_int8_t *vb = (u_int8_t*)&v; 2127 2128 if (hi && ALIGNED_POINTER(f, u_int32_t)) { 2129 return (pf_patch_32(pd, f, v)); /* optimise */ 2130 } 2131 2132 rewrite += pf_patch_8(pd, fb++, *vb++, hi); 2133 rewrite += pf_patch_8(pd, fb++, *vb++, !hi); 2134 rewrite += pf_patch_8(pd, fb++, *vb++, hi); 2135 rewrite += pf_patch_8(pd, fb++, *vb++, !hi); 2136 2137 return (rewrite); 2138 } 2139
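/*
 * Editor's sketch (illustrative, not kernel code): how callers are
 * expected to combine the pf_patch_*() helpers above.  The pdesc setup
 * is hypothetical; pd->pcksum is assumed to already point at the TCP
 * checksum of the packet being rewritten.
 *
 *	struct pf_pdesc *pd = ...;
 *	struct tcphdr *th = &pd->hdr.tcp;
 *	int rewrite = 0;
 *
 *	// rewrite the destination port; the checksum at pd->pcksum is
 *	// fixed up incrementally, so no full recalculation is needed
 *	rewrite += pf_patch_16(pd, &th->th_dport, htons(8080));
 *
 *	// 32-bit fields work the same way; th_seq is 16-bit aligned and
 *	// pd->proto is TCP, satisfying pf_patch_32()'s preconditions
 *	rewrite += pf_patch_32(pd, &th->th_seq,
 *	    htonl(ntohl(th->th_seq) + 1));
 *
 * A nonzero rewrite tells the caller that the header copy in pd->hdr
 * must be flushed back to the mbuf with m_copyback(), as pf_test_rule()
 * does below.
 */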
2140 int 2141 pf_icmp_mapping(struct pf_pdesc *pd, u_int8_t type, int *icmp_dir, 2142 u_int16_t *virtual_id, u_int16_t *virtual_type) 2143 { 2144 /* 2145 * ICMP types marked with PF_OUT are typically responses to 2146 * PF_IN, and will match states in the opposite direction. 2147 * PF_IN ICMP types need to match a state with that type. 2148 */ 2149 *icmp_dir = PF_OUT; 2150 2151 /* Queries (and responses) */ 2152 switch (pd->af) { 2153 case AF_INET: 2154 switch (type) { 2155 case ICMP_ECHO: 2156 *icmp_dir = PF_IN; 2157 /* FALLTHROUGH */ 2158 case ICMP_ECHOREPLY: 2159 *virtual_type = ICMP_ECHO; 2160 *virtual_id = pd->hdr.icmp.icmp_id; 2161 break; 2162 2163 case ICMP_TSTAMP: 2164 *icmp_dir = PF_IN; 2165 /* FALLTHROUGH */ 2166 case ICMP_TSTAMPREPLY: 2167 *virtual_type = ICMP_TSTAMP; 2168 *virtual_id = pd->hdr.icmp.icmp_id; 2169 break; 2170 2171 case ICMP_IREQ: 2172 *icmp_dir = PF_IN; 2173 /* FALLTHROUGH */ 2174 case ICMP_IREQREPLY: 2175 *virtual_type = ICMP_IREQ; 2176 *virtual_id = pd->hdr.icmp.icmp_id; 2177 break; 2178 2179 case ICMP_MASKREQ: 2180 *icmp_dir = PF_IN; 2181 /* FALLTHROUGH */ 2182 case ICMP_MASKREPLY: 2183 *virtual_type = ICMP_MASKREQ; 2184 *virtual_id = pd->hdr.icmp.icmp_id; 2185 break; 2186 2187 case ICMP_IPV6_WHEREAREYOU: 2188 *icmp_dir = PF_IN; 2189 /* FALLTHROUGH */ 2190 case ICMP_IPV6_IAMHERE: 2191 *virtual_type = ICMP_IPV6_WHEREAREYOU; 2192 *virtual_id = 0; /* Nothing sane to match on! */ 2193 break; 2194 2195 case ICMP_MOBILE_REGREQUEST: 2196 *icmp_dir = PF_IN; 2197 /* FALLTHROUGH */ 2198 case ICMP_MOBILE_REGREPLY: 2199 *virtual_type = ICMP_MOBILE_REGREQUEST; 2200 *virtual_id = 0; /* Nothing sane to match on! */ 2201 break; 2202 2203 case ICMP_ROUTERSOLICIT: 2204 *icmp_dir = PF_IN; 2205 /* FALLTHROUGH */ 2206 case ICMP_ROUTERADVERT: 2207 *virtual_type = ICMP_ROUTERSOLICIT; 2208 *virtual_id = 0; /* Nothing sane to match on! */ 2209 break; 2210 2211 /* These ICMP types map to other connections */ 2212 case ICMP_UNREACH: 2213 case ICMP_SOURCEQUENCH: 2214 case ICMP_REDIRECT: 2215 case ICMP_TIMXCEED: 2216 case ICMP_PARAMPROB: 2217 /* These will not be used, but set them anyway */ 2218 *icmp_dir = PF_IN; 2219 *virtual_type = htons(type); 2220 *virtual_id = 0; 2221 return (1); /* These types match to another state */ 2222 2223 /* 2224 * All remaining ICMP types get their own states, 2225 * and will only match in one direction. 2226 */ 2227 default: 2228 *icmp_dir = PF_IN; 2229 *virtual_type = type; 2230 *virtual_id = 0; 2231 break; 2232 } 2233 break; 2234 #ifdef INET6 2235 case AF_INET6: 2236 switch (type) { 2237 case ICMP6_ECHO_REQUEST: 2238 *icmp_dir = PF_IN; 2239 /* FALLTHROUGH */ 2240 case ICMP6_ECHO_REPLY: 2241 *virtual_type = ICMP6_ECHO_REQUEST; 2242 *virtual_id = pd->hdr.icmp6.icmp6_id; 2243 break; 2244 2245 case MLD_LISTENER_QUERY: 2246 case MLD_LISTENER_REPORT: { 2247 struct mld_hdr *mld = &pd->hdr.mld; 2248 u_int32_t h; 2249 2250 /* 2251 * Listener Report can be sent by clients 2252 * without an associated Listener Query. 2253 * In addition to that, when Report is sent as a 2254 * reply to a Query its source and destination 2255 * address are different. 
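 * The pseudo-id for both directions is therefore derived from the
 * multicast group address alone, folded to 16 bits below.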
2256 */ 2257 *icmp_dir = PF_IN; 2258 *virtual_type = MLD_LISTENER_QUERY; 2259 /* generate fake id for these messages */ 2260 h = mld->mld_addr.s6_addr32[0] ^ 2261 mld->mld_addr.s6_addr32[1] ^ 2262 mld->mld_addr.s6_addr32[2] ^ 2263 mld->mld_addr.s6_addr32[3]; 2264 *virtual_id = (h >> 16) ^ (h & 0xffff); 2265 break; 2266 } 2267 2268 /* 2269 * ICMP6_FQDN and ICMP6_NI query/reply are the same type as 2270 * ICMP6_WRU 2271 */ 2272 case ICMP6_WRUREQUEST: 2273 *icmp_dir = PF_IN; 2274 /* FALLTHROUGH */ 2275 case ICMP6_WRUREPLY: 2276 *virtual_type = ICMP6_WRUREQUEST; 2277 *virtual_id = 0; /* Nothing sane to match on! */ 2278 break; 2279 2280 case MLD_MTRACE: 2281 *icmp_dir = PF_IN; 2282 /* FALLTHROUGH */ 2283 case MLD_MTRACE_RESP: 2284 *virtual_type = MLD_MTRACE; 2285 *virtual_id = 0; /* Nothing sane to match on! */ 2286 break; 2287 2288 case ND_NEIGHBOR_SOLICIT: 2289 *icmp_dir = PF_IN; 2290 /* FALLTHROUGH */ 2291 case ND_NEIGHBOR_ADVERT: { 2292 struct nd_neighbor_solicit *nd = &pd->hdr.nd_ns; 2293 u_int32_t h; 2294 2295 *virtual_type = ND_NEIGHBOR_SOLICIT; 2296 /* generate fake id for these messages */ 2297 h = nd->nd_ns_target.s6_addr32[0] ^ 2298 nd->nd_ns_target.s6_addr32[1] ^ 2299 nd->nd_ns_target.s6_addr32[2] ^ 2300 nd->nd_ns_target.s6_addr32[3]; 2301 *virtual_id = (h >> 16) ^ (h & 0xffff); 2302 break; 2303 } 2304 2305 /* 2306 * These ICMP types map to other connections. 2307 * ND_REDIRECT can't be in this list because the triggering 2308 * packet header is optional. 2309 */ 2310 case ICMP6_DST_UNREACH: 2311 case ICMP6_PACKET_TOO_BIG: 2312 case ICMP6_TIME_EXCEEDED: 2313 case ICMP6_PARAM_PROB: 2314 /* These will not be used, but set them anyway */ 2315 *icmp_dir = PF_IN; 2316 *virtual_type = htons(type); 2317 *virtual_id = 0; 2318 return (1); /* These types match to another state */ 2319 /* 2320 * All remaining ICMP6 types get their own states, 2321 * and will only match in one direction. 
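 * ND_REDIRECT, for example, is handled here rather than as an error
 * type, because its quoted packet header is optional (see the note
 * above).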
2322 */ 2323 default: 2324 *icmp_dir = PF_IN; 2325 *virtual_type = type; 2326 *virtual_id = 0; 2327 break; 2328 } 2329 break; 2330 #endif /* INET6 */ 2331 } 2332 *virtual_type = htons(*virtual_type); 2333 return (0); /* These types match to their own state */ 2334 } 2335 2336 void 2337 pf_translate_icmp(struct pf_pdesc *pd, struct pf_addr *qa, u_int16_t *qp, 2338 struct pf_addr *oa, struct pf_addr *na, u_int16_t np) 2339 { 2340 /* note: we don't bother to fix up quoted checksums, if any */ 2341 2342 /* change quoted protocol port */ 2343 if (qp != NULL) 2344 pf_patch_16(pd, qp, np); 2345 2346 /* change quoted ip address */ 2347 pf_cksum_fixup_a(pd->pcksum, qa, na, pd->af, pd->proto); 2348 pf_addrcpy(qa, na, pd->af); 2349 2350 /* change network-header's ip address */ 2351 if (oa) 2352 pf_translate_a(pd, oa, na); 2353 } 2354 2355 /* pre: *a is 16-bit aligned within its packet */ 2356 /* *a is a network header src/dst address */ 2357 int 2358 pf_translate_a(struct pf_pdesc *pd, struct pf_addr *a, struct pf_addr *an) 2359 { 2360 int rewrite = 0; 2361 2362 /* warning: !PF_ANEQ != PF_AEQ */ 2363 if (!PF_ANEQ(a, an, pd->af)) 2364 return (0); 2365 2366 /* fixup transport pseudo-header, if any */ 2367 switch (pd->proto) { 2368 case IPPROTO_TCP: /* FALLTHROUGH */ 2369 case IPPROTO_UDP: /* FALLTHROUGH */ 2370 case IPPROTO_ICMPV6: 2371 pf_cksum_fixup_a(pd->pcksum, a, an, pd->af, pd->proto); 2372 break; 2373 default: 2374 break; /* assume no pseudo-header */ 2375 } 2376 2377 pf_addrcpy(a, an, pd->af); 2378 rewrite = 1; 2379 2380 return (rewrite); 2381 } 2382 2383 #ifdef INET6 2384 /* pf_translate_af() may change pd->m, adjust local copies after calling */ 2385 int 2386 pf_translate_af(struct pf_pdesc *pd) 2387 { 2388 static const struct pf_addr zero; 2389 struct ip *ip4; 2390 struct ip6_hdr *ip6; 2391 int copyback = 0; 2392 u_int hlen, ohlen, dlen; 2393 u_int16_t *pc; 2394 u_int8_t af_proto, naf_proto; 2395 2396 hlen = (pd->naf == AF_INET) ?
sizeof(*ip4) : sizeof(*ip6); 2397 ohlen = pd->off; 2398 dlen = pd->tot_len - pd->off; 2399 pc = pd->pcksum; 2400 2401 af_proto = naf_proto = pd->proto; 2402 if (naf_proto == IPPROTO_ICMP) 2403 af_proto = IPPROTO_ICMPV6; 2404 if (naf_proto == IPPROTO_ICMPV6) 2405 af_proto = IPPROTO_ICMP; 2406 2407 /* uncover stale pseudo-header */ 2408 switch (af_proto) { 2409 case IPPROTO_ICMPV6: 2410 /* optimise: unchanged for TCP/UDP */ 2411 pf_cksum_fixup(pc, htons(af_proto), 0x0, af_proto); 2412 pf_cksum_fixup(pc, htons(dlen), 0x0, af_proto); 2413 /* FALLTHROUGH */ 2414 case IPPROTO_UDP: /* FALLTHROUGH */ 2415 case IPPROTO_TCP: 2416 pf_cksum_fixup_a(pc, pd->src, &zero, pd->af, af_proto); 2417 pf_cksum_fixup_a(pc, pd->dst, &zero, pd->af, af_proto); 2418 copyback = 1; 2419 break; 2420 default: 2421 break; /* assume no pseudo-header */ 2422 } 2423 2424 /* replace the network header */ 2425 m_adj(pd->m, pd->off); 2426 pd->src = NULL; 2427 pd->dst = NULL; 2428 2429 if ((M_PREPEND(pd->m, hlen, M_DONTWAIT)) == NULL) { 2430 pd->m = NULL; 2431 return (-1); 2432 } 2433 2434 pd->off = hlen; 2435 pd->tot_len += hlen - ohlen; 2436 2437 switch (pd->naf) { 2438 case AF_INET: 2439 ip4 = mtod(pd->m, struct ip *); 2440 memset(ip4, 0, hlen); 2441 ip4->ip_v = IPVERSION; 2442 ip4->ip_hl = hlen >> 2; 2443 ip4->ip_tos = pd->tos; 2444 ip4->ip_len = htons(hlen + dlen); 2445 ip4->ip_id = htons(ip_randomid()); 2446 ip4->ip_off = htons(IP_DF); 2447 ip4->ip_ttl = pd->ttl; 2448 ip4->ip_p = pd->proto; 2449 ip4->ip_src = pd->nsaddr.v4; 2450 ip4->ip_dst = pd->ndaddr.v4; 2451 break; 2452 case AF_INET6: 2453 ip6 = mtod(pd->m, struct ip6_hdr *); 2454 memset(ip6, 0, hlen); 2455 ip6->ip6_vfc = IPV6_VERSION; 2456 ip6->ip6_flow |= htonl((u_int32_t)pd->tos << 20); 2457 ip6->ip6_plen = htons(dlen); 2458 ip6->ip6_nxt = pd->proto; 2459 if (!pd->ttl || pd->ttl > IPV6_DEFHLIM) 2460 ip6->ip6_hlim = IPV6_DEFHLIM; 2461 else 2462 ip6->ip6_hlim = pd->ttl; 2463 ip6->ip6_src = pd->nsaddr.v6; 2464 ip6->ip6_dst = pd->ndaddr.v6; 2465 break; 2466 default: 2467 unhandled_af(pd->naf); 2468 } 2469 2470 /* UDP over IPv6 must be checksummed per rfc2460 p27 */ 2471 if (naf_proto == IPPROTO_UDP && *pc == 0x0000 && 2472 pd->naf == AF_INET6) { 2473 pd->m->m_pkthdr.csum_flags |= M_UDP_CSUM_OUT; 2474 } 2475 2476 /* cover fresh pseudo-header */ 2477 switch (naf_proto) { 2478 case IPPROTO_ICMPV6: 2479 /* optimise: unchanged for TCP/UDP */ 2480 pf_cksum_fixup(pc, 0x0, htons(naf_proto), naf_proto); 2481 pf_cksum_fixup(pc, 0x0, htons(dlen), naf_proto); 2482 /* FALLTHROUGH */ 2483 case IPPROTO_UDP: /* FALLTHROUGH */ 2484 case IPPROTO_TCP: 2485 pf_cksum_fixup_a(pc, &zero, &pd->nsaddr, pd->naf, naf_proto); 2486 pf_cksum_fixup_a(pc, &zero, &pd->ndaddr, pd->naf, naf_proto); 2487 copyback = 1; 2488 break; 2489 default: 2490 break; /* assume no pseudo-header */ 2491 } 2492 2493 /* flush pd->pcksum */ 2494 if (copyback) 2495 m_copyback(pd->m, pd->off, pd->hdrlen, &pd->hdr, M_NOWAIT); 2496 2497 return (0); 2498 } 2499 2500 int 2501 pf_change_icmp_af(struct mbuf *m, int ipoff2, struct pf_pdesc *pd, 2502 struct pf_pdesc *pd2, struct pf_addr *src, struct pf_addr *dst, 2503 sa_family_t af, sa_family_t naf) 2504 { 2505 struct mbuf *n = NULL; 2506 struct ip *ip4; 2507 struct ip6_hdr *ip6; 2508 u_int hlen, ohlen, dlen; 2509 int d; 2510 2511 if (af == naf || (af != AF_INET && af != AF_INET6) || 2512 (naf != AF_INET && naf != AF_INET6)) 2513 return (-1); 2514 2515 /* split the mbuf chain on the quoted ip/ip6 header boundary */ 2516 if ((n = m_split(m, ipoff2, M_DONTWAIT)) == NULL) 2517 
return (-1); 2518 2519 /* new quoted header */ 2520 hlen = naf == AF_INET ? sizeof(*ip4) : sizeof(*ip6); 2521 /* old quoted header */ 2522 ohlen = pd2->off - ipoff2; 2523 2524 /* trim old quoted header */ 2525 pf_cksum_uncover(pd->pcksum, in_cksum(n, ohlen), pd->proto); 2526 m_adj(n, ohlen); 2527 2528 /* prepend a new, translated, quoted header */ 2529 if ((M_PREPEND(n, hlen, M_DONTWAIT)) == NULL) 2530 return (-1); 2531 2532 switch (naf) { 2533 case AF_INET: 2534 ip4 = mtod(n, struct ip *); 2535 memset(ip4, 0, sizeof(*ip4)); 2536 ip4->ip_v = IPVERSION; 2537 ip4->ip_hl = sizeof(*ip4) >> 2; 2538 ip4->ip_len = htons(sizeof(*ip4) + pd2->tot_len - ohlen); 2539 ip4->ip_id = htons(ip_randomid()); 2540 ip4->ip_off = htons(IP_DF); 2541 ip4->ip_ttl = pd2->ttl; 2542 if (pd2->proto == IPPROTO_ICMPV6) 2543 ip4->ip_p = IPPROTO_ICMP; 2544 else 2545 ip4->ip_p = pd2->proto; 2546 ip4->ip_src = src->v4; 2547 ip4->ip_dst = dst->v4; 2548 ip4->ip_sum = in_cksum(n, ip4->ip_hl << 2); 2549 break; 2550 case AF_INET6: 2551 ip6 = mtod(n, struct ip6_hdr *); 2552 memset(ip6, 0, sizeof(*ip6)); 2553 ip6->ip6_vfc = IPV6_VERSION; 2554 ip6->ip6_plen = htons(pd2->tot_len - ohlen); 2555 if (pd2->proto == IPPROTO_ICMP) 2556 ip6->ip6_nxt = IPPROTO_ICMPV6; 2557 else 2558 ip6->ip6_nxt = pd2->proto; 2559 if (!pd2->ttl || pd2->ttl > IPV6_DEFHLIM) 2560 ip6->ip6_hlim = IPV6_DEFHLIM; 2561 else 2562 ip6->ip6_hlim = pd2->ttl; 2563 ip6->ip6_src = src->v6; 2564 ip6->ip6_dst = dst->v6; 2565 break; 2566 } 2567 2568 /* cover new quoted header */ 2569 /* optimise: any new AF_INET header of ours sums to zero */ 2570 if (naf != AF_INET) { 2571 pf_cksum_cover(pd->pcksum, in_cksum(n, hlen), pd->proto); 2572 } 2573 2574 /* reattach modified quoted packet to outer header */ 2575 { 2576 int nlen = n->m_pkthdr.len; 2577 m_cat(m, n); 2578 m->m_pkthdr.len += nlen; 2579 } 2580 2581 /* account for altered length */ 2582 d = hlen - ohlen; 2583 2584 if (pd->proto == IPPROTO_ICMPV6) { 2585 /* fixup pseudo-header */ 2586 dlen = pd->tot_len - pd->off; 2587 pf_cksum_fixup(pd->pcksum, 2588 htons(dlen), htons(dlen + d), pd->proto); 2589 } 2590 2591 pd->tot_len += d; 2592 pd2->tot_len += d; 2593 pd2->off += d; 2594 2595 /* note: not bothering to update network headers as 2596 these are due for rewrite by pf_translate_af() */ 2597 2598 return (0); 2599 } 2600 2601 2602 #define PTR_IP(field) (offsetof(struct ip, field)) 2603 #define PTR_IP6(field) (offsetof(struct ip6_hdr, field)) 2604 2605 int 2606 pf_translate_icmp_af(struct pf_pdesc *pd, int af, void *arg) 2607 { 2608 struct icmp *icmp4; 2609 struct icmp6_hdr *icmp6; 2610 u_int32_t mtu; 2611 int32_t ptr = -1; 2612 u_int8_t type; 2613 u_int8_t code; 2614 2615 switch (af) { 2616 case AF_INET: 2617 icmp6 = arg; 2618 type = icmp6->icmp6_type; 2619 code = icmp6->icmp6_code; 2620 mtu = ntohl(icmp6->icmp6_mtu); 2621 2622 switch (type) { 2623 case ICMP6_ECHO_REQUEST: 2624 type = ICMP_ECHO; 2625 break; 2626 case ICMP6_ECHO_REPLY: 2627 type = ICMP_ECHOREPLY; 2628 break; 2629 case ICMP6_DST_UNREACH: 2630 type = ICMP_UNREACH; 2631 switch (code) { 2632 case ICMP6_DST_UNREACH_NOROUTE: 2633 case ICMP6_DST_UNREACH_BEYONDSCOPE: 2634 case ICMP6_DST_UNREACH_ADDR: 2635 code = ICMP_UNREACH_HOST; 2636 break; 2637 case ICMP6_DST_UNREACH_ADMIN: 2638 code = ICMP_UNREACH_HOST_PROHIB; 2639 break; 2640 case ICMP6_DST_UNREACH_NOPORT: 2641 code = ICMP_UNREACH_PORT; 2642 break; 2643 default: 2644 return (-1); 2645 } 2646 break; 2647 case ICMP6_PACKET_TOO_BIG: 2648 type = ICMP_UNREACH; 2649 code = ICMP_UNREACH_NEEDFRAG; 2650 mtu -= 20; 2651 break;
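		/*
		 * (Editor's note: the 20-byte MTU bias above, like the
		 * matching "mtu += 20" in the AF_INET6 direction below,
		 * is sizeof(struct ip6_hdr) - sizeof(struct ip) = 40 - 20:
		 * the translated network header shrinks or grows every
		 * packet by that amount, so the quoted MTU must follow.)
		 */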
2652 case ICMP6_TIME_EXCEEDED: 2653 type = ICMP_TIMXCEED; 2654 break; 2655 case ICMP6_PARAM_PROB: 2656 switch (code) { 2657 case ICMP6_PARAMPROB_HEADER: 2658 type = ICMP_PARAMPROB; 2659 code = ICMP_PARAMPROB_ERRATPTR; 2660 ptr = ntohl(icmp6->icmp6_pptr); 2661 2662 if (ptr == PTR_IP6(ip6_vfc)) 2663 ; /* preserve */ 2664 else if (ptr == PTR_IP6(ip6_vfc) + 1) 2665 ptr = PTR_IP(ip_tos); 2666 else if (ptr == PTR_IP6(ip6_plen) || 2667 ptr == PTR_IP6(ip6_plen) + 1) 2668 ptr = PTR_IP(ip_len); 2669 else if (ptr == PTR_IP6(ip6_nxt)) 2670 ptr = PTR_IP(ip_p); 2671 else if (ptr == PTR_IP6(ip6_hlim)) 2672 ptr = PTR_IP(ip_ttl); 2673 else if (ptr >= PTR_IP6(ip6_src) && 2674 ptr < PTR_IP6(ip6_dst)) 2675 ptr = PTR_IP(ip_src); 2676 else if (ptr >= PTR_IP6(ip6_dst) && 2677 ptr < sizeof(struct ip6_hdr)) 2678 ptr = PTR_IP(ip_dst); 2679 else { 2680 return (-1); 2681 } 2682 break; 2683 case ICMP6_PARAMPROB_NEXTHEADER: 2684 type = ICMP_UNREACH; 2685 code = ICMP_UNREACH_PROTOCOL; 2686 break; 2687 default: 2688 return (-1); 2689 } 2690 break; 2691 default: 2692 return (-1); 2693 } 2694 2695 pf_patch_8(pd, &icmp6->icmp6_type, type, PF_HI); 2696 pf_patch_8(pd, &icmp6->icmp6_code, code, PF_LO); 2697 2698 /* aligns well with an icmpv4 nextmtu */ 2699 pf_patch_32(pd, &icmp6->icmp6_mtu, htonl(mtu)); 2700 2701 /* icmpv4 pptr occupies only the most significant byte */ 2702 if (ptr >= 0) 2703 pf_patch_32(pd, &icmp6->icmp6_pptr, htonl(ptr << 24)); 2704 break; 2705 case AF_INET6: 2706 icmp4 = arg; 2707 type = icmp4->icmp_type; 2708 code = icmp4->icmp_code; 2709 mtu = ntohs(icmp4->icmp_nextmtu); 2710 2711 switch (type) { 2712 case ICMP_ECHO: 2713 type = ICMP6_ECHO_REQUEST; 2714 break; 2715 case ICMP_ECHOREPLY: 2716 type = ICMP6_ECHO_REPLY; 2717 break; 2718 case ICMP_UNREACH: 2719 type = ICMP6_DST_UNREACH; 2720 switch (code) { 2721 case ICMP_UNREACH_NET: 2722 case ICMP_UNREACH_HOST: 2723 case ICMP_UNREACH_NET_UNKNOWN: 2724 case ICMP_UNREACH_HOST_UNKNOWN: 2725 case ICMP_UNREACH_ISOLATED: 2726 case ICMP_UNREACH_TOSNET: 2727 case ICMP_UNREACH_TOSHOST: 2728 code = ICMP6_DST_UNREACH_NOROUTE; 2729 break; 2730 case ICMP_UNREACH_PORT: 2731 code = ICMP6_DST_UNREACH_NOPORT; 2732 break; 2733 case ICMP_UNREACH_NET_PROHIB: 2734 case ICMP_UNREACH_HOST_PROHIB: 2735 case ICMP_UNREACH_FILTER_PROHIB: 2736 case ICMP_UNREACH_PRECEDENCE_CUTOFF: 2737 code = ICMP6_DST_UNREACH_ADMIN; 2738 break; 2739 case ICMP_UNREACH_PROTOCOL: 2740 type = ICMP6_PARAM_PROB; 2741 code = ICMP6_PARAMPROB_NEXTHEADER; 2742 ptr = offsetof(struct ip6_hdr, ip6_nxt); 2743 break; 2744 case ICMP_UNREACH_NEEDFRAG: 2745 type = ICMP6_PACKET_TOO_BIG; 2746 code = 0; 2747 mtu += 20; 2748 break; 2749 default: 2750 return (-1); 2751 } 2752 break; 2753 case ICMP_TIMXCEED: 2754 type = ICMP6_TIME_EXCEEDED; 2755 break; 2756 case ICMP_PARAMPROB: 2757 type = ICMP6_PARAM_PROB; 2758 switch (code) { 2759 case ICMP_PARAMPROB_ERRATPTR: 2760 code = ICMP6_PARAMPROB_HEADER; 2761 break; 2762 case ICMP_PARAMPROB_LENGTH: 2763 code = ICMP6_PARAMPROB_HEADER; 2764 break; 2765 default: 2766 return (-1); 2767 } 2768 2769 ptr = icmp4->icmp_pptr; 2770 if (ptr == 0 || ptr == PTR_IP(ip_tos)) 2771 ; /* preserve */ 2772 else if (ptr == PTR_IP(ip_len) || 2773 ptr == PTR_IP(ip_len) + 1) 2774 ptr = PTR_IP6(ip6_plen); 2775 else if (ptr == PTR_IP(ip_ttl)) 2776 ptr = PTR_IP6(ip6_hlim); 2777 else if (ptr == PTR_IP(ip_p)) 2778 ptr = PTR_IP6(ip6_nxt); 2779 else if (ptr >= PTR_IP(ip_src) && 2780 ptr < PTR_IP(ip_dst)) 2781 ptr = PTR_IP6(ip6_src); 2782 else if (ptr >= PTR_IP(ip_dst) && 2783 ptr < sizeof(struct ip)) 2784 ptr =
PTR_IP6(ip6_dst); 2785 else { 2786 return (-1); 2787 } 2788 break; 2789 default: 2790 return (-1); 2791 } 2792 2793 pf_patch_8(pd, &icmp4->icmp_type, type, PF_HI); 2794 pf_patch_8(pd, &icmp4->icmp_code, code, PF_LO); 2795 pf_patch_16(pd, &icmp4->icmp_nextmtu, htons(mtu)); 2796 if (ptr >= 0) 2797 pf_patch_32(pd, &icmp4->icmp_void, htonl(ptr)); 2798 break; 2799 } 2800 2801 return (0); 2802 } 2803 #endif /* INET6 */ 2804 2805 /* 2806 * Need to modulate the sequence numbers in the TCP SACK option 2807 * (credits to Krzysztof Pfaff for report and patch) 2808 */ 2809 int 2810 pf_modulate_sack(struct pf_pdesc *pd, struct pf_state_peer *dst) 2811 { 2812 struct sackblk sack; 2813 int copyback = 0, i; 2814 int olen, optsoff; 2815 u_int8_t opts[MAX_TCPOPTLEN], *opt, *eoh; 2816 2817 olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr); 2818 optsoff = pd->off + sizeof(struct tcphdr); 2819 #define TCPOLEN_MINSACK (TCPOLEN_SACK + 2) 2820 if (olen < TCPOLEN_MINSACK || 2821 !pf_pull_hdr(pd->m, optsoff, opts, olen, NULL, NULL, pd->af)) 2822 return (0); 2823 2824 eoh = opts + olen; 2825 opt = opts; 2826 while ((opt = pf_find_tcpopt(opt, opts, olen, 2827 TCPOPT_SACK, TCPOLEN_MINSACK)) != NULL) 2828 { 2829 size_t safelen = MIN(opt[1], (eoh - opt)); 2830 for (i = 2; i + TCPOLEN_SACK <= safelen; i += TCPOLEN_SACK) { 2831 size_t startoff = (opt + i) - opts; 2832 memcpy(&sack, &opt[i], sizeof(sack)); 2833 pf_patch_32_unaligned(pd, &sack.start, 2834 htonl(ntohl(sack.start) - dst->seqdiff), 2835 PF_ALGNMNT(startoff)); 2836 pf_patch_32_unaligned(pd, &sack.end, 2837 htonl(ntohl(sack.end) - dst->seqdiff), 2838 PF_ALGNMNT(startoff + sizeof(sack.start))); 2839 memcpy(&opt[i], &sack, sizeof(sack)); 2840 } 2841 copyback = 1; 2842 opt += opt[1]; 2843 } 2844 2845 if (copyback) 2846 m_copyback(pd->m, optsoff, olen, opts, M_NOWAIT); 2847 return (copyback); 2848 } 2849 2850 struct mbuf * 2851 pf_build_tcp(const struct pf_rule *r, sa_family_t af, 2852 const struct pf_addr *saddr, const struct pf_addr *daddr, 2853 u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack, 2854 u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag, 2855 u_int16_t rtag, u_int sack, u_int rdom) 2856 { 2857 struct mbuf *m; 2858 int len, tlen; 2859 struct ip *h; 2860 #ifdef INET6 2861 struct ip6_hdr *h6; 2862 #endif /* INET6 */ 2863 struct tcphdr *th; 2864 char *opt; 2865 2866 /* maximum segment size tcp option */ 2867 tlen = sizeof(struct tcphdr); 2868 if (mss) 2869 tlen += 4; 2870 if (sack) 2871 tlen += 2; 2872 2873 switch (af) { 2874 case AF_INET: 2875 len = sizeof(struct ip) + tlen; 2876 break; 2877 #ifdef INET6 2878 case AF_INET6: 2879 len = sizeof(struct ip6_hdr) + tlen; 2880 break; 2881 #endif /* INET6 */ 2882 default: 2883 unhandled_af(af); 2884 } 2885 2886 /* create outgoing mbuf */ 2887 m = m_gethdr(M_DONTWAIT, MT_HEADER); 2888 if (m == NULL) 2889 return (NULL); 2890 if (tag) 2891 m->m_pkthdr.pf.flags |= PF_TAG_GENERATED; 2892 m->m_pkthdr.pf.tag = rtag; 2893 m->m_pkthdr.ph_rtableid = rdom; 2894 if (r && (r->scrub_flags & PFSTATE_SETPRIO)) 2895 m->m_pkthdr.pf.prio = r->set_prio[0]; 2896 if (r && r->qid) 2897 m->m_pkthdr.pf.qid = r->qid; 2898 m->m_data += max_linkhdr; 2899 m->m_pkthdr.len = m->m_len = len; 2900 m->m_pkthdr.ph_ifidx = 0; 2901 m->m_pkthdr.csum_flags |= M_TCP_CSUM_OUT; 2902 memset(m->m_data, 0, len); 2903 switch (af) { 2904 case AF_INET: 2905 h = mtod(m, struct ip *); 2906 h->ip_p = IPPROTO_TCP; 2907 2908 h->ip_v = 4; 2909 h->ip_hl = sizeof(*h) >> 2; 2910 h->ip_tos = IPTOS_LOWDELAY;
2911 h->ip_len = htons(len); 2912 h->ip_off = htons(ip_mtudisc ? IP_DF : 0); 2913 h->ip_ttl = ttl ? ttl : ip_defttl; 2914 h->ip_sum = 0; 2915 h->ip_src.s_addr = saddr->v4.s_addr; 2916 h->ip_dst.s_addr = daddr->v4.s_addr; 2917 2918 th = (struct tcphdr *)((caddr_t)h + sizeof(struct ip)); 2919 break; 2920 #ifdef INET6 2921 case AF_INET6: 2922 h6 = mtod(m, struct ip6_hdr *); 2923 h6->ip6_nxt = IPPROTO_TCP; 2924 h6->ip6_plen = htons(tlen); 2925 h6->ip6_vfc |= IPV6_VERSION; 2926 h6->ip6_hlim = IPV6_DEFHLIM; 2927 memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr)); 2928 memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr)); 2929 2930 th = (struct tcphdr *)((caddr_t)h6 + sizeof(struct ip6_hdr)); 2931 break; 2932 #endif /* INET6 */ 2933 default: 2934 unhandled_af(af); 2935 } 2936 2937 /* TCP header */ 2938 th->th_sport = sport; 2939 th->th_dport = dport; 2940 th->th_seq = htonl(seq); 2941 th->th_ack = htonl(ack); 2942 th->th_off = tlen >> 2; 2943 th->th_flags = flags; 2944 th->th_win = htons(win); 2945 2946 opt = (char *)(th + 1); 2947 if (mss) { 2948 opt[0] = TCPOPT_MAXSEG; 2949 opt[1] = 4; 2950 mss = htons(mss); 2951 memcpy((opt + 2), &mss, 2); 2952 opt += 4; 2953 } 2954 if (sack) { 2955 opt[0] = TCPOPT_SACK_PERMITTED; 2956 opt[1] = 2; 2957 opt += 2; 2958 } 2959 2960 return (m); 2961 } 2962 2963 void 2964 pf_send_tcp(const struct pf_rule *r, sa_family_t af, 2965 const struct pf_addr *saddr, const struct pf_addr *daddr, 2966 u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack, 2967 u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag, 2968 u_int16_t rtag, u_int rdom) 2969 { 2970 struct mbuf *m; 2971 2972 if ((m = pf_build_tcp(r, af, saddr, daddr, sport, dport, seq, ack, 2973 flags, win, mss, ttl, tag, rtag, 0, rdom)) == NULL) 2974 return; 2975 2976 switch (af) { 2977 case AF_INET: 2978 ip_send(m); 2979 break; 2980 #ifdef INET6 2981 case AF_INET6: 2982 ip6_send(m); 2983 break; 2984 #endif /* INET6 */ 2985 } 2986 } 2987 2988 static void 2989 pf_send_challenge_ack(struct pf_pdesc *pd, struct pf_state *s, 2990 struct pf_state_peer *src, struct pf_state_peer *dst) 2991 { 2992 /* 2993 * We are sending a challenge ACK as a response to a SYN packet which 2994 * matches an existing state (modulo the TCP window check). The packet 2995 * must therefore be sent on behalf of the destination. 2996 * 2997 * We expect the sender either to remain silent or to send an RST 2998 * packet, so that both the firewall and the remote peer can purge the 2999 * dead state from memory.
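 *
 * (This mirrors the "challenge ACK" behaviour of RFC 5961: a spoofed
 * SYN cannot tear the state down, while a peer that genuinely lost its
 * connection will answer our ACK with an RST.)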
3000 */ 3001 pf_send_tcp(s->rule.ptr, pd->af, pd->dst, pd->src, 3002 pd->hdr.tcp.th_dport, pd->hdr.tcp.th_sport, dst->seqlo, 3003 src->seqlo, TH_ACK, 0, 0, s->rule.ptr->return_ttl, 1, 0, 3004 pd->rdomain); 3005 } 3006 3007 void 3008 pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, int param, 3009 sa_family_t af, struct pf_rule *r, u_int rdomain) 3010 { 3011 struct mbuf *m0; 3012 3013 if ((m0 = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) 3014 return; 3015 3016 m0->m_pkthdr.pf.flags |= PF_TAG_GENERATED; 3017 m0->m_pkthdr.ph_rtableid = rdomain; 3018 if (r && (r->scrub_flags & PFSTATE_SETPRIO)) 3019 m0->m_pkthdr.pf.prio = r->set_prio[0]; 3020 if (r && r->qid) 3021 m0->m_pkthdr.pf.qid = r->qid; 3022 3023 switch (af) { 3024 case AF_INET: 3025 icmp_error(m0, type, code, 0, param); 3026 break; 3027 #ifdef INET6 3028 case AF_INET6: 3029 icmp6_error(m0, type, code, param); 3030 break; 3031 #endif /* INET6 */ 3032 } 3033 } 3034 3035 /* 3036 * Return ((n == 0) == (a == b [with mask m])) 3037 * Note: n != 0 => returns (a != b [with mask m]) 3038 */ 3039 int 3040 pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m, 3041 struct pf_addr *b, sa_family_t af) 3042 { 3043 switch (af) { 3044 case AF_INET: 3045 if ((a->addr32[0] & m->addr32[0]) == 3046 (b->addr32[0] & m->addr32[0])) 3047 return (n == 0); 3048 break; 3049 #ifdef INET6 3050 case AF_INET6: 3051 if (((a->addr32[0] & m->addr32[0]) == 3052 (b->addr32[0] & m->addr32[0])) && 3053 ((a->addr32[1] & m->addr32[1]) == 3054 (b->addr32[1] & m->addr32[1])) && 3055 ((a->addr32[2] & m->addr32[2]) == 3056 (b->addr32[2] & m->addr32[2])) && 3057 ((a->addr32[3] & m->addr32[3]) == 3058 (b->addr32[3] & m->addr32[3]))) 3059 return (n == 0); 3060 break; 3061 #endif /* INET6 */ 3062 } 3063 3064 return (n != 0); 3065 } 3066 3067 /* 3068 * Return 1 if b <= a <= e, otherwise return 0.
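 * The comparison is numeric, per 32-bit word in host byte order; e.g.
 * with (illustrative addresses) b = 10.0.0.10 and e = 10.0.0.20,
 * a = 10.0.0.15 matches while a = 10.0.1.15 does not.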
3069 */ 3070 int 3071 pf_match_addr_range(struct pf_addr *b, struct pf_addr *e, 3072 struct pf_addr *a, sa_family_t af) 3073 { 3074 switch (af) { 3075 case AF_INET: 3076 if ((ntohl(a->addr32[0]) < ntohl(b->addr32[0])) || 3077 (ntohl(a->addr32[0]) > ntohl(e->addr32[0]))) 3078 return (0); 3079 break; 3080 #ifdef INET6 3081 case AF_INET6: { 3082 int i; 3083 3084 /* check a >= b */ 3085 for (i = 0; i < 4; ++i) 3086 if (ntohl(a->addr32[i]) > ntohl(b->addr32[i])) 3087 break; 3088 else if (ntohl(a->addr32[i]) < ntohl(b->addr32[i])) 3089 return (0); 3090 /* check a <= e */ 3091 for (i = 0; i < 4; ++i) 3092 if (ntohl(a->addr32[i]) < ntohl(e->addr32[i])) 3093 break; 3094 else if (ntohl(a->addr32[i]) > ntohl(e->addr32[i])) 3095 return (0); 3096 break; 3097 } 3098 #endif /* INET6 */ 3099 } 3100 return (1); 3101 } 3102 3103 int 3104 pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p) 3105 { 3106 switch (op) { 3107 case PF_OP_IRG: 3108 return ((p > a1) && (p < a2)); 3109 case PF_OP_XRG: 3110 return ((p < a1) || (p > a2)); 3111 case PF_OP_RRG: 3112 return ((p >= a1) && (p <= a2)); 3113 case PF_OP_EQ: 3114 return (p == a1); 3115 case PF_OP_NE: 3116 return (p != a1); 3117 case PF_OP_LT: 3118 return (p < a1); 3119 case PF_OP_LE: 3120 return (p <= a1); 3121 case PF_OP_GT: 3122 return (p > a1); 3123 case PF_OP_GE: 3124 return (p >= a1); 3125 } 3126 return (0); /* never reached */ 3127 } 3128 3129 int 3130 pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p) 3131 { 3132 return (pf_match(op, ntohs(a1), ntohs(a2), ntohs(p))); 3133 } 3134 3135 int 3136 pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u) 3137 { 3138 if (u == -1 && op != PF_OP_EQ && op != PF_OP_NE) 3139 return (0); 3140 return (pf_match(op, a1, a2, u)); 3141 } 3142 3143 int 3144 pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g) 3145 { 3146 if (g == -1 && op != PF_OP_EQ && op != PF_OP_NE) 3147 return (0); 3148 return (pf_match(op, a1, a2, g)); 3149 } 3150 3151 int 3152 pf_match_tag(struct mbuf *m, struct pf_rule *r, int *tag) 3153 { 3154 if (*tag == -1) 3155 *tag = m->m_pkthdr.pf.tag; 3156 3157 return ((!r->match_tag_not && r->match_tag == *tag) || 3158 (r->match_tag_not && r->match_tag != *tag)); 3159 } 3160 3161 int 3162 pf_match_rcvif(struct mbuf *m, struct pf_rule *r) 3163 { 3164 struct ifnet *ifp; 3165 #if NCARP > 0 3166 struct ifnet *ifp0; 3167 #endif 3168 struct pfi_kif *kif; 3169 3170 ifp = if_get(m->m_pkthdr.ph_ifidx); 3171 if (ifp == NULL) 3172 return (0); 3173 3174 #if NCARP > 0 3175 if (ifp->if_type == IFT_CARP && 3176 (ifp0 = if_get(ifp->if_carpdevidx)) != NULL) { 3177 kif = (struct pfi_kif *)ifp0->if_pf_kif; 3178 if_put(ifp0); 3179 } else 3180 #endif /* NCARP */ 3181 kif = (struct pfi_kif *)ifp->if_pf_kif; 3182 3183 if_put(ifp); 3184 3185 if (kif == NULL) { 3186 DPFPRINTF(LOG_ERR, 3187 "%s: kif == NULL, @%d via %s", __func__, 3188 r->nr, r->rcv_ifname); 3189 return (0); 3190 } 3191 3192 return (pfi_kif_match(r->rcv_kif, kif)); 3193 } 3194 3195 void 3196 pf_tag_packet(struct mbuf *m, int tag, int rtableid) 3197 { 3198 if (tag > 0) 3199 m->m_pkthdr.pf.tag = tag; 3200 if (rtableid >= 0) 3201 m->m_pkthdr.ph_rtableid = (u_int)rtableid; 3202 } 3203 3204 enum pf_test_status 3205 pf_step_into_anchor(struct pf_test_ctx *ctx, struct pf_rule *r) 3206 { 3207 int rv; 3208 3209 if (ctx->depth >= PF_ANCHOR_STACK_MAX) { 3210 log(LOG_ERR, "pf_step_into_anchor: stack overflow\n"); 3211 return (PF_TEST_FAIL); 3212 } 3213 3214 ctx->depth++; 3215 3216 if (r->anchor_wildcard) { 3217 struct pf_anchor *child; 3218 rv = 
PF_TEST_OK; 3219 RB_FOREACH(child, pf_anchor_node, &r->anchor->children) { 3220 rv = pf_match_rule(ctx, &child->ruleset); 3221 if ((rv == PF_TEST_QUICK) || (rv == PF_TEST_FAIL)) { 3222 /* 3223 * we either hit a rule with quick action 3224 * (more likely), or hit some runtime 3225 * error (e.g. pool_get() failure). 3226 */ 3227 break; 3228 } 3229 } 3230 } else { 3231 rv = pf_match_rule(ctx, &r->anchor->ruleset); 3232 /* 3233 * Unless errors occurred, stop iff any rule matched 3234 * within quick anchors. 3235 */ 3236 if (rv != PF_TEST_FAIL && r->quick == PF_TEST_QUICK && 3237 *ctx->am == r) 3238 rv = PF_TEST_QUICK; 3239 } 3240 3241 ctx->depth--; 3242 3243 return (rv); 3244 } 3245 3246 void 3247 pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr, 3248 struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af) 3249 { 3250 switch (af) { 3251 case AF_INET: 3252 naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) | 3253 ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]); 3254 break; 3255 #ifdef INET6 3256 case AF_INET6: 3257 naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) | 3258 ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]); 3259 naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) | 3260 ((rmask->addr32[1] ^ 0xffffffff ) & saddr->addr32[1]); 3261 naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) | 3262 ((rmask->addr32[2] ^ 0xffffffff ) & saddr->addr32[2]); 3263 naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) | 3264 ((rmask->addr32[3] ^ 0xffffffff ) & saddr->addr32[3]); 3265 break; 3266 #endif /* INET6 */ 3267 default: 3268 unhandled_af(af); 3269 } 3270 } 3271 3272 void 3273 pf_addr_inc(struct pf_addr *addr, sa_family_t af) 3274 { 3275 switch (af) { 3276 case AF_INET: 3277 addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1); 3278 break; 3279 #ifdef INET6 3280 case AF_INET6: 3281 if (addr->addr32[3] == 0xffffffff) { 3282 addr->addr32[3] = 0; 3283 if (addr->addr32[2] == 0xffffffff) { 3284 addr->addr32[2] = 0; 3285 if (addr->addr32[1] == 0xffffffff) { 3286 addr->addr32[1] = 0; 3287 addr->addr32[0] = 3288 htonl(ntohl(addr->addr32[0]) + 1); 3289 } else 3290 addr->addr32[1] = 3291 htonl(ntohl(addr->addr32[1]) + 1); 3292 } else 3293 addr->addr32[2] = 3294 htonl(ntohl(addr->addr32[2]) + 1); 3295 } else 3296 addr->addr32[3] = 3297 htonl(ntohl(addr->addr32[3]) + 1); 3298 break; 3299 #endif /* INET6 */ 3300 default: 3301 unhandled_af(af); 3302 } 3303 } 3304 3305 int 3306 pf_socket_lookup(struct pf_pdesc *pd) 3307 { 3308 struct pf_addr *saddr, *daddr; 3309 u_int16_t sport, dport; 3310 struct inpcbtable *tb; 3311 struct inpcb *inp; 3312 3313 pd->lookup.uid = -1; 3314 pd->lookup.gid = -1; 3315 pd->lookup.pid = NO_PID; 3316 switch (pd->virtual_proto) { 3317 case IPPROTO_TCP: 3318 sport = pd->hdr.tcp.th_sport; 3319 dport = pd->hdr.tcp.th_dport; 3320 PF_ASSERT_LOCKED(); 3321 NET_ASSERT_LOCKED(); 3322 tb = &tcbtable; 3323 break; 3324 case IPPROTO_UDP: 3325 sport = pd->hdr.udp.uh_sport; 3326 dport = pd->hdr.udp.uh_dport; 3327 PF_ASSERT_LOCKED(); 3328 NET_ASSERT_LOCKED(); 3329 tb = &udbtable; 3330 break; 3331 default: 3332 return (-1); 3333 } 3334 if (pd->dir == PF_IN) { 3335 saddr = pd->src; 3336 daddr = pd->dst; 3337 } else { 3338 u_int16_t p; 3339 3340 p = sport; 3341 sport = dport; 3342 dport = p; 3343 saddr = pd->dst; 3344 daddr = pd->src; 3345 } 3346 switch (pd->af) { 3347 case AF_INET: 3348 /* 3349 * Fails when rtable is changed while evaluating the ruleset 3350 * The socket looked up will not match the one hit in the end. 
3351 */ 3352 inp = in_pcbhashlookup(tb, saddr->v4, sport, daddr->v4, dport, 3353 pd->rdomain); 3354 if (inp == NULL) { 3355 inp = in_pcblookup_listen(tb, daddr->v4, dport, 3356 NULL, pd->rdomain); 3357 if (inp == NULL) 3358 return (-1); 3359 } 3360 break; 3361 #ifdef INET6 3362 case AF_INET6: 3363 inp = in6_pcbhashlookup(tb, &saddr->v6, sport, &daddr->v6, 3364 dport, pd->rdomain); 3365 if (inp == NULL) { 3366 inp = in6_pcblookup_listen(tb, &daddr->v6, dport, 3367 NULL, pd->rdomain); 3368 if (inp == NULL) 3369 return (-1); 3370 } 3371 break; 3372 #endif /* INET6 */ 3373 default: 3374 unhandled_af(pd->af); 3375 } 3376 pd->lookup.uid = inp->inp_socket->so_euid; 3377 pd->lookup.gid = inp->inp_socket->so_egid; 3378 pd->lookup.pid = inp->inp_socket->so_cpid; 3379 return (1); 3380 } 3381 3382 /* post: r => (r[0] == type /\ r[1] >= min_typelen >= 2 "validity" 3383 * /\ (eoh - r) >= min_typelen >= 2 "safety" ) 3384 * 3385 * warning: r + r[1] may exceed opts bounds for r[1] > min_typelen 3386 */ 3387 u_int8_t* 3388 pf_find_tcpopt(u_int8_t *opt, u_int8_t *opts, size_t hlen, u_int8_t type, 3389 u_int8_t min_typelen) 3390 { 3391 u_int8_t *eoh = opts + hlen; 3392 3393 if (min_typelen < 2) 3394 return (NULL); 3395 3396 while ((eoh - opt) >= min_typelen) { 3397 switch (*opt) { 3398 case TCPOPT_EOL: 3399 /* FALLTHROUGH - Workaround the failure of some 3400 systems to NOP-pad their bzero'd option buffers, 3401 producing spurious EOLs */ 3402 case TCPOPT_NOP: 3403 opt++; 3404 continue; 3405 default: 3406 if (opt[0] == type && 3407 opt[1] >= min_typelen) 3408 return (opt); 3409 } 3410 3411 opt += MAX(opt[1], 2); /* evade infinite loops */ 3412 } 3413 3414 return (NULL); 3415 } 3416 3417 u_int8_t 3418 pf_get_wscale(struct pf_pdesc *pd) 3419 { 3420 int olen; 3421 u_int8_t opts[MAX_TCPOPTLEN], *opt; 3422 u_int8_t wscale = 0; 3423 3424 olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr); 3425 if (olen < TCPOLEN_WINDOW || !pf_pull_hdr(pd->m, 3426 pd->off + sizeof(struct tcphdr), opts, olen, NULL, NULL, pd->af)) 3427 return (0); 3428 3429 opt = opts; 3430 while ((opt = pf_find_tcpopt(opt, opts, olen, 3431 TCPOPT_WINDOW, TCPOLEN_WINDOW)) != NULL) { 3432 wscale = opt[2]; 3433 wscale = MIN(wscale, TCP_MAX_WINSHIFT); 3434 wscale |= PF_WSCALE_FLAG; 3435 3436 opt += opt[1]; 3437 } 3438 3439 return (wscale); 3440 } 3441 3442 u_int16_t 3443 pf_get_mss(struct pf_pdesc *pd) 3444 { 3445 int olen; 3446 u_int8_t opts[MAX_TCPOPTLEN], *opt; 3447 u_int16_t mss = tcp_mssdflt; 3448 3449 olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr); 3450 if (olen < TCPOLEN_MAXSEG || !pf_pull_hdr(pd->m, 3451 pd->off + sizeof(struct tcphdr), opts, olen, NULL, NULL, pd->af)) 3452 return (0); 3453 3454 opt = opts; 3455 while ((opt = pf_find_tcpopt(opt, opts, olen, 3456 TCPOPT_MAXSEG, TCPOLEN_MAXSEG)) != NULL) { 3457 memcpy(&mss, (opt + 2), 2); 3458 mss = ntohs(mss); 3459 3460 opt += opt[1]; 3461 } 3462 return (mss); 3463 } 3464 3465 u_int16_t 3466 pf_calc_mss(struct pf_addr *addr, sa_family_t af, int rtableid, u_int16_t offer) 3467 { 3468 struct ifnet *ifp; 3469 struct sockaddr_in *dst; 3470 #ifdef INET6 3471 struct sockaddr_in6 *dst6; 3472 #endif /* INET6 */ 3473 struct rtentry *rt = NULL; 3474 struct sockaddr_storage ss; 3475 int hlen; 3476 u_int16_t mss = tcp_mssdflt; 3477 3478 memset(&ss, 0, sizeof(ss)); 3479 3480 switch (af) { 3481 case AF_INET: 3482 hlen = sizeof(struct ip); 3483 dst = (struct sockaddr_in *)&ss; 3484 dst->sin_family = AF_INET; 3485 dst->sin_len = sizeof(*dst); 3486 dst->sin_addr = addr->v4; 3487 rt = 
rtalloc(sintosa(dst), 0, rtableid); 3488 break; 3489 #ifdef INET6 3490 case AF_INET6: 3491 hlen = sizeof(struct ip6_hdr); 3492 dst6 = (struct sockaddr_in6 *)&ss; 3493 dst6->sin6_family = AF_INET6; 3494 dst6->sin6_len = sizeof(*dst6); 3495 dst6->sin6_addr = addr->v6; 3496 rt = rtalloc(sin6tosa(dst6), 0, rtableid); 3497 break; 3498 #endif /* INET6 */ 3499 } 3500 3501 if (rt != NULL && (ifp = if_get(rt->rt_ifidx)) != NULL) { 3502 mss = ifp->if_mtu - hlen - sizeof(struct tcphdr); 3503 mss = max(tcp_mssdflt, mss); 3504 if_put(ifp); 3505 } 3506 rtfree(rt); 3507 mss = min(mss, offer); 3508 mss = max(mss, 64); /* sanity - at least max opt space */ 3509 return (mss); 3510 } 3511 3512 static __inline int 3513 pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr, sa_family_t af, 3514 struct pf_src_node **sns) 3515 { 3516 struct pf_rule *r = s->rule.ptr; 3517 int rv; 3518 3519 if (!r->rt) 3520 return (0); 3521 3522 rv = pf_map_addr(af, r, saddr, &s->rt_addr, NULL, sns, 3523 &r->route, PF_SN_ROUTE); 3524 if (rv == 0) 3525 s->rt = r->rt; 3526 3527 return (rv); 3528 } 3529 3530 u_int32_t 3531 pf_tcp_iss(struct pf_pdesc *pd) 3532 { 3533 SHA2_CTX ctx; 3534 union { 3535 uint8_t bytes[SHA512_DIGEST_LENGTH]; 3536 uint32_t words[1]; 3537 } digest; 3538 3539 if (pf_tcp_secret_init == 0) { 3540 arc4random_buf(pf_tcp_secret, sizeof(pf_tcp_secret)); 3541 SHA512Init(&pf_tcp_secret_ctx); 3542 SHA512Update(&pf_tcp_secret_ctx, pf_tcp_secret, 3543 sizeof(pf_tcp_secret)); 3544 pf_tcp_secret_init = 1; 3545 } 3546 ctx = pf_tcp_secret_ctx; 3547 3548 SHA512Update(&ctx, &pd->rdomain, sizeof(pd->rdomain)); 3549 SHA512Update(&ctx, &pd->hdr.tcp.th_sport, sizeof(u_short)); 3550 SHA512Update(&ctx, &pd->hdr.tcp.th_dport, sizeof(u_short)); 3551 switch (pd->af) { 3552 case AF_INET: 3553 SHA512Update(&ctx, &pd->src->v4, sizeof(struct in_addr)); 3554 SHA512Update(&ctx, &pd->dst->v4, sizeof(struct in_addr)); 3555 break; 3556 #ifdef INET6 3557 case AF_INET6: 3558 SHA512Update(&ctx, &pd->src->v6, sizeof(struct in6_addr)); 3559 SHA512Update(&ctx, &pd->dst->v6, sizeof(struct in6_addr)); 3560 break; 3561 #endif /* INET6 */ 3562 } 3563 SHA512Final(digest.bytes, &ctx); 3564 pf_tcp_iss_off += 4096; 3565 return (digest.words[0] + tcp_iss + pf_tcp_iss_off); 3566 } 3567 3568 void 3569 pf_rule_to_actions(struct pf_rule *r, struct pf_rule_actions *a) 3570 { 3571 if (r->qid) 3572 a->qid = r->qid; 3573 if (r->pqid) 3574 a->pqid = r->pqid; 3575 if (r->rtableid >= 0) 3576 a->rtableid = r->rtableid; 3577 #if NPFLOG > 0 3578 a->log |= r->log; 3579 #endif /* NPFLOG > 0 */ 3580 if (r->scrub_flags & PFSTATE_SETTOS) 3581 a->set_tos = r->set_tos; 3582 if (r->min_ttl) 3583 a->min_ttl = r->min_ttl; 3584 if (r->max_mss) 3585 a->max_mss = r->max_mss; 3586 a->flags |= (r->scrub_flags & (PFSTATE_NODF|PFSTATE_RANDOMID| 3587 PFSTATE_SETTOS|PFSTATE_SCRUB_TCP|PFSTATE_SETPRIO)); 3588 if (r->scrub_flags & PFSTATE_SETPRIO) { 3589 a->set_prio[0] = r->set_prio[0]; 3590 a->set_prio[1] = r->set_prio[1]; 3591 } 3592 if (r->rule_flag & PFRULE_SETDELAY) 3593 a->delay = r->delay; 3594 } 3595 3596 #define PF_TEST_ATTRIB(t, a) \ 3597 if (t) { \ 3598 r = a; \ 3599 continue; \ 3600 } else do { \ 3601 } while (0) 3602 3603 enum pf_test_status 3604 pf_match_rule(struct pf_test_ctx *ctx, struct pf_ruleset *ruleset) 3605 { 3606 struct pf_rule *r; 3607 struct pf_rule *save_a; 3608 struct pf_ruleset *save_aruleset; 3609 3610 r = TAILQ_FIRST(ruleset->rules.active.ptr); 3611 while (r != NULL) { 3612 r->evaluations++; 3613 PF_TEST_ATTRIB( 3614 (pfi_kif_match(r->kif, ctx->pd->kif) == 
r->ifnot), 3615 r->skip[PF_SKIP_IFP].ptr); 3616 PF_TEST_ATTRIB((r->direction && r->direction != ctx->pd->dir), 3617 r->skip[PF_SKIP_DIR].ptr); 3618 PF_TEST_ATTRIB((r->onrdomain >= 0 && 3619 (r->onrdomain == ctx->pd->rdomain) == r->ifnot), 3620 r->skip[PF_SKIP_RDOM].ptr); 3621 PF_TEST_ATTRIB((r->af && r->af != ctx->pd->af), 3622 r->skip[PF_SKIP_AF].ptr); 3623 PF_TEST_ATTRIB((r->proto && r->proto != ctx->pd->proto), 3624 r->skip[PF_SKIP_PROTO].ptr); 3625 PF_TEST_ATTRIB((PF_MISMATCHAW(&r->src.addr, &ctx->pd->nsaddr, 3626 ctx->pd->naf, r->src.neg, ctx->pd->kif, 3627 ctx->act.rtableid)), 3628 r->skip[PF_SKIP_SRC_ADDR].ptr); 3629 PF_TEST_ATTRIB((PF_MISMATCHAW(&r->dst.addr, &ctx->pd->ndaddr, 3630 ctx->pd->af, r->dst.neg, NULL, ctx->act.rtableid)), 3631 r->skip[PF_SKIP_DST_ADDR].ptr); 3632 3633 switch (ctx->pd->virtual_proto) { 3634 case PF_VPROTO_FRAGMENT: 3635 /* tcp/udp only. port_op always 0 in other cases */ 3636 PF_TEST_ATTRIB((r->src.port_op || r->dst.port_op), 3637 TAILQ_NEXT(r, entries)); 3638 PF_TEST_ATTRIB((ctx->pd->proto == IPPROTO_TCP && 3639 r->flagset), 3640 TAILQ_NEXT(r, entries)); 3641 /* icmp only. type/code always 0 in other cases */ 3642 PF_TEST_ATTRIB((r->type || r->code), 3643 TAILQ_NEXT(r, entries)); 3644 /* tcp/udp only. {uid|gid}.op always 0 in other cases */ 3645 PF_TEST_ATTRIB((r->gid.op || r->uid.op), 3646 TAILQ_NEXT(r, entries)); 3647 break; 3648 3649 case IPPROTO_TCP: 3650 PF_TEST_ATTRIB(((r->flagset & ctx->th->th_flags) != 3651 r->flags), 3652 TAILQ_NEXT(r, entries)); 3653 PF_TEST_ATTRIB((r->os_fingerprint != PF_OSFP_ANY && 3654 !pf_osfp_match(pf_osfp_fingerprint(ctx->pd), 3655 r->os_fingerprint)), 3656 TAILQ_NEXT(r, entries)); 3657 /* FALLTHROUGH */ 3658 3659 case IPPROTO_UDP: 3660 /* tcp/udp only. port_op always 0 in other cases */ 3661 PF_TEST_ATTRIB((r->src.port_op && 3662 !pf_match_port(r->src.port_op, r->src.port[0], 3663 r->src.port[1], ctx->pd->nsport)), 3664 r->skip[PF_SKIP_SRC_PORT].ptr); 3665 PF_TEST_ATTRIB((r->dst.port_op && 3666 !pf_match_port(r->dst.port_op, r->dst.port[0], 3667 r->dst.port[1], ctx->pd->ndport)), 3668 r->skip[PF_SKIP_DST_PORT].ptr); 3669 /* tcp/udp only. uid.op always 0 in other cases */ 3670 PF_TEST_ATTRIB((r->uid.op && (ctx->pd->lookup.done || 3671 (ctx->pd->lookup.done = 3672 pf_socket_lookup(ctx->pd), 1)) && 3673 !pf_match_uid(r->uid.op, r->uid.uid[0], 3674 r->uid.uid[1], ctx->pd->lookup.uid)), 3675 TAILQ_NEXT(r, entries)); 3676 /* tcp/udp only. gid.op always 0 in other cases */ 3677 PF_TEST_ATTRIB((r->gid.op && (ctx->pd->lookup.done || 3678 (ctx->pd->lookup.done = 3679 pf_socket_lookup(ctx->pd), 1)) && 3680 !pf_match_gid(r->gid.op, r->gid.gid[0], 3681 r->gid.gid[1], ctx->pd->lookup.gid)), 3682 TAILQ_NEXT(r, entries)); 3683 break; 3684 3685 case IPPROTO_ICMP: 3686 case IPPROTO_ICMPV6: 3687 /* icmp only. type always 0 in other cases */ 3688 PF_TEST_ATTRIB((r->type && 3689 r->type != ctx->icmptype + 1), 3690 TAILQ_NEXT(r, entries)); 3691 /* icmp only. code always 0 in other cases */ 3692 PF_TEST_ATTRIB((r->code && 3693 r->code != ctx->icmpcode + 1), 3694 TAILQ_NEXT(r, entries)); 3695 /* icmp only.
don't create states on replies */ 3696 PF_TEST_ATTRIB((r->keep_state && !ctx->state_icmp && 3697 (r->rule_flag & PFRULE_STATESLOPPY) == 0 && 3698 ctx->icmp_dir != PF_IN), 3699 TAILQ_NEXT(r, entries)); 3700 break; 3701 3702 default: 3703 break; 3704 } 3705 3706 PF_TEST_ATTRIB((r->rule_flag & PFRULE_FRAGMENT && 3707 ctx->pd->virtual_proto != PF_VPROTO_FRAGMENT), 3708 TAILQ_NEXT(r, entries)); 3709 PF_TEST_ATTRIB((r->tos && !(r->tos == ctx->pd->tos)), 3710 TAILQ_NEXT(r, entries)); 3711 PF_TEST_ATTRIB((r->prob && 3712 r->prob <= arc4random_uniform(UINT_MAX - 1) + 1), 3713 TAILQ_NEXT(r, entries)); 3714 PF_TEST_ATTRIB((r->match_tag && 3715 !pf_match_tag(ctx->pd->m, r, &ctx->tag)), 3716 TAILQ_NEXT(r, entries)); 3717 PF_TEST_ATTRIB((r->rcv_kif && pf_match_rcvif(ctx->pd->m, r) == 3718 r->rcvifnot), 3719 TAILQ_NEXT(r, entries)); 3720 PF_TEST_ATTRIB((r->prio && 3721 (r->prio == PF_PRIO_ZERO ? 0 : r->prio) != 3722 ctx->pd->m->m_pkthdr.pf.prio), 3723 TAILQ_NEXT(r, entries)); 3724 3725 /* must be last! */ 3726 if (r->pktrate.limit) { 3727 pf_add_threshold(&r->pktrate); 3728 PF_TEST_ATTRIB((pf_check_threshold(&r->pktrate)), 3729 TAILQ_NEXT(r, entries)); 3730 } 3731 3732 /* FALLTHROUGH */ 3733 if (r->tag) 3734 ctx->tag = r->tag; 3735 if (r->anchor == NULL) { 3736 if (r->action == PF_MATCH) { 3737 if ((ctx->ri = pool_get(&pf_rule_item_pl, 3738 PR_NOWAIT)) == NULL) { 3739 REASON_SET(&ctx->reason, PFRES_MEMORY); 3740 ctx->test_status = PF_TEST_FAIL; 3741 break; 3742 } 3743 ctx->ri->r = r; 3744 /* order is irrelevant */ 3745 SLIST_INSERT_HEAD(&ctx->rules, ctx->ri, entry); 3746 ctx->ri = NULL; 3747 pf_rule_to_actions(r, &ctx->act); 3748 if (r->rule_flag & PFRULE_AFTO) 3749 ctx->pd->naf = r->naf; 3750 if (pf_get_transaddr(r, ctx->pd, ctx->sns, 3751 &ctx->nr) == -1) { 3752 REASON_SET(&ctx->reason, 3753 PFRES_TRANSLATE); 3754 ctx->test_status = PF_TEST_FAIL; 3755 break; 3756 } 3757 #if NPFLOG > 0 3758 if (r->log) { 3759 REASON_SET(&ctx->reason, PFRES_MATCH); 3760 pflog_packet(ctx->pd, ctx->reason, r, 3761 ctx->a, ruleset, NULL); 3762 } 3763 #endif /* NPFLOG > 0 */ 3764 } else { 3765 /* 3766 * found matching r 3767 */ 3768 *ctx->rm = r; 3769 /* 3770 * anchor, with ruleset, where r belongs to 3771 */ 3772 *ctx->am = ctx->a; 3773 /* 3774 * ruleset where r belongs to 3775 */ 3776 *ctx->rsm = ruleset; 3777 /* 3778 * ruleset, where anchor belongs to. 3779 */ 3780 ctx->arsm = ctx->aruleset; 3781 } 3782 3783 #if NPFLOG > 0 3784 if (ctx->act.log & PF_LOG_MATCHES) 3785 pf_log_matches(ctx->pd, r, ctx->a, ruleset, 3786 &ctx->rules); 3787 #endif /* NPFLOG > 0 */ 3788 3789 if (r->quick) { 3790 ctx->test_status = PF_TEST_QUICK; 3791 break; 3792 } 3793 } else { 3794 save_a = ctx->a; 3795 save_aruleset = ctx->aruleset; 3796 ctx->a = r; /* remember anchor */ 3797 ctx->aruleset = ruleset; /* and its ruleset */ 3798 /* 3799 * Note: we don't need to restore if we are not going 3800 * to continue with ruleset evaluation. 
3801 */ 3802 if (pf_step_into_anchor(ctx, r) != PF_TEST_OK) 3803 break; 3804 ctx->a = save_a; 3805 ctx->aruleset = save_aruleset; 3806 } 3807 r = TAILQ_NEXT(r, entries); 3808 } 3809 3810 return (ctx->test_status); 3811 } 3812 3813 int 3814 pf_test_rule(struct pf_pdesc *pd, struct pf_rule **rm, struct pf_state **sm, 3815 struct pf_rule **am, struct pf_ruleset **rsm, u_short *reason, 3816 struct pfsync_deferral **pdeferral) 3817 { 3818 struct pf_rule *r = NULL; 3819 struct pf_rule *a = NULL; 3820 struct pf_ruleset *ruleset = NULL; 3821 struct pf_state_key *skw = NULL, *sks = NULL; 3822 int rewrite = 0; 3823 u_int16_t virtual_type, virtual_id; 3824 int action = PF_DROP; 3825 struct pf_test_ctx ctx; 3826 int rv; 3827 3828 memset(&ctx, 0, sizeof(ctx)); 3829 ctx.pd = pd; 3830 ctx.rm = rm; 3831 ctx.am = am; 3832 ctx.rsm = rsm; 3833 ctx.th = &pd->hdr.tcp; 3834 ctx.act.rtableid = pd->rdomain; 3835 ctx.tag = -1; 3836 SLIST_INIT(&ctx.rules); 3837 3838 if (pd->dir == PF_IN && if_congested()) { 3839 REASON_SET(&ctx.reason, PFRES_CONGEST); 3840 return (PF_DROP); 3841 } 3842 3843 switch (pd->virtual_proto) { 3844 case IPPROTO_ICMP: 3845 ctx.icmptype = pd->hdr.icmp.icmp_type; 3846 ctx.icmpcode = pd->hdr.icmp.icmp_code; 3847 ctx.state_icmp = pf_icmp_mapping(pd, ctx.icmptype, 3848 &ctx.icmp_dir, &virtual_id, &virtual_type); 3849 if (ctx.icmp_dir == PF_IN) { 3850 pd->osport = pd->nsport = virtual_id; 3851 pd->odport = pd->ndport = virtual_type; 3852 } else { 3853 pd->osport = pd->nsport = virtual_type; 3854 pd->odport = pd->ndport = virtual_id; 3855 } 3856 break; 3857 #ifdef INET6 3858 case IPPROTO_ICMPV6: 3859 ctx.icmptype = pd->hdr.icmp6.icmp6_type; 3860 ctx.icmpcode = pd->hdr.icmp6.icmp6_code; 3861 ctx.state_icmp = pf_icmp_mapping(pd, ctx.icmptype, 3862 &ctx.icmp_dir, &virtual_id, &virtual_type); 3863 if (ctx.icmp_dir == PF_IN) { 3864 pd->osport = pd->nsport = virtual_id; 3865 pd->odport = pd->ndport = virtual_type; 3866 } else { 3867 pd->osport = pd->nsport = virtual_type; 3868 pd->odport = pd->ndport = virtual_id; 3869 } 3870 break; 3871 #endif /* INET6 */ 3872 } 3873 3874 ruleset = &pf_main_ruleset; 3875 rv = pf_match_rule(&ctx, ruleset); 3876 if (rv == PF_TEST_FAIL) { 3877 /* 3878 * Reason has been set in pf_match_rule() already. 
3879 */ 3880 goto cleanup; 3881 } 3882 3883 r = *ctx.rm; /* matching rule */ 3884 a = *ctx.am; /* rule that defines an anchor containing 'r' */ 3885 ruleset = *ctx.rsm;/* ruleset of the anchor defined by the rule 'a' */ 3886 ctx.aruleset = ctx.arsm;/* ruleset of the 'a' rule itself */ 3887 3888 /* apply actions for last matching pass/block rule */ 3889 pf_rule_to_actions(r, &ctx.act); 3890 if (r->rule_flag & PFRULE_AFTO) 3891 pd->naf = r->naf; 3892 if (pf_get_transaddr(r, pd, ctx.sns, &ctx.nr) == -1) { 3893 REASON_SET(&ctx.reason, PFRES_TRANSLATE); 3894 goto cleanup; 3895 } 3896 REASON_SET(&ctx.reason, PFRES_MATCH); 3897 3898 #if NPFLOG > 0 3899 if (r->log) 3900 pflog_packet(pd, ctx.reason, r, a, ruleset, NULL); 3901 if (ctx.act.log & PF_LOG_MATCHES) 3902 pf_log_matches(pd, r, a, ruleset, &ctx.rules); 3903 #endif /* NPFLOG > 0 */ 3904 3905 if (pd->virtual_proto != PF_VPROTO_FRAGMENT && 3906 (r->action == PF_DROP) && 3907 ((r->rule_flag & PFRULE_RETURNRST) || 3908 (r->rule_flag & PFRULE_RETURNICMP) || 3909 (r->rule_flag & PFRULE_RETURN))) { 3910 if (pd->proto == IPPROTO_TCP && 3911 ((r->rule_flag & PFRULE_RETURNRST) || 3912 (r->rule_flag & PFRULE_RETURN)) && 3913 !(ctx.th->th_flags & TH_RST)) { 3914 u_int32_t ack = 3915 ntohl(ctx.th->th_seq) + pd->p_len; 3916 3917 if (pf_check_tcp_cksum(pd->m, pd->off, 3918 pd->tot_len - pd->off, pd->af)) 3919 REASON_SET(&ctx.reason, PFRES_PROTCKSUM); 3920 else { 3921 if (ctx.th->th_flags & TH_SYN) 3922 ack++; 3923 if (ctx.th->th_flags & TH_FIN) 3924 ack++; 3925 pf_send_tcp(r, pd->af, pd->dst, 3926 pd->src, ctx.th->th_dport, 3927 ctx.th->th_sport, ntohl(ctx.th->th_ack), 3928 ack, TH_RST|TH_ACK, 0, 0, r->return_ttl, 3929 1, 0, pd->rdomain); 3930 } 3931 } else if ((pd->proto != IPPROTO_ICMP || 3932 ICMP_INFOTYPE(ctx.icmptype)) && pd->af == AF_INET && 3933 r->return_icmp) 3934 pf_send_icmp(pd->m, r->return_icmp >> 8, 3935 r->return_icmp & 255, 0, pd->af, r, pd->rdomain); 3936 else if ((pd->proto != IPPROTO_ICMPV6 || 3937 (ctx.icmptype >= ICMP6_ECHO_REQUEST && 3938 ctx.icmptype != ND_REDIRECT)) && pd->af == AF_INET6 && 3939 r->return_icmp6) 3940 pf_send_icmp(pd->m, r->return_icmp6 >> 8, 3941 r->return_icmp6 & 255, 0, pd->af, r, pd->rdomain); 3942 } 3943 3944 if (r->action == PF_DROP) 3945 goto cleanup; 3946 3947 /* 3948 * If an expired "once" rule has not been purged, drop any new matching 3949 * packets. 3950 */ 3951 if (r->rule_flag & PFRULE_EXPIRED) 3952 goto cleanup; 3953 3954 pf_tag_packet(pd->m, ctx.tag, ctx.act.rtableid); 3955 if (ctx.act.rtableid >= 0 && 3956 rtable_l2(ctx.act.rtableid) != pd->rdomain) 3957 pd->destchg = 1; 3958 3959 if (r->action == PF_PASS && pd->badopts && ! 
r->allow_opts) { 3960 REASON_SET(&ctx.reason, PFRES_IPOPTIONS); 3961 #if NPFLOG > 0 3962 pd->pflog |= PF_LOG_FORCE; 3963 #endif /* NPFLOG > 0 */ 3964 DPFPRINTF(LOG_NOTICE, "dropping packet with " 3965 "ip/ipv6 options in pf_test_rule()"); 3966 goto cleanup; 3967 } 3968 3969 action = PF_PASS; 3970 3971 if (pd->virtual_proto != PF_VPROTO_FRAGMENT 3972 && !ctx.state_icmp && r->keep_state) { 3973 3974 if (r->rule_flag & PFRULE_SRCTRACK && 3975 pf_insert_src_node(&ctx.sns[PF_SN_NONE], r, PF_SN_NONE, 3976 pd->af, pd->src, NULL, NULL) != 0) { 3977 REASON_SET(&ctx.reason, PFRES_SRCLIMIT); 3978 goto cleanup; 3979 } 3980 3981 if (r->max_states && (r->states_cur >= r->max_states)) { 3982 pf_status.lcounters[LCNT_STATES]++; 3983 REASON_SET(&ctx.reason, PFRES_MAXSTATES); 3984 goto cleanup; 3985 } 3986 3987 action = pf_create_state(pd, r, a, ctx.nr, &skw, &sks, 3988 &rewrite, sm, ctx.tag, &ctx.rules, &ctx.act, ctx.sns); 3989 3990 if (action != PF_PASS) 3991 goto cleanup; 3992 if (sks != skw) { 3993 struct pf_state_key *sk; 3994 3995 if (pd->dir == PF_IN) 3996 sk = sks; 3997 else 3998 sk = skw; 3999 rewrite += pf_translate(pd, 4000 &sk->addr[pd->af == pd->naf ? pd->sidx : pd->didx], 4001 sk->port[pd->af == pd->naf ? pd->sidx : pd->didx], 4002 &sk->addr[pd->af == pd->naf ? pd->didx : pd->sidx], 4003 sk->port[pd->af == pd->naf ? pd->didx : pd->sidx], 4004 virtual_type, ctx.icmp_dir); 4005 } 4006 4007 #ifdef INET6 4008 if (rewrite && skw->af != sks->af) 4009 action = PF_AFRT; 4010 #endif /* INET6 */ 4011 4012 } else { 4013 while ((ctx.ri = SLIST_FIRST(&ctx.rules))) { 4014 SLIST_REMOVE_HEAD(&ctx.rules, entry); 4015 pool_put(&pf_rule_item_pl, ctx.ri); 4016 } 4017 } 4018 4019 /* copy back packet headers if needed */ 4020 if (rewrite && pd->hdrlen) { 4021 m_copyback(pd->m, pd->off, pd->hdrlen, &pd->hdr, M_NOWAIT); 4022 } 4023 4024 if (r->rule_flag & PFRULE_ONCE) { 4025 u_int32_t rule_flag; 4026 4027 /* 4028 * Use atomic_cas() to determine a clear winner, which will 4029 * insert the expired rule into the gc list. 4030 */ 4031 rule_flag = r->rule_flag; 4032 if (((rule_flag & PFRULE_EXPIRED) == 0) && 4033 atomic_cas_uint(&r->rule_flag, rule_flag, 4034 rule_flag | PFRULE_EXPIRED) == rule_flag) { 4035 r->exptime = gettime(); 4036 SLIST_INSERT_HEAD(&pf_rule_gcl, r, gcle); 4037 } 4038 } 4039 4040 #if NPFSYNC > 0 4041 if (*sm != NULL && !ISSET((*sm)->state_flags, PFSTATE_NOSYNC) && 4042 pd->dir == PF_OUT && pfsync_up()) { 4043 /* 4044 * We want the state created, but we don't want 4045 * to send this packet yet, in case a partner 4046 * firewall has to know about the state to allow 4047 * replies through it.
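 *
 * (pfsync_defer() holds the packet back and releases it once the
 * state update has been sent to the peer, or after a short timeout,
 * whichever comes first.)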
4048 */ 4049 if (pfsync_defer(*sm, pd->m, pdeferral)) 4050 return (PF_DEFER); 4051 } 4052 #endif /* NPFSYNC > 0 */ 4053 4054 return (action); 4055 4056 cleanup: 4057 while ((ctx.ri = SLIST_FIRST(&ctx.rules))) { 4058 SLIST_REMOVE_HEAD(&ctx.rules, entry); 4059 pool_put(&pf_rule_item_pl, ctx.ri); 4060 } 4061 4062 return (action); 4063 } 4064 4065 static __inline int 4066 pf_create_state(struct pf_pdesc *pd, struct pf_rule *r, struct pf_rule *a, 4067 struct pf_rule *nr, struct pf_state_key **skw, struct pf_state_key **sks, 4068 int *rewrite, struct pf_state **sm, int tag, struct pf_rule_slist *rules, 4069 struct pf_rule_actions *act, struct pf_src_node *sns[PF_SN_MAX]) 4070 { 4071 struct pf_state *s = NULL; 4072 struct tcphdr *th = &pd->hdr.tcp; 4073 u_int16_t mss = tcp_mssdflt; 4074 u_short reason; 4075 u_int i; 4076 4077 s = pool_get(&pf_state_pl, PR_NOWAIT | PR_ZERO); 4078 if (s == NULL) { 4079 REASON_SET(&reason, PFRES_MEMORY); 4080 goto csfailed; 4081 } 4082 s->rule.ptr = r; 4083 s->anchor.ptr = a; 4084 s->natrule.ptr = nr; 4085 if (r->allow_opts) 4086 s->state_flags |= PFSTATE_ALLOWOPTS; 4087 if (r->rule_flag & PFRULE_STATESLOPPY) 4088 s->state_flags |= PFSTATE_SLOPPY; 4089 if (r->rule_flag & PFRULE_PFLOW) 4090 s->state_flags |= PFSTATE_PFLOW; 4091 #if NPFLOG > 0 4092 s->log = act->log & PF_LOG_ALL; 4093 #endif /* NPFLOG > 0 */ 4094 s->qid = act->qid; 4095 s->pqid = act->pqid; 4096 s->rtableid[pd->didx] = act->rtableid; 4097 s->rtableid[pd->sidx] = -1; /* return traffic is routed normally */ 4098 s->min_ttl = act->min_ttl; 4099 s->set_tos = act->set_tos; 4100 s->max_mss = act->max_mss; 4101 s->state_flags |= act->flags; 4102 #if NPFSYNC > 0 4103 s->sync_state = PFSYNC_S_NONE; 4104 #endif /* NPFSYNC > 0 */ 4105 s->set_prio[0] = act->set_prio[0]; 4106 s->set_prio[1] = act->set_prio[1]; 4107 s->delay = act->delay; 4108 SLIST_INIT(&s->src_nodes); 4109 /* 4110 * must initialize refcnt before pf_state_insert() gets called; 4111 * pf_state_insert() grabs a reference for pfsync!
4112 */ 4113 refcnt_init(&s->refcnt); 4114 4115 switch (pd->proto) { 4116 case IPPROTO_TCP: 4117 s->src.seqlo = ntohl(th->th_seq); 4118 s->src.seqhi = s->src.seqlo + pd->p_len + 1; 4119 if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN && 4120 r->keep_state == PF_STATE_MODULATE) { 4121 /* Generate sequence number modulator */ 4122 if ((s->src.seqdiff = pf_tcp_iss(pd) - s->src.seqlo) == 4123 0) 4124 s->src.seqdiff = 1; 4125 pf_patch_32(pd, 4126 &th->th_seq, htonl(s->src.seqlo + s->src.seqdiff)); 4127 *rewrite = 1; 4128 } else 4129 s->src.seqdiff = 0; 4130 if (th->th_flags & TH_SYN) { 4131 s->src.seqhi++; 4132 s->src.wscale = pf_get_wscale(pd); 4133 } 4134 s->src.max_win = MAX(ntohs(th->th_win), 1); 4135 if (s->src.wscale & PF_WSCALE_MASK) { 4136 /* Remove scale factor from initial window */ 4137 int win = s->src.max_win; 4138 win += 1 << (s->src.wscale & PF_WSCALE_MASK); 4139 s->src.max_win = (win - 1) >> 4140 (s->src.wscale & PF_WSCALE_MASK); 4141 } 4142 if (th->th_flags & TH_FIN) 4143 s->src.seqhi++; 4144 s->dst.seqhi = 1; 4145 s->dst.max_win = 1; 4146 pf_set_protostate(s, PF_PEER_SRC, TCPS_SYN_SENT); 4147 pf_set_protostate(s, PF_PEER_DST, TCPS_CLOSED); 4148 s->timeout = PFTM_TCP_FIRST_PACKET; 4149 pf_status.states_halfopen++; 4150 break; 4151 case IPPROTO_UDP: 4152 pf_set_protostate(s, PF_PEER_SRC, PFUDPS_SINGLE); 4153 pf_set_protostate(s, PF_PEER_DST, PFUDPS_NO_TRAFFIC); 4154 s->timeout = PFTM_UDP_FIRST_PACKET; 4155 break; 4156 case IPPROTO_ICMP: 4157 #ifdef INET6 4158 case IPPROTO_ICMPV6: 4159 #endif /* INET6 */ 4160 s->timeout = PFTM_ICMP_FIRST_PACKET; 4161 break; 4162 default: 4163 pf_set_protostate(s, PF_PEER_SRC, PFOTHERS_SINGLE); 4164 pf_set_protostate(s, PF_PEER_DST, PFOTHERS_NO_TRAFFIC); 4165 s->timeout = PFTM_OTHER_FIRST_PACKET; 4166 } 4167 4168 s->creation = getuptime(); 4169 s->expire = getuptime(); 4170 4171 if (pd->proto == IPPROTO_TCP) { 4172 if (s->state_flags & PFSTATE_SCRUB_TCP && 4173 pf_normalize_tcp_init(pd, &s->src)) { 4174 REASON_SET(&reason, PFRES_MEMORY); 4175 goto csfailed; 4176 } 4177 if (s->state_flags & PFSTATE_SCRUB_TCP && s->src.scrub && 4178 pf_normalize_tcp_stateful(pd, &reason, s, &s->src, &s->dst, 4179 rewrite)) { 4180 /* This really shouldn't happen!!! */ 4181 DPFPRINTF(LOG_ERR, 4182 "%s: tcp normalize failed on first pkt", __func__); 4183 goto csfailed; 4184 } 4185 } 4186 s->direction = pd->dir; 4187 4188 if (pf_state_key_setup(pd, skw, sks, act->rtableid)) { 4189 REASON_SET(&reason, PFRES_MEMORY); 4190 goto csfailed; 4191 } 4192 4193 if (pf_set_rt_ifp(s, pd->src, (*skw)->af, sns) != 0) { 4194 REASON_SET(&reason, PFRES_NOROUTE); 4195 goto csfailed; 4196 } 4197 4198 for (i = 0; i < PF_SN_MAX; i++) 4199 if (sns[i] != NULL) { 4200 struct pf_sn_item *sni; 4201 4202 sni = pool_get(&pf_sn_item_pl, PR_NOWAIT); 4203 if (sni == NULL) { 4204 REASON_SET(&reason, PFRES_MEMORY); 4205 goto csfailed; 4206 } 4207 sni->sn = sns[i]; 4208 SLIST_INSERT_HEAD(&s->src_nodes, sni, next); 4209 sni->sn->states++; 4210 } 4211 4212 if (pf_state_insert(BOUND_IFACE(r, pd->kif), skw, sks, s)) { 4213 pf_detach_state(s); 4214 *sks = *skw = NULL; 4215 REASON_SET(&reason, PFRES_STATEINS); 4216 goto csfailed; 4217 } else 4218 *sm = s; 4219 4220 /* 4221 * Make state responsible for rules it binds here. 
4222 */ 4223 memcpy(&s->match_rules, rules, sizeof(s->match_rules)); 4224 memset(rules, 0, sizeof(*rules)); 4225 STATE_INC_COUNTERS(s); 4226 4227 if (tag > 0) { 4228 pf_tag_ref(tag); 4229 s->tag = tag; 4230 } 4231 if (pd->proto == IPPROTO_TCP && (th->th_flags & (TH_SYN|TH_ACK)) == 4232 TH_SYN && r->keep_state == PF_STATE_SYNPROXY && pd->dir == PF_IN) { 4233 int rtid = pd->rdomain; 4234 if (act->rtableid >= 0) 4235 rtid = act->rtableid; 4236 pf_set_protostate(s, PF_PEER_SRC, PF_TCPS_PROXY_SRC); 4237 s->src.seqhi = arc4random(); 4238 /* Find mss option */ 4239 mss = pf_get_mss(pd); 4240 mss = pf_calc_mss(pd->src, pd->af, rtid, mss); 4241 mss = pf_calc_mss(pd->dst, pd->af, rtid, mss); 4242 s->src.mss = mss; 4243 pf_send_tcp(r, pd->af, pd->dst, pd->src, th->th_dport, 4244 th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1, 4245 TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, pd->rdomain); 4246 REASON_SET(&reason, PFRES_SYNPROXY); 4247 return (PF_SYNPROXY_DROP); 4248 } 4249 4250 return (PF_PASS); 4251 4252 csfailed: 4253 if (s) { 4254 pf_normalize_tcp_cleanup(s); /* safe even w/o init */ 4255 pf_src_tree_remove_state(s); 4256 pool_put(&pf_state_pl, s); 4257 } 4258 4259 for (i = 0; i < PF_SN_MAX; i++) 4260 if (sns[i] != NULL) 4261 pf_remove_src_node(sns[i]); 4262 4263 return (PF_DROP); 4264 } 4265 4266 int 4267 pf_translate(struct pf_pdesc *pd, struct pf_addr *saddr, u_int16_t sport, 4268 struct pf_addr *daddr, u_int16_t dport, u_int16_t virtual_type, 4269 int icmp_dir) 4270 { 4271 int rewrite = 0; 4272 int afto = pd->af != pd->naf; 4273 4274 if (afto || PF_ANEQ(daddr, pd->dst, pd->af)) 4275 pd->destchg = 1; 4276 4277 switch (pd->proto) { 4278 case IPPROTO_TCP: /* FALLTHROUGH */ 4279 case IPPROTO_UDP: 4280 rewrite += pf_patch_16(pd, pd->sport, sport); 4281 rewrite += pf_patch_16(pd, pd->dport, dport); 4282 break; 4283 4284 case IPPROTO_ICMP: 4285 if (pd->af != AF_INET) 4286 return (0); 4287 4288 #ifdef INET6 4289 if (afto) { 4290 if (pf_translate_icmp_af(pd, AF_INET6, &pd->hdr.icmp)) 4291 return (0); 4292 pd->proto = IPPROTO_ICMPV6; 4293 rewrite = 1; 4294 } 4295 #endif /* INET6 */ 4296 if (virtual_type == htons(ICMP_ECHO)) { 4297 u_int16_t icmpid = (icmp_dir == PF_IN) ? sport : dport; 4298 rewrite += pf_patch_16(pd, 4299 &pd->hdr.icmp.icmp_id, icmpid); 4300 } 4301 break; 4302 4303 #ifdef INET6 4304 case IPPROTO_ICMPV6: 4305 if (pd->af != AF_INET6) 4306 return (0); 4307 4308 if (afto) { 4309 if (pf_translate_icmp_af(pd, AF_INET, &pd->hdr.icmp6)) 4310 return (0); 4311 pd->proto = IPPROTO_ICMP; 4312 rewrite = 1; 4313 } 4314 if (virtual_type == htons(ICMP6_ECHO_REQUEST)) { 4315 u_int16_t icmpid = (icmp_dir == PF_IN) ? 
sport : dport; 4316 rewrite += pf_patch_16(pd, 4317 &pd->hdr.icmp6.icmp6_id, icmpid); 4318 } 4319 break; 4320 #endif /* INET6 */ 4321 } 4322 4323 if (!afto) { 4324 rewrite += pf_translate_a(pd, pd->src, saddr); 4325 rewrite += pf_translate_a(pd, pd->dst, daddr); 4326 } 4327 4328 return (rewrite); 4329 } 4330 4331 int 4332 pf_tcp_track_full(struct pf_pdesc *pd, struct pf_state **state, u_short *reason, 4333 int *copyback, int reverse) 4334 { 4335 struct tcphdr *th = &pd->hdr.tcp; 4336 struct pf_state_peer *src, *dst; 4337 u_int16_t win = ntohs(th->th_win); 4338 u_int32_t ack, end, data_end, seq, orig_seq; 4339 u_int8_t sws, dws, psrc, pdst; 4340 int ackskew; 4341 4342 if ((pd->dir == (*state)->direction && !reverse) || 4343 (pd->dir != (*state)->direction && reverse)) { 4344 src = &(*state)->src; 4345 dst = &(*state)->dst; 4346 psrc = PF_PEER_SRC; 4347 pdst = PF_PEER_DST; 4348 } else { 4349 src = &(*state)->dst; 4350 dst = &(*state)->src; 4351 psrc = PF_PEER_DST; 4352 pdst = PF_PEER_SRC; 4353 } 4354 4355 if (src->wscale && dst->wscale && !(th->th_flags & TH_SYN)) { 4356 sws = src->wscale & PF_WSCALE_MASK; 4357 dws = dst->wscale & PF_WSCALE_MASK; 4358 } else 4359 sws = dws = 0; 4360 4361 /* 4362 * Sequence tracking algorithm from Guido van Rooij's paper: 4363 * http://www.madison-gurkha.com/publications/tcp_filtering/ 4364 * tcp_filtering.ps 4365 */ 4366 4367 orig_seq = seq = ntohl(th->th_seq); 4368 if (src->seqlo == 0) { 4369 /* First packet from this end. Set its state */ 4370 4371 if (((*state)->state_flags & PFSTATE_SCRUB_TCP || dst->scrub) && 4372 src->scrub == NULL) { 4373 if (pf_normalize_tcp_init(pd, src)) { 4374 REASON_SET(reason, PFRES_MEMORY); 4375 return (PF_DROP); 4376 } 4377 } 4378 4379 /* Deferred generation of sequence number modulator */ 4380 if (dst->seqdiff && !src->seqdiff) { 4381 /* use random iss for the TCP server */ 4382 while ((src->seqdiff = arc4random() - seq) == 0) 4383 continue; 4384 ack = ntohl(th->th_ack) - dst->seqdiff; 4385 pf_patch_32(pd, &th->th_seq, htonl(seq + src->seqdiff)); 4386 pf_patch_32(pd, &th->th_ack, htonl(ack)); 4387 *copyback = 1; 4388 } else { 4389 ack = ntohl(th->th_ack); 4390 } 4391 4392 end = seq + pd->p_len; 4393 if (th->th_flags & TH_SYN) { 4394 end++; 4395 if (dst->wscale & PF_WSCALE_FLAG) { 4396 src->wscale = pf_get_wscale(pd); 4397 if (src->wscale & PF_WSCALE_FLAG) { 4398 /* Remove scale factor from initial 4399 * window */ 4400 sws = src->wscale & PF_WSCALE_MASK; 4401 win = ((u_int32_t)win + (1 << sws) - 1) 4402 >> sws; 4403 dws = dst->wscale & PF_WSCALE_MASK; 4404 } else { 4405 /* fixup other window */ 4406 dst->max_win = MIN(TCP_MAXWIN, 4407 (u_int32_t)dst->max_win << 4408 (dst->wscale & PF_WSCALE_MASK)); 4409 /* in case of a retrans SYN|ACK */ 4410 dst->wscale = 0; 4411 } 4412 } 4413 } 4414 data_end = end; 4415 if (th->th_flags & TH_FIN) 4416 end++; 4417 4418 src->seqlo = seq; 4419 if (src->state < TCPS_SYN_SENT) 4420 pf_set_protostate(*state, psrc, TCPS_SYN_SENT); 4421 4422 /* 4423 * May need to slide the window (seqhi may have been set by 4424 * the crappy stack check or if we picked up the connection 4425 * after establishment) 4426 */ 4427 if (src->seqhi == 1 || 4428 SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi)) 4429 src->seqhi = end + MAX(1, dst->max_win << dws); 4430 if (win > src->max_win) 4431 src->max_win = win; 4432 4433 } else { 4434 ack = ntohl(th->th_ack) - dst->seqdiff; 4435 if (src->seqdiff) { 4436 /* Modulate sequence numbers */ 4437 pf_patch_32(pd, &th->th_seq, htonl(seq + src->seqdiff)); 4438 
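			/*
			 * Editor's illustration (made-up numbers): the
			 * host behind dst saw sequence numbers shifted by
			 * dst->seqdiff, so the ack computed above shifts
			 * them back, while th_seq just gained the freshly
			 * chosen src->seqdiff: a server ISS of 1000 with
			 * seqdiff 6000 leaves the firewall as 7000, and
			 * neither endpoint ever sees the modulated values.
			 */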
pf_patch_32(pd, &th->th_ack, htonl(ack));
4439			*copyback = 1;
4440		}
4441		end = seq + pd->p_len;
4442		if (th->th_flags & TH_SYN)
4443			end++;
4444		data_end = end;
4445		if (th->th_flags & TH_FIN)
4446			end++;
4447	}
4448
4449	if ((th->th_flags & TH_ACK) == 0) {
4450		/* Let it pass through the ack skew check */
4451		ack = dst->seqlo;
4452	} else if ((ack == 0 &&
4453	    (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) ||
4454	    /* broken tcp stacks do not set ack */
4455	    (dst->state < TCPS_SYN_SENT)) {
4456		/*
4457		 * Many stacks (ours included) will set the ACK number in a
4458		 * FIN|ACK if the SYN times out -- no sequence to ACK.
4459		 */
4460		ack = dst->seqlo;
4461	}
4462
4463	if (seq == end) {
4464		/* Ease sequencing restrictions on no data packets */
4465		seq = src->seqlo;
4466		data_end = end = seq;
4467	}
4468
4469	ackskew = dst->seqlo - ack;
4470
4471
4472	/*
4473	 * Need to demodulate the sequence numbers in any TCP SACK options
4474	 * (Selective ACK). We could optionally validate the SACK values
4475	 * against the current ACK window, either forwards or backwards, but
4476	 * I'm not confident that SACK has been implemented properly
4477	 * everywhere. It wouldn't surprise me if several stacks accidentally
4478	 * SACK too far backwards of previously ACKed data. There really aren't
4479	 * any security implications of bad SACKing unless the target stack
4480	 * doesn't validate the option length correctly. Someone trying to
4481	 * spoof into a TCP connection won't bother blindly sending SACK
4482	 * options anyway.
4483	 */
4484	if (dst->seqdiff && (th->th_off << 2) > sizeof(struct tcphdr)) {
4485		if (pf_modulate_sack(pd, dst))
4486			*copyback = 1;
4487	}
4488
4489
4490 #define MAXACKWINDOW (0xffff + 1500)	/* 1500 is an arbitrary fudge factor */
4491	if (SEQ_GEQ(src->seqhi, data_end) &&
4492	    /* Last octet inside other's window space */
4493	    SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) &&
4494	    /* Retrans: not more than one window back */
4495	    (ackskew >= -MAXACKWINDOW) &&
4496	    /* Acking not more than one reassembled fragment backwards */
4497	    (ackskew <= (MAXACKWINDOW << sws)) &&
4498	    /* Acking not more than one window forward */
4499	    ((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo ||
4500	    (orig_seq == src->seqlo + 1) || (orig_seq + 1 == src->seqlo))) {
4501	    /* Require an exact/+1 sequence match on resets when possible */
4502
4503		if (dst->scrub || src->scrub) {
4504			if (pf_normalize_tcp_stateful(pd, reason, *state, src,
4505			    dst, copyback))
4506				return (PF_DROP);
4507		}
4508
4509		/* update max window */
4510		if (src->max_win < win)
4511			src->max_win = win;
4512		/* synchronize sequencing */
4513		if (SEQ_GT(end, src->seqlo))
4514			src->seqlo = end;
4515		/* slide the window of what the other end can send */
4516		if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
4517			dst->seqhi = ack + MAX((win << sws), 1);
4518
4519		/* update states */
4520		if (th->th_flags & TH_SYN)
4521			if (src->state < TCPS_SYN_SENT)
4522				pf_set_protostate(*state, psrc, TCPS_SYN_SENT);
4523		if (th->th_flags & TH_FIN)
4524			if (src->state < TCPS_CLOSING)
4525				pf_set_protostate(*state, psrc, TCPS_CLOSING);
4526		if (th->th_flags & TH_ACK) {
4527			if (dst->state == TCPS_SYN_SENT) {
4528				pf_set_protostate(*state, pdst,
4529				    TCPS_ESTABLISHED);
4530				if (src->state == TCPS_ESTABLISHED &&
4531				    !SLIST_EMPTY(&(*state)->src_nodes) &&
4532				    pf_src_connlimit(state)) {
4533					REASON_SET(reason, PFRES_SRCLIMIT);
4534					return (PF_DROP);
4535				}
4536			} else if (dst->state == TCPS_CLOSING)
4537				pf_set_protostate(*state, pdst,
4538				    TCPS_FIN_WAIT_2);
4539 } 4540 if (th->th_flags & TH_RST) 4541 pf_set_protostate(*state, PF_PEER_BOTH, TCPS_TIME_WAIT); 4542 4543 /* update expire time */ 4544 (*state)->expire = getuptime(); 4545 if (src->state >= TCPS_FIN_WAIT_2 && 4546 dst->state >= TCPS_FIN_WAIT_2) 4547 (*state)->timeout = PFTM_TCP_CLOSED; 4548 else if (src->state >= TCPS_CLOSING && 4549 dst->state >= TCPS_CLOSING) 4550 (*state)->timeout = PFTM_TCP_FIN_WAIT; 4551 else if (src->state < TCPS_ESTABLISHED || 4552 dst->state < TCPS_ESTABLISHED) 4553 (*state)->timeout = PFTM_TCP_OPENING; 4554 else if (src->state >= TCPS_CLOSING || 4555 dst->state >= TCPS_CLOSING) 4556 (*state)->timeout = PFTM_TCP_CLOSING; 4557 else 4558 (*state)->timeout = PFTM_TCP_ESTABLISHED; 4559 4560 /* Fall through to PASS packet */ 4561 } else if ((dst->state < TCPS_SYN_SENT || 4562 dst->state >= TCPS_FIN_WAIT_2 || 4563 src->state >= TCPS_FIN_WAIT_2) && 4564 SEQ_GEQ(src->seqhi + MAXACKWINDOW, data_end) && 4565 /* Within a window forward of the originating packet */ 4566 SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) { 4567 /* Within a window backward of the originating packet */ 4568 4569 /* 4570 * This currently handles three situations: 4571 * 1) Stupid stacks will shotgun SYNs before their peer 4572 * replies. 4573 * 2) When PF catches an already established stream (the 4574 * firewall rebooted, the state table was flushed, routes 4575 * changed...) 4576 * 3) Packets get funky immediately after the connection 4577 * closes (this should catch Solaris spurious ACK|FINs 4578 * that web servers like to spew after a close) 4579 * 4580 * This must be a little more careful than the above code 4581 * since packet floods will also be caught here. We don't 4582 * update the TTL here to mitigate the damage of a packet 4583 * flood and so the same code can handle awkward establishment 4584 * and a loosened connection close. 4585 * In the establishment case, a correct peer response will 4586 * validate the connection, go through the normal state code 4587 * and keep updating the state TTL. 4588 */ 4589 4590 if (pf_status.debug >= LOG_NOTICE) { 4591 log(LOG_NOTICE, "pf: loose state match: "); 4592 pf_print_state(*state); 4593 pf_print_flags(th->th_flags); 4594 addlog(" seq=%u (%u) ack=%u len=%u ackskew=%d " 4595 "pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack, 4596 pd->p_len, ackskew, (*state)->packets[0], 4597 (*state)->packets[1], 4598 pd->dir == PF_IN ? "in" : "out", 4599 pd->dir == (*state)->direction ? "fwd" : "rev"); 4600 } 4601 4602 if (dst->scrub || src->scrub) { 4603 if (pf_normalize_tcp_stateful(pd, reason, *state, src, 4604 dst, copyback)) 4605 return (PF_DROP); 4606 } 4607 4608 /* update max window */ 4609 if (src->max_win < win) 4610 src->max_win = win; 4611 /* synchronize sequencing */ 4612 if (SEQ_GT(end, src->seqlo)) 4613 src->seqlo = end; 4614 /* slide the window of what the other end can send */ 4615 if (SEQ_GEQ(ack + (win << sws), dst->seqhi)) 4616 dst->seqhi = ack + MAX((win << sws), 1); 4617 4618 /* 4619 * Cannot set dst->seqhi here since this could be a shotgunned 4620 * SYN and not an already established connection. 
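	 * (Editor's note: "shotgunned" refers to stacks that retransmit
	 * several SYNs back to back; dst->seqhi keeps its placeholder
	 * value of 1 until a SYN|ACK from the peer lets the tracking
	 * above compute a real upper bound.)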
4621		 */
4622		if (th->th_flags & TH_FIN)
4623			if (src->state < TCPS_CLOSING)
4624				pf_set_protostate(*state, psrc, TCPS_CLOSING);
4625		if (th->th_flags & TH_RST)
4626			pf_set_protostate(*state, PF_PEER_BOTH, TCPS_TIME_WAIT);
4627
4628		/* Fall through to PASS packet */
4629	} else {
4630		if ((*state)->dst.state == TCPS_SYN_SENT &&
4631		    (*state)->src.state == TCPS_SYN_SENT) {
4632			/* Send RST for state mismatches during handshake */
4633			if (!(th->th_flags & TH_RST))
4634				pf_send_tcp((*state)->rule.ptr, pd->af,
4635				    pd->dst, pd->src, th->th_dport,
4636				    th->th_sport, ntohl(th->th_ack), 0,
4637				    TH_RST, 0, 0,
4638				    (*state)->rule.ptr->return_ttl, 1, 0,
4639				    pd->rdomain);
4640			src->seqlo = 0;
4641			src->seqhi = 1;
4642			src->max_win = 1;
4643		} else if (pf_status.debug >= LOG_NOTICE) {
4644			log(LOG_NOTICE, "pf: BAD state: ");
4645			pf_print_state(*state);
4646			pf_print_flags(th->th_flags);
4647			addlog(" seq=%u (%u) ack=%u len=%u ackskew=%d "
4648			    "pkts=%llu:%llu dir=%s,%s\n",
4649			    seq, orig_seq, ack, pd->p_len, ackskew,
4650			    (*state)->packets[0], (*state)->packets[1],
4651			    pd->dir == PF_IN ? "in" : "out",
4652			    pd->dir == (*state)->direction ? "fwd" : "rev");
4653			addlog("pf: State failure on: %c %c %c %c | %c %c\n",
4654			    SEQ_GEQ(src->seqhi, data_end) ? ' ' : '1',
4655			    SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ?
4656			    ' ': '2',
4657			    (ackskew >= -MAXACKWINDOW) ? ' ' : '3',
4658			    (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4',
4659			    SEQ_GEQ(src->seqhi + MAXACKWINDOW, data_end) ?
4660			    ' ' :'5',
4661			    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' ' :'6');
4662		}
4663		REASON_SET(reason, PFRES_BADSTATE);
4664		return (PF_DROP);
4665	}
4666
4667	return (PF_PASS);
4668 }
4669
4670 int
4671 pf_tcp_track_sloppy(struct pf_pdesc *pd, struct pf_state **state,
4672     u_short *reason)
4673 {
4674	struct tcphdr		*th = &pd->hdr.tcp;
4675	struct pf_state_peer	*src, *dst;
4676	u_int8_t		 psrc, pdst;
4677
4678	if (pd->dir == (*state)->direction) {
4679		src = &(*state)->src;
4680		dst = &(*state)->dst;
4681		psrc = PF_PEER_SRC;
4682		pdst = PF_PEER_DST;
4683	} else {
4684		src = &(*state)->dst;
4685		dst = &(*state)->src;
4686		psrc = PF_PEER_DST;
4687		pdst = PF_PEER_SRC;
4688	}
4689
4690	if (th->th_flags & TH_SYN)
4691		if (src->state < TCPS_SYN_SENT)
4692			pf_set_protostate(*state, psrc, TCPS_SYN_SENT);
4693	if (th->th_flags & TH_FIN)
4694		if (src->state < TCPS_CLOSING)
4695			pf_set_protostate(*state, psrc, TCPS_CLOSING);
4696	if (th->th_flags & TH_ACK) {
4697		if (dst->state == TCPS_SYN_SENT) {
4698			pf_set_protostate(*state, pdst, TCPS_ESTABLISHED);
4699			if (src->state == TCPS_ESTABLISHED &&
4700			    !SLIST_EMPTY(&(*state)->src_nodes) &&
4701			    pf_src_connlimit(state)) {
4702				REASON_SET(reason, PFRES_SRCLIMIT);
4703				return (PF_DROP);
4704			}
4705		} else if (dst->state == TCPS_CLOSING) {
4706			pf_set_protostate(*state, pdst, TCPS_FIN_WAIT_2);
4707		} else if (src->state == TCPS_SYN_SENT &&
4708		    dst->state < TCPS_SYN_SENT) {
4709			/*
4710			 * Handle a special sloppy case where we only see one
4711			 * half of the connection. If there is an ACK after
4712			 * the initial SYN without ever seeing a packet from
4713			 * the destination, set the connection to established.
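			 * (Editor's note: this is the asymmetric-routing
			 * case that sloppy tracking exists for, where only
			 * one direction of the connection ever crosses
			 * this firewall.)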
4714 */ 4715 pf_set_protostate(*state, PF_PEER_BOTH, 4716 TCPS_ESTABLISHED); 4717 if (!SLIST_EMPTY(&(*state)->src_nodes) && 4718 pf_src_connlimit(state)) { 4719 REASON_SET(reason, PFRES_SRCLIMIT); 4720 return (PF_DROP); 4721 } 4722 } else if (src->state == TCPS_CLOSING && 4723 dst->state == TCPS_ESTABLISHED && 4724 dst->seqlo == 0) { 4725 /* 4726 * Handle the closing of half connections where we 4727 * don't see the full bidirectional FIN/ACK+ACK 4728 * handshake. 4729 */ 4730 pf_set_protostate(*state, pdst, TCPS_CLOSING); 4731 } 4732 } 4733 if (th->th_flags & TH_RST) 4734 pf_set_protostate(*state, PF_PEER_BOTH, TCPS_TIME_WAIT); 4735 4736 /* update expire time */ 4737 (*state)->expire = getuptime(); 4738 if (src->state >= TCPS_FIN_WAIT_2 && 4739 dst->state >= TCPS_FIN_WAIT_2) 4740 (*state)->timeout = PFTM_TCP_CLOSED; 4741 else if (src->state >= TCPS_CLOSING && 4742 dst->state >= TCPS_CLOSING) 4743 (*state)->timeout = PFTM_TCP_FIN_WAIT; 4744 else if (src->state < TCPS_ESTABLISHED || 4745 dst->state < TCPS_ESTABLISHED) 4746 (*state)->timeout = PFTM_TCP_OPENING; 4747 else if (src->state >= TCPS_CLOSING || 4748 dst->state >= TCPS_CLOSING) 4749 (*state)->timeout = PFTM_TCP_CLOSING; 4750 else 4751 (*state)->timeout = PFTM_TCP_ESTABLISHED; 4752 4753 return (PF_PASS); 4754 } 4755 4756 static __inline int 4757 pf_synproxy(struct pf_pdesc *pd, struct pf_state **state, u_short *reason) 4758 { 4759 struct pf_state_key *sk = (*state)->key[pd->didx]; 4760 4761 if ((*state)->src.state == PF_TCPS_PROXY_SRC) { 4762 struct tcphdr *th = &pd->hdr.tcp; 4763 4764 if (pd->dir != (*state)->direction) { 4765 REASON_SET(reason, PFRES_SYNPROXY); 4766 return (PF_SYNPROXY_DROP); 4767 } 4768 if (th->th_flags & TH_SYN) { 4769 if (ntohl(th->th_seq) != (*state)->src.seqlo) { 4770 REASON_SET(reason, PFRES_SYNPROXY); 4771 return (PF_DROP); 4772 } 4773 pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst, 4774 pd->src, th->th_dport, th->th_sport, 4775 (*state)->src.seqhi, ntohl(th->th_seq) + 1, 4776 TH_SYN|TH_ACK, 0, (*state)->src.mss, 0, 1, 4777 0, pd->rdomain); 4778 REASON_SET(reason, PFRES_SYNPROXY); 4779 return (PF_SYNPROXY_DROP); 4780 } else if ((th->th_flags & (TH_ACK|TH_RST|TH_FIN)) != TH_ACK || 4781 (ntohl(th->th_ack) != (*state)->src.seqhi + 1) || 4782 (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) { 4783 REASON_SET(reason, PFRES_SYNPROXY); 4784 return (PF_DROP); 4785 } else if (!SLIST_EMPTY(&(*state)->src_nodes) && 4786 pf_src_connlimit(state)) { 4787 REASON_SET(reason, PFRES_SRCLIMIT); 4788 return (PF_DROP); 4789 } else 4790 pf_set_protostate(*state, PF_PEER_SRC, 4791 PF_TCPS_PROXY_DST); 4792 } 4793 if ((*state)->src.state == PF_TCPS_PROXY_DST) { 4794 struct tcphdr *th = &pd->hdr.tcp; 4795 4796 if (pd->dir == (*state)->direction) { 4797 if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) || 4798 (ntohl(th->th_ack) != (*state)->src.seqhi + 1) || 4799 (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) { 4800 REASON_SET(reason, PFRES_SYNPROXY); 4801 return (PF_DROP); 4802 } 4803 (*state)->src.max_win = MAX(ntohs(th->th_win), 1); 4804 if ((*state)->dst.seqhi == 1) 4805 (*state)->dst.seqhi = arc4random(); 4806 pf_send_tcp((*state)->rule.ptr, pd->af, 4807 &sk->addr[pd->sidx], &sk->addr[pd->didx], 4808 sk->port[pd->sidx], sk->port[pd->didx], 4809 (*state)->dst.seqhi, 0, TH_SYN, 0, 4810 (*state)->src.mss, 0, 0, (*state)->tag, 4811 sk->rdomain); 4812 REASON_SET(reason, PFRES_SYNPROXY); 4813 return (PF_SYNPROXY_DROP); 4814 } else if (((th->th_flags & (TH_SYN|TH_ACK)) != 4815 (TH_SYN|TH_ACK)) || 4816 (ntohl(th->th_ack) != 
(*state)->dst.seqhi + 1)) {
4817			REASON_SET(reason, PFRES_SYNPROXY);
4818			return (PF_DROP);
4819		} else {
4820			(*state)->dst.max_win = MAX(ntohs(th->th_win), 1);
4821			(*state)->dst.seqlo = ntohl(th->th_seq);
4822			pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
4823			    pd->src, th->th_dport, th->th_sport,
4824			    ntohl(th->th_ack), ntohl(th->th_seq) + 1,
4825			    TH_ACK, (*state)->src.max_win, 0, 0, 0,
4826			    (*state)->tag, pd->rdomain);
4827			pf_send_tcp((*state)->rule.ptr, pd->af,
4828			    &sk->addr[pd->sidx], &sk->addr[pd->didx],
4829			    sk->port[pd->sidx], sk->port[pd->didx],
4830			    (*state)->src.seqhi + 1, (*state)->src.seqlo + 1,
4831			    TH_ACK, (*state)->dst.max_win, 0, 0, 1,
4832			    0, sk->rdomain);
4833			(*state)->src.seqdiff = (*state)->dst.seqhi -
4834			    (*state)->src.seqlo;
4835			(*state)->dst.seqdiff = (*state)->src.seqhi -
4836			    (*state)->dst.seqlo;
4837			(*state)->src.seqhi = (*state)->src.seqlo +
4838			    (*state)->dst.max_win;
4839			(*state)->dst.seqhi = (*state)->dst.seqlo +
4840			    (*state)->src.max_win;
4841			(*state)->src.wscale = (*state)->dst.wscale = 0;
4842			pf_set_protostate(*state, PF_PEER_BOTH,
4843			    TCPS_ESTABLISHED);
4844			REASON_SET(reason, PFRES_SYNPROXY);
4845			return (PF_SYNPROXY_DROP);
4846		}
4847	}
4848	return (PF_PASS);
4849 }
4850
4851 int
4852 pf_test_state(struct pf_pdesc *pd, struct pf_state **state, u_short *reason,
4853     int syncookie)
4854 {
4855	struct pf_state_key_cmp	 key;
4856	int			 copyback = 0;
4857	struct pf_state_peer	*src, *dst;
4858	int			 action;
4859	struct inpcb		*inp;
4860	u_int8_t		 psrc, pdst;
4861
4862	key.af = pd->af;
4863	key.proto = pd->virtual_proto;
4864	key.rdomain = pd->rdomain;
4865	pf_addrcpy(&key.addr[pd->sidx], pd->src, key.af);
4866	pf_addrcpy(&key.addr[pd->didx], pd->dst, key.af);
4867	key.port[pd->sidx] = pd->osport;
4868	key.port[pd->didx] = pd->odport;
4869	inp = pd->m->m_pkthdr.pf.inp;
4870
4871	action = pf_find_state(pd, &key, state);
4872	if (action != PF_MATCH)
4873		return (action);
4874
4875	action = PF_PASS;
4876	if (pd->dir == (*state)->direction) {
4877		src = &(*state)->src;
4878		dst = &(*state)->dst;
4879		psrc = PF_PEER_SRC;
4880		pdst = PF_PEER_DST;
4881	} else {
4882		src = &(*state)->dst;
4883		dst = &(*state)->src;
4884		psrc = PF_PEER_DST;
4885		pdst = PF_PEER_SRC;
4886	}
4887
4888	switch (pd->virtual_proto) {
4889	case IPPROTO_TCP:
4890		if (syncookie) {
4891			pf_set_protostate(*state, PF_PEER_SRC,
4892			    PF_TCPS_PROXY_DST);
4893			(*state)->dst.seqhi = ntohl(pd->hdr.tcp.th_ack) - 1;
4894		}
4895		if ((action = pf_synproxy(pd, state, reason)) != PF_PASS)
4896			return (action);
4897		if ((pd->hdr.tcp.th_flags & (TH_SYN|TH_ACK)) == TH_SYN) {
4898
4899			if (dst->state >= TCPS_FIN_WAIT_2 &&
4900			    src->state >= TCPS_FIN_WAIT_2) {
4901				if (pf_status.debug >= LOG_NOTICE) {
4902					log(LOG_NOTICE, "pf: state reuse ");
4903					pf_print_state(*state);
4904					pf_print_flags(pd->hdr.tcp.th_flags);
4905					addlog("\n");
4906				}
4907				/* XXX make sure it's the same direction ?? */
4908				(*state)->timeout = PFTM_PURGE;
4909				*state = NULL;
4910				pf_mbuf_link_inpcb(pd->m, inp);
4911				return (PF_DROP);
4912			} else if (dst->state >= TCPS_ESTABLISHED &&
4913			    src->state >= TCPS_ESTABLISHED) {
4914				/*
4915				 * SYN matches existing state???
4916				 * Typically happens when the sender boots up
4917				 * after a sudden panic. Certain protocols
4918				 * (NFSv3) always use the same port numbers.
4919				 * A challenge ACK enables all parties
4920				 * (firewall and peers) to get in sync again.
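				 * (Editor's note on the mechanics, in the
				 * style of RFC 5961: the ACK sent below
				 * carries the old sequence values, the
				 * rebooted peer answers it with an RST
				 * matching the stale state, and that RST
				 * moves the state to TCPS_TIME_WAIT so the
				 * retransmitted SYN can create a new one.)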
4921				 */
4922				pf_send_challenge_ack(pd, *state, src, dst);
4923				return (PF_DROP);
4924			}
4925		}
4926
4927		if ((*state)->state_flags & PFSTATE_SLOPPY) {
4928			if (pf_tcp_track_sloppy(pd, state, reason) == PF_DROP)
4929				return (PF_DROP);
4930		} else {
4931			if (pf_tcp_track_full(pd, state, reason, &copyback,
4932			    PF_REVERSED_KEY((*state)->key, pd->af)) == PF_DROP)
4933				return (PF_DROP);
4934		}
4935		break;
4936	case IPPROTO_UDP:
4937		/* update states */
4938		if (src->state < PFUDPS_SINGLE)
4939			pf_set_protostate(*state, psrc, PFUDPS_SINGLE);
4940		if (dst->state == PFUDPS_SINGLE)
4941			pf_set_protostate(*state, pdst, PFUDPS_MULTIPLE);
4942
4943		/* update expire time */
4944		(*state)->expire = getuptime();
4945		if (src->state == PFUDPS_MULTIPLE &&
4946		    dst->state == PFUDPS_MULTIPLE)
4947			(*state)->timeout = PFTM_UDP_MULTIPLE;
4948		else
4949			(*state)->timeout = PFTM_UDP_SINGLE;
4950		break;
4951	default:
4952		/* update states */
4953		if (src->state < PFOTHERS_SINGLE)
4954			pf_set_protostate(*state, psrc, PFOTHERS_SINGLE);
4955		if (dst->state == PFOTHERS_SINGLE)
4956			pf_set_protostate(*state, pdst, PFOTHERS_MULTIPLE);
4957
4958		/* update expire time */
4959		(*state)->expire = getuptime();
4960		if (src->state == PFOTHERS_MULTIPLE &&
4961		    dst->state == PFOTHERS_MULTIPLE)
4962			(*state)->timeout = PFTM_OTHER_MULTIPLE;
4963		else
4964			(*state)->timeout = PFTM_OTHER_SINGLE;
4965		break;
4966	}
4967
4968	/* translate source/destination address, if necessary */
4969	if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
4970		struct pf_state_key	*nk;
4971		int			 afto, sidx, didx;
4972
4973		if (PF_REVERSED_KEY((*state)->key, pd->af))
4974			nk = (*state)->key[pd->sidx];
4975		else
4976			nk = (*state)->key[pd->didx];
4977
4978		afto = pd->af != nk->af;
4979		sidx = afto ? pd->didx : pd->sidx;
4980		didx = afto ?
pd->sidx : pd->didx; 4981 4982 #ifdef INET6 4983 if (afto) { 4984 pf_addrcpy(&pd->nsaddr, &nk->addr[sidx], nk->af); 4985 pf_addrcpy(&pd->ndaddr, &nk->addr[didx], nk->af); 4986 pd->naf = nk->af; 4987 action = PF_AFRT; 4988 } 4989 #endif /* INET6 */ 4990 4991 if (!afto) 4992 pf_translate_a(pd, pd->src, &nk->addr[sidx]); 4993 4994 if (pd->sport != NULL) 4995 pf_patch_16(pd, pd->sport, nk->port[sidx]); 4996 4997 if (afto || PF_ANEQ(pd->dst, &nk->addr[didx], pd->af) || 4998 pd->rdomain != nk->rdomain) 4999 pd->destchg = 1; 5000 5001 if (!afto) 5002 pf_translate_a(pd, pd->dst, &nk->addr[didx]); 5003 5004 if (pd->dport != NULL) 5005 pf_patch_16(pd, pd->dport, nk->port[didx]); 5006 5007 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 5008 copyback = 1; 5009 } 5010 5011 if (copyback && pd->hdrlen > 0) { 5012 m_copyback(pd->m, pd->off, pd->hdrlen, &pd->hdr, M_NOWAIT); 5013 } 5014 5015 return (action); 5016 } 5017 5018 int 5019 pf_icmp_state_lookup(struct pf_pdesc *pd, struct pf_state_key_cmp *key, 5020 struct pf_state **state, u_int16_t icmpid, u_int16_t type, 5021 int icmp_dir, int *iidx, int multi, int inner) 5022 { 5023 int direction, action; 5024 5025 key->af = pd->af; 5026 key->proto = pd->proto; 5027 key->rdomain = pd->rdomain; 5028 if (icmp_dir == PF_IN) { 5029 *iidx = pd->sidx; 5030 key->port[pd->sidx] = icmpid; 5031 key->port[pd->didx] = type; 5032 } else { 5033 *iidx = pd->didx; 5034 key->port[pd->sidx] = type; 5035 key->port[pd->didx] = icmpid; 5036 } 5037 5038 if (pf_state_key_addr_setup(pd, key, pd->sidx, pd->src, pd->didx, 5039 pd->dst, pd->af, multi)) 5040 return (PF_DROP); 5041 5042 action = pf_find_state(pd, key, state); 5043 if (action != PF_MATCH) 5044 return (action); 5045 5046 if ((*state)->state_flags & PFSTATE_SLOPPY) 5047 return (-1); 5048 5049 /* Is this ICMP message flowing in right direction? */ 5050 if ((*state)->key[PF_SK_WIRE]->af != (*state)->key[PF_SK_STACK]->af) 5051 direction = (pd->af == (*state)->key[PF_SK_WIRE]->af) ? 5052 PF_IN : PF_OUT; 5053 else 5054 direction = (*state)->direction; 5055 if ((((!inner && direction == pd->dir) || 5056 (inner && direction != pd->dir)) ? 5057 PF_IN : PF_OUT) != icmp_dir) { 5058 if (pf_status.debug >= LOG_NOTICE) { 5059 log(LOG_NOTICE, 5060 "pf: icmp type %d in wrong direction (%d): ", 5061 ntohs(type), icmp_dir); 5062 pf_print_state(*state); 5063 addlog("\n"); 5064 } 5065 return (PF_DROP); 5066 } 5067 return (-1); 5068 } 5069 5070 int 5071 pf_test_state_icmp(struct pf_pdesc *pd, struct pf_state **state, 5072 u_short *reason) 5073 { 5074 u_int16_t virtual_id, virtual_type; 5075 u_int8_t icmptype, icmpcode; 5076 int icmp_dir, iidx, ret, copyback = 0; 5077 5078 struct pf_state_key_cmp key; 5079 5080 switch (pd->proto) { 5081 case IPPROTO_ICMP: 5082 icmptype = pd->hdr.icmp.icmp_type; 5083 icmpcode = pd->hdr.icmp.icmp_code; 5084 break; 5085 #ifdef INET6 5086 case IPPROTO_ICMPV6: 5087 icmptype = pd->hdr.icmp6.icmp6_type; 5088 icmpcode = pd->hdr.icmp6.icmp6_code; 5089 break; 5090 #endif /* INET6 */ 5091 default: 5092 panic("unhandled proto %d", pd->proto); 5093 } 5094 5095 if (pf_icmp_mapping(pd, icmptype, &icmp_dir, &virtual_id, 5096 &virtual_type) == 0) { 5097 /* 5098 * ICMP query/reply message not related to a TCP/UDP packet. 5099 * Search for an ICMP state. 5100 */ 5101 ret = pf_icmp_state_lookup(pd, &key, state, 5102 virtual_id, virtual_type, icmp_dir, &iidx, 5103 0, 0); 5104 /* IPv6? 
try matching a multicast address */ 5105 if (ret == PF_DROP && pd->af == AF_INET6 && icmp_dir == PF_OUT) 5106 ret = pf_icmp_state_lookup(pd, &key, state, virtual_id, 5107 virtual_type, icmp_dir, &iidx, 1, 0); 5108 if (ret >= 0) 5109 return (ret); 5110 5111 (*state)->expire = getuptime(); 5112 (*state)->timeout = PFTM_ICMP_ERROR_REPLY; 5113 5114 /* translate source/destination address, if necessary */ 5115 if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { 5116 struct pf_state_key *nk; 5117 int afto, sidx, didx; 5118 5119 if (PF_REVERSED_KEY((*state)->key, pd->af)) 5120 nk = (*state)->key[pd->sidx]; 5121 else 5122 nk = (*state)->key[pd->didx]; 5123 5124 afto = pd->af != nk->af; 5125 sidx = afto ? pd->didx : pd->sidx; 5126 didx = afto ? pd->sidx : pd->didx; 5127 iidx = afto ? !iidx : iidx; 5128 #ifdef INET6 5129 if (afto) { 5130 pf_addrcpy(&pd->nsaddr, &nk->addr[sidx], 5131 nk->af); 5132 pf_addrcpy(&pd->ndaddr, &nk->addr[didx], 5133 nk->af); 5134 pd->naf = nk->af; 5135 } 5136 #endif /* INET6 */ 5137 if (!afto) { 5138 pf_translate_a(pd, pd->src, &nk->addr[sidx]); 5139 pf_translate_a(pd, pd->dst, &nk->addr[didx]); 5140 } 5141 5142 if (pd->rdomain != nk->rdomain) 5143 pd->destchg = 1; 5144 if (!afto && PF_ANEQ(pd->dst, 5145 &nk->addr[didx], pd->af)) 5146 pd->destchg = 1; 5147 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 5148 5149 switch (pd->af) { 5150 case AF_INET: 5151 #ifdef INET6 5152 if (afto) { 5153 if (pf_translate_icmp_af(pd, AF_INET6, 5154 &pd->hdr.icmp)) 5155 return (PF_DROP); 5156 pd->proto = IPPROTO_ICMPV6; 5157 } 5158 #endif /* INET6 */ 5159 pf_patch_16(pd, 5160 &pd->hdr.icmp.icmp_id, nk->port[iidx]); 5161 5162 m_copyback(pd->m, pd->off, ICMP_MINLEN, 5163 &pd->hdr.icmp, M_NOWAIT); 5164 copyback = 1; 5165 break; 5166 #ifdef INET6 5167 case AF_INET6: 5168 if (afto) { 5169 if (pf_translate_icmp_af(pd, AF_INET, 5170 &pd->hdr.icmp6)) 5171 return (PF_DROP); 5172 pd->proto = IPPROTO_ICMP; 5173 } 5174 5175 pf_patch_16(pd, 5176 &pd->hdr.icmp6.icmp6_id, nk->port[iidx]); 5177 5178 m_copyback(pd->m, pd->off, 5179 sizeof(struct icmp6_hdr), &pd->hdr.icmp6, 5180 M_NOWAIT); 5181 copyback = 1; 5182 break; 5183 #endif /* INET6 */ 5184 } 5185 #ifdef INET6 5186 if (afto) 5187 return (PF_AFRT); 5188 #endif /* INET6 */ 5189 } 5190 } else { 5191 /* 5192 * ICMP error message in response to a TCP/UDP packet. 5193 * Extract the inner TCP/UDP header and search for that state. 5194 */ 5195 struct pf_pdesc pd2; 5196 struct ip h2; 5197 #ifdef INET6 5198 struct ip6_hdr h2_6; 5199 #endif /* INET6 */ 5200 int ipoff2; 5201 5202 /* Initialize pd2 fields valid for both packets with pd. */ 5203 memset(&pd2, 0, sizeof(pd2)); 5204 pd2.af = pd->af; 5205 pd2.dir = pd->dir; 5206 pd2.kif = pd->kif; 5207 pd2.m = pd->m; 5208 pd2.rdomain = pd->rdomain; 5209 /* Payload packet is from the opposite direction. */ 5210 pd2.sidx = (pd2.dir == PF_IN) ? 1 : 0; 5211 pd2.didx = (pd2.dir == PF_IN) ? 
0 : 1; 5212 switch (pd->af) { 5213 case AF_INET: 5214 /* offset of h2 in mbuf chain */ 5215 ipoff2 = pd->off + ICMP_MINLEN; 5216 5217 if (!pf_pull_hdr(pd2.m, ipoff2, &h2, sizeof(h2), 5218 NULL, reason, pd2.af)) { 5219 DPFPRINTF(LOG_NOTICE, 5220 "ICMP error message too short (ip)"); 5221 return (PF_DROP); 5222 } 5223 /* 5224 * ICMP error messages don't refer to non-first 5225 * fragments 5226 */ 5227 if (h2.ip_off & htons(IP_OFFMASK)) { 5228 REASON_SET(reason, PFRES_FRAG); 5229 return (PF_DROP); 5230 } 5231 5232 /* offset of protocol header that follows h2 */ 5233 pd2.off = ipoff2; 5234 if (pf_walk_header(&pd2, &h2, reason) != PF_PASS) 5235 return (PF_DROP); 5236 5237 pd2.tot_len = ntohs(h2.ip_len); 5238 pd2.src = (struct pf_addr *)&h2.ip_src; 5239 pd2.dst = (struct pf_addr *)&h2.ip_dst; 5240 break; 5241 #ifdef INET6 5242 case AF_INET6: 5243 ipoff2 = pd->off + sizeof(struct icmp6_hdr); 5244 5245 if (!pf_pull_hdr(pd2.m, ipoff2, &h2_6, sizeof(h2_6), 5246 NULL, reason, pd2.af)) { 5247 DPFPRINTF(LOG_NOTICE, 5248 "ICMP error message too short (ip6)"); 5249 return (PF_DROP); 5250 } 5251 5252 pd2.off = ipoff2; 5253 if (pf_walk_header6(&pd2, &h2_6, reason) != PF_PASS) 5254 return (PF_DROP); 5255 5256 pd2.tot_len = ntohs(h2_6.ip6_plen) + 5257 sizeof(struct ip6_hdr); 5258 pd2.src = (struct pf_addr *)&h2_6.ip6_src; 5259 pd2.dst = (struct pf_addr *)&h2_6.ip6_dst; 5260 break; 5261 #endif /* INET6 */ 5262 default: 5263 unhandled_af(pd->af); 5264 } 5265 5266 if (PF_ANEQ(pd->dst, pd2.src, pd->af)) { 5267 if (pf_status.debug >= LOG_NOTICE) { 5268 log(LOG_NOTICE, 5269 "pf: BAD ICMP %d:%d outer dst: ", 5270 icmptype, icmpcode); 5271 pf_print_host(pd->src, 0, pd->af); 5272 addlog(" -> "); 5273 pf_print_host(pd->dst, 0, pd->af); 5274 addlog(" inner src: "); 5275 pf_print_host(pd2.src, 0, pd2.af); 5276 addlog(" -> "); 5277 pf_print_host(pd2.dst, 0, pd2.af); 5278 addlog("\n"); 5279 } 5280 REASON_SET(reason, PFRES_BADSTATE); 5281 return (PF_DROP); 5282 } 5283 5284 switch (pd2.proto) { 5285 case IPPROTO_TCP: { 5286 struct tcphdr *th = &pd2.hdr.tcp; 5287 u_int32_t seq; 5288 struct pf_state_peer *src, *dst; 5289 u_int8_t dws; 5290 int action; 5291 5292 /* 5293 * Only the first 8 bytes of the TCP header can be 5294 * expected. Don't access any TCP header fields after 5295 * th_seq, an ackskew test is not possible. 
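		 * (Editor's note: 8 bytes cover exactly th_sport,
		 * th_dport and th_seq; th_ack begins at offset 8, which
		 * is why only the sequence-window check is possible.)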
5296 */ 5297 if (!pf_pull_hdr(pd2.m, pd2.off, th, 8, NULL, reason, 5298 pd2.af)) { 5299 DPFPRINTF(LOG_NOTICE, 5300 "ICMP error message too short (tcp)"); 5301 return (PF_DROP); 5302 } 5303 5304 key.af = pd2.af; 5305 key.proto = IPPROTO_TCP; 5306 key.rdomain = pd2.rdomain; 5307 pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af); 5308 pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af); 5309 key.port[pd2.sidx] = th->th_sport; 5310 key.port[pd2.didx] = th->th_dport; 5311 5312 action = pf_find_state(&pd2, &key, state); 5313 if (action != PF_MATCH) 5314 return (action); 5315 5316 if (pd2.dir == (*state)->direction) { 5317 if (PF_REVERSED_KEY((*state)->key, pd->af)) { 5318 src = &(*state)->src; 5319 dst = &(*state)->dst; 5320 } else { 5321 src = &(*state)->dst; 5322 dst = &(*state)->src; 5323 } 5324 } else { 5325 if (PF_REVERSED_KEY((*state)->key, pd->af)) { 5326 src = &(*state)->dst; 5327 dst = &(*state)->src; 5328 } else { 5329 src = &(*state)->src; 5330 dst = &(*state)->dst; 5331 } 5332 } 5333 5334 if (src->wscale && dst->wscale) 5335 dws = dst->wscale & PF_WSCALE_MASK; 5336 else 5337 dws = 0; 5338 5339 /* Demodulate sequence number */ 5340 seq = ntohl(th->th_seq) - src->seqdiff; 5341 if (src->seqdiff) { 5342 pf_patch_32(pd, &th->th_seq, htonl(seq)); 5343 copyback = 1; 5344 } 5345 5346 if (!((*state)->state_flags & PFSTATE_SLOPPY) && 5347 (!SEQ_GEQ(src->seqhi, seq) || !SEQ_GEQ(seq, 5348 src->seqlo - (dst->max_win << dws)))) { 5349 if (pf_status.debug >= LOG_NOTICE) { 5350 log(LOG_NOTICE, 5351 "pf: BAD ICMP %d:%d ", 5352 icmptype, icmpcode); 5353 pf_print_host(pd->src, 0, pd->af); 5354 addlog(" -> "); 5355 pf_print_host(pd->dst, 0, pd->af); 5356 addlog(" state: "); 5357 pf_print_state(*state); 5358 addlog(" seq=%u\n", seq); 5359 } 5360 REASON_SET(reason, PFRES_BADSTATE); 5361 return (PF_DROP); 5362 } else { 5363 if (pf_status.debug >= LOG_DEBUG) { 5364 log(LOG_DEBUG, 5365 "pf: OK ICMP %d:%d ", 5366 icmptype, icmpcode); 5367 pf_print_host(pd->src, 0, pd->af); 5368 addlog(" -> "); 5369 pf_print_host(pd->dst, 0, pd->af); 5370 addlog(" state: "); 5371 pf_print_state(*state); 5372 addlog(" seq=%u\n", seq); 5373 } 5374 } 5375 5376 /* translate source/destination address, if necessary */ 5377 if ((*state)->key[PF_SK_WIRE] != 5378 (*state)->key[PF_SK_STACK]) { 5379 struct pf_state_key *nk; 5380 int afto, sidx, didx; 5381 5382 if (PF_REVERSED_KEY((*state)->key, pd->af)) 5383 nk = (*state)->key[pd->sidx]; 5384 else 5385 nk = (*state)->key[pd->didx]; 5386 5387 afto = pd->af != nk->af; 5388 sidx = afto ? pd2.didx : pd2.sidx; 5389 didx = afto ? 
pd2.sidx : pd2.didx; 5390 5391 #ifdef INET6 5392 if (afto) { 5393 if (pf_translate_icmp_af(pd, nk->af, 5394 &pd->hdr.icmp)) 5395 return (PF_DROP); 5396 m_copyback(pd->m, pd->off, 5397 sizeof(struct icmp6_hdr), 5398 &pd->hdr.icmp6, M_NOWAIT); 5399 if (pf_change_icmp_af(pd->m, ipoff2, 5400 pd, &pd2, &nk->addr[sidx], 5401 &nk->addr[didx], pd->af, nk->af)) 5402 return (PF_DROP); 5403 if (nk->af == AF_INET) 5404 pd->proto = IPPROTO_ICMP; 5405 else 5406 pd->proto = IPPROTO_ICMPV6; 5407 pd->m->m_pkthdr.ph_rtableid = 5408 nk->rdomain; 5409 pd->destchg = 1; 5410 pf_addrcpy(&pd->nsaddr, 5411 &nk->addr[pd2.sidx], nk->af); 5412 pf_addrcpy(&pd->ndaddr, 5413 &nk->addr[pd2.didx], nk->af); 5414 pd->naf = nk->af; 5415 5416 pf_patch_16(pd, 5417 &th->th_sport, nk->port[sidx]); 5418 pf_patch_16(pd, 5419 &th->th_dport, nk->port[didx]); 5420 5421 m_copyback(pd2.m, pd2.off, 8, th, 5422 M_NOWAIT); 5423 return (PF_AFRT); 5424 } 5425 #endif /* INET6 */ 5426 if (PF_ANEQ(pd2.src, 5427 &nk->addr[pd2.sidx], pd2.af) || 5428 nk->port[pd2.sidx] != th->th_sport) 5429 pf_translate_icmp(pd, pd2.src, 5430 &th->th_sport, pd->dst, 5431 &nk->addr[pd2.sidx], 5432 nk->port[pd2.sidx]); 5433 5434 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], 5435 pd2.af) || pd2.rdomain != nk->rdomain) 5436 pd->destchg = 1; 5437 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 5438 5439 if (PF_ANEQ(pd2.dst, 5440 &nk->addr[pd2.didx], pd2.af) || 5441 nk->port[pd2.didx] != th->th_dport) 5442 pf_translate_icmp(pd, pd2.dst, 5443 &th->th_dport, pd->src, 5444 &nk->addr[pd2.didx], 5445 nk->port[pd2.didx]); 5446 copyback = 1; 5447 } 5448 5449 if (copyback) { 5450 switch (pd2.af) { 5451 case AF_INET: 5452 m_copyback(pd->m, pd->off, ICMP_MINLEN, 5453 &pd->hdr.icmp, M_NOWAIT); 5454 m_copyback(pd2.m, ipoff2, sizeof(h2), 5455 &h2, M_NOWAIT); 5456 break; 5457 #ifdef INET6 5458 case AF_INET6: 5459 m_copyback(pd->m, pd->off, 5460 sizeof(struct icmp6_hdr), 5461 &pd->hdr.icmp6, M_NOWAIT); 5462 m_copyback(pd2.m, ipoff2, sizeof(h2_6), 5463 &h2_6, M_NOWAIT); 5464 break; 5465 #endif /* INET6 */ 5466 } 5467 m_copyback(pd2.m, pd2.off, 8, th, M_NOWAIT); 5468 } 5469 break; 5470 } 5471 case IPPROTO_UDP: { 5472 struct udphdr *uh = &pd2.hdr.udp; 5473 int action; 5474 5475 if (!pf_pull_hdr(pd2.m, pd2.off, uh, sizeof(*uh), 5476 NULL, reason, pd2.af)) { 5477 DPFPRINTF(LOG_NOTICE, 5478 "ICMP error message too short (udp)"); 5479 return (PF_DROP); 5480 } 5481 5482 key.af = pd2.af; 5483 key.proto = IPPROTO_UDP; 5484 key.rdomain = pd2.rdomain; 5485 pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af); 5486 pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af); 5487 key.port[pd2.sidx] = uh->uh_sport; 5488 key.port[pd2.didx] = uh->uh_dport; 5489 5490 action = pf_find_state(&pd2, &key, state); 5491 if (action != PF_MATCH) 5492 return (action); 5493 5494 /* translate source/destination address, if necessary */ 5495 if ((*state)->key[PF_SK_WIRE] != 5496 (*state)->key[PF_SK_STACK]) { 5497 struct pf_state_key *nk; 5498 int afto, sidx, didx; 5499 5500 if (PF_REVERSED_KEY((*state)->key, pd->af)) 5501 nk = (*state)->key[pd->sidx]; 5502 else 5503 nk = (*state)->key[pd->didx]; 5504 5505 afto = pd->af != nk->af; 5506 sidx = afto ? pd2.didx : pd2.sidx; 5507 didx = afto ? 
pd2.sidx : pd2.didx; 5508 5509 #ifdef INET6 5510 if (afto) { 5511 if (pf_translate_icmp_af(pd, nk->af, 5512 &pd->hdr.icmp)) 5513 return (PF_DROP); 5514 m_copyback(pd->m, pd->off, 5515 sizeof(struct icmp6_hdr), 5516 &pd->hdr.icmp6, M_NOWAIT); 5517 if (pf_change_icmp_af(pd->m, ipoff2, 5518 pd, &pd2, &nk->addr[sidx], 5519 &nk->addr[didx], pd->af, nk->af)) 5520 return (PF_DROP); 5521 if (nk->af == AF_INET) 5522 pd->proto = IPPROTO_ICMP; 5523 else 5524 pd->proto = IPPROTO_ICMPV6; 5525 pd->m->m_pkthdr.ph_rtableid = 5526 nk->rdomain; 5527 pd->destchg = 1; 5528 pf_addrcpy(&pd->nsaddr, 5529 &nk->addr[pd2.sidx], nk->af); 5530 pf_addrcpy(&pd->ndaddr, 5531 &nk->addr[pd2.didx], nk->af); 5532 pd->naf = nk->af; 5533 5534 pf_patch_16(pd, 5535 &uh->uh_sport, nk->port[sidx]); 5536 pf_patch_16(pd, 5537 &uh->uh_dport, nk->port[didx]); 5538 5539 m_copyback(pd2.m, pd2.off, sizeof(*uh), 5540 uh, M_NOWAIT); 5541 return (PF_AFRT); 5542 } 5543 #endif /* INET6 */ 5544 5545 if (PF_ANEQ(pd2.src, 5546 &nk->addr[pd2.sidx], pd2.af) || 5547 nk->port[pd2.sidx] != uh->uh_sport) 5548 pf_translate_icmp(pd, pd2.src, 5549 &uh->uh_sport, pd->dst, 5550 &nk->addr[pd2.sidx], 5551 nk->port[pd2.sidx]); 5552 5553 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], 5554 pd2.af) || pd2.rdomain != nk->rdomain) 5555 pd->destchg = 1; 5556 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 5557 5558 if (PF_ANEQ(pd2.dst, 5559 &nk->addr[pd2.didx], pd2.af) || 5560 nk->port[pd2.didx] != uh->uh_dport) 5561 pf_translate_icmp(pd, pd2.dst, 5562 &uh->uh_dport, pd->src, 5563 &nk->addr[pd2.didx], 5564 nk->port[pd2.didx]); 5565 5566 switch (pd2.af) { 5567 case AF_INET: 5568 m_copyback(pd->m, pd->off, ICMP_MINLEN, 5569 &pd->hdr.icmp, M_NOWAIT); 5570 m_copyback(pd2.m, ipoff2, sizeof(h2), 5571 &h2, M_NOWAIT); 5572 break; 5573 #ifdef INET6 5574 case AF_INET6: 5575 m_copyback(pd->m, pd->off, 5576 sizeof(struct icmp6_hdr), 5577 &pd->hdr.icmp6, M_NOWAIT); 5578 m_copyback(pd2.m, ipoff2, sizeof(h2_6), 5579 &h2_6, M_NOWAIT); 5580 break; 5581 #endif /* INET6 */ 5582 } 5583 /* Avoid recomputing quoted UDP checksum. 5584 * note: udp6 0 csum invalid per rfc2460 p27. 5585 * but presumed nothing cares in this context */ 5586 pf_patch_16(pd, &uh->uh_sum, 0); 5587 m_copyback(pd2.m, pd2.off, sizeof(*uh), uh, 5588 M_NOWAIT); 5589 copyback = 1; 5590 } 5591 break; 5592 } 5593 case IPPROTO_ICMP: { 5594 struct icmp *iih = &pd2.hdr.icmp; 5595 5596 if (pd2.af != AF_INET) { 5597 REASON_SET(reason, PFRES_NORM); 5598 return (PF_DROP); 5599 } 5600 5601 if (!pf_pull_hdr(pd2.m, pd2.off, iih, ICMP_MINLEN, 5602 NULL, reason, pd2.af)) { 5603 DPFPRINTF(LOG_NOTICE, 5604 "ICMP error message too short (icmp)"); 5605 return (PF_DROP); 5606 } 5607 5608 pf_icmp_mapping(&pd2, iih->icmp_type, 5609 &icmp_dir, &virtual_id, &virtual_type); 5610 5611 ret = pf_icmp_state_lookup(&pd2, &key, state, 5612 virtual_id, virtual_type, icmp_dir, &iidx, 0, 1); 5613 if (ret >= 0) 5614 return (ret); 5615 5616 /* translate source/destination address, if necessary */ 5617 if ((*state)->key[PF_SK_WIRE] != 5618 (*state)->key[PF_SK_STACK]) { 5619 struct pf_state_key *nk; 5620 int afto, sidx, didx; 5621 5622 if (PF_REVERSED_KEY((*state)->key, pd->af)) 5623 nk = (*state)->key[pd->sidx]; 5624 else 5625 nk = (*state)->key[pd->didx]; 5626 5627 afto = pd->af != nk->af; 5628 sidx = afto ? pd2.didx : pd2.sidx; 5629 didx = afto ? pd2.sidx : pd2.didx; 5630 iidx = afto ? 
!iidx : iidx; 5631 5632 #ifdef INET6 5633 if (afto) { 5634 if (nk->af != AF_INET6) 5635 return (PF_DROP); 5636 if (pf_translate_icmp_af(pd, nk->af, 5637 &pd->hdr.icmp)) 5638 return (PF_DROP); 5639 m_copyback(pd->m, pd->off, 5640 sizeof(struct icmp6_hdr), 5641 &pd->hdr.icmp6, M_NOWAIT); 5642 if (pf_change_icmp_af(pd->m, ipoff2, 5643 pd, &pd2, &nk->addr[sidx], 5644 &nk->addr[didx], pd->af, nk->af)) 5645 return (PF_DROP); 5646 pd->proto = IPPROTO_ICMPV6; 5647 if (pf_translate_icmp_af(pd, 5648 nk->af, iih)) 5649 return (PF_DROP); 5650 if (virtual_type == htons(ICMP_ECHO)) 5651 pf_patch_16(pd, &iih->icmp_id, 5652 nk->port[iidx]); 5653 m_copyback(pd2.m, pd2.off, ICMP_MINLEN, 5654 iih, M_NOWAIT); 5655 pd->m->m_pkthdr.ph_rtableid = 5656 nk->rdomain; 5657 pd->destchg = 1; 5658 pf_addrcpy(&pd->nsaddr, 5659 &nk->addr[pd2.sidx], nk->af); 5660 pf_addrcpy(&pd->ndaddr, 5661 &nk->addr[pd2.didx], nk->af); 5662 pd->naf = nk->af; 5663 return (PF_AFRT); 5664 } 5665 #endif /* INET6 */ 5666 5667 if (PF_ANEQ(pd2.src, 5668 &nk->addr[pd2.sidx], pd2.af) || 5669 (virtual_type == htons(ICMP_ECHO) && 5670 nk->port[iidx] != iih->icmp_id)) 5671 pf_translate_icmp(pd, pd2.src, 5672 (virtual_type == htons(ICMP_ECHO)) ? 5673 &iih->icmp_id : NULL, 5674 pd->dst, &nk->addr[pd2.sidx], 5675 (virtual_type == htons(ICMP_ECHO)) ? 5676 nk->port[iidx] : 0); 5677 5678 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], 5679 pd2.af) || pd2.rdomain != nk->rdomain) 5680 pd->destchg = 1; 5681 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 5682 5683 if (PF_ANEQ(pd2.dst, 5684 &nk->addr[pd2.didx], pd2.af)) 5685 pf_translate_icmp(pd, pd2.dst, NULL, 5686 pd->src, &nk->addr[pd2.didx], 0); 5687 5688 m_copyback(pd->m, pd->off, ICMP_MINLEN, 5689 &pd->hdr.icmp, M_NOWAIT); 5690 m_copyback(pd2.m, ipoff2, sizeof(h2), &h2, 5691 M_NOWAIT); 5692 m_copyback(pd2.m, pd2.off, ICMP_MINLEN, iih, 5693 M_NOWAIT); 5694 copyback = 1; 5695 } 5696 break; 5697 } 5698 #ifdef INET6 5699 case IPPROTO_ICMPV6: { 5700 struct icmp6_hdr *iih = &pd2.hdr.icmp6; 5701 5702 if (pd2.af != AF_INET6) { 5703 REASON_SET(reason, PFRES_NORM); 5704 return (PF_DROP); 5705 } 5706 5707 if (!pf_pull_hdr(pd2.m, pd2.off, iih, 5708 sizeof(struct icmp6_hdr), NULL, reason, pd2.af)) { 5709 DPFPRINTF(LOG_NOTICE, 5710 "ICMP error message too short (icmp6)"); 5711 return (PF_DROP); 5712 } 5713 5714 pf_icmp_mapping(&pd2, iih->icmp6_type, 5715 &icmp_dir, &virtual_id, &virtual_type); 5716 ret = pf_icmp_state_lookup(&pd2, &key, state, 5717 virtual_id, virtual_type, icmp_dir, &iidx, 0, 1); 5718 /* IPv6? try matching a multicast address */ 5719 if (ret == PF_DROP && pd2.af == AF_INET6 && 5720 icmp_dir == PF_OUT) 5721 ret = pf_icmp_state_lookup(&pd2, &key, state, 5722 virtual_id, virtual_type, icmp_dir, &iidx, 5723 1, 1); 5724 if (ret >= 0) 5725 return (ret); 5726 5727 /* translate source/destination address, if necessary */ 5728 if ((*state)->key[PF_SK_WIRE] != 5729 (*state)->key[PF_SK_STACK]) { 5730 struct pf_state_key *nk; 5731 int afto, sidx, didx; 5732 5733 if (PF_REVERSED_KEY((*state)->key, pd->af)) 5734 nk = (*state)->key[pd->sidx]; 5735 else 5736 nk = (*state)->key[pd->didx]; 5737 5738 afto = pd->af != nk->af; 5739 sidx = afto ? pd2.didx : pd2.sidx; 5740 didx = afto ? pd2.sidx : pd2.didx; 5741 iidx = afto ? 
!iidx : iidx; 5742 5743 if (afto) { 5744 if (nk->af != AF_INET) 5745 return (PF_DROP); 5746 if (pf_translate_icmp_af(pd, nk->af, 5747 &pd->hdr.icmp)) 5748 return (PF_DROP); 5749 m_copyback(pd->m, pd->off, 5750 sizeof(struct icmp6_hdr), 5751 &pd->hdr.icmp6, M_NOWAIT); 5752 if (pf_change_icmp_af(pd->m, ipoff2, 5753 pd, &pd2, &nk->addr[sidx], 5754 &nk->addr[didx], pd->af, nk->af)) 5755 return (PF_DROP); 5756 pd->proto = IPPROTO_ICMP; 5757 if (pf_translate_icmp_af(pd, 5758 nk->af, iih)) 5759 return (PF_DROP); 5760 if (virtual_type == 5761 htons(ICMP6_ECHO_REQUEST)) 5762 pf_patch_16(pd, &iih->icmp6_id, 5763 nk->port[iidx]); 5764 m_copyback(pd2.m, pd2.off, 5765 sizeof(struct icmp6_hdr), iih, 5766 M_NOWAIT); 5767 pd->m->m_pkthdr.ph_rtableid = 5768 nk->rdomain; 5769 pd->destchg = 1; 5770 pf_addrcpy(&pd->nsaddr, 5771 &nk->addr[pd2.sidx], nk->af); 5772 pf_addrcpy(&pd->ndaddr, 5773 &nk->addr[pd2.didx], nk->af); 5774 pd->naf = nk->af; 5775 return (PF_AFRT); 5776 } 5777 5778 if (PF_ANEQ(pd2.src, 5779 &nk->addr[pd2.sidx], pd2.af) || 5780 ((virtual_type == 5781 htons(ICMP6_ECHO_REQUEST)) && 5782 nk->port[pd2.sidx] != iih->icmp6_id)) 5783 pf_translate_icmp(pd, pd2.src, 5784 (virtual_type == 5785 htons(ICMP6_ECHO_REQUEST)) 5786 ? &iih->icmp6_id : NULL, 5787 pd->dst, &nk->addr[pd2.sidx], 5788 (virtual_type == 5789 htons(ICMP6_ECHO_REQUEST)) 5790 ? nk->port[iidx] : 0); 5791 5792 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], 5793 pd2.af) || pd2.rdomain != nk->rdomain) 5794 pd->destchg = 1; 5795 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 5796 5797 if (PF_ANEQ(pd2.dst, 5798 &nk->addr[pd2.didx], pd2.af)) 5799 pf_translate_icmp(pd, pd2.dst, NULL, 5800 pd->src, &nk->addr[pd2.didx], 0); 5801 5802 m_copyback(pd->m, pd->off, 5803 sizeof(struct icmp6_hdr), &pd->hdr.icmp6, 5804 M_NOWAIT); 5805 m_copyback(pd2.m, ipoff2, sizeof(h2_6), &h2_6, 5806 M_NOWAIT); 5807 m_copyback(pd2.m, pd2.off, 5808 sizeof(struct icmp6_hdr), iih, M_NOWAIT); 5809 copyback = 1; 5810 } 5811 break; 5812 } 5813 #endif /* INET6 */ 5814 default: { 5815 int action; 5816 5817 key.af = pd2.af; 5818 key.proto = pd2.proto; 5819 key.rdomain = pd2.rdomain; 5820 pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af); 5821 pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af); 5822 key.port[0] = key.port[1] = 0; 5823 5824 action = pf_find_state(&pd2, &key, state); 5825 if (action != PF_MATCH) 5826 return (action); 5827 5828 /* translate source/destination address, if necessary */ 5829 if ((*state)->key[PF_SK_WIRE] != 5830 (*state)->key[PF_SK_STACK]) { 5831 struct pf_state_key *nk = 5832 (*state)->key[pd->didx]; 5833 5834 if (PF_ANEQ(pd2.src, 5835 &nk->addr[pd2.sidx], pd2.af)) 5836 pf_translate_icmp(pd, pd2.src, NULL, 5837 pd->dst, &nk->addr[pd2.sidx], 0); 5838 5839 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], 5840 pd2.af) || pd2.rdomain != nk->rdomain) 5841 pd->destchg = 1; 5842 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 5843 5844 if (PF_ANEQ(pd2.dst, 5845 &nk->addr[pd2.didx], pd2.af)) 5846 pf_translate_icmp(pd, pd2.dst, NULL, 5847 pd->src, &nk->addr[pd2.didx], 0); 5848 5849 switch (pd2.af) { 5850 case AF_INET: 5851 m_copyback(pd->m, pd->off, ICMP_MINLEN, 5852 &pd->hdr.icmp, M_NOWAIT); 5853 m_copyback(pd2.m, ipoff2, sizeof(h2), 5854 &h2, M_NOWAIT); 5855 break; 5856 #ifdef INET6 5857 case AF_INET6: 5858 m_copyback(pd->m, pd->off, 5859 sizeof(struct icmp6_hdr), 5860 &pd->hdr.icmp6, M_NOWAIT); 5861 m_copyback(pd2.m, ipoff2, sizeof(h2_6), 5862 &h2_6, M_NOWAIT); 5863 break; 5864 #endif /* INET6 */ 5865 } 5866 copyback = 1; 5867 } 5868 break; 5869 } 5870 } 5871 } 5872 if (copyback) 
) {
5873		m_copyback(pd->m, pd->off, pd->hdrlen, &pd->hdr, M_NOWAIT);
5874	}
5875
5876	return (PF_PASS);
5877 }
5878
5879 /*
5880  * off is measured from the start of the mbuf chain.
5881  * len bytes at off are copied from the chain into the buffer p.
5882  */
5883 void *
5884 pf_pull_hdr(struct mbuf *m, int off, void *p, int len,
5885     u_short *actionp, u_short *reasonp, sa_family_t af)
5886 {
5887	int iplen = 0;
5888
5889	switch (af) {
5890	case AF_INET: {
5891		struct ip	*h = mtod(m, struct ip *);
5892		u_int16_t	 fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
5893
5894		if (fragoff) {
5895			if (fragoff >= len)
5896				ACTION_SET(actionp, PF_PASS);
5897			else {
5898				ACTION_SET(actionp, PF_DROP);
5899				REASON_SET(reasonp, PFRES_FRAG);
5900			}
5901			return (NULL);
5902		}
5903		iplen = ntohs(h->ip_len);
5904		break;
5905	}
5906 #ifdef INET6
5907	case AF_INET6: {
5908		struct ip6_hdr	*h = mtod(m, struct ip6_hdr *);
5909
5910		iplen = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
5911		break;
5912	}
5913 #endif /* INET6 */
5914	}
5915	if (m->m_pkthdr.len < off + len || iplen < off + len) {
5916		ACTION_SET(actionp, PF_DROP);
5917		REASON_SET(reasonp, PFRES_SHORT);
5918		return (NULL);
5919	}
5920	m_copydata(m, off, len, p);
5921	return (p);
5922 }
5923
5924 int
5925 pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif,
5926     int rtableid)
5927 {
5928	struct sockaddr_storage	 ss;
5929	struct sockaddr_in	*dst;
5930	int			 ret = 1;
5931	int			 check_mpath;
5932 #ifdef INET6
5933	struct sockaddr_in6	*dst6;
5934 #endif	/* INET6 */
5935	struct rtentry		*rt = NULL;
5936
5937	check_mpath = 0;
5938	memset(&ss, 0, sizeof(ss));
5939	switch (af) {
5940	case AF_INET:
5941		dst = (struct sockaddr_in *)&ss;
5942		dst->sin_family = AF_INET;
5943		dst->sin_len = sizeof(*dst);
5944		dst->sin_addr = addr->v4;
5945		if (ipmultipath)
5946			check_mpath = 1;
5947		break;
5948 #ifdef INET6
5949	case AF_INET6:
5950		/*
5951		 * Skip check for addresses with embedded interface scope,
5952		 * as they would always match anyway.
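		 * (Editor's note: the KAME-derived stack embeds the
		 * interface index in the second 16-bit word of a
		 * link-local address while it is inside the kernel, so
		 * such addresses carry their own scope with them.)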
5953 */ 5954 if (IN6_IS_SCOPE_EMBED(&addr->v6)) 5955 goto out; 5956 dst6 = (struct sockaddr_in6 *)&ss; 5957 dst6->sin6_family = AF_INET6; 5958 dst6->sin6_len = sizeof(*dst6); 5959 dst6->sin6_addr = addr->v6; 5960 if (ip6_multipath) 5961 check_mpath = 1; 5962 break; 5963 #endif /* INET6 */ 5964 } 5965 5966 /* Skip checks for ipsec interfaces */ 5967 if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC) 5968 goto out; 5969 5970 rt = rtalloc(sstosa(&ss), 0, rtableid); 5971 if (rt != NULL) { 5972 /* No interface given, this is a no-route check */ 5973 if (kif == NULL) 5974 goto out; 5975 5976 if (kif->pfik_ifp == NULL) { 5977 ret = 0; 5978 goto out; 5979 } 5980 5981 /* Perform uRPF check if passed input interface */ 5982 ret = 0; 5983 do { 5984 if (rt->rt_ifidx == kif->pfik_ifp->if_index) { 5985 ret = 1; 5986 #if NCARP > 0 5987 } else { 5988 struct ifnet *ifp; 5989 5990 ifp = if_get(rt->rt_ifidx); 5991 if (ifp != NULL && ifp->if_type == IFT_CARP && 5992 ifp->if_carpdevidx == 5993 kif->pfik_ifp->if_index) 5994 ret = 1; 5995 if_put(ifp); 5996 #endif /* NCARP */ 5997 } 5998 5999 rt = rtable_iterate(rt); 6000 } while (check_mpath == 1 && rt != NULL && ret == 0); 6001 } else 6002 ret = 0; 6003 out: 6004 rtfree(rt); 6005 return (ret); 6006 } 6007 6008 int 6009 pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw, 6010 int rtableid) 6011 { 6012 struct sockaddr_storage ss; 6013 struct sockaddr_in *dst; 6014 #ifdef INET6 6015 struct sockaddr_in6 *dst6; 6016 #endif /* INET6 */ 6017 struct rtentry *rt; 6018 int ret = 0; 6019 6020 memset(&ss, 0, sizeof(ss)); 6021 switch (af) { 6022 case AF_INET: 6023 dst = (struct sockaddr_in *)&ss; 6024 dst->sin_family = AF_INET; 6025 dst->sin_len = sizeof(*dst); 6026 dst->sin_addr = addr->v4; 6027 break; 6028 #ifdef INET6 6029 case AF_INET6: 6030 dst6 = (struct sockaddr_in6 *)&ss; 6031 dst6->sin6_family = AF_INET6; 6032 dst6->sin6_len = sizeof(*dst6); 6033 dst6->sin6_addr = addr->v6; 6034 break; 6035 #endif /* INET6 */ 6036 } 6037 6038 rt = rtalloc(sstosa(&ss), RT_RESOLVE, rtableid); 6039 if (rt != NULL) { 6040 if (rt->rt_labelid == aw->v.rtlabel) 6041 ret = 1; 6042 rtfree(rt); 6043 } 6044 6045 return (ret); 6046 } 6047 6048 /* pf_route() may change pd->m, adjust local copies after calling */ 6049 void 6050 pf_route(struct pf_pdesc *pd, struct pf_state *s) 6051 { 6052 struct mbuf *m0; 6053 struct mbuf_list fml; 6054 struct sockaddr_in *dst, sin; 6055 struct rtentry *rt = NULL; 6056 struct ip *ip; 6057 struct ifnet *ifp = NULL; 6058 int error = 0; 6059 unsigned int rtableid; 6060 6061 if (pd->m->m_pkthdr.pf.routed++ > 3) { 6062 m_freem(pd->m); 6063 pd->m = NULL; 6064 return; 6065 } 6066 6067 if (s->rt == PF_DUPTO) { 6068 if ((m0 = m_dup_pkt(pd->m, max_linkhdr, M_NOWAIT)) == NULL) 6069 return; 6070 } else { 6071 if ((s->rt == PF_REPLYTO) == (s->direction == pd->dir)) 6072 return; 6073 m0 = pd->m; 6074 pd->m = NULL; 6075 } 6076 6077 if (m0->m_len < sizeof(struct ip)) { 6078 DPFPRINTF(LOG_ERR, 6079 "%s: m0->m_len < sizeof(struct ip)", __func__); 6080 goto bad; 6081 } 6082 6083 ip = mtod(m0, struct ip *); 6084 6085 if (pd->dir == PF_IN) { 6086 if (ip->ip_ttl <= IPTTLDEC) { 6087 if (s->rt != PF_DUPTO) { 6088 pf_send_icmp(m0, ICMP_TIMXCEED, 6089 ICMP_TIMXCEED_INTRANS, 0, 6090 pd->af, s->rule.ptr, pd->rdomain); 6091 } 6092 goto bad; 6093 } 6094 ip->ip_ttl -= IPTTLDEC; 6095 } 6096 6097 memset(&sin, 0, sizeof(sin)); 6098 dst = &sin; 6099 dst->sin_family = AF_INET; 6100 dst->sin_len = sizeof(*dst); 6101 dst->sin_addr = s->rt_addr.v4; 6102 rtableid = 
m0->m_pkthdr.ph_rtableid;
6103
6104 rt = rtalloc_mpath(sintosa(dst), &ip->ip_src.s_addr, rtableid);
6105 if (!rtisvalid(rt)) {
6106 if (s->rt != PF_DUPTO) {
6107 pf_send_icmp(m0, ICMP_UNREACH, ICMP_UNREACH_HOST,
6108 0, pd->af, s->rule.ptr, pd->rdomain);
6109 }
6110 ipstat_inc(ips_noroute);
6111 goto bad;
6112 }
6113
6114 ifp = if_get(rt->rt_ifidx);
6115 if (ifp == NULL)
6116 goto bad;
6117
6118 /* A locally generated packet may have an invalid source address. */
6119 if ((ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET &&
6120 (ifp->if_flags & IFF_LOOPBACK) == 0)
6121 ip->ip_src = ifatoia(rt->rt_ifa)->ia_addr.sin_addr;
6122
6123 if (s->rt != PF_DUPTO && pd->dir == PF_IN) {
6124 if (pf_test(AF_INET, PF_OUT, ifp, &m0) != PF_PASS)
6125 goto bad;
6126 else if (m0 == NULL)
6127 goto done;
6128 if (m0->m_len < sizeof(struct ip)) {
6129 DPFPRINTF(LOG_ERR,
6130 "%s: m0->m_len < sizeof(struct ip)", __func__);
6131 goto bad;
6132 }
6133 ip = mtod(m0, struct ip *);
6134 }
6135
6136 in_proto_cksum_out(m0, ifp);
6137
6138 if (ntohs(ip->ip_len) <= ifp->if_mtu) {
6139 ip->ip_sum = 0;
6140 if (ifp->if_capabilities & IFCAP_CSUM_IPv4)
6141 m0->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT;
6142 else {
6143 ipstat_inc(ips_outswcsum);
6144 ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
6145 }
6146 error = ifp->if_output(ifp, m0, sintosa(dst), rt);
6147 goto done;
6148 }
6149
6150 /*
6151 * Too large for interface; fragment if possible.
6152 * Must be able to put at least 8 bytes per fragment.
6153 */
6154 if (ip->ip_off & htons(IP_DF)) {
6155 ipstat_inc(ips_cantfrag);
6156 if (s->rt != PF_DUPTO)
6157 pf_send_icmp(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG,
6158 ifp->if_mtu, pd->af, s->rule.ptr, pd->rdomain);
6159 goto bad;
6160 }
6161
6162 error = ip_fragment(m0, &fml, ifp, ifp->if_mtu);
6163 if (error)
6164 goto done;
6165
6166 while ((m0 = ml_dequeue(&fml)) != NULL) {
6167 error = ifp->if_output(ifp, m0, sintosa(dst), rt);
6168 if (error)
6169 break;
6170 }
6171 if (error)
6172 ml_purge(&fml);
6173 else
6174 ipstat_inc(ips_fragmented);
6175
6176 done:
6177 if_put(ifp);
6178 rtfree(rt);
6179 return;
6180
6181 bad:
6182 m_freem(m0);
6183 goto done;
6184 }
6185
6186 #ifdef INET6
6187 /* pf_route6() may change pd->m, adjust local copies after calling */
6188 void
6189 pf_route6(struct pf_pdesc *pd, struct pf_state *s)
6190 {
6191 struct mbuf *m0;
6192 struct sockaddr_in6 *dst, sin6;
6193 struct rtentry *rt = NULL;
6194 struct ip6_hdr *ip6;
6195 struct ifnet *ifp = NULL;
6196 struct m_tag *mtag;
6197 unsigned int rtableid;
6198
6199 if (pd->m->m_pkthdr.pf.routed++ > 3) {
6200 m_freem(pd->m);
6201 pd->m = NULL;
6202 return;
6203 }
6204
6205 if (s->rt == PF_DUPTO) {
6206 if ((m0 = m_dup_pkt(pd->m, max_linkhdr, M_NOWAIT)) == NULL)
6207 return;
6208 } else {
6209 if ((s->rt == PF_REPLYTO) == (s->direction == pd->dir))
6210 return;
6211 m0 = pd->m;
6212 pd->m = NULL;
6213 }
6214
6215 if (m0->m_len < sizeof(struct ip6_hdr)) {
6216 DPFPRINTF(LOG_ERR,
6217 "%s: m0->m_len < sizeof(struct ip6_hdr)", __func__);
6218 goto bad;
6219 }
6220 ip6 = mtod(m0, struct ip6_hdr *);
6221
6222 if (pd->dir == PF_IN) {
6223 if (ip6->ip6_hlim <= IPV6_HLIMDEC) {
6224 if (s->rt != PF_DUPTO) {
6225 pf_send_icmp(m0, ICMP6_TIME_EXCEEDED,
6226 ICMP6_TIME_EXCEED_TRANSIT, 0,
6227 pd->af, s->rule.ptr, pd->rdomain);
6228 }
6229 goto bad;
6230 }
6231 ip6->ip6_hlim -= IPV6_HLIMDEC;
6232 }
6233
6234 memset(&sin6, 0, sizeof(sin6));
6235 dst = &sin6;
6236 dst->sin6_family = AF_INET6;
6237 dst->sin6_len = sizeof(*dst);
6238 dst->sin6_addr =
s->rt_addr.v6;
6239 rtableid = m0->m_pkthdr.ph_rtableid;
6240
6241 rt = rtalloc_mpath(sin6tosa(dst), &ip6->ip6_src.s6_addr32[0],
6242 rtableid);
6243 if (!rtisvalid(rt)) {
6244 if (s->rt != PF_DUPTO) {
6245 pf_send_icmp(m0, ICMP6_DST_UNREACH,
6246 ICMP6_DST_UNREACH_NOROUTE, 0,
6247 pd->af, s->rule.ptr, pd->rdomain);
6248 }
6249 ip6stat_inc(ip6s_noroute);
6250 goto bad;
6251 }
6252
6253 ifp = if_get(rt->rt_ifidx);
6254 if (ifp == NULL)
6255 goto bad;
6256
6257 /* A locally generated packet may have an invalid source address. */
6258 if (IN6_IS_ADDR_LOOPBACK(&ip6->ip6_src) &&
6259 (ifp->if_flags & IFF_LOOPBACK) == 0)
6260 ip6->ip6_src = ifatoia6(rt->rt_ifa)->ia_addr.sin6_addr;
6261
6262 if (s->rt != PF_DUPTO && pd->dir == PF_IN) {
6263 if (pf_test(AF_INET6, PF_OUT, ifp, &m0) != PF_PASS)
6264 goto bad;
6265 else if (m0 == NULL)
6266 goto done;
6267 if (m0->m_len < sizeof(struct ip6_hdr)) {
6268 DPFPRINTF(LOG_ERR,
6269 "%s: m0->m_len < sizeof(struct ip6_hdr)", __func__);
6270 goto bad;
6271 }
6272 }
6273
6274 in6_proto_cksum_out(m0, ifp);
6275
6276 /*
6277 * If packet has been reassembled by PF earlier, we have to
6278 * use pf_refragment6() here to turn it back to fragments.
6279 */
6280 if ((mtag = m_tag_find(m0, PACKET_TAG_PF_REASSEMBLED, NULL))) {
6281 (void) pf_refragment6(&m0, mtag, dst, ifp, rt);
6282 } else if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) {
6283 ifp->if_output(ifp, m0, sin6tosa(dst), rt);
6284 } else {
6285 ip6stat_inc(ip6s_cantfrag);
6286 if (s->rt != PF_DUPTO)
6287 pf_send_icmp(m0, ICMP6_PACKET_TOO_BIG, 0,
6288 ifp->if_mtu, pd->af, s->rule.ptr, pd->rdomain);
6289 goto bad;
6290 }
6291
6292 done:
6293 if_put(ifp);
6294 rtfree(rt);
6295 return;
6296
6297 bad:
6298 m_freem(m0);
6299 goto done;
6300 }
6301 #endif /* INET6 */
6302
6303 /*
6304 * check TCP checksum and set mbuf flag
6305 * off is the offset where the protocol header starts
6306 * len is the total length of protocol header plus payload
6307 * returns 0 when the checksum is valid, otherwise returns 1.
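* the verdict is cached in csum_flags as M_TCP_CSUM_IN_OK or
* M_TCP_CSUM_IN_BAD, so a repeated call on the same mbuf is cheap.
* callers typically pass pd->off and pd->tot_len - pd->off, e.g.
* pf_check_tcp_cksum(pd->m, pd->off, pd->tot_len - pd->off, pd->af)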
6308 * if the _OUT flag is set, the checksum isn't done yet; consider these ok
6309 */
6310 int
6311 pf_check_tcp_cksum(struct mbuf *m, int off, int len, sa_family_t af)
6312 {
6313 u_int16_t sum;
6314
6315 if (m->m_pkthdr.csum_flags &
6316 (M_TCP_CSUM_IN_OK | M_TCP_CSUM_OUT)) {
6317 return (0);
6318 }
6319 if (m->m_pkthdr.csum_flags & M_TCP_CSUM_IN_BAD ||
6320 off < sizeof(struct ip) ||
6321 m->m_pkthdr.len < off + len) {
6322 return (1);
6323 }
6324
6325 /* need to do it in software */
6326 tcpstat_inc(tcps_inswcsum);
6327
6328 switch (af) {
6329 case AF_INET:
6330 if (m->m_len < sizeof(struct ip))
6331 return (1);
6332
6333 sum = in4_cksum(m, IPPROTO_TCP, off, len);
6334 break;
6335 #ifdef INET6
6336 case AF_INET6:
6337 if (m->m_len < sizeof(struct ip6_hdr))
6338 return (1);
6339
6340 sum = in6_cksum(m, IPPROTO_TCP, off, len);
6341 break;
6342 #endif /* INET6 */
6343 default:
6344 unhandled_af(af);
6345 }
6346 if (sum) {
6347 tcpstat_inc(tcps_rcvbadsum);
6348 m->m_pkthdr.csum_flags |= M_TCP_CSUM_IN_BAD;
6349 return (1);
6350 }
6351
6352 m->m_pkthdr.csum_flags |= M_TCP_CSUM_IN_OK;
6353 return (0);
6354 }
6355
6356 struct pf_divert *
6357 pf_find_divert(struct mbuf *m)
6358 {
6359 struct m_tag *mtag;
6360
6361 if ((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL)
6362 return (NULL);
6363
6364 return ((struct pf_divert *)(mtag + 1));
6365 }
6366
6367 struct pf_divert *
6368 pf_get_divert(struct mbuf *m)
6369 {
6370 struct m_tag *mtag;
6371
6372 if ((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL) {
6373 mtag = m_tag_get(PACKET_TAG_PF_DIVERT, sizeof(struct pf_divert),
6374 M_NOWAIT);
6375 if (mtag == NULL)
6376 return (NULL);
6377 memset(mtag + 1, 0, sizeof(struct pf_divert));
6378 m_tag_prepend(m, mtag);
6379 }
6380
6381 return ((struct pf_divert *)(mtag + 1));
6382 }
6383
6384 int
6385 pf_walk_header(struct pf_pdesc *pd, struct ip *h, u_short *reason)
6386 {
6387 struct ip6_ext ext;
6388 u_int32_t hlen, end;
6389 int hdr_cnt;
6390
6391 hlen = h->ip_hl << 2;
6392 if (hlen < sizeof(struct ip) || hlen > ntohs(h->ip_len)) {
6393 REASON_SET(reason, PFRES_SHORT);
6394 return (PF_DROP);
6395 }
6396 if (hlen != sizeof(struct ip))
6397 pd->badopts++;
6398 end = pd->off + ntohs(h->ip_len);
6399 pd->off += hlen;
6400 pd->proto = h->ip_p;
6401 /* stop walking over non-initial fragments */
6402 if ((h->ip_off & htons(IP_OFFMASK)) != 0)
6403 return (PF_PASS);
6404
6405 for (hdr_cnt = 0; hdr_cnt < pf_hdr_limit; hdr_cnt++) {
6406 switch (pd->proto) {
6407 case IPPROTO_AH:
6408 /* fragments may be short */
6409 if ((h->ip_off & htons(IP_MF | IP_OFFMASK)) != 0 &&
6410 end < pd->off + sizeof(ext))
6411 return (PF_PASS);
6412 if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext),
6413 NULL, reason, AF_INET)) {
6414 DPFPRINTF(LOG_NOTICE, "IP short exthdr");
6415 return (PF_DROP);
6416 }
6417 pd->off += (ext.ip6e_len + 2) * 4;
6418 pd->proto = ext.ip6e_nxt;
6419 break;
6420 default:
6421 return (PF_PASS);
6422 }
6423 }
6424 DPFPRINTF(LOG_NOTICE, "IPv4 nested authentication header limit");
6425 REASON_SET(reason, PFRES_IPOPTIONS);
6426 return (PF_DROP);
6427 }
6428
6429 #ifdef INET6
6430 int
6431 pf_walk_option6(struct pf_pdesc *pd, struct ip6_hdr *h, int off, int end,
6432 u_short *reason)
6433 {
6434 struct ip6_opt opt;
6435 struct ip6_opt_jumbo jumbo;
6436
6437 while (off < end) {
6438 if (!pf_pull_hdr(pd->m, off, &opt.ip6o_type,
6439 sizeof(opt.ip6o_type), NULL, reason, AF_INET6)) {
6440 DPFPRINTF(LOG_NOTICE, "IPv6 short opt type");
6441 return (PF_DROP);
6442 }
6443 if (opt.ip6o_type ==
IP6OPT_PAD1) {
6444 off++;
6445 continue;
6446 }
6447 if (!pf_pull_hdr(pd->m, off, &opt, sizeof(opt),
6448 NULL, reason, AF_INET6)) {
6449 DPFPRINTF(LOG_NOTICE, "IPv6 short opt");
6450 return (PF_DROP);
6451 }
6452 if (off + sizeof(opt) + opt.ip6o_len > end) {
6453 DPFPRINTF(LOG_NOTICE, "IPv6 long opt");
6454 REASON_SET(reason, PFRES_IPOPTIONS);
6455 return (PF_DROP);
6456 }
6457 switch (opt.ip6o_type) {
6458 case IP6OPT_JUMBO:
6459 if (pd->jumbolen != 0) {
6460 DPFPRINTF(LOG_NOTICE, "IPv6 multiple jumbo");
6461 REASON_SET(reason, PFRES_IPOPTIONS);
6462 return (PF_DROP);
6463 }
6464 if (ntohs(h->ip6_plen) != 0) {
6465 DPFPRINTF(LOG_NOTICE, "IPv6 bad jumbo plen");
6466 REASON_SET(reason, PFRES_IPOPTIONS);
6467 return (PF_DROP);
6468 }
6469 if (!pf_pull_hdr(pd->m, off, &jumbo, sizeof(jumbo),
6470 NULL, reason, AF_INET6)) {
6471 DPFPRINTF(LOG_NOTICE, "IPv6 short jumbo");
6472 return (PF_DROP);
6473 }
6474 memcpy(&pd->jumbolen, jumbo.ip6oj_jumbo_len,
6475 sizeof(pd->jumbolen));
6476 pd->jumbolen = ntohl(pd->jumbolen);
6477 if (pd->jumbolen < IPV6_MAXPACKET) {
6478 DPFPRINTF(LOG_NOTICE, "IPv6 short jumbolen");
6479 REASON_SET(reason, PFRES_IPOPTIONS);
6480 return (PF_DROP);
6481 }
6482 break;
6483 default:
6484 break;
6485 }
6486 off += sizeof(opt) + opt.ip6o_len;
6487 }
6488
6489 return (PF_PASS);
6490 }
6491
6492 int
6493 pf_walk_header6(struct pf_pdesc *pd, struct ip6_hdr *h, u_short *reason)
6494 {
6495 struct ip6_frag frag;
6496 struct ip6_ext ext;
6497 struct ip6_rthdr rthdr;
6498 u_int32_t end;
6499 int hdr_cnt, fraghdr_cnt = 0, rthdr_cnt = 0;
6500
6501 pd->off += sizeof(struct ip6_hdr);
6502 end = pd->off + ntohs(h->ip6_plen);
6503 pd->fragoff = pd->extoff = pd->jumbolen = 0;
6504 pd->proto = h->ip6_nxt;
6505
6506 for (hdr_cnt = 0; hdr_cnt < pf_hdr_limit; hdr_cnt++) {
6507 switch (pd->proto) {
6508 case IPPROTO_ROUTING:
6509 case IPPROTO_HOPOPTS:
6510 case IPPROTO_DSTOPTS:
6511 pd->badopts++;
6512 break;
6513 }
6514 switch (pd->proto) {
6515 case IPPROTO_FRAGMENT:
6516 if (fraghdr_cnt++) {
6517 DPFPRINTF(LOG_NOTICE, "IPv6 multiple fragment");
6518 REASON_SET(reason, PFRES_FRAG);
6519 return (PF_DROP);
6520 }
6521 /* jumbo payload packets cannot be fragmented */
6522 if (pd->jumbolen != 0) {
6523 DPFPRINTF(LOG_NOTICE, "IPv6 fragmented jumbo");
6524 REASON_SET(reason, PFRES_FRAG);
6525 return (PF_DROP);
6526 }
6527 if (!pf_pull_hdr(pd->m, pd->off, &frag, sizeof(frag),
6528 NULL, reason, AF_INET6)) {
6529 DPFPRINTF(LOG_NOTICE, "IPv6 short fragment");
6530 return (PF_DROP);
6531 }
6532 /* stop walking over non-initial fragments */
6533 if (ntohs((frag.ip6f_offlg & IP6F_OFF_MASK)) != 0) {
6534 pd->fragoff = pd->off;
6535 return (PF_PASS);
6536 }
6537 /* RFC6946: reassemble only non-atomic fragments */
6538 if (frag.ip6f_offlg & IP6F_MORE_FRAG)
6539 pd->fragoff = pd->off;
6540 pd->off += sizeof(frag);
6541 pd->proto = frag.ip6f_nxt;
6542 break;
6543 case IPPROTO_ROUTING:
6544 if (rthdr_cnt++) {
6545 DPFPRINTF(LOG_NOTICE, "IPv6 multiple rthdr");
6546 REASON_SET(reason, PFRES_IPOPTIONS);
6547 return (PF_DROP);
6548 }
6549 /* fragments may be short */
6550 if (pd->fragoff != 0 && end < pd->off + sizeof(rthdr)) {
6551 pd->off = pd->fragoff;
6552 pd->proto = IPPROTO_FRAGMENT;
6553 return (PF_PASS);
6554 }
6555 if (!pf_pull_hdr(pd->m, pd->off, &rthdr, sizeof(rthdr),
6556 NULL, reason, AF_INET6)) {
6557 DPFPRINTF(LOG_NOTICE, "IPv6 short rthdr");
6558 return (PF_DROP);
6559 }
6560 if (rthdr.ip6r_type == IPV6_RTHDR_TYPE_0) {
6561 DPFPRINTF(LOG_NOTICE, "IPv6 rthdr0");
6562 REASON_SET(reason,
PFRES_IPOPTIONS); 6563 return (PF_DROP); 6564 } 6565 /* FALLTHROUGH */ 6566 case IPPROTO_HOPOPTS: 6567 /* RFC2460 4.1: Hop-by-Hop only after IPv6 header */ 6568 if (pd->proto == IPPROTO_HOPOPTS && hdr_cnt > 0) { 6569 DPFPRINTF(LOG_NOTICE, "IPv6 hopopts not first"); 6570 REASON_SET(reason, PFRES_IPOPTIONS); 6571 return (PF_DROP); 6572 } 6573 /* FALLTHROUGH */ 6574 case IPPROTO_AH: 6575 case IPPROTO_DSTOPTS: 6576 /* fragments may be short */ 6577 if (pd->fragoff != 0 && end < pd->off + sizeof(ext)) { 6578 pd->off = pd->fragoff; 6579 pd->proto = IPPROTO_FRAGMENT; 6580 return (PF_PASS); 6581 } 6582 if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext), 6583 NULL, reason, AF_INET6)) { 6584 DPFPRINTF(LOG_NOTICE, "IPv6 short exthdr"); 6585 return (PF_DROP); 6586 } 6587 /* reassembly needs the ext header before the frag */ 6588 if (pd->fragoff == 0) 6589 pd->extoff = pd->off; 6590 if (pd->proto == IPPROTO_HOPOPTS && pd->fragoff == 0) { 6591 if (pf_walk_option6(pd, h, 6592 pd->off + sizeof(ext), 6593 pd->off + (ext.ip6e_len + 1) * 8, reason) 6594 != PF_PASS) 6595 return (PF_DROP); 6596 if (ntohs(h->ip6_plen) == 0 && 6597 pd->jumbolen != 0) { 6598 DPFPRINTF(LOG_NOTICE, 6599 "IPv6 missing jumbo"); 6600 REASON_SET(reason, PFRES_IPOPTIONS); 6601 return (PF_DROP); 6602 } 6603 } 6604 if (pd->proto == IPPROTO_AH) 6605 pd->off += (ext.ip6e_len + 2) * 4; 6606 else 6607 pd->off += (ext.ip6e_len + 1) * 8; 6608 pd->proto = ext.ip6e_nxt; 6609 break; 6610 case IPPROTO_TCP: 6611 case IPPROTO_UDP: 6612 case IPPROTO_ICMPV6: 6613 /* fragments may be short, ignore inner header then */ 6614 if (pd->fragoff != 0 && end < pd->off + 6615 (pd->proto == IPPROTO_TCP ? sizeof(struct tcphdr) : 6616 pd->proto == IPPROTO_UDP ? sizeof(struct udphdr) : 6617 sizeof(struct icmp6_hdr))) { 6618 pd->off = pd->fragoff; 6619 pd->proto = IPPROTO_FRAGMENT; 6620 } 6621 /* FALLTHROUGH */ 6622 default: 6623 return (PF_PASS); 6624 } 6625 } 6626 DPFPRINTF(LOG_NOTICE, "IPv6 nested extension header limit"); 6627 REASON_SET(reason, PFRES_IPOPTIONS); 6628 return (PF_DROP); 6629 } 6630 #endif /* INET6 */ 6631 6632 int 6633 pf_setup_pdesc(struct pf_pdesc *pd, sa_family_t af, int dir, 6634 struct pfi_kif *kif, struct mbuf *m, u_short *reason) 6635 { 6636 memset(pd, 0, sizeof(*pd)); 6637 pd->dir = dir; 6638 pd->kif = kif; /* kif is NULL when called by pflog */ 6639 pd->m = m; 6640 pd->sidx = (dir == PF_IN) ? 0 : 1; 6641 pd->didx = (dir == PF_IN) ? 1 : 0; 6642 pd->af = pd->naf = af; 6643 pd->rdomain = rtable_l2(pd->m->m_pkthdr.ph_rtableid); 6644 6645 switch (pd->af) { 6646 case AF_INET: { 6647 struct ip *h; 6648 6649 /* Check for illegal packets */ 6650 if (pd->m->m_pkthdr.len < (int)sizeof(struct ip)) { 6651 REASON_SET(reason, PFRES_SHORT); 6652 return (PF_DROP); 6653 } 6654 6655 h = mtod(pd->m, struct ip *); 6656 if (pd->m->m_pkthdr.len < ntohs(h->ip_len)) { 6657 REASON_SET(reason, PFRES_SHORT); 6658 return (PF_DROP); 6659 } 6660 6661 if (pf_walk_header(pd, h, reason) != PF_PASS) 6662 return (PF_DROP); 6663 6664 pd->src = (struct pf_addr *)&h->ip_src; 6665 pd->dst = (struct pf_addr *)&h->ip_dst; 6666 pd->tot_len = ntohs(h->ip_len); 6667 pd->tos = h->ip_tos & ~IPTOS_ECN_MASK; 6668 pd->ttl = h->ip_ttl; 6669 pd->virtual_proto = (h->ip_off & htons(IP_MF | IP_OFFMASK)) ? 
6670 PF_VPROTO_FRAGMENT : pd->proto; 6671 6672 break; 6673 } 6674 #ifdef INET6 6675 case AF_INET6: { 6676 struct ip6_hdr *h; 6677 6678 /* Check for illegal packets */ 6679 if (pd->m->m_pkthdr.len < (int)sizeof(struct ip6_hdr)) { 6680 REASON_SET(reason, PFRES_SHORT); 6681 return (PF_DROP); 6682 } 6683 6684 h = mtod(pd->m, struct ip6_hdr *); 6685 if (pd->m->m_pkthdr.len < 6686 sizeof(struct ip6_hdr) + ntohs(h->ip6_plen)) { 6687 REASON_SET(reason, PFRES_SHORT); 6688 return (PF_DROP); 6689 } 6690 6691 if (pf_walk_header6(pd, h, reason) != PF_PASS) 6692 return (PF_DROP); 6693 6694 #if 1 6695 /* 6696 * we do not support jumbogram yet. if we keep going, zero 6697 * ip6_plen will do something bad, so drop the packet for now. 6698 */ 6699 if (pd->jumbolen != 0) { 6700 REASON_SET(reason, PFRES_NORM); 6701 return (PF_DROP); 6702 } 6703 #endif /* 1 */ 6704 6705 pd->src = (struct pf_addr *)&h->ip6_src; 6706 pd->dst = (struct pf_addr *)&h->ip6_dst; 6707 pd->tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr); 6708 pd->tos = (ntohl(h->ip6_flow) & 0x0fc00000) >> 20; 6709 pd->ttl = h->ip6_hlim; 6710 pd->virtual_proto = (pd->fragoff != 0) ? 6711 PF_VPROTO_FRAGMENT : pd->proto; 6712 6713 break; 6714 } 6715 #endif /* INET6 */ 6716 default: 6717 panic("pf_setup_pdesc called with illegal af %u", pd->af); 6718 6719 } 6720 6721 pf_addrcpy(&pd->nsaddr, pd->src, pd->af); 6722 pf_addrcpy(&pd->ndaddr, pd->dst, pd->af); 6723 6724 switch (pd->virtual_proto) { 6725 case IPPROTO_TCP: { 6726 struct tcphdr *th = &pd->hdr.tcp; 6727 6728 if (!pf_pull_hdr(pd->m, pd->off, th, sizeof(*th), 6729 NULL, reason, pd->af)) 6730 return (PF_DROP); 6731 pd->hdrlen = sizeof(*th); 6732 if (pd->off + (th->th_off << 2) > pd->tot_len || 6733 (th->th_off << 2) < sizeof(struct tcphdr)) { 6734 REASON_SET(reason, PFRES_SHORT); 6735 return (PF_DROP); 6736 } 6737 pd->p_len = pd->tot_len - pd->off - (th->th_off << 2); 6738 pd->sport = &th->th_sport; 6739 pd->dport = &th->th_dport; 6740 pd->pcksum = &th->th_sum; 6741 break; 6742 } 6743 case IPPROTO_UDP: { 6744 struct udphdr *uh = &pd->hdr.udp; 6745 6746 if (!pf_pull_hdr(pd->m, pd->off, uh, sizeof(*uh), 6747 NULL, reason, pd->af)) 6748 return (PF_DROP); 6749 pd->hdrlen = sizeof(*uh); 6750 if (uh->uh_dport == 0 || 6751 pd->off + ntohs(uh->uh_ulen) > pd->tot_len || 6752 ntohs(uh->uh_ulen) < sizeof(struct udphdr)) { 6753 REASON_SET(reason, PFRES_SHORT); 6754 return (PF_DROP); 6755 } 6756 pd->sport = &uh->uh_sport; 6757 pd->dport = &uh->uh_dport; 6758 pd->pcksum = &uh->uh_sum; 6759 break; 6760 } 6761 case IPPROTO_ICMP: { 6762 if (!pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp, ICMP_MINLEN, 6763 NULL, reason, pd->af)) 6764 return (PF_DROP); 6765 pd->hdrlen = ICMP_MINLEN; 6766 if (pd->off + pd->hdrlen > pd->tot_len) { 6767 REASON_SET(reason, PFRES_SHORT); 6768 return (PF_DROP); 6769 } 6770 pd->pcksum = &pd->hdr.icmp.icmp_cksum; 6771 break; 6772 } 6773 #ifdef INET6 6774 case IPPROTO_ICMPV6: { 6775 size_t icmp_hlen = sizeof(struct icmp6_hdr); 6776 6777 if (!pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp6, icmp_hlen, 6778 NULL, reason, pd->af)) 6779 return (PF_DROP); 6780 /* ICMP headers we look further into to match state */ 6781 switch (pd->hdr.icmp6.icmp6_type) { 6782 case MLD_LISTENER_QUERY: 6783 case MLD_LISTENER_REPORT: 6784 icmp_hlen = sizeof(struct mld_hdr); 6785 break; 6786 case ND_NEIGHBOR_SOLICIT: 6787 case ND_NEIGHBOR_ADVERT: 6788 icmp_hlen = sizeof(struct nd_neighbor_solicit); 6789 /* FALLTHROUGH */ 6790 case ND_ROUTER_SOLICIT: 6791 case ND_ROUTER_ADVERT: 6792 case ND_REDIRECT: 6793 if (pd->ttl != 255) 
{ 6794 REASON_SET(reason, PFRES_NORM); 6795 return (PF_DROP); 6796 } 6797 break; 6798 } 6799 if (icmp_hlen > sizeof(struct icmp6_hdr) && 6800 !pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp6, icmp_hlen, 6801 NULL, reason, pd->af)) 6802 return (PF_DROP); 6803 pd->hdrlen = icmp_hlen; 6804 if (pd->off + pd->hdrlen > pd->tot_len) { 6805 REASON_SET(reason, PFRES_SHORT); 6806 return (PF_DROP); 6807 } 6808 pd->pcksum = &pd->hdr.icmp6.icmp6_cksum; 6809 break; 6810 } 6811 #endif /* INET6 */ 6812 } 6813 6814 if (pd->sport) 6815 pd->osport = pd->nsport = *pd->sport; 6816 if (pd->dport) 6817 pd->odport = pd->ndport = *pd->dport; 6818 6819 return (PF_PASS); 6820 } 6821 6822 void 6823 pf_counters_inc(int action, struct pf_pdesc *pd, struct pf_state *s, 6824 struct pf_rule *r, struct pf_rule *a) 6825 { 6826 int dirndx; 6827 pd->kif->pfik_bytes[pd->af == AF_INET6][pd->dir == PF_OUT] 6828 [action != PF_PASS] += pd->tot_len; 6829 pd->kif->pfik_packets[pd->af == AF_INET6][pd->dir == PF_OUT] 6830 [action != PF_PASS]++; 6831 6832 if (action == PF_PASS || action == PF_AFRT || r->action == PF_DROP) { 6833 dirndx = (pd->dir == PF_OUT); 6834 r->packets[dirndx]++; 6835 r->bytes[dirndx] += pd->tot_len; 6836 if (a != NULL) { 6837 a->packets[dirndx]++; 6838 a->bytes[dirndx] += pd->tot_len; 6839 } 6840 if (s != NULL) { 6841 struct pf_rule_item *ri; 6842 struct pf_sn_item *sni; 6843 6844 SLIST_FOREACH(sni, &s->src_nodes, next) { 6845 sni->sn->packets[dirndx]++; 6846 sni->sn->bytes[dirndx] += pd->tot_len; 6847 } 6848 dirndx = (pd->dir == s->direction) ? 0 : 1; 6849 s->packets[dirndx]++; 6850 s->bytes[dirndx] += pd->tot_len; 6851 6852 SLIST_FOREACH(ri, &s->match_rules, entry) { 6853 ri->r->packets[dirndx]++; 6854 ri->r->bytes[dirndx] += pd->tot_len; 6855 6856 if (ri->r->src.addr.type == PF_ADDR_TABLE) 6857 pfr_update_stats(ri->r->src.addr.p.tbl, 6858 &s->key[(s->direction == PF_IN)]-> 6859 addr[(s->direction == PF_OUT)], 6860 pd, ri->r->action, ri->r->src.neg); 6861 if (ri->r->dst.addr.type == PF_ADDR_TABLE) 6862 pfr_update_stats(ri->r->dst.addr.p.tbl, 6863 &s->key[(s->direction == PF_IN)]-> 6864 addr[(s->direction == PF_IN)], 6865 pd, ri->r->action, ri->r->dst.neg); 6866 } 6867 } 6868 if (r->src.addr.type == PF_ADDR_TABLE) 6869 pfr_update_stats(r->src.addr.p.tbl, 6870 (s == NULL) ? pd->src : 6871 &s->key[(s->direction == PF_IN)]-> 6872 addr[(s->direction == PF_OUT)], 6873 pd, r->action, r->src.neg); 6874 if (r->dst.addr.type == PF_ADDR_TABLE) 6875 pfr_update_stats(r->dst.addr.p.tbl, 6876 (s == NULL) ? pd->dst : 6877 &s->key[(s->direction == PF_IN)]-> 6878 addr[(s->direction == PF_IN)], 6879 pd, r->action, r->dst.neg); 6880 } 6881 } 6882 6883 int 6884 pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0) 6885 { 6886 #if NCARP > 0 6887 struct ifnet *ifp0; 6888 #endif 6889 struct pfi_kif *kif; 6890 u_short action, reason = 0; 6891 struct pf_rule *a = NULL, *r = &pf_default_rule; 6892 struct pf_state *s = NULL; 6893 struct pf_ruleset *ruleset = NULL; 6894 struct pf_pdesc pd; 6895 int dir = (fwdir == PF_FWD) ? 
PF_OUT : fwdir; 6896 u_int32_t qid, pqid = 0; 6897 int have_pf_lock = 0; 6898 struct pfsync_deferral *deferral = NULL; 6899 6900 if (!pf_status.running) 6901 return (PF_PASS); 6902 6903 #if NCARP > 0 6904 if (ifp->if_type == IFT_CARP && 6905 (ifp0 = if_get(ifp->if_carpdevidx)) != NULL) { 6906 kif = (struct pfi_kif *)ifp0->if_pf_kif; 6907 if_put(ifp0); 6908 } else 6909 #endif /* NCARP */ 6910 kif = (struct pfi_kif *)ifp->if_pf_kif; 6911 6912 if (kif == NULL) { 6913 DPFPRINTF(LOG_ERR, 6914 "%s: kif == NULL, if_xname %s", __func__, ifp->if_xname); 6915 return (PF_DROP); 6916 } 6917 if (kif->pfik_flags & PFI_IFLAG_SKIP) 6918 return (PF_PASS); 6919 6920 #ifdef DIAGNOSTIC 6921 if (((*m0)->m_flags & M_PKTHDR) == 0) 6922 panic("non-M_PKTHDR is passed to pf_test"); 6923 #endif /* DIAGNOSTIC */ 6924 6925 if ((*m0)->m_pkthdr.pf.flags & PF_TAG_GENERATED) 6926 return (PF_PASS); 6927 6928 if ((*m0)->m_pkthdr.pf.flags & PF_TAG_DIVERTED_PACKET) { 6929 (*m0)->m_pkthdr.pf.flags &= ~PF_TAG_DIVERTED_PACKET; 6930 return (PF_PASS); 6931 } 6932 6933 if ((*m0)->m_pkthdr.pf.flags & PF_TAG_REFRAGMENTED) { 6934 (*m0)->m_pkthdr.pf.flags &= ~PF_TAG_REFRAGMENTED; 6935 return (PF_PASS); 6936 } 6937 6938 action = pf_setup_pdesc(&pd, af, dir, kif, *m0, &reason); 6939 if (action != PF_PASS) { 6940 #if NPFLOG > 0 6941 pd.pflog |= PF_LOG_FORCE; 6942 #endif /* NPFLOG > 0 */ 6943 goto done; 6944 } 6945 6946 /* packet normalization and reassembly */ 6947 switch (pd.af) { 6948 case AF_INET: 6949 action = pf_normalize_ip(&pd, &reason); 6950 break; 6951 #ifdef INET6 6952 case AF_INET6: 6953 action = pf_normalize_ip6(&pd, &reason); 6954 break; 6955 #endif /* INET6 */ 6956 } 6957 *m0 = pd.m; 6958 /* if packet sits in reassembly queue, return without error */ 6959 if (pd.m == NULL) 6960 return PF_PASS; 6961 6962 if (action != PF_PASS) { 6963 #if NPFLOG > 0 6964 pd.pflog |= PF_LOG_FORCE; 6965 #endif /* NPFLOG > 0 */ 6966 goto done; 6967 } 6968 6969 /* if packet has been reassembled, update packet description */ 6970 if (pf_status.reass && pd.virtual_proto == PF_VPROTO_FRAGMENT) { 6971 action = pf_setup_pdesc(&pd, af, dir, kif, pd.m, &reason); 6972 if (action != PF_PASS) { 6973 #if NPFLOG > 0 6974 pd.pflog |= PF_LOG_FORCE; 6975 #endif /* NPFLOG > 0 */ 6976 goto done; 6977 } 6978 } 6979 pd.m->m_pkthdr.pf.flags |= PF_TAG_PROCESSED; 6980 6981 /* 6982 * Avoid pcb-lookups from the forwarding path. They should never 6983 * match and would cause MP locking problems. 
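* Presetting lookup.done to -1 below makes later rule evaluation
* treat the socket lookup as already failed, so uid/gid matching
* rules see "unknown" for forwarded packets.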
6984 */ 6985 if (fwdir == PF_FWD) { 6986 pd.lookup.done = -1; 6987 pd.lookup.uid = -1; 6988 pd.lookup.gid = -1; 6989 pd.lookup.pid = NO_PID; 6990 } 6991 6992 switch (pd.virtual_proto) { 6993 6994 case PF_VPROTO_FRAGMENT: { 6995 /* 6996 * handle fragments that aren't reassembled by 6997 * normalization 6998 */ 6999 PF_LOCK(); 7000 have_pf_lock = 1; 7001 action = pf_test_rule(&pd, &r, &s, &a, &ruleset, &reason, 7002 &deferral); 7003 s = pf_state_ref(s); 7004 if (action != PF_PASS) 7005 REASON_SET(&reason, PFRES_FRAG); 7006 break; 7007 } 7008 7009 case IPPROTO_ICMP: { 7010 if (pd.af != AF_INET) { 7011 action = PF_DROP; 7012 REASON_SET(&reason, PFRES_NORM); 7013 DPFPRINTF(LOG_NOTICE, 7014 "dropping IPv6 packet with ICMPv4 payload"); 7015 break; 7016 } 7017 PF_STATE_ENTER_READ(); 7018 action = pf_test_state_icmp(&pd, &s, &reason); 7019 s = pf_state_ref(s); 7020 PF_STATE_EXIT_READ(); 7021 if (action == PF_PASS || action == PF_AFRT) { 7022 #if NPFSYNC > 0 7023 pfsync_update_state(s); 7024 #endif /* NPFSYNC > 0 */ 7025 r = s->rule.ptr; 7026 a = s->anchor.ptr; 7027 #if NPFLOG > 0 7028 pd.pflog |= s->log; 7029 #endif /* NPFLOG > 0 */ 7030 } else if (s == NULL) { 7031 PF_LOCK(); 7032 have_pf_lock = 1; 7033 action = pf_test_rule(&pd, &r, &s, &a, &ruleset, 7034 &reason, &deferral); 7035 s = pf_state_ref(s); 7036 } 7037 break; 7038 } 7039 7040 #ifdef INET6 7041 case IPPROTO_ICMPV6: { 7042 if (pd.af != AF_INET6) { 7043 action = PF_DROP; 7044 REASON_SET(&reason, PFRES_NORM); 7045 DPFPRINTF(LOG_NOTICE, 7046 "dropping IPv4 packet with ICMPv6 payload"); 7047 break; 7048 } 7049 PF_STATE_ENTER_READ(); 7050 action = pf_test_state_icmp(&pd, &s, &reason); 7051 s = pf_state_ref(s); 7052 PF_STATE_EXIT_READ(); 7053 if (action == PF_PASS || action == PF_AFRT) { 7054 #if NPFSYNC > 0 7055 pfsync_update_state(s); 7056 #endif /* NPFSYNC > 0 */ 7057 r = s->rule.ptr; 7058 a = s->anchor.ptr; 7059 #if NPFLOG > 0 7060 pd.pflog |= s->log; 7061 #endif /* NPFLOG > 0 */ 7062 } else if (s == NULL) { 7063 PF_LOCK(); 7064 have_pf_lock = 1; 7065 action = pf_test_rule(&pd, &r, &s, &a, &ruleset, 7066 &reason, &deferral); 7067 s = pf_state_ref(s); 7068 } 7069 break; 7070 } 7071 #endif /* INET6 */ 7072 7073 default: 7074 if (pd.virtual_proto == IPPROTO_TCP) { 7075 if (pd.dir == PF_IN && (pd.hdr.tcp.th_flags & 7076 (TH_SYN|TH_ACK)) == TH_SYN && 7077 pf_synflood_check(&pd)) { 7078 PF_LOCK(); 7079 have_pf_lock = 1; 7080 pf_syncookie_send(&pd); 7081 action = PF_DROP; 7082 break; 7083 } 7084 if ((pd.hdr.tcp.th_flags & TH_ACK) && pd.p_len == 0) 7085 pqid = 1; 7086 action = pf_normalize_tcp(&pd); 7087 if (action == PF_DROP) 7088 break; 7089 } 7090 PF_STATE_ENTER_READ(); 7091 action = pf_test_state(&pd, &s, &reason, 0); 7092 s = pf_state_ref(s); 7093 PF_STATE_EXIT_READ(); 7094 if (s == NULL && action != PF_PASS && action != PF_AFRT && 7095 pd.dir == PF_IN && pd.virtual_proto == IPPROTO_TCP && 7096 (pd.hdr.tcp.th_flags & (TH_SYN|TH_ACK|TH_RST)) == TH_ACK && 7097 pf_syncookie_validate(&pd)) { 7098 struct mbuf *msyn; 7099 msyn = pf_syncookie_recreate_syn(&pd); 7100 if (msyn) { 7101 action = pf_test(af, fwdir, ifp, &msyn); 7102 m_freem(msyn); 7103 if (action == PF_PASS || action == PF_AFRT) { 7104 PF_STATE_ENTER_READ(); 7105 pf_test_state(&pd, &s, &reason, 1); 7106 s = pf_state_ref(s); 7107 PF_STATE_EXIT_READ(); 7108 if (s == NULL) 7109 return (PF_DROP); 7110 s->src.seqhi = 7111 ntohl(pd.hdr.tcp.th_ack) - 1; 7112 s->src.seqlo = 7113 ntohl(pd.hdr.tcp.th_seq) - 1; 7114 pf_set_protostate(s, PF_PEER_SRC, 7115 PF_TCPS_PROXY_DST); 7116 PF_LOCK(); 7117 
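/* continue the synproxy handshake with the destination */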
have_pf_lock = 1; 7118 action = pf_synproxy(&pd, &s, &reason); 7119 if (action != PF_PASS) { 7120 PF_UNLOCK(); 7121 pf_state_unref(s); 7122 return (action); 7123 } 7124 } 7125 } else 7126 action = PF_DROP; 7127 } 7128 7129 if (action == PF_PASS || action == PF_AFRT) { 7130 #if NPFSYNC > 0 7131 pfsync_update_state(s); 7132 #endif /* NPFSYNC > 0 */ 7133 r = s->rule.ptr; 7134 a = s->anchor.ptr; 7135 #if NPFLOG > 0 7136 pd.pflog |= s->log; 7137 #endif /* NPFLOG > 0 */ 7138 } else if (s == NULL) { 7139 PF_LOCK(); 7140 have_pf_lock = 1; 7141 action = pf_test_rule(&pd, &r, &s, &a, &ruleset, 7142 &reason, &deferral); 7143 s = pf_state_ref(s); 7144 } 7145 7146 if (pd.virtual_proto == IPPROTO_TCP) { 7147 if (s) { 7148 if (s->max_mss) 7149 pf_normalize_mss(&pd, s->max_mss); 7150 } else if (r->max_mss) 7151 pf_normalize_mss(&pd, r->max_mss); 7152 } 7153 7154 break; 7155 } 7156 7157 if (have_pf_lock != 0) 7158 PF_UNLOCK(); 7159 7160 /* 7161 * At the moment, we rely on NET_LOCK() to prevent removal of items 7162 * we've collected above ('r', 'anchor' and 'ruleset'). They'll have 7163 * to be refcounted when NET_LOCK() is gone. 7164 */ 7165 7166 done: 7167 if (action != PF_DROP) { 7168 if (s) { 7169 /* The non-state case is handled in pf_test_rule() */ 7170 if (action == PF_PASS && pd.badopts && 7171 !(s->state_flags & PFSTATE_ALLOWOPTS)) { 7172 action = PF_DROP; 7173 REASON_SET(&reason, PFRES_IPOPTIONS); 7174 #if NPFLOG > 0 7175 pd.pflog |= PF_LOG_FORCE; 7176 #endif /* NPFLOG > 0 */ 7177 DPFPRINTF(LOG_NOTICE, "dropping packet with " 7178 "ip/ipv6 options in pf_test()"); 7179 } 7180 7181 pf_scrub(pd.m, s->state_flags, pd.af, s->min_ttl, 7182 s->set_tos); 7183 pf_tag_packet(pd.m, s->tag, s->rtableid[pd.didx]); 7184 if (pqid || (pd.tos & IPTOS_LOWDELAY)) { 7185 qid = s->pqid; 7186 if (s->state_flags & PFSTATE_SETPRIO) 7187 pd.m->m_pkthdr.pf.prio = s->set_prio[1]; 7188 } else { 7189 qid = s->qid; 7190 if (s->state_flags & PFSTATE_SETPRIO) 7191 pd.m->m_pkthdr.pf.prio = s->set_prio[0]; 7192 } 7193 pd.m->m_pkthdr.pf.delay = s->delay; 7194 } else { 7195 pf_scrub(pd.m, r->scrub_flags, pd.af, r->min_ttl, 7196 r->set_tos); 7197 if (pqid || (pd.tos & IPTOS_LOWDELAY)) { 7198 qid = r->pqid; 7199 if (r->scrub_flags & PFSTATE_SETPRIO) 7200 pd.m->m_pkthdr.pf.prio = r->set_prio[1]; 7201 } else { 7202 qid = r->qid; 7203 if (r->scrub_flags & PFSTATE_SETPRIO) 7204 pd.m->m_pkthdr.pf.prio = r->set_prio[0]; 7205 } 7206 pd.m->m_pkthdr.pf.delay = r->delay; 7207 } 7208 } 7209 7210 if (action == PF_PASS && qid) 7211 pd.m->m_pkthdr.pf.qid = qid; 7212 if (pd.dir == PF_IN && s && s->key[PF_SK_STACK]) 7213 pf_mbuf_link_state_key(pd.m, s->key[PF_SK_STACK]); 7214 if (pd.dir == PF_OUT && 7215 pd.m->m_pkthdr.pf.inp && !pd.m->m_pkthdr.pf.inp->inp_pf_sk && 7216 s && s->key[PF_SK_STACK] && !s->key[PF_SK_STACK]->inp) 7217 pf_state_key_link_inpcb(s->key[PF_SK_STACK], 7218 pd.m->m_pkthdr.pf.inp); 7219 7220 if (s != NULL && !ISSET(pd.m->m_pkthdr.csum_flags, M_FLOWID)) { 7221 pd.m->m_pkthdr.ph_flowid = bemtoh64(&s->id); 7222 SET(pd.m->m_pkthdr.csum_flags, M_FLOWID); 7223 } 7224 7225 /* 7226 * connections redirected to loopback should not match sockets 7227 * bound specifically to loopback due to security implications, 7228 * see in_pcblookup_listen(). 
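* the PF_TAG_TRANSLATE_LOCALHOST flag set below is what
* in_pcblookup_listen() checks to tell such connections apart.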
7229 */ 7230 if (pd.destchg) 7231 if ((pd.af == AF_INET && (ntohl(pd.dst->v4.s_addr) >> 7232 IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) || 7233 (pd.af == AF_INET6 && IN6_IS_ADDR_LOOPBACK(&pd.dst->v6))) 7234 pd.m->m_pkthdr.pf.flags |= PF_TAG_TRANSLATE_LOCALHOST; 7235 /* We need to redo the route lookup on outgoing routes. */ 7236 if (pd.destchg && pd.dir == PF_OUT) 7237 pd.m->m_pkthdr.pf.flags |= PF_TAG_REROUTE; 7238 7239 if (pd.dir == PF_IN && action == PF_PASS && 7240 (r->divert.type == PF_DIVERT_TO || 7241 r->divert.type == PF_DIVERT_REPLY)) { 7242 struct pf_divert *divert; 7243 7244 if ((divert = pf_get_divert(pd.m))) { 7245 pd.m->m_pkthdr.pf.flags |= PF_TAG_DIVERTED; 7246 divert->addr = r->divert.addr; 7247 divert->port = r->divert.port; 7248 divert->rdomain = pd.rdomain; 7249 divert->type = r->divert.type; 7250 } 7251 } 7252 7253 if (action == PF_PASS && r->divert.type == PF_DIVERT_PACKET) 7254 action = PF_DIVERT; 7255 7256 #if NPFLOG > 0 7257 if (pd.pflog) { 7258 struct pf_rule_item *ri; 7259 7260 if (pd.pflog & PF_LOG_FORCE || r->log & PF_LOG_ALL) 7261 pflog_packet(&pd, reason, r, a, ruleset, NULL); 7262 if (s) { 7263 SLIST_FOREACH(ri, &s->match_rules, entry) 7264 if (ri->r->log & PF_LOG_ALL) 7265 pflog_packet(&pd, reason, ri->r, a, 7266 ruleset, NULL); 7267 } 7268 } 7269 #endif /* NPFLOG > 0 */ 7270 7271 pf_counters_inc(action, &pd, s, r, a); 7272 7273 switch (action) { 7274 case PF_SYNPROXY_DROP: 7275 m_freem(pd.m); 7276 /* FALLTHROUGH */ 7277 case PF_DEFER: 7278 #if NPFSYNC > 0 7279 /* 7280 * We no longer hold PF_LOCK() here, so we can dispatch 7281 * deferral if we are asked to do so. 7282 */ 7283 if (deferral != NULL) 7284 pfsync_undefer(deferral, 0); 7285 #endif /* NPFSYNC > 0 */ 7286 pd.m = NULL; 7287 action = PF_PASS; 7288 break; 7289 case PF_DIVERT: 7290 switch (pd.af) { 7291 case AF_INET: 7292 if (!divert_packet(pd.m, pd.dir, r->divert.port)) 7293 pd.m = NULL; 7294 break; 7295 #ifdef INET6 7296 case AF_INET6: 7297 if (!divert6_packet(pd.m, pd.dir, r->divert.port)) 7298 pd.m = NULL; 7299 break; 7300 #endif /* INET6 */ 7301 } 7302 action = PF_PASS; 7303 break; 7304 #ifdef INET6 7305 case PF_AFRT: 7306 if (pf_translate_af(&pd)) { 7307 action = PF_DROP; 7308 break; 7309 } 7310 pd.m->m_pkthdr.pf.flags |= PF_TAG_GENERATED; 7311 switch (pd.naf) { 7312 case AF_INET: 7313 if (pd.dir == PF_IN) { 7314 if (ipforwarding == 0) { 7315 ipstat_inc(ips_cantforward); 7316 action = PF_DROP; 7317 break; 7318 } 7319 ip_forward(pd.m, ifp, NULL, 1); 7320 } else 7321 ip_output(pd.m, NULL, NULL, 0, NULL, NULL, 0); 7322 break; 7323 case AF_INET6: 7324 if (pd.dir == PF_IN) { 7325 if (ip6_forwarding == 0) { 7326 ip6stat_inc(ip6s_cantforward); 7327 action = PF_DROP; 7328 break; 7329 } 7330 ip6_forward(pd.m, NULL, 1); 7331 } else 7332 ip6_output(pd.m, NULL, NULL, 0, NULL, NULL); 7333 break; 7334 } 7335 if (action != PF_DROP) { 7336 pd.m = NULL; 7337 action = PF_PASS; 7338 } 7339 break; 7340 #endif /* INET6 */ 7341 case PF_DROP: 7342 m_freem(pd.m); 7343 pd.m = NULL; 7344 break; 7345 default: 7346 if (s && s->rt) { 7347 switch (pd.af) { 7348 case AF_INET: 7349 pf_route(&pd, s); 7350 break; 7351 #ifdef INET6 7352 case AF_INET6: 7353 pf_route6(&pd, s); 7354 break; 7355 #endif /* INET6 */ 7356 } 7357 } 7358 break; 7359 } 7360 7361 #ifdef INET6 7362 /* if reassembled packet passed, create new fragments */ 7363 if (pf_status.reass && action == PF_PASS && pd.m && fwdir == PF_FWD && 7364 pd.af == AF_INET6) { 7365 struct m_tag *mtag; 7366 7367 if ((mtag = m_tag_find(pd.m, PACKET_TAG_PF_REASSEMBLED, NULL))) 7368 action 
= pf_refragment6(&pd.m, mtag, NULL, NULL, NULL);
7369 }
7370 #endif /* INET6 */
7371 if (s && action != PF_DROP) {
7372 if (!s->if_index_in && dir == PF_IN)
7373 s->if_index_in = ifp->if_index;
7374 else if (!s->if_index_out && dir == PF_OUT)
7375 s->if_index_out = ifp->if_index;
7376 }
7377
7378 *m0 = pd.m;
7379
7380 pf_state_unref(s);
7381
7382 return (action);
7383 }
7384
7385 int
7386 pf_ouraddr(struct mbuf *m)
7387 {
7388 struct pf_state_key *sk;
7389
7390 if (m->m_pkthdr.pf.flags & PF_TAG_DIVERTED)
7391 return (1);
7392
7393 sk = m->m_pkthdr.pf.statekey;
7394 if (sk != NULL) {
7395 if (sk->inp != NULL)
7396 return (1);
7397 }
7398
7399 return (-1);
7400 }
7401
7402 /*
7403 * must be called whenever any addressing information such as
7404 * address, port, or protocol has changed
7405 */
7406 void
7407 pf_pkt_addr_changed(struct mbuf *m)
7408 {
7409 pf_mbuf_unlink_state_key(m);
7410 pf_mbuf_unlink_inpcb(m);
7411 }
7412
7413 struct inpcb *
7414 pf_inp_lookup(struct mbuf *m)
7415 {
7416 struct inpcb *inp = NULL;
7417 struct pf_state_key *sk = m->m_pkthdr.pf.statekey;
7418
7419 if (!pf_state_key_isvalid(sk))
7420 pf_mbuf_unlink_state_key(m);
7421 else
7422 inp = m->m_pkthdr.pf.statekey->inp;
7423
7424 if (inp && inp->inp_pf_sk)
7425 KASSERT(m->m_pkthdr.pf.statekey == inp->inp_pf_sk);
7426
7427 return (inp);
7428 }
7429
7430 void
7431 pf_inp_link(struct mbuf *m, struct inpcb *inp)
7432 {
7433 struct pf_state_key *sk = m->m_pkthdr.pf.statekey;
7434
7435 if (!pf_state_key_isvalid(sk)) {
7436 pf_mbuf_unlink_state_key(m);
7437 return;
7438 }
7439
7440 /*
7441 * we don't need to grab the PF lock here. in the worst case we link
7442 * the inp to a state which is just being marked as deleted by
7443 * another thread.
7444 */
7445 if (inp && !sk->inp && !inp->inp_pf_sk)
7446 pf_state_key_link_inpcb(sk, inp);
7447
7448 /* The statekey has finished finding the inp; it is no longer needed. */
7449 pf_mbuf_unlink_state_key(m);
7450 }
7451
7452 void
7453 pf_inp_unlink(struct inpcb *inp)
7454 {
7455 pf_inpcb_unlink_state_key(inp);
7456 }
7457
7458 void
7459 pf_state_key_link_reverse(struct pf_state_key *sk, struct pf_state_key *skrev)
7460 {
7461 struct pf_state_key *old_reverse;
7462
7463 old_reverse = atomic_cas_ptr(&sk->reverse, NULL, skrev);
7464 if (old_reverse != NULL)
7465 KASSERT(old_reverse == skrev);
7466 else {
7467 pf_state_key_ref(skrev);
7468
7469 /*
7470 * NOTE: if sk == skrev, the KASSERT() below holds; we still
7471 * want to grab a reference in that case, because
7472 * pf_state_key_unlink_reverse() does not check whether the
7473 * keys are identical.
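* (it always drops two references when it unlinks a pair; see
* the comment there.)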
7474 */ 7475 old_reverse = atomic_cas_ptr(&skrev->reverse, NULL, sk); 7476 if (old_reverse != NULL) 7477 KASSERT(old_reverse == sk); 7478 7479 pf_state_key_ref(sk); 7480 } 7481 } 7482 7483 #if NPFLOG > 0 7484 void 7485 pf_log_matches(struct pf_pdesc *pd, struct pf_rule *rm, struct pf_rule *am, 7486 struct pf_ruleset *ruleset, struct pf_rule_slist *matchrules) 7487 { 7488 struct pf_rule_item *ri; 7489 7490 /* if this is the log(matches) rule, packet has been logged already */ 7491 if (rm->log & PF_LOG_MATCHES) 7492 return; 7493 7494 SLIST_FOREACH(ri, matchrules, entry) 7495 if (ri->r->log & PF_LOG_MATCHES) 7496 pflog_packet(pd, PFRES_MATCH, rm, am, ruleset, ri->r); 7497 } 7498 #endif /* NPFLOG > 0 */ 7499 7500 struct pf_state_key * 7501 pf_state_key_ref(struct pf_state_key *sk) 7502 { 7503 if (sk != NULL) 7504 PF_REF_TAKE(sk->refcnt); 7505 7506 return (sk); 7507 } 7508 7509 void 7510 pf_state_key_unref(struct pf_state_key *sk) 7511 { 7512 if (PF_REF_RELE(sk->refcnt)) { 7513 /* state key must be removed from tree */ 7514 KASSERT(!pf_state_key_isvalid(sk)); 7515 /* state key must be unlinked from reverse key */ 7516 KASSERT(sk->reverse == NULL); 7517 /* state key must be unlinked from socket */ 7518 KASSERT(sk->inp == NULL); 7519 pool_put(&pf_state_key_pl, sk); 7520 } 7521 } 7522 7523 int 7524 pf_state_key_isvalid(struct pf_state_key *sk) 7525 { 7526 return ((sk != NULL) && (sk->removed == 0)); 7527 } 7528 7529 void 7530 pf_mbuf_link_state_key(struct mbuf *m, struct pf_state_key *sk) 7531 { 7532 KASSERT(m->m_pkthdr.pf.statekey == NULL); 7533 m->m_pkthdr.pf.statekey = pf_state_key_ref(sk); 7534 } 7535 7536 void 7537 pf_mbuf_unlink_state_key(struct mbuf *m) 7538 { 7539 struct pf_state_key *sk = m->m_pkthdr.pf.statekey; 7540 7541 if (sk != NULL) { 7542 m->m_pkthdr.pf.statekey = NULL; 7543 pf_state_key_unref(sk); 7544 } 7545 } 7546 7547 void 7548 pf_mbuf_link_inpcb(struct mbuf *m, struct inpcb *inp) 7549 { 7550 KASSERT(m->m_pkthdr.pf.inp == NULL); 7551 m->m_pkthdr.pf.inp = in_pcbref(inp); 7552 } 7553 7554 void 7555 pf_mbuf_unlink_inpcb(struct mbuf *m) 7556 { 7557 struct inpcb *inp = m->m_pkthdr.pf.inp; 7558 7559 if (inp != NULL) { 7560 m->m_pkthdr.pf.inp = NULL; 7561 in_pcbunref(inp); 7562 } 7563 } 7564 7565 void 7566 pf_state_key_link_inpcb(struct pf_state_key *sk, struct inpcb *inp) 7567 { 7568 KASSERT(sk->inp == NULL); 7569 sk->inp = in_pcbref(inp); 7570 KASSERT(inp->inp_pf_sk == NULL); 7571 inp->inp_pf_sk = pf_state_key_ref(sk); 7572 } 7573 7574 void 7575 pf_inpcb_unlink_state_key(struct inpcb *inp) 7576 { 7577 struct pf_state_key *sk = inp->inp_pf_sk; 7578 7579 if (sk != NULL) { 7580 KASSERT(sk->inp == inp); 7581 sk->inp = NULL; 7582 inp->inp_pf_sk = NULL; 7583 pf_state_key_unref(sk); 7584 in_pcbunref(inp); 7585 } 7586 } 7587 7588 void 7589 pf_state_key_unlink_inpcb(struct pf_state_key *sk) 7590 { 7591 struct inpcb *inp = sk->inp; 7592 7593 if (inp != NULL) { 7594 KASSERT(inp->inp_pf_sk == sk); 7595 sk->inp = NULL; 7596 inp->inp_pf_sk = NULL; 7597 pf_state_key_unref(sk); 7598 in_pcbunref(inp); 7599 } 7600 } 7601 7602 void 7603 pf_state_key_unlink_reverse(struct pf_state_key *sk) 7604 { 7605 struct pf_state_key *skrev = sk->reverse; 7606 7607 /* Note that sk and skrev may be equal, then we unref twice. 
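* This balances the two references taken in pf_state_key_link_reverse().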
*/ 7608 if (skrev != NULL) { 7609 KASSERT(skrev->reverse == sk); 7610 sk->reverse = NULL; 7611 skrev->reverse = NULL; 7612 pf_state_key_unref(skrev); 7613 pf_state_key_unref(sk); 7614 } 7615 } 7616 7617 struct pf_state * 7618 pf_state_ref(struct pf_state *s) 7619 { 7620 if (s != NULL) 7621 PF_REF_TAKE(s->refcnt); 7622 return (s); 7623 } 7624 7625 void 7626 pf_state_unref(struct pf_state *s) 7627 { 7628 if ((s != NULL) && PF_REF_RELE(s->refcnt)) { 7629 /* never inserted or removed */ 7630 #if NPFSYNC > 0 7631 KASSERT((TAILQ_NEXT(s, sync_list) == NULL) || 7632 ((TAILQ_NEXT(s, sync_list) == _Q_INVALID) && 7633 (s->sync_state == PFSYNC_S_NONE))); 7634 #endif /* NPFSYNC */ 7635 KASSERT((TAILQ_NEXT(s, entry_list) == NULL) || 7636 (TAILQ_NEXT(s, entry_list) == _Q_INVALID)); 7637 KASSERT((s->key[PF_SK_WIRE] == NULL) && 7638 (s->key[PF_SK_STACK] == NULL)); 7639 7640 pool_put(&pf_state_pl, s); 7641 } 7642 } 7643 7644 int 7645 pf_delay_pkt(struct mbuf *m, u_int ifidx) 7646 { 7647 struct pf_pktdelay *pdy; 7648 7649 if ((pdy = pool_get(&pf_pktdelay_pl, PR_NOWAIT)) == NULL) { 7650 m_freem(m); 7651 return (ENOBUFS); 7652 } 7653 pdy->ifidx = ifidx; 7654 pdy->m = m; 7655 timeout_set(&pdy->to, pf_pktenqueue_delayed, pdy); 7656 timeout_add_msec(&pdy->to, m->m_pkthdr.pf.delay); 7657 m->m_pkthdr.pf.delay = 0; 7658 return (0); 7659 } 7660 7661 void 7662 pf_pktenqueue_delayed(void *arg) 7663 { 7664 struct pf_pktdelay *pdy = arg; 7665 struct ifnet *ifp; 7666 7667 ifp = if_get(pdy->ifidx); 7668 if (ifp != NULL) { 7669 if_enqueue(ifp, pdy->m); 7670 if_put(ifp); 7671 } else 7672 m_freem(pdy->m); 7673 7674 pool_put(&pf_pktdelay_pl, pdy); 7675 } 7676