/*	$OpenBSD: pf.c,v 1.1080 2018/12/17 09:11:10 claudio Exp $ */

/*
 * Copyright (c) 2001 Daniel Hartmeier
 * Copyright (c) 2002 - 2013 Henning Brauer <henning@openbsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *    - Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *    - Redistributions in binary form must reproduce the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer in the documentation and/or other materials provided
 *      with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Effort sponsored in part by the Defense Advanced Research Projects
 * Agency (DARPA) and Air Force Research Laboratory, Air Force
 * Materiel Command, USAF, under agreement number F30602-01-2-0537.
 *
 */

#include "bpfilter.h"
#include "carp.h"
#include "pflog.h"
#include "pfsync.h"
#include "pflow.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/filio.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/kernel.h>
#include <sys/time.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/syslog.h>

#include <crypto/sha2.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_types.h>
#include <net/route.h>

#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
#include <netinet/in_pcb.h>
#include <netinet/ip_var.h>
#include <netinet/ip_icmp.h>
#include <netinet/icmp_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_fsm.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>
#include <netinet/ip_divert.h>

#ifdef INET6
#include <netinet6/in6_var.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet/icmp6.h>
#include <netinet6/nd6.h>
#include <netinet6/ip6_divert.h>
#endif /* INET6 */

#include <net/pfvar.h>
#include <net/pfvar_priv.h>

#if NPFLOG > 0
#include <net/if_pflog.h>
#endif	/* NPFLOG > 0 */

#if NPFLOW > 0
#include <net/if_pflow.h>
#endif	/* NPFLOW > 0 */

#if NPFSYNC > 0
#include <net/if_pfsync.h>
#endif	/* NPFSYNC > 0 */

#ifdef DDB
#include <machine/db_machdep.h>
#include <ddb/db_interface.h>
#endif

/*
 * Global variables
 */
struct pf_state_tree	 pf_statetbl;
struct pf_queuehead	 pf_queues[2];
struct pf_queuehead	*pf_queues_active;
struct pf_queuehead	*pf_queues_inactive;

struct pf_status	 pf_status;

int			 pf_hdr_limit = 20;	/* arbitrary limit, tune in ddb */

SHA2_CTX		 pf_tcp_secret_ctx;
u_char			 pf_tcp_secret[16];
int			 pf_tcp_secret_init;
int			 pf_tcp_iss_off;

int		 pf_npurge;
struct task	 pf_purge_task = TASK_INITIALIZER(pf_purge, &pf_npurge);
struct timeout	 pf_purge_to = TIMEOUT_INITIALIZER(pf_purge_timeout, NULL);

enum pf_test_status {
	PF_TEST_FAIL = -1,
	PF_TEST_OK,
	PF_TEST_QUICK
};

struct pf_test_ctx {
	enum pf_test_status	  test_status;
	struct pf_pdesc		 *pd;
	struct pf_rule_actions	  act;
	u_int8_t		  icmpcode;
	u_int8_t		  icmptype;
	int			  icmp_dir;
	int			  state_icmp;
	int			  tag;
	u_short			  reason;
	struct pf_rule_item	 *ri;
	struct pf_src_node	 *sns[PF_SN_MAX];
	struct pf_rule_slist	  rules;
	struct pf_rule		 *nr;
	struct pf_rule		**rm;
	struct pf_rule		 *a;
	struct pf_rule		**am;
	struct pf_ruleset	**rsm;
	struct pf_ruleset	 *arsm;
	struct pf_ruleset	 *aruleset;
	struct tcphdr		 *th;
	int			  depth;
};

#define	PF_ANCHOR_STACK_MAX	64

struct pool		 pf_src_tree_pl, pf_rule_pl, pf_queue_pl;
struct pool		 pf_state_pl, pf_state_key_pl, pf_state_item_pl;
struct pool		 pf_rule_item_pl, pf_sn_item_pl, pf_pktdelay_pl;

void			 pf_add_threshold(struct pf_threshold *);
int			 pf_check_threshold(struct pf_threshold *);
int			 pf_check_tcp_cksum(struct mbuf *, int, int,
			    sa_family_t);
static __inline void	 pf_cksum_fixup(u_int16_t *, u_int16_t, u_int16_t,
			    u_int8_t);
void			 pf_cksum_fixup_a(u_int16_t *, const struct pf_addr *,
			    const struct pf_addr *, sa_family_t, u_int8_t);
int			 pf_modulate_sack(struct pf_pdesc *,
			    struct pf_state_peer *);
int			 pf_icmp_mapping(struct pf_pdesc *, u_int8_t, int *,
			    u_int16_t *, u_int16_t *);
int			 pf_change_icmp_af(struct mbuf *, int,
			    struct pf_pdesc *, struct pf_pdesc *,
			    struct pf_addr *, struct pf_addr *, sa_family_t,
			    sa_family_t);
int			 pf_translate_a(struct pf_pdesc *, struct pf_addr *,
			    struct pf_addr *);
void			 pf_translate_icmp(struct pf_pdesc *, struct pf_addr *,
			    u_int16_t *, struct pf_addr *, struct pf_addr *,
			    u_int16_t);
int			 pf_translate_icmp_af(struct pf_pdesc*, int, void *);
void			 pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t, int,
			    sa_family_t, struct pf_rule *, u_int);
void			 pf_detach_state(struct pf_state *);
void			 pf_state_key_detach(struct pf_state *, int);
u_int32_t		 pf_tcp_iss(struct pf_pdesc *);
void			 pf_rule_to_actions(struct pf_rule *,
			    struct pf_rule_actions *);
int			 pf_test_rule(struct pf_pdesc *, struct pf_rule **,
			    struct pf_state **, struct pf_rule **,
			    struct pf_ruleset **, u_short *);
static __inline int	 pf_create_state(struct pf_pdesc *, struct pf_rule *,
			    struct pf_rule *, struct pf_rule *,
			    struct pf_state_key **, struct pf_state_key **,
			    int *, struct pf_state **, int,
			    struct pf_rule_slist *, struct pf_rule_actions *,
			    struct pf_src_node *[]);
static __inline int	 pf_state_key_addr_setup(struct pf_pdesc *, void *,
			    int, struct pf_addr *, int, struct pf_addr *,
			    int, int);
int			 pf_state_key_setup(struct pf_pdesc *, struct
			    pf_state_key **, struct pf_state_key **, int);
int			 pf_tcp_track_full(struct pf_pdesc *,
			    struct pf_state **, u_short *, int *, int);
int			 pf_tcp_track_sloppy(struct pf_pdesc *,
			    struct pf_state **, u_short *);
static __inline int	 pf_synproxy(struct pf_pdesc *, struct pf_state **,
			    u_short *);
int			 pf_test_state(struct pf_pdesc *, struct pf_state **,
			    u_short *, int);
int			 pf_icmp_state_lookup(struct pf_pdesc *,
			    struct pf_state_key_cmp *, struct pf_state **,
			    u_int16_t, u_int16_t, int, int *, int, int);
int			 pf_test_state_icmp(struct pf_pdesc *,
			    struct pf_state **, u_short *);
u_int16_t		 pf_calc_mss(struct pf_addr *, sa_family_t, int,
			    u_int16_t);
static __inline int	 pf_set_rt_ifp(struct pf_state *, struct pf_addr *,
			    sa_family_t);
struct pf_divert	*pf_get_divert(struct mbuf *);
int			 pf_walk_header(struct pf_pdesc *, struct ip *,
			    u_short *);
int			 pf_walk_option6(struct pf_pdesc *, struct ip6_hdr *,
			    int, int, u_short *);
int			 pf_walk_header6(struct pf_pdesc *, struct ip6_hdr *,
			    u_short *);
void			 pf_print_state_parts(struct pf_state *,
			    struct pf_state_key *, struct pf_state_key *);
int			 pf_addr_wrap_neq(struct pf_addr_wrap *,
			    struct pf_addr_wrap *);
int			 pf_compare_state_keys(struct pf_state_key *,
			    struct pf_state_key *, struct pfi_kif *, u_int);
int			 pf_find_state(struct pf_pdesc *,
			    struct pf_state_key_cmp *, struct pf_state **);
int			 pf_src_connlimit(struct pf_state **);
int			 pf_match_rcvif(struct mbuf *, struct pf_rule *);
int			 pf_step_into_anchor(struct pf_test_ctx *,
			    struct pf_rule *);
int			 pf_match_rule(struct pf_test_ctx *,
			    struct pf_ruleset *);
void			 pf_counters_inc(int, struct pf_pdesc *,
			    struct pf_state *, struct pf_rule *,
			    struct pf_rule *);

int			 pf_state_key_isvalid(struct pf_state_key *);
struct pf_state_key	*pf_state_key_ref(struct pf_state_key *);
void			 pf_state_key_unref(struct pf_state_key *);
void			 pf_state_key_link_reverse(struct pf_state_key *,
			    struct pf_state_key *);
void			 pf_state_key_unlink_reverse(struct pf_state_key *);
void			 pf_state_key_link_inpcb(struct pf_state_key *,
			    struct inpcb *);
void			 pf_state_key_unlink_inpcb(struct pf_state_key *);
void			 pf_inpcb_unlink_state_key(struct inpcb *);
void			 pf_pktenqueue_delayed(void *);

#if NPFLOG > 0
void			 pf_log_matches(struct pf_pdesc *, struct pf_rule *,
			    struct pf_rule *, struct pf_ruleset *,
			    struct pf_rule_slist *);
#endif	/* NPFLOG > 0 */

extern struct pool pfr_ktable_pl;
extern struct pool pfr_kentry_pl;

struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = {
	{ &pf_state_pl,		PFSTATE_HIWAT,		PFSTATE_HIWAT },
	{ &pf_src_tree_pl,	PFSNODE_HIWAT,		PFSNODE_HIWAT },
	{ &pf_frent_pl,		PFFRAG_FRENT_HIWAT,	PFFRAG_FRENT_HIWAT },
	{ &pfr_ktable_pl,	PFR_KTABLE_HIWAT,	PFR_KTABLE_HIWAT },
	{ &pfr_kentry_pl,	PFR_KENTRY_HIWAT,	PFR_KENTRY_HIWAT },
	{ &pf_pktdelay_pl,	PF_PKTDELAY_MAXPKTS,	PF_PKTDELAY_MAXPKTS }
};

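/*
 * Descriptive note (not in the original source): BOUND_IFACE() picks the
 * interface a new state is bound to.  Rules with "if-bound" set
 * (PFRULE_IFBOUND) pin the state to the given kif; all other states float
 * across interfaces (pfi_all).
 */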
#define BOUND_IFACE(r, k) \
	((r)->rule_flag & PFRULE_IFBOUND) ? (k) : pfi_all

#define STATE_INC_COUNTERS(s)					\
	do {							\
		struct pf_rule_item *mrm;			\
		s->rule.ptr->states_cur++;			\
		s->rule.ptr->states_tot++;			\
		if (s->anchor.ptr != NULL) {			\
			s->anchor.ptr->states_cur++;		\
			s->anchor.ptr->states_tot++;		\
		}						\
		SLIST_FOREACH(mrm, &s->match_rules, entry)	\
			mrm->r->states_cur++;			\
	} while (0)

static __inline int pf_src_compare(struct pf_src_node *, struct pf_src_node *);
static __inline int pf_state_compare_key(struct pf_state_key *,
	struct pf_state_key *);
static __inline int pf_state_compare_id(struct pf_state *,
	struct pf_state *);
#ifdef INET6
static __inline void pf_cksum_uncover(u_int16_t *, u_int16_t, u_int8_t);
static __inline void pf_cksum_cover(u_int16_t *, u_int16_t, u_int8_t);
#endif /* INET6 */
static __inline void pf_set_protostate(struct pf_state *, int, u_int8_t);

struct pf_src_tree tree_src_tracking;

struct pf_state_tree_id tree_id;
struct pf_state_queue state_list;

RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare);
RB_GENERATE(pf_state_tree, pf_state_key, entry, pf_state_compare_key);
RB_GENERATE(pf_state_tree_id, pf_state,
    entry_id, pf_state_compare_id);

SLIST_HEAD(pf_rule_gcl, pf_rule)	pf_rule_gcl =
	SLIST_HEAD_INITIALIZER(pf_rule_gcl);

__inline int
pf_addr_compare(struct pf_addr *a, struct pf_addr *b, sa_family_t af)
{
	switch (af) {
	case AF_INET:
		if (a->addr32[0] > b->addr32[0])
			return (1);
		if (a->addr32[0] < b->addr32[0])
			return (-1);
		break;
#ifdef INET6
	case AF_INET6:
		if (a->addr32[3] > b->addr32[3])
			return (1);
		if (a->addr32[3] < b->addr32[3])
			return (-1);
		if (a->addr32[2] > b->addr32[2])
			return (1);
		if (a->addr32[2] < b->addr32[2])
			return (-1);
		if (a->addr32[1] > b->addr32[1])
			return (1);
		if (a->addr32[1] < b->addr32[1])
			return (-1);
		if (a->addr32[0] > b->addr32[0])
			return (1);
		if (a->addr32[0] < b->addr32[0])
			return (-1);
		break;
#endif /* INET6 */
	}
	return (0);
}

static __inline int
pf_src_compare(struct pf_src_node *a, struct pf_src_node *b)
{
	int	diff;

	if (a->rule.ptr > b->rule.ptr)
		return (1);
	if (a->rule.ptr < b->rule.ptr)
		return (-1);
	if ((diff = a->type - b->type) != 0)
		return (diff);
	if ((diff = a->af - b->af) != 0)
		return (diff);
	if ((diff = pf_addr_compare(&a->addr, &b->addr, a->af)) != 0)
		return (diff);
	return (0);
}

static __inline void
pf_set_protostate(struct pf_state *s, int which, u_int8_t newstate)
{
	if (which == PF_PEER_DST || which == PF_PEER_BOTH)
		s->dst.state = newstate;
	if (which == PF_PEER_DST)
		return;

	if (s->src.state == newstate)
		return;
	if (s->creatorid == pf_status.hostid && s->key[PF_SK_STACK] != NULL &&
	    s->key[PF_SK_STACK]->proto == IPPROTO_TCP &&
	    !(TCPS_HAVEESTABLISHED(s->src.state) ||
	    s->src.state == TCPS_CLOSED) &&
	    (TCPS_HAVEESTABLISHED(newstate) || newstate == TCPS_CLOSED))
		pf_status.states_halfopen--;

	s->src.state = newstate;
}

void
pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af)
{
	switch (af) {
	case AF_INET:
		dst->addr32[0] = src->addr32[0];
		break;
#ifdef INET6
	case AF_INET6:
		dst->addr32[0] = src->addr32[0];
		dst->addr32[1] = src->addr32[1];
		dst->addr32[2] = src->addr32[2];
		dst->addr32[3] = src->addr32[3];
		break;
#endif /* INET6 */
	default:
		unhandled_af(af);
	}
}

void
pf_init_threshold(struct pf_threshold *threshold,
    u_int32_t limit, u_int32_t seconds)
{
	threshold->limit = limit * PF_THRESHOLD_MULT;
	threshold->seconds = seconds;
	threshold->count = 0;
	threshold->last = time_uptime;
}

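/*
 * Thresholds are kept in fixed point: each event adds PF_THRESHOLD_MULT
 * to count, and count decays linearly over the configured window before
 * the new event is added.  Illustrative example: with seconds = 10 and
 * diff = 5 idle seconds, count -= count * 5 / 10, i.e. the count halves;
 * pf_check_threshold() then trips once count exceeds
 * limit * PF_THRESHOLD_MULT (scaled in pf_init_threshold() above).
 */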
void
pf_add_threshold(struct pf_threshold *threshold)
{
	u_int32_t t = time_uptime, diff = t - threshold->last;

	if (diff >= threshold->seconds)
		threshold->count = 0;
	else
		threshold->count -= threshold->count * diff /
		    threshold->seconds;
	threshold->count += PF_THRESHOLD_MULT;
	threshold->last = t;
}

int
pf_check_threshold(struct pf_threshold *threshold)
{
	return (threshold->count > threshold->limit);
}

int
pf_src_connlimit(struct pf_state **state)
{
	int			 bad = 0;
	struct pf_src_node	*sn;

	if ((sn = pf_get_src_node((*state), PF_SN_NONE)) == NULL)
		return (0);

	sn->conn++;
	(*state)->src.tcp_est = 1;
	pf_add_threshold(&sn->conn_rate);

	if ((*state)->rule.ptr->max_src_conn &&
	    (*state)->rule.ptr->max_src_conn < sn->conn) {
		pf_status.lcounters[LCNT_SRCCONN]++;
		bad++;
	}

	if ((*state)->rule.ptr->max_src_conn_rate.limit &&
	    pf_check_threshold(&sn->conn_rate)) {
		pf_status.lcounters[LCNT_SRCCONNRATE]++;
		bad++;
	}

	if (!bad)
		return (0);

	if ((*state)->rule.ptr->overload_tbl) {
		struct pfr_addr	 p;
		u_int32_t	 killed = 0;

		pf_status.lcounters[LCNT_OVERLOAD_TABLE]++;
		if (pf_status.debug >= LOG_NOTICE) {
			log(LOG_NOTICE,
			    "pf: pf_src_connlimit: blocking address ");
			pf_print_host(&sn->addr, 0,
			    (*state)->key[PF_SK_WIRE]->af);
		}

		memset(&p, 0, sizeof(p));
		p.pfra_af = (*state)->key[PF_SK_WIRE]->af;
		switch ((*state)->key[PF_SK_WIRE]->af) {
		case AF_INET:
			p.pfra_net = 32;
			p.pfra_ip4addr = sn->addr.v4;
			break;
#ifdef INET6
		case AF_INET6:
			p.pfra_net = 128;
			p.pfra_ip6addr = sn->addr.v6;
			break;
#endif /* INET6 */
		}

		pfr_insert_kentry((*state)->rule.ptr->overload_tbl,
		    &p, time_second);

		/* kill existing states if that's required. */
		if ((*state)->rule.ptr->flush) {
			struct pf_state_key *sk;
			struct pf_state *st;

			pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++;
			RB_FOREACH(st, pf_state_tree_id, &tree_id) {
				sk = st->key[PF_SK_WIRE];
				/*
				 * Kill states from this source. (Only those
				 * from the same rule if PF_FLUSH_GLOBAL is not
				 * set)
				 */
				if (sk->af ==
				    (*state)->key[PF_SK_WIRE]->af &&
				    (((*state)->direction == PF_OUT &&
				    PF_AEQ(&sn->addr, &sk->addr[1], sk->af)) ||
				    ((*state)->direction == PF_IN &&
				    PF_AEQ(&sn->addr, &sk->addr[0], sk->af))) &&
				    ((*state)->rule.ptr->flush &
				    PF_FLUSH_GLOBAL ||
				    (*state)->rule.ptr == st->rule.ptr)) {
					st->timeout = PFTM_PURGE;
					pf_set_protostate(st, PF_PEER_BOTH,
					    TCPS_CLOSED);
					killed++;
				}
			}
			if (pf_status.debug >= LOG_NOTICE)
				addlog(", %u states killed", killed);
		}
		if (pf_status.debug >= LOG_NOTICE)
			addlog("\n");
	}

	/* kill this state */
	(*state)->timeout = PFTM_PURGE;
	pf_set_protostate(*state, PF_PEER_BOTH, TCPS_CLOSED);
	return (1);
}

int
pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule,
    enum pf_sn_types type, sa_family_t af, struct pf_addr *src,
    struct pf_addr *raddr)
{
	struct pf_src_node	k;

	if (*sn == NULL) {
		k.af = af;
		k.type = type;
		pf_addrcpy(&k.addr, src, af);
		k.rule.ptr = rule;
		pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
		*sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
	}
	if (*sn == NULL) {
		if (!rule->max_src_nodes ||
		    rule->src_nodes < rule->max_src_nodes)
			(*sn) = pool_get(&pf_src_tree_pl, PR_NOWAIT | PR_ZERO);
		else
			pf_status.lcounters[LCNT_SRCNODES]++;
		if ((*sn) == NULL)
			return (-1);

		pf_init_threshold(&(*sn)->conn_rate,
		    rule->max_src_conn_rate.limit,
		    rule->max_src_conn_rate.seconds);

		(*sn)->type = type;
		(*sn)->af = af;
		(*sn)->rule.ptr = rule;
		pf_addrcpy(&(*sn)->addr, src, af);
		if (raddr)
			pf_addrcpy(&(*sn)->raddr, raddr, af);
		if (RB_INSERT(pf_src_tree,
		    &tree_src_tracking, *sn) != NULL) {
			if (pf_status.debug >= LOG_NOTICE) {
				log(LOG_NOTICE,
				    "pf: src_tree insert failed: ");
				pf_print_host(&(*sn)->addr, 0, af);
				addlog("\n");
			}
			pool_put(&pf_src_tree_pl, *sn);
			return (-1);
		}
		(*sn)->creation = time_uptime;
		(*sn)->rule.ptr->src_nodes++;
		pf_status.scounters[SCNT_SRC_NODE_INSERT]++;
		pf_status.src_nodes++;
	} else {
		if (rule->max_src_states &&
		    (*sn)->states >= rule->max_src_states) {
			pf_status.lcounters[LCNT_SRCSTATES]++;
			return (-1);
		}
	}
	return (0);
}

void
pf_remove_src_node(struct pf_src_node *sn)
{
	if (sn->states > 0 || sn->expire > time_uptime)
		return;

	sn->rule.ptr->src_nodes--;
	if (sn->rule.ptr->states_cur == 0 &&
	    sn->rule.ptr->src_nodes == 0)
		pf_rm_rule(NULL, sn->rule.ptr);
	RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
	pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
	pf_status.src_nodes--;
	pool_put(&pf_src_tree_pl, sn);
}

struct pf_src_node *
pf_get_src_node(struct pf_state *s, enum pf_sn_types type)
{
	struct pf_sn_item	*sni;

	SLIST_FOREACH(sni, &s->src_nodes, next)
		if (sni->sn->type == type)
			return (sni->sn);
	return (NULL);
}

void
pf_state_rm_src_node(struct pf_state *s, struct pf_src_node *sn)
{
	struct pf_sn_item	*sni, *snin, *snip = NULL;

	for (sni = SLIST_FIRST(&s->src_nodes); sni; sni = snin) {
		snin = SLIST_NEXT(sni, next);
		if (sni->sn == sn) {
			if (snip)
				SLIST_REMOVE_AFTER(snip, next);
			else
				SLIST_REMOVE_HEAD(&s->src_nodes, next);
			pool_put(&pf_sn_item_pl, sni);
			sni = NULL;
			sn->states--;
		}
		if (sni != NULL)
			snip = sni;
	}
}

/* state table stuff */

static __inline int
pf_state_compare_key(struct pf_state_key *a, struct pf_state_key *b)
{
	int	diff;

	if ((diff = a->proto - b->proto) != 0)
		return (diff);
	if ((diff = a->af - b->af) != 0)
		return (diff);
	if ((diff = pf_addr_compare(&a->addr[0], &b->addr[0], a->af)) != 0)
		return (diff);
	if ((diff = pf_addr_compare(&a->addr[1], &b->addr[1], a->af)) != 0)
		return (diff);
	if ((diff = a->port[0] - b->port[0]) != 0)
		return (diff);
	if ((diff = a->port[1] - b->port[1]) != 0)
		return (diff);
	if ((diff = a->rdomain - b->rdomain) != 0)
		return (diff);
	return (0);
}

static __inline int
pf_state_compare_id(struct pf_state *a, struct pf_state *b)
{
	if (a->id > b->id)
		return (1);
	if (a->id < b->id)
		return (-1);
	if (a->creatorid > b->creatorid)
		return (1);
	if (a->creatorid < b->creatorid)
		return (-1);

	return (0);
}

int
pf_state_key_attach(struct pf_state_key *sk, struct pf_state *s, int idx)
{
	struct pf_state_item	*si;
	struct pf_state_key	*cur;
	struct pf_state		*olds = NULL;

	KASSERT(s->key[idx] == NULL);
	if ((cur = RB_INSERT(pf_state_tree, &pf_statetbl, sk)) != NULL) {
		/* key exists. check for same kif, if none, add to key */
		TAILQ_FOREACH(si, &cur->states, entry)
			if (si->s->kif == s->kif &&
			    ((si->s->key[PF_SK_WIRE]->af == sk->af &&
			    si->s->direction == s->direction) ||
			    (si->s->key[PF_SK_WIRE]->af !=
			    si->s->key[PF_SK_STACK]->af &&
			    sk->af == si->s->key[PF_SK_STACK]->af &&
			    si->s->direction != s->direction))) {
				int reuse = 0;

				if (sk->proto == IPPROTO_TCP &&
				    si->s->src.state >= TCPS_FIN_WAIT_2 &&
				    si->s->dst.state >= TCPS_FIN_WAIT_2)
					reuse = 1;
				if (pf_status.debug >= LOG_NOTICE) {
					log(LOG_NOTICE,
					    "pf: %s key attach %s on %s: ",
					    (idx == PF_SK_WIRE) ?
					    "wire" : "stack",
					    reuse ? "reuse" : "failed",
					    s->kif->pfik_name);
					pf_print_state_parts(s,
					    (idx == PF_SK_WIRE) ? sk : NULL,
					    (idx == PF_SK_STACK) ? sk : NULL);
					addlog(", existing: ");
					pf_print_state_parts(si->s,
					    (idx == PF_SK_WIRE) ? sk : NULL,
					    (idx == PF_SK_STACK) ? sk : NULL);
					addlog("\n");
				}
				if (reuse) {
					pf_set_protostate(si->s, PF_PEER_BOTH,
					    TCPS_CLOSED);
					/* remove late or sks can go away */
					olds = si->s;
				} else {
					pool_put(&pf_state_key_pl, sk);
					return (-1);	/* collision! */
				}
			}
		pool_put(&pf_state_key_pl, sk);
		s->key[idx] = cur;
	} else
		s->key[idx] = sk;

	if ((si = pool_get(&pf_state_item_pl, PR_NOWAIT)) == NULL) {
		pf_state_key_detach(s, idx);
		return (-1);
	}
	si->s = s;

	/* list is sorted, if-bound states before floating */
	if (s->kif == pfi_all)
		TAILQ_INSERT_TAIL(&s->key[idx]->states, si, entry);
	else
		TAILQ_INSERT_HEAD(&s->key[idx]->states, si, entry);

	if (olds)
		pf_remove_state(olds);

	return (0);
}

void
pf_detach_state(struct pf_state *s)
{
	if (s->key[PF_SK_WIRE] == s->key[PF_SK_STACK])
		s->key[PF_SK_WIRE] = NULL;

	if (s->key[PF_SK_STACK] != NULL)
		pf_state_key_detach(s, PF_SK_STACK);

	if (s->key[PF_SK_WIRE] != NULL)
		pf_state_key_detach(s, PF_SK_WIRE);
}

void
pf_state_key_detach(struct pf_state *s, int idx)
{
	struct pf_state_item	*si;
	struct pf_state_key	*sk;

	if (s->key[idx] == NULL)
		return;

	si = TAILQ_FIRST(&s->key[idx]->states);
	while (si && si->s != s)
		si = TAILQ_NEXT(si, entry);

	if (si) {
		TAILQ_REMOVE(&s->key[idx]->states, si, entry);
		pool_put(&pf_state_item_pl, si);
	}

	sk = s->key[idx];
	s->key[idx] = NULL;
	if (TAILQ_EMPTY(&sk->states)) {
		RB_REMOVE(pf_state_tree, &pf_statetbl, sk);
		sk->removed = 1;
		pf_state_key_unlink_reverse(sk);
		pf_state_key_unlink_inpcb(sk);
		pf_state_key_unref(sk);
	}
}

struct pf_state_key *
pf_alloc_state_key(int pool_flags)
{
	struct pf_state_key	*sk;

	if ((sk = pool_get(&pf_state_key_pl, pool_flags)) == NULL)
		return (NULL);
	TAILQ_INIT(&sk->states);

	return (sk);
}

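/*
 * Descriptive note (not in the original source): for ICMPv6 neighbor
 * discovery, state keys are built from the ND target address rather than
 * the packet addresses, so a neighbor solicitation (possibly sent to a
 * solicited-node multicast group or, during DAD, from the unspecified
 * address) and the matching advertisement share one state.  When `multi'
 * is requested, the source slot of other ICMPv6 keys is set to the
 * all-nodes link-local address ff02::1.
 */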
static __inline int
pf_state_key_addr_setup(struct pf_pdesc *pd, void *arg, int sidx,
    struct pf_addr *saddr, int didx, struct pf_addr *daddr, int af, int multi)
{
	struct pf_state_key_cmp *key = arg;
#ifdef INET6
	struct pf_addr *target;

	if (af == AF_INET || pd->proto != IPPROTO_ICMPV6)
		goto copy;

	switch (pd->hdr.icmp6.icmp6_type) {
	case ND_NEIGHBOR_SOLICIT:
		if (multi)
			return (-1);
		target = (struct pf_addr *)&pd->hdr.nd_ns.nd_ns_target;
		daddr = target;
		break;
	case ND_NEIGHBOR_ADVERT:
		if (multi)
			return (-1);
		target = (struct pf_addr *)&pd->hdr.nd_ns.nd_ns_target;
		saddr = target;
		if (IN6_IS_ADDR_MULTICAST(&pd->dst->v6)) {
			key->addr[didx].addr32[0] = 0;
			key->addr[didx].addr32[1] = 0;
			key->addr[didx].addr32[2] = 0;
			key->addr[didx].addr32[3] = 0;
			daddr = NULL; /* overwritten */
		}
		break;
	default:
		if (multi) {
			key->addr[sidx].addr32[0] = __IPV6_ADDR_INT32_MLL;
			key->addr[sidx].addr32[1] = 0;
			key->addr[sidx].addr32[2] = 0;
			key->addr[sidx].addr32[3] = __IPV6_ADDR_INT32_ONE;
			saddr = NULL; /* overwritten */
		}
	}
 copy:
#endif /* INET6 */
	if (saddr)
		pf_addrcpy(&key->addr[sidx], saddr, af);
	if (daddr)
		pf_addrcpy(&key->addr[didx], daddr, af);

	return (0);
}

int
pf_state_key_setup(struct pf_pdesc *pd, struct pf_state_key **skw,
    struct pf_state_key **sks, int rtableid)
{
	/* if returning error we MUST pool_put state keys ourselves */
	struct pf_state_key *sk1, *sk2;
	u_int wrdom = pd->rdomain;
	int afto = pd->af != pd->naf;

	if ((sk1 = pf_alloc_state_key(PR_NOWAIT | PR_ZERO)) == NULL)
		return (ENOMEM);

	pf_state_key_addr_setup(pd, sk1, pd->sidx, pd->src, pd->didx, pd->dst,
	    pd->af, 0);
	sk1->port[pd->sidx] = pd->osport;
	sk1->port[pd->didx] = pd->odport;
	sk1->proto = pd->proto;
	sk1->af = pd->af;
	sk1->rdomain = pd->rdomain;
	PF_REF_INIT(sk1->refcnt);
	sk1->removed = 0;
	if (rtableid >= 0)
		wrdom = rtable_l2(rtableid);

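	/*
	 * Descriptive note (not in the original source): a second key is
	 * needed only if the packet is translated in some way (address,
	 * port, routing domain or address family); otherwise both sides
	 * share sk1.  With af translation (afto) the new key's source and
	 * destination slots are swapped; pf_find_state() matches af-mixed
	 * states from either key.
	 */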
	if (PF_ANEQ(&pd->nsaddr, pd->src, pd->af) ||
	    PF_ANEQ(&pd->ndaddr, pd->dst, pd->af) ||
	    pd->nsport != pd->osport || pd->ndport != pd->odport ||
	    wrdom != pd->rdomain || afto) {	/* NAT/NAT64 */
		if ((sk2 = pf_alloc_state_key(PR_NOWAIT | PR_ZERO)) == NULL) {
			pool_put(&pf_state_key_pl, sk1);
			return (ENOMEM);
		}
		pf_state_key_addr_setup(pd, sk2, afto ? pd->didx : pd->sidx,
		    &pd->nsaddr, afto ? pd->sidx : pd->didx, &pd->ndaddr,
		    pd->naf, 0);
		sk2->port[afto ? pd->didx : pd->sidx] = pd->nsport;
		sk2->port[afto ? pd->sidx : pd->didx] = pd->ndport;
		if (afto) {
			switch (pd->proto) {
			case IPPROTO_ICMP:
				sk2->proto = IPPROTO_ICMPV6;
				break;
			case IPPROTO_ICMPV6:
				sk2->proto = IPPROTO_ICMP;
				break;
			default:
				sk2->proto = pd->proto;
			}
		} else
			sk2->proto = pd->proto;
		sk2->af = pd->naf;
		sk2->rdomain = wrdom;
		PF_REF_INIT(sk2->refcnt);
		sk2->removed = 0;
	} else
		sk2 = sk1;

	if (pd->dir == PF_IN) {
		*skw = sk1;
		*sks = sk2;
	} else {
		*sks = sk1;
		*skw = sk2;
	}

	if (pf_status.debug >= LOG_DEBUG) {
		log(LOG_DEBUG, "pf: key setup: ");
		pf_print_state_parts(NULL, *skw, *sks);
		addlog("\n");
	}

	return (0);
}

int
pf_state_insert(struct pfi_kif *kif, struct pf_state_key **skw,
    struct pf_state_key **sks, struct pf_state *s)
{
	PF_ASSERT_LOCKED();

	s->kif = kif;
	if (*skw == *sks) {
		if (pf_state_key_attach(*skw, s, PF_SK_WIRE))
			return (-1);
		*skw = *sks = s->key[PF_SK_WIRE];
		s->key[PF_SK_STACK] = s->key[PF_SK_WIRE];
	} else {
		if (pf_state_key_attach(*skw, s, PF_SK_WIRE)) {
			pool_put(&pf_state_key_pl, *sks);
			return (-1);
		}
		*skw = s->key[PF_SK_WIRE];
		if (pf_state_key_attach(*sks, s, PF_SK_STACK)) {
			pf_state_key_detach(s, PF_SK_WIRE);
			return (-1);
		}
		*sks = s->key[PF_SK_STACK];
	}

	if (s->id == 0 && s->creatorid == 0) {
		s->id = htobe64(pf_status.stateid++);
		s->creatorid = pf_status.hostid;
	}
	if (RB_INSERT(pf_state_tree_id, &tree_id, s) != NULL) {
		if (pf_status.debug >= LOG_NOTICE) {
			log(LOG_NOTICE, "pf: state insert failed: "
			    "id: %016llx creatorid: %08x",
			    betoh64(s->id), ntohl(s->creatorid));
			addlog("\n");
		}
		pf_detach_state(s);
		return (-1);
	}
	TAILQ_INSERT_TAIL(&state_list, s, entry_list);
	pf_status.fcounters[FCNT_STATE_INSERT]++;
	pf_status.states++;
	pfi_kif_ref(kif, PFI_KIF_REF_STATE);
#if NPFSYNC > 0
	pfsync_insert_state(s);
#endif	/* NPFSYNC > 0 */
	return (0);
}

struct pf_state *
pf_find_state_byid(struct pf_state_cmp *key)
{
	pf_status.fcounters[FCNT_STATE_SEARCH]++;

	return (RB_FIND(pf_state_tree_id, &tree_id, (struct pf_state *)key));
}

int
pf_compare_state_keys(struct pf_state_key *a, struct pf_state_key *b,
    struct pfi_kif *kif, u_int dir)
{
	/* a (from hdr) and b (new) must be exact opposites of each other */
	if (a->af == b->af && a->proto == b->proto &&
	    PF_AEQ(&a->addr[0], &b->addr[1], a->af) &&
	    PF_AEQ(&a->addr[1], &b->addr[0], a->af) &&
	    a->port[0] == b->port[1] &&
	    a->port[1] == b->port[0] && a->rdomain == b->rdomain)
		return (0);
	else {
		/* mismatch. must not happen. */
		if (pf_status.debug >= LOG_ERR) {
			log(LOG_ERR,
			    "pf: state key linking mismatch! dir=%s, "
			    "if=%s, stored af=%u, a0: ",
			    dir == PF_OUT ? "OUT" : "IN",
			    kif->pfik_name, a->af);
			pf_print_host(&a->addr[0], a->port[0], a->af);
			addlog(", a1: ");
			pf_print_host(&a->addr[1], a->port[1], a->af);
			addlog(", proto=%u", a->proto);
			addlog(", found af=%u, a0: ", b->af);
			pf_print_host(&b->addr[0], b->port[0], b->af);
			addlog(", a1: ");
			pf_print_host(&b->addr[1], b->port[1], b->af);
			addlog(", proto=%u", b->proto);
			addlog("\n");
		}
		return (-1);
	}
}

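/*
 * Descriptive note (not in the original source): outbound packets first
 * try the state key cached in the mbuf header (linked when the packet
 * passed inbound) via its reverse pointer, or the key attached to the
 * originating socket's inpcb; only on a miss is the state table searched,
 * after which the keys are re-linked so the next packet skips the tree
 * lookup.
 */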
int
pf_find_state(struct pf_pdesc *pd, struct pf_state_key_cmp *key,
    struct pf_state **state)
{
	struct pf_state_key	*sk, *pkt_sk, *inp_sk;
	struct pf_state_item	*si;
	struct pf_state		*s = NULL;

	pf_status.fcounters[FCNT_STATE_SEARCH]++;
	if (pf_status.debug >= LOG_DEBUG) {
		log(LOG_DEBUG, "pf: key search, %s on %s: ",
		    pd->dir == PF_OUT ? "out" : "in", pd->kif->pfik_name);
		pf_print_state_parts(NULL, (struct pf_state_key *)key, NULL);
		addlog("\n");
	}

	inp_sk = NULL;
	pkt_sk = NULL;
	sk = NULL;
	if (pd->dir == PF_OUT) {
		/* first if block deals with outbound forwarded packet */
		pkt_sk = pd->m->m_pkthdr.pf.statekey;

		if (!pf_state_key_isvalid(pkt_sk)) {
			pf_mbuf_unlink_state_key(pd->m);
			pkt_sk = NULL;
		}

		if (pkt_sk && pf_state_key_isvalid(pkt_sk->reverse))
			sk = pkt_sk->reverse;

		if (pkt_sk == NULL) {
			/* here we deal with local outbound packet */
			if (pd->m->m_pkthdr.pf.inp != NULL) {
				inp_sk = pd->m->m_pkthdr.pf.inp->inp_pf_sk;
				if (pf_state_key_isvalid(inp_sk))
					sk = inp_sk;
				else
					pf_inpcb_unlink_state_key(
					    pd->m->m_pkthdr.pf.inp);
			}
		}
	}

	if (sk == NULL) {
		if ((sk = RB_FIND(pf_state_tree, &pf_statetbl,
		    (struct pf_state_key *)key)) == NULL)
			return (PF_DROP);
		if (pd->dir == PF_OUT && pkt_sk &&
		    pf_compare_state_keys(pkt_sk, sk, pd->kif, pd->dir) == 0)
			pf_state_key_link_reverse(sk, pkt_sk);
		else if (pd->dir == PF_OUT && pd->m->m_pkthdr.pf.inp &&
		    !pd->m->m_pkthdr.pf.inp->inp_pf_sk && !sk->inp)
			pf_state_key_link_inpcb(sk, pd->m->m_pkthdr.pf.inp);
	}

	/* remove firewall data from outbound packet */
	if (pd->dir == PF_OUT)
		pf_pkt_addr_changed(pd->m);

	/* list is sorted, if-bound states before floating ones */
	TAILQ_FOREACH(si, &sk->states, entry)
		if ((si->s->kif == pfi_all || si->s->kif == pd->kif) &&
		    ((si->s->key[PF_SK_WIRE]->af == si->s->key[PF_SK_STACK]->af
		    && sk == (pd->dir == PF_IN ? si->s->key[PF_SK_WIRE] :
		    si->s->key[PF_SK_STACK])) ||
		    (si->s->key[PF_SK_WIRE]->af != si->s->key[PF_SK_STACK]->af
		    && pd->dir == PF_IN && (sk == si->s->key[PF_SK_STACK] ||
		    sk == si->s->key[PF_SK_WIRE])))) {
			s = si->s;
			break;
		}

	if (s == NULL || s->timeout == PFTM_PURGE)
		return (PF_DROP);

	if (s->rule.ptr->pktrate.limit && pd->dir == s->direction) {
		pf_add_threshold(&s->rule.ptr->pktrate);
		if (pf_check_threshold(&s->rule.ptr->pktrate))
			return (PF_DROP);
	}

	*state = s;
	if (pd->dir == PF_OUT && s->rt_kif != NULL && s->rt_kif != pd->kif &&
	    ((s->rule.ptr->rt == PF_ROUTETO &&
	    s->rule.ptr->direction == PF_OUT) ||
	    (s->rule.ptr->rt == PF_REPLYTO &&
	    s->rule.ptr->direction == PF_IN)))
		return (PF_PASS);

	return (PF_MATCH);
}

struct pf_state *
pf_find_state_all(struct pf_state_key_cmp *key, u_int dir, int *more)
{
	struct pf_state_key	*sk;
	struct pf_state_item	*si, *ret = NULL;

	pf_status.fcounters[FCNT_STATE_SEARCH]++;

	sk = RB_FIND(pf_state_tree, &pf_statetbl, (struct pf_state_key *)key);

	if (sk != NULL) {
		TAILQ_FOREACH(si, &sk->states, entry)
			if (dir == PF_INOUT ||
			    (sk == (dir == PF_IN ? si->s->key[PF_SK_WIRE] :
			    si->s->key[PF_SK_STACK]))) {
				if (more == NULL)
					return (si->s);

				if (ret)
					(*more)++;
				else
					ret = si;
			}
	}
	return (ret ? ret->s : NULL);
}

void
pf_state_export(struct pfsync_state *sp, struct pf_state *st)
{
	int32_t expire;

	memset(sp, 0, sizeof(struct pfsync_state));

	/* copy from state key */
	sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0];
	sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1];
	sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0];
	sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1];
	sp->key[PF_SK_WIRE].rdomain = htons(st->key[PF_SK_WIRE]->rdomain);
	sp->key[PF_SK_WIRE].af = st->key[PF_SK_WIRE]->af;
	sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0];
	sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1];
	sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0];
	sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1];
	sp->key[PF_SK_STACK].rdomain = htons(st->key[PF_SK_STACK]->rdomain);
	sp->key[PF_SK_STACK].af = st->key[PF_SK_STACK]->af;
	sp->rtableid[PF_SK_WIRE] = htonl(st->rtableid[PF_SK_WIRE]);
	sp->rtableid[PF_SK_STACK] = htonl(st->rtableid[PF_SK_STACK]);
	sp->proto = st->key[PF_SK_WIRE]->proto;
	sp->af = st->key[PF_SK_WIRE]->af;

	/* copy from state */
	strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname));
	memcpy(&sp->rt_addr, &st->rt_addr, sizeof(sp->rt_addr));
	sp->creation = htonl(time_uptime - st->creation);
	expire = pf_state_expires(st);
	if (expire <= time_uptime)
		sp->expire = htonl(0);
	else
		sp->expire = htonl(expire - time_uptime);

	sp->direction = st->direction;
#if NPFLOG > 0
	sp->log = st->log;
#endif	/* NPFLOG > 0 */
	sp->timeout = st->timeout;
	sp->state_flags = htons(st->state_flags);
	if (!SLIST_EMPTY(&st->src_nodes))
		sp->sync_flags |= PFSYNC_FLAG_SRCNODE;

	sp->id = st->id;
	sp->creatorid = st->creatorid;
	pf_state_peer_hton(&st->src, &sp->src);
	pf_state_peer_hton(&st->dst, &sp->dst);

	if (st->rule.ptr == NULL)
		sp->rule = htonl(-1);
	else
		sp->rule = htonl(st->rule.ptr->nr);
	if (st->anchor.ptr == NULL)
		sp->anchor = htonl(-1);
	else
		sp->anchor = htonl(st->anchor.ptr->nr);
	sp->nat_rule = htonl(-1); /* left for compat, nat_rule is gone */

	pf_state_counter_hton(st->packets[0], sp->packets[0]);
	pf_state_counter_hton(st->packets[1], sp->packets[1]);
	pf_state_counter_hton(st->bytes[0], sp->bytes[0]);
	pf_state_counter_hton(st->bytes[1], sp->bytes[1]);

	sp->max_mss = htons(st->max_mss);
	sp->min_ttl = st->min_ttl;
	sp->set_tos = st->set_tos;
	sp->set_prio[0] = st->set_prio[0];
	sp->set_prio[1] = st->set_prio[1];
}

/* END state table stuff */

void
pf_purge_expired_rules(void)
{
	struct pf_rule	*r;

	PF_ASSERT_LOCKED();

	if (SLIST_EMPTY(&pf_rule_gcl))
		return;

	while ((r = SLIST_FIRST(&pf_rule_gcl)) != NULL) {
		SLIST_REMOVE(&pf_rule_gcl, r, pf_rule, gcle);
		KASSERT(r->rule_flag & PFRULE_EXPIRED);
		pf_purge_rule(r);
	}
}

void
pf_purge_timeout(void *unused)
{
	task_add(net_tq(0), &pf_purge_task);
}

void
pf_purge(void *xnloops)
{
	int *nloops = xnloops;

	KERNEL_LOCK();
	NET_LOCK();

	/*
	 * process a fraction of the state table every second
	 * Note:
	 * we no longer need PF_LOCK() here, because
	 * pf_purge_expired_states() uses pf_state_lock to maintain
	 * consistency.
	 */
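	/*
	 * Illustrative example (not in the original source): with 50000
	 * states and the default PFTM_INTERVAL of 10 seconds, each call
	 * checks 1 + 50000/10 = 5001 states, so the whole table is swept
	 * roughly once per interval.
	 */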
	pf_purge_expired_states(1 + (pf_status.states
	    / pf_default_rule.timeout[PFTM_INTERVAL]));

	PF_LOCK();
	/* purge other expired types every PFTM_INTERVAL seconds */
	if (++(*nloops) >= pf_default_rule.timeout[PFTM_INTERVAL]) {
		pf_purge_expired_src_nodes();
		pf_purge_expired_rules();
	}
	PF_UNLOCK();

	/*
	 * Fragments don't require PF_LOCK(), they use their own lock.
	 */
	if ((*nloops) >= pf_default_rule.timeout[PFTM_INTERVAL]) {
		pf_purge_expired_fragments();
		*nloops = 0;
	}
	NET_UNLOCK();
	KERNEL_UNLOCK();

	timeout_add_sec(&pf_purge_to, 1);
}

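/*
 * With adaptive timeouts the configured timeout shrinks linearly from its
 * full value at adaptive.start states down to zero at adaptive.end.
 * Illustrative example (not in the original source): start 6000, end
 * 12000, timeout 3600s and 9000 states give
 * 3600 * (12000 - 9000) / (12000 - 6000) = 1800s.
 */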
int32_t
pf_state_expires(const struct pf_state *state)
{
	u_int32_t	timeout;
	u_int32_t	start;
	u_int32_t	end;
	u_int32_t	states;

	/* handle all PFTM_* > PFTM_MAX here */
	if (state->timeout == PFTM_PURGE)
		return (0);

	KASSERT(state->timeout != PFTM_UNLINKED);
	KASSERT(state->timeout < PFTM_MAX);

	timeout = state->rule.ptr->timeout[state->timeout];
	if (!timeout)
		timeout = pf_default_rule.timeout[state->timeout];

	start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START];
	if (start) {
		end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END];
		states = state->rule.ptr->states_cur;
	} else {
		start = pf_default_rule.timeout[PFTM_ADAPTIVE_START];
		end = pf_default_rule.timeout[PFTM_ADAPTIVE_END];
		states = pf_status.states;
	}
	if (end && states > start && start < end) {
		if (states >= end)
			return (0);

		timeout = (u_int64_t)timeout * (end - states) / (end - start);
	}

	return (state->expire + timeout);
}

void
pf_purge_expired_src_nodes(void)
{
	struct pf_src_node	*cur, *next;

	PF_ASSERT_LOCKED();

	for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) {
		next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);

		if (cur->states == 0 && cur->expire <= time_uptime) {
			next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);
			pf_remove_src_node(cur);
		}
	}
}

void
pf_src_tree_remove_state(struct pf_state *s)
{
	u_int32_t		 timeout;
	struct pf_sn_item	*sni;

	while ((sni = SLIST_FIRST(&s->src_nodes)) != NULL) {
		SLIST_REMOVE_HEAD(&s->src_nodes, next);
		if (s->src.tcp_est)
			--sni->sn->conn;
		if (--sni->sn->states == 0) {
			timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
			if (!timeout)
				timeout =
				    pf_default_rule.timeout[PFTM_SRC_NODE];
			sni->sn->expire = time_uptime + timeout;
		}
		pool_put(&pf_sn_item_pl, sni);
	}
}

void
pf_remove_state(struct pf_state *cur)
{
	PF_ASSERT_LOCKED();

	/* handle load balancing related tasks */
	pf_postprocess_addr(cur);

	if (cur->src.state == PF_TCPS_PROXY_DST) {
		pf_send_tcp(cur->rule.ptr, cur->key[PF_SK_WIRE]->af,
		    &cur->key[PF_SK_WIRE]->addr[1],
		    &cur->key[PF_SK_WIRE]->addr[0],
		    cur->key[PF_SK_WIRE]->port[1],
		    cur->key[PF_SK_WIRE]->port[0],
		    cur->src.seqhi, cur->src.seqlo + 1,
		    TH_RST|TH_ACK, 0, 0, 0, 1, cur->tag,
		    cur->key[PF_SK_WIRE]->rdomain);
	}
	if (cur->key[PF_SK_STACK]->proto == IPPROTO_TCP)
		pf_set_protostate(cur, PF_PEER_BOTH, TCPS_CLOSED);

	RB_REMOVE(pf_state_tree_id, &tree_id, cur);
#if NPFLOW > 0
	if (cur->state_flags & PFSTATE_PFLOW)
		export_pflow(cur);
#endif	/* NPFLOW > 0 */
#if NPFSYNC > 0
	pfsync_delete_state(cur);
#endif	/* NPFSYNC > 0 */
	cur->timeout = PFTM_UNLINKED;
	pf_src_tree_remove_state(cur);
	pf_detach_state(cur);
}

void
pf_remove_divert_state(struct pf_state_key *sk)
{
	struct pf_state_item	*si;

	TAILQ_FOREACH(si, &sk->states, entry) {
		if (sk == si->s->key[PF_SK_STACK] && si->s->rule.ptr &&
		    (si->s->rule.ptr->divert.type == PF_DIVERT_TO ||
		    si->s->rule.ptr->divert.type == PF_DIVERT_REPLY)) {
			pf_remove_state(si->s);
			break;
		}
	}
}

void
pf_free_state(struct pf_state *cur)
{
	struct pf_rule_item *ri;

	PF_ASSERT_LOCKED();

#if NPFSYNC > 0
	if (pfsync_state_in_use(cur))
		return;
#endif	/* NPFSYNC > 0 */
	KASSERT(cur->timeout == PFTM_UNLINKED);
	if (--cur->rule.ptr->states_cur == 0 &&
	    cur->rule.ptr->src_nodes == 0)
		pf_rm_rule(NULL, cur->rule.ptr);
	if (cur->anchor.ptr != NULL)
		if (--cur->anchor.ptr->states_cur == 0)
			pf_rm_rule(NULL, cur->anchor.ptr);
	while ((ri = SLIST_FIRST(&cur->match_rules))) {
		SLIST_REMOVE_HEAD(&cur->match_rules, entry);
		if (--ri->r->states_cur == 0 &&
		    ri->r->src_nodes == 0)
			pf_rm_rule(NULL, ri->r);
		pool_put(&pf_rule_item_pl, ri);
	}
	pf_normalize_tcp_cleanup(cur);
	pfi_kif_unref(cur->kif, PFI_KIF_REF_STATE);
	TAILQ_REMOVE(&state_list, cur, entry_list);
	if (cur->tag)
		pf_tag_unref(cur->tag);
	pf_state_unref(cur);
	pf_status.fcounters[FCNT_STATE_REMOVALS]++;
	pf_status.states--;
}

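/*
 * Descriptive note (not in the original source): states are expired in
 * two phases.  Part of the state list is scanned under the read lock,
 * collecting expired states on a local gc list; the write lock is then
 * taken once to unlink and free them.  The static cursor lets each call
 * resume the scan where the previous one stopped.
 */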
void
pf_purge_expired_states(u_int32_t maxcheck)
{
	static struct pf_state	*cur = NULL;
	struct pf_state		*next;
	SLIST_HEAD(pf_state_gcl, pf_state) gcl;

	PF_ASSERT_UNLOCKED();
	SLIST_INIT(&gcl);

	PF_STATE_ENTER_READ();
	while (maxcheck--) {
		/* wrap to start of list when we hit the end */
		if (cur == NULL) {
			cur = pf_state_ref(TAILQ_FIRST(&state_list));
			if (cur == NULL)
				break;	/* list empty */
		}

		/* get next state, as cur may get deleted */
		next = TAILQ_NEXT(cur, entry_list);

		if ((cur->timeout == PFTM_UNLINKED) ||
		    (pf_state_expires(cur) <= time_uptime))
			SLIST_INSERT_HEAD(&gcl, cur, gc_list);
		else
			pf_state_unref(cur);

		cur = pf_state_ref(next);
	}
	PF_STATE_EXIT_READ();

	PF_LOCK();
	PF_STATE_ENTER_WRITE();
	while ((next = SLIST_FIRST(&gcl)) != NULL) {
		SLIST_REMOVE_HEAD(&gcl, gc_list);
		if (next->timeout == PFTM_UNLINKED)
			pf_free_state(next);
		else if (pf_state_expires(next) <= time_uptime) {
			pf_remove_state(next);
			pf_free_state(next);
		}

		pf_state_unref(next);
	}
	PF_STATE_EXIT_WRITE();
	PF_UNLOCK();
}

int
pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw)
{
	if (aw->type != PF_ADDR_TABLE)
		return (0);
	if ((aw->p.tbl = pfr_attach_table(rs, aw->v.tblname, 1)) == NULL)
		return (1);
	return (0);
}

void
pf_tbladdr_remove(struct pf_addr_wrap *aw)
{
	if (aw->type != PF_ADDR_TABLE || aw->p.tbl == NULL)
		return;
	pfr_detach_table(aw->p.tbl);
	aw->p.tbl = NULL;
}

void
pf_tbladdr_copyout(struct pf_addr_wrap *aw)
{
	struct pfr_ktable *kt = aw->p.tbl;

	if (aw->type != PF_ADDR_TABLE || kt == NULL)
		return;
	if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL)
		kt = kt->pfrkt_root;
	aw->p.tbl = NULL;
	aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ?
	    kt->pfrkt_cnt : -1;
}

void
pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af)
{
	switch (af) {
	case AF_INET: {
		u_int32_t a = ntohl(addr->addr32[0]);
		addlog("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255,
		    (a>>8)&255, a&255);
		if (p) {
			p = ntohs(p);
			addlog(":%u", p);
		}
		break;
	}
#ifdef INET6
	case AF_INET6: {
		u_int16_t b;
		u_int8_t i, curstart, curend, maxstart, maxend;
		curstart = curend = maxstart = maxend = 255;
		for (i = 0; i < 8; i++) {
			if (!addr->addr16[i]) {
				if (curstart == 255)
					curstart = i;
				curend = i;
			} else {
				if ((curend - curstart) >
				    (maxend - maxstart)) {
					maxstart = curstart;
					maxend = curend;
				}
				curstart = curend = 255;
			}
		}
		if ((curend - curstart) >
		    (maxend - maxstart)) {
			maxstart = curstart;
			maxend = curend;
		}
		for (i = 0; i < 8; i++) {
			if (i >= maxstart && i <= maxend) {
				if (i == 0)
					addlog(":");
				if (i == maxend)
					addlog(":");
			} else {
				b = ntohs(addr->addr16[i]);
				addlog("%x", b);
				if (i < 7)
					addlog(":");
			}
		}
		if (p) {
			p = ntohs(p);
			addlog("[%u]", p);
		}
		break;
	}
#endif /* INET6 */
	}
}

void
pf_print_state(struct pf_state *s)
{
	pf_print_state_parts(s, NULL, NULL);
}

void
pf_print_state_parts(struct pf_state *s,
    struct pf_state_key *skwp, struct pf_state_key *sksp)
{
	struct pf_state_key *skw, *sks;
	u_int8_t proto, dir;

	/* Do our best to fill these, but they're skipped if NULL */
	skw = skwp ? skwp : (s ? s->key[PF_SK_WIRE] : NULL);
	sks = sksp ? sksp : (s ? s->key[PF_SK_STACK] : NULL);
	proto = skw ? skw->proto : (sks ? sks->proto : 0);
	dir = s ? s->direction : 0;
	switch (proto) {
	case IPPROTO_IPV4:
		addlog("IPv4");
		break;
	case IPPROTO_IPV6:
		addlog("IPv6");
		break;
	case IPPROTO_TCP:
		addlog("TCP");
		break;
	case IPPROTO_UDP:
		addlog("UDP");
		break;
	case IPPROTO_ICMP:
		addlog("ICMP");
		break;
	case IPPROTO_ICMPV6:
		addlog("ICMPv6");
		break;
	default:
		addlog("%u", proto);
		break;
	}
	switch (dir) {
	case PF_IN:
		addlog(" in");
		break;
	case PF_OUT:
		addlog(" out");
		break;
	}
	if (skw) {
		addlog(" wire: (%d) ", skw->rdomain);
		pf_print_host(&skw->addr[0], skw->port[0], skw->af);
		addlog(" ");
		pf_print_host(&skw->addr[1], skw->port[1], skw->af);
	}
	if (sks) {
		addlog(" stack: (%d) ", sks->rdomain);
		if (sks != skw) {
			pf_print_host(&sks->addr[0], sks->port[0], sks->af);
			addlog(" ");
			pf_print_host(&sks->addr[1], sks->port[1], sks->af);
		} else
			addlog("-");
	}
	if (s) {
		if (proto == IPPROTO_TCP) {
			addlog(" [lo=%u high=%u win=%u modulator=%u",
			    s->src.seqlo, s->src.seqhi,
			    s->src.max_win, s->src.seqdiff);
			if (s->src.wscale && s->dst.wscale)
				addlog(" wscale=%u",
				    s->src.wscale & PF_WSCALE_MASK);
			addlog("]");
			addlog(" [lo=%u high=%u win=%u modulator=%u",
			    s->dst.seqlo, s->dst.seqhi,
			    s->dst.max_win, s->dst.seqdiff);
			if (s->src.wscale && s->dst.wscale)
				addlog(" wscale=%u",
				    s->dst.wscale & PF_WSCALE_MASK);
			addlog("]");
		}
		addlog(" %u:%u", s->src.state, s->dst.state);
		if (s->rule.ptr)
			addlog(" @%d", s->rule.ptr->nr);
	}
}

void
pf_print_flags(u_int8_t f)
{
	if (f)
		addlog(" ");
	if (f & TH_FIN)
		addlog("F");
	if (f & TH_SYN)
		addlog("S");
	if (f & TH_RST)
		addlog("R");
	if (f & TH_PUSH)
		addlog("P");
	if (f & TH_ACK)
		addlog("A");
	if (f & TH_URG)
		addlog("U");
	if (f & TH_ECE)
		addlog("E");
	if (f & TH_CWR)
		addlog("W");
}

#define	PF_SET_SKIP_STEPS(i)					\
	do {							\
		while (head[i] != cur) {			\
			head[i]->skip[i].ptr = cur;		\
			head[i] = TAILQ_NEXT(head[i], entries);	\
		}						\
	} while (0)

void
pf_calc_skip_steps(struct pf_rulequeue *rules)
{
	struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT];
	int i;

	cur = TAILQ_FIRST(rules);
	prev = cur;
	for (i = 0; i < PF_SKIP_COUNT; ++i)
		head[i] = cur;
	while (cur != NULL) {
		if (cur->kif != prev->kif || cur->ifnot != prev->ifnot)
			PF_SET_SKIP_STEPS(PF_SKIP_IFP);
		if (cur->direction != prev->direction)
			PF_SET_SKIP_STEPS(PF_SKIP_DIR);
		if (cur->onrdomain != prev->onrdomain ||
		    cur->ifnot != prev->ifnot)
			PF_SET_SKIP_STEPS(PF_SKIP_RDOM);
		if (cur->af != prev->af)
			PF_SET_SKIP_STEPS(PF_SKIP_AF);
		if (cur->proto != prev->proto)
			PF_SET_SKIP_STEPS(PF_SKIP_PROTO);
		if (cur->src.neg != prev->src.neg ||
		    pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr))
			PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR);
		if (cur->dst.neg != prev->dst.neg ||
		    pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr))
			PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR);
		if (cur->src.port[0] != prev->src.port[0] ||
		    cur->src.port[1] != prev->src.port[1] ||
		    cur->src.port_op != prev->src.port_op)
			PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT);
		if (cur->dst.port[0] != prev->dst.port[0] ||
		    cur->dst.port[1] != prev->dst.port[1] ||
		    cur->dst.port_op != prev->dst.port_op)
			PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);

		prev = cur;
		cur = TAILQ_NEXT(cur, entries);
	}
	for (i = 0; i < PF_SKIP_COUNT; ++i)
		PF_SET_SKIP_STEPS(i);
}

int
pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2)
{
	if (aw1->type != aw2->type)
		return (1);
	switch (aw1->type) {
	case PF_ADDR_ADDRMASK:
	case PF_ADDR_RANGE:
		if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, AF_INET6))
			return (1);
		if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, AF_INET6))
			return (1);
		return (0);
	case PF_ADDR_DYNIFTL:
		return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt);
	case PF_ADDR_NONE:
	case PF_ADDR_NOROUTE:
	case PF_ADDR_URPFFAILED:
		return (0);
	case PF_ADDR_TABLE:
		return (aw1->p.tbl != aw2->p.tbl);
	case PF_ADDR_RTLABEL:
		return (aw1->v.rtlabel != aw2->v.rtlabel);
	default:
		addlog("invalid address type: %d\n", aw1->type);
		return (1);
	}
}

/* This algorithm computes 'a + b - c' in ones-complement using a trick to
 * emulate at most one ones-complement subtraction. This thereby limits net
 * carries/borrows to at most one, eliminating a reduction step and saving one
 * each of +, >>, & and ~.
 *
 * def. x mod y = x - (x//y)*y    for integer x,y
 * def. sum = x mod 2^16
 * def. accumulator = (x >> 16) mod 2^16
 *
 * The trick works as follows: subtracting exactly one u_int16_t from the
 * u_int32_t x incurs at most one underflow, wrapping its upper 16-bits, the
 * accumulator, to 2^16 - 1. Adding this to the 16-bit sum preserves the
 * ones-complement borrow:
 *
 *  (sum + accumulator) mod 2^16
 * = { assume underflow: accumulator := 2^16 - 1 }
 *  (sum + 2^16 - 1) mod 2^16
 * = { mod }
 *  (sum - 1) mod 2^16
 *
 * Although this breaks for sum = 0, giving 0xffff, which is ones-complement's
 * other zero, not -1, that cannot occur: the 16-bit sum cannot be underflown
 * to zero as that requires subtraction of at least 2^16, which exceeds a
 * single u_int16_t's range.
 *
 * We use the following theorem to derive the implementation:
 *
 * th. (x + (y mod z)) mod z = (x + y) mod z    (0)
 * proof.
 *  (x + (y mod z)) mod z
 * = { def mod }
 *  (x + y - (y//z)*z) mod z
 * = { (a + b*c) mod c = a mod c }
 *  (x + y) mod z    [end of proof]
 *
 * ... and thereby obtain:
 *
 *  (sum + accumulator) mod 2^16
 * = { def. accumulator, def. sum }
 *  (x mod 2^16 + (x >> 16) mod 2^16) mod 2^16
 * = { (0), twice }
 *  (x + (x >> 16)) mod 2^16
 * = { x mod 2^n = x & (2^n - 1) }
 *  (x + (x >> 16)) & 0xffff
 *
 * Note: this serves also as a reduction step for at most one add (as the
 * trailing mod 2^16 prevents further reductions by destroying carries).
 */
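/* Illustrative example (not in the original source): *cksum = 0x0001,
 * was = 0x0000, now = 0x0002.  Then x = 0x0001 + 0x0000 - 0x0002 =
 * 0xffffffff; the underflow wrapped the accumulator to 0xffff.  Folding
 * once, x = (0xffffffff + 0xffff) & 0xffff = 0xfffe, which is exactly the
 * ones-complement sum 0x0001 +_1 ~0x0002 = 0x0001 +_1 0xfffd.
 */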
static __inline void
pf_cksum_fixup(u_int16_t *cksum, u_int16_t was, u_int16_t now,
    u_int8_t proto)
{
	u_int32_t x;
	const int udp = proto == IPPROTO_UDP;

	x = *cksum + was - now;
	x = (x + (x >> 16)) & 0xffff;

	/* optimise: eliminate a branch when not udp */
	if (udp && *cksum == 0x0000)
		return;
	if (udp && x == 0x0000)
		x = 0xffff;

	*cksum = (u_int16_t)(x);
}

#ifdef INET6
/* pre: coverage(cksum) is superset of coverage(covered_cksum) */
static __inline void
pf_cksum_uncover(u_int16_t *cksum, u_int16_t covered_cksum, u_int8_t proto)
{
	pf_cksum_fixup(cksum, ~covered_cksum, 0x0, proto);
}

/* pre: disjoint(coverage(cksum), coverage(uncovered_cksum)) */
static __inline void
pf_cksum_cover(u_int16_t *cksum, u_int16_t uncovered_cksum, u_int8_t proto)
{
	pf_cksum_fixup(cksum, 0x0, ~uncovered_cksum, proto);
}
#endif /* INET6 */

/* pre: *a is 16-bit aligned within its packet
 *
 * This algorithm emulates 16-bit ones-complement sums on a twos-complement
 * machine by conserving ones-complement's otherwise discarded carries in the
 * upper bits of x. These accumulated carries when added to the lower 16-bits
 * over at least zero 'reduction' steps then complete the ones-complement sum.
 *
 * def. sum = x mod 2^16
 * def. accumulator = (x >> 16)
 *
 * At most two reduction steps
 *
 *  x := sum + accumulator
 * = { def sum, def accumulator }
 *  x := x mod 2^16 + (x >> 16)
 * = { x mod 2^n = x & (2^n - 1) }
 *  x := (x & 0xffff) + (x >> 16)
 *
 * are necessary to incorporate the accumulated carries (at most one per add)
 * i.e. to reduce x < 2^16 from at most 16 carries in the upper 16 bits.
 *
 * The function is also invariant over the endian of the host. Why?
 *
 * Define the unary transpose operator ~ on a bitstring in python slice
 * notation as lambda m: m[P:] + m[:P] , for some constant pivot P.
 *
 * th. ~ distributes over ones-complement addition, denoted by +_1, i.e.
 *
 *     ~m +_1 ~n = ~(m +_1 n)    (for all bitstrings m,n of equal length)
 *
 * proof. Regard the bitstrings in m +_1 n as split at P, forming at most two
 * 'half-adds'. Under ones-complement addition, each half-add carries to the
 * other, so the sum of each half-add is unaffected by their relative
 * order. Therefore:
 *
 *  ~m +_1 ~n
 * = { half-adds invariant under transposition }
 *  ~s
 * = { substitute }
 *  ~(m +_1 n)    [end of proof]
 *
 * th. Summing two in-memory ones-complement 16-bit variables m,n on a machine
 * with the converse endian does not alter the result.
 *
 * proof.
 *        { converse machine endian: load/store transposes, P := 8 }
 *  ~(~m +_1 ~n)
 * = { ~ over +_1 }
 *  ~~m +_1 ~~n
 * = { ~ is an involution }
 *  m +_1 n    [end of proof]
 *
 */
#define NEG(x) ((u_int16_t)~(x))
void
pf_cksum_fixup_a(u_int16_t *cksum, const struct pf_addr *a,
    const struct pf_addr *an, sa_family_t af, u_int8_t proto)
{
	u_int32_t	 x;
	const u_int16_t	*n = an->addr16;
	const u_int16_t	*o = a->addr16;
	const int	 udp = proto == IPPROTO_UDP;

	switch (af) {
	case AF_INET:
		x = *cksum + o[0] + NEG(n[0]) + o[1] + NEG(n[1]);
		break;
#ifdef INET6
	case AF_INET6:
		x = *cksum + o[0] + NEG(n[0]) + o[1] + NEG(n[1]) +\
			     o[2] + NEG(n[2]) + o[3] + NEG(n[3]) +\
			     o[4] + NEG(n[4]) + o[5] + NEG(n[5]) +\
			     o[6] + NEG(n[6]) + o[7] + NEG(n[7]);
		break;
#endif /* INET6 */
	default:
		unhandled_af(af);
	}

	x = (x & 0xffff) + (x >> 16);
	x = (x & 0xffff) + (x >> 16);

	/* optimise: eliminate a branch when not udp */
	if (udp && *cksum == 0x0000)
		return;
	if (udp && x == 0x0000)
		x = 0xffff;

	*cksum = (u_int16_t)(x);
}

int
pf_patch_8(struct pf_pdesc *pd, u_int8_t *f, u_int8_t v, bool hi)
{
	int	rewrite = 0;

	if (*f != v) {
		u_int16_t old = htons(hi ? (*f << 8) : *f);
		u_int16_t new = htons(hi ? ( v << 8) : v);

		pf_cksum_fixup(pd->pcksum, old, new, pd->proto);
		*f = v;
		rewrite = 1;
	}

	return (rewrite);
}

/* pre: *f is 16-bit aligned within its packet */
int
pf_patch_16(struct pf_pdesc *pd, u_int16_t *f, u_int16_t v)
{
	int	rewrite = 0;

	if (*f != v) {
		pf_cksum_fixup(pd->pcksum, *f, v, pd->proto);
		*f = v;
		rewrite = 1;
	}

	return (rewrite);
}

int
pf_patch_16_unaligned(struct pf_pdesc *pd, void *f, u_int16_t v, bool hi)
{
	int		 rewrite = 0;
	u_int8_t	*fb = (u_int8_t*)f;
	u_int8_t	*vb = (u_int8_t*)&v;

	if (hi && ALIGNED_POINTER(f, u_int16_t)) {
		return (pf_patch_16(pd, f, v)); /* optimise */
	}

	rewrite += pf_patch_8(pd, fb++, *vb++, hi);
	rewrite += pf_patch_8(pd, fb++, *vb++,!hi);

	return (rewrite);
}

/* pre: *f is 16-bit aligned within its packet */
/* pre: pd->proto != IPPROTO_UDP */
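/* Note (not in the original source): the 32-bit field is patched as two
 * 16-bit halves.  The internet checksum is a commutative ones-complement
 * sum over 16-bit words, so fixing up the halves separately, in either
 * order, yields the same checksum regardless of which wire word each half
 * maps to on a given endian.
 */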
int
pf_patch_32(struct pf_pdesc *pd, u_int32_t *f, u_int32_t v)
{
	int		 rewrite = 0;
	u_int16_t	*pc = pd->pcksum;
	u_int8_t	 proto = pd->proto;

	/* optimise: inline udp fixup code is unused; let compiler scrub it */
	if (proto == IPPROTO_UDP)
		panic("%s: udp", __func__);

	/* optimise: skip *f != v guard; true for all use-cases */
	pf_cksum_fixup(pc, *f / (1 << 16), v / (1 << 16), proto);
	pf_cksum_fixup(pc, *f % (1 << 16), v % (1 << 16), proto);

	*f = v;
	rewrite = 1;

	return (rewrite);
}

int
pf_patch_32_unaligned(struct pf_pdesc *pd, void *f, u_int32_t v, bool hi)
{
	int		 rewrite = 0;
	u_int8_t	*fb = (u_int8_t*)f;
	u_int8_t	*vb = (u_int8_t*)&v;

	if (hi && ALIGNED_POINTER(f, u_int32_t)) {
		return (pf_patch_32(pd, f, v)); /* optimise */
	}

	rewrite += pf_patch_8(pd, fb++, *vb++, hi);
	rewrite += pf_patch_8(pd, fb++, *vb++,!hi);
	rewrite += pf_patch_8(pd, fb++, *vb++, hi);
	rewrite += pf_patch_8(pd, fb++, *vb++,!hi);

	return (rewrite);
}

2045 int 2046 pf_icmp_mapping(struct pf_pdesc *pd, u_int8_t type, int *icmp_dir, 2047 u_int16_t *virtual_id, u_int16_t *virtual_type) 2048 { 2049 /* 2050 * ICMP types marked with PF_OUT are typically responses to 2051 * PF_IN, and will match states in the opposite direction. 2052 * PF_IN ICMP types need to match a state with that type. 2053 */ 2054 *icmp_dir = PF_OUT; 2055 2056 /* Queries (and responses) */ 2057 switch (pd->af) { 2058 case AF_INET: 2059 switch (type) { 2060 case ICMP_ECHO: 2061 *icmp_dir = PF_IN; 2062 /* FALLTHROUGH */ 2063 case ICMP_ECHOREPLY: 2064 *virtual_type = ICMP_ECHO; 2065 *virtual_id = pd->hdr.icmp.icmp_id; 2066 break; 2067 2068 case ICMP_TSTAMP: 2069 *icmp_dir = PF_IN; 2070 /* FALLTHROUGH */ 2071 case ICMP_TSTAMPREPLY: 2072 *virtual_type = ICMP_TSTAMP; 2073 *virtual_id = pd->hdr.icmp.icmp_id; 2074 break; 2075 2076 case ICMP_IREQ: 2077 *icmp_dir = PF_IN; 2078 /* FALLTHROUGH */ 2079 case ICMP_IREQREPLY: 2080 *virtual_type = ICMP_IREQ; 2081 *virtual_id = pd->hdr.icmp.icmp_id; 2082 break; 2083 2084 case ICMP_MASKREQ: 2085 *icmp_dir = PF_IN; 2086 /* FALLTHROUGH */ 2087 case ICMP_MASKREPLY: 2088 *virtual_type = ICMP_MASKREQ; 2089 *virtual_id = pd->hdr.icmp.icmp_id; 2090 break; 2091 2092 case ICMP_IPV6_WHEREAREYOU: 2093 *icmp_dir = PF_IN; 2094 /* FALLTHROUGH */ 2095 case ICMP_IPV6_IAMHERE: 2096 *virtual_type = ICMP_IPV6_WHEREAREYOU; 2097 *virtual_id = 0; /* Nothing sane to match on! */ 2098 break; 2099 2100 case ICMP_MOBILE_REGREQUEST: 2101 *icmp_dir = PF_IN; 2102 /* FALLTHROUGH */ 2103 case ICMP_MOBILE_REGREPLY: 2104 *virtual_type = ICMP_MOBILE_REGREQUEST; 2105 *virtual_id = 0; /* Nothing sane to match on! */ 2106 break; 2107 2108 case ICMP_ROUTERSOLICIT: 2109 *icmp_dir = PF_IN; 2110 /* FALLTHROUGH */ 2111 case ICMP_ROUTERADVERT: 2112 *virtual_type = ICMP_ROUTERSOLICIT; 2113 *virtual_id = 0; /* Nothing sane to match on! */ 2114 break; 2115 2116 /* These ICMP types map to other connections */ 2117 case ICMP_UNREACH: 2118 case ICMP_SOURCEQUENCH: 2119 case ICMP_REDIRECT: 2120 case ICMP_TIMXCEED: 2121 case ICMP_PARAMPROB: 2122 /* These will not be used, but set them anyway */ 2123 *icmp_dir = PF_IN; 2124 *virtual_type = htons(type); 2125 *virtual_id = 0; 2126 return (1); /* These types match to another state */ 2127 2128 /* 2129 * All remaining ICMP types get their own states, 2130 * and will only match in one direction. 2131 */ 2132 default: 2133 *icmp_dir = PF_IN; 2134 *virtual_type = type; 2135 *virtual_id = 0; 2136 break; 2137 } 2138 break; 2139 #ifdef INET6 2140 case AF_INET6: 2141 switch (type) { 2142 case ICMP6_ECHO_REQUEST: 2143 *icmp_dir = PF_IN; 2144 /* FALLTHROUGH */ 2145 case ICMP6_ECHO_REPLY: 2146 *virtual_type = ICMP6_ECHO_REQUEST; 2147 *virtual_id = pd->hdr.icmp6.icmp6_id; 2148 break; 2149 2150 case MLD_LISTENER_QUERY: 2151 case MLD_LISTENER_REPORT: { 2152 struct mld_hdr *mld = &pd->hdr.mld; 2153 u_int32_t h; 2154 2155 /* 2156 * Listener Report can be sent by clients 2157 * without an associated Listener Query. 2158 * In addition to that, when Report is sent as a 2159 * reply to a Query its source and destination 2160 * address are different. 
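			 * Since neither address reliably pairs the two
			 * directions, a 16-bit virtual id is derived below
			 * by xor-folding the four 32-bit words of the
			 * group address.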
2161 */ 2162 *icmp_dir = PF_IN; 2163 *virtual_type = MLD_LISTENER_QUERY; 2164 /* generate fake id for these messages */ 2165 h = mld->mld_addr.s6_addr32[0] ^ 2166 mld->mld_addr.s6_addr32[1] ^ 2167 mld->mld_addr.s6_addr32[2] ^ 2168 mld->mld_addr.s6_addr32[3]; 2169 *virtual_id = (h >> 16) ^ (h & 0xffff); 2170 break; 2171 } 2172 2173 /* 2174 * ICMP6_FQDN and ICMP6_NI query/reply are the same type as 2175 * ICMP6_WRU 2176 */ 2177 case ICMP6_WRUREQUEST: 2178 *icmp_dir = PF_IN; 2179 /* FALLTHROUGH */ 2180 case ICMP6_WRUREPLY: 2181 *virtual_type = ICMP6_WRUREQUEST; 2182 *virtual_id = 0; /* Nothing sane to match on! */ 2183 break; 2184 2185 case MLD_MTRACE: 2186 *icmp_dir = PF_IN; 2187 /* FALLTHROUGH */ 2188 case MLD_MTRACE_RESP: 2189 *virtual_type = MLD_MTRACE; 2190 *virtual_id = 0; /* Nothing sane to match on! */ 2191 break; 2192 2193 case ND_NEIGHBOR_SOLICIT: 2194 *icmp_dir = PF_IN; 2195 /* FALLTHROUGH */ 2196 case ND_NEIGHBOR_ADVERT: { 2197 struct nd_neighbor_solicit *nd = &pd->hdr.nd_ns; 2198 u_int32_t h; 2199 2200 *virtual_type = ND_NEIGHBOR_SOLICIT; 2201 /* generate fake id for these messages */ 2202 h = nd->nd_ns_target.s6_addr32[0] ^ 2203 nd->nd_ns_target.s6_addr32[1] ^ 2204 nd->nd_ns_target.s6_addr32[2] ^ 2205 nd->nd_ns_target.s6_addr32[3]; 2206 *virtual_id = (h >> 16) ^ (h & 0xffff); 2207 break; 2208 } 2209 2210 /* 2211 * These ICMP types map to other connections. 2212 * ND_REDIRECT can't be in this list because the triggering 2213 * packet header is optional. 2214 */ 2215 case ICMP6_DST_UNREACH: 2216 case ICMP6_PACKET_TOO_BIG: 2217 case ICMP6_TIME_EXCEEDED: 2218 case ICMP6_PARAM_PROB: 2219 /* These will not be used, but set them anyway */ 2220 *icmp_dir = PF_IN; 2221 *virtual_type = htons(type); 2222 *virtual_id = 0; 2223 return (1); /* These types match to another state */ 2224 /* 2225 * All remaining ICMP6 types get their own states, 2226 * and will only match in one direction. 
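		 * (e.g. router solicitations and advertisements, which
		 * are not special-cased above, fall through to here)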
		 */
		default:
			*icmp_dir = PF_IN;
			*virtual_type = type;
			*virtual_id = 0;
			break;
		}
		break;
#endif /* INET6 */
	}
	*virtual_type = htons(*virtual_type);
	return (0);  /* These types match to their own state */
}

void
pf_translate_icmp(struct pf_pdesc *pd, struct pf_addr *qa, u_int16_t *qp,
    struct pf_addr *oa, struct pf_addr *na, u_int16_t np)
{
	/* note: doesn't bother to fix up quoted checksums, if any */

	/* change quoted protocol port */
	if (qp != NULL)
		pf_patch_16(pd, qp, np);

	/* change quoted ip address */
	pf_cksum_fixup_a(pd->pcksum, qa, na, pd->af, pd->proto);
	pf_addrcpy(qa, na, pd->af);

	/* change network-header's ip address */
	if (oa)
		pf_translate_a(pd, oa, na);
}

/* pre: *a is 16-bit aligned within its packet */
/*      *a is a network header src/dst address */
int
pf_translate_a(struct pf_pdesc *pd, struct pf_addr *a, struct pf_addr *an)
{
	int	rewrite = 0;

	/* warning: !PF_ANEQ is not equivalent to PF_AEQ */
	if (!PF_ANEQ(a, an, pd->af))
		return (0);

	/* fixup transport pseudo-header, if any */
	switch (pd->proto) {
	case IPPROTO_TCP:	/* FALLTHROUGH */
	case IPPROTO_UDP:	/* FALLTHROUGH */
	case IPPROTO_ICMPV6:
		pf_cksum_fixup_a(pd->pcksum, a, an, pd->af, pd->proto);
		break;
	default:
		break;	/* assume no pseudo-header */
	}

	pf_addrcpy(a, an, pd->af);
	rewrite = 1;

	return (rewrite);
}

#ifdef INET6
/* pf_translate_af() may change pd->m, adjust local copies after calling */
int
pf_translate_af(struct pf_pdesc *pd)
{
	static const struct pf_addr	zero;
	struct ip		*ip4;
	struct ip6_hdr		*ip6;
	int			 copyback = 0;
	u_int			 hlen, ohlen, dlen;
	u_int16_t		*pc;
	u_int8_t		 af_proto, naf_proto;

	hlen = (pd->naf == AF_INET) ?
sizeof(*ip4) : sizeof(*ip6); 2302 ohlen = pd->off; 2303 dlen = pd->tot_len - pd->off; 2304 pc = pd->pcksum; 2305 2306 af_proto = naf_proto = pd->proto; 2307 if (naf_proto == IPPROTO_ICMP) 2308 af_proto = IPPROTO_ICMPV6; 2309 if (naf_proto == IPPROTO_ICMPV6) 2310 af_proto = IPPROTO_ICMP; 2311 2312 /* uncover stale pseudo-header */ 2313 switch (af_proto) { 2314 case IPPROTO_ICMPV6: 2315 /* optimise: unchanged for TCP/UDP */ 2316 pf_cksum_fixup(pc, htons(af_proto), 0x0, af_proto); 2317 pf_cksum_fixup(pc, htons(dlen), 0x0, af_proto); 2318 /* FALLTHROUGH */ 2319 case IPPROTO_UDP: /* FALLTHROUGH */ 2320 case IPPROTO_TCP: 2321 pf_cksum_fixup_a(pc, pd->src, &zero, pd->af, af_proto); 2322 pf_cksum_fixup_a(pc, pd->dst, &zero, pd->af, af_proto); 2323 copyback = 1; 2324 break; 2325 default: 2326 break; /* assume no pseudo-header */ 2327 } 2328 2329 /* replace the network header */ 2330 m_adj(pd->m, pd->off); 2331 pd->src = NULL; 2332 pd->dst = NULL; 2333 2334 if ((M_PREPEND(pd->m, hlen, M_DONTWAIT)) == NULL) { 2335 pd->m = NULL; 2336 return (-1); 2337 } 2338 2339 pd->off = hlen; 2340 pd->tot_len += hlen - ohlen; 2341 2342 switch (pd->naf) { 2343 case AF_INET: 2344 ip4 = mtod(pd->m, struct ip *); 2345 memset(ip4, 0, hlen); 2346 ip4->ip_v = IPVERSION; 2347 ip4->ip_hl = hlen >> 2; 2348 ip4->ip_tos = pd->tos; 2349 ip4->ip_len = htons(hlen + dlen); 2350 ip4->ip_id = htons(ip_randomid()); 2351 ip4->ip_off = htons(IP_DF); 2352 ip4->ip_ttl = pd->ttl; 2353 ip4->ip_p = pd->proto; 2354 ip4->ip_src = pd->nsaddr.v4; 2355 ip4->ip_dst = pd->ndaddr.v4; 2356 break; 2357 case AF_INET6: 2358 ip6 = mtod(pd->m, struct ip6_hdr *); 2359 memset(ip6, 0, hlen); 2360 ip6->ip6_vfc = IPV6_VERSION; 2361 ip6->ip6_flow |= htonl((u_int32_t)pd->tos << 20); 2362 ip6->ip6_plen = htons(dlen); 2363 ip6->ip6_nxt = pd->proto; 2364 if (!pd->ttl || pd->ttl > IPV6_DEFHLIM) 2365 ip6->ip6_hlim = IPV6_DEFHLIM; 2366 else 2367 ip6->ip6_hlim = pd->ttl; 2368 ip6->ip6_src = pd->nsaddr.v6; 2369 ip6->ip6_dst = pd->ndaddr.v6; 2370 break; 2371 default: 2372 unhandled_af(pd->naf); 2373 } 2374 2375 /* UDP over IPv6 must be checksummed per rfc2460 p27 */ 2376 if (naf_proto == IPPROTO_UDP && *pc == 0x0000 && 2377 pd->naf == AF_INET6) { 2378 pd->m->m_pkthdr.csum_flags |= M_UDP_CSUM_OUT; 2379 } 2380 2381 /* cover fresh pseudo-header */ 2382 switch (naf_proto) { 2383 case IPPROTO_ICMPV6: 2384 /* optimise: unchanged for TCP/UDP */ 2385 pf_cksum_fixup(pc, 0x0, htons(naf_proto), naf_proto); 2386 pf_cksum_fixup(pc, 0x0, htons(dlen), naf_proto); 2387 /* FALLTHROUGH */ 2388 case IPPROTO_UDP: /* FALLTHROUGH */ 2389 case IPPROTO_TCP: 2390 pf_cksum_fixup_a(pc, &zero, &pd->nsaddr, pd->naf, naf_proto); 2391 pf_cksum_fixup_a(pc, &zero, &pd->ndaddr, pd->naf, naf_proto); 2392 copyback = 1; 2393 break; 2394 default: 2395 break; /* assume no pseudo-header */ 2396 } 2397 2398 /* flush pd->pcksum */ 2399 if (copyback) 2400 m_copyback(pd->m, pd->off, pd->hdrlen, &pd->hdr, M_NOWAIT); 2401 2402 return (0); 2403 } 2404 2405 int 2406 pf_change_icmp_af(struct mbuf *m, int ipoff2, struct pf_pdesc *pd, 2407 struct pf_pdesc *pd2, struct pf_addr *src, struct pf_addr *dst, 2408 sa_family_t af, sa_family_t naf) 2409 { 2410 struct mbuf *n = NULL; 2411 struct ip *ip4; 2412 struct ip6_hdr *ip6; 2413 u_int hlen, ohlen, dlen; 2414 int d; 2415 2416 if (af == naf || (af != AF_INET && af != AF_INET6) || 2417 (naf != AF_INET && naf != AF_INET6)) 2418 return (-1); 2419 2420 /* split the mbuf chain on the quoted ip/ip6 header boundary */ 2421 if ((n = m_split(m, ipoff2, M_DONTWAIT)) == NULL) 2422 
		return (-1);

	/* new quoted header */
	hlen = naf == AF_INET ? sizeof(*ip4) : sizeof(*ip6);
	/* old quoted header */
	ohlen = pd2->off - ipoff2;

	/* trim old quoted header */
	pf_cksum_uncover(pd->pcksum, in_cksum(n, ohlen), pd->proto);
	m_adj(n, ohlen);

	/* prepend a new, translated, quoted header */
	if ((M_PREPEND(n, hlen, M_DONTWAIT)) == NULL)
		return (-1);

	switch (naf) {
	case AF_INET:
		ip4 = mtod(n, struct ip *);
		memset(ip4, 0, sizeof(*ip4));
		ip4->ip_v = IPVERSION;
		ip4->ip_hl = sizeof(*ip4) >> 2;
		ip4->ip_len = htons(sizeof(*ip4) + pd2->tot_len - ohlen);
		ip4->ip_id = htons(ip_randomid());
		ip4->ip_off = htons(IP_DF);
		ip4->ip_ttl = pd2->ttl;
		if (pd2->proto == IPPROTO_ICMPV6)
			ip4->ip_p = IPPROTO_ICMP;
		else
			ip4->ip_p = pd2->proto;
		ip4->ip_src = src->v4;
		ip4->ip_dst = dst->v4;
		ip4->ip_sum = in_cksum(n, ip4->ip_hl << 2);
		break;
	case AF_INET6:
		ip6 = mtod(n, struct ip6_hdr *);
		memset(ip6, 0, sizeof(*ip6));
		ip6->ip6_vfc = IPV6_VERSION;
		ip6->ip6_plen = htons(pd2->tot_len - ohlen);
		if (pd2->proto == IPPROTO_ICMP)
			ip6->ip6_nxt = IPPROTO_ICMPV6;
		else
			ip6->ip6_nxt = pd2->proto;
		if (!pd2->ttl || pd2->ttl > IPV6_DEFHLIM)
			ip6->ip6_hlim = IPV6_DEFHLIM;
		else
			ip6->ip6_hlim = pd2->ttl;
		ip6->ip6_src = src->v6;
		ip6->ip6_dst = dst->v6;
		break;
	}

	/* cover new quoted header */
	/* optimise: any new AF_INET header of ours sums to zero */
	if (naf != AF_INET) {
		pf_cksum_cover(pd->pcksum, in_cksum(n, hlen), pd->proto);
	}

	/* reattach modified quoted packet to outer header */
	{
		int nlen = n->m_pkthdr.len;
		m_cat(m, n);
		m->m_pkthdr.len += nlen;
	}

	/* account for altered length */
	d = hlen - ohlen;

	if (pd->proto == IPPROTO_ICMPV6) {
		/* fixup pseudo-header */
		dlen = pd->tot_len - pd->off;
		pf_cksum_fixup(pd->pcksum,
		    htons(dlen), htons(dlen + d), pd->proto);
	}

	pd->tot_len += d;
	pd2->tot_len += d;
	pd2->off += d;

	/*
	 * note: not bothering to update network headers, as these are
	 * due for rewrite by pf_translate_af()
	 */

	return (0);
}

#define PTR_IP(field)	(offsetof(struct ip, field))
#define PTR_IP6(field)	(offsetof(struct ip6_hdr, field))

int
pf_translate_icmp_af(struct pf_pdesc *pd, int af, void *arg)
{
	struct icmp		*icmp4;
	struct icmp6_hdr	*icmp6;
	u_int32_t		 mtu;
	int32_t			 ptr = -1;
	u_int8_t		 type;
	u_int8_t		 code;

	switch (af) {
	case AF_INET:
		icmp6 = arg;
		type = icmp6->icmp6_type;
		code = icmp6->icmp6_code;
		mtu = ntohl(icmp6->icmp6_mtu);

		switch (type) {
		case ICMP6_ECHO_REQUEST:
			type = ICMP_ECHO;
			break;
		case ICMP6_ECHO_REPLY:
			type = ICMP_ECHOREPLY;
			break;
		case ICMP6_DST_UNREACH:
			type = ICMP_UNREACH;
			switch (code) {
			case ICMP6_DST_UNREACH_NOROUTE:
			case ICMP6_DST_UNREACH_BEYONDSCOPE:
			case ICMP6_DST_UNREACH_ADDR:
				code = ICMP_UNREACH_HOST;
				break;
			case ICMP6_DST_UNREACH_ADMIN:
				code = ICMP_UNREACH_HOST_PROHIB;
				break;
			case ICMP6_DST_UNREACH_NOPORT:
				code = ICMP_UNREACH_PORT;
				break;
			default:
				return (-1);
			}
			break;
		case ICMP6_PACKET_TOO_BIG:
			type = ICMP_UNREACH;
			code = ICMP_UNREACH_NEEDFRAG;
			mtu -= 20;	/* ipv6 header is 20 bytes larger */
			break;
		case ICMP6_TIME_EXCEEDED:
			type = ICMP_TIMXCEED;
			break;
		case ICMP6_PARAM_PROB:
			switch (code) {
			case ICMP6_PARAMPROB_HEADER:
				type = ICMP_PARAMPROB;
				code = ICMP_PARAMPROB_ERRATPTR;
				ptr = ntohl(icmp6->icmp6_pptr);

				if (ptr == PTR_IP6(ip6_vfc))
					; /* preserve */
				else if (ptr == PTR_IP6(ip6_vfc) + 1)
					ptr = PTR_IP(ip_tos);
				else if (ptr == PTR_IP6(ip6_plen) ||
				    ptr == PTR_IP6(ip6_plen) + 1)
					ptr = PTR_IP(ip_len);
				else if (ptr == PTR_IP6(ip6_nxt))
					ptr = PTR_IP(ip_p);
				else if (ptr == PTR_IP6(ip6_hlim))
					ptr = PTR_IP(ip_ttl);
				else if (ptr >= PTR_IP6(ip6_src) &&
				    ptr < PTR_IP6(ip6_dst))
					ptr = PTR_IP(ip_src);
				else if (ptr >= PTR_IP6(ip6_dst) &&
				    ptr < sizeof(struct ip6_hdr))
					ptr = PTR_IP(ip_dst);
				else {
					return (-1);
				}
				break;
			case ICMP6_PARAMPROB_NEXTHEADER:
				type = ICMP_UNREACH;
				code = ICMP_UNREACH_PROTOCOL;
				break;
			default:
				return (-1);
			}
			break;
		default:
			return (-1);
		}

		pf_patch_8(pd, &icmp6->icmp6_type, type, PF_HI);
		pf_patch_8(pd, &icmp6->icmp6_code, code, PF_LO);

		/* aligns well with an icmpv4 nextmtu */
		pf_patch_32(pd, &icmp6->icmp6_mtu, htonl(mtu));

		/* icmpv4 pptr is only one byte, the most significant one */
		if (ptr >= 0)
			pf_patch_32(pd, &icmp6->icmp6_pptr, htonl(ptr << 24));
		break;
	case AF_INET6:
		icmp4 = arg;
		type = icmp4->icmp_type;
		code = icmp4->icmp_code;
		mtu = ntohs(icmp4->icmp_nextmtu);

		switch (type) {
		case ICMP_ECHO:
			type = ICMP6_ECHO_REQUEST;
			break;
		case ICMP_ECHOREPLY:
			type = ICMP6_ECHO_REPLY;
			break;
		case ICMP_UNREACH:
			type = ICMP6_DST_UNREACH;
			switch (code) {
			case ICMP_UNREACH_NET:
			case ICMP_UNREACH_HOST:
			case ICMP_UNREACH_NET_UNKNOWN:
			case ICMP_UNREACH_HOST_UNKNOWN:
			case ICMP_UNREACH_ISOLATED:
			case ICMP_UNREACH_TOSNET:
			case ICMP_UNREACH_TOSHOST:
				code = ICMP6_DST_UNREACH_NOROUTE;
				break;
			case ICMP_UNREACH_PORT:
				code = ICMP6_DST_UNREACH_NOPORT;
				break;
			case ICMP_UNREACH_NET_PROHIB:
			case ICMP_UNREACH_HOST_PROHIB:
			case ICMP_UNREACH_FILTER_PROHIB:
			case ICMP_UNREACH_PRECEDENCE_CUTOFF:
				code = ICMP6_DST_UNREACH_ADMIN;
				break;
			case ICMP_UNREACH_PROTOCOL:
				type = ICMP6_PARAM_PROB;
				code = ICMP6_PARAMPROB_NEXTHEADER;
				ptr = offsetof(struct ip6_hdr, ip6_nxt);
				break;
			case ICMP_UNREACH_NEEDFRAG:
				type = ICMP6_PACKET_TOO_BIG;
				code = 0;
				mtu += 20;	/* see the -= 20 above */
				break;
			default:
				return (-1);
			}
			break;
		case ICMP_TIMXCEED:
			type = ICMP6_TIME_EXCEEDED;
			break;
		case ICMP_PARAMPROB:
			type = ICMP6_PARAM_PROB;
			switch (code) {
			case ICMP_PARAMPROB_ERRATPTR:
				code = ICMP6_PARAMPROB_HEADER;
				break;
			case ICMP_PARAMPROB_LENGTH:
				code = ICMP6_PARAMPROB_HEADER;
				break;
			default:
				return (-1);
			}

			ptr = icmp4->icmp_pptr;
			if (ptr == 0 || ptr == PTR_IP(ip_tos))
				; /* preserve */
			else if (ptr == PTR_IP(ip_len) ||
			    ptr == PTR_IP(ip_len) + 1)
				ptr = PTR_IP6(ip6_plen);
			else if (ptr == PTR_IP(ip_ttl))
				ptr = PTR_IP6(ip6_hlim);
			else if (ptr == PTR_IP(ip_p))
				ptr = PTR_IP6(ip6_nxt);
			else if (ptr >= PTR_IP(ip_src) &&
			    ptr < PTR_IP(ip_dst))
				ptr = PTR_IP6(ip6_src);
			else if (ptr >= PTR_IP(ip_dst) &&
			    ptr < sizeof(struct ip))
				ptr =
PTR_IP6(ip6_dst); 2690 else { 2691 return (-1); 2692 } 2693 break; 2694 default: 2695 return (-1); 2696 } 2697 2698 pf_patch_8(pd, &icmp4->icmp_type, type, PF_HI); 2699 pf_patch_8(pd, &icmp4->icmp_code, code, PF_LO); 2700 pf_patch_16(pd, &icmp4->icmp_nextmtu, htons(mtu)); 2701 if (ptr >= 0) 2702 pf_patch_32(pd, &icmp4->icmp_void, htonl(ptr)); 2703 break; 2704 } 2705 2706 return (0); 2707 } 2708 #endif /* INET6 */ 2709 2710 /* 2711 * Need to modulate the sequence numbers in the TCP SACK option 2712 * (credits to Krzysztof Pfaff for report and patch) 2713 */ 2714 int 2715 pf_modulate_sack(struct pf_pdesc *pd, struct pf_state_peer *dst) 2716 { 2717 struct sackblk sack; 2718 int copyback = 0, i; 2719 int olen, optsoff; 2720 u_int8_t opts[MAX_TCPOPTLEN], *opt, *eoh; 2721 2722 olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr); 2723 optsoff = pd->off + sizeof(struct tcphdr); 2724 #define TCPOLEN_MINSACK (TCPOLEN_SACK + 2) 2725 if (olen < TCPOLEN_MINSACK || 2726 !pf_pull_hdr(pd->m, optsoff, opts, olen, NULL, NULL, pd->af)) 2727 return (0); 2728 2729 eoh = opts + olen; 2730 opt = opts; 2731 while ((opt = pf_find_tcpopt(opt, opts, olen, 2732 TCPOPT_SACK, TCPOLEN_MINSACK)) != NULL) 2733 { 2734 size_t safelen = MIN(opt[1], (eoh - opt)); 2735 for (i = 2; i + TCPOLEN_SACK <= safelen; i += TCPOLEN_SACK) { 2736 size_t startoff = (opt + i) - opts; 2737 memcpy(&sack, &opt[i], sizeof(sack)); 2738 pf_patch_32_unaligned(pd, &sack.start, 2739 htonl(ntohl(sack.start) - dst->seqdiff), 2740 PF_ALGNMNT(startoff)); 2741 pf_patch_32_unaligned(pd, &sack.end, 2742 htonl(ntohl(sack.end) - dst->seqdiff), 2743 PF_ALGNMNT(startoff + sizeof(sack.start))); 2744 memcpy(&opt[i], &sack, sizeof(sack)); 2745 } 2746 copyback = 1; 2747 opt += opt[1]; 2748 } 2749 2750 if (copyback) 2751 m_copyback(pd->m, optsoff, olen, opts, M_NOWAIT); 2752 return (copyback); 2753 } 2754 2755 struct mbuf * 2756 pf_build_tcp(const struct pf_rule *r, sa_family_t af, 2757 const struct pf_addr *saddr, const struct pf_addr *daddr, 2758 u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack, 2759 u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag, 2760 u_int16_t rtag, u_int sack, u_int rdom) 2761 { 2762 struct mbuf *m; 2763 int len, tlen; 2764 struct ip *h; 2765 #ifdef INET6 2766 struct ip6_hdr *h6; 2767 #endif /* INET6 */ 2768 struct tcphdr *th; 2769 char *opt; 2770 2771 /* maximum segment size tcp option */ 2772 tlen = sizeof(struct tcphdr); 2773 if (mss) 2774 tlen += 4; 2775 if (sack) 2776 tlen += 2; 2777 2778 switch (af) { 2779 case AF_INET: 2780 len = sizeof(struct ip) + tlen; 2781 break; 2782 #ifdef INET6 2783 case AF_INET6: 2784 len = sizeof(struct ip6_hdr) + tlen; 2785 break; 2786 #endif /* INET6 */ 2787 default: 2788 unhandled_af(af); 2789 } 2790 2791 /* create outgoing mbuf */ 2792 m = m_gethdr(M_DONTWAIT, MT_HEADER); 2793 if (m == NULL) 2794 return (NULL); 2795 if (tag) 2796 m->m_pkthdr.pf.flags |= PF_TAG_GENERATED; 2797 m->m_pkthdr.pf.tag = rtag; 2798 m->m_pkthdr.ph_rtableid = rdom; 2799 if (r && (r->scrub_flags & PFSTATE_SETPRIO)) 2800 m->m_pkthdr.pf.prio = r->set_prio[0]; 2801 if (r && r->qid) 2802 m->m_pkthdr.pf.qid = r->qid; 2803 m->m_data += max_linkhdr; 2804 m->m_pkthdr.len = m->m_len = len; 2805 m->m_pkthdr.ph_ifidx = 0; 2806 m->m_pkthdr.csum_flags |= M_TCP_CSUM_OUT; 2807 memset(m->m_data, 0, len); 2808 switch (af) { 2809 case AF_INET: 2810 h = mtod(m, struct ip *); 2811 h->ip_p = IPPROTO_TCP; 2812 h->ip_len = htons(tlen); 2813 h->ip_v = 4; 2814 h->ip_hl = sizeof(*h) >> 2; 2815 h->ip_tos = IPTOS_LOWDELAY; 
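		/*
		 * note: ip_len is stored twice in this block; it was
		 * seeded with the TCP length (tlen) above and is
		 * overwritten with the full datagram length (len) below.
		 */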
		h->ip_len = htons(len);
		h->ip_off = htons(ip_mtudisc ? IP_DF : 0);
		h->ip_ttl = ttl ? ttl : ip_defttl;
		h->ip_sum = 0;
		h->ip_src.s_addr = saddr->v4.s_addr;
		h->ip_dst.s_addr = daddr->v4.s_addr;

		th = (struct tcphdr *)((caddr_t)h + sizeof(struct ip));
		break;
#ifdef INET6
	case AF_INET6:
		h6 = mtod(m, struct ip6_hdr *);
		h6->ip6_nxt = IPPROTO_TCP;
		h6->ip6_plen = htons(tlen);
		h6->ip6_vfc |= IPV6_VERSION;
		h6->ip6_hlim = IPV6_DEFHLIM;
		memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr));
		memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr));

		th = (struct tcphdr *)((caddr_t)h6 + sizeof(struct ip6_hdr));
		break;
#endif /* INET6 */
	default:
		unhandled_af(af);
	}

	/* TCP header */
	th->th_sport = sport;
	th->th_dport = dport;
	th->th_seq = htonl(seq);
	th->th_ack = htonl(ack);
	th->th_off = tlen >> 2;
	th->th_flags = flags;
	th->th_win = htons(win);

	opt = (char *)(th + 1);
	if (mss) {
		opt[0] = TCPOPT_MAXSEG;
		opt[1] = 4;
		mss = htons(mss);
		memcpy((opt + 2), &mss, 2);
		opt += 4;
	}
	if (sack) {
		opt[0] = TCPOPT_SACK_PERMITTED;
		opt[1] = 2;
		opt += 2;
	}

	return (m);
}

void
pf_send_tcp(const struct pf_rule *r, sa_family_t af,
    const struct pf_addr *saddr, const struct pf_addr *daddr,
    u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack,
    u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag,
    u_int16_t rtag, u_int rdom)
{
	struct mbuf	*m;

	if ((m = pf_build_tcp(r, af, saddr, daddr, sport, dport, seq, ack,
	    flags, win, mss, ttl, tag, rtag, 0, rdom)) == NULL)
		return;

	switch (af) {
	case AF_INET:
		ip_send(m);
		break;
#ifdef INET6
	case AF_INET6:
		ip6_send(m);
		break;
#endif /* INET6 */
	}
}

static void
pf_send_challenge_ack(struct pf_pdesc *pd, struct pf_state *s,
    struct pf_state_peer *src, struct pf_state_peer *dst)
{
	/*
	 * We are sending a challenge ACK in response to a SYN packet that
	 * matches an existing state (modulo the TCP window check).  The
	 * packet must therefore be sent on behalf of the destination.
	 *
	 * We expect the sender to either remain silent or send an RST
	 * packet, so that both the firewall and the remote peer can purge
	 * the dead state from memory.
	 */
	pf_send_tcp(s->rule.ptr, pd->af, pd->dst, pd->src,
	    pd->hdr.tcp.th_dport, pd->hdr.tcp.th_sport, dst->seqlo,
	    src->seqlo, TH_ACK, 0, 0, s->rule.ptr->return_ttl, 1, 0,
	    pd->rdomain);
}

void
pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, int param,
    sa_family_t af, struct pf_rule *r, u_int rdomain)
{
	struct mbuf	*m0;

	if ((m0 = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL)
		return;

	m0->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
	m0->m_pkthdr.ph_rtableid = rdomain;
	if (r && (r->scrub_flags & PFSTATE_SETPRIO))
		m0->m_pkthdr.pf.prio = r->set_prio[0];
	if (r && r->qid)
		m0->m_pkthdr.pf.qid = r->qid;

	switch (af) {
	case AF_INET:
		icmp_error(m0, type, code, 0, param);
		break;
#ifdef INET6
	case AF_INET6:
		icmp6_error(m0, type, code, param);
		break;
#endif /* INET6 */
	}
}

/*
 * Return ((n == 0) == (a == b [with mask m]))
 * Note: n != 0 => returns (a != b [with mask m])
 */
int
pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m,
    struct pf_addr *b, sa_family_t af)
{
	switch (af) {
	case AF_INET:
		if ((a->addr32[0] & m->addr32[0]) ==
		    (b->addr32[0] & m->addr32[0]))
			return (n == 0);
		break;
#ifdef INET6
	case AF_INET6:
		if (((a->addr32[0] & m->addr32[0]) ==
		     (b->addr32[0] & m->addr32[0])) &&
		    ((a->addr32[1] & m->addr32[1]) ==
		     (b->addr32[1] & m->addr32[1])) &&
		    ((a->addr32[2] & m->addr32[2]) ==
		     (b->addr32[2] & m->addr32[2])) &&
		    ((a->addr32[3] & m->addr32[3]) ==
		     (b->addr32[3] & m->addr32[3])))
			return (n == 0);
		break;
#endif /* INET6 */
	}

	return (n != 0);
}

/*
 * Return 1 if b <= a <= e, otherwise return 0.
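 * (For example, with af == AF_INET, b = 10.0.0.10 and e = 10.0.0.20,
 * a = 10.0.0.15 yields 1 and a = 10.0.0.42 yields 0.  The words are
 * compared via ntohl(), i.e. in host byte order, so IPv6 ranges
 * compare lexicographically word by word.)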
2974 */ 2975 int 2976 pf_match_addr_range(struct pf_addr *b, struct pf_addr *e, 2977 struct pf_addr *a, sa_family_t af) 2978 { 2979 switch (af) { 2980 case AF_INET: 2981 if ((ntohl(a->addr32[0]) < ntohl(b->addr32[0])) || 2982 (ntohl(a->addr32[0]) > ntohl(e->addr32[0]))) 2983 return (0); 2984 break; 2985 #ifdef INET6 2986 case AF_INET6: { 2987 int i; 2988 2989 /* check a >= b */ 2990 for (i = 0; i < 4; ++i) 2991 if (ntohl(a->addr32[i]) > ntohl(b->addr32[i])) 2992 break; 2993 else if (ntohl(a->addr32[i]) < ntohl(b->addr32[i])) 2994 return (0); 2995 /* check a <= e */ 2996 for (i = 0; i < 4; ++i) 2997 if (ntohl(a->addr32[i]) < ntohl(e->addr32[i])) 2998 break; 2999 else if (ntohl(a->addr32[i]) > ntohl(e->addr32[i])) 3000 return (0); 3001 break; 3002 } 3003 #endif /* INET6 */ 3004 } 3005 return (1); 3006 } 3007 3008 int 3009 pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p) 3010 { 3011 switch (op) { 3012 case PF_OP_IRG: 3013 return ((p > a1) && (p < a2)); 3014 case PF_OP_XRG: 3015 return ((p < a1) || (p > a2)); 3016 case PF_OP_RRG: 3017 return ((p >= a1) && (p <= a2)); 3018 case PF_OP_EQ: 3019 return (p == a1); 3020 case PF_OP_NE: 3021 return (p != a1); 3022 case PF_OP_LT: 3023 return (p < a1); 3024 case PF_OP_LE: 3025 return (p <= a1); 3026 case PF_OP_GT: 3027 return (p > a1); 3028 case PF_OP_GE: 3029 return (p >= a1); 3030 } 3031 return (0); /* never reached */ 3032 } 3033 3034 int 3035 pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p) 3036 { 3037 return (pf_match(op, ntohs(a1), ntohs(a2), ntohs(p))); 3038 } 3039 3040 int 3041 pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u) 3042 { 3043 if (u == UID_MAX && op != PF_OP_EQ && op != PF_OP_NE) 3044 return (0); 3045 return (pf_match(op, a1, a2, u)); 3046 } 3047 3048 int 3049 pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g) 3050 { 3051 if (g == GID_MAX && op != PF_OP_EQ && op != PF_OP_NE) 3052 return (0); 3053 return (pf_match(op, a1, a2, g)); 3054 } 3055 3056 int 3057 pf_match_tag(struct mbuf *m, struct pf_rule *r, int *tag) 3058 { 3059 if (*tag == -1) 3060 *tag = m->m_pkthdr.pf.tag; 3061 3062 return ((!r->match_tag_not && r->match_tag == *tag) || 3063 (r->match_tag_not && r->match_tag != *tag)); 3064 } 3065 3066 int 3067 pf_match_rcvif(struct mbuf *m, struct pf_rule *r) 3068 { 3069 struct ifnet *ifp; 3070 struct pfi_kif *kif; 3071 3072 ifp = if_get(m->m_pkthdr.ph_ifidx); 3073 if (ifp == NULL) 3074 return (0); 3075 3076 #if NCARP > 0 3077 if (ifp->if_type == IFT_CARP && ifp->if_carpdev) 3078 kif = (struct pfi_kif *)ifp->if_carpdev->if_pf_kif; 3079 else 3080 #endif /* NCARP */ 3081 kif = (struct pfi_kif *)ifp->if_pf_kif; 3082 3083 if_put(ifp); 3084 3085 if (kif == NULL) { 3086 DPFPRINTF(LOG_ERR, 3087 "%s: kif == NULL, @%d via %s", __func__, 3088 r->nr, r->rcv_ifname); 3089 return (0); 3090 } 3091 3092 return (pfi_kif_match(r->rcv_kif, kif)); 3093 } 3094 3095 void 3096 pf_tag_packet(struct mbuf *m, int tag, int rtableid) 3097 { 3098 if (tag > 0) 3099 m->m_pkthdr.pf.tag = tag; 3100 if (rtableid >= 0) 3101 m->m_pkthdr.ph_rtableid = (u_int)rtableid; 3102 } 3103 3104 enum pf_test_status 3105 pf_step_into_anchor(struct pf_test_ctx *ctx, struct pf_rule *r) 3106 { 3107 int rv; 3108 3109 if (ctx->depth >= PF_ANCHOR_STACK_MAX) { 3110 log(LOG_ERR, "pf_step_into_anchor: stack overflow\n"); 3111 return (PF_TEST_FAIL); 3112 } 3113 3114 ctx->depth++; 3115 3116 if (r->anchor_wildcard) { 3117 struct pf_anchor *child; 3118 rv = PF_TEST_OK; 3119 RB_FOREACH(child, pf_anchor_node, &r->anchor->children) { 3120 rv = 
			    pf_match_rule(ctx, &child->ruleset);
			if ((rv == PF_TEST_QUICK) || (rv == PF_TEST_FAIL)) {
				/*
				 * we either hit a rule with quick action
				 * (more likely), or hit some runtime
				 * error (e.g. pool_get() failure).
				 */
				break;
			}
		}
	} else {
		rv = pf_match_rule(ctx, &r->anchor->ruleset);
		/*
		 * Unless errors occurred, stop iff any rule matched
		 * within quick anchors.
		 */
		if (rv != PF_TEST_FAIL && r->quick == PF_TEST_QUICK &&
		    *ctx->am == r)
			rv = PF_TEST_QUICK;
	}

	ctx->depth--;

	return (rv);
}

void
pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr,
    struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af)
{
	switch (af) {
	case AF_INET:
		naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
		    ((rmask->addr32[0] ^ 0xffffffff) & saddr->addr32[0]);
		break;
#ifdef INET6
	case AF_INET6:
		naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
		    ((rmask->addr32[0] ^ 0xffffffff) & saddr->addr32[0]);
		naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) |
		    ((rmask->addr32[1] ^ 0xffffffff) & saddr->addr32[1]);
		naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) |
		    ((rmask->addr32[2] ^ 0xffffffff) & saddr->addr32[2]);
		naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) |
		    ((rmask->addr32[3] ^ 0xffffffff) & saddr->addr32[3]);
		break;
#endif /* INET6 */
	default:
		unhandled_af(af);
	}
}

void
pf_addr_inc(struct pf_addr *addr, sa_family_t af)
{
	switch (af) {
	case AF_INET:
		addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1);
		break;
#ifdef INET6
	case AF_INET6:
		if (addr->addr32[3] == 0xffffffff) {
			addr->addr32[3] = 0;
			if (addr->addr32[2] == 0xffffffff) {
				addr->addr32[2] = 0;
				if (addr->addr32[1] == 0xffffffff) {
					addr->addr32[1] = 0;
					addr->addr32[0] =
					    htonl(ntohl(addr->addr32[0]) + 1);
				} else
					addr->addr32[1] =
					    htonl(ntohl(addr->addr32[1]) + 1);
			} else
				addr->addr32[2] =
				    htonl(ntohl(addr->addr32[2]) + 1);
		} else
			addr->addr32[3] =
			    htonl(ntohl(addr->addr32[3]) + 1);
		break;
#endif /* INET6 */
	default:
		unhandled_af(af);
	}
}

int
pf_socket_lookup(struct pf_pdesc *pd)
{
	struct pf_addr		*saddr, *daddr;
	u_int16_t		 sport, dport;
	struct inpcbtable	*tb;
	struct inpcb		*inp;

	pd->lookup.uid = UID_MAX;
	pd->lookup.gid = GID_MAX;
	pd->lookup.pid = NO_PID;
	switch (pd->virtual_proto) {
	case IPPROTO_TCP:
		sport = pd->hdr.tcp.th_sport;
		dport = pd->hdr.tcp.th_dport;
		PF_ASSERT_LOCKED();
		NET_ASSERT_LOCKED();
		tb = &tcbtable;
		break;
	case IPPROTO_UDP:
		sport = pd->hdr.udp.uh_sport;
		dport = pd->hdr.udp.uh_dport;
		PF_ASSERT_LOCKED();
		NET_ASSERT_LOCKED();
		tb = &udbtable;
		break;
	default:
		return (-1);
	}
	if (pd->dir == PF_IN) {
		saddr = pd->src;
		daddr = pd->dst;
	} else {
		u_int16_t	p;

		p = sport;
		sport = dport;
		dport = p;
		saddr = pd->dst;
		daddr = pd->src;
	}
	switch (pd->af) {
	case AF_INET:
		/*
		 * Fails when rtable is changed while evaluating the ruleset.
		 * The socket looked up will not match the one hit in the end.
3251 */ 3252 inp = in_pcbhashlookup(tb, saddr->v4, sport, daddr->v4, dport, 3253 pd->rdomain); 3254 if (inp == NULL) { 3255 inp = in_pcblookup_listen(tb, daddr->v4, dport, 3256 NULL, pd->rdomain); 3257 if (inp == NULL) 3258 return (-1); 3259 } 3260 break; 3261 #ifdef INET6 3262 case AF_INET6: 3263 inp = in6_pcbhashlookup(tb, &saddr->v6, sport, &daddr->v6, 3264 dport, pd->rdomain); 3265 if (inp == NULL) { 3266 inp = in6_pcblookup_listen(tb, &daddr->v6, dport, 3267 NULL, pd->rdomain); 3268 if (inp == NULL) 3269 return (-1); 3270 } 3271 break; 3272 #endif /* INET6 */ 3273 default: 3274 unhandled_af(pd->af); 3275 } 3276 pd->lookup.uid = inp->inp_socket->so_euid; 3277 pd->lookup.gid = inp->inp_socket->so_egid; 3278 pd->lookup.pid = inp->inp_socket->so_cpid; 3279 return (1); 3280 } 3281 3282 /* post: r => (r[0] == type /\ r[1] >= min_typelen >= 2 "validity" 3283 * /\ (eoh - r) >= min_typelen >= 2 "safety" ) 3284 * 3285 * warning: r + r[1] may exceed opts bounds for r[1] > min_typelen 3286 */ 3287 u_int8_t* 3288 pf_find_tcpopt(u_int8_t *opt, u_int8_t *opts, size_t hlen, u_int8_t type, 3289 u_int8_t min_typelen) 3290 { 3291 u_int8_t *eoh = opts + hlen; 3292 3293 if (min_typelen < 2) 3294 return (NULL); 3295 3296 while ((eoh - opt) >= min_typelen) { 3297 switch (*opt) { 3298 case TCPOPT_EOL: 3299 /* FALLTHROUGH - Workaround the failure of some 3300 systems to NOP-pad their bzero'd option buffers, 3301 producing spurious EOLs */ 3302 case TCPOPT_NOP: 3303 opt++; 3304 continue; 3305 default: 3306 if (opt[0] == type && 3307 opt[1] >= min_typelen) 3308 return (opt); 3309 } 3310 3311 opt += MAX(opt[1], 2); /* evade infinite loops */ 3312 } 3313 3314 return (NULL); 3315 } 3316 3317 u_int8_t 3318 pf_get_wscale(struct pf_pdesc *pd) 3319 { 3320 int olen; 3321 u_int8_t opts[MAX_TCPOPTLEN], *opt; 3322 u_int8_t wscale = 0; 3323 3324 olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr); 3325 if (olen < TCPOLEN_WINDOW || !pf_pull_hdr(pd->m, 3326 pd->off + sizeof(struct tcphdr), opts, olen, NULL, NULL, pd->af)) 3327 return (0); 3328 3329 opt = opts; 3330 while ((opt = pf_find_tcpopt(opt, opts, olen, 3331 TCPOPT_WINDOW, TCPOLEN_WINDOW)) != NULL) { 3332 wscale = opt[2]; 3333 wscale = MIN(wscale, TCP_MAX_WINSHIFT); 3334 wscale |= PF_WSCALE_FLAG; 3335 3336 opt += opt[1]; 3337 } 3338 3339 return (wscale); 3340 } 3341 3342 u_int16_t 3343 pf_get_mss(struct pf_pdesc *pd) 3344 { 3345 int olen; 3346 u_int8_t opts[MAX_TCPOPTLEN], *opt; 3347 u_int16_t mss = tcp_mssdflt; 3348 3349 olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr); 3350 if (olen < TCPOLEN_MAXSEG || !pf_pull_hdr(pd->m, 3351 pd->off + sizeof(struct tcphdr), opts, olen, NULL, NULL, pd->af)) 3352 return (0); 3353 3354 opt = opts; 3355 while ((opt = pf_find_tcpopt(opt, opts, olen, 3356 TCPOPT_MAXSEG, TCPOLEN_MAXSEG)) != NULL) { 3357 memcpy(&mss, (opt + 2), 2); 3358 mss = ntohs(mss); 3359 3360 opt += opt[1]; 3361 } 3362 return (mss); 3363 } 3364 3365 u_int16_t 3366 pf_calc_mss(struct pf_addr *addr, sa_family_t af, int rtableid, u_int16_t offer) 3367 { 3368 struct ifnet *ifp; 3369 struct sockaddr_in *dst; 3370 #ifdef INET6 3371 struct sockaddr_in6 *dst6; 3372 #endif /* INET6 */ 3373 struct rtentry *rt = NULL; 3374 struct sockaddr_storage ss; 3375 int hlen; 3376 u_int16_t mss = tcp_mssdflt; 3377 3378 memset(&ss, 0, sizeof(ss)); 3379 3380 switch (af) { 3381 case AF_INET: 3382 hlen = sizeof(struct ip); 3383 dst = (struct sockaddr_in *)&ss; 3384 dst->sin_family = AF_INET; 3385 dst->sin_len = sizeof(*dst); 3386 dst->sin_addr = addr->v4; 3387 rt = 
rtalloc(sintosa(dst), 0, rtableid); 3388 break; 3389 #ifdef INET6 3390 case AF_INET6: 3391 hlen = sizeof(struct ip6_hdr); 3392 dst6 = (struct sockaddr_in6 *)&ss; 3393 dst6->sin6_family = AF_INET6; 3394 dst6->sin6_len = sizeof(*dst6); 3395 dst6->sin6_addr = addr->v6; 3396 rt = rtalloc(sin6tosa(dst6), 0, rtableid); 3397 break; 3398 #endif /* INET6 */ 3399 } 3400 3401 if (rt != NULL && (ifp = if_get(rt->rt_ifidx)) != NULL) { 3402 mss = ifp->if_mtu - hlen - sizeof(struct tcphdr); 3403 mss = max(tcp_mssdflt, mss); 3404 if_put(ifp); 3405 } 3406 rtfree(rt); 3407 mss = min(mss, offer); 3408 mss = max(mss, 64); /* sanity - at least max opt space */ 3409 return (mss); 3410 } 3411 3412 static __inline int 3413 pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr, sa_family_t af) 3414 { 3415 struct pf_rule *r = s->rule.ptr; 3416 struct pf_src_node *sns[PF_SN_MAX]; 3417 int rv; 3418 3419 s->rt_kif = NULL; 3420 if (!r->rt) 3421 return (0); 3422 3423 memset(sns, 0, sizeof(sns)); 3424 switch (af) { 3425 case AF_INET: 3426 rv = pf_map_addr(AF_INET, r, saddr, &s->rt_addr, NULL, sns, 3427 &r->route, PF_SN_ROUTE); 3428 break; 3429 #ifdef INET6 3430 case AF_INET6: 3431 rv = pf_map_addr(AF_INET6, r, saddr, &s->rt_addr, NULL, sns, 3432 &r->route, PF_SN_ROUTE); 3433 break; 3434 #endif /* INET6 */ 3435 default: 3436 rv = 1; 3437 } 3438 3439 if (rv == 0) { 3440 s->rt_kif = r->route.kif; 3441 s->natrule.ptr = r; 3442 } 3443 3444 return (rv); 3445 } 3446 3447 u_int32_t 3448 pf_tcp_iss(struct pf_pdesc *pd) 3449 { 3450 SHA2_CTX ctx; 3451 union { 3452 uint8_t bytes[SHA512_DIGEST_LENGTH]; 3453 uint32_t words[1]; 3454 } digest; 3455 3456 if (pf_tcp_secret_init == 0) { 3457 arc4random_buf(pf_tcp_secret, sizeof(pf_tcp_secret)); 3458 SHA512Init(&pf_tcp_secret_ctx); 3459 SHA512Update(&pf_tcp_secret_ctx, pf_tcp_secret, 3460 sizeof(pf_tcp_secret)); 3461 pf_tcp_secret_init = 1; 3462 } 3463 ctx = pf_tcp_secret_ctx; 3464 3465 SHA512Update(&ctx, &pd->rdomain, sizeof(pd->rdomain)); 3466 SHA512Update(&ctx, &pd->hdr.tcp.th_sport, sizeof(u_short)); 3467 SHA512Update(&ctx, &pd->hdr.tcp.th_dport, sizeof(u_short)); 3468 switch (pd->af) { 3469 case AF_INET: 3470 SHA512Update(&ctx, &pd->src->v4, sizeof(struct in_addr)); 3471 SHA512Update(&ctx, &pd->dst->v4, sizeof(struct in_addr)); 3472 break; 3473 #ifdef INET6 3474 case AF_INET6: 3475 SHA512Update(&ctx, &pd->src->v6, sizeof(struct in6_addr)); 3476 SHA512Update(&ctx, &pd->dst->v6, sizeof(struct in6_addr)); 3477 break; 3478 #endif /* INET6 */ 3479 } 3480 SHA512Final(digest.bytes, &ctx); 3481 pf_tcp_iss_off += 4096; 3482 return (digest.words[0] + tcp_iss + pf_tcp_iss_off); 3483 } 3484 3485 void 3486 pf_rule_to_actions(struct pf_rule *r, struct pf_rule_actions *a) 3487 { 3488 if (r->qid) 3489 a->qid = r->qid; 3490 if (r->pqid) 3491 a->pqid = r->pqid; 3492 if (r->rtableid >= 0) 3493 a->rtableid = r->rtableid; 3494 #if NPFLOG > 0 3495 a->log |= r->log; 3496 #endif /* NPFLOG > 0 */ 3497 if (r->scrub_flags & PFSTATE_SETTOS) 3498 a->set_tos = r->set_tos; 3499 if (r->min_ttl) 3500 a->min_ttl = r->min_ttl; 3501 if (r->max_mss) 3502 a->max_mss = r->max_mss; 3503 a->flags |= (r->scrub_flags & (PFSTATE_NODF|PFSTATE_RANDOMID| 3504 PFSTATE_SETTOS|PFSTATE_SCRUB_TCP|PFSTATE_SETPRIO)); 3505 if (r->scrub_flags & PFSTATE_SETPRIO) { 3506 a->set_prio[0] = r->set_prio[0]; 3507 a->set_prio[1] = r->set_prio[1]; 3508 } 3509 if (r->rule_flag & PFRULE_SETDELAY) 3510 a->delay = r->delay; 3511 } 3512 3513 #define PF_TEST_ATTRIB(t, a) \ 3514 if (t) { \ 3515 r = a; \ 3516 continue; \ 3517 } else do { \ 3518 } while (0) 
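/*
 * Illustrative sketch (ours, compiled out): one PF_TEST_ATTRIB(t, a)
 * step in pf_match_rule() below expands to roughly the following,
 * using the af check as an example -- on a mismatch the scan jumps
 * straight to the precomputed skip target instead of the next rule.
 */
#if 0
	while (r != NULL) {
		r->evaluations++;
		if (r->af && r->af != ctx->pd->af) {
			r = r->skip[PF_SKIP_AF].ptr;
			continue;
		}
		/* ... remaining attribute tests ... */
	}
#endif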
3519 3520 enum pf_test_status 3521 pf_match_rule(struct pf_test_ctx *ctx, struct pf_ruleset *ruleset) 3522 { 3523 struct pf_rule *r; 3524 struct pf_rule *save_a; 3525 struct pf_ruleset *save_aruleset; 3526 3527 r = TAILQ_FIRST(ruleset->rules.active.ptr); 3528 while (r != NULL) { 3529 r->evaluations++; 3530 PF_TEST_ATTRIB( 3531 (pfi_kif_match(r->kif, ctx->pd->kif) == r->ifnot), 3532 r->skip[PF_SKIP_IFP].ptr); 3533 PF_TEST_ATTRIB((r->direction && r->direction != ctx->pd->dir), 3534 r->skip[PF_SKIP_DIR].ptr); 3535 PF_TEST_ATTRIB((r->onrdomain >= 0 && 3536 (r->onrdomain == ctx->pd->rdomain) == r->ifnot), 3537 r->skip[PF_SKIP_RDOM].ptr); 3538 PF_TEST_ATTRIB((r->af && r->af != ctx->pd->af), 3539 r->skip[PF_SKIP_AF].ptr); 3540 PF_TEST_ATTRIB((r->proto && r->proto != ctx->pd->proto), 3541 r->skip[PF_SKIP_PROTO].ptr); 3542 PF_TEST_ATTRIB((PF_MISMATCHAW(&r->src.addr, &ctx->pd->nsaddr, 3543 ctx->pd->naf, r->src.neg, ctx->pd->kif, 3544 ctx->act.rtableid)), 3545 r->skip[PF_SKIP_SRC_ADDR].ptr); 3546 PF_TEST_ATTRIB((PF_MISMATCHAW(&r->dst.addr, &ctx->pd->ndaddr, 3547 ctx->pd->af, r->dst.neg, NULL, ctx->act.rtableid)), 3548 r->skip[PF_SKIP_DST_ADDR].ptr); 3549 3550 switch (ctx->pd->virtual_proto) { 3551 case PF_VPROTO_FRAGMENT: 3552 /* tcp/udp only. port_op always 0 in other cases */ 3553 PF_TEST_ATTRIB((r->src.port_op || r->dst.port_op), 3554 TAILQ_NEXT(r, entries)); 3555 PF_TEST_ATTRIB((ctx->pd->proto == IPPROTO_TCP && 3556 r->flagset), 3557 TAILQ_NEXT(r, entries)); 3558 /* icmp only. type/code always 0 in other cases */ 3559 PF_TEST_ATTRIB((r->type || r->code), 3560 TAILQ_NEXT(r, entries)); 3561 /* tcp/udp only. {uid|gid}.op always 0 in other cases */ 3562 PF_TEST_ATTRIB((r->gid.op || r->uid.op), 3563 TAILQ_NEXT(r, entries)); 3564 break; 3565 3566 case IPPROTO_TCP: 3567 PF_TEST_ATTRIB(((r->flagset & ctx->th->th_flags) != 3568 r->flags), 3569 TAILQ_NEXT(r, entries)); 3570 PF_TEST_ATTRIB((r->os_fingerprint != PF_OSFP_ANY && 3571 !pf_osfp_match(pf_osfp_fingerprint(ctx->pd), 3572 r->os_fingerprint)), 3573 TAILQ_NEXT(r, entries)); 3574 /* FALLTHROUGH */ 3575 3576 case IPPROTO_UDP: 3577 /* tcp/udp only. port_op always 0 in other cases */ 3578 PF_TEST_ATTRIB((r->src.port_op && 3579 !pf_match_port(r->src.port_op, r->src.port[0], 3580 r->src.port[1], ctx->pd->nsport)), 3581 r->skip[PF_SKIP_SRC_PORT].ptr); 3582 PF_TEST_ATTRIB((r->dst.port_op && 3583 !pf_match_port(r->dst.port_op, r->dst.port[0], 3584 r->dst.port[1], ctx->pd->ndport)), 3585 r->skip[PF_SKIP_DST_PORT].ptr); 3586 /* tcp/udp only. uid.op always 0 in other cases */ 3587 PF_TEST_ATTRIB((r->uid.op && (ctx->pd->lookup.done || 3588 (ctx->pd->lookup.done = 3589 pf_socket_lookup(ctx->pd), 1)) && 3590 !pf_match_uid(r->uid.op, r->uid.uid[0], 3591 r->uid.uid[1], ctx->pd->lookup.uid)), 3592 TAILQ_NEXT(r, entries)); 3593 /* tcp/udp only. gid.op always 0 in other cases */ 3594 PF_TEST_ATTRIB((r->gid.op && (ctx->pd->lookup.done || 3595 (ctx->pd->lookup.done = 3596 pf_socket_lookup(ctx->pd), 1)) && 3597 !pf_match_gid(r->gid.op, r->gid.gid[0], 3598 r->gid.gid[1], ctx->pd->lookup.gid)), 3599 TAILQ_NEXT(r, entries)); 3600 break; 3601 3602 case IPPROTO_ICMP: 3603 case IPPROTO_ICMPV6: 3604 /* icmp only. type always 0 in other cases */ 3605 PF_TEST_ATTRIB((r->type && 3606 r->type != ctx->icmptype + 1), 3607 TAILQ_NEXT(r, entries)); 3608 /* icmp only. type always 0 in other cases */ 3609 PF_TEST_ATTRIB((r->code && 3610 r->code != ctx->icmpcode + 1), 3611 TAILQ_NEXT(r, entries)); 3612 /* icmp only. 
don't create states on replies */ 3613 PF_TEST_ATTRIB((r->keep_state && !ctx->state_icmp && 3614 (r->rule_flag & PFRULE_STATESLOPPY) == 0 && 3615 ctx->icmp_dir != PF_IN), 3616 TAILQ_NEXT(r, entries)); 3617 break; 3618 3619 default: 3620 break; 3621 } 3622 3623 PF_TEST_ATTRIB((r->rule_flag & PFRULE_FRAGMENT && 3624 ctx->pd->virtual_proto != PF_VPROTO_FRAGMENT), 3625 TAILQ_NEXT(r, entries)); 3626 PF_TEST_ATTRIB((r->tos && !(r->tos == ctx->pd->tos)), 3627 TAILQ_NEXT(r, entries)); 3628 PF_TEST_ATTRIB((r->prob && 3629 r->prob <= arc4random_uniform(UINT_MAX - 1) + 1), 3630 TAILQ_NEXT(r, entries)); 3631 PF_TEST_ATTRIB((r->match_tag && 3632 !pf_match_tag(ctx->pd->m, r, &ctx->tag)), 3633 TAILQ_NEXT(r, entries)); 3634 PF_TEST_ATTRIB((r->rcv_kif && pf_match_rcvif(ctx->pd->m, r) == 3635 r->rcvifnot), 3636 TAILQ_NEXT(r, entries)); 3637 PF_TEST_ATTRIB((r->prio && 3638 (r->prio == PF_PRIO_ZERO ? 0 : r->prio) != 3639 ctx->pd->m->m_pkthdr.pf.prio), 3640 TAILQ_NEXT(r, entries)); 3641 3642 /* must be last! */ 3643 if (r->pktrate.limit) { 3644 pf_add_threshold(&r->pktrate); 3645 PF_TEST_ATTRIB((pf_check_threshold(&r->pktrate)), 3646 TAILQ_NEXT(r, entries)); 3647 } 3648 3649 /* FALLTHROUGH */ 3650 if (r->tag) 3651 ctx->tag = r->tag; 3652 if (r->anchor == NULL) { 3653 if (r->action == PF_MATCH) { 3654 if ((ctx->ri = pool_get(&pf_rule_item_pl, 3655 PR_NOWAIT)) == NULL) { 3656 REASON_SET(&ctx->reason, PFRES_MEMORY); 3657 ctx->test_status = PF_TEST_FAIL; 3658 break; 3659 } 3660 ctx->ri->r = r; 3661 /* order is irrelevant */ 3662 SLIST_INSERT_HEAD(&ctx->rules, ctx->ri, entry); 3663 ctx->ri = NULL; 3664 pf_rule_to_actions(r, &ctx->act); 3665 if (r->rule_flag & PFRULE_AFTO) 3666 ctx->pd->naf = r->naf; 3667 if (pf_get_transaddr(r, ctx->pd, ctx->sns, 3668 &ctx->nr) == -1) { 3669 REASON_SET(&ctx->reason, 3670 PFRES_TRANSLATE); 3671 ctx->test_status = PF_TEST_FAIL; 3672 break; 3673 } 3674 #if NPFLOG > 0 3675 if (r->log) { 3676 REASON_SET(&ctx->reason, PFRES_MATCH); 3677 pflog_packet(ctx->pd, ctx->reason, r, 3678 ctx->a, ruleset, NULL); 3679 } 3680 #endif /* NPFLOG > 0 */ 3681 } else { 3682 /* 3683 * found matching r 3684 */ 3685 *ctx->rm = r; 3686 /* 3687 * anchor, with ruleset, where r belongs to 3688 */ 3689 *ctx->am = ctx->a; 3690 /* 3691 * ruleset where r belongs to 3692 */ 3693 *ctx->rsm = ruleset; 3694 /* 3695 * ruleset, where anchor belongs to. 3696 */ 3697 ctx->arsm = ctx->aruleset; 3698 } 3699 3700 #if NPFLOG > 0 3701 if (ctx->act.log & PF_LOG_MATCHES) 3702 pf_log_matches(ctx->pd, r, ctx->a, ruleset, 3703 &ctx->rules); 3704 #endif /* NPFLOG > 0 */ 3705 3706 if (r->quick) { 3707 ctx->test_status = PF_TEST_QUICK; 3708 break; 3709 } 3710 } else { 3711 save_a = ctx->a; 3712 save_aruleset = ctx->aruleset; 3713 ctx->a = r; /* remember anchor */ 3714 ctx->aruleset = ruleset; /* and its ruleset */ 3715 /* 3716 * Note: we don't need to restore if we are not going 3717 * to continue with ruleset evaluation. 
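			 * (on PF_TEST_QUICK or PF_TEST_FAIL we break out
			 * of the loop below, and ctx->a / ctx->aruleset
			 * keep the anchor's values)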
3718 */ 3719 if (pf_step_into_anchor(ctx, r) != PF_TEST_OK) 3720 break; 3721 ctx->a = save_a; 3722 ctx->aruleset = save_aruleset; 3723 } 3724 r = TAILQ_NEXT(r, entries); 3725 } 3726 3727 return (ctx->test_status); 3728 } 3729 3730 int 3731 pf_test_rule(struct pf_pdesc *pd, struct pf_rule **rm, struct pf_state **sm, 3732 struct pf_rule **am, struct pf_ruleset **rsm, u_short *reason) 3733 { 3734 struct pf_rule *r = NULL; 3735 struct pf_rule *a = NULL; 3736 struct pf_ruleset *ruleset = NULL; 3737 struct pf_state_key *skw = NULL, *sks = NULL; 3738 int rewrite = 0; 3739 u_int16_t virtual_type, virtual_id; 3740 int action = PF_DROP; 3741 struct pf_test_ctx ctx; 3742 int rv; 3743 3744 memset(&ctx, 0, sizeof(ctx)); 3745 ctx.pd = pd; 3746 ctx.rm = rm; 3747 ctx.am = am; 3748 ctx.rsm = rsm; 3749 ctx.th = &pd->hdr.tcp; 3750 ctx.act.rtableid = pd->rdomain; 3751 ctx.tag = -1; 3752 SLIST_INIT(&ctx.rules); 3753 3754 if (pd->dir == PF_IN && if_congested()) { 3755 REASON_SET(&ctx.reason, PFRES_CONGEST); 3756 return (PF_DROP); 3757 } 3758 3759 switch (pd->virtual_proto) { 3760 case IPPROTO_ICMP: 3761 ctx.icmptype = pd->hdr.icmp.icmp_type; 3762 ctx.icmpcode = pd->hdr.icmp.icmp_code; 3763 ctx.state_icmp = pf_icmp_mapping(pd, ctx.icmptype, 3764 &ctx.icmp_dir, &virtual_id, &virtual_type); 3765 if (ctx.icmp_dir == PF_IN) { 3766 pd->osport = pd->nsport = virtual_id; 3767 pd->odport = pd->ndport = virtual_type; 3768 } else { 3769 pd->osport = pd->nsport = virtual_type; 3770 pd->odport = pd->ndport = virtual_id; 3771 } 3772 break; 3773 #ifdef INET6 3774 case IPPROTO_ICMPV6: 3775 ctx.icmptype = pd->hdr.icmp6.icmp6_type; 3776 ctx.icmpcode = pd->hdr.icmp6.icmp6_code; 3777 ctx.state_icmp = pf_icmp_mapping(pd, ctx.icmptype, 3778 &ctx.icmp_dir, &virtual_id, &virtual_type); 3779 if (ctx.icmp_dir == PF_IN) { 3780 pd->osport = pd->nsport = virtual_id; 3781 pd->odport = pd->ndport = virtual_type; 3782 } else { 3783 pd->osport = pd->nsport = virtual_type; 3784 pd->odport = pd->ndport = virtual_id; 3785 } 3786 break; 3787 #endif /* INET6 */ 3788 } 3789 3790 ruleset = &pf_main_ruleset; 3791 rv = pf_match_rule(&ctx, ruleset); 3792 if (rv == PF_TEST_FAIL) { 3793 /* 3794 * Reason has been set in pf_match_rule() already. 
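		 * (see the PFRES_MEMORY and PFRES_TRANSLATE paths in
		 * pf_match_rule() above)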
3795 */ 3796 goto cleanup; 3797 } 3798 3799 r = *ctx.rm; /* matching rule */ 3800 a = *ctx.am; /* rule that defines an anchor containing 'r' */ 3801 ruleset = *ctx.rsm;/* ruleset of the anchor defined by the rule 'a' */ 3802 ctx.aruleset = ctx.arsm;/* ruleset of the 'a' rule itself */ 3803 3804 /* apply actions for last matching pass/block rule */ 3805 pf_rule_to_actions(r, &ctx.act); 3806 if (r->rule_flag & PFRULE_AFTO) 3807 pd->naf = r->naf; 3808 if (pf_get_transaddr(r, pd, ctx.sns, &ctx.nr) == -1) { 3809 REASON_SET(&ctx.reason, PFRES_TRANSLATE); 3810 goto cleanup; 3811 } 3812 REASON_SET(&ctx.reason, PFRES_MATCH); 3813 3814 #if NPFLOG > 0 3815 if (r->log) 3816 pflog_packet(pd, ctx.reason, r, a, ruleset, NULL); 3817 if (ctx.act.log & PF_LOG_MATCHES) 3818 pf_log_matches(pd, r, a, ruleset, &ctx.rules); 3819 #endif /* NPFLOG > 0 */ 3820 3821 if (pd->virtual_proto != PF_VPROTO_FRAGMENT && 3822 (r->action == PF_DROP) && 3823 ((r->rule_flag & PFRULE_RETURNRST) || 3824 (r->rule_flag & PFRULE_RETURNICMP) || 3825 (r->rule_flag & PFRULE_RETURN))) { 3826 if (pd->proto == IPPROTO_TCP && 3827 ((r->rule_flag & PFRULE_RETURNRST) || 3828 (r->rule_flag & PFRULE_RETURN)) && 3829 !(ctx.th->th_flags & TH_RST)) { 3830 u_int32_t ack = 3831 ntohl(ctx.th->th_seq) + pd->p_len; 3832 3833 if (pf_check_tcp_cksum(pd->m, pd->off, 3834 pd->tot_len - pd->off, pd->af)) 3835 REASON_SET(&ctx.reason, PFRES_PROTCKSUM); 3836 else { 3837 if (ctx.th->th_flags & TH_SYN) 3838 ack++; 3839 if (ctx.th->th_flags & TH_FIN) 3840 ack++; 3841 pf_send_tcp(r, pd->af, pd->dst, 3842 pd->src, ctx.th->th_dport, 3843 ctx.th->th_sport, ntohl(ctx.th->th_ack), 3844 ack, TH_RST|TH_ACK, 0, 0, r->return_ttl, 3845 1, 0, pd->rdomain); 3846 } 3847 } else if ((pd->proto != IPPROTO_ICMP || 3848 ICMP_INFOTYPE(ctx.icmptype)) && pd->af == AF_INET && 3849 r->return_icmp) 3850 pf_send_icmp(pd->m, r->return_icmp >> 8, 3851 r->return_icmp & 255, 0, pd->af, r, pd->rdomain); 3852 else if ((pd->proto != IPPROTO_ICMPV6 || 3853 (ctx.icmptype >= ICMP6_ECHO_REQUEST && 3854 ctx.icmptype != ND_REDIRECT)) && pd->af == AF_INET6 && 3855 r->return_icmp6) 3856 pf_send_icmp(pd->m, r->return_icmp6 >> 8, 3857 r->return_icmp6 & 255, 0, pd->af, r, pd->rdomain); 3858 } 3859 3860 if (r->action == PF_DROP) 3861 goto cleanup; 3862 3863 pf_tag_packet(pd->m, ctx.tag, ctx.act.rtableid); 3864 if (ctx.act.rtableid >= 0 && 3865 rtable_l2(ctx.act.rtableid) != pd->rdomain) 3866 pd->destchg = 1; 3867 3868 if (r->action == PF_PASS && pd->badopts && ! 
	    r->allow_opts) {
		REASON_SET(&ctx.reason, PFRES_IPOPTIONS);
#if NPFLOG > 0
		pd->pflog |= PF_LOG_FORCE;
#endif /* NPFLOG > 0 */
		DPFPRINTF(LOG_NOTICE, "dropping packet with "
		    "ip/ipv6 options in pf_test_rule()");
		goto cleanup;
	}

	action = PF_PASS;

	if (pd->virtual_proto != PF_VPROTO_FRAGMENT
	    && !ctx.state_icmp && r->keep_state) {

		if (r->rule_flag & PFRULE_SRCTRACK &&
		    pf_insert_src_node(&ctx.sns[PF_SN_NONE], r, PF_SN_NONE,
		    pd->af, pd->src, NULL) != 0) {
			REASON_SET(&ctx.reason, PFRES_SRCLIMIT);
			goto cleanup;
		}

		if (r->max_states && (r->states_cur >= r->max_states)) {
			pf_status.lcounters[LCNT_STATES]++;
			REASON_SET(&ctx.reason, PFRES_MAXSTATES);
			goto cleanup;
		}

		action = pf_create_state(pd, r, a, ctx.nr, &skw, &sks,
		    &rewrite, sm, ctx.tag, &ctx.rules, &ctx.act, ctx.sns);

		if (action != PF_PASS)
			goto cleanup;
		if (sks != skw) {
			struct pf_state_key	*sk;

			if (pd->dir == PF_IN)
				sk = sks;
			else
				sk = skw;
			rewrite += pf_translate(pd,
			    &sk->addr[pd->af == pd->naf ? pd->sidx : pd->didx],
			    sk->port[pd->af == pd->naf ? pd->sidx : pd->didx],
			    &sk->addr[pd->af == pd->naf ? pd->didx : pd->sidx],
			    sk->port[pd->af == pd->naf ? pd->didx : pd->sidx],
			    virtual_type, ctx.icmp_dir);
		}

#ifdef INET6
		if (rewrite && skw->af != sks->af)
			action = PF_AFRT;
#endif /* INET6 */

	} else {
		while ((ctx.ri = SLIST_FIRST(&ctx.rules))) {
			SLIST_REMOVE_HEAD(&ctx.rules, entry);
			pool_put(&pf_rule_item_pl, ctx.ri);
		}
	}

	/* copy back packet headers if needed */
	if (rewrite && pd->hdrlen) {
		m_copyback(pd->m, pd->off, pd->hdrlen, &pd->hdr, M_NOWAIT);
	}

#if NPFSYNC > 0
	if (*sm != NULL && !ISSET((*sm)->state_flags, PFSTATE_NOSYNC) &&
	    pd->dir == PF_OUT && pfsync_up()) {
		/*
		 * We want the state created, but we don't
		 * want to send this in case a partner
		 * firewall has to know about it to allow
		 * replies through it.
		 */
		if (pfsync_defer(*sm, pd->m))
			return (PF_DEFER);
	}
#endif /* NPFSYNC > 0 */

	if (r->rule_flag & PFRULE_ONCE) {
		r->rule_flag |= PFRULE_EXPIRED;
		r->exptime = time_second;
		SLIST_INSERT_HEAD(&pf_rule_gcl, r, gcle);
	}

	return (action);

cleanup:
	while ((ctx.ri = SLIST_FIRST(&ctx.rules))) {
		SLIST_REMOVE_HEAD(&ctx.rules, entry);
		pool_put(&pf_rule_item_pl, ctx.ri);
	}

	return (action);
}

static __inline int
pf_create_state(struct pf_pdesc *pd, struct pf_rule *r, struct pf_rule *a,
    struct pf_rule *nr, struct pf_state_key **skw, struct pf_state_key **sks,
    int *rewrite, struct pf_state **sm, int tag, struct pf_rule_slist *rules,
    struct pf_rule_actions *act, struct pf_src_node *sns[PF_SN_MAX])
{
	struct pf_state		*s = NULL;
	struct tcphdr		*th = &pd->hdr.tcp;
	u_int16_t		 mss = tcp_mssdflt;
	u_short			 reason;
	u_int			 i;

	s = pool_get(&pf_state_pl, PR_NOWAIT | PR_ZERO);
	if (s == NULL) {
		REASON_SET(&reason, PFRES_MEMORY);
		goto csfailed;
	}
	s->rule.ptr = r;
	s->anchor.ptr = a;
	s->natrule.ptr = nr;
	if (r->allow_opts)
		s->state_flags |= PFSTATE_ALLOWOPTS;
	if (r->rule_flag & PFRULE_STATESLOPPY)
		s->state_flags |= PFSTATE_SLOPPY;
	if (r->rule_flag & PFRULE_PFLOW)
		s->state_flags |= PFSTATE_PFLOW;
#if NPFLOG > 0
	s->log = act->log & PF_LOG_ALL;
#endif /* NPFLOG > 0 */
	s->qid = act->qid;
	s->pqid = act->pqid;
	s->rtableid[pd->didx] = act->rtableid;
	s->rtableid[pd->sidx] = -1;	/* return traffic is routed normally */
	s->min_ttl = act->min_ttl;
	s->set_tos = act->set_tos;
	s->max_mss = act->max_mss;
	s->state_flags |= act->flags;
#if NPFSYNC > 0
	s->sync_state = PFSYNC_S_NONE;
#endif /* NPFSYNC > 0 */
	s->set_prio[0] = act->set_prio[0];
	s->set_prio[1] = act->set_prio[1];
	s->delay = act->delay;
	SLIST_INIT(&s->src_nodes);
	/*
	 * must initialize refcnt before pf_state_insert() gets called,
	 * since pf_state_insert() grabs a reference for pfsync!
4011 */ 4012 refcnt_init(&s->refcnt); 4013 4014 switch (pd->proto) { 4015 case IPPROTO_TCP: 4016 s->src.seqlo = ntohl(th->th_seq); 4017 s->src.seqhi = s->src.seqlo + pd->p_len + 1; 4018 if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN && 4019 r->keep_state == PF_STATE_MODULATE) { 4020 /* Generate sequence number modulator */ 4021 if ((s->src.seqdiff = pf_tcp_iss(pd) - s->src.seqlo) == 4022 0) 4023 s->src.seqdiff = 1; 4024 pf_patch_32(pd, 4025 &th->th_seq, htonl(s->src.seqlo + s->src.seqdiff)); 4026 *rewrite = 1; 4027 } else 4028 s->src.seqdiff = 0; 4029 if (th->th_flags & TH_SYN) { 4030 s->src.seqhi++; 4031 s->src.wscale = pf_get_wscale(pd); 4032 } 4033 s->src.max_win = MAX(ntohs(th->th_win), 1); 4034 if (s->src.wscale & PF_WSCALE_MASK) { 4035 /* Remove scale factor from initial window */ 4036 int win = s->src.max_win; 4037 win += 1 << (s->src.wscale & PF_WSCALE_MASK); 4038 s->src.max_win = (win - 1) >> 4039 (s->src.wscale & PF_WSCALE_MASK); 4040 } 4041 if (th->th_flags & TH_FIN) 4042 s->src.seqhi++; 4043 s->dst.seqhi = 1; 4044 s->dst.max_win = 1; 4045 pf_set_protostate(s, PF_PEER_SRC, TCPS_SYN_SENT); 4046 pf_set_protostate(s, PF_PEER_DST, TCPS_CLOSED); 4047 s->timeout = PFTM_TCP_FIRST_PACKET; 4048 pf_status.states_halfopen++; 4049 break; 4050 case IPPROTO_UDP: 4051 pf_set_protostate(s, PF_PEER_SRC, PFUDPS_SINGLE); 4052 pf_set_protostate(s, PF_PEER_DST, PFUDPS_NO_TRAFFIC); 4053 s->timeout = PFTM_UDP_FIRST_PACKET; 4054 break; 4055 case IPPROTO_ICMP: 4056 #ifdef INET6 4057 case IPPROTO_ICMPV6: 4058 #endif /* INET6 */ 4059 s->timeout = PFTM_ICMP_FIRST_PACKET; 4060 break; 4061 default: 4062 pf_set_protostate(s, PF_PEER_SRC, PFOTHERS_SINGLE); 4063 pf_set_protostate(s, PF_PEER_DST, PFOTHERS_NO_TRAFFIC); 4064 s->timeout = PFTM_OTHER_FIRST_PACKET; 4065 } 4066 4067 s->creation = time_uptime; 4068 s->expire = time_uptime; 4069 4070 if (pd->proto == IPPROTO_TCP) { 4071 if (s->state_flags & PFSTATE_SCRUB_TCP && 4072 pf_normalize_tcp_init(pd, &s->src)) { 4073 REASON_SET(&reason, PFRES_MEMORY); 4074 goto csfailed; 4075 } 4076 if (s->state_flags & PFSTATE_SCRUB_TCP && s->src.scrub && 4077 pf_normalize_tcp_stateful(pd, &reason, s, &s->src, &s->dst, 4078 rewrite)) { 4079 /* This really shouldn't happen!!! */ 4080 DPFPRINTF(LOG_ERR, 4081 "%s: tcp normalize failed on first pkt", __func__); 4082 goto csfailed; 4083 } 4084 } 4085 s->direction = pd->dir; 4086 4087 if (pf_state_key_setup(pd, skw, sks, act->rtableid)) { 4088 REASON_SET(&reason, PFRES_MEMORY); 4089 goto csfailed; 4090 } 4091 4092 for (i = 0; i < PF_SN_MAX; i++) 4093 if (sns[i] != NULL) { 4094 struct pf_sn_item *sni; 4095 4096 sni = pool_get(&pf_sn_item_pl, PR_NOWAIT); 4097 if (sni == NULL) { 4098 REASON_SET(&reason, PFRES_MEMORY); 4099 goto csfailed; 4100 } 4101 sni->sn = sns[i]; 4102 SLIST_INSERT_HEAD(&s->src_nodes, sni, next); 4103 sni->sn->states++; 4104 } 4105 4106 if (pf_set_rt_ifp(s, pd->src, (*skw)->af) != 0) { 4107 REASON_SET(&reason, PFRES_NOROUTE); 4108 goto csfailed; 4109 } 4110 4111 if (pf_state_insert(BOUND_IFACE(r, pd->kif), skw, sks, s)) { 4112 pf_detach_state(s); 4113 *sks = *skw = NULL; 4114 REASON_SET(&reason, PFRES_STATEINS); 4115 goto csfailed; 4116 } else 4117 *sm = s; 4118 4119 /* 4120 * Make state responsible for rules it binds here. 
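	 * (the rule list is moved rather than copied: the source head
	 * is zeroed right after the memcpy below, so the items cannot
	 * be freed twice by the caller's cleanup path)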
4121 */ 4122 memcpy(&s->match_rules, rules, sizeof(s->match_rules)); 4123 memset(rules, 0, sizeof(*rules)); 4124 STATE_INC_COUNTERS(s); 4125 4126 if (tag > 0) { 4127 pf_tag_ref(tag); 4128 s->tag = tag; 4129 } 4130 if (pd->proto == IPPROTO_TCP && (th->th_flags & (TH_SYN|TH_ACK)) == 4131 TH_SYN && r->keep_state == PF_STATE_SYNPROXY) { 4132 int rtid = pd->rdomain; 4133 if (act->rtableid >= 0) 4134 rtid = act->rtableid; 4135 pf_set_protostate(s, PF_PEER_SRC, PF_TCPS_PROXY_SRC); 4136 s->src.seqhi = arc4random(); 4137 /* Find mss option */ 4138 mss = pf_get_mss(pd); 4139 mss = pf_calc_mss(pd->src, pd->af, rtid, mss); 4140 mss = pf_calc_mss(pd->dst, pd->af, rtid, mss); 4141 s->src.mss = mss; 4142 pf_send_tcp(r, pd->af, pd->dst, pd->src, th->th_dport, 4143 th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1, 4144 TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, pd->rdomain); 4145 REASON_SET(&reason, PFRES_SYNPROXY); 4146 return (PF_SYNPROXY_DROP); 4147 } 4148 4149 return (PF_PASS); 4150 4151 csfailed: 4152 if (s) { 4153 pf_normalize_tcp_cleanup(s); /* safe even w/o init */ 4154 pf_src_tree_remove_state(s); 4155 pool_put(&pf_state_pl, s); 4156 } 4157 4158 for (i = 0; i < PF_SN_MAX; i++) 4159 if (sns[i] != NULL) 4160 pf_remove_src_node(sns[i]); 4161 4162 return (PF_DROP); 4163 } 4164 4165 int 4166 pf_translate(struct pf_pdesc *pd, struct pf_addr *saddr, u_int16_t sport, 4167 struct pf_addr *daddr, u_int16_t dport, u_int16_t virtual_type, 4168 int icmp_dir) 4169 { 4170 /* 4171 * when called from bpf_mtap_pflog, there are extra constraints: 4172 * -mbuf is faked, m_data is the bpf buffer 4173 * -pd is not fully set up 4174 */ 4175 int rewrite = 0; 4176 int afto = pd->af != pd->naf; 4177 4178 if (afto || PF_ANEQ(daddr, pd->dst, pd->af)) 4179 pd->destchg = 1; 4180 4181 switch (pd->proto) { 4182 case IPPROTO_TCP: /* FALLTHROUGH */ 4183 case IPPROTO_UDP: 4184 rewrite += pf_patch_16(pd, pd->sport, sport); 4185 rewrite += pf_patch_16(pd, pd->dport, dport); 4186 break; 4187 4188 case IPPROTO_ICMP: 4189 /* pf_translate() is also used when logging invalid packets */ 4190 if (pd->af != AF_INET) 4191 return (0); 4192 4193 if (afto) { 4194 #ifdef INET6 4195 if (pf_translate_icmp_af(pd, AF_INET6, &pd->hdr.icmp)) 4196 return (0); 4197 pd->proto = IPPROTO_ICMPV6; 4198 rewrite = 1; 4199 #endif /* INET6 */ 4200 } 4201 if (virtual_type == htons(ICMP_ECHO)) { 4202 u_int16_t icmpid = (icmp_dir == PF_IN) ? sport : dport; 4203 rewrite += pf_patch_16(pd, 4204 &pd->hdr.icmp.icmp_id, icmpid); 4205 } 4206 break; 4207 4208 #ifdef INET6 4209 case IPPROTO_ICMPV6: 4210 /* pf_translate() is also used when logging invalid packets */ 4211 if (pd->af != AF_INET6) 4212 return (0); 4213 4214 if (afto) { 4215 if (pf_translate_icmp_af(pd, AF_INET, &pd->hdr.icmp6)) 4216 return (0); 4217 pd->proto = IPPROTO_ICMP; 4218 rewrite = 1; 4219 } 4220 if (virtual_type == htons(ICMP6_ECHO_REQUEST)) { 4221 u_int16_t icmpid = (icmp_dir == PF_IN) ? 
sport : dport; 4222 rewrite += pf_patch_16(pd, 4223 &pd->hdr.icmp6.icmp6_id, icmpid); 4224 } 4225 break; 4226 #endif /* INET6 */ 4227 } 4228 4229 if (!afto) { 4230 rewrite += pf_translate_a(pd, pd->src, saddr); 4231 rewrite += pf_translate_a(pd, pd->dst, daddr); 4232 } 4233 4234 return (rewrite); 4235 } 4236 4237 int 4238 pf_tcp_track_full(struct pf_pdesc *pd, struct pf_state **state, u_short *reason, 4239 int *copyback, int reverse) 4240 { 4241 struct tcphdr *th = &pd->hdr.tcp; 4242 struct pf_state_peer *src, *dst; 4243 u_int16_t win = ntohs(th->th_win); 4244 u_int32_t ack, end, data_end, seq, orig_seq; 4245 u_int8_t sws, dws, psrc, pdst; 4246 int ackskew; 4247 4248 if ((pd->dir == (*state)->direction && !reverse) || 4249 (pd->dir != (*state)->direction && reverse)) { 4250 src = &(*state)->src; 4251 dst = &(*state)->dst; 4252 psrc = PF_PEER_SRC; 4253 pdst = PF_PEER_DST; 4254 } else { 4255 src = &(*state)->dst; 4256 dst = &(*state)->src; 4257 psrc = PF_PEER_DST; 4258 pdst = PF_PEER_SRC; 4259 } 4260 4261 if (src->wscale && dst->wscale && !(th->th_flags & TH_SYN)) { 4262 sws = src->wscale & PF_WSCALE_MASK; 4263 dws = dst->wscale & PF_WSCALE_MASK; 4264 } else 4265 sws = dws = 0; 4266 4267 /* 4268 * Sequence tracking algorithm from Guido van Rooij's paper: 4269 * http://www.madison-gurkha.com/publications/tcp_filtering/ 4270 * tcp_filtering.ps 4271 */ 4272 4273 orig_seq = seq = ntohl(th->th_seq); 4274 if (src->seqlo == 0) { 4275 /* First packet from this end. Set its state */ 4276 4277 if (((*state)->state_flags & PFSTATE_SCRUB_TCP || dst->scrub) && 4278 src->scrub == NULL) { 4279 if (pf_normalize_tcp_init(pd, src)) { 4280 REASON_SET(reason, PFRES_MEMORY); 4281 return (PF_DROP); 4282 } 4283 } 4284 4285 /* Deferred generation of sequence number modulator */ 4286 if (dst->seqdiff && !src->seqdiff) { 4287 /* use random iss for the TCP server */ 4288 while ((src->seqdiff = arc4random() - seq) == 0) 4289 continue; 4290 ack = ntohl(th->th_ack) - dst->seqdiff; 4291 pf_patch_32(pd, &th->th_seq, htonl(seq + src->seqdiff)); 4292 pf_patch_32(pd, &th->th_ack, htonl(ack)); 4293 *copyback = 1; 4294 } else { 4295 ack = ntohl(th->th_ack); 4296 } 4297 4298 end = seq + pd->p_len; 4299 if (th->th_flags & TH_SYN) { 4300 end++; 4301 if (dst->wscale & PF_WSCALE_FLAG) { 4302 src->wscale = pf_get_wscale(pd); 4303 if (src->wscale & PF_WSCALE_FLAG) { 4304 /* Remove scale factor from initial 4305 * window */ 4306 sws = src->wscale & PF_WSCALE_MASK; 4307 win = ((u_int32_t)win + (1 << sws) - 1) 4308 >> sws; 4309 dws = dst->wscale & PF_WSCALE_MASK; 4310 } else { 4311 /* fixup other window */ 4312 dst->max_win = MIN(TCP_MAXWIN, 4313 (u_int32_t)dst->max_win << 4314 (dst->wscale & PF_WSCALE_MASK)); 4315 /* in case of a retrans SYN|ACK */ 4316 dst->wscale = 0; 4317 } 4318 } 4319 } 4320 data_end = end; 4321 if (th->th_flags & TH_FIN) 4322 end++; 4323 4324 src->seqlo = seq; 4325 if (src->state < TCPS_SYN_SENT) 4326 pf_set_protostate(*state, psrc, TCPS_SYN_SENT); 4327 4328 /* 4329 * May need to slide the window (seqhi may have been set by 4330 * the crappy stack check or if we picked up the connection 4331 * after establishment) 4332 */ 4333 if (src->seqhi == 1 || 4334 SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi)) 4335 src->seqhi = end + MAX(1, dst->max_win << dws); 4336 if (win > src->max_win) 4337 src->max_win = win; 4338 4339 } else { 4340 ack = ntohl(th->th_ack) - dst->seqdiff; 4341 if (src->seqdiff) { 4342 /* Modulate sequence numbers */ 4343 pf_patch_32(pd, &th->th_seq, htonl(seq + src->seqdiff)); 4344 
pf_patch_32(pd, &th->th_ack, htonl(ack));
4345 *copyback = 1;
4346 }
4347 end = seq + pd->p_len;
4348 if (th->th_flags & TH_SYN)
4349 end++;
4350 data_end = end;
4351 if (th->th_flags & TH_FIN)
4352 end++;
4353 }
4354
4355 if ((th->th_flags & TH_ACK) == 0) {
4356 /* Let it pass through the ack skew check */
4357 ack = dst->seqlo;
4358 } else if ((ack == 0 &&
4359 (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) ||
4360 /* broken tcp stacks do not set ack */
4361 (dst->state < TCPS_SYN_SENT)) {
4362 /*
4363 * Many stacks (ours included) will set the ACK number in a
4364 * FIN|ACK if the SYN times out -- no sequence to ACK.
4365 */
4366 ack = dst->seqlo;
4367 }
4368
4369 if (seq == end) {
4370 /* Ease sequencing restrictions on no-data packets */
4371 seq = src->seqlo;
4372 data_end = end = seq;
4373 }
4374
4375 ackskew = dst->seqlo - ack;
4376
4377
4378 /*
4379 * Need to demodulate the sequence numbers in any TCP SACK options
4380 * (Selective ACK). We could optionally validate the SACK values
4381 * against the current ACK window, either forwards or backwards, but
4382 * I'm not confident that SACK has been implemented properly
4383 * everywhere. It wouldn't surprise me if several stacks accidentally
4384 * SACK too far backwards of previously ACKed data. There really aren't
4385 * any security implications of bad SACKing unless the target stack
4386 * doesn't validate the option length correctly. Someone trying to
4387 * spoof into a TCP connection won't bother blindly sending SACK
4388 * options anyway.
4389 */
4390 if (dst->seqdiff && (th->th_off << 2) > sizeof(struct tcphdr)) {
4391 if (pf_modulate_sack(pd, dst))
4392 *copyback = 1;
4393 }
4394
4395
4396 #define MAXACKWINDOW (0xffff + 1500) /* 1500 is an arbitrary fudge factor */
4397 if (SEQ_GEQ(src->seqhi, data_end) &&
4398 /* Last octet inside other's window space */
4399 SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) &&
4400 /* Retrans: not more than one window back */
4401 (ackskew >= -MAXACKWINDOW) &&
4402 /* Acking not more than one reassembled fragment backwards */
4403 (ackskew <= (MAXACKWINDOW << sws)) &&
4404 /* Acking not more than one window forward */
4405 ((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo ||
4406 (orig_seq == src->seqlo + 1) || (orig_seq + 1 == src->seqlo))) {
4407 /* Require an exact/+1 sequence match on resets when possible */
4408
4409 if (dst->scrub || src->scrub) {
4410 if (pf_normalize_tcp_stateful(pd, reason, *state, src,
4411 dst, copyback))
4412 return (PF_DROP);
4413 }
4414
4415 /* update max window */
4416 if (src->max_win < win)
4417 src->max_win = win;
4418 /* synchronize sequencing */
4419 if (SEQ_GT(end, src->seqlo))
4420 src->seqlo = end;
4421 /* slide the window of what the other end can send */
4422 if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
4423 dst->seqhi = ack + MAX((win << sws), 1);
4424
4425 /* update states */
4426 if (th->th_flags & TH_SYN)
4427 if (src->state < TCPS_SYN_SENT)
4428 pf_set_protostate(*state, psrc, TCPS_SYN_SENT);
4429 if (th->th_flags & TH_FIN)
4430 if (src->state < TCPS_CLOSING)
4431 pf_set_protostate(*state, psrc, TCPS_CLOSING);
4432 if (th->th_flags & TH_ACK) {
4433 if (dst->state == TCPS_SYN_SENT) {
4434 pf_set_protostate(*state, pdst,
4435 TCPS_ESTABLISHED);
4436 if (src->state == TCPS_ESTABLISHED &&
4437 !SLIST_EMPTY(&(*state)->src_nodes) &&
4438 pf_src_connlimit(state)) {
4439 REASON_SET(reason, PFRES_SRCLIMIT);
4440 return (PF_DROP);
4441 }
4442 } else if (dst->state == TCPS_CLOSING)
4443 pf_set_protostate(*state, pdst,
4444 TCPS_FIN_WAIT_2);
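/* (the ACK we just saw covers the peer's FIN, hence FIN_WAIT_2) */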
4445 } 4446 if (th->th_flags & TH_RST) 4447 pf_set_protostate(*state, PF_PEER_BOTH, TCPS_TIME_WAIT); 4448 4449 /* update expire time */ 4450 (*state)->expire = time_uptime; 4451 if (src->state >= TCPS_FIN_WAIT_2 && 4452 dst->state >= TCPS_FIN_WAIT_2) 4453 (*state)->timeout = PFTM_TCP_CLOSED; 4454 else if (src->state >= TCPS_CLOSING && 4455 dst->state >= TCPS_CLOSING) 4456 (*state)->timeout = PFTM_TCP_FIN_WAIT; 4457 else if (src->state < TCPS_ESTABLISHED || 4458 dst->state < TCPS_ESTABLISHED) 4459 (*state)->timeout = PFTM_TCP_OPENING; 4460 else if (src->state >= TCPS_CLOSING || 4461 dst->state >= TCPS_CLOSING) 4462 (*state)->timeout = PFTM_TCP_CLOSING; 4463 else 4464 (*state)->timeout = PFTM_TCP_ESTABLISHED; 4465 4466 /* Fall through to PASS packet */ 4467 } else if ((dst->state < TCPS_SYN_SENT || 4468 dst->state >= TCPS_FIN_WAIT_2 || 4469 src->state >= TCPS_FIN_WAIT_2) && 4470 SEQ_GEQ(src->seqhi + MAXACKWINDOW, data_end) && 4471 /* Within a window forward of the originating packet */ 4472 SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) { 4473 /* Within a window backward of the originating packet */ 4474 4475 /* 4476 * This currently handles three situations: 4477 * 1) Stupid stacks will shotgun SYNs before their peer 4478 * replies. 4479 * 2) When PF catches an already established stream (the 4480 * firewall rebooted, the state table was flushed, routes 4481 * changed...) 4482 * 3) Packets get funky immediately after the connection 4483 * closes (this should catch Solaris spurious ACK|FINs 4484 * that web servers like to spew after a close) 4485 * 4486 * This must be a little more careful than the above code 4487 * since packet floods will also be caught here. We don't 4488 * update the TTL here to mitigate the damage of a packet 4489 * flood and so the same code can handle awkward establishment 4490 * and a loosened connection close. 4491 * In the establishment case, a correct peer response will 4492 * validate the connection, go through the normal state code 4493 * and keep updating the state TTL. 4494 */ 4495 4496 if (pf_status.debug >= LOG_NOTICE) { 4497 log(LOG_NOTICE, "pf: loose state match: "); 4498 pf_print_state(*state); 4499 pf_print_flags(th->th_flags); 4500 addlog(" seq=%u (%u) ack=%u len=%u ackskew=%d " 4501 "pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack, 4502 pd->p_len, ackskew, (*state)->packets[0], 4503 (*state)->packets[1], 4504 pd->dir == PF_IN ? "in" : "out", 4505 pd->dir == (*state)->direction ? "fwd" : "rev"); 4506 } 4507 4508 if (dst->scrub || src->scrub) { 4509 if (pf_normalize_tcp_stateful(pd, reason, *state, src, 4510 dst, copyback)) 4511 return (PF_DROP); 4512 } 4513 4514 /* update max window */ 4515 if (src->max_win < win) 4516 src->max_win = win; 4517 /* synchronize sequencing */ 4518 if (SEQ_GT(end, src->seqlo)) 4519 src->seqlo = end; 4520 /* slide the window of what the other end can send */ 4521 if (SEQ_GEQ(ack + (win << sws), dst->seqhi)) 4522 dst->seqhi = ack + MAX((win << sws), 1); 4523 4524 /* 4525 * Cannot set dst->seqhi here since this could be a shotgunned 4526 * SYN and not an already established connection. 
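 * (With a shotgunned SYN there has been no reply from dst yet,
 * so any seqhi chosen here would only be a guess.)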
4527 */
4528 if (th->th_flags & TH_FIN)
4529 if (src->state < TCPS_CLOSING)
4530 pf_set_protostate(*state, psrc, TCPS_CLOSING);
4531 if (th->th_flags & TH_RST)
4532 pf_set_protostate(*state, PF_PEER_BOTH, TCPS_TIME_WAIT);
4533
4534 /* Fall through to PASS packet */
4535 } else {
4536 if ((*state)->dst.state == TCPS_SYN_SENT &&
4537 (*state)->src.state == TCPS_SYN_SENT) {
4538 /* Send RST for state mismatches during handshake */
4539 if (!(th->th_flags & TH_RST))
4540 pf_send_tcp((*state)->rule.ptr, pd->af,
4541 pd->dst, pd->src, th->th_dport,
4542 th->th_sport, ntohl(th->th_ack), 0,
4543 TH_RST, 0, 0,
4544 (*state)->rule.ptr->return_ttl, 1, 0,
4545 pd->rdomain);
4546 src->seqlo = 0;
4547 src->seqhi = 1;
4548 src->max_win = 1;
4549 } else if (pf_status.debug >= LOG_NOTICE) {
4550 log(LOG_NOTICE, "pf: BAD state: ");
4551 pf_print_state(*state);
4552 pf_print_flags(th->th_flags);
4553 addlog(" seq=%u (%u) ack=%u len=%u ackskew=%d "
4554 "pkts=%llu:%llu dir=%s,%s\n",
4555 seq, orig_seq, ack, pd->p_len, ackskew,
4556 (*state)->packets[0], (*state)->packets[1],
4557 pd->dir == PF_IN ? "in" : "out",
4558 pd->dir == (*state)->direction ? "fwd" : "rev");
4559 addlog("pf: State failure on: %c %c %c %c | %c %c\n",
4560 SEQ_GEQ(src->seqhi, data_end) ? ' ' : '1',
4561 SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ?
4562 ' ': '2',
4563 (ackskew >= -MAXACKWINDOW) ? ' ' : '3',
4564 (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4',
4565 SEQ_GEQ(src->seqhi + MAXACKWINDOW, data_end) ?
4566 ' ' :'5',
4567 SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' ' :'6');
4568 }
4569 REASON_SET(reason, PFRES_BADSTATE);
4570 return (PF_DROP);
4571 }
4572
4573 return (PF_PASS);
4574 }
4575
4576 int
4577 pf_tcp_track_sloppy(struct pf_pdesc *pd, struct pf_state **state,
4578 u_short *reason)
4579 {
4580 struct tcphdr *th = &pd->hdr.tcp;
4581 struct pf_state_peer *src, *dst;
4582 u_int8_t psrc, pdst;
4583
4584 if (pd->dir == (*state)->direction) {
4585 src = &(*state)->src;
4586 dst = &(*state)->dst;
4587 psrc = PF_PEER_SRC;
4588 pdst = PF_PEER_DST;
4589 } else {
4590 src = &(*state)->dst;
4591 dst = &(*state)->src;
4592 psrc = PF_PEER_DST;
4593 pdst = PF_PEER_SRC;
4594 }
4595
4596 if (th->th_flags & TH_SYN)
4597 if (src->state < TCPS_SYN_SENT)
4598 pf_set_protostate(*state, psrc, TCPS_SYN_SENT);
4599 if (th->th_flags & TH_FIN)
4600 if (src->state < TCPS_CLOSING)
4601 pf_set_protostate(*state, psrc, TCPS_CLOSING);
4602 if (th->th_flags & TH_ACK) {
4603 if (dst->state == TCPS_SYN_SENT) {
4604 pf_set_protostate(*state, pdst, TCPS_ESTABLISHED);
4605 if (src->state == TCPS_ESTABLISHED &&
4606 !SLIST_EMPTY(&(*state)->src_nodes) &&
4607 pf_src_connlimit(state)) {
4608 REASON_SET(reason, PFRES_SRCLIMIT);
4609 return (PF_DROP);
4610 }
4611 } else if (dst->state == TCPS_CLOSING) {
4612 pf_set_protostate(*state, pdst, TCPS_FIN_WAIT_2);
4613 } else if (src->state == TCPS_SYN_SENT &&
4614 dst->state < TCPS_SYN_SENT) {
4615 /*
4616 * Handle a special sloppy case where we only see one
4617 * half of the connection. If there is an ACK after
4618 * the initial SYN without ever seeing a packet from
4619 * the destination, set the connection to established.
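 * (Typically the result of asymmetric routing, where the
 * reply traffic never passes through this firewall.)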
4620 */ 4621 pf_set_protostate(*state, PF_PEER_BOTH, 4622 TCPS_ESTABLISHED); 4623 if (!SLIST_EMPTY(&(*state)->src_nodes) && 4624 pf_src_connlimit(state)) { 4625 REASON_SET(reason, PFRES_SRCLIMIT); 4626 return (PF_DROP); 4627 } 4628 } else if (src->state == TCPS_CLOSING && 4629 dst->state == TCPS_ESTABLISHED && 4630 dst->seqlo == 0) { 4631 /* 4632 * Handle the closing of half connections where we 4633 * don't see the full bidirectional FIN/ACK+ACK 4634 * handshake. 4635 */ 4636 pf_set_protostate(*state, pdst, TCPS_CLOSING); 4637 } 4638 } 4639 if (th->th_flags & TH_RST) 4640 pf_set_protostate(*state, PF_PEER_BOTH, TCPS_TIME_WAIT); 4641 4642 /* update expire time */ 4643 (*state)->expire = time_uptime; 4644 if (src->state >= TCPS_FIN_WAIT_2 && 4645 dst->state >= TCPS_FIN_WAIT_2) 4646 (*state)->timeout = PFTM_TCP_CLOSED; 4647 else if (src->state >= TCPS_CLOSING && 4648 dst->state >= TCPS_CLOSING) 4649 (*state)->timeout = PFTM_TCP_FIN_WAIT; 4650 else if (src->state < TCPS_ESTABLISHED || 4651 dst->state < TCPS_ESTABLISHED) 4652 (*state)->timeout = PFTM_TCP_OPENING; 4653 else if (src->state >= TCPS_CLOSING || 4654 dst->state >= TCPS_CLOSING) 4655 (*state)->timeout = PFTM_TCP_CLOSING; 4656 else 4657 (*state)->timeout = PFTM_TCP_ESTABLISHED; 4658 4659 return (PF_PASS); 4660 } 4661 4662 static __inline int 4663 pf_synproxy(struct pf_pdesc *pd, struct pf_state **state, u_short *reason) 4664 { 4665 struct pf_state_key *sk = (*state)->key[pd->didx]; 4666 4667 if ((*state)->src.state == PF_TCPS_PROXY_SRC) { 4668 struct tcphdr *th = &pd->hdr.tcp; 4669 4670 if (pd->dir != (*state)->direction) { 4671 REASON_SET(reason, PFRES_SYNPROXY); 4672 return (PF_SYNPROXY_DROP); 4673 } 4674 if (th->th_flags & TH_SYN) { 4675 if (ntohl(th->th_seq) != (*state)->src.seqlo) { 4676 REASON_SET(reason, PFRES_SYNPROXY); 4677 return (PF_DROP); 4678 } 4679 pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst, 4680 pd->src, th->th_dport, th->th_sport, 4681 (*state)->src.seqhi, ntohl(th->th_seq) + 1, 4682 TH_SYN|TH_ACK, 0, (*state)->src.mss, 0, 1, 4683 0, pd->rdomain); 4684 REASON_SET(reason, PFRES_SYNPROXY); 4685 return (PF_SYNPROXY_DROP); 4686 } else if ((th->th_flags & (TH_ACK|TH_RST|TH_FIN)) != TH_ACK || 4687 (ntohl(th->th_ack) != (*state)->src.seqhi + 1) || 4688 (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) { 4689 REASON_SET(reason, PFRES_SYNPROXY); 4690 return (PF_DROP); 4691 } else if (!SLIST_EMPTY(&(*state)->src_nodes) && 4692 pf_src_connlimit(state)) { 4693 REASON_SET(reason, PFRES_SRCLIMIT); 4694 return (PF_DROP); 4695 } else 4696 pf_set_protostate(*state, PF_PEER_SRC, 4697 PF_TCPS_PROXY_DST); 4698 } 4699 if ((*state)->src.state == PF_TCPS_PROXY_DST) { 4700 struct tcphdr *th = &pd->hdr.tcp; 4701 4702 if (pd->dir == (*state)->direction) { 4703 if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) || 4704 (ntohl(th->th_ack) != (*state)->src.seqhi + 1) || 4705 (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) { 4706 REASON_SET(reason, PFRES_SYNPROXY); 4707 return (PF_DROP); 4708 } 4709 (*state)->src.max_win = MAX(ntohs(th->th_win), 1); 4710 if ((*state)->dst.seqhi == 1) 4711 (*state)->dst.seqhi = arc4random(); 4712 pf_send_tcp((*state)->rule.ptr, pd->af, 4713 &sk->addr[pd->sidx], &sk->addr[pd->didx], 4714 sk->port[pd->sidx], sk->port[pd->didx], 4715 (*state)->dst.seqhi, 0, TH_SYN, 0, 4716 (*state)->src.mss, 0, 0, (*state)->tag, 4717 sk->rdomain); 4718 REASON_SET(reason, PFRES_SYNPROXY); 4719 return (PF_SYNPROXY_DROP); 4720 } else if (((th->th_flags & (TH_SYN|TH_ACK)) != 4721 (TH_SYN|TH_ACK)) || 4722 (ntohl(th->th_ack) != 
(*state)->dst.seqhi + 1)) {
4723 REASON_SET(reason, PFRES_SYNPROXY);
4724 return (PF_DROP);
4725 } else {
4726 (*state)->dst.max_win = MAX(ntohs(th->th_win), 1);
4727 (*state)->dst.seqlo = ntohl(th->th_seq);
4728 pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
4729 pd->src, th->th_dport, th->th_sport,
4730 ntohl(th->th_ack), ntohl(th->th_seq) + 1,
4731 TH_ACK, (*state)->src.max_win, 0, 0, 0,
4732 (*state)->tag, pd->rdomain);
4733 pf_send_tcp((*state)->rule.ptr, pd->af,
4734 &sk->addr[pd->sidx], &sk->addr[pd->didx],
4735 sk->port[pd->sidx], sk->port[pd->didx],
4736 (*state)->src.seqhi + 1, (*state)->src.seqlo + 1,
4737 TH_ACK, (*state)->dst.max_win, 0, 0, 1,
4738 0, sk->rdomain);
4739 (*state)->src.seqdiff = (*state)->dst.seqhi -
4740 (*state)->src.seqlo;
4741 (*state)->dst.seqdiff = (*state)->src.seqhi -
4742 (*state)->dst.seqlo;
4743 (*state)->src.seqhi = (*state)->src.seqlo +
4744 (*state)->dst.max_win;
4745 (*state)->dst.seqhi = (*state)->dst.seqlo +
4746 (*state)->src.max_win;
4747 (*state)->src.wscale = (*state)->dst.wscale = 0;
4748 pf_set_protostate(*state, PF_PEER_BOTH,
4749 TCPS_ESTABLISHED);
4750 REASON_SET(reason, PFRES_SYNPROXY);
4751 return (PF_SYNPROXY_DROP);
4752 }
4753 }
4754 return (PF_PASS);
4755 }
4756
4757 int
4758 pf_test_state(struct pf_pdesc *pd, struct pf_state **state, u_short *reason,
4759 int syncookie)
4760 {
4761 struct pf_state_key_cmp key;
4762 int copyback = 0;
4763 struct pf_state_peer *src, *dst;
4764 int action;
4765 struct inpcb *inp;
4766 u_int8_t psrc, pdst;
4767
4768 key.af = pd->af;
4769 key.proto = pd->virtual_proto;
4770 key.rdomain = pd->rdomain;
4771 pf_addrcpy(&key.addr[pd->sidx], pd->src, key.af);
4772 pf_addrcpy(&key.addr[pd->didx], pd->dst, key.af);
4773 key.port[pd->sidx] = pd->osport;
4774 key.port[pd->didx] = pd->odport;
4775 inp = pd->m->m_pkthdr.pf.inp;
4776
4777 action = pf_find_state(pd, &key, state);
4778 if (action != PF_MATCH)
4779 return (action);
4780
4781 action = PF_PASS;
4782 if (pd->dir == (*state)->direction) {
4783 src = &(*state)->src;
4784 dst = &(*state)->dst;
4785 psrc = PF_PEER_SRC;
4786 pdst = PF_PEER_DST;
4787 } else {
4788 src = &(*state)->dst;
4789 dst = &(*state)->src;
4790 psrc = PF_PEER_DST;
4791 pdst = PF_PEER_SRC;
4792 }
4793
4794 switch (pd->virtual_proto) {
4795 case IPPROTO_TCP:
4796 if (syncookie) {
4797 pf_set_protostate(*state, PF_PEER_SRC,
4798 PF_TCPS_PROXY_DST);
4799 (*state)->dst.seqhi = ntohl(pd->hdr.tcp.th_ack) - 1;
4800 }
4801 if ((action = pf_synproxy(pd, state, reason)) != PF_PASS)
4802 return (action);
4803 if ((pd->hdr.tcp.th_flags & (TH_SYN|TH_ACK)) == TH_SYN) {
4804
4805 if (dst->state >= TCPS_FIN_WAIT_2 &&
4806 src->state >= TCPS_FIN_WAIT_2) {
4807 if (pf_status.debug >= LOG_NOTICE) {
4808 log(LOG_NOTICE, "pf: state reuse ");
4809 pf_print_state(*state);
4810 pf_print_flags(pd->hdr.tcp.th_flags);
4811 addlog("\n");
4812 }
4813 /* XXX make sure it's the same direction ?? */
4814 (*state)->timeout = PFTM_PURGE;
4815 *state = NULL;
4816 pf_mbuf_link_inpcb(pd->m, inp);
4817 return (PF_DROP);
4818 } else if (dst->state >= TCPS_ESTABLISHED &&
4819 src->state >= TCPS_ESTABLISHED) {
4820 /*
4821 * SYN matches existing state???
4822 * Typically happens when the sender boots up
4823 * after a sudden panic. Certain protocols
4824 * (NFSv3) always use the same port numbers.
4825 * A challenge ACK enables all parties (firewall
4826 * and peers) to get in sync again.
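 * (This follows the same idea as the RFC 5961 challenge
 * ACK: a peer that really lost its connection answers
 * the challenge with a RST, which clears the stale state
 * so the new SYN can succeed on retry.)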
4827 */
4828 pf_send_challenge_ack(pd, *state, src, dst);
4829 return (PF_DROP);
4830 }
4831 }
4832
4833 if ((*state)->state_flags & PFSTATE_SLOPPY) {
4834 if (pf_tcp_track_sloppy(pd, state, reason) == PF_DROP)
4835 return (PF_DROP);
4836 } else {
4837 if (pf_tcp_track_full(pd, state, reason, &copyback,
4838 PF_REVERSED_KEY((*state)->key, pd->af)) == PF_DROP)
4839 return (PF_DROP);
4840 }
4841 break;
4842 case IPPROTO_UDP:
4843 /* update states */
4844 if (src->state < PFUDPS_SINGLE)
4845 pf_set_protostate(*state, psrc, PFUDPS_SINGLE);
4846 if (dst->state == PFUDPS_SINGLE)
4847 pf_set_protostate(*state, pdst, PFUDPS_MULTIPLE);
4848
4849 /* update expire time */
4850 (*state)->expire = time_uptime;
4851 if (src->state == PFUDPS_MULTIPLE &&
4852 dst->state == PFUDPS_MULTIPLE)
4853 (*state)->timeout = PFTM_UDP_MULTIPLE;
4854 else
4855 (*state)->timeout = PFTM_UDP_SINGLE;
4856 break;
4857 default:
4858 /* update states */
4859 if (src->state < PFOTHERS_SINGLE)
4860 pf_set_protostate(*state, psrc, PFOTHERS_SINGLE);
4861 if (dst->state == PFOTHERS_SINGLE)
4862 pf_set_protostate(*state, pdst, PFOTHERS_MULTIPLE);
4863
4864 /* update expire time */
4865 (*state)->expire = time_uptime;
4866 if (src->state == PFOTHERS_MULTIPLE &&
4867 dst->state == PFOTHERS_MULTIPLE)
4868 (*state)->timeout = PFTM_OTHER_MULTIPLE;
4869 else
4870 (*state)->timeout = PFTM_OTHER_SINGLE;
4871 break;
4872 }
4873
4874 /* translate source/destination address, if necessary */
4875 if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
4876 struct pf_state_key *nk;
4877 int afto, sidx, didx;
4878
4879 if (PF_REVERSED_KEY((*state)->key, pd->af))
4880 nk = (*state)->key[pd->sidx];
4881 else
4882 nk = (*state)->key[pd->didx];
4883
4884 afto = pd->af != nk->af;
4885 sidx = afto ? pd->didx : pd->sidx;
4886 didx = afto ?
pd->sidx : pd->didx; 4887 4888 #ifdef INET6 4889 if (afto) { 4890 pf_addrcpy(&pd->nsaddr, &nk->addr[sidx], nk->af); 4891 pf_addrcpy(&pd->ndaddr, &nk->addr[didx], nk->af); 4892 pd->naf = nk->af; 4893 action = PF_AFRT; 4894 } 4895 #endif /* INET6 */ 4896 4897 if (!afto) 4898 pf_translate_a(pd, pd->src, &nk->addr[sidx]); 4899 4900 if (pd->sport != NULL) 4901 pf_patch_16(pd, pd->sport, nk->port[sidx]); 4902 4903 if (afto || PF_ANEQ(pd->dst, &nk->addr[didx], pd->af) || 4904 pd->rdomain != nk->rdomain) 4905 pd->destchg = 1; 4906 4907 if (!afto) 4908 pf_translate_a(pd, pd->dst, &nk->addr[didx]); 4909 4910 if (pd->dport != NULL) 4911 pf_patch_16(pd, pd->dport, nk->port[didx]); 4912 4913 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 4914 copyback = 1; 4915 } 4916 4917 if (copyback && pd->hdrlen > 0) { 4918 m_copyback(pd->m, pd->off, pd->hdrlen, &pd->hdr, M_NOWAIT); 4919 } 4920 4921 return (action); 4922 } 4923 4924 int 4925 pf_icmp_state_lookup(struct pf_pdesc *pd, struct pf_state_key_cmp *key, 4926 struct pf_state **state, u_int16_t icmpid, u_int16_t type, 4927 int icmp_dir, int *iidx, int multi, int inner) 4928 { 4929 int direction, action; 4930 4931 key->af = pd->af; 4932 key->proto = pd->proto; 4933 key->rdomain = pd->rdomain; 4934 if (icmp_dir == PF_IN) { 4935 *iidx = pd->sidx; 4936 key->port[pd->sidx] = icmpid; 4937 key->port[pd->didx] = type; 4938 } else { 4939 *iidx = pd->didx; 4940 key->port[pd->sidx] = type; 4941 key->port[pd->didx] = icmpid; 4942 } 4943 4944 if (pf_state_key_addr_setup(pd, key, pd->sidx, pd->src, pd->didx, 4945 pd->dst, pd->af, multi)) 4946 return (PF_DROP); 4947 4948 action = pf_find_state(pd, key, state); 4949 if (action != PF_MATCH) 4950 return (action); 4951 4952 if ((*state)->state_flags & PFSTATE_SLOPPY) 4953 return (-1); 4954 4955 /* Is this ICMP message flowing in right direction? */ 4956 if ((*state)->key[PF_SK_WIRE]->af != (*state)->key[PF_SK_STACK]->af) 4957 direction = (pd->af == (*state)->key[PF_SK_WIRE]->af) ? 4958 PF_IN : PF_OUT; 4959 else 4960 direction = (*state)->direction; 4961 if ((((!inner && direction == pd->dir) || 4962 (inner && direction != pd->dir)) ? 4963 PF_IN : PF_OUT) != icmp_dir) { 4964 if (pf_status.debug >= LOG_NOTICE) { 4965 log(LOG_NOTICE, 4966 "pf: icmp type %d in wrong direction (%d): ", 4967 ntohs(type), icmp_dir); 4968 pf_print_state(*state); 4969 addlog("\n"); 4970 } 4971 return (PF_DROP); 4972 } 4973 return (-1); 4974 } 4975 4976 int 4977 pf_test_state_icmp(struct pf_pdesc *pd, struct pf_state **state, 4978 u_short *reason) 4979 { 4980 u_int16_t virtual_id, virtual_type; 4981 u_int8_t icmptype; 4982 int icmp_dir, iidx, ret, copyback = 0; 4983 4984 struct pf_state_key_cmp key; 4985 4986 switch (pd->proto) { 4987 case IPPROTO_ICMP: 4988 icmptype = pd->hdr.icmp.icmp_type; 4989 break; 4990 #ifdef INET6 4991 case IPPROTO_ICMPV6: 4992 icmptype = pd->hdr.icmp6.icmp6_type; 4993 break; 4994 #endif /* INET6 */ 4995 default: 4996 panic("unhandled proto %d", pd->proto); 4997 } 4998 4999 if (pf_icmp_mapping(pd, icmptype, &icmp_dir, &virtual_id, 5000 &virtual_type) == 0) { 5001 /* 5002 * ICMP query/reply message not related to a TCP/UDP packet. 5003 * Search for an ICMP state. 5004 */ 5005 ret = pf_icmp_state_lookup(pd, &key, state, 5006 virtual_id, virtual_type, icmp_dir, &iidx, 5007 0, 0); 5008 /* IPv6? 
try matching a multicast address */ 5009 if (ret == PF_DROP && pd->af == AF_INET6 && icmp_dir == PF_OUT) 5010 ret = pf_icmp_state_lookup(pd, &key, state, virtual_id, 5011 virtual_type, icmp_dir, &iidx, 1, 0); 5012 if (ret >= 0) 5013 return (ret); 5014 5015 (*state)->expire = time_uptime; 5016 (*state)->timeout = PFTM_ICMP_ERROR_REPLY; 5017 5018 /* translate source/destination address, if necessary */ 5019 if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { 5020 struct pf_state_key *nk; 5021 int afto, sidx, didx; 5022 5023 if (PF_REVERSED_KEY((*state)->key, pd->af)) 5024 nk = (*state)->key[pd->sidx]; 5025 else 5026 nk = (*state)->key[pd->didx]; 5027 5028 afto = pd->af != nk->af; 5029 sidx = afto ? pd->didx : pd->sidx; 5030 didx = afto ? pd->sidx : pd->didx; 5031 iidx = afto ? !iidx : iidx; 5032 #ifdef INET6 5033 if (afto) { 5034 pf_addrcpy(&pd->nsaddr, &nk->addr[sidx], 5035 nk->af); 5036 pf_addrcpy(&pd->ndaddr, &nk->addr[didx], 5037 nk->af); 5038 pd->naf = nk->af; 5039 } 5040 #endif /* INET6 */ 5041 if (!afto) { 5042 pf_translate_a(pd, pd->src, &nk->addr[sidx]); 5043 pf_translate_a(pd, pd->dst, &nk->addr[didx]); 5044 } 5045 5046 if (pd->rdomain != nk->rdomain) 5047 pd->destchg = 1; 5048 if (!afto && PF_ANEQ(pd->dst, 5049 &nk->addr[didx], pd->af)) 5050 pd->destchg = 1; 5051 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 5052 5053 switch (pd->af) { 5054 case AF_INET: 5055 #ifdef INET6 5056 if (afto) { 5057 if (pf_translate_icmp_af(pd, AF_INET6, 5058 &pd->hdr.icmp)) 5059 return (PF_DROP); 5060 pd->proto = IPPROTO_ICMPV6; 5061 } 5062 #endif /* INET6 */ 5063 pf_patch_16(pd, 5064 &pd->hdr.icmp.icmp_id, nk->port[iidx]); 5065 5066 m_copyback(pd->m, pd->off, ICMP_MINLEN, 5067 &pd->hdr.icmp, M_NOWAIT); 5068 copyback = 1; 5069 break; 5070 #ifdef INET6 5071 case AF_INET6: 5072 if (afto) { 5073 if (pf_translate_icmp_af(pd, AF_INET, 5074 &pd->hdr.icmp6)) 5075 return (PF_DROP); 5076 pd->proto = IPPROTO_ICMP; 5077 } 5078 5079 pf_patch_16(pd, 5080 &pd->hdr.icmp6.icmp6_id, nk->port[iidx]); 5081 5082 m_copyback(pd->m, pd->off, 5083 sizeof(struct icmp6_hdr), &pd->hdr.icmp6, 5084 M_NOWAIT); 5085 copyback = 1; 5086 break; 5087 #endif /* INET6 */ 5088 } 5089 #ifdef INET6 5090 if (afto) 5091 return (PF_AFRT); 5092 #endif /* INET6 */ 5093 } 5094 } else { 5095 /* 5096 * ICMP error message in response to a TCP/UDP packet. 5097 * Extract the inner TCP/UDP header and search for that state. 5098 */ 5099 struct pf_pdesc pd2; 5100 struct ip h2; 5101 #ifdef INET6 5102 struct ip6_hdr h2_6; 5103 #endif /* INET6 */ 5104 int ipoff2; 5105 5106 /* Initialize pd2 fields valid for both packets with pd. */ 5107 memset(&pd2, 0, sizeof(pd2)); 5108 pd2.af = pd->af; 5109 pd2.dir = pd->dir; 5110 pd2.kif = pd->kif; 5111 pd2.m = pd->m; 5112 pd2.rdomain = pd->rdomain; 5113 /* Payload packet is from the opposite direction. */ 5114 pd2.sidx = (pd2.dir == PF_IN) ? 1 : 0; 5115 pd2.didx = (pd2.dir == PF_IN) ? 
0 : 1;
5116 switch (pd->af) {
5117 case AF_INET:
5118 /* offset of h2 in mbuf chain */
5119 ipoff2 = pd->off + ICMP_MINLEN;
5120
5121 if (!pf_pull_hdr(pd2.m, ipoff2, &h2, sizeof(h2),
5122 NULL, reason, pd2.af)) {
5123 DPFPRINTF(LOG_NOTICE,
5124 "ICMP error message too short (ip)");
5125 return (PF_DROP);
5126 }
5127 /*
5128 * ICMP error messages don't refer to non-first
5129 * fragments
5130 */
5131 if (h2.ip_off & htons(IP_OFFMASK)) {
5132 REASON_SET(reason, PFRES_FRAG);
5133 return (PF_DROP);
5134 }
5135
5136 /* offset of protocol header that follows h2 */
5137 pd2.off = ipoff2;
5138 if (pf_walk_header(&pd2, &h2, reason) != PF_PASS)
5139 return (PF_DROP);
5140
5141 pd2.tot_len = ntohs(h2.ip_len);
5142 pd2.src = (struct pf_addr *)&h2.ip_src;
5143 pd2.dst = (struct pf_addr *)&h2.ip_dst;
5144 break;
5145 #ifdef INET6
5146 case AF_INET6:
5147 ipoff2 = pd->off + sizeof(struct icmp6_hdr);
5148
5149 if (!pf_pull_hdr(pd2.m, ipoff2, &h2_6, sizeof(h2_6),
5150 NULL, reason, pd2.af)) {
5151 DPFPRINTF(LOG_NOTICE,
5152 "ICMP error message too short (ip6)");
5153 return (PF_DROP);
5154 }
5155
5156 pd2.off = ipoff2;
5157 if (pf_walk_header6(&pd2, &h2_6, reason) != PF_PASS)
5158 return (PF_DROP);
5159
5160 pd2.tot_len = ntohs(h2_6.ip6_plen) +
5161 sizeof(struct ip6_hdr);
5162 pd2.src = (struct pf_addr *)&h2_6.ip6_src;
5163 pd2.dst = (struct pf_addr *)&h2_6.ip6_dst;
5164 break;
5165 #endif /* INET6 */
5166 default:
5167 unhandled_af(pd->af);
5168 }
5169
5170 switch (pd2.proto) {
5171 case IPPROTO_TCP: {
5172 struct tcphdr *th = &pd2.hdr.tcp;
5173 u_int32_t seq;
5174 struct pf_state_peer *src, *dst;
5175 u_int8_t dws;
5176 int action;
5177
5178 /*
5179 * Only the first 8 bytes of the TCP header can be
5180 * expected to be present. Don't access any TCP header
5181 * fields after th_seq; an ackskew test is not possible.
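 * (RFC 792 only requires the IP header plus the first 64
 * bits of the offending datagram to be quoted, which
 * covers the TCP ports and th_seq but not th_ack.)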
5182 */ 5183 if (!pf_pull_hdr(pd2.m, pd2.off, th, 8, NULL, reason, 5184 pd2.af)) { 5185 DPFPRINTF(LOG_NOTICE, 5186 "ICMP error message too short (tcp)"); 5187 return (PF_DROP); 5188 } 5189 5190 key.af = pd2.af; 5191 key.proto = IPPROTO_TCP; 5192 key.rdomain = pd2.rdomain; 5193 pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af); 5194 pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af); 5195 key.port[pd2.sidx] = th->th_sport; 5196 key.port[pd2.didx] = th->th_dport; 5197 5198 action = pf_find_state(&pd2, &key, state); 5199 if (action != PF_MATCH) 5200 return (action); 5201 5202 if (pd2.dir == (*state)->direction) { 5203 if (PF_REVERSED_KEY((*state)->key, pd->af)) { 5204 src = &(*state)->src; 5205 dst = &(*state)->dst; 5206 } else { 5207 src = &(*state)->dst; 5208 dst = &(*state)->src; 5209 } 5210 } else { 5211 if (PF_REVERSED_KEY((*state)->key, pd->af)) { 5212 src = &(*state)->dst; 5213 dst = &(*state)->src; 5214 } else { 5215 src = &(*state)->src; 5216 dst = &(*state)->dst; 5217 } 5218 } 5219 5220 if (src->wscale && dst->wscale) 5221 dws = dst->wscale & PF_WSCALE_MASK; 5222 else 5223 dws = 0; 5224 5225 /* Demodulate sequence number */ 5226 seq = ntohl(th->th_seq) - src->seqdiff; 5227 if (src->seqdiff) { 5228 pf_patch_32(pd, &th->th_seq, htonl(seq)); 5229 copyback = 1; 5230 } 5231 5232 if (!((*state)->state_flags & PFSTATE_SLOPPY) && 5233 (!SEQ_GEQ(src->seqhi, seq) || !SEQ_GEQ(seq, 5234 src->seqlo - (dst->max_win << dws)))) { 5235 if (pf_status.debug >= LOG_NOTICE) { 5236 log(LOG_NOTICE, 5237 "pf: BAD ICMP %d:%d ", 5238 icmptype, pd->hdr.icmp.icmp_code); 5239 pf_print_host(pd->src, 0, pd->af); 5240 addlog(" -> "); 5241 pf_print_host(pd->dst, 0, pd->af); 5242 addlog(" state: "); 5243 pf_print_state(*state); 5244 addlog(" seq=%u\n", seq); 5245 } 5246 REASON_SET(reason, PFRES_BADSTATE); 5247 return (PF_DROP); 5248 } else { 5249 if (pf_status.debug >= LOG_DEBUG) { 5250 log(LOG_DEBUG, 5251 "pf: OK ICMP %d:%d ", 5252 icmptype, pd->hdr.icmp.icmp_code); 5253 pf_print_host(pd->src, 0, pd->af); 5254 addlog(" -> "); 5255 pf_print_host(pd->dst, 0, pd->af); 5256 addlog(" state: "); 5257 pf_print_state(*state); 5258 addlog(" seq=%u\n", seq); 5259 } 5260 } 5261 5262 /* translate source/destination address, if necessary */ 5263 if ((*state)->key[PF_SK_WIRE] != 5264 (*state)->key[PF_SK_STACK]) { 5265 struct pf_state_key *nk; 5266 int afto, sidx, didx; 5267 5268 if (PF_REVERSED_KEY((*state)->key, pd->af)) 5269 nk = (*state)->key[pd->sidx]; 5270 else 5271 nk = (*state)->key[pd->didx]; 5272 5273 afto = pd->af != nk->af; 5274 sidx = afto ? pd2.didx : pd2.sidx; 5275 didx = afto ? 
pd2.sidx : pd2.didx; 5276 5277 #ifdef INET6 5278 if (afto) { 5279 if (pf_translate_icmp_af(pd, nk->af, 5280 &pd->hdr.icmp)) 5281 return (PF_DROP); 5282 m_copyback(pd->m, pd->off, 5283 sizeof(struct icmp6_hdr), 5284 &pd->hdr.icmp6, M_NOWAIT); 5285 if (pf_change_icmp_af(pd->m, ipoff2, 5286 pd, &pd2, &nk->addr[sidx], 5287 &nk->addr[didx], pd->af, nk->af)) 5288 return (PF_DROP); 5289 if (nk->af == AF_INET) 5290 pd->proto = IPPROTO_ICMP; 5291 else 5292 pd->proto = IPPROTO_ICMPV6; 5293 pd->m->m_pkthdr.ph_rtableid = 5294 nk->rdomain; 5295 pd->destchg = 1; 5296 pf_addrcpy(&pd->nsaddr, 5297 &nk->addr[pd2.sidx], nk->af); 5298 pf_addrcpy(&pd->ndaddr, 5299 &nk->addr[pd2.didx], nk->af); 5300 pd->naf = nk->af; 5301 5302 pf_patch_16(pd, 5303 &th->th_sport, nk->port[sidx]); 5304 pf_patch_16(pd, 5305 &th->th_dport, nk->port[didx]); 5306 5307 m_copyback(pd2.m, pd2.off, 8, th, 5308 M_NOWAIT); 5309 return (PF_AFRT); 5310 } 5311 #endif /* INET6 */ 5312 if (PF_ANEQ(pd2.src, 5313 &nk->addr[pd2.sidx], pd2.af) || 5314 nk->port[pd2.sidx] != th->th_sport) 5315 pf_translate_icmp(pd, pd2.src, 5316 &th->th_sport, pd->dst, 5317 &nk->addr[pd2.sidx], 5318 nk->port[pd2.sidx]); 5319 5320 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], 5321 pd2.af) || pd2.rdomain != nk->rdomain) 5322 pd->destchg = 1; 5323 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 5324 5325 if (PF_ANEQ(pd2.dst, 5326 &nk->addr[pd2.didx], pd2.af) || 5327 nk->port[pd2.didx] != th->th_dport) 5328 pf_translate_icmp(pd, pd2.dst, 5329 &th->th_dport, pd->src, 5330 &nk->addr[pd2.didx], 5331 nk->port[pd2.didx]); 5332 copyback = 1; 5333 } 5334 5335 if (copyback) { 5336 switch (pd2.af) { 5337 case AF_INET: 5338 m_copyback(pd->m, pd->off, ICMP_MINLEN, 5339 &pd->hdr.icmp, M_NOWAIT); 5340 m_copyback(pd2.m, ipoff2, sizeof(h2), 5341 &h2, M_NOWAIT); 5342 break; 5343 #ifdef INET6 5344 case AF_INET6: 5345 m_copyback(pd->m, pd->off, 5346 sizeof(struct icmp6_hdr), 5347 &pd->hdr.icmp6, M_NOWAIT); 5348 m_copyback(pd2.m, ipoff2, sizeof(h2_6), 5349 &h2_6, M_NOWAIT); 5350 break; 5351 #endif /* INET6 */ 5352 } 5353 m_copyback(pd2.m, pd2.off, 8, th, M_NOWAIT); 5354 } 5355 break; 5356 } 5357 case IPPROTO_UDP: { 5358 struct udphdr *uh = &pd2.hdr.udp; 5359 int action; 5360 5361 if (!pf_pull_hdr(pd2.m, pd2.off, uh, sizeof(*uh), 5362 NULL, reason, pd2.af)) { 5363 DPFPRINTF(LOG_NOTICE, 5364 "ICMP error message too short (udp)"); 5365 return (PF_DROP); 5366 } 5367 5368 key.af = pd2.af; 5369 key.proto = IPPROTO_UDP; 5370 key.rdomain = pd2.rdomain; 5371 pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af); 5372 pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af); 5373 key.port[pd2.sidx] = uh->uh_sport; 5374 key.port[pd2.didx] = uh->uh_dport; 5375 5376 action = pf_find_state(&pd2, &key, state); 5377 if (action != PF_MATCH) 5378 return (action); 5379 5380 /* translate source/destination address, if necessary */ 5381 if ((*state)->key[PF_SK_WIRE] != 5382 (*state)->key[PF_SK_STACK]) { 5383 struct pf_state_key *nk; 5384 int afto, sidx, didx; 5385 5386 if (PF_REVERSED_KEY((*state)->key, pd->af)) 5387 nk = (*state)->key[pd->sidx]; 5388 else 5389 nk = (*state)->key[pd->didx]; 5390 5391 afto = pd->af != nk->af; 5392 sidx = afto ? pd2.didx : pd2.sidx; 5393 didx = afto ? 
pd2.sidx : pd2.didx; 5394 5395 #ifdef INET6 5396 if (afto) { 5397 if (pf_translate_icmp_af(pd, nk->af, 5398 &pd->hdr.icmp)) 5399 return (PF_DROP); 5400 m_copyback(pd->m, pd->off, 5401 sizeof(struct icmp6_hdr), 5402 &pd->hdr.icmp6, M_NOWAIT); 5403 if (pf_change_icmp_af(pd->m, ipoff2, 5404 pd, &pd2, &nk->addr[sidx], 5405 &nk->addr[didx], pd->af, nk->af)) 5406 return (PF_DROP); 5407 if (nk->af == AF_INET) 5408 pd->proto = IPPROTO_ICMP; 5409 else 5410 pd->proto = IPPROTO_ICMPV6; 5411 pd->m->m_pkthdr.ph_rtableid = 5412 nk->rdomain; 5413 pd->destchg = 1; 5414 pf_addrcpy(&pd->nsaddr, 5415 &nk->addr[pd2.sidx], nk->af); 5416 pf_addrcpy(&pd->ndaddr, 5417 &nk->addr[pd2.didx], nk->af); 5418 pd->naf = nk->af; 5419 5420 pf_patch_16(pd, 5421 &uh->uh_sport, nk->port[sidx]); 5422 pf_patch_16(pd, 5423 &uh->uh_dport, nk->port[didx]); 5424 5425 m_copyback(pd2.m, pd2.off, sizeof(*uh), 5426 uh, M_NOWAIT); 5427 return (PF_AFRT); 5428 } 5429 #endif /* INET6 */ 5430 5431 if (PF_ANEQ(pd2.src, 5432 &nk->addr[pd2.sidx], pd2.af) || 5433 nk->port[pd2.sidx] != uh->uh_sport) 5434 pf_translate_icmp(pd, pd2.src, 5435 &uh->uh_sport, pd->dst, 5436 &nk->addr[pd2.sidx], 5437 nk->port[pd2.sidx]); 5438 5439 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], 5440 pd2.af) || pd2.rdomain != nk->rdomain) 5441 pd->destchg = 1; 5442 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 5443 5444 if (PF_ANEQ(pd2.dst, 5445 &nk->addr[pd2.didx], pd2.af) || 5446 nk->port[pd2.didx] != uh->uh_dport) 5447 pf_translate_icmp(pd, pd2.dst, 5448 &uh->uh_dport, pd->src, 5449 &nk->addr[pd2.didx], 5450 nk->port[pd2.didx]); 5451 5452 switch (pd2.af) { 5453 case AF_INET: 5454 m_copyback(pd->m, pd->off, ICMP_MINLEN, 5455 &pd->hdr.icmp, M_NOWAIT); 5456 m_copyback(pd2.m, ipoff2, sizeof(h2), 5457 &h2, M_NOWAIT); 5458 break; 5459 #ifdef INET6 5460 case AF_INET6: 5461 m_copyback(pd->m, pd->off, 5462 sizeof(struct icmp6_hdr), 5463 &pd->hdr.icmp6, M_NOWAIT); 5464 m_copyback(pd2.m, ipoff2, sizeof(h2_6), 5465 &h2_6, M_NOWAIT); 5466 break; 5467 #endif /* INET6 */ 5468 } 5469 /* Avoid recomputing quoted UDP checksum. 5470 * note: udp6 0 csum invalid per rfc2460 p27. 5471 * but presumed nothing cares in this context */ 5472 pf_patch_16(pd, &uh->uh_sum, 0); 5473 m_copyback(pd2.m, pd2.off, sizeof(*uh), uh, 5474 M_NOWAIT); 5475 copyback = 1; 5476 } 5477 break; 5478 } 5479 case IPPROTO_ICMP: { 5480 struct icmp *iih = &pd2.hdr.icmp; 5481 5482 if (pd2.af != AF_INET) { 5483 REASON_SET(reason, PFRES_NORM); 5484 return (PF_DROP); 5485 } 5486 5487 if (!pf_pull_hdr(pd2.m, pd2.off, iih, ICMP_MINLEN, 5488 NULL, reason, pd2.af)) { 5489 DPFPRINTF(LOG_NOTICE, 5490 "ICMP error message too short (icmp)"); 5491 return (PF_DROP); 5492 } 5493 5494 pf_icmp_mapping(&pd2, iih->icmp_type, 5495 &icmp_dir, &virtual_id, &virtual_type); 5496 5497 ret = pf_icmp_state_lookup(&pd2, &key, state, 5498 virtual_id, virtual_type, icmp_dir, &iidx, 0, 1); 5499 if (ret >= 0) 5500 return (ret); 5501 5502 /* translate source/destination address, if necessary */ 5503 if ((*state)->key[PF_SK_WIRE] != 5504 (*state)->key[PF_SK_STACK]) { 5505 struct pf_state_key *nk; 5506 int afto, sidx, didx; 5507 5508 if (PF_REVERSED_KEY((*state)->key, pd->af)) 5509 nk = (*state)->key[pd->sidx]; 5510 else 5511 nk = (*state)->key[pd->didx]; 5512 5513 afto = pd->af != nk->af; 5514 sidx = afto ? pd2.didx : pd2.sidx; 5515 didx = afto ? pd2.sidx : pd2.didx; 5516 iidx = afto ? 
!iidx : iidx; 5517 5518 #ifdef INET6 5519 if (afto) { 5520 if (nk->af != AF_INET6) 5521 return (PF_DROP); 5522 if (pf_translate_icmp_af(pd, nk->af, 5523 &pd->hdr.icmp)) 5524 return (PF_DROP); 5525 m_copyback(pd->m, pd->off, 5526 sizeof(struct icmp6_hdr), 5527 &pd->hdr.icmp6, M_NOWAIT); 5528 if (pf_change_icmp_af(pd->m, ipoff2, 5529 pd, &pd2, &nk->addr[sidx], 5530 &nk->addr[didx], pd->af, nk->af)) 5531 return (PF_DROP); 5532 pd->proto = IPPROTO_ICMPV6; 5533 if (pf_translate_icmp_af(pd, 5534 nk->af, iih)) 5535 return (PF_DROP); 5536 if (virtual_type == htons(ICMP_ECHO)) 5537 pf_patch_16(pd, &iih->icmp_id, 5538 nk->port[iidx]); 5539 m_copyback(pd2.m, pd2.off, ICMP_MINLEN, 5540 iih, M_NOWAIT); 5541 pd->m->m_pkthdr.ph_rtableid = 5542 nk->rdomain; 5543 pd->destchg = 1; 5544 pf_addrcpy(&pd->nsaddr, 5545 &nk->addr[pd2.sidx], nk->af); 5546 pf_addrcpy(&pd->ndaddr, 5547 &nk->addr[pd2.didx], nk->af); 5548 pd->naf = nk->af; 5549 return (PF_AFRT); 5550 } 5551 #endif /* INET6 */ 5552 5553 if (PF_ANEQ(pd2.src, 5554 &nk->addr[pd2.sidx], pd2.af) || 5555 (virtual_type == htons(ICMP_ECHO) && 5556 nk->port[iidx] != iih->icmp_id)) 5557 pf_translate_icmp(pd, pd2.src, 5558 (virtual_type == htons(ICMP_ECHO)) ? 5559 &iih->icmp_id : NULL, 5560 pd->dst, &nk->addr[pd2.sidx], 5561 (virtual_type == htons(ICMP_ECHO)) ? 5562 nk->port[iidx] : 0); 5563 5564 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], 5565 pd2.af) || pd2.rdomain != nk->rdomain) 5566 pd->destchg = 1; 5567 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 5568 5569 if (PF_ANEQ(pd2.dst, 5570 &nk->addr[pd2.didx], pd2.af)) 5571 pf_translate_icmp(pd, pd2.dst, NULL, 5572 pd->src, &nk->addr[pd2.didx], 0); 5573 5574 m_copyback(pd->m, pd->off, ICMP_MINLEN, 5575 &pd->hdr.icmp, M_NOWAIT); 5576 m_copyback(pd2.m, ipoff2, sizeof(h2), &h2, 5577 M_NOWAIT); 5578 m_copyback(pd2.m, pd2.off, ICMP_MINLEN, iih, 5579 M_NOWAIT); 5580 copyback = 1; 5581 } 5582 break; 5583 } 5584 #ifdef INET6 5585 case IPPROTO_ICMPV6: { 5586 struct icmp6_hdr *iih = &pd2.hdr.icmp6; 5587 5588 if (pd2.af != AF_INET6) { 5589 REASON_SET(reason, PFRES_NORM); 5590 return (PF_DROP); 5591 } 5592 5593 if (!pf_pull_hdr(pd2.m, pd2.off, iih, 5594 sizeof(struct icmp6_hdr), NULL, reason, pd2.af)) { 5595 DPFPRINTF(LOG_NOTICE, 5596 "ICMP error message too short (icmp6)"); 5597 return (PF_DROP); 5598 } 5599 5600 pf_icmp_mapping(&pd2, iih->icmp6_type, 5601 &icmp_dir, &virtual_id, &virtual_type); 5602 ret = pf_icmp_state_lookup(&pd2, &key, state, 5603 virtual_id, virtual_type, icmp_dir, &iidx, 0, 1); 5604 /* IPv6? try matching a multicast address */ 5605 if (ret == PF_DROP && pd2.af == AF_INET6 && 5606 icmp_dir == PF_OUT) 5607 ret = pf_icmp_state_lookup(&pd2, &key, state, 5608 virtual_id, virtual_type, icmp_dir, &iidx, 5609 1, 1); 5610 if (ret >= 0) 5611 return (ret); 5612 5613 /* translate source/destination address, if necessary */ 5614 if ((*state)->key[PF_SK_WIRE] != 5615 (*state)->key[PF_SK_STACK]) { 5616 struct pf_state_key *nk; 5617 int afto, sidx, didx; 5618 5619 if (PF_REVERSED_KEY((*state)->key, pd->af)) 5620 nk = (*state)->key[pd->sidx]; 5621 else 5622 nk = (*state)->key[pd->didx]; 5623 5624 afto = pd->af != nk->af; 5625 sidx = afto ? pd2.didx : pd2.sidx; 5626 didx = afto ? pd2.sidx : pd2.didx; 5627 iidx = afto ? 
!iidx : iidx; 5628 5629 if (afto) { 5630 if (nk->af != AF_INET) 5631 return (PF_DROP); 5632 if (pf_translate_icmp_af(pd, nk->af, 5633 &pd->hdr.icmp)) 5634 return (PF_DROP); 5635 m_copyback(pd->m, pd->off, 5636 sizeof(struct icmp6_hdr), 5637 &pd->hdr.icmp6, M_NOWAIT); 5638 if (pf_change_icmp_af(pd->m, ipoff2, 5639 pd, &pd2, &nk->addr[sidx], 5640 &nk->addr[didx], pd->af, nk->af)) 5641 return (PF_DROP); 5642 pd->proto = IPPROTO_ICMP; 5643 if (pf_translate_icmp_af(pd, 5644 nk->af, iih)) 5645 return (PF_DROP); 5646 if (virtual_type == 5647 htons(ICMP6_ECHO_REQUEST)) 5648 pf_patch_16(pd, &iih->icmp6_id, 5649 nk->port[iidx]); 5650 m_copyback(pd2.m, pd2.off, 5651 sizeof(struct icmp6_hdr), iih, 5652 M_NOWAIT); 5653 pd->m->m_pkthdr.ph_rtableid = 5654 nk->rdomain; 5655 pd->destchg = 1; 5656 pf_addrcpy(&pd->nsaddr, 5657 &nk->addr[pd2.sidx], nk->af); 5658 pf_addrcpy(&pd->ndaddr, 5659 &nk->addr[pd2.didx], nk->af); 5660 pd->naf = nk->af; 5661 return (PF_AFRT); 5662 } 5663 5664 if (PF_ANEQ(pd2.src, 5665 &nk->addr[pd2.sidx], pd2.af) || 5666 ((virtual_type == 5667 htons(ICMP6_ECHO_REQUEST)) && 5668 nk->port[pd2.sidx] != iih->icmp6_id)) 5669 pf_translate_icmp(pd, pd2.src, 5670 (virtual_type == 5671 htons(ICMP6_ECHO_REQUEST)) 5672 ? &iih->icmp6_id : NULL, 5673 pd->dst, &nk->addr[pd2.sidx], 5674 (virtual_type == 5675 htons(ICMP6_ECHO_REQUEST)) 5676 ? nk->port[iidx] : 0); 5677 5678 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], 5679 pd2.af) || pd2.rdomain != nk->rdomain) 5680 pd->destchg = 1; 5681 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 5682 5683 if (PF_ANEQ(pd2.dst, 5684 &nk->addr[pd2.didx], pd2.af)) 5685 pf_translate_icmp(pd, pd2.dst, NULL, 5686 pd->src, &nk->addr[pd2.didx], 0); 5687 5688 m_copyback(pd->m, pd->off, 5689 sizeof(struct icmp6_hdr), &pd->hdr.icmp6, 5690 M_NOWAIT); 5691 m_copyback(pd2.m, ipoff2, sizeof(h2_6), &h2_6, 5692 M_NOWAIT); 5693 m_copyback(pd2.m, pd2.off, 5694 sizeof(struct icmp6_hdr), iih, M_NOWAIT); 5695 copyback = 1; 5696 } 5697 break; 5698 } 5699 #endif /* INET6 */ 5700 default: { 5701 int action; 5702 5703 key.af = pd2.af; 5704 key.proto = pd2.proto; 5705 key.rdomain = pd2.rdomain; 5706 pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af); 5707 pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af); 5708 key.port[0] = key.port[1] = 0; 5709 5710 action = pf_find_state(&pd2, &key, state); 5711 if (action != PF_MATCH) 5712 return (action); 5713 5714 /* translate source/destination address, if necessary */ 5715 if ((*state)->key[PF_SK_WIRE] != 5716 (*state)->key[PF_SK_STACK]) { 5717 struct pf_state_key *nk = 5718 (*state)->key[pd->didx]; 5719 5720 if (PF_ANEQ(pd2.src, 5721 &nk->addr[pd2.sidx], pd2.af)) 5722 pf_translate_icmp(pd, pd2.src, NULL, 5723 pd->dst, &nk->addr[pd2.sidx], 0); 5724 5725 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], 5726 pd2.af) || pd2.rdomain != nk->rdomain) 5727 pd->destchg = 1; 5728 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 5729 5730 if (PF_ANEQ(pd2.dst, 5731 &nk->addr[pd2.didx], pd2.af)) 5732 pf_translate_icmp(pd, pd2.dst, NULL, 5733 pd->src, &nk->addr[pd2.didx], 0); 5734 5735 switch (pd2.af) { 5736 case AF_INET: 5737 m_copyback(pd->m, pd->off, ICMP_MINLEN, 5738 &pd->hdr.icmp, M_NOWAIT); 5739 m_copyback(pd2.m, ipoff2, sizeof(h2), 5740 &h2, M_NOWAIT); 5741 break; 5742 #ifdef INET6 5743 case AF_INET6: 5744 m_copyback(pd->m, pd->off, 5745 sizeof(struct icmp6_hdr), 5746 &pd->hdr.icmp6, M_NOWAIT); 5747 m_copyback(pd2.m, ipoff2, sizeof(h2_6), 5748 &h2_6, M_NOWAIT); 5749 break; 5750 #endif /* INET6 */ 5751 } 5752 copyback = 1; 5753 } 5754 break; 5755 } 5756 } 5757 } 5758 if (copyback) 
{ 5759 m_copyback(pd->m, pd->off, pd->hdrlen, &pd->hdr, M_NOWAIT); 5760 } 5761 5762 return (PF_PASS); 5763 } 5764 5765 /* 5766 * ipoff and off are measured from the start of the mbuf chain. 5767 * h must be at "ipoff" on the mbuf chain. 5768 */ 5769 void * 5770 pf_pull_hdr(struct mbuf *m, int off, void *p, int len, 5771 u_short *actionp, u_short *reasonp, sa_family_t af) 5772 { 5773 int iplen = 0; 5774 5775 switch (af) { 5776 case AF_INET: { 5777 struct ip *h = mtod(m, struct ip *); 5778 u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3; 5779 5780 if (fragoff) { 5781 if (fragoff >= len) 5782 ACTION_SET(actionp, PF_PASS); 5783 else { 5784 ACTION_SET(actionp, PF_DROP); 5785 REASON_SET(reasonp, PFRES_FRAG); 5786 } 5787 return (NULL); 5788 } 5789 iplen = ntohs(h->ip_len); 5790 break; 5791 } 5792 #ifdef INET6 5793 case AF_INET6: { 5794 struct ip6_hdr *h = mtod(m, struct ip6_hdr *); 5795 5796 iplen = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr); 5797 break; 5798 } 5799 #endif /* INET6 */ 5800 } 5801 if (m->m_pkthdr.len < off + len || iplen < off + len) { 5802 ACTION_SET(actionp, PF_DROP); 5803 REASON_SET(reasonp, PFRES_SHORT); 5804 return (NULL); 5805 } 5806 m_copydata(m, off, len, p); 5807 return (p); 5808 } 5809 5810 int 5811 pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif, 5812 int rtableid) 5813 { 5814 struct sockaddr_storage ss; 5815 struct sockaddr_in *dst; 5816 int ret = 1; 5817 int check_mpath; 5818 #ifdef INET6 5819 struct sockaddr_in6 *dst6; 5820 #endif /* INET6 */ 5821 struct rtentry *rt = NULL; 5822 5823 check_mpath = 0; 5824 memset(&ss, 0, sizeof(ss)); 5825 switch (af) { 5826 case AF_INET: 5827 dst = (struct sockaddr_in *)&ss; 5828 dst->sin_family = AF_INET; 5829 dst->sin_len = sizeof(*dst); 5830 dst->sin_addr = addr->v4; 5831 if (ipmultipath) 5832 check_mpath = 1; 5833 break; 5834 #ifdef INET6 5835 case AF_INET6: 5836 /* 5837 * Skip check for addresses with embedded interface scope, 5838 * as they would always match anyway. 
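 * (The embedded scope already ties such an address to one
 * interface, so a routing-table lookup adds no information.)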
5839 */ 5840 if (IN6_IS_SCOPE_EMBED(&addr->v6)) 5841 goto out; 5842 dst6 = (struct sockaddr_in6 *)&ss; 5843 dst6->sin6_family = AF_INET6; 5844 dst6->sin6_len = sizeof(*dst6); 5845 dst6->sin6_addr = addr->v6; 5846 if (ip6_multipath) 5847 check_mpath = 1; 5848 break; 5849 #endif /* INET6 */ 5850 } 5851 5852 /* Skip checks for ipsec interfaces */ 5853 if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC) 5854 goto out; 5855 5856 rt = rtalloc(sstosa(&ss), 0, rtableid); 5857 if (rt != NULL) { 5858 /* No interface given, this is a no-route check */ 5859 if (kif == NULL) 5860 goto out; 5861 5862 if (kif->pfik_ifp == NULL) { 5863 ret = 0; 5864 goto out; 5865 } 5866 5867 /* Perform uRPF check if passed input interface */ 5868 ret = 0; 5869 do { 5870 if (rt->rt_ifidx == kif->pfik_ifp->if_index) { 5871 ret = 1; 5872 #if NCARP > 0 5873 } else { 5874 struct ifnet *ifp; 5875 5876 ifp = if_get(rt->rt_ifidx); 5877 if (ifp != NULL && ifp->if_type == IFT_CARP && 5878 ifp->if_carpdev == kif->pfik_ifp) 5879 ret = 1; 5880 if_put(ifp); 5881 #endif /* NCARP */ 5882 } 5883 5884 rt = rtable_iterate(rt); 5885 } while (check_mpath == 1 && rt != NULL && ret == 0); 5886 } else 5887 ret = 0; 5888 out: 5889 rtfree(rt); 5890 return (ret); 5891 } 5892 5893 int 5894 pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw, 5895 int rtableid) 5896 { 5897 struct sockaddr_storage ss; 5898 struct sockaddr_in *dst; 5899 #ifdef INET6 5900 struct sockaddr_in6 *dst6; 5901 #endif /* INET6 */ 5902 struct rtentry *rt; 5903 int ret = 0; 5904 5905 memset(&ss, 0, sizeof(ss)); 5906 switch (af) { 5907 case AF_INET: 5908 dst = (struct sockaddr_in *)&ss; 5909 dst->sin_family = AF_INET; 5910 dst->sin_len = sizeof(*dst); 5911 dst->sin_addr = addr->v4; 5912 break; 5913 #ifdef INET6 5914 case AF_INET6: 5915 dst6 = (struct sockaddr_in6 *)&ss; 5916 dst6->sin6_family = AF_INET6; 5917 dst6->sin6_len = sizeof(*dst6); 5918 dst6->sin6_addr = addr->v6; 5919 break; 5920 #endif /* INET6 */ 5921 } 5922 5923 rt = rtalloc(sstosa(&ss), RT_RESOLVE, rtableid); 5924 if (rt != NULL) { 5925 if (rt->rt_labelid == aw->v.rtlabel) 5926 ret = 1; 5927 rtfree(rt); 5928 } 5929 5930 return (ret); 5931 } 5932 5933 /* pf_route() may change pd->m, adjust local copies after calling */ 5934 void 5935 pf_route(struct pf_pdesc *pd, struct pf_rule *r, struct pf_state *s) 5936 { 5937 struct mbuf *m0, *m1; 5938 struct sockaddr_in *dst, sin; 5939 struct rtentry *rt = NULL; 5940 struct ip *ip; 5941 struct ifnet *ifp = NULL; 5942 struct pf_addr naddr; 5943 struct pf_src_node *sns[PF_SN_MAX]; 5944 int error = 0; 5945 unsigned int rtableid; 5946 5947 if (pd->m->m_pkthdr.pf.routed++ > 3) { 5948 m_freem(pd->m); 5949 pd->m = NULL; 5950 return; 5951 } 5952 5953 if (r->rt == PF_DUPTO) { 5954 if ((m0 = m_dup_pkt(pd->m, max_linkhdr, M_NOWAIT)) == NULL) 5955 return; 5956 } else { 5957 if ((r->rt == PF_REPLYTO) == (r->direction == pd->dir)) 5958 return; 5959 m0 = pd->m; 5960 } 5961 5962 if (m0->m_len < sizeof(struct ip)) { 5963 DPFPRINTF(LOG_ERR, 5964 "%s: m0->m_len < sizeof(struct ip)", __func__); 5965 goto bad; 5966 } 5967 5968 ip = mtod(m0, struct ip *); 5969 5970 memset(&sin, 0, sizeof(sin)); 5971 dst = &sin; 5972 dst->sin_family = AF_INET; 5973 dst->sin_len = sizeof(*dst); 5974 dst->sin_addr = ip->ip_dst; 5975 rtableid = m0->m_pkthdr.ph_rtableid; 5976 5977 if (pd->dir == PF_IN) { 5978 if (ip->ip_ttl <= IPTTLDEC) { 5979 if (r->rt != PF_DUPTO) 5980 pf_send_icmp(m0, ICMP_TIMXCEED, 5981 ICMP_TIMXCEED_INTRANS, 0, 5982 pd->af, r, pd->rdomain); 5983 goto bad; 5984 } 5985 
ip->ip_ttl -= IPTTLDEC; 5986 } 5987 5988 if (s == NULL) { 5989 memset(sns, 0, sizeof(sns)); 5990 if (pf_map_addr(AF_INET, r, 5991 (struct pf_addr *)&ip->ip_src, 5992 &naddr, NULL, sns, &r->route, PF_SN_ROUTE)) { 5993 DPFPRINTF(LOG_ERR, 5994 "%s: pf_map_addr() failed", __func__); 5995 goto bad; 5996 } 5997 5998 if (!PF_AZERO(&naddr, AF_INET)) 5999 dst->sin_addr.s_addr = naddr.v4.s_addr; 6000 ifp = r->route.kif ? 6001 r->route.kif->pfik_ifp : NULL; 6002 } else { 6003 if (!PF_AZERO(&s->rt_addr, AF_INET)) 6004 dst->sin_addr.s_addr = 6005 s->rt_addr.v4.s_addr; 6006 ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL; 6007 } 6008 if (ifp == NULL) 6009 goto bad; 6010 6011 if (pd->kif->pfik_ifp != ifp) { 6012 if (pf_test(AF_INET, PF_OUT, ifp, &m0) != PF_PASS) 6013 goto bad; 6014 else if (m0 == NULL) 6015 goto done; 6016 if (m0->m_len < sizeof(struct ip)) { 6017 DPFPRINTF(LOG_ERR, 6018 "%s: m0->m_len < sizeof(struct ip)", __func__); 6019 goto bad; 6020 } 6021 ip = mtod(m0, struct ip *); 6022 } 6023 6024 rt = rtalloc(sintosa(dst), RT_RESOLVE, rtableid); 6025 if (!rtisvalid(rt)) { 6026 ipstat_inc(ips_noroute); 6027 goto bad; 6028 } 6029 /* A locally generated packet may have invalid source address. */ 6030 if ((ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET && 6031 (ifp->if_flags & IFF_LOOPBACK) == 0) 6032 ip->ip_src = ifatoia(rt->rt_ifa)->ia_addr.sin_addr; 6033 6034 in_proto_cksum_out(m0, ifp); 6035 6036 if (ntohs(ip->ip_len) <= ifp->if_mtu) { 6037 ip->ip_sum = 0; 6038 if (ifp->if_capabilities & IFCAP_CSUM_IPv4) 6039 m0->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT; 6040 else { 6041 ipstat_inc(ips_outswcsum); 6042 ip->ip_sum = in_cksum(m0, ip->ip_hl << 2); 6043 } 6044 error = ifp->if_output(ifp, m0, sintosa(dst), rt); 6045 goto done; 6046 } 6047 6048 /* 6049 * Too large for interface; fragment if possible. 6050 * Must be able to put at least 8 bytes per fragment. 
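 * With IP_DF set, fragmentation is not allowed: we return an ICMP
 * "fragmentation needed" carrying the interface MTU instead, so
 * path MTU discovery at the sender can converge.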
6051 */ 6052 if (ip->ip_off & htons(IP_DF)) { 6053 ipstat_inc(ips_cantfrag); 6054 if (r->rt != PF_DUPTO) 6055 pf_send_icmp(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 6056 ifp->if_mtu, pd->af, r, pd->rdomain); 6057 goto bad; 6058 } 6059 6060 m1 = m0; 6061 error = ip_fragment(m0, ifp, ifp->if_mtu); 6062 if (error) { 6063 m0 = NULL; 6064 goto bad; 6065 } 6066 6067 for (m0 = m1; m0; m0 = m1) { 6068 m1 = m0->m_nextpkt; 6069 m0->m_nextpkt = 0; 6070 if (error == 0) 6071 error = ifp->if_output(ifp, m0, sintosa(dst), rt); 6072 else 6073 m_freem(m0); 6074 } 6075 6076 if (error == 0) 6077 ipstat_inc(ips_fragmented); 6078 6079 done: 6080 if (r->rt != PF_DUPTO) 6081 pd->m = NULL; 6082 rtfree(rt); 6083 return; 6084 6085 bad: 6086 m_freem(m0); 6087 goto done; 6088 } 6089 6090 #ifdef INET6 6091 /* pf_route6() may change pd->m, adjust local copies after calling */ 6092 void 6093 pf_route6(struct pf_pdesc *pd, struct pf_rule *r, struct pf_state *s) 6094 { 6095 struct mbuf *m0; 6096 struct sockaddr_in6 *dst, sin6; 6097 struct rtentry *rt = NULL; 6098 struct ip6_hdr *ip6; 6099 struct ifnet *ifp = NULL; 6100 struct pf_addr naddr; 6101 struct pf_src_node *sns[PF_SN_MAX]; 6102 struct m_tag *mtag; 6103 unsigned int rtableid; 6104 6105 if (pd->m->m_pkthdr.pf.routed++ > 3) { 6106 m_freem(pd->m); 6107 pd->m = NULL; 6108 return; 6109 } 6110 6111 if (r->rt == PF_DUPTO) { 6112 if ((m0 = m_dup_pkt(pd->m, max_linkhdr, M_NOWAIT)) == NULL) 6113 return; 6114 } else { 6115 if ((r->rt == PF_REPLYTO) == (r->direction == pd->dir)) 6116 return; 6117 m0 = pd->m; 6118 } 6119 6120 if (m0->m_len < sizeof(struct ip6_hdr)) { 6121 DPFPRINTF(LOG_ERR, 6122 "%s: m0->m_len < sizeof(struct ip6_hdr)", __func__); 6123 goto bad; 6124 } 6125 ip6 = mtod(m0, struct ip6_hdr *); 6126 6127 memset(&sin6, 0, sizeof(sin6)); 6128 dst = &sin6; 6129 dst->sin6_family = AF_INET6; 6130 dst->sin6_len = sizeof(*dst); 6131 dst->sin6_addr = ip6->ip6_dst; 6132 rtableid = m0->m_pkthdr.ph_rtableid; 6133 6134 if (pd->dir == PF_IN) { 6135 if (ip6->ip6_hlim <= IPV6_HLIMDEC) { 6136 if (r->rt != PF_DUPTO) 6137 pf_send_icmp(m0, ICMP6_TIME_EXCEEDED, 6138 ICMP6_TIME_EXCEED_TRANSIT, 0, 6139 pd->af, r, pd->rdomain); 6140 goto bad; 6141 } 6142 ip6->ip6_hlim -= IPV6_HLIMDEC; 6143 } 6144 6145 if (s == NULL) { 6146 memset(sns, 0, sizeof(sns)); 6147 if (pf_map_addr(AF_INET6, r, (struct pf_addr *)&ip6->ip6_src, 6148 &naddr, NULL, sns, &r->route, PF_SN_ROUTE)) { 6149 DPFPRINTF(LOG_ERR, 6150 "%s: pf_map_addr() failed", __func__); 6151 goto bad; 6152 } 6153 if (!PF_AZERO(&naddr, AF_INET6)) 6154 pf_addrcpy((struct pf_addr *)&dst->sin6_addr, 6155 &naddr, AF_INET6); 6156 ifp = r->route.kif ? r->route.kif->pfik_ifp : NULL; 6157 } else { 6158 if (!PF_AZERO(&s->rt_addr, AF_INET6)) 6159 pf_addrcpy((struct pf_addr *)&dst->sin6_addr, 6160 &s->rt_addr, AF_INET6); 6161 ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL; 6162 } 6163 if (ifp == NULL) 6164 goto bad; 6165 6166 if (pd->kif->pfik_ifp != ifp) { 6167 if (pf_test(AF_INET6, PF_OUT, ifp, &m0) != PF_PASS) 6168 goto bad; 6169 else if (m0 == NULL) 6170 goto done; 6171 if (m0->m_len < sizeof(struct ip6_hdr)) { 6172 DPFPRINTF(LOG_ERR, 6173 "%s: m0->m_len < sizeof(struct ip6_hdr)", __func__); 6174 goto bad; 6175 } 6176 } 6177 6178 if (IN6_IS_SCOPE_EMBED(&dst->sin6_addr)) 6179 dst->sin6_addr.s6_addr16[1] = htons(ifp->if_index); 6180 rt = rtalloc(sin6tosa(dst), RT_RESOLVE, rtableid); 6181 if (!rtisvalid(rt)) { 6182 ip6stat_inc(ip6s_noroute); 6183 goto bad; 6184 } 6185 /* A locally generated packet may have invalid source address. 
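 * (e.g. output from the firewall itself handed to a route-to rule
 * with a loopback source; it is rewritten below to the address of
 * the selected outgoing interface.)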
*/ 6186 if (IN6_IS_ADDR_LOOPBACK(&ip6->ip6_src) && 6187 (ifp->if_flags & IFF_LOOPBACK) == 0) 6188 ip6->ip6_src = ifatoia6(rt->rt_ifa)->ia_addr.sin6_addr; 6189 6190 in6_proto_cksum_out(m0, ifp); 6191 6192 /* 6193 * If packet has been reassembled by PF earlier, we have to 6194 * use pf_refragment6() here to turn it back to fragments. 6195 */ 6196 if ((mtag = m_tag_find(m0, PACKET_TAG_PF_REASSEMBLED, NULL))) { 6197 (void) pf_refragment6(&m0, mtag, dst, ifp, rt); 6198 } else if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) { 6199 ifp->if_output(ifp, m0, sin6tosa(dst), rt); 6200 } else { 6201 ip6stat_inc(ip6s_cantfrag); 6202 if (r->rt != PF_DUPTO) 6203 pf_send_icmp(m0, ICMP6_PACKET_TOO_BIG, 0, 6204 ifp->if_mtu, pd->af, r, pd->rdomain); 6205 goto bad; 6206 } 6207 6208 done: 6209 if (r->rt != PF_DUPTO) 6210 pd->m = NULL; 6211 rtfree(rt); 6212 return; 6213 6214 bad: 6215 m_freem(m0); 6216 goto done; 6217 } 6218 #endif /* INET6 */ 6219 6220 6221 /* 6222 * check TCP checksum and set mbuf flag 6223 * off is the offset where the protocol header starts 6224 * len is the total length of protocol header plus payload 6225 * returns 0 when the checksum is valid, otherwise returns 1. 6226 * if the _OUT flag is set the checksum isn't done yet, consider these ok 6227 */ 6228 int 6229 pf_check_tcp_cksum(struct mbuf *m, int off, int len, sa_family_t af) 6230 { 6231 u_int16_t sum; 6232 6233 if (m->m_pkthdr.csum_flags & 6234 (M_TCP_CSUM_IN_OK | M_TCP_CSUM_OUT)) { 6235 return (0); 6236 } 6237 if (m->m_pkthdr.csum_flags & M_TCP_CSUM_IN_BAD || 6238 off < sizeof(struct ip) || 6239 m->m_pkthdr.len < off + len) { 6240 return (1); 6241 } 6242 6243 /* need to do it in software */ 6244 tcpstat_inc(tcps_inswcsum); 6245 6246 switch (af) { 6247 case AF_INET: 6248 if (m->m_len < sizeof(struct ip)) 6249 return (1); 6250 6251 sum = in4_cksum(m, IPPROTO_TCP, off, len); 6252 break; 6253 #ifdef INET6 6254 case AF_INET6: 6255 if (m->m_len < sizeof(struct ip6_hdr)) 6256 return (1); 6257 6258 sum = in6_cksum(m, IPPROTO_TCP, off, len); 6259 break; 6260 #endif /* INET6 */ 6261 default: 6262 unhandled_af(af); 6263 } 6264 if (sum) { 6265 tcpstat_inc(tcps_rcvbadsum); 6266 m->m_pkthdr.csum_flags |= M_TCP_CSUM_IN_BAD; 6267 return (1); 6268 } 6269 6270 m->m_pkthdr.csum_flags |= M_TCP_CSUM_IN_OK; 6271 return (0); 6272 } 6273 6274 struct pf_divert * 6275 pf_find_divert(struct mbuf *m) 6276 { 6277 struct m_tag *mtag; 6278 6279 if ((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL) 6280 return (NULL); 6281 6282 return ((struct pf_divert *)(mtag + 1)); 6283 } 6284 6285 struct pf_divert * 6286 pf_get_divert(struct mbuf *m) 6287 { 6288 struct m_tag *mtag; 6289 6290 if ((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL) { 6291 mtag = m_tag_get(PACKET_TAG_PF_DIVERT, sizeof(struct pf_divert), 6292 M_NOWAIT); 6293 if (mtag == NULL) 6294 return (NULL); 6295 memset(mtag + 1, 0, sizeof(struct pf_divert)); 6296 m_tag_prepend(m, mtag); 6297 } 6298 6299 return ((struct pf_divert *)(mtag + 1)); 6300 } 6301 6302 int 6303 pf_walk_header(struct pf_pdesc *pd, struct ip *h, u_short *reason) 6304 { 6305 struct ip6_ext ext; 6306 u_int32_t hlen, end; 6307 int hdr_cnt; 6308 6309 hlen = h->ip_hl << 2; 6310 if (hlen < sizeof(struct ip) || hlen > ntohs(h->ip_len)) { 6311 REASON_SET(reason, PFRES_SHORT); 6312 return (PF_DROP); 6313 } 6314 if (hlen != sizeof(struct ip)) 6315 pd->badopts++; 6316 end = pd->off + ntohs(h->ip_len); 6317 pd->off += hlen; 6318 pd->proto = h->ip_p; 6319 /* stop walking over non initial fragments */ 6320 if ((h->ip_off & 
htons(IP_OFFMASK)) != 0) 6321 return (PF_PASS); 6322 6323 for (hdr_cnt = 0; hdr_cnt < pf_hdr_limit; hdr_cnt++) { 6324 switch (pd->proto) { 6325 case IPPROTO_AH: 6326 /* fragments may be short */ 6327 if ((h->ip_off & htons(IP_MF | IP_OFFMASK)) != 0 && 6328 end < pd->off + sizeof(ext)) 6329 return (PF_PASS); 6330 if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext), 6331 NULL, reason, AF_INET)) { 6332 DPFPRINTF(LOG_NOTICE, "IP short exthdr"); 6333 return (PF_DROP); 6334 } 6335 pd->off += (ext.ip6e_len + 2) * 4; 6336 pd->proto = ext.ip6e_nxt; 6337 break; 6338 default: 6339 return (PF_PASS); 6340 } 6341 } 6342 DPFPRINTF(LOG_NOTICE, "IPv4 nested authentication header limit"); 6343 REASON_SET(reason, PFRES_IPOPTIONS); 6344 return (PF_DROP); 6345 } 6346 6347 #ifdef INET6 6348 int 6349 pf_walk_option6(struct pf_pdesc *pd, struct ip6_hdr *h, int off, int end, 6350 u_short *reason) 6351 { 6352 struct ip6_opt opt; 6353 struct ip6_opt_jumbo jumbo; 6354 6355 while (off < end) { 6356 if (!pf_pull_hdr(pd->m, off, &opt.ip6o_type, 6357 sizeof(opt.ip6o_type), NULL, reason, AF_INET6)) { 6358 DPFPRINTF(LOG_NOTICE, "IPv6 short opt type"); 6359 return (PF_DROP); 6360 } 6361 if (opt.ip6o_type == IP6OPT_PAD1) { 6362 off++; 6363 continue; 6364 } 6365 if (!pf_pull_hdr(pd->m, off, &opt, sizeof(opt), 6366 NULL, reason, AF_INET6)) { 6367 DPFPRINTF(LOG_NOTICE, "IPv6 short opt"); 6368 return (PF_DROP); 6369 } 6370 if (off + sizeof(opt) + opt.ip6o_len > end) { 6371 DPFPRINTF(LOG_NOTICE, "IPv6 long opt"); 6372 REASON_SET(reason, PFRES_IPOPTIONS); 6373 return (PF_DROP); 6374 } 6375 switch (opt.ip6o_type) { 6376 case IP6OPT_JUMBO: 6377 if (pd->jumbolen != 0) { 6378 DPFPRINTF(LOG_NOTICE, "IPv6 multiple jumbo"); 6379 REASON_SET(reason, PFRES_IPOPTIONS); 6380 return (PF_DROP); 6381 } 6382 if (ntohs(h->ip6_plen) != 0) { 6383 DPFPRINTF(LOG_NOTICE, "IPv6 bad jumbo plen"); 6384 REASON_SET(reason, PFRES_IPOPTIONS); 6385 return (PF_DROP); 6386 } 6387 if (!pf_pull_hdr(pd->m, off, &jumbo, sizeof(jumbo), 6388 NULL, reason, AF_INET6)) { 6389 DPFPRINTF(LOG_NOTICE, "IPv6 short jumbo"); 6390 return (PF_DROP); 6391 } 6392 memcpy(&pd->jumbolen, jumbo.ip6oj_jumbo_len, 6393 sizeof(pd->jumbolen)); 6394 pd->jumbolen = ntohl(pd->jumbolen); 6395 if (pd->jumbolen < IPV6_MAXPACKET) { 6396 DPFPRINTF(LOG_NOTICE, "IPv6 short jumbolen"); 6397 REASON_SET(reason, PFRES_IPOPTIONS); 6398 return (PF_DROP); 6399 } 6400 break; 6401 default: 6402 break; 6403 } 6404 off += sizeof(opt) + opt.ip6o_len; 6405 } 6406 6407 return (PF_PASS); 6408 } 6409 6410 int 6411 pf_walk_header6(struct pf_pdesc *pd, struct ip6_hdr *h, u_short *reason) 6412 { 6413 struct ip6_frag frag; 6414 struct ip6_ext ext; 6415 struct ip6_rthdr rthdr; 6416 u_int32_t end; 6417 int hdr_cnt, fraghdr_cnt = 0, rthdr_cnt = 0; 6418 6419 pd->off += sizeof(struct ip6_hdr); 6420 end = pd->off + ntohs(h->ip6_plen); 6421 pd->fragoff = pd->extoff = pd->jumbolen = 0; 6422 pd->proto = h->ip6_nxt; 6423 6424 for (hdr_cnt = 0; hdr_cnt < pf_hdr_limit; hdr_cnt++) { 6425 switch (pd->proto) { 6426 case IPPROTO_ROUTING: 6427 case IPPROTO_HOPOPTS: 6428 case IPPROTO_DSTOPTS: 6429 pd->badopts++; 6430 break; 6431 } 6432 switch (pd->proto) { 6433 case IPPROTO_FRAGMENT: 6434 if (fraghdr_cnt++) { 6435 DPFPRINTF(LOG_NOTICE, "IPv6 multiple fragment"); 6436 REASON_SET(reason, PFRES_FRAG); 6437 return (PF_DROP); 6438 } 6439 /* jumbo payload packets cannot be fragmented */ 6440 if (pd->jumbolen != 0) { 6441 DPFPRINTF(LOG_NOTICE, "IPv6 fragmented jumbo"); 6442 REASON_SET(reason, PFRES_FRAG); 6443 return (PF_DROP); 6444 } 6445 if 
(!pf_pull_hdr(pd->m, pd->off, &frag, sizeof(frag), 6446 NULL, reason, AF_INET6)) { 6447 DPFPRINTF(LOG_NOTICE, "IPv6 short fragment"); 6448 return (PF_DROP); 6449 } 6450 /* stop walking over non initial fragments */ 6451 if (ntohs((frag.ip6f_offlg & IP6F_OFF_MASK)) != 0) { 6452 pd->fragoff = pd->off; 6453 return (PF_PASS); 6454 } 6455 /* RFC6946: reassemble only non atomic fragments */ 6456 if (frag.ip6f_offlg & IP6F_MORE_FRAG) 6457 pd->fragoff = pd->off; 6458 pd->off += sizeof(frag); 6459 pd->proto = frag.ip6f_nxt; 6460 break; 6461 case IPPROTO_ROUTING: 6462 if (rthdr_cnt++) { 6463 DPFPRINTF(LOG_NOTICE, "IPv6 multiple rthdr"); 6464 REASON_SET(reason, PFRES_IPOPTIONS); 6465 return (PF_DROP); 6466 } 6467 /* fragments may be short */ 6468 if (pd->fragoff != 0 && end < pd->off + sizeof(rthdr)) { 6469 pd->off = pd->fragoff; 6470 pd->proto = IPPROTO_FRAGMENT; 6471 return (PF_PASS); 6472 } 6473 if (!pf_pull_hdr(pd->m, pd->off, &rthdr, sizeof(rthdr), 6474 NULL, reason, AF_INET6)) { 6475 DPFPRINTF(LOG_NOTICE, "IPv6 short rthdr"); 6476 return (PF_DROP); 6477 } 6478 if (rthdr.ip6r_type == IPV6_RTHDR_TYPE_0) { 6479 DPFPRINTF(LOG_NOTICE, "IPv6 rthdr0"); 6480 REASON_SET(reason, PFRES_IPOPTIONS); 6481 return (PF_DROP); 6482 } 6483 /* FALLTHROUGH */ 6484 case IPPROTO_HOPOPTS: 6485 /* RFC2460 4.1: Hop-by-Hop only after IPv6 header */ 6486 if (pd->proto == IPPROTO_HOPOPTS && hdr_cnt > 0) { 6487 DPFPRINTF(LOG_NOTICE, "IPv6 hopopts not first"); 6488 REASON_SET(reason, PFRES_IPOPTIONS); 6489 return (PF_DROP); 6490 } 6491 /* FALLTHROUGH */ 6492 case IPPROTO_AH: 6493 case IPPROTO_DSTOPTS: 6494 /* fragments may be short */ 6495 if (pd->fragoff != 0 && end < pd->off + sizeof(ext)) { 6496 pd->off = pd->fragoff; 6497 pd->proto = IPPROTO_FRAGMENT; 6498 return (PF_PASS); 6499 } 6500 if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext), 6501 NULL, reason, AF_INET6)) { 6502 DPFPRINTF(LOG_NOTICE, "IPv6 short exthdr"); 6503 return (PF_DROP); 6504 } 6505 /* reassembly needs the ext header before the frag */ 6506 if (pd->fragoff == 0) 6507 pd->extoff = pd->off; 6508 if (pd->proto == IPPROTO_HOPOPTS && pd->fragoff == 0) { 6509 if (pf_walk_option6(pd, h, 6510 pd->off + sizeof(ext), 6511 pd->off + (ext.ip6e_len + 1) * 8, reason) 6512 != PF_PASS) 6513 return (PF_DROP); 6514 if (ntohs(h->ip6_plen) == 0 && 6515 pd->jumbolen != 0) { 6516 DPFPRINTF(LOG_NOTICE, 6517 "IPv6 missing jumbo"); 6518 REASON_SET(reason, PFRES_IPOPTIONS); 6519 return (PF_DROP); 6520 } 6521 } 6522 if (pd->proto == IPPROTO_AH) 6523 pd->off += (ext.ip6e_len + 2) * 4; 6524 else 6525 pd->off += (ext.ip6e_len + 1) * 8; 6526 pd->proto = ext.ip6e_nxt; 6527 break; 6528 case IPPROTO_TCP: 6529 case IPPROTO_UDP: 6530 case IPPROTO_ICMPV6: 6531 /* fragments may be short, ignore inner header then */ 6532 if (pd->fragoff != 0 && end < pd->off + 6533 (pd->proto == IPPROTO_TCP ? sizeof(struct tcphdr) : 6534 pd->proto == IPPROTO_UDP ? 
sizeof(struct udphdr) : 6535 sizeof(struct icmp6_hdr))) { 6536 pd->off = pd->fragoff; 6537 pd->proto = IPPROTO_FRAGMENT; 6538 } 6539 /* FALLTHROUGH */ 6540 default: 6541 return (PF_PASS); 6542 } 6543 } 6544 DPFPRINTF(LOG_NOTICE, "IPv6 nested extension header limit"); 6545 REASON_SET(reason, PFRES_IPOPTIONS); 6546 return (PF_DROP); 6547 } 6548 #endif /* INET6 */ 6549 6550 int 6551 pf_setup_pdesc(struct pf_pdesc *pd, sa_family_t af, int dir, 6552 struct pfi_kif *kif, struct mbuf *m, u_short *reason) 6553 { 6554 memset(pd, 0, sizeof(*pd)); 6555 pd->dir = dir; 6556 pd->kif = kif; /* kif is NULL when called by pflog */ 6557 pd->m = m; 6558 pd->sidx = (dir == PF_IN) ? 0 : 1; 6559 pd->didx = (dir == PF_IN) ? 1 : 0; 6560 pd->af = pd->naf = af; 6561 pd->rdomain = rtable_l2(pd->m->m_pkthdr.ph_rtableid); 6562 6563 switch (pd->af) { 6564 case AF_INET: { 6565 struct ip *h; 6566 6567 /* Check for illegal packets */ 6568 if (pd->m->m_pkthdr.len < (int)sizeof(struct ip)) { 6569 REASON_SET(reason, PFRES_SHORT); 6570 return (PF_DROP); 6571 } 6572 6573 h = mtod(pd->m, struct ip *); 6574 if (pd->m->m_pkthdr.len < ntohs(h->ip_len)) { 6575 REASON_SET(reason, PFRES_SHORT); 6576 return (PF_DROP); 6577 } 6578 6579 if (pf_walk_header(pd, h, reason) != PF_PASS) 6580 return (PF_DROP); 6581 6582 pd->src = (struct pf_addr *)&h->ip_src; 6583 pd->dst = (struct pf_addr *)&h->ip_dst; 6584 pd->tot_len = ntohs(h->ip_len); 6585 pd->tos = h->ip_tos & ~IPTOS_ECN_MASK; 6586 pd->ttl = h->ip_ttl; 6587 pd->virtual_proto = (h->ip_off & htons(IP_MF | IP_OFFMASK)) ? 6588 PF_VPROTO_FRAGMENT : pd->proto; 6589 6590 break; 6591 } 6592 #ifdef INET6 6593 case AF_INET6: { 6594 struct ip6_hdr *h; 6595 6596 /* Check for illegal packets */ 6597 if (pd->m->m_pkthdr.len < (int)sizeof(struct ip6_hdr)) { 6598 REASON_SET(reason, PFRES_SHORT); 6599 return (PF_DROP); 6600 } 6601 6602 h = mtod(pd->m, struct ip6_hdr *); 6603 if (pd->m->m_pkthdr.len < 6604 sizeof(struct ip6_hdr) + ntohs(h->ip6_plen)) { 6605 REASON_SET(reason, PFRES_SHORT); 6606 return (PF_DROP); 6607 } 6608 6609 if (pf_walk_header6(pd, h, reason) != PF_PASS) 6610 return (PF_DROP); 6611 6612 #if 1 6613 /* 6614 * we do not support jumbogram yet. if we keep going, zero 6615 * ip6_plen will do something bad, so drop the packet for now. 6616 */ 6617 if (pd->jumbolen != 0) { 6618 REASON_SET(reason, PFRES_NORM); 6619 return (PF_DROP); 6620 } 6621 #endif /* 1 */ 6622 6623 pd->src = (struct pf_addr *)&h->ip6_src; 6624 pd->dst = (struct pf_addr *)&h->ip6_dst; 6625 pd->tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr); 6626 pd->tos = (ntohl(h->ip6_flow) & 0x0fc00000) >> 20; 6627 pd->ttl = h->ip6_hlim; 6628 pd->virtual_proto = (pd->fragoff != 0) ? 
6629 PF_VPROTO_FRAGMENT : pd->proto; 6630 6631 break; 6632 } 6633 #endif /* INET6 */ 6634 default: 6635 panic("pf_setup_pdesc called with illegal af %u", pd->af); 6636 6637 } 6638 6639 pf_addrcpy(&pd->nsaddr, pd->src, pd->af); 6640 pf_addrcpy(&pd->ndaddr, pd->dst, pd->af); 6641 6642 switch (pd->virtual_proto) { 6643 case IPPROTO_TCP: { 6644 struct tcphdr *th = &pd->hdr.tcp; 6645 6646 if (!pf_pull_hdr(pd->m, pd->off, th, sizeof(*th), 6647 NULL, reason, pd->af)) 6648 return (PF_DROP); 6649 pd->hdrlen = sizeof(*th); 6650 if (pd->off + (th->th_off << 2) > pd->tot_len || 6651 (th->th_off << 2) < sizeof(struct tcphdr)) { 6652 REASON_SET(reason, PFRES_SHORT); 6653 return (PF_DROP); 6654 } 6655 pd->p_len = pd->tot_len - pd->off - (th->th_off << 2); 6656 pd->sport = &th->th_sport; 6657 pd->dport = &th->th_dport; 6658 pd->pcksum = &th->th_sum; 6659 break; 6660 } 6661 case IPPROTO_UDP: { 6662 struct udphdr *uh = &pd->hdr.udp; 6663 6664 if (!pf_pull_hdr(pd->m, pd->off, uh, sizeof(*uh), 6665 NULL, reason, pd->af)) 6666 return (PF_DROP); 6667 pd->hdrlen = sizeof(*uh); 6668 if (uh->uh_dport == 0 || 6669 pd->off + ntohs(uh->uh_ulen) > pd->tot_len || 6670 ntohs(uh->uh_ulen) < sizeof(struct udphdr)) { 6671 REASON_SET(reason, PFRES_SHORT); 6672 return (PF_DROP); 6673 } 6674 pd->sport = &uh->uh_sport; 6675 pd->dport = &uh->uh_dport; 6676 pd->pcksum = &uh->uh_sum; 6677 break; 6678 } 6679 case IPPROTO_ICMP: { 6680 if (!pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp, ICMP_MINLEN, 6681 NULL, reason, pd->af)) 6682 return (PF_DROP); 6683 pd->hdrlen = ICMP_MINLEN; 6684 if (pd->off + pd->hdrlen > pd->tot_len) { 6685 REASON_SET(reason, PFRES_SHORT); 6686 return (PF_DROP); 6687 } 6688 pd->pcksum = &pd->hdr.icmp.icmp_cksum; 6689 break; 6690 } 6691 #ifdef INET6 6692 case IPPROTO_ICMPV6: { 6693 size_t icmp_hlen = sizeof(struct icmp6_hdr); 6694 6695 if (!pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp6, icmp_hlen, 6696 NULL, reason, pd->af)) 6697 return (PF_DROP); 6698 /* ICMP headers we look further into to match state */ 6699 switch (pd->hdr.icmp6.icmp6_type) { 6700 case MLD_LISTENER_QUERY: 6701 case MLD_LISTENER_REPORT: 6702 icmp_hlen = sizeof(struct mld_hdr); 6703 break; 6704 case ND_NEIGHBOR_SOLICIT: 6705 case ND_NEIGHBOR_ADVERT: 6706 icmp_hlen = sizeof(struct nd_neighbor_solicit); 6707 /* FALLTHROUGH */ 6708 case ND_ROUTER_SOLICIT: 6709 case ND_ROUTER_ADVERT: 6710 case ND_REDIRECT: 6711 if (pd->ttl != 255) { 6712 REASON_SET(reason, PFRES_NORM); 6713 return (PF_DROP); 6714 } 6715 break; 6716 } 6717 if (icmp_hlen > sizeof(struct icmp6_hdr) && 6718 !pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp6, icmp_hlen, 6719 NULL, reason, pd->af)) 6720 return (PF_DROP); 6721 pd->hdrlen = icmp_hlen; 6722 if (pd->off + pd->hdrlen > pd->tot_len) { 6723 REASON_SET(reason, PFRES_SHORT); 6724 return (PF_DROP); 6725 } 6726 pd->pcksum = &pd->hdr.icmp6.icmp6_cksum; 6727 break; 6728 } 6729 #endif /* INET6 */ 6730 } 6731 6732 if (pd->sport) 6733 pd->osport = pd->nsport = *pd->sport; 6734 if (pd->dport) 6735 pd->odport = pd->ndport = *pd->dport; 6736 6737 return (PF_PASS); 6738 } 6739 6740 void 6741 pf_counters_inc(int action, struct pf_pdesc *pd, struct pf_state *s, 6742 struct pf_rule *r, struct pf_rule *a) 6743 { 6744 int dirndx; 6745 pd->kif->pfik_bytes[pd->af == AF_INET6][pd->dir == PF_OUT] 6746 [action != PF_PASS] += pd->tot_len; 6747 pd->kif->pfik_packets[pd->af == AF_INET6][pd->dir == PF_OUT] 6748 [action != PF_PASS]++; 6749 6750 if (action == PF_PASS || action == PF_AFRT || r->action == PF_DROP) { 6751 dirndx = (pd->dir == PF_OUT); 6752 
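/* rule and anchor counters are indexed 0 for inbound, 1 for outbound */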
r->packets[dirndx]++; 6753 r->bytes[dirndx] += pd->tot_len; 6754 if (a != NULL) { 6755 a->packets[dirndx]++; 6756 a->bytes[dirndx] += pd->tot_len; 6757 } 6758 if (s != NULL) { 6759 struct pf_rule_item *ri; 6760 struct pf_sn_item *sni; 6761 6762 SLIST_FOREACH(sni, &s->src_nodes, next) { 6763 sni->sn->packets[dirndx]++; 6764 sni->sn->bytes[dirndx] += pd->tot_len; 6765 } 6766 dirndx = (pd->dir == s->direction) ? 0 : 1; 6767 s->packets[dirndx]++; 6768 s->bytes[dirndx] += pd->tot_len; 6769 6770 SLIST_FOREACH(ri, &s->match_rules, entry) { 6771 ri->r->packets[dirndx]++; 6772 ri->r->bytes[dirndx] += pd->tot_len; 6773 6774 if (ri->r->src.addr.type == PF_ADDR_TABLE) 6775 pfr_update_stats(ri->r->src.addr.p.tbl, 6776 &s->key[(s->direction == PF_IN)]-> 6777 addr[(s->direction == PF_OUT)], 6778 pd, ri->r->action, ri->r->src.neg); 6779 if (ri->r->dst.addr.type == PF_ADDR_TABLE) 6780 pfr_update_stats(ri->r->dst.addr.p.tbl, 6781 &s->key[(s->direction == PF_IN)]-> 6782 addr[(s->direction == PF_IN)], 6783 pd, ri->r->action, ri->r->dst.neg); 6784 } 6785 } 6786 if (r->src.addr.type == PF_ADDR_TABLE) 6787 pfr_update_stats(r->src.addr.p.tbl, 6788 (s == NULL) ? pd->src : 6789 &s->key[(s->direction == PF_IN)]-> 6790 addr[(s->direction == PF_OUT)], 6791 pd, r->action, r->src.neg); 6792 if (r->dst.addr.type == PF_ADDR_TABLE) 6793 pfr_update_stats(r->dst.addr.p.tbl, 6794 (s == NULL) ? pd->dst : 6795 &s->key[(s->direction == PF_IN)]-> 6796 addr[(s->direction == PF_IN)], 6797 pd, r->action, r->dst.neg); 6798 } 6799 } 6800 6801 int 6802 pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0) 6803 { 6804 struct pfi_kif *kif; 6805 u_short action, reason = 0; 6806 struct pf_rule *a = NULL, *r = &pf_default_rule; 6807 struct pf_state *s = NULL; 6808 struct pf_ruleset *ruleset = NULL; 6809 struct pf_pdesc pd; 6810 int dir = (fwdir == PF_FWD) ? 
PF_OUT : fwdir; 6811 u_int32_t qid, pqid = 0; 6812 int have_pf_lock = 0; 6813 6814 if (!pf_status.running) 6815 return (PF_PASS); 6816 6817 #if NCARP > 0 6818 if (ifp->if_type == IFT_CARP && ifp->if_carpdev) 6819 kif = (struct pfi_kif *)ifp->if_carpdev->if_pf_kif; 6820 else 6821 #endif /* NCARP */ 6822 kif = (struct pfi_kif *)ifp->if_pf_kif; 6823 6824 if (kif == NULL) { 6825 DPFPRINTF(LOG_ERR, 6826 "%s: kif == NULL, if_xname %s", __func__, ifp->if_xname); 6827 return (PF_DROP); 6828 } 6829 if (kif->pfik_flags & PFI_IFLAG_SKIP) 6830 return (PF_PASS); 6831 6832 #ifdef DIAGNOSTIC 6833 if (((*m0)->m_flags & M_PKTHDR) == 0) 6834 panic("non-M_PKTHDR is passed to pf_test"); 6835 #endif /* DIAGNOSTIC */ 6836 6837 if ((*m0)->m_pkthdr.pf.flags & PF_TAG_GENERATED) 6838 return (PF_PASS); 6839 6840 if ((*m0)->m_pkthdr.pf.flags & PF_TAG_DIVERTED_PACKET) 6841 return (PF_PASS); 6842 6843 if ((*m0)->m_pkthdr.pf.flags & PF_TAG_REFRAGMENTED) { 6844 (*m0)->m_pkthdr.pf.flags &= ~PF_TAG_REFRAGMENTED; 6845 return (PF_PASS); 6846 } 6847 6848 action = pf_setup_pdesc(&pd, af, dir, kif, *m0, &reason); 6849 if (action != PF_PASS) { 6850 #if NPFLOG > 0 6851 pd.pflog |= PF_LOG_FORCE; 6852 #endif /* NPFLOG > 0 */ 6853 goto done; 6854 } 6855 6856 /* packet normalization and reassembly */ 6857 switch (pd.af) { 6858 case AF_INET: 6859 action = pf_normalize_ip(&pd, &reason); 6860 break; 6861 #ifdef INET6 6862 case AF_INET6: 6863 action = pf_normalize_ip6(&pd, &reason); 6864 break; 6865 #endif /* INET6 */ 6866 } 6867 *m0 = pd.m; 6868 /* if packet sits in reassembly queue, return without error */ 6869 if (pd.m == NULL) 6870 return PF_PASS; 6871 6872 if (action != PF_PASS) { 6873 #if NPFLOG > 0 6874 pd.pflog |= PF_LOG_FORCE; 6875 #endif /* NPFLOG > 0 */ 6876 goto done; 6877 } 6878 6879 /* if packet has been reassembled, update packet description */ 6880 if (pf_status.reass && pd.virtual_proto == PF_VPROTO_FRAGMENT) { 6881 action = pf_setup_pdesc(&pd, af, dir, kif, pd.m, &reason); 6882 if (action != PF_PASS) { 6883 #if NPFLOG > 0 6884 pd.pflog |= PF_LOG_FORCE; 6885 #endif /* NPFLOG > 0 */ 6886 goto done; 6887 } 6888 } 6889 pd.m->m_pkthdr.pf.flags |= PF_TAG_PROCESSED; 6890 6891 /* 6892 * Avoid pcb-lookups from the forwarding path. They should never 6893 * match and would cause MP locking problems. 
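 * Presetting lookup.done to -1 with dummy credentials makes the
 * uid/gid matching code treat the lookup as already attempted and
 * failed, so pf_socket_lookup() is skipped for forwarded packets.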
6894 */ 6895 if (fwdir == PF_FWD) { 6896 pd.lookup.done = -1; 6897 pd.lookup.uid = UID_MAX; 6898 pd.lookup.gid = GID_MAX; 6899 pd.lookup.pid = NO_PID; 6900 } 6901 6902 switch (pd.virtual_proto) { 6903 6904 case PF_VPROTO_FRAGMENT: { 6905 /* 6906 * handle fragments that aren't reassembled by 6907 * normalization 6908 */ 6909 PF_LOCK(); 6910 have_pf_lock = 1; 6911 action = pf_test_rule(&pd, &r, &s, &a, &ruleset, &reason); 6912 s = pf_state_ref(s); 6913 if (action != PF_PASS) 6914 REASON_SET(&reason, PFRES_FRAG); 6915 break; 6916 } 6917 6918 case IPPROTO_ICMP: { 6919 if (pd.af != AF_INET) { 6920 action = PF_DROP; 6921 REASON_SET(&reason, PFRES_NORM); 6922 DPFPRINTF(LOG_NOTICE, 6923 "dropping IPv6 packet with ICMPv4 payload"); 6924 break; 6925 } 6926 PF_STATE_ENTER_READ(); 6927 action = pf_test_state_icmp(&pd, &s, &reason); 6928 s = pf_state_ref(s); 6929 PF_STATE_EXIT_READ(); 6930 if (action == PF_PASS || action == PF_AFRT) { 6931 #if NPFSYNC > 0 6932 pfsync_update_state(s, &have_pf_lock); 6933 #endif /* NPFSYNC > 0 */ 6934 r = s->rule.ptr; 6935 a = s->anchor.ptr; 6936 #if NPFLOG > 0 6937 pd.pflog |= s->log; 6938 #endif /* NPFLOG > 0 */ 6939 } else if (s == NULL) { 6940 PF_LOCK(); 6941 have_pf_lock = 1; 6942 action = pf_test_rule(&pd, &r, &s, &a, &ruleset, 6943 &reason); 6944 s = pf_state_ref(s); 6945 } 6946 break; 6947 } 6948 6949 #ifdef INET6 6950 case IPPROTO_ICMPV6: { 6951 if (pd.af != AF_INET6) { 6952 action = PF_DROP; 6953 REASON_SET(&reason, PFRES_NORM); 6954 DPFPRINTF(LOG_NOTICE, 6955 "dropping IPv4 packet with ICMPv6 payload"); 6956 break; 6957 } 6958 PF_STATE_ENTER_READ(); 6959 action = pf_test_state_icmp(&pd, &s, &reason); 6960 s = pf_state_ref(s); 6961 PF_STATE_EXIT_READ(); 6962 if (action == PF_PASS || action == PF_AFRT) { 6963 #if NPFSYNC > 0 6964 pfsync_update_state(s, &have_pf_lock); 6965 #endif /* NPFSYNC > 0 */ 6966 r = s->rule.ptr; 6967 a = s->anchor.ptr; 6968 #if NPFLOG > 0 6969 pd.pflog |= s->log; 6970 #endif /* NPFLOG > 0 */ 6971 } else if (s == NULL) { 6972 PF_LOCK(); 6973 have_pf_lock = 1; 6974 action = pf_test_rule(&pd, &r, &s, &a, &ruleset, 6975 &reason); 6976 s = pf_state_ref(s); 6977 } 6978 break; 6979 } 6980 #endif /* INET6 */ 6981 6982 default: 6983 if (pd.virtual_proto == IPPROTO_TCP) { 6984 if (pd.dir == PF_IN && (pd.hdr.tcp.th_flags & 6985 (TH_SYN|TH_ACK)) == TH_SYN && 6986 pf_synflood_check(&pd)) { 6987 PF_LOCK(); 6988 have_pf_lock = 1; 6989 pf_syncookie_send(&pd); 6990 action = PF_DROP; 6991 break; 6992 } 6993 if ((pd.hdr.tcp.th_flags & TH_ACK) && pd.p_len == 0) 6994 pqid = 1; 6995 action = pf_normalize_tcp(&pd); 6996 if (action == PF_DROP) 6997 break; 6998 } 6999 PF_STATE_ENTER_READ(); 7000 action = pf_test_state(&pd, &s, &reason, 0); 7001 s = pf_state_ref(s); 7002 PF_STATE_EXIT_READ(); 7003 if (s == NULL && action != PF_PASS && action != PF_AFRT && 7004 pd.dir == PF_IN && pd.virtual_proto == IPPROTO_TCP && 7005 (pd.hdr.tcp.th_flags & (TH_SYN|TH_ACK|TH_RST)) == TH_ACK && 7006 pf_syncookie_validate(&pd)) { 7007 struct mbuf *msyn; 7008 msyn = pf_syncookie_recreate_syn(&pd); 7009 if (msyn) { 7010 action = pf_test(af, fwdir, ifp, &msyn); 7011 m_freem(msyn); 7012 if (action == PF_PASS || action == PF_AFRT) { 7013 PF_STATE_ENTER_READ(); 7014 pf_test_state(&pd, &s, &reason, 1); 7015 s = pf_state_ref(s); 7016 PF_STATE_EXIT_READ(); 7017 if (s == NULL) 7018 return (PF_DROP); 7019 s->src.seqhi = 7020 ntohl(pd.hdr.tcp.th_ack) - 1; 7021 s->src.seqlo = 7022 ntohl(pd.hdr.tcp.th_seq) - 1; 7023 pf_set_protostate(s, PF_PEER_SRC, 7024 PF_TCPS_PROXY_DST); 7025 PF_LOCK(); 7026 
have_pf_lock = 1; 7027 action = pf_synproxy(&pd, &s, &reason); 7028 if (action != PF_PASS) { 7029 PF_UNLOCK(); 7030 pf_state_unref(s); 7031 return (action); 7032 } 7033 } 7034 } else 7035 action = PF_DROP; 7036 } 7037 7038 if (action == PF_PASS || action == PF_AFRT) { 7039 #if NPFSYNC > 0 7040 pfsync_update_state(s, &have_pf_lock); 7041 #endif /* NPFSYNC > 0 */ 7042 r = s->rule.ptr; 7043 a = s->anchor.ptr; 7044 #if NPFLOG > 0 7045 pd.pflog |= s->log; 7046 #endif /* NPFLOG > 0 */ 7047 } else if (s == NULL) { 7048 PF_LOCK(); 7049 have_pf_lock = 1; 7050 action = pf_test_rule(&pd, &r, &s, &a, &ruleset, 7051 &reason); 7052 s = pf_state_ref(s); 7053 } 7054 7055 if (pd.virtual_proto == IPPROTO_TCP) { 7056 if (s) { 7057 if (s->max_mss) 7058 pf_normalize_mss(&pd, s->max_mss); 7059 } else if (r->max_mss) 7060 pf_normalize_mss(&pd, r->max_mss); 7061 } 7062 7063 break; 7064 } 7065 7066 if (have_pf_lock != 0) 7067 PF_UNLOCK(); 7068 7069 /* 7070 * At the moment, we rely on NET_LOCK() to prevent removal of items 7071 * we've collected above ('r', 'anchor' and 'ruleset'). They'll have 7072 * to be refcounted when NET_LOCK() is gone. 7073 */ 7074 7075 done: 7076 if (action != PF_DROP) { 7077 if (s) { 7078 /* The non-state case is handled in pf_test_rule() */ 7079 if (action == PF_PASS && pd.badopts && 7080 !(s->state_flags & PFSTATE_ALLOWOPTS)) { 7081 action = PF_DROP; 7082 REASON_SET(&reason, PFRES_IPOPTIONS); 7083 #if NPFLOG > 0 7084 pd.pflog |= PF_LOG_FORCE; 7085 #endif /* NPFLOG > 0 */ 7086 DPFPRINTF(LOG_NOTICE, "dropping packet with " 7087 "ip/ipv6 options in pf_test()"); 7088 } 7089 7090 pf_scrub(pd.m, s->state_flags, pd.af, s->min_ttl, 7091 s->set_tos); 7092 pf_tag_packet(pd.m, s->tag, s->rtableid[pd.didx]); 7093 if (pqid || (pd.tos & IPTOS_LOWDELAY)) { 7094 qid = s->pqid; 7095 if (s->state_flags & PFSTATE_SETPRIO) 7096 pd.m->m_pkthdr.pf.prio = s->set_prio[1]; 7097 } else { 7098 qid = s->qid; 7099 if (s->state_flags & PFSTATE_SETPRIO) 7100 pd.m->m_pkthdr.pf.prio = s->set_prio[0]; 7101 } 7102 pd.m->m_pkthdr.pf.delay = s->delay; 7103 } else { 7104 pf_scrub(pd.m, r->scrub_flags, pd.af, r->min_ttl, 7105 r->set_tos); 7106 if (pqid || (pd.tos & IPTOS_LOWDELAY)) { 7107 qid = r->pqid; 7108 if (r->scrub_flags & PFSTATE_SETPRIO) 7109 pd.m->m_pkthdr.pf.prio = r->set_prio[1]; 7110 } else { 7111 qid = r->qid; 7112 if (r->scrub_flags & PFSTATE_SETPRIO) 7113 pd.m->m_pkthdr.pf.prio = r->set_prio[0]; 7114 } 7115 pd.m->m_pkthdr.pf.delay = r->delay; 7116 } 7117 } 7118 7119 if (action == PF_PASS && qid) 7120 pd.m->m_pkthdr.pf.qid = qid; 7121 if (pd.dir == PF_IN && s && s->key[PF_SK_STACK]) 7122 pf_mbuf_link_state_key(pd.m, s->key[PF_SK_STACK]); 7123 if (pd.dir == PF_OUT && 7124 pd.m->m_pkthdr.pf.inp && !pd.m->m_pkthdr.pf.inp->inp_pf_sk && 7125 s && s->key[PF_SK_STACK] && !s->key[PF_SK_STACK]->inp) 7126 pf_state_key_link_inpcb(s->key[PF_SK_STACK], 7127 pd.m->m_pkthdr.pf.inp); 7128 7129 if (s && (pd.m->m_pkthdr.ph_flowid & M_FLOWID_VALID) == 0) { 7130 pd.m->m_pkthdr.ph_flowid = M_FLOWID_VALID | 7131 (M_FLOWID_MASK & bemtoh64(&s->id)); 7132 } 7133 7134 /* 7135 * connections redirected to loopback should not match sockets 7136 * bound specifically to loopback due to security implications, 7137 * see in_pcblookup_listen(). 
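 * PF_TAG_TRANSLATE_LOCALHOST, set below, lets the stack tell such
 * redirected connections apart from genuine loopback traffic.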
7138 */ 7139 if (pd.destchg) 7140 if ((pd.af == AF_INET && (ntohl(pd.dst->v4.s_addr) >> 7141 IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) || 7142 (pd.af == AF_INET6 && IN6_IS_ADDR_LOOPBACK(&pd.dst->v6))) 7143 pd.m->m_pkthdr.pf.flags |= PF_TAG_TRANSLATE_LOCALHOST; 7144 /* We need to redo the route lookup on outgoing routes. */ 7145 if (pd.destchg && pd.dir == PF_OUT) 7146 pd.m->m_pkthdr.pf.flags |= PF_TAG_REROUTE; 7147 7148 if (pd.dir == PF_IN && action == PF_PASS && 7149 (r->divert.type == PF_DIVERT_TO || 7150 r->divert.type == PF_DIVERT_REPLY)) { 7151 struct pf_divert *divert; 7152 7153 if ((divert = pf_get_divert(pd.m))) { 7154 pd.m->m_pkthdr.pf.flags |= PF_TAG_DIVERTED; 7155 divert->addr = r->divert.addr; 7156 divert->port = r->divert.port; 7157 divert->rdomain = pd.rdomain; 7158 divert->type = r->divert.type; 7159 } 7160 } 7161 7162 if (action == PF_PASS && r->divert.type == PF_DIVERT_PACKET) 7163 action = PF_DIVERT; 7164 7165 #if NPFLOG > 0 7166 if (pd.pflog) { 7167 struct pf_rule_item *ri; 7168 7169 if (pd.pflog & PF_LOG_FORCE || r->log & PF_LOG_ALL) 7170 pflog_packet(&pd, reason, r, a, ruleset, NULL); 7171 if (s) { 7172 SLIST_FOREACH(ri, &s->match_rules, entry) 7173 if (ri->r->log & PF_LOG_ALL) 7174 pflog_packet(&pd, reason, ri->r, a, 7175 ruleset, NULL); 7176 } 7177 } 7178 #endif /* NPFLOG > 0 */ 7179 7180 pf_counters_inc(action, &pd, s, r, a); 7181 7182 switch (action) { 7183 case PF_SYNPROXY_DROP: 7184 m_freem(pd.m); 7185 /* FALLTHROUGH */ 7186 case PF_DEFER: 7187 pd.m = NULL; 7188 action = PF_PASS; 7189 break; 7190 case PF_DIVERT: 7191 switch (pd.af) { 7192 case AF_INET: 7193 if (!divert_packet(pd.m, pd.dir, r->divert.port)) 7194 pd.m = NULL; 7195 break; 7196 #ifdef INET6 7197 case AF_INET6: 7198 if (!divert6_packet(pd.m, pd.dir, r->divert.port)) 7199 pd.m = NULL; 7200 break; 7201 #endif /* INET6 */ 7202 } 7203 action = PF_PASS; 7204 break; 7205 #ifdef INET6 7206 case PF_AFRT: 7207 if (pf_translate_af(&pd)) { 7208 action = PF_DROP; 7209 break; 7210 } 7211 pd.m->m_pkthdr.pf.flags |= PF_TAG_GENERATED; 7212 switch (pd.naf) { 7213 case AF_INET: 7214 if (pd.dir == PF_IN) 7215 ip_forward(pd.m, ifp, NULL, 1); 7216 else 7217 ip_output(pd.m, NULL, NULL, 0, NULL, NULL, 0); 7218 break; 7219 case AF_INET6: 7220 if (pd.dir == PF_IN) 7221 ip6_forward(pd.m, NULL, 1); 7222 else 7223 ip6_output(pd.m, NULL, NULL, 0, NULL, NULL); 7224 break; 7225 } 7226 pd.m = NULL; 7227 action = PF_PASS; 7228 break; 7229 #endif /* INET6 */ 7230 case PF_DROP: 7231 m_freem(pd.m); 7232 pd.m = NULL; 7233 break; 7234 default: 7235 if (r->rt) { 7236 switch (pd.af) { 7237 case AF_INET: 7238 pf_route(&pd, r, s); 7239 break; 7240 #ifdef INET6 7241 case AF_INET6: 7242 pf_route6(&pd, r, s); 7243 break; 7244 #endif /* INET6 */ 7245 } 7246 } 7247 break; 7248 } 7249 7250 #ifdef INET6 7251 /* if reassembled packet passed, create new fragments */ 7252 if (pf_status.reass && action == PF_PASS && pd.m && fwdir == PF_FWD && 7253 pd.af == AF_INET6) { 7254 struct m_tag *mtag; 7255 7256 if ((mtag = m_tag_find(pd.m, PACKET_TAG_PF_REASSEMBLED, NULL))) 7257 action = pf_refragment6(&pd.m, mtag, NULL, NULL, NULL); 7258 } 7259 #endif /* INET6 */ 7260 if (s && action != PF_DROP) { 7261 if (!s->if_index_in && dir == PF_IN) 7262 s->if_index_in = ifp->if_index; 7263 else if (!s->if_index_out && dir == PF_OUT) 7264 s->if_index_out = ifp->if_index; 7265 } 7266 7267 *m0 = pd.m; 7268 7269 pf_state_unref(s); 7270 7271 return (action); 7272 } 7273 7274 int 7275 pf_ouraddr(struct mbuf *m) 7276 { 7277 struct pf_state_key *sk; 7278 7279 if (m->m_pkthdr.pf.flags 
& PF_TAG_DIVERTED) 7280 return (1); 7281 7282 sk = m->m_pkthdr.pf.statekey; 7283 if (sk != NULL) { 7284 if (sk->inp != NULL) 7285 return (1); 7286 } 7287 7288 return (-1); 7289 } 7290 7291 /* 7292 * Must be called whenever any addressing information such as 7293 * address, port or protocol has changed. 7294 */ 7295 void 7296 pf_pkt_addr_changed(struct mbuf *m) 7297 { 7298 pf_mbuf_unlink_state_key(m); 7299 pf_mbuf_unlink_inpcb(m); 7300 } 7301 7302 struct inpcb * 7303 pf_inp_lookup(struct mbuf *m) 7304 { 7305 struct inpcb *inp = NULL; 7306 struct pf_state_key *sk = m->m_pkthdr.pf.statekey; 7307 7308 if (!pf_state_key_isvalid(sk)) 7309 pf_mbuf_unlink_state_key(m); 7310 else 7311 inp = m->m_pkthdr.pf.statekey->inp; 7312 7313 if (inp && inp->inp_pf_sk) 7314 KASSERT(m->m_pkthdr.pf.statekey == inp->inp_pf_sk); 7315 7316 return (inp); 7317 } 7318 7319 void 7320 pf_inp_link(struct mbuf *m, struct inpcb *inp) 7321 { 7322 struct pf_state_key *sk = m->m_pkthdr.pf.statekey; 7323 7324 if (!pf_state_key_isvalid(sk)) { 7325 pf_mbuf_unlink_state_key(m); 7326 return; 7327 } 7328 7329 /* 7330 * We don't need to grab the PF lock here; in the worst case we link 7331 * the inp to a state that is just being marked as deleted by another 7332 * thread. 7333 */ 7334 if (inp && !sk->inp && !inp->inp_pf_sk) 7335 pf_state_key_link_inpcb(sk, inp); 7336 7337 /* The state key has done its job of finding the inp; it is no longer needed. */ 7338 pf_mbuf_unlink_state_key(m); 7339 } 7340 7341 void 7342 pf_inp_unlink(struct inpcb *inp) 7343 { 7344 pf_inpcb_unlink_state_key(inp); 7345 } 7346 7347 void 7348 pf_state_key_link_reverse(struct pf_state_key *sk, struct pf_state_key *skrev) 7349 { 7350 /* Note that sk and skrev may be equal, then we refcount twice. */ 7351 KASSERT(sk->reverse == NULL); 7352 KASSERT(skrev->reverse == NULL); 7353 sk->reverse = pf_state_key_ref(skrev); 7354 skrev->reverse = pf_state_key_ref(sk); 7355 } 7356 7357 #if NPFLOG > 0 7358 void 7359 pf_log_matches(struct pf_pdesc *pd, struct pf_rule *rm, struct pf_rule *am, 7360 struct pf_ruleset *ruleset, struct pf_rule_slist *matchrules) 7361 { 7362 struct pf_rule_item *ri; 7363 7364 /* if this is the log(matches) rule, packet has been logged already */ 7365 if (rm->log & PF_LOG_MATCHES) 7366 return; 7367 7368 SLIST_FOREACH(ri, matchrules, entry) 7369 if (ri->r->log & PF_LOG_MATCHES) 7370 pflog_packet(pd, PFRES_MATCH, rm, am, ruleset, ri->r); 7371 } 7372 #endif /* NPFLOG > 0 */ 7373 7374 struct pf_state_key * 7375 pf_state_key_ref(struct pf_state_key *sk) 7376 { 7377 if (sk != NULL) 7378 PF_REF_TAKE(sk->refcnt); 7379 7380 return (sk); 7381 } 7382 7383 void 7384 pf_state_key_unref(struct pf_state_key *sk) 7385 { 7386 if (PF_REF_RELE(sk->refcnt)) { 7387 /* state key must be removed from tree */ 7388 KASSERT(!pf_state_key_isvalid(sk)); 7389 /* state key must be unlinked from reverse key */ 7390 KASSERT(sk->reverse == NULL); 7391 /* state key must be unlinked from socket */ 7392 KASSERT(sk->inp == NULL); 7393 pool_put(&pf_state_key_pl, sk); 7394 } 7395 } 7396 7397 int 7398 pf_state_key_isvalid(struct pf_state_key *sk) 7399 { 7400 return ((sk != NULL) && (sk->removed == 0)); 7401 } 7402 7403 void 7404 pf_mbuf_link_state_key(struct mbuf *m, struct pf_state_key *sk) 7405 { 7406 KASSERT(m->m_pkthdr.pf.statekey == NULL); 7407 m->m_pkthdr.pf.statekey = pf_state_key_ref(sk); 7408 } 7409 7410 void 7411 pf_mbuf_unlink_state_key(struct mbuf *m) 7412 { 7413 struct pf_state_key *sk = m->m_pkthdr.pf.statekey; 7414 7415 if (sk != NULL) { 7416 m->m_pkthdr.pf.statekey = NULL; 7417
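/* drop the reference the mbuf was holding on the state key */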
pf_state_key_unref(sk); 7418 } 7419 } 7420 7421 void 7422 pf_mbuf_link_inpcb(struct mbuf *m, struct inpcb *inp) 7423 { 7424 KASSERT(m->m_pkthdr.pf.inp == NULL); 7425 m->m_pkthdr.pf.inp = in_pcbref(inp); 7426 } 7427 7428 void 7429 pf_mbuf_unlink_inpcb(struct mbuf *m) 7430 { 7431 struct inpcb *inp = m->m_pkthdr.pf.inp; 7432 7433 if (inp != NULL) { 7434 m->m_pkthdr.pf.inp = NULL; 7435 in_pcbunref(inp); 7436 } 7437 } 7438 7439 void 7440 pf_state_key_link_inpcb(struct pf_state_key *sk, struct inpcb *inp) 7441 { 7442 KASSERT(sk->inp == NULL); 7443 sk->inp = in_pcbref(inp); 7444 KASSERT(inp->inp_pf_sk == NULL); 7445 inp->inp_pf_sk = pf_state_key_ref(sk); 7446 } 7447 7448 void 7449 pf_inpcb_unlink_state_key(struct inpcb *inp) 7450 { 7451 struct pf_state_key *sk = inp->inp_pf_sk; 7452 7453 if (sk != NULL) { 7454 KASSERT(sk->inp == inp); 7455 sk->inp = NULL; 7456 inp->inp_pf_sk = NULL; 7457 pf_state_key_unref(sk); 7458 in_pcbunref(inp); 7459 } 7460 } 7461 7462 void 7463 pf_state_key_unlink_inpcb(struct pf_state_key *sk) 7464 { 7465 struct inpcb *inp = sk->inp; 7466 7467 if (inp != NULL) { 7468 KASSERT(inp->inp_pf_sk == sk); 7469 sk->inp = NULL; 7470 inp->inp_pf_sk = NULL; 7471 pf_state_key_unref(sk); 7472 in_pcbunref(inp); 7473 } 7474 } 7475 7476 void 7477 pf_state_key_unlink_reverse(struct pf_state_key *sk) 7478 { 7479 struct pf_state_key *skrev = sk->reverse; 7480 7481 /* Note that sk and skrev may be equal, then we unref twice. */ 7482 if (skrev != NULL) { 7483 KASSERT(skrev->reverse == sk); 7484 sk->reverse = NULL; 7485 skrev->reverse = NULL; 7486 pf_state_key_unref(skrev); 7487 pf_state_key_unref(sk); 7488 } 7489 } 7490 7491 struct pf_state * 7492 pf_state_ref(struct pf_state *s) 7493 { 7494 if (s != NULL) 7495 PF_REF_TAKE(s->refcnt); 7496 return (s); 7497 } 7498 7499 void 7500 pf_state_unref(struct pf_state *s) 7501 { 7502 if ((s != NULL) && PF_REF_RELE(s->refcnt)) { 7503 /* never inserted or removed */ 7504 #if NPFSYNC > 0 7505 KASSERT((TAILQ_NEXT(s, sync_list) == NULL) || 7506 ((TAILQ_NEXT(s, sync_list) == _Q_INVALID) && 7507 (s->sync_state == PFSYNC_S_NONE))); 7508 #endif /* NPFSYNC */ 7509 KASSERT((TAILQ_NEXT(s, entry_list) == NULL) || 7510 (TAILQ_NEXT(s, entry_list) == _Q_INVALID)); 7511 KASSERT((s->key[PF_SK_WIRE] == NULL) && 7512 (s->key[PF_SK_STACK] == NULL)); 7513 7514 pool_put(&pf_state_pl, s); 7515 } 7516 } 7517 7518 int 7519 pf_delay_pkt(struct mbuf *m, u_int ifidx) 7520 { 7521 struct pf_pktdelay *pdy; 7522 7523 if ((pdy = pool_get(&pf_pktdelay_pl, PR_NOWAIT)) == NULL) { 7524 m_freem(m); 7525 return (ENOBUFS); 7526 } 7527 pdy->ifidx = ifidx; 7528 pdy->m = m; 7529 timeout_set(&pdy->to, pf_pktenqueue_delayed, pdy); 7530 timeout_add_msec(&pdy->to, m->m_pkthdr.pf.delay); 7531 m->m_pkthdr.pf.delay = 0; 7532 return (0); 7533 } 7534 7535 void 7536 pf_pktenqueue_delayed(void *arg) 7537 { 7538 struct pf_pktdelay *pdy = arg; 7539 struct ifnet *ifp; 7540 7541 ifp = if_get(pdy->ifidx); 7542 if (ifp != NULL) { 7543 if_enqueue(ifp, pdy->m); 7544 if_put(ifp); 7545 } else 7546 m_freem(pdy->m); 7547 7548 pool_put(&pf_pktdelay_pl, pdy); 7549 } 7550
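/*
 * A minimal usage sketch (intentionally not compiled) of the state
 * reference counting helpers above.  The function example_state_user()
 * is hypothetical; the take/release pattern mirrors what pf_test()
 * does around its state table lookups.
 */
#if 0
void
example_state_user(struct pf_pdesc *pd)
{
	struct pf_state *s = NULL;
	u_short reason = 0;

	PF_STATE_ENTER_READ();
	(void) pf_test_state(pd, &s, &reason, 0);
	s = pf_state_ref(s);	/* keep s alive after the read lock is dropped */
	PF_STATE_EXIT_READ();

	if (s != NULL) {
		/* ... work with the state ... */
		pf_state_unref(s);	/* may free s on the last reference */
	}
}
#endif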