1 /* $OpenBSD: pf.c,v 1.808 2012/07/10 17:33:48 bluhm Exp $ */ 2 3 /* 4 * Copyright (c) 2001 Daniel Hartmeier 5 * Copyright (c) 2002 - 2010 Henning Brauer 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * - Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * - Redistributions in binary form must reproduce the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer in the documentation and/or other materials provided 17 * with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 22 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 23 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 25 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 29 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 * 32 * Effort sponsored in part by the Defense Advanced Research Projects 33 * Agency (DARPA) and Air Force Research Laboratory, Air Force 34 * Materiel Command, USAF, under agreement number F30602-01-2-0537. 
35 * 36 */ 37 38 #include "bpfilter.h" 39 #include "pflog.h" 40 #include "pfsync.h" 41 #include "pflow.h" 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 #include <sys/mbuf.h> 46 #include <sys/filio.h> 47 #include <sys/socket.h> 48 #include <sys/socketvar.h> 49 #include <sys/kernel.h> 50 #include <sys/time.h> 51 #include <sys/pool.h> 52 #include <sys/proc.h> 53 #include <sys/rwlock.h> 54 #include <sys/syslog.h> 55 56 #include <crypto/md5.h> 57 58 #include <net/if.h> 59 #include <net/if_types.h> 60 #include <net/bpf.h> 61 #include <net/route.h> 62 #include <net/radix_mpath.h> 63 64 #include <netinet/in.h> 65 #include <netinet/in_var.h> 66 #include <netinet/in_systm.h> 67 #include <netinet/ip.h> 68 #include <netinet/ip_var.h> 69 #include <netinet/tcp.h> 70 #include <netinet/tcp_seq.h> 71 #include <netinet/udp.h> 72 #include <netinet/ip_icmp.h> 73 #include <netinet/in_pcb.h> 74 #include <netinet/tcp_timer.h> 75 #include <netinet/tcp_var.h> 76 #include <netinet/udp_var.h> 77 #include <netinet/icmp_var.h> 78 #include <netinet/if_ether.h> 79 #include <netinet/ip_divert.h> 80 81 #include <dev/rndvar.h> 82 #include <net/pfvar.h> 83 #include <net/if_pflog.h> 84 #include <net/if_pflow.h> 85 86 #if NPFSYNC > 0 87 #include <net/if_pfsync.h> 88 #endif /* NPFSYNC > 0 */ 89 90 #ifdef INET6 91 #include <netinet/ip6.h> 92 #include <netinet/in_pcb.h> 93 #include <netinet/icmp6.h> 94 #include <netinet6/nd6.h> 95 #include <netinet6/ip6_divert.h> 96 #endif /* INET6 */ 97 98 99 /* 100 * Global variables 101 */ 102 struct pf_state_tree pf_statetbl; 103 104 struct pf_altqqueue pf_altqs[2]; 105 struct pf_altqqueue *pf_altqs_active; 106 struct pf_altqqueue *pf_altqs_inactive; 107 struct pf_status pf_status; 108 109 u_int32_t ticket_altqs_active; 110 u_int32_t ticket_altqs_inactive; 111 int altqs_inactive_open; 112 113 MD5_CTX pf_tcp_secret_ctx; 114 u_char pf_tcp_secret[16]; 115 int pf_tcp_secret_init; 116 int pf_tcp_iss_off; 117 118 struct pf_anchor_stackframe { 119 struct 
pf_ruleset *rs; 120 struct pf_rule *r; 121 struct pf_anchor_node *parent; 122 struct pf_anchor *child; 123 } pf_anchor_stack[64]; 124 125 /* 126 * Cannot fold into pf_pdesc directly, unknown storage size outside pf.c. 127 * Keep in sync with union pf_headers in pflog_bpfcopy() in if_pflog.c. 128 */ 129 union pf_headers { 130 struct tcphdr tcp; 131 struct udphdr udp; 132 struct icmp icmp; 133 #ifdef INET6 134 struct icmp6_hdr icmp6; 135 struct mld_hdr mld; 136 struct nd_neighbor_solicit nd_ns; 137 #endif /* INET6 */ 138 }; 139 140 141 struct pool pf_src_tree_pl, pf_rule_pl; 142 struct pool pf_state_pl, pf_state_key_pl, pf_state_item_pl; 143 struct pool pf_altq_pl, pf_rule_item_pl, pf_sn_item_pl; 144 145 void pf_init_threshold(struct pf_threshold *, u_int32_t, 146 u_int32_t); 147 void pf_add_threshold(struct pf_threshold *); 148 int pf_check_threshold(struct pf_threshold *); 149 150 void pf_change_ap(struct pf_addr *, u_int16_t *, 151 u_int16_t *, struct pf_addr *, u_int16_t, 152 u_int8_t, sa_family_t, sa_family_t); 153 int pf_modulate_sack(struct pf_pdesc *, 154 struct pf_state_peer *); 155 void pf_change_a6(struct pf_addr *, u_int16_t *, 156 struct pf_addr *, u_int8_t); 157 int pf_icmp_mapping(struct pf_pdesc *, u_int8_t, int *, 158 int *, u_int16_t *, u_int16_t *); 159 void pf_change_icmp(struct pf_addr *, u_int16_t *, 160 struct pf_addr *, struct pf_addr *, u_int16_t, 161 u_int16_t *, u_int16_t *, u_int16_t *, 162 u_int8_t, sa_family_t); 163 int pf_change_icmp_af(struct mbuf *, int, 164 struct pf_pdesc *, struct pf_pdesc *, 165 struct pf_addr *, struct pf_addr *, sa_family_t, 166 sa_family_t); 167 int pf_translate_icmp_af(int, void *); 168 void pf_send_tcp(const struct pf_rule *, sa_family_t, 169 const struct pf_addr *, const struct pf_addr *, 170 u_int16_t, u_int16_t, u_int32_t, u_int32_t, 171 u_int8_t, u_int16_t, u_int16_t, u_int8_t, int, 172 u_int16_t, u_int, struct ether_header *, 173 struct ifnet *); 174 void pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t, 
175 sa_family_t, struct pf_rule *, u_int); 176 void pf_detach_state(struct pf_state *); 177 void pf_state_key_detach(struct pf_state *, int); 178 u_int32_t pf_tcp_iss(struct pf_pdesc *); 179 void pf_rule_to_actions(struct pf_rule *, 180 struct pf_rule_actions *); 181 int pf_test_rule(struct pf_pdesc *, struct pf_rule **, 182 struct pf_state **, struct pf_rule **, 183 struct pf_ruleset **); 184 static __inline int pf_create_state(struct pf_pdesc *, struct pf_rule *, 185 struct pf_rule *, struct pf_rule *, 186 struct pf_state_key **, struct pf_state_key **, 187 int *, struct pf_state **, int, 188 struct pf_rule_slist *, struct pf_rule_actions *, 189 struct pf_src_node *[]); 190 static __inline int pf_state_key_addr_setup(struct pf_pdesc *, void *, 191 int, struct pf_addr *, int, struct pf_addr *, 192 int, int); 193 int pf_state_key_setup(struct pf_pdesc *, struct 194 pf_state_key **, struct pf_state_key **, int); 195 int pf_tcp_track_full(struct pf_pdesc *, 196 struct pf_state_peer *, struct pf_state_peer *, 197 struct pf_state **, u_short *, int *); 198 int pf_tcp_track_sloppy(struct pf_pdesc *, 199 struct pf_state_peer *, struct pf_state_peer *, 200 struct pf_state **, u_short *); 201 int pf_test_state_tcp(struct pf_pdesc *, 202 struct pf_state **, u_short *); 203 int pf_test_state_udp( struct pf_pdesc *, 204 struct pf_state **); 205 int pf_icmp_state_lookup(struct pf_pdesc *, 206 struct pf_state_key_cmp *, struct pf_state **, 207 u_int16_t, u_int16_t, int, int *, int, int); 208 int pf_test_state_icmp(struct pf_pdesc *, 209 struct pf_state **, u_short *); 210 int pf_test_state_other( struct pf_pdesc *, 211 struct pf_state **); 212 u_int8_t pf_get_wscale(struct pf_pdesc *); 213 u_int16_t pf_get_mss(struct pf_pdesc *); 214 u_int16_t pf_calc_mss(struct pf_addr *, sa_family_t, int, 215 u_int16_t); 216 void pf_set_rt_ifp(struct pf_state *, 217 struct pf_addr *); 218 int pf_check_proto_cksum(struct mbuf *, int, int, 219 u_int8_t, sa_family_t); 220 struct pf_divert 
*pf_get_divert(struct mbuf *); 221 int pf_walk_option6(struct pf_pdesc *, struct ip6_hdr *, 222 int, int, u_short *); 223 int pf_walk_header6(struct pf_pdesc *, struct ip6_hdr *, 224 u_short *); 225 void pf_print_state_parts(struct pf_state *, 226 struct pf_state_key *, struct pf_state_key *); 227 int pf_addr_wrap_neq(struct pf_addr_wrap *, 228 struct pf_addr_wrap *); 229 int pf_compare_state_keys(struct pf_state_key *, 230 struct pf_state_key *, struct pfi_kif *, u_int); 231 struct pf_state *pf_find_state(struct pfi_kif *, 232 struct pf_state_key_cmp *, u_int, struct mbuf *); 233 int pf_src_connlimit(struct pf_state **); 234 int pf_check_congestion(struct ifqueue *); 235 int pf_match_rcvif(struct mbuf *, struct pf_rule *); 236 void pf_counters_inc(int, struct pf_pdesc *, 237 struct pf_state *, struct pf_rule *, 238 struct pf_rule *); 239 240 extern struct pool pfr_ktable_pl; 241 extern struct pool pfr_kentry_pl; 242 243 struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = { 244 { &pf_state_pl, PFSTATE_HIWAT, PFSTATE_HIWAT }, 245 { &pf_src_tree_pl, PFSNODE_HIWAT, PFSNODE_HIWAT }, 246 { &pf_frent_pl, PFFRAG_FRENT_HIWAT, PFFRAG_FRENT_HIWAT }, 247 { &pfr_ktable_pl, PFR_KTABLE_HIWAT, PFR_KTABLE_HIWAT }, 248 { &pfr_kentry_pl, PFR_KENTRY_HIWAT, PFR_KENTRY_HIWAT } 249 }; 250 251 enum { PF_ICMP_MULTI_NONE, PF_ICMP_MULTI_LINK }; 252 253 254 #define STATE_LOOKUP(i, k, d, s, m) \ 255 do { \ 256 s = pf_find_state(i, k, d, m); \ 257 if (s == NULL || (s)->timeout == PFTM_PURGE) \ 258 return (PF_DROP); \ 259 if (d == PF_OUT && \ 260 (((s)->rule.ptr->rt == PF_ROUTETO && \ 261 (s)->rule.ptr->direction == PF_OUT) || \ 262 ((s)->rule.ptr->rt == PF_REPLYTO && \ 263 (s)->rule.ptr->direction == PF_IN)) && \ 264 (s)->rt_kif != NULL && \ 265 (s)->rt_kif != i) \ 266 return (PF_PASS); \ 267 } while (0) 268 269 #define BOUND_IFACE(r, k) \ 270 ((r)->rule_flag & PFRULE_IFBOUND) ? 
(k) : pfi_all 271 272 #define STATE_INC_COUNTERS(s) \ 273 do { \ 274 struct pf_rule_item *mrm; \ 275 s->rule.ptr->states_cur++; \ 276 s->rule.ptr->states_tot++; \ 277 if (s->anchor.ptr != NULL) { \ 278 s->anchor.ptr->states_cur++; \ 279 s->anchor.ptr->states_tot++; \ 280 } \ 281 SLIST_FOREACH(mrm, &s->match_rules, entry) \ 282 mrm->r->states_cur++; \ 283 } while (0) 284 285 #define STATE_DEC_COUNTERS(s) \ 286 do { \ 287 struct pf_rule_item *mrm; \ 288 if (s->anchor.ptr != NULL) \ 289 s->anchor.ptr->states_cur--; \ 290 s->rule.ptr->states_cur--; \ 291 SLIST_FOREACH(mrm, &s->match_rules, entry) \ 292 mrm->r->states_cur--; \ 293 } while (0) 294 295 static __inline int pf_src_compare(struct pf_src_node *, struct pf_src_node *); 296 static __inline int pf_state_compare_key(struct pf_state_key *, 297 struct pf_state_key *); 298 static __inline int pf_state_compare_id(struct pf_state *, 299 struct pf_state *); 300 301 struct pf_src_tree tree_src_tracking; 302 303 struct pf_state_tree_id tree_id; 304 struct pf_state_queue state_list; 305 306 RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare); 307 RB_GENERATE(pf_state_tree, pf_state_key, entry, pf_state_compare_key); 308 RB_GENERATE(pf_state_tree_id, pf_state, 309 entry_id, pf_state_compare_id); 310 311 __inline int 312 pf_addr_compare(struct pf_addr *a, struct pf_addr *b, sa_family_t af) 313 { 314 switch (af) { 315 #ifdef INET 316 case AF_INET: 317 if (a->addr32[0] > b->addr32[0]) 318 return (1); 319 if (a->addr32[0] < b->addr32[0]) 320 return (-1); 321 break; 322 #endif /* INET */ 323 #ifdef INET6 324 case AF_INET6: 325 if (a->addr32[3] > b->addr32[3]) 326 return (1); 327 if (a->addr32[3] < b->addr32[3]) 328 return (-1); 329 if (a->addr32[2] > b->addr32[2]) 330 return (1); 331 if (a->addr32[2] < b->addr32[2]) 332 return (-1); 333 if (a->addr32[1] > b->addr32[1]) 334 return (1); 335 if (a->addr32[1] < b->addr32[1]) 336 return (-1); 337 if (a->addr32[0] > b->addr32[0]) 338 return (1); 339 if (a->addr32[0] < 
b->addr32[0]) 340 return (-1); 341 break; 342 #endif /* INET6 */ 343 } 344 return (0); 345 } 346 347 static __inline int 348 pf_src_compare(struct pf_src_node *a, struct pf_src_node *b) 349 { 350 int diff; 351 352 if (a->rule.ptr > b->rule.ptr) 353 return (1); 354 if (a->rule.ptr < b->rule.ptr) 355 return (-1); 356 if ((diff = a->type - b->type) != 0) 357 return (diff); 358 if ((diff = a->af - b->af) != 0) 359 return (diff); 360 if ((diff = pf_addr_compare(&a->addr, &b->addr, a->af)) != 0) 361 return (diff); 362 return (0); 363 } 364 365 #ifdef INET6 366 void 367 pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af) 368 { 369 switch (af) { 370 #ifdef INET 371 case AF_INET: 372 dst->addr32[0] = src->addr32[0]; 373 break; 374 #endif /* INET */ 375 case AF_INET6: 376 dst->addr32[0] = src->addr32[0]; 377 dst->addr32[1] = src->addr32[1]; 378 dst->addr32[2] = src->addr32[2]; 379 dst->addr32[3] = src->addr32[3]; 380 break; 381 } 382 } 383 #endif /* INET6 */ 384 385 void 386 pf_init_threshold(struct pf_threshold *threshold, 387 u_int32_t limit, u_int32_t seconds) 388 { 389 threshold->limit = limit * PF_THRESHOLD_MULT; 390 threshold->seconds = seconds; 391 threshold->count = 0; 392 threshold->last = time_second; 393 } 394 395 void 396 pf_add_threshold(struct pf_threshold *threshold) 397 { 398 u_int32_t t = time_second, diff = t - threshold->last; 399 400 if (diff >= threshold->seconds) 401 threshold->count = 0; 402 else 403 threshold->count -= threshold->count * diff / 404 threshold->seconds; 405 threshold->count += PF_THRESHOLD_MULT; 406 threshold->last = t; 407 } 408 409 int 410 pf_check_threshold(struct pf_threshold *threshold) 411 { 412 return (threshold->count > threshold->limit); 413 } 414 415 int 416 pf_src_connlimit(struct pf_state **state) 417 { 418 int bad = 0; 419 struct pf_src_node *sn; 420 421 if ((sn = pf_get_src_node((*state), PF_SN_NONE)) == NULL) 422 return (0); 423 424 sn->conn++; 425 (*state)->src.tcp_est = 1; 426 
pf_add_threshold(&sn->conn_rate); 427 428 if ((*state)->rule.ptr->max_src_conn && 429 (*state)->rule.ptr->max_src_conn < sn->conn) { 430 pf_status.lcounters[LCNT_SRCCONN]++; 431 bad++; 432 } 433 434 if ((*state)->rule.ptr->max_src_conn_rate.limit && 435 pf_check_threshold(&sn->conn_rate)) { 436 pf_status.lcounters[LCNT_SRCCONNRATE]++; 437 bad++; 438 } 439 440 if (!bad) 441 return (0); 442 443 if ((*state)->rule.ptr->overload_tbl) { 444 struct pfr_addr p; 445 u_int32_t killed = 0; 446 447 pf_status.lcounters[LCNT_OVERLOAD_TABLE]++; 448 if (pf_status.debug >= LOG_NOTICE) { 449 log(LOG_NOTICE, 450 "pf: pf_src_connlimit: blocking address "); 451 pf_print_host(&sn->addr, 0, 452 (*state)->key[PF_SK_WIRE]->af); 453 } 454 455 bzero(&p, sizeof(p)); 456 p.pfra_af = (*state)->key[PF_SK_WIRE]->af; 457 switch ((*state)->key[PF_SK_WIRE]->af) { 458 #ifdef INET 459 case AF_INET: 460 p.pfra_net = 32; 461 p.pfra_ip4addr = sn->addr.v4; 462 break; 463 #endif /* INET */ 464 #ifdef INET6 465 case AF_INET6: 466 p.pfra_net = 128; 467 p.pfra_ip6addr = sn->addr.v6; 468 break; 469 #endif /* INET6 */ 470 } 471 472 pfr_insert_kentry((*state)->rule.ptr->overload_tbl, 473 &p, time_second); 474 475 /* kill existing states if that's required. */ 476 if ((*state)->rule.ptr->flush) { 477 struct pf_state_key *sk; 478 struct pf_state *st; 479 480 pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++; 481 RB_FOREACH(st, pf_state_tree_id, &tree_id) { 482 sk = st->key[PF_SK_WIRE]; 483 /* 484 * Kill states from this source. 
(Only those 485 * from the same rule if PF_FLUSH_GLOBAL is not 486 * set) 487 */ 488 if (sk->af == 489 (*state)->key[PF_SK_WIRE]->af && 490 (((*state)->direction == PF_OUT && 491 PF_AEQ(&sn->addr, &sk->addr[1], sk->af)) || 492 ((*state)->direction == PF_IN && 493 PF_AEQ(&sn->addr, &sk->addr[0], sk->af))) && 494 ((*state)->rule.ptr->flush & 495 PF_FLUSH_GLOBAL || 496 (*state)->rule.ptr == st->rule.ptr)) { 497 st->timeout = PFTM_PURGE; 498 st->src.state = st->dst.state = 499 TCPS_CLOSED; 500 killed++; 501 } 502 } 503 if (pf_status.debug >= LOG_NOTICE) 504 addlog(", %u states killed", killed); 505 } 506 if (pf_status.debug >= LOG_NOTICE) 507 addlog("\n"); 508 } 509 510 /* kill this state */ 511 (*state)->timeout = PFTM_PURGE; 512 (*state)->src.state = (*state)->dst.state = TCPS_CLOSED; 513 return (1); 514 } 515 516 int 517 pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule, 518 enum pf_sn_types type, sa_family_t af, struct pf_addr *src, 519 struct pf_addr *raddr, int global) 520 { 521 struct pf_src_node k; 522 523 if (*sn == NULL) { 524 k.af = af; 525 k.type = type; 526 PF_ACPY(&k.addr, src, af); 527 if (global) 528 k.rule.ptr = NULL; 529 else 530 k.rule.ptr = rule; 531 pf_status.scounters[SCNT_SRC_NODE_SEARCH]++; 532 *sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k); 533 } 534 if (*sn == NULL) { 535 if (!rule->max_src_nodes || 536 rule->src_nodes < rule->max_src_nodes) 537 (*sn) = pool_get(&pf_src_tree_pl, PR_NOWAIT | PR_ZERO); 538 else 539 pf_status.lcounters[LCNT_SRCNODES]++; 540 if ((*sn) == NULL) 541 return (-1); 542 543 pf_init_threshold(&(*sn)->conn_rate, 544 rule->max_src_conn_rate.limit, 545 rule->max_src_conn_rate.seconds); 546 547 (*sn)->type = type; 548 (*sn)->af = af; 549 if (global) 550 (*sn)->rule.ptr = NULL; 551 else 552 (*sn)->rule.ptr = rule; 553 PF_ACPY(&(*sn)->addr, src, af); 554 if (raddr) 555 PF_ACPY(&(*sn)->raddr, raddr, af); 556 if (RB_INSERT(pf_src_tree, 557 &tree_src_tracking, *sn) != NULL) { 558 if (pf_status.debug >= 
LOG_NOTICE) { 559 log(LOG_NOTICE, 560 "pf: src_tree insert failed: "); 561 pf_print_host(&(*sn)->addr, 0, af); 562 addlog("\n"); 563 } 564 pool_put(&pf_src_tree_pl, *sn); 565 return (-1); 566 } 567 (*sn)->creation = time_uptime; 568 if ((*sn)->rule.ptr != NULL) 569 (*sn)->rule.ptr->src_nodes++; 570 pf_status.scounters[SCNT_SRC_NODE_INSERT]++; 571 pf_status.src_nodes++; 572 } else { 573 if (rule->max_src_states && 574 (*sn)->states >= rule->max_src_states) { 575 pf_status.lcounters[LCNT_SRCSTATES]++; 576 return (-1); 577 } 578 } 579 return (0); 580 } 581 582 void 583 pf_remove_src_node(struct pf_src_node *sn) 584 { 585 if (sn->states > 0 || sn->expire > time_second) 586 return; 587 588 if (sn->rule.ptr != NULL) { 589 sn->rule.ptr->src_nodes--; 590 if (sn->rule.ptr->states_cur <= 0 && 591 sn->rule.ptr->src_nodes <= 0) 592 pf_rm_rule(NULL, sn->rule.ptr); 593 RB_REMOVE(pf_src_tree, &tree_src_tracking, sn); 594 pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; 595 pf_status.src_nodes--; 596 pool_put(&pf_src_tree_pl, sn); 597 } 598 } 599 600 struct pf_src_node * 601 pf_get_src_node(struct pf_state *s, enum pf_sn_types type) 602 { 603 struct pf_sn_item *sni; 604 605 SLIST_FOREACH(sni, &s->src_nodes, next) 606 if (sni->sn->type == type) 607 return (sni->sn); 608 return (NULL); 609 } 610 611 void 612 pf_state_rm_src_node(struct pf_state *s, struct pf_src_node *sn) 613 { 614 struct pf_sn_item *sni, *snin, *snip = NULL; 615 616 for (sni = SLIST_FIRST(&s->src_nodes); sni; sni = snin) { 617 snin = SLIST_NEXT(sni, next); 618 if (sni->sn == sn) { 619 if (snip) 620 SLIST_REMOVE_AFTER(snip, next); 621 else 622 SLIST_REMOVE_HEAD(&s->src_nodes, next); 623 pool_put(&pf_sn_item_pl, sni); 624 sn->states--; 625 } 626 snip = sni; 627 } 628 } 629 630 /* state table stuff */ 631 632 static __inline int 633 pf_state_compare_key(struct pf_state_key *a, struct pf_state_key *b) 634 { 635 int diff; 636 637 if ((diff = a->proto - b->proto) != 0) 638 return (diff); 639 if ((diff = a->af - b->af) != 
0) 640 return (diff); 641 if ((diff = pf_addr_compare(&a->addr[0], &b->addr[0], a->af)) != 0) 642 return (diff); 643 if ((diff = pf_addr_compare(&a->addr[1], &b->addr[1], a->af)) != 0) 644 return (diff); 645 if ((diff = a->port[0] - b->port[0]) != 0) 646 return (diff); 647 if ((diff = a->port[1] - b->port[1]) != 0) 648 return (diff); 649 if ((diff = a->rdomain - b->rdomain) != 0) 650 return (diff); 651 return (0); 652 } 653 654 static __inline int 655 pf_state_compare_id(struct pf_state *a, struct pf_state *b) 656 { 657 if (a->id > b->id) 658 return (1); 659 if (a->id < b->id) 660 return (-1); 661 if (a->creatorid > b->creatorid) 662 return (1); 663 if (a->creatorid < b->creatorid) 664 return (-1); 665 666 return (0); 667 } 668 669 int 670 pf_state_key_attach(struct pf_state_key *sk, struct pf_state *s, int idx) 671 { 672 struct pf_state_item *si; 673 struct pf_state_key *cur; 674 struct pf_state *olds = NULL; 675 676 KASSERT(s->key[idx] == NULL); 677 if ((cur = RB_INSERT(pf_state_tree, &pf_statetbl, sk)) != NULL) { 678 /* key exists. check for same kif, if none, add to key */ 679 TAILQ_FOREACH(si, &cur->states, entry) 680 if (si->s->kif == s->kif && 681 ((si->s->key[PF_SK_WIRE]->af == sk->af && 682 si->s->direction == s->direction) || 683 (si->s->key[PF_SK_WIRE]->af != 684 si->s->key[PF_SK_STACK]->af && 685 sk->af == si->s->key[PF_SK_STACK]->af && 686 si->s->direction != s->direction))) { 687 if (sk->proto == IPPROTO_TCP && 688 si->s->src.state >= TCPS_FIN_WAIT_2 && 689 si->s->dst.state >= TCPS_FIN_WAIT_2) { 690 si->s->src.state = si->s->dst.state = 691 TCPS_CLOSED; 692 /* unlink late or sks can go away */ 693 olds = si->s; 694 } else { 695 if (pf_status.debug >= LOG_NOTICE) { 696 log(LOG_NOTICE, 697 "pf: %s key attach " 698 "failed on %s: ", 699 (idx == PF_SK_WIRE) ? 700 "wire" : "stack", 701 s->kif->pfik_name); 702 pf_print_state_parts(s, 703 (idx == PF_SK_WIRE) ? 704 sk : NULL, 705 (idx == PF_SK_STACK) ? 
706 sk : NULL); 707 addlog(", existing: "); 708 pf_print_state_parts(si->s, 709 (idx == PF_SK_WIRE) ? 710 sk : NULL, 711 (idx == PF_SK_STACK) ? 712 sk : NULL); 713 addlog("\n"); 714 } 715 pool_put(&pf_state_key_pl, sk); 716 return (-1); /* collision! */ 717 } 718 } 719 pool_put(&pf_state_key_pl, sk); 720 s->key[idx] = cur; 721 } else 722 s->key[idx] = sk; 723 724 if ((si = pool_get(&pf_state_item_pl, PR_NOWAIT)) == NULL) { 725 pf_state_key_detach(s, idx); 726 return (-1); 727 } 728 si->s = s; 729 730 /* list is sorted, if-bound states before floating */ 731 if (s->kif == pfi_all) 732 TAILQ_INSERT_TAIL(&s->key[idx]->states, si, entry); 733 else 734 TAILQ_INSERT_HEAD(&s->key[idx]->states, si, entry); 735 736 if (olds) 737 pf_unlink_state(olds); 738 739 return (0); 740 } 741 742 void 743 pf_detach_state(struct pf_state *s) 744 { 745 if (s->key[PF_SK_WIRE] == s->key[PF_SK_STACK]) 746 s->key[PF_SK_WIRE] = NULL; 747 748 if (s->key[PF_SK_STACK] != NULL) 749 pf_state_key_detach(s, PF_SK_STACK); 750 751 if (s->key[PF_SK_WIRE] != NULL) 752 pf_state_key_detach(s, PF_SK_WIRE); 753 } 754 755 void 756 pf_state_key_detach(struct pf_state *s, int idx) 757 { 758 struct pf_state_item *si; 759 760 if (s->key[idx] == NULL) 761 return; 762 763 si = TAILQ_FIRST(&s->key[idx]->states); 764 while (si && si->s != s) 765 si = TAILQ_NEXT(si, entry); 766 767 if (si) { 768 TAILQ_REMOVE(&s->key[idx]->states, si, entry); 769 pool_put(&pf_state_item_pl, si); 770 } 771 772 if (TAILQ_EMPTY(&s->key[idx]->states)) { 773 RB_REMOVE(pf_state_tree, &pf_statetbl, s->key[idx]); 774 if (s->key[idx]->reverse) 775 s->key[idx]->reverse->reverse = NULL; 776 if (s->key[idx]->inp) 777 s->key[idx]->inp->inp_pf_sk = NULL; 778 pool_put(&pf_state_key_pl, s->key[idx]); 779 } 780 s->key[idx] = NULL; 781 } 782 783 struct pf_state_key * 784 pf_alloc_state_key(int pool_flags) 785 { 786 struct pf_state_key *sk; 787 788 if ((sk = pool_get(&pf_state_key_pl, pool_flags)) == NULL) 789 return (NULL); 790 TAILQ_INIT(&sk->states); 
791 792 return (sk); 793 } 794 795 static __inline int 796 pf_state_key_addr_setup(struct pf_pdesc *pd, void *arg, int sidx, 797 struct pf_addr *saddr, int didx, struct pf_addr *daddr, int af, int multi) 798 { 799 struct pf_state_key_cmp *key = arg; 800 #ifdef INET6 801 struct nd_neighbor_solicit *nd; 802 struct pf_addr *target; 803 804 if (af == AF_INET || pd->proto != IPPROTO_ICMPV6) 805 goto copy; 806 807 switch (pd->hdr.icmp6->icmp6_type) { 808 case ND_NEIGHBOR_SOLICIT: 809 if (multi) 810 return (-1); 811 nd = (void *)pd->hdr.icmp6; 812 target = (struct pf_addr *)&nd->nd_ns_target; 813 daddr = target; 814 break; 815 case ND_NEIGHBOR_ADVERT: 816 if (multi) 817 return (-1); 818 nd = (void *)pd->hdr.icmp6; 819 target = (struct pf_addr *)&nd->nd_ns_target; 820 saddr = target; 821 if (IN6_IS_ADDR_MULTICAST(&pd->dst->v6)) { 822 key->addr[didx].addr32[0] = 0; 823 key->addr[didx].addr32[1] = 0; 824 key->addr[didx].addr32[2] = 0; 825 key->addr[didx].addr32[3] = 0; 826 daddr = NULL; /* overwritten */ 827 } 828 break; 829 default: 830 if (multi == PF_ICMP_MULTI_LINK) { 831 key->addr[sidx].addr32[0] = IPV6_ADDR_INT32_MLL; 832 key->addr[sidx].addr32[1] = 0; 833 key->addr[sidx].addr32[2] = 0; 834 key->addr[sidx].addr32[3] = IPV6_ADDR_INT32_ONE; 835 saddr = NULL; /* overwritten */ 836 } 837 } 838 copy: 839 #endif 840 if (saddr) 841 PF_ACPY(&key->addr[sidx], saddr, af); 842 if (daddr) 843 PF_ACPY(&key->addr[didx], daddr, af); 844 845 return (0); 846 } 847 848 int 849 pf_state_key_setup(struct pf_pdesc *pd, struct pf_state_key **skw, 850 struct pf_state_key **sks, int rtableid) 851 { 852 /* if returning error we MUST pool_put state keys ourselves */ 853 struct pf_state_key *sk1, *sk2; 854 u_int wrdom = pd->rdomain; 855 int afto = pd->af != pd->naf; 856 857 if ((sk1 = pf_alloc_state_key(PR_NOWAIT | PR_ZERO)) == NULL) 858 return (ENOMEM); 859 860 pf_state_key_addr_setup(pd, sk1, pd->sidx, pd->src, pd->didx, pd->dst, 861 pd->af, 0); 862 sk1->port[pd->sidx] = pd->osport; 863 
sk1->port[pd->didx] = pd->odport; 864 sk1->proto = pd->proto; 865 sk1->af = pd->af; 866 sk1->rdomain = pd->rdomain; 867 if (rtableid >= 0) 868 wrdom = rtable_l2(rtableid); 869 870 if (PF_ANEQ(&pd->nsaddr, pd->src, pd->af) || 871 PF_ANEQ(&pd->ndaddr, pd->dst, pd->af) || 872 pd->nsport != pd->osport || pd->ndport != pd->odport || 873 wrdom != pd->rdomain || afto) { /* NAT/NAT64 */ 874 if ((sk2 = pf_alloc_state_key(PR_NOWAIT | PR_ZERO)) == NULL) { 875 pool_put(&pf_state_key_pl, sk1); 876 return (ENOMEM); 877 } 878 pf_state_key_addr_setup(pd, sk2, afto ? pd->didx : pd->sidx, 879 &pd->nsaddr, afto ? pd->sidx : pd->didx, &pd->ndaddr, 880 pd->naf, 0); 881 sk2->port[afto ? pd->didx : pd->sidx] = pd->nsport; 882 sk2->port[afto ? pd->sidx : pd->didx] = pd->ndport; 883 if (afto) { 884 switch (pd->proto) { 885 case IPPROTO_ICMP: 886 sk2->proto = IPPROTO_ICMPV6; 887 break; 888 case IPPROTO_ICMPV6: 889 sk2->proto = IPPROTO_ICMP; 890 break; 891 default: 892 sk2->proto = pd->proto; 893 } 894 } else 895 sk2->proto = pd->proto; 896 sk2->af = pd->naf; 897 sk2->rdomain = wrdom; 898 } else 899 sk2 = sk1; 900 901 if (pd->dir == PF_IN) { 902 *skw = sk1; 903 *sks = sk2; 904 } else { 905 *sks = sk1; 906 *skw = sk2; 907 } 908 909 if (pf_status.debug >= LOG_DEBUG) { 910 log(LOG_DEBUG, "pf: key setup: "); 911 pf_print_state_parts(NULL, *skw, *sks); 912 addlog("\n"); 913 } 914 915 return (0); 916 } 917 918 int 919 pf_state_insert(struct pfi_kif *kif, struct pf_state_key *skw, 920 struct pf_state_key *sks, struct pf_state *s) 921 { 922 splsoftassert(IPL_SOFTNET); 923 924 s->kif = kif; 925 if (skw == sks) { 926 if (pf_state_key_attach(skw, s, PF_SK_WIRE)) 927 return (-1); 928 s->key[PF_SK_STACK] = s->key[PF_SK_WIRE]; 929 } else { 930 if (pf_state_key_attach(skw, s, PF_SK_WIRE)) { 931 pool_put(&pf_state_key_pl, sks); 932 return (-1); 933 } 934 if (pf_state_key_attach(sks, s, PF_SK_STACK)) { 935 pf_state_key_detach(s, PF_SK_WIRE); 936 return (-1); 937 } 938 } 939 940 if (s->id == 0 && s->creatorid 
== 0) { 941 s->id = htobe64(pf_status.stateid++); 942 s->creatorid = pf_status.hostid; 943 } 944 if (RB_INSERT(pf_state_tree_id, &tree_id, s) != NULL) { 945 if (pf_status.debug >= LOG_NOTICE) { 946 log(LOG_NOTICE, "pf: state insert failed: " 947 "id: %016llx creatorid: %08x", 948 betoh64(s->id), ntohl(s->creatorid)); 949 addlog("\n"); 950 } 951 pf_detach_state(s); 952 return (-1); 953 } 954 TAILQ_INSERT_TAIL(&state_list, s, entry_list); 955 pf_status.fcounters[FCNT_STATE_INSERT]++; 956 pf_status.states++; 957 pfi_kif_ref(kif, PFI_KIF_REF_STATE); 958 #if NPFSYNC > 0 959 pfsync_insert_state(s); 960 #endif 961 return (0); 962 } 963 964 struct pf_state * 965 pf_find_state_byid(struct pf_state_cmp *key) 966 { 967 pf_status.fcounters[FCNT_STATE_SEARCH]++; 968 969 return (RB_FIND(pf_state_tree_id, &tree_id, (struct pf_state *)key)); 970 } 971 972 int 973 pf_compare_state_keys(struct pf_state_key *a, struct pf_state_key *b, 974 struct pfi_kif *kif, u_int dir) 975 { 976 /* a (from hdr) and b (new) must be exact opposites of each other */ 977 if (a->af == b->af && a->proto == b->proto && 978 PF_AEQ(&a->addr[0], &b->addr[1], a->af) && 979 PF_AEQ(&a->addr[1], &b->addr[0], a->af) && 980 a->port[0] == b->port[1] && 981 a->port[1] == b->port[0] && a->rdomain == b->rdomain) 982 return (0); 983 else { 984 /* mismatch. must not happen. */ 985 if (pf_status.debug >= LOG_ERR) { 986 log(LOG_ERR, 987 "pf: state key linking mismatch! dir=%s, " 988 "if=%s, stored af=%u, a0: ", 989 dir == PF_OUT ? 
"OUT" : "IN", 990 kif->pfik_name, a->af); 991 pf_print_host(&a->addr[0], a->port[0], a->af); 992 addlog(", a1: "); 993 pf_print_host(&a->addr[1], a->port[1], a->af); 994 addlog(", proto=%u", a->proto); 995 addlog(", found af=%u, a0: ", b->af); 996 pf_print_host(&b->addr[0], b->port[0], b->af); 997 addlog(", a1: "); 998 pf_print_host(&b->addr[1], b->port[1], b->af); 999 addlog(", proto=%u", b->proto); 1000 addlog("\n"); 1001 } 1002 return (-1); 1003 } 1004 } 1005 1006 struct pf_state * 1007 pf_find_state(struct pfi_kif *kif, struct pf_state_key_cmp *key, u_int dir, 1008 struct mbuf *m) 1009 { 1010 struct pf_state_key *sk; 1011 struct pf_state_item *si; 1012 1013 pf_status.fcounters[FCNT_STATE_SEARCH]++; 1014 if (pf_status.debug >= LOG_DEBUG) { 1015 log(LOG_DEBUG, "pf: key search, if=%s: ", kif->pfik_name); 1016 pf_print_state_parts(NULL, (struct pf_state_key *)key, NULL); 1017 addlog("\n"); 1018 } 1019 1020 if (dir == PF_OUT && m->m_pkthdr.pf.statekey && 1021 ((struct pf_state_key *)m->m_pkthdr.pf.statekey)->reverse) 1022 sk = ((struct pf_state_key *)m->m_pkthdr.pf.statekey)->reverse; 1023 else { 1024 if ((sk = RB_FIND(pf_state_tree, &pf_statetbl, 1025 (struct pf_state_key *)key)) == NULL) 1026 return (NULL); 1027 if (dir == PF_OUT && m->m_pkthdr.pf.statekey && 1028 pf_compare_state_keys(m->m_pkthdr.pf.statekey, sk, 1029 kif, dir) == 0) { 1030 ((struct pf_state_key *) 1031 m->m_pkthdr.pf.statekey)->reverse = sk; 1032 sk->reverse = m->m_pkthdr.pf.statekey; 1033 } 1034 } 1035 1036 if (dir == PF_OUT) 1037 m->m_pkthdr.pf.statekey = NULL; 1038 1039 /* list is sorted, if-bound states before floating ones */ 1040 TAILQ_FOREACH(si, &sk->states, entry) 1041 if ((si->s->kif == pfi_all || si->s->kif == kif) && 1042 ((si->s->key[PF_SK_WIRE]->af == si->s->key[PF_SK_STACK]->af 1043 && sk == (dir == PF_IN ? 
si->s->key[PF_SK_WIRE] : 1044 si->s->key[PF_SK_STACK])) || 1045 (si->s->key[PF_SK_WIRE]->af != si->s->key[PF_SK_STACK]->af 1046 && dir == PF_IN && (sk == si->s->key[PF_SK_STACK] || 1047 sk == si->s->key[PF_SK_WIRE])))) 1048 return (si->s); 1049 1050 return (NULL); 1051 } 1052 1053 struct pf_state * 1054 pf_find_state_all(struct pf_state_key_cmp *key, u_int dir, int *more) 1055 { 1056 struct pf_state_key *sk; 1057 struct pf_state_item *si, *ret = NULL; 1058 1059 pf_status.fcounters[FCNT_STATE_SEARCH]++; 1060 1061 sk = RB_FIND(pf_state_tree, &pf_statetbl, (struct pf_state_key *)key); 1062 1063 if (sk != NULL) { 1064 TAILQ_FOREACH(si, &sk->states, entry) 1065 if (dir == PF_INOUT || 1066 (sk == (dir == PF_IN ? si->s->key[PF_SK_WIRE] : 1067 si->s->key[PF_SK_STACK]))) { 1068 if (more == NULL) 1069 return (si->s); 1070 1071 if (ret) 1072 (*more)++; 1073 else 1074 ret = si; 1075 } 1076 } 1077 return (ret ? ret->s : NULL); 1078 } 1079 1080 void 1081 pf_state_export(struct pfsync_state *sp, struct pf_state *st) 1082 { 1083 bzero(sp, sizeof(struct pfsync_state)); 1084 1085 /* copy from state key */ 1086 sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0]; 1087 sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1]; 1088 sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0]; 1089 sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1]; 1090 sp->key[PF_SK_WIRE].rdomain = htons(st->key[PF_SK_WIRE]->rdomain); 1091 sp->key[PF_SK_WIRE].af = st->key[PF_SK_WIRE]->af; 1092 sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0]; 1093 sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1]; 1094 sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0]; 1095 sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1]; 1096 sp->key[PF_SK_STACK].rdomain = htons(st->key[PF_SK_STACK]->rdomain); 1097 sp->key[PF_SK_STACK].af = st->key[PF_SK_STACK]->af; 1098 sp->rtableid[PF_SK_WIRE] = htonl(st->rtableid[PF_SK_WIRE]); 1099 sp->rtableid[PF_SK_STACK] = 
htonl(st->rtableid[PF_SK_STACK]); 1100 sp->proto = st->key[PF_SK_WIRE]->proto; 1101 sp->af = st->key[PF_SK_WIRE]->af; 1102 1103 /* copy from state */ 1104 strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname)); 1105 bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr)); 1106 sp->creation = htonl(time_uptime - st->creation); 1107 sp->expire = pf_state_expires(st); 1108 if (sp->expire <= time_second) 1109 sp->expire = htonl(0); 1110 else 1111 sp->expire = htonl(sp->expire - time_second); 1112 1113 sp->direction = st->direction; 1114 sp->log = st->log; 1115 sp->timeout = st->timeout; 1116 /* XXX replace state_flags post 5.0 */ 1117 sp->state_flags = st->state_flags; 1118 sp->all_state_flags = htons(st->state_flags); 1119 if (!SLIST_EMPTY(&st->src_nodes)) 1120 sp->sync_flags |= PFSYNC_FLAG_SRCNODE; 1121 1122 sp->id = st->id; 1123 sp->creatorid = st->creatorid; 1124 pf_state_peer_hton(&st->src, &sp->src); 1125 pf_state_peer_hton(&st->dst, &sp->dst); 1126 1127 if (st->rule.ptr == NULL) 1128 sp->rule = htonl(-1); 1129 else 1130 sp->rule = htonl(st->rule.ptr->nr); 1131 if (st->anchor.ptr == NULL) 1132 sp->anchor = htonl(-1); 1133 else 1134 sp->anchor = htonl(st->anchor.ptr->nr); 1135 sp->nat_rule = htonl(-1); /* left for compat, nat_rule is gone */ 1136 1137 pf_state_counter_hton(st->packets[0], sp->packets[0]); 1138 pf_state_counter_hton(st->packets[1], sp->packets[1]); 1139 pf_state_counter_hton(st->bytes[0], sp->bytes[0]); 1140 pf_state_counter_hton(st->bytes[1], sp->bytes[1]); 1141 1142 sp->max_mss = htons(st->max_mss); 1143 sp->min_ttl = st->min_ttl; 1144 sp->set_tos = st->set_tos; 1145 } 1146 1147 /* END state table stuff */ 1148 1149 void 1150 pf_purge_thread(void *v) 1151 { 1152 int nloops = 0, s; 1153 1154 for (;;) { 1155 tsleep(pf_purge_thread, PWAIT, "pftm", 1 * hz); 1156 1157 s = splsoftnet(); 1158 1159 /* process a fraction of the state table every second */ 1160 pf_purge_expired_states(1 + (pf_status.states 1161 / 
pf_default_rule.timeout[PFTM_INTERVAL])); 1162 1163 /* purge other expired types every PFTM_INTERVAL seconds */ 1164 if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) { 1165 pf_purge_expired_fragments(); 1166 pf_purge_expired_src_nodes(0); 1167 nloops = 0; 1168 } 1169 1170 splx(s); 1171 } 1172 } 1173 1174 u_int32_t 1175 pf_state_expires(const struct pf_state *state) 1176 { 1177 u_int32_t timeout; 1178 u_int32_t start; 1179 u_int32_t end; 1180 u_int32_t states; 1181 1182 /* handle all PFTM_* > PFTM_MAX here */ 1183 if (state->timeout == PFTM_PURGE) 1184 return (time_second); 1185 KASSERT(state->timeout != PFTM_UNLINKED); 1186 KASSERT(state->timeout < PFTM_MAX); 1187 timeout = state->rule.ptr->timeout[state->timeout]; 1188 if (!timeout) 1189 timeout = pf_default_rule.timeout[state->timeout]; 1190 start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START]; 1191 if (start) { 1192 end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END]; 1193 states = state->rule.ptr->states_cur; 1194 } else { 1195 start = pf_default_rule.timeout[PFTM_ADAPTIVE_START]; 1196 end = pf_default_rule.timeout[PFTM_ADAPTIVE_END]; 1197 states = pf_status.states; 1198 } 1199 if (end && states > start && start < end) { 1200 if (states < end) 1201 return (state->expire + timeout * (end - states) / 1202 (end - start)); 1203 else 1204 return (time_second); 1205 } 1206 return (state->expire + timeout); 1207 } 1208 1209 void 1210 pf_purge_expired_src_nodes(int waslocked) 1211 { 1212 struct pf_src_node *cur, *next; 1213 int locked = waslocked; 1214 1215 for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) { 1216 next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur); 1217 1218 if (cur->states <= 0 && cur->expire <= time_second) { 1219 if (! 
locked) { 1220 rw_enter_write(&pf_consistency_lock); 1221 next = RB_NEXT(pf_src_tree, 1222 &tree_src_tracking, cur); 1223 locked = 1; 1224 } 1225 pf_remove_src_node(cur); 1226 } 1227 } 1228 1229 if (locked && !waslocked) 1230 rw_exit_write(&pf_consistency_lock); 1231 } 1232 1233 void 1234 pf_src_tree_remove_state(struct pf_state *s) 1235 { 1236 u_int32_t timeout; 1237 struct pf_sn_item *sni; 1238 1239 while ((sni = SLIST_FIRST(&s->src_nodes)) != NULL) { 1240 SLIST_REMOVE_HEAD(&s->src_nodes, next); 1241 if (s->src.tcp_est) 1242 --sni->sn->conn; 1243 if (--sni->sn->states <= 0) { 1244 timeout = s->rule.ptr->timeout[PFTM_SRC_NODE]; 1245 if (!timeout) 1246 timeout = 1247 pf_default_rule.timeout[PFTM_SRC_NODE]; 1248 sni->sn->expire = time_second + timeout; 1249 } 1250 pool_put(&pf_sn_item_pl, sni); 1251 } 1252 } 1253 1254 /* callers should be at splsoftnet */ 1255 void 1256 pf_unlink_state(struct pf_state *cur) 1257 { 1258 splsoftassert(IPL_SOFTNET); 1259 1260 /* handle load balancing related tasks */ 1261 pf_postprocess_addr(cur); 1262 1263 if (cur->src.state == PF_TCPS_PROXY_DST) { 1264 pf_send_tcp(cur->rule.ptr, cur->key[PF_SK_WIRE]->af, 1265 &cur->key[PF_SK_WIRE]->addr[1], 1266 &cur->key[PF_SK_WIRE]->addr[0], 1267 cur->key[PF_SK_WIRE]->port[1], 1268 cur->key[PF_SK_WIRE]->port[0], 1269 cur->src.seqhi, cur->src.seqlo + 1, 1270 TH_RST|TH_ACK, 0, 0, 0, 1, cur->tag, 1271 cur->key[PF_SK_WIRE]->rdomain, NULL, NULL); 1272 } 1273 RB_REMOVE(pf_state_tree_id, &tree_id, cur); 1274 #if NPFLOW > 0 1275 if (cur->state_flags & PFSTATE_PFLOW) 1276 export_pflow(cur); 1277 #endif 1278 #if NPFSYNC > 0 1279 pfsync_delete_state(cur); 1280 #endif 1281 cur->timeout = PFTM_UNLINKED; 1282 pf_src_tree_remove_state(cur); 1283 pf_detach_state(cur); 1284 } 1285 1286 /* callers should be at splsoftnet and hold the 1287 * write_lock on pf_consistency_lock */ 1288 void 1289 pf_free_state(struct pf_state *cur) 1290 { 1291 struct pf_rule_item *ri; 1292 1293 splsoftassert(IPL_SOFTNET); 1294 1295 #if 
NPFSYNC > 0 1296 if (pfsync_state_in_use(cur)) 1297 return; 1298 #endif 1299 KASSERT(cur->timeout == PFTM_UNLINKED); 1300 if (--cur->rule.ptr->states_cur <= 0 && 1301 cur->rule.ptr->src_nodes <= 0) 1302 pf_rm_rule(NULL, cur->rule.ptr); 1303 if (cur->anchor.ptr != NULL) 1304 if (--cur->anchor.ptr->states_cur <= 0) 1305 pf_rm_rule(NULL, cur->anchor.ptr); 1306 while ((ri = SLIST_FIRST(&cur->match_rules))) { 1307 SLIST_REMOVE_HEAD(&cur->match_rules, entry); 1308 if (--ri->r->states_cur <= 0 && 1309 ri->r->src_nodes <= 0) 1310 pf_rm_rule(NULL, ri->r); 1311 pool_put(&pf_rule_item_pl, ri); 1312 } 1313 pf_normalize_tcp_cleanup(cur); 1314 pfi_kif_unref(cur->kif, PFI_KIF_REF_STATE); 1315 TAILQ_REMOVE(&state_list, cur, entry_list); 1316 if (cur->tag) 1317 pf_tag_unref(cur->tag); 1318 pool_put(&pf_state_pl, cur); 1319 pf_status.fcounters[FCNT_STATE_REMOVALS]++; 1320 pf_status.states--; 1321 } 1322 1323 void 1324 pf_purge_expired_states(u_int32_t maxcheck) 1325 { 1326 static struct pf_state *cur = NULL; 1327 struct pf_state *next; 1328 int locked = 0; 1329 1330 while (maxcheck--) { 1331 /* wrap to start of list when we hit the end */ 1332 if (cur == NULL) { 1333 cur = TAILQ_FIRST(&state_list); 1334 if (cur == NULL) 1335 break; /* list empty */ 1336 } 1337 1338 /* get next state, as cur may get deleted */ 1339 next = TAILQ_NEXT(cur, entry_list); 1340 1341 if (cur->timeout == PFTM_UNLINKED) { 1342 /* free unlinked state */ 1343 if (! locked) { 1344 rw_enter_write(&pf_consistency_lock); 1345 locked = 1; 1346 } 1347 pf_free_state(cur); 1348 } else if (pf_state_expires(cur) <= time_second) { 1349 /* unlink and free expired state */ 1350 pf_unlink_state(cur); 1351 if (! 
locked) { 1352 rw_enter_write(&pf_consistency_lock); 1353 locked = 1; 1354 } 1355 pf_free_state(cur); 1356 } 1357 cur = next; 1358 } 1359 1360 if (locked) 1361 rw_exit_write(&pf_consistency_lock); 1362 } 1363 1364 int 1365 pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw) 1366 { 1367 if (aw->type != PF_ADDR_TABLE) 1368 return (0); 1369 if ((aw->p.tbl = pfr_attach_table(rs, aw->v.tblname, 1)) == NULL) 1370 return (1); 1371 return (0); 1372 } 1373 1374 void 1375 pf_tbladdr_remove(struct pf_addr_wrap *aw) 1376 { 1377 if (aw->type != PF_ADDR_TABLE || aw->p.tbl == NULL) 1378 return; 1379 pfr_detach_table(aw->p.tbl); 1380 aw->p.tbl = NULL; 1381 } 1382 1383 void 1384 pf_tbladdr_copyout(struct pf_addr_wrap *aw) 1385 { 1386 struct pfr_ktable *kt = aw->p.tbl; 1387 1388 if (aw->type != PF_ADDR_TABLE || kt == NULL) 1389 return; 1390 if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) 1391 kt = kt->pfrkt_root; 1392 aw->p.tbl = NULL; 1393 aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ? 
1394 kt->pfrkt_cnt : -1; 1395 } 1396 1397 void 1398 pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af) 1399 { 1400 switch (af) { 1401 #ifdef INET 1402 case AF_INET: { 1403 u_int32_t a = ntohl(addr->addr32[0]); 1404 addlog("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255, 1405 (a>>8)&255, a&255); 1406 if (p) { 1407 p = ntohs(p); 1408 addlog(":%u", p); 1409 } 1410 break; 1411 } 1412 #endif /* INET */ 1413 #ifdef INET6 1414 case AF_INET6: { 1415 u_int16_t b; 1416 u_int8_t i, curstart, curend, maxstart, maxend; 1417 curstart = curend = maxstart = maxend = 255; 1418 for (i = 0; i < 8; i++) { 1419 if (!addr->addr16[i]) { 1420 if (curstart == 255) 1421 curstart = i; 1422 curend = i; 1423 } else { 1424 if ((curend - curstart) > 1425 (maxend - maxstart)) { 1426 maxstart = curstart; 1427 maxend = curend; 1428 } 1429 curstart = curend = 255; 1430 } 1431 } 1432 if ((curend - curstart) > 1433 (maxend - maxstart)) { 1434 maxstart = curstart; 1435 maxend = curend; 1436 } 1437 for (i = 0; i < 8; i++) { 1438 if (i >= maxstart && i <= maxend) { 1439 if (i == 0) 1440 addlog(":"); 1441 if (i == maxend) 1442 addlog(":"); 1443 } else { 1444 b = ntohs(addr->addr16[i]); 1445 addlog("%x", b); 1446 if (i < 7) 1447 addlog(":"); 1448 } 1449 } 1450 if (p) { 1451 p = ntohs(p); 1452 addlog("[%u]", p); 1453 } 1454 break; 1455 } 1456 #endif /* INET6 */ 1457 } 1458 } 1459 1460 void 1461 pf_print_state(struct pf_state *s) 1462 { 1463 pf_print_state_parts(s, NULL, NULL); 1464 } 1465 1466 void 1467 pf_print_state_parts(struct pf_state *s, 1468 struct pf_state_key *skwp, struct pf_state_key *sksp) 1469 { 1470 struct pf_state_key *skw, *sks; 1471 u_int8_t proto, dir; 1472 1473 /* Do our best to fill these, but they're skipped if NULL */ 1474 skw = skwp ? skwp : (s ? s->key[PF_SK_WIRE] : NULL); 1475 sks = sksp ? sksp : (s ? s->key[PF_SK_STACK] : NULL); 1476 proto = skw ? skw->proto : (sks ? sks->proto : 0); 1477 dir = s ? 
s->direction : 0; 1478 1479 switch (proto) { 1480 case IPPROTO_IPV4: 1481 addlog("IPv4"); 1482 break; 1483 case IPPROTO_IPV6: 1484 addlog("IPv6"); 1485 break; 1486 case IPPROTO_TCP: 1487 addlog("TCP"); 1488 break; 1489 case IPPROTO_UDP: 1490 addlog("UDP"); 1491 break; 1492 case IPPROTO_ICMP: 1493 addlog("ICMP"); 1494 break; 1495 case IPPROTO_ICMPV6: 1496 addlog("ICMPv6"); 1497 break; 1498 default: 1499 addlog("%u", proto); 1500 break; 1501 } 1502 switch (dir) { 1503 case PF_IN: 1504 addlog(" in"); 1505 break; 1506 case PF_OUT: 1507 addlog(" out"); 1508 break; 1509 } 1510 if (skw) { 1511 addlog(" wire: (%d) ", skw->rdomain); 1512 pf_print_host(&skw->addr[0], skw->port[0], skw->af); 1513 addlog(" "); 1514 pf_print_host(&skw->addr[1], skw->port[1], skw->af); 1515 } 1516 if (sks) { 1517 addlog(" stack: (%d) ", sks->rdomain); 1518 if (sks != skw) { 1519 pf_print_host(&sks->addr[0], sks->port[0], sks->af); 1520 addlog(" "); 1521 pf_print_host(&sks->addr[1], sks->port[1], sks->af); 1522 } else 1523 addlog("-"); 1524 } 1525 if (s) { 1526 if (proto == IPPROTO_TCP) { 1527 addlog(" [lo=%u high=%u win=%u modulator=%u", 1528 s->src.seqlo, s->src.seqhi, 1529 s->src.max_win, s->src.seqdiff); 1530 if (s->src.wscale && s->dst.wscale) 1531 addlog(" wscale=%u", 1532 s->src.wscale & PF_WSCALE_MASK); 1533 addlog("]"); 1534 addlog(" [lo=%u high=%u win=%u modulator=%u", 1535 s->dst.seqlo, s->dst.seqhi, 1536 s->dst.max_win, s->dst.seqdiff); 1537 if (s->src.wscale && s->dst.wscale) 1538 addlog(" wscale=%u", 1539 s->dst.wscale & PF_WSCALE_MASK); 1540 addlog("]"); 1541 } 1542 addlog(" %u:%u", s->src.state, s->dst.state); 1543 if (s->rule.ptr) 1544 addlog(" @%d", s->rule.ptr->nr); 1545 } 1546 } 1547 1548 void 1549 pf_print_flags(u_int8_t f) 1550 { 1551 if (f) 1552 addlog(" "); 1553 if (f & TH_FIN) 1554 addlog("F"); 1555 if (f & TH_SYN) 1556 addlog("S"); 1557 if (f & TH_RST) 1558 addlog("R"); 1559 if (f & TH_PUSH) 1560 addlog("P"); 1561 if (f & TH_ACK) 1562 addlog("A"); 1563 if (f & TH_URG) 
1564 addlog("U"); 1565 if (f & TH_ECE) 1566 addlog("E"); 1567 if (f & TH_CWR) 1568 addlog("W"); 1569 } 1570 1571 #define PF_SET_SKIP_STEPS(i) \ 1572 do { \ 1573 while (head[i] != cur) { \ 1574 head[i]->skip[i].ptr = cur; \ 1575 head[i] = TAILQ_NEXT(head[i], entries); \ 1576 } \ 1577 } while (0) 1578 1579 void 1580 pf_calc_skip_steps(struct pf_rulequeue *rules) 1581 { 1582 struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT]; 1583 int i; 1584 1585 cur = TAILQ_FIRST(rules); 1586 prev = cur; 1587 for (i = 0; i < PF_SKIP_COUNT; ++i) 1588 head[i] = cur; 1589 while (cur != NULL) { 1590 if (cur->kif != prev->kif || cur->ifnot != prev->ifnot) 1591 PF_SET_SKIP_STEPS(PF_SKIP_IFP); 1592 if (cur->direction != prev->direction) 1593 PF_SET_SKIP_STEPS(PF_SKIP_DIR); 1594 if (cur->onrdomain != prev->onrdomain || 1595 cur->ifnot != prev->ifnot) 1596 PF_SET_SKIP_STEPS(PF_SKIP_RDOM); 1597 if (cur->af != prev->af) 1598 PF_SET_SKIP_STEPS(PF_SKIP_AF); 1599 if (cur->proto != prev->proto) 1600 PF_SET_SKIP_STEPS(PF_SKIP_PROTO); 1601 if (cur->src.neg != prev->src.neg || 1602 pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr)) 1603 PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR); 1604 if (cur->dst.neg != prev->dst.neg || 1605 pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr)) 1606 PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR); 1607 if (cur->src.port[0] != prev->src.port[0] || 1608 cur->src.port[1] != prev->src.port[1] || 1609 cur->src.port_op != prev->src.port_op) 1610 PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT); 1611 if (cur->dst.port[0] != prev->dst.port[0] || 1612 cur->dst.port[1] != prev->dst.port[1] || 1613 cur->dst.port_op != prev->dst.port_op) 1614 PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT); 1615 1616 prev = cur; 1617 cur = TAILQ_NEXT(cur, entries); 1618 } 1619 for (i = 0; i < PF_SKIP_COUNT; ++i) 1620 PF_SET_SKIP_STEPS(i); 1621 } 1622 1623 int 1624 pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2) 1625 { 1626 if (aw1->type != aw2->type) 1627 return (1); 1628 switch (aw1->type) { 1629 case PF_ADDR_ADDRMASK: 
/*
 * Incrementally update a 16-bit Internet checksum after one 16-bit word
 * of the covered data changed from `old' to `new'.
 *
 * The update is cksum + old - new with the carry (or borrow) folded back
 * into the low 16 bits once.  With `udp' non-zero, a checksum of 0 is
 * passed through unchanged and a result of 0 is returned as 0xFFFF.
 */
u_int16_t
pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp)
{
	u_int32_t	sum;

	if (udp != 0 && cksum == 0)
		return (0x0000);

	sum = (u_int32_t)cksum + old - new;
	sum = ((sum >> 16) + (sum & 0xffff)) & 0xffff;

	if (udp != 0 && sum == 0)
		return (0xFFFF);
	return ((u_int16_t)sum);
}
1706 #endif /* INET6 */ 1707 } 1708 break; 1709 #endif /* INET */ 1710 #ifdef INET6 1711 case AF_INET6: 1712 switch (naf) { 1713 #ifdef INET 1714 case AF_INET: 1715 *pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( 1716 pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( 1717 pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc, 1718 ao.addr16[0], an->addr16[0], u), 1719 ao.addr16[1], an->addr16[1], u), 1720 ao.addr16[2], 0, u), 1721 ao.addr16[3], 0, u), 1722 ao.addr16[4], 0, u), 1723 ao.addr16[5], 0, u), 1724 ao.addr16[6], 0, u), 1725 ao.addr16[7], 0, u), 1726 po, pn, u); 1727 break; 1728 #endif /* INET */ 1729 case AF_INET6: 1730 *pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( 1731 pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( 1732 pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc, 1733 ao.addr16[0], an->addr16[0], u), 1734 ao.addr16[1], an->addr16[1], u), 1735 ao.addr16[2], an->addr16[2], u), 1736 ao.addr16[3], an->addr16[3], u), 1737 ao.addr16[4], an->addr16[4], u), 1738 ao.addr16[5], an->addr16[5], u), 1739 ao.addr16[6], an->addr16[6], u), 1740 ao.addr16[7], an->addr16[7], u), 1741 po, pn, u); 1742 break; 1743 } 1744 break; 1745 #endif /* INET6 */ 1746 } 1747 } 1748 1749 /* Changes a u_int32_t. 
/*
 * Replace the 32-bit value at `a' with `an' and, if `c' is not NULL, fix
 * up the checksum at `c' for both 16-bit halves of the change.  `a' is a
 * void pointer and is accessed with memcpy() so that it may be unaligned.
 * `u' is handed to pf_cksum_fixup() as its udp flag.
 */
void
pf_change_a(void *a, u_int16_t *c, u_int32_t an, u_int8_t u)
{
	u_int32_t	ao;
	u_int16_t	ck;

	memcpy(&ao, a, sizeof(ao));
	memcpy(a, &an, sizeof(u_int32_t));

	if (c == NULL)
		return;

	/* upper halves first, then lower halves */
	ck = pf_cksum_fixup(*c, ao >> 16, an >> 16, u);
	*c = pf_cksum_fixup(ck, ao & 0xffff, an & 0xffff, u);
}

#ifdef INET6
/*
 * Overwrite the IPv6 address at `a' with `an' and, if `c' is not NULL,
 * fix up the checksum at `c' for each of the eight 16-bit words.  `u' is
 * handed to pf_cksum_fixup() as its udp flag.
 */
void
pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u)
{
	struct pf_addr	ao;
	u_int16_t	ck;
	int		i;

	PF_ACPY(&ao, a, AF_INET6);
	PF_ACPY(a, an, AF_INET6);

	if (c) {
		ck = *c;
		for (i = 0; i < 8; i++)
			ck = pf_cksum_fixup(ck, ao.addr16[i],
			    an->addr16[i], u);
		*c = ck;
	}
}
#endif /* INET6 */
1794 */ 1795 *icmp_dir = PF_OUT; 1796 *multi = PF_ICMP_MULTI_LINK; 1797 1798 /* Queries (and responses) */ 1799 switch (pd->af) { 1800 #ifdef INET 1801 case AF_INET: 1802 switch (type) { 1803 case ICMP_ECHO: 1804 *icmp_dir = PF_IN; 1805 case ICMP_ECHOREPLY: 1806 *virtual_type = ICMP_ECHO; 1807 *virtual_id = pd->hdr.icmp->icmp_id; 1808 break; 1809 1810 case ICMP_TSTAMP: 1811 *icmp_dir = PF_IN; 1812 case ICMP_TSTAMPREPLY: 1813 *virtual_type = ICMP_TSTAMP; 1814 *virtual_id = pd->hdr.icmp->icmp_id; 1815 break; 1816 1817 case ICMP_IREQ: 1818 *icmp_dir = PF_IN; 1819 case ICMP_IREQREPLY: 1820 *virtual_type = ICMP_IREQ; 1821 *virtual_id = pd->hdr.icmp->icmp_id; 1822 break; 1823 1824 case ICMP_MASKREQ: 1825 *icmp_dir = PF_IN; 1826 case ICMP_MASKREPLY: 1827 *virtual_type = ICMP_MASKREQ; 1828 *virtual_id = pd->hdr.icmp->icmp_id; 1829 break; 1830 1831 case ICMP_IPV6_WHEREAREYOU: 1832 *icmp_dir = PF_IN; 1833 case ICMP_IPV6_IAMHERE: 1834 *virtual_type = ICMP_IPV6_WHEREAREYOU; 1835 *virtual_id = 0; /* Nothing sane to match on! */ 1836 break; 1837 1838 case ICMP_MOBILE_REGREQUEST: 1839 *icmp_dir = PF_IN; 1840 case ICMP_MOBILE_REGREPLY: 1841 *virtual_type = ICMP_MOBILE_REGREQUEST; 1842 *virtual_id = 0; /* Nothing sane to match on! */ 1843 break; 1844 1845 case ICMP_ROUTERSOLICIT: 1846 *icmp_dir = PF_IN; 1847 case ICMP_ROUTERADVERT: 1848 *virtual_type = ICMP_ROUTERSOLICIT; 1849 *virtual_id = 0; /* Nothing sane to match on! */ 1850 break; 1851 1852 /* These ICMP types map to other connections */ 1853 case ICMP_UNREACH: 1854 case ICMP_SOURCEQUENCH: 1855 case ICMP_REDIRECT: 1856 case ICMP_TIMXCEED: 1857 case ICMP_PARAMPROB: 1858 /* These will not be used, but set them anyway */ 1859 *icmp_dir = PF_IN; 1860 *virtual_type = type; 1861 *virtual_id = 0; 1862 HTONS(*virtual_type); 1863 return (1); /* These types match to another state */ 1864 1865 /* 1866 * All remaining ICMP types get their own states, 1867 * and will only match in one direction. 
1868 */ 1869 default: 1870 *icmp_dir = PF_IN; 1871 *virtual_type = type; 1872 *virtual_id = 0; 1873 break; 1874 } 1875 break; 1876 #endif /* INET */ 1877 #ifdef INET6 1878 case AF_INET6: 1879 switch (type) { 1880 case ICMP6_ECHO_REQUEST: 1881 *icmp_dir = PF_IN; 1882 case ICMP6_ECHO_REPLY: 1883 *virtual_type = ICMP6_ECHO_REQUEST; 1884 *virtual_id = pd->hdr.icmp6->icmp6_id; 1885 break; 1886 1887 case MLD_LISTENER_QUERY: 1888 *icmp_dir = PF_IN; 1889 case MLD_LISTENER_REPORT: { 1890 struct mld_hdr *mld = (void *)pd->hdr.icmp6; 1891 u_int32_t h; 1892 1893 *virtual_type = MLD_LISTENER_QUERY; 1894 /* generate fake id for these messages */ 1895 h = mld->mld_addr.s6_addr32[0] ^ 1896 mld->mld_addr.s6_addr32[1] ^ 1897 mld->mld_addr.s6_addr32[2] ^ 1898 mld->mld_addr.s6_addr32[3]; 1899 *virtual_id = (h >> 16) ^ (h & 0xffff); 1900 break; 1901 } 1902 1903 /* 1904 * ICMP6_FQDN and ICMP6_NI query/reply are the same type as 1905 * ICMP6_WRU 1906 */ 1907 case ICMP6_WRUREQUEST: 1908 *icmp_dir = PF_IN; 1909 case ICMP6_WRUREPLY: 1910 *virtual_type = ICMP6_WRUREQUEST; 1911 *virtual_id = 0; /* Nothing sane to match on! */ 1912 break; 1913 1914 case MLD_MTRACE: 1915 *icmp_dir = PF_IN; 1916 case MLD_MTRACE_RESP: 1917 *virtual_type = MLD_MTRACE; 1918 *virtual_id = 0; /* Nothing sane to match on! */ 1919 break; 1920 1921 case ND_NEIGHBOR_SOLICIT: 1922 *icmp_dir = PF_IN; 1923 case ND_NEIGHBOR_ADVERT: { 1924 struct nd_neighbor_solicit *nd = (void *)pd->hdr.icmp6; 1925 u_int32_t h; 1926 1927 *virtual_type = ND_NEIGHBOR_SOLICIT; 1928 /* generate fake id for these messages */ 1929 h = nd->nd_ns_target.s6_addr32[0] ^ 1930 nd->nd_ns_target.s6_addr32[1] ^ 1931 nd->nd_ns_target.s6_addr32[2] ^ 1932 nd->nd_ns_target.s6_addr32[3]; 1933 *virtual_id = (h >> 16) ^ (h & 0xffff); 1934 break; 1935 } 1936 1937 /* 1938 * These ICMP types map to other connections. 1939 * ND_REDIRECT can't be in this list because the triggering 1940 * packet header is optional. 
1941 */ 1942 case ICMP6_DST_UNREACH: 1943 case ICMP6_PACKET_TOO_BIG: 1944 case ICMP6_TIME_EXCEEDED: 1945 case ICMP6_PARAM_PROB: 1946 /* These will not be used, but set them anyway */ 1947 *icmp_dir = PF_IN; 1948 *virtual_type = type; 1949 *virtual_id = 0; 1950 HTONS(*virtual_type); 1951 return (1); /* These types match to another state */ 1952 /* 1953 * All remaining ICMP6 types get their own states, 1954 * and will only match in one direction. 1955 */ 1956 default: 1957 *icmp_dir = PF_IN; 1958 *virtual_type = type; 1959 *virtual_id = 0; 1960 break; 1961 } 1962 break; 1963 #endif /* INET6 */ 1964 } 1965 HTONS(*virtual_type); 1966 return (0); /* These types match to their own state */ 1967 } 1968 1969 void 1970 pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa, 1971 struct pf_addr *na, u_int16_t np, u_int16_t *pc, u_int16_t *h2c, 1972 u_int16_t *ic, u_int8_t u, sa_family_t af) 1973 { 1974 struct pf_addr oia, ooa; 1975 1976 PF_ACPY(&oia, ia, af); 1977 if (oa) 1978 PF_ACPY(&ooa, oa, af); 1979 1980 /* Change inner protocol port, fix inner protocol checksum. */ 1981 if (ip != NULL) { 1982 u_int16_t oip = *ip; 1983 u_int32_t opc; 1984 1985 if (pc != NULL) 1986 opc = *pc; 1987 *ip = np; 1988 if (pc != NULL) 1989 *pc = pf_cksum_fixup(*pc, oip, *ip, u); 1990 *ic = pf_cksum_fixup(*ic, oip, *ip, 0); 1991 if (pc != NULL) 1992 *ic = pf_cksum_fixup(*ic, opc, *pc, 0); 1993 } 1994 /* Change inner ip address, fix inner ip and icmp checksums. 
*/ 1995 PF_ACPY(ia, na, af); 1996 switch (af) { 1997 #ifdef INET 1998 case AF_INET: { 1999 u_int32_t oh2c = *h2c; 2000 2001 /* XXX just in_cksum() */ 2002 *h2c = pf_cksum_fixup(pf_cksum_fixup(*h2c, 2003 oia.addr16[0], ia->addr16[0], 0), 2004 oia.addr16[1], ia->addr16[1], 0); 2005 *ic = pf_cksum_fixup(pf_cksum_fixup(*ic, 2006 oia.addr16[0], ia->addr16[0], 0), 2007 oia.addr16[1], ia->addr16[1], 0); 2008 *ic = pf_cksum_fixup(*ic, oh2c, *h2c, 0); 2009 break; 2010 } 2011 #endif /* INET */ 2012 #ifdef INET6 2013 case AF_INET6: 2014 *ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( 2015 pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( 2016 pf_cksum_fixup(pf_cksum_fixup(*ic, 2017 oia.addr16[0], ia->addr16[0], u), 2018 oia.addr16[1], ia->addr16[1], u), 2019 oia.addr16[2], ia->addr16[2], u), 2020 oia.addr16[3], ia->addr16[3], u), 2021 oia.addr16[4], ia->addr16[4], u), 2022 oia.addr16[5], ia->addr16[5], u), 2023 oia.addr16[6], ia->addr16[6], u), 2024 oia.addr16[7], ia->addr16[7], u); 2025 break; 2026 #endif /* INET6 */ 2027 } 2028 /* Outer ip address, fix outer icmpv6 checksum, if necessary. */ 2029 if (oa) { 2030 PF_ACPY(oa, na, af); 2031 #ifdef INET6 2032 if (af == AF_INET6) 2033 *ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( 2034 pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( 2035 pf_cksum_fixup(pf_cksum_fixup(*ic, 2036 ooa.addr16[0], oa->addr16[0], u), 2037 ooa.addr16[1], oa->addr16[1], u), 2038 ooa.addr16[2], oa->addr16[2], u), 2039 ooa.addr16[3], oa->addr16[3], u), 2040 ooa.addr16[4], oa->addr16[4], u), 2041 ooa.addr16[5], oa->addr16[5], u), 2042 ooa.addr16[6], oa->addr16[6], u), 2043 ooa.addr16[7], oa->addr16[7], u); 2044 #endif /* INET6 */ 2045 } 2046 } 2047 2048 #if INET && INET6 2049 int 2050 pf_translate_af(struct pf_pdesc *pd) 2051 { 2052 struct mbuf *mp; 2053 struct ip *ip4; 2054 struct ip6_hdr *ip6; 2055 struct icmp6_hdr *icmp; 2056 int hlen; 2057 2058 hlen = pd->naf == AF_INET ? 
sizeof(*ip4) : sizeof(*ip6); 2059 2060 /* trim the old header */ 2061 m_adj(pd->m, pd->off); 2062 2063 /* prepend a new one */ 2064 if ((M_PREPEND(pd->m, hlen, M_DONTWAIT)) == NULL) 2065 return (-1); 2066 2067 switch (pd->naf) { 2068 case AF_INET: 2069 ip4 = mtod(pd->m, struct ip *); 2070 bzero(ip4, hlen); 2071 ip4->ip_v = IPVERSION; 2072 ip4->ip_hl = hlen >> 2; 2073 ip4->ip_len = htons(hlen + (pd->tot_len - pd->off)); 2074 ip4->ip_id = htons(ip_randomid()); 2075 ip4->ip_off = htons(IP_DF); 2076 ip4->ip_ttl = pd->ttl; 2077 ip4->ip_p = pd->proto; 2078 ip4->ip_src = pd->nsaddr.v4; 2079 ip4->ip_dst = pd->ndaddr.v4; 2080 break; 2081 case AF_INET6: 2082 ip6 = mtod(pd->m, struct ip6_hdr *); 2083 bzero(ip6, hlen); 2084 ip6->ip6_vfc = IPV6_VERSION; 2085 ip6->ip6_plen = htons(pd->tot_len - pd->off); 2086 ip6->ip6_nxt = pd->proto; 2087 if (!pd->ttl || pd->ttl > IPV6_DEFHLIM) 2088 ip6->ip6_hlim = IPV6_DEFHLIM; 2089 else 2090 ip6->ip6_hlim = pd->ttl; 2091 ip6->ip6_src = pd->nsaddr.v6; 2092 ip6->ip6_dst = pd->ndaddr.v6; 2093 break; 2094 default: 2095 return (-1); 2096 } 2097 2098 /* recalculate icmp/icmp6 checksums */ 2099 if (pd->proto == IPPROTO_ICMP || pd->proto == IPPROTO_ICMPV6) { 2100 int off; 2101 if ((mp = m_pulldown(pd->m, hlen, sizeof(*icmp), &off)) == 2102 NULL) { 2103 pd->m = NULL; 2104 return (-1); 2105 } 2106 icmp = (struct icmp6_hdr *)(mp->m_data + off); 2107 icmp->icmp6_cksum = 0; 2108 icmp->icmp6_cksum = pd->naf == AF_INET ? 
2109 in4_cksum(pd->m, 0, hlen, ntohs(ip4->ip_len) - hlen) : 2110 in6_cksum(pd->m, IPPROTO_ICMPV6, hlen, 2111 ntohs(ip6->ip6_plen)); 2112 } 2113 2114 return (0); 2115 } 2116 2117 int 2118 pf_change_icmp_af(struct mbuf *m, int off, struct pf_pdesc *pd, 2119 struct pf_pdesc *pd2, struct pf_addr *src, struct pf_addr *dst, 2120 sa_family_t af, sa_family_t naf) 2121 { 2122 struct mbuf *n = NULL; 2123 struct ip *ip4; 2124 struct ip6_hdr *ip6; 2125 int hlen, olen, mlen; 2126 2127 if (af == naf || (af != AF_INET && af != AF_INET6) || 2128 (naf != AF_INET && naf != AF_INET6)) 2129 return (-1); 2130 2131 /* split the mbuf chain on the inner ip/ip6 header boundary */ 2132 if ((n = m_split(m, off, M_DONTWAIT)) == NULL) 2133 return (-1); 2134 2135 /* old header */ 2136 olen = pd2->off - off; 2137 /* new header */ 2138 hlen = naf == AF_INET ? sizeof(*ip4) : sizeof(*ip6); 2139 2140 /* trim old header */ 2141 m_adj(n, olen); 2142 2143 /* prepend a new one */ 2144 if ((M_PREPEND(n, hlen, M_DONTWAIT)) == NULL) 2145 return (-1); 2146 2147 /* translate inner ip/ip6 header */ 2148 switch (naf) { 2149 case AF_INET: 2150 ip4 = mtod(n, struct ip *); 2151 bzero(ip4, sizeof(*ip4)); 2152 ip4->ip_v = IPVERSION; 2153 ip4->ip_hl = sizeof(*ip4) >> 2; 2154 ip4->ip_len = htons(sizeof(*ip4) + pd2->tot_len - olen); 2155 ip4->ip_id = htons(ip_randomid()); 2156 ip4->ip_off = htons(IP_DF); 2157 ip4->ip_ttl = pd2->ttl; 2158 if (pd2->proto == IPPROTO_ICMPV6) 2159 ip4->ip_p = IPPROTO_ICMP; 2160 else 2161 ip4->ip_p = pd2->proto; 2162 ip4->ip_src = src->v4; 2163 ip4->ip_dst = dst->v4; 2164 ip4->ip_sum = in_cksum(n, ip4->ip_hl << 2); 2165 break; 2166 case AF_INET6: 2167 ip6 = mtod(n, struct ip6_hdr *); 2168 bzero(ip6, sizeof(*ip6)); 2169 ip6->ip6_vfc = IPV6_VERSION; 2170 ip6->ip6_plen = htons(pd2->tot_len - olen); 2171 if (pd2->proto == IPPROTO_ICMP) 2172 ip6->ip6_nxt = IPPROTO_ICMPV6; 2173 else 2174 ip6->ip6_nxt = pd2->proto; 2175 if (!pd2->ttl || pd2->ttl > IPV6_DEFHLIM) 2176 ip6->ip6_hlim = IPV6_DEFHLIM; 
2177 else 2178 ip6->ip6_hlim = pd2->ttl; 2179 ip6->ip6_src = src->v6; 2180 ip6->ip6_dst = dst->v6; 2181 break; 2182 } 2183 2184 /* adjust payload offset and total packet length */ 2185 pd2->off += hlen - olen; 2186 pd->tot_len += hlen - olen; 2187 2188 /* merge modified inner packet with the original header */ 2189 mlen = n->m_pkthdr.len; 2190 m_cat(m, n); 2191 m->m_pkthdr.len += mlen; 2192 2193 return (0); 2194 } 2195 2196 2197 #define PTR_IP(field) (offsetof(struct ip, field)) 2198 #define PTR_IP6(field) (offsetof(struct ip6_hdr, field)) 2199 2200 int 2201 pf_translate_icmp_af(int af, void *arg) 2202 { 2203 struct icmp *icmp4; 2204 struct icmp6_hdr *icmp6; 2205 u_int32_t mtu; 2206 int32_t ptr = -1; 2207 u_int8_t type; 2208 u_int8_t code; 2209 2210 switch (af) { 2211 case AF_INET: 2212 icmp6 = arg; 2213 type = icmp6->icmp6_type; 2214 code = icmp6->icmp6_code; 2215 mtu = ntohl(icmp6->icmp6_mtu); 2216 2217 switch (type) { 2218 case ICMP6_ECHO_REQUEST: 2219 type = ICMP_ECHO; 2220 break; 2221 case ICMP6_ECHO_REPLY: 2222 type = ICMP_ECHOREPLY; 2223 break; 2224 case ICMP6_DST_UNREACH: 2225 type = ICMP_UNREACH; 2226 switch (code) { 2227 case ICMP6_DST_UNREACH_NOROUTE: 2228 case ICMP6_DST_UNREACH_BEYONDSCOPE: 2229 case ICMP6_DST_UNREACH_ADDR: 2230 code = ICMP_UNREACH_HOST; 2231 break; 2232 case ICMP6_DST_UNREACH_ADMIN: 2233 code = ICMP_UNREACH_HOST_PROHIB; 2234 break; 2235 case ICMP6_DST_UNREACH_NOPORT: 2236 code = ICMP_UNREACH_PORT; 2237 break; 2238 default: 2239 return (-1); 2240 } 2241 break; 2242 case ICMP6_PACKET_TOO_BIG: 2243 type = ICMP_UNREACH; 2244 code = ICMP_UNREACH_NEEDFRAG; 2245 mtu -= 20; 2246 break; 2247 case ICMP6_TIME_EXCEEDED: 2248 type = ICMP_TIMXCEED; 2249 break; 2250 case ICMP6_PARAM_PROB: 2251 switch (code) { 2252 case ICMP6_PARAMPROB_HEADER: 2253 type = ICMP_PARAMPROB; 2254 code = ICMP_PARAMPROB_ERRATPTR; 2255 ptr = ntohl(icmp6->icmp6_pptr); 2256 2257 if (ptr == PTR_IP6(ip6_vfc)) 2258 ; /* preserve */ 2259 else if (ptr == PTR_IP6(ip6_vfc) + 1) 2260 
ptr = PTR_IP(ip_tos); 2261 else if (ptr == PTR_IP6(ip6_plen) || 2262 ptr == PTR_IP6(ip6_plen) + 1) 2263 ptr = PTR_IP(ip_len); 2264 else if (ptr == PTR_IP6(ip6_nxt)) 2265 ptr = PTR_IP(ip_p); 2266 else if (ptr == PTR_IP6(ip6_hlim)) 2267 ptr = PTR_IP(ip_ttl); 2268 else if (ptr >= PTR_IP6(ip6_src) && 2269 ptr < PTR_IP6(ip6_dst)) 2270 ptr = PTR_IP(ip_src); 2271 else if (ptr >= PTR_IP6(ip6_dst) && 2272 ptr < sizeof(struct ip6_hdr)) 2273 ptr = PTR_IP(ip_dst); 2274 else { 2275 return (-1); 2276 } 2277 break; 2278 case ICMP6_PARAMPROB_NEXTHEADER: 2279 type = ICMP_UNREACH; 2280 code = ICMP_UNREACH_PROTOCOL; 2281 break; 2282 default: 2283 return (-1); 2284 } 2285 break; 2286 default: 2287 return (-1); 2288 } 2289 if (icmp6->icmp6_type != type) { 2290 icmp6->icmp6_cksum = pf_cksum_fixup(icmp6->icmp6_cksum, 2291 icmp6->icmp6_type, type, 0); 2292 icmp6->icmp6_type = type; 2293 } 2294 if (icmp6->icmp6_code != code) { 2295 icmp6->icmp6_cksum = pf_cksum_fixup(icmp6->icmp6_cksum, 2296 icmp6->icmp6_code, code, 0); 2297 icmp6->icmp6_code = code; 2298 } 2299 if (icmp6->icmp6_mtu != htonl(mtu)) { 2300 icmp6->icmp6_cksum = pf_cksum_fixup(icmp6->icmp6_cksum, 2301 htons(ntohl(icmp6->icmp6_mtu)), htons(mtu), 0); 2302 /* aligns well with a icmpv4 nextmtu */ 2303 icmp6->icmp6_mtu = htonl(mtu); 2304 } 2305 if (ptr >= 0 && icmp6->icmp6_pptr != htonl(ptr)) { 2306 icmp6->icmp6_cksum = pf_cksum_fixup(icmp6->icmp6_cksum, 2307 htons(ntohl(icmp6->icmp6_pptr)), htons(ptr), 0); 2308 /* icmpv4 pptr is a one most significant byte */ 2309 icmp6->icmp6_pptr = htonl(ptr << 24); 2310 } 2311 break; 2312 case AF_INET6: 2313 icmp4 = arg; 2314 type = icmp4->icmp_type; 2315 code = icmp4->icmp_code; 2316 mtu = ntohs(icmp4->icmp_nextmtu); 2317 2318 switch (type) { 2319 case ICMP_ECHO: 2320 type = ICMP6_ECHO_REQUEST; 2321 break; 2322 case ICMP_ECHOREPLY: 2323 type = ICMP6_ECHO_REPLY; 2324 break; 2325 case ICMP_UNREACH: 2326 type = ICMP6_DST_UNREACH; 2327 switch (code) { 2328 case ICMP_UNREACH_NET: 2329 case 
ICMP_UNREACH_HOST: 2330 case ICMP_UNREACH_NET_UNKNOWN: 2331 case ICMP_UNREACH_HOST_UNKNOWN: 2332 case ICMP_UNREACH_ISOLATED: 2333 case ICMP_UNREACH_TOSNET: 2334 case ICMP_UNREACH_TOSHOST: 2335 code = ICMP6_DST_UNREACH_NOROUTE; 2336 break; 2337 case ICMP_UNREACH_PORT: 2338 code = ICMP6_DST_UNREACH_NOPORT; 2339 break; 2340 case ICMP_UNREACH_NET_PROHIB: 2341 case ICMP_UNREACH_HOST_PROHIB: 2342 case ICMP_UNREACH_FILTER_PROHIB: 2343 case ICMP_UNREACH_PRECEDENCE_CUTOFF: 2344 code = ICMP6_DST_UNREACH_ADMIN; 2345 break; 2346 case ICMP_UNREACH_PROTOCOL: 2347 type = ICMP6_PARAM_PROB; 2348 code = ICMP6_PARAMPROB_NEXTHEADER; 2349 ptr = offsetof(struct ip6_hdr, ip6_nxt); 2350 break; 2351 case ICMP_UNREACH_NEEDFRAG: 2352 type = ICMP6_PACKET_TOO_BIG; 2353 code = 0; 2354 mtu += 20; 2355 break; 2356 default: 2357 return (-1); 2358 } 2359 break; 2360 case ICMP_TIMXCEED: 2361 type = ICMP6_TIME_EXCEEDED; 2362 break; 2363 case ICMP_PARAMPROB: 2364 type = ICMP6_PARAM_PROB; 2365 switch (code) { 2366 case ICMP_PARAMPROB_ERRATPTR: 2367 code = ICMP6_PARAMPROB_HEADER; 2368 break; 2369 case ICMP_PARAMPROB_LENGTH: 2370 code = ICMP6_PARAMPROB_HEADER; 2371 break; 2372 default: 2373 return (-1); 2374 } 2375 2376 ptr = icmp4->icmp_pptr; 2377 if (ptr == 0 || ptr == PTR_IP(ip_tos)) 2378 ; /* preserve */ 2379 else if (ptr == PTR_IP(ip_len) || 2380 ptr == PTR_IP(ip_len) + 1) 2381 ptr = PTR_IP6(ip6_plen); 2382 else if (ptr == PTR_IP(ip_ttl)) 2383 ptr = PTR_IP6(ip6_hlim); 2384 else if (ptr == PTR_IP(ip_p)) 2385 ptr = PTR_IP6(ip6_nxt); 2386 else if (ptr >= PTR_IP(ip_src) && 2387 ptr < PTR_IP(ip_dst)) 2388 ptr = PTR_IP6(ip6_src); 2389 else if (ptr >= PTR_IP(ip_dst) && 2390 ptr < sizeof(struct ip)) 2391 ptr = PTR_IP6(ip6_dst); 2392 else { 2393 return (-1); 2394 } 2395 break; 2396 default: 2397 return (-1); 2398 } 2399 if (icmp4->icmp_type != type) { 2400 icmp4->icmp_cksum = pf_cksum_fixup(icmp4->icmp_cksum, 2401 icmp4->icmp_type, type, 0); 2402 icmp4->icmp_type = type; 2403 } 2404 if (icmp4->icmp_code != 
code) { 2405 icmp4->icmp_cksum = pf_cksum_fixup(icmp4->icmp_cksum, 2406 icmp4->icmp_code, code, 0); 2407 icmp4->icmp_code = code; 2408 } 2409 if (icmp4->icmp_nextmtu != htons(mtu)) { 2410 icmp4->icmp_cksum = pf_cksum_fixup(icmp4->icmp_cksum, 2411 icmp4->icmp_nextmtu, htons(mtu), 0); 2412 icmp4->icmp_nextmtu = htons(mtu); 2413 } 2414 if (ptr >= 0 && icmp4->icmp_void != ptr) { 2415 icmp4->icmp_cksum = pf_cksum_fixup(icmp4->icmp_cksum, 2416 htons(icmp4->icmp_pptr), htons(ptr), 0); 2417 icmp4->icmp_void = htonl(ptr); 2418 } 2419 break; 2420 } 2421 2422 return (0); 2423 } 2424 #endif /* INET && INET6 */ 2425 2426 /* 2427 * Need to modulate the sequence numbers in the TCP SACK option 2428 * (credits to Krzysztof Pfaff for report and patch) 2429 */ 2430 int 2431 pf_modulate_sack(struct pf_pdesc *pd, struct pf_state_peer *dst) 2432 { 2433 struct tcphdr *th = pd->hdr.tcp; 2434 int hlen = (th->th_off << 2) - sizeof(*th); 2435 int thoptlen = hlen; 2436 u_int8_t opts[MAX_TCPOPTLEN], *opt = opts; 2437 int copyback = 0, i, olen; 2438 struct sackblk sack; 2439 2440 #define TCPOLEN_SACKLEN (TCPOLEN_SACK + 2) 2441 if (hlen < TCPOLEN_SACKLEN || !pf_pull_hdr(pd->m, pd->off + sizeof(*th), 2442 opts, hlen, NULL, NULL, pd->af)) 2443 return 0; 2444 2445 while (hlen >= TCPOLEN_SACKLEN) { 2446 olen = opt[1]; 2447 switch (*opt) { 2448 case TCPOPT_EOL: /* FALLTHROUGH */ 2449 case TCPOPT_NOP: 2450 opt++; 2451 hlen--; 2452 break; 2453 case TCPOPT_SACK: 2454 if (olen > hlen) 2455 olen = hlen; 2456 if (olen >= TCPOLEN_SACKLEN) { 2457 for (i = 2; i + TCPOLEN_SACK <= olen; 2458 i += TCPOLEN_SACK) { 2459 memcpy(&sack, &opt[i], sizeof(sack)); 2460 pf_change_a(&sack.start, &th->th_sum, 2461 htonl(ntohl(sack.start) - 2462 dst->seqdiff), 0); 2463 pf_change_a(&sack.end, &th->th_sum, 2464 htonl(ntohl(sack.end) - 2465 dst->seqdiff), 0); 2466 memcpy(&opt[i], &sack, sizeof(sack)); 2467 } 2468 copyback = 1; 2469 } 2470 /* FALLTHROUGH */ 2471 default: 2472 if (olen < 2) 2473 olen = 2; 2474 hlen -= olen; 2475 
opt += olen; 2476 } 2477 } 2478 2479 if (copyback) 2480 m_copyback(pd->m, pd->off + sizeof(*th), thoptlen, opts, 2481 M_NOWAIT); 2482 return (copyback); 2483 } 2484 2485 void 2486 pf_send_tcp(const struct pf_rule *r, sa_family_t af, 2487 const struct pf_addr *saddr, const struct pf_addr *daddr, 2488 u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack, 2489 u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag, 2490 u_int16_t rtag, u_int rdom, struct ether_header *eh, struct ifnet *ifp) 2491 { 2492 struct mbuf *m; 2493 int len, tlen; 2494 #ifdef INET 2495 struct ip *h; 2496 #endif /* INET */ 2497 #ifdef INET6 2498 struct ip6_hdr *h6; 2499 #endif /* INET6 */ 2500 struct tcphdr *th; 2501 char *opt; 2502 2503 /* maximum segment size tcp option */ 2504 tlen = sizeof(struct tcphdr); 2505 if (mss) 2506 tlen += 4; 2507 2508 switch (af) { 2509 #ifdef INET 2510 case AF_INET: 2511 len = sizeof(struct ip) + tlen; 2512 break; 2513 #endif /* INET */ 2514 #ifdef INET6 2515 case AF_INET6: 2516 len = sizeof(struct ip6_hdr) + tlen; 2517 break; 2518 #endif /* INET6 */ 2519 } 2520 2521 /* create outgoing mbuf */ 2522 m = m_gethdr(M_DONTWAIT, MT_HEADER); 2523 if (m == NULL) 2524 return; 2525 if (tag) 2526 m->m_pkthdr.pf.flags |= PF_TAG_GENERATED; 2527 m->m_pkthdr.pf.tag = rtag; 2528 m->m_pkthdr.rdomain = rdom; 2529 if (r && r->set_prio[0] != PF_PRIO_NOTSET) 2530 m->m_pkthdr.pf.prio = r->set_prio[0]; 2531 2532 #ifdef ALTQ 2533 if (r != NULL && r->qid) { 2534 m->m_pkthdr.pf.qid = r->qid; 2535 /* add hints for ecn */ 2536 m->m_pkthdr.pf.hdr = mtod(m, struct ip *); 2537 } 2538 #endif /* ALTQ */ 2539 m->m_data += max_linkhdr; 2540 m->m_pkthdr.len = m->m_len = len; 2541 m->m_pkthdr.rcvif = NULL; 2542 bzero(m->m_data, len); 2543 switch (af) { 2544 #ifdef INET 2545 case AF_INET: 2546 h = mtod(m, struct ip *); 2547 2548 /* IP header fields included in the TCP checksum */ 2549 h->ip_p = IPPROTO_TCP; 2550 h->ip_len = htons(tlen); 2551 h->ip_src.s_addr = saddr->v4.s_addr; 
2552 h->ip_dst.s_addr = daddr->v4.s_addr; 2553 2554 th = (struct tcphdr *)((caddr_t)h + sizeof(struct ip)); 2555 break; 2556 #endif /* INET */ 2557 #ifdef INET6 2558 case AF_INET6: 2559 h6 = mtod(m, struct ip6_hdr *); 2560 2561 /* IP header fields included in the TCP checksum */ 2562 h6->ip6_nxt = IPPROTO_TCP; 2563 h6->ip6_plen = htons(tlen); 2564 memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr)); 2565 memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr)); 2566 2567 th = (struct tcphdr *)((caddr_t)h6 + sizeof(struct ip6_hdr)); 2568 break; 2569 #endif /* INET6 */ 2570 } 2571 2572 /* TCP header */ 2573 th->th_sport = sport; 2574 th->th_dport = dport; 2575 th->th_seq = htonl(seq); 2576 th->th_ack = htonl(ack); 2577 th->th_off = tlen >> 2; 2578 th->th_flags = flags; 2579 th->th_win = htons(win); 2580 2581 if (mss) { 2582 opt = (char *)(th + 1); 2583 opt[0] = TCPOPT_MAXSEG; 2584 opt[1] = 4; 2585 HTONS(mss); 2586 bcopy((caddr_t)&mss, (caddr_t)(opt + 2), 2); 2587 } 2588 2589 switch (af) { 2590 #ifdef INET 2591 case AF_INET: 2592 /* TCP checksum */ 2593 th->th_sum = in_cksum(m, len); 2594 2595 /* Finish the IP header */ 2596 h->ip_v = 4; 2597 h->ip_hl = sizeof(*h) >> 2; 2598 h->ip_tos = IPTOS_LOWDELAY; 2599 h->ip_len = htons(len); 2600 h->ip_off = htons(ip_mtudisc ? IP_DF : 0); 2601 h->ip_ttl = ttl ? 
ttl : ip_defttl; 2602 h->ip_sum = 0; 2603 if (eh == NULL) { 2604 ip_output(m, (void *)NULL, (void *)NULL, 0, 2605 (void *)NULL, (void *)NULL); 2606 } else { 2607 struct route ro; 2608 struct rtentry rt; 2609 struct ether_header *e = (void *)ro.ro_dst.sa_data; 2610 2611 if (ifp == NULL) { 2612 m_freem(m); 2613 return; 2614 } 2615 rt.rt_ifp = ifp; 2616 ro.ro_rt = &rt; 2617 ro.ro_dst.sa_len = sizeof(ro.ro_dst); 2618 ro.ro_dst.sa_family = pseudo_AF_HDRCMPLT; 2619 bcopy(eh->ether_dhost, e->ether_shost, ETHER_ADDR_LEN); 2620 bcopy(eh->ether_shost, e->ether_dhost, ETHER_ADDR_LEN); 2621 e->ether_type = eh->ether_type; 2622 ip_output(m, (void *)NULL, &ro, IP_ROUTETOETHER, 2623 (void *)NULL, (void *)NULL); 2624 } 2625 break; 2626 #endif /* INET */ 2627 #ifdef INET6 2628 case AF_INET6: 2629 /* TCP checksum */ 2630 th->th_sum = in6_cksum(m, IPPROTO_TCP, 2631 sizeof(struct ip6_hdr), tlen); 2632 2633 h6->ip6_vfc |= IPV6_VERSION; 2634 h6->ip6_hlim = IPV6_DEFHLIM; 2635 2636 ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL); 2637 break; 2638 #endif /* INET6 */ 2639 } 2640 } 2641 2642 void 2643 pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af, 2644 struct pf_rule *r, u_int rdomain) 2645 { 2646 struct mbuf *m0; 2647 2648 if ((m0 = m_copy(m, 0, M_COPYALL)) == NULL) 2649 return; 2650 2651 m0->m_pkthdr.pf.flags |= PF_TAG_GENERATED; 2652 m0->m_pkthdr.rdomain = rdomain; 2653 if (r && r->set_prio[0] != PF_PRIO_NOTSET) 2654 m0->m_pkthdr.pf.prio = r->set_prio[0]; 2655 2656 #ifdef ALTQ 2657 if (r->qid) { 2658 m0->m_pkthdr.pf.qid = r->qid; 2659 /* add hints for ecn */ 2660 m0->m_pkthdr.pf.hdr = mtod(m0, struct ip *); 2661 } 2662 #endif /* ALTQ */ 2663 2664 switch (af) { 2665 #ifdef INET 2666 case AF_INET: 2667 icmp_error(m0, type, code, 0, 0); 2668 break; 2669 #endif /* INET */ 2670 #ifdef INET6 2671 case AF_INET6: 2672 icmp6_error(m0, type, code, 0); 2673 break; 2674 #endif /* INET6 */ 2675 } 2676 } 2677 2678 /* 2679 * Return 1 if the addresses a and b match (with mask 
m), otherwise return 0. 2680 * If n is 0, they match if they are equal. If n is != 0, they match if they 2681 * are different. 2682 */ 2683 int 2684 pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m, 2685 struct pf_addr *b, sa_family_t af) 2686 { 2687 int match = 0; 2688 2689 switch (af) { 2690 #ifdef INET 2691 case AF_INET: 2692 if ((a->addr32[0] & m->addr32[0]) == 2693 (b->addr32[0] & m->addr32[0])) 2694 match++; 2695 break; 2696 #endif /* INET */ 2697 #ifdef INET6 2698 case AF_INET6: 2699 if (((a->addr32[0] & m->addr32[0]) == 2700 (b->addr32[0] & m->addr32[0])) && 2701 ((a->addr32[1] & m->addr32[1]) == 2702 (b->addr32[1] & m->addr32[1])) && 2703 ((a->addr32[2] & m->addr32[2]) == 2704 (b->addr32[2] & m->addr32[2])) && 2705 ((a->addr32[3] & m->addr32[3]) == 2706 (b->addr32[3] & m->addr32[3]))) 2707 match++; 2708 break; 2709 #endif /* INET6 */ 2710 } 2711 if (match) { 2712 if (n) 2713 return (0); 2714 else 2715 return (1); 2716 } else { 2717 if (n) 2718 return (1); 2719 else 2720 return (0); 2721 } 2722 } 2723 2724 /* 2725 * Return 1 if b <= a <= e, otherwise return 0. 
2726 */ 2727 int 2728 pf_match_addr_range(struct pf_addr *b, struct pf_addr *e, 2729 struct pf_addr *a, sa_family_t af) 2730 { 2731 switch (af) { 2732 #ifdef INET 2733 case AF_INET: 2734 if ((ntohl(a->addr32[0]) < ntohl(b->addr32[0])) || 2735 (ntohl(a->addr32[0]) > ntohl(e->addr32[0]))) 2736 return (0); 2737 break; 2738 #endif /* INET */ 2739 #ifdef INET6 2740 case AF_INET6: { 2741 int i; 2742 2743 /* check a >= b */ 2744 for (i = 0; i < 4; ++i) 2745 if (ntohl(a->addr32[i]) > ntohl(b->addr32[i])) 2746 break; 2747 else if (ntohl(a->addr32[i]) < ntohl(b->addr32[i])) 2748 return (0); 2749 /* check a <= e */ 2750 for (i = 0; i < 4; ++i) 2751 if (ntohl(a->addr32[i]) < ntohl(e->addr32[i])) 2752 break; 2753 else if (ntohl(a->addr32[i]) > ntohl(e->addr32[i])) 2754 return (0); 2755 break; 2756 } 2757 #endif /* INET6 */ 2758 } 2759 return (1); 2760 } 2761 2762 int 2763 pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p) 2764 { 2765 switch (op) { 2766 case PF_OP_IRG: 2767 return ((p > a1) && (p < a2)); 2768 case PF_OP_XRG: 2769 return ((p < a1) || (p > a2)); 2770 case PF_OP_RRG: 2771 return ((p >= a1) && (p <= a2)); 2772 case PF_OP_EQ: 2773 return (p == a1); 2774 case PF_OP_NE: 2775 return (p != a1); 2776 case PF_OP_LT: 2777 return (p < a1); 2778 case PF_OP_LE: 2779 return (p <= a1); 2780 case PF_OP_GT: 2781 return (p > a1); 2782 case PF_OP_GE: 2783 return (p >= a1); 2784 } 2785 return (0); /* never reached */ 2786 } 2787 2788 int 2789 pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p) 2790 { 2791 NTOHS(a1); 2792 NTOHS(a2); 2793 NTOHS(p); 2794 return (pf_match(op, a1, a2, p)); 2795 } 2796 2797 int 2798 pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u) 2799 { 2800 if (u == UID_MAX && op != PF_OP_EQ && op != PF_OP_NE) 2801 return (0); 2802 return (pf_match(op, a1, a2, u)); 2803 } 2804 2805 int 2806 pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g) 2807 { 2808 if (g == GID_MAX && op != PF_OP_EQ && op != PF_OP_NE) 2809 return (0); 2810 return 
(pf_match(op, a1, a2, g)); 2811 } 2812 2813 int 2814 pf_match_tag(struct mbuf *m, struct pf_rule *r, int *tag) 2815 { 2816 if (*tag == -1) 2817 *tag = m->m_pkthdr.pf.tag; 2818 2819 return ((!r->match_tag_not && r->match_tag == *tag) || 2820 (r->match_tag_not && r->match_tag != *tag)); 2821 } 2822 2823 int 2824 pf_match_rcvif(struct mbuf *m, struct pf_rule *r) 2825 { 2826 struct ifnet *ifp = m->m_pkthdr.rcvif; 2827 struct pfi_kif *kif; 2828 2829 if (ifp == NULL) 2830 return (0); 2831 2832 if (ifp->if_type == IFT_CARP && ifp->if_carpdev) 2833 kif = (struct pfi_kif *)ifp->if_carpdev->if_pf_kif; 2834 else 2835 kif = (struct pfi_kif *)ifp->if_pf_kif; 2836 2837 if (kif == NULL) { 2838 DPFPRINTF(LOG_ERR, 2839 "pf_test_via: kif == NULL, @%d via %s", 2840 r->nr, r->rcv_ifname); 2841 return (0); 2842 } 2843 2844 return (pfi_kif_match(r->rcv_kif, kif)); 2845 } 2846 2847 void 2848 pf_tag_packet(struct mbuf *m, int tag, int rtableid) 2849 { 2850 if (tag > 0) 2851 m->m_pkthdr.pf.tag = tag; 2852 if (rtableid >= 0) 2853 m->m_pkthdr.rdomain = rtableid; 2854 } 2855 2856 void 2857 pf_step_into_anchor(int *depth, struct pf_ruleset **rs, 2858 struct pf_rule **r, struct pf_rule **a, int *match) 2859 { 2860 struct pf_anchor_stackframe *f; 2861 2862 (*r)->anchor->match = 0; 2863 if (match) 2864 *match = 0; 2865 if (*depth >= sizeof(pf_anchor_stack) / 2866 sizeof(pf_anchor_stack[0])) { 2867 log(LOG_ERR, "pf_step_into_anchor: stack overflow\n"); 2868 *r = TAILQ_NEXT(*r, entries); 2869 return; 2870 } else if (*depth == 0 && a != NULL) 2871 *a = *r; 2872 f = pf_anchor_stack + (*depth)++; 2873 f->rs = *rs; 2874 f->r = *r; 2875 if ((*r)->anchor_wildcard) { 2876 f->parent = &(*r)->anchor->children; 2877 if ((f->child = RB_MIN(pf_anchor_node, f->parent)) == 2878 NULL) { 2879 *r = NULL; 2880 return; 2881 } 2882 *rs = &f->child->ruleset; 2883 } else { 2884 f->parent = NULL; 2885 f->child = NULL; 2886 *rs = &(*r)->anchor->ruleset; 2887 } 2888 *r = TAILQ_FIRST((*rs)->rules.active.ptr); 2889 } 2890 
2891 int 2892 pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs, 2893 struct pf_rule **r, struct pf_rule **a, int *match) 2894 { 2895 struct pf_anchor_stackframe *f; 2896 int quick = 0; 2897 2898 do { 2899 if (*depth <= 0) 2900 break; 2901 f = pf_anchor_stack + *depth - 1; 2902 if (f->parent != NULL && f->child != NULL) { 2903 if (f->child->match || 2904 (match != NULL && *match)) { 2905 f->r->anchor->match = 1; 2906 *match = 0; 2907 } 2908 f->child = RB_NEXT(pf_anchor_node, f->parent, f->child); 2909 if (f->child != NULL) { 2910 *rs = &f->child->ruleset; 2911 *r = TAILQ_FIRST((*rs)->rules.active.ptr); 2912 if (*r == NULL) 2913 continue; 2914 else 2915 break; 2916 } 2917 } 2918 (*depth)--; 2919 if (*depth == 0 && a != NULL) 2920 *a = NULL; 2921 *rs = f->rs; 2922 if (f->r->anchor->match || (match != NULL && *match)) 2923 quick = f->r->quick; 2924 *r = TAILQ_NEXT(f->r, entries); 2925 } while (*r == NULL); 2926 2927 return (quick); 2928 } 2929 2930 #ifdef INET6 2931 void 2932 pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr, 2933 struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af) 2934 { 2935 switch (af) { 2936 #ifdef INET 2937 case AF_INET: 2938 naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) | 2939 ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]); 2940 break; 2941 #endif /* INET */ 2942 case AF_INET6: 2943 naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) | 2944 ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]); 2945 naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) | 2946 ((rmask->addr32[1] ^ 0xffffffff ) & saddr->addr32[1]); 2947 naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) | 2948 ((rmask->addr32[2] ^ 0xffffffff ) & saddr->addr32[2]); 2949 naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) | 2950 ((rmask->addr32[3] ^ 0xffffffff ) & saddr->addr32[3]); 2951 break; 2952 } 2953 } 2954 2955 void 2956 pf_addr_inc(struct pf_addr *addr, sa_family_t af) 2957 { 2958 switch (af) { 2959 #ifdef INET 
2960 case AF_INET: 2961 addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1); 2962 break; 2963 #endif /* INET */ 2964 case AF_INET6: 2965 if (addr->addr32[3] == 0xffffffff) { 2966 addr->addr32[3] = 0; 2967 if (addr->addr32[2] == 0xffffffff) { 2968 addr->addr32[2] = 0; 2969 if (addr->addr32[1] == 0xffffffff) { 2970 addr->addr32[1] = 0; 2971 addr->addr32[0] = 2972 htonl(ntohl(addr->addr32[0]) + 1); 2973 } else 2974 addr->addr32[1] = 2975 htonl(ntohl(addr->addr32[1]) + 1); 2976 } else 2977 addr->addr32[2] = 2978 htonl(ntohl(addr->addr32[2]) + 1); 2979 } else 2980 addr->addr32[3] = 2981 htonl(ntohl(addr->addr32[3]) + 1); 2982 break; 2983 } 2984 } 2985 #endif /* INET6 */ 2986 2987 int 2988 pf_socket_lookup(struct pf_pdesc *pd) 2989 { 2990 struct pf_addr *saddr, *daddr; 2991 u_int16_t sport, dport; 2992 struct inpcbtable *tb; 2993 struct inpcb *inp; 2994 2995 if (pd == NULL) 2996 return (-1); 2997 pd->lookup.uid = UID_MAX; 2998 pd->lookup.gid = GID_MAX; 2999 pd->lookup.pid = NO_PID; 3000 switch (pd->proto) { 3001 case IPPROTO_TCP: 3002 if (pd->hdr.tcp == NULL) 3003 return (-1); 3004 sport = pd->hdr.tcp->th_sport; 3005 dport = pd->hdr.tcp->th_dport; 3006 tb = &tcbtable; 3007 break; 3008 case IPPROTO_UDP: 3009 if (pd->hdr.udp == NULL) 3010 return (-1); 3011 sport = pd->hdr.udp->uh_sport; 3012 dport = pd->hdr.udp->uh_dport; 3013 tb = &udbtable; 3014 break; 3015 default: 3016 return (-1); 3017 } 3018 if (pd->dir == PF_IN) { 3019 saddr = pd->src; 3020 daddr = pd->dst; 3021 } else { 3022 u_int16_t p; 3023 3024 p = sport; 3025 sport = dport; 3026 dport = p; 3027 saddr = pd->dst; 3028 daddr = pd->src; 3029 } 3030 switch (pd->af) { 3031 #ifdef INET 3032 case AF_INET: 3033 /* 3034 * Fails when rtable is changed while evaluating the ruleset 3035 * The socket looked up will not match the one hit in the end. 
3036 */ 3037 inp = in_pcbhashlookup(tb, saddr->v4, sport, daddr->v4, dport, 3038 pd->rdomain); 3039 if (inp == NULL) { 3040 inp = in_pcblookup_listen(tb, daddr->v4, dport, 0, 3041 NULL, pd->rdomain); 3042 if (inp == NULL) 3043 return (-1); 3044 } 3045 break; 3046 #endif /* INET */ 3047 #ifdef INET6 3048 case AF_INET6: 3049 inp = in6_pcbhashlookup(tb, &saddr->v6, sport, &daddr->v6, 3050 dport); 3051 if (inp == NULL) { 3052 inp = in6_pcblookup_listen(tb, &daddr->v6, dport, 0, 3053 NULL); 3054 if (inp == NULL) 3055 return (-1); 3056 } 3057 break; 3058 #endif /* INET6 */ 3059 } 3060 pd->lookup.uid = inp->inp_socket->so_euid; 3061 pd->lookup.gid = inp->inp_socket->so_egid; 3062 pd->lookup.pid = inp->inp_socket->so_cpid; 3063 return (1); 3064 } 3065 3066 u_int8_t 3067 pf_get_wscale(struct pf_pdesc *pd) 3068 { 3069 struct tcphdr *th = pd->hdr.tcp; 3070 int hlen; 3071 u_int8_t hdr[60]; 3072 u_int8_t *opt, optlen; 3073 u_int8_t wscale = 0; 3074 3075 hlen = th->th_off << 2; /* hlen <= sizeof(hdr) */ 3076 if (hlen <= sizeof(struct tcphdr)) 3077 return (0); 3078 if (!pf_pull_hdr(pd->m, pd->off, hdr, hlen, NULL, NULL, pd->af)) 3079 return (0); 3080 opt = hdr + sizeof(struct tcphdr); 3081 hlen -= sizeof(struct tcphdr); 3082 while (hlen >= 3) { 3083 switch (*opt) { 3084 case TCPOPT_EOL: 3085 case TCPOPT_NOP: 3086 ++opt; 3087 --hlen; 3088 break; 3089 case TCPOPT_WINDOW: 3090 wscale = opt[2]; 3091 if (wscale > TCP_MAX_WINSHIFT) 3092 wscale = TCP_MAX_WINSHIFT; 3093 wscale |= PF_WSCALE_FLAG; 3094 /* FALLTHROUGH */ 3095 default: 3096 optlen = opt[1]; 3097 if (optlen < 2) 3098 optlen = 2; 3099 hlen -= optlen; 3100 opt += optlen; 3101 break; 3102 } 3103 } 3104 return (wscale); 3105 } 3106 3107 u_int16_t 3108 pf_get_mss(struct pf_pdesc *pd) 3109 { 3110 struct tcphdr *th = pd->hdr.tcp; 3111 int hlen; 3112 u_int8_t hdr[60]; 3113 u_int8_t *opt, optlen; 3114 u_int16_t mss = tcp_mssdflt; 3115 3116 hlen = th->th_off << 2; /* hlen <= sizeof(hdr) */ 3117 if (hlen <= sizeof(struct tcphdr)) 3118 
return (0); 3119 if (!pf_pull_hdr(pd->m, pd->off, hdr, hlen, NULL, NULL, pd->af)) 3120 return (0); 3121 opt = hdr + sizeof(struct tcphdr); 3122 hlen -= sizeof(struct tcphdr); 3123 while (hlen >= TCPOLEN_MAXSEG) { 3124 switch (*opt) { 3125 case TCPOPT_EOL: 3126 case TCPOPT_NOP: 3127 ++opt; 3128 --hlen; 3129 break; 3130 case TCPOPT_MAXSEG: 3131 bcopy((caddr_t)(opt + 2), (caddr_t)&mss, 2); 3132 NTOHS(mss); 3133 /* FALLTHROUGH */ 3134 default: 3135 optlen = opt[1]; 3136 if (optlen < 2) 3137 optlen = 2; 3138 hlen -= optlen; 3139 opt += optlen; 3140 break; 3141 } 3142 } 3143 return (mss); 3144 } 3145 3146 u_int16_t 3147 pf_calc_mss(struct pf_addr *addr, sa_family_t af, int rtableid, u_int16_t offer) 3148 { 3149 #ifdef INET 3150 struct sockaddr_in *dst; 3151 struct route ro; 3152 #endif /* INET */ 3153 #ifdef INET6 3154 struct sockaddr_in6 *dst6; 3155 struct route_in6 ro6; 3156 #endif /* INET6 */ 3157 struct rtentry *rt = NULL; 3158 int hlen; 3159 u_int16_t mss = tcp_mssdflt; 3160 3161 switch (af) { 3162 #ifdef INET 3163 case AF_INET: 3164 hlen = sizeof(struct ip); 3165 bzero(&ro, sizeof(ro)); 3166 dst = (struct sockaddr_in *)&ro.ro_dst; 3167 dst->sin_family = AF_INET; 3168 dst->sin_len = sizeof(*dst); 3169 dst->sin_addr = addr->v4; 3170 ro.ro_tableid = rtableid; 3171 rtalloc_noclone(&ro); 3172 rt = ro.ro_rt; 3173 break; 3174 #endif /* INET */ 3175 #ifdef INET6 3176 case AF_INET6: 3177 hlen = sizeof(struct ip6_hdr); 3178 bzero(&ro6, sizeof(ro6)); 3179 dst6 = (struct sockaddr_in6 *)&ro6.ro_dst; 3180 dst6->sin6_family = AF_INET6; 3181 dst6->sin6_len = sizeof(*dst6); 3182 dst6->sin6_addr = addr->v6; 3183 ro6.ro_tableid = rtableid; 3184 rtalloc_noclone((struct route *)&ro6); 3185 rt = ro6.ro_rt; 3186 break; 3187 #endif /* INET6 */ 3188 } 3189 3190 if (rt && rt->rt_ifp) { 3191 mss = rt->rt_ifp->if_mtu - hlen - sizeof(struct tcphdr); 3192 mss = max(tcp_mssdflt, mss); 3193 RTFREE(rt); 3194 } 3195 mss = min(mss, offer); 3196 mss = max(mss, 64); /* sanity - at least max opt space 
*/ 3197 return (mss); 3198 } 3199 3200 void 3201 pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr) 3202 { 3203 struct pf_rule *r = s->rule.ptr; 3204 struct pf_src_node *sn = NULL; 3205 3206 s->rt_kif = NULL; 3207 if (!r->rt) 3208 return; 3209 switch (s->key[PF_SK_WIRE]->af) { 3210 #ifdef INET 3211 case AF_INET: 3212 pf_map_addr(AF_INET, r, saddr, &s->rt_addr, NULL, &sn, 3213 &r->route, PF_SN_ROUTE); 3214 s->rt_kif = r->route.kif; 3215 s->natrule.ptr = r; 3216 break; 3217 #endif /* INET */ 3218 #ifdef INET6 3219 case AF_INET6: 3220 pf_map_addr(AF_INET6, r, saddr, &s->rt_addr, NULL, &sn, 3221 &r->route, PF_SN_ROUTE); 3222 s->rt_kif = r->route.kif; 3223 s->natrule.ptr = r; 3224 break; 3225 #endif /* INET6 */ 3226 } 3227 } 3228 3229 u_int32_t 3230 pf_tcp_iss(struct pf_pdesc *pd) 3231 { 3232 MD5_CTX ctx; 3233 u_int32_t digest[4]; 3234 3235 if (pf_tcp_secret_init == 0) { 3236 arc4random_buf(pf_tcp_secret, sizeof(pf_tcp_secret)); 3237 MD5Init(&pf_tcp_secret_ctx); 3238 MD5Update(&pf_tcp_secret_ctx, pf_tcp_secret, 3239 sizeof(pf_tcp_secret)); 3240 pf_tcp_secret_init = 1; 3241 } 3242 ctx = pf_tcp_secret_ctx; 3243 3244 MD5Update(&ctx, (char *)&pd->hdr.tcp->th_sport, sizeof(u_short)); 3245 MD5Update(&ctx, (char *)&pd->hdr.tcp->th_dport, sizeof(u_short)); 3246 switch (pd->af) { 3247 #ifdef INET 3248 case AF_INET: 3249 MD5Update(&ctx, (char *)&pd->src->v4, sizeof(struct in_addr)); 3250 MD5Update(&ctx, (char *)&pd->dst->v4, sizeof(struct in_addr)); 3251 break; 3252 #endif /* INET */ 3253 #ifdef INET6 3254 case AF_INET6: 3255 MD5Update(&ctx, (char *)&pd->src->v6, sizeof(struct in6_addr)); 3256 MD5Update(&ctx, (char *)&pd->dst->v6, sizeof(struct in6_addr)); 3257 break; 3258 #endif /* INET6 */ 3259 } 3260 MD5Final((u_char *)digest, &ctx); 3261 pf_tcp_iss_off += 4096; 3262 return (digest[0] + tcp_iss + pf_tcp_iss_off); 3263 } 3264 3265 void 3266 pf_rule_to_actions(struct pf_rule *r, struct pf_rule_actions *a) 3267 { 3268 if (r->qid) 3269 a->qid = r->qid; 3270 if (r->pqid) 3271 
a->pqid = r->pqid; 3272 if (r->rtableid >= 0) 3273 a->rtableid = r->rtableid; 3274 a->log |= r->log; 3275 if (r->scrub_flags & PFSTATE_SETTOS) 3276 a->set_tos = r->set_tos; 3277 if (r->min_ttl) 3278 a->min_ttl = r->min_ttl; 3279 if (r->max_mss) 3280 a->max_mss = r->max_mss; 3281 a->flags |= (r->scrub_flags & (PFSTATE_NODF|PFSTATE_RANDOMID| 3282 PFSTATE_SETTOS|PFSTATE_SCRUB_TCP)); 3283 if (r->set_prio[0] != PF_PRIO_NOTSET) 3284 a->set_prio[0] = r->set_prio[0]; 3285 if (r->set_prio[1] != PF_PRIO_NOTSET) 3286 a->set_prio[1] = r->set_prio[1]; 3287 } 3288 3289 #define PF_TEST_ATTRIB(t, a) \ 3290 do { \ 3291 if (t) { \ 3292 r = a; \ 3293 goto nextrule; \ 3294 } \ 3295 } while (0) 3296 3297 int 3298 pf_test_rule(struct pf_pdesc *pd, struct pf_rule **rm, struct pf_state **sm, 3299 struct pf_rule **am, struct pf_ruleset **rsm) 3300 { 3301 struct pf_rule *r; 3302 struct pf_rule *nr = NULL; 3303 struct pf_rule *a = NULL; 3304 struct pf_ruleset *ruleset = NULL; 3305 struct pf_rule_slist rules; 3306 struct pf_rule_item *ri; 3307 struct pf_src_node *sns[PF_SN_MAX]; 3308 struct tcphdr *th = pd->hdr.tcp; 3309 struct pf_state_key *skw = NULL, *sks = NULL; 3310 struct pf_rule_actions act; 3311 struct ifqueue *ifq = &ipintrq; 3312 u_short reason; 3313 int rewrite = 0; 3314 int tag = -1; 3315 int asd = 0; 3316 int match = 0; 3317 int state_icmp = 0, icmp_dir, multi; 3318 u_int16_t virtual_type, virtual_id; 3319 u_int8_t icmptype = 0, icmpcode = 0; 3320 3321 bzero(&act, sizeof(act)); 3322 act.set_prio[0] = act.set_prio[1] = PF_PRIO_NOTSET; 3323 bzero(sns, sizeof(sns)); 3324 act.rtableid = pd->rdomain; 3325 SLIST_INIT(&rules); 3326 3327 #ifdef INET6 3328 if (pd->af == AF_INET6) 3329 ifq = &ip6intrq; 3330 #endif 3331 3332 if (pd->dir == PF_IN && pf_check_congestion(ifq)) { 3333 REASON_SET(&reason, PFRES_CONGEST); 3334 return (PF_DROP); 3335 } 3336 3337 switch (pd->virtual_proto) { 3338 #ifdef INET 3339 case IPPROTO_ICMP: 3340 icmptype = pd->hdr.icmp->icmp_type; 3341 icmpcode = 
pd->hdr.icmp->icmp_code; 3342 state_icmp = pf_icmp_mapping(pd, icmptype, 3343 &icmp_dir, &multi, &virtual_id, &virtual_type); 3344 if (icmp_dir == PF_IN) { 3345 pd->nsport = virtual_id; 3346 pd->ndport = virtual_type; 3347 } else { 3348 pd->nsport = virtual_type; 3349 pd->ndport = virtual_id; 3350 } 3351 break; 3352 #endif /* INET */ 3353 #ifdef INET6 3354 case IPPROTO_ICMPV6: 3355 icmptype = pd->hdr.icmp6->icmp6_type; 3356 icmpcode = pd->hdr.icmp6->icmp6_code; 3357 state_icmp = pf_icmp_mapping(pd, icmptype, 3358 &icmp_dir, &multi, &virtual_id, &virtual_type); 3359 if (icmp_dir == PF_IN) { 3360 pd->nsport = virtual_id; 3361 pd->ndport = virtual_type; 3362 } else { 3363 pd->nsport = virtual_type; 3364 pd->ndport = virtual_id; 3365 } 3366 break; 3367 #endif /* INET6 */ 3368 } 3369 3370 pd->osport = pd->nsport; 3371 pd->odport = pd->ndport; 3372 3373 r = TAILQ_FIRST(pf_main_ruleset.rules.active.ptr); 3374 while (r != NULL) { 3375 r->evaluations++; 3376 PF_TEST_ATTRIB((pfi_kif_match(r->kif, pd->kif) == r->ifnot), 3377 r->skip[PF_SKIP_IFP].ptr); 3378 PF_TEST_ATTRIB((r->direction && r->direction != pd->dir), 3379 r->skip[PF_SKIP_DIR].ptr); 3380 PF_TEST_ATTRIB((r->onrdomain >= 0 && 3381 (r->onrdomain == pd->rdomain) == r->ifnot), 3382 r->skip[PF_SKIP_RDOM].ptr); 3383 PF_TEST_ATTRIB((r->af && r->af != pd->af), 3384 r->skip[PF_SKIP_AF].ptr); 3385 PF_TEST_ATTRIB((r->proto && r->proto != pd->proto), 3386 r->skip[PF_SKIP_PROTO].ptr); 3387 PF_TEST_ATTRIB((PF_MISMATCHAW(&r->src.addr, &pd->nsaddr, 3388 pd->naf, r->src.neg, pd->kif, act.rtableid)), 3389 r->skip[PF_SKIP_SRC_ADDR].ptr); 3390 PF_TEST_ATTRIB((PF_MISMATCHAW(&r->dst.addr, &pd->ndaddr, pd->af, 3391 r->dst.neg, NULL, act.rtableid)), 3392 r->skip[PF_SKIP_DST_ADDR].ptr); 3393 3394 switch (pd->virtual_proto) { 3395 case PF_VPROTO_FRAGMENT: 3396 /* tcp/udp only. 
port_op always 0 in other cases */ 3397 PF_TEST_ATTRIB((r->src.port_op || r->dst.port_op), 3398 TAILQ_NEXT(r, entries)); 3399 PF_TEST_ATTRIB((pd->proto == IPPROTO_TCP && r->flagset), 3400 TAILQ_NEXT(r, entries)); 3401 /* icmp only. type/code always 0 in other cases */ 3402 PF_TEST_ATTRIB((r->type || r->code), 3403 TAILQ_NEXT(r, entries)); 3404 /* tcp/udp only. {uid|gid}.op always 0 in other cases */ 3405 PF_TEST_ATTRIB((r->gid.op || r->uid.op), 3406 TAILQ_NEXT(r, entries)); 3407 break; 3408 3409 case IPPROTO_TCP: 3410 PF_TEST_ATTRIB(((r->flagset & th->th_flags) != 3411 r->flags), 3412 TAILQ_NEXT(r, entries)); 3413 PF_TEST_ATTRIB((r->os_fingerprint != PF_OSFP_ANY && 3414 !pf_osfp_match(pf_osfp_fingerprint(pd), 3415 r->os_fingerprint)), 3416 TAILQ_NEXT(r, entries)); 3417 /* FALLTHROUGH */ 3418 3419 case IPPROTO_UDP: 3420 /* tcp/udp only. port_op always 0 in other cases */ 3421 PF_TEST_ATTRIB((r->src.port_op && 3422 !pf_match_port(r->src.port_op, r->src.port[0], 3423 r->src.port[1], pd->nsport)), 3424 r->skip[PF_SKIP_SRC_PORT].ptr); 3425 PF_TEST_ATTRIB((r->dst.port_op && 3426 !pf_match_port(r->dst.port_op, r->dst.port[0], 3427 r->dst.port[1], pd->ndport)), 3428 r->skip[PF_SKIP_DST_PORT].ptr); 3429 /* tcp/udp only. uid.op always 0 in other cases */ 3430 PF_TEST_ATTRIB((r->uid.op && (pd->lookup.done || 3431 (pd->lookup.done = 3432 pf_socket_lookup(pd), 1)) && 3433 !pf_match_uid(r->uid.op, r->uid.uid[0], 3434 r->uid.uid[1], pd->lookup.uid)), 3435 TAILQ_NEXT(r, entries)); 3436 /* tcp/udp only. gid.op always 0 in other cases */ 3437 PF_TEST_ATTRIB((r->gid.op && (pd->lookup.done || 3438 (pd->lookup.done = 3439 pf_socket_lookup(pd), 1)) && 3440 !pf_match_gid(r->gid.op, r->gid.gid[0], 3441 r->gid.gid[1], pd->lookup.gid)), 3442 TAILQ_NEXT(r, entries)); 3443 break; 3444 3445 case IPPROTO_ICMP: 3446 case IPPROTO_ICMPV6: 3447 /* icmp only. type always 0 in other cases */ 3448 PF_TEST_ATTRIB((r->type && r->type != icmptype + 1), 3449 TAILQ_NEXT(r, entries)); 3450 /* icmp only. 
type always 0 in other cases */ 3451 PF_TEST_ATTRIB((r->code && r->code != icmpcode + 1), 3452 TAILQ_NEXT(r, entries)); 3453 break; 3454 3455 default: 3456 break; 3457 } 3458 3459 PF_TEST_ATTRIB((r->rule_flag & PFRULE_FRAGMENT && 3460 pd->virtual_proto != PF_VPROTO_FRAGMENT), 3461 TAILQ_NEXT(r, entries)); 3462 PF_TEST_ATTRIB((r->tos && !(r->tos == pd->tos)), 3463 TAILQ_NEXT(r, entries)); 3464 PF_TEST_ATTRIB((r->prob && 3465 r->prob <= arc4random_uniform(UINT_MAX - 1) + 1), 3466 TAILQ_NEXT(r, entries)); 3467 PF_TEST_ATTRIB((r->match_tag && !pf_match_tag(pd->m, r, &tag)), 3468 TAILQ_NEXT(r, entries)); 3469 PF_TEST_ATTRIB((r->rcv_kif && !pf_match_rcvif(pd->m, r)), 3470 TAILQ_NEXT(r, entries)); 3471 3472 /* FALLTHROUGH */ 3473 if (r->tag) 3474 tag = r->tag; 3475 if (r->anchor == NULL) { 3476 if (r->action == PF_MATCH) { 3477 if ((ri = pool_get(&pf_rule_item_pl, 3478 PR_NOWAIT)) == NULL) { 3479 REASON_SET(&reason, PFRES_MEMORY); 3480 goto cleanup; 3481 } 3482 ri->r = r; 3483 /* order is irrelevant */ 3484 SLIST_INSERT_HEAD(&rules, ri, entry); 3485 pf_rule_to_actions(r, &act); 3486 if (r->rule_flag & PFRULE_AFTO) 3487 pd->naf = r->naf; 3488 if (pf_get_transaddr(r, pd, sns, &nr) == -1) { 3489 REASON_SET(&reason, PFRES_MEMORY); 3490 goto cleanup; 3491 } 3492 if (r->log || act.log & PF_LOG_MATCHES) { 3493 REASON_SET(&reason, PFRES_MATCH); 3494 PFLOG_PACKET(pd, reason, r, a, ruleset); 3495 } 3496 } else { 3497 match = 1; 3498 *rm = r; 3499 *am = a; 3500 *rsm = ruleset; 3501 if (act.log & PF_LOG_MATCHES) { 3502 REASON_SET(&reason, PFRES_MATCH); 3503 PFLOG_PACKET(pd, reason, r, a, ruleset); 3504 } 3505 } 3506 3507 if ((*rm)->quick) 3508 break; 3509 r = TAILQ_NEXT(r, entries); 3510 } else 3511 pf_step_into_anchor(&asd, &ruleset, 3512 &r, &a, &match); 3513 3514 nextrule: 3515 if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset, 3516 &r, &a, &match)) 3517 break; 3518 } 3519 r = *rm; 3520 a = *am; 3521 ruleset = *rsm; 3522 3523 /* apply actions for last matching pass/block rule 
*/ 3524 pf_rule_to_actions(r, &act); 3525 if (r->rule_flag & PFRULE_AFTO) 3526 pd->naf = r->naf; 3527 if (pf_get_transaddr(r, pd, sns, &nr) == -1) { 3528 REASON_SET(&reason, PFRES_MEMORY); 3529 goto cleanup; 3530 } 3531 REASON_SET(&reason, PFRES_MATCH); 3532 3533 if (r->log || act.log & PF_LOG_MATCHES) 3534 PFLOG_PACKET(pd, reason, r, a, ruleset); 3535 3536 if (pd->virtual_proto != PF_VPROTO_FRAGMENT && 3537 (r->action == PF_DROP) && 3538 ((r->rule_flag & PFRULE_RETURNRST) || 3539 (r->rule_flag & PFRULE_RETURNICMP) || 3540 (r->rule_flag & PFRULE_RETURN))) { 3541 if (pd->proto == IPPROTO_TCP && 3542 ((r->rule_flag & PFRULE_RETURNRST) || 3543 (r->rule_flag & PFRULE_RETURN)) && 3544 !(th->th_flags & TH_RST)) { 3545 u_int32_t ack = ntohl(th->th_seq) + pd->p_len; 3546 3547 if (pf_check_proto_cksum(pd->m, pd->off, 3548 pd->tot_len - pd->off, IPPROTO_TCP, pd->af)) 3549 REASON_SET(&reason, PFRES_PROTCKSUM); 3550 else { 3551 if (th->th_flags & TH_SYN) 3552 ack++; 3553 if (th->th_flags & TH_FIN) 3554 ack++; 3555 pf_send_tcp(r, pd->af, pd->dst, 3556 pd->src, th->th_dport, th->th_sport, 3557 ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0, 3558 r->return_ttl, 1, 0, pd->rdomain, 3559 pd->eh, pd->kif->pfik_ifp); 3560 } 3561 } else if ((pd->proto != IPPROTO_ICMP || 3562 ICMP_INFOTYPE(icmptype)) && pd->af == AF_INET && 3563 r->return_icmp) 3564 pf_send_icmp(pd->m, r->return_icmp >> 8, 3565 r->return_icmp & 255, pd->af, r, pd->rdomain); 3566 else if ((pd->proto != IPPROTO_ICMPV6 || 3567 (icmptype >= ICMP6_ECHO_REQUEST && 3568 icmptype != ND_REDIRECT)) && pd->af == AF_INET6 && 3569 r->return_icmp6) 3570 pf_send_icmp(pd->m, r->return_icmp6 >> 8, 3571 r->return_icmp6 & 255, pd->af, r, pd->rdomain); 3572 } 3573 3574 if (r->action == PF_DROP) 3575 goto cleanup; 3576 3577 pf_tag_packet(pd->m, tag, act.rtableid); 3578 if (act.rtableid >= 0 && 3579 rtable_l2(act.rtableid) != pd->rdomain) 3580 pd->destchg = 1; 3581 3582 if (r->action == PF_PASS && pd->badopts && ! 
r->allow_opts) { 3583 REASON_SET(&reason, PFRES_IPOPTIONS); 3584 pd->pflog |= PF_LOG_FORCE; 3585 DPFPRINTF(LOG_NOTICE, "dropping packet with " 3586 "ip/ipv6 options in pf_test_rule()"); 3587 goto cleanup; 3588 } 3589 3590 if (pd->virtual_proto != PF_VPROTO_FRAGMENT 3591 && !state_icmp && r->keep_state) { 3592 int action; 3593 3594 if (r->rule_flag & PFRULE_SRCTRACK && 3595 pf_insert_src_node(&sns[PF_SN_NONE], r, PF_SN_NONE, pd->af, 3596 pd->src, NULL, 0) != 0) { 3597 REASON_SET(&reason, PFRES_SRCLIMIT); 3598 goto cleanup; 3599 } 3600 3601 action = pf_create_state(pd, r, a, nr, &skw, &sks, &rewrite, 3602 sm, tag, &rules, &act, sns); 3603 3604 if (action != PF_PASS) 3605 return (action); 3606 if (sks != skw) { 3607 struct pf_state_key *sk; 3608 3609 if (pd->dir == PF_IN) 3610 sk = sks; 3611 else 3612 sk = skw; 3613 rewrite += pf_translate(pd, 3614 &sk->addr[pd->af == pd->naf ? pd->sidx : pd->didx], 3615 sk->port[pd->af == pd->naf ? pd->sidx : pd->didx], 3616 &sk->addr[pd->af == pd->naf ? pd->didx : pd->sidx], 3617 sk->port[pd->af == pd->naf ? pd->didx : pd->sidx], 3618 virtual_type, icmp_dir); 3619 } 3620 } else { 3621 while ((ri = SLIST_FIRST(&rules))) { 3622 SLIST_REMOVE_HEAD(&rules, entry); 3623 pool_put(&pf_rule_item_pl, ri); 3624 } 3625 } 3626 3627 /* copy back packet headers if we performed NAT operations */ 3628 if (rewrite && pd->hdrlen) 3629 m_copyback(pd->m, pd->off, pd->hdrlen, pd->hdr.any, M_NOWAIT); 3630 3631 #if NPFSYNC > 0 3632 if (*sm != NULL && !ISSET((*sm)->state_flags, PFSTATE_NOSYNC) && 3633 pd->dir == PF_OUT && pfsync_up()) { 3634 /* 3635 * We want the state created, but we dont 3636 * want to send this in case a partner 3637 * firewall has to know about it to allow 3638 * replies through it. 
3639 */ 3640 if (pfsync_defer(*sm, pd->m)) 3641 return (PF_DEFER); 3642 } 3643 #endif 3644 3645 if (r->rule_flag & PFRULE_ONCE) 3646 pf_purge_rule(ruleset, r); 3647 3648 #if INET && INET6 3649 if (rewrite && skw->af != sks->af) 3650 return (PF_AFRT); 3651 #endif /* INET && INET6 */ 3652 3653 return (PF_PASS); 3654 3655 cleanup: 3656 while ((ri = SLIST_FIRST(&rules))) { 3657 SLIST_REMOVE_HEAD(&rules, entry); 3658 pool_put(&pf_rule_item_pl, ri); 3659 } 3660 3661 return (PF_DROP); 3662 } 3663 3664 static __inline int 3665 pf_create_state(struct pf_pdesc *pd, struct pf_rule *r, struct pf_rule *a, 3666 struct pf_rule *nr, struct pf_state_key **skw, struct pf_state_key **sks, 3667 int *rewrite, struct pf_state **sm, int tag, struct pf_rule_slist *rules, 3668 struct pf_rule_actions *act, struct pf_src_node *sns[PF_SN_MAX]) 3669 { 3670 struct pf_state *s = NULL; 3671 struct tcphdr *th = pd->hdr.tcp; 3672 u_int16_t mss = tcp_mssdflt; 3673 u_short reason; 3674 u_int i; 3675 3676 /* check maximums */ 3677 if (r->max_states && (r->states_cur >= r->max_states)) { 3678 pf_status.lcounters[LCNT_STATES]++; 3679 REASON_SET(&reason, PFRES_MAXSTATES); 3680 return (PF_DROP); 3681 } 3682 3683 s = pool_get(&pf_state_pl, PR_NOWAIT | PR_ZERO); 3684 if (s == NULL) { 3685 REASON_SET(&reason, PFRES_MEMORY); 3686 goto csfailed; 3687 } 3688 s->rule.ptr = r; 3689 s->anchor.ptr = a; 3690 s->natrule.ptr = nr; 3691 bcopy(rules, &s->match_rules, sizeof(s->match_rules)); 3692 STATE_INC_COUNTERS(s); 3693 if (r->allow_opts) 3694 s->state_flags |= PFSTATE_ALLOWOPTS; 3695 if (r->rule_flag & PFRULE_STATESLOPPY) 3696 s->state_flags |= PFSTATE_SLOPPY; 3697 if (r->rule_flag & PFRULE_PFLOW) 3698 s->state_flags |= PFSTATE_PFLOW; 3699 s->log = act->log & PF_LOG_ALL; 3700 s->qid = act->qid; 3701 s->pqid = act->pqid; 3702 s->rtableid[pd->didx] = act->rtableid; 3703 s->rtableid[pd->sidx] = -1; /* return traffic is routed normally */ 3704 s->min_ttl = act->min_ttl; 3705 s->set_tos = act->set_tos; 3706 s->max_mss = 
act->max_mss; 3707 s->state_flags |= act->flags; 3708 #if NPFSYNC > 0 3709 s->sync_state = PFSYNC_S_NONE; 3710 #endif 3711 s->set_prio[0] = act->set_prio[0]; 3712 s->set_prio[1] = act->set_prio[1]; 3713 switch (pd->proto) { 3714 case IPPROTO_TCP: 3715 s->src.seqlo = ntohl(th->th_seq); 3716 s->src.seqhi = s->src.seqlo + pd->p_len + 1; 3717 if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN && 3718 r->keep_state == PF_STATE_MODULATE) { 3719 /* Generate sequence number modulator */ 3720 if ((s->src.seqdiff = pf_tcp_iss(pd) - s->src.seqlo) == 3721 0) 3722 s->src.seqdiff = 1; 3723 pf_change_a(&th->th_seq, &th->th_sum, 3724 htonl(s->src.seqlo + s->src.seqdiff), 0); 3725 *rewrite = 1; 3726 } else 3727 s->src.seqdiff = 0; 3728 if (th->th_flags & TH_SYN) { 3729 s->src.seqhi++; 3730 s->src.wscale = pf_get_wscale(pd); 3731 } 3732 s->src.max_win = MAX(ntohs(th->th_win), 1); 3733 if (s->src.wscale & PF_WSCALE_MASK) { 3734 /* Remove scale factor from initial window */ 3735 int win = s->src.max_win; 3736 win += 1 << (s->src.wscale & PF_WSCALE_MASK); 3737 s->src.max_win = (win - 1) >> 3738 (s->src.wscale & PF_WSCALE_MASK); 3739 } 3740 if (th->th_flags & TH_FIN) 3741 s->src.seqhi++; 3742 s->dst.seqhi = 1; 3743 s->dst.max_win = 1; 3744 s->src.state = TCPS_SYN_SENT; 3745 s->dst.state = TCPS_CLOSED; 3746 s->timeout = PFTM_TCP_FIRST_PACKET; 3747 break; 3748 case IPPROTO_UDP: 3749 s->src.state = PFUDPS_SINGLE; 3750 s->dst.state = PFUDPS_NO_TRAFFIC; 3751 s->timeout = PFTM_UDP_FIRST_PACKET; 3752 break; 3753 case IPPROTO_ICMP: 3754 #ifdef INET6 3755 case IPPROTO_ICMPV6: 3756 #endif 3757 s->timeout = PFTM_ICMP_FIRST_PACKET; 3758 break; 3759 default: 3760 s->src.state = PFOTHERS_SINGLE; 3761 s->dst.state = PFOTHERS_NO_TRAFFIC; 3762 s->timeout = PFTM_OTHER_FIRST_PACKET; 3763 } 3764 3765 s->creation = time_uptime; 3766 s->expire = time_second; 3767 3768 if (pd->proto == IPPROTO_TCP) { 3769 if (s->state_flags & PFSTATE_SCRUB_TCP && 3770 pf_normalize_tcp_init(pd, &s->src, &s->dst)) { 3771 
REASON_SET(&reason, PFRES_MEMORY); 3772 goto csfailed; 3773 } 3774 if (s->state_flags & PFSTATE_SCRUB_TCP && s->src.scrub && 3775 pf_normalize_tcp_stateful(pd, &reason, s, &s->src, &s->dst, 3776 rewrite)) { 3777 /* This really shouldn't happen!!! */ 3778 DPFPRINTF(LOG_ERR, 3779 "pf_normalize_tcp_stateful failed on first pkt"); 3780 goto csfailed; 3781 } 3782 } 3783 s->direction = pd->dir; 3784 3785 if (pf_state_key_setup(pd, skw, sks, act->rtableid)) { 3786 REASON_SET(&reason, PFRES_MEMORY); 3787 goto csfailed; 3788 } 3789 3790 if (pf_state_insert(BOUND_IFACE(r, pd->kif), *skw, *sks, s)) { 3791 pf_state_key_detach(s, PF_SK_STACK); 3792 pf_state_key_detach(s, PF_SK_WIRE); 3793 *sks = *skw = NULL; 3794 REASON_SET(&reason, PFRES_STATEINS); 3795 goto csfailed; 3796 } else 3797 *sm = s; 3798 3799 /* attach src nodes late, otherwise cleanup on error nontrivial */ 3800 for (i = 0; i < PF_SN_MAX; i++) 3801 if (sns[i] != NULL) { 3802 struct pf_sn_item *sni; 3803 3804 sni = pool_get(&pf_sn_item_pl, PR_NOWAIT); 3805 if (sni == NULL) { 3806 REASON_SET(&reason, PFRES_MEMORY); 3807 pf_src_tree_remove_state(s); 3808 STATE_DEC_COUNTERS(s); 3809 pool_put(&pf_state_pl, s); 3810 return (PF_DROP); 3811 } 3812 sni->sn = sns[i]; 3813 SLIST_INSERT_HEAD(&s->src_nodes, sni, next); 3814 sni->sn->states++; 3815 } 3816 3817 pf_set_rt_ifp(s, pd->src); /* needs s->state_key set */ 3818 if (tag > 0) { 3819 pf_tag_ref(tag); 3820 s->tag = tag; 3821 } 3822 if (pd->proto == IPPROTO_TCP && (th->th_flags & (TH_SYN|TH_ACK)) == 3823 TH_SYN && r->keep_state == PF_STATE_SYNPROXY) { 3824 int rtid = pd->rdomain; 3825 if (act->rtableid >= 0) 3826 rtid = act->rtableid; 3827 s->src.state = PF_TCPS_PROXY_SRC; 3828 s->src.seqhi = htonl(arc4random()); 3829 /* Find mss option */ 3830 mss = pf_get_mss(pd); 3831 mss = pf_calc_mss(pd->src, pd->af, rtid, mss); 3832 mss = pf_calc_mss(pd->dst, pd->af, rtid, mss); 3833 s->src.mss = mss; 3834 pf_send_tcp(r, pd->af, pd->dst, pd->src, th->th_dport, 3835 th->th_sport, 
s->src.seqhi, ntohl(th->th_seq) + 1, 3836 TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, pd->rdomain, 3837 NULL, NULL); 3838 REASON_SET(&reason, PFRES_SYNPROXY); 3839 return (PF_SYNPROXY_DROP); 3840 } 3841 3842 return (PF_PASS); 3843 3844 csfailed: 3845 for (i = 0; i < PF_SN_MAX; i++) 3846 if (sns[i] != NULL) 3847 pf_remove_src_node(sns[i]); 3848 if (s) { 3849 pf_normalize_tcp_cleanup(s); /* safe even w/o init */ 3850 pf_src_tree_remove_state(s); 3851 STATE_DEC_COUNTERS(s); 3852 pool_put(&pf_state_pl, s); 3853 } 3854 3855 return (PF_DROP); 3856 } 3857 3858 int 3859 pf_translate(struct pf_pdesc *pd, struct pf_addr *saddr, u_int16_t sport, 3860 struct pf_addr *daddr, u_int16_t dport, u_int16_t virtual_type, 3861 int icmp_dir) 3862 { 3863 /* 3864 * when called from bpf_mtap_pflog, there are extra constraints: 3865 * -mbuf is faked, m_data is the bpf buffer 3866 * -pd is not fully set up 3867 */ 3868 int rewrite = 0; 3869 int afto = pd->af != pd->naf; 3870 3871 if (afto || PF_ANEQ(daddr, pd->dst, pd->af)) 3872 pd->destchg = 1; 3873 3874 switch (pd->proto) { 3875 case IPPROTO_TCP: 3876 if (afto || PF_ANEQ(saddr, pd->src, pd->af) || 3877 *pd->sport != sport) { 3878 pf_change_ap(pd->src, pd->sport, &pd->hdr.tcp->th_sum, 3879 saddr, sport, 0, pd->af, pd->naf); 3880 rewrite = 1; 3881 } 3882 if (afto || PF_ANEQ(daddr, pd->dst, pd->af) || 3883 *pd->dport != dport) { 3884 pf_change_ap(pd->dst, pd->dport, &pd->hdr.tcp->th_sum, 3885 daddr, dport, 0, pd->af, pd->naf); 3886 rewrite = 1; 3887 } 3888 break; 3889 3890 case IPPROTO_UDP: 3891 if (afto || PF_ANEQ(saddr, pd->src, pd->af) || 3892 *pd->sport != sport) { 3893 pf_change_ap(pd->src, pd->sport, &pd->hdr.udp->uh_sum, 3894 saddr, sport, 1, pd->af, pd->naf); 3895 rewrite = 1; 3896 } 3897 if (afto || PF_ANEQ(daddr, pd->dst, pd->af) || 3898 *pd->dport != dport) { 3899 pf_change_ap(pd->dst, pd->dport, &pd->hdr.udp->uh_sum, 3900 daddr, dport, 1, pd->af, pd->naf); 3901 rewrite = 1; 3902 } 3903 break; 3904 3905 #ifdef INET 3906 case 
IPPROTO_ICMP: 3907 /* pf_translate() is also used when logging invalid packets */ 3908 if (pd->af != AF_INET) 3909 return (0); 3910 3911 if (afto) { 3912 #ifdef INET6 3913 if (pf_translate_icmp_af(AF_INET6, pd->hdr.icmp)) 3914 return (0); 3915 pd->proto = IPPROTO_ICMPV6; 3916 rewrite = 1; 3917 #endif /* INET6 */ 3918 } else { 3919 if (PF_ANEQ(saddr, pd->src, pd->af)) { 3920 pf_change_a(&pd->src->v4.s_addr, NULL, 3921 saddr->v4.s_addr, 0); 3922 rewrite = 1; 3923 } 3924 if (PF_ANEQ(daddr, pd->dst, pd->af)) { 3925 pf_change_a(&pd->dst->v4.s_addr, NULL, 3926 daddr->v4.s_addr, 0); 3927 rewrite = 1; 3928 } 3929 } 3930 if (virtual_type == htons(ICMP_ECHO)) { 3931 u_int16_t icmpid = (icmp_dir == PF_IN) ? sport : dport; 3932 3933 if (icmpid != pd->hdr.icmp->icmp_id) { 3934 pd->hdr.icmp->icmp_cksum = pf_cksum_fixup( 3935 pd->hdr.icmp->icmp_cksum, 3936 pd->hdr.icmp->icmp_id, icmpid, 0); 3937 pd->hdr.icmp->icmp_id = icmpid; 3938 rewrite = 1; 3939 } 3940 } 3941 break; 3942 #endif /* INET */ 3943 3944 #ifdef INET6 3945 case IPPROTO_ICMPV6: 3946 /* pf_translate() is also used when logging invalid packets */ 3947 if (pd->af != AF_INET6) 3948 return (0); 3949 3950 if (afto) { 3951 #ifdef INET 3952 /* ip_sum will be recalculated in pf_translate_af */ 3953 if (pf_translate_icmp_af(AF_INET, pd->hdr.icmp6)) 3954 return (0); 3955 pd->proto = IPPROTO_ICMP; 3956 rewrite = 1; 3957 #endif /* INET */ 3958 } else { 3959 if (PF_ANEQ(saddr, pd->src, pd->af)) { 3960 pf_change_a6(pd->src, 3961 &pd->hdr.icmp6->icmp6_cksum, saddr, 0); 3962 rewrite = 1; 3963 } 3964 if (PF_ANEQ(daddr, pd->dst, pd->af)) { 3965 pf_change_a6(pd->dst, 3966 &pd->hdr.icmp6->icmp6_cksum, daddr, 0); 3967 rewrite = 1; 3968 } 3969 } 3970 break; 3971 #endif /* INET6 */ 3972 3973 default: 3974 switch (pd->af) { 3975 #ifdef INET 3976 case AF_INET: 3977 if (!afto && PF_ANEQ(saddr, pd->src, pd->af)) { 3978 pf_change_a(&pd->src->v4.s_addr, NULL, 3979 saddr->v4.s_addr, 0); 3980 rewrite = 1; 3981 } 3982 if (!afto && PF_ANEQ(daddr, 
pd->dst, pd->af)) { 3983 pf_change_a(&pd->dst->v4.s_addr, NULL, 3984 daddr->v4.s_addr, 0); 3985 rewrite = 1; 3986 } 3987 break; 3988 #endif /* INET */ 3989 #ifdef INET6 3990 case AF_INET6: 3991 if (!afto && PF_ANEQ(saddr, pd->src, pd->af)) { 3992 pf_change_a6(pd->src, NULL, saddr, 0); 3993 rewrite = 1; 3994 } 3995 if (!afto && PF_ANEQ(daddr, pd->dst, pd->af)) { 3996 pf_change_a6(pd->dst, NULL, daddr, 0); 3997 rewrite = 1; 3998 } 3999 break; 4000 #endif /* INET6 */ 4001 } 4002 } 4003 return (rewrite); 4004 } 4005 4006 int 4007 pf_tcp_track_full(struct pf_pdesc *pd, struct pf_state_peer *src, 4008 struct pf_state_peer *dst, struct pf_state **state, u_short *reason, 4009 int *copyback) 4010 { 4011 struct tcphdr *th = pd->hdr.tcp; 4012 u_int16_t win = ntohs(th->th_win); 4013 u_int32_t ack, end, seq, orig_seq; 4014 u_int8_t sws, dws; 4015 int ackskew; 4016 4017 if (src->wscale && dst->wscale && !(th->th_flags & TH_SYN)) { 4018 sws = src->wscale & PF_WSCALE_MASK; 4019 dws = dst->wscale & PF_WSCALE_MASK; 4020 } else 4021 sws = dws = 0; 4022 4023 /* 4024 * Sequence tracking algorithm from Guido van Rooij's paper: 4025 * http://www.madison-gurkha.com/publications/tcp_filtering/ 4026 * tcp_filtering.ps 4027 */ 4028 4029 orig_seq = seq = ntohl(th->th_seq); 4030 if (src->seqlo == 0) { 4031 /* First packet from this end. 
Set its state */ 4032 4033 if (((*state)->state_flags & PFSTATE_SCRUB_TCP || dst->scrub) && 4034 src->scrub == NULL) { 4035 if (pf_normalize_tcp_init(pd, src, dst)) { 4036 REASON_SET(reason, PFRES_MEMORY); 4037 return (PF_DROP); 4038 } 4039 } 4040 4041 /* Deferred generation of sequence number modulator */ 4042 if (dst->seqdiff && !src->seqdiff) { 4043 /* use random iss for the TCP server */ 4044 while ((src->seqdiff = arc4random() - seq) == 0) 4045 ; 4046 ack = ntohl(th->th_ack) - dst->seqdiff; 4047 pf_change_a(&th->th_seq, &th->th_sum, htonl(seq + 4048 src->seqdiff), 0); 4049 pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0); 4050 *copyback = 1; 4051 } else { 4052 ack = ntohl(th->th_ack); 4053 } 4054 4055 end = seq + pd->p_len; 4056 if (th->th_flags & TH_SYN) { 4057 end++; 4058 if (dst->wscale & PF_WSCALE_FLAG) { 4059 src->wscale = pf_get_wscale(pd); 4060 if (src->wscale & PF_WSCALE_FLAG) { 4061 /* Remove scale factor from initial 4062 * window */ 4063 sws = src->wscale & PF_WSCALE_MASK; 4064 win = ((u_int32_t)win + (1 << sws) - 1) 4065 >> sws; 4066 dws = dst->wscale & PF_WSCALE_MASK; 4067 } else { 4068 /* fixup other window */ 4069 dst->max_win <<= dst->wscale & 4070 PF_WSCALE_MASK; 4071 /* in case of a retrans SYN|ACK */ 4072 dst->wscale = 0; 4073 } 4074 } 4075 } 4076 if (th->th_flags & TH_FIN) 4077 end++; 4078 4079 src->seqlo = seq; 4080 if (src->state < TCPS_SYN_SENT) 4081 src->state = TCPS_SYN_SENT; 4082 4083 /* 4084 * May need to slide the window (seqhi may have been set by 4085 * the crappy stack check or if we picked up the connection 4086 * after establishment) 4087 */ 4088 if (src->seqhi == 1 || 4089 SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi)) 4090 src->seqhi = end + MAX(1, dst->max_win << dws); 4091 if (win > src->max_win) 4092 src->max_win = win; 4093 4094 } else { 4095 ack = ntohl(th->th_ack) - dst->seqdiff; 4096 if (src->seqdiff) { 4097 /* Modulate sequence numbers */ 4098 pf_change_a(&th->th_seq, &th->th_sum, htonl(seq + 4099 
src->seqdiff), 0); 4100 pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0); 4101 *copyback = 1; 4102 } 4103 end = seq + pd->p_len; 4104 if (th->th_flags & TH_SYN) 4105 end++; 4106 if (th->th_flags & TH_FIN) 4107 end++; 4108 } 4109 4110 if ((th->th_flags & TH_ACK) == 0) { 4111 /* Let it pass through the ack skew check */ 4112 ack = dst->seqlo; 4113 } else if ((ack == 0 && 4114 (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) || 4115 /* broken tcp stacks do not set ack */ 4116 (dst->state < TCPS_SYN_SENT)) { 4117 /* 4118 * Many stacks (ours included) will set the ACK number in an 4119 * FIN|ACK if the SYN times out -- no sequence to ACK. 4120 */ 4121 ack = dst->seqlo; 4122 } 4123 4124 if (seq == end) { 4125 /* Ease sequencing restrictions on no data packets */ 4126 seq = src->seqlo; 4127 end = seq; 4128 } 4129 4130 ackskew = dst->seqlo - ack; 4131 4132 4133 /* 4134 * Need to demodulate the sequence numbers in any TCP SACK options 4135 * (Selective ACK). We could optionally validate the SACK values 4136 * against the current ACK window, either forwards or backwards, but 4137 * I'm not confident that SACK has been implemented properly 4138 * everywhere. It wouldn't surprise me if several stacks accidently 4139 * SACK too far backwards of previously ACKed data. There really aren't 4140 * any security implications of bad SACKing unless the target stack 4141 * doesn't validate the option length correctly. Someone trying to 4142 * spoof into a TCP connection won't bother blindly sending SACK 4143 * options anyway. 
4144 */ 4145 if (dst->seqdiff && (th->th_off << 2) > sizeof(struct tcphdr)) { 4146 if (pf_modulate_sack(pd, dst)) 4147 *copyback = 1; 4148 } 4149 4150 4151 #define MAXACKWINDOW (0xffff + 1500) /* 1500 is an arbitrary fudge factor */ 4152 if (SEQ_GEQ(src->seqhi, end) && 4153 /* Last octet inside other's window space */ 4154 SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) && 4155 /* Retrans: not more than one window back */ 4156 (ackskew >= -MAXACKWINDOW) && 4157 /* Acking not more than one reassembled fragment backwards */ 4158 (ackskew <= (MAXACKWINDOW << sws)) && 4159 /* Acking not more than one window forward */ 4160 ((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo || 4161 (orig_seq == src->seqlo + 1) || (orig_seq + 1 == src->seqlo))) { 4162 /* Require an exact/+1 sequence match on resets when possible */ 4163 4164 if (dst->scrub || src->scrub) { 4165 if (pf_normalize_tcp_stateful(pd, reason, *state, src, 4166 dst, copyback)) 4167 return (PF_DROP); 4168 } 4169 4170 /* update max window */ 4171 if (src->max_win < win) 4172 src->max_win = win; 4173 /* synchronize sequencing */ 4174 if (SEQ_GT(end, src->seqlo)) 4175 src->seqlo = end; 4176 /* slide the window of what the other end can send */ 4177 if (SEQ_GEQ(ack + (win << sws), dst->seqhi)) 4178 dst->seqhi = ack + MAX((win << sws), 1); 4179 4180 /* update states */ 4181 if (th->th_flags & TH_SYN) 4182 if (src->state < TCPS_SYN_SENT) 4183 src->state = TCPS_SYN_SENT; 4184 if (th->th_flags & TH_FIN) 4185 if (src->state < TCPS_CLOSING) 4186 src->state = TCPS_CLOSING; 4187 if (th->th_flags & TH_ACK) { 4188 if (dst->state == TCPS_SYN_SENT) { 4189 dst->state = TCPS_ESTABLISHED; 4190 if (src->state == TCPS_ESTABLISHED && 4191 !SLIST_EMPTY(&(*state)->src_nodes) && 4192 pf_src_connlimit(state)) { 4193 REASON_SET(reason, PFRES_SRCLIMIT); 4194 return (PF_DROP); 4195 } 4196 } else if (dst->state == TCPS_CLOSING) 4197 dst->state = TCPS_FIN_WAIT_2; 4198 } 4199 if (th->th_flags & TH_RST) 4200 src->state = dst->state = 
TCPS_TIME_WAIT; 4201 4202 /* update expire time */ 4203 (*state)->expire = time_second; 4204 if (src->state >= TCPS_FIN_WAIT_2 && 4205 dst->state >= TCPS_FIN_WAIT_2) 4206 (*state)->timeout = PFTM_TCP_CLOSED; 4207 else if (src->state >= TCPS_CLOSING && 4208 dst->state >= TCPS_CLOSING) 4209 (*state)->timeout = PFTM_TCP_FIN_WAIT; 4210 else if (src->state < TCPS_ESTABLISHED || 4211 dst->state < TCPS_ESTABLISHED) 4212 (*state)->timeout = PFTM_TCP_OPENING; 4213 else if (src->state >= TCPS_CLOSING || 4214 dst->state >= TCPS_CLOSING) 4215 (*state)->timeout = PFTM_TCP_CLOSING; 4216 else 4217 (*state)->timeout = PFTM_TCP_ESTABLISHED; 4218 4219 /* Fall through to PASS packet */ 4220 } else if ((dst->state < TCPS_SYN_SENT || 4221 dst->state >= TCPS_FIN_WAIT_2 || 4222 src->state >= TCPS_FIN_WAIT_2) && 4223 SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) && 4224 /* Within a window forward of the originating packet */ 4225 SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) { 4226 /* Within a window backward of the originating packet */ 4227 4228 /* 4229 * This currently handles three situations: 4230 * 1) Stupid stacks will shotgun SYNs before their peer 4231 * replies. 4232 * 2) When PF catches an already established stream (the 4233 * firewall rebooted, the state table was flushed, routes 4234 * changed...) 4235 * 3) Packets get funky immediately after the connection 4236 * closes (this should catch Solaris spurious ACK|FINs 4237 * that web servers like to spew after a close) 4238 * 4239 * This must be a little more careful than the above code 4240 * since packet floods will also be caught here. We don't 4241 * update the TTL here to mitigate the damage of a packet 4242 * flood and so the same code can handle awkward establishment 4243 * and a loosened connection close. 4244 * In the establishment case, a correct peer response will 4245 * validate the connection, go through the normal state code 4246 * and keep updating the state TTL. 
4247 */ 4248 4249 if (pf_status.debug >= LOG_NOTICE) { 4250 log(LOG_NOTICE, "pf: loose state match: "); 4251 pf_print_state(*state); 4252 pf_print_flags(th->th_flags); 4253 addlog(" seq=%u (%u) ack=%u len=%u ackskew=%d " 4254 "pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack, 4255 pd->p_len, ackskew, (*state)->packets[0], 4256 (*state)->packets[1], 4257 pd->dir == PF_IN ? "in" : "out", 4258 pd->dir == (*state)->direction ? "fwd" : "rev"); 4259 } 4260 4261 if (dst->scrub || src->scrub) { 4262 if (pf_normalize_tcp_stateful(pd, reason, *state, src, 4263 dst, copyback)) 4264 return (PF_DROP); 4265 } 4266 4267 /* update max window */ 4268 if (src->max_win < win) 4269 src->max_win = win; 4270 /* synchronize sequencing */ 4271 if (SEQ_GT(end, src->seqlo)) 4272 src->seqlo = end; 4273 /* slide the window of what the other end can send */ 4274 if (SEQ_GEQ(ack + (win << sws), dst->seqhi)) 4275 dst->seqhi = ack + MAX((win << sws), 1); 4276 4277 /* 4278 * Cannot set dst->seqhi here since this could be a shotgunned 4279 * SYN and not an already established connection. 
4280 */ 4281 if (th->th_flags & TH_FIN) 4282 if (src->state < TCPS_CLOSING) 4283 src->state = TCPS_CLOSING; 4284 if (th->th_flags & TH_RST) 4285 src->state = dst->state = TCPS_TIME_WAIT; 4286 4287 /* Fall through to PASS packet */ 4288 } else { 4289 if ((*state)->dst.state == TCPS_SYN_SENT && 4290 (*state)->src.state == TCPS_SYN_SENT) { 4291 /* Send RST for state mismatches during handshake */ 4292 if (!(th->th_flags & TH_RST)) 4293 pf_send_tcp((*state)->rule.ptr, pd->af, 4294 pd->dst, pd->src, th->th_dport, 4295 th->th_sport, ntohl(th->th_ack), 0, 4296 TH_RST, 0, 0, 4297 (*state)->rule.ptr->return_ttl, 1, 0, 4298 pd->rdomain, pd->eh, pd->kif->pfik_ifp); 4299 src->seqlo = 0; 4300 src->seqhi = 1; 4301 src->max_win = 1; 4302 } else if (pf_status.debug >= LOG_NOTICE) { 4303 log(LOG_NOTICE, "pf: BAD state: "); 4304 pf_print_state(*state); 4305 pf_print_flags(th->th_flags); 4306 addlog(" seq=%u (%u) ack=%u len=%u ackskew=%d " 4307 "pkts=%llu:%llu dir=%s,%s\n", 4308 seq, orig_seq, ack, pd->p_len, ackskew, 4309 (*state)->packets[0], (*state)->packets[1], 4310 pd->dir == PF_IN ? "in" : "out", 4311 pd->dir == (*state)->direction ? "fwd" : "rev"); 4312 addlog("pf: State failure on: %c %c %c %c | %c %c\n", 4313 SEQ_GEQ(src->seqhi, end) ? ' ' : '1', 4314 SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ? 4315 ' ': '2', 4316 (ackskew >= -MAXACKWINDOW) ? ' ' : '3', 4317 (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4', 4318 SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) ?' ' :'5', 4319 SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' 
' :'6'); 4320 } 4321 REASON_SET(reason, PFRES_BADSTATE); 4322 return (PF_DROP); 4323 } 4324 4325 return (PF_PASS); 4326 } 4327 4328 int 4329 pf_tcp_track_sloppy(struct pf_pdesc *pd, struct pf_state_peer *src, 4330 struct pf_state_peer *dst, struct pf_state **state, u_short *reason) 4331 { 4332 struct tcphdr *th = pd->hdr.tcp; 4333 4334 if (th->th_flags & TH_SYN) 4335 if (src->state < TCPS_SYN_SENT) 4336 src->state = TCPS_SYN_SENT; 4337 if (th->th_flags & TH_FIN) 4338 if (src->state < TCPS_CLOSING) 4339 src->state = TCPS_CLOSING; 4340 if (th->th_flags & TH_ACK) { 4341 if (dst->state == TCPS_SYN_SENT) { 4342 dst->state = TCPS_ESTABLISHED; 4343 if (src->state == TCPS_ESTABLISHED && 4344 !SLIST_EMPTY(&(*state)->src_nodes) && 4345 pf_src_connlimit(state)) { 4346 REASON_SET(reason, PFRES_SRCLIMIT); 4347 return (PF_DROP); 4348 } 4349 } else if (dst->state == TCPS_CLOSING) { 4350 dst->state = TCPS_FIN_WAIT_2; 4351 } else if (src->state == TCPS_SYN_SENT && 4352 dst->state < TCPS_SYN_SENT) { 4353 /* 4354 * Handle a special sloppy case where we only see one 4355 * half of the connection. If there is a ACK after 4356 * the initial SYN without ever seeing a packet from 4357 * the destination, set the connection to established. 4358 */ 4359 dst->state = src->state = TCPS_ESTABLISHED; 4360 if (!SLIST_EMPTY(&(*state)->src_nodes) && 4361 pf_src_connlimit(state)) { 4362 REASON_SET(reason, PFRES_SRCLIMIT); 4363 return (PF_DROP); 4364 } 4365 } else if (src->state == TCPS_CLOSING && 4366 dst->state == TCPS_ESTABLISHED && 4367 dst->seqlo == 0) { 4368 /* 4369 * Handle the closing of half connections where we 4370 * don't see the full bidirectional FIN/ACK+ACK 4371 * handshake. 
4372 */ 4373 dst->state = TCPS_CLOSING; 4374 } 4375 } 4376 if (th->th_flags & TH_RST) 4377 src->state = dst->state = TCPS_TIME_WAIT; 4378 4379 /* update expire time */ 4380 (*state)->expire = time_second; 4381 if (src->state >= TCPS_FIN_WAIT_2 && 4382 dst->state >= TCPS_FIN_WAIT_2) 4383 (*state)->timeout = PFTM_TCP_CLOSED; 4384 else if (src->state >= TCPS_CLOSING && 4385 dst->state >= TCPS_CLOSING) 4386 (*state)->timeout = PFTM_TCP_FIN_WAIT; 4387 else if (src->state < TCPS_ESTABLISHED || 4388 dst->state < TCPS_ESTABLISHED) 4389 (*state)->timeout = PFTM_TCP_OPENING; 4390 else if (src->state >= TCPS_CLOSING || 4391 dst->state >= TCPS_CLOSING) 4392 (*state)->timeout = PFTM_TCP_CLOSING; 4393 else 4394 (*state)->timeout = PFTM_TCP_ESTABLISHED; 4395 4396 return (PF_PASS); 4397 } 4398 4399 int 4400 pf_test_state_tcp(struct pf_pdesc *pd, struct pf_state **state, u_short *reason) 4401 { 4402 struct pf_state_key_cmp key; 4403 struct tcphdr *th = pd->hdr.tcp; 4404 int copyback = 0; 4405 struct pf_state_peer *src, *dst; 4406 struct pf_state_key *sk; 4407 int action = PF_PASS; 4408 4409 key.af = pd->af; 4410 key.proto = IPPROTO_TCP; 4411 key.rdomain = pd->rdomain; 4412 PF_ACPY(&key.addr[pd->sidx], pd->src, key.af); 4413 PF_ACPY(&key.addr[pd->didx], pd->dst, key.af); 4414 key.port[pd->sidx] = th->th_sport; 4415 key.port[pd->didx] = th->th_dport; 4416 4417 STATE_LOOKUP(pd->kif, &key, pd->dir, *state, pd->m); 4418 4419 if (pd->dir == (*state)->direction) { 4420 src = &(*state)->src; 4421 dst = &(*state)->dst; 4422 } else { 4423 src = &(*state)->dst; 4424 dst = &(*state)->src; 4425 } 4426 4427 sk = (*state)->key[pd->didx]; 4428 4429 if ((*state)->src.state == PF_TCPS_PROXY_SRC) { 4430 if (pd->dir != (*state)->direction) { 4431 REASON_SET(reason, PFRES_SYNPROXY); 4432 return (PF_SYNPROXY_DROP); 4433 } 4434 if (th->th_flags & TH_SYN) { 4435 if (ntohl(th->th_seq) != (*state)->src.seqlo) { 4436 REASON_SET(reason, PFRES_SYNPROXY); 4437 return (PF_DROP); 4438 } 4439 
pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst, 4440 pd->src, th->th_dport, th->th_sport, 4441 (*state)->src.seqhi, ntohl(th->th_seq) + 1, 4442 TH_SYN|TH_ACK, 0, (*state)->src.mss, 0, 1, 4443 0, pd->rdomain, NULL, NULL); 4444 REASON_SET(reason, PFRES_SYNPROXY); 4445 return (PF_SYNPROXY_DROP); 4446 } else if (!(th->th_flags & TH_ACK) || 4447 (ntohl(th->th_ack) != (*state)->src.seqhi + 1) || 4448 (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) { 4449 REASON_SET(reason, PFRES_SYNPROXY); 4450 return (PF_DROP); 4451 } else if (!SLIST_EMPTY(&(*state)->src_nodes) && 4452 pf_src_connlimit(state)) { 4453 REASON_SET(reason, PFRES_SRCLIMIT); 4454 return (PF_DROP); 4455 } else 4456 (*state)->src.state = PF_TCPS_PROXY_DST; 4457 } 4458 if ((*state)->src.state == PF_TCPS_PROXY_DST) { 4459 if (pd->dir == (*state)->direction) { 4460 if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) || 4461 (ntohl(th->th_ack) != (*state)->src.seqhi + 1) || 4462 (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) { 4463 REASON_SET(reason, PFRES_SYNPROXY); 4464 return (PF_DROP); 4465 } 4466 (*state)->src.max_win = MAX(ntohs(th->th_win), 1); 4467 if ((*state)->dst.seqhi == 1) 4468 (*state)->dst.seqhi = htonl(arc4random()); 4469 pf_send_tcp((*state)->rule.ptr, pd->af, 4470 &sk->addr[pd->sidx], &sk->addr[pd->didx], 4471 sk->port[pd->sidx], sk->port[pd->didx], 4472 (*state)->dst.seqhi, 0, TH_SYN, 0, 4473 (*state)->src.mss, 0, 0, (*state)->tag, 4474 sk->rdomain, NULL, NULL); 4475 REASON_SET(reason, PFRES_SYNPROXY); 4476 return (PF_SYNPROXY_DROP); 4477 } else if (((th->th_flags & (TH_SYN|TH_ACK)) != 4478 (TH_SYN|TH_ACK)) || 4479 (ntohl(th->th_ack) != (*state)->dst.seqhi + 1)) { 4480 REASON_SET(reason, PFRES_SYNPROXY); 4481 return (PF_DROP); 4482 } else { 4483 (*state)->dst.max_win = MAX(ntohs(th->th_win), 1); 4484 (*state)->dst.seqlo = ntohl(th->th_seq); 4485 pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst, 4486 pd->src, th->th_dport, th->th_sport, 4487 ntohl(th->th_ack), ntohl(th->th_seq) + 1, 4488 TH_ACK, 
(*state)->src.max_win, 0, 0, 0, 4489 (*state)->tag, pd->rdomain, NULL, NULL); 4490 pf_send_tcp((*state)->rule.ptr, pd->af, 4491 &sk->addr[pd->sidx], &sk->addr[pd->didx], 4492 sk->port[pd->sidx], sk->port[pd->didx], 4493 (*state)->src.seqhi + 1, (*state)->src.seqlo + 1, 4494 TH_ACK, (*state)->dst.max_win, 0, 0, 1, 4495 0, sk->rdomain, NULL, NULL); 4496 (*state)->src.seqdiff = (*state)->dst.seqhi - 4497 (*state)->src.seqlo; 4498 (*state)->dst.seqdiff = (*state)->src.seqhi - 4499 (*state)->dst.seqlo; 4500 (*state)->src.seqhi = (*state)->src.seqlo + 4501 (*state)->dst.max_win; 4502 (*state)->dst.seqhi = (*state)->dst.seqlo + 4503 (*state)->src.max_win; 4504 (*state)->src.wscale = (*state)->dst.wscale = 0; 4505 (*state)->src.state = (*state)->dst.state = 4506 TCPS_ESTABLISHED; 4507 REASON_SET(reason, PFRES_SYNPROXY); 4508 return (PF_SYNPROXY_DROP); 4509 } 4510 } 4511 4512 if (((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN) && 4513 dst->state >= TCPS_FIN_WAIT_2 && 4514 src->state >= TCPS_FIN_WAIT_2) { 4515 if (pf_status.debug >= LOG_NOTICE) { 4516 log(LOG_NOTICE, "pf: state reuse "); 4517 pf_print_state(*state); 4518 pf_print_flags(th->th_flags); 4519 addlog("\n"); 4520 } 4521 /* XXX make sure it's the same direction ?? 
*/ 4522 (*state)->src.state = (*state)->dst.state = TCPS_CLOSED; 4523 pf_unlink_state(*state); 4524 *state = NULL; 4525 return (PF_DROP); 4526 } 4527 4528 if ((*state)->state_flags & PFSTATE_SLOPPY) { 4529 if (pf_tcp_track_sloppy(pd, src, dst, state, reason) == PF_DROP) 4530 return (PF_DROP); 4531 } else { 4532 int ret; 4533 4534 if (PF_REVERSED_KEY((*state)->key, pd->af)) 4535 ret = pf_tcp_track_full(pd, dst, src, state, 4536 reason, ©back); 4537 else 4538 ret = pf_tcp_track_full(pd, src, dst, state, 4539 reason, ©back); 4540 if (ret == PF_DROP) 4541 return (PF_DROP); 4542 } 4543 4544 /* translate source/destination address, if necessary */ 4545 if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { 4546 struct pf_state_key *nk; 4547 int afto, sidx, didx; 4548 4549 if (PF_REVERSED_KEY((*state)->key, pd->af)) 4550 nk = (*state)->key[pd->sidx]; 4551 else 4552 nk = (*state)->key[pd->didx]; 4553 4554 afto = pd->af != nk->af; 4555 sidx = afto ? pd->didx : pd->sidx; 4556 didx = afto ? 
pd->sidx : pd->didx; 4557 4558 if (afto || PF_ANEQ(pd->src, &nk->addr[sidx], pd->af) || 4559 nk->port[sidx] != th->th_sport) 4560 pf_change_ap(pd->src, &th->th_sport, &th->th_sum, 4561 &nk->addr[sidx], nk->port[sidx], 0, pd->af, nk->af); 4562 4563 if (afto || PF_ANEQ(pd->dst, &nk->addr[didx], pd->af) || 4564 pd->rdomain != nk->rdomain) 4565 pd->destchg = 1; 4566 if (afto || PF_ANEQ(pd->dst, &nk->addr[didx], pd->af) || 4567 nk->port[didx] != th->th_dport) 4568 pf_change_ap(pd->dst, &th->th_dport, &th->th_sum, 4569 &nk->addr[didx], nk->port[didx], 0, pd->af, 4570 nk->af); 4571 pd->m->m_pkthdr.rdomain = nk->rdomain; 4572 4573 #if INET && INET6 4574 if (afto) { 4575 PF_ACPY(&pd->nsaddr, &nk->addr[sidx], nk->af); 4576 PF_ACPY(&pd->ndaddr, &nk->addr[didx], nk->af); 4577 pd->naf = nk->af; 4578 action = PF_AFRT; 4579 } 4580 #endif /* INET && INET6 */ 4581 4582 copyback = 1; 4583 } 4584 4585 /* Copyback sequence modulation or stateful scrub changes if needed */ 4586 if (copyback) 4587 m_copyback(pd->m, pd->off, sizeof(*th), th, M_NOWAIT); 4588 4589 return (action); 4590 } 4591 4592 int 4593 pf_test_state_udp(struct pf_pdesc *pd, struct pf_state **state) 4594 { 4595 struct pf_state_peer *src, *dst; 4596 struct pf_state_key_cmp key; 4597 struct udphdr *uh = pd->hdr.udp; 4598 int action = PF_PASS; 4599 4600 key.af = pd->af; 4601 key.proto = IPPROTO_UDP; 4602 key.rdomain = pd->rdomain; 4603 PF_ACPY(&key.addr[pd->sidx], pd->src, key.af); 4604 PF_ACPY(&key.addr[pd->didx], pd->dst, key.af); 4605 key.port[pd->sidx] = uh->uh_sport; 4606 key.port[pd->didx] = uh->uh_dport; 4607 4608 STATE_LOOKUP(pd->kif, &key, pd->dir, *state, pd->m); 4609 4610 if (pd->dir == (*state)->direction) { 4611 src = &(*state)->src; 4612 dst = &(*state)->dst; 4613 } else { 4614 src = &(*state)->dst; 4615 dst = &(*state)->src; 4616 } 4617 4618 /* update states */ 4619 if (src->state < PFUDPS_SINGLE) 4620 src->state = PFUDPS_SINGLE; 4621 if (dst->state == PFUDPS_SINGLE) 4622 dst->state = PFUDPS_MULTIPLE; 4623 
4624 /* update expire time */ 4625 (*state)->expire = time_second; 4626 if (src->state == PFUDPS_MULTIPLE && dst->state == PFUDPS_MULTIPLE) 4627 (*state)->timeout = PFTM_UDP_MULTIPLE; 4628 else 4629 (*state)->timeout = PFTM_UDP_SINGLE; 4630 4631 /* translate source/destination address, if necessary */ 4632 if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { 4633 struct pf_state_key *nk; 4634 int afto, sidx, didx; 4635 4636 if (PF_REVERSED_KEY((*state)->key, pd->af)) 4637 nk = (*state)->key[pd->sidx]; 4638 else 4639 nk = (*state)->key[pd->didx]; 4640 4641 afto = pd->af != nk->af; 4642 sidx = afto ? pd->didx : pd->sidx; 4643 didx = afto ? pd->sidx : pd->didx; 4644 4645 if (afto || PF_ANEQ(pd->src, &nk->addr[sidx], pd->af) || 4646 nk->port[sidx] != uh->uh_sport) 4647 pf_change_ap(pd->src, &uh->uh_sport, &uh->uh_sum, 4648 &nk->addr[sidx], nk->port[sidx], 1, pd->af, nk->af); 4649 4650 if (afto || PF_ANEQ(pd->dst, &nk->addr[didx], pd->af) || 4651 pd->rdomain != nk->rdomain) 4652 pd->destchg = 1; 4653 if (afto || PF_ANEQ(pd->dst, &nk->addr[didx], pd->af) || 4654 nk->port[didx] != uh->uh_dport) 4655 pf_change_ap(pd->dst, &uh->uh_dport, &uh->uh_sum, 4656 &nk->addr[didx], nk->port[didx], 1, pd->af, nk->af); 4657 pd->m->m_pkthdr.rdomain = nk->rdomain; 4658 4659 #if INET && INET6 4660 if (afto) { 4661 PF_ACPY(&pd->nsaddr, &nk->addr[sidx], nk->af); 4662 PF_ACPY(&pd->ndaddr, &nk->addr[didx], nk->af); 4663 pd->naf = nk->af; 4664 action = PF_AFRT; 4665 } 4666 #endif /* INET && INET6 */ 4667 4668 m_copyback(pd->m, pd->off, sizeof(*uh), uh, M_NOWAIT); 4669 } 4670 4671 return (action); 4672 } 4673 4674 int 4675 pf_icmp_state_lookup(struct pf_pdesc *pd, struct pf_state_key_cmp *key, 4676 struct pf_state **state, u_int16_t icmpid, u_int16_t type, 4677 int icmp_dir, int *iidx, int multi, int inner) 4678 { 4679 int direction; 4680 4681 key->af = pd->af; 4682 key->proto = pd->proto; 4683 key->rdomain = pd->rdomain; 4684 if (icmp_dir == PF_IN) { 4685 *iidx = pd->sidx; 4686 
key->port[pd->sidx] = icmpid; 4687 key->port[pd->didx] = type; 4688 } else { 4689 *iidx = pd->didx; 4690 key->port[pd->sidx] = type; 4691 key->port[pd->didx] = icmpid; 4692 } 4693 4694 if (pf_state_key_addr_setup(pd, key, pd->sidx, pd->src, pd->didx, 4695 pd->dst, pd->af, multi)) 4696 return (PF_DROP); 4697 4698 STATE_LOOKUP(pd->kif, key, pd->dir, *state, pd->m); 4699 4700 /* Is this ICMP message flowing in right direction? */ 4701 if ((*state)->key[PF_SK_WIRE]->af != (*state)->key[PF_SK_STACK]->af) 4702 direction = (pd->af == (*state)->key[PF_SK_WIRE]->af) ? 4703 PF_IN : PF_OUT; 4704 else 4705 direction = (*state)->direction; 4706 if ((((!inner && direction == pd->dir) || 4707 (inner && direction != pd->dir)) ? 4708 PF_IN : PF_OUT) != icmp_dir) { 4709 if (pf_status.debug >= LOG_NOTICE) { 4710 log(LOG_NOTICE, 4711 "pf: icmp type %d in wrong direction (%d): ", 4712 ntohs(type), icmp_dir); 4713 pf_print_state(*state); 4714 addlog("\n"); 4715 } 4716 return (PF_DROP); 4717 } 4718 return (-1); 4719 } 4720 4721 int 4722 pf_test_state_icmp(struct pf_pdesc *pd, struct pf_state **state, 4723 u_short *reason) 4724 { 4725 struct pf_addr *saddr = pd->src, *daddr = pd->dst; 4726 u_int16_t *icmpsum, virtual_id, virtual_type; 4727 u_int8_t icmptype; 4728 int icmp_dir, iidx, ret, multi; 4729 struct pf_state_key_cmp key; 4730 4731 switch (pd->proto) { 4732 #ifdef INET 4733 case IPPROTO_ICMP: 4734 icmptype = pd->hdr.icmp->icmp_type; 4735 icmpsum = &pd->hdr.icmp->icmp_cksum; 4736 break; 4737 #endif /* INET */ 4738 #ifdef INET6 4739 case IPPROTO_ICMPV6: 4740 icmptype = pd->hdr.icmp6->icmp6_type; 4741 icmpsum = &pd->hdr.icmp6->icmp6_cksum; 4742 break; 4743 #endif /* INET6 */ 4744 } 4745 4746 if (pf_icmp_mapping(pd, icmptype, &icmp_dir, &multi, 4747 &virtual_id, &virtual_type) == 0) { 4748 /* 4749 * ICMP query/reply message not related to a TCP/UDP packet. 4750 * Search for an ICMP state. 
4751 */ 4752 ret = pf_icmp_state_lookup(pd, &key, state, 4753 virtual_id, virtual_type, icmp_dir, &iidx, 4754 PF_ICMP_MULTI_NONE, 0); 4755 if (ret >= 0) { 4756 if (ret == PF_DROP && pd->af == AF_INET6 && 4757 icmp_dir == PF_OUT) { 4758 ret = pf_icmp_state_lookup(pd, &key, state, 4759 virtual_id, virtual_type, icmp_dir, &iidx, 4760 multi, 0); 4761 if (ret >= 0) 4762 return (ret); 4763 } else 4764 return (ret); 4765 } 4766 4767 (*state)->expire = time_second; 4768 (*state)->timeout = PFTM_ICMP_ERROR_REPLY; 4769 4770 /* translate source/destination address, if necessary */ 4771 if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { 4772 struct pf_state_key *nk; 4773 int afto, sidx, didx; 4774 4775 if (PF_REVERSED_KEY((*state)->key, pd->af)) 4776 nk = (*state)->key[pd->sidx]; 4777 else 4778 nk = (*state)->key[pd->didx]; 4779 4780 afto = pd->af != nk->af; 4781 sidx = afto ? pd->didx : pd->sidx; 4782 didx = afto ? pd->sidx : pd->didx; 4783 iidx = afto ? !iidx : iidx; 4784 4785 if (pd->rdomain != nk->rdomain) 4786 pd->destchg = 1; 4787 pd->m->m_pkthdr.rdomain = nk->rdomain; 4788 4789 switch (pd->af) { 4790 #ifdef INET 4791 case AF_INET: 4792 #ifdef INET6 4793 if (afto) { 4794 if (pf_translate_icmp_af(AF_INET6, 4795 pd->hdr.icmp)) 4796 return (PF_DROP); 4797 pd->proto = IPPROTO_ICMPV6; 4798 } 4799 #endif /* INET6 */ 4800 if (!afto && PF_ANEQ(pd->src, 4801 &nk->addr[sidx], AF_INET)) 4802 pf_change_a(&saddr->v4.s_addr, NULL, 4803 nk->addr[sidx].v4.s_addr, 0); 4804 4805 if (!afto && PF_ANEQ(pd->dst, 4806 &nk->addr[didx], AF_INET)) { 4807 pf_change_a(&daddr->v4.s_addr, NULL, 4808 nk->addr[didx].v4.s_addr, 0); 4809 pd->destchg = 1; 4810 } 4811 4812 if (nk->port[iidx] != 4813 pd->hdr.icmp->icmp_id) { 4814 pd->hdr.icmp->icmp_cksum = 4815 pf_cksum_fixup( 4816 pd->hdr.icmp->icmp_cksum, 4817 pd->hdr.icmp->icmp_id, 4818 nk->port[iidx], 0); 4819 pd->hdr.icmp->icmp_id = nk->port[iidx]; 4820 } 4821 4822 m_copyback(pd->m, pd->off, ICMP_MINLEN, 4823 pd->hdr.icmp, M_NOWAIT); 4824 
break; 4825 #endif /* INET */ 4826 #ifdef INET6 4827 case AF_INET6: 4828 #ifdef INET 4829 if (afto) { 4830 if (pf_translate_icmp_af(AF_INET, 4831 pd->hdr.icmp6)) 4832 return (PF_DROP); 4833 pd->proto = IPPROTO_ICMP; 4834 } 4835 #endif /* INET */ 4836 if (!afto && PF_ANEQ(pd->src, 4837 &nk->addr[sidx], AF_INET6)) 4838 pf_change_a6(saddr, 4839 &pd->hdr.icmp6->icmp6_cksum, 4840 &nk->addr[sidx], 0); 4841 4842 if (!afto && PF_ANEQ(pd->dst, 4843 &nk->addr[didx], AF_INET6)) { 4844 pf_change_a6(daddr, 4845 &pd->hdr.icmp6->icmp6_cksum, 4846 &nk->addr[didx], 0); 4847 pd->destchg = 1; 4848 } 4849 4850 if (nk->port[iidx] != pd->hdr.icmp6->icmp6_id) 4851 pd->hdr.icmp6->icmp6_id = 4852 nk->port[iidx]; 4853 4854 m_copyback(pd->m, pd->off, 4855 sizeof(struct icmp6_hdr), pd->hdr.icmp6, 4856 M_NOWAIT); 4857 break; 4858 #endif /* INET6 */ 4859 } 4860 #if INET && INET6 4861 if (afto) { 4862 PF_ACPY(&pd->nsaddr, &nk->addr[sidx], nk->af); 4863 PF_ACPY(&pd->ndaddr, &nk->addr[didx], nk->af); 4864 pd->naf = nk->af; 4865 return (PF_AFRT); 4866 } 4867 #endif /* INET && INET6 */ 4868 } 4869 return (PF_PASS); 4870 4871 } else { 4872 /* 4873 * ICMP error message in response to a TCP/UDP packet. 4874 * Extract the inner TCP/UDP header and search for that state. 4875 */ 4876 struct pf_pdesc pd2; 4877 #ifdef INET 4878 struct ip h2; 4879 #endif /* INET */ 4880 #ifdef INET6 4881 struct ip6_hdr h2_6; 4882 #endif /* INET6 */ 4883 u_int16_t *ipsum2; 4884 int ipoff2; 4885 4886 /* Initialize pd2 fields valid for both packets with pd. */ 4887 bzero(&pd2, sizeof(pd2)); 4888 pd2.af = pd->af; 4889 pd2.dir = pd->dir; 4890 pd2.kif = pd->kif; 4891 pd2.m = pd->m; 4892 pd2.rdomain = pd->rdomain; 4893 /* Payload packet is from the opposite direction. */ 4894 pd2.sidx = (pd2.dir == PF_IN) ? 1 : 0; 4895 pd2.didx = (pd2.dir == PF_IN) ? 
0 : 1; 4896 switch (pd->af) { 4897 #ifdef INET 4898 case AF_INET: 4899 /* offset of h2 in mbuf chain */ 4900 ipoff2 = pd->off + ICMP_MINLEN; 4901 4902 if (!pf_pull_hdr(pd2.m, ipoff2, &h2, sizeof(h2), 4903 NULL, reason, pd2.af)) { 4904 DPFPRINTF(LOG_NOTICE, 4905 "ICMP error message too short (ip)"); 4906 return (PF_DROP); 4907 } 4908 /* 4909 * ICMP error messages don't refer to non-first 4910 * fragments 4911 */ 4912 if (h2.ip_off & htons(IP_OFFMASK)) { 4913 REASON_SET(reason, PFRES_FRAG); 4914 return (PF_DROP); 4915 } 4916 4917 /* offset of protocol header that follows h2 */ 4918 pd2.off = ipoff2 + (h2.ip_hl << 2); 4919 4920 pd2.proto = h2.ip_p; 4921 pd2.tot_len = ntohs(h2.ip_len); 4922 pd2.src = (struct pf_addr *)&h2.ip_src; 4923 pd2.dst = (struct pf_addr *)&h2.ip_dst; 4924 ipsum2 = &h2.ip_sum; 4925 break; 4926 #endif /* INET */ 4927 #ifdef INET6 4928 case AF_INET6: 4929 ipoff2 = pd->off + sizeof(struct icmp6_hdr); 4930 4931 if (!pf_pull_hdr(pd2.m, ipoff2, &h2_6, sizeof(h2_6), 4932 NULL, reason, pd2.af)) { 4933 DPFPRINTF(LOG_NOTICE, 4934 "ICMP error message too short (ip6)"); 4935 return (PF_DROP); 4936 } 4937 4938 pd2.off = ipoff2; 4939 if (pf_walk_header6(&pd2, &h2_6, reason) != PF_PASS) 4940 return (PF_DROP); 4941 4942 pd2.tot_len = ntohs(h2_6.ip6_plen) + 4943 sizeof(struct ip6_hdr); 4944 pd2.src = (struct pf_addr *)&h2_6.ip6_src; 4945 pd2.dst = (struct pf_addr *)&h2_6.ip6_dst; 4946 ipsum2 = NULL; 4947 break; 4948 #endif /* INET6 */ 4949 } 4950 4951 switch (pd2.proto) { 4952 case IPPROTO_TCP: { 4953 struct tcphdr th; 4954 u_int32_t seq; 4955 struct pf_state_peer *src, *dst; 4956 u_int8_t dws; 4957 int copyback = 0; 4958 4959 /* 4960 * Only the first 8 bytes of the TCP header can be 4961 * expected. Don't access any TCP header fields after 4962 * th_seq, an ackskew test is not possible. 
4963 */ 4964 if (!pf_pull_hdr(pd2.m, pd2.off, &th, 8, NULL, reason, 4965 pd2.af)) { 4966 DPFPRINTF(LOG_NOTICE, 4967 "ICMP error message too short (tcp)"); 4968 return (PF_DROP); 4969 } 4970 4971 key.af = pd2.af; 4972 key.proto = IPPROTO_TCP; 4973 key.rdomain = pd2.rdomain; 4974 PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); 4975 PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); 4976 key.port[pd2.sidx] = th.th_sport; 4977 key.port[pd2.didx] = th.th_dport; 4978 4979 STATE_LOOKUP(pd2.kif, &key, pd2.dir, *state, pd2.m); 4980 4981 if (pd2.dir == (*state)->direction) { 4982 if (PF_REVERSED_KEY((*state)->key, pd->af)) { 4983 src = &(*state)->src; 4984 dst = &(*state)->dst; 4985 } else { 4986 src = &(*state)->dst; 4987 dst = &(*state)->src; 4988 } 4989 } else { 4990 if (PF_REVERSED_KEY((*state)->key, pd->af)) { 4991 src = &(*state)->dst; 4992 dst = &(*state)->src; 4993 } else { 4994 src = &(*state)->src; 4995 dst = &(*state)->dst; 4996 } 4997 } 4998 4999 if (src->wscale && dst->wscale) 5000 dws = dst->wscale & PF_WSCALE_MASK; 5001 else 5002 dws = 0; 5003 5004 /* Demodulate sequence number */ 5005 seq = ntohl(th.th_seq) - src->seqdiff; 5006 if (src->seqdiff) { 5007 pf_change_a(&th.th_seq, icmpsum, 5008 htonl(seq), 0); 5009 copyback = 1; 5010 } 5011 5012 if (!((*state)->state_flags & PFSTATE_SLOPPY) && 5013 (!SEQ_GEQ(src->seqhi, seq) || !SEQ_GEQ(seq, 5014 src->seqlo - (dst->max_win << dws)))) { 5015 if (pf_status.debug >= LOG_NOTICE) { 5016 log(LOG_NOTICE, 5017 "pf: BAD ICMP %d:%d ", 5018 icmptype, pd->hdr.icmp->icmp_code); 5019 pf_print_host(pd->src, 0, pd->af); 5020 addlog(" -> "); 5021 pf_print_host(pd->dst, 0, pd->af); 5022 addlog(" state: "); 5023 pf_print_state(*state); 5024 addlog(" seq=%u\n", seq); 5025 } 5026 REASON_SET(reason, PFRES_BADSTATE); 5027 return (PF_DROP); 5028 } else { 5029 if (pf_status.debug >= LOG_DEBUG) { 5030 log(LOG_DEBUG, 5031 "pf: OK ICMP %d:%d ", 5032 icmptype, pd->hdr.icmp->icmp_code); 5033 pf_print_host(pd->src, 0, pd->af); 5034 addlog(" -> "); 
5035 pf_print_host(pd->dst, 0, pd->af); 5036 addlog(" state: "); 5037 pf_print_state(*state); 5038 addlog(" seq=%u\n", seq); 5039 } 5040 } 5041 5042 /* translate source/destination address, if necessary */ 5043 if ((*state)->key[PF_SK_WIRE] != 5044 (*state)->key[PF_SK_STACK]) { 5045 struct pf_state_key *nk; 5046 int afto, sidx, didx; 5047 5048 if (PF_REVERSED_KEY((*state)->key, pd->af)) 5049 nk = (*state)->key[pd->sidx]; 5050 else 5051 nk = (*state)->key[pd->didx]; 5052 5053 afto = pd->af != nk->af; 5054 sidx = afto ? pd2.didx : pd2.sidx; 5055 didx = afto ? pd2.sidx : pd2.didx; 5056 5057 #if INET && INET6 5058 if (afto) { 5059 if (pf_translate_icmp_af(nk->af, 5060 pd->hdr.icmp)) 5061 return (PF_DROP); 5062 m_copyback(pd->m, pd->off, 5063 sizeof(struct icmp6_hdr), 5064 pd->hdr.icmp6, M_NOWAIT); 5065 if (pf_change_icmp_af(pd->m, ipoff2, 5066 pd, &pd2, &nk->addr[sidx], 5067 &nk->addr[didx], pd->af, nk->af)) 5068 return (PF_DROP); 5069 if (nk->af == AF_INET) 5070 pd->proto = IPPROTO_ICMP; 5071 else 5072 pd->proto = IPPROTO_ICMPV6; 5073 th.th_sport = nk->port[sidx]; 5074 th.th_dport = nk->port[didx]; 5075 m_copyback(pd2.m, pd2.off, 8, &th, 5076 M_NOWAIT); 5077 pd->m->m_pkthdr.rdomain = nk->rdomain; 5078 pd->destchg = 1; 5079 PF_ACPY(&pd->nsaddr, 5080 &nk->addr[pd2.sidx], nk->af); 5081 PF_ACPY(&pd->ndaddr, 5082 &nk->addr[pd2.didx], nk->af); 5083 pd->naf = nk->af; 5084 return (PF_AFRT); 5085 } 5086 #endif 5087 if (PF_ANEQ(pd2.src, 5088 &nk->addr[pd2.sidx], pd2.af) || 5089 nk->port[pd2.sidx] != th.th_sport) 5090 pf_change_icmp(pd2.src, &th.th_sport, 5091 daddr, &nk->addr[pd2.sidx], 5092 nk->port[pd2.sidx], NULL, 5093 ipsum2, icmpsum, 0, pd2.af); 5094 5095 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], 5096 pd2.af) || pd2.rdomain != nk->rdomain) 5097 pd->destchg = 1; 5098 pd->m->m_pkthdr.rdomain = nk->rdomain; 5099 5100 if (PF_ANEQ(pd2.dst, 5101 &nk->addr[pd2.didx], pd2.af) || 5102 nk->port[pd2.didx] != th.th_dport) 5103 pf_change_icmp(pd2.dst, &th.th_dport, 5104 saddr, 
&nk->addr[pd2.didx], 5105 nk->port[pd2.didx], NULL, 5106 ipsum2, icmpsum, 0, pd2.af); 5107 copyback = 1; 5108 } 5109 5110 if (copyback) { 5111 switch (pd2.af) { 5112 #ifdef INET 5113 case AF_INET: 5114 m_copyback(pd->m, pd->off, ICMP_MINLEN, 5115 pd->hdr.icmp, M_NOWAIT); 5116 m_copyback(pd2.m, ipoff2, sizeof(h2), 5117 &h2, M_NOWAIT); 5118 break; 5119 #endif /* INET */ 5120 #ifdef INET6 5121 case AF_INET6: 5122 m_copyback(pd->m, pd->off, 5123 sizeof(struct icmp6_hdr), 5124 pd->hdr.icmp6, M_NOWAIT); 5125 m_copyback(pd2.m, ipoff2, sizeof(h2_6), 5126 &h2_6, M_NOWAIT); 5127 break; 5128 #endif /* INET6 */ 5129 } 5130 m_copyback(pd2.m, pd2.off, 8, &th, M_NOWAIT); 5131 } 5132 5133 return (PF_PASS); 5134 break; 5135 } 5136 case IPPROTO_UDP: { 5137 struct udphdr uh; 5138 5139 if (!pf_pull_hdr(pd2.m, pd2.off, &uh, sizeof(uh), 5140 NULL, reason, pd2.af)) { 5141 DPFPRINTF(LOG_NOTICE, 5142 "ICMP error message too short (udp)"); 5143 return (PF_DROP); 5144 } 5145 5146 key.af = pd2.af; 5147 key.proto = IPPROTO_UDP; 5148 key.rdomain = pd2.rdomain; 5149 PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); 5150 PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); 5151 key.port[pd2.sidx] = uh.uh_sport; 5152 key.port[pd2.didx] = uh.uh_dport; 5153 5154 STATE_LOOKUP(pd2.kif, &key, pd2.dir, *state, pd2.m); 5155 5156 /* translate source/destination address, if necessary */ 5157 if ((*state)->key[PF_SK_WIRE] != 5158 (*state)->key[PF_SK_STACK]) { 5159 struct pf_state_key *nk; 5160 int afto, sidx, didx; 5161 5162 if (PF_REVERSED_KEY((*state)->key, pd->af)) 5163 nk = (*state)->key[pd->sidx]; 5164 else 5165 nk = (*state)->key[pd->didx]; 5166 5167 afto = pd->af != nk->af; 5168 sidx = afto ? pd2.didx : pd2.sidx; 5169 didx = afto ? 
pd2.sidx : pd2.didx; 5170 5171 #if INET && INET6 5172 if (afto) { 5173 if (pf_translate_icmp_af(nk->af, 5174 pd->hdr.icmp)) 5175 return (PF_DROP); 5176 m_copyback(pd->m, pd->off, 5177 sizeof(struct icmp6_hdr), 5178 pd->hdr.icmp6, M_NOWAIT); 5179 if (pf_change_icmp_af(pd->m, ipoff2, 5180 pd, &pd2, &nk->addr[sidx], 5181 &nk->addr[didx], pd->af, nk->af)) 5182 return (PF_DROP); 5183 if (nk->af == AF_INET) 5184 pd->proto = IPPROTO_ICMP; 5185 else 5186 pd->proto = IPPROTO_ICMPV6; 5187 pf_change_ap(pd2.src, &uh.uh_sport, 5188 &uh.uh_sum, &nk->addr[pd2.sidx], 5189 nk->port[sidx], 1, pd->af, nk->af); 5190 pf_change_ap(pd2.dst, &uh.uh_dport, 5191 &uh.uh_sum, &nk->addr[pd2.didx], 5192 nk->port[didx], 1, pd->af, nk->af); 5193 m_copyback(pd2.m, pd2.off, sizeof(uh), 5194 &uh, M_NOWAIT); 5195 pd->m->m_pkthdr.rdomain = nk->rdomain; 5196 pd->destchg = 1; 5197 PF_ACPY(&pd->nsaddr, 5198 &nk->addr[pd2.sidx], nk->af); 5199 PF_ACPY(&pd->ndaddr, 5200 &nk->addr[pd2.didx], nk->af); 5201 pd->naf = nk->af; 5202 return (PF_AFRT); 5203 } 5204 #endif /* INET && INET6 */ 5205 5206 if (PF_ANEQ(pd2.src, 5207 &nk->addr[pd2.sidx], pd2.af) || 5208 nk->port[pd2.sidx] != uh.uh_sport) 5209 pf_change_icmp(pd2.src, &uh.uh_sport, 5210 daddr, &nk->addr[pd2.sidx], 5211 nk->port[pd2.sidx], &uh.uh_sum, 5212 ipsum2, icmpsum, 1, pd2.af); 5213 5214 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], 5215 pd2.af) || pd2.rdomain != nk->rdomain) 5216 pd->destchg = 1; 5217 pd->m->m_pkthdr.rdomain = nk->rdomain; 5218 5219 if (PF_ANEQ(pd2.dst, 5220 &nk->addr[pd2.didx], pd2.af) || 5221 nk->port[pd2.didx] != uh.uh_dport) 5222 pf_change_icmp(pd2.dst, &uh.uh_dport, 5223 saddr, &nk->addr[pd2.didx], 5224 nk->port[pd2.didx], &uh.uh_sum, 5225 ipsum2, icmpsum, 1, pd2.af); 5226 5227 switch (pd2.af) { 5228 #ifdef INET 5229 case AF_INET: 5230 m_copyback(pd->m, pd->off, ICMP_MINLEN, 5231 pd->hdr.icmp, M_NOWAIT); 5232 m_copyback(pd2.m, ipoff2, sizeof(h2), 5233 &h2, M_NOWAIT); 5234 break; 5235 #endif /* INET */ 5236 #ifdef INET6 5237 case 
AF_INET6: 5238 m_copyback(pd->m, pd->off, 5239 sizeof(struct icmp6_hdr), 5240 pd->hdr.icmp6, M_NOWAIT); 5241 m_copyback(pd2.m, ipoff2, sizeof(h2_6), 5242 &h2_6, M_NOWAIT); 5243 break; 5244 #endif /* INET6 */ 5245 } 5246 m_copyback(pd2.m, pd2.off, sizeof(uh), &uh, 5247 M_NOWAIT); 5248 } 5249 return (PF_PASS); 5250 break; 5251 } 5252 #ifdef INET 5253 case IPPROTO_ICMP: { 5254 struct icmp iih; 5255 5256 if (pd2.af != AF_INET) { 5257 REASON_SET(reason, PFRES_NORM); 5258 return (PF_DROP); 5259 } 5260 5261 if (!pf_pull_hdr(pd2.m, pd2.off, &iih, ICMP_MINLEN, 5262 NULL, reason, pd2.af)) { 5263 DPFPRINTF(LOG_NOTICE, 5264 "ICMP error message too short (icmp)"); 5265 return (PF_DROP); 5266 } 5267 5268 pd2.hdr.icmp = &iih; 5269 pf_icmp_mapping(&pd2, iih.icmp_type, 5270 &icmp_dir, &multi, &virtual_id, &virtual_type); 5271 5272 ret = pf_icmp_state_lookup(&pd2, &key, state, 5273 virtual_id, virtual_type, icmp_dir, &iidx, 5274 PF_ICMP_MULTI_NONE, 1); 5275 if (ret >= 0) 5276 return (ret); 5277 5278 /* translate source/destination address, if necessary */ 5279 if ((*state)->key[PF_SK_WIRE] != 5280 (*state)->key[PF_SK_STACK]) { 5281 struct pf_state_key *nk; 5282 int afto, sidx, didx; 5283 5284 if (PF_REVERSED_KEY((*state)->key, pd->af)) 5285 nk = (*state)->key[pd->sidx]; 5286 else 5287 nk = (*state)->key[pd->didx]; 5288 5289 afto = pd->af != nk->af; 5290 sidx = afto ? pd2.didx : pd2.sidx; 5291 didx = afto ? pd2.sidx : pd2.didx; 5292 iidx = afto ? 
!iidx : iidx; 5293 5294 #ifdef INET6 5295 if (afto) { 5296 if (nk->af != AF_INET6) 5297 return (PF_DROP); 5298 if (pf_translate_icmp_af(nk->af, 5299 pd->hdr.icmp)) 5300 return (PF_DROP); 5301 m_copyback(pd->m, pd->off, 5302 sizeof(struct icmp6_hdr), 5303 pd->hdr.icmp6, M_NOWAIT); 5304 if (pf_change_icmp_af(pd->m, ipoff2, 5305 pd, &pd2, &nk->addr[sidx], 5306 &nk->addr[didx], pd->af, nk->af)) 5307 return (PF_DROP); 5308 pd->proto = IPPROTO_ICMPV6; 5309 if (pf_translate_icmp_af(nk->af, &iih)) 5310 return (PF_DROP); 5311 if (virtual_type == htons(ICMP_ECHO) && 5312 nk->port[iidx] != iih.icmp_id) 5313 iih.icmp_id = nk->port[iidx]; 5314 m_copyback(pd2.m, pd2.off, ICMP_MINLEN, 5315 &iih, M_NOWAIT); 5316 pd->m->m_pkthdr.rdomain = nk->rdomain; 5317 pd->destchg = 1; 5318 PF_ACPY(&pd->nsaddr, 5319 &nk->addr[pd2.sidx], nk->af); 5320 PF_ACPY(&pd->ndaddr, 5321 &nk->addr[pd2.didx], nk->af); 5322 pd->naf = nk->af; 5323 return (PF_AFRT); 5324 } 5325 #endif /* INET6 */ 5326 5327 if (PF_ANEQ(pd2.src, 5328 &nk->addr[pd2.sidx], pd2.af) || 5329 (virtual_type == htons(ICMP_ECHO) && 5330 nk->port[iidx] != iih.icmp_id)) 5331 pf_change_icmp(pd2.src, 5332 (virtual_type == htons(ICMP_ECHO)) ? 5333 &iih.icmp_id : NULL, 5334 daddr, &nk->addr[pd2.sidx], 5335 (virtual_type == htons(ICMP_ECHO)) ? 
5336 nk->port[iidx] : 0, NULL, 5337 ipsum2, icmpsum, 0, AF_INET); 5338 5339 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], 5340 pd2.af) || pd2.rdomain != nk->rdomain) 5341 pd->destchg = 1; 5342 pd->m->m_pkthdr.rdomain = nk->rdomain; 5343 5344 if (PF_ANEQ(pd2.dst, 5345 &nk->addr[pd2.didx], pd2.af)) 5346 pf_change_icmp(pd2.dst, NULL, saddr, 5347 &nk->addr[pd2.didx], 0, NULL, 5348 ipsum2, icmpsum, 0, AF_INET); 5349 5350 m_copyback(pd->m, pd->off, ICMP_MINLEN, 5351 pd->hdr.icmp, M_NOWAIT); 5352 m_copyback(pd2.m, ipoff2, sizeof(h2), &h2, 5353 M_NOWAIT); 5354 m_copyback(pd2.m, pd2.off, ICMP_MINLEN, &iih, 5355 M_NOWAIT); 5356 } 5357 return (PF_PASS); 5358 break; 5359 } 5360 #endif /* INET */ 5361 #ifdef INET6 5362 case IPPROTO_ICMPV6: { 5363 struct icmp6_hdr iih; 5364 5365 if (pd2.af != AF_INET6) { 5366 REASON_SET(reason, PFRES_NORM); 5367 return (PF_DROP); 5368 } 5369 5370 if (!pf_pull_hdr(pd2.m, pd2.off, &iih, 5371 sizeof(struct icmp6_hdr), NULL, reason, pd2.af)) { 5372 DPFPRINTF(LOG_NOTICE, 5373 "ICMP error message too short (icmp6)"); 5374 return (PF_DROP); 5375 } 5376 5377 pd2.hdr.icmp6 = &iih; 5378 pf_icmp_mapping(&pd2, iih.icmp6_type, 5379 &icmp_dir, &multi, &virtual_id, &virtual_type); 5380 ret = pf_icmp_state_lookup(&pd2, &key, state, 5381 virtual_id, virtual_type, icmp_dir, &iidx, 5382 PF_ICMP_MULTI_NONE, 1); 5383 if (ret >= 0) { 5384 if (ret == PF_DROP && pd2.af == AF_INET6 && 5385 icmp_dir == PF_OUT) { 5386 ret = pf_icmp_state_lookup(&pd2, &key, 5387 state, virtual_id, virtual_type, 5388 icmp_dir, &iidx, multi, 1); 5389 if (ret >= 0) 5390 return (ret); 5391 } else 5392 return (ret); 5393 } 5394 5395 /* translate source/destination address, if necessary */ 5396 if ((*state)->key[PF_SK_WIRE] != 5397 (*state)->key[PF_SK_STACK]) { 5398 struct pf_state_key *nk; 5399 int afto, sidx, didx; 5400 5401 if (PF_REVERSED_KEY((*state)->key, pd->af)) 5402 nk = (*state)->key[pd->sidx]; 5403 else 5404 nk = (*state)->key[pd->didx]; 5405 5406 afto = pd->af != nk->af; 5407 sidx = afto 
? pd2.didx : pd2.sidx; 5408 didx = afto ? pd2.sidx : pd2.didx; 5409 iidx = afto ? !iidx : iidx; 5410 5411 #ifdef INET 5412 if (afto) { 5413 if (nk->af != AF_INET) 5414 return (PF_DROP); 5415 if (pf_translate_icmp_af(nk->af, 5416 pd->hdr.icmp)) 5417 return (PF_DROP); 5418 m_copyback(pd->m, pd->off, 5419 sizeof(struct icmp6_hdr), 5420 pd->hdr.icmp6, M_NOWAIT); 5421 if (pf_change_icmp_af(pd->m, ipoff2, 5422 pd, &pd2, &nk->addr[sidx], 5423 &nk->addr[didx], pd->af, nk->af)) 5424 return (PF_DROP); 5425 pd->proto = IPPROTO_ICMP; 5426 if (pf_translate_icmp_af(nk->af, &iih)) 5427 return (PF_DROP); 5428 if (virtual_type == 5429 htons(ICMP6_ECHO_REQUEST) && 5430 nk->port[iidx] != iih.icmp6_id) 5431 iih.icmp6_id = nk->port[iidx]; 5432 m_copyback(pd2.m, pd2.off, 5433 sizeof(struct icmp6_hdr), &iih, 5434 M_NOWAIT); 5435 pd->m->m_pkthdr.rdomain = nk->rdomain; 5436 pd->destchg = 1; 5437 PF_ACPY(&pd->nsaddr, 5438 &nk->addr[pd2.sidx], nk->af); 5439 PF_ACPY(&pd->ndaddr, 5440 &nk->addr[pd2.didx], nk->af); 5441 pd->naf = nk->af; 5442 return (PF_AFRT); 5443 } 5444 #endif /* INET */ 5445 5446 if (PF_ANEQ(pd2.src, 5447 &nk->addr[pd2.sidx], pd2.af) || 5448 ((virtual_type == 5449 htons(ICMP6_ECHO_REQUEST)) && 5450 nk->port[pd2.sidx] != iih.icmp6_id)) 5451 pf_change_icmp(pd2.src, 5452 (virtual_type == 5453 htons(ICMP6_ECHO_REQUEST)) 5454 ? &iih.icmp6_id : NULL, 5455 daddr, &nk->addr[pd2.sidx], 5456 (virtual_type == 5457 htons(ICMP6_ECHO_REQUEST)) 5458 ? 
nk->port[iidx] : 0, NULL, 5459 ipsum2, icmpsum, 0, AF_INET6); 5460 5461 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], 5462 pd2.af) || pd2.rdomain != nk->rdomain) 5463 pd->destchg = 1; 5464 pd->m->m_pkthdr.rdomain = nk->rdomain; 5465 5466 if (PF_ANEQ(pd2.dst, 5467 &nk->addr[pd2.didx], pd2.af)) 5468 pf_change_icmp(pd2.dst, NULL, saddr, 5469 &nk->addr[pd2.didx], 0, NULL, 5470 ipsum2, icmpsum, 0, AF_INET6); 5471 5472 m_copyback(pd->m, pd->off, 5473 sizeof(struct icmp6_hdr), pd->hdr.icmp6, 5474 M_NOWAIT); 5475 m_copyback(pd2.m, ipoff2, sizeof(h2_6), &h2_6, 5476 M_NOWAIT); 5477 m_copyback(pd2.m, pd2.off, 5478 sizeof(struct icmp6_hdr), &iih, M_NOWAIT); 5479 } 5480 return (PF_PASS); 5481 break; 5482 } 5483 #endif /* INET6 */ 5484 default: { 5485 key.af = pd2.af; 5486 key.proto = pd2.proto; 5487 key.rdomain = pd2.rdomain; 5488 PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); 5489 PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); 5490 key.port[0] = key.port[1] = 0; 5491 5492 STATE_LOOKUP(pd2.kif, &key, pd2.dir, *state, pd2.m); 5493 5494 /* translate source/destination address, if necessary */ 5495 if ((*state)->key[PF_SK_WIRE] != 5496 (*state)->key[PF_SK_STACK]) { 5497 struct pf_state_key *nk = 5498 (*state)->key[pd->didx]; 5499 5500 if (PF_ANEQ(pd2.src, 5501 &nk->addr[pd2.sidx], pd2.af)) 5502 pf_change_icmp(pd2.src, NULL, daddr, 5503 &nk->addr[pd2.sidx], 0, NULL, 5504 ipsum2, icmpsum, 0, pd2.af); 5505 5506 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], 5507 pd2.af) || pd2.rdomain != nk->rdomain) 5508 pd->destchg = 1; 5509 pd->m->m_pkthdr.rdomain = nk->rdomain; 5510 5511 if (PF_ANEQ(pd2.dst, 5512 &nk->addr[pd2.didx], pd2.af)) 5513 pf_change_icmp(pd2.dst, NULL, saddr, 5514 &nk->addr[pd2.didx], 0, NULL, 5515 ipsum2, icmpsum, 0, pd2.af); 5516 5517 switch (pd2.af) { 5518 #ifdef INET 5519 case AF_INET: 5520 m_copyback(pd->m, pd->off, ICMP_MINLEN, 5521 pd->hdr.icmp, M_NOWAIT); 5522 m_copyback(pd2.m, ipoff2, sizeof(h2), 5523 &h2, M_NOWAIT); 5524 break; 5525 #endif /* INET */ 5526 #ifdef INET6 
5527 case AF_INET6: 5528 m_copyback(pd->m, pd->off, 5529 sizeof(struct icmp6_hdr), 5530 pd->hdr.icmp6, M_NOWAIT); 5531 m_copyback(pd2.m, ipoff2, sizeof(h2_6), 5532 &h2_6, M_NOWAIT); 5533 break; 5534 #endif /* INET6 */ 5535 } 5536 } 5537 return (PF_PASS); 5538 break; 5539 } 5540 } 5541 } 5542 } 5543 5544 int 5545 pf_test_state_other(struct pf_pdesc *pd, struct pf_state **state) 5546 { 5547 struct pf_state_peer *src, *dst; 5548 struct pf_state_key_cmp key; 5549 int action = PF_PASS; 5550 5551 key.af = pd->af; 5552 key.proto = pd->proto; 5553 key.rdomain = pd->rdomain; 5554 PF_ACPY(&key.addr[pd->sidx], pd->src, key.af); 5555 PF_ACPY(&key.addr[pd->didx], pd->dst, key.af); 5556 key.port[0] = key.port[1] = 0; 5557 5558 STATE_LOOKUP(pd->kif, &key, pd->dir, *state, pd->m); 5559 5560 if (pd->dir == (*state)->direction) { 5561 src = &(*state)->src; 5562 dst = &(*state)->dst; 5563 } else { 5564 src = &(*state)->dst; 5565 dst = &(*state)->src; 5566 } 5567 5568 /* update states */ 5569 if (src->state < PFOTHERS_SINGLE) 5570 src->state = PFOTHERS_SINGLE; 5571 if (dst->state == PFOTHERS_SINGLE) 5572 dst->state = PFOTHERS_MULTIPLE; 5573 5574 /* update expire time */ 5575 (*state)->expire = time_second; 5576 if (src->state == PFOTHERS_MULTIPLE && dst->state == PFOTHERS_MULTIPLE) 5577 (*state)->timeout = PFTM_OTHER_MULTIPLE; 5578 else 5579 (*state)->timeout = PFTM_OTHER_SINGLE; 5580 5581 /* translate source/destination address, if necessary */ 5582 if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { 5583 struct pf_state_key *nk; 5584 int afto; 5585 5586 if (PF_REVERSED_KEY((*state)->key, pd->af)) 5587 nk = (*state)->key[pd->sidx]; 5588 else 5589 nk = (*state)->key[pd->didx]; 5590 5591 afto = pd->af != nk->af; 5592 5593 KASSERT(nk); 5594 KASSERT(pd); 5595 KASSERT(pd->src); 5596 KASSERT(pd->dst); 5597 5598 switch (pd->af) { 5599 #ifdef INET 5600 case AF_INET: 5601 if (!afto && 5602 PF_ANEQ(pd->src, &nk->addr[pd->sidx], AF_INET)) 5603 pf_change_a(&pd->src->v4.s_addr, NULL, 
5604 nk->addr[pd->sidx].v4.s_addr, 5605 0); 5606 if (!afto && 5607 PF_ANEQ(pd->dst, &nk->addr[pd->didx], AF_INET)) { 5608 pf_change_a(&pd->dst->v4.s_addr, NULL, 5609 nk->addr[pd->didx].v4.s_addr, 5610 0); 5611 pd->destchg = 1; 5612 } 5613 break; 5614 #endif /* INET */ 5615 #ifdef INET6 5616 case AF_INET6: 5617 if (!afto && 5618 PF_ANEQ(pd->src, &nk->addr[pd->sidx], AF_INET6)) 5619 PF_ACPY(pd->src, &nk->addr[pd->sidx], pd->af); 5620 5621 if (!afto && 5622 PF_ANEQ(pd->dst, &nk->addr[pd->didx], AF_INET6)) { 5623 PF_ACPY(pd->dst, &nk->addr[pd->didx], pd->af); 5624 pd->destchg = 1; 5625 } 5626 break; 5627 #endif /* INET6 */ 5628 } 5629 if (pd->rdomain != nk->rdomain) 5630 pd->destchg = 1; 5631 5632 #if INET && INET6 5633 if (afto) { 5634 PF_ACPY(&pd->nsaddr, 5635 &nk->addr[afto ? pd->didx : pd->sidx], nk->af); 5636 PF_ACPY(&pd->ndaddr, 5637 &nk->addr[afto ? pd->sidx : pd->didx], nk->af); 5638 pd->destchg = 1; 5639 pd->naf = nk->af; 5640 action = PF_AFRT; 5641 } 5642 #endif /* INET && INET6 */ 5643 5644 pd->m->m_pkthdr.rdomain = nk->rdomain; 5645 } 5646 return (action); 5647 } 5648 5649 /* 5650 * ipoff and off are measured from the start of the mbuf chain. 5651 * h must be at "ipoff" on the mbuf chain. 
5652 */ 5653 void * 5654 pf_pull_hdr(struct mbuf *m, int off, void *p, int len, 5655 u_short *actionp, u_short *reasonp, sa_family_t af) 5656 { 5657 switch (af) { 5658 #ifdef INET 5659 case AF_INET: { 5660 struct ip *h = mtod(m, struct ip *); 5661 u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3; 5662 5663 if (fragoff) { 5664 if (fragoff >= len) 5665 ACTION_SET(actionp, PF_PASS); 5666 else { 5667 ACTION_SET(actionp, PF_DROP); 5668 REASON_SET(reasonp, PFRES_FRAG); 5669 } 5670 return (NULL); 5671 } 5672 if (m->m_pkthdr.len < off + len || 5673 ntohs(h->ip_len) < off + len) { 5674 ACTION_SET(actionp, PF_DROP); 5675 REASON_SET(reasonp, PFRES_SHORT); 5676 return (NULL); 5677 } 5678 break; 5679 } 5680 #endif /* INET */ 5681 #ifdef INET6 5682 case AF_INET6: { 5683 struct ip6_hdr *h = mtod(m, struct ip6_hdr *); 5684 5685 if (m->m_pkthdr.len < off + len || 5686 (ntohs(h->ip6_plen) + sizeof(struct ip6_hdr)) < 5687 (unsigned)(off + len)) { 5688 ACTION_SET(actionp, PF_DROP); 5689 REASON_SET(reasonp, PFRES_SHORT); 5690 return (NULL); 5691 } 5692 break; 5693 } 5694 #endif /* INET6 */ 5695 } 5696 m_copydata(m, off, len, p); 5697 return (p); 5698 } 5699 5700 int 5701 pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif, 5702 int rtableid) 5703 { 5704 struct sockaddr_in *dst; 5705 int ret = 1; 5706 int check_mpath; 5707 extern int ipmultipath; 5708 #ifdef INET6 5709 extern int ip6_multipath; 5710 struct sockaddr_in6 *dst6; 5711 struct route_in6 ro; 5712 #else 5713 struct route ro; 5714 #endif 5715 struct radix_node *rn; 5716 struct rtentry *rt; 5717 struct ifnet *ifp; 5718 5719 check_mpath = 0; 5720 bzero(&ro, sizeof(ro)); 5721 ro.ro_tableid = rtableid; 5722 switch (af) { 5723 #ifdef INET 5724 case AF_INET: 5725 dst = satosin(&ro.ro_dst); 5726 dst->sin_family = AF_INET; 5727 dst->sin_len = sizeof(*dst); 5728 dst->sin_addr = addr->v4; 5729 if (ipmultipath) 5730 check_mpath = 1; 5731 break; 5732 #endif /* INET */ 5733 #ifdef INET6 5734 case AF_INET6: 5735 /* 
5736 * Skip check for addresses with embedded interface scope, 5737 * as they would always match anyway. 5738 */ 5739 if (IN6_IS_SCOPE_EMBED(&addr->v6)) 5740 goto out; 5741 dst6 = (struct sockaddr_in6 *)&ro.ro_dst; 5742 dst6->sin6_family = AF_INET6; 5743 dst6->sin6_len = sizeof(*dst6); 5744 dst6->sin6_addr = addr->v6; 5745 if (ip6_multipath) 5746 check_mpath = 1; 5747 break; 5748 #endif /* INET6 */ 5749 } 5750 5751 /* Skip checks for ipsec interfaces */ 5752 if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC) 5753 goto out; 5754 5755 rtalloc_noclone((struct route *)&ro); 5756 5757 if (ro.ro_rt != NULL) { 5758 /* No interface given, this is a no-route check */ 5759 if (kif == NULL) 5760 goto out; 5761 5762 if (kif->pfik_ifp == NULL) { 5763 ret = 0; 5764 goto out; 5765 } 5766 5767 /* Perform uRPF check if passed input interface */ 5768 ret = 0; 5769 rn = (struct radix_node *)ro.ro_rt; 5770 do { 5771 rt = (struct rtentry *)rn; 5772 if (rt->rt_ifp->if_type == IFT_CARP) 5773 ifp = rt->rt_ifp->if_carpdev; 5774 else 5775 ifp = rt->rt_ifp; 5776 5777 if (kif->pfik_ifp == ifp) 5778 ret = 1; 5779 rn = rn_mpath_next(rn, 0); 5780 } while (check_mpath == 1 && rn != NULL && ret == 0); 5781 } else 5782 ret = 0; 5783 out: 5784 if (ro.ro_rt != NULL) 5785 RTFREE(ro.ro_rt); 5786 return (ret); 5787 } 5788 5789 int 5790 pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw, 5791 int rtableid) 5792 { 5793 struct sockaddr_in *dst; 5794 #ifdef INET6 5795 struct sockaddr_in6 *dst6; 5796 struct route_in6 ro; 5797 #else 5798 struct route ro; 5799 #endif 5800 int ret = 0; 5801 5802 bzero(&ro, sizeof(ro)); 5803 ro.ro_tableid = rtableid; 5804 switch (af) { 5805 #ifdef INET 5806 case AF_INET: 5807 dst = satosin(&ro.ro_dst); 5808 dst->sin_family = AF_INET; 5809 dst->sin_len = sizeof(*dst); 5810 dst->sin_addr = addr->v4; 5811 break; 5812 #endif /* INET */ 5813 #ifdef INET6 5814 case AF_INET6: 5815 dst6 = (struct sockaddr_in6 *)&ro.ro_dst; 5816 dst6->sin6_family = 
AF_INET6; 5817 dst6->sin6_len = sizeof(*dst6); 5818 dst6->sin6_addr = addr->v6; 5819 break; 5820 #endif /* INET6 */ 5821 } 5822 5823 rtalloc_noclone((struct route *)&ro); 5824 5825 if (ro.ro_rt != NULL) { 5826 if (ro.ro_rt->rt_labelid == aw->v.rtlabel) 5827 ret = 1; 5828 RTFREE(ro.ro_rt); 5829 } 5830 5831 return (ret); 5832 } 5833 5834 #ifdef INET 5835 void 5836 pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, 5837 struct pf_state *s) 5838 { 5839 struct mbuf *m0, *m1; 5840 struct route iproute; 5841 struct route *ro = NULL; 5842 struct sockaddr_in *dst; 5843 struct ip *ip; 5844 struct ifnet *ifp = NULL; 5845 struct pf_addr naddr; 5846 struct pf_src_node *sn = NULL; 5847 int error = 0; 5848 #ifdef IPSEC 5849 struct m_tag *mtag; 5850 #endif /* IPSEC */ 5851 5852 if (m == NULL || *m == NULL || r == NULL || 5853 (dir != PF_IN && dir != PF_OUT) || oifp == NULL) 5854 panic("pf_route: invalid parameters"); 5855 5856 if ((*m)->m_pkthdr.pf.routed++ > 3) { 5857 m0 = *m; 5858 *m = NULL; 5859 goto bad; 5860 } 5861 5862 if (r->rt == PF_DUPTO) { 5863 if ((m0 = m_copym2(*m, 0, M_COPYALL, M_NOWAIT)) == NULL) 5864 return; 5865 } else { 5866 if ((r->rt == PF_REPLYTO) == (r->direction == dir)) 5867 return; 5868 m0 = *m; 5869 } 5870 5871 if (m0->m_len < sizeof(struct ip)) { 5872 DPFPRINTF(LOG_ERR, 5873 "pf_route: m0->m_len < sizeof(struct ip)"); 5874 goto bad; 5875 } 5876 5877 ip = mtod(m0, struct ip *); 5878 5879 ro = &iproute; 5880 bzero((caddr_t)ro, sizeof(*ro)); 5881 dst = satosin(&ro->ro_dst); 5882 dst->sin_family = AF_INET; 5883 dst->sin_len = sizeof(*dst); 5884 dst->sin_addr = ip->ip_dst; 5885 ro->ro_tableid = m0->m_pkthdr.rdomain; 5886 5887 if (!r->rt) { 5888 rtalloc(ro); 5889 if (ro->ro_rt == 0) { 5890 ipstat.ips_noroute++; 5891 goto bad; 5892 } 5893 5894 ifp = ro->ro_rt->rt_ifp; 5895 ro->ro_rt->rt_use++; 5896 5897 if (ro->ro_rt->rt_flags & RTF_GATEWAY) 5898 dst = satosin(ro->ro_rt->rt_gateway); 5899 5900 m0->m_pkthdr.pf.flags |= PF_TAG_GENERATED; 
5901 } else { 5902 if (s == NULL) { 5903 if (pf_map_addr(AF_INET, r, 5904 (struct pf_addr *)&ip->ip_src, 5905 &naddr, NULL, &sn, &r->route, PF_SN_ROUTE)) { 5906 DPFPRINTF(LOG_ERR, 5907 "pf_route: pf_map_addr() failed."); 5908 goto bad; 5909 } 5910 5911 if (!PF_AZERO(&naddr, AF_INET)) 5912 dst->sin_addr.s_addr = naddr.v4.s_addr; 5913 ifp = r->route.kif ? 5914 r->route.kif->pfik_ifp : NULL; 5915 } else { 5916 if (!PF_AZERO(&s->rt_addr, AF_INET)) 5917 dst->sin_addr.s_addr = 5918 s->rt_addr.v4.s_addr; 5919 ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL; 5920 } 5921 } 5922 if (ifp == NULL) 5923 goto bad; 5924 5925 5926 if (oifp != ifp) { 5927 if (pf_test(AF_INET, PF_OUT, ifp, &m0, NULL) != PF_PASS) 5928 goto bad; 5929 else if (m0 == NULL) 5930 goto done; 5931 if (m0->m_len < sizeof(struct ip)) { 5932 DPFPRINTF(LOG_ERR, 5933 "pf_route: m0->m_len < sizeof(struct ip)"); 5934 goto bad; 5935 } 5936 ip = mtod(m0, struct ip *); 5937 } 5938 5939 /* Copied from ip_output. */ 5940 #ifdef IPSEC 5941 /* 5942 * If we got here and IPsec crypto processing didn't happen, drop it. 5943 */ 5944 if ((mtag = m_tag_find(m0, PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED, NULL)) 5945 != NULL) { 5946 /* Notify IPsec to do its own crypto. 
*/ 5947 ipsp_skipcrypto_unmark((struct tdb_ident *)(mtag + 1)); 5948 goto bad; 5949 } 5950 #endif /* IPSEC */ 5951 5952 in_proto_cksum_out(m0, ifp); 5953 5954 if (ntohs(ip->ip_len) <= ifp->if_mtu) { 5955 ip->ip_sum = 0; 5956 if (ifp->if_capabilities & IFCAP_CSUM_IPv4) { 5957 m0->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT; 5958 ipstat.ips_outhwcsum++; 5959 } else 5960 ip->ip_sum = in_cksum(m0, ip->ip_hl << 2); 5961 /* Update relevant hardware checksum stats for TCP/UDP */ 5962 if (m0->m_pkthdr.csum_flags & M_TCP_CSUM_OUT) 5963 tcpstat.tcps_outhwcsum++; 5964 else if (m0->m_pkthdr.csum_flags & M_UDP_CSUM_OUT) 5965 udpstat.udps_outhwcsum++; 5966 error = (*ifp->if_output)(ifp, m0, sintosa(dst), NULL); 5967 goto done; 5968 } 5969 5970 /* 5971 * Too large for interface; fragment if possible. 5972 * Must be able to put at least 8 bytes per fragment. 5973 */ 5974 if (ip->ip_off & htons(IP_DF)) { 5975 ipstat.ips_cantfrag++; 5976 if (r->rt != PF_DUPTO) { 5977 icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0, 5978 ifp->if_mtu); 5979 goto done; 5980 } else 5981 goto bad; 5982 } 5983 5984 m1 = m0; 5985 error = ip_fragment(m0, ifp, ifp->if_mtu); 5986 if (error) { 5987 m0 = NULL; 5988 goto bad; 5989 } 5990 5991 for (m0 = m1; m0; m0 = m1) { 5992 m1 = m0->m_nextpkt; 5993 m0->m_nextpkt = 0; 5994 if (error == 0) 5995 error = (*ifp->if_output)(ifp, m0, sintosa(dst), 5996 NULL); 5997 else 5998 m_freem(m0); 5999 } 6000 6001 if (error == 0) 6002 ipstat.ips_fragmented++; 6003 6004 done: 6005 if (r->rt != PF_DUPTO) 6006 *m = NULL; 6007 if (ro == &iproute && ro->ro_rt) 6008 RTFREE(ro->ro_rt); 6009 return; 6010 6011 bad: 6012 m_freem(m0); 6013 goto done; 6014 } 6015 #endif /* INET */ 6016 6017 #ifdef INET6 6018 void 6019 pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, 6020 struct pf_state *s) 6021 { 6022 struct mbuf *m0; 6023 struct route_in6 ip6route; 6024 struct route_in6 *ro; 6025 struct sockaddr_in6 *dst; 6026 struct ip6_hdr *ip6; 6027 struct ifnet *ifp = NULL; 
6028 struct pf_addr naddr; 6029 struct pf_src_node *sn = NULL; 6030 6031 if (m == NULL || *m == NULL || r == NULL || 6032 (dir != PF_IN && dir != PF_OUT) || oifp == NULL) 6033 panic("pf_route6: invalid parameters"); 6034 6035 if ((*m)->m_pkthdr.pf.routed++ > 3) { 6036 m0 = *m; 6037 *m = NULL; 6038 goto bad; 6039 } 6040 6041 if (r->rt == PF_DUPTO) { 6042 if ((m0 = m_copym2(*m, 0, M_COPYALL, M_NOWAIT)) == NULL) 6043 return; 6044 } else { 6045 if ((r->rt == PF_REPLYTO) == (r->direction == dir)) 6046 return; 6047 m0 = *m; 6048 } 6049 6050 if (m0->m_len < sizeof(struct ip6_hdr)) { 6051 DPFPRINTF(LOG_ERR, 6052 "pf_route6: m0->m_len < sizeof(struct ip6_hdr)"); 6053 goto bad; 6054 } 6055 ip6 = mtod(m0, struct ip6_hdr *); 6056 6057 ro = &ip6route; 6058 bzero((caddr_t)ro, sizeof(*ro)); 6059 dst = (struct sockaddr_in6 *)&ro->ro_dst; 6060 dst->sin6_family = AF_INET6; 6061 dst->sin6_len = sizeof(*dst); 6062 dst->sin6_addr = ip6->ip6_dst; 6063 6064 if (!r->rt) { 6065 m0->m_pkthdr.pf.flags |= PF_TAG_GENERATED; 6066 ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL); 6067 return; 6068 } 6069 6070 if (s == NULL) { 6071 if (pf_map_addr(AF_INET6, r, (struct pf_addr *)&ip6->ip6_src, 6072 &naddr, NULL, &sn, &r->route, PF_SN_ROUTE)) { 6073 DPFPRINTF(LOG_ERR, 6074 "pf_route6: pf_map_addr() failed."); 6075 goto bad; 6076 } 6077 if (!PF_AZERO(&naddr, AF_INET6)) 6078 PF_ACPY((struct pf_addr *)&dst->sin6_addr, 6079 &naddr, AF_INET6); 6080 ifp = r->route.kif ? r->route.kif->pfik_ifp : NULL; 6081 } else { 6082 if (!PF_AZERO(&s->rt_addr, AF_INET6)) 6083 PF_ACPY((struct pf_addr *)&dst->sin6_addr, 6084 &s->rt_addr, AF_INET6); 6085 ifp = s->rt_kif ? 
s->rt_kif->pfik_ifp : NULL; 6086 } 6087 if (ifp == NULL) 6088 goto bad; 6089 6090 if (oifp != ifp) { 6091 if (pf_test(AF_INET6, PF_OUT, ifp, &m0, NULL) != PF_PASS) 6092 goto bad; 6093 else if (m0 == NULL) 6094 goto done; 6095 if (m0->m_len < sizeof(struct ip6_hdr)) { 6096 DPFPRINTF(LOG_ERR, 6097 "pf_route6: m0->m_len < sizeof(struct ip6_hdr)"); 6098 goto bad; 6099 } 6100 ip6 = mtod(m0, struct ip6_hdr *); 6101 } 6102 6103 /* 6104 * If the packet is too large for the outgoing interface, 6105 * send back an icmp6 error. 6106 */ 6107 if (IN6_IS_SCOPE_EMBED(&dst->sin6_addr)) 6108 dst->sin6_addr.s6_addr16[1] = htons(ifp->if_index); 6109 if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) { 6110 nd6_output(ifp, ifp, m0, dst, NULL); 6111 } else { 6112 in6_ifstat_inc(ifp, ifs6_in_toobig); 6113 if (r->rt != PF_DUPTO) 6114 icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu); 6115 else 6116 goto bad; 6117 } 6118 6119 done: 6120 if (r->rt != PF_DUPTO) 6121 *m = NULL; 6122 return; 6123 6124 bad: 6125 m_freem(m0); 6126 goto done; 6127 } 6128 #endif /* INET6 */ 6129 6130 6131 /* 6132 * check protocol (tcp/udp/icmp/icmp6) checksum and set mbuf flag 6133 * off is the offset where the protocol header starts 6134 * len is the total length of protocol header plus payload 6135 * returns 0 when the checksum is valid, otherwise returns 1. 
6136 * if the _OUT flag is set the checksum isn't done yet, consider these ok 6137 */ 6138 int 6139 pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, 6140 sa_family_t af) 6141 { 6142 u_int16_t flag_ok, flag_bad, flag_out; 6143 u_int16_t sum; 6144 6145 switch (p) { 6146 case IPPROTO_TCP: 6147 flag_ok = M_TCP_CSUM_IN_OK; 6148 flag_out = M_TCP_CSUM_OUT; 6149 flag_bad = M_TCP_CSUM_IN_BAD; 6150 break; 6151 case IPPROTO_UDP: 6152 flag_ok = M_UDP_CSUM_IN_OK; 6153 flag_out = M_UDP_CSUM_OUT; 6154 flag_bad = M_UDP_CSUM_IN_BAD; 6155 break; 6156 case IPPROTO_ICMP: 6157 #ifdef INET6 6158 case IPPROTO_ICMPV6: 6159 #endif /* INET6 */ 6160 flag_ok = flag_out = flag_bad = 0; 6161 break; 6162 default: 6163 return (1); 6164 } 6165 if (m->m_pkthdr.csum_flags & (flag_ok | flag_out)) 6166 return (0); 6167 if (m->m_pkthdr.csum_flags & flag_bad) 6168 return (1); 6169 if (off < sizeof(struct ip) || len < sizeof(struct udphdr)) 6170 return (1); 6171 if (m->m_pkthdr.len < off + len) 6172 return (1); 6173 switch (af) { 6174 #ifdef INET 6175 case AF_INET: 6176 if (p == IPPROTO_ICMP) { 6177 if (m->m_len < off) 6178 return (1); 6179 m->m_data += off; 6180 m->m_len -= off; 6181 sum = in_cksum(m, len); 6182 m->m_data -= off; 6183 m->m_len += off; 6184 } else { 6185 if (m->m_len < sizeof(struct ip)) 6186 return (1); 6187 sum = in4_cksum(m, p, off, len); 6188 } 6189 break; 6190 #endif /* INET */ 6191 #ifdef INET6 6192 case AF_INET6: 6193 if (m->m_len < sizeof(struct ip6_hdr)) 6194 return (1); 6195 sum = in6_cksum(m, p, off, len); 6196 break; 6197 #endif /* INET6 */ 6198 } 6199 if (sum) { 6200 m->m_pkthdr.csum_flags |= flag_bad; 6201 switch (p) { 6202 case IPPROTO_TCP: 6203 tcpstat.tcps_rcvbadsum++; 6204 break; 6205 case IPPROTO_UDP: 6206 udpstat.udps_badsum++; 6207 break; 6208 case IPPROTO_ICMP: 6209 icmpstat.icps_checksum++; 6210 break; 6211 #ifdef INET6 6212 case IPPROTO_ICMPV6: 6213 icmp6stat.icp6s_checksum++; 6214 break; 6215 #endif /* INET6 */ 6216 } 6217 return (1); 6218 } 
6219 m->m_pkthdr.csum_flags |= flag_ok; 6220 return (0); 6221 } 6222 6223 struct pf_divert * 6224 pf_find_divert(struct mbuf *m) 6225 { 6226 struct m_tag *mtag; 6227 6228 if ((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL) 6229 return (NULL); 6230 6231 return ((struct pf_divert *)(mtag + 1)); 6232 } 6233 6234 struct pf_divert * 6235 pf_get_divert(struct mbuf *m) 6236 { 6237 struct m_tag *mtag; 6238 6239 if ((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL) { 6240 mtag = m_tag_get(PACKET_TAG_PF_DIVERT, sizeof(struct pf_divert), 6241 M_NOWAIT); 6242 if (mtag == NULL) 6243 return (NULL); 6244 bzero(mtag + 1, sizeof(struct pf_divert)); 6245 m_tag_prepend(m, mtag); 6246 } 6247 6248 return ((struct pf_divert *)(mtag + 1)); 6249 } 6250 6251 #ifdef INET6 6252 int 6253 pf_walk_option6(struct pf_pdesc *pd, struct ip6_hdr *h, int off, int end, 6254 u_short *reason) 6255 { 6256 struct ip6_opt opt; 6257 struct ip6_opt_jumbo jumbo; 6258 6259 while (off < end) { 6260 if (!pf_pull_hdr(pd->m, off, &opt.ip6o_type, 6261 sizeof(opt.ip6o_type), NULL, reason, AF_INET6)) { 6262 DPFPRINTF(LOG_NOTICE, "IPv6 short opt type"); 6263 return (PF_DROP); 6264 } 6265 if (opt.ip6o_type == IP6OPT_PAD1) { 6266 off++; 6267 continue; 6268 } 6269 if (!pf_pull_hdr(pd->m, off, &opt, sizeof(opt), 6270 NULL, reason, AF_INET6)) { 6271 DPFPRINTF(LOG_NOTICE, "IPv6 short opt"); 6272 return (PF_DROP); 6273 } 6274 if (off + sizeof(opt) + opt.ip6o_len > end) { 6275 DPFPRINTF(LOG_NOTICE, "IPv6 long opt"); 6276 REASON_SET(reason, PFRES_IPOPTIONS); 6277 return (PF_DROP); 6278 } 6279 switch (opt.ip6o_type) { 6280 case IP6OPT_JUMBO: 6281 if (pd->jumbolen != 0) { 6282 DPFPRINTF(LOG_NOTICE, "IPv6 multiple jumbo"); 6283 REASON_SET(reason, PFRES_IPOPTIONS); 6284 return (PF_DROP); 6285 } 6286 if (ntohs(h->ip6_plen) != 0) { 6287 DPFPRINTF(LOG_NOTICE, "IPv6 bad jumbo plen"); 6288 REASON_SET(reason, PFRES_IPOPTIONS); 6289 return (PF_DROP); 6290 } 6291 if (!pf_pull_hdr(pd->m, off, &jumbo, sizeof(jumbo), 
6292 NULL, reason, AF_INET6)) { 6293 DPFPRINTF(LOG_NOTICE, "IPv6 short jumbo"); 6294 return (PF_DROP); 6295 } 6296 memcpy(&pd->jumbolen, jumbo.ip6oj_jumbo_len, 6297 sizeof(pd->jumbolen)); 6298 pd->jumbolen = ntohl(pd->jumbolen); 6299 if (pd->jumbolen < IPV6_MAXPACKET) { 6300 DPFPRINTF(LOG_NOTICE, "IPv6 short jumbolen"); 6301 REASON_SET(reason, PFRES_IPOPTIONS); 6302 return (PF_DROP); 6303 } 6304 break; 6305 default: 6306 break; 6307 } 6308 off += sizeof(opt) + opt.ip6o_len; 6309 } 6310 6311 return (PF_PASS); 6312 } 6313 6314 int 6315 pf_walk_header6(struct pf_pdesc *pd, struct ip6_hdr *h, u_short *reason) 6316 { 6317 struct ip6_frag frag; 6318 struct ip6_ext ext; 6319 struct ip6_rthdr rthdr; 6320 u_int32_t end; 6321 int rthdr_cnt = 0; 6322 6323 pd->off += sizeof(struct ip6_hdr); 6324 end = pd->off + ntohs(h->ip6_plen); 6325 pd->fragoff = pd->extoff = pd->jumbolen = 0; 6326 pd->proto = h->ip6_nxt; 6327 for (;;) { 6328 switch (pd->proto) { 6329 case IPPROTO_FRAGMENT: 6330 if (pd->fragoff != 0) { 6331 DPFPRINTF(LOG_NOTICE, "IPv6 multiple fragment"); 6332 REASON_SET(reason, PFRES_FRAG); 6333 return (PF_DROP); 6334 } 6335 /* jumbo payload packets cannot be fragmented */ 6336 if (pd->jumbolen != 0) { 6337 DPFPRINTF(LOG_NOTICE, "IPv6 fragmented jumbo"); 6338 REASON_SET(reason, PFRES_FRAG); 6339 return (PF_DROP); 6340 } 6341 if (!pf_pull_hdr(pd->m, pd->off, &frag, sizeof(frag), 6342 NULL, reason, AF_INET6)) { 6343 DPFPRINTF(LOG_NOTICE, "IPv6 short fragment"); 6344 return (PF_DROP); 6345 } 6346 pd->fragoff = pd->off; 6347 /* stop walking over non initial fragments */ 6348 if (ntohs((frag.ip6f_offlg & IP6F_OFF_MASK)) != 0) 6349 return (PF_PASS); 6350 pd->off += sizeof(frag); 6351 pd->proto = frag.ip6f_nxt; 6352 break; 6353 case IPPROTO_ROUTING: 6354 if (rthdr_cnt++) { 6355 DPFPRINTF(LOG_NOTICE, "IPv6 multiple rthdr"); 6356 REASON_SET(reason, PFRES_IPOPTIONS); 6357 return (PF_DROP); 6358 } 6359 /* fragments may be short */ 6360 if (pd->fragoff != 0 && end < pd->off + 
sizeof(rthdr)) { 6361 pd->off = pd->fragoff; 6362 pd->proto = IPPROTO_FRAGMENT; 6363 return (PF_PASS); 6364 } 6365 if (!pf_pull_hdr(pd->m, pd->off, &rthdr, sizeof(rthdr), 6366 NULL, reason, AF_INET6)) { 6367 DPFPRINTF(LOG_NOTICE, "IPv6 short rthdr"); 6368 return (PF_DROP); 6369 } 6370 if (rthdr.ip6r_type == IPV6_RTHDR_TYPE_0) { 6371 DPFPRINTF(LOG_NOTICE, "IPv6 rthdr0"); 6372 REASON_SET(reason, PFRES_IPOPTIONS); 6373 return (PF_DROP); 6374 } 6375 /* FALLTHROUGH */ 6376 case IPPROTO_AH: 6377 case IPPROTO_HOPOPTS: 6378 case IPPROTO_DSTOPTS: 6379 /* fragments may be short */ 6380 if (pd->fragoff != 0 && end < pd->off + sizeof(ext)) { 6381 pd->off = pd->fragoff; 6382 pd->proto = IPPROTO_FRAGMENT; 6383 return (PF_PASS); 6384 } 6385 if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext), 6386 NULL, reason, AF_INET6)) { 6387 DPFPRINTF(LOG_NOTICE, "IPv6 short exthdr"); 6388 return (PF_DROP); 6389 } 6390 /* reassembly needs the ext header before the frag */ 6391 if (pd->fragoff == 0) 6392 pd->extoff = pd->off; 6393 if (pd->proto == IPPROTO_HOPOPTS && pd->fragoff == 0) { 6394 if (pf_walk_option6(pd, h, 6395 pd->off + sizeof(ext), 6396 pd->off + (ext.ip6e_len + 1) * 8, reason) 6397 != PF_PASS) 6398 return (PF_DROP); 6399 if (ntohs(h->ip6_plen) == 0 && 6400 pd->jumbolen != 0) { 6401 DPFPRINTF(LOG_NOTICE, 6402 "IPv6 missing jumbo"); 6403 REASON_SET(reason, PFRES_IPOPTIONS); 6404 return (PF_DROP); 6405 } 6406 } 6407 if (pd->proto == IPPROTO_AH) 6408 pd->off += (ext.ip6e_len + 2) * 4; 6409 else 6410 pd->off += (ext.ip6e_len + 1) * 8; 6411 pd->proto = ext.ip6e_nxt; 6412 break; 6413 case IPPROTO_TCP: 6414 case IPPROTO_UDP: 6415 case IPPROTO_ICMPV6: 6416 /* fragments may be short, ignore inner header then */ 6417 if (pd->fragoff != 0 && end < pd->off + 6418 (pd->proto == IPPROTO_TCP ? sizeof(struct tcphdr) : 6419 pd->proto == IPPROTO_UDP ? 
sizeof(struct udphdr) : 6420 sizeof(struct icmp6_hdr))) { 6421 pd->off = pd->fragoff; 6422 pd->proto = IPPROTO_FRAGMENT; 6423 } 6424 /* FALLTHROUGH */ 6425 default: 6426 return (PF_PASS); 6427 } 6428 } 6429 } 6430 #endif /* INET6 */ 6431 6432 int 6433 pf_setup_pdesc(struct pf_pdesc *pd, void *pdhdrs, sa_family_t af, int dir, 6434 struct pfi_kif *kif, struct mbuf *m, u_short *reason) 6435 { 6436 bzero(pd, sizeof(*pd)); 6437 pd->hdr.any = pdhdrs; 6438 pd->dir = dir; 6439 pd->kif = kif; /* kif is NULL when called by pflog */ 6440 pd->m = m; 6441 pd->sidx = (dir == PF_IN) ? 0 : 1; 6442 pd->didx = (dir == PF_IN) ? 1 : 0; 6443 pd->af = pd->naf = af; 6444 6445 switch (pd->af) { 6446 #ifdef INET 6447 case AF_INET: { 6448 struct ip *h; 6449 6450 /* Check for illegal packets */ 6451 if (pd->m->m_pkthdr.len < (int)sizeof(struct ip)) { 6452 REASON_SET(reason, PFRES_SHORT); 6453 return (PF_DROP); 6454 } 6455 6456 h = mtod(pd->m, struct ip *); 6457 pd->off = h->ip_hl << 2; 6458 6459 if (pd->off < sizeof(struct ip) || 6460 pd->off > ntohs(h->ip_len) || 6461 pd->m->m_pkthdr.len < ntohs(h->ip_len)) { 6462 REASON_SET(reason, PFRES_SHORT); 6463 return (PF_DROP); 6464 } 6465 6466 pd->src = (struct pf_addr *)&h->ip_src; 6467 pd->dst = (struct pf_addr *)&h->ip_dst; 6468 pd->virtual_proto = pd->proto = h->ip_p; 6469 pd->tot_len = ntohs(h->ip_len); 6470 pd->tos = h->ip_tos & ~IPTOS_ECN_MASK; 6471 pd->rdomain = rtable_l2(pd->m->m_pkthdr.rdomain); 6472 pd->ttl = h->ip_ttl; 6473 if (h->ip_hl > 5) /* has options */ 6474 pd->badopts++; 6475 6476 if (h->ip_off & htons(IP_MF | IP_OFFMASK)) 6477 pd->virtual_proto = PF_VPROTO_FRAGMENT; 6478 6479 break; 6480 } 6481 #endif /* INET */ 6482 #ifdef INET6 6483 case AF_INET6: { 6484 struct ip6_hdr *h; 6485 6486 /* Check for illegal packets */ 6487 if (pd->m->m_pkthdr.len < (int)sizeof(struct ip6_hdr)) { 6488 REASON_SET(reason, PFRES_SHORT); 6489 return (PF_DROP); 6490 } 6491 6492 h = mtod(pd->m, struct ip6_hdr *); 6493 pd->off = 0; 6494 6495 if 
(pd->m->m_pkthdr.len < 6496 sizeof(struct ip6_hdr) + ntohs(h->ip6_plen)) { 6497 REASON_SET(reason, PFRES_SHORT); 6498 return (PF_DROP); 6499 } 6500 6501 if (pf_walk_header6(pd, h, reason) != PF_PASS) 6502 return (PF_DROP); 6503 6504 #if 1 6505 /* 6506 * we do not support jumbogram yet. if we keep going, zero 6507 * ip6_plen will do something bad, so drop the packet for now. 6508 */ 6509 if (pd->jumbolen != 0) { 6510 REASON_SET(reason, PFRES_NORM); 6511 return (PF_DROP); 6512 } 6513 #endif 6514 6515 pd->src = (struct pf_addr *)&h->ip6_src; 6516 pd->dst = (struct pf_addr *)&h->ip6_dst; 6517 pd->virtual_proto = pd->proto; 6518 pd->tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr); 6519 pd->tos = 0; 6520 pd->ttl = h->ip6_hlim; 6521 pd->rdomain = 0; 6522 6523 if (pd->fragoff != 0) 6524 pd->virtual_proto = PF_VPROTO_FRAGMENT; 6525 6526 break; 6527 } 6528 #endif /* INET6 */ 6529 default: 6530 panic("pf_setup_pdesc called with illegal af %u", pd->af); 6531 6532 } 6533 6534 PF_ACPY(&pd->nsaddr, pd->src, pd->af); 6535 PF_ACPY(&pd->ndaddr, pd->dst, pd->af); 6536 6537 switch (pd->virtual_proto) { 6538 case IPPROTO_TCP: { 6539 struct tcphdr *th = pd->hdr.tcp; 6540 6541 if (!pf_pull_hdr(pd->m, pd->off, th, sizeof(*th), 6542 NULL, reason, pd->af)) 6543 return (PF_DROP); 6544 pd->hdrlen = sizeof(*th); 6545 if (pd->off + (th->th_off << 2) > pd->tot_len || 6546 (th->th_off << 2) < sizeof(struct tcphdr)) { 6547 REASON_SET(reason, PFRES_SHORT); 6548 return (PF_DROP); 6549 } 6550 pd->p_len = pd->tot_len - pd->off - (th->th_off << 2); 6551 pd->sport = &th->th_sport; 6552 pd->dport = &th->th_dport; 6553 break; 6554 } 6555 case IPPROTO_UDP: { 6556 struct udphdr *uh = pd->hdr.udp; 6557 6558 if (!pf_pull_hdr(pd->m, pd->off, uh, sizeof(*uh), 6559 NULL, reason, pd->af)) 6560 return (PF_DROP); 6561 pd->hdrlen = sizeof(*uh); 6562 if (uh->uh_dport == 0 || 6563 pd->off + ntohs(uh->uh_ulen) > pd->tot_len || 6564 ntohs(uh->uh_ulen) < sizeof(struct udphdr)) { 6565 REASON_SET(reason, 
PFRES_SHORT); 6566 return (PF_DROP); 6567 } 6568 pd->sport = &uh->uh_sport; 6569 pd->dport = &uh->uh_dport; 6570 break; 6571 } 6572 case IPPROTO_ICMP: { 6573 if (!pf_pull_hdr(pd->m, pd->off, pd->hdr.icmp, ICMP_MINLEN, 6574 NULL, reason, pd->af)) 6575 return (PF_DROP); 6576 pd->hdrlen = ICMP_MINLEN; 6577 if (pd->off + pd->hdrlen > pd->tot_len) { 6578 REASON_SET(reason, PFRES_SHORT); 6579 return (PF_DROP); 6580 } 6581 break; 6582 } 6583 #ifdef INET6 6584 case IPPROTO_ICMPV6: { 6585 size_t icmp_hlen = sizeof(struct icmp6_hdr); 6586 6587 if (!pf_pull_hdr(pd->m, pd->off, pd->hdr.icmp6, icmp_hlen, 6588 NULL, reason, pd->af)) 6589 return (PF_DROP); 6590 /* ICMP headers we look further into to match state */ 6591 switch (pd->hdr.icmp6->icmp6_type) { 6592 case MLD_LISTENER_QUERY: 6593 case MLD_LISTENER_REPORT: 6594 icmp_hlen = sizeof(struct mld_hdr); 6595 break; 6596 case ND_NEIGHBOR_SOLICIT: 6597 case ND_NEIGHBOR_ADVERT: 6598 icmp_hlen = sizeof(struct nd_neighbor_solicit); 6599 break; 6600 } 6601 if (icmp_hlen > sizeof(struct icmp6_hdr) && 6602 !pf_pull_hdr(pd->m, pd->off, pd->hdr.icmp6, icmp_hlen, 6603 NULL, reason, pd->af)) 6604 return (PF_DROP); 6605 pd->hdrlen = icmp_hlen; 6606 if (pd->off + pd->hdrlen > pd->tot_len) { 6607 REASON_SET(reason, PFRES_SHORT); 6608 return (PF_DROP); 6609 } 6610 break; 6611 } 6612 #endif /* INET6 */ 6613 } 6614 6615 if (pd->sport) 6616 pd->nsport = *pd->sport; 6617 if (pd->dport) 6618 pd->ndport = *pd->dport; 6619 6620 return (PF_PASS); 6621 } 6622 6623 void 6624 pf_counters_inc(int action, struct pf_pdesc *pd, struct pf_state *s, 6625 struct pf_rule *r, struct pf_rule *a) 6626 { 6627 int dirndx; 6628 pd->kif->pfik_bytes[pd->af == AF_INET6][pd->dir == PF_OUT] 6629 [action != PF_PASS] += pd->tot_len; 6630 pd->kif->pfik_packets[pd->af == AF_INET6][pd->dir == PF_OUT] 6631 [action != PF_PASS]++; 6632 6633 if (action == PF_PASS || action == PF_AFRT || r->action == PF_DROP) { 6634 dirndx = (pd->dir == PF_OUT); 6635 r->packets[dirndx]++; 6636 
r->bytes[dirndx] += pd->tot_len; 6637 if (a != NULL) { 6638 a->packets[dirndx]++; 6639 a->bytes[dirndx] += pd->tot_len; 6640 } 6641 if (s != NULL) { 6642 struct pf_rule_item *ri; 6643 struct pf_sn_item *sni; 6644 6645 SLIST_FOREACH(sni, &s->src_nodes, next) { 6646 sni->sn->packets[dirndx]++; 6647 sni->sn->bytes[dirndx] += pd->tot_len; 6648 } 6649 dirndx = (pd->dir == s->direction) ? 0 : 1; 6650 s->packets[dirndx]++; 6651 s->bytes[dirndx] += pd->tot_len; 6652 6653 SLIST_FOREACH(ri, &s->match_rules, entry) { 6654 ri->r->packets[dirndx]++; 6655 ri->r->bytes[dirndx] += pd->tot_len; 6656 } 6657 } 6658 if (r->src.addr.type == PF_ADDR_TABLE) 6659 pfr_update_stats(r->src.addr.p.tbl, 6660 (s == NULL) ? pd->src : 6661 &s->key[(s->direction == PF_IN)]-> 6662 addr[(s->direction == PF_OUT)], 6663 pd->af, pd->tot_len, pd->dir == PF_OUT, 6664 r->action == PF_PASS, r->src.neg); 6665 if (r->dst.addr.type == PF_ADDR_TABLE) 6666 pfr_update_stats(r->dst.addr.p.tbl, 6667 (s == NULL) ? pd->dst : 6668 &s->key[(s->direction == PF_IN)]-> 6669 addr[(s->direction == PF_IN)], 6670 pd->af, pd->tot_len, pd->dir == PF_OUT, 6671 r->action == PF_PASS, r->dst.neg); 6672 } 6673 } 6674 6675 int 6676 pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0, 6677 struct ether_header *eh) 6678 { 6679 struct pfi_kif *kif; 6680 u_short action, reason = 0; 6681 struct pf_rule *a = NULL, *r = &pf_default_rule; 6682 struct pf_state *s = NULL; 6683 struct pf_ruleset *ruleset = NULL; 6684 struct pf_pdesc pd; 6685 union pf_headers pdhdrs; 6686 int dir = (fwdir == PF_FWD) ? 
PF_OUT : fwdir; 6687 u_int32_t qid, pqid = 0; 6688 6689 if (!pf_status.running) 6690 return (PF_PASS); 6691 6692 if (ifp->if_type == IFT_CARP && ifp->if_carpdev) 6693 kif = (struct pfi_kif *)ifp->if_carpdev->if_pf_kif; 6694 else 6695 kif = (struct pfi_kif *)ifp->if_pf_kif; 6696 6697 if (kif == NULL) { 6698 DPFPRINTF(LOG_ERR, 6699 "pf_test: kif == NULL, if_xname %s", ifp->if_xname); 6700 return (PF_DROP); 6701 } 6702 if (kif->pfik_flags & PFI_IFLAG_SKIP) 6703 return (PF_PASS); 6704 6705 #ifdef DIAGNOSTIC 6706 if (((*m0)->m_flags & M_PKTHDR) == 0) 6707 panic("non-M_PKTHDR is passed to pf_test"); 6708 #endif /* DIAGNOSTIC */ 6709 6710 if ((*m0)->m_pkthdr.pf.flags & PF_TAG_GENERATED) 6711 return (PF_PASS); 6712 6713 if ((*m0)->m_pkthdr.pf.flags & PF_TAG_DIVERTED_PACKET) 6714 return (PF_PASS); 6715 6716 if ((*m0)->m_pkthdr.pf.flags & PF_TAG_REFRAGMENTED) { 6717 (*m0)->m_pkthdr.pf.flags &= ~PF_TAG_REFRAGMENTED; 6718 return (PF_PASS); 6719 } 6720 6721 action = pf_setup_pdesc(&pd, &pdhdrs, af, dir, kif, *m0, &reason); 6722 if (action != PF_PASS) { 6723 pd.pflog |= PF_LOG_FORCE; 6724 goto done; 6725 } 6726 6727 /* packet normalization and reassembly */ 6728 switch (pd.af) { 6729 #ifdef INET 6730 case AF_INET: 6731 action = pf_normalize_ip(&pd, &reason); 6732 break; 6733 #endif 6734 #ifdef INET6 6735 case AF_INET6: 6736 action = pf_normalize_ip6(&pd, &reason); 6737 break; 6738 #endif 6739 } 6740 *m0 = pd.m; 6741 /* if packet sits in reassembly queue, return without error */ 6742 if (pd.m == NULL) 6743 return PF_PASS; 6744 if (action != PF_PASS) { 6745 pd.pflog |= PF_LOG_FORCE; 6746 goto done; 6747 } 6748 6749 /* if packet has been reassembled, update packet description */ 6750 if (pf_status.reass && pd.virtual_proto == PF_VPROTO_FRAGMENT) { 6751 action = pf_setup_pdesc(&pd, &pdhdrs, af, dir, kif, *m0, 6752 &reason); 6753 if (action != PF_PASS) { 6754 pd.pflog |= PF_LOG_FORCE; 6755 goto done; 6756 } 6757 } 6758 pd.eh = eh; 6759 6760 switch (pd.virtual_proto) { 6761 6762 case 
PF_VPROTO_FRAGMENT: { 6763 /* 6764 * handle fragments that aren't reassembled by 6765 * normalization 6766 */ 6767 action = pf_test_rule(&pd, &r, &s, &a, &ruleset); 6768 if (action != PF_PASS) 6769 REASON_SET(&reason, PFRES_FRAG); 6770 break; 6771 } 6772 6773 case IPPROTO_TCP: { 6774 if ((pd.hdr.tcp->th_flags & TH_ACK) && pd.p_len == 0) 6775 pqid = 1; 6776 action = pf_normalize_tcp(&pd); 6777 if (action == PF_DROP) 6778 goto done; 6779 action = pf_test_state_tcp(&pd, &s, &reason); 6780 if (action == PF_PASS || action == PF_AFRT) { 6781 #if NPFSYNC > 0 6782 pfsync_update_state(s); 6783 #endif /* NPFSYNC */ 6784 r = s->rule.ptr; 6785 a = s->anchor.ptr; 6786 pd.pflog |= s->log; 6787 } else if (s == NULL) 6788 action = pf_test_rule(&pd, &r, &s, &a, &ruleset); 6789 6790 if (s) { 6791 if (s->max_mss) 6792 pf_normalize_mss(&pd, s->max_mss); 6793 } else if (r->max_mss) 6794 pf_normalize_mss(&pd, r->max_mss); 6795 6796 break; 6797 } 6798 6799 case IPPROTO_UDP: { 6800 action = pf_test_state_udp(&pd, &s); 6801 if (action == PF_PASS || action == PF_AFRT) { 6802 #if NPFSYNC > 0 6803 pfsync_update_state(s); 6804 #endif /* NPFSYNC */ 6805 r = s->rule.ptr; 6806 a = s->anchor.ptr; 6807 pd.pflog |= s->log; 6808 } else if (s == NULL) 6809 action = pf_test_rule(&pd, &r, &s, &a, &ruleset); 6810 break; 6811 } 6812 6813 case IPPROTO_ICMP: { 6814 if (pd.af != AF_INET) { 6815 action = PF_DROP; 6816 REASON_SET(&reason, PFRES_NORM); 6817 DPFPRINTF(LOG_NOTICE, 6818 "dropping IPv6 packet with ICMPv4 payload"); 6819 goto done; 6820 } 6821 action = pf_test_state_icmp(&pd, &s, &reason); 6822 if (action == PF_PASS || action == PF_AFRT) { 6823 #if NPFSYNC > 0 6824 pfsync_update_state(s); 6825 #endif /* NPFSYNC */ 6826 r = s->rule.ptr; 6827 a = s->anchor.ptr; 6828 pd.pflog |= s->log; 6829 } else if (s == NULL) 6830 action = pf_test_rule(&pd, &r, &s, &a, &ruleset); 6831 break; 6832 } 6833 6834 #ifdef INET6 6835 case IPPROTO_ICMPV6: { 6836 if (pd.af != AF_INET6) { 6837 action = PF_DROP; 6838 
REASON_SET(&reason, PFRES_NORM); 6839 DPFPRINTF(LOG_NOTICE, 6840 "dropping IPv4 packet with ICMPv6 payload"); 6841 goto done; 6842 } 6843 action = pf_test_state_icmp(&pd, &s, &reason); 6844 if (action == PF_PASS || action == PF_AFRT) { 6845 #if NPFSYNC > 0 6846 pfsync_update_state(s); 6847 #endif /* NPFSYNC */ 6848 r = s->rule.ptr; 6849 a = s->anchor.ptr; 6850 pd.pflog |= s->log; 6851 } else if (s == NULL) 6852 action = pf_test_rule(&pd, &r, &s, &a, &ruleset); 6853 break; 6854 } 6855 #endif /* INET6 */ 6856 6857 default: 6858 action = pf_test_state_other(&pd, &s); 6859 if (action == PF_PASS || action == PF_AFRT) { 6860 #if NPFSYNC > 0 6861 pfsync_update_state(s); 6862 #endif /* NPFSYNC */ 6863 r = s->rule.ptr; 6864 a = s->anchor.ptr; 6865 pd.pflog |= s->log; 6866 } else if (s == NULL) 6867 action = pf_test_rule(&pd, &r, &s, &a, &ruleset); 6868 break; 6869 } 6870 6871 done: 6872 if (action != PF_DROP) { 6873 if (s) { 6874 /* The non-state case is handled in pf_test_rule() */ 6875 if (action == PF_PASS && pd.badopts && 6876 !(s->state_flags & PFSTATE_ALLOWOPTS)) { 6877 action = PF_DROP; 6878 REASON_SET(&reason, PFRES_IPOPTIONS); 6879 pd.pflog |= PF_LOG_FORCE; 6880 DPFPRINTF(LOG_NOTICE, "dropping packet with " 6881 "ip/ipv6 options in pf_test()"); 6882 } 6883 6884 pf_scrub(pd.m, s->state_flags, pd.af, s->min_ttl, 6885 s->set_tos); 6886 pf_tag_packet(pd.m, s->tag, s->rtableid[pd.didx]); 6887 if (pqid || (pd.tos & IPTOS_LOWDELAY)) { 6888 qid = s->pqid; 6889 if (s->set_prio[1] != PF_PRIO_NOTSET) 6890 pd.m->m_pkthdr.pf.prio = s->set_prio[1]; 6891 } else { 6892 qid = s->qid; 6893 if (s->set_prio[0] != PF_PRIO_NOTSET) 6894 pd.m->m_pkthdr.pf.prio = s->set_prio[0]; 6895 } 6896 } else { 6897 pf_scrub(pd.m, r->scrub_flags, pd.af, r->min_ttl, 6898 r->set_tos); 6899 if (pqid || (pd.tos & IPTOS_LOWDELAY)) { 6900 qid = r->pqid; 6901 if (r->set_prio[1] != PF_PRIO_NOTSET) 6902 pd.m->m_pkthdr.pf.prio = r->set_prio[1]; 6903 } else { 6904 qid = r->qid; 6905 if (r->set_prio[0] != 
PF_PRIO_NOTSET) 6906 pd.m->m_pkthdr.pf.prio = r->set_prio[0]; 6907 } 6908 } 6909 } 6910 6911 if (pd.dir == PF_IN && s && s->key[PF_SK_STACK]) 6912 pd.m->m_pkthdr.pf.statekey = s->key[PF_SK_STACK]; 6913 6914 #ifdef ALTQ 6915 if (action == PF_PASS && qid) { 6916 pd.m->m_pkthdr.pf.qid = qid; 6917 pd.m->m_pkthdr.pf.hdr = mtod(pd.m, caddr_t);/* hints for ecn */ 6918 } 6919 #endif /* ALTQ */ 6920 6921 /* 6922 * connections redirected to loopback should not match sockets 6923 * bound specifically to loopback due to security implications, 6924 * see tcp_input() and in_pcblookup_listen(). 6925 */ 6926 if (pd.destchg) 6927 if ((pd.af == AF_INET && (ntohl(pd.dst->v4.s_addr) >> 6928 IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) || 6929 (pd.af == AF_INET6 && IN6_IS_ADDR_LOOPBACK(&pd.dst->v6))) 6930 pd.m->m_pkthdr.pf.flags |= PF_TAG_TRANSLATE_LOCALHOST; 6931 /* We need to redo the route lookup on outgoing routes. */ 6932 if (pd.destchg && pd.dir == PF_OUT) 6933 pd.m->m_pkthdr.pf.flags |= PF_TAG_REROUTE; 6934 6935 if (pd.dir == PF_IN && action == PF_PASS && r->divert.port) { 6936 struct pf_divert *divert; 6937 6938 if ((divert = pf_get_divert(pd.m))) { 6939 pd.m->m_pkthdr.pf.flags |= PF_TAG_DIVERTED; 6940 divert->port = r->divert.port; 6941 divert->rdomain = pd.rdomain; 6942 divert->addr = r->divert.addr; 6943 } 6944 } 6945 6946 if (action == PF_PASS && r->divert_packet.port) { 6947 struct pf_divert *divert; 6948 6949 if ((divert = pf_get_divert(pd.m))) 6950 divert->port = r->divert_packet.port; 6951 6952 action = PF_DIVERT; 6953 } 6954 6955 if (pd.pflog) { 6956 struct pf_rule_item *ri; 6957 6958 if (pd.pflog & PF_LOG_FORCE || r->log & PF_LOG_ALL) 6959 PFLOG_PACKET(&pd, reason, r, a, ruleset); 6960 if (s) { 6961 SLIST_FOREACH(ri, &s->match_rules, entry) 6962 if (ri->r->log & PF_LOG_ALL) 6963 PFLOG_PACKET(&pd, reason, ri->r, a, 6964 ruleset); 6965 } 6966 } 6967 6968 pf_counters_inc(action, &pd, s, r, a); 6969 6970 switch (action) { 6971 case PF_SYNPROXY_DROP: 6972 m_freem(*m0); 6973 case 
PF_DEFER: 6974 *m0 = NULL; 6975 action = PF_PASS; 6976 break; 6977 case PF_DIVERT: 6978 switch (pd.af) { 6979 case AF_INET: 6980 divert_packet(pd.m, pd.dir); 6981 break; 6982 #ifdef INET6 6983 case AF_INET6: 6984 divert6_packet(pd.m, pd.dir); 6985 break; 6986 #endif /* INET6 */ 6987 } 6988 *m0 = NULL; 6989 action = PF_PASS; 6990 break; 6991 #if INET && INET6 6992 case PF_AFRT: 6993 if (pf_translate_af(&pd)) { 6994 if (!pd.m) 6995 *m0 = NULL; 6996 action = PF_DROP; 6997 break; 6998 } 6999 if (pd.naf == AF_INET) 7000 pf_route(&pd.m, r, dir, kif->pfik_ifp, s); 7001 if (pd.naf == AF_INET6) 7002 pf_route6(&pd.m, r, dir, kif->pfik_ifp, s); 7003 *m0 = NULL; 7004 action = PF_PASS; 7005 break; 7006 #endif /* INET && INET6 */ 7007 default: 7008 /* pf_route can free the mbuf causing *m0 to become NULL */ 7009 if (r->rt) { 7010 switch (pd.af) { 7011 case AF_INET: 7012 pf_route(m0, r, pd.dir, pd.kif->pfik_ifp, s); 7013 break; 7014 #ifdef INET6 7015 case AF_INET6: 7016 pf_route6(m0, r, pd.dir, pd.kif->pfik_ifp, s); 7017 break; 7018 #endif /* INET6 */ 7019 } 7020 } 7021 break; 7022 } 7023 7024 #ifdef INET6 7025 /* if reassembled packet passed, create new fragments */ 7026 if (pf_status.reass && action == PF_PASS && *m0 && fwdir == PF_FWD) { 7027 struct m_tag *mtag; 7028 7029 if ((mtag = m_tag_find(*m0, PACKET_TAG_PF_REASSEMBLED, NULL))) 7030 action = pf_refragment6(m0, mtag, fwdir); 7031 } 7032 #endif 7033 7034 return (action); 7035 } 7036 7037 int 7038 pf_check_congestion(struct ifqueue *ifq) 7039 { 7040 if (ifq->ifq_congestion) 7041 return (1); 7042 else 7043 return (0); 7044 } 7045 7046 /* 7047 * must be called whenever any addressing information such as 7048 * address, port, protocol has changed 7049 */ 7050 void 7051 pf_pkt_addr_changed(struct mbuf *m) 7052 { 7053 m->m_pkthdr.pf.statekey = NULL; 7054 } 7055