1 /* $OpenBSD: pf.c,v 1.552.2.1 2007/11/27 16:37:57 henning Exp $ */ 2 /* add $OpenBSD: pf.c,v 1.553 2007/08/23 11:15:49 dhartmei Exp $ */ 3 /* add $OpenBSD: pf.c,v 1.554 2007/08/28 16:09:12 henning Exp $ */ 4 5 6 /* 7 * Copyright (c) 2004 The DragonFly Project. All rights reserved. 8 * 9 * Copyright (c) 2001 Daniel Hartmeier 10 * Copyright (c) 2002,2003 Henning Brauer 11 * All rights reserved. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 17 * - Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * - Redistributions in binary form must reproduce the above 20 * copyright notice, this list of conditions and the following 21 * disclaimer in the documentation and/or other materials provided 22 * with the distribution. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 25 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 26 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 27 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 28 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 29 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 30 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 31 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 32 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 34 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 35 * POSSIBILITY OF SUCH DAMAGE. 36 * 37 * Effort sponsored in part by the Defense Advanced Research Projects 38 * Agency (DARPA) and Air Force Research Laboratory, Air Force 39 * Materiel Command, USAF, under agreement number F30602-01-2-0537. 
40 * 41 */ 42 43 #include "opt_inet.h" 44 #include "opt_inet6.h" 45 #include "use_pfsync.h" 46 47 #include <sys/param.h> 48 #include <sys/systm.h> 49 #include <sys/malloc.h> 50 #include <sys/mbuf.h> 51 #include <sys/filio.h> 52 #include <sys/socket.h> 53 #include <sys/socketvar.h> 54 #include <sys/kernel.h> 55 #include <sys/time.h> 56 #include <sys/sysctl.h> 57 #include <sys/endian.h> 58 #include <vm/vm_zone.h> 59 #include <sys/proc.h> 60 #include <sys/kthread.h> 61 62 #include <sys/mplock2.h> 63 64 #include <machine/inttypes.h> 65 66 #include <net/if.h> 67 #include <net/if_types.h> 68 #include <net/bpf.h> 69 #include <net/netisr.h> 70 #include <net/route.h> 71 72 #include <netinet/in.h> 73 #include <netinet/in_var.h> 74 #include <netinet/in_systm.h> 75 #include <netinet/ip.h> 76 #include <netinet/ip_var.h> 77 #include <netinet/tcp.h> 78 #include <netinet/tcp_seq.h> 79 #include <netinet/udp.h> 80 #include <netinet/ip_icmp.h> 81 #include <netinet/in_pcb.h> 82 #include <netinet/tcp_timer.h> 83 #include <netinet/tcp_var.h> 84 #include <netinet/udp_var.h> 85 #include <netinet/icmp_var.h> 86 #include <netinet/if_ether.h> 87 88 #include <net/pf/pfvar.h> 89 #include <net/pf/if_pflog.h> 90 91 #if NPFSYNC > 0 92 #include <net/pf/if_pfsync.h> 93 #endif /* NPFSYNC > 0 */ 94 95 #ifdef INET6 96 #include <netinet/ip6.h> 97 #include <netinet/in_pcb.h> 98 #include <netinet/icmp6.h> 99 #include <netinet6/nd6.h> 100 #include <netinet6/ip6_var.h> 101 #include <netinet6/in6_pcb.h> 102 #endif /* INET6 */ 103 104 #include <sys/in_cksum.h> 105 #include <sys/ucred.h> 106 #include <machine/limits.h> 107 #include <sys/msgport2.h> 108 #include <net/netmsg2.h> 109 110 extern int ip_optcopy(struct ip *, struct ip *); 111 extern int debug_pfugidhack; 112 113 struct lwkt_token pf_token = LWKT_TOKEN_MP_INITIALIZER(pf_token); 114 115 #define DPFPRINTF(n, x) if (pf_status.debug >= (n)) kprintf x 116 117 /* 118 * Global variables 119 */ 120 121 /* state tables */ 122 struct pf_state_tree_lan_ext 
pf_statetbl_lan_ext; 123 struct pf_state_tree_ext_gwy pf_statetbl_ext_gwy; 124 125 struct pf_altqqueue pf_altqs[2]; 126 struct pf_palist pf_pabuf; 127 struct pf_altqqueue *pf_altqs_active; 128 struct pf_altqqueue *pf_altqs_inactive; 129 struct pf_status pf_status; 130 131 u_int32_t ticket_altqs_active; 132 u_int32_t ticket_altqs_inactive; 133 int altqs_inactive_open; 134 u_int32_t ticket_pabuf; 135 136 struct pf_anchor_stackframe { 137 struct pf_ruleset *rs; 138 struct pf_rule *r; 139 struct pf_anchor_node *parent; 140 struct pf_anchor *child; 141 } pf_anchor_stack[64]; 142 143 vm_zone_t pf_src_tree_pl, pf_rule_pl, pf_pooladdr_pl; 144 vm_zone_t pf_state_pl, pf_state_key_pl; 145 vm_zone_t pf_altq_pl; 146 147 void pf_print_host(struct pf_addr *, u_int16_t, u_int8_t); 148 149 void pf_init_threshold(struct pf_threshold *, u_int32_t, 150 u_int32_t); 151 void pf_add_threshold(struct pf_threshold *); 152 int pf_check_threshold(struct pf_threshold *); 153 154 void pf_change_ap(struct pf_addr *, u_int16_t *, 155 u_int16_t *, u_int16_t *, struct pf_addr *, 156 u_int16_t, u_int8_t, sa_family_t); 157 int pf_modulate_sack(struct mbuf *, int, struct pf_pdesc *, 158 struct tcphdr *, struct pf_state_peer *); 159 #ifdef INET6 160 void pf_change_a6(struct pf_addr *, u_int16_t *, 161 struct pf_addr *, u_int8_t); 162 #endif /* INET6 */ 163 void pf_change_icmp(struct pf_addr *, u_int16_t *, 164 struct pf_addr *, struct pf_addr *, u_int16_t, 165 u_int16_t *, u_int16_t *, u_int16_t *, 166 u_int16_t *, u_int8_t, sa_family_t); 167 void pf_send_tcp(const struct pf_rule *, sa_family_t, 168 const struct pf_addr *, const struct pf_addr *, 169 u_int16_t, u_int16_t, u_int32_t, u_int32_t, 170 u_int8_t, u_int16_t, u_int16_t, u_int8_t, int, 171 u_int16_t, struct ether_header *, struct ifnet *); 172 void pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t, 173 sa_family_t, struct pf_rule *); 174 struct pf_rule *pf_match_translation(struct pf_pdesc *, struct mbuf *, 175 int, int, struct pfi_kif *, 176 
struct pf_addr *, u_int16_t, struct pf_addr *, 177 u_int16_t, int); 178 struct pf_rule *pf_get_translation(struct pf_pdesc *, struct mbuf *, 179 int, int, struct pfi_kif *, struct pf_src_node **, 180 struct pf_addr *, u_int16_t, 181 struct pf_addr *, u_int16_t, 182 struct pf_addr *, u_int16_t *); 183 void pf_attach_state(struct pf_state_key *, 184 struct pf_state *, int); 185 void pf_detach_state(struct pf_state *, int); 186 int pf_test_rule(struct pf_rule **, struct pf_state **, 187 int, struct pfi_kif *, struct mbuf *, int, 188 void *, struct pf_pdesc *, struct pf_rule **, 189 struct pf_ruleset **, struct ifqueue *, struct inpcb *); 190 int pf_test_fragment(struct pf_rule **, int, 191 struct pfi_kif *, struct mbuf *, void *, 192 struct pf_pdesc *, struct pf_rule **, 193 struct pf_ruleset **); 194 int pf_test_state_tcp(struct pf_state **, int, 195 struct pfi_kif *, struct mbuf *, int, 196 void *, struct pf_pdesc *, u_short *); 197 int pf_test_state_udp(struct pf_state **, int, 198 struct pfi_kif *, struct mbuf *, int, 199 void *, struct pf_pdesc *); 200 int pf_test_state_icmp(struct pf_state **, int, 201 struct pfi_kif *, struct mbuf *, int, 202 void *, struct pf_pdesc *, u_short *); 203 int pf_test_state_other(struct pf_state **, int, 204 struct pfi_kif *, struct pf_pdesc *); 205 int pf_match_tag(struct mbuf *, struct pf_rule *, int *); 206 void pf_step_into_anchor(int *, struct pf_ruleset **, int, 207 struct pf_rule **, struct pf_rule **, int *); 208 int pf_step_out_of_anchor(int *, struct pf_ruleset **, 209 int, struct pf_rule **, struct pf_rule **, 210 int *); 211 void pf_hash(struct pf_addr *, struct pf_addr *, 212 struct pf_poolhashkey *, sa_family_t); 213 int pf_map_addr(u_int8_t, struct pf_rule *, 214 struct pf_addr *, struct pf_addr *, 215 struct pf_addr *, struct pf_src_node **); 216 int pf_get_sport(sa_family_t, u_int8_t, struct pf_rule *, 217 struct pf_addr *, struct pf_addr *, u_int16_t, 218 struct pf_addr *, u_int16_t*, u_int16_t, u_int16_t, 219 
/*
 * STATE_LOOKUP: statement macro that finds the state for the current packet.
 *
 * It expands in place and relies on the following names being in scope at
 * the expansion site: `direction`, `kif`, `key` and `state` (a
 * struct pf_state **).  Inbound packets (PF_IN) are looked up in the
 * ext/gwy tree, everything else in the lan/ext tree.
 *
 * The macro contains `return` statements, so it can end the *enclosing*
 * function:
 *   - PF_DROP when no state is found or the state is marked PFTM_PURGE;
 *   - PF_PASS for an outbound packet whose state was created by a
 *     route-to rule (rule direction PF_OUT) or a reply-to rule (rule
 *     direction PF_IN) and whose rt_kif is set and differs from `kif`.
 */
#define STATE_LOOKUP()							\
	do {								\
		if (direction == PF_IN)					\
			*state = pf_find_state(kif, &key, PF_EXT_GWY);	\
		else							\
			*state = pf_find_state(kif, &key, PF_LAN_EXT);	\
		if (*state == NULL || (*state)->timeout == PFTM_PURGE)	\
			return (PF_DROP);				\
		if (direction == PF_OUT &&				\
		    (((*state)->rule.ptr->rt == PF_ROUTETO &&		\
		    (*state)->rule.ptr->direction == PF_OUT) ||		\
		    ((*state)->rule.ptr->rt == PF_REPLYTO &&		\
		    (*state)->rule.ptr->direction == PF_IN)) &&		\
		    (*state)->rt_kif != NULL &&				\
		    (*state)->rt_kif != kif)				\
			return (PF_PASS);				\
	} while (0)

/*
 * STATE_TRANSLATE: expression that is non-zero when the lan and gwy
 * endpoints of state key `sk` differ -- in address word 0, in address
 * words 1..3 for AF_INET6 keys, or in port.  Presumably this means the
 * state carries an address/port translation; confirm against the callers.
 *
 * NOTE: the expansion is a bare `a || b || c` chain with no enclosing
 * parentheses, so only use it where it forms the whole condition.
 */
#define STATE_TRANSLATE(sk) \
	(sk)->lan.addr.addr32[0] != (sk)->gwy.addr.addr32[0] || \
	((sk)->af == AF_INET6 && \
	((sk)->lan.addr.addr32[1] != (sk)->gwy.addr.addr32[1] || \
	(sk)->lan.addr.addr32[2] != (sk)->gwy.addr.addr32[2] || \
	(sk)->lan.addr.addr32[3] != (sk)->gwy.addr.addr32[3])) || \
	(sk)->lan.port != (sk)->gwy.port

/*
 * BOUND_IFACE: yields `k` when rule `r` has PFRULE_IFBOUND set, otherwise
 * the global pfi_all.
 *
 * NOTE: the conditional expression is not wrapped in parentheses; combining
 * it with other operators at the use site will regroup it.
 */
#define BOUND_IFACE(r, k) \
	((r)->rule_flag & PFRULE_IFBOUND) ? (k) : pfi_all

/*
 * STATE_INC_COUNTERS: bump the `states` counter of the rule that owns
 * state `s`, and of its anchor and NAT rule when those pointers are set.
 * `s` is used unparenthesized, so pass a plain pointer variable.
 */
#define STATE_INC_COUNTERS(s)				\
	do {						\
		s->rule.ptr->states++;			\
		if (s->anchor.ptr != NULL)		\
			s->anchor.ptr->states++;	\
		if (s->nat_rule.ptr != NULL)		\
			s->nat_rule.ptr->states++;	\
	} while (0)

/*
 * STATE_DEC_COUNTERS: inverse of STATE_INC_COUNTERS; the counters are
 * decremented in the opposite order (NAT rule, anchor, rule).
 * `s` is used unparenthesized, so pass a plain pointer variable.
 */
#define STATE_DEC_COUNTERS(s)				\
	do {						\
		if (s->nat_rule.ptr != NULL)		\
			s->nat_rule.ptr->states--;	\
		if (s->anchor.ptr != NULL)		\
			s->anchor.ptr->states--;	\
		s->rule.ptr->states--;			\
	} while (0)
337 switch (a->af) { 338 #ifdef INET 339 case AF_INET: 340 if (a->addr.addr32[0] > b->addr.addr32[0]) 341 return (1); 342 if (a->addr.addr32[0] < b->addr.addr32[0]) 343 return (-1); 344 break; 345 #endif /* INET */ 346 #ifdef INET6 347 case AF_INET6: 348 if (a->addr.addr32[3] > b->addr.addr32[3]) 349 return (1); 350 if (a->addr.addr32[3] < b->addr.addr32[3]) 351 return (-1); 352 if (a->addr.addr32[2] > b->addr.addr32[2]) 353 return (1); 354 if (a->addr.addr32[2] < b->addr.addr32[2]) 355 return (-1); 356 if (a->addr.addr32[1] > b->addr.addr32[1]) 357 return (1); 358 if (a->addr.addr32[1] < b->addr.addr32[1]) 359 return (-1); 360 if (a->addr.addr32[0] > b->addr.addr32[0]) 361 return (1); 362 if (a->addr.addr32[0] < b->addr.addr32[0]) 363 return (-1); 364 break; 365 #endif /* INET6 */ 366 } 367 return (0); 368 } 369 370 u_int32_t 371 pf_state_hash(struct pf_state_key *sk) 372 { 373 u_int32_t hv = (intptr_t)sk / sizeof(*sk); 374 375 hv ^= crc32(&sk->lan, sizeof(sk->lan)); 376 hv ^= crc32(&sk->gwy, sizeof(sk->gwy)); 377 hv ^= crc32(&sk->ext, sizeof(sk->ext)); 378 if (hv == 0) /* disallow 0 */ 379 hv = 1; 380 return(hv); 381 } 382 383 static __inline int 384 pf_state_compare_lan_ext(struct pf_state_key *a, struct pf_state_key *b) 385 { 386 int diff; 387 388 if ((diff = a->proto - b->proto) != 0) 389 return (diff); 390 if ((diff = a->af - b->af) != 0) 391 return (diff); 392 switch (a->af) { 393 #ifdef INET 394 case AF_INET: 395 if (a->lan.addr.addr32[0] > b->lan.addr.addr32[0]) 396 return (1); 397 if (a->lan.addr.addr32[0] < b->lan.addr.addr32[0]) 398 return (-1); 399 if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0]) 400 return (1); 401 if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0]) 402 return (-1); 403 break; 404 #endif /* INET */ 405 #ifdef INET6 406 case AF_INET6: 407 if (a->lan.addr.addr32[3] > b->lan.addr.addr32[3]) 408 return (1); 409 if (a->lan.addr.addr32[3] < b->lan.addr.addr32[3]) 410 return (-1); 411 if (a->ext.addr.addr32[3] > b->ext.addr.addr32[3]) 412 
return (1); 413 if (a->ext.addr.addr32[3] < b->ext.addr.addr32[3]) 414 return (-1); 415 if (a->lan.addr.addr32[2] > b->lan.addr.addr32[2]) 416 return (1); 417 if (a->lan.addr.addr32[2] < b->lan.addr.addr32[2]) 418 return (-1); 419 if (a->ext.addr.addr32[2] > b->ext.addr.addr32[2]) 420 return (1); 421 if (a->ext.addr.addr32[2] < b->ext.addr.addr32[2]) 422 return (-1); 423 if (a->lan.addr.addr32[1] > b->lan.addr.addr32[1]) 424 return (1); 425 if (a->lan.addr.addr32[1] < b->lan.addr.addr32[1]) 426 return (-1); 427 if (a->ext.addr.addr32[1] > b->ext.addr.addr32[1]) 428 return (1); 429 if (a->ext.addr.addr32[1] < b->ext.addr.addr32[1]) 430 return (-1); 431 if (a->lan.addr.addr32[0] > b->lan.addr.addr32[0]) 432 return (1); 433 if (a->lan.addr.addr32[0] < b->lan.addr.addr32[0]) 434 return (-1); 435 if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0]) 436 return (1); 437 if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0]) 438 return (-1); 439 break; 440 #endif /* INET6 */ 441 } 442 443 if ((diff = a->lan.port - b->lan.port) != 0) 444 return (diff); 445 if ((diff = a->ext.port - b->ext.port) != 0) 446 return (diff); 447 448 return (0); 449 } 450 451 static __inline int 452 pf_state_compare_ext_gwy(struct pf_state_key *a, struct pf_state_key *b) 453 { 454 int diff; 455 456 if ((diff = a->proto - b->proto) != 0) 457 return (diff); 458 if ((diff = a->af - b->af) != 0) 459 return (diff); 460 switch (a->af) { 461 #ifdef INET 462 case AF_INET: 463 if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0]) 464 return (1); 465 if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0]) 466 return (-1); 467 if (a->gwy.addr.addr32[0] > b->gwy.addr.addr32[0]) 468 return (1); 469 if (a->gwy.addr.addr32[0] < b->gwy.addr.addr32[0]) 470 return (-1); 471 break; 472 #endif /* INET */ 473 #ifdef INET6 474 case AF_INET6: 475 if (a->ext.addr.addr32[3] > b->ext.addr.addr32[3]) 476 return (1); 477 if (a->ext.addr.addr32[3] < b->ext.addr.addr32[3]) 478 return (-1); 479 if (a->gwy.addr.addr32[3] > 
b->gwy.addr.addr32[3]) 480 return (1); 481 if (a->gwy.addr.addr32[3] < b->gwy.addr.addr32[3]) 482 return (-1); 483 if (a->ext.addr.addr32[2] > b->ext.addr.addr32[2]) 484 return (1); 485 if (a->ext.addr.addr32[2] < b->ext.addr.addr32[2]) 486 return (-1); 487 if (a->gwy.addr.addr32[2] > b->gwy.addr.addr32[2]) 488 return (1); 489 if (a->gwy.addr.addr32[2] < b->gwy.addr.addr32[2]) 490 return (-1); 491 if (a->ext.addr.addr32[1] > b->ext.addr.addr32[1]) 492 return (1); 493 if (a->ext.addr.addr32[1] < b->ext.addr.addr32[1]) 494 return (-1); 495 if (a->gwy.addr.addr32[1] > b->gwy.addr.addr32[1]) 496 return (1); 497 if (a->gwy.addr.addr32[1] < b->gwy.addr.addr32[1]) 498 return (-1); 499 if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0]) 500 return (1); 501 if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0]) 502 return (-1); 503 if (a->gwy.addr.addr32[0] > b->gwy.addr.addr32[0]) 504 return (1); 505 if (a->gwy.addr.addr32[0] < b->gwy.addr.addr32[0]) 506 return (-1); 507 break; 508 #endif /* INET6 */ 509 } 510 511 if ((diff = a->ext.port - b->ext.port) != 0) 512 return (diff); 513 if ((diff = a->gwy.port - b->gwy.port) != 0) 514 return (diff); 515 516 return (0); 517 } 518 519 static __inline int 520 pf_state_compare_id(struct pf_state *a, struct pf_state *b) 521 { 522 if (a->id > b->id) 523 return (1); 524 if (a->id < b->id) 525 return (-1); 526 if (a->creatorid > b->creatorid) 527 return (1); 528 if (a->creatorid < b->creatorid) 529 return (-1); 530 531 return (0); 532 } 533 534 #ifdef INET6 535 void 536 pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af) 537 { 538 switch (af) { 539 #ifdef INET 540 case AF_INET: 541 dst->addr32[0] = src->addr32[0]; 542 break; 543 #endif /* INET */ 544 case AF_INET6: 545 dst->addr32[0] = src->addr32[0]; 546 dst->addr32[1] = src->addr32[1]; 547 dst->addr32[2] = src->addr32[2]; 548 dst->addr32[3] = src->addr32[3]; 549 break; 550 } 551 } 552 #endif /* INET6 */ 553 554 struct pf_state * 555 pf_find_state_byid(struct pf_state_cmp 
*key) 556 { 557 pf_status.fcounters[FCNT_STATE_SEARCH]++; 558 559 return (RB_FIND(pf_state_tree_id, &tree_id, (struct pf_state *)key)); 560 } 561 562 struct pf_state * 563 pf_find_state(struct pfi_kif *kif, struct pf_state_key_cmp *key, u_int8_t tree) 564 { 565 struct pf_state_key *sk; 566 struct pf_state *s; 567 568 pf_status.fcounters[FCNT_STATE_SEARCH]++; 569 570 switch (tree) { 571 case PF_LAN_EXT: 572 sk = RB_FIND(pf_state_tree_lan_ext, &pf_statetbl_lan_ext, 573 (struct pf_state_key *)key); 574 break; 575 case PF_EXT_GWY: 576 sk = RB_FIND(pf_state_tree_ext_gwy, &pf_statetbl_ext_gwy, 577 (struct pf_state_key *)key); 578 break; 579 default: 580 panic("pf_find_state"); 581 } 582 583 /* list is sorted, if-bound states before floating ones */ 584 if (sk != NULL) 585 TAILQ_FOREACH(s, &sk->states, next) 586 if (s->kif == pfi_all || s->kif == kif) 587 return (s); 588 589 return (NULL); 590 } 591 592 struct pf_state * 593 pf_find_state_all(struct pf_state_key_cmp *key, u_int8_t tree, int *more) 594 { 595 struct pf_state_key *sk; 596 struct pf_state *s, *ret = NULL; 597 598 pf_status.fcounters[FCNT_STATE_SEARCH]++; 599 600 switch (tree) { 601 case PF_LAN_EXT: 602 sk = RB_FIND(pf_state_tree_lan_ext, 603 &pf_statetbl_lan_ext, (struct pf_state_key *)key); 604 break; 605 case PF_EXT_GWY: 606 sk = RB_FIND(pf_state_tree_ext_gwy, 607 &pf_statetbl_ext_gwy, (struct pf_state_key *)key); 608 break; 609 default: 610 panic("pf_find_state_all"); 611 } 612 613 if (sk != NULL) { 614 ret = TAILQ_FIRST(&sk->states); 615 if (more == NULL) 616 return (ret); 617 618 TAILQ_FOREACH(s, &sk->states, next) 619 (*more)++; 620 } 621 622 return (ret); 623 } 624 625 void 626 pf_init_threshold(struct pf_threshold *threshold, 627 u_int32_t limit, u_int32_t seconds) 628 { 629 threshold->limit = limit * PF_THRESHOLD_MULT; 630 threshold->seconds = seconds; 631 threshold->count = 0; 632 threshold->last = time_second; 633 } 634 635 void 636 pf_add_threshold(struct pf_threshold *threshold) 637 { 638 
u_int32_t t = time_second, diff = t - threshold->last; 639 640 if (diff >= threshold->seconds) 641 threshold->count = 0; 642 else 643 threshold->count -= threshold->count * diff / 644 threshold->seconds; 645 threshold->count += PF_THRESHOLD_MULT; 646 threshold->last = t; 647 } 648 649 int 650 pf_check_threshold(struct pf_threshold *threshold) 651 { 652 return (threshold->count > threshold->limit); 653 } 654 655 int 656 pf_src_connlimit(struct pf_state **state) 657 { 658 int bad = 0; 659 660 (*state)->src_node->conn++; 661 (*state)->src.tcp_est = 1; 662 pf_add_threshold(&(*state)->src_node->conn_rate); 663 664 if ((*state)->rule.ptr->max_src_conn && 665 (*state)->rule.ptr->max_src_conn < 666 (*state)->src_node->conn) { 667 pf_status.lcounters[LCNT_SRCCONN]++; 668 bad++; 669 } 670 671 if ((*state)->rule.ptr->max_src_conn_rate.limit && 672 pf_check_threshold(&(*state)->src_node->conn_rate)) { 673 pf_status.lcounters[LCNT_SRCCONNRATE]++; 674 bad++; 675 } 676 677 if (!bad) 678 return (0); 679 680 if ((*state)->rule.ptr->overload_tbl) { 681 struct pfr_addr p; 682 u_int32_t killed = 0; 683 684 pf_status.lcounters[LCNT_OVERLOAD_TABLE]++; 685 if (pf_status.debug >= PF_DEBUG_MISC) { 686 kprintf("pf_src_connlimit: blocking address "); 687 pf_print_host(&(*state)->src_node->addr, 0, 688 (*state)->state_key->af); 689 } 690 691 bzero(&p, sizeof(p)); 692 p.pfra_af = (*state)->state_key->af; 693 switch ((*state)->state_key->af) { 694 #ifdef INET 695 case AF_INET: 696 p.pfra_net = 32; 697 p.pfra_ip4addr = (*state)->src_node->addr.v4; 698 break; 699 #endif /* INET */ 700 #ifdef INET6 701 case AF_INET6: 702 p.pfra_net = 128; 703 p.pfra_ip6addr = (*state)->src_node->addr.v6; 704 break; 705 #endif /* INET6 */ 706 } 707 708 pfr_insert_kentry((*state)->rule.ptr->overload_tbl, 709 &p, time_second); 710 711 /* kill existing states if that's required. 
*/ 712 if ((*state)->rule.ptr->flush) { 713 struct pf_state_key *sk; 714 struct pf_state *st; 715 716 pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++; 717 RB_FOREACH(st, pf_state_tree_id, &tree_id) { 718 sk = st->state_key; 719 /* 720 * Kill states from this source. (Only those 721 * from the same rule if PF_FLUSH_GLOBAL is not 722 * set) 723 */ 724 if (sk->af == 725 (*state)->state_key->af && 726 (((*state)->state_key->direction == 727 PF_OUT && 728 PF_AEQ(&(*state)->src_node->addr, 729 &sk->lan.addr, sk->af)) || 730 ((*state)->state_key->direction == PF_IN && 731 PF_AEQ(&(*state)->src_node->addr, 732 &sk->ext.addr, sk->af))) && 733 ((*state)->rule.ptr->flush & 734 PF_FLUSH_GLOBAL || 735 (*state)->rule.ptr == st->rule.ptr)) { 736 st->timeout = PFTM_PURGE; 737 st->src.state = st->dst.state = 738 TCPS_CLOSED; 739 killed++; 740 } 741 } 742 if (pf_status.debug >= PF_DEBUG_MISC) 743 kprintf(", %u states killed", killed); 744 } 745 if (pf_status.debug >= PF_DEBUG_MISC) 746 kprintf("\n"); 747 } 748 749 /* kill this state */ 750 (*state)->timeout = PFTM_PURGE; 751 (*state)->src.state = (*state)->dst.state = TCPS_CLOSED; 752 return (1); 753 } 754 755 int 756 pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule, 757 struct pf_addr *src, sa_family_t af) 758 { 759 struct pf_src_node k; 760 761 if (*sn == NULL) { 762 k.af = af; 763 PF_ACPY(&k.addr, src, af); 764 if (rule->rule_flag & PFRULE_RULESRCTRACK || 765 rule->rpool.opts & PF_POOL_STICKYADDR) 766 k.rule.ptr = rule; 767 else 768 k.rule.ptr = NULL; 769 pf_status.scounters[SCNT_SRC_NODE_SEARCH]++; 770 *sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k); 771 } 772 if (*sn == NULL) { 773 if (!rule->max_src_nodes || 774 rule->src_nodes < rule->max_src_nodes) 775 (*sn) = pool_get(&pf_src_tree_pl, PR_NOWAIT); 776 else 777 pf_status.lcounters[LCNT_SRCNODES]++; 778 if ((*sn) == NULL) 779 return (-1); 780 bzero(*sn, sizeof(struct pf_src_node)); 781 782 pf_init_threshold(&(*sn)->conn_rate, 783 rule->max_src_conn_rate.limit, 
784 rule->max_src_conn_rate.seconds); 785 786 (*sn)->af = af; 787 if (rule->rule_flag & PFRULE_RULESRCTRACK || 788 rule->rpool.opts & PF_POOL_STICKYADDR) 789 (*sn)->rule.ptr = rule; 790 else 791 (*sn)->rule.ptr = NULL; 792 PF_ACPY(&(*sn)->addr, src, af); 793 if (RB_INSERT(pf_src_tree, 794 &tree_src_tracking, *sn) != NULL) { 795 if (pf_status.debug >= PF_DEBUG_MISC) { 796 kprintf("pf: src_tree insert failed: "); 797 pf_print_host(&(*sn)->addr, 0, af); 798 kprintf("\n"); 799 } 800 pool_put(&pf_src_tree_pl, *sn); 801 return (-1); 802 } 803 (*sn)->creation = time_second; 804 (*sn)->ruletype = rule->action; 805 if ((*sn)->rule.ptr != NULL) 806 (*sn)->rule.ptr->src_nodes++; 807 pf_status.scounters[SCNT_SRC_NODE_INSERT]++; 808 pf_status.src_nodes++; 809 } else { 810 if (rule->max_src_states && 811 (*sn)->states >= rule->max_src_states) { 812 pf_status.lcounters[LCNT_SRCSTATES]++; 813 return (-1); 814 } 815 } 816 return (0); 817 } 818 819 void 820 pf_stateins_err(const char *tree, struct pf_state *s, struct pfi_kif *kif) 821 { 822 struct pf_state_key *sk = s->state_key; 823 824 if (pf_status.debug >= PF_DEBUG_MISC) { 825 kprintf("pf: state insert failed: %s %s", tree, kif->pfik_name); 826 kprintf(" lan: "); 827 pf_print_host(&sk->lan.addr, sk->lan.port, 828 sk->af); 829 kprintf(" gwy: "); 830 pf_print_host(&sk->gwy.addr, sk->gwy.port, 831 sk->af); 832 kprintf(" ext: "); 833 pf_print_host(&sk->ext.addr, sk->ext.port, 834 sk->af); 835 if (s->sync_flags & PFSTATE_FROMSYNC) 836 kprintf(" (from sync)"); 837 kprintf("\n"); 838 } 839 } 840 841 int 842 pf_insert_state(struct pfi_kif *kif, struct pf_state *s) 843 { 844 struct pf_state_key *cur; 845 struct pf_state *sp; 846 847 KKASSERT(s->state_key != NULL); 848 s->kif = kif; 849 850 if ((cur = RB_INSERT(pf_state_tree_lan_ext, &pf_statetbl_lan_ext, 851 s->state_key)) != NULL) { 852 /* key exists. check for same kif, if none, add to key */ 853 TAILQ_FOREACH(sp, &cur->states, next) 854 if (sp->kif == kif) { /* collision! 
*/ 855 pf_stateins_err("tree_lan_ext", s, kif); 856 pf_detach_state(s, 857 PF_DT_SKIP_LANEXT|PF_DT_SKIP_EXTGWY); 858 return (-1); 859 } 860 pf_detach_state(s, PF_DT_SKIP_LANEXT|PF_DT_SKIP_EXTGWY); 861 pf_attach_state(cur, s, kif == pfi_all ? 1 : 0); 862 } 863 864 /* if cur != NULL, we already found a state key and attached to it */ 865 if (cur == NULL && (cur = RB_INSERT(pf_state_tree_ext_gwy, 866 &pf_statetbl_ext_gwy, s->state_key)) != NULL) { 867 /* must not happen. we must have found the sk above! */ 868 pf_stateins_err("tree_ext_gwy", s, kif); 869 pf_detach_state(s, PF_DT_SKIP_EXTGWY); 870 return (-1); 871 } 872 873 if (s->id == 0 && s->creatorid == 0) { 874 s->id = htobe64(pf_status.stateid++); 875 s->creatorid = pf_status.hostid; 876 } 877 if (RB_INSERT(pf_state_tree_id, &tree_id, s) != NULL) { 878 if (pf_status.debug >= PF_DEBUG_MISC) { 879 kprintf("pf: state insert failed: " 880 "id: %016llx creatorid: %08x", 881 be64toh(s->id), ntohl(s->creatorid)); 882 if (s->sync_flags & PFSTATE_FROMSYNC) 883 kprintf(" (from sync)"); 884 kprintf("\n"); 885 } 886 pf_detach_state(s, 0); 887 return (-1); 888 } 889 TAILQ_INSERT_TAIL(&state_list, s, entry_list); 890 pf_status.fcounters[FCNT_STATE_INSERT]++; 891 pf_status.states++; 892 pfi_kif_ref(kif, PFI_KIF_REF_STATE); 893 #if NPFSYNC 894 pfsync_insert_state(s); 895 #endif 896 return (0); 897 } 898 899 void 900 pf_purge_thread(void *v) 901 { 902 int nloops = 0; 903 int locked = 0; 904 905 get_mplock(); 906 for (;;) { 907 tsleep(pf_purge_thread, PWAIT, "pftm", 1 * hz); 908 909 lockmgr(&pf_consistency_lock, LK_EXCLUSIVE); 910 911 if (pf_end_threads) { 912 pf_purge_expired_states(pf_status.states, 1); 913 pf_purge_expired_fragments(); 914 pf_purge_expired_src_nodes(1); 915 pf_end_threads++; 916 917 lockmgr(&pf_consistency_lock, LK_RELEASE); 918 wakeup(pf_purge_thread); 919 kthread_exit(); 920 } 921 crit_enter(); 922 923 /* process a fraction of the state table every second */ 924 if(!pf_purge_expired_states(1 + 
/*
 * Body of the purge kernel thread.
 *
 * Each iteration sleeps on tsleep() with a timeout of 1 * hz, takes
 * pf_consistency_lock exclusively and then:
 *   - if pf_end_threads is set: purges all states, fragments and source
 *     nodes, increments pf_end_threads, releases the lock, wakes up
 *     sleepers on this function's address and calls kthread_exit();
 *   - otherwise, inside a critical section: scans a fraction of the state
 *     list (1 + states / PFTM_INTERVAL entries), and every PFTM_INTERVAL
 *     iterations also purges fragments and source nodes.
 *
 * NOTE(review): `locked` is never changed from 0, and
 * pf_purge_expired_src_nodes() as defined in this file always returns 1,
 * so the retry call inside the `if (!...)` below is not reached.
 * NOTE(review): rel_mplock() after the endless loop is not reachable by
 * normal control flow; the thread presumably ends in kthread_exit().
 */
void
pf_purge_thread(void *v)
{
	int nloops = 0;
	int locked = 0;

	get_mplock();
	for (;;) {
		tsleep(pf_purge_thread, PWAIT, "pftm", 1 * hz);

		lockmgr(&pf_consistency_lock, LK_EXCLUSIVE);

		/* shutdown requested: flush everything and exit */
		if (pf_end_threads) {
			pf_purge_expired_states(pf_status.states, 1);
			pf_purge_expired_fragments();
			pf_purge_expired_src_nodes(1);
			pf_end_threads++;

			lockmgr(&pf_consistency_lock, LK_RELEASE);
			wakeup(pf_purge_thread);
			kthread_exit();
		}
		crit_enter();

		/* process a fraction of the state table every second */
		if(!pf_purge_expired_states(1 + (pf_status.states
		    / pf_default_rule.timeout[PFTM_INTERVAL]), 0)) {

			/* first pass bailed out: retry as "already locked" */
			pf_purge_expired_states(1 + (pf_status.states
			    / pf_default_rule.timeout[PFTM_INTERVAL]), 1);
		}

		/* purge other expired types every PFTM_INTERVAL seconds */
		if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
			pf_purge_expired_fragments();
			if (!pf_purge_expired_src_nodes(locked)) {
				pf_purge_expired_src_nodes(1);
			}
			nloops = 0;
		}
		crit_exit();
		lockmgr(&pf_consistency_lock, LK_RELEASE);
	}
	rel_mplock();
}

/*
 * Compute the time (in the units of time_second) at which `state`
 * expires.
 *
 * Special timeouts: PFTM_PURGE expires now (time_second is returned) and
 * PFTM_UNTIL_PACKET returns 0.  PFTM_UNLINKED and values >= PFTM_MAX are
 * asserted against.
 *
 * Otherwise the timeout comes from the state's rule, falling back to
 * pf_default_rule when the rule's entry is 0.  Adaptive scaling uses the
 * rule's PFTM_ADAPTIVE_START/END and state count when the rule sets a
 * start value, else the defaults and the global state count.  When the
 * count lies between start and end the timeout is scaled by
 * (end - states) / (end - start); at or above end the state expires now.
 */
u_int32_t
pf_state_expires(const struct pf_state *state)
{
	u_int32_t	timeout;
	u_int32_t	start;
	u_int32_t	end;
	u_int32_t	states;

	/* handle all PFTM_* > PFTM_MAX here */
	if (state->timeout == PFTM_PURGE)
		return (time_second);
	if (state->timeout == PFTM_UNTIL_PACKET)
		return (0);
	KKASSERT(state->timeout != PFTM_UNLINKED);
	KASSERT((state->timeout < PFTM_MAX),
	    ("pf_state_expires: timeout > PFTM_MAX"));
	timeout = state->rule.ptr->timeout[state->timeout];
	if (!timeout)
		timeout = pf_default_rule.timeout[state->timeout];
	start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START];
	if (start) {
		/* per-rule adaptive limits and per-rule state count */
		end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END];
		states = state->rule.ptr->states;
	} else {
		/* global adaptive limits and global state count */
		start = pf_default_rule.timeout[PFTM_ADAPTIVE_START];
		end = pf_default_rule.timeout[PFTM_ADAPTIVE_END];
		states = pf_status.states;
	}
	if (end && states > start && start < end) {
		if (states < end)
			return (state->expire + timeout * (end - states) /
			    (end - start));
		else
			return (time_second);
	}
	return (state->expire + timeout);
}

/*
 * Remove every source node that has no states left and whose expiry time
 * has passed.
 *
 * `waslocked` tells whether the caller already holds pf_consistency_lock.
 * If not, the lock is taken before the first removal (and the successor
 * is re-read afterwards) and released again before returning.  A rule
 * whose last source node goes away is handed to pf_rm_rule() when it has
 * no states and max_src_nodes <= 0.
 *
 * Always returns 1.
 */
int
pf_purge_expired_src_nodes(int waslocked)
{
	struct pf_src_node		*cur, *next;
	int				 locked = waslocked;

	for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) {
		/* fetch the successor first, cur may be removed below */
		next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);

		if (cur->states <= 0 && cur->expire <= time_second) {
			if (! locked) {
				lockmgr(&pf_consistency_lock, LK_EXCLUSIVE);
				/* re-read the successor after locking */
				next = RB_NEXT(pf_src_tree,
				    &tree_src_tracking, cur);
				locked = 1;
			}
			if (cur->rule.ptr != NULL) {
				cur->rule.ptr->src_nodes--;
				if (cur->rule.ptr->states <= 0 &&
				    cur->rule.ptr->max_src_nodes <= 0)
					pf_rm_rule(NULL, cur->rule.ptr);
			}
			RB_REMOVE(pf_src_tree, &tree_src_tracking, cur);
			pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
			pf_status.src_nodes--;
			pool_put(&pf_src_tree_pl, cur);
		}
	}

	/* only drop the lock if it was taken here */
	if (locked && !waslocked)
		lockmgr(&pf_consistency_lock, LK_RELEASE);
	return(1);
}

/*
 * Detach state `s` from its source nodes.
 *
 * The source node's connection count is decremented when the state had
 * been counted as an established connection (src.tcp_est), and its state
 * count is decremented.  When a node's state count drops to zero or
 * below, its expiry is set to now plus the rule's PFTM_SRC_NODE timeout
 * (or the default one when the rule's is 0).  The NAT source node gets
 * the same treatment when it is a different node.  Both pointers in the
 * state are cleared afterwards.
 */
void
pf_src_tree_remove_state(struct pf_state *s)
{
	u_int32_t timeout;

	if (s->src_node != NULL) {
		if (s->src.tcp_est)
			--s->src_node->conn;
		if (--s->src_node->states <= 0) {
			timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
			if (!timeout)
				timeout =
				    pf_default_rule.timeout[PFTM_SRC_NODE];
			s->src_node->expire = time_second + timeout;
		}
	}
	/* skip the NAT node when it is the same node handled above */
	if (s->nat_src_node != s->src_node && s->nat_src_node != NULL) {
		if (--s->nat_src_node->states <= 0) {
			timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
			if (!timeout)
				timeout =
				    pf_default_rule.timeout[PFTM_SRC_NODE];
			s->nat_src_node->expire = time_second + timeout;
		}
	}
	s->src_node = s->nat_src_node = NULL;
}
/*
 * Take state `cur` out of the lookup structures without freeing it.
 *
 * If the source peer is still in PF_TCPS_PROXY_DST, a TCP RST|ACK is sent
 * first (from the ext endpoint to the lan endpoint of the state key).
 * The state is then removed from the id tree, reported to pfsync when it
 * was created by this host, marked PFTM_UNLINKED, detached from its
 * source nodes and from its state key.  It stays on state_list until
 * pf_free_state() is called.
 *
 * callers should be at crit_enter()
 */
void
pf_unlink_state(struct pf_state *cur)
{
	if (cur->src.state == PF_TCPS_PROXY_DST) {
		pf_send_tcp(cur->rule.ptr, cur->state_key->af,
		    &cur->state_key->ext.addr, &cur->state_key->lan.addr,
		    cur->state_key->ext.port, cur->state_key->lan.port,
		    cur->src.seqhi, cur->src.seqlo + 1,
		    TH_RST|TH_ACK, 0, 0, 0, 1, cur->tag, NULL, NULL);
	}
	RB_REMOVE(pf_state_tree_id, &tree_id, cur);
#if NPFSYNC
	if (cur->creatorid == pf_status.hostid)
		pfsync_delete_state(cur);
#endif
	cur->timeout = PFTM_UNLINKED;
	pf_src_tree_remove_state(cur);
	pf_detach_state(cur, 0);
}

/*
 * Release an unlinked state.
 *
 * The state must already be PFTM_UNLINKED (asserted).  With pfsync
 * compiled in, nothing is done while the state is the bulk-send cursor or
 * terminator of the pfsync interface.  Otherwise the state counts of the
 * rule, NAT rule and anchor are decremented (a rule left without states
 * and source nodes is passed to pf_rm_rule()), TCP normalization data is
 * cleaned up, the interface reference and tag reference are dropped, and
 * the state is removed from state_list and returned to its pool.
 *
 * callers should be at crit_enter() and hold the
 * write_lock on pf_consistency_lock
 */
void
pf_free_state(struct pf_state *cur)
{
#if NPFSYNC
	if (pfsyncif != NULL &&
	    (pfsyncif->sc_bulk_send_next == cur ||
	    pfsyncif->sc_bulk_terminator == cur))
		return;
#endif
	KKASSERT(cur->timeout == PFTM_UNLINKED);
	if (--cur->rule.ptr->states <= 0 &&
	    cur->rule.ptr->src_nodes <= 0)
		pf_rm_rule(NULL, cur->rule.ptr);
	if (cur->nat_rule.ptr != NULL)
		if (--cur->nat_rule.ptr->states <= 0 &&
		    cur->nat_rule.ptr->src_nodes <= 0)
			pf_rm_rule(NULL, cur->nat_rule.ptr);
	if (cur->anchor.ptr != NULL)
		if (--cur->anchor.ptr->states <= 0)
			pf_rm_rule(NULL, cur->anchor.ptr);
	pf_normalize_tcp_cleanup(cur);
	pfi_kif_unref(cur->kif, PFI_KIF_REF_STATE);
	TAILQ_REMOVE(&state_list, cur, entry_list);
	if (cur->tag)
		pf_tag_unref(cur->tag);
	pool_put(&pf_state_pl, cur);
	pf_status.fcounters[FCNT_STATE_REMOVALS]++;
	pf_status.states--;
}

/*
 * Examine up to `maxcheck` entries of state_list, freeing states that are
 * already unlinked and unlinking + freeing states that have expired.
 *
 * The scan position is kept in a function-static cursor, so successive
 * calls continue where the previous one stopped and wrap around at the
 * end of the list.  `waslocked` tells whether the caller considers
 * pf_consistency_lock already held; if not, the lock is requested before
 * the first pf_free_state().
 *
 * Returns 1 after a completed scan.  Returns 0 from the expired-state
 * path when lockmgr() returns 0; in that case the state has been unlinked
 * but not freed, and the static cursor still points at it.
 *
 * NOTE(review): confirm lockmgr()'s return convention.  If 0 means the
 * lock was acquired, the early return leaves with the lock held and
 * relies on the caller invoking this function again with waslocked != 0.
 * NOTE(review): the final release tests only `locked`, so it also runs
 * when the caller passed waslocked != 0; pf_purge_expired_src_nodes()
 * tests `locked && !waslocked` instead.  Verify the intended balance.
 */
int
pf_purge_expired_states(u_int32_t maxcheck, int waslocked)
{
	static struct pf_state	*cur = NULL;
	struct pf_state		*next;
	int			 locked = waslocked;

	while (maxcheck--) {
		/* wrap to start of list when we hit the end */
		if (cur == NULL) {
			cur = TAILQ_FIRST(&state_list);
			if (cur == NULL)
				break;	/* list empty */
		}

		/* get next state, as cur may get deleted */
		next = TAILQ_NEXT(cur, entry_list);

		if (cur->timeout == PFTM_UNLINKED) {
			/* free unlinked state */
			if (! locked) {
				lockmgr(&pf_consistency_lock, LK_EXCLUSIVE);
				locked = 1;
			}
			pf_free_state(cur);
		} else if (pf_state_expires(cur) <= time_second) {
			/* unlink and free expired state */
			pf_unlink_state(cur);
			if (! locked) {
				if (!lockmgr(&pf_consistency_lock, LK_EXCLUSIVE))
					return (0);
				locked = 1;
			}
			pf_free_state(cur);
		}
		cur = next;
	}

	if (locked)
		lockmgr(&pf_consistency_lock, LK_RELEASE);
	return (1);
}
locked) { 1119 lockmgr(&pf_consistency_lock, LK_EXCLUSIVE); 1120 locked = 1; 1121 } 1122 pf_free_state(cur); 1123 } else if (pf_state_expires(cur) <= time_second) { 1124 /* unlink and free expired state */ 1125 pf_unlink_state(cur); 1126 if (! locked) { 1127 if (!lockmgr(&pf_consistency_lock, LK_EXCLUSIVE)) 1128 return (0); 1129 locked = 1; 1130 } 1131 pf_free_state(cur); 1132 } 1133 cur = next; 1134 } 1135 1136 if (locked) 1137 lockmgr(&pf_consistency_lock, LK_RELEASE); 1138 return (1); 1139 } 1140 1141 int 1142 pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw) 1143 { 1144 if (aw->type != PF_ADDR_TABLE) 1145 return (0); 1146 if ((aw->p.tbl = pfr_attach_table(rs, aw->v.tblname)) == NULL) 1147 return (1); 1148 return (0); 1149 } 1150 1151 void 1152 pf_tbladdr_remove(struct pf_addr_wrap *aw) 1153 { 1154 if (aw->type != PF_ADDR_TABLE || aw->p.tbl == NULL) 1155 return; 1156 pfr_detach_table(aw->p.tbl); 1157 aw->p.tbl = NULL; 1158 } 1159 1160 void 1161 pf_tbladdr_copyout(struct pf_addr_wrap *aw) 1162 { 1163 struct pfr_ktable *kt = aw->p.tbl; 1164 1165 if (aw->type != PF_ADDR_TABLE || kt == NULL) 1166 return; 1167 if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) 1168 kt = kt->pfrkt_root; 1169 aw->p.tbl = NULL; 1170 aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ? 
1171 kt->pfrkt_cnt : -1; 1172 } 1173 1174 void 1175 pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af) 1176 { 1177 switch (af) { 1178 #ifdef INET 1179 case AF_INET: { 1180 u_int32_t a = ntohl(addr->addr32[0]); 1181 kprintf("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255, 1182 (a>>8)&255, a&255); 1183 if (p) { 1184 p = ntohs(p); 1185 kprintf(":%u", p); 1186 } 1187 break; 1188 } 1189 #endif /* INET */ 1190 #ifdef INET6 1191 case AF_INET6: { 1192 u_int16_t b; 1193 u_int8_t i, curstart = 255, curend = 0, 1194 maxstart = 0, maxend = 0; 1195 for (i = 0; i < 8; i++) { 1196 if (!addr->addr16[i]) { 1197 if (curstart == 255) 1198 curstart = i; 1199 else 1200 curend = i; 1201 } else { 1202 if (curstart) { 1203 if ((curend - curstart) > 1204 (maxend - maxstart)) { 1205 maxstart = curstart; 1206 maxend = curend; 1207 curstart = 255; 1208 } 1209 } 1210 } 1211 } 1212 for (i = 0; i < 8; i++) { 1213 if (i >= maxstart && i <= maxend) { 1214 if (maxend != 7) { 1215 if (i == maxstart) 1216 kprintf(":"); 1217 } else { 1218 if (i == maxend) 1219 kprintf(":"); 1220 } 1221 } else { 1222 b = ntohs(addr->addr16[i]); 1223 kprintf("%x", b); 1224 if (i < 7) 1225 kprintf(":"); 1226 } 1227 } 1228 if (p) { 1229 p = ntohs(p); 1230 kprintf("[%u]", p); 1231 } 1232 break; 1233 } 1234 #endif /* INET6 */ 1235 } 1236 } 1237 1238 void 1239 pf_print_state(struct pf_state *s) 1240 { 1241 struct pf_state_key *sk = s->state_key; 1242 switch (sk->proto) { 1243 case IPPROTO_TCP: 1244 kprintf("TCP "); 1245 break; 1246 case IPPROTO_UDP: 1247 kprintf("UDP "); 1248 break; 1249 case IPPROTO_ICMP: 1250 kprintf("ICMP "); 1251 break; 1252 case IPPROTO_ICMPV6: 1253 kprintf("ICMPV6 "); 1254 break; 1255 default: 1256 kprintf("%u ", sk->proto); 1257 break; 1258 } 1259 pf_print_host(&sk->lan.addr, sk->lan.port, sk->af); 1260 kprintf(" "); 1261 pf_print_host(&sk->gwy.addr, sk->gwy.port, sk->af); 1262 kprintf(" "); 1263 pf_print_host(&sk->ext.addr, sk->ext.port, sk->af); 1264 kprintf(" [lo=%u high=%u win=%u 
modulator=%u", s->src.seqlo, 1265 s->src.seqhi, s->src.max_win, s->src.seqdiff); 1266 if (s->src.wscale && s->dst.wscale) 1267 kprintf(" wscale=%u", s->src.wscale & PF_WSCALE_MASK); 1268 kprintf("]"); 1269 kprintf(" [lo=%u high=%u win=%u modulator=%u", s->dst.seqlo, 1270 s->dst.seqhi, s->dst.max_win, s->dst.seqdiff); 1271 if (s->src.wscale && s->dst.wscale) 1272 kprintf(" wscale=%u", s->dst.wscale & PF_WSCALE_MASK); 1273 kprintf("]"); 1274 kprintf(" %u:%u", s->src.state, s->dst.state); 1275 } 1276 1277 void 1278 pf_print_flags(u_int8_t f) 1279 { 1280 if (f) 1281 kprintf(" "); 1282 if (f & TH_FIN) 1283 kprintf("F"); 1284 if (f & TH_SYN) 1285 kprintf("S"); 1286 if (f & TH_RST) 1287 kprintf("R"); 1288 if (f & TH_PUSH) 1289 kprintf("P"); 1290 if (f & TH_ACK) 1291 kprintf("A"); 1292 if (f & TH_URG) 1293 kprintf("U"); 1294 if (f & TH_ECE) 1295 kprintf("E"); 1296 if (f & TH_CWR) 1297 kprintf("W"); 1298 } 1299 1300 #define PF_SET_SKIP_STEPS(i) \ 1301 do { \ 1302 while (head[i] != cur) { \ 1303 head[i]->skip[i].ptr = cur; \ 1304 head[i] = TAILQ_NEXT(head[i], entries); \ 1305 } \ 1306 } while (0) 1307 1308 void 1309 pf_calc_skip_steps(struct pf_rulequeue *rules) 1310 { 1311 struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT]; 1312 int i; 1313 1314 cur = TAILQ_FIRST(rules); 1315 prev = cur; 1316 for (i = 0; i < PF_SKIP_COUNT; ++i) 1317 head[i] = cur; 1318 while (cur != NULL) { 1319 1320 if (cur->kif != prev->kif || cur->ifnot != prev->ifnot) 1321 PF_SET_SKIP_STEPS(PF_SKIP_IFP); 1322 if (cur->direction != prev->direction) 1323 PF_SET_SKIP_STEPS(PF_SKIP_DIR); 1324 if (cur->af != prev->af) 1325 PF_SET_SKIP_STEPS(PF_SKIP_AF); 1326 if (cur->proto != prev->proto) 1327 PF_SET_SKIP_STEPS(PF_SKIP_PROTO); 1328 if (cur->src.neg != prev->src.neg || 1329 pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr)) 1330 PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR); 1331 if (cur->src.port[0] != prev->src.port[0] || 1332 cur->src.port[1] != prev->src.port[1] || 1333 cur->src.port_op != prev->src.port_op) 1334 
PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT); 1335 if (cur->dst.neg != prev->dst.neg || 1336 pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr)) 1337 PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR); 1338 if (cur->dst.port[0] != prev->dst.port[0] || 1339 cur->dst.port[1] != prev->dst.port[1] || 1340 cur->dst.port_op != prev->dst.port_op) 1341 PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT); 1342 1343 prev = cur; 1344 cur = TAILQ_NEXT(cur, entries); 1345 } 1346 for (i = 0; i < PF_SKIP_COUNT; ++i) 1347 PF_SET_SKIP_STEPS(i); 1348 } 1349 1350 int 1351 pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2) 1352 { 1353 if (aw1->type != aw2->type) 1354 return (1); 1355 switch (aw1->type) { 1356 case PF_ADDR_ADDRMASK: 1357 if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, 0)) 1358 return (1); 1359 if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, 0)) 1360 return (1); 1361 return (0); 1362 case PF_ADDR_DYNIFTL: 1363 return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt); 1364 case PF_ADDR_NOROUTE: 1365 case PF_ADDR_URPFFAILED: 1366 return (0); 1367 case PF_ADDR_TABLE: 1368 return (aw1->p.tbl != aw2->p.tbl); 1369 case PF_ADDR_RTLABEL: 1370 return (aw1->v.rtlabel != aw2->v.rtlabel); 1371 default: 1372 kprintf("invalid address type: %d\n", aw1->type); 1373 return (1); 1374 } 1375 } 1376 1377 u_int16_t 1378 pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp) 1379 { 1380 u_int32_t l; 1381 1382 if (udp && !cksum) 1383 return (0x0000); 1384 l = cksum + old - new; 1385 l = (l >> 16) + (l & 65535); 1386 l = l & 65535; 1387 if (udp && !l) 1388 return (0xFFFF); 1389 return (l); 1390 } 1391 1392 void 1393 pf_change_ap(struct pf_addr *a, u_int16_t *p, u_int16_t *ic, u_int16_t *pc, 1394 struct pf_addr *an, u_int16_t pn, u_int8_t u, sa_family_t af) 1395 { 1396 struct pf_addr ao; 1397 u_int16_t po = *p; 1398 1399 PF_ACPY(&ao, a, af); 1400 PF_ACPY(a, an, af); 1401 1402 *p = pn; 1403 1404 switch (af) { 1405 #ifdef INET 1406 case AF_INET: 1407 *ic = pf_cksum_fixup(pf_cksum_fixup(*ic, 1408 ao.addr16[0], 
an->addr16[0], 0), 1409 ao.addr16[1], an->addr16[1], 0); 1410 *p = pn; 1411 *pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc, 1412 ao.addr16[0], an->addr16[0], u), 1413 ao.addr16[1], an->addr16[1], u), 1414 po, pn, u); 1415 break; 1416 #endif /* INET */ 1417 #ifdef INET6 1418 case AF_INET6: 1419 *pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( 1420 pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( 1421 pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc, 1422 ao.addr16[0], an->addr16[0], u), 1423 ao.addr16[1], an->addr16[1], u), 1424 ao.addr16[2], an->addr16[2], u), 1425 ao.addr16[3], an->addr16[3], u), 1426 ao.addr16[4], an->addr16[4], u), 1427 ao.addr16[5], an->addr16[5], u), 1428 ao.addr16[6], an->addr16[6], u), 1429 ao.addr16[7], an->addr16[7], u), 1430 po, pn, u); 1431 break; 1432 #endif /* INET6 */ 1433 } 1434 } 1435 1436 1437 /* Changes a u_int32_t. Uses a void * so there are no align restrictions */ 1438 void 1439 pf_change_a(void *a, u_int16_t *c, u_int32_t an, u_int8_t u) 1440 { 1441 u_int32_t ao; 1442 1443 memcpy(&ao, a, sizeof(ao)); 1444 memcpy(a, &an, sizeof(u_int32_t)); 1445 *c = pf_cksum_fixup(pf_cksum_fixup(*c, ao / 65536, an / 65536, u), 1446 ao % 65536, an % 65536, u); 1447 } 1448 1449 #ifdef INET6 1450 void 1451 pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u) 1452 { 1453 struct pf_addr ao; 1454 1455 PF_ACPY(&ao, a, AF_INET6); 1456 PF_ACPY(a, an, AF_INET6); 1457 1458 *c = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( 1459 pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( 1460 pf_cksum_fixup(pf_cksum_fixup(*c, 1461 ao.addr16[0], an->addr16[0], u), 1462 ao.addr16[1], an->addr16[1], u), 1463 ao.addr16[2], an->addr16[2], u), 1464 ao.addr16[3], an->addr16[3], u), 1465 ao.addr16[4], an->addr16[4], u), 1466 ao.addr16[5], an->addr16[5], u), 1467 ao.addr16[6], an->addr16[6], u), 1468 ao.addr16[7], an->addr16[7], u); 1469 } 1470 #endif /* INET6 */ 1471 1472 void 1473 pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct 
pf_addr *oa, 1474 struct pf_addr *na, u_int16_t np, u_int16_t *pc, u_int16_t *h2c, 1475 u_int16_t *ic, u_int16_t *hc, u_int8_t u, sa_family_t af) 1476 { 1477 struct pf_addr oia, ooa; 1478 1479 PF_ACPY(&oia, ia, af); 1480 PF_ACPY(&ooa, oa, af); 1481 1482 /* Change inner protocol port, fix inner protocol checksum. */ 1483 if (ip != NULL) { 1484 u_int16_t oip = *ip; 1485 u_int32_t opc = 0; 1486 1487 if (pc != NULL) 1488 opc = *pc; 1489 *ip = np; 1490 if (pc != NULL) 1491 *pc = pf_cksum_fixup(*pc, oip, *ip, u); 1492 *ic = pf_cksum_fixup(*ic, oip, *ip, 0); 1493 if (pc != NULL) 1494 *ic = pf_cksum_fixup(*ic, opc, *pc, 0); 1495 } 1496 /* Change inner ip address, fix inner ip and icmp checksums. */ 1497 PF_ACPY(ia, na, af); 1498 switch (af) { 1499 #ifdef INET 1500 case AF_INET: { 1501 u_int32_t oh2c = *h2c; 1502 1503 *h2c = pf_cksum_fixup(pf_cksum_fixup(*h2c, 1504 oia.addr16[0], ia->addr16[0], 0), 1505 oia.addr16[1], ia->addr16[1], 0); 1506 *ic = pf_cksum_fixup(pf_cksum_fixup(*ic, 1507 oia.addr16[0], ia->addr16[0], 0), 1508 oia.addr16[1], ia->addr16[1], 0); 1509 *ic = pf_cksum_fixup(*ic, oh2c, *h2c, 0); 1510 break; 1511 } 1512 #endif /* INET */ 1513 #ifdef INET6 1514 case AF_INET6: 1515 *ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( 1516 pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( 1517 pf_cksum_fixup(pf_cksum_fixup(*ic, 1518 oia.addr16[0], ia->addr16[0], u), 1519 oia.addr16[1], ia->addr16[1], u), 1520 oia.addr16[2], ia->addr16[2], u), 1521 oia.addr16[3], ia->addr16[3], u), 1522 oia.addr16[4], ia->addr16[4], u), 1523 oia.addr16[5], ia->addr16[5], u), 1524 oia.addr16[6], ia->addr16[6], u), 1525 oia.addr16[7], ia->addr16[7], u); 1526 break; 1527 #endif /* INET6 */ 1528 } 1529 /* Change outer ip address, fix outer ip or icmpv6 checksum. 
*/ 1530 PF_ACPY(oa, na, af); 1531 switch (af) { 1532 #ifdef INET 1533 case AF_INET: 1534 *hc = pf_cksum_fixup(pf_cksum_fixup(*hc, 1535 ooa.addr16[0], oa->addr16[0], 0), 1536 ooa.addr16[1], oa->addr16[1], 0); 1537 break; 1538 #endif /* INET */ 1539 #ifdef INET6 1540 case AF_INET6: 1541 *ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( 1542 pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( 1543 pf_cksum_fixup(pf_cksum_fixup(*ic, 1544 ooa.addr16[0], oa->addr16[0], u), 1545 ooa.addr16[1], oa->addr16[1], u), 1546 ooa.addr16[2], oa->addr16[2], u), 1547 ooa.addr16[3], oa->addr16[3], u), 1548 ooa.addr16[4], oa->addr16[4], u), 1549 ooa.addr16[5], oa->addr16[5], u), 1550 ooa.addr16[6], oa->addr16[6], u), 1551 ooa.addr16[7], oa->addr16[7], u); 1552 break; 1553 #endif /* INET6 */ 1554 } 1555 } 1556 1557 1558 /* 1559 * Need to modulate the sequence numbers in the TCP SACK option 1560 * (credits to Krzysztof Pfaff for report and patch) 1561 */ 1562 int 1563 pf_modulate_sack(struct mbuf *m, int off, struct pf_pdesc *pd, 1564 struct tcphdr *th, struct pf_state_peer *dst) 1565 { 1566 int hlen = (th->th_off << 2) - sizeof(*th), thoptlen = hlen; 1567 u_int8_t opts[TCP_MAXOLEN], *opt = opts; 1568 int copyback = 0, i, olen; 1569 struct raw_sackblock sack; 1570 1571 #define TCPOLEN_SACKLEN (TCPOLEN_SACK + 2) 1572 if (hlen < TCPOLEN_SACKLEN || 1573 !pf_pull_hdr(m, off + sizeof(*th), opts, hlen, NULL, NULL, pd->af)) 1574 return 0; 1575 1576 while (hlen >= TCPOLEN_SACKLEN) { 1577 olen = opt[1]; 1578 switch (*opt) { 1579 case TCPOPT_EOL: /* FALLTHROUGH */ 1580 case TCPOPT_NOP: 1581 opt++; 1582 hlen--; 1583 break; 1584 case TCPOPT_SACK: 1585 if (olen > hlen) 1586 olen = hlen; 1587 if (olen >= TCPOLEN_SACKLEN) { 1588 for (i = 2; i + TCPOLEN_SACK <= olen; 1589 i += TCPOLEN_SACK) { 1590 memcpy(&sack, &opt[i], sizeof(sack)); 1591 pf_change_a(&sack.rblk_start, &th->th_sum, 1592 htonl(ntohl(sack.rblk_start) - 1593 dst->seqdiff), 0); 1594 pf_change_a(&sack.rblk_end, &th->th_sum, 1595 
htonl(ntohl(sack.rblk_end) - 1596 dst->seqdiff), 0); 1597 memcpy(&opt[i], &sack, sizeof(sack)); 1598 } 1599 copyback = 1; 1600 } 1601 /* FALLTHROUGH */ 1602 default: 1603 if (olen < 2) 1604 olen = 2; 1605 hlen -= olen; 1606 opt += olen; 1607 } 1608 } 1609 1610 if (copyback) 1611 m_copyback(m, off + sizeof(*th), thoptlen, opts); 1612 return (copyback); 1613 } 1614 1615 void 1616 pf_send_tcp(const struct pf_rule *r, sa_family_t af, 1617 const struct pf_addr *saddr, const struct pf_addr *daddr, 1618 u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack, 1619 u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag, 1620 u_int16_t rtag, struct ether_header *eh, struct ifnet *ifp) 1621 { 1622 struct mbuf *m; 1623 int len = 0, tlen; 1624 #ifdef INET 1625 struct ip *h = NULL; 1626 #endif /* INET */ 1627 #ifdef INET6 1628 struct ip6_hdr *h6 = NULL; 1629 #endif /* INET6 */ 1630 struct tcphdr *th = NULL; 1631 char *opt; 1632 1633 ASSERT_LWKT_TOKEN_HELD(&pf_token); 1634 1635 /* maximum segment size tcp option */ 1636 tlen = sizeof(struct tcphdr); 1637 if (mss) 1638 tlen += 4; 1639 1640 switch (af) { 1641 #ifdef INET 1642 case AF_INET: 1643 len = sizeof(struct ip) + tlen; 1644 break; 1645 #endif /* INET */ 1646 #ifdef INET6 1647 case AF_INET6: 1648 len = sizeof(struct ip6_hdr) + tlen; 1649 break; 1650 #endif /* INET6 */ 1651 } 1652 1653 /* create outgoing mbuf */ 1654 m = m_gethdr(MB_DONTWAIT, MT_HEADER); 1655 if (m == NULL) { 1656 return; 1657 } 1658 if (tag) 1659 m->m_pkthdr.pf.flags |= PF_TAG_GENERATED; 1660 m->m_pkthdr.pf.tag = rtag; 1661 1662 if (r != NULL && r->rtableid >= 0) 1663 m->m_pkthdr.pf.rtableid = r->rtableid; 1664 1665 #ifdef ALTQ 1666 if (r != NULL && r->qid) { 1667 m->m_pkthdr.fw_flags |= PF_MBUF_STRUCTURE; 1668 m->m_pkthdr.pf.qid = r->qid; 1669 m->m_pkthdr.pf.ecn_af = af; 1670 m->m_pkthdr.pf.hdr = mtod(m, struct ip *); 1671 } 1672 #endif /* ALTQ */ 1673 m->m_data += max_linkhdr; 1674 m->m_pkthdr.len = m->m_len = len; 1675 
m->m_pkthdr.rcvif = NULL; 1676 bzero(m->m_data, len); 1677 switch (af) { 1678 #ifdef INET 1679 case AF_INET: 1680 h = mtod(m, struct ip *); 1681 1682 /* IP header fields included in the TCP checksum */ 1683 h->ip_p = IPPROTO_TCP; 1684 h->ip_len = tlen; 1685 h->ip_src.s_addr = saddr->v4.s_addr; 1686 h->ip_dst.s_addr = daddr->v4.s_addr; 1687 1688 th = (struct tcphdr *)((caddr_t)h + sizeof(struct ip)); 1689 break; 1690 #endif /* INET */ 1691 #ifdef INET6 1692 case AF_INET6: 1693 h6 = mtod(m, struct ip6_hdr *); 1694 1695 /* IP header fields included in the TCP checksum */ 1696 h6->ip6_nxt = IPPROTO_TCP; 1697 h6->ip6_plen = htons(tlen); 1698 memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr)); 1699 memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr)); 1700 1701 th = (struct tcphdr *)((caddr_t)h6 + sizeof(struct ip6_hdr)); 1702 break; 1703 #endif /* INET6 */ 1704 } 1705 1706 /* TCP header */ 1707 th->th_sport = sport; 1708 th->th_dport = dport; 1709 th->th_seq = htonl(seq); 1710 th->th_ack = htonl(ack); 1711 th->th_off = tlen >> 2; 1712 th->th_flags = flags; 1713 th->th_win = htons(win); 1714 1715 if (mss) { 1716 opt = (char *)(th + 1); 1717 opt[0] = TCPOPT_MAXSEG; 1718 opt[1] = 4; 1719 mss = htons(mss); 1720 bcopy((caddr_t)&mss, (caddr_t)(opt + 2), 2); 1721 } 1722 1723 switch (af) { 1724 #ifdef INET 1725 case AF_INET: 1726 /* TCP checksum */ 1727 th->th_sum = in_cksum(m, len); 1728 1729 /* Finish the IP header */ 1730 h->ip_v = 4; 1731 h->ip_hl = sizeof(*h) >> 2; 1732 h->ip_tos = IPTOS_LOWDELAY; 1733 h->ip_len = len; 1734 h->ip_off = path_mtu_discovery ? IP_DF : 0; 1735 h->ip_ttl = ttl ? 
ttl : ip_defttl; 1736 h->ip_sum = 0; 1737 if (eh == NULL) { 1738 lwkt_reltoken(&pf_token); 1739 ip_output(m, NULL, NULL, 0, NULL, NULL); 1740 lwkt_gettoken(&pf_token); 1741 } else { 1742 struct route ro; 1743 struct rtentry rt; 1744 struct ether_header *e = (void *)ro.ro_dst.sa_data; 1745 1746 if (ifp == NULL) { 1747 m_freem(m); 1748 return; 1749 } 1750 rt.rt_ifp = ifp; 1751 ro.ro_rt = &rt; 1752 ro.ro_dst.sa_len = sizeof(ro.ro_dst); 1753 ro.ro_dst.sa_family = pseudo_AF_HDRCMPLT; 1754 bcopy(eh->ether_dhost, e->ether_shost, ETHER_ADDR_LEN); 1755 bcopy(eh->ether_shost, e->ether_dhost, ETHER_ADDR_LEN); 1756 e->ether_type = eh->ether_type; 1757 /* XXX_IMPORT: later */ 1758 lwkt_reltoken(&pf_token); 1759 ip_output(m, (void *)NULL, &ro, 0, 1760 (void *)NULL, (void *)NULL); 1761 lwkt_gettoken(&pf_token); 1762 } 1763 break; 1764 #endif /* INET */ 1765 #ifdef INET6 1766 case AF_INET6: 1767 /* TCP checksum */ 1768 th->th_sum = in6_cksum(m, IPPROTO_TCP, 1769 sizeof(struct ip6_hdr), tlen); 1770 1771 h6->ip6_vfc |= IPV6_VERSION; 1772 h6->ip6_hlim = IPV6_DEFHLIM; 1773 1774 lwkt_reltoken(&pf_token); 1775 ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL); 1776 lwkt_gettoken(&pf_token); 1777 break; 1778 #endif /* INET6 */ 1779 } 1780 } 1781 1782 void 1783 pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af, 1784 struct pf_rule *r) 1785 { 1786 struct mbuf *m0; 1787 1788 m0 = m_copy(m, 0, M_COPYALL); 1789 m0->m_pkthdr.pf.flags |= PF_TAG_GENERATED; 1790 1791 if (r->rtableid >= 0) 1792 m0->m_pkthdr.pf.rtableid = r->rtableid; 1793 1794 #ifdef ALTQ 1795 if (r->qid) { 1796 m->m_pkthdr.fw_flags |= PF_MBUF_STRUCTURE; 1797 m0->m_pkthdr.pf.qid = r->qid; 1798 m0->m_pkthdr.pf.ecn_af = af; 1799 m0->m_pkthdr.pf.hdr = mtod(m0, struct ip *); 1800 } 1801 #endif /* ALTQ */ 1802 1803 switch (af) { 1804 #ifdef INET 1805 case AF_INET: 1806 icmp_error(m0, type, code, 0, 0); 1807 break; 1808 #endif /* INET */ 1809 #ifdef INET6 1810 case AF_INET6: 1811 icmp6_error(m0, type, code, 0); 1812 
break; 1813 #endif /* INET6 */ 1814 } 1815 } 1816 1817 /* 1818 * Return 1 if the addresses a and b match (with mask m), otherwise return 0. 1819 * If n is 0, they match if they are equal. If n is != 0, they match if they 1820 * are different. 1821 */ 1822 int 1823 pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m, 1824 struct pf_addr *b, sa_family_t af) 1825 { 1826 int match = 0; 1827 1828 switch (af) { 1829 #ifdef INET 1830 case AF_INET: 1831 if ((a->addr32[0] & m->addr32[0]) == 1832 (b->addr32[0] & m->addr32[0])) 1833 match++; 1834 break; 1835 #endif /* INET */ 1836 #ifdef INET6 1837 case AF_INET6: 1838 if (((a->addr32[0] & m->addr32[0]) == 1839 (b->addr32[0] & m->addr32[0])) && 1840 ((a->addr32[1] & m->addr32[1]) == 1841 (b->addr32[1] & m->addr32[1])) && 1842 ((a->addr32[2] & m->addr32[2]) == 1843 (b->addr32[2] & m->addr32[2])) && 1844 ((a->addr32[3] & m->addr32[3]) == 1845 (b->addr32[3] & m->addr32[3]))) 1846 match++; 1847 break; 1848 #endif /* INET6 */ 1849 } 1850 if (match) { 1851 if (n) 1852 return (0); 1853 else 1854 return (1); 1855 } else { 1856 if (n) 1857 return (1); 1858 else 1859 return (0); 1860 } 1861 } 1862 1863 int 1864 pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p) 1865 { 1866 switch (op) { 1867 case PF_OP_IRG: 1868 return ((p > a1) && (p < a2)); 1869 case PF_OP_XRG: 1870 return ((p < a1) || (p > a2)); 1871 case PF_OP_RRG: 1872 return ((p >= a1) && (p <= a2)); 1873 case PF_OP_EQ: 1874 return (p == a1); 1875 case PF_OP_NE: 1876 return (p != a1); 1877 case PF_OP_LT: 1878 return (p < a1); 1879 case PF_OP_LE: 1880 return (p <= a1); 1881 case PF_OP_GT: 1882 return (p > a1); 1883 case PF_OP_GE: 1884 return (p >= a1); 1885 } 1886 return (0); /* never reached */ 1887 } 1888 1889 int 1890 pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p) 1891 { 1892 a1 = ntohs(a1); 1893 a2 = ntohs(a2); 1894 p = ntohs(p); 1895 return (pf_match(op, a1, a2, p)); 1896 } 1897 1898 int 1899 pf_match_uid(u_int8_t op, uid_t a1, uid_t 
a2, uid_t u) 1900 { 1901 if (u == UID_MAX && op != PF_OP_EQ && op != PF_OP_NE) 1902 return (0); 1903 return (pf_match(op, a1, a2, u)); 1904 } 1905 1906 int 1907 pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g) 1908 { 1909 if (g == GID_MAX && op != PF_OP_EQ && op != PF_OP_NE) 1910 return (0); 1911 return (pf_match(op, a1, a2, g)); 1912 } 1913 1914 int 1915 pf_match_tag(struct mbuf *m, struct pf_rule *r, int *tag) 1916 { 1917 if (*tag == -1) 1918 *tag = m->m_pkthdr.pf.tag; 1919 1920 return ((!r->match_tag_not && r->match_tag == *tag) || 1921 (r->match_tag_not && r->match_tag != *tag)); 1922 } 1923 1924 int 1925 pf_tag_packet(struct mbuf *m, int tag, int rtableid) 1926 { 1927 if (tag <= 0 && rtableid < 0) 1928 return (0); 1929 1930 if (tag > 0) 1931 m->m_pkthdr.pf.tag = tag; 1932 if (rtableid >= 0) 1933 m->m_pkthdr.pf.rtableid = rtableid; 1934 1935 return (0); 1936 } 1937 1938 void 1939 pf_step_into_anchor(int *depth, struct pf_ruleset **rs, int n, 1940 struct pf_rule **r, struct pf_rule **a, int *match) 1941 { 1942 struct pf_anchor_stackframe *f; 1943 1944 (*r)->anchor->match = 0; 1945 if (match) 1946 *match = 0; 1947 if (*depth >= sizeof(pf_anchor_stack) / 1948 sizeof(pf_anchor_stack[0])) { 1949 kprintf("pf_step_into_anchor: stack overflow\n"); 1950 *r = TAILQ_NEXT(*r, entries); 1951 return; 1952 } else if (*depth == 0 && a != NULL) 1953 *a = *r; 1954 f = pf_anchor_stack + (*depth)++; 1955 f->rs = *rs; 1956 f->r = *r; 1957 if ((*r)->anchor_wildcard) { 1958 f->parent = &(*r)->anchor->children; 1959 if ((f->child = RB_MIN(pf_anchor_node, f->parent)) == 1960 NULL) { 1961 *r = NULL; 1962 return; 1963 } 1964 *rs = &f->child->ruleset; 1965 } else { 1966 f->parent = NULL; 1967 f->child = NULL; 1968 *rs = &(*r)->anchor->ruleset; 1969 } 1970 *r = TAILQ_FIRST((*rs)->rules[n].active.ptr); 1971 } 1972 1973 int 1974 pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs, int n, 1975 struct pf_rule **r, struct pf_rule **a, int *match) 1976 { 1977 struct 
pf_anchor_stackframe *f; 1978 int quick = 0; 1979 1980 do { 1981 if (*depth <= 0) 1982 break; 1983 f = pf_anchor_stack + *depth - 1; 1984 if (f->parent != NULL && f->child != NULL) { 1985 if (f->child->match || 1986 (match != NULL && *match)) { 1987 f->r->anchor->match = 1; 1988 *match = 0; 1989 } 1990 f->child = RB_NEXT(pf_anchor_node, f->parent, f->child); 1991 if (f->child != NULL) { 1992 *rs = &f->child->ruleset; 1993 *r = TAILQ_FIRST((*rs)->rules[n].active.ptr); 1994 if (*r == NULL) 1995 continue; 1996 else 1997 break; 1998 } 1999 } 2000 (*depth)--; 2001 if (*depth == 0 && a != NULL) 2002 *a = NULL; 2003 *rs = f->rs; 2004 if (f->r->anchor->match || (match != NULL && *match)) 2005 quick = f->r->quick; 2006 *r = TAILQ_NEXT(f->r, entries); 2007 } while (*r == NULL); 2008 2009 return (quick); 2010 } 2011 2012 #ifdef INET6 2013 void 2014 pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr, 2015 struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af) 2016 { 2017 switch (af) { 2018 #ifdef INET 2019 case AF_INET: 2020 naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) | 2021 ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]); 2022 break; 2023 #endif /* INET */ 2024 case AF_INET6: 2025 naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) | 2026 ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]); 2027 naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) | 2028 ((rmask->addr32[1] ^ 0xffffffff ) & saddr->addr32[1]); 2029 naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) | 2030 ((rmask->addr32[2] ^ 0xffffffff ) & saddr->addr32[2]); 2031 naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) | 2032 ((rmask->addr32[3] ^ 0xffffffff ) & saddr->addr32[3]); 2033 break; 2034 } 2035 } 2036 2037 void 2038 pf_addr_inc(struct pf_addr *addr, sa_family_t af) 2039 { 2040 switch (af) { 2041 #ifdef INET 2042 case AF_INET: 2043 addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1); 2044 break; 2045 #endif /* INET */ 2046 case AF_INET6: 2047 if 
(addr->addr32[3] == 0xffffffff) { 2048 addr->addr32[3] = 0; 2049 if (addr->addr32[2] == 0xffffffff) { 2050 addr->addr32[2] = 0; 2051 if (addr->addr32[1] == 0xffffffff) { 2052 addr->addr32[1] = 0; 2053 addr->addr32[0] = 2054 htonl(ntohl(addr->addr32[0]) + 1); 2055 } else 2056 addr->addr32[1] = 2057 htonl(ntohl(addr->addr32[1]) + 1); 2058 } else 2059 addr->addr32[2] = 2060 htonl(ntohl(addr->addr32[2]) + 1); 2061 } else 2062 addr->addr32[3] = 2063 htonl(ntohl(addr->addr32[3]) + 1); 2064 break; 2065 } 2066 } 2067 #endif /* INET6 */ 2068 2069 #define mix(a,b,c) \ 2070 do { \ 2071 a -= b; a -= c; a ^= (c >> 13); \ 2072 b -= c; b -= a; b ^= (a << 8); \ 2073 c -= a; c -= b; c ^= (b >> 13); \ 2074 a -= b; a -= c; a ^= (c >> 12); \ 2075 b -= c; b -= a; b ^= (a << 16); \ 2076 c -= a; c -= b; c ^= (b >> 5); \ 2077 a -= b; a -= c; a ^= (c >> 3); \ 2078 b -= c; b -= a; b ^= (a << 10); \ 2079 c -= a; c -= b; c ^= (b >> 15); \ 2080 } while (0) 2081 2082 /* 2083 * hash function based on bridge_hash in if_bridge.c 2084 */ 2085 void 2086 pf_hash(struct pf_addr *inaddr, struct pf_addr *hash, 2087 struct pf_poolhashkey *key, sa_family_t af) 2088 { 2089 u_int32_t a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0]; 2090 2091 switch (af) { 2092 #ifdef INET 2093 case AF_INET: 2094 a += inaddr->addr32[0]; 2095 b += key->key32[1]; 2096 mix(a, b, c); 2097 hash->addr32[0] = c + key->key32[2]; 2098 break; 2099 #endif /* INET */ 2100 #ifdef INET6 2101 case AF_INET6: 2102 a += inaddr->addr32[0]; 2103 b += inaddr->addr32[2]; 2104 mix(a, b, c); 2105 hash->addr32[0] = c; 2106 a += inaddr->addr32[1]; 2107 b += inaddr->addr32[3]; 2108 c += key->key32[1]; 2109 mix(a, b, c); 2110 hash->addr32[1] = c; 2111 a += inaddr->addr32[2]; 2112 b += inaddr->addr32[1]; 2113 c += key->key32[2]; 2114 mix(a, b, c); 2115 hash->addr32[2] = c; 2116 a += inaddr->addr32[3]; 2117 b += inaddr->addr32[0]; 2118 c += key->key32[3]; 2119 mix(a, b, c); 2120 hash->addr32[3] = c; 2121 break; 2122 #endif /* INET6 */ 2123 } 2124 } 
2125 2126 int 2127 pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr, 2128 struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sn) 2129 { 2130 unsigned char hash[16]; 2131 struct pf_pool *rpool = &r->rpool; 2132 struct pf_addr *raddr = &rpool->cur->addr.v.a.addr; 2133 struct pf_addr *rmask = &rpool->cur->addr.v.a.mask; 2134 struct pf_pooladdr *acur = rpool->cur; 2135 struct pf_src_node k; 2136 2137 if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR && 2138 (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) { 2139 k.af = af; 2140 PF_ACPY(&k.addr, saddr, af); 2141 if (r->rule_flag & PFRULE_RULESRCTRACK || 2142 r->rpool.opts & PF_POOL_STICKYADDR) 2143 k.rule.ptr = r; 2144 else 2145 k.rule.ptr = NULL; 2146 pf_status.scounters[SCNT_SRC_NODE_SEARCH]++; 2147 *sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k); 2148 if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, af)) { 2149 PF_ACPY(naddr, &(*sn)->raddr, af); 2150 if (pf_status.debug >= PF_DEBUG_MISC) { 2151 kprintf("pf_map_addr: src tracking maps "); 2152 pf_print_host(&k.addr, 0, af); 2153 kprintf(" to "); 2154 pf_print_host(naddr, 0, af); 2155 kprintf("\n"); 2156 } 2157 return (0); 2158 } 2159 } 2160 2161 if (rpool->cur->addr.type == PF_ADDR_NOROUTE) 2162 return (1); 2163 if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { 2164 switch (af) { 2165 #ifdef INET 2166 case AF_INET: 2167 if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 && 2168 (rpool->opts & PF_POOL_TYPEMASK) != 2169 PF_POOL_ROUNDROBIN) 2170 return (1); 2171 raddr = &rpool->cur->addr.p.dyn->pfid_addr4; 2172 rmask = &rpool->cur->addr.p.dyn->pfid_mask4; 2173 break; 2174 #endif /* INET */ 2175 #ifdef INET6 2176 case AF_INET6: 2177 if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 && 2178 (rpool->opts & PF_POOL_TYPEMASK) != 2179 PF_POOL_ROUNDROBIN) 2180 return (1); 2181 raddr = &rpool->cur->addr.p.dyn->pfid_addr6; 2182 rmask = &rpool->cur->addr.p.dyn->pfid_mask6; 2183 break; 2184 #endif /* INET6 */ 2185 } 2186 } else if (rpool->cur->addr.type 
== PF_ADDR_TABLE) { 2187 if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN) 2188 return (1); /* unsupported */ 2189 } else { 2190 raddr = &rpool->cur->addr.v.a.addr; 2191 rmask = &rpool->cur->addr.v.a.mask; 2192 } 2193 2194 switch (rpool->opts & PF_POOL_TYPEMASK) { 2195 case PF_POOL_NONE: 2196 PF_ACPY(naddr, raddr, af); 2197 break; 2198 case PF_POOL_BITMASK: 2199 PF_POOLMASK(naddr, raddr, rmask, saddr, af); 2200 break; 2201 case PF_POOL_RANDOM: 2202 if (init_addr != NULL && PF_AZERO(init_addr, af)) { 2203 switch (af) { 2204 #ifdef INET 2205 case AF_INET: 2206 rpool->counter.addr32[0] = htonl(karc4random()); 2207 break; 2208 #endif /* INET */ 2209 #ifdef INET6 2210 case AF_INET6: 2211 if (rmask->addr32[3] != 0xffffffff) 2212 rpool->counter.addr32[3] = 2213 htonl(karc4random()); 2214 else 2215 break; 2216 if (rmask->addr32[2] != 0xffffffff) 2217 rpool->counter.addr32[2] = 2218 htonl(karc4random()); 2219 else 2220 break; 2221 if (rmask->addr32[1] != 0xffffffff) 2222 rpool->counter.addr32[1] = 2223 htonl(karc4random()); 2224 else 2225 break; 2226 if (rmask->addr32[0] != 0xffffffff) 2227 rpool->counter.addr32[0] = 2228 htonl(karc4random()); 2229 break; 2230 #endif /* INET6 */ 2231 } 2232 PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af); 2233 PF_ACPY(init_addr, naddr, af); 2234 2235 } else { 2236 PF_AINC(&rpool->counter, af); 2237 PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af); 2238 } 2239 break; 2240 case PF_POOL_SRCHASH: 2241 pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af); 2242 PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af); 2243 break; 2244 case PF_POOL_ROUNDROBIN: 2245 if (rpool->cur->addr.type == PF_ADDR_TABLE) { 2246 if (!pfr_pool_get(rpool->cur->addr.p.tbl, 2247 &rpool->tblidx, &rpool->counter, 2248 &raddr, &rmask, af)) 2249 goto get_addr; 2250 } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { 2251 if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, 2252 &rpool->tblidx, &rpool->counter, 2253 &raddr, &rmask, af)) 
2254 goto get_addr; 2255 } else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af)) 2256 goto get_addr; 2257 2258 try_next: 2259 if ((rpool->cur = TAILQ_NEXT(rpool->cur, entries)) == NULL) 2260 rpool->cur = TAILQ_FIRST(&rpool->list); 2261 if (rpool->cur->addr.type == PF_ADDR_TABLE) { 2262 rpool->tblidx = -1; 2263 if (pfr_pool_get(rpool->cur->addr.p.tbl, 2264 &rpool->tblidx, &rpool->counter, 2265 &raddr, &rmask, af)) { 2266 /* table contains no address of type 'af' */ 2267 if (rpool->cur != acur) 2268 goto try_next; 2269 return (1); 2270 } 2271 } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { 2272 rpool->tblidx = -1; 2273 if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, 2274 &rpool->tblidx, &rpool->counter, 2275 &raddr, &rmask, af)) { 2276 /* table contains no address of type 'af' */ 2277 if (rpool->cur != acur) 2278 goto try_next; 2279 return (1); 2280 } 2281 } else { 2282 raddr = &rpool->cur->addr.v.a.addr; 2283 rmask = &rpool->cur->addr.v.a.mask; 2284 PF_ACPY(&rpool->counter, raddr, af); 2285 } 2286 2287 get_addr: 2288 PF_ACPY(naddr, &rpool->counter, af); 2289 if (init_addr != NULL && PF_AZERO(init_addr, af)) 2290 PF_ACPY(init_addr, naddr, af); 2291 PF_AINC(&rpool->counter, af); 2292 break; 2293 } 2294 if (*sn != NULL) 2295 PF_ACPY(&(*sn)->raddr, naddr, af); 2296 2297 if (pf_status.debug >= PF_DEBUG_MISC && 2298 (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) { 2299 kprintf("pf_map_addr: selected address "); 2300 pf_print_host(naddr, 0, af); 2301 kprintf("\n"); 2302 } 2303 2304 return (0); 2305 } 2306 2307 int 2308 pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r, 2309 struct pf_addr *saddr, struct pf_addr *daddr, u_int16_t dport, 2310 struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high, 2311 struct pf_src_node **sn) 2312 { 2313 struct pf_state_key_cmp key; 2314 struct pf_addr init_addr; 2315 u_int16_t cut; 2316 2317 bzero(&init_addr, sizeof(init_addr)); 2318 if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn)) 2319 
return (1); 2320 2321 if (proto == IPPROTO_ICMP) { 2322 low = 1; 2323 high = 65535; 2324 } 2325 2326 do { 2327 key.af = af; 2328 key.proto = proto; 2329 PF_ACPY(&key.ext.addr, daddr, key.af); 2330 PF_ACPY(&key.gwy.addr, naddr, key.af); 2331 key.ext.port = dport; 2332 2333 /* 2334 * port search; start random, step; 2335 * similar 2 portloop in in_pcbbind 2336 */ 2337 if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP || 2338 proto == IPPROTO_ICMP)) { 2339 key.gwy.port = dport; 2340 if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL) 2341 return (0); 2342 } else if (low == 0 && high == 0) { 2343 key.gwy.port = *nport; 2344 if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL) 2345 return (0); 2346 } else if (low == high) { 2347 key.gwy.port = htons(low); 2348 if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL) { 2349 *nport = htons(low); 2350 return (0); 2351 } 2352 } else { 2353 u_int16_t tmp; 2354 2355 if (low > high) { 2356 tmp = low; 2357 low = high; 2358 high = tmp; 2359 } 2360 /* low < high */ 2361 cut = htonl(karc4random()) % (1 + high - low) + low; 2362 /* low <= cut <= high */ 2363 for (tmp = cut; tmp <= high; ++(tmp)) { 2364 key.gwy.port = htons(tmp); 2365 if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == 2366 NULL) { 2367 *nport = htons(tmp); 2368 return (0); 2369 } 2370 } 2371 for (tmp = cut - 1; tmp >= low; --(tmp)) { 2372 key.gwy.port = htons(tmp); 2373 if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == 2374 NULL) { 2375 *nport = htons(tmp); 2376 return (0); 2377 } 2378 } 2379 } 2380 2381 switch (r->rpool.opts & PF_POOL_TYPEMASK) { 2382 case PF_POOL_RANDOM: 2383 case PF_POOL_ROUNDROBIN: 2384 if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn)) 2385 return (1); 2386 break; 2387 case PF_POOL_NONE: 2388 case PF_POOL_SRCHASH: 2389 case PF_POOL_BITMASK: 2390 default: 2391 return (1); 2392 } 2393 } while (! 
PF_AEQ(&init_addr, naddr, af) ); 2394 2395 return (1); /* none available */ 2396 } 2397 2398 struct pf_rule * 2399 pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off, 2400 int direction, struct pfi_kif *kif, struct pf_addr *saddr, u_int16_t sport, 2401 struct pf_addr *daddr, u_int16_t dport, int rs_num) 2402 { 2403 struct pf_rule *r, *rm = NULL; 2404 struct pf_ruleset *ruleset = NULL; 2405 int tag = -1; 2406 int rtableid = -1; 2407 int asd = 0; 2408 2409 r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr); 2410 while (r && rm == NULL) { 2411 struct pf_rule_addr *src = NULL, *dst = NULL; 2412 struct pf_addr_wrap *xdst = NULL; 2413 2414 if (r->action == PF_BINAT && direction == PF_IN) { 2415 src = &r->dst; 2416 if (r->rpool.cur != NULL) 2417 xdst = &r->rpool.cur->addr; 2418 } else { 2419 src = &r->src; 2420 dst = &r->dst; 2421 } 2422 2423 r->evaluations++; 2424 if (pfi_kif_match(r->kif, kif) == r->ifnot) 2425 r = r->skip[PF_SKIP_IFP].ptr; 2426 else if (r->direction && r->direction != direction) 2427 r = r->skip[PF_SKIP_DIR].ptr; 2428 else if (r->af && r->af != pd->af) 2429 r = r->skip[PF_SKIP_AF].ptr; 2430 else if (r->proto && r->proto != pd->proto) 2431 r = r->skip[PF_SKIP_PROTO].ptr; 2432 else if (PF_MISMATCHAW(&src->addr, saddr, pd->af, 2433 src->neg, kif)) 2434 r = r->skip[src == &r->src ? PF_SKIP_SRC_ADDR : 2435 PF_SKIP_DST_ADDR].ptr; 2436 else if (src->port_op && !pf_match_port(src->port_op, 2437 src->port[0], src->port[1], sport)) 2438 r = r->skip[src == &r->src ? 
PF_SKIP_SRC_PORT : 2439 PF_SKIP_DST_PORT].ptr; 2440 else if (dst != NULL && 2441 PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->neg, NULL)) 2442 r = r->skip[PF_SKIP_DST_ADDR].ptr; 2443 else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af, 2444 0, NULL)) 2445 r = TAILQ_NEXT(r, entries); 2446 else if (dst != NULL && dst->port_op && 2447 !pf_match_port(dst->port_op, dst->port[0], 2448 dst->port[1], dport)) 2449 r = r->skip[PF_SKIP_DST_PORT].ptr; 2450 else if (r->match_tag && !pf_match_tag(m, r, &tag)) 2451 r = TAILQ_NEXT(r, entries); 2452 else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto != 2453 IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd, m, 2454 off, pd->hdr.tcp), r->os_fingerprint))) 2455 r = TAILQ_NEXT(r, entries); 2456 else { 2457 if (r->tag) 2458 tag = r->tag; 2459 if (r->rtableid >= 0) 2460 rtableid = r->rtableid; 2461 if (r->anchor == NULL) { 2462 rm = r; 2463 } else 2464 pf_step_into_anchor(&asd, &ruleset, rs_num, 2465 &r, NULL, NULL); 2466 } 2467 if (r == NULL) 2468 pf_step_out_of_anchor(&asd, &ruleset, rs_num, &r, 2469 NULL, NULL); 2470 } 2471 if (pf_tag_packet(m, tag, rtableid)) 2472 return (NULL); 2473 if (rm != NULL && (rm->action == PF_NONAT || 2474 rm->action == PF_NORDR || rm->action == PF_NOBINAT)) 2475 return (NULL); 2476 return (rm); 2477 } 2478 2479 struct pf_rule * 2480 pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction, 2481 struct pfi_kif *kif, struct pf_src_node **sn, 2482 struct pf_addr *saddr, u_int16_t sport, 2483 struct pf_addr *daddr, u_int16_t dport, 2484 struct pf_addr *naddr, u_int16_t *nport) 2485 { 2486 struct pf_rule *r = NULL; 2487 2488 if (direction == PF_OUT) { 2489 r = pf_match_translation(pd, m, off, direction, kif, saddr, 2490 sport, daddr, dport, PF_RULESET_BINAT); 2491 if (r == NULL) 2492 r = pf_match_translation(pd, m, off, direction, kif, 2493 saddr, sport, daddr, dport, PF_RULESET_NAT); 2494 } else { 2495 r = pf_match_translation(pd, m, off, direction, kif, saddr, 2496 
sport, daddr, dport, PF_RULESET_RDR); 2497 if (r == NULL) 2498 r = pf_match_translation(pd, m, off, direction, kif, 2499 saddr, sport, daddr, dport, PF_RULESET_BINAT); 2500 } 2501 2502 if (r != NULL) { 2503 switch (r->action) { 2504 case PF_NONAT: 2505 case PF_NOBINAT: 2506 case PF_NORDR: 2507 return (NULL); 2508 case PF_NAT: 2509 if (pf_get_sport(pd->af, pd->proto, r, saddr, 2510 daddr, dport, naddr, nport, r->rpool.proxy_port[0], 2511 r->rpool.proxy_port[1], sn)) { 2512 DPFPRINTF(PF_DEBUG_MISC, 2513 ("pf: NAT proxy port allocation " 2514 "(%u-%u) failed\n", 2515 r->rpool.proxy_port[0], 2516 r->rpool.proxy_port[1])); 2517 return (NULL); 2518 } 2519 break; 2520 case PF_BINAT: 2521 switch (direction) { 2522 case PF_OUT: 2523 if (r->rpool.cur->addr.type == PF_ADDR_DYNIFTL){ 2524 switch (pd->af) { 2525 #ifdef INET 2526 case AF_INET: 2527 if (r->rpool.cur->addr.p.dyn-> 2528 pfid_acnt4 < 1) 2529 return (NULL); 2530 PF_POOLMASK(naddr, 2531 &r->rpool.cur->addr.p.dyn-> 2532 pfid_addr4, 2533 &r->rpool.cur->addr.p.dyn-> 2534 pfid_mask4, 2535 saddr, AF_INET); 2536 break; 2537 #endif /* INET */ 2538 #ifdef INET6 2539 case AF_INET6: 2540 if (r->rpool.cur->addr.p.dyn-> 2541 pfid_acnt6 < 1) 2542 return (NULL); 2543 PF_POOLMASK(naddr, 2544 &r->rpool.cur->addr.p.dyn-> 2545 pfid_addr6, 2546 &r->rpool.cur->addr.p.dyn-> 2547 pfid_mask6, 2548 saddr, AF_INET6); 2549 break; 2550 #endif /* INET6 */ 2551 } 2552 } else 2553 PF_POOLMASK(naddr, 2554 &r->rpool.cur->addr.v.a.addr, 2555 &r->rpool.cur->addr.v.a.mask, 2556 saddr, pd->af); 2557 break; 2558 case PF_IN: 2559 if (r->src.addr.type == PF_ADDR_DYNIFTL) { 2560 switch (pd->af) { 2561 #ifdef INET 2562 case AF_INET: 2563 if (r->src.addr.p.dyn-> 2564 pfid_acnt4 < 1) 2565 return (NULL); 2566 PF_POOLMASK(naddr, 2567 &r->src.addr.p.dyn-> 2568 pfid_addr4, 2569 &r->src.addr.p.dyn-> 2570 pfid_mask4, 2571 daddr, AF_INET); 2572 break; 2573 #endif /* INET */ 2574 #ifdef INET6 2575 case AF_INET6: 2576 if (r->src.addr.p.dyn-> 2577 pfid_acnt6 < 1) 2578 
return (NULL); 2579 PF_POOLMASK(naddr, 2580 &r->src.addr.p.dyn-> 2581 pfid_addr6, 2582 &r->src.addr.p.dyn-> 2583 pfid_mask6, 2584 daddr, AF_INET6); 2585 break; 2586 #endif /* INET6 */ 2587 } 2588 } else 2589 PF_POOLMASK(naddr, 2590 &r->src.addr.v.a.addr, 2591 &r->src.addr.v.a.mask, daddr, 2592 pd->af); 2593 break; 2594 } 2595 break; 2596 case PF_RDR: { 2597 if (pf_map_addr(pd->af, r, saddr, naddr, NULL, sn)) 2598 return (NULL); 2599 if ((r->rpool.opts & PF_POOL_TYPEMASK) == 2600 PF_POOL_BITMASK) 2601 PF_POOLMASK(naddr, naddr, 2602 &r->rpool.cur->addr.v.a.mask, daddr, 2603 pd->af); 2604 2605 if (r->rpool.proxy_port[1]) { 2606 u_int32_t tmp_nport; 2607 2608 tmp_nport = ((ntohs(dport) - 2609 ntohs(r->dst.port[0])) % 2610 (r->rpool.proxy_port[1] - 2611 r->rpool.proxy_port[0] + 1)) + 2612 r->rpool.proxy_port[0]; 2613 2614 /* wrap around if necessary */ 2615 if (tmp_nport > 65535) 2616 tmp_nport -= 65535; 2617 *nport = htons((u_int16_t)tmp_nport); 2618 } else if (r->rpool.proxy_port[0]) 2619 *nport = htons(r->rpool.proxy_port[0]); 2620 break; 2621 } 2622 default: 2623 return (NULL); 2624 } 2625 } 2626 2627 return (r); 2628 } 2629 2630 #ifdef SMP 2631 struct netmsg_hashlookup { 2632 struct netmsg_base base; 2633 struct inpcb **nm_pinp; 2634 struct inpcbinfo *nm_pcbinfo; 2635 struct pf_addr *nm_saddr; 2636 struct pf_addr *nm_daddr; 2637 uint16_t nm_sport; 2638 uint16_t nm_dport; 2639 sa_family_t nm_af; 2640 }; 2641 2642 static void 2643 in_pcblookup_hash_handler(netmsg_t msg) 2644 { 2645 struct netmsg_hashlookup *rmsg = (struct netmsg_hashlookup *)msg; 2646 2647 if (rmsg->nm_af == AF_INET) 2648 *rmsg->nm_pinp = in_pcblookup_hash(rmsg->nm_pcbinfo, 2649 rmsg->nm_saddr->v4, rmsg->nm_sport, rmsg->nm_daddr->v4, 2650 rmsg->nm_dport, INPLOOKUP_WILDCARD, NULL); 2651 #ifdef INET6 2652 else 2653 *rmsg->nm_pinp = in6_pcblookup_hash(rmsg->nm_pcbinfo, 2654 &rmsg->nm_saddr->v6, rmsg->nm_sport, &rmsg->nm_daddr->v6, 2655 rmsg->nm_dport, INPLOOKUP_WILDCARD, NULL); 2656 #endif /* INET6 */ 
2657 lwkt_replymsg(&rmsg->base.lmsg, 0); 2658 } 2659 #endif /* SMP */ 2660 2661 int 2662 pf_socket_lookup(int direction, struct pf_pdesc *pd) 2663 { 2664 struct pf_addr *saddr, *daddr; 2665 u_int16_t sport, dport; 2666 struct inpcbinfo *pi; 2667 struct inpcb *inp; 2668 #ifdef SMP 2669 struct netmsg_hashlookup *msg = NULL; 2670 #endif 2671 int pi_cpu = 0; 2672 2673 if (pd == NULL) 2674 return (-1); 2675 pd->lookup.uid = UID_MAX; 2676 pd->lookup.gid = GID_MAX; 2677 pd->lookup.pid = NO_PID; 2678 if (direction == PF_IN) { 2679 saddr = pd->src; 2680 daddr = pd->dst; 2681 } else { 2682 saddr = pd->dst; 2683 daddr = pd->src; 2684 } 2685 switch (pd->proto) { 2686 case IPPROTO_TCP: 2687 if (pd->hdr.tcp == NULL) 2688 return (-1); 2689 sport = pd->hdr.tcp->th_sport; 2690 dport = pd->hdr.tcp->th_dport; 2691 2692 pi_cpu = tcp_addrcpu(saddr->v4.s_addr, sport, daddr->v4.s_addr, dport); 2693 pi = &tcbinfo[pi_cpu]; 2694 #ifdef SMP 2695 /* 2696 * Our netstack runs lockless on MP systems 2697 * (only for TCP connections at the moment). 2698 * 2699 * As we are not allowed to read another CPU's tcbinfo, 2700 * we have to ask that CPU via remote call to search the 2701 * table for us. 2702 * 2703 * Prepare a msg iff data belongs to another CPU. 
2704 */ 2705 if (pi_cpu != mycpu->gd_cpuid) { 2706 msg = kmalloc(sizeof(*msg), M_LWKTMSG, M_INTWAIT); 2707 netmsg_init(&msg->base, NULL, &netisr_afree_rport, 2708 0, in_pcblookup_hash_handler); 2709 msg->nm_pinp = &inp; 2710 msg->nm_pcbinfo = pi; 2711 msg->nm_saddr = saddr; 2712 msg->nm_sport = sport; 2713 msg->nm_daddr = daddr; 2714 msg->nm_dport = dport; 2715 msg->nm_af = pd->af; 2716 } 2717 #endif /* SMP */ 2718 break; 2719 case IPPROTO_UDP: 2720 if (pd->hdr.udp == NULL) 2721 return (-1); 2722 sport = pd->hdr.udp->uh_sport; 2723 dport = pd->hdr.udp->uh_dport; 2724 pi = &udbinfo; 2725 break; 2726 default: 2727 return (-1); 2728 } 2729 if (direction != PF_IN) { 2730 u_int16_t p; 2731 2732 p = sport; 2733 sport = dport; 2734 dport = p; 2735 } 2736 switch (pd->af) { 2737 #ifdef INET6 2738 case AF_INET6: 2739 #ifdef SMP 2740 /* 2741 * Query other CPU, second part 2742 * 2743 * msg only gets initialized when: 2744 * 1) packet is TCP 2745 * 2) the info belongs to another CPU 2746 * 2747 * Use some switch/case magic to avoid code duplication. 
2748 */ 2749 if (msg == NULL) 2750 #endif /* SMP */ 2751 { 2752 inp = in6_pcblookup_hash(pi, &saddr->v6, sport, 2753 &daddr->v6, dport, INPLOOKUP_WILDCARD, NULL); 2754 2755 if (inp == NULL) 2756 return (-1); 2757 break; 2758 } 2759 /* FALLTHROUGH if SMP and on other CPU */ 2760 #endif /* INET6 */ 2761 case AF_INET: 2762 #ifdef SMP 2763 if (msg != NULL) { 2764 lwkt_domsg(cpu_portfn(pi_cpu), 2765 &msg->base.lmsg, 0); 2766 } else 2767 #endif /* SMP */ 2768 { 2769 inp = in_pcblookup_hash(pi, saddr->v4, sport, daddr->v4, 2770 dport, INPLOOKUP_WILDCARD, NULL); 2771 } 2772 if (inp == NULL) 2773 return (-1); 2774 break; 2775 2776 default: 2777 return (-1); 2778 } 2779 pd->lookup.uid = inp->inp_socket->so_cred->cr_uid; 2780 pd->lookup.gid = inp->inp_socket->so_cred->cr_groups[0]; 2781 return (1); 2782 } 2783 2784 u_int8_t 2785 pf_get_wscale(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af) 2786 { 2787 int hlen; 2788 u_int8_t hdr[60]; 2789 u_int8_t *opt, optlen; 2790 u_int8_t wscale = 0; 2791 2792 hlen = th_off << 2; /* hlen <= sizeof(hdr) */ 2793 if (hlen <= sizeof(struct tcphdr)) 2794 return (0); 2795 if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af)) 2796 return (0); 2797 opt = hdr + sizeof(struct tcphdr); 2798 hlen -= sizeof(struct tcphdr); 2799 while (hlen >= 3) { 2800 switch (*opt) { 2801 case TCPOPT_EOL: 2802 case TCPOPT_NOP: 2803 ++opt; 2804 --hlen; 2805 break; 2806 case TCPOPT_WINDOW: 2807 wscale = opt[2]; 2808 if (wscale > TCP_MAX_WINSHIFT) 2809 wscale = TCP_MAX_WINSHIFT; 2810 wscale |= PF_WSCALE_FLAG; 2811 /* FALLTHROUGH */ 2812 default: 2813 optlen = opt[1]; 2814 if (optlen < 2) 2815 optlen = 2; 2816 hlen -= optlen; 2817 opt += optlen; 2818 break; 2819 } 2820 } 2821 return (wscale); 2822 } 2823 2824 u_int16_t 2825 pf_get_mss(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af) 2826 { 2827 int hlen; 2828 u_int8_t hdr[60]; 2829 u_int8_t *opt, optlen; 2830 u_int16_t mss = tcp_mssdflt; 2831 2832 hlen = th_off << 2; /* hlen <= sizeof(hdr) */ 2833 if 
(hlen <= sizeof(struct tcphdr)) 2834 return (0); 2835 if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af)) 2836 return (0); 2837 opt = hdr + sizeof(struct tcphdr); 2838 hlen -= sizeof(struct tcphdr); 2839 while (hlen >= TCPOLEN_MAXSEG) { 2840 switch (*opt) { 2841 case TCPOPT_EOL: 2842 case TCPOPT_NOP: 2843 ++opt; 2844 --hlen; 2845 break; 2846 case TCPOPT_MAXSEG: 2847 bcopy((caddr_t)(opt + 2), (caddr_t)&mss, 2); 2848 /* FALLTHROUGH */ 2849 default: 2850 optlen = opt[1]; 2851 if (optlen < 2) 2852 optlen = 2; 2853 hlen -= optlen; 2854 opt += optlen; 2855 break; 2856 } 2857 } 2858 return (mss); 2859 } 2860 2861 u_int16_t 2862 pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer) 2863 { 2864 #ifdef INET 2865 struct sockaddr_in *dst; 2866 struct route ro; 2867 #endif /* INET */ 2868 #ifdef INET6 2869 struct sockaddr_in6 *dst6; 2870 struct route_in6 ro6; 2871 #endif /* INET6 */ 2872 struct rtentry *rt = NULL; 2873 int hlen = 0; 2874 u_int16_t mss = tcp_mssdflt; 2875 2876 switch (af) { 2877 #ifdef INET 2878 case AF_INET: 2879 hlen = sizeof(struct ip); 2880 bzero(&ro, sizeof(ro)); 2881 dst = (struct sockaddr_in *)&ro.ro_dst; 2882 dst->sin_family = AF_INET; 2883 dst->sin_len = sizeof(*dst); 2884 dst->sin_addr = addr->v4; 2885 rtalloc_ign(&ro, (RTF_CLONING | RTF_PRCLONING)); 2886 rt = ro.ro_rt; 2887 break; 2888 #endif /* INET */ 2889 #ifdef INET6 2890 case AF_INET6: 2891 hlen = sizeof(struct ip6_hdr); 2892 bzero(&ro6, sizeof(ro6)); 2893 dst6 = (struct sockaddr_in6 *)&ro6.ro_dst; 2894 dst6->sin6_family = AF_INET6; 2895 dst6->sin6_len = sizeof(*dst6); 2896 dst6->sin6_addr = addr->v6; 2897 rtalloc_ign((struct route *)&ro6, (RTF_CLONING | RTF_PRCLONING)); 2898 rt = ro6.ro_rt; 2899 break; 2900 #endif /* INET6 */ 2901 } 2902 2903 if (rt && rt->rt_ifp) { 2904 mss = rt->rt_ifp->if_mtu - hlen - sizeof(struct tcphdr); 2905 mss = max(tcp_mssdflt, mss); 2906 RTFREE(rt); 2907 } 2908 mss = min(mss, offer); 2909 mss = max(mss, 64); /* sanity - at least max opt space */ 2910 
return (mss); 2911 } 2912 2913 void 2914 pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr) 2915 { 2916 struct pf_rule *r = s->rule.ptr; 2917 2918 s->rt_kif = NULL; 2919 if (!r->rt || r->rt == PF_FASTROUTE) 2920 return; 2921 switch (s->state_key->af) { 2922 #ifdef INET 2923 case AF_INET: 2924 pf_map_addr(AF_INET, r, saddr, &s->rt_addr, NULL, 2925 &s->nat_src_node); 2926 s->rt_kif = r->rpool.cur->kif; 2927 break; 2928 #endif /* INET */ 2929 #ifdef INET6 2930 case AF_INET6: 2931 pf_map_addr(AF_INET6, r, saddr, &s->rt_addr, NULL, 2932 &s->nat_src_node); 2933 s->rt_kif = r->rpool.cur->kif; 2934 break; 2935 #endif /* INET6 */ 2936 } 2937 } 2938 2939 void 2940 pf_attach_state(struct pf_state_key *sk, struct pf_state *s, int tail) 2941 { 2942 s->state_key = sk; 2943 sk->refcnt++; 2944 2945 /* list is sorted, if-bound states before floating */ 2946 if (tail) 2947 TAILQ_INSERT_TAIL(&sk->states, s, next); 2948 else 2949 TAILQ_INSERT_HEAD(&sk->states, s, next); 2950 } 2951 2952 void 2953 pf_detach_state(struct pf_state *s, int flags) 2954 { 2955 struct pf_state_key *sk = s->state_key; 2956 2957 if (sk == NULL) 2958 return; 2959 2960 s->state_key = NULL; 2961 TAILQ_REMOVE(&sk->states, s, next); 2962 if (--sk->refcnt == 0) { 2963 if (!(flags & PF_DT_SKIP_EXTGWY)) 2964 RB_REMOVE(pf_state_tree_ext_gwy, 2965 &pf_statetbl_ext_gwy, sk); 2966 if (!(flags & PF_DT_SKIP_LANEXT)) 2967 RB_REMOVE(pf_state_tree_lan_ext, 2968 &pf_statetbl_lan_ext, sk); 2969 pool_put(&pf_state_key_pl, sk); 2970 } 2971 } 2972 2973 struct pf_state_key * 2974 pf_alloc_state_key(struct pf_state *s) 2975 { 2976 struct pf_state_key *sk; 2977 2978 if ((sk = pool_get(&pf_state_key_pl, PR_NOWAIT)) == NULL) 2979 return (NULL); 2980 bzero(sk, sizeof(*sk)); 2981 TAILQ_INIT(&sk->states); 2982 pf_attach_state(sk, s, 0); 2983 2984 return (sk); 2985 } 2986 2987 int 2988 pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, 2989 struct pfi_kif *kif, struct mbuf *m, int off, void *h, 2990 struct pf_pdesc 
*pd, struct pf_rule **am, struct pf_ruleset **rsm, 2991 struct ifqueue *ifq, struct inpcb *inp) 2992 { 2993 struct pf_rule *nr = NULL; 2994 struct pf_addr *saddr = pd->src, *daddr = pd->dst; 2995 u_int16_t bport, nport = 0; 2996 sa_family_t af = pd->af; 2997 struct pf_rule *r, *a = NULL; 2998 struct pf_ruleset *ruleset = NULL; 2999 struct pf_src_node *nsn = NULL; 3000 struct tcphdr *th = pd->hdr.tcp; 3001 u_short reason; 3002 int rewrite = 0, hdrlen = 0; 3003 int tag = -1, rtableid = -1; 3004 int asd = 0; 3005 int match = 0; 3006 int state_icmp = 0; 3007 u_int16_t mss = tcp_mssdflt; 3008 u_int16_t sport, dport; 3009 u_int8_t icmptype = 0, icmpcode = 0; 3010 3011 if (direction == PF_IN && pf_check_congestion(ifq)) { 3012 REASON_SET(&reason, PFRES_CONGEST); 3013 return (PF_DROP); 3014 } 3015 3016 if (inp != NULL) 3017 pd->lookup.done = pf_socket_lookup(direction, pd); 3018 else if (debug_pfugidhack) { 3019 crit_exit(); 3020 DPFPRINTF(PF_DEBUG_MISC, ("pf: unlocked lookup\n")); 3021 pd->lookup.done = pf_socket_lookup(direction, pd); 3022 crit_enter(); 3023 } 3024 3025 sport = dport = hdrlen = 0; 3026 3027 switch (pd->proto) { 3028 case IPPROTO_TCP: 3029 sport = th->th_sport; 3030 dport = th->th_dport; 3031 hdrlen = sizeof(*th); 3032 break; 3033 case IPPROTO_UDP: 3034 sport = pd->hdr.udp->uh_sport; 3035 dport = pd->hdr.udp->uh_dport; 3036 hdrlen = sizeof(*pd->hdr.udp); 3037 break; 3038 #ifdef INET 3039 case IPPROTO_ICMP: 3040 if (pd->af != AF_INET) 3041 break; 3042 sport = dport = pd->hdr.icmp->icmp_id; 3043 icmptype = pd->hdr.icmp->icmp_type; 3044 icmpcode = pd->hdr.icmp->icmp_code; 3045 3046 if (icmptype == ICMP_UNREACH || 3047 icmptype == ICMP_SOURCEQUENCH || 3048 icmptype == ICMP_REDIRECT || 3049 icmptype == ICMP_TIMXCEED || 3050 icmptype == ICMP_PARAMPROB) 3051 state_icmp++; 3052 break; 3053 #endif /* INET */ 3054 #ifdef INET6 3055 case IPPROTO_ICMPV6: 3056 if (pd->af != AF_INET6) 3057 break; 3058 sport = dport = pd->hdr.icmp6->icmp6_id; 3059 hdrlen = 
sizeof(*pd->hdr.icmp6); 3060 icmptype = pd->hdr.icmp6->icmp6_type; 3061 icmpcode = pd->hdr.icmp6->icmp6_code; 3062 3063 if (icmptype == ICMP6_DST_UNREACH || 3064 icmptype == ICMP6_PACKET_TOO_BIG || 3065 icmptype == ICMP6_TIME_EXCEEDED || 3066 icmptype == ICMP6_PARAM_PROB) 3067 state_icmp++; 3068 break; 3069 #endif /* INET6 */ 3070 } 3071 3072 r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); 3073 3074 if (direction == PF_OUT) { 3075 bport = nport = sport; 3076 /* check outgoing packet for BINAT/NAT */ 3077 if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn, 3078 saddr, sport, daddr, dport, &pd->naddr, &nport)) != NULL) { 3079 PF_ACPY(&pd->baddr, saddr, af); 3080 switch (pd->proto) { 3081 case IPPROTO_TCP: 3082 pf_change_ap(saddr, &th->th_sport, pd->ip_sum, 3083 &th->th_sum, &pd->naddr, nport, 0, af); 3084 sport = th->th_sport; 3085 rewrite++; 3086 break; 3087 case IPPROTO_UDP: 3088 pf_change_ap(saddr, &pd->hdr.udp->uh_sport, 3089 pd->ip_sum, &pd->hdr.udp->uh_sum, 3090 &pd->naddr, nport, 1, af); 3091 sport = pd->hdr.udp->uh_sport; 3092 rewrite++; 3093 break; 3094 #ifdef INET 3095 case IPPROTO_ICMP: 3096 pf_change_a(&saddr->v4.s_addr, pd->ip_sum, 3097 pd->naddr.v4.s_addr, 0); 3098 pd->hdr.icmp->icmp_cksum = pf_cksum_fixup( 3099 pd->hdr.icmp->icmp_cksum, sport, nport, 0); 3100 pd->hdr.icmp->icmp_id = nport; 3101 m_copyback(m, off, ICMP_MINLEN, (caddr_t)pd->hdr.icmp); 3102 break; 3103 #endif /* INET */ 3104 #ifdef INET6 3105 case IPPROTO_ICMPV6: 3106 pf_change_a6(saddr, &pd->hdr.icmp6->icmp6_cksum, 3107 &pd->naddr, 0); 3108 rewrite++; 3109 break; 3110 #endif /* INET */ 3111 default: 3112 switch (af) { 3113 #ifdef INET 3114 case AF_INET: 3115 pf_change_a(&saddr->v4.s_addr, 3116 pd->ip_sum, pd->naddr.v4.s_addr, 0); 3117 break; 3118 #endif /* INET */ 3119 #ifdef INET6 3120 case AF_INET6: 3121 PF_ACPY(saddr, &pd->naddr, af); 3122 break; 3123 #endif /* INET */ 3124 } 3125 break; 3126 } 3127 3128 if (nr->natpass) 3129 r = NULL; 3130 pd->nat_rule = 
nr; 3131 } 3132 } else { 3133 bport = nport = dport; 3134 /* check incoming packet for BINAT/RDR */ 3135 if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn, 3136 saddr, sport, daddr, dport, &pd->naddr, &nport)) != NULL) { 3137 PF_ACPY(&pd->baddr, daddr, af); 3138 switch (pd->proto) { 3139 case IPPROTO_TCP: 3140 pf_change_ap(daddr, &th->th_dport, pd->ip_sum, 3141 &th->th_sum, &pd->naddr, nport, 0, af); 3142 dport = th->th_dport; 3143 rewrite++; 3144 break; 3145 case IPPROTO_UDP: 3146 pf_change_ap(daddr, &pd->hdr.udp->uh_dport, 3147 pd->ip_sum, &pd->hdr.udp->uh_sum, 3148 &pd->naddr, nport, 1, af); 3149 dport = pd->hdr.udp->uh_dport; 3150 rewrite++; 3151 break; 3152 #ifdef INET 3153 case IPPROTO_ICMP: 3154 pf_change_a(&daddr->v4.s_addr, pd->ip_sum, 3155 pd->naddr.v4.s_addr, 0); 3156 break; 3157 #endif /* INET */ 3158 #ifdef INET6 3159 case IPPROTO_ICMPV6: 3160 pf_change_a6(daddr, &pd->hdr.icmp6->icmp6_cksum, 3161 &pd->naddr, 0); 3162 rewrite++; 3163 break; 3164 #endif /* INET6 */ 3165 default: 3166 switch (af) { 3167 #ifdef INET 3168 case AF_INET: 3169 pf_change_a(&daddr->v4.s_addr, 3170 pd->ip_sum, pd->naddr.v4.s_addr, 0); 3171 break; 3172 #endif /* INET */ 3173 #ifdef INET6 3174 case AF_INET6: 3175 PF_ACPY(daddr, &pd->naddr, af); 3176 break; 3177 #endif /* INET */ 3178 } 3179 break; 3180 } 3181 3182 if (nr->natpass) 3183 r = NULL; 3184 pd->nat_rule = nr; 3185 } 3186 } 3187 3188 while (r != NULL) { 3189 r->evaluations++; 3190 if (pfi_kif_match(r->kif, kif) == r->ifnot) 3191 r = r->skip[PF_SKIP_IFP].ptr; 3192 else if (r->direction && r->direction != direction) 3193 r = r->skip[PF_SKIP_DIR].ptr; 3194 else if (r->af && r->af != af) 3195 r = r->skip[PF_SKIP_AF].ptr; 3196 else if (r->proto && r->proto != pd->proto) 3197 r = r->skip[PF_SKIP_PROTO].ptr; 3198 else if (PF_MISMATCHAW(&r->src.addr, saddr, af, 3199 r->src.neg, kif)) 3200 r = r->skip[PF_SKIP_SRC_ADDR].ptr; 3201 /* tcp/udp only. 
port_op always 0 in other cases */ 3202 else if (r->src.port_op && !pf_match_port(r->src.port_op, 3203 r->src.port[0], r->src.port[1], sport)) 3204 r = r->skip[PF_SKIP_SRC_PORT].ptr; 3205 else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, 3206 r->dst.neg, NULL)) 3207 r = r->skip[PF_SKIP_DST_ADDR].ptr; 3208 /* tcp/udp only. port_op always 0 in other cases */ 3209 else if (r->dst.port_op && !pf_match_port(r->dst.port_op, 3210 r->dst.port[0], r->dst.port[1], dport)) 3211 r = r->skip[PF_SKIP_DST_PORT].ptr; 3212 /* icmp only. type always 0 in other cases */ 3213 else if (r->type && r->type != icmptype + 1) 3214 r = TAILQ_NEXT(r, entries); 3215 /* icmp only. type always 0 in other cases */ 3216 else if (r->code && r->code != icmpcode + 1) 3217 r = TAILQ_NEXT(r, entries); 3218 else if (r->tos && !(r->tos == pd->tos)) 3219 r = TAILQ_NEXT(r, entries); 3220 else if (r->rule_flag & PFRULE_FRAGMENT) 3221 r = TAILQ_NEXT(r, entries); 3222 else if (pd->proto == IPPROTO_TCP && 3223 (r->flagset & th->th_flags) != r->flags) 3224 r = TAILQ_NEXT(r, entries); 3225 /* tcp/udp only. uid.op always 0 in other cases */ 3226 else if (r->uid.op && (pd->lookup.done || (pd->lookup.done = 3227 pf_socket_lookup(direction, pd), 1)) && 3228 !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1], 3229 pd->lookup.uid)) 3230 r = TAILQ_NEXT(r, entries); 3231 /* tcp/udp only. 
gid.op always 0 in other cases */ 3232 else if (r->gid.op && (pd->lookup.done || (pd->lookup.done = 3233 pf_socket_lookup(direction, pd), 1)) && 3234 !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1], 3235 pd->lookup.gid)) 3236 r = TAILQ_NEXT(r, entries); 3237 else if (r->prob && r->prob <= karc4random()) 3238 r = TAILQ_NEXT(r, entries); 3239 else if (r->match_tag && !pf_match_tag(m, r, &tag)) 3240 r = TAILQ_NEXT(r, entries); 3241 else if (r->os_fingerprint != PF_OSFP_ANY && 3242 (pd->proto != IPPROTO_TCP || !pf_osfp_match( 3243 pf_osfp_fingerprint(pd, m, off, th), 3244 r->os_fingerprint))) 3245 r = TAILQ_NEXT(r, entries); 3246 else { 3247 if (r->tag) 3248 tag = r->tag; 3249 if (r->rtableid >= 0) 3250 rtableid = r->rtableid; 3251 if (r->anchor == NULL) { 3252 match = 1; 3253 *rm = r; 3254 *am = a; 3255 *rsm = ruleset; 3256 if ((*rm)->quick) 3257 break; 3258 r = TAILQ_NEXT(r, entries); 3259 } else 3260 pf_step_into_anchor(&asd, &ruleset, 3261 PF_RULESET_FILTER, &r, &a, &match); 3262 } 3263 if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset, 3264 PF_RULESET_FILTER, &r, &a, &match)) 3265 break; 3266 } 3267 r = *rm; 3268 a = *am; 3269 ruleset = *rsm; 3270 3271 REASON_SET(&reason, PFRES_MATCH); 3272 3273 if (r->log || (nr != NULL && nr->log)) { 3274 if (rewrite) 3275 m_copyback(m, off, hdrlen, pd->hdr.any); 3276 PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? 
r : nr, 3277 a, ruleset, pd); 3278 } 3279 3280 if ((r->action == PF_DROP) && 3281 ((r->rule_flag & PFRULE_RETURNRST) || 3282 (r->rule_flag & PFRULE_RETURNICMP) || 3283 (r->rule_flag & PFRULE_RETURN))) { 3284 /* undo NAT changes, if they have taken place */ 3285 if (nr != NULL) { 3286 if (direction == PF_OUT) { 3287 switch (pd->proto) { 3288 case IPPROTO_TCP: 3289 pf_change_ap(saddr, &th->th_sport, 3290 pd->ip_sum, &th->th_sum, 3291 &pd->baddr, bport, 0, af); 3292 sport = th->th_sport; 3293 rewrite++; 3294 break; 3295 case IPPROTO_UDP: 3296 pf_change_ap(saddr, 3297 &pd->hdr.udp->uh_sport, pd->ip_sum, 3298 &pd->hdr.udp->uh_sum, &pd->baddr, 3299 bport, 1, af); 3300 sport = pd->hdr.udp->uh_sport; 3301 rewrite++; 3302 break; 3303 case IPPROTO_ICMP: 3304 #ifdef INET6 3305 case IPPROTO_ICMPV6: 3306 #endif 3307 /* nothing! */ 3308 break; 3309 default: 3310 switch (af) { 3311 case AF_INET: 3312 pf_change_a(&saddr->v4.s_addr, 3313 pd->ip_sum, 3314 pd->baddr.v4.s_addr, 0); 3315 break; 3316 case AF_INET6: 3317 PF_ACPY(saddr, &pd->baddr, af); 3318 break; 3319 } 3320 } 3321 } else { 3322 switch (pd->proto) { 3323 case IPPROTO_TCP: 3324 pf_change_ap(daddr, &th->th_dport, 3325 pd->ip_sum, &th->th_sum, 3326 &pd->baddr, bport, 0, af); 3327 dport = th->th_dport; 3328 rewrite++; 3329 break; 3330 case IPPROTO_UDP: 3331 pf_change_ap(daddr, 3332 &pd->hdr.udp->uh_dport, pd->ip_sum, 3333 &pd->hdr.udp->uh_sum, &pd->baddr, 3334 bport, 1, af); 3335 dport = pd->hdr.udp->uh_dport; 3336 rewrite++; 3337 break; 3338 case IPPROTO_ICMP: 3339 #ifdef INET6 3340 case IPPROTO_ICMPV6: 3341 #endif 3342 /* nothing! 
*/ 3343 break; 3344 default: 3345 switch (af) { 3346 case AF_INET: 3347 pf_change_a(&daddr->v4.s_addr, 3348 pd->ip_sum, 3349 pd->baddr.v4.s_addr, 0); 3350 break; 3351 case AF_INET6: 3352 PF_ACPY(daddr, &pd->baddr, af); 3353 break; 3354 } 3355 } 3356 } 3357 } 3358 if (pd->proto == IPPROTO_TCP && 3359 ((r->rule_flag & PFRULE_RETURNRST) || 3360 (r->rule_flag & PFRULE_RETURN)) && 3361 !(th->th_flags & TH_RST)) { 3362 u_int32_t ack = ntohl(th->th_seq) + pd->p_len; 3363 struct ip *h = mtod(m, struct ip *); 3364 3365 if (pf_check_proto_cksum(m, off, 3366 h->ip_len - off, IPPROTO_TCP, AF_INET)) 3367 REASON_SET(&reason, PFRES_PROTCKSUM); 3368 else { 3369 if (th->th_flags & TH_SYN) 3370 ack++; 3371 if (th->th_flags & TH_FIN) 3372 ack++; 3373 pf_send_tcp(r, af, pd->dst, 3374 pd->src, th->th_dport, th->th_sport, 3375 ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0, 3376 r->return_ttl, 1, 0, pd->eh, kif->pfik_ifp); 3377 } 3378 } else if ((af == AF_INET) && r->return_icmp) 3379 pf_send_icmp(m, r->return_icmp >> 8, 3380 r->return_icmp & 255, af, r); 3381 else if ((af == AF_INET6) && r->return_icmp6) 3382 pf_send_icmp(m, r->return_icmp6 >> 8, 3383 r->return_icmp6 & 255, af, r); 3384 } 3385 3386 if (r->action == PF_DROP) 3387 return (PF_DROP); 3388 3389 if (pf_tag_packet(m, tag, rtableid)) { 3390 REASON_SET(&reason, PFRES_MEMORY); 3391 return (PF_DROP); 3392 } 3393 3394 if (!state_icmp && (r->keep_state || nr != NULL || 3395 (pd->flags & PFDESC_TCP_NORM))) { 3396 /* create new state */ 3397 u_int16_t len = 0; 3398 struct pf_state *s = NULL; 3399 struct pf_state_key *sk = NULL; 3400 struct pf_src_node *sn = NULL; 3401 3402 /* check maximums */ 3403 if (r->max_states && (r->states >= r->max_states)) { 3404 pf_status.lcounters[LCNT_STATES]++; 3405 REASON_SET(&reason, PFRES_MAXSTATES); 3406 goto cleanup; 3407 } 3408 /* src node for filter rule */ 3409 if ((r->rule_flag & PFRULE_SRCTRACK || 3410 r->rpool.opts & PF_POOL_STICKYADDR) && 3411 pf_insert_src_node(&sn, r, saddr, af) != 0) { 3412 
REASON_SET(&reason, PFRES_SRCLIMIT); 3413 goto cleanup; 3414 } 3415 /* src node for translation rule */ 3416 if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) && 3417 ((direction == PF_OUT && 3418 pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) || 3419 (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) { 3420 REASON_SET(&reason, PFRES_SRCLIMIT); 3421 goto cleanup; 3422 } 3423 s = pool_get(&pf_state_pl, PR_NOWAIT); 3424 if (s == NULL) { 3425 REASON_SET(&reason, PFRES_MEMORY); 3426 cleanup: 3427 if (sn != NULL && sn->states == 0 && sn->expire == 0) { 3428 RB_REMOVE(pf_src_tree, &tree_src_tracking, sn); 3429 pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; 3430 pf_status.src_nodes--; 3431 pool_put(&pf_src_tree_pl, sn); 3432 } 3433 if (nsn != sn && nsn != NULL && nsn->states == 0 && 3434 nsn->expire == 0) { 3435 RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn); 3436 pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; 3437 pf_status.src_nodes--; 3438 pool_put(&pf_src_tree_pl, nsn); 3439 } 3440 if (sk != NULL) { 3441 pool_put(&pf_state_key_pl, sk); 3442 } 3443 return (PF_DROP); 3444 } 3445 bzero(s, sizeof(*s)); 3446 s->rule.ptr = r; 3447 s->nat_rule.ptr = nr; 3448 s->anchor.ptr = a; 3449 STATE_INC_COUNTERS(s); 3450 s->allow_opts = r->allow_opts; 3451 s->log = r->log & PF_LOG_ALL; 3452 if (nr != NULL) 3453 s->log |= nr->log & PF_LOG_ALL; 3454 switch (pd->proto) { 3455 case IPPROTO_TCP: 3456 len = pd->tot_len - off - (th->th_off << 2); 3457 s->src.seqlo = ntohl(th->th_seq); 3458 s->src.seqhi = s->src.seqlo + len + 1; 3459 if ((th->th_flags & (TH_SYN|TH_ACK)) == 3460 TH_SYN && r->keep_state == PF_STATE_MODULATE) { 3461 /* Generate sequence number modulator */ 3462 while ((s->src.seqdiff = 3463 pf_new_isn(sk) - s->src.seqlo) == 0) 3464 ; 3465 pf_change_a(&th->th_seq, &th->th_sum, 3466 htonl(s->src.seqlo + s->src.seqdiff), 0); 3467 rewrite = 1; 3468 } else 3469 s->src.seqdiff = 0; 3470 if (th->th_flags & TH_SYN) { 3471 s->src.seqhi++; 3472 s->src.wscale = pf_get_wscale(m, off, 
3473 th->th_off, af); 3474 } 3475 s->src.max_win = MAX(ntohs(th->th_win), 1); 3476 if (s->src.wscale & PF_WSCALE_MASK) { 3477 /* Remove scale factor from initial window */ 3478 int win = s->src.max_win; 3479 win += 1 << (s->src.wscale & PF_WSCALE_MASK); 3480 s->src.max_win = (win - 1) >> 3481 (s->src.wscale & PF_WSCALE_MASK); 3482 } 3483 if (th->th_flags & TH_FIN) 3484 s->src.seqhi++; 3485 s->dst.seqhi = 1; 3486 s->dst.max_win = 1; 3487 s->src.state = TCPS_SYN_SENT; 3488 s->dst.state = TCPS_CLOSED; 3489 s->timeout = PFTM_TCP_FIRST_PACKET; 3490 break; 3491 case IPPROTO_UDP: 3492 s->src.state = PFUDPS_SINGLE; 3493 s->dst.state = PFUDPS_NO_TRAFFIC; 3494 s->timeout = PFTM_UDP_FIRST_PACKET; 3495 break; 3496 case IPPROTO_ICMP: 3497 #ifdef INET6 3498 case IPPROTO_ICMPV6: 3499 #endif 3500 s->timeout = PFTM_ICMP_FIRST_PACKET; 3501 break; 3502 default: 3503 s->src.state = PFOTHERS_SINGLE; 3504 s->dst.state = PFOTHERS_NO_TRAFFIC; 3505 s->timeout = PFTM_OTHER_FIRST_PACKET; 3506 } 3507 3508 s->creation = time_second; 3509 s->expire = time_second; 3510 3511 if (sn != NULL) { 3512 s->src_node = sn; 3513 s->src_node->states++; 3514 } 3515 if (nsn != NULL) { 3516 PF_ACPY(&nsn->raddr, &pd->naddr, af); 3517 s->nat_src_node = nsn; 3518 s->nat_src_node->states++; 3519 } 3520 if (pd->proto == IPPROTO_TCP) { 3521 if ((pd->flags & PFDESC_TCP_NORM) && 3522 pf_normalize_tcp_init(m, off, pd, th, &s->src, 3523 &s->dst)) { 3524 REASON_SET(&reason, PFRES_MEMORY); 3525 pf_src_tree_remove_state(s); 3526 STATE_DEC_COUNTERS(s); 3527 pool_put(&pf_state_pl, s); 3528 return (PF_DROP); 3529 } 3530 if ((pd->flags & PFDESC_TCP_NORM) && s->src.scrub && 3531 pf_normalize_tcp_stateful(m, off, pd, &reason, 3532 th, s, &s->src, &s->dst, &rewrite)) { 3533 /* This really shouldn't happen!!! 
*/ 3534 DPFPRINTF(PF_DEBUG_URGENT, 3535 ("pf_normalize_tcp_stateful failed on " 3536 "first pkt")); 3537 pf_normalize_tcp_cleanup(s); 3538 pf_src_tree_remove_state(s); 3539 STATE_DEC_COUNTERS(s); 3540 pool_put(&pf_state_pl, s); 3541 return (PF_DROP); 3542 } 3543 } 3544 3545 if ((sk = pf_alloc_state_key(s)) == NULL) { 3546 REASON_SET(&reason, PFRES_MEMORY); 3547 goto cleanup; 3548 } 3549 3550 sk->proto = pd->proto; 3551 sk->direction = direction; 3552 sk->af = af; 3553 if (direction == PF_OUT) { 3554 PF_ACPY(&sk->gwy.addr, saddr, af); 3555 PF_ACPY(&sk->ext.addr, daddr, af); 3556 switch (pd->proto) { 3557 case IPPROTO_ICMP: 3558 #ifdef INET6 3559 case IPPROTO_ICMPV6: 3560 #endif 3561 sk->gwy.port = nport; 3562 sk->ext.port = 0; 3563 break; 3564 default: 3565 sk->gwy.port = sport; 3566 sk->ext.port = dport; 3567 } 3568 if (nr != NULL) { 3569 PF_ACPY(&sk->lan.addr, &pd->baddr, af); 3570 sk->lan.port = bport; 3571 } else { 3572 PF_ACPY(&sk->lan.addr, &sk->gwy.addr, af); 3573 sk->lan.port = sk->gwy.port; 3574 } 3575 } else { 3576 PF_ACPY(&sk->lan.addr, daddr, af); 3577 PF_ACPY(&sk->ext.addr, saddr, af); 3578 switch (pd->proto) { 3579 case IPPROTO_ICMP: 3580 #ifdef INET6 3581 case IPPROTO_ICMPV6: 3582 #endif 3583 sk->lan.port = nport; 3584 sk->ext.port = 0; 3585 break; 3586 default: 3587 sk->lan.port = dport; 3588 sk->ext.port = sport; 3589 } 3590 if (nr != NULL) { 3591 PF_ACPY(&sk->gwy.addr, &pd->baddr, af); 3592 sk->gwy.port = bport; 3593 } else { 3594 PF_ACPY(&sk->gwy.addr, &sk->lan.addr, af); 3595 sk->gwy.port = sk->lan.port; 3596 } 3597 } 3598 3599 s->hash = pf_state_hash(sk); 3600 s->pickup_mode = r->pickup_mode; 3601 3602 pf_set_rt_ifp(s, saddr); /* needs s->state_key set */ 3603 3604 if (pf_insert_state(BOUND_IFACE(r, kif), s)) { 3605 if (pd->proto == IPPROTO_TCP) 3606 pf_normalize_tcp_cleanup(s); 3607 REASON_SET(&reason, PFRES_STATEINS); 3608 pf_src_tree_remove_state(s); 3609 STATE_DEC_COUNTERS(s); 3610 pool_put(&pf_state_pl, s); 3611 return (PF_DROP); 3612 } 
else 3613 *sm = s; 3614 if (tag > 0) { 3615 pf_tag_ref(tag); 3616 s->tag = tag; 3617 } 3618 if (pd->proto == IPPROTO_TCP && 3619 (th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN && 3620 r->keep_state == PF_STATE_SYNPROXY) { 3621 s->src.state = PF_TCPS_PROXY_SRC; 3622 if (nr != NULL) { 3623 if (direction == PF_OUT) { 3624 pf_change_ap(saddr, &th->th_sport, 3625 pd->ip_sum, &th->th_sum, &pd->baddr, 3626 bport, 0, af); 3627 sport = th->th_sport; 3628 } else { 3629 pf_change_ap(daddr, &th->th_dport, 3630 pd->ip_sum, &th->th_sum, &pd->baddr, 3631 bport, 0, af); 3632 sport = th->th_dport; 3633 } 3634 } 3635 s->src.seqhi = htonl(karc4random()); 3636 /* Find mss option */ 3637 mss = pf_get_mss(m, off, th->th_off, af); 3638 mss = pf_calc_mss(saddr, af, mss); 3639 mss = pf_calc_mss(daddr, af, mss); 3640 s->src.mss = mss; 3641 pf_send_tcp(r, af, daddr, saddr, th->th_dport, 3642 th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1, 3643 TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, NULL, NULL); 3644 REASON_SET(&reason, PFRES_SYNPROXY); 3645 return (PF_SYNPROXY_DROP); 3646 } 3647 } 3648 3649 /* copy back packet headers if we performed NAT operations */ 3650 if (rewrite) 3651 m_copyback(m, off, hdrlen, pd->hdr.any); 3652 3653 return (PF_PASS); 3654 }

/*
 * pf_test_fragment: evaluate the active filter ruleset against a packet
 * using only network-layer criteria.
 *
 * The rule walk compares interface, direction, address family, protocol,
 * source/destination address and TOS.  Rules that additionally specify an
 * OS fingerprint, UDP/TCP port operators, TCP flags or an ICMP type/code
 * are stepped over without matching.
 * NOTE(review): presumably this is because the transport header is not
 * available for the fragments this is called on -- confirm against the
 * caller.
 *
 * The last matching rule wins unless a matching rule is marked "quick".
 *
 * rm, am, rsm:	in/out.  Updated with the matching rule, its anchor rule and
 *		its ruleset each time a rule matches.  *rm is dereferenced
 *		unconditionally after the walk, so the caller must pre-load it
 *		with a valid rule for the no-match case.
 * direction:	compared against each rule's direction.
 * kif:		interface the packet is being tested on.
 * m, h, pd:	packet mbuf, header pointer (only handed to PFLOG_PACKET)
 *		and packet descriptor.
 *
 * Returns PF_PASS when the selected rule's action is PF_PASS and the packet
 * could be tagged; PF_DROP otherwise.
 */
int
pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif,
    struct mbuf *m, void *h, struct pf_pdesc *pd, struct pf_rule **am,
    struct pf_ruleset **rsm)
{
	struct pf_rule		*r, *a = NULL;
	struct pf_ruleset	*ruleset = NULL;
	sa_family_t		 af = pd->af;
	u_short			 reason;
	int			 tag = -1;
	int			 asd = 0;
	int			 match = 0;

	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
	while (r != NULL) {
		r->evaluations++;
		/*
		 * The first six tests jump via the rule's skip[] pointers;
		 * the remaining tests simply advance to the next rule.
		 */
		if (pfi_kif_match(r->kif, kif) == r->ifnot)
			r = r->skip[PF_SKIP_IFP].ptr;
		else if (r->direction && r->direction != direction)
			r = r->skip[PF_SKIP_DIR].ptr;
		else if (r->af && r->af != af)
			r = r->skip[PF_SKIP_AF].ptr;
		else if (r->proto && r->proto != pd->proto)
			r = r->skip[PF_SKIP_PROTO].ptr;
		else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
		    r->src.neg, kif))
			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
		else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
		    r->dst.neg, NULL))
			r = r->skip[PF_SKIP_DST_ADDR].ptr;
		else if (r->tos && !(r->tos == pd->tos))
			r = TAILQ_NEXT(r, entries);
		else if (r->os_fingerprint != PF_OSFP_ANY)
			r = TAILQ_NEXT(r, entries);
		else if (pd->proto == IPPROTO_UDP &&
		    (r->src.port_op || r->dst.port_op))
			r = TAILQ_NEXT(r, entries);
		else if (pd->proto == IPPROTO_TCP &&
		    (r->src.port_op || r->dst.port_op || r->flagset))
			r = TAILQ_NEXT(r, entries);
		else if ((pd->proto == IPPROTO_ICMP ||
		    pd->proto == IPPROTO_ICMPV6) &&
		    (r->type || r->code))
			r = TAILQ_NEXT(r, entries);
		else if (r->prob && r->prob <= karc4random())
			r = TAILQ_NEXT(r, entries);
		else if (r->match_tag && !pf_match_tag(m, r, &tag))
			r = TAILQ_NEXT(r, entries);
		else {
			if (r->anchor == NULL) {
				/* Plain rule: record it as the current match */
				match = 1;
				*rm = r;
				*am = a;
				*rsm = ruleset;
				if ((*rm)->quick)
					break;
				r = TAILQ_NEXT(r, entries);
			} else
				pf_step_into_anchor(&asd, &ruleset,
				    PF_RULESET_FILTER, &r, &a, &match);
		}
		if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
		    PF_RULESET_FILTER, &r, &a, &match))
			break;
	}
	/* Continue with whatever rule ended up selected (or was pre-loaded) */
	r = *rm;
	a = *am;
	ruleset = *rsm;

	REASON_SET(&reason, PFRES_MATCH);

	if (r->log)
		PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset,
		    pd);

	if (r->action != PF_PASS)
		return (PF_DROP);

	if (pf_tag_packet(m, tag, -1)) {
		REASON_SET(&reason, PFRES_MEMORY);
		return (PF_DROP);
	}

	return (PF_PASS);
}

/*
 * pf_test_state_tcp: match a TCP segment against an existing state entry
 * and validate it against the tracked sequence windows.
 *
 * Processing order:
 *   1. Build a lookup key from the packet addresses/ports and run
 *	STATE_LOOKUP() (macro defined outside this function; it is expected
 *	to leave *state pointing at the matching state -- confirm its
 *	behaviour on a miss).
 *   2. Drive the two synproxy phases (PF_TCPS_PROXY_SRC, PF_TCPS_PROXY_DST),
 *	answering or originating handshake segments via pf_send_tcp().
 *   3. Track sequence numbers, windows and window scaling for the sending
 *	peer, applying sequence-number modulation where seqdiff is set.
 *   4. Accept the segment through the strict window check, the "loose"
 *	check, or the pickup-mode exemption; otherwise fail with
 *	PFRES_BADSTATE.
 *   5. Rewrite addresses/ports for translated states and copy the modified
 *	TCP header back into the mbuf.
 *
 * state:	out; set by STATE_LOOKUP(), then updated in place.
 * direction:	PF_IN or PF_OUT.
 * kif:		interface; used here for the RST sent on handshake mismatch.
 * m, off:	mbuf and offset of the TCP header inside it.
 * h:		not referenced directly in this function body.
 * pd:		packet descriptor; pd->hdr.tcp is modified in place.
 * reason:	out; set on every drop path.
 *
 * Returns PF_PASS, PF_DROP or PF_SYNPROXY_DROP.
 */
int
pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
    struct mbuf *m, int off, void *h, struct pf_pdesc *pd,
    u_short *reason)
{
	struct pf_state_key_cmp	 key;
	struct tcphdr		*th = pd->hdr.tcp;
	u_int16_t		 win = ntohs(th->th_win);
	u_int32_t		 ack, end, seq, orig_seq;
	u_int8_t		 sws, dws;
	int			 ackskew;
	int			 copyback = 0;
	struct pf_state_peer	*src, *dst;

	/*
	 * Only the fields relevant to the lookup direction are filled in:
	 * ext+gwy for inbound, lan+ext for outbound.
	 */
	key.af = pd->af;
	key.proto = IPPROTO_TCP;
	if (direction == PF_IN) {
		PF_ACPY(&key.ext.addr, pd->src, key.af);
		PF_ACPY(&key.gwy.addr, pd->dst, key.af);
		key.ext.port = th->th_sport;
		key.gwy.port = th->th_dport;
	} else {
		PF_ACPY(&key.lan.addr, pd->src, key.af);
		PF_ACPY(&key.ext.addr, pd->dst, key.af);
		key.lan.port = th->th_sport;
		key.ext.port = th->th_dport;
	}

	STATE_LOOKUP();

	/* "src" is always the peer that sent this segment */
	if (direction == (*state)->state_key->direction) {
		src = &(*state)->src;
		dst = &(*state)->dst;
	} else {
		src = &(*state)->dst;
		dst = &(*state)->src;
	}

	/* Synproxy phase 1: finish the handshake with the initiator */
	if ((*state)->src.state == PF_TCPS_PROXY_SRC) {
		if (direction != (*state)->state_key->direction) {
			REASON_SET(reason, PFRES_SYNPROXY);
			return (PF_SYNPROXY_DROP);
		}
		if (th->th_flags & TH_SYN) {
			/* Retransmitted SYN: must carry the original ISN */
			if (ntohl(th->th_seq) != (*state)->src.seqlo) {
				REASON_SET(reason, PFRES_SYNPROXY);
				return (PF_DROP);
			}
			pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
			    pd->src, th->th_dport, th->th_sport,
			    (*state)->src.seqhi, ntohl(th->th_seq) + 1,
			    TH_SYN|TH_ACK, 0, (*state)->src.mss, 0, 1,
			    0, NULL, NULL);
			REASON_SET(reason, PFRES_SYNPROXY);
			return (PF_SYNPROXY_DROP);
		} else if (!(th->th_flags & TH_ACK) ||
		    (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
		    (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) {
			REASON_SET(reason, PFRES_SYNPROXY);
			return (PF_DROP);
		} else if ((*state)->src_node != NULL &&
		    pf_src_connlimit(state)) {
			REASON_SET(reason, PFRES_SRCLIMIT);
			return (PF_DROP);
		} else
			(*state)->src.state = PF_TCPS_PROXY_DST;
	}
	/* Synproxy phase 2: open the connection towards the destination */
	if ((*state)->src.state == PF_TCPS_PROXY_DST) {
		/*
		 * Host endpoints for the segments we originate.  Named
		 * hsrc/hdst so they do not shadow the pf_state_peer pointers
		 * src/dst declared at function scope.
		 */
		struct pf_state_host *hsrc, *hdst;

		if (direction == PF_OUT) {
			hsrc = &(*state)->state_key->gwy;
			hdst = &(*state)->state_key->ext;
		} else {
			hsrc = &(*state)->state_key->ext;
			hdst = &(*state)->state_key->lan;
		}
		if (direction == (*state)->state_key->direction) {
			if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) ||
			    (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
			    (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) {
				REASON_SET(reason, PFRES_SYNPROXY);
				return (PF_DROP);
			}
			(*state)->src.max_win = MAX(ntohs(th->th_win), 1);
			if ((*state)->dst.seqhi == 1)
				(*state)->dst.seqhi = htonl(karc4random());
			pf_send_tcp((*state)->rule.ptr, pd->af, &hsrc->addr,
			    &hdst->addr, hsrc->port, hdst->port,
			    (*state)->dst.seqhi, 0, TH_SYN, 0,
			    (*state)->src.mss, 0, 0, (*state)->tag, NULL, NULL);
			REASON_SET(reason, PFRES_SYNPROXY);
			return (PF_SYNPROXY_DROP);
		} else if (((th->th_flags & (TH_SYN|TH_ACK)) !=
		    (TH_SYN|TH_ACK)) ||
		    (ntohl(th->th_ack) != (*state)->dst.seqhi + 1)) {
			REASON_SET(reason, PFRES_SYNPROXY);
			return (PF_DROP);
		} else {
			/*
			 * SYN|ACK from the destination: acknowledge it,
			 * acknowledge towards the initiator, then switch the
			 * state to ESTABLISHED with seqdiff values that map
			 * one side's sequence space onto the other's.
			 */
			(*state)->dst.max_win = MAX(ntohs(th->th_win), 1);
			(*state)->dst.seqlo = ntohl(th->th_seq);
			pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
			    pd->src, th->th_dport, th->th_sport,
			    ntohl(th->th_ack), ntohl(th->th_seq) + 1,
			    TH_ACK, (*state)->src.max_win, 0, 0, 0,
			    (*state)->tag, NULL, NULL);
			pf_send_tcp((*state)->rule.ptr, pd->af, &hsrc->addr,
			    &hdst->addr, hsrc->port, hdst->port,
			    (*state)->src.seqhi + 1, (*state)->src.seqlo + 1,
			    TH_ACK, (*state)->dst.max_win, 0, 0, 1,
			    0, NULL, NULL);
			(*state)->src.seqdiff = (*state)->dst.seqhi -
			    (*state)->src.seqlo;
			(*state)->dst.seqdiff = (*state)->src.seqhi -
			    (*state)->dst.seqlo;
			(*state)->src.seqhi = (*state)->src.seqlo +
			    (*state)->dst.max_win;
			(*state)->dst.seqhi = (*state)->dst.seqlo +
			    (*state)->src.max_win;
			(*state)->src.wscale = (*state)->dst.wscale = 0;
			(*state)->src.state = (*state)->dst.state =
			    TCPS_ESTABLISHED;
			REASON_SET(reason, PFRES_SYNPROXY);
			return (PF_SYNPROXY_DROP);
		}
	}

	/* Window scaling only applies when both peers set it, and not on SYNs */
	if (src->wscale && dst->wscale && !(th->th_flags & TH_SYN)) {
		sws = src->wscale & PF_WSCALE_MASK;
		dws = dst->wscale & PF_WSCALE_MASK;
	} else
		sws = dws = 0;

	/*
	 * Sequence tracking algorithm from Guido van Rooij's paper:
	 *   http://www.madison-gurkha.com/publications/tcp_filtering/
	 *	tcp_filtering.ps
	 */

	orig_seq = seq = ntohl(th->th_seq);
	if (src->seqlo == 0) {
		/* First packet from this end. Set its state */

		if ((pd->flags & PFDESC_TCP_NORM || dst->scrub) &&
		    src->scrub == NULL) {
			if (pf_normalize_tcp_init(m, off, pd, th, src, dst)) {
				REASON_SET(reason, PFRES_MEMORY);
				return (PF_DROP);
			}
		}

		/* Deferred generation of sequence number modulator */
		if (dst->seqdiff && !src->seqdiff) {
			/*
			 * NOTE(review): key is only partially initialised
			 * above (direction and one of lan/gwy are never
			 * set); confirm pf_new_isn() does not read those
			 * fields through this cast.
			 */
			while ((src->seqdiff =
			    pf_new_isn((struct pf_state_key *)&key) - seq) == 0)
				;
			ack = ntohl(th->th_ack) - dst->seqdiff;
			pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
			    src->seqdiff), 0);
			pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
			copyback = 1;
		} else {
			ack = ntohl(th->th_ack);
		}

		end = seq + pd->p_len;
		if (th->th_flags & TH_SYN) {
			end++;
			(*state)->sync_flags |= PFSTATE_GOT_SYN2;
			if (dst->wscale & PF_WSCALE_FLAG) {
				src->wscale = pf_get_wscale(m, off, th->th_off,
				    pd->af);
				if (src->wscale & PF_WSCALE_FLAG) {
					/* Remove scale factor from initial
					 * window */
					sws = src->wscale & PF_WSCALE_MASK;
					win = ((u_int32_t)win + (1 << sws) - 1)
					    >> sws;
					dws = dst->wscale & PF_WSCALE_MASK;
				} else {
					/* fixup other window */
					dst->max_win <<= dst->wscale &
					    PF_WSCALE_MASK;
					/* in case of a retrans SYN|ACK */
					dst->wscale = 0;
				}
			}
		}
		if (th->th_flags & TH_FIN)
			end++;

		src->seqlo = seq;
		if (src->state < TCPS_SYN_SENT)
			src->state = TCPS_SYN_SENT;

		/*
		 * May need to slide the window (seqhi may have been set by
		 * the crappy stack check or if we picked up the connection
		 * after establishment)
		 */
		if (src->seqhi == 1 ||
		    SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi))
			src->seqhi = end + MAX(1, dst->max_win << dws);
		if (win > src->max_win)
			src->max_win = win;

	} else {
		ack = ntohl(th->th_ack) - dst->seqdiff;
		if (src->seqdiff) {
			/* Modulate sequence numbers */
			pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
			    src->seqdiff), 0);
			pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
			copyback = 1;
		}
		end = seq + pd->p_len;
		if (th->th_flags & TH_SYN)
			end++;
		if (th->th_flags & TH_FIN)
			end++;
	}

	if ((th->th_flags & TH_ACK) == 0) {
		/* Let it pass through the ack skew check */
		ack = dst->seqlo;
	} else if ((ack == 0 &&
	    (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) ||
	    /* broken tcp stacks do not set ack */
	    (dst->state < TCPS_SYN_SENT)) {
		/*
		 * Many stacks (ours included) will set the ACK number in an
		 * FIN|ACK if the SYN times out -- no sequence to ACK.
		 */
		ack = dst->seqlo;
	}

	if (seq == end) {
		/* Ease sequencing restrictions on no data packets */
		seq = src->seqlo;
		end = seq;
	}

	ackskew = dst->seqlo - ack;


	/*
	 * Need to demodulate the sequence numbers in any TCP SACK options
	 * (Selective ACK). We could optionally validate the SACK values
	 * against the current ACK window, either forwards or backwards, but
	 * I'm not confident that SACK has been implemented properly
	 * everywhere. It wouldn't surprise me if several stacks accidentally
	 * SACK too far backwards of previously ACKed data. There really aren't
	 * any security implications of bad SACKing unless the target stack
	 * doesn't validate the option length correctly. Someone trying to
	 * spoof into a TCP connection won't bother blindly sending SACK
	 * options anyway.
	 */
	if (dst->seqdiff && (th->th_off << 2) > sizeof(struct tcphdr)) {
		if (pf_modulate_sack(m, off, pd, th, dst))
			copyback = 1;
	}


#define MAXACKWINDOW (0xffff + 1500)	/* 1500 is an arbitrary fudge factor */
	if (SEQ_GEQ(src->seqhi, end) &&
	    /* Last octet inside other's window space */
	    SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) &&
	    /* Retrans: not more than one window back */
	    (ackskew >= -MAXACKWINDOW) &&
	    /* Acking not more than one reassembled fragment backwards */
	    (ackskew <= (MAXACKWINDOW << sws)) &&
	    /* Acking not more than one window forward */
	    ((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo ||
	    (orig_seq == src->seqlo + 1) || (orig_seq + 1 == src->seqlo) ||
	    (pd->flags & PFDESC_IP_REAS) == 0)) {
		/* Require an exact/+1 sequence match on resets when possible */

		if (dst->scrub || src->scrub) {
			if (pf_normalize_tcp_stateful(m, off, pd, reason, th,
			    *state, src, dst, &copyback))
				return (PF_DROP);
		}

		/* update max window */
		if (src->max_win < win)
			src->max_win = win;
		/* synchronize sequencing */
		if (SEQ_GT(end, src->seqlo))
			src->seqlo = end;
		/* slide the window of what the other end can send */
		if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
			dst->seqhi = ack + MAX((win << sws), 1);


		/* update states */
		if (th->th_flags & TH_SYN)
			if (src->state < TCPS_SYN_SENT)
				src->state = TCPS_SYN_SENT;
		if (th->th_flags & TH_FIN)
			if (src->state < TCPS_CLOSING)
				src->state = TCPS_CLOSING;
		if (th->th_flags & TH_ACK) {
			if (dst->state == TCPS_SYN_SENT) {
				dst->state = TCPS_ESTABLISHED;
				if (src->state == TCPS_ESTABLISHED &&
				    (*state)->src_node != NULL &&
				    pf_src_connlimit(state)) {
					REASON_SET(reason, PFRES_SRCLIMIT);
					return (PF_DROP);
				}
			} else if (dst->state == TCPS_CLOSING)
				dst->state = TCPS_FIN_WAIT_2;
		}
		if (th->th_flags & TH_RST)
			src->state = dst->state = TCPS_TIME_WAIT;

		/* update expire time */
		(*state)->expire = time_second;
		if (src->state >= TCPS_FIN_WAIT_2 &&
		    dst->state >= TCPS_FIN_WAIT_2)
			(*state)->timeout = PFTM_TCP_CLOSED;
		else if (src->state >= TCPS_CLOSING &&
		    dst->state >= TCPS_CLOSING)
			(*state)->timeout = PFTM_TCP_FIN_WAIT;
		else if (src->state < TCPS_ESTABLISHED ||
		    dst->state < TCPS_ESTABLISHED)
			(*state)->timeout = PFTM_TCP_OPENING;
		else if (src->state >= TCPS_CLOSING ||
		    dst->state >= TCPS_CLOSING)
			(*state)->timeout = PFTM_TCP_CLOSING;
		else
			(*state)->timeout = PFTM_TCP_ESTABLISHED;

		/* Fall through to PASS packet */

	} else if ((dst->state < TCPS_SYN_SENT ||
	    dst->state >= TCPS_FIN_WAIT_2 ||
	    src->state >= TCPS_FIN_WAIT_2) &&
	    SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) &&
	    /* Within a window forward of the originating packet */
	    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) {
		/* Within a window backward of the originating packet */

		/*
		 * This currently handles three situations:
		 *  1) Stupid stacks will shotgun SYNs before their peer
		 *     replies.
		 *  2) When PF catches an already established stream (the
		 *     firewall rebooted, the state table was flushed, routes
		 *     changed...)
		 *  3) Packets get funky immediately after the connection
		 *     closes (this should catch Solaris spurious ACK|FINs
		 *     that web servers like to spew after a close)
		 *
		 * This must be a little more careful than the above code
		 * since packet floods will also be caught here. We don't
		 * update the TTL here to mitigate the damage of a packet
		 * flood and so the same code can handle awkward establishment
		 * and a loosened connection close.
		 * In the establishment case, a correct peer response will
		 * validate the connection, go through the normal state code
		 * and keep updating the state TTL.
		 */

		if (pf_status.debug >= PF_DEBUG_MISC) {
			kprintf("pf: loose state match: ");
			pf_print_state(*state);
			pf_print_flags(th->th_flags);
			kprintf(" seq=%u (%u) ack=%u len=%u ackskew=%d "
			    "pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack, pd->p_len,
			    ackskew, (unsigned long long)(*state)->packets[0],
			    (unsigned long long)(*state)->packets[1],
			    direction == PF_IN ? "in" : "out",
			    direction == (*state)->state_key->direction ?
			    "fwd" : "rev");
		}

		if (dst->scrub || src->scrub) {
			if (pf_normalize_tcp_stateful(m, off, pd, reason, th,
			    *state, src, dst, &copyback))
				return (PF_DROP);
		}

		/* update max window */
		if (src->max_win < win)
			src->max_win = win;
		/* synchronize sequencing */
		if (SEQ_GT(end, src->seqlo))
			src->seqlo = end;
		/* slide the window of what the other end can send */
		if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
			dst->seqhi = ack + MAX((win << sws), 1);

		/*
		 * Cannot set dst->seqhi here since this could be a shotgunned
		 * SYN and not an already established connection.
		 *
		 * NOTE(review): the statement just above does assign
		 * dst->seqhi; this inherited remark and the code disagree --
		 * confirm which is intended.
		 */

		if (th->th_flags & TH_FIN)
			if (src->state < TCPS_CLOSING)
				src->state = TCPS_CLOSING;
		if (th->th_flags & TH_RST)
			src->state = dst->state = TCPS_TIME_WAIT;

		/* Fall through to PASS packet */

	} else if ((*state)->pickup_mode == PF_PICKUPS_HASHONLY ||
	    ((*state)->pickup_mode == PF_PICKUPS_ENABLED &&
	    ((*state)->sync_flags & PFSTATE_GOT_SYN_MASK) !=
	    PFSTATE_GOT_SYN_MASK)) {
		/*
		 * If pickup mode is hash only, do not fail on sequence checks.
		 *
		 * If pickup mode is enabled and we did not see the SYN in
		 * both direction, do not fail on sequence checks because
		 * we do not have complete information on window scale.
		 *
		 * Adjust expiration and fall through to PASS packet.
		 * XXX Add a FIN check to reduce timeout?
		 */
		(*state)->expire = time_second;
	} else {
		/*
		 * Failure processing
		 */
		if ((*state)->dst.state == TCPS_SYN_SENT &&
		    (*state)->src.state == TCPS_SYN_SENT) {
			/* Send RST for state mismatches during handshake */
			if (!(th->th_flags & TH_RST))
				pf_send_tcp((*state)->rule.ptr, pd->af,
				    pd->dst, pd->src, th->th_dport,
				    th->th_sport, ntohl(th->th_ack), 0,
				    TH_RST, 0, 0,
				    (*state)->rule.ptr->return_ttl, 1, 0,
				    pd->eh, kif->pfik_ifp);
			/* Reset this peer so a fresh handshake can be tracked */
			src->seqlo = 0;
			src->seqhi = 1;
			src->max_win = 1;
		} else if (pf_status.debug >= PF_DEBUG_MISC) {
			kprintf("pf: BAD state: ");
			pf_print_state(*state);
			pf_print_flags(th->th_flags);
			kprintf(" seq=%u (%u) ack=%u len=%u ackskew=%d "
			    "pkts=%llu:%llu dir=%s,%s\n",
			    seq, orig_seq, ack, pd->p_len, ackskew,
			    (unsigned long long)(*state)->packets[0],
			    (unsigned long long)(*state)->packets[1],
			    direction == PF_IN ? "in" : "out",
			    direction == (*state)->state_key->direction ?
			    "fwd" : "rev");
			/* Digits 1-6 name which of the window tests failed */
			kprintf("pf: State failure on: %c %c %c %c | %c %c\n",
			    SEQ_GEQ(src->seqhi, end) ? ' ' : '1',
			    SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ?
			    ' ': '2',
			    (ackskew >= -MAXACKWINDOW) ? ' ' : '3',
			    (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4',
			    SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) ?' ' :'5',
			    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' ' :'6');
		}
		REASON_SET(reason, PFRES_BADSTATE);
		return (PF_DROP);
	}

	/* Any packets which have gotten here are to be passed */

	/* translate source/destination address, if necessary */
	if (STATE_TRANSLATE((*state)->state_key)) {
		if (direction == PF_OUT) {
			pf_change_ap(pd->src, &th->th_sport, pd->ip_sum,
			    &th->th_sum, &(*state)->state_key->gwy.addr,
			    (*state)->state_key->gwy.port, 0, pd->af);
		} else {
			/*
			 * If we don't redispatch the packet will go into
			 * the protocol stack on the wrong cpu for the
			 * post-translated address.
			 */
			/* m->m_pkthdr.fw_flags |= FW_MBUF_REDISPATCH; */
			m->m_flags &= ~M_HASH;
			pf_change_ap(pd->dst, &th->th_dport, pd->ip_sum,
			    &th->th_sum, &(*state)->state_key->lan.addr,
			    (*state)->state_key->lan.port, 0, pd->af);
		}
		m_copyback(m, off, sizeof(*th), (caddr_t)th);
	} else if (copyback) {
		/* Copyback sequence modulation or stateful scrub changes */
		m_copyback(m, off, sizeof(*th), (caddr_t)th);
	}

	return (PF_PASS);
}

/*
 * pf_test_state_udp: match a UDP datagram against an existing state entry.
 *
 * Builds a lookup key from the packet addresses/ports, runs STATE_LOOKUP()
 * (macro defined outside this function; expected to leave *state pointing
 * at the matching state -- confirm its behaviour on a miss), advances the
 * per-peer pseudo-state, refreshes the expiry stamp and timeout class, and
 * rewrites address/port for translated states.
 *
 * state:	out; set by STATE_LOOKUP(), then updated in place.
 * direction:	PF_IN or PF_OUT.
 * m, off:	mbuf and offset of the UDP header inside it.
 * kif, h:	not referenced directly in this function body.
 * pd:		packet descriptor; pd->hdr.udp is modified in place when the
 *		state is translated.
 *
 * Every path visible in this body returns PF_PASS.
 */
int
pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif,
    struct mbuf *m, int off, void *h, struct pf_pdesc *pd)
{
	struct pf_state_peer	*src, *dst;
	struct pf_state_key_cmp	 key;
	struct udphdr		*uh = pd->hdr.udp;

	/* ext+gwy are filled for inbound lookups, lan+ext for outbound */
	key.af = pd->af;
	key.proto = IPPROTO_UDP;
	if (direction == PF_IN) {
		PF_ACPY(&key.ext.addr, pd->src, key.af);
		PF_ACPY(&key.gwy.addr, pd->dst, key.af);
		key.ext.port = uh->uh_sport;
		key.gwy.port = uh->uh_dport;
	} else {
		PF_ACPY(&key.lan.addr, pd->src, key.af);
		PF_ACPY(&key.ext.addr, pd->dst, key.af);
		key.lan.port = uh->uh_sport;
		key.ext.port = uh->uh_dport;
	}

	STATE_LOOKUP();

	/* "src" is always the peer that sent this datagram */
	if (direction == (*state)->state_key->direction) {
		src = &(*state)->src;
		dst = &(*state)->dst;
	} else {
		src = &(*state)->dst;
		dst = &(*state)->src;
	}

	/* update states */
	if (src->state < PFUDPS_SINGLE)
		src->state = PFUDPS_SINGLE;
	if (dst->state == PFUDPS_SINGLE)
		dst->state = PFUDPS_MULTIPLE;

	/* update expire time */
	(*state)->expire = time_second;
	if (src->state == PFUDPS_MULTIPLE && dst->state == PFUDPS_MULTIPLE)
		(*state)->timeout = PFTM_UDP_MULTIPLE;
	else
		(*state)->timeout = PFTM_UDP_SINGLE;

	/* translate source/destination address, if necessary */
	if (STATE_TRANSLATE((*state)->state_key)) {
		if (direction == PF_OUT) {
			pf_change_ap(pd->src, &uh->uh_sport, pd->ip_sum,
			    &uh->uh_sum, &(*state)->state_key->gwy.addr,
			    (*state)->state_key->gwy.port, 1, pd->af);
		} else {
			/*
			 * If we don't redispatch the packet will go into
			 * the protocol stack on the wrong cpu for the
			 * post-translated address.
			 */
			/* m->m_pkthdr.fw_flags |= FW_MBUF_REDISPATCH; */
			m->m_flags &= ~M_HASH;
			pf_change_ap(pd->dst, &uh->uh_dport, pd->ip_sum,
			    &uh->uh_sum, &(*state)->state_key->lan.addr,
			    (*state)->state_key->lan.port, 1, pd->af);
		}
		m_copyback(m, off, sizeof(*uh), (caddr_t)uh);
	}

	return (PF_PASS);
}

4303 int 4304 pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, 4305 struct mbuf *m, int off, void *h, struct pf_pdesc *pd, u_short *reason) 4306 { 4307 struct pf_addr *saddr = pd->src, *daddr = pd->dst; 4308 u_int16_t icmpid = 0, *icmpsum; 4309 u_int8_t icmptype; 4310 int state_icmp = 0; 4311 struct pf_state_key_cmp key; 4312 4313 switch (pd->proto) { 4314 #ifdef INET 4315 case IPPROTO_ICMP: 4316 icmptype = pd->hdr.icmp->icmp_type; 4317 icmpid = pd->hdr.icmp->icmp_id; 4318 icmpsum = &pd->hdr.icmp->icmp_cksum; 4319 4320 if (icmptype == ICMP_UNREACH || 4321 icmptype == ICMP_SOURCEQUENCH || 4322 icmptype == ICMP_REDIRECT || 4323 icmptype == ICMP_TIMXCEED || 4324 icmptype == ICMP_PARAMPROB) 4325 state_icmp++; 4326 break; 4327 #endif /* INET */ 4328 #ifdef INET6 4329 case IPPROTO_ICMPV6: 4330 icmptype = pd->hdr.icmp6->icmp6_type; 4331 icmpid = pd->hdr.icmp6->icmp6_id; 4332 icmpsum = &pd->hdr.icmp6->icmp6_cksum; 4333 4334 if (icmptype == ICMP6_DST_UNREACH || 4335 icmptype == ICMP6_PACKET_TOO_BIG || 4336 icmptype == ICMP6_TIME_EXCEEDED || 4337 icmptype == ICMP6_PARAM_PROB) 4338 state_icmp++; 4339 break; 4340 #endif /* INET6 */ 4341 } 4342 4343 if (!state_icmp) { 4344 4345 /* 4346 * ICMP query/reply message not related to a TCP/UDP packet. 4347 * Search for an ICMP state.
4348 */ 4349 key.af = pd->af; 4350 key.proto = pd->proto; 4351 if (direction == PF_IN) { 4352 PF_ACPY(&key.ext.addr, pd->src, key.af); 4353 PF_ACPY(&key.gwy.addr, pd->dst, key.af); 4354 key.ext.port = 0; 4355 key.gwy.port = icmpid; 4356 } else { 4357 PF_ACPY(&key.lan.addr, pd->src, key.af); 4358 PF_ACPY(&key.ext.addr, pd->dst, key.af); 4359 key.lan.port = icmpid; 4360 key.ext.port = 0; 4361 } 4362 4363 STATE_LOOKUP(); 4364 4365 (*state)->expire = time_second; 4366 (*state)->timeout = PFTM_ICMP_ERROR_REPLY; 4367 4368 /* translate source/destination address, if necessary */ 4369 if (STATE_TRANSLATE((*state)->state_key)) { 4370 if (direction == PF_OUT) { 4371 switch (pd->af) { 4372 #ifdef INET 4373 case AF_INET: 4374 pf_change_a(&saddr->v4.s_addr, 4375 pd->ip_sum, 4376 (*state)->state_key->gwy.addr.v4.s_addr, 0); 4377 pd->hdr.icmp->icmp_cksum = 4378 pf_cksum_fixup( 4379 pd->hdr.icmp->icmp_cksum, icmpid, 4380 (*state)->state_key->gwy.port, 0); 4381 pd->hdr.icmp->icmp_id = 4382 (*state)->state_key->gwy.port; 4383 m_copyback(m, off, ICMP_MINLEN, 4384 (caddr_t)pd->hdr.icmp); 4385 break; 4386 #endif /* INET */ 4387 #ifdef INET6 4388 case AF_INET6: 4389 pf_change_a6(saddr, 4390 &pd->hdr.icmp6->icmp6_cksum, 4391 &(*state)->state_key->gwy.addr, 0); 4392 m_copyback(m, off, 4393 sizeof(struct icmp6_hdr), 4394 (caddr_t)pd->hdr.icmp6); 4395 break; 4396 #endif /* INET6 */ 4397 } 4398 } else { 4399 switch (pd->af) { 4400 #ifdef INET 4401 case AF_INET: 4402 pf_change_a(&daddr->v4.s_addr, 4403 pd->ip_sum, 4404 (*state)->state_key->lan.addr.v4.s_addr, 0); 4405 pd->hdr.icmp->icmp_cksum = 4406 pf_cksum_fixup( 4407 pd->hdr.icmp->icmp_cksum, icmpid, 4408 (*state)->state_key->lan.port, 0); 4409 pd->hdr.icmp->icmp_id = 4410 (*state)->state_key->lan.port; 4411 m_copyback(m, off, ICMP_MINLEN, 4412 (caddr_t)pd->hdr.icmp); 4413 break; 4414 #endif /* INET */ 4415 #ifdef INET6 4416 case AF_INET6: 4417 pf_change_a6(daddr, 4418 &pd->hdr.icmp6->icmp6_cksum, 4419 &(*state)->state_key->lan.addr, 0); 
4420 m_copyback(m, off, 4421 sizeof(struct icmp6_hdr), 4422 (caddr_t)pd->hdr.icmp6); 4423 break; 4424 #endif /* INET6 */ 4425 } 4426 } 4427 } 4428 4429 return (PF_PASS); 4430 4431 } else { 4432 /* 4433 * ICMP error message in response to a TCP/UDP packet. 4434 * Extract the inner TCP/UDP header and search for that state. 4435 */ 4436 4437 struct pf_pdesc pd2; 4438 #ifdef INET 4439 struct ip h2; 4440 #endif /* INET */ 4441 #ifdef INET6 4442 struct ip6_hdr h2_6; 4443 int terminal = 0; 4444 #endif /* INET6 */ 4445 int ipoff2; 4446 int off2; 4447 4448 pd2.af = pd->af; 4449 switch (pd->af) { 4450 #ifdef INET 4451 case AF_INET: 4452 /* offset of h2 in mbuf chain */ 4453 ipoff2 = off + ICMP_MINLEN; 4454 4455 if (!pf_pull_hdr(m, ipoff2, &h2, sizeof(h2), 4456 NULL, reason, pd2.af)) { 4457 DPFPRINTF(PF_DEBUG_MISC, 4458 ("pf: ICMP error message too short " 4459 "(ip)\n")); 4460 return (PF_DROP); 4461 } 4462 /* 4463 * ICMP error messages don't refer to non-first 4464 * fragments 4465 */ 4466 if (h2.ip_off & htons(IP_OFFMASK)) { 4467 REASON_SET(reason, PFRES_FRAG); 4468 return (PF_DROP); 4469 } 4470 4471 /* offset of protocol header that follows h2 */ 4472 off2 = ipoff2 + (h2.ip_hl << 2); 4473 4474 pd2.proto = h2.ip_p; 4475 pd2.src = (struct pf_addr *)&h2.ip_src; 4476 pd2.dst = (struct pf_addr *)&h2.ip_dst; 4477 pd2.ip_sum = &h2.ip_sum; 4478 break; 4479 #endif /* INET */ 4480 #ifdef INET6 4481 case AF_INET6: 4482 ipoff2 = off + sizeof(struct icmp6_hdr); 4483 4484 if (!pf_pull_hdr(m, ipoff2, &h2_6, sizeof(h2_6), 4485 NULL, reason, pd2.af)) { 4486 DPFPRINTF(PF_DEBUG_MISC, 4487 ("pf: ICMP error message too short " 4488 "(ip6)\n")); 4489 return (PF_DROP); 4490 } 4491 pd2.proto = h2_6.ip6_nxt; 4492 pd2.src = (struct pf_addr *)&h2_6.ip6_src; 4493 pd2.dst = (struct pf_addr *)&h2_6.ip6_dst; 4494 pd2.ip_sum = NULL; 4495 off2 = ipoff2 + sizeof(h2_6); 4496 do { 4497 switch (pd2.proto) { 4498 case IPPROTO_FRAGMENT: 4499 /* 4500 * ICMPv6 error messages for 4501 * non-first fragments 4502 */ 
4503 REASON_SET(reason, PFRES_FRAG); 4504 return (PF_DROP); 4505 case IPPROTO_AH: 4506 case IPPROTO_HOPOPTS: 4507 case IPPROTO_ROUTING: 4508 case IPPROTO_DSTOPTS: { 4509 /* get next header and header length */ 4510 struct ip6_ext opt6; 4511 4512 if (!pf_pull_hdr(m, off2, &opt6, 4513 sizeof(opt6), NULL, reason, 4514 pd2.af)) { 4515 DPFPRINTF(PF_DEBUG_MISC, 4516 ("pf: ICMPv6 short opt\n")); 4517 return (PF_DROP); 4518 } 4519 if (pd2.proto == IPPROTO_AH) 4520 off2 += (opt6.ip6e_len + 2) * 4; 4521 else 4522 off2 += (opt6.ip6e_len + 1) * 8; 4523 pd2.proto = opt6.ip6e_nxt; 4524 /* goto the next header */ 4525 break; 4526 } 4527 default: 4528 terminal++; 4529 break; 4530 } 4531 } while (!terminal); 4532 break; 4533 #endif /* INET6 */ 4534 default: 4535 DPFPRINTF(PF_DEBUG_MISC, 4536 ("pf: ICMP AF %d unknown (ip6)\n", pd->af)); 4537 return (PF_DROP); 4538 break; 4539 } 4540 4541 switch (pd2.proto) { 4542 case IPPROTO_TCP: { 4543 struct tcphdr th; 4544 u_int32_t seq; 4545 struct pf_state_peer *src, *dst; 4546 u_int8_t dws; 4547 int copyback = 0; 4548 4549 /* 4550 * Only the first 8 bytes of the TCP header can be 4551 * expected. Don't access any TCP header fields after 4552 * th_seq, an ackskew test is not possible. 
4553 */ 4554 if (!pf_pull_hdr(m, off2, &th, 8, NULL, reason, 4555 pd2.af)) { 4556 DPFPRINTF(PF_DEBUG_MISC, 4557 ("pf: ICMP error message too short " 4558 "(tcp)\n")); 4559 return (PF_DROP); 4560 } 4561 4562 key.af = pd2.af; 4563 key.proto = IPPROTO_TCP; 4564 if (direction == PF_IN) { 4565 PF_ACPY(&key.ext.addr, pd2.dst, key.af); 4566 PF_ACPY(&key.gwy.addr, pd2.src, key.af); 4567 key.ext.port = th.th_dport; 4568 key.gwy.port = th.th_sport; 4569 } else { 4570 PF_ACPY(&key.lan.addr, pd2.dst, key.af); 4571 PF_ACPY(&key.ext.addr, pd2.src, key.af); 4572 key.lan.port = th.th_dport; 4573 key.ext.port = th.th_sport; 4574 } 4575 4576 STATE_LOOKUP(); 4577 4578 if (direction == (*state)->state_key->direction) { 4579 src = &(*state)->dst; 4580 dst = &(*state)->src; 4581 } else { 4582 src = &(*state)->src; 4583 dst = &(*state)->dst; 4584 } 4585 4586 if (src->wscale && dst->wscale) 4587 dws = dst->wscale & PF_WSCALE_MASK; 4588 else 4589 dws = 0; 4590 4591 /* Demodulate sequence number */ 4592 seq = ntohl(th.th_seq) - src->seqdiff; 4593 if (src->seqdiff) { 4594 pf_change_a(&th.th_seq, icmpsum, 4595 htonl(seq), 0); 4596 copyback = 1; 4597 } 4598 4599 if (!SEQ_GEQ(src->seqhi, seq) || 4600 !SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws))) { 4601 if (pf_status.debug >= PF_DEBUG_MISC) { 4602 kprintf("pf: BAD ICMP %d:%d ", 4603 icmptype, pd->hdr.icmp->icmp_code); 4604 pf_print_host(pd->src, 0, pd->af); 4605 kprintf(" -> "); 4606 pf_print_host(pd->dst, 0, pd->af); 4607 kprintf(" state: "); 4608 pf_print_state(*state); 4609 kprintf(" seq=%u\n", seq); 4610 } 4611 REASON_SET(reason, PFRES_BADSTATE); 4612 return (PF_DROP); 4613 } 4614 4615 if (STATE_TRANSLATE((*state)->state_key)) { 4616 if (direction == PF_IN) { 4617 pf_change_icmp(pd2.src, &th.th_sport, 4618 daddr, &(*state)->state_key->lan.addr, 4619 (*state)->state_key->lan.port, NULL, 4620 pd2.ip_sum, icmpsum, 4621 pd->ip_sum, 0, pd2.af); 4622 } else { 4623 pf_change_icmp(pd2.dst, &th.th_dport, 4624 saddr, 
&(*state)->state_key->gwy.addr, 4625 (*state)->state_key->gwy.port, NULL, 4626 pd2.ip_sum, icmpsum, 4627 pd->ip_sum, 0, pd2.af); 4628 } 4629 copyback = 1; 4630 } 4631 4632 if (copyback) { 4633 switch (pd2.af) { 4634 #ifdef INET 4635 case AF_INET: 4636 m_copyback(m, off, ICMP_MINLEN, 4637 (caddr_t)pd->hdr.icmp); 4638 m_copyback(m, ipoff2, sizeof(h2), 4639 (caddr_t)&h2); 4640 break; 4641 #endif /* INET */ 4642 #ifdef INET6 4643 case AF_INET6: 4644 m_copyback(m, off, 4645 sizeof(struct icmp6_hdr), 4646 (caddr_t)pd->hdr.icmp6); 4647 m_copyback(m, ipoff2, sizeof(h2_6), 4648 (caddr_t)&h2_6); 4649 break; 4650 #endif /* INET6 */ 4651 } 4652 m_copyback(m, off2, 8, (caddr_t)&th); 4653 } 4654 4655 return (PF_PASS); 4656 break; 4657 } 4658 case IPPROTO_UDP: { 4659 struct udphdr uh; 4660 4661 if (!pf_pull_hdr(m, off2, &uh, sizeof(uh), 4662 NULL, reason, pd2.af)) { 4663 DPFPRINTF(PF_DEBUG_MISC, 4664 ("pf: ICMP error message too short " 4665 "(udp)\n")); 4666 return (PF_DROP); 4667 } 4668 4669 key.af = pd2.af; 4670 key.proto = IPPROTO_UDP; 4671 if (direction == PF_IN) { 4672 PF_ACPY(&key.ext.addr, pd2.dst, key.af); 4673 PF_ACPY(&key.gwy.addr, pd2.src, key.af); 4674 key.ext.port = uh.uh_dport; 4675 key.gwy.port = uh.uh_sport; 4676 } else { 4677 PF_ACPY(&key.lan.addr, pd2.dst, key.af); 4678 PF_ACPY(&key.ext.addr, pd2.src, key.af); 4679 key.lan.port = uh.uh_dport; 4680 key.ext.port = uh.uh_sport; 4681 } 4682 4683 STATE_LOOKUP(); 4684 4685 if (STATE_TRANSLATE((*state)->state_key)) { 4686 if (direction == PF_IN) { 4687 pf_change_icmp(pd2.src, &uh.uh_sport, 4688 daddr, 4689 &(*state)->state_key->lan.addr, 4690 (*state)->state_key->lan.port, 4691 &uh.uh_sum, 4692 pd2.ip_sum, icmpsum, 4693 pd->ip_sum, 1, pd2.af); 4694 } else { 4695 pf_change_icmp(pd2.dst, &uh.uh_dport, 4696 saddr, 4697 &(*state)->state_key->gwy.addr, 4698 (*state)->state_key->gwy.port, &uh.uh_sum, 4699 pd2.ip_sum, icmpsum, 4700 pd->ip_sum, 1, pd2.af); 4701 } 4702 switch (pd2.af) { 4703 #ifdef INET 4704 case AF_INET: 4705 
m_copyback(m, off, ICMP_MINLEN, 4706 (caddr_t)pd->hdr.icmp); 4707 m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2); 4708 break; 4709 #endif /* INET */ 4710 #ifdef INET6 4711 case AF_INET6: 4712 m_copyback(m, off, 4713 sizeof(struct icmp6_hdr), 4714 (caddr_t)pd->hdr.icmp6); 4715 m_copyback(m, ipoff2, sizeof(h2_6), 4716 (caddr_t)&h2_6); 4717 break; 4718 #endif /* INET6 */ 4719 } 4720 m_copyback(m, off2, sizeof(uh), (caddr_t)&uh); 4721 } 4722 4723 return (PF_PASS); 4724 break; 4725 } 4726 #ifdef INET 4727 case IPPROTO_ICMP: { 4728 struct icmp iih; 4729 4730 if (!pf_pull_hdr(m, off2, &iih, ICMP_MINLEN, 4731 NULL, reason, pd2.af)) { 4732 DPFPRINTF(PF_DEBUG_MISC, 4733 ("pf: ICMP error message too short i" 4734 "(icmp)\n")); 4735 return (PF_DROP); 4736 } 4737 4738 key.af = pd2.af; 4739 key.proto = IPPROTO_ICMP; 4740 if (direction == PF_IN) { 4741 PF_ACPY(&key.ext.addr, pd2.dst, key.af); 4742 PF_ACPY(&key.gwy.addr, pd2.src, key.af); 4743 key.ext.port = 0; 4744 key.gwy.port = iih.icmp_id; 4745 } else { 4746 PF_ACPY(&key.lan.addr, pd2.dst, key.af); 4747 PF_ACPY(&key.ext.addr, pd2.src, key.af); 4748 key.lan.port = iih.icmp_id; 4749 key.ext.port = 0; 4750 } 4751 4752 STATE_LOOKUP(); 4753 4754 if (STATE_TRANSLATE((*state)->state_key)) { 4755 if (direction == PF_IN) { 4756 pf_change_icmp(pd2.src, &iih.icmp_id, 4757 daddr, 4758 &(*state)->state_key->lan.addr, 4759 (*state)->state_key->lan.port, NULL, 4760 pd2.ip_sum, icmpsum, 4761 pd->ip_sum, 0, AF_INET); 4762 } else { 4763 pf_change_icmp(pd2.dst, &iih.icmp_id, 4764 saddr, 4765 &(*state)->state_key->gwy.addr, 4766 (*state)->state_key->gwy.port, NULL, 4767 pd2.ip_sum, icmpsum, 4768 pd->ip_sum, 0, AF_INET); 4769 } 4770 m_copyback(m, off, ICMP_MINLEN, (caddr_t)pd->hdr.icmp); 4771 m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2); 4772 m_copyback(m, off2, ICMP_MINLEN, (caddr_t)&iih); 4773 } 4774 4775 return (PF_PASS); 4776 break; 4777 } 4778 #endif /* INET */ 4779 #ifdef INET6 4780 case IPPROTO_ICMPV6: { 4781 struct icmp6_hdr iih; 4782 
4783 if (!pf_pull_hdr(m, off2, &iih, 4784 sizeof(struct icmp6_hdr), NULL, reason, pd2.af)) { 4785 DPFPRINTF(PF_DEBUG_MISC, 4786 ("pf: ICMP error message too short " 4787 "(icmp6)\n")); 4788 return (PF_DROP); 4789 } 4790 4791 key.af = pd2.af; 4792 key.proto = IPPROTO_ICMPV6; 4793 if (direction == PF_IN) { 4794 PF_ACPY(&key.ext.addr, pd2.dst, key.af); 4795 PF_ACPY(&key.gwy.addr, pd2.src, key.af); 4796 key.ext.port = 0; 4797 key.gwy.port = iih.icmp6_id; 4798 } else { 4799 PF_ACPY(&key.lan.addr, pd2.dst, key.af); 4800 PF_ACPY(&key.ext.addr, pd2.src, key.af); 4801 key.lan.port = iih.icmp6_id; 4802 key.ext.port = 0; 4803 } 4804 4805 STATE_LOOKUP(); 4806 4807 if (STATE_TRANSLATE((*state)->state_key)) { 4808 if (direction == PF_IN) { 4809 pf_change_icmp(pd2.src, &iih.icmp6_id, 4810 daddr, 4811 &(*state)->state_key->lan.addr, 4812 (*state)->state_key->lan.port, NULL, 4813 pd2.ip_sum, icmpsum, 4814 pd->ip_sum, 0, AF_INET6); 4815 } else { 4816 pf_change_icmp(pd2.dst, &iih.icmp6_id, 4817 saddr, &(*state)->state_key->gwy.addr, 4818 (*state)->state_key->gwy.port, NULL, 4819 pd2.ip_sum, icmpsum, 4820 pd->ip_sum, 0, AF_INET6); 4821 } 4822 m_copyback(m, off, sizeof(struct icmp6_hdr), 4823 (caddr_t)pd->hdr.icmp6); 4824 m_copyback(m, ipoff2, sizeof(h2_6), (caddr_t)&h2_6); 4825 m_copyback(m, off2, sizeof(struct icmp6_hdr), 4826 (caddr_t)&iih); 4827 } 4828 4829 return (PF_PASS); 4830 break; 4831 } 4832 #endif /* INET6 */ 4833 default: { 4834 key.af = pd2.af; 4835 key.proto = pd2.proto; 4836 if (direction == PF_IN) { 4837 PF_ACPY(&key.ext.addr, pd2.dst, key.af); 4838 PF_ACPY(&key.gwy.addr, pd2.src, key.af); 4839 key.ext.port = 0; 4840 key.gwy.port = 0; 4841 } else { 4842 PF_ACPY(&key.lan.addr, pd2.dst, key.af); 4843 PF_ACPY(&key.ext.addr, pd2.src, key.af); 4844 key.lan.port = 0; 4845 key.ext.port = 0; 4846 } 4847 4848 STATE_LOOKUP(); 4849 4850 if (STATE_TRANSLATE((*state)->state_key)) { 4851 if (direction == PF_IN) { 4852 pf_change_icmp(pd2.src, NULL, 4853 daddr, 4854 
&(*state)->state_key->lan.addr, 4855 0, NULL, 4856 pd2.ip_sum, icmpsum, 4857 pd->ip_sum, 0, pd2.af); 4858 } else { 4859 pf_change_icmp(pd2.dst, NULL, 4860 saddr, 4861 &(*state)->state_key->gwy.addr, 4862 0, NULL, 4863 pd2.ip_sum, icmpsum, 4864 pd->ip_sum, 0, pd2.af); 4865 } 4866 switch (pd2.af) { 4867 #ifdef INET 4868 case AF_INET: 4869 m_copyback(m, off, ICMP_MINLEN, 4870 (caddr_t)pd->hdr.icmp); 4871 m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2); 4872 break; 4873 #endif /* INET */ 4874 #ifdef INET6 4875 case AF_INET6: 4876 m_copyback(m, off, 4877 sizeof(struct icmp6_hdr), 4878 (caddr_t)pd->hdr.icmp6); 4879 m_copyback(m, ipoff2, sizeof(h2_6), 4880 (caddr_t)&h2_6); 4881 break; 4882 #endif /* INET6 */ 4883 } 4884 } 4885 4886 return (PF_PASS); 4887 break; 4888 } 4889 } 4890 } 4891 } 4892 4893 int 4894 pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif, 4895 struct pf_pdesc *pd) 4896 { 4897 struct pf_state_peer *src, *dst; 4898 struct pf_state_key_cmp key; 4899 4900 key.af = pd->af; 4901 key.proto = pd->proto; 4902 if (direction == PF_IN) { 4903 PF_ACPY(&key.ext.addr, pd->src, key.af); 4904 PF_ACPY(&key.gwy.addr, pd->dst, key.af); 4905 key.ext.port = 0; 4906 key.gwy.port = 0; 4907 } else { 4908 PF_ACPY(&key.lan.addr, pd->src, key.af); 4909 PF_ACPY(&key.ext.addr, pd->dst, key.af); 4910 key.lan.port = 0; 4911 key.ext.port = 0; 4912 } 4913 4914 STATE_LOOKUP(); 4915 4916 if (direction == (*state)->state_key->direction) { 4917 src = &(*state)->src; 4918 dst = &(*state)->dst; 4919 } else { 4920 src = &(*state)->dst; 4921 dst = &(*state)->src; 4922 } 4923 4924 /* update states */ 4925 if (src->state < PFOTHERS_SINGLE) 4926 src->state = PFOTHERS_SINGLE; 4927 if (dst->state == PFOTHERS_SINGLE) 4928 dst->state = PFOTHERS_MULTIPLE; 4929 4930 /* update expire time */ 4931 (*state)->expire = time_second; 4932 if (src->state == PFOTHERS_MULTIPLE && dst->state == PFOTHERS_MULTIPLE) 4933 (*state)->timeout = PFTM_OTHER_MULTIPLE; 4934 else 4935 
(*state)->timeout = PFTM_OTHER_SINGLE; 4936 4937 /* translate source/destination address, if necessary */ 4938 if (STATE_TRANSLATE((*state)->state_key)) { 4939 if (direction == PF_OUT) 4940 switch (pd->af) { 4941 #ifdef INET 4942 case AF_INET: 4943 pf_change_a(&pd->src->v4.s_addr, 4944 pd->ip_sum, 4945 (*state)->state_key->gwy.addr.v4.s_addr, 4946 0); 4947 break; 4948 #endif /* INET */ 4949 #ifdef INET6 4950 case AF_INET6: 4951 PF_ACPY(pd->src, 4952 &(*state)->state_key->gwy.addr, pd->af); 4953 break; 4954 #endif /* INET6 */ 4955 } 4956 else 4957 switch (pd->af) { 4958 #ifdef INET 4959 case AF_INET: 4960 pf_change_a(&pd->dst->v4.s_addr, 4961 pd->ip_sum, 4962 (*state)->state_key->lan.addr.v4.s_addr, 4963 0); 4964 break; 4965 #endif /* INET */ 4966 #ifdef INET6 4967 case AF_INET6: 4968 PF_ACPY(pd->dst, 4969 &(*state)->state_key->lan.addr, pd->af); 4970 break; 4971 #endif /* INET6 */ 4972 } 4973 } 4974 4975 return (PF_PASS); 4976 } 4977 4978 /* 4979 * ipoff and off are measured from the start of the mbuf chain. 4980 * h must be at "ipoff" on the mbuf chain. 
4981 */ 4982 void * 4983 pf_pull_hdr(struct mbuf *m, int off, void *p, int len, 4984 u_short *actionp, u_short *reasonp, sa_family_t af) 4985 { 4986 switch (af) { 4987 #ifdef INET 4988 case AF_INET: { 4989 struct ip *h = mtod(m, struct ip *); 4990 u_int16_t fragoff = (h->ip_off & IP_OFFMASK) << 3; 4991 4992 if (fragoff) { 4993 if (fragoff >= len) 4994 ACTION_SET(actionp, PF_PASS); 4995 else { 4996 ACTION_SET(actionp, PF_DROP); 4997 REASON_SET(reasonp, PFRES_FRAG); 4998 } 4999 return (NULL); 5000 } 5001 if (m->m_pkthdr.len < off + len || 5002 h->ip_len < off + len) { 5003 ACTION_SET(actionp, PF_DROP); 5004 REASON_SET(reasonp, PFRES_SHORT); 5005 return (NULL); 5006 } 5007 break; 5008 } 5009 #endif /* INET */ 5010 #ifdef INET6 5011 case AF_INET6: { 5012 struct ip6_hdr *h = mtod(m, struct ip6_hdr *); 5013 5014 if (m->m_pkthdr.len < off + len || 5015 (ntohs(h->ip6_plen) + sizeof(struct ip6_hdr)) < 5016 (unsigned)(off + len)) { 5017 ACTION_SET(actionp, PF_DROP); 5018 REASON_SET(reasonp, PFRES_SHORT); 5019 return (NULL); 5020 } 5021 break; 5022 } 5023 #endif /* INET6 */ 5024 } 5025 m_copydata(m, off, len, p); 5026 return (p); 5027 } 5028 5029 int 5030 pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif) 5031 { 5032 struct sockaddr_in *dst; 5033 int ret = 1; 5034 int check_mpath; 5035 #ifdef INET6 5036 struct sockaddr_in6 *dst6; 5037 struct route_in6 ro; 5038 #else 5039 struct route ro; 5040 #endif 5041 struct radix_node *rn; 5042 struct rtentry *rt; 5043 struct ifnet *ifp; 5044 5045 check_mpath = 0; 5046 bzero(&ro, sizeof(ro)); 5047 switch (af) { 5048 case AF_INET: 5049 dst = satosin(&ro.ro_dst); 5050 dst->sin_family = AF_INET; 5051 dst->sin_len = sizeof(*dst); 5052 dst->sin_addr = addr->v4; 5053 break; 5054 #ifdef INET6 5055 case AF_INET6: 5056 dst6 = (struct sockaddr_in6 *)&ro.ro_dst; 5057 dst6->sin6_family = AF_INET6; 5058 dst6->sin6_len = sizeof(*dst6); 5059 dst6->sin6_addr = addr->v6; 5060 break; 5061 #endif /* INET6 */ 5062 default: 5063 return 
(0); 5064 } 5065 5066 /* Skip checks for ipsec interfaces */ 5067 if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC) 5068 goto out; 5069 5070 rtalloc_ign((struct route *)&ro, 0); 5071 5072 if (ro.ro_rt != NULL) { 5073 /* No interface given, this is a no-route check */ 5074 if (kif == NULL) 5075 goto out; 5076 5077 if (kif->pfik_ifp == NULL) { 5078 ret = 0; 5079 goto out; 5080 } 5081 5082 /* Perform uRPF check if passed input interface */ 5083 ret = 0; 5084 rn = (struct radix_node *)ro.ro_rt; 5085 do { 5086 rt = (struct rtentry *)rn; 5087 ifp = rt->rt_ifp; 5088 5089 if (kif->pfik_ifp == ifp) 5090 ret = 1; 5091 rn = NULL; 5092 } while (check_mpath == 1 && rn != NULL && ret == 0); 5093 } else 5094 ret = 0; 5095 out: 5096 if (ro.ro_rt != NULL) 5097 RTFREE(ro.ro_rt); 5098 return (ret); 5099 } 5100 5101 int 5102 pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw) 5103 { 5104 struct sockaddr_in *dst; 5105 #ifdef INET6 5106 struct sockaddr_in6 *dst6; 5107 struct route_in6 ro; 5108 #else 5109 struct route ro; 5110 #endif 5111 int ret = 0; 5112 5113 ASSERT_LWKT_TOKEN_HELD(&pf_token); 5114 5115 bzero(&ro, sizeof(ro)); 5116 switch (af) { 5117 case AF_INET: 5118 dst = satosin(&ro.ro_dst); 5119 dst->sin_family = AF_INET; 5120 dst->sin_len = sizeof(*dst); 5121 dst->sin_addr = addr->v4; 5122 break; 5123 #ifdef INET6 5124 case AF_INET6: 5125 dst6 = (struct sockaddr_in6 *)&ro.ro_dst; 5126 dst6->sin6_family = AF_INET6; 5127 dst6->sin6_len = sizeof(*dst6); 5128 dst6->sin6_addr = addr->v6; 5129 break; 5130 #endif /* INET6 */ 5131 default: 5132 return (0); 5133 } 5134 5135 rtalloc_ign((struct route *)&ro, (RTF_CLONING | RTF_PRCLONING)); 5136 5137 if (ro.ro_rt != NULL) { 5138 RTFREE(ro.ro_rt); 5139 } 5140 5141 return (ret); 5142 } 5143 5144 #ifdef INET 5145 void 5146 pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, 5147 struct pf_state *s, struct pf_pdesc *pd) 5148 { 5149 struct mbuf *m0, *m1; 5150 struct route iproute; 5151 struct 
#ifdef INET
/*
 * Policy-route an IPv4 packet according to rule "r" (route-to, reply-to,
 * dup-to or fastroute) and transmit it directly on the chosen interface.
 *
 *   m     in/out packet; set to NULL on exit unless r->rt is PF_DUPTO,
 *         in which case a duplicate is sent and *m is left alone
 *   r     rule whose rt/rpool settings select the next hop
 *   dir   PF_IN or PF_OUT, direction the packet was filtered in
 *   oifp  interface the packet was seen on
 *   s     state supplying rt_addr/rt_kif, or NULL to pick from r->rpool
 *   pd    packet descriptor (not referenced in this function)
 *
 * The caller must hold pf_token; it is released around if_output calls.
 * Panics on invalid arguments.
 */
void
pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
    struct pf_state *s, struct pf_pdesc *pd)
{
	struct mbuf		*m0, *m1;
	struct route		 iproute;
	struct route		*ro = NULL;
	struct sockaddr_in	*dst;
	struct ip		*ip;
	struct ifnet		*ifp = NULL;
	struct pf_addr		 naddr;
	struct pf_src_node	*sn = NULL;
	int			 error = 0;
	int sw_csum;
#ifdef IPSEC
	struct m_tag		*mtag;
#endif /* IPSEC */

	ASSERT_LWKT_TOKEN_HELD(&pf_token);

	if (m == NULL || *m == NULL || r == NULL ||
	    (dir != PF_IN && dir != PF_OUT) || oifp == NULL)
		panic("pf_route: invalid parameters");

	/*
	 * Loop protection: count how often this mbuf has been through
	 * pf_route and drop it once the counter has exceeded 3.
	 */
	if (((*m)->m_pkthdr.fw_flags & PF_MBUF_ROUTED) == 0) {
		(*m)->m_pkthdr.fw_flags |= PF_MBUF_ROUTED;
		(*m)->m_pkthdr.pf.routed = 1;
	} else {
		if ((*m)->m_pkthdr.pf.routed++ > 3) {
			m0 = *m;
			*m = NULL;
			goto bad;
		}
	}

	if (r->rt == PF_DUPTO) {
		/* dup-to works on a copy; the original stays with the caller */
		if ((m0 = m_dup(*m, MB_DONTWAIT)) == NULL) {
			return;
		}
	} else {
		/*
		 * reply-to acts on the direction opposite to the rule's,
		 * every other type on the rule's own direction; otherwise
		 * there is nothing to do here.
		 */
		if ((r->rt == PF_REPLYTO) == (r->direction == dir)) {
			return;
		}
		m0 = *m;
	}

	if (m0->m_len < sizeof(struct ip)) {
		DPFPRINTF(PF_DEBUG_URGENT,
		    ("pf_route: m0->m_len < sizeof(struct ip)\n"));
		goto bad;
	}

	ip = mtod(m0, struct ip *);

	/* Default next hop is the packet's own destination. */
	ro = &iproute;
	bzero((caddr_t)ro, sizeof(*ro));
	dst = satosin(&ro->ro_dst);
	dst->sin_family = AF_INET;
	dst->sin_len = sizeof(*dst);
	dst->sin_addr = ip->ip_dst;

	if (r->rt == PF_FASTROUTE) {
		/* fastroute: use the routing table to find ifp and gateway */
		rtalloc(ro);
		if (ro->ro_rt == 0) {
			ipstat.ips_noroute++;
			goto bad;
		}

		ifp = ro->ro_rt->rt_ifp;
		ro->ro_rt->rt_use++;

		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
			dst = satosin(ro->ro_rt->rt_gateway);
	} else {
		if (TAILQ_EMPTY(&r->rpool.list)) {
			DPFPRINTF(PF_DEBUG_URGENT,
			    ("pf_route: TAILQ_EMPTY(&r->rpool.list)\n"));
			goto bad;
		}
		if (s == NULL) {
			/* stateless: choose next hop from the rule's pool */
			pf_map_addr(AF_INET, r, (struct pf_addr *)&ip->ip_src,
			    &naddr, NULL, &sn);
			if (!PF_AZERO(&naddr, AF_INET))
				dst->sin_addr.s_addr = naddr.v4.s_addr;
			ifp = r->rpool.cur->kif ?
			    r->rpool.cur->kif->pfik_ifp : NULL;
		} else {
			/* stateful: reuse the next hop stored in the state */
			if (!PF_AZERO(&s->rt_addr, AF_INET))
				dst->sin_addr.s_addr =
				    s->rt_addr.v4.s_addr;
			ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
		}
	}
	if (ifp == NULL)
		goto bad;

	if (oifp != ifp) {
		/*
		 * Leaving through a different interface: run the outbound
		 * ruleset of that interface first.  pf_test may replace or
		 * consume m0, so it is re-validated afterwards.
		 */
		crit_exit();
		if (pf_test(PF_OUT, ifp, &m0, NULL, NULL) != PF_PASS) {
			crit_enter();
			goto bad;
		} else if (m0 == NULL) {
			crit_enter();
			goto done;
		}
		crit_enter();
		if (m0->m_len < sizeof(struct ip)) {
			DPFPRINTF(PF_DEBUG_URGENT,
			    ("pf_route: m0->m_len < sizeof(struct ip)\n"));
			goto bad;
		}
		ip = mtod(m0, struct ip *);
	}

	/* Copied from FreeBSD 5.1-CURRENT ip_output. */
	m0->m_pkthdr.csum_flags |= CSUM_IP;
	/* sw_csum: checksums the interface cannot compute for us */
	sw_csum = m0->m_pkthdr.csum_flags & ~ifp->if_hwassist;
	if (sw_csum & CSUM_DELAY_DATA) {
		in_delayed_cksum(m0);
		sw_csum &= ~CSUM_DELAY_DATA;
	}
	m0->m_pkthdr.csum_flags &= ifp->if_hwassist;

	if (ip->ip_len <= ifp->if_mtu ||
	    (ifp->if_hwassist & CSUM_FRAGMENT &&
		(ip->ip_off & IP_DF) == 0)) {
		/* Fits (or hardware fragments): send as a single packet. */
		ip->ip_len = htons(ip->ip_len);
		ip->ip_off = htons(ip->ip_off);
		ip->ip_sum = 0;
		if (sw_csum & CSUM_DELAY_IP) {
			/* From KAME */
			if (ip->ip_v == IPVERSION &&
			    (ip->ip_hl << 2) == sizeof(*ip)) {
				ip->ip_sum = in_cksum_hdr(ip);
			} else {
				ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
			}
		}
		/* pf_token is dropped for the duration of the output call */
		lwkt_reltoken(&pf_token);
		crit_exit();
		error = ifp->if_output(ifp, m0, sintosa(dst), ro->ro_rt);
		crit_enter();
		lwkt_gettoken(&pf_token);
		goto done;
	}

	/*
	 * Too large for interface; fragment if possible.
	 * Must be able to put at least 8 bytes per fragment.
	 */
	if (ip->ip_off & IP_DF) {
		ipstat.ips_cantfrag++;
		if (r->rt != PF_DUPTO) {
			/* m0 is handed to icmp_error() and not freed here */
			crit_exit();
			icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0,
				   ifp->if_mtu);
			crit_enter();
			goto done;
		} else
			goto bad;
	}

	m1 = m0;
	error = ip_fragment(ip, &m0, ifp->if_mtu, ifp->if_hwassist, sw_csum);
	if (error) {
		goto bad;
	}

	/*
	 * Send the fragment chain (linked through m_nextpkt); after the
	 * first output error the remaining fragments are freed instead.
	 */
	for (m0 = m1; m0; m0 = m1) {
		m1 = m0->m_nextpkt;
		m0->m_nextpkt = 0;
		if (error == 0) {
			lwkt_reltoken(&pf_token);
			crit_exit();
			error = (*ifp->if_output)(ifp, m0, sintosa(dst),
						  NULL);
			crit_enter();
			lwkt_gettoken(&pf_token);
		} else
			m_freem(m0);
	}

	if (error == 0)
		ipstat.ips_fragmented++;

done:
	/* Except for dup-to, the caller's pointer is cleared on every exit. */
	if (r->rt != PF_DUPTO)
		*m = NULL;
	/* ro is still NULL if we bailed out before the route was set up */
	if (ro == &iproute && ro->ro_rt)
		RTFREE(ro->ro_rt);
	return;

bad:
	m_freem(m0);
	goto done;
}
#endif /* INET */
#ifdef INET6
/*
 * Policy-route an IPv6 packet according to rule "r" (route-to, reply-to,
 * dup-to or fastroute) and transmit it on the chosen interface.
 *
 *   m     in/out packet; set to NULL on exit unless r->rt is PF_DUPTO,
 *         in which case a duplicate is sent and *m is left alone
 *   r     rule whose rt/rpool settings select the next hop
 *   dir   PF_IN or PF_OUT, direction the packet was filtered in
 *   oifp  interface the packet was seen on
 *   s     state supplying rt_addr/rt_kif, or NULL to pick from r->rpool
 *   pd    packet descriptor (not referenced in this function)
 *
 * Every call out of pf (pf_test6, ip6_output, nd6_output, icmp6_error) is
 * bracketed by crit_exit()/crit_enter() so the nesting level on return is
 * the same as on entry.  Panics on invalid arguments.
 */
void
pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
    struct pf_state *s, struct pf_pdesc *pd)
{
	struct mbuf		*m0;
	struct route_in6	 ip6route;
	struct route_in6	*ro;
	struct sockaddr_in6	*dst;
	struct ip6_hdr		*ip6;
	struct ifnet		*ifp = NULL;
	struct pf_addr		 naddr;
	struct pf_src_node	*sn = NULL;
	int			 error = 0;

	if (m == NULL || *m == NULL || r == NULL ||
	    (dir != PF_IN && dir != PF_OUT) || oifp == NULL)
		panic("pf_route6: invalid parameters");

	/*
	 * Loop protection: count how often this mbuf has been through
	 * pf_route6 and drop it once the counter has exceeded 3.
	 */
	if (((*m)->m_pkthdr.fw_flags & PF_MBUF_ROUTED) == 0) {
		(*m)->m_pkthdr.fw_flags |= PF_MBUF_ROUTED;
		(*m)->m_pkthdr.pf.routed = 1;
	} else {
		if ((*m)->m_pkthdr.pf.routed++ > 3) {
			m0 = *m;
			*m = NULL;
			goto bad;
		}
	}

	if (r->rt == PF_DUPTO) {
		/* dup-to works on a copy; the original stays with the caller */
		if ((m0 = m_dup(*m, MB_DONTWAIT)) == NULL)
			return;
	} else {
		/*
		 * reply-to acts on the direction opposite to the rule's,
		 * every other type on the rule's own direction.
		 */
		if ((r->rt == PF_REPLYTO) == (r->direction == dir))
			return;
		m0 = *m;
	}

	if (m0->m_len < sizeof(struct ip6_hdr)) {
		DPFPRINTF(PF_DEBUG_URGENT,
		    ("pf_route6: m0->m_len < sizeof(struct ip6_hdr)\n"));
		goto bad;
	}
	ip6 = mtod(m0, struct ip6_hdr *);

	/* Default next hop is the packet's own destination. */
	ro = &ip6route;
	bzero((caddr_t)ro, sizeof(*ro));
	dst = (struct sockaddr_in6 *)&ro->ro_dst;
	dst->sin6_family = AF_INET6;
	dst->sin6_len = sizeof(*dst);
	dst->sin6_addr = ip6->ip6_dst;

	/* Cheat. XXX why only in the v6 case??? */
	if (r->rt == PF_FASTROUTE) {
		m0->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
		/*
		 * Leave the critical section around the output call, like
		 * every other call out of pf in this function; re-entering
		 * without a matching exit would leak one nesting level.
		 */
		crit_exit();
		ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL);
		crit_enter();
		/*
		 * m0 now belongs to ip6_output().  Go through "done" so the
		 * caller's pointer is cleared (unless this was a dup-to copy)
		 * and it cannot touch the mbuf again.
		 */
		goto done;
	}

	if (TAILQ_EMPTY(&r->rpool.list)) {
		DPFPRINTF(PF_DEBUG_URGENT,
		    ("pf_route6: TAILQ_EMPTY(&r->rpool.list)\n"));
		goto bad;
	}
	if (s == NULL) {
		/* stateless: choose next hop from the rule's pool */
		pf_map_addr(AF_INET6, r, (struct pf_addr *)&ip6->ip6_src,
		    &naddr, NULL, &sn);
		if (!PF_AZERO(&naddr, AF_INET6))
			PF_ACPY((struct pf_addr *)&dst->sin6_addr,
			    &naddr, AF_INET6);
		ifp = r->rpool.cur->kif ? r->rpool.cur->kif->pfik_ifp : NULL;
	} else {
		/* stateful: reuse the next hop stored in the state */
		if (!PF_AZERO(&s->rt_addr, AF_INET6))
			PF_ACPY((struct pf_addr *)&dst->sin6_addr,
			    &s->rt_addr, AF_INET6);
		ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
	}
	if (ifp == NULL)
		goto bad;

	if (oifp != ifp) {
		/*
		 * Leaving through a different interface: run the outbound
		 * ruleset of that interface first.  pf_test6 may replace or
		 * consume m0, so it is re-validated afterwards.
		 */
		crit_exit();
		if (pf_test6(PF_OUT, ifp, &m0, NULL, NULL) != PF_PASS) {
			crit_enter();
			goto bad;
		} else if (m0 == NULL) {
			crit_enter();
			goto done;
		}
		crit_enter();
		if (m0->m_len < sizeof(struct ip6_hdr)) {
			DPFPRINTF(PF_DEBUG_URGENT,
			    ("pf_route6: m0->m_len < sizeof(struct ip6_hdr)\n"));
			goto bad;
		}
		ip6 = mtod(m0, struct ip6_hdr *);
	}

	/*
	 * If the packet is too large for the outgoing interface,
	 * send back an icmp6 error.
	 */
	if (IN6_IS_ADDR_LINKLOCAL(&dst->sin6_addr))
		/* embed the interface index in the link-local next hop */
		dst->sin6_addr.s6_addr16[1] = htons(ifp->if_index);
	if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) {
		crit_exit();
		error = nd6_output(ifp, ifp, m0, dst, NULL);
		crit_enter();
	} else {
		in6_ifstat_inc(ifp, ifs6_in_toobig);
		if (r->rt != PF_DUPTO) {
			/* m0 is handed to icmp6_error() and not freed here */
			crit_exit();
			icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
			crit_enter();
		} else
			goto bad;
	}

done:
	/* Except for dup-to, the caller's pointer is cleared on every exit. */
	if (r->rt != PF_DUPTO)
		*m = NULL;
	return;

bad:
	m_freem(m0);
	goto done;
}

#endif /* INET6 */
5492 */ 5493 int 5494 pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, 5495 sa_family_t af) 5496 { 5497 u_int16_t sum = 0; 5498 int hw_assist = 0; 5499 struct ip *ip; 5500 5501 if (off < sizeof(struct ip) || len < sizeof(struct udphdr)) 5502 return (1); 5503 if (m->m_pkthdr.len < off + len) 5504 return (1); 5505 5506 switch (p) { 5507 case IPPROTO_TCP: 5508 case IPPROTO_UDP: 5509 if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) { 5510 if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) { 5511 sum = m->m_pkthdr.csum_data; 5512 } else { 5513 ip = mtod(m, struct ip *); 5514 sum = in_pseudo(ip->ip_src.s_addr, 5515 ip->ip_dst.s_addr, htonl((u_short)len + 5516 m->m_pkthdr.csum_data + p)); 5517 } 5518 sum ^= 0xffff; 5519 ++hw_assist; 5520 } 5521 break; 5522 case IPPROTO_ICMP: 5523 #ifdef INET6 5524 case IPPROTO_ICMPV6: 5525 #endif /* INET6 */ 5526 break; 5527 default: 5528 return (1); 5529 } 5530 5531 if (!hw_assist) { 5532 switch (af) { 5533 case AF_INET: 5534 if (p == IPPROTO_ICMP) { 5535 if (m->m_len < off) 5536 return (1); 5537 m->m_data += off; 5538 m->m_len -= off; 5539 sum = in_cksum(m, len); 5540 m->m_data -= off; 5541 m->m_len += off; 5542 } else { 5543 if (m->m_len < sizeof(struct ip)) 5544 return (1); 5545 sum = in_cksum_range(m, p, off, len); 5546 if (sum == 0) { 5547 m->m_pkthdr.csum_flags |= 5548 (CSUM_DATA_VALID | 5549 CSUM_PSEUDO_HDR); 5550 m->m_pkthdr.csum_data = 0xffff; 5551 } 5552 } 5553 break; 5554 #ifdef INET6 5555 case AF_INET6: 5556 if (m->m_len < sizeof(struct ip6_hdr)) 5557 return (1); 5558 sum = in6_cksum(m, p, off, len); 5559 /* 5560 * XXX 5561 * IPv6 H/W cksum off-load not supported yet! 
5562 * 5563 * if (sum == 0) { 5564 * m->m_pkthdr.csum_flags |= 5565 * (CSUM_DATA_VALID|CSUM_PSEUDO_HDR); 5566 * m->m_pkthdr.csum_data = 0xffff; 5567 *} 5568 */ 5569 break; 5570 #endif /* INET6 */ 5571 default: 5572 return (1); 5573 } 5574 } 5575 if (sum) { 5576 switch (p) { 5577 case IPPROTO_TCP: 5578 tcpstat.tcps_rcvbadsum++; 5579 break; 5580 case IPPROTO_UDP: 5581 udpstat.udps_badsum++; 5582 break; 5583 case IPPROTO_ICMP: 5584 icmpstat.icps_checksum++; 5585 break; 5586 #ifdef INET6 5587 case IPPROTO_ICMPV6: 5588 icmp6stat.icp6s_checksum++; 5589 break; 5590 #endif /* INET6 */ 5591 } 5592 return (1); 5593 } 5594 return (0); 5595 } 5596 5597 #ifdef INET 5598 int 5599 pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, 5600 struct ether_header *eh, struct inpcb *inp) 5601 { 5602 struct pfi_kif *kif; 5603 u_short action, reason = 0, log = 0; 5604 struct mbuf *m = *m0; 5605 struct ip *h = NULL; 5606 struct pf_rule *a = NULL, *r = &pf_default_rule, *tr, *nr; 5607 struct pf_state *s = NULL; 5608 struct pf_state_key *sk = NULL; 5609 struct pf_ruleset *ruleset = NULL; 5610 struct pf_pdesc pd; 5611 int off, dirndx, pqid = 0; 5612 5613 if (!pf_status.running) 5614 return (PF_PASS); 5615 5616 memset(&pd, 0, sizeof(pd)); 5617 if (ifp->if_type == IFT_CARP && ifp->if_carpdev) 5618 kif = (struct pfi_kif *)ifp->if_carpdev->if_pf_kif; 5619 else 5620 kif = (struct pfi_kif *)ifp->if_pf_kif; 5621 5622 if (kif == NULL) { 5623 DPFPRINTF(PF_DEBUG_URGENT, 5624 ("pf_test: kif == NULL, if_xname %s\n", ifp->if_xname)); 5625 return (PF_DROP); 5626 } 5627 if (kif->pfik_flags & PFI_IFLAG_SKIP) 5628 return (PF_PASS); 5629 5630 #ifdef DIAGNOSTIC 5631 if ((m->m_flags & M_PKTHDR) == 0) 5632 panic("non-M_PKTHDR is passed to pf_test"); 5633 #endif /* DIAGNOSTIC */ 5634 5635 if (m->m_pkthdr.len < (int)sizeof(*h)) { 5636 action = PF_DROP; 5637 REASON_SET(&reason, PFRES_SHORT); 5638 log = 1; 5639 goto done; 5640 } 5641 5642 if (m->m_pkthdr.pf.flags & PF_TAG_GENERATED) 5643 return (PF_PASS); 5644 5645 
#ifdef INET
/*
 * IPv4 entry point of the packet filter.
 *
 *   dir  PF_IN or PF_OUT
 *   ifp  interface the packet is passing through
 *   m0   in/out packet; may be replaced by normalization, and is set to
 *        NULL here for PF_SYNPROXY_DROP (pf_route may also clear it)
 *   eh   ethernet header, stored in the packet descriptor
 *   inp  passed through to pf_test_rule()
 *
 * Flow: normalize/reassemble, dispatch on the IP protocol to the state
 * tracking routine, fall back to rule evaluation when no state exists,
 * then (label "done") enforce the IP-options policy, tag, queue, log,
 * account and finally hand the packet to pf_route() if the rule asks
 * for it.
 *
 * Returns the resulting action (PF_PASS, PF_DROP, ...).
 */
int
pf_test(int dir, struct ifnet *ifp, struct mbuf **m0,
    struct ether_header *eh, struct inpcb *inp)
{
	struct pfi_kif		*kif;
	u_short			 action, reason = 0, log = 0;
	struct mbuf		*m = *m0;
	struct ip		*h = NULL;
	struct pf_rule		*a = NULL, *r = &pf_default_rule, *tr, *nr;
	struct pf_state		*s = NULL;
	struct pf_state_key	*sk = NULL;
	struct pf_ruleset	*ruleset = NULL;
	struct pf_pdesc		 pd;
	int			 off, dirndx, pqid = 0;

	if (!pf_status.running)
		return (PF_PASS);

	memset(&pd, 0, sizeof(pd));
	/* For a carp interface use the kif of the underlying device. */
	if (ifp->if_type == IFT_CARP && ifp->if_carpdev)
		kif = (struct pfi_kif *)ifp->if_carpdev->if_pf_kif;
	else
		kif = (struct pfi_kif *)ifp->if_pf_kif;

	if (kif == NULL) {
		DPFPRINTF(PF_DEBUG_URGENT,
		    ("pf_test: kif == NULL, if_xname %s\n", ifp->if_xname));
		return (PF_DROP);
	}
	if (kif->pfik_flags & PFI_IFLAG_SKIP)
		return (PF_PASS);

#ifdef DIAGNOSTIC
	if ((m->m_flags & M_PKTHDR) == 0)
		panic("non-M_PKTHDR is passed to pf_test");
#endif /* DIAGNOSTIC */

	/* Note: h is still NULL when jumping to "done" from here. */
	if (m->m_pkthdr.len < (int)sizeof(*h)) {
		action = PF_DROP;
		REASON_SET(&reason, PFRES_SHORT);
		log = 1;
		goto done;
	}

	/* Packets flagged PF_TAG_GENERATED are not filtered again. */
	if (m->m_pkthdr.pf.flags & PF_TAG_GENERATED)
		return (PF_PASS);

	/* We do IP header normalization and packet reassembly here */
	if (pf_normalize_ip(m0, dir, kif, &reason, &pd) != PF_PASS) {
		action = PF_DROP;
		goto done;
	}
	m = *m0;	/* pf_normalize messes with m0 */
	h = mtod(m, struct ip *);

	/* off: length of the IP header, i.e. start of the payload */
	off = h->ip_hl << 2;
	if (off < (int)sizeof(*h)) {
		action = PF_DROP;
		REASON_SET(&reason, PFRES_SHORT);
		log = 1;
		goto done;
	}

	/* Fill in the packet descriptor from the IP header. */
	pd.src = (struct pf_addr *)&h->ip_src;
	pd.dst = (struct pf_addr *)&h->ip_dst;
	PF_ACPY(&pd.baddr, dir == PF_OUT ? pd.src : pd.dst, AF_INET);
	pd.ip_sum = &h->ip_sum;
	pd.proto = h->ip_p;
	pd.af = AF_INET;
	pd.tos = h->ip_tos;
	pd.tot_len = h->ip_len;
	pd.eh = eh;

	/* handle fragments that didn't get reassembled by normalization */
	if (h->ip_off & (IP_MF | IP_OFFMASK)) {
		action = pf_test_fragment(&r, dir, kif, m, h,
		    &pd, &a, &ruleset);
		goto done;
	}

	/*
	 * Per-protocol handling.  Each case first tries to match an
	 * existing state; on PF_PASS the rule, anchor and log setting are
	 * taken from the state.  If no state was found (s == NULL) the
	 * ruleset is evaluated instead.
	 */
	switch (h->ip_p) {

	case IPPROTO_TCP: {
		struct tcphdr	th;

		pd.hdr.tcp = &th;
		if (!pf_pull_hdr(m, off, &th, sizeof(th),
		    &action, &reason, AF_INET)) {
			log = action != PF_PASS;
			goto done;
		}
		pd.p_len = pd.tot_len - off - (th.th_off << 2);
		/* ACK without payload: eligible for the priority queue */
		if ((th.th_flags & TH_ACK) && pd.p_len == 0)
			pqid = 1;
		action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd);
		if (action == PF_DROP)
			goto done;
		action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd,
		    &reason);
		if (action == PF_PASS) {
#if NPFSYNC
			pfsync_update_state(s);
#endif /* NPFSYNC */
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL)
			action = pf_test_rule(&r, &s, dir, kif,
			    m, off, h, &pd, &a, &ruleset, NULL, inp);
		break;
	}

	case IPPROTO_UDP: {
		struct udphdr	uh;

		pd.hdr.udp = &uh;
		if (!pf_pull_hdr(m, off, &uh, sizeof(uh),
		    &action, &reason, AF_INET)) {
			log = action != PF_PASS;
			goto done;
		}
		/* Reject port 0 and UDP lengths that do not fit the packet. */
		if (uh.uh_dport == 0 ||
		    ntohs(uh.uh_ulen) > m->m_pkthdr.len - off ||
		    ntohs(uh.uh_ulen) < sizeof(struct udphdr)) {
			action = PF_DROP;
			REASON_SET(&reason, PFRES_SHORT);
			goto done;
		}
		action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd);
		if (action == PF_PASS) {
#if NPFSYNC
			pfsync_update_state(s);
#endif /* NPFSYNC */
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL)
			action = pf_test_rule(&r, &s, dir, kif,
			    m, off, h, &pd, &a, &ruleset, NULL, inp);
		break;
	}

	case IPPROTO_ICMP: {
		struct icmp	ih;

		pd.hdr.icmp = &ih;
		if (!pf_pull_hdr(m, off, &ih, ICMP_MINLEN,
		    &action, &reason, AF_INET)) {
			log = action != PF_PASS;
			goto done;
		}
		action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd,
		    &reason);
		if (action == PF_PASS) {
#if NPFSYNC
			pfsync_update_state(s);
#endif /* NPFSYNC */
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL)
			action = pf_test_rule(&r, &s, dir, kif,
			    m, off, h, &pd, &a, &ruleset, NULL, inp);
		break;
	}

	default:
		action = pf_test_state_other(&s, dir, kif, &pd);
		if (action == PF_PASS) {
#if NPFSYNC
			pfsync_update_state(s);
#endif /* NPFSYNC */
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL)
			action = pf_test_rule(&r, &s, dir, kif, m, off, h,
			    &pd, &a, &ruleset, NULL, inp);
		break;
	}

done:
	/*
	 * Packets carrying IP options (header longer than 5 words) are
	 * dropped unless the state or the rule allows them.  h is only
	 * dereferenced when action is PF_PASS, which cannot be the case
	 * on the early paths that arrive here with h == NULL.
	 */
	if (action == PF_PASS && h->ip_hl > 5 &&
	    !((s && s->allow_opts) || r->allow_opts)) {
		action = PF_DROP;
		REASON_SET(&reason, PFRES_IPOPTIONS);
		log = 1;
		DPFPRINTF(PF_DEBUG_MISC,
		    ("pf: dropping packet with ip options\n"));
	}

	if ((s && s->tag) || r->rtableid)
		pf_tag_packet(m, s ? s->tag : 0, r->rtableid);

#ifdef ALTQ
	if (action == PF_PASS && r->qid) {
		m->m_pkthdr.fw_flags |= PF_MBUF_STRUCTURE;
		/* empty ACKs and low-delay TOS go to the priority queue */
		if (pqid || (pd.tos & IPTOS_LOWDELAY))
			m->m_pkthdr.pf.qid = r->pqid;
		else
			m->m_pkthdr.pf.qid = r->qid;
		m->m_pkthdr.pf.ecn_af = AF_INET;
		m->m_pkthdr.pf.hdr = h;
		/* add connection hash for fairq */
		if (s) {
			/* for fairq */
			m->m_pkthdr.pf.state_hash = s->hash;
			m->m_pkthdr.pf.flags |= PF_TAG_STATE_HASHED;
		}
	}
#endif /* ALTQ */

	/*
	 * connections redirected to loopback should not match sockets
	 * bound specifically to loopback due to security implications,
	 * see tcp_input() and in_pcblookup_listen().
	 */
	if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP ||
	    pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL &&
	    (s->nat_rule.ptr->action == PF_RDR ||
	    s->nat_rule.ptr->action == PF_BINAT) &&
	    (ntohl(pd.dst->v4.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)
		m->m_pkthdr.pf.flags |= PF_TAG_TRANSLATE_LOCALHOST;

	if (log) {
		struct pf_rule *lr;

		/* Log against the NAT rule if it asks for "log all". */
		if (s != NULL && s->nat_rule.ptr != NULL &&
		    s->nat_rule.ptr->log & PF_LOG_ALL)
			lr = s->nat_rule.ptr;
		else
			lr = r;
		PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, lr, a, ruleset,
		    &pd);
	}

	/* Interface counters, indexed [af][out?][blocked?]; 0 = IPv4 here. */
	kif->pfik_bytes[0][dir == PF_OUT][action != PF_PASS] += pd.tot_len;
	kif->pfik_packets[0][dir == PF_OUT][action != PF_PASS]++;

	/* Rule, anchor, NAT rule, source node and state counters. */
	if (action == PF_PASS || r->action == PF_DROP) {
		dirndx = (dir == PF_OUT);
		r->packets[dirndx]++;
		r->bytes[dirndx] += pd.tot_len;
		if (a != NULL) {
			a->packets[dirndx]++;
			a->bytes[dirndx] += pd.tot_len;
		}
		if (s != NULL) {
			sk = s->state_key;
			if (s->nat_rule.ptr != NULL) {
				s->nat_rule.ptr->packets[dirndx]++;
				s->nat_rule.ptr->bytes[dirndx] += pd.tot_len;
			}
			if (s->src_node != NULL) {
				s->src_node->packets[dirndx]++;
				s->src_node->bytes[dirndx] += pd.tot_len;
			}
			if (s->nat_src_node != NULL) {
				s->nat_src_node->packets[dirndx]++;
				s->nat_src_node->bytes[dirndx] += pd.tot_len;
			}
			/* state counters are relative to the state's direction */
			dirndx = (dir == sk->direction) ? 0 : 1;
			s->packets[dirndx]++;
			s->bytes[dirndx] += pd.tot_len;
		}
		tr = r;
		nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
		if (nr != NULL) {
			struct pf_addr *x;
			/*
			 * XXX: we need to make sure that the addresses
			 * passed to pfr_update_stats() are the same as
			 * the addresses used during matching (pfr_match)
			 */
			if (r == &pf_default_rule) {
				tr = nr;
				x = (sk == NULL || sk->direction == dir) ?
				    &pd.baddr : &pd.naddr;
			} else
				x = (sk == NULL || sk->direction == dir) ?
				    &pd.naddr : &pd.baddr;
			if (x == &pd.baddr || s == NULL) {
				/* we need to change the address */
				if (dir == PF_OUT)
					pd.src = x;
				else
					pd.dst = x;
			}
		}
		if (tr->src.addr.type == PF_ADDR_TABLE)
			pfr_update_stats(tr->src.addr.p.tbl, (sk == NULL ||
			    sk->direction == dir) ?
			    pd.src : pd.dst, pd.af,
			    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
			    tr->src.neg);
		if (tr->dst.addr.type == PF_ADDR_TABLE)
			pfr_update_stats(tr->dst.addr.p.tbl, (sk == NULL ||
			    sk->direction == dir) ? pd.dst : pd.src, pd.af,
			    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
			    tr->dst.neg);
	}


	if (action == PF_SYNPROXY_DROP) {
		/* free the packet here and report PF_PASS to the caller */
		m_freem(*m0);
		*m0 = NULL;
		action = PF_PASS;
	} else if (r->rt)
		/* pf_route can free the mbuf causing *m0 to become NULL */
		pf_route(m0, r, dir, kif->pfik_ifp, s, &pd);

	return (action);
}
#endif /* INET */
pf_normalize messes with m0 */ 5967 h = mtod(m, struct ip6_hdr *); 5968 5969 #if 1 5970 /* 5971 * we do not support jumbogram yet. if we keep going, zero ip6_plen 5972 * will do something bad, so drop the packet for now. 5973 */ 5974 if (htons(h->ip6_plen) == 0) { 5975 action = PF_DROP; 5976 REASON_SET(&reason, PFRES_NORM); /*XXX*/ 5977 goto done; 5978 } 5979 #endif 5980 5981 pd.src = (struct pf_addr *)&h->ip6_src; 5982 pd.dst = (struct pf_addr *)&h->ip6_dst; 5983 PF_ACPY(&pd.baddr, dir == PF_OUT ? pd.src : pd.dst, AF_INET6); 5984 pd.ip_sum = NULL; 5985 pd.af = AF_INET6; 5986 pd.tos = 0; 5987 pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr); 5988 pd.eh = eh; 5989 5990 off = ((caddr_t)h - m->m_data) + sizeof(struct ip6_hdr); 5991 pd.proto = h->ip6_nxt; 5992 do { 5993 switch (pd.proto) { 5994 case IPPROTO_FRAGMENT: 5995 action = pf_test_fragment(&r, dir, kif, m, h, 5996 &pd, &a, &ruleset); 5997 if (action == PF_DROP) 5998 REASON_SET(&reason, PFRES_FRAG); 5999 goto done; 6000 case IPPROTO_ROUTING: { 6001 struct ip6_rthdr rthdr; 6002 6003 if (rh_cnt++) { 6004 DPFPRINTF(PF_DEBUG_MISC, 6005 ("pf: IPv6 more than one rthdr\n")); 6006 action = PF_DROP; 6007 REASON_SET(&reason, PFRES_IPOPTIONS); 6008 log = 1; 6009 goto done; 6010 } 6011 if (!pf_pull_hdr(m, off, &rthdr, sizeof(rthdr), NULL, 6012 &reason, pd.af)) { 6013 DPFPRINTF(PF_DEBUG_MISC, 6014 ("pf: IPv6 short rthdr\n")); 6015 action = PF_DROP; 6016 REASON_SET(&reason, PFRES_SHORT); 6017 log = 1; 6018 goto done; 6019 } 6020 if (rthdr.ip6r_type == IPV6_RTHDR_TYPE_0) { 6021 DPFPRINTF(PF_DEBUG_MISC, 6022 ("pf: IPv6 rthdr0\n")); 6023 action = PF_DROP; 6024 REASON_SET(&reason, PFRES_IPOPTIONS); 6025 log = 1; 6026 goto done; 6027 } 6028 /* FALLTHROUGH */ 6029 } 6030 case IPPROTO_AH: 6031 case IPPROTO_HOPOPTS: 6032 case IPPROTO_DSTOPTS: { 6033 /* get next header and header length */ 6034 struct ip6_ext opt6; 6035 6036 if (!pf_pull_hdr(m, off, &opt6, sizeof(opt6), 6037 NULL, &reason, pd.af)) { 6038 
DPFPRINTF(PF_DEBUG_MISC, 6039 ("pf: IPv6 short opt\n")); 6040 action = PF_DROP; 6041 log = 1; 6042 goto done; 6043 } 6044 if (pd.proto == IPPROTO_AH) 6045 off += (opt6.ip6e_len + 2) * 4; 6046 else 6047 off += (opt6.ip6e_len + 1) * 8; 6048 pd.proto = opt6.ip6e_nxt; 6049 /* goto the next header */ 6050 break; 6051 } 6052 default: 6053 terminal++; 6054 break; 6055 } 6056 } while (!terminal); 6057 6058 /* if there's no routing header, use unmodified mbuf for checksumming */ 6059 if (!n) 6060 n = m; 6061 6062 switch (pd.proto) { 6063 6064 case IPPROTO_TCP: { 6065 struct tcphdr th; 6066 6067 pd.hdr.tcp = &th; 6068 if (!pf_pull_hdr(m, off, &th, sizeof(th), 6069 &action, &reason, AF_INET6)) { 6070 log = action != PF_PASS; 6071 goto done; 6072 } 6073 pd.p_len = pd.tot_len - off - (th.th_off << 2); 6074 action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd); 6075 if (action == PF_DROP) 6076 goto done; 6077 action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd, 6078 &reason); 6079 if (action == PF_PASS) { 6080 #if NPFSYNC 6081 pfsync_update_state(s); 6082 #endif /* NPFSYNC */ 6083 r = s->rule.ptr; 6084 a = s->anchor.ptr; 6085 log = s->log; 6086 } else if (s == NULL) 6087 action = pf_test_rule(&r, &s, dir, kif, 6088 m, off, h, &pd, &a, &ruleset, NULL, inp); 6089 break; 6090 } 6091 6092 case IPPROTO_UDP: { 6093 struct udphdr uh; 6094 6095 pd.hdr.udp = &uh; 6096 if (!pf_pull_hdr(m, off, &uh, sizeof(uh), 6097 &action, &reason, AF_INET6)) { 6098 log = action != PF_PASS; 6099 goto done; 6100 } 6101 if (uh.uh_dport == 0 || 6102 ntohs(uh.uh_ulen) > m->m_pkthdr.len - off || 6103 ntohs(uh.uh_ulen) < sizeof(struct udphdr)) { 6104 action = PF_DROP; 6105 REASON_SET(&reason, PFRES_SHORT); 6106 goto done; 6107 } 6108 action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd); 6109 if (action == PF_PASS) { 6110 #if NPFSYNC 6111 pfsync_update_state(s); 6112 #endif /* NPFSYNC */ 6113 r = s->rule.ptr; 6114 a = s->anchor.ptr; 6115 log = s->log; 6116 } else if (s == NULL) 6117 action = 
pf_test_rule(&r, &s, dir, kif, 6118 m, off, h, &pd, &a, &ruleset, NULL, inp); 6119 break; 6120 } 6121 6122 case IPPROTO_ICMPV6: { 6123 struct icmp6_hdr ih; 6124 6125 pd.hdr.icmp6 = &ih; 6126 if (!pf_pull_hdr(m, off, &ih, sizeof(ih), 6127 &action, &reason, AF_INET6)) { 6128 log = action != PF_PASS; 6129 goto done; 6130 } 6131 action = pf_test_state_icmp(&s, dir, kif, 6132 m, off, h, &pd, &reason); 6133 if (action == PF_PASS) { 6134 #if NPFSYNC 6135 pfsync_update_state(s); 6136 #endif /* NPFSYNC */ 6137 r = s->rule.ptr; 6138 a = s->anchor.ptr; 6139 log = s->log; 6140 } else if (s == NULL) 6141 action = pf_test_rule(&r, &s, dir, kif, 6142 m, off, h, &pd, &a, &ruleset, NULL, inp); 6143 break; 6144 } 6145 6146 default: 6147 action = pf_test_state_other(&s, dir, kif, &pd); 6148 if (action == PF_PASS) { 6149 #if NPFSYNC 6150 pfsync_update_state(s); 6151 #endif /* NPFSYNC */ 6152 r = s->rule.ptr; 6153 a = s->anchor.ptr; 6154 log = s->log; 6155 } else if (s == NULL) 6156 action = pf_test_rule(&r, &s, dir, kif, m, off, h, 6157 &pd, &a, &ruleset, NULL, inp); 6158 break; 6159 } 6160 6161 done: 6162 if (n != m) { 6163 m_freem(n); 6164 n = NULL; 6165 } 6166 6167 /* handle dangerous IPv6 extension headers. */ 6168 if (action == PF_PASS && rh_cnt && 6169 !((s && s->allow_opts) || r->allow_opts)) { 6170 action = PF_DROP; 6171 REASON_SET(&reason, PFRES_IPOPTIONS); 6172 log = 1; 6173 DPFPRINTF(PF_DEBUG_MISC, 6174 ("pf: dropping packet with dangerous v6 headers\n")); 6175 } 6176 6177 if ((s && s->tag) || r->rtableid) 6178 pf_tag_packet(m, s ? 
s->tag : 0, r->rtableid); 6179 6180 #ifdef ALTQ 6181 if (action == PF_PASS && r->qid) { 6182 m->m_pkthdr.fw_flags |= PF_MBUF_STRUCTURE; 6183 if (pd.tos & IPTOS_LOWDELAY) 6184 m->m_pkthdr.pf.qid = r->pqid; 6185 else 6186 m->m_pkthdr.pf.qid = r->qid; 6187 m->m_pkthdr.pf.ecn_af = AF_INET6; 6188 m->m_pkthdr.pf.hdr = h; 6189 if (s) { 6190 /* for fairq */ 6191 m->m_pkthdr.pf.state_hash = s->hash; 6192 m->m_pkthdr.pf.flags |= PF_TAG_STATE_HASHED; 6193 } 6194 } 6195 #endif /* ALTQ */ 6196 6197 if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP || 6198 pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL && 6199 (s->nat_rule.ptr->action == PF_RDR || 6200 s->nat_rule.ptr->action == PF_BINAT) && 6201 IN6_IS_ADDR_LOOPBACK(&pd.dst->v6)) 6202 m->m_pkthdr.pf.flags |= PF_TAG_TRANSLATE_LOCALHOST; 6203 6204 if (log) { 6205 struct pf_rule *lr; 6206 6207 if (s != NULL && s->nat_rule.ptr != NULL && 6208 s->nat_rule.ptr->log & PF_LOG_ALL) 6209 lr = s->nat_rule.ptr; 6210 else 6211 lr = r; 6212 PFLOG_PACKET(kif, h, m, AF_INET6, dir, reason, lr, a, ruleset, 6213 &pd); 6214 } 6215 6216 kif->pfik_bytes[1][dir == PF_OUT][action != PF_PASS] += pd.tot_len; 6217 kif->pfik_packets[1][dir == PF_OUT][action != PF_PASS]++; 6218 6219 if (action == PF_PASS || r->action == PF_DROP) { 6220 dirndx = (dir == PF_OUT); 6221 r->packets[dirndx]++; 6222 r->bytes[dirndx] += pd.tot_len; 6223 if (a != NULL) { 6224 a->packets[dirndx]++; 6225 a->bytes[dirndx] += pd.tot_len; 6226 } 6227 if (s != NULL) { 6228 sk = s->state_key; 6229 if (s->nat_rule.ptr != NULL) { 6230 s->nat_rule.ptr->packets[dirndx]++; 6231 s->nat_rule.ptr->bytes[dirndx] += pd.tot_len; 6232 } 6233 if (s->src_node != NULL) { 6234 s->src_node->packets[dirndx]++; 6235 s->src_node->bytes[dirndx] += pd.tot_len; 6236 } 6237 if (s->nat_src_node != NULL) { 6238 s->nat_src_node->packets[dirndx]++; 6239 s->nat_src_node->bytes[dirndx] += pd.tot_len; 6240 } 6241 dirndx = (dir == sk->direction) ? 
0 : 1; 6242 s->packets[dirndx]++; 6243 s->bytes[dirndx] += pd.tot_len; 6244 } 6245 tr = r; 6246 nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule; 6247 if (nr != NULL) { 6248 struct pf_addr *x; 6249 /* 6250 * XXX: we need to make sure that the addresses 6251 * passed to pfr_update_stats() are the same than 6252 * the addresses used during matching (pfr_match) 6253 */ 6254 if (r == &pf_default_rule) { 6255 tr = nr; 6256 x = (s == NULL || sk->direction == dir) ? 6257 &pd.baddr : &pd.naddr; 6258 } else { 6259 x = (s == NULL || sk->direction == dir) ? 6260 &pd.naddr : &pd.baddr; 6261 } 6262 if (x == &pd.baddr || s == NULL) { 6263 if (dir == PF_OUT) 6264 pd.src = x; 6265 else 6266 pd.dst = x; 6267 } 6268 } 6269 if (tr->src.addr.type == PF_ADDR_TABLE) 6270 pfr_update_stats(tr->src.addr.p.tbl, (sk == NULL || 6271 sk->direction == dir) ? pd.src : pd.dst, pd.af, 6272 pd.tot_len, dir == PF_OUT, r->action == PF_PASS, 6273 tr->src.neg); 6274 if (tr->dst.addr.type == PF_ADDR_TABLE) 6275 pfr_update_stats(tr->dst.addr.p.tbl, (sk == NULL || 6276 sk->direction == dir) ? pd.dst : pd.src, pd.af, 6277 pd.tot_len, dir == PF_OUT, r->action == PF_PASS, 6278 tr->dst.neg); 6279 } 6280 6281 6282 if (action == PF_SYNPROXY_DROP) { 6283 m_freem(*m0); 6284 *m0 = NULL; 6285 action = PF_PASS; 6286 } else if (r->rt) 6287 /* pf_route6 can free the mbuf causing *m0 to become NULL */ 6288 pf_route6(m0, r, dir, kif->pfik_ifp, s, &pd); 6289 6290 return (action); 6291 } 6292 #endif /* INET6 */ 6293 6294 int 6295 pf_check_congestion(struct ifqueue *ifq) 6296 { 6297 return (0); 6298 } 6299