1 /* $NetBSD: pf_norm.c,v 1.10 2005/12/11 12:24:25 christos Exp $ */ 2 /* $OpenBSD: pf_norm.c,v 1.97 2004/09/21 16:59:12 aaron Exp $ */ 3 4 /* 5 * Copyright 2001 Niels Provos <provos@citi.umich.edu> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 #ifdef _KERNEL_OPT 30 #include "opt_inet.h" 31 #endif 32 33 #include "pflog.h" 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/mbuf.h> 38 #include <sys/filio.h> 39 #include <sys/fcntl.h> 40 #include <sys/socket.h> 41 #include <sys/kernel.h> 42 #include <sys/time.h> 43 #include <sys/pool.h> 44 45 #ifdef __OpenBSD__ 46 #include <dev/rndvar.h> 47 #else 48 #include <sys/rnd.h> 49 #endif 50 #include <net/if.h> 51 #include <net/if_types.h> 52 #include <net/bpf.h> 53 #include <net/route.h> 54 #include <net/if_pflog.h> 55 56 #include <netinet/in.h> 57 #include <netinet/in_var.h> 58 #include <netinet/in_systm.h> 59 #include <netinet/ip.h> 60 #include <netinet/ip_var.h> 61 #include <netinet/tcp.h> 62 #include <netinet/tcp_seq.h> 63 #include <netinet/udp.h> 64 #include <netinet/ip_icmp.h> 65 66 #ifdef INET6 67 #include <netinet/ip6.h> 68 #endif /* INET6 */ 69 70 #include <net/pfvar.h> 71 72 struct pf_frent { 73 LIST_ENTRY(pf_frent) fr_next; 74 struct ip *fr_ip; 75 struct mbuf *fr_m; 76 }; 77 78 struct pf_frcache { 79 LIST_ENTRY(pf_frcache) fr_next; 80 uint16_t fr_off; 81 uint16_t fr_end; 82 }; 83 84 #define PFFRAG_SEENLAST 0x0001 /* Seen the last fragment for this */ 85 #define PFFRAG_NOBUFFER 0x0002 /* Non-buffering fragment cache */ 86 #define PFFRAG_DROP 0x0004 /* Drop all fragments */ 87 #define BUFFER_FRAGMENTS(fr) (!((fr)->fr_flags & PFFRAG_NOBUFFER)) 88 89 struct pf_fragment { 90 RB_ENTRY(pf_fragment) fr_entry; 91 TAILQ_ENTRY(pf_fragment) frag_next; 92 struct in_addr fr_src; 93 struct in_addr fr_dst; 94 u_int8_t fr_p; /* protocol of this fragment */ 95 u_int8_t fr_flags; /* status flags */ 96 u_int16_t fr_id; /* fragment id for reassemble */ 97 u_int16_t fr_max; /* fragment data max */ 98 u_int32_t fr_timeout; 99 #define fr_queue fr_u.fru_queue 100 #define fr_cache fr_u.fru_cache 101 union { 102 LIST_HEAD(pf_fragq, pf_frent) fru_queue; /* buffering */ 103 LIST_HEAD(pf_cacheq, pf_frcache) fru_cache; /* non-buf */ 104 } fr_u; 105 }; 106 107 TAILQ_HEAD(pf_fragqueue, pf_fragment) pf_fragqueue; 108 TAILQ_HEAD(pf_cachequeue, pf_fragment) pf_cachequeue; 109 110 static __inline int pf_frag_compare(struct pf_fragment *, 111 struct pf_fragment *); 112 RB_HEAD(pf_frag_tree, pf_fragment) pf_frag_tree, pf_cache_tree; 113 RB_PROTOTYPE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare); 114 RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare); 115 116 /* Private prototypes */ 117 void pf_ip2key(struct pf_fragment *, struct ip *); 118 void pf_remove_fragment(struct pf_fragment *); 119 void pf_flush_fragments(void); 120 void pf_free_fragment(struct pf_fragment *); 121 struct pf_fragment *pf_find_fragment(struct ip *, struct pf_frag_tree *); 122 struct mbuf *pf_reassemble(struct mbuf **, struct pf_fragment **, 123 struct pf_frent *, int); 124 struct mbuf *pf_fragcache(struct mbuf **, struct ip*, 125 struct pf_fragment **, int, int, int *); 126 int pf_normalize_tcpopt(struct pf_rule *, struct mbuf *, 127 struct tcphdr *, int); 128 129 #define DPFPRINTF(x) do { \ 130 if (pf_status.debug >= PF_DEBUG_MISC) { \ 131 printf("%s: ", __func__); \ 132 printf x ; \ 133 } \ 134 } while(0) 135 136 /* Globals */ 137 struct pool pf_frent_pl, pf_frag_pl, pf_cache_pl, pf_cent_pl; 138 struct pool pf_state_scrub_pl; 139 int pf_nfrents, pf_ncache; 140 141 void 142 pf_normalize_init(void) 143 { 144 pool_init(&pf_frent_pl, sizeof(struct pf_frent), 0, 0, 0, "pffrent", 145 NULL); 146 pool_init(&pf_frag_pl, sizeof(struct pf_fragment), 0, 0, 0, "pffrag", 147 NULL); 148 pool_init(&pf_cache_pl, sizeof(struct pf_fragment), 0, 0, 0, 149 "pffrcache", NULL); 150 pool_init(&pf_cent_pl, sizeof(struct pf_frcache), 0, 0, 0, "pffrcent", 151 NULL); 152 pool_init(&pf_state_scrub_pl, sizeof(struct pf_state_scrub), 0, 0, 0, 153 "pfstscr", NULL); 154 155 pool_sethiwat(&pf_frag_pl, PFFRAG_FRAG_HIWAT); 156 pool_sethardlimit(&pf_frent_pl, PFFRAG_FRENT_HIWAT, NULL, 0); 157 pool_sethardlimit(&pf_cache_pl, PFFRAG_FRCACHE_HIWAT, NULL, 0); 158 pool_sethardlimit(&pf_cent_pl, PFFRAG_FRCENT_HIWAT, NULL, 0); 159 160 TAILQ_INIT(&pf_fragqueue); 161 TAILQ_INIT(&pf_cachequeue); 162 } 163 164 #ifdef _LKM 165 void 166 pf_normalize_destroy(void) 167 { 168 pool_destroy(&pf_state_scrub_pl); 169 pool_destroy(&pf_cent_pl); 170 pool_destroy(&pf_cache_pl); 171 pool_destroy(&pf_frag_pl); 172 pool_destroy(&pf_frent_pl); 173 } 174 #endif 175 176 static __inline int 177 pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b) 178 { 179 int diff; 180 181 if ((diff = a->fr_id - b->fr_id)) 182 return (diff); 183 else if ((diff = a->fr_p - b->fr_p)) 184 return (diff); 185 else if (a->fr_src.s_addr < b->fr_src.s_addr) 186 return (-1); 187 else if (a->fr_src.s_addr > b->fr_src.s_addr) 188 return (1); 189 else if (a->fr_dst.s_addr < b->fr_dst.s_addr) 190 return (-1); 191 else if (a->fr_dst.s_addr > b->fr_dst.s_addr) 192 return (1); 193 return (0); 194 } 195 196 void 197 pf_purge_expired_fragments(void) 198 { 199 struct pf_fragment *frag; 200 u_int32_t expire = time_second - 201 pf_default_rule.timeout[PFTM_FRAG]; 202 203 while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) { 204 KASSERT(BUFFER_FRAGMENTS(frag)); 205 if (frag->fr_timeout > expire) 206 break; 207 208 DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag)); 209 pf_free_fragment(frag); 210 } 211 212 while ((frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue)) != NULL) { 213 KASSERT(!BUFFER_FRAGMENTS(frag)); 214 if (frag->fr_timeout > expire) 215 break; 216 217 DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag)); 218 pf_free_fragment(frag); 219 KASSERT(TAILQ_EMPTY(&pf_cachequeue) || 220 TAILQ_LAST(&pf_cachequeue, pf_cachequeue) != frag); 221 } 222 } 223 224 /* 225 * Try to flush old fragments to make space for new ones 226 */ 227 228 void 229 pf_flush_fragments(void) 230 { 231 struct pf_fragment *frag; 232 int goal; 233 234 goal = pf_nfrents * 9 / 10; 235 DPFPRINTF(("trying to free > %d frents\n", 236 pf_nfrents - goal)); 237 while (goal < pf_nfrents) { 238 frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue); 239 if (frag == NULL) 240 break; 241 pf_free_fragment(frag); 242 } 243 244 245 goal = pf_ncache * 9 / 10; 246 DPFPRINTF(("trying to free > %d cache entries\n", 247 pf_ncache - goal)); 248 while (goal < pf_ncache) { 249 frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue); 250 if (frag == NULL) 251 break; 252 pf_free_fragment(frag); 253 } 254 } 255 256 /* Frees the fragments and all associated entries */ 257 258 void 259 pf_free_fragment(struct pf_fragment *frag) 260 { 261 struct pf_frent *frent; 262 struct pf_frcache *frcache; 263 264 /* Free all fragments */ 265 if (BUFFER_FRAGMENTS(frag)) { 266 for (frent = LIST_FIRST(&frag->fr_queue); frent; 267 frent = LIST_FIRST(&frag->fr_queue)) { 268 LIST_REMOVE(frent, fr_next); 269 270 m_freem(frent->fr_m); 271 pool_put(&pf_frent_pl, frent); 272 pf_nfrents--; 273 } 274 } else { 275 for (frcache = LIST_FIRST(&frag->fr_cache); frcache; 276 frcache = LIST_FIRST(&frag->fr_cache)) { 277 LIST_REMOVE(frcache, fr_next); 278 279 KASSERT(LIST_EMPTY(&frag->fr_cache) || 280 LIST_FIRST(&frag->fr_cache)->fr_off > 281 frcache->fr_end); 282 283 pool_put(&pf_cent_pl, frcache); 284 pf_ncache--; 285 } 286 } 287 288 pf_remove_fragment(frag); 289 } 290 291 void 292 pf_ip2key(struct pf_fragment *key, struct ip *ip) 293 { 294 key->fr_p = ip->ip_p; 295 key->fr_id = ip->ip_id; 296 key->fr_src.s_addr = ip->ip_src.s_addr; 297 key->fr_dst.s_addr = ip->ip_dst.s_addr; 298 } 299 300 struct pf_fragment * 301 pf_find_fragment(struct ip *ip, struct pf_frag_tree *tree) 302 { 303 struct pf_fragment key; 304 struct pf_fragment *frag; 305 306 pf_ip2key(&key, ip); 307 308 frag = RB_FIND(pf_frag_tree, tree, &key); 309 if (frag != NULL) { 310 /* XXX Are we sure we want to update the timeout? */ 311 frag->fr_timeout = time_second; 312 if (BUFFER_FRAGMENTS(frag)) { 313 TAILQ_REMOVE(&pf_fragqueue, frag, frag_next); 314 TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next); 315 } else { 316 TAILQ_REMOVE(&pf_cachequeue, frag, frag_next); 317 TAILQ_INSERT_HEAD(&pf_cachequeue, frag, frag_next); 318 } 319 } 320 321 return (frag); 322 } 323 324 /* Removes a fragment from the fragment queue and frees the fragment */ 325 326 void 327 pf_remove_fragment(struct pf_fragment *frag) 328 { 329 if (BUFFER_FRAGMENTS(frag)) { 330 RB_REMOVE(pf_frag_tree, &pf_frag_tree, frag); 331 TAILQ_REMOVE(&pf_fragqueue, frag, frag_next); 332 pool_put(&pf_frag_pl, frag); 333 } else { 334 RB_REMOVE(pf_frag_tree, &pf_cache_tree, frag); 335 TAILQ_REMOVE(&pf_cachequeue, frag, frag_next); 336 pool_put(&pf_cache_pl, frag); 337 } 338 } 339 340 #define FR_IP_OFF(fr) ((ntohs((fr)->fr_ip->ip_off) & IP_OFFMASK) << 3) 341 struct mbuf * 342 pf_reassemble(struct mbuf **m0, struct pf_fragment **frag, 343 struct pf_frent *frent, int mff) 344 { 345 struct mbuf *m = *m0, *m2; 346 struct pf_frent *frea, *next; 347 struct pf_frent *frep = NULL; 348 struct ip *ip = frent->fr_ip; 349 int hlen = ip->ip_hl << 2; 350 u_int16_t off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3; 351 u_int16_t ip_len = ntohs(ip->ip_len) - ip->ip_hl * 4; 352 u_int16_t max = ip_len + off; 353 354 KASSERT(*frag == NULL || BUFFER_FRAGMENTS(*frag)); 355 356 /* Strip off ip header */ 357 m->m_data += hlen; 358 m->m_len -= hlen; 359 360 /* Create a new reassembly queue for this packet */ 361 if (*frag == NULL) { 362 *frag = pool_get(&pf_frag_pl, PR_NOWAIT); 363 if (*frag == NULL) { 364 pf_flush_fragments(); 365 *frag = pool_get(&pf_frag_pl, PR_NOWAIT); 366 if (*frag == NULL) 367 goto drop_fragment; 368 } 369 370 (*frag)->fr_flags = 0; 371 (*frag)->fr_max = 0; 372 (*frag)->fr_src = frent->fr_ip->ip_src; 373 (*frag)->fr_dst = frent->fr_ip->ip_dst; 374 (*frag)->fr_p = frent->fr_ip->ip_p; 375 (*frag)->fr_id = frent->fr_ip->ip_id; 376 (*frag)->fr_timeout = time_second; 377 LIST_INIT(&(*frag)->fr_queue); 378 379 RB_INSERT(pf_frag_tree, &pf_frag_tree, *frag); 380 TAILQ_INSERT_HEAD(&pf_fragqueue, *frag, frag_next); 381 382 /* We do not have a previous fragment */ 383 frep = NULL; 384 goto insert; 385 } 386 387 /* 388 * Find a fragment after the current one: 389 * - off contains the real shifted offset. 390 */ 391 LIST_FOREACH(frea, &(*frag)->fr_queue, fr_next) { 392 if (FR_IP_OFF(frea) > off) 393 break; 394 frep = frea; 395 } 396 397 KASSERT(frep != NULL || frea != NULL); 398 399 if (frep != NULL && 400 FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl * 401 4 > off) 402 { 403 u_int16_t precut; 404 405 precut = FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) - 406 frep->fr_ip->ip_hl * 4 - off; 407 if (precut >= ip_len) 408 goto drop_fragment; 409 m_adj(frent->fr_m, precut); 410 DPFPRINTF(("overlap -%d\n", precut)); 411 /* Enforce 8 byte boundaries */ 412 ip->ip_off = htons(ntohs(ip->ip_off) + (precut >> 3)); 413 off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3; 414 ip_len -= precut; 415 ip->ip_len = htons(ip_len); 416 } 417 418 for (; frea != NULL && ip_len + off > FR_IP_OFF(frea); 419 frea = next) 420 { 421 u_int16_t aftercut; 422 423 aftercut = ip_len + off - FR_IP_OFF(frea); 424 DPFPRINTF(("adjust overlap %d\n", aftercut)); 425 if (aftercut < ntohs(frea->fr_ip->ip_len) - frea->fr_ip->ip_hl 426 * 4) 427 { 428 frea->fr_ip->ip_len = 429 htons(ntohs(frea->fr_ip->ip_len) - aftercut); 430 frea->fr_ip->ip_off = htons(ntohs(frea->fr_ip->ip_off) + 431 (aftercut >> 3)); 432 m_adj(frea->fr_m, aftercut); 433 break; 434 } 435 436 /* This fragment is completely overlapped, loose it */ 437 next = LIST_NEXT(frea, fr_next); 438 m_freem(frea->fr_m); 439 LIST_REMOVE(frea, fr_next); 440 pool_put(&pf_frent_pl, frea); 441 pf_nfrents--; 442 } 443 444 insert: 445 /* Update maximum data size */ 446 if ((*frag)->fr_max < max) 447 (*frag)->fr_max = max; 448 /* This is the last segment */ 449 if (!mff) 450 (*frag)->fr_flags |= PFFRAG_SEENLAST; 451 452 if (frep == NULL) 453 LIST_INSERT_HEAD(&(*frag)->fr_queue, frent, fr_next); 454 else 455 LIST_INSERT_AFTER(frep, frent, fr_next); 456 457 /* Check if we are completely reassembled */ 458 if (!((*frag)->fr_flags & PFFRAG_SEENLAST)) 459 return (NULL); 460 461 /* Check if we have all the data */ 462 off = 0; 463 for (frep = LIST_FIRST(&(*frag)->fr_queue); frep; frep = next) { 464 next = LIST_NEXT(frep, fr_next); 465 466 off += ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl * 4; 467 if (off < (*frag)->fr_max && 468 (next == NULL || FR_IP_OFF(next) != off)) 469 { 470 DPFPRINTF(("missing fragment at %d, next %d, max %d\n", 471 off, next == NULL ? -1 : FR_IP_OFF(next), 472 (*frag)->fr_max)); 473 return (NULL); 474 } 475 } 476 DPFPRINTF(("%d < %d?\n", off, (*frag)->fr_max)); 477 if (off < (*frag)->fr_max) 478 return (NULL); 479 480 /* We have all the data */ 481 frent = LIST_FIRST(&(*frag)->fr_queue); 482 KASSERT(frent != NULL); 483 if ((frent->fr_ip->ip_hl << 2) + off > IP_MAXPACKET) { 484 DPFPRINTF(("drop: too big: %d\n", off)); 485 pf_free_fragment(*frag); 486 *frag = NULL; 487 return (NULL); 488 } 489 next = LIST_NEXT(frent, fr_next); 490 491 /* Magic from ip_input */ 492 ip = frent->fr_ip; 493 m = frent->fr_m; 494 m2 = m->m_next; 495 m->m_next = NULL; 496 m_cat(m, m2); 497 pool_put(&pf_frent_pl, frent); 498 pf_nfrents--; 499 for (frent = next; frent != NULL; frent = next) { 500 next = LIST_NEXT(frent, fr_next); 501 502 m2 = frent->fr_m; 503 pool_put(&pf_frent_pl, frent); 504 pf_nfrents--; 505 m_cat(m, m2); 506 } 507 508 ip->ip_src = (*frag)->fr_src; 509 ip->ip_dst = (*frag)->fr_dst; 510 511 /* Remove from fragment queue */ 512 pf_remove_fragment(*frag); 513 *frag = NULL; 514 515 hlen = ip->ip_hl << 2; 516 ip->ip_len = htons(off + hlen); 517 m->m_len += hlen; 518 m->m_data -= hlen; 519 520 /* some debugging cruft by sklower, below, will go away soon */ 521 /* XXX this should be done elsewhere */ 522 if (m->m_flags & M_PKTHDR) { 523 int plen = 0; 524 for (m2 = m; m2; m2 = m2->m_next) 525 plen += m2->m_len; 526 m->m_pkthdr.len = plen; 527 #if defined(__NetBSD__) 528 m->m_pkthdr.csum_flags = 0; 529 #endif /* defined(__NetBSD__) */ 530 } 531 532 DPFPRINTF(("complete: %p(%d)\n", m, ntohs(ip->ip_len))); 533 return (m); 534 535 drop_fragment: 536 /* Oops - fail safe - drop packet */ 537 pool_put(&pf_frent_pl, frent); 538 pf_nfrents--; 539 m_freem(m); 540 return (NULL); 541 } 542 543 struct mbuf * 544 pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, 545 int drop, int *nomem) 546 { 547 struct mbuf *m = *m0; 548 struct pf_frcache *frp, *fra, *cur = NULL; 549 int ip_len = ntohs(h->ip_len) - (h->ip_hl << 2); 550 u_int16_t off = ntohs(h->ip_off) << 3; 551 u_int16_t max = ip_len + off; 552 int hosed = 0; 553 554 KASSERT(*frag == NULL || !BUFFER_FRAGMENTS(*frag)); 555 556 /* Create a new range queue for this packet */ 557 if (*frag == NULL) { 558 *frag = pool_get(&pf_cache_pl, PR_NOWAIT); 559 if (*frag == NULL) { 560 pf_flush_fragments(); 561 *frag = pool_get(&pf_cache_pl, PR_NOWAIT); 562 if (*frag == NULL) 563 goto no_mem; 564 } 565 566 /* Get an entry for the queue */ 567 cur = pool_get(&pf_cent_pl, PR_NOWAIT); 568 if (cur == NULL) { 569 pool_put(&pf_cache_pl, *frag); 570 *frag = NULL; 571 goto no_mem; 572 } 573 pf_ncache++; 574 575 (*frag)->fr_flags = PFFRAG_NOBUFFER; 576 (*frag)->fr_max = 0; 577 (*frag)->fr_src = h->ip_src; 578 (*frag)->fr_dst = h->ip_dst; 579 (*frag)->fr_p = h->ip_p; 580 (*frag)->fr_id = h->ip_id; 581 (*frag)->fr_timeout = time_second; 582 583 cur->fr_off = off; 584 cur->fr_end = max; 585 LIST_INIT(&(*frag)->fr_cache); 586 LIST_INSERT_HEAD(&(*frag)->fr_cache, cur, fr_next); 587 588 RB_INSERT(pf_frag_tree, &pf_cache_tree, *frag); 589 TAILQ_INSERT_HEAD(&pf_cachequeue, *frag, frag_next); 590 591 DPFPRINTF(("fragcache[%d]: new %d-%d\n", h->ip_id, off, max)); 592 593 goto pass; 594 } 595 596 /* 597 * Find a fragment after the current one: 598 * - off contains the real shifted offset. 599 */ 600 frp = NULL; 601 LIST_FOREACH(fra, &(*frag)->fr_cache, fr_next) { 602 if (fra->fr_off > off) 603 break; 604 frp = fra; 605 } 606 607 KASSERT(frp != NULL || fra != NULL); 608 609 if (frp != NULL) { 610 int precut; 611 612 precut = frp->fr_end - off; 613 if (precut >= ip_len) { 614 /* Fragment is entirely a duplicate */ 615 DPFPRINTF(("fragcache[%d]: dead (%d-%d) %d-%d\n", 616 h->ip_id, frp->fr_off, frp->fr_end, off, max)); 617 goto drop_fragment; 618 } 619 if (precut == 0) { 620 /* They are adjacent. Fixup cache entry */ 621 DPFPRINTF(("fragcache[%d]: adjacent (%d-%d) %d-%d\n", 622 h->ip_id, frp->fr_off, frp->fr_end, off, max)); 623 frp->fr_end = max; 624 } else if (precut > 0) { 625 /* The first part of this payload overlaps with a 626 * fragment that has already been passed. 627 * Need to trim off the first part of the payload. 628 * But to do so easily, we need to create another 629 * mbuf to throw the original header into. 630 */ 631 632 DPFPRINTF(("fragcache[%d]: chop %d (%d-%d) %d-%d\n", 633 h->ip_id, precut, frp->fr_off, frp->fr_end, off, 634 max)); 635 636 off += precut; 637 max -= precut; 638 /* Update the previous frag to encompass this one */ 639 frp->fr_end = max; 640 641 if (!drop) { 642 /* XXX Optimization opportunity 643 * This is a very heavy way to trim the payload. 644 * we could do it much faster by diddling mbuf 645 * internals but that would be even less legible 646 * than this mbuf magic. For my next trick, 647 * I'll pull a rabbit out of my laptop. 648 */ 649 *m0 = m_copym2(m, 0, h->ip_hl << 2, M_NOWAIT); 650 if (*m0 == NULL) 651 goto no_mem; 652 KASSERT((*m0)->m_next == NULL); 653 m_adj(m, precut + (h->ip_hl << 2)); 654 m_cat(*m0, m); 655 m = *m0; 656 if (m->m_flags & M_PKTHDR) { 657 int plen = 0; 658 struct mbuf *t; 659 for (t = m; t; t = t->m_next) 660 plen += t->m_len; 661 m->m_pkthdr.len = plen; 662 } 663 664 665 h = mtod(m, struct ip *); 666 667 668 KASSERT((int)m->m_len == 669 ntohs(h->ip_len) - precut); 670 h->ip_off = htons(ntohs(h->ip_off) + 671 (precut >> 3)); 672 h->ip_len = htons(ntohs(h->ip_len) - precut); 673 } else { 674 hosed++; 675 } 676 } else { 677 /* There is a gap between fragments */ 678 679 DPFPRINTF(("fragcache[%d]: gap %d (%d-%d) %d-%d\n", 680 h->ip_id, -precut, frp->fr_off, frp->fr_end, off, 681 max)); 682 683 cur = pool_get(&pf_cent_pl, PR_NOWAIT); 684 if (cur == NULL) 685 goto no_mem; 686 pf_ncache++; 687 688 cur->fr_off = off; 689 cur->fr_end = max; 690 LIST_INSERT_AFTER(frp, cur, fr_next); 691 } 692 } 693 694 if (fra != NULL) { 695 int aftercut; 696 int merge = 0; 697 698 aftercut = max - fra->fr_off; 699 if (aftercut == 0) { 700 /* Adjacent fragments */ 701 DPFPRINTF(("fragcache[%d]: adjacent %d-%d (%d-%d)\n", 702 h->ip_id, off, max, fra->fr_off, fra->fr_end)); 703 fra->fr_off = off; 704 merge = 1; 705 } else if (aftercut > 0) { 706 /* Need to chop off the tail of this fragment */ 707 DPFPRINTF(("fragcache[%d]: chop %d %d-%d (%d-%d)\n", 708 h->ip_id, aftercut, off, max, fra->fr_off, 709 fra->fr_end)); 710 fra->fr_off = off; 711 max -= aftercut; 712 713 merge = 1; 714 715 if (!drop) { 716 m_adj(m, -aftercut); 717 if (m->m_flags & M_PKTHDR) { 718 int plen = 0; 719 struct mbuf *t; 720 for (t = m; t; t = t->m_next) 721 plen += t->m_len; 722 m->m_pkthdr.len = plen; 723 } 724 h = mtod(m, struct ip *); 725 KASSERT((int)m->m_len == 726 ntohs(h->ip_len) - aftercut); 727 h->ip_len = htons(ntohs(h->ip_len) - aftercut); 728 } else { 729 hosed++; 730 } 731 } else { 732 /* There is a gap between fragments */ 733 DPFPRINTF(("fragcache[%d]: gap %d %d-%d (%d-%d)\n", 734 h->ip_id, -aftercut, off, max, fra->fr_off, 735 fra->fr_end)); 736 737 cur = pool_get(&pf_cent_pl, PR_NOWAIT); 738 if (cur == NULL) 739 goto no_mem; 740 pf_ncache++; 741 742 cur->fr_off = off; 743 cur->fr_end = max; 744 LIST_INSERT_BEFORE(fra, cur, fr_next); 745 } 746 747 748 /* Need to glue together two separate fragment descriptors */ 749 if (merge) { 750 if (cur && fra->fr_off <= cur->fr_end) { 751 /* Need to merge in a previous 'cur' */ 752 DPFPRINTF(("fragcache[%d]: adjacent(merge " 753 "%d-%d) %d-%d (%d-%d)\n", 754 h->ip_id, cur->fr_off, cur->fr_end, off, 755 max, fra->fr_off, fra->fr_end)); 756 fra->fr_off = cur->fr_off; 757 LIST_REMOVE(cur, fr_next); 758 pool_put(&pf_cent_pl, cur); 759 pf_ncache--; 760 cur = NULL; 761 762 } else if (frp && fra->fr_off <= frp->fr_end) { 763 /* Need to merge in a modified 'frp' */ 764 KASSERT(cur == NULL); 765 DPFPRINTF(("fragcache[%d]: adjacent(merge " 766 "%d-%d) %d-%d (%d-%d)\n", 767 h->ip_id, frp->fr_off, frp->fr_end, off, 768 max, fra->fr_off, fra->fr_end)); 769 fra->fr_off = frp->fr_off; 770 LIST_REMOVE(frp, fr_next); 771 pool_put(&pf_cent_pl, frp); 772 pf_ncache--; 773 frp = NULL; 774 775 } 776 } 777 } 778 779 if (hosed) { 780 /* 781 * We must keep tracking the overall fragment even when 782 * we're going to drop it anyway so that we know when to 783 * free the overall descriptor. Thus we drop the frag late. 784 */ 785 goto drop_fragment; 786 } 787 788 789 pass: 790 /* Update maximum data size */ 791 if ((*frag)->fr_max < max) 792 (*frag)->fr_max = max; 793 794 /* This is the last segment */ 795 if (!mff) 796 (*frag)->fr_flags |= PFFRAG_SEENLAST; 797 798 /* Check if we are completely reassembled */ 799 if (((*frag)->fr_flags & PFFRAG_SEENLAST) && 800 LIST_FIRST(&(*frag)->fr_cache)->fr_off == 0 && 801 LIST_FIRST(&(*frag)->fr_cache)->fr_end == (*frag)->fr_max) { 802 /* Remove from fragment queue */ 803 DPFPRINTF(("fragcache[%d]: done 0-%d\n", h->ip_id, 804 (*frag)->fr_max)); 805 pf_free_fragment(*frag); 806 *frag = NULL; 807 } 808 809 return (m); 810 811 no_mem: 812 *nomem = 1; 813 814 /* Still need to pay attention to !IP_MF */ 815 if (!mff && *frag != NULL) 816 (*frag)->fr_flags |= PFFRAG_SEENLAST; 817 818 m_freem(m); 819 return (NULL); 820 821 drop_fragment: 822 823 /* Still need to pay attention to !IP_MF */ 824 if (!mff && *frag != NULL) 825 (*frag)->fr_flags |= PFFRAG_SEENLAST; 826 827 if (drop) { 828 /* This fragment has been deemed bad. Don't reass */ 829 if (((*frag)->fr_flags & PFFRAG_DROP) == 0) 830 DPFPRINTF(("fragcache[%d]: dropping overall fragment\n", 831 h->ip_id)); 832 (*frag)->fr_flags |= PFFRAG_DROP; 833 } 834 835 m_freem(m); 836 return (NULL); 837 } 838 839 int 840 pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, 841 struct pf_pdesc *pd) 842 { 843 struct mbuf *m = *m0; 844 struct pf_rule *r; 845 struct pf_frent *frent; 846 struct pf_fragment *frag = NULL; 847 struct ip *h = mtod(m, struct ip *); 848 int mff = (ntohs(h->ip_off) & IP_MF); 849 int hlen = h->ip_hl << 2; 850 u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3; 851 u_int16_t max; 852 int ip_len; 853 int ip_off; 854 855 r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr); 856 while (r != NULL) { 857 r->evaluations++; 858 if (r->kif != NULL && 859 (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot) 860 r = r->skip[PF_SKIP_IFP].ptr; 861 else if (r->direction && r->direction != dir) 862 r = r->skip[PF_SKIP_DIR].ptr; 863 else if (r->af && r->af != AF_INET) 864 r = r->skip[PF_SKIP_AF].ptr; 865 else if (r->proto && r->proto != h->ip_p) 866 r = r->skip[PF_SKIP_PROTO].ptr; 867 else if (PF_MISMATCHAW(&r->src.addr, 868 (struct pf_addr *)&h->ip_src.s_addr, AF_INET, r->src.neg)) 869 r = r->skip[PF_SKIP_SRC_ADDR].ptr; 870 else if (PF_MISMATCHAW(&r->dst.addr, 871 (struct pf_addr *)&h->ip_dst.s_addr, AF_INET, r->dst.neg)) 872 r = r->skip[PF_SKIP_DST_ADDR].ptr; 873 else 874 break; 875 } 876 877 if (r == NULL) 878 return (PF_PASS); 879 else 880 r->packets++; 881 882 /* Check for illegal packets */ 883 if (hlen < (int)sizeof(struct ip)) 884 goto drop; 885 886 if (hlen > ntohs(h->ip_len)) 887 goto drop; 888 889 /* Clear IP_DF if the rule uses the no-df option */ 890 if (r->rule_flag & PFRULE_NODF) 891 h->ip_off &= htons(~IP_DF); 892 893 /* We will need other tests here */ 894 if (!fragoff && !mff) 895 goto no_fragment; 896 897 /* We're dealing with a fragment now. Don't allow fragments 898 * with IP_DF to enter the cache. If the flag was cleared by 899 * no-df above, fine. Otherwise drop it. 900 */ 901 if (h->ip_off & htons(IP_DF)) { 902 DPFPRINTF(("IP_DF\n")); 903 goto bad; 904 } 905 906 ip_len = ntohs(h->ip_len) - hlen; 907 ip_off = (ntohs(h->ip_off) & IP_OFFMASK) << 3; 908 909 /* All fragments are 8 byte aligned */ 910 if (mff && (ip_len & 0x7)) { 911 DPFPRINTF(("mff and %d\n", ip_len)); 912 goto bad; 913 } 914 915 /* Respect maximum length */ 916 if (fragoff + ip_len > IP_MAXPACKET) { 917 DPFPRINTF(("max packet %d\n", fragoff + ip_len)); 918 goto bad; 919 } 920 max = fragoff + ip_len; 921 922 if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) { 923 /* Fully buffer all of the fragments */ 924 925 frag = pf_find_fragment(h, &pf_frag_tree); 926 927 /* Check if we saw the last fragment already */ 928 if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) && 929 max > frag->fr_max) 930 goto bad; 931 932 /* Get an entry for the fragment queue */ 933 frent = pool_get(&pf_frent_pl, PR_NOWAIT); 934 if (frent == NULL) { 935 REASON_SET(reason, PFRES_MEMORY); 936 return (PF_DROP); 937 } 938 pf_nfrents++; 939 frent->fr_ip = h; 940 frent->fr_m = m; 941 942 /* Might return a completely reassembled mbuf, or NULL */ 943 DPFPRINTF(("reass frag %d @ %d-%d\n", h->ip_id, fragoff, max)); 944 *m0 = m = pf_reassemble(m0, &frag, frent, mff); 945 946 if (m == NULL) 947 return (PF_DROP); 948 949 if (frag != NULL && (frag->fr_flags & PFFRAG_DROP)) 950 goto drop; 951 952 h = mtod(m, struct ip *); 953 } else { 954 /* non-buffering fragment cache (drops or masks overlaps) */ 955 int nomem = 0; 956 957 if (dir == PF_OUT) { 958 if (m_tag_find(m, PACKET_TAG_PF_FRAGCACHE, NULL) != 959 NULL) { 960 /* Already passed the fragment cache in the 961 * input direction. If we continued, it would 962 * appear to be a dup and would be dropped. 963 */ 964 goto fragment_pass; 965 } 966 } 967 968 frag = pf_find_fragment(h, &pf_cache_tree); 969 970 /* Check if we saw the last fragment already */ 971 if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) && 972 max > frag->fr_max) { 973 if (r->rule_flag & PFRULE_FRAGDROP) 974 frag->fr_flags |= PFFRAG_DROP; 975 goto bad; 976 } 977 978 *m0 = m = pf_fragcache(m0, h, &frag, mff, 979 (r->rule_flag & PFRULE_FRAGDROP) ? 1 : 0, &nomem); 980 if (m == NULL) { 981 if (nomem) 982 goto no_mem; 983 goto drop; 984 } 985 986 if (dir == PF_IN) { 987 struct m_tag *mtag; 988 989 mtag = m_tag_get(PACKET_TAG_PF_FRAGCACHE, 0, M_NOWAIT); 990 if (mtag == NULL) 991 goto no_mem; 992 m_tag_prepend(m, mtag); 993 } 994 if (frag != NULL && (frag->fr_flags & PFFRAG_DROP)) 995 goto drop; 996 goto fragment_pass; 997 } 998 999 no_fragment: 1000 /* At this point, only IP_DF is allowed in ip_off */ 1001 h->ip_off &= htons(IP_DF); 1002 1003 /* Enforce a minimum ttl, may cause endless packet loops */ 1004 if (r->min_ttl && h->ip_ttl < r->min_ttl) 1005 h->ip_ttl = r->min_ttl; 1006 1007 if (r->rule_flag & PFRULE_RANDOMID) { 1008 u_int16_t ip_id = h->ip_id; 1009 1010 h->ip_id = ip_randomid(); 1011 h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_id, h->ip_id, 0); 1012 } 1013 if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) 1014 pd->flags |= PFDESC_IP_REAS; 1015 1016 return (PF_PASS); 1017 1018 fragment_pass: 1019 /* Enforce a minimum ttl, may cause endless packet loops */ 1020 if (r->min_ttl && h->ip_ttl < r->min_ttl) 1021 h->ip_ttl = r->min_ttl; 1022 if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) 1023 pd->flags |= PFDESC_IP_REAS; 1024 return (PF_PASS); 1025 1026 no_mem: 1027 REASON_SET(reason, PFRES_MEMORY); 1028 if (r != NULL && r->log) 1029 PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL); 1030 return (PF_DROP); 1031 1032 drop: 1033 REASON_SET(reason, PFRES_NORM); 1034 if (r != NULL && r->log) 1035 PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL); 1036 return (PF_DROP); 1037 1038 bad: 1039 DPFPRINTF(("dropping bad fragment\n")); 1040 1041 /* Free associated fragments */ 1042 if (frag != NULL) 1043 pf_free_fragment(frag); 1044 1045 REASON_SET(reason, PFRES_FRAG); 1046 if (r != NULL && r->log) 1047 PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL); 1048 1049 return (PF_DROP); 1050 } 1051 1052 #ifdef INET6 1053 int 1054 pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif, 1055 u_short *reason, struct pf_pdesc *pd) 1056 { 1057 struct mbuf *m = *m0; 1058 struct pf_rule *r; 1059 struct ip6_hdr *h = mtod(m, struct ip6_hdr *); 1060 int off; 1061 struct ip6_ext ext; 1062 struct ip6_opt opt; 1063 struct ip6_opt_jumbo jumbo; 1064 struct ip6_frag frag; 1065 u_int32_t jumbolen = 0, plen; 1066 u_int16_t fragoff = 0; 1067 int optend; 1068 int ooff; 1069 u_int8_t proto; 1070 int terminal; 1071 1072 r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr); 1073 while (r != NULL) { 1074 r->evaluations++; 1075 if (r->kif != NULL && 1076 (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot) 1077 r = r->skip[PF_SKIP_IFP].ptr; 1078 else if (r->direction && r->direction != dir) 1079 r = r->skip[PF_SKIP_DIR].ptr; 1080 else if (r->af && r->af != AF_INET6) 1081 r = r->skip[PF_SKIP_AF].ptr; 1082 #if 0 /* header chain! */ 1083 else if (r->proto && r->proto != h->ip6_nxt) 1084 r = r->skip[PF_SKIP_PROTO].ptr; 1085 #endif 1086 else if (PF_MISMATCHAW(&r->src.addr, 1087 (struct pf_addr *)&h->ip6_src, AF_INET6, r->src.neg)) 1088 r = r->skip[PF_SKIP_SRC_ADDR].ptr; 1089 else if (PF_MISMATCHAW(&r->dst.addr, 1090 (struct pf_addr *)&h->ip6_dst, AF_INET6, r->dst.neg)) 1091 r = r->skip[PF_SKIP_DST_ADDR].ptr; 1092 else 1093 break; 1094 } 1095 1096 if (r == NULL) 1097 return (PF_PASS); 1098 else 1099 r->packets++; 1100 1101 /* Check for illegal packets */ 1102 if (sizeof(struct ip6_hdr) + IPV6_MAXPACKET < m->m_pkthdr.len) 1103 goto drop; 1104 1105 off = sizeof(struct ip6_hdr); 1106 proto = h->ip6_nxt; 1107 terminal = 0; 1108 do { 1109 switch (proto) { 1110 case IPPROTO_FRAGMENT: 1111 goto fragment; 1112 break; 1113 case IPPROTO_AH: 1114 case IPPROTO_ROUTING: 1115 case IPPROTO_DSTOPTS: 1116 if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL, 1117 NULL, AF_INET6)) 1118 goto shortpkt; 1119 if (proto == IPPROTO_AH) 1120 off += (ext.ip6e_len + 2) * 4; 1121 else 1122 off += (ext.ip6e_len + 1) * 8; 1123 proto = ext.ip6e_nxt; 1124 break; 1125 case IPPROTO_HOPOPTS: 1126 if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL, 1127 NULL, AF_INET6)) 1128 goto shortpkt; 1129 optend = off + (ext.ip6e_len + 1) * 8; 1130 ooff = off + sizeof(ext); 1131 do { 1132 if (!pf_pull_hdr(m, ooff, &opt.ip6o_type, 1133 sizeof(opt.ip6o_type), NULL, NULL, 1134 AF_INET6)) 1135 goto shortpkt; 1136 if (opt.ip6o_type == IP6OPT_PAD1) { 1137 ooff++; 1138 continue; 1139 } 1140 if (!pf_pull_hdr(m, ooff, &opt, sizeof(opt), 1141 NULL, NULL, AF_INET6)) 1142 goto shortpkt; 1143 if (ooff + sizeof(opt) + opt.ip6o_len > optend) 1144 goto drop; 1145 switch (opt.ip6o_type) { 1146 case IP6OPT_JUMBO: 1147 if (h->ip6_plen != 0) 1148 goto drop; 1149 if (!pf_pull_hdr(m, ooff, &jumbo, 1150 sizeof(jumbo), NULL, NULL, 1151 AF_INET6)) 1152 goto shortpkt; 1153 memcpy(&jumbolen, jumbo.ip6oj_jumbo_len, 1154 sizeof(jumbolen)); 1155 jumbolen = ntohl(jumbolen); 1156 if (jumbolen <= IPV6_MAXPACKET) 1157 goto drop; 1158 if (sizeof(struct ip6_hdr) + jumbolen != 1159 m->m_pkthdr.len) 1160 goto drop; 1161 break; 1162 default: 1163 break; 1164 } 1165 ooff += sizeof(opt) + opt.ip6o_len; 1166 } while (ooff < optend); 1167 1168 off = optend; 1169 proto = ext.ip6e_nxt; 1170 break; 1171 default: 1172 terminal = 1; 1173 break; 1174 } 1175 } while (!terminal); 1176 1177 /* jumbo payload option must be present, or plen > 0 */ 1178 if (ntohs(h->ip6_plen) == 0) 1179 plen = jumbolen; 1180 else 1181 plen = ntohs(h->ip6_plen); 1182 if (plen == 0) 1183 goto drop; 1184 if (sizeof(struct ip6_hdr) + plen > m->m_pkthdr.len) 1185 goto shortpkt; 1186 1187 /* Enforce a minimum ttl, may cause endless packet loops */ 1188 if (r->min_ttl && h->ip6_hlim < r->min_ttl) 1189 h->ip6_hlim = r->min_ttl; 1190 1191 return (PF_PASS); 1192 1193 fragment: 1194 if (ntohs(h->ip6_plen) == 0 || jumbolen) 1195 goto drop; 1196 plen = ntohs(h->ip6_plen); 1197 1198 if (!pf_pull_hdr(m, off, &frag, sizeof(frag), NULL, NULL, AF_INET6)) 1199 goto shortpkt; 1200 fragoff = ntohs(frag.ip6f_offlg & IP6F_OFF_MASK); 1201 if (fragoff + (plen - off - sizeof(frag)) > IPV6_MAXPACKET) 1202 goto badfrag; 1203 1204 /* do something about it */ 1205 /* remember to set pd->flags |= PFDESC_IP_REAS */ 1206 return (PF_PASS); 1207 1208 shortpkt: 1209 REASON_SET(reason, PFRES_SHORT); 1210 if (r != NULL && r->log) 1211 PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL); 1212 return (PF_DROP); 1213 1214 drop: 1215 REASON_SET(reason, PFRES_NORM); 1216 if (r != NULL && r->log) 1217 PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL); 1218 return (PF_DROP); 1219 1220 badfrag: 1221 REASON_SET(reason, PFRES_FRAG); 1222 if (r != NULL && r->log) 1223 PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL); 1224 return (PF_DROP); 1225 } 1226 #endif /* INET6 */ 1227 1228 int 1229 pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff, 1230 int off, void *h, struct pf_pdesc *pd) 1231 { 1232 struct pf_rule *r, *rm = NULL; 1233 struct tcphdr *th = pd->hdr.tcp; 1234 int rewrite = 0; 1235 u_short reason; 1236 u_int8_t flags; 1237 sa_family_t af = pd->af; 1238 1239 r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr); 1240 while (r != NULL) { 1241 r->evaluations++; 1242 if (r->kif != NULL && 1243 (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot) 1244 r = r->skip[PF_SKIP_IFP].ptr; 1245 else if (r->direction && r->direction != dir) 1246 r = r->skip[PF_SKIP_DIR].ptr; 1247 else if (r->af && r->af != af) 1248 r = r->skip[PF_SKIP_AF].ptr; 1249 else if (r->proto && r->proto != pd->proto) 1250 r = r->skip[PF_SKIP_PROTO].ptr; 1251 else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, r->src.neg)) 1252 r = r->skip[PF_SKIP_SRC_ADDR].ptr; 1253 else if (r->src.port_op && !pf_match_port(r->src.port_op, 1254 r->src.port[0], r->src.port[1], th->th_sport)) 1255 r = r->skip[PF_SKIP_SRC_PORT].ptr; 1256 else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, r->dst.neg)) 1257 r = r->skip[PF_SKIP_DST_ADDR].ptr; 1258 else if (r->dst.port_op && !pf_match_port(r->dst.port_op, 1259 r->dst.port[0], r->dst.port[1], th->th_dport)) 1260 r = r->skip[PF_SKIP_DST_PORT].ptr; 1261 else if (r->os_fingerprint != PF_OSFP_ANY && !pf_osfp_match( 1262 pf_osfp_fingerprint(pd, m, off, th), 1263 r->os_fingerprint)) 1264 r = TAILQ_NEXT(r, entries); 1265 else { 1266 rm = r; 1267 break; 1268 } 1269 } 1270 1271 if (rm == NULL || rm->action == PF_NOSCRUB) 1272 return (PF_PASS); 1273 else 1274 r->packets++; 1275 1276 if (rm->rule_flag & PFRULE_REASSEMBLE_TCP) 1277 pd->flags |= PFDESC_TCP_NORM; 1278 1279 flags = th->th_flags; 1280 if (flags & TH_SYN) { 1281 /* Illegal packet */ 1282 if (flags & TH_RST) 1283 goto tcp_drop; 1284 1285 if (flags & TH_FIN) 1286 flags &= ~TH_FIN; 1287 } else { 1288 /* Illegal packet */ 1289 if (!(flags & (TH_ACK|TH_RST))) 1290 goto tcp_drop; 1291 } 1292 1293 if (!(flags & TH_ACK)) { 1294 /* These flags are only valid if ACK is set */ 1295 if ((flags & TH_FIN) || (flags & TH_PUSH) || (flags & TH_URG)) 1296 goto tcp_drop; 1297 } 1298 1299 /* Check for illegal header length */ 1300 if (th->th_off < (sizeof(struct tcphdr) >> 2)) 1301 goto tcp_drop; 1302 1303 /* If flags changed, or reserved data set, then adjust */ 1304 if (flags != th->th_flags || th->th_x2 != 0) { 1305 u_int16_t ov, nv; 1306 1307 ov = *(u_int16_t *)(&th->th_ack + 1); 1308 th->th_flags = flags; 1309 th->th_x2 = 0; 1310 nv = *(u_int16_t *)(&th->th_ack + 1); 1311 1312 th->th_sum = pf_cksum_fixup(th->th_sum, ov, nv, 0); 1313 rewrite = 1; 1314 } 1315 1316 /* Remove urgent pointer, if TH_URG is not set */ 1317 if (!(flags & TH_URG) && th->th_urp) { 1318 th->th_sum = pf_cksum_fixup(th->th_sum, th->th_urp, 0, 0); 1319 th->th_urp = 0; 1320 rewrite = 1; 1321 } 1322 1323 /* Process options */ 1324 if (r->max_mss && pf_normalize_tcpopt(r, m, th, off)) 1325 rewrite = 1; 1326 1327 /* copy back packet headers if we sanitized */ 1328 if (rewrite) 1329 m_copyback(m, off, sizeof(*th), th); 1330 1331 return (PF_PASS); 1332 1333 tcp_drop: 1334 REASON_SET(&reason, PFRES_NORM); 1335 if (rm != NULL && r->log) 1336 PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, r, NULL, NULL); 1337 return (PF_DROP); 1338 } 1339 1340 int 1341 pf_normalize_tcp_init(struct mbuf *m, int off, struct pf_pdesc *pd, 1342 struct tcphdr *th, struct pf_state_peer *src, struct pf_state_peer *dst) 1343 { 1344 u_int32_t tsval, tsecr; 1345 u_int8_t hdr[60]; 1346 u_int8_t *opt; 1347 1348 KASSERT(src->scrub == NULL); 1349 1350 src->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT); 1351 if (src->scrub == NULL) 1352 return (1); 1353 bzero(src->scrub, sizeof(*src->scrub)); 1354 1355 switch (pd->af) { 1356 #ifdef INET 1357 case AF_INET: { 1358 struct ip *h = mtod(m, struct ip *); 1359 src->scrub->pfss_ttl = h->ip_ttl; 1360 break; 1361 } 1362 #endif /* INET */ 1363 #ifdef INET6 1364 case AF_INET6: { 1365 struct ip6_hdr *h = mtod(m, struct ip6_hdr *); 1366 src->scrub->pfss_ttl = h->ip6_hlim; 1367 break; 1368 } 1369 #endif /* INET6 */ 1370 } 1371 1372 1373 /* 1374 * All normalizations below are only begun if we see the start of 1375 * the connections. They must all set an enabled bit in pfss_flags 1376 */ 1377 if ((th->th_flags & TH_SYN) == 0) 1378 return (0); 1379 1380 1381 if (th->th_off > (sizeof(struct tcphdr) >> 2) && src->scrub && 1382 pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) { 1383 /* Diddle with TCP options */ 1384 int hlen; 1385 opt = hdr + sizeof(struct tcphdr); 1386 hlen = (th->th_off << 2) - sizeof(struct tcphdr); 1387 while (hlen >= TCPOLEN_TIMESTAMP) { 1388 switch (*opt) { 1389 case TCPOPT_EOL: /* FALLTHROUGH */ 1390 case TCPOPT_NOP: 1391 opt++; 1392 hlen--; 1393 break; 1394 case TCPOPT_TIMESTAMP: 1395 if (opt[1] >= TCPOLEN_TIMESTAMP) { 1396 src->scrub->pfss_flags |= 1397 PFSS_TIMESTAMP; 1398 src->scrub->pfss_ts_mod = 1399 htonl(arc4random()); 1400 1401 /* note PFSS_PAWS not set yet */ 1402 memcpy(&tsval, &opt[2], 1403 sizeof(u_int32_t)); 1404 memcpy(&tsecr, &opt[6], 1405 sizeof(u_int32_t)); 1406 src->scrub->pfss_tsval0 = ntohl(tsval); 1407 src->scrub->pfss_tsval = ntohl(tsval); 1408 src->scrub->pfss_tsecr = ntohl(tsecr); 1409 getmicrouptime(&src->scrub->pfss_last); 1410 } 1411 /* FALLTHROUGH */ 1412 default: 1413 hlen -= MAX(opt[1], 2); 1414 opt += MAX(opt[1], 2); 1415 break; 1416 } 1417 } 1418 } 1419 1420 return (0); 1421 } 1422 1423 void 1424 pf_normalize_tcp_cleanup(struct pf_state *state) 1425 { 1426 if (state->src.scrub) 1427 pool_put(&pf_state_scrub_pl, state->src.scrub); 1428 if (state->dst.scrub) 1429 pool_put(&pf_state_scrub_pl, state->dst.scrub); 1430 1431 /* Someday... flush the TCP segment reassembly descriptors. */ 1432 } 1433 1434 int 1435 pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, 1436 u_short *reason, struct tcphdr *th, struct pf_state *state, 1437 struct pf_state_peer *src, struct pf_state_peer *dst, int *writeback) 1438 { 1439 struct timeval uptime; 1440 u_int32_t tsval, tsecr; 1441 u_int tsval_from_last; 1442 u_int8_t hdr[60]; 1443 u_int8_t *opt; 1444 int copyback = 0; 1445 int got_ts = 0; 1446 1447 KASSERT(src->scrub || dst->scrub); 1448 1449 /* 1450 * Enforce the minimum TTL seen for this connection. Negate a common 1451 * technique to evade an intrusion detection system and confuse 1452 * firewall state code. 1453 */ 1454 switch (pd->af) { 1455 #ifdef INET 1456 case AF_INET: { 1457 if (src->scrub) { 1458 struct ip *h = mtod(m, struct ip *); 1459 if (h->ip_ttl > src->scrub->pfss_ttl) 1460 src->scrub->pfss_ttl = h->ip_ttl; 1461 h->ip_ttl = src->scrub->pfss_ttl; 1462 } 1463 break; 1464 } 1465 #endif /* INET */ 1466 #ifdef INET6 1467 case AF_INET6: { 1468 if (src->scrub) { 1469 struct ip6_hdr *h = mtod(m, struct ip6_hdr *); 1470 if (h->ip6_hlim > src->scrub->pfss_ttl) 1471 src->scrub->pfss_ttl = h->ip6_hlim; 1472 h->ip6_hlim = src->scrub->pfss_ttl; 1473 } 1474 break; 1475 } 1476 #endif /* INET6 */ 1477 } 1478 1479 if (th->th_off > (sizeof(struct tcphdr) >> 2) && 1480 ((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) || 1481 (dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) && 1482 pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) { 1483 /* Diddle with TCP options */ 1484 int hlen; 1485 opt = hdr + sizeof(struct tcphdr); 1486 hlen = (th->th_off << 2) - sizeof(struct tcphdr); 1487 while (hlen >= TCPOLEN_TIMESTAMP) { 1488 switch (*opt) { 1489 case TCPOPT_EOL: /* FALLTHROUGH */ 1490 case TCPOPT_NOP: 1491 opt++; 1492 hlen--; 1493 break; 1494 case TCPOPT_TIMESTAMP: 1495 /* Modulate the timestamps. Can be used for 1496 * NAT detection, OS uptime determination or 1497 * reboot detection. 1498 */ 1499 1500 if (got_ts) { 1501 /* Huh? Multiple timestamps!? */ 1502 if (pf_status.debug >= PF_DEBUG_MISC) { 1503 DPFPRINTF(("multiple TS??")); 1504 pf_print_state(state); 1505 printf("\n"); 1506 } 1507 REASON_SET(reason, PFRES_TS); 1508 return (PF_DROP); 1509 } 1510 if (opt[1] >= TCPOLEN_TIMESTAMP) { 1511 memcpy(&tsval, &opt[2], 1512 sizeof(u_int32_t)); 1513 if (tsval && src->scrub && 1514 (src->scrub->pfss_flags & 1515 PFSS_TIMESTAMP)) { 1516 tsval = ntohl(tsval); 1517 pf_change_a(&opt[2], 1518 &th->th_sum, 1519 htonl(tsval + 1520 src->scrub->pfss_ts_mod), 1521 0); 1522 copyback = 1; 1523 } 1524 1525 /* Modulate TS reply iff valid (!0) */ 1526 memcpy(&tsecr, &opt[6], 1527 sizeof(u_int32_t)); 1528 if (tsecr && dst->scrub && 1529 (dst->scrub->pfss_flags & 1530 PFSS_TIMESTAMP)) { 1531 tsecr = ntohl(tsecr) 1532 - dst->scrub->pfss_ts_mod; 1533 pf_change_a(&opt[6], 1534 &th->th_sum, htonl(tsecr), 1535 0); 1536 copyback = 1; 1537 } 1538 got_ts = 1; 1539 } 1540 /* FALLTHROUGH */ 1541 default: 1542 hlen -= MAX(opt[1], 2); 1543 opt += MAX(opt[1], 2); 1544 break; 1545 } 1546 } 1547 if (copyback) { 1548 /* Copyback the options, caller copys back header */ 1549 *writeback = 1; 1550 m_copyback(m, off + sizeof(struct tcphdr), 1551 (th->th_off << 2) - sizeof(struct tcphdr), hdr + 1552 sizeof(struct tcphdr)); 1553 } 1554 } 1555 1556 1557 /* 1558 * Must invalidate PAWS checks on connections idle for too long. 1559 * The fastest allowed timestamp clock is 1ms. That turns out to 1560 * be about 24 days before it wraps. XXX Right now our lowerbound 1561 * TS echo check only works for the first 12 days of a connection 1562 * when the TS has exhausted half its 32bit space 1563 */ 1564 #define TS_MAX_IDLE (24*24*60*60) 1565 #define TS_MAX_CONN (12*24*60*60) /* XXX remove when better tsecr check */ 1566 1567 getmicrouptime(&uptime); 1568 if (src->scrub && (src->scrub->pfss_flags & PFSS_PAWS) && 1569 (uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE || 1570 time_second - state->creation > TS_MAX_CONN)) { 1571 if (pf_status.debug >= PF_DEBUG_MISC) { 1572 DPFPRINTF(("src idled out of PAWS\n")); 1573 pf_print_state(state); 1574 printf("\n"); 1575 } 1576 src->scrub->pfss_flags = (src->scrub->pfss_flags & ~PFSS_PAWS) 1577 | PFSS_PAWS_IDLED; 1578 } 1579 if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) && 1580 uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) { 1581 if (pf_status.debug >= PF_DEBUG_MISC) { 1582 DPFPRINTF(("dst idled out of PAWS\n")); 1583 pf_print_state(state); 1584 printf("\n"); 1585 } 1586 dst->scrub->pfss_flags = (dst->scrub->pfss_flags & ~PFSS_PAWS) 1587 | PFSS_PAWS_IDLED; 1588 } 1589 1590 if (got_ts && src->scrub && dst->scrub && 1591 (src->scrub->pfss_flags & PFSS_PAWS) && 1592 (dst->scrub->pfss_flags & PFSS_PAWS)) { 1593 /* Validate that the timestamps are "in-window". 1594 * RFC1323 describes TCP Timestamp options that allow 1595 * measurement of RTT (round trip time) and PAWS 1596 * (protection against wrapped sequence numbers). PAWS 1597 * gives us a set of rules for rejecting packets on 1598 * long fat pipes (packets that were somehow delayed 1599 * in transit longer than the time it took to send the 1600 * full TCP sequence space of 4Gb). We can use these 1601 * rules and infer a few others that will let us treat 1602 * the 32bit timestamp and the 32bit echoed timestamp 1603 * as sequence numbers to prevent a blind attacker from 1604 * inserting packets into a connection. 1605 * 1606 * RFC1323 tells us: 1607 * - The timestamp on this packet must be greater than 1608 * or equal to the last value echoed by the other 1609 * endpoint. The RFC says those will be discarded 1610 * since it is a dup that has already been acked. 1611 * This gives us a lowerbound on the timestamp. 1612 * timestamp >= other last echoed timestamp 1613 * - The timestamp will be less than or equal to 1614 * the last timestamp plus the time between the 1615 * last packet and now. The RFC defines the max 1616 * clock rate as 1ms. We will allow clocks to be 1617 * up to 10% fast and will allow a total difference 1618 * or 30 seconds due to a route change. And this 1619 * gives us an upperbound on the timestamp. 1620 * timestamp <= last timestamp + max ticks 1621 * We have to be careful here. Windows will send an 1622 * initial timestamp of zero and then initialize it 1623 * to a random value after the 3whs; presumably to 1624 * avoid a DoS by having to call an expensive RNG 1625 * during a SYN flood. Proof MS has at least one 1626 * good security geek. 1627 * 1628 * - The TCP timestamp option must also echo the other 1629 * endpoints timestamp. The timestamp echoed is the 1630 * one carried on the earliest unacknowledged segment 1631 * on the left edge of the sequence window. The RFC 1632 * states that the host will reject any echoed 1633 * timestamps that were larger than any ever sent. 1634 * This gives us an upperbound on the TS echo. 1635 * tescr <= largest_tsval 1636 * - The lowerbound on the TS echo is a little more 1637 * tricky to determine. The other endpoint's echoed 1638 * values will not decrease. But there may be 1639 * network conditions that re-order packets and 1640 * cause our view of them to decrease. For now the 1641 * only lowerbound we can safely determine is that 1642 * the TS echo will never be less than the orginal 1643 * TS. XXX There is probably a better lowerbound. 1644 * Remove TS_MAX_CONN with better lowerbound check. 1645 * tescr >= other original TS 1646 * 1647 * It is also important to note that the fastest 1648 * timestamp clock of 1ms will wrap its 32bit space in 1649 * 24 days. So we just disable TS checking after 24 1650 * days of idle time. We actually must use a 12d 1651 * connection limit until we can come up with a better 1652 * lowerbound to the TS echo check. 1653 */ 1654 struct timeval delta_ts; 1655 int ts_fudge; 1656 1657 1658 /* 1659 * PFTM_TS_DIFF is how many seconds of leeway to allow 1660 * a host's timestamp. This can happen if the previous 1661 * packet got delayed in transit for much longer than 1662 * this packet. 1663 */ 1664 if ((ts_fudge = state->rule.ptr->timeout[PFTM_TS_DIFF]) == 0) 1665 ts_fudge = pf_default_rule.timeout[PFTM_TS_DIFF]; 1666 1667 1668 /* Calculate max ticks since the last timestamp */ 1669 #define TS_MAXFREQ 1100 /* RFC max TS freq of 1Khz + 10% skew */ 1670 #define TS_MICROSECS 1000000 /* microseconds per second */ 1671 timersub(&uptime, &src->scrub->pfss_last, &delta_ts); 1672 tsval_from_last = (delta_ts.tv_sec + ts_fudge) * TS_MAXFREQ; 1673 tsval_from_last += delta_ts.tv_usec / (TS_MICROSECS/TS_MAXFREQ); 1674 1675 1676 if ((src->state >= TCPS_ESTABLISHED && 1677 dst->state >= TCPS_ESTABLISHED) && 1678 (SEQ_LT(tsval, dst->scrub->pfss_tsecr) || 1679 SEQ_GT(tsval, src->scrub->pfss_tsval + tsval_from_last) || 1680 (tsecr && (SEQ_GT(tsecr, dst->scrub->pfss_tsval) || 1681 SEQ_LT(tsecr, dst->scrub->pfss_tsval0))))) { 1682 /* Bad RFC1323 implementation or an insertion attack. 1683 * 1684 * - Solaris 2.6 and 2.7 are known to send another ACK 1685 * after the FIN,FIN|ACK,ACK closing that carries 1686 * an old timestamp. 1687 */ 1688 1689 DPFPRINTF(("Timestamp failed %c%c%c%c\n", 1690 SEQ_LT(tsval, dst->scrub->pfss_tsecr) ? '0' : ' ', 1691 SEQ_GT(tsval, src->scrub->pfss_tsval + 1692 tsval_from_last) ? '1' : ' ', 1693 SEQ_GT(tsecr, dst->scrub->pfss_tsval) ? '2' : ' ', 1694 SEQ_LT(tsecr, dst->scrub->pfss_tsval0)? '3' : ' ')); 1695 DPFPRINTF((" tsval: %" PRIu32 " tsecr: %" PRIu32 1696 " +ticks: %" PRIu32 " idle: %lus %lums\n", 1697 tsval, tsecr, tsval_from_last, delta_ts.tv_sec, 1698 delta_ts.tv_usec / 1000)); 1699 DPFPRINTF((" src->tsval: %" PRIu32 " tsecr: %" PRIu32 1700 "\n", 1701 src->scrub->pfss_tsval, src->scrub->pfss_tsecr)); 1702 DPFPRINTF((" dst->tsval: %" PRIu32 " tsecr: %" PRIu32 1703 " tsval0: %" PRIu32 "\n", 1704 dst->scrub->pfss_tsval, 1705 dst->scrub->pfss_tsecr, dst->scrub->pfss_tsval0)); 1706 if (pf_status.debug >= PF_DEBUG_MISC) { 1707 pf_print_state(state); 1708 pf_print_flags(th->th_flags); 1709 printf("\n"); 1710 } 1711 REASON_SET(reason, PFRES_TS); 1712 return (PF_DROP); 1713 } 1714 1715 /* XXX I'd really like to require tsecr but it's optional */ 1716 1717 } else if (!got_ts && (th->th_flags & TH_RST) == 0 && 1718 ((src->state == TCPS_ESTABLISHED && dst->state == TCPS_ESTABLISHED) 1719 || pd->p_len > 0 || (th->th_flags & TH_SYN)) && 1720 src->scrub && dst->scrub && 1721 (src->scrub->pfss_flags & PFSS_PAWS) && 1722 (dst->scrub->pfss_flags & PFSS_PAWS)) { 1723 /* Didn't send a timestamp. Timestamps aren't really useful 1724 * when: 1725 * - connection opening or closing (often not even sent). 1726 * but we must not let an attacker to put a FIN on a 1727 * data packet to sneak it through our ESTABLISHED check. 1728 * - on a TCP reset. RFC suggests not even looking at TS. 1729 * - on an empty ACK. The TS will not be echoed so it will 1730 * probably not help keep the RTT calculation in sync and 1731 * there isn't as much danger when the sequence numbers 1732 * got wrapped. So some stacks don't include TS on empty 1733 * ACKs :-( 1734 * 1735 * To minimize the disruption to mostly RFC1323 conformant 1736 * stacks, we will only require timestamps on data packets. 1737 * 1738 * And what do ya know, we cannot require timestamps on data 1739 * packets. There appear to be devices that do legitimate 1740 * TCP connection hijacking. There are HTTP devices that allow 1741 * a 3whs (with timestamps) and then buffer the HTTP request. 1742 * If the intermediate device has the HTTP response cache, it 1743 * will spoof the response but not bother timestamping its 1744 * packets. So we can look for the presence of a timestamp in 1745 * the first data packet and if there, require it in all future 1746 * packets. 1747 */ 1748 1749 if (pd->p_len > 0 && (src->scrub->pfss_flags & PFSS_DATA_TS)) { 1750 /* 1751 * Hey! Someone tried to sneak a packet in. Or the 1752 * stack changed its RFC1323 behavior?!?! 1753 */ 1754 if (pf_status.debug >= PF_DEBUG_MISC) { 1755 DPFPRINTF(("Did not receive expected RFC1323 " 1756 "timestamp\n")); 1757 pf_print_state(state); 1758 pf_print_flags(th->th_flags); 1759 printf("\n"); 1760 } 1761 REASON_SET(reason, PFRES_TS); 1762 return (PF_DROP); 1763 } 1764 } 1765 1766 1767 /* 1768 * We will note if a host sends his data packets with or without 1769 * timestamps. And require all data packets to contain a timestamp 1770 * if the first does. PAWS implicitly requires that all data packets be 1771 * timestamped. But I think there are middle-man devices that hijack 1772 * TCP streams immedietly after the 3whs and don't timestamp their 1773 * packets (seen in a WWW accelerator or cache). 1774 */ 1775 if (pd->p_len > 0 && src->scrub && (src->scrub->pfss_flags & 1776 (PFSS_TIMESTAMP|PFSS_DATA_TS|PFSS_DATA_NOTS)) == PFSS_TIMESTAMP) { 1777 if (got_ts) 1778 src->scrub->pfss_flags |= PFSS_DATA_TS; 1779 else { 1780 src->scrub->pfss_flags |= PFSS_DATA_NOTS; 1781 if (pf_status.debug >= PF_DEBUG_MISC && dst->scrub && 1782 (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) { 1783 /* Don't warn if other host rejected RFC1323 */ 1784 DPFPRINTF(("Broken RFC1323 stack did not " 1785 "timestamp data packet. Disabled PAWS " 1786 "security.\n")); 1787 pf_print_state(state); 1788 pf_print_flags(th->th_flags); 1789 printf("\n"); 1790 } 1791 } 1792 } 1793 1794 1795 /* 1796 * Update PAWS values 1797 */ 1798 if (got_ts && src->scrub && PFSS_TIMESTAMP == (src->scrub->pfss_flags & 1799 (PFSS_PAWS_IDLED|PFSS_TIMESTAMP))) { 1800 getmicrouptime(&src->scrub->pfss_last); 1801 if (SEQ_GEQ(tsval, src->scrub->pfss_tsval) || 1802 (src->scrub->pfss_flags & PFSS_PAWS) == 0) 1803 src->scrub->pfss_tsval = tsval; 1804 1805 if (tsecr) { 1806 if (SEQ_GEQ(tsecr, src->scrub->pfss_tsecr) || 1807 (src->scrub->pfss_flags & PFSS_PAWS) == 0) 1808 src->scrub->pfss_tsecr = tsecr; 1809 1810 if ((src->scrub->pfss_flags & PFSS_PAWS) == 0 && 1811 (SEQ_LT(tsval, src->scrub->pfss_tsval0) || 1812 src->scrub->pfss_tsval0 == 0)) { 1813 /* tsval0 MUST be the lowest timestamp */ 1814 src->scrub->pfss_tsval0 = tsval; 1815 } 1816 1817 /* Only fully initialized after a TS gets echoed */ 1818 if ((src->scrub->pfss_flags & PFSS_PAWS) == 0) 1819 src->scrub->pfss_flags |= PFSS_PAWS; 1820 } 1821 } 1822 1823 /* I have a dream.... TCP segment reassembly.... */ 1824 return (0); 1825 } 1826 1827 int 1828 pf_normalize_tcpopt(struct pf_rule *r, struct mbuf *m, struct tcphdr *th, 1829 int off) 1830 { 1831 u_int16_t *mss; 1832 int thoff; 1833 int opt, cnt, optlen = 0; 1834 int rewrite = 0; 1835 u_char *optp; 1836 1837 thoff = th->th_off << 2; 1838 cnt = thoff - sizeof(struct tcphdr); 1839 optp = mtod(m, caddr_t) + off + sizeof(struct tcphdr); 1840 1841 for (; cnt > 0; cnt -= optlen, optp += optlen) { 1842 opt = optp[0]; 1843 if (opt == TCPOPT_EOL) 1844 break; 1845 if (opt == TCPOPT_NOP) 1846 optlen = 1; 1847 else { 1848 if (cnt < 2) 1849 break; 1850 optlen = optp[1]; 1851 if (optlen < 2 || optlen > cnt) 1852 break; 1853 } 1854 switch (opt) { 1855 case TCPOPT_MAXSEG: 1856 mss = (u_int16_t *)(optp + 2); 1857 if ((ntohs(*mss)) > r->max_mss) { 1858 th->th_sum = pf_cksum_fixup(th->th_sum, 1859 *mss, htons(r->max_mss), 0); 1860 *mss = htons(r->max_mss); 1861 rewrite = 1; 1862 } 1863 break; 1864 default: 1865 break; 1866 } 1867 } 1868 1869 return (rewrite); 1870 } 1871