1 /* $NetBSD: pf_norm.c,v 1.16 2007/03/12 18:18:31 ad Exp $ */ 2 /* $OpenBSD: pf_norm.c,v 1.97 2004/09/21 16:59:12 aaron Exp $ */ 3 4 /* 5 * Copyright 2001 Niels Provos <provos@citi.umich.edu> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 #ifdef _KERNEL_OPT 30 #include "opt_inet.h" 31 #endif 32 33 #include "pflog.h" 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/mbuf.h> 38 #include <sys/filio.h> 39 #include <sys/fcntl.h> 40 #include <sys/socket.h> 41 #include <sys/kernel.h> 42 #include <sys/time.h> 43 #include <sys/pool.h> 44 45 #ifdef __OpenBSD__ 46 #include <dev/rndvar.h> 47 #else 48 #include <sys/rnd.h> 49 #endif 50 #include <net/if.h> 51 #include <net/if_types.h> 52 #include <net/bpf.h> 53 #include <net/route.h> 54 #include <net/if_pflog.h> 55 56 #include <netinet/in.h> 57 #include <netinet/in_var.h> 58 #include <netinet/in_systm.h> 59 #include <netinet/ip.h> 60 #include <netinet/ip_var.h> 61 #include <netinet/tcp.h> 62 #include <netinet/tcp_seq.h> 63 #include <netinet/udp.h> 64 #include <netinet/ip_icmp.h> 65 66 #ifdef INET6 67 #include <netinet/ip6.h> 68 #endif /* INET6 */ 69 70 #include <net/pfvar.h> 71 72 struct pf_frent { 73 LIST_ENTRY(pf_frent) fr_next; 74 struct ip *fr_ip; 75 struct mbuf *fr_m; 76 }; 77 78 struct pf_frcache { 79 LIST_ENTRY(pf_frcache) fr_next; 80 uint16_t fr_off; 81 uint16_t fr_end; 82 }; 83 84 #define PFFRAG_SEENLAST 0x0001 /* Seen the last fragment for this */ 85 #define PFFRAG_NOBUFFER 0x0002 /* Non-buffering fragment cache */ 86 #define PFFRAG_DROP 0x0004 /* Drop all fragments */ 87 #define BUFFER_FRAGMENTS(fr) (!((fr)->fr_flags & PFFRAG_NOBUFFER)) 88 89 struct pf_fragment { 90 RB_ENTRY(pf_fragment) fr_entry; 91 TAILQ_ENTRY(pf_fragment) frag_next; 92 struct in_addr fr_src; 93 struct in_addr fr_dst; 94 u_int8_t fr_p; /* protocol of this fragment */ 95 u_int8_t fr_flags; /* status flags */ 96 u_int16_t fr_id; /* fragment id for reassemble */ 97 u_int16_t fr_max; /* fragment data max */ 98 u_int32_t fr_timeout; 99 #define fr_queue fr_u.fru_queue 100 #define fr_cache fr_u.fru_cache 101 union { 102 LIST_HEAD(pf_fragq, pf_frent) fru_queue; /* buffering */ 103 LIST_HEAD(pf_cacheq, pf_frcache) fru_cache; /* non-buf */ 104 } fr_u; 105 }; 106 107 TAILQ_HEAD(pf_fragqueue, pf_fragment) pf_fragqueue; 108 TAILQ_HEAD(pf_cachequeue, pf_fragment) pf_cachequeue; 109 110 static __inline int pf_frag_compare(struct pf_fragment *, 111 struct pf_fragment *); 112 RB_HEAD(pf_frag_tree, pf_fragment) pf_frag_tree, pf_cache_tree; 113 RB_PROTOTYPE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare); 114 RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare); 115 116 /* Private prototypes */ 117 void pf_ip2key(struct pf_fragment *, struct ip *); 118 void pf_remove_fragment(struct pf_fragment *); 119 void pf_flush_fragments(void); 120 void pf_free_fragment(struct pf_fragment *); 121 struct pf_fragment *pf_find_fragment(struct ip *, struct pf_frag_tree *); 122 struct mbuf *pf_reassemble(struct mbuf **, struct pf_fragment **, 123 struct pf_frent *, int); 124 struct mbuf *pf_fragcache(struct mbuf **, struct ip*, 125 struct pf_fragment **, int, int, int *); 126 int pf_normalize_tcpopt(struct pf_rule *, struct mbuf *, 127 struct tcphdr *, int); 128 129 #define DPFPRINTF(x) do { \ 130 if (pf_status.debug >= PF_DEBUG_MISC) { \ 131 printf("%s: ", __func__); \ 132 printf x ; \ 133 } \ 134 } while(0) 135 136 /* Globals */ 137 struct pool pf_frent_pl, pf_frag_pl, pf_cache_pl, pf_cent_pl; 138 struct pool pf_state_scrub_pl; 139 int pf_nfrents, pf_ncache; 140 141 void 142 pf_normalize_init(void) 143 { 144 #ifdef __NetBSD__ 145 pool_init(&pf_frent_pl, sizeof(struct pf_frent), 0, 0, 0, "pffrent", 146 NULL, IPL_SOFTNET); 147 pool_init(&pf_frag_pl, sizeof(struct pf_fragment), 0, 0, 0, "pffrag", 148 NULL, IPL_SOFTNET); 149 pool_init(&pf_cache_pl, sizeof(struct pf_fragment), 0, 0, 0, 150 "pffrcache", NULL, IPL_SOFTNET); 151 pool_init(&pf_cent_pl, sizeof(struct pf_frcache), 0, 0, 0, "pffrcent", 152 NULL, IPL_SOFTNET); 153 pool_init(&pf_state_scrub_pl, sizeof(struct pf_state_scrub), 0, 0, 0, 154 "pfstscr", NULL, IPL_SOFTNET); 155 #else 156 pool_init(&pf_frent_pl, sizeof(struct pf_frent), 0, 0, 0, "pffrent", 157 NULL); 158 pool_init(&pf_frag_pl, sizeof(struct pf_fragment), 0, 0, 0, "pffrag", 159 NULL); 160 pool_init(&pf_cache_pl, sizeof(struct pf_fragment), 0, 0, 0, 161 "pffrcache", NULL); 162 pool_init(&pf_cent_pl, sizeof(struct pf_frcache), 0, 0, 0, "pffrcent", 163 NULL); 164 pool_init(&pf_state_scrub_pl, sizeof(struct pf_state_scrub), 0, 0, 0, 165 "pfstscr", NULL); 166 #endif 167 168 pool_sethiwat(&pf_frag_pl, PFFRAG_FRAG_HIWAT); 169 pool_sethardlimit(&pf_frent_pl, PFFRAG_FRENT_HIWAT, NULL, 0); 170 pool_sethardlimit(&pf_cache_pl, PFFRAG_FRCACHE_HIWAT, NULL, 0); 171 pool_sethardlimit(&pf_cent_pl, PFFRAG_FRCENT_HIWAT, NULL, 0); 172 173 TAILQ_INIT(&pf_fragqueue); 174 TAILQ_INIT(&pf_cachequeue); 175 } 176 177 #ifdef _LKM 178 void 179 pf_normalize_destroy(void) 180 { 181 pool_destroy(&pf_state_scrub_pl); 182 pool_destroy(&pf_cent_pl); 183 pool_destroy(&pf_cache_pl); 184 pool_destroy(&pf_frag_pl); 185 pool_destroy(&pf_frent_pl); 186 } 187 #endif 188 189 static __inline int 190 pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b) 191 { 192 int diff; 193 194 if ((diff = a->fr_id - b->fr_id)) 195 return (diff); 196 else if ((diff = a->fr_p - b->fr_p)) 197 return (diff); 198 else if (a->fr_src.s_addr < b->fr_src.s_addr) 199 return (-1); 200 else if (a->fr_src.s_addr > b->fr_src.s_addr) 201 return (1); 202 else if (a->fr_dst.s_addr < b->fr_dst.s_addr) 203 return (-1); 204 else if (a->fr_dst.s_addr > b->fr_dst.s_addr) 205 return (1); 206 return (0); 207 } 208 209 void 210 pf_purge_expired_fragments(void) 211 { 212 struct pf_fragment *frag; 213 u_int32_t expire = time_second - 214 pf_default_rule.timeout[PFTM_FRAG]; 215 216 while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) { 217 KASSERT(BUFFER_FRAGMENTS(frag)); 218 if (frag->fr_timeout > expire) 219 break; 220 221 DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag)); 222 pf_free_fragment(frag); 223 } 224 225 while ((frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue)) != NULL) { 226 KASSERT(!BUFFER_FRAGMENTS(frag)); 227 if (frag->fr_timeout > expire) 228 break; 229 230 DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag)); 231 pf_free_fragment(frag); 232 KASSERT(TAILQ_EMPTY(&pf_cachequeue) || 233 TAILQ_LAST(&pf_cachequeue, pf_cachequeue) != frag); 234 } 235 } 236 237 /* 238 * Try to flush old fragments to make space for new ones 239 */ 240 241 void 242 pf_flush_fragments(void) 243 { 244 struct pf_fragment *frag; 245 int goal; 246 247 goal = pf_nfrents * 9 / 10; 248 DPFPRINTF(("trying to free > %d frents\n", 249 pf_nfrents - goal)); 250 while (goal < pf_nfrents) { 251 frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue); 252 if (frag == NULL) 253 break; 254 pf_free_fragment(frag); 255 } 256 257 258 goal = pf_ncache * 9 / 10; 259 DPFPRINTF(("trying to free > %d cache entries\n", 260 pf_ncache - goal)); 261 while (goal < pf_ncache) { 262 frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue); 263 if (frag == NULL) 264 break; 265 pf_free_fragment(frag); 266 } 267 } 268 269 /* Frees the fragments and all associated entries */ 270 271 void 272 pf_free_fragment(struct pf_fragment *frag) 273 { 274 struct pf_frent *frent; 275 struct pf_frcache *frcache; 276 277 /* Free all fragments */ 278 if (BUFFER_FRAGMENTS(frag)) { 279 for (frent = LIST_FIRST(&frag->fr_queue); frent; 280 frent = LIST_FIRST(&frag->fr_queue)) { 281 LIST_REMOVE(frent, fr_next); 282 283 m_freem(frent->fr_m); 284 pool_put(&pf_frent_pl, frent); 285 pf_nfrents--; 286 } 287 } else { 288 for (frcache = LIST_FIRST(&frag->fr_cache); frcache; 289 frcache = LIST_FIRST(&frag->fr_cache)) { 290 LIST_REMOVE(frcache, fr_next); 291 292 KASSERT(LIST_EMPTY(&frag->fr_cache) || 293 LIST_FIRST(&frag->fr_cache)->fr_off > 294 frcache->fr_end); 295 296 pool_put(&pf_cent_pl, frcache); 297 pf_ncache--; 298 } 299 } 300 301 pf_remove_fragment(frag); 302 } 303 304 void 305 pf_ip2key(struct pf_fragment *key, struct ip *ip) 306 { 307 key->fr_p = ip->ip_p; 308 key->fr_id = ip->ip_id; 309 key->fr_src.s_addr = ip->ip_src.s_addr; 310 key->fr_dst.s_addr = ip->ip_dst.s_addr; 311 } 312 313 struct pf_fragment * 314 pf_find_fragment(struct ip *ip, struct pf_frag_tree *tree) 315 { 316 struct pf_fragment key; 317 struct pf_fragment *frag; 318 319 pf_ip2key(&key, ip); 320 321 frag = RB_FIND(pf_frag_tree, tree, &key); 322 if (frag != NULL) { 323 /* XXX Are we sure we want to update the timeout? */ 324 frag->fr_timeout = time_second; 325 if (BUFFER_FRAGMENTS(frag)) { 326 TAILQ_REMOVE(&pf_fragqueue, frag, frag_next); 327 TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next); 328 } else { 329 TAILQ_REMOVE(&pf_cachequeue, frag, frag_next); 330 TAILQ_INSERT_HEAD(&pf_cachequeue, frag, frag_next); 331 } 332 } 333 334 return (frag); 335 } 336 337 /* Removes a fragment from the fragment queue and frees the fragment */ 338 339 void 340 pf_remove_fragment(struct pf_fragment *frag) 341 { 342 if (BUFFER_FRAGMENTS(frag)) { 343 RB_REMOVE(pf_frag_tree, &pf_frag_tree, frag); 344 TAILQ_REMOVE(&pf_fragqueue, frag, frag_next); 345 pool_put(&pf_frag_pl, frag); 346 } else { 347 RB_REMOVE(pf_frag_tree, &pf_cache_tree, frag); 348 TAILQ_REMOVE(&pf_cachequeue, frag, frag_next); 349 pool_put(&pf_cache_pl, frag); 350 } 351 } 352 353 #define FR_IP_OFF(fr) ((ntohs((fr)->fr_ip->ip_off) & IP_OFFMASK) << 3) 354 struct mbuf * 355 pf_reassemble(struct mbuf **m0, struct pf_fragment **frag, 356 struct pf_frent *frent, int mff) 357 { 358 struct mbuf *m = *m0, *m2; 359 struct pf_frent *frea, *next; 360 struct pf_frent *frep = NULL; 361 struct ip *ip = frent->fr_ip; 362 int hlen = ip->ip_hl << 2; 363 u_int16_t off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3; 364 u_int16_t ip_len = ntohs(ip->ip_len) - ip->ip_hl * 4; 365 u_int16_t max = ip_len + off; 366 367 KASSERT(*frag == NULL || BUFFER_FRAGMENTS(*frag)); 368 369 /* Strip off ip header */ 370 m->m_data += hlen; 371 m->m_len -= hlen; 372 373 /* Create a new reassembly queue for this packet */ 374 if (*frag == NULL) { 375 *frag = pool_get(&pf_frag_pl, PR_NOWAIT); 376 if (*frag == NULL) { 377 pf_flush_fragments(); 378 *frag = pool_get(&pf_frag_pl, PR_NOWAIT); 379 if (*frag == NULL) 380 goto drop_fragment; 381 } 382 383 (*frag)->fr_flags = 0; 384 (*frag)->fr_max = 0; 385 (*frag)->fr_src = frent->fr_ip->ip_src; 386 (*frag)->fr_dst = frent->fr_ip->ip_dst; 387 (*frag)->fr_p = frent->fr_ip->ip_p; 388 (*frag)->fr_id = frent->fr_ip->ip_id; 389 (*frag)->fr_timeout = time_second; 390 LIST_INIT(&(*frag)->fr_queue); 391 392 RB_INSERT(pf_frag_tree, &pf_frag_tree, *frag); 393 TAILQ_INSERT_HEAD(&pf_fragqueue, *frag, frag_next); 394 395 /* We do not have a previous fragment */ 396 frep = NULL; 397 goto insert; 398 } 399 400 /* 401 * Find a fragment after the current one: 402 * - off contains the real shifted offset. 403 */ 404 LIST_FOREACH(frea, &(*frag)->fr_queue, fr_next) { 405 if (FR_IP_OFF(frea) > off) 406 break; 407 frep = frea; 408 } 409 410 KASSERT(frep != NULL || frea != NULL); 411 412 if (frep != NULL && 413 FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl * 414 4 > off) 415 { 416 u_int16_t precut; 417 418 precut = FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) - 419 frep->fr_ip->ip_hl * 4 - off; 420 if (precut >= ip_len) 421 goto drop_fragment; 422 m_adj(frent->fr_m, precut); 423 DPFPRINTF(("overlap -%d\n", precut)); 424 /* Enforce 8 byte boundaries */ 425 ip->ip_off = htons(ntohs(ip->ip_off) + (precut >> 3)); 426 off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3; 427 ip_len -= precut; 428 ip->ip_len = htons(ip_len); 429 } 430 431 for (; frea != NULL && ip_len + off > FR_IP_OFF(frea); 432 frea = next) 433 { 434 u_int16_t aftercut; 435 436 aftercut = ip_len + off - FR_IP_OFF(frea); 437 DPFPRINTF(("adjust overlap %d\n", aftercut)); 438 if (aftercut < ntohs(frea->fr_ip->ip_len) - frea->fr_ip->ip_hl 439 * 4) 440 { 441 frea->fr_ip->ip_len = 442 htons(ntohs(frea->fr_ip->ip_len) - aftercut); 443 frea->fr_ip->ip_off = htons(ntohs(frea->fr_ip->ip_off) + 444 (aftercut >> 3)); 445 m_adj(frea->fr_m, aftercut); 446 break; 447 } 448 449 /* This fragment is completely overlapped, loose it */ 450 next = LIST_NEXT(frea, fr_next); 451 m_freem(frea->fr_m); 452 LIST_REMOVE(frea, fr_next); 453 pool_put(&pf_frent_pl, frea); 454 pf_nfrents--; 455 } 456 457 insert: 458 /* Update maximum data size */ 459 if ((*frag)->fr_max < max) 460 (*frag)->fr_max = max; 461 /* This is the last segment */ 462 if (!mff) 463 (*frag)->fr_flags |= PFFRAG_SEENLAST; 464 465 if (frep == NULL) 466 LIST_INSERT_HEAD(&(*frag)->fr_queue, frent, fr_next); 467 else 468 LIST_INSERT_AFTER(frep, frent, fr_next); 469 470 /* Check if we are completely reassembled */ 471 if (!((*frag)->fr_flags & PFFRAG_SEENLAST)) 472 return (NULL); 473 474 /* Check if we have all the data */ 475 off = 0; 476 for (frep = LIST_FIRST(&(*frag)->fr_queue); frep; frep = next) { 477 next = LIST_NEXT(frep, fr_next); 478 479 off += ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl * 4; 480 if (off < (*frag)->fr_max && 481 (next == NULL || FR_IP_OFF(next) != off)) 482 { 483 DPFPRINTF(("missing fragment at %d, next %d, max %d\n", 484 off, next == NULL ? -1 : FR_IP_OFF(next), 485 (*frag)->fr_max)); 486 return (NULL); 487 } 488 } 489 DPFPRINTF(("%d < %d?\n", off, (*frag)->fr_max)); 490 if (off < (*frag)->fr_max) 491 return (NULL); 492 493 /* We have all the data */ 494 frent = LIST_FIRST(&(*frag)->fr_queue); 495 KASSERT(frent != NULL); 496 if ((frent->fr_ip->ip_hl << 2) + off > IP_MAXPACKET) { 497 DPFPRINTF(("drop: too big: %d\n", off)); 498 pf_free_fragment(*frag); 499 *frag = NULL; 500 return (NULL); 501 } 502 next = LIST_NEXT(frent, fr_next); 503 504 /* Magic from ip_input */ 505 ip = frent->fr_ip; 506 m = frent->fr_m; 507 m2 = m->m_next; 508 m->m_next = NULL; 509 m_cat(m, m2); 510 pool_put(&pf_frent_pl, frent); 511 pf_nfrents--; 512 for (frent = next; frent != NULL; frent = next) { 513 next = LIST_NEXT(frent, fr_next); 514 515 m2 = frent->fr_m; 516 pool_put(&pf_frent_pl, frent); 517 pf_nfrents--; 518 m_cat(m, m2); 519 } 520 521 ip->ip_src = (*frag)->fr_src; 522 ip->ip_dst = (*frag)->fr_dst; 523 524 /* Remove from fragment queue */ 525 pf_remove_fragment(*frag); 526 *frag = NULL; 527 528 hlen = ip->ip_hl << 2; 529 ip->ip_len = htons(off + hlen); 530 m->m_len += hlen; 531 m->m_data -= hlen; 532 533 /* some debugging cruft by sklower, below, will go away soon */ 534 /* XXX this should be done elsewhere */ 535 if (m->m_flags & M_PKTHDR) { 536 int plen = 0; 537 for (m2 = m; m2; m2 = m2->m_next) 538 plen += m2->m_len; 539 m->m_pkthdr.len = plen; 540 #if defined(__NetBSD__) 541 m->m_pkthdr.csum_flags = 0; 542 #endif /* defined(__NetBSD__) */ 543 } 544 545 DPFPRINTF(("complete: %p(%d)\n", m, ntohs(ip->ip_len))); 546 return (m); 547 548 drop_fragment: 549 /* Oops - fail safe - drop packet */ 550 pool_put(&pf_frent_pl, frent); 551 pf_nfrents--; 552 m_freem(m); 553 return (NULL); 554 } 555 556 struct mbuf * 557 pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, 558 int drop, int *nomem) 559 { 560 struct mbuf *m = *m0; 561 struct pf_frcache *frp, *fra, *cur = NULL; 562 int ip_len = ntohs(h->ip_len) - (h->ip_hl << 2); 563 u_int16_t off = ntohs(h->ip_off) << 3; 564 u_int16_t max = ip_len + off; 565 int hosed = 0; 566 567 KASSERT(*frag == NULL || !BUFFER_FRAGMENTS(*frag)); 568 569 /* Create a new range queue for this packet */ 570 if (*frag == NULL) { 571 *frag = pool_get(&pf_cache_pl, PR_NOWAIT); 572 if (*frag == NULL) { 573 pf_flush_fragments(); 574 *frag = pool_get(&pf_cache_pl, PR_NOWAIT); 575 if (*frag == NULL) 576 goto no_mem; 577 } 578 579 /* Get an entry for the queue */ 580 cur = pool_get(&pf_cent_pl, PR_NOWAIT); 581 if (cur == NULL) { 582 pool_put(&pf_cache_pl, *frag); 583 *frag = NULL; 584 goto no_mem; 585 } 586 pf_ncache++; 587 588 (*frag)->fr_flags = PFFRAG_NOBUFFER; 589 (*frag)->fr_max = 0; 590 (*frag)->fr_src = h->ip_src; 591 (*frag)->fr_dst = h->ip_dst; 592 (*frag)->fr_p = h->ip_p; 593 (*frag)->fr_id = h->ip_id; 594 (*frag)->fr_timeout = time_second; 595 596 cur->fr_off = off; 597 cur->fr_end = max; 598 LIST_INIT(&(*frag)->fr_cache); 599 LIST_INSERT_HEAD(&(*frag)->fr_cache, cur, fr_next); 600 601 RB_INSERT(pf_frag_tree, &pf_cache_tree, *frag); 602 TAILQ_INSERT_HEAD(&pf_cachequeue, *frag, frag_next); 603 604 DPFPRINTF(("fragcache[%d]: new %d-%d\n", h->ip_id, off, max)); 605 606 goto pass; 607 } 608 609 /* 610 * Find a fragment after the current one: 611 * - off contains the real shifted offset. 612 */ 613 frp = NULL; 614 LIST_FOREACH(fra, &(*frag)->fr_cache, fr_next) { 615 if (fra->fr_off > off) 616 break; 617 frp = fra; 618 } 619 620 KASSERT(frp != NULL || fra != NULL); 621 622 if (frp != NULL) { 623 int precut; 624 625 precut = frp->fr_end - off; 626 if (precut >= ip_len) { 627 /* Fragment is entirely a duplicate */ 628 DPFPRINTF(("fragcache[%d]: dead (%d-%d) %d-%d\n", 629 h->ip_id, frp->fr_off, frp->fr_end, off, max)); 630 goto drop_fragment; 631 } 632 if (precut == 0) { 633 /* They are adjacent. Fixup cache entry */ 634 DPFPRINTF(("fragcache[%d]: adjacent (%d-%d) %d-%d\n", 635 h->ip_id, frp->fr_off, frp->fr_end, off, max)); 636 frp->fr_end = max; 637 } else if (precut > 0) { 638 /* The first part of this payload overlaps with a 639 * fragment that has already been passed. 640 * Need to trim off the first part of the payload. 641 * But to do so easily, we need to create another 642 * mbuf to throw the original header into. 643 */ 644 645 DPFPRINTF(("fragcache[%d]: chop %d (%d-%d) %d-%d\n", 646 h->ip_id, precut, frp->fr_off, frp->fr_end, off, 647 max)); 648 649 off += precut; 650 max -= precut; 651 /* Update the previous frag to encompass this one */ 652 frp->fr_end = max; 653 654 if (!drop) { 655 /* XXX Optimization opportunity 656 * This is a very heavy way to trim the payload. 657 * we could do it much faster by diddling mbuf 658 * internals but that would be even less legible 659 * than this mbuf magic. For my next trick, 660 * I'll pull a rabbit out of my laptop. 661 */ 662 *m0 = m_copym2(m, 0, h->ip_hl << 2, M_NOWAIT); 663 if (*m0 == NULL) 664 goto no_mem; 665 KASSERT((*m0)->m_next == NULL); 666 m_adj(m, precut + (h->ip_hl << 2)); 667 m_cat(*m0, m); 668 m = *m0; 669 if (m->m_flags & M_PKTHDR) { 670 int plen = 0; 671 struct mbuf *t; 672 for (t = m; t; t = t->m_next) 673 plen += t->m_len; 674 m->m_pkthdr.len = plen; 675 } 676 677 678 h = mtod(m, struct ip *); 679 680 681 KASSERT((int)m->m_len == 682 ntohs(h->ip_len) - precut); 683 h->ip_off = htons(ntohs(h->ip_off) + 684 (precut >> 3)); 685 h->ip_len = htons(ntohs(h->ip_len) - precut); 686 } else { 687 hosed++; 688 } 689 } else { 690 /* There is a gap between fragments */ 691 692 DPFPRINTF(("fragcache[%d]: gap %d (%d-%d) %d-%d\n", 693 h->ip_id, -precut, frp->fr_off, frp->fr_end, off, 694 max)); 695 696 cur = pool_get(&pf_cent_pl, PR_NOWAIT); 697 if (cur == NULL) 698 goto no_mem; 699 pf_ncache++; 700 701 cur->fr_off = off; 702 cur->fr_end = max; 703 LIST_INSERT_AFTER(frp, cur, fr_next); 704 } 705 } 706 707 if (fra != NULL) { 708 int aftercut; 709 int merge = 0; 710 711 aftercut = max - fra->fr_off; 712 if (aftercut == 0) { 713 /* Adjacent fragments */ 714 DPFPRINTF(("fragcache[%d]: adjacent %d-%d (%d-%d)\n", 715 h->ip_id, off, max, fra->fr_off, fra->fr_end)); 716 fra->fr_off = off; 717 merge = 1; 718 } else if (aftercut > 0) { 719 /* Need to chop off the tail of this fragment */ 720 DPFPRINTF(("fragcache[%d]: chop %d %d-%d (%d-%d)\n", 721 h->ip_id, aftercut, off, max, fra->fr_off, 722 fra->fr_end)); 723 fra->fr_off = off; 724 max -= aftercut; 725 726 merge = 1; 727 728 if (!drop) { 729 m_adj(m, -aftercut); 730 if (m->m_flags & M_PKTHDR) { 731 int plen = 0; 732 struct mbuf *t; 733 for (t = m; t; t = t->m_next) 734 plen += t->m_len; 735 m->m_pkthdr.len = plen; 736 } 737 h = mtod(m, struct ip *); 738 KASSERT((int)m->m_len == 739 ntohs(h->ip_len) - aftercut); 740 h->ip_len = htons(ntohs(h->ip_len) - aftercut); 741 } else { 742 hosed++; 743 } 744 } else if (frp == NULL) { 745 /* There is a gap between fragments */ 746 DPFPRINTF(("fragcache[%d]: gap %d %d-%d (%d-%d)\n", 747 h->ip_id, -aftercut, off, max, fra->fr_off, 748 fra->fr_end)); 749 750 cur = pool_get(&pf_cent_pl, PR_NOWAIT); 751 if (cur == NULL) 752 goto no_mem; 753 pf_ncache++; 754 755 cur->fr_off = off; 756 cur->fr_end = max; 757 LIST_INSERT_BEFORE(fra, cur, fr_next); 758 } 759 760 761 /* Need to glue together two separate fragment descriptors */ 762 if (merge) { 763 if (cur && fra->fr_off <= cur->fr_end) { 764 /* Need to merge in a previous 'cur' */ 765 DPFPRINTF(("fragcache[%d]: adjacent(merge " 766 "%d-%d) %d-%d (%d-%d)\n", 767 h->ip_id, cur->fr_off, cur->fr_end, off, 768 max, fra->fr_off, fra->fr_end)); 769 fra->fr_off = cur->fr_off; 770 LIST_REMOVE(cur, fr_next); 771 pool_put(&pf_cent_pl, cur); 772 pf_ncache--; 773 cur = NULL; 774 775 } else if (frp && fra->fr_off <= frp->fr_end) { 776 /* Need to merge in a modified 'frp' */ 777 KASSERT(cur == NULL); 778 DPFPRINTF(("fragcache[%d]: adjacent(merge " 779 "%d-%d) %d-%d (%d-%d)\n", 780 h->ip_id, frp->fr_off, frp->fr_end, off, 781 max, fra->fr_off, fra->fr_end)); 782 fra->fr_off = frp->fr_off; 783 LIST_REMOVE(frp, fr_next); 784 pool_put(&pf_cent_pl, frp); 785 pf_ncache--; 786 frp = NULL; 787 788 } 789 } 790 } 791 792 if (hosed) { 793 /* 794 * We must keep tracking the overall fragment even when 795 * we're going to drop it anyway so that we know when to 796 * free the overall descriptor. Thus we drop the frag late. 797 */ 798 goto drop_fragment; 799 } 800 801 802 pass: 803 /* Update maximum data size */ 804 if ((*frag)->fr_max < max) 805 (*frag)->fr_max = max; 806 807 /* This is the last segment */ 808 if (!mff) 809 (*frag)->fr_flags |= PFFRAG_SEENLAST; 810 811 /* Check if we are completely reassembled */ 812 if (((*frag)->fr_flags & PFFRAG_SEENLAST) && 813 LIST_FIRST(&(*frag)->fr_cache)->fr_off == 0 && 814 LIST_FIRST(&(*frag)->fr_cache)->fr_end == (*frag)->fr_max) { 815 /* Remove from fragment queue */ 816 DPFPRINTF(("fragcache[%d]: done 0-%d\n", h->ip_id, 817 (*frag)->fr_max)); 818 pf_free_fragment(*frag); 819 *frag = NULL; 820 } 821 822 return (m); 823 824 no_mem: 825 *nomem = 1; 826 827 /* Still need to pay attention to !IP_MF */ 828 if (!mff && *frag != NULL) 829 (*frag)->fr_flags |= PFFRAG_SEENLAST; 830 831 m_freem(m); 832 return (NULL); 833 834 drop_fragment: 835 836 /* Still need to pay attention to !IP_MF */ 837 if (!mff && *frag != NULL) 838 (*frag)->fr_flags |= PFFRAG_SEENLAST; 839 840 if (drop) { 841 /* This fragment has been deemed bad. Don't reass */ 842 if (((*frag)->fr_flags & PFFRAG_DROP) == 0) 843 DPFPRINTF(("fragcache[%d]: dropping overall fragment\n", 844 h->ip_id)); 845 (*frag)->fr_flags |= PFFRAG_DROP; 846 } 847 848 m_freem(m); 849 return (NULL); 850 } 851 852 int 853 pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, 854 struct pf_pdesc *pd) 855 { 856 struct mbuf *m = *m0; 857 struct pf_rule *r; 858 struct pf_frent *frent; 859 struct pf_fragment *frag = NULL; 860 struct ip *h = mtod(m, struct ip *); 861 int mff = (ntohs(h->ip_off) & IP_MF); 862 int hlen = h->ip_hl << 2; 863 u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3; 864 u_int16_t max; 865 int ip_len; 866 int ip_off; 867 868 r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr); 869 while (r != NULL) { 870 r->evaluations++; 871 if (r->kif != NULL && 872 (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot) 873 r = r->skip[PF_SKIP_IFP].ptr; 874 else if (r->direction && r->direction != dir) 875 r = r->skip[PF_SKIP_DIR].ptr; 876 else if (r->af && r->af != AF_INET) 877 r = r->skip[PF_SKIP_AF].ptr; 878 else if (r->proto && r->proto != h->ip_p) 879 r = r->skip[PF_SKIP_PROTO].ptr; 880 else if (PF_MISMATCHAW(&r->src.addr, 881 (struct pf_addr *)&h->ip_src.s_addr, AF_INET, r->src.neg)) 882 r = r->skip[PF_SKIP_SRC_ADDR].ptr; 883 else if (PF_MISMATCHAW(&r->dst.addr, 884 (struct pf_addr *)&h->ip_dst.s_addr, AF_INET, r->dst.neg)) 885 r = r->skip[PF_SKIP_DST_ADDR].ptr; 886 else 887 break; 888 } 889 890 if (r == NULL) 891 return (PF_PASS); 892 else 893 r->packets++; 894 895 /* Check for illegal packets */ 896 if (hlen < (int)sizeof(struct ip)) 897 goto drop; 898 899 if (hlen > ntohs(h->ip_len)) 900 goto drop; 901 902 /* Clear IP_DF if the rule uses the no-df option */ 903 if (r->rule_flag & PFRULE_NODF) 904 h->ip_off &= htons(~IP_DF); 905 906 /* We will need other tests here */ 907 if (!fragoff && !mff) 908 goto no_fragment; 909 910 /* We're dealing with a fragment now. Don't allow fragments 911 * with IP_DF to enter the cache. If the flag was cleared by 912 * no-df above, fine. Otherwise drop it. 913 */ 914 if (h->ip_off & htons(IP_DF)) { 915 DPFPRINTF(("IP_DF\n")); 916 goto bad; 917 } 918 919 ip_len = ntohs(h->ip_len) - hlen; 920 ip_off = (ntohs(h->ip_off) & IP_OFFMASK) << 3; 921 922 /* All fragments are 8 byte aligned */ 923 if (mff && (ip_len & 0x7)) { 924 DPFPRINTF(("mff and %d\n", ip_len)); 925 goto bad; 926 } 927 928 /* Respect maximum length */ 929 if (fragoff + ip_len > IP_MAXPACKET) { 930 DPFPRINTF(("max packet %d\n", fragoff + ip_len)); 931 goto bad; 932 } 933 max = fragoff + ip_len; 934 935 if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) { 936 /* Fully buffer all of the fragments */ 937 938 frag = pf_find_fragment(h, &pf_frag_tree); 939 940 /* Check if we saw the last fragment already */ 941 if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) && 942 max > frag->fr_max) 943 goto bad; 944 945 /* Get an entry for the fragment queue */ 946 frent = pool_get(&pf_frent_pl, PR_NOWAIT); 947 if (frent == NULL) { 948 REASON_SET(reason, PFRES_MEMORY); 949 return (PF_DROP); 950 } 951 pf_nfrents++; 952 frent->fr_ip = h; 953 frent->fr_m = m; 954 955 /* Might return a completely reassembled mbuf, or NULL */ 956 DPFPRINTF(("reass frag %d @ %d-%d\n", h->ip_id, fragoff, max)); 957 *m0 = m = pf_reassemble(m0, &frag, frent, mff); 958 959 if (m == NULL) 960 return (PF_DROP); 961 962 if (frag != NULL && (frag->fr_flags & PFFRAG_DROP)) 963 goto drop; 964 965 h = mtod(m, struct ip *); 966 } else { 967 /* non-buffering fragment cache (drops or masks overlaps) */ 968 int nomem = 0; 969 970 if (dir == PF_OUT) { 971 if (m_tag_find(m, PACKET_TAG_PF_FRAGCACHE, NULL) != 972 NULL) { 973 /* Already passed the fragment cache in the 974 * input direction. If we continued, it would 975 * appear to be a dup and would be dropped. 976 */ 977 goto fragment_pass; 978 } 979 } 980 981 frag = pf_find_fragment(h, &pf_cache_tree); 982 983 /* Check if we saw the last fragment already */ 984 if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) && 985 max > frag->fr_max) { 986 if (r->rule_flag & PFRULE_FRAGDROP) 987 frag->fr_flags |= PFFRAG_DROP; 988 goto bad; 989 } 990 991 *m0 = m = pf_fragcache(m0, h, &frag, mff, 992 (r->rule_flag & PFRULE_FRAGDROP) ? 1 : 0, &nomem); 993 if (m == NULL) { 994 if (nomem) 995 goto no_mem; 996 goto drop; 997 } 998 999 if (dir == PF_IN) { 1000 struct m_tag *mtag; 1001 1002 mtag = m_tag_get(PACKET_TAG_PF_FRAGCACHE, 0, M_NOWAIT); 1003 if (mtag == NULL) 1004 goto no_mem; 1005 m_tag_prepend(m, mtag); 1006 } 1007 if (frag != NULL && (frag->fr_flags & PFFRAG_DROP)) 1008 goto drop; 1009 goto fragment_pass; 1010 } 1011 1012 no_fragment: 1013 /* At this point, only IP_DF is allowed in ip_off */ 1014 h->ip_off &= htons(IP_DF); 1015 1016 /* Enforce a minimum ttl, may cause endless packet loops */ 1017 if (r->min_ttl && h->ip_ttl < r->min_ttl) 1018 h->ip_ttl = r->min_ttl; 1019 1020 if (r->rule_flag & PFRULE_RANDOMID) { 1021 u_int16_t ip_id = h->ip_id; 1022 1023 h->ip_id = ip_randomid(); 1024 h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_id, h->ip_id, 0); 1025 } 1026 if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) 1027 pd->flags |= PFDESC_IP_REAS; 1028 1029 return (PF_PASS); 1030 1031 fragment_pass: 1032 /* Enforce a minimum ttl, may cause endless packet loops */ 1033 if (r->min_ttl && h->ip_ttl < r->min_ttl) 1034 h->ip_ttl = r->min_ttl; 1035 if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) 1036 pd->flags |= PFDESC_IP_REAS; 1037 return (PF_PASS); 1038 1039 no_mem: 1040 REASON_SET(reason, PFRES_MEMORY); 1041 if (r != NULL && r->log) 1042 PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL); 1043 return (PF_DROP); 1044 1045 drop: 1046 REASON_SET(reason, PFRES_NORM); 1047 if (r != NULL && r->log) 1048 PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL); 1049 return (PF_DROP); 1050 1051 bad: 1052 DPFPRINTF(("dropping bad fragment\n")); 1053 1054 /* Free associated fragments */ 1055 if (frag != NULL) 1056 pf_free_fragment(frag); 1057 1058 REASON_SET(reason, PFRES_FRAG); 1059 if (r != NULL && r->log) 1060 PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL); 1061 1062 return (PF_DROP); 1063 } 1064 1065 #ifdef INET6 1066 int 1067 pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif, 1068 u_short *reason, struct pf_pdesc *pd) 1069 { 1070 struct mbuf *m = *m0; 1071 struct pf_rule *r; 1072 struct ip6_hdr *h = mtod(m, struct ip6_hdr *); 1073 int off; 1074 struct ip6_ext ext; 1075 struct ip6_opt opt; 1076 struct ip6_opt_jumbo jumbo; 1077 struct ip6_frag frag; 1078 u_int32_t jumbolen = 0, plen; 1079 u_int16_t fragoff = 0; 1080 int optend; 1081 int ooff; 1082 u_int8_t proto; 1083 int terminal; 1084 1085 r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr); 1086 while (r != NULL) { 1087 r->evaluations++; 1088 if (r->kif != NULL && 1089 (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot) 1090 r = r->skip[PF_SKIP_IFP].ptr; 1091 else if (r->direction && r->direction != dir) 1092 r = r->skip[PF_SKIP_DIR].ptr; 1093 else if (r->af && r->af != AF_INET6) 1094 r = r->skip[PF_SKIP_AF].ptr; 1095 #if 0 /* header chain! */ 1096 else if (r->proto && r->proto != h->ip6_nxt) 1097 r = r->skip[PF_SKIP_PROTO].ptr; 1098 #endif 1099 else if (PF_MISMATCHAW(&r->src.addr, 1100 (struct pf_addr *)&h->ip6_src, AF_INET6, r->src.neg)) 1101 r = r->skip[PF_SKIP_SRC_ADDR].ptr; 1102 else if (PF_MISMATCHAW(&r->dst.addr, 1103 (struct pf_addr *)&h->ip6_dst, AF_INET6, r->dst.neg)) 1104 r = r->skip[PF_SKIP_DST_ADDR].ptr; 1105 else 1106 break; 1107 } 1108 1109 if (r == NULL) 1110 return (PF_PASS); 1111 else 1112 r->packets++; 1113 1114 /* Check for illegal packets */ 1115 if (sizeof(struct ip6_hdr) + IPV6_MAXPACKET < m->m_pkthdr.len) 1116 goto drop; 1117 1118 off = sizeof(struct ip6_hdr); 1119 proto = h->ip6_nxt; 1120 terminal = 0; 1121 do { 1122 switch (proto) { 1123 case IPPROTO_FRAGMENT: 1124 goto fragment; 1125 break; 1126 case IPPROTO_AH: 1127 case IPPROTO_ROUTING: 1128 case IPPROTO_DSTOPTS: 1129 if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL, 1130 NULL, AF_INET6)) 1131 goto shortpkt; 1132 if (proto == IPPROTO_AH) 1133 off += (ext.ip6e_len + 2) * 4; 1134 else 1135 off += (ext.ip6e_len + 1) * 8; 1136 proto = ext.ip6e_nxt; 1137 break; 1138 case IPPROTO_HOPOPTS: 1139 if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL, 1140 NULL, AF_INET6)) 1141 goto shortpkt; 1142 optend = off + (ext.ip6e_len + 1) * 8; 1143 ooff = off + sizeof(ext); 1144 do { 1145 if (!pf_pull_hdr(m, ooff, &opt.ip6o_type, 1146 sizeof(opt.ip6o_type), NULL, NULL, 1147 AF_INET6)) 1148 goto shortpkt; 1149 if (opt.ip6o_type == IP6OPT_PAD1) { 1150 ooff++; 1151 continue; 1152 } 1153 if (!pf_pull_hdr(m, ooff, &opt, sizeof(opt), 1154 NULL, NULL, AF_INET6)) 1155 goto shortpkt; 1156 if (ooff + sizeof(opt) + opt.ip6o_len > optend) 1157 goto drop; 1158 switch (opt.ip6o_type) { 1159 case IP6OPT_JUMBO: 1160 if (h->ip6_plen != 0) 1161 goto drop; 1162 if (!pf_pull_hdr(m, ooff, &jumbo, 1163 sizeof(jumbo), NULL, NULL, 1164 AF_INET6)) 1165 goto shortpkt; 1166 memcpy(&jumbolen, jumbo.ip6oj_jumbo_len, 1167 sizeof(jumbolen)); 1168 jumbolen = ntohl(jumbolen); 1169 if (jumbolen <= IPV6_MAXPACKET) 1170 goto drop; 1171 if (sizeof(struct ip6_hdr) + jumbolen != 1172 m->m_pkthdr.len) 1173 goto drop; 1174 break; 1175 default: 1176 break; 1177 } 1178 ooff += sizeof(opt) + opt.ip6o_len; 1179 } while (ooff < optend); 1180 1181 off = optend; 1182 proto = ext.ip6e_nxt; 1183 break; 1184 default: 1185 terminal = 1; 1186 break; 1187 } 1188 } while (!terminal); 1189 1190 /* jumbo payload option must be present, or plen > 0 */ 1191 if (ntohs(h->ip6_plen) == 0) 1192 plen = jumbolen; 1193 else 1194 plen = ntohs(h->ip6_plen); 1195 if (plen == 0) 1196 goto drop; 1197 if (sizeof(struct ip6_hdr) + plen > m->m_pkthdr.len) 1198 goto shortpkt; 1199 1200 /* Enforce a minimum ttl, may cause endless packet loops */ 1201 if (r->min_ttl && h->ip6_hlim < r->min_ttl) 1202 h->ip6_hlim = r->min_ttl; 1203 1204 return (PF_PASS); 1205 1206 fragment: 1207 if (ntohs(h->ip6_plen) == 0 || jumbolen) 1208 goto drop; 1209 plen = ntohs(h->ip6_plen); 1210 1211 if (!pf_pull_hdr(m, off, &frag, sizeof(frag), NULL, NULL, AF_INET6)) 1212 goto shortpkt; 1213 fragoff = ntohs(frag.ip6f_offlg & IP6F_OFF_MASK); 1214 if (fragoff + (plen - off - sizeof(frag)) > IPV6_MAXPACKET) 1215 goto badfrag; 1216 1217 /* do something about it */ 1218 /* remember to set pd->flags |= PFDESC_IP_REAS */ 1219 return (PF_PASS); 1220 1221 shortpkt: 1222 REASON_SET(reason, PFRES_SHORT); 1223 if (r != NULL && r->log) 1224 PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL); 1225 return (PF_DROP); 1226 1227 drop: 1228 REASON_SET(reason, PFRES_NORM); 1229 if (r != NULL && r->log) 1230 PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL); 1231 return (PF_DROP); 1232 1233 badfrag: 1234 REASON_SET(reason, PFRES_FRAG); 1235 if (r != NULL && r->log) 1236 PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL); 1237 return (PF_DROP); 1238 } 1239 #endif /* INET6 */ 1240 1241 int 1242 pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, 1243 int ipoff, int off, void *h, struct pf_pdesc *pd) 1244 { 1245 struct pf_rule *r, *rm = NULL; 1246 struct tcphdr *th = pd->hdr.tcp; 1247 int rewrite = 0; 1248 u_short reason; 1249 u_int8_t flags; 1250 sa_family_t af = pd->af; 1251 1252 r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr); 1253 while (r != NULL) { 1254 r->evaluations++; 1255 if (r->kif != NULL && 1256 (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot) 1257 r = r->skip[PF_SKIP_IFP].ptr; 1258 else if (r->direction && r->direction != dir) 1259 r = r->skip[PF_SKIP_DIR].ptr; 1260 else if (r->af && r->af != af) 1261 r = r->skip[PF_SKIP_AF].ptr; 1262 else if (r->proto && r->proto != pd->proto) 1263 r = r->skip[PF_SKIP_PROTO].ptr; 1264 else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, r->src.neg)) 1265 r = r->skip[PF_SKIP_SRC_ADDR].ptr; 1266 else if (r->src.port_op && !pf_match_port(r->src.port_op, 1267 r->src.port[0], r->src.port[1], th->th_sport)) 1268 r = r->skip[PF_SKIP_SRC_PORT].ptr; 1269 else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, r->dst.neg)) 1270 r = r->skip[PF_SKIP_DST_ADDR].ptr; 1271 else if (r->dst.port_op && !pf_match_port(r->dst.port_op, 1272 r->dst.port[0], r->dst.port[1], th->th_dport)) 1273 r = r->skip[PF_SKIP_DST_PORT].ptr; 1274 else if (r->os_fingerprint != PF_OSFP_ANY && !pf_osfp_match( 1275 pf_osfp_fingerprint(pd, m, off, th), 1276 r->os_fingerprint)) 1277 r = TAILQ_NEXT(r, entries); 1278 else { 1279 rm = r; 1280 break; 1281 } 1282 } 1283 1284 if (rm == NULL || rm->action == PF_NOSCRUB) 1285 return (PF_PASS); 1286 else 1287 r->packets++; 1288 1289 if (rm->rule_flag & PFRULE_REASSEMBLE_TCP) 1290 pd->flags |= PFDESC_TCP_NORM; 1291 1292 flags = th->th_flags; 1293 if (flags & TH_SYN) { 1294 /* Illegal packet */ 1295 if (flags & TH_RST) 1296 goto tcp_drop; 1297 1298 if (flags & TH_FIN) 1299 flags &= ~TH_FIN; 1300 } else { 1301 /* Illegal packet */ 1302 if (!(flags & (TH_ACK|TH_RST))) 1303 goto tcp_drop; 1304 } 1305 1306 if (!(flags & TH_ACK)) { 1307 /* These flags are only valid if ACK is set */ 1308 if ((flags & TH_FIN) || (flags & TH_PUSH) || (flags & TH_URG)) 1309 goto tcp_drop; 1310 } 1311 1312 /* Check for illegal header length */ 1313 if (th->th_off < (sizeof(struct tcphdr) >> 2)) 1314 goto tcp_drop; 1315 1316 /* If flags changed, or reserved data set, then adjust */ 1317 if (flags != th->th_flags || th->th_x2 != 0) { 1318 u_int16_t ov, nv; 1319 1320 ov = *(u_int16_t *)(&th->th_ack + 1); 1321 th->th_flags = flags; 1322 th->th_x2 = 0; 1323 nv = *(u_int16_t *)(&th->th_ack + 1); 1324 1325 th->th_sum = pf_cksum_fixup(th->th_sum, ov, nv, 0); 1326 rewrite = 1; 1327 } 1328 1329 /* Remove urgent pointer, if TH_URG is not set */ 1330 if (!(flags & TH_URG) && th->th_urp) { 1331 th->th_sum = pf_cksum_fixup(th->th_sum, th->th_urp, 0, 0); 1332 th->th_urp = 0; 1333 rewrite = 1; 1334 } 1335 1336 /* Process options */ 1337 if (r->max_mss && pf_normalize_tcpopt(r, m, th, off)) 1338 rewrite = 1; 1339 1340 /* copy back packet headers if we sanitized */ 1341 if (rewrite) 1342 m_copyback(m, off, sizeof(*th), th); 1343 1344 return (PF_PASS); 1345 1346 tcp_drop: 1347 REASON_SET(&reason, PFRES_NORM); 1348 if (rm != NULL && r->log) 1349 PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, r, NULL, NULL); 1350 return (PF_DROP); 1351 } 1352 1353 int 1354 pf_normalize_tcp_init(struct mbuf *m, int off, struct pf_pdesc *pd, 1355 struct tcphdr *th, struct pf_state_peer *src, 1356 struct pf_state_peer *dst) 1357 { 1358 u_int32_t tsval, tsecr; 1359 u_int8_t hdr[60]; 1360 u_int8_t *opt; 1361 1362 KASSERT(src->scrub == NULL); 1363 1364 src->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT); 1365 if (src->scrub == NULL) 1366 return (1); 1367 bzero(src->scrub, sizeof(*src->scrub)); 1368 1369 switch (pd->af) { 1370 #ifdef INET 1371 case AF_INET: { 1372 struct ip *h = mtod(m, struct ip *); 1373 src->scrub->pfss_ttl = h->ip_ttl; 1374 break; 1375 } 1376 #endif /* INET */ 1377 #ifdef INET6 1378 case AF_INET6: { 1379 struct ip6_hdr *h = mtod(m, struct ip6_hdr *); 1380 src->scrub->pfss_ttl = h->ip6_hlim; 1381 break; 1382 } 1383 #endif /* INET6 */ 1384 } 1385 1386 1387 /* 1388 * All normalizations below are only begun if we see the start of 1389 * the connections. They must all set an enabled bit in pfss_flags 1390 */ 1391 if ((th->th_flags & TH_SYN) == 0) 1392 return (0); 1393 1394 1395 if (th->th_off > (sizeof(struct tcphdr) >> 2) && src->scrub && 1396 pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) { 1397 /* Diddle with TCP options */ 1398 int hlen; 1399 opt = hdr + sizeof(struct tcphdr); 1400 hlen = (th->th_off << 2) - sizeof(struct tcphdr); 1401 while (hlen >= TCPOLEN_TIMESTAMP) { 1402 switch (*opt) { 1403 case TCPOPT_EOL: /* FALLTHROUGH */ 1404 case TCPOPT_NOP: 1405 opt++; 1406 hlen--; 1407 break; 1408 case TCPOPT_TIMESTAMP: 1409 if (opt[1] >= TCPOLEN_TIMESTAMP) { 1410 src->scrub->pfss_flags |= 1411 PFSS_TIMESTAMP; 1412 src->scrub->pfss_ts_mod = 1413 htonl(arc4random()); 1414 1415 /* note PFSS_PAWS not set yet */ 1416 memcpy(&tsval, &opt[2], 1417 sizeof(u_int32_t)); 1418 memcpy(&tsecr, &opt[6], 1419 sizeof(u_int32_t)); 1420 src->scrub->pfss_tsval0 = ntohl(tsval); 1421 src->scrub->pfss_tsval = ntohl(tsval); 1422 src->scrub->pfss_tsecr = ntohl(tsecr); 1423 getmicrouptime(&src->scrub->pfss_last); 1424 } 1425 /* FALLTHROUGH */ 1426 default: 1427 hlen -= MAX(opt[1], 2); 1428 opt += MAX(opt[1], 2); 1429 break; 1430 } 1431 } 1432 } 1433 1434 return (0); 1435 } 1436 1437 void 1438 pf_normalize_tcp_cleanup(struct pf_state *state) 1439 { 1440 if (state->src.scrub) 1441 pool_put(&pf_state_scrub_pl, state->src.scrub); 1442 if (state->dst.scrub) 1443 pool_put(&pf_state_scrub_pl, state->dst.scrub); 1444 1445 /* Someday... flush the TCP segment reassembly descriptors. */ 1446 } 1447 1448 int 1449 pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, 1450 u_short *reason, struct tcphdr *th, struct pf_state *state, 1451 struct pf_state_peer *src, struct pf_state_peer *dst, int *writeback) 1452 { 1453 struct timeval uptime; 1454 u_int32_t tsval, tsecr; 1455 u_int tsval_from_last; 1456 u_int8_t hdr[60]; 1457 u_int8_t *opt; 1458 int copyback = 0; 1459 int got_ts = 0; 1460 1461 KASSERT(src->scrub || dst->scrub); 1462 1463 /* 1464 * Enforce the minimum TTL seen for this connection. Negate a common 1465 * technique to evade an intrusion detection system and confuse 1466 * firewall state code. 1467 */ 1468 switch (pd->af) { 1469 #ifdef INET 1470 case AF_INET: { 1471 if (src->scrub) { 1472 struct ip *h = mtod(m, struct ip *); 1473 if (h->ip_ttl > src->scrub->pfss_ttl) 1474 src->scrub->pfss_ttl = h->ip_ttl; 1475 h->ip_ttl = src->scrub->pfss_ttl; 1476 } 1477 break; 1478 } 1479 #endif /* INET */ 1480 #ifdef INET6 1481 case AF_INET6: { 1482 if (src->scrub) { 1483 struct ip6_hdr *h = mtod(m, struct ip6_hdr *); 1484 if (h->ip6_hlim > src->scrub->pfss_ttl) 1485 src->scrub->pfss_ttl = h->ip6_hlim; 1486 h->ip6_hlim = src->scrub->pfss_ttl; 1487 } 1488 break; 1489 } 1490 #endif /* INET6 */ 1491 } 1492 1493 if (th->th_off > (sizeof(struct tcphdr) >> 2) && 1494 ((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) || 1495 (dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) && 1496 pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) { 1497 /* Diddle with TCP options */ 1498 int hlen; 1499 opt = hdr + sizeof(struct tcphdr); 1500 hlen = (th->th_off << 2) - sizeof(struct tcphdr); 1501 while (hlen >= TCPOLEN_TIMESTAMP) { 1502 switch (*opt) { 1503 case TCPOPT_EOL: /* FALLTHROUGH */ 1504 case TCPOPT_NOP: 1505 opt++; 1506 hlen--; 1507 break; 1508 case TCPOPT_TIMESTAMP: 1509 /* Modulate the timestamps. Can be used for 1510 * NAT detection, OS uptime determination or 1511 * reboot detection. 1512 */ 1513 1514 if (got_ts) { 1515 /* Huh? Multiple timestamps!? */ 1516 if (pf_status.debug >= PF_DEBUG_MISC) { 1517 DPFPRINTF(("multiple TS??")); 1518 pf_print_state(state); 1519 printf("\n"); 1520 } 1521 REASON_SET(reason, PFRES_TS); 1522 return (PF_DROP); 1523 } 1524 if (opt[1] >= TCPOLEN_TIMESTAMP) { 1525 memcpy(&tsval, &opt[2], 1526 sizeof(u_int32_t)); 1527 if (tsval && src->scrub && 1528 (src->scrub->pfss_flags & 1529 PFSS_TIMESTAMP)) { 1530 tsval = ntohl(tsval); 1531 pf_change_a(&opt[2], 1532 &th->th_sum, 1533 htonl(tsval + 1534 src->scrub->pfss_ts_mod), 1535 0); 1536 copyback = 1; 1537 } 1538 1539 /* Modulate TS reply iff valid (!0) */ 1540 memcpy(&tsecr, &opt[6], 1541 sizeof(u_int32_t)); 1542 if (tsecr && dst->scrub && 1543 (dst->scrub->pfss_flags & 1544 PFSS_TIMESTAMP)) { 1545 tsecr = ntohl(tsecr) 1546 - dst->scrub->pfss_ts_mod; 1547 pf_change_a(&opt[6], 1548 &th->th_sum, htonl(tsecr), 1549 0); 1550 copyback = 1; 1551 } 1552 got_ts = 1; 1553 } 1554 /* FALLTHROUGH */ 1555 default: 1556 hlen -= MAX(opt[1], 2); 1557 opt += MAX(opt[1], 2); 1558 break; 1559 } 1560 } 1561 if (copyback) { 1562 /* Copyback the options, caller copys back header */ 1563 *writeback = 1; 1564 m_copyback(m, off + sizeof(struct tcphdr), 1565 (th->th_off << 2) - sizeof(struct tcphdr), hdr + 1566 sizeof(struct tcphdr)); 1567 } 1568 } 1569 1570 1571 /* 1572 * Must invalidate PAWS checks on connections idle for too long. 1573 * The fastest allowed timestamp clock is 1ms. That turns out to 1574 * be about 24 days before it wraps. XXX Right now our lowerbound 1575 * TS echo check only works for the first 12 days of a connection 1576 * when the TS has exhausted half its 32bit space 1577 */ 1578 #define TS_MAX_IDLE (24*24*60*60) 1579 #define TS_MAX_CONN (12*24*60*60) /* XXX remove when better tsecr check */ 1580 1581 getmicrouptime(&uptime); 1582 if (src->scrub && (src->scrub->pfss_flags & PFSS_PAWS) && 1583 (uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE || 1584 time_second - state->creation > TS_MAX_CONN)) { 1585 if (pf_status.debug >= PF_DEBUG_MISC) { 1586 DPFPRINTF(("src idled out of PAWS\n")); 1587 pf_print_state(state); 1588 printf("\n"); 1589 } 1590 src->scrub->pfss_flags = (src->scrub->pfss_flags & ~PFSS_PAWS) 1591 | PFSS_PAWS_IDLED; 1592 } 1593 if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) && 1594 uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) { 1595 if (pf_status.debug >= PF_DEBUG_MISC) { 1596 DPFPRINTF(("dst idled out of PAWS\n")); 1597 pf_print_state(state); 1598 printf("\n"); 1599 } 1600 dst->scrub->pfss_flags = (dst->scrub->pfss_flags & ~PFSS_PAWS) 1601 | PFSS_PAWS_IDLED; 1602 } 1603 1604 if (got_ts && src->scrub && dst->scrub && 1605 (src->scrub->pfss_flags & PFSS_PAWS) && 1606 (dst->scrub->pfss_flags & PFSS_PAWS)) { 1607 /* Validate that the timestamps are "in-window". 1608 * RFC1323 describes TCP Timestamp options that allow 1609 * measurement of RTT (round trip time) and PAWS 1610 * (protection against wrapped sequence numbers). PAWS 1611 * gives us a set of rules for rejecting packets on 1612 * long fat pipes (packets that were somehow delayed 1613 * in transit longer than the time it took to send the 1614 * full TCP sequence space of 4Gb). We can use these 1615 * rules and infer a few others that will let us treat 1616 * the 32bit timestamp and the 32bit echoed timestamp 1617 * as sequence numbers to prevent a blind attacker from 1618 * inserting packets into a connection. 1619 * 1620 * RFC1323 tells us: 1621 * - The timestamp on this packet must be greater than 1622 * or equal to the last value echoed by the other 1623 * endpoint. The RFC says those will be discarded 1624 * since it is a dup that has already been acked. 1625 * This gives us a lowerbound on the timestamp. 1626 * timestamp >= other last echoed timestamp 1627 * - The timestamp will be less than or equal to 1628 * the last timestamp plus the time between the 1629 * last packet and now. The RFC defines the max 1630 * clock rate as 1ms. We will allow clocks to be 1631 * up to 10% fast and will allow a total difference 1632 * or 30 seconds due to a route change. And this 1633 * gives us an upperbound on the timestamp. 1634 * timestamp <= last timestamp + max ticks 1635 * We have to be careful here. Windows will send an 1636 * initial timestamp of zero and then initialize it 1637 * to a random value after the 3whs; presumably to 1638 * avoid a DoS by having to call an expensive RNG 1639 * during a SYN flood. Proof MS has at least one 1640 * good security geek. 1641 * 1642 * - The TCP timestamp option must also echo the other 1643 * endpoints timestamp. The timestamp echoed is the 1644 * one carried on the earliest unacknowledged segment 1645 * on the left edge of the sequence window. The RFC 1646 * states that the host will reject any echoed 1647 * timestamps that were larger than any ever sent. 1648 * This gives us an upperbound on the TS echo. 1649 * tescr <= largest_tsval 1650 * - The lowerbound on the TS echo is a little more 1651 * tricky to determine. The other endpoint's echoed 1652 * values will not decrease. But there may be 1653 * network conditions that re-order packets and 1654 * cause our view of them to decrease. For now the 1655 * only lowerbound we can safely determine is that 1656 * the TS echo will never be less than the orginal 1657 * TS. XXX There is probably a better lowerbound. 1658 * Remove TS_MAX_CONN with better lowerbound check. 1659 * tescr >= other original TS 1660 * 1661 * It is also important to note that the fastest 1662 * timestamp clock of 1ms will wrap its 32bit space in 1663 * 24 days. So we just disable TS checking after 24 1664 * days of idle time. We actually must use a 12d 1665 * connection limit until we can come up with a better 1666 * lowerbound to the TS echo check. 1667 */ 1668 struct timeval delta_ts; 1669 int ts_fudge; 1670 1671 1672 /* 1673 * PFTM_TS_DIFF is how many seconds of leeway to allow 1674 * a host's timestamp. This can happen if the previous 1675 * packet got delayed in transit for much longer than 1676 * this packet. 1677 */ 1678 if ((ts_fudge = state->rule.ptr->timeout[PFTM_TS_DIFF]) == 0) 1679 ts_fudge = pf_default_rule.timeout[PFTM_TS_DIFF]; 1680 1681 1682 /* Calculate max ticks since the last timestamp */ 1683 #define TS_MAXFREQ 1100 /* RFC max TS freq of 1 kHz + 10% skew */ 1684 #define TS_MICROSECS 1000000 /* microseconds per second */ 1685 timersub(&uptime, &src->scrub->pfss_last, &delta_ts); 1686 tsval_from_last = (delta_ts.tv_sec + ts_fudge) * TS_MAXFREQ; 1687 tsval_from_last += delta_ts.tv_usec / (TS_MICROSECS/TS_MAXFREQ); 1688 1689 1690 if ((src->state >= TCPS_ESTABLISHED && 1691 dst->state >= TCPS_ESTABLISHED) && 1692 (SEQ_LT(tsval, dst->scrub->pfss_tsecr) || 1693 SEQ_GT(tsval, src->scrub->pfss_tsval + tsval_from_last) || 1694 (tsecr && (SEQ_GT(tsecr, dst->scrub->pfss_tsval) || 1695 SEQ_LT(tsecr, dst->scrub->pfss_tsval0))))) { 1696 /* Bad RFC1323 implementation or an insertion attack. 1697 * 1698 * - Solaris 2.6 and 2.7 are known to send another ACK 1699 * after the FIN,FIN|ACK,ACK closing that carries 1700 * an old timestamp. 1701 */ 1702 1703 DPFPRINTF(("Timestamp failed %c%c%c%c\n", 1704 SEQ_LT(tsval, dst->scrub->pfss_tsecr) ? '0' : ' ', 1705 SEQ_GT(tsval, src->scrub->pfss_tsval + 1706 tsval_from_last) ? '1' : ' ', 1707 SEQ_GT(tsecr, dst->scrub->pfss_tsval) ? '2' : ' ', 1708 SEQ_LT(tsecr, dst->scrub->pfss_tsval0)? '3' : ' ')); 1709 DPFPRINTF((" tsval: %" PRIu32 " tsecr: %" PRIu32 1710 " +ticks: %" PRIu32 " idle: %lus %lums\n", 1711 tsval, tsecr, tsval_from_last, delta_ts.tv_sec, 1712 delta_ts.tv_usec / 1000)); 1713 DPFPRINTF((" src->tsval: %" PRIu32 " tsecr: %" PRIu32 1714 "\n", 1715 src->scrub->pfss_tsval, src->scrub->pfss_tsecr)); 1716 DPFPRINTF((" dst->tsval: %" PRIu32 " tsecr: %" PRIu32 1717 " tsval0: %" PRIu32 "\n", 1718 dst->scrub->pfss_tsval, 1719 dst->scrub->pfss_tsecr, dst->scrub->pfss_tsval0)); 1720 if (pf_status.debug >= PF_DEBUG_MISC) { 1721 pf_print_state(state); 1722 pf_print_flags(th->th_flags); 1723 printf("\n"); 1724 } 1725 REASON_SET(reason, PFRES_TS); 1726 return (PF_DROP); 1727 } 1728 1729 /* XXX I'd really like to require tsecr but it's optional */ 1730 1731 } else if (!got_ts && (th->th_flags & TH_RST) == 0 && 1732 ((src->state == TCPS_ESTABLISHED && dst->state == TCPS_ESTABLISHED) 1733 || pd->p_len > 0 || (th->th_flags & TH_SYN)) && 1734 src->scrub && dst->scrub && 1735 (src->scrub->pfss_flags & PFSS_PAWS) && 1736 (dst->scrub->pfss_flags & PFSS_PAWS)) { 1737 /* Didn't send a timestamp. Timestamps aren't really useful 1738 * when: 1739 * - connection opening or closing (often not even sent). 1740 * but we must not let an attacker to put a FIN on a 1741 * data packet to sneak it through our ESTABLISHED check. 1742 * - on a TCP reset. RFC suggests not even looking at TS. 1743 * - on an empty ACK. The TS will not be echoed so it will 1744 * probably not help keep the RTT calculation in sync and 1745 * there isn't as much danger when the sequence numbers 1746 * got wrapped. So some stacks don't include TS on empty 1747 * ACKs :-( 1748 * 1749 * To minimize the disruption to mostly RFC1323 conformant 1750 * stacks, we will only require timestamps on data packets. 1751 * 1752 * And what do ya know, we cannot require timestamps on data 1753 * packets. There appear to be devices that do legitimate 1754 * TCP connection hijacking. There are HTTP devices that allow 1755 * a 3whs (with timestamps) and then buffer the HTTP request. 1756 * If the intermediate device has the HTTP response cache, it 1757 * will spoof the response but not bother timestamping its 1758 * packets. So we can look for the presence of a timestamp in 1759 * the first data packet and if there, require it in all future 1760 * packets. 1761 */ 1762 1763 if (pd->p_len > 0 && (src->scrub->pfss_flags & PFSS_DATA_TS)) { 1764 /* 1765 * Hey! Someone tried to sneak a packet in. Or the 1766 * stack changed its RFC1323 behavior?!?! 1767 */ 1768 if (pf_status.debug >= PF_DEBUG_MISC) { 1769 DPFPRINTF(("Did not receive expected RFC1323 " 1770 "timestamp\n")); 1771 pf_print_state(state); 1772 pf_print_flags(th->th_flags); 1773 printf("\n"); 1774 } 1775 REASON_SET(reason, PFRES_TS); 1776 return (PF_DROP); 1777 } 1778 } 1779 1780 1781 /* 1782 * We will note if a host sends his data packets with or without 1783 * timestamps. And require all data packets to contain a timestamp 1784 * if the first does. PAWS implicitly requires that all data packets be 1785 * timestamped. But I think there are middle-man devices that hijack 1786 * TCP streams immedietly after the 3whs and don't timestamp their 1787 * packets (seen in a WWW accelerator or cache). 1788 */ 1789 if (pd->p_len > 0 && src->scrub && (src->scrub->pfss_flags & 1790 (PFSS_TIMESTAMP|PFSS_DATA_TS|PFSS_DATA_NOTS)) == PFSS_TIMESTAMP) { 1791 if (got_ts) 1792 src->scrub->pfss_flags |= PFSS_DATA_TS; 1793 else { 1794 src->scrub->pfss_flags |= PFSS_DATA_NOTS; 1795 if (pf_status.debug >= PF_DEBUG_MISC && dst->scrub && 1796 (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) { 1797 /* Don't warn if other host rejected RFC1323 */ 1798 DPFPRINTF(("Broken RFC1323 stack did not " 1799 "timestamp data packet. Disabled PAWS " 1800 "security.\n")); 1801 pf_print_state(state); 1802 pf_print_flags(th->th_flags); 1803 printf("\n"); 1804 } 1805 } 1806 } 1807 1808 1809 /* 1810 * Update PAWS values 1811 */ 1812 if (got_ts && src->scrub && PFSS_TIMESTAMP == (src->scrub->pfss_flags & 1813 (PFSS_PAWS_IDLED|PFSS_TIMESTAMP))) { 1814 getmicrouptime(&src->scrub->pfss_last); 1815 if (SEQ_GEQ(tsval, src->scrub->pfss_tsval) || 1816 (src->scrub->pfss_flags & PFSS_PAWS) == 0) 1817 src->scrub->pfss_tsval = tsval; 1818 1819 if (tsecr) { 1820 if (SEQ_GEQ(tsecr, src->scrub->pfss_tsecr) || 1821 (src->scrub->pfss_flags & PFSS_PAWS) == 0) 1822 src->scrub->pfss_tsecr = tsecr; 1823 1824 if ((src->scrub->pfss_flags & PFSS_PAWS) == 0 && 1825 (SEQ_LT(tsval, src->scrub->pfss_tsval0) || 1826 src->scrub->pfss_tsval0 == 0)) { 1827 /* tsval0 MUST be the lowest timestamp */ 1828 src->scrub->pfss_tsval0 = tsval; 1829 } 1830 1831 /* Only fully initialized after a TS gets echoed */ 1832 if ((src->scrub->pfss_flags & PFSS_PAWS) == 0) 1833 src->scrub->pfss_flags |= PFSS_PAWS; 1834 } 1835 } 1836 1837 /* I have a dream.... TCP segment reassembly.... */ 1838 return (0); 1839 } 1840 1841 int 1842 pf_normalize_tcpopt(struct pf_rule *r, struct mbuf *m, struct tcphdr *th, 1843 int off) 1844 { 1845 u_int16_t *mss; 1846 int thoff; 1847 int opt, cnt, optlen = 0; 1848 int rewrite = 0; 1849 u_char *optp; 1850 1851 thoff = th->th_off << 2; 1852 cnt = thoff - sizeof(struct tcphdr); 1853 optp = mtod(m, u_char *) + off + sizeof(struct tcphdr); 1854 1855 for (; cnt > 0; cnt -= optlen, optp += optlen) { 1856 opt = optp[0]; 1857 if (opt == TCPOPT_EOL) 1858 break; 1859 if (opt == TCPOPT_NOP) 1860 optlen = 1; 1861 else { 1862 if (cnt < 2) 1863 break; 1864 optlen = optp[1]; 1865 if (optlen < 2 || optlen > cnt) 1866 break; 1867 } 1868 switch (opt) { 1869 case TCPOPT_MAXSEG: 1870 mss = (u_int16_t *)(optp + 2); 1871 if ((ntohs(*mss)) > r->max_mss) { 1872 th->th_sum = pf_cksum_fixup(th->th_sum, 1873 *mss, htons(r->max_mss), 0); 1874 *mss = htons(r->max_mss); 1875 rewrite = 1; 1876 } 1877 break; 1878 default: 1879 break; 1880 } 1881 } 1882 1883 return (rewrite); 1884 } 1885