/*	$OpenBSD: pf_norm.c,v 1.120 2009/09/01 15:51:06 jsing Exp $ */

/*
 * Copyright 2001 Niels Provos <provos@citi.umich.edu>
 * Copyright 2009 Henning Brauer <henning@openbsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "pflog.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/filio.h>
#include <sys/fcntl.h>
#include <sys/socket.h>
#include <sys/kernel.h>
#include <sys/time.h>
#include <sys/pool.h>

#include <dev/rndvar.h>
#include <net/if.h>
#include <net/if_types.h>
#include <net/bpf.h>
#include <net/route.h>
#include <net/if_pflog.h>

#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_seq.h>
#include <netinet/udp.h>
#include <netinet/ip_icmp.h>

#ifdef INET6
#include <netinet/ip6.h>
#endif /* INET6 */

#include <net/pfvar.h>

struct pf_frent {
	LIST_ENTRY(pf_frent)	 fr_next;
	struct ip		*fr_ip;
	struct mbuf		*fr_m;
};

struct pf_frcache {
	LIST_ENTRY(pf_frcache)	 fr_next;
	uint16_t		 fr_off;
	uint16_t		 fr_end;
};

#define PFFRAG_SEENLAST	0x0001		/* Seen the last fragment for this */
#define PFFRAG_NOBUFFER	0x0002		/* Non-buffering fragment cache */
#define PFFRAG_DROP	0x0004		/* Drop all fragments */
#define BUFFER_FRAGMENTS(fr)	(!((fr)->fr_flags & PFFRAG_NOBUFFER))

struct pf_fragment {
	RB_ENTRY(pf_fragment)	 fr_entry;
	TAILQ_ENTRY(pf_fragment) frag_next;
	struct in_addr		 fr_src;
	struct in_addr		 fr_dst;
	u_int8_t		 fr_p;		/* protocol of this fragment */
	u_int8_t		 fr_flags;	/* status flags */
	u_int16_t		 fr_id;		/* fragment id for reassemble */
	u_int16_t		 fr_max;	/* fragment data max */
	u_int32_t		 fr_timeout;
#define fr_queue	fr_u.fru_queue
#define fr_cache	fr_u.fru_cache
	union {
		LIST_HEAD(pf_fragq, pf_frent) fru_queue;	/* buffering */
		LIST_HEAD(pf_cacheq, pf_frcache) fru_cache;	/* non-buf */
	} fr_u;
};

TAILQ_HEAD(pf_fragqueue, pf_fragment)	pf_fragqueue;
TAILQ_HEAD(pf_cachequeue, pf_fragment)	pf_cachequeue;

static __inline int	 pf_frag_compare(struct pf_fragment *,
			    struct pf_fragment *);
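
/*
 * Fragments are tracked in two parallel structures: a red-black tree
 * keyed on (fr_id, fr_p, fr_src, fr_dst) (see pf_frag_compare() below)
 * for lookup, and a TAILQ kept in LRU order for timeout-driven
 * eviction.  Buffering reassembly (pf_frag_tree/pf_fragqueue) holds
 * the actual fragment mbufs; the non-buffering cache
 * (pf_cache_tree/pf_cachequeue) is keyed the same way but records only
 * the byte ranges already seen (fr_off..fr_end).
 */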
RB_HEAD(pf_frag_tree, pf_fragment)	pf_frag_tree, pf_cache_tree;
RB_PROTOTYPE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);

/* Private prototypes */
void			 pf_ip2key(struct pf_fragment *, struct ip *);
void			 pf_remove_fragment(struct pf_fragment *);
void			 pf_flush_fragments(void);
void			 pf_free_fragment(struct pf_fragment *);
struct pf_fragment	*pf_find_fragment(struct ip *, struct pf_frag_tree *);
struct mbuf		*pf_reassemble(struct mbuf **, struct pf_fragment **,
			    struct pf_frent *, int);

#define	DPFPRINTF(x) do {				\
	if (pf_status.debug >= PF_DEBUG_MISC) {		\
		printf("%s: ", __func__);		\
		printf x ;				\
	}						\
} while(0)

/* Globals */
struct pool		 pf_frent_pl, pf_frag_pl, pf_cache_pl, pf_cent_pl;
struct pool		 pf_state_scrub_pl;
int			 pf_nfrents, pf_ncache;

void
pf_normalize_init(void)
{
	pool_init(&pf_frent_pl, sizeof(struct pf_frent), 0, 0, 0, "pffrent",
	    NULL);
	pool_init(&pf_frag_pl, sizeof(struct pf_fragment), 0, 0, 0, "pffrag",
	    NULL);
	pool_init(&pf_cache_pl, sizeof(struct pf_fragment), 0, 0, 0,
	    "pffrcache", NULL);
	pool_init(&pf_cent_pl, sizeof(struct pf_frcache), 0, 0, 0, "pffrcent",
	    NULL);
	pool_init(&pf_state_scrub_pl, sizeof(struct pf_state_scrub), 0, 0, 0,
	    "pfstscr", NULL);

	pool_sethiwat(&pf_frag_pl, PFFRAG_FRAG_HIWAT);
	pool_sethardlimit(&pf_frent_pl, PFFRAG_FRENT_HIWAT, NULL, 0);
	pool_sethardlimit(&pf_cache_pl, PFFRAG_FRCACHE_HIWAT, NULL, 0);
	pool_sethardlimit(&pf_cent_pl, PFFRAG_FRCENT_HIWAT, NULL, 0);

	TAILQ_INIT(&pf_fragqueue);
	TAILQ_INIT(&pf_cachequeue);
}

static __inline int
pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b)
{
	int	diff;

	if ((diff = a->fr_id - b->fr_id))
		return (diff);
	else if ((diff = a->fr_p - b->fr_p))
		return (diff);
	else if (a->fr_src.s_addr < b->fr_src.s_addr)
		return (-1);
	else if (a->fr_src.s_addr > b->fr_src.s_addr)
		return (1);
	else if (a->fr_dst.s_addr < b->fr_dst.s_addr)
		return (-1);
	else if (a->fr_dst.s_addr > b->fr_dst.s_addr)
		return (1);
	return (0);
}

void
pf_purge_expired_fragments(void)
{
	struct pf_fragment	*frag;
	u_int32_t		 expire = time_second -
				    pf_default_rule.timeout[PFTM_FRAG];

	while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) {
		KASSERT(BUFFER_FRAGMENTS(frag));
		if (frag->fr_timeout > expire)
			break;

		DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag));
		pf_free_fragment(frag);
	}

	while ((frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue)) != NULL) {
		KASSERT(!BUFFER_FRAGMENTS(frag));
		if (frag->fr_timeout > expire)
			break;

		DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag));
		pf_free_fragment(frag);
		KASSERT(TAILQ_EMPTY(&pf_cachequeue) ||
		    TAILQ_LAST(&pf_cachequeue, pf_cachequeue) != frag);
	}
}

/*
 * Try to flush old fragments to make space for new ones
 */
void
pf_flush_fragments(void)
{
	struct pf_fragment	*frag;
	int			 goal;

	goal = pf_nfrents * 9 / 10;
	DPFPRINTF(("trying to free > %d frents\n",
	    pf_nfrents - goal));
	while (goal < pf_nfrents) {
		frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue);
		if (frag == NULL)
			break;
		pf_free_fragment(frag);
	}

	goal = pf_ncache * 9 / 10;
	DPFPRINTF(("trying to free > %d cache entries\n",
	    pf_ncache - goal));
	while (goal < pf_ncache) {
		frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue);
		if (frag == NULL)
			break;
		pf_free_fragment(frag);
	}
}
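
/*
 * The 90% goal above means each flush pass reclaims at least a tenth
 * of the entries, starting from the cold end of the LRU queues: e.g.
 * with pf_nfrents == 5000, goal becomes 4500 and at least 500 fragment
 * entries are freed before new allocations are retried.
 */
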
/* Frees the fragments and all associated entries */
void
pf_free_fragment(struct pf_fragment *frag)
{
	struct pf_frent		*frent;
	struct pf_frcache	*frcache;

	/* Free all fragments */
	if (BUFFER_FRAGMENTS(frag)) {
		for (frent = LIST_FIRST(&frag->fr_queue); frent;
		    frent = LIST_FIRST(&frag->fr_queue)) {
			LIST_REMOVE(frent, fr_next);

			m_freem(frent->fr_m);
			pool_put(&pf_frent_pl, frent);
			pf_nfrents--;
		}
	} else {
		for (frcache = LIST_FIRST(&frag->fr_cache); frcache;
		    frcache = LIST_FIRST(&frag->fr_cache)) {
			LIST_REMOVE(frcache, fr_next);

			KASSERT(LIST_EMPTY(&frag->fr_cache) ||
			    LIST_FIRST(&frag->fr_cache)->fr_off >
			    frcache->fr_end);

			pool_put(&pf_cent_pl, frcache);
			pf_ncache--;
		}
	}

	pf_remove_fragment(frag);
}

void
pf_ip2key(struct pf_fragment *key, struct ip *ip)
{
	key->fr_p = ip->ip_p;
	key->fr_id = ip->ip_id;
	key->fr_src.s_addr = ip->ip_src.s_addr;
	key->fr_dst.s_addr = ip->ip_dst.s_addr;
}

struct pf_fragment *
pf_find_fragment(struct ip *ip, struct pf_frag_tree *tree)
{
	struct pf_fragment	 key;
	struct pf_fragment	*frag;

	pf_ip2key(&key, ip);

	frag = RB_FIND(pf_frag_tree, tree, &key);
	if (frag != NULL) {
		/* XXX Are we sure we want to update the timeout? */
		frag->fr_timeout = time_second;
		if (BUFFER_FRAGMENTS(frag)) {
			TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
			TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next);
		} else {
			TAILQ_REMOVE(&pf_cachequeue, frag, frag_next);
			TAILQ_INSERT_HEAD(&pf_cachequeue, frag, frag_next);
		}
	}

	return (frag);
}

/* Removes a fragment from the fragment queue and frees the fragment */
void
pf_remove_fragment(struct pf_fragment *frag)
{
	if (BUFFER_FRAGMENTS(frag)) {
		RB_REMOVE(pf_frag_tree, &pf_frag_tree, frag);
		TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
		pool_put(&pf_frag_pl, frag);
	} else {
		RB_REMOVE(pf_frag_tree, &pf_cache_tree, frag);
		TAILQ_REMOVE(&pf_cachequeue, frag, frag_next);
		pool_put(&pf_cache_pl, frag);
	}
}

#define FR_IP_OFF(fr)	((ntohs((fr)->fr_ip->ip_off) & IP_OFFMASK) << 3)
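
/*
 * ip_off stores the fragment offset in 8-byte units, so FR_IP_OFF()
 * masks off the flag bits and shifts left by 3 to recover a byte
 * offset.  For example, a fragment with (ip_off & IP_OFFMASK) == 185
 * starts at byte 185 * 8 = 1480 of the reassembled payload, exactly
 * where a first fragment carrying 1480 data bytes ends.
 */
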
struct mbuf *
pf_reassemble(struct mbuf **m0, struct pf_fragment **frag,
    struct pf_frent *frent, int mff)
{
	struct mbuf	*m = *m0, *m2;
	struct pf_frent	*frea, *next;
	struct pf_frent	*frep = NULL;
	struct ip	*ip = frent->fr_ip;
	int		 hlen = ip->ip_hl << 2;
	u_int16_t	 off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
	u_int16_t	 ip_len = ntohs(ip->ip_len) - ip->ip_hl * 4;
	u_int16_t	 max = ip_len + off;

	KASSERT(*frag == NULL || BUFFER_FRAGMENTS(*frag));

	/* Strip off ip header */
	m->m_data += hlen;
	m->m_len -= hlen;

	/* Create a new reassembly queue for this packet */
	if (*frag == NULL) {
		*frag = pool_get(&pf_frag_pl, PR_NOWAIT);
		if (*frag == NULL) {
			pf_flush_fragments();
			*frag = pool_get(&pf_frag_pl, PR_NOWAIT);
			if (*frag == NULL)
				goto drop_fragment;
		}

		(*frag)->fr_flags = 0;
		(*frag)->fr_max = 0;
		(*frag)->fr_src = frent->fr_ip->ip_src;
		(*frag)->fr_dst = frent->fr_ip->ip_dst;
		(*frag)->fr_p = frent->fr_ip->ip_p;
		(*frag)->fr_id = frent->fr_ip->ip_id;
		(*frag)->fr_timeout = time_second;
		LIST_INIT(&(*frag)->fr_queue);

		RB_INSERT(pf_frag_tree, &pf_frag_tree, *frag);
		TAILQ_INSERT_HEAD(&pf_fragqueue, *frag, frag_next);

		/* We do not have a previous fragment */
		frep = NULL;
		goto insert;
	}

	/*
	 * Find a fragment after the current one:
	 *  - off contains the real shifted offset.
	 */
	LIST_FOREACH(frea, &(*frag)->fr_queue, fr_next) {
		if (FR_IP_OFF(frea) > off)
			break;
		frep = frea;
	}

	KASSERT(frep != NULL || frea != NULL);

	if (frep != NULL &&
	    FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl *
	    4 > off)
	{
		u_int16_t	precut;

		precut = FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) -
		    frep->fr_ip->ip_hl * 4 - off;
		if (precut >= ip_len)
			goto drop_fragment;
		m_adj(frent->fr_m, precut);
		DPFPRINTF(("overlap -%d\n", precut));
		/* Enforce 8 byte boundaries */
		ip->ip_off = htons(ntohs(ip->ip_off) + (precut >> 3));
		off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
		ip_len -= precut;
		ip->ip_len = htons(ip_len);
	}

	for (; frea != NULL && ip_len + off > FR_IP_OFF(frea);
	    frea = next)
	{
		u_int16_t	aftercut;

		aftercut = ip_len + off - FR_IP_OFF(frea);
		DPFPRINTF(("adjust overlap %d\n", aftercut));
		if (aftercut < ntohs(frea->fr_ip->ip_len) - frea->fr_ip->ip_hl
		    * 4)
		{
			frea->fr_ip->ip_len =
			    htons(ntohs(frea->fr_ip->ip_len) - aftercut);
			frea->fr_ip->ip_off = htons(ntohs(frea->fr_ip->ip_off) +
			    (aftercut >> 3));
			m_adj(frea->fr_m, aftercut);
			break;
		}

		/* This fragment is completely overlapped, lose it */
		next = LIST_NEXT(frea, fr_next);
		m_freem(frea->fr_m);
		LIST_REMOVE(frea, fr_next);
		pool_put(&pf_frent_pl, frea);
		pf_nfrents--;
	}
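
	/*
	 * Overlap example: if frep covers bytes 0-1479 and the new
	 * fragment claims 1472-2951, precut is 8, so m_adj() above trims
	 * the first 8 bytes of the new mbuf and its offset moves up to
	 * 1480.  Conversely, when the new fragment's tail runs aftercut
	 * bytes into a following fragment frea, frea is trimmed from the
	 * front or dropped entirely if completely covered.  Either way,
	 * the fragment that starts earlier keeps the overlapping bytes.
	 */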

insert:
	/* Update maximum data size */
	if ((*frag)->fr_max < max)
		(*frag)->fr_max = max;
	/* This is the last segment */
	if (!mff)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;

	if (frep == NULL)
		LIST_INSERT_HEAD(&(*frag)->fr_queue, frent, fr_next);
	else
		LIST_INSERT_AFTER(frep, frent, fr_next);

	/* Check if we are completely reassembled */
	if (!((*frag)->fr_flags & PFFRAG_SEENLAST))
		return (NULL);

	/* Check if we have all the data */
	off = 0;
	for (frep = LIST_FIRST(&(*frag)->fr_queue); frep; frep = next) {
		next = LIST_NEXT(frep, fr_next);

		off += ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl * 4;
		if (off < (*frag)->fr_max &&
		    (next == NULL || FR_IP_OFF(next) != off))
		{
			DPFPRINTF(("missing fragment at %d, next %d, max %d\n",
			    off, next == NULL ? -1 : FR_IP_OFF(next),
			    (*frag)->fr_max));
			return (NULL);
		}
	}
	DPFPRINTF(("%d < %d?\n", off, (*frag)->fr_max));
	if (off < (*frag)->fr_max)
		return (NULL);

	/* We have all the data */
	frent = LIST_FIRST(&(*frag)->fr_queue);
	KASSERT(frent != NULL);
	if ((frent->fr_ip->ip_hl << 2) + off > IP_MAXPACKET) {
		DPFPRINTF(("drop: too big: %d\n", off));
		pf_free_fragment(*frag);
		*frag = NULL;
		return (NULL);
	}
	next = LIST_NEXT(frent, fr_next);

	/* Magic from ip_input */
	ip = frent->fr_ip;
	m = frent->fr_m;
	m2 = m->m_next;
	m->m_next = NULL;
	m_cat(m, m2);
	pool_put(&pf_frent_pl, frent);
	pf_nfrents--;
	for (frent = next; frent != NULL; frent = next) {
		next = LIST_NEXT(frent, fr_next);

		m2 = frent->fr_m;
		pool_put(&pf_frent_pl, frent);
		pf_nfrents--;
		m_cat(m, m2);
	}

	ip->ip_src = (*frag)->fr_src;
	ip->ip_dst = (*frag)->fr_dst;

	/* Remove from fragment queue */
	pf_remove_fragment(*frag);
	*frag = NULL;

	hlen = ip->ip_hl << 2;
	ip->ip_len = htons(off + hlen);
	m->m_len += hlen;
	m->m_data -= hlen;

	/* some debugging cruft by sklower, below, will go away soon */
	/* XXX this should be done elsewhere */
	if (m->m_flags & M_PKTHDR) {
		int plen = 0;
		for (m2 = m; m2; m2 = m2->m_next)
			plen += m2->m_len;
		m->m_pkthdr.len = plen;
	}

	DPFPRINTF(("complete: %p(%d)\n", m, ntohs(ip->ip_len)));
	return (m);

drop_fragment:
	/* Oops - fail safe - drop packet */
	pool_put(&pf_frent_pl, frent);
	pf_nfrents--;
	m_freem(m);
	return (NULL);
}
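
/*
 * pf_reassemble() thus has three outcomes: NULL with *frag kept alive
 * (more fragments are still expected), NULL with the fragment or queue
 * dropped (bad or oversized data), or the fully reassembled packet
 * with its IP header restored and *frag cleared.
 */
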
int
pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason,
    struct pf_pdesc *pd)
{
	struct mbuf		*m = *m0;
	struct pf_frent		*frent;
	struct pf_fragment	*frag = NULL;
	struct ip		*h = mtod(m, struct ip *);
	int			 mff = (ntohs(h->ip_off) & IP_MF);
	int			 hlen = h->ip_hl << 2;
	u_int16_t		 fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
	u_int16_t		 max;
	int			 ip_len;
	int			 ip_off;

	/* Check for illegal packets */
	if (hlen < (int)sizeof(struct ip))
		goto drop;

	if (hlen > ntohs(h->ip_len))
		goto drop;

	/* Clear IP_DF if we're in no-df mode */
	if (pf_status.reass & PF_REASS_NODF && h->ip_off & htons(IP_DF)) {
		u_int16_t ip_off = h->ip_off;

		h->ip_off &= htons(~IP_DF);
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0);
	}

	/* We will need other tests here */
	if (!fragoff && !mff)
		goto no_fragment;

	/*
	 * We're dealing with a fragment now. Don't allow fragments
	 * with IP_DF to enter the cache. If the flag was cleared by
	 * no-df above, fine. Otherwise drop it.
	 */
	if (h->ip_off & htons(IP_DF)) {
		DPFPRINTF(("IP_DF\n"));
		goto bad;
	}

	ip_len = ntohs(h->ip_len) - hlen;
	ip_off = (ntohs(h->ip_off) & IP_OFFMASK) << 3;

	/* All fragments are 8 byte aligned */
	if (mff && (ip_len & 0x7)) {
		DPFPRINTF(("mff and %d\n", ip_len));
		goto bad;
	}

	/* Respect maximum length */
	if (fragoff + ip_len > IP_MAXPACKET) {
		DPFPRINTF(("max packet %d\n", fragoff + ip_len));
		goto bad;
	}
	max = fragoff + ip_len;

	/* Fully buffer all of the fragments */
	frag = pf_find_fragment(h, &pf_frag_tree);

	/* Check if we saw the last fragment already */
	if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
	    max > frag->fr_max)
		goto bad;

	/* Get an entry for the fragment queue */
	frent = pool_get(&pf_frent_pl, PR_NOWAIT);
	if (frent == NULL) {
		REASON_SET(reason, PFRES_MEMORY);
		return (PF_DROP);
	}
	pf_nfrents++;
	frent->fr_ip = h;
	frent->fr_m = m;

	/* Might return a completely reassembled mbuf, or NULL */
	DPFPRINTF(("reass frag %d @ %d-%d\n", h->ip_id, fragoff, max));
	*m0 = m = pf_reassemble(m0, &frag, frent, mff);

	if (m == NULL)
		return (PF_DROP);

	if (frag != NULL && (frag->fr_flags & PFFRAG_DROP))
		goto drop;

	h = mtod(m, struct ip *);

no_fragment:
	/* At this point, only IP_DF is allowed in ip_off */
	if (h->ip_off & ~htons(IP_DF)) {
		u_int16_t ip_off = h->ip_off;

		h->ip_off &= htons(IP_DF);
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0);
	}

	pd->flags |= PFDESC_IP_REAS;
	return (PF_PASS);

drop:
	REASON_SET(reason, PFRES_NORM);
	return (PF_DROP);

bad:
	DPFPRINTF(("dropping bad fragment\n"));

	/* Free associated fragments */
	if (frag != NULL)
		pf_free_fragment(frag);

	REASON_SET(reason, PFRES_FRAG);

	return (PF_DROP);
}
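
/*
 * Note on the checksum updates above: pf_cksum_fixup() (from pf.c)
 * applies an RFC 1624-style incremental update, HC' = ~(~HC + ~m + m'),
 * folding the old and new 16-bit words into the existing one's
 * complement sum instead of recomputing the checksum over the whole
 * header.
 */
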
#ifdef INET6
int
pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif,
    u_short *reason, struct pf_pdesc *pd)
{
	struct mbuf		*m = *m0;
	struct ip6_hdr		*h = mtod(m, struct ip6_hdr *);
	int			 off;
	struct ip6_ext		 ext;
	struct ip6_opt		 opt;
	struct ip6_opt_jumbo	 jumbo;
	struct ip6_frag		 frag;
	u_int32_t		 jumbolen = 0, plen;
	u_int16_t		 fragoff = 0;
	int			 optend;
	int			 ooff;
	u_int8_t		 proto;
	int			 terminal;

	/* Check for illegal packets */
	if (sizeof(struct ip6_hdr) + IPV6_MAXPACKET < m->m_pkthdr.len)
		goto drop;

	off = sizeof(struct ip6_hdr);
	proto = h->ip6_nxt;
	terminal = 0;
	do {
		switch (proto) {
		case IPPROTO_FRAGMENT:
			goto fragment;
			break;
		case IPPROTO_AH:
		case IPPROTO_ROUTING:
		case IPPROTO_DSTOPTS:
			if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL,
			    NULL, AF_INET6))
				goto shortpkt;
			if (proto == IPPROTO_AH)
				off += (ext.ip6e_len + 2) * 4;
			else
				off += (ext.ip6e_len + 1) * 8;
			proto = ext.ip6e_nxt;
			break;
		case IPPROTO_HOPOPTS:
			if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL,
			    NULL, AF_INET6))
				goto shortpkt;
			optend = off + (ext.ip6e_len + 1) * 8;
			ooff = off + sizeof(ext);
			do {
				if (!pf_pull_hdr(m, ooff, &opt.ip6o_type,
				    sizeof(opt.ip6o_type), NULL, NULL,
				    AF_INET6))
					goto shortpkt;
				if (opt.ip6o_type == IP6OPT_PAD1) {
					ooff++;
					continue;
				}
				if (!pf_pull_hdr(m, ooff, &opt, sizeof(opt),
				    NULL, NULL, AF_INET6))
					goto shortpkt;
				if (ooff + sizeof(opt) + opt.ip6o_len > optend)
					goto drop;
				switch (opt.ip6o_type) {
				case IP6OPT_JUMBO:
					if (h->ip6_plen != 0)
						goto drop;
					if (!pf_pull_hdr(m, ooff, &jumbo,
					    sizeof(jumbo), NULL, NULL,
					    AF_INET6))
						goto shortpkt;
					memcpy(&jumbolen, jumbo.ip6oj_jumbo_len,
					    sizeof(jumbolen));
					jumbolen = ntohl(jumbolen);
					if (jumbolen <= IPV6_MAXPACKET)
						goto drop;
					if (sizeof(struct ip6_hdr) + jumbolen !=
					    m->m_pkthdr.len)
						goto drop;
					break;
				default:
					break;
				}
				ooff += sizeof(opt) + opt.ip6o_len;
			} while (ooff < optend);

			off = optend;
			proto = ext.ip6e_nxt;
			break;
		default:
			terminal = 1;
			break;
		}
	} while (!terminal);

	/* jumbo payload option must be present, or plen > 0 */
	if (ntohs(h->ip6_plen) == 0)
		plen = jumbolen;
	else
		plen = ntohs(h->ip6_plen);
	if (plen == 0)
		goto drop;
	if (sizeof(struct ip6_hdr) + plen > m->m_pkthdr.len)
		goto shortpkt;

	return (PF_PASS);

fragment:
	if (ntohs(h->ip6_plen) == 0 || jumbolen)
		goto drop;
	plen = ntohs(h->ip6_plen);

	if (!pf_pull_hdr(m, off, &frag, sizeof(frag), NULL, NULL, AF_INET6))
		goto shortpkt;
	fragoff = ntohs(frag.ip6f_offlg & IP6F_OFF_MASK);
	if (fragoff + (plen - off - sizeof(frag)) > IPV6_MAXPACKET)
		goto badfrag;

	/* do something about it */
	/* remember to set pd->flags |= PFDESC_IP_REAS */
	return (PF_PASS);

shortpkt:
	REASON_SET(reason, PFRES_SHORT);
	return (PF_DROP);

drop:
	REASON_SET(reason, PFRES_NORM);
	return (PF_DROP);

badfrag:
	REASON_SET(reason, PFRES_FRAG);
	return (PF_DROP);
}
#endif /* INET6 */

int
pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff,
    int off, void *h, struct pf_pdesc *pd)
{
	struct tcphdr	*th = pd->hdr.tcp;
	u_short		 reason;
	u_int8_t	 flags;
	u_int		 rewrite = 0;

	flags = th->th_flags;
	if (flags & TH_SYN) {
		/* Illegal packet */
		if (flags & TH_RST)
			goto tcp_drop;

		if (flags & TH_FIN)
			flags &= ~TH_FIN;
	} else {
		/* Illegal packet */
		if (!(flags & (TH_ACK|TH_RST)))
			goto tcp_drop;
	}

	if (!(flags & TH_ACK)) {
		/* These flags are only valid if ACK is set */
		if ((flags & TH_FIN) || (flags & TH_PUSH) || (flags & TH_URG))
			goto tcp_drop;
	}

	/* Check for illegal header length */
	if (th->th_off < (sizeof(struct tcphdr) >> 2))
		goto tcp_drop;

	/* If flags changed, or reserved data set, then adjust */
	if (flags != th->th_flags || th->th_x2 != 0) {
		u_int16_t	ov, nv;

		ov = *(u_int16_t *)(&th->th_ack + 1);
		th->th_flags = flags;
		th->th_x2 = 0;
		nv = *(u_int16_t *)(&th->th_ack + 1);

		th->th_sum = pf_cksum_fixup(th->th_sum, ov, nv, 0);
		rewrite = 1;
	}

	/* Remove urgent pointer, if TH_URG is not set */
	if (!(flags & TH_URG) && th->th_urp) {
		th->th_sum = pf_cksum_fixup(th->th_sum, th->th_urp, 0, 0);
		th->th_urp = 0;
		rewrite = 1;
	}

	/* copy back packet headers if we sanitized */
	if (rewrite)
		m_copyback(m, off, sizeof(*th), th);

	return (PF_PASS);

tcp_drop:
	REASON_SET(&reason, PFRES_NORM);
	return (PF_DROP);
}
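
/*
 * The pointer arithmetic in the flag-sanitizing block above exploits
 * TCP header layout: &th->th_ack + 1 points just past the 32-bit ack
 * field, i.e. at the 16-bit word holding th_off, th_x2 and th_flags,
 * so a single pf_cksum_fixup() call covers both the cleared reserved
 * bits and any flag changes.
 */
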
int
pf_normalize_tcp_init(struct mbuf *m, int off, struct pf_pdesc *pd,
    struct tcphdr *th, struct pf_state_peer *src, struct pf_state_peer *dst)
{
	u_int32_t tsval, tsecr;
	u_int8_t hdr[60];
	u_int8_t *opt;

	KASSERT(src->scrub == NULL);

	src->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT);
	if (src->scrub == NULL)
		return (1);
	bzero(src->scrub, sizeof(*src->scrub));

	switch (pd->af) {
#ifdef INET
	case AF_INET: {
		struct ip *h = mtod(m, struct ip *);
		src->scrub->pfss_ttl = h->ip_ttl;
		break;
	}
#endif /* INET */
#ifdef INET6
	case AF_INET6: {
		struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
		src->scrub->pfss_ttl = h->ip6_hlim;
		break;
	}
#endif /* INET6 */
	}

	/*
	 * All normalizations below are only begun if we see the start of
	 * the connection. They must all set an enabled bit in pfss_flags.
	 */
	if ((th->th_flags & TH_SYN) == 0)
		return (0);

	if (th->th_off > (sizeof(struct tcphdr) >> 2) && src->scrub &&
	    pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
		/* Diddle with TCP options */
		int hlen;
		opt = hdr + sizeof(struct tcphdr);
		hlen = (th->th_off << 2) - sizeof(struct tcphdr);
		while (hlen >= TCPOLEN_TIMESTAMP) {
			switch (*opt) {
			case TCPOPT_EOL:	/* FALLTHROUGH */
			case TCPOPT_NOP:
				opt++;
				hlen--;
				break;
			case TCPOPT_TIMESTAMP:
				if (opt[1] >= TCPOLEN_TIMESTAMP) {
					src->scrub->pfss_flags |=
					    PFSS_TIMESTAMP;
					src->scrub->pfss_ts_mod =
					    htonl(arc4random());

					/* note PFSS_PAWS not set yet */
					memcpy(&tsval, &opt[2],
					    sizeof(u_int32_t));
					memcpy(&tsecr, &opt[6],
					    sizeof(u_int32_t));
					src->scrub->pfss_tsval0 = ntohl(tsval);
					src->scrub->pfss_tsval = ntohl(tsval);
					src->scrub->pfss_tsecr = ntohl(tsecr);
					getmicrouptime(&src->scrub->pfss_last);
				}
				/* FALLTHROUGH */
			default:
				hlen -= MAX(opt[1], 2);
				opt += MAX(opt[1], 2);
				break;
			}
		}
	}

	return (0);
}

void
pf_normalize_tcp_cleanup(struct pf_state *state)
{
	if (state->src.scrub)
		pool_put(&pf_state_scrub_pl, state->src.scrub);
	if (state->dst.scrub)
		pool_put(&pf_state_scrub_pl, state->dst.scrub);

	/* Someday... flush the TCP segment reassembly descriptors. */
}
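
/*
 * pf_normalize_tcp_stateful() below enforces two things per direction:
 * the minimum TTL recorded at connection setup, and TCP timestamp
 * modulation.  Modulation adds the per-peer random pfss_ts_mod to
 * every outgoing tsval and subtracts it from the peer's echoed tsecr,
 * hiding the host's real timestamp clock (and thus its uptime) from
 * the other end without breaking RTT measurement or PAWS.
 */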
int
pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd,
    u_short *reason, struct tcphdr *th, struct pf_state *state,
    struct pf_state_peer *src, struct pf_state_peer *dst, int *writeback)
{
	struct timeval uptime;
	u_int32_t tsval, tsecr;
	u_int tsval_from_last;
	u_int8_t hdr[60];
	u_int8_t *opt;
	int copyback = 0;
	int got_ts = 0;

	KASSERT(src->scrub || dst->scrub);

	/*
	 * Enforce the minimum TTL seen for this connection. Negate a common
	 * technique to evade an intrusion detection system and confuse
	 * firewall state code.
	 */
	switch (pd->af) {
#ifdef INET
	case AF_INET: {
		if (src->scrub) {
			struct ip *h = mtod(m, struct ip *);
			if (h->ip_ttl > src->scrub->pfss_ttl)
				src->scrub->pfss_ttl = h->ip_ttl;
			h->ip_ttl = src->scrub->pfss_ttl;
		}
		break;
	}
#endif /* INET */
#ifdef INET6
	case AF_INET6: {
		if (src->scrub) {
			struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
			if (h->ip6_hlim > src->scrub->pfss_ttl)
				src->scrub->pfss_ttl = h->ip6_hlim;
			h->ip6_hlim = src->scrub->pfss_ttl;
		}
		break;
	}
#endif /* INET6 */
	}

	if (th->th_off > (sizeof(struct tcphdr) >> 2) &&
	    ((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) ||
	    (dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) &&
	    pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
		/* Diddle with TCP options */
		int hlen;
		opt = hdr + sizeof(struct tcphdr);
		hlen = (th->th_off << 2) - sizeof(struct tcphdr);
		while (hlen >= TCPOLEN_TIMESTAMP) {
			switch (*opt) {
			case TCPOPT_EOL:	/* FALLTHROUGH */
			case TCPOPT_NOP:
				opt++;
				hlen--;
				break;
			case TCPOPT_TIMESTAMP:
				/*
				 * Modulate the timestamps. Can be used for
				 * NAT detection, OS uptime determination or
				 * reboot detection.
				 */

				if (got_ts) {
					/* Huh?  Multiple timestamps!? */
					if (pf_status.debug >= PF_DEBUG_MISC) {
						DPFPRINTF(("multiple TS??"));
						pf_print_state(state);
						printf("\n");
					}
					REASON_SET(reason, PFRES_TS);
					return (PF_DROP);
				}
				if (opt[1] >= TCPOLEN_TIMESTAMP) {
					memcpy(&tsval, &opt[2],
					    sizeof(u_int32_t));
					if (tsval && src->scrub &&
					    (src->scrub->pfss_flags &
					    PFSS_TIMESTAMP)) {
						tsval = ntohl(tsval);
						pf_change_a(&opt[2],
						    &th->th_sum,
						    htonl(tsval +
						    src->scrub->pfss_ts_mod),
						    0);
						copyback = 1;
					}

					/* Modulate TS reply iff valid (!0) */
					memcpy(&tsecr, &opt[6],
					    sizeof(u_int32_t));
					if (tsecr && dst->scrub &&
					    (dst->scrub->pfss_flags &
					    PFSS_TIMESTAMP)) {
						tsecr = ntohl(tsecr)
						    - dst->scrub->pfss_ts_mod;
						pf_change_a(&opt[6],
						    &th->th_sum, htonl(tsecr),
						    0);
						copyback = 1;
					}
					got_ts = 1;
				}
				/* FALLTHROUGH */
			default:
				hlen -= MAX(opt[1], 2);
				opt += MAX(opt[1], 2);
				break;
			}
		}
		if (copyback) {
			/* Copyback the options, caller copies back header */
			*writeback = 1;
			m_copyback(m, off + sizeof(struct tcphdr),
			    (th->th_off << 2) - sizeof(struct tcphdr), hdr +
			    sizeof(struct tcphdr));
		}
	}
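
	/*
	 * Example of the modulation above (values hypothetical): with
	 * pfss_ts_mod == 0x1b2c3d4e, an outgoing tsval of 5000 leaves as
	 * (5000 + 0x1b2c3d4e) mod 2^32, and the peer's echoed tsecr has
	 * the same constant subtracted on the way back in, so the sender
	 * always gets back the tsecr it expects while the peer only ever
	 * sees modulated values.
	 */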

	/*
	 * Must invalidate PAWS checks on connections idle for too long.
	 * The fastest allowed timestamp clock is 1ms. That turns out to
	 * be about 24 days before it wraps. XXX Right now our lowerbound
	 * TS echo check only works for the first 12 days of a connection,
	 * before the TS can exhaust half its 32bit space.
	 */
#define TS_MAX_IDLE	(24*24*60*60)
#define TS_MAX_CONN	(12*24*60*60)	/* XXX remove when better tsecr check */

	getmicrouptime(&uptime);
	if (src->scrub && (src->scrub->pfss_flags & PFSS_PAWS) &&
	    (uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE ||
	    time_second - state->creation > TS_MAX_CONN)) {
		if (pf_status.debug >= PF_DEBUG_MISC) {
			DPFPRINTF(("src idled out of PAWS\n"));
			pf_print_state(state);
			printf("\n");
		}
		src->scrub->pfss_flags = (src->scrub->pfss_flags & ~PFSS_PAWS)
		    | PFSS_PAWS_IDLED;
	}
	if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) &&
	    uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) {
		if (pf_status.debug >= PF_DEBUG_MISC) {
			DPFPRINTF(("dst idled out of PAWS\n"));
			pf_print_state(state);
			printf("\n");
		}
		dst->scrub->pfss_flags = (dst->scrub->pfss_flags & ~PFSS_PAWS)
		    | PFSS_PAWS_IDLED;
	}
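
	/*
	 * The 24-day figure: a 1kHz timestamp clock advances 2^31 ticks,
	 * half the 32-bit space and the limit of SEQ_LT/SEQ_GT-style
	 * signed comparison, in about 2^31 / 1000 / 86400 ~= 24.8 days,
	 * so TS_MAX_IDLE (24 days, in seconds) stays just inside that.
	 */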

	if (got_ts && src->scrub && dst->scrub &&
	    (src->scrub->pfss_flags & PFSS_PAWS) &&
	    (dst->scrub->pfss_flags & PFSS_PAWS)) {
		/*
		 * Validate that the timestamps are "in-window".
		 * RFC1323 describes TCP Timestamp options that allow
		 * measurement of RTT (round trip time) and PAWS
		 * (protection against wrapped sequence numbers). PAWS
		 * gives us a set of rules for rejecting packets on
		 * long fat pipes (packets that were somehow delayed
		 * in transit longer than the time it took to send the
		 * full TCP sequence space of 4GB). We can use these
		 * rules and infer a few others that will let us treat
		 * the 32bit timestamp and the 32bit echoed timestamp
		 * as sequence numbers to prevent a blind attacker from
		 * inserting packets into a connection.
		 *
		 * RFC1323 tells us:
		 *  - The timestamp on this packet must be greater than
		 *    or equal to the last value echoed by the other
		 *    endpoint. The RFC says those will be discarded
		 *    since it is a dup that has already been acked.
		 *    This gives us a lowerbound on the timestamp.
		 *        timestamp >= other last echoed timestamp
		 *  - The timestamp will be less than or equal to
		 *    the last timestamp plus the time between the
		 *    last packet and now. The RFC defines the max
		 *    clock rate as 1ms. We will allow clocks to be
		 *    up to 10% fast and will allow a total difference
		 *    of 30 seconds due to a route change. And this
		 *    gives us an upperbound on the timestamp.
		 *        timestamp <= last timestamp + max ticks
		 *    We have to be careful here. Windows will send an
		 *    initial timestamp of zero and then initialize it
		 *    to a random value after the 3whs; presumably to
		 *    avoid a DoS by having to call an expensive RNG
		 *    during a SYN flood. Proof MS has at least one
		 *    good security geek.
		 *
		 *  - The TCP timestamp option must also echo the other
		 *    endpoint's timestamp. The timestamp echoed is the
		 *    one carried on the earliest unacknowledged segment
		 *    on the left edge of the sequence window. The RFC
		 *    states that the host will reject any echoed
		 *    timestamps that were larger than any ever sent.
		 *    This gives us an upperbound on the TS echo.
		 *        tsecr <= largest_tsval
		 *  - The lowerbound on the TS echo is a little more
		 *    tricky to determine. The other endpoint's echoed
		 *    values will not decrease. But there may be
		 *    network conditions that re-order packets and
		 *    cause our view of them to decrease. For now the
		 *    only lowerbound we can safely determine is that
		 *    the TS echo will never be less than the original
		 *    TS. XXX There is probably a better lowerbound.
		 *    Remove TS_MAX_CONN with better lowerbound check.
		 *        tsecr >= other original TS
		 *
		 * It is also important to note that the fastest
		 * timestamp clock of 1ms will wrap its 32bit space in
		 * 24 days. So we just disable TS checking after 24
		 * days of idle time. We actually must use a 12d
		 * connection limit until we can come up with a better
		 * lowerbound to the TS echo check.
		 */
		struct timeval delta_ts;
		int ts_fudge;

		/*
		 * PFTM_TS_DIFF is how many seconds of leeway to allow
		 * a host's timestamp. This can happen if the previous
		 * packet got delayed in transit for much longer than
		 * this packet.
		 */
		if ((ts_fudge = state->rule.ptr->timeout[PFTM_TS_DIFF]) == 0)
			ts_fudge = pf_default_rule.timeout[PFTM_TS_DIFF];

		/* Calculate max ticks since the last timestamp */
#define TS_MAXFREQ	1100		/* RFC max TS freq of 1Khz + 10% skew */
#define TS_MICROSECS	1000000		/* microseconds per second */
		timersub(&uptime, &src->scrub->pfss_last, &delta_ts);
		tsval_from_last = (delta_ts.tv_sec + ts_fudge) * TS_MAXFREQ;
		tsval_from_last += delta_ts.tv_usec / (TS_MICROSECS/TS_MAXFREQ);
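
		/*
		 * Worked example (values hypothetical): if the peer has
		 * been quiet for 2.5 seconds and ts_fudge is 30, the
		 * timestamp may advance at most (2 + 30) * 1100 = 35200
		 * plus 500000 / (1000000/1100) = 550 ticks beyond the
		 * last tsval we saw.
		 */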

		if ((src->state >= TCPS_ESTABLISHED &&
		    dst->state >= TCPS_ESTABLISHED) &&
		    (SEQ_LT(tsval, dst->scrub->pfss_tsecr) ||
		    SEQ_GT(tsval, src->scrub->pfss_tsval + tsval_from_last) ||
		    (tsecr && (SEQ_GT(tsecr, dst->scrub->pfss_tsval) ||
		    SEQ_LT(tsecr, dst->scrub->pfss_tsval0))))) {
			/*
			 * Bad RFC1323 implementation or an insertion attack.
			 *
			 *  - Solaris 2.6 and 2.7 are known to send another
			 *    ACK after the FIN,FIN|ACK,ACK closing that
			 *    carries an old timestamp.
			 */

			DPFPRINTF(("Timestamp failed %c%c%c%c\n",
			    SEQ_LT(tsval, dst->scrub->pfss_tsecr) ? '0' : ' ',
			    SEQ_GT(tsval, src->scrub->pfss_tsval +
			    tsval_from_last) ? '1' : ' ',
			    SEQ_GT(tsecr, dst->scrub->pfss_tsval) ? '2' : ' ',
			    SEQ_LT(tsecr, dst->scrub->pfss_tsval0)? '3' : ' '));
			DPFPRINTF((" tsval: %lu  tsecr: %lu  +ticks: %lu  "
			    "idle: %lus %lums\n",
			    tsval, tsecr, tsval_from_last, delta_ts.tv_sec,
			    delta_ts.tv_usec / 1000));
			DPFPRINTF((" src->tsval: %lu  tsecr: %lu\n",
			    src->scrub->pfss_tsval, src->scrub->pfss_tsecr));
			DPFPRINTF((" dst->tsval: %lu  tsecr: %lu  tsval0: %lu"
			    "\n", dst->scrub->pfss_tsval,
			    dst->scrub->pfss_tsecr, dst->scrub->pfss_tsval0));
			if (pf_status.debug >= PF_DEBUG_MISC) {
				pf_print_state(state);
				pf_print_flags(th->th_flags);
				printf("\n");
			}
			REASON_SET(reason, PFRES_TS);
			return (PF_DROP);
		}

		/* XXX I'd really like to require tsecr but it's optional */

	} else if (!got_ts && (th->th_flags & TH_RST) == 0 &&
	    ((src->state == TCPS_ESTABLISHED && dst->state == TCPS_ESTABLISHED)
	    || pd->p_len > 0 || (th->th_flags & TH_SYN)) &&
	    src->scrub && dst->scrub &&
	    (src->scrub->pfss_flags & PFSS_PAWS) &&
	    (dst->scrub->pfss_flags & PFSS_PAWS)) {
		/*
		 * Didn't send a timestamp. Timestamps aren't really useful
		 * when:
		 *  - connection opening or closing (often not even sent),
		 *    but we must not let an attacker put a FIN on a
		 *    data packet to sneak it through our ESTABLISHED check.
		 *  - on a TCP reset. The RFC suggests not even looking at TS.
		 *  - on an empty ACK. The TS will not be echoed so it will
		 *    probably not help keep the RTT calculation in sync and
		 *    there isn't as much danger when the sequence numbers
		 *    got wrapped. So some stacks don't include TS on empty
		 *    ACKs :-(
		 *
		 * To minimize the disruption to mostly RFC1323 conformant
		 * stacks, we will only require timestamps on data packets.
		 *
		 * And what do ya know, we cannot require timestamps on data
		 * packets. There appear to be devices that do legitimate
		 * TCP connection hijacking. There are HTTP devices that allow
		 * a 3whs (with timestamps) and then buffer the HTTP request.
		 * If the intermediate device has the HTTP response cached, it
		 * will spoof the response but not bother timestamping its
		 * packets. So we can look for the presence of a timestamp in
		 * the first data packet and if there, require it in all future
		 * packets.
		 */

		if (pd->p_len > 0 && (src->scrub->pfss_flags & PFSS_DATA_TS)) {
			/*
			 * Hey!  Someone tried to sneak a packet in. Or the
			 * stack changed its RFC1323 behavior?!?!
			 */
			if (pf_status.debug >= PF_DEBUG_MISC) {
				DPFPRINTF(("Did not receive expected RFC1323 "
				    "timestamp\n"));
				pf_print_state(state);
				pf_print_flags(th->th_flags);
				printf("\n");
			}
			REASON_SET(reason, PFRES_TS);
			return (PF_DROP);
		}
	}

	/*
	 * We will note if a host sends its data packets with or without
	 * timestamps, and require all data packets to contain a timestamp
	 * if the first one does. PAWS implicitly requires that all data
	 * packets be timestamped. But I think there are middle-man devices
	 * that hijack TCP streams immediately after the 3whs and don't
	 * timestamp their packets (seen in a WWW accelerator or cache).
	 */
	if (pd->p_len > 0 && src->scrub && (src->scrub->pfss_flags &
	    (PFSS_TIMESTAMP|PFSS_DATA_TS|PFSS_DATA_NOTS)) == PFSS_TIMESTAMP) {
		if (got_ts)
			src->scrub->pfss_flags |= PFSS_DATA_TS;
		else {
			src->scrub->pfss_flags |= PFSS_DATA_NOTS;
			if (pf_status.debug >= PF_DEBUG_MISC && dst->scrub &&
			    (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) {
				/* Don't warn if other host rejected RFC1323 */
				DPFPRINTF(("Broken RFC1323 stack did not "
				    "timestamp data packet. Disabled PAWS "
				    "security.\n"));
				pf_print_state(state);
				pf_print_flags(th->th_flags);
				printf("\n");
			}
		}
	}
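
	/*
	 * The update below maintains two invariants: pfss_tsval0 holds
	 * the lowest timestamp ever seen from the peer (the anchor for
	 * the tsecr lowerbound above), and PFSS_PAWS is only set once
	 * one of our timestamps has actually been echoed back, since
	 * only then can both bounds be enforced.
	 */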

	/*
	 * Update PAWS values
	 */
	if (got_ts && src->scrub && PFSS_TIMESTAMP == (src->scrub->pfss_flags &
	    (PFSS_PAWS_IDLED|PFSS_TIMESTAMP))) {
		getmicrouptime(&src->scrub->pfss_last);
		if (SEQ_GEQ(tsval, src->scrub->pfss_tsval) ||
		    (src->scrub->pfss_flags & PFSS_PAWS) == 0)
			src->scrub->pfss_tsval = tsval;

		if (tsecr) {
			if (SEQ_GEQ(tsecr, src->scrub->pfss_tsecr) ||
			    (src->scrub->pfss_flags & PFSS_PAWS) == 0)
				src->scrub->pfss_tsecr = tsecr;

			if ((src->scrub->pfss_flags & PFSS_PAWS) == 0 &&
			    (SEQ_LT(tsval, src->scrub->pfss_tsval0) ||
			    src->scrub->pfss_tsval0 == 0)) {
				/* tsval0 MUST be the lowest timestamp */
				src->scrub->pfss_tsval0 = tsval;
			}

			/* Only fully initialized after a TS gets echoed */
			if ((src->scrub->pfss_flags & PFSS_PAWS) == 0)
				src->scrub->pfss_flags |= PFSS_PAWS;
		}
	}

	/* I have a dream....  TCP segment reassembly.... */
	return (0);
}

int
pf_normalize_mss(struct mbuf *m, int off, struct pf_pdesc *pd, u_int16_t maxmss)
{
	struct tcphdr	*th = pd->hdr.tcp;
	u_int16_t	*mss;
	int		 thoff;
	int		 opt, cnt, optlen = 0;
	u_char		 opts[MAX_TCPOPTLEN];
	u_char		*optp = opts;

	thoff = th->th_off << 2;
	cnt = thoff - sizeof(struct tcphdr);

	if (cnt > 0 && !pf_pull_hdr(m, off + sizeof(*th), opts, cnt,
	    NULL, NULL, pd->af))
		return (0);

	for (; cnt > 0; cnt -= optlen, optp += optlen) {
		opt = optp[0];
		if (opt == TCPOPT_EOL)
			break;
		if (opt == TCPOPT_NOP)
			optlen = 1;
		else {
			if (cnt < 2)
				break;
			optlen = optp[1];
			if (optlen < 2 || optlen > cnt)
				break;
		}
		switch (opt) {
		case TCPOPT_MAXSEG:
			mss = (u_int16_t *)(optp + 2);
			if ((ntohs(*mss)) > maxmss) {
				th->th_sum = pf_cksum_fixup(th->th_sum,
				    *mss, htons(maxmss), 0);
				*mss = htons(maxmss);
				m_copyback(m, off + sizeof(*th),
				    thoff - sizeof(*th), opts);
				m_copyback(m, off, sizeof(*th), th);
			}
			break;
		default:
			break;
		}
	}

	return (0);
}
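
/*
 * MSS clamping example (numbers hypothetical): with a max-mss scrub
 * rule of 1440 on a tunnel interface, a SYN advertising an MSS of 1460
 * gets the option rewritten to 1440 above, with the TCP checksum
 * patched incrementally, so the peer never sends full-size segments
 * that would have to be fragmented inside the tunnel.
 */
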
void
pf_scrub_ip(struct mbuf **m0, u_int16_t flags, u_int8_t min_ttl, u_int8_t tos)
{
	struct mbuf		*m = *m0;
	struct ip		*h = mtod(m, struct ip *);

	/* Clear IP_DF if no-df was requested */
	if (flags & PFSTATE_NODF && h->ip_off & htons(IP_DF)) {
		u_int16_t ip_off = h->ip_off;

		h->ip_off &= htons(~IP_DF);
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0);
	}

	/* Enforce a minimum ttl, may cause endless packet loops */
	if (min_ttl && h->ip_ttl < min_ttl) {
		u_int16_t ip_ttl = h->ip_ttl;

		h->ip_ttl = min_ttl;
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0);
	}

	/* Enforce tos */
	if (flags & PFSTATE_SETTOS) {
		u_int16_t	ov, nv;

		ov = *(u_int16_t *)h;
		h->ip_tos = tos;
		nv = *(u_int16_t *)h;

		h->ip_sum = pf_cksum_fixup(h->ip_sum, ov, nv, 0);
	}

	/* random-id, but not for fragments */
	if (flags & PFSTATE_RANDOMID && !(h->ip_off & ~htons(IP_DF))) {
		u_int16_t ip_id = h->ip_id;

		h->ip_id = ip_randomid();
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_id, h->ip_id, 0);
	}
}

#ifdef INET6
void
pf_scrub_ip6(struct mbuf **m0, u_int8_t min_ttl)
{
	struct mbuf		*m = *m0;
	struct ip6_hdr		*h = mtod(m, struct ip6_hdr *);

	/* Enforce a minimum ttl, may cause endless packet loops */
	if (min_ttl && h->ip6_hlim < min_ttl)
		h->ip6_hlim = min_ttl;
}
#endif