/*	$OpenBSD: pf_norm.c,v 1.117 2009/04/07 13:26:23 henning Exp $ */

/*
 * Copyright 2001 Niels Provos <provos@citi.umich.edu>
 * Copyright 2009 Henning Brauer <henning@openbsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "pflog.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/filio.h>
#include <sys/fcntl.h>
#include <sys/socket.h>
#include <sys/kernel.h>
#include <sys/time.h>
#include <sys/pool.h>

#include <dev/rndvar.h>
#include <net/if.h>
#include <net/if_types.h>
#include <net/bpf.h>
#include <net/route.h>
#include <net/if_pflog.h>

#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_seq.h>
#include <netinet/udp.h>
#include <netinet/ip_icmp.h>

#ifdef INET6
#include <netinet/ip6.h>
#endif /* INET6 */

#include <net/pfvar.h>

struct pf_frent {
	LIST_ENTRY(pf_frent)	 fr_next;
	struct ip		*fr_ip;
	struct mbuf		*fr_m;
};

struct pf_frcache {
	LIST_ENTRY(pf_frcache)	fr_next;
	uint16_t		fr_off;
	uint16_t		fr_end;
};

#define PFFRAG_SEENLAST	0x0001		/* Seen the last fragment for this packet */
#define PFFRAG_NOBUFFER	0x0002		/* Non-buffering fragment cache */
#define PFFRAG_DROP	0x0004		/* Drop all fragments */
#define BUFFER_FRAGMENTS(fr)	(!((fr)->fr_flags & PFFRAG_NOBUFFER))

struct pf_fragment {
	RB_ENTRY(pf_fragment)	 fr_entry;
	TAILQ_ENTRY(pf_fragment) frag_next;
	struct in_addr	fr_src;
	struct in_addr	fr_dst;
	u_int8_t	fr_p;		/* protocol of this fragment */
	u_int8_t	fr_flags;	/* status flags */
	u_int16_t	fr_id;		/* fragment id for reassembly */
	u_int16_t	fr_max;		/* fragment data max */
	u_int32_t	fr_timeout;
#define fr_queue	fr_u.fru_queue
#define fr_cache	fr_u.fru_cache
	union {
		LIST_HEAD(pf_fragq, pf_frent)	 fru_queue;	/* buffering */
		LIST_HEAD(pf_cacheq, pf_frcache) fru_cache;	/* non-buf */
	} fr_u;
};

TAILQ_HEAD(pf_fragqueue, pf_fragment)	pf_fragqueue;
TAILQ_HEAD(pf_cachequeue, pf_fragment)	pf_cachequeue;

static __inline int	 pf_frag_compare(struct pf_fragment *,
			    struct pf_fragment *);
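
/*
 * Fragment state lives in two red-black trees keyed on (id, proto, src,
 * dst) via pf_frag_compare(): pf_frag_tree holds fully buffered
 * reassembly queues, pf_cache_tree the non-buffering cache entries.
 * The TAILQs declared above double as LRU lists; pf_find_fragment()
 * moves an entry to the head on every hit, so the tail is always the
 * oldest entry and is the first victim when expiring or flushing.
 */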
RB_HEAD(pf_frag_tree, pf_fragment)	pf_frag_tree, pf_cache_tree;
RB_PROTOTYPE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);

/* Private prototypes */
void			 pf_ip2key(struct pf_fragment *, struct ip *);
void			 pf_remove_fragment(struct pf_fragment *);
void			 pf_flush_fragments(void);
void			 pf_free_fragment(struct pf_fragment *);
struct pf_fragment	*pf_find_fragment(struct ip *, struct pf_frag_tree *);
struct mbuf		*pf_reassemble(struct mbuf **, struct pf_fragment **,
			    struct pf_frent *, int);
void			 pf_scrub_ip(struct mbuf **, u_int8_t, u_int8_t,
			    u_int8_t);
#ifdef INET6
void			 pf_scrub_ip6(struct mbuf **, u_int8_t);
#endif

#define	DPFPRINTF(x) do {				\
	if (pf_status.debug >= PF_DEBUG_MISC) {		\
		printf("%s: ", __func__);		\
		printf x ;				\
	}						\
} while(0)

/* Globals */
struct pool		 pf_frent_pl, pf_frag_pl, pf_cache_pl, pf_cent_pl;
struct pool		 pf_state_scrub_pl;
int			 pf_nfrents, pf_ncache;

void
pf_normalize_init(void)
{
	pool_init(&pf_frent_pl, sizeof(struct pf_frent), 0, 0, 0, "pffrent",
	    NULL);
	pool_init(&pf_frag_pl, sizeof(struct pf_fragment), 0, 0, 0, "pffrag",
	    NULL);
	pool_init(&pf_cache_pl, sizeof(struct pf_fragment), 0, 0, 0,
	    "pffrcache", NULL);
	pool_init(&pf_cent_pl, sizeof(struct pf_frcache), 0, 0, 0, "pffrcent",
	    NULL);
	pool_init(&pf_state_scrub_pl, sizeof(struct pf_state_scrub), 0, 0, 0,
	    "pfstscr", NULL);

	pool_sethiwat(&pf_frag_pl, PFFRAG_FRAG_HIWAT);
	pool_sethardlimit(&pf_frent_pl, PFFRAG_FRENT_HIWAT, NULL, 0);
	pool_sethardlimit(&pf_cache_pl, PFFRAG_FRCACHE_HIWAT, NULL, 0);
	pool_sethardlimit(&pf_cent_pl, PFFRAG_FRCENT_HIWAT, NULL, 0);

	TAILQ_INIT(&pf_fragqueue);
	TAILQ_INIT(&pf_cachequeue);
}

static __inline int
pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b)
{
	int	diff;

	if ((diff = a->fr_id - b->fr_id))
		return (diff);
	else if ((diff = a->fr_p - b->fr_p))
		return (diff);
	else if (a->fr_src.s_addr < b->fr_src.s_addr)
		return (-1);
	else if (a->fr_src.s_addr > b->fr_src.s_addr)
		return (1);
	else if (a->fr_dst.s_addr < b->fr_dst.s_addr)
		return (-1);
	else if (a->fr_dst.s_addr > b->fr_dst.s_addr)
		return (1);
	return (0);
}

void
pf_purge_expired_fragments(void)
{
	struct pf_fragment	*frag;
	u_int32_t		 expire = time_second -
				    pf_default_rule.timeout[PFTM_FRAG];

	while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) {
		KASSERT(BUFFER_FRAGMENTS(frag));
		if (frag->fr_timeout > expire)
			break;

		DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag));
		pf_free_fragment(frag);
	}

	while ((frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue)) != NULL) {
		KASSERT(!BUFFER_FRAGMENTS(frag));
		if (frag->fr_timeout > expire)
			break;

		DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag));
		pf_free_fragment(frag);
		KASSERT(TAILQ_EMPTY(&pf_cachequeue) ||
		    TAILQ_LAST(&pf_cachequeue, pf_cachequeue) != frag);
	}
}
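
/*
 * pf_flush_fragments() below is the emergency valve for the PR_NOWAIT
 * pool allocations: it walks each LRU queue from the tail and frees
 * whole reassembly queues until the frent and cache entry counts drop
 * to 90% of their current values, i.e. it frees at least 10% of each.
 */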
/*
 * Try to flush old fragments to make space for new ones
 */

void
pf_flush_fragments(void)
{
	struct pf_fragment	*frag;
	int			 goal;

	goal = pf_nfrents * 9 / 10;
	DPFPRINTF(("trying to free > %d frents\n",
	    pf_nfrents - goal));
	while (goal < pf_nfrents) {
		frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue);
		if (frag == NULL)
			break;
		pf_free_fragment(frag);
	}

	goal = pf_ncache * 9 / 10;
	DPFPRINTF(("trying to free > %d cache entries\n",
	    pf_ncache - goal));
	while (goal < pf_ncache) {
		frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue);
		if (frag == NULL)
			break;
		pf_free_fragment(frag);
	}
}

/* Frees the fragments and all associated entries */

void
pf_free_fragment(struct pf_fragment *frag)
{
	struct pf_frent		*frent;
	struct pf_frcache	*frcache;

	/* Free all fragments */
	if (BUFFER_FRAGMENTS(frag)) {
		for (frent = LIST_FIRST(&frag->fr_queue); frent;
		    frent = LIST_FIRST(&frag->fr_queue)) {
			LIST_REMOVE(frent, fr_next);

			m_freem(frent->fr_m);
			pool_put(&pf_frent_pl, frent);
			pf_nfrents--;
		}
	} else {
		for (frcache = LIST_FIRST(&frag->fr_cache); frcache;
		    frcache = LIST_FIRST(&frag->fr_cache)) {
			LIST_REMOVE(frcache, fr_next);

			KASSERT(LIST_EMPTY(&frag->fr_cache) ||
			    LIST_FIRST(&frag->fr_cache)->fr_off >
			    frcache->fr_end);

			pool_put(&pf_cent_pl, frcache);
			pf_ncache--;
		}
	}

	pf_remove_fragment(frag);
}

void
pf_ip2key(struct pf_fragment *key, struct ip *ip)
{
	key->fr_p = ip->ip_p;
	key->fr_id = ip->ip_id;
	key->fr_src.s_addr = ip->ip_src.s_addr;
	key->fr_dst.s_addr = ip->ip_dst.s_addr;
}

struct pf_fragment *
pf_find_fragment(struct ip *ip, struct pf_frag_tree *tree)
{
	struct pf_fragment	 key;
	struct pf_fragment	*frag;

	pf_ip2key(&key, ip);

	frag = RB_FIND(pf_frag_tree, tree, &key);
	if (frag != NULL) {
		/* XXX Are we sure we want to update the timeout? */
		frag->fr_timeout = time_second;
		if (BUFFER_FRAGMENTS(frag)) {
			TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
			TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next);
		} else {
			TAILQ_REMOVE(&pf_cachequeue, frag, frag_next);
			TAILQ_INSERT_HEAD(&pf_cachequeue, frag, frag_next);
		}
	}

	return (frag);
}

/* Removes a fragment from the fragment queue and frees the fragment */

void
pf_remove_fragment(struct pf_fragment *frag)
{
	if (BUFFER_FRAGMENTS(frag)) {
		RB_REMOVE(pf_frag_tree, &pf_frag_tree, frag);
		TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
		pool_put(&pf_frag_pl, frag);
	} else {
		RB_REMOVE(pf_frag_tree, &pf_cache_tree, frag);
		TAILQ_REMOVE(&pf_cachequeue, frag, frag_next);
		pool_put(&pf_cache_pl, frag);
	}
}
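
/*
 * pf_reassemble() keeps each fragment queue sorted by offset and free
 * of overlaps: a new fragment is trimmed at the front against its
 * predecessor ("precut") and existing successors are trimmed or dropped
 * where the new fragment overlaps them ("aftercut").  Once the last
 * fragment has been seen and the offsets form a gap-free chain, the
 * fragment mbufs are concatenated into a single packet and the queue
 * is torn down.
 */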
#define FR_IP_OFF(fr)	((ntohs((fr)->fr_ip->ip_off) & IP_OFFMASK) << 3)
struct mbuf *
pf_reassemble(struct mbuf **m0, struct pf_fragment **frag,
    struct pf_frent *frent, int mff)
{
	struct mbuf	*m = *m0, *m2;
	struct pf_frent	*frea, *next;
	struct pf_frent	*frep = NULL;
	struct ip	*ip = frent->fr_ip;
	int		 hlen = ip->ip_hl << 2;
	u_int16_t	 off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
	u_int16_t	 ip_len = ntohs(ip->ip_len) - ip->ip_hl * 4;
	u_int16_t	 max = ip_len + off;

	KASSERT(*frag == NULL || BUFFER_FRAGMENTS(*frag));

	/* Strip off ip header */
	m->m_data += hlen;
	m->m_len -= hlen;

	/* Create a new reassembly queue for this packet */
	if (*frag == NULL) {
		*frag = pool_get(&pf_frag_pl, PR_NOWAIT);
		if (*frag == NULL) {
			pf_flush_fragments();
			*frag = pool_get(&pf_frag_pl, PR_NOWAIT);
			if (*frag == NULL)
				goto drop_fragment;
		}

		(*frag)->fr_flags = 0;
		(*frag)->fr_max = 0;
		(*frag)->fr_src = frent->fr_ip->ip_src;
		(*frag)->fr_dst = frent->fr_ip->ip_dst;
		(*frag)->fr_p = frent->fr_ip->ip_p;
		(*frag)->fr_id = frent->fr_ip->ip_id;
		(*frag)->fr_timeout = time_second;
		LIST_INIT(&(*frag)->fr_queue);

		RB_INSERT(pf_frag_tree, &pf_frag_tree, *frag);
		TAILQ_INSERT_HEAD(&pf_fragqueue, *frag, frag_next);

		/* We do not have a previous fragment */
		frep = NULL;
		goto insert;
	}

	/*
	 * Find a fragment after the current one:
	 *  - off contains the real shifted offset.
	 */
	LIST_FOREACH(frea, &(*frag)->fr_queue, fr_next) {
		if (FR_IP_OFF(frea) > off)
			break;
		frep = frea;
	}

	KASSERT(frep != NULL || frea != NULL);

	if (frep != NULL &&
	    FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl *
	    4 > off)
	{
		u_int16_t	precut;

		precut = FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) -
		    frep->fr_ip->ip_hl * 4 - off;
		if (precut >= ip_len)
			goto drop_fragment;
		m_adj(frent->fr_m, precut);
		DPFPRINTF(("overlap -%d\n", precut));
		/* Enforce 8 byte boundaries */
		ip->ip_off = htons(ntohs(ip->ip_off) + (precut >> 3));
		off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
		ip_len -= precut;
		ip->ip_len = htons(ip_len);
	}

	for (; frea != NULL && ip_len + off > FR_IP_OFF(frea);
	    frea = next)
	{
		u_int16_t	aftercut;

		aftercut = ip_len + off - FR_IP_OFF(frea);
		DPFPRINTF(("adjust overlap %d\n", aftercut));
		if (aftercut < ntohs(frea->fr_ip->ip_len) - frea->fr_ip->ip_hl
		    * 4)
		{
			frea->fr_ip->ip_len =
			    htons(ntohs(frea->fr_ip->ip_len) - aftercut);
			frea->fr_ip->ip_off = htons(ntohs(frea->fr_ip->ip_off) +
			    (aftercut >> 3));
			m_adj(frea->fr_m, aftercut);
			break;
		}

		/* This fragment is completely overlapped, lose it */
		next = LIST_NEXT(frea, fr_next);
		m_freem(frea->fr_m);
		LIST_REMOVE(frea, fr_next);
		pool_put(&pf_frent_pl, frea);
		pf_nfrents--;
	}

insert:
	/* Update maximum data size */
	if ((*frag)->fr_max < max)
		(*frag)->fr_max = max;
	/* This is the last segment */
	if (!mff)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;

	if (frep == NULL)
		LIST_INSERT_HEAD(&(*frag)->fr_queue, frent, fr_next);
	else
		LIST_INSERT_AFTER(frep, frent, fr_next);

	/* Check if we are completely reassembled */
	if (!((*frag)->fr_flags & PFFRAG_SEENLAST))
		return (NULL);
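
	/*
	 * The walk below adds up the payload lengths in queue order and
	 * requires every successor to start exactly where the running
	 * total ends.  For example, fragments covering bytes 0-1479 and
	 * 1480-2959 of a packet with fr_max 2960 pass, while 0-1479
	 * followed by 2960-4439 fails at off 1480 and returns NULL to
	 * wait for the missing piece.
	 */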
	/* Check if we have all the data */
	off = 0;
	for (frep = LIST_FIRST(&(*frag)->fr_queue); frep; frep = next) {
		next = LIST_NEXT(frep, fr_next);

		off += ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl * 4;
		if (off < (*frag)->fr_max &&
		    (next == NULL || FR_IP_OFF(next) != off))
		{
			DPFPRINTF(("missing fragment at %d, next %d, max %d\n",
			    off, next == NULL ? -1 : FR_IP_OFF(next),
			    (*frag)->fr_max));
			return (NULL);
		}
	}
	DPFPRINTF(("%d < %d?\n", off, (*frag)->fr_max));
	if (off < (*frag)->fr_max)
		return (NULL);

	/* We have all the data */
	frent = LIST_FIRST(&(*frag)->fr_queue);
	KASSERT(frent != NULL);
	if ((frent->fr_ip->ip_hl << 2) + off > IP_MAXPACKET) {
		DPFPRINTF(("drop: too big: %d\n", off));
		pf_free_fragment(*frag);
		*frag = NULL;
		return (NULL);
	}
	next = LIST_NEXT(frent, fr_next);

	/* Magic from ip_input */
	ip = frent->fr_ip;
	m = frent->fr_m;
	m2 = m->m_next;
	m->m_next = NULL;
	m_cat(m, m2);
	pool_put(&pf_frent_pl, frent);
	pf_nfrents--;
	for (frent = next; frent != NULL; frent = next) {
		next = LIST_NEXT(frent, fr_next);

		m2 = frent->fr_m;
		pool_put(&pf_frent_pl, frent);
		pf_nfrents--;
		m_cat(m, m2);
	}

	ip->ip_src = (*frag)->fr_src;
	ip->ip_dst = (*frag)->fr_dst;

	/* Remove from fragment queue */
	pf_remove_fragment(*frag);
	*frag = NULL;

	hlen = ip->ip_hl << 2;
	ip->ip_len = htons(off + hlen);
	m->m_len += hlen;
	m->m_data -= hlen;

	/* some debugging cruft by sklower, below, will go away soon */
	/* XXX this should be done elsewhere */
	if (m->m_flags & M_PKTHDR) {
		int plen = 0;
		for (m2 = m; m2; m2 = m2->m_next)
			plen += m2->m_len;
		m->m_pkthdr.len = plen;
	}

	DPFPRINTF(("complete: %p(%d)\n", m, ntohs(ip->ip_len)));
	return (m);

drop_fragment:
	/* Oops - fail safe - drop packet */
	pool_put(&pf_frent_pl, frent);
	pf_nfrents--;
	m_freem(m);
	return (NULL);
}
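
/*
 * Normalize an IPv4 packet: sanity-check the header, strip or honor
 * IP_DF according to the no-df setting, and fully buffer fragments.
 * Returns PF_PASS with *m0 pointing at a complete (possibly freshly
 * reassembled) packet; fragments that do not yet complete a packet are
 * swallowed into the reassembly queue and reported as PF_DROP to the
 * caller.
 */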
int
pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason,
    struct pf_pdesc *pd)
{
	struct mbuf		*m = *m0;
	struct pf_frent		*frent;
	struct pf_fragment	*frag = NULL;
	struct ip		*h = mtod(m, struct ip *);
	int			 mff = (ntohs(h->ip_off) & IP_MF);
	int			 hlen = h->ip_hl << 2;
	u_int16_t		 fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
	u_int16_t		 max;
	int			 ip_len;
	int			 ip_off;

	/* Check for illegal packets */
	if (hlen < (int)sizeof(struct ip))
		goto drop;

	if (hlen > ntohs(h->ip_len))
		goto drop;

	/* Clear IP_DF if we're in no-df mode */
	if (!(pf_status.reass & PF_REASS_NODF) && h->ip_off & htons(IP_DF)) {
		u_int16_t ip_off = h->ip_off;

		h->ip_off &= htons(~IP_DF);
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0);
	}

	/* We will need other tests here */
	if (!fragoff && !mff)
		goto no_fragment;

	/* We're dealing with a fragment now. Don't allow fragments
	 * with IP_DF to enter the cache. If the flag was cleared by
	 * no-df above, fine. Otherwise drop it.
	 */
	if (h->ip_off & htons(IP_DF)) {
		DPFPRINTF(("IP_DF\n"));
		goto bad;
	}

	ip_len = ntohs(h->ip_len) - hlen;
	ip_off = (ntohs(h->ip_off) & IP_OFFMASK) << 3;

	/* All fragments are 8 byte aligned */
	if (mff && (ip_len & 0x7)) {
		DPFPRINTF(("mff and %d\n", ip_len));
		goto bad;
	}

	/* Respect maximum length */
	if (fragoff + ip_len > IP_MAXPACKET) {
		DPFPRINTF(("max packet %d\n", fragoff + ip_len));
		goto bad;
	}
	max = fragoff + ip_len;

	/* Fully buffer all of the fragments */
	frag = pf_find_fragment(h, &pf_frag_tree);

	/* Check if we saw the last fragment already */
	if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
	    max > frag->fr_max)
		goto bad;

	/* Get an entry for the fragment queue */
	frent = pool_get(&pf_frent_pl, PR_NOWAIT);
	if (frent == NULL) {
		REASON_SET(reason, PFRES_MEMORY);
		return (PF_DROP);
	}
	pf_nfrents++;
	frent->fr_ip = h;
	frent->fr_m = m;

	/* Might return a completely reassembled mbuf, or NULL */
	DPFPRINTF(("reass frag %d @ %d-%d\n", h->ip_id, fragoff, max));
	*m0 = m = pf_reassemble(m0, &frag, frent, mff);

	if (m == NULL)
		return (PF_DROP);

	if (frag != NULL && (frag->fr_flags & PFFRAG_DROP))
		goto drop;

	h = mtod(m, struct ip *);

no_fragment:
	/* At this point, only IP_DF is allowed in ip_off */
	if (h->ip_off & ~htons(IP_DF)) {
		u_int16_t ip_off = h->ip_off;

		h->ip_off &= htons(IP_DF);
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0);
	}

	pd->flags |= PFDESC_IP_REAS;
	return (PF_PASS);

drop:
	REASON_SET(reason, PFRES_NORM);
	return (PF_DROP);

bad:
	DPFPRINTF(("dropping bad fragment\n"));

	/* Free associated fragments */
	if (frag != NULL)
		pf_free_fragment(frag);

	REASON_SET(reason, PFRES_FRAG);

	return (PF_DROP);
}
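
/*
 * The IPv6 path only validates: it walks the extension header chain,
 * cross-checks jumbogram and payload lengths against the mbuf, and
 * sanity-checks fragment offsets, but performs no reassembly yet (note
 * the reminders at the "fragment" label below).
 */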
#ifdef INET6
int
pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif,
    u_short *reason, struct pf_pdesc *pd)
{
	struct mbuf		*m = *m0;
	struct ip6_hdr		*h = mtod(m, struct ip6_hdr *);
	int			 off;
	struct ip6_ext		 ext;
	struct ip6_opt		 opt;
	struct ip6_opt_jumbo	 jumbo;
	struct ip6_frag		 frag;
	u_int32_t		 jumbolen = 0, plen;
	u_int16_t		 fragoff = 0;
	int			 optend;
	int			 ooff;
	u_int8_t		 proto;
	int			 terminal;

	/* Check for illegal packets */
	if (sizeof(struct ip6_hdr) + IPV6_MAXPACKET < m->m_pkthdr.len)
		goto drop;

	off = sizeof(struct ip6_hdr);
	proto = h->ip6_nxt;
	terminal = 0;
	do {
		switch (proto) {
		case IPPROTO_FRAGMENT:
			goto fragment;
			break;
		case IPPROTO_AH:
		case IPPROTO_ROUTING:
		case IPPROTO_DSTOPTS:
			if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL,
			    NULL, AF_INET6))
				goto shortpkt;
			if (proto == IPPROTO_AH)
				off += (ext.ip6e_len + 2) * 4;
			else
				off += (ext.ip6e_len + 1) * 8;
			proto = ext.ip6e_nxt;
			break;
		case IPPROTO_HOPOPTS:
			if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL,
			    NULL, AF_INET6))
				goto shortpkt;
			optend = off + (ext.ip6e_len + 1) * 8;
			ooff = off + sizeof(ext);
			do {
				if (!pf_pull_hdr(m, ooff, &opt.ip6o_type,
				    sizeof(opt.ip6o_type), NULL, NULL,
				    AF_INET6))
					goto shortpkt;
				if (opt.ip6o_type == IP6OPT_PAD1) {
					ooff++;
					continue;
				}
				if (!pf_pull_hdr(m, ooff, &opt, sizeof(opt),
				    NULL, NULL, AF_INET6))
					goto shortpkt;
				if (ooff + sizeof(opt) + opt.ip6o_len > optend)
					goto drop;
				switch (opt.ip6o_type) {
				case IP6OPT_JUMBO:
					if (h->ip6_plen != 0)
						goto drop;
					if (!pf_pull_hdr(m, ooff, &jumbo,
					    sizeof(jumbo), NULL, NULL,
					    AF_INET6))
						goto shortpkt;
					memcpy(&jumbolen, jumbo.ip6oj_jumbo_len,
					    sizeof(jumbolen));
					jumbolen = ntohl(jumbolen);
					if (jumbolen <= IPV6_MAXPACKET)
						goto drop;
					if (sizeof(struct ip6_hdr) + jumbolen !=
					    m->m_pkthdr.len)
						goto drop;
					break;
				default:
					break;
				}
				ooff += sizeof(opt) + opt.ip6o_len;
			} while (ooff < optend);

			off = optend;
			proto = ext.ip6e_nxt;
			break;
		default:
			terminal = 1;
			break;
		}
	} while (!terminal);

	/* jumbo payload option must be present, or plen > 0 */
	if (ntohs(h->ip6_plen) == 0)
		plen = jumbolen;
	else
		plen = ntohs(h->ip6_plen);
	if (plen == 0)
		goto drop;
	if (sizeof(struct ip6_hdr) + plen > m->m_pkthdr.len)
		goto shortpkt;

	return (PF_PASS);

fragment:
	if (ntohs(h->ip6_plen) == 0 || jumbolen)
		goto drop;
	plen = ntohs(h->ip6_plen);

	if (!pf_pull_hdr(m, off, &frag, sizeof(frag), NULL, NULL, AF_INET6))
		goto shortpkt;
	fragoff = ntohs(frag.ip6f_offlg & IP6F_OFF_MASK);
	if (fragoff + (plen - off - sizeof(frag)) > IPV6_MAXPACKET)
		goto badfrag;

	/* do something about it */
	/* remember to set pd->flags |= PFDESC_IP_REAS */
	return (PF_PASS);

shortpkt:
	REASON_SET(reason, PFRES_SHORT);
	return (PF_DROP);

drop:
	REASON_SET(reason, PFRES_NORM);
	return (PF_DROP);

badfrag:
	REASON_SET(reason, PFRES_FRAG);
	return (PF_DROP);
}
#endif /* INET6 */

int
pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff,
    int off, void *h, struct pf_pdesc *pd)
{
	struct tcphdr	*th = pd->hdr.tcp;
	u_short		 reason;
	u_int8_t	 flags;
	u_int		 rewrite = 0;

	flags = th->th_flags;
	if (flags & TH_SYN) {
		/* Illegal packet */
		if (flags & TH_RST)
			goto tcp_drop;

		if (flags & TH_FIN)
			flags &= ~TH_FIN;
	} else {
		/* Illegal packet */
		if (!(flags & (TH_ACK|TH_RST)))
			goto tcp_drop;
	}

	if (!(flags & TH_ACK)) {
		/* These flags are only valid if ACK is set */
		if ((flags & TH_FIN) || (flags & TH_PUSH) || (flags & TH_URG))
			goto tcp_drop;
	}

	/* Check for illegal header length */
	if (th->th_off < (sizeof(struct tcphdr) >> 2))
		goto tcp_drop;

	/* If flags changed, or reserved data set, then adjust */
	if (flags != th->th_flags || th->th_x2 != 0) {
		u_int16_t	ov, nv;

		ov = *(u_int16_t *)(&th->th_ack + 1);
		th->th_flags = flags;
		th->th_x2 = 0;
		nv = *(u_int16_t *)(&th->th_ack + 1);

		th->th_sum = pf_cksum_fixup(th->th_sum, ov, nv, 0);
		rewrite = 1;
	}

	/* Remove urgent pointer, if TH_URG is not set */
	if (!(flags & TH_URG) && th->th_urp) {
		th->th_sum = pf_cksum_fixup(th->th_sum, th->th_urp, 0, 0);
		th->th_urp = 0;
		rewrite = 1;
	}

	/* copy back packet headers if we sanitized */
	if (rewrite)
		m_copyback(m, off, sizeof(*th), th);

	return (PF_PASS);

tcp_drop:
	REASON_SET(&reason, PFRES_NORM);
	return (PF_DROP);
}
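
/*
 * Set up scrub state when a TCP connection is first seen: record the
 * initial TTL and, if the SYN carries a timestamp option, pick a random
 * per-connection modulator (pfss_ts_mod) and remember the first
 * timestamp values as the baseline for the PAWS checks in
 * pf_normalize_tcp_stateful().
 */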
int
pf_normalize_tcp_init(struct mbuf *m, int off, struct pf_pdesc *pd,
    struct tcphdr *th, struct pf_state_peer *src, struct pf_state_peer *dst)
{
	u_int32_t tsval, tsecr;
	u_int8_t hdr[60];
	u_int8_t *opt;

	KASSERT(src->scrub == NULL);

	src->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT);
	if (src->scrub == NULL)
		return (1);
	bzero(src->scrub, sizeof(*src->scrub));

	switch (pd->af) {
#ifdef INET
	case AF_INET: {
		struct ip *h = mtod(m, struct ip *);
		src->scrub->pfss_ttl = h->ip_ttl;
		break;
	}
#endif /* INET */
#ifdef INET6
	case AF_INET6: {
		struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
		src->scrub->pfss_ttl = h->ip6_hlim;
		break;
	}
#endif /* INET6 */
	}

	/*
	 * All normalizations below are only begun if we see the start of
	 * the connection. They must all set an enabled bit in pfss_flags.
	 */
	if ((th->th_flags & TH_SYN) == 0)
		return (0);

	if (th->th_off > (sizeof(struct tcphdr) >> 2) && src->scrub &&
	    pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
		/* Diddle with TCP options */
		int hlen;
		opt = hdr + sizeof(struct tcphdr);
		hlen = (th->th_off << 2) - sizeof(struct tcphdr);
		while (hlen >= TCPOLEN_TIMESTAMP) {
			switch (*opt) {
			case TCPOPT_EOL:	/* FALLTHROUGH */
			case TCPOPT_NOP:
				opt++;
				hlen--;
				break;
			case TCPOPT_TIMESTAMP:
				if (opt[1] >= TCPOLEN_TIMESTAMP) {
					src->scrub->pfss_flags |=
					    PFSS_TIMESTAMP;
					src->scrub->pfss_ts_mod =
					    htonl(arc4random());

					/* note PFSS_PAWS not set yet */
					memcpy(&tsval, &opt[2],
					    sizeof(u_int32_t));
					memcpy(&tsecr, &opt[6],
					    sizeof(u_int32_t));
					src->scrub->pfss_tsval0 = ntohl(tsval);
					src->scrub->pfss_tsval = ntohl(tsval);
					src->scrub->pfss_tsecr = ntohl(tsecr);
					getmicrouptime(&src->scrub->pfss_last);
				}
				/* FALLTHROUGH */
			default:
				hlen -= MAX(opt[1], 2);
				opt += MAX(opt[1], 2);
				break;
			}
		}
	}

	return (0);
}

void
pf_normalize_tcp_cleanup(struct pf_state *state)
{
	if (state->src.scrub)
		pool_put(&pf_state_scrub_pl, state->src.scrub);
	if (state->dst.scrub)
		pool_put(&pf_state_scrub_pl, state->dst.scrub);

	/* Someday... flush the TCP segment reassembly descriptors. */
}
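
/*
 * Per-packet stateful scrubbing.  Timestamp modulation is symmetric: a
 * tsval leaving src is rewritten to tsval + src->scrub->pfss_ts_mod,
 * and the echoed tsecr coming back is rewritten to tsecr -
 * dst->scrub->pfss_ts_mod, so the wire only ever carries modulated
 * values while each endpoint gets its own original clock back.  E.g.
 * with a ts_mod of 1000, a sent tsval of 5 appears on the far side as
 * 1005, is echoed as tsecr 1005, and is restored to 5 here before the
 * PAWS checks run against the unmodulated values.
 */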
int
pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd,
    u_short *reason, struct tcphdr *th, struct pf_state *state,
    struct pf_state_peer *src, struct pf_state_peer *dst, int *writeback)
{
	struct timeval uptime;
	u_int32_t tsval, tsecr;
	u_int tsval_from_last;
	u_int8_t hdr[60];
	u_int8_t *opt;
	int copyback = 0;
	int got_ts = 0;

	KASSERT(src->scrub || dst->scrub);

	/*
	 * Enforce the minimum TTL seen for this connection. Negate a common
	 * technique to evade an intrusion detection system and confuse
	 * firewall state code.
	 */
	switch (pd->af) {
#ifdef INET
	case AF_INET: {
		if (src->scrub) {
			struct ip *h = mtod(m, struct ip *);
			if (h->ip_ttl > src->scrub->pfss_ttl)
				src->scrub->pfss_ttl = h->ip_ttl;
			h->ip_ttl = src->scrub->pfss_ttl;
		}
		break;
	}
#endif /* INET */
#ifdef INET6
	case AF_INET6: {
		if (src->scrub) {
			struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
			if (h->ip6_hlim > src->scrub->pfss_ttl)
				src->scrub->pfss_ttl = h->ip6_hlim;
			h->ip6_hlim = src->scrub->pfss_ttl;
		}
		break;
	}
#endif /* INET6 */
	}

	if (th->th_off > (sizeof(struct tcphdr) >> 2) &&
	    ((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) ||
	    (dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) &&
	    pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
		/* Diddle with TCP options */
		int hlen;
		opt = hdr + sizeof(struct tcphdr);
		hlen = (th->th_off << 2) - sizeof(struct tcphdr);
		while (hlen >= TCPOLEN_TIMESTAMP) {
			switch (*opt) {
			case TCPOPT_EOL:	/* FALLTHROUGH */
			case TCPOPT_NOP:
				opt++;
				hlen--;
				break;
			case TCPOPT_TIMESTAMP:
				/* Modulate the timestamps. Can be used for
				 * NAT detection, OS uptime determination or
				 * reboot detection.
				 */

				if (got_ts) {
					/* Huh? Multiple timestamps!? */
					if (pf_status.debug >= PF_DEBUG_MISC) {
						DPFPRINTF(("multiple TS??"));
						pf_print_state(state);
						printf("\n");
					}
					REASON_SET(reason, PFRES_TS);
					return (PF_DROP);
				}
				if (opt[1] >= TCPOLEN_TIMESTAMP) {
					memcpy(&tsval, &opt[2],
					    sizeof(u_int32_t));
					if (tsval && src->scrub &&
					    (src->scrub->pfss_flags &
					    PFSS_TIMESTAMP)) {
						tsval = ntohl(tsval);
						pf_change_a(&opt[2],
						    &th->th_sum,
						    htonl(tsval +
						    src->scrub->pfss_ts_mod),
						    0);
						copyback = 1;
					}

					/* Modulate TS reply iff valid (!0) */
					memcpy(&tsecr, &opt[6],
					    sizeof(u_int32_t));
					if (tsecr && dst->scrub &&
					    (dst->scrub->pfss_flags &
					    PFSS_TIMESTAMP)) {
						tsecr = ntohl(tsecr)
						    - dst->scrub->pfss_ts_mod;
						pf_change_a(&opt[6],
						    &th->th_sum, htonl(tsecr),
						    0);
						copyback = 1;
					}
					got_ts = 1;
				}
				/* FALLTHROUGH */
			default:
				hlen -= MAX(opt[1], 2);
				opt += MAX(opt[1], 2);
				break;
			}
		}
		if (copyback) {
			/* Copy back the options; the caller copies back
			 * the header */
			*writeback = 1;
			m_copyback(m, off + sizeof(struct tcphdr),
			    (th->th_off << 2) - sizeof(struct tcphdr), hdr +
			    sizeof(struct tcphdr));
		}
	}
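
	/*
	 * At the fastest allowed tick of 1ms a 32bit timestamp wraps
	 * after about 49.7 days, and the sequence-number-style
	 * comparisons used here are only unambiguous within half that
	 * space, roughly 24.8 days; hence the 24 and 12 day constants
	 * below.
	 */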

	/*
	 * Must invalidate PAWS checks on connections idle for too long.
	 * The fastest allowed timestamp clock is 1ms.  That turns out to
	 * be about 24 days before it wraps.  XXX Right now our lowerbound
	 * TS echo check only works for the first 12 days of a connection
	 * when the TS has exhausted half its 32bit space
	 */
#define TS_MAX_IDLE	(24*24*60*60)
#define TS_MAX_CONN	(12*24*60*60)	/* XXX remove when better tsecr check */

	getmicrouptime(&uptime);
	if (src->scrub && (src->scrub->pfss_flags & PFSS_PAWS) &&
	    (uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE ||
	    time_second - state->creation > TS_MAX_CONN)) {
		if (pf_status.debug >= PF_DEBUG_MISC) {
			DPFPRINTF(("src idled out of PAWS\n"));
			pf_print_state(state);
			printf("\n");
		}
		src->scrub->pfss_flags = (src->scrub->pfss_flags & ~PFSS_PAWS)
		    | PFSS_PAWS_IDLED;
	}
	if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) &&
	    uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) {
		if (pf_status.debug >= PF_DEBUG_MISC) {
			DPFPRINTF(("dst idled out of PAWS\n"));
			pf_print_state(state);
			printf("\n");
		}
		dst->scrub->pfss_flags = (dst->scrub->pfss_flags & ~PFSS_PAWS)
		    | PFSS_PAWS_IDLED;
	}

	if (got_ts && src->scrub && dst->scrub &&
	    (src->scrub->pfss_flags & PFSS_PAWS) &&
	    (dst->scrub->pfss_flags & PFSS_PAWS)) {
		/* Validate that the timestamps are "in-window".
		 * RFC1323 describes TCP Timestamp options that allow
		 * measurement of RTT (round trip time) and PAWS
		 * (protection against wrapped sequence numbers).  PAWS
		 * gives us a set of rules for rejecting packets on
		 * long fat pipes (packets that were somehow delayed
		 * in transit longer than the time it took to send the
		 * full TCP sequence space of 4Gb).  We can use these
		 * rules and infer a few others that will let us treat
		 * the 32bit timestamp and the 32bit echoed timestamp
		 * as sequence numbers to prevent a blind attacker from
		 * inserting packets into a connection.
		 *
		 * RFC1323 tells us:
		 *  - The timestamp on this packet must be greater than
		 *    or equal to the last value echoed by the other
		 *    endpoint.  The RFC says those will be discarded
		 *    since it is a dup that has already been acked.
		 *    This gives us a lowerbound on the timestamp.
		 *        timestamp >= other last echoed timestamp
		 *  - The timestamp will be less than or equal to
		 *    the last timestamp plus the time between the
		 *    last packet and now.  The RFC defines the max
		 *    clock rate as 1ms.  We will allow clocks to be
		 *    up to 10% fast and will allow a total difference
		 *    of 30 seconds due to a route change.  And this
		 *    gives us an upperbound on the timestamp.
		 *        timestamp <= last timestamp + max ticks
		 *    We have to be careful here.  Windows will send an
		 *    initial timestamp of zero and then initialize it
		 *    to a random value after the 3whs; presumably to
		 *    avoid a DoS by having to call an expensive RNG
		 *    during a SYN flood.  Proof MS has at least one
		 *    good security geek.
		 *
		 *  - The TCP timestamp option must also echo the other
		 *    endpoint's timestamp.  The timestamp echoed is the
		 *    one carried on the earliest unacknowledged segment
		 *    on the left edge of the sequence window.  The RFC
		 *    states that the host will reject any echoed
		 *    timestamps that were larger than any ever sent.
		 *    This gives us an upperbound on the TS echo.
		 *        tsecr <= largest_tsval
		 *  - The lowerbound on the TS echo is a little more
		 *    tricky to determine.  The other endpoint's echoed
		 *    values will not decrease.  But there may be
		 *    network conditions that re-order packets and
		 *    cause our view of them to decrease.  For now the
		 *    only lowerbound we can safely determine is that
		 *    the TS echo will never be less than the original
		 *    TS.  XXX There is probably a better lowerbound.
		 *    Remove TS_MAX_CONN with better lowerbound check.
		 *        tsecr >= other original TS
		 *
		 * It is also important to note that the fastest
		 * timestamp clock of 1ms will wrap its 32bit space in
		 * 24 days.  So we just disable TS checking after 24
		 * days of idle time.  We actually must use a 12d
		 * connection limit until we can come up with a better
		 * lowerbound to the TS echo check.
		 */
		struct timeval delta_ts;
		int ts_fudge;

		/*
		 * PFTM_TS_DIFF is how many seconds of leeway to allow
		 * a host's timestamp.  This can happen if the previous
		 * packet got delayed in transit for much longer than
		 * this packet.
		 */
		if ((ts_fudge = state->rule.ptr->timeout[PFTM_TS_DIFF]) == 0)
			ts_fudge = pf_default_rule.timeout[PFTM_TS_DIFF];

		/* Calculate max ticks since the last timestamp */
#define TS_MAXFREQ	1100		/* RFC max TS freq of 1Khz + 10% skew */
#define TS_MICROSECS	1000000		/* microseconds per second */
		timersub(&uptime, &src->scrub->pfss_last, &delta_ts);
		tsval_from_last = (delta_ts.tv_sec + ts_fudge) * TS_MAXFREQ;
		tsval_from_last += delta_ts.tv_usec / (TS_MICROSECS/TS_MAXFREQ);

		if ((src->state >= TCPS_ESTABLISHED &&
		    dst->state >= TCPS_ESTABLISHED) &&
		    (SEQ_LT(tsval, dst->scrub->pfss_tsecr) ||
		    SEQ_GT(tsval, src->scrub->pfss_tsval + tsval_from_last) ||
		    (tsecr && (SEQ_GT(tsecr, dst->scrub->pfss_tsval) ||
		    SEQ_LT(tsecr, dst->scrub->pfss_tsval0))))) {
			/* Bad RFC1323 implementation or an insertion attack.
			 *
			 * - Solaris 2.6 and 2.7 are known to send another ACK
			 *   after the FIN,FIN|ACK,ACK closing that carries
			 *   an old timestamp.
			 */

			DPFPRINTF(("Timestamp failed %c%c%c%c\n",
			    SEQ_LT(tsval, dst->scrub->pfss_tsecr) ? '0' : ' ',
			    SEQ_GT(tsval, src->scrub->pfss_tsval +
			    tsval_from_last) ? '1' : ' ',
			    SEQ_GT(tsecr, dst->scrub->pfss_tsval) ? '2' : ' ',
			    SEQ_LT(tsecr, dst->scrub->pfss_tsval0)? '3' : ' '));
			DPFPRINTF((" tsval: %lu tsecr: %lu +ticks: %lu "
			    "idle: %lus %lums\n",
			    tsval, tsecr, tsval_from_last, delta_ts.tv_sec,
			    delta_ts.tv_usec / 1000));
			DPFPRINTF((" src->tsval: %lu tsecr: %lu\n",
			    src->scrub->pfss_tsval, src->scrub->pfss_tsecr));
			DPFPRINTF((" dst->tsval: %lu tsecr: %lu tsval0: %lu"
			    "\n", dst->scrub->pfss_tsval,
			    dst->scrub->pfss_tsecr, dst->scrub->pfss_tsval0));
			if (pf_status.debug >= PF_DEBUG_MISC) {
				pf_print_state(state);
				pf_print_flags(th->th_flags);
				printf("\n");
			}
			REASON_SET(reason, PFRES_TS);
			return (PF_DROP);
		}

		/* XXX I'd really like to require tsecr but it's optional */

	} else if (!got_ts && (th->th_flags & TH_RST) == 0 &&
	    ((src->state == TCPS_ESTABLISHED && dst->state == TCPS_ESTABLISHED)
	    || pd->p_len > 0 || (th->th_flags & TH_SYN)) &&
	    src->scrub && dst->scrub &&
	    (src->scrub->pfss_flags & PFSS_PAWS) &&
	    (dst->scrub->pfss_flags & PFSS_PAWS)) {
		/* Didn't send a timestamp.  Timestamps aren't really useful
		 * when:
		 *  - connection opening or closing (often not even sent).
		 *    but we must not let an attacker put a FIN on a
		 *    data packet to sneak it through our ESTABLISHED check.
		 *  - on a TCP reset.  RFC suggests not even looking at TS.
		 *  - on an empty ACK.  The TS will not be echoed so it will
		 *    probably not help keep the RTT calculation in sync and
		 *    there isn't as much danger when the sequence numbers
		 *    got wrapped.  So some stacks don't include TS on empty
		 *    ACKs :-(
		 *
		 * To minimize the disruption to mostly RFC1323 conformant
		 * stacks, we will only require timestamps on data packets.
		 *
		 * And what do ya know, we cannot require timestamps on data
		 * packets.  There appear to be devices that do legitimate
		 * TCP connection hijacking.  There are HTTP devices that allow
		 * a 3whs (with timestamps) and then buffer the HTTP request.
		 * If the intermediate device has the HTTP response cache, it
		 * will spoof the response but not bother timestamping its
		 * packets.  So we can look for the presence of a timestamp in
		 * the first data packet and if there, require it in all future
		 * packets.
		 */

		if (pd->p_len > 0 && (src->scrub->pfss_flags & PFSS_DATA_TS)) {
			/*
			 * Hey!  Someone tried to sneak a packet in.  Or the
			 * stack changed its RFC1323 behavior?!?!
			 */
			if (pf_status.debug >= PF_DEBUG_MISC) {
				DPFPRINTF(("Did not receive expected RFC1323 "
				    "timestamp\n"));
				pf_print_state(state);
				pf_print_flags(th->th_flags);
				printf("\n");
			}
			REASON_SET(reason, PFRES_TS);
			return (PF_DROP);
		}
	}

	/*
	 * We will note if a host sends its data packets with or without
	 * timestamps.  And require all data packets to contain a timestamp
	 * if the first does.  PAWS implicitly requires that all data packets
	 * be timestamped.  But I think there are middle-man devices that
	 * hijack TCP streams immediately after the 3whs and don't timestamp
	 * their packets (seen in a WWW accelerator or cache).
	 */
	if (pd->p_len > 0 && src->scrub && (src->scrub->pfss_flags &
	    (PFSS_TIMESTAMP|PFSS_DATA_TS|PFSS_DATA_NOTS)) == PFSS_TIMESTAMP) {
		if (got_ts)
			src->scrub->pfss_flags |= PFSS_DATA_TS;
		else {
			src->scrub->pfss_flags |= PFSS_DATA_NOTS;
			if (pf_status.debug >= PF_DEBUG_MISC && dst->scrub &&
			    (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) {
				/* Don't warn if other host rejected RFC1323 */
				DPFPRINTF(("Broken RFC1323 stack did not "
				    "timestamp data packet. Disabled PAWS "
				    "security.\n"));
				pf_print_state(state);
				pf_print_flags(th->th_flags);
				printf("\n");
			}
		}
	}

	/*
	 * Update PAWS values
	 */
	if (got_ts && src->scrub && PFSS_TIMESTAMP == (src->scrub->pfss_flags &
	    (PFSS_PAWS_IDLED|PFSS_TIMESTAMP))) {
		getmicrouptime(&src->scrub->pfss_last);
		if (SEQ_GEQ(tsval, src->scrub->pfss_tsval) ||
		    (src->scrub->pfss_flags & PFSS_PAWS) == 0)
			src->scrub->pfss_tsval = tsval;

		if (tsecr) {
			if (SEQ_GEQ(tsecr, src->scrub->pfss_tsecr) ||
			    (src->scrub->pfss_flags & PFSS_PAWS) == 0)
				src->scrub->pfss_tsecr = tsecr;

			if ((src->scrub->pfss_flags & PFSS_PAWS) == 0 &&
			    (SEQ_LT(tsval, src->scrub->pfss_tsval0) ||
			    src->scrub->pfss_tsval0 == 0)) {
				/* tsval0 MUST be the lowest timestamp */
				src->scrub->pfss_tsval0 = tsval;
			}

			/* Only fully initialized after a TS gets echoed */
			if ((src->scrub->pfss_flags & PFSS_PAWS) == 0)
				src->scrub->pfss_flags |= PFSS_PAWS;
		}
	}

	/* I have a dream....  TCP segment reassembly.... */
	return (0);
}

int
pf_normalize_mss(struct mbuf *m, int off, struct pf_pdesc *pd, u_int16_t maxmss)
{
	struct tcphdr	*th = pd->hdr.tcp;
	u_int16_t	*mss;
	int		 thoff;
	int		 opt, cnt, optlen = 0;
	u_char		 opts[MAX_TCPOPTLEN];
	u_char		*optp = opts;

	thoff = th->th_off << 2;
	cnt = thoff - sizeof(struct tcphdr);

	if (cnt > 0 && !pf_pull_hdr(m, off + sizeof(*th), opts, cnt,
	    NULL, NULL, pd->af))
		return (0);

	for (; cnt > 0; cnt -= optlen, optp += optlen) {
		opt = optp[0];
		if (opt == TCPOPT_EOL)
			break;
		if (opt == TCPOPT_NOP)
			optlen = 1;
		else {
			if (cnt < 2)
				break;
			optlen = optp[1];
			if (optlen < 2 || optlen > cnt)
				break;
		}
		switch (opt) {
		case TCPOPT_MAXSEG:
			mss = (u_int16_t *)(optp + 2);
			if ((ntohs(*mss)) > maxmss) {
				th->th_sum = pf_cksum_fixup(th->th_sum,
				    *mss, htons(maxmss), 0);
				*mss = htons(maxmss);
				m_copyback(m, off + sizeof(*th),
				    thoff - sizeof(*th), opts);
				m_copyback(m, off, sizeof(*th), th);
			}
			break;
		default:
			break;
		}
	}

	return (0);
}
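
/*
 * The scrub helpers below rewrite individual IPv4 header fields and
 * repair ip_sum incrementally with pf_cksum_fixup() (fold the old 16bit
 * value out of the ones-complement sum and the new value in, in the
 * style of RFC 1624) instead of recomputing the checksum over the whole
 * header.  pf_scrub_ip6() needs no checksum work since IPv6 has no
 * header checksum.
 */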
void
pf_scrub_ip(struct mbuf **m0, u_int8_t flags, u_int8_t min_ttl, u_int8_t tos)
{
	struct mbuf		*m = *m0;
	struct ip		*h = mtod(m, struct ip *);

	/* Clear IP_DF if no-df was requested */
	if (flags & PFSTATE_NODF && h->ip_off & htons(IP_DF)) {
		u_int16_t ip_off = h->ip_off;

		h->ip_off &= htons(~IP_DF);
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0);
	}

	/* Enforce a minimum ttl, may cause endless packet loops */
	if (min_ttl && h->ip_ttl < min_ttl) {
		u_int16_t ip_ttl = h->ip_ttl;

		h->ip_ttl = min_ttl;
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0);
	}

	/* Enforce tos */
	if (flags & PFSTATE_SETTOS) {
		u_int16_t	ov, nv;

		ov = *(u_int16_t *)h;
		h->ip_tos = tos;
		nv = *(u_int16_t *)h;

		h->ip_sum = pf_cksum_fixup(h->ip_sum, ov, nv, 0);
	}

	/* random-id, but not for fragments */
	if (flags & PFSTATE_RANDOMID && !(h->ip_off & ~htons(IP_DF))) {
		u_int16_t ip_id = h->ip_id;

		h->ip_id = ip_randomid();
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_id, h->ip_id, 0);
	}
}

#ifdef INET6
void
pf_scrub_ip6(struct mbuf **m0, u_int8_t min_ttl)
{
	struct mbuf		*m = *m0;
	struct ip6_hdr		*h = mtod(m, struct ip6_hdr *);

	/* Enforce a minimum ttl, may cause endless packet loops */
	if (min_ttl && h->ip6_hlim < min_ttl)
		h->ip6_hlim = min_ttl;
}
#endif