1 /* $NetBSD: pf_norm.c,v 1.21 2009/07/28 18:15:26 minskim Exp $ */ 2 /* $OpenBSD: pf_norm.c,v 1.109 2007/05/28 17:16:39 henning Exp $ */ 3 4 /* 5 * Copyright 2001 Niels Provos <provos@citi.umich.edu> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __KERNEL_RCSID(0, "$NetBSD: pf_norm.c,v 1.21 2009/07/28 18:15:26 minskim Exp $"); 31 32 #ifdef _KERNEL_OPT 33 #include "opt_inet.h" 34 #endif 35 36 #include "pflog.h" 37 38 #include <sys/param.h> 39 #include <sys/systm.h> 40 #include <sys/mbuf.h> 41 #include <sys/filio.h> 42 #include <sys/fcntl.h> 43 #include <sys/socket.h> 44 #include <sys/kernel.h> 45 #include <sys/time.h> 46 #include <sys/pool.h> 47 48 #ifdef __NetBSD__ 49 #include <sys/rnd.h> 50 #else 51 #include <dev/rndvar.h> 52 #endif /* !__NetBSD__ */ 53 #include <net/if.h> 54 #include <net/if_types.h> 55 #include <net/bpf.h> 56 #include <net/route.h> 57 #include <net/if_pflog.h> 58 59 #include <netinet/in.h> 60 #include <netinet/in_var.h> 61 #include <netinet/in_systm.h> 62 #include <netinet/ip.h> 63 #include <netinet/ip_var.h> 64 #include <netinet/tcp.h> 65 #include <netinet/tcp_seq.h> 66 #include <netinet/udp.h> 67 #include <netinet/ip_icmp.h> 68 69 #ifdef INET6 70 #include <netinet/ip6.h> 71 #endif /* INET6 */ 72 73 #include <net/pfvar.h> 74 75 struct pf_frent { 76 LIST_ENTRY(pf_frent) fr_next; 77 struct ip *fr_ip; 78 struct mbuf *fr_m; 79 }; 80 81 struct pf_frcache { 82 LIST_ENTRY(pf_frcache) fr_next; 83 uint16_t fr_off; 84 uint16_t fr_end; 85 }; 86 87 #define PFFRAG_SEENLAST 0x0001 /* Seen the last fragment for this */ 88 #define PFFRAG_NOBUFFER 0x0002 /* Non-buffering fragment cache */ 89 #define PFFRAG_DROP 0x0004 /* Drop all fragments */ 90 #define BUFFER_FRAGMENTS(fr) (!((fr)->fr_flags & PFFRAG_NOBUFFER)) 91 92 struct pf_fragment { 93 RB_ENTRY(pf_fragment) fr_entry; 94 TAILQ_ENTRY(pf_fragment) frag_next; 95 struct in_addr fr_src; 96 struct in_addr fr_dst; 97 u_int8_t fr_p; /* protocol of this fragment */ 98 u_int8_t fr_flags; /* status flags */ 99 u_int16_t fr_id; /* fragment id for reassemble */ 100 u_int16_t fr_max; /* fragment data max */ 101 u_int32_t fr_timeout; 102 #define fr_queue fr_u.fru_queue 103 #define fr_cache fr_u.fru_cache 104 union { 105 LIST_HEAD(pf_fragq, pf_frent) fru_queue; /* buffering */ 106 LIST_HEAD(pf_cacheq, pf_frcache) fru_cache; /* non-buf */ 107 } fr_u; 108 }; 109 110 TAILQ_HEAD(pf_fragqueue, pf_fragment) pf_fragqueue; 111 TAILQ_HEAD(pf_cachequeue, pf_fragment) pf_cachequeue; 112 113 static __inline int pf_frag_compare(struct pf_fragment *, 114 struct pf_fragment *); 115 RB_HEAD(pf_frag_tree, pf_fragment) pf_frag_tree, pf_cache_tree; 116 RB_PROTOTYPE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare); 117 RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare); 118 119 /* Private prototypes */ 120 void pf_ip2key(struct pf_fragment *, struct ip *); 121 void pf_remove_fragment(struct pf_fragment *); 122 void pf_flush_fragments(void); 123 void pf_free_fragment(struct pf_fragment *); 124 struct pf_fragment *pf_find_fragment(struct ip *, struct pf_frag_tree *); 125 struct mbuf *pf_reassemble(struct mbuf **, struct pf_fragment **, 126 struct pf_frent *, int); 127 struct mbuf *pf_fragcache(struct mbuf **, struct ip*, 128 struct pf_fragment **, int, int, int *); 129 int pf_normalize_tcpopt(struct pf_rule *, struct mbuf *, 130 struct tcphdr *, int); 131 132 #define DPFPRINTF(x) do { \ 133 if (pf_status.debug >= PF_DEBUG_MISC) { \ 134 printf("%s: ", __func__); \ 135 printf x ; \ 136 } \ 137 } while(0) 138 139 /* Globals */ 140 struct pool pf_frent_pl, pf_frag_pl, pf_cache_pl, pf_cent_pl; 141 struct pool pf_state_scrub_pl; 142 int pf_nfrents, pf_ncache; 143 144 void 145 pf_normalize_init(void) 146 { 147 #ifdef __NetBSD__ 148 pool_init(&pf_frent_pl, sizeof(struct pf_frent), 0, 0, 0, "pffrent", 149 NULL, IPL_SOFTNET); 150 pool_init(&pf_frag_pl, sizeof(struct pf_fragment), 0, 0, 0, "pffrag", 151 NULL, IPL_SOFTNET); 152 pool_init(&pf_cache_pl, sizeof(struct pf_fragment), 0, 0, 0, 153 "pffrcache", NULL, IPL_SOFTNET); 154 pool_init(&pf_cent_pl, sizeof(struct pf_frcache), 0, 0, 0, "pffrcent", 155 NULL, IPL_SOFTNET); 156 pool_init(&pf_state_scrub_pl, sizeof(struct pf_state_scrub), 0, 0, 0, 157 "pfstscr", NULL, IPL_SOFTNET); 158 #else 159 pool_init(&pf_frent_pl, sizeof(struct pf_frent), 0, 0, 0, "pffrent", 160 NULL); 161 pool_init(&pf_frag_pl, sizeof(struct pf_fragment), 0, 0, 0, "pffrag", 162 NULL); 163 pool_init(&pf_cache_pl, sizeof(struct pf_fragment), 0, 0, 0, 164 "pffrcache", NULL); 165 pool_init(&pf_cent_pl, sizeof(struct pf_frcache), 0, 0, 0, "pffrcent", 166 NULL); 167 pool_init(&pf_state_scrub_pl, sizeof(struct pf_state_scrub), 0, 0, 0, 168 "pfstscr", NULL); 169 #endif /* !__NetBSD__ */ 170 171 pool_sethiwat(&pf_frag_pl, PFFRAG_FRAG_HIWAT); 172 pool_sethardlimit(&pf_frent_pl, PFFRAG_FRENT_HIWAT, NULL, 0); 173 pool_sethardlimit(&pf_cache_pl, PFFRAG_FRCACHE_HIWAT, NULL, 0); 174 pool_sethardlimit(&pf_cent_pl, PFFRAG_FRCENT_HIWAT, NULL, 0); 175 176 TAILQ_INIT(&pf_fragqueue); 177 TAILQ_INIT(&pf_cachequeue); 178 } 179 180 static __inline int 181 pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b) 182 { 183 int diff; 184 185 if ((diff = a->fr_id - b->fr_id)) 186 return (diff); 187 else if ((diff = a->fr_p - b->fr_p)) 188 return (diff); 189 else if (a->fr_src.s_addr < b->fr_src.s_addr) 190 return (-1); 191 else if (a->fr_src.s_addr > b->fr_src.s_addr) 192 return (1); 193 else if (a->fr_dst.s_addr < b->fr_dst.s_addr) 194 return (-1); 195 else if (a->fr_dst.s_addr > b->fr_dst.s_addr) 196 return (1); 197 return (0); 198 } 199 200 void 201 pf_purge_expired_fragments(void) 202 { 203 struct pf_fragment *frag; 204 u_int32_t expire = time_second - 205 pf_default_rule.timeout[PFTM_FRAG]; 206 207 while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) { 208 KASSERT(BUFFER_FRAGMENTS(frag)); 209 if (frag->fr_timeout > expire) 210 break; 211 212 DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag)); 213 pf_free_fragment(frag); 214 } 215 216 while ((frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue)) != NULL) { 217 KASSERT(!BUFFER_FRAGMENTS(frag)); 218 if (frag->fr_timeout > expire) 219 break; 220 221 DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag)); 222 pf_free_fragment(frag); 223 KASSERT(TAILQ_EMPTY(&pf_cachequeue) || 224 TAILQ_LAST(&pf_cachequeue, pf_cachequeue) != frag); 225 } 226 } 227 228 /* 229 * Try to flush old fragments to make space for new ones 230 */ 231 232 void 233 pf_flush_fragments(void) 234 { 235 struct pf_fragment *frag; 236 int goal; 237 238 goal = pf_nfrents * 9 / 10; 239 DPFPRINTF(("trying to free > %d frents\n", 240 pf_nfrents - goal)); 241 while (goal < pf_nfrents) { 242 frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue); 243 if (frag == NULL) 244 break; 245 pf_free_fragment(frag); 246 } 247 248 249 goal = pf_ncache * 9 / 10; 250 DPFPRINTF(("trying to free > %d cache entries\n", 251 pf_ncache - goal)); 252 while (goal < pf_ncache) { 253 frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue); 254 if (frag == NULL) 255 break; 256 pf_free_fragment(frag); 257 } 258 } 259 260 /* Frees the fragments and all associated entries */ 261 262 void 263 pf_free_fragment(struct pf_fragment *frag) 264 { 265 struct pf_frent *frent; 266 struct pf_frcache *frcache; 267 268 /* Free all fragments */ 269 if (BUFFER_FRAGMENTS(frag)) { 270 for (frent = LIST_FIRST(&frag->fr_queue); frent; 271 frent = LIST_FIRST(&frag->fr_queue)) { 272 LIST_REMOVE(frent, fr_next); 273 274 m_freem(frent->fr_m); 275 pool_put(&pf_frent_pl, frent); 276 pf_nfrents--; 277 } 278 } else { 279 for (frcache = LIST_FIRST(&frag->fr_cache); frcache; 280 frcache = LIST_FIRST(&frag->fr_cache)) { 281 LIST_REMOVE(frcache, fr_next); 282 283 KASSERT(LIST_EMPTY(&frag->fr_cache) || 284 LIST_FIRST(&frag->fr_cache)->fr_off > 285 frcache->fr_end); 286 287 pool_put(&pf_cent_pl, frcache); 288 pf_ncache--; 289 } 290 } 291 292 pf_remove_fragment(frag); 293 } 294 295 void 296 pf_ip2key(struct pf_fragment *key, struct ip *ip) 297 { 298 key->fr_p = ip->ip_p; 299 key->fr_id = ip->ip_id; 300 key->fr_src.s_addr = ip->ip_src.s_addr; 301 key->fr_dst.s_addr = ip->ip_dst.s_addr; 302 } 303 304 struct pf_fragment * 305 pf_find_fragment(struct ip *ip, struct pf_frag_tree *tree) 306 { 307 struct pf_fragment key; 308 struct pf_fragment *frag; 309 310 pf_ip2key(&key, ip); 311 312 frag = RB_FIND(pf_frag_tree, tree, &key); 313 if (frag != NULL) { 314 /* XXX Are we sure we want to update the timeout? */ 315 frag->fr_timeout = time_second; 316 if (BUFFER_FRAGMENTS(frag)) { 317 TAILQ_REMOVE(&pf_fragqueue, frag, frag_next); 318 TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next); 319 } else { 320 TAILQ_REMOVE(&pf_cachequeue, frag, frag_next); 321 TAILQ_INSERT_HEAD(&pf_cachequeue, frag, frag_next); 322 } 323 } 324 325 return (frag); 326 } 327 328 /* Removes a fragment from the fragment queue and frees the fragment */ 329 330 void 331 pf_remove_fragment(struct pf_fragment *frag) 332 { 333 if (BUFFER_FRAGMENTS(frag)) { 334 RB_REMOVE(pf_frag_tree, &pf_frag_tree, frag); 335 TAILQ_REMOVE(&pf_fragqueue, frag, frag_next); 336 pool_put(&pf_frag_pl, frag); 337 } else { 338 RB_REMOVE(pf_frag_tree, &pf_cache_tree, frag); 339 TAILQ_REMOVE(&pf_cachequeue, frag, frag_next); 340 pool_put(&pf_cache_pl, frag); 341 } 342 } 343 344 #define FR_IP_OFF(fr) ((ntohs((fr)->fr_ip->ip_off) & IP_OFFMASK) << 3) 345 struct mbuf * 346 pf_reassemble(struct mbuf **m0, struct pf_fragment **frag, 347 struct pf_frent *frent, int mff) 348 { 349 struct mbuf *m = *m0, *m2; 350 struct pf_frent *frea, *next; 351 struct pf_frent *frep = NULL; 352 struct ip *ip = frent->fr_ip; 353 int hlen = ip->ip_hl << 2; 354 u_int16_t off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3; 355 u_int16_t ip_len = ntohs(ip->ip_len) - ip->ip_hl * 4; 356 u_int16_t max = ip_len + off; 357 358 KASSERT(*frag == NULL || BUFFER_FRAGMENTS(*frag)); 359 360 /* Strip off ip header */ 361 m->m_data += hlen; 362 m->m_len -= hlen; 363 364 /* Create a new reassembly queue for this packet */ 365 if (*frag == NULL) { 366 *frag = pool_get(&pf_frag_pl, PR_NOWAIT); 367 if (*frag == NULL) { 368 pf_flush_fragments(); 369 *frag = pool_get(&pf_frag_pl, PR_NOWAIT); 370 if (*frag == NULL) 371 goto drop_fragment; 372 } 373 374 (*frag)->fr_flags = 0; 375 (*frag)->fr_max = 0; 376 (*frag)->fr_src = frent->fr_ip->ip_src; 377 (*frag)->fr_dst = frent->fr_ip->ip_dst; 378 (*frag)->fr_p = frent->fr_ip->ip_p; 379 (*frag)->fr_id = frent->fr_ip->ip_id; 380 (*frag)->fr_timeout = time_second; 381 LIST_INIT(&(*frag)->fr_queue); 382 383 RB_INSERT(pf_frag_tree, &pf_frag_tree, *frag); 384 TAILQ_INSERT_HEAD(&pf_fragqueue, *frag, frag_next); 385 386 /* We do not have a previous fragment */ 387 frep = NULL; 388 goto insert; 389 } 390 391 /* 392 * Find a fragment after the current one: 393 * - off contains the real shifted offset. 394 */ 395 LIST_FOREACH(frea, &(*frag)->fr_queue, fr_next) { 396 if (FR_IP_OFF(frea) > off) 397 break; 398 frep = frea; 399 } 400 401 KASSERT(frep != NULL || frea != NULL); 402 403 if (frep != NULL && 404 FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl * 405 4 > off) 406 { 407 u_int16_t precut; 408 409 precut = FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) - 410 frep->fr_ip->ip_hl * 4 - off; 411 if (precut >= ip_len) 412 goto drop_fragment; 413 m_adj(frent->fr_m, precut); 414 DPFPRINTF(("overlap -%d\n", precut)); 415 /* Enforce 8 byte boundaries */ 416 ip->ip_off = htons(ntohs(ip->ip_off) + (precut >> 3)); 417 off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3; 418 ip_len -= precut; 419 ip->ip_len = htons(ip_len); 420 } 421 422 for (; frea != NULL && ip_len + off > FR_IP_OFF(frea); 423 frea = next) 424 { 425 u_int16_t aftercut; 426 427 aftercut = ip_len + off - FR_IP_OFF(frea); 428 DPFPRINTF(("adjust overlap %d\n", aftercut)); 429 if (aftercut < ntohs(frea->fr_ip->ip_len) - frea->fr_ip->ip_hl 430 * 4) 431 { 432 frea->fr_ip->ip_len = 433 htons(ntohs(frea->fr_ip->ip_len) - aftercut); 434 frea->fr_ip->ip_off = htons(ntohs(frea->fr_ip->ip_off) + 435 (aftercut >> 3)); 436 m_adj(frea->fr_m, aftercut); 437 break; 438 } 439 440 /* This fragment is completely overlapped, lose it */ 441 next = LIST_NEXT(frea, fr_next); 442 m_freem(frea->fr_m); 443 LIST_REMOVE(frea, fr_next); 444 pool_put(&pf_frent_pl, frea); 445 pf_nfrents--; 446 } 447 448 insert: 449 /* Update maximum data size */ 450 if ((*frag)->fr_max < max) 451 (*frag)->fr_max = max; 452 /* This is the last segment */ 453 if (!mff) 454 (*frag)->fr_flags |= PFFRAG_SEENLAST; 455 456 if (frep == NULL) 457 LIST_INSERT_HEAD(&(*frag)->fr_queue, frent, fr_next); 458 else 459 LIST_INSERT_AFTER(frep, frent, fr_next); 460 461 /* Check if we are completely reassembled */ 462 if (!((*frag)->fr_flags & PFFRAG_SEENLAST)) 463 return (NULL); 464 465 /* Check if we have all the data */ 466 off = 0; 467 for (frep = LIST_FIRST(&(*frag)->fr_queue); frep; frep = next) { 468 next = LIST_NEXT(frep, fr_next); 469 470 off += ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl * 4; 471 if (off < (*frag)->fr_max && 472 (next == NULL || FR_IP_OFF(next) != off)) 473 { 474 DPFPRINTF(("missing fragment at %d, next %d, max %d\n", 475 off, next == NULL ? -1 : FR_IP_OFF(next), 476 (*frag)->fr_max)); 477 return (NULL); 478 } 479 } 480 DPFPRINTF(("%d < %d?\n", off, (*frag)->fr_max)); 481 if (off < (*frag)->fr_max) 482 return (NULL); 483 484 /* We have all the data */ 485 frent = LIST_FIRST(&(*frag)->fr_queue); 486 KASSERT(frent != NULL); 487 if ((frent->fr_ip->ip_hl << 2) + off > IP_MAXPACKET) { 488 DPFPRINTF(("drop: too big: %d\n", off)); 489 pf_free_fragment(*frag); 490 *frag = NULL; 491 return (NULL); 492 } 493 next = LIST_NEXT(frent, fr_next); 494 495 /* Magic from ip_input */ 496 ip = frent->fr_ip; 497 m = frent->fr_m; 498 m2 = m->m_next; 499 m->m_next = NULL; 500 m_cat(m, m2); 501 pool_put(&pf_frent_pl, frent); 502 pf_nfrents--; 503 for (frent = next; frent != NULL; frent = next) { 504 next = LIST_NEXT(frent, fr_next); 505 506 m2 = frent->fr_m; 507 pool_put(&pf_frent_pl, frent); 508 pf_nfrents--; 509 m_cat(m, m2); 510 } 511 512 ip->ip_src = (*frag)->fr_src; 513 ip->ip_dst = (*frag)->fr_dst; 514 515 /* Remove from fragment queue */ 516 pf_remove_fragment(*frag); 517 *frag = NULL; 518 519 hlen = ip->ip_hl << 2; 520 ip->ip_len = htons(off + hlen); 521 m->m_len += hlen; 522 m->m_data -= hlen; 523 524 /* some debugging cruft by sklower, below, will go away soon */ 525 /* XXX this should be done elsewhere */ 526 if (m->m_flags & M_PKTHDR) { 527 int plen = 0; 528 for (m2 = m; m2; m2 = m2->m_next) 529 plen += m2->m_len; 530 m->m_pkthdr.len = plen; 531 #ifdef __NetBSD__ 532 m->m_pkthdr.csum_flags = 0; 533 #endif /* __NetBSD__ */ 534 } 535 536 DPFPRINTF(("complete: %p(%d)\n", m, ntohs(ip->ip_len))); 537 return (m); 538 539 drop_fragment: 540 /* Oops - fail safe - drop packet */ 541 pool_put(&pf_frent_pl, frent); 542 pf_nfrents--; 543 m_freem(m); 544 return (NULL); 545 } 546 547 struct mbuf * 548 pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, 549 int drop, int *nomem) 550 { 551 struct mbuf *m = *m0; 552 struct pf_frcache *frp, *fra, *cur = NULL; 553 int ip_len = ntohs(h->ip_len) - (h->ip_hl << 2); 554 u_int16_t off = ntohs(h->ip_off) << 3; 555 u_int16_t max = ip_len + off; 556 int hosed = 0; 557 558 KASSERT(*frag == NULL || !BUFFER_FRAGMENTS(*frag)); 559 560 /* Create a new range queue for this packet */ 561 if (*frag == NULL) { 562 *frag = pool_get(&pf_cache_pl, PR_NOWAIT); 563 if (*frag == NULL) { 564 pf_flush_fragments(); 565 *frag = pool_get(&pf_cache_pl, PR_NOWAIT); 566 if (*frag == NULL) 567 goto no_mem; 568 } 569 570 /* Get an entry for the queue */ 571 cur = pool_get(&pf_cent_pl, PR_NOWAIT); 572 if (cur == NULL) { 573 pool_put(&pf_cache_pl, *frag); 574 *frag = NULL; 575 goto no_mem; 576 } 577 pf_ncache++; 578 579 (*frag)->fr_flags = PFFRAG_NOBUFFER; 580 (*frag)->fr_max = 0; 581 (*frag)->fr_src = h->ip_src; 582 (*frag)->fr_dst = h->ip_dst; 583 (*frag)->fr_p = h->ip_p; 584 (*frag)->fr_id = h->ip_id; 585 (*frag)->fr_timeout = time_second; 586 587 cur->fr_off = off; 588 cur->fr_end = max; 589 LIST_INIT(&(*frag)->fr_cache); 590 LIST_INSERT_HEAD(&(*frag)->fr_cache, cur, fr_next); 591 592 RB_INSERT(pf_frag_tree, &pf_cache_tree, *frag); 593 TAILQ_INSERT_HEAD(&pf_cachequeue, *frag, frag_next); 594 595 DPFPRINTF(("fragcache[%d]: new %d-%d\n", h->ip_id, off, max)); 596 597 goto pass; 598 } 599 600 /* 601 * Find a fragment after the current one: 602 * - off contains the real shifted offset. 603 */ 604 frp = NULL; 605 LIST_FOREACH(fra, &(*frag)->fr_cache, fr_next) { 606 if (fra->fr_off > off) 607 break; 608 frp = fra; 609 } 610 611 KASSERT(frp != NULL || fra != NULL); 612 613 if (frp != NULL) { 614 int precut; 615 616 precut = frp->fr_end - off; 617 if (precut >= ip_len) { 618 /* Fragment is entirely a duplicate */ 619 DPFPRINTF(("fragcache[%d]: dead (%d-%d) %d-%d\n", 620 h->ip_id, frp->fr_off, frp->fr_end, off, max)); 621 goto drop_fragment; 622 } 623 if (precut == 0) { 624 /* They are adjacent. Fixup cache entry */ 625 DPFPRINTF(("fragcache[%d]: adjacent (%d-%d) %d-%d\n", 626 h->ip_id, frp->fr_off, frp->fr_end, off, max)); 627 frp->fr_end = max; 628 } else if (precut > 0) { 629 /* The first part of this payload overlaps with a 630 * fragment that has already been passed. 631 * Need to trim off the first part of the payload. 632 * But to do so easily, we need to create another 633 * mbuf to throw the original header into. 634 */ 635 636 DPFPRINTF(("fragcache[%d]: chop %d (%d-%d) %d-%d\n", 637 h->ip_id, precut, frp->fr_off, frp->fr_end, off, 638 max)); 639 640 off += precut; 641 max -= precut; 642 /* Update the previous frag to encompass this one */ 643 frp->fr_end = max; 644 645 if (!drop) { 646 /* XXX Optimization opportunity 647 * This is a very heavy way to trim the payload. 648 * we could do it much faster by diddling mbuf 649 * internals but that would be even less legible 650 * than this mbuf magic. For my next trick, 651 * I'll pull a rabbit out of my laptop. 652 */ 653 *m0 = m_dup(m, 0, h->ip_hl << 2, M_NOWAIT); 654 if (*m0 == NULL) 655 goto no_mem; 656 KASSERT((*m0)->m_next == NULL); 657 m_adj(m, precut + (h->ip_hl << 2)); 658 m_cat(*m0, m); 659 m = *m0; 660 if (m->m_flags & M_PKTHDR) { 661 int plen = 0; 662 struct mbuf *t; 663 for (t = m; t; t = t->m_next) 664 plen += t->m_len; 665 m->m_pkthdr.len = plen; 666 } 667 668 669 h = mtod(m, struct ip *); 670 671 672 KASSERT((int)m->m_len == 673 ntohs(h->ip_len) - precut); 674 h->ip_off = htons(ntohs(h->ip_off) + 675 (precut >> 3)); 676 h->ip_len = htons(ntohs(h->ip_len) - precut); 677 } else { 678 hosed++; 679 } 680 } else { 681 /* There is a gap between fragments */ 682 683 DPFPRINTF(("fragcache[%d]: gap %d (%d-%d) %d-%d\n", 684 h->ip_id, -precut, frp->fr_off, frp->fr_end, off, 685 max)); 686 687 cur = pool_get(&pf_cent_pl, PR_NOWAIT); 688 if (cur == NULL) 689 goto no_mem; 690 pf_ncache++; 691 692 cur->fr_off = off; 693 cur->fr_end = max; 694 LIST_INSERT_AFTER(frp, cur, fr_next); 695 } 696 } 697 698 if (fra != NULL) { 699 int aftercut; 700 int merge = 0; 701 702 aftercut = max - fra->fr_off; 703 if (aftercut == 0) { 704 /* Adjacent fragments */ 705 DPFPRINTF(("fragcache[%d]: adjacent %d-%d (%d-%d)\n", 706 h->ip_id, off, max, fra->fr_off, fra->fr_end)); 707 fra->fr_off = off; 708 merge = 1; 709 } else if (aftercut > 0) { 710 /* Need to chop off the tail of this fragment */ 711 DPFPRINTF(("fragcache[%d]: chop %d %d-%d (%d-%d)\n", 712 h->ip_id, aftercut, off, max, fra->fr_off, 713 fra->fr_end)); 714 fra->fr_off = off; 715 max -= aftercut; 716 717 merge = 1; 718 719 if (!drop) { 720 m_adj(m, -aftercut); 721 if (m->m_flags & M_PKTHDR) { 722 int plen = 0; 723 struct mbuf *t; 724 for (t = m; t; t = t->m_next) 725 plen += t->m_len; 726 m->m_pkthdr.len = plen; 727 } 728 h = mtod(m, struct ip *); 729 KASSERT((int)m->m_len == 730 ntohs(h->ip_len) - aftercut); 731 h->ip_len = htons(ntohs(h->ip_len) - aftercut); 732 } else { 733 hosed++; 734 } 735 } else if (frp == NULL) { 736 /* There is a gap between fragments */ 737 DPFPRINTF(("fragcache[%d]: gap %d %d-%d (%d-%d)\n", 738 h->ip_id, -aftercut, off, max, fra->fr_off, 739 fra->fr_end)); 740 741 cur = pool_get(&pf_cent_pl, PR_NOWAIT); 742 if (cur == NULL) 743 goto no_mem; 744 pf_ncache++; 745 746 cur->fr_off = off; 747 cur->fr_end = max; 748 LIST_INSERT_BEFORE(fra, cur, fr_next); 749 } 750 751 752 /* Need to glue together two separate fragment descriptors */ 753 if (merge) { 754 if (cur && fra->fr_off <= cur->fr_end) { 755 /* Need to merge in a previous 'cur' */ 756 DPFPRINTF(("fragcache[%d]: adjacent(merge " 757 "%d-%d) %d-%d (%d-%d)\n", 758 h->ip_id, cur->fr_off, cur->fr_end, off, 759 max, fra->fr_off, fra->fr_end)); 760 fra->fr_off = cur->fr_off; 761 LIST_REMOVE(cur, fr_next); 762 pool_put(&pf_cent_pl, cur); 763 pf_ncache--; 764 cur = NULL; 765 766 } else if (frp && fra->fr_off <= frp->fr_end) { 767 /* Need to merge in a modified 'frp' */ 768 KASSERT(cur == NULL); 769 DPFPRINTF(("fragcache[%d]: adjacent(merge " 770 "%d-%d) %d-%d (%d-%d)\n", 771 h->ip_id, frp->fr_off, frp->fr_end, off, 772 max, fra->fr_off, fra->fr_end)); 773 fra->fr_off = frp->fr_off; 774 LIST_REMOVE(frp, fr_next); 775 pool_put(&pf_cent_pl, frp); 776 pf_ncache--; 777 frp = NULL; 778 779 } 780 } 781 } 782 783 if (hosed) { 784 /* 785 * We must keep tracking the overall fragment even when 786 * we're going to drop it anyway so that we know when to 787 * free the overall descriptor. Thus we drop the frag late. 788 */ 789 goto drop_fragment; 790 } 791 792 793 pass: 794 /* Update maximum data size */ 795 if ((*frag)->fr_max < max) 796 (*frag)->fr_max = max; 797 798 /* This is the last segment */ 799 if (!mff) 800 (*frag)->fr_flags |= PFFRAG_SEENLAST; 801 802 /* Check if we are completely reassembled */ 803 if (((*frag)->fr_flags & PFFRAG_SEENLAST) && 804 LIST_FIRST(&(*frag)->fr_cache)->fr_off == 0 && 805 LIST_FIRST(&(*frag)->fr_cache)->fr_end == (*frag)->fr_max) { 806 /* Remove from fragment queue */ 807 DPFPRINTF(("fragcache[%d]: done 0-%d\n", h->ip_id, 808 (*frag)->fr_max)); 809 pf_free_fragment(*frag); 810 *frag = NULL; 811 } 812 813 return (m); 814 815 no_mem: 816 *nomem = 1; 817 818 /* Still need to pay attention to !IP_MF */ 819 if (!mff && *frag != NULL) 820 (*frag)->fr_flags |= PFFRAG_SEENLAST; 821 822 m_freem(m); 823 return (NULL); 824 825 drop_fragment: 826 827 /* Still need to pay attention to !IP_MF */ 828 if (!mff && *frag != NULL) 829 (*frag)->fr_flags |= PFFRAG_SEENLAST; 830 831 if (drop) { 832 /* This fragment has been deemed bad. Don't reass */ 833 if (((*frag)->fr_flags & PFFRAG_DROP) == 0) 834 DPFPRINTF(("fragcache[%d]: dropping overall fragment\n", 835 h->ip_id)); 836 (*frag)->fr_flags |= PFFRAG_DROP; 837 } 838 839 m_freem(m); 840 return (NULL); 841 } 842 843 int 844 pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, 845 struct pf_pdesc *pd) 846 { 847 struct mbuf *m = *m0; 848 struct pf_rule *r; 849 struct pf_frent *frent; 850 struct pf_fragment *frag = NULL; 851 struct ip *h = mtod(m, struct ip *); 852 int mff = (ntohs(h->ip_off) & IP_MF); 853 int hlen = h->ip_hl << 2; 854 u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3; 855 u_int16_t max; 856 int ip_len; 857 int ip_off; 858 859 r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr); 860 while (r != NULL) { 861 r->evaluations++; 862 if (pfi_kif_match(r->kif, kif) == r->ifnot) 863 r = r->skip[PF_SKIP_IFP].ptr; 864 else if (r->direction && r->direction != dir) 865 r = r->skip[PF_SKIP_DIR].ptr; 866 else if (r->af && r->af != AF_INET) 867 r = r->skip[PF_SKIP_AF].ptr; 868 else if (r->proto && r->proto != h->ip_p) 869 r = r->skip[PF_SKIP_PROTO].ptr; 870 else if (PF_MISMATCHAW(&r->src.addr, 871 (struct pf_addr *)&h->ip_src.s_addr, AF_INET, 872 r->src.neg, kif)) 873 r = r->skip[PF_SKIP_SRC_ADDR].ptr; 874 else if (PF_MISMATCHAW(&r->dst.addr, 875 (struct pf_addr *)&h->ip_dst.s_addr, AF_INET, 876 r->dst.neg, NULL)) 877 r = r->skip[PF_SKIP_DST_ADDR].ptr; 878 else 879 break; 880 } 881 882 if (r == NULL || r->action == PF_NOSCRUB) 883 return (PF_PASS); 884 else { 885 r->packets[dir == PF_OUT]++; 886 r->bytes[dir == PF_OUT] += pd->tot_len; 887 } 888 889 /* Check for illegal packets */ 890 if (hlen < (int)sizeof(struct ip)) 891 goto drop; 892 893 if (hlen > ntohs(h->ip_len)) 894 goto drop; 895 896 /* Clear IP_DF if the rule uses the no-df option */ 897 if (r->rule_flag & PFRULE_NODF && h->ip_off & htons(IP_DF)) { 898 u_int16_t ip_off = h->ip_off; 899 900 h->ip_off &= htons(~IP_DF); 901 h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0); 902 } 903 904 /* We will need other tests here */ 905 if (!fragoff && !mff) 906 goto no_fragment; 907 908 /* We're dealing with a fragment now. Don't allow fragments 909 * with IP_DF to enter the cache. If the flag was cleared by 910 * no-df above, fine. Otherwise drop it. 911 */ 912 if (h->ip_off & htons(IP_DF)) { 913 DPFPRINTF(("IP_DF\n")); 914 goto bad; 915 } 916 917 ip_len = ntohs(h->ip_len) - hlen; 918 ip_off = (ntohs(h->ip_off) & IP_OFFMASK) << 3; 919 920 /* All fragments are 8 byte aligned */ 921 if (mff && (ip_len & 0x7)) { 922 DPFPRINTF(("mff and %d\n", ip_len)); 923 goto bad; 924 } 925 926 /* Respect maximum length */ 927 if (fragoff + ip_len > IP_MAXPACKET) { 928 DPFPRINTF(("max packet %d\n", fragoff + ip_len)); 929 goto bad; 930 } 931 max = fragoff + ip_len; 932 933 if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) { 934 /* Fully buffer all of the fragments */ 935 936 frag = pf_find_fragment(h, &pf_frag_tree); 937 938 /* Check if we saw the last fragment already */ 939 if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) && 940 max > frag->fr_max) 941 goto bad; 942 943 /* Get an entry for the fragment queue */ 944 frent = pool_get(&pf_frent_pl, PR_NOWAIT); 945 if (frent == NULL) { 946 REASON_SET(reason, PFRES_MEMORY); 947 return (PF_DROP); 948 } 949 pf_nfrents++; 950 frent->fr_ip = h; 951 frent->fr_m = m; 952 953 /* Might return a completely reassembled mbuf, or NULL */ 954 DPFPRINTF(("reass frag %d @ %d-%d\n", h->ip_id, fragoff, max)); 955 *m0 = m = pf_reassemble(m0, &frag, frent, mff); 956 957 if (m == NULL) 958 return (PF_DROP); 959 960 if (frag != NULL && (frag->fr_flags & PFFRAG_DROP)) 961 goto drop; 962 963 h = mtod(m, struct ip *); 964 } else { 965 /* non-buffering fragment cache (drops or masks overlaps) */ 966 int nomem = 0; 967 968 #ifdef __NetBSD__ 969 struct pf_mtag *pf_mtag = pf_find_mtag(m); 970 KASSERT(pf_mtag != NULL); 971 972 if (dir == PF_OUT && pf_mtag->flags & PF_TAG_FRAGCACHE) { 973 #else 974 if (dir == PF_OUT && m->m_pkthdr.pf.flags & PF_TAG_FRAGCACHE) { 975 #endif /* !__NetBSD__ */ 976 /* 977 * Already passed the fragment cache in the 978 * input direction. If we continued, it would 979 * appear to be a dup and would be dropped. 980 */ 981 goto fragment_pass; 982 } 983 984 frag = pf_find_fragment(h, &pf_cache_tree); 985 986 /* Check if we saw the last fragment already */ 987 if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) && 988 max > frag->fr_max) { 989 if (r->rule_flag & PFRULE_FRAGDROP) 990 frag->fr_flags |= PFFRAG_DROP; 991 goto bad; 992 } 993 994 *m0 = m = pf_fragcache(m0, h, &frag, mff, 995 (r->rule_flag & PFRULE_FRAGDROP) ? 1 : 0, &nomem); 996 if (m == NULL) { 997 if (nomem) 998 goto no_mem; 999 goto drop; 1000 } 1001 1002 if (dir == PF_IN) 1003 #ifdef __NetBSD__ 1004 pf_mtag = pf_find_mtag(m); 1005 KASSERT(pf_mtag != NULL); 1006 1007 pf_mtag->flags |= PF_TAG_FRAGCACHE; 1008 #else 1009 m->m_pkthdr.pf.flags |= PF_TAG_FRAGCACHE; 1010 #endif /* !__NetBSD__ */ 1011 1012 if (frag != NULL && (frag->fr_flags & PFFRAG_DROP)) 1013 goto drop; 1014 goto fragment_pass; 1015 } 1016 1017 no_fragment: 1018 /* At this point, only IP_DF is allowed in ip_off */ 1019 if (h->ip_off & ~htons(IP_DF)) { 1020 u_int16_t ip_off = h->ip_off; 1021 1022 h->ip_off &= htons(IP_DF); 1023 h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0); 1024 } 1025 1026 /* Enforce a minimum ttl, may cause endless packet loops */ 1027 if (r->min_ttl && h->ip_ttl < r->min_ttl) { 1028 u_int16_t ip_ttl = h->ip_ttl; 1029 1030 h->ip_ttl = r->min_ttl; 1031 h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0); 1032 } 1033 1034 if (r->rule_flag & PFRULE_RANDOMID) { 1035 u_int16_t ip_id = h->ip_id; 1036 1037 h->ip_id = ip_randomid(0); 1038 h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_id, h->ip_id, 0); 1039 } 1040 if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) 1041 pd->flags |= PFDESC_IP_REAS; 1042 1043 return (PF_PASS); 1044 1045 fragment_pass: 1046 /* Enforce a minimum ttl, may cause endless packet loops */ 1047 if (r->min_ttl && h->ip_ttl < r->min_ttl) { 1048 u_int16_t ip_ttl = h->ip_ttl; 1049 1050 h->ip_ttl = r->min_ttl; 1051 h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0); 1052 } 1053 if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) 1054 pd->flags |= PFDESC_IP_REAS; 1055 return (PF_PASS); 1056 1057 no_mem: 1058 REASON_SET(reason, PFRES_MEMORY); 1059 if (r != NULL && r->log) 1060 PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd); 1061 return (PF_DROP); 1062 1063 drop: 1064 REASON_SET(reason, PFRES_NORM); 1065 if (r != NULL && r->log) 1066 PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd); 1067 return (PF_DROP); 1068 1069 bad: 1070 DPFPRINTF(("dropping bad fragment\n")); 1071 1072 /* Free associated fragments */ 1073 if (frag != NULL) 1074 pf_free_fragment(frag); 1075 1076 REASON_SET(reason, PFRES_FRAG); 1077 if (r != NULL && r->log) 1078 PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd); 1079 1080 return (PF_DROP); 1081 } 1082 1083 #ifdef INET6 1084 int 1085 pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif, 1086 u_short *reason, struct pf_pdesc *pd) 1087 { 1088 struct mbuf *m = *m0; 1089 struct pf_rule *r; 1090 struct ip6_hdr *h = mtod(m, struct ip6_hdr *); 1091 int off; 1092 struct ip6_ext ext; 1093 struct ip6_opt opt; 1094 struct ip6_opt_jumbo jumbo; 1095 struct ip6_frag frag; 1096 u_int32_t jumbolen = 0, plen; 1097 u_int16_t fragoff = 0; 1098 int optend; 1099 int ooff; 1100 u_int8_t proto; 1101 int terminal; 1102 1103 r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr); 1104 while (r != NULL) { 1105 r->evaluations++; 1106 if (pfi_kif_match(r->kif, kif) == r->ifnot) 1107 r = r->skip[PF_SKIP_IFP].ptr; 1108 else if (r->direction && r->direction != dir) 1109 r = r->skip[PF_SKIP_DIR].ptr; 1110 else if (r->af && r->af != AF_INET6) 1111 r = r->skip[PF_SKIP_AF].ptr; 1112 #if 0 /* header chain! */ 1113 else if (r->proto && r->proto != h->ip6_nxt) 1114 r = r->skip[PF_SKIP_PROTO].ptr; 1115 #endif 1116 else if (PF_MISMATCHAW(&r->src.addr, 1117 (struct pf_addr *)&h->ip6_src, AF_INET6, 1118 r->src.neg, kif)) 1119 r = r->skip[PF_SKIP_SRC_ADDR].ptr; 1120 else if (PF_MISMATCHAW(&r->dst.addr, 1121 (struct pf_addr *)&h->ip6_dst, AF_INET6, 1122 r->dst.neg, NULL)) 1123 r = r->skip[PF_SKIP_DST_ADDR].ptr; 1124 else 1125 break; 1126 } 1127 1128 if (r == NULL || r->action == PF_NOSCRUB) 1129 return (PF_PASS); 1130 else { 1131 r->packets[dir == PF_OUT]++; 1132 r->bytes[dir == PF_OUT] += pd->tot_len; 1133 } 1134 1135 /* Check for illegal packets */ 1136 if (sizeof(struct ip6_hdr) + IPV6_MAXPACKET < m->m_pkthdr.len) 1137 goto drop; 1138 1139 off = sizeof(struct ip6_hdr); 1140 proto = h->ip6_nxt; 1141 terminal = 0; 1142 do { 1143 switch (proto) { 1144 case IPPROTO_FRAGMENT: 1145 goto fragment; 1146 break; 1147 case IPPROTO_AH: 1148 case IPPROTO_ROUTING: 1149 case IPPROTO_DSTOPTS: 1150 if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL, 1151 NULL, AF_INET6)) 1152 goto shortpkt; 1153 if (proto == IPPROTO_AH) 1154 off += (ext.ip6e_len + 2) * 4; 1155 else 1156 off += (ext.ip6e_len + 1) * 8; 1157 proto = ext.ip6e_nxt; 1158 break; 1159 case IPPROTO_HOPOPTS: 1160 if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL, 1161 NULL, AF_INET6)) 1162 goto shortpkt; 1163 optend = off + (ext.ip6e_len + 1) * 8; 1164 ooff = off + sizeof(ext); 1165 do { 1166 if (!pf_pull_hdr(m, ooff, &opt.ip6o_type, 1167 sizeof(opt.ip6o_type), NULL, NULL, 1168 AF_INET6)) 1169 goto shortpkt; 1170 if (opt.ip6o_type == IP6OPT_PAD1) { 1171 ooff++; 1172 continue; 1173 } 1174 if (!pf_pull_hdr(m, ooff, &opt, sizeof(opt), 1175 NULL, NULL, AF_INET6)) 1176 goto shortpkt; 1177 if (ooff + sizeof(opt) + opt.ip6o_len > optend) 1178 goto drop; 1179 switch (opt.ip6o_type) { 1180 case IP6OPT_JUMBO: 1181 if (h->ip6_plen != 0) 1182 goto drop; 1183 if (!pf_pull_hdr(m, ooff, &jumbo, 1184 sizeof(jumbo), NULL, NULL, 1185 AF_INET6)) 1186 goto shortpkt; 1187 memcpy(&jumbolen, jumbo.ip6oj_jumbo_len, 1188 sizeof(jumbolen)); 1189 jumbolen = ntohl(jumbolen); 1190 if (jumbolen <= IPV6_MAXPACKET) 1191 goto drop; 1192 if (sizeof(struct ip6_hdr) + jumbolen != 1193 m->m_pkthdr.len) 1194 goto drop; 1195 break; 1196 default: 1197 break; 1198 } 1199 ooff += sizeof(opt) + opt.ip6o_len; 1200 } while (ooff < optend); 1201 1202 off = optend; 1203 proto = ext.ip6e_nxt; 1204 break; 1205 default: 1206 terminal = 1; 1207 break; 1208 } 1209 } while (!terminal); 1210 1211 /* jumbo payload option must be present, or plen > 0 */ 1212 if (ntohs(h->ip6_plen) == 0) 1213 plen = jumbolen; 1214 else 1215 plen = ntohs(h->ip6_plen); 1216 if (plen == 0) 1217 goto drop; 1218 if (sizeof(struct ip6_hdr) + plen > m->m_pkthdr.len) 1219 goto shortpkt; 1220 1221 /* Enforce a minimum ttl, may cause endless packet loops */ 1222 if (r->min_ttl && h->ip6_hlim < r->min_ttl) 1223 h->ip6_hlim = r->min_ttl; 1224 1225 return (PF_PASS); 1226 1227 fragment: 1228 if (ntohs(h->ip6_plen) == 0 || jumbolen) 1229 goto drop; 1230 plen = ntohs(h->ip6_plen); 1231 1232 if (!pf_pull_hdr(m, off, &frag, sizeof(frag), NULL, NULL, AF_INET6)) 1233 goto shortpkt; 1234 fragoff = ntohs(frag.ip6f_offlg & IP6F_OFF_MASK); 1235 if (fragoff + (plen - off - sizeof(frag)) > IPV6_MAXPACKET) 1236 goto badfrag; 1237 1238 /* do something about it */ 1239 /* remember to set pd->flags |= PFDESC_IP_REAS */ 1240 return (PF_PASS); 1241 1242 shortpkt: 1243 REASON_SET(reason, PFRES_SHORT); 1244 if (r != NULL && r->log) 1245 PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL, pd); 1246 return (PF_DROP); 1247 1248 drop: 1249 REASON_SET(reason, PFRES_NORM); 1250 if (r != NULL && r->log) 1251 PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL, pd); 1252 return (PF_DROP); 1253 1254 badfrag: 1255 REASON_SET(reason, PFRES_FRAG); 1256 if (r != NULL && r->log) 1257 PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL, pd); 1258 return (PF_DROP); 1259 } 1260 #endif /* INET6 */ 1261 1262 int 1263 pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, 1264 int ipoff, int off, void *h, struct pf_pdesc *pd) 1265 { 1266 struct pf_rule *r, *rm = NULL; 1267 struct tcphdr *th = pd->hdr.tcp; 1268 int rewrite = 0; 1269 u_short reason; 1270 u_int8_t flags; 1271 sa_family_t af = pd->af; 1272 1273 r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr); 1274 while (r != NULL) { 1275 r->evaluations++; 1276 if (pfi_kif_match(r->kif, kif) == r->ifnot) 1277 r = r->skip[PF_SKIP_IFP].ptr; 1278 else if (r->direction && r->direction != dir) 1279 r = r->skip[PF_SKIP_DIR].ptr; 1280 else if (r->af && r->af != af) 1281 r = r->skip[PF_SKIP_AF].ptr; 1282 else if (r->proto && r->proto != pd->proto) 1283 r = r->skip[PF_SKIP_PROTO].ptr; 1284 else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, 1285 r->src.neg, kif)) 1286 r = r->skip[PF_SKIP_SRC_ADDR].ptr; 1287 else if (r->src.port_op && !pf_match_port(r->src.port_op, 1288 r->src.port[0], r->src.port[1], th->th_sport)) 1289 r = r->skip[PF_SKIP_SRC_PORT].ptr; 1290 else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, 1291 r->dst.neg, NULL)) 1292 r = r->skip[PF_SKIP_DST_ADDR].ptr; 1293 else if (r->dst.port_op && !pf_match_port(r->dst.port_op, 1294 r->dst.port[0], r->dst.port[1], th->th_dport)) 1295 r = r->skip[PF_SKIP_DST_PORT].ptr; 1296 else if (r->os_fingerprint != PF_OSFP_ANY && !pf_osfp_match( 1297 pf_osfp_fingerprint(pd, m, off, th), 1298 r->os_fingerprint)) 1299 r = TAILQ_NEXT(r, entries); 1300 else { 1301 rm = r; 1302 break; 1303 } 1304 } 1305 1306 if (rm == NULL || rm->action == PF_NOSCRUB) 1307 return (PF_PASS); 1308 else { 1309 r->packets[dir == PF_OUT]++; 1310 r->bytes[dir == PF_OUT] += pd->tot_len; 1311 } 1312 1313 if (rm->rule_flag & PFRULE_REASSEMBLE_TCP) 1314 pd->flags |= PFDESC_TCP_NORM; 1315 1316 flags = th->th_flags; 1317 if (flags & TH_SYN) { 1318 /* Illegal packet */ 1319 if (flags & TH_RST) 1320 goto tcp_drop; 1321 1322 if (flags & TH_FIN) 1323 flags &= ~TH_FIN; 1324 } else { 1325 /* Illegal packet */ 1326 if (!(flags & (TH_ACK|TH_RST))) 1327 goto tcp_drop; 1328 } 1329 1330 if (!(flags & TH_ACK)) { 1331 /* These flags are only valid if ACK is set */ 1332 if ((flags & TH_FIN) || (flags & TH_PUSH) || (flags & TH_URG)) 1333 goto tcp_drop; 1334 } 1335 1336 /* Check for illegal header length */ 1337 if (th->th_off < (sizeof(struct tcphdr) >> 2)) 1338 goto tcp_drop; 1339 1340 /* If flags changed, or reserved data set, then adjust */ 1341 if (flags != th->th_flags || th->th_x2 != 0) { 1342 u_int16_t ov, nv; 1343 1344 ov = *(u_int16_t *)(&th->th_ack + 1); 1345 th->th_flags = flags; 1346 th->th_x2 = 0; 1347 nv = *(u_int16_t *)(&th->th_ack + 1); 1348 1349 th->th_sum = pf_cksum_fixup(th->th_sum, ov, nv, 0); 1350 rewrite = 1; 1351 } 1352 1353 /* Remove urgent pointer, if TH_URG is not set */ 1354 if (!(flags & TH_URG) && th->th_urp) { 1355 th->th_sum = pf_cksum_fixup(th->th_sum, th->th_urp, 0, 0); 1356 th->th_urp = 0; 1357 rewrite = 1; 1358 } 1359 1360 /* Process options */ 1361 if (r->max_mss && pf_normalize_tcpopt(r, m, th, off)) 1362 rewrite = 1; 1363 1364 /* copy back packet headers if we sanitized */ 1365 if (rewrite) 1366 m_copyback(m, off, sizeof(*th), th); 1367 1368 return (PF_PASS); 1369 1370 tcp_drop: 1371 REASON_SET(&reason, PFRES_NORM); 1372 if (rm != NULL && r->log) 1373 PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, r, NULL, NULL, pd); 1374 return (PF_DROP); 1375 } 1376 1377 int 1378 pf_normalize_tcp_init(struct mbuf *m, int off, struct pf_pdesc *pd, 1379 struct tcphdr *th, struct pf_state_peer *src, 1380 struct pf_state_peer *dst) 1381 { 1382 u_int32_t tsval, tsecr; 1383 u_int8_t hdr[60]; 1384 u_int8_t *opt; 1385 1386 KASSERT(src->scrub == NULL); 1387 1388 src->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT); 1389 if (src->scrub == NULL) 1390 return (1); 1391 bzero(src->scrub, sizeof(*src->scrub)); 1392 1393 switch (pd->af) { 1394 #ifdef INET 1395 case AF_INET: { 1396 struct ip *h = mtod(m, struct ip *); 1397 src->scrub->pfss_ttl = h->ip_ttl; 1398 break; 1399 } 1400 #endif /* INET */ 1401 #ifdef INET6 1402 case AF_INET6: { 1403 struct ip6_hdr *h = mtod(m, struct ip6_hdr *); 1404 src->scrub->pfss_ttl = h->ip6_hlim; 1405 break; 1406 } 1407 #endif /* INET6 */ 1408 } 1409 1410 1411 /* 1412 * All normalizations below are only begun if we see the start of 1413 * the connections. They must all set an enabled bit in pfss_flags 1414 */ 1415 if ((th->th_flags & TH_SYN) == 0) 1416 return (0); 1417 1418 1419 if (th->th_off > (sizeof(struct tcphdr) >> 2) && src->scrub && 1420 pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) { 1421 /* Diddle with TCP options */ 1422 int hlen; 1423 opt = hdr + sizeof(struct tcphdr); 1424 hlen = (th->th_off << 2) - sizeof(struct tcphdr); 1425 while (hlen >= TCPOLEN_TIMESTAMP) { 1426 switch (*opt) { 1427 case TCPOPT_EOL: /* FALLTHROUGH */ 1428 case TCPOPT_NOP: 1429 opt++; 1430 hlen--; 1431 break; 1432 case TCPOPT_TIMESTAMP: 1433 if (opt[1] >= TCPOLEN_TIMESTAMP) { 1434 src->scrub->pfss_flags |= 1435 PFSS_TIMESTAMP; 1436 src->scrub->pfss_ts_mod = 1437 htonl(arc4random()); 1438 1439 /* note PFSS_PAWS not set yet */ 1440 memcpy(&tsval, &opt[2], 1441 sizeof(u_int32_t)); 1442 memcpy(&tsecr, &opt[6], 1443 sizeof(u_int32_t)); 1444 src->scrub->pfss_tsval0 = ntohl(tsval); 1445 src->scrub->pfss_tsval = ntohl(tsval); 1446 src->scrub->pfss_tsecr = ntohl(tsecr); 1447 getmicrouptime(&src->scrub->pfss_last); 1448 } 1449 /* FALLTHROUGH */ 1450 default: 1451 hlen -= MAX(opt[1], 2); 1452 opt += MAX(opt[1], 2); 1453 break; 1454 } 1455 } 1456 } 1457 1458 return (0); 1459 } 1460 1461 void 1462 pf_normalize_tcp_cleanup(struct pf_state *state) 1463 { 1464 if (state->src.scrub) 1465 pool_put(&pf_state_scrub_pl, state->src.scrub); 1466 if (state->dst.scrub) 1467 pool_put(&pf_state_scrub_pl, state->dst.scrub); 1468 1469 /* Someday... flush the TCP segment reassembly descriptors. */ 1470 } 1471 1472 int 1473 pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, 1474 u_short *reason, struct tcphdr *th, struct pf_state *state, 1475 struct pf_state_peer *src, struct pf_state_peer *dst, int *writeback) 1476 { 1477 struct timeval uptime; 1478 u_int32_t tsval, tsecr; 1479 u_int tsval_from_last; 1480 u_int8_t hdr[60]; 1481 u_int8_t *opt; 1482 int copyback = 0; 1483 int got_ts = 0; 1484 1485 KASSERT(src->scrub || dst->scrub); 1486 1487 /* 1488 * Enforce the minimum TTL seen for this connection. Negate a common 1489 * technique to evade an intrusion detection system and confuse 1490 * firewall state code. 1491 */ 1492 switch (pd->af) { 1493 #ifdef INET 1494 case AF_INET: { 1495 if (src->scrub) { 1496 struct ip *h = mtod(m, struct ip *); 1497 if (h->ip_ttl > src->scrub->pfss_ttl) 1498 src->scrub->pfss_ttl = h->ip_ttl; 1499 h->ip_ttl = src->scrub->pfss_ttl; 1500 } 1501 break; 1502 } 1503 #endif /* INET */ 1504 #ifdef INET6 1505 case AF_INET6: { 1506 if (src->scrub) { 1507 struct ip6_hdr *h = mtod(m, struct ip6_hdr *); 1508 if (h->ip6_hlim > src->scrub->pfss_ttl) 1509 src->scrub->pfss_ttl = h->ip6_hlim; 1510 h->ip6_hlim = src->scrub->pfss_ttl; 1511 } 1512 break; 1513 } 1514 #endif /* INET6 */ 1515 } 1516 1517 if (th->th_off > (sizeof(struct tcphdr) >> 2) && 1518 ((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) || 1519 (dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) && 1520 pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) { 1521 /* Diddle with TCP options */ 1522 int hlen; 1523 opt = hdr + sizeof(struct tcphdr); 1524 hlen = (th->th_off << 2) - sizeof(struct tcphdr); 1525 while (hlen >= TCPOLEN_TIMESTAMP) { 1526 switch (*opt) { 1527 case TCPOPT_EOL: /* FALLTHROUGH */ 1528 case TCPOPT_NOP: 1529 opt++; 1530 hlen--; 1531 break; 1532 case TCPOPT_TIMESTAMP: 1533 /* Modulate the timestamps. Can be used for 1534 * NAT detection, OS uptime determination or 1535 * reboot detection. 1536 */ 1537 1538 if (got_ts) { 1539 /* Huh? Multiple timestamps!? */ 1540 if (pf_status.debug >= PF_DEBUG_MISC) { 1541 DPFPRINTF(("multiple TS??")); 1542 pf_print_state(state); 1543 printf("\n"); 1544 } 1545 REASON_SET(reason, PFRES_TS); 1546 return (PF_DROP); 1547 } 1548 if (opt[1] >= TCPOLEN_TIMESTAMP) { 1549 memcpy(&tsval, &opt[2], 1550 sizeof(u_int32_t)); 1551 if (tsval && src->scrub && 1552 (src->scrub->pfss_flags & 1553 PFSS_TIMESTAMP)) { 1554 tsval = ntohl(tsval); 1555 pf_change_a(&opt[2], 1556 &th->th_sum, 1557 htonl(tsval + 1558 src->scrub->pfss_ts_mod), 1559 0); 1560 copyback = 1; 1561 } 1562 1563 /* Modulate TS reply iff valid (!0) */ 1564 memcpy(&tsecr, &opt[6], 1565 sizeof(u_int32_t)); 1566 if (tsecr && dst->scrub && 1567 (dst->scrub->pfss_flags & 1568 PFSS_TIMESTAMP)) { 1569 tsecr = ntohl(tsecr) 1570 - dst->scrub->pfss_ts_mod; 1571 pf_change_a(&opt[6], 1572 &th->th_sum, htonl(tsecr), 1573 0); 1574 copyback = 1; 1575 } 1576 got_ts = 1; 1577 } 1578 /* FALLTHROUGH */ 1579 default: 1580 hlen -= MAX(opt[1], 2); 1581 opt += MAX(opt[1], 2); 1582 break; 1583 } 1584 } 1585 if (copyback) { 1586 /* Copyback the options, caller copys back header */ 1587 *writeback = 1; 1588 m_copyback(m, off + sizeof(struct tcphdr), 1589 (th->th_off << 2) - sizeof(struct tcphdr), hdr + 1590 sizeof(struct tcphdr)); 1591 } 1592 } 1593 1594 1595 /* 1596 * Must invalidate PAWS checks on connections idle for too long. 1597 * The fastest allowed timestamp clock is 1ms. That turns out to 1598 * be about 24 days before it wraps. XXX Right now our lowerbound 1599 * TS echo check only works for the first 12 days of a connection 1600 * when the TS has exhausted half its 32bit space 1601 */ 1602 #define TS_MAX_IDLE (24*24*60*60) 1603 #define TS_MAX_CONN (12*24*60*60) /* XXX remove when better tsecr check */ 1604 1605 getmicrouptime(&uptime); 1606 if (src->scrub && (src->scrub->pfss_flags & PFSS_PAWS) && 1607 (uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE || 1608 time_second - state->creation > TS_MAX_CONN)) { 1609 if (pf_status.debug >= PF_DEBUG_MISC) { 1610 DPFPRINTF(("src idled out of PAWS\n")); 1611 pf_print_state(state); 1612 printf("\n"); 1613 } 1614 src->scrub->pfss_flags = (src->scrub->pfss_flags & ~PFSS_PAWS) 1615 | PFSS_PAWS_IDLED; 1616 } 1617 if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) && 1618 uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) { 1619 if (pf_status.debug >= PF_DEBUG_MISC) { 1620 DPFPRINTF(("dst idled out of PAWS\n")); 1621 pf_print_state(state); 1622 printf("\n"); 1623 } 1624 dst->scrub->pfss_flags = (dst->scrub->pfss_flags & ~PFSS_PAWS) 1625 | PFSS_PAWS_IDLED; 1626 } 1627 1628 if (got_ts && src->scrub && dst->scrub && 1629 (src->scrub->pfss_flags & PFSS_PAWS) && 1630 (dst->scrub->pfss_flags & PFSS_PAWS)) { 1631 /* Validate that the timestamps are "in-window". 1632 * RFC1323 describes TCP Timestamp options that allow 1633 * measurement of RTT (round trip time) and PAWS 1634 * (protection against wrapped sequence numbers). PAWS 1635 * gives us a set of rules for rejecting packets on 1636 * long fat pipes (packets that were somehow delayed 1637 * in transit longer than the time it took to send the 1638 * full TCP sequence space of 4Gb). We can use these 1639 * rules and infer a few others that will let us treat 1640 * the 32bit timestamp and the 32bit echoed timestamp 1641 * as sequence numbers to prevent a blind attacker from 1642 * inserting packets into a connection. 1643 * 1644 * RFC1323 tells us: 1645 * - The timestamp on this packet must be greater than 1646 * or equal to the last value echoed by the other 1647 * endpoint. The RFC says those will be discarded 1648 * since it is a dup that has already been acked. 1649 * This gives us a lowerbound on the timestamp. 1650 * timestamp >= other last echoed timestamp 1651 * - The timestamp will be less than or equal to 1652 * the last timestamp plus the time between the 1653 * last packet and now. The RFC defines the max 1654 * clock rate as 1ms. We will allow clocks to be 1655 * up to 10% fast and will allow a total difference 1656 * or 30 seconds due to a route change. And this 1657 * gives us an upperbound on the timestamp. 1658 * timestamp <= last timestamp + max ticks 1659 * We have to be careful here. Windows will send an 1660 * initial timestamp of zero and then initialize it 1661 * to a random value after the 3whs; presumably to 1662 * avoid a DoS by having to call an expensive RNG 1663 * during a SYN flood. Proof MS has at least one 1664 * good security geek. 1665 * 1666 * - The TCP timestamp option must also echo the other 1667 * endpoints timestamp. The timestamp echoed is the 1668 * one carried on the earliest unacknowledged segment 1669 * on the left edge of the sequence window. The RFC 1670 * states that the host will reject any echoed 1671 * timestamps that were larger than any ever sent. 1672 * This gives us an upperbound on the TS echo. 1673 * tescr <= largest_tsval 1674 * - The lowerbound on the TS echo is a little more 1675 * tricky to determine. The other endpoint's echoed 1676 * values will not decrease. But there may be 1677 * network conditions that re-order packets and 1678 * cause our view of them to decrease. For now the 1679 * only lowerbound we can safely determine is that 1680 * the TS echo will never be less than the original 1681 * TS. XXX There is probably a better lowerbound. 1682 * Remove TS_MAX_CONN with better lowerbound check. 1683 * tescr >= other original TS 1684 * 1685 * It is also important to note that the fastest 1686 * timestamp clock of 1ms will wrap its 32bit space in 1687 * 24 days. So we just disable TS checking after 24 1688 * days of idle time. We actually must use a 12d 1689 * connection limit until we can come up with a better 1690 * lowerbound to the TS echo check. 1691 */ 1692 struct timeval delta_ts; 1693 int ts_fudge; 1694 1695 1696 /* 1697 * PFTM_TS_DIFF is how many seconds of leeway to allow 1698 * a host's timestamp. This can happen if the previous 1699 * packet got delayed in transit for much longer than 1700 * this packet. 1701 */ 1702 if ((ts_fudge = state->rule.ptr->timeout[PFTM_TS_DIFF]) == 0) 1703 ts_fudge = pf_default_rule.timeout[PFTM_TS_DIFF]; 1704 1705 1706 /* Calculate max ticks since the last timestamp */ 1707 #define TS_MAXFREQ 1100 /* RFC max TS freq of 1 kHz + 10% skew */ 1708 #define TS_MICROSECS 1000000 /* microseconds per second */ 1709 timersub(&uptime, &src->scrub->pfss_last, &delta_ts); 1710 tsval_from_last = (delta_ts.tv_sec + ts_fudge) * TS_MAXFREQ; 1711 tsval_from_last += delta_ts.tv_usec / (TS_MICROSECS/TS_MAXFREQ); 1712 1713 1714 if ((src->state >= TCPS_ESTABLISHED && 1715 dst->state >= TCPS_ESTABLISHED) && 1716 (SEQ_LT(tsval, dst->scrub->pfss_tsecr) || 1717 SEQ_GT(tsval, src->scrub->pfss_tsval + tsval_from_last) || 1718 (tsecr && (SEQ_GT(tsecr, dst->scrub->pfss_tsval) || 1719 SEQ_LT(tsecr, dst->scrub->pfss_tsval0))))) { 1720 /* Bad RFC1323 implementation or an insertion attack. 1721 * 1722 * - Solaris 2.6 and 2.7 are known to send another ACK 1723 * after the FIN,FIN|ACK,ACK closing that carries 1724 * an old timestamp. 1725 */ 1726 1727 DPFPRINTF(("Timestamp failed %c%c%c%c\n", 1728 SEQ_LT(tsval, dst->scrub->pfss_tsecr) ? '0' : ' ', 1729 SEQ_GT(tsval, src->scrub->pfss_tsval + 1730 tsval_from_last) ? '1' : ' ', 1731 SEQ_GT(tsecr, dst->scrub->pfss_tsval) ? '2' : ' ', 1732 SEQ_LT(tsecr, dst->scrub->pfss_tsval0)? '3' : ' ')); 1733 DPFPRINTF((" tsval: %" PRIu32 " tsecr: %" PRIu32 1734 " +ticks: %" PRIu32 " idle: %"PRIx64"s %ums\n", 1735 tsval, tsecr, tsval_from_last, delta_ts.tv_sec, 1736 delta_ts.tv_usec / 1000U)); 1737 DPFPRINTF((" src->tsval: %" PRIu32 " tsecr: %" PRIu32 1738 "\n", 1739 src->scrub->pfss_tsval, src->scrub->pfss_tsecr)); 1740 DPFPRINTF((" dst->tsval: %" PRIu32 " tsecr: %" PRIu32 1741 " tsval0: %" PRIu32 "\n", 1742 dst->scrub->pfss_tsval, 1743 dst->scrub->pfss_tsecr, dst->scrub->pfss_tsval0)); 1744 if (pf_status.debug >= PF_DEBUG_MISC) { 1745 pf_print_state(state); 1746 pf_print_flags(th->th_flags); 1747 printf("\n"); 1748 } 1749 REASON_SET(reason, PFRES_TS); 1750 return (PF_DROP); 1751 } 1752 1753 /* XXX I'd really like to require tsecr but it's optional */ 1754 1755 } else if (!got_ts && (th->th_flags & TH_RST) == 0 && 1756 ((src->state == TCPS_ESTABLISHED && dst->state == TCPS_ESTABLISHED) 1757 || pd->p_len > 0 || (th->th_flags & TH_SYN)) && 1758 src->scrub && dst->scrub && 1759 (src->scrub->pfss_flags & PFSS_PAWS) && 1760 (dst->scrub->pfss_flags & PFSS_PAWS)) { 1761 /* Didn't send a timestamp. Timestamps aren't really useful 1762 * when: 1763 * - connection opening or closing (often not even sent). 1764 * but we must not let an attacker to put a FIN on a 1765 * data packet to sneak it through our ESTABLISHED check. 1766 * - on a TCP reset. RFC suggests not even looking at TS. 1767 * - on an empty ACK. The TS will not be echoed so it will 1768 * probably not help keep the RTT calculation in sync and 1769 * there isn't as much danger when the sequence numbers 1770 * got wrapped. So some stacks don't include TS on empty 1771 * ACKs :-( 1772 * 1773 * To minimize the disruption to mostly RFC1323 conformant 1774 * stacks, we will only require timestamps on data packets. 1775 * 1776 * And what do ya know, we cannot require timestamps on data 1777 * packets. There appear to be devices that do legitimate 1778 * TCP connection hijacking. There are HTTP devices that allow 1779 * a 3whs (with timestamps) and then buffer the HTTP request. 1780 * If the intermediate device has the HTTP response cache, it 1781 * will spoof the response but not bother timestamping its 1782 * packets. So we can look for the presence of a timestamp in 1783 * the first data packet and if there, require it in all future 1784 * packets. 1785 */ 1786 1787 if (pd->p_len > 0 && (src->scrub->pfss_flags & PFSS_DATA_TS)) { 1788 /* 1789 * Hey! Someone tried to sneak a packet in. Or the 1790 * stack changed its RFC1323 behavior?!?! 1791 */ 1792 if (pf_status.debug >= PF_DEBUG_MISC) { 1793 DPFPRINTF(("Did not receive expected RFC1323 " 1794 "timestamp\n")); 1795 pf_print_state(state); 1796 pf_print_flags(th->th_flags); 1797 printf("\n"); 1798 } 1799 REASON_SET(reason, PFRES_TS); 1800 return (PF_DROP); 1801 } 1802 } 1803 1804 1805 /* 1806 * We will note if a host sends his data packets with or without 1807 * timestamps. And require all data packets to contain a timestamp 1808 * if the first does. PAWS implicitly requires that all data packets be 1809 * timestamped. But I think there are middle-man devices that hijack 1810 * TCP streams immediately after the 3whs and don't timestamp their 1811 * packets (seen in a WWW accelerator or cache). 1812 */ 1813 if (pd->p_len > 0 && src->scrub && (src->scrub->pfss_flags & 1814 (PFSS_TIMESTAMP|PFSS_DATA_TS|PFSS_DATA_NOTS)) == PFSS_TIMESTAMP) { 1815 if (got_ts) 1816 src->scrub->pfss_flags |= PFSS_DATA_TS; 1817 else { 1818 src->scrub->pfss_flags |= PFSS_DATA_NOTS; 1819 if (pf_status.debug >= PF_DEBUG_MISC && dst->scrub && 1820 (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) { 1821 /* Don't warn if other host rejected RFC1323 */ 1822 DPFPRINTF(("Broken RFC1323 stack did not " 1823 "timestamp data packet. Disabled PAWS " 1824 "security.\n")); 1825 pf_print_state(state); 1826 pf_print_flags(th->th_flags); 1827 printf("\n"); 1828 } 1829 } 1830 } 1831 1832 1833 /* 1834 * Update PAWS values 1835 */ 1836 if (got_ts && src->scrub && PFSS_TIMESTAMP == (src->scrub->pfss_flags & 1837 (PFSS_PAWS_IDLED|PFSS_TIMESTAMP))) { 1838 getmicrouptime(&src->scrub->pfss_last); 1839 if (SEQ_GEQ(tsval, src->scrub->pfss_tsval) || 1840 (src->scrub->pfss_flags & PFSS_PAWS) == 0) 1841 src->scrub->pfss_tsval = tsval; 1842 1843 if (tsecr) { 1844 if (SEQ_GEQ(tsecr, src->scrub->pfss_tsecr) || 1845 (src->scrub->pfss_flags & PFSS_PAWS) == 0) 1846 src->scrub->pfss_tsecr = tsecr; 1847 1848 if ((src->scrub->pfss_flags & PFSS_PAWS) == 0 && 1849 (SEQ_LT(tsval, src->scrub->pfss_tsval0) || 1850 src->scrub->pfss_tsval0 == 0)) { 1851 /* tsval0 MUST be the lowest timestamp */ 1852 src->scrub->pfss_tsval0 = tsval; 1853 } 1854 1855 /* Only fully initialized after a TS gets echoed */ 1856 if ((src->scrub->pfss_flags & PFSS_PAWS) == 0) 1857 src->scrub->pfss_flags |= PFSS_PAWS; 1858 } 1859 } 1860 1861 /* I have a dream.... TCP segment reassembly.... */ 1862 return (0); 1863 } 1864 1865 int 1866 pf_normalize_tcpopt(struct pf_rule *r, struct mbuf *m, struct tcphdr *th, 1867 int off) 1868 { 1869 u_int16_t *mss; 1870 int thoff; 1871 int opt, cnt, optlen = 0; 1872 int rewrite = 0; 1873 u_char *optp; 1874 1875 thoff = th->th_off << 2; 1876 cnt = thoff - sizeof(struct tcphdr); 1877 optp = mtod(m, u_char *) + off + sizeof(struct tcphdr); 1878 1879 for (; cnt > 0; cnt -= optlen, optp += optlen) { 1880 opt = optp[0]; 1881 if (opt == TCPOPT_EOL) 1882 break; 1883 if (opt == TCPOPT_NOP) 1884 optlen = 1; 1885 else { 1886 if (cnt < 2) 1887 break; 1888 optlen = optp[1]; 1889 if (optlen < 2 || optlen > cnt) 1890 break; 1891 } 1892 switch (opt) { 1893 case TCPOPT_MAXSEG: 1894 mss = (u_int16_t *)(optp + 2); 1895 if ((ntohs(*mss)) > r->max_mss) { 1896 th->th_sum = pf_cksum_fixup(th->th_sum, 1897 *mss, htons(r->max_mss), 0); 1898 *mss = htons(r->max_mss); 1899 rewrite = 1; 1900 } 1901 break; 1902 default: 1903 break; 1904 } 1905 } 1906 1907 return (rewrite); 1908 } 1909