1 /* $OpenBSD: pf_norm.c,v 1.166 2014/07/13 17:41:04 bluhm Exp $ */ 2 3 /* 4 * Copyright 2001 Niels Provos <provos@citi.umich.edu> 5 * Copyright 2009 Henning Brauer <henning@openbsd.org> 6 * Copyright 2011 Alexander Bluhm <bluhm@openbsd.org> 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

#include "pflog.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/filio.h>
#include <sys/fcntl.h>
#include <sys/socket.h>
#include <sys/kernel.h>
#include <sys/time.h>
#include <sys/pool.h>
#include <sys/syslog.h>

#include <dev/rndvar.h>
#include <net/if.h>
#include <net/if_types.h>
#include <net/bpf.h>
#include <net/route.h>
#include <net/if_pflog.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_seq.h>
#include <netinet/udp.h>
#include <netinet/ip_icmp.h>

#ifdef INET6
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#endif /* INET6 */

#include <net/pfvar.h>

/*
 * One buffered fragment.  Entries are linked through fr_next into the
 * fr_queue of the struct pf_fragment they belong to and own the mbuf
 * chain in fe_m while they are queued.
 */
struct pf_frent {
	TAILQ_ENTRY(pf_frent) fr_next;
	struct mbuf	*fe_m;
	u_int16_t	 fe_hdrlen;	/* ipv4 header length with ip options
					   ipv6, extension, fragment header */
	u_int16_t	 fe_extoff;	/* last extension header offset or 0 */
	u_int16_t	 fe_len;	/* fragment length */
	u_int16_t	 fe_off;	/* fragment offset */
	u_int16_t	 fe_mff;	/* more fragment flag */
};

/*
 * Lookup key for the fragment tree.
 *
 * keep synced with struct pf_fragment, used in RB_FIND: a pointer to this
 * structure is cast to struct pf_fragment * for the tree lookup and the
 * key is copied over the start of a new struct pf_fragment, so the members
 * below must stay identical to, and in the same order as, the leading
 * members of struct pf_fragment.
 */
struct pf_fragment_cmp {
	struct pf_addr	fr_src;
	struct pf_addr	fr_dst;
	u_int32_t	fr_id;
	sa_family_t	fr_af;
	u_int8_t	fr_proto;
	u_int8_t	fr_direction;
};

/*
 * Reassembly state of one fragmented packet: the key members (shared
 * with struct pf_fragment_cmp), the linkage into the lookup tree and the
 * global fragment queue, and the queue of fragments received so far.
 */
struct pf_fragment {
	struct pf_addr	fr_src;		/* ip source address */
	struct pf_addr	fr_dst;		/* ip destination address */
	u_int32_t	fr_id;		/* fragment id for reassemble */
	sa_family_t	fr_af;		/* address family */
	u_int8_t	fr_proto;	/* protocol of this fragment */
	u_int8_t	fr_direction;	/* pf packet direction */

	RB_ENTRY(pf_fragment) fr_entry;		/* lookup tree linkage */
	TAILQ_ENTRY(pf_fragment) frag_next;	/* global queue linkage */
	TAILQ_HEAD(pf_fragq, pf_frent) fr_queue; /* fragments, by offset */
	int32_t		fr_timeout;	/* time_uptime at creation */
	u_int16_t	fr_maxlen;	/* maximum length of single fragment */
};
101 102 struct pf_fragment_tag { 103 u_int16_t ft_hdrlen; /* header lenght of reassembled pkt */ 104 u_int16_t ft_extoff; /* last extension header offset or 0 */ 105 u_int16_t ft_maxlen; /* maximum fragment payload length */ 106 }; 107 108 TAILQ_HEAD(pf_fragqueue, pf_fragment) pf_fragqueue; 109 110 static __inline int pf_frag_compare(struct pf_fragment *, 111 struct pf_fragment *); 112 RB_HEAD(pf_frag_tree, pf_fragment) pf_frag_tree, pf_cache_tree; 113 RB_PROTOTYPE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare); 114 RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare); 115 116 /* Private prototypes */ 117 void pf_flush_fragments(void); 118 void pf_free_fragment(struct pf_fragment *); 119 struct pf_fragment *pf_find_fragment(struct pf_fragment_cmp *, 120 struct pf_frag_tree *); 121 struct pf_frent *pf_create_fragment(u_short *); 122 struct pf_fragment *pf_fillup_fragment(struct pf_fragment_cmp *, 123 struct pf_frent *, u_short *); 124 int pf_isfull_fragment(struct pf_fragment *); 125 struct mbuf *pf_join_fragment(struct pf_fragment *); 126 int pf_reassemble(struct mbuf **, int, u_short *); 127 #ifdef INET6 128 int pf_reassemble6(struct mbuf **, struct ip6_frag *, 129 u_int16_t, u_int16_t, int, u_short *); 130 #endif /* INET6 */ 131 132 /* Globals */ 133 struct pool pf_frent_pl, pf_frag_pl; 134 struct pool pf_state_scrub_pl; 135 int pf_nfrents; 136 137 void 138 pf_normalize_init(void) 139 { 140 pool_init(&pf_frent_pl, sizeof(struct pf_frent), 0, 0, 0, "pffrent", 141 NULL); 142 pool_init(&pf_frag_pl, sizeof(struct pf_fragment), 0, 0, 0, "pffrag", 143 NULL); 144 pool_init(&pf_state_scrub_pl, sizeof(struct pf_state_scrub), 0, 0, 0, 145 "pfstscr", NULL); 146 147 pool_sethiwat(&pf_frag_pl, PFFRAG_FRAG_HIWAT); 148 pool_sethardlimit(&pf_frent_pl, PFFRAG_FRENT_HIWAT, NULL, 0); 149 150 TAILQ_INIT(&pf_fragqueue); 151 } 152 153 static __inline int 154 pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b) 155 { 156 int diff; 157 158 if ((diff = 
a->fr_id - b->fr_id) != 0) 159 return (diff); 160 if ((diff = a->fr_proto - b->fr_proto) != 0) 161 return (diff); 162 if ((diff = a->fr_af - b->fr_af) != 0) 163 return (diff); 164 if ((diff = pf_addr_compare(&a->fr_src, &b->fr_src, a->fr_af)) != 0) 165 return (diff); 166 if ((diff = pf_addr_compare(&a->fr_dst, &b->fr_dst, a->fr_af)) != 0) 167 return (diff); 168 return (0); 169 } 170 171 void 172 pf_purge_expired_fragments(void) 173 { 174 struct pf_fragment *frag; 175 int32_t expire = time_uptime - 176 pf_default_rule.timeout[PFTM_FRAG]; 177 178 while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) { 179 if (frag->fr_timeout > expire) 180 break; 181 182 DPFPRINTF(LOG_NOTICE, "expiring %d(%p)", frag->fr_id, frag); 183 pf_free_fragment(frag); 184 } 185 } 186 187 /* 188 * Try to flush old fragments to make space for new ones 189 */ 190 191 void 192 pf_flush_fragments(void) 193 { 194 struct pf_fragment *frag; 195 int goal; 196 197 goal = pf_nfrents * 9 / 10; 198 DPFPRINTF(LOG_NOTICE, "trying to free > %d frents", 199 pf_nfrents - goal); 200 while (goal < pf_nfrents) { 201 frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue); 202 if (frag == NULL) 203 break; 204 pf_free_fragment(frag); 205 } 206 } 207 208 /* 209 * Remove a fragment from the fragment queue, free its fragment entries, 210 * and free the fragment itself. 
211 */ 212 void 213 pf_free_fragment(struct pf_fragment *frag) 214 { 215 struct pf_frent *frent; 216 217 RB_REMOVE(pf_frag_tree, &pf_frag_tree, frag); 218 TAILQ_REMOVE(&pf_fragqueue, frag, frag_next); 219 220 /* Free all fragment entries */ 221 while ((frent = TAILQ_FIRST(&frag->fr_queue)) != NULL) { 222 TAILQ_REMOVE(&frag->fr_queue, frent, fr_next); 223 224 m_freem(frent->fe_m); 225 pool_put(&pf_frent_pl, frent); 226 pf_nfrents--; 227 } 228 229 pool_put(&pf_frag_pl, frag); 230 } 231 232 struct pf_fragment * 233 pf_find_fragment(struct pf_fragment_cmp *key, struct pf_frag_tree *tree) 234 { 235 struct pf_fragment *frag; 236 237 frag = RB_FIND(pf_frag_tree, tree, (struct pf_fragment *)key); 238 if (frag != NULL) { 239 TAILQ_REMOVE(&pf_fragqueue, frag, frag_next); 240 TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next); 241 } 242 243 return (frag); 244 } 245 246 struct pf_frent * 247 pf_create_fragment(u_short *reason) 248 { 249 struct pf_frent *frent; 250 251 frent = pool_get(&pf_frent_pl, PR_NOWAIT); 252 if (frent == NULL) { 253 pf_flush_fragments(); 254 frent = pool_get(&pf_frent_pl, PR_NOWAIT); 255 if (frent == NULL) { 256 REASON_SET(reason, PFRES_MEMORY); 257 return (NULL); 258 } 259 } 260 pf_nfrents++; 261 262 return (frent); 263 } 264 265 struct pf_fragment * 266 pf_fillup_fragment(struct pf_fragment_cmp *key, struct pf_frent *frent, 267 u_short *reason) 268 { 269 struct pf_frent *after, *next, *prev; 270 struct pf_fragment *frag; 271 u_int16_t total; 272 273 /* No empty fragments */ 274 if (frent->fe_len == 0) { 275 DPFPRINTF(LOG_NOTICE, "bad fragment: len 0"); 276 goto bad_fragment; 277 } 278 279 /* All fragments are 8 byte aligned */ 280 if (frent->fe_mff && (frent->fe_len & 0x7)) { 281 DPFPRINTF(LOG_NOTICE, "bad fragment: mff and len %d", 282 frent->fe_len); 283 goto bad_fragment; 284 } 285 286 /* Respect maximum length, IP_MAXPACKET == IPV6_MAXPACKET */ 287 if (frent->fe_off + frent->fe_len > IP_MAXPACKET) { 288 DPFPRINTF(LOG_NOTICE, "bad fragment: max 
packet %d", 289 frent->fe_off + frent->fe_len); 290 goto bad_fragment; 291 } 292 293 DPFPRINTF(LOG_NOTICE, key->fr_af == AF_INET ? 294 "reass frag %d @ %d-%d" : "reass frag %#08x @ %d-%d", 295 key->fr_id, frent->fe_off, frent->fe_off + frent->fe_len); 296 297 /* Fully buffer all of the fragments in this fragment queue */ 298 frag = pf_find_fragment(key, &pf_frag_tree); 299 300 /* Create a new reassembly queue for this packet */ 301 if (frag == NULL) { 302 frag = pool_get(&pf_frag_pl, PR_NOWAIT); 303 if (frag == NULL) { 304 pf_flush_fragments(); 305 frag = pool_get(&pf_frag_pl, PR_NOWAIT); 306 if (frag == NULL) { 307 REASON_SET(reason, PFRES_MEMORY); 308 goto drop_fragment; 309 } 310 } 311 312 *(struct pf_fragment_cmp *)frag = *key; 313 TAILQ_INIT(&frag->fr_queue); 314 frag->fr_timeout = time_uptime; 315 frag->fr_maxlen = frent->fe_len; 316 317 RB_INSERT(pf_frag_tree, &pf_frag_tree, frag); 318 TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next); 319 320 /* We do not have a previous fragment */ 321 TAILQ_INSERT_HEAD(&frag->fr_queue, frent, fr_next); 322 323 return (frag); 324 } 325 326 KASSERT(!TAILQ_EMPTY(&frag->fr_queue)); 327 328 /* Remember maximum fragment len for refragmentation */ 329 if (frent->fe_len > frag->fr_maxlen) 330 frag->fr_maxlen = frent->fe_len; 331 332 /* Maximum data we have seen already */ 333 total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off + 334 TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len; 335 336 /* Non terminal fragments must have more fragments flag */ 337 if (frent->fe_off + frent->fe_len < total && !frent->fe_mff) 338 goto bad_fragment; 339 340 /* Check if we saw the last fragment already */ 341 if (!TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_mff) { 342 if (frent->fe_off + frent->fe_len > total || 343 (frent->fe_off + frent->fe_len == total && frent->fe_mff)) 344 goto bad_fragment; 345 } else { 346 if (frent->fe_off + frent->fe_len == total && !frent->fe_mff) 347 goto bad_fragment; 348 } 349 350 /* Find a fragment after the current 
one */ 351 prev = NULL; 352 TAILQ_FOREACH(after, &frag->fr_queue, fr_next) { 353 if (after->fe_off > frent->fe_off) 354 break; 355 prev = after; 356 } 357 358 KASSERT(prev != NULL || after != NULL); 359 360 if (prev != NULL && prev->fe_off + prev->fe_len > frent->fe_off) { 361 u_int16_t precut; 362 363 #ifdef INET6 364 if (frag->fr_af == AF_INET6) 365 goto free_fragment; 366 #endif /* INET6 */ 367 368 precut = prev->fe_off + prev->fe_len - frent->fe_off; 369 if (precut >= frent->fe_len) { 370 DPFPRINTF(LOG_NOTICE, "new frag overlapped"); 371 goto drop_fragment; 372 } 373 DPFPRINTF(LOG_NOTICE, "frag head overlap %d", precut); 374 m_adj(frent->fe_m, precut); 375 frent->fe_off += precut; 376 frent->fe_len -= precut; 377 } 378 379 for (; after != NULL && frent->fe_off + frent->fe_len > after->fe_off; 380 after = next) 381 { 382 u_int16_t aftercut; 383 384 #ifdef INET6 385 if (frag->fr_af == AF_INET6) 386 goto free_fragment; 387 #endif /* INET6 */ 388 389 aftercut = frent->fe_off + frent->fe_len - after->fe_off; 390 if (aftercut < after->fe_len) { 391 DPFPRINTF(LOG_NOTICE, "frag tail overlap %d", aftercut); 392 m_adj(after->fe_m, aftercut); 393 after->fe_off += aftercut; 394 after->fe_len -= aftercut; 395 break; 396 } 397 398 /* This fragment is completely overlapped, lose it */ 399 DPFPRINTF(LOG_NOTICE, "old frag overlapped"); 400 next = TAILQ_NEXT(after, fr_next); 401 TAILQ_REMOVE(&frag->fr_queue, after, fr_next); 402 403 m_freem(after->fe_m); 404 pool_put(&pf_frent_pl, after); 405 pf_nfrents--; 406 } 407 408 if (prev == NULL) 409 TAILQ_INSERT_HEAD(&frag->fr_queue, frent, fr_next); 410 else 411 TAILQ_INSERT_AFTER(&frag->fr_queue, prev, frent, fr_next); 412 413 return (frag); 414 415 #ifdef INET6 416 free_fragment: 417 /* 418 * RFC 5722, Errata 3089: When reassembling an IPv6 datagram, if one 419 * or more its constituent fragments is determined to be an overlapping 420 * fragment, the entire datagram (and any constituent fragments) MUST 421 * be silently discarded. 
422 */ 423 DPFPRINTF(LOG_NOTICE, "flush overlapping fragments"); 424 pf_free_fragment(frag); 425 #endif /* INET6 */ 426 bad_fragment: 427 REASON_SET(reason, PFRES_FRAG); 428 drop_fragment: 429 pool_put(&pf_frent_pl, frent); 430 pf_nfrents--; 431 return (NULL); 432 } 433 434 int 435 pf_isfull_fragment(struct pf_fragment *frag) 436 { 437 struct pf_frent *frent, *next; 438 u_int16_t off, total; 439 440 KASSERT(!TAILQ_EMPTY(&frag->fr_queue)); 441 442 /* Check if we are completely reassembled */ 443 if (TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_mff) 444 return (0); 445 446 /* Maximum data we have seen already */ 447 total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off + 448 TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len; 449 450 /* Check if we have all the data */ 451 off = 0; 452 for (frent = TAILQ_FIRST(&frag->fr_queue); frent; frent = next) { 453 next = TAILQ_NEXT(frent, fr_next); 454 455 off += frent->fe_len; 456 if (off < total && (next == NULL || next->fe_off != off)) { 457 DPFPRINTF(LOG_NOTICE, 458 "missing fragment at %d, next %d, total %d", 459 off, next == NULL ? 
-1 : next->fe_off, total); 460 return (0); 461 } 462 } 463 DPFPRINTF(LOG_NOTICE, "%d < %d?", off, total); 464 if (off < total) 465 return (0); 466 KASSERT(off == total); 467 468 return (1); 469 } 470 471 struct mbuf * 472 pf_join_fragment(struct pf_fragment *frag) 473 { 474 struct mbuf *m, *m2; 475 struct pf_frent *frent; 476 477 frent = TAILQ_FIRST(&frag->fr_queue); 478 TAILQ_REMOVE(&frag->fr_queue, frent, fr_next); 479 480 /* Magic from ip_input */ 481 m = frent->fe_m; 482 m2 = m->m_next; 483 m->m_next = NULL; 484 m_cat(m, m2); 485 pool_put(&pf_frent_pl, frent); 486 pf_nfrents--; 487 488 while ((frent = TAILQ_FIRST(&frag->fr_queue)) != NULL) { 489 TAILQ_REMOVE(&frag->fr_queue, frent, fr_next); 490 491 m2 = frent->fe_m; 492 /* Strip off ip header */ 493 m_adj(m2, frent->fe_hdrlen); 494 pool_put(&pf_frent_pl, frent); 495 pf_nfrents--; 496 m_cat(m, m2); 497 } 498 499 /* Remove from fragment queue */ 500 pf_free_fragment(frag); 501 502 return (m); 503 } 504 505 int 506 pf_reassemble(struct mbuf **m0, int dir, u_short *reason) 507 { 508 struct mbuf *m = *m0; 509 struct ip *ip = mtod(m, struct ip *); 510 struct pf_frent *frent; 511 struct pf_fragment *frag; 512 struct pf_fragment_cmp key; 513 u_int16_t total, hdrlen; 514 515 /* Get an entry for the fragment queue */ 516 if ((frent = pf_create_fragment(reason)) == NULL) 517 return (PF_DROP); 518 519 frent->fe_m = m; 520 frent->fe_hdrlen = ip->ip_hl << 2; 521 frent->fe_extoff = 0; 522 frent->fe_len = ntohs(ip->ip_len) - (ip->ip_hl << 2); 523 frent->fe_off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3; 524 frent->fe_mff = ntohs(ip->ip_off) & IP_MF; 525 526 key.fr_src.v4 = ip->ip_src; 527 key.fr_dst.v4 = ip->ip_dst; 528 key.fr_af = AF_INET; 529 key.fr_proto = ip->ip_p; 530 key.fr_id = ip->ip_id; 531 key.fr_direction = dir; 532 533 if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL) 534 return (PF_DROP); 535 536 /* The mbuf is part of the fragment entry, no direct free or access */ 537 m = *m0 = NULL; 538 539 if 
(!pf_isfull_fragment(frag)) 540 return (PF_PASS); /* drop because *m0 is NULL, no error */ 541 542 /* We have all the data */ 543 frent = TAILQ_FIRST(&frag->fr_queue); 544 KASSERT(frent != NULL); 545 total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off + 546 TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len; 547 hdrlen = frent->fe_hdrlen; 548 549 m = *m0 = pf_join_fragment(frag); 550 frag = NULL; 551 552 if (m->m_flags & M_PKTHDR) { 553 int plen = 0; 554 for (m = *m0; m; m = m->m_next) 555 plen += m->m_len; 556 m = *m0; 557 m->m_pkthdr.len = plen; 558 } 559 560 ip = mtod(m, struct ip *); 561 ip->ip_len = htons(hdrlen + total); 562 ip->ip_off &= ~(IP_MF|IP_OFFMASK); 563 564 if (hdrlen + total > IP_MAXPACKET) { 565 DPFPRINTF(LOG_NOTICE, "drop: too big: %d", total); 566 ip->ip_len = 0; 567 REASON_SET(reason, PFRES_SHORT); 568 /* PF_DROP requires a valid mbuf *m0 in pf_test() */ 569 return (PF_DROP); 570 } 571 572 DPFPRINTF(LOG_NOTICE, "complete: %p(%d)", m, ntohs(ip->ip_len)); 573 return (PF_PASS); 574 } 575 576 #ifdef INET6 577 int 578 pf_reassemble6(struct mbuf **m0, struct ip6_frag *fraghdr, 579 u_int16_t hdrlen, u_int16_t extoff, int dir, u_short *reason) 580 { 581 struct mbuf *m = *m0; 582 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 583 struct m_tag *mtag; 584 struct pf_fragment_tag *ftag; 585 struct pf_frent *frent; 586 struct pf_fragment *frag; 587 struct pf_fragment_cmp key; 588 int off; 589 u_int16_t total, maxlen; 590 u_int8_t proto; 591 592 /* Get an entry for the fragment queue */ 593 if ((frent = pf_create_fragment(reason)) == NULL) 594 return (PF_DROP); 595 596 frent->fe_m = m; 597 frent->fe_hdrlen = hdrlen; 598 frent->fe_extoff = extoff; 599 frent->fe_len = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen) - hdrlen; 600 frent->fe_off = ntohs(fraghdr->ip6f_offlg & IP6F_OFF_MASK); 601 frent->fe_mff = fraghdr->ip6f_offlg & IP6F_MORE_FRAG; 602 603 key.fr_src.v6 = ip6->ip6_src; 604 key.fr_dst.v6 = ip6->ip6_dst; 605 key.fr_af = AF_INET6; 606 /* Only the first 
fragment's protocol is relevant */ 607 key.fr_proto = 0; 608 key.fr_id = fraghdr->ip6f_ident; 609 key.fr_direction = dir; 610 611 if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL) 612 return (PF_DROP); 613 614 /* The mbuf is part of the fragment entry, no direct free or access */ 615 m = *m0 = NULL; 616 617 if (!pf_isfull_fragment(frag)) 618 return (PF_PASS); /* drop because *m0 is NULL, no error */ 619 620 /* We have all the data */ 621 extoff = frent->fe_extoff; 622 maxlen = frag->fr_maxlen; 623 frent = TAILQ_FIRST(&frag->fr_queue); 624 KASSERT(frent != NULL); 625 total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off + 626 TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len; 627 hdrlen = frent->fe_hdrlen - sizeof(struct ip6_frag); 628 629 m = *m0 = pf_join_fragment(frag); 630 frag = NULL; 631 632 /* Take protocol from first fragment header */ 633 if ((m = m_getptr(m, hdrlen + offsetof(struct ip6_frag, ip6f_nxt), 634 &off)) == NULL) 635 panic("pf_reassemble6: short mbuf chain"); 636 proto = *(mtod(m, caddr_t) + off); 637 m = *m0; 638 639 /* Delete frag6 header */ 640 if (frag6_deletefraghdr(m, hdrlen) != 0) 641 goto fail; 642 643 if (m->m_flags & M_PKTHDR) { 644 int plen = 0; 645 for (m = *m0; m; m = m->m_next) 646 plen += m->m_len; 647 m = *m0; 648 m->m_pkthdr.len = plen; 649 } 650 651 if ((mtag = m_tag_get(PACKET_TAG_PF_REASSEMBLED, sizeof(struct 652 pf_fragment_tag), M_NOWAIT)) == NULL) 653 goto fail; 654 ftag = (struct pf_fragment_tag *)(mtag + 1); 655 ftag->ft_hdrlen = hdrlen; 656 ftag->ft_extoff = extoff; 657 ftag->ft_maxlen = maxlen; 658 m_tag_prepend(m, mtag); 659 660 ip6 = mtod(m, struct ip6_hdr *); 661 ip6->ip6_plen = htons(hdrlen - sizeof(struct ip6_hdr) + total); 662 if (extoff) { 663 /* Write protocol into next field of last extension header */ 664 if ((m = m_getptr(m, extoff + offsetof(struct ip6_ext, 665 ip6e_nxt), &off)) == NULL) 666 panic("pf_reassemble6: short mbuf chain"); 667 *(mtod(m, caddr_t) + off) = proto; 668 m = *m0; 669 } else 670 
ip6->ip6_nxt = proto; 671 672 if (hdrlen - sizeof(struct ip6_hdr) + total > IPV6_MAXPACKET) { 673 DPFPRINTF(LOG_NOTICE, "drop: too big: %d", total); 674 ip6->ip6_plen = 0; 675 REASON_SET(reason, PFRES_SHORT); 676 /* PF_DROP requires a valid mbuf *m0 in pf_test6() */ 677 return (PF_DROP); 678 } 679 680 DPFPRINTF(LOG_NOTICE, "complete: %p(%d)", m, ntohs(ip6->ip6_plen)); 681 return (PF_PASS); 682 683 fail: 684 REASON_SET(reason, PFRES_MEMORY); 685 /* PF_DROP requires a valid mbuf *m0 in pf_test6(), will free later */ 686 return (PF_DROP); 687 } 688 689 int 690 pf_refragment6(struct mbuf **m0, struct m_tag *mtag, int dir) 691 { 692 struct mbuf *m = *m0, *t; 693 struct pf_fragment_tag *ftag = (struct pf_fragment_tag *)(mtag + 1); 694 u_int32_t mtu; 695 u_int16_t hdrlen, extoff, maxlen; 696 u_int8_t proto; 697 int error, action; 698 699 hdrlen = ftag->ft_hdrlen; 700 extoff = ftag->ft_extoff; 701 maxlen = ftag->ft_maxlen; 702 m_tag_delete(m, mtag); 703 mtag = NULL; 704 ftag = NULL; 705 706 /* Checksum must be calculated for the whole packet */ 707 in6_proto_cksum_out(m, NULL); 708 709 if (extoff) { 710 int off; 711 712 /* Use protocol from next field of last extension header */ 713 if ((m = m_getptr(m, extoff + offsetof(struct ip6_ext, 714 ip6e_nxt), &off)) == NULL) 715 panic("pf_refragment6: short mbuf chain"); 716 proto = *(mtod(m, caddr_t) + off); 717 *(mtod(m, caddr_t) + off) = IPPROTO_FRAGMENT; 718 m = *m0; 719 } else { 720 struct ip6_hdr *hdr; 721 722 hdr = mtod(m, struct ip6_hdr *); 723 proto = hdr->ip6_nxt; 724 hdr->ip6_nxt = IPPROTO_FRAGMENT; 725 } 726 727 /* 728 * Maxlen may be less than 8 iff there was only a single 729 * fragment. As it was fragmented before, add a fragment 730 * header also for a single fragment. If total or maxlen 731 * is less than 8, ip6_fragment() will return EMSGSIZE and 732 * we drop the packet. 
733 */ 734 735 mtu = hdrlen + sizeof(struct ip6_frag) + maxlen; 736 error = ip6_fragment(m, hdrlen, proto, mtu); 737 738 m = (*m0)->m_nextpkt; 739 (*m0)->m_nextpkt = NULL; 740 if (error == 0) { 741 /* The first mbuf contains the unfragmented packet */ 742 m_freem(*m0); 743 *m0 = NULL; 744 action = PF_PASS; 745 } else { 746 /* Drop expects an mbuf to free */ 747 DPFPRINTF(LOG_NOTICE, "refragment error %d", error); 748 action = PF_DROP; 749 } 750 for (t = m; m; m = t) { 751 t = m->m_nextpkt; 752 m->m_nextpkt = NULL; 753 m->m_pkthdr.pf.flags |= PF_TAG_REFRAGMENTED; 754 if (error == 0) 755 ip6_forward(m, 0); 756 else 757 m_freem(m); 758 } 759 760 return (action); 761 } 762 #endif /* INET6 */ 763 764 int 765 pf_normalize_ip(struct pf_pdesc *pd, u_short *reason) 766 { 767 struct ip *h = mtod(pd->m, struct ip *); 768 u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3; 769 u_int16_t mff = (ntohs(h->ip_off) & IP_MF); 770 771 if (!fragoff && !mff) 772 goto no_fragment; 773 774 /* Clear IP_DF if we're in no-df mode */ 775 if (pf_status.reass & PF_REASS_NODF && h->ip_off & htons(IP_DF)) 776 h->ip_off &= htons(~IP_DF); 777 778 /* We're dealing with a fragment now. Don't allow fragments 779 * with IP_DF to enter the cache. If the flag was cleared by 780 * no-df above, fine. Otherwise drop it. 
781 */ 782 if (h->ip_off & htons(IP_DF)) { 783 DPFPRINTF(LOG_NOTICE, "bad fragment: IP_DF"); 784 REASON_SET(reason, PFRES_FRAG); 785 return (PF_DROP); 786 } 787 788 if (!pf_status.reass) 789 return (PF_PASS); /* no reassembly */ 790 791 /* Returns PF_DROP or m is NULL or completely reassembled mbuf */ 792 if (pf_reassemble(&pd->m, pd->dir, reason) != PF_PASS) 793 return (PF_DROP); 794 if (pd->m == NULL) 795 return (PF_PASS); /* packet has been reassembled, no error */ 796 797 h = mtod(pd->m, struct ip *); 798 799 no_fragment: 800 /* At this point, only IP_DF is allowed in ip_off */ 801 if (h->ip_off & ~htons(IP_DF)) 802 h->ip_off &= htons(IP_DF); 803 804 return (PF_PASS); 805 } 806 807 #ifdef INET6 808 int 809 pf_normalize_ip6(struct pf_pdesc *pd, u_short *reason) 810 { 811 struct ip6_frag frag; 812 813 if (pd->fragoff == 0) 814 goto no_fragment; 815 816 if (!pf_pull_hdr(pd->m, pd->fragoff, &frag, sizeof(frag), NULL, reason, 817 AF_INET6)) 818 return (PF_DROP); 819 820 if (!pf_status.reass) 821 return (PF_PASS); /* no reassembly */ 822 823 /* Returns PF_DROP or m is NULL or completely reassembled mbuf */ 824 if (pf_reassemble6(&pd->m, &frag, pd->fragoff + sizeof(frag), 825 pd->extoff, pd->dir, reason) != PF_PASS) 826 return (PF_DROP); 827 if (pd->m == NULL) 828 return (PF_PASS); /* packet has been reassembled, no error */ 829 830 no_fragment: 831 return (PF_PASS); 832 } 833 #endif /* INET6 */ 834 835 int 836 pf_normalize_tcp(struct pf_pdesc *pd) 837 { 838 struct tcphdr *th = pd->hdr.tcp; 839 u_short reason; 840 u_int8_t flags; 841 u_int rewrite = 0; 842 843 if (pd->csum_status == PF_CSUM_UNKNOWN) 844 pf_check_proto_cksum(pd, pd->off, pd->tot_len - pd->off, 845 pd->proto, pd->af); 846 847 flags = th->th_flags; 848 if (flags & TH_SYN) { 849 /* Illegal packet */ 850 if (flags & TH_RST) 851 goto tcp_drop; 852 853 if (flags & TH_FIN) 854 flags &= ~TH_FIN; 855 } else { 856 /* Illegal packet */ 857 if (!(flags & (TH_ACK|TH_RST))) 858 goto tcp_drop; 859 } 860 861 if 
(!(flags & TH_ACK)) { 862 /* These flags are only valid if ACK is set */ 863 if ((flags & TH_FIN) || (flags & TH_PUSH) || (flags & TH_URG)) 864 goto tcp_drop; 865 } 866 867 /* If flags changed, or reserved data set, then adjust */ 868 if (flags != th->th_flags || th->th_x2 != 0) { 869 th->th_flags = flags; 870 th->th_x2 = 0; 871 rewrite = 1; 872 } 873 874 /* Remove urgent pointer, if TH_URG is not set */ 875 if (!(flags & TH_URG) && th->th_urp) { 876 th->th_urp = 0; 877 rewrite = 1; 878 } 879 880 /* copy back packet headers if we sanitized */ 881 if (rewrite) { 882 pf_cksum(pd, pd->m); 883 m_copyback(pd->m, pd->off, sizeof(*th), th, M_NOWAIT); 884 } 885 886 return (PF_PASS); 887 888 tcp_drop: 889 REASON_SET(&reason, PFRES_NORM); 890 return (PF_DROP); 891 } 892 893 int 894 pf_normalize_tcp_init(struct pf_pdesc *pd, struct pf_state_peer *src, 895 struct pf_state_peer *dst) 896 { 897 struct tcphdr *th = pd->hdr.tcp; 898 u_int32_t tsval, tsecr; 899 u_int8_t hdr[60]; 900 u_int8_t *opt; 901 902 KASSERT(src->scrub == NULL); 903 904 src->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT); 905 if (src->scrub == NULL) 906 return (1); 907 bzero(src->scrub, sizeof(*src->scrub)); 908 909 switch (pd->af) { 910 #ifdef INET 911 case AF_INET: { 912 struct ip *h = mtod(pd->m, struct ip *); 913 src->scrub->pfss_ttl = h->ip_ttl; 914 break; 915 } 916 #endif /* INET */ 917 #ifdef INET6 918 case AF_INET6: { 919 struct ip6_hdr *h = mtod(pd->m, struct ip6_hdr *); 920 src->scrub->pfss_ttl = h->ip6_hlim; 921 break; 922 } 923 #endif /* INET6 */ 924 } 925 926 927 /* 928 * All normalizations below are only begun if we see the start of 929 * the connections. 
They must all set an enabled bit in pfss_flags 930 */ 931 if ((th->th_flags & TH_SYN) == 0) 932 return (0); 933 934 935 if (th->th_off > (sizeof(struct tcphdr) >> 2) && src->scrub && 936 pf_pull_hdr(pd->m, pd->off, hdr, th->th_off << 2, NULL, NULL, 937 pd->af)) { 938 /* Diddle with TCP options */ 939 int hlen; 940 opt = hdr + sizeof(struct tcphdr); 941 hlen = (th->th_off << 2) - sizeof(struct tcphdr); 942 while (hlen >= TCPOLEN_TIMESTAMP) { 943 switch (*opt) { 944 case TCPOPT_EOL: /* FALLTHROUGH */ 945 case TCPOPT_NOP: 946 opt++; 947 hlen--; 948 break; 949 case TCPOPT_TIMESTAMP: 950 if (opt[1] >= TCPOLEN_TIMESTAMP) { 951 src->scrub->pfss_flags |= 952 PFSS_TIMESTAMP; 953 src->scrub->pfss_ts_mod = 954 htonl(arc4random()); 955 956 /* note PFSS_PAWS not set yet */ 957 memcpy(&tsval, &opt[2], 958 sizeof(u_int32_t)); 959 memcpy(&tsecr, &opt[6], 960 sizeof(u_int32_t)); 961 src->scrub->pfss_tsval0 = ntohl(tsval); 962 src->scrub->pfss_tsval = ntohl(tsval); 963 src->scrub->pfss_tsecr = ntohl(tsecr); 964 getmicrouptime(&src->scrub->pfss_last); 965 } 966 /* FALLTHROUGH */ 967 default: 968 hlen -= MAX(opt[1], 2); 969 opt += MAX(opt[1], 2); 970 break; 971 } 972 } 973 } 974 975 return (0); 976 } 977 978 void 979 pf_normalize_tcp_cleanup(struct pf_state *state) 980 { 981 if (state->src.scrub) 982 pool_put(&pf_state_scrub_pl, state->src.scrub); 983 if (state->dst.scrub) 984 pool_put(&pf_state_scrub_pl, state->dst.scrub); 985 986 /* Someday... flush the TCP segment reassembly descriptors. 
*/ 987 } 988 989 int 990 pf_normalize_tcp_stateful(struct pf_pdesc *pd, u_short *reason, 991 struct pf_state *state, struct pf_state_peer *src, 992 struct pf_state_peer *dst, int *writeback) 993 { 994 struct tcphdr *th = pd->hdr.tcp; 995 struct timeval uptime; 996 u_int32_t tsval, tsecr; 997 u_int tsval_from_last; 998 u_int8_t hdr[60]; 999 u_int8_t *opt; 1000 int copyback = 0; 1001 int got_ts = 0; 1002 1003 KASSERT(src->scrub || dst->scrub); 1004 1005 /* 1006 * Enforce the minimum TTL seen for this connection. Negate a common 1007 * technique to evade an intrusion detection system and confuse 1008 * firewall state code. 1009 */ 1010 switch (pd->af) { 1011 #ifdef INET 1012 case AF_INET: { 1013 if (src->scrub) { 1014 struct ip *h = mtod(pd->m, struct ip *); 1015 if (h->ip_ttl > src->scrub->pfss_ttl) 1016 src->scrub->pfss_ttl = h->ip_ttl; 1017 h->ip_ttl = src->scrub->pfss_ttl; 1018 } 1019 break; 1020 } 1021 #endif /* INET */ 1022 #ifdef INET6 1023 case AF_INET6: { 1024 if (src->scrub) { 1025 struct ip6_hdr *h = mtod(pd->m, struct ip6_hdr *); 1026 if (h->ip6_hlim > src->scrub->pfss_ttl) 1027 src->scrub->pfss_ttl = h->ip6_hlim; 1028 h->ip6_hlim = src->scrub->pfss_ttl; 1029 } 1030 break; 1031 } 1032 #endif /* INET6 */ 1033 } 1034 1035 if (th->th_off > (sizeof(struct tcphdr) >> 2) && 1036 ((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) || 1037 (dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) && 1038 pf_pull_hdr(pd->m, pd->off, hdr, th->th_off << 2, NULL, NULL, 1039 pd->af)) { 1040 /* Diddle with TCP options */ 1041 int hlen; 1042 opt = hdr + sizeof(struct tcphdr); 1043 hlen = (th->th_off << 2) - sizeof(struct tcphdr); 1044 while (hlen >= TCPOLEN_TIMESTAMP) { 1045 switch (*opt) { 1046 case TCPOPT_EOL: /* FALLTHROUGH */ 1047 case TCPOPT_NOP: 1048 opt++; 1049 hlen--; 1050 break; 1051 case TCPOPT_TIMESTAMP: 1052 /* Modulate the timestamps. Can be used for 1053 * NAT detection, OS uptime determination or 1054 * reboot detection. 
1055 */ 1056 1057 if (got_ts) { 1058 /* Huh? Multiple timestamps!? */ 1059 if (pf_status.debug >= LOG_NOTICE) { 1060 log(LOG_NOTICE, 1061 "pf: %s: multiple TS??", 1062 __func__); 1063 pf_print_state(state); 1064 addlog("\n"); 1065 } 1066 REASON_SET(reason, PFRES_TS); 1067 return (PF_DROP); 1068 } 1069 if (opt[1] >= TCPOLEN_TIMESTAMP) { 1070 memcpy(&tsval, &opt[2], 1071 sizeof(u_int32_t)); 1072 if (tsval && src->scrub && 1073 (src->scrub->pfss_flags & 1074 PFSS_TIMESTAMP)) { 1075 tsval = ntohl(tsval); 1076 pf_change_a(pd, &opt[2], 1077 htonl(tsval + 1078 src->scrub->pfss_ts_mod)); 1079 copyback = 1; 1080 } 1081 1082 /* Modulate TS reply iff valid (!0) */ 1083 memcpy(&tsecr, &opt[6], 1084 sizeof(u_int32_t)); 1085 if (tsecr && dst->scrub && 1086 (dst->scrub->pfss_flags & 1087 PFSS_TIMESTAMP)) { 1088 tsecr = ntohl(tsecr) 1089 - dst->scrub->pfss_ts_mod; 1090 pf_change_a(pd, &opt[6], 1091 htonl(tsecr)); 1092 copyback = 1; 1093 } 1094 got_ts = 1; 1095 } 1096 /* FALLTHROUGH */ 1097 default: 1098 hlen -= MAX(opt[1], 2); 1099 opt += MAX(opt[1], 2); 1100 break; 1101 } 1102 } 1103 if (copyback) { 1104 /* Copyback the options, caller copys back header */ 1105 *writeback = 1; 1106 m_copyback(pd->m, pd->off + sizeof(struct tcphdr), 1107 (th->th_off << 2) - sizeof(struct tcphdr), hdr + 1108 sizeof(struct tcphdr), M_NOWAIT); 1109 } 1110 } 1111 1112 1113 /* 1114 * Must invalidate PAWS checks on connections idle for too long. 1115 * The fastest allowed timestamp clock is 1ms. That turns out to 1116 * be about 24 days before it wraps. 
XXX Right now our lowerbound 1117 * TS echo check only works for the first 12 days of a connection 1118 * when the TS has exhausted half its 32bit space 1119 */ 1120 #define TS_MAX_IDLE (24*24*60*60) 1121 #define TS_MAX_CONN (12*24*60*60) /* XXX remove when better tsecr check */ 1122 1123 getmicrouptime(&uptime); 1124 if (src->scrub && (src->scrub->pfss_flags & PFSS_PAWS) && 1125 (uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE || 1126 time_uptime - state->creation > TS_MAX_CONN)) { 1127 if (pf_status.debug >= LOG_NOTICE) { 1128 log(LOG_NOTICE, "pf: src idled out of PAWS "); 1129 pf_print_state(state); 1130 addlog("\n"); 1131 } 1132 src->scrub->pfss_flags = (src->scrub->pfss_flags & ~PFSS_PAWS) 1133 | PFSS_PAWS_IDLED; 1134 } 1135 if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) && 1136 uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) { 1137 if (pf_status.debug >= LOG_NOTICE) { 1138 log(LOG_NOTICE, "pf: dst idled out of PAWS "); 1139 pf_print_state(state); 1140 addlog("\n"); 1141 } 1142 dst->scrub->pfss_flags = (dst->scrub->pfss_flags & ~PFSS_PAWS) 1143 | PFSS_PAWS_IDLED; 1144 } 1145 1146 if (got_ts && src->scrub && dst->scrub && 1147 (src->scrub->pfss_flags & PFSS_PAWS) && 1148 (dst->scrub->pfss_flags & PFSS_PAWS)) { 1149 /* Validate that the timestamps are "in-window". 1150 * RFC1323 describes TCP Timestamp options that allow 1151 * measurement of RTT (round trip time) and PAWS 1152 * (protection against wrapped sequence numbers). PAWS 1153 * gives us a set of rules for rejecting packets on 1154 * long fat pipes (packets that were somehow delayed 1155 * in transit longer than the time it took to send the 1156 * full TCP sequence space of 4Gb). We can use these 1157 * rules and infer a few others that will let us treat 1158 * the 32bit timestamp and the 32bit echoed timestamp 1159 * as sequence numbers to prevent a blind attacker from 1160 * inserting packets into a connection. 
1161 * 1162 * RFC1323 tells us: 1163 * - The timestamp on this packet must be greater than 1164 * or equal to the last value echoed by the other 1165 * endpoint. The RFC says those will be discarded 1166 * since it is a dup that has already been acked. 1167 * This gives us a lowerbound on the timestamp. 1168 * timestamp >= other last echoed timestamp 1169 * - The timestamp will be less than or equal to 1170 * the last timestamp plus the time between the 1171 * last packet and now. The RFC defines the max 1172 * clock rate as 1ms. We will allow clocks to be 1173 * up to 10% fast and will allow a total difference 1174 * or 30 seconds due to a route change. And this 1175 * gives us an upperbound on the timestamp. 1176 * timestamp <= last timestamp + max ticks 1177 * We have to be careful here. Windows will send an 1178 * initial timestamp of zero and then initialize it 1179 * to a random value after the 3whs; presumably to 1180 * avoid a DoS by having to call an expensive RNG 1181 * during a SYN flood. Proof MS has at least one 1182 * good security geek. 1183 * 1184 * - The TCP timestamp option must also echo the other 1185 * endpoints timestamp. The timestamp echoed is the 1186 * one carried on the earliest unacknowledged segment 1187 * on the left edge of the sequence window. The RFC 1188 * states that the host will reject any echoed 1189 * timestamps that were larger than any ever sent. 1190 * This gives us an upperbound on the TS echo. 1191 * tescr <= largest_tsval 1192 * - The lowerbound on the TS echo is a little more 1193 * tricky to determine. The other endpoint's echoed 1194 * values will not decrease. But there may be 1195 * network conditions that re-order packets and 1196 * cause our view of them to decrease. For now the 1197 * only lowerbound we can safely determine is that 1198 * the TS echo will never be less than the original 1199 * TS. XXX There is probably a better lowerbound. 1200 * Remove TS_MAX_CONN with better lowerbound check. 
1201 * tescr >= other original TS 1202 * 1203 * It is also important to note that the fastest 1204 * timestamp clock of 1ms will wrap its 32bit space in 1205 * 24 days. So we just disable TS checking after 24 1206 * days of idle time. We actually must use a 12d 1207 * connection limit until we can come up with a better 1208 * lowerbound to the TS echo check. 1209 */ 1210 struct timeval delta_ts; 1211 int ts_fudge; 1212 1213 1214 /* 1215 * PFTM_TS_DIFF is how many seconds of leeway to allow 1216 * a host's timestamp. This can happen if the previous 1217 * packet got delayed in transit for much longer than 1218 * this packet. 1219 */ 1220 if ((ts_fudge = state->rule.ptr->timeout[PFTM_TS_DIFF]) == 0) 1221 ts_fudge = pf_default_rule.timeout[PFTM_TS_DIFF]; 1222 1223 1224 /* Calculate max ticks since the last timestamp */ 1225 #define TS_MAXFREQ 1100 /* RFC max TS freq of 1Khz + 10% skew */ 1226 #define TS_MICROSECS 1000000 /* microseconds per second */ 1227 timersub(&uptime, &src->scrub->pfss_last, &delta_ts); 1228 tsval_from_last = (delta_ts.tv_sec + ts_fudge) * TS_MAXFREQ; 1229 tsval_from_last += delta_ts.tv_usec / (TS_MICROSECS/TS_MAXFREQ); 1230 1231 1232 if ((src->state >= TCPS_ESTABLISHED && 1233 dst->state >= TCPS_ESTABLISHED) && 1234 (SEQ_LT(tsval, dst->scrub->pfss_tsecr) || 1235 SEQ_GT(tsval, src->scrub->pfss_tsval + tsval_from_last) || 1236 (tsecr && (SEQ_GT(tsecr, dst->scrub->pfss_tsval) || 1237 SEQ_LT(tsecr, dst->scrub->pfss_tsval0))))) { 1238 /* Bad RFC1323 implementation or an insertion attack. 1239 * 1240 * - Solaris 2.6 and 2.7 are known to send another ACK 1241 * after the FIN,FIN|ACK,ACK closing that carries 1242 * an old timestamp. 1243 */ 1244 1245 DPFPRINTF(LOG_NOTICE, "Timestamp failed %c%c%c%c", 1246 SEQ_LT(tsval, dst->scrub->pfss_tsecr) ? '0' : ' ', 1247 SEQ_GT(tsval, src->scrub->pfss_tsval + 1248 tsval_from_last) ? '1' : ' ', 1249 SEQ_GT(tsecr, dst->scrub->pfss_tsval) ? '2' : ' ', 1250 SEQ_LT(tsecr, dst->scrub->pfss_tsval0)? 
'3' : ' '); 1251 DPFPRINTF(LOG_NOTICE, 1252 " tsval: %u tsecr: %u +ticks: %u " 1253 "idle: %llu.%06lus", 1254 tsval, tsecr, tsval_from_last, 1255 (long long)delta_ts.tv_sec, delta_ts.tv_usec); 1256 DPFPRINTF(LOG_NOTICE, 1257 " src->tsval: %u tsecr: %u", 1258 src->scrub->pfss_tsval, src->scrub->pfss_tsecr); 1259 DPFPRINTF(LOG_NOTICE, 1260 " dst->tsval: %u tsecr: %u tsval0: %u", 1261 dst->scrub->pfss_tsval, dst->scrub->pfss_tsecr, 1262 dst->scrub->pfss_tsval0); 1263 if (pf_status.debug >= LOG_NOTICE) { 1264 log(LOG_NOTICE, "pf: "); 1265 pf_print_state(state); 1266 pf_print_flags(th->th_flags); 1267 addlog("\n"); 1268 } 1269 REASON_SET(reason, PFRES_TS); 1270 return (PF_DROP); 1271 } 1272 1273 /* XXX I'd really like to require tsecr but it's optional */ 1274 1275 } else if (!got_ts && (th->th_flags & TH_RST) == 0 && 1276 ((src->state == TCPS_ESTABLISHED && dst->state == TCPS_ESTABLISHED) 1277 || pd->p_len > 0 || (th->th_flags & TH_SYN)) && 1278 src->scrub && dst->scrub && 1279 (src->scrub->pfss_flags & PFSS_PAWS) && 1280 (dst->scrub->pfss_flags & PFSS_PAWS)) { 1281 /* Didn't send a timestamp. Timestamps aren't really useful 1282 * when: 1283 * - connection opening or closing (often not even sent). 1284 * but we must not let an attacker to put a FIN on a 1285 * data packet to sneak it through our ESTABLISHED check. 1286 * - on a TCP reset. RFC suggests not even looking at TS. 1287 * - on an empty ACK. The TS will not be echoed so it will 1288 * probably not help keep the RTT calculation in sync and 1289 * there isn't as much danger when the sequence numbers 1290 * got wrapped. So some stacks don't include TS on empty 1291 * ACKs :-( 1292 * 1293 * To minimize the disruption to mostly RFC1323 conformant 1294 * stacks, we will only require timestamps on data packets. 1295 * 1296 * And what do ya know, we cannot require timestamps on data 1297 * packets. There appear to be devices that do legitimate 1298 * TCP connection hijacking. 
There are HTTP devices that allow 1299 * a 3whs (with timestamps) and then buffer the HTTP request. 1300 * If the intermediate device has the HTTP response cache, it 1301 * will spoof the response but not bother timestamping its 1302 * packets. So we can look for the presence of a timestamp in 1303 * the first data packet and if there, require it in all future 1304 * packets. 1305 */ 1306 1307 if (pd->p_len > 0 && (src->scrub->pfss_flags & PFSS_DATA_TS)) { 1308 /* 1309 * Hey! Someone tried to sneak a packet in. Or the 1310 * stack changed its RFC1323 behavior?!?! 1311 */ 1312 if (pf_status.debug >= LOG_NOTICE) { 1313 log(LOG_NOTICE, 1314 "pf: did not receive expected RFC1323 " 1315 "timestamp"); 1316 pf_print_state(state); 1317 pf_print_flags(th->th_flags); 1318 addlog("\n"); 1319 } 1320 REASON_SET(reason, PFRES_TS); 1321 return (PF_DROP); 1322 } 1323 } 1324 1325 1326 /* 1327 * We will note if a host sends his data packets with or without 1328 * timestamps. And require all data packets to contain a timestamp 1329 * if the first does. PAWS implicitly requires that all data packets be 1330 * timestamped. But I think there are middle-man devices that hijack 1331 * TCP streams immediately after the 3whs and don't timestamp their 1332 * packets (seen in a WWW accelerator or cache). 1333 */ 1334 if (pd->p_len > 0 && src->scrub && (src->scrub->pfss_flags & 1335 (PFSS_TIMESTAMP|PFSS_DATA_TS|PFSS_DATA_NOTS)) == PFSS_TIMESTAMP) { 1336 if (got_ts) 1337 src->scrub->pfss_flags |= PFSS_DATA_TS; 1338 else { 1339 src->scrub->pfss_flags |= PFSS_DATA_NOTS; 1340 if (pf_status.debug >= LOG_NOTICE && dst->scrub && 1341 (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) { 1342 /* Don't warn if other host rejected RFC1323 */ 1343 log(LOG_NOTICE, 1344 "pf: broken RFC1323 stack did not " 1345 "timestamp data packet. 
Disabled PAWS " 1346 "security."); 1347 pf_print_state(state); 1348 pf_print_flags(th->th_flags); 1349 addlog("\n"); 1350 } 1351 } 1352 } 1353 1354 1355 /* 1356 * Update PAWS values 1357 */ 1358 if (got_ts && src->scrub && PFSS_TIMESTAMP == (src->scrub->pfss_flags & 1359 (PFSS_PAWS_IDLED|PFSS_TIMESTAMP))) { 1360 getmicrouptime(&src->scrub->pfss_last); 1361 if (SEQ_GEQ(tsval, src->scrub->pfss_tsval) || 1362 (src->scrub->pfss_flags & PFSS_PAWS) == 0) 1363 src->scrub->pfss_tsval = tsval; 1364 1365 if (tsecr) { 1366 if (SEQ_GEQ(tsecr, src->scrub->pfss_tsecr) || 1367 (src->scrub->pfss_flags & PFSS_PAWS) == 0) 1368 src->scrub->pfss_tsecr = tsecr; 1369 1370 if ((src->scrub->pfss_flags & PFSS_PAWS) == 0 && 1371 (SEQ_LT(tsval, src->scrub->pfss_tsval0) || 1372 src->scrub->pfss_tsval0 == 0)) { 1373 /* tsval0 MUST be the lowest timestamp */ 1374 src->scrub->pfss_tsval0 = tsval; 1375 } 1376 1377 /* Only fully initialized after a TS gets echoed */ 1378 if ((src->scrub->pfss_flags & PFSS_PAWS) == 0) 1379 src->scrub->pfss_flags |= PFSS_PAWS; 1380 } 1381 } 1382 1383 /* I have a dream.... TCP segment reassembly.... 
*/ 1384 return (0); 1385 } 1386 1387 int 1388 pf_normalize_mss(struct pf_pdesc *pd, u_int16_t maxmss) 1389 { 1390 struct tcphdr *th = pd->hdr.tcp; 1391 u_int16_t mss; 1392 int thoff; 1393 int opt, cnt, optlen = 0; 1394 u_char opts[MAX_TCPOPTLEN]; 1395 u_char *optp = opts; 1396 1397 if (pd->csum_status == PF_CSUM_UNKNOWN) 1398 pf_check_proto_cksum(pd, pd->off, pd->tot_len - pd->off, 1399 pd->proto, pd->af); 1400 1401 thoff = th->th_off << 2; 1402 cnt = thoff - sizeof(struct tcphdr); 1403 1404 if (cnt <= 0 || cnt > MAX_TCPOPTLEN || !pf_pull_hdr(pd->m, 1405 pd->off + sizeof(*th), opts, cnt, NULL, NULL, pd->af)) 1406 return (0); 1407 1408 for (; cnt > 0; cnt -= optlen, optp += optlen) { 1409 opt = optp[0]; 1410 if (opt == TCPOPT_EOL) 1411 break; 1412 if (opt == TCPOPT_NOP) 1413 optlen = 1; 1414 else { 1415 if (cnt < 2) 1416 break; 1417 optlen = optp[1]; 1418 if (optlen < 2 || optlen > cnt) 1419 break; 1420 } 1421 switch (opt) { 1422 case TCPOPT_MAXSEG: 1423 memcpy(&mss, (optp + 2), 2); 1424 if (ntohs(mss) > maxmss) { 1425 mss = htons(maxmss); 1426 m_copyback(pd->m, 1427 pd->off + sizeof(*th) + optp + 2 - opts, 1428 2, &mss, M_NOWAIT); 1429 pf_cksum(pd, pd->m); 1430 m_copyback(pd->m, pd->off, sizeof(*th), th, 1431 M_NOWAIT); 1432 } 1433 break; 1434 default: 1435 break; 1436 } 1437 } 1438 1439 return (0); 1440 } 1441 1442 void 1443 pf_scrub(struct mbuf *m, u_int16_t flags, sa_family_t af, u_int8_t min_ttl, 1444 u_int8_t tos) 1445 { 1446 struct ip *h = mtod(m, struct ip *); 1447 #ifdef INET6 1448 struct ip6_hdr *h6 = mtod(m, struct ip6_hdr *); 1449 #endif 1450 1451 /* Clear IP_DF if no-df was requested */ 1452 if (flags & PFSTATE_NODF && af == AF_INET && h->ip_off & htons(IP_DF)) 1453 h->ip_off &= htons(~IP_DF); 1454 1455 /* Enforce a minimum ttl, may cause endless packet loops */ 1456 if (min_ttl && af == AF_INET && h->ip_ttl < min_ttl) 1457 h->ip_ttl = min_ttl; 1458 #ifdef INET6 1459 if (min_ttl && af == AF_INET6 && h6->ip6_hlim < min_ttl) 1460 h6->ip6_hlim = min_ttl; 
1461 #endif 1462 1463 /* Enforce tos */ 1464 if (flags & PFSTATE_SETTOS) { 1465 if (af == AF_INET) 1466 h->ip_tos = tos | (h->ip_tos & IPTOS_ECN_MASK); 1467 #ifdef INET6 1468 if (af == AF_INET6) { 1469 /* drugs are unable to explain such idiocy */ 1470 h6->ip6_flow &= ~htonl(0x0fc00000); 1471 h6->ip6_flow |= htonl(((u_int32_t)tos) << 20); 1472 } 1473 #endif 1474 } 1475 1476 /* random-id, but not for fragments */ 1477 if (flags & PFSTATE_RANDOMID && af == AF_INET && 1478 !(h->ip_off & ~htons(IP_DF))) 1479 h->ip_id = htons(ip_randomid()); 1480 } 1481