1 /* $OpenBSD: pf_norm.c,v 1.140 2011/07/18 21:03:10 mikeb Exp $ */ 2 3 /* 4 * Copyright 2001 Niels Provos <provos@citi.umich.edu> 5 * Copyright 2009 Henning Brauer <henning@openbsd.org> 6 * Copyright 2011 Alexander Bluhm <bluhm@openbsd.org> 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 */ 29 30 #include "pflog.h" 31 32 #include <sys/param.h> 33 #include <sys/systm.h> 34 #include <sys/mbuf.h> 35 #include <sys/filio.h> 36 #include <sys/fcntl.h> 37 #include <sys/socket.h> 38 #include <sys/kernel.h> 39 #include <sys/time.h> 40 #include <sys/pool.h> 41 #include <sys/syslog.h> 42 43 #include <dev/rndvar.h> 44 #include <net/if.h> 45 #include <net/if_types.h> 46 #include <net/bpf.h> 47 #include <net/route.h> 48 #include <net/if_pflog.h> 49 50 #include <netinet/in.h> 51 #include <netinet/in_var.h> 52 #include <netinet/in_systm.h> 53 #include <netinet/ip.h> 54 #include <netinet/ip_var.h> 55 #include <netinet/tcp.h> 56 #include <netinet/tcp_seq.h> 57 #include <netinet/udp.h> 58 #include <netinet/ip_icmp.h> 59 60 #ifdef INET6 61 #include <netinet/ip6.h> 62 #include <netinet6/ip6_var.h> 63 #endif /* INET6 */ 64 65 #include <net/pfvar.h> 66 67 struct pf_frent { 68 TAILQ_ENTRY(pf_frent) fr_next; 69 struct mbuf *fe_m; 70 u_int16_t fe_hdrlen; /* ipv4 header lenght with ip options 71 ipv6, extension, fragment header */ 72 u_int16_t fe_extoff; /* last extension header offset or 0 */ 73 u_int16_t fe_len; /* fragment length */ 74 u_int16_t fe_off; /* fragment offset */ 75 u_int16_t fe_mff; /* more fragment flag */ 76 }; 77 78 /* keep synced with struct pf_fragment, used in RB_FIND */ 79 struct pf_fragment_cmp { 80 struct pf_addr fr_src; 81 struct pf_addr fr_dst; 82 u_int32_t fr_id; 83 sa_family_t fr_af; 84 u_int8_t fr_proto; 85 u_int8_t fr_direction; 86 }; 87 88 struct pf_fragment { 89 struct pf_addr fr_src; /* ip source address */ 90 struct pf_addr fr_dst; /* ip destination address */ 91 u_int32_t fr_id; /* fragment id for reassemble */ 92 sa_family_t fr_af; /* address family */ 93 u_int8_t fr_proto; /* protocol of this fragment */ 94 u_int8_t fr_direction; /* pf packet direction */ 95 96 RB_ENTRY(pf_fragment) fr_entry; 97 TAILQ_ENTRY(pf_fragment) frag_next; 98 u_int32_t fr_timeout; 99 u_int16_t fr_maxlen; /* maximum length of single fragment */ 100 TAILQ_HEAD(pf_fragq, pf_frent) fr_queue; 101 }; 102 103 struct pf_fragment_tag { 104 u_int16_t ft_hdrlen; /* header lenght of reassembled pkt */ 105 u_int16_t ft_extoff; /* last extension header offset or 0 */ 106 u_int16_t ft_maxlen; /* maximum fragment payload length */ 107 }; 108 109 TAILQ_HEAD(pf_fragqueue, pf_fragment) pf_fragqueue; 110 111 static __inline int pf_frag_compare(struct pf_fragment *, 112 struct pf_fragment *); 113 RB_HEAD(pf_frag_tree, pf_fragment) pf_frag_tree, pf_cache_tree; 114 RB_PROTOTYPE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare); 115 RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare); 116 117 /* Private prototypes */ 118 void pf_remove_fragment(struct pf_fragment *); 119 void pf_flush_fragments(void); 120 void pf_free_fragment(struct pf_fragment *); 121 struct pf_fragment *pf_find_fragment(struct pf_fragment_cmp *, 122 struct pf_frag_tree *); 123 struct pf_frent *pf_create_fragment(u_short *); 124 struct pf_fragment *pf_fillup_fragment(struct pf_fragment_cmp *, 125 struct pf_frent *, u_short *); 126 int pf_isfull_fragment(struct pf_fragment *); 127 struct mbuf *pf_join_fragment(struct pf_fragment *); 128 int pf_reassemble(struct mbuf **, int, u_short *); 129 #ifdef INET6 130 int pf_reassemble6(struct mbuf **, struct ip6_frag *, 131 u_int16_t, u_int16_t, int, u_short *); 132 #endif 133 134 /* Globals */ 135 struct pool pf_frent_pl, pf_frag_pl; 136 struct pool pf_state_scrub_pl; 137 int pf_nfrents; 138 139 void 140 pf_normalize_init(void) 141 { 142 pool_init(&pf_frent_pl, sizeof(struct pf_frent), 0, 0, 0, "pffrent", 143 NULL); 144 pool_init(&pf_frag_pl, sizeof(struct pf_fragment), 0, 0, 0, "pffrag", 145 NULL); 146 pool_init(&pf_state_scrub_pl, sizeof(struct pf_state_scrub), 0, 0, 0, 147 "pfstscr", NULL); 148 149 pool_sethiwat(&pf_frag_pl, PFFRAG_FRAG_HIWAT); 150 pool_sethardlimit(&pf_frent_pl, PFFRAG_FRENT_HIWAT, NULL, 0); 151 152 TAILQ_INIT(&pf_fragqueue); 153 } 154 155 static __inline int 156 pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b) 157 { 158 int diff; 159 160 if ((diff = a->fr_id - b->fr_id) != 0) 161 return (diff); 162 if ((diff = a->fr_proto - b->fr_proto) != 0) 163 return (diff); 164 if ((diff = a->fr_af - b->fr_af) != 0) 165 return (diff); 166 if ((diff = pf_addr_compare(&a->fr_src, &b->fr_src, a->fr_af)) != 0) 167 return (diff); 168 if ((diff = pf_addr_compare(&a->fr_dst, &b->fr_dst, a->fr_af)) != 0) 169 return (diff); 170 return (0); 171 } 172 173 void 174 pf_purge_expired_fragments(void) 175 { 176 struct pf_fragment *frag; 177 u_int32_t expire = time_second - 178 pf_default_rule.timeout[PFTM_FRAG]; 179 180 while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) { 181 if (frag->fr_timeout > expire) 182 break; 183 184 DPFPRINTF(LOG_NOTICE, "expiring %d(%p)", frag->fr_id, frag); 185 pf_free_fragment(frag); 186 } 187 } 188 189 /* 190 * Try to flush old fragments to make space for new ones 191 */ 192 193 void 194 pf_flush_fragments(void) 195 { 196 struct pf_fragment *frag; 197 int goal; 198 199 goal = pf_nfrents * 9 / 10; 200 DPFPRINTF(LOG_NOTICE, "trying to free > %d frents", 201 pf_nfrents - goal); 202 while (goal < pf_nfrents) { 203 frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue); 204 if (frag == NULL) 205 break; 206 pf_free_fragment(frag); 207 } 208 } 209 210 /* Frees the fragments and all associated entries */ 211 212 void 213 pf_free_fragment(struct pf_fragment *frag) 214 { 215 struct pf_frent *frent; 216 217 /* Free all fragments */ 218 for (frent = TAILQ_FIRST(&frag->fr_queue); frent; 219 frent = TAILQ_FIRST(&frag->fr_queue)) { 220 TAILQ_REMOVE(&frag->fr_queue, frent, fr_next); 221 222 m_freem(frent->fe_m); 223 pool_put(&pf_frent_pl, frent); 224 pf_nfrents--; 225 } 226 227 pf_remove_fragment(frag); 228 } 229 230 struct pf_fragment * 231 pf_find_fragment(struct pf_fragment_cmp *key, struct pf_frag_tree *tree) 232 { 233 struct pf_fragment *frag; 234 235 frag = RB_FIND(pf_frag_tree, tree, (struct pf_fragment *)key); 236 if (frag != NULL) { 237 /* XXX Are we sure we want to update the timeout? */ 238 frag->fr_timeout = time_second; 239 TAILQ_REMOVE(&pf_fragqueue, frag, frag_next); 240 TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next); 241 } 242 243 return (frag); 244 } 245 246 /* Removes a fragment from the fragment queue and frees the fragment */ 247 248 void 249 pf_remove_fragment(struct pf_fragment *frag) 250 { 251 RB_REMOVE(pf_frag_tree, &pf_frag_tree, frag); 252 TAILQ_REMOVE(&pf_fragqueue, frag, frag_next); 253 pool_put(&pf_frag_pl, frag); 254 } 255 256 struct pf_frent * 257 pf_create_fragment(u_short *reason) 258 { 259 struct pf_frent *frent; 260 261 frent = pool_get(&pf_frent_pl, PR_NOWAIT); 262 if (frent == NULL) { 263 pf_flush_fragments(); 264 frent = pool_get(&pf_frent_pl, PR_NOWAIT); 265 if (frent == NULL) { 266 REASON_SET(reason, PFRES_MEMORY); 267 return (NULL); 268 } 269 } 270 pf_nfrents++; 271 272 return (frent); 273 } 274 275 struct pf_fragment * 276 pf_fillup_fragment(struct pf_fragment_cmp *key, struct pf_frent *frent, 277 u_short *reason) 278 { 279 struct pf_frent *after, *next, *prev; 280 struct pf_fragment *frag; 281 u_int16_t total; 282 283 /* No empty fragments */ 284 if (frent->fe_len == 0) { 285 DPFPRINTF(LOG_NOTICE, "bad fragment: len 0"); 286 goto bad_fragment; 287 } 288 289 /* All fragments are 8 byte aligned */ 290 if (frent->fe_mff && (frent->fe_len & 0x7)) { 291 DPFPRINTF(LOG_NOTICE, "bad fragment: mff and len %d", 292 frent->fe_len); 293 goto bad_fragment; 294 } 295 296 /* Respect maximum length, IP_MAXPACKET == IPV6_MAXPACKET */ 297 if (frent->fe_off + frent->fe_len > IP_MAXPACKET) { 298 DPFPRINTF(LOG_NOTICE, "bad fragment: max packet %d", 299 frent->fe_off + frent->fe_len); 300 goto bad_fragment; 301 } 302 303 DPFPRINTF(LOG_NOTICE, key->fr_af == AF_INET ? 304 "reass frag %d @ %d-%d" : "reass frag %#08x @ %d-%d", 305 key->fr_id, frent->fe_off, frent->fe_off + frent->fe_len); 306 307 /* Fully buffer all of the fragments in this fragment queue */ 308 frag = pf_find_fragment(key, &pf_frag_tree); 309 310 /* Create a new reassembly queue for this packet */ 311 if (frag == NULL) { 312 frag = pool_get(&pf_frag_pl, PR_NOWAIT); 313 if (frag == NULL) { 314 pf_flush_fragments(); 315 frag = pool_get(&pf_frag_pl, PR_NOWAIT); 316 if (frag == NULL) { 317 REASON_SET(reason, PFRES_MEMORY); 318 goto drop_fragment; 319 } 320 } 321 322 *(struct pf_fragment_cmp *)frag = *key; 323 frag->fr_timeout = time_second; 324 frag->fr_maxlen = frent->fe_len; 325 TAILQ_INIT(&frag->fr_queue); 326 327 RB_INSERT(pf_frag_tree, &pf_frag_tree, frag); 328 TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next); 329 330 /* We do not have a previous fragment */ 331 TAILQ_INSERT_HEAD(&frag->fr_queue, frent, fr_next); 332 333 return (frag); 334 } 335 336 KASSERT(!TAILQ_EMPTY(&frag->fr_queue)); 337 338 /* Remember maximum fragment len for refragmentation */ 339 if (frent->fe_len > frag->fr_maxlen) 340 frag->fr_maxlen = frent->fe_len; 341 342 /* Maximum data we have seen already */ 343 total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off + 344 TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len; 345 346 /* Non terminal fragments must have more fragments flag */ 347 if (frent->fe_off + frent->fe_len < total && !frent->fe_mff) 348 goto bad_fragment; 349 350 /* Check if we saw the last fragment already */ 351 if (!TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_mff) { 352 if (frent->fe_off + frent->fe_len > total || 353 (frent->fe_off + frent->fe_len == total && frent->fe_mff)) 354 goto bad_fragment; 355 } else { 356 if (frent->fe_off + frent->fe_len == total && !frent->fe_mff) 357 goto bad_fragment; 358 } 359 360 /* Find a fragment after the current one */ 361 prev = NULL; 362 TAILQ_FOREACH(after, &frag->fr_queue, fr_next) { 363 if (after->fe_off > frent->fe_off) 364 break; 365 prev = after; 366 } 367 368 KASSERT(prev != NULL || after != NULL); 369 370 if (prev != NULL && prev->fe_off + prev->fe_len > frent->fe_off) { 371 u_int16_t precut; 372 373 precut = prev->fe_off + prev->fe_len - frent->fe_off; 374 if (precut >= frent->fe_len) 375 goto bad_fragment; 376 DPFPRINTF(LOG_NOTICE, "overlap -%d", precut); 377 m_adj(frent->fe_m, precut); 378 frent->fe_off += precut; 379 frent->fe_len -= precut; 380 } 381 382 for (; after != NULL && frent->fe_off + frent->fe_len > after->fe_off; 383 after = next) 384 { 385 u_int16_t aftercut; 386 387 aftercut = frent->fe_off + frent->fe_len - after->fe_off; 388 DPFPRINTF(LOG_NOTICE, "adjust overlap %d", aftercut); 389 if (aftercut < after->fe_len) { 390 m_adj(after->fe_m, aftercut); 391 after->fe_off += aftercut; 392 after->fe_len -= aftercut; 393 break; 394 } 395 396 /* This fragment is completely overlapped, lose it */ 397 next = TAILQ_NEXT(after, fr_next); 398 m_freem(after->fe_m); 399 TAILQ_REMOVE(&frag->fr_queue, after, fr_next); 400 pool_put(&pf_frent_pl, after); 401 pf_nfrents--; 402 } 403 404 if (prev == NULL) 405 TAILQ_INSERT_HEAD(&frag->fr_queue, frent, fr_next); 406 else 407 TAILQ_INSERT_AFTER(&frag->fr_queue, prev, frent, fr_next); 408 409 return (frag); 410 411 bad_fragment: 412 REASON_SET(reason, PFRES_FRAG); 413 drop_fragment: 414 pool_put(&pf_frent_pl, frent); 415 pf_nfrents--; 416 return (NULL); 417 } 418 419 int 420 pf_isfull_fragment(struct pf_fragment *frag) 421 { 422 struct pf_frent *frent, *next; 423 u_int16_t off, total; 424 425 /* Check if we are completely reassembled */ 426 if (TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_mff) 427 return (0); 428 429 /* Maximum data we have seen already */ 430 total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off + 431 TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len; 432 433 /* Check if we have all the data */ 434 off = 0; 435 for (frent = TAILQ_FIRST(&frag->fr_queue); frent; frent = next) { 436 next = TAILQ_NEXT(frent, fr_next); 437 438 off += frent->fe_len; 439 if (off < total && (next == NULL || next->fe_off != off)) { 440 DPFPRINTF(LOG_NOTICE, 441 "missing fragment at %d, next %d, total %d", 442 off, next == NULL ? -1 : next->fe_off, total); 443 return (0); 444 } 445 } 446 DPFPRINTF(LOG_NOTICE, "%d < %d?", off, total); 447 if (off < total) 448 return (0); 449 KASSERT(off == total); 450 451 return (1); 452 } 453 454 struct mbuf * 455 pf_join_fragment(struct pf_fragment *frag) 456 { 457 struct mbuf *m, *m2; 458 struct pf_frent *frent, *next; 459 460 frent = TAILQ_FIRST(&frag->fr_queue); 461 next = TAILQ_NEXT(frent, fr_next); 462 463 /* Magic from ip_input */ 464 m = frent->fe_m; 465 m2 = m->m_next; 466 m->m_next = NULL; 467 m_cat(m, m2); 468 pool_put(&pf_frent_pl, frent); 469 pf_nfrents--; 470 for (frent = next; frent != NULL; frent = next) { 471 next = TAILQ_NEXT(frent, fr_next); 472 473 m2 = frent->fe_m; 474 /* Strip off ip header */ 475 m_adj(m2, frent->fe_hdrlen); 476 pool_put(&pf_frent_pl, frent); 477 pf_nfrents--; 478 m_cat(m, m2); 479 } 480 481 /* Remove from fragment queue */ 482 pf_remove_fragment(frag); 483 484 return (m); 485 } 486 487 int 488 pf_reassemble(struct mbuf **m0, int dir, u_short *reason) 489 { 490 struct mbuf *m = *m0; 491 struct ip *ip = mtod(m, struct ip *); 492 struct pf_frent *frent; 493 struct pf_fragment *frag; 494 struct pf_fragment_cmp key; 495 u_int16_t total, hdrlen; 496 497 /* Get an entry for the fragment queue */ 498 if ((frent = pf_create_fragment(reason)) == NULL) 499 return (PF_DROP); 500 501 frent->fe_m = m; 502 frent->fe_hdrlen = ip->ip_hl << 2; 503 frent->fe_extoff = 0; 504 frent->fe_len = ntohs(ip->ip_len) - (ip->ip_hl << 2); 505 frent->fe_off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3; 506 frent->fe_mff = ntohs(ip->ip_off) & IP_MF; 507 508 key.fr_src.v4 = ip->ip_src; 509 key.fr_dst.v4 = ip->ip_dst; 510 key.fr_af = AF_INET; 511 key.fr_proto = ip->ip_p; 512 key.fr_id = ip->ip_id; 513 key.fr_direction = dir; 514 515 if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL) 516 return (PF_DROP); 517 518 /* The mbuf is part of the fragment entry, no direct free or access */ 519 m = *m0 = NULL; 520 521 if (!pf_isfull_fragment(frag)) 522 return (PF_PASS); /* drop because *m0 is NULL, no error */ 523 524 /* We have all the data */ 525 frent = TAILQ_FIRST(&frag->fr_queue); 526 KASSERT(frent != NULL); 527 total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off + 528 TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len; 529 hdrlen = frent->fe_hdrlen; 530 531 m = *m0 = pf_join_fragment(frag); 532 frag = NULL; 533 534 if (m->m_flags & M_PKTHDR) { 535 int plen = 0; 536 for (m = *m0; m; m = m->m_next) 537 plen += m->m_len; 538 m = *m0; 539 m->m_pkthdr.len = plen; 540 } 541 542 ip = mtod(m, struct ip *); 543 ip->ip_len = htons(hdrlen + total); 544 ip->ip_off &= ~(IP_MF|IP_OFFMASK); 545 546 if (hdrlen + total > IP_MAXPACKET) { 547 DPFPRINTF(LOG_NOTICE, "drop: too big: %d", total); 548 ip->ip_len = 0; 549 REASON_SET(reason, PFRES_SHORT); 550 /* PF_DROP requires a valid mbuf *m0 in pf_test() */ 551 return (PF_DROP); 552 } 553 554 DPFPRINTF(LOG_NOTICE, "complete: %p(%d)", m, ntohs(ip->ip_len)); 555 return (PF_PASS); 556 } 557 558 #ifdef INET6 559 int 560 pf_reassemble6(struct mbuf **m0, struct ip6_frag *fraghdr, 561 u_int16_t hdrlen, u_int16_t extoff, int dir, u_short *reason) 562 { 563 struct mbuf *m = *m0; 564 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 565 struct m_tag *mtag; 566 struct pf_fragment_tag *ftag; 567 struct pf_frent *frent; 568 struct pf_fragment *frag; 569 struct pf_fragment_cmp key; 570 int off; 571 u_int16_t total, maxlen; 572 u_int8_t proto; 573 574 /* Get an entry for the fragment queue */ 575 if ((frent = pf_create_fragment(reason)) == NULL) 576 return (PF_DROP); 577 578 frent->fe_m = m; 579 frent->fe_hdrlen = hdrlen; 580 frent->fe_extoff = extoff; 581 frent->fe_len = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen) - hdrlen; 582 frent->fe_off = ntohs(fraghdr->ip6f_offlg & IP6F_OFF_MASK); 583 frent->fe_mff = fraghdr->ip6f_offlg & IP6F_MORE_FRAG; 584 585 key.fr_src.v6 = ip6->ip6_src; 586 key.fr_dst.v6 = ip6->ip6_dst; 587 key.fr_af = AF_INET6; 588 /* Only the first fragment's protocol is relevant */ 589 key.fr_proto = 0; 590 key.fr_id = fraghdr->ip6f_ident; 591 key.fr_direction = dir; 592 593 if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL) 594 return (PF_DROP); 595 596 /* The mbuf is part of the fragment entry, no direct free or access */ 597 m = *m0 = NULL; 598 599 if (!pf_isfull_fragment(frag)) 600 return (PF_PASS); /* drop because *m0 is NULL, no error */ 601 602 /* We have all the data */ 603 extoff = frent->fe_extoff; 604 maxlen = frag->fr_maxlen; 605 frent = TAILQ_FIRST(&frag->fr_queue); 606 KASSERT(frent != NULL); 607 total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off + 608 TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len; 609 hdrlen = frent->fe_hdrlen - sizeof(struct ip6_frag); 610 611 m = *m0 = pf_join_fragment(frag); 612 frag = NULL; 613 614 /* Take protocol from first fragment header */ 615 if ((m = m_getptr(m, hdrlen + offsetof(struct ip6_frag, ip6f_nxt), 616 &off)) == NULL) 617 panic("pf_reassemble6: short mbuf chain"); 618 proto = *(mtod(m, caddr_t) + off); 619 m = *m0; 620 621 /* Delete frag6 header */ 622 if (frag6_deletefraghdr(m, hdrlen) != 0) 623 goto fail; 624 625 if (m->m_flags & M_PKTHDR) { 626 int plen = 0; 627 for (m = *m0; m; m = m->m_next) 628 plen += m->m_len; 629 m = *m0; 630 m->m_pkthdr.len = plen; 631 } 632 633 if ((mtag = m_tag_get(PACKET_TAG_PF_REASSEMBLED, sizeof(struct 634 pf_fragment_tag), M_NOWAIT)) == NULL) 635 goto fail; 636 ftag = (struct pf_fragment_tag *)(mtag + 1); 637 ftag->ft_hdrlen = hdrlen; 638 ftag->ft_extoff = extoff; 639 ftag->ft_maxlen = maxlen; 640 m_tag_prepend(m, mtag); 641 642 ip6 = mtod(m, struct ip6_hdr *); 643 ip6->ip6_plen = htons(hdrlen - sizeof(struct ip6_hdr) + total); 644 if (extoff) { 645 /* Write protocol into next field of last extension header */ 646 if ((m = m_getptr(m, extoff + offsetof(struct ip6_ext, 647 ip6e_nxt), &off)) == NULL) 648 panic("pf_reassemble6: short mbuf chain"); 649 *(mtod(m, caddr_t) + off) = proto; 650 m = *m0; 651 } else 652 ip6->ip6_nxt = proto; 653 654 if (hdrlen - sizeof(struct ip6_hdr) + total > IPV6_MAXPACKET) { 655 DPFPRINTF(LOG_NOTICE, "drop: too big: %d", total); 656 ip6->ip6_plen = 0; 657 REASON_SET(reason, PFRES_SHORT); 658 /* PF_DROP requires a valid mbuf *m0 in pf_test6() */ 659 return (PF_DROP); 660 } 661 662 DPFPRINTF(LOG_NOTICE, "complete: %p(%d)", m, ntohs(ip6->ip6_plen)); 663 return (PF_PASS); 664 665 fail: 666 REASON_SET(reason, PFRES_MEMORY); 667 /* PF_DROP requires a valid mbuf *m0 in pf_test6(), will free later */ 668 return (PF_DROP); 669 } 670 671 int 672 pf_refragment6(struct mbuf **m0, struct m_tag *mtag, int dir) 673 { 674 struct mbuf *m = *m0, *t; 675 struct pf_fragment_tag *ftag = (struct pf_fragment_tag *)(mtag + 1); 676 u_int32_t mtu; 677 u_int16_t hdrlen, extoff, maxlen; 678 u_int8_t proto; 679 int error, action; 680 681 hdrlen = ftag->ft_hdrlen; 682 extoff = ftag->ft_extoff; 683 maxlen = ftag->ft_maxlen; 684 m_tag_delete(m, mtag); 685 mtag = NULL; 686 ftag = NULL; 687 688 if (extoff) { 689 int off; 690 691 /* Use protocol from next field of last extension header */ 692 if ((m = m_getptr(m, extoff + offsetof(struct ip6_ext, 693 ip6e_nxt), &off)) == NULL) 694 panic("pf_refragment6: short mbuf chain"); 695 proto = *(mtod(m, caddr_t) + off); 696 *(mtod(m, caddr_t) + off) = IPPROTO_FRAGMENT; 697 m = *m0; 698 } else { 699 struct ip6_hdr *hdr; 700 701 hdr = mtod(m, struct ip6_hdr *); 702 proto = hdr->ip6_nxt; 703 hdr->ip6_nxt = IPPROTO_FRAGMENT; 704 } 705 706 /* 707 * Maxlen may be less than 8 iff there was only a single 708 * fragment. As it was fragmented before, add a fragment 709 * header also for a single fragment. If total or maxlen 710 * is less than 8, ip6_fragment() will return EMSGSIZE and 711 * we drop the packet. 712 */ 713 714 mtu = hdrlen + sizeof(struct ip6_frag) + maxlen; 715 error = ip6_fragment(m, hdrlen, proto, mtu); 716 717 m = (*m0)->m_nextpkt; 718 (*m0)->m_nextpkt = NULL; 719 if (error == 0) { 720 /* The first mbuf contains the unfragmented packet */ 721 m_freem(*m0); 722 *m0 = NULL; 723 action = PF_PASS; 724 } else { 725 /* Drop expects an mbuf to free */ 726 DPFPRINTF(LOG_NOTICE, "refragment error %d", error); 727 action = PF_DROP; 728 } 729 for (t = m; m; m = t) { 730 t = m->m_nextpkt; 731 m->m_nextpkt = NULL; 732 m->m_pkthdr.pf.flags |= PF_TAG_REFRAGMENTED; 733 if (error == 0) 734 ip6_forward(m, 0); 735 else 736 m_freem(m); 737 } 738 739 return (action); 740 } 741 #endif /* INET6 */ 742 743 int 744 pf_normalize_ip(struct mbuf **m0, int dir, u_short *reason) 745 { 746 struct mbuf *m = *m0; 747 struct ip *h = mtod(m, struct ip *); 748 u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3; 749 u_int16_t mff = (ntohs(h->ip_off) & IP_MF); 750 751 /* We will need other tests here */ 752 if (!fragoff && !mff) 753 goto no_fragment; 754 755 /* Clear IP_DF if we're in no-df mode */ 756 if (pf_status.reass & PF_REASS_NODF && h->ip_off & htons(IP_DF)) 757 h->ip_off &= htons(~IP_DF); 758 759 /* We're dealing with a fragment now. Don't allow fragments 760 * with IP_DF to enter the cache. If the flag was cleared by 761 * no-df above, fine. Otherwise drop it. 762 */ 763 if (h->ip_off & htons(IP_DF)) { 764 DPFPRINTF(LOG_NOTICE, "bad fragment: IP_DF"); 765 REASON_SET(reason, PFRES_FRAG); 766 return (PF_DROP); 767 } 768 769 if (!pf_status.reass) 770 return (PF_PASS); /* no reassembly */ 771 772 /* Returns PF_DROP or *m0 is NULL or completely reassembled mbuf */ 773 if (pf_reassemble(m0, dir, reason) != PF_PASS) 774 return (PF_DROP); 775 m = *m0; 776 if (m == NULL) 777 return (PF_PASS); /* packet has been reassembled, no error */ 778 779 h = mtod(m, struct ip *); 780 781 no_fragment: 782 /* At this point, only IP_DF is allowed in ip_off */ 783 if (h->ip_off & ~htons(IP_DF)) 784 h->ip_off &= htons(IP_DF); 785 786 return (PF_PASS); 787 } 788 789 #ifdef INET6 790 int 791 pf_normalize_ip6(struct mbuf **m0, int dir, int off, int extoff, 792 u_short *reason) 793 { 794 struct mbuf *m = *m0; 795 struct ip6_frag frag; 796 797 if (!pf_pull_hdr(m, off, &frag, sizeof(frag), NULL, reason, AF_INET6)) 798 return (PF_DROP); 799 /* offset now points to data portion */ 800 off += sizeof(frag); 801 802 /* Returns PF_DROP or *m0 is NULL or completely reassembled mbuf */ 803 if (pf_reassemble6(m0, &frag, off, extoff, dir, reason) != PF_PASS) 804 return (PF_DROP); 805 806 return (PF_PASS); 807 } 808 #endif /* INET6 */ 809 810 int 811 pf_normalize_tcp(int dir, struct mbuf *m, int off, struct pf_pdesc *pd) 812 { 813 struct tcphdr *th = pd->hdr.tcp; 814 u_short reason; 815 u_int8_t flags; 816 u_int rewrite = 0; 817 818 flags = th->th_flags; 819 if (flags & TH_SYN) { 820 /* Illegal packet */ 821 if (flags & TH_RST) 822 goto tcp_drop; 823 824 if (flags & TH_FIN) 825 flags &= ~TH_FIN; 826 } else { 827 /* Illegal packet */ 828 if (!(flags & (TH_ACK|TH_RST))) 829 goto tcp_drop; 830 } 831 832 if (!(flags & TH_ACK)) { 833 /* These flags are only valid if ACK is set */ 834 if ((flags & TH_FIN) || (flags & TH_PUSH) || (flags & TH_URG)) 835 goto tcp_drop; 836 } 837 838 /* Check for illegal header length */ 839 if (th->th_off < (sizeof(struct tcphdr) >> 2)) 840 goto tcp_drop; 841 842 /* If flags changed, or reserved data set, then adjust */ 843 if (flags != th->th_flags || th->th_x2 != 0) { 844 u_int16_t ov, nv; 845 846 ov = *(u_int16_t *)(&th->th_ack + 1); 847 th->th_flags = flags; 848 th->th_x2 = 0; 849 nv = *(u_int16_t *)(&th->th_ack + 1); 850 851 th->th_sum = pf_cksum_fixup(th->th_sum, ov, nv, 0); 852 rewrite = 1; 853 } 854 855 /* Remove urgent pointer, if TH_URG is not set */ 856 if (!(flags & TH_URG) && th->th_urp) { 857 th->th_sum = pf_cksum_fixup(th->th_sum, th->th_urp, 0, 0); 858 th->th_urp = 0; 859 rewrite = 1; 860 } 861 862 /* copy back packet headers if we sanitized */ 863 if (rewrite) 864 m_copyback(m, off, sizeof(*th), th, M_NOWAIT); 865 866 return (PF_PASS); 867 868 tcp_drop: 869 REASON_SET(&reason, PFRES_NORM); 870 return (PF_DROP); 871 } 872 873 int 874 pf_normalize_tcp_init(struct mbuf *m, int off, struct pf_pdesc *pd, 875 struct tcphdr *th, struct pf_state_peer *src, struct pf_state_peer *dst) 876 { 877 u_int32_t tsval, tsecr; 878 u_int8_t hdr[60]; 879 u_int8_t *opt; 880 881 KASSERT(src->scrub == NULL); 882 883 src->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT); 884 if (src->scrub == NULL) 885 return (1); 886 bzero(src->scrub, sizeof(*src->scrub)); 887 888 switch (pd->af) { 889 #ifdef INET 890 case AF_INET: { 891 struct ip *h = mtod(m, struct ip *); 892 src->scrub->pfss_ttl = h->ip_ttl; 893 break; 894 } 895 #endif /* INET */ 896 #ifdef INET6 897 case AF_INET6: { 898 struct ip6_hdr *h = mtod(m, struct ip6_hdr *); 899 src->scrub->pfss_ttl = h->ip6_hlim; 900 break; 901 } 902 #endif /* INET6 */ 903 } 904 905 906 /* 907 * All normalizations below are only begun if we see the start of 908 * the connections. They must all set an enabled bit in pfss_flags 909 */ 910 if ((th->th_flags & TH_SYN) == 0) 911 return (0); 912 913 914 if (th->th_off > (sizeof(struct tcphdr) >> 2) && src->scrub && 915 pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) { 916 /* Diddle with TCP options */ 917 int hlen; 918 opt = hdr + sizeof(struct tcphdr); 919 hlen = (th->th_off << 2) - sizeof(struct tcphdr); 920 while (hlen >= TCPOLEN_TIMESTAMP) { 921 switch (*opt) { 922 case TCPOPT_EOL: /* FALLTHROUGH */ 923 case TCPOPT_NOP: 924 opt++; 925 hlen--; 926 break; 927 case TCPOPT_TIMESTAMP: 928 if (opt[1] >= TCPOLEN_TIMESTAMP) { 929 src->scrub->pfss_flags |= 930 PFSS_TIMESTAMP; 931 src->scrub->pfss_ts_mod = 932 htonl(arc4random()); 933 934 /* note PFSS_PAWS not set yet */ 935 memcpy(&tsval, &opt[2], 936 sizeof(u_int32_t)); 937 memcpy(&tsecr, &opt[6], 938 sizeof(u_int32_t)); 939 src->scrub->pfss_tsval0 = ntohl(tsval); 940 src->scrub->pfss_tsval = ntohl(tsval); 941 src->scrub->pfss_tsecr = ntohl(tsecr); 942 getmicrouptime(&src->scrub->pfss_last); 943 } 944 /* FALLTHROUGH */ 945 default: 946 hlen -= MAX(opt[1], 2); 947 opt += MAX(opt[1], 2); 948 break; 949 } 950 } 951 } 952 953 return (0); 954 } 955 956 void 957 pf_normalize_tcp_cleanup(struct pf_state *state) 958 { 959 if (state->src.scrub) 960 pool_put(&pf_state_scrub_pl, state->src.scrub); 961 if (state->dst.scrub) 962 pool_put(&pf_state_scrub_pl, state->dst.scrub); 963 964 /* Someday... flush the TCP segment reassembly descriptors. */ 965 } 966 967 int 968 pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, 969 u_short *reason, struct tcphdr *th, struct pf_state *state, 970 struct pf_state_peer *src, struct pf_state_peer *dst, int *writeback) 971 { 972 struct timeval uptime; 973 u_int32_t tsval, tsecr; 974 u_int tsval_from_last; 975 u_int8_t hdr[60]; 976 u_int8_t *opt; 977 int copyback = 0; 978 int got_ts = 0; 979 980 KASSERT(src->scrub || dst->scrub); 981 982 /* 983 * Enforce the minimum TTL seen for this connection. Negate a common 984 * technique to evade an intrusion detection system and confuse 985 * firewall state code. 986 */ 987 switch (pd->af) { 988 #ifdef INET 989 case AF_INET: { 990 if (src->scrub) { 991 struct ip *h = mtod(m, struct ip *); 992 if (h->ip_ttl > src->scrub->pfss_ttl) 993 src->scrub->pfss_ttl = h->ip_ttl; 994 h->ip_ttl = src->scrub->pfss_ttl; 995 } 996 break; 997 } 998 #endif /* INET */ 999 #ifdef INET6 1000 case AF_INET6: { 1001 if (src->scrub) { 1002 struct ip6_hdr *h = mtod(m, struct ip6_hdr *); 1003 if (h->ip6_hlim > src->scrub->pfss_ttl) 1004 src->scrub->pfss_ttl = h->ip6_hlim; 1005 h->ip6_hlim = src->scrub->pfss_ttl; 1006 } 1007 break; 1008 } 1009 #endif /* INET6 */ 1010 } 1011 1012 if (th->th_off > (sizeof(struct tcphdr) >> 2) && 1013 ((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) || 1014 (dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) && 1015 pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) { 1016 /* Diddle with TCP options */ 1017 int hlen; 1018 opt = hdr + sizeof(struct tcphdr); 1019 hlen = (th->th_off << 2) - sizeof(struct tcphdr); 1020 while (hlen >= TCPOLEN_TIMESTAMP) { 1021 switch (*opt) { 1022 case TCPOPT_EOL: /* FALLTHROUGH */ 1023 case TCPOPT_NOP: 1024 opt++; 1025 hlen--; 1026 break; 1027 case TCPOPT_TIMESTAMP: 1028 /* Modulate the timestamps. Can be used for 1029 * NAT detection, OS uptime determination or 1030 * reboot detection. 1031 */ 1032 1033 if (got_ts) { 1034 /* Huh? Multiple timestamps!? */ 1035 if (pf_status.debug >= LOG_NOTICE) { 1036 log(LOG_NOTICE, 1037 "pf: %s: multiple TS??", 1038 __func__); 1039 pf_print_state(state); 1040 addlog("\n"); 1041 } 1042 REASON_SET(reason, PFRES_TS); 1043 return (PF_DROP); 1044 } 1045 if (opt[1] >= TCPOLEN_TIMESTAMP) { 1046 memcpy(&tsval, &opt[2], 1047 sizeof(u_int32_t)); 1048 if (tsval && src->scrub && 1049 (src->scrub->pfss_flags & 1050 PFSS_TIMESTAMP)) { 1051 tsval = ntohl(tsval); 1052 pf_change_a(&opt[2], 1053 &th->th_sum, 1054 htonl(tsval + 1055 src->scrub->pfss_ts_mod), 1056 0); 1057 copyback = 1; 1058 } 1059 1060 /* Modulate TS reply iff valid (!0) */ 1061 memcpy(&tsecr, &opt[6], 1062 sizeof(u_int32_t)); 1063 if (tsecr && dst->scrub && 1064 (dst->scrub->pfss_flags & 1065 PFSS_TIMESTAMP)) { 1066 tsecr = ntohl(tsecr) 1067 - dst->scrub->pfss_ts_mod; 1068 pf_change_a(&opt[6], 1069 &th->th_sum, htonl(tsecr), 1070 0); 1071 copyback = 1; 1072 } 1073 got_ts = 1; 1074 } 1075 /* FALLTHROUGH */ 1076 default: 1077 hlen -= MAX(opt[1], 2); 1078 opt += MAX(opt[1], 2); 1079 break; 1080 } 1081 } 1082 if (copyback) { 1083 /* Copyback the options, caller copys back header */ 1084 *writeback = 1; 1085 m_copyback(m, off + sizeof(struct tcphdr), 1086 (th->th_off << 2) - sizeof(struct tcphdr), hdr + 1087 sizeof(struct tcphdr), M_NOWAIT); 1088 } 1089 } 1090 1091 1092 /* 1093 * Must invalidate PAWS checks on connections idle for too long. 1094 * The fastest allowed timestamp clock is 1ms. That turns out to 1095 * be about 24 days before it wraps. XXX Right now our lowerbound 1096 * TS echo check only works for the first 12 days of a connection 1097 * when the TS has exhausted half its 32bit space 1098 */ 1099 #define TS_MAX_IDLE (24*24*60*60) 1100 #define TS_MAX_CONN (12*24*60*60) /* XXX remove when better tsecr check */ 1101 1102 getmicrouptime(&uptime); 1103 if (src->scrub && (src->scrub->pfss_flags & PFSS_PAWS) && 1104 (uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE || 1105 time_second - state->creation > TS_MAX_CONN)) { 1106 if (pf_status.debug >= LOG_NOTICE) { 1107 log(LOG_NOTICE, "pf: src idled out of PAWS "); 1108 pf_print_state(state); 1109 addlog("\n"); 1110 } 1111 src->scrub->pfss_flags = (src->scrub->pfss_flags & ~PFSS_PAWS) 1112 | PFSS_PAWS_IDLED; 1113 } 1114 if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) && 1115 uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) { 1116 if (pf_status.debug >= LOG_NOTICE) { 1117 log(LOG_NOTICE, "pf: dst idled out of PAWS "); 1118 pf_print_state(state); 1119 addlog("\n"); 1120 } 1121 dst->scrub->pfss_flags = (dst->scrub->pfss_flags & ~PFSS_PAWS) 1122 | PFSS_PAWS_IDLED; 1123 } 1124 1125 if (got_ts && src->scrub && dst->scrub && 1126 (src->scrub->pfss_flags & PFSS_PAWS) && 1127 (dst->scrub->pfss_flags & PFSS_PAWS)) { 1128 /* Validate that the timestamps are "in-window". 1129 * RFC1323 describes TCP Timestamp options that allow 1130 * measurement of RTT (round trip time) and PAWS 1131 * (protection against wrapped sequence numbers). PAWS 1132 * gives us a set of rules for rejecting packets on 1133 * long fat pipes (packets that were somehow delayed 1134 * in transit longer than the time it took to send the 1135 * full TCP sequence space of 4Gb). We can use these 1136 * rules and infer a few others that will let us treat 1137 * the 32bit timestamp and the 32bit echoed timestamp 1138 * as sequence numbers to prevent a blind attacker from 1139 * inserting packets into a connection. 1140 * 1141 * RFC1323 tells us: 1142 * - The timestamp on this packet must be greater than 1143 * or equal to the last value echoed by the other 1144 * endpoint. The RFC says those will be discarded 1145 * since it is a dup that has already been acked. 1146 * This gives us a lowerbound on the timestamp. 1147 * timestamp >= other last echoed timestamp 1148 * - The timestamp will be less than or equal to 1149 * the last timestamp plus the time between the 1150 * last packet and now. The RFC defines the max 1151 * clock rate as 1ms. We will allow clocks to be 1152 * up to 10% fast and will allow a total difference 1153 * or 30 seconds due to a route change. And this 1154 * gives us an upperbound on the timestamp. 1155 * timestamp <= last timestamp + max ticks 1156 * We have to be careful here. Windows will send an 1157 * initial timestamp of zero and then initialize it 1158 * to a random value after the 3whs; presumably to 1159 * avoid a DoS by having to call an expensive RNG 1160 * during a SYN flood. Proof MS has at least one 1161 * good security geek. 1162 * 1163 * - The TCP timestamp option must also echo the other 1164 * endpoints timestamp. The timestamp echoed is the 1165 * one carried on the earliest unacknowledged segment 1166 * on the left edge of the sequence window. The RFC 1167 * states that the host will reject any echoed 1168 * timestamps that were larger than any ever sent. 1169 * This gives us an upperbound on the TS echo. 1170 * tescr <= largest_tsval 1171 * - The lowerbound on the TS echo is a little more 1172 * tricky to determine. The other endpoint's echoed 1173 * values will not decrease. But there may be 1174 * network conditions that re-order packets and 1175 * cause our view of them to decrease. For now the 1176 * only lowerbound we can safely determine is that 1177 * the TS echo will never be less than the original 1178 * TS. XXX There is probably a better lowerbound. 1179 * Remove TS_MAX_CONN with better lowerbound check. 1180 * tescr >= other original TS 1181 * 1182 * It is also important to note that the fastest 1183 * timestamp clock of 1ms will wrap its 32bit space in 1184 * 24 days. So we just disable TS checking after 24 1185 * days of idle time. We actually must use a 12d 1186 * connection limit until we can come up with a better 1187 * lowerbound to the TS echo check. 1188 */ 1189 struct timeval delta_ts; 1190 int ts_fudge; 1191 1192 1193 /* 1194 * PFTM_TS_DIFF is how many seconds of leeway to allow 1195 * a host's timestamp. This can happen if the previous 1196 * packet got delayed in transit for much longer than 1197 * this packet. 1198 */ 1199 if ((ts_fudge = state->rule.ptr->timeout[PFTM_TS_DIFF]) == 0) 1200 ts_fudge = pf_default_rule.timeout[PFTM_TS_DIFF]; 1201 1202 1203 /* Calculate max ticks since the last timestamp */ 1204 #define TS_MAXFREQ 1100 /* RFC max TS freq of 1Khz + 10% skew */ 1205 #define TS_MICROSECS 1000000 /* microseconds per second */ 1206 timersub(&uptime, &src->scrub->pfss_last, &delta_ts); 1207 tsval_from_last = (delta_ts.tv_sec + ts_fudge) * TS_MAXFREQ; 1208 tsval_from_last += delta_ts.tv_usec / (TS_MICROSECS/TS_MAXFREQ); 1209 1210 1211 if ((src->state >= TCPS_ESTABLISHED && 1212 dst->state >= TCPS_ESTABLISHED) && 1213 (SEQ_LT(tsval, dst->scrub->pfss_tsecr) || 1214 SEQ_GT(tsval, src->scrub->pfss_tsval + tsval_from_last) || 1215 (tsecr && (SEQ_GT(tsecr, dst->scrub->pfss_tsval) || 1216 SEQ_LT(tsecr, dst->scrub->pfss_tsval0))))) { 1217 /* Bad RFC1323 implementation or an insertion attack. 1218 * 1219 * - Solaris 2.6 and 2.7 are known to send another ACK 1220 * after the FIN,FIN|ACK,ACK closing that carries 1221 * an old timestamp. 1222 */ 1223 1224 DPFPRINTF(LOG_NOTICE, "Timestamp failed %c%c%c%c", 1225 SEQ_LT(tsval, dst->scrub->pfss_tsecr) ? '0' : ' ', 1226 SEQ_GT(tsval, src->scrub->pfss_tsval + 1227 tsval_from_last) ? '1' : ' ', 1228 SEQ_GT(tsecr, dst->scrub->pfss_tsval) ? '2' : ' ', 1229 SEQ_LT(tsecr, dst->scrub->pfss_tsval0)? '3' : ' '); 1230 DPFPRINTF(LOG_NOTICE, 1231 " tsval: %lu tsecr: %lu +ticks: %lu " 1232 "idle: %lus %lums", 1233 tsval, tsecr, tsval_from_last, delta_ts.tv_sec, 1234 delta_ts.tv_usec / 1000); 1235 DPFPRINTF(LOG_NOTICE, 1236 " src->tsval: %lu tsecr: %lu", 1237 src->scrub->pfss_tsval, src->scrub->pfss_tsecr); 1238 DPFPRINTF(LOG_NOTICE, 1239 " dst->tsval: %lu tsecr: %lu tsval0: %lu", 1240 dst->scrub->pfss_tsval, dst->scrub->pfss_tsecr, 1241 dst->scrub->pfss_tsval0); 1242 if (pf_status.debug >= LOG_NOTICE) { 1243 log(LOG_NOTICE, "pf: "); 1244 pf_print_state(state); 1245 pf_print_flags(th->th_flags); 1246 addlog("\n"); 1247 } 1248 REASON_SET(reason, PFRES_TS); 1249 return (PF_DROP); 1250 } 1251 1252 /* XXX I'd really like to require tsecr but it's optional */ 1253 1254 } else if (!got_ts && (th->th_flags & TH_RST) == 0 && 1255 ((src->state == TCPS_ESTABLISHED && dst->state == TCPS_ESTABLISHED) 1256 || pd->p_len > 0 || (th->th_flags & TH_SYN)) && 1257 src->scrub && dst->scrub && 1258 (src->scrub->pfss_flags & PFSS_PAWS) && 1259 (dst->scrub->pfss_flags & PFSS_PAWS)) { 1260 /* Didn't send a timestamp. Timestamps aren't really useful 1261 * when: 1262 * - connection opening or closing (often not even sent). 1263 * but we must not let an attacker to put a FIN on a 1264 * data packet to sneak it through our ESTABLISHED check. 1265 * - on a TCP reset. RFC suggests not even looking at TS. 1266 * - on an empty ACK. The TS will not be echoed so it will 1267 * probably not help keep the RTT calculation in sync and 1268 * there isn't as much danger when the sequence numbers 1269 * got wrapped. So some stacks don't include TS on empty 1270 * ACKs :-( 1271 * 1272 * To minimize the disruption to mostly RFC1323 conformant 1273 * stacks, we will only require timestamps on data packets. 1274 * 1275 * And what do ya know, we cannot require timestamps on data 1276 * packets. There appear to be devices that do legitimate 1277 * TCP connection hijacking. There are HTTP devices that allow 1278 * a 3whs (with timestamps) and then buffer the HTTP request. 1279 * If the intermediate device has the HTTP response cache, it 1280 * will spoof the response but not bother timestamping its 1281 * packets. So we can look for the presence of a timestamp in 1282 * the first data packet and if there, require it in all future 1283 * packets. 1284 */ 1285 1286 if (pd->p_len > 0 && (src->scrub->pfss_flags & PFSS_DATA_TS)) { 1287 /* 1288 * Hey! Someone tried to sneak a packet in. Or the 1289 * stack changed its RFC1323 behavior?!?! 1290 */ 1291 if (pf_status.debug >= LOG_NOTICE) { 1292 log(LOG_NOTICE, 1293 "pf: did not receive expected RFC1323 " 1294 "timestamp"); 1295 pf_print_state(state); 1296 pf_print_flags(th->th_flags); 1297 addlog("\n"); 1298 } 1299 REASON_SET(reason, PFRES_TS); 1300 return (PF_DROP); 1301 } 1302 } 1303 1304 1305 /* 1306 * We will note if a host sends his data packets with or without 1307 * timestamps. And require all data packets to contain a timestamp 1308 * if the first does. PAWS implicitly requires that all data packets be 1309 * timestamped. But I think there are middle-man devices that hijack 1310 * TCP streams immediately after the 3whs and don't timestamp their 1311 * packets (seen in a WWW accelerator or cache). 1312 */ 1313 if (pd->p_len > 0 && src->scrub && (src->scrub->pfss_flags & 1314 (PFSS_TIMESTAMP|PFSS_DATA_TS|PFSS_DATA_NOTS)) == PFSS_TIMESTAMP) { 1315 if (got_ts) 1316 src->scrub->pfss_flags |= PFSS_DATA_TS; 1317 else { 1318 src->scrub->pfss_flags |= PFSS_DATA_NOTS; 1319 if (pf_status.debug >= LOG_NOTICE && dst->scrub && 1320 (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) { 1321 /* Don't warn if other host rejected RFC1323 */ 1322 log(LOG_NOTICE, 1323 "pf: broken RFC1323 stack did not " 1324 "timestamp data packet. Disabled PAWS " 1325 "security."); 1326 pf_print_state(state); 1327 pf_print_flags(th->th_flags); 1328 addlog("\n"); 1329 } 1330 } 1331 } 1332 1333 1334 /* 1335 * Update PAWS values 1336 */ 1337 if (got_ts && src->scrub && PFSS_TIMESTAMP == (src->scrub->pfss_flags & 1338 (PFSS_PAWS_IDLED|PFSS_TIMESTAMP))) { 1339 getmicrouptime(&src->scrub->pfss_last); 1340 if (SEQ_GEQ(tsval, src->scrub->pfss_tsval) || 1341 (src->scrub->pfss_flags & PFSS_PAWS) == 0) 1342 src->scrub->pfss_tsval = tsval; 1343 1344 if (tsecr) { 1345 if (SEQ_GEQ(tsecr, src->scrub->pfss_tsecr) || 1346 (src->scrub->pfss_flags & PFSS_PAWS) == 0) 1347 src->scrub->pfss_tsecr = tsecr; 1348 1349 if ((src->scrub->pfss_flags & PFSS_PAWS) == 0 && 1350 (SEQ_LT(tsval, src->scrub->pfss_tsval0) || 1351 src->scrub->pfss_tsval0 == 0)) { 1352 /* tsval0 MUST be the lowest timestamp */ 1353 src->scrub->pfss_tsval0 = tsval; 1354 } 1355 1356 /* Only fully initialized after a TS gets echoed */ 1357 if ((src->scrub->pfss_flags & PFSS_PAWS) == 0) 1358 src->scrub->pfss_flags |= PFSS_PAWS; 1359 } 1360 } 1361 1362 /* I have a dream.... TCP segment reassembly.... */ 1363 return (0); 1364 } 1365 1366 int 1367 pf_normalize_mss(struct mbuf *m, int off, struct pf_pdesc *pd, u_int16_t maxmss) 1368 { 1369 struct tcphdr *th = pd->hdr.tcp; 1370 u_int16_t mss; 1371 int thoff; 1372 int opt, cnt, optlen = 0; 1373 u_char opts[MAX_TCPOPTLEN]; 1374 u_char *optp = opts; 1375 1376 thoff = th->th_off << 2; 1377 cnt = thoff - sizeof(struct tcphdr); 1378 1379 if (cnt > 0 && !pf_pull_hdr(m, off + sizeof(*th), opts, cnt, 1380 NULL, NULL, pd->af)) 1381 return (0); 1382 1383 for (; cnt > 0; cnt -= optlen, optp += optlen) { 1384 opt = optp[0]; 1385 if (opt == TCPOPT_EOL) 1386 break; 1387 if (opt == TCPOPT_NOP) 1388 optlen = 1; 1389 else { 1390 if (cnt < 2) 1391 break; 1392 optlen = optp[1]; 1393 if (optlen < 2 || optlen > cnt) 1394 break; 1395 } 1396 switch (opt) { 1397 case TCPOPT_MAXSEG: 1398 bcopy((caddr_t)(optp + 2), (caddr_t)&mss, 2); 1399 if (ntohs(mss) > maxmss) { 1400 th->th_sum = pf_cksum_fixup(th->th_sum, 1401 mss, htons(maxmss), 0); 1402 mss = htons(maxmss); 1403 m_copyback(m, 1404 off + sizeof(*th) + optp + 2 - opts, 1405 2, &mss, M_NOWAIT); 1406 m_copyback(m, off, sizeof(*th), th, M_NOWAIT); 1407 } 1408 break; 1409 default: 1410 break; 1411 } 1412 } 1413 1414 1415 1416 return (0); 1417 } 1418 1419 void 1420 pf_scrub(struct mbuf *m, u_int16_t flags, sa_family_t af, u_int8_t min_ttl, 1421 u_int8_t tos) 1422 { 1423 struct ip *h = mtod(m, struct ip *); 1424 #ifdef INET6 1425 struct ip6_hdr *h6 = mtod(m, struct ip6_hdr *); 1426 #endif 1427 1428 /* Clear IP_DF if no-df was requested */ 1429 if (flags & PFSTATE_NODF && af == AF_INET && h->ip_off & htons(IP_DF)) 1430 h->ip_off &= htons(~IP_DF); 1431 1432 /* Enforce a minimum ttl, may cause endless packet loops */ 1433 if (min_ttl && af == AF_INET && h->ip_ttl < min_ttl) 1434 h->ip_ttl = min_ttl; 1435 #ifdef INET6 1436 if (min_ttl && af == AF_INET6 && h6->ip6_hlim < min_ttl) 1437 h6->ip6_hlim = min_ttl; 1438 #endif 1439 1440 /* Enforce tos */ 1441 if (flags & PFSTATE_SETTOS) { 1442 if (af == AF_INET) 1443 h->ip_tos = tos; 1444 #ifdef INET6 1445 if (af == AF_INET6) { 1446 /* drugs are unable to explain such idiocy */ 1447 h6->ip6_flow &= ~htonl(0x0ff00000); 1448 h6->ip6_flow |= htonl(((u_int32_t)tos) << 20); 1449 } 1450 #endif 1451 } 1452 1453 /* random-id, but not for fragments */ 1454 if (flags & PFSTATE_RANDOMID && af == AF_INET && 1455 !(h->ip_off & ~htons(IP_DF))) 1456 h->ip_id = htons(ip_randomid()); 1457 } 1458