1 /* $OpenBSD: pf_norm.c,v 1.201 2017/01/30 17:41:34 benno Exp $ */ 2 3 /* 4 * Copyright 2001 Niels Provos <provos@citi.umich.edu> 5 * Copyright 2009 Henning Brauer <henning@openbsd.org> 6 * Copyright 2011 Alexander Bluhm <bluhm@openbsd.org> 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 */ 29 30 #include "pflog.h" 31 32 #include <sys/param.h> 33 #include <sys/systm.h> 34 #include <sys/mbuf.h> 35 #include <sys/filio.h> 36 #include <sys/fcntl.h> 37 #include <sys/socket.h> 38 #include <sys/kernel.h> 39 #include <sys/time.h> 40 #include <sys/pool.h> 41 #include <sys/syslog.h> 42 43 #include <net/if.h> 44 #include <net/if_var.h> 45 #include <net/if_pflog.h> 46 47 #include <netinet/in.h> 48 #include <netinet/ip.h> 49 #include <netinet/ip_var.h> 50 #include <netinet/ip_icmp.h> 51 #include <netinet/tcp.h> 52 #include <netinet/tcp_seq.h> 53 #include <netinet/tcp_fsm.h> 54 #include <netinet/udp.h> 55 56 #ifdef INET6 57 #include <netinet6/in6_var.h> 58 #include <netinet/ip6.h> 59 #include <netinet6/ip6_var.h> 60 #include <netinet/icmp6.h> 61 #include <netinet6/nd6.h> 62 #endif /* INET6 */ 63 64 #include <net/pfvar.h> 65 #include <net/pfvar_priv.h> 66 67 struct pf_frent { 68 TAILQ_ENTRY(pf_frent) fr_next; 69 struct mbuf *fe_m; 70 u_int16_t fe_hdrlen; /* ipv4 header length with ip options 71 ipv6, extension, fragment header */ 72 u_int16_t fe_extoff; /* last extension header offset or 0 */ 73 u_int16_t fe_len; /* fragment length */ 74 u_int16_t fe_off; /* fragment offset */ 75 u_int16_t fe_mff; /* more fragment flag */ 76 }; 77 78 /* keep synced with struct pf_fragment, used in RB_FIND */ 79 struct pf_fragment_cmp { 80 struct pf_addr fr_src; 81 struct pf_addr fr_dst; 82 u_int32_t fr_id; 83 sa_family_t fr_af; 84 u_int8_t fr_proto; 85 u_int8_t fr_direction; 86 }; 87 88 struct pf_fragment { 89 struct pf_addr fr_src; /* ip source address */ 90 struct pf_addr fr_dst; /* ip destination address */ 91 u_int32_t fr_id; /* fragment id for reassemble */ 92 sa_family_t fr_af; /* address family */ 93 u_int8_t fr_proto; /* protocol of this fragment */ 94 u_int8_t fr_direction; /* pf packet direction */ 95 96 RB_ENTRY(pf_fragment) fr_entry; 97 TAILQ_ENTRY(pf_fragment) frag_next; 98 TAILQ_HEAD(pf_fragq, pf_frent) fr_queue; 99 int32_t fr_timeout; 100 u_int16_t fr_maxlen; /* maximum length of single fragment */ 101 }; 102 103 struct pf_fragment_tag { 104 u_int16_t ft_hdrlen; /* header length of reassembled pkt */ 105 u_int16_t ft_extoff; /* last extension header offset or 0 */ 106 u_int16_t ft_maxlen; /* maximum fragment payload length */ 107 }; 108 109 TAILQ_HEAD(pf_fragqueue, pf_fragment) pf_fragqueue; 110 111 static __inline int pf_frag_compare(struct pf_fragment *, 112 struct pf_fragment *); 113 RB_HEAD(pf_frag_tree, pf_fragment) pf_frag_tree, pf_cache_tree; 114 RB_PROTOTYPE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare); 115 RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare); 116 117 /* Private prototypes */ 118 void pf_flush_fragments(void); 119 void pf_free_fragment(struct pf_fragment *); 120 struct pf_fragment *pf_find_fragment(struct pf_fragment_cmp *, 121 struct pf_frag_tree *); 122 struct pf_frent *pf_create_fragment(u_short *); 123 struct pf_fragment *pf_fillup_fragment(struct pf_fragment_cmp *, 124 struct pf_frent *, u_short *); 125 int pf_isfull_fragment(struct pf_fragment *); 126 struct mbuf *pf_join_fragment(struct pf_fragment *); 127 int pf_reassemble(struct mbuf **, int, u_short *); 128 #ifdef INET6 129 int pf_reassemble6(struct mbuf **, struct ip6_frag *, 130 u_int16_t, u_int16_t, int, u_short *); 131 #endif /* INET6 */ 132 133 /* Globals */ 134 struct pool pf_frent_pl, pf_frag_pl; 135 struct pool pf_state_scrub_pl; 136 int pf_nfrents; 137 138 void 139 pf_normalize_init(void) 140 { 141 pool_init(&pf_frent_pl, sizeof(struct pf_frent), 0, 142 IPL_SOFTNET, 0, "pffrent", NULL); 143 pool_init(&pf_frag_pl, sizeof(struct pf_fragment), 0, 144 IPL_SOFTNET, 0, "pffrag", NULL); 145 pool_init(&pf_state_scrub_pl, sizeof(struct pf_state_scrub), 0, 146 IPL_SOFTNET, 0, "pfstscr", NULL); 147 148 pool_sethiwat(&pf_frag_pl, PFFRAG_FRAG_HIWAT); 149 pool_sethardlimit(&pf_frent_pl, PFFRAG_FRENT_HIWAT, NULL, 0); 150 151 TAILQ_INIT(&pf_fragqueue); 152 } 153 154 static __inline int 155 pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b) 156 { 157 int diff; 158 159 if ((diff = a->fr_id - b->fr_id) != 0) 160 return (diff); 161 if ((diff = a->fr_proto - b->fr_proto) != 0) 162 return (diff); 163 if ((diff = a->fr_af - b->fr_af) != 0) 164 return (diff); 165 if ((diff = pf_addr_compare(&a->fr_src, &b->fr_src, a->fr_af)) != 0) 166 return (diff); 167 if ((diff = pf_addr_compare(&a->fr_dst, &b->fr_dst, a->fr_af)) != 0) 168 return (diff); 169 170 return (0); 171 } 172 173 void 174 pf_purge_expired_fragments(void) 175 { 176 struct pf_fragment *frag; 177 int32_t expire; 178 179 NET_ASSERT_LOCKED(); 180 181 expire = time_uptime - pf_default_rule.timeout[PFTM_FRAG]; 182 while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) { 183 if (frag->fr_timeout > expire) 184 break; 185 DPFPRINTF(LOG_NOTICE, "expiring %d(%p)", frag->fr_id, frag); 186 pf_free_fragment(frag); 187 } 188 } 189 190 /* 191 * Try to flush old fragments to make space for new ones 192 */ 193 void 194 pf_flush_fragments(void) 195 { 196 struct pf_fragment *frag; 197 int goal; 198 199 goal = pf_nfrents * 9 / 10; 200 DPFPRINTF(LOG_NOTICE, "trying to free > %d frents", pf_nfrents - goal); 201 while (goal < pf_nfrents) { 202 if ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) == NULL) 203 break; 204 pf_free_fragment(frag); 205 } 206 } 207 208 /* 209 * Remove a fragment from the fragment queue, free its fragment entries, 210 * and free the fragment itself. 211 */ 212 void 213 pf_free_fragment(struct pf_fragment *frag) 214 { 215 struct pf_frent *frent; 216 217 RB_REMOVE(pf_frag_tree, &pf_frag_tree, frag); 218 TAILQ_REMOVE(&pf_fragqueue, frag, frag_next); 219 220 /* Free all fragment entries */ 221 while ((frent = TAILQ_FIRST(&frag->fr_queue)) != NULL) { 222 TAILQ_REMOVE(&frag->fr_queue, frent, fr_next); 223 m_freem(frent->fe_m); 224 pool_put(&pf_frent_pl, frent); 225 pf_nfrents--; 226 } 227 pool_put(&pf_frag_pl, frag); 228 } 229 230 struct pf_fragment * 231 pf_find_fragment(struct pf_fragment_cmp *key, struct pf_frag_tree *tree) 232 { 233 struct pf_fragment *frag; 234 235 frag = RB_FIND(pf_frag_tree, tree, (struct pf_fragment *)key); 236 if (frag != NULL) { 237 TAILQ_REMOVE(&pf_fragqueue, frag, frag_next); 238 TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next); 239 } 240 241 return (frag); 242 } 243 244 struct pf_frent * 245 pf_create_fragment(u_short *reason) 246 { 247 struct pf_frent *frent; 248 249 frent = pool_get(&pf_frent_pl, PR_NOWAIT); 250 if (frent == NULL) { 251 pf_flush_fragments(); 252 frent = pool_get(&pf_frent_pl, PR_NOWAIT); 253 if (frent == NULL) { 254 REASON_SET(reason, PFRES_MEMORY); 255 return (NULL); 256 } 257 } 258 pf_nfrents++; 259 260 return (frent); 261 } 262 263 struct pf_fragment * 264 pf_fillup_fragment(struct pf_fragment_cmp *key, struct pf_frent *frent, 265 u_short *reason) 266 { 267 struct pf_frent *after, *next, *prev; 268 struct pf_fragment *frag; 269 u_int16_t total; 270 271 /* No empty fragments */ 272 if (frent->fe_len == 0) { 273 DPFPRINTF(LOG_NOTICE, "bad fragment: len 0"); 274 goto bad_fragment; 275 } 276 277 /* All fragments are 8 byte aligned */ 278 if (frent->fe_mff && (frent->fe_len & 0x7)) { 279 DPFPRINTF(LOG_NOTICE, "bad fragment: mff and len %d", 280 frent->fe_len); 281 goto bad_fragment; 282 } 283 284 /* Respect maximum length, IP_MAXPACKET == IPV6_MAXPACKET */ 285 if (frent->fe_off + frent->fe_len > IP_MAXPACKET) { 286 DPFPRINTF(LOG_NOTICE, "bad fragment: max packet %d", 287 frent->fe_off + frent->fe_len); 288 goto bad_fragment; 289 } 290 291 DPFPRINTF(LOG_NOTICE, key->fr_af == AF_INET ? 292 "reass frag %d @ %d-%d" : "reass frag %#08x @ %d-%d", 293 key->fr_id, frent->fe_off, frent->fe_off + frent->fe_len); 294 295 /* Fully buffer all of the fragments in this fragment queue */ 296 frag = pf_find_fragment(key, &pf_frag_tree); 297 298 /* Create a new reassembly queue for this packet */ 299 if (frag == NULL) { 300 frag = pool_get(&pf_frag_pl, PR_NOWAIT); 301 if (frag == NULL) { 302 pf_flush_fragments(); 303 frag = pool_get(&pf_frag_pl, PR_NOWAIT); 304 if (frag == NULL) { 305 REASON_SET(reason, PFRES_MEMORY); 306 goto drop_fragment; 307 } 308 } 309 310 *(struct pf_fragment_cmp *)frag = *key; 311 TAILQ_INIT(&frag->fr_queue); 312 frag->fr_timeout = time_uptime; 313 frag->fr_maxlen = frent->fe_len; 314 315 RB_INSERT(pf_frag_tree, &pf_frag_tree, frag); 316 TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next); 317 318 /* We do not have a previous fragment */ 319 TAILQ_INSERT_HEAD(&frag->fr_queue, frent, fr_next); 320 321 return (frag); 322 } 323 324 KASSERT(!TAILQ_EMPTY(&frag->fr_queue)); 325 326 /* Remember maximum fragment len for refragmentation */ 327 if (frent->fe_len > frag->fr_maxlen) 328 frag->fr_maxlen = frent->fe_len; 329 330 /* Maximum data we have seen already */ 331 total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off + 332 TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len; 333 334 /* Non terminal fragments must have more fragments flag */ 335 if (frent->fe_off + frent->fe_len < total && !frent->fe_mff) 336 goto free_ipv6_fragment; 337 338 /* Check if we saw the last fragment already */ 339 if (!TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_mff) { 340 if (frent->fe_off + frent->fe_len > total || 341 (frent->fe_off + frent->fe_len == total && frent->fe_mff)) 342 goto free_ipv6_fragment; 343 } else { 344 if (frent->fe_off + frent->fe_len == total && !frent->fe_mff) 345 goto free_ipv6_fragment; 346 } 347 348 /* Find a fragment after the current one */ 349 prev = NULL; 350 TAILQ_FOREACH(after, &frag->fr_queue, fr_next) { 351 if (after->fe_off > frent->fe_off) 352 break; 353 prev = after; 354 } 355 356 KASSERT(prev != NULL || after != NULL); 357 358 if (prev != NULL && prev->fe_off + prev->fe_len > frent->fe_off) { 359 u_int16_t precut; 360 361 #ifdef INET6 362 if (frag->fr_af == AF_INET6) 363 goto free_fragment; 364 #endif /* INET6 */ 365 366 precut = prev->fe_off + prev->fe_len - frent->fe_off; 367 if (precut >= frent->fe_len) { 368 DPFPRINTF(LOG_NOTICE, "new frag overlapped"); 369 goto drop_fragment; 370 } 371 DPFPRINTF(LOG_NOTICE, "frag head overlap %d", precut); 372 m_adj(frent->fe_m, precut); 373 frent->fe_off += precut; 374 frent->fe_len -= precut; 375 } 376 377 for (; after != NULL && frent->fe_off + frent->fe_len > after->fe_off; 378 after = next) { 379 u_int16_t aftercut; 380 381 #ifdef INET6 382 if (frag->fr_af == AF_INET6) 383 goto free_fragment; 384 #endif /* INET6 */ 385 386 aftercut = frent->fe_off + frent->fe_len - after->fe_off; 387 if (aftercut < after->fe_len) { 388 DPFPRINTF(LOG_NOTICE, "frag tail overlap %d", aftercut); 389 m_adj(after->fe_m, aftercut); 390 after->fe_off += aftercut; 391 after->fe_len -= aftercut; 392 break; 393 } 394 395 /* This fragment is completely overlapped, lose it */ 396 DPFPRINTF(LOG_NOTICE, "old frag overlapped"); 397 next = TAILQ_NEXT(after, fr_next); 398 TAILQ_REMOVE(&frag->fr_queue, after, fr_next); 399 m_freem(after->fe_m); 400 pool_put(&pf_frent_pl, after); 401 pf_nfrents--; 402 } 403 404 if (prev == NULL) 405 TAILQ_INSERT_HEAD(&frag->fr_queue, frent, fr_next); 406 else 407 TAILQ_INSERT_AFTER(&frag->fr_queue, prev, frent, fr_next); 408 409 return (frag); 410 411 free_ipv6_fragment: 412 #ifdef INET6 413 if (frag->fr_af == AF_INET) 414 goto bad_fragment; 415 free_fragment: 416 /* 417 * RFC 5722, Errata 3089: When reassembling an IPv6 datagram, if one 418 * or more its constituent fragments is determined to be an overlapping 419 * fragment, the entire datagram (and any constituent fragments) MUST 420 * be silently discarded. 421 */ 422 DPFPRINTF(LOG_NOTICE, "flush overlapping fragments"); 423 pf_free_fragment(frag); 424 #endif /* INET6 */ 425 bad_fragment: 426 REASON_SET(reason, PFRES_FRAG); 427 drop_fragment: 428 pool_put(&pf_frent_pl, frent); 429 pf_nfrents--; 430 return (NULL); 431 } 432 433 int 434 pf_isfull_fragment(struct pf_fragment *frag) 435 { 436 struct pf_frent *frent, *next; 437 u_int16_t off, total; 438 439 KASSERT(!TAILQ_EMPTY(&frag->fr_queue)); 440 441 /* Check if we are completely reassembled */ 442 if (TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_mff) 443 return (0); 444 445 /* Maximum data we have seen already */ 446 total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off + 447 TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len; 448 449 /* Check if we have all the data */ 450 off = 0; 451 for (frent = TAILQ_FIRST(&frag->fr_queue); frent; frent = next) { 452 next = TAILQ_NEXT(frent, fr_next); 453 off += frent->fe_len; 454 if (off < total && (next == NULL || next->fe_off != off)) { 455 DPFPRINTF(LOG_NOTICE, 456 "missing fragment at %d, next %d, total %d", 457 off, next == NULL ? -1 : next->fe_off, total); 458 return (0); 459 } 460 } 461 DPFPRINTF(LOG_NOTICE, "%d < %d?", off, total); 462 if (off < total) 463 return (0); 464 KASSERT(off == total); 465 466 return (1); 467 } 468 469 struct mbuf * 470 pf_join_fragment(struct pf_fragment *frag) 471 { 472 struct mbuf *m, *m2; 473 struct pf_frent *frent; 474 475 frent = TAILQ_FIRST(&frag->fr_queue); 476 TAILQ_REMOVE(&frag->fr_queue, frent, fr_next); 477 478 m = frent->fe_m; 479 /* Strip off any trailing bytes */ 480 if ((frent->fe_hdrlen + frent->fe_len) < m->m_pkthdr.len) 481 m_adj(m, (frent->fe_hdrlen + frent->fe_len) - m->m_pkthdr.len); 482 /* Magic from ip_input */ 483 m2 = m->m_next; 484 m->m_next = NULL; 485 m_cat(m, m2); 486 pool_put(&pf_frent_pl, frent); 487 pf_nfrents--; 488 489 while ((frent = TAILQ_FIRST(&frag->fr_queue)) != NULL) { 490 TAILQ_REMOVE(&frag->fr_queue, frent, fr_next); 491 m2 = frent->fe_m; 492 /* Strip off ip header */ 493 m_adj(m2, frent->fe_hdrlen); 494 /* Strip off any trailing bytes */ 495 if (frent->fe_len < m2->m_pkthdr.len) 496 m_adj(m2, frent->fe_len - m2->m_pkthdr.len); 497 pool_put(&pf_frent_pl, frent); 498 pf_nfrents--; 499 m_cat(m, m2); 500 } 501 502 /* Remove from fragment queue */ 503 pf_free_fragment(frag); 504 505 return (m); 506 } 507 508 int 509 pf_reassemble(struct mbuf **m0, int dir, u_short *reason) 510 { 511 struct mbuf *m = *m0; 512 struct ip *ip = mtod(m, struct ip *); 513 struct pf_frent *frent; 514 struct pf_fragment *frag; 515 struct pf_fragment_cmp key; 516 u_int16_t total, hdrlen; 517 518 /* Get an entry for the fragment queue */ 519 if ((frent = pf_create_fragment(reason)) == NULL) 520 return (PF_DROP); 521 522 frent->fe_m = m; 523 frent->fe_hdrlen = ip->ip_hl << 2; 524 frent->fe_extoff = 0; 525 frent->fe_len = ntohs(ip->ip_len) - (ip->ip_hl << 2); 526 frent->fe_off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3; 527 frent->fe_mff = ntohs(ip->ip_off) & IP_MF; 528 529 key.fr_src.v4 = ip->ip_src; 530 key.fr_dst.v4 = ip->ip_dst; 531 key.fr_af = AF_INET; 532 key.fr_proto = ip->ip_p; 533 key.fr_id = ip->ip_id; 534 key.fr_direction = dir; 535 536 if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL) 537 return (PF_DROP); 538 539 /* The mbuf is part of the fragment entry, no direct free or access */ 540 m = *m0 = NULL; 541 542 if (!pf_isfull_fragment(frag)) 543 return (PF_PASS); /* drop because *m0 is NULL, no error */ 544 545 /* We have all the data */ 546 frent = TAILQ_FIRST(&frag->fr_queue); 547 KASSERT(frent != NULL); 548 total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off + 549 TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len; 550 hdrlen = frent->fe_hdrlen; 551 m = *m0 = pf_join_fragment(frag); 552 frag = NULL; 553 554 if (m->m_flags & M_PKTHDR) { 555 int plen = 0; 556 for (m = *m0; m; m = m->m_next) 557 plen += m->m_len; 558 m = *m0; 559 m->m_pkthdr.len = plen; 560 } 561 562 ip = mtod(m, struct ip *); 563 ip->ip_len = htons(hdrlen + total); 564 ip->ip_off &= ~(IP_MF|IP_OFFMASK); 565 566 if (hdrlen + total > IP_MAXPACKET) { 567 DPFPRINTF(LOG_NOTICE, "drop: too big: %d", total); 568 ip->ip_len = 0; 569 REASON_SET(reason, PFRES_SHORT); 570 /* PF_DROP requires a valid mbuf *m0 in pf_test() */ 571 return (PF_DROP); 572 } 573 574 DPFPRINTF(LOG_NOTICE, "complete: %p(%d)", m, ntohs(ip->ip_len)); 575 return (PF_PASS); 576 } 577 578 #ifdef INET6 579 int 580 pf_reassemble6(struct mbuf **m0, struct ip6_frag *fraghdr, 581 u_int16_t hdrlen, u_int16_t extoff, int dir, u_short *reason) 582 { 583 struct mbuf *m = *m0; 584 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 585 struct m_tag *mtag; 586 struct pf_fragment_tag *ftag; 587 struct pf_frent *frent; 588 struct pf_fragment *frag; 589 struct pf_fragment_cmp key; 590 int off; 591 u_int16_t total, maxlen; 592 u_int8_t proto; 593 594 /* Get an entry for the fragment queue */ 595 if ((frent = pf_create_fragment(reason)) == NULL) 596 return (PF_DROP); 597 598 frent->fe_m = m; 599 frent->fe_hdrlen = hdrlen; 600 frent->fe_extoff = extoff; 601 frent->fe_len = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen) - hdrlen; 602 frent->fe_off = ntohs(fraghdr->ip6f_offlg & IP6F_OFF_MASK); 603 frent->fe_mff = fraghdr->ip6f_offlg & IP6F_MORE_FRAG; 604 605 key.fr_src.v6 = ip6->ip6_src; 606 key.fr_dst.v6 = ip6->ip6_dst; 607 key.fr_af = AF_INET6; 608 /* Only the first fragment's protocol is relevant */ 609 key.fr_proto = 0; 610 key.fr_id = fraghdr->ip6f_ident; 611 key.fr_direction = dir; 612 613 if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL) 614 return (PF_DROP); 615 616 /* The mbuf is part of the fragment entry, no direct free or access */ 617 m = *m0 = NULL; 618 619 if (!pf_isfull_fragment(frag)) 620 return (PF_PASS); /* drop because *m0 is NULL, no error */ 621 622 /* We have all the data */ 623 extoff = frent->fe_extoff; 624 maxlen = frag->fr_maxlen; 625 frent = TAILQ_FIRST(&frag->fr_queue); 626 KASSERT(frent != NULL); 627 total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off + 628 TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len; 629 hdrlen = frent->fe_hdrlen - sizeof(struct ip6_frag); 630 m = *m0 = pf_join_fragment(frag); 631 frag = NULL; 632 633 /* Take protocol from first fragment header */ 634 if ((m = m_getptr(m, hdrlen + offsetof(struct ip6_frag, ip6f_nxt), 635 &off)) == NULL) 636 panic("%s: short frag mbuf chain", __func__); 637 proto = *(mtod(m, caddr_t) + off); 638 m = *m0; 639 640 /* Delete frag6 header */ 641 if (frag6_deletefraghdr(m, hdrlen) != 0) 642 goto fail; 643 644 if (m->m_flags & M_PKTHDR) { 645 int plen = 0; 646 for (m = *m0; m; m = m->m_next) 647 plen += m->m_len; 648 m = *m0; 649 m->m_pkthdr.len = plen; 650 } 651 652 if ((mtag = m_tag_get(PACKET_TAG_PF_REASSEMBLED, sizeof(struct 653 pf_fragment_tag), M_NOWAIT)) == NULL) 654 goto fail; 655 ftag = (struct pf_fragment_tag *)(mtag + 1); 656 ftag->ft_hdrlen = hdrlen; 657 ftag->ft_extoff = extoff; 658 ftag->ft_maxlen = maxlen; 659 m_tag_prepend(m, mtag); 660 661 ip6 = mtod(m, struct ip6_hdr *); 662 ip6->ip6_plen = htons(hdrlen - sizeof(struct ip6_hdr) + total); 663 if (extoff) { 664 /* Write protocol into next field of last extension header */ 665 if ((m = m_getptr(m, extoff + offsetof(struct ip6_ext, 666 ip6e_nxt), &off)) == NULL) 667 panic("%s: short ext mbuf chain", __func__); 668 *(mtod(m, caddr_t) + off) = proto; 669 m = *m0; 670 } else 671 ip6->ip6_nxt = proto; 672 673 if (hdrlen - sizeof(struct ip6_hdr) + total > IPV6_MAXPACKET) { 674 DPFPRINTF(LOG_NOTICE, "drop: too big: %d", total); 675 ip6->ip6_plen = 0; 676 REASON_SET(reason, PFRES_SHORT); 677 /* PF_DROP requires a valid mbuf *m0 in pf_test6() */ 678 return (PF_DROP); 679 } 680 681 DPFPRINTF(LOG_NOTICE, "complete: %p(%d)", m, ntohs(ip6->ip6_plen)); 682 return (PF_PASS); 683 684 fail: 685 REASON_SET(reason, PFRES_MEMORY); 686 /* PF_DROP requires a valid mbuf *m0 in pf_test6(), will free later */ 687 return (PF_DROP); 688 } 689 690 int 691 pf_refragment6(struct mbuf **m0, struct m_tag *mtag, struct sockaddr_in6 *dst, 692 struct ifnet *ifp, struct rtentry *rt) 693 { 694 struct mbuf *m = *m0, *t; 695 struct pf_fragment_tag *ftag = (struct pf_fragment_tag *)(mtag + 1); 696 u_int32_t mtu; 697 u_int16_t hdrlen, extoff, maxlen; 698 u_int8_t proto; 699 int error, action; 700 701 hdrlen = ftag->ft_hdrlen; 702 extoff = ftag->ft_extoff; 703 maxlen = ftag->ft_maxlen; 704 m_tag_delete(m, mtag); 705 mtag = NULL; 706 ftag = NULL; 707 708 /* Checksum must be calculated for the whole packet */ 709 in6_proto_cksum_out(m, NULL); 710 711 if (extoff) { 712 int off; 713 714 /* Use protocol from next field of last extension header */ 715 if ((m = m_getptr(m, extoff + offsetof(struct ip6_ext, 716 ip6e_nxt), &off)) == NULL) 717 panic("%s: short ext mbuf chain", __func__); 718 proto = *(mtod(m, caddr_t) + off); 719 *(mtod(m, caddr_t) + off) = IPPROTO_FRAGMENT; 720 m = *m0; 721 } else { 722 struct ip6_hdr *hdr; 723 724 hdr = mtod(m, struct ip6_hdr *); 725 proto = hdr->ip6_nxt; 726 hdr->ip6_nxt = IPPROTO_FRAGMENT; 727 } 728 729 /* 730 * Maxlen may be less than 8 iff there was only a single 731 * fragment. As it was fragmented before, add a fragment 732 * header also for a single fragment. If total or maxlen 733 * is less than 8, ip6_fragment() will return EMSGSIZE and 734 * we drop the packet. 735 */ 736 mtu = hdrlen + sizeof(struct ip6_frag) + maxlen; 737 error = ip6_fragment(m, hdrlen, proto, mtu); 738 739 m = (*m0)->m_nextpkt; 740 (*m0)->m_nextpkt = NULL; 741 if (error == 0) { 742 /* The first mbuf contains the unfragmented packet */ 743 m_freem(*m0); 744 *m0 = NULL; 745 action = PF_PASS; 746 } else { 747 /* Drop expects an mbuf to free */ 748 DPFPRINTF(LOG_NOTICE, "refragment error %d", error); 749 action = PF_DROP; 750 } 751 752 for (t = m; m; m = t) { 753 t = m->m_nextpkt; 754 m->m_nextpkt = NULL; 755 m->m_pkthdr.pf.flags |= PF_TAG_REFRAGMENTED; 756 if (error == 0) { 757 if (ifp == NULL) { 758 ip6_forward(m, NULL, 0); 759 } else if ((u_long)m->m_pkthdr.len <= ifp->if_mtu) { 760 ifp->if_output(ifp, m, sin6tosa(dst), rt); 761 } else { 762 icmp6_error(m, ICMP6_PACKET_TOO_BIG, 0, 763 ifp->if_mtu); 764 } 765 } else { 766 m_freem(m); 767 } 768 } 769 770 return (action); 771 } 772 #endif /* INET6 */ 773 774 int 775 pf_normalize_ip(struct pf_pdesc *pd, u_short *reason) 776 { 777 struct ip *h = mtod(pd->m, struct ip *); 778 u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3; 779 u_int16_t mff = (ntohs(h->ip_off) & IP_MF); 780 781 if (!fragoff && !mff) 782 goto no_fragment; 783 784 /* Clear IP_DF if we're in no-df mode */ 785 if (pf_status.reass & PF_REASS_NODF && h->ip_off & htons(IP_DF)) 786 h->ip_off &= htons(~IP_DF); 787 788 /* We're dealing with a fragment now. Don't allow fragments 789 * with IP_DF to enter the cache. If the flag was cleared by 790 * no-df above, fine. Otherwise drop it. 791 */ 792 if (h->ip_off & htons(IP_DF)) { 793 DPFPRINTF(LOG_NOTICE, "bad fragment: IP_DF"); 794 REASON_SET(reason, PFRES_FRAG); 795 return (PF_DROP); 796 } 797 798 if (!pf_status.reass) 799 return (PF_PASS); /* no reassembly */ 800 801 /* Returns PF_DROP or m is NULL or completely reassembled mbuf */ 802 if (pf_reassemble(&pd->m, pd->dir, reason) != PF_PASS) 803 return (PF_DROP); 804 if (pd->m == NULL) 805 return (PF_PASS); /* packet has been reassembled, no error */ 806 807 h = mtod(pd->m, struct ip *); 808 809 no_fragment: 810 /* At this point, only IP_DF is allowed in ip_off */ 811 if (h->ip_off & ~htons(IP_DF)) 812 h->ip_off &= htons(IP_DF); 813 814 return (PF_PASS); 815 } 816 817 #ifdef INET6 818 int 819 pf_normalize_ip6(struct pf_pdesc *pd, u_short *reason) 820 { 821 struct ip6_frag frag; 822 823 if (pd->fragoff == 0) 824 goto no_fragment; 825 826 if (!pf_pull_hdr(pd->m, pd->fragoff, &frag, sizeof(frag), NULL, reason, 827 AF_INET6)) 828 return (PF_DROP); 829 830 if (!pf_status.reass) 831 return (PF_PASS); /* no reassembly */ 832 833 /* Returns PF_DROP or m is NULL or completely reassembled mbuf */ 834 if (pf_reassemble6(&pd->m, &frag, pd->fragoff + sizeof(frag), 835 pd->extoff, pd->dir, reason) != PF_PASS) 836 return (PF_DROP); 837 if (pd->m == NULL) 838 return (PF_PASS); /* packet has been reassembled, no error */ 839 840 no_fragment: 841 return (PF_PASS); 842 } 843 #endif /* INET6 */ 844 845 int 846 pf_normalize_tcp(struct pf_pdesc *pd) 847 { 848 struct tcphdr *th = &pd->hdr.tcp; 849 u_short reason; 850 u_int8_t flags; 851 u_int rewrite = 0; 852 853 flags = th->th_flags; 854 if (flags & TH_SYN) { 855 /* Illegal packet */ 856 if (flags & TH_RST) 857 goto tcp_drop; 858 859 if (flags & TH_FIN) /* XXX why clear instead of drop? */ 860 flags &= ~TH_FIN; 861 } else { 862 /* Illegal packet */ 863 if (!(flags & (TH_ACK|TH_RST))) 864 goto tcp_drop; 865 } 866 867 if (!(flags & TH_ACK)) { 868 /* These flags are only valid if ACK is set */ 869 if (flags & (TH_FIN|TH_PUSH|TH_URG)) 870 goto tcp_drop; 871 } 872 873 /* If flags changed, or reserved data set, then adjust */ 874 if (flags != th->th_flags || th->th_x2 != 0) { 875 /* hack: set 4-bit th_x2 = 0 */ 876 u_int8_t *th_off = (u_int8_t*)(&th->th_ack+1); 877 pf_patch_8(pd, th_off, th->th_off << 4, PF_HI); 878 879 pf_patch_8(pd, &th->th_flags, flags, PF_LO); 880 rewrite = 1; 881 } 882 883 /* Remove urgent pointer, if TH_URG is not set */ 884 if (!(flags & TH_URG) && th->th_urp) { 885 pf_patch_16(pd, &th->th_urp, 0); 886 rewrite = 1; 887 } 888 889 /* copy back packet headers if we sanitized */ 890 if (rewrite) { 891 m_copyback(pd->m, pd->off, sizeof(*th), th, M_NOWAIT); 892 } 893 894 return (PF_PASS); 895 896 tcp_drop: 897 REASON_SET(&reason, PFRES_NORM); 898 return (PF_DROP); 899 } 900 901 int 902 pf_normalize_tcp_init(struct pf_pdesc *pd, struct pf_state_peer *src) 903 { 904 struct tcphdr *th = &pd->hdr.tcp; 905 u_int32_t tsval, tsecr; 906 u_int8_t hdr[60]; 907 u_int8_t *opt; 908 909 KASSERT(src->scrub == NULL); 910 911 src->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT); 912 if (src->scrub == NULL) 913 return (1); 914 bzero(src->scrub, sizeof(*src->scrub)); 915 916 switch (pd->af) { 917 case AF_INET: { 918 struct ip *h = mtod(pd->m, struct ip *); 919 src->scrub->pfss_ttl = h->ip_ttl; 920 break; 921 } 922 #ifdef INET6 923 case AF_INET6: { 924 struct ip6_hdr *h = mtod(pd->m, struct ip6_hdr *); 925 src->scrub->pfss_ttl = h->ip6_hlim; 926 break; 927 } 928 #endif /* INET6 */ 929 default: 930 unhandled_af(pd->af); 931 } 932 933 /* 934 * All normalizations below are only begun if we see the start of 935 * the connections. They must all set an enabled bit in pfss_flags 936 */ 937 if ((th->th_flags & TH_SYN) == 0) 938 return (0); 939 940 if (th->th_off > (sizeof(struct tcphdr) >> 2) && src->scrub && 941 pf_pull_hdr(pd->m, pd->off, hdr, th->th_off << 2, NULL, NULL, 942 pd->af)) { 943 /* Diddle with TCP options */ 944 int hlen; 945 946 opt = hdr + sizeof(struct tcphdr); 947 hlen = (th->th_off << 2) - sizeof(struct tcphdr); 948 while (hlen >= TCPOLEN_TIMESTAMP) { 949 switch (*opt) { 950 case TCPOPT_EOL: /* FALLTHROUGH */ 951 case TCPOPT_NOP: 952 opt++; 953 hlen--; 954 break; 955 case TCPOPT_TIMESTAMP: 956 if (opt[1] >= TCPOLEN_TIMESTAMP) { 957 src->scrub->pfss_flags |= 958 PFSS_TIMESTAMP; 959 src->scrub->pfss_ts_mod = arc4random(); 960 961 /* note PFSS_PAWS not set yet */ 962 memcpy(&tsval, &opt[2], 963 sizeof(u_int32_t)); 964 memcpy(&tsecr, &opt[6], 965 sizeof(u_int32_t)); 966 src->scrub->pfss_tsval0 = ntohl(tsval); 967 src->scrub->pfss_tsval = ntohl(tsval); 968 src->scrub->pfss_tsecr = ntohl(tsecr); 969 getmicrouptime(&src->scrub->pfss_last); 970 } 971 /* FALLTHROUGH */ 972 default: 973 hlen -= MAX(opt[1], 2); 974 opt += MAX(opt[1], 2); 975 break; 976 } 977 } 978 } 979 980 return (0); 981 } 982 983 void 984 pf_normalize_tcp_cleanup(struct pf_state *state) 985 { 986 if (state->src.scrub) 987 pool_put(&pf_state_scrub_pl, state->src.scrub); 988 if (state->dst.scrub) 989 pool_put(&pf_state_scrub_pl, state->dst.scrub); 990 991 /* Someday... flush the TCP segment reassembly descriptors. */ 992 } 993 994 int 995 pf_normalize_tcp_stateful(struct pf_pdesc *pd, u_short *reason, 996 struct pf_state *state, struct pf_state_peer *src, 997 struct pf_state_peer *dst, int *writeback) 998 { 999 struct tcphdr *th = &pd->hdr.tcp; 1000 struct timeval uptime; 1001 u_int32_t tsval, tsecr; 1002 u_int tsval_from_last; 1003 u_int8_t hdr[60]; 1004 u_int8_t *opts, *opt; 1005 int copyback = 0; 1006 int got_ts = 0; 1007 1008 KASSERT(src->scrub || dst->scrub); 1009 1010 /* 1011 * Enforce the minimum TTL seen for this connection. Negate a common 1012 * technique to evade an intrusion detection system and confuse 1013 * firewall state code. 1014 */ 1015 switch (pd->af) { 1016 case AF_INET: 1017 if (src->scrub) { 1018 struct ip *h = mtod(pd->m, struct ip *); 1019 if (h->ip_ttl > src->scrub->pfss_ttl) 1020 src->scrub->pfss_ttl = h->ip_ttl; 1021 h->ip_ttl = src->scrub->pfss_ttl; 1022 } 1023 break; 1024 #ifdef INET6 1025 case AF_INET6: 1026 if (src->scrub) { 1027 struct ip6_hdr *h = mtod(pd->m, struct ip6_hdr *); 1028 if (h->ip6_hlim > src->scrub->pfss_ttl) 1029 src->scrub->pfss_ttl = h->ip6_hlim; 1030 h->ip6_hlim = src->scrub->pfss_ttl; 1031 } 1032 break; 1033 #endif /* INET6 */ 1034 default: 1035 unhandled_af(pd->af); 1036 } 1037 1038 if (th->th_off > (sizeof(struct tcphdr) >> 2) && 1039 ((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) || 1040 (dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) && 1041 pf_pull_hdr(pd->m, pd->off, hdr, th->th_off << 2, NULL, NULL, 1042 pd->af)) { 1043 /* Diddle with TCP options */ 1044 int hlen; 1045 opt = opts = hdr + sizeof(struct tcphdr); 1046 hlen = (th->th_off << 2) - sizeof(struct tcphdr); 1047 while (hlen >= TCPOLEN_TIMESTAMP) { 1048 switch (*opt) { 1049 case TCPOPT_EOL: /* FALLTHROUGH */ 1050 case TCPOPT_NOP: 1051 opt++; 1052 hlen--; 1053 break; 1054 case TCPOPT_TIMESTAMP: 1055 /* Modulate the timestamps. Can be used for 1056 * NAT detection, OS uptime determination or 1057 * reboot detection. 1058 */ 1059 1060 if (got_ts) { 1061 /* Huh? Multiple timestamps!? */ 1062 if (pf_status.debug >= LOG_NOTICE) { 1063 log(LOG_NOTICE, 1064 "pf: %s: multiple TS??", 1065 __func__); 1066 pf_print_state(state); 1067 addlog("\n"); 1068 } 1069 REASON_SET(reason, PFRES_TS); 1070 return (PF_DROP); 1071 } 1072 if (opt[1] >= TCPOLEN_TIMESTAMP) { 1073 u_int8_t *ts = opt + 2; 1074 u_int8_t *tsr = opt + 6; 1075 1076 memcpy(&tsval, ts, sizeof(u_int32_t)); 1077 memcpy(&tsecr, tsr, sizeof(u_int32_t)); 1078 1079 /* modulate TS */ 1080 if (tsval && src->scrub && 1081 (src->scrub->pfss_flags & 1082 PFSS_TIMESTAMP)) { 1083 /* tsval used further on */ 1084 tsval = ntohl(tsval); 1085 pf_patch_32_unaligned(pd, ts, 1086 htonl(tsval + 1087 src->scrub->pfss_ts_mod), 1088 PF_ALGNMNT(ts - opts)); 1089 copyback = 1; 1090 } 1091 1092 /* modulate TS reply if any (!0) */ 1093 if (tsecr && dst->scrub && 1094 (dst->scrub->pfss_flags & 1095 PFSS_TIMESTAMP)) { 1096 /* tsecr used further on */ 1097 tsecr = ntohl(tsecr) 1098 - dst->scrub->pfss_ts_mod; 1099 pf_patch_32_unaligned(pd, tsr, 1100 htonl(tsecr), 1101 PF_ALGNMNT(tsr - opts)); 1102 copyback = 1; 1103 } 1104 got_ts = 1; 1105 } 1106 /* FALLTHROUGH */ 1107 default: 1108 hlen -= MAX(opt[1], 2); 1109 opt += MAX(opt[1], 2); 1110 break; 1111 } 1112 } 1113 if (copyback) { 1114 /* Copyback the options, caller copys back header */ 1115 *writeback = 1; 1116 m_copyback(pd->m, pd->off + sizeof(struct tcphdr), 1117 (th->th_off << 2) - sizeof(struct tcphdr), hdr + 1118 sizeof(struct tcphdr), M_NOWAIT); 1119 } 1120 } 1121 1122 1123 /* 1124 * Must invalidate PAWS checks on connections idle for too long. 1125 * The fastest allowed timestamp clock is 1ms. That turns out to 1126 * be about 24 days before it wraps. XXX Right now our lowerbound 1127 * TS echo check only works for the first 12 days of a connection 1128 * when the TS has exhausted half its 32bit space 1129 */ 1130 #define TS_MAX_IDLE (24*24*60*60) 1131 #define TS_MAX_CONN (12*24*60*60) /* XXX remove when better tsecr check */ 1132 1133 getmicrouptime(&uptime); 1134 if (src->scrub && (src->scrub->pfss_flags & PFSS_PAWS) && 1135 (uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE || 1136 time_uptime - state->creation > TS_MAX_CONN)) { 1137 if (pf_status.debug >= LOG_NOTICE) { 1138 log(LOG_NOTICE, "pf: src idled out of PAWS "); 1139 pf_print_state(state); 1140 addlog("\n"); 1141 } 1142 src->scrub->pfss_flags = 1143 (src->scrub->pfss_flags & ~PFSS_PAWS) | PFSS_PAWS_IDLED; 1144 } 1145 if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) && 1146 uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) { 1147 if (pf_status.debug >= LOG_NOTICE) { 1148 log(LOG_NOTICE, "pf: dst idled out of PAWS "); 1149 pf_print_state(state); 1150 addlog("\n"); 1151 } 1152 dst->scrub->pfss_flags = 1153 (dst->scrub->pfss_flags & ~PFSS_PAWS) | PFSS_PAWS_IDLED; 1154 } 1155 1156 if (got_ts && src->scrub && dst->scrub && 1157 (src->scrub->pfss_flags & PFSS_PAWS) && 1158 (dst->scrub->pfss_flags & PFSS_PAWS)) { 1159 /* Validate that the timestamps are "in-window". 1160 * RFC1323 describes TCP Timestamp options that allow 1161 * measurement of RTT (round trip time) and PAWS 1162 * (protection against wrapped sequence numbers). PAWS 1163 * gives us a set of rules for rejecting packets on 1164 * long fat pipes (packets that were somehow delayed 1165 * in transit longer than the time it took to send the 1166 * full TCP sequence space of 4Gb). We can use these 1167 * rules and infer a few others that will let us treat 1168 * the 32bit timestamp and the 32bit echoed timestamp 1169 * as sequence numbers to prevent a blind attacker from 1170 * inserting packets into a connection. 1171 * 1172 * RFC1323 tells us: 1173 * - The timestamp on this packet must be greater than 1174 * or equal to the last value echoed by the other 1175 * endpoint. The RFC says those will be discarded 1176 * since it is a dup that has already been acked. 1177 * This gives us a lowerbound on the timestamp. 1178 * timestamp >= other last echoed timestamp 1179 * - The timestamp will be less than or equal to 1180 * the last timestamp plus the time between the 1181 * last packet and now. The RFC defines the max 1182 * clock rate as 1ms. We will allow clocks to be 1183 * up to 10% fast and will allow a total difference 1184 * or 30 seconds due to a route change. And this 1185 * gives us an upperbound on the timestamp. 1186 * timestamp <= last timestamp + max ticks 1187 * We have to be careful here. Windows will send an 1188 * initial timestamp of zero and then initialize it 1189 * to a random value after the 3whs; presumably to 1190 * avoid a DoS by having to call an expensive RNG 1191 * during a SYN flood. Proof MS has at least one 1192 * good security geek. 1193 * 1194 * - The TCP timestamp option must also echo the other 1195 * endpoints timestamp. The timestamp echoed is the 1196 * one carried on the earliest unacknowledged segment 1197 * on the left edge of the sequence window. The RFC 1198 * states that the host will reject any echoed 1199 * timestamps that were larger than any ever sent. 1200 * This gives us an upperbound on the TS echo. 1201 * tescr <= largest_tsval 1202 * - The lowerbound on the TS echo is a little more 1203 * tricky to determine. The other endpoint's echoed 1204 * values will not decrease. But there may be 1205 * network conditions that re-order packets and 1206 * cause our view of them to decrease. For now the 1207 * only lowerbound we can safely determine is that 1208 * the TS echo will never be less than the original 1209 * TS. XXX There is probably a better lowerbound. 1210 * Remove TS_MAX_CONN with better lowerbound check. 1211 * tescr >= other original TS 1212 * 1213 * It is also important to note that the fastest 1214 * timestamp clock of 1ms will wrap its 32bit space in 1215 * 24 days. So we just disable TS checking after 24 1216 * days of idle time. We actually must use a 12d 1217 * connection limit until we can come up with a better 1218 * lowerbound to the TS echo check. 1219 */ 1220 struct timeval delta_ts; 1221 int ts_fudge; 1222 1223 /* 1224 * PFTM_TS_DIFF is how many seconds of leeway to allow 1225 * a host's timestamp. This can happen if the previous 1226 * packet got delayed in transit for much longer than 1227 * this packet. 1228 */ 1229 if ((ts_fudge = state->rule.ptr->timeout[PFTM_TS_DIFF]) == 0) 1230 ts_fudge = pf_default_rule.timeout[PFTM_TS_DIFF]; 1231 1232 /* Calculate max ticks since the last timestamp */ 1233 #define TS_MAXFREQ 1100 /* RFC max TS freq of 1Khz + 10% skew */ 1234 #define TS_MICROSECS 1000000 /* microseconds per second */ 1235 timersub(&uptime, &src->scrub->pfss_last, &delta_ts); 1236 tsval_from_last = (delta_ts.tv_sec + ts_fudge) * TS_MAXFREQ; 1237 tsval_from_last += delta_ts.tv_usec / (TS_MICROSECS/TS_MAXFREQ); 1238 1239 if ((src->state >= TCPS_ESTABLISHED && 1240 dst->state >= TCPS_ESTABLISHED) && 1241 (SEQ_LT(tsval, dst->scrub->pfss_tsecr) || 1242 SEQ_GT(tsval, src->scrub->pfss_tsval + tsval_from_last) || 1243 (tsecr && (SEQ_GT(tsecr, dst->scrub->pfss_tsval) || 1244 SEQ_LT(tsecr, dst->scrub->pfss_tsval0))))) { 1245 /* Bad RFC1323 implementation or an insertion attack. 1246 * 1247 * - Solaris 2.6 and 2.7 are known to send another ACK 1248 * after the FIN,FIN|ACK,ACK closing that carries 1249 * an old timestamp. 1250 */ 1251 1252 DPFPRINTF(LOG_NOTICE, "Timestamp failed %c%c%c%c", 1253 SEQ_LT(tsval, dst->scrub->pfss_tsecr) ? '0' : ' ', 1254 SEQ_GT(tsval, src->scrub->pfss_tsval + 1255 tsval_from_last) ? '1' : ' ', 1256 SEQ_GT(tsecr, dst->scrub->pfss_tsval) ? '2' : ' ', 1257 SEQ_LT(tsecr, dst->scrub->pfss_tsval0)? '3' : ' '); 1258 DPFPRINTF(LOG_NOTICE, " tsval: %u tsecr: %u " 1259 "+ticks: %u idle: %llu.%06lus", tsval, tsecr, 1260 tsval_from_last, (long long)delta_ts.tv_sec, 1261 delta_ts.tv_usec); 1262 DPFPRINTF(LOG_NOTICE, " src->tsval: %u tsecr: %u", 1263 src->scrub->pfss_tsval, src->scrub->pfss_tsecr); 1264 DPFPRINTF(LOG_NOTICE, " dst->tsval: %u tsecr: %u " 1265 "tsval0: %u", dst->scrub->pfss_tsval, 1266 dst->scrub->pfss_tsecr, dst->scrub->pfss_tsval0); 1267 if (pf_status.debug >= LOG_NOTICE) { 1268 log(LOG_NOTICE, "pf: "); 1269 pf_print_state(state); 1270 pf_print_flags(th->th_flags); 1271 addlog("\n"); 1272 } 1273 REASON_SET(reason, PFRES_TS); 1274 return (PF_DROP); 1275 } 1276 /* XXX I'd really like to require tsecr but it's optional */ 1277 } else if (!got_ts && (th->th_flags & TH_RST) == 0 && 1278 ((src->state == TCPS_ESTABLISHED && dst->state == TCPS_ESTABLISHED) 1279 || pd->p_len > 0 || (th->th_flags & TH_SYN)) && 1280 src->scrub && dst->scrub && 1281 (src->scrub->pfss_flags & PFSS_PAWS) && 1282 (dst->scrub->pfss_flags & PFSS_PAWS)) { 1283 /* Didn't send a timestamp. Timestamps aren't really useful 1284 * when: 1285 * - connection opening or closing (often not even sent). 1286 * but we must not let an attacker to put a FIN on a 1287 * data packet to sneak it through our ESTABLISHED check. 1288 * - on a TCP reset. RFC suggests not even looking at TS. 1289 * - on an empty ACK. The TS will not be echoed so it will 1290 * probably not help keep the RTT calculation in sync and 1291 * there isn't as much danger when the sequence numbers 1292 * got wrapped. So some stacks don't include TS on empty 1293 * ACKs :-( 1294 * 1295 * To minimize the disruption to mostly RFC1323 conformant 1296 * stacks, we will only require timestamps on data packets. 1297 * 1298 * And what do ya know, we cannot require timestamps on data 1299 * packets. There appear to be devices that do legitimate 1300 * TCP connection hijacking. There are HTTP devices that allow 1301 * a 3whs (with timestamps) and then buffer the HTTP request. 1302 * If the intermediate device has the HTTP response cache, it 1303 * will spoof the response but not bother timestamping its 1304 * packets. So we can look for the presence of a timestamp in 1305 * the first data packet and if there, require it in all future 1306 * packets. 1307 */ 1308 1309 if (pd->p_len > 0 && (src->scrub->pfss_flags & PFSS_DATA_TS)) { 1310 /* 1311 * Hey! Someone tried to sneak a packet in. Or the 1312 * stack changed its RFC1323 behavior?!?! 1313 */ 1314 if (pf_status.debug >= LOG_NOTICE) { 1315 log(LOG_NOTICE, 1316 "pf: did not receive expected RFC1323 " 1317 "timestamp"); 1318 pf_print_state(state); 1319 pf_print_flags(th->th_flags); 1320 addlog("\n"); 1321 } 1322 REASON_SET(reason, PFRES_TS); 1323 return (PF_DROP); 1324 } 1325 } 1326 1327 /* 1328 * We will note if a host sends his data packets with or without 1329 * timestamps. And require all data packets to contain a timestamp 1330 * if the first does. PAWS implicitly requires that all data packets be 1331 * timestamped. But I think there are middle-man devices that hijack 1332 * TCP streams immediately after the 3whs and don't timestamp their 1333 * packets (seen in a WWW accelerator or cache). 1334 */ 1335 if (pd->p_len > 0 && src->scrub && (src->scrub->pfss_flags & 1336 (PFSS_TIMESTAMP|PFSS_DATA_TS|PFSS_DATA_NOTS)) == PFSS_TIMESTAMP) { 1337 if (got_ts) 1338 src->scrub->pfss_flags |= PFSS_DATA_TS; 1339 else { 1340 src->scrub->pfss_flags |= PFSS_DATA_NOTS; 1341 if (pf_status.debug >= LOG_NOTICE && dst->scrub && 1342 (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) { 1343 /* Don't warn if other host rejected RFC1323 */ 1344 log(LOG_NOTICE, 1345 "pf: broken RFC1323 stack did not " 1346 "timestamp data packet. Disabled PAWS " 1347 "security."); 1348 pf_print_state(state); 1349 pf_print_flags(th->th_flags); 1350 addlog("\n"); 1351 } 1352 } 1353 } 1354 1355 /* 1356 * Update PAWS values 1357 */ 1358 if (got_ts && src->scrub && PFSS_TIMESTAMP == (src->scrub->pfss_flags & 1359 (PFSS_PAWS_IDLED|PFSS_TIMESTAMP))) { 1360 getmicrouptime(&src->scrub->pfss_last); 1361 if (SEQ_GEQ(tsval, src->scrub->pfss_tsval) || 1362 (src->scrub->pfss_flags & PFSS_PAWS) == 0) 1363 src->scrub->pfss_tsval = tsval; 1364 1365 if (tsecr) { 1366 if (SEQ_GEQ(tsecr, src->scrub->pfss_tsecr) || 1367 (src->scrub->pfss_flags & PFSS_PAWS) == 0) 1368 src->scrub->pfss_tsecr = tsecr; 1369 1370 if ((src->scrub->pfss_flags & PFSS_PAWS) == 0 && 1371 (SEQ_LT(tsval, src->scrub->pfss_tsval0) || 1372 src->scrub->pfss_tsval0 == 0)) { 1373 /* tsval0 MUST be the lowest timestamp */ 1374 src->scrub->pfss_tsval0 = tsval; 1375 } 1376 1377 /* Only fully initialized after a TS gets echoed */ 1378 if ((src->scrub->pfss_flags & PFSS_PAWS) == 0) 1379 src->scrub->pfss_flags |= PFSS_PAWS; 1380 } 1381 } 1382 1383 /* I have a dream.... TCP segment reassembly.... */ 1384 return (0); 1385 } 1386 1387 int 1388 pf_normalize_mss(struct pf_pdesc *pd, u_int16_t maxmss) 1389 { 1390 struct tcphdr *th = &pd->hdr.tcp; 1391 u_int16_t mss; 1392 int thoff; 1393 int opt, cnt, optlen = 0; 1394 u_int8_t opts[MAX_TCPOPTLEN]; 1395 u_int8_t *optp = opts; 1396 1397 thoff = th->th_off << 2; 1398 cnt = thoff - sizeof(struct tcphdr); 1399 1400 if (cnt <= 0 || cnt > MAX_TCPOPTLEN || !pf_pull_hdr(pd->m, 1401 pd->off + sizeof(*th), opts, cnt, NULL, NULL, pd->af)) 1402 return (0); 1403 1404 for (; cnt > 0; cnt -= optlen, optp += optlen) { 1405 opt = optp[0]; 1406 if (opt == TCPOPT_EOL) 1407 break; 1408 if (opt == TCPOPT_NOP) 1409 optlen = 1; 1410 else { 1411 if (cnt < 2) 1412 break; 1413 optlen = optp[1]; 1414 if (optlen < 2 || optlen > cnt) 1415 break; 1416 } 1417 if (opt == TCPOPT_MAXSEG) { 1418 u_int8_t *mssp = optp + 2; 1419 memcpy(&mss, mssp, sizeof(mss)); 1420 if (ntohs(mss) > maxmss) { 1421 size_t mssoffopts = mssp - opts; 1422 pf_patch_16_unaligned(pd, &mss, 1423 htons(maxmss), PF_ALGNMNT(mssoffopts)); 1424 m_copyback(pd->m, 1425 pd->off + sizeof(*th) + mssoffopts, 1426 sizeof(mss), &mss, M_NOWAIT); 1427 m_copyback(pd->m, pd->off, sizeof(*th), th, 1428 M_NOWAIT); 1429 } 1430 } 1431 } 1432 1433 return (0); 1434 } 1435 1436 void 1437 pf_scrub(struct mbuf *m, u_int16_t flags, sa_family_t af, u_int8_t min_ttl, 1438 u_int8_t tos) 1439 { 1440 struct ip *h = mtod(m, struct ip *); 1441 #ifdef INET6 1442 struct ip6_hdr *h6 = mtod(m, struct ip6_hdr *); 1443 #endif /* INET6 */ 1444 1445 /* Clear IP_DF if no-df was requested */ 1446 if (flags & PFSTATE_NODF && af == AF_INET && h->ip_off & htons(IP_DF)) 1447 h->ip_off &= htons(~IP_DF); 1448 1449 /* Enforce a minimum ttl, may cause endless packet loops */ 1450 if (min_ttl && af == AF_INET && h->ip_ttl < min_ttl) 1451 h->ip_ttl = min_ttl; 1452 #ifdef INET6 1453 if (min_ttl && af == AF_INET6 && h6->ip6_hlim < min_ttl) 1454 h6->ip6_hlim = min_ttl; 1455 #endif /* INET6 */ 1456 1457 /* Enforce tos */ 1458 if (flags & PFSTATE_SETTOS) { 1459 if (af == AF_INET) 1460 h->ip_tos = tos | (h->ip_tos & IPTOS_ECN_MASK); 1461 #ifdef INET6 1462 if (af == AF_INET6) { 1463 /* drugs are unable to explain such idiocy */ 1464 h6->ip6_flow &= ~htonl(0x0fc00000); 1465 h6->ip6_flow |= htonl(((u_int32_t)tos) << 20); 1466 } 1467 #endif /* INET6 */ 1468 } 1469 1470 /* random-id, but not for fragments */ 1471 if (flags & PFSTATE_RANDOMID && af == AF_INET && 1472 !(h->ip_off & ~htons(IP_DF))) 1473 h->ip_id = htons(ip_randomid()); 1474 } 1475