/*	$NetBSD: ip_reass.c,v 1.8 2011/06/27 00:45:50 enami Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1988, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ip_input.c	8.2 (Berkeley) 1/4/94
 */

/*
 * IP reassembly.
 *
 * Additive-Increase/Multiplicative-Decrease (AIMD) strategy for IP
 * reassembly queue buffer management.
 *
 * We keep a count of total IP fragments (NB: not fragmented packets)
 * awaiting reassembly (ip_nfrags) and a limit (ip_maxfrags) on fragments.
 * If ip_nfrags exceeds the ip_maxfrags limit, we drop half the total
 * fragments in reassembly queues.  This AIMD policy avoids repeatedly
 * deleting single packets under heavy fragmentation load (e.g., from lossy
 * NFS peers).
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: ip_reass.c,v 1.8 2011/06/27 00:45:50 enami Exp $");

#include <sys/param.h>
#include <sys/types.h>

#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/pool.h>
#include <sys/queue.h>
#include <sys/sysctl.h>
#include <sys/systm.h>

#include <net/if.h>
#include <net/route.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/in_pcb.h>
#include <netinet/ip_var.h>
#include <netinet/in_proto.h>
#include <netinet/ip_private.h>
#include <netinet/in_var.h>

/*
 * IP reassembly queue structures.  Each fragment being reassembled is
 * attached to one of these structures.  They are timed out after TTL
 * drops to 0, and may also be reclaimed if memory becomes tight.
 */

typedef struct ipfr_qent {
	TAILQ_ENTRY(ipfr_qent)	ipqe_q;
	struct ip *		ipqe_ip;
	struct mbuf *		ipqe_m;
	bool			ipqe_mff;
} ipfr_qent_t;

TAILQ_HEAD(ipfr_qent_head, ipfr_qent);

typedef struct ipfr_queue {
	LIST_ENTRY(ipfr_queue)	ipq_q;		/* to other reass headers */
	struct ipfr_qent_head	ipq_fragq;	/* queue of fragment entries */
	uint8_t			ipq_ttl;	/* time for reass q to live */
	uint8_t			ipq_p;		/* protocol of this fragment */
	uint16_t		ipq_id;		/* sequence id for reassembly */
	struct in_addr		ipq_src;
	struct in_addr		ipq_dst;
	uint16_t		ipq_nfrags;	/* frags in this queue entry */
	uint8_t			ipq_tos;	/* TOS of this fragment */
} ipfr_queue_t;

/*
 * Hash table of IP reassembly queues.
 */
#define	IPREASS_HASH_SHIFT	6
#define	IPREASS_HASH_SIZE	(1 << IPREASS_HASH_SHIFT)
#define	IPREASS_HASH_MASK	(IPREASS_HASH_SIZE - 1)
#define	IPREASS_HASH(x, y) \
    (((((x) & 0xf) | ((((x) >> 8) & 0xf) << 4)) ^ (y)) & IPREASS_HASH_MASK)

static LIST_HEAD(, ipfr_queue)	ip_frags[IPREASS_HASH_SIZE];
static pool_cache_t	ipfren_cache;
static kmutex_t		ipfr_lock;

/* Number of packets in reassembly queue and total number of fragments. */
static int	ip_nfragpackets;
static int	ip_nfrags;

/* Limits on packet and fragments. */
static int	ip_maxfragpackets;
static int	ip_maxfrags;

/*
 * Cached copy of nmbclusters.  If nmbclusters is different, recalculate
 * IP parameters derived from nmbclusters.
 */
static int	ip_nmbclusters;

/*
 * IP reassembly TTL machinery for multiplicative drop.
 */
static u_int	fragttl_histo[IPFRAGTTL + 1];

static struct sysctllog *ip_reass_sysctllog;

void		sysctl_ip_reass_setup(void);
static void	ip_nmbclusters_changed(void);

static struct mbuf *	ip_reass(ipfr_qent_t *, ipfr_queue_t *, u_int);
static u_int	ip_reass_ttl_decr(u_int ticks);
static void	ip_reass_drophalf(void);
static void	ip_freef(ipfr_queue_t *);
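/*
 * Illustrative computation of the IPREASS_HASH() macro above (the values
 * are arbitrary, not taken from real traffic).  The macro mixes the low
 * nibble of each of the two low-order bytes of the raw s_addr word with
 * the raw (network-order) ip_id, then masks the result down to a bucket
 * index in [0, IPREASS_HASH_SIZE - 1]:
 *
 *	x = 0x00005ca3 (ip_src.s_addr), y = 0x1d2b (ip_id)
 *	(x & 0xf)                   = 0x03
 *	(((x >> 8) & 0xf) << 4)     = 0xc0
 *	0x03 | 0xc0                 = 0xc3
 *	0xc3 ^ 0x1d2b               = 0x1de8
 *	0x1de8 & IPREASS_HASH_MASK  = 0x28	(bucket 40 of 64)
 */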
/*
 * ip_reass_init:
 *
 *	Initialization of IP reassembly mechanism.
 */
void
ip_reass_init(void)
{
	int i;

	ipfren_cache = pool_cache_init(sizeof(ipfr_qent_t), coherency_unit,
	    0, 0, "ipfrenpl", NULL, IPL_NET, NULL, NULL, NULL);
	mutex_init(&ipfr_lock, MUTEX_DEFAULT, IPL_VM);

	for (i = 0; i < IPREASS_HASH_SIZE; i++) {
		LIST_INIT(&ip_frags[i]);
	}
	ip_maxfragpackets = 200;
	ip_maxfrags = 0;
	ip_nmbclusters_changed();

	sysctl_ip_reass_setup();
}

void
sysctl_ip_reass_setup(void)
{

	sysctl_createv(&ip_reass_sysctllog, 0, NULL, NULL,
		CTLFLAG_PERMANENT,
		CTLTYPE_NODE, "net", NULL,
		NULL, 0, NULL, 0,
		CTL_NET, CTL_EOL);
	sysctl_createv(&ip_reass_sysctllog, 0, NULL, NULL,
		CTLFLAG_PERMANENT,
		CTLTYPE_NODE, "inet",
		SYSCTL_DESCR("PF_INET related settings"),
		NULL, 0, NULL, 0,
		CTL_NET, PF_INET, CTL_EOL);
	sysctl_createv(&ip_reass_sysctllog, 0, NULL, NULL,
		CTLFLAG_PERMANENT,
		CTLTYPE_NODE, "ip",
		SYSCTL_DESCR("IPv4 related settings"),
		NULL, 0, NULL, 0,
		CTL_NET, PF_INET, IPPROTO_IP, CTL_EOL);

	sysctl_createv(&ip_reass_sysctllog, 0, NULL, NULL,
		CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		CTLTYPE_INT, "maxfragpackets",
		SYSCTL_DESCR("Maximum number of fragments to retain for "
			     "possible reassembly"),
		NULL, 0, &ip_maxfragpackets, 0,
		CTL_NET, PF_INET, IPPROTO_IP, IPCTL_MAXFRAGPACKETS, CTL_EOL);
}

#define	CHECK_NMBCLUSTER_PARAMS()				\
do {								\
	if (__predict_false(ip_nmbclusters != nmbclusters))	\
		ip_nmbclusters_changed();			\
} while (/*CONSTCOND*/0)

/*
 * Compute IP limits derived from the value of nmbclusters.
 */
static void
ip_nmbclusters_changed(void)
{
	ip_maxfrags = nmbclusters / 4;
	ip_nmbclusters = nmbclusters;
}

/*
 * ip_reass:
 *
 *	Take incoming datagram fragment and try to reassemble it into whole
 *	datagram.  If a chain for reassembly of this datagram already exists,
 *	then it is given as 'fp'; otherwise we have to make a chain.
 */
struct mbuf *
ip_reass(ipfr_qent_t *ipqe, ipfr_queue_t *fp, const u_int hash)
{
	struct ip *ip = ipqe->ipqe_ip, *qip;
	const int hlen = ip->ip_hl << 2;
	struct mbuf *m = ipqe->ipqe_m, *t;
	ipfr_qent_t *nq, *p, *q;
	int i, next;

	KASSERT(mutex_owned(&ipfr_lock));

	/*
	 * Presence of header sizes in mbufs would confuse code below.
	 */
	m->m_data += hlen;
	m->m_len -= hlen;

#ifdef notyet
	/* Make sure fragment limit is up-to-date. */
	CHECK_NMBCLUSTER_PARAMS();

	/* If we have too many fragments, drop the older half. */
	if (ip_nfrags >= ip_maxfrags) {
		ip_reass_drophalf();
	}
#endif

	/*
	 * We are about to add a fragment; increment frag count.
	 */
	ip_nfrags++;

	/*
	 * If first fragment to arrive, create a reassembly queue.
	 */
	if (fp == NULL) {
		/*
		 * Enforce upper bound on number of fragmented packets
		 * for which we attempt reassembly: a) if maxfragpackets
		 * is 0, never accept fragments; b) if maxfragpackets is
		 * -1, accept all fragments without limitation.
		 */
		if (ip_maxfragpackets < 0)
			;
		else if (ip_nfragpackets >= ip_maxfragpackets) {
			goto dropfrag;
		}
		fp = malloc(sizeof(ipfr_queue_t), M_FTABLE, M_NOWAIT);
		if (fp == NULL) {
			goto dropfrag;
		}
		ip_nfragpackets++;
		TAILQ_INIT(&fp->ipq_fragq);
		fp->ipq_nfrags = 1;
		fp->ipq_ttl = IPFRAGTTL;
		fp->ipq_p = ip->ip_p;
		fp->ipq_id = ip->ip_id;
		fp->ipq_tos = ip->ip_tos;
		fp->ipq_src = ip->ip_src;
		fp->ipq_dst = ip->ip_dst;
		LIST_INSERT_HEAD(&ip_frags[hash], fp, ipq_q);
		p = NULL;
		goto insert;
	} else {
		fp->ipq_nfrags++;
	}

	/*
	 * Find a segment which begins after this one does.
	 */
	TAILQ_FOREACH(q, &fp->ipq_fragq, ipqe_q) {
		if (ntohs(q->ipqe_ip->ip_off) > ntohs(ip->ip_off))
			break;
	}
	if (q != NULL) {
		p = TAILQ_PREV(q, ipfr_qent_head, ipqe_q);
	} else {
		p = TAILQ_LAST(&fp->ipq_fragq, ipfr_qent_head);
	}

	/*
	 * If there is a preceding segment, it may provide some of our
	 * data already.  If so, drop the data from the incoming segment.
	 * If it provides all of our data, drop us.
	 */
	if (p != NULL) {
		i = ntohs(p->ipqe_ip->ip_off) + ntohs(p->ipqe_ip->ip_len) -
		    ntohs(ip->ip_off);
		if (i > 0) {
			if (i >= ntohs(ip->ip_len)) {
				goto dropfrag;
			}
			m_adj(ipqe->ipqe_m, i);
			ip->ip_off = htons(ntohs(ip->ip_off) + i);
			ip->ip_len = htons(ntohs(ip->ip_len) - i);
		}
	}

	/*
	 * While we overlap succeeding segments trim them or, if they are
	 * completely covered, dequeue them.
	 */
	while (q != NULL) {
		size_t end;

		qip = q->ipqe_ip;
		end = ntohs(ip->ip_off) + ntohs(ip->ip_len);
		if (end <= ntohs(qip->ip_off)) {
			break;
		}
		i = end - ntohs(qip->ip_off);
		if (i < ntohs(qip->ip_len)) {
			qip->ip_len = htons(ntohs(qip->ip_len) - i);
			qip->ip_off = htons(ntohs(qip->ip_off) + i);
			m_adj(q->ipqe_m, i);
			break;
		}
		nq = TAILQ_NEXT(q, ipqe_q);
		m_freem(q->ipqe_m);
		TAILQ_REMOVE(&fp->ipq_fragq, q, ipqe_q);
		pool_cache_put(ipfren_cache, q);
		fp->ipq_nfrags--;
		ip_nfrags--;
		q = nq;
	}

insert:
	/*
	 * Stick new segment in its place; check for complete reassembly.
	 */
	if (p == NULL) {
		TAILQ_INSERT_HEAD(&fp->ipq_fragq, ipqe, ipqe_q);
	} else {
		TAILQ_INSERT_AFTER(&fp->ipq_fragq, p, ipqe, ipqe_q);
	}
	next = 0;
	TAILQ_FOREACH(q, &fp->ipq_fragq, ipqe_q) {
		qip = q->ipqe_ip;
		if (ntohs(qip->ip_off) != next) {
			mutex_exit(&ipfr_lock);
			return NULL;
		}
		next += ntohs(qip->ip_len);
	}
	p = TAILQ_LAST(&fp->ipq_fragq, ipfr_qent_head);
	if (p->ipqe_mff) {
		mutex_exit(&ipfr_lock);
		return NULL;
	}

	/*
	 * Reassembly is complete.  Check for a bogus message size.
	 */
	q = TAILQ_FIRST(&fp->ipq_fragq);
	ip = q->ipqe_ip;
	if ((next + (ip->ip_hl << 2)) > IP_MAXPACKET) {
		IP_STATINC(IP_STAT_TOOLONG);
		ip_freef(fp);
		mutex_exit(&ipfr_lock);
		return NULL;
	}
	LIST_REMOVE(fp, ipq_q);
	ip_nfrags -= fp->ipq_nfrags;
	ip_nfragpackets--;
	mutex_exit(&ipfr_lock);

	/* Concatenate all fragments. */
	m = q->ipqe_m;
	t = m->m_next;
	m->m_next = NULL;
	m_cat(m, t);
	nq = TAILQ_NEXT(q, ipqe_q);
	pool_cache_put(ipfren_cache, q);

	for (q = nq; q != NULL; q = nq) {
		t = q->ipqe_m;
		nq = TAILQ_NEXT(q, ipqe_q);
		pool_cache_put(ipfren_cache, q);
		m_cat(m, t);
	}

	/*
	 * Create header for new packet by modifying header of first
	 * packet.  Dequeue and discard fragment reassembly header.  Make
	 * header visible.
	 */
	ip->ip_len = htons((ip->ip_hl << 2) + next);
	ip->ip_src = fp->ipq_src;
	ip->ip_dst = fp->ipq_dst;
	free(fp, M_FTABLE);

	m->m_len += (ip->ip_hl << 2);
	m->m_data -= (ip->ip_hl << 2);

	/* Fix up mbuf.  XXX This should be done elsewhere. */
	if (m->m_flags & M_PKTHDR) {
		int plen = 0;
		for (t = m; t; t = t->m_next) {
			plen += t->m_len;
		}
		m->m_pkthdr.len = plen;
		m->m_pkthdr.csum_flags = 0;
	}
	return m;

dropfrag:
	if (fp != NULL) {
		fp->ipq_nfrags--;
	}
	ip_nfrags--;
	IP_STATINC(IP_STAT_FRAGDROPPED);
	mutex_exit(&ipfr_lock);

	pool_cache_put(ipfren_cache, ipqe);
	m_freem(m);
	return NULL;
}

/*
 * ip_freef:
 *
 *	Free a fragment reassembly header and all associated datagrams.
 */
static void
ip_freef(ipfr_queue_t *fp)
{
	ipfr_qent_t *q;

	KASSERT(mutex_owned(&ipfr_lock));

	LIST_REMOVE(fp, ipq_q);
	ip_nfrags -= fp->ipq_nfrags;
	ip_nfragpackets--;

	while ((q = TAILQ_FIRST(&fp->ipq_fragq)) != NULL) {
		TAILQ_REMOVE(&fp->ipq_fragq, q, ipqe_q);
		m_freem(q->ipqe_m);
		pool_cache_put(ipfren_cache, q);
	}
	free(fp, M_FTABLE);
}

/*
 * ip_reass_ttl_decr:
 *
 *	Decrement TTL of all reassembly queue entries by `ticks'.  Count
 *	number of distinct fragments (as opposed to partial, fragmented
 *	datagrams) in the reassembly queue.  While we traverse the entire
 *	reassembly queue, compute and return the median TTL over all
 *	fragments.
 */
static u_int
ip_reass_ttl_decr(u_int ticks)
{
	u_int nfrags, median, dropfraction, keepfraction;
	ipfr_queue_t *fp, *nfp;
	int i;

	nfrags = 0;
	memset(fragttl_histo, 0, sizeof(fragttl_histo));

	for (i = 0; i < IPREASS_HASH_SIZE; i++) {
		for (fp = LIST_FIRST(&ip_frags[i]); fp != NULL; fp = nfp) {
			fp->ipq_ttl = ((fp->ipq_ttl <= ticks) ?
			    0 : fp->ipq_ttl - ticks);
			nfp = LIST_NEXT(fp, ipq_q);
			if (fp->ipq_ttl == 0) {
				IP_STATINC(IP_STAT_FRAGTIMEOUT);
				ip_freef(fp);
			} else {
				nfrags += fp->ipq_nfrags;
				fragttl_histo[fp->ipq_ttl] += fp->ipq_nfrags;
			}
		}
	}

	KASSERT(ip_nfrags == nfrags);

	/* Find median (or other drop fraction) in histogram. */
	dropfraction = (ip_nfrags / 2);
	keepfraction = ip_nfrags - dropfraction;
	for (i = IPFRAGTTL, median = 0; i >= 0; i--) {
		median += fragttl_histo[i];
		if (median >= keepfraction)
			break;
	}

	/* Return TTL of median (or other fraction). */
	return (u_int)i;
}

static void
ip_reass_drophalf(void)
{
	u_int median_ticks;

	KASSERT(mutex_owned(&ipfr_lock));

	/*
	 * Compute median TTL of all fragments, and count frags
	 * with that TTL or lower (roughly half of all fragments).
	 */
	median_ticks = ip_reass_ttl_decr(0);

	/* Drop half. */
	median_ticks = ip_reass_ttl_decr(median_ticks);
}
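/*
 * Worked example of the two-pass drop-half mechanism above (the fragment
 * counts are invented for illustration; assume IPFRAGTTL is 60 here).
 * Suppose 60 reassembly queues are pending, each holding one fragment,
 * at the TTL values 1..60, so fragttl_histo[t] == 1 for t in [1, 60]:
 *
 *	dropfraction = 60 / 2 = 30, keepfraction = 60 - 30 = 30
 *
 * The first pass, ip_reass_ttl_decr(0), scans the histogram from IPFRAGTTL
 * downwards and reaches a cumulative count of 30 at TTL 31, which it
 * returns as the median.  The second pass, ip_reass_ttl_decr(31), ages
 * every queue by 31 ticks: the 31 queues with TTL <= 31 expire and are
 * freed, while the 29 queues with TTL 32..60 survive at TTL 1..29.  The
 * net effect is that roughly the older half of all fragments is reclaimed
 * in one sweep.
 */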
/*
 * ip_reass_drain: drain off all datagram fragments.  Do not acquire
 * softnet_lock, as this can be called from hardware interrupt context.
 */
void
ip_reass_drain(void)
{

	/*
	 * We may be called from a device's interrupt context.  If
	 * the ipq is already busy, just bail out now.
	 */
	if (mutex_tryenter(&ipfr_lock)) {
		/*
		 * Drop half the total fragments now.  If more mbufs are
		 * needed, we will be called again soon.
		 */
		ip_reass_drophalf();
		mutex_exit(&ipfr_lock);
	}
}

/*
 * ip_reass_slowtimo:
 *
 *	If a timer expires on a reassembly queue, discard it.
 */
void
ip_reass_slowtimo(void)
{
	static u_int dropscanidx = 0;
	u_int i, median_ttl;

	mutex_enter(&ipfr_lock);

	/* Age TTL of all fragments by 1 tick. */
	median_ttl = ip_reass_ttl_decr(1);

	/* Make sure fragment limit is up-to-date. */
	CHECK_NMBCLUSTER_PARAMS();

	/* If we have too many fragments, drop the older half. */
	if (ip_nfrags > ip_maxfrags) {
		ip_reass_ttl_decr(median_ttl);
	}

	/*
	 * If we are over the maximum number of fragmented packets (due to
	 * the limit being lowered), drain off enough to get down to the
	 * new limit.  Start draining from the reassembly hashqueue most
	 * recently drained.
	 */
	if (ip_maxfragpackets < 0)
		;
	else {
		int wrapped = 0;

		i = dropscanidx;
		while (ip_nfragpackets > ip_maxfragpackets && wrapped == 0) {
			while (LIST_FIRST(&ip_frags[i]) != NULL) {
				ip_freef(LIST_FIRST(&ip_frags[i]));
			}
			if (++i >= IPREASS_HASH_SIZE) {
				i = 0;
			}
			/*
			 * Do not scan forever even if fragment counters are
			 * wrong: stop after scanning entire reassembly queue.
			 */
			if (i == dropscanidx) {
				wrapped = 1;
			}
		}
		dropscanidx = i;
	}
	mutex_exit(&ipfr_lock);
}

/*
 * ip_reass_packet: generic routine to perform IP reassembly.
 *
 * => Passed fragment should have IP_MF flag and/or offset set.
 * => Fragment should not have any flags other than IP_MF set.
 *
 * => Returns 0 on success, or an error otherwise.
 * => On completion, m0 represents the constructed final packet.
 */
int
ip_reass_packet(struct mbuf **m0, struct ip *ip)
{
	const int hlen = ip->ip_hl << 2;
	const int len = ntohs(ip->ip_len);
	struct mbuf *m = *m0;
	ipfr_queue_t *fp;
	ipfr_qent_t *ipqe;
	u_int hash, off, flen;
	bool mff;

	/*
	 * Prevent TCP blind data attacks by not allowing non-initial
	 * fragments to start at less than 68 bytes (minimal fragment
	 * size) and making sure the first fragment is at least 68
	 * bytes.
	 */
	off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
	if ((off > 0 ? off + hlen : len) < IP_MINFRAGSIZE - 1) {
		IP_STATINC(IP_STAT_BADFRAGS);
		return EINVAL;
	}

	/*
	 * Fragment length and MF flag.  Make sure that fragments have
	 * a data length which is non-zero and a multiple of 8 bytes.
	 */
	flen = ntohs(ip->ip_len) - hlen;
	mff = (ip->ip_off & htons(IP_MF)) != 0;
	if (mff && (flen == 0 || (flen & 0x7) != 0)) {
		IP_STATINC(IP_STAT_BADFRAGS);
		return EINVAL;
	}

	/*
	 * Adjust total IP length to not reflect header and convert
	 * offset of this to bytes.  XXX: clobbers struct ip.
	 */
	ip->ip_len = htons(flen);
	ip->ip_off = htons(off);

	/* Look for queue of fragments of this datagram. */
	mutex_enter(&ipfr_lock);
	hash = IPREASS_HASH(ip->ip_src.s_addr, ip->ip_id);
	LIST_FOREACH(fp, &ip_frags[hash], ipq_q) {
		if (ip->ip_id != fp->ipq_id)
			continue;
		if (!in_hosteq(ip->ip_src, fp->ipq_src))
			continue;
		if (!in_hosteq(ip->ip_dst, fp->ipq_dst))
			continue;
		if (ip->ip_p != fp->ipq_p)
			continue;
		break;
	}

	/* Make sure that TOS matches previous fragments. */
	if (fp && fp->ipq_tos != ip->ip_tos) {
		IP_STATINC(IP_STAT_BADFRAGS);
		mutex_exit(&ipfr_lock);
		return EINVAL;
	}

	/*
	 * Create new entry and attempt reassembly.
	 */
	IP_STATINC(IP_STAT_FRAGMENTS);
	ipqe = pool_cache_get(ipfren_cache, PR_NOWAIT);
	if (ipqe == NULL) {
		IP_STATINC(IP_STAT_RCVMEMDROP);
		mutex_exit(&ipfr_lock);
		return ENOMEM;
	}
	ipqe->ipqe_mff = mff;
	ipqe->ipqe_m = m;
	ipqe->ipqe_ip = ip;

	*m0 = ip_reass(ipqe, fp, hash);
	if (*m0) {
		/* Note that the packet was finally reassembled. */
		IP_STATINC(IP_STAT_REASSEMBLED);
	}
	return 0;
}
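/*
 * A minimal sketch of how ip_reass_packet() is typically driven from the
 * IPv4 input path (illustrative and simplified; the real call site lives
 * in ip_input() and performs additional bookkeeping around this):
 *
 *	if (ip->ip_off & ~htons(IP_DF | IP_RF)) {
 *		if (ip_reass_packet(&m, ip) != 0)
 *			goto bad;		(invalid fragment)
 *		if (m == NULL)
 *			return;			(more fragments expected)
 *		ip = mtod(m, struct ip *);	(reassembled datagram)
 *	}
 */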