/*	$NetBSD: ip_reass.c,v 1.9 2014/02/25 18:30:12 pooka Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1988, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ip_input.c	8.2 (Berkeley) 1/4/94
 */

/*
 * IP reassembly.
 *
 * Additive-Increase/Multiplicative-Decrease (AIMD) strategy for IP
 * reassembly queue buffer management.
 *
 * We keep a count of total IP fragments (NB: not fragmented packets)
 * awaiting reassembly (ip_nfrags), and a limit (ip_maxfrags) on fragments.
 * If ip_nfrags exceeds the ip_maxfrags limit, we drop half the total
 * fragments in reassembly queues.  This AIMD policy avoids repeatedly
 * deleting single packets under heavy fragmentation load (e.g., from lossy
 * NFS peers).
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: ip_reass.c,v 1.9 2014/02/25 18:30:12 pooka Exp $");

#include <sys/param.h>
#include <sys/types.h>

#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/pool.h>
#include <sys/queue.h>
#include <sys/sysctl.h>
#include <sys/systm.h>

#include <net/if.h>
#include <net/route.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/in_pcb.h>
#include <netinet/ip_var.h>
#include <netinet/in_proto.h>
#include <netinet/ip_private.h>
#include <netinet/in_var.h>

/*
 * IP reassembly queue structures.  Each fragment being reassembled is
 * attached to one of these structures.  They are timed out after the TTL
 * drops to 0, and may also be reclaimed if memory becomes tight.
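 * Fragment entries within each queue are kept sorted by fragment offset.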
 */

typedef struct ipfr_qent {
	TAILQ_ENTRY(ipfr_qent)	ipqe_q;
	struct ip *		ipqe_ip;
	struct mbuf *		ipqe_m;
	bool			ipqe_mff;
} ipfr_qent_t;

TAILQ_HEAD(ipfr_qent_head, ipfr_qent);

typedef struct ipfr_queue {
	LIST_ENTRY(ipfr_queue)	ipq_q;		/* to other reass headers */
	struct ipfr_qent_head	ipq_fragq;	/* queue of fragment entries */
	uint8_t			ipq_ttl;	/* time for reass q to live */
	uint8_t			ipq_p;		/* protocol of this fragment */
	uint16_t		ipq_id;		/* sequence id for reassembly */
	struct in_addr		ipq_src;
	struct in_addr		ipq_dst;
	uint16_t		ipq_nfrags;	/* frags in this queue entry */
	uint8_t			ipq_tos;	/* TOS of this fragment */
} ipfr_queue_t;

/*
 * Hash table of IP reassembly queues.
 */
#define	IPREASS_HASH_SHIFT	6
#define	IPREASS_HASH_SIZE	(1 << IPREASS_HASH_SHIFT)
#define	IPREASS_HASH_MASK	(IPREASS_HASH_SIZE - 1)
#define	IPREASS_HASH(x, y) \
    (((((x) & 0xf) | ((((x) >> 8) & 0xf) << 4)) ^ (y)) & IPREASS_HASH_MASK)

static LIST_HEAD(, ipfr_queue)	ip_frags[IPREASS_HASH_SIZE];
static pool_cache_t		ipfren_cache;
static kmutex_t			ipfr_lock;

/* Number of packets in reassembly queue and total number of fragments. */
static int	ip_nfragpackets;
static int	ip_nfrags;

/* Limits on packets and fragments. */
static int	ip_maxfragpackets;
static int	ip_maxfrags;

/*
 * Cached copy of nmbclusters.  If nmbclusters is different, recalculate
 * IP parameters derived from nmbclusters.
 */
static int	ip_nmbclusters;

/*
 * IP reassembly TTL machinery for multiplicative drop.
 */
static u_int	fragttl_histo[IPFRAGTTL + 1];

static struct sysctllog *ip_reass_sysctllog;

void		sysctl_ip_reass_setup(void);
static void	ip_nmbclusters_changed(void);

static struct mbuf *	ip_reass(ipfr_qent_t *, ipfr_queue_t *, u_int);
static u_int		ip_reass_ttl_decr(u_int ticks);
static void		ip_reass_drophalf(void);
static void		ip_freef(ipfr_queue_t *);

/*
 * ip_reass_init:
 *
 *	Initialization of the IP reassembly mechanism.
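 *
 *	Creates the fragment entry pool cache, the ipfr_lock mutex and
 *	the hash table of reassembly queues.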
 */
void
ip_reass_init(void)
{
	int i;

	ipfren_cache = pool_cache_init(sizeof(ipfr_qent_t), coherency_unit,
	    0, 0, "ipfrenpl", NULL, IPL_NET, NULL, NULL, NULL);
	mutex_init(&ipfr_lock, MUTEX_DEFAULT, IPL_VM);

	for (i = 0; i < IPREASS_HASH_SIZE; i++) {
		LIST_INIT(&ip_frags[i]);
	}
	ip_maxfragpackets = 200;
	ip_maxfrags = 0;
	ip_nmbclusters_changed();

	sysctl_ip_reass_setup();
}

void
sysctl_ip_reass_setup(void)
{

	sysctl_createv(&ip_reass_sysctllog, 0, NULL, NULL,
		CTLFLAG_PERMANENT,
		CTLTYPE_NODE, "inet",
		SYSCTL_DESCR("PF_INET related settings"),
		NULL, 0, NULL, 0,
		CTL_NET, PF_INET, CTL_EOL);
	sysctl_createv(&ip_reass_sysctllog, 0, NULL, NULL,
		CTLFLAG_PERMANENT,
		CTLTYPE_NODE, "ip",
		SYSCTL_DESCR("IPv4 related settings"),
		NULL, 0, NULL, 0,
		CTL_NET, PF_INET, IPPROTO_IP, CTL_EOL);

	sysctl_createv(&ip_reass_sysctllog, 0, NULL, NULL,
		CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		CTLTYPE_INT, "maxfragpackets",
		SYSCTL_DESCR("Maximum number of fragments to retain for "
			     "possible reassembly"),
		NULL, 0, &ip_maxfragpackets, 0,
		CTL_NET, PF_INET, IPPROTO_IP, IPCTL_MAXFRAGPACKETS, CTL_EOL);
}

#define	CHECK_NMBCLUSTER_PARAMS()				\
do {								\
	if (__predict_false(ip_nmbclusters != nmbclusters))	\
		ip_nmbclusters_changed();			\
} while (/*CONSTCOND*/0)

/*
 * Compute IP limits derived from the value of nmbclusters.
 */
static void
ip_nmbclusters_changed(void)
{
	ip_maxfrags = nmbclusters / 4;
	ip_nmbclusters = nmbclusters;
}

/*
 * ip_reass:
 *
 *	Take an incoming datagram fragment and try to reassemble it into
 *	a whole datagram.  If a chain for reassembly of this datagram
 *	already exists, then it is given as 'fp'; otherwise we have to
 *	make a chain.
 */
struct mbuf *
ip_reass(ipfr_qent_t *ipqe, ipfr_queue_t *fp, const u_int hash)
{
	struct ip *ip = ipqe->ipqe_ip, *qip;
	const int hlen = ip->ip_hl << 2;
	struct mbuf *m = ipqe->ipqe_m, *t;
	ipfr_qent_t *nq, *p, *q;
	int i, next;

	KASSERT(mutex_owned(&ipfr_lock));

	/*
	 * Presence of header sizes in mbufs would confuse code below.
	 */
	m->m_data += hlen;
	m->m_len -= hlen;

#ifdef notyet
	/* Make sure fragment limit is up-to-date. */
	CHECK_NMBCLUSTER_PARAMS();

	/* If we have too many fragments, drop the older half. */
	if (ip_nfrags >= ip_maxfrags) {
		ip_reass_drophalf();
	}
#endif

	/*
	 * We are about to add a fragment; increment frag count.
	 */
	ip_nfrags++;

	/*
	 * If first fragment to arrive, create a reassembly queue.
	 */
	if (fp == NULL) {
		/*
		 * Enforce upper bound on number of fragmented packets
		 * for which we attempt reassembly: a) if maxfrag is 0,
		 * never accept fragments; b) if maxfrag is -1, accept
		 * all fragments without limitation.
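		 * Otherwise, do not allow more than ip_maxfragpackets
		 * datagrams to be queued for reassembly at once.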
		 */
		if (ip_maxfragpackets < 0)
			;
		else if (ip_nfragpackets >= ip_maxfragpackets) {
			goto dropfrag;
		}
		fp = malloc(sizeof(ipfr_queue_t), M_FTABLE, M_NOWAIT);
		if (fp == NULL) {
			goto dropfrag;
		}
		ip_nfragpackets++;
		TAILQ_INIT(&fp->ipq_fragq);
		fp->ipq_nfrags = 1;
		fp->ipq_ttl = IPFRAGTTL;
		fp->ipq_p = ip->ip_p;
		fp->ipq_id = ip->ip_id;
		fp->ipq_tos = ip->ip_tos;
		fp->ipq_src = ip->ip_src;
		fp->ipq_dst = ip->ip_dst;
		LIST_INSERT_HEAD(&ip_frags[hash], fp, ipq_q);
		p = NULL;
		goto insert;
	} else {
		fp->ipq_nfrags++;
	}

	/*
	 * Find a segment which begins after this one does.
	 */
	TAILQ_FOREACH(q, &fp->ipq_fragq, ipqe_q) {
		if (ntohs(q->ipqe_ip->ip_off) > ntohs(ip->ip_off))
			break;
	}
	if (q != NULL) {
		p = TAILQ_PREV(q, ipfr_qent_head, ipqe_q);
	} else {
		p = TAILQ_LAST(&fp->ipq_fragq, ipfr_qent_head);
	}

	/*
	 * If there is a preceding segment, it may provide some of our
	 * data already.  If so, drop the data from the incoming segment.
	 * If it provides all of our data, drop us.
	 */
	if (p != NULL) {
		i = ntohs(p->ipqe_ip->ip_off) + ntohs(p->ipqe_ip->ip_len) -
		    ntohs(ip->ip_off);
		if (i > 0) {
			if (i >= ntohs(ip->ip_len)) {
				goto dropfrag;
			}
			m_adj(ipqe->ipqe_m, i);
			ip->ip_off = htons(ntohs(ip->ip_off) + i);
			ip->ip_len = htons(ntohs(ip->ip_len) - i);
		}
	}

	/*
	 * While we overlap succeeding segments, trim them or, if they are
	 * completely covered, dequeue them.
	 */
	while (q != NULL) {
		size_t end;

		qip = q->ipqe_ip;
		end = ntohs(ip->ip_off) + ntohs(ip->ip_len);
		if (end <= ntohs(qip->ip_off)) {
			break;
		}
		i = end - ntohs(qip->ip_off);
		if (i < ntohs(qip->ip_len)) {
			qip->ip_len = htons(ntohs(qip->ip_len) - i);
			qip->ip_off = htons(ntohs(qip->ip_off) + i);
			m_adj(q->ipqe_m, i);
			break;
		}
		nq = TAILQ_NEXT(q, ipqe_q);
		m_freem(q->ipqe_m);
		TAILQ_REMOVE(&fp->ipq_fragq, q, ipqe_q);
		pool_cache_put(ipfren_cache, q);
		fp->ipq_nfrags--;
		ip_nfrags--;
		q = nq;
	}

insert:
	/*
	 * Stick new segment in its place; check for complete reassembly.
	 */
	if (p == NULL) {
		TAILQ_INSERT_HEAD(&fp->ipq_fragq, ipqe, ipqe_q);
	} else {
		TAILQ_INSERT_AFTER(&fp->ipq_fragq, p, ipqe, ipqe_q);
	}
	next = 0;
	TAILQ_FOREACH(q, &fp->ipq_fragq, ipqe_q) {
		qip = q->ipqe_ip;
		if (ntohs(qip->ip_off) != next) {
			mutex_exit(&ipfr_lock);
			return NULL;
		}
		next += ntohs(qip->ip_len);
	}
	p = TAILQ_LAST(&fp->ipq_fragq, ipfr_qent_head);
	if (p->ipqe_mff) {
		mutex_exit(&ipfr_lock);
		return NULL;
	}

	/*
	 * Reassembly is complete.  Check for a bogus message size.
	 */
	q = TAILQ_FIRST(&fp->ipq_fragq);
	ip = q->ipqe_ip;
	if ((next + (ip->ip_hl << 2)) > IP_MAXPACKET) {
		IP_STATINC(IP_STAT_TOOLONG);
		ip_freef(fp);
		mutex_exit(&ipfr_lock);
		return NULL;
	}
	LIST_REMOVE(fp, ipq_q);
	ip_nfrags -= fp->ipq_nfrags;
	ip_nfragpackets--;
	mutex_exit(&ipfr_lock);

	/*
	 * Concatenate all fragments.
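	 * The mbuf chain of the first fragment becomes the head of the
	 * reassembled packet; the remaining fragment entries are freed
	 * back to the pool cache as their mbufs are appended.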
	 */
	m = q->ipqe_m;
	t = m->m_next;
	m->m_next = NULL;
	m_cat(m, t);
	nq = TAILQ_NEXT(q, ipqe_q);
	pool_cache_put(ipfren_cache, q);

	for (q = nq; q != NULL; q = nq) {
		t = q->ipqe_m;
		nq = TAILQ_NEXT(q, ipqe_q);
		pool_cache_put(ipfren_cache, q);
		m_cat(m, t);
	}

	/*
	 * Create header for new packet by modifying header of first
	 * packet.  Dequeue and discard fragment reassembly header.  Make
	 * header visible.
	 */
	ip->ip_len = htons((ip->ip_hl << 2) + next);
	ip->ip_src = fp->ipq_src;
	ip->ip_dst = fp->ipq_dst;
	free(fp, M_FTABLE);

	m->m_len += (ip->ip_hl << 2);
	m->m_data -= (ip->ip_hl << 2);

	/* Fix up mbuf.  XXX This should be done elsewhere. */
	if (m->m_flags & M_PKTHDR) {
		int plen = 0;
		for (t = m; t; t = t->m_next) {
			plen += t->m_len;
		}
		m->m_pkthdr.len = plen;
		m->m_pkthdr.csum_flags = 0;
	}
	return m;

dropfrag:
	if (fp != NULL) {
		fp->ipq_nfrags--;
	}
	ip_nfrags--;
	IP_STATINC(IP_STAT_FRAGDROPPED);
	mutex_exit(&ipfr_lock);

	pool_cache_put(ipfren_cache, ipqe);
	m_freem(m);
	return NULL;
}

/*
 * ip_freef:
 *
 *	Free a fragment reassembly header and all associated datagrams.
 */
static void
ip_freef(ipfr_queue_t *fp)
{
	ipfr_qent_t *q;

	KASSERT(mutex_owned(&ipfr_lock));

	LIST_REMOVE(fp, ipq_q);
	ip_nfrags -= fp->ipq_nfrags;
	ip_nfragpackets--;

	while ((q = TAILQ_FIRST(&fp->ipq_fragq)) != NULL) {
		TAILQ_REMOVE(&fp->ipq_fragq, q, ipqe_q);
		m_freem(q->ipqe_m);
		pool_cache_put(ipfren_cache, q);
	}
	free(fp, M_FTABLE);
}

/*
 * ip_reass_ttl_decr:
 *
 *	Decrement TTL of all reassembly queue entries by `ticks'.  Count
 *	the number of distinct fragments (as opposed to partial, fragmented
 *	datagrams) in the reassembly queue.  While we traverse the entire
 *	reassembly queue, compute and return the median TTL over all
 *	fragments.
 */
static u_int
ip_reass_ttl_decr(u_int ticks)
{
	u_int nfrags, median, dropfraction, keepfraction;
	ipfr_queue_t *fp, *nfp;
	int i;

	nfrags = 0;
	memset(fragttl_histo, 0, sizeof(fragttl_histo));

	for (i = 0; i < IPREASS_HASH_SIZE; i++) {
		for (fp = LIST_FIRST(&ip_frags[i]); fp != NULL; fp = nfp) {
			fp->ipq_ttl = ((fp->ipq_ttl <= ticks) ?
			    0 : fp->ipq_ttl - ticks);
			nfp = LIST_NEXT(fp, ipq_q);
			if (fp->ipq_ttl == 0) {
				IP_STATINC(IP_STAT_FRAGTIMEOUT);
				ip_freef(fp);
			} else {
				nfrags += fp->ipq_nfrags;
				fragttl_histo[fp->ipq_ttl] += fp->ipq_nfrags;
			}
		}
	}

	KASSERT(ip_nfrags == nfrags);

	/* Find median (or other drop fraction) in histogram. */
	dropfraction = (ip_nfrags / 2);
	keepfraction = ip_nfrags - dropfraction;
	for (i = IPFRAGTTL, median = 0; i >= 0; i--) {
		median += fragttl_histo[i];
		if (median >= keepfraction)
			break;
	}

	/* Return TTL of median (or other fraction). */
	return (u_int)i;
}

static void
ip_reass_drophalf(void)
{
	u_int median_ticks;

	KASSERT(mutex_owned(&ipfr_lock));

	/*
	 * Compute median TTL of all fragments, and count frags
	 * with that TTL or lower (roughly half of all fragments).
	 */
	median_ticks = ip_reass_ttl_decr(0);

	/* Drop half. */
	median_ticks = ip_reass_ttl_decr(median_ticks);
}

/*
 * ip_reass_drain: drain off all datagram fragments.
 * Do not acquire softnet_lock, as this can be called from hardware
 * interrupt context.
 */
void
ip_reass_drain(void)
{

	/*
	 * We may be called from a device's interrupt context.  If
	 * the ipq is already busy, just bail out now.
	 */
	if (mutex_tryenter(&ipfr_lock)) {
		/*
		 * Drop half the total fragments now.  If more mbufs are
		 * needed, we will be called again soon.
		 */
		ip_reass_drophalf();
		mutex_exit(&ipfr_lock);
	}
}

/*
 * ip_reass_slowtimo:
 *
 *	If a timer expires on a reassembly queue, discard it.
 */
void
ip_reass_slowtimo(void)
{
	static u_int dropscanidx = 0;
	u_int i, median_ttl;

	mutex_enter(&ipfr_lock);

	/* Age TTL of all fragments by 1 tick. */
	median_ttl = ip_reass_ttl_decr(1);

	/* Make sure fragment limit is up-to-date. */
	CHECK_NMBCLUSTER_PARAMS();

	/* If we have too many fragments, drop the older half. */
	if (ip_nfrags > ip_maxfrags) {
		ip_reass_ttl_decr(median_ttl);
	}

	/*
	 * If we are over the maximum number of fragmented packets (due to
	 * the limit being lowered), drain off enough to get down to the
	 * new limit.  Start draining from the reassembly hashqueue most
	 * recently drained.
	 */
	if (ip_maxfragpackets < 0)
		;
	else {
		int wrapped = 0;

		i = dropscanidx;
		while (ip_nfragpackets > ip_maxfragpackets && wrapped == 0) {
			while (LIST_FIRST(&ip_frags[i]) != NULL) {
				ip_freef(LIST_FIRST(&ip_frags[i]));
			}
			if (++i >= IPREASS_HASH_SIZE) {
				i = 0;
			}
			/*
			 * Do not scan forever even if fragment counters are
			 * wrong: stop after scanning entire reassembly queue.
			 */
			if (i == dropscanidx) {
				wrapped = 1;
			}
		}
		dropscanidx = i;
	}
	mutex_exit(&ipfr_lock);
}

/*
 * ip_reass_packet: generic routine to perform IP reassembly.
 *
 * => Passed fragment should have the IP_MF flag and/or a non-zero offset set.
 * => Fragment should not have any flags set other than IP_MF.
 *
 * => Returns 0 on success or error otherwise.
 * => On complete, m0 represents a constructed final packet.
 */
int
ip_reass_packet(struct mbuf **m0, struct ip *ip)
{
	const int hlen = ip->ip_hl << 2;
	const int len = ntohs(ip->ip_len);
	struct mbuf *m = *m0;
	ipfr_queue_t *fp;
	ipfr_qent_t *ipqe;
	u_int hash, off, flen;
	bool mff;

	/*
	 * Prevent TCP blind data attacks by not allowing non-initial
	 * fragments to start at less than 68 bytes (minimal fragment
	 * size) and making sure the first fragment is at least 68
	 * bytes.
	 */
	off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
	if ((off > 0 ? off + hlen : len) < IP_MINFRAGSIZE - 1) {
		IP_STATINC(IP_STAT_BADFRAGS);
		return EINVAL;
	}

	/*
	 * Fragment length and MF flag.  Make sure that fragments have
	 * a data length which is non-zero and a multiple of 8 bytes.
	 */
	flen = ntohs(ip->ip_len) - hlen;
	mff = (ip->ip_off & htons(IP_MF)) != 0;
	if (mff && (flen == 0 || (flen & 0x7) != 0)) {
		IP_STATINC(IP_STAT_BADFRAGS);
		return EINVAL;
	}

	/*
	 * Adjust the total IP length to not reflect the header and convert
	 * the offset of this fragment to bytes.  XXX: clobbers struct ip.
	 */
	ip->ip_len = htons(flen);
	ip->ip_off = htons(off);

	/*
	 * Look for a queue of fragments of this datagram.
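	 * A queue matches if the fragment's ID, source and destination
	 * addresses, and protocol all equal those of the queue.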
	 */
	mutex_enter(&ipfr_lock);
	hash = IPREASS_HASH(ip->ip_src.s_addr, ip->ip_id);
	LIST_FOREACH(fp, &ip_frags[hash], ipq_q) {
		if (ip->ip_id != fp->ipq_id)
			continue;
		if (!in_hosteq(ip->ip_src, fp->ipq_src))
			continue;
		if (!in_hosteq(ip->ip_dst, fp->ipq_dst))
			continue;
		if (ip->ip_p != fp->ipq_p)
			continue;
		break;
	}

	/* Make sure that TOS matches previous fragments. */
	if (fp && fp->ipq_tos != ip->ip_tos) {
		IP_STATINC(IP_STAT_BADFRAGS);
		mutex_exit(&ipfr_lock);
		return EINVAL;
	}

	/*
	 * Create a new entry and attempt reassembly.
	 */
	IP_STATINC(IP_STAT_FRAGMENTS);
	ipqe = pool_cache_get(ipfren_cache, PR_NOWAIT);
	if (ipqe == NULL) {
		IP_STATINC(IP_STAT_RCVMEMDROP);
		mutex_exit(&ipfr_lock);
		return ENOMEM;
	}
	ipqe->ipqe_mff = mff;
	ipqe->ipqe_m = m;
	ipqe->ipqe_ip = ip;

	*m0 = ip_reass(ipqe, fp, hash);
	if (*m0) {
		/* Note that finally reassembled. */
		IP_STATINC(IP_STAT_REASSEMBLED);
	}
	return 0;
}