1 /* ip_input.c 6.4 84/08/21 */ 2 3 #include "../h/param.h" 4 #include "../h/systm.h" 5 #include "../h/mbuf.h" 6 #include "../h/domain.h" 7 #include "../h/protosw.h" 8 #include "../h/socket.h" 9 #include "../h/errno.h" 10 #include "../h/time.h" 11 #include "../h/kernel.h" 12 13 #include "../net/if.h" 14 #include "../net/route.h" 15 16 #include "../netinet/in.h" 17 #include "../netinet/in_pcb.h" 18 #include "../netinet/in_systm.h" 19 #include "../netinet/ip.h" 20 #include "../netinet/ip_var.h" 21 #include "../netinet/ip_icmp.h" 22 #include "../netinet/tcp.h" 23 24 u_char ip_protox[IPPROTO_MAX]; 25 int ipqmaxlen = IFQ_MAXLEN; 26 struct ifnet *ifinet; /* first inet interface */ 27 28 /* 29 * IP initialization: fill in IP protocol switch table. 30 * All protocols not implemented in kernel go to raw IP protocol handler. 31 */ 32 ip_init() 33 { 34 register struct protosw *pr; 35 register int i; 36 37 pr = pffindproto(PF_INET, IPPROTO_RAW); 38 if (pr == 0) 39 panic("ip_init"); 40 for (i = 0; i < IPPROTO_MAX; i++) 41 ip_protox[i] = pr - inetsw; 42 for (pr = inetdomain.dom_protosw; 43 pr <= inetdomain.dom_protoswNPROTOSW; pr++) 44 if (pr->pr_domain->dom_family == PF_INET && 45 pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) 46 ip_protox[pr->pr_protocol] = pr - inetsw; 47 ipq.next = ipq.prev = &ipq; 48 ip_id = time.tv_sec & 0xffff; 49 ipintrq.ifq_maxlen = ipqmaxlen; 50 ifinet = if_ifwithaf(AF_INET); 51 } 52 53 u_char ipcksum = 1; 54 struct ip *ip_reass(); 55 struct sockaddr_in ipaddr = { AF_INET }; 56 57 /* 58 * Ip input routine. Checksum and byte swap header. If fragmented 59 * try to reassamble. If complete and fragment queue exists, discard. 60 * Process options. Pass to next level. 61 */ 62 ipintr() 63 { 64 register struct ip *ip; 65 register struct mbuf *m; 66 struct mbuf *m0; 67 register int i; 68 register struct ipq *fp; 69 int hlen, s; 70 71 next: 72 /* 73 * Get next datagram off input queue and get IP header 74 * in first mbuf. 75 */ 76 s = splimp(); 77 IF_DEQUEUE(&ipintrq, m); 78 splx(s); 79 if (m == 0) 80 return; 81 if ((m->m_off > MMAXOFF || m->m_len < sizeof (struct ip)) && 82 (m = m_pullup(m, sizeof (struct ip))) == 0) { 83 ipstat.ips_toosmall++; 84 goto next; 85 } 86 ip = mtod(m, struct ip *); 87 if ((hlen = ip->ip_hl << 2) > m->m_len) { 88 if ((m = m_pullup(m, hlen)) == 0) { 89 ipstat.ips_badhlen++; 90 goto next; 91 } 92 ip = mtod(m, struct ip *); 93 } 94 if (ipcksum) 95 if (ip->ip_sum = in_cksum(m, hlen)) { 96 ipstat.ips_badsum++; 97 goto bad; 98 } 99 100 /* 101 * Convert fields to host representation. 102 */ 103 ip->ip_len = ntohs((u_short)ip->ip_len); 104 if (ip->ip_len < hlen) { 105 ipstat.ips_badlen++; 106 goto bad; 107 } 108 ip->ip_id = ntohs(ip->ip_id); 109 ip->ip_off = ntohs((u_short)ip->ip_off); 110 111 /* 112 * Check that the amount of data in the buffers 113 * is as at least much as the IP header would have us expect. 114 * Trim mbufs if longer than we expect. 115 * Drop packet if shorter than we expect. 116 */ 117 i = -ip->ip_len; 118 m0 = m; 119 for (;;) { 120 i += m->m_len; 121 if (m->m_next == 0) 122 break; 123 m = m->m_next; 124 } 125 if (i != 0) { 126 if (i < 0) { 127 ipstat.ips_tooshort++; 128 goto bad; 129 } 130 if (i <= m->m_len) 131 m->m_len -= i; 132 else 133 m_adj(m0, -i); 134 } 135 m = m0; 136 137 /* 138 * Process options and, if not destined for us, 139 * ship it on. ip_dooptions returns 1 when an 140 * error was detected (causing an icmp message 141 * to be sent). 142 */ 143 if (hlen > sizeof (struct ip) && ip_dooptions(ip)) 144 goto next; 145 146 /* 147 * Fast check on the first internet 148 * interface in the list. 149 */ 150 if (ifinet) { 151 struct sockaddr_in *sin; 152 153 sin = (struct sockaddr_in *)&ifinet->if_addr; 154 if (sin->sin_addr.s_addr == ip->ip_dst.s_addr) 155 goto ours; 156 sin = (struct sockaddr_in *)&ifinet->if_broadaddr; 157 if ((ifinet->if_flags & IFF_BROADCAST) && 158 sin->sin_addr.s_addr == ip->ip_dst.s_addr) 159 goto ours; 160 } 161 /* BEGIN GROT */ 162 #include "nd.h" 163 #if NND > 0 164 /* 165 * Diskless machines don't initially know 166 * their address, so take packets from them 167 * if we're acting as a network disk server. 168 */ 169 if (in_netof(ip->ip_dst) == INADDR_ANY && 170 (in_netof(ip->ip_src) == INADDR_ANY && 171 in_lnaof(ip->ip_src) != INADDR_ANY)) 172 goto ours; 173 #endif 174 /* END GROT */ 175 ipaddr.sin_addr = ip->ip_dst; 176 if (if_ifwithaddr((struct sockaddr *)&ipaddr) == 0) { 177 ip_forward(ip); 178 goto next; 179 } 180 181 ours: 182 /* 183 * Look for queue of fragments 184 * of this datagram. 185 */ 186 for (fp = ipq.next; fp != &ipq; fp = fp->next) 187 if (ip->ip_id == fp->ipq_id && 188 ip->ip_src.s_addr == fp->ipq_src.s_addr && 189 ip->ip_dst.s_addr == fp->ipq_dst.s_addr && 190 ip->ip_p == fp->ipq_p) 191 goto found; 192 fp = 0; 193 found: 194 195 /* 196 * Adjust ip_len to not reflect header, 197 * set ip_mff if more fragments are expected, 198 * convert offset of this to bytes. 199 */ 200 ip->ip_len -= hlen; 201 ((struct ipasfrag *)ip)->ipf_mff = 0; 202 if (ip->ip_off & IP_MF) 203 ((struct ipasfrag *)ip)->ipf_mff = 1; 204 ip->ip_off <<= 3; 205 206 /* 207 * If datagram marked as having more fragments 208 * or if this is not the first fragment, 209 * attempt reassembly; if it succeeds, proceed. 210 */ 211 if (((struct ipasfrag *)ip)->ipf_mff || ip->ip_off) { 212 ip = ip_reass((struct ipasfrag *)ip, fp); 213 if (ip == 0) 214 goto next; 215 hlen = ip->ip_hl << 2; 216 m = dtom(ip); 217 } else 218 if (fp) 219 ip_freef(fp); 220 221 /* 222 * Switch out to protocol's input routine. 223 */ 224 (*inetsw[ip_protox[ip->ip_p]].pr_input)(m); 225 goto next; 226 bad: 227 m_freem(m); 228 goto next; 229 } 230 231 /* 232 * Take incoming datagram fragment and try to 233 * reassemble it into whole datagram. If a chain for 234 * reassembly of this datagram already exists, then it 235 * is given as fp; otherwise have to make a chain. 236 */ 237 struct ip * 238 ip_reass(ip, fp) 239 register struct ipasfrag *ip; 240 register struct ipq *fp; 241 { 242 register struct mbuf *m = dtom(ip); 243 register struct ipasfrag *q; 244 struct mbuf *t; 245 int hlen = ip->ip_hl << 2; 246 int i, next; 247 248 /* 249 * Presence of header sizes in mbufs 250 * would confuse code below. 251 */ 252 m->m_off += hlen; 253 m->m_len -= hlen; 254 255 /* 256 * If first fragment to arrive, create a reassembly queue. 257 */ 258 if (fp == 0) { 259 if ((t = m_get(M_WAIT, MT_FTABLE)) == NULL) 260 goto dropfrag; 261 fp = mtod(t, struct ipq *); 262 insque(fp, &ipq); 263 fp->ipq_ttl = IPFRAGTTL; 264 fp->ipq_p = ip->ip_p; 265 fp->ipq_id = ip->ip_id; 266 fp->ipq_next = fp->ipq_prev = (struct ipasfrag *)fp; 267 fp->ipq_src = ((struct ip *)ip)->ip_src; 268 fp->ipq_dst = ((struct ip *)ip)->ip_dst; 269 q = (struct ipasfrag *)fp; 270 goto insert; 271 } 272 273 /* 274 * Find a segment which begins after this one does. 275 */ 276 for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next) 277 if (q->ip_off > ip->ip_off) 278 break; 279 280 /* 281 * If there is a preceding segment, it may provide some of 282 * our data already. If so, drop the data from the incoming 283 * segment. If it provides all of our data, drop us. 284 */ 285 if (q->ipf_prev != (struct ipasfrag *)fp) { 286 i = q->ipf_prev->ip_off + q->ipf_prev->ip_len - ip->ip_off; 287 if (i > 0) { 288 if (i >= ip->ip_len) 289 goto dropfrag; 290 m_adj(dtom(ip), i); 291 ip->ip_off += i; 292 ip->ip_len -= i; 293 } 294 } 295 296 /* 297 * While we overlap succeeding segments trim them or, 298 * if they are completely covered, dequeue them. 299 */ 300 while (q != (struct ipasfrag *)fp && ip->ip_off + ip->ip_len > q->ip_off) { 301 i = (ip->ip_off + ip->ip_len) - q->ip_off; 302 if (i < q->ip_len) { 303 q->ip_len -= i; 304 q->ip_off += i; 305 m_adj(dtom(q), i); 306 break; 307 } 308 q = q->ipf_next; 309 m_freem(dtom(q->ipf_prev)); 310 ip_deq(q->ipf_prev); 311 } 312 313 insert: 314 /* 315 * Stick new segment in its place; 316 * check for complete reassembly. 317 */ 318 ip_enq(ip, q->ipf_prev); 319 next = 0; 320 for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next) { 321 if (q->ip_off != next) 322 return (0); 323 next += q->ip_len; 324 } 325 if (q->ipf_prev->ipf_mff) 326 return (0); 327 328 /* 329 * Reassembly is complete; concatenate fragments. 330 */ 331 q = fp->ipq_next; 332 m = dtom(q); 333 t = m->m_next; 334 m->m_next = 0; 335 m_cat(m, t); 336 q = q->ipf_next; 337 while (q != (struct ipasfrag *)fp) { 338 t = dtom(q); 339 q = q->ipf_next; 340 m_cat(m, t); 341 } 342 343 /* 344 * Create header for new ip packet by 345 * modifying header of first packet; 346 * dequeue and discard fragment reassembly header. 347 * Make header visible. 348 */ 349 ip = fp->ipq_next; 350 ip->ip_len = next; 351 ((struct ip *)ip)->ip_src = fp->ipq_src; 352 ((struct ip *)ip)->ip_dst = fp->ipq_dst; 353 remque(fp); 354 (void) m_free(dtom(fp)); 355 m = dtom(ip); 356 m->m_len += sizeof (struct ipasfrag); 357 m->m_off -= sizeof (struct ipasfrag); 358 return ((struct ip *)ip); 359 360 dropfrag: 361 m_freem(m); 362 return (0); 363 } 364 365 /* 366 * Free a fragment reassembly header and all 367 * associated datagrams. 368 */ 369 ip_freef(fp) 370 struct ipq *fp; 371 { 372 register struct ipasfrag *q, *p; 373 374 for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = p) { 375 p = q->ipf_next; 376 ip_deq(q); 377 m_freem(dtom(q)); 378 } 379 remque(fp); 380 (void) m_free(dtom(fp)); 381 } 382 383 /* 384 * Put an ip fragment on a reassembly chain. 385 * Like insque, but pointers in middle of structure. 386 */ 387 ip_enq(p, prev) 388 register struct ipasfrag *p, *prev; 389 { 390 391 p->ipf_prev = prev; 392 p->ipf_next = prev->ipf_next; 393 prev->ipf_next->ipf_prev = p; 394 prev->ipf_next = p; 395 } 396 397 /* 398 * To ip_enq as remque is to insque. 399 */ 400 ip_deq(p) 401 register struct ipasfrag *p; 402 { 403 404 p->ipf_prev->ipf_next = p->ipf_next; 405 p->ipf_next->ipf_prev = p->ipf_prev; 406 } 407 408 /* 409 * IP timer processing; 410 * if a timer expires on a reassembly 411 * queue, discard it. 412 */ 413 ip_slowtimo() 414 { 415 register struct ipq *fp; 416 int s = splnet(); 417 418 fp = ipq.next; 419 if (fp == 0) { 420 splx(s); 421 return; 422 } 423 while (fp != &ipq) { 424 --fp->ipq_ttl; 425 fp = fp->next; 426 if (fp->prev->ipq_ttl == 0) 427 ip_freef(fp->prev); 428 } 429 splx(s); 430 } 431 432 /* 433 * Drain off all datagram fragments. 434 */ 435 ip_drain() 436 { 437 438 while (ipq.next != &ipq) 439 ip_freef(ipq.next); 440 } 441 442 /* 443 * Do option processing on a datagram, 444 * possibly discarding it if bad options 445 * are encountered. 446 */ 447 ip_dooptions(ip) 448 struct ip *ip; 449 { 450 register u_char *cp; 451 int opt, optlen, cnt, code, type; 452 struct in_addr *sin; 453 register struct ip_timestamp *ipt; 454 register struct ifnet *ifp; 455 struct in_addr t; 456 457 cp = (u_char *)(ip + 1); 458 cnt = (ip->ip_hl << 2) - sizeof (struct ip); 459 for (; cnt > 0; cnt -= optlen, cp += optlen) { 460 opt = cp[0]; 461 if (opt == IPOPT_EOL) 462 break; 463 if (opt == IPOPT_NOP) 464 optlen = 1; 465 else { 466 optlen = cp[1]; 467 if (optlen <= 0) 468 break; 469 } 470 switch (opt) { 471 472 default: 473 break; 474 475 /* 476 * Source routing with record. 477 * Find interface with current destination address. 478 * If none on this machine then drop if strictly routed, 479 * or do nothing if loosely routed. 480 * Record interface address and bring up next address 481 * component. If strictly routed make sure next 482 * address on directly accessible net. 483 */ 484 case IPOPT_LSRR: 485 case IPOPT_SSRR: 486 if (cp[2] < 4 || cp[2] > optlen - (sizeof (long) - 1)) 487 break; 488 sin = (struct in_addr *)(cp + cp[2]); 489 ipaddr.sin_addr = *sin; 490 ifp = if_ifwithaddr((struct sockaddr *)&ipaddr); 491 type = ICMP_UNREACH, code = ICMP_UNREACH_SRCFAIL; 492 if (ifp == 0) { 493 if (opt == IPOPT_SSRR) 494 goto bad; 495 break; 496 } 497 t = ip->ip_dst; ip->ip_dst = *sin; *sin = t; 498 cp[2] += 4; 499 if (cp[2] > optlen - (sizeof (long) - 1)) 500 break; 501 ip->ip_dst = sin[1]; 502 if (opt == IPOPT_SSRR && 503 if_ifonnetof(in_netof(ip->ip_dst)) == 0) 504 goto bad; 505 break; 506 507 case IPOPT_TS: 508 code = cp - (u_char *)ip; 509 type = ICMP_PARAMPROB; 510 ipt = (struct ip_timestamp *)cp; 511 if (ipt->ipt_len < 5) 512 goto bad; 513 if (ipt->ipt_ptr > ipt->ipt_len - sizeof (long)) { 514 if (++ipt->ipt_oflw == 0) 515 goto bad; 516 break; 517 } 518 sin = (struct in_addr *)(cp+cp[2]); 519 switch (ipt->ipt_flg) { 520 521 case IPOPT_TS_TSONLY: 522 break; 523 524 case IPOPT_TS_TSANDADDR: 525 if (ipt->ipt_ptr + 8 > ipt->ipt_len) 526 goto bad; 527 if (ifinet == 0) 528 goto bad; /* ??? */ 529 *sin++ = ((struct sockaddr_in *)&ifinet->if_addr)->sin_addr; 530 break; 531 532 case IPOPT_TS_PRESPEC: 533 ipaddr.sin_addr = *sin; 534 if (if_ifwithaddr((struct sockaddr *)&ipaddr) == 0) 535 continue; 536 if (ipt->ipt_ptr + 8 > ipt->ipt_len) 537 goto bad; 538 ipt->ipt_ptr += 4; 539 break; 540 541 default: 542 goto bad; 543 } 544 *(n_time *)sin = iptime(); 545 ipt->ipt_ptr += 4; 546 } 547 } 548 return (0); 549 bad: 550 icmp_error(ip, type, code); 551 return (1); 552 } 553 554 /* 555 * Strip out IP options, at higher 556 * level protocol in the kernel. 557 * Second argument is buffer to which options 558 * will be moved, and return value is their length. 559 */ 560 ip_stripoptions(ip, mopt) 561 struct ip *ip; 562 struct mbuf *mopt; 563 { 564 register int i; 565 register struct mbuf *m; 566 int olen; 567 568 olen = (ip->ip_hl<<2) - sizeof (struct ip); 569 m = dtom(ip); 570 ip++; 571 if (mopt) { 572 mopt->m_len = olen; 573 mopt->m_off = MMINOFF; 574 bcopy((caddr_t)ip, mtod(mopt, caddr_t), (unsigned)olen); 575 } 576 i = m->m_len - (sizeof (struct ip) + olen); 577 bcopy((caddr_t)ip+olen, (caddr_t)ip, (unsigned)i); 578 m->m_len -= olen; 579 } 580 581 u_char inetctlerrmap[PRC_NCMDS] = { 582 ECONNABORTED, ECONNABORTED, 0, 0, 583 0, 0, EHOSTDOWN, EHOSTUNREACH, 584 ENETUNREACH, EHOSTUNREACH, ECONNREFUSED, ECONNREFUSED, 585 EMSGSIZE, 0, 0, 0, 586 0, 0, 0, 0 587 }; 588 589 ip_ctlinput(cmd, arg) 590 int cmd; 591 caddr_t arg; 592 { 593 struct in_addr *in; 594 int tcp_abort(), udp_abort(); 595 extern struct inpcb tcb, udb; 596 597 if (cmd < 0 || cmd > PRC_NCMDS) 598 return; 599 if (inetctlerrmap[cmd] == 0) 600 return; /* XXX */ 601 if (cmd == PRC_IFDOWN) 602 in = &((struct sockaddr_in *)arg)->sin_addr; 603 else if (cmd == PRC_HOSTDEAD || cmd == PRC_HOSTUNREACH) 604 in = (struct in_addr *)arg; 605 else 606 in = &((struct icmp *)arg)->icmp_ip.ip_dst; 607 /* THIS IS VERY QUESTIONABLE, SHOULD HIT ALL PROTOCOLS */ 608 in_pcbnotify(&tcb, in, (int)inetctlerrmap[cmd], tcp_abort); 609 in_pcbnotify(&udb, in, (int)inetctlerrmap[cmd], udp_abort); 610 } 611 612 int ipprintfs = 0; 613 int ipforwarding = 1; 614 /* 615 * Forward a packet. If some error occurs return the sender 616 * and icmp packet. Note we can't always generate a meaningful 617 * icmp message because icmp doesn't have a large enough repetoire 618 * of codes and types. 619 */ 620 ip_forward(ip) 621 register struct ip *ip; 622 { 623 register int error, type, code; 624 struct mbuf *mopt, *mcopy; 625 626 if (ipprintfs) 627 printf("forward: src %x dst %x ttl %x\n", ip->ip_src, 628 ip->ip_dst, ip->ip_ttl); 629 if (ipforwarding == 0) { 630 /* can't tell difference between net and host */ 631 type = ICMP_UNREACH, code = ICMP_UNREACH_NET; 632 goto sendicmp; 633 } 634 if (ip->ip_ttl < IPTTLDEC) { 635 type = ICMP_TIMXCEED, code = ICMP_TIMXCEED_INTRANS; 636 goto sendicmp; 637 } 638 ip->ip_ttl -= IPTTLDEC; 639 mopt = m_get(M_DONTWAIT, MT_DATA); 640 if (mopt == NULL) { 641 m_freem(dtom(ip)); 642 return; 643 } 644 645 /* 646 * Save at most 64 bytes of the packet in case 647 * we need to generate an ICMP message to the src. 648 */ 649 mcopy = m_copy(dtom(ip), 0, imin(ip->ip_len, 64)); 650 ip_stripoptions(ip, mopt); 651 652 error = ip_output(dtom(ip), mopt, (struct route *)0, IP_FORWARDING); 653 if (error == 0) { 654 if (mcopy) 655 m_freem(mcopy); 656 return; 657 } 658 if (mcopy == NULL) 659 return; 660 ip = mtod(mcopy, struct ip *); 661 type = ICMP_UNREACH, code = 0; /* need ``undefined'' */ 662 switch (error) { 663 664 case ENETUNREACH: 665 case ENETDOWN: 666 code = ICMP_UNREACH_NET; 667 break; 668 669 case EMSGSIZE: 670 code = ICMP_UNREACH_NEEDFRAG; 671 break; 672 673 case EPERM: 674 code = ICMP_UNREACH_PORT; 675 break; 676 677 case ENOBUFS: 678 type = ICMP_SOURCEQUENCH; 679 break; 680 681 case EHOSTDOWN: 682 case EHOSTUNREACH: 683 code = ICMP_UNREACH_HOST; 684 break; 685 } 686 sendicmp: 687 icmp_error(ip, type, code); 688 } 689