1 /* ip_input.c 1.68 83/05/15 */ 2 3 #include "../h/param.h" 4 #include "../h/systm.h" 5 #include "../h/mbuf.h" 6 #include "../h/domain.h" 7 #include "../h/protosw.h" 8 #include "../h/socket.h" 9 #include "../h/errno.h" 10 #include "../h/time.h" 11 #include "../h/kernel.h" 12 13 #include "../net/if.h" 14 #include "../net/route.h" 15 16 #include "../netinet/in.h" 17 #include "../netinet/in_pcb.h" 18 #include "../netinet/in_systm.h" 19 #include "../netinet/ip.h" 20 #include "../netinet/ip_var.h" 21 #include "../netinet/ip_icmp.h" 22 #include "../netinet/tcp.h" 23 24 u_char ip_protox[IPPROTO_MAX]; 25 int ipqmaxlen = IFQ_MAXLEN; 26 struct ifnet *ifinet; /* first inet interface */ 27 28 /* 29 * IP initialization: fill in IP protocol switch table. 30 * All protocols not implemented in kernel go to raw IP protocol handler. 31 */ 32 ip_init() 33 { 34 register struct protosw *pr; 35 register int i; 36 37 pr = pffindproto(PF_INET, IPPROTO_RAW); 38 if (pr == 0) 39 panic("ip_init"); 40 for (i = 0; i < IPPROTO_MAX; i++) 41 ip_protox[i] = pr - inetsw; 42 for (pr = inetdomain.dom_protosw; 43 pr <= inetdomain.dom_protoswNPROTOSW; pr++) 44 if (pr->pr_family == PF_INET && 45 pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) 46 ip_protox[pr->pr_protocol] = pr - inetsw; 47 ipq.next = ipq.prev = &ipq; 48 ip_id = time.tv_sec & 0xffff; 49 ipintrq.ifq_maxlen = ipqmaxlen; 50 ifinet = if_ifwithaf(AF_INET); 51 } 52 53 u_char ipcksum = 1; 54 struct ip *ip_reass(); 55 struct sockaddr_in ipaddr = { AF_INET }; 56 57 /* 58 * Ip input routine. Checksum and byte swap header. If fragmented 59 * try to reassamble. If complete and fragment queue exists, discard. 60 * Process options. Pass to next level. 61 */ 62 ipintr() 63 { 64 register struct ip *ip; 65 register struct mbuf *m; 66 struct mbuf *m0; 67 register int i; 68 register struct ipq *fp; 69 int hlen, s; 70 71 next: 72 /* 73 * Get next datagram off input queue and get IP header 74 * in first mbuf. 
75 */ 76 s = splimp(); 77 IF_DEQUEUE(&ipintrq, m); 78 splx(s); 79 if (m == 0) 80 return; 81 if ((m->m_off > MMAXOFF || m->m_len < sizeof (struct ip)) && 82 (m = m_pullup(m, sizeof (struct ip))) == 0) { 83 ipstat.ips_toosmall++; 84 goto next; 85 } 86 ip = mtod(m, struct ip *); 87 if ((hlen = ip->ip_hl << 2) > m->m_len) { 88 if ((m = m_pullup(m, hlen)) == 0) { 89 ipstat.ips_badhlen++; 90 goto next; 91 } 92 ip = mtod(m, struct ip *); 93 } 94 if (ipcksum) 95 if (ip->ip_sum = in_cksum(m, hlen)) { 96 ipstat.ips_badsum++; 97 goto bad; 98 } 99 100 /* 101 * Convert fields to host representation. 102 */ 103 ip->ip_len = ntohs((u_short)ip->ip_len); 104 if (ip->ip_len < hlen) { 105 ipstat.ips_badlen++; 106 goto bad; 107 } 108 ip->ip_id = ntohs(ip->ip_id); 109 ip->ip_off = ntohs((u_short)ip->ip_off); 110 111 /* 112 * Check that the amount of data in the buffers 113 * is as at least much as the IP header would have us expect. 114 * Trim mbufs if longer than we expect. 115 * Drop packet if shorter than we expect. 116 */ 117 i = -ip->ip_len; 118 m0 = m; 119 for (;;) { 120 i += m->m_len; 121 if (m->m_next == 0) 122 break; 123 m = m->m_next; 124 } 125 if (i != 0) { 126 if (i < 0) { 127 ipstat.ips_tooshort++; 128 goto bad; 129 } 130 if (i <= m->m_len) 131 m->m_len -= i; 132 else 133 m_adj(m0, -i); 134 } 135 m = m0; 136 137 /* 138 * Process options and, if not destined for us, 139 * ship it on. ip_dooptions returns 1 when an 140 * error was detected (causing an icmp message 141 * to be sent). 142 */ 143 if (hlen > sizeof (struct ip) && ip_dooptions(ip)) 144 goto next; 145 146 /* 147 * Fast check on the first internet 148 * interface in the list. 
149 */ 150 if (ifinet) { 151 struct sockaddr_in *sin; 152 153 sin = (struct sockaddr_in *)&ifinet->if_addr; 154 if (sin->sin_addr.s_addr == ip->ip_dst.s_addr) 155 goto ours; 156 sin = (struct sockaddr_in *)&ifinet->if_broadaddr; 157 if ((ifinet->if_flags & IFF_BROADCAST) && 158 sin->sin_addr.s_addr == ip->ip_dst.s_addr) 159 goto ours; 160 } 161 /* BEGIN GROT */ 162 #include "nd.h" 163 #if NND > 0 164 /* 165 * Diskless machines don't initially know 166 * their address, so take packets from them 167 * if we're acting as a network disk server. 168 */ 169 if (in_netof(ip->ip_dst) == INADDR_ANY && 170 (in_netof(ip->ip_src) == INADDR_ANY && 171 in_lnaof(ip->ip_src) != INADDR_ANY)) 172 goto ours; 173 #endif 174 /* END GROT */ 175 ipaddr.sin_addr = ip->ip_dst; 176 if (if_ifwithaddr((struct sockaddr *)&ipaddr) == 0) { 177 ip_forward(ip); 178 goto next; 179 } 180 181 ours: 182 /* 183 * Look for queue of fragments 184 * of this datagram. 185 */ 186 for (fp = ipq.next; fp != &ipq; fp = fp->next) 187 if (ip->ip_id == fp->ipq_id && 188 ip->ip_src.s_addr == fp->ipq_src.s_addr && 189 ip->ip_dst.s_addr == fp->ipq_dst.s_addr && 190 ip->ip_p == fp->ipq_p) 191 goto found; 192 fp = 0; 193 found: 194 195 /* 196 * Adjust ip_len to not reflect header, 197 * set ip_mff if more fragments are expected, 198 * convert offset of this to bytes. 199 */ 200 ip->ip_len -= hlen; 201 ((struct ipasfrag *)ip)->ipf_mff = 0; 202 if (ip->ip_off & IP_MF) 203 ((struct ipasfrag *)ip)->ipf_mff = 1; 204 ip->ip_off <<= 3; 205 206 /* 207 * If datagram marked as having more fragments 208 * or if this is not the first fragment, 209 * attempt reassembly; if it succeeds, proceed. 210 */ 211 if (((struct ipasfrag *)ip)->ipf_mff || ip->ip_off) { 212 ip = ip_reass((struct ipasfrag *)ip, fp); 213 if (ip == 0) 214 goto next; 215 hlen = ip->ip_hl << 2; 216 m = dtom(ip); 217 } else 218 if (fp) 219 ip_freef(fp); 220 221 /* 222 * Switch out to protocol's input routine. 
223 */ 224 (*inetsw[ip_protox[ip->ip_p]].pr_input)(m); 225 goto next; 226 bad: 227 m_freem(m); 228 goto next; 229 } 230 231 /* 232 * Take incoming datagram fragment and try to 233 * reassemble it into whole datagram. If a chain for 234 * reassembly of this datagram already exists, then it 235 * is given as fp; otherwise have to make a chain. 236 */ 237 struct ip * 238 ip_reass(ip, fp) 239 register struct ipasfrag *ip; 240 register struct ipq *fp; 241 { 242 register struct mbuf *m = dtom(ip); 243 register struct ipasfrag *q; 244 struct mbuf *t; 245 int hlen = ip->ip_hl << 2; 246 int i, next; 247 248 /* 249 * Presence of header sizes in mbufs 250 * would confuse code below. 251 */ 252 m->m_off += hlen; 253 m->m_len -= hlen; 254 255 /* 256 * If first fragment to arrive, create a reassembly queue. 257 */ 258 if (fp == 0) { 259 if ((t = m_get(M_WAIT, MT_FTABLE)) == NULL) 260 goto dropfrag; 261 fp = mtod(t, struct ipq *); 262 insque(fp, &ipq); 263 fp->ipq_ttl = IPFRAGTTL; 264 fp->ipq_p = ip->ip_p; 265 fp->ipq_id = ip->ip_id; 266 fp->ipq_next = fp->ipq_prev = (struct ipasfrag *)fp; 267 fp->ipq_src = ((struct ip *)ip)->ip_src; 268 fp->ipq_dst = ((struct ip *)ip)->ip_dst; 269 q = (struct ipasfrag *)fp; 270 goto insert; 271 } 272 273 /* 274 * Find a segment which begins after this one does. 275 */ 276 for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next) 277 if (q->ip_off > ip->ip_off) 278 break; 279 280 /* 281 * If there is a preceding segment, it may provide some of 282 * our data already. If so, drop the data from the incoming 283 * segment. If it provides all of our data, drop us. 284 */ 285 if (q->ipf_prev != (struct ipasfrag *)fp) { 286 i = q->ipf_prev->ip_off + q->ipf_prev->ip_len - ip->ip_off; 287 if (i > 0) { 288 if (i >= ip->ip_len) 289 goto dropfrag; 290 m_adj(dtom(ip), i); 291 ip->ip_off += i; 292 ip->ip_len -= i; 293 } 294 } 295 296 /* 297 * While we overlap succeeding segments trim them or, 298 * if they are completely covered, dequeue them. 
299 */ 300 while (q != (struct ipasfrag *)fp && ip->ip_off + ip->ip_len > q->ip_off) { 301 i = (ip->ip_off + ip->ip_len) - q->ip_off; 302 if (i < q->ip_len) { 303 q->ip_len -= i; 304 q->ip_off += i; 305 m_adj(dtom(q), i); 306 break; 307 } 308 q = q->ipf_next; 309 m_freem(dtom(q->ipf_prev)); 310 ip_deq(q->ipf_prev); 311 } 312 313 insert: 314 /* 315 * Stick new segment in its place; 316 * check for complete reassembly. 317 */ 318 ip_enq(ip, q->ipf_prev); 319 next = 0; 320 for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next) { 321 if (q->ip_off != next) 322 return (0); 323 next += q->ip_len; 324 } 325 if (q->ipf_prev->ipf_mff) 326 return (0); 327 328 /* 329 * Reassembly is complete; concatenate fragments. 330 */ 331 q = fp->ipq_next; 332 m = dtom(q); 333 t = m->m_next; 334 m->m_next = 0; 335 m_cat(m, t); 336 q = q->ipf_next; 337 while (q != (struct ipasfrag *)fp) { 338 t = dtom(q); 339 q = q->ipf_next; 340 m_cat(m, t); 341 } 342 343 /* 344 * Create header for new ip packet by 345 * modifying header of first packet; 346 * dequeue and discard fragment reassembly header. 347 * Make header visible. 348 */ 349 ip = fp->ipq_next; 350 ip->ip_len = next; 351 ((struct ip *)ip)->ip_src = fp->ipq_src; 352 ((struct ip *)ip)->ip_dst = fp->ipq_dst; 353 remque(fp); 354 (void) m_free(dtom(fp)); 355 m = dtom(ip); 356 m->m_len += sizeof (struct ipasfrag); 357 m->m_off -= sizeof (struct ipasfrag); 358 return ((struct ip *)ip); 359 360 dropfrag: 361 m_freem(m); 362 return (0); 363 } 364 365 /* 366 * Free a fragment reassembly header and all 367 * associated datagrams. 368 */ 369 ip_freef(fp) 370 struct ipq *fp; 371 { 372 register struct ipasfrag *q, *p; 373 374 for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = p) { 375 p = q->ipf_next; 376 ip_deq(q); 377 m_freem(dtom(q)); 378 } 379 remque(fp); 380 (void) m_free(dtom(fp)); 381 } 382 383 /* 384 * Put an ip fragment on a reassembly chain. 385 * Like insque, but pointers in middle of structure. 
386 */ 387 ip_enq(p, prev) 388 register struct ipasfrag *p, *prev; 389 { 390 391 p->ipf_prev = prev; 392 p->ipf_next = prev->ipf_next; 393 prev->ipf_next->ipf_prev = p; 394 prev->ipf_next = p; 395 } 396 397 /* 398 * To ip_enq as remque is to insque. 399 */ 400 ip_deq(p) 401 register struct ipasfrag *p; 402 { 403 404 p->ipf_prev->ipf_next = p->ipf_next; 405 p->ipf_next->ipf_prev = p->ipf_prev; 406 } 407 408 /* 409 * IP timer processing; 410 * if a timer expires on a reassembly 411 * queue, discard it. 412 */ 413 ip_slowtimo() 414 { 415 register struct ipq *fp; 416 int s = splnet(); 417 418 fp = ipq.next; 419 if (fp == 0) { 420 splx(s); 421 return; 422 } 423 while (fp != &ipq) { 424 --fp->ipq_ttl; 425 fp = fp->next; 426 if (fp->prev->ipq_ttl == 0) 427 ip_freef(fp->prev); 428 } 429 splx(s); 430 } 431 432 /* 433 * Drain off all datagram fragments. 434 */ 435 ip_drain() 436 { 437 438 while (ipq.next != &ipq) 439 ip_freef(ipq.next); 440 } 441 442 /* 443 * Do option processing on a datagram, 444 * possibly discarding it if bad options 445 * are encountered. 446 */ 447 ip_dooptions(ip) 448 struct ip *ip; 449 { 450 register u_char *cp; 451 int opt, optlen, cnt, code, type; 452 struct in_addr *sin; 453 register struct ip_timestamp *ipt; 454 register struct ifnet *ifp; 455 struct in_addr t; 456 457 cp = (u_char *)(ip + 1); 458 cnt = (ip->ip_hl << 2) - sizeof (struct ip); 459 for (; cnt > 0; cnt -= optlen, cp += optlen) { 460 opt = cp[0]; 461 if (opt == IPOPT_EOL) 462 break; 463 if (opt == IPOPT_NOP) 464 optlen = 1; 465 else 466 optlen = cp[1]; 467 switch (opt) { 468 469 default: 470 break; 471 472 /* 473 * Source routing with record. 474 * Find interface with current destination address. 475 * If none on this machine then drop if strictly routed, 476 * or do nothing if loosely routed. 477 * Record interface address and bring up next address 478 * component. If strictly routed make sure next 479 * address on directly accessible net. 
480 */ 481 case IPOPT_LSRR: 482 case IPOPT_SSRR: 483 if (cp[2] < 4 || cp[2] > optlen - (sizeof (long) - 1)) 484 break; 485 sin = (struct in_addr *)(cp + cp[2]); 486 ipaddr.sin_addr = *sin; 487 ifp = if_ifwithaddr((struct sockaddr *)&ipaddr); 488 type = ICMP_UNREACH, code = ICMP_UNREACH_SRCFAIL; 489 if (ifp == 0) { 490 if (opt == IPOPT_SSRR) 491 goto bad; 492 break; 493 } 494 t = ip->ip_dst; ip->ip_dst = *sin; *sin = t; 495 cp[2] += 4; 496 if (cp[2] > optlen - (sizeof (long) - 1)) 497 break; 498 ip->ip_dst = sin[1]; 499 if (opt == IPOPT_SSRR && 500 if_ifonnetof(in_netof(ip->ip_dst)) == 0) 501 goto bad; 502 break; 503 504 case IPOPT_TS: 505 code = cp - (u_char *)ip; 506 type = ICMP_PARAMPROB; 507 ipt = (struct ip_timestamp *)cp; 508 if (ipt->ipt_len < 5) 509 goto bad; 510 if (ipt->ipt_ptr > ipt->ipt_len - sizeof (long)) { 511 if (++ipt->ipt_oflw == 0) 512 goto bad; 513 break; 514 } 515 sin = (struct in_addr *)(cp+cp[2]); 516 switch (ipt->ipt_flg) { 517 518 case IPOPT_TS_TSONLY: 519 break; 520 521 case IPOPT_TS_TSANDADDR: 522 if (ipt->ipt_ptr + 8 > ipt->ipt_len) 523 goto bad; 524 if (ifinet == 0) 525 goto bad; /* ??? */ 526 *sin++ = ((struct sockaddr_in *)&ifinet->if_addr)->sin_addr; 527 break; 528 529 case IPOPT_TS_PRESPEC: 530 ipaddr.sin_addr = *sin; 531 if (if_ifwithaddr((struct sockaddr *)&ipaddr) == 0) 532 continue; 533 if (ipt->ipt_ptr + 8 > ipt->ipt_len) 534 goto bad; 535 ipt->ipt_ptr += 4; 536 break; 537 538 default: 539 goto bad; 540 } 541 *(n_time *)sin = iptime(); 542 ipt->ipt_ptr += 4; 543 } 544 } 545 return (0); 546 bad: 547 icmp_error(ip, type, code); 548 return (1); 549 } 550 551 /* 552 * Strip out IP options, at higher 553 * level protocol in the kernel. 554 * Second argument is buffer to which options 555 * will be moved, and return value is their length. 
*/
ip_stripoptions(ip, mopt)
	struct ip *ip;
	struct mbuf *mopt;
{
	register int i;
	register struct mbuf *m;
	int olen;

	olen = (ip->ip_hl<<2) - sizeof (struct ip);
	m = dtom(ip);
	ip++;				/* ip now addresses the options */
	if (mopt) {
		mopt->m_len = olen;
		mopt->m_off = MMINOFF;
		/*
		 * Save the options in the caller's buffer.  The
		 * destination must be mopt's data area; copying to
		 * mtod(m, ...) would write into the packet's own mbuf
		 * and leave mopt unfilled.
		 */
		bcopy((caddr_t)ip, mtod(mopt, caddr_t), (unsigned)olen);
	}
	/* Slide what follows the options in this mbuf down over them. */
	i = m->m_len - (sizeof (struct ip) + olen);
	bcopy((caddr_t)ip+olen, (caddr_t)ip, (unsigned)i);
	m->m_len -= olen;
	return (olen);
}

/*
 * Error value handed to in_pcbnotify for each control command,
 * indexed by command number; 0 means the command is ignored
 * by ip_ctlinput.
 */
u_char inetctlerrmap[] = {
	ECONNABORTED,	ECONNABORTED,	0,		0,
	0,		0,
	EHOSTDOWN,	EHOSTUNREACH,	ENETUNREACH,	EHOSTUNREACH,
	ECONNREFUSED,	ECONNREFUSED,	EMSGSIZE,	0,
	0,		0,		0,		0
};

/*
 * Control input: translate command cmd into an error through
 * inetctlerrmap and notify the tcp and udp control blocks that
 * match the address taken from arg.  How arg is interpreted
 * depends on cmd (sockaddr_in, in_addr, or icmp message).
 */
ip_ctlinput(cmd, arg)
	int cmd;
	caddr_t arg;
{
	struct in_addr *in;
	int tcp_abort(), udp_abort();
	extern struct inpcb tcb, udb;

	if (cmd < 0 || cmd > PRC_NCMDS)
		return;
	/* Never index past the end of the table, whatever PRC_NCMDS is. */
	if (cmd >= (int)(sizeof (inetctlerrmap) / sizeof (inetctlerrmap[0])))
		return;
	if (inetctlerrmap[cmd] == 0)
		return;		/* XXX */
	if (cmd == PRC_IFDOWN)
		in = &((struct sockaddr_in *)arg)->sin_addr;
	else if (cmd == PRC_HOSTDEAD || cmd == PRC_HOSTUNREACH)
		in = (struct in_addr *)arg;
	else
		in = &((struct icmp *)arg)->icmp_ip.ip_dst;
/* THIS IS VERY QUESTIONABLE, SHOULD HIT ALL PROTOCOLS */
	in_pcbnotify(&tcb, in, (int)inetctlerrmap[cmd], tcp_abort);
	in_pcbnotify(&udb, in, (int)inetctlerrmap[cmd], udp_abort);
}

int	ipprintfs = 0;		/* nonzero: ip_forward prints each packet */
int	ipforwarding = 1;	/* zero: ip_forward refuses to forward */
/*
 * Forward a packet.  If some error occurs return the sender
 * an icmp packet.  Note we can't always generate a meaningful
 * icmp message because icmp doesn't have a large enough repertoire
 * of codes and types.
616 */ 617 ip_forward(ip) 618 register struct ip *ip; 619 { 620 register int error, type, code; 621 struct mbuf *mopt, *mcopy; 622 623 if (ipprintfs) 624 printf("forward: src %x dst %x ttl %x\n", ip->ip_src, 625 ip->ip_dst, ip->ip_ttl); 626 if (ipforwarding == 0) { 627 /* can't tell difference between net and host */ 628 type = ICMP_UNREACH, code = ICMP_UNREACH_NET; 629 goto sendicmp; 630 } 631 if (ip->ip_ttl < IPTTLDEC) { 632 type = ICMP_TIMXCEED, code = ICMP_TIMXCEED_INTRANS; 633 goto sendicmp; 634 } 635 ip->ip_ttl -= IPTTLDEC; 636 mopt = m_get(M_DONTWAIT, MT_DATA); 637 if (mopt == NULL) { 638 m_freem(dtom(ip)); 639 return; 640 } 641 642 /* 643 * Save at most 64 bytes of the packet in case 644 * we need to generate an ICMP message to the src. 645 */ 646 mcopy = m_copy(dtom(ip), 0, imin(ip->ip_len, 64)); 647 ip_stripoptions(ip, mopt); 648 649 error = ip_output(dtom(ip), mopt, (struct route *)0, IP_FORWARDING); 650 if (error == 0) { 651 if (mcopy) 652 m_freem(mcopy); 653 return; 654 } 655 if (mcopy == NULL) 656 return; 657 ip = mtod(mcopy, struct ip *); 658 type = ICMP_UNREACH, code = 0; /* need ``undefined'' */ 659 switch (error) { 660 661 case ENETUNREACH: 662 case ENETDOWN: 663 code = ICMP_UNREACH_NET; 664 break; 665 666 case EMSGSIZE: 667 code = ICMP_UNREACH_NEEDFRAG; 668 break; 669 670 case EPERM: 671 code = ICMP_UNREACH_PORT; 672 break; 673 674 case ENOBUFS: 675 type = ICMP_SOURCEQUENCH; 676 break; 677 678 case EHOSTDOWN: 679 case EHOSTUNREACH: 680 code = ICMP_UNREACH_HOST; 681 break; 682 } 683 sendicmp: 684 icmp_error(ip, type, code); 685 } 686