1 /* ip_input.c 1.45 82/06/20 */ 2 3 #include "../h/param.h" 4 #include "../h/systm.h" 5 #include "../h/clock.h" 6 #include "../h/mbuf.h" 7 #include "../h/protosw.h" 8 #include "../h/socket.h" 9 #include "../net/in.h" 10 #include "../net/in_systm.h" 11 #include "../net/if.h" 12 #include "../net/ip.h" /* belongs before in.h */ 13 #include "../net/ip_var.h" 14 #include "../net/ip_icmp.h" 15 #include "../net/tcp.h" 16 #include <errno.h> 17 18 u_char ip_protox[IPPROTO_MAX]; 19 int ipqmaxlen = IFQ_MAXLEN; 20 struct ifnet *ifinet; /* first inet interface */ 21 22 /* 23 * IP initialization: fill in IP protocol switch table. 24 * All protocols not implemented in kernel go to raw IP protocol handler. 25 */ 26 ip_init() 27 { 28 register struct protosw *pr; 29 register int i; 30 31 pr = pffindproto(PF_INET, IPPROTO_RAW); 32 if (pr == 0) 33 panic("ip_init"); 34 for (i = 0; i < IPPROTO_MAX; i++) 35 ip_protox[i] = pr - protosw; 36 for (pr = protosw; pr <= protoswLAST; pr++) 37 if (pr->pr_family == PF_INET && 38 pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) 39 ip_protox[pr->pr_protocol] = pr - protosw; 40 ipq.next = ipq.prev = &ipq; 41 ip_id = time & 0xffff; 42 ipintrq.ifq_maxlen = ipqmaxlen; 43 ifinet = if_ifwithaf(AF_INET); 44 } 45 46 u_char ipcksum = 1; 47 struct ip *ip_reass(); 48 struct sockaddr_in ipaddr = { AF_INET }; 49 50 /* 51 * Ip input routine. Checksum and byte swap header. If fragmented 52 * try to reassamble. If complete and fragment queue exists, discard. 53 * Process options. Pass to next level. 54 */ 55 ipintr() 56 { 57 register struct ip *ip; 58 register struct mbuf *m; 59 struct mbuf *m0, *mopt; 60 register int i; 61 register struct ipq *fp; 62 int hlen, s; 63 64 next: 65 /* 66 * Get next datagram off input queue and get IP header 67 * in first mbuf. 68 */ 69 s = splimp(); 70 IF_DEQUEUE(&ipintrq, m); 71 splx(s); 72 if (m == 0) 73 return; 74 if ((m->m_off > MMAXOFF || m->m_len < sizeof (struct ip)) && 75 (m = m_pullup(m, sizeof (struct ip))) == 0) 76 return; 77 ip = mtod(m, struct ip *); 78 if ((hlen = ip->ip_hl << 2) > m->m_len) { 79 if ((m = m_pullup(m, hlen)) == 0) 80 return; 81 ip = mtod(m, struct ip *); 82 } 83 if (ipcksum) 84 if (ip->ip_sum = in_cksum(m, hlen)) { 85 printf("ip_sum %x\n", ip->ip_sum); /* XXX */ 86 ipstat.ips_badsum++; 87 goto bad; 88 } 89 90 #if vax 91 /* 92 * Convert fields to host representation. 93 */ 94 ip->ip_len = ntohs((u_short)ip->ip_len); 95 ip->ip_id = ntohs(ip->ip_id); 96 ip->ip_off = ntohs((u_short)ip->ip_off); 97 #endif 98 99 /* 100 * Check that the amount of data in the buffers 101 * is as at least much as the IP header would have us expect. 102 * Trim mbufs if longer than we expect. 103 * Drop packet if shorter than we expect. 104 */ 105 i = -ip->ip_len; 106 m0 = m; 107 for (;;) { 108 i += m->m_len; 109 if (m->m_next == 0) 110 break; 111 m = m->m_next; 112 } 113 if (i != 0) { 114 if (i < 0) { 115 ipstat.ips_tooshort++; 116 goto bad; 117 } 118 if (i <= m->m_len) 119 m->m_len -= i; 120 else 121 m_adj(m0, -i); 122 } 123 m = m0; 124 125 /* 126 * Process options and, if not destined for us, 127 * ship it on. ip_dooptions returns 1 when an 128 * error was detected (causing an icmp message 129 * to be sent). 130 */ 131 if (hlen > sizeof (struct ip) && ip_dooptions(ip)) 132 goto next; 133 134 /* 135 * Fast check on the first internet 136 * interface in the list. 137 */ 138 if (ifinet) { 139 struct sockaddr_in *sin; 140 141 sin = (struct sockaddr_in *)&ifinet->if_addr; 142 if (sin->sin_addr.s_addr == ip->ip_dst.s_addr) 143 goto ours; 144 sin = (struct sockaddr_in *)&ifinet->if_broadaddr; 145 if ((ifinet->if_flags & IFF_BROADCAST) && 146 sin->sin_addr.s_addr == ip->ip_dst.s_addr) 147 goto ours; 148 } 149 ipaddr.sin_addr = ip->ip_dst; 150 if (if_ifwithaddr((struct sockaddr *)&ipaddr) == 0) { 151 ip_forward(ip); 152 goto next; 153 } 154 155 ours: 156 /* 157 * Look for queue of fragments 158 * of this datagram. 159 */ 160 for (fp = ipq.next; fp != &ipq; fp = fp->next) 161 if (ip->ip_id == fp->ipq_id && 162 ip->ip_src.s_addr == fp->ipq_src.s_addr && 163 ip->ip_dst.s_addr == fp->ipq_dst.s_addr && 164 ip->ip_p == fp->ipq_p) 165 goto found; 166 fp = 0; 167 found: 168 169 /* 170 * Adjust ip_len to not reflect header, 171 * set ip_mff if more fragments are expected, 172 * convert offset of this to bytes. 173 */ 174 ip->ip_len -= hlen; 175 ((struct ipasfrag *)ip)->ipf_mff = 0; 176 if (ip->ip_off & IP_MF) 177 ((struct ipasfrag *)ip)->ipf_mff = 1; 178 ip->ip_off <<= 3; 179 180 /* 181 * If datagram marked as having more fragments 182 * or if this is not the first fragment, 183 * attempt reassembly; if it succeeds, proceed. 184 */ 185 if (((struct ipasfrag *)ip)->ipf_mff || ip->ip_off) { 186 ip = ip_reass((struct ipasfrag *)ip, fp); 187 if (ip == 0) 188 goto next; 189 hlen = ip->ip_hl << 2; 190 m = dtom(ip); 191 } else 192 if (fp) 193 (void) ip_freef(fp); 194 195 /* 196 * Switch out to protocol's input routine. 197 */ 198 (*protosw[ip_protox[ip->ip_p]].pr_input)(m); 199 goto next; 200 bad: 201 m_freem(m); 202 goto next; 203 } 204 205 /* 206 * Take incoming datagram fragment and try to 207 * reassemble it into whole datagram. If a chain for 208 * reassembly of this datagram already exists, then it 209 * is given as fp; otherwise have to make a chain. 210 */ 211 struct ip * 212 ip_reass(ip, fp) 213 register struct ipasfrag *ip; 214 register struct ipq *fp; 215 { 216 register struct mbuf *m = dtom(ip); 217 register struct ipasfrag *q; 218 struct mbuf *t; 219 int hlen = ip->ip_hl << 2; 220 int i, next; 221 222 /* 223 * Presence of header sizes in mbufs 224 * would confuse code below. 225 */ 226 m->m_off += hlen; 227 m->m_len -= hlen; 228 229 /* 230 * If first fragment to arrive, create a reassembly queue. 231 */ 232 if (fp == 0) { 233 if ((t = m_get(M_WAIT)) == NULL) 234 goto dropfrag; 235 t->m_off = MMINOFF; 236 fp = mtod(t, struct ipq *); 237 insque(fp, &ipq); 238 fp->ipq_ttl = IPFRAGTTL; 239 fp->ipq_p = ip->ip_p; 240 fp->ipq_id = ip->ip_id; 241 fp->ipq_next = fp->ipq_prev = (struct ipasfrag *)fp; 242 fp->ipq_src = ((struct ip *)ip)->ip_src; 243 fp->ipq_dst = ((struct ip *)ip)->ip_dst; 244 q = (struct ipasfrag *)fp; 245 goto insert; 246 } 247 248 /* 249 * Find a segment which begins after this one does. 250 */ 251 for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next) 252 if (q->ip_off > ip->ip_off) 253 break; 254 255 /* 256 * If there is a preceding segment, it may provide some of 257 * our data already. If so, drop the data from the incoming 258 * segment. If it provides all of our data, drop us. 259 */ 260 if (q->ipf_prev != (struct ipasfrag *)fp) { 261 i = q->ipf_prev->ip_off + q->ipf_prev->ip_len - ip->ip_off; 262 if (i > 0) { 263 if (i >= ip->ip_len) 264 goto dropfrag; 265 m_adj(dtom(ip), i); 266 ip->ip_off += i; 267 ip->ip_len -= i; 268 } 269 } 270 271 /* 272 * While we overlap succeeding segments trim them or, 273 * if they are completely covered, dequeue them. 274 */ 275 while (q != (struct ipasfrag *)fp && ip->ip_off + ip->ip_len > q->ip_off) { 276 i = (ip->ip_off + ip->ip_len) - q->ip_off; 277 if (i < q->ip_len) { 278 q->ip_len -= i; 279 q->ip_off += i; 280 m_adj(dtom(q), i); 281 break; 282 } 283 q = q->ipf_next; 284 m_freem(dtom(q->ipf_prev)); 285 ip_deq(q->ipf_prev); 286 } 287 288 insert: 289 /* 290 * Stick new segment in its place; 291 * check for complete reassembly. 292 */ 293 ip_enq(ip, q->ipf_prev); 294 next = 0; 295 for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next) { 296 if (q->ip_off != next) 297 return (0); 298 next += q->ip_len; 299 } 300 if (q->ipf_prev->ipf_mff) 301 return (0); 302 303 /* 304 * Reassembly is complete; concatenate fragments. 305 */ 306 q = fp->ipq_next; 307 m = dtom(q); 308 t = m->m_next; 309 m->m_next = 0; 310 m_cat(m, t); 311 q = q->ipf_next; 312 while (q != (struct ipasfrag *)fp) { 313 t = dtom(q); 314 q = q->ipf_next; 315 m_cat(m, t); 316 } 317 318 /* 319 * Create header for new ip packet by 320 * modifying header of first packet; 321 * dequeue and discard fragment reassembly header. 322 * Make header visible. 323 */ 324 ip = fp->ipq_next; 325 ip->ip_len = next; 326 ((struct ip *)ip)->ip_src = fp->ipq_src; 327 ((struct ip *)ip)->ip_dst = fp->ipq_dst; 328 remque(fp); 329 (void) m_free(dtom(fp)); 330 m = dtom(ip); 331 m->m_len += sizeof (struct ipasfrag); 332 m->m_off -= sizeof (struct ipasfrag); 333 return ((struct ip *)ip); 334 335 dropfrag: 336 m_freem(m); 337 return (0); 338 } 339 340 /* 341 * Free a fragment reassembly header and all 342 * associated datagrams. 343 */ 344 struct ipq * 345 ip_freef(fp) 346 struct ipq *fp; 347 { 348 register struct ipasfrag *q; 349 struct mbuf *m; 350 351 for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next) 352 m_freem(dtom(q)); 353 m = dtom(fp); 354 fp = fp->next; 355 remque(fp->prev); 356 (void) m_free(m); 357 return (fp); 358 } 359 360 /* 361 * Put an ip fragment on a reassembly chain. 362 * Like insque, but pointers in middle of structure. 363 */ 364 ip_enq(p, prev) 365 register struct ipasfrag *p, *prev; 366 { 367 368 p->ipf_prev = prev; 369 p->ipf_next = prev->ipf_next; 370 prev->ipf_next->ipf_prev = p; 371 prev->ipf_next = p; 372 } 373 374 /* 375 * To ip_enq as remque is to insque. 376 */ 377 ip_deq(p) 378 register struct ipasfrag *p; 379 { 380 381 p->ipf_prev->ipf_next = p->ipf_next; 382 p->ipf_next->ipf_prev = p->ipf_prev; 383 } 384 385 /* 386 * IP timer processing; 387 * if a timer expires on a reassembly 388 * queue, discard it. 389 */ 390 ip_slowtimo() 391 { 392 register struct ipq *fp; 393 int s = splnet(); 394 395 fp = ipq.next; 396 if (fp == 0) { 397 splx(s); 398 return; 399 } 400 while (fp != &ipq) 401 if (--fp->ipq_ttl == 0) 402 fp = ip_freef(fp); 403 else 404 fp = fp->next; 405 splx(s); 406 } 407 408 /* 409 * Drain off all datagram fragments. 410 */ 411 ip_drain() 412 { 413 414 while (ipq.next != &ipq) 415 (void) ip_freef(ipq.next); 416 } 417 418 /* 419 * Do option processing on a datagram, 420 * possibly discarding it if bad options 421 * are encountered. 422 */ 423 ip_dooptions(ip) 424 struct ip *ip; 425 { 426 register u_char *cp; 427 int opt, optlen, cnt, code, type; 428 struct in_addr *sin; 429 register struct ip_timestamp *ipt; 430 register struct ifnet *ifp; 431 struct in_addr t; 432 433 cp = (u_char *)(ip + 1); 434 cnt = (ip->ip_hl << 2) - sizeof (struct ip); 435 for (; cnt > 0; cnt -= optlen, cp += optlen) { 436 opt = cp[0]; 437 if (opt == IPOPT_EOL) 438 break; 439 if (opt == IPOPT_NOP) 440 optlen = 1; 441 else 442 optlen = cp[1]; 443 switch (opt) { 444 445 default: 446 break; 447 448 /* 449 * Source routing with record. 450 * Find interface with current destination address. 451 * If none on this machine then drop if strictly routed, 452 * or do nothing if loosely routed. 453 * Record interface address and bring up next address 454 * component. If strictly routed make sure next 455 * address on directly accessible net. 456 */ 457 case IPOPT_LSRR: 458 if (cp[2] < 4 || cp[2] > optlen - (sizeof (long) - 1)) 459 break; 460 sin = (struct in_addr *)(cp + cp[2]); 461 ipaddr.sin_addr = *sin; 462 ifp = if_ifwithaddr((struct sockaddr *)&ipaddr); 463 type = ICMP_UNREACH, code = ICMP_UNREACH_SRCFAIL; 464 if (ifp == 0) { 465 if (opt == IPOPT_SSRR) 466 goto bad; 467 break; 468 } 469 t = ip->ip_dst; ip->ip_dst = *sin; *sin = t; 470 cp[2] += 4; 471 if (cp[2] > optlen - (sizeof (long) - 1)) 472 break; 473 ip->ip_dst = sin[1]; 474 if (opt == IPOPT_SSRR && 475 if_ifonnetof(in_netof(ip->ip_dst)) == 0) 476 goto bad; 477 break; 478 479 case IPOPT_TS: 480 code = cp - (u_char *)ip; 481 type = ICMP_PARAMPROB; 482 ipt = (struct ip_timestamp *)cp; 483 if (ipt->ipt_len < 5) 484 goto bad; 485 if (ipt->ipt_ptr > ipt->ipt_len - sizeof (long)) { 486 if (++ipt->ipt_oflw == 0) 487 goto bad; 488 break; 489 } 490 sin = (struct in_addr *)(cp+cp[2]); 491 switch (ipt->ipt_flg) { 492 493 case IPOPT_TS_TSONLY: 494 break; 495 496 case IPOPT_TS_TSANDADDR: 497 if (ipt->ipt_ptr + 8 > ipt->ipt_len) 498 goto bad; 499 if (ifinet == 0) 500 goto bad; /* ??? */ 501 *sin++ = ((struct sockaddr_in *)&ifinet->if_addr)->sin_addr; 502 break; 503 504 case IPOPT_TS_PRESPEC: 505 ipaddr.sin_addr = *sin; 506 if (!if_ifwithaddr((struct sockaddr *)&ipaddr)) 507 continue; 508 if (ipt->ipt_ptr + 8 > ipt->ipt_len) 509 goto bad; 510 ipt->ipt_ptr += 4; 511 break; 512 513 default: 514 goto bad; 515 } 516 *(n_time *)sin = iptime(); 517 ipt->ipt_ptr += 4; 518 } 519 } 520 return (0); 521 bad: 522 icmp_error(ip, type, code); 523 return (1); 524 } 525 526 /* 527 * Strip out IP options, at higher 528 * level protocol in the kernel. 529 * Second argument is buffer to which options 530 * will be moved, and return value is their length. 531 */ 532 ip_stripoptions(ip, mopt) 533 struct ip *ip; 534 struct mbuf *mopt; 535 { 536 register int i; 537 register struct mbuf *m; 538 int olen; 539 540 olen = (ip->ip_hl<<2) - sizeof (struct ip); 541 m = dtom(ip); 542 ip++; 543 if (mopt) { 544 mopt->m_len = olen; 545 mopt->m_off = MMINOFF; 546 bcopy((caddr_t)ip, mtod(m, caddr_t), (unsigned)olen); 547 } 548 i = m->m_len - (sizeof (struct ip) + olen); 549 bcopy((caddr_t)ip+olen, (caddr_t)ip, (unsigned)i); 550 m->m_len -= olen; 551 } 552 553 u_char inetctlerrmap[] = { 554 ECONNABORTED, ECONNABORTED, 0, 0, 555 0, 0, 556 EHOSTDOWN, EHOSTUNREACH, ENETUNREACH, EHOSTUNREACH, 557 ECONNREFUSED, ECONNREFUSED, EMSGSIZE, 0, 558 0, 0, 0, 0 559 }; 560 561 ip_ctlinput(cmd, arg) 562 int cmd; 563 caddr_t arg; 564 { 565 struct in_addr *sin; 566 int tcp_abort(), udp_abort(); 567 extern struct inpcb tcb, udb; 568 569 if (cmd < 0 || cmd > PRC_NCMDS) 570 return; 571 if (inetctlerrmap[cmd] == 0) 572 return; /* XXX */ 573 if (cmd == PRC_IFDOWN) 574 sin = &((struct sockaddr_in *)arg)->sin_addr; 575 else if (cmd == PRC_HOSTDEAD || cmd == PRC_HOSTUNREACH) 576 sin = (struct in_addr *)arg; 577 else 578 sin = &((struct icmp *)arg)->icmp_ip.ip_dst; 579 in_pcbnotify(&tcb, sin, inetctlerrmap[cmd], tcp_abort); 580 in_pcbnotify(&udb, sin, inetctlerrmap[cmd], udp_abort); 581 } 582 583 int ipprintfs = 0; 584 int ipforwarding = 1; 585 /* 586 * Forward a packet. If some error occurs return the sender 587 * and icmp packet. Note we can't always generate a meaningful 588 * icmp message because icmp doesn't have a large enough repetoire 589 * of codes and types. 590 */ 591 ip_forward(ip) 592 register struct ip *ip; 593 { 594 register int error, type, code; 595 struct mbuf *mopt, *mcopy; 596 597 if (ipprintfs) 598 printf("forward: src %x dst %x ttl %x\n", ip->ip_src, 599 ip->ip_dst, ip->ip_ttl); 600 if (ipforwarding == 0) { 601 /* can't tell difference between net and host */ 602 type = ICMP_UNREACH, code = ICMP_UNREACH_NET; 603 goto sendicmp; 604 } 605 if (ip->ip_ttl < IPTTLDEC) { 606 type = ICMP_TIMXCEED, code = ICMP_TIMXCEED_INTRANS; 607 goto sendicmp; 608 } 609 ip->ip_ttl -= IPTTLDEC; 610 mopt = m_get(M_DONTWAIT); 611 if (mopt == 0) { 612 m_freem(dtom(ip)); 613 return; 614 } 615 616 /* 617 * Save at most 64 bytes of the packet in case 618 * we need to generate an ICMP message to the src. 619 */ 620 mcopy = m_copy(dtom(ip), 0, min(ip->ip_len, 64)); 621 ip_stripoptions(ip, mopt); 622 623 /* last 0 here means no directed broadcast */ 624 if ((error = ip_output(dtom(ip), mopt, 0, 0)) == 0) { 625 if (mcopy) 626 m_freem(mcopy); 627 return; 628 } 629 ip = mtod(mcopy, struct ip *); 630 type = ICMP_UNREACH, code = 0; /* need ``undefined'' */ 631 switch (error) { 632 633 case ENETUNREACH: 634 case ENETDOWN: 635 code = ICMP_UNREACH_NET; 636 break; 637 638 case EMSGSIZE: 639 code = ICMP_UNREACH_NEEDFRAG; 640 break; 641 642 case EPERM: 643 code = ICMP_UNREACH_PORT; 644 break; 645 646 case ENOBUFS: 647 type = ICMP_SOURCEQUENCH; 648 break; 649 650 case EHOSTDOWN: 651 case EHOSTUNREACH: 652 code = ICMP_UNREACH_HOST; 653 break; 654 } 655 sendicmp: 656 icmp_error(ip, type, code); 657 } 658