1 /* ip_input.c 1.41 82/04/24 */ 2 3 #include "../h/param.h" 4 #include "../h/systm.h" 5 #include "../h/clock.h" 6 #include "../h/mbuf.h" 7 #include "../h/protosw.h" 8 #include "../h/socket.h" 9 #include "../net/in.h" 10 #include "../net/in_systm.h" 11 #include "../net/if.h" 12 #include "../net/ip.h" /* belongs before in.h */ 13 #include "../net/ip_var.h" 14 #include "../net/ip_icmp.h" 15 #include "../net/tcp.h" 16 #include <errno.h> 17 18 u_char ip_protox[IPPROTO_MAX]; 19 int ipqmaxlen = IFQ_MAXLEN; 20 struct ifnet *ifinet; /* first inet interface */ 21 22 /* 23 * IP initialization: fill in IP protocol switch table. 24 * All protocols not implemented in kernel go to raw IP protocol handler. 25 */ 26 ip_init() 27 { 28 register struct protosw *pr; 29 register int i; 30 31 COUNT(IP_INIT); 32 pr = pffindproto(PF_INET, IPPROTO_RAW); 33 if (pr == 0) 34 panic("ip_init"); 35 for (i = 0; i < IPPROTO_MAX; i++) 36 ip_protox[i] = pr - protosw; 37 for (pr = protosw; pr <= protoswLAST; pr++) 38 if (pr->pr_family == PF_INET && 39 pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) 40 ip_protox[pr->pr_protocol] = pr - protosw; 41 ipq.next = ipq.prev = &ipq; 42 ip_id = time & 0xffff; 43 ipintrq.ifq_maxlen = ipqmaxlen; 44 ifinet = if_ifwithaf(AF_INET); 45 } 46 47 u_char ipcksum = 1; 48 struct ip *ip_reass(); 49 struct sockaddr_in ipaddr = { AF_INET }; 50 51 /* 52 * Ip input routine. Checksum and byte swap header. If fragmented 53 * try to reassamble. If complete and fragment queue exists, discard. 54 * Process options. Pass to next level. 55 */ 56 ipintr() 57 { 58 register struct ip *ip; 59 register struct mbuf *m; 60 struct mbuf *m0, *mopt; 61 register int i; 62 register struct ipq *fp; 63 int hlen, s; 64 65 COUNT(IPINTR); 66 next: 67 /* 68 * Get next datagram off input queue and get IP header 69 * in first mbuf. 70 */ 71 s = splimp(); 72 IF_DEQUEUE(&ipintrq, m); 73 splx(s); 74 if (m == 0) 75 return; 76 if ((m->m_off > MMAXOFF || m->m_len < sizeof (struct ip)) && 77 (m = m_pullup(m, sizeof (struct ip))) == 0) 78 return; 79 ip = mtod(m, struct ip *); 80 if ((hlen = ip->ip_hl << 2) > m->m_len) { 81 if ((m = m_pullup(m, hlen)) == 0) 82 return; 83 ip = mtod(m, struct ip *); 84 } 85 if (ipcksum) 86 if (ip->ip_sum = in_cksum(m, hlen)) { 87 printf("ip_sum %x\n", ip->ip_sum); /* XXX */ 88 ipstat.ips_badsum++; 89 goto bad; 90 } 91 92 #if vax 93 /* 94 * Convert fields to host representation. 95 */ 96 ip->ip_len = ntohs((u_short)ip->ip_len); 97 ip->ip_id = ntohs(ip->ip_id); 98 ip->ip_off = ntohs((u_short)ip->ip_off); 99 #endif 100 101 /* 102 * Check that the amount of data in the buffers 103 * is as at least much as the IP header would have us expect. 104 * Trim mbufs if longer than we expect. 105 * Drop packet if shorter than we expect. 106 */ 107 i = -ip->ip_len; 108 m0 = m; 109 for (;;) { 110 i += m->m_len; 111 if (m->m_next == 0) 112 break; 113 m = m->m_next; 114 } 115 if (i != 0) { 116 if (i < 0) { 117 ipstat.ips_tooshort++; 118 goto bad; 119 } 120 if (i <= m->m_len) 121 m->m_len -= i; 122 else 123 m_adj(m0, -i); 124 } 125 m = m0; 126 127 /* 128 * Process options and, if not destined for us, 129 * ship it on. ip_dooptions returns 1 when an 130 * error was detected (causing an icmp message 131 * to be sent). 132 */ 133 if (hlen > sizeof (struct ip) && ip_dooptions(ip)) 134 goto next; 135 136 /* 137 * Fast check on the first internet 138 * interface in the list. 139 */ 140 if (ifinet) { 141 struct sockaddr_in *sin; 142 143 sin = (struct sockaddr_in *)&ifinet->if_addr; 144 if (sin->sin_addr.s_addr == ip->ip_dst.s_addr) 145 goto ours; 146 sin = (struct sockaddr_in *)&ifinet->if_broadaddr; 147 if ((ifinet->if_flags & IFF_BROADCAST) && 148 sin->sin_addr.s_addr == ip->ip_dst.s_addr) 149 goto ours; 150 } 151 ipaddr.sin_addr = ip->ip_dst; 152 if (if_ifwithaddr((struct sockaddr *)&ipaddr) == 0) { 153 ip_forward(ip); 154 goto next; 155 } 156 157 ours: 158 /* 159 * Look for queue of fragments 160 * of this datagram. 161 */ 162 for (fp = ipq.next; fp != &ipq; fp = fp->next) 163 if (ip->ip_id == fp->ipq_id && 164 ip->ip_src.s_addr == fp->ipq_src.s_addr && 165 ip->ip_dst.s_addr == fp->ipq_dst.s_addr && 166 ip->ip_p == fp->ipq_p) 167 goto found; 168 fp = 0; 169 found: 170 171 /* 172 * Adjust ip_len to not reflect header, 173 * set ip_mff if more fragments are expected, 174 * convert offset of this to bytes. 175 */ 176 ip->ip_len -= hlen; 177 ((struct ipasfrag *)ip)->ipf_mff = 0; 178 if (ip->ip_off & IP_MF) 179 ((struct ipasfrag *)ip)->ipf_mff = 1; 180 ip->ip_off <<= 3; 181 182 /* 183 * If datagram marked as having more fragments 184 * or if this is not the first fragment, 185 * attempt reassembly; if it succeeds, proceed. 186 */ 187 if (((struct ipasfrag *)ip)->ipf_mff || ip->ip_off) { 188 ip = ip_reass((struct ipasfrag *)ip, fp); 189 if (ip == 0) 190 goto next; 191 hlen = ip->ip_hl << 2; 192 m = dtom(ip); 193 } else 194 if (fp) 195 (void) ip_freef(fp); 196 197 /* 198 * Switch out to protocol's input routine. 199 */ 200 (*protosw[ip_protox[ip->ip_p]].pr_input)(m); 201 goto next; 202 bad: 203 m_freem(m); 204 goto next; 205 } 206 207 /* 208 * Take incoming datagram fragment and try to 209 * reassemble it into whole datagram. If a chain for 210 * reassembly of this datagram already exists, then it 211 * is given as fp; otherwise have to make a chain. 212 */ 213 struct ip * 214 ip_reass(ip, fp) 215 register struct ipasfrag *ip; 216 register struct ipq *fp; 217 { 218 register struct mbuf *m = dtom(ip); 219 register struct ipasfrag *q; 220 struct mbuf *t; 221 int hlen = ip->ip_hl << 2; 222 int i, next; 223 COUNT(IP_REASS); 224 225 /* 226 * Presence of header sizes in mbufs 227 * would confuse code below. 228 */ 229 m->m_off += hlen; 230 m->m_len -= hlen; 231 232 /* 233 * If first fragment to arrive, create a reassembly queue. 234 */ 235 if (fp == 0) { 236 if ((t = m_get(M_WAIT)) == NULL) 237 goto dropfrag; 238 t->m_off = MMINOFF; 239 fp = mtod(t, struct ipq *); 240 insque(fp, &ipq); 241 fp->ipq_ttl = IPFRAGTTL; 242 fp->ipq_p = ip->ip_p; 243 fp->ipq_id = ip->ip_id; 244 fp->ipq_next = fp->ipq_prev = (struct ipasfrag *)fp; 245 fp->ipq_src = ((struct ip *)ip)->ip_src; 246 fp->ipq_dst = ((struct ip *)ip)->ip_dst; 247 q = (struct ipasfrag *)fp; 248 goto insert; 249 } 250 251 /* 252 * Find a segment which begins after this one does. 253 */ 254 for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next) 255 if (q->ip_off > ip->ip_off) 256 break; 257 258 /* 259 * If there is a preceding segment, it may provide some of 260 * our data already. If so, drop the data from the incoming 261 * segment. If it provides all of our data, drop us. 262 */ 263 if (q->ipf_prev != (struct ipasfrag *)fp) { 264 i = q->ipf_prev->ip_off + q->ipf_prev->ip_len - ip->ip_off; 265 if (i > 0) { 266 if (i >= ip->ip_len) 267 goto dropfrag; 268 m_adj(dtom(ip), i); 269 ip->ip_off += i; 270 ip->ip_len -= i; 271 } 272 } 273 274 /* 275 * While we overlap succeeding segments trim them or, 276 * if they are completely covered, dequeue them. 277 */ 278 while (q != (struct ipasfrag *)fp && ip->ip_off + ip->ip_len > q->ip_off) { 279 i = (ip->ip_off + ip->ip_len) - q->ip_off; 280 if (i < q->ip_len) { 281 q->ip_len -= i; 282 q->ip_off += i; 283 m_adj(dtom(q), i); 284 break; 285 } 286 q = q->ipf_next; 287 m_freem(dtom(q->ipf_prev)); 288 ip_deq(q->ipf_prev); 289 } 290 291 insert: 292 /* 293 * Stick new segment in its place; 294 * check for complete reassembly. 295 */ 296 ip_enq(ip, q->ipf_prev); 297 next = 0; 298 for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next) { 299 if (q->ip_off != next) 300 return (0); 301 next += q->ip_len; 302 } 303 if (q->ipf_prev->ipf_mff) 304 return (0); 305 306 /* 307 * Reassembly is complete; concatenate fragments. 308 */ 309 q = fp->ipq_next; 310 m = dtom(q); 311 t = m->m_next; 312 m->m_next = 0; 313 m_cat(m, t); 314 q = q->ipf_next; 315 while (q != (struct ipasfrag *)fp) { 316 t = dtom(q); 317 q = q->ipf_next; 318 m_cat(m, t); 319 } 320 321 /* 322 * Create header for new ip packet by 323 * modifying header of first packet; 324 * dequeue and discard fragment reassembly header. 325 * Make header visible. 326 */ 327 ip = fp->ipq_next; 328 ip->ip_len = next; 329 ((struct ip *)ip)->ip_src = fp->ipq_src; 330 ((struct ip *)ip)->ip_dst = fp->ipq_dst; 331 remque(fp); 332 (void) m_free(dtom(fp)); 333 m = dtom(ip); 334 m->m_len += sizeof (struct ipasfrag); 335 m->m_off -= sizeof (struct ipasfrag); 336 return ((struct ip *)ip); 337 338 dropfrag: 339 m_freem(m); 340 return (0); 341 } 342 343 /* 344 * Free a fragment reassembly header and all 345 * associated datagrams. 346 */ 347 struct ipq * 348 ip_freef(fp) 349 struct ipq *fp; 350 { 351 register struct ipasfrag *q; 352 struct mbuf *m; 353 COUNT(IP_FREEF); 354 355 for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next) 356 m_freem(dtom(q)); 357 m = dtom(fp); 358 fp = fp->next; 359 remque(fp->prev); 360 (void) m_free(m); 361 return (fp); 362 } 363 364 /* 365 * Put an ip fragment on a reassembly chain. 366 * Like insque, but pointers in middle of structure. 367 */ 368 ip_enq(p, prev) 369 register struct ipasfrag *p, *prev; 370 { 371 372 COUNT(IP_ENQ); 373 p->ipf_prev = prev; 374 p->ipf_next = prev->ipf_next; 375 prev->ipf_next->ipf_prev = p; 376 prev->ipf_next = p; 377 } 378 379 /* 380 * To ip_enq as remque is to insque. 381 */ 382 ip_deq(p) 383 register struct ipasfrag *p; 384 { 385 386 COUNT(IP_DEQ); 387 p->ipf_prev->ipf_next = p->ipf_next; 388 p->ipf_next->ipf_prev = p->ipf_prev; 389 } 390 391 /* 392 * IP timer processing; 393 * if a timer expires on a reassembly 394 * queue, discard it. 395 */ 396 ip_slowtimo() 397 { 398 register struct ipq *fp; 399 int s = splnet(); 400 401 COUNT(IP_SLOWTIMO); 402 fp = ipq.next; 403 if (fp == 0) { 404 splx(s); 405 return; 406 } 407 while (fp != &ipq) 408 if (--fp->ipq_ttl == 0) 409 fp = ip_freef(fp); 410 else 411 fp = fp->next; 412 splx(s); 413 } 414 415 /* 416 * Drain off all datagram fragments. 417 */ 418 ip_drain() 419 { 420 421 COUNT(IP_DRAIN); 422 while (ipq.next != &ipq) 423 (void) ip_freef(ipq.next); 424 } 425 426 /* 427 * Do option processing on a datagram, 428 * possibly discarding it if bad options 429 * are encountered. 430 */ 431 ip_dooptions(ip) 432 struct ip *ip; 433 { 434 register u_char *cp; 435 int opt, optlen, cnt, code, type; 436 struct in_addr *sin; 437 register struct ip_timestamp *ipt; 438 register struct ifnet *ifp; 439 struct in_addr t; 440 441 COUNT(IP_DOOPTIONS); 442 cp = (u_char *)(ip + 1); 443 cnt = (ip->ip_hl << 2) - sizeof (struct ip); 444 for (; cnt > 0; cnt -= optlen, cp += optlen) { 445 opt = cp[0]; 446 if (opt == IPOPT_EOL) 447 break; 448 if (opt == IPOPT_NOP) 449 optlen = 1; 450 else 451 optlen = cp[1]; 452 switch (opt) { 453 454 default: 455 break; 456 457 /* 458 * Source routing with record. 459 * Find interface with current destination address. 460 * If none on this machine then drop if strictly routed, 461 * or do nothing if loosely routed. 462 * Record interface address and bring up next address 463 * component. If strictly routed make sure next 464 * address on directly accessible net. 465 */ 466 case IPOPT_LSRR: 467 if (cp[2] < 4 || cp[2] > optlen - (sizeof (long) - 1)) 468 break; 469 sin = (struct in_addr *)(cp + cp[2]); 470 ipaddr.sin_addr = *sin; 471 ifp = if_ifwithaddr((struct sockaddr *)&ipaddr); 472 type = ICMP_UNREACH, code = ICMP_UNREACH_SRCFAIL; 473 if (ifp == 0) { 474 if (opt == IPOPT_SSRR) 475 goto bad; 476 break; 477 } 478 t = ip->ip_dst; ip->ip_dst = *sin; *sin = t; 479 cp[2] += 4; 480 if (cp[2] > optlen - (sizeof (long) - 1)) 481 break; 482 ip->ip_dst = sin[1]; 483 if (opt == IPOPT_SSRR && 484 if_ifonnetof(ip->ip_dst.s_net) == 0) 485 goto bad; 486 break; 487 488 case IPOPT_TS: 489 code = cp - (u_char *)ip; 490 type = ICMP_PARAMPROB; 491 ipt = (struct ip_timestamp *)cp; 492 if (ipt->ipt_len < 5) 493 goto bad; 494 if (ipt->ipt_ptr > ipt->ipt_len - sizeof (long)) { 495 if (++ipt->ipt_oflw == 0) 496 goto bad; 497 break; 498 } 499 sin = (struct in_addr *)(cp+cp[2]); 500 switch (ipt->ipt_flg) { 501 502 case IPOPT_TS_TSONLY: 503 break; 504 505 case IPOPT_TS_TSANDADDR: 506 if (ipt->ipt_ptr + 8 > ipt->ipt_len) 507 goto bad; 508 if (ifinet == 0) 509 goto bad; /* ??? */ 510 *sin++ = ((struct sockaddr_in *)&ifinet->if_addr)->sin_addr; 511 break; 512 513 case IPOPT_TS_PRESPEC: 514 ipaddr.sin_addr = *sin; 515 if (!if_ifwithaddr((struct sockaddr *)&ipaddr)) 516 continue; 517 if (ipt->ipt_ptr + 8 > ipt->ipt_len) 518 goto bad; 519 ipt->ipt_ptr += 4; 520 break; 521 522 default: 523 goto bad; 524 } 525 *(n_time *)sin = iptime(); 526 ipt->ipt_ptr += 4; 527 } 528 } 529 return (0); 530 bad: 531 icmp_error(ip, type, code); 532 return (1); 533 } 534 535 /* 536 * Strip out IP options, at higher 537 * level protocol in the kernel. 538 * Second argument is buffer to which options 539 * will be moved, and return value is their length. 540 */ 541 ip_stripoptions(ip, mopt) 542 struct ip *ip; 543 struct mbuf *mopt; 544 { 545 register int i; 546 register struct mbuf *m; 547 int olen; 548 COUNT(IP_STRIPOPTIONS); 549 550 olen = (ip->ip_hl<<2) - sizeof (struct ip); 551 m = dtom(ip); 552 ip++; 553 if (mopt) { 554 mopt->m_len = olen; 555 mopt->m_off = MMINOFF; 556 bcopy((caddr_t)ip, mtod(m, caddr_t), (unsigned)olen); 557 } 558 i = m->m_len - (sizeof (struct ip) + olen); 559 bcopy((caddr_t)ip+olen, (caddr_t)ip, (unsigned)i); 560 m->m_len -= olen; 561 } 562 563 static u_char ctlerrmap[] = { 564 ECONNABORTED, ECONNABORTED, 0, 0, 565 0, 566 #ifdef notdef 567 EHOSTUNREACH, EHOSTDOWN, ENETUNREACH, EHOSTUNREACH, 568 #else 569 ENETUNREACH, ENETUNREACH, ENETUNREACH, ENETUNREACH, 570 #endif 571 ECONNREFUSED, ECONNREFUSED, EMSGSIZE, 0, 572 0, 0, 0, 0 573 }; 574 575 ip_ctlinput(cmd, arg) 576 int cmd; 577 caddr_t arg; 578 { 579 struct in_addr *sin; 580 extern int tcp_abort(), udp_abort(); 581 extern struct inpcb tcb, udb; 582 583 if (cmd < 0 || cmd > PRC_NCMDS) 584 return; 585 if (ctlerrmap[cmd] == 0) 586 return; /* XXX */ 587 if (cmd == PRC_IFDOWN) 588 sin = &((struct sockaddr_in *)arg)->sin_addr; 589 else if (cmd == PRC_HOSTDEAD || cmd == PRC_HOSTUNREACH) 590 sin = (struct in_addr *)arg; 591 else 592 sin = &((struct icmp *)arg)->icmp_ip.ip_dst; 593 in_pcbnotify(&tcb, sin, ctlerrmap[cmd], tcp_abort); 594 in_pcbnotify(&udb, sin, ctlerrmap[cmd], udp_abort); 595 } 596 597 int ipprintfs = 0; 598 int ipforwarding = 1; 599 /* 600 * Forward a packet. If some error occurs return the sender 601 * and icmp packet. Note we can't always generate a meaningful 602 * icmp message because icmp doesn't have a large enough repetoire 603 * of codes and types. 604 */ 605 ip_forward(ip) 606 register struct ip *ip; 607 { 608 register int error, type, code; 609 struct mbuf *mopt; 610 611 if (ipprintfs) 612 printf("forward: src %x dst %x ttl %x\n", ip->ip_src, 613 ip->ip_dst, ip->ip_ttl); 614 if (ipforwarding == 0) { 615 /* can't tell difference between net and host */ 616 type = ICMP_UNREACH, code = ICMP_UNREACH_NET; 617 goto sendicmp; 618 } 619 if (ip->ip_ttl < IPTTLDEC) { 620 type = ICMP_TIMXCEED, code = ICMP_TIMXCEED_INTRANS; 621 goto sendicmp; 622 } 623 ip->ip_ttl -= IPTTLDEC; 624 mopt = m_get(M_DONTWAIT); 625 if (mopt == 0) { 626 m_freem(dtom(ip)); 627 return; 628 } 629 ip_stripoptions(ip, mopt); 630 631 /* last 0 here means no directed broadcast */ 632 if ((error = ip_output(dtom(ip), mopt, 0, 0)) == 0) 633 return; 634 #ifdef notdef 635 /* 636 * Want to generate a message, but lower 637 * layers assume they can free up a message 638 * in the event of an error. This causes 639 * the call to icmp_error to work on ``freed'' 640 * mbuf's, and worse. 641 */ 642 type = ICMP_UNREACH, code = 0; /* need ``undefined'' */ 643 if (error == ENETUNREACH || error == ENETDOWN) 644 code = ICMP_UNREACH_NET; 645 else if (error == EMSGSIZE) 646 code = ICMP_UNREACH_NEEDFRAG; 647 #else 648 return; 649 #endif 650 sendicmp: 651 icmp_error(ip, type, code); 652 } 653