xref: /csrg-svn/sys/netinet/ip_input.c (revision 12459)
1 /*	ip_input.c	1.68	83/05/15	*/
2 
3 #include "../h/param.h"
4 #include "../h/systm.h"
5 #include "../h/mbuf.h"
6 #include "../h/domain.h"
7 #include "../h/protosw.h"
8 #include "../h/socket.h"
9 #include "../h/errno.h"
10 #include "../h/time.h"
11 #include "../h/kernel.h"
12 
13 #include "../net/if.h"
14 #include "../net/route.h"
15 
16 #include "../netinet/in.h"
17 #include "../netinet/in_pcb.h"
18 #include "../netinet/in_systm.h"
19 #include "../netinet/ip.h"
20 #include "../netinet/ip_var.h"
21 #include "../netinet/ip_icmp.h"
22 #include "../netinet/tcp.h"
23 
24 u_char	ip_protox[IPPROTO_MAX];
25 int	ipqmaxlen = IFQ_MAXLEN;
26 struct	ifnet *ifinet;			/* first inet interface */
27 
28 /*
29  * IP initialization: fill in IP protocol switch table.
30  * All protocols not implemented in kernel go to raw IP protocol handler.
31  */
32 ip_init()
33 {
34 	register struct protosw *pr;
35 	register int i;
36 
37 	pr = pffindproto(PF_INET, IPPROTO_RAW);
38 	if (pr == 0)
39 		panic("ip_init");
40 	for (i = 0; i < IPPROTO_MAX; i++)
41 		ip_protox[i] = pr - inetsw;
42 	for (pr = inetdomain.dom_protosw;
43 	    pr <= inetdomain.dom_protoswNPROTOSW; pr++)
44 		if (pr->pr_family == PF_INET &&
45 		    pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW)
46 			ip_protox[pr->pr_protocol] = pr - inetsw;
47 	ipq.next = ipq.prev = &ipq;
48 	ip_id = time.tv_sec & 0xffff;
49 	ipintrq.ifq_maxlen = ipqmaxlen;
50 	ifinet = if_ifwithaf(AF_INET);
51 }
52 
53 u_char	ipcksum = 1;
54 struct	ip *ip_reass();
55 struct	sockaddr_in ipaddr = { AF_INET };
56 
57 /*
58  * Ip input routine.  Checksum and byte swap header.  If fragmented
59  * try to reassamble.  If complete and fragment queue exists, discard.
60  * Process options.  Pass to next level.
61  */
62 ipintr()
63 {
64 	register struct ip *ip;
65 	register struct mbuf *m;
66 	struct mbuf *m0;
67 	register int i;
68 	register struct ipq *fp;
69 	int hlen, s;
70 
71 next:
72 	/*
73 	 * Get next datagram off input queue and get IP header
74 	 * in first mbuf.
75 	 */
76 	s = splimp();
77 	IF_DEQUEUE(&ipintrq, m);
78 	splx(s);
79 	if (m == 0)
80 		return;
81 	if ((m->m_off > MMAXOFF || m->m_len < sizeof (struct ip)) &&
82 	    (m = m_pullup(m, sizeof (struct ip))) == 0) {
83 		ipstat.ips_toosmall++;
84 		goto next;
85 	}
86 	ip = mtod(m, struct ip *);
87 	if ((hlen = ip->ip_hl << 2) > m->m_len) {
88 		if ((m = m_pullup(m, hlen)) == 0) {
89 			ipstat.ips_badhlen++;
90 			goto next;
91 		}
92 		ip = mtod(m, struct ip *);
93 	}
94 	if (ipcksum)
95 		if (ip->ip_sum = in_cksum(m, hlen)) {
96 			ipstat.ips_badsum++;
97 			goto bad;
98 		}
99 
100 	/*
101 	 * Convert fields to host representation.
102 	 */
103 	ip->ip_len = ntohs((u_short)ip->ip_len);
104 	if (ip->ip_len < hlen) {
105 		ipstat.ips_badlen++;
106 		goto bad;
107 	}
108 	ip->ip_id = ntohs(ip->ip_id);
109 	ip->ip_off = ntohs((u_short)ip->ip_off);
110 
111 	/*
112 	 * Check that the amount of data in the buffers
113 	 * is as at least much as the IP header would have us expect.
114 	 * Trim mbufs if longer than we expect.
115 	 * Drop packet if shorter than we expect.
116 	 */
117 	i = -ip->ip_len;
118 	m0 = m;
119 	for (;;) {
120 		i += m->m_len;
121 		if (m->m_next == 0)
122 			break;
123 		m = m->m_next;
124 	}
125 	if (i != 0) {
126 		if (i < 0) {
127 			ipstat.ips_tooshort++;
128 			goto bad;
129 		}
130 		if (i <= m->m_len)
131 			m->m_len -= i;
132 		else
133 			m_adj(m0, -i);
134 	}
135 	m = m0;
136 
137 	/*
138 	 * Process options and, if not destined for us,
139 	 * ship it on.  ip_dooptions returns 1 when an
140 	 * error was detected (causing an icmp message
141 	 * to be sent).
142 	 */
143 	if (hlen > sizeof (struct ip) && ip_dooptions(ip))
144 		goto next;
145 
146 	/*
147 	 * Fast check on the first internet
148 	 * interface in the list.
149 	 */
150 	if (ifinet) {
151 		struct sockaddr_in *sin;
152 
153 		sin = (struct sockaddr_in *)&ifinet->if_addr;
154 		if (sin->sin_addr.s_addr == ip->ip_dst.s_addr)
155 			goto ours;
156 		sin = (struct sockaddr_in *)&ifinet->if_broadaddr;
157 		if ((ifinet->if_flags & IFF_BROADCAST) &&
158 		    sin->sin_addr.s_addr == ip->ip_dst.s_addr)
159 			goto ours;
160 	}
161 /* BEGIN GROT */
162 #include "nd.h"
163 #if NND > 0
164 	/*
165 	 * Diskless machines don't initially know
166 	 * their address, so take packets from them
167 	 * if we're acting as a network disk server.
168 	 */
169 	if (in_netof(ip->ip_dst) == INADDR_ANY &&
170 	    (in_netof(ip->ip_src) == INADDR_ANY &&
171 	     in_lnaof(ip->ip_src) != INADDR_ANY))
172 		goto ours;
173 #endif
174 /* END GROT */
175 	ipaddr.sin_addr = ip->ip_dst;
176 	if (if_ifwithaddr((struct sockaddr *)&ipaddr) == 0) {
177 		ip_forward(ip);
178 		goto next;
179 	}
180 
181 ours:
182 	/*
183 	 * Look for queue of fragments
184 	 * of this datagram.
185 	 */
186 	for (fp = ipq.next; fp != &ipq; fp = fp->next)
187 		if (ip->ip_id == fp->ipq_id &&
188 		    ip->ip_src.s_addr == fp->ipq_src.s_addr &&
189 		    ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
190 		    ip->ip_p == fp->ipq_p)
191 			goto found;
192 	fp = 0;
193 found:
194 
195 	/*
196 	 * Adjust ip_len to not reflect header,
197 	 * set ip_mff if more fragments are expected,
198 	 * convert offset of this to bytes.
199 	 */
200 	ip->ip_len -= hlen;
201 	((struct ipasfrag *)ip)->ipf_mff = 0;
202 	if (ip->ip_off & IP_MF)
203 		((struct ipasfrag *)ip)->ipf_mff = 1;
204 	ip->ip_off <<= 3;
205 
206 	/*
207 	 * If datagram marked as having more fragments
208 	 * or if this is not the first fragment,
209 	 * attempt reassembly; if it succeeds, proceed.
210 	 */
211 	if (((struct ipasfrag *)ip)->ipf_mff || ip->ip_off) {
212 		ip = ip_reass((struct ipasfrag *)ip, fp);
213 		if (ip == 0)
214 			goto next;
215 		hlen = ip->ip_hl << 2;
216 		m = dtom(ip);
217 	} else
218 		if (fp)
219 			ip_freef(fp);
220 
221 	/*
222 	 * Switch out to protocol's input routine.
223 	 */
224 	(*inetsw[ip_protox[ip->ip_p]].pr_input)(m);
225 	goto next;
226 bad:
227 	m_freem(m);
228 	goto next;
229 }
230 
231 /*
232  * Take incoming datagram fragment and try to
233  * reassemble it into whole datagram.  If a chain for
234  * reassembly of this datagram already exists, then it
235  * is given as fp; otherwise have to make a chain.
236  */
237 struct ip *
238 ip_reass(ip, fp)
239 	register struct ipasfrag *ip;
240 	register struct ipq *fp;
241 {
242 	register struct mbuf *m = dtom(ip);
243 	register struct ipasfrag *q;
244 	struct mbuf *t;
245 	int hlen = ip->ip_hl << 2;
246 	int i, next;
247 
248 	/*
249 	 * Presence of header sizes in mbufs
250 	 * would confuse code below.
251 	 */
252 	m->m_off += hlen;
253 	m->m_len -= hlen;
254 
255 	/*
256 	 * If first fragment to arrive, create a reassembly queue.
257 	 */
258 	if (fp == 0) {
259 		if ((t = m_get(M_WAIT, MT_FTABLE)) == NULL)
260 			goto dropfrag;
261 		fp = mtod(t, struct ipq *);
262 		insque(fp, &ipq);
263 		fp->ipq_ttl = IPFRAGTTL;
264 		fp->ipq_p = ip->ip_p;
265 		fp->ipq_id = ip->ip_id;
266 		fp->ipq_next = fp->ipq_prev = (struct ipasfrag *)fp;
267 		fp->ipq_src = ((struct ip *)ip)->ip_src;
268 		fp->ipq_dst = ((struct ip *)ip)->ip_dst;
269 		q = (struct ipasfrag *)fp;
270 		goto insert;
271 	}
272 
273 	/*
274 	 * Find a segment which begins after this one does.
275 	 */
276 	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next)
277 		if (q->ip_off > ip->ip_off)
278 			break;
279 
280 	/*
281 	 * If there is a preceding segment, it may provide some of
282 	 * our data already.  If so, drop the data from the incoming
283 	 * segment.  If it provides all of our data, drop us.
284 	 */
285 	if (q->ipf_prev != (struct ipasfrag *)fp) {
286 		i = q->ipf_prev->ip_off + q->ipf_prev->ip_len - ip->ip_off;
287 		if (i > 0) {
288 			if (i >= ip->ip_len)
289 				goto dropfrag;
290 			m_adj(dtom(ip), i);
291 			ip->ip_off += i;
292 			ip->ip_len -= i;
293 		}
294 	}
295 
296 	/*
297 	 * While we overlap succeeding segments trim them or,
298 	 * if they are completely covered, dequeue them.
299 	 */
300 	while (q != (struct ipasfrag *)fp && ip->ip_off + ip->ip_len > q->ip_off) {
301 		i = (ip->ip_off + ip->ip_len) - q->ip_off;
302 		if (i < q->ip_len) {
303 			q->ip_len -= i;
304 			q->ip_off += i;
305 			m_adj(dtom(q), i);
306 			break;
307 		}
308 		q = q->ipf_next;
309 		m_freem(dtom(q->ipf_prev));
310 		ip_deq(q->ipf_prev);
311 	}
312 
313 insert:
314 	/*
315 	 * Stick new segment in its place;
316 	 * check for complete reassembly.
317 	 */
318 	ip_enq(ip, q->ipf_prev);
319 	next = 0;
320 	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next) {
321 		if (q->ip_off != next)
322 			return (0);
323 		next += q->ip_len;
324 	}
325 	if (q->ipf_prev->ipf_mff)
326 		return (0);
327 
328 	/*
329 	 * Reassembly is complete; concatenate fragments.
330 	 */
331 	q = fp->ipq_next;
332 	m = dtom(q);
333 	t = m->m_next;
334 	m->m_next = 0;
335 	m_cat(m, t);
336 	q = q->ipf_next;
337 	while (q != (struct ipasfrag *)fp) {
338 		t = dtom(q);
339 		q = q->ipf_next;
340 		m_cat(m, t);
341 	}
342 
343 	/*
344 	 * Create header for new ip packet by
345 	 * modifying header of first packet;
346 	 * dequeue and discard fragment reassembly header.
347 	 * Make header visible.
348 	 */
349 	ip = fp->ipq_next;
350 	ip->ip_len = next;
351 	((struct ip *)ip)->ip_src = fp->ipq_src;
352 	((struct ip *)ip)->ip_dst = fp->ipq_dst;
353 	remque(fp);
354 	(void) m_free(dtom(fp));
355 	m = dtom(ip);
356 	m->m_len += sizeof (struct ipasfrag);
357 	m->m_off -= sizeof (struct ipasfrag);
358 	return ((struct ip *)ip);
359 
360 dropfrag:
361 	m_freem(m);
362 	return (0);
363 }
364 
365 /*
366  * Free a fragment reassembly header and all
367  * associated datagrams.
368  */
369 ip_freef(fp)
370 	struct ipq *fp;
371 {
372 	register struct ipasfrag *q, *p;
373 
374 	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = p) {
375 		p = q->ipf_next;
376 		ip_deq(q);
377 		m_freem(dtom(q));
378 	}
379 	remque(fp);
380 	(void) m_free(dtom(fp));
381 }
382 
383 /*
384  * Put an ip fragment on a reassembly chain.
385  * Like insque, but pointers in middle of structure.
386  */
387 ip_enq(p, prev)
388 	register struct ipasfrag *p, *prev;
389 {
390 
391 	p->ipf_prev = prev;
392 	p->ipf_next = prev->ipf_next;
393 	prev->ipf_next->ipf_prev = p;
394 	prev->ipf_next = p;
395 }
396 
397 /*
398  * To ip_enq as remque is to insque.
399  */
400 ip_deq(p)
401 	register struct ipasfrag *p;
402 {
403 
404 	p->ipf_prev->ipf_next = p->ipf_next;
405 	p->ipf_next->ipf_prev = p->ipf_prev;
406 }
407 
408 /*
409  * IP timer processing;
410  * if a timer expires on a reassembly
411  * queue, discard it.
412  */
413 ip_slowtimo()
414 {
415 	register struct ipq *fp;
416 	int s = splnet();
417 
418 	fp = ipq.next;
419 	if (fp == 0) {
420 		splx(s);
421 		return;
422 	}
423 	while (fp != &ipq) {
424 		--fp->ipq_ttl;
425 		fp = fp->next;
426 		if (fp->prev->ipq_ttl == 0)
427 			ip_freef(fp->prev);
428 	}
429 	splx(s);
430 }
431 
432 /*
433  * Drain off all datagram fragments.
434  */
435 ip_drain()
436 {
437 
438 	while (ipq.next != &ipq)
439 		ip_freef(ipq.next);
440 }
441 
442 /*
443  * Do option processing on a datagram,
444  * possibly discarding it if bad options
445  * are encountered.
446  */
447 ip_dooptions(ip)
448 	struct ip *ip;
449 {
450 	register u_char *cp;
451 	int opt, optlen, cnt, code, type;
452 	struct in_addr *sin;
453 	register struct ip_timestamp *ipt;
454 	register struct ifnet *ifp;
455 	struct in_addr t;
456 
457 	cp = (u_char *)(ip + 1);
458 	cnt = (ip->ip_hl << 2) - sizeof (struct ip);
459 	for (; cnt > 0; cnt -= optlen, cp += optlen) {
460 		opt = cp[0];
461 		if (opt == IPOPT_EOL)
462 			break;
463 		if (opt == IPOPT_NOP)
464 			optlen = 1;
465 		else
466 			optlen = cp[1];
467 		switch (opt) {
468 
469 		default:
470 			break;
471 
472 		/*
473 		 * Source routing with record.
474 		 * Find interface with current destination address.
475 		 * If none on this machine then drop if strictly routed,
476 		 * or do nothing if loosely routed.
477 		 * Record interface address and bring up next address
478 		 * component.  If strictly routed make sure next
479 		 * address on directly accessible net.
480 		 */
481 		case IPOPT_LSRR:
482 		case IPOPT_SSRR:
483 			if (cp[2] < 4 || cp[2] > optlen - (sizeof (long) - 1))
484 				break;
485 			sin = (struct in_addr *)(cp + cp[2]);
486 			ipaddr.sin_addr = *sin;
487 			ifp = if_ifwithaddr((struct sockaddr *)&ipaddr);
488 			type = ICMP_UNREACH, code = ICMP_UNREACH_SRCFAIL;
489 			if (ifp == 0) {
490 				if (opt == IPOPT_SSRR)
491 					goto bad;
492 				break;
493 			}
494 			t = ip->ip_dst; ip->ip_dst = *sin; *sin = t;
495 			cp[2] += 4;
496 			if (cp[2] > optlen - (sizeof (long) - 1))
497 				break;
498 			ip->ip_dst = sin[1];
499 			if (opt == IPOPT_SSRR &&
500 			    if_ifonnetof(in_netof(ip->ip_dst)) == 0)
501 				goto bad;
502 			break;
503 
504 		case IPOPT_TS:
505 			code = cp - (u_char *)ip;
506 			type = ICMP_PARAMPROB;
507 			ipt = (struct ip_timestamp *)cp;
508 			if (ipt->ipt_len < 5)
509 				goto bad;
510 			if (ipt->ipt_ptr > ipt->ipt_len - sizeof (long)) {
511 				if (++ipt->ipt_oflw == 0)
512 					goto bad;
513 				break;
514 			}
515 			sin = (struct in_addr *)(cp+cp[2]);
516 			switch (ipt->ipt_flg) {
517 
518 			case IPOPT_TS_TSONLY:
519 				break;
520 
521 			case IPOPT_TS_TSANDADDR:
522 				if (ipt->ipt_ptr + 8 > ipt->ipt_len)
523 					goto bad;
524 				if (ifinet == 0)
525 					goto bad;	/* ??? */
526 				*sin++ = ((struct sockaddr_in *)&ifinet->if_addr)->sin_addr;
527 				break;
528 
529 			case IPOPT_TS_PRESPEC:
530 				ipaddr.sin_addr = *sin;
531 				if (if_ifwithaddr((struct sockaddr *)&ipaddr) == 0)
532 					continue;
533 				if (ipt->ipt_ptr + 8 > ipt->ipt_len)
534 					goto bad;
535 				ipt->ipt_ptr += 4;
536 				break;
537 
538 			default:
539 				goto bad;
540 			}
541 			*(n_time *)sin = iptime();
542 			ipt->ipt_ptr += 4;
543 		}
544 	}
545 	return (0);
546 bad:
547 	icmp_error(ip, type, code);
548 	return (1);
549 }
550 
551 /*
552  * Strip out IP options, at higher
553  * level protocol in the kernel.
554  * Second argument is buffer to which options
555  * will be moved, and return value is their length.
556  */
557 ip_stripoptions(ip, mopt)
558 	struct ip *ip;
559 	struct mbuf *mopt;
560 {
561 	register int i;
562 	register struct mbuf *m;
563 	int olen;
564 
565 	olen = (ip->ip_hl<<2) - sizeof (struct ip);
566 	m = dtom(ip);
567 	ip++;
568 	if (mopt) {
569 		mopt->m_len = olen;
570 		mopt->m_off = MMINOFF;
571 		bcopy((caddr_t)ip, mtod(m, caddr_t), (unsigned)olen);
572 	}
573 	i = m->m_len - (sizeof (struct ip) + olen);
574 	bcopy((caddr_t)ip+olen, (caddr_t)ip, (unsigned)i);
575 	m->m_len -= olen;
576 }
577 
/*
 * Error code handed to the protocols for each control command,
 * indexed by command number.  A zero entry means the command is
 * ignored by ip_ctlinput.
 */
u_char inetctlerrmap[] = {
	ECONNABORTED,	ECONNABORTED,	0,		0,
	0,		0,
	EHOSTDOWN,	EHOSTUNREACH,	ENETUNREACH,	EHOSTUNREACH,
	ECONNREFUSED,	ECONNREFUSED,	EMSGSIZE,	0,
	0,		0,		0,		0
};

/*
 * Control input: translate command cmd into an error through
 * inetctlerrmap and notify the TCP and UDP control blocks that
 * are talking to the affected address.
 * arg is interpreted according to cmd: a sockaddr_in for
 * PRC_IFDOWN, an in_addr for PRC_HOSTDEAD and PRC_HOSTUNREACH,
 * and an icmp structure (whose embedded IP header supplies the
 * destination) for everything else.
 */
ip_ctlinput(cmd, arg)
	int cmd;
	caddr_t arg;
{
	struct in_addr *in;
	int tcp_abort(), udp_abort();
	extern struct inpcb tcb, udb;

	/*
	 * cmd indexes inetctlerrmap, so it must lie below both the
	 * command count and the number of entries actually present
	 * in the table.
	 */
	if (cmd < 0 || cmd >= PRC_NCMDS ||
	    cmd >= (int)(sizeof (inetctlerrmap) / sizeof (inetctlerrmap[0])))
		return;
	if (inetctlerrmap[cmd] == 0)
		return;		/* XXX */
	if (cmd == PRC_IFDOWN)
		in = &((struct sockaddr_in *)arg)->sin_addr;
	else if (cmd == PRC_HOSTDEAD || cmd == PRC_HOSTUNREACH)
		in = (struct in_addr *)arg;
	else
		in = &((struct icmp *)arg)->icmp_ip.ip_dst;
/* THIS IS VERY QUESTIONABLE, SHOULD HIT ALL PROTOCOLS */
	in_pcbnotify(&tcb, in, (int)inetctlerrmap[cmd], tcp_abort);
	in_pcbnotify(&udb, in, (int)inetctlerrmap[cmd], udp_abort);
}
608 
609 int	ipprintfs = 0;
610 int	ipforwarding = 1;
611 /*
612  * Forward a packet.  If some error occurs return the sender
613  * and icmp packet.  Note we can't always generate a meaningful
614  * icmp message because icmp doesn't have a large enough repetoire
615  * of codes and types.
616  */
617 ip_forward(ip)
618 	register struct ip *ip;
619 {
620 	register int error, type, code;
621 	struct mbuf *mopt, *mcopy;
622 
623 	if (ipprintfs)
624 		printf("forward: src %x dst %x ttl %x\n", ip->ip_src,
625 			ip->ip_dst, ip->ip_ttl);
626 	if (ipforwarding == 0) {
627 		/* can't tell difference between net and host */
628 		type = ICMP_UNREACH, code = ICMP_UNREACH_NET;
629 		goto sendicmp;
630 	}
631 	if (ip->ip_ttl < IPTTLDEC) {
632 		type = ICMP_TIMXCEED, code = ICMP_TIMXCEED_INTRANS;
633 		goto sendicmp;
634 	}
635 	ip->ip_ttl -= IPTTLDEC;
636 	mopt = m_get(M_DONTWAIT, MT_DATA);
637 	if (mopt == NULL) {
638 		m_freem(dtom(ip));
639 		return;
640 	}
641 
642 	/*
643 	 * Save at most 64 bytes of the packet in case
644 	 * we need to generate an ICMP message to the src.
645 	 */
646 	mcopy = m_copy(dtom(ip), 0, imin(ip->ip_len, 64));
647 	ip_stripoptions(ip, mopt);
648 
649 	error = ip_output(dtom(ip), mopt, (struct route *)0, IP_FORWARDING);
650 	if (error == 0) {
651 		if (mcopy)
652 			m_freem(mcopy);
653 		return;
654 	}
655 	if (mcopy == NULL)
656 		return;
657 	ip = mtod(mcopy, struct ip *);
658 	type = ICMP_UNREACH, code = 0;		/* need ``undefined'' */
659 	switch (error) {
660 
661 	case ENETUNREACH:
662 	case ENETDOWN:
663 		code = ICMP_UNREACH_NET;
664 		break;
665 
666 	case EMSGSIZE:
667 		code = ICMP_UNREACH_NEEDFRAG;
668 		break;
669 
670 	case EPERM:
671 		code = ICMP_UNREACH_PORT;
672 		break;
673 
674 	case ENOBUFS:
675 		type = ICMP_SOURCEQUENCH;
676 		break;
677 
678 	case EHOSTDOWN:
679 	case EHOSTUNREACH:
680 		code = ICMP_UNREACH_HOST;
681 		break;
682 	}
683 sendicmp:
684 	icmp_error(ip, type, code);
685 }
686