xref: /csrg-svn/sys/netinet/ip_input.c (revision 8312)
1 /*	ip_input.c	1.50	82/10/05	*/
2 
3 #include "../h/param.h"
4 #include "../h/systm.h"
5 #include "../h/mbuf.h"
6 #include "../h/protosw.h"
7 #include "../h/socket.h"
8 #include "../net/in.h"
9 #include "../net/in_systm.h"
10 #include "../net/if.h"
11 #include "../net/ip.h"			/* belongs before in.h */
12 #include "../net/ip_var.h"
13 #include "../net/ip_icmp.h"
14 #include "../net/tcp.h"
15 #include <time.h>
16 #include "../h/kernel.h"
17 #include <errno.h>
18 
19 u_char	ip_protox[IPPROTO_MAX];
20 int	ipqmaxlen = IFQ_MAXLEN;
21 struct	ifnet *ifinet;			/* first inet interface */
22 
23 /*
24  * IP initialization: fill in IP protocol switch table.
25  * All protocols not implemented in kernel go to raw IP protocol handler.
26  */
27 ip_init()
28 {
29 	register struct protosw *pr;
30 	register int i;
31 
32 	pr = pffindproto(PF_INET, IPPROTO_RAW);
33 	if (pr == 0)
34 		panic("ip_init");
35 	for (i = 0; i < IPPROTO_MAX; i++)
36 		ip_protox[i] = pr - protosw;
37 	for (pr = protosw; pr <= protoswLAST; pr++)
38 		if (pr->pr_family == PF_INET &&
39 		    pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW)
40 			ip_protox[pr->pr_protocol] = pr - protosw;
41 	ipq.next = ipq.prev = &ipq;
42 	ip_id = time.tv_sec & 0xffff;
43 	ipintrq.ifq_maxlen = ipqmaxlen;
44 	ifinet = if_ifwithaf(AF_INET);
45 }
46 
47 u_char	ipcksum = 1;
48 struct	ip *ip_reass();
49 struct	sockaddr_in ipaddr = { AF_INET };
50 
51 /*
52  * Ip input routine.  Checksum and byte swap header.  If fragmented
53  * try to reassamble.  If complete and fragment queue exists, discard.
54  * Process options.  Pass to next level.
55  */
56 ipintr()
57 {
58 	register struct ip *ip;
59 	register struct mbuf *m;
60 	struct mbuf *m0, *mopt;
61 	register int i;
62 	register struct ipq *fp;
63 	int hlen, s;
64 
65 next:
66 	/*
67 	 * Get next datagram off input queue and get IP header
68 	 * in first mbuf.
69 	 */
70 	s = splimp();
71 	IF_DEQUEUE(&ipintrq, m);
72 	splx(s);
73 	if (m == 0)
74 		return;
75 	if ((m->m_off > MMAXOFF || m->m_len < sizeof (struct ip)) &&
76 	    (m = m_pullup(m, sizeof (struct ip))) == 0)
77 		return;
78 	ip = mtod(m, struct ip *);
79 	if ((hlen = ip->ip_hl << 2) > m->m_len) {
80 		if ((m = m_pullup(m, hlen)) == 0)
81 			return;
82 		ip = mtod(m, struct ip *);
83 	}
84 	if (ipcksum)
85 		if (ip->ip_sum = in_cksum(m, hlen)) {
86 			printf("ip_sum %x\n", ip->ip_sum);	/* XXX */
87 			ipstat.ips_badsum++;
88 			goto bad;
89 		}
90 
91 #if vax
92 	/*
93 	 * Convert fields to host representation.
94 	 */
95 	ip->ip_len = ntohs((u_short)ip->ip_len);
96 	ip->ip_id = ntohs(ip->ip_id);
97 	ip->ip_off = ntohs((u_short)ip->ip_off);
98 #endif
99 
100 	/*
101 	 * Check that the amount of data in the buffers
102 	 * is as at least much as the IP header would have us expect.
103 	 * Trim mbufs if longer than we expect.
104 	 * Drop packet if shorter than we expect.
105 	 */
106 	i = -ip->ip_len;
107 	m0 = m;
108 	for (;;) {
109 		i += m->m_len;
110 		if (m->m_next == 0)
111 			break;
112 		m = m->m_next;
113 	}
114 	if (i != 0) {
115 		if (i < 0) {
116 			ipstat.ips_tooshort++;
117 			goto bad;
118 		}
119 		if (i <= m->m_len)
120 			m->m_len -= i;
121 		else
122 			m_adj(m0, -i);
123 	}
124 	m = m0;
125 
126 	/*
127 	 * Process options and, if not destined for us,
128 	 * ship it on.  ip_dooptions returns 1 when an
129 	 * error was detected (causing an icmp message
130 	 * to be sent).
131 	 */
132 	if (hlen > sizeof (struct ip) && ip_dooptions(ip))
133 		goto next;
134 
135 	/*
136 	 * Fast check on the first internet
137 	 * interface in the list.
138 	 */
139 	if (ifinet) {
140 		struct sockaddr_in *sin;
141 
142 		sin = (struct sockaddr_in *)&ifinet->if_addr;
143 		if (sin->sin_addr.s_addr == ip->ip_dst.s_addr)
144 			goto ours;
145 		sin = (struct sockaddr_in *)&ifinet->if_broadaddr;
146 		if ((ifinet->if_flags & IFF_BROADCAST) &&
147 		    sin->sin_addr.s_addr == ip->ip_dst.s_addr)
148 			goto ours;
149 	}
150 	ipaddr.sin_addr = ip->ip_dst;
151 	if (if_ifwithaddr((struct sockaddr *)&ipaddr) == 0) {
152 		ip_forward(ip);
153 		goto next;
154 	}
155 
156 ours:
157 	/*
158 	 * Look for queue of fragments
159 	 * of this datagram.
160 	 */
161 	for (fp = ipq.next; fp != &ipq; fp = fp->next)
162 		if (ip->ip_id == fp->ipq_id &&
163 		    ip->ip_src.s_addr == fp->ipq_src.s_addr &&
164 		    ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
165 		    ip->ip_p == fp->ipq_p)
166 			goto found;
167 	fp = 0;
168 found:
169 
170 	/*
171 	 * Adjust ip_len to not reflect header,
172 	 * set ip_mff if more fragments are expected,
173 	 * convert offset of this to bytes.
174 	 */
175 	ip->ip_len -= hlen;
176 	((struct ipasfrag *)ip)->ipf_mff = 0;
177 	if (ip->ip_off & IP_MF)
178 		((struct ipasfrag *)ip)->ipf_mff = 1;
179 	ip->ip_off <<= 3;
180 
181 	/*
182 	 * If datagram marked as having more fragments
183 	 * or if this is not the first fragment,
184 	 * attempt reassembly; if it succeeds, proceed.
185 	 */
186 	if (((struct ipasfrag *)ip)->ipf_mff || ip->ip_off) {
187 		ip = ip_reass((struct ipasfrag *)ip, fp);
188 		if (ip == 0)
189 			goto next;
190 		hlen = ip->ip_hl << 2;
191 		m = dtom(ip);
192 	} else
193 		if (fp)
194 			(void) ip_freef(fp);
195 
196 	/*
197 	 * Switch out to protocol's input routine.
198 	 */
199 	(*protosw[ip_protox[ip->ip_p]].pr_input)(m);
200 	goto next;
201 bad:
202 	m_freem(m);
203 	goto next;
204 }
205 
206 /*
207  * Take incoming datagram fragment and try to
208  * reassemble it into whole datagram.  If a chain for
209  * reassembly of this datagram already exists, then it
210  * is given as fp; otherwise have to make a chain.
211  */
212 struct ip *
213 ip_reass(ip, fp)
214 	register struct ipasfrag *ip;
215 	register struct ipq *fp;
216 {
217 	register struct mbuf *m = dtom(ip);
218 	register struct ipasfrag *q;
219 	struct mbuf *t;
220 	int hlen = ip->ip_hl << 2;
221 	int i, next;
222 
223 	/*
224 	 * Presence of header sizes in mbufs
225 	 * would confuse code below.
226 	 */
227 	m->m_off += hlen;
228 	m->m_len -= hlen;
229 
230 	/*
231 	 * If first fragment to arrive, create a reassembly queue.
232 	 */
233 	if (fp == 0) {
234 		if ((t = m_get(M_WAIT)) == NULL)
235 			goto dropfrag;
236 		fp = mtod(t, struct ipq *);
237 		insque(fp, &ipq);
238 		fp->ipq_ttl = IPFRAGTTL;
239 		fp->ipq_p = ip->ip_p;
240 		fp->ipq_id = ip->ip_id;
241 		fp->ipq_next = fp->ipq_prev = (struct ipasfrag *)fp;
242 		fp->ipq_src = ((struct ip *)ip)->ip_src;
243 		fp->ipq_dst = ((struct ip *)ip)->ip_dst;
244 		q = (struct ipasfrag *)fp;
245 		goto insert;
246 	}
247 
248 	/*
249 	 * Find a segment which begins after this one does.
250 	 */
251 	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next)
252 		if (q->ip_off > ip->ip_off)
253 			break;
254 
255 	/*
256 	 * If there is a preceding segment, it may provide some of
257 	 * our data already.  If so, drop the data from the incoming
258 	 * segment.  If it provides all of our data, drop us.
259 	 */
260 	if (q->ipf_prev != (struct ipasfrag *)fp) {
261 		i = q->ipf_prev->ip_off + q->ipf_prev->ip_len - ip->ip_off;
262 		if (i > 0) {
263 			if (i >= ip->ip_len)
264 				goto dropfrag;
265 			m_adj(dtom(ip), i);
266 			ip->ip_off += i;
267 			ip->ip_len -= i;
268 		}
269 	}
270 
271 	/*
272 	 * While we overlap succeeding segments trim them or,
273 	 * if they are completely covered, dequeue them.
274 	 */
275 	while (q != (struct ipasfrag *)fp && ip->ip_off + ip->ip_len > q->ip_off) {
276 		i = (ip->ip_off + ip->ip_len) - q->ip_off;
277 		if (i < q->ip_len) {
278 			q->ip_len -= i;
279 			q->ip_off += i;
280 			m_adj(dtom(q), i);
281 			break;
282 		}
283 		q = q->ipf_next;
284 		m_freem(dtom(q->ipf_prev));
285 		ip_deq(q->ipf_prev);
286 	}
287 
288 insert:
289 	/*
290 	 * Stick new segment in its place;
291 	 * check for complete reassembly.
292 	 */
293 	ip_enq(ip, q->ipf_prev);
294 	next = 0;
295 	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next) {
296 		if (q->ip_off != next)
297 			return (0);
298 		next += q->ip_len;
299 	}
300 	if (q->ipf_prev->ipf_mff)
301 		return (0);
302 
303 	/*
304 	 * Reassembly is complete; concatenate fragments.
305 	 */
306 	q = fp->ipq_next;
307 	m = dtom(q);
308 	t = m->m_next;
309 	m->m_next = 0;
310 	m_cat(m, t);
311 	q = q->ipf_next;
312 	while (q != (struct ipasfrag *)fp) {
313 		t = dtom(q);
314 		q = q->ipf_next;
315 		m_cat(m, t);
316 	}
317 
318 	/*
319 	 * Create header for new ip packet by
320 	 * modifying header of first packet;
321 	 * dequeue and discard fragment reassembly header.
322 	 * Make header visible.
323 	 */
324 	ip = fp->ipq_next;
325 	ip->ip_len = next;
326 	((struct ip *)ip)->ip_src = fp->ipq_src;
327 	((struct ip *)ip)->ip_dst = fp->ipq_dst;
328 	remque(fp);
329 	(void) m_free(dtom(fp));
330 	m = dtom(ip);
331 	m->m_len += sizeof (struct ipasfrag);
332 	m->m_off -= sizeof (struct ipasfrag);
333 	return ((struct ip *)ip);
334 
335 dropfrag:
336 	m_freem(m);
337 	return (0);
338 }
339 
340 /*
341  * Free a fragment reassembly header and all
342  * associated datagrams.
343  */
344 struct ipq *
345 ip_freef(fp)
346 	struct ipq *fp;
347 {
348 	register struct ipasfrag *q;
349 	struct mbuf *m;
350 
351 	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next)
352 		m_freem(dtom(q));
353 	m = dtom(fp);
354 	fp = fp->next;
355 	remque(fp->prev);
356 	(void) m_free(m);
357 	return (fp);
358 }
359 
360 /*
361  * Put an ip fragment on a reassembly chain.
362  * Like insque, but pointers in middle of structure.
363  */
364 ip_enq(p, prev)
365 	register struct ipasfrag *p, *prev;
366 {
367 
368 	p->ipf_prev = prev;
369 	p->ipf_next = prev->ipf_next;
370 	prev->ipf_next->ipf_prev = p;
371 	prev->ipf_next = p;
372 }
373 
374 /*
375  * To ip_enq as remque is to insque.
376  */
377 ip_deq(p)
378 	register struct ipasfrag *p;
379 {
380 
381 	p->ipf_prev->ipf_next = p->ipf_next;
382 	p->ipf_next->ipf_prev = p->ipf_prev;
383 }
384 
385 /*
386  * IP timer processing;
387  * if a timer expires on a reassembly
388  * queue, discard it.
389  */
390 ip_slowtimo()
391 {
392 	register struct ipq *fp;
393 	int s = splnet();
394 
395 	fp = ipq.next;
396 	if (fp == 0) {
397 		splx(s);
398 		return;
399 	}
400 	while (fp != &ipq)
401 		if (--fp->ipq_ttl == 0)
402 			fp = ip_freef(fp);
403 		else
404 			fp = fp->next;
405 	splx(s);
406 }
407 
408 /*
409  * Drain off all datagram fragments.
410  */
411 ip_drain()
412 {
413 
414 	while (ipq.next != &ipq)
415 		(void) ip_freef(ipq.next);
416 }
417 
418 /*
419  * Do option processing on a datagram,
420  * possibly discarding it if bad options
421  * are encountered.
422  */
423 ip_dooptions(ip)
424 	struct ip *ip;
425 {
426 	register u_char *cp;
427 	int opt, optlen, cnt, code, type;
428 	struct in_addr *sin;
429 	register struct ip_timestamp *ipt;
430 	register struct ifnet *ifp;
431 	struct in_addr t;
432 
433 	cp = (u_char *)(ip + 1);
434 	cnt = (ip->ip_hl << 2) - sizeof (struct ip);
435 	for (; cnt > 0; cnt -= optlen, cp += optlen) {
436 		opt = cp[0];
437 		if (opt == IPOPT_EOL)
438 			break;
439 		if (opt == IPOPT_NOP)
440 			optlen = 1;
441 		else
442 			optlen = cp[1];
443 		switch (opt) {
444 
445 		default:
446 			break;
447 
448 		/*
449 		 * Source routing with record.
450 		 * Find interface with current destination address.
451 		 * If none on this machine then drop if strictly routed,
452 		 * or do nothing if loosely routed.
453 		 * Record interface address and bring up next address
454 		 * component.  If strictly routed make sure next
455 		 * address on directly accessible net.
456 		 */
457 		case IPOPT_LSRR:
458 		case IPOPT_SSRR:
459 			if (cp[2] < 4 || cp[2] > optlen - (sizeof (long) - 1))
460 				break;
461 			sin = (struct in_addr *)(cp + cp[2]);
462 			ipaddr.sin_addr = *sin;
463 			ifp = if_ifwithaddr((struct sockaddr *)&ipaddr);
464 			type = ICMP_UNREACH, code = ICMP_UNREACH_SRCFAIL;
465 			if (ifp == 0) {
466 				if (opt == IPOPT_SSRR)
467 					goto bad;
468 				break;
469 			}
470 			t = ip->ip_dst; ip->ip_dst = *sin; *sin = t;
471 			cp[2] += 4;
472 			if (cp[2] > optlen - (sizeof (long) - 1))
473 				break;
474 			ip->ip_dst = sin[1];
475 			if (opt == IPOPT_SSRR &&
476 			    if_ifonnetof(in_netof(ip->ip_dst)) == 0)
477 				goto bad;
478 			break;
479 
480 		case IPOPT_TS:
481 			code = cp - (u_char *)ip;
482 			type = ICMP_PARAMPROB;
483 			ipt = (struct ip_timestamp *)cp;
484 			if (ipt->ipt_len < 5)
485 				goto bad;
486 			if (ipt->ipt_ptr > ipt->ipt_len - sizeof (long)) {
487 				if (++ipt->ipt_oflw == 0)
488 					goto bad;
489 				break;
490 			}
491 			sin = (struct in_addr *)(cp+cp[2]);
492 			switch (ipt->ipt_flg) {
493 
494 			case IPOPT_TS_TSONLY:
495 				break;
496 
497 			case IPOPT_TS_TSANDADDR:
498 				if (ipt->ipt_ptr + 8 > ipt->ipt_len)
499 					goto bad;
500 				if (ifinet == 0)
501 					goto bad;	/* ??? */
502 				*sin++ = ((struct sockaddr_in *)&ifinet->if_addr)->sin_addr;
503 				break;
504 
505 			case IPOPT_TS_PRESPEC:
506 				ipaddr.sin_addr = *sin;
507 				if (!if_ifwithaddr((struct sockaddr *)&ipaddr))
508 					continue;
509 				if (ipt->ipt_ptr + 8 > ipt->ipt_len)
510 					goto bad;
511 				ipt->ipt_ptr += 4;
512 				break;
513 
514 			default:
515 				goto bad;
516 			}
517 			*(n_time *)sin = iptime();
518 			ipt->ipt_ptr += 4;
519 		}
520 	}
521 	return (0);
522 bad:
523 	icmp_error(ip, type, code);
524 	return (1);
525 }
526 
527 /*
528  * Strip out IP options, at higher
529  * level protocol in the kernel.
530  * Second argument is buffer to which options
531  * will be moved, and return value is their length.
532  */
533 ip_stripoptions(ip, mopt)
534 	struct ip *ip;
535 	struct mbuf *mopt;
536 {
537 	register int i;
538 	register struct mbuf *m;
539 	int olen;
540 
541 	olen = (ip->ip_hl<<2) - sizeof (struct ip);
542 	m = dtom(ip);
543 	ip++;
544 	if (mopt) {
545 		mopt->m_len = olen;
546 		mopt->m_off = MMINOFF;
547 		bcopy((caddr_t)ip, mtod(m, caddr_t), (unsigned)olen);
548 	}
549 	i = m->m_len - (sizeof (struct ip) + olen);
550 	bcopy((caddr_t)ip+olen, (caddr_t)ip, (unsigned)i);
551 	m->m_len -= olen;
552 }
553 
/*
 * Error number reported to the protocols for each control
 * command, indexed by command; 0 means the command is ignored
 * by ip_ctlinput.
 */
u_char inetctlerrmap[] = {
	ECONNABORTED,	ECONNABORTED,	0,		0,
	0,		0,
	EHOSTDOWN,	EHOSTUNREACH,	ENETUNREACH,	EHOSTUNREACH,
	ECONNREFUSED,	ECONNREFUSED,	EMSGSIZE,	0,
	0,		0,		0,		0
};

/*
 * Control input: map command cmd to an error number through
 * inetctlerrmap[] and notify the tcp and udp control blocks
 * that match the address taken from arg.  How arg is read
 * depends on cmd: a sockaddr_in for PRC_IFDOWN, an in_addr for
 * PRC_HOSTDEAD and PRC_HOSTUNREACH, otherwise an icmp message
 * whose enclosed ip header supplies the destination.
 */
ip_ctlinput(cmd, arg)
	int cmd;
	caddr_t arg;
{
	struct in_addr *sin;
	int tcp_abort(), udp_abort();
	extern struct inpcb tcb, udb;

	/*
	 * Never index past the end of the table, whatever the
	 * relation between PRC_NCMDS and the table's size.
	 * NOTE(review): cmd == PRC_NCMDS still passes the second
	 * test; if PRC_NCMDS is a count this should likely be >=.
	 */
	if (cmd < 0 || cmd > PRC_NCMDS ||
	    cmd >= sizeof (inetctlerrmap) / sizeof (inetctlerrmap[0]))
		return;
	if (inetctlerrmap[cmd] == 0)
		return;		/* XXX */
	if (cmd == PRC_IFDOWN)
		sin = &((struct sockaddr_in *)arg)->sin_addr;
	else if (cmd == PRC_HOSTDEAD || cmd == PRC_HOSTUNREACH)
		sin = (struct in_addr *)arg;
	else
		sin = &((struct icmp *)arg)->icmp_ip.ip_dst;
	in_pcbnotify(&tcb, sin, inetctlerrmap[cmd], tcp_abort);
	in_pcbnotify(&udb, sin, inetctlerrmap[cmd], udp_abort);
}
583 
584 int	ipprintfs = 0;
585 int	ipforwarding = 1;
586 /*
587  * Forward a packet.  If some error occurs return the sender
588  * and icmp packet.  Note we can't always generate a meaningful
589  * icmp message because icmp doesn't have a large enough repetoire
590  * of codes and types.
591  */
592 ip_forward(ip)
593 	register struct ip *ip;
594 {
595 	register int error, type, code;
596 	struct mbuf *mopt, *mcopy;
597 
598 	if (ipprintfs)
599 		printf("forward: src %x dst %x ttl %x\n", ip->ip_src,
600 			ip->ip_dst, ip->ip_ttl);
601 	if (ipforwarding == 0) {
602 		/* can't tell difference between net and host */
603 		type = ICMP_UNREACH, code = ICMP_UNREACH_NET;
604 		goto sendicmp;
605 	}
606 	if (ip->ip_ttl < IPTTLDEC) {
607 		type = ICMP_TIMXCEED, code = ICMP_TIMXCEED_INTRANS;
608 		goto sendicmp;
609 	}
610 	ip->ip_ttl -= IPTTLDEC;
611 	mopt = m_get(M_DONTWAIT);
612 	if (mopt == 0) {
613 		m_freem(dtom(ip));
614 		return;
615 	}
616 
617 	/*
618 	 * Save at most 64 bytes of the packet in case
619 	 * we need to generate an ICMP message to the src.
620 	 */
621 	mcopy = m_copy(dtom(ip), 0, imin(ip->ip_len, 64));
622 	ip_stripoptions(ip, mopt);
623 
624 	/* last 0 here means no directed broadcast */
625 	if ((error = ip_output(dtom(ip), mopt, 0, 0)) == 0) {
626 		if (mcopy)
627 			m_freem(mcopy);
628 		return;
629 	}
630 	ip = mtod(mcopy, struct ip *);
631 	type = ICMP_UNREACH, code = 0;		/* need ``undefined'' */
632 	switch (error) {
633 
634 	case ENETUNREACH:
635 	case ENETDOWN:
636 		code = ICMP_UNREACH_NET;
637 		break;
638 
639 	case EMSGSIZE:
640 		code = ICMP_UNREACH_NEEDFRAG;
641 		break;
642 
643 	case EPERM:
644 		code = ICMP_UNREACH_PORT;
645 		break;
646 
647 	case ENOBUFS:
648 		type = ICMP_SOURCEQUENCH;
649 		break;
650 
651 	case EHOSTDOWN:
652 	case EHOSTUNREACH:
653 		code = ICMP_UNREACH_HOST;
654 		break;
655 	}
656 sendicmp:
657 	icmp_error(ip, type, code);
658 }
659