xref: /csrg-svn/sys/netinet/ip_input.c (revision 6367)
1 /*	ip_input.c	1.37	82/03/30	*/
2 
3 #include "../h/param.h"
4 #include "../h/systm.h"
5 #include "../h/clock.h"
6 #include "../h/mbuf.h"
7 #include "../h/protosw.h"
8 #include "../h/socket.h"
9 #include "../net/in.h"
10 #include "../net/in_systm.h"
11 #include "../net/if.h"
12 #include "../net/ip.h"			/* belongs before in.h */
13 #include "../net/ip_var.h"
14 #include "../net/ip_icmp.h"
15 #include "../net/tcp.h"
16 
17 u_char	ip_protox[IPPROTO_MAX];
18 int	ipqmaxlen = IFQ_MAXLEN;
19 struct	ifnet *ifinet;			/* first inet interface */
20 
21 /*
22  * IP initialization: fill in IP protocol switch table.
23  * All protocols not implemented in kernel go to raw IP protocol handler.
24  */
25 ip_init()
26 {
27 	register struct protosw *pr;
28 	register int i;
29 
30 COUNT(IP_INIT);
31 	pr = pffindproto(PF_INET, IPPROTO_RAW);
32 	if (pr == 0)
33 		panic("ip_init");
34 	for (i = 0; i < IPPROTO_MAX; i++)
35 		ip_protox[i] = pr - protosw;
36 	for (pr = protosw; pr <= protoswLAST; pr++)
37 		if (pr->pr_family == PF_INET &&
38 		    pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW)
39 			ip_protox[pr->pr_protocol] = pr - protosw;
40 	ipq.next = ipq.prev = &ipq;
41 	ip_id = time & 0xffff;
42 	ipintrq.ifq_maxlen = ipqmaxlen;
43 	ifinet = if_ifwithaf(AF_INET);
44 }
45 
46 u_char	ipcksum = 1;
47 struct	ip *ip_reass();
48 int	ipforwarding = 0;
49 struct	sockaddr_in ipaddr = { AF_INET };
50 
51 /*
52  * Ip input routine.  Checksum and byte swap header.  If fragmented
53  * try to reassamble.  If complete and fragment queue exists, discard.
54  * Process options.  Pass to next level.
55  */
56 ipintr()
57 {
58 	register struct ip *ip;
59 	register struct mbuf *m;
60 	struct mbuf *m0, *mopt;
61 	register int i;
62 	register struct ipq *fp;
63 	int hlen, s;
64 
65 COUNT(IPINTR);
66 next:
67 	/*
68 	 * Get next datagram off input queue and get IP header
69 	 * in first mbuf.
70 	 */
71 	s = splimp();
72 	IF_DEQUEUE(&ipintrq, m);
73 	splx(s);
74 	if (m == 0)
75 		return;
76 	if ((m->m_off > MMAXOFF || m->m_len < sizeof (struct ip)) &&
77 	    (m = m_pullup(m, sizeof (struct ip))) == 0)
78 		return;
79 	ip = mtod(m, struct ip *);
80 	if ((hlen = ip->ip_hl << 2) > m->m_len) {
81 		if ((m = m_pullup(m, hlen)) == 0)
82 			return;
83 		ip = mtod(m, struct ip *);
84 	}
85 	if (ipcksum)
86 		if (ip->ip_sum = in_cksum(m, hlen)) {
87 			printf("ip_sum %x\n", ip->ip_sum);	/* XXX */
88 			ipstat.ips_badsum++;
89 			goto bad;
90 		}
91 
92 #if vax
93 	/*
94 	 * Convert fields to host representation.
95 	 */
96 	ip->ip_len = ntohs((u_short)ip->ip_len);
97 	ip->ip_id = ntohs(ip->ip_id);
98 	ip->ip_off = ntohs((u_short)ip->ip_off);
99 #endif
100 
101 	/*
102 	 * Check that the amount of data in the buffers
103 	 * is as at least much as the IP header would have us expect.
104 	 * Trim mbufs if longer than we expect.
105 	 * Drop packet if shorter than we expect.
106 	 */
107 	i = 0;
108 	m0 = m;
109 	for (; m != NULL; m = m->m_next) {
110 		if (m->m_free) panic("ipinput already free");
111 		i += m->m_len;
112 	}
113 	m = m0;
114 	if (i != ip->ip_len) {
115 		if (i < ip->ip_len) {
116 			ipstat.ips_tooshort++;
117 			goto bad;
118 		}
119 		m_adj(m, ip->ip_len - i);
120 	}
121 
122 	/*
123 	 * Process options and, if not destined for us,
124 	 * ship it on.
125 	 */
126 	if (hlen > sizeof (struct ip))
127 		ip_dooptions(ip);
128 
129 	/*
130 	 * Fast check on the first internet
131 	 * interface in the list.
132 	 */
133 	if (ifinet) {
134 		struct sockaddr_in *sin;
135 
136 		sin = (struct sockaddr_in *)&ifinet->if_addr;
137 		if (sin->sin_addr.s_addr == ip->ip_dst.s_addr)
138 			goto ours;
139 		if ((ifinet->if_flags & IFF_BROADCAST) &&
140 		    sin->sin_addr.s_addr == ip->ip_dst.s_addr)
141 			goto ours;
142 	}
143 	ipaddr.sin_addr = ip->ip_dst;
144 	if (if_ifwithaddr((struct sockaddr *)&ipaddr) == 0) {
145 printf("forward: src %x dst %x ttl %x\n", ip->ip_src, ip->ip_dst, ip->ip_ttl);
146 		if (ipforwarding == 0)
147 			goto bad;
148 		if (ip->ip_ttl < IPTTLDEC) {
149 			icmp_error(ip, ICMP_TIMXCEED, 0);
150 			goto next;
151 		}
152 		ip->ip_ttl -= IPTTLDEC;
153 		mopt = m_get(M_DONTWAIT);
154 		if (mopt == 0)
155 			goto bad;
156 		ip_stripoptions(ip, mopt);
157 
158 		/* last 0 here means no directed broadcast */
159 		(void) ip_output(m0, mopt, 0, 0);
160 		goto next;
161 	}
162 
163 ours:
164 	/*
165 	 * Look for queue of fragments
166 	 * of this datagram.
167 	 */
168 	for (fp = ipq.next; fp != &ipq; fp = fp->next)
169 		if (ip->ip_id == fp->ipq_id &&
170 		    ip->ip_src.s_addr == fp->ipq_src.s_addr &&
171 		    ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
172 		    ip->ip_p == fp->ipq_p)
173 			goto found;
174 	fp = 0;
175 found:
176 
177 	/*
178 	 * Adjust ip_len to not reflect header,
179 	 * set ip_mff if more fragments are expected,
180 	 * convert offset of this to bytes.
181 	 */
182 	ip->ip_len -= hlen;
183 	((struct ipasfrag *)ip)->ipf_mff = 0;
184 	if (ip->ip_off & IP_MF)
185 		((struct ipasfrag *)ip)->ipf_mff = 1;
186 	ip->ip_off <<= 3;
187 
188 	/*
189 	 * If datagram marked as having more fragments
190 	 * or if this is not the first fragment,
191 	 * attempt reassembly; if it succeeds, proceed.
192 	 */
193 	if (((struct ipasfrag *)ip)->ipf_mff || ip->ip_off) {
194 		ip = ip_reass((struct ipasfrag *)ip, fp);
195 		if (ip == 0)
196 			goto next;
197 		hlen = ip->ip_hl << 2;
198 		m = dtom(ip);
199 	} else
200 		if (fp)
201 			(void) ip_freef(fp);
202 
203 	/*
204 	 * Switch out to protocol's input routine.
205 	 */
206 	(*protosw[ip_protox[ip->ip_p]].pr_input)(m);
207 	goto next;
208 bad:
209 	m_freem(m);
210 	goto next;
211 }
212 
213 /*
214  * Take incoming datagram fragment and try to
215  * reassemble it into whole datagram.  If a chain for
216  * reassembly of this datagram already exists, then it
217  * is given as fp; otherwise have to make a chain.
218  */
219 struct ip *
220 ip_reass(ip, fp)
221 	register struct ipasfrag *ip;
222 	register struct ipq *fp;
223 {
224 	register struct mbuf *m = dtom(ip);
225 	register struct ipasfrag *q;
226 	struct mbuf *t;
227 	int hlen = ip->ip_hl << 2;
228 	int i, next;
229 COUNT(IP_REASS);
230 
231 	/*
232 	 * Presence of header sizes in mbufs
233 	 * would confuse code below.
234 	 */
235 	m->m_off += hlen;
236 	m->m_len -= hlen;
237 
238 	/*
239 	 * If first fragment to arrive, create a reassembly queue.
240 	 */
241 	if (fp == 0) {
242 		if ((t = m_get(M_WAIT)) == NULL)
243 			goto dropfrag;
244 		t->m_off = MMINOFF;
245 		fp = mtod(t, struct ipq *);
246 		insque(fp, &ipq);
247 		fp->ipq_ttl = IPFRAGTTL;
248 		fp->ipq_p = ip->ip_p;
249 		fp->ipq_id = ip->ip_id;
250 		fp->ipq_next = fp->ipq_prev = (struct ipasfrag *)fp;
251 		fp->ipq_src = ((struct ip *)ip)->ip_src;
252 		fp->ipq_dst = ((struct ip *)ip)->ip_dst;
253 		q = (struct ipasfrag *)fp;
254 		goto insert;
255 	}
256 
257 	/*
258 	 * Find a segment which begins after this one does.
259 	 */
260 	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next)
261 		if (q->ip_off > ip->ip_off)
262 			break;
263 
264 	/*
265 	 * If there is a preceding segment, it may provide some of
266 	 * our data already.  If so, drop the data from the incoming
267 	 * segment.  If it provides all of our data, drop us.
268 	 */
269 	if (q->ipf_prev != (struct ipasfrag *)fp) {
270 		i = q->ipf_prev->ip_off + q->ipf_prev->ip_len - ip->ip_off;
271 		if (i > 0) {
272 			if (i >= ip->ip_len)
273 				goto dropfrag;
274 			m_adj(dtom(ip), i);
275 			ip->ip_off += i;
276 			ip->ip_len -= i;
277 		}
278 	}
279 
280 	/*
281 	 * While we overlap succeeding segments trim them or,
282 	 * if they are completely covered, dequeue them.
283 	 */
284 	while (q != (struct ipasfrag *)fp && ip->ip_off + ip->ip_len > q->ip_off) {
285 		i = (ip->ip_off + ip->ip_len) - q->ip_off;
286 		if (i < q->ip_len) {
287 			q->ip_len -= i;
288 			q->ip_off += i;
289 			m_adj(dtom(q), i);
290 			break;
291 		}
292 		q = q->ipf_next;
293 		m_freem(dtom(q->ipf_prev));
294 		ip_deq(q->ipf_prev);
295 	}
296 
297 insert:
298 	/*
299 	 * Stick new segment in its place;
300 	 * check for complete reassembly.
301 	 */
302 	ip_enq(ip, q->ipf_prev);
303 	next = 0;
304 	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next) {
305 		if (q->ip_off != next)
306 			return (0);
307 		next += q->ip_len;
308 	}
309 	if (q->ipf_prev->ipf_mff)
310 		return (0);
311 
312 	/*
313 	 * Reassembly is complete; concatenate fragments.
314 	 */
315 	q = fp->ipq_next;
316 	m = dtom(q);
317 	t = m->m_next;
318 	m->m_next = 0;
319 	m_cat(m, t);
320 	q = q->ipf_next;
321 	while (q != (struct ipasfrag *)fp) {
322 		t = dtom(q);
323 		q = q->ipf_next;
324 		m_cat(m, t);
325 	}
326 
327 	/*
328 	 * Create header for new ip packet by
329 	 * modifying header of first packet;
330 	 * dequeue and discard fragment reassembly header.
331 	 * Make header visible.
332 	 */
333 	ip = fp->ipq_next;
334 	ip->ip_len = next;
335 	((struct ip *)ip)->ip_src = fp->ipq_src;
336 	((struct ip *)ip)->ip_dst = fp->ipq_dst;
337 	remque(fp);
338 	(void) m_free(dtom(fp));
339 	m = dtom(ip);
340 	m->m_len += sizeof (struct ipasfrag);
341 	m->m_off -= sizeof (struct ipasfrag);
342 	return ((struct ip *)ip);
343 
344 dropfrag:
345 	m_freem(m);
346 	return (0);
347 }
348 
349 /*
350  * Free a fragment reassembly header and all
351  * associated datagrams.
352  */
353 struct ipq *
354 ip_freef(fp)
355 	struct ipq *fp;
356 {
357 	register struct ipasfrag *q;
358 	struct mbuf *m;
359 COUNT(IP_FREEF);
360 
361 	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next)
362 		m_freem(dtom(q));
363 	m = dtom(fp);
364 	fp = fp->next;
365 	remque(fp->prev);
366 	(void) m_free(m);
367 	return (fp);
368 }
369 
370 /*
371  * Put an ip fragment on a reassembly chain.
372  * Like insque, but pointers in middle of structure.
373  */
374 ip_enq(p, prev)
375 	register struct ipasfrag *p, *prev;
376 {
377 
378 COUNT(IP_ENQ);
379 	p->ipf_prev = prev;
380 	p->ipf_next = prev->ipf_next;
381 	prev->ipf_next->ipf_prev = p;
382 	prev->ipf_next = p;
383 }
384 
385 /*
386  * To ip_enq as remque is to insque.
387  */
388 ip_deq(p)
389 	register struct ipasfrag *p;
390 {
391 
392 COUNT(IP_DEQ);
393 	p->ipf_prev->ipf_next = p->ipf_next;
394 	p->ipf_next->ipf_prev = p->ipf_prev;
395 }
396 
397 /*
398  * IP timer processing;
399  * if a timer expires on a reassembly
400  * queue, discard it.
401  */
402 ip_slowtimo()
403 {
404 	register struct ipq *fp;
405 	int s = splnet();
406 
407 COUNT(IP_SLOWTIMO);
408 	fp = ipq.next;
409 	if (fp == 0) {
410 		splx(s);
411 		return;
412 	}
413 	while (fp != &ipq)
414 		if (--fp->ipq_ttl == 0)
415 			fp = ip_freef(fp);
416 		else
417 			fp = fp->next;
418 	splx(s);
419 }
420 
421 /*
422  * Drain off all datagram fragments.
423  */
424 ip_drain()
425 {
426 
427 COUNT(IP_DRAIN);
428 	while (ipq.next != &ipq)
429 		(void) ip_freef(ipq.next);
430 }
431 
432 /*
433  * Do option processing on a datagram,
434  * possibly discarding it if bad options
435  * are encountered.
436  */
437 ip_dooptions(ip)
438 	struct ip *ip;
439 {
440 	register u_char *cp;
441 	int opt, optlen, cnt;
442 	struct in_addr *sin;
443 	register struct ip_timestamp *ipt;
444 	register struct ifnet *ifp;
445 	struct in_addr t;
446 
447 COUNT(IP_DOOPTIONS);
448 	cp = (u_char *)(ip + 1);
449 	cnt = (ip->ip_hl << 2) - sizeof (struct ip);
450 	for (; cnt > 0; cnt -= optlen, cp += optlen) {
451 		opt = cp[0];
452 		if (opt == IPOPT_EOL)
453 			break;
454 		if (opt == IPOPT_NOP)
455 			optlen = 1;
456 		else
457 			optlen = cp[1];
458 		switch (opt) {
459 
460 		default:
461 			break;
462 
463 		/*
464 		 * Source routing with record.
465 		 * Find interface with current destination address.
466 		 * If none on this machine then drop if strictly routed,
467 		 * or do nothing if loosely routed.
468 		 * Record interface address and bring up next address
469 		 * component.  If strictly routed make sure next
470 		 * address on directly accessible net.
471 		 */
472 		case IPOPT_LSRR:
473 			if (cp[2] < 4 || cp[2] > optlen - (sizeof (long) - 1))
474 				break;
475 			sin = (struct in_addr *)(cp + cp[2]);
476 			ipaddr.sin_addr = *sin;
477 			ifp = if_ifwithaddr((struct sockaddr *)&ipaddr);
478 			if (ifp == 0) {
479 				if (opt == IPOPT_SSRR)
480 					goto bad;
481 				break;
482 			}
483 			t = ip->ip_dst; ip->ip_dst = *sin; *sin = t;
484 			cp[2] += 4;
485 			if (cp[2] > optlen - (sizeof (long) - 1))
486 				break;
487 			ip->ip_dst = sin[1];
488 			if (opt == IPOPT_SSRR &&
489 			    if_ifonnetof(ip->ip_dst.s_net) == 0)
490 				goto bad;
491 			break;
492 
493 		case IPOPT_TS:
494 			ipt = (struct ip_timestamp *)cp;
495 			if (ipt->ipt_len < 5)
496 				goto bad;
497 			if (ipt->ipt_ptr > ipt->ipt_len - sizeof (long)) {
498 				if (++ipt->ipt_oflw == 0)
499 					goto bad;
500 				break;
501 			}
502 			sin = (struct in_addr *)(cp+cp[2]);
503 			switch (ipt->ipt_flg) {
504 
505 			case IPOPT_TS_TSONLY:
506 				break;
507 
508 			case IPOPT_TS_TSANDADDR:
509 				if (ipt->ipt_ptr + 8 > ipt->ipt_len)
510 					goto bad;
511 				if (ifinet == 0)
512 					goto bad;	/* ??? */
513 				*sin++ = ((struct sockaddr_in *)&ifinet->if_addr)->sin_addr;
514 				break;
515 
516 			case IPOPT_TS_PRESPEC:
517 				ipaddr.sin_addr = *sin;
518 				if (if_ifwithaddr((struct sockaddr *)&ipaddr) == 0)
519 					continue;
520 				if (ipt->ipt_ptr + 8 > ipt->ipt_len)
521 					goto bad;
522 				ipt->ipt_ptr += 4;
523 				break;
524 
525 			default:
526 				goto bad;
527 			}
528 			*(n_time *)sin = iptime();
529 			ipt->ipt_ptr += 4;
530 		}
531 	}
532 	return;
533 bad:
534 	/* SHOULD FORCE ICMP MESSAGE */
535 	return;
536 }
537 
538 /*
539  * Strip out IP options, at higher
540  * level protocol in the kernel.
541  * Second argument is buffer to which options
542  * will be moved, and return value is their length.
543  */
544 ip_stripoptions(ip, mopt)
545 	struct ip *ip;
546 	struct mbuf *mopt;
547 {
548 	register int i;
549 	register struct mbuf *m;
550 	int olen;
551 COUNT(IP_STRIPOPTIONS);
552 
553 	olen = (ip->ip_hl<<2) - sizeof (struct ip);
554 	m = dtom(ip);
555 	ip++;
556 	if (mopt) {
557 		mopt->m_len = olen;
558 		mopt->m_off = MMINOFF;
559 		bcopy((caddr_t)ip, mtod(m, caddr_t), (unsigned)olen);
560 	}
561 	i = m->m_len - (sizeof (struct ip) + olen);
562 	bcopy((caddr_t)ip+olen, (caddr_t)ip, (unsigned)i);
563 	m->m_len -= olen;
564 }
565