xref: /csrg-svn/sys/netinet/ip_input.c (revision 5243)
1 /* ip_input.c 1.26 81/12/12 */
2 
3 #include "../h/param.h"
4 #include "../h/systm.h"
5 #include "../h/clock.h"
6 #include "../h/mbuf.h"
7 #include "../h/protosw.h"
8 #include "../h/socket.h"
9 #include "../net/in.h"
10 #include "../net/in_systm.h"
11 #include "../net/if.h"
12 #include "../net/ip.h"			/* belongs before in.h */
13 #include "../net/ip_var.h"
14 #include "../net/ip_icmp.h"
15 #include "../net/tcp.h"
16 
/*
 * Maps an IP protocol number to the index of the protosw[] entry
 * that handles it.  Filled in by ip_init(), consulted by ipintr().
 */
u_char	ip_protox[IPPROTO_MAX];
18 
19 /*
20  * IP initialization: fill in IP protocol switch table.
21  * All protocols not implemented in kernel go to raw IP protocol handler.
22  */
23 ip_init()
24 {
25 	register struct protosw *pr;
26 	register int i;
27 
28 COUNT(IP_INIT);
29 	pr = pffindproto(PF_INET, IPPROTO_RAW);
30 	if (pr == 0)
31 		panic("ip_init");
32 	for (i = 0; i < IPPROTO_MAX; i++)
33 		ip_protox[i] = pr - protosw;
34 	for (pr = protosw; pr <= protoswLAST; pr++)
35 		if (pr->pr_family == PF_INET &&
36 		    pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW)
37 			ip_protox[pr->pr_protocol] = pr - protosw;
38 	ipq.next = ipq.prev = &ipq;
39 	ip_id = time & 0xffff;
40 }
41 
u_char	ipcksum = 1;		/* nonzero: ipintr() verifies the IP header checksum */
struct	ip *ip_reass();		/* forward declaration; defined later in this file */
44 
45 /*
46  * Ip input routine.  Checksum and byte swap header.  If fragmented
47  * try to reassamble.  If complete and fragment queue exists, discard.
48  * Process options.  Pass to next level.
49  */
50 ipintr()
51 {
52 	register struct ip *ip;
53 	register struct mbuf *m;
54 	struct mbuf *m0, *mopt;
55 	register int i;
56 	register struct ipq *fp;
57 	int hlen, s;
58 
59 COUNT(IPINTR);
60 next:
61 	/*
62 	 * Get next datagram off input queue and get IP header
63 	 * in first mbuf.
64 	 */
65 	s = splimp();
66 	IF_DEQUEUE(&ipintrq, m);
67 	splx(s);
68 	if (m == 0)
69 		return;
70 	if (m->m_len < sizeof (struct ip) &&
71 	    m_pullup(m, sizeof (struct ip)) == 0)
72 		goto bad;
73 	ip = mtod(m, struct ip *);
74 	if ((hlen = ip->ip_hl << 2) > m->m_len) {
75 		if (m_pullup(m, hlen) == 0)
76 			goto bad;
77 		ip = mtod(m, struct ip *);
78 	}
79 	if (ipcksum)
80 		if (ip->ip_sum = in_cksum(m, hlen)) {
81 			printf("ip_sum %x\n", ip->ip_sum);	/* XXX */
82 			ipstat.ips_badsum++;
83 			goto bad;
84 		}
85 
86 #if vax
87 	/*
88 	 * Convert fields to host representation.
89 	 */
90 	ip->ip_len = ntohs((u_short)ip->ip_len);
91 	ip->ip_id = ntohs(ip->ip_id);
92 	ip->ip_off = ntohs((u_short)ip->ip_off);
93 #endif
94 
95 	/*
96 	 * Check that the amount of data in the buffers
97 	 * is as at least much as the IP header would have us expect.
98 	 * Trim mbufs if longer than we expect.
99 	 * Drop packet if shorter than we expect.
100 	 */
101 	i = 0;
102 	m0 = m;
103 	for (; m != NULL; m = m->m_next)
104 		i += m->m_len;
105 	m = m0;
106 	if (i != ip->ip_len) {
107 		if (i < ip->ip_len) {
108 			ipstat.ips_tooshort++;
109 			goto bad;
110 		}
111 		m_adj(m, ip->ip_len - i);
112 	}
113 
114 	/*
115 	 * Process options and, if not destined for us,
116 	 * ship it on.
117 	 */
118 	if (hlen > sizeof (struct ip))
119 		ip_dooptions(ip);
120 	if (ifnet && ip->ip_dst.s_addr != ifnet->if_addr.s_addr &&
121 	    if_ifwithaddr(ip->ip_dst) == 0) {
122 		if (--ip->ip_ttl == 0) {
123 			icmp_error(ip, ICMP_TIMXCEED, 0);
124 			goto next;
125 		}
126 		mopt = m_get(M_DONTWAIT);
127 		if (mopt == 0)
128 			goto bad;
129 		ip_stripoptions(ip, mopt);
130 		(void) ip_output(m0, mopt);
131 		goto next;
132 	}
133 
134 	/*
135 	 * Look for queue of fragments
136 	 * of this datagram.
137 	 */
138 	for (fp = ipq.next; fp != &ipq; fp = fp->next)
139 		if (ip->ip_id == fp->ipq_id &&
140 		    ip->ip_src.s_addr == fp->ipq_src.s_addr &&
141 		    ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
142 		    ip->ip_p == fp->ipq_p)
143 			goto found;
144 	fp = 0;
145 found:
146 
147 	/*
148 	 * Adjust ip_len to not reflect header,
149 	 * set ip_mff if more fragments are expected,
150 	 * convert offset of this to bytes.
151 	 */
152 	ip->ip_len -= hlen;
153 	((struct ipasfrag *)ip)->ipf_mff = 0;
154 	if (ip->ip_off & IP_MF)
155 		((struct ipasfrag *)ip)->ipf_mff = 1;
156 	ip->ip_off <<= 3;
157 
158 	/*
159 	 * If datagram marked as having more fragments
160 	 * or if this is not the first fragment,
161 	 * attempt reassembly; if it succeeds, proceed.
162 	 */
163 	if (((struct ipasfrag *)ip)->ipf_mff || ip->ip_off) {
164 		ip = ip_reass((struct ipasfrag *)ip, fp);
165 		if (ip == 0)
166 			goto next;
167 		hlen = ip->ip_hl << 2;
168 		m = dtom(ip);
169 	} else
170 		if (fp)
171 			(void) ip_freef(fp);
172 
173 	/*
174 	 * Switch out to protocol's input routine.
175 	 */
176 	(*protosw[ip_protox[ip->ip_p]].pr_input)(m);
177 	goto next;
178 bad:
179 printf("ip_input bad!\n");
180 	m_freem(m);
181 	goto next;
182 }
183 
184 /*
185  * Take incoming datagram fragment and try to
186  * reassemble it into whole datagram.  If a chain for
187  * reassembly of this datagram already exists, then it
188  * is given as fp; otherwise have to make a chain.
189  */
190 struct ip *
191 ip_reass(ip, fp)
192 	register struct ipasfrag *ip;
193 	register struct ipq *fp;
194 {
195 	register struct mbuf *m = dtom(ip);
196 	register struct ipasfrag *q;
197 	struct mbuf *t;
198 	int hlen = ip->ip_hl << 2;
199 	int i, next;
200 COUNT(IP_REASS);
201 
202 	/*
203 	 * Presence of header sizes in mbufs
204 	 * would confuse code below.
205 	 */
206 	m->m_off += hlen;
207 	m->m_len -= hlen;
208 
209 	/*
210 	 * If first fragment to arrive, create a reassembly queue.
211 	 */
212 	if (fp == 0) {
213 		if ((t = m_get(1)) == NULL)
214 			goto dropfrag;
215 		t->m_off = MMINOFF;
216 		fp = mtod(t, struct ipq *);
217 		insque(fp, &ipq);
218 		fp->ipq_ttl = IPFRAGTTL;
219 		fp->ipq_p = ip->ip_p;
220 		fp->ipq_id = ip->ip_id;
221 		fp->ipq_next = fp->ipq_prev = (struct ipasfrag *)fp;
222 		fp->ipq_src = ((struct ip *)ip)->ip_src;
223 		fp->ipq_dst = ((struct ip *)ip)->ip_dst;
224 		q = (struct ipasfrag *)fp;
225 		goto insert;
226 	}
227 
228 	/*
229 	 * Find a segment which begins after this one does.
230 	 */
231 	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next)
232 		if (q->ip_off > ip->ip_off)
233 			break;
234 
235 	/*
236 	 * If there is a preceding segment, it may provide some of
237 	 * our data already.  If so, drop the data from the incoming
238 	 * segment.  If it provides all of our data, drop us.
239 	 */
240 	if (q->ipf_prev != (struct ipasfrag *)fp) {
241 		i = q->ipf_prev->ip_off + q->ipf_prev->ip_len - ip->ip_off;
242 		if (i > 0) {
243 			if (i >= ip->ip_len)
244 				goto dropfrag;
245 			m_adj(dtom(ip), i);
246 			ip->ip_off += i;
247 			ip->ip_len -= i;
248 		}
249 	}
250 
251 	/*
252 	 * While we overlap succeeding segments trim them or,
253 	 * if they are completely covered, dequeue them.
254 	 */
255 	while (q != (struct ipasfrag *)fp && ip->ip_off + ip->ip_len > q->ip_off) {
256 		i = (ip->ip_off + ip->ip_len) - q->ip_off;
257 		if (i < q->ip_len) {
258 			q->ip_len -= i;
259 			m_adj(dtom(q), i);
260 			break;
261 		}
262 		q = q->ipf_next;
263 		m_freem(dtom(q->ipf_prev));
264 		ip_deq(q->ipf_prev);
265 	}
266 
267 insert:
268 	/*
269 	 * Stick new segment in its place;
270 	 * check for complete reassembly.
271 	 */
272 	ip_enq(ip, q->ipf_prev);
273 	next = 0;
274 	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next) {
275 		if (q->ip_off != next)
276 			return (0);
277 		next += q->ip_len;
278 	}
279 	if (q->ipf_prev->ipf_mff)
280 		return (0);
281 
282 	/*
283 	 * Reassembly is complete; concatenate fragments.
284 	 */
285 	q = fp->ipq_next;
286 	m = dtom(q);
287 	t = m->m_next;
288 	m->m_next = 0;
289 	m_cat(m, t);
290 	while ((q = q->ipf_next) != (struct ipasfrag *)fp)
291 		m_cat(m, dtom(q));
292 
293 	/*
294 	 * Create header for new ip packet by
295 	 * modifying header of first packet;
296 	 * dequeue and discard fragment reassembly header.
297 	 * Make header visible.
298 	 */
299 	ip = fp->ipq_next;
300 	ip->ip_len = next;
301 	((struct ip *)ip)->ip_src = fp->ipq_src;
302 	((struct ip *)ip)->ip_dst = fp->ipq_dst;
303 	remque(fp);
304 	(void) m_free(dtom(fp));
305 	m = dtom(ip);
306 	m->m_len += sizeof (struct ipasfrag);
307 	m->m_off -= sizeof (struct ipasfrag);
308 	return ((struct ip *)ip);
309 
310 dropfrag:
311 	m_freem(m);
312 	return (0);
313 }
314 
315 /*
316  * Free a fragment reassembly header and all
317  * associated datagrams.
318  */
319 struct ipq *
320 ip_freef(fp)
321 	struct ipq *fp;
322 {
323 	register struct ipasfrag *q;
324 	struct mbuf *m;
325 COUNT(IP_FREEF);
326 
327 	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next)
328 		m_freem(dtom(q));
329 	m = dtom(fp);
330 	fp = fp->next;
331 	remque(fp->prev);
332 	(void) m_free(m);
333 	return (fp);
334 }
335 
336 /*
337  * Put an ip fragment on a reassembly chain.
338  * Like insque, but pointers in middle of structure.
339  */
340 ip_enq(p, prev)
341 	register struct ipasfrag *p, *prev;
342 {
343 
344 COUNT(IP_ENQ);
345 	p->ipf_prev = prev;
346 	p->ipf_next = prev->ipf_next;
347 	prev->ipf_next->ipf_prev = p;
348 	prev->ipf_next = p;
349 }
350 
351 /*
352  * To ip_enq as remque is to insque.
353  */
354 ip_deq(p)
355 	register struct ipasfrag *p;
356 {
357 
358 COUNT(IP_DEQ);
359 	p->ipf_prev->ipf_next = p->ipf_next;
360 	p->ipf_next->ipf_prev = p->ipf_prev;
361 }
362 
363 /*
364  * IP timer processing;
365  * if a timer expires on a reassembly
366  * queue, discard it.
367  */
368 ip_slowtimo()
369 {
370 	register struct ipq *fp;
371 	int s = splnet();
372 
373 COUNT(IP_SLOWTIMO);
374 	fp = ipq.next;
375 	if (fp == 0) {
376 		splx(s);
377 		return;
378 	}
379 	while (fp != &ipq)
380 		if (--fp->ipq_ttl == 0)
381 			fp = ip_freef(fp);
382 		else
383 			fp = fp->next;
384 	splx(s);
385 }
386 
387 /*
388  * Drain off all datagram fragments.
389  */
390 ip_drain()
391 {
392 
393 COUNT(IP_DRAIN);
394 	while (ipq.next != &ipq)
395 		(void) ip_freef(ipq.next);
396 }
397 
398 /*
399  * Do option processing on a datagram,
400  * possibly discarding it if bad options
401  * are encountered.
402  */
403 ip_dooptions(ip)
404 	struct ip *ip;
405 {
406 	register u_char *cp;
407 	int opt, optlen, cnt;
408 	struct in_addr *sin;
409 	register struct ip_timestamp *ipt;
410 	register struct ifnet *ifp;
411 	struct in_addr t;
412 
413 COUNT(IP_DOOPTIONS);
414 	cp = (u_char *)(ip + 1);
415 	cnt = (ip->ip_hl << 2) - sizeof (struct ip);
416 	for (; cnt > 0; cnt -= optlen, cp += optlen) {
417 		opt = cp[0];
418 		if (opt == IPOPT_EOL)
419 			break;
420 		if (opt == IPOPT_NOP)
421 			optlen = 1;
422 		else
423 			optlen = cp[1];
424 		switch (opt) {
425 
426 		default:
427 			break;
428 
429 		/*
430 		 * Source routing with record.
431 		 * Find interface with current destination address.
432 		 * If none on this machine then drop if strictly routed,
433 		 * or do nothing if loosely routed.
434 		 * Record interface address and bring up next address
435 		 * component.  If strictly routed make sure next
436 		 * address on directly accessible net.
437 		 */
438 		case IPOPT_LSRR:
439 			if (cp[2] < 4 || cp[2] > optlen - (sizeof (long) - 1))
440 				break;
441 			sin = (struct in_addr *)(cp + cp[2]);
442 			ifp = if_ifwithaddr(*sin);
443 			if (ifp == 0) {
444 				if (opt == IPOPT_SSRR)
445 					goto bad;
446 				break;
447 			}
448 			t = ip->ip_dst; ip->ip_dst = *sin; *sin = t;
449 			cp[2] += 4;
450 			if (cp[2] > optlen - (sizeof (long) - 1))
451 				break;
452 			ip->ip_dst = sin[1];
453 			if (opt == IPOPT_SSRR && if_ifonnetof(ip->ip_dst)==0)
454 				goto bad;
455 			break;
456 
457 		case IPOPT_TS:
458 			ipt = (struct ip_timestamp *)cp;
459 			if (ipt->ipt_len < 5)
460 				goto bad;
461 			if (ipt->ipt_ptr > ipt->ipt_len - sizeof (long)) {
462 				if (++ipt->ipt_oflw == 0)
463 					goto bad;
464 				break;
465 			}
466 			sin = (struct in_addr *)(cp+cp[2]);
467 			switch (ipt->ipt_flg) {
468 
469 			case IPOPT_TS_TSONLY:
470 				break;
471 
472 			case IPOPT_TS_TSANDADDR:
473 				if (ipt->ipt_ptr + 8 > ipt->ipt_len)
474 					goto bad;
475 				/* stamp with ``first'' interface address */
476 				*sin++ = ifnet->if_addr;
477 				break;
478 
479 			case IPOPT_TS_PRESPEC:
480 				if (if_ifwithaddr(*sin) == 0)
481 					continue;
482 				if (ipt->ipt_ptr + 8 > ipt->ipt_len)
483 					goto bad;
484 				ipt->ipt_ptr += 4;
485 				break;
486 
487 			default:
488 				goto bad;
489 			}
490 			*(n_time *)sin = iptime();
491 			ipt->ipt_ptr += 4;
492 		}
493 	}
494 	return;
495 bad:
496 	/* SHOULD FORCE ICMP MESSAGE */
497 	return;
498 }
499 
500 /*
501  * Strip out IP options, at higher
502  * level protocol in the kernel.
503  * Second argument is buffer to which options
504  * will be moved, and return value is their length.
505  */
506 ip_stripoptions(ip, mopt)
507 	struct ip *ip;
508 	struct mbuf *mopt;
509 {
510 	register int i;
511 	register struct mbuf *m;
512 	int olen;
513 COUNT(IP_STRIPOPTIONS);
514 
515 	olen = (ip->ip_hl<<2) - sizeof (struct ip);
516 	m = dtom(ip);
517 	ip++;
518 	if (mopt) {
519 		mopt->m_len = olen;
520 		mopt->m_off = MMINOFF;
521 		bcopy((caddr_t)ip, mtod(m, caddr_t), (unsigned)olen);
522 	}
523 	i = m->m_len - (sizeof (struct ip) + olen);
524 	bcopy((caddr_t)ip+olen, (caddr_t)ip, (unsigned)i);
525 	m->m_len -= olen;
526 }
527