xref: /csrg-svn/sys/netinet/ip_input.c (revision 5172)
1 /* ip_input.c 1.22 81/12/03 */
2 
3 #include "../h/param.h"
4 #include "../h/systm.h"
5 #include "../h/clock.h"
6 #include "../h/mbuf.h"
7 #include "../h/protosw.h"
8 #include "../h/socket.h"
9 #include "../net/in.h"
10 #include "../net/in_systm.h"
11 #include "../net/if.h"
12 #include "../net/ip.h"			/* belongs before in.h */
13 #include "../net/ip_var.h"
14 #include "../net/ip_icmp.h"
15 #include "../net/tcp.h"
16 
17 u_char	ip_protox[IPPROTO_MAX];
18 
19 /*
20  * IP initialization: fill in IP protocol switch table.
21  * All protocols not implemented in kernel go to raw IP protocol handler.
22  */
23 ip_init()
24 {
25 	register struct protosw *pr;
26 	register int i;
27 
28 COUNT(IP_INIT);
29 	pr = pffindproto(PF_INET, IPPROTO_RAW);
30 	if (pr == 0)
31 		panic("ip_init");
32 	for (i = 0; i < IPPROTO_MAX; i++)
33 		ip_protox[i] = pr - protosw;
34 	for (pr = protosw; pr <= protoswLAST; pr++)
35 		if (pr->pr_family == PF_INET &&
36 		    pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW)
37 			ip_protox[pr->pr_protocol] = pr - protosw;
38 	ipq.next = ipq.prev = &ipq;
39 	ip_id = time & 0xffff;
40 }
41 
42 u_char	ipcksum = 1;
43 struct	ip *ip_reass();
44 
45 /*
46  * Ip input routine.  Checksum and byte swap header.  If fragmented
47  * try to reassamble.  If complete and fragment queue exists, discard.
48  * Process options.  Pass to next level.
49  */
50 ipintr()
51 {
52 	register struct ip *ip;
53 	register struct mbuf *m;
54 	struct mbuf *m0;
55 	register int i;
56 	register struct ipq *fp;
57 	int hlen, s;
58 
59 COUNT(IPINTR);
60 next:
61 	/*
62 	 * Get next datagram off input queue and get IP header
63 	 * in first mbuf.
64 	 */
65 	s = splimp();
66 	IF_DEQUEUE(&ipintrq, m);
67 	splx(s);
68 	if (m == 0)
69 		return;
70 	if (m->m_len < sizeof (struct ip) &&
71 	    m_pullup(m, sizeof (struct ip)) == 0)
72 		goto bad;
73 	ip = mtod(m, struct ip *);
74 	if ((hlen = ip->ip_hl << 2) > m->m_len) {
75 		if (m_pullup(m, hlen) == 0)
76 			goto bad;
77 		ip = mtod(m, struct ip *);
78 	}
79 	if (ipcksum)
80 		if ((ip->ip_sum = in_cksum(m, hlen)) != 0xffff) {
81 			printf("ip_sum %x\n", ip->ip_sum);	/* XXX */
82 			ipstat.ips_badsum++;
83 			goto bad;
84 		}
85 
86 	/*
87 	 * Convert fields to host representation.
88 	 */
89 	ip->ip_len = ntohs((u_short)ip->ip_len);
90 	ip->ip_id = ntohs(ip->ip_id);
91 	ip->ip_off = ntohs((u_short)ip->ip_off);
92 
93 	/*
94 	 * Check that the amount of data in the buffers
95 	 * is as at least much as the IP header would have us expect.
96 	 * Trim mbufs if longer than we expect.
97 	 * Drop packet if shorter than we expect.
98 	 */
99 	i = 0;
100 	m0 = m;
101 	for (; m != NULL; m = m->m_next)
102 		i += m->m_len;
103 	m = m0;
104 	if (i != ip->ip_len) {
105 		if (i < ip->ip_len) {
106 			ipstat.ips_tooshort++;
107 			goto bad;
108 		}
109 		m_adj(m, ip->ip_len - i);
110 	}
111 
112 	/*
113 	 * Process options and, if not destined for us,
114 	 * ship it on.
115 	 */
116 	if (hlen > sizeof (struct ip))
117 		ip_dooptions(ip);
118 	if (ifnet && ip->ip_dst.s_addr != ifnet->if_addr.s_addr &&
119 	    if_ifwithaddr(ip->ip_dst) == 0) {
120 		if (--ip->ip_ttl == 0) {
121 			icmp_error(ip, ICMP_TIMXCEED, 0);
122 			goto next;
123 		}
124 		(void) ip_output(dtom(ip), (struct mbuf *)0);
125 		goto next;
126 	}
127 
128 	/*
129 	 * Look for queue of fragments
130 	 * of this datagram.
131 	 */
132 	for (fp = ipq.next; fp != &ipq; fp = fp->next)
133 		if (ip->ip_id == fp->ipq_id &&
134 		    ip->ip_src.s_addr == fp->ipq_src.s_addr &&
135 		    ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
136 		    ip->ip_p == fp->ipq_p)
137 			goto found;
138 	fp = 0;
139 found:
140 
141 	/*
142 	 * Adjust ip_len to not reflect header,
143 	 * set ip_mff if more fragments are expected,
144 	 * convert offset of this to bytes.
145 	 */
146 	ip->ip_len -= hlen;
147 	((struct ipasfrag *)ip)->ipf_mff = 0;
148 	if (ip->ip_off & IP_MF)
149 		((struct ipasfrag *)ip)->ipf_mff = 1;
150 	ip->ip_off <<= 3;
151 
152 	/*
153 	 * If datagram marked as having more fragments
154 	 * or if this is not the first fragment,
155 	 * attempt reassembly; if it succeeds, proceed.
156 	 */
157 	if (((struct ipasfrag *)ip)->ipf_mff || ip->ip_off) {
158 		ip = ip_reass((struct ipasfrag *)ip, fp);
159 		if (ip == 0)
160 			goto next;
161 		hlen = ip->ip_hl << 2;
162 		m = dtom(ip);
163 	} else
164 		if (fp)
165 			(void) ip_freef(fp);
166 
167 	/*
168 	 * Switch out to protocol's input routine.
169 	 */
170 	(*protosw[ip_protox[ip->ip_p]].pr_input)(m);
171 	goto next;
172 bad:
173 	m_freem(m);
174 	goto next;
175 }
176 
177 /*
178  * Take incoming datagram fragment and try to
179  * reassemble it into whole datagram.  If a chain for
180  * reassembly of this datagram already exists, then it
181  * is given as fp; otherwise have to make a chain.
182  */
183 struct ip *
184 ip_reass(ip, fp)
185 	register struct ipasfrag *ip;
186 	register struct ipq *fp;
187 {
188 	register struct mbuf *m = dtom(ip);
189 	register struct ipasfrag *q;
190 	struct mbuf *t;
191 	int hlen = ip->ip_hl << 2;
192 	int i, next;
193 COUNT(IP_REASS);
194 
195 	/*
196 	 * Presence of header sizes in mbufs
197 	 * would confuse code below.
198 	 */
199 	m->m_off += hlen;
200 	m->m_len -= hlen;
201 
202 	/*
203 	 * If first fragment to arrive, create a reassembly queue.
204 	 */
205 	if (fp == 0) {
206 		if ((t = m_get(1)) == NULL)
207 			goto dropfrag;
208 		t->m_off = MMINOFF;
209 		fp = mtod(t, struct ipq *);
210 		insque(fp, &ipq);
211 		fp->ipq_ttl = IPFRAGTTL;
212 		fp->ipq_p = ip->ip_p;
213 		fp->ipq_id = ip->ip_id;
214 		fp->ipq_next = fp->ipq_prev = (struct ipasfrag *)fp;
215 		fp->ipq_src = ((struct ip *)ip)->ip_src;
216 		fp->ipq_dst = ((struct ip *)ip)->ip_dst;
217 		q = (struct ipasfrag *)fp;
218 		goto insert;
219 	}
220 
221 	/*
222 	 * Find a segment which begins after this one does.
223 	 */
224 	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next)
225 		if (q->ip_off > ip->ip_off)
226 			break;
227 
228 	/*
229 	 * If there is a preceding segment, it may provide some of
230 	 * our data already.  If so, drop the data from the incoming
231 	 * segment.  If it provides all of our data, drop us.
232 	 */
233 	if (q->ipf_prev != (struct ipasfrag *)fp) {
234 		i = q->ipf_prev->ip_off + q->ipf_prev->ip_len - ip->ip_off;
235 		if (i > 0) {
236 			if (i >= ip->ip_len)
237 				goto dropfrag;
238 			m_adj(dtom(ip), i);
239 			ip->ip_off += i;
240 			ip->ip_len -= i;
241 		}
242 	}
243 
244 	/*
245 	 * While we overlap succeeding segments trim them or,
246 	 * if they are completely covered, dequeue them.
247 	 */
248 	while (q != (struct ipasfrag *)fp && ip->ip_off + ip->ip_len > q->ip_off) {
249 		i = (ip->ip_off + ip->ip_len) - q->ip_off;
250 		if (i < q->ip_len) {
251 			q->ip_len -= i;
252 			m_adj(dtom(q), i);
253 			break;
254 		}
255 		q = q->ipf_next;
256 		m_freem(dtom(q->ipf_prev));
257 		ip_deq(q->ipf_prev);
258 	}
259 
260 insert:
261 	/*
262 	 * Stick new segment in its place;
263 	 * check for complete reassembly.
264 	 */
265 	ip_enq(ip, q->ipf_prev);
266 	next = 0;
267 	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next) {
268 		if (q->ip_off != next)
269 			return (0);
270 		next += q->ip_len;
271 	}
272 	if (q->ipf_prev->ipf_mff)
273 		return (0);
274 
275 	/*
276 	 * Reassembly is complete; concatenate fragments.
277 	 */
278 	q = fp->ipq_next;
279 	m = dtom(q);
280 	t = m->m_next;
281 	m->m_next = 0;
282 	m_cat(m, t);
283 	while ((q = q->ipf_next) != (struct ipasfrag *)fp)
284 		m_cat(m, dtom(q));
285 
286 	/*
287 	 * Create header for new ip packet by
288 	 * modifying header of first packet;
289 	 * dequeue and discard fragment reassembly header.
290 	 * Make header visible.
291 	 */
292 	ip = fp->ipq_next;
293 	ip->ip_len = next;
294 	((struct ip *)ip)->ip_src = fp->ipq_src;
295 	((struct ip *)ip)->ip_dst = fp->ipq_dst;
296 	remque(fp);
297 	(void) m_free(dtom(fp));
298 	m = dtom(ip);
299 	m->m_len += sizeof (struct ipasfrag);
300 	m->m_off -= sizeof (struct ipasfrag);
301 	return ((struct ip *)ip);
302 
303 dropfrag:
304 	m_freem(m);
305 	return (0);
306 }
307 
308 /*
309  * Free a fragment reassembly header and all
310  * associated datagrams.
311  */
312 struct ipq *
313 ip_freef(fp)
314 	struct ipq *fp;
315 {
316 	register struct ipasfrag *q;
317 	struct mbuf *m;
318 COUNT(IP_FREEF);
319 
320 	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next)
321 		m_freem(dtom(q));
322 	m = dtom(fp);
323 	fp = fp->next;
324 	remque(fp->prev);
325 	(void) m_free(m);
326 	return (fp);
327 }
328 
329 /*
330  * Put an ip fragment on a reassembly chain.
331  * Like insque, but pointers in middle of structure.
332  */
333 ip_enq(p, prev)
334 	register struct ipasfrag *p, *prev;
335 {
336 
337 COUNT(IP_ENQ);
338 	p->ipf_prev = prev;
339 	p->ipf_next = prev->ipf_next;
340 	prev->ipf_next->ipf_prev = p;
341 	prev->ipf_next = p;
342 }
343 
344 /*
345  * To ip_enq as remque is to insque.
346  */
347 ip_deq(p)
348 	register struct ipasfrag *p;
349 {
350 
351 COUNT(IP_DEQ);
352 	p->ipf_prev->ipf_next = p->ipf_next;
353 	p->ipf_next->ipf_prev = p->ipf_prev;
354 }
355 
356 /*
357  * IP timer processing;
358  * if a timer expires on a reassembly
359  * queue, discard it.
360  */
361 ip_slowtimo()
362 {
363 	register struct ipq *fp;
364 	int s = splnet();
365 
366 COUNT(IP_SLOWTIMO);
367 	for (fp = ipq.next; fp != &ipq; )
368 		if (--fp->ipq_ttl == 0)
369 			fp = ip_freef(fp);
370 		else
371 			fp = fp->next;
372 	splx(s);
373 }
374 
375 /*
376  * Drain off all datagram fragments.
377  */
378 ip_drain()
379 {
380 
381 COUNT(IP_DRAIN);
382 	while (ipq.next != &ipq)
383 		(void) ip_freef(ipq.next);
384 }
385 
386 /*
387  * Do option processing on a datagram,
388  * possibly discarding it if bad options
389  * are encountered.
390  */
391 ip_dooptions(ip)
392 	struct ip *ip;
393 {
394 	register u_char *cp;
395 	int opt, optlen, cnt;
396 	struct in_addr *sin;
397 	register struct ip_timestamp *ipt;
398 	register struct ifnet *ifp;
399 	struct in_addr t;
400 
401 COUNT(IP_DOOPTIONS);
402 	cp = (u_char *)(ip + 1);
403 	cnt = (ip->ip_hl << 2) - sizeof (struct ip);
404 	for (; cnt > 0; cnt -= optlen, cp += optlen) {
405 		opt = cp[0];
406 		if (opt == IPOPT_EOL)
407 			break;
408 		if (opt == IPOPT_NOP)
409 			optlen = 1;
410 		else
411 			optlen = cp[1];
412 		switch (opt) {
413 
414 		default:
415 			break;
416 
417 		/*
418 		 * Source routing with record.
419 		 * Find interface with current destination address.
420 		 * If none on this machine then drop if strictly routed,
421 		 * or do nothing if loosely routed.
422 		 * Record interface address and bring up next address
423 		 * component.  If strictly routed make sure next
424 		 * address on directly accessible net.
425 		 */
426 		case IPOPT_LSRR:
427 			if (cp[2] < 4 || cp[2] > optlen - (sizeof (long) - 1))
428 				break;
429 			sin = (struct in_addr *)(cp + cp[2]);
430 			ifp = if_ifwithaddr(*sin);
431 			if (ifp == 0) {
432 				if (opt == IPOPT_SSRR)
433 					goto bad;
434 				break;
435 			}
436 			t = ip->ip_dst; ip->ip_dst = *sin; *sin = t;
437 			cp[2] += 4;
438 			if (cp[2] > optlen - (sizeof (long) - 1))
439 				break;
440 			ip->ip_dst = sin[1];
441 			if (opt == IPOPT_SSRR && if_ifonnetof(ip->ip_dst)==0)
442 				goto bad;
443 			break;
444 
445 		case IPOPT_TS:
446 			ipt = (struct ip_timestamp *)cp;
447 			if (ipt->ipt_len < 5)
448 				goto bad;
449 			if (ipt->ipt_ptr > ipt->ipt_len - sizeof (long)) {
450 				if (++ipt->ipt_oflw == 0)
451 					goto bad;
452 				break;
453 			}
454 			sin = (struct in_addr *)(cp+cp[2]);
455 			switch (ipt->ipt_flg) {
456 
457 			case IPOPT_TS_TSONLY:
458 				break;
459 
460 			case IPOPT_TS_TSANDADDR:
461 				if (ipt->ipt_ptr + 8 > ipt->ipt_len)
462 					goto bad;
463 				/* stamp with ``first'' interface address */
464 				*sin++ = ifnet->if_addr;
465 				break;
466 
467 			case IPOPT_TS_PRESPEC:
468 				if (if_ifwithaddr(*sin) == 0)
469 					continue;
470 				if (ipt->ipt_ptr + 8 > ipt->ipt_len)
471 					goto bad;
472 				ipt->ipt_ptr += 4;
473 				break;
474 
475 			default:
476 				goto bad;
477 			}
478 			*(n_time *)sin = iptime();
479 			ipt->ipt_ptr += 4;
480 		}
481 	}
482 	return;
483 bad:
484 	/* SHOULD FORCE ICMP MESSAGE */
485 	return;
486 }
487 
488 /*
489  * Strip out IP options, at higher
490  * level protocol in the kernel.
491  * Second argument is buffer to which options
492  * will be moved, and return value is their length.
493  */
494 ip_stripoptions(ip, cp)
495 	struct ip *ip;
496 	char *cp;
497 {
498 	register int i;
499 	register struct mbuf *m;
500 	int olen;
501 COUNT(IP_STRIPOPTIONS);
502 
503 	olen = (ip->ip_hl<<2) - sizeof (struct ip);
504 	m = dtom(ip);
505 	ip++;
506 	if (cp)
507 		bcopy((caddr_t)ip, cp, (unsigned)olen);
508 	i = m->m_len - (sizeof (struct ip) + olen);
509 	bcopy((caddr_t)ip+olen, (caddr_t)ip, (unsigned)i);
510 	m->m_len -= i;
511 }
512