xref: /csrg-svn/sys/netinet/ip_input.c (revision 5084)
1 /* ip_input.c 1.19 81/11/26 */
2 
3 #include "../h/param.h"
4 #include "../h/systm.h"
5 #include "../h/clock.h"
6 #include "../h/mbuf.h"
7 #include "../h/protosw.h"
8 #include "../h/socket.h"
9 #include "../net/in.h"
10 #include "../net/in_systm.h"
11 #include "../net/if.h"
12 #include "../net/ip.h"			/* belongs before in.h */
13 #include "../net/ip_var.h"
14 #include "../net/ip_icmp.h"
15 #include "../net/tcp.h"
16 
17 u_char	ip_protox[IPPROTO_MAX];
18 
19 /*
20  * Ip initialization.
21  */
22 ip_init()
23 {
24 	register struct protosw *pr;
25 	register int i;
26 
27 COUNT(IP_INIT);
28 	pr = pffindproto(PF_INET, IPPROTO_RAW);
29 	if (pr == 0)
30 		panic("ip_init");
31 	for (i = 0; i < IPPROTO_MAX; i++)
32 		ip_protox[i] = pr - protosw;
33 	for (pr = protosw; pr <= protoswLAST; pr++)
34 		if (pr->pr_family == PF_INET &&
35 		    pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW)
36 			ip_protox[pr->pr_protocol] = pr - protosw;
37 	ipq.next = ipq.prev = &ipq;
38 	ip_id = time & 0xffff;
39 }
40 
41 u_char	ipcksum = 1;
42 struct	ip *ip_reass();
43 
44 /*
45  * Ip input routines.
46  */
47 
48 /*
49  * Ip input routine.  Checksum and byte swap header.  If fragmented
50  * try to reassamble.  If complete and fragment queue exists, discard.
51  * Process options.  Pass to next level.
52  */
53 ipintr()
54 {
55 	register struct ip *ip;
56 	register struct mbuf *m;
57 	struct mbuf *m0;
58 	register int i;
59 	register struct ipq *fp;
60 	int hlen, s;
61 
62 COUNT(IPINTR);
63 next:
64 	/*
65 	 * Get next datagram off input queue and get IP header
66 	 * in first mbuf.
67 	 */
68 	s = splimp();
69 	IF_DEQUEUE(&ipintrq, m);
70 	splx(s);
71 	if (m == 0)
72 		return;
73 	if (m->m_len < sizeof (struct ip) &&
74 	    m_pullup(m, sizeof (struct ip)) == 0)
75 		goto bad;
76 	ip = mtod(m, struct ip *);
77 	if ((hlen = ip->ip_hl << 2) > m->m_len &&
78 	    m_pullup(m, hlen) == 0)
79 		goto bad;
80 	if (ipcksum)
81 		if ((ip->ip_sum = in_cksum(m, hlen)) != 0xffff) {
82 			printf("ip_sum %x\n", ip->ip_sum);
83 			ipstat.ips_badsum++;
84 			goto bad;
85 		}
86 
87 	/*
88 	 * Convert fields to host representation.
89 	 */
90 	ip->ip_len = ntohs((u_short)ip->ip_len);
91 	ip->ip_id = ntohs(ip->ip_id);
92 	ip->ip_off = ntohs((u_short)ip->ip_off);
93 
94 	/*
95 	 * Check that the amount of data in the buffers
96 	 * is as at least much as the IP header would have us expect.
97 	 * Trim mbufs if longer than we expect.
98 	 * Drop packet if shorter than we expect.
99 	 */
100 	i = 0;
101 	for (m0 = m; m != NULL; m = m->m_next)
102 		i += m->m_len;
103 	m = m0;
104 	if (i != ip->ip_len) {
105 		if (i < ip->ip_len)
106 			goto bad;
107 		m_adj(m, ip->ip_len - i);
108 	}
109 
110 	/*
111 	 * Process options and, if not destined for us,
112 	 * ship it on.
113 	 */
114 	if (hlen > sizeof (struct ip))
115 		ip_dooptions(ip);
116 	if (ifnet && ip->ip_dst.s_addr != ifnet->if_addr.s_addr &&
117 	    if_ifwithaddr(ip->ip_dst) == 0) {
118 		if (--ip->ip_ttl == 0) {
119 			icmp_error(ip, ICMP_TIMXCEED, 0);
120 			goto next;
121 		}
122 		ip_output(dtom(ip), (struct mbuf *)0);
123 		goto next;
124 	}
125 
126 	/*
127 	 * Look for queue of fragments
128 	 * of this datagram.
129 	 */
130 	for (fp = ipq.next; fp != &ipq; fp = fp->next)
131 		if (ip->ip_id == fp->ipq_id &&
132 		    ip->ip_src.s_addr == fp->ipq_src.s_addr &&
133 		    ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
134 		    ip->ip_p == fp->ipq_p)
135 			goto found;
136 	fp = 0;
137 found:
138 
139 	/*
140 	 * Adjust ip_len to not reflect header,
141 	 * set ip_mff if more fragments are expected,
142 	 * convert offset of this to bytes.
143 	 */
144 	ip->ip_len -= hlen;
145 	((struct ipasfrag *)ip)->ipf_mff = 0;
146 	if (ip->ip_off & IP_MF)
147 		((struct ipasfrag *)ip)->ipf_mff = 1;
148 	ip->ip_off <<= 3;
149 
150 	/*
151 	 * If datagram marked as having more fragments
152 	 * or if this is not the first fragment,
153 	 * attempt reassembly; if it succeeds, proceed.
154 	 */
155 	if (((struct ipasfrag *)ip)->ipf_mff || ip->ip_off) {
156 		ip = ip_reass((struct ipasfrag *)ip, fp);
157 		if (ip == 0)
158 			goto next;
159 		hlen = ip->ip_hl << 2;
160 		m = dtom(ip);
161 	} else
162 		if (fp)
163 			(void) ip_freef(fp);
164 
165 	/*
166 	 * Switch out to protocol's input routine.
167 	 */
168 	(*protosw[ip_protox[ip->ip_p]].pr_input)(m);
169 	goto next;
170 bad:
171 	m_freem(m);
172 	goto next;
173 }
174 
175 /*
176  * Take incoming datagram fragment and try to
177  * reassemble it into whole datagram.  If a chain for
178  * reassembly of this datagram already exists, then it
179  * is given as fp; otherwise have to make a chain.
180  */
181 struct ip *
182 ip_reass(ip, fp)
183 	register struct ipasfrag *ip;
184 	register struct ipq *fp;
185 {
186 	register struct mbuf *m = dtom(ip);
187 	register struct ipasfrag *q;
188 	struct mbuf *t;
189 	int hlen = ip->ip_hl << 2;
190 	int i, next;
191 COUNT(IP_REASS);
192 
193 	/*
194 	 * Presence of header sizes in mbufs
195 	 * would confuse code below.
196 	 */
197 	m->m_off += hlen;
198 	m->m_len -= hlen;
199 
200 	/*
201 	 * If first fragment to arrive, create a reassembly queue.
202 	 */
203 	if (fp == 0) {
204 		if ((t = m_get(1)) == NULL)
205 			goto dropfrag;
206 		t->m_off = MMINOFF;
207 		fp = mtod(t, struct ipq *);
208 		insque(fp, &ipq);
209 		fp->ipq_ttl = IPFRAGTTL;
210 		fp->ipq_p = ip->ip_p;
211 		fp->ipq_id = ip->ip_id;
212 		fp->ipq_next = fp->ipq_prev = (struct ipasfrag *)fp;
213 		fp->ipq_src = ((struct ip *)ip)->ip_src;
214 		fp->ipq_dst = ((struct ip *)ip)->ip_dst;
215 	}
216 
217 	/*
218 	 * Find a segment which begins after this one does.
219 	 */
220 	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next)
221 		if (q->ip_off > ip->ip_off)
222 			break;
223 
224 	/*
225 	 * If there is a preceding segment, it may provide some of
226 	 * our data already.  If so, drop the data from the incoming
227 	 * segment.  If it provides all of our data, drop us.
228 	 */
229 	if (q->ipf_prev != (struct ipasfrag *)fp) {
230 		i = q->ipf_prev->ip_off + q->ipf_prev->ip_len - ip->ip_off;
231 		if (i > 0) {
232 			if (i >= ip->ip_len)
233 				goto dropfrag;
234 			m_adj(dtom(ip), i);
235 			ip->ip_off += i;
236 			ip->ip_len -= i;
237 		}
238 	}
239 
240 	/*
241 	 * While we overlap succeeding segments trim them or,
242 	 * if they are completely covered, dequeue them.
243 	 */
244 	while (q != (struct ipasfrag *)fp && ip->ip_off + ip->ip_len > q->ip_off) {
245 		i = (ip->ip_off + ip->ip_len) - q->ip_off;
246 		if (i < q->ip_len) {
247 			q->ip_len -= i;
248 			m_adj(dtom(q), i);
249 			break;
250 		}
251 		q = q->ipf_next;
252 		m_freem(dtom(q->ipf_prev));
253 		ip_deq(q->ipf_prev);
254 	}
255 
256 	/*
257 	 * Stick new segment in its place;
258 	 * check for complete reassembly.
259 	 */
260 	ip_enq(ip, q->ipf_prev);
261 	next = 0;
262 	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next) {
263 		if (q->ip_off != next)
264 			return (0);
265 		next += q->ip_len;
266 	}
267 	if (q->ipf_prev->ipf_mff)
268 		return (0);
269 
270 	/*
271 	 * Reassembly is complete; concatenate fragments.
272 	 */
273 	q = fp->ipq_next;
274 	m = dtom(q);
275 	t = m->m_next;
276 	m->m_next = 0;
277 	m_cat(m, t);
278 	while ((q = q->ipf_next) != (struct ipasfrag *)fp)
279 		m_cat(m, dtom(q));
280 
281 	/*
282 	 * Create header for new ip packet by
283 	 * modifying header of first packet;
284 	 * dequeue and discard fragment reassembly header.
285 	 * Make header visible.
286 	 */
287 	ip = fp->ipq_next;
288 	ip->ip_len = next;
289 	((struct ip *)ip)->ip_src = fp->ipq_src;
290 	((struct ip *)ip)->ip_dst = fp->ipq_dst;
291 	remque(fp);
292 	(void) m_free(dtom(fp));
293 	m = dtom(ip);
294 	m->m_len += sizeof (struct ipasfrag);
295 	m->m_off -= sizeof (struct ipasfrag);
296 	return ((struct ip *)ip);
297 
298 dropfrag:
299 	m_freem(m);
300 	return (0);
301 }
302 
303 /*
304  * Free a fragment reassembly header and all
305  * associated datagrams.
306  */
307 struct ipq *
308 ip_freef(fp)
309 	struct ipq *fp;
310 {
311 	register struct ipasfrag *q;
312 	struct mbuf *m;
313 COUNT(IP_FREEF);
314 
315 	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next)
316 		m_freem(dtom(q));
317 	m = dtom(fp);
318 	fp = fp->next;
319 	remque(fp->prev);
320 	(void) m_free(m);
321 	return (fp);
322 }
323 
324 /*
325  * Put an ip fragment on a reassembly chain.
326  * Like insque, but pointers in middle of structure.
327  */
328 ip_enq(p, prev)
329 	register struct ipasfrag *p, *prev;
330 {
331 
332 COUNT(IP_ENQ);
333 	p->ipf_prev = prev;
334 	p->ipf_next = prev->ipf_next;
335 	prev->ipf_next->ipf_prev = p;
336 	prev->ipf_next = p;
337 }
338 
339 /*
340  * To ip_enq as remque is to insque.
341  */
342 ip_deq(p)
343 	register struct ipasfrag *p;
344 {
345 
346 COUNT(IP_DEQ);
347 	p->ipf_prev->ipf_next = p->ipf_next;
348 	p->ipf_next->ipf_prev = p->ipf_prev;
349 }
350 
351 /*
352  * IP timer processing;
353  * if a timer expires on a reassembly
354  * queue, discard it.
355  */
356 ip_slowtimo()
357 {
358 	register struct ipq *fp;
359 	int s = splnet();
360 
361 COUNT(IP_SLOWTIMO);
362 	for (fp = ipq.next; fp != &ipq; )
363 		if (--fp->ipq_ttl == 0)
364 			fp = ip_freef(fp);
365 		else
366 			fp = fp->next;
367 	splx(s);
368 }
369 
370 /*
371  * Drain off all datagram fragments.
372  */
373 ip_drain()
374 {
375 
376 COUNT(IP_DRAIN);
377 	while (ipq.next != &ipq)
378 		(void) ip_freef(ipq.next);
379 }
380 
381 /*
382  * Do option processing on a datagram,
383  * possibly discarding it if bad options
384  * are encountered.
385  */
386 ip_dooptions(ip)
387 	struct ip *ip;
388 {
389 	register u_char *cp;
390 	int opt, optlen, cnt;
391 	struct in_addr *sin;
392 	register struct ip_timestamp *ipt;
393 	register struct ifnet *ifp;
394 	struct in_addr t;
395 
396 COUNT(IP_DOOPTIONS);
397 	cp = (u_char *)(ip + 1);
398 	cnt = (ip->ip_hl << 2) - sizeof (struct ip);
399 	for (; cnt > 0; cnt -= optlen, cp += optlen) {
400 		opt = cp[0];
401 		if (opt == IPOPT_EOL)
402 			break;
403 		if (opt == IPOPT_NOP)
404 			optlen = 1;
405 		else
406 			optlen = cp[1];
407 		switch (opt) {
408 
409 		default:
410 			break;
411 
412 		/*
413 		 * Source routing with record.
414 		 * Find interface with current destination address.
415 		 * If none on this machine then drop if strictly routed,
416 		 * or do nothing if loosely routed.
417 		 * Record interface address and bring up next address
418 		 * component.  If strictly routed make sure next
419 		 * address on directly accessible net.
420 		 */
421 		case IPOPT_LSRR:
422 			if (cp[2] < 4 || cp[2] > optlen - (sizeof (long) - 1))
423 				break;
424 			sin = (struct in_addr *)(cp + cp[2]);
425 			ifp = if_ifwithaddr(*sin);
426 			if (ifp == 0) {
427 				if (opt == IPOPT_SSRR)
428 					goto bad;
429 				break;
430 			}
431 			t = ip->ip_dst; ip->ip_dst = *sin; *sin = t;
432 			cp[2] += 4;
433 			if (cp[2] > optlen - (sizeof (long) - 1))
434 				break;
435 			ip->ip_dst = sin[1];
436 			if (opt == IPOPT_SSRR && if_ifonnetof(ip->ip_dst)==0)
437 				goto bad;
438 			break;
439 
440 		case IPOPT_TS:
441 			ipt = (struct ip_timestamp *)cp;
442 			if (ipt->ipt_len < 5)
443 				goto bad;
444 			if (ipt->ipt_ptr > ipt->ipt_len - sizeof (long)) {
445 				if (++ipt->ipt_oflw == 0)
446 					goto bad;
447 				break;
448 			}
449 			sin = (struct in_addr *)(cp+cp[2]);
450 			switch (ipt->ipt_flg) {
451 
452 			case IPOPT_TS_TSONLY:
453 				break;
454 
455 			case IPOPT_TS_TSANDADDR:
456 				if (ipt->ipt_ptr + 8 > ipt->ipt_len)
457 					goto bad;
458 				/* stamp with ``first'' interface address */
459 				*sin++ = ifnet->if_addr;
460 				break;
461 
462 			case IPOPT_TS_PRESPEC:
463 				if (if_ifwithaddr(*sin) == 0)
464 					continue;
465 				if (ipt->ipt_ptr + 8 > ipt->ipt_len)
466 					goto bad;
467 				ipt->ipt_ptr += 4;
468 				break;
469 
470 			default:
471 				goto bad;
472 			}
473 			*(n_time *)sin = iptime();
474 			ipt->ipt_ptr += 4;
475 		}
476 	}
477 	return;
478 bad:
479 	/* SHOULD FORCE ICMP MESSAGE */
480 	return;
481 }
482 
483 /*
484  * Strip out IP options, at higher
485  * level protocol in the kernel.
486  * Second argument is buffer to which options
487  * will be moved, and return value is their length.
488  */
489 ip_stripoptions(ip, cp)
490 	struct ip *ip;
491 	char *cp;
492 {
493 	register int i;
494 	register struct mbuf *m;
495 	int olen;
496 COUNT(IP_STRIPOPTIONS);
497 
498 	olen = (ip->ip_hl<<2) - sizeof (struct ip);
499 	m = dtom(ip);
500 	ip++;
501 	if (cp)
502 		bcopy((caddr_t)ip, cp, (unsigned)olen);
503 	i = m->m_len - (sizeof (struct ip) + olen);
504 	bcopy((caddr_t)ip+olen, (caddr_t)ip, (unsigned)i);
505 	m->m_len -= i;
506 }
507