xref: /csrg-svn/sys/netinet/ip_input.c (revision 25195)
1 /*
2  * Copyright (c) 1982 Regents of the University of California.
3  * All rights reserved.  The Berkeley software License Agreement
4  * specifies the terms and conditions for redistribution.
5  *
6  *	@(#)ip_input.c	6.13 (Berkeley) 10/14/85
7  */
8 
9 #include "param.h"
10 #include "systm.h"
11 #include "mbuf.h"
12 #include "domain.h"
13 #include "protosw.h"
14 #include "socket.h"
15 #include "errno.h"
16 #include "time.h"
17 #include "kernel.h"
18 
19 #include "../net/if.h"
20 #include "../net/route.h"
21 
22 #include "in.h"
23 #include "in_pcb.h"
24 #include "in_systm.h"
25 #include "in_var.h"
26 #include "ip.h"
27 #include "ip_var.h"
28 #include "ip_icmp.h"
29 #include "tcp.h"
30 
31 u_char	ip_protox[IPPROTO_MAX];
32 int	ipqmaxlen = IFQ_MAXLEN;
33 struct	in_ifaddr *in_ifaddr;			/* first inet address */
34 
35 /*
36  * We need to save the IP options in case a protocol wants to respond
37  * to an incoming packet over the same route if the packet got here
38  * using IP source routing.  This allows connection establishment and
39  * maintenance when the remote end is on a network that is not known
40  * to us.
41  */
42 int	ip_nhops = 0;
43 static	struct ip_srcrt {
44 	char	nop;				/* one NOP to align */
45 	char	srcopt[IPOPT_OFFSET + 1];	/* OPTVAL, OLEN and OFFSET */
46 	struct	in_addr route[MAX_IPOPTLEN];
47 } ip_srcrt;
48 
49 /*
50  * IP initialization: fill in IP protocol switch table.
51  * All protocols not implemented in kernel go to raw IP protocol handler.
52  */
53 ip_init()
54 {
55 	register struct protosw *pr;
56 	register int i;
57 
58 	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
59 	if (pr == 0)
60 		panic("ip_init");
61 	for (i = 0; i < IPPROTO_MAX; i++)
62 		ip_protox[i] = pr - inetsw;
63 	for (pr = inetdomain.dom_protosw;
64 	    pr < inetdomain.dom_protoswNPROTOSW; pr++)
65 		if (pr->pr_domain->dom_family == PF_INET &&
66 		    pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW)
67 			ip_protox[pr->pr_protocol] = pr - inetsw;
68 	ipq.next = ipq.prev = &ipq;
69 	ip_id = time.tv_sec & 0xffff;
70 	ipintrq.ifq_maxlen = ipqmaxlen;
71 }
72 
73 u_char	ipcksum = 1;
74 struct	ip *ip_reass();
75 struct	sockaddr_in ipaddr = { AF_INET };
76 struct	route ipforward_rt;
77 
78 /*
79  * Ip input routine.  Checksum and byte swap header.  If fragmented
80  * try to reassamble.  If complete and fragment queue exists, discard.
81  * Process options.  Pass to next level.
82  */
83 ipintr()
84 {
85 	register struct ip *ip;
86 	register struct mbuf *m;
87 	struct mbuf *m0;
88 	register int i;
89 	register struct ipq *fp;
90 	register struct in_ifaddr *ia;
91 	struct ifnet *ifp;
92 	int hlen, s;
93 
94 next:
95 	/*
96 	 * Get next datagram off input queue and get IP header
97 	 * in first mbuf.
98 	 */
99 	s = splimp();
100 	IF_DEQUEUEIF(&ipintrq, m, ifp);
101 	splx(s);
102 	if (m == 0)
103 		return;
104 	if ((m->m_off > MMAXOFF || m->m_len < sizeof (struct ip)) &&
105 	    (m = m_pullup(m, sizeof (struct ip))) == 0) {
106 		ipstat.ips_toosmall++;
107 		goto next;
108 	}
109 	ip = mtod(m, struct ip *);
110 	hlen = ip->ip_hl << 2;
111 	if (hlen < sizeof(struct ip)) {	/* minimum header length */
112 		ipstat.ips_badhlen++;
113 		goto bad;
114 	}
115 	if (hlen > m->m_len) {
116 		if ((m = m_pullup(m, hlen)) == 0) {
117 			ipstat.ips_badhlen++;
118 			goto next;
119 		}
120 		ip = mtod(m, struct ip *);
121 	}
122 	if (ipcksum)
123 		if (ip->ip_sum = in_cksum(m, hlen)) {
124 			ipstat.ips_badsum++;
125 			goto bad;
126 		}
127 
128 	/*
129 	 * Convert fields to host representation.
130 	 */
131 	ip->ip_len = ntohs((u_short)ip->ip_len);
132 	if (ip->ip_len < hlen) {
133 		ipstat.ips_badlen++;
134 		goto bad;
135 	}
136 	ip->ip_id = ntohs(ip->ip_id);
137 	ip->ip_off = ntohs((u_short)ip->ip_off);
138 
139 	/*
140 	 * Check that the amount of data in the buffers
141 	 * is as at least much as the IP header would have us expect.
142 	 * Trim mbufs if longer than we expect.
143 	 * Drop packet if shorter than we expect.
144 	 */
145 	i = -(u_short)ip->ip_len;
146 	m0 = m;
147 	for (;;) {
148 		i += m->m_len;
149 		if (m->m_next == 0)
150 			break;
151 		m = m->m_next;
152 	}
153 	if (i != 0) {
154 		if (i < 0) {
155 			ipstat.ips_tooshort++;
156 			m = m0;
157 			goto bad;
158 		}
159 		if (i <= m->m_len)
160 			m->m_len -= i;
161 		else
162 			m_adj(m0, -i);
163 	}
164 	m = m0;
165 
166 	/*
167 	 * Process options and, if not destined for us,
168 	 * ship it on.  ip_dooptions returns 1 when an
169 	 * error was detected (causing an icmp message
170 	 * to be sent and the original packet to be freed).
171 	 */
172 	ip_nhops = 0;		/* for source routed packets */
173 	if (hlen > sizeof (struct ip) && ip_dooptions(ip))
174 		goto next;
175 
176 	/*
177 	 * Check our list of addresses, to see if the packet is for us.
178 	 */
179 	for (ia = in_ifaddr; ia; ia = ia->ia_next) {
180 #define	satosin(sa)	((struct sockaddr_in *)(sa))
181 
182 		if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr)
183 			goto ours;
184 		if (
185 #ifdef	DIRECTED_BROADCAST
186 		    ia->ia_ifp == ifp &&
187 #endif
188 		    (ia->ia_ifp->if_flags & IFF_BROADCAST)) {
189 			u_long i;
190 
191 			if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
192 			    ip->ip_dst.s_addr)
193 				goto ours;
194 			if (ip->ip_dst.s_addr == ia->ia_netbroadcast.s_addr)
195 				goto ours;
196 			/*
197 			 * Look for all-0's host part (old broadcast addr),
198 			 * either for subnet or net.
199 			 */
200 			i = ntohl(ip->ip_dst.s_addr);
201 			if (i == ia->ia_subnet)
202 				goto ours;
203 			if (i == ia->ia_net)
204 				goto ours;
205 		}
206 	}
207 	if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST)
208 		goto ours;
209 	if (ip->ip_dst.s_addr == INADDR_ANY)
210 		goto ours;
211 
212 	/*
213 	 * Not for us; forward if possible and desirable.
214 	 */
215 	ip_forward(ip, ifp);
216 	goto next;
217 
218 ours:
219 	/*
220 	 * Look for queue of fragments
221 	 * of this datagram.
222 	 */
223 	for (fp = ipq.next; fp != &ipq; fp = fp->next)
224 		if (ip->ip_id == fp->ipq_id &&
225 		    ip->ip_src.s_addr == fp->ipq_src.s_addr &&
226 		    ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
227 		    ip->ip_p == fp->ipq_p)
228 			goto found;
229 	fp = 0;
230 found:
231 
232 	/*
233 	 * Adjust ip_len to not reflect header,
234 	 * set ip_mff if more fragments are expected,
235 	 * convert offset of this to bytes.
236 	 */
237 	ip->ip_len -= hlen;
238 	((struct ipasfrag *)ip)->ipf_mff = 0;
239 	if (ip->ip_off & IP_MF)
240 		((struct ipasfrag *)ip)->ipf_mff = 1;
241 	ip->ip_off <<= 3;
242 
243 	/*
244 	 * If datagram marked as having more fragments
245 	 * or if this is not the first fragment,
246 	 * attempt reassembly; if it succeeds, proceed.
247 	 */
248 	if (((struct ipasfrag *)ip)->ipf_mff || ip->ip_off) {
249 		ipstat.ips_fragments++;
250 		ip = ip_reass((struct ipasfrag *)ip, fp);
251 		if (ip == 0)
252 			goto next;
253 		m = dtom(ip);
254 	} else
255 		if (fp)
256 			ip_freef(fp);
257 
258 	/*
259 	 * Switch out to protocol's input routine.
260 	 */
261 	(*inetsw[ip_protox[ip->ip_p]].pr_input)(m, ifp);
262 	goto next;
263 bad:
264 	m_freem(m);
265 	goto next;
266 }
267 
268 /*
269  * Take incoming datagram fragment and try to
270  * reassemble it into whole datagram.  If a chain for
271  * reassembly of this datagram already exists, then it
272  * is given as fp; otherwise have to make a chain.
273  */
274 struct ip *
275 ip_reass(ip, fp)
276 	register struct ipasfrag *ip;
277 	register struct ipq *fp;
278 {
279 	register struct mbuf *m = dtom(ip);
280 	register struct ipasfrag *q;
281 	struct mbuf *t;
282 	int hlen = ip->ip_hl << 2;
283 	int i, next;
284 
285 	/*
286 	 * Presence of header sizes in mbufs
287 	 * would confuse code below.
288 	 */
289 	m->m_off += hlen;
290 	m->m_len -= hlen;
291 
292 	/*
293 	 * If first fragment to arrive, create a reassembly queue.
294 	 */
295 	if (fp == 0) {
296 		if ((t = m_get(M_WAIT, MT_FTABLE)) == NULL)
297 			goto dropfrag;
298 		fp = mtod(t, struct ipq *);
299 		insque(fp, &ipq);
300 		fp->ipq_ttl = IPFRAGTTL;
301 		fp->ipq_p = ip->ip_p;
302 		fp->ipq_id = ip->ip_id;
303 		fp->ipq_next = fp->ipq_prev = (struct ipasfrag *)fp;
304 		fp->ipq_src = ((struct ip *)ip)->ip_src;
305 		fp->ipq_dst = ((struct ip *)ip)->ip_dst;
306 		q = (struct ipasfrag *)fp;
307 		goto insert;
308 	}
309 
310 	/*
311 	 * Find a segment which begins after this one does.
312 	 */
313 	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next)
314 		if (q->ip_off > ip->ip_off)
315 			break;
316 
317 	/*
318 	 * If there is a preceding segment, it may provide some of
319 	 * our data already.  If so, drop the data from the incoming
320 	 * segment.  If it provides all of our data, drop us.
321 	 */
322 	if (q->ipf_prev != (struct ipasfrag *)fp) {
323 		i = q->ipf_prev->ip_off + q->ipf_prev->ip_len - ip->ip_off;
324 		if (i > 0) {
325 			if (i >= ip->ip_len)
326 				goto dropfrag;
327 			m_adj(dtom(ip), i);
328 			ip->ip_off += i;
329 			ip->ip_len -= i;
330 		}
331 	}
332 
333 	/*
334 	 * While we overlap succeeding segments trim them or,
335 	 * if they are completely covered, dequeue them.
336 	 */
337 	while (q != (struct ipasfrag *)fp && ip->ip_off + ip->ip_len > q->ip_off) {
338 		i = (ip->ip_off + ip->ip_len) - q->ip_off;
339 		if (i < q->ip_len) {
340 			q->ip_len -= i;
341 			q->ip_off += i;
342 			m_adj(dtom(q), i);
343 			break;
344 		}
345 		q = q->ipf_next;
346 		m_freem(dtom(q->ipf_prev));
347 		ip_deq(q->ipf_prev);
348 	}
349 
350 insert:
351 	/*
352 	 * Stick new segment in its place;
353 	 * check for complete reassembly.
354 	 */
355 	ip_enq(ip, q->ipf_prev);
356 	next = 0;
357 	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next) {
358 		if (q->ip_off != next)
359 			return (0);
360 		next += q->ip_len;
361 	}
362 	if (q->ipf_prev->ipf_mff)
363 		return (0);
364 
365 	/*
366 	 * Reassembly is complete; concatenate fragments.
367 	 */
368 	q = fp->ipq_next;
369 	m = dtom(q);
370 	t = m->m_next;
371 	m->m_next = 0;
372 	m_cat(m, t);
373 	q = q->ipf_next;
374 	while (q != (struct ipasfrag *)fp) {
375 		t = dtom(q);
376 		q = q->ipf_next;
377 		m_cat(m, t);
378 	}
379 
380 	/*
381 	 * Create header for new ip packet by
382 	 * modifying header of first packet;
383 	 * dequeue and discard fragment reassembly header.
384 	 * Make header visible.
385 	 */
386 	ip = fp->ipq_next;
387 	ip->ip_len = next;
388 	((struct ip *)ip)->ip_src = fp->ipq_src;
389 	((struct ip *)ip)->ip_dst = fp->ipq_dst;
390 	remque(fp);
391 	(void) m_free(dtom(fp));
392 	m = dtom(ip);
393 	m->m_len += (ip->ip_hl << 2);
394 	m->m_off -= (ip->ip_hl << 2);
395 	return ((struct ip *)ip);
396 
397 dropfrag:
398 	ipstat.ips_fragdropped++;
399 	m_freem(m);
400 	return (0);
401 }
402 
403 /*
404  * Free a fragment reassembly header and all
405  * associated datagrams.
406  */
407 ip_freef(fp)
408 	struct ipq *fp;
409 {
410 	register struct ipasfrag *q, *p;
411 
412 	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = p) {
413 		p = q->ipf_next;
414 		ip_deq(q);
415 		m_freem(dtom(q));
416 	}
417 	remque(fp);
418 	(void) m_free(dtom(fp));
419 }
420 
421 /*
422  * Put an ip fragment on a reassembly chain.
423  * Like insque, but pointers in middle of structure.
424  */
425 ip_enq(p, prev)
426 	register struct ipasfrag *p, *prev;
427 {
428 
429 	p->ipf_prev = prev;
430 	p->ipf_next = prev->ipf_next;
431 	prev->ipf_next->ipf_prev = p;
432 	prev->ipf_next = p;
433 }
434 
435 /*
436  * To ip_enq as remque is to insque.
437  */
438 ip_deq(p)
439 	register struct ipasfrag *p;
440 {
441 
442 	p->ipf_prev->ipf_next = p->ipf_next;
443 	p->ipf_next->ipf_prev = p->ipf_prev;
444 }
445 
446 /*
447  * IP timer processing;
448  * if a timer expires on a reassembly
449  * queue, discard it.
450  */
451 ip_slowtimo()
452 {
453 	register struct ipq *fp;
454 	int s = splnet();
455 
456 	fp = ipq.next;
457 	if (fp == 0) {
458 		splx(s);
459 		return;
460 	}
461 	while (fp != &ipq) {
462 		--fp->ipq_ttl;
463 		fp = fp->next;
464 		if (fp->prev->ipq_ttl == 0) {
465 			ipstat.ips_fragtimeout++;
466 			ip_freef(fp->prev);
467 		}
468 	}
469 	splx(s);
470 }
471 
472 /*
473  * Drain off all datagram fragments.
474  */
475 ip_drain()
476 {
477 
478 	while (ipq.next != &ipq) {
479 		ipstat.ips_fragdropped++;
480 		ip_freef(ipq.next);
481 	}
482 }
483 
484 struct in_ifaddr *ip_rtaddr();
485 
486 /*
487  * Do option processing on a datagram,
488  * possibly discarding it if bad options
489  * are encountered.
490  */
491 ip_dooptions(ip)
492 	struct ip *ip;
493 {
494 	register u_char *cp;
495 	int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB;
496 	register struct ip_timestamp *ipt;
497 	register struct in_ifaddr *ia;
498 	struct in_addr *sin;
499 	n_time ntime;
500 
501 	cp = (u_char *)(ip + 1);
502 	cnt = (ip->ip_hl << 2) - sizeof (struct ip);
503 	for (; cnt > 0; cnt -= optlen, cp += optlen) {
504 		opt = cp[IPOPT_OPTVAL];
505 		if (opt == IPOPT_EOL)
506 			break;
507 		if (opt == IPOPT_NOP)
508 			optlen = 1;
509 		else {
510 			optlen = cp[IPOPT_OLEN];
511 			if (optlen <= 0 || optlen > cnt) {
512 				code = &cp[IPOPT_OLEN] - (u_char *)ip;
513 				goto bad;
514 			}
515 		}
516 		switch (opt) {
517 
518 		default:
519 			break;
520 
521 		/*
522 		 * Source routing with record.
523 		 * Find interface with current destination address.
524 		 * If none on this machine then drop if strictly routed,
525 		 * or do nothing if loosely routed.
526 		 * Record interface address and bring up next address
527 		 * component.  If strictly routed make sure next
528 		 * address on directly accessible net.
529 		 */
530 		case IPOPT_LSRR:
531 		case IPOPT_SSRR:
532 			if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
533 				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
534 				goto bad;
535 			}
536 			ipaddr.sin_addr = ip->ip_dst;
537 			ia = (struct in_ifaddr *)
538 				ifa_ifwithaddr((struct sockaddr *)&ipaddr);
539 			if (ia == 0) {
540 				if (opt == IPOPT_SSRR) {
541 					type = ICMP_UNREACH;
542 					code = ICMP_UNREACH_SRCFAIL;
543 					goto bad;
544 				}
545 				/*
546 				 * Loose routing, and not at next destination
547 				 * yet; nothing to do except forward.
548 				 */
549 				break;
550 			}
551 			off--;			/* 0 origin */
552 			if (off > optlen - sizeof(struct in_addr)) {
553 				/*
554 				 * End of source route.  Should be for us.
555 				 */
556 				save_rte(cp, ip->ip_src);
557 				break;
558 			}
559 			/*
560 			 * locate outgoing interface
561 			 */
562 			bcopy(cp + off, (caddr_t)&ipaddr.sin_addr,
563 			    sizeof(ipaddr.sin_addr));
564 			if ((opt == IPOPT_SSRR &&
565 			    in_iaonnetof(in_netof(ipaddr.sin_addr)) == 0) ||
566 			    (ia = ip_rtaddr(ipaddr.sin_addr)) == 0) {
567 				type = ICMP_UNREACH;
568 				code = ICMP_UNREACH_SRCFAIL;
569 				goto bad;
570 			}
571 			ip->ip_dst = ipaddr.sin_addr;
572 			bcopy(&(IA_SIN(ia)->sin_addr), cp + off,
573 				sizeof(struct in_addr));
574 			cp[IPOPT_OFFSET] += sizeof(struct in_addr);
575 			break;
576 
577 		case IPOPT_RR:
578 			if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
579 				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
580 				goto bad;
581 			}
582 			/*
583 			 * If no space remains, ignore.
584 			 */
585 			off--;			/* 0 origin */
586 			if (off > optlen - sizeof(struct in_addr))
587 				break;
588 			bcopy(cp + off, (caddr_t)ipaddr.sin_addr,
589 			    sizeof(ipaddr.sin_addr));
590 			/*
591 			 * locate outgoing interface
592 			 */
593 			if ((ia = ip_rtaddr(ipaddr.sin_addr)) == 0) {
594 				type = ICMP_UNREACH;
595 				code = ICMP_UNREACH_SRCFAIL;
596 				goto bad;
597 			}
598 			bcopy(&(IA_SIN(ia)->sin_addr), cp + off,
599 				sizeof(struct in_addr));
600 			cp[IPOPT_OFFSET] += sizeof(struct in_addr);
601 			break;
602 
603 		case IPOPT_TS:
604 			code = cp - (u_char *)ip;
605 			ipt = (struct ip_timestamp *)cp;
606 			if (ipt->ipt_len < 5)
607 				goto bad;
608 			if (ipt->ipt_ptr > ipt->ipt_len - sizeof (long)) {
609 				if (++ipt->ipt_oflw == 0)
610 					goto bad;
611 				break;
612 			}
613 			sin = (struct in_addr *)(cp+cp[IPOPT_OFFSET]-1);
614 			switch (ipt->ipt_flg) {
615 
616 			case IPOPT_TS_TSONLY:
617 				break;
618 
619 			case IPOPT_TS_TSANDADDR:
620 				if (ipt->ipt_ptr + sizeof(n_time) +
621 				    sizeof(struct in_addr) > ipt->ipt_len)
622 					goto bad;
623 				if (in_ifaddr == 0)
624 					goto bad;	/* ??? */
625 				bcopy((caddr_t)&IA_SIN(in_ifaddr)->sin_addr,
626 				    (caddr_t)sin, sizeof(struct in_addr));
627 				sin++;
628 				break;
629 
630 			case IPOPT_TS_PRESPEC:
631 				bcopy((caddr_t)sin, (caddr_t)&ipaddr.sin_addr,
632 				    sizeof(struct in_addr));
633 				if (ifa_ifwithaddr((struct sockaddr *)&ipaddr) == 0)
634 					continue;
635 				if (ipt->ipt_ptr + sizeof(n_time) +
636 				    sizeof(struct in_addr) > ipt->ipt_len)
637 					goto bad;
638 				ipt->ipt_ptr += sizeof(struct in_addr);
639 				break;
640 
641 			default:
642 				goto bad;
643 			}
644 			ntime = iptime();
645 			bcopy((caddr_t)&ntime, (caddr_t)sin, sizeof(n_time));
646 			ipt->ipt_ptr += sizeof(n_time);
647 		}
648 	}
649 	return (0);
650 bad:
651 	icmp_error(ip, type, code);
652 	return (1);
653 }
654 
655 /*
656  * Given address of next destination (final or next hop),
657  * return internet address info of interface to be used to get there.
658  */
659 struct in_ifaddr *
660 ip_rtaddr(dst)
661 	 struct in_addr dst;
662 {
663 	register struct sockaddr_in *sin;
664 	register struct in_ifaddr *ia;
665 
666 	sin = (struct sockaddr_in *) &ipforward_rt.ro_dst;
667 
668 	if (ipforward_rt.ro_rt == 0 || dst.s_addr != sin->sin_addr.s_addr) {
669 		if (ipforward_rt.ro_rt) {
670 			RTFREE(ipforward_rt.ro_rt);
671 			ipforward_rt.ro_rt = 0;
672 		}
673 		sin->sin_family = AF_INET;
674 		sin->sin_addr = dst;
675 
676 		rtalloc(&ipforward_rt);
677 	}
678 	if (ipforward_rt.ro_rt == 0)
679 		return ((struct in_ifaddr *)0);
680 	/*
681 	 * Find address associated with outgoing interface.
682 	 */
683 	for (ia = in_ifaddr; ia; ia = ia->ia_next)
684 		if (ia->ia_ifp == ipforward_rt.ro_rt->rt_ifp)
685 			break;
686 	return (ia);
687 }
688 
689 /*
690  * Save incoming source route for use in replies,
691  * to be picked up later by ip_srcroute if the receiver is interested.
692  */
693 save_rte(option, dst)
694 	caddr_t option;
695 	struct in_addr dst;
696 {
697 	int olen;
698 	extern ipprintfs;
699 
700 	olen = option[IPOPT_OLEN];
701 	if (olen > sizeof(ip_srcrt) - 1) {
702 		if (ipprintfs)
703 			printf("save_rte: olen %d\n", olen);
704 		return;
705 	}
706 	bcopy(option, (caddr_t)ip_srcrt.srcopt, olen);
707 	ip_nhops = (olen - IPOPT_OFFSET - 1) / sizeof(struct in_addr);
708 	ip_srcrt.route[ip_nhops++] = dst;
709 }
710 
711 /*
712  * Retrieve incoming source route for use in replies,
713  * in the same form used by setsockopt.
714  * The first hop is placed before the options, will be removed later.
715  */
716 struct mbuf *
717 ip_srcroute()
718 {
719 	register struct in_addr *p, *q;
720 	register struct mbuf *m;
721 
722 	if (ip_nhops == 0)
723 		return ((struct mbuf *)0);
724 	m = m_get(M_WAIT, MT_SOOPTS);
725 	m->m_len = ip_nhops * sizeof(struct in_addr) + IPOPT_OFFSET + 1 + 1;
726 
727 	/*
728 	 * First save first hop for return route
729 	 */
730 	p = &ip_srcrt.route[ip_nhops - 1];
731 	*(mtod(m, struct in_addr *)) = *p--;
732 
733 	/*
734 	 * Copy option fields and padding (nop) to mbuf.
735 	 */
736 	ip_srcrt.nop = IPOPT_NOP;
737 	bcopy((caddr_t)&ip_srcrt, mtod(m, caddr_t) + sizeof(struct in_addr),
738 	    IPOPT_OFFSET + 1 + 1);
739 	q = (struct in_addr *)(mtod(m, caddr_t) +
740 	    sizeof(struct in_addr) + IPOPT_OFFSET + 1 + 1);
741 	/*
742 	 * Record return path as an IP source route,
743 	 * reversing the path (pointers are now aligned).
744 	 */
745 	while (p >= ip_srcrt.route)
746 		*q++ = *p--;
747 	return (m);
748 }
749 
750 /*
751  * Strip out IP options, at higher
752  * level protocol in the kernel.
753  * Second argument is buffer to which options
754  * will be moved, and return value is their length.
755  */
756 ip_stripoptions(ip, mopt)
757 	struct ip *ip;
758 	struct mbuf *mopt;
759 {
760 	register int i;
761 	register struct mbuf *m;
762 	register caddr_t opts;
763 	int olen;
764 
765 	olen = (ip->ip_hl<<2) - sizeof (struct ip);
766 	m = dtom(ip);
767 	opts = (caddr_t)(ip + 1);
768 	if (mopt) {
769 		mopt->m_len = olen;
770 		mopt->m_off = MMINOFF;
771 		bcopy(opts, mtod(mopt, caddr_t), (unsigned)olen);
772 	}
773 	i = m->m_len - (sizeof (struct ip) + olen);
774 	bcopy(opts  + olen, opts, (unsigned)i);
775 	m->m_len -= olen;
776 	ip->ip_hl = sizeof(struct ip) >> 2;
777 }
778 
/*
 * Error number for each of the PRC_NCMDS control codes, indexed by
 * code; 0 where no error number is assigned.
 */
u_char inetctlerrmap[PRC_NCMDS] = {
	/*  0 ..  3 */	0, 0, 0, 0,
	/*  4 ..  7 */	0, 0, EHOSTDOWN, EHOSTUNREACH,
	/*  8 .. 11 */	ENETUNREACH, EHOSTUNREACH, ECONNREFUSED, ECONNREFUSED,
	/* 12 .. 15 */	EMSGSIZE, EHOSTUNREACH, 0, 0,
	/* 16 .. 19 */	0, 0, 0, 0,
	/* 20       */	ENOPROTOOPT
};
787 
/* Compile-time defaults; either may be overridden before this point. */
#ifndef	IPSENDREDIRECTS
#define	IPSENDREDIRECTS	1
#endif
#ifndef	IPFORWARDING
#define	IPFORWARDING	1
#endif

/* Nonzero turns on the debugging printfs in this file. */
int	ipprintfs = 0;
/* Zero makes ip_forward() refuse to forward. */
int	ipforwarding = IPFORWARDING;
/* Zero keeps ip_forward() from generating redirects. */
int	ipsendredirects = IPSENDREDIRECTS;
/* ip_forward() forwards only when this exceeds 1. */
extern	int in_interfaces;
798 
799 /*
800  * Forward a packet.  If some error occurs return the sender
801  * an icmp packet.  Note we can't always generate a meaningful
802  * icmp message because icmp doesn't have a large enough repertoire
803  * of codes and types.
804  */
805 ip_forward(ip, ifp)
806 	register struct ip *ip;
807 	struct ifnet *ifp;
808 {
809 	register int error, type = 0, code;
810 	register struct sockaddr_in *sin;
811 	struct mbuf *mcopy;
812 	struct in_addr dest;
813 
814 #ifdef lint
815 	dest.s_addr = 0;
816 #endif
817 	if (ipprintfs)
818 		printf("forward: src %x dst %x ttl %x\n", ip->ip_src,
819 			ip->ip_dst, ip->ip_ttl);
820 	ip->ip_id = htons(ip->ip_id);
821 	if (ipforwarding == 0 || in_interfaces <= 1) {
822 		/* can't tell difference between net and host */
823 		type = ICMP_UNREACH, code = ICMP_UNREACH_NET;
824 		goto sendicmp;
825 	}
826 	if (ip->ip_ttl < IPTTLDEC) {
827 		type = ICMP_TIMXCEED, code = ICMP_TIMXCEED_INTRANS;
828 		goto sendicmp;
829 	}
830 	ip->ip_ttl -= IPTTLDEC;
831 
832 	/*
833 	 * Save at most 64 bytes of the packet in case
834 	 * we need to generate an ICMP message to the src.
835 	 */
836 	mcopy = m_copy(dtom(ip), 0, imin(ip->ip_len, 64));
837 
838 	sin = (struct sockaddr_in *)&ipforward_rt.ro_dst;
839 	if (ipforward_rt.ro_rt == 0 ||
840 	    ip->ip_dst.s_addr != sin->sin_addr.s_addr) {
841 		if (ipforward_rt.ro_rt) {
842 			RTFREE(ipforward_rt.ro_rt);
843 			ipforward_rt.ro_rt = 0;
844 		}
845 		sin->sin_family = AF_INET;
846 		sin->sin_addr = ip->ip_dst;
847 
848 		rtalloc(&ipforward_rt);
849 	}
850 	/*
851 	 * If forwarding packet using same interface that it came in on,
852 	 * perhaps should send a redirect to sender to shortcut a hop.
853 	 * Only send redirect if source is sending directly to us,
854 	 * and if packet was not source routed (or has any options).
855 	 */
856 	if (ipforward_rt.ro_rt && ipforward_rt.ro_rt->rt_ifp == ifp &&
857 	    ipsendredirects && ip->ip_hl == (sizeof(struct ip) >> 2)) {
858 		struct in_ifaddr *ia;
859 		extern struct in_ifaddr *ifptoia();
860 		u_long src = ntohl(ip->ip_src.s_addr);
861 		u_long dst = ntohl(ip->ip_dst.s_addr);
862 
863 		if ((ia = ifptoia(ifp)) &&
864 		   (src & ia->ia_subnetmask) == ia->ia_subnet) {
865 		    if (ipforward_rt.ro_rt->rt_flags & RTF_GATEWAY)
866 			dest = satosin(&ipforward_rt.ro_rt->rt_gateway)->sin_addr;
867 		    else
868 			dest = ip->ip_dst;
869 		    /*
870 		     * If the destination is reached by a route to host,
871 		     * is directly on the attached net (!),
872 		     * or if the destination is on a subnet of a local net
873 		     * not known to the source net, use host redirect.
874 		     * (We may be the correct first hop for other subnets.)
875 		     */
876 		    type = ICMP_REDIRECT;
877 		    code = ICMP_REDIRECT_NET;
878 		    if ((ipforward_rt.ro_rt->rt_flags & RTF_HOST) ||
879 		       (ipforward_rt.ro_rt->rt_flags & RTF_GATEWAY) == 0)
880 			code = ICMP_REDIRECT_HOST;
881 		    else for (ia = in_ifaddr; ia = ia->ia_next; )
882 			if ((dst & ia->ia_netmask) == ia->ia_net) {
883 			    if ((src & ia->ia_netmask) != ia->ia_net)
884 				code = ICMP_REDIRECT_HOST;
885 			    break;
886 			}
887 		    if (ipprintfs)
888 		        printf("redirect (%d) to %x\n", code, dest);
889 		}
890 	}
891 
892 	error = ip_output(dtom(ip), (struct mbuf *)0, &ipforward_rt,
893 		IP_FORWARDING);
894 	if (error)
895 		ipstat.ips_cantforward++;
896 	else if (type)
897 		ipstat.ips_redirectsent++;
898 	else {
899 		if (mcopy)
900 			m_freem(mcopy);
901 		ipstat.ips_forward++;
902 		return;
903 	}
904 	if (mcopy == NULL)
905 		return;
906 	ip = mtod(mcopy, struct ip *);
907 	type = ICMP_UNREACH;
908 	switch (error) {
909 
910 	case 0:				/* forwarded, but need redirect */
911 		type = ICMP_REDIRECT;
912 		/* code set above */
913 		break;
914 
915 	case ENETUNREACH:
916 	case ENETDOWN:
917 		code = ICMP_UNREACH_NET;
918 		break;
919 
920 	case EMSGSIZE:
921 		code = ICMP_UNREACH_NEEDFRAG;
922 		break;
923 
924 	case EPERM:
925 		code = ICMP_UNREACH_PORT;
926 		break;
927 
928 	case ENOBUFS:
929 		type = ICMP_SOURCEQUENCH;
930 		break;
931 
932 	case EHOSTDOWN:
933 	case EHOSTUNREACH:
934 		code = ICMP_UNREACH_HOST;
935 		break;
936 	}
937 sendicmp:
938 	icmp_error(ip, type, code, dest);
939 }
940