xref: /csrg-svn/sys/netinet/ip_input.c (revision 28944)
1 /*
2  * Copyright (c) 1982 Regents of the University of California.
3  * All rights reserved.  The Berkeley software License Agreement
4  * specifies the terms and conditions for redistribution.
5  *
6  *	@(#)ip_input.c	6.23 (Berkeley) 06/02/86
7  */
8 
9 #include "param.h"
10 #include "systm.h"
11 #include "mbuf.h"
12 #include "domain.h"
13 #include "protosw.h"
14 #include "socket.h"
15 #include "errno.h"
16 #include "time.h"
17 #include "kernel.h"
18 
19 #include "../net/if.h"
20 #include "../net/route.h"
21 
22 #include "in.h"
23 #include "in_pcb.h"
24 #include "in_systm.h"
25 #include "in_var.h"
26 #include "ip.h"
27 #include "ip_var.h"
28 #include "ip_icmp.h"
29 #include "tcp.h"
30 
/*
 * Per-protocol index into inetsw[]; ip_init() fills every slot and
 * ipintr() uses it to select the pr_input routine for ip_p.
 */
u_char	ip_protox[IPPROTO_MAX];
/* Copied into ipintrq.ifq_maxlen by ip_init(). */
int	ipqmaxlen = IFQ_MAXLEN;
struct	in_ifaddr *in_ifaddr;			/* first inet address */
34 
/*
 * We need to save the IP options in case a protocol wants to respond
 * to an incoming packet over the same route if the packet got here
 * using IP source routing.  This allows connection establishment and
 * maintenance when the remote end is on a network that is not known
 * to us.
 *
 * ip_nhops is the number of valid entries in ip_srcrt.route[]; it is
 * cleared by ipintr() for every datagram and set by save_rte().
 *
 * save_rte() copies a whole option starting at srcopt[], so the bytes
 * beyond the three option header bytes spill into route[].
 * NOTE(review): this relies on route[] following srcopt[] without
 * padding (nop + srcopt presumably total 4 bytes) -- confirm the
 * value of IPOPT_OFFSET and the target's structure layout.
 */
int	ip_nhops = 0;
static	struct ip_srcrt {
	char	nop;				/* one NOP to align */
	char	srcopt[IPOPT_OFFSET + 1];	/* OPTVAL, OLEN and OFFSET */
	struct	in_addr route[MAX_IPOPTLEN];	/* hop addresses, then sender */
} ip_srcrt;
48 
49 /*
50  * IP initialization: fill in IP protocol switch table.
51  * All protocols not implemented in kernel go to raw IP protocol handler.
52  */
53 ip_init()
54 {
55 	register struct protosw *pr;
56 	register int i;
57 
58 	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
59 	if (pr == 0)
60 		panic("ip_init");
61 	for (i = 0; i < IPPROTO_MAX; i++)
62 		ip_protox[i] = pr - inetsw;
63 	for (pr = inetdomain.dom_protosw;
64 	    pr < inetdomain.dom_protoswNPROTOSW; pr++)
65 		if (pr->pr_domain->dom_family == PF_INET &&
66 		    pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW)
67 			ip_protox[pr->pr_protocol] = pr - inetsw;
68 	ipq.next = ipq.prev = &ipq;
69 	ip_id = time.tv_sec & 0xffff;
70 	ipintrq.ifq_maxlen = ipqmaxlen;
71 }
72 
u_char	ipcksum = 1;		/* nonzero: ipintr() verifies header checksums */
struct	ip *ip_reass();
/* Scratch address used for the interface lookups in ip_dooptions(). */
struct	sockaddr_in ipaddr = { AF_INET };
/* Single cached route shared by ip_forward() and ip_rtaddr(). */
struct	route ipforward_rt;
77 
/*
 * Ip input routine.  Checksum and byte swap header.  If fragmented
 * try to reassemble.  If complete and fragment queue exists, discard.
 * Process options.  Pass to next level.
 *
 * Runs until the IP input queue (ipintrq) is empty.  Each datagram
 * ends up in exactly one of these places: freed at "bad", consumed by
 * ip_dooptions() on an option error, handed to ip_forward(), absorbed
 * by ip_reass() while a datagram is still incomplete, or delivered to
 * the pr_input routine selected through ip_protox[].
 */
ipintr()
{
	register struct ip *ip;
	register struct mbuf *m;
	struct mbuf *m0;		/* head of chain while m walks it */
	register int i;
	register struct ipq *fp;	/* matching reassembly queue, or 0 */
	register struct in_ifaddr *ia;
	struct ifnet *ifp;		/* interface the datagram arrived on */
	int hlen, s;

next:
	/*
	 * Get next datagram off input queue and get IP header
	 * in first mbuf.
	 */
	s = splimp();
	IF_DEQUEUEIF(&ipintrq, m, ifp);
	splx(s);
	if (m == 0)
		return;
	/*
	 * If no IP addresses have been set yet but the interfaces
	 * are receiving, can't do anything with incoming packets yet.
	 */
	if (in_ifaddr == NULL)
		goto bad;
	ipstat.ips_total++;
	/*
	 * When m_pullup() fails m is 0, so there is nothing to hand to
	 * m_freem(); go straight to the next datagram instead of "bad".
	 */
	if ((m->m_off > MMAXOFF || m->m_len < sizeof (struct ip)) &&
	    (m = m_pullup(m, sizeof (struct ip))) == 0) {
		ipstat.ips_toosmall++;
		goto next;
	}
	ip = mtod(m, struct ip *);
	hlen = ip->ip_hl << 2;		/* header length in bytes */
	if (hlen < sizeof(struct ip)) {	/* minimum header length */
		ipstat.ips_badhlen++;
		goto bad;
	}
	/* Header with options must also be contiguous in the first mbuf. */
	if (hlen > m->m_len) {
		if ((m = m_pullup(m, hlen)) == 0) {
			ipstat.ips_badhlen++;
			goto next;
		}
		ip = mtod(m, struct ip *);
	}
	/*
	 * The assignment is intentional: the checksum result is stored
	 * back into ip_sum and any nonzero result rejects the datagram.
	 */
	if (ipcksum)
		if (ip->ip_sum = in_cksum(m, hlen)) {
			ipstat.ips_badsum++;
			goto bad;
		}

	/*
	 * Convert fields to host representation.
	 */
	ip->ip_len = ntohs((u_short)ip->ip_len);
	if (ip->ip_len < hlen) {
		ipstat.ips_badlen++;
		goto bad;
	}
	ip->ip_id = ntohs(ip->ip_id);
	ip->ip_off = ntohs((u_short)ip->ip_off);

	/*
	 * Check that the amount of data in the buffers
	 * is at least as much as the IP header would have us expect.
	 * Trim mbufs if longer than we expect.
	 * Drop packet if shorter than we expect.
	 *
	 * After the loop i is (bytes in chain) - ip_len and m is the
	 * last mbuf of the chain.
	 */
	i = -(u_short)ip->ip_len;
	m0 = m;
	for (;;) {
		i += m->m_len;
		if (m->m_next == 0)
			break;
		m = m->m_next;
	}
	if (i != 0) {
		if (i < 0) {
			ipstat.ips_tooshort++;
			m = m0;
			goto bad;
		}
		/*
		 * Excess fits in the last mbuf: just shorten it.
		 * Otherwise let m_adj() handle it (a negative count
		 * presumably trims from the tail -- confirm in mbuf code).
		 */
		if (i <= m->m_len)
			m->m_len -= i;
		else
			m_adj(m0, -i);
	}
	m = m0;

	/*
	 * Process options and, if not destined for us,
	 * ship it on.  ip_dooptions returns 1 when an
	 * error was detected (causing an icmp message
	 * to be sent and the original packet to be freed).
	 */
	ip_nhops = 0;		/* for source routed packets */
	if (hlen > sizeof (struct ip) && ip_dooptions(ip, ifp))
		goto next;

	/*
	 * Check our list of addresses, to see if the packet is for us.
	 */
	for (ia = in_ifaddr; ia; ia = ia->ia_next) {
/* Also used by ip_forward() later in this file. */
#define	satosin(sa)	((struct sockaddr_in *)(sa))

		if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr)
			goto ours;
		/*
		 * Broadcast forms are accepted on broadcast-capable
		 * interfaces; with DIRECTED_BROADCAST defined, only on
		 * the interface the datagram arrived on.
		 */
		if (
#ifdef	DIRECTED_BROADCAST
		    ia->ia_ifp == ifp &&
#endif
		    (ia->ia_ifp->if_flags & IFF_BROADCAST)) {
			u_long t;

			if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
			    ip->ip_dst.s_addr)
				goto ours;
			if (ip->ip_dst.s_addr == ia->ia_netbroadcast.s_addr)
				goto ours;
			/*
			 * Look for all-0's host part (old broadcast addr),
			 * either for subnet or net.
			 */
			t = ntohl(ip->ip_dst.s_addr);
			if (t == ia->ia_subnet)
				goto ours;
			if (t == ia->ia_net)
				goto ours;
		}
	}
	if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST)
		goto ours;
	if (ip->ip_dst.s_addr == INADDR_ANY)
		goto ours;

	/*
	 * Not for us; forward if possible and desirable.
	 */
	ip_forward(ip, ifp);
	goto next;

ours:
	/*
	 * Look for queue of fragments
	 * of this datagram.
	 */
	for (fp = ipq.next; fp != &ipq; fp = fp->next)
		if (ip->ip_id == fp->ipq_id &&
		    ip->ip_src.s_addr == fp->ipq_src.s_addr &&
		    ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
		    ip->ip_p == fp->ipq_p)
			goto found;
	fp = 0;
found:

	/*
	 * Adjust ip_len to not reflect header,
	 * set ip_mff if more fragments are expected,
	 * convert offset of this to bytes.
	 */
	ip->ip_len -= hlen;
	((struct ipasfrag *)ip)->ipf_mff = 0;
	if (ip->ip_off & IP_MF)
		((struct ipasfrag *)ip)->ipf_mff = 1;
	ip->ip_off <<= 3;

	/*
	 * If datagram marked as having more fragments
	 * or if this is not the first fragment,
	 * attempt reassembly; if it succeeds, proceed.
	 * ip_reass() returns 0 while the datagram is still incomplete
	 * (or the fragment was dropped); it then owns the mbufs.
	 */
	if (((struct ipasfrag *)ip)->ipf_mff || ip->ip_off) {
		ipstat.ips_fragments++;
		ip = ip_reass((struct ipasfrag *)ip, fp);
		if (ip == 0)
			goto next;
		m = dtom(ip);
	} else
		/*
		 * A complete datagram arrived while a fragment queue
		 * with the same identity exists: discard the queue.
		 */
		if (fp)
			ip_freef(fp);

	/*
	 * Switch out to protocol's input routine.
	 */
	(*inetsw[ip_protox[ip->ip_p]].pr_input)(m, ifp);
	goto next;
bad:
	m_freem(m);
	goto next;
}
274 
275 /*
276  * Take incoming datagram fragment and try to
277  * reassemble it into whole datagram.  If a chain for
278  * reassembly of this datagram already exists, then it
279  * is given as fp; otherwise have to make a chain.
280  */
281 struct ip *
282 ip_reass(ip, fp)
283 	register struct ipasfrag *ip;
284 	register struct ipq *fp;
285 {
286 	register struct mbuf *m = dtom(ip);
287 	register struct ipasfrag *q;
288 	struct mbuf *t;
289 	int hlen = ip->ip_hl << 2;
290 	int i, next;
291 
292 	/*
293 	 * Presence of header sizes in mbufs
294 	 * would confuse code below.
295 	 */
296 	m->m_off += hlen;
297 	m->m_len -= hlen;
298 
299 	/*
300 	 * If first fragment to arrive, create a reassembly queue.
301 	 */
302 	if (fp == 0) {
303 		if ((t = m_get(M_WAIT, MT_FTABLE)) == NULL)
304 			goto dropfrag;
305 		fp = mtod(t, struct ipq *);
306 		insque(fp, &ipq);
307 		fp->ipq_ttl = IPFRAGTTL;
308 		fp->ipq_p = ip->ip_p;
309 		fp->ipq_id = ip->ip_id;
310 		fp->ipq_next = fp->ipq_prev = (struct ipasfrag *)fp;
311 		fp->ipq_src = ((struct ip *)ip)->ip_src;
312 		fp->ipq_dst = ((struct ip *)ip)->ip_dst;
313 		q = (struct ipasfrag *)fp;
314 		goto insert;
315 	}
316 
317 	/*
318 	 * Find a segment which begins after this one does.
319 	 */
320 	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next)
321 		if (q->ip_off > ip->ip_off)
322 			break;
323 
324 	/*
325 	 * If there is a preceding segment, it may provide some of
326 	 * our data already.  If so, drop the data from the incoming
327 	 * segment.  If it provides all of our data, drop us.
328 	 */
329 	if (q->ipf_prev != (struct ipasfrag *)fp) {
330 		i = q->ipf_prev->ip_off + q->ipf_prev->ip_len - ip->ip_off;
331 		if (i > 0) {
332 			if (i >= ip->ip_len)
333 				goto dropfrag;
334 			m_adj(dtom(ip), i);
335 			ip->ip_off += i;
336 			ip->ip_len -= i;
337 		}
338 	}
339 
340 	/*
341 	 * While we overlap succeeding segments trim them or,
342 	 * if they are completely covered, dequeue them.
343 	 */
344 	while (q != (struct ipasfrag *)fp && ip->ip_off + ip->ip_len > q->ip_off) {
345 		i = (ip->ip_off + ip->ip_len) - q->ip_off;
346 		if (i < q->ip_len) {
347 			q->ip_len -= i;
348 			q->ip_off += i;
349 			m_adj(dtom(q), i);
350 			break;
351 		}
352 		q = q->ipf_next;
353 		m_freem(dtom(q->ipf_prev));
354 		ip_deq(q->ipf_prev);
355 	}
356 
357 insert:
358 	/*
359 	 * Stick new segment in its place;
360 	 * check for complete reassembly.
361 	 */
362 	ip_enq(ip, q->ipf_prev);
363 	next = 0;
364 	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next) {
365 		if (q->ip_off != next)
366 			return (0);
367 		next += q->ip_len;
368 	}
369 	if (q->ipf_prev->ipf_mff)
370 		return (0);
371 
372 	/*
373 	 * Reassembly is complete; concatenate fragments.
374 	 */
375 	q = fp->ipq_next;
376 	m = dtom(q);
377 	t = m->m_next;
378 	m->m_next = 0;
379 	m_cat(m, t);
380 	q = q->ipf_next;
381 	while (q != (struct ipasfrag *)fp) {
382 		t = dtom(q);
383 		q = q->ipf_next;
384 		m_cat(m, t);
385 	}
386 
387 	/*
388 	 * Create header for new ip packet by
389 	 * modifying header of first packet;
390 	 * dequeue and discard fragment reassembly header.
391 	 * Make header visible.
392 	 */
393 	ip = fp->ipq_next;
394 	ip->ip_len = next;
395 	((struct ip *)ip)->ip_src = fp->ipq_src;
396 	((struct ip *)ip)->ip_dst = fp->ipq_dst;
397 	remque(fp);
398 	(void) m_free(dtom(fp));
399 	m = dtom(ip);
400 	m->m_len += (ip->ip_hl << 2);
401 	m->m_off -= (ip->ip_hl << 2);
402 	return ((struct ip *)ip);
403 
404 dropfrag:
405 	ipstat.ips_fragdropped++;
406 	m_freem(m);
407 	return (0);
408 }
409 
410 /*
411  * Free a fragment reassembly header and all
412  * associated datagrams.
413  */
414 ip_freef(fp)
415 	struct ipq *fp;
416 {
417 	register struct ipasfrag *q, *p;
418 
419 	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = p) {
420 		p = q->ipf_next;
421 		ip_deq(q);
422 		m_freem(dtom(q));
423 	}
424 	remque(fp);
425 	(void) m_free(dtom(fp));
426 }
427 
428 /*
429  * Put an ip fragment on a reassembly chain.
430  * Like insque, but pointers in middle of structure.
431  */
432 ip_enq(p, prev)
433 	register struct ipasfrag *p, *prev;
434 {
435 
436 	p->ipf_prev = prev;
437 	p->ipf_next = prev->ipf_next;
438 	prev->ipf_next->ipf_prev = p;
439 	prev->ipf_next = p;
440 }
441 
442 /*
443  * To ip_enq as remque is to insque.
444  */
445 ip_deq(p)
446 	register struct ipasfrag *p;
447 {
448 
449 	p->ipf_prev->ipf_next = p->ipf_next;
450 	p->ipf_next->ipf_prev = p->ipf_prev;
451 }
452 
453 /*
454  * IP timer processing;
455  * if a timer expires on a reassembly
456  * queue, discard it.
457  */
458 ip_slowtimo()
459 {
460 	register struct ipq *fp;
461 	int s = splnet();
462 
463 	fp = ipq.next;
464 	if (fp == 0) {
465 		splx(s);
466 		return;
467 	}
468 	while (fp != &ipq) {
469 		--fp->ipq_ttl;
470 		fp = fp->next;
471 		if (fp->prev->ipq_ttl == 0) {
472 			ipstat.ips_fragtimeout++;
473 			ip_freef(fp->prev);
474 		}
475 	}
476 	splx(s);
477 }
478 
479 /*
480  * Drain off all datagram fragments.
481  */
482 ip_drain()
483 {
484 
485 	while (ipq.next != &ipq) {
486 		ipstat.ips_fragdropped++;
487 		ip_freef(ipq.next);
488 	}
489 }
490 
491 struct in_ifaddr *ip_rtaddr();
492 
493 /*
494  * Do option processing on a datagram,
495  * possibly discarding it if bad options
496  * are encountered.
497  */
498 ip_dooptions(ip, ifp)
499 	register struct ip *ip;
500 	struct ifnet *ifp;
501 {
502 	register u_char *cp;
503 	int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB;
504 	register struct ip_timestamp *ipt;
505 	register struct in_ifaddr *ia;
506 	struct in_addr *sin;
507 	n_time ntime;
508 
509 	cp = (u_char *)(ip + 1);
510 	cnt = (ip->ip_hl << 2) - sizeof (struct ip);
511 	for (; cnt > 0; cnt -= optlen, cp += optlen) {
512 		opt = cp[IPOPT_OPTVAL];
513 		if (opt == IPOPT_EOL)
514 			break;
515 		if (opt == IPOPT_NOP)
516 			optlen = 1;
517 		else {
518 			optlen = cp[IPOPT_OLEN];
519 			if (optlen <= 0 || optlen > cnt) {
520 				code = &cp[IPOPT_OLEN] - (u_char *)ip;
521 				goto bad;
522 			}
523 		}
524 		switch (opt) {
525 
526 		default:
527 			break;
528 
529 		/*
530 		 * Source routing with record.
531 		 * Find interface with current destination address.
532 		 * If none on this machine then drop if strictly routed,
533 		 * or do nothing if loosely routed.
534 		 * Record interface address and bring up next address
535 		 * component.  If strictly routed make sure next
536 		 * address on directly accessible net.
537 		 */
538 		case IPOPT_LSRR:
539 		case IPOPT_SSRR:
540 			if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
541 				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
542 				goto bad;
543 			}
544 			ipaddr.sin_addr = ip->ip_dst;
545 			ia = (struct in_ifaddr *)
546 				ifa_ifwithaddr((struct sockaddr *)&ipaddr);
547 			if (ia == 0) {
548 				if (opt == IPOPT_SSRR) {
549 					type = ICMP_UNREACH;
550 					code = ICMP_UNREACH_SRCFAIL;
551 					goto bad;
552 				}
553 				/*
554 				 * Loose routing, and not at next destination
555 				 * yet; nothing to do except forward.
556 				 */
557 				break;
558 			}
559 			off--;			/* 0 origin */
560 			if (off > optlen - sizeof(struct in_addr)) {
561 				/*
562 				 * End of source route.  Should be for us.
563 				 */
564 				save_rte(cp, ip->ip_src);
565 				break;
566 			}
567 			/*
568 			 * locate outgoing interface
569 			 */
570 			bcopy((caddr_t)(cp + off), (caddr_t)&ipaddr.sin_addr,
571 			    sizeof(ipaddr.sin_addr));
572 			if ((opt == IPOPT_SSRR &&
573 			    in_iaonnetof(in_netof(ipaddr.sin_addr)) == 0) ||
574 			    (ia = ip_rtaddr(ipaddr.sin_addr)) == 0) {
575 				type = ICMP_UNREACH;
576 				code = ICMP_UNREACH_SRCFAIL;
577 				goto bad;
578 			}
579 			ip->ip_dst = ipaddr.sin_addr;
580 			bcopy((caddr_t)&(IA_SIN(ia)->sin_addr),
581 			    (caddr_t)(cp + off), sizeof(struct in_addr));
582 			cp[IPOPT_OFFSET] += sizeof(struct in_addr);
583 			break;
584 
585 		case IPOPT_RR:
586 			if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
587 				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
588 				goto bad;
589 			}
590 			/*
591 			 * If no space remains, ignore.
592 			 */
593 			off--;			/* 0 origin */
594 			if (off > optlen - sizeof(struct in_addr))
595 				break;
596 			bcopy((caddr_t)(cp + off), (caddr_t)&ipaddr.sin_addr,
597 			    sizeof(ipaddr.sin_addr));
598 			/*
599 			 * locate outgoing interface
600 			 */
601 			if ((ia = ip_rtaddr(ipaddr.sin_addr)) == 0) {
602 				type = ICMP_UNREACH;
603 				code = ICMP_UNREACH_SRCFAIL;
604 				goto bad;
605 			}
606 			bcopy((caddr_t)&(IA_SIN(ia)->sin_addr),
607 			    (caddr_t)(cp + off), sizeof(struct in_addr));
608 			cp[IPOPT_OFFSET] += sizeof(struct in_addr);
609 			break;
610 
611 		case IPOPT_TS:
612 			code = cp - (u_char *)ip;
613 			ipt = (struct ip_timestamp *)cp;
614 			if (ipt->ipt_len < 5)
615 				goto bad;
616 			if (ipt->ipt_ptr > ipt->ipt_len - sizeof (long)) {
617 				if (++ipt->ipt_oflw == 0)
618 					goto bad;
619 				break;
620 			}
621 			sin = (struct in_addr *)(cp+cp[IPOPT_OFFSET]-1);
622 			switch (ipt->ipt_flg) {
623 
624 			case IPOPT_TS_TSONLY:
625 				break;
626 
627 			case IPOPT_TS_TSANDADDR:
628 				if (ipt->ipt_ptr + sizeof(n_time) +
629 				    sizeof(struct in_addr) > ipt->ipt_len)
630 					goto bad;
631 				if (in_ifaddr == 0)
632 					goto bad;	/* ??? */
633 				bcopy((caddr_t)&IA_SIN(in_ifaddr)->sin_addr,
634 				    (caddr_t)sin, sizeof(struct in_addr));
635 				sin++;
636 				break;
637 
638 			case IPOPT_TS_PRESPEC:
639 				bcopy((caddr_t)sin, (caddr_t)&ipaddr.sin_addr,
640 				    sizeof(struct in_addr));
641 				if (ifa_ifwithaddr((struct sockaddr *)&ipaddr) == 0)
642 					continue;
643 				if (ipt->ipt_ptr + sizeof(n_time) +
644 				    sizeof(struct in_addr) > ipt->ipt_len)
645 					goto bad;
646 				ipt->ipt_ptr += sizeof(struct in_addr);
647 				break;
648 
649 			default:
650 				goto bad;
651 			}
652 			ntime = iptime();
653 			bcopy((caddr_t)&ntime, (caddr_t)sin, sizeof(n_time));
654 			ipt->ipt_ptr += sizeof(n_time);
655 		}
656 	}
657 	return (0);
658 bad:
659 	icmp_error(ip, type, code, ifp);
660 	return (1);
661 }
662 
663 /*
664  * Given address of next destination (final or next hop),
665  * return internet address info of interface to be used to get there.
666  */
667 struct in_ifaddr *
668 ip_rtaddr(dst)
669 	 struct in_addr dst;
670 {
671 	register struct sockaddr_in *sin;
672 	register struct in_ifaddr *ia;
673 
674 	sin = (struct sockaddr_in *) &ipforward_rt.ro_dst;
675 
676 	if (ipforward_rt.ro_rt == 0 || dst.s_addr != sin->sin_addr.s_addr) {
677 		if (ipforward_rt.ro_rt) {
678 			RTFREE(ipforward_rt.ro_rt);
679 			ipforward_rt.ro_rt = 0;
680 		}
681 		sin->sin_family = AF_INET;
682 		sin->sin_addr = dst;
683 
684 		rtalloc(&ipforward_rt);
685 	}
686 	if (ipforward_rt.ro_rt == 0)
687 		return ((struct in_ifaddr *)0);
688 	/*
689 	 * Find address associated with outgoing interface.
690 	 */
691 	for (ia = in_ifaddr; ia; ia = ia->ia_next)
692 		if (ia->ia_ifp == ipforward_rt.ro_rt->rt_ifp)
693 			break;
694 	return (ia);
695 }
696 
697 /*
698  * Save incoming source route for use in replies,
699  * to be picked up later by ip_srcroute if the receiver is interested.
700  */
701 save_rte(option, dst)
702 	u_char *option;
703 	struct in_addr dst;
704 {
705 	unsigned olen;
706 	extern ipprintfs;
707 
708 	olen = option[IPOPT_OLEN];
709 	if (olen > sizeof(ip_srcrt) - 1) {
710 		if (ipprintfs)
711 			printf("save_rte: olen %d\n", olen);
712 		return;
713 	}
714 	bcopy((caddr_t)option, (caddr_t)ip_srcrt.srcopt, olen);
715 	ip_nhops = (olen - IPOPT_OFFSET - 1) / sizeof(struct in_addr);
716 	ip_srcrt.route[ip_nhops++] = dst;
717 }
718 
719 /*
720  * Retrieve incoming source route for use in replies,
721  * in the same form used by setsockopt.
722  * The first hop is placed before the options, will be removed later.
723  */
724 struct mbuf *
725 ip_srcroute()
726 {
727 	register struct in_addr *p, *q;
728 	register struct mbuf *m;
729 
730 	if (ip_nhops == 0)
731 		return ((struct mbuf *)0);
732 	m = m_get(M_WAIT, MT_SOOPTS);
733 	m->m_len = ip_nhops * sizeof(struct in_addr) + IPOPT_OFFSET + 1 + 1;
734 
735 	/*
736 	 * First save first hop for return route
737 	 */
738 	p = &ip_srcrt.route[ip_nhops - 1];
739 	*(mtod(m, struct in_addr *)) = *p--;
740 
741 	/*
742 	 * Copy option fields and padding (nop) to mbuf.
743 	 */
744 	ip_srcrt.nop = IPOPT_NOP;
745 	bcopy((caddr_t)&ip_srcrt, mtod(m, caddr_t) + sizeof(struct in_addr),
746 	    IPOPT_OFFSET + 1 + 1);
747 	q = (struct in_addr *)(mtod(m, caddr_t) +
748 	    sizeof(struct in_addr) + IPOPT_OFFSET + 1 + 1);
749 	/*
750 	 * Record return path as an IP source route,
751 	 * reversing the path (pointers are now aligned).
752 	 */
753 	while (p >= ip_srcrt.route)
754 		*q++ = *p--;
755 	return (m);
756 }
757 
758 /*
759  * Strip out IP options, at higher
760  * level protocol in the kernel.
761  * Second argument is buffer to which options
762  * will be moved, and return value is their length.
763  */
764 ip_stripoptions(ip, mopt)
765 	struct ip *ip;
766 	struct mbuf *mopt;
767 {
768 	register int i;
769 	register struct mbuf *m;
770 	register caddr_t opts;
771 	int olen;
772 
773 	olen = (ip->ip_hl<<2) - sizeof (struct ip);
774 	m = dtom(ip);
775 	opts = (caddr_t)(ip + 1);
776 	if (mopt) {
777 		mopt->m_len = olen;
778 		mopt->m_off = MMINOFF;
779 		bcopy(opts, mtod(mopt, caddr_t), (unsigned)olen);
780 	}
781 	i = m->m_len - (sizeof (struct ip) + olen);
782 	bcopy(opts  + olen, opts, (unsigned)i);
783 	m->m_len -= olen;
784 	ip->ip_hl = sizeof(struct ip) >> 2;
785 }
786 
/*
 * Table of errno values with PRC_NCMDS entries.
 * NOTE(review): presumably indexed by PRC_* control command code, with
 * 0 meaning "no error to report" -- confirm against the PRC_*
 * definitions and the users of this table.
 * Entries not listed explicitly are zero.
 */
u_char inetctlerrmap[PRC_NCMDS] = {
	0,		0,		0,		0,
	0,		0,		EHOSTDOWN,	EHOSTUNREACH,
	ENETUNREACH,	EHOSTUNREACH,	ECONNREFUSED,	ECONNREFUSED,
	EMSGSIZE,	EHOSTUNREACH,	0,		0,
	0,		0,		0,		0,
	ENOPROTOOPT
};
795 
/*
 * Build-time defaults for the forwarding switches below; either may
 * be overridden by defining the macro before this point.
 */
#ifndef	IPFORWARDING
#define	IPFORWARDING	1
#endif
#ifndef	IPSENDREDIRECTS
#define	IPSENDREDIRECTS	1
#endif
int	ipprintfs = 0;			/* nonzero: print debugging messages */
int	ipforwarding = IPFORWARDING;	/* zero: ip_forward() drops packets */
extern	int in_interfaces;		/* ip_forward() forwards only if > 1 */
int	ipsendredirects = IPSENDREDIRECTS; /* zero: never generate redirects */
806 
807 /*
808  * Forward a packet.  If some error occurs return the sender
809  * an icmp packet.  Note we can't always generate a meaningful
810  * icmp message because icmp doesn't have a large enough repertoire
811  * of codes and types.
812  *
813  * If not forwarding (possibly because we have only a single external
814  * network), just drop the packet.  This could be confusing if ipforwarding
815  * was zero but some routing protocol was advancing us as a gateway
816  * to somewhere.  However, we must let the routing protocol deal with that.
817  */
818 ip_forward(ip, ifp)
819 	register struct ip *ip;
820 	struct ifnet *ifp;
821 {
822 	register int error, type = 0, code;
823 	register struct sockaddr_in *sin;
824 	struct mbuf *mcopy;
825 	struct in_addr dest;
826 
827 	dest.s_addr = 0;
828 	if (ipprintfs)
829 		printf("forward: src %x dst %x ttl %x\n", ip->ip_src,
830 			ip->ip_dst, ip->ip_ttl);
831 	ip->ip_id = htons(ip->ip_id);
832 	if (ipforwarding == 0 || in_interfaces <= 1) {
833 		ipstat.ips_cantforward++;
834 #ifdef GATEWAY
835 		type = ICMP_UNREACH, code = ICMP_UNREACH_NET;
836 		goto sendicmp;
837 #else
838 		m_freem(dtom(ip));
839 		return;
840 #endif
841 	}
842 	if (ip->ip_ttl < IPTTLDEC) {
843 		type = ICMP_TIMXCEED, code = ICMP_TIMXCEED_INTRANS;
844 		goto sendicmp;
845 	}
846 	ip->ip_ttl -= IPTTLDEC;
847 
848 	/*
849 	 * Save at most 64 bytes of the packet in case
850 	 * we need to generate an ICMP message to the src.
851 	 */
852 	mcopy = m_copy(dtom(ip), 0, imin((int)ip->ip_len, 64));
853 
854 	sin = (struct sockaddr_in *)&ipforward_rt.ro_dst;
855 	if (ipforward_rt.ro_rt == 0 ||
856 	    ip->ip_dst.s_addr != sin->sin_addr.s_addr) {
857 		if (ipforward_rt.ro_rt) {
858 			RTFREE(ipforward_rt.ro_rt);
859 			ipforward_rt.ro_rt = 0;
860 		}
861 		sin->sin_family = AF_INET;
862 		sin->sin_addr = ip->ip_dst;
863 
864 		rtalloc(&ipforward_rt);
865 	}
866 	/*
867 	 * If forwarding packet using same interface that it came in on,
868 	 * perhaps should send a redirect to sender to shortcut a hop.
869 	 * Only send redirect if source is sending directly to us,
870 	 * and if packet was not source routed (or has any options).
871 	 */
872 	if (ipforward_rt.ro_rt && ipforward_rt.ro_rt->rt_ifp == ifp &&
873 	    ipsendredirects && ip->ip_hl == (sizeof(struct ip) >> 2)) {
874 		struct in_ifaddr *ia;
875 		extern struct in_ifaddr *ifptoia();
876 		u_long src = ntohl(ip->ip_src.s_addr);
877 		u_long dst = ntohl(ip->ip_dst.s_addr);
878 
879 		if ((ia = ifptoia(ifp)) &&
880 		   (src & ia->ia_subnetmask) == ia->ia_subnet) {
881 		    if (ipforward_rt.ro_rt->rt_flags & RTF_GATEWAY)
882 			dest = satosin(&ipforward_rt.ro_rt->rt_gateway)->sin_addr;
883 		    else
884 			dest = ip->ip_dst;
885 		    /*
886 		     * If the destination is reached by a route to host,
887 		     * is on a subnet of a local net, or is directly
888 		     * on the attached net (!), use host redirect.
889 		     * (We may be the correct first hop for other subnets.)
890 		     */
891 		    type = ICMP_REDIRECT;
892 		    code = ICMP_REDIRECT_NET;
893 		    if ((ipforward_rt.ro_rt->rt_flags & RTF_HOST) ||
894 		       (ipforward_rt.ro_rt->rt_flags & RTF_GATEWAY) == 0)
895 			code = ICMP_REDIRECT_HOST;
896 		    else for (ia = in_ifaddr; ia = ia->ia_next; )
897 			if ((dst & ia->ia_netmask) == ia->ia_net) {
898 			    if (ia->ia_subnetmask != ia->ia_netmask)
899 				    code = ICMP_REDIRECT_HOST;
900 			    break;
901 			}
902 		    if (ipprintfs)
903 		        printf("redirect (%d) to %x\n", code, dest);
904 		}
905 	}
906 
907 	error = ip_output(dtom(ip), (struct mbuf *)0, &ipforward_rt,
908 		IP_FORWARDING);
909 	if (error)
910 		ipstat.ips_cantforward++;
911 	else if (type)
912 		ipstat.ips_redirectsent++;
913 	else {
914 		if (mcopy)
915 			m_freem(mcopy);
916 		ipstat.ips_forward++;
917 		return;
918 	}
919 	if (mcopy == NULL)
920 		return;
921 	ip = mtod(mcopy, struct ip *);
922 	type = ICMP_UNREACH;
923 	switch (error) {
924 
925 	case 0:				/* forwarded, but need redirect */
926 		type = ICMP_REDIRECT;
927 		/* code set above */
928 		break;
929 
930 	case ENETUNREACH:
931 	case ENETDOWN:
932 		code = ICMP_UNREACH_NET;
933 		break;
934 
935 	case EMSGSIZE:
936 		code = ICMP_UNREACH_NEEDFRAG;
937 		break;
938 
939 	case EPERM:
940 		code = ICMP_UNREACH_PORT;
941 		break;
942 
943 	case ENOBUFS:
944 		type = ICMP_SOURCEQUENCH;
945 		break;
946 
947 	case EHOSTDOWN:
948 	case EHOSTUNREACH:
949 		code = ICMP_UNREACH_HOST;
950 		break;
951 	}
952 sendicmp:
953 	icmp_error(ip, type, code, ifp, dest);
954 }
955