xref: /netbsd-src/sys/netinet/ip_icmp.c (revision 3b01aba77a7a698587faaae455bbfe740923c1f5)
1 /*	$NetBSD: ip_icmp.c,v 1.60 2001/03/08 00:17:05 itojun Exp $	*/
2 
3 /*
4  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the project nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 /*-
33  * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc.
34  * All rights reserved.
35  *
36  * This code is derived from software contributed to The NetBSD Foundation
37  * by Public Access Networks Corporation ("Panix").  It was developed under
38  * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon.
39  *
40  * This code is derived from software contributed to The NetBSD Foundation
41  * by Jason R. Thorpe of Zembu Labs, Inc.
42  *
43  * Redistribution and use in source and binary forms, with or without
44  * modification, are permitted provided that the following conditions
45  * are met:
46  * 1. Redistributions of source code must retain the above copyright
47  *    notice, this list of conditions and the following disclaimer.
48  * 2. Redistributions in binary form must reproduce the above copyright
49  *    notice, this list of conditions and the following disclaimer in the
50  *    documentation and/or other materials provided with the distribution.
51  * 3. All advertising materials mentioning features or use of this software
52  *    must display the following acknowledgement:
53  *	This product includes software developed by the NetBSD
54  *	Foundation, Inc. and its contributors.
55  * 4. Neither the name of The NetBSD Foundation nor the names of its
56  *    contributors may be used to endorse or promote products derived
57  *    from this software without specific prior written permission.
58  *
59  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
60  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
61  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
62  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
63  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
64  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
65  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
66  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
67  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
68  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
69  * POSSIBILITY OF SUCH DAMAGE.
70  */
71 
72 /*
73  * Copyright (c) 1982, 1986, 1988, 1993
74  *	The Regents of the University of California.  All rights reserved.
75  *
76  * Redistribution and use in source and binary forms, with or without
77  * modification, are permitted provided that the following conditions
78  * are met:
79  * 1. Redistributions of source code must retain the above copyright
80  *    notice, this list of conditions and the following disclaimer.
81  * 2. Redistributions in binary form must reproduce the above copyright
82  *    notice, this list of conditions and the following disclaimer in the
83  *    documentation and/or other materials provided with the distribution.
84  * 3. All advertising materials mentioning features or use of this software
85  *    must display the following acknowledgement:
86  *	This product includes software developed by the University of
87  *	California, Berkeley and its contributors.
88  * 4. Neither the name of the University nor the names of its contributors
89  *    may be used to endorse or promote products derived from this software
90  *    without specific prior written permission.
91  *
92  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
93  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
95  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
96  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
97  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
98  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
99  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
100  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
101  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
102  * SUCH DAMAGE.
103  *
104  *	@(#)ip_icmp.c	8.2 (Berkeley) 1/4/94
105  */
106 
107 #include "opt_ipsec.h"
108 
109 #include <sys/param.h>
110 #include <sys/systm.h>
111 #include <sys/malloc.h>
112 #include <sys/mbuf.h>
113 #include <sys/protosw.h>
114 #include <sys/socket.h>
115 #include <sys/time.h>
116 #include <sys/kernel.h>
117 #include <sys/proc.h>
118 
119 #include <uvm/uvm_extern.h>
120 
121 #include <sys/sysctl.h>
122 
123 #include <net/if.h>
124 #include <net/route.h>
125 
126 #include <netinet/in.h>
127 #include <netinet/in_systm.h>
128 #include <netinet/in_var.h>
129 #include <netinet/ip.h>
130 #include <netinet/ip_icmp.h>
131 #include <netinet/ip_var.h>
132 #include <netinet/in_pcb.h>
133 #include <netinet/icmp_var.h>
134 
135 #ifdef IPSEC
136 #include <netinet6/ipsec.h>
137 #include <netkey/key.h>
138 #endif
139 
140 #include <machine/stdarg.h>
141 
142 /*
143  * ICMP routines: error generation, receive packet processing, and
144  * routines to turnaround packets back to the originator, and
145  * host table maintenance routines.
146  */
147 
/* Nonzero makes icmp_input() answer ICMP_MASKREQ; settable via ICMPCTL_MASKREPL. */
148 int	icmpmaskrepl = 0;
149 #ifdef ICMPPRINTFS
/* Nonzero turns on the debugging printf() calls compiled in under ICMPPRINTFS. */
150 int	icmpprintfs = 0;
151 #endif
/*
 * Upper bound on how many bytes beyond the offending packet's IP header
 * icmp_error() copies into an error message; settable via
 * ICMPCTL_RETURNDATABYTES.
 */
152 int	icmpreturndatabytes = 8;
153 
154 /*
155  * List of callbacks to notify when Path MTU changes are made.
156  */
/* One registered Path MTU callback; nodes live on icmp_mtudisc_callbacks. */
157 struct icmp_mtudisc_callback {
158 	LIST_ENTRY(icmp_mtudisc_callback) mc_list;	/* list linkage */
159 	void (*mc_func) __P((struct in_addr));	/* called by icmp_mtudisc() with its faddr argument */
160 };
161 
/*
 * Head of the callback list.  Entries are inserted by
 * icmp_mtudisc_callback_register() and walked by icmp_mtudisc().
 */
162 LIST_HEAD(, icmp_mtudisc_callback) icmp_mtudisc_callbacks =
163     LIST_HEAD_INITIALIZER(&icmp_mtudisc_callbacks);
164 
/* ip_next_mtu() is defined near the end of this file; the static variant is disabled. */
165 #if 0
166 static int	ip_next_mtu __P((int, int));
167 #else
168 /*static*/ int	ip_next_mtu __P((int, int));
169 #endif
170 
/*
 * State handed to ppsratecheck() by icmp_ratelimit(): the limit itself
 * (defined outside this file, settable via ICMPCTL_ERRPPSLIMIT), the
 * running count, and the timestamp ppsratecheck() maintains.
 */
171 extern int icmperrppslim;
172 static int icmperrpps_count = 0;
173 static struct timeval icmperrppslim_last;
174 
/* Route timer callback installed by icmp_mtudisc() via rt_timer_add(). */
175 static void icmp_mtudisc_timeout __P((struct rtentry *, struct rttimer *));
176 
/* Rate-limit check used by icmp_error(); nonzero means "do not send". */
177 static int icmp_ratelimit __P((const struct in_addr *, const int, const int));
178 
179 /*
180  * Register a Path MTU Discovery callback.
181  */
182 void
183 icmp_mtudisc_callback_register(func)
184 	void (*func) __P((struct in_addr));
185 {
186 	struct icmp_mtudisc_callback *mc;
187 
188 	for (mc = LIST_FIRST(&icmp_mtudisc_callbacks); mc != NULL;
189 	     mc = LIST_NEXT(mc, mc_list)) {
190 		if (mc->mc_func == func)
191 			return;
192 	}
193 
194 	mc = malloc(sizeof(*mc), M_PCB, M_NOWAIT);
195 	if (mc == NULL)
196 		panic("icmp_mtudisc_callback_register");
197 
198 	mc->mc_func = func;
199 	LIST_INSERT_HEAD(&icmp_mtudisc_callbacks, mc, mc_list);
200 }
201 
202 /*
203  * Generate an error packet of type error
204  * in response to bad packet ip.
205  */
206 void
207 icmp_error(n, type, code, dest, destifp)
208 	struct mbuf *n;
209 	int type, code;
210 	n_long dest;
211 	struct ifnet *destifp;
212 {
213 	struct ip *oip = mtod(n, struct ip *), *nip;
214 	unsigned oiplen = oip->ip_hl << 2;
215 	struct icmp *icp;
216 	struct mbuf *m;
217 	unsigned icmplen, mblen;
218 
219 #ifdef ICMPPRINTFS
220 	if (icmpprintfs)
221 		printf("icmp_error(%x, %d, %d)\n", oip, type, code);
222 #endif
223 	if (type != ICMP_REDIRECT)
224 		icmpstat.icps_error++;
225 	/*
226 	 * Don't send error if the original packet was encrypted.
227 	 * Don't send error if not the first fragment of message.
228 	 * Don't error if the old packet protocol was ICMP
229 	 * error message, only known informational types.
230 	 */
231 	if (n->m_flags & M_DECRYPTED)
232 		goto freeit;
233 	if (oip->ip_off &~ (IP_MF|IP_DF))
234 		goto freeit;
235 	if (oip->ip_p == IPPROTO_ICMP && type != ICMP_REDIRECT &&
236 	  n->m_len >= oiplen + ICMP_MINLEN &&
237 	  !ICMP_INFOTYPE(((struct icmp *)((caddr_t)oip + oiplen))->icmp_type)) {
238 		icmpstat.icps_oldicmp++;
239 		goto freeit;
240 	}
241 	/* Don't send error in response to a multicast or broadcast packet */
242 	if (n->m_flags & (M_BCAST|M_MCAST))
243 		goto freeit;
244 
245 	/*
246 	 * First, do a rate limitation check.
247 	 */
248 	if (icmp_ratelimit(&oip->ip_src, type, code)) {
249 		/* XXX stat */
250 		goto freeit;
251 	}
252 
253 	/*
254 	 * Now, formulate icmp message
255 	 */
256 	icmplen = oiplen + min(icmpreturndatabytes, oip->ip_len - oiplen);
257 	/*
258 	 * Defend against mbuf chains shorter than oip->ip_len:
259 	 */
260 	mblen = 0;
261 	for (m = n; m && (mblen < icmplen); m = m->m_next)
262 		mblen += m->m_len;
263 	icmplen = min(mblen, icmplen);
264 
265 	/*
266 	 * As we are not required to return everything we have,
267 	 * we return whatever we can return at ease.
268 	 *
269 	 * Note that ICMP datagrams longer than 576 octets are out of spec
270 	 * according to RFC1812; the limit on icmpreturndatabytes below in
271 	 * icmp_sysctl will keep things below that limit.
272 	 */
273 
274 	KASSERT(ICMP_MINLEN <= MCLBYTES);
275 
276 	if (icmplen + ICMP_MINLEN > MCLBYTES)
277 		icmplen = MCLBYTES - ICMP_MINLEN;
278 
279 	m = m_gethdr(M_DONTWAIT, MT_HEADER);
280 	if (m && (icmplen + ICMP_MINLEN > MHLEN)) {
281 		MCLGET(m, M_DONTWAIT);
282 		if ((m->m_flags & M_EXT) == 0) {
283 			m_freem(m);
284 			m = NULL;
285 		}
286 	}
287 	if (m == NULL)
288 		goto freeit;
289 	m->m_len = icmplen + ICMP_MINLEN;
290 	if ((m->m_flags & M_EXT) == 0)
291 		MH_ALIGN(m, m->m_len);
292 	icp = mtod(m, struct icmp *);
293 	if ((u_int)type > ICMP_MAXTYPE)
294 		panic("icmp_error");
295 	icmpstat.icps_outhist[type]++;
296 	icp->icmp_type = type;
297 	if (type == ICMP_REDIRECT)
298 		icp->icmp_gwaddr.s_addr = dest;
299 	else {
300 		icp->icmp_void = 0;
301 		/*
302 		 * The following assignments assume an overlay with the
303 		 * zeroed icmp_void field.
304 		 */
305 		if (type == ICMP_PARAMPROB) {
306 			icp->icmp_pptr = code;
307 			code = 0;
308 		} else if (type == ICMP_UNREACH &&
309 		    code == ICMP_UNREACH_NEEDFRAG && destifp)
310 			icp->icmp_nextmtu = htons(destifp->if_mtu);
311 	}
312 
313 	HTONS(oip->ip_off);
314 	HTONS(oip->ip_len);
315 	icp->icmp_code = code;
316 	m_copydata(n, 0, icmplen, (caddr_t)&icp->icmp_ip);
317 	nip = &icp->icmp_ip;
318 
319 	/*
320 	 * Now, copy old ip header (without options)
321 	 * in front of icmp message.
322 	 */
323 	if (m->m_data - sizeof(struct ip) < m->m_pktdat)
324 		panic("icmp len");
325 	m->m_data -= sizeof(struct ip);
326 	m->m_len += sizeof(struct ip);
327 	m->m_pkthdr.len = m->m_len;
328 	m->m_pkthdr.rcvif = n->m_pkthdr.rcvif;
329 	nip = mtod(m, struct ip *);
330 	/* ip_v set in ip_output */
331 	nip->ip_hl = sizeof(struct ip) >> 2;
332 	nip->ip_tos = 0;
333 	nip->ip_len = m->m_len;
334 	/* ip_id set in ip_output */
335 	nip->ip_off = 0;
336 	/* ip_ttl set in icmp_reflect */
337 	nip->ip_p = IPPROTO_ICMP;
338 	nip->ip_src = oip->ip_src;
339 	nip->ip_dst = oip->ip_dst;
340 	icmp_reflect(m);
341 
342 freeit:
343 	m_freem(n);
344 }
345 
/*
 * Scratch AF_INET sockaddrs.  Code in this file only fills in sin_addr
 * before passing them on: icmpsrc holds the destination of the packet
 * quoted in an error or redirect, icmpdst the redirect gateway or the
 * address used for the mask-request interface lookup, icmpgw the sender
 * of a redirect.
 */
346 static struct sockaddr_in icmpsrc = { sizeof (struct sockaddr_in), AF_INET };
347 static struct sockaddr_in icmpdst = { sizeof (struct sockaddr_in), AF_INET };
348 static struct sockaddr_in icmpgw = { sizeof (struct sockaddr_in), AF_INET };
/* Not referenced elsewhere in this file; NOTE(review): presumably used by other files. */
349 struct sockaddr_in icmpmask = { 8, 0 };
350 
351 /*
352  * Process a received ICMP message.
353  */
/*
 * icmp_input: validate an incoming ICMP message and act on it.
 *
 * m holds the packet starting at its IP header.  Two further arguments
 * are read from the variadic list as (int hlen, int proto): hlen is the
 * number of bytes in front of the ICMP header in m, proto is passed
 * through unchanged to rip_input().
 *
 * Outcomes, as implemented below:
 *  - message shorter than ICMP_MINLEN, bad checksum, or a bad embedded
 *    length on the "deliver" path: counted and the packet is freed;
 *  - echo, timestamp and (when icmpmaskrepl is set) mask requests: turned
 *    into the reply in place and handed to icmp_reflect();
 *  - unreachable, time exceeded, parameter problem, source quench: the
 *    code is translated to a PRC_* value and given to the pr_ctlinput
 *    routine of the protocol named in the quoted IP header;
 *  - redirects: applied through rtredirect();
 *  - everything that leaves the switch with "break", and any type above
 *    ICMP_MAXTYPE, ends up in rip_input().
 */
354 void
355 #if __STDC__
356 icmp_input(struct mbuf *m, ...)
357 #else
358 icmp_input(m, va_alist)
359 	struct mbuf *m;
360 	va_dcl
361 #endif
362 {
363 	int proto;
364 	struct icmp *icp;
365 	struct ip *ip = mtod(m, struct ip *);
366 	int icmplen;
367 	int i;
368 	struct in_ifaddr *ia;
369 	void *(*ctlfunc) __P((int, struct sockaddr *, void *));
370 	int code;
371 	int hlen;
372 	va_list ap;
373 
374 	va_start(ap, m);
375 	hlen = va_arg(ap, int);
376 	proto = va_arg(ap, int);
377 	va_end(ap);
378 
379 	/*
380 	 * Locate icmp structure in mbuf, and check
381 	 * that not corrupted and of at least minimum length.
382 	 */
383 	icmplen = ip->ip_len - hlen;
384 #ifdef ICMPPRINTFS
385 	if (icmpprintfs)
386 		printf("icmp_input from %x to %x, len %d\n",
387 		    ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr),
388 		    icmplen);
389 #endif
390 	if (icmplen < ICMP_MINLEN) {
391 		icmpstat.icps_tooshort++;
392 		goto freeit;
393 	}
	/* Make the header plus up to ICMP_ADVLENMIN bytes of ICMP contiguous. */
394 	i = hlen + min(icmplen, ICMP_ADVLENMIN);
395 	if (m->m_len < i && (m = m_pullup(m, i)) == 0) {
396 		icmpstat.icps_tooshort++;
		/* m is NULL at this point, hence return instead of goto freeit. */
397 		return;
398 	}
	/* m_pullup() may have returned a different mbuf; reload the pointer. */
399 	ip = mtod(m, struct ip *);
	/* Temporarily hide the first hlen bytes so in_cksum() sees only ICMP. */
400 	m->m_len -= hlen;
401 	m->m_data += hlen;
402 	icp = mtod(m, struct icmp *);
403 	if (in_cksum(m, icmplen)) {
404 		icmpstat.icps_checksum++;
405 		goto freeit;
406 	}
	/* Undo the adjustment; icp keeps pointing at the ICMP header. */
407 	m->m_len += hlen;
408 	m->m_data -= hlen;
409 
410 #ifdef ICMPPRINTFS
411 	/*
412 	 * Message type specific processing.
413 	 */
414 	if (icmpprintfs)
415 		printf("icmp_input, type %d code %d\n", icp->icmp_type,
416 		    icp->icmp_code);
417 #endif
	/* Types beyond the histogram range skip all kernel processing. */
418 	if (icp->icmp_type > ICMP_MAXTYPE)
419 		goto raw;
420 	icmpstat.icps_inhist[icp->icmp_type]++;
421 	code = icp->icmp_code;
422 	switch (icp->icmp_type) {
423 
424 	case ICMP_UNREACH:
425 		switch (code) {
426 			case ICMP_UNREACH_NET:
427 			case ICMP_UNREACH_HOST:
428 			case ICMP_UNREACH_PROTOCOL:
429 			case ICMP_UNREACH_PORT:
430 			case ICMP_UNREACH_SRCFAIL:
				/* ICMP code N becomes PRC_UNREACH_NET + N. */
431 				code += PRC_UNREACH_NET;
432 				break;
433 
434 			case ICMP_UNREACH_NEEDFRAG:
435 				code = PRC_MSGSIZE;
436 				break;
437 
438 			case ICMP_UNREACH_NET_UNKNOWN:
439 			case ICMP_UNREACH_NET_PROHIB:
440 			case ICMP_UNREACH_TOSNET:
441 				code = PRC_UNREACH_NET;
442 				break;
443 
444 			case ICMP_UNREACH_HOST_UNKNOWN:
445 			case ICMP_UNREACH_ISOLATED:
446 			case ICMP_UNREACH_HOST_PROHIB:
447 			case ICMP_UNREACH_TOSHOST:
448 				code = PRC_UNREACH_HOST;
449 				break;
450 
451 			default:
452 				goto badcode;
453 		}
454 		goto deliver;
455 
456 	case ICMP_TIMXCEED:
457 		if (code > 1)
458 			goto badcode;
459 		code += PRC_TIMXCEED_INTRANS;
460 		goto deliver;
461 
462 	case ICMP_PARAMPROB:
463 		if (code > 1)
464 			goto badcode;
465 		code = PRC_PARAMPROB;
466 		goto deliver;
467 
468 	case ICMP_SOURCEQUENCH:
469 		if (code)
470 			goto badcode;
471 		code = PRC_QUENCH;
472 		goto deliver;
473 
	/* Shared tail of the four error types above; code is now a PRC_* value. */
474 	deliver:
475 		/*
476 		 * Problem with datagram; advise higher level routines.
477 		 */
478 		if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
479 		    icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
480 			icmpstat.icps_badlen++;
481 			goto freeit;
482 		}
483 		if (IN_MULTICAST(icp->icmp_ip.ip_dst.s_addr))
484 			goto badcode;
		/* Quoted ip_len goes to host order; icmp_mtudisc() reads it that way. */
485 		NTOHS(icp->icmp_ip.ip_len);
486 #ifdef ICMPPRINTFS
487 		if (icmpprintfs)
488 			printf("deliver to protocol %d\n", icp->icmp_ip.ip_p);
489 #endif
490 		icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
491 		ctlfunc = inetsw[ip_protox[icp->icmp_ip.ip_p]].pr_ctlinput;
492 		if (ctlfunc)
493 			(void) (*ctlfunc)(code, sintosa(&icmpsrc),
494 			    &icp->icmp_ip);
495 		break;
496 
	/* Unsupported code: count it; the packet still reaches rip_input(). */
497 	badcode:
498 		icmpstat.icps_badcode++;
499 		break;
500 
501 	case ICMP_ECHO:
502 		icp->icmp_type = ICMP_ECHOREPLY;
503 		goto reflect;
504 
505 	case ICMP_TSTAMP:
506 		if (icmplen < ICMP_TSLEN) {
507 			icmpstat.icps_badlen++;
508 			break;
509 		}
510 		icp->icmp_type = ICMP_TSTAMPREPLY;
511 		icp->icmp_rtime = iptime();
512 		icp->icmp_ttime = icp->icmp_rtime;	/* bogus, do later! */
513 		goto reflect;
514 
515 	case ICMP_MASKREQ:
		/* Mask replies are off unless icmpmaskrepl is nonzero. */
516 		if (icmpmaskrepl == 0)
517 			break;
518 		/*
519 		 * We are not able to respond with all ones broadcast
520 		 * unless we receive it over a point-to-point interface.
521 		 */
522 		if (icmplen < ICMP_MASKLEN) {
523 			icmpstat.icps_badlen++;
524 			break;
525 		}
		/* Pick the address used to find the matching interface address. */
526 		if (ip->ip_dst.s_addr == INADDR_BROADCAST ||
527 		    in_nullhost(ip->ip_dst))
528 			icmpdst.sin_addr = ip->ip_src;
529 		else
530 			icmpdst.sin_addr = ip->ip_dst;
531 		ia = ifatoia(ifaof_ifpforaddr(sintosa(&icmpdst),
532 		    m->m_pkthdr.rcvif));
533 		if (ia == 0)
534 			break;
535 		icp->icmp_type = ICMP_MASKREPLY;
536 		icp->icmp_mask = ia->ia_sockmask.sin_addr.s_addr;
		/* A request from 0.0.0.0 is answered to the broadcast or peer address. */
537 		if (in_nullhost(ip->ip_src)) {
538 			if (ia->ia_ifp->if_flags & IFF_BROADCAST)
539 				ip->ip_src = ia->ia_broadaddr.sin_addr;
540 			else if (ia->ia_ifp->if_flags & IFF_POINTOPOINT)
541 				ip->ip_src = ia->ia_dstaddr.sin_addr;
542 		}
	/* Common exit for replies: m goes to icmp_reflect() and is not used again here. */
543 reflect:
544 		icmpstat.icps_reflect++;
545 		icmpstat.icps_outhist[icp->icmp_type]++;
546 		icmp_reflect(m);
547 		return;
548 
549 	case ICMP_REDIRECT:
550 		if (code > 3)
551 			goto badcode;
552 		if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
553 		    icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
554 			icmpstat.icps_badlen++;
555 			break;
556 		}
557 		/*
558 		 * Short circuit routing redirects to force
559 		 * immediate change in the kernel's routing
560 		 * tables.  The message is also handed to anyone
561 		 * listening on a raw socket (e.g. the routing
562 		 * daemon for use in updating its tables).
563 		 */
564 		icmpgw.sin_addr = ip->ip_src;
565 		icmpdst.sin_addr = icp->icmp_gwaddr;
566 #ifdef	ICMPPRINTFS
567 		if (icmpprintfs)
568 			printf("redirect dst %x to %x\n", icp->icmp_ip.ip_dst,
569 			    icp->icmp_gwaddr);
570 #endif
571 		icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
572 		rtredirect(sintosa(&icmpsrc), sintosa(&icmpdst),
573 		    (struct sockaddr *)0, RTF_GATEWAY | RTF_HOST,
574 		    sintosa(&icmpgw), (struct rtentry **)0);
575 		pfctlinput(PRC_REDIRECT_HOST, sintosa(&icmpsrc));
576 #ifdef IPSEC
577 		key_sa_routechange((struct sockaddr *)&icmpsrc);
578 #endif
579 		break;
580 
581 	/*
582 	 * No kernel processing for the following;
583 	 * just fall through to send to raw listener.
584 	 */
585 	case ICMP_ECHOREPLY:
586 	case ICMP_ROUTERADVERT:
587 	case ICMP_ROUTERSOLICIT:
588 	case ICMP_TSTAMPREPLY:
589 	case ICMP_IREQREPLY:
590 	case ICMP_MASKREPLY:
591 	default:
592 		break;
593 	}
594 
	/* Hand the packet to rip_input(); m is not touched again here. */
595 raw:
596 	rip_input(m, hlen, proto);
597 	return;
598 
	/* Drop path for malformed packets. */
599 freeit:
600 	m_freem(m);
601 	return;
602 }
603 
604 /*
605  * Reflect the ip packet back to the source
606  */
/*
 * icmp_reflect: turn the packet in m around and send it via icmp_send().
 *
 * The packet's source becomes its destination.  The new source address
 * is chosen by the first rule that yields one:
 *  1. the local address the packet was sent to, or the address whose
 *     broadcast address on the receiving interface it was sent to;
 *  2. the address in_selectsrc() picks for the reply, provided it is
 *     configured on the receiving interface;
 *  3. the first AF_INET address on the receiving interface;
 *  4. the first entry of in_ifaddr whose interface is not a loopback.
 * If none applies the packet is dropped.
 *
 * IP options are removed from the header.  Record-route, timestamp and
 * security options are copied into a separate mbuf (opts) that is passed
 * to icmp_send() and freed here afterwards.
 *
 * m is either freed here or passed on to icmp_send().
 */
607 void
608 icmp_reflect(m)
609 	struct mbuf *m;
610 {
611 	struct ip *ip = mtod(m, struct ip *);
612 	struct in_ifaddr *ia;
613 	struct ifaddr *ifa;
614 	struct sockaddr_in *sin = 0;
615 	struct in_addr t;
616 	struct mbuf *opts = 0;
	/* Number of option bytes following the fixed IP header. */
617 	int optlen = (ip->ip_hl << 2) - sizeof(struct ip);
618 
	/* Refuse to reply to a source that is neither forwardable nor loopback. */
619 	if (!in_canforward(ip->ip_src) &&
620 	    ((ip->ip_src.s_addr & IN_CLASSA_NET) !=
621 	     htonl(IN_LOOPBACKNET << IN_CLASSA_NSHIFT))) {
622 		m_freem(m);	/* Bad return address */
623 		goto done;	/* ip_output() will check for broadcast */
624 	}
	/* t keeps the original destination while the addresses are swapped. */
625 	t = ip->ip_dst;
626 	ip->ip_dst = ip->ip_src;
627 	/*
628 	 * If the incoming packet was addressed directly to us, use
629 	 * dst as the src for the reply.  Otherwise (broadcast or
630 	 * anonymous), use an address which corresponds to the
631 	 * incoming interface, with a preference for the address which
632 	 * corresponds to the route to the destination of the ICMP.
633 	 */
634 
635 	/* Look for packet addressed to us */
636 	INADDR_TO_IA(t, ia);
637 
638 	/* look for packet sent to broadcast address */
639 	if (ia == NULL && (m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST)) {
640 		for (ifa = m->m_pkthdr.rcvif->if_addrlist.tqh_first;
641 		    ifa != NULL; ifa = ifa->ifa_list.tqe_next) {
642 			if (ifa->ifa_addr->sa_family != AF_INET)
643 				continue;
644 			if (in_hosteq(t,ifatoia(ifa)->ia_broadaddr.sin_addr)) {
645 				ia = ifatoia(ifa);
646 				break;
647 			}
648 		}
649 	}
650 
651 	if (ia)
652 		sin = &ia->ia_addr;
653 
654 	icmpdst.sin_addr = t;
655 
656 	/* if the packet is addressed somewhere else, compute the
657 	   source address for packets routed back to the source, and
658 	   use that, if it's an address on the interface which
659 	   received the packet */
660 	if (sin == (struct sockaddr_in *)0) {
661 		struct sockaddr_in sin_dst;
662 		struct route icmproute;
663 		int errornum;
664 
665 		sin_dst.sin_family = AF_INET;
666 		sin_dst.sin_len = sizeof(struct sockaddr_in);
667 		sin_dst.sin_addr = ip->ip_dst;
668 		bzero(&icmproute, sizeof(icmproute));
669 		errornum = 0;
670 		sin = in_selectsrc(&sin_dst, &icmproute, 0, NULL, &errornum);
671 		/* errornum is never used */
		/* Release the route left in the temporary struct route, if any. */
672 		if (icmproute.ro_rt)
673 			RTFREE(icmproute.ro_rt);
674 		/* check to make sure sin is a source address on rcvif */
675 		if (sin) {
			/* t is reused for the candidate; the original dst is no longer needed. */
676 			t = sin->sin_addr;
677 			sin = (struct sockaddr_in *)0;
678 			INADDR_TO_IA(t, ia);
679 			while (ia) {
680 				if (ia->ia_ifp == m->m_pkthdr.rcvif) {
681 					sin = &ia->ia_addr;
682 					break;
683 				}
684 				NEXT_IA_WITH_SAME_ADDR(ia);
685 			}
686 		}
687 	}
688 
689 	/* if it was not addressed to us, but the route doesn't go out
690 	   the source interface, pick an address on the source
691 	   interface.  This can happen when routing is asymmetric, or
692 	   when the incoming packet was encapsulated */
693 	if (sin == (struct sockaddr_in *)0) {
694 		for (ifa = m->m_pkthdr.rcvif->if_addrlist.tqh_first;
695 		     ifa != NULL; ifa = ifa->ifa_list.tqe_next) {
696 			if (ifa->ifa_addr->sa_family != AF_INET)
697 				continue;
698 			sin = &(ifatoia(ifa)->ia_addr);
699 			break;
700 		}
701 	}
702 
703 	/*
704 	 * The following happens if the packet was not addressed to us,
705 	 * and was received on an interface with no IP address:
706 	 * We find the first AF_INET address on the first non-loopback
707 	 * interface.
708 	 */
709 	if (sin == (struct sockaddr_in *)0)
710 		for (ia = in_ifaddr.tqh_first; ia != NULL;
711 		    ia = ia->ia_list.tqe_next) {
712 			if (ia->ia_ifp->if_flags & IFF_LOOPBACK)
713 				continue;
714 			sin = &ia->ia_addr;
715 			break;
716 		}
717 
718 	/*
719 	 * If we still didn't find an address, punt.  We could have an
720 	 * interface up (and receiving packets) with no address.
721 	 */
722 	if (sin == (struct sockaddr_in *)0) {
723 		m_freem(m);
724 		goto done;
725 	}
726 
727 	ip->ip_src = sin->sin_addr;
728 	ip->ip_ttl = MAXTTL;
729 
730 	if (optlen > 0) {
731 		u_char *cp;
732 		int opt, cnt;
733 		u_int len;
734 
735 		/*
736 		 * Retrieve any source routing from the incoming packet;
737 		 * add on any record-route or timestamp options.
738 		 */
739 		cp = (u_char *) (ip + 1);
		/*
		 * Without a saved source route, start opts with a zeroed
		 * struct in_addr.  If that allocation fails as well, opts
		 * stays NULL and no options are echoed.
		 */
740 		if ((opts = ip_srcroute()) == 0 &&
741 		    (opts = m_gethdr(M_DONTWAIT, MT_HEADER))) {
742 			opts->m_len = sizeof(struct in_addr);
743 			*mtod(opts, struct in_addr *) = zeroin_addr;
744 		}
745 		if (opts) {
746 #ifdef ICMPPRINTFS
747 		    if (icmpprintfs)
748 			    printf("icmp_reflect optlen %d rt %d => ",
749 				optlen, opts->m_len);
750 #endif
		    /* Walk the option list; stop at EOL or at a malformed length. */
751 		    for (cnt = optlen; cnt > 0; cnt -= len, cp += len) {
752 			    opt = cp[IPOPT_OPTVAL];
753 			    if (opt == IPOPT_EOL)
754 				    break;
755 			    if (opt == IPOPT_NOP)
756 				    len = 1;
757 			    else {
758 				    if (cnt < IPOPT_OLEN + sizeof(*cp))
759 					    break;
760 				    len = cp[IPOPT_OLEN];
761 				    if (len < IPOPT_OLEN + sizeof(*cp) ||
762 				        len > cnt)
763 					    break;
764 			    }
765 			    /*
766 			     * Should check for overflow, but it "can't happen"
767 			     */
768 			    if (opt == IPOPT_RR || opt == IPOPT_TS ||
769 				opt == IPOPT_SECURITY) {
770 				    bcopy((caddr_t)cp,
771 					mtod(opts, caddr_t) + opts->m_len, len);
772 				    opts->m_len += len;
773 			    }
774 		    }
775 		    /* Terminate & pad, if necessary */
776 		    if ((cnt = opts->m_len % 4) != 0) {
777 			    for (; cnt < 4; cnt++) {
778 				    *(mtod(opts, caddr_t) + opts->m_len) =
779 					IPOPT_EOL;
780 				    opts->m_len++;
781 			    }
782 		    }
783 #ifdef ICMPPRINTFS
784 		    if (icmpprintfs)
785 			    printf("%d\n", opts->m_len);
786 #endif
787 		}
788 		/*
789 		 * Now strip out original options by copying rest of first
790 		 * mbuf's data back, and adjust the IP length.
791 		 */
792 		ip->ip_len -= optlen;
793 		ip->ip_hl = sizeof(struct ip) >> 2;
794 		m->m_len -= optlen;
795 		if (m->m_flags & M_PKTHDR)
796 			m->m_pkthdr.len -= optlen;
		/* From here on optlen is the full old header length. */
797 		optlen += sizeof(struct ip);
798 		bcopy((caddr_t)ip + optlen, (caddr_t)(ip + 1),
799 			 (unsigned)(m->m_len - sizeof(struct ip)));
800 	}
	/* Clear the link-level broadcast/multicast flags before sending. */
801 	m->m_flags &= ~(M_BCAST|M_MCAST);
802 	icmp_send(m, opts);
803 done:
	/* opts is owned by this function and released on every path. */
804 	if (opts)
805 		(void)m_free(opts);
806 }
807 
808 /*
809  * Send an icmp packet back to the ip level,
810  * after supplying a checksum.
811  */
812 void
813 icmp_send(m, opts)
814 	struct mbuf *m;
815 	struct mbuf *opts;
816 {
817 	struct ip *ip = mtod(m, struct ip *);
818 	int hlen;
819 	struct icmp *icp;
820 
821 	hlen = ip->ip_hl << 2;
822 	m->m_data += hlen;
823 	m->m_len -= hlen;
824 	icp = mtod(m, struct icmp *);
825 	icp->icmp_cksum = 0;
826 	icp->icmp_cksum = in_cksum(m, ip->ip_len - hlen);
827 	m->m_data -= hlen;
828 	m->m_len += hlen;
829 #ifdef ICMPPRINTFS
830 	if (icmpprintfs)
831 		printf("icmp_send dst %x src %x\n", ip->ip_dst, ip->ip_src);
832 #endif
833 #ifdef IPSEC
834 	/* Don't lookup socket */
835 	(void)ipsec_setsocket(m, NULL);
836 #endif
837 	(void) ip_output(m, opts, NULL, 0, NULL);
838 }
839 
840 n_time
841 iptime()
842 {
843 	struct timeval atv;
844 	u_long t;
845 
846 	microtime(&atv);
847 	t = (atv.tv_sec % (24*60*60)) * 1000 + atv.tv_usec / 1000;
848 	return (htonl(t));
849 }
850 
851 int
852 icmp_sysctl(name, namelen, oldp, oldlenp, newp, newlen)
853 	int *name;
854 	u_int namelen;
855 	void *oldp;
856 	size_t *oldlenp;
857 	void *newp;
858 	size_t newlen;
859 {
860 	int arg, error;
861 
862 	/* All sysctl names at this level are terminal. */
863 	if (namelen != 1)
864 		return (ENOTDIR);
865 
866 	switch (name[0])
867 	{
868 	case ICMPCTL_MASKREPL:
869 		error = sysctl_int(oldp, oldlenp, newp, newlen, &icmpmaskrepl);
870 		break;
871 	case ICMPCTL_RETURNDATABYTES:
872 		arg = icmpreturndatabytes;
873 		error = sysctl_int(oldp, oldlenp, newp, newlen, &arg);
874 		if (error)
875 			break;
876 		if ((arg >= 8) || (arg <= 512))
877 			icmpreturndatabytes = arg;
878 		else
879 			error = EINVAL;
880 		break;
881 	case ICMPCTL_ERRPPSLIMIT:
882 		error = sysctl_int(oldp, oldlenp, newp, newlen, &icmperrppslim);
883 		break;
884 	default:
885 		error = ENOPROTOOPT;
886 		break;
887 	}
888 	return error;
889 }
890 
891 void
892 icmp_mtudisc(icp, faddr)
893 	struct icmp *icp;
894 	struct in_addr faddr;
895 {
896 	struct icmp_mtudisc_callback *mc;
897 	struct sockaddr *dst = sintosa(&icmpsrc);
898 	struct rtentry *rt;
899 	u_long mtu = ntohs(icp->icmp_nextmtu);  /* Why a long?  IPv6 */
900 	int    error;
901 
902 	/* Table of common MTUs: */
903 
904 	static u_long mtu_table[] = {65535, 65280, 32000, 17914, 9180, 8166,
905 				     4352, 2002, 1492, 1006, 508, 296, 68, 0};
906 
907 	rt = rtalloc1(dst, 1);
908 	if (rt == 0)
909 		return;
910 
911 	/* If we didn't get a host route, allocate one */
912 
913 	if ((rt->rt_flags & RTF_HOST) == 0) {
914 		struct rtentry *nrt;
915 
916 		error = rtrequest((int) RTM_ADD, dst,
917 		    (struct sockaddr *) rt->rt_gateway,
918 		    (struct sockaddr *) 0,
919 		    RTF_GATEWAY | RTF_HOST | RTF_DYNAMIC, &nrt);
920 		if (error) {
921 			rtfree(rt);
922 			return;
923 		}
924 		nrt->rt_rmx = rt->rt_rmx;
925 		rtfree(rt);
926 		rt = nrt;
927 	}
928 	error = rt_timer_add(rt, icmp_mtudisc_timeout, ip_mtudisc_timeout_q);
929 	if (error) {
930 		rtfree(rt);
931 		return;
932 	}
933 
934 	if (mtu == 0) {
935 		int i = 0;
936 
937 		mtu = icp->icmp_ip.ip_len; /* NTOHS happened in deliver: */
938 		/* Some 4.2BSD-based routers incorrectly adjust the ip_len */
939 		if (mtu > rt->rt_rmx.rmx_mtu && rt->rt_rmx.rmx_mtu != 0)
940 			mtu -= (icp->icmp_ip.ip_hl << 2);
941 
942 		/* If we still can't guess a value, try the route */
943 
944 		if (mtu == 0) {
945 			mtu = rt->rt_rmx.rmx_mtu;
946 
947 			/* If no route mtu, default to the interface mtu */
948 
949 			if (mtu == 0)
950 				mtu = rt->rt_ifp->if_mtu;
951 		}
952 
953 		for (i = 0; i < sizeof(mtu_table) / sizeof(mtu_table[0]); i++)
954 			if (mtu > mtu_table[i]) {
955 				mtu = mtu_table[i];
956 				break;
957 			}
958 	}
959 
960 	/*
961 	 * XXX:   RTV_MTU is overloaded, since the admin can set it
962 	 *	  to turn off PMTU for a route, and the kernel can
963 	 *	  set it to indicate a serious problem with PMTU
964 	 *	  on a route.  We should be using a separate flag
965 	 *	  for the kernel to indicate this.
966 	 */
967 
968 	if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
969 		if (mtu < 296 || mtu > rt->rt_ifp->if_mtu)
970 			rt->rt_rmx.rmx_locks |= RTV_MTU;
971 		else if (rt->rt_rmx.rmx_mtu > mtu ||
972 			 rt->rt_rmx.rmx_mtu == 0) {
973 			icmpstat.icps_pmtuchg++;
974 			rt->rt_rmx.rmx_mtu = mtu;
975 		}
976 	}
977 
978 	if (rt)
979 		rtfree(rt);
980 
981 	/*
982 	 * Notify protocols that the MTU for this destination
983 	 * has changed.
984 	 */
985 	for (mc = LIST_FIRST(&icmp_mtudisc_callbacks); mc != NULL;
986 	     mc = LIST_NEXT(mc, mc_list))
987 		(*mc->mc_func)(faddr);
988 }
989 
/*
 * Return the next larger or smaller MTU plateau (table from RFC 1191)
 * given current value MTU.  If DIR is less than zero, a larger plateau
 * is returned; otherwise, a smaller value is returned.
 *
 * 0 is returned when there is no such plateau: asking for something
 * larger than the biggest entry, asking for something smaller than the
 * smallest non-zero entry, or passing a negative MTU.
 */
int
ip_next_mtu(mtu, dir)	/* XXX */
	int mtu;
	int dir;
{
	static const int mtutab[] = {
		65535, 32000, 17914, 8166, 4352, 2002, 1492, 1006, 508, 296,
		68, 0
	};
	const int ntab = (sizeof mtutab) / (sizeof mtutab[0]);
	int i;

	/* Find the first (i.e. largest) plateau not exceeding mtu. */
	for (i = 0; i < ntab; i++) {
		if (mtu >= mtutab[i])
			break;
	}

	if (dir < 0) {
		/* Next larger plateau is the entry just before the match. */
		if (i == 0)
			return 0;
		return mtutab[i - 1];
	}

	/*
	 * i == ntab only happens for a negative mtu, since the table ends
	 * in 0.  There is nothing smaller to return, and mtutab[i] must
	 * not be read.
	 */
	if (i == ntab || mtutab[i] == 0)
		return 0;
	if (mtu > mtutab[i])
		return mtutab[i];
	return mtutab[i + 1];
}
1027 
1028 static void
1029 icmp_mtudisc_timeout(rt, r)
1030 	struct rtentry *rt;
1031 	struct rttimer *r;
1032 {
1033 	if (rt == NULL)
1034 		panic("icmp_mtudisc_timeout:  bad route to timeout");
1035 	if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) ==
1036 	    (RTF_DYNAMIC | RTF_HOST)) {
1037 		rtrequest((int) RTM_DELETE, (struct sockaddr *)rt_key(rt),
1038 		    rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0);
1039 	} else {
1040 		if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
1041 			rt->rt_rmx.rmx_mtu = 0;
1042 		}
1043 	}
1044 }
1045 
1046 /*
1047  * Perform rate limit check.
1048  * Returns 0 if it is okay to send the icmp packet.
1049  * Returns 1 if the router SHOULD NOT send this icmp packet due to rate
1050  * limitation.
1051  *
1052  * XXX per-destination/type check necessary?
1053  */
1054 static int
1055 icmp_ratelimit(dst, type, code)
1056 	const struct in_addr *dst;
1057 	const int type;			/* not used at this moment */
1058 	const int code;			/* not used at this moment */
1059 {
1060 
1061 	/* PPS limit */
1062 	if (!ppsratecheck(&icmperrppslim_last, &icmperrpps_count,
1063 	    icmperrppslim)) {
1064 		/* The packet is subject to rate limit */
1065 		return 1;
1066 	}
1067 
1068 	/*okay to send*/
1069 	return 0;
1070 }
1071