xref: /openbsd-src/sys/netinet/ip_icmp.c (revision a28daedfc357b214be5c701aa8ba8adb29a7f1c2)
1 /*	$OpenBSD: ip_icmp.c,v 1.82 2008/09/10 09:10:55 henning Exp $	*/
2 /*	$NetBSD: ip_icmp.c,v 1.19 1996/02/13 23:42:22 christos Exp $	*/
3 
4 /*
5  * Copyright (c) 1982, 1986, 1988, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	@(#)COPYRIGHT	1.1 (NRL) 17 January 1995
33  *
34  * NRL grants permission for redistribution and use in source and binary
35  * forms, with or without modification, of the software and documentation
36  * created at NRL provided that the following conditions are met:
37  *
38  * 1. Redistributions of source code must retain the above copyright
39  *    notice, this list of conditions and the following disclaimer.
40  * 2. Redistributions in binary form must reproduce the above copyright
41  *    notice, this list of conditions and the following disclaimer in the
42  *    documentation and/or other materials provided with the distribution.
43  * 3. All advertising materials mentioning features or use of this software
44  *    must display the following acknowledgements:
45  *	This product includes software developed by the University of
46  *	California, Berkeley and its contributors.
47  *	This product includes software developed at the Information
48  *	Technology Division, US Naval Research Laboratory.
49  * 4. Neither the name of the NRL nor the names of its contributors
50  *    may be used to endorse or promote products derived from this software
51  *    without specific prior written permission.
52  *
53  * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
54  * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
55  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
56  * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL NRL OR
57  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
58  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
59  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
60  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
61  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
62  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
63  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
64  *
65  * The views and conclusions contained in the software and documentation
66  * are those of the authors and should not be interpreted as representing
67  * official policies, either expressed or implied, of the US Naval
68  * Research Laboratory (NRL).
69  */
70 
71 #include "carp.h"
72 #include "pf.h"
73 
74 #include <sys/param.h>
75 #include <sys/systm.h>
76 #include <sys/mbuf.h>
77 #include <sys/protosw.h>
78 #include <sys/socket.h>
79 #include <sys/sysctl.h>
80 
81 #include <net/if.h>
82 #include <net/route.h>
83 
84 #include <netinet/in.h>
85 #include <netinet/in_systm.h>
86 #include <netinet/in_var.h>
87 #include <netinet/ip.h>
88 #include <netinet/ip_icmp.h>
89 #include <netinet/ip_var.h>
90 #include <netinet/icmp_var.h>
91 
92 #if NCARP > 0
93 #include <net/if_types.h>
94 #include <netinet/ip_carp.h>
95 #endif
96 
97 #if NPF > 0
98 #include <net/pfvar.h>
99 #endif
100 
101 /*
102  * ICMP routines: error generation, receive packet processing, and
103  * routines to turnaround packets back to the originator, and
104  * host table maintenance routines.
105  */
106 
107 int	icmpmaskrepl = 0;
108 int	icmpbmcastecho = 0;
109 int	icmptstamprepl = 1;
110 #ifdef ICMPPRINTFS
111 int	icmpprintfs = 0;
112 #endif
113 int	icmperrppslim = 100;
114 int	icmperrpps_count = 0;
115 struct timeval icmperrppslim_last;
116 int	icmp_rediraccept = 1;
117 int	icmp_redirtimeout = 10 * 60;
118 static struct rttimer_queue *icmp_redirect_timeout_q = NULL;
119 struct	icmpstat icmpstat;
120 
121 int *icmpctl_vars[ICMPCTL_MAXID] = ICMPCTL_VARS;
122 
123 void icmp_mtudisc_timeout(struct rtentry *, struct rttimer *);
124 int icmp_ratelimit(const struct in_addr *, const int, const int);
125 void icmp_redirect_timeout(struct rtentry *, struct rttimer *);
126 
127 extern	struct protosw inetsw[];
128 
129 void
130 icmp_init(void)
131 {
132 	/*
133 	 * This is only useful if the user initializes redirtimeout to
134 	 * something other than zero.
135 	 */
136 	if (icmp_redirtimeout != 0) {
137 		icmp_redirect_timeout_q =
138 		    rt_timer_queue_create(icmp_redirtimeout);
139 	}
140 }
141 
142 struct mbuf *
143 icmp_do_error(struct mbuf *n, int type, int code, n_long dest, int destmtu)
144 {
145 	struct ip *oip = mtod(n, struct ip *), *nip;
146 	unsigned oiplen = oip->ip_hl << 2;
147 	struct icmp *icp;
148 	struct mbuf *m;
149 	unsigned icmplen, mblen;
150 
151 #ifdef ICMPPRINTFS
152 	if (icmpprintfs)
153 		printf("icmp_error(%x, %d, %d)\n", oip, type, code);
154 #endif
155 	if (type != ICMP_REDIRECT)
156 		icmpstat.icps_error++;
157 	/*
158 	 * Don't send error if not the first fragment of message.
159 	 * Don't error if the old packet protocol was ICMP
160 	 * error message, only known informational types.
161 	 */
162 	if (oip->ip_off & htons(IP_OFFMASK))
163 		goto freeit;
164 	if (oip->ip_p == IPPROTO_ICMP && type != ICMP_REDIRECT &&
165 	    n->m_len >= oiplen + ICMP_MINLEN &&
166 	    !ICMP_INFOTYPE(((struct icmp *)
167 	    ((caddr_t)oip + oiplen))->icmp_type)) {
168 		icmpstat.icps_oldicmp++;
169 		goto freeit;
170 	}
171 	/* Don't send error in response to a multicast or broadcast packet */
172 	if (n->m_flags & (M_BCAST|M_MCAST))
173 		goto freeit;
174 
175 	/*
176 	 * First, do a rate limitation check.
177 	 */
178 	if (icmp_ratelimit(&oip->ip_src, type, code))
179 		goto freeit;	/* XXX stat */
180 
181 	/*
182 	 * Now, formulate icmp message
183 	 */
184 	icmplen = oiplen + min(8, ntohs(oip->ip_len));
185 	/*
186 	 * Defend against mbuf chains shorter than oip->ip_len:
187 	 */
188 	mblen = 0;
189 	for (m = n; m && (mblen < icmplen); m = m->m_next)
190 		mblen += m->m_len;
191 	icmplen = min(mblen, icmplen);
192 
193 	/*
194 	 * As we are not required to return everything we have,
195 	 * we return whatever we can return at ease.
196 	 *
197 	 * Note that ICMP datagrams longer than 576 octets are out of spec
198 	 * according to RFC1812;
199 	 */
200 
201 	KASSERT(ICMP_MINLEN <= MCLBYTES);
202 
203 	if (icmplen + ICMP_MINLEN > MCLBYTES)
204 		icmplen = MCLBYTES - ICMP_MINLEN - sizeof (struct ip);
205 
206 	m = m_gethdr(M_DONTWAIT, MT_HEADER);
207 	if (m && (sizeof (struct ip) + icmplen + ICMP_MINLEN > MHLEN)) {
208 		MCLGET(m, M_DONTWAIT);
209 		if ((m->m_flags & M_EXT) == 0) {
210 			m_freem(m);
211 			m = NULL;
212 		}
213 	}
214 	if (m == NULL)
215 		goto freeit;
216 	m->m_len = icmplen + ICMP_MINLEN;
217 	if ((m->m_flags & M_EXT) == 0)
218 		MH_ALIGN(m, m->m_len);
219 	icp = mtod(m, struct icmp *);
220 	if ((u_int)type > ICMP_MAXTYPE)
221 		panic("icmp_error");
222 	icmpstat.icps_outhist[type]++;
223 	icp->icmp_type = type;
224 	if (type == ICMP_REDIRECT)
225 		icp->icmp_gwaddr.s_addr = dest;
226 	else {
227 		icp->icmp_void = 0;
228 		/*
229 		 * The following assignments assume an overlay with the
230 		 * zeroed icmp_void field.
231 		 */
232 		if (type == ICMP_PARAMPROB) {
233 			icp->icmp_pptr = code;
234 			code = 0;
235 		} else if (type == ICMP_UNREACH &&
236 		    code == ICMP_UNREACH_NEEDFRAG && destmtu)
237 			icp->icmp_nextmtu = htons(destmtu);
238 	}
239 
240 	icp->icmp_code = code;
241 	m_copydata(n, 0, icmplen, (caddr_t)&icp->icmp_ip);
242 
243 	/*
244 	 * Now, copy old ip header (without options)
245 	 * in front of icmp message.
246 	 */
247 	if ((m->m_flags & M_EXT) == 0 &&
248 	    m->m_data - sizeof(struct ip) < m->m_pktdat)
249 		panic("icmp len");
250 	m->m_data -= sizeof(struct ip);
251 	m->m_len += sizeof(struct ip);
252 	m->m_pkthdr.len = m->m_len;
253 	m->m_pkthdr.rcvif = n->m_pkthdr.rcvif;
254 	nip = mtod(m, struct ip *);
255 	/* ip_v set in ip_output */
256 	nip->ip_hl = sizeof(struct ip) >> 2;
257 	nip->ip_tos = 0;
258 	nip->ip_len = htons(m->m_len);
259 	/* ip_id set in ip_output */
260 	nip->ip_off = 0;
261 	/* ip_ttl set in icmp_reflect */
262 	nip->ip_p = IPPROTO_ICMP;
263 	nip->ip_src = oip->ip_src;
264 	nip->ip_dst = oip->ip_dst;
265 
266 	/* move PF_GENERATED to new packet, if existent XXX preserve more? */
267 	if (n->m_pkthdr.pf.flags & PF_TAG_GENERATED)
268 		m->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
269 
270 	m_freem(n);
271 	return (m);
272 
273 freeit:
274 	m_freem(n);
275 	return (NULL);
276 }
277 
278 /*
279  * Generate an error packet of type error
280  * in response to bad packet ip.
281  *
282  * The ip packet inside has ip_off and ip_len in host byte order.
283  */
284 void
285 icmp_error(struct mbuf *n, int type, int code, n_long dest, int destmtu)
286 {
287 	struct mbuf *m;
288 
289 	m = icmp_do_error(n, type, code, dest, destmtu);
290 	if (m != NULL)
291 		icmp_reflect(m);
292 }
293 
294 struct sockaddr_in icmpsrc = { sizeof (struct sockaddr_in), AF_INET };
295 static struct sockaddr_in icmpdst = { sizeof (struct sockaddr_in), AF_INET };
296 static struct sockaddr_in icmpgw = { sizeof (struct sockaddr_in), AF_INET };
297 
298 /*
299  * Process a received ICMP message.
300  */
301 void
302 icmp_input(struct mbuf *m, ...)
303 {
304 	struct icmp *icp;
305 	struct ip *ip = mtod(m, struct ip *);
306 	int icmplen;
307 	int i;
308 	struct in_ifaddr *ia;
309 	void *(*ctlfunc)(int, struct sockaddr *, void *);
310 	int code;
311 	extern u_char ip_protox[];
312 	int hlen;
313 	va_list ap;
314 	struct rtentry *rt;
315 
316 	va_start(ap, m);
317 	hlen = va_arg(ap, int);
318 	va_end(ap);
319 
320 	/*
321 	 * Locate icmp structure in mbuf, and check
322 	 * that not corrupted and of at least minimum length.
323 	 */
324 	icmplen = ntohs(ip->ip_len) - hlen;
325 #ifdef ICMPPRINTFS
326 	if (icmpprintfs) {
327 		char buf[4 * sizeof("123")];
328 
329 		strlcpy(buf, inet_ntoa(ip->ip_dst), sizeof buf);
330 		printf("icmp_input from %s to %s, len %d\n",
331 		    inet_ntoa(ip->ip_src), buf, icmplen);
332 	}
333 #endif
334 	if (icmplen < ICMP_MINLEN) {
335 		icmpstat.icps_tooshort++;
336 		goto freeit;
337 	}
338 	i = hlen + min(icmplen, ICMP_ADVLENMIN);
339 	if (m->m_len < i && (m = m_pullup(m, i)) == NULL) {
340 		icmpstat.icps_tooshort++;
341 		return;
342 	}
343 	ip = mtod(m, struct ip *);
344 	m->m_len -= hlen;
345 	m->m_data += hlen;
346 	icp = mtod(m, struct icmp *);
347 	if (in_cksum(m, icmplen)) {
348 		icmpstat.icps_checksum++;
349 		goto freeit;
350 	}
351 	m->m_len += hlen;
352 	m->m_data -= hlen;
353 
354 #ifdef ICMPPRINTFS
355 	/*
356 	 * Message type specific processing.
357 	 */
358 	if (icmpprintfs)
359 		printf("icmp_input, type %d code %d\n", icp->icmp_type,
360 		    icp->icmp_code);
361 #endif
362 	if (icp->icmp_type > ICMP_MAXTYPE)
363 		goto raw;
364 	icmpstat.icps_inhist[icp->icmp_type]++;
365 	code = icp->icmp_code;
366 	switch (icp->icmp_type) {
367 
368 	case ICMP_UNREACH:
369 		switch (code) {
370 		case ICMP_UNREACH_NET:
371 		case ICMP_UNREACH_HOST:
372 		case ICMP_UNREACH_PROTOCOL:
373 		case ICMP_UNREACH_PORT:
374 		case ICMP_UNREACH_SRCFAIL:
375 			code += PRC_UNREACH_NET;
376 			break;
377 
378 		case ICMP_UNREACH_NEEDFRAG:
379 			code = PRC_MSGSIZE;
380 			break;
381 
382 		case ICMP_UNREACH_NET_UNKNOWN:
383 		case ICMP_UNREACH_NET_PROHIB:
384 		case ICMP_UNREACH_TOSNET:
385 			code = PRC_UNREACH_NET;
386 			break;
387 
388 		case ICMP_UNREACH_HOST_UNKNOWN:
389 		case ICMP_UNREACH_ISOLATED:
390 		case ICMP_UNREACH_HOST_PROHIB:
391 		case ICMP_UNREACH_TOSHOST:
392 		case ICMP_UNREACH_FILTER_PROHIB:
393 		case ICMP_UNREACH_HOST_PRECEDENCE:
394 		case ICMP_UNREACH_PRECEDENCE_CUTOFF:
395 			code = PRC_UNREACH_HOST;
396 			break;
397 
398 		default:
399 			goto badcode;
400 		}
401 		goto deliver;
402 
403 	case ICMP_TIMXCEED:
404 		if (code > 1)
405 			goto badcode;
406 		code += PRC_TIMXCEED_INTRANS;
407 		goto deliver;
408 
409 	case ICMP_PARAMPROB:
410 		if (code > 1)
411 			goto badcode;
412 		code = PRC_PARAMPROB;
413 		goto deliver;
414 
415 	case ICMP_SOURCEQUENCH:
416 		if (code)
417 			goto badcode;
418 		code = PRC_QUENCH;
419 	deliver:
420 		/* Free packet atttributes */
421 		if (m->m_flags & M_PKTHDR)
422 			m_tag_delete_chain(m);
423 
424 		/*
425 		 * Problem with datagram; advise higher level routines.
426 		 */
427 		if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
428 		    icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
429 			icmpstat.icps_badlen++;
430 			goto freeit;
431 		}
432 		if (IN_MULTICAST(icp->icmp_ip.ip_dst.s_addr))
433 			goto badcode;
434 #ifdef INET6
435 		/* Get more contiguous data for a v6 in v4 ICMP message. */
436 		if (icp->icmp_ip.ip_p == IPPROTO_IPV6) {
437 			if (icmplen < ICMP_V6ADVLENMIN ||
438 			    icmplen < ICMP_V6ADVLEN(icp)) {
439 				icmpstat.icps_badlen++;
440 				goto freeit;
441 			} else {
442 				if ((m = m_pullup(m, (ip->ip_hl << 2) +
443 				    ICMP_V6ADVLEN(icp))) == NULL) {
444 					icmpstat.icps_tooshort++;
445 					return;
446 				}
447 				ip = mtod(m, struct ip *);
448 				icp = (struct icmp *)
449 				    (m->m_data + (ip->ip_hl << 2));
450 			}
451 		}
452 #endif /* INET6 */
453 #ifdef ICMPPRINTFS
454 		if (icmpprintfs)
455 			printf("deliver to protocol %d\n", icp->icmp_ip.ip_p);
456 #endif
457 		icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
458 #if NCARP > 0
459 		if (m->m_pkthdr.rcvif->if_type == IFT_CARP &&
460 		    carp_lsdrop(m, AF_INET, &icmpsrc.sin_addr.s_addr,
461 		    &ip->ip_dst.s_addr))
462 			goto freeit;
463 #endif
464 		/*
465 		 * XXX if the packet contains [IPv4 AH TCP], we can't make a
466 		 * notification to TCP layer.
467 		 */
468 		ctlfunc = inetsw[ip_protox[icp->icmp_ip.ip_p]].pr_ctlinput;
469 		if (ctlfunc)
470 			(*ctlfunc)(code, sintosa(&icmpsrc), &icp->icmp_ip);
471 		break;
472 
473 	badcode:
474 		icmpstat.icps_badcode++;
475 		break;
476 
477 	case ICMP_ECHO:
478 		if (!icmpbmcastecho &&
479 		    (m->m_flags & (M_MCAST | M_BCAST)) != 0) {
480 			icmpstat.icps_bmcastecho++;
481 			break;
482 		}
483 		icp->icmp_type = ICMP_ECHOREPLY;
484 		goto reflect;
485 
486 	case ICMP_TSTAMP:
487 		if (icmptstamprepl == 0)
488 			break;
489 
490 		if (!icmpbmcastecho &&
491 		    (m->m_flags & (M_MCAST | M_BCAST)) != 0) {
492 			icmpstat.icps_bmcastecho++;
493 			break;
494 		}
495 		if (icmplen < ICMP_TSLEN) {
496 			icmpstat.icps_badlen++;
497 			break;
498 		}
499 		icp->icmp_type = ICMP_TSTAMPREPLY;
500 		icp->icmp_rtime = iptime();
501 		icp->icmp_ttime = icp->icmp_rtime;	/* bogus, do later! */
502 		goto reflect;
503 
504 	case ICMP_MASKREQ:
505 		if (icmpmaskrepl == 0)
506 			break;
507 		/*
508 		 * We are not able to respond with all ones broadcast
509 		 * unless we receive it over a point-to-point interface.
510 		 */
511 		if (icmplen < ICMP_MASKLEN) {
512 			icmpstat.icps_badlen++;
513 			break;
514 		}
515 		if (ip->ip_dst.s_addr == INADDR_BROADCAST ||
516 		    ip->ip_dst.s_addr == INADDR_ANY)
517 			icmpdst.sin_addr = ip->ip_src;
518 		else
519 			icmpdst.sin_addr = ip->ip_dst;
520 		if (m->m_pkthdr.rcvif == NULL)
521 			break;
522 		ia = ifatoia(ifaof_ifpforaddr(sintosa(&icmpdst),
523 		    m->m_pkthdr.rcvif));
524 		if (ia == 0)
525 			break;
526 		icp->icmp_type = ICMP_MASKREPLY;
527 		icp->icmp_mask = ia->ia_sockmask.sin_addr.s_addr;
528 		if (ip->ip_src.s_addr == 0) {
529 			if (ia->ia_ifp->if_flags & IFF_BROADCAST)
530 				ip->ip_src = ia->ia_broadaddr.sin_addr;
531 			else if (ia->ia_ifp->if_flags & IFF_POINTOPOINT)
532 				ip->ip_src = ia->ia_dstaddr.sin_addr;
533 		}
534 reflect:
535 #if NCARP > 0
536 		if (m->m_pkthdr.rcvif->if_type == IFT_CARP &&
537 		    carp_lsdrop(m, AF_INET, &ip->ip_src.s_addr,
538 		    &ip->ip_dst.s_addr))
539 			goto freeit;
540 #endif
541 		/* Free packet atttributes */
542 		if (m->m_flags & M_PKTHDR)
543 			m_tag_delete_chain(m);
544 
545 		icmpstat.icps_reflect++;
546 		icmpstat.icps_outhist[icp->icmp_type]++;
547 		icmp_reflect(m);
548 		return;
549 
550 	case ICMP_REDIRECT:
551 		/* Free packet atttributes */
552 		if (m->m_flags & M_PKTHDR)
553 			m_tag_delete_chain(m);
554 		if (icmp_rediraccept == 0)
555 			goto freeit;
556 		if (code > 3)
557 			goto badcode;
558 		if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
559 		    icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
560 			icmpstat.icps_badlen++;
561 			break;
562 		}
563 		/*
564 		 * Short circuit routing redirects to force
565 		 * immediate change in the kernel's routing
566 		 * tables.  The message is also handed to anyone
567 		 * listening on a raw socket (e.g. the routing
568 		 * daemon for use in updating its tables).
569 		 */
570 		icmpgw.sin_addr = ip->ip_src;
571 		icmpdst.sin_addr = icp->icmp_gwaddr;
572 #ifdef	ICMPPRINTFS
573 		if (icmpprintfs) {
574 			char buf[4 * sizeof("123")];
575 			strlcpy(buf, inet_ntoa(icp->icmp_ip.ip_dst),
576 			    sizeof buf);
577 
578 			printf("redirect dst %s to %s\n",
579 			    buf, inet_ntoa(icp->icmp_gwaddr));
580 		}
581 #endif
582 		icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
583 #if NCARP > 0
584 		if (m->m_pkthdr.rcvif->if_type == IFT_CARP &&
585 		    carp_lsdrop(m, AF_INET, &icmpsrc.sin_addr.s_addr,
586 		    &ip->ip_dst.s_addr))
587 			goto freeit;
588 #endif
589 		rt = NULL;
590 		rtredirect(sintosa(&icmpsrc), sintosa(&icmpdst),
591 		    (struct sockaddr *)0, RTF_GATEWAY | RTF_HOST,
592 		    sintosa(&icmpgw), (struct rtentry **)&rt);
593 		if (rt != NULL && icmp_redirtimeout != 0) {
594 			(void)rt_timer_add(rt, icmp_redirect_timeout,
595 			    icmp_redirect_timeout_q);
596 		}
597 		if (rt != NULL)
598 			rtfree(rt);
599 		pfctlinput(PRC_REDIRECT_HOST, sintosa(&icmpsrc));
600 		break;
601 
602 	/*
603 	 * No kernel processing for the following;
604 	 * just fall through to send to raw listener.
605 	 */
606 	case ICMP_ECHOREPLY:
607 	case ICMP_ROUTERADVERT:
608 	case ICMP_ROUTERSOLICIT:
609 	case ICMP_TSTAMPREPLY:
610 	case ICMP_IREQREPLY:
611 	case ICMP_MASKREPLY:
612 	case ICMP_TRACEROUTE:
613 	case ICMP_DATACONVERR:
614 	case ICMP_MOBILE_REDIRECT:
615 	case ICMP_IPV6_WHEREAREYOU:
616 	case ICMP_IPV6_IAMHERE:
617 	case ICMP_MOBILE_REGREQUEST:
618 	case ICMP_MOBILE_REGREPLY:
619 	case ICMP_PHOTURIS:
620 	default:
621 		break;
622 	}
623 
624 raw:
625 	rip_input(m);
626 	return;
627 
628 freeit:
629 	m_freem(m);
630 }
631 
632 /*
633  * Reflect the ip packet back to the source
634  */
635 void
636 icmp_reflect(struct mbuf *m)
637 {
638 	struct ip *ip = mtod(m, struct ip *);
639 	struct in_ifaddr *ia;
640 	struct in_addr t;
641 	struct mbuf *opts = 0;
642 	int optlen = (ip->ip_hl << 2) - sizeof(struct ip);
643 
644 	if (!in_canforward(ip->ip_src) &&
645 	    ((ip->ip_src.s_addr & IN_CLASSA_NET) !=
646 	    htonl(IN_LOOPBACKNET << IN_CLASSA_NSHIFT))) {
647 		m_freem(m);	/* Bad return address */
648 		goto done;	/* ip_output() will check for broadcast */
649 	}
650 
651 #if NPF > 0
652 	pf_pkt_addr_changed(m);
653 #endif
654 	t = ip->ip_dst;
655 	ip->ip_dst = ip->ip_src;
656 	/*
657 	 * If the incoming packet was addressed directly to us,
658 	 * use dst as the src for the reply.  For broadcast, use
659 	 * the address which corresponds to the incoming interface.
660 	 */
661 	TAILQ_FOREACH(ia, &in_ifaddr, ia_list) {
662 		if (t.s_addr == ia->ia_addr.sin_addr.s_addr)
663 			break;
664 		if ((ia->ia_ifp->if_flags & IFF_BROADCAST) &&
665 		    t.s_addr == ia->ia_broadaddr.sin_addr.s_addr)
666 			break;
667 	}
668 	/*
669 	 * The following happens if the packet was not addressed to us.
670 	 * Use the new source address and do a route lookup. If it fails
671 	 * drop the packet as there is no path to the host.
672 	 */
673 	if (ia == (struct in_ifaddr *)0) {
674 		struct sockaddr_in *dst;
675 		struct route ro;
676 
677 		bzero((caddr_t) &ro, sizeof(ro));
678 		dst = satosin(&ro.ro_dst);
679 		dst->sin_family = AF_INET;
680 		dst->sin_len = sizeof(*dst);
681 		dst->sin_addr = ip->ip_src;
682 
683 		rtalloc(&ro);
684 		if (ro.ro_rt == 0) {
685 			ipstat.ips_noroute++;
686 			m_freem(m);
687 			goto done;
688 		}
689 
690 		ia = ifatoia(ro.ro_rt->rt_ifa);
691 		ro.ro_rt->rt_use++;
692 		RTFREE(ro.ro_rt);
693 	}
694 
695 	t = ia->ia_addr.sin_addr;
696 	ip->ip_src = t;
697 	ip->ip_ttl = MAXTTL;
698 
699 	if (optlen > 0) {
700 		u_char *cp;
701 		int opt, cnt;
702 		u_int len;
703 
704 		/*
705 		 * Retrieve any source routing from the incoming packet;
706 		 * add on any record-route or timestamp options.
707 		 */
708 		cp = (u_char *) (ip + 1);
709 		if ((opts = ip_srcroute()) == 0 &&
710 		    (opts = m_gethdr(M_DONTWAIT, MT_HEADER))) {
711 			opts->m_len = sizeof(struct in_addr);
712 			mtod(opts, struct in_addr *)->s_addr = 0;
713 		}
714 		if (opts) {
715 #ifdef ICMPPRINTFS
716 			if (icmpprintfs)
717 				printf("icmp_reflect optlen %d rt %d => ",
718 				    optlen, opts->m_len);
719 #endif
720 			for (cnt = optlen; cnt > 0; cnt -= len, cp += len) {
721 				opt = cp[IPOPT_OPTVAL];
722 				if (opt == IPOPT_EOL)
723 					break;
724 				if (opt == IPOPT_NOP)
725 					len = 1;
726 				else {
727 					if (cnt < IPOPT_OLEN + sizeof(*cp))
728 						break;
729 					len = cp[IPOPT_OLEN];
730 					if (len < IPOPT_OLEN + sizeof(*cp) ||
731 					    len > cnt)
732 						break;
733 				}
734 				/*
735 				 * Should check for overflow, but it
736 				 * "can't happen"
737 				 */
738 				if (opt == IPOPT_RR || opt == IPOPT_TS ||
739 				    opt == IPOPT_SECURITY) {
740 					bcopy((caddr_t)cp,
741 					    mtod(opts, caddr_t) + opts->m_len,
742 					    len);
743 					opts->m_len += len;
744 				}
745 			}
746 			/* Terminate & pad, if necessary */
747 			if ((cnt = opts->m_len % 4) != 0)
748 				for (; cnt < 4; cnt++) {
749 					*(mtod(opts, caddr_t) + opts->m_len) =
750 					    IPOPT_EOL;
751 					opts->m_len++;
752 				}
753 #ifdef ICMPPRINTFS
754 			if (icmpprintfs)
755 				printf("%d\n", opts->m_len);
756 #endif
757 		}
758 		/*
759 		 * Now strip out original options by copying rest of first
760 		 * mbuf's data back, and adjust the IP length.
761 		 */
762 		ip->ip_len = htons(ntohs(ip->ip_len) - optlen);
763 		ip->ip_hl = sizeof(struct ip) >> 2;
764 		m->m_len -= optlen;
765 		if (m->m_flags & M_PKTHDR)
766 			m->m_pkthdr.len -= optlen;
767 		optlen += sizeof(struct ip);
768 		bcopy((caddr_t)ip + optlen, (caddr_t)(ip + 1),
769 		    (unsigned)(m->m_len - sizeof(struct ip)));
770 	}
771 	m->m_flags &= ~(M_BCAST|M_MCAST);
772 	icmp_send(m, opts);
773 done:
774 	if (opts)
775 		(void)m_free(opts);
776 }
777 
778 /*
779  * Send an icmp packet back to the ip level,
780  * after supplying a checksum.
781  */
782 void
783 icmp_send(struct mbuf *m, struct mbuf *opts)
784 {
785 	struct ip *ip = mtod(m, struct ip *);
786 	int hlen;
787 	struct icmp *icp;
788 
789 	hlen = ip->ip_hl << 2;
790 	m->m_data += hlen;
791 	m->m_len -= hlen;
792 	icp = mtod(m, struct icmp *);
793 	icp->icmp_cksum = 0;
794 	icp->icmp_cksum = in_cksum(m, ntohs(ip->ip_len) - hlen);
795 	m->m_data -= hlen;
796 	m->m_len += hlen;
797 #ifdef ICMPPRINTFS
798 	if (icmpprintfs) {
799 		char buf[4 * sizeof("123")];
800 
801 		strlcpy(buf, inet_ntoa(ip->ip_dst), sizeof buf);
802 		printf("icmp_send dst %s src %s\n",
803 		    buf, inet_ntoa(ip->ip_src));
804 	}
805 #endif
806 	(void)ip_output(m, opts, (void *)NULL, 0, (void *)NULL, (void *)NULL);
807 }
808 
809 n_time
810 iptime(void)
811 {
812 	struct timeval atv;
813 	u_long t;
814 
815 	microtime(&atv);
816 	t = (atv.tv_sec % (24*60*60)) * 1000 + atv.tv_usec / 1000;
817 	return (htonl(t));
818 }
819 
820 int
821 icmp_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
822     size_t newlen)
823 {
824 
825 	/* All sysctl names at this level are terminal. */
826 	if (namelen != 1)
827 		return (ENOTDIR);
828 
829 	switch (name[0]) {
830 	case ICMPCTL_REDIRTIMEOUT: {
831 		int error;
832 
833 		error = sysctl_int(oldp, oldlenp, newp, newlen,
834 		    &icmp_redirtimeout);
835 		if (icmp_redirect_timeout_q != NULL) {
836 			if (icmp_redirtimeout == 0) {
837 				rt_timer_queue_destroy(icmp_redirect_timeout_q,
838 				    TRUE);
839 				icmp_redirect_timeout_q = NULL;
840 			} else
841 				rt_timer_queue_change(icmp_redirect_timeout_q,
842 				    icmp_redirtimeout);
843 		} else if (icmp_redirtimeout > 0) {
844 			icmp_redirect_timeout_q =
845 			    rt_timer_queue_create(icmp_redirtimeout);
846 		}
847 		return (error);
848 
849 		break;
850 	}
851 	case ICMPCTL_STATS:
852 		if (newp != NULL)
853 			return (EPERM);
854 		return (sysctl_struct(oldp, oldlenp, newp, newlen,
855 		    &icmpstat, sizeof(icmpstat)));
856 	default:
857 		if (name[0] < ICMPCTL_MAXID)
858 			return (sysctl_int_arr(icmpctl_vars, name, namelen,
859 			    oldp, oldlenp, newp, newlen));
860 		return (ENOPROTOOPT);
861 	}
862 	/* NOTREACHED */
863 }
864 
865 
866 /* XXX only handles table 0 right now */
867 struct rtentry *
868 icmp_mtudisc_clone(struct sockaddr *dst)
869 {
870 	struct rtentry *rt;
871 	int error;
872 
873 	rt = rtalloc1(dst, 1, 0);
874 	if (rt == 0)
875 		return (NULL);
876 
877 	/* If we didn't get a host route, allocate one */
878 
879 	if ((rt->rt_flags & RTF_HOST) == 0) {
880 		struct rtentry *nrt;
881 		struct rt_addrinfo info;
882 
883 		bzero(&info, sizeof(info));
884 		info.rti_info[RTAX_DST] = dst;
885 		info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
886 		info.rti_flags = RTF_GATEWAY | RTF_HOST | RTF_DYNAMIC;
887 
888 		error = rtrequest1(RTM_ADD, &info, rt->rt_priority, &nrt, 0);
889 		if (error) {
890 			rtfree(rt);
891 			return (NULL);
892 		}
893 		nrt->rt_rmx = rt->rt_rmx;
894 		rtfree(rt);
895 		rt = nrt;
896 	}
897 	error = rt_timer_add(rt, icmp_mtudisc_timeout, ip_mtudisc_timeout_q);
898 	if (error) {
899 		rtfree(rt);
900 		return (NULL);
901 	}
902 
903 	return (rt);
904 }
905 
906 void
907 icmp_mtudisc(struct icmp *icp)
908 {
909 	struct rtentry *rt;
910 	struct sockaddr *dst = sintosa(&icmpsrc);
911 	u_long mtu = ntohs(icp->icmp_nextmtu);  /* Why a long?  IPv6 */
912 
913 	/* Table of common MTUs: */
914 
915 	static u_short mtu_table[] = {
916 		65535, 65280, 32000, 17914, 9180, 8166,
917 		4352, 2002, 1492, 1006, 508, 296, 68, 0
918 	};
919 
920 	rt = icmp_mtudisc_clone(dst);
921 	if (rt == 0)
922 		return;
923 
924 	if (mtu == 0) {
925 		int i = 0;
926 
927 		mtu = ntohs(icp->icmp_ip.ip_len);
928 		/* Some 4.2BSD-based routers incorrectly adjust the ip_len */
929 		if (mtu > rt->rt_rmx.rmx_mtu && rt->rt_rmx.rmx_mtu != 0)
930 			mtu -= (icp->icmp_ip.ip_hl << 2);
931 
932 		/* If we still can't guess a value, try the route */
933 
934 		if (mtu == 0) {
935 			mtu = rt->rt_rmx.rmx_mtu;
936 
937 			/* If no route mtu, default to the interface mtu */
938 
939 			if (mtu == 0)
940 				mtu = rt->rt_ifp->if_mtu;
941 		}
942 
943 		for (i = 0; i < sizeof(mtu_table) / sizeof(mtu_table[0]); i++)
944 			if (mtu > mtu_table[i]) {
945 				mtu = mtu_table[i];
946 				break;
947 			}
948 	}
949 
950 	/*
951 	 * XXX:   RTV_MTU is overloaded, since the admin can set it
952 	 *	  to turn off PMTU for a route, and the kernel can
953 	 *	  set it to indicate a serious problem with PMTU
954 	 *	  on a route.  We should be using a separate flag
955 	 *	  for the kernel to indicate this.
956 	 */
957 
958 	if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
959 		if (mtu < 296 || mtu > rt->rt_ifp->if_mtu)
960 			rt->rt_rmx.rmx_locks |= RTV_MTU;
961 		else if (rt->rt_rmx.rmx_mtu > mtu ||
962 		    rt->rt_rmx.rmx_mtu == 0)
963 			rt->rt_rmx.rmx_mtu = mtu;
964 	}
965 
966 	rtfree(rt);
967 }
968 
969 /* XXX only handles table 0 right now */
970 void
971 icmp_mtudisc_timeout(struct rtentry *rt, struct rttimer *r)
972 {
973 	if (rt == NULL)
974 		panic("icmp_mtudisc_timeout:  bad route to timeout");
975 	if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) ==
976 	    (RTF_DYNAMIC | RTF_HOST)) {
977 		void *(*ctlfunc)(int, struct sockaddr *, void *);
978 		extern u_char ip_protox[];
979 		struct sockaddr_in sa;
980 		struct rt_addrinfo info;
981 
982 		bzero(&info, sizeof(info));
983 		info.rti_info[RTAX_DST] = rt_key(rt);
984 		info.rti_info[RTAX_NETMASK] = rt_mask(rt);
985 		info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
986 		info.rti_flags = rt->rt_flags;
987 
988 		sa = *(struct sockaddr_in *)rt_key(rt);
989 		rtrequest1(RTM_DELETE, &info, rt->rt_priority, NULL, 0);
990 
991 		/* Notify TCP layer of increased Path MTU estimate */
992 		ctlfunc = inetsw[ip_protox[IPPROTO_TCP]].pr_ctlinput;
993 		if (ctlfunc)
994 			(*ctlfunc)(PRC_MTUINC,(struct sockaddr *)&sa, NULL);
995 	} else
996 		if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0)
997 			rt->rt_rmx.rmx_mtu = 0;
998 }
999 
1000 /*
1001  * Perform rate limit check.
1002  * Returns 0 if it is okay to send the icmp packet.
1003  * Returns 1 if the router SHOULD NOT send this icmp packet due to rate
1004  * limitation.
1005  *
1006  * XXX per-destination/type check necessary?
1007  */
1008 int
1009 icmp_ratelimit(const struct in_addr *dst, const int type, const int code)
1010 {
1011 
1012 	/* PPS limit */
1013 	if (!ppsratecheck(&icmperrppslim_last, &icmperrpps_count,
1014 	    icmperrppslim))
1015 		return 1;
1016 
1017 	/*okay to send*/
1018 	return 0;
1019 }
1020 
1021 /* XXX only handles table 0 right now */
1022 void
1023 icmp_redirect_timeout(struct rtentry *rt, struct rttimer *r)
1024 {
1025 	if (rt == NULL)
1026 		panic("icmp_redirect_timeout:  bad route to timeout");
1027 	if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) ==
1028 	    (RTF_DYNAMIC | RTF_HOST)) {
1029 		struct rt_addrinfo info;
1030 
1031 		bzero(&info, sizeof(info));
1032 		info.rti_info[RTAX_DST] = rt_key(rt);
1033 		info.rti_info[RTAX_NETMASK] = rt_mask(rt);
1034 		info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1035 		info.rti_flags = rt->rt_flags;
1036 
1037 		rtrequest1(RTM_DELETE, &info, rt->rt_priority, NULL, 0);
1038 	}
1039 }
1040