xref: /openbsd-src/sys/netinet/ip_icmp.c (revision b2ea75c1b17e1a9a339660e7ed45cd24946b230e)
1 /*	$OpenBSD: ip_icmp.c,v 1.42 2001/07/04 16:52:03 dhartmei Exp $	*/
2 /*	$NetBSD: ip_icmp.c,v 1.19 1996/02/13 23:42:22 christos Exp $	*/
3 
4 /*
5  * Copyright (c) 1982, 1986, 1988, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *	@(#)COPYRIGHT	1.1 (NRL) 17 January 1995
37  *
38  * NRL grants permission for redistribution and use in source and binary
39  * forms, with or without modification, of the software and documentation
40  * created at NRL provided that the following conditions are met:
41  *
42  * 1. Redistributions of source code must retain the above copyright
43  *    notice, this list of conditions and the following disclaimer.
44  * 2. Redistributions in binary form must reproduce the above copyright
45  *    notice, this list of conditions and the following disclaimer in the
46  *    documentation and/or other materials provided with the distribution.
47  * 3. All advertising materials mentioning features or use of this software
48  *    must display the following acknowledgements:
49  * 	This product includes software developed by the University of
50  * 	California, Berkeley and its contributors.
51  * 	This product includes software developed at the Information
52  * 	Technology Division, US Naval Research Laboratory.
53  * 4. Neither the name of the NRL nor the names of its contributors
54  *    may be used to endorse or promote products derived from this software
55  *    without specific prior written permission.
56  *
57  * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
58  * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
59  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
60  * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL NRL OR
61  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
62  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
63  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
64  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
65  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
66  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
67  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
68  *
69  * The views and conclusions contained in the software and documentation
70  * are those of the authors and should not be interpreted as representing
71  * official policies, either expressed or implied, of the US Naval
72  * Research Laboratory (NRL).
73  */
74 
75 #include <sys/param.h>
76 #include <sys/systm.h>
77 #include <sys/mbuf.h>
78 #include <sys/protosw.h>
79 #include <sys/socket.h>
80 #include <sys/sysctl.h>
81 
82 #include <net/if.h>
83 #include <net/route.h>
84 
85 #include <netinet/in.h>
86 #include <netinet/in_systm.h>
87 #include <netinet/in_var.h>
88 #include <netinet/ip.h>
89 #include <netinet/ip_icmp.h>
90 #include <netinet/ip_var.h>
91 #include <netinet/icmp_var.h>
92 
93 /*
94  * ICMP routines: error generation, receive packet processing, and
95  * routines to turnaround packets back to the originator, and
96  * host table maintenance routines.
97  */
98 
/* Nonzero enables replies to ICMP address mask requests (see icmp_input). */
int	icmpmaskrepl = 0;
/* Nonzero lets icmp_input answer echo/timestamp requests sent to bcast/mcast. */
int	icmpbmcastecho = 0;
#ifdef ICMPPRINTFS
/* Nonzero turns on the debugging printf()s in this file. */
int	icmpprintfs = 0;
#endif
/*
 * State handed to ppsratecheck() by icmp_ratelimit(): the limit itself,
 * the running counter, and the timestamp of the last check.
 */
int	icmperrppslim = 100;
int	icmperrpps_count = 0;
struct timeval icmperrppslim_last;

void icmp_mtudisc_timeout __P((struct rtentry *, struct rttimer *));
int icmp_ratelimit __P((const struct in_addr *, const int, const int));

/* Protocol switch table (defined elsewhere); used to look up pr_ctlinput. */
extern	struct protosw inetsw[];
112 
113 /*
114  * Generate an error packet of type error
115  * in response to bad packet ip.
116  *
117  * The ip packet inside has ip_off and ip_len in host byte order.
118  */
119 void
120 icmp_error(n, type, code, dest, destifp)
121 	struct mbuf *n;
122 	int type, code;
123 	n_long dest;
124 	struct ifnet *destifp;
125 {
126 	register struct ip *oip = mtod(n, struct ip *), *nip;
127 	register unsigned oiplen = oip->ip_hl << 2;
128 	register struct icmp *icp;
129 	struct mbuf *m;
130 	struct m_tag *mtag;
131 	unsigned icmplen, mblen;
132 
133 #ifdef ICMPPRINTFS
134 	if (icmpprintfs)
135 		printf("icmp_error(%x, %d, %d)\n", oip, type, code);
136 #endif
137 	if (type != ICMP_REDIRECT)
138 		icmpstat.icps_error++;
139 	/*
140 	 * Don't send error if not the first fragment of message.
141 	 * Don't error if the old packet protocol was ICMP
142 	 * error message, only known informational types.
143 	 */
144 	if (oip->ip_off & IP_OFFMASK)
145 		goto freeit;
146 	if (oip->ip_p == IPPROTO_ICMP && type != ICMP_REDIRECT &&
147 	  n->m_len >= oiplen + ICMP_MINLEN &&
148 	  !ICMP_INFOTYPE(((struct icmp *)((caddr_t)oip + oiplen))->icmp_type)) {
149 		icmpstat.icps_oldicmp++;
150 		goto freeit;
151 	}
152 	/* Don't send error in response to a multicast or broadcast packet */
153 	if (n->m_flags & (M_BCAST|M_MCAST))
154 		goto freeit;
155 
156 	/*
157 	 * First, do a rate limitation check.
158  	 */
159 	if (icmp_ratelimit(&oip->ip_src, type, code)) {
160 		/* XXX stat */
161 		goto freeit;
162 	}
163 
164 	/*
165 	 * Now, formulate icmp message
166 	 */
167 	icmplen = oiplen + min(8, oip->ip_len);
168  	/*
169 	 * Defend against mbuf chains shorter than oip->ip_len:
170 	 */
171 	mblen = 0;
172 	for (m = n; m && (mblen < icmplen); m = m->m_next)
173 		mblen += m->m_len;
174 	icmplen = min(mblen, icmplen);
175 
176 	/*
177 	 * As we are not required to return everything we have,
178 	 * we return whatever we can return at ease.
179 	 *
180 	 * Note that ICMP datagrams longer than 576 octets are out of spec
181 	 * according to RFC1812;
182 	 */
183 
184 	KASSERT(ICMP_MINLEN <= MCLBYTES);
185 
186 	if (icmplen + ICMP_MINLEN > MCLBYTES)
187 		icmplen = MCLBYTES - ICMP_MINLEN - sizeof (struct ip);
188 
189 	m = m_gethdr(M_DONTWAIT, MT_HEADER);
190 	if (m && (sizeof (struct ip) + icmplen + ICMP_MINLEN > MHLEN)) {
191 		MCLGET(m, M_DONTWAIT);
192 		if ((m->m_flags & M_EXT) == 0) {
193 			m_freem(m);
194 			m = NULL;
195 		}
196 	}
197 	if (m == NULL)
198 		goto freeit;
199 	m->m_len = icmplen + ICMP_MINLEN;
200 	if ((m->m_flags & M_EXT) == 0)
201 		MH_ALIGN(m, m->m_len);
202 	icp = mtod(m, struct icmp *);
203 	if ((u_int)type > ICMP_MAXTYPE)
204 		panic("icmp_error");
205 	icmpstat.icps_outhist[type]++;
206 	icp->icmp_type = type;
207 	if (type == ICMP_REDIRECT)
208 		icp->icmp_gwaddr.s_addr = dest;
209 	else {
210 		icp->icmp_void = 0;
211 		/*
212 		 * The following assignments assume an overlay with the
213 		 * zeroed icmp_void field.
214 		 */
215 		if (type == ICMP_PARAMPROB) {
216 			icp->icmp_pptr = code;
217 			code = 0;
218 		} else if (type == ICMP_UNREACH &&
219 		    code == ICMP_UNREACH_NEEDFRAG && destifp)
220 			icp->icmp_nextmtu = htons(destifp->if_mtu);
221 	}
222 
223 	HTONS(oip->ip_off);
224 	HTONS(oip->ip_len);
225 	icp->icmp_code = code;
226 	m_copydata(n, 0, icmplen, (caddr_t)&icp->icmp_ip);
227 	nip = &icp->icmp_ip;
228 
229 	/*
230 	 * Now, copy old ip header (without options)
231 	 * in front of icmp message.
232 	 */
233 	if ((m->m_flags & M_EXT) == 0 &&
234 	    m->m_data - sizeof(struct ip) < m->m_pktdat)
235 		panic("icmp len");
236 	m->m_data -= sizeof(struct ip);
237 	m->m_len += sizeof(struct ip);
238 	m->m_pkthdr.len = m->m_len;
239 	m->m_pkthdr.rcvif = n->m_pkthdr.rcvif;
240 	nip = mtod(m, struct ip *);
241 	/* ip_v set in ip_output */
242 	nip->ip_hl = sizeof(struct ip) >> 2;
243 	nip->ip_tos = 0;
244 	nip->ip_len = m->m_len;
245 	/* ip_id set in ip_output */
246 	nip->ip_off = 0;
247 	/* ip_ttl set in icmp_reflect */
248 	nip->ip_p = IPPROTO_ICMP;
249 	nip->ip_src = oip->ip_src;
250 	nip->ip_dst = oip->ip_dst;
251 	/* move PF_GENERATED m_tag to new packet, if it exists */
252 	mtag = m_tag_find(n, PACKET_TAG_PF_GENERATED, NULL);
253 	if (mtag != NULL) {
254 		m_tag_unlink(n, mtag);
255 		m_tag_prepend(m, mtag);
256 	}
257 	icmp_reflect(m);
258 
259 freeit:
260 	m_freem(n);
261 }
262 
/*
 * Scratch socket addresses shared by the routines in this file.  Length and
 * family are preset here; the code only stores into sin_addr before each use.
 */
static struct sockaddr_in icmpsrc = { sizeof (struct sockaddr_in), AF_INET };
static struct sockaddr_in icmpdst = { sizeof (struct sockaddr_in), AF_INET };
static struct sockaddr_in icmpgw = { sizeof (struct sockaddr_in), AF_INET };
/*
 * First two members initialized to 8 and 0.  Not referenced by the code
 * visible in this file -- presumably used elsewhere; TODO confirm.
 */
struct sockaddr_in icmpmask = { 8, 0 };
267 
268 /*
269  * Process a received ICMP message.
270  */
271 void
272 #if __STDC__
273 icmp_input(struct mbuf *m, ...)
274 #else
275 icmp_input(m, va_alist)
276 	struct mbuf *m;
277 	va_dcl
278 #endif
279 {
280 	register struct icmp *icp;
281 	register struct ip *ip = mtod(m, struct ip *);
282 	int icmplen = ip->ip_len;
283 	register int i;
284 	struct in_ifaddr *ia;
285 	void *(*ctlfunc) __P((int, struct sockaddr *, void *));
286 	int code;
287 	extern u_char ip_protox[];
288 	int hlen;
289 	va_list ap;
290 
291 	va_start(ap, m);
292 	hlen = va_arg(ap, int);
293 	va_end(ap);
294 
295 	/*
296 	 * Locate icmp structure in mbuf, and check
297 	 * that not corrupted and of at least minimum length.
298 	 */
299 #ifdef ICMPPRINTFS
300 	if (icmpprintfs) {
301 		char buf[4*sizeof "123"];
302 
303 		strcpy(buf, inet_ntoa(ip->ip_dst));
304 		printf("icmp_input from %s to %s, len %d\n",
305 			inet_ntoa(ip->ip_src), buf, icmplen);
306 	}
307 #endif
308 	if (icmplen < ICMP_MINLEN) {
309 		icmpstat.icps_tooshort++;
310 		goto freeit;
311 	}
312 	i = hlen + min(icmplen, ICMP_ADVLENMIN);
313 	if (m->m_len < i && (m = m_pullup(m, i)) == NULL)  {
314 		icmpstat.icps_tooshort++;
315 		return;
316 	}
317 	ip = mtod(m, struct ip *);
318 	m->m_len -= hlen;
319 	m->m_data += hlen;
320 	icp = mtod(m, struct icmp *);
321 	if (in_cksum(m, icmplen)) {
322 		icmpstat.icps_checksum++;
323 		goto freeit;
324 	}
325 	m->m_len += hlen;
326 	m->m_data -= hlen;
327 
328 #ifdef ICMPPRINTFS
329 	/*
330 	 * Message type specific processing.
331 	 */
332 	if (icmpprintfs)
333 		printf("icmp_input, type %d code %d\n", icp->icmp_type,
334 		    icp->icmp_code);
335 #endif
336 	if (icp->icmp_type > ICMP_MAXTYPE)
337 		goto raw;
338 	icmpstat.icps_inhist[icp->icmp_type]++;
339 	code = icp->icmp_code;
340 	switch (icp->icmp_type) {
341 
342 	case ICMP_UNREACH:
343 		switch (code) {
344 		case ICMP_UNREACH_NET:
345 		case ICMP_UNREACH_HOST:
346 		case ICMP_UNREACH_PROTOCOL:
347 		case ICMP_UNREACH_PORT:
348 		case ICMP_UNREACH_SRCFAIL:
349 			code += PRC_UNREACH_NET;
350 			break;
351 
352 		case ICMP_UNREACH_NEEDFRAG:
353 #if 0 /*NRL INET6*/
354 			if (icp->icmp_nextmtu) {
355 				extern int ipv6_trans_mtu
356 				    __P((struct mbuf **, int, int));
357 				struct mbuf *m0 = m;
358 
359 				/*
360 				 * Do cool v4-related path MTU, for now,
361 				 * only v6-in-v4 can handle it.
362 				 */
363 				if (icmplen >= ICMP_V6ADVLENMIN &&
364 				    icmplen >= ICMP_V6ADVLEN(icp) &&
365 				    icp->icmp_ip.ip_p == IPPROTO_IPV6) {
366 					/*
367 					 * ipv6_trans_mtu returns 1 if
368 					 * the mbuf is still intact.
369 					 */
370 					if (ipv6_trans_mtu(&m0,icp->icmp_nextmtu,
371 					    hlen + ICMP_V6ADVLEN(icp))) {
372 						m = m0;
373 						goto raw;
374 					} else
375 						return;
376 				}
377 			}
378 #endif /* INET6 */
379 			code = PRC_MSGSIZE;
380 			break;
381 
382 		case ICMP_UNREACH_NET_UNKNOWN:
383 		case ICMP_UNREACH_NET_PROHIB:
384 		case ICMP_UNREACH_TOSNET:
385 			code = PRC_UNREACH_NET;
386 			break;
387 
388 		case ICMP_UNREACH_HOST_UNKNOWN:
389 		case ICMP_UNREACH_ISOLATED:
390 		case ICMP_UNREACH_HOST_PROHIB:
391 		case ICMP_UNREACH_TOSHOST:
392 		case ICMP_UNREACH_FILTER_PROHIB:
393 		case ICMP_UNREACH_HOST_PRECEDENCE:
394 		case ICMP_UNREACH_PRECEDENCE_CUTOFF:
395 			code = PRC_UNREACH_HOST;
396 			break;
397 
398 		default:
399 			goto badcode;
400 		}
401 		goto deliver;
402 
403 	case ICMP_TIMXCEED:
404 		if (code > 1)
405 			goto badcode;
406 		code += PRC_TIMXCEED_INTRANS;
407 		goto deliver;
408 
409 	case ICMP_PARAMPROB:
410 		if (code > 1)
411 			goto badcode;
412 		code = PRC_PARAMPROB;
413 		goto deliver;
414 
415 	case ICMP_SOURCEQUENCH:
416 		if (code)
417 			goto badcode;
418 		code = PRC_QUENCH;
419 	deliver:
420 		/* Free packet atttributes */
421 		if (m->m_flags & M_PKTHDR)
422 			m_tag_delete_chain(m, NULL);
423 
424 		/*
425 		 * Problem with datagram; advise higher level routines.
426 		 */
427 		if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
428 		    icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
429 			icmpstat.icps_badlen++;
430 			goto freeit;
431 		}
432 		if (IN_MULTICAST(icp->icmp_ip.ip_dst.s_addr))
433 			goto badcode;
434 		NTOHS(icp->icmp_ip.ip_len);
435 #ifdef INET6
436 		/* Get more contiguous data for a v6 in v4 ICMP message. */
437 		if (icp->icmp_ip.ip_p == IPPROTO_IPV6) {
438 			if (icmplen < ICMP_V6ADVLENMIN ||
439 			    icmplen < ICMP_V6ADVLEN(icp)) {
440 				icmpstat.icps_badlen++;
441 				goto freeit;
442 			} else {
443 				if ((m = m_pullup(m, (ip->ip_hl << 2) +
444 				    ICMP_V6ADVLEN(icp))) == NULL) {
445 					icmpstat.icps_tooshort++;
446 					return;
447 				}
448 				ip = mtod(m, struct ip *);
449 				icp = (struct icmp *)(m->m_data + (ip->ip_hl << 2));
450 			}
451 		}
452 #endif /* INET6 */
453 #ifdef ICMPPRINTFS
454 		if (icmpprintfs)
455 			printf("deliver to protocol %d\n", icp->icmp_ip.ip_p);
456 #endif
457 		icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
458 		/*
459 		 * XXX if the packet contains [IPv4 AH TCP], we can't make a
460 		 * notification to TCP layer.
461 		 */
462 		ctlfunc = inetsw[ip_protox[icp->icmp_ip.ip_p]].pr_ctlinput;
463 		if (ctlfunc)
464 			(*ctlfunc)(code, sintosa(&icmpsrc), &icp->icmp_ip);
465 		break;
466 
467 	badcode:
468 		icmpstat.icps_badcode++;
469 		break;
470 
471 	case ICMP_ECHO:
472 		if (!icmpbmcastecho &&
473 		    (m->m_flags & (M_MCAST | M_BCAST)) != 0) {
474 			icmpstat.icps_bmcastecho++;
475 			break;
476 		}
477 		icp->icmp_type = ICMP_ECHOREPLY;
478 		goto reflect;
479 
480 	case ICMP_TSTAMP:
481 		if (!icmpbmcastecho &&
482 		    (m->m_flags & (M_MCAST | M_BCAST)) != 0) {
483 			icmpstat.icps_bmcastecho++;
484 			break;
485 		}
486 		if (icmplen < ICMP_TSLEN) {
487 			icmpstat.icps_badlen++;
488 			break;
489 		}
490 		icp->icmp_type = ICMP_TSTAMPREPLY;
491 		icp->icmp_rtime = iptime();
492 		icp->icmp_ttime = icp->icmp_rtime;	/* bogus, do later! */
493 		goto reflect;
494 
495 	case ICMP_MASKREQ:
496 		if (icmpmaskrepl == 0)
497 			break;
498 		/*
499 		 * We are not able to respond with all ones broadcast
500 		 * unless we receive it over a point-to-point interface.
501 		 */
502 		if (icmplen < ICMP_MASKLEN) {
503 			icmpstat.icps_badlen++;
504 			break;
505 		}
506 		if (ip->ip_dst.s_addr == INADDR_BROADCAST ||
507 		    ip->ip_dst.s_addr == INADDR_ANY)
508 			icmpdst.sin_addr = ip->ip_src;
509 		else
510 			icmpdst.sin_addr = ip->ip_dst;
511 		if (m->m_pkthdr.rcvif != NULL)
512 			ia = ifatoia(ifaof_ifpforaddr(sintosa(&icmpdst),
513 			    m->m_pkthdr.rcvif));
514 		if (ia == 0)
515 			break;
516 		icp->icmp_type = ICMP_MASKREPLY;
517 		icp->icmp_mask = ia->ia_sockmask.sin_addr.s_addr;
518 		if (ip->ip_src.s_addr == 0) {
519 			if (ia->ia_ifp->if_flags & IFF_BROADCAST)
520 				ip->ip_src = ia->ia_broadaddr.sin_addr;
521 			else if (ia->ia_ifp->if_flags & IFF_POINTOPOINT)
522 				ip->ip_src = ia->ia_dstaddr.sin_addr;
523 		}
524 reflect:
525 		/* Free packet atttributes */
526 		if (m->m_flags & M_PKTHDR)
527 			m_tag_delete_chain(m, NULL);
528 
529 		ip->ip_len += hlen;	/* since ip_input deducts this */
530 		icmpstat.icps_reflect++;
531 		icmpstat.icps_outhist[icp->icmp_type]++;
532 		icmp_reflect(m);
533 		return;
534 
535 	case ICMP_REDIRECT:
536 		/* Free packet atttributes */
537 		if (m->m_flags & M_PKTHDR)
538 			m_tag_delete_chain(m, NULL);
539 		if (code > 3)
540 			goto badcode;
541 		if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
542 		    icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
543 			icmpstat.icps_badlen++;
544 			break;
545 		}
546 		/*
547 		 * Short circuit routing redirects to force
548 		 * immediate change in the kernel's routing
549 		 * tables.  The message is also handed to anyone
550 		 * listening on a raw socket (e.g. the routing
551 		 * daemon for use in updating its tables).
552 		 */
553 		icmpgw.sin_addr = ip->ip_src;
554 		icmpdst.sin_addr = icp->icmp_gwaddr;
555 #ifdef	ICMPPRINTFS
556 		if (icmpprintfs) {
557 			char buf[4 * sizeof "123"];
558 			strcpy(buf, inet_ntoa(icp->icmp_ip.ip_dst));
559 
560 			printf("redirect dst %s to %s\n",
561 			    buf, inet_ntoa(icp->icmp_gwaddr));
562 		}
563 #endif
564 		icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
565 		rtredirect(sintosa(&icmpsrc), sintosa(&icmpdst),
566 		    (struct sockaddr *)0, RTF_GATEWAY | RTF_HOST,
567 		    sintosa(&icmpgw), (struct rtentry **)0);
568 		pfctlinput(PRC_REDIRECT_HOST, sintosa(&icmpsrc));
569 		break;
570 
571 	/*
572 	 * No kernel processing for the following;
573 	 * just fall through to send to raw listener.
574 	 */
575 	case ICMP_ECHOREPLY:
576 	case ICMP_ROUTERADVERT:
577 	case ICMP_ROUTERSOLICIT:
578 	case ICMP_TSTAMPREPLY:
579 	case ICMP_IREQREPLY:
580 	case ICMP_MASKREPLY:
581 	case ICMP_TRACEROUTE:
582 	case ICMP_DATACONVERR:
583 	case ICMP_MOBILE_REDIRECT:
584 	case ICMP_IPV6_WHEREAREYOU:
585 	case ICMP_IPV6_IAMHERE:
586 	case ICMP_MOBILE_REGREQUEST:
587 	case ICMP_MOBILE_REGREPLY:
588 	case ICMP_PHOTURIS:
589 	default:
590 		break;
591 	}
592 
593 raw:
594 	rip_input(m);
595 	return;
596 
597 freeit:
598 	m_freem(m);
599 }
600 
601 /*
602  * Reflect the ip packet back to the source
603  */
604 void
605 icmp_reflect(m)
606 	struct mbuf *m;
607 {
608 	register struct ip *ip = mtod(m, struct ip *);
609 	register struct in_ifaddr *ia;
610 	struct in_addr t;
611 	struct mbuf *opts = 0;
612 	int optlen = (ip->ip_hl << 2) - sizeof(struct ip);
613 
614 	if (!in_canforward(ip->ip_src) &&
615 	    ((ip->ip_src.s_addr & IN_CLASSA_NET) !=
616 	     htonl(IN_LOOPBACKNET << IN_CLASSA_NSHIFT))) {
617 		m_freem(m);	/* Bad return address */
618 		goto done;	/* ip_output() will check for broadcast */
619 	}
620 	t = ip->ip_dst;
621 	ip->ip_dst = ip->ip_src;
622 	/*
623 	 * If the incoming packet was addressed directly to us,
624 	 * use dst as the src for the reply.  Otherwise (broadcast
625 	 * or anonymous), use the address which corresponds
626 	 * to the incoming interface.
627 	 */
628 	for (ia = in_ifaddr.tqh_first; ia; ia = ia->ia_list.tqe_next) {
629 		if (t.s_addr == ia->ia_addr.sin_addr.s_addr)
630 			break;
631 		if ((ia->ia_ifp->if_flags & IFF_BROADCAST) &&
632 		    t.s_addr == ia->ia_broadaddr.sin_addr.s_addr)
633 			break;
634 	}
635 	icmpdst.sin_addr = t;
636 	if ((ia == (struct in_ifaddr *)0) && (m->m_pkthdr.rcvif != NULL))
637 		ia = ifatoia(ifaof_ifpforaddr(sintosa(&icmpdst),
638 					      m->m_pkthdr.rcvif));
639 	/*
640 	 * The following happens if the packet was not addressed to us,
641 	 * and was received on an interface with no IP address.
642 	 */
643 	if (ia == (struct in_ifaddr *)0) {
644 	        struct sockaddr_in *dst;
645 		struct route ro;
646 
647 		bzero((caddr_t) &ro, sizeof(ro));
648 		dst = satosin(&ro.ro_dst);
649 		dst->sin_family = AF_INET;
650 		dst->sin_len = sizeof(*dst);
651 		dst->sin_addr = t;
652 
653 		rtalloc(&ro);
654 		if (ro.ro_rt == 0)
655 		{
656 		    ipstat.ips_noroute++;
657 		    goto done;
658 		}
659 
660 		ia = ifatoia(ro.ro_rt->rt_ifa);
661 		ro.ro_rt->rt_use++;
662                 RTFREE(ro.ro_rt);
663         }
664 
665 	t = ia->ia_addr.sin_addr;
666 	ip->ip_src = t;
667 	ip->ip_ttl = MAXTTL;
668 
669 	if (optlen > 0) {
670 		register u_char *cp;
671 		int opt, cnt;
672 		u_int len;
673 
674 		/*
675 		 * Retrieve any source routing from the incoming packet;
676 		 * add on any record-route or timestamp options.
677 		 */
678 		cp = (u_char *) (ip + 1);
679 		if ((opts = ip_srcroute()) == 0 &&
680 		    (opts = m_gethdr(M_DONTWAIT, MT_HEADER))) {
681 			opts->m_len = sizeof(struct in_addr);
682 			mtod(opts, struct in_addr *)->s_addr = 0;
683 		}
684 		if (opts) {
685 #ifdef ICMPPRINTFS
686 		    if (icmpprintfs)
687 			    printf("icmp_reflect optlen %d rt %d => ",
688 				optlen, opts->m_len);
689 #endif
690 		    for (cnt = optlen; cnt > 0; cnt -= len, cp += len) {
691 			    opt = cp[IPOPT_OPTVAL];
692 			    if (opt == IPOPT_EOL)
693 				    break;
694 			    if (opt == IPOPT_NOP)
695 				    len = 1;
696 			    else {
697 				    if (cnt < IPOPT_OLEN + sizeof(*cp))
698 					    break;
699 				    len = cp[IPOPT_OLEN];
700 				    if (len < IPOPT_OLEN + sizeof(*cp) ||
701 				        len > cnt)
702 					    break;
703 			    }
704 			    /*
705 			     * Should check for overflow, but it "can't happen"
706 			     */
707 			    if (opt == IPOPT_RR || opt == IPOPT_TS ||
708 				opt == IPOPT_SECURITY) {
709 				    bcopy((caddr_t)cp,
710 					mtod(opts, caddr_t) + opts->m_len, len);
711 				    opts->m_len += len;
712 			    }
713 		    }
714 		    /* Terminate & pad, if necessary */
715 		    if ((cnt = opts->m_len % 4) != 0) {
716 			    for (; cnt < 4; cnt++) {
717 				    *(mtod(opts, caddr_t) + opts->m_len) =
718 					IPOPT_EOL;
719 				    opts->m_len++;
720 			    }
721 		    }
722 #ifdef ICMPPRINTFS
723 		    if (icmpprintfs)
724 			    printf("%d\n", opts->m_len);
725 #endif
726 		}
727 		/*
728 		 * Now strip out original options by copying rest of first
729 		 * mbuf's data back, and adjust the IP length.
730 		 */
731 		ip->ip_len -= optlen;
732 		ip->ip_hl = sizeof(struct ip) >> 2;
733 		m->m_len -= optlen;
734 		if (m->m_flags & M_PKTHDR)
735 			m->m_pkthdr.len -= optlen;
736 		optlen += sizeof(struct ip);
737 		bcopy((caddr_t)ip + optlen, (caddr_t)(ip + 1),
738 			 (unsigned)(m->m_len - sizeof(struct ip)));
739 	}
740 	m->m_flags &= ~(M_BCAST|M_MCAST);
741 	icmp_send(m, opts);
742 done:
743 	if (opts)
744 		(void)m_free(opts);
745 }
746 
747 /*
748  * Send an icmp packet back to the ip level,
749  * after supplying a checksum.
750  */
751 void
752 icmp_send(m, opts)
753 	register struct mbuf *m;
754 	struct mbuf *opts;
755 {
756 	register struct ip *ip = mtod(m, struct ip *);
757 	register int hlen;
758 	register struct icmp *icp;
759 
760 	hlen = ip->ip_hl << 2;
761 	m->m_data += hlen;
762 	m->m_len -= hlen;
763 	icp = mtod(m, struct icmp *);
764 	icp->icmp_cksum = 0;
765 	icp->icmp_cksum = in_cksum(m, ip->ip_len - hlen);
766 	m->m_data -= hlen;
767 	m->m_len += hlen;
768 #ifdef ICMPPRINTFS
769 	if (icmpprintfs) {
770 		char buf[4 * sizeof "123"];
771 
772 		strcpy(buf, inet_ntoa(ip->ip_dst));
773 		printf("icmp_send dst %s src %s\n",
774 		    buf, inet_ntoa(ip->ip_src));
775 	}
776 #endif
777 	(void) ip_output(m, opts, NULL, 0, NULL, NULL);
778 }
779 
780 n_time
781 iptime()
782 {
783 	struct timeval atv;
784 	u_long t;
785 
786 	microtime(&atv);
787 	t = (atv.tv_sec % (24*60*60)) * 1000 + atv.tv_usec / 1000;
788 	return (htonl(t));
789 }
790 
791 int
792 icmp_sysctl(name, namelen, oldp, oldlenp, newp, newlen)
793 	int *name;
794 	u_int namelen;
795 	void *oldp;
796 	size_t *oldlenp;
797 	void *newp;
798 	size_t newlen;
799 {
800 
801 	/* All sysctl names at this level are terminal. */
802 	if (namelen != 1)
803 		return (ENOTDIR);
804 
805 	switch (name[0]) {
806 	case ICMPCTL_MASKREPL:
807 		return (sysctl_int(oldp, oldlenp, newp, newlen, &icmpmaskrepl));
808 	case ICMPCTL_BMCASTECHO:
809 		return (sysctl_int(oldp, oldlenp, newp, newlen, &icmpbmcastecho));
810 	case ICMPCTL_ERRPPSLIMIT:
811 		return (sysctl_int(oldp, oldlenp, newp, newlen,
812 		    &icmperrppslim));
813 		break;
814 	default:
815 		return (ENOPROTOOPT);
816 	}
817 	/* NOTREACHED */
818 }
819 
820 struct rtentry *
821 icmp_mtudisc_clone(struct sockaddr *dst)
822 {
823 	struct rtentry *rt;
824 	int error;
825 
826 	rt = rtalloc1(dst, 1);
827 	if (rt == 0)
828 		return (NULL);
829 
830 	/* If we didn't get a host route, allocate one */
831 
832 	if ((rt->rt_flags & RTF_HOST) == 0) {
833 		struct rtentry *nrt;
834 
835 		error = rtrequest((int) RTM_ADD, dst,
836 		    (struct sockaddr *) rt->rt_gateway,
837 		    (struct sockaddr *) 0,
838 		    RTF_GATEWAY | RTF_HOST | RTF_DYNAMIC, &nrt);
839 		if (error) {
840 			rtfree(rt);
841 			return (NULL);
842 		}
843 		nrt->rt_rmx = rt->rt_rmx;
844 		rtfree(rt);
845 		rt = nrt;
846 	}
847 	error = rt_timer_add(rt, icmp_mtudisc_timeout, ip_mtudisc_timeout_q);
848 	if (error) {
849 		rtfree(rt);
850 		return (NULL);
851 	}
852 
853 	return (rt);
854 }
855 
856 void
857 icmp_mtudisc(icp)
858 	struct icmp *icp;
859 {
860 	struct rtentry *rt;
861 	struct sockaddr *dst = sintosa(&icmpsrc);
862 	u_long mtu = ntohs(icp->icmp_nextmtu);  /* Why a long?  IPv6 */
863 
864 	/* Table of common MTUs: */
865 
866 	static u_short mtu_table[] = {65535, 65280, 32000, 17914, 9180, 8166,
867 				      4352, 2002, 1492, 1006, 508, 296, 68, 0};
868 
869 	rt = icmp_mtudisc_clone(dst);
870 	if (rt == 0)
871 		return;
872 
873 	if (mtu == 0) {
874 		int i = 0;
875 
876 		mtu = icp->icmp_ip.ip_len; /* NTOHS happened in deliver: */
877 		/* Some 4.2BSD-based routers incorrectly adjust the ip_len */
878 		if (mtu > rt->rt_rmx.rmx_mtu && rt->rt_rmx.rmx_mtu != 0)
879 			mtu -= (icp->icmp_ip.ip_hl << 2);
880 
881 		/* If we still can't guess a value, try the route */
882 
883 		if (mtu == 0) {
884 			mtu = rt->rt_rmx.rmx_mtu;
885 
886 			/* If no route mtu, default to the interface mtu */
887 
888 			if (mtu == 0)
889 				mtu = rt->rt_ifp->if_mtu;
890 		}
891 
892 		for (i = 0; i < sizeof(mtu_table) / sizeof(mtu_table[0]); i++)
893 			if (mtu > mtu_table[i]) {
894 				mtu = mtu_table[i];
895 				break;
896 			}
897 	}
898 
899 	/*
900 	 * XXX:   RTV_MTU is overloaded, since the admin can set it
901 	 *	  to turn off PMTU for a route, and the kernel can
902 	 *	  set it to indicate a serious problem with PMTU
903 	 *	  on a route.  We should be using a separate flag
904 	 *	  for the kernel to indicate this.
905 	 */
906 
907 	if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
908 		if (mtu < 296 || mtu > rt->rt_ifp->if_mtu)
909 			rt->rt_rmx.rmx_locks |= RTV_MTU;
910 		else if (rt->rt_rmx.rmx_mtu > mtu ||
911 			 rt->rt_rmx.rmx_mtu == 0)
912 			rt->rt_rmx.rmx_mtu = mtu;
913 	}
914 
915 	rtfree(rt);
916 }
917 
918 void
919 icmp_mtudisc_timeout(rt, r)
920 	struct rtentry *rt;
921 	struct rttimer *r;
922 {
923 	if (rt == NULL)
924 		panic("icmp_mtudisc_timeout:  bad route to timeout");
925 	if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) ==
926 	    (RTF_DYNAMIC | RTF_HOST)) {
927 		void *(*ctlfunc) __P((int, struct sockaddr *, void *));
928 		extern u_char ip_protox[];
929 		struct sockaddr_in sa;
930 
931 		sa = *(struct sockaddr_in *)rt_key(rt);
932 		rtrequest((int) RTM_DELETE, (struct sockaddr *)rt_key(rt),
933 		    rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0);
934 
935 		/* Notify TCP layer of increased Path MTU estimate */
936 		ctlfunc = inetsw[ip_protox[IPPROTO_TCP]].pr_ctlinput;
937 		if (ctlfunc)
938 			(*ctlfunc)(PRC_MTUINC,(struct sockaddr *)&sa, NULL);
939 	} else {
940 		if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
941 			rt->rt_rmx.rmx_mtu = 0;
942 		}
943 	}
944 }
945 
946 /*
947  * Perform rate limit check.
948  * Returns 0 if it is okay to send the icmp packet.
949  * Returns 1 if the router SHOULD NOT send this icmp packet due to rate
950  * limitation.
951  *
952  * XXX per-destination/type check necessary?
953  */
954 int
955 icmp_ratelimit(dst, type, code)
956 	const struct in_addr *dst;
957 	const int type;			/* not used at this moment */
958 	const int code;			/* not used at this moment */
959 {
960 
961 	/* PPS limit */
962 	if (!ppsratecheck(&icmperrppslim_last, &icmperrpps_count,
963 	    icmperrppslim)) {
964 		/* The packet is subject to rate limit */
965 		return 1;
966 	}
967 
968 	/*okay to send*/
969 	return 0;
970 }
971