xref: /openbsd-src/sys/netinet6/icmp6.c (revision 5a38ef86d0b61900239c7913d24a05e7b88a58f0)
1 /*	$OpenBSD: icmp6.c,v 1.236 2021/07/26 20:44:44 bluhm Exp $	*/
2 /*	$KAME: icmp6.c,v 1.217 2001/06/20 15:03:29 jinmei Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1982, 1986, 1988, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)ip_icmp.c	8.2 (Berkeley) 1/4/94
62  */
63 
64 #include "carp.h"
65 #include "pf.h"
66 
67 #include <sys/param.h>
68 #include <sys/systm.h>
69 #include <sys/malloc.h>
70 #include <sys/mbuf.h>
71 #include <sys/sysctl.h>
72 #include <sys/protosw.h>
73 #include <sys/socket.h>
74 #include <sys/socketvar.h>
75 #include <sys/time.h>
76 #include <sys/kernel.h>
77 #include <sys/syslog.h>
78 #include <sys/domain.h>
79 
80 #include <net/if.h>
81 #include <net/if_var.h>
82 #include <net/route.h>
83 #include <net/if_dl.h>
84 #include <net/if_types.h>
85 
86 #include <netinet/in.h>
87 #include <netinet/ip.h>
88 #include <netinet6/in6_var.h>
89 #include <netinet/ip6.h>
90 #include <netinet6/ip6_var.h>
91 #include <netinet/icmp6.h>
92 #include <netinet6/mld6_var.h>
93 #include <netinet/in_pcb.h>
94 #include <netinet6/nd6.h>
95 #include <netinet6/ip6protosw.h>
96 
97 #if NCARP > 0
98 #include <netinet/ip_carp.h>
99 #endif
100 
101 #if NPF > 0
102 #include <net/pfvar.h>
103 #endif
104 
/* ICMPv6 statistics counters; allocated by icmp6_init() below. */
105 struct cpumem *icmp6counters;
106 
/*
 * State for rate limiting outgoing ICMPv6 errors.  The limit itself is
 * defined in another file; the counter and timestamp are private here.
 * NOTE(review): presumably consumed by icmp6_ratelimit() -- confirm.
 */
107 extern int icmp6errppslim;
108 static int icmp6errpps_count = 0;
109 static struct timeval icmp6errppslim_last;
110 
111 /*
112  * List of callbacks to notify when Path MTU changes are made.
113  */
114 struct icmp6_mtudisc_callback {
115 	LIST_ENTRY(icmp6_mtudisc_callback) mc_list;
116 	void (*mc_func)(struct sockaddr_in6 *, u_int);
117 };
118 
119 LIST_HEAD(, icmp6_mtudisc_callback) icmp6_mtudisc_callbacks =
120     LIST_HEAD_INITIALIZER(icmp6_mtudisc_callbacks);
121 
/*
 * Route timer queue for path MTU discovery; created in icmp6_init() and
 * its entry count is consulted by icmp6_mtudisc_update().
 */
121 struct rttimer_queue *icmp6_mtudisc_timeout_q = NULL;
122 
/*
 * Watermarks compared against the number of entries in
 * icmp6_mtudisc_timeout_q by icmp6_mtudisc_update(): hiwat bounds
 * validated updates, lowat bounds non-validated ones.  A negative value
 * disables the corresponding check.
 */
123 /* XXX do these values make any sense? */
124 static int icmp6_mtudisc_hiwat = 1280;
125 static int icmp6_mtudisc_lowat = 256;
126 
127 /*
128  * keep track of # of redirect routes.
 * The queue is created in icmp6_init().
129  */
130 static struct rttimer_queue *icmp6_redirect_timeout_q = NULL;
131 
/* NOTE(review): not referenced in the part of the file reviewed here. */
132 /* XXX experimental, turned off */
133 static int icmp6_redirect_lowat = -1;
135 
/*
 * Forward declarations.  icmp6_errcount() and icmp6_notify_error() are
 * defined below; the remaining functions are presumably defined later
 * in this file -- confirm.
 */
135 void	icmp6_errcount(int, int);
136 int	icmp6_ratelimit(const struct in6_addr *, const int, const int);
137 const char *icmp6_redirect_diag(struct in6_addr *, struct in6_addr *,
138 	    struct in6_addr *);
139 int	icmp6_notify_error(struct mbuf *, int, int, int);
140 void	icmp6_mtudisc_timeout(struct rtentry *, struct rttimer *);
141 void	icmp6_redirect_timeout(struct rtentry *, struct rttimer *);
143 
144 void
145 icmp6_init(void)
146 {
147 	mld6_init();
148 	icmp6_mtudisc_timeout_q = rt_timer_queue_create(ip6_mtudisc_timeout);
149 	icmp6_redirect_timeout_q = rt_timer_queue_create(icmp6_redirtimeout);
150 	icmp6counters = counters_alloc(icp6s_ncounters);
151 }
152 
153 void
154 icmp6_errcount(int type, int code)
155 {
156 	enum icmp6stat_counters c = icp6s_ounknown;
157 
158 	switch (type) {
159 	case ICMP6_DST_UNREACH:
160 		switch (code) {
161 		case ICMP6_DST_UNREACH_NOROUTE:
162 			c = icp6s_odst_unreach_noroute;
163 			break;
164 		case ICMP6_DST_UNREACH_ADMIN:
165 			c = icp6s_odst_unreach_admin;
166 			break;
167 		case ICMP6_DST_UNREACH_BEYONDSCOPE:
168 			c = icp6s_odst_unreach_beyondscope;
169 			break;
170 		case ICMP6_DST_UNREACH_ADDR:
171 			c = icp6s_odst_unreach_addr;
172 			break;
173 		case ICMP6_DST_UNREACH_NOPORT:
174 			c = icp6s_odst_unreach_noport;
175 			break;
176 		}
177 		break;
178 	case ICMP6_PACKET_TOO_BIG:
179 		c = icp6s_opacket_too_big;
180 		break;
181 	case ICMP6_TIME_EXCEEDED:
182 		switch (code) {
183 		case ICMP6_TIME_EXCEED_TRANSIT:
184 			c = icp6s_otime_exceed_transit;
185 			break;
186 		case ICMP6_TIME_EXCEED_REASSEMBLY:
187 			c = icp6s_otime_exceed_reassembly;
188 			break;
189 		}
190 		break;
191 	case ICMP6_PARAM_PROB:
192 		switch (code) {
193 		case ICMP6_PARAMPROB_HEADER:
194 			c = icp6s_oparamprob_header;
195 			break;
196 		case ICMP6_PARAMPROB_NEXTHEADER:
197 			c = icp6s_oparamprob_nextheader;
198 			break;
199 		case ICMP6_PARAMPROB_OPTION:
200 			c = icp6s_oparamprob_option;
201 			break;
202 		}
203 		break;
204 	case ND_REDIRECT:
205 		c = icp6s_oredirect;
206 		break;
207 	}
208 
209 	icmp6stat_inc(c);
210 }
211 
212 /*
213  * Register a Path MTU Discovery callback.
214  */
215 void
216 icmp6_mtudisc_callback_register(void (*func)(struct sockaddr_in6 *, u_int))
217 {
218 	struct icmp6_mtudisc_callback *mc;
219 
220 	LIST_FOREACH(mc, &icmp6_mtudisc_callbacks, mc_list) {
221 		if (mc->mc_func == func)
222 			return;
223 	}
224 
225 	mc = malloc(sizeof(*mc), M_PCB, M_NOWAIT);
226 	if (mc == NULL)
227 		panic("%s", __func__);
228 
229 	mc->mc_func = func;
230 	LIST_INSERT_HEAD(&icmp6_mtudisc_callbacks, mc, mc_list);
231 }
232 
233 struct mbuf *
234 icmp6_do_error(struct mbuf *m, int type, int code, int param)
235 {
236 	struct ip6_hdr *oip6, *nip6;
237 	struct icmp6_hdr *icmp6;
238 	u_int preplen;
239 	int off;
240 	int nxt;
241 
242 	icmp6stat_inc(icp6s_error);
243 
244 	/* count per-type-code statistics */
245 	icmp6_errcount(type, code);
246 
247 	if (m->m_len < sizeof(struct ip6_hdr)) {
248 		m = m_pullup(m, sizeof(struct ip6_hdr));
249 		if (m == NULL)
250 			return (NULL);
251 	}
252 	oip6 = mtod(m, struct ip6_hdr *);
253 
254 	/*
255 	 * If the destination address of the erroneous packet is a multicast
256 	 * address, or the packet was sent using link-layer multicast,
257 	 * we should basically suppress sending an error (RFC 2463, Section
258 	 * 2.4).
259 	 * We have two exceptions (the item e.2 in that section):
260 	 * - the Packet Too Big message can be sent for path MTU discovery.
261 	 * - the Parameter Problem Message that can be allowed an icmp6 error
262 	 *   in the option type field.  This check has been done in
263 	 *   ip6_unknown_opt(), so we can just check the type and code.
264 	 */
265 	if ((m->m_flags & (M_BCAST|M_MCAST) ||
266 	     IN6_IS_ADDR_MULTICAST(&oip6->ip6_dst)) &&
267 	    (type != ICMP6_PACKET_TOO_BIG &&
268 	     (type != ICMP6_PARAM_PROB ||
269 	      code != ICMP6_PARAMPROB_OPTION)))
270 		goto freeit;
271 
272 	/*
273 	 * RFC 2463, 2.4 (e.5): source address check.
274 	 * XXX: the case of anycast source?
275 	 */
276 	if (IN6_IS_ADDR_UNSPECIFIED(&oip6->ip6_src) ||
277 	    IN6_IS_ADDR_MULTICAST(&oip6->ip6_src))
278 		goto freeit;
279 
280 	/*
281 	 * If we are about to send ICMPv6 against ICMPv6 error/redirect,
282 	 * don't do it.
283 	 */
284 	nxt = -1;
285 	off = ip6_lasthdr(m, 0, IPPROTO_IPV6, &nxt);
286 	if (off >= 0 && nxt == IPPROTO_ICMPV6) {
287 		struct icmp6_hdr *icp;
288 
289 		IP6_EXTHDR_GET(icp, struct icmp6_hdr *, m, off,
290 			sizeof(*icp));
291 		if (icp == NULL) {
292 			icmp6stat_inc(icp6s_tooshort);
293 			return (NULL);
294 		}
295 		if (icp->icmp6_type < ICMP6_ECHO_REQUEST ||
296 		    icp->icmp6_type == ND_REDIRECT) {
297 			/*
298 			 * ICMPv6 error
299 			 * Special case: for redirect (which is
300 			 * informational) we must not send icmp6 error.
301 			 */
302 			icmp6stat_inc(icp6s_canterror);
303 			goto freeit;
304 		} else {
305 			/* ICMPv6 informational - send the error */
306 		}
307 	}
308 	else {
309 		/* non-ICMPv6 - send the error */
310 	}
311 
312 	oip6 = mtod(m, struct ip6_hdr *); /* adjust pointer */
313 
314 	/* Finally, do rate limitation check. */
315 	if (icmp6_ratelimit(&oip6->ip6_src, type, code)) {
316 		icmp6stat_inc(icp6s_toofreq);
317 		goto freeit;
318 	}
319 
320 	/*
321 	 * OK, ICMP6 can be generated.
322 	 */
323 
324 	if (m->m_pkthdr.len >= ICMPV6_PLD_MAXLEN)
325 		m_adj(m, ICMPV6_PLD_MAXLEN - m->m_pkthdr.len);
326 
327 	preplen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr);
328 	M_PREPEND(m, preplen, M_DONTWAIT);
329 	if (m && m->m_len < preplen)
330 		m = m_pullup(m, preplen);
331 	if (m == NULL) {
332 		nd6log((LOG_DEBUG, "ENOBUFS in icmp6_error %d\n", __LINE__));
333 		return (NULL);
334 	}
335 
336 	nip6 = mtod(m, struct ip6_hdr *);
337 	nip6->ip6_src  = oip6->ip6_src;
338 	nip6->ip6_dst  = oip6->ip6_dst;
339 
340 	if (IN6_IS_SCOPE_EMBED(&oip6->ip6_src))
341 		oip6->ip6_src.s6_addr16[1] = 0;
342 	if (IN6_IS_SCOPE_EMBED(&oip6->ip6_dst))
343 		oip6->ip6_dst.s6_addr16[1] = 0;
344 
345 	icmp6 = (struct icmp6_hdr *)(nip6 + 1);
346 	icmp6->icmp6_type = type;
347 	icmp6->icmp6_code = code;
348 	icmp6->icmp6_pptr = htonl((u_int32_t)param);
349 
350 	/*
351 	 * icmp6_reflect() is designed to be in the input path.
352 	 * icmp6_error() can be called from both input and output path,
353 	 * and if we are in output path rcvif could contain bogus value.
354 	 * clear m->m_pkthdr.ph_ifidx for safety, we should have enough
355 	 * scope information in ip header (nip6).
356 	 */
357 	m->m_pkthdr.ph_ifidx = 0;
358 
359 	icmp6stat_inc(icp6s_outhist + type);
360 
361 	return (m);
362 
363   freeit:
364 	/*
365 	 * If we can't tell whether or not we can generate ICMP6, free it.
366 	 */
367 	return (m_freem(m));
368 }
369 
370 /*
371  * Generate an error packet of type error in response to bad IP6 packet.
372  */
373 void
374 icmp6_error(struct mbuf *m, int type, int code, int param)
375 {
376 	struct mbuf	*n;
377 
378 	n = icmp6_do_error(m, type, code, param);
379 	if (n != NULL) {
380 		/* header order: IPv6 - ICMPv6 */
381 		if (!icmp6_reflect(&n, sizeof(struct ip6_hdr), NULL))
382 			ip6_send(n);
383 	}
384 }
385 
386 /*
387  * Process a received ICMP6 message.
 *
 * *mp is the packet and *offp the offset of the ICMPv6 header in it;
 * proto and af are only passed through to rip6_input().
 *
 * After length and checksum validation the message is dispatched on its
 * type: error types are translated to a PRC_* code and handed to
 * icmp6_notify_error(), echo requests are answered, and MLD / neighbor
 * discovery / redirect messages are passed to their handlers (on a copy
 * when one can be made).  Unless it was dropped or consumed, the packet
 * is finally given to rip6_input() for raw sockets.
 *
 * Returns IPPROTO_DONE when the packet was dropped or consumed here,
 * otherwise whatever rip6_input() returns.
388  */
389 int
390 icmp6_input(struct mbuf **mp, int *offp, int proto, int af)
391 {
392 #if NCARP > 0
393 	struct ifnet *ifp;
394 #endif
395 	struct mbuf *m = *mp, *n;
396 	struct ip6_hdr *ip6, *nip6;
397 	struct icmp6_hdr *icmp6, *nicmp6;
398 	int off = *offp;
	/* number of bytes from the ICMPv6 header to the end of the packet */
399 	int icmp6len = m->m_pkthdr.len - *offp;
400 	int code, sum, noff;
401 	char src[INET6_ADDRSTRLEN], dst[INET6_ADDRSTRLEN];
402 
403 	/*
404 	 * Locate icmp6 structure in mbuf, and check
405 	 * that not corrupted and of at least minimum length
406 	 */
407 
408 	ip6 = mtod(m, struct ip6_hdr *);
409 	if (icmp6len < sizeof(struct icmp6_hdr)) {
410 		icmp6stat_inc(icp6s_tooshort);
411 		goto freeit;
412 	}
413 
414 	/*
415 	 * calculate the checksum
416 	 */
417 	IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6));
418 	if (icmp6 == NULL) {
		/*
		 * NOTE(review): returns without freeing m; presumably
		 * IP6_EXTHDR_GET already released it on failure -- confirm.
		 */
419 		icmp6stat_inc(icp6s_tooshort);
420 		return IPPROTO_DONE;
421 	}
422 	code = icmp6->icmp6_code;
423 
424 	if ((sum = in6_cksum(m, IPPROTO_ICMPV6, off, icmp6len)) != 0) {
425 		nd6log((LOG_ERR,
426 		    "ICMP6 checksum error(%d|%x) %s\n",
427 		    icmp6->icmp6_type, sum,
428 		    inet_ntop(AF_INET6, &ip6->ip6_src, src, sizeof(src))));
429 		icmp6stat_inc(icp6s_checksum);
430 		goto freeit;
431 	}
432 
433 #if NPF > 0
	/*
	 * Diverted packets skip the kernel processing below and go
	 * straight to raw socket delivery, except for the error types
	 * listed in the switch.
	 */
434 	if (m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) {
435 		switch (icmp6->icmp6_type) {
436 		/*
437 		 * These ICMP6 types map to other connections.  They must be
438 		 * delivered to pr_ctlinput() also for diverted connections.
439 		 */
440 		case ICMP6_DST_UNREACH:
441 		case ICMP6_PACKET_TOO_BIG:
442 		case ICMP6_TIME_EXCEEDED:
443 		case ICMP6_PARAM_PROB:
444 			/*
445 			 * Do not use the divert-to property of the TCP or UDP
446 			 * rule when doing the PCB lookup for the raw socket.
447 			 */
448 			m->m_pkthdr.pf.flags &=~ PF_TAG_DIVERTED;
449 			break;
450 		default:
451 			goto raw;
452 		}
453 	}
454 #endif /* NPF */
455 
456 #if NCARP > 0
	/*
	 * Drop packets whose receive interface no longer exists, and
	 * echo requests for which carp_lsdrop() returns non-zero.
	 */
457 	ifp = if_get(m->m_pkthdr.ph_ifidx);
458 	if (ifp == NULL)
459 		goto freeit;
460 
461 	if (icmp6->icmp6_type == ICMP6_ECHO_REQUEST &&
462 	    carp_lsdrop(ifp, m, AF_INET6, ip6->ip6_src.s6_addr32,
463 	    ip6->ip6_dst.s6_addr32, 1)) {
464 		if_put(ifp);
465 		goto freeit;
466 	}
467 
468 	if_put(ifp);
469 #endif
	/* per-type input histogram */
470 	icmp6stat_inc(icp6s_inhist + icmp6->icmp6_type);
471 
	/*
	 * Dispatch on the message type.  The error types translate the
	 * ICMPv6 code into a PRC_* value and jump to the "deliver" label,
	 * which lives inside the default case together with the "badcode"
	 * and "badlen" labels.  A "break" out of this switch leads to raw
	 * socket delivery below.
	 */
472 	switch (icmp6->icmp6_type) {
473 	case ICMP6_DST_UNREACH:
474 		switch (code) {
475 		case ICMP6_DST_UNREACH_NOROUTE:
476 			code = PRC_UNREACH_NET;
477 			break;
478 		case ICMP6_DST_UNREACH_ADMIN:
479 			code = PRC_UNREACH_PROTOCOL; /* is this a good code? */
480 			break;
481 		case ICMP6_DST_UNREACH_ADDR:
482 			code = PRC_HOSTDEAD;
483 			break;
484 		case ICMP6_DST_UNREACH_BEYONDSCOPE:
485 			/* I mean "source address was incorrect." */
486 			code = PRC_PARAMPROB;
487 			break;
488 		case ICMP6_DST_UNREACH_NOPORT:
489 			code = PRC_UNREACH_PORT;
490 			break;
491 		default:
492 			goto badcode;
493 		}
494 		goto deliver;
495 
496 	case ICMP6_PACKET_TOO_BIG:
497 		/* MTU is checked in icmp6_mtudisc_update. */
498 		code = PRC_MSGSIZE;
499 
500 		/*
501 		 * Updating the path MTU will be done after examining
502 		 * intermediate extension headers.
503 		 */
504 		goto deliver;
505 
506 	case ICMP6_TIME_EXCEEDED:
507 		switch (code) {
508 		case ICMP6_TIME_EXCEED_TRANSIT:
509 			code = PRC_TIMXCEED_INTRANS;
510 			break;
511 		case ICMP6_TIME_EXCEED_REASSEMBLY:
512 			code = PRC_TIMXCEED_REASS;
513 			break;
514 		default:
515 			goto badcode;
516 		}
517 		goto deliver;
518 
519 	case ICMP6_PARAM_PROB:
520 		switch (code) {
521 		case ICMP6_PARAMPROB_NEXTHEADER:
522 			code = PRC_UNREACH_PROTOCOL;
523 			break;
524 		case ICMP6_PARAMPROB_HEADER:
525 		case ICMP6_PARAMPROB_OPTION:
526 			code = PRC_PARAMPROB;
527 			break;
528 		default:
529 			goto badcode;
530 		}
531 		goto deliver;
532 
533 	case ICMP6_ECHO_REQUEST:
534 		if (code != 0)
535 			goto badcode;
536 		/*
537 		 * Copy mbuf to send to two data paths: userland socket(s),
538 		 * and to the querier (echo reply).
539 		 * m: a copy for socket, n: a copy for querier
540 		 */
541 		if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
542 			/* Give up local */
			/* the original becomes the reply; nothing is left for sockets */
543 			n = m;
544 			m = *mp = NULL;
545 			goto deliverecho;
546 		}
547 		/*
548 		 * If the first mbuf is shared, or the first mbuf is too short,
549 		 * copy the first part of the data into a fresh mbuf.
550 		 * Otherwise, we will wrongly overwrite both copies.
551 		 */
552 		if ((n->m_flags & M_EXT) != 0 ||
553 		    n->m_len < off + sizeof(struct icmp6_hdr)) {
554 			struct mbuf *n0 = n;
555 			const int maxlen = sizeof(*nip6) + sizeof(*nicmp6);
556 
557 			/*
558 			 * Prepare an internal mbuf.  m_pullup() doesn't
559 			 * always copy the length we specified.
560 			 */
561 			if (maxlen >= MCLBYTES) {
562 				/* Give up remote */
563 				m_freem(n0);
564 				break;
565 			}
566 			MGETHDR(n, M_DONTWAIT, n0->m_type);
567 			if (n && maxlen >= MHLEN) {
568 				MCLGET(n, M_DONTWAIT);
569 				if ((n->m_flags & M_EXT) == 0) {
570 					m_free(n);
571 					n = NULL;
572 				}
573 			}
574 			if (n == NULL) {
575 				/* Give up local */
576 				m_freem(n0);
577 				n = m;
578 				m = *mp = NULL;
579 				goto deliverecho;
580 			}
581 			M_MOVE_PKTHDR(n, n0);
582 			/*
583 			 * Copy IPv6 and ICMPv6 only.
			 * Any headers between the two are not carried over,
			 * so in the new mbuf ICMPv6 directly follows IPv6.
584 			 */
585 			nip6 = mtod(n, struct ip6_hdr *);
586 			bcopy(ip6, nip6, sizeof(struct ip6_hdr));
587 			nicmp6 = (struct icmp6_hdr *)(nip6 + 1);
588 			bcopy(icmp6, nicmp6, sizeof(struct icmp6_hdr));
589 			noff = sizeof(struct ip6_hdr);
590 			n->m_len = noff + sizeof(struct icmp6_hdr);
591 			/*
592 			 * Adjust mbuf.  ip6_plen will be adjusted in
593 			 * ip6_output().
594 			 * n->m_pkthdr.len == n0->m_pkthdr.len at this point.
595 			 */
596 			n->m_pkthdr.len += noff + sizeof(struct icmp6_hdr);
597 			n->m_pkthdr.len -= (off + sizeof(struct icmp6_hdr));
598 			m_adj(n0, off + sizeof(struct icmp6_hdr));
599 			n->m_next = n0;
600 		} else {
			/*
			 * Also entered by goto from the "Give up local"
			 * paths above, where n is the original packet.
			 */
601 	 deliverecho:
602 			IP6_EXTHDR_GET(nicmp6, struct icmp6_hdr *, n, off,
603 			    sizeof(*nicmp6));
604 			noff = off;
605 		}
		/* turn n into the echo reply and send it back */
606 		if (n) {
607 			nicmp6->icmp6_type = ICMP6_ECHO_REPLY;
608 			nicmp6->icmp6_code = 0;
609 			icmp6stat_inc(icp6s_reflect);
610 			icmp6stat_inc(icp6s_outhist + ICMP6_ECHO_REPLY);
611 			if (!icmp6_reflect(&n, noff, NULL))
612 				ip6_send(n);
613 		}
		/* m is NULL when the original was used for the reply */
614 		if (!m)
615 			goto freeit;
616 		break;
617 
618 	case ICMP6_ECHO_REPLY:
619 		if (code != 0)
620 			goto badcode;
621 		break;
622 
	/*
	 * For the following protocol handlers the pattern is the same:
	 * the handler gets a copy and the original goes on to raw
	 * sockets; if no copy can be made the handler gets the original
	 * and raw socket delivery is skipped.
	 */
623 	case MLD_LISTENER_QUERY:
624 	case MLD_LISTENER_REPORT:
625 		if (icmp6len < sizeof(struct mld_hdr))
626 			goto badlen;
627 		if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
628 			/* give up local */
629 			mld6_input(m, off);
630 			m = NULL;
631 			goto freeit;
632 		}
633 		mld6_input(n, off);
634 		/* m stays. */
635 		break;
636 
637 	case MLD_LISTENER_DONE:
638 		if (icmp6len < sizeof(struct mld_hdr))	/* necessary? */
639 			goto badlen;
640 		break;		/* nothing to be done in kernel */
641 
642 	case MLD_MTRACE_RESP:
643 	case MLD_MTRACE:
644 		/* XXX: these two are experimental.  not officially defined. */
645 		/* XXX: per-interface statistics? */
646 		break;		/* just pass it to applications */
647 
648 	case ICMP6_WRUREQUEST:	/* ICMP6_FQDN_QUERY */
649 		/* IPv6 Node Information Queries are not supported */
650 		break;
651 	case ICMP6_WRUREPLY:
652 		break;
653 
654 	case ND_ROUTER_SOLICIT:
655 	case ND_ROUTER_ADVERT:
656 		if (code != 0)
657 			goto badcode;
658 		if ((icmp6->icmp6_type == ND_ROUTER_SOLICIT && icmp6len <
659 		    sizeof(struct nd_router_solicit)) ||
660 		    (icmp6->icmp6_type == ND_ROUTER_ADVERT && icmp6len <
661 		    sizeof(struct nd_router_advert)))
662 			goto badlen;
663 
664 		if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
665 			/* give up local */
666 			nd6_rtr_cache(m, off, icmp6len,
667 			    icmp6->icmp6_type);
668 			m = NULL;
669 			goto freeit;
670 		}
671 		nd6_rtr_cache(n, off, icmp6len, icmp6->icmp6_type);
672 		/* m stays. */
673 		break;
674 
675 	case ND_NEIGHBOR_SOLICIT:
676 		if (code != 0)
677 			goto badcode;
678 		if (icmp6len < sizeof(struct nd_neighbor_solicit))
679 			goto badlen;
680 		if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
681 			/* give up local */
682 			nd6_ns_input(m, off, icmp6len);
683 			m = NULL;
684 			goto freeit;
685 		}
686 		nd6_ns_input(n, off, icmp6len);
687 		/* m stays. */
688 		break;
689 
690 	case ND_NEIGHBOR_ADVERT:
691 		if (code != 0)
692 			goto badcode;
693 		if (icmp6len < sizeof(struct nd_neighbor_advert))
694 			goto badlen;
695 		if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
696 			/* give up local */
697 			nd6_na_input(m, off, icmp6len);
698 			m = NULL;
699 			goto freeit;
700 		}
701 		nd6_na_input(n, off, icmp6len);
702 		/* m stays. */
703 		break;
704 
705 	case ND_REDIRECT:
706 		if (code != 0)
707 			goto badcode;
708 		if (icmp6len < sizeof(struct nd_redirect))
709 			goto badlen;
710 		if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
711 			/* give up local */
712 			icmp6_redirect_input(m, off);
713 			m = NULL;
714 			goto freeit;
715 		}
716 		icmp6_redirect_input(n, off);
717 		/* m stays. */
718 		break;
719 
720 	case ICMP6_ROUTER_RENUMBERING:
721 		if (code != ICMP6_ROUTER_RENUMBERING_COMMAND &&
722 		    code != ICMP6_ROUTER_RENUMBERING_RESULT)
723 			goto badcode;
724 		if (icmp6len < sizeof(struct icmp6_router_renum))
725 			goto badlen;
726 		break;
727 
728 	default:
729 		nd6log((LOG_DEBUG,
730 		    "icmp6_input: unknown type %d(src=%s, dst=%s, ifid=%u)\n",
731 		    icmp6->icmp6_type,
732 		    inet_ntop(AF_INET6, &ip6->ip6_src, src, sizeof(src)),
733 		    inet_ntop(AF_INET6, &ip6->ip6_dst, dst, sizeof(dst)),
734 		    m->m_pkthdr.ph_ifidx));
735 		if (icmp6->icmp6_type < ICMP6_ECHO_REQUEST) {
736 			/* ICMPv6 error: MUST deliver it by spec... */
737 			code = PRC_NCMDS;
738 			/* deliver */
739 		} else {
740 			/* ICMPv6 informational: MUST not deliver */
741 			break;
742 		}
		/*
		 * The three labels below are goto targets for the cases
		 * above; only unknown error types fall into "deliver"
		 * directly.
		 */
743 deliver:
744 		if (icmp6_notify_error(m, off, icmp6len, code)) {
745 			/* In this case, m should've been freed. */
746 			return (IPPROTO_DONE);
747 		}
748 		break;
749 
		/* bad code or length: count it, but still deliver to raw sockets */
750 badcode:
751 		icmp6stat_inc(icp6s_badcode);
752 		break;
753 
754 badlen:
755 		icmp6stat_inc(icp6s_badlen);
756 		break;
757 	}
758 
759 #if NPF > 0
760 raw:
761 #endif
762 	/* deliver the packet to appropriate sockets */
763 	return rip6_input(mp, offp, proto, af);
764 
765  freeit:
	/* m may be NULL here when a handler took over the original packet */
766 	m_freem(m);
767 	return IPPROTO_DONE;
768 }
769 
770 int
771 icmp6_notify_error(struct mbuf *m, int off, int icmp6len, int code)
772 {
773 	struct icmp6_hdr *icmp6;
774 	struct ip6_hdr *eip6;
775 	u_int32_t notifymtu;
776 	struct sockaddr_in6 icmp6src, icmp6dst;
777 
778 	if (icmp6len < sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr)) {
779 		icmp6stat_inc(icp6s_tooshort);
780 		goto freeit;
781 	}
782 	IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off,
783 		       sizeof(*icmp6) + sizeof(struct ip6_hdr));
784 	if (icmp6 == NULL) {
785 		icmp6stat_inc(icp6s_tooshort);
786 		return (-1);
787 	}
788 	eip6 = (struct ip6_hdr *)(icmp6 + 1);
789 
790 	/* Detect the upper level protocol */
791 	{
792 		void (*ctlfunc)(int, struct sockaddr *, u_int, void *);
793 		u_int8_t nxt = eip6->ip6_nxt;
794 		int eoff = off + sizeof(struct icmp6_hdr) +
795 			sizeof(struct ip6_hdr);
796 		struct ip6ctlparam ip6cp;
797 		struct in6_addr *finaldst = NULL;
798 		int icmp6type = icmp6->icmp6_type;
799 		struct ip6_frag *fh;
800 		struct ip6_rthdr *rth;
801 		struct ip6_rthdr0 *rth0;
802 		int rthlen;
803 
804 		while (1) { /* XXX: should avoid infinite loop explicitly? */
805 			struct ip6_ext *eh;
806 
807 			switch (nxt) {
808 			case IPPROTO_HOPOPTS:
809 			case IPPROTO_DSTOPTS:
810 			case IPPROTO_AH:
811 				IP6_EXTHDR_GET(eh, struct ip6_ext *, m,
812 					       eoff, sizeof(*eh));
813 				if (eh == NULL) {
814 					icmp6stat_inc(icp6s_tooshort);
815 					return (-1);
816 				}
817 
818 				if (nxt == IPPROTO_AH)
819 					eoff += (eh->ip6e_len + 2) << 2;
820 				else
821 					eoff += (eh->ip6e_len + 1) << 3;
822 				nxt = eh->ip6e_nxt;
823 				break;
824 			case IPPROTO_ROUTING:
825 				/*
826 				 * When the erroneous packet contains a
827 				 * routing header, we should examine the
828 				 * header to determine the final destination.
829 				 * Otherwise, we can't properly update
830 				 * information that depends on the final
831 				 * destination (e.g. path MTU).
832 				 */
833 				IP6_EXTHDR_GET(rth, struct ip6_rthdr *, m,
834 					       eoff, sizeof(*rth));
835 				if (rth == NULL) {
836 					icmp6stat_inc(icp6s_tooshort);
837 					return (-1);
838 				}
839 				rthlen = (rth->ip6r_len + 1) << 3;
840 				/*
841 				 * XXX: currently there is no
842 				 * officially defined type other
843 				 * than type-0.
844 				 * Note that if the segment left field
845 				 * is 0, all intermediate hops must
846 				 * have been passed.
847 				 */
848 				if (rth->ip6r_segleft &&
849 				    rth->ip6r_type == IPV6_RTHDR_TYPE_0) {
850 					int hops;
851 
852 					IP6_EXTHDR_GET(rth0,
853 						       struct ip6_rthdr0 *, m,
854 						       eoff, rthlen);
855 					if (rth0 == NULL) {
856 						icmp6stat_inc(icp6s_tooshort);
857 						return (-1);
858 					}
859 					/* just ignore a bogus header */
860 					if ((rth0->ip6r0_len % 2) == 0 &&
861 					    (hops = rth0->ip6r0_len/2))
862 						finaldst = (struct in6_addr *)(rth0 + 1) + (hops - 1);
863 				}
864 				eoff += rthlen;
865 				nxt = rth->ip6r_nxt;
866 				break;
867 			case IPPROTO_FRAGMENT:
868 				IP6_EXTHDR_GET(fh, struct ip6_frag *, m,
869 					       eoff, sizeof(*fh));
870 				if (fh == NULL) {
871 					icmp6stat_inc(icp6s_tooshort);
872 					return (-1);
873 				}
874 				/*
875 				 * Data after a fragment header is meaningless
876 				 * unless it is the first fragment, but
877 				 * we'll go to the notify label for path MTU
878 				 * discovery.
879 				 */
880 				if (fh->ip6f_offlg & IP6F_OFF_MASK)
881 					goto notify;
882 
883 				eoff += sizeof(struct ip6_frag);
884 				nxt = fh->ip6f_nxt;
885 				break;
886 			default:
887 				/*
888 				 * This case includes ESP and the No Next
889 				 * Header.  In such cases going to the notify
890 				 * label does not have any meaning
891 				 * (i.e. ctlfunc will be NULL), but we go
892 				 * anyway since we might have to update
893 				 * path MTU information.
894 				 */
895 				goto notify;
896 			}
897 		}
898 	  notify:
899 		IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off,
900 			       sizeof(*icmp6) + sizeof(struct ip6_hdr));
901 		if (icmp6 == NULL) {
902 			icmp6stat_inc(icp6s_tooshort);
903 			return (-1);
904 		}
905 
906 		eip6 = (struct ip6_hdr *)(icmp6 + 1);
907 		bzero(&icmp6dst, sizeof(icmp6dst));
908 		icmp6dst.sin6_len = sizeof(struct sockaddr_in6);
909 		icmp6dst.sin6_family = AF_INET6;
910 		if (finaldst == NULL)
911 			icmp6dst.sin6_addr = eip6->ip6_dst;
912 		else
913 			icmp6dst.sin6_addr = *finaldst;
914 		icmp6dst.sin6_scope_id = in6_addr2scopeid(m->m_pkthdr.ph_ifidx,
915 		    &icmp6dst.sin6_addr);
916 		if (in6_embedscope(&icmp6dst.sin6_addr, &icmp6dst, NULL)) {
917 			/* should be impossbile */
918 			nd6log((LOG_DEBUG,
919 			    "icmp6_notify_error: in6_embedscope failed\n"));
920 			goto freeit;
921 		}
922 
923 		/*
924 		 * retrieve parameters from the inner IPv6 header, and convert
925 		 * them into sockaddr structures.
926 		 */
927 		bzero(&icmp6src, sizeof(icmp6src));
928 		icmp6src.sin6_len = sizeof(struct sockaddr_in6);
929 		icmp6src.sin6_family = AF_INET6;
930 		icmp6src.sin6_addr = eip6->ip6_src;
931 		icmp6src.sin6_scope_id = in6_addr2scopeid(m->m_pkthdr.ph_ifidx,
932 		    &icmp6src.sin6_addr);
933 		if (in6_embedscope(&icmp6src.sin6_addr, &icmp6src, NULL)) {
934 			/* should be impossbile */
935 			nd6log((LOG_DEBUG,
936 			    "icmp6_notify_error: in6_embedscope failed\n"));
937 			goto freeit;
938 		}
939 		icmp6src.sin6_flowinfo =
940 		    (eip6->ip6_flow & IPV6_FLOWLABEL_MASK);
941 
942 		if (finaldst == NULL)
943 			finaldst = &eip6->ip6_dst;
944 		ip6cp.ip6c_m = m;
945 		ip6cp.ip6c_icmp6 = icmp6;
946 		ip6cp.ip6c_ip6 = (struct ip6_hdr *)(icmp6 + 1);
947 		ip6cp.ip6c_off = eoff;
948 		ip6cp.ip6c_finaldst = finaldst;
949 		ip6cp.ip6c_src = &icmp6src;
950 		ip6cp.ip6c_nxt = nxt;
951 #if NPF > 0
952 		pf_pkt_addr_changed(m);
953 #endif
954 
955 		if (icmp6type == ICMP6_PACKET_TOO_BIG) {
956 			notifymtu = ntohl(icmp6->icmp6_mtu);
957 			ip6cp.ip6c_cmdarg = (void *)&notifymtu;
958 		}
959 
960 		ctlfunc = inet6sw[ip6_protox[nxt]].pr_ctlinput;
961 		if (ctlfunc)
962 			(*ctlfunc)(code, sin6tosa(&icmp6dst),
963 			    m->m_pkthdr.ph_rtableid, &ip6cp);
964 	}
965 	return (0);
966 
967   freeit:
968 	m_freem(m);
969 	return (-1);
970 }
971 
972 void
973 icmp6_mtudisc_update(struct ip6ctlparam *ip6cp, int validated)
974 {
975 	unsigned long rtcount;
976 	struct icmp6_mtudisc_callback *mc;
977 	struct in6_addr *dst = ip6cp->ip6c_finaldst;
978 	struct icmp6_hdr *icmp6 = ip6cp->ip6c_icmp6;
979 	struct mbuf *m = ip6cp->ip6c_m;	/* will be necessary for scope issue */
980 	u_int mtu = ntohl(icmp6->icmp6_mtu);
981 	struct rtentry *rt = NULL;
982 	struct sockaddr_in6 sin6;
983 
984 	if (mtu < IPV6_MMTU)
985 		return;
986 
987 	/*
988 	 * allow non-validated cases if memory is plenty, to make traffic
989 	 * from non-connected pcb happy.
990 	 */
991 	rtcount = rt_timer_queue_count(icmp6_mtudisc_timeout_q);
992 	if (validated) {
993 		if (0 <= icmp6_mtudisc_hiwat && rtcount > icmp6_mtudisc_hiwat)
994 			return;
995 		else if (0 <= icmp6_mtudisc_lowat &&
996 		    rtcount > icmp6_mtudisc_lowat) {
997 			/*
998 			 * XXX nuke a victim, install the new one.
999 			 */
1000 		}
1001 	} else {
1002 		if (0 <= icmp6_mtudisc_lowat && rtcount > icmp6_mtudisc_lowat)
1003 			return;
1004 	}
1005 
1006 	bzero(&sin6, sizeof(sin6));
1007 	sin6.sin6_family = PF_INET6;
1008 	sin6.sin6_len = sizeof(struct sockaddr_in6);
1009 	sin6.sin6_addr = *dst;
1010 	/* XXX normally, this won't happen */
1011 	if (IN6_IS_ADDR_LINKLOCAL(dst)) {
1012 		sin6.sin6_addr.s6_addr16[1] = htons(m->m_pkthdr.ph_ifidx);
1013 	}
1014 	sin6.sin6_scope_id = in6_addr2scopeid(m->m_pkthdr.ph_ifidx,
1015 	    &sin6.sin6_addr);
1016 
1017 	rt = icmp6_mtudisc_clone(&sin6, m->m_pkthdr.ph_rtableid, 0);
1018 
1019 	if (rt != NULL && ISSET(rt->rt_flags, RTF_HOST) &&
1020 	    !(rt->rt_locks & RTV_MTU) &&
1021 	    (rt->rt_mtu > mtu || rt->rt_mtu == 0)) {
1022 		struct ifnet *ifp;
1023 
1024 		ifp = if_get(rt->rt_ifidx);
1025 		if (ifp != NULL && mtu < ifp->if_mtu) {
1026 			icmp6stat_inc(icp6s_pmtuchg);
1027 			rt->rt_mtu = mtu;
1028 		}
1029 		if_put(ifp);
1030 	}
1031 	rtfree(rt);
1032 
1033 	/*
1034 	 * Notify protocols that the MTU for this destination
1035 	 * has changed.
1036 	 */
1037 	LIST_FOREACH(mc, &icmp6_mtudisc_callbacks, mc_list)
1038 		(*mc->mc_func)(&sin6, m->m_pkthdr.ph_rtableid);
1039 }
1040 
1041 /*
1042  * Reflect the ip6 packet back to the source.
1043  * OFF points to the icmp6 header, counted from the top of the mbuf.
1044  */
1045 int
1046 icmp6_reflect(struct mbuf **mp, size_t off, struct sockaddr *sa)
1047 {
1048 	struct mbuf *m = *mp;
1049 	struct rtentry *rt = NULL;
1050 	struct ip6_hdr *ip6;
1051 	struct icmp6_hdr *icmp6;
1052 	struct in6_addr t, *src = NULL;
1053 	struct sockaddr_in6 sa6_src, sa6_dst;
1054 	u_int rtableid;
1055 	u_int8_t pfflags;
1056 
1057 	CTASSERT(sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) <= MHLEN);
1058 
1059 	/* too short to reflect */
1060 	if (off < sizeof(struct ip6_hdr)) {
1061 		nd6log((LOG_DEBUG,
1062 		    "sanity fail: off=%lx, sizeof(ip6)=%lx in %s:%d\n",
1063 		    (u_long)off, (u_long)sizeof(struct ip6_hdr),
1064 		    __FILE__, __LINE__));
1065 		goto bad;
1066 	}
1067 
1068 	if (m->m_pkthdr.ph_loopcnt++ >= M_MAXLOOP) {
1069 		m_freemp(mp);
1070 		return (ELOOP);
1071 	}
1072 	rtableid = m->m_pkthdr.ph_rtableid;
1073 	pfflags = m->m_pkthdr.pf.flags;
1074 	m_resethdr(m);
1075 	m->m_pkthdr.ph_rtableid = rtableid;
1076 	m->m_pkthdr.pf.flags = pfflags & PF_TAG_GENERATED;
1077 
1078 	/*
1079 	 * If there are extra headers between IPv6 and ICMPv6, strip
1080 	 * off that header first.
1081 	 */
1082 	if (off > sizeof(struct ip6_hdr)) {
1083 		size_t l;
1084 		struct ip6_hdr nip6;
1085 
1086 		l = off - sizeof(struct ip6_hdr);
1087 		m_copydata(m, 0, sizeof(nip6), (caddr_t)&nip6);
1088 		m_adj(m, l);
1089 		l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr);
1090 		if (m->m_len < l) {
1091 			if ((m = *mp = m_pullup(m, l)) == NULL)
1092 				return (EMSGSIZE);
1093 		}
1094 		memcpy(mtod(m, caddr_t), &nip6, sizeof(nip6));
1095 	} else /* off == sizeof(struct ip6_hdr) */ {
1096 		size_t l;
1097 		l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr);
1098 		if (m->m_len < l) {
1099 			if ((m = *mp = m_pullup(m, l)) == NULL)
1100 				return (EMSGSIZE);
1101 		}
1102 	}
1103 	ip6 = mtod(m, struct ip6_hdr *);
1104 	ip6->ip6_nxt = IPPROTO_ICMPV6;
1105 	icmp6 = (struct icmp6_hdr *)(ip6 + 1);
1106 
1107 	t = ip6->ip6_dst;
1108 	/*
1109 	 * ip6_input() drops a packet if its src is multicast.
1110 	 * So, the src is never multicast.
1111 	 */
1112 	ip6->ip6_dst = ip6->ip6_src;
1113 
1114 	/*
1115 	 * XXX: make sure to embed scope zone information, using
1116 	 * already embedded IDs or the received interface (if any).
1117 	 * Note that rcvif may be NULL.
1118 	 * TODO: scoped routing case (XXX).
1119 	 */
1120 	bzero(&sa6_src, sizeof(sa6_src));
1121 	sa6_src.sin6_family = AF_INET6;
1122 	sa6_src.sin6_len = sizeof(sa6_src);
1123 	sa6_src.sin6_addr = ip6->ip6_dst;
1124 	bzero(&sa6_dst, sizeof(sa6_dst));
1125 	sa6_dst.sin6_family = AF_INET6;
1126 	sa6_dst.sin6_len = sizeof(sa6_dst);
1127 	sa6_dst.sin6_addr = t;
1128 
1129 	if (sa == NULL) {
1130 		/*
1131 		 * If the incoming packet was addressed directly to us (i.e.
1132 		 * unicast), use dst as the src for the reply. The
1133 		 * IN6_IFF_TENTATIVE|IN6_IFF_DUPLICATED case would be VERY rare,
1134 		 * but is possible (for example) when we encounter an error
1135 		 * while forwarding procedure destined to a duplicated address
1136 		 * of ours.
1137 		 */
1138 		rt = rtalloc(sin6tosa(&sa6_dst), 0, rtableid);
1139 		if (rtisvalid(rt) && ISSET(rt->rt_flags, RTF_LOCAL) &&
1140 		    !ISSET(ifatoia6(rt->rt_ifa)->ia6_flags,
1141 		    IN6_IFF_ANYCAST|IN6_IFF_TENTATIVE|IN6_IFF_DUPLICATED)) {
1142 			src = &t;
1143 		}
1144 		rtfree(rt);
1145 		rt = NULL;
1146 		sa = sin6tosa(&sa6_src);
1147 	}
1148 
1149 	if (src == NULL) {
1150 		struct in6_ifaddr *ia6;
1151 
1152 		/*
1153 		 * This case matches to multicasts, our anycast, or unicasts
1154 		 * that we do not own.  Select a source address based on the
1155 		 * source address of the erroneous packet.
1156 		 */
1157 		rt = rtalloc(sa, RT_RESOLVE, rtableid);
1158 		if (!rtisvalid(rt)) {
1159 			char addr[INET6_ADDRSTRLEN];
1160 
1161 			nd6log((LOG_DEBUG,
1162 			    "%s: source can't be determined: dst=%s\n",
1163 			    __func__, inet_ntop(AF_INET6, &sa6_src.sin6_addr,
1164 			    addr, sizeof(addr))));
1165 			rtfree(rt);
1166 			goto bad;
1167 		}
1168 		ia6 = in6_ifawithscope(rt->rt_ifa->ifa_ifp, &t, rtableid);
1169 		if (ia6 != NULL)
1170 			src = &ia6->ia_addr.sin6_addr;
1171 		if (src == NULL)
1172 			src = &ifatoia6(rt->rt_ifa)->ia_addr.sin6_addr;
1173 	}
1174 
1175 	ip6->ip6_src = *src;
1176 	rtfree(rt);
1177 
1178 	ip6->ip6_flow = 0;
1179 	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
1180 	ip6->ip6_vfc |= IPV6_VERSION;
1181 	ip6->ip6_nxt = IPPROTO_ICMPV6;
1182 	ip6->ip6_hlim = ip6_defhlim;
1183 
1184 	icmp6->icmp6_cksum = 0;
1185 	m->m_pkthdr.csum_flags = M_ICMP_CSUM_OUT;
1186 
1187 	/*
1188 	 * XXX option handling
1189 	 */
1190 
1191 	m->m_flags &= ~(M_BCAST|M_MCAST);
1192 	return (0);
1193 
1194  bad:
1195 	m_freemp(mp);
1196 	return (EHOSTUNREACH);
1197 }
1198 
/*
 * ICMPv6 fast timeout entry point.
 * The only work done here is to invoke mld6_fasttimeo().
 */
void
icmp6_fasttimo(void)
{

	mld6_fasttimeo();
}
1205 
/*
 * Format the three addresses of a redirect as
 * "(src=... dst=... tgt=...)" for diagnostic messages.
 *
 * The string lives in a single static buffer, so every call overwrites
 * the result of the previous one.
 */
const char *
icmp6_redirect_diag(struct in6_addr *src6, struct in6_addr *dst6,
    struct in6_addr *tgt6)
{
	static char diag[1024]; /* XXX */
	char srcstr[INET6_ADDRSTRLEN];
	char dststr[INET6_ADDRSTRLEN];
	char tgtstr[INET6_ADDRSTRLEN];
	const char *s, *d, *t;

	/* render each address into its own scratch buffer first */
	s = inet_ntop(AF_INET6, src6, srcstr, sizeof(srcstr));
	d = inet_ntop(AF_INET6, dst6, dststr, sizeof(dststr));
	t = inet_ntop(AF_INET6, tgt6, tgtstr, sizeof(tgtstr));

	snprintf(diag, sizeof(diag), "(src=%s dst=%s tgt=%s)", s, d, t);
	return diag;
}
1221 
1222 void
1223 icmp6_redirect_input(struct mbuf *m, int off)
1224 {
1225 	struct ifnet *ifp;
1226 	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
1227 	struct nd_redirect *nd_rd;
1228 	int icmp6len = ntohs(ip6->ip6_plen);
1229 	char *lladdr = NULL;
1230 	int lladdrlen = 0;
1231 	struct rtentry *rt = NULL;
1232 	int is_router;
1233 	int is_onlink;
1234 	struct in6_addr src6 = ip6->ip6_src;
1235 	struct in6_addr redtgt6;
1236 	struct in6_addr reddst6;
1237 	union nd_opts ndopts;
1238 	char addr[INET6_ADDRSTRLEN];
1239 
1240 	ifp = if_get(m->m_pkthdr.ph_ifidx);
1241 	if (ifp == NULL)
1242 		return;
1243 
1244 	/* XXX if we are router, we don't update route by icmp6 redirect */
1245 	if (ip6_forwarding)
1246 		goto freeit;
1247 	if (!(ifp->if_xflags & IFXF_AUTOCONF6))
1248 		goto freeit;
1249 
1250 	IP6_EXTHDR_GET(nd_rd, struct nd_redirect *, m, off, icmp6len);
1251 	if (nd_rd == NULL) {
1252 		icmp6stat_inc(icp6s_tooshort);
1253 		if_put(ifp);
1254 		return;
1255 	}
1256 	redtgt6 = nd_rd->nd_rd_target;
1257 	reddst6 = nd_rd->nd_rd_dst;
1258 
1259 	if (IN6_IS_ADDR_LINKLOCAL(&redtgt6))
1260 		redtgt6.s6_addr16[1] = htons(ifp->if_index);
1261 	if (IN6_IS_ADDR_LINKLOCAL(&reddst6))
1262 		reddst6.s6_addr16[1] = htons(ifp->if_index);
1263 
1264 	/* validation */
1265 	if (!IN6_IS_ADDR_LINKLOCAL(&src6)) {
1266 		nd6log((LOG_ERR,
1267 			"ICMP6 redirect sent from %s rejected; "
1268 			"must be from linklocal\n",
1269 			inet_ntop(AF_INET6, &src6, addr, sizeof(addr))));
1270 		goto bad;
1271 	}
1272 	if (ip6->ip6_hlim != 255) {
1273 		nd6log((LOG_ERR,
1274 			"ICMP6 redirect sent from %s rejected; "
1275 			"hlim=%d (must be 255)\n",
1276 			inet_ntop(AF_INET6, &src6, addr, sizeof(addr)),
1277 			ip6->ip6_hlim));
1278 		goto bad;
1279 	}
1280 	if (IN6_IS_ADDR_MULTICAST(&reddst6)) {
1281 		nd6log((LOG_ERR,
1282 			"ICMP6 redirect rejected; "
1283 			"redirect dst must be unicast: %s\n",
1284 			icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
1285 		goto bad;
1286 	}
1287     {
1288 	/* ip6->ip6_src must be equal to gw for icmp6->icmp6_reddst */
1289 	struct sockaddr_in6 sin6;
1290 	struct in6_addr *gw6;
1291 
1292 	bzero(&sin6, sizeof(sin6));
1293 	sin6.sin6_family = AF_INET6;
1294 	sin6.sin6_len = sizeof(struct sockaddr_in6);
1295 	memcpy(&sin6.sin6_addr, &reddst6, sizeof(reddst6));
1296 	rt = rtalloc(sin6tosa(&sin6), 0, m->m_pkthdr.ph_rtableid);
1297 	if (rt) {
1298 		if (rt->rt_gateway == NULL ||
1299 		    rt->rt_gateway->sa_family != AF_INET6) {
1300 			nd6log((LOG_ERR,
1301 			    "ICMP6 redirect rejected; no route "
1302 			    "with inet6 gateway found for redirect dst: %s\n",
1303 			    icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
1304 			rtfree(rt);
1305 			goto bad;
1306 		}
1307 
1308 		gw6 = &(satosin6(rt->rt_gateway)->sin6_addr);
1309 		if (bcmp(&src6, gw6, sizeof(struct in6_addr)) != 0) {
1310 			nd6log((LOG_ERR,
1311 				"ICMP6 redirect rejected; "
1312 				"not equal to gw-for-src=%s (must be same): "
1313 				"%s\n",
1314 				inet_ntop(AF_INET6, gw6, addr, sizeof(addr)),
1315 				icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
1316 			rtfree(rt);
1317 			goto bad;
1318 		}
1319 	} else {
1320 		nd6log((LOG_ERR,
1321 			"ICMP6 redirect rejected; "
1322 			"no route found for redirect dst: %s\n",
1323 			icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
1324 		goto bad;
1325 	}
1326 	rtfree(rt);
1327 	rt = NULL;
1328     }
1329 
1330 	is_router = is_onlink = 0;
1331 	if (IN6_IS_ADDR_LINKLOCAL(&redtgt6))
1332 		is_router = 1;	/* router case */
1333 	if (bcmp(&redtgt6, &reddst6, sizeof(redtgt6)) == 0)
1334 		is_onlink = 1;	/* on-link destination case */
1335 	if (!is_router && !is_onlink) {
1336 		nd6log((LOG_ERR,
1337 			"ICMP6 redirect rejected; "
1338 			"neither router case nor onlink case: %s\n",
1339 			icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
1340 		goto bad;
1341 	}
1342 	/* validation passed */
1343 
1344 	icmp6len -= sizeof(*nd_rd);
1345 	nd6_option_init(nd_rd + 1, icmp6len, &ndopts);
1346 	if (nd6_options(&ndopts) < 0) {
1347 		nd6log((LOG_INFO, "icmp6_redirect_input: "
1348 			"invalid ND option, rejected: %s\n",
1349 			icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
1350 		/* nd6_options have incremented stats */
1351 		goto freeit;
1352 	}
1353 
1354 	if (ndopts.nd_opts_tgt_lladdr) {
1355 		lladdr = (char *)(ndopts.nd_opts_tgt_lladdr + 1);
1356 		lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3;
1357 	}
1358 
1359 	if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) {
1360 		nd6log((LOG_INFO,
1361 			"icmp6_redirect_input: lladdrlen mismatch for %s "
1362 			"(if %d, icmp6 packet %d): %s\n",
1363 			inet_ntop(AF_INET6, &redtgt6, addr, sizeof(addr)),
1364 			ifp->if_addrlen, lladdrlen - 2,
1365 			icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
1366 		goto bad;
1367 	}
1368 
1369 	/* RFC 2461 8.3 */
1370 	nd6_cache_lladdr(ifp, &redtgt6, lladdr, lladdrlen, ND_REDIRECT,
1371 			 is_onlink ? ND_REDIRECT_ONLINK : ND_REDIRECT_ROUTER);
1372 
1373 	if (!is_onlink) {	/* better router case.  perform rtredirect. */
1374 		/* perform rtredirect */
1375 		struct sockaddr_in6 sdst;
1376 		struct sockaddr_in6 sgw;
1377 		struct sockaddr_in6 ssrc;
1378 		unsigned long rtcount;
1379 		struct rtentry *newrt = NULL;
1380 
1381 		/*
1382 		 * do not install redirect route, if the number of entries
1383 		 * is too much (> hiwat).  note that, the node (= host) will
1384 		 * work just fine even if we do not install redirect route
1385 		 * (there will be additional hops, though).
1386 		 */
1387 		rtcount = rt_timer_queue_count(icmp6_redirect_timeout_q);
1388 		if (0 <= ip6_maxdynroutes && rtcount >= ip6_maxdynroutes)
1389 			goto freeit;
1390 		else if (0 <= icmp6_redirect_lowat &&
1391 		    rtcount > icmp6_redirect_lowat) {
1392 			/*
1393 			 * XXX nuke a victim, install the new one.
1394 			 */
1395 		}
1396 
1397 		bzero(&sdst, sizeof(sdst));
1398 		bzero(&sgw, sizeof(sgw));
1399 		bzero(&ssrc, sizeof(ssrc));
1400 		sdst.sin6_family = sgw.sin6_family = ssrc.sin6_family = AF_INET6;
1401 		sdst.sin6_len = sgw.sin6_len = ssrc.sin6_len =
1402 			sizeof(struct sockaddr_in6);
1403 		memcpy(&sgw.sin6_addr, &redtgt6, sizeof(struct in6_addr));
1404 		memcpy(&sdst.sin6_addr, &reddst6, sizeof(struct in6_addr));
1405 		memcpy(&ssrc.sin6_addr, &src6, sizeof(struct in6_addr));
1406 		rtredirect(sin6tosa(&sdst), sin6tosa(&sgw), sin6tosa(&ssrc),
1407 		    &newrt, m->m_pkthdr.ph_rtableid);
1408 
1409 		if (newrt) {
1410 			(void)rt_timer_add(newrt, icmp6_redirect_timeout,
1411 			    icmp6_redirect_timeout_q, m->m_pkthdr.ph_rtableid);
1412 			rtfree(newrt);
1413 		}
1414 	}
1415 	/* finally update cached route in each socket via pfctlinput */
1416 	{
1417 		struct sockaddr_in6 sdst;
1418 
1419 		bzero(&sdst, sizeof(sdst));
1420 		sdst.sin6_family = AF_INET6;
1421 		sdst.sin6_len = sizeof(struct sockaddr_in6);
1422 		memcpy(&sdst.sin6_addr, &reddst6, sizeof(struct in6_addr));
1423 		pfctlinput(PRC_REDIRECT_HOST, sin6tosa(&sdst));
1424 	}
1425 
1426  freeit:
1427 	if_put(ifp);
1428 	m_freem(m);
1429 	return;
1430 
1431  bad:
1432 	if_put(ifp);
1433 	icmp6stat_inc(icp6s_badredirect);
1434 	m_freem(m);
1435 }
1436 
1437 void
1438 icmp6_redirect_output(struct mbuf *m0, struct rtentry *rt)
1439 {
1440 	struct ifnet *ifp = NULL;
1441 	struct in6_addr *ifp_ll6;
1442 	struct in6_addr *nexthop;
1443 	struct ip6_hdr *sip6;	/* m0 as struct ip6_hdr */
1444 	struct mbuf *m = NULL;	/* newly allocated one */
1445 	struct ip6_hdr *ip6;	/* m as struct ip6_hdr */
1446 	struct nd_redirect *nd_rd;
1447 	size_t maxlen;
1448 	u_char *p;
1449 	struct sockaddr_in6 src_sa;
1450 
1451 	icmp6_errcount(ND_REDIRECT, 0);
1452 
1453 	/* if we are not router, we don't send icmp6 redirect */
1454 	if (!ip6_forwarding)
1455 		goto fail;
1456 
1457 	/* sanity check */
1458 	if (m0 == NULL || !rtisvalid(rt))
1459 		goto fail;
1460 
1461 	ifp = if_get(rt->rt_ifidx);
1462 	if (ifp == NULL)
1463 		goto fail;
1464 
1465 	/*
1466 	 * Address check:
1467 	 *  the source address must identify a neighbor, and
1468 	 *  the destination address must not be a multicast address
1469 	 *  [RFC 2461, sec 8.2]
1470 	 */
1471 	sip6 = mtod(m0, struct ip6_hdr *);
1472 	bzero(&src_sa, sizeof(src_sa));
1473 	src_sa.sin6_family = AF_INET6;
1474 	src_sa.sin6_len = sizeof(src_sa);
1475 	src_sa.sin6_addr = sip6->ip6_src;
1476 	/* we don't currently use sin6_scope_id, but eventually use it */
1477 	src_sa.sin6_scope_id = in6_addr2scopeid(ifp->if_index, &sip6->ip6_src);
1478 	if (nd6_is_addr_neighbor(&src_sa, ifp) == 0)
1479 		goto fail;
1480 	if (IN6_IS_ADDR_MULTICAST(&sip6->ip6_dst))
1481 		goto fail;	/* what should we do here? */
1482 
1483 	/* rate limit */
1484 	if (icmp6_ratelimit(&sip6->ip6_src, ND_REDIRECT, 0))
1485 		goto fail;
1486 
1487 	/*
1488 	 * Since we are going to append up to 1280 bytes (= IPV6_MMTU),
1489 	 * we almost always ask for an mbuf cluster for simplicity.
1490 	 * (MHLEN < IPV6_MMTU is almost always true)
1491 	 */
1492 #if IPV6_MMTU >= MCLBYTES
1493 # error assumption failed about IPV6_MMTU and MCLBYTES
1494 #endif
1495 	MGETHDR(m, M_DONTWAIT, MT_HEADER);
1496 	if (m && IPV6_MMTU >= MHLEN)
1497 		MCLGET(m, M_DONTWAIT);
1498 	if (!m)
1499 		goto fail;
1500 	m->m_pkthdr.ph_ifidx = 0;
1501 	m->m_len = 0;
1502 	maxlen = m_trailingspace(m);
1503 	maxlen = min(IPV6_MMTU, maxlen);
1504 	/* just for safety */
1505 	if (maxlen < sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) +
1506 	    ((sizeof(struct nd_opt_hdr) + ifp->if_addrlen + 7) & ~7)) {
1507 		goto fail;
1508 	}
1509 
1510 	{
1511 		/* get ip6 linklocal address for ifp(my outgoing interface). */
1512 		struct in6_ifaddr *ia6;
1513 		if ((ia6 = in6ifa_ifpforlinklocal(ifp, IN6_IFF_TENTATIVE|
1514 		    IN6_IFF_DUPLICATED|IN6_IFF_ANYCAST)) == NULL)
1515 			goto fail;
1516 		ifp_ll6 = &ia6->ia_addr.sin6_addr;
1517 	}
1518 
1519 	/* get ip6 linklocal address for the router. */
1520 	if (rt->rt_gateway && (rt->rt_flags & RTF_GATEWAY)) {
1521 		struct sockaddr_in6 *sin6;
1522 		sin6 = satosin6(rt->rt_gateway);
1523 		nexthop = &sin6->sin6_addr;
1524 		if (!IN6_IS_ADDR_LINKLOCAL(nexthop))
1525 			nexthop = NULL;
1526 	} else
1527 		nexthop = NULL;
1528 
1529 	/* ip6 */
1530 	ip6 = mtod(m, struct ip6_hdr *);
1531 	ip6->ip6_flow = 0;
1532 	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
1533 	ip6->ip6_vfc |= IPV6_VERSION;
1534 	/* ip6->ip6_plen will be set later */
1535 	ip6->ip6_nxt = IPPROTO_ICMPV6;
1536 	ip6->ip6_hlim = 255;
1537 	/* ip6->ip6_src must be linklocal addr for my outgoing if. */
1538 	bcopy(ifp_ll6, &ip6->ip6_src, sizeof(struct in6_addr));
1539 	bcopy(&sip6->ip6_src, &ip6->ip6_dst, sizeof(struct in6_addr));
1540 
1541 	/* ND Redirect */
1542 	nd_rd = (struct nd_redirect *)(ip6 + 1);
1543 	nd_rd->nd_rd_type = ND_REDIRECT;
1544 	nd_rd->nd_rd_code = 0;
1545 	nd_rd->nd_rd_reserved = 0;
1546 	if (rt->rt_flags & RTF_GATEWAY) {
1547 		/*
1548 		 * nd_rd->nd_rd_target must be a link-local address in
1549 		 * better router cases.
1550 		 */
1551 		if (!nexthop)
1552 			goto fail;
1553 		bcopy(nexthop, &nd_rd->nd_rd_target,
1554 		      sizeof(nd_rd->nd_rd_target));
1555 		bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_dst,
1556 		      sizeof(nd_rd->nd_rd_dst));
1557 	} else {
1558 		/* make sure redtgt == reddst */
1559 		nexthop = &sip6->ip6_dst;
1560 		bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_target,
1561 		      sizeof(nd_rd->nd_rd_target));
1562 		bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_dst,
1563 		      sizeof(nd_rd->nd_rd_dst));
1564 	}
1565 
1566 	p = (u_char *)(nd_rd + 1);
1567 
1568 	{
1569 		/* target lladdr option */
1570 		struct rtentry *nrt;
1571 		int len;
1572 		struct sockaddr_dl *sdl;
1573 		struct nd_opt_hdr *nd_opt;
1574 		char *lladdr;
1575 
1576 		len = sizeof(*nd_opt) + ifp->if_addrlen;
1577 		len = (len + 7) & ~7;	/* round by 8 */
1578 		/* safety check */
1579 		if (len + (p - (u_char *)ip6) > maxlen)
1580 			goto nolladdropt;
1581 		nrt = nd6_lookup(nexthop, 0, ifp, ifp->if_rdomain);
1582 		if ((nrt != NULL) &&
1583 		    (nrt->rt_flags & (RTF_GATEWAY|RTF_LLINFO)) == RTF_LLINFO &&
1584 		    (nrt->rt_gateway->sa_family == AF_LINK) &&
1585 		    (sdl = satosdl(nrt->rt_gateway)) &&
1586 		    sdl->sdl_alen) {
1587 			nd_opt = (struct nd_opt_hdr *)p;
1588 			nd_opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
1589 			nd_opt->nd_opt_len = len >> 3;
1590 			lladdr = (char *)(nd_opt + 1);
1591 			bcopy(LLADDR(sdl), lladdr, ifp->if_addrlen);
1592 			p += len;
1593 		}
1594 		rtfree(nrt);
1595 	}
1596   nolladdropt:;
1597 
1598 	m->m_pkthdr.len = m->m_len = p - (u_char *)ip6;
1599 
1600 	/* just to be safe */
1601 	if (p - (u_char *)ip6 > maxlen)
1602 		goto noredhdropt;
1603 
1604 	{
1605 		/* redirected header option */
1606 		int len;
1607 		struct nd_opt_rd_hdr *nd_opt_rh;
1608 
1609 		/*
1610 		 * compute the maximum size for icmp6 redirect header option.
1611 		 * XXX room for auth header?
1612 		 */
1613 		len = maxlen - (p - (u_char *)ip6);
1614 		len &= ~7;
1615 
1616 		/*
1617 		 * Redirected header option spec (RFC2461 4.6.3) talks nothing
1618 		 * about padding/truncate rule for the original IP packet.
1619 		 * From the discussion on IPv6imp in Feb 1999,
1620 		 * the consensus was:
1621 		 * - "attach as much as possible" is the goal
1622 		 * - pad if not aligned (original size can be guessed by
1623 		 *   original ip6 header)
1624 		 * Following code adds the padding if it is simple enough,
1625 		 * and truncates if not.
1626 		 */
1627 		if (len - sizeof(*nd_opt_rh) < m0->m_pkthdr.len) {
1628 			/* not enough room, truncate */
1629 			m_adj(m0, (len - sizeof(*nd_opt_rh)) -
1630 			    m0->m_pkthdr.len);
1631 		} else {
1632 			/*
1633 			 * enough room, truncate if not aligned.
1634 			 * we don't pad here for simplicity.
1635 			 */
1636 			size_t extra;
1637 
1638 			extra = m0->m_pkthdr.len % 8;
1639 			if (extra) {
1640 				/* truncate */
1641 				m_adj(m0, -extra);
1642 			}
1643 			len = m0->m_pkthdr.len + sizeof(*nd_opt_rh);
1644 		}
1645 
1646 		nd_opt_rh = (struct nd_opt_rd_hdr *)p;
1647 		bzero(nd_opt_rh, sizeof(*nd_opt_rh));
1648 		nd_opt_rh->nd_opt_rh_type = ND_OPT_REDIRECTED_HEADER;
1649 		nd_opt_rh->nd_opt_rh_len = len >> 3;
1650 		p += sizeof(*nd_opt_rh);
1651 		m->m_pkthdr.len = m->m_len = p - (u_char *)ip6;
1652 
1653 		/* connect m0 to m */
1654 		m->m_pkthdr.len += m0->m_pkthdr.len;
1655 		m_cat(m, m0);
1656 		m0 = NULL;
1657 	}
1658 noredhdropt:
1659 	m_freem(m0);
1660 	m0 = NULL;
1661 
1662 	sip6 = mtod(m, struct ip6_hdr *);
1663 	if (IN6_IS_ADDR_LINKLOCAL(&sip6->ip6_src))
1664 		sip6->ip6_src.s6_addr16[1] = 0;
1665 	if (IN6_IS_ADDR_LINKLOCAL(&sip6->ip6_dst))
1666 		sip6->ip6_dst.s6_addr16[1] = 0;
1667 #if 0
1668 	if (IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_src))
1669 		ip6->ip6_src.s6_addr16[1] = 0;
1670 	if (IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_dst))
1671 		ip6->ip6_dst.s6_addr16[1] = 0;
1672 #endif
1673 	if (IN6_IS_ADDR_LINKLOCAL(&nd_rd->nd_rd_target))
1674 		nd_rd->nd_rd_target.s6_addr16[1] = 0;
1675 	if (IN6_IS_ADDR_LINKLOCAL(&nd_rd->nd_rd_dst))
1676 		nd_rd->nd_rd_dst.s6_addr16[1] = 0;
1677 
1678 	ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(struct ip6_hdr));
1679 
1680 	nd_rd->nd_rd_cksum = 0;
1681 	m->m_pkthdr.csum_flags = M_ICMP_CSUM_OUT;
1682 
1683 	/* send the packet to outside... */
1684 	ip6_output(m, NULL, NULL, 0, NULL, NULL);
1685 
1686 	icmp6stat_inc(icp6s_outhist + ND_REDIRECT);
1687 
1688 	if_put(ifp);
1689 	return;
1690 
1691 fail:
1692 	if_put(ifp);
1693 	m_freem(m);
1694 	m_freem(m0);
1695 }
1696 
1697 /*
1698  * ICMPv6 socket option processing.
1699  */
1700 int
1701 icmp6_ctloutput(int op, struct socket *so, int level, int optname,
1702     struct mbuf *m)
1703 {
1704 	int error = 0;
1705 	struct inpcb *in6p = sotoinpcb(so);
1706 
1707 	if (level != IPPROTO_ICMPV6)
1708 		return EINVAL;
1709 
1710 	switch (op) {
1711 	case PRCO_SETOPT:
1712 		switch (optname) {
1713 		case ICMP6_FILTER:
1714 		    {
1715 			struct icmp6_filter *p;
1716 
1717 			if (m == NULL || m->m_len != sizeof(*p)) {
1718 				error = EMSGSIZE;
1719 				break;
1720 			}
1721 			p = mtod(m, struct icmp6_filter *);
1722 			if (!p || !in6p->inp_icmp6filt) {
1723 				error = EINVAL;
1724 				break;
1725 			}
1726 			bcopy(p, in6p->inp_icmp6filt,
1727 				sizeof(struct icmp6_filter));
1728 			error = 0;
1729 			break;
1730 		    }
1731 
1732 		default:
1733 			error = ENOPROTOOPT;
1734 			break;
1735 		}
1736 		break;
1737 
1738 	case PRCO_GETOPT:
1739 		switch (optname) {
1740 		case ICMP6_FILTER:
1741 		    {
1742 			struct icmp6_filter *p;
1743 
1744 			if (!in6p->inp_icmp6filt) {
1745 				error = EINVAL;
1746 				break;
1747 			}
1748 			m->m_len = sizeof(struct icmp6_filter);
1749 			p = mtod(m, struct icmp6_filter *);
1750 			bcopy(in6p->inp_icmp6filt, p,
1751 				sizeof(struct icmp6_filter));
1752 			error = 0;
1753 			break;
1754 		    }
1755 
1756 		default:
1757 			error = ENOPROTOOPT;
1758 			break;
1759 		}
1760 		break;
1761 	}
1762 
1763 	return (error);
1764 }
1765 
1766 /*
1767  * Perform rate limit check.
1768  * Returns 0 if it is okay to send the icmp6 packet.
1769  * Returns 1 if the router SHOULD NOT send this icmp6 packet due to rate
1770  * limitation.
1771  *
1772  * XXX per-destination/type check necessary?
1773  *
1774  * dst - not used at this moment
1775  * type - not used at this moment
1776  * code - not used at this moment
1777  */
1778 int
1779 icmp6_ratelimit(const struct in6_addr *dst, const int type, const int code)
1780 {
1781 	/* PPS limit */
1782 	if (!ppsratecheck(&icmp6errppslim_last, &icmp6errpps_count,
1783 	    icmp6errppslim))
1784 		return 1;	/* The packet is subject to rate limit */
1785 	return 0;		/* okay to send */
1786 }
1787 
1788 struct rtentry *
1789 icmp6_mtudisc_clone(struct sockaddr_in6 *dst, u_int rtableid, int ipsec)
1790 {
1791 	struct rtentry *rt;
1792 	int    error;
1793 
1794 	rt = rtalloc(sin6tosa(dst), RT_RESOLVE, rtableid);
1795 
1796 	/* Check if the route is actually usable */
1797 	if (!rtisvalid(rt))
1798 		goto bad;
1799 	/* IPsec needs the route only for PMTU, it can use reject for that */
1800 	if (!ipsec && (rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)))
1801 		goto bad;
1802 
1803 	/*
1804 	 * No PMTU for local routes and permanent neighbors,
1805 	 * ARP and NDP use the same expire timer as the route.
1806 	 */
1807 	if (ISSET(rt->rt_flags, RTF_LOCAL) ||
1808 	    (ISSET(rt->rt_flags, RTF_LLINFO) && rt->rt_expire == 0))
1809 		goto bad;
1810 
1811 	/* If we didn't get a host route, allocate one */
1812 	if ((rt->rt_flags & RTF_HOST) == 0) {
1813 		struct rtentry *nrt;
1814 		struct rt_addrinfo info;
1815 		struct sockaddr_rtlabel sa_rl;
1816 
1817 		memset(&info, 0, sizeof(info));
1818 		info.rti_ifa = rt->rt_ifa;
1819 		info.rti_flags = RTF_GATEWAY | RTF_HOST | RTF_DYNAMIC;
1820 		info.rti_info[RTAX_DST] = sin6tosa(dst);
1821 		info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1822 		info.rti_info[RTAX_LABEL] =
1823 		    rtlabel_id2sa(rt->rt_labelid, &sa_rl);
1824 
1825 		error = rtrequest(RTM_ADD, &info, rt->rt_priority, &nrt,
1826 		    rtableid);
1827 		if (error)
1828 			goto bad;
1829 		nrt->rt_rmx = rt->rt_rmx;
1830 		rtfree(rt);
1831 		rt = nrt;
1832 		rtm_send(rt, RTM_ADD, 0, rtableid);
1833 	}
1834 	error = rt_timer_add(rt, icmp6_mtudisc_timeout, icmp6_mtudisc_timeout_q,
1835 	    rtableid);
1836 	if (error)
1837 		goto bad;
1838 
1839 	return (rt);
1840 bad:
1841 	rtfree(rt);
1842 	return (NULL);
1843 }
1844 
1845 void
1846 icmp6_mtudisc_timeout(struct rtentry *rt, struct rttimer *r)
1847 {
1848 	struct ifnet *ifp;
1849 
1850 	NET_ASSERT_LOCKED();
1851 
1852 	ifp = if_get(rt->rt_ifidx);
1853 	if (ifp == NULL)
1854 		return;
1855 
1856 	if ((rt->rt_flags & (RTF_DYNAMIC|RTF_HOST)) == (RTF_DYNAMIC|RTF_HOST)) {
1857 		rtdeletemsg(rt, ifp, r->rtt_tableid);
1858 	} else {
1859 		if (!(rt->rt_locks & RTV_MTU))
1860 			rt->rt_mtu = 0;
1861 	}
1862 
1863 	if_put(ifp);
1864 }
1865 
1866 void
1867 icmp6_redirect_timeout(struct rtentry *rt, struct rttimer *r)
1868 {
1869 	struct ifnet *ifp;
1870 
1871 	NET_ASSERT_LOCKED();
1872 
1873 	ifp = if_get(rt->rt_ifidx);
1874 	if (ifp == NULL)
1875 		return;
1876 
1877 	if ((rt->rt_flags & (RTF_DYNAMIC|RTF_HOST)) == (RTF_DYNAMIC|RTF_HOST)) {
1878 		rtdeletemsg(rt, ifp, r->rtt_tableid);
1879 	}
1880 
1881 	if_put(ifp);
1882 }
1883 
/*
 * Table of integer ICMPv6 sysctl variables; icmp6_sysctl() passes it to
 * sysctl_bounded_arr().  Each row gives an ICMPV6CTL_* identifier, the
 * address of the variable behind it and two integers.
 * NOTE(review): the two integers are presumably the inclusive lower and
 * upper bounds accepted for the variable - confirm against the
 * definition of struct sysctl_bounded_args.
 */
const struct sysctl_bounded_args icmpv6ctl_vars[] = {
	{ ICMPV6CTL_REDIRTIMEOUT, &icmp6_redirtimeout, 0, INT_MAX },
	{ ICMPV6CTL_ND6_DELAY, &nd6_delay, 0, INT_MAX },
	{ ICMPV6CTL_ND6_UMAXTRIES, &nd6_umaxtries, 0, INT_MAX },
	{ ICMPV6CTL_ND6_MMAXTRIES, &nd6_mmaxtries, 0, INT_MAX },
	{ ICMPV6CTL_ERRPPSLIMIT, &icmp6errppslim, -1, 1000 },
	{ ICMPV6CTL_ND6_MAXNUDHINT, &nd6_maxnudhint, 0, INT_MAX },
	{ ICMPV6CTL_MTUDISC_HIWAT, &icmp6_mtudisc_hiwat, -1, INT_MAX },
	{ ICMPV6CTL_MTUDISC_LOWAT, &icmp6_mtudisc_lowat, -1, INT_MAX },
	{ ICMPV6CTL_ND6_DEBUG, &nd6_debug, 0, 1 },
};
1895 
1896 int
1897 icmp6_sysctl_icmp6stat(void *oldp, size_t *oldlenp, void *newp)
1898 {
1899 	struct icmp6stat *icmp6stat;
1900 	int ret;
1901 
1902 	CTASSERT(sizeof(*icmp6stat) == icp6s_ncounters * sizeof(uint64_t));
1903 	icmp6stat = malloc(sizeof(*icmp6stat), M_TEMP, M_WAITOK|M_ZERO);
1904 	counters_read(icmp6counters, (uint64_t *)icmp6stat, icp6s_ncounters);
1905 	ret = sysctl_rdstruct(oldp, oldlenp, newp,
1906 	    icmp6stat, sizeof(*icmp6stat));
1907 	free(icmp6stat, M_TEMP, sizeof(*icmp6stat));
1908 
1909 	return (ret);
1910 }
1911 
1912 int
1913 icmp6_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp,
1914     void *newp, size_t newlen)
1915 {
1916 	int error;
1917 
1918 	/* All sysctl names at this level are terminal. */
1919 	if (namelen != 1)
1920 		return ENOTDIR;
1921 
1922 	switch (name[0]) {
1923 
1924 	case ICMPV6CTL_STATS:
1925 		return icmp6_sysctl_icmp6stat(oldp, oldlenp, newp);
1926 	default:
1927 		NET_LOCK();
1928 		error = sysctl_bounded_arr(icmpv6ctl_vars,
1929 		    nitems(icmpv6ctl_vars), name, namelen, oldp, oldlenp, newp,
1930 		    newlen);
1931 		NET_UNLOCK();
1932 		return (error);
1933 	}
1934 	/* NOTREACHED */
1935 }
1936