xref: /openbsd-src/sys/netinet6/icmp6.c (revision 50b7afb2c2c0993b0894d4e34bf857cb13ed9c80)
1 /*	$OpenBSD: icmp6.c,v 1.146 2014/07/11 12:20:26 benno Exp $	*/
2 /*	$KAME: icmp6.c,v 1.217 2001/06/20 15:03:29 jinmei Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1982, 1986, 1988, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)ip_icmp.c	8.2 (Berkeley) 1/4/94
62  */
63 
64 #include "carp.h"
65 #include "pf.h"
66 
67 #include <sys/param.h>
68 #include <sys/systm.h>
69 #include <sys/malloc.h>
70 #include <sys/mbuf.h>
71 #include <sys/sysctl.h>
72 #include <sys/protosw.h>
73 #include <sys/socket.h>
74 #include <sys/socketvar.h>
75 #include <sys/time.h>
76 #include <sys/kernel.h>
77 #include <sys/syslog.h>
78 #include <sys/domain.h>
79 
80 #include <net/if.h>
81 #include <net/route.h>
82 #include <net/if_dl.h>
83 #include <net/if_types.h>
84 
85 #include <netinet/in.h>
86 #include <netinet/in_systm.h>
87 #include <netinet/ip.h>
88 #include <netinet6/in6_var.h>
89 #include <netinet/ip6.h>
90 #include <netinet6/ip6_var.h>
91 #include <netinet/icmp6.h>
92 #include <netinet6/mld6_var.h>
93 #include <netinet/in_pcb.h>
94 #include <netinet6/nd6.h>
95 #include <netinet6/in6_ifattach.h>
96 #include <netinet6/ip6protosw.h>
97 
98 #if NCARP > 0
99 #include <netinet/ip_carp.h>
100 #endif
101 
102 #if NPF > 0
103 #include <net/pfvar.h>
104 #endif
105 
/* ICMPv6 statistics; the counters are bumped throughout this file. */
struct icmp6stat icmp6stat;

/* Raw IPv6 PCB table, walked by icmp6_rip6_input(). */
extern struct inpcbtable rawin6pcbtable;
/*
 * NOTE(review): presumably the limit and bookkeeping used by
 * icmp6_ratelimit(); their use is not visible in this part of the file.
 */
extern int icmp6errppslim;
static int icmp6errpps_count = 0;
static struct timeval icmp6errppslim_last;

/*
 * List of callbacks to notify when Path MTU changes are made.
 * Entries are added by icmp6_mtudisc_callback_register() and invoked
 * from icmp6_mtudisc_update().
 */
struct icmp6_mtudisc_callback {
	/* linkage on icmp6_mtudisc_callbacks */
	LIST_ENTRY(icmp6_mtudisc_callback) mc_list;
	/* called with the affected destination and the routing table id */
	void (*mc_func)(struct sockaddr_in6 *, u_int);
};

LIST_HEAD(, icmp6_mtudisc_callback) icmp6_mtudisc_callbacks =
    LIST_HEAD_INITIALIZER(icmp6_mtudisc_callbacks);

/* Route timer queue for path MTU routes; created in icmp6_init(). */
struct rttimer_queue *icmp6_mtudisc_timeout_q = NULL;

/*
 * Thresholds on the number of entries in icmp6_mtudisc_timeout_q, checked
 * by icmp6_mtudisc_update(): validated updates are refused above hiwat,
 * non-validated ones above lowat.  A negative value disables the check.
 */
/* XXX do these values make any sense? */
static int icmp6_mtudisc_hiwat = 1280;
static int icmp6_mtudisc_lowat = 256;

/*
 * keep track of # of redirect routes.
 * The queue is created in icmp6_init().
 */
static struct rttimer_queue *icmp6_redirect_timeout_q = NULL;

/* XXX experimental, turned off */
static int icmp6_redirect_lowat = -1;

/* Prototypes for functions defined in this file. */
void	icmp6_errcount(struct icmp6errstat *, int, int);
int	icmp6_rip6_input(struct mbuf **, int);
int	icmp6_ratelimit(const struct in6_addr *, const int, const int);
const char *icmp6_redirect_diag(struct in6_addr *, struct in6_addr *,
	    struct in6_addr *);
int	icmp6_notify_error(struct mbuf *, int, int, int);
struct rtentry *icmp6_mtudisc_clone(struct sockaddr *, u_int);
void	icmp6_mtudisc_timeout(struct rtentry *, struct rttimer *);
void	icmp6_redirect_timeout(struct rtentry *, struct rttimer *);
147 
/*
 * Initialize the ICMPv6 layer: set up MLD and create the two route timer
 * queues used for path MTU routes and redirect routes.
 */
void
icmp6_init(void)
{
	mld6_init();
	/* queue timeouts come from ip6_mtudisc_timeout / icmp6_redirtimeout */
	icmp6_mtudisc_timeout_q = rt_timer_queue_create(ip6_mtudisc_timeout);
	icmp6_redirect_timeout_q = rt_timer_queue_create(icmp6_redirtimeout);
}
155 
156 void
157 icmp6_errcount(struct icmp6errstat *stat, int type, int code)
158 {
159 	switch (type) {
160 	case ICMP6_DST_UNREACH:
161 		switch (code) {
162 		case ICMP6_DST_UNREACH_NOROUTE:
163 			stat->icp6errs_dst_unreach_noroute++;
164 			return;
165 		case ICMP6_DST_UNREACH_ADMIN:
166 			stat->icp6errs_dst_unreach_admin++;
167 			return;
168 		case ICMP6_DST_UNREACH_BEYONDSCOPE:
169 			stat->icp6errs_dst_unreach_beyondscope++;
170 			return;
171 		case ICMP6_DST_UNREACH_ADDR:
172 			stat->icp6errs_dst_unreach_addr++;
173 			return;
174 		case ICMP6_DST_UNREACH_NOPORT:
175 			stat->icp6errs_dst_unreach_noport++;
176 			return;
177 		}
178 		break;
179 	case ICMP6_PACKET_TOO_BIG:
180 		stat->icp6errs_packet_too_big++;
181 		return;
182 	case ICMP6_TIME_EXCEEDED:
183 		switch (code) {
184 		case ICMP6_TIME_EXCEED_TRANSIT:
185 			stat->icp6errs_time_exceed_transit++;
186 			return;
187 		case ICMP6_TIME_EXCEED_REASSEMBLY:
188 			stat->icp6errs_time_exceed_reassembly++;
189 			return;
190 		}
191 		break;
192 	case ICMP6_PARAM_PROB:
193 		switch (code) {
194 		case ICMP6_PARAMPROB_HEADER:
195 			stat->icp6errs_paramprob_header++;
196 			return;
197 		case ICMP6_PARAMPROB_NEXTHEADER:
198 			stat->icp6errs_paramprob_nextheader++;
199 			return;
200 		case ICMP6_PARAMPROB_OPTION:
201 			stat->icp6errs_paramprob_option++;
202 			return;
203 		}
204 		break;
205 	case ND_REDIRECT:
206 		stat->icp6errs_redirect++;
207 		return;
208 	}
209 	stat->icp6errs_unknown++;
210 }
211 
212 /*
213  * Register a Path MTU Discovery callback.
214  */
215 void
216 icmp6_mtudisc_callback_register(void (*func)(struct sockaddr_in6 *, u_int))
217 {
218 	struct icmp6_mtudisc_callback *mc;
219 
220 	for (mc = LIST_FIRST(&icmp6_mtudisc_callbacks); mc != NULL;
221 	     mc = LIST_NEXT(mc, mc_list)) {
222 		if (mc->mc_func == func)
223 			return;
224 	}
225 
226 	mc = malloc(sizeof(*mc), M_PCB, M_NOWAIT);
227 	if (mc == NULL)
228 		panic("icmp6_mtudisc_callback_register");
229 
230 	mc->mc_func = func;
231 	LIST_INSERT_HEAD(&icmp6_mtudisc_callbacks, mc, mc_list);
232 }
233 
234 /*
235  * Generate an error packet of type error in response to bad IP6 packet.
236  */
237 void
238 icmp6_error(struct mbuf *m, int type, int code, int param)
239 {
240 	struct ip6_hdr *oip6, *nip6;
241 	struct icmp6_hdr *icmp6;
242 	u_int preplen;
243 	int off;
244 	int nxt;
245 
246 	icmp6stat.icp6s_error++;
247 
248 	/* count per-type-code statistics */
249 	icmp6_errcount(&icmp6stat.icp6s_outerrhist, type, code);
250 
251 	if (m->m_len < sizeof(struct ip6_hdr)) {
252 		m = m_pullup(m, sizeof(struct ip6_hdr));
253 		if (m == NULL)
254 			return;
255 	}
256 	oip6 = mtod(m, struct ip6_hdr *);
257 
258 	/*
259 	 * If the destination address of the erroneous packet is a multicast
260 	 * address, or the packet was sent using link-layer multicast,
261 	 * we should basically suppress sending an error (RFC 2463, Section
262 	 * 2.4).
263 	 * We have two exceptions (the item e.2 in that section):
264 	 * - the Packet Too Big message can be sent for path MTU discovery.
265 	 * - the Parameter Problem Message that can be allowed an icmp6 error
266 	 *   in the option type field.  This check has been done in
267 	 *   ip6_unknown_opt(), so we can just check the type and code.
268 	 */
269 	if ((m->m_flags & (M_BCAST|M_MCAST) ||
270 	     IN6_IS_ADDR_MULTICAST(&oip6->ip6_dst)) &&
271 	    (type != ICMP6_PACKET_TOO_BIG &&
272 	     (type != ICMP6_PARAM_PROB ||
273 	      code != ICMP6_PARAMPROB_OPTION)))
274 		goto freeit;
275 
276 	/*
277 	 * RFC 2463, 2.4 (e.5): source address check.
278 	 * XXX: the case of anycast source?
279 	 */
280 	if (IN6_IS_ADDR_UNSPECIFIED(&oip6->ip6_src) ||
281 	    IN6_IS_ADDR_MULTICAST(&oip6->ip6_src))
282 		goto freeit;
283 
284 	/*
285 	 * If we are about to send ICMPv6 against ICMPv6 error/redirect,
286 	 * don't do it.
287 	 */
288 	nxt = -1;
289 	off = ip6_lasthdr(m, 0, IPPROTO_IPV6, &nxt);
290 	if (off >= 0 && nxt == IPPROTO_ICMPV6) {
291 		struct icmp6_hdr *icp;
292 
293 		IP6_EXTHDR_GET(icp, struct icmp6_hdr *, m, off,
294 			sizeof(*icp));
295 		if (icp == NULL) {
296 			icmp6stat.icp6s_tooshort++;
297 			return;
298 		}
299 		if (icp->icmp6_type < ICMP6_ECHO_REQUEST ||
300 		    icp->icmp6_type == ND_REDIRECT) {
301 			/*
302 			 * ICMPv6 error
303 			 * Special case: for redirect (which is
304 			 * informational) we must not send icmp6 error.
305 			 */
306 			icmp6stat.icp6s_canterror++;
307 			goto freeit;
308 		} else {
309 			/* ICMPv6 informational - send the error */
310 		}
311 	}
312 	else {
313 		/* non-ICMPv6 - send the error */
314 	}
315 
316 	oip6 = mtod(m, struct ip6_hdr *); /* adjust pointer */
317 
318 	/* Finally, do rate limitation check. */
319 	if (icmp6_ratelimit(&oip6->ip6_src, type, code)) {
320 		icmp6stat.icp6s_toofreq++;
321 		goto freeit;
322 	}
323 
324 	/*
325 	 * OK, ICMP6 can be generated.
326 	 */
327 
328 	if (m->m_pkthdr.len >= ICMPV6_PLD_MAXLEN)
329 		m_adj(m, ICMPV6_PLD_MAXLEN - m->m_pkthdr.len);
330 
331 	preplen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr);
332 	M_PREPEND(m, preplen, M_DONTWAIT);
333 	if (m && m->m_len < preplen)
334 		m = m_pullup(m, preplen);
335 	if (m == NULL) {
336 		nd6log((LOG_DEBUG, "ENOBUFS in icmp6_error %d\n", __LINE__));
337 		return;
338 	}
339 
340 	nip6 = mtod(m, struct ip6_hdr *);
341 	nip6->ip6_src  = oip6->ip6_src;
342 	nip6->ip6_dst  = oip6->ip6_dst;
343 
344 	if (IN6_IS_SCOPE_EMBED(&oip6->ip6_src))
345 		oip6->ip6_src.s6_addr16[1] = 0;
346 	if (IN6_IS_SCOPE_EMBED(&oip6->ip6_dst))
347 		oip6->ip6_dst.s6_addr16[1] = 0;
348 
349 	icmp6 = (struct icmp6_hdr *)(nip6 + 1);
350 	icmp6->icmp6_type = type;
351 	icmp6->icmp6_code = code;
352 	icmp6->icmp6_pptr = htonl((u_int32_t)param);
353 
354 	/*
355 	 * icmp6_reflect() is designed to be in the input path.
356 	 * icmp6_error() can be called from both input and outut path,
357 	 * and if we are in output path rcvif could contain bogus value.
358 	 * clear m->m_pkthdr.rcvif for safety, we should have enough scope
359 	 * information in ip header (nip6).
360 	 */
361 	m->m_pkthdr.rcvif = NULL;
362 
363 	icmp6stat.icp6s_outhist[type]++;
364 	icmp6_reflect(m, sizeof(struct ip6_hdr)); /* header order: IPv6 - ICMPv6 */
365 
366 	return;
367 
368   freeit:
369 	/*
370 	 * If we can't tell wheter or not we can generate ICMP6, free it.
371 	 */
372 	m_freem(m);
373 }
374 
375 /*
376  * Process a received ICMP6 message.
377  */
378 int
379 icmp6_input(struct mbuf **mp, int *offp, int proto)
380 {
381 	struct ifnet *ifp;
382 	struct mbuf *m = *mp, *n;
383 	struct ip6_hdr *ip6, *nip6;
384 	struct icmp6_hdr *icmp6, *nicmp6;
385 	int off = *offp;
386 	int icmp6len = m->m_pkthdr.len - *offp;
387 	int code, sum, noff;
388 	char src[INET6_ADDRSTRLEN], dst[INET6_ADDRSTRLEN];
389 
390 	ifp = m->m_pkthdr.rcvif;
391 
392 	icmp6_ifstat_inc(ifp, ifs6_in_msg);
393 
394 	/*
395 	 * Locate icmp6 structure in mbuf, and check
396 	 * that not corrupted and of at least minimum length
397 	 */
398 
399 	ip6 = mtod(m, struct ip6_hdr *);
400 	if (icmp6len < sizeof(struct icmp6_hdr)) {
401 		icmp6stat.icp6s_tooshort++;
402 		icmp6_ifstat_inc(ifp, ifs6_in_error);
403 		goto freeit;
404 	}
405 
406 	/*
407 	 * calculate the checksum
408 	 */
409 	IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6));
410 	if (icmp6 == NULL) {
411 		icmp6stat.icp6s_tooshort++;
412 		return IPPROTO_DONE;
413 	}
414 	code = icmp6->icmp6_code;
415 
416 	if ((sum = in6_cksum(m, IPPROTO_ICMPV6, off, icmp6len)) != 0) {
417 		nd6log((LOG_ERR,
418 		    "ICMP6 checksum error(%d|%x) %s\n",
419 		    icmp6->icmp6_type, sum,
420 		    inet_ntop(AF_INET6, &ip6->ip6_src, src, sizeof(src))));
421 		icmp6stat.icp6s_checksum++;
422 		icmp6_ifstat_inc(ifp, ifs6_in_error);
423 		goto freeit;
424 	}
425 
426 #if NPF > 0
427 	if (m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) {
428 		switch (icmp6->icmp6_type) {
429 		/*
430 		 * These ICMP6 types map to other connections.  They must be
431 		 * delivered to pr_ctlinput() also for diverted connections.
432 		 */
433 		case ICMP6_DST_UNREACH:
434 		case ICMP6_PACKET_TOO_BIG:
435 		case ICMP6_TIME_EXCEEDED:
436 		case ICMP6_PARAM_PROB:
437 			break;
438 		default:
439 			goto raw;
440 		}
441 	}
442 #endif /* NPF */
443 
444 #if NCARP > 0
445 	if (ifp->if_type == IFT_CARP &&
446 	    icmp6->icmp6_type == ICMP6_ECHO_REQUEST &&
447 	    carp_lsdrop(m, AF_INET6, ip6->ip6_src.s6_addr32,
448 	    ip6->ip6_dst.s6_addr32))
449 		goto freeit;
450 #endif
451 	icmp6stat.icp6s_inhist[icmp6->icmp6_type]++;
452 
453 	switch (icmp6->icmp6_type) {
454 	case ICMP6_DST_UNREACH:
455 		icmp6_ifstat_inc(ifp, ifs6_in_dstunreach);
456 		switch (code) {
457 		case ICMP6_DST_UNREACH_NOROUTE:
458 			code = PRC_UNREACH_NET;
459 			break;
460 		case ICMP6_DST_UNREACH_ADMIN:
461 			icmp6_ifstat_inc(ifp, ifs6_in_adminprohib);
462 			code = PRC_UNREACH_PROTOCOL; /* is this a good code? */
463 			break;
464 		case ICMP6_DST_UNREACH_ADDR:
465 			code = PRC_HOSTDEAD;
466 			break;
467 #ifdef COMPAT_RFC1885
468 		case ICMP6_DST_UNREACH_NOTNEIGHBOR:
469 			code = PRC_UNREACH_SRCFAIL;
470 			break;
471 #else
472 		case ICMP6_DST_UNREACH_BEYONDSCOPE:
473 			/* I mean "source address was incorrect." */
474 			code = PRC_PARAMPROB;
475 			break;
476 #endif
477 		case ICMP6_DST_UNREACH_NOPORT:
478 			code = PRC_UNREACH_PORT;
479 			break;
480 		default:
481 			goto badcode;
482 		}
483 		goto deliver;
484 
485 	case ICMP6_PACKET_TOO_BIG:
486 		icmp6_ifstat_inc(ifp, ifs6_in_pkttoobig);
487 
488 		/* MTU is checked in icmp6_mtudisc_update. */
489 		code = PRC_MSGSIZE;
490 
491 		/*
492 		 * Updating the path MTU will be done after examining
493 		 * intermediate extension headers.
494 		 */
495 		goto deliver;
496 
497 	case ICMP6_TIME_EXCEEDED:
498 		icmp6_ifstat_inc(ifp, ifs6_in_timeexceed);
499 		switch (code) {
500 		case ICMP6_TIME_EXCEED_TRANSIT:
501 			code = PRC_TIMXCEED_INTRANS;
502 			break;
503 		case ICMP6_TIME_EXCEED_REASSEMBLY:
504 			code = PRC_TIMXCEED_REASS;
505 			break;
506 		default:
507 			goto badcode;
508 		}
509 		goto deliver;
510 
511 	case ICMP6_PARAM_PROB:
512 		icmp6_ifstat_inc(ifp, ifs6_in_paramprob);
513 		switch (code) {
514 		case ICMP6_PARAMPROB_NEXTHEADER:
515 			code = PRC_UNREACH_PROTOCOL;
516 			break;
517 		case ICMP6_PARAMPROB_HEADER:
518 		case ICMP6_PARAMPROB_OPTION:
519 			code = PRC_PARAMPROB;
520 			break;
521 		default:
522 			goto badcode;
523 		}
524 		goto deliver;
525 
526 	case ICMP6_ECHO_REQUEST:
527 		icmp6_ifstat_inc(ifp, ifs6_in_echo);
528 		if (code != 0)
529 			goto badcode;
530 		/*
531 		 * Copy mbuf to send to two data paths: userland socket(s),
532 		 * and to the querier (echo reply).
533 		 * m: a copy for socket, n: a copy for querier
534 		 */
535 		if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
536 			/* Give up local */
537 			n = m;
538 			m = NULL;
539 			goto deliverecho;
540 		}
541 		/*
542 		 * If the first mbuf is shared, or the first mbuf is too short,
543 		 * copy the first part of the data into a fresh mbuf.
544 		 * Otherwise, we will wrongly overwrite both copies.
545 		 */
546 		if ((n->m_flags & M_EXT) != 0 ||
547 		    n->m_len < off + sizeof(struct icmp6_hdr)) {
548 			struct mbuf *n0 = n;
549 			const int maxlen = sizeof(*nip6) + sizeof(*nicmp6);
550 
551 			/*
552 			 * Prepare an internal mbuf.  m_pullup() doesn't
553 			 * always copy the length we specified.
554 			 */
555 			if (maxlen >= MCLBYTES) {
556 				/* Give up remote */
557 				m_freem(n0);
558 				break;
559 			}
560 			MGETHDR(n, M_DONTWAIT, n0->m_type);
561 			if (n && maxlen >= MHLEN) {
562 				MCLGET(n, M_DONTWAIT);
563 				if ((n->m_flags & M_EXT) == 0) {
564 					m_free(n);
565 					n = NULL;
566 				}
567 			}
568 			if (n == NULL) {
569 				/* Give up local */
570 				m_freem(n0);
571 				n = m;
572 				m = NULL;
573 				goto deliverecho;
574 			}
575 			M_MOVE_PKTHDR(n, n0);
576 			/*
577 			 * Copy IPv6 and ICMPv6 only.
578 			 */
579 			nip6 = mtod(n, struct ip6_hdr *);
580 			bcopy(ip6, nip6, sizeof(struct ip6_hdr));
581 			nicmp6 = (struct icmp6_hdr *)(nip6 + 1);
582 			bcopy(icmp6, nicmp6, sizeof(struct icmp6_hdr));
583 			noff = sizeof(struct ip6_hdr);
584 			n->m_len = noff + sizeof(struct icmp6_hdr);
585 			/*
586 			 * Adjust mbuf.  ip6_plen will be adjusted in
587 			 * ip6_output().
588 			 * n->m_pkthdr.len == n0->m_pkthdr.len at this point.
589 			 */
590 			n->m_pkthdr.len += noff + sizeof(struct icmp6_hdr);
591 			n->m_pkthdr.len -= (off + sizeof(struct icmp6_hdr));
592 			m_adj(n0, off + sizeof(struct icmp6_hdr));
593 			n->m_next = n0;
594 		} else {
595 	 deliverecho:
596 			nip6 = mtod(n, struct ip6_hdr *);
597 			IP6_EXTHDR_GET(nicmp6, struct icmp6_hdr *, n, off,
598 			    sizeof(*nicmp6));
599 			noff = off;
600 		}
601 		nicmp6->icmp6_type = ICMP6_ECHO_REPLY;
602 		nicmp6->icmp6_code = 0;
603 		if (n) {
604 			icmp6stat.icp6s_reflect++;
605 			icmp6stat.icp6s_outhist[ICMP6_ECHO_REPLY]++;
606 			icmp6_reflect(n, noff);
607 		}
608 		if (!m)
609 			goto freeit;
610 		break;
611 
612 	case ICMP6_ECHO_REPLY:
613 		icmp6_ifstat_inc(ifp, ifs6_in_echoreply);
614 		if (code != 0)
615 			goto badcode;
616 		break;
617 
618 	case MLD_LISTENER_QUERY:
619 	case MLD_LISTENER_REPORT:
620 		if (icmp6len < sizeof(struct mld_hdr))
621 			goto badlen;
622 		if (icmp6->icmp6_type == MLD_LISTENER_QUERY) /* XXX: ugly... */
623 			icmp6_ifstat_inc(ifp, ifs6_in_mldquery);
624 		else
625 			icmp6_ifstat_inc(ifp, ifs6_in_mldreport);
626 		if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
627 			/* give up local */
628 			mld6_input(m, off);
629 			m = NULL;
630 			goto freeit;
631 		}
632 		mld6_input(n, off);
633 		/* m stays. */
634 		break;
635 
636 	case MLD_LISTENER_DONE:
637 		icmp6_ifstat_inc(ifp, ifs6_in_mlddone);
638 		if (icmp6len < sizeof(struct mld_hdr))	/* necessary? */
639 			goto badlen;
640 		break;		/* nothing to be done in kernel */
641 
642 	case MLD_MTRACE_RESP:
643 	case MLD_MTRACE:
644 		/* XXX: these two are experimental.  not officially defined. */
645 		/* XXX: per-interface statistics? */
646 		break;		/* just pass it to applications */
647 
648 	case ICMP6_WRUREQUEST:	/* ICMP6_FQDN_QUERY */
649 		/* IPv6 Node Information Queries are not supported */
650 		break;
651 	case ICMP6_WRUREPLY:
652 		break;
653 
654 	case ND_ROUTER_SOLICIT:
655 		icmp6_ifstat_inc(ifp, ifs6_in_routersolicit);
656 		if (code != 0)
657 			goto badcode;
658 		if (icmp6len < sizeof(struct nd_router_solicit))
659 			goto badlen;
660 		if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
661 			/* give up local */
662 			nd6_rs_input(m, off, icmp6len);
663 			m = NULL;
664 			goto freeit;
665 		}
666 		nd6_rs_input(n, off, icmp6len);
667 		/* m stays. */
668 		break;
669 
670 	case ND_ROUTER_ADVERT:
671 		icmp6_ifstat_inc(ifp, ifs6_in_routeradvert);
672 		if (code != 0)
673 			goto badcode;
674 		if (icmp6len < sizeof(struct nd_router_advert))
675 			goto badlen;
676 		if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
677 			/* give up local */
678 			nd6_ra_input(m, off, icmp6len);
679 			m = NULL;
680 			goto freeit;
681 		}
682 		nd6_ra_input(n, off, icmp6len);
683 		/* m stays. */
684 		break;
685 
686 	case ND_NEIGHBOR_SOLICIT:
687 		icmp6_ifstat_inc(ifp, ifs6_in_neighborsolicit);
688 		if (code != 0)
689 			goto badcode;
690 		if (icmp6len < sizeof(struct nd_neighbor_solicit))
691 			goto badlen;
692 		if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
693 			/* give up local */
694 			nd6_ns_input(m, off, icmp6len);
695 			m = NULL;
696 			goto freeit;
697 		}
698 		nd6_ns_input(n, off, icmp6len);
699 		/* m stays. */
700 		break;
701 
702 	case ND_NEIGHBOR_ADVERT:
703 		icmp6_ifstat_inc(ifp, ifs6_in_neighboradvert);
704 		if (code != 0)
705 			goto badcode;
706 		if (icmp6len < sizeof(struct nd_neighbor_advert))
707 			goto badlen;
708 		if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
709 			/* give up local */
710 			nd6_na_input(m, off, icmp6len);
711 			m = NULL;
712 			goto freeit;
713 		}
714 		nd6_na_input(n, off, icmp6len);
715 		/* m stays. */
716 		break;
717 
718 	case ND_REDIRECT:
719 		icmp6_ifstat_inc(ifp, ifs6_in_redirect);
720 		if (code != 0)
721 			goto badcode;
722 		if (icmp6len < sizeof(struct nd_redirect))
723 			goto badlen;
724 		if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
725 			/* give up local */
726 			icmp6_redirect_input(m, off);
727 			m = NULL;
728 			goto freeit;
729 		}
730 		icmp6_redirect_input(n, off);
731 		/* m stays. */
732 		break;
733 
734 	case ICMP6_ROUTER_RENUMBERING:
735 		if (code != ICMP6_ROUTER_RENUMBERING_COMMAND &&
736 		    code != ICMP6_ROUTER_RENUMBERING_RESULT)
737 			goto badcode;
738 		if (icmp6len < sizeof(struct icmp6_router_renum))
739 			goto badlen;
740 		break;
741 
742 	default:
743 		nd6log((LOG_DEBUG,
744 		    "icmp6_input: unknown type %d(src=%s, dst=%s, ifid=%d)\n",
745 		    icmp6->icmp6_type,
746 		    inet_ntop(AF_INET6, &ip6->ip6_src, src, sizeof(src)),
747 		    inet_ntop(AF_INET6, &ip6->ip6_dst, dst, sizeof(dst)),
748 		    ifp ? ifp->if_index : 0));
749 		if (icmp6->icmp6_type < ICMP6_ECHO_REQUEST) {
750 			/* ICMPv6 error: MUST deliver it by spec... */
751 			code = PRC_NCMDS;
752 			/* deliver */
753 		} else {
754 			/* ICMPv6 informational: MUST not deliver */
755 			break;
756 		}
757 deliver:
758 		if (icmp6_notify_error(m, off, icmp6len, code)) {
759 			/* In this case, m should've been freed. */
760 			return (IPPROTO_DONE);
761 		}
762 		break;
763 
764 badcode:
765 		icmp6stat.icp6s_badcode++;
766 		break;
767 
768 badlen:
769 		icmp6stat.icp6s_badlen++;
770 		break;
771 	}
772 
773 #if NPF > 0
774 raw:
775 #endif
776 	/* deliver the packet to appropriate sockets */
777 	icmp6_rip6_input(&m, *offp);
778 
779 	return IPPROTO_DONE;
780 
781  freeit:
782 	m_freem(m);
783 	return IPPROTO_DONE;
784 }
785 
786 int
787 icmp6_notify_error(struct mbuf *m, int off, int icmp6len, int code)
788 {
789 	struct icmp6_hdr *icmp6;
790 	struct ip6_hdr *eip6;
791 	u_int32_t notifymtu;
792 	struct sockaddr_in6 icmp6src, icmp6dst;
793 
794 	if (icmp6len < sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr)) {
795 		icmp6stat.icp6s_tooshort++;
796 		goto freeit;
797 	}
798 	IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off,
799 		       sizeof(*icmp6) + sizeof(struct ip6_hdr));
800 	if (icmp6 == NULL) {
801 		icmp6stat.icp6s_tooshort++;
802 		return (-1);
803 	}
804 	eip6 = (struct ip6_hdr *)(icmp6 + 1);
805 
806 	/* Detect the upper level protocol */
807 	{
808 		void (*ctlfunc)(int, struct sockaddr *, u_int, void *);
809 		u_int8_t nxt = eip6->ip6_nxt;
810 		int eoff = off + sizeof(struct icmp6_hdr) +
811 			sizeof(struct ip6_hdr);
812 		struct ip6ctlparam ip6cp;
813 		struct in6_addr *finaldst = NULL;
814 		int icmp6type = icmp6->icmp6_type;
815 		struct ip6_frag *fh;
816 		struct ip6_rthdr *rth;
817 		struct ip6_rthdr0 *rth0;
818 		int rthlen;
819 
820 		while (1) { /* XXX: should avoid infinite loop explicitly? */
821 			struct ip6_ext *eh;
822 
823 			switch (nxt) {
824 			case IPPROTO_HOPOPTS:
825 			case IPPROTO_DSTOPTS:
826 			case IPPROTO_AH:
827 				IP6_EXTHDR_GET(eh, struct ip6_ext *, m,
828 					       eoff, sizeof(*eh));
829 				if (eh == NULL) {
830 					icmp6stat.icp6s_tooshort++;
831 					return (-1);
832 				}
833 
834 				if (nxt == IPPROTO_AH)
835 					eoff += (eh->ip6e_len + 2) << 2;
836 				else
837 					eoff += (eh->ip6e_len + 1) << 3;
838 				nxt = eh->ip6e_nxt;
839 				break;
840 			case IPPROTO_ROUTING:
841 				/*
842 				 * When the erroneous packet contains a
843 				 * routing header, we should examine the
844 				 * header to determine the final destination.
845 				 * Otherwise, we can't properly update
846 				 * information that depends on the final
847 				 * destination (e.g. path MTU).
848 				 */
849 				IP6_EXTHDR_GET(rth, struct ip6_rthdr *, m,
850 					       eoff, sizeof(*rth));
851 				if (rth == NULL) {
852 					icmp6stat.icp6s_tooshort++;
853 					return (-1);
854 				}
855 				rthlen = (rth->ip6r_len + 1) << 3;
856 				/*
857 				 * XXX: currently there is no
858 				 * officially defined type other
859 				 * than type-0.
860 				 * Note that if the segment left field
861 				 * is 0, all intermediate hops must
862 				 * have been passed.
863 				 */
864 				if (rth->ip6r_segleft &&
865 				    rth->ip6r_type == IPV6_RTHDR_TYPE_0) {
866 					int hops;
867 
868 					IP6_EXTHDR_GET(rth0,
869 						       struct ip6_rthdr0 *, m,
870 						       eoff, rthlen);
871 					if (rth0 == NULL) {
872 						icmp6stat.icp6s_tooshort++;
873 						return (-1);
874 					}
875 					/* just ignore a bogus header */
876 					if ((rth0->ip6r0_len % 2) == 0 &&
877 					    (hops = rth0->ip6r0_len/2))
878 						finaldst = (struct in6_addr *)(rth0 + 1) + (hops - 1);
879 				}
880 				eoff += rthlen;
881 				nxt = rth->ip6r_nxt;
882 				break;
883 			case IPPROTO_FRAGMENT:
884 				IP6_EXTHDR_GET(fh, struct ip6_frag *, m,
885 					       eoff, sizeof(*fh));
886 				if (fh == NULL) {
887 					icmp6stat.icp6s_tooshort++;
888 					return (-1);
889 				}
890 				/*
891 				 * Data after a fragment header is meaningless
892 				 * unless it is the first fragment, but
893 				 * we'll go to the notify label for path MTU
894 				 * discovery.
895 				 */
896 				if (fh->ip6f_offlg & IP6F_OFF_MASK)
897 					goto notify;
898 
899 				eoff += sizeof(struct ip6_frag);
900 				nxt = fh->ip6f_nxt;
901 				break;
902 			default:
903 				/*
904 				 * This case includes ESP and the No Next
905 				 * Header.  In such cases going to the notify
906 				 * label does not have any meaning
907 				 * (i.e. ctlfunc will be NULL), but we go
908 				 * anyway since we might have to update
909 				 * path MTU information.
910 				 */
911 				goto notify;
912 			}
913 		}
914 	  notify:
915 		IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off,
916 			       sizeof(*icmp6) + sizeof(struct ip6_hdr));
917 		if (icmp6 == NULL) {
918 			icmp6stat.icp6s_tooshort++;
919 			return (-1);
920 		}
921 
922 		eip6 = (struct ip6_hdr *)(icmp6 + 1);
923 		bzero(&icmp6dst, sizeof(icmp6dst));
924 		icmp6dst.sin6_len = sizeof(struct sockaddr_in6);
925 		icmp6dst.sin6_family = AF_INET6;
926 		if (finaldst == NULL)
927 			icmp6dst.sin6_addr = eip6->ip6_dst;
928 		else
929 			icmp6dst.sin6_addr = *finaldst;
930 		icmp6dst.sin6_scope_id = in6_addr2scopeid(m->m_pkthdr.rcvif,
931 							  &icmp6dst.sin6_addr);
932 		if (in6_embedscope(&icmp6dst.sin6_addr, &icmp6dst,
933 				   NULL, NULL)) {
934 			/* should be impossbile */
935 			nd6log((LOG_DEBUG,
936 			    "icmp6_notify_error: in6_embedscope failed\n"));
937 			goto freeit;
938 		}
939 
940 		/*
941 		 * retrieve parameters from the inner IPv6 header, and convert
942 		 * them into sockaddr structures.
943 		 */
944 		bzero(&icmp6src, sizeof(icmp6src));
945 		icmp6src.sin6_len = sizeof(struct sockaddr_in6);
946 		icmp6src.sin6_family = AF_INET6;
947 		icmp6src.sin6_addr = eip6->ip6_src;
948 		icmp6src.sin6_scope_id = in6_addr2scopeid(m->m_pkthdr.rcvif,
949 							  &icmp6src.sin6_addr);
950 		if (in6_embedscope(&icmp6src.sin6_addr, &icmp6src,
951 				   NULL, NULL)) {
952 			/* should be impossbile */
953 			nd6log((LOG_DEBUG,
954 			    "icmp6_notify_error: in6_embedscope failed\n"));
955 			goto freeit;
956 		}
957 		icmp6src.sin6_flowinfo =
958 		    (eip6->ip6_flow & IPV6_FLOWLABEL_MASK);
959 
960 		if (finaldst == NULL)
961 			finaldst = &eip6->ip6_dst;
962 		ip6cp.ip6c_m = m;
963 		ip6cp.ip6c_icmp6 = icmp6;
964 		ip6cp.ip6c_ip6 = (struct ip6_hdr *)(icmp6 + 1);
965 		ip6cp.ip6c_off = eoff;
966 		ip6cp.ip6c_finaldst = finaldst;
967 		ip6cp.ip6c_src = &icmp6src;
968 		ip6cp.ip6c_nxt = nxt;
969 #if NPF > 0
970 		pf_pkt_addr_changed(m);
971 #endif
972 
973 		if (icmp6type == ICMP6_PACKET_TOO_BIG) {
974 			notifymtu = ntohl(icmp6->icmp6_mtu);
975 			ip6cp.ip6c_cmdarg = (void *)&notifymtu;
976 		}
977 
978 		ctlfunc = inet6sw[ip6_protox[nxt]].pr_ctlinput;
979 		if (ctlfunc)
980 			(*ctlfunc)(code, sin6tosa(&icmp6dst),
981 			    m->m_pkthdr.ph_rtableid, &ip6cp);
982 	}
983 	return (0);
984 
985   freeit:
986 	m_freem(m);
987 	return (-1);
988 }
989 
990 void
991 icmp6_mtudisc_update(struct ip6ctlparam *ip6cp, int validated)
992 {
993 	unsigned long rtcount;
994 	struct icmp6_mtudisc_callback *mc;
995 	struct in6_addr *dst = ip6cp->ip6c_finaldst;
996 	struct icmp6_hdr *icmp6 = ip6cp->ip6c_icmp6;
997 	struct mbuf *m = ip6cp->ip6c_m;	/* will be necessary for scope issue */
998 	u_int mtu = ntohl(icmp6->icmp6_mtu);
999 	struct rtentry *rt = NULL;
1000 	struct sockaddr_in6 sin6;
1001 
1002 	/*
1003 	 * The MTU may not be less then the minimal IPv6 MTU except for the
1004 	 * hack in ip6_output/ip6_setpmtu where we always include a frag header.
1005 	 */
1006 	if (mtu < IPV6_MMTU - sizeof(struct ip6_frag))
1007 		return;
1008 
1009 	/*
1010 	 * allow non-validated cases if memory is plenty, to make traffic
1011 	 * from non-connected pcb happy.
1012 	 */
1013 	rtcount = rt_timer_queue_count(icmp6_mtudisc_timeout_q);
1014 	if (validated) {
1015 		if (0 <= icmp6_mtudisc_hiwat && rtcount > icmp6_mtudisc_hiwat)
1016 			return;
1017 		else if (0 <= icmp6_mtudisc_lowat &&
1018 		    rtcount > icmp6_mtudisc_lowat) {
1019 			/*
1020 			 * XXX nuke a victim, install the new one.
1021 			 */
1022 		}
1023 	} else {
1024 		if (0 <= icmp6_mtudisc_lowat && rtcount > icmp6_mtudisc_lowat)
1025 			return;
1026 	}
1027 
1028 	bzero(&sin6, sizeof(sin6));
1029 	sin6.sin6_family = PF_INET6;
1030 	sin6.sin6_len = sizeof(struct sockaddr_in6);
1031 	sin6.sin6_addr = *dst;
1032 	/* XXX normally, this won't happen */
1033 	if (IN6_IS_ADDR_LINKLOCAL(dst)) {
1034 		sin6.sin6_addr.s6_addr16[1] =
1035 		    htons(m->m_pkthdr.rcvif->if_index);
1036 	}
1037 	sin6.sin6_scope_id = in6_addr2scopeid(m->m_pkthdr.rcvif,
1038 	    &sin6.sin6_addr);
1039 
1040 	rt = icmp6_mtudisc_clone(sin6tosa(&sin6), m->m_pkthdr.ph_rtableid);
1041 
1042 	if (rt && (rt->rt_flags & RTF_HOST) &&
1043 	    !(rt->rt_rmx.rmx_locks & RTV_MTU) &&
1044 	    (rt->rt_rmx.rmx_mtu > mtu || rt->rt_rmx.rmx_mtu == 0)) {
1045 		if (mtu < IN6_LINKMTU(rt->rt_ifp)) {
1046 			icmp6stat.icp6s_pmtuchg++;
1047 			rt->rt_rmx.rmx_mtu = mtu;
1048 		}
1049 	}
1050 	if (rt) { /* XXX: need braces to avoid conflict with else in RTFREE. */
1051 		RTFREE(rt);
1052 	}
1053 
1054 	/*
1055 	 * Notify protocols that the MTU for this destination
1056 	 * has changed.
1057 	 */
1058 	for (mc = LIST_FIRST(&icmp6_mtudisc_callbacks); mc != NULL;
1059 	     mc = LIST_NEXT(mc, mc_list))
1060 		(*mc->mc_func)(&sin6, m->m_pkthdr.ph_rtableid);
1061 }
1062 
1063 /*
1064  * XXX almost dup'ed code with rip6_input.
1065  */
1066 int
1067 icmp6_rip6_input(struct mbuf **mp, int off)
1068 {
1069 	struct mbuf *m = *mp;
1070 	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
1071 	struct inpcb *in6p;
1072 	struct inpcb *last = NULL;
1073 	struct sockaddr_in6 rip6src;
1074 	struct icmp6_hdr *icmp6;
1075 	struct mbuf *opts = NULL;
1076 
1077 	IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6));
1078 	if (icmp6 == NULL) {
1079 		/* m is already reclaimed */
1080 		return IPPROTO_DONE;
1081 	}
1082 
1083 	bzero(&rip6src, sizeof(rip6src));
1084 	rip6src.sin6_len = sizeof(struct sockaddr_in6);
1085 	rip6src.sin6_family = AF_INET6;
1086 	/* KAME hack: recover scopeid */
1087 	(void)in6_recoverscope(&rip6src, &ip6->ip6_src, m->m_pkthdr.rcvif);
1088 
1089 	TAILQ_FOREACH(in6p, &rawin6pcbtable.inpt_queue, inp_queue) {
1090 		if (!(in6p->inp_flags & INP_IPV6))
1091 			continue;
1092 		if (in6p->inp_ipv6.ip6_nxt != IPPROTO_ICMPV6)
1093 			continue;
1094 #if NPF > 0
1095 		if (m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) {
1096 			struct pf_divert *divert;
1097 
1098 			/* XXX rdomain support */
1099 			if ((divert = pf_find_divert(m)) == NULL)
1100 				continue;
1101 			if (!IN6_ARE_ADDR_EQUAL(&in6p->inp_laddr6,
1102 			    &divert->addr.v6))
1103 				continue;
1104 		} else
1105 #endif
1106 		if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->inp_laddr6) &&
1107 		   !IN6_ARE_ADDR_EQUAL(&in6p->inp_laddr6, &ip6->ip6_dst))
1108 			continue;
1109 		if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->inp_faddr6) &&
1110 		   !IN6_ARE_ADDR_EQUAL(&in6p->inp_faddr6, &ip6->ip6_src))
1111 			continue;
1112 		if (in6p->inp_icmp6filt
1113 		    && ICMP6_FILTER_WILLBLOCK(icmp6->icmp6_type,
1114 				 in6p->inp_icmp6filt))
1115 			continue;
1116 		if (last) {
1117 			struct	mbuf *n;
1118 			if ((n = m_copy(m, 0, (int)M_COPYALL)) != NULL) {
1119 				if (last->inp_flags & IN6P_CONTROLOPTS)
1120 					ip6_savecontrol(last, n, &opts);
1121 				/* strip intermediate headers */
1122 				m_adj(n, off);
1123 				if (sbappendaddr(&last->inp_socket->so_rcv,
1124 				    sin6tosa(&rip6src), n, opts) == 0) {
1125 					/* should notify about lost packet */
1126 					m_freem(n);
1127 					if (opts)
1128 						m_freem(opts);
1129 				} else
1130 					sorwakeup(last->inp_socket);
1131 				opts = NULL;
1132 			}
1133 		}
1134 		last = in6p;
1135 	}
1136 	if (last) {
1137 		if (last->inp_flags & IN6P_CONTROLOPTS)
1138 			ip6_savecontrol(last, m, &opts);
1139 		/* strip intermediate headers */
1140 		m_adj(m, off);
1141 		if (sbappendaddr(&last->inp_socket->so_rcv,
1142 		    sin6tosa(&rip6src), m, opts) == 0) {
1143 			m_freem(m);
1144 			if (opts)
1145 				m_freem(opts);
1146 		} else
1147 			sorwakeup(last->inp_socket);
1148 	} else {
1149 		m_freem(m);
1150 		ip6stat.ip6s_delivered--;
1151 	}
1152 	return IPPROTO_DONE;
1153 }
1154 
1155 /*
1156  * Reflect the ip6 packet back to the source.
1157  * OFF points to the icmp6 header, counted from the top of the mbuf.
1158  *
1159  * Note: RFC 1885 required that an echo reply should be truncated if it
1160  * did not fit in with (return) path MTU, and KAME code supported the
1161  * behavior.  However, as a clarification after the RFC, this limitation
1162  * was removed in a revised version of the spec, RFC 2463.  We had kept the
1163  * old behavior, with a (non-default) ifdef block, while the new version of
1164  * the spec was an internet-draft status, and even after the new RFC was
1165  * published.  But it would rather make sense to clean the obsoleted part
1166  * up, and to make the code simpler at this stage.
1167  */
1168 void
1169 icmp6_reflect(struct mbuf *m, size_t off)
1170 {
1171 	struct ip6_hdr *ip6;
1172 	struct icmp6_hdr *icmp6;
1173 	struct in6_ifaddr *ia6;
1174 	struct in6_addr t, *src = NULL;
1175 	int plen;
1176 	int type, code;
1177 	struct ifnet *outif = NULL;
1178 	struct sockaddr_in6 sa6_src, sa6_dst;
1179 
1180 	/* too short to reflect */
1181 	if (off < sizeof(struct ip6_hdr)) {
1182 		nd6log((LOG_DEBUG,
1183 		    "sanity fail: off=%lx, sizeof(ip6)=%lx in %s:%d\n",
1184 		    (u_long)off, (u_long)sizeof(struct ip6_hdr),
1185 		    __FILE__, __LINE__));
1186 		goto bad;
1187 	}
1188 
1189 	/*
1190 	 * If there are extra headers between IPv6 and ICMPv6, strip
1191 	 * off that header first.
1192 	 */
1193 #ifdef DIAGNOSTIC
1194 	if (sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) > MHLEN)
1195 		panic("assumption failed in icmp6_reflect");
1196 #endif
1197 	if (off > sizeof(struct ip6_hdr)) {
1198 		size_t l;
1199 		struct ip6_hdr nip6;
1200 
1201 		l = off - sizeof(struct ip6_hdr);
1202 		m_copydata(m, 0, sizeof(nip6), (caddr_t)&nip6);
1203 		m_adj(m, l);
1204 		l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr);
1205 		if (m->m_len < l) {
1206 			if ((m = m_pullup(m, l)) == NULL)
1207 				return;
1208 		}
1209 		bcopy((caddr_t)&nip6, mtod(m, caddr_t), sizeof(nip6));
1210 	} else /* off == sizeof(struct ip6_hdr) */ {
1211 		size_t l;
1212 		l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr);
1213 		if (m->m_len < l) {
1214 			if ((m = m_pullup(m, l)) == NULL)
1215 				return;
1216 		}
1217 	}
1218 	plen = m->m_pkthdr.len - sizeof(struct ip6_hdr);
1219 	ip6 = mtod(m, struct ip6_hdr *);
1220 	ip6->ip6_nxt = IPPROTO_ICMPV6;
1221 	icmp6 = (struct icmp6_hdr *)(ip6 + 1);
1222 	type = icmp6->icmp6_type; /* keep type for statistics */
1223 	code = icmp6->icmp6_code; /* ditto. */
1224 
1225 	t = ip6->ip6_dst;
1226 	/*
1227 	 * ip6_input() drops a packet if its src is multicast.
1228 	 * So, the src is never multicast.
1229 	 */
1230 	ip6->ip6_dst = ip6->ip6_src;
1231 
1232 	/*
1233 	 * XXX: make sure to embed scope zone information, using
1234 	 * already embedded IDs or the received interface (if any).
1235 	 * Note that rcvif may be NULL.
1236 	 * TODO: scoped routing case (XXX).
1237 	 */
1238 	bzero(&sa6_src, sizeof(sa6_src));
1239 	sa6_src.sin6_family = AF_INET6;
1240 	sa6_src.sin6_len = sizeof(sa6_src);
1241 	sa6_src.sin6_addr = ip6->ip6_dst;
1242 	in6_recoverscope(&sa6_src, &ip6->ip6_dst, m->m_pkthdr.rcvif);
1243 	in6_embedscope(&ip6->ip6_dst, &sa6_src, NULL, NULL);
1244 	bzero(&sa6_dst, sizeof(sa6_dst));
1245 	sa6_dst.sin6_family = AF_INET6;
1246 	sa6_dst.sin6_len = sizeof(sa6_dst);
1247 	sa6_dst.sin6_addr = t;
1248 	in6_recoverscope(&sa6_dst, &t, m->m_pkthdr.rcvif);
1249 	in6_embedscope(&t, &sa6_dst, NULL, NULL);
1250 
1251 	/*
1252 	 * If the incoming packet was addressed directly to us (i.e. unicast),
1253 	 * use dst as the src for the reply.
1254 	 * The IN6_IFF_NOTREADY case would be VERY rare, but is possible
1255 	 * (for example) when we encounter an error while forwarding procedure
1256 	 * destined to a duplicated address of ours.
1257 	 */
1258 	TAILQ_FOREACH(ia6, &in6_ifaddr, ia_list)
1259 		if (IN6_ARE_ADDR_EQUAL(&t, &ia6->ia_addr.sin6_addr) &&
1260 		    (ia6->ia6_flags & (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY)) == 0) {
1261 			src = &t;
1262 			break;
1263 		}
1264 	if (ia6 == NULL && IN6_IS_ADDR_LINKLOCAL(&t) && (m->m_flags & M_LOOP)) {
1265 		/*
1266 		 * This is the case if the dst is our link-local address
1267 		 * and the sender is also ourselves.
1268 		 */
1269 		src = &t;
1270 	}
1271 
1272 	if (src == NULL) {
1273 		int error;
1274 		struct route_in6 ro;
1275 		char addr[INET6_ADDRSTRLEN];
1276 
1277 		/*
1278 		 * This case matches to multicasts, our anycast, or unicasts
1279 		 * that we do not own.  Select a source address based on the
1280 		 * source address of the erroneous packet.
1281 		 */
1282 		bzero(&ro, sizeof(ro));
1283 		error = in6_selectsrc(&src, &sa6_src, NULL, NULL, &ro, NULL,
1284 		    m->m_pkthdr.ph_rtableid);
1285 		if (ro.ro_rt) { /* XXX: see comments in icmp6_mtudisc_update */
1286 			RTFREE(ro.ro_rt); /* XXX: we could use this */
1287 		}
1288 		if (error) {
1289 			nd6log((LOG_DEBUG,
1290 			    "icmp6_reflect: source can't be determined: "
1291 			    "dst=%s, error=%d\n",
1292 			    inet_ntop(AF_INET6, &sa6_src.sin6_addr,
1293 				addr, sizeof(addr)),
1294 			    error));
1295 			goto bad;
1296 		}
1297 	}
1298 
1299 	ip6->ip6_src = *src;
1300 
1301 	ip6->ip6_flow = 0;
1302 	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
1303 	ip6->ip6_vfc |= IPV6_VERSION;
1304 	ip6->ip6_nxt = IPPROTO_ICMPV6;
1305 	if (m->m_pkthdr.rcvif) {
1306 		/* XXX: This may not be the outgoing interface */
1307 		ip6->ip6_hlim = ND_IFINFO(m->m_pkthdr.rcvif)->chlim;
1308 	} else
1309 		ip6->ip6_hlim = ip6_defhlim;
1310 
1311 	icmp6->icmp6_cksum = 0;
1312 	m->m_pkthdr.csum_flags |= M_ICMP_CSUM_OUT;
1313 
1314 	/*
1315 	 * XXX option handling
1316 	 */
1317 
1318 	m->m_flags &= ~(M_BCAST|M_MCAST);
1319 
1320 	/*
1321 	 * To avoid a "too big" situation at an intermediate router
1322 	 * and the path MTU discovery process, specify the IPV6_MINMTU flag.
1323 	 * Note that only echo and node information replies are affected,
1324 	 * since the length of ICMP6 errors is limited to the minimum MTU.
1325 	 */
1326 #if NPF > 0
1327 	pf_pkt_addr_changed(m);
1328 #endif
1329 	if (ip6_output(m, NULL, NULL, IPV6_MINMTU, NULL, &outif, NULL) != 0 &&
1330 	    outif)
1331 		icmp6_ifstat_inc(outif, ifs6_out_error);
1332 
1333 	if (outif)
1334 		icmp6_ifoutstat_inc(outif, type, code);
1335 
1336 	return;
1337 
1338  bad:
1339 	m_freem(m);
1340 	return;
1341 }
1342 
/*
 * icmp6_fasttimo: the only work done here is forwarding the call to
 * mld6_fasttimeo().
 */
void
icmp6_fasttimo(void)
{
	mld6_fasttimeo();
}
1349 
/*
 * Render the three addresses of a redirect as
 * "(src=... dst=... tgt=...)" for use in log messages.
 *
 * The result lives in a function-static buffer, so it is overwritten by
 * the next call and must be consumed before calling again.
 */
const char *
icmp6_redirect_diag(struct in6_addr *src6, struct in6_addr *dst6,
    struct in6_addr *tgt6)
{
	static char diag[1024]; /* XXX */
	char srcstr[INET6_ADDRSTRLEN];
	char dststr[INET6_ADDRSTRLEN];
	char tgtstr[INET6_ADDRSTRLEN];
	const char *s, *d, *t;

	/* each address is printed into its own scratch buffer */
	s = inet_ntop(AF_INET6, src6, srcstr, sizeof(srcstr));
	d = inet_ntop(AF_INET6, dst6, dststr, sizeof(dststr));
	t = inet_ntop(AF_INET6, tgt6, tgtstr, sizeof(tgtstr));

	snprintf(diag, sizeof(diag), "(src=%s dst=%s tgt=%s)", s, d, t);
	return diag;
}
1365 
1366 void
1367 icmp6_redirect_input(struct mbuf *m, int off)
1368 {
1369 	struct ifnet *ifp = m->m_pkthdr.rcvif;
1370 	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
1371 	struct nd_redirect *nd_rd;
1372 	int icmp6len = ntohs(ip6->ip6_plen);
1373 	char *lladdr = NULL;
1374 	int lladdrlen = 0;
1375 	struct rtentry *rt = NULL;
1376 	int is_router;
1377 	int is_onlink;
1378 	struct in6_addr src6 = ip6->ip6_src;
1379 	struct in6_addr redtgt6;
1380 	struct in6_addr reddst6;
1381 	union nd_opts ndopts;
1382 	char addr[INET6_ADDRSTRLEN];
1383 
1384 	if (!ifp)
1385 		return;
1386 
1387 	/* XXX if we are router, we don't update route by icmp6 redirect */
1388 	if (ip6_forwarding)
1389 		goto freeit;
1390 	if (!icmp6_rediraccept)
1391 		goto freeit;
1392 
1393 	IP6_EXTHDR_GET(nd_rd, struct nd_redirect *, m, off, icmp6len);
1394 	if (nd_rd == NULL) {
1395 		icmp6stat.icp6s_tooshort++;
1396 		return;
1397 	}
1398 	redtgt6 = nd_rd->nd_rd_target;
1399 	reddst6 = nd_rd->nd_rd_dst;
1400 
1401 	if (IN6_IS_ADDR_LINKLOCAL(&redtgt6))
1402 		redtgt6.s6_addr16[1] = htons(ifp->if_index);
1403 	if (IN6_IS_ADDR_LINKLOCAL(&reddst6))
1404 		reddst6.s6_addr16[1] = htons(ifp->if_index);
1405 
1406 	/* validation */
1407 	if (!IN6_IS_ADDR_LINKLOCAL(&src6)) {
1408 		nd6log((LOG_ERR,
1409 			"ICMP6 redirect sent from %s rejected; "
1410 			"must be from linklocal\n",
1411 			inet_ntop(AF_INET6, &src6, addr, sizeof(addr))));
1412 		goto bad;
1413 	}
1414 	if (ip6->ip6_hlim != 255) {
1415 		nd6log((LOG_ERR,
1416 			"ICMP6 redirect sent from %s rejected; "
1417 			"hlim=%d (must be 255)\n",
1418 			inet_ntop(AF_INET6, &src6, addr, sizeof(addr)),
1419 			ip6->ip6_hlim));
1420 		goto bad;
1421 	}
1422 	if (IN6_IS_ADDR_MULTICAST(&reddst6)) {
1423 		nd6log((LOG_ERR,
1424 			"ICMP6 redirect rejected; "
1425 			"redirect dst must be unicast: %s\n",
1426 			icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
1427 		goto bad;
1428 	}
1429     {
1430 	/* ip6->ip6_src must be equal to gw for icmp6->icmp6_reddst */
1431 	struct sockaddr_in6 sin6;
1432 	struct in6_addr *gw6;
1433 
1434 	bzero(&sin6, sizeof(sin6));
1435 	sin6.sin6_family = AF_INET6;
1436 	sin6.sin6_len = sizeof(struct sockaddr_in6);
1437 	bcopy(&reddst6, &sin6.sin6_addr, sizeof(reddst6));
1438 	rt = rtalloc1(sin6tosa(&sin6), 0, m->m_pkthdr.ph_rtableid);
1439 	if (rt) {
1440 		if (rt->rt_gateway == NULL ||
1441 		    rt->rt_gateway->sa_family != AF_INET6) {
1442 			nd6log((LOG_ERR,
1443 			    "ICMP6 redirect rejected; no route "
1444 			    "with inet6 gateway found for redirect dst: %s\n",
1445 			    icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
1446 			RTFREE(rt);
1447 			goto bad;
1448 		}
1449 
1450 		gw6 = &(satosin6(rt->rt_gateway)->sin6_addr);
1451 		if (bcmp(&src6, gw6, sizeof(struct in6_addr)) != 0) {
1452 			nd6log((LOG_ERR,
1453 				"ICMP6 redirect rejected; "
1454 				"not equal to gw-for-src=%s (must be same): "
1455 				"%s\n",
1456 				inet_ntop(AF_INET6, gw6, addr, sizeof(addr)),
1457 				icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
1458 			RTFREE(rt);
1459 			goto bad;
1460 		}
1461 	} else {
1462 		nd6log((LOG_ERR,
1463 			"ICMP6 redirect rejected; "
1464 			"no route found for redirect dst: %s\n",
1465 			icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
1466 		goto bad;
1467 	}
1468 	RTFREE(rt);
1469 	rt = NULL;
1470     }
1471 
1472 	is_router = is_onlink = 0;
1473 	if (IN6_IS_ADDR_LINKLOCAL(&redtgt6))
1474 		is_router = 1;	/* router case */
1475 	if (bcmp(&redtgt6, &reddst6, sizeof(redtgt6)) == 0)
1476 		is_onlink = 1;	/* on-link destination case */
1477 	if (!is_router && !is_onlink) {
1478 		nd6log((LOG_ERR,
1479 			"ICMP6 redirect rejected; "
1480 			"neither router case nor onlink case: %s\n",
1481 			icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
1482 		goto bad;
1483 	}
1484 	/* validation passed */
1485 
1486 	icmp6len -= sizeof(*nd_rd);
1487 	nd6_option_init(nd_rd + 1, icmp6len, &ndopts);
1488 	if (nd6_options(&ndopts) < 0) {
1489 		nd6log((LOG_INFO, "icmp6_redirect_input: "
1490 			"invalid ND option, rejected: %s\n",
1491 			icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
1492 		/* nd6_options have incremented stats */
1493 		goto freeit;
1494 	}
1495 
1496 	if (ndopts.nd_opts_tgt_lladdr) {
1497 		lladdr = (char *)(ndopts.nd_opts_tgt_lladdr + 1);
1498 		lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3;
1499 	}
1500 
1501 	if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) {
1502 		nd6log((LOG_INFO,
1503 			"icmp6_redirect_input: lladdrlen mismatch for %s "
1504 			"(if %d, icmp6 packet %d): %s\n",
1505 			inet_ntop(AF_INET6, &redtgt6, addr, sizeof(addr)),
1506 			ifp->if_addrlen, lladdrlen - 2,
1507 			icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
1508 		goto bad;
1509 	}
1510 
1511 	/* RFC 2461 8.3 */
1512 	nd6_cache_lladdr(ifp, &redtgt6, lladdr, lladdrlen, ND_REDIRECT,
1513 			 is_onlink ? ND_REDIRECT_ONLINK : ND_REDIRECT_ROUTER);
1514 
1515 	if (!is_onlink) {	/* better router case.  perform rtredirect. */
1516 		/* perform rtredirect */
1517 		struct sockaddr_in6 sdst;
1518 		struct sockaddr_in6 sgw;
1519 		struct sockaddr_in6 ssrc;
1520 		unsigned long rtcount;
1521 		struct rtentry *newrt = NULL;
1522 
1523 		/*
1524 		 * do not install redirect route, if the number of entries
1525 		 * is too much (> hiwat).  note that, the node (= host) will
1526 		 * work just fine even if we do not install redirect route
1527 		 * (there will be additional hops, though).
1528 		 */
1529 		rtcount = rt_timer_queue_count(icmp6_redirect_timeout_q);
1530 		if (0 <= ip6_maxdynroutes && rtcount >= ip6_maxdynroutes)
1531 			goto freeit;
1532 		else if (0 <= icmp6_redirect_lowat &&
1533 		    rtcount > icmp6_redirect_lowat) {
1534 			/*
1535 			 * XXX nuke a victim, install the new one.
1536 			 */
1537 		}
1538 
1539 		bzero(&sdst, sizeof(sdst));
1540 		bzero(&sgw, sizeof(sgw));
1541 		bzero(&ssrc, sizeof(ssrc));
1542 		sdst.sin6_family = sgw.sin6_family = ssrc.sin6_family = AF_INET6;
1543 		sdst.sin6_len = sgw.sin6_len = ssrc.sin6_len =
1544 			sizeof(struct sockaddr_in6);
1545 		bcopy(&redtgt6, &sgw.sin6_addr, sizeof(struct in6_addr));
1546 		bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr));
1547 		bcopy(&src6, &ssrc.sin6_addr, sizeof(struct in6_addr));
1548 		rtredirect(sin6tosa(&sdst), sin6tosa(&sgw), NULL,
1549 		    RTF_GATEWAY | RTF_HOST, sin6tosa(&ssrc),
1550 		    &newrt, m->m_pkthdr.ph_rtableid);
1551 
1552 		if (newrt) {
1553 			(void)rt_timer_add(newrt, icmp6_redirect_timeout,
1554 			    icmp6_redirect_timeout_q, m->m_pkthdr.ph_rtableid);
1555 			rtfree(newrt);
1556 		}
1557 	}
1558 	/* finally update cached route in each socket via pfctlinput */
1559 	{
1560 		struct sockaddr_in6 sdst;
1561 
1562 		bzero(&sdst, sizeof(sdst));
1563 		sdst.sin6_family = AF_INET6;
1564 		sdst.sin6_len = sizeof(struct sockaddr_in6);
1565 		bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr));
1566 		pfctlinput(PRC_REDIRECT_HOST, sin6tosa(&sdst));
1567 	}
1568 
1569  freeit:
1570 	m_freem(m);
1571 	return;
1572 
1573  bad:
1574 	icmp6stat.icp6s_badredirect++;
1575 	m_freem(m);
1576 }
1577 
1578 void
1579 icmp6_redirect_output(struct mbuf *m0, struct rtentry *rt)
1580 {
1581 	struct ifnet *ifp;	/* my outgoing interface */
1582 	struct in6_addr *ifp_ll6;
1583 	struct in6_addr *nexthop;
1584 	struct ip6_hdr *sip6;	/* m0 as struct ip6_hdr */
1585 	struct mbuf *m = NULL;	/* newly allocated one */
1586 	struct ip6_hdr *ip6;	/* m as struct ip6_hdr */
1587 	struct nd_redirect *nd_rd;
1588 	size_t maxlen;
1589 	u_char *p;
1590 	struct sockaddr_in6 src_sa;
1591 
1592 	icmp6_errcount(&icmp6stat.icp6s_outerrhist, ND_REDIRECT, 0);
1593 
1594 	/* if we are not router, we don't send icmp6 redirect */
1595 	if (!ip6_forwarding)
1596 		goto fail;
1597 
1598 	/* sanity check */
1599 	if (!m0 || !rt || !(rt->rt_flags & RTF_UP) || !(ifp = rt->rt_ifp))
1600 		goto fail;
1601 
1602 	/*
1603 	 * Address check:
1604 	 *  the source address must identify a neighbor, and
1605 	 *  the destination address must not be a multicast address
1606 	 *  [RFC 2461, sec 8.2]
1607 	 */
1608 	sip6 = mtod(m0, struct ip6_hdr *);
1609 	bzero(&src_sa, sizeof(src_sa));
1610 	src_sa.sin6_family = AF_INET6;
1611 	src_sa.sin6_len = sizeof(src_sa);
1612 	src_sa.sin6_addr = sip6->ip6_src;
1613 	/* we don't currently use sin6_scope_id, but eventually use it */
1614 	src_sa.sin6_scope_id = in6_addr2scopeid(ifp, &sip6->ip6_src);
1615 	if (nd6_is_addr_neighbor(&src_sa, ifp) == 0)
1616 		goto fail;
1617 	if (IN6_IS_ADDR_MULTICAST(&sip6->ip6_dst))
1618 		goto fail;	/* what should we do here? */
1619 
1620 	/* rate limit */
1621 	if (icmp6_ratelimit(&sip6->ip6_src, ND_REDIRECT, 0))
1622 		goto fail;
1623 
1624 	/*
1625 	 * Since we are going to append up to 1280 bytes (= IPV6_MMTU),
1626 	 * we almost always ask for an mbuf cluster for simplicity.
1627 	 * (MHLEN < IPV6_MMTU is almost always true)
1628 	 */
1629 #if IPV6_MMTU >= MCLBYTES
1630 # error assumption failed about IPV6_MMTU and MCLBYTES
1631 #endif
1632 	MGETHDR(m, M_DONTWAIT, MT_HEADER);
1633 	if (m && IPV6_MMTU >= MHLEN)
1634 		MCLGET(m, M_DONTWAIT);
1635 	if (!m)
1636 		goto fail;
1637 	m->m_pkthdr.rcvif = NULL;
1638 	m->m_len = 0;
1639 	maxlen = M_TRAILINGSPACE(m);
1640 	maxlen = min(IPV6_MMTU, maxlen);
1641 	/* just for safety */
1642 	if (maxlen < sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) +
1643 	    ((sizeof(struct nd_opt_hdr) + ifp->if_addrlen + 7) & ~7)) {
1644 		goto fail;
1645 	}
1646 
1647 	{
1648 		/* get ip6 linklocal address for ifp(my outgoing interface). */
1649 		struct in6_ifaddr *ia6;
1650 		if ((ia6 = in6ifa_ifpforlinklocal(ifp,
1651 						 IN6_IFF_NOTREADY|
1652 						 IN6_IFF_ANYCAST)) == NULL)
1653 			goto fail;
1654 		ifp_ll6 = &ia6->ia_addr.sin6_addr;
1655 	}
1656 
1657 	/* get ip6 linklocal address for the router. */
1658 	if (rt->rt_gateway && (rt->rt_flags & RTF_GATEWAY)) {
1659 		struct sockaddr_in6 *sin6;
1660 		sin6 = satosin6(rt->rt_gateway);
1661 		nexthop = &sin6->sin6_addr;
1662 		if (!IN6_IS_ADDR_LINKLOCAL(nexthop))
1663 			nexthop = NULL;
1664 	} else
1665 		nexthop = NULL;
1666 
1667 	/* ip6 */
1668 	ip6 = mtod(m, struct ip6_hdr *);
1669 	ip6->ip6_flow = 0;
1670 	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
1671 	ip6->ip6_vfc |= IPV6_VERSION;
1672 	/* ip6->ip6_plen will be set later */
1673 	ip6->ip6_nxt = IPPROTO_ICMPV6;
1674 	ip6->ip6_hlim = 255;
1675 	/* ip6->ip6_src must be linklocal addr for my outgoing if. */
1676 	bcopy(ifp_ll6, &ip6->ip6_src, sizeof(struct in6_addr));
1677 	bcopy(&sip6->ip6_src, &ip6->ip6_dst, sizeof(struct in6_addr));
1678 
1679 	/* ND Redirect */
1680 	nd_rd = (struct nd_redirect *)(ip6 + 1);
1681 	nd_rd->nd_rd_type = ND_REDIRECT;
1682 	nd_rd->nd_rd_code = 0;
1683 	nd_rd->nd_rd_reserved = 0;
1684 	if (rt->rt_flags & RTF_GATEWAY) {
1685 		/*
1686 		 * nd_rd->nd_rd_target must be a link-local address in
1687 		 * better router cases.
1688 		 */
1689 		if (!nexthop)
1690 			goto fail;
1691 		bcopy(nexthop, &nd_rd->nd_rd_target,
1692 		      sizeof(nd_rd->nd_rd_target));
1693 		bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_dst,
1694 		      sizeof(nd_rd->nd_rd_dst));
1695 	} else {
1696 		/* make sure redtgt == reddst */
1697 		nexthop = &sip6->ip6_dst;
1698 		bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_target,
1699 		      sizeof(nd_rd->nd_rd_target));
1700 		bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_dst,
1701 		      sizeof(nd_rd->nd_rd_dst));
1702 	}
1703 
1704 	p = (u_char *)(nd_rd + 1);
1705 
1706 	{
1707 		/* target lladdr option */
1708 		struct rtentry *rt_nexthop = NULL;
1709 		int len;
1710 		struct sockaddr_dl *sdl;
1711 		struct nd_opt_hdr *nd_opt;
1712 		char *lladdr;
1713 
1714 		rt_nexthop = nd6_lookup(nexthop, 0, ifp, ifp->if_rdomain);
1715 		if (!rt_nexthop)
1716 			goto nolladdropt;
1717 		len = sizeof(*nd_opt) + ifp->if_addrlen;
1718 		len = (len + 7) & ~7;	/* round by 8 */
1719 		/* safety check */
1720 		if (len + (p - (u_char *)ip6) > maxlen)
1721 			goto nolladdropt;
1722 		if (!(rt_nexthop->rt_flags & RTF_GATEWAY) &&
1723 		    (rt_nexthop->rt_flags & RTF_LLINFO) &&
1724 		    (rt_nexthop->rt_gateway->sa_family == AF_LINK) &&
1725 		    (sdl = (struct sockaddr_dl *)rt_nexthop->rt_gateway) &&
1726 		    sdl->sdl_alen) {
1727 			nd_opt = (struct nd_opt_hdr *)p;
1728 			nd_opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
1729 			nd_opt->nd_opt_len = len >> 3;
1730 			lladdr = (char *)(nd_opt + 1);
1731 			bcopy(LLADDR(sdl), lladdr, ifp->if_addrlen);
1732 			p += len;
1733 		}
1734 	}
1735   nolladdropt:;
1736 
1737 	m->m_pkthdr.len = m->m_len = p - (u_char *)ip6;
1738 
1739 	/* just to be safe */
1740 	if (p - (u_char *)ip6 > maxlen)
1741 		goto noredhdropt;
1742 
1743 	{
1744 		/* redirected header option */
1745 		int len;
1746 		struct nd_opt_rd_hdr *nd_opt_rh;
1747 
1748 		/*
1749 		 * compute the maximum size for icmp6 redirect header option.
1750 		 * XXX room for auth header?
1751 		 */
1752 		len = maxlen - (p - (u_char *)ip6);
1753 		len &= ~7;
1754 
1755 		/*
1756 		 * Redirected header option spec (RFC2461 4.6.3) talks nothing
1757 		 * about padding/truncate rule for the original IP packet.
1758 		 * From the discussion on IPv6imp in Feb 1999,
1759 		 * the consensus was:
1760 		 * - "attach as much as possible" is the goal
1761 		 * - pad if not aligned (original size can be guessed by
1762 		 *   original ip6 header)
1763 		 * Following code adds the padding if it is simple enough,
1764 		 * and truncates if not.
1765 		 */
1766 		if (len - sizeof(*nd_opt_rh) < m0->m_pkthdr.len) {
1767 			/* not enough room, truncate */
1768 			m_adj(m0, (len - sizeof(*nd_opt_rh)) -
1769 			    m0->m_pkthdr.len);
1770 		} else {
1771 			/*
1772 			 * enough room, truncate if not aligned.
1773 			 * we don't pad here for simplicity.
1774 			 */
1775 			size_t extra;
1776 
1777 			extra = m0->m_pkthdr.len % 8;
1778 			if (extra) {
1779 				/* truncate */
1780 				m_adj(m0, -extra);
1781 			}
1782 			len = m0->m_pkthdr.len + sizeof(*nd_opt_rh);
1783 		}
1784 
1785 		nd_opt_rh = (struct nd_opt_rd_hdr *)p;
1786 		bzero(nd_opt_rh, sizeof(*nd_opt_rh));
1787 		nd_opt_rh->nd_opt_rh_type = ND_OPT_REDIRECTED_HEADER;
1788 		nd_opt_rh->nd_opt_rh_len = len >> 3;
1789 		p += sizeof(*nd_opt_rh);
1790 		m->m_pkthdr.len = m->m_len = p - (u_char *)ip6;
1791 
1792 		/* connect m0 to m */
1793 		m->m_pkthdr.len += m0->m_pkthdr.len;
1794 		m_cat(m, m0);
1795 		m0 = NULL;
1796 	}
1797 noredhdropt:
1798 	if (m0) {
1799 		m_freem(m0);
1800 		m0 = NULL;
1801 	}
1802 
1803 	sip6 = mtod(m, struct ip6_hdr *);
1804 	if (IN6_IS_ADDR_LINKLOCAL(&sip6->ip6_src))
1805 		sip6->ip6_src.s6_addr16[1] = 0;
1806 	if (IN6_IS_ADDR_LINKLOCAL(&sip6->ip6_dst))
1807 		sip6->ip6_dst.s6_addr16[1] = 0;
1808 #if 0
1809 	if (IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_src))
1810 		ip6->ip6_src.s6_addr16[1] = 0;
1811 	if (IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_dst))
1812 		ip6->ip6_dst.s6_addr16[1] = 0;
1813 #endif
1814 	if (IN6_IS_ADDR_LINKLOCAL(&nd_rd->nd_rd_target))
1815 		nd_rd->nd_rd_target.s6_addr16[1] = 0;
1816 	if (IN6_IS_ADDR_LINKLOCAL(&nd_rd->nd_rd_dst))
1817 		nd_rd->nd_rd_dst.s6_addr16[1] = 0;
1818 
1819 	ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(struct ip6_hdr));
1820 
1821 	nd_rd->nd_rd_cksum = 0;
1822 	m->m_pkthdr.csum_flags |= M_ICMP_CSUM_OUT;
1823 
1824 	/* send the packet to outside... */
1825 	if (ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL) != 0)
1826 		icmp6_ifstat_inc(ifp, ifs6_out_error);
1827 
1828 	icmp6_ifstat_inc(ifp, ifs6_out_msg);
1829 	icmp6_ifstat_inc(ifp, ifs6_out_redirect);
1830 	icmp6stat.icp6s_outhist[ND_REDIRECT]++;
1831 
1832 	return;
1833 
1834 fail:
1835 	if (m)
1836 		m_freem(m);
1837 	if (m0)
1838 		m_freem(m0);
1839 }
1840 
1841 /*
1842  * ICMPv6 socket option processing.
1843  */
1844 int
1845 icmp6_ctloutput(int op, struct socket *so, int level, int optname,
1846     struct mbuf **mp)
1847 {
1848 	int error = 0;
1849 	struct inpcb *in6p = sotoinpcb(so);
1850 	struct mbuf *m = *mp;
1851 
1852 	if (level != IPPROTO_ICMPV6) {
1853 		if (op == PRCO_SETOPT && m)
1854 			(void)m_free(m);
1855 		return EINVAL;
1856 	}
1857 
1858 	switch (op) {
1859 	case PRCO_SETOPT:
1860 		switch (optname) {
1861 		case ICMP6_FILTER:
1862 		    {
1863 			struct icmp6_filter *p;
1864 
1865 			if (m == NULL || m->m_len != sizeof(*p)) {
1866 				error = EMSGSIZE;
1867 				break;
1868 			}
1869 			p = mtod(m, struct icmp6_filter *);
1870 			if (!p || !in6p->inp_icmp6filt) {
1871 				error = EINVAL;
1872 				break;
1873 			}
1874 			bcopy(p, in6p->inp_icmp6filt,
1875 				sizeof(struct icmp6_filter));
1876 			error = 0;
1877 			break;
1878 		    }
1879 
1880 		default:
1881 			error = ENOPROTOOPT;
1882 			break;
1883 		}
1884 		if (m)
1885 			m_freem(m);
1886 		break;
1887 
1888 	case PRCO_GETOPT:
1889 		switch (optname) {
1890 		case ICMP6_FILTER:
1891 		    {
1892 			struct icmp6_filter *p;
1893 
1894 			if (!in6p->inp_icmp6filt) {
1895 				error = EINVAL;
1896 				break;
1897 			}
1898 			*mp = m = m_get(M_WAIT, MT_SOOPTS);
1899 			m->m_len = sizeof(struct icmp6_filter);
1900 			p = mtod(m, struct icmp6_filter *);
1901 			bcopy(in6p->inp_icmp6filt, p,
1902 				sizeof(struct icmp6_filter));
1903 			error = 0;
1904 			break;
1905 		    }
1906 
1907 		default:
1908 			error = ENOPROTOOPT;
1909 			break;
1910 		}
1911 		break;
1912 	}
1913 
1914 	return (error);
1915 }
1916 
1917 /*
1918  * Perform rate limit check.
1919  * Returns 0 if it is okay to send the icmp6 packet.
1920  * Returns 1 if the router SHOULD NOT send this icmp6 packet due to rate
1921  * limitation.
1922  *
1923  * XXX per-destination/type check necessary?
1924  *
1925  * dst - not used at this moment
1926  * type - not used at this moment
1927  * code - not used at this moment
1928  */
1929 int
1930 icmp6_ratelimit(const struct in6_addr *dst, const int type, const int code)
1931 {
1932 	/* PPS limit */
1933 	if (!ppsratecheck(&icmp6errppslim_last, &icmp6errpps_count,
1934 	    icmp6errppslim))
1935 		return 1;	/* The packet is subject to rate limit */
1936 	return 0;		/* okay to send */
1937 }
1938 
1939 struct rtentry *
1940 icmp6_mtudisc_clone(struct sockaddr *dst, u_int rdomain)
1941 {
1942 	struct rtentry *rt;
1943 	int    error;
1944 
1945 	rt = rtalloc1(dst, RT_REPORT, rdomain);
1946 	if (rt == 0)
1947 		return NULL;
1948 
1949 	/* If we didn't get a host route, allocate one */
1950 	if ((rt->rt_flags & RTF_HOST) == 0) {
1951 		struct rt_addrinfo info;
1952 		struct rtentry *nrt;
1953 
1954 		bzero(&info, sizeof(info));
1955 		info.rti_flags = RTF_GATEWAY | RTF_HOST | RTF_DYNAMIC;
1956 		info.rti_info[RTAX_DST] = dst;
1957 		info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1958 		error = rtrequest1(RTM_ADD, &info, rt->rt_priority, &nrt,
1959 		    rdomain);
1960 		if (error) {
1961 			rtfree(rt);
1962 			return NULL;
1963 		}
1964 		nrt->rt_rmx = rt->rt_rmx;
1965 		rtfree(rt);
1966 		rt = nrt;
1967 	}
1968 	error = rt_timer_add(rt, icmp6_mtudisc_timeout,
1969 			icmp6_mtudisc_timeout_q, rdomain);
1970 	if (error) {
1971 		rtfree(rt);
1972 		return NULL;
1973 	}
1974 
1975 	return rt;	/* caller need to call rtfree() */
1976 }
1977 
1978 void
1979 icmp6_mtudisc_timeout(struct rtentry *rt, struct rttimer *r)
1980 {
1981 	if (rt == NULL)
1982 		panic("icmp6_mtudisc_timeout: bad route to timeout");
1983 	if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) ==
1984 	    (RTF_DYNAMIC | RTF_HOST)) {
1985 		struct rt_addrinfo info;
1986 
1987 		bzero(&info, sizeof(info));
1988 		info.rti_flags = rt->rt_flags;
1989 		info.rti_info[RTAX_DST] = rt_key(rt);
1990 		info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1991 		info.rti_info[RTAX_NETMASK] = rt_mask(rt);
1992 		rtrequest1(RTM_DELETE, &info, rt->rt_priority, NULL,
1993 		    r->rtt_tableid);
1994 	} else {
1995 		if (!(rt->rt_rmx.rmx_locks & RTV_MTU))
1996 			rt->rt_rmx.rmx_mtu = 0;
1997 	}
1998 }
1999 
2000 void
2001 icmp6_redirect_timeout(struct rtentry *rt, struct rttimer *r)
2002 {
2003 	if (rt == NULL)
2004 		panic("icmp6_redirect_timeout: bad route to timeout");
2005 	if ((rt->rt_flags & (RTF_GATEWAY | RTF_DYNAMIC | RTF_HOST)) ==
2006 	    (RTF_GATEWAY | RTF_DYNAMIC | RTF_HOST)) {
2007 		struct rt_addrinfo info;
2008 
2009 		bzero(&info, sizeof(info));
2010 		info.rti_flags = rt->rt_flags;
2011 		info.rti_info[RTAX_DST] = rt_key(rt);
2012 		info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
2013 		info.rti_info[RTAX_NETMASK] = rt_mask(rt);
2014 		rtrequest1(RTM_DELETE, &info, rt->rt_priority, NULL,
2015 		    r->rtt_tableid);
2016 	}
2017 }
2018 
2019 int *icmpv6ctl_vars[ICMPV6CTL_MAXID] = ICMPV6CTL_VARS;
2020 
2021 int
2022 icmp6_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp,
2023     void *newp, size_t newlen)
2024 {
2025 	/* All sysctl names at this level are terminal. */
2026 	if (namelen != 1)
2027 		return ENOTDIR;
2028 
2029 	switch (name[0]) {
2030 
2031 	case ICMPV6CTL_STATS:
2032 		return sysctl_rdstruct(oldp, oldlenp, newp,
2033 				&icmp6stat, sizeof(icmp6stat));
2034 	case ICMPV6CTL_ND6_DRLIST:
2035 	case ICMPV6CTL_ND6_PRLIST:
2036 		return nd6_sysctl(name[0], oldp, oldlenp, newp, newlen);
2037 	default:
2038 		if (name[0] < ICMPV6CTL_MAXID)
2039 			return (sysctl_int_arr(icmpv6ctl_vars, name, namelen,
2040 			    oldp, oldlenp, newp, newlen));
2041 		return ENOPROTOOPT;
2042 	}
2043 	/* NOTREACHED */
2044 }
2045