xref: /openbsd-src/sys/netinet6/nd6.c (revision 0b7734b3d77bb9b21afec6f4621cae6c805dbd45)
1 /*	$OpenBSD: nd6.c,v 1.188 2016/07/13 08:40:46 mpi Exp $	*/
2 /*	$KAME: nd6.c,v 1.280 2002/06/08 19:52:07 itojun Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/timeout.h>
36 #include <sys/malloc.h>
37 #include <sys/mbuf.h>
38 #include <sys/socket.h>
39 #include <sys/sockio.h>
40 #include <sys/time.h>
41 #include <sys/kernel.h>
42 #include <sys/pool.h>
43 #include <sys/protosw.h>
44 #include <sys/errno.h>
45 #include <sys/ioctl.h>
46 #include <sys/syslog.h>
47 #include <sys/queue.h>
48 #include <sys/task.h>
49 
50 #include <net/if.h>
51 #include <net/if_dl.h>
52 #include <net/if_types.h>
53 #include <net/route.h>
54 
55 #include <netinet/in.h>
56 #include <netinet/if_ether.h>
57 #include <netinet/ip_ipsp.h>
58 
59 #include <netinet6/in6_var.h>
60 #include <netinet/ip6.h>
61 #include <netinet6/ip6_var.h>
62 #include <netinet6/nd6.h>
63 #include <netinet/icmp6.h>
64 
/*
 * Interval, in seconds, with which nd6_init() arms the slow timeout
 * (nd6_slowtimo_ch).
 */
65 #define ND6_SLOWTIMER_INTERVAL (60 * 60) /* 1 hour */
/* Initial value of nd6_recalc_reachtm_interval, defined further down. */
66 #define ND6_RECALC_REACHTM_INTERVAL (60 * 120) /* 2 hours */
67 
68 /* timer values */
69 int	nd6_prune	= 1;	/* nd6_timer_work() re-arms itself every nd6_prune seconds */
70 int	nd6_delay	= 5;	/* delay before the first probe: 5 seconds */
71 int	nd6_umaxtries	= 3;	/* maximum unicast queries (PROBE state limit) */
72 int	nd6_mmaxtries	= 3;	/* maximum multicast queries (INCOMPLETE state limit) */
73 int	nd6_gctimer	= (60 * 60 * 24); /* 1 day: garbage collection timer */
74 
75 /* preventing too many loops in ND option parsing */
76 int nd6_maxndopt = 10;	/* max # of ND options allowed */
77 
/* nd6_nud_hint() ignores a hint once ln_byhint exceeds this value. */
78 int nd6_maxnudhint = 0;	/* max # of subsequent upper layer hints */
79 
/* Debug logging switch; defaults to on only when built with ND6_DEBUG. */
80 #ifdef ND6_DEBUG
81 int nd6_debug = 1;
82 #else
83 int nd6_debug = 0;
84 #endif
85 
/*
 * Neighbor cache: list of llinfo_nd6 entries.  nd6_rtrequest() inserts
 * entries at the head and removes them on RTM_DELETE.
 */
86 TAILQ_HEAD(llinfo_nd6_head, llinfo_nd6) nd6_list;
87 struct	pool nd6_pool;		/* pool for llinfo_nd6 structures */
/*
 * nd6_inuse is incremented when an entry is created and decremented when
 * it is deleted; nd6_allocated is only ever incremented in this part of
 * the file.
 */
88 int	nd6_inuse, nd6_allocated;
89 
/* Default router list and prefix list. */
90 struct nd_drhead nd_defrouter;
91 struct nd_prhead nd_prefix = { 0 };
92 
93 int nd6_recalc_reachtm_interval = ND6_RECALC_REACHTM_INTERVAL;
94 
/* Prototypes for routines defined in this file. */
95 void nd6_slowtimo(void *);
96 struct llinfo_nd6 *nd6_free(struct rtentry *, int);
97 void nd6_llinfo_timer(void *);
98 
/*
 * nd6_slowtimo_ch drives nd6_slowtimo().  nd6_timer_ch fires nd6_timer(),
 * which queues nd6_timer_task so that nd6_timer_work() runs from the
 * system task queue.
 */
99 struct timeout nd6_slowtimo_ch;
100 struct timeout nd6_timer_ch;
101 struct task nd6_timer_task;
102 void nd6_timer_work(void *);
103 
104 int fill_drlist(void *, size_t *, size_t);
105 int fill_prlist(void *, size_t *, size_t);
106 
107 void
108 nd6_init(void)
109 {
110 	static int nd6_init_done = 0;
111 
112 	if (nd6_init_done) {
113 		log(LOG_NOTICE, "%s called more than once\n", __func__);
114 		return;
115 	}
116 
117 	TAILQ_INIT(&nd6_list);
118 	pool_init(&nd6_pool, sizeof(struct llinfo_nd6), 0, 0, 0, "nd6", NULL);
119 
120 	/* initialization of the default router list */
121 	TAILQ_INIT(&nd_defrouter);
122 
123 	task_set(&nd6_timer_task, nd6_timer_work, NULL);
124 
125 	nd6_init_done = 1;
126 
127 	/* start timer */
128 	timeout_set(&nd6_slowtimo_ch, nd6_slowtimo, NULL);
129 	timeout_add_sec(&nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL);
130 
131 	nd6_rs_init();
132 }
133 
134 struct nd_ifinfo *
135 nd6_ifattach(struct ifnet *ifp)
136 {
137 	struct nd_ifinfo *nd;
138 
139 	nd = malloc(sizeof(*nd), M_IP6NDP, M_WAITOK | M_ZERO);
140 
141 	nd->initialized = 1;
142 
143 	nd->basereachable = REACHABLE_TIME;
144 	nd->reachable = ND_COMPUTE_RTIME(nd->basereachable);
145 	nd->retrans = RETRANS_TIMER;
146 	/* per-interface IFXF_AUTOCONF6 needs to be set too to accept RAs */
147 	nd->flags = (ND6_IFF_PERFORMNUD | ND6_IFF_ACCEPT_RTADV);
148 
149 	return nd;
150 }
151 
152 void
153 nd6_ifdetach(struct nd_ifinfo *nd)
154 {
155 
156 	free(nd, M_IP6NDP, 0);
157 }
158 
159 void
160 nd6_option_init(void *opt, int icmp6len, union nd_opts *ndopts)
161 {
162 	bzero(ndopts, sizeof(*ndopts));
163 	ndopts->nd_opts_search = (struct nd_opt_hdr *)opt;
164 	ndopts->nd_opts_last
165 		= (struct nd_opt_hdr *)(((u_char *)opt) + icmp6len);
166 
167 	if (icmp6len == 0) {
168 		ndopts->nd_opts_done = 1;
169 		ndopts->nd_opts_search = NULL;
170 	}
171 }
172 
173 /*
174  * Take one ND option.
175  */
176 struct nd_opt_hdr *
177 nd6_option(union nd_opts *ndopts)
178 {
179 	struct nd_opt_hdr *nd_opt;
180 	int olen;
181 
182 	if (!ndopts)
183 		panic("ndopts == NULL in nd6_option");
184 	if (!ndopts->nd_opts_last)
185 		panic("uninitialized ndopts in nd6_option");
186 	if (!ndopts->nd_opts_search)
187 		return NULL;
188 	if (ndopts->nd_opts_done)
189 		return NULL;
190 
191 	nd_opt = ndopts->nd_opts_search;
192 
193 	/* make sure nd_opt_len is inside the buffer */
194 	if ((caddr_t)&nd_opt->nd_opt_len >= (caddr_t)ndopts->nd_opts_last) {
195 		bzero(ndopts, sizeof(*ndopts));
196 		return NULL;
197 	}
198 
199 	olen = nd_opt->nd_opt_len << 3;
200 	if (olen == 0) {
201 		/*
202 		 * Message validation requires that all included
203 		 * options have a length that is greater than zero.
204 		 */
205 		bzero(ndopts, sizeof(*ndopts));
206 		return NULL;
207 	}
208 
209 	ndopts->nd_opts_search = (struct nd_opt_hdr *)((caddr_t)nd_opt + olen);
210 	if (ndopts->nd_opts_search > ndopts->nd_opts_last) {
211 		/* option overruns the end of buffer, invalid */
212 		bzero(ndopts, sizeof(*ndopts));
213 		return NULL;
214 	} else if (ndopts->nd_opts_search == ndopts->nd_opts_last) {
215 		/* reached the end of options chain */
216 		ndopts->nd_opts_done = 1;
217 		ndopts->nd_opts_search = NULL;
218 	}
219 	return nd_opt;
220 }
221 
222 /*
223  * Parse multiple ND options.
224  * This function is much easier to use, for ND routines that do not need
225  * multiple options of the same type.
226  */
227 int
228 nd6_options(union nd_opts *ndopts)
229 {
230 	struct nd_opt_hdr *nd_opt;
231 	int i = 0;
232 
233 	if (!ndopts)
234 		panic("ndopts == NULL in nd6_options");
235 	if (!ndopts->nd_opts_last)
236 		panic("uninitialized ndopts in nd6_options");
237 	if (!ndopts->nd_opts_search)
238 		return 0;
239 
240 	while (1) {
241 		nd_opt = nd6_option(ndopts);
242 		if (!nd_opt && !ndopts->nd_opts_last) {
243 			/*
244 			 * Message validation requires that all included
245 			 * options have a length that is greater than zero.
246 			 */
247 			icmp6stat.icp6s_nd_badopt++;
248 			bzero(ndopts, sizeof(*ndopts));
249 			return -1;
250 		}
251 
252 		if (!nd_opt)
253 			goto skip1;
254 
255 		switch (nd_opt->nd_opt_type) {
256 		case ND_OPT_SOURCE_LINKADDR:
257 		case ND_OPT_TARGET_LINKADDR:
258 		case ND_OPT_MTU:
259 		case ND_OPT_REDIRECTED_HEADER:
260 			if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) {
261 				nd6log((LOG_INFO,
262 				    "duplicated ND6 option found (type=%d)\n",
263 				    nd_opt->nd_opt_type));
264 				/* XXX bark? */
265 			} else {
266 				ndopts->nd_opt_array[nd_opt->nd_opt_type]
267 					= nd_opt;
268 			}
269 			break;
270 		case ND_OPT_PREFIX_INFORMATION:
271 			if (ndopts->nd_opt_array[nd_opt->nd_opt_type] == 0) {
272 				ndopts->nd_opt_array[nd_opt->nd_opt_type]
273 					= nd_opt;
274 			}
275 			ndopts->nd_opts_pi_end =
276 				(struct nd_opt_prefix_info *)nd_opt;
277 			break;
278 		default:
279 			/*
280 			 * Unknown options must be silently ignored,
281 			 * to accommodate future extension to the protocol.
282 			 */
283 			nd6log((LOG_DEBUG,
284 			    "nd6_options: unsupported option %d - "
285 			    "option ignored\n", nd_opt->nd_opt_type));
286 		}
287 
288 skip1:
289 		i++;
290 		if (i > nd6_maxndopt) {
291 			icmp6stat.icp6s_nd_toomanyopt++;
292 			nd6log((LOG_INFO, "too many loop in nd opt\n"));
293 			break;
294 		}
295 
296 		if (ndopts->nd_opts_done)
297 			break;
298 	}
299 
300 	return 0;
301 }
302 
303 /*
304  * ND6 timer routine to handle ND6 entries
305  */
306 void
307 nd6_llinfo_settimer(struct llinfo_nd6 *ln, int secs)
308 {
309 	int s;
310 
311 	s = splsoftnet();
312 
313 	if (secs < 0) {
314 		ln->ln_rt->rt_expire = 0;
315 		timeout_del(&ln->ln_timer_ch);
316 	} else {
317 		ln->ln_rt->rt_expire = time_uptime + secs;
318 		timeout_add_sec(&ln->ln_timer_ch, secs);
319 	}
320 
321 	splx(s);
322 }
323 
324 void
325 nd6_llinfo_timer(void *arg)
326 {
327 	int s;
328 	struct llinfo_nd6 *ln;
329 	struct rtentry *rt;
330 	struct sockaddr_in6 *dst;
331 	struct ifnet *ifp;
332 	struct nd_ifinfo *ndi = NULL;
333 
334 	s = splsoftnet();
335 
336 	ln = (struct llinfo_nd6 *)arg;
337 
338 	if ((rt = ln->ln_rt) == NULL)
339 		panic("ln->ln_rt == NULL");
340 	if ((ifp = if_get(rt->rt_ifidx)) == NULL) {
341 		splx(s);
342 		return;
343 	}
344 	ndi = ND_IFINFO(ifp);
345 	dst = satosin6(rt_key(rt));
346 
347 	/* sanity check */
348 	if (rt->rt_llinfo != NULL && (struct llinfo_nd6 *)rt->rt_llinfo != ln)
349 		panic("rt_llinfo(%p) is not equal to ln(%p)",
350 		      rt->rt_llinfo, ln);
351 	if (!dst)
352 		panic("dst=0 in nd6_timer(ln=%p)", ln);
353 
354 	switch (ln->ln_state) {
355 	case ND6_LLINFO_INCOMPLETE:
356 		if (ln->ln_asked < nd6_mmaxtries) {
357 			ln->ln_asked++;
358 			nd6_llinfo_settimer(ln, ndi->retrans / 1000);
359 			nd6_ns_output(ifp, NULL, &dst->sin6_addr, ln, 0);
360 		} else {
361 			struct mbuf *m = ln->ln_hold;
362 			if (m) {
363 				ln->ln_hold = NULL;
364 				/*
365 				 * Fake rcvif to make the ICMP error
366 				 * more helpful in diagnosing for the
367 				 * receiver.
368 				 * XXX: should we consider
369 				 * older rcvif?
370 				 */
371 				m->m_pkthdr.ph_ifidx = rt->rt_ifidx;
372 
373 				icmp6_error(m, ICMP6_DST_UNREACH,
374 				    ICMP6_DST_UNREACH_ADDR, 0);
375 				if (ln->ln_hold == m) {
376 					/* m is back in ln_hold. Discard. */
377 					m_freem(ln->ln_hold);
378 					ln->ln_hold = NULL;
379 				}
380 			}
381 			(void)nd6_free(rt, 0);
382 			ln = NULL;
383 		}
384 		break;
385 	case ND6_LLINFO_REACHABLE:
386 		if (!ND6_LLINFO_PERMANENT(ln)) {
387 			ln->ln_state = ND6_LLINFO_STALE;
388 			nd6_llinfo_settimer(ln, nd6_gctimer);
389 		}
390 		break;
391 
392 	case ND6_LLINFO_STALE:
393 	case ND6_LLINFO_PURGE:
394 		/* Garbage Collection(RFC 2461 5.3) */
395 		if (!ND6_LLINFO_PERMANENT(ln)) {
396 			(void)nd6_free(rt, 1);
397 			ln = NULL;
398 		}
399 		break;
400 
401 	case ND6_LLINFO_DELAY:
402 		if (ndi && (ndi->flags & ND6_IFF_PERFORMNUD) != 0) {
403 			/* We need NUD */
404 			ln->ln_asked = 1;
405 			ln->ln_state = ND6_LLINFO_PROBE;
406 			nd6_llinfo_settimer(ln, ndi->retrans / 1000);
407 			nd6_ns_output(ifp, &dst->sin6_addr,
408 			    &dst->sin6_addr, ln, 0);
409 		} else {
410 			ln->ln_state = ND6_LLINFO_STALE; /* XXX */
411 			nd6_llinfo_settimer(ln, nd6_gctimer);
412 		}
413 		break;
414 	case ND6_LLINFO_PROBE:
415 		if (ln->ln_asked < nd6_umaxtries) {
416 			ln->ln_asked++;
417 			nd6_llinfo_settimer(ln, ndi->retrans / 1000);
418 			nd6_ns_output(ifp, &dst->sin6_addr,
419 			    &dst->sin6_addr, ln, 0);
420 		} else {
421 			(void)nd6_free(rt, 0);
422 			ln = NULL;
423 		}
424 		break;
425 	}
426 
427 	if_put(ifp);
428 	splx(s);
429 }
430 
431 /*
432  * ND6 timer routine to expire default route list and prefix list
433  */
434 void
435 nd6_timer_work(void *null)
436 {
437 	int s;
438 	struct nd_defrouter *dr, *ndr;
439 	struct nd_prefix *pr, *npr;
440 	struct in6_ifaddr *ia6, *nia6;
441 
442 	s = splsoftnet();
443 	timeout_set(&nd6_timer_ch, nd6_timer, NULL);
444 	timeout_add_sec(&nd6_timer_ch, nd6_prune);
445 
446 	/* expire default router list */
447 	TAILQ_FOREACH_SAFE(dr, &nd_defrouter, dr_entry, ndr)
448 		if (dr->expire && dr->expire < time_second)
449 			defrtrlist_del(dr);
450 
451 	/*
452 	 * expire interface addresses.
453 	 * in the past the loop was inside prefix expiry processing.
454 	 * However, from a stricter spec-conformance standpoint, we should
455 	 * rather separate address lifetimes and prefix lifetimes.
456 	 */
457 	TAILQ_FOREACH_SAFE(ia6, &in6_ifaddr, ia_list, nia6) {
458 		/* check address lifetime */
459 		if (IFA6_IS_INVALID(ia6)) {
460 			in6_purgeaddr(&ia6->ia_ifa);
461 		} else if (IFA6_IS_DEPRECATED(ia6)) {
462 			ia6->ia6_flags |= IN6_IFF_DEPRECATED;
463 		} else {
464 			/*
465 			 * A new RA might have made a deprecated address
466 			 * preferred.
467 			 */
468 			ia6->ia6_flags &= ~IN6_IFF_DEPRECATED;
469 		}
470 	}
471 
472 	/* expire prefix list */
473 	LIST_FOREACH_SAFE(pr, &nd_prefix, ndpr_entry, npr) {
474 		/*
475 		 * check prefix lifetime.
476 		 * since pltime is just for autoconf, pltime processing for
477 		 * prefix is not necessary.
478 		 */
479 		if (pr->ndpr_vltime != ND6_INFINITE_LIFETIME &&
480 		    time_second - pr->ndpr_lastupdate > pr->ndpr_vltime) {
481 			/*
482 			 * address expiration and prefix expiration are
483 			 * separate.  NEVER perform in6_purgeaddr here.
484 			 */
485 
486 			prelist_remove(pr);
487 		}
488 	}
489 	splx(s);
490 }
491 
492 void
493 nd6_timer(void *ignored_arg)
494 {
495 	task_add(systq, &nd6_timer_task);
496 }
497 
498 /*
499  * Nuke neighbor cache/prefix/default router management table, right before
500  * ifp goes away.
501  */
502 void
503 nd6_purge(struct ifnet *ifp)
504 {
505 	struct llinfo_nd6 *ln, *nln;
506 	struct nd_defrouter *dr, *ndr;
507 	struct nd_prefix *pr, *npr;
508 
509 	/*
510 	 * Nuke default router list entries toward ifp.
511 	 * We defer removal of default router list entries that is installed
512 	 * in the routing table, in order to keep additional side effects as
513 	 * small as possible.
514 	 */
515 	TAILQ_FOREACH_SAFE(dr, &nd_defrouter, dr_entry, ndr) {
516 		if (dr->installed)
517 			continue;
518 
519 		if (dr->ifp == ifp)
520 			defrtrlist_del(dr);
521 	}
522 	TAILQ_FOREACH_SAFE(dr, &nd_defrouter, dr_entry, ndr) {
523 		if (!dr->installed)
524 			continue;
525 
526 		if (dr->ifp == ifp)
527 			defrtrlist_del(dr);
528 	}
529 
530 	/* Nuke prefix list entries toward ifp */
531 	LIST_FOREACH_SAFE(pr, &nd_prefix, ndpr_entry, npr) {
532 		if (pr->ndpr_ifp == ifp)
533 			prelist_remove(pr);
534 	}
535 
536 	if (ifp->if_xflags & IFXF_AUTOCONF6) {
537 		/* refresh default router list */
538 		defrouter_select();
539 	}
540 
541 	/*
542 	 * Nuke neighbor cache entries for the ifp.
543 	 */
544 	TAILQ_FOREACH_SAFE(ln, &nd6_list, ln_list, nln) {
545 		struct rtentry *rt;
546 		struct sockaddr_dl *sdl;
547 
548 		rt = ln->ln_rt;
549 		if (rt != NULL && rt->rt_gateway != NULL &&
550 		    rt->rt_gateway->sa_family == AF_LINK) {
551 			sdl = satosdl(rt->rt_gateway);
552 			if (sdl->sdl_index == ifp->if_index)
553 				nln = nd6_free(rt, 0);
554 		}
555 	}
556 }
557 
558 struct rtentry *
559 nd6_lookup(struct in6_addr *addr6, int create, struct ifnet *ifp,
560     u_int rtableid)
561 {
562 	struct rtentry *rt;
563 	struct sockaddr_in6 sin6;
564 	int flags;
565 
566 	bzero(&sin6, sizeof(sin6));
567 	sin6.sin6_len = sizeof(struct sockaddr_in6);
568 	sin6.sin6_family = AF_INET6;
569 	sin6.sin6_addr = *addr6;
570 	flags = (create) ? RT_RESOLVE : 0;
571 
572 	rt = rtalloc(sin6tosa(&sin6), flags, rtableid);
573 	if (rt != NULL && (rt->rt_flags & RTF_LLINFO) == 0) {
574 		/*
575 		 * This is the case for the default route.
576 		 * If we want to create a neighbor cache for the address, we
577 		 * should free the route for the destination and allocate an
578 		 * interface route.
579 		 */
580 		if (create) {
581 			rtfree(rt);
582 			rt = NULL;
583 		}
584 	}
585 	if (rt == NULL) {
586 		if (create && ifp) {
587 			struct rt_addrinfo info;
588 			int error;
589 
590 			/*
591 			 * If no route is available and create is set,
592 			 * we allocate a host route for the destination
593 			 * and treat it like an interface route.
594 			 * This hack is necessary for a neighbor which can't
595 			 * be covered by our own prefix.
596 			 */
597 			struct ifaddr *ifa =
598 			    ifaof_ifpforaddr(sin6tosa(&sin6), ifp);
599 			if (ifa == NULL)
600 				return (NULL);
601 
602 			/*
603 			 * Create a new route.  RTF_LLINFO is necessary
604 			 * to create a Neighbor Cache entry for the
605 			 * destination in nd6_rtrequest which will be
606 			 * called in rtrequest.
607 			 */
608 			bzero(&info, sizeof(info));
609 			info.rti_flags = RTF_HOST | RTF_LLINFO;
610 			info.rti_info[RTAX_DST] = sin6tosa(&sin6);
611 			info.rti_info[RTAX_GATEWAY] = sdltosa(ifp->if_sadl);
612 			error = rtrequest(RTM_ADD, &info, RTP_CONNECTED, &rt,
613 			    rtableid);
614 			if (error)
615 				return (NULL);
616 			if (rt->rt_llinfo != NULL) {
617 				struct llinfo_nd6 *ln =
618 				    (struct llinfo_nd6 *)rt->rt_llinfo;
619 				ln->ln_state = ND6_LLINFO_NOSTATE;
620 			}
621 		} else
622 			return (NULL);
623 	}
624 	/*
625 	 * Validation for the entry.
626 	 * Note that the check for rt_llinfo is necessary because a cloned
627 	 * route from a parent route that has the L flag (e.g. the default
628 	 * route to a p2p interface) may have the flag, too, while the
629 	 * destination is not actually a neighbor.
630 	 */
631 	if ((rt->rt_flags & RTF_GATEWAY) || (rt->rt_flags & RTF_LLINFO) == 0 ||
632 	    rt->rt_gateway->sa_family != AF_LINK || rt->rt_llinfo == NULL ||
633 	    (ifp != NULL && rt->rt_ifidx != ifp->if_index)) {
634 		if (create) {
635 			char addr[INET6_ADDRSTRLEN];
636 			nd6log((LOG_DEBUG, "%s: failed to lookup %s (if=%s)\n",
637 			    __func__,
638 			    inet_ntop(AF_INET6, addr6, addr, sizeof(addr)),
639 			    ifp ? ifp->if_xname : "unspec"));
640 		}
641 		rtfree(rt);
642 		return (NULL);
643 	}
644 	return (rt);
645 }
646 
647 /*
648  * Detect if a given IPv6 address identifies a neighbor on a given link.
649  * XXX: should take care of the destination of a p2p link?
650  */
651 int
652 nd6_is_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp)
653 {
654 	struct nd_prefix *pr;
655 	struct in6_ifaddr *ia6;
656 	struct ifaddr *ifa;
657 	struct rtentry *rt;
658 
659 	/*
660 	 * A link-local address is always a neighbor.
661 	 * XXX: we should use the sin6_scope_id field rather than the embedded
662 	 * interface index.
663 	 * XXX: a link does not necessarily specify a single interface.
664 	 */
665 	if (IN6_IS_ADDR_LINKLOCAL(&addr->sin6_addr) &&
666 	    ntohs(*(u_int16_t *)&addr->sin6_addr.s6_addr[2]) == ifp->if_index)
667 		return (1);
668 
669 	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
670 		if (ifa->ifa_addr->sa_family != AF_INET6)
671 			continue;
672 
673 		ia6 = ifatoia6(ifa);
674 
675 		/* Prefix check down below. */
676 		if (ia6->ia6_flags & IN6_IFF_AUTOCONF)
677 			continue;
678 
679 		if (IN6_ARE_MASKED_ADDR_EQUAL(&addr->sin6_addr,
680 		    &ia6->ia_addr.sin6_addr,
681 		    &ia6->ia_prefixmask.sin6_addr))
682 			return (1);
683 	}
684 
685 	/*
686 	 * If the address matches one of our on-link prefixes, it should be a
687 	 * neighbor.
688 	 */
689 	LIST_FOREACH(pr, &nd_prefix, ndpr_entry) {
690 		if (pr->ndpr_ifp != ifp)
691 			continue;
692 
693 		if (!(pr->ndpr_stateflags & NDPRF_ONLINK))
694 			continue;
695 
696 		if (IN6_ARE_MASKED_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr,
697 		    &addr->sin6_addr, &pr->ndpr_mask))
698 			return (1);
699 	}
700 
701 	/*
702 	 * Even if the address matches none of our addresses, it might be
703 	 * in the neighbor cache.
704 	 */
705 	rt = nd6_lookup(&addr->sin6_addr, 0, ifp, ifp->if_rdomain);
706 	if (rt != NULL) {
707 		rtfree(rt);
708 		return (1);
709 	}
710 
711 	return (0);
712 }
713 
714 /*
715  * Free an nd6 llinfo entry.
716  * Since the function would cause significant changes in the kernel, DO NOT
717  * make it global, unless you have a strong reason for the change, and are sure
718  * that the change is safe.
719  */
720 struct llinfo_nd6 *
721 nd6_free(struct rtentry *rt, int gc)
722 {
723 	struct llinfo_nd6 *ln = (struct llinfo_nd6 *)rt->rt_llinfo, *next;
724 	struct in6_addr in6 = satosin6(rt_key(rt))->sin6_addr;
725 	struct nd_defrouter *dr;
726 	struct ifnet *ifp;
727 	int s;
728 
729 	/*
730 	 * we used to have pfctlinput(PRC_HOSTDEAD) here.
731 	 * even though it is not harmful, it was not really necessary.
732 	 */
733 	ifp = if_get(rt->rt_ifidx);
734 
735 	s = splsoftnet();
736 	if (!ip6_forwarding) {
737 		dr = defrouter_lookup(&satosin6(rt_key(rt))->sin6_addr,
738 		    rt->rt_ifidx);
739 
740 		if (dr != NULL && dr->expire &&
741 		    ln->ln_state == ND6_LLINFO_STALE && gc) {
742 			/*
743 			 * If the reason for the deletion is just garbage
744 			 * collection, and the neighbor is an active default
745 			 * router, do not delete it.  Instead, reset the GC
746 			 * timer using the router's lifetime.
747 			 * Simply deleting the entry would affect default
748 			 * router selection, which is not necessarily a good
749 			 * thing, especially when we're using router preference
750 			 * values.
751 			 * XXX: the check for ln_state would be redundant,
752 			 *      but we intentionally keep it just in case.
753 			 */
754 			if (dr->expire > time_second) {
755 				nd6_llinfo_settimer(ln,
756 				    dr->expire - time_second);
757 			} else
758 				nd6_llinfo_settimer(ln, nd6_gctimer);
759 			splx(s);
760 			if_put(ifp);
761 			return (TAILQ_NEXT(ln, ln_list));
762 		}
763 
764 		if (ln->ln_router || dr) {
765 			/*
766 			 * rt6_flush must be called whether or not the neighbor
767 			 * is in the Default Router List.
768 			 * See a corresponding comment in nd6_na_input().
769 			 */
770 			rt6_flush(&in6, ifp);
771 		}
772 
773 		if (dr) {
774 			/*
775 			 * Unreachability of a router might affect the default
776 			 * router selection and on-link detection of advertised
777 			 * prefixes.
778 			 */
779 
780 			/*
781 			 * Temporarily fake the state to choose a new default
782 			 * router and to perform on-link determination of
783 			 * prefixes correctly.
784 			 * Below the state will be set correctly,
785 			 * or the entry itself will be deleted.
786 			 */
787 			ln->ln_state = ND6_LLINFO_INCOMPLETE;
788 
789 			/*
790 			 * Since defrouter_select() does not affect the
791 			 * on-link determination and MIP6 needs the check
792 			 * before the default router selection, we perform
793 			 * the check now.
794 			 */
795 			pfxlist_onlink_check();
796 
797 			/*
798 			 * refresh default router list
799 			 */
800 			defrouter_select();
801 		}
802 	}
803 
804 	/*
805 	 * Before deleting the entry, remember the next entry as the
806 	 * return value.  We need this because pfxlist_onlink_check() above
807 	 * might have freed other entries (particularly the old next entry) as
808 	 * a side effect (XXX).
809 	 */
810 	next = TAILQ_NEXT(ln, ln_list);
811 
812 	/*
813 	 * Detach the route from the routing tree and the list of neighbor
814 	 * caches, and disable the route entry not to be used in already
815 	 * cached routes.
816 	 */
817 	if (!ISSET(rt->rt_flags, RTF_STATIC))
818 		rtdeletemsg(rt, ifp, ifp->if_rdomain);
819 	splx(s);
820 
821 	if_put(ifp);
822 
823 	return (next);
824 }
825 
826 /*
827  * Upper-layer reachability hint for Neighbor Unreachability Detection.
828  *
829  * XXX cost-effective methods?
830  */
831 void
832 nd6_nud_hint(struct rtentry *rt)
833 {
834 	struct llinfo_nd6 *ln;
835 	struct ifnet *ifp;
836 
837 	ifp = if_get(rt->rt_ifidx);
838 	if (ifp == NULL)
839 		return;
840 
841 	if ((rt->rt_flags & RTF_GATEWAY) != 0 ||
842 	    (rt->rt_flags & RTF_LLINFO) == 0 ||
843 	    rt->rt_llinfo == NULL || rt->rt_gateway == NULL ||
844 	    rt->rt_gateway->sa_family != AF_LINK) {
845 		/* This is not a host route. */
846 		goto out;
847 	}
848 
849 	ln = (struct llinfo_nd6 *)rt->rt_llinfo;
850 	if (ln->ln_state < ND6_LLINFO_REACHABLE)
851 		goto out;
852 
853 	/*
854 	 * if we get upper-layer reachability confirmation many times,
855 	 * it is possible we have false information.
856 	 */
857 	ln->ln_byhint++;
858 	if (ln->ln_byhint > nd6_maxnudhint)
859 		goto out;
860 
861 	ln->ln_state = ND6_LLINFO_REACHABLE;
862 	if (!ND6_LLINFO_PERMANENT(ln))
863 		nd6_llinfo_settimer(ln, ND_IFINFO(ifp)->reachable);
864 out:
865 	if_put(ifp);
866 }
867 
868 void
869 nd6_rtrequest(struct ifnet *ifp, int req, struct rtentry *rt)
870 {
871 	struct sockaddr *gate = rt->rt_gateway;
872 	struct llinfo_nd6 *ln = (struct llinfo_nd6 *)rt->rt_llinfo;
873 	struct ifaddr *ifa;
874 	struct nd_defrouter *dr;
875 
876 	if (req == RTM_DELETE && (rt->rt_flags & RTF_GATEWAY) &&
877 	    (IN6_ARE_ADDR_EQUAL(&(satosin6(rt_key(rt)))->sin6_addr,
878 	    &in6addr_any) && rt_plen(rt) == 0)) {
879 		dr = defrouter_lookup(&satosin6(gate)->sin6_addr,
880 		    ifp->if_index);
881 		if (dr)
882 			dr->installed = 0;
883 	}
884 
885 	if (ISSET(rt->rt_flags, RTF_GATEWAY|RTF_MULTICAST))
886 		return;
887 
888 	if (nd6_need_cache(ifp) == 0 && (rt->rt_flags & RTF_HOST) == 0) {
889 		/*
890 		 * This is probably an interface direct route for a link
891 		 * which does not need neighbor caches (e.g. fe80::%lo0/64).
892 		 * We do not need special treatment below for such a route.
893 		 * Moreover, the RTF_LLINFO flag which would be set below
894 		 * would annoy the ndp(8) command.
895 		 */
896 		return;
897 	}
898 
899 	if (req == RTM_RESOLVE && nd6_need_cache(ifp) == 0) {
900 		/*
901 		 * For routing daemons like ospf6d we allow neighbor discovery
902 		 * based on the cloning route only.  This allows us to sent
903 		 * packets directly into a network without having an address
904 		 * with matching prefix on the interface.  If the cloning
905 		 * route is used for an stf interface, we would mistakenly
906 		 * make a neighbor cache for the host route, and would see
907 		 * strange neighbor solicitation for the corresponding
908 		 * destination.  In order to avoid confusion, we check if the
909 		 * interface is suitable for neighbor discovery, and stop the
910 		 * process if not.  Additionally, we remove the LLINFO flag
911 		 * so that ndp(8) will not try to get the neighbor information
912 		 * of the destination.
913 		 */
914 		rt->rt_flags &= ~RTF_LLINFO;
915 		return;
916 	}
917 
918 	switch (req) {
919 	case RTM_ADD:
920 		if ((rt->rt_flags & RTF_CLONING) ||
921 		    ((rt->rt_flags & (RTF_LLINFO | RTF_LOCAL)) && ln == NULL)) {
922 			if (ln != NULL)
923 				nd6_llinfo_settimer(ln, 0);
924 			if ((rt->rt_flags & RTF_CLONING) != 0)
925 				break;
926 		}
927 		/*
928 		 * In IPv4 code, we try to announce new RTF_ANNOUNCE entry here.
929 		 * We don't do that here since llinfo is not ready yet.
930 		 *
931 		 * There are also couple of other things to be discussed:
932 		 * - unsolicited NA code needs improvement beforehand
933 		 * - RFC2461 says we MAY send multicast unsolicited NA
934 		 *   (7.2.6 paragraph 4), however, it also says that we
935 		 *   SHOULD provide a mechanism to prevent multicast NA storm.
936 		 *   we don't have anything like it right now.
937 		 *   note that the mechanism needs a mutual agreement
938 		 *   between proxies, which means that we need to implement
939 		 *   a new protocol, or a new kludge.
940 		 * - from RFC2461 6.2.4, host MUST NOT send an unsolicited NA.
941 		 *   we need to check ip6forwarding before sending it.
942 		 *   (or should we allow proxy ND configuration only for
943 		 *   routers?  there's no mention about proxy ND from hosts)
944 		 */
945 #if 0
946 		/* XXX it does not work */
947 		if (rt->rt_flags & RTF_ANNOUNCE)
948 			nd6_na_output(ifp,
949 			      &satosin6(rt_key(rt))->sin6_addr,
950 			      &satosin6(rt_key(rt))->sin6_addr,
951 			      ip6_forwarding ? ND_NA_FLAG_ROUTER : 0,
952 			      1, NULL);
953 #endif
954 		/* FALLTHROUGH */
955 	case RTM_RESOLVE:
956 		if (gate->sa_family != AF_LINK ||
957 		    gate->sa_len < sizeof(struct sockaddr_dl)) {
958 			log(LOG_DEBUG, "%s: bad gateway value: %s\n",
959 			    __func__, ifp->if_xname);
960 			break;
961 		}
962 		satosdl(gate)->sdl_type = ifp->if_type;
963 		satosdl(gate)->sdl_index = ifp->if_index;
964 		if (ln != NULL)
965 			break;	/* This happens on a route change */
966 		/*
967 		 * Case 2: This route may come from cloning, or a manual route
968 		 * add with a LL address.
969 		 */
970 		ln = pool_get(&nd6_pool, PR_NOWAIT | PR_ZERO);
971 		rt->rt_llinfo = (caddr_t)ln;
972 		if (ln == NULL) {
973 			log(LOG_DEBUG, "%s: pool get failed\n", __func__);
974 			break;
975 		}
976 		nd6_inuse++;
977 		nd6_allocated++;
978 		ln->ln_rt = rt;
979 		timeout_set(&ln->ln_timer_ch, nd6_llinfo_timer, ln);
980 		/* this is required for "ndp" command. - shin */
981 		if (req == RTM_ADD) {
982 		        /*
983 			 * gate should have some valid AF_LINK entry,
984 			 * and ln expire should have some lifetime
985 			 * which is specified by ndp command.
986 			 */
987 			ln->ln_state = ND6_LLINFO_REACHABLE;
988 			ln->ln_byhint = 0;
989 		} else {
990 		        /*
991 			 * When req == RTM_RESOLVE, rt is created and
992 			 * initialized in rtrequest(), so rt_expire is 0.
993 			 */
994 			ln->ln_state = ND6_LLINFO_NOSTATE;
995 			nd6_llinfo_settimer(ln, 0);
996 		}
997 		rt->rt_flags |= RTF_LLINFO;
998 		TAILQ_INSERT_HEAD(&nd6_list, ln, ln_list);
999 
1000 		/*
1001 		 * If we have too many cache entries, initiate immediate
1002 		 * purging for some "less recently used" entries.  Note that
1003 		 * we cannot directly call nd6_free() here because it would
1004 		 * cause re-entering rtable related routines triggering an LOR
1005 		 * problem for FreeBSD.
1006 		 */
1007 		if (ip6_neighborgcthresh >= 0 &&
1008 		    nd6_inuse >= ip6_neighborgcthresh) {
1009 			int i;
1010 
1011 			for (i = 0; i < 10; i++) {
1012 				struct llinfo_nd6 *ln_end;
1013 
1014 				ln_end = TAILQ_LAST(&nd6_list, llinfo_nd6_head);
1015 				if (ln_end == ln)
1016 					break;
1017 
1018 				/* Move this entry to the head */
1019 				TAILQ_REMOVE(&nd6_list, ln_end, ln_list);
1020 				TAILQ_INSERT_HEAD(&nd6_list, ln_end, ln_list);
1021 
1022 				if (ND6_LLINFO_PERMANENT(ln_end))
1023 					continue;
1024 
1025 				if (ln_end->ln_state > ND6_LLINFO_INCOMPLETE)
1026 					ln_end->ln_state = ND6_LLINFO_STALE;
1027 				else
1028 					ln_end->ln_state = ND6_LLINFO_PURGE;
1029 				nd6_llinfo_settimer(ln_end, 0);
1030 			}
1031 		}
1032 
1033 		/*
1034 		 * check if rt_key(rt) is one of my address assigned
1035 		 * to the interface.
1036 		 */
1037 		ifa = &in6ifa_ifpwithaddr(ifp,
1038 		    &satosin6(rt_key(rt))->sin6_addr)->ia_ifa;
1039 		if (ifa) {
1040 			nd6_llinfo_settimer(ln, -1);
1041 			ln->ln_state = ND6_LLINFO_REACHABLE;
1042 			ln->ln_byhint = 0;
1043 			KASSERT(ifa == rt->rt_ifa);
1044 		} else if (rt->rt_flags & RTF_ANNOUNCE) {
1045 			nd6_llinfo_settimer(ln, -1);
1046 			ln->ln_state = ND6_LLINFO_REACHABLE;
1047 			ln->ln_byhint = 0;
1048 
1049 			/* join solicited node multicast for proxy ND */
1050 			if (ifp->if_flags & IFF_MULTICAST) {
1051 				struct in6_addr llsol;
1052 				int error;
1053 
1054 				llsol = satosin6(rt_key(rt))->sin6_addr;
1055 				llsol.s6_addr16[0] = htons(0xff02);
1056 				llsol.s6_addr16[1] = htons(ifp->if_index);
1057 				llsol.s6_addr32[1] = 0;
1058 				llsol.s6_addr32[2] = htonl(1);
1059 				llsol.s6_addr8[12] = 0xff;
1060 
1061 				if (in6_addmulti(&llsol, ifp, &error)) {
1062 					char addr[INET6_ADDRSTRLEN];
1063 					nd6log((LOG_ERR, "%s: failed to join "
1064 					    "%s (errno=%d)\n", ifp->if_xname,
1065 					    inet_ntop(AF_INET6, &llsol,
1066 						addr, sizeof(addr)),
1067 					    error));
1068 				}
1069 			}
1070 		}
1071 		break;
1072 
1073 	case RTM_DELETE:
1074 		if (ln == NULL)
1075 			break;
1076 		/* leave from solicited node multicast for proxy ND */
1077 		if ((rt->rt_flags & RTF_ANNOUNCE) != 0 &&
1078 		    (ifp->if_flags & IFF_MULTICAST) != 0) {
1079 			struct in6_addr llsol;
1080 			struct in6_multi *in6m;
1081 
1082 			llsol = satosin6(rt_key(rt))->sin6_addr;
1083 			llsol.s6_addr16[0] = htons(0xff02);
1084 			llsol.s6_addr16[1] = htons(ifp->if_index);
1085 			llsol.s6_addr32[1] = 0;
1086 			llsol.s6_addr32[2] = htonl(1);
1087 			llsol.s6_addr8[12] = 0xff;
1088 
1089 			IN6_LOOKUP_MULTI(llsol, ifp, in6m);
1090 			if (in6m)
1091 				in6_delmulti(in6m);
1092 		}
1093 		nd6_inuse--;
1094 		TAILQ_REMOVE(&nd6_list, ln, ln_list);
1095 		nd6_llinfo_settimer(ln, -1);
1096 		rt->rt_llinfo = NULL;
1097 		rt->rt_flags &= ~RTF_LLINFO;
1098 		m_freem(ln->ln_hold);
1099 		pool_put(&nd6_pool, ln);
1100 	}
1101 }
1102 
1103 int
1104 nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
1105 {
1106 	struct in6_ndireq *ndi = (struct in6_ndireq *)data;
1107 	struct in6_nbrinfo *nbi = (struct in6_nbrinfo *)data;
1108 	struct rtentry *rt;
1109 	int error = 0;
1110 	int s;
1111 
1112 	switch (cmd) {
1113 	case SIOCGIFINFO_IN6:
1114 		ndi->ndi = *ND_IFINFO(ifp);
1115 		memset(&ndi->ndi.randomseed0, 0, sizeof ndi->ndi.randomseed0);
1116 		memset(&ndi->ndi.randomseed1, 0, sizeof ndi->ndi.randomseed1);
1117 		memset(&ndi->ndi.randomid, 0, sizeof ndi->ndi.randomid);
1118 		break;
1119 	case SIOCSIFINFO_FLAGS:
1120 		ND_IFINFO(ifp)->flags = ndi->ndi.flags;
1121 		break;
1122 	case SIOCSNDFLUSH_IN6:	/* XXX: the ioctl name is confusing... */
1123 		/* sync kernel routing table with the default router list */
1124 		defrouter_reset();
1125 		defrouter_select();
1126 		break;
1127 	case SIOCSPFXFLUSH_IN6:
1128 	{
1129 		/* flush all the prefix advertised by routers */
1130 		struct nd_prefix *pr, *npr;
1131 
1132 		s = splsoftnet();
1133 		/* First purge the addresses referenced by a prefix. */
1134 		LIST_FOREACH_SAFE(pr, &nd_prefix, ndpr_entry, npr) {
1135 			struct in6_ifaddr *ia6, *ia6_next;
1136 
1137 			if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr))
1138 				continue; /* XXX */
1139 
1140 			/* do we really have to remove addresses as well? */
1141 			TAILQ_FOREACH_SAFE(ia6, &in6_ifaddr, ia_list, ia6_next) {
1142 				if ((ia6->ia6_flags & IN6_IFF_AUTOCONF) == 0)
1143 					continue;
1144 
1145 				if (ia6->ia6_ndpr == pr)
1146 					in6_purgeaddr(&ia6->ia_ifa);
1147 			}
1148 		}
1149 		/*
1150 		 * Purging the addresses might remove the prefix as well.
1151 		 * So run the loop again to access only prefixes that have
1152 		 * not been freed already.
1153 		 */
1154 		LIST_FOREACH_SAFE(pr, &nd_prefix, ndpr_entry, npr) {
1155 			if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr))
1156 				continue; /* XXX */
1157 
1158 			prelist_remove(pr);
1159 		}
1160 		splx(s);
1161 		break;
1162 	}
1163 	case SIOCSRTRFLUSH_IN6:
1164 	{
1165 		/* flush all the default routers */
1166 		struct nd_defrouter *dr, *ndr;
1167 
1168 		s = splsoftnet();
1169 		defrouter_reset();
1170 		TAILQ_FOREACH_SAFE(dr, &nd_defrouter, dr_entry, ndr)
1171 			defrtrlist_del(dr);
1172 		defrouter_select();
1173 		splx(s);
1174 		break;
1175 	}
1176 	case SIOCGNBRINFO_IN6:
1177 	{
1178 		struct llinfo_nd6 *ln;
1179 		struct in6_addr nb_addr = nbi->addr; /* make local for safety */
1180 		time_t expire;
1181 
1182 		/*
1183 		 * XXX: KAME specific hack for scoped addresses
1184 		 *      XXXX: for other scopes than link-local?
1185 		 */
1186 		if (IN6_IS_ADDR_LINKLOCAL(&nbi->addr) ||
1187 		    IN6_IS_ADDR_MC_LINKLOCAL(&nbi->addr)) {
1188 			u_int16_t *idp = (u_int16_t *)&nb_addr.s6_addr[2];
1189 
1190 			if (*idp == 0)
1191 				*idp = htons(ifp->if_index);
1192 		}
1193 
1194 		s = splsoftnet();
1195 		rt = nd6_lookup(&nb_addr, 0, ifp, ifp->if_rdomain);
1196 		if (rt == NULL ||
1197 		    (ln = (struct llinfo_nd6 *)rt->rt_llinfo) == NULL) {
1198 			error = EINVAL;
1199 			rtfree(rt);
1200 			splx(s);
1201 			break;
1202 		}
1203 		expire = ln->ln_rt->rt_expire;
1204 		if (expire != 0) {
1205 			expire -= time_uptime;
1206 			expire += time_second;
1207 		}
1208 
1209 		nbi->state = ln->ln_state;
1210 		nbi->asked = ln->ln_asked;
1211 		nbi->isrouter = ln->ln_router;
1212 		nbi->expire = expire;
1213 		rtfree(rt);
1214 		splx(s);
1215 
1216 		break;
1217 	}
1218 	}
1219 	return (error);
1220 }
1221 
1222 /*
1223  * Create neighbor cache entry and cache link-layer address,
1224  * on reception of inbound ND6 packets.  (RS/RA/NS/redirect)
1225  *
1226  * type - ICMP6 type
1227  * code - type dependent information
1228  */
1229 void
1230 nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr,
1231     int lladdrlen, int type, int code)
1232 {
1233 	struct rtentry *rt = NULL;
1234 	struct llinfo_nd6 *ln = NULL;
1235 	int is_newentry;
1236 	struct sockaddr_dl *sdl = NULL;
1237 	int do_update;
1238 	int olladdr;
1239 	int llchange;
1240 	int newstate = 0;
1241 
1242 	if (!ifp)
1243 		panic("ifp == NULL in nd6_cache_lladdr");
1244 	if (!from)
1245 		panic("from == NULL in nd6_cache_lladdr");
1246 
1247 	/* nothing must be updated for unspecified address */
1248 	if (IN6_IS_ADDR_UNSPECIFIED(from))
1249 		return;
1250 
1251 	/*
1252 	 * Validation about ifp->if_addrlen and lladdrlen must be done in
1253 	 * the caller.
1254 	 *
1255 	 * XXX If the link does not have link-layer address, what should
1256 	 * we do? (ifp->if_addrlen == 0)
1257 	 * Spec says nothing in sections for RA, RS and NA.  There's small
1258 	 * description on it in NS section (RFC 2461 7.2.3).
1259 	 */
1260 
1261 	rt = nd6_lookup(from, 0, ifp, ifp->if_rdomain);
1262 	if (rt == NULL) {
1263 #if 0
1264 		/* nothing must be done if there's no lladdr */
1265 		if (!lladdr || !lladdrlen)
1266 			return NULL;
1267 #endif
1268 
1269 		rt = nd6_lookup(from, 1, ifp, ifp->if_rdomain);
1270 		is_newentry = 1;
1271 	} else {
1272 		/* do nothing if static ndp is set */
1273 		if (rt->rt_flags & RTF_STATIC) {
1274 			rtfree(rt);
1275 			return;
1276 		}
1277 		is_newentry = 0;
1278 	}
1279 
1280 	if (!rt)
1281 		return;
1282 	if ((rt->rt_flags & (RTF_GATEWAY | RTF_LLINFO)) != RTF_LLINFO) {
1283 fail:
1284 		(void)nd6_free(rt, 0);
1285 		rtfree(rt);
1286 		return;
1287 	}
1288 	ln = (struct llinfo_nd6 *)rt->rt_llinfo;
1289 	if (ln == NULL)
1290 		goto fail;
1291 	if (rt->rt_gateway == NULL)
1292 		goto fail;
1293 	if (rt->rt_gateway->sa_family != AF_LINK)
1294 		goto fail;
1295 	sdl = satosdl(rt->rt_gateway);
1296 
1297 	olladdr = (sdl->sdl_alen) ? 1 : 0;
1298 	if (olladdr && lladdr) {
1299 		if (bcmp(lladdr, LLADDR(sdl), ifp->if_addrlen))
1300 			llchange = 1;
1301 		else
1302 			llchange = 0;
1303 	} else
1304 		llchange = 0;
1305 
1306 	/*
1307 	 * newentry olladdr  lladdr  llchange	(*=record)
1308 	 *	0	n	n	--	(1)
1309 	 *	0	y	n	--	(2)
1310 	 *	0	n	y	--	(3) * STALE
1311 	 *	0	y	y	n	(4) *
1312 	 *	0	y	y	y	(5) * STALE
1313 	 *	1	--	n	--	(6)   NOSTATE(= PASSIVE)
1314 	 *	1	--	y	--	(7) * STALE
1315 	 */
1316 
1317 	if (llchange) {
1318 		char addr[INET6_ADDRSTRLEN];
1319 		log(LOG_INFO, "ndp info overwritten for %s by %s on %s\n",
1320 		    inet_ntop(AF_INET6, from, addr, sizeof(addr)),
1321 		    ether_sprintf(lladdr), ifp->if_xname);
1322 	}
1323 	if (lladdr) {		/* (3-5) and (7) */
1324 		/*
1325 		 * Record source link-layer address
1326 		 * XXX is it dependent to ifp->if_type?
1327 		 */
1328 		sdl->sdl_alen = ifp->if_addrlen;
1329 		bcopy(lladdr, LLADDR(sdl), ifp->if_addrlen);
1330 	}
1331 
1332 	if (!is_newentry) {
1333 		if ((!olladdr && lladdr) ||		/* (3) */
1334 		    (olladdr && lladdr && llchange)) {	/* (5) */
1335 			do_update = 1;
1336 			newstate = ND6_LLINFO_STALE;
1337 		} else					/* (1-2,4) */
1338 			do_update = 0;
1339 	} else {
1340 		do_update = 1;
1341 		if (!lladdr)				/* (6) */
1342 			newstate = ND6_LLINFO_NOSTATE;
1343 		else					/* (7) */
1344 			newstate = ND6_LLINFO_STALE;
1345 	}
1346 
1347 	if (do_update) {
1348 		/*
1349 		 * Update the state of the neighbor cache.
1350 		 */
1351 		ln->ln_state = newstate;
1352 
1353 		if (ln->ln_state == ND6_LLINFO_STALE) {
1354 			/*
1355 			 * Since nd6_resolve() in ifp->if_output() will cause
1356 			 * state transition to DELAY and reset the timer,
1357 			 * we must set the timer now, although it is actually
1358 			 * meaningless.
1359 			 */
1360 			nd6_llinfo_settimer(ln, nd6_gctimer);
1361 
1362 			if (ln->ln_hold) {
1363 				struct mbuf *n = ln->ln_hold;
1364 				ln->ln_hold = NULL;
1365 				/*
1366 				 * we assume ifp is not a p2p here, so just
1367 				 * set the 2nd argument as the 1st one.
1368 				 */
1369 				ifp->if_output(ifp, n, rt_key(rt), rt);
1370 				if (ln->ln_hold == n) {
1371 					/* n is back in ln_hold. Discard. */
1372 					m_freem(ln->ln_hold);
1373 					ln->ln_hold = NULL;
1374 				}
1375 			}
1376 		} else if (ln->ln_state == ND6_LLINFO_INCOMPLETE) {
1377 			/* probe right away */
1378 			nd6_llinfo_settimer((void *)ln, 0);
1379 		}
1380 	}
1381 
1382 	/*
1383 	 * ICMP6 type dependent behavior.
1384 	 *
1385 	 * NS: clear IsRouter if new entry
1386 	 * RS: clear IsRouter
1387 	 * RA: set IsRouter if there's lladdr
1388 	 * redir: clear IsRouter if new entry
1389 	 *
1390 	 * RA case, (1):
1391 	 * The spec says that we must set IsRouter in the following cases:
1392 	 * - If lladdr exist, set IsRouter.  This means (1-5).
1393 	 * - If it is old entry (!newentry), set IsRouter.  This means (7).
1394 	 * So, based on the spec, in (1-5) and (7) cases we must set IsRouter.
1395 	 * A question arises for (1) case.  (1) case has no lladdr in the
1396 	 * neighbor cache, this is similar to (6).
1397 	 * This case is rare but we figured that we MUST NOT set IsRouter.
1398 	 *
1399 	 * newentry olladdr  lladdr  llchange	    NS  RS  RA	redir
1400 	 *							D R
1401 	 *	0	n	n	--	(1)	c   ?     s
1402 	 *	0	y	n	--	(2)	c   s     s
1403 	 *	0	n	y	--	(3)	c   s     s
1404 	 *	0	y	y	n	(4)	c   s     s
1405 	 *	0	y	y	y	(5)	c   s     s
1406 	 *	1	--	n	--	(6) c	c	c s
1407 	 *	1	--	y	--	(7) c	c   s	c s
1408 	 *
1409 	 *					(c=clear s=set)
1410 	 */
1411 	switch (type & 0xff) {
1412 	case ND_NEIGHBOR_SOLICIT:
1413 		/*
1414 		 * New entry must have is_router flag cleared.
1415 		 */
1416 		if (is_newentry)	/* (6-7) */
1417 			ln->ln_router = 0;
1418 		break;
1419 	case ND_REDIRECT:
1420 		/*
1421 		 * If the icmp is a redirect to a better router, always set the
1422 		 * is_router flag.  Otherwise, if the entry is newly created,
1423 		 * clear the flag.  [RFC 2461, sec 8.3]
1424 		 */
1425 		if (code == ND_REDIRECT_ROUTER)
1426 			ln->ln_router = 1;
1427 		else if (is_newentry) /* (6-7) */
1428 			ln->ln_router = 0;
1429 		break;
1430 	case ND_ROUTER_SOLICIT:
1431 		/*
1432 		 * is_router flag must always be cleared.
1433 		 */
1434 		ln->ln_router = 0;
1435 		break;
1436 	case ND_ROUTER_ADVERT:
1437 		/*
1438 		 * Mark an entry with lladdr as a router.
1439 		 */
1440 		if ((!is_newentry && (olladdr || lladdr)) ||	/* (2-5) */
1441 		    (is_newentry && lladdr)) {			/* (7) */
1442 			ln->ln_router = 1;
1443 		}
1444 		break;
1445 	}
1446 
1447 	/*
1448 	 * When the link-layer address of a router changes, select the
1449 	 * best router again.  In particular, when the neighbor entry is newly
1450 	 * created, it might affect the selection policy.
1451 	 * Question: can we restrict the first condition to the "is_newentry"
1452 	 * case?
1453 	 * XXX: when we hear an RA from a new router with the link-layer
1454 	 * address option, defrouter_select() is called twice, since
1455 	 * defrtrlist_update called the function as well.  However, I believe
1456 	 * we can compromise the overhead, since it only happens the first
1457 	 * time.
1458 	 */
1459 	if (do_update && ln->ln_router && (ifp->if_xflags & IFXF_AUTOCONF6))
1460 		defrouter_select();
1461 
1462 	rtfree(rt);
1463 }
1464 
1465 void
1466 nd6_slowtimo(void *ignored_arg)
1467 {
1468 	int s = splsoftnet();
1469 	struct nd_ifinfo *nd6if;
1470 	struct ifnet *ifp;
1471 
1472 	timeout_set(&nd6_slowtimo_ch, nd6_slowtimo, NULL);
1473 	timeout_add_sec(&nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL);
1474 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1475 		nd6if = ND_IFINFO(ifp);
1476 		if (nd6if->basereachable && /* already initialized */
1477 		    (nd6if->recalctm -= ND6_SLOWTIMER_INTERVAL) <= 0) {
1478 			/*
1479 			 * Since reachable time rarely changes by router
1480 			 * advertisements, we SHOULD insure that a new random
1481 			 * value gets recomputed at least once every few hours.
1482 			 * (RFC 2461, 6.3.4)
1483 			 */
1484 			nd6if->recalctm = nd6_recalc_reachtm_interval;
1485 			nd6if->reachable = ND_COMPUTE_RTIME(nd6if->basereachable);
1486 		}
1487 	}
1488 	splx(s);
1489 }
1490 
1491 int
1492 nd6_resolve(struct ifnet *ifp, struct rtentry *rt0, struct mbuf *m,
1493     struct sockaddr *dst, u_char *desten)
1494 {
1495 	struct sockaddr_dl *sdl;
1496 	struct rtentry *rt;
1497 	struct llinfo_nd6 *ln = NULL;
1498 	int error;
1499 
1500 	if (m->m_flags & M_MCAST) {
1501 		ETHER_MAP_IPV6_MULTICAST(&satosin6(dst)->sin6_addr, desten);
1502 		return (0);
1503 	}
1504 
1505 	error = rt_checkgate(rt0, &rt);
1506 	if (error) {
1507 		m_freem(m);
1508 		return (error);
1509 	}
1510 
1511 	/*
1512 	 * Address resolution or Neighbor Unreachability Detection
1513 	 * for the next hop.
1514 	 * At this point, the destination of the packet must be a unicast
1515 	 * or an anycast address(i.e. not a multicast).
1516 	 */
1517 	if (!ISSET(rt->rt_flags, RTF_LLINFO)) {
1518 		char addr[INET6_ADDRSTRLEN];
1519 		log(LOG_DEBUG, "%s: %s: route contains no ND information\n",
1520 		    __func__, inet_ntop(AF_INET6,
1521 		    &satosin6(rt_key(rt))->sin6_addr, addr, sizeof(addr)));
1522 		m_freem(m);
1523 		return (EINVAL);
1524 	}
1525 
1526 	if (rt->rt_gateway->sa_family != AF_LINK) {
1527 		printf("%s: something odd happens\n", __func__);
1528 		m_freem(m);
1529 		return (EINVAL);
1530 	}
1531 
1532 	ln = (struct llinfo_nd6 *)rt->rt_llinfo;
1533 	KASSERT(ln != NULL);
1534 
1535 	/*
1536 	 * Move this entry to the head of the queue so that it is less likely
1537 	 * for this entry to be a target of forced garbage collection (see
1538 	 * nd6_rtrequest()).
1539 	 */
1540 	TAILQ_REMOVE(&nd6_list, ln, ln_list);
1541 	TAILQ_INSERT_HEAD(&nd6_list, ln, ln_list);
1542 
1543 	/*
1544 	 * The first time we send a packet to a neighbor whose entry is
1545 	 * STALE, we have to change the state to DELAY and a sets a timer to
1546 	 * expire in DELAY_FIRST_PROBE_TIME seconds to ensure do
1547 	 * neighbor unreachability detection on expiration.
1548 	 * (RFC 2461 7.3.3)
1549 	 */
1550 	if (ln->ln_state == ND6_LLINFO_STALE) {
1551 		ln->ln_asked = 0;
1552 		ln->ln_state = ND6_LLINFO_DELAY;
1553 		nd6_llinfo_settimer(ln, nd6_delay);
1554 	}
1555 
1556 	/*
1557 	 * If the neighbor cache entry has a state other than INCOMPLETE
1558 	 * (i.e. its link-layer address is already resolved), just
1559 	 * send the packet.
1560 	 */
1561 	if (ln->ln_state > ND6_LLINFO_INCOMPLETE) {
1562 		sdl = satosdl(rt->rt_gateway);
1563 		if (sdl->sdl_alen != ETHER_ADDR_LEN) {
1564 			char addr[INET6_ADDRSTRLEN];
1565 			log(LOG_DEBUG, "%s: %s: incorrect nd6 information\n",
1566 			    __func__,
1567 			    inet_ntop(AF_INET6, &satosin6(dst)->sin6_addr,
1568 				addr, sizeof(addr)));
1569 			m_freem(m);
1570 			return (EINVAL);
1571 		}
1572 
1573 		bcopy(LLADDR(sdl), desten, sdl->sdl_alen);
1574 		return (0);
1575 	}
1576 
1577 	/*
1578 	 * There is a neighbor cache entry, but no ethernet address
1579 	 * response yet.  Replace the held mbuf (if any) with this
1580 	 * latest one.
1581 	 */
1582 	if (ln->ln_state == ND6_LLINFO_NOSTATE)
1583 		ln->ln_state = ND6_LLINFO_INCOMPLETE;
1584 	m_freem(ln->ln_hold);
1585 	ln->ln_hold = m;
1586 
1587 	/*
1588 	 * If there has been no NS for the neighbor after entering the
1589 	 * INCOMPLETE state, send the first solicitation.
1590 	 */
1591 	if (!ND6_LLINFO_PERMANENT(ln) && ln->ln_asked == 0) {
1592 		ln->ln_asked++;
1593 		nd6_llinfo_settimer(ln, ND_IFINFO(ifp)->retrans / 1000);
1594 		nd6_ns_output(ifp, NULL, &satosin6(dst)->sin6_addr, ln, 0);
1595 	}
1596 	return (EAGAIN);
1597 }
1598 
1599 int
1600 nd6_need_cache(struct ifnet *ifp)
1601 {
1602 	/*
1603 	 * RFC2893 says:
1604 	 * - unidirectional tunnels needs no ND
1605 	 */
1606 	switch (ifp->if_type) {
1607 	case IFT_ETHER:
1608 	case IFT_IEEE80211:
1609 	case IFT_CARP:
1610 		return (1);
1611 	default:
1612 		return (0);
1613 	}
1614 }
1615 
1616 /*
1617  * oldp - syscall arg, need copyout
1618  * newp - syscall arg, need copyin
1619  */
1620 
1621 int
1622 nd6_sysctl(int name, void *oldp, size_t *oldlenp, void *newp, size_t newlen)
1623 {
1624 	void *p;
1625 	size_t ol;
1626 	int error;
1627 
1628 	error = 0;
1629 
1630 	if (newp)
1631 		return EPERM;
1632 	if (oldp && !oldlenp)
1633 		return EINVAL;
1634 	ol = oldlenp ? *oldlenp : 0;
1635 
1636 	if (oldp) {
1637 		p = malloc(*oldlenp, M_TEMP, M_WAITOK | M_CANFAIL);
1638 		if (!p)
1639 			return ENOMEM;
1640 	} else
1641 		p = NULL;
1642 	switch (name) {
1643 	case ICMPV6CTL_ND6_DRLIST:
1644 		error = fill_drlist(p, oldlenp, ol);
1645 		if (!error && p && oldp)
1646 			error = copyout(p, oldp, *oldlenp);
1647 		break;
1648 
1649 	case ICMPV6CTL_ND6_PRLIST:
1650 		error = fill_prlist(p, oldlenp, ol);
1651 		if (!error && p && oldp)
1652 			error = copyout(p, oldp, *oldlenp);
1653 		break;
1654 
1655 	default:
1656 		error = ENOPROTOOPT;
1657 		break;
1658 	}
1659 	if (p)
1660 		free(p, M_TEMP, 0);
1661 
1662 	return (error);
1663 }
1664 
1665 int
1666 fill_drlist(void *oldp, size_t *oldlenp, size_t ol)
1667 {
1668 	int error = 0, s;
1669 	struct in6_defrouter *d = NULL, *de = NULL;
1670 	struct nd_defrouter *dr;
1671 	size_t l;
1672 
1673 	s = splsoftnet();
1674 
1675 	if (oldp) {
1676 		d = (struct in6_defrouter *)oldp;
1677 		de = (struct in6_defrouter *)((caddr_t)oldp + *oldlenp);
1678 	}
1679 	l = 0;
1680 
1681 	TAILQ_FOREACH(dr, &nd_defrouter, dr_entry) {
1682 		if (oldp && d + 1 <= de) {
1683 			bzero(d, sizeof(*d));
1684 			d->rtaddr.sin6_family = AF_INET6;
1685 			d->rtaddr.sin6_len = sizeof(struct sockaddr_in6);
1686 			in6_recoverscope(&d->rtaddr, &dr->rtaddr);
1687 			d->flags = dr->flags;
1688 			d->rtlifetime = dr->rtlifetime;
1689 			d->expire = dr->expire;
1690 			d->if_index = dr->ifp->if_index;
1691 		}
1692 
1693 		l += sizeof(*d);
1694 		if (d)
1695 			d++;
1696 	}
1697 
1698 	if (oldp) {
1699 		*oldlenp = l;	/* (caddr_t)d - (caddr_t)oldp */
1700 		if (l > ol)
1701 			error = ENOMEM;
1702 	} else
1703 		*oldlenp = l;
1704 
1705 	splx(s);
1706 
1707 	return (error);
1708 }
1709 
1710 int
1711 fill_prlist(void *oldp, size_t *oldlenp, size_t ol)
1712 {
1713 	int error = 0, s;
1714 	struct nd_prefix *pr;
1715 	char *p = NULL, *ps = NULL;
1716 	char *pe = NULL;
1717 	size_t l;
1718 
1719 	s = splsoftnet();
1720 
1721 	if (oldp) {
1722 		ps = p = (char *)oldp;
1723 		pe = (char *)oldp + *oldlenp;
1724 	}
1725 	l = 0;
1726 
1727 	LIST_FOREACH(pr, &nd_prefix, ndpr_entry) {
1728 		u_short advrtrs;
1729 		struct sockaddr_in6 sin6;
1730 		struct nd_pfxrouter *pfr;
1731 		struct in6_prefix pfx;
1732 
1733 		if (oldp && p + sizeof(struct in6_prefix) <= pe) {
1734 			memset(&pfx, 0, sizeof(pfx));
1735 			ps = p;
1736 
1737 			pfx.prefix = pr->ndpr_prefix;
1738 			in6_recoverscope(&pfx.prefix,
1739 			    &pfx.prefix.sin6_addr);
1740 			pfx.raflags = pr->ndpr_raf;
1741 			pfx.prefixlen = pr->ndpr_plen;
1742 			pfx.vltime = pr->ndpr_vltime;
1743 			pfx.pltime = pr->ndpr_pltime;
1744 			pfx.if_index = pr->ndpr_ifp->if_index;
1745 			if (pr->ndpr_vltime == ND6_INFINITE_LIFETIME)
1746 				pfx.expire = 0;
1747 			else {
1748 				time_t maxexpire;
1749 
1750 				/* XXX: we assume time_t is signed. */
1751 				maxexpire = (time_t)~(1ULL <<
1752 				    ((sizeof(maxexpire) * 8) - 1));
1753 				if (pr->ndpr_vltime <
1754 				    maxexpire - pr->ndpr_lastupdate) {
1755 					pfx.expire = pr->ndpr_lastupdate +
1756 						pr->ndpr_vltime;
1757 				} else
1758 					pfx.expire = maxexpire;
1759 			}
1760 			pfx.refcnt = pr->ndpr_refcnt;
1761 			pfx.flags = pr->ndpr_stateflags;
1762 			pfx.origin = PR_ORIG_RA;
1763 
1764 			p += sizeof(pfx); l += sizeof(pfx);
1765 
1766 			advrtrs = 0;
1767 			LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry) {
1768 				if (p + sizeof(sin6) > pe) {
1769 					advrtrs++;
1770 					continue;
1771 				}
1772 				bzero(&sin6, sizeof(sin6));
1773 				sin6.sin6_family = AF_INET6;
1774 				sin6.sin6_len = sizeof(struct sockaddr_in6);
1775 				in6_recoverscope(&sin6, &pfr->router->rtaddr);
1776 				advrtrs++;
1777 				memcpy(p, &sin6, sizeof(sin6));
1778 				p += sizeof(sin6);
1779 				l += sizeof(sin6);
1780 			}
1781 			pfx.advrtrs = advrtrs;
1782 			memcpy(ps, &pfx, sizeof(pfx));
1783 		}
1784 		else {
1785 			l += sizeof(pfx);
1786 			advrtrs = 0;
1787 			LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry) {
1788 				advrtrs++;
1789 				l += sizeof(sin6);
1790 			}
1791 		}
1792 	}
1793 
1794 	if (oldp) {
1795 		*oldlenp = l;	/* (caddr_t)d - (caddr_t)oldp */
1796 		if (l > ol)
1797 			error = ENOMEM;
1798 	} else
1799 		*oldlenp = l;
1800 
1801 	splx(s);
1802 
1803 	return (error);
1804 }
1805