xref: /openbsd-src/sys/netinet6/in6.c (revision 99fd087599a8791921855f21bd7e36130f39aadc)
1 /*	$OpenBSD: in6.c,v 1.234 2019/11/18 22:08:59 bluhm Exp $	*/
2 /*	$KAME: in6.c,v 1.372 2004/06/14 08:14:21 itojun Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1982, 1986, 1991, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)in.c	8.2 (Berkeley) 11/15/93
62  */
63 
64 #include "carp.h"
65 
66 #include <sys/param.h>
67 #include <sys/ioctl.h>
68 #include <sys/errno.h>
69 #include <sys/malloc.h>
70 #include <sys/socket.h>
71 #include <sys/socketvar.h>
72 #include <sys/sockio.h>
73 #include <sys/mbuf.h>
74 #include <sys/systm.h>
75 #include <sys/time.h>
76 #include <sys/kernel.h>
77 #include <sys/syslog.h>
78 
79 #include <net/if.h>
80 #include <net/if_dl.h>
81 #include <net/if_types.h>
82 #include <net/route.h>
83 
84 #include <netinet/in.h>
85 #include <netinet/if_ether.h>
86 
87 #include <netinet6/in6_var.h>
88 #include <netinet/ip6.h>
89 #include <netinet6/ip6_var.h>
90 #include <netinet6/nd6.h>
91 #include <netinet6/mld6_var.h>
92 #ifdef MROUTING
93 #include <netinet6/ip6_mroute.h>
94 #endif
95 #include <netinet6/in6_ifattach.h>
96 #if NCARP > 0
97 #include <netinet/ip_carp.h>
98 #endif
99 
100 /*
101  * Definitions of some constant IP6 addresses.
102  */
103 const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
104 const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT;
105 const struct in6_addr in6addr_intfacelocal_allnodes =
106 	IN6ADDR_INTFACELOCAL_ALLNODES_INIT;
107 const struct in6_addr in6addr_linklocal_allnodes =
108 	IN6ADDR_LINKLOCAL_ALLNODES_INIT;
109 const struct in6_addr in6addr_linklocal_allrouters =
110 	IN6ADDR_LINKLOCAL_ALLROUTERS_INIT;
111 
112 const struct in6_addr in6mask0 = IN6MASK0;
113 const struct in6_addr in6mask32 = IN6MASK32;
114 const struct in6_addr in6mask64 = IN6MASK64;
115 const struct in6_addr in6mask96 = IN6MASK96;
116 const struct in6_addr in6mask128 = IN6MASK128;
117 
118 int in6_ioctl(u_long, caddr_t, struct ifnet *, int);
119 int in6_ioctl_change_ifaddr(u_long, caddr_t, struct ifnet *);
120 int in6_ioctl_get(u_long, caddr_t, struct ifnet *);
121 int in6_check_embed_scope(struct sockaddr_in6 *, unsigned int);
122 int in6_clear_scope_id(struct sockaddr_in6 *, unsigned int);
123 int in6_ifinit(struct ifnet *, struct in6_ifaddr *, int);
124 void in6_unlink_ifa(struct in6_ifaddr *, struct ifnet *);
125 
126 const struct sockaddr_in6 sa6_any = {
127 	sizeof(sa6_any), AF_INET6, 0, 0, IN6ADDR_ANY_INIT, 0
128 };
129 
/*
 * Convert an IPv6 netmask into a prefix length.
 *
 * mask points to the first byte of the mask.  lim0, when not NULL, points
 * one past the last byte to examine.  A NULL lim0, or one that does not lie
 * within [mask, mask + sizeof(*mask)], selects the whole in6_addr.
 *
 * Returns the number of leading one bits, or -1 if any one bit follows a
 * zero bit inside the examined range (i.e. the mask is not contiguous).
 */
int
in6_mask2len(struct in6_addr *mask, u_char *lim0)
{
	u_char *start = (u_char *)mask;
	u_char *lim = lim0, *p;
	int bytes = 0, bits = 0;

	/* ignore the scope_id part */
	if (lim0 == NULL || lim0 < start ||
	    (size_t)(lim0 - start) > sizeof(*mask))
		lim = start + sizeof(*mask);

	/* count the leading bytes that are entirely ones */
	for (p = start; p < lim && *p == 0xff; p++)
		bytes++;

	if (p < lim) {
		/* count the leading one bits of the first non-0xff byte */
		while (bits < 8 && (*p & (0x80 >> bits)) != 0)
			bits++;

		/*
		 * Every bit after the leading ones has to be zero.  The
		 * rest of the current byte is checked even when it has no
		 * leading one bit at all, so that a byte such as 0x01 or
		 * 0x7f is rejected instead of being counted as zero bits.
		 */
		if ((*p & (0xff >> bits)) != 0)
			return (-1);
		for (p++; p < lim; p++)
			if (*p != 0)
				return (-1);
	}

	return (bytes * 8 + bits);
}
165 
166 int
167 in6_nam2sin6(const struct mbuf *nam, struct sockaddr_in6 **sin6)
168 {
169 	struct sockaddr *sa = mtod(nam, struct sockaddr *);
170 
171 	if (nam->m_len < offsetof(struct sockaddr, sa_data))
172 		return EINVAL;
173 	if (sa->sa_family != AF_INET6)
174 		return EAFNOSUPPORT;
175 	if (sa->sa_len != nam->m_len)
176 		return EINVAL;
177 	if (sa->sa_len != sizeof(struct sockaddr_in6))
178 		return EINVAL;
179 	*sin6 = satosin6(sa);
180 
181 	return 0;
182 }
183 
184 int
185 in6_sa2sin6(struct sockaddr *sa, struct sockaddr_in6 **sin6)
186 {
187 	if (sa->sa_family != AF_INET6)
188 		return EAFNOSUPPORT;
189 	if (sa->sa_len != sizeof(struct sockaddr_in6))
190 		return EINVAL;
191 	*sin6 = satosin6(sa);
192 
193 	return 0;
194 }
195 
196 int
197 in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp)
198 {
199 	int privileged;
200 	int error;
201 
202 	privileged = 0;
203 	if ((so->so_state & SS_PRIV) != 0)
204 		privileged++;
205 
206 	switch (cmd) {
207 #ifdef MROUTING
208 	case SIOCGETSGCNT_IN6:
209 	case SIOCGETMIFCNT_IN6:
210 		error = mrt6_ioctl(so, cmd, data);
211 		break;
212 #endif /* MROUTING */
213 	default:
214 		error = in6_ioctl(cmd, data, ifp, privileged);
215 		break;
216 	}
217 
218 	return error;
219 }
220 
221 int
222 in6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp, int privileged)
223 {
224 	if (ifp == NULL)
225 		return (ENXIO);
226 
227 	switch (cmd) {
228 	case SIOCGIFINFO_IN6:
229 	case SIOCGNBRINFO_IN6:
230 		return (nd6_ioctl(cmd, data, ifp));
231 	case SIOCGIFDSTADDR_IN6:
232 	case SIOCGIFNETMASK_IN6:
233 	case SIOCGIFAFLAG_IN6:
234 	case SIOCGIFALIFETIME_IN6:
235 		return (in6_ioctl_get(cmd, data, ifp));
236 	case SIOCAIFADDR_IN6:
237 	case SIOCDIFADDR_IN6:
238 		if (!privileged)
239 			return (EPERM);
240 		return (in6_ioctl_change_ifaddr(cmd, data, ifp));
241 	case SIOCSIFADDR:
242 	case SIOCSIFDSTADDR:
243 	case SIOCSIFBRDADDR:
244 	case SIOCSIFNETMASK:
245 		/*
246 		 * Do not pass those ioctl to driver handler since they are not
247 		 * properly set up. Instead just error out.
248 		 */
249 		return (EINVAL);
250 	default:
251 		return (EOPNOTSUPP);
252 	}
253 }
254 
255 int
256 in6_ioctl_change_ifaddr(u_long cmd, caddr_t data, struct ifnet *ifp)
257 {
258 	struct	in6_ifaddr *ia6 = NULL;
259 	struct	in6_aliasreq *ifra = (struct in6_aliasreq *)data;
260 	struct	sockaddr *sa;
261 	struct	sockaddr_in6 *sa6 = NULL;
262 	int	error = 0, newifaddr = 0, plen;
263 
264 	/*
265 	 * Find address for this interface, if it exists.
266 	 *
267 	 * In netinet code, we have checked ifra_addr in SIOCSIF*ADDR operation
268 	 * only, and used the first interface address as the target of other
269 	 * operations (without checking ifra_addr).  This was because netinet
270 	 * code/API assumed at most 1 interface address per interface.
271 	 * Since IPv6 allows a node to assign multiple addresses
272 	 * on a single interface, we almost always look and check the
273 	 * presence of ifra_addr, and reject invalid ones here.
274 	 * It also decreases duplicated code among SIOC*_IN6 operations.
275 	 *
276 	 * We always require users to specify a valid IPv6 address for
277 	 * the corresponding operation.
278 	 */
279 	switch (cmd) {
280 	case SIOCAIFADDR_IN6:
281 		sa = sin6tosa(&ifra->ifra_addr);
282 		break;
283 	case SIOCDIFADDR_IN6:
284 		sa = sin6tosa(&((struct in6_ifreq *)data)->ifr_addr);
285 		break;
286 	default:
287 		panic("%s: invalid ioctl %lu", __func__, cmd);
288 	}
289 	if (sa->sa_family == AF_INET6) {
290 		error = in6_sa2sin6(sa, &sa6);
291 		if (error)
292 			return (error);
293 	}
294 
295 	NET_LOCK();
296 
297 	if (sa6 != NULL) {
298 		error = in6_check_embed_scope(sa6, ifp->if_index);
299 		if (error)
300 			goto err;
301 		error = in6_clear_scope_id(sa6, ifp->if_index);
302 		if (error)
303 			goto err;
304 		ia6 = in6ifa_ifpwithaddr(ifp, &sa6->sin6_addr);
305 	}
306 
307 	switch (cmd) {
308 	case SIOCDIFADDR_IN6:
309 		/*
310 		 * for IPv4, we look for existing in_ifaddr here to allow
311 		 * "ifconfig if0 delete" to remove the first IPv4 address on
312 		 * the interface.  For IPv6, as the spec allows multiple
313 		 * interface address from the day one, we consider "remove the
314 		 * first one" semantics to be not preferable.
315 		 */
316 		if (ia6 == NULL) {
317 			error = EADDRNOTAVAIL;
318 			break;
319 		}
320 		in6_purgeaddr(&ia6->ia_ifa);
321 		if_addrhooks_run(ifp);
322 		break;
323 
324 	case SIOCAIFADDR_IN6:
325 		/* reject read-only flags */
326 		if ((ifra->ifra_flags & IN6_IFF_DUPLICATED) != 0 ||
327 		    (ifra->ifra_flags & IN6_IFF_DETACHED) != 0 ||
328 		    (ifra->ifra_flags & IN6_IFF_DEPRECATED) != 0) {
329 			error = EINVAL;
330 			break;
331 		}
332 
333 		if (ia6 == NULL)
334 			newifaddr = 1;
335 
336 		/*
337 		 * Make the address tentative before joining multicast
338 		 * addresses, so that corresponding MLD responses would
339 		 * not have a tentative source address.
340 		 */
341 		if (newifaddr && in6if_do_dad(ifp))
342 			ifra->ifra_flags |= IN6_IFF_TENTATIVE;
343 
344 		/*
345 		 * first, make or update the interface address structure,
346 		 * and link it to the list. try to enable inet6 if there
347 		 * is no link-local yet.
348 		 */
349 		error = in6_ifattach(ifp);
350 		if (error)
351 			break;
352 		error = in6_update_ifa(ifp, ifra, ia6);
353 		if (error)
354 			break;
355 
356 		ia6 = NULL;
357 		if (sa6 != NULL)
358 			ia6 = in6ifa_ifpwithaddr(ifp, &sa6->sin6_addr);
359 		if (ia6 == NULL) {
360 			/*
361 			 * this can happen when the user specify the 0 valid
362 			 * lifetime.
363 			 */
364 			break;
365 		}
366 
367 		/* Perform DAD, if needed. */
368 		if (ia6->ia6_flags & IN6_IFF_TENTATIVE)
369 			nd6_dad_start(&ia6->ia_ifa);
370 
371 		if (!newifaddr) {
372 			if_addrhooks_run(ifp);
373 			break;
374 		}
375 
376 		plen = in6_mask2len(&ia6->ia_prefixmask.sin6_addr, NULL);
377 		if ((ifp->if_flags & IFF_LOOPBACK) || plen == 128) {
378 			if_addrhooks_run(ifp);
379 			break;	/* No need to install a connected route. */
380 		}
381 
382 		error = rt_ifa_add(&ia6->ia_ifa,
383 		    RTF_CLONING | RTF_CONNECTED | RTF_MPATH,
384 		    ia6->ia_ifa.ifa_addr, ifp->if_rdomain);
385 		if (error) {
386 			in6_purgeaddr(&ia6->ia_ifa);
387 			break;
388 		}
389 		if_addrhooks_run(ifp);
390 		break;
391 	}
392 
393 err:
394 	NET_UNLOCK();
395 	return (error);
396 }
397 
398 int
399 in6_ioctl_get(u_long cmd, caddr_t data, struct ifnet *ifp)
400 {
401 	struct	in6_ifreq *ifr = (struct in6_ifreq *)data;
402 	struct	in6_ifaddr *ia6 = NULL;
403 	struct	sockaddr *sa;
404 	struct	sockaddr_in6 *sa6 = NULL;
405 	int	error = 0;
406 
407 	sa = sin6tosa(&ifr->ifr_addr);
408 	if (sa->sa_family == AF_INET6) {
409 		sa->sa_len = sizeof(struct sockaddr_in6);
410 		error = in6_sa2sin6(sa, &sa6);
411 		if (error)
412 			return (error);
413 	}
414 
415 	NET_RLOCK();
416 
417 	if (sa6 != NULL) {
418 		error = in6_check_embed_scope(sa6, ifp->if_index);
419 		if (error)
420 			goto err;
421 		error = in6_clear_scope_id(sa6, ifp->if_index);
422 		if (error)
423 			goto err;
424 		ia6 = in6ifa_ifpwithaddr(ifp, &sa6->sin6_addr);
425 	}
426 
427 	/* must think again about its semantics */
428 	if (ia6 == NULL) {
429 		error = EADDRNOTAVAIL;
430 		goto err;
431 	}
432 
433 	switch (cmd) {
434 	case SIOCGIFDSTADDR_IN6:
435 		if ((ifp->if_flags & IFF_POINTOPOINT) == 0) {
436 			error = EINVAL;
437 			break;
438 		}
439 		/*
440 		 * XXX: should we check if ifa_dstaddr is NULL and return
441 		 * an error?
442 		 */
443 		ifr->ifr_dstaddr = ia6->ia_dstaddr;
444 		break;
445 
446 	case SIOCGIFNETMASK_IN6:
447 		ifr->ifr_addr = ia6->ia_prefixmask;
448 		break;
449 
450 	case SIOCGIFAFLAG_IN6:
451 		ifr->ifr_ifru.ifru_flags6 = ia6->ia6_flags;
452 		break;
453 
454 	case SIOCGIFALIFETIME_IN6:
455 		ifr->ifr_ifru.ifru_lifetime = ia6->ia6_lifetime;
456 		if (ia6->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) {
457 			time_t expire, maxexpire;
458 			struct in6_addrlifetime *retlt =
459 			    &ifr->ifr_ifru.ifru_lifetime;
460 
461 			/*
462 			 * XXX: adjust expiration time assuming time_t is
463 			 * signed.
464 			 */
465 			maxexpire =
466 			    (time_t)~(1ULL << ((sizeof(maxexpire) * 8) - 1));
467 			if (ia6->ia6_lifetime.ia6t_vltime <
468 			    maxexpire - ia6->ia6_updatetime) {
469 				expire = ia6->ia6_updatetime +
470 				    ia6->ia6_lifetime.ia6t_vltime;
471 				if (expire != 0) {
472 					expire -= time_uptime;
473 					expire += time_second;
474 				}
475 				retlt->ia6t_expire = expire;
476 			} else
477 				retlt->ia6t_expire = maxexpire;
478 		}
479 		if (ia6->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) {
480 			time_t expire, maxexpire;
481 			struct in6_addrlifetime *retlt =
482 			    &ifr->ifr_ifru.ifru_lifetime;
483 
484 			/*
485 			 * XXX: adjust expiration time assuming time_t is
486 			 * signed.
487 			 */
488 			maxexpire =
489 			    (time_t)~(1ULL << ((sizeof(maxexpire) * 8) - 1));
490 			if (ia6->ia6_lifetime.ia6t_pltime <
491 			    maxexpire - ia6->ia6_updatetime) {
492 				expire = ia6->ia6_updatetime +
493 				    ia6->ia6_lifetime.ia6t_pltime;
494 				if (expire != 0) {
495 					expire -= time_uptime;
496 					expire += time_second;
497 				}
498 				retlt->ia6t_preferred = expire;
499 			} else
500 				retlt->ia6t_preferred = maxexpire;
501 		}
502 		break;
503 
504 	default:
505 		panic("%s: invalid ioctl %lu", __func__, cmd);
506 	}
507 
508 err:
509 	NET_RUNLOCK();
510 	return (error);
511 }
512 
/*
 * Make sure the link ID embedded in a link-local address matches ifidx.
 *
 * For a link-local address, the second 16 bit word either is zero, in
 * which case ifidx is stored there in network byte order, or it has to
 * equal ifidx already.  Other addresses are not touched.
 *
 * Returns 0 on success and EINVAL when a different link ID is embedded.
 */
int
in6_check_embed_scope(struct sockaddr_in6 *sa6, unsigned int ifidx)
{
	if (!IN6_IS_ADDR_LINKLOCAL(&sa6->sin6_addr))
		return 0;

	if (sa6->sin6_addr.s6_addr16[1] == 0) {
		/* link ID is not embedded by the user */
		sa6->sin6_addr.s6_addr16[1] = htons(ifidx);
		return 0;
	}

	/* an embedded link ID must not contradict the interface */
	return (sa6->sin6_addr.s6_addr16[1] == htons(ifidx)) ? 0 : EINVAL;
}
525 
/*
 * Validate and reset sin6_scope_id of a link-local address.
 *
 * A non-zero scope id on a link-local address has to equal ifidx and is
 * then set to 0.  A zero scope id and addresses that are not link-local
 * are left alone.
 *
 * Returns 0 on success and EINVAL when the scope id names another
 * interface.
 */
int
in6_clear_scope_id(struct sockaddr_in6 *sa6, unsigned int ifidx)
{
	if (!IN6_IS_ADDR_LINKLOCAL(&sa6->sin6_addr) ||
	    sa6->sin6_scope_id == 0)
		return 0;

	if (sa6->sin6_scope_id != (u_int32_t)ifidx)
		return EINVAL;

	sa6->sin6_scope_id = 0; /* XXX: good way? */
	return 0;
}
538 
539 /*
540  * Update parameters of an IPv6 interface address.
541  * If necessary, a new entry is created and linked into address chains.
542  * This function is separated from in6_control().
543  */
544 int
545 in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra,
546     struct in6_ifaddr *ia6)
547 {
548 	int error = 0, hostIsNew = 0, plen = -1;
549 	struct sockaddr_in6 dst6;
550 	struct in6_addrlifetime *lt;
551 	struct in6_multi_mship *imm;
552 	struct rtentry *rt;
553 	char addr[INET6_ADDRSTRLEN];
554 
555 	NET_ASSERT_LOCKED();
556 
557 	/* Validate parameters */
558 	if (ifp == NULL || ifra == NULL) /* this maybe redundant */
559 		return (EINVAL);
560 
561 	/*
562 	 * The destination address for a p2p link must have a family
563 	 * of AF_UNSPEC or AF_INET6.
564 	 */
565 	if ((ifp->if_flags & IFF_POINTOPOINT) != 0 &&
566 	    ifra->ifra_dstaddr.sin6_family != AF_INET6 &&
567 	    ifra->ifra_dstaddr.sin6_family != AF_UNSPEC)
568 		return (EAFNOSUPPORT);
569 
570 	/*
571 	 * validate ifra_prefixmask.  don't check sin6_family, netmask
572 	 * does not carry fields other than sin6_len.
573 	 */
574 	if (ifra->ifra_prefixmask.sin6_len > sizeof(struct sockaddr_in6))
575 		return (EINVAL);
576 	/*
577 	 * Because the IPv6 address architecture is classless, we require
578 	 * users to specify a (non 0) prefix length (mask) for a new address.
579 	 * We also require the prefix (when specified) mask is valid, and thus
580 	 * reject a non-consecutive mask.
581 	 */
582 	if (ia6 == NULL && ifra->ifra_prefixmask.sin6_len == 0)
583 		return (EINVAL);
584 	if (ifra->ifra_prefixmask.sin6_len != 0) {
585 		plen = in6_mask2len(&ifra->ifra_prefixmask.sin6_addr,
586 		    (u_char *)&ifra->ifra_prefixmask +
587 		    ifra->ifra_prefixmask.sin6_len);
588 		if (plen <= 0)
589 			return (EINVAL);
590 	} else {
591 		/*
592 		 * In this case, ia6 must not be NULL.  We just use its prefix
593 		 * length.
594 		 */
595 		plen = in6_mask2len(&ia6->ia_prefixmask.sin6_addr, NULL);
596 	}
597 	/*
598 	 * If the destination address on a p2p interface is specified,
599 	 * and the address is a scoped one, validate/set the scope
600 	 * zone identifier.
601 	 */
602 	dst6 = ifra->ifra_dstaddr;
603 	if ((ifp->if_flags & (IFF_POINTOPOINT|IFF_LOOPBACK)) != 0 &&
604 	    (dst6.sin6_family == AF_INET6)) {
605 		error = in6_check_embed_scope(&dst6, ifp->if_index);
606 		if (error)
607 			return error;
608 	}
609 	/*
610 	 * The destination address can be specified only for a p2p or a
611 	 * loopback interface.  If specified, the corresponding prefix length
612 	 * must be 128.
613 	 */
614 	if (ifra->ifra_dstaddr.sin6_family == AF_INET6) {
615 		if ((ifp->if_flags & (IFF_POINTOPOINT|IFF_LOOPBACK)) == 0)
616 			return (EINVAL);
617 		if (plen != 128)
618 			return (EINVAL);
619 	}
620 	/* lifetime consistency check */
621 	lt = &ifra->ifra_lifetime;
622 	if (lt->ia6t_pltime > lt->ia6t_vltime)
623 		return (EINVAL);
624 	if (lt->ia6t_vltime == 0) {
625 		/*
626 		 * the following log might be noisy, but this is a typical
627 		 * configuration mistake or a tool's bug.
628 		 */
629 		nd6log((LOG_INFO, "%s: valid lifetime is 0 for %s\n", __func__,
630 		    inet_ntop(AF_INET6, &ifra->ifra_addr.sin6_addr,
631 		    addr, sizeof(addr))));
632 
633 		if (ia6 == NULL)
634 			return (0); /* there's nothing to do */
635 	}
636 
637 	/*
638 	 * If this is a new address, allocate a new ifaddr and link it
639 	 * into chains.
640 	 */
641 	if (ia6 == NULL) {
642 		hostIsNew = 1;
643 		ia6 = malloc(sizeof(*ia6), M_IFADDR, M_WAITOK | M_ZERO);
644 		LIST_INIT(&ia6->ia6_memberships);
645 		/* Initialize the address and masks, and put time stamp */
646 		ia6->ia_ifa.ifa_addr = sin6tosa(&ia6->ia_addr);
647 		ia6->ia_addr.sin6_family = AF_INET6;
648 		ia6->ia_addr.sin6_len = sizeof(ia6->ia_addr);
649 		ia6->ia6_updatetime = time_uptime;
650 		if ((ifp->if_flags & (IFF_POINTOPOINT | IFF_LOOPBACK)) != 0) {
651 			/*
652 			 * XXX: some functions expect that ifa_dstaddr is not
653 			 * NULL for p2p interfaces.
654 			 */
655 			ia6->ia_ifa.ifa_dstaddr = sin6tosa(&ia6->ia_dstaddr);
656 		} else {
657 			ia6->ia_ifa.ifa_dstaddr = NULL;
658 		}
659 		ia6->ia_ifa.ifa_netmask = sin6tosa(&ia6->ia_prefixmask);
660 
661 		ia6->ia_ifp = ifp;
662 		ia6->ia_addr = ifra->ifra_addr;
663 		ifa_add(ifp, &ia6->ia_ifa);
664 	}
665 
666 	/* set prefix mask */
667 	if (ifra->ifra_prefixmask.sin6_len) {
668 		/*
669 		 * We prohibit changing the prefix length of an existing
670 		 * address, because
671 		 * + such an operation should be rare in IPv6, and
672 		 * + the operation would confuse prefix management.
673 		 */
674 		if (ia6->ia_prefixmask.sin6_len &&
675 		    in6_mask2len(&ia6->ia_prefixmask.sin6_addr, NULL) != plen) {
676 			error = EINVAL;
677 			goto unlink;
678 		}
679 		ia6->ia_prefixmask = ifra->ifra_prefixmask;
680 	}
681 
682 	/*
683 	 * If a new destination address is specified, scrub the old one and
684 	 * install the new destination.  Note that the interface must be
685 	 * p2p or loopback (see the check above.)
686 	 */
687 	if ((ifp->if_flags & IFF_POINTOPOINT) && dst6.sin6_family == AF_INET6 &&
688 	    !IN6_ARE_ADDR_EQUAL(&dst6.sin6_addr, &ia6->ia_dstaddr.sin6_addr)) {
689 		struct ifaddr *ifa = &ia6->ia_ifa;
690 
691 		if ((ia6->ia_flags & IFA_ROUTE) != 0 &&
692 		    rt_ifa_del(ifa, RTF_HOST, ifa->ifa_dstaddr,
693 		     ifp->if_rdomain) != 0) {
694 			nd6log((LOG_ERR, "%s: failed to remove a route "
695 			    "to the old destination: %s\n", __func__,
696 			    inet_ntop(AF_INET6, &ia6->ia_addr.sin6_addr,
697 			    addr, sizeof(addr))));
698 			/* proceed anyway... */
699 		} else
700 			ia6->ia_flags &= ~IFA_ROUTE;
701 		ia6->ia_dstaddr = dst6;
702 	}
703 
704 	/*
705 	 * Set lifetimes.  We do not refer to ia6t_expire and ia6t_preferred
706 	 * to see if the address is deprecated or invalidated, but initialize
707 	 * these members for applications.
708 	 */
709 	ia6->ia6_updatetime = time_uptime;
710 	ia6->ia6_lifetime = ifra->ifra_lifetime;
711 	if (ia6->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) {
712 		ia6->ia6_lifetime.ia6t_expire =
713 		    time_uptime + ia6->ia6_lifetime.ia6t_vltime;
714 	} else
715 		ia6->ia6_lifetime.ia6t_expire = 0;
716 	if (ia6->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) {
717 		ia6->ia6_lifetime.ia6t_preferred =
718 		    time_uptime + ia6->ia6_lifetime.ia6t_pltime;
719 	} else
720 		ia6->ia6_lifetime.ia6t_preferred = 0;
721 
722 	/* reset the interface and routing table appropriately. */
723 	if ((error = in6_ifinit(ifp, ia6, hostIsNew)) != 0)
724 		goto unlink;
725 
726 	/* re-run DAD */
727 	if (ia6->ia6_flags & (IN6_IFF_TENTATIVE|IN6_IFF_DUPLICATED))
728 		ifra->ifra_flags |= IN6_IFF_TENTATIVE;
729 	/*
730 	 * configure address flags.
731 	 */
732 	ia6->ia6_flags = ifra->ifra_flags;
733 
734 	nd6_expire_timer_update(ia6);
735 
736 	/*
737 	 * We are done if we have simply modified an existing address.
738 	 */
739 	if (!hostIsNew)
740 		return (error);
741 
742 	/*
743 	 * Beyond this point, we should call in6_purgeaddr upon an error,
744 	 * not just go to unlink.
745 	 */
746 
747 	/* join necessary multiast groups */
748 	if ((ifp->if_flags & IFF_MULTICAST) != 0) {
749 		struct sockaddr_in6 mltaddr, mltmask;
750 
751 		/* join solicited multicast addr for new host id */
752 		struct sockaddr_in6 llsol;
753 
754 		bzero(&llsol, sizeof(llsol));
755 		llsol.sin6_family = AF_INET6;
756 		llsol.sin6_len = sizeof(llsol);
757 		llsol.sin6_addr.s6_addr16[0] = htons(0xff02);
758 		llsol.sin6_addr.s6_addr16[1] = htons(ifp->if_index);
759 		llsol.sin6_addr.s6_addr32[1] = 0;
760 		llsol.sin6_addr.s6_addr32[2] = htonl(1);
761 		llsol.sin6_addr.s6_addr32[3] =
762 		    ifra->ifra_addr.sin6_addr.s6_addr32[3];
763 		llsol.sin6_addr.s6_addr8[12] = 0xff;
764 		imm = in6_joingroup(ifp, &llsol.sin6_addr, &error);
765 		if (!imm)
766 			goto cleanup;
767 		LIST_INSERT_HEAD(&ia6->ia6_memberships, imm, i6mm_chain);
768 
769 		bzero(&mltmask, sizeof(mltmask));
770 		mltmask.sin6_len = sizeof(struct sockaddr_in6);
771 		mltmask.sin6_family = AF_INET6;
772 		mltmask.sin6_addr = in6mask32;
773 
774 		/*
775 		 * join link-local all-nodes address
776 		 */
777 		bzero(&mltaddr, sizeof(mltaddr));
778 		mltaddr.sin6_len = sizeof(struct sockaddr_in6);
779 		mltaddr.sin6_family = AF_INET6;
780 		mltaddr.sin6_addr = in6addr_linklocal_allnodes;
781 		mltaddr.sin6_addr.s6_addr16[1] = htons(ifp->if_index);
782 		mltaddr.sin6_scope_id = 0;
783 
784 		/*
785 		 * XXX: do we really need this automatic routes?
786 		 * We should probably reconsider this stuff.  Most applications
787 		 * actually do not need the routes, since they usually specify
788 		 * the outgoing interface.
789 		 */
790 		rt = rtalloc(sin6tosa(&mltaddr), 0, ifp->if_rdomain);
791 		if (rt) {
792 			/* 32bit came from "mltmask" */
793 			if (memcmp(&mltaddr.sin6_addr,
794 			    &satosin6(rt_key(rt))->sin6_addr,
795 			    32 / 8)) {
796 				rtfree(rt);
797 				rt = NULL;
798 			}
799 		}
800 		if (!rt) {
801 			struct rt_addrinfo info;
802 
803 			bzero(&info, sizeof(info));
804 			info.rti_ifa = &ia6->ia_ifa;
805 			info.rti_info[RTAX_DST] = sin6tosa(&mltaddr);
806 			info.rti_info[RTAX_GATEWAY] = sin6tosa(&ia6->ia_addr);
807 			info.rti_info[RTAX_NETMASK] = sin6tosa(&mltmask);
808 			info.rti_info[RTAX_IFA] = sin6tosa(&ia6->ia_addr);
809 			info.rti_flags = RTF_MULTICAST;
810 			error = rtrequest(RTM_ADD, &info, RTP_CONNECTED, NULL,
811 			    ifp->if_rdomain);
812 			if (error)
813 				goto cleanup;
814 		} else {
815 			rtfree(rt);
816 		}
817 		imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error);
818 		if (!imm)
819 			goto cleanup;
820 		LIST_INSERT_HEAD(&ia6->ia6_memberships, imm, i6mm_chain);
821 
822 		/*
823 		 * join interface-local all-nodes address.
824 		 * (ff01::1%ifN, and ff01::%ifN/32)
825 		 */
826 		bzero(&mltaddr, sizeof(mltaddr));
827 		mltaddr.sin6_len = sizeof(struct sockaddr_in6);
828 		mltaddr.sin6_family = AF_INET6;
829 		mltaddr.sin6_addr = in6addr_intfacelocal_allnodes;
830 		mltaddr.sin6_addr.s6_addr16[1] = htons(ifp->if_index);
831 		mltaddr.sin6_scope_id = 0;
832 
833 		/* XXX: again, do we really need the route? */
834 		rt = rtalloc(sin6tosa(&mltaddr), 0, ifp->if_rdomain);
835 		if (rt) {
836 			/* 32bit came from "mltmask" */
837 			if (memcmp(&mltaddr.sin6_addr,
838 			    &satosin6(rt_key(rt))->sin6_addr,
839 			    32 / 8)) {
840 				rtfree(rt);
841 				rt = NULL;
842 			}
843 		}
844 		if (!rt) {
845 			struct rt_addrinfo info;
846 
847 			bzero(&info, sizeof(info));
848 			info.rti_ifa = &ia6->ia_ifa;
849 			info.rti_info[RTAX_DST] = sin6tosa(&mltaddr);
850 			info.rti_info[RTAX_GATEWAY] = sin6tosa(&ia6->ia_addr);
851 			info.rti_info[RTAX_NETMASK] = sin6tosa(&mltmask);
852 			info.rti_info[RTAX_IFA] = sin6tosa(&ia6->ia_addr);
853 			info.rti_flags = RTF_MULTICAST;
854 			error = rtrequest(RTM_ADD, &info, RTP_CONNECTED, NULL,
855 			    ifp->if_rdomain);
856 			if (error)
857 				goto cleanup;
858 		} else {
859 			rtfree(rt);
860 		}
861 		imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error);
862 		if (!imm)
863 			goto cleanup;
864 		LIST_INSERT_HEAD(&ia6->ia6_memberships, imm, i6mm_chain);
865 	}
866 
867 	return (error);
868 
869   unlink:
870 	/*
871 	 * XXX: if a change of an existing address failed, keep the entry
872 	 * anyway.
873 	 */
874 	if (hostIsNew)
875 		in6_unlink_ifa(ia6, ifp);
876 	return (error);
877 
878   cleanup:
879 	in6_purgeaddr(&ia6->ia_ifa);
880 	return error;
881 }
882 
883 void
884 in6_purgeaddr(struct ifaddr *ifa)
885 {
886 	struct ifnet *ifp = ifa->ifa_ifp;
887 	struct in6_ifaddr *ia6 = ifatoia6(ifa);
888 	struct in6_multi_mship *imm;
889 
890 	/* stop DAD processing */
891 	nd6_dad_stop(ifa);
892 
893 	/*
894 	 * delete route to the destination of the address being purged.
895 	 * The interface must be p2p or loopback in this case.
896 	 */
897 	if ((ifp->if_flags & IFF_POINTOPOINT) && (ia6->ia_flags & IFA_ROUTE) &&
898 	    ia6->ia_dstaddr.sin6_len != 0) {
899 		int e;
900 
901 		e = rt_ifa_del(ifa, RTF_HOST, ifa->ifa_dstaddr,
902 		    ifp->if_rdomain);
903 		if (e != 0) {
904 			char addr[INET6_ADDRSTRLEN];
905 			log(LOG_ERR, "in6_purgeaddr: failed to remove "
906 			    "a route to the p2p destination: %s on %s, "
907 			    "errno=%d\n",
908 			    inet_ntop(AF_INET6, &ia6->ia_addr.sin6_addr,
909 				addr, sizeof(addr)),
910 			    ifp->if_xname, e);
911 			/* proceed anyway... */
912 		} else
913 			ia6->ia_flags &= ~IFA_ROUTE;
914 	}
915 
916 	/* Remove ownaddr's loopback rtentry, if it exists. */
917 	rt_ifa_dellocal(&(ia6->ia_ifa));
918 
919 	/*
920 	 * leave from multicast groups we have joined for the interface
921 	 */
922 	while (!LIST_EMPTY(&ia6->ia6_memberships)) {
923 		imm = LIST_FIRST(&ia6->ia6_memberships);
924 		LIST_REMOVE(imm, i6mm_chain);
925 		in6_leavegroup(imm);
926 	}
927 
928 	in6_unlink_ifa(ia6, ifp);
929 }
930 
931 void
932 in6_unlink_ifa(struct in6_ifaddr *ia6, struct ifnet *ifp)
933 {
934 	struct ifaddr *ifa = &ia6->ia_ifa;
935 	extern int ifatrash;
936 	int plen;
937 
938 	NET_ASSERT_LOCKED();
939 
940 	/* Release the reference to the base prefix. */
941 	plen = in6_mask2len(&ia6->ia_prefixmask.sin6_addr, NULL);
942 	if ((ifp->if_flags & IFF_LOOPBACK) == 0 && plen != 128) {
943 		rt_ifa_del(ifa, RTF_CLONING | RTF_CONNECTED,
944 		    ifa->ifa_addr, ifp->if_rdomain);
945 	}
946 
947 	rt_ifa_purge(ifa);
948 	ifa_del(ifp, ifa);
949 
950 	ifatrash++;
951 	ia6->ia_ifp = NULL;
952 	ifafree(&ia6->ia_ifa);
953 }
954 
955 /*
956  * Initialize an interface's intetnet6 address
957  * and routing table entry.
958  */
959 int
960 in6_ifinit(struct ifnet *ifp, struct in6_ifaddr *ia6, int newhost)
961 {
962 	int	error = 0, plen, ifacount = 0;
963 	struct ifaddr *ifa;
964 
965 	NET_ASSERT_LOCKED();
966 
967 	/*
968 	 * Give the interface a chance to initialize
969 	 * if this is its first address (or it is a CARP interface)
970 	 * and to validate the address if necessary.
971 	 */
972 	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
973 		if (ifa->ifa_addr->sa_family != AF_INET6)
974 			continue;
975 		ifacount++;
976 	}
977 
978 	if ((ifacount <= 1 || ifp->if_type == IFT_CARP ||
979 	    (ifp->if_flags & (IFF_LOOPBACK|IFF_POINTOPOINT))) &&
980 	    (error = (*ifp->if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia6))) {
981 		return (error);
982 	}
983 
984 	ia6->ia_ifa.ifa_metric = ifp->if_metric;
985 
986 	/* we could do in(6)_socktrim here, but just omit it at this moment. */
987 
988 	/*
989 	 * Special case:
990 	 * If the destination address is specified for a point-to-point
991 	 * interface, install a route to the destination as an interface
992 	 * direct route.
993 	 */
994 	plen = in6_mask2len(&ia6->ia_prefixmask.sin6_addr, NULL); /* XXX */
995 	if ((ifp->if_flags & IFF_POINTOPOINT) && plen == 128 &&
996 	    ia6->ia_dstaddr.sin6_family == AF_INET6) {
997 		ifa = &ia6->ia_ifa;
998 		error = rt_ifa_add(ifa, RTF_HOST | RTF_MPATH,
999 		    ifa->ifa_dstaddr, ifp->if_rdomain);
1000 		if (error != 0)
1001 			return (error);
1002 		ia6->ia_flags |= IFA_ROUTE;
1003 	}
1004 
1005 	if (newhost)
1006 		error = rt_ifa_addlocal(&(ia6->ia_ifa));
1007 
1008 	return (error);
1009 }
1010 
1011 /*
1012  * Add an address to the list of IP6 multicast addresses for a
1013  * given interface.
1014  */
1015 struct in6_multi *
1016 in6_addmulti(struct in6_addr *maddr6, struct ifnet *ifp, int *errorp)
1017 {
1018 	struct	in6_ifreq ifr;
1019 	struct	in6_multi *in6m;
1020 
1021 	NET_ASSERT_LOCKED();
1022 
1023 	*errorp = 0;
1024 	/*
1025 	 * See if address already in list.
1026 	 */
1027 	IN6_LOOKUP_MULTI(*maddr6, ifp, in6m);
1028 	if (in6m != NULL) {
1029 		/*
1030 		 * Found it; just increment the refrence count.
1031 		 */
1032 		in6m->in6m_refcnt++;
1033 	} else {
1034 		/*
1035 		 * New address; allocate a new multicast record
1036 		 * and link it into the interface's multicast list.
1037 		 */
1038 		in6m = malloc(sizeof(*in6m), M_IPMADDR, M_NOWAIT | M_ZERO);
1039 		if (in6m == NULL) {
1040 			*errorp = ENOBUFS;
1041 			return (NULL);
1042 		}
1043 
1044 		in6m->in6m_sin.sin6_len = sizeof(struct sockaddr_in6);
1045 		in6m->in6m_sin.sin6_family = AF_INET6;
1046 		in6m->in6m_sin.sin6_addr = *maddr6;
1047 		in6m->in6m_refcnt = 1;
1048 		in6m->in6m_ifidx = ifp->if_index;
1049 		in6m->in6m_ifma.ifma_addr = sin6tosa(&in6m->in6m_sin);
1050 
1051 		/*
1052 		 * Ask the network driver to update its multicast reception
1053 		 * filter appropriately for the new address.
1054 		 */
1055 		memcpy(&ifr.ifr_addr, &in6m->in6m_sin, sizeof(in6m->in6m_sin));
1056 		*errorp = (*ifp->if_ioctl)(ifp, SIOCADDMULTI, (caddr_t)&ifr);
1057 		if (*errorp) {
1058 			free(in6m, M_IPMADDR, sizeof(*in6m));
1059 			return (NULL);
1060 		}
1061 
1062 		TAILQ_INSERT_HEAD(&ifp->if_maddrlist, &in6m->in6m_ifma,
1063 		    ifma_list);
1064 
1065 		/*
1066 		 * Let MLD6 know that we have joined a new IP6 multicast
1067 		 * group.
1068 		 */
1069 		mld6_start_listening(in6m);
1070 	}
1071 
1072 	return (in6m);
1073 }
1074 
1075 /*
1076  * Delete a multicast address record.
1077  */
1078 void
1079 in6_delmulti(struct in6_multi *in6m)
1080 {
1081 	struct	in6_ifreq ifr;
1082 	struct	ifnet *ifp;
1083 
1084 	NET_ASSERT_LOCKED();
1085 
1086 	if (--in6m->in6m_refcnt == 0) {
1087 		/*
1088 		 * No remaining claims to this record; let MLD6 know
1089 		 * that we are leaving the multicast group.
1090 		 */
1091 		mld6_stop_listening(in6m);
1092 		ifp = if_get(in6m->in6m_ifidx);
1093 
1094 		/*
1095 		 * Notify the network driver to update its multicast
1096 		 * reception filter.
1097 		 */
1098 		if (ifp != NULL) {
1099 			bzero(&ifr.ifr_addr, sizeof(struct sockaddr_in6));
1100 			ifr.ifr_addr.sin6_len = sizeof(struct sockaddr_in6);
1101 			ifr.ifr_addr.sin6_family = AF_INET6;
1102 			ifr.ifr_addr.sin6_addr = in6m->in6m_addr;
1103 			(*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr);
1104 
1105 			TAILQ_REMOVE(&ifp->if_maddrlist, &in6m->in6m_ifma,
1106 			    ifma_list);
1107 		}
1108 		if_put(ifp);
1109 
1110 		free(in6m, M_IPMADDR, sizeof(*in6m));
1111 	}
1112 }
1113 
1114 /*
1115  * Return 1 if the multicast group represented by ``maddr6'' has been
1116  * joined by interface ``ifp'', 0 otherwise.
1117  */
1118 int
1119 in6_hasmulti(struct in6_addr *maddr6, struct ifnet *ifp)
1120 {
1121 	struct in6_multi *in6m;
1122 	int joined;
1123 
1124 	IN6_LOOKUP_MULTI(*maddr6, ifp, in6m);
1125 	joined = (in6m != NULL);
1126 
1127 	return (joined);
1128 }
1129 
1130 struct in6_multi_mship *
1131 in6_joingroup(struct ifnet *ifp, struct in6_addr *addr, int *errorp)
1132 {
1133 	struct in6_multi_mship *imm;
1134 
1135 	imm = malloc(sizeof(*imm), M_IPMADDR, M_NOWAIT);
1136 	if (!imm) {
1137 		*errorp = ENOBUFS;
1138 		return NULL;
1139 	}
1140 	imm->i6mm_maddr = in6_addmulti(addr, ifp, errorp);
1141 	if (!imm->i6mm_maddr) {
1142 		/* *errorp is alrady set */
1143 		free(imm, M_IPMADDR, sizeof(*imm));
1144 		return NULL;
1145 	}
1146 	return imm;
1147 }
1148 
1149 void
1150 in6_leavegroup(struct in6_multi_mship *imm)
1151 {
1152 
1153 	if (imm->i6mm_maddr)
1154 		in6_delmulti(imm->i6mm_maddr);
1155 	free(imm,  M_IPMADDR, sizeof(*imm));
1156 }
1157 
1158 /*
1159  * Find an IPv6 interface link-local address specific to an interface.
1160  */
1161 struct in6_ifaddr *
1162 in6ifa_ifpforlinklocal(struct ifnet *ifp, int ignoreflags)
1163 {
1164 	struct ifaddr *ifa;
1165 
1166 	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1167 		if (ifa->ifa_addr->sa_family != AF_INET6)
1168 			continue;
1169 		if (IN6_IS_ADDR_LINKLOCAL(IFA_IN6(ifa))) {
1170 			if ((ifatoia6(ifa)->ia6_flags & ignoreflags) != 0)
1171 				continue;
1172 			break;
1173 		}
1174 	}
1175 
1176 	return (ifatoia6(ifa));
1177 }
1178 
1179 
1180 /*
1181  * find the internet address corresponding to a given interface and address.
1182  */
1183 struct in6_ifaddr *
1184 in6ifa_ifpwithaddr(struct ifnet *ifp, struct in6_addr *addr)
1185 {
1186 	struct ifaddr *ifa;
1187 
1188 	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1189 		if (ifa->ifa_addr->sa_family != AF_INET6)
1190 			continue;
1191 		if (IN6_ARE_ADDR_EQUAL(addr, IFA_IN6(ifa)))
1192 			break;
1193 	}
1194 
1195 	return (ifatoia6(ifa));
1196 }
1197 
1198 /*
1199  * Get a scope of the address. Node-local, link-local, site-local or global.
1200  */
1201 int
1202 in6_addrscope(struct in6_addr *addr)
1203 {
1204 	int scope;
1205 
1206 	if (addr->s6_addr8[0] == 0xfe) {
1207 		scope = addr->s6_addr8[1] & 0xc0;
1208 
1209 		switch (scope) {
1210 		case 0x80:
1211 			return __IPV6_ADDR_SCOPE_LINKLOCAL;
1212 			break;
1213 		case 0xc0:
1214 			return __IPV6_ADDR_SCOPE_SITELOCAL;
1215 			break;
1216 		default:
1217 			return __IPV6_ADDR_SCOPE_GLOBAL; /* just in case */
1218 			break;
1219 		}
1220 	}
1221 
1222 
1223 	if (addr->s6_addr8[0] == 0xff) {
1224 		scope = addr->s6_addr8[1] & 0x0f;
1225 
1226 		/*
1227 		 * due to other scope such as reserved,
1228 		 * return scope doesn't work.
1229 		 */
1230 		switch (scope) {
1231 		case __IPV6_ADDR_SCOPE_INTFACELOCAL:
1232 			return __IPV6_ADDR_SCOPE_INTFACELOCAL;
1233 			break;
1234 		case __IPV6_ADDR_SCOPE_LINKLOCAL:
1235 			return __IPV6_ADDR_SCOPE_LINKLOCAL;
1236 			break;
1237 		case __IPV6_ADDR_SCOPE_SITELOCAL:
1238 			return __IPV6_ADDR_SCOPE_SITELOCAL;
1239 			break;
1240 		default:
1241 			return __IPV6_ADDR_SCOPE_GLOBAL;
1242 			break;
1243 		}
1244 	}
1245 
1246 	if (bcmp(&in6addr_loopback, addr, sizeof(*addr) - 1) == 0) {
1247 		if (addr->s6_addr8[15] == 1) /* loopback */
1248 			return __IPV6_ADDR_SCOPE_INTFACELOCAL;
1249 		if (addr->s6_addr8[15] == 0) /* unspecified */
1250 			return __IPV6_ADDR_SCOPE_LINKLOCAL;
1251 	}
1252 
1253 	return __IPV6_ADDR_SCOPE_GLOBAL;
1254 }
1255 
1256 int
1257 in6_addr2scopeid(unsigned int ifidx, struct in6_addr *addr)
1258 {
1259 	int scope = in6_addrscope(addr);
1260 
1261 	switch(scope) {
1262 	case __IPV6_ADDR_SCOPE_INTFACELOCAL:
1263 	case __IPV6_ADDR_SCOPE_LINKLOCAL:
1264 		/* XXX: we do not distinguish between a link and an I/F. */
1265 		return (ifidx);
1266 
1267 	case __IPV6_ADDR_SCOPE_SITELOCAL:
1268 		return (0);	/* XXX: invalid. */
1269 
1270 	default:
1271 		return (0);	/* XXX: treat as global. */
1272 	}
1273 }
1274 
/*
 * Return the number of leading bits (0..128) on which ``src'' and
 * ``dst'' agree.  Both are read as 16 raw bytes.
 */
int
in6_matchlen(struct in6_addr *src, struct in6_addr *dst)
{
	const unsigned char *a = (const unsigned char *)src;
	const unsigned char *b = (const unsigned char *)dst;
	unsigned char diff;
	int i, nbits = 0;

	for (i = 0; i < 16; i++) {
		diff = a[i] ^ b[i];
		if (diff == 0) {
			nbits += 8;
			continue;
		}

		/* Count the leading zero bits of the first differing byte. */
		for (; (diff & 0x80) == 0; diff <<= 1)
			nbits++;
		break;
	}

	return (nbits);
}
1297 
1298 void
1299 in6_prefixlen2mask(struct in6_addr *maskp, int len)
1300 {
1301 	u_char maskarray[8] = {0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff};
1302 	int bytelen, bitlen, i;
1303 
1304 	/* sanity check */
1305 	if (0 > len || len > 128) {
1306 		log(LOG_ERR, "in6_prefixlen2mask: invalid prefix length(%d)\n",
1307 		    len);
1308 		return;
1309 	}
1310 
1311 	bzero(maskp, sizeof(*maskp));
1312 	bytelen = len / 8;
1313 	bitlen = len % 8;
1314 	for (i = 0; i < bytelen; i++)
1315 		maskp->s6_addr[i] = 0xff;
1316 	/* len == 128 is ok because bitlen == 0 then */
1317 	if (bitlen)
1318 		maskp->s6_addr[bytelen] = maskarray[bitlen - 1];
1319 }
1320 
1321 /*
1322  * return the best address out of the same scope
1323  */
1324 struct in6_ifaddr *
1325 in6_ifawithscope(struct ifnet *oifp, struct in6_addr *dst, u_int rdomain)
1326 {
1327 	int dst_scope =	in6_addrscope(dst), src_scope, best_scope = 0;
1328 	int blen = -1;
1329 	struct ifaddr *ifa;
1330 	struct ifnet *ifp;
1331 	struct in6_ifaddr *ia6_best = NULL;
1332 
1333 	if (oifp == NULL) {
1334 		printf("%s: output interface is not specified\n", __func__);
1335 		return (NULL);
1336 	}
1337 
1338 	/*
1339 	 * We search for all addresses on all interfaces from the beginning.
1340 	 * Comparing an interface with the outgoing interface will be done
1341 	 * only at the final stage of tiebreaking.
1342 	 */
1343 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1344 		if (ifp->if_rdomain != rdomain)
1345 			continue;
1346 #if NCARP > 0
1347 		/*
1348 		 * Never use a carp address of an interface which is not
1349 		 * the master.
1350 		 */
1351 		if (ifp->if_type == IFT_CARP && !carp_iamatch(ifp))
1352 			continue;
1353 #endif
1354 
1355 		/*
1356 		 * We can never take an address that breaks the scope zone
1357 		 * of the destination.
1358 		 */
1359 		if (in6_addr2scopeid(ifp->if_index, dst) !=
1360 		    in6_addr2scopeid(oifp->if_index, dst))
1361 			continue;
1362 
1363 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1364 			int tlen = -1, dscopecmp, bscopecmp, matchcmp;
1365 
1366 			if (ifa->ifa_addr->sa_family != AF_INET6)
1367 				continue;
1368 
1369 			src_scope = in6_addrscope(IFA_IN6(ifa));
1370 
1371 #ifdef ADDRSELECT_DEBUG		/* should be removed after stabilization */
1372 		{
1373 			char adst[INET6_ADDRSTRLEN], asrc[INET6_ADDRSTRLEN];
1374 			char bestaddr[INET6_ADDRSTRLEN];
1375 
1376 
1377 			dscopecmp = IN6_ARE_SCOPE_CMP(src_scope, dst_scope);
1378 			printf("%s: dst=%s bestaddr=%s, "
1379 			    "newaddr=%s, scope=%x, dcmp=%d, bcmp=%d, "
1380 			    "matchlen=%d, flgs=%x\n", __func__,
1381 			    inet_ntop(AF_INET6, dst, adst, sizeof(adst)),
1382 			    (ia6_best == NULL) ? "none" :
1383 			    inet_ntop(AF_INET6, &ia6_best->ia_addr.sin6_addr,
1384 			    bestaddr, sizeof(bestaddr)),
1385 			    inet_ntop(AF_INET6, IFA_IN6(ifa),
1386 			    asrc, sizeof(asrc)),
1387 			    src_scope, dscopecmp, ia6_best ?
1388 			    IN6_ARE_SCOPE_CMP(src_scope, best_scope) : -1,
1389 			    in6_matchlen(IFA_IN6(ifa), dst),
1390 			    ifatoia6(ifa)->ia6_flags);
1391 		}
1392 #endif
1393 
1394 			/*
1395 			 * Don't use an address before completing DAD
1396 			 * nor a duplicated address.
1397 			 */
1398 			if (ifatoia6(ifa)->ia6_flags &
1399 			    (IN6_IFF_TENTATIVE|IN6_IFF_DUPLICATED))
1400 				continue;
1401 
1402 			/* XXX: is there any case to allow anycasts? */
1403 			if (ifatoia6(ifa)->ia6_flags & IN6_IFF_ANYCAST)
1404 				continue;
1405 
1406 			if (ifatoia6(ifa)->ia6_flags & IN6_IFF_DETACHED)
1407 				continue;
1408 
1409 			/*
1410 			 * If this is the first address we find,
1411 			 * keep it anyway.
1412 			 */
1413 			if (ia6_best == NULL)
1414 				goto replace;
1415 
1416 			/*
1417 			 * ia6_best is never NULL beyond this line except
1418 			 * within the block labeled "replace".
1419 			 */
1420 
1421 			/*
1422 			 * If ia6_best has a smaller scope than dst and
1423 			 * the current address has a larger one than
1424 			 * (or equal to) dst, always replace ia6_best.
1425 			 * Also, if the current address has a smaller scope
1426 			 * than dst, ignore it unless ia6_best also has a
1427 			 * smaller scope.
1428 			 */
1429 			if (IN6_ARE_SCOPE_CMP(best_scope, dst_scope) < 0 &&
1430 			    IN6_ARE_SCOPE_CMP(src_scope, dst_scope) >= 0)
1431 				goto replace;
1432 			if (IN6_ARE_SCOPE_CMP(src_scope, dst_scope) < 0 &&
1433 			    IN6_ARE_SCOPE_CMP(best_scope, dst_scope) >= 0)
1434 				continue;
1435 
1436 			/*
1437 			 * A deprecated address SHOULD NOT be used in new
1438 			 * communications if an alternate (non-deprecated)
1439 			 * address is available and has sufficient scope.
1440 			 * RFC 2462, Section 5.5.4.
1441 			 */
1442 			if (ifatoia6(ifa)->ia6_flags & IN6_IFF_DEPRECATED) {
1443 				/*
1444 				 * Ignore any deprecated addresses if
1445 				 * specified by configuration.
1446 				 */
1447 				if (!ip6_use_deprecated)
1448 					continue;
1449 
1450 				/*
1451 				 * If we have already found a non-deprecated
1452 				 * candidate, just ignore deprecated addresses.
1453 				 */
1454 				if ((ia6_best->ia6_flags & IN6_IFF_DEPRECATED)
1455 				    == 0)
1456 					continue;
1457 			}
1458 
1459 			/*
1460 			 * A non-deprecated address is always preferred
1461 			 * to a deprecated one regardless of scopes and
1462 			 * address matching.
1463 			 */
1464 			if ((ia6_best->ia6_flags & IN6_IFF_DEPRECATED) &&
1465 			    (ifatoia6(ifa)->ia6_flags &
1466 			     IN6_IFF_DEPRECATED) == 0)
1467 				goto replace;
1468 
1469 			/* RFC 3484 5. Rule 5: Prefer outgoing interface */
1470 			if (ia6_best->ia_ifp == oifp && ifp != oifp)
1471 				continue;
1472 			if (ia6_best->ia_ifp != oifp && ifp == oifp)
1473 				goto replace;
1474 
1475 			/*
1476 			 * At this point, we have two cases:
1477 			 * 1. we are looking at a non-deprecated address,
1478 			 *    and ia6_best is also non-deprecated.
1479 			 * 2. we are looking at a deprecated address,
1480 			 *    and ia6_best is also deprecated.
1481 			 * Also, we do not have to consider a case where
1482 			 * the scope of if_best is larger(smaller) than dst and
1483 			 * the scope of the current address is smaller(larger)
1484 			 * than dst. Such a case has already been covered.
1485 			 * Tiebreaking is done according to the following
1486 			 * items:
1487 			 * - the scope comparison between the address and
1488 			 *   dst (dscopecmp)
1489 			 * - the scope comparison between the address and
1490 			 *   ia6_best (bscopecmp)
1491 			 * - if the address match dst longer than ia6_best
1492 			 *   (matchcmp)
1493 			 * - if the address is on the outgoing I/F (outI/F)
1494 			 *
1495 			 * Roughly speaking, the selection policy is
1496 			 * - the most important item is scope. The same scope
1497 			 *   is best. Then search for a larger scope.
1498 			 *   Smaller scopes are the last resort.
1499 			 * - A deprecated address is chosen only when we have
1500 			 *   no address that has an enough scope, but is
1501 			 *   prefered to any addresses of smaller scopes.
1502 			 * - Longest address match against dst is considered
1503 			 *   only for addresses that has the same scope of dst.
1504 			 * - If there is no other reasons to choose one,
1505 			 *   addresses on the outgoing I/F are preferred.
1506 			 *
1507 			 * The precise decision table is as follows:
1508 			 * dscopecmp bscopecmp matchcmp outI/F | replace?
1509 			 *    !equal     equal      N/A    Yes |      Yes (1)
1510 			 *    !equal     equal      N/A     No |       No (2)
1511 			 *    larger    larger      N/A    N/A |       No (3)
1512 			 *    larger   smaller      N/A    N/A |      Yes (4)
1513 			 *   smaller    larger      N/A    N/A |      Yes (5)
1514 			 *   smaller   smaller      N/A    N/A |       No (6)
1515 			 *     equal   smaller      N/A    N/A |      Yes (7)
1516 			 *     equal    larger       (already done)
1517 			 *     equal     equal   larger    N/A |      Yes (8)
1518 			 *     equal     equal  smaller    N/A |       No (9)
1519 			 *     equal     equal    equal    Yes |      Yes (a)
1520 			 *     equal     equal    equal     No |       No (b)
1521 			 */
1522 			dscopecmp = IN6_ARE_SCOPE_CMP(src_scope, dst_scope);
1523 			bscopecmp = IN6_ARE_SCOPE_CMP(src_scope, best_scope);
1524 
1525 			if (dscopecmp && bscopecmp == 0) {
1526 				if (oifp == ifp) /* (1) */
1527 					goto replace;
1528 				continue; /* (2) */
1529 			}
1530 			if (dscopecmp > 0) {
1531 				if (bscopecmp > 0) /* (3) */
1532 					continue;
1533 				goto replace; /* (4) */
1534 			}
1535 			if (dscopecmp < 0) {
1536 				if (bscopecmp > 0) /* (5) */
1537 					goto replace;
1538 				continue; /* (6) */
1539 			}
1540 
1541 			/* now dscopecmp must be 0 */
1542 			if (bscopecmp < 0)
1543 				goto replace; /* (7) */
1544 
1545 			/*
1546 			 * At last both dscopecmp and bscopecmp must be 0.
1547 			 * We need address matching against dst for
1548 			 * tiebreaking.
1549 			 * Privacy addresses are preferred over public
1550 			 * addresses (RFC3484 requires a config knob for
1551 			 * this which we don't provide).
1552 			 */
1553 			if (oifp == ifp) {
1554 				/* Do not replace temporary autoconf addresses
1555 				 * with non-temporary addresses. */
1556 				if ((ia6_best->ia6_flags & IN6_IFF_PRIVACY) &&
1557 				    !(ifatoia6(ifa)->ia6_flags &
1558 				    IN6_IFF_PRIVACY))
1559 					continue;
1560 
1561 				/* Replace non-temporary autoconf addresses
1562 				 * with temporary addresses. */
1563 				if (!(ia6_best->ia6_flags & IN6_IFF_PRIVACY) &&
1564 				    (ifatoia6(ifa)->ia6_flags &
1565 				    IN6_IFF_PRIVACY))
1566 					goto replace;
1567 			}
1568 			tlen = in6_matchlen(IFA_IN6(ifa), dst);
1569 			matchcmp = tlen - blen;
1570 			if (matchcmp > 0) { /* (8) */
1571 #if NCARP > 0
1572 				/*
1573 				 * Don't let carp interfaces win a tie against
1574 				 * the output interface based on matchlen.
1575 				 * We should only use a carp address if no
1576 				 * other interface has a usable address.
1577 				 * Otherwise, when communicating from a carp
1578 				 * master to a carp slave, the slave won't
1579 				 * respond since the carp address is also
1580 				 * configured as a local address on the slave.
1581 				 * Note that carp interfaces in backup state
1582 				 * were already skipped above.
1583 				 */
1584 				if (ifp->if_type == IFT_CARP &&
1585 				    oifp->if_type != IFT_CARP)
1586 					continue;
1587 #endif
1588 				goto replace;
1589 			}
1590 			if (matchcmp < 0) /* (9) */
1591 				continue;
1592 			if (oifp == ifp) /* (a) */
1593 				goto replace;
1594 			continue; /* (b) */
1595 
1596 		  replace:
1597 			ia6_best = ifatoia6(ifa);
1598 			blen = tlen >= 0 ? tlen :
1599 			    in6_matchlen(IFA_IN6(ifa), dst);
1600 			best_scope =
1601 			    in6_addrscope(&ia6_best->ia_addr.sin6_addr);
1602 		}
1603 	}
1604 
1605 	/* count statistics for future improvements */
1606 	if (ia6_best == NULL)
1607 		ip6stat_inc(ip6s_sources_none);
1608 	else {
1609 		if (oifp == ia6_best->ia_ifp)
1610 			ip6stat_inc(ip6s_sources_sameif + best_scope);
1611 		else
1612 			ip6stat_inc(ip6s_sources_otherif + best_scope);
1613 
1614 		if (best_scope == dst_scope)
1615 			ip6stat_inc(ip6s_sources_samescope + best_scope);
1616 		else
1617 			ip6stat_inc(ip6s_sources_otherscope + best_scope);
1618 
1619 		if ((ia6_best->ia6_flags & IN6_IFF_DEPRECATED) != 0)
1620 			ip6stat_inc(ip6s_sources_deprecated + best_scope);
1621 	}
1622 
1623 	return (ia6_best);
1624 }
1625 
1626 int
1627 in6if_do_dad(struct ifnet *ifp)
1628 {
1629 	if ((ifp->if_flags & IFF_LOOPBACK) != 0)
1630 		return (0);
1631 
1632 	switch (ifp->if_type) {
1633 #if NCARP > 0
1634 	case IFT_CARP:
1635 		/*
1636 		 * XXX: DAD does not work currently on carp(4)
1637 		 * so disable it for now.
1638 		 */
1639 		return (0);
1640 #endif
1641 	default:
1642 		/*
1643 		 * Our DAD routine requires the interface up and running.
1644 		 * However, some interfaces can be up before the RUNNING
1645 		 * status.  Additionaly, users may try to assign addresses
1646 		 * before the interface becomes up (or running).
1647 		 * We simply skip DAD in such a case as a work around.
1648 		 * XXX: we should rather mark "tentative" on such addresses,
1649 		 * and do DAD after the interface becomes ready.
1650 		 */
1651 		if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) !=
1652 		    (IFF_UP|IFF_RUNNING))
1653 			return (0);
1654 
1655 		return (1);
1656 	}
1657 }
1658 
1659 void *
1660 in6_domifattach(struct ifnet *ifp)
1661 {
1662 	struct in6_ifextra *ext;
1663 
1664 	ext = malloc(sizeof(*ext), M_IFADDR, M_WAITOK | M_ZERO);
1665 
1666 	ext->nd_ifinfo = nd6_ifattach(ifp);
1667 	ext->nprefixes = 0;
1668 	ext->ndefrouters = 0;
1669 	return ext;
1670 }
1671 
1672 void
1673 in6_domifdetach(struct ifnet *ifp, void *aux)
1674 {
1675 	struct in6_ifextra *ext = (struct in6_ifextra *)aux;
1676 
1677 	nd6_ifdetach(ext->nd_ifinfo);
1678 	free(ext, M_IFADDR, sizeof(*ext));
1679 }
1680