1 /* $KAME: in6_src.c,v 1.159 2005/10/19 01:40:32 t-momose Exp $ */ 2 3 /* 4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the project nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1982, 1986, 1991, 1993 34 * The Regents of the University of California. All rights reserved. 35 * 36 * Redistribution and use in source and binary forms, with or without 37 * modification, are permitted provided that the following conditions 38 * are met: 39 * 1. Redistributions of source code must retain the above copyright 40 * notice, this list of conditions and the following disclaimer. 41 * 2. Redistributions in binary form must reproduce the above copyright 42 * notice, this list of conditions and the following disclaimer in the 43 * documentation and/or other materials provided with the distribution. 44 * 3. All advertising materials mentioning features or use of this software 45 * must display the following acknowledgement: 46 * This product includes software developed by the University of 47 * California, Berkeley and its contributors. 48 * 4. Neither the name of the University nor the names of its contributors 49 * may be used to endorse or promote products derived from this software 50 * without specific prior written permission. 51 * 52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 62 * SUCH DAMAGE. 63 * 64 * @(#)in_pcb.c 8.2 (Berkeley) 1/4/94 65 */ 66 67 #include <sys/cdefs.h> 68 __KERNEL_RCSID(0, "$NetBSD: in6_src.c,v 1.28 2006/09/01 01:59:56 dyoung Exp $"); 69 70 #include "opt_inet.h" 71 72 #include <sys/param.h> 73 #include <sys/systm.h> 74 #include <sys/malloc.h> 75 #include <sys/mbuf.h> 76 #include <sys/protosw.h> 77 #include <sys/socket.h> 78 #include <sys/socketvar.h> 79 #ifndef __FreeBSD__ 80 #include <sys/ioctl.h> 81 #else 82 #include <sys/sockio.h> 83 #endif 84 #ifdef __FreeBSD__ 85 #include <sys/sysctl.h> 86 #endif 87 #include <sys/errno.h> 88 #include <sys/time.h> 89 #include <sys/kernel.h> 90 #include <sys/proc.h> 91 #include <sys/kauth.h> 92 93 #include <net/if.h> 94 #include <net/if_types.h> 95 #include <net/route.h> 96 #ifdef RADIX_MPATH 97 #include <net/radix_mpath.h> 98 #endif 99 100 #include <netinet/in.h> 101 #include <netinet/in_var.h> 102 #include <netinet/in_systm.h> 103 #include <netinet/ip.h> 104 #include <netinet/in_pcb.h> 105 #include <netinet6/in6_var.h> 106 #include <netinet/ip6.h> 107 #ifndef __OpenBSD__ 108 #include <netinet6/in6_pcb.h> 109 #endif 110 #include <netinet6/ip6_var.h> 111 #include <netinet6/nd6.h> 112 #include <netinet6/scope6_var.h> 113 114 #include <net/net_osdep.h> 115 116 #ifdef MIP6 117 #include <netinet6/mip6.h> 118 #include <netinet6/mip6_var.h> 119 #include "mip.h" 120 #if NMIP > 0 121 #include <net/if_mip.h> 122 #endif /* NMIP > 0 */ 123 #endif /* MIP6 */ 124 125 #ifndef __OpenBSD__ 126 #include "loop.h" 127 #endif 128 #ifdef __NetBSD__ 129 extern struct ifnet loif[NLOOP]; 130 #endif 131 132 #define ADDR_LABEL_NOTAPP (-1) 133 struct in6_addrpolicy defaultaddrpolicy; 134 135 #ifdef notyet /* until introducing ND extensions and address selection */ 136 int ip6_prefer_tempaddr = 0; 137 #endif 138 139 static int selectroute __P((struct sockaddr_in6 *, struct ip6_pktopts *, 140 struct ip6_moptions *, struct route_in6 *, struct ifnet **, 141 struct rtentry **, int, int)); 142 static int in6_selectif __P((struct sockaddr_in6 *, struct ip6_pktopts *, 143 struct ip6_moptions *, struct route_in6 *, struct ifnet **)); 144 145 static struct in6_addrpolicy *lookup_addrsel_policy __P((struct sockaddr_in6 *)); 146 147 static void init_policy_queue __P((void)); 148 static int add_addrsel_policyent __P((struct in6_addrpolicy *)); 149 static int delete_addrsel_policyent __P((struct in6_addrpolicy *)); 150 static int walk_addrsel_policy __P((int (*)(struct in6_addrpolicy *, void *), 151 void *)); 152 static int dump_addrsel_policyent __P((struct in6_addrpolicy *, void *)); 153 static struct in6_addrpolicy *match_addrsel_policy __P((struct sockaddr_in6 *)); 154 155 /* 156 * Return an IPv6 address, which is the most appropriate for a given 157 * destination and user specified options. 158 * If necessary, this function lookups the routing table and returns 159 * an entry to the caller for later use. 160 */ 161 #if 0 /* diabled ad-hoc */ 162 #define REPLACE(r) do {\ 163 if ((r) < sizeof(ip6stat.ip6s_sources_rule) / \ 164 sizeof(ip6stat.ip6s_sources_rule[0])) /* check for safety */ \ 165 ip6stat.ip6s_sources_rule[(r)]++; \ 166 /* printf("in6_selectsrc: replace %s with %s by %d\n", ia_best ? ip6_sprintf(&ia_best->ia_addr.sin6_addr) : "none", ip6_sprintf(&ia->ia_addr.sin6_addr), (r)); */ \ 167 goto replace; \ 168 } while(0) 169 #define NEXT(r) do {\ 170 if ((r) < sizeof(ip6stat.ip6s_sources_rule) / \ 171 sizeof(ip6stat.ip6s_sources_rule[0])) /* check for safety */ \ 172 ip6stat.ip6s_sources_rule[(r)]++; \ 173 /* printf("in6_selectsrc: keep %s against %s by %d\n", ia_best ? ip6_sprintf(&ia_best->ia_addr.sin6_addr) : "none", ip6_sprintf(&ia->ia_addr.sin6_addr), (r)); */ \ 174 goto next; /* XXX: we can't use 'continue' here */ \ 175 } while(0) 176 #define BREAK(r) do { \ 177 if ((r) < sizeof(ip6stat.ip6s_sources_rule) / \ 178 sizeof(ip6stat.ip6s_sources_rule[0])) /* check for safety */ \ 179 ip6stat.ip6s_sources_rule[(r)]++; \ 180 goto out; /* XXX: we can't use 'break' here */ \ 181 } while(0) 182 #else 183 #define REPLACE(r) goto replace 184 #define NEXT(r) goto next 185 #define BREAK(r) goto out 186 #endif 187 188 struct in6_addr * 189 in6_selectsrc(dstsock, opts, mopts, ro, laddr, ifpp, errorp) 190 struct sockaddr_in6 *dstsock; 191 struct ip6_pktopts *opts; 192 struct ip6_moptions *mopts; 193 struct route_in6 *ro; 194 struct in6_addr *laddr; 195 struct ifnet **ifpp; 196 int *errorp; 197 { 198 struct in6_addr dst; 199 struct ifnet *ifp = NULL; 200 struct in6_ifaddr *ia = NULL, *ia_best = NULL; 201 struct in6_pktinfo *pi = NULL; 202 int dst_scope = -1, best_scope = -1, best_matchlen = -1; 203 struct in6_addrpolicy *dst_policy = NULL, *best_policy = NULL; 204 u_int32_t odstzone; 205 int error; 206 #ifdef notyet /* until introducing ND extensions and address selection */ 207 int prefer_tempaddr; 208 #endif 209 #if defined(MIP6) && NMIP > 0 210 u_int8_t ip6po_usecoa = 0; 211 #endif /* MIP6 && NMIP > 0 */ 212 213 dst = dstsock->sin6_addr; /* make a copy for local operation */ 214 *errorp = 0; 215 if (ifpp) 216 *ifpp = NULL; 217 218 /* 219 * Try to determine the outgoing interface for the given destination. 220 * We do this regardless of whether the socket is bound, since the 221 * caller may need this information as a side effect of the call 222 * to this function (e.g., for identifying the appropriate scope zone 223 * ID). 224 */ 225 error = in6_selectif(dstsock, opts, mopts, ro, &ifp); 226 if (ifpp) 227 *ifpp = ifp; 228 229 /* 230 * If the source address is explicitly specified by the caller, 231 * check if the requested source address is indeed a unicast address 232 * assigned to the node, and can be used as the packet's source 233 * address. If everything is okay, use the address as source. 234 */ 235 if (opts && (pi = opts->ip6po_pktinfo) && 236 !IN6_IS_ADDR_UNSPECIFIED(&pi->ipi6_addr)) { 237 struct sockaddr_in6 srcsock; 238 struct in6_ifaddr *ia6; 239 240 /* 241 * Determine the appropriate zone id of the source based on 242 * the zone of the destination and the outgoing interface. 243 * If the specified address is ambiguous wrt the scope zone, 244 * the interface must be specified; otherwise, ifa_ifwithaddr() 245 * will fail matching the address. 246 */ 247 bzero(&srcsock, sizeof(srcsock)); 248 srcsock.sin6_family = AF_INET6; 249 srcsock.sin6_len = sizeof(srcsock); 250 srcsock.sin6_addr = pi->ipi6_addr; 251 if (ifp) { 252 *errorp = in6_setscope(&srcsock.sin6_addr, ifp, NULL); 253 if (*errorp != 0) 254 return (NULL); 255 } 256 257 ia6 = (struct in6_ifaddr *)ifa_ifwithaddr((struct sockaddr *)(&srcsock)); 258 if (ia6 == NULL || 259 (ia6->ia6_flags & (IN6_IFF_ANYCAST | IN6_IFF_NOTREADY))) { 260 *errorp = EADDRNOTAVAIL; 261 return (NULL); 262 } 263 pi->ipi6_addr = srcsock.sin6_addr; /* XXX: this overrides pi */ 264 if (ifpp) 265 *ifpp = ifp; 266 return (&ia6->ia_addr.sin6_addr); 267 } 268 269 /* 270 * If the socket has already bound the source, just use it. We don't 271 * care at the moment whether in6_selectif() succeeded above, even 272 * though it would eventually cause an error. 273 */ 274 if (laddr && !IN6_IS_ADDR_UNSPECIFIED(laddr)) 275 return (laddr); 276 277 /* 278 * The outgoing interface is crucial in the general selection procedure 279 * below. If it is not known at this point, we fail. 280 */ 281 if (ifp == NULL) { 282 *errorp = error; 283 return (NULL); 284 } 285 286 /* 287 * If the address is not yet determined, choose the best one based on 288 * the outgoing interface and the destination address. 289 */ 290 291 #if defined(MIP6) && NMIP > 0 292 /* 293 * a caller can specify IP6PO_USECOA to not to use a home 294 * address. for example, the case that the neighbour 295 * unreachability detection to the global address. 296 */ 297 if (opts != NULL && 298 (opts->ip6po_flags & IP6PO_USECOA) != 0) { 299 ip6po_usecoa = 1; 300 } 301 #endif /* MIP6 && NMIP > 0 */ 302 303 #ifdef DIAGNOSTIC 304 if (ifp == NULL) /* this should not happen */ 305 panic("in6_selectsrc: NULL ifp"); 306 #endif 307 *errorp = in6_setscope(&dst, ifp, &odstzone); 308 if (*errorp != 0) 309 return (NULL); 310 311 for (ia = in6_ifaddr; ia; ia = ia->ia_next) { 312 int new_scope = -1, new_matchlen = -1; 313 struct in6_addrpolicy *new_policy = NULL; 314 u_int32_t srczone, osrczone, dstzone; 315 struct in6_addr src; 316 struct ifnet *ifp1 = ia->ia_ifp; 317 318 /* 319 * We'll never take an address that breaks the scope zone 320 * of the destination. We also skip an address if its zone 321 * does not contain the outgoing interface. 322 * XXX: we should probably use sin6_scope_id here. 323 */ 324 if (in6_setscope(&dst, ifp1, &dstzone) || 325 odstzone != dstzone) { 326 continue; 327 } 328 src = ia->ia_addr.sin6_addr; 329 if (in6_setscope(&src, ifp, &osrczone) || 330 in6_setscope(&src, ifp1, &srczone) || 331 osrczone != srczone) { 332 continue; 333 } 334 335 /* avoid unusable addresses */ 336 if ((ia->ia6_flags & 337 (IN6_IFF_NOTREADY | IN6_IFF_ANYCAST | IN6_IFF_DETACHED))) { 338 continue; 339 } 340 if (!ip6_use_deprecated && IFA6_IS_DEPRECATED(ia)) 341 continue; 342 343 #if defined(MIP6) && NMIP > 0 344 /* avoid unusable home addresses. */ 345 if ((ia->ia6_flags & IN6_IFF_HOME) && 346 !mip6_ifa6_is_addr_valid_hoa(ia)) 347 continue; 348 #endif /* MIP6 && NMIP > 0 */ 349 350 /* Rule 1: Prefer same address */ 351 if (IN6_ARE_ADDR_EQUAL(&dst, &ia->ia_addr.sin6_addr)) { 352 ia_best = ia; 353 BREAK(1); /* there should be no better candidate */ 354 } 355 356 if (ia_best == NULL) 357 REPLACE(0); 358 359 /* Rule 2: Prefer appropriate scope */ 360 if (dst_scope < 0) 361 dst_scope = in6_addrscope(&dst); 362 new_scope = in6_addrscope(&ia->ia_addr.sin6_addr); 363 if (IN6_ARE_SCOPE_CMP(best_scope, new_scope) < 0) { 364 if (IN6_ARE_SCOPE_CMP(best_scope, dst_scope) < 0) 365 REPLACE(2); 366 NEXT(2); 367 } else if (IN6_ARE_SCOPE_CMP(new_scope, best_scope) < 0) { 368 if (IN6_ARE_SCOPE_CMP(new_scope, dst_scope) < 0) 369 NEXT(2); 370 REPLACE(2); 371 } 372 373 /* 374 * Rule 3: Avoid deprecated addresses. Note that the case of 375 * !ip6_use_deprecated is already rejected above. 376 */ 377 if (!IFA6_IS_DEPRECATED(ia_best) && IFA6_IS_DEPRECATED(ia)) 378 NEXT(3); 379 if (IFA6_IS_DEPRECATED(ia_best) && !IFA6_IS_DEPRECATED(ia)) 380 REPLACE(3); 381 382 /* Rule 4: Prefer home addresses */ 383 #if defined(MIP6) && NMIP > 0 384 if (!MIP6_IS_MN) 385 goto skip_rule4; 386 387 if ((ia_best->ia6_flags & IN6_IFF_HOME) == 0 && 388 (ia->ia6_flags & IN6_IFF_HOME) == 0) { 389 /* both address are not home addresses. */ 390 goto skip_rule4; 391 } 392 393 /* 394 * If SA is simultaneously a home address and care-of 395 * address and SB is not, then prefer SA. Similarly, 396 * if SB is simultaneously a home address and care-of 397 * address and SA is not, then prefer SB. 398 */ 399 if (((ia_best->ia6_flags & IN6_IFF_HOME) != 0 && 400 ia_best->ia_ifp->if_type != IFT_MIP) 401 && 402 ((ia->ia6_flags & IN6_IFF_HOME) != 0 && 403 ia->ia_ifp->if_type == IFT_MIP)) 404 NEXT(4); 405 if (((ia_best->ia6_flags & IN6_IFF_HOME) != 0 && 406 ia_best->ia_ifp->if_type == IFT_MIP) 407 && 408 ((ia->ia6_flags & IN6_IFF_HOME) != 0 && 409 ia->ia_ifp->if_type != IFT_MIP)) 410 REPLACE(4); 411 if (ip6po_usecoa == 0) { 412 /* 413 * If SA is just a home address and SB is just 414 * a care-of address, then prefer 415 * SA. Similarly, if SB is just a home address 416 * and SA is just a care-of address, then 417 * prefer SB. 418 */ 419 if ((ia_best->ia6_flags & IN6_IFF_HOME) != 0 && 420 (ia->ia6_flags & IN6_IFF_HOME) == 0) { 421 NEXT(4); 422 } 423 if ((ia_best->ia6_flags & IN6_IFF_HOME) == 0 && 424 (ia->ia6_flags & IN6_IFF_HOME) != 0) { 425 REPLACE(4); 426 } 427 } else { 428 /* 429 * a sender don't want to use a home address 430 * because: 431 * 432 * 1) we cannot use. (ex. NS or NA to global 433 * addresses.) 434 * 435 * 2) a user specified not to use. 436 * (ex. mip6control -u) 437 */ 438 if ((ia_best->ia6_flags & IN6_IFF_HOME) == 0 && 439 (ia->ia6_flags & IN6_IFF_HOME) != 0) { 440 /* XXX breaks stat */ 441 NEXT(0); 442 } 443 if ((ia_best->ia6_flags & IN6_IFF_HOME) != 0 && 444 (ia->ia6_flags & IN6_IFF_HOME) == 0) { 445 /* XXX breaks stat */ 446 REPLACE(0); 447 } 448 } 449 skip_rule4: 450 #endif /* MIP6 && NMIP > 0 */ 451 452 /* Rule 5: Prefer outgoing interface */ 453 if (ia_best->ia_ifp == ifp && ia->ia_ifp != ifp) 454 NEXT(5); 455 if (ia_best->ia_ifp != ifp && ia->ia_ifp == ifp) 456 REPLACE(5); 457 458 /* 459 * Rule 6: Prefer matching label 460 * Note that best_policy should be non-NULL here. 461 */ 462 if (dst_policy == NULL) 463 dst_policy = lookup_addrsel_policy(dstsock); 464 if (dst_policy->label != ADDR_LABEL_NOTAPP) { 465 new_policy = lookup_addrsel_policy(&ia->ia_addr); 466 if (dst_policy->label == best_policy->label && 467 dst_policy->label != new_policy->label) 468 NEXT(6); 469 if (dst_policy->label != best_policy->label && 470 dst_policy->label == new_policy->label) 471 REPLACE(6); 472 } 473 474 /* 475 * Rule 7: Prefer public addresses. 476 * We allow users to reverse the logic by configuring 477 * a sysctl variable, so that privacy conscious users can 478 * always prefer temporary addresses. 479 */ 480 #ifdef notyet /* until introducing ND extensions and address selection */ 481 if (opts == NULL || 482 opts->ip6po_prefer_tempaddr == IP6PO_TEMPADDR_SYSTEM) { 483 prefer_tempaddr = ip6_prefer_tempaddr; 484 } else if (opts->ip6po_prefer_tempaddr == 485 IP6PO_TEMPADDR_NOTPREFER) { 486 prefer_tempaddr = 0; 487 } else 488 prefer_tempaddr = 1; 489 if (!(ia_best->ia6_flags & IN6_IFF_TEMPORARY) && 490 (ia->ia6_flags & IN6_IFF_TEMPORARY)) { 491 if (prefer_tempaddr) 492 REPLACE(7); 493 else 494 NEXT(7); 495 } 496 if ((ia_best->ia6_flags & IN6_IFF_TEMPORARY) && 497 !(ia->ia6_flags & IN6_IFF_TEMPORARY)) { 498 if (prefer_tempaddr) 499 NEXT(7); 500 else 501 REPLACE(7); 502 } 503 #endif 504 505 /* 506 * Rule 8: prefer addresses on alive interfaces. 507 * This is a KAME specific rule. 508 */ 509 if ((ia_best->ia_ifp->if_flags & IFF_UP) && 510 !(ia->ia_ifp->if_flags & IFF_UP)) 511 NEXT(8); 512 if (!(ia_best->ia_ifp->if_flags & IFF_UP) && 513 (ia->ia_ifp->if_flags & IFF_UP)) 514 REPLACE(8); 515 516 /* 517 * Rule 9: prefer addresses on "preferred" interfaces. 518 * This is a KAME specific rule. 519 */ 520 #ifdef notyet /* until introducing address selection */ 521 #define NDI_BEST ND_IFINFO(ia_best->ia_ifp) 522 #define NDI_NEW ND_IFINFO(ia->ia_ifp) 523 if ((NDI_BEST->flags & ND6_IFF_PREFER_SOURCE) && 524 !(NDI_NEW->flags & ND6_IFF_PREFER_SOURCE)) 525 NEXT(9); 526 if (!(NDI_BEST->flags & ND6_IFF_PREFER_SOURCE) && 527 (NDI_NEW->flags & ND6_IFF_PREFER_SOURCE)) 528 REPLACE(9); 529 #undef NDI_BEST 530 #undef NDI_NEW 531 #endif 532 533 /* 534 * Rule 14: Use longest matching prefix. 535 * Note: in the address selection draft, this rule is 536 * documented as "Rule 8". However, since it is also 537 * documented that this rule can be overridden, we assign 538 * a large number so that it is easy to assign smaller numbers 539 * to more preferred rules. 540 */ 541 new_matchlen = in6_matchlen(&ia->ia_addr.sin6_addr, &dst); 542 if (best_matchlen < new_matchlen) 543 REPLACE(14); 544 if (new_matchlen < best_matchlen) 545 NEXT(14); 546 547 /* Rule 15 is reserved. */ 548 549 /* 550 * Last resort: just keep the current candidate. 551 * Or, do we need more rules? 552 */ 553 continue; 554 555 replace: 556 ia_best = ia; 557 best_scope = (new_scope >= 0 ? new_scope : 558 in6_addrscope(&ia_best->ia_addr.sin6_addr)); 559 best_policy = (new_policy ? new_policy : 560 lookup_addrsel_policy(&ia_best->ia_addr)); 561 best_matchlen = (new_matchlen >= 0 ? new_matchlen : 562 in6_matchlen(&ia_best->ia_addr.sin6_addr, 563 &dst)); 564 565 next: 566 continue; 567 568 out: 569 break; 570 } 571 572 if ((ia = ia_best) == NULL) { 573 *errorp = EADDRNOTAVAIL; 574 return (NULL); 575 } 576 577 return (&ia->ia_addr.sin6_addr); 578 } 579 #undef REPLACE 580 #undef BREAK 581 #undef NEXT 582 583 static int 584 selectroute(dstsock, opts, mopts, ro, retifp, retrt, clone, norouteok) 585 struct sockaddr_in6 *dstsock; 586 struct ip6_pktopts *opts; 587 struct ip6_moptions *mopts; 588 #ifdef NEW_STRUCT_ROUTE 589 struct route *ro; 590 #else 591 struct route_in6 *ro; 592 #endif 593 struct ifnet **retifp; 594 struct rtentry **retrt; 595 int clone; 596 int norouteok; 597 { 598 int error = 0; 599 struct ifnet *ifp = NULL; 600 struct rtentry *rt = NULL; 601 struct sockaddr_in6 *sin6_next; 602 struct in6_pktinfo *pi = NULL; 603 struct in6_addr *dst; 604 605 dst = &dstsock->sin6_addr; 606 607 #if 0 608 if (dstsock->sin6_addr.s6_addr32[0] == 0 && 609 dstsock->sin6_addr.s6_addr32[1] == 0 && 610 !IN6_IS_ADDR_LOOPBACK(&dstsock->sin6_addr)) { 611 printf("in6_selectroute: strange destination %s\n", 612 ip6_sprintf(&dstsock->sin6_addr)); 613 } else { 614 printf("in6_selectroute: destination = %s%%%d\n", 615 ip6_sprintf(&dstsock->sin6_addr), 616 dstsock->sin6_scope_id); /* for debug */ 617 } 618 #endif 619 620 /* If the caller specify the outgoing interface explicitly, use it. */ 621 if (opts && (pi = opts->ip6po_pktinfo) != NULL && pi->ipi6_ifindex) { 622 /* XXX boundary check is assumed to be already done. */ 623 #ifdef __FreeBSD__ 624 ifp = ifnet_byindex(pi->ipi6_ifindex); 625 #else 626 ifp = ifindex2ifnet[pi->ipi6_ifindex]; 627 #endif 628 if (ifp != NULL && 629 (norouteok || retrt == NULL || 630 IN6_IS_ADDR_MULTICAST(dst))) { 631 /* 632 * we do not have to check or get the route for 633 * multicast. 634 */ 635 goto done; 636 } else 637 goto getroute; 638 } 639 640 /* 641 * If the destination address is a multicast address and the outgoing 642 * interface for the address is specified by the caller, use it. 643 */ 644 if (IN6_IS_ADDR_MULTICAST(dst) && 645 mopts != NULL && (ifp = mopts->im6o_multicast_ifp) != NULL) { 646 goto done; /* we do not need a route for multicast. */ 647 } 648 649 getroute: 650 /* 651 * If the next hop address for the packet is specified by the caller, 652 * use it as the gateway. 653 */ 654 if (opts && opts->ip6po_nexthop) { 655 struct route_in6 *ron; 656 657 sin6_next = satosin6(opts->ip6po_nexthop); 658 659 /* at this moment, we only support AF_INET6 next hops */ 660 if (sin6_next->sin6_family != AF_INET6) { 661 error = EAFNOSUPPORT; /* or should we proceed? */ 662 goto done; 663 } 664 665 /* 666 * If the next hop is an IPv6 address, then the node identified 667 * by that address must be a neighbor of the sending host. 668 */ 669 ron = &opts->ip6po_nextroute; 670 if ((ron->ro_rt && 671 (ron->ro_rt->rt_flags & (RTF_UP | RTF_GATEWAY)) != 672 RTF_UP) || 673 !IN6_ARE_ADDR_EQUAL(&satosin6(&ron->ro_dst)->sin6_addr, 674 &sin6_next->sin6_addr)) { 675 if (ron->ro_rt) { 676 RTFREE(ron->ro_rt); 677 ron->ro_rt = NULL; 678 } 679 *satosin6(&ron->ro_dst) = *sin6_next; 680 } 681 if (ron->ro_rt == NULL) { 682 rtalloc((struct route *)ron); /* multi path case? */ 683 if (ron->ro_rt == NULL || 684 (ron->ro_rt->rt_flags & RTF_GATEWAY)) { 685 if (ron->ro_rt) { 686 RTFREE(ron->ro_rt); 687 ron->ro_rt = NULL; 688 } 689 error = EHOSTUNREACH; 690 goto done; 691 } 692 } 693 if (!nd6_is_addr_neighbor(sin6_next, ron->ro_rt->rt_ifp)) { 694 RTFREE(ron->ro_rt); 695 ron->ro_rt = NULL; 696 error = EHOSTUNREACH; 697 goto done; 698 } 699 rt = ron->ro_rt; 700 ifp = rt->rt_ifp; 701 702 /* 703 * When cloning is required, try to allocate a route to the 704 * destination so that the caller can store path MTU 705 * information. 706 */ 707 if (!clone) 708 goto done; 709 } 710 711 /* 712 * Use a cached route if it exists and is valid, else try to allocate 713 * a new one. Note that we should check the address family of the 714 * cached destination, in case of sharing the cache with IPv4. 715 */ 716 if (ro) { 717 if (ro->ro_rt && 718 (!(ro->ro_rt->rt_flags & RTF_UP) || 719 ((struct sockaddr *)(&ro->ro_dst))->sa_family != AF_INET6 || 720 !IN6_ARE_ADDR_EQUAL(&satosin6(&ro->ro_dst)->sin6_addr, 721 dst))) { 722 RTFREE(ro->ro_rt); 723 ro->ro_rt = (struct rtentry *)NULL; 724 } 725 if (ro->ro_rt == (struct rtentry *)NULL) { 726 struct sockaddr_in6 *sa6; 727 728 /* No route yet, so try to acquire one */ 729 bzero(&ro->ro_dst, sizeof(struct sockaddr_in6)); 730 sa6 = (struct sockaddr_in6 *)&ro->ro_dst; 731 *sa6 = *dstsock; 732 sa6->sin6_scope_id = 0; 733 if (clone) { 734 #ifdef RADIX_MPATH 735 rtalloc_mpath((struct route *)ro, 736 ntohl(sa6->sin6_addr.s6_addr32[3])); 737 #else 738 rtalloc((struct route *)ro); 739 #endif /* RADIX_MPATH */ 740 } else { 741 #ifdef RADIX_MPATH 742 rtalloc_mpath((struct route *)ro, 743 ntohl(sa6->sin6_addr.s6_addr32[3])); 744 #else 745 ro->ro_rt = rtalloc1(&((struct route *)ro) 746 ->ro_dst, 0); 747 #endif /* RADIX_MPATH */ 748 } 749 } 750 751 /* 752 * do not care about the result if we have the nexthop 753 * explicitly specified. 754 */ 755 if (opts && opts->ip6po_nexthop) 756 goto done; 757 758 if (ro->ro_rt) { 759 ifp = ro->ro_rt->rt_ifp; 760 761 if (ifp == NULL) { /* can this really happen? */ 762 RTFREE(ro->ro_rt); 763 ro->ro_rt = NULL; 764 } 765 } 766 if (ro->ro_rt == NULL) 767 error = EHOSTUNREACH; 768 rt = ro->ro_rt; 769 770 /* 771 * Check if the outgoing interface conflicts with 772 * the interface specified by ipi6_ifindex (if specified). 773 * Note that loopback interface is always okay. 774 * (this may happen when we are sending a packet to one of 775 * our own addresses.) 776 */ 777 if (opts && opts->ip6po_pktinfo && 778 opts->ip6po_pktinfo->ipi6_ifindex) { 779 if (!(ifp->if_flags & IFF_LOOPBACK) && 780 ifp->if_index != 781 opts->ip6po_pktinfo->ipi6_ifindex) { 782 error = EHOSTUNREACH; 783 goto done; 784 } 785 } 786 } 787 788 done: 789 if (ifp == NULL && rt == NULL) { 790 /* 791 * This can happen if the caller did not pass a cached route 792 * nor any other hints. We treat this case an error. 793 */ 794 error = EHOSTUNREACH; 795 } 796 if (error == EHOSTUNREACH) 797 ip6stat.ip6s_noroute++; 798 799 if (retifp != NULL) 800 *retifp = ifp; 801 if (retrt != NULL) 802 *retrt = rt; /* rt may be NULL */ 803 804 return (error); 805 } 806 807 static int 808 in6_selectif(dstsock, opts, mopts, ro, retifp) 809 struct sockaddr_in6 *dstsock; 810 struct ip6_pktopts *opts; 811 struct ip6_moptions *mopts; 812 struct route_in6 *ro; 813 struct ifnet **retifp; 814 { 815 int error, clone; 816 struct rtentry *rt = NULL; 817 818 clone = IN6_IS_ADDR_MULTICAST(&dstsock->sin6_addr) ? 0 : 1; 819 if ((error = selectroute(dstsock, opts, mopts, ro, retifp, 820 &rt, clone, 1)) != 0) { 821 return (error); 822 } 823 824 /* 825 * do not use a rejected or black hole route. 826 * XXX: this check should be done in the L2 output routine. 827 * However, if we skipped this check here, we'd see the following 828 * scenario: 829 * - install a rejected route for a scoped address prefix 830 * (like fe80::/10) 831 * - send a packet to a destination that matches the scoped prefix, 832 * with ambiguity about the scope zone. 833 * - pick the outgoing interface from the route, and disambiguate the 834 * scope zone with the interface. 835 * - ip6_output() would try to get another route with the "new" 836 * destination, which may be valid. 837 * - we'd see no error on output. 838 * Although this may not be very harmful, it should still be confusing. 839 * We thus reject the case here. 840 */ 841 if (rt && (rt->rt_flags & (RTF_REJECT | RTF_BLACKHOLE))) 842 return (rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH); 843 844 /* 845 * Adjust the "outgoing" interface. If we're going to loop the packet 846 * back to ourselves, the ifp would be the loopback interface. 847 * However, we'd rather know the interface associated to the 848 * destination address (which should probably be one of our own 849 * addresses.) 850 */ 851 if (rt && rt->rt_ifa && rt->rt_ifa->ifa_ifp) 852 *retifp = rt->rt_ifa->ifa_ifp; 853 854 return (0); 855 } 856 857 int 858 in6_selectroute(dstsock, opts, mopts, ro, retifp, retrt, clone) 859 struct sockaddr_in6 *dstsock; 860 struct ip6_pktopts *opts; 861 struct ip6_moptions *mopts; 862 struct route_in6 *ro; 863 struct ifnet **retifp; 864 struct rtentry **retrt; 865 int clone; /* meaningful only for bsdi and freebsd. */ 866 { 867 return (selectroute(dstsock, opts, mopts, ro, retifp, 868 retrt, clone, 0)); 869 } 870 871 /* 872 * Default hop limit selection. The precedence is as follows: 873 * 1. Hoplimit value specified via ioctl. 874 * 2. (If the outgoing interface is detected) the current 875 * hop limit of the interface specified by router advertisement. 876 * 3. The system default hoplimit. 877 */ 878 int 879 in6_selecthlim(in6p, ifp) 880 struct in6pcb *in6p; 881 struct ifnet *ifp; 882 { 883 if (in6p && in6p->in6p_hops >= 0) 884 return (in6p->in6p_hops); 885 else if (ifp) 886 return (ND_IFINFO(ifp)->chlim); 887 else 888 return (ip6_defhlim); 889 } 890 891 /* 892 * Find an empty port and set it to the specified PCB. 893 */ 894 int 895 in6_pcbsetport(laddr, in6p, l) 896 struct in6_addr *laddr; 897 struct in6pcb *in6p; 898 struct lwp *l; 899 { 900 struct socket *so = in6p->in6p_socket; 901 struct inpcbtable *table = in6p->in6p_table; 902 int cnt; 903 u_int16_t minport, maxport; 904 u_int16_t lport, *lastport; 905 int wild = 0; 906 void *t; 907 908 /* XXX: this is redundant when called from in6_pcbbind */ 909 if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0 && 910 ((so->so_proto->pr_flags & PR_CONNREQUIRED) == 0 || 911 (so->so_options & SO_ACCEPTCONN) == 0)) 912 wild = 1; 913 914 if (in6p->in6p_flags & IN6P_LOWPORT) { 915 #ifndef IPNOPRIVPORTS 916 if (l == 0 || (kauth_authorize_generic(l->l_cred, 917 KAUTH_GENERIC_ISSUSER, &l->l_acflag) != 0)) 918 return (EACCES); 919 #endif 920 minport = ip6_lowportmin; 921 maxport = ip6_lowportmax; 922 lastport = &table->inpt_lastlow; 923 } else { 924 minport = ip6_anonportmin; 925 maxport = ip6_anonportmax; 926 lastport = &table->inpt_lastport; 927 } 928 929 if (minport > maxport) { /* sanity check */ 930 u_int16_t swp; 931 932 swp = minport; 933 minport = maxport; 934 maxport = swp; 935 } 936 937 lport = *lastport - 1; 938 for (cnt = maxport - minport + 1; cnt; cnt--, lport--) { 939 if (lport < minport || lport > maxport) 940 lport = maxport; 941 #ifdef INET 942 if (IN6_IS_ADDR_V4MAPPED(laddr)) { 943 t = in_pcblookup_port(table, 944 *(struct in_addr *)&laddr->s6_addr32[3], 945 lport, wild); 946 } else 947 #endif 948 { 949 t = in6_pcblookup_port(table, laddr, lport, wild); 950 } 951 if (t == 0) 952 goto found; 953 } 954 955 return (EAGAIN); 956 957 found: 958 in6p->in6p_flags |= IN6P_ANONPORT; 959 *lastport = lport; 960 in6p->in6p_lport = htons(lport); 961 in6_pcbstate(in6p, IN6P_BOUND); 962 return (0); /* success */ 963 } 964 965 void 966 addrsel_policy_init() 967 { 968 init_policy_queue(); 969 970 /* initialize the "last resort" policy */ 971 bzero(&defaultaddrpolicy, sizeof(defaultaddrpolicy)); 972 defaultaddrpolicy.label = ADDR_LABEL_NOTAPP; 973 } 974 975 static struct in6_addrpolicy * 976 lookup_addrsel_policy(key) 977 struct sockaddr_in6 *key; 978 { 979 struct in6_addrpolicy *match = NULL; 980 981 match = match_addrsel_policy(key); 982 983 if (match == NULL) 984 match = &defaultaddrpolicy; 985 else 986 match->use++; 987 988 return (match); 989 } 990 991 /* 992 * Subroutines to manage the address selection policy table via sysctl. 993 */ 994 struct walkarg { 995 size_t w_total; 996 size_t w_given; 997 caddr_t w_where; 998 caddr_t w_limit; 999 }; 1000 1001 int 1002 in6_src_sysctl(oldp, oldlenp, newp, newlen) 1003 void *oldp; 1004 size_t *oldlenp; 1005 void *newp; 1006 size_t newlen; 1007 { 1008 int error = 0; 1009 int s; 1010 1011 s = splsoftnet(); 1012 1013 if (newp) { 1014 error = EPERM; 1015 goto end; 1016 } 1017 if (oldp && oldlenp == NULL) { 1018 error = EINVAL; 1019 goto end; 1020 } 1021 if (oldp || oldlenp) { 1022 struct walkarg w; 1023 size_t oldlen = *oldlenp; 1024 1025 bzero(&w, sizeof(w)); 1026 w.w_given = oldlen; 1027 w.w_where = oldp; 1028 if (oldp) 1029 w.w_limit = (caddr_t)oldp + oldlen; 1030 1031 error = walk_addrsel_policy(dump_addrsel_policyent, &w); 1032 1033 *oldlenp = w.w_total; 1034 if (oldp && w.w_total > oldlen && error == 0) 1035 error = ENOMEM; 1036 } 1037 1038 end: 1039 splx(s); 1040 1041 return (error); 1042 } 1043 1044 int 1045 in6_src_ioctl(cmd, data) 1046 u_long cmd; 1047 caddr_t data; 1048 { 1049 int i; 1050 struct in6_addrpolicy ent0; 1051 1052 if (cmd != SIOCAADDRCTL_POLICY && cmd != SIOCDADDRCTL_POLICY) 1053 return (EOPNOTSUPP); /* check for safety */ 1054 1055 ent0 = *(struct in6_addrpolicy *)data; 1056 1057 if (ent0.label == ADDR_LABEL_NOTAPP) 1058 return (EINVAL); 1059 /* check if the prefix mask is consecutive. */ 1060 if (in6_mask2len(&ent0.addrmask.sin6_addr, NULL) < 0) 1061 return (EINVAL); 1062 /* clear trailing garbages (if any) of the prefix address. */ 1063 for (i = 0; i < 4; i++) { 1064 ent0.addr.sin6_addr.s6_addr32[i] &= 1065 ent0.addrmask.sin6_addr.s6_addr32[i]; 1066 } 1067 ent0.use = 0; 1068 1069 switch (cmd) { 1070 case SIOCAADDRCTL_POLICY: 1071 return (add_addrsel_policyent(&ent0)); 1072 case SIOCDADDRCTL_POLICY: 1073 return (delete_addrsel_policyent(&ent0)); 1074 } 1075 1076 return (0); /* XXX: compromise compilers */ 1077 } 1078 1079 /* 1080 * The followings are implementation of the policy table using a 1081 * simple tail queue. 1082 * XXX such details should be hidden. 1083 * XXX implementation using binary tree should be more efficient. 1084 */ 1085 struct addrsel_policyent { 1086 TAILQ_ENTRY(addrsel_policyent) ape_entry; 1087 struct in6_addrpolicy ape_policy; 1088 }; 1089 1090 TAILQ_HEAD(addrsel_policyhead, addrsel_policyent); 1091 1092 struct addrsel_policyhead addrsel_policytab; 1093 1094 static void 1095 init_policy_queue() 1096 { 1097 TAILQ_INIT(&addrsel_policytab); 1098 } 1099 1100 static int 1101 add_addrsel_policyent(newpolicy) 1102 struct in6_addrpolicy *newpolicy; 1103 { 1104 struct addrsel_policyent *new, *pol; 1105 1106 /* duplication check */ 1107 for (pol = TAILQ_FIRST(&addrsel_policytab); pol; 1108 pol = TAILQ_NEXT(pol, ape_entry)) { 1109 if (IN6_ARE_ADDR_EQUAL(&newpolicy->addr.sin6_addr, 1110 &pol->ape_policy.addr.sin6_addr) && 1111 IN6_ARE_ADDR_EQUAL(&newpolicy->addrmask.sin6_addr, 1112 &pol->ape_policy.addrmask.sin6_addr)) { 1113 return (EEXIST); /* or override it? */ 1114 } 1115 } 1116 1117 MALLOC(new, struct addrsel_policyent *, sizeof(*new), M_IFADDR, 1118 M_WAITOK); 1119 bzero(new, sizeof(*new)); 1120 1121 /* XXX: should validate entry */ 1122 new->ape_policy = *newpolicy; 1123 1124 TAILQ_INSERT_TAIL(&addrsel_policytab, new, ape_entry); 1125 1126 return (0); 1127 } 1128 1129 static int 1130 delete_addrsel_policyent(key) 1131 struct in6_addrpolicy *key; 1132 { 1133 struct addrsel_policyent *pol; 1134 1135 /* search for the entry in the table */ 1136 for (pol = TAILQ_FIRST(&addrsel_policytab); pol; 1137 pol = TAILQ_NEXT(pol, ape_entry)) { 1138 if (IN6_ARE_ADDR_EQUAL(&key->addr.sin6_addr, 1139 &pol->ape_policy.addr.sin6_addr) && 1140 IN6_ARE_ADDR_EQUAL(&key->addrmask.sin6_addr, 1141 &pol->ape_policy.addrmask.sin6_addr)) { 1142 break; 1143 } 1144 } 1145 if (pol == NULL) { 1146 return (ESRCH); 1147 } 1148 1149 TAILQ_REMOVE(&addrsel_policytab, pol, ape_entry); 1150 1151 return (0); 1152 } 1153 1154 static int 1155 walk_addrsel_policy(callback, w) 1156 int (*callback) __P((struct in6_addrpolicy *, void *)); 1157 void *w; 1158 { 1159 struct addrsel_policyent *pol; 1160 int error = 0; 1161 1162 for (pol = TAILQ_FIRST(&addrsel_policytab); pol; 1163 pol = TAILQ_NEXT(pol, ape_entry)) { 1164 if ((error = (*callback)(&pol->ape_policy, w)) != 0) { 1165 return (error); 1166 } 1167 } 1168 1169 return (error); 1170 } 1171 1172 static int 1173 dump_addrsel_policyent(pol, arg) 1174 struct in6_addrpolicy *pol; 1175 void *arg; 1176 { 1177 int error = 0; 1178 struct walkarg *w = arg; 1179 1180 if (w->w_where && w->w_where + sizeof(*pol) <= w->w_limit) { 1181 if ((error = copyout(pol, w->w_where, sizeof(*pol))) != 0) 1182 return (error); 1183 w->w_where += sizeof(*pol); 1184 } 1185 w->w_total += sizeof(*pol); 1186 1187 return (error); 1188 } 1189 1190 static struct in6_addrpolicy * 1191 match_addrsel_policy(key) 1192 struct sockaddr_in6 *key; 1193 { 1194 struct addrsel_policyent *pent; 1195 struct in6_addrpolicy *bestpol = NULL, *pol; 1196 int matchlen, bestmatchlen = -1; 1197 u_char *mp, *ep, *k, *p, m; 1198 1199 for (pent = TAILQ_FIRST(&addrsel_policytab); pent; 1200 pent = TAILQ_NEXT(pent, ape_entry)) { 1201 matchlen = 0; 1202 1203 pol = &pent->ape_policy; 1204 mp = (u_char *)&pol->addrmask.sin6_addr; 1205 ep = mp + 16; /* XXX: scope field? */ 1206 k = (u_char *)&key->sin6_addr; 1207 p = (u_char *)&pol->addr.sin6_addr; 1208 for (; mp < ep && *mp; mp++, k++, p++) { 1209 m = *mp; 1210 if ((*k & m) != *p) 1211 goto next; /* not match */ 1212 if (m == 0xff) /* short cut for a typical case */ 1213 matchlen += 8; 1214 else { 1215 while (m >= 0x80) { 1216 matchlen++; 1217 m <<= 1; 1218 } 1219 } 1220 } 1221 1222 /* matched. check if this is better than the current best. */ 1223 if (bestpol == NULL || 1224 matchlen > bestmatchlen) { 1225 bestpol = pol; 1226 bestmatchlen = matchlen; 1227 } 1228 1229 next: 1230 continue; 1231 } 1232 1233 return (bestpol); 1234 } 1235