1 /* $KAME: in6_src.c,v 1.159 2005/10/19 01:40:32 t-momose Exp $ */ 2 3 /* 4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the project nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1982, 1986, 1991, 1993 34 * The Regents of the University of California. All rights reserved. 35 * 36 * Redistribution and use in source and binary forms, with or without 37 * modification, are permitted provided that the following conditions 38 * are met: 39 * 1. Redistributions of source code must retain the above copyright 40 * notice, this list of conditions and the following disclaimer. 41 * 2. Redistributions in binary form must reproduce the above copyright 42 * notice, this list of conditions and the following disclaimer in the 43 * documentation and/or other materials provided with the distribution. 44 * 3. All advertising materials mentioning features or use of this software 45 * must display the following acknowledgement: 46 * This product includes software developed by the University of 47 * California, Berkeley and its contributors. 48 * 4. Neither the name of the University nor the names of its contributors 49 * may be used to endorse or promote products derived from this software 50 * without specific prior written permission. 51 * 52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 62 * SUCH DAMAGE. 63 * 64 * @(#)in_pcb.c 8.2 (Berkeley) 1/4/94 65 */ 66 67 #include <sys/cdefs.h> 68 __KERNEL_RCSID(0, "$NetBSD: in6_src.c,v 1.26 2006/05/14 21:19:34 elad Exp $"); 69 70 #include "opt_inet.h" 71 72 #include <sys/param.h> 73 #include <sys/systm.h> 74 #include <sys/malloc.h> 75 #include <sys/mbuf.h> 76 #include <sys/protosw.h> 77 #include <sys/socket.h> 78 #include <sys/socketvar.h> 79 #ifndef __FreeBSD__ 80 #include <sys/ioctl.h> 81 #else 82 #include <sys/sockio.h> 83 #endif 84 #ifdef __FreeBSD__ 85 #include <sys/sysctl.h> 86 #endif 87 #include <sys/errno.h> 88 #include <sys/time.h> 89 #include <sys/kernel.h> 90 #include <sys/proc.h> 91 #include <sys/kauth.h> 92 93 #include <net/if.h> 94 #include <net/if_types.h> 95 #include <net/route.h> 96 #ifdef RADIX_MPATH 97 #include <net/radix_mpath.h> 98 #endif 99 100 #include <netinet/in.h> 101 #include <netinet/in_var.h> 102 #include <netinet/in_systm.h> 103 #include <netinet/ip.h> 104 #include <netinet/in_pcb.h> 105 #include <netinet6/in6_var.h> 106 #include <netinet/ip6.h> 107 #ifndef __OpenBSD__ 108 #include <netinet6/in6_pcb.h> 109 #endif 110 #include <netinet6/ip6_var.h> 111 #include <netinet6/nd6.h> 112 #include <netinet6/scope6_var.h> 113 114 #include <net/net_osdep.h> 115 116 #ifdef MIP6 117 #include <netinet6/mip6.h> 118 #include <netinet6/mip6_var.h> 119 #include "mip.h" 120 #if NMIP > 0 121 #include <net/if_mip.h> 122 #endif /* NMIP > 0 */ 123 #endif /* MIP6 */ 124 125 #ifndef __OpenBSD__ 126 #include "loop.h" 127 #endif 128 #ifdef __NetBSD__ 129 extern struct ifnet loif[NLOOP]; 130 #endif 131 132 #define ADDR_LABEL_NOTAPP (-1) 133 struct in6_addrpolicy defaultaddrpolicy; 134 135 #ifdef notyet /* until introducing ND extensions and address selection */ 136 int ip6_prefer_tempaddr = 0; 137 #endif 138 139 static int selectroute __P((struct sockaddr_in6 *, struct ip6_pktopts *, 140 struct ip6_moptions *, struct route_in6 *, struct ifnet **, 141 struct rtentry **, int, int)); 142 static int in6_selectif __P((struct sockaddr_in6 *, struct ip6_pktopts *, 143 struct ip6_moptions *, struct route_in6 *, struct ifnet **)); 144 145 static struct in6_addrpolicy *lookup_addrsel_policy __P((struct sockaddr_in6 *)); 146 147 static void init_policy_queue __P((void)); 148 static int add_addrsel_policyent __P((struct in6_addrpolicy *)); 149 static int delete_addrsel_policyent __P((struct in6_addrpolicy *)); 150 static int walk_addrsel_policy __P((int (*)(struct in6_addrpolicy *, void *), 151 void *)); 152 static int dump_addrsel_policyent __P((struct in6_addrpolicy *, void *)); 153 static struct in6_addrpolicy *match_addrsel_policy __P((struct sockaddr_in6 *)); 154 155 /* 156 * Return an IPv6 address, which is the most appropriate for a given 157 * destination and user specified options. 158 * If necessary, this function lookups the routing table and returns 159 * an entry to the caller for later use. 160 */ 161 #if 0 /* diabled ad-hoc */ 162 #define REPLACE(r) do {\ 163 if ((r) < sizeof(ip6stat.ip6s_sources_rule) / \ 164 sizeof(ip6stat.ip6s_sources_rule[0])) /* check for safety */ \ 165 ip6stat.ip6s_sources_rule[(r)]++; \ 166 /* printf("in6_selectsrc: replace %s with %s by %d\n", ia_best ? ip6_sprintf(&ia_best->ia_addr.sin6_addr) : "none", ip6_sprintf(&ia->ia_addr.sin6_addr), (r)); */ \ 167 goto replace; \ 168 } while(0) 169 #define NEXT(r) do {\ 170 if ((r) < sizeof(ip6stat.ip6s_sources_rule) / \ 171 sizeof(ip6stat.ip6s_sources_rule[0])) /* check for safety */ \ 172 ip6stat.ip6s_sources_rule[(r)]++; \ 173 /* printf("in6_selectsrc: keep %s against %s by %d\n", ia_best ? ip6_sprintf(&ia_best->ia_addr.sin6_addr) : "none", ip6_sprintf(&ia->ia_addr.sin6_addr), (r)); */ \ 174 goto next; /* XXX: we can't use 'continue' here */ \ 175 } while(0) 176 #define BREAK(r) do { \ 177 if ((r) < sizeof(ip6stat.ip6s_sources_rule) / \ 178 sizeof(ip6stat.ip6s_sources_rule[0])) /* check for safety */ \ 179 ip6stat.ip6s_sources_rule[(r)]++; \ 180 goto out; /* XXX: we can't use 'break' here */ \ 181 } while(0) 182 #else 183 #define REPLACE(r) goto replace 184 #define NEXT(r) goto next 185 #define BREAK(r) goto out 186 #endif 187 188 struct in6_addr * 189 in6_selectsrc(dstsock, opts, mopts, ro, laddr, ifpp, errorp) 190 struct sockaddr_in6 *dstsock; 191 struct ip6_pktopts *opts; 192 struct ip6_moptions *mopts; 193 struct route_in6 *ro; 194 struct in6_addr *laddr; 195 struct ifnet **ifpp; 196 int *errorp; 197 { 198 struct in6_addr dst; 199 struct ifnet *ifp = NULL; 200 struct in6_ifaddr *ia = NULL, *ia_best = NULL; 201 struct in6_pktinfo *pi = NULL; 202 int dst_scope = -1, best_scope = -1, best_matchlen = -1; 203 struct in6_addrpolicy *dst_policy = NULL, *best_policy = NULL; 204 u_int32_t odstzone; 205 #ifdef notyet /* until introducing ND extensions and address selection */ 206 int prefer_tempaddr; 207 #endif 208 #if defined(MIP6) && NMIP > 0 209 u_int8_t ip6po_usecoa = 0; 210 #endif /* MIP6 && NMIP > 0 */ 211 212 dst = dstsock->sin6_addr; /* make a copy for local operation */ 213 *errorp = 0; 214 if (ifpp) 215 *ifpp = NULL; 216 217 /* 218 * If the source address is explicitly specified by the caller, 219 * check if the requested source address is indeed a unicast address 220 * assigned to the node, and can be used as the packet's source 221 * address. If everything is okay, use the address as source. 222 */ 223 if (opts && (pi = opts->ip6po_pktinfo) && 224 !IN6_IS_ADDR_UNSPECIFIED(&pi->ipi6_addr)) { 225 struct sockaddr_in6 srcsock; 226 struct in6_ifaddr *ia6; 227 228 /* get the outgoing interface */ 229 if ((*errorp = in6_selectif(dstsock, opts, mopts, ro, &ifp)) 230 != 0) { 231 return (NULL); 232 } 233 234 /* 235 * Determine the appropriate zone id of the source based on 236 * the zone of the destination and the outgoing interface. 237 * If the specified address is ambiguous wrt the scope zone, 238 * the interface must be specified; otherwise, ifa_ifwithaddr() 239 * will fail matching the address. 240 */ 241 bzero(&srcsock, sizeof(srcsock)); 242 srcsock.sin6_family = AF_INET6; 243 srcsock.sin6_len = sizeof(srcsock); 244 srcsock.sin6_addr = pi->ipi6_addr; 245 if (ifp) { 246 *errorp = in6_setscope(&srcsock.sin6_addr, ifp, NULL); 247 if (*errorp != 0) 248 return (NULL); 249 } 250 251 ia6 = (struct in6_ifaddr *)ifa_ifwithaddr((struct sockaddr *)(&srcsock)); 252 if (ia6 == NULL || 253 (ia6->ia6_flags & (IN6_IFF_ANYCAST | IN6_IFF_NOTREADY))) { 254 *errorp = EADDRNOTAVAIL; 255 return (NULL); 256 } 257 pi->ipi6_addr = srcsock.sin6_addr; /* XXX: this overrides pi */ 258 if (ifpp) 259 *ifpp = ifp; 260 return (&ia6->ia_addr.sin6_addr); 261 } 262 263 /* 264 * Otherwise, if the socket has already bound the source, just use it. 265 */ 266 if (laddr && !IN6_IS_ADDR_UNSPECIFIED(laddr)) 267 return (laddr); 268 269 /* 270 * If the address is not specified, choose the best one based on 271 * the outgoing interface and the destination address. 272 */ 273 /* get the outgoing interface */ 274 if ((*errorp = in6_selectif(dstsock, opts, mopts, ro, &ifp)) != 0) 275 return (NULL); 276 277 #if defined(MIP6) && NMIP > 0 278 /* 279 * a caller can specify IP6PO_USECOA to not to use a home 280 * address. for example, the case that the neighbour 281 * unreachability detection to the global address. 282 */ 283 if (opts != NULL && 284 (opts->ip6po_flags & IP6PO_USECOA) != 0) { 285 ip6po_usecoa = 1; 286 } 287 #endif /* MIP6 && NMIP > 0 */ 288 289 #ifdef DIAGNOSTIC 290 if (ifp == NULL) /* this should not happen */ 291 panic("in6_selectsrc: NULL ifp"); 292 #endif 293 *errorp = in6_setscope(&dst, ifp, &odstzone); 294 if (*errorp != 0) 295 return (NULL); 296 297 for (ia = in6_ifaddr; ia; ia = ia->ia_next) { 298 int new_scope = -1, new_matchlen = -1; 299 struct in6_addrpolicy *new_policy = NULL; 300 u_int32_t srczone, osrczone, dstzone; 301 struct in6_addr src; 302 struct ifnet *ifp1 = ia->ia_ifp; 303 304 /* 305 * We'll never take an address that breaks the scope zone 306 * of the destination. We also skip an address if its zone 307 * does not contain the outgoing interface. 308 * XXX: we should probably use sin6_scope_id here. 309 */ 310 if (in6_setscope(&dst, ifp1, &dstzone) || 311 odstzone != dstzone) { 312 continue; 313 } 314 src = ia->ia_addr.sin6_addr; 315 if (in6_setscope(&src, ifp, &osrczone) || 316 in6_setscope(&src, ifp1, &srczone) || 317 osrczone != srczone) { 318 continue; 319 } 320 321 /* avoid unusable addresses */ 322 if ((ia->ia6_flags & 323 (IN6_IFF_NOTREADY | IN6_IFF_ANYCAST | IN6_IFF_DETACHED))) { 324 continue; 325 } 326 if (!ip6_use_deprecated && IFA6_IS_DEPRECATED(ia)) 327 continue; 328 329 #if defined(MIP6) && NMIP > 0 330 /* avoid unusable home addresses. */ 331 if ((ia->ia6_flags & IN6_IFF_HOME) && 332 !mip6_ifa6_is_addr_valid_hoa(ia)) 333 continue; 334 #endif /* MIP6 && NMIP > 0 */ 335 336 /* Rule 1: Prefer same address */ 337 if (IN6_ARE_ADDR_EQUAL(&dst, &ia->ia_addr.sin6_addr)) { 338 ia_best = ia; 339 BREAK(1); /* there should be no better candidate */ 340 } 341 342 if (ia_best == NULL) 343 REPLACE(0); 344 345 /* Rule 2: Prefer appropriate scope */ 346 if (dst_scope < 0) 347 dst_scope = in6_addrscope(&dst); 348 new_scope = in6_addrscope(&ia->ia_addr.sin6_addr); 349 if (IN6_ARE_SCOPE_CMP(best_scope, new_scope) < 0) { 350 if (IN6_ARE_SCOPE_CMP(best_scope, dst_scope) < 0) 351 REPLACE(2); 352 NEXT(2); 353 } else if (IN6_ARE_SCOPE_CMP(new_scope, best_scope) < 0) { 354 if (IN6_ARE_SCOPE_CMP(new_scope, dst_scope) < 0) 355 NEXT(2); 356 REPLACE(2); 357 } 358 359 /* 360 * Rule 3: Avoid deprecated addresses. Note that the case of 361 * !ip6_use_deprecated is already rejected above. 362 */ 363 if (!IFA6_IS_DEPRECATED(ia_best) && IFA6_IS_DEPRECATED(ia)) 364 NEXT(3); 365 if (IFA6_IS_DEPRECATED(ia_best) && !IFA6_IS_DEPRECATED(ia)) 366 REPLACE(3); 367 368 /* Rule 4: Prefer home addresses */ 369 #if defined(MIP6) && NMIP > 0 370 if (!MIP6_IS_MN) 371 goto skip_rule4; 372 373 if ((ia_best->ia6_flags & IN6_IFF_HOME) == 0 && 374 (ia->ia6_flags & IN6_IFF_HOME) == 0) { 375 /* both address are not home addresses. */ 376 goto skip_rule4; 377 } 378 379 /* 380 * If SA is simultaneously a home address and care-of 381 * address and SB is not, then prefer SA. Similarly, 382 * if SB is simultaneously a home address and care-of 383 * address and SA is not, then prefer SB. 384 */ 385 if (((ia_best->ia6_flags & IN6_IFF_HOME) != 0 && 386 ia_best->ia_ifp->if_type != IFT_MIP) 387 && 388 ((ia->ia6_flags & IN6_IFF_HOME) != 0 && 389 ia->ia_ifp->if_type == IFT_MIP)) 390 NEXT(4); 391 if (((ia_best->ia6_flags & IN6_IFF_HOME) != 0 && 392 ia_best->ia_ifp->if_type == IFT_MIP) 393 && 394 ((ia->ia6_flags & IN6_IFF_HOME) != 0 && 395 ia->ia_ifp->if_type != IFT_MIP)) 396 REPLACE(4); 397 if (ip6po_usecoa == 0) { 398 /* 399 * If SA is just a home address and SB is just 400 * a care-of address, then prefer 401 * SA. Similarly, if SB is just a home address 402 * and SA is just a care-of address, then 403 * prefer SB. 404 */ 405 if ((ia_best->ia6_flags & IN6_IFF_HOME) != 0 && 406 (ia->ia6_flags & IN6_IFF_HOME) == 0) { 407 NEXT(4); 408 } 409 if ((ia_best->ia6_flags & IN6_IFF_HOME) == 0 && 410 (ia->ia6_flags & IN6_IFF_HOME) != 0) { 411 REPLACE(4); 412 } 413 } else { 414 /* 415 * a sender don't want to use a home address 416 * because: 417 * 418 * 1) we cannot use. (ex. NS or NA to global 419 * addresses.) 420 * 421 * 2) a user specified not to use. 422 * (ex. mip6control -u) 423 */ 424 if ((ia_best->ia6_flags & IN6_IFF_HOME) == 0 && 425 (ia->ia6_flags & IN6_IFF_HOME) != 0) { 426 /* XXX breaks stat */ 427 NEXT(0); 428 } 429 if ((ia_best->ia6_flags & IN6_IFF_HOME) != 0 && 430 (ia->ia6_flags & IN6_IFF_HOME) == 0) { 431 /* XXX breaks stat */ 432 REPLACE(0); 433 } 434 } 435 skip_rule4: 436 #endif /* MIP6 && NMIP > 0 */ 437 438 /* Rule 5: Prefer outgoing interface */ 439 if (ia_best->ia_ifp == ifp && ia->ia_ifp != ifp) 440 NEXT(5); 441 if (ia_best->ia_ifp != ifp && ia->ia_ifp == ifp) 442 REPLACE(5); 443 444 /* 445 * Rule 6: Prefer matching label 446 * Note that best_policy should be non-NULL here. 447 */ 448 if (dst_policy == NULL) 449 dst_policy = lookup_addrsel_policy(dstsock); 450 if (dst_policy->label != ADDR_LABEL_NOTAPP) { 451 new_policy = lookup_addrsel_policy(&ia->ia_addr); 452 if (dst_policy->label == best_policy->label && 453 dst_policy->label != new_policy->label) 454 NEXT(6); 455 if (dst_policy->label != best_policy->label && 456 dst_policy->label == new_policy->label) 457 REPLACE(6); 458 } 459 460 /* 461 * Rule 7: Prefer public addresses. 462 * We allow users to reverse the logic by configuring 463 * a sysctl variable, so that privacy conscious users can 464 * always prefer temporary addresses. 465 */ 466 #ifdef notyet /* until introducing ND extensions and address selection */ 467 if (opts == NULL || 468 opts->ip6po_prefer_tempaddr == IP6PO_TEMPADDR_SYSTEM) { 469 prefer_tempaddr = ip6_prefer_tempaddr; 470 } else if (opts->ip6po_prefer_tempaddr == 471 IP6PO_TEMPADDR_NOTPREFER) { 472 prefer_tempaddr = 0; 473 } else 474 prefer_tempaddr = 1; 475 if (!(ia_best->ia6_flags & IN6_IFF_TEMPORARY) && 476 (ia->ia6_flags & IN6_IFF_TEMPORARY)) { 477 if (prefer_tempaddr) 478 REPLACE(7); 479 else 480 NEXT(7); 481 } 482 if ((ia_best->ia6_flags & IN6_IFF_TEMPORARY) && 483 !(ia->ia6_flags & IN6_IFF_TEMPORARY)) { 484 if (prefer_tempaddr) 485 NEXT(7); 486 else 487 REPLACE(7); 488 } 489 #endif 490 491 /* 492 * Rule 8: prefer addresses on alive interfaces. 493 * This is a KAME specific rule. 494 */ 495 if ((ia_best->ia_ifp->if_flags & IFF_UP) && 496 !(ia->ia_ifp->if_flags & IFF_UP)) 497 NEXT(8); 498 if (!(ia_best->ia_ifp->if_flags & IFF_UP) && 499 (ia->ia_ifp->if_flags & IFF_UP)) 500 REPLACE(8); 501 502 /* 503 * Rule 9: prefer addresses on "preferred" interfaces. 504 * This is a KAME specific rule. 505 */ 506 #ifdef notyet /* until introducing address selection */ 507 #define NDI_BEST ND_IFINFO(ia_best->ia_ifp) 508 #define NDI_NEW ND_IFINFO(ia->ia_ifp) 509 if ((NDI_BEST->flags & ND6_IFF_PREFER_SOURCE) && 510 !(NDI_NEW->flags & ND6_IFF_PREFER_SOURCE)) 511 NEXT(9); 512 if (!(NDI_BEST->flags & ND6_IFF_PREFER_SOURCE) && 513 (NDI_NEW->flags & ND6_IFF_PREFER_SOURCE)) 514 REPLACE(9); 515 #undef NDI_BEST 516 #undef NDI_NEW 517 #endif 518 519 /* 520 * Rule 14: Use longest matching prefix. 521 * Note: in the address selection draft, this rule is 522 * documented as "Rule 8". However, since it is also 523 * documented that this rule can be overridden, we assign 524 * a large number so that it is easy to assign smaller numbers 525 * to more preferred rules. 526 */ 527 new_matchlen = in6_matchlen(&ia->ia_addr.sin6_addr, &dst); 528 if (best_matchlen < new_matchlen) 529 REPLACE(14); 530 if (new_matchlen < best_matchlen) 531 NEXT(14); 532 533 /* Rule 15 is reserved. */ 534 535 /* 536 * Last resort: just keep the current candidate. 537 * Or, do we need more rules? 538 */ 539 continue; 540 541 replace: 542 ia_best = ia; 543 best_scope = (new_scope >= 0 ? new_scope : 544 in6_addrscope(&ia_best->ia_addr.sin6_addr)); 545 best_policy = (new_policy ? new_policy : 546 lookup_addrsel_policy(&ia_best->ia_addr)); 547 best_matchlen = (new_matchlen >= 0 ? new_matchlen : 548 in6_matchlen(&ia_best->ia_addr.sin6_addr, 549 &dst)); 550 551 next: 552 continue; 553 554 out: 555 break; 556 } 557 558 if ((ia = ia_best) == NULL) { 559 *errorp = EADDRNOTAVAIL; 560 return (NULL); 561 } 562 563 if (ifpp) 564 *ifpp = ifp; 565 return (&ia->ia_addr.sin6_addr); 566 } 567 #undef REPLACE 568 #undef BREAK 569 #undef NEXT 570 571 static int 572 selectroute(dstsock, opts, mopts, ro, retifp, retrt, clone, norouteok) 573 struct sockaddr_in6 *dstsock; 574 struct ip6_pktopts *opts; 575 struct ip6_moptions *mopts; 576 #ifdef NEW_STRUCT_ROUTE 577 struct route *ro; 578 #else 579 struct route_in6 *ro; 580 #endif 581 struct ifnet **retifp; 582 struct rtentry **retrt; 583 int clone; 584 int norouteok; 585 { 586 int error = 0; 587 struct ifnet *ifp = NULL; 588 struct rtentry *rt = NULL; 589 struct sockaddr_in6 *sin6_next; 590 struct in6_pktinfo *pi = NULL; 591 struct in6_addr *dst; 592 593 dst = &dstsock->sin6_addr; 594 595 #if 0 596 if (dstsock->sin6_addr.s6_addr32[0] == 0 && 597 dstsock->sin6_addr.s6_addr32[1] == 0 && 598 !IN6_IS_ADDR_LOOPBACK(&dstsock->sin6_addr)) { 599 printf("in6_selectroute: strange destination %s\n", 600 ip6_sprintf(&dstsock->sin6_addr)); 601 } else { 602 printf("in6_selectroute: destination = %s%%%d\n", 603 ip6_sprintf(&dstsock->sin6_addr), 604 dstsock->sin6_scope_id); /* for debug */ 605 } 606 #endif 607 608 /* If the caller specify the outgoing interface explicitly, use it. */ 609 if (opts && (pi = opts->ip6po_pktinfo) != NULL && pi->ipi6_ifindex) { 610 /* XXX boundary check is assumed to be already done. */ 611 #ifdef __FreeBSD__ 612 ifp = ifnet_byindex(pi->ipi6_ifindex); 613 #else 614 ifp = ifindex2ifnet[pi->ipi6_ifindex]; 615 #endif 616 if (ifp != NULL && 617 (norouteok || retrt == NULL || 618 IN6_IS_ADDR_MULTICAST(dst))) { 619 /* 620 * we do not have to check or get the route for 621 * multicast. 622 */ 623 goto done; 624 } else 625 goto getroute; 626 } 627 628 /* 629 * If the destination address is a multicast address and the outgoing 630 * interface for the address is specified by the caller, use it. 631 */ 632 if (IN6_IS_ADDR_MULTICAST(dst) && 633 mopts != NULL && (ifp = mopts->im6o_multicast_ifp) != NULL) { 634 goto done; /* we do not need a route for multicast. */ 635 } 636 637 getroute: 638 /* 639 * If the next hop address for the packet is specified by the caller, 640 * use it as the gateway. 641 */ 642 if (opts && opts->ip6po_nexthop) { 643 struct route_in6 *ron; 644 645 sin6_next = satosin6(opts->ip6po_nexthop); 646 647 /* at this moment, we only support AF_INET6 next hops */ 648 if (sin6_next->sin6_family != AF_INET6) { 649 error = EAFNOSUPPORT; /* or should we proceed? */ 650 goto done; 651 } 652 653 /* 654 * If the next hop is an IPv6 address, then the node identified 655 * by that address must be a neighbor of the sending host. 656 */ 657 ron = &opts->ip6po_nextroute; 658 if ((ron->ro_rt && 659 (ron->ro_rt->rt_flags & (RTF_UP | RTF_GATEWAY)) != 660 RTF_UP) || 661 !IN6_ARE_ADDR_EQUAL(&satosin6(&ron->ro_dst)->sin6_addr, 662 &sin6_next->sin6_addr)) { 663 if (ron->ro_rt) { 664 RTFREE(ron->ro_rt); 665 ron->ro_rt = NULL; 666 } 667 *satosin6(&ron->ro_dst) = *sin6_next; 668 } 669 if (ron->ro_rt == NULL) { 670 rtalloc((struct route *)ron); /* multi path case? */ 671 if (ron->ro_rt == NULL || 672 (ron->ro_rt->rt_flags & RTF_GATEWAY)) { 673 if (ron->ro_rt) { 674 RTFREE(ron->ro_rt); 675 ron->ro_rt = NULL; 676 } 677 error = EHOSTUNREACH; 678 goto done; 679 } 680 } 681 if (!nd6_is_addr_neighbor(sin6_next, ron->ro_rt->rt_ifp)) { 682 RTFREE(ron->ro_rt); 683 ron->ro_rt = NULL; 684 error = EHOSTUNREACH; 685 goto done; 686 } 687 rt = ron->ro_rt; 688 ifp = rt->rt_ifp; 689 690 /* 691 * When cloning is required, try to allocate a route to the 692 * destination so that the caller can store path MTU 693 * information. 694 */ 695 if (!clone) 696 goto done; 697 } 698 699 /* 700 * Use a cached route if it exists and is valid, else try to allocate 701 * a new one. Note that we should check the address family of the 702 * cached destination, in case of sharing the cache with IPv4. 703 */ 704 if (ro) { 705 if (ro->ro_rt && 706 (!(ro->ro_rt->rt_flags & RTF_UP) || 707 ((struct sockaddr *)(&ro->ro_dst))->sa_family != AF_INET6 || 708 !IN6_ARE_ADDR_EQUAL(&satosin6(&ro->ro_dst)->sin6_addr, 709 dst))) { 710 RTFREE(ro->ro_rt); 711 ro->ro_rt = (struct rtentry *)NULL; 712 } 713 if (ro->ro_rt == (struct rtentry *)NULL) { 714 struct sockaddr_in6 *sa6; 715 716 /* No route yet, so try to acquire one */ 717 bzero(&ro->ro_dst, sizeof(struct sockaddr_in6)); 718 sa6 = (struct sockaddr_in6 *)&ro->ro_dst; 719 *sa6 = *dstsock; 720 sa6->sin6_scope_id = 0; 721 if (clone) { 722 #ifdef RADIX_MPATH 723 rtalloc_mpath((struct route *)ro, 724 ntohl(sa6->sin6_addr.s6_addr32[3])); 725 #else 726 rtalloc((struct route *)ro); 727 #endif /* RADIX_MPATH */ 728 } else { 729 #ifdef RADIX_MPATH 730 rtalloc_mpath((struct route *)ro, 731 ntohl(sa6->sin6_addr.s6_addr32[3])); 732 #else 733 ro->ro_rt = rtalloc1(&((struct route *)ro) 734 ->ro_dst, 0); 735 #endif /* RADIX_MPATH */ 736 } 737 } 738 739 /* 740 * do not care about the result if we have the nexthop 741 * explicitly specified. 742 */ 743 if (opts && opts->ip6po_nexthop) 744 goto done; 745 746 if (ro->ro_rt) { 747 ifp = ro->ro_rt->rt_ifp; 748 749 if (ifp == NULL) { /* can this really happen? */ 750 RTFREE(ro->ro_rt); 751 ro->ro_rt = NULL; 752 } 753 } 754 if (ro->ro_rt == NULL) 755 error = EHOSTUNREACH; 756 rt = ro->ro_rt; 757 758 /* 759 * Check if the outgoing interface conflicts with 760 * the interface specified by ipi6_ifindex (if specified). 761 * Note that loopback interface is always okay. 762 * (this may happen when we are sending a packet to one of 763 * our own addresses.) 764 */ 765 if (opts && opts->ip6po_pktinfo && 766 opts->ip6po_pktinfo->ipi6_ifindex) { 767 if (!(ifp->if_flags & IFF_LOOPBACK) && 768 ifp->if_index != 769 opts->ip6po_pktinfo->ipi6_ifindex) { 770 error = EHOSTUNREACH; 771 goto done; 772 } 773 } 774 } 775 776 done: 777 if (ifp == NULL && rt == NULL) { 778 /* 779 * This can happen if the caller did not pass a cached route 780 * nor any other hints. We treat this case an error. 781 */ 782 error = EHOSTUNREACH; 783 } 784 if (error == EHOSTUNREACH) 785 ip6stat.ip6s_noroute++; 786 787 if (retifp != NULL) 788 *retifp = ifp; 789 if (retrt != NULL) 790 *retrt = rt; /* rt may be NULL */ 791 792 return (error); 793 } 794 795 static int 796 in6_selectif(dstsock, opts, mopts, ro, retifp) 797 struct sockaddr_in6 *dstsock; 798 struct ip6_pktopts *opts; 799 struct ip6_moptions *mopts; 800 struct route_in6 *ro; 801 struct ifnet **retifp; 802 { 803 int error, clone; 804 struct rtentry *rt = NULL; 805 806 clone = IN6_IS_ADDR_MULTICAST(&dstsock->sin6_addr) ? 0 : 1; 807 if ((error = selectroute(dstsock, opts, mopts, ro, retifp, 808 &rt, clone, 1)) != 0) { 809 return (error); 810 } 811 812 /* 813 * do not use a rejected or black hole route. 814 * XXX: this check should be done in the L2 output routine. 815 * However, if we skipped this check here, we'd see the following 816 * scenario: 817 * - install a rejected route for a scoped address prefix 818 * (like fe80::/10) 819 * - send a packet to a destination that matches the scoped prefix, 820 * with ambiguity about the scope zone. 821 * - pick the outgoing interface from the route, and disambiguate the 822 * scope zone with the interface. 823 * - ip6_output() would try to get another route with the "new" 824 * destination, which may be valid. 825 * - we'd see no error on output. 826 * Although this may not be very harmful, it should still be confusing. 827 * We thus reject the case here. 828 */ 829 if (rt && (rt->rt_flags & (RTF_REJECT | RTF_BLACKHOLE))) 830 return (rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH); 831 832 /* 833 * Adjust the "outgoing" interface. If we're going to loop the packet 834 * back to ourselves, the ifp would be the loopback interface. 835 * However, we'd rather know the interface associated to the 836 * destination address (which should probably be one of our own 837 * addresses.) 838 */ 839 if (rt && rt->rt_ifa && rt->rt_ifa->ifa_ifp) 840 *retifp = rt->rt_ifa->ifa_ifp; 841 842 return (0); 843 } 844 845 int 846 in6_selectroute(dstsock, opts, mopts, ro, retifp, retrt, clone) 847 struct sockaddr_in6 *dstsock; 848 struct ip6_pktopts *opts; 849 struct ip6_moptions *mopts; 850 struct route_in6 *ro; 851 struct ifnet **retifp; 852 struct rtentry **retrt; 853 int clone; /* meaningful only for bsdi and freebsd. */ 854 { 855 return (selectroute(dstsock, opts, mopts, ro, retifp, 856 retrt, clone, 0)); 857 } 858 859 /* 860 * Default hop limit selection. The precedence is as follows: 861 * 1. Hoplimit value specified via ioctl. 862 * 2. (If the outgoing interface is detected) the current 863 * hop limit of the interface specified by router advertisement. 864 * 3. The system default hoplimit. 865 */ 866 int 867 in6_selecthlim(in6p, ifp) 868 struct in6pcb *in6p; 869 struct ifnet *ifp; 870 { 871 if (in6p && in6p->in6p_hops >= 0) 872 return (in6p->in6p_hops); 873 else if (ifp) 874 return (ND_IFINFO(ifp)->chlim); 875 else 876 return (ip6_defhlim); 877 } 878 879 /* 880 * Find an empty port and set it to the specified PCB. 881 */ 882 int 883 in6_pcbsetport(laddr, in6p, p) 884 struct in6_addr *laddr; 885 struct in6pcb *in6p; 886 struct proc *p; 887 { 888 struct socket *so = in6p->in6p_socket; 889 struct inpcbtable *table = in6p->in6p_table; 890 int cnt; 891 u_int16_t minport, maxport; 892 u_int16_t lport, *lastport; 893 int wild = 0; 894 void *t; 895 896 /* XXX: this is redundant when called from in6_pcbbind */ 897 if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0 && 898 ((so->so_proto->pr_flags & PR_CONNREQUIRED) == 0 || 899 (so->so_options & SO_ACCEPTCONN) == 0)) 900 wild = 1; 901 902 if (in6p->in6p_flags & IN6P_LOWPORT) { 903 #ifndef IPNOPRIVPORTS 904 if (p == 0 || (kauth_authorize_generic(p->p_cred, 905 KAUTH_GENERIC_ISSUSER, &p->p_acflag) != 0)) 906 return (EACCES); 907 #endif 908 minport = ip6_lowportmin; 909 maxport = ip6_lowportmax; 910 lastport = &table->inpt_lastlow; 911 } else { 912 minport = ip6_anonportmin; 913 maxport = ip6_anonportmax; 914 lastport = &table->inpt_lastport; 915 } 916 917 if (minport > maxport) { /* sanity check */ 918 u_int16_t swp; 919 920 swp = minport; 921 minport = maxport; 922 maxport = swp; 923 } 924 925 lport = *lastport - 1; 926 for (cnt = maxport - minport + 1; cnt; cnt--, lport--) { 927 if (lport < minport || lport > maxport) 928 lport = maxport; 929 #ifdef INET 930 if (IN6_IS_ADDR_V4MAPPED(laddr)) { 931 t = in_pcblookup_port(table, 932 *(struct in_addr *)&laddr->s6_addr32[3], 933 lport, wild); 934 } else 935 #endif 936 { 937 t = in6_pcblookup_port(table, laddr, lport, wild); 938 } 939 if (t == 0) 940 goto found; 941 } 942 943 return (EAGAIN); 944 945 found: 946 in6p->in6p_flags |= IN6P_ANONPORT; 947 *lastport = lport; 948 in6p->in6p_lport = htons(lport); 949 in6_pcbstate(in6p, IN6P_BOUND); 950 return (0); /* success */ 951 } 952 953 void 954 addrsel_policy_init() 955 { 956 init_policy_queue(); 957 958 /* initialize the "last resort" policy */ 959 bzero(&defaultaddrpolicy, sizeof(defaultaddrpolicy)); 960 defaultaddrpolicy.label = ADDR_LABEL_NOTAPP; 961 } 962 963 static struct in6_addrpolicy * 964 lookup_addrsel_policy(key) 965 struct sockaddr_in6 *key; 966 { 967 struct in6_addrpolicy *match = NULL; 968 969 match = match_addrsel_policy(key); 970 971 if (match == NULL) 972 match = &defaultaddrpolicy; 973 else 974 match->use++; 975 976 return (match); 977 } 978 979 /* 980 * Subroutines to manage the address selection policy table via sysctl. 981 */ 982 struct walkarg { 983 size_t w_total; 984 size_t w_given; 985 caddr_t w_where; 986 caddr_t w_limit; 987 }; 988 989 int 990 in6_src_sysctl(oldp, oldlenp, newp, newlen) 991 void *oldp; 992 size_t *oldlenp; 993 void *newp; 994 size_t newlen; 995 { 996 int error = 0; 997 int s; 998 999 s = splsoftnet(); 1000 1001 if (newp) { 1002 error = EPERM; 1003 goto end; 1004 } 1005 if (oldp && oldlenp == NULL) { 1006 error = EINVAL; 1007 goto end; 1008 } 1009 if (oldp || oldlenp) { 1010 struct walkarg w; 1011 size_t oldlen = *oldlenp; 1012 1013 bzero(&w, sizeof(w)); 1014 w.w_given = oldlen; 1015 w.w_where = oldp; 1016 if (oldp) 1017 w.w_limit = (caddr_t)oldp + oldlen; 1018 1019 error = walk_addrsel_policy(dump_addrsel_policyent, &w); 1020 1021 *oldlenp = w.w_total; 1022 if (oldp && w.w_total > oldlen && error == 0) 1023 error = ENOMEM; 1024 } 1025 1026 end: 1027 splx(s); 1028 1029 return (error); 1030 } 1031 1032 int 1033 in6_src_ioctl(cmd, data) 1034 u_long cmd; 1035 caddr_t data; 1036 { 1037 int i; 1038 struct in6_addrpolicy ent0; 1039 1040 if (cmd != SIOCAADDRCTL_POLICY && cmd != SIOCDADDRCTL_POLICY) 1041 return (EOPNOTSUPP); /* check for safety */ 1042 1043 ent0 = *(struct in6_addrpolicy *)data; 1044 1045 if (ent0.label == ADDR_LABEL_NOTAPP) 1046 return (EINVAL); 1047 /* check if the prefix mask is consecutive. */ 1048 if (in6_mask2len(&ent0.addrmask.sin6_addr, NULL) < 0) 1049 return (EINVAL); 1050 /* clear trailing garbages (if any) of the prefix address. */ 1051 for (i = 0; i < 4; i++) { 1052 ent0.addr.sin6_addr.s6_addr32[i] &= 1053 ent0.addrmask.sin6_addr.s6_addr32[i]; 1054 } 1055 ent0.use = 0; 1056 1057 switch (cmd) { 1058 case SIOCAADDRCTL_POLICY: 1059 return (add_addrsel_policyent(&ent0)); 1060 case SIOCDADDRCTL_POLICY: 1061 return (delete_addrsel_policyent(&ent0)); 1062 } 1063 1064 return (0); /* XXX: compromise compilers */ 1065 } 1066 1067 /* 1068 * The followings are implementation of the policy table using a 1069 * simple tail queue. 1070 * XXX such details should be hidden. 1071 * XXX implementation using binary tree should be more efficient. 1072 */ 1073 struct addrsel_policyent { 1074 TAILQ_ENTRY(addrsel_policyent) ape_entry; 1075 struct in6_addrpolicy ape_policy; 1076 }; 1077 1078 TAILQ_HEAD(addrsel_policyhead, addrsel_policyent); 1079 1080 struct addrsel_policyhead addrsel_policytab; 1081 1082 static void 1083 init_policy_queue() 1084 { 1085 TAILQ_INIT(&addrsel_policytab); 1086 } 1087 1088 static int 1089 add_addrsel_policyent(newpolicy) 1090 struct in6_addrpolicy *newpolicy; 1091 { 1092 struct addrsel_policyent *new, *pol; 1093 1094 /* duplication check */ 1095 for (pol = TAILQ_FIRST(&addrsel_policytab); pol; 1096 pol = TAILQ_NEXT(pol, ape_entry)) { 1097 if (IN6_ARE_ADDR_EQUAL(&newpolicy->addr.sin6_addr, 1098 &pol->ape_policy.addr.sin6_addr) && 1099 IN6_ARE_ADDR_EQUAL(&newpolicy->addrmask.sin6_addr, 1100 &pol->ape_policy.addrmask.sin6_addr)) { 1101 return (EEXIST); /* or override it? */ 1102 } 1103 } 1104 1105 MALLOC(new, struct addrsel_policyent *, sizeof(*new), M_IFADDR, 1106 M_WAITOK); 1107 bzero(new, sizeof(*new)); 1108 1109 /* XXX: should validate entry */ 1110 new->ape_policy = *newpolicy; 1111 1112 TAILQ_INSERT_TAIL(&addrsel_policytab, new, ape_entry); 1113 1114 return (0); 1115 } 1116 1117 static int 1118 delete_addrsel_policyent(key) 1119 struct in6_addrpolicy *key; 1120 { 1121 struct addrsel_policyent *pol; 1122 1123 /* search for the entry in the table */ 1124 for (pol = TAILQ_FIRST(&addrsel_policytab); pol; 1125 pol = TAILQ_NEXT(pol, ape_entry)) { 1126 if (IN6_ARE_ADDR_EQUAL(&key->addr.sin6_addr, 1127 &pol->ape_policy.addr.sin6_addr) && 1128 IN6_ARE_ADDR_EQUAL(&key->addrmask.sin6_addr, 1129 &pol->ape_policy.addrmask.sin6_addr)) { 1130 break; 1131 } 1132 } 1133 if (pol == NULL) { 1134 return (ESRCH); 1135 } 1136 1137 TAILQ_REMOVE(&addrsel_policytab, pol, ape_entry); 1138 1139 return (0); 1140 } 1141 1142 static int 1143 walk_addrsel_policy(callback, w) 1144 int (*callback) __P((struct in6_addrpolicy *, void *)); 1145 void *w; 1146 { 1147 struct addrsel_policyent *pol; 1148 int error = 0; 1149 1150 for (pol = TAILQ_FIRST(&addrsel_policytab); pol; 1151 pol = TAILQ_NEXT(pol, ape_entry)) { 1152 if ((error = (*callback)(&pol->ape_policy, w)) != 0) { 1153 return (error); 1154 } 1155 } 1156 1157 return (error); 1158 } 1159 1160 static int 1161 dump_addrsel_policyent(pol, arg) 1162 struct in6_addrpolicy *pol; 1163 void *arg; 1164 { 1165 int error = 0; 1166 struct walkarg *w = arg; 1167 1168 if (w->w_where && w->w_where + sizeof(*pol) <= w->w_limit) { 1169 if ((error = copyout(pol, w->w_where, sizeof(*pol))) != 0) 1170 return (error); 1171 w->w_where += sizeof(*pol); 1172 } 1173 w->w_total += sizeof(*pol); 1174 1175 return (error); 1176 } 1177 1178 static struct in6_addrpolicy * 1179 match_addrsel_policy(key) 1180 struct sockaddr_in6 *key; 1181 { 1182 struct addrsel_policyent *pent; 1183 struct in6_addrpolicy *bestpol = NULL, *pol; 1184 int matchlen, bestmatchlen = -1; 1185 u_char *mp, *ep, *k, *p, m; 1186 1187 for (pent = TAILQ_FIRST(&addrsel_policytab); pent; 1188 pent = TAILQ_NEXT(pent, ape_entry)) { 1189 matchlen = 0; 1190 1191 pol = &pent->ape_policy; 1192 mp = (u_char *)&pol->addrmask.sin6_addr; 1193 ep = mp + 16; /* XXX: scope field? */ 1194 k = (u_char *)&key->sin6_addr; 1195 p = (u_char *)&pol->addr.sin6_addr; 1196 for (; mp < ep && *mp; mp++, k++, p++) { 1197 m = *mp; 1198 if ((*k & m) != *p) 1199 goto next; /* not match */ 1200 if (m == 0xff) /* short cut for a typical case */ 1201 matchlen += 8; 1202 else { 1203 while (m >= 0x80) { 1204 matchlen++; 1205 m <<= 1; 1206 } 1207 } 1208 } 1209 1210 /* matched. check if this is better than the current best. */ 1211 if (bestpol == NULL || 1212 matchlen > bestmatchlen) { 1213 bestpol = pol; 1214 bestmatchlen = matchlen; 1215 } 1216 1217 next: 1218 continue; 1219 } 1220 1221 return (bestpol); 1222 } 1223