1 /* $KAME: in6_src.c,v 1.159 2005/10/19 01:40:32 t-momose Exp $ */ 2 3 /* 4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the project nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1982, 1986, 1991, 1993 34 * The Regents of the University of California. All rights reserved. 35 * 36 * Redistribution and use in source and binary forms, with or without 37 * modification, are permitted provided that the following conditions 38 * are met: 39 * 1. Redistributions of source code must retain the above copyright 40 * notice, this list of conditions and the following disclaimer. 41 * 2. Redistributions in binary form must reproduce the above copyright 42 * notice, this list of conditions and the following disclaimer in the 43 * documentation and/or other materials provided with the distribution. 44 * 3. All advertising materials mentioning features or use of this software 45 * must display the following acknowledgement: 46 * This product includes software developed by the University of 47 * California, Berkeley and its contributors. 48 * 4. Neither the name of the University nor the names of its contributors 49 * may be used to endorse or promote products derived from this software 50 * without specific prior written permission. 51 * 52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 62 * SUCH DAMAGE. 63 * 64 * @(#)in_pcb.c 8.2 (Berkeley) 1/4/94 65 */ 66 67 #include <sys/cdefs.h> 68 __KERNEL_RCSID(0, "$NetBSD: in6_src.c,v 1.23 2006/01/21 00:15:36 rpaulo Exp $"); 69 70 #include "opt_inet.h" 71 72 #include <sys/param.h> 73 #include <sys/systm.h> 74 #include <sys/malloc.h> 75 #include <sys/mbuf.h> 76 #include <sys/protosw.h> 77 #include <sys/socket.h> 78 #include <sys/socketvar.h> 79 #ifndef __FreeBSD__ 80 #include <sys/ioctl.h> 81 #else 82 #include <sys/sockio.h> 83 #endif 84 #ifdef __FreeBSD__ 85 #include <sys/sysctl.h> 86 #endif 87 #include <sys/errno.h> 88 #include <sys/time.h> 89 #include <sys/kernel.h> 90 #include <sys/proc.h> 91 92 #include <net/if.h> 93 #include <net/if_types.h> 94 #include <net/route.h> 95 #ifdef RADIX_MPATH 96 #include <net/radix_mpath.h> 97 #endif 98 99 #include <netinet/in.h> 100 #include <netinet/in_var.h> 101 #include <netinet/in_systm.h> 102 #include <netinet/ip.h> 103 #include <netinet/in_pcb.h> 104 #include <netinet6/in6_var.h> 105 #include <netinet/ip6.h> 106 #ifndef __OpenBSD__ 107 #include <netinet6/in6_pcb.h> 108 #endif 109 #include <netinet6/ip6_var.h> 110 #include <netinet6/nd6.h> 111 #include <netinet6/scope6_var.h> 112 113 #include <net/net_osdep.h> 114 115 #ifdef MIP6 116 #include <netinet6/mip6.h> 117 #include <netinet6/mip6_var.h> 118 #include "mip.h" 119 #if NMIP > 0 120 #include <net/if_mip.h> 121 #endif /* NMIP > 0 */ 122 #endif /* MIP6 */ 123 124 #ifndef __OpenBSD__ 125 #include "loop.h" 126 #endif 127 #ifdef __NetBSD__ 128 extern struct ifnet loif[NLOOP]; 129 #endif 130 131 #define ADDR_LABEL_NOTAPP (-1) 132 struct in6_addrpolicy defaultaddrpolicy; 133 134 #ifdef notyet /* until introducing ND extensions and address selection */ 135 int ip6_prefer_tempaddr = 0; 136 #endif 137 138 static int selectroute __P((struct sockaddr_in6 *, struct ip6_pktopts *, 139 struct ip6_moptions *, struct route_in6 *, struct ifnet **, 140 struct rtentry **, int, int)); 141 static int in6_selectif __P((struct sockaddr_in6 *, struct ip6_pktopts *, 142 struct ip6_moptions *, struct route_in6 *, struct ifnet **)); 143 144 static struct in6_addrpolicy *lookup_addrsel_policy __P((struct sockaddr_in6 *)); 145 146 static void init_policy_queue __P((void)); 147 static int add_addrsel_policyent __P((struct in6_addrpolicy *)); 148 static int delete_addrsel_policyent __P((struct in6_addrpolicy *)); 149 static int walk_addrsel_policy __P((int (*)(struct in6_addrpolicy *, void *), 150 void *)); 151 static int dump_addrsel_policyent __P((struct in6_addrpolicy *, void *)); 152 static struct in6_addrpolicy *match_addrsel_policy __P((struct sockaddr_in6 *)); 153 154 /* 155 * Return an IPv6 address, which is the most appropriate for a given 156 * destination and user specified options. 157 * If necessary, this function lookups the routing table and returns 158 * an entry to the caller for later use. 159 */ 160 #if 0 /* diabled ad-hoc */ 161 #define REPLACE(r) do {\ 162 if ((r) < sizeof(ip6stat.ip6s_sources_rule) / \ 163 sizeof(ip6stat.ip6s_sources_rule[0])) /* check for safety */ \ 164 ip6stat.ip6s_sources_rule[(r)]++; \ 165 /* printf("in6_selectsrc: replace %s with %s by %d\n", ia_best ? ip6_sprintf(&ia_best->ia_addr.sin6_addr) : "none", ip6_sprintf(&ia->ia_addr.sin6_addr), (r)); */ \ 166 goto replace; \ 167 } while(0) 168 #define NEXT(r) do {\ 169 if ((r) < sizeof(ip6stat.ip6s_sources_rule) / \ 170 sizeof(ip6stat.ip6s_sources_rule[0])) /* check for safety */ \ 171 ip6stat.ip6s_sources_rule[(r)]++; \ 172 /* printf("in6_selectsrc: keep %s against %s by %d\n", ia_best ? ip6_sprintf(&ia_best->ia_addr.sin6_addr) : "none", ip6_sprintf(&ia->ia_addr.sin6_addr), (r)); */ \ 173 goto next; /* XXX: we can't use 'continue' here */ \ 174 } while(0) 175 #define BREAK(r) do { \ 176 if ((r) < sizeof(ip6stat.ip6s_sources_rule) / \ 177 sizeof(ip6stat.ip6s_sources_rule[0])) /* check for safety */ \ 178 ip6stat.ip6s_sources_rule[(r)]++; \ 179 goto out; /* XXX: we can't use 'break' here */ \ 180 } while(0) 181 #else 182 #define REPLACE(r) goto replace 183 #define NEXT(r) goto next 184 #define BREAK(r) goto out 185 #endif 186 187 struct in6_addr * 188 in6_selectsrc(dstsock, opts, mopts, ro, laddr, ifpp, errorp) 189 struct sockaddr_in6 *dstsock; 190 struct ip6_pktopts *opts; 191 struct ip6_moptions *mopts; 192 struct route_in6 *ro; 193 struct in6_addr *laddr; 194 struct ifnet **ifpp; 195 int *errorp; 196 { 197 struct in6_addr dst; 198 struct ifnet *ifp = NULL; 199 struct in6_ifaddr *ia = NULL, *ia_best = NULL; 200 struct in6_pktinfo *pi = NULL; 201 int dst_scope = -1, best_scope = -1, best_matchlen = -1; 202 struct in6_addrpolicy *dst_policy = NULL, *best_policy = NULL; 203 u_int32_t odstzone; 204 #ifdef notyet /* until introducing ND extensions and address selection */ 205 int prefer_tempaddr; 206 #endif 207 #if defined(MIP6) && NMIP > 0 208 u_int8_t ip6po_usecoa = 0; 209 #endif /* MIP6 && NMIP > 0 */ 210 211 dst = dstsock->sin6_addr; /* make a copy for local operation */ 212 *errorp = 0; 213 if (ifpp) 214 *ifpp = NULL; 215 216 /* 217 * If the source address is explicitly specified by the caller, 218 * check if the requested source address is indeed a unicast address 219 * assigned to the node, and can be used as the packet's source 220 * address. If everything is okay, use the address as source. 221 */ 222 if (opts && (pi = opts->ip6po_pktinfo) && 223 !IN6_IS_ADDR_UNSPECIFIED(&pi->ipi6_addr)) { 224 struct sockaddr_in6 srcsock; 225 struct in6_ifaddr *ia6; 226 227 /* get the outgoing interface */ 228 if ((*errorp = in6_selectif(dstsock, opts, mopts, ro, &ifp)) 229 != 0) { 230 return (NULL); 231 } 232 233 /* 234 * Determine the appropriate zone id of the source based on 235 * the zone of the destination and the outgoing interface. 236 * If the specified address is ambiguous wrt the scope zone, 237 * the interface must be specified; otherwise, ifa_ifwithaddr() 238 * will fail matching the address. 239 */ 240 bzero(&srcsock, sizeof(srcsock)); 241 srcsock.sin6_family = AF_INET6; 242 srcsock.sin6_len = sizeof(srcsock); 243 srcsock.sin6_addr = pi->ipi6_addr; 244 if (ifp) { 245 *errorp = in6_setscope(&srcsock.sin6_addr, ifp, NULL); 246 if (*errorp != 0) 247 return (NULL); 248 } 249 250 ia6 = (struct in6_ifaddr *)ifa_ifwithaddr((struct sockaddr *)(&srcsock)); 251 if (ia6 == NULL || 252 (ia6->ia6_flags & (IN6_IFF_ANYCAST | IN6_IFF_NOTREADY))) { 253 *errorp = EADDRNOTAVAIL; 254 return (NULL); 255 } 256 pi->ipi6_addr = srcsock.sin6_addr; /* XXX: this overrides pi */ 257 if (ifpp) 258 *ifpp = ifp; 259 return (&ia6->ia_addr.sin6_addr); 260 } 261 262 /* 263 * Otherwise, if the socket has already bound the source, just use it. 264 */ 265 if (laddr && !IN6_IS_ADDR_UNSPECIFIED(laddr)) 266 return (laddr); 267 268 /* 269 * If the address is not specified, choose the best one based on 270 * the outgoing interface and the destination address. 271 */ 272 /* get the outgoing interface */ 273 if ((*errorp = in6_selectif(dstsock, opts, mopts, ro, &ifp)) != 0) 274 return (NULL); 275 276 #if defined(MIP6) && NMIP > 0 277 /* 278 * a caller can specify IP6PO_USECOA to not to use a home 279 * address. for example, the case that the neighbour 280 * unreachability detection to the global address. 281 */ 282 if (opts != NULL && 283 (opts->ip6po_flags & IP6PO_USECOA) != 0) { 284 ip6po_usecoa = 1; 285 } 286 #endif /* MIP6 && NMIP > 0 */ 287 288 #ifdef DIAGNOSTIC 289 if (ifp == NULL) /* this should not happen */ 290 panic("in6_selectsrc: NULL ifp"); 291 #endif 292 *errorp = in6_setscope(&dst, ifp, &odstzone); 293 if (*errorp != 0) 294 return (NULL); 295 296 for (ia = in6_ifaddr; ia; ia = ia->ia_next) { 297 int new_scope = -1, new_matchlen = -1; 298 struct in6_addrpolicy *new_policy = NULL; 299 u_int32_t srczone, osrczone, dstzone; 300 struct in6_addr src; 301 struct ifnet *ifp1 = ia->ia_ifp; 302 303 /* 304 * We'll never take an address that breaks the scope zone 305 * of the destination. We also skip an address if its zone 306 * does not contain the outgoing interface. 307 * XXX: we should probably use sin6_scope_id here. 308 */ 309 if (in6_setscope(&dst, ifp1, &dstzone) || 310 odstzone != dstzone) { 311 continue; 312 } 313 src = ia->ia_addr.sin6_addr; 314 if (in6_setscope(&src, ifp, &osrczone) || 315 in6_setscope(&src, ifp1, &srczone) || 316 osrczone != srczone) { 317 continue; 318 } 319 320 /* avoid unusable addresses */ 321 if ((ia->ia6_flags & 322 (IN6_IFF_NOTREADY | IN6_IFF_ANYCAST | IN6_IFF_DETACHED))) { 323 continue; 324 } 325 if (!ip6_use_deprecated && IFA6_IS_DEPRECATED(ia)) 326 continue; 327 328 #if defined(MIP6) && NMIP > 0 329 /* avoid unusable home addresses. */ 330 if ((ia->ia6_flags & IN6_IFF_HOME) && 331 !mip6_ifa6_is_addr_valid_hoa(ia)) 332 continue; 333 #endif /* MIP6 && NMIP > 0 */ 334 335 /* Rule 1: Prefer same address */ 336 if (IN6_ARE_ADDR_EQUAL(&dst, &ia->ia_addr.sin6_addr)) { 337 ia_best = ia; 338 BREAK(1); /* there should be no better candidate */ 339 } 340 341 if (ia_best == NULL) 342 REPLACE(0); 343 344 /* Rule 2: Prefer appropriate scope */ 345 if (dst_scope < 0) 346 dst_scope = in6_addrscope(&dst); 347 new_scope = in6_addrscope(&ia->ia_addr.sin6_addr); 348 if (IN6_ARE_SCOPE_CMP(best_scope, new_scope) < 0) { 349 if (IN6_ARE_SCOPE_CMP(best_scope, dst_scope) < 0) 350 REPLACE(2); 351 NEXT(2); 352 } else if (IN6_ARE_SCOPE_CMP(new_scope, best_scope) < 0) { 353 if (IN6_ARE_SCOPE_CMP(new_scope, dst_scope) < 0) 354 NEXT(2); 355 REPLACE(2); 356 } 357 358 /* 359 * Rule 3: Avoid deprecated addresses. Note that the case of 360 * !ip6_use_deprecated is already rejected above. 361 */ 362 if (!IFA6_IS_DEPRECATED(ia_best) && IFA6_IS_DEPRECATED(ia)) 363 NEXT(3); 364 if (IFA6_IS_DEPRECATED(ia_best) && !IFA6_IS_DEPRECATED(ia)) 365 REPLACE(3); 366 367 /* Rule 4: Prefer home addresses */ 368 #if defined(MIP6) && NMIP > 0 369 if (!MIP6_IS_MN) 370 goto skip_rule4; 371 372 if ((ia_best->ia6_flags & IN6_IFF_HOME) == 0 && 373 (ia->ia6_flags & IN6_IFF_HOME) == 0) { 374 /* both address are not home addresses. */ 375 goto skip_rule4; 376 } 377 378 /* 379 * If SA is simultaneously a home address and care-of 380 * address and SB is not, then prefer SA. Similarly, 381 * if SB is simultaneously a home address and care-of 382 * address and SA is not, then prefer SB. 383 */ 384 if (((ia_best->ia6_flags & IN6_IFF_HOME) != 0 && 385 ia_best->ia_ifp->if_type != IFT_MIP) 386 && 387 ((ia->ia6_flags & IN6_IFF_HOME) != 0 && 388 ia->ia_ifp->if_type == IFT_MIP)) 389 NEXT(4); 390 if (((ia_best->ia6_flags & IN6_IFF_HOME) != 0 && 391 ia_best->ia_ifp->if_type == IFT_MIP) 392 && 393 ((ia->ia6_flags & IN6_IFF_HOME) != 0 && 394 ia->ia_ifp->if_type != IFT_MIP)) 395 REPLACE(4); 396 if (ip6po_usecoa == 0) { 397 /* 398 * If SA is just a home address and SB is just 399 * a care-of address, then prefer 400 * SA. Similarly, if SB is just a home address 401 * and SA is just a care-of address, then 402 * prefer SB. 403 */ 404 if ((ia_best->ia6_flags & IN6_IFF_HOME) != 0 && 405 (ia->ia6_flags & IN6_IFF_HOME) == 0) { 406 NEXT(4); 407 } 408 if ((ia_best->ia6_flags & IN6_IFF_HOME) == 0 && 409 (ia->ia6_flags & IN6_IFF_HOME) != 0) { 410 REPLACE(4); 411 } 412 } else { 413 /* 414 * a sender don't want to use a home address 415 * because: 416 * 417 * 1) we cannot use. (ex. NS or NA to global 418 * addresses.) 419 * 420 * 2) a user specified not to use. 421 * (ex. mip6control -u) 422 */ 423 if ((ia_best->ia6_flags & IN6_IFF_HOME) == 0 && 424 (ia->ia6_flags & IN6_IFF_HOME) != 0) { 425 /* XXX breaks stat */ 426 NEXT(0); 427 } 428 if ((ia_best->ia6_flags & IN6_IFF_HOME) != 0 && 429 (ia->ia6_flags & IN6_IFF_HOME) == 0) { 430 /* XXX breaks stat */ 431 REPLACE(0); 432 } 433 } 434 skip_rule4: 435 #endif /* MIP6 && NMIP > 0 */ 436 437 /* Rule 5: Prefer outgoing interface */ 438 if (ia_best->ia_ifp == ifp && ia->ia_ifp != ifp) 439 NEXT(5); 440 if (ia_best->ia_ifp != ifp && ia->ia_ifp == ifp) 441 REPLACE(5); 442 443 /* 444 * Rule 6: Prefer matching label 445 * Note that best_policy should be non-NULL here. 446 */ 447 if (dst_policy == NULL) 448 dst_policy = lookup_addrsel_policy(dstsock); 449 if (dst_policy->label != ADDR_LABEL_NOTAPP) { 450 new_policy = lookup_addrsel_policy(&ia->ia_addr); 451 if (dst_policy->label == best_policy->label && 452 dst_policy->label != new_policy->label) 453 NEXT(6); 454 if (dst_policy->label != best_policy->label && 455 dst_policy->label == new_policy->label) 456 REPLACE(6); 457 } 458 459 /* 460 * Rule 7: Prefer public addresses. 461 * We allow users to reverse the logic by configuring 462 * a sysctl variable, so that privacy conscious users can 463 * always prefer temporary addresses. 464 */ 465 #ifdef notyet /* until introducing ND extensions and address selection */ 466 if (opts == NULL || 467 opts->ip6po_prefer_tempaddr == IP6PO_TEMPADDR_SYSTEM) { 468 prefer_tempaddr = ip6_prefer_tempaddr; 469 } else if (opts->ip6po_prefer_tempaddr == 470 IP6PO_TEMPADDR_NOTPREFER) { 471 prefer_tempaddr = 0; 472 } else 473 prefer_tempaddr = 1; 474 if (!(ia_best->ia6_flags & IN6_IFF_TEMPORARY) && 475 (ia->ia6_flags & IN6_IFF_TEMPORARY)) { 476 if (prefer_tempaddr) 477 REPLACE(7); 478 else 479 NEXT(7); 480 } 481 if ((ia_best->ia6_flags & IN6_IFF_TEMPORARY) && 482 !(ia->ia6_flags & IN6_IFF_TEMPORARY)) { 483 if (prefer_tempaddr) 484 NEXT(7); 485 else 486 REPLACE(7); 487 } 488 #endif 489 490 /* 491 * Rule 8: prefer addresses on alive interfaces. 492 * This is a KAME specific rule. 493 */ 494 if ((ia_best->ia_ifp->if_flags & IFF_UP) && 495 !(ia->ia_ifp->if_flags & IFF_UP)) 496 NEXT(8); 497 if (!(ia_best->ia_ifp->if_flags & IFF_UP) && 498 (ia->ia_ifp->if_flags & IFF_UP)) 499 REPLACE(8); 500 501 /* 502 * Rule 9: prefer addresses on "preferred" interfaces. 503 * This is a KAME specific rule. 504 */ 505 #ifdef notyet /* until introducing address selection */ 506 #define NDI_BEST ND_IFINFO(ia_best->ia_ifp) 507 #define NDI_NEW ND_IFINFO(ia->ia_ifp) 508 if ((NDI_BEST->flags & ND6_IFF_PREFER_SOURCE) && 509 !(NDI_NEW->flags & ND6_IFF_PREFER_SOURCE)) 510 NEXT(9); 511 if (!(NDI_BEST->flags & ND6_IFF_PREFER_SOURCE) && 512 (NDI_NEW->flags & ND6_IFF_PREFER_SOURCE)) 513 REPLACE(9); 514 #undef NDI_BEST 515 #undef NDI_NEW 516 #endif 517 518 /* 519 * Rule 14: Use longest matching prefix. 520 * Note: in the address selection draft, this rule is 521 * documented as "Rule 8". However, since it is also 522 * documented that this rule can be overridden, we assign 523 * a large number so that it is easy to assign smaller numbers 524 * to more preferred rules. 525 */ 526 new_matchlen = in6_matchlen(&ia->ia_addr.sin6_addr, &dst); 527 if (best_matchlen < new_matchlen) 528 REPLACE(14); 529 if (new_matchlen < best_matchlen) 530 NEXT(14); 531 532 /* Rule 15 is reserved. */ 533 534 /* 535 * Last resort: just keep the current candidate. 536 * Or, do we need more rules? 537 */ 538 continue; 539 540 replace: 541 ia_best = ia; 542 best_scope = (new_scope >= 0 ? new_scope : 543 in6_addrscope(&ia_best->ia_addr.sin6_addr)); 544 best_policy = (new_policy ? new_policy : 545 lookup_addrsel_policy(&ia_best->ia_addr)); 546 best_matchlen = (new_matchlen >= 0 ? new_matchlen : 547 in6_matchlen(&ia_best->ia_addr.sin6_addr, 548 &dst)); 549 550 next: 551 continue; 552 553 out: 554 break; 555 } 556 557 if ((ia = ia_best) == NULL) { 558 *errorp = EADDRNOTAVAIL; 559 return (NULL); 560 } 561 562 if (ifpp) 563 *ifpp = ifp; 564 return (&ia->ia_addr.sin6_addr); 565 } 566 #undef REPLACE 567 #undef BREAK 568 #undef NEXT 569 570 static int 571 selectroute(dstsock, opts, mopts, ro, retifp, retrt, clone, norouteok) 572 struct sockaddr_in6 *dstsock; 573 struct ip6_pktopts *opts; 574 struct ip6_moptions *mopts; 575 #ifdef NEW_STRUCT_ROUTE 576 struct route *ro; 577 #else 578 struct route_in6 *ro; 579 #endif 580 struct ifnet **retifp; 581 struct rtentry **retrt; 582 int clone; 583 int norouteok; 584 { 585 int error = 0; 586 struct ifnet *ifp = NULL; 587 struct rtentry *rt = NULL; 588 struct sockaddr_in6 *sin6_next; 589 struct in6_pktinfo *pi = NULL; 590 struct in6_addr *dst; 591 592 dst = &dstsock->sin6_addr; 593 594 #if 0 595 if (dstsock->sin6_addr.s6_addr32[0] == 0 && 596 dstsock->sin6_addr.s6_addr32[1] == 0 && 597 !IN6_IS_ADDR_LOOPBACK(&dstsock->sin6_addr)) { 598 printf("in6_selectroute: strange destination %s\n", 599 ip6_sprintf(&dstsock->sin6_addr)); 600 } else { 601 printf("in6_selectroute: destination = %s%%%d\n", 602 ip6_sprintf(&dstsock->sin6_addr), 603 dstsock->sin6_scope_id); /* for debug */ 604 } 605 #endif 606 607 /* If the caller specify the outgoing interface explicitly, use it. */ 608 if (opts && (pi = opts->ip6po_pktinfo) != NULL && pi->ipi6_ifindex) { 609 /* XXX boundary check is assumed to be already done. */ 610 #ifdef __FreeBSD__ 611 ifp = ifnet_byindex(pi->ipi6_ifindex); 612 #else 613 ifp = ifindex2ifnet[pi->ipi6_ifindex]; 614 #endif 615 if (ifp != NULL && 616 (norouteok || retrt == NULL || 617 IN6_IS_ADDR_MULTICAST(dst))) { 618 /* 619 * we do not have to check or get the route for 620 * multicast. 621 */ 622 goto done; 623 } else 624 goto getroute; 625 } 626 627 /* 628 * If the destination address is a multicast address and the outgoing 629 * interface for the address is specified by the caller, use it. 630 */ 631 if (IN6_IS_ADDR_MULTICAST(dst) && 632 mopts != NULL && (ifp = mopts->im6o_multicast_ifp) != NULL) { 633 goto done; /* we do not need a route for multicast. */ 634 } 635 636 getroute: 637 /* 638 * If the next hop address for the packet is specified by the caller, 639 * use it as the gateway. 640 */ 641 if (opts && opts->ip6po_nexthop) { 642 #ifdef notyet /* until introducing RFC3542 support */ 643 struct route_in6 *ron; 644 #endif 645 646 sin6_next = satosin6(opts->ip6po_nexthop); 647 648 /* at this moment, we only support AF_INET6 next hops */ 649 if (sin6_next->sin6_family != AF_INET6) { 650 error = EAFNOSUPPORT; /* or should we proceed? */ 651 goto done; 652 } 653 654 /* 655 * If the next hop is an IPv6 address, then the node identified 656 * by that address must be a neighbor of the sending host. 657 */ 658 #ifdef notyet /* see above */ 659 ron = &opts->ip6po_nextroute; 660 if ((ron->ro_rt && 661 (ron->ro_rt->rt_flags & (RTF_UP | RTF_GATEWAY)) != 662 RTF_UP) || 663 !IN6_ARE_ADDR_EQUAL(&satosin6(&ron->ro_dst)->sin6_addr, 664 &sin6_next->sin6_addr)) { 665 if (ron->ro_rt) { 666 RTFREE(ron->ro_rt); 667 ron->ro_rt = NULL; 668 } 669 *satosin6(&ron->ro_dst) = *sin6_next; 670 } 671 if (ron->ro_rt == NULL) { 672 rtalloc((struct route *)ron); /* multi path case? */ 673 if (ron->ro_rt == NULL || 674 (ron->ro_rt->rt_flags & RTF_GATEWAY)) { 675 if (ron->ro_rt) { 676 RTFREE(ron->ro_rt); 677 ron->ro_rt = NULL; 678 } 679 error = EHOSTUNREACH; 680 goto done; 681 } 682 } 683 if (!nd6_is_addr_neighbor(sin6_next, ron->ro_rt->rt_ifp)) { 684 RTFREE(ron->ro_rt); 685 ron->ro_rt = NULL; 686 error = EHOSTUNREACH; 687 goto done; 688 } 689 rt = ron->ro_rt; 690 ifp = rt->rt_ifp; 691 692 /* 693 * When cloning is required, try to allocate a route to the 694 * destination so that the caller can store path MTU 695 * information. 696 */ 697 if (!clone) 698 goto done; 699 #endif 700 } 701 702 /* 703 * Use a cached route if it exists and is valid, else try to allocate 704 * a new one. Note that we should check the address family of the 705 * cached destination, in case of sharing the cache with IPv4. 706 */ 707 if (ro) { 708 if (ro->ro_rt && 709 (!(ro->ro_rt->rt_flags & RTF_UP) || 710 ((struct sockaddr *)(&ro->ro_dst))->sa_family != AF_INET6 || 711 !IN6_ARE_ADDR_EQUAL(&satosin6(&ro->ro_dst)->sin6_addr, 712 dst))) { 713 RTFREE(ro->ro_rt); 714 ro->ro_rt = (struct rtentry *)NULL; 715 } 716 if (ro->ro_rt == (struct rtentry *)NULL) { 717 struct sockaddr_in6 *sa6; 718 719 /* No route yet, so try to acquire one */ 720 bzero(&ro->ro_dst, sizeof(struct sockaddr_in6)); 721 sa6 = (struct sockaddr_in6 *)&ro->ro_dst; 722 *sa6 = *dstsock; 723 sa6->sin6_scope_id = 0; 724 if (clone) { 725 #ifdef RADIX_MPATH 726 rtalloc_mpath((struct route *)ro, 727 ntohl(sa6->sin6_addr.s6_addr32[3])); 728 #else 729 rtalloc((struct route *)ro); 730 #endif /* RADIX_MPATH */ 731 } else { 732 #ifdef RADIX_MPATH 733 rtalloc_mpath((struct route *)ro, 734 ntohl(sa6->sin6_addr.s6_addr32[3])); 735 #else 736 ro->ro_rt = rtalloc1(&((struct route *)ro) 737 ->ro_dst, 0); 738 #endif /* RADIX_MPATH */ 739 } 740 } 741 742 /* 743 * do not care about the result if we have the nexthop 744 * explicitly specified. 745 */ 746 if (opts && opts->ip6po_nexthop) 747 goto done; 748 749 if (ro->ro_rt) { 750 ifp = ro->ro_rt->rt_ifp; 751 752 if (ifp == NULL) { /* can this really happen? */ 753 RTFREE(ro->ro_rt); 754 ro->ro_rt = NULL; 755 } 756 } 757 if (ro->ro_rt == NULL) 758 error = EHOSTUNREACH; 759 rt = ro->ro_rt; 760 761 /* 762 * Check if the outgoing interface conflicts with 763 * the interface specified by ipi6_ifindex (if specified). 764 * Note that loopback interface is always okay. 765 * (this may happen when we are sending a packet to one of 766 * our own addresses.) 767 */ 768 if (opts && opts->ip6po_pktinfo && 769 opts->ip6po_pktinfo->ipi6_ifindex) { 770 if (!(ifp->if_flags & IFF_LOOPBACK) && 771 ifp->if_index != 772 opts->ip6po_pktinfo->ipi6_ifindex) { 773 error = EHOSTUNREACH; 774 goto done; 775 } 776 } 777 } 778 779 done: 780 if (ifp == NULL && rt == NULL) { 781 /* 782 * This can happen if the caller did not pass a cached route 783 * nor any other hints. We treat this case an error. 784 */ 785 error = EHOSTUNREACH; 786 } 787 if (error == EHOSTUNREACH) 788 ip6stat.ip6s_noroute++; 789 790 if (retifp != NULL) 791 *retifp = ifp; 792 if (retrt != NULL) 793 *retrt = rt; /* rt may be NULL */ 794 795 return (error); 796 } 797 798 static int 799 in6_selectif(dstsock, opts, mopts, ro, retifp) 800 struct sockaddr_in6 *dstsock; 801 struct ip6_pktopts *opts; 802 struct ip6_moptions *mopts; 803 struct route_in6 *ro; 804 struct ifnet **retifp; 805 { 806 int error, clone; 807 struct rtentry *rt = NULL; 808 809 clone = IN6_IS_ADDR_MULTICAST(&dstsock->sin6_addr) ? 0 : 1; 810 if ((error = selectroute(dstsock, opts, mopts, ro, retifp, 811 &rt, clone, 1)) != 0) { 812 return (error); 813 } 814 815 /* 816 * do not use a rejected or black hole route. 817 * XXX: this check should be done in the L2 output routine. 818 * However, if we skipped this check here, we'd see the following 819 * scenario: 820 * - install a rejected route for a scoped address prefix 821 * (like fe80::/10) 822 * - send a packet to a destination that matches the scoped prefix, 823 * with ambiguity about the scope zone. 824 * - pick the outgoing interface from the route, and disambiguate the 825 * scope zone with the interface. 826 * - ip6_output() would try to get another route with the "new" 827 * destination, which may be valid. 828 * - we'd see no error on output. 829 * Although this may not be very harmful, it should still be confusing. 830 * We thus reject the case here. 831 */ 832 if (rt && (rt->rt_flags & (RTF_REJECT | RTF_BLACKHOLE))) 833 return (rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH); 834 835 /* 836 * Adjust the "outgoing" interface. If we're going to loop the packet 837 * back to ourselves, the ifp would be the loopback interface. 838 * However, we'd rather know the interface associated to the 839 * destination address (which should probably be one of our own 840 * addresses.) 841 */ 842 if (rt && rt->rt_ifa && rt->rt_ifa->ifa_ifp) 843 *retifp = rt->rt_ifa->ifa_ifp; 844 845 return (0); 846 } 847 848 int 849 in6_selectroute(dstsock, opts, mopts, ro, retifp, retrt, clone) 850 struct sockaddr_in6 *dstsock; 851 struct ip6_pktopts *opts; 852 struct ip6_moptions *mopts; 853 struct route_in6 *ro; 854 struct ifnet **retifp; 855 struct rtentry **retrt; 856 int clone; /* meaningful only for bsdi and freebsd. */ 857 { 858 return (selectroute(dstsock, opts, mopts, ro, retifp, 859 retrt, clone, 0)); 860 } 861 862 /* 863 * Default hop limit selection. The precedence is as follows: 864 * 1. Hoplimit value specified via ioctl. 865 * 2. (If the outgoing interface is detected) the current 866 * hop limit of the interface specified by router advertisement. 867 * 3. The system default hoplimit. 868 */ 869 int 870 in6_selecthlim(in6p, ifp) 871 struct in6pcb *in6p; 872 struct ifnet *ifp; 873 { 874 if (in6p && in6p->in6p_hops >= 0) 875 return (in6p->in6p_hops); 876 else if (ifp) 877 return (ND_IFINFO(ifp)->chlim); 878 else 879 return (ip6_defhlim); 880 } 881 882 /* 883 * Find an empty port and set it to the specified PCB. 884 */ 885 int 886 in6_pcbsetport(laddr, in6p, p) 887 struct in6_addr *laddr; 888 struct in6pcb *in6p; 889 struct proc *p; 890 { 891 struct socket *so = in6p->in6p_socket; 892 struct inpcbtable *table = in6p->in6p_table; 893 int cnt; 894 u_int16_t minport, maxport; 895 u_int16_t lport, *lastport; 896 int wild = 0; 897 void *t; 898 899 /* XXX: this is redundant when called from in6_pcbbind */ 900 if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0 && 901 ((so->so_proto->pr_flags & PR_CONNREQUIRED) == 0 || 902 (so->so_options & SO_ACCEPTCONN) == 0)) 903 wild = 1; 904 905 if (in6p->in6p_flags & IN6P_LOWPORT) { 906 #ifndef IPNOPRIVPORTS 907 if (p == 0 || (suser(p->p_ucred, &p->p_acflag) != 0)) 908 return (EACCES); 909 #endif 910 minport = ip6_lowportmin; 911 maxport = ip6_lowportmax; 912 lastport = &table->inpt_lastlow; 913 } else { 914 minport = ip6_anonportmin; 915 maxport = ip6_anonportmax; 916 lastport = &table->inpt_lastport; 917 } 918 919 if (minport > maxport) { /* sanity check */ 920 u_int16_t swp; 921 922 swp = minport; 923 minport = maxport; 924 maxport = swp; 925 } 926 927 lport = *lastport - 1; 928 for (cnt = maxport - minport + 1; cnt; cnt--, lport--) { 929 if (lport < minport || lport > maxport) 930 lport = maxport; 931 #ifdef INET 932 if (IN6_IS_ADDR_V4MAPPED(laddr)) { 933 t = in_pcblookup_port(table, 934 *(struct in_addr *)&laddr->s6_addr32[3], 935 lport, wild); 936 } else 937 #endif 938 { 939 t = in6_pcblookup_port(table, laddr, lport, wild); 940 } 941 if (t == 0) 942 goto found; 943 } 944 945 return (EAGAIN); 946 947 found: 948 in6p->in6p_flags |= IN6P_ANONPORT; 949 *lastport = lport; 950 in6p->in6p_lport = htons(lport); 951 in6_pcbstate(in6p, IN6P_BOUND); 952 return (0); /* success */ 953 } 954 955 void 956 addrsel_policy_init() 957 { 958 init_policy_queue(); 959 960 /* initialize the "last resort" policy */ 961 bzero(&defaultaddrpolicy, sizeof(defaultaddrpolicy)); 962 defaultaddrpolicy.label = ADDR_LABEL_NOTAPP; 963 } 964 965 static struct in6_addrpolicy * 966 lookup_addrsel_policy(key) 967 struct sockaddr_in6 *key; 968 { 969 struct in6_addrpolicy *match = NULL; 970 971 match = match_addrsel_policy(key); 972 973 if (match == NULL) 974 match = &defaultaddrpolicy; 975 else 976 match->use++; 977 978 return (match); 979 } 980 981 /* 982 * Subroutines to manage the address selection policy table via sysctl. 983 */ 984 struct walkarg { 985 size_t w_total; 986 size_t w_given; 987 caddr_t w_where; 988 caddr_t w_limit; 989 }; 990 991 int 992 in6_src_sysctl(oldp, oldlenp, newp, newlen) 993 void *oldp; 994 size_t *oldlenp; 995 void *newp; 996 size_t newlen; 997 { 998 int error = 0; 999 int s; 1000 1001 s = splsoftnet(); 1002 1003 if (newp) { 1004 error = EPERM; 1005 goto end; 1006 } 1007 if (oldp && oldlenp == NULL) { 1008 error = EINVAL; 1009 goto end; 1010 } 1011 if (oldp || oldlenp) { 1012 struct walkarg w; 1013 size_t oldlen = (oldlenp ? *oldlenp : 0); 1014 1015 bzero(&w, sizeof(w)); 1016 w.w_given = oldlen; 1017 w.w_where = oldp; 1018 if (oldp) 1019 w.w_limit = (caddr_t)oldp + oldlen; 1020 1021 error = walk_addrsel_policy(dump_addrsel_policyent, &w); 1022 1023 *oldlenp = w.w_total; 1024 if (oldp && w.w_total > oldlen && error == 0) 1025 error = ENOMEM; 1026 } 1027 1028 end: 1029 splx(s); 1030 1031 return (error); 1032 } 1033 1034 int 1035 in6_src_ioctl(cmd, data) 1036 u_long cmd; 1037 caddr_t data; 1038 { 1039 int i; 1040 struct in6_addrpolicy ent0; 1041 1042 if (cmd != SIOCAADDRCTL_POLICY && cmd != SIOCDADDRCTL_POLICY) 1043 return (EOPNOTSUPP); /* check for safety */ 1044 1045 ent0 = *(struct in6_addrpolicy *)data; 1046 1047 if (ent0.label == ADDR_LABEL_NOTAPP) 1048 return (EINVAL); 1049 /* check if the prefix mask is consecutive. */ 1050 if (in6_mask2len(&ent0.addrmask.sin6_addr, NULL) < 0) 1051 return (EINVAL); 1052 /* clear trailing garbages (if any) of the prefix address. */ 1053 for (i = 0; i < 4; i++) { 1054 ent0.addr.sin6_addr.s6_addr32[i] &= 1055 ent0.addrmask.sin6_addr.s6_addr32[i]; 1056 } 1057 ent0.use = 0; 1058 1059 switch (cmd) { 1060 case SIOCAADDRCTL_POLICY: 1061 return (add_addrsel_policyent(&ent0)); 1062 case SIOCDADDRCTL_POLICY: 1063 return (delete_addrsel_policyent(&ent0)); 1064 } 1065 1066 return (0); /* XXX: compromise compilers */ 1067 } 1068 1069 /* 1070 * The followings are implementation of the policy table using a 1071 * simple tail queue. 1072 * XXX such details should be hidden. 1073 * XXX implementation using binary tree should be more efficient. 1074 */ 1075 struct addrsel_policyent { 1076 TAILQ_ENTRY(addrsel_policyent) ape_entry; 1077 struct in6_addrpolicy ape_policy; 1078 }; 1079 1080 TAILQ_HEAD(addrsel_policyhead, addrsel_policyent); 1081 1082 struct addrsel_policyhead addrsel_policytab; 1083 1084 static void 1085 init_policy_queue() 1086 { 1087 TAILQ_INIT(&addrsel_policytab); 1088 } 1089 1090 static int 1091 add_addrsel_policyent(newpolicy) 1092 struct in6_addrpolicy *newpolicy; 1093 { 1094 struct addrsel_policyent *new, *pol; 1095 1096 /* duplication check */ 1097 for (pol = TAILQ_FIRST(&addrsel_policytab); pol; 1098 pol = TAILQ_NEXT(pol, ape_entry)) { 1099 if (IN6_ARE_ADDR_EQUAL(&newpolicy->addr.sin6_addr, 1100 &pol->ape_policy.addr.sin6_addr) && 1101 IN6_ARE_ADDR_EQUAL(&newpolicy->addrmask.sin6_addr, 1102 &pol->ape_policy.addrmask.sin6_addr)) { 1103 return (EEXIST); /* or override it? */ 1104 } 1105 } 1106 1107 MALLOC(new, struct addrsel_policyent *, sizeof(*new), M_IFADDR, 1108 M_WAITOK); 1109 bzero(new, sizeof(*new)); 1110 1111 /* XXX: should validate entry */ 1112 new->ape_policy = *newpolicy; 1113 1114 TAILQ_INSERT_TAIL(&addrsel_policytab, new, ape_entry); 1115 1116 return (0); 1117 } 1118 1119 static int 1120 delete_addrsel_policyent(key) 1121 struct in6_addrpolicy *key; 1122 { 1123 struct addrsel_policyent *pol; 1124 1125 /* search for the entry in the table */ 1126 for (pol = TAILQ_FIRST(&addrsel_policytab); pol; 1127 pol = TAILQ_NEXT(pol, ape_entry)) { 1128 if (IN6_ARE_ADDR_EQUAL(&key->addr.sin6_addr, 1129 &pol->ape_policy.addr.sin6_addr) && 1130 IN6_ARE_ADDR_EQUAL(&key->addrmask.sin6_addr, 1131 &pol->ape_policy.addrmask.sin6_addr)) { 1132 break; 1133 } 1134 } 1135 if (pol == NULL) { 1136 return (ESRCH); 1137 } 1138 1139 TAILQ_REMOVE(&addrsel_policytab, pol, ape_entry); 1140 1141 return (0); 1142 } 1143 1144 static int 1145 walk_addrsel_policy(callback, w) 1146 int (*callback) __P((struct in6_addrpolicy *, void *)); 1147 void *w; 1148 { 1149 struct addrsel_policyent *pol; 1150 int error = 0; 1151 1152 for (pol = TAILQ_FIRST(&addrsel_policytab); pol; 1153 pol = TAILQ_NEXT(pol, ape_entry)) { 1154 if ((error = (*callback)(&pol->ape_policy, w)) != 0) { 1155 return (error); 1156 } 1157 } 1158 1159 return (error); 1160 } 1161 1162 static int 1163 dump_addrsel_policyent(pol, arg) 1164 struct in6_addrpolicy *pol; 1165 void *arg; 1166 { 1167 int error = 0; 1168 struct walkarg *w = arg; 1169 1170 if (w->w_where && w->w_where + sizeof(*pol) <= w->w_limit) { 1171 if ((error = copyout(pol, w->w_where, sizeof(*pol))) != 0) 1172 return (error); 1173 w->w_where += sizeof(*pol); 1174 } 1175 w->w_total += sizeof(*pol); 1176 1177 return (error); 1178 } 1179 1180 static struct in6_addrpolicy * 1181 match_addrsel_policy(key) 1182 struct sockaddr_in6 *key; 1183 { 1184 struct addrsel_policyent *pent; 1185 struct in6_addrpolicy *bestpol = NULL, *pol; 1186 int matchlen, bestmatchlen = -1; 1187 u_char *mp, *ep, *k, *p, m; 1188 1189 for (pent = TAILQ_FIRST(&addrsel_policytab); pent; 1190 pent = TAILQ_NEXT(pent, ape_entry)) { 1191 matchlen = 0; 1192 1193 pol = &pent->ape_policy; 1194 mp = (u_char *)&pol->addrmask.sin6_addr; 1195 ep = mp + 16; /* XXX: scope field? */ 1196 k = (u_char *)&key->sin6_addr; 1197 p = (u_char *)&pol->addr.sin6_addr; 1198 for (; mp < ep && *mp; mp++, k++, p++) { 1199 m = *mp; 1200 if ((*k & m) != *p) 1201 goto next; /* not match */ 1202 if (m == 0xff) /* short cut for a typical case */ 1203 matchlen += 8; 1204 else { 1205 while (m >= 0x80) { 1206 matchlen++; 1207 m <<= 1; 1208 } 1209 } 1210 } 1211 1212 /* matched. check if this is better than the current best. */ 1213 if (bestpol == NULL || 1214 matchlen > bestmatchlen) { 1215 bestpol = pol; 1216 bestmatchlen = matchlen; 1217 } 1218 1219 next: 1220 continue; 1221 } 1222 1223 return (bestpol); 1224 } 1225