1 /* $NetBSD: nd6.c,v 1.271 2020/06/12 11:04:45 roy Exp $ */ 2 /* $KAME: nd6.c,v 1.279 2002/06/08 11:16:51 itojun Exp $ */ 3 4 /* 5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the project nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 #include <sys/cdefs.h> 34 __KERNEL_RCSID(0, "$NetBSD: nd6.c,v 1.271 2020/06/12 11:04:45 roy Exp $"); 35 36 #ifdef _KERNEL_OPT 37 #include "opt_compat_netbsd.h" 38 #include "opt_net_mpsafe.h" 39 #endif 40 41 #include "bridge.h" 42 #include "carp.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/callout.h> 47 #include <sys/kmem.h> 48 #include <sys/mbuf.h> 49 #include <sys/socket.h> 50 #include <sys/socketvar.h> 51 #include <sys/sockio.h> 52 #include <sys/time.h> 53 #include <sys/kernel.h> 54 #include <sys/errno.h> 55 #include <sys/ioctl.h> 56 #include <sys/syslog.h> 57 #include <sys/queue.h> 58 #include <sys/cprng.h> 59 #include <sys/workqueue.h> 60 61 #include <net/if.h> 62 #include <net/if_dl.h> 63 #include <net/if_llatbl.h> 64 #include <net/if_types.h> 65 #include <net/route.h> 66 #include <net/if_ether.h> 67 #include <net/if_arc.h> 68 69 #include <netinet/in.h> 70 #include <netinet6/in6_var.h> 71 #include <netinet/ip6.h> 72 #include <netinet6/ip6_var.h> 73 #include <netinet6/scope6_var.h> 74 #include <netinet6/nd6.h> 75 #include <netinet6/in6_ifattach.h> 76 #include <netinet/icmp6.h> 77 #include <netinet6/icmp6_private.h> 78 79 #ifdef COMPAT_90 80 #include <compat/netinet6/in6_var.h> 81 #include <compat/netinet6/nd6.h> 82 #endif 83 84 #define ND6_SLOWTIMER_INTERVAL (60 * 60) /* 1 hour */ 85 #define ND6_RECALC_REACHTM_INTERVAL (60 * 120) /* 2 hours */ 86 87 /* timer values */ 88 int nd6_prune = 1; /* walk list every 1 seconds */ 89 int nd6_delay = 5; /* delay first probe time 5 second */ 90 int nd6_umaxtries = 3; /* maximum unicast query */ 91 int nd6_mmaxtries = 3; /* maximum multicast query */ 92 int nd6_useloopback = 1; /* use loopback interface for local traffic */ 93 int nd6_gctimer = (60 * 60 * 24); /* 1 day: garbage collection timer */ 94 95 /* preventing too many loops in ND option parsing */ 96 int nd6_maxndopt = 10; /* max # of ND options allowed */ 97 98 int nd6_maxnudhint = 0; /* max # of subsequent upper layer hints */ 99 100 int nd6_maxqueuelen = 1; /* max # of packets cached in unresolved ND entries */ 101 102 #ifdef ND6_DEBUG 103 int nd6_debug = 1; 104 #else 105 int nd6_debug = 0; 106 #endif 107 108 krwlock_t nd6_lock __cacheline_aligned; 109 110 int nd6_recalc_reachtm_interval = ND6_RECALC_REACHTM_INTERVAL; 111 112 static void nd6_slowtimo(void *); 113 static void nd6_free(struct llentry *, int); 114 static void nd6_llinfo_timer(void *); 115 static void nd6_timer(void *); 116 static void nd6_timer_work(struct work *, void *); 117 static void clear_llinfo_pqueue(struct llentry *); 118 static struct nd_opt_hdr *nd6_option(union nd_opts *); 119 120 static callout_t nd6_slowtimo_ch; 121 static callout_t nd6_timer_ch; 122 static struct workqueue *nd6_timer_wq; 123 static struct work nd6_timer_wk; 124 125 MALLOC_DEFINE(M_IP6NDP, "NDP", "IPv6 Neighbour Discovery"); 126 127 void 128 nd6_init(void) 129 { 130 int error; 131 132 nd6_nbr_init(); 133 134 rw_init(&nd6_lock); 135 136 callout_init(&nd6_slowtimo_ch, CALLOUT_MPSAFE); 137 callout_init(&nd6_timer_ch, CALLOUT_MPSAFE); 138 139 error = workqueue_create(&nd6_timer_wq, "nd6_timer", 140 nd6_timer_work, NULL, PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE); 141 if (error) 142 panic("%s: workqueue_create failed (%d)\n", __func__, error); 143 144 /* start timer */ 145 callout_reset(&nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz, 146 nd6_slowtimo, NULL); 147 callout_reset(&nd6_timer_ch, hz, nd6_timer, NULL); 148 } 149 150 struct nd_kifinfo * 151 nd6_ifattach(struct ifnet *ifp) 152 { 153 struct nd_kifinfo *nd; 154 155 nd = kmem_zalloc(sizeof(*nd), KM_SLEEP); 156 157 nd->chlim = IPV6_DEFHLIM; 158 nd->basereachable = REACHABLE_TIME; 159 nd->reachable = ND_COMPUTE_RTIME(nd->basereachable); 160 nd->retrans = RETRANS_TIMER; 161 162 nd->flags = ND6_IFF_PERFORMNUD; 163 164 /* A loopback interface always has ND6_IFF_AUTO_LINKLOCAL. 165 * A bridge interface should not have ND6_IFF_AUTO_LINKLOCAL 166 * because one of its members should. */ 167 if ((ip6_auto_linklocal && ifp->if_type != IFT_BRIDGE) || 168 (ifp->if_flags & IFF_LOOPBACK)) 169 nd->flags |= ND6_IFF_AUTO_LINKLOCAL; 170 171 return nd; 172 } 173 174 void 175 nd6_ifdetach(struct ifnet *ifp, struct in6_ifextra *ext) 176 { 177 178 /* Ensure all IPv6 addresses are purged before calling nd6_purge */ 179 if_purgeaddrs(ifp, AF_INET6, in6_purgeaddr); 180 nd6_purge(ifp, ext); 181 kmem_free(ext->nd_ifinfo, sizeof(struct nd_kifinfo)); 182 } 183 184 void 185 nd6_option_init(void *opt, int icmp6len, union nd_opts *ndopts) 186 { 187 188 memset(ndopts, 0, sizeof(*ndopts)); 189 ndopts->nd_opts_search = (struct nd_opt_hdr *)opt; 190 ndopts->nd_opts_last 191 = (struct nd_opt_hdr *)(((u_char *)opt) + icmp6len); 192 193 if (icmp6len == 0) { 194 ndopts->nd_opts_done = 1; 195 ndopts->nd_opts_search = NULL; 196 } 197 } 198 199 /* 200 * Take one ND option. 201 */ 202 static struct nd_opt_hdr * 203 nd6_option(union nd_opts *ndopts) 204 { 205 struct nd_opt_hdr *nd_opt; 206 int olen; 207 208 KASSERT(ndopts != NULL); 209 KASSERT(ndopts->nd_opts_last != NULL); 210 211 if (ndopts->nd_opts_search == NULL) 212 return NULL; 213 if (ndopts->nd_opts_done) 214 return NULL; 215 216 nd_opt = ndopts->nd_opts_search; 217 218 /* make sure nd_opt_len is inside the buffer */ 219 if ((void *)&nd_opt->nd_opt_len >= (void *)ndopts->nd_opts_last) { 220 memset(ndopts, 0, sizeof(*ndopts)); 221 return NULL; 222 } 223 224 olen = nd_opt->nd_opt_len << 3; 225 if (olen == 0) { 226 /* 227 * Message validation requires that all included 228 * options have a length that is greater than zero. 229 */ 230 memset(ndopts, 0, sizeof(*ndopts)); 231 return NULL; 232 } 233 234 ndopts->nd_opts_search = (struct nd_opt_hdr *)((char *)nd_opt + olen); 235 if (ndopts->nd_opts_search > ndopts->nd_opts_last) { 236 /* option overruns the end of buffer, invalid */ 237 memset(ndopts, 0, sizeof(*ndopts)); 238 return NULL; 239 } else if (ndopts->nd_opts_search == ndopts->nd_opts_last) { 240 /* reached the end of options chain */ 241 ndopts->nd_opts_done = 1; 242 ndopts->nd_opts_search = NULL; 243 } 244 return nd_opt; 245 } 246 247 /* 248 * Parse multiple ND options. 249 * This function is much easier to use, for ND routines that do not need 250 * multiple options of the same type. 251 */ 252 int 253 nd6_options(union nd_opts *ndopts) 254 { 255 struct nd_opt_hdr *nd_opt; 256 int i = 0; 257 258 KASSERT(ndopts != NULL); 259 KASSERT(ndopts->nd_opts_last != NULL); 260 261 if (ndopts->nd_opts_search == NULL) 262 return 0; 263 264 while (1) { 265 nd_opt = nd6_option(ndopts); 266 if (nd_opt == NULL && ndopts->nd_opts_last == NULL) { 267 /* 268 * Message validation requires that all included 269 * options have a length that is greater than zero. 270 */ 271 ICMP6_STATINC(ICMP6_STAT_ND_BADOPT); 272 memset(ndopts, 0, sizeof(*ndopts)); 273 return -1; 274 } 275 276 if (nd_opt == NULL) 277 goto skip1; 278 279 switch (nd_opt->nd_opt_type) { 280 case ND_OPT_SOURCE_LINKADDR: 281 case ND_OPT_TARGET_LINKADDR: 282 case ND_OPT_MTU: 283 case ND_OPT_REDIRECTED_HEADER: 284 case ND_OPT_NONCE: 285 if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) { 286 nd6log(LOG_INFO, 287 "duplicated ND6 option found (type=%d)\n", 288 nd_opt->nd_opt_type); 289 /* XXX bark? */ 290 } else { 291 ndopts->nd_opt_array[nd_opt->nd_opt_type] 292 = nd_opt; 293 } 294 break; 295 case ND_OPT_PREFIX_INFORMATION: 296 if (ndopts->nd_opt_array[nd_opt->nd_opt_type] == 0) { 297 ndopts->nd_opt_array[nd_opt->nd_opt_type] 298 = nd_opt; 299 } 300 ndopts->nd_opts_pi_end = 301 (struct nd_opt_prefix_info *)nd_opt; 302 break; 303 default: 304 /* 305 * Unknown options must be silently ignored, 306 * to accommodate future extension to the protocol. 307 */ 308 nd6log(LOG_DEBUG, 309 "nd6_options: unsupported option %d - " 310 "option ignored\n", nd_opt->nd_opt_type); 311 } 312 313 skip1: 314 i++; 315 if (i > nd6_maxndopt) { 316 ICMP6_STATINC(ICMP6_STAT_ND_TOOMANYOPT); 317 nd6log(LOG_INFO, "too many loop in nd opt\n"); 318 break; 319 } 320 321 if (ndopts->nd_opts_done) 322 break; 323 } 324 325 return 0; 326 } 327 328 /* 329 * ND6 timer routine to handle ND6 entries 330 */ 331 void 332 nd6_llinfo_settimer(struct llentry *ln, time_t xtick) 333 { 334 335 CTASSERT(sizeof(time_t) > sizeof(int)); 336 LLE_WLOCK_ASSERT(ln); 337 338 KASSERT(xtick >= 0); 339 340 /* 341 * We have to take care of a reference leak which occurs if 342 * callout_reset overwrites a pending callout schedule. Unfortunately 343 * we don't have a mean to know the overwrite, so we need to know it 344 * using callout_stop. We need to call callout_pending first to exclude 345 * the case that the callout has never been scheduled. 346 */ 347 if (callout_pending(&ln->la_timer)) { 348 bool expired = callout_stop(&ln->la_timer); 349 if (!expired) 350 LLE_REMREF(ln); 351 } 352 353 ln->ln_expire = time_uptime + xtick / hz; 354 LLE_ADDREF(ln); 355 if (xtick > INT_MAX) { 356 ln->ln_ntick = xtick - INT_MAX; 357 callout_reset(&ln->ln_timer_ch, INT_MAX, 358 nd6_llinfo_timer, ln); 359 } else { 360 ln->ln_ntick = 0; 361 callout_reset(&ln->ln_timer_ch, xtick, 362 nd6_llinfo_timer, ln); 363 } 364 } 365 366 /* 367 * Gets source address of the first packet in hold queue 368 * and stores it in @src. 369 * Returns pointer to @src (if hold queue is not empty) or NULL. 370 */ 371 static struct in6_addr * 372 nd6_llinfo_get_holdsrc(struct llentry *ln, struct in6_addr *src) 373 { 374 struct ip6_hdr *hip6; 375 376 if (ln == NULL || ln->ln_hold == NULL) 377 return NULL; 378 379 /* 380 * assuming every packet in ln_hold has the same IP header 381 */ 382 hip6 = mtod(ln->ln_hold, struct ip6_hdr *); 383 /* XXX pullup? */ 384 if (sizeof(*hip6) < ln->ln_hold->m_len) 385 *src = hip6->ip6_src; 386 else 387 src = NULL; 388 389 return src; 390 } 391 392 static void 393 nd6_llinfo_timer(void *arg) 394 { 395 struct llentry *ln = arg; 396 struct ifnet *ifp; 397 struct nd_kifinfo *ndi; 398 bool send_ns = false; 399 const struct in6_addr *daddr6 = NULL; 400 const struct in6_addr *taddr6 = &ln->r_l3addr.addr6; 401 struct sockaddr_in6 dsin6, tsin6; 402 struct mbuf *m = NULL; 403 bool missed = false; 404 405 SOFTNET_KERNEL_LOCK_UNLESS_NET_MPSAFE(); 406 407 LLE_WLOCK(ln); 408 if ((ln->la_flags & LLE_LINKED) == 0) 409 goto out; 410 if (ln->ln_ntick > 0) { 411 nd6_llinfo_settimer(ln, ln->ln_ntick); 412 goto out; 413 } 414 415 ifp = ln->lle_tbl->llt_ifp; 416 KASSERT(ifp != NULL); 417 418 ndi = ND_IFINFO(ifp); 419 420 switch (ln->ln_state) { 421 case ND6_LLINFO_WAITDELETE: 422 LLE_REMREF(ln); 423 nd6_free(ln, 0); 424 ln = NULL; 425 break; 426 427 case ND6_LLINFO_INCOMPLETE: 428 if (ln->ln_asked++ < nd6_mmaxtries) { 429 send_ns = true; 430 break; 431 } 432 433 missed = true; 434 sockaddr_in6_init(&tsin6, taddr6, 0, 0, 0); 435 436 if (ln->ln_hold) { 437 struct mbuf *m0; 438 439 m = ln->ln_hold; 440 441 /* 442 * assuming every packet in ln_hold has 443 * the same IP header 444 */ 445 m0 = m->m_nextpkt; 446 m->m_nextpkt = NULL; 447 ln->ln_hold = m0; 448 clear_llinfo_pqueue(ln); 449 } 450 451 /* 452 * Move to the ND6_LLINFO_WAITDELETE state for another 453 * interval at which point the llentry will be freed 454 * unless it's attempted to be used again and we'll 455 * resend NS again, rinse and repeat. 456 */ 457 ln->ln_state = ND6_LLINFO_WAITDELETE; 458 if (ln->ln_asked == nd6_mmaxtries) 459 nd6_llinfo_settimer(ln, ndi->retrans * hz / 1000); 460 else 461 send_ns = true; 462 break; 463 464 case ND6_LLINFO_REACHABLE: 465 if (!ND6_LLINFO_PERMANENT(ln)) { 466 ln->ln_state = ND6_LLINFO_STALE; 467 nd6_llinfo_settimer(ln, nd6_gctimer * hz); 468 } 469 break; 470 471 case ND6_LLINFO_PURGE: 472 case ND6_LLINFO_STALE: 473 /* Garbage Collection(RFC 2461 5.3) */ 474 if (!ND6_LLINFO_PERMANENT(ln)) { 475 LLE_REMREF(ln); 476 nd6_free(ln, 1); 477 ln = NULL; 478 } 479 break; 480 481 case ND6_LLINFO_DELAY: 482 if (ndi->flags & ND6_IFF_PERFORMNUD) { 483 /* We need NUD */ 484 ln->ln_asked = 1; 485 ln->ln_state = ND6_LLINFO_PROBE; 486 daddr6 = &ln->r_l3addr.addr6; 487 send_ns = true; 488 } else { 489 ln->ln_state = ND6_LLINFO_STALE; /* XXX */ 490 nd6_llinfo_settimer(ln, nd6_gctimer * hz); 491 } 492 break; 493 case ND6_LLINFO_PROBE: 494 if (ln->ln_asked < nd6_umaxtries) { 495 ln->ln_asked++; 496 daddr6 = &ln->r_l3addr.addr6; 497 send_ns = true; 498 } else { 499 LLE_REMREF(ln); 500 nd6_free(ln, 0); 501 ln = NULL; 502 } 503 break; 504 } 505 506 if (send_ns) { 507 struct in6_addr src, *psrc; 508 509 nd6_llinfo_settimer(ln, ndi->retrans * hz / 1000); 510 psrc = nd6_llinfo_get_holdsrc(ln, &src); 511 LLE_FREE_LOCKED(ln); 512 ln = NULL; 513 nd6_ns_output(ifp, daddr6, taddr6, psrc, NULL); 514 } 515 516 out: 517 if (ln != NULL) 518 LLE_FREE_LOCKED(ln); 519 SOFTNET_KERNEL_UNLOCK_UNLESS_NET_MPSAFE(); 520 if (missed) { 521 struct in6_addr mdaddr6 = zeroin6_addr; 522 struct sockaddr *sa; 523 524 if (m != NULL) 525 icmp6_error2(m, ICMP6_DST_UNREACH, 526 ICMP6_DST_UNREACH_ADDR, 0, ifp, &mdaddr6); 527 if (!IN6_IS_ADDR_UNSPECIFIED(&mdaddr6)) { 528 sockaddr_in6_init(&dsin6, &mdaddr6, 0, 0, 0); 529 sa = sin6tosa(&dsin6); 530 } else 531 sa = NULL; 532 rt_clonedmsg(RTM_MISS, sa, sin6tosa(&tsin6), NULL, ifp); 533 } 534 } 535 536 /* 537 * ND6 timer routine to expire default route list and prefix list 538 */ 539 static void 540 nd6_timer_work(struct work *wk, void *arg) 541 { 542 struct in6_ifaddr *ia6, *nia6; 543 int s, bound; 544 struct psref psref; 545 546 callout_reset(&nd6_timer_ch, nd6_prune * hz, 547 nd6_timer, NULL); 548 549 SOFTNET_KERNEL_LOCK_UNLESS_NET_MPSAFE(); 550 551 /* expire interface addresses */ 552 bound = curlwp_bind(); 553 s = pserialize_read_enter(); 554 for (ia6 = IN6_ADDRLIST_READER_FIRST(); ia6; ia6 = nia6) { 555 nia6 = IN6_ADDRLIST_READER_NEXT(ia6); 556 557 ia6_acquire(ia6, &psref); 558 pserialize_read_exit(s); 559 560 /* check address lifetime */ 561 if (IFA6_IS_INVALID(ia6)) { 562 struct ifnet *ifp; 563 564 ifp = ia6->ia_ifa.ifa_ifp; 565 IFNET_LOCK(ifp); 566 /* 567 * Need to take the lock first to prevent if_detach 568 * from running in6_purgeaddr concurrently. 569 */ 570 if (!if_is_deactivated(ifp)) { 571 ia6_release(ia6, &psref); 572 in6_purgeaddr(&ia6->ia_ifa); 573 } else { 574 /* 575 * ifp is being destroyed, ia6 will be destroyed 576 * by if_detach. 577 */ 578 ia6_release(ia6, &psref); 579 } 580 ia6 = NULL; 581 IFNET_UNLOCK(ifp); 582 } else if (IFA6_IS_DEPRECATED(ia6)) { 583 int oldflags = ia6->ia6_flags; 584 585 if ((oldflags & IN6_IFF_DEPRECATED) == 0) { 586 ia6->ia6_flags |= IN6_IFF_DEPRECATED; 587 rt_addrmsg(RTM_NEWADDR, (struct ifaddr *)ia6); 588 } 589 } else { 590 /* 591 * A new RA might have made a deprecated address 592 * preferred. 593 */ 594 if (ia6->ia6_flags & IN6_IFF_DEPRECATED) { 595 ia6->ia6_flags &= ~IN6_IFF_DEPRECATED; 596 rt_addrmsg(RTM_NEWADDR, (struct ifaddr *)ia6); 597 } 598 } 599 s = pserialize_read_enter(); 600 ia6_release(ia6, &psref); 601 } 602 pserialize_read_exit(s); 603 curlwp_bindx(bound); 604 605 SOFTNET_KERNEL_UNLOCK_UNLESS_NET_MPSAFE(); 606 } 607 608 static void 609 nd6_timer(void *ignored_arg) 610 { 611 612 workqueue_enqueue(nd6_timer_wq, &nd6_timer_wk, NULL); 613 } 614 615 /* 616 * Nuke neighbor cache/prefix/default router management table, right before 617 * ifp goes away. 618 */ 619 void 620 nd6_purge(struct ifnet *ifp, struct in6_ifextra *ext) 621 { 622 623 /* 624 * During detach, the ND info might be already removed, but 625 * then is explitly passed as argument. 626 * Otherwise get it from ifp->if_afdata. 627 */ 628 if (ext == NULL) 629 ext = ifp->if_afdata[AF_INET6]; 630 if (ext == NULL) 631 return; 632 633 /* 634 * We may not need to nuke the neighbor cache entries here 635 * because the neighbor cache is kept in if_afdata[AF_INET6]. 636 * nd6_purge() is invoked by in6_ifdetach() which is called 637 * from if_detach() where everything gets purged. However 638 * in6_ifdetach is directly called from vlan(4), so we still 639 * need to purge entries here. 640 */ 641 if (ext->lltable != NULL) 642 lltable_purge_entries(ext->lltable); 643 } 644 645 struct llentry * 646 nd6_lookup(const struct in6_addr *addr6, const struct ifnet *ifp, bool wlock) 647 { 648 struct sockaddr_in6 sin6; 649 struct llentry *ln; 650 651 sockaddr_in6_init(&sin6, addr6, 0, 0, 0); 652 653 IF_AFDATA_RLOCK(ifp); 654 ln = lla_lookup(LLTABLE6(ifp), wlock ? LLE_EXCLUSIVE : 0, 655 sin6tosa(&sin6)); 656 IF_AFDATA_RUNLOCK(ifp); 657 658 return ln; 659 } 660 661 struct llentry * 662 nd6_create(const struct in6_addr *addr6, const struct ifnet *ifp) 663 { 664 struct sockaddr_in6 sin6; 665 struct llentry *ln; 666 struct rtentry *rt; 667 668 sockaddr_in6_init(&sin6, addr6, 0, 0, 0); 669 rt = rtalloc1(sin6tosa(&sin6), 0); 670 671 IF_AFDATA_WLOCK(ifp); 672 ln = lla_create(LLTABLE6(ifp), LLE_EXCLUSIVE, sin6tosa(&sin6), rt); 673 IF_AFDATA_WUNLOCK(ifp); 674 675 if (rt != NULL) 676 rt_unref(rt); 677 if (ln != NULL) 678 ln->ln_state = ND6_LLINFO_NOSTATE; 679 680 return ln; 681 } 682 683 /* 684 * Test whether a given IPv6 address is a neighbor or not, ignoring 685 * the actual neighbor cache. The neighbor cache is ignored in order 686 * to not reenter the routing code from within itself. 687 */ 688 static int 689 nd6_is_new_addr_neighbor(const struct sockaddr_in6 *addr, struct ifnet *ifp) 690 { 691 struct ifaddr *dstaddr; 692 int s; 693 694 /* 695 * A link-local address is always a neighbor. 696 * XXX: a link does not necessarily specify a single interface. 697 */ 698 if (IN6_IS_ADDR_LINKLOCAL(&addr->sin6_addr)) { 699 struct sockaddr_in6 sin6_copy; 700 u_int32_t zone; 701 702 /* 703 * We need sin6_copy since sa6_recoverscope() may modify the 704 * content (XXX). 705 */ 706 sin6_copy = *addr; 707 if (sa6_recoverscope(&sin6_copy)) 708 return 0; /* XXX: should be impossible */ 709 if (in6_setscope(&sin6_copy.sin6_addr, ifp, &zone)) 710 return 0; 711 if (sin6_copy.sin6_scope_id == zone) 712 return 1; 713 else 714 return 0; 715 } 716 717 /* 718 * If the address is assigned on the node of the other side of 719 * a p2p interface, the address should be a neighbor. 720 */ 721 s = pserialize_read_enter(); 722 dstaddr = ifa_ifwithdstaddr(sin6tocsa(addr)); 723 if (dstaddr != NULL) { 724 if (dstaddr->ifa_ifp == ifp) { 725 pserialize_read_exit(s); 726 return 1; 727 } 728 } 729 pserialize_read_exit(s); 730 731 return 0; 732 } 733 734 /* 735 * Detect if a given IPv6 address identifies a neighbor on a given link. 736 * XXX: should take care of the destination of a p2p link? 737 */ 738 int 739 nd6_is_addr_neighbor(const struct sockaddr_in6 *addr, struct ifnet *ifp) 740 { 741 struct llentry *ln; 742 struct rtentry *rt; 743 744 /* 745 * A link-local address is always a neighbor. 746 * XXX: a link does not necessarily specify a single interface. 747 */ 748 if (IN6_IS_ADDR_LINKLOCAL(&addr->sin6_addr)) { 749 struct sockaddr_in6 sin6_copy; 750 u_int32_t zone; 751 752 /* 753 * We need sin6_copy since sa6_recoverscope() may modify the 754 * content (XXX). 755 */ 756 sin6_copy = *addr; 757 if (sa6_recoverscope(&sin6_copy)) 758 return 0; /* XXX: should be impossible */ 759 if (in6_setscope(&sin6_copy.sin6_addr, ifp, &zone)) 760 return 0; 761 if (sin6_copy.sin6_scope_id == zone) 762 return 1; 763 else 764 return 0; 765 } 766 767 if (nd6_is_new_addr_neighbor(addr, ifp)) 768 return 1; 769 770 /* 771 * Even if the address matches none of our addresses, it might be 772 * in the neighbor cache or a connected route. 773 */ 774 ln = nd6_lookup(&addr->sin6_addr, ifp, false); 775 if (ln != NULL) { 776 LLE_RUNLOCK(ln); 777 return 1; 778 } 779 780 rt = rtalloc1(sin6tocsa(addr), 0); 781 if (rt == NULL) 782 return 0; 783 784 if ((rt->rt_flags & RTF_CONNECTED) && (rt->rt_ifp == ifp 785 #if NBRIDGE > 0 786 || rt->rt_ifp->if_bridge == ifp->if_bridge 787 #endif 788 #if NCARP > 0 789 || (ifp->if_type == IFT_CARP && rt->rt_ifp == ifp->if_carpdev) || 790 (rt->rt_ifp->if_type == IFT_CARP && rt->rt_ifp->if_carpdev == ifp)|| 791 (ifp->if_type == IFT_CARP && rt->rt_ifp->if_type == IFT_CARP && 792 rt->rt_ifp->if_carpdev == ifp->if_carpdev) 793 #endif 794 )) { 795 rt_unref(rt); 796 return 1; 797 } 798 rt_unref(rt); 799 800 return 0; 801 } 802 803 /* 804 * Free an nd6 llinfo entry. 805 * Since the function would cause significant changes in the kernel, DO NOT 806 * make it global, unless you have a strong reason for the change, and are sure 807 * that the change is safe. 808 */ 809 static void 810 nd6_free(struct llentry *ln, int gc) 811 { 812 struct ifnet *ifp; 813 814 KASSERT(ln != NULL); 815 LLE_WLOCK_ASSERT(ln); 816 817 /* 818 * If the reason for the deletion is just garbage collection, 819 * and the neighbor is an active router, do not delete it. 820 * Instead, reset the GC timer using the router's lifetime. 821 * XXX: the check for ln_state should be redundant, 822 * but we intentionally keep it just in case. 823 */ 824 if (!ip6_forwarding && ln->ln_router && 825 ln->ln_state == ND6_LLINFO_STALE && gc) 826 { 827 if (ln->ln_expire > time_uptime) 828 nd6_llinfo_settimer(ln, 829 (ln->ln_expire - time_uptime) * hz); 830 else 831 nd6_llinfo_settimer(ln, nd6_gctimer * hz); 832 LLE_WUNLOCK(ln); 833 return; 834 } 835 836 ifp = ln->lle_tbl->llt_ifp; 837 838 if (ln->la_flags & LLE_VALID || gc) { 839 struct sockaddr_in6 sin6; 840 const char *lladdr; 841 842 sockaddr_in6_init(&sin6, &ln->r_l3addr.addr6, 0, 0, 0); 843 lladdr = ln->la_flags & LLE_VALID ? 844 (const char *)&ln->ll_addr : NULL; 845 rt_clonedmsg(RTM_DELETE, NULL, sin6tosa(&sin6), lladdr, ifp); 846 } 847 848 /* 849 * Save to unlock. We still hold an extra reference and will not 850 * free(9) in llentry_free() if someone else holds one as well. 851 */ 852 LLE_WUNLOCK(ln); 853 IF_AFDATA_LOCK(ifp); 854 LLE_WLOCK(ln); 855 856 lltable_free_entry(LLTABLE6(ifp), ln); 857 858 IF_AFDATA_UNLOCK(ifp); 859 } 860 861 /* 862 * Upper-layer reachability hint for Neighbor Unreachability Detection. 863 * 864 * XXX cost-effective methods? 865 */ 866 void 867 nd6_nud_hint(struct rtentry *rt) 868 { 869 struct llentry *ln; 870 struct ifnet *ifp; 871 872 if (rt == NULL) 873 return; 874 875 ifp = rt->rt_ifp; 876 ln = nd6_lookup(&(satocsin6(rt_getkey(rt)))->sin6_addr, ifp, true); 877 if (ln == NULL) 878 return; 879 880 if (ln->ln_state < ND6_LLINFO_REACHABLE) 881 goto done; 882 883 /* 884 * if we get upper-layer reachability confirmation many times, 885 * it is possible we have false information. 886 */ 887 ln->ln_byhint++; 888 if (ln->ln_byhint > nd6_maxnudhint) 889 goto done; 890 891 ln->ln_state = ND6_LLINFO_REACHABLE; 892 if (!ND6_LLINFO_PERMANENT(ln)) 893 nd6_llinfo_settimer(ln, ND_IFINFO(rt->rt_ifp)->reachable * hz); 894 895 done: 896 LLE_WUNLOCK(ln); 897 898 return; 899 } 900 901 struct gc_args { 902 int gc_entries; 903 const struct in6_addr *skip_in6; 904 }; 905 906 static int 907 nd6_purge_entry(struct lltable *llt, struct llentry *ln, void *farg) 908 { 909 struct gc_args *args = farg; 910 int *n = &args->gc_entries; 911 const struct in6_addr *skip_in6 = args->skip_in6; 912 913 if (*n <= 0) 914 return 0; 915 916 if (ND6_LLINFO_PERMANENT(ln)) 917 return 0; 918 919 if (IN6_ARE_ADDR_EQUAL(&ln->r_l3addr.addr6, skip_in6)) 920 return 0; 921 922 LLE_WLOCK(ln); 923 if (ln->ln_state > ND6_LLINFO_INCOMPLETE) 924 ln->ln_state = ND6_LLINFO_STALE; 925 else 926 ln->ln_state = ND6_LLINFO_PURGE; 927 nd6_llinfo_settimer(ln, 0); 928 LLE_WUNLOCK(ln); 929 930 (*n)--; 931 return 0; 932 } 933 934 static void 935 nd6_gc_neighbors(struct lltable *llt, const struct in6_addr *in6) 936 { 937 938 if (ip6_neighborgcthresh >= 0 && 939 lltable_get_entry_count(llt) >= ip6_neighborgcthresh) { 940 struct gc_args gc_args = {10, in6}; 941 /* 942 * XXX entries that are "less recently used" should be 943 * freed first. 944 */ 945 lltable_foreach_lle(llt, nd6_purge_entry, &gc_args); 946 } 947 } 948 949 void 950 nd6_rtrequest(int req, struct rtentry *rt, const struct rt_addrinfo *info) 951 { 952 struct sockaddr *gate = rt->rt_gateway; 953 struct ifnet *ifp = rt->rt_ifp; 954 uint8_t namelen = strlen(ifp->if_xname), addrlen = ifp->if_addrlen; 955 struct ifaddr *ifa; 956 957 RT_DPRINTF("rt_getkey(rt) = %p\n", rt_getkey(rt)); 958 959 if (req == RTM_LLINFO_UPD) { 960 int rc; 961 struct in6_addr *in6; 962 struct in6_addr in6_all; 963 int anycast; 964 965 if ((ifa = info->rti_ifa) == NULL) 966 return; 967 968 in6 = &ifatoia6(ifa)->ia_addr.sin6_addr; 969 anycast = ifatoia6(ifa)->ia6_flags & IN6_IFF_ANYCAST; 970 971 in6_all = in6addr_linklocal_allnodes; 972 if ((rc = in6_setscope(&in6_all, ifa->ifa_ifp, NULL)) != 0) { 973 log(LOG_ERR, "%s: failed to set scope %s " 974 "(errno=%d)\n", __func__, if_name(ifp), rc); 975 return; 976 } 977 978 /* XXX don't set Override for proxy addresses */ 979 nd6_na_output(ifa->ifa_ifp, &in6_all, in6, 980 (anycast ? 0 : ND_NA_FLAG_OVERRIDE) 981 #if 0 982 | (ip6_forwarding ? ND_NA_FLAG_ROUTER : 0) 983 #endif 984 , 1, NULL); 985 return; 986 } 987 988 if ((rt->rt_flags & RTF_GATEWAY) != 0) { 989 if (req != RTM_ADD) 990 return; 991 /* 992 * linklayers with particular MTU limitation. 993 */ 994 switch(ifp->if_type) { 995 #if NARCNET > 0 996 case IFT_ARCNET: 997 if (rt->rt_rmx.rmx_mtu > ARC_PHDS_MAXMTU) /* RFC2497 */ 998 rt->rt_rmx.rmx_mtu = ARC_PHDS_MAXMTU; 999 break; 1000 #endif 1001 } 1002 return; 1003 } 1004 1005 if (nd6_need_cache(ifp) == 0 && (rt->rt_flags & RTF_HOST) == 0) { 1006 RT_DPRINTF("rt_getkey(rt) = %p\n", rt_getkey(rt)); 1007 /* 1008 * This is probably an interface direct route for a link 1009 * which does not need neighbor caches (e.g. fe80::%lo0/64). 1010 * We do not need special treatment below for such a route. 1011 * Moreover, the RTF_LLINFO flag which would be set below 1012 * would annoy the ndp(8) command. 1013 */ 1014 return; 1015 } 1016 1017 switch (req) { 1018 case RTM_ADD: { 1019 struct psref psref; 1020 1021 RT_DPRINTF("rt_getkey(rt) = %p\n", rt_getkey(rt)); 1022 /* 1023 * There is no backward compatibility :) 1024 * 1025 * if ((rt->rt_flags & RTF_HOST) == 0 && 1026 * SIN(rt_mask(rt))->sin_addr.s_addr != 0xffffffff) 1027 * rt->rt_flags |= RTF_CLONING; 1028 */ 1029 /* XXX should move to route.c? */ 1030 if (rt->rt_flags & (RTF_CONNECTED | RTF_LOCAL)) { 1031 union { 1032 struct sockaddr sa; 1033 struct sockaddr_dl sdl; 1034 struct sockaddr_storage ss; 1035 } u; 1036 /* 1037 * Case 1: This route should come from a route to 1038 * interface (RTF_CLONING case) or the route should be 1039 * treated as on-link but is currently not 1040 * (RTF_LLINFO && ln == NULL case). 1041 */ 1042 if (sockaddr_dl_init(&u.sdl, sizeof(u.ss), 1043 ifp->if_index, ifp->if_type, 1044 NULL, namelen, NULL, addrlen) == NULL) { 1045 printf("%s.%d: sockaddr_dl_init(, %zu, ) " 1046 "failed on %s\n", __func__, __LINE__, 1047 sizeof(u.ss), if_name(ifp)); 1048 } 1049 rt_setgate(rt, &u.sa); 1050 gate = rt->rt_gateway; 1051 RT_DPRINTF("rt_getkey(rt) = %p\n", rt_getkey(rt)); 1052 if (gate == NULL) { 1053 log(LOG_ERR, 1054 "%s: rt_setgate failed on %s\n", __func__, 1055 if_name(ifp)); 1056 break; 1057 } 1058 1059 RT_DPRINTF("rt_getkey(rt) = %p\n", rt_getkey(rt)); 1060 if ((rt->rt_flags & RTF_CONNECTED) != 0) 1061 break; 1062 } 1063 RT_DPRINTF("rt_getkey(rt) = %p\n", rt_getkey(rt)); 1064 /* 1065 * In IPv4 code, we try to annonuce new RTF_ANNOUNCE entry here. 1066 * We don't do that here since llinfo is not ready yet. 1067 * 1068 * There are also couple of other things to be discussed: 1069 * - unsolicited NA code needs improvement beforehand 1070 * - RFC2461 says we MAY send multicast unsolicited NA 1071 * (7.2.6 paragraph 4), however, it also says that we 1072 * SHOULD provide a mechanism to prevent multicast NA storm. 1073 * we don't have anything like it right now. 1074 * note that the mechanism needs a mutual agreement 1075 * between proxies, which means that we need to implement 1076 * a new protocol, or a new kludge. 1077 * - from RFC2461 6.2.4, host MUST NOT send an unsolicited NA. 1078 * we need to check ip6forwarding before sending it. 1079 * (or should we allow proxy ND configuration only for 1080 * routers? there's no mention about proxy ND from hosts) 1081 */ 1082 #if 0 1083 /* XXX it does not work */ 1084 if (rt->rt_flags & RTF_ANNOUNCE) 1085 nd6_na_output(ifp, 1086 &satocsin6(rt_getkey(rt))->sin6_addr, 1087 &satocsin6(rt_getkey(rt))->sin6_addr, 1088 ip6_forwarding ? ND_NA_FLAG_ROUTER : 0, 1089 1, NULL); 1090 #endif 1091 1092 if ((ifp->if_flags & (IFF_POINTOPOINT | IFF_LOOPBACK)) == 0) { 1093 RT_DPRINTF("rt_getkey(rt) = %p\n", rt_getkey(rt)); 1094 /* 1095 * Address resolution isn't necessary for a point to 1096 * point link, so we can skip this test for a p2p link. 1097 */ 1098 if (gate->sa_family != AF_LINK || 1099 gate->sa_len < 1100 sockaddr_dl_measure(namelen, addrlen)) { 1101 log(LOG_DEBUG, 1102 "nd6_rtrequest: bad gateway value: %s\n", 1103 if_name(ifp)); 1104 break; 1105 } 1106 satosdl(gate)->sdl_type = ifp->if_type; 1107 satosdl(gate)->sdl_index = ifp->if_index; 1108 RT_DPRINTF("rt_getkey(rt) = %p\n", rt_getkey(rt)); 1109 } 1110 RT_DPRINTF("rt_getkey(rt) = %p\n", rt_getkey(rt)); 1111 1112 /* 1113 * When called from rt_ifa_addlocal, we cannot depend on that 1114 * the address (rt_getkey(rt)) exits in the address list of the 1115 * interface. So check RTF_LOCAL instead. 1116 */ 1117 if (rt->rt_flags & RTF_LOCAL) { 1118 if (nd6_useloopback) 1119 rt->rt_ifp = lo0ifp; /* XXX */ 1120 break; 1121 } 1122 1123 /* 1124 * check if rt_getkey(rt) is an address assigned 1125 * to the interface. 1126 */ 1127 ifa = (struct ifaddr *)in6ifa_ifpwithaddr_psref(ifp, 1128 &satocsin6(rt_getkey(rt))->sin6_addr, &psref); 1129 if (ifa != NULL) { 1130 if (nd6_useloopback) { 1131 rt->rt_ifp = lo0ifp; /* XXX */ 1132 /* 1133 * Make sure rt_ifa be equal to the ifaddr 1134 * corresponding to the address. 1135 * We need this because when we refer 1136 * rt_ifa->ia6_flags in ip6_input, we assume 1137 * that the rt_ifa points to the address instead 1138 * of the loopback address. 1139 */ 1140 if (!ISSET(info->rti_flags, RTF_DONTCHANGEIFA) 1141 && ifa != rt->rt_ifa) 1142 rt_replace_ifa(rt, ifa); 1143 } 1144 } else if (rt->rt_flags & RTF_ANNOUNCE) { 1145 /* join solicited node multicast for proxy ND */ 1146 if (ifp->if_flags & IFF_MULTICAST) { 1147 struct in6_addr llsol; 1148 int error; 1149 1150 llsol = satocsin6(rt_getkey(rt))->sin6_addr; 1151 llsol.s6_addr32[0] = htonl(0xff020000); 1152 llsol.s6_addr32[1] = 0; 1153 llsol.s6_addr32[2] = htonl(1); 1154 llsol.s6_addr8[12] = 0xff; 1155 if (in6_setscope(&llsol, ifp, NULL)) 1156 goto out; 1157 if (!in6_addmulti(&llsol, ifp, &error, 0)) { 1158 char ip6buf[INET6_ADDRSTRLEN]; 1159 nd6log(LOG_ERR, "%s: failed to join " 1160 "%s (errno=%d)\n", if_name(ifp), 1161 IN6_PRINT(ip6buf, &llsol), error); 1162 } 1163 } 1164 } 1165 out: 1166 ifa_release(ifa, &psref); 1167 /* 1168 * If we have too many cache entries, initiate immediate 1169 * purging for some entries. 1170 */ 1171 if (rt->rt_ifp != NULL) 1172 nd6_gc_neighbors(LLTABLE6(rt->rt_ifp), NULL); 1173 break; 1174 } 1175 1176 case RTM_DELETE: 1177 /* leave from solicited node multicast for proxy ND */ 1178 if ((rt->rt_flags & RTF_ANNOUNCE) != 0 && 1179 (ifp->if_flags & IFF_MULTICAST) != 0) { 1180 struct in6_addr llsol; 1181 1182 llsol = satocsin6(rt_getkey(rt))->sin6_addr; 1183 llsol.s6_addr32[0] = htonl(0xff020000); 1184 llsol.s6_addr32[1] = 0; 1185 llsol.s6_addr32[2] = htonl(1); 1186 llsol.s6_addr8[12] = 0xff; 1187 if (in6_setscope(&llsol, ifp, NULL) == 0) 1188 in6_lookup_and_delete_multi(&llsol, ifp); 1189 } 1190 break; 1191 } 1192 } 1193 1194 static void 1195 nd6_setifflags(struct ifnet *ifp, uint32_t flags) 1196 { 1197 struct nd_kifinfo *ndi = ND_IFINFO(ifp); 1198 struct ifaddr *ifa; 1199 struct in6_ifaddr *ia; 1200 int s; 1201 1202 if (ndi->flags & ND6_IFF_IFDISABLED && !(flags & ND6_IFF_IFDISABLED)) { 1203 /* 1204 * If the interface is marked as ND6_IFF_IFDISABLED and 1205 * has a link-local address with IN6_IFF_DUPLICATED, 1206 * do not clear ND6_IFF_IFDISABLED. 1207 * See RFC 4862, section 5.4.5. 1208 */ 1209 bool duplicated_linklocal = false; 1210 1211 s = pserialize_read_enter(); 1212 IFADDR_READER_FOREACH(ifa, ifp) { 1213 if (ifa->ifa_addr->sa_family != AF_INET6) 1214 continue; 1215 ia = (struct in6_ifaddr *)ifa; 1216 if ((ia->ia6_flags & IN6_IFF_DUPLICATED) && 1217 IN6_IS_ADDR_LINKLOCAL(IA6_IN6(ia))) 1218 { 1219 duplicated_linklocal = true; 1220 break; 1221 } 1222 } 1223 pserialize_read_exit(s); 1224 1225 if (duplicated_linklocal) { 1226 flags |= ND6_IFF_IFDISABLED; 1227 log(LOG_ERR, "%s: Cannot enable an interface" 1228 " with a link-local address marked" 1229 " duplicate.\n", if_name(ifp)); 1230 } else { 1231 ndi->flags &= ~ND6_IFF_IFDISABLED; 1232 if (ifp->if_flags & IFF_UP) 1233 in6_if_up(ifp); 1234 } 1235 } else if (!(ndi->flags & ND6_IFF_IFDISABLED) && 1236 (flags & ND6_IFF_IFDISABLED)) 1237 { 1238 struct psref psref; 1239 int bound = curlwp_bind(); 1240 1241 /* Mark all IPv6 addresses as tentative. */ 1242 1243 ndi->flags |= ND6_IFF_IFDISABLED; 1244 s = pserialize_read_enter(); 1245 IFADDR_READER_FOREACH(ifa, ifp) { 1246 if (ifa->ifa_addr->sa_family != AF_INET6) 1247 continue; 1248 ifa_acquire(ifa, &psref); 1249 pserialize_read_exit(s); 1250 1251 nd6_dad_stop(ifa); 1252 1253 ia = (struct in6_ifaddr *)ifa; 1254 ia->ia6_flags |= IN6_IFF_TENTATIVE; 1255 1256 s = pserialize_read_enter(); 1257 ifa_release(ifa, &psref); 1258 } 1259 pserialize_read_exit(s); 1260 curlwp_bindx(bound); 1261 } 1262 1263 if (flags & ND6_IFF_AUTO_LINKLOCAL) { 1264 if (!(ndi->flags & ND6_IFF_AUTO_LINKLOCAL)) { 1265 /* auto_linklocal 0->1 transition */ 1266 1267 ndi->flags |= ND6_IFF_AUTO_LINKLOCAL; 1268 in6_ifattach(ifp, NULL); 1269 } else if (!(flags & ND6_IFF_IFDISABLED) && 1270 ifp->if_flags & IFF_UP) 1271 { 1272 /* 1273 * When the IF already has 1274 * ND6_IFF_AUTO_LINKLOCAL, no link-local 1275 * address is assigned, and IFF_UP, try to 1276 * assign one. 1277 */ 1278 bool haslinklocal = 0; 1279 1280 s = pserialize_read_enter(); 1281 IFADDR_READER_FOREACH(ifa, ifp) { 1282 if (ifa->ifa_addr->sa_family !=AF_INET6) 1283 continue; 1284 ia = (struct in6_ifaddr *)ifa; 1285 if (IN6_IS_ADDR_LINKLOCAL(IA6_IN6(ia))){ 1286 haslinklocal = true; 1287 break; 1288 } 1289 } 1290 pserialize_read_exit(s); 1291 if (!haslinklocal) 1292 in6_ifattach(ifp, NULL); 1293 } 1294 } 1295 1296 ndi->flags = flags; 1297 } 1298 1299 int 1300 nd6_ioctl(u_long cmd, void *data, struct ifnet *ifp) 1301 { 1302 #ifdef OSIOCGIFINFO_IN6_90 1303 struct in6_ndireq90 *ondi = (struct in6_ndireq90 *)data; 1304 struct in6_ndifreq90 *ndif = (struct in6_ndifreq90 *)data; 1305 #define OND ondi->ndi 1306 #endif 1307 struct in6_ndireq *ndi = (struct in6_ndireq *)data; 1308 struct in6_nbrinfo *nbi = (struct in6_nbrinfo *)data; 1309 struct nd_kifinfo *ifndi = ND_IFINFO(ifp); 1310 int error = 0; 1311 #define ND ndi->ndi 1312 1313 switch (cmd) { 1314 #ifdef OSIOCSRTRFLUSH_IN6 1315 case OSIOCGDRLST_IN6: /* FALLTHROUGH */ 1316 case OSIOCGPRLST_IN6: /* FALLTHROUGH */ 1317 case OSIOCSNDFLUSH_IN6: /* FALLTHROUGH */ 1318 case OSIOCSPFXFLUSH_IN6: /* FALLTHROUGH */ 1319 case OSIOCSRTRFLUSH_IN6: /* FALLTHROUGH */ 1320 break; 1321 case OSIOCGDEFIFACE_IN6: 1322 ndif->ifindex = 0; 1323 break; 1324 case OSIOCSDEFIFACE_IN6: 1325 error = ENOTSUP; 1326 break; 1327 #endif 1328 #ifdef OSIOCGIFINFO_IN6 1329 case OSIOCGIFINFO_IN6: /* FALLTHROUGH */ 1330 #endif 1331 #ifdef OSIOCGIFINFO_IN6_90 1332 case OSIOCGIFINFO_IN6_90: 1333 memset(&OND, 0, sizeof(OND)); 1334 OND.initialized = 1; 1335 OND.chlim = ifndi->chlim; 1336 OND.basereachable = ifndi->basereachable; 1337 OND.retrans = ifndi->retrans; 1338 OND.flags = ifndi->flags; 1339 break; 1340 case OSIOCSIFINFO_IN6_90: 1341 /* Allow userland to set Neighour Unreachability Detection 1342 * timers. */ 1343 if (OND.chlim != 0) 1344 ifndi->chlim = OND.chlim; 1345 if (OND.basereachable != 0 && 1346 OND.basereachable != ifndi->basereachable) 1347 { 1348 ifndi->basereachable = OND.basereachable; 1349 ifndi->reachable = ND_COMPUTE_RTIME(OND.basereachable); 1350 } 1351 if (OND.retrans != 0) 1352 ifndi->retrans = OND.retrans; 1353 /* Retain the old behaviour .... */ 1354 /* FALLTHROUGH */ 1355 case OSIOCSIFINFO_FLAGS_90: 1356 nd6_setifflags(ifp, OND.flags); 1357 break; 1358 #undef OND 1359 #endif 1360 case SIOCGIFINFO_IN6: 1361 ND.chlim = ifndi->chlim; 1362 ND.basereachable = ifndi->basereachable; 1363 ND.retrans = ifndi->retrans; 1364 ND.flags = ifndi->flags; 1365 break; 1366 case SIOCSIFINFO_IN6: 1367 /* Allow userland to set Neighour Unreachability Detection 1368 * timers. */ 1369 if (ND.chlim != 0) 1370 ifndi->chlim = ND.chlim; 1371 if (ND.basereachable != 0 && 1372 ND.basereachable != ifndi->basereachable) 1373 { 1374 ifndi->basereachable = ND.basereachable; 1375 ifndi->reachable = ND_COMPUTE_RTIME(ND.basereachable); 1376 } 1377 if (ND.retrans != 0) 1378 ifndi->retrans = ND.retrans; 1379 break; 1380 case SIOCSIFINFO_FLAGS: 1381 nd6_setifflags(ifp, ND.flags); 1382 break; 1383 #undef ND 1384 case SIOCGNBRINFO_IN6: 1385 { 1386 struct llentry *ln; 1387 struct in6_addr nb_addr = nbi->addr; /* make local for safety */ 1388 1389 if ((error = in6_setscope(&nb_addr, ifp, NULL)) != 0) 1390 return error; 1391 1392 ln = nd6_lookup(&nb_addr, ifp, false); 1393 if (ln == NULL) { 1394 error = EINVAL; 1395 break; 1396 } 1397 nbi->state = ln->ln_state; 1398 nbi->asked = ln->ln_asked; 1399 nbi->isrouter = ln->ln_router; 1400 nbi->expire = ln->ln_expire ? 1401 time_mono_to_wall(ln->ln_expire) : 0; 1402 LLE_RUNLOCK(ln); 1403 1404 break; 1405 } 1406 } 1407 return error; 1408 } 1409 1410 void 1411 nd6_llinfo_release_pkts(struct llentry *ln, struct ifnet *ifp) 1412 { 1413 struct mbuf *m_hold, *m_hold_next; 1414 struct sockaddr_in6 sin6; 1415 1416 LLE_WLOCK_ASSERT(ln); 1417 1418 sockaddr_in6_init(&sin6, &ln->r_l3addr.addr6, 0, 0, 0); 1419 1420 m_hold = ln->la_hold, ln->la_hold = NULL, ln->la_numheld = 0; 1421 1422 LLE_WUNLOCK(ln); 1423 for (; m_hold != NULL; m_hold = m_hold_next) { 1424 m_hold_next = m_hold->m_nextpkt; 1425 m_hold->m_nextpkt = NULL; 1426 1427 /* 1428 * we assume ifp is not a p2p here, so 1429 * just set the 2nd argument as the 1430 * 1st one. 1431 */ 1432 ip6_if_output(ifp, ifp, m_hold, &sin6, NULL); 1433 } 1434 LLE_WLOCK(ln); 1435 } 1436 1437 /* 1438 * Create neighbor cache entry and cache link-layer address, 1439 * on reception of inbound ND6 packets. (RS/RA/NS/redirect) 1440 */ 1441 void 1442 nd6_cache_lladdr( 1443 struct ifnet *ifp, 1444 struct in6_addr *from, 1445 char *lladdr, 1446 int lladdrlen, 1447 int type, /* ICMP6 type */ 1448 int code /* type dependent information */ 1449 ) 1450 { 1451 struct llentry *ln = NULL; 1452 int is_newentry; 1453 int do_update; 1454 int olladdr; 1455 int llchange; 1456 int newstate = 0; 1457 1458 KASSERT(ifp != NULL); 1459 KASSERT(from != NULL); 1460 1461 /* nothing must be updated for unspecified address */ 1462 if (IN6_IS_ADDR_UNSPECIFIED(from)) 1463 return; 1464 1465 /* 1466 * Validation about ifp->if_addrlen and lladdrlen must be done in 1467 * the caller. 1468 * 1469 * XXX If the link does not have link-layer adderss, what should 1470 * we do? (ifp->if_addrlen == 0) 1471 * Spec says nothing in sections for RA, RS and NA. There's small 1472 * description on it in NS section (RFC 2461 7.2.3). 1473 */ 1474 1475 ln = nd6_lookup(from, ifp, true); 1476 if (ln == NULL) { 1477 #if 0 1478 /* nothing must be done if there's no lladdr */ 1479 if (!lladdr || !lladdrlen) 1480 return NULL; 1481 #endif 1482 1483 ln = nd6_create(from, ifp); 1484 is_newentry = 1; 1485 } else { 1486 /* do nothing if static ndp is set */ 1487 if (ln->la_flags & LLE_STATIC) { 1488 LLE_WUNLOCK(ln); 1489 return; 1490 } 1491 is_newentry = 0; 1492 } 1493 1494 if (ln == NULL) 1495 return; 1496 1497 olladdr = (ln->la_flags & LLE_VALID) ? 1 : 0; 1498 if (olladdr && lladdr) { 1499 llchange = memcmp(lladdr, &ln->ll_addr, ifp->if_addrlen); 1500 } else 1501 llchange = 0; 1502 1503 /* 1504 * newentry olladdr lladdr llchange (*=record) 1505 * 0 n n -- (1) 1506 * 0 y n -- (2) 1507 * 0 n y -- (3) * STALE 1508 * 0 y y n (4) * 1509 * 0 y y y (5) * STALE 1510 * 1 -- n -- (6) NOSTATE(= PASSIVE) 1511 * 1 -- y -- (7) * STALE 1512 */ 1513 1514 if (lladdr) { /* (3-5) and (7) */ 1515 /* 1516 * Record source link-layer address 1517 * XXX is it dependent to ifp->if_type? 1518 */ 1519 memcpy(&ln->ll_addr, lladdr, ifp->if_addrlen); 1520 ln->la_flags |= LLE_VALID; 1521 } 1522 1523 if (!is_newentry) { 1524 if ((!olladdr && lladdr) || /* (3) */ 1525 (olladdr && lladdr && llchange)) { /* (5) */ 1526 do_update = 1; 1527 newstate = ND6_LLINFO_STALE; 1528 } else /* (1-2,4) */ 1529 do_update = 0; 1530 } else { 1531 do_update = 1; 1532 if (lladdr == NULL) /* (6) */ 1533 newstate = ND6_LLINFO_NOSTATE; 1534 else /* (7) */ 1535 newstate = ND6_LLINFO_STALE; 1536 } 1537 1538 if (do_update) { 1539 /* 1540 * Update the state of the neighbor cache. 1541 */ 1542 ln->ln_state = newstate; 1543 1544 if (ln->ln_state == ND6_LLINFO_STALE) { 1545 /* 1546 * XXX: since nd6_output() below will cause 1547 * state tansition to DELAY and reset the timer, 1548 * we must set the timer now, although it is actually 1549 * meaningless. 1550 */ 1551 nd6_llinfo_settimer(ln, nd6_gctimer * hz); 1552 1553 nd6_llinfo_release_pkts(ln, ifp); 1554 } else if (ln->ln_state == ND6_LLINFO_INCOMPLETE) { 1555 /* probe right away */ 1556 nd6_llinfo_settimer((void *)ln, 0); 1557 } 1558 } 1559 1560 /* 1561 * ICMP6 type dependent behavior. 1562 * 1563 * NS: clear IsRouter if new entry 1564 * RS: clear IsRouter 1565 * RA: set IsRouter if there's lladdr 1566 * redir: clear IsRouter if new entry 1567 * 1568 * RA case, (1): 1569 * The spec says that we must set IsRouter in the following cases: 1570 * - If lladdr exist, set IsRouter. This means (1-5). 1571 * - If it is old entry (!newentry), set IsRouter. This means (7). 1572 * So, based on the spec, in (1-5) and (7) cases we must set IsRouter. 1573 * A quetion arises for (1) case. (1) case has no lladdr in the 1574 * neighbor cache, this is similar to (6). 1575 * This case is rare but we figured that we MUST NOT set IsRouter. 1576 * 1577 * newentry olladdr lladdr llchange NS RS RA redir 1578 * D R 1579 * 0 n n -- (1) c ? s 1580 * 0 y n -- (2) c s s 1581 * 0 n y -- (3) c s s 1582 * 0 y y n (4) c s s 1583 * 0 y y y (5) c s s 1584 * 1 -- n -- (6) c c c s 1585 * 1 -- y -- (7) c c s c s 1586 * 1587 * (c=clear s=set) 1588 */ 1589 switch (type & 0xff) { 1590 case ND_NEIGHBOR_SOLICIT: 1591 /* 1592 * New entry must have is_router flag cleared. 1593 */ 1594 if (is_newentry) /* (6-7) */ 1595 ln->ln_router = 0; 1596 break; 1597 case ND_REDIRECT: 1598 /* 1599 * If the icmp is a redirect to a better router, always set the 1600 * is_router flag. Otherwise, if the entry is newly created, 1601 * clear the flag. [RFC 2461, sec 8.3] 1602 */ 1603 if (code == ND_REDIRECT_ROUTER) 1604 ln->ln_router = 1; 1605 else if (is_newentry) /* (6-7) */ 1606 ln->ln_router = 0; 1607 break; 1608 case ND_ROUTER_SOLICIT: 1609 /* 1610 * is_router flag must always be cleared. 1611 */ 1612 ln->ln_router = 0; 1613 break; 1614 case ND_ROUTER_ADVERT: 1615 /* 1616 * Mark an entry with lladdr as a router. 1617 */ 1618 if ((!is_newentry && (olladdr || lladdr)) || /* (2-5) */ 1619 (is_newentry && lladdr)) { /* (7) */ 1620 ln->ln_router = 1; 1621 } 1622 break; 1623 } 1624 1625 if (do_update && lladdr != NULL) { 1626 struct sockaddr_in6 sin6; 1627 1628 sockaddr_in6_init(&sin6, from, 0, 0, 0); 1629 rt_clonedmsg(is_newentry ? RTM_ADD : RTM_CHANGE, 1630 NULL, sin6tosa(&sin6), lladdr, ifp); 1631 } 1632 1633 if (ln != NULL) 1634 LLE_WUNLOCK(ln); 1635 1636 /* 1637 * If we have too many cache entries, initiate immediate 1638 * purging for some entries. 1639 */ 1640 if (is_newentry) 1641 nd6_gc_neighbors(LLTABLE6(ifp), &ln->r_l3addr.addr6); 1642 } 1643 1644 static void 1645 nd6_slowtimo(void *ignored_arg) 1646 { 1647 struct nd_kifinfo *ndi; 1648 struct ifnet *ifp; 1649 int s; 1650 1651 SOFTNET_KERNEL_LOCK_UNLESS_NET_MPSAFE(); 1652 callout_reset(&nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz, 1653 nd6_slowtimo, NULL); 1654 1655 s = pserialize_read_enter(); 1656 IFNET_READER_FOREACH(ifp) { 1657 ndi = ND_IFINFO(ifp); 1658 if (ndi->basereachable && /* already initialized */ 1659 (ndi->recalctm -= ND6_SLOWTIMER_INTERVAL) <= 0) { 1660 /* 1661 * Since reachable time rarely changes by router 1662 * advertisements, we SHOULD insure that a new random 1663 * value gets recomputed at least once every few hours. 1664 * (RFC 2461, 6.3.4) 1665 */ 1666 ndi->recalctm = nd6_recalc_reachtm_interval; 1667 ndi->reachable = ND_COMPUTE_RTIME(ndi->basereachable); 1668 } 1669 } 1670 pserialize_read_exit(s); 1671 1672 SOFTNET_KERNEL_UNLOCK_UNLESS_NET_MPSAFE(); 1673 } 1674 1675 /* 1676 * Return 0 if a neighbor cache is found. Return EWOULDBLOCK if a cache is not 1677 * found and trying to resolve a neighbor; in this case the mbuf is queued in 1678 * the list. Otherwise return errno after freeing the mbuf. 1679 */ 1680 int 1681 nd6_resolve(struct ifnet *ifp, const struct rtentry *rt, struct mbuf *m, 1682 const struct sockaddr *_dst, uint8_t *lldst, size_t dstsize) 1683 { 1684 struct llentry *ln = NULL; 1685 bool created = false; 1686 const struct sockaddr_in6 *dst = satocsin6(_dst); 1687 int error; 1688 struct nd_kifinfo *ndi = ND_IFINFO(ifp); 1689 1690 /* discard the packet if IPv6 operation is disabled on the interface */ 1691 if (ndi->flags & ND6_IFF_IFDISABLED) { 1692 m_freem(m); 1693 return ENETDOWN; /* better error? */ 1694 } 1695 1696 /* 1697 * Address resolution or Neighbor Unreachability Detection 1698 * for the next hop. 1699 * At this point, the destination of the packet must be a unicast 1700 * or an anycast address(i.e. not a multicast). 1701 */ 1702 1703 /* Look up the neighbor cache for the nexthop */ 1704 ln = nd6_lookup(&dst->sin6_addr, ifp, false); 1705 1706 if (ln != NULL && (ln->la_flags & LLE_VALID) != 0 && 1707 ln->ln_state == ND6_LLINFO_REACHABLE) { 1708 /* Fast path */ 1709 memcpy(lldst, &ln->ll_addr, MIN(dstsize, ifp->if_addrlen)); 1710 LLE_RUNLOCK(ln); 1711 return 0; 1712 } 1713 if (ln != NULL) 1714 LLE_RUNLOCK(ln); 1715 1716 /* Slow path */ 1717 ln = nd6_lookup(&dst->sin6_addr, ifp, true); 1718 if (ln == NULL && nd6_is_addr_neighbor(dst, ifp)) { 1719 /* 1720 * Since nd6_is_addr_neighbor() internally calls nd6_lookup(), 1721 * the condition below is not very efficient. But we believe 1722 * it is tolerable, because this should be a rare case. 1723 */ 1724 ln = nd6_create(&dst->sin6_addr, ifp); 1725 if (ln == NULL) { 1726 char ip6buf[INET6_ADDRSTRLEN]; 1727 log(LOG_DEBUG, 1728 "%s: can't allocate llinfo for %s " 1729 "(ln=%p, rt=%p)\n", __func__, 1730 IN6_PRINT(ip6buf, &dst->sin6_addr), ln, rt); 1731 m_freem(m); 1732 return ENOBUFS; 1733 } 1734 created = true; 1735 } 1736 1737 if (ln == NULL) { 1738 m_freem(m); 1739 return ENETDOWN; /* better error? */ 1740 } 1741 1742 LLE_WLOCK_ASSERT(ln); 1743 1744 /* We don't have to do link-layer address resolution on a p2p link. */ 1745 if ((ifp->if_flags & IFF_POINTOPOINT) != 0 && 1746 ln->ln_state < ND6_LLINFO_REACHABLE) { 1747 ln->ln_state = ND6_LLINFO_STALE; 1748 nd6_llinfo_settimer(ln, nd6_gctimer * hz); 1749 } 1750 1751 /* 1752 * The first time we send a packet to a neighbor whose entry is 1753 * STALE, we have to change the state to DELAY and a sets a timer to 1754 * expire in DELAY_FIRST_PROBE_TIME seconds to ensure do 1755 * neighbor unreachability detection on expiration. 1756 * (RFC 2461 7.3.3) 1757 */ 1758 if (ln->ln_state == ND6_LLINFO_STALE) { 1759 ln->ln_asked = 0; 1760 ln->ln_state = ND6_LLINFO_DELAY; 1761 nd6_llinfo_settimer(ln, nd6_delay * hz); 1762 } 1763 1764 /* 1765 * If the neighbor cache entry has a state other than INCOMPLETE 1766 * (i.e. its link-layer address is already resolved), just 1767 * send the packet. 1768 */ 1769 if (ln->ln_state > ND6_LLINFO_INCOMPLETE) { 1770 KASSERT((ln->la_flags & LLE_VALID) != 0); 1771 memcpy(lldst, &ln->ll_addr, MIN(dstsize, ifp->if_addrlen)); 1772 LLE_WUNLOCK(ln); 1773 return 0; 1774 } 1775 1776 /* 1777 * There is a neighbor cache entry, but no ethernet address 1778 * response yet. Append this latest packet to the end of the 1779 * packet queue in the mbuf, unless the number of the packet 1780 * does not exceed nd6_maxqueuelen. When it exceeds nd6_maxqueuelen, 1781 * the oldest packet in the queue will be removed. 1782 */ 1783 if (ln->ln_state == ND6_LLINFO_NOSTATE || 1784 ln->ln_state == ND6_LLINFO_WAITDELETE) 1785 ln->ln_state = ND6_LLINFO_INCOMPLETE; 1786 if (ln->ln_hold) { 1787 struct mbuf *m_hold; 1788 int i; 1789 1790 i = 0; 1791 for (m_hold = ln->ln_hold; m_hold; m_hold = m_hold->m_nextpkt) { 1792 i++; 1793 if (m_hold->m_nextpkt == NULL) { 1794 m_hold->m_nextpkt = m; 1795 break; 1796 } 1797 } 1798 while (i >= nd6_maxqueuelen) { 1799 m_hold = ln->ln_hold; 1800 ln->ln_hold = ln->ln_hold->m_nextpkt; 1801 m_freem(m_hold); 1802 i--; 1803 } 1804 } else { 1805 ln->ln_hold = m; 1806 } 1807 1808 if (ln->ln_asked >= nd6_mmaxtries) 1809 error = (rt != NULL && rt->rt_flags & RTF_GATEWAY) ? 1810 EHOSTUNREACH : EHOSTDOWN; 1811 else 1812 error = EWOULDBLOCK; 1813 1814 /* 1815 * If there has been no NS for the neighbor after entering the 1816 * INCOMPLETE state, send the first solicitation. 1817 */ 1818 if (!ND6_LLINFO_PERMANENT(ln) && ln->ln_asked == 0) { 1819 struct in6_addr src, *psrc; 1820 1821 ln->ln_asked++; 1822 nd6_llinfo_settimer(ln, ndi->retrans * hz / 1000); 1823 psrc = nd6_llinfo_get_holdsrc(ln, &src); 1824 LLE_WUNLOCK(ln); 1825 nd6_ns_output(ifp, NULL, &dst->sin6_addr, psrc, NULL); 1826 } else 1827 LLE_WUNLOCK(ln); 1828 1829 if (created) 1830 nd6_gc_neighbors(LLTABLE6(ifp), &dst->sin6_addr); 1831 1832 return error; 1833 } 1834 1835 int 1836 nd6_need_cache(struct ifnet *ifp) 1837 { 1838 /* 1839 * XXX: we currently do not make neighbor cache on any interface 1840 * other than ARCnet, Ethernet, and GIF. 1841 * 1842 * RFC2893 says: 1843 * - unidirectional tunnels needs no ND 1844 */ 1845 switch (ifp->if_type) { 1846 case IFT_ARCNET: 1847 case IFT_ETHER: 1848 case IFT_IEEE1394: 1849 case IFT_CARP: 1850 case IFT_GIF: /* XXX need more cases? */ 1851 case IFT_PPP: 1852 case IFT_TUNNEL: 1853 return 1; 1854 default: 1855 return 0; 1856 } 1857 } 1858 1859 static void 1860 clear_llinfo_pqueue(struct llentry *ln) 1861 { 1862 struct mbuf *m_hold, *m_hold_next; 1863 1864 for (m_hold = ln->ln_hold; m_hold; m_hold = m_hold_next) { 1865 m_hold_next = m_hold->m_nextpkt; 1866 m_hold->m_nextpkt = NULL; 1867 m_freem(m_hold); 1868 } 1869 1870 ln->ln_hold = NULL; 1871 return; 1872 } 1873 1874 int 1875 nd6_sysctl( 1876 int name, 1877 void *oldp, /* syscall arg, need copyout */ 1878 size_t *oldlenp, 1879 void *newp, /* syscall arg, need copyin */ 1880 size_t newlen 1881 ) 1882 { 1883 1884 if (newp) 1885 return EPERM; 1886 1887 switch (name) { 1888 #ifdef COMPAT_90 1889 case OICMPV6CTL_ND6_DRLIST: /* FALLTHROUGH */ 1890 case OICMPV6CTL_ND6_PRLIST: 1891 *oldlenp = 0; 1892 return 0; 1893 #endif 1894 case ICMPV6CTL_ND6_MAXQLEN: 1895 return 0; 1896 default: 1897 return ENOPROTOOPT; 1898 } 1899 } 1900