1 /* $NetBSD: route.c,v 1.236 2022/12/22 13:54:57 riastradh Exp $ */ 2 3 /*- 4 * Copyright (c) 1998, 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Kevin M. Lahey of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 35 * All rights reserved. 36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. Redistributions of source code must retain the above copyright 41 * notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. Neither the name of the project nor the names of its contributors 46 * may be used to endorse or promote products derived from this software 47 * without specific prior written permission. 48 * 49 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 52 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 59 * SUCH DAMAGE. 60 */ 61 62 /* 63 * Copyright (c) 1980, 1986, 1991, 1993 64 * The Regents of the University of California. All rights reserved. 65 * 66 * Redistribution and use in source and binary forms, with or without 67 * modification, are permitted provided that the following conditions 68 * are met: 69 * 1. Redistributions of source code must retain the above copyright 70 * notice, this list of conditions and the following disclaimer. 71 * 2. Redistributions in binary form must reproduce the above copyright 72 * notice, this list of conditions and the following disclaimer in the 73 * documentation and/or other materials provided with the distribution. 74 * 3. Neither the name of the University nor the names of its contributors 75 * may be used to endorse or promote products derived from this software 76 * without specific prior written permission. 77 * 78 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 79 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 80 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 81 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 82 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 83 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 84 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 85 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 86 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 87 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 88 * SUCH DAMAGE. 89 * 90 * @(#)route.c 8.3 (Berkeley) 1/9/95 91 */ 92 93 #ifdef _KERNEL_OPT 94 #include "opt_inet.h" 95 #include "opt_route.h" 96 #include "opt_net_mpsafe.h" 97 #endif 98 99 #include <sys/cdefs.h> 100 __KERNEL_RCSID(0, "$NetBSD: route.c,v 1.236 2022/12/22 13:54:57 riastradh Exp $"); 101 102 #include <sys/param.h> 103 #ifdef RTFLUSH_DEBUG 104 #include <sys/sysctl.h> 105 #endif 106 #include <sys/systm.h> 107 #include <sys/callout.h> 108 #include <sys/proc.h> 109 #include <sys/mbuf.h> 110 #include <sys/socket.h> 111 #include <sys/socketvar.h> 112 #include <sys/domain.h> 113 #include <sys/kernel.h> 114 #include <sys/ioctl.h> 115 #include <sys/pool.h> 116 #include <sys/kauth.h> 117 #include <sys/workqueue.h> 118 #include <sys/syslog.h> 119 #include <sys/rwlock.h> 120 #include <sys/mutex.h> 121 #include <sys/cpu.h> 122 #include <sys/kmem.h> 123 124 #include <net/if.h> 125 #include <net/if_dl.h> 126 #include <net/route.h> 127 #if defined(INET) || defined(INET6) 128 #include <net/if_llatbl.h> 129 #endif 130 131 #include <netinet/in.h> 132 #include <netinet/in_var.h> 133 134 #define PRESERVED_RTF (RTF_UP | RTF_GATEWAY | RTF_HOST | RTF_DONE | RTF_MASK) 135 136 #ifdef RTFLUSH_DEBUG 137 #define rtcache_debug() __predict_false(_rtcache_debug) 138 #else /* RTFLUSH_DEBUG */ 139 #define rtcache_debug() 0 140 #endif /* RTFLUSH_DEBUG */ 141 142 #ifdef RT_DEBUG 143 #define RT_REFCNT_TRACE(rt) printf("%s:%d: rt=%p refcnt=%d\n", \ 144 __func__, __LINE__, (rt), (rt)->rt_refcnt) 145 #else 146 #define RT_REFCNT_TRACE(rt) do {} while (0) 147 #endif 148 149 #ifdef RT_DEBUG 150 #define dlog(level, fmt, args...) log(level, fmt, ##args) 151 #else 152 #define dlog(level, fmt, args...) do {} while (0) 153 #endif 154 155 struct rtstat rtstat; 156 157 static int rttrash; /* routes not in table but not freed */ 158 159 static struct pool rtentry_pool; 160 static struct pool rttimer_pool; 161 162 static struct callout rt_timer_ch; /* callout for rt_timer_timer() */ 163 static struct workqueue *rt_timer_wq; 164 static struct work rt_timer_wk; 165 166 static void rt_timer_init(void); 167 static void rt_timer_queue_remove_all(struct rttimer_queue *); 168 static void rt_timer_remove_all(struct rtentry *); 169 static void rt_timer_timer(void *); 170 171 /* 172 * Locking notes: 173 * - The routing table is protected by a global rwlock 174 * - API: RT_RLOCK and friends 175 * - rtcaches are NOT protected by the framework 176 * - Callers must guarantee a rtcache isn't accessed simultaneously 177 * - How the constraint is guaranteed in the wild 178 * - Protect a rtcache by a mutex (e.g., inp_route) 179 * - Make rtcache per-CPU and allow only accesses from softint 180 * (e.g., ipforward_rt_percpu) 181 * - References to a rtentry is managed by reference counting and psref 182 * - Reference counting is used for temporal reference when a rtentry 183 * is fetched from the routing table 184 * - psref is used for temporal reference when a rtentry is fetched 185 * from a rtcache 186 * - struct route (rtcache) has struct psref, so we cannot obtain 187 * a reference twice on the same struct route 188 * - Before destroying or updating a rtentry, we have to wait for 189 * all references left (see below for details) 190 * - APIs 191 * - An obtained rtentry via rtalloc1 or rtrequest* must be 192 * unreferenced by rt_unref 193 * - An obtained rtentry via rtcache_* must be unreferenced by 194 * rtcache_unref 195 * - TODO: once we get a lockless routing table, we should use only 196 * psref for rtentries 197 * - rtentry destruction 198 * - A rtentry is destroyed (freed) only when we call rtrequest(RTM_DELETE) 199 * - If a caller of rtrequest grabs a reference of a rtentry, the caller 200 * has a responsibility to destroy the rtentry by itself by calling 201 * rt_free 202 * - If not, rtrequest itself does that 203 * - If rt_free is called in softint, the actual destruction routine is 204 * deferred to a workqueue 205 * - rtentry update 206 * - When updating a rtentry, RTF_UPDATING flag is set 207 * - If a rtentry is set RTF_UPDATING, fetching the rtentry from 208 * the routing table or a rtcache results in either of the following 209 * cases: 210 * - if the caller runs in softint, the caller fails to fetch 211 * - otherwise, the caller waits for the update completed and retries 212 * to fetch (probably succeed to fetch for the second time) 213 * - rtcache invalidation 214 * - There is a global generation counter that is incremented when 215 * any routes have been added or deleted 216 * - When a rtcache caches a rtentry into itself, it also stores 217 * a snapshot of the generation counter 218 * - If the snapshot equals to the global counter, the cache is valid, 219 * otherwise the cache is invalidated 220 */ 221 222 /* 223 * Global lock for the routing table. 224 */ 225 static krwlock_t rt_lock __cacheline_aligned; 226 #ifdef NET_MPSAFE 227 #define RT_RLOCK() rw_enter(&rt_lock, RW_READER) 228 #define RT_WLOCK() rw_enter(&rt_lock, RW_WRITER) 229 #define RT_UNLOCK() rw_exit(&rt_lock) 230 #define RT_WLOCKED() rw_write_held(&rt_lock) 231 #define RT_ASSERT_WLOCK() KASSERT(rw_write_held(&rt_lock)) 232 #else 233 #define RT_RLOCK() do {} while (0) 234 #define RT_WLOCK() do {} while (0) 235 #define RT_UNLOCK() do {} while (0) 236 #define RT_WLOCKED() true 237 #define RT_ASSERT_WLOCK() do {} while (0) 238 #endif 239 240 static uint64_t rtcache_generation; 241 242 /* 243 * mutex and cv that are used to wait for references to a rtentry left 244 * before updating the rtentry. 245 */ 246 static struct { 247 kmutex_t lock; 248 kcondvar_t cv; 249 bool ongoing; 250 const struct lwp *lwp; 251 } rt_update_global __cacheline_aligned; 252 253 /* 254 * A workqueue and stuff that are used to defer the destruction routine 255 * of rtentries. 256 */ 257 static struct { 258 struct workqueue *wq; 259 struct work wk; 260 kmutex_t lock; 261 SLIST_HEAD(, rtentry) queue; 262 bool enqueued; 263 } rt_free_global __cacheline_aligned; 264 265 /* psref for rtentry */ 266 static struct psref_class *rt_psref_class __read_mostly; 267 268 #ifdef RTFLUSH_DEBUG 269 static int _rtcache_debug = 0; 270 #endif /* RTFLUSH_DEBUG */ 271 272 static kauth_listener_t route_listener; 273 274 static int rtdeletemsg(struct rtentry *); 275 276 static void rt_maskedcopy(const struct sockaddr *, 277 struct sockaddr *, const struct sockaddr *); 278 279 static void rtcache_invalidate(void); 280 281 static void rt_ref(struct rtentry *); 282 283 static struct rtentry * 284 rtalloc1_locked(const struct sockaddr *, int, bool, bool); 285 286 static struct ifaddr *rt_getifa(struct rt_addrinfo *, struct psref *); 287 static struct ifnet *rt_getifp(struct rt_addrinfo *, struct psref *); 288 static struct ifaddr *ifa_ifwithroute_psref(int, const struct sockaddr *, 289 const struct sockaddr *, struct psref *); 290 291 static void rtcache_ref(struct rtentry *, struct route *); 292 293 #ifdef NET_MPSAFE 294 static void rt_update_wait(void); 295 #endif 296 297 static bool rt_wait_ok(void); 298 static void rt_wait_refcnt(const char *, struct rtentry *, int); 299 static void rt_wait_psref(struct rtentry *); 300 301 #ifdef DDB 302 static void db_print_sa(const struct sockaddr *); 303 static void db_print_ifa(struct ifaddr *); 304 static int db_show_rtentry(struct rtentry *, void *); 305 #endif 306 307 #ifdef RTFLUSH_DEBUG 308 static void sysctl_net_rtcache_setup(struct sysctllog **); 309 static void 310 sysctl_net_rtcache_setup(struct sysctllog **clog) 311 { 312 const struct sysctlnode *rnode; 313 314 if (sysctl_createv(clog, 0, NULL, &rnode, CTLFLAG_PERMANENT, 315 CTLTYPE_NODE, 316 "rtcache", SYSCTL_DESCR("Route cache related settings"), 317 NULL, 0, NULL, 0, CTL_NET, CTL_CREATE, CTL_EOL) != 0) 318 return; 319 if (sysctl_createv(clog, 0, &rnode, &rnode, 320 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, 321 "debug", SYSCTL_DESCR("Debug route caches"), 322 NULL, 0, &_rtcache_debug, 0, CTL_CREATE, CTL_EOL) != 0) 323 return; 324 } 325 #endif /* RTFLUSH_DEBUG */ 326 327 static inline void 328 rt_destroy(struct rtentry *rt) 329 { 330 if (rt->_rt_key != NULL) 331 sockaddr_free(rt->_rt_key); 332 if (rt->rt_gateway != NULL) 333 sockaddr_free(rt->rt_gateway); 334 if (rt_gettag(rt) != NULL) 335 sockaddr_free(rt_gettag(rt)); 336 rt->_rt_key = rt->rt_gateway = rt->rt_tag = NULL; 337 } 338 339 static inline const struct sockaddr * 340 rt_setkey(struct rtentry *rt, const struct sockaddr *key, int flags) 341 { 342 if (rt->_rt_key == key) 343 goto out; 344 345 if (rt->_rt_key != NULL) 346 sockaddr_free(rt->_rt_key); 347 rt->_rt_key = sockaddr_dup(key, flags); 348 out: 349 rt->rt_nodes->rn_key = (const char *)rt->_rt_key; 350 return rt->_rt_key; 351 } 352 353 struct ifaddr * 354 rt_get_ifa(struct rtentry *rt) 355 { 356 struct ifaddr *ifa; 357 358 ifa = rt->rt_ifa; 359 if (ifa->ifa_getifa == NULL) 360 return ifa; 361 #if 0 362 else if (ifa->ifa_seqno != NULL && *ifa->ifa_seqno == rt->rt_ifa_seqno) 363 return ifa; 364 #endif 365 else { 366 ifa = (*ifa->ifa_getifa)(ifa, rt_getkey(rt)); 367 if (ifa == NULL) 368 return NULL; 369 rt_replace_ifa(rt, ifa); 370 return ifa; 371 } 372 } 373 374 static void 375 rt_set_ifa1(struct rtentry *rt, struct ifaddr *ifa) 376 { 377 rt->rt_ifa = ifa; 378 if (ifa->ifa_seqno != NULL) 379 rt->rt_ifa_seqno = *ifa->ifa_seqno; 380 } 381 382 /* 383 * Is this route the connected route for the ifa? 384 */ 385 static int 386 rt_ifa_connected(const struct rtentry *rt, const struct ifaddr *ifa) 387 { 388 const struct sockaddr *key, *dst, *odst; 389 struct sockaddr_storage maskeddst; 390 391 key = rt_getkey(rt); 392 dst = rt->rt_flags & RTF_HOST ? ifa->ifa_dstaddr : ifa->ifa_addr; 393 if (dst == NULL || 394 dst->sa_family != key->sa_family || 395 dst->sa_len != key->sa_len) 396 return 0; 397 if ((rt->rt_flags & RTF_HOST) == 0 && ifa->ifa_netmask) { 398 odst = dst; 399 dst = (struct sockaddr *)&maskeddst; 400 rt_maskedcopy(odst, (struct sockaddr *)&maskeddst, 401 ifa->ifa_netmask); 402 } 403 return (memcmp(dst, key, dst->sa_len) == 0); 404 } 405 406 void 407 rt_replace_ifa(struct rtentry *rt, struct ifaddr *ifa) 408 { 409 struct ifaddr *old; 410 411 if (rt->rt_ifa == ifa) 412 return; 413 414 if (rt->rt_ifa != ifa && 415 rt->rt_ifa->ifa_flags & IFA_ROUTE && 416 rt_ifa_connected(rt, rt->rt_ifa)) 417 { 418 RT_DPRINTF("rt->_rt_key = %p, ifa = %p, " 419 "replace deleted IFA_ROUTE\n", 420 (void *)rt->_rt_key, (void *)rt->rt_ifa); 421 rt->rt_ifa->ifa_flags &= ~IFA_ROUTE; 422 if (rt_ifa_connected(rt, ifa)) { 423 RT_DPRINTF("rt->_rt_key = %p, ifa = %p, " 424 "replace added IFA_ROUTE\n", 425 (void *)rt->_rt_key, (void *)ifa); 426 ifa->ifa_flags |= IFA_ROUTE; 427 } 428 } 429 430 ifaref(ifa); 431 old = rt->rt_ifa; 432 rt_set_ifa1(rt, ifa); 433 ifafree(old); 434 } 435 436 static void 437 rt_set_ifa(struct rtentry *rt, struct ifaddr *ifa) 438 { 439 ifaref(ifa); 440 rt_set_ifa1(rt, ifa); 441 } 442 443 static int 444 route_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie, 445 void *arg0, void *arg1, void *arg2, void *arg3) 446 { 447 struct rt_msghdr *rtm; 448 int result; 449 450 result = KAUTH_RESULT_DEFER; 451 rtm = arg1; 452 453 if (action != KAUTH_NETWORK_ROUTE) 454 return result; 455 456 if (rtm->rtm_type == RTM_GET) 457 result = KAUTH_RESULT_ALLOW; 458 459 return result; 460 } 461 462 static void rt_free_work(struct work *, void *); 463 464 void 465 rt_init(void) 466 { 467 int error; 468 469 #ifdef RTFLUSH_DEBUG 470 sysctl_net_rtcache_setup(NULL); 471 #endif 472 473 mutex_init(&rt_free_global.lock, MUTEX_DEFAULT, IPL_SOFTNET); 474 SLIST_INIT(&rt_free_global.queue); 475 rt_free_global.enqueued = false; 476 477 rt_psref_class = psref_class_create("rtentry", IPL_SOFTNET); 478 479 error = workqueue_create(&rt_free_global.wq, "rt_free", 480 rt_free_work, NULL, PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE); 481 if (error) 482 panic("%s: workqueue_create failed (%d)\n", __func__, error); 483 484 mutex_init(&rt_update_global.lock, MUTEX_DEFAULT, IPL_SOFTNET); 485 cv_init(&rt_update_global.cv, "rt_update"); 486 487 pool_init(&rtentry_pool, sizeof(struct rtentry), 0, 0, 0, "rtentpl", 488 NULL, IPL_SOFTNET); 489 pool_init(&rttimer_pool, sizeof(struct rttimer), 0, 0, 0, "rttmrpl", 490 NULL, IPL_SOFTNET); 491 492 rn_init(); /* initialize all zeroes, all ones, mask table */ 493 rtbl_init(); 494 495 route_listener = kauth_listen_scope(KAUTH_SCOPE_NETWORK, 496 route_listener_cb, NULL); 497 } 498 499 static void 500 rtcache_invalidate(void) 501 { 502 503 RT_ASSERT_WLOCK(); 504 505 if (rtcache_debug()) 506 printf("%s: enter\n", __func__); 507 508 rtcache_generation++; 509 } 510 511 #ifdef RT_DEBUG 512 static void 513 dump_rt(const struct rtentry *rt) 514 { 515 char buf[512]; 516 517 log(LOG_DEBUG, "rt: "); 518 log(LOG_DEBUG, "p=%p ", rt); 519 if (rt->_rt_key == NULL) { 520 log(LOG_DEBUG, "dst=(NULL) "); 521 } else { 522 sockaddr_format(rt->_rt_key, buf, sizeof(buf)); 523 log(LOG_DEBUG, "dst=%s ", buf); 524 } 525 if (rt->rt_gateway == NULL) { 526 log(LOG_DEBUG, "gw=(NULL) "); 527 } else { 528 sockaddr_format(rt->_rt_key, buf, sizeof(buf)); 529 log(LOG_DEBUG, "gw=%s ", buf); 530 } 531 log(LOG_DEBUG, "flags=%x ", rt->rt_flags); 532 if (rt->rt_ifp == NULL) { 533 log(LOG_DEBUG, "if=(NULL) "); 534 } else { 535 log(LOG_DEBUG, "if=%s ", rt->rt_ifp->if_xname); 536 } 537 log(LOG_DEBUG, "\n"); 538 } 539 #endif /* RT_DEBUG */ 540 541 /* 542 * Packet routing routines. If success, refcnt of a returned rtentry 543 * will be incremented. The caller has to rtfree it by itself. 544 */ 545 struct rtentry * 546 rtalloc1_locked(const struct sockaddr *dst, int report, bool wait_ok, 547 bool wlock) 548 { 549 rtbl_t *rtbl; 550 struct rtentry *rt; 551 int s; 552 553 #ifdef NET_MPSAFE 554 retry: 555 #endif 556 s = splsoftnet(); 557 rtbl = rt_gettable(dst->sa_family); 558 if (rtbl == NULL) 559 goto miss; 560 561 rt = rt_matchaddr(rtbl, dst); 562 if (rt == NULL) 563 goto miss; 564 565 if (!ISSET(rt->rt_flags, RTF_UP)) 566 goto miss; 567 568 #ifdef NET_MPSAFE 569 if (ISSET(rt->rt_flags, RTF_UPDATING) && 570 /* XXX updater should be always able to acquire */ 571 curlwp != rt_update_global.lwp) { 572 if (!wait_ok || !rt_wait_ok()) 573 goto miss; 574 RT_UNLOCK(); 575 splx(s); 576 577 /* We can wait until the update is complete */ 578 rt_update_wait(); 579 580 if (wlock) 581 RT_WLOCK(); 582 else 583 RT_RLOCK(); 584 goto retry; 585 } 586 #endif /* NET_MPSAFE */ 587 588 rt_ref(rt); 589 RT_REFCNT_TRACE(rt); 590 591 splx(s); 592 return rt; 593 miss: 594 rtstat.rts_unreach++; 595 if (report) { 596 struct rt_addrinfo info; 597 598 memset(&info, 0, sizeof(info)); 599 info.rti_info[RTAX_DST] = dst; 600 rt_missmsg(RTM_MISS, &info, 0, 0); 601 } 602 splx(s); 603 return NULL; 604 } 605 606 struct rtentry * 607 rtalloc1(const struct sockaddr *dst, int report) 608 { 609 struct rtentry *rt; 610 611 RT_RLOCK(); 612 rt = rtalloc1_locked(dst, report, true, false); 613 RT_UNLOCK(); 614 615 return rt; 616 } 617 618 static void 619 rt_ref(struct rtentry *rt) 620 { 621 622 KASSERTMSG(rt->rt_refcnt >= 0, "rt_refcnt=%d", rt->rt_refcnt); 623 atomic_inc_uint(&rt->rt_refcnt); 624 } 625 626 void 627 rt_unref(struct rtentry *rt) 628 { 629 630 KASSERT(rt != NULL); 631 KASSERTMSG(rt->rt_refcnt > 0, "refcnt=%d", rt->rt_refcnt); 632 633 atomic_dec_uint(&rt->rt_refcnt); 634 if (!ISSET(rt->rt_flags, RTF_UP) || ISSET(rt->rt_flags, RTF_UPDATING)) { 635 mutex_enter(&rt_free_global.lock); 636 cv_broadcast(&rt->rt_cv); 637 mutex_exit(&rt_free_global.lock); 638 } 639 } 640 641 static bool 642 rt_wait_ok(void) 643 { 644 645 /* 646 * This originally returned !cpu_softintr_p(), but that doesn't 647 * work: the caller may hold a lock (probably softnet lock) 648 * that a softint is waiting for, in which case waiting here 649 * would cause a deadlock. See https://gnats.netbsd.org/56844 650 * for details. For now, until the locking paths are sorted 651 * out, we just disable the waiting option altogether and 652 * always defer to workqueue. 653 */ 654 KASSERT(!cpu_intr_p()); 655 return false; 656 } 657 658 void 659 rt_wait_refcnt(const char *title, struct rtentry *rt, int cnt) 660 { 661 mutex_enter(&rt_free_global.lock); 662 while (rt->rt_refcnt > cnt) { 663 dlog(LOG_DEBUG, "%s: %s waiting (refcnt=%d)\n", 664 __func__, title, rt->rt_refcnt); 665 cv_wait(&rt->rt_cv, &rt_free_global.lock); 666 dlog(LOG_DEBUG, "%s: %s waited (refcnt=%d)\n", 667 __func__, title, rt->rt_refcnt); 668 } 669 mutex_exit(&rt_free_global.lock); 670 } 671 672 void 673 rt_wait_psref(struct rtentry *rt) 674 { 675 676 psref_target_destroy(&rt->rt_psref, rt_psref_class); 677 psref_target_init(&rt->rt_psref, rt_psref_class); 678 } 679 680 static void 681 _rt_free(struct rtentry *rt) 682 { 683 struct ifaddr *ifa; 684 685 /* 686 * Need to avoid a deadlock on rt_wait_refcnt of update 687 * and a conflict on psref_target_destroy of update. 688 */ 689 #ifdef NET_MPSAFE 690 rt_update_wait(); 691 #endif 692 693 RT_REFCNT_TRACE(rt); 694 KASSERTMSG(rt->rt_refcnt >= 0, "refcnt=%d", rt->rt_refcnt); 695 rt_wait_refcnt("free", rt, 0); 696 #ifdef NET_MPSAFE 697 psref_target_destroy(&rt->rt_psref, rt_psref_class); 698 #endif 699 700 rt_assert_inactive(rt); 701 rttrash--; 702 ifa = rt->rt_ifa; 703 rt->rt_ifa = NULL; 704 ifafree(ifa); 705 rt->rt_ifp = NULL; 706 cv_destroy(&rt->rt_cv); 707 rt_destroy(rt); 708 pool_put(&rtentry_pool, rt); 709 } 710 711 static void 712 rt_free_work(struct work *wk, void *arg) 713 { 714 715 for (;;) { 716 struct rtentry *rt; 717 718 mutex_enter(&rt_free_global.lock); 719 if ((rt = SLIST_FIRST(&rt_free_global.queue)) == NULL) { 720 rt_free_global.enqueued = false; 721 mutex_exit(&rt_free_global.lock); 722 return; 723 } 724 SLIST_REMOVE_HEAD(&rt_free_global.queue, rt_free); 725 mutex_exit(&rt_free_global.lock); 726 atomic_dec_uint(&rt->rt_refcnt); 727 _rt_free(rt); 728 } 729 } 730 731 void 732 rt_free(struct rtentry *rt) 733 { 734 735 KASSERTMSG(rt->rt_refcnt > 0, "rt_refcnt=%d", rt->rt_refcnt); 736 if (rt_wait_ok()) { 737 atomic_dec_uint(&rt->rt_refcnt); 738 _rt_free(rt); 739 return; 740 } 741 742 mutex_enter(&rt_free_global.lock); 743 /* No need to add a reference here. */ 744 SLIST_INSERT_HEAD(&rt_free_global.queue, rt, rt_free); 745 if (!rt_free_global.enqueued) { 746 workqueue_enqueue(rt_free_global.wq, &rt_free_global.wk, NULL); 747 rt_free_global.enqueued = true; 748 } 749 mutex_exit(&rt_free_global.lock); 750 } 751 752 #ifdef NET_MPSAFE 753 static void 754 rt_update_wait(void) 755 { 756 757 mutex_enter(&rt_update_global.lock); 758 while (rt_update_global.ongoing) { 759 dlog(LOG_DEBUG, "%s: waiting lwp=%p\n", __func__, curlwp); 760 cv_wait(&rt_update_global.cv, &rt_update_global.lock); 761 dlog(LOG_DEBUG, "%s: waited lwp=%p\n", __func__, curlwp); 762 } 763 mutex_exit(&rt_update_global.lock); 764 } 765 #endif 766 767 int 768 rt_update_prepare(struct rtentry *rt) 769 { 770 771 dlog(LOG_DEBUG, "%s: updating rt=%p lwp=%p\n", __func__, rt, curlwp); 772 773 RT_WLOCK(); 774 /* If the entry is being destroyed, don't proceed the update. */ 775 if (!ISSET(rt->rt_flags, RTF_UP)) { 776 RT_UNLOCK(); 777 return ESRCH; 778 } 779 rt->rt_flags |= RTF_UPDATING; 780 RT_UNLOCK(); 781 782 mutex_enter(&rt_update_global.lock); 783 while (rt_update_global.ongoing) { 784 dlog(LOG_DEBUG, "%s: waiting ongoing updating rt=%p lwp=%p\n", 785 __func__, rt, curlwp); 786 cv_wait(&rt_update_global.cv, &rt_update_global.lock); 787 dlog(LOG_DEBUG, "%s: waited ongoing updating rt=%p lwp=%p\n", 788 __func__, rt, curlwp); 789 } 790 rt_update_global.ongoing = true; 791 /* XXX need it to avoid rt_update_wait by updater itself. */ 792 rt_update_global.lwp = curlwp; 793 mutex_exit(&rt_update_global.lock); 794 795 rt_wait_refcnt("update", rt, 1); 796 rt_wait_psref(rt); 797 798 return 0; 799 } 800 801 void 802 rt_update_finish(struct rtentry *rt) 803 { 804 805 RT_WLOCK(); 806 rt->rt_flags &= ~RTF_UPDATING; 807 RT_UNLOCK(); 808 809 mutex_enter(&rt_update_global.lock); 810 rt_update_global.ongoing = false; 811 rt_update_global.lwp = NULL; 812 cv_broadcast(&rt_update_global.cv); 813 mutex_exit(&rt_update_global.lock); 814 815 dlog(LOG_DEBUG, "%s: updated rt=%p lwp=%p\n", __func__, rt, curlwp); 816 } 817 818 /* 819 * Force a routing table entry to the specified 820 * destination to go through the given gateway. 821 * Normally called as a result of a routing redirect 822 * message from the network layer. 823 * 824 * N.B.: must be called at splsoftnet 825 */ 826 void 827 rtredirect(const struct sockaddr *dst, const struct sockaddr *gateway, 828 const struct sockaddr *netmask, int flags, const struct sockaddr *src, 829 struct rtentry **rtp) 830 { 831 struct rtentry *rt; 832 int error = 0; 833 uint64_t *stat = NULL; 834 struct rt_addrinfo info; 835 struct ifaddr *ifa; 836 struct psref psref; 837 838 /* verify the gateway is directly reachable */ 839 if ((ifa = ifa_ifwithnet_psref(gateway, &psref)) == NULL) { 840 error = ENETUNREACH; 841 goto out; 842 } 843 rt = rtalloc1(dst, 0); 844 /* 845 * If the redirect isn't from our current router for this dst, 846 * it's either old or wrong. If it redirects us to ourselves, 847 * we have a routing loop, perhaps as a result of an interface 848 * going down recently. 849 */ 850 if (!(flags & RTF_DONE) && rt && 851 (sockaddr_cmp(src, rt->rt_gateway) != 0 || rt->rt_ifa != ifa)) 852 error = EINVAL; 853 else { 854 int s = pserialize_read_enter(); 855 struct ifaddr *_ifa; 856 857 _ifa = ifa_ifwithaddr(gateway); 858 if (_ifa != NULL) 859 error = EHOSTUNREACH; 860 pserialize_read_exit(s); 861 } 862 if (error) 863 goto done; 864 /* 865 * Create a new entry if we just got back a wildcard entry 866 * or the lookup failed. This is necessary for hosts 867 * which use routing redirects generated by smart gateways 868 * to dynamically build the routing tables. 869 */ 870 if (rt == NULL || (rt_mask(rt) && rt_mask(rt)->sa_len < 2)) 871 goto create; 872 /* 873 * Don't listen to the redirect if it's 874 * for a route to an interface. 875 */ 876 if (rt->rt_flags & RTF_GATEWAY) { 877 if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) { 878 /* 879 * Changing from route to net => route to host. 880 * Create new route, rather than smashing route to net. 881 */ 882 create: 883 if (rt != NULL) 884 rt_unref(rt); 885 flags |= RTF_GATEWAY | RTF_DYNAMIC; 886 memset(&info, 0, sizeof(info)); 887 info.rti_info[RTAX_DST] = dst; 888 info.rti_info[RTAX_GATEWAY] = gateway; 889 info.rti_info[RTAX_NETMASK] = netmask; 890 info.rti_ifa = ifa; 891 info.rti_flags = flags; 892 rt = NULL; 893 error = rtrequest1(RTM_ADD, &info, &rt); 894 if (rt != NULL) 895 flags = rt->rt_flags; 896 if (error == 0) 897 rt_newmsg_dynamic(RTM_ADD, rt); 898 stat = &rtstat.rts_dynamic; 899 } else { 900 /* 901 * Smash the current notion of the gateway to 902 * this destination. Should check about netmask!!! 903 */ 904 #ifdef NET_MPSAFE 905 KASSERT(!cpu_softintr_p()); 906 907 error = rt_update_prepare(rt); 908 if (error == 0) { 909 #endif 910 RT_WLOCK(); 911 error = rt_setgate(rt, gateway); 912 if (error == 0) { 913 rt->rt_flags |= RTF_MODIFIED; 914 flags |= RTF_MODIFIED; 915 } 916 RT_UNLOCK(); 917 #ifdef NET_MPSAFE 918 rt_update_finish(rt); 919 } else { 920 /* 921 * If error != 0, the rtentry is being 922 * destroyed, so doing nothing doesn't 923 * matter. 924 */ 925 } 926 #endif 927 stat = &rtstat.rts_newgateway; 928 } 929 } else 930 error = EHOSTUNREACH; 931 done: 932 if (rt) { 933 if (rtp != NULL && !error) 934 *rtp = rt; 935 else 936 rt_unref(rt); 937 } 938 out: 939 if (error) 940 rtstat.rts_badredirect++; 941 else if (stat != NULL) 942 (*stat)++; 943 memset(&info, 0, sizeof(info)); 944 info.rti_info[RTAX_DST] = dst; 945 info.rti_info[RTAX_GATEWAY] = gateway; 946 info.rti_info[RTAX_NETMASK] = netmask; 947 info.rti_info[RTAX_AUTHOR] = src; 948 rt_missmsg(RTM_REDIRECT, &info, flags, error); 949 ifa_release(ifa, &psref); 950 } 951 952 /* 953 * Delete a route and generate a message. 954 * It doesn't free a passed rt. 955 */ 956 static int 957 rtdeletemsg(struct rtentry *rt) 958 { 959 int error; 960 struct rt_addrinfo info; 961 struct rtentry *retrt; 962 963 /* 964 * Request the new route so that the entry is not actually 965 * deleted. That will allow the information being reported to 966 * be accurate (and consistent with route_output()). 967 */ 968 memset(&info, 0, sizeof(info)); 969 info.rti_info[RTAX_DST] = rt_getkey(rt); 970 info.rti_info[RTAX_NETMASK] = rt_mask(rt); 971 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 972 info.rti_flags = rt->rt_flags; 973 error = rtrequest1(RTM_DELETE, &info, &retrt); 974 975 rt_missmsg(RTM_DELETE, &info, info.rti_flags, error); 976 977 return error; 978 } 979 980 static struct ifaddr * 981 ifa_ifwithroute_psref(int flags, const struct sockaddr *dst, 982 const struct sockaddr *gateway, struct psref *psref) 983 { 984 struct ifaddr *ifa = NULL; 985 986 if ((flags & RTF_GATEWAY) == 0) { 987 /* 988 * If we are adding a route to an interface, 989 * and the interface is a pt to pt link 990 * we should search for the destination 991 * as our clue to the interface. Otherwise 992 * we can use the local address. 993 */ 994 if ((flags & RTF_HOST) && gateway->sa_family != AF_LINK) 995 ifa = ifa_ifwithdstaddr_psref(dst, psref); 996 if (ifa == NULL) 997 ifa = ifa_ifwithaddr_psref(gateway, psref); 998 } else { 999 /* 1000 * If we are adding a route to a remote net 1001 * or host, the gateway may still be on the 1002 * other end of a pt to pt link. 1003 */ 1004 ifa = ifa_ifwithdstaddr_psref(gateway, psref); 1005 } 1006 if (ifa == NULL) 1007 ifa = ifa_ifwithnet_psref(gateway, psref); 1008 if (ifa == NULL) { 1009 int s; 1010 struct rtentry *rt; 1011 1012 rt = rtalloc1_locked(gateway, 0, true, true); 1013 if (rt == NULL) 1014 return NULL; 1015 if (rt->rt_flags & RTF_GATEWAY) { 1016 rt_unref(rt); 1017 return NULL; 1018 } 1019 /* 1020 * Just in case. May not need to do this workaround. 1021 * Revisit when working on rtentry MP-ification. 1022 */ 1023 s = pserialize_read_enter(); 1024 IFADDR_READER_FOREACH(ifa, rt->rt_ifp) { 1025 if (ifa == rt->rt_ifa) 1026 break; 1027 } 1028 if (ifa != NULL) 1029 ifa_acquire(ifa, psref); 1030 pserialize_read_exit(s); 1031 rt_unref(rt); 1032 if (ifa == NULL) 1033 return NULL; 1034 } 1035 if (ifa->ifa_addr->sa_family != dst->sa_family) { 1036 struct ifaddr *nifa; 1037 int s; 1038 1039 s = pserialize_read_enter(); 1040 nifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp); 1041 if (nifa != NULL) { 1042 ifa_release(ifa, psref); 1043 ifa_acquire(nifa, psref); 1044 ifa = nifa; 1045 } 1046 pserialize_read_exit(s); 1047 } 1048 return ifa; 1049 } 1050 1051 /* 1052 * If it suceeds and ret_nrt isn't NULL, refcnt of ret_nrt is incremented. 1053 * The caller has to rtfree it by itself. 1054 */ 1055 int 1056 rtrequest(int req, const struct sockaddr *dst, const struct sockaddr *gateway, 1057 const struct sockaddr *netmask, int flags, struct rtentry **ret_nrt) 1058 { 1059 struct rt_addrinfo info; 1060 1061 memset(&info, 0, sizeof(info)); 1062 info.rti_flags = flags; 1063 info.rti_info[RTAX_DST] = dst; 1064 info.rti_info[RTAX_GATEWAY] = gateway; 1065 info.rti_info[RTAX_NETMASK] = netmask; 1066 return rtrequest1(req, &info, ret_nrt); 1067 } 1068 1069 static struct ifnet * 1070 rt_getifp(struct rt_addrinfo *info, struct psref *psref) 1071 { 1072 const struct sockaddr *ifpaddr = info->rti_info[RTAX_IFP]; 1073 1074 if (info->rti_ifp != NULL) 1075 return NULL; 1076 /* 1077 * ifp may be specified by sockaddr_dl when protocol address 1078 * is ambiguous 1079 */ 1080 if (ifpaddr != NULL && ifpaddr->sa_family == AF_LINK) { 1081 struct ifaddr *ifa; 1082 int s = pserialize_read_enter(); 1083 1084 ifa = ifa_ifwithnet(ifpaddr); 1085 if (ifa != NULL) 1086 info->rti_ifp = if_get_byindex(ifa->ifa_ifp->if_index, 1087 psref); 1088 pserialize_read_exit(s); 1089 } 1090 1091 return info->rti_ifp; 1092 } 1093 1094 static struct ifaddr * 1095 rt_getifa(struct rt_addrinfo *info, struct psref *psref) 1096 { 1097 struct ifaddr *ifa = NULL; 1098 const struct sockaddr *dst = info->rti_info[RTAX_DST]; 1099 const struct sockaddr *gateway = info->rti_info[RTAX_GATEWAY]; 1100 const struct sockaddr *ifaaddr = info->rti_info[RTAX_IFA]; 1101 int flags = info->rti_flags; 1102 const struct sockaddr *sa; 1103 1104 if (info->rti_ifa == NULL && ifaaddr != NULL) { 1105 ifa = ifa_ifwithaddr_psref(ifaaddr, psref); 1106 if (ifa != NULL) 1107 goto got; 1108 } 1109 1110 sa = ifaaddr != NULL ? ifaaddr : 1111 (gateway != NULL ? gateway : dst); 1112 if (sa != NULL && info->rti_ifp != NULL) 1113 ifa = ifaof_ifpforaddr_psref(sa, info->rti_ifp, psref); 1114 else if (dst != NULL && gateway != NULL) 1115 ifa = ifa_ifwithroute_psref(flags, dst, gateway, psref); 1116 else if (sa != NULL) 1117 ifa = ifa_ifwithroute_psref(flags, sa, sa, psref); 1118 if (ifa == NULL) 1119 return NULL; 1120 got: 1121 if (ifa->ifa_getifa != NULL) { 1122 /* FIXME ifa_getifa is NOMPSAFE */ 1123 ifa = (*ifa->ifa_getifa)(ifa, dst); 1124 if (ifa == NULL) 1125 return NULL; 1126 ifa_acquire(ifa, psref); 1127 } 1128 info->rti_ifa = ifa; 1129 if (info->rti_ifp == NULL) 1130 info->rti_ifp = ifa->ifa_ifp; 1131 return ifa; 1132 } 1133 1134 /* 1135 * If it suceeds and ret_nrt isn't NULL, refcnt of ret_nrt is incremented. 1136 * The caller has to rtfree it by itself. 1137 */ 1138 int 1139 rtrequest1(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt) 1140 { 1141 int s = splsoftnet(), ss; 1142 int error = 0, rc; 1143 struct rtentry *rt; 1144 rtbl_t *rtbl; 1145 struct ifaddr *ifa = NULL; 1146 struct sockaddr_storage maskeddst; 1147 const struct sockaddr *dst = info->rti_info[RTAX_DST]; 1148 const struct sockaddr *gateway = info->rti_info[RTAX_GATEWAY]; 1149 const struct sockaddr *netmask = info->rti_info[RTAX_NETMASK]; 1150 int flags = info->rti_flags; 1151 struct psref psref_ifp, psref_ifa; 1152 int bound = 0; 1153 struct ifnet *ifp = NULL; 1154 bool need_to_release_ifa = true; 1155 bool need_unlock = true; 1156 #define senderr(x) { error = x ; goto bad; } 1157 1158 RT_WLOCK(); 1159 1160 bound = curlwp_bind(); 1161 if ((rtbl = rt_gettable(dst->sa_family)) == NULL) 1162 senderr(ESRCH); 1163 if (flags & RTF_HOST) 1164 netmask = NULL; 1165 switch (req) { 1166 case RTM_DELETE: 1167 if (netmask) { 1168 rt_maskedcopy(dst, (struct sockaddr *)&maskeddst, 1169 netmask); 1170 dst = (struct sockaddr *)&maskeddst; 1171 } 1172 if ((rt = rt_lookup(rtbl, dst, netmask)) == NULL) 1173 senderr(ESRCH); 1174 if ((rt = rt_deladdr(rtbl, dst, netmask)) == NULL) 1175 senderr(ESRCH); 1176 rt->rt_flags &= ~RTF_UP; 1177 ifa = rt->rt_ifa; 1178 if (ifa->ifa_flags & IFA_ROUTE && 1179 rt_ifa_connected(rt, ifa)) { 1180 RT_DPRINTF("rt->_rt_key = %p, ifa = %p, " 1181 "deleted IFA_ROUTE\n", 1182 (void *)rt->_rt_key, (void *)ifa); 1183 ifa->ifa_flags &= ~IFA_ROUTE; 1184 } 1185 if (ifa->ifa_rtrequest) 1186 ifa->ifa_rtrequest(RTM_DELETE, rt, info); 1187 ifa = NULL; 1188 rttrash++; 1189 if (ret_nrt) { 1190 *ret_nrt = rt; 1191 rt_ref(rt); 1192 RT_REFCNT_TRACE(rt); 1193 } 1194 rtcache_invalidate(); 1195 RT_UNLOCK(); 1196 need_unlock = false; 1197 rt_timer_remove_all(rt); 1198 #if defined(INET) || defined(INET6) 1199 if (netmask != NULL) 1200 lltable_prefix_free(dst->sa_family, dst, netmask, 0); 1201 #endif 1202 if (ret_nrt == NULL) { 1203 /* Adjust the refcount */ 1204 rt_ref(rt); 1205 RT_REFCNT_TRACE(rt); 1206 rt_free(rt); 1207 } 1208 break; 1209 1210 case RTM_ADD: 1211 if (info->rti_ifa == NULL) { 1212 ifp = rt_getifp(info, &psref_ifp); 1213 ifa = rt_getifa(info, &psref_ifa); 1214 if (ifa == NULL) 1215 senderr(ENETUNREACH); 1216 } else { 1217 /* Caller should have a reference of ifa */ 1218 ifa = info->rti_ifa; 1219 need_to_release_ifa = false; 1220 } 1221 rt = pool_get(&rtentry_pool, PR_NOWAIT); 1222 if (rt == NULL) 1223 senderr(ENOBUFS); 1224 memset(rt, 0, sizeof(*rt)); 1225 rt->rt_flags = RTF_UP | (flags & ~RTF_DONTCHANGEIFA); 1226 LIST_INIT(&rt->rt_timer); 1227 1228 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1229 if (netmask) { 1230 rt_maskedcopy(dst, (struct sockaddr *)&maskeddst, 1231 netmask); 1232 rt_setkey(rt, (struct sockaddr *)&maskeddst, M_NOWAIT); 1233 } else { 1234 rt_setkey(rt, dst, M_NOWAIT); 1235 } 1236 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1237 if (rt_getkey(rt) == NULL || 1238 rt_setgate(rt, gateway) != 0) { 1239 pool_put(&rtentry_pool, rt); 1240 senderr(ENOBUFS); 1241 } 1242 1243 rt_set_ifa(rt, ifa); 1244 if (info->rti_info[RTAX_TAG] != NULL) { 1245 const struct sockaddr *tag; 1246 tag = rt_settag(rt, info->rti_info[RTAX_TAG]); 1247 if (tag == NULL) 1248 senderr(ENOBUFS); 1249 } 1250 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1251 1252 ss = pserialize_read_enter(); 1253 if (info->rti_info[RTAX_IFP] != NULL) { 1254 struct ifaddr *ifa2; 1255 ifa2 = ifa_ifwithnet(info->rti_info[RTAX_IFP]); 1256 if (ifa2 != NULL) 1257 rt->rt_ifp = ifa2->ifa_ifp; 1258 else 1259 rt->rt_ifp = ifa->ifa_ifp; 1260 } else 1261 rt->rt_ifp = ifa->ifa_ifp; 1262 pserialize_read_exit(ss); 1263 cv_init(&rt->rt_cv, "rtentry"); 1264 psref_target_init(&rt->rt_psref, rt_psref_class); 1265 1266 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1267 rc = rt_addaddr(rtbl, rt, netmask); 1268 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1269 if (rc != 0) { 1270 ifafree(ifa); /* for rt_set_ifa above */ 1271 cv_destroy(&rt->rt_cv); 1272 rt_destroy(rt); 1273 pool_put(&rtentry_pool, rt); 1274 senderr(rc); 1275 } 1276 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1277 if (ifa->ifa_rtrequest) 1278 ifa->ifa_rtrequest(req, rt, info); 1279 if (need_to_release_ifa) 1280 ifa_release(ifa, &psref_ifa); 1281 ifa = NULL; 1282 if_put(ifp, &psref_ifp); 1283 ifp = NULL; 1284 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1285 if (ret_nrt) { 1286 *ret_nrt = rt; 1287 rt_ref(rt); 1288 RT_REFCNT_TRACE(rt); 1289 } 1290 rtcache_invalidate(); 1291 RT_UNLOCK(); 1292 need_unlock = false; 1293 break; 1294 case RTM_GET: 1295 if (netmask != NULL) { 1296 rt_maskedcopy(dst, (struct sockaddr *)&maskeddst, 1297 netmask); 1298 dst = (struct sockaddr *)&maskeddst; 1299 } 1300 if ((rt = rt_lookup(rtbl, dst, netmask)) == NULL) 1301 senderr(ESRCH); 1302 if (ret_nrt != NULL) { 1303 *ret_nrt = rt; 1304 rt_ref(rt); 1305 RT_REFCNT_TRACE(rt); 1306 } 1307 break; 1308 } 1309 bad: 1310 if (need_to_release_ifa) 1311 ifa_release(ifa, &psref_ifa); 1312 if_put(ifp, &psref_ifp); 1313 curlwp_bindx(bound); 1314 if (need_unlock) 1315 RT_UNLOCK(); 1316 splx(s); 1317 return error; 1318 } 1319 1320 int 1321 rt_setgate(struct rtentry *rt, const struct sockaddr *gate) 1322 { 1323 struct sockaddr *new, *old; 1324 1325 KASSERT(RT_WLOCKED()); 1326 KASSERT(rt->_rt_key != NULL); 1327 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1328 1329 new = sockaddr_dup(gate, M_ZERO | M_NOWAIT); 1330 if (new == NULL) 1331 return ENOMEM; 1332 1333 old = rt->rt_gateway; 1334 rt->rt_gateway = new; 1335 if (old != NULL) 1336 sockaddr_free(old); 1337 1338 KASSERT(rt->_rt_key != NULL); 1339 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1340 1341 if (rt->rt_flags & RTF_GATEWAY) { 1342 struct rtentry *gwrt; 1343 1344 gwrt = rtalloc1_locked(gate, 1, false, true); 1345 /* 1346 * If we switched gateways, grab the MTU from the new 1347 * gateway route if the current MTU, if the current MTU is 1348 * greater than the MTU of gateway. 1349 * Note that, if the MTU of gateway is 0, we will reset the 1350 * MTU of the route to run PMTUD again from scratch. XXX 1351 */ 1352 if (gwrt != NULL) { 1353 KASSERT(gwrt->_rt_key != NULL); 1354 RT_DPRINTF("gwrt->_rt_key = %p\n", gwrt->_rt_key); 1355 if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0 && 1356 rt->rt_rmx.rmx_mtu && 1357 rt->rt_rmx.rmx_mtu > gwrt->rt_rmx.rmx_mtu) { 1358 rt->rt_rmx.rmx_mtu = gwrt->rt_rmx.rmx_mtu; 1359 } 1360 rt_unref(gwrt); 1361 } 1362 } 1363 KASSERT(rt->_rt_key != NULL); 1364 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1365 return 0; 1366 } 1367 1368 static struct ifaddr * 1369 rt_update_get_ifa(const struct rt_addrinfo *info, const struct rtentry *rt, 1370 struct ifnet **ifp, struct psref *psref_ifp, struct psref *psref) 1371 { 1372 struct ifaddr *ifa = NULL; 1373 1374 *ifp = NULL; 1375 if (info->rti_info[RTAX_IFP] != NULL) { 1376 ifa = ifa_ifwithnet_psref(info->rti_info[RTAX_IFP], psref); 1377 if (ifa == NULL) 1378 goto next; 1379 if (ifa->ifa_ifp->if_flags & IFF_UNNUMBERED) { 1380 ifa_release(ifa, psref); 1381 ifa = NULL; 1382 goto next; 1383 } 1384 *ifp = ifa->ifa_ifp; 1385 if_acquire(*ifp, psref_ifp); 1386 if (info->rti_info[RTAX_IFA] == NULL && 1387 info->rti_info[RTAX_GATEWAY] == NULL) 1388 goto out; 1389 ifa_release(ifa, psref); 1390 if (info->rti_info[RTAX_IFA] == NULL) { 1391 /* route change <dst> <gw> -ifp <if> */ 1392 ifa = ifaof_ifpforaddr_psref( 1393 info->rti_info[RTAX_GATEWAY], *ifp, psref); 1394 } else { 1395 /* route change <dst> -ifp <if> -ifa <addr> */ 1396 ifa = ifa_ifwithaddr_psref(info->rti_info[RTAX_IFA], 1397 psref); 1398 if (ifa != NULL) 1399 goto out; 1400 ifa = ifaof_ifpforaddr_psref(info->rti_info[RTAX_IFA], 1401 *ifp, psref); 1402 } 1403 goto out; 1404 } 1405 next: 1406 if (info->rti_info[RTAX_IFA] != NULL) { 1407 /* route change <dst> <gw> -ifa <addr> */ 1408 ifa = ifa_ifwithaddr_psref(info->rti_info[RTAX_IFA], psref); 1409 if (ifa != NULL) 1410 goto out; 1411 } 1412 if (info->rti_info[RTAX_GATEWAY] != NULL) { 1413 /* route change <dst> <gw> */ 1414 ifa = ifa_ifwithroute_psref(rt->rt_flags, rt_getkey(rt), 1415 info->rti_info[RTAX_GATEWAY], psref); 1416 } 1417 out: 1418 if (ifa != NULL && *ifp == NULL) { 1419 *ifp = ifa->ifa_ifp; 1420 if_acquire(*ifp, psref_ifp); 1421 } 1422 if (ifa == NULL && *ifp != NULL) { 1423 if_put(*ifp, psref_ifp); 1424 *ifp = NULL; 1425 } 1426 return ifa; 1427 } 1428 1429 int 1430 rt_update(struct rtentry *rt, struct rt_addrinfo *info, void *rtm) 1431 { 1432 int error = 0; 1433 struct ifnet *ifp = NULL, *new_ifp = NULL; 1434 struct ifaddr *ifa = NULL, *new_ifa; 1435 struct psref psref_ifa, psref_new_ifa, psref_ifp, psref_new_ifp; 1436 bool newgw, ifp_changed = false; 1437 1438 RT_WLOCK(); 1439 /* 1440 * New gateway could require new ifaddr, ifp; 1441 * flags may also be different; ifp may be specified 1442 * by ll sockaddr when protocol address is ambiguous 1443 */ 1444 newgw = info->rti_info[RTAX_GATEWAY] != NULL && 1445 sockaddr_cmp(info->rti_info[RTAX_GATEWAY], rt->rt_gateway) != 0; 1446 1447 if (newgw || info->rti_info[RTAX_IFP] != NULL || 1448 info->rti_info[RTAX_IFA] != NULL) { 1449 ifp = rt_getifp(info, &psref_ifp); 1450 /* info refers ifp so we need to keep a reference */ 1451 ifa = rt_getifa(info, &psref_ifa); 1452 if (ifa == NULL) { 1453 error = ENETUNREACH; 1454 goto out; 1455 } 1456 } 1457 if (newgw) { 1458 error = rt_setgate(rt, info->rti_info[RTAX_GATEWAY]); 1459 if (error != 0) 1460 goto out; 1461 } 1462 if (info->rti_info[RTAX_TAG]) { 1463 const struct sockaddr *tag; 1464 tag = rt_settag(rt, info->rti_info[RTAX_TAG]); 1465 if (tag == NULL) { 1466 error = ENOBUFS; 1467 goto out; 1468 } 1469 } 1470 /* 1471 * New gateway could require new ifaddr, ifp; 1472 * flags may also be different; ifp may be specified 1473 * by ll sockaddr when protocol address is ambiguous 1474 */ 1475 new_ifa = rt_update_get_ifa(info, rt, &new_ifp, &psref_new_ifp, 1476 &psref_new_ifa); 1477 if (new_ifa != NULL) { 1478 ifa_release(ifa, &psref_ifa); 1479 ifa = new_ifa; 1480 } 1481 if (ifa) { 1482 struct ifaddr *oifa = rt->rt_ifa; 1483 if (oifa != ifa && !ifa_is_destroying(ifa) && 1484 new_ifp != NULL && !if_is_deactivated(new_ifp)) { 1485 if (oifa && oifa->ifa_rtrequest) 1486 oifa->ifa_rtrequest(RTM_DELETE, rt, info); 1487 rt_replace_ifa(rt, ifa); 1488 rt->rt_ifp = new_ifp; 1489 ifp_changed = true; 1490 } 1491 if (new_ifa == NULL) 1492 ifa_release(ifa, &psref_ifa); 1493 /* To avoid ifa_release below */ 1494 ifa = NULL; 1495 } 1496 ifa_release(new_ifa, &psref_new_ifa); 1497 if (new_ifp && rt->rt_ifp != new_ifp && !if_is_deactivated(new_ifp)) { 1498 rt->rt_ifp = new_ifp; 1499 ifp_changed = true; 1500 } 1501 rt_setmetrics(rtm, rt); 1502 if (rt->rt_flags != info->rti_flags) { 1503 rt->rt_flags = (info->rti_flags & ~PRESERVED_RTF) | 1504 (rt->rt_flags & PRESERVED_RTF); 1505 } 1506 if (rt->rt_ifa->ifa_rtrequest) 1507 rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, info); 1508 #if defined(INET) || defined(INET6) 1509 if (ifp_changed && rt_mask(rt) != NULL) 1510 lltable_prefix_free(rt_getkey(rt)->sa_family, rt_getkey(rt), 1511 rt_mask(rt), 0); 1512 #else 1513 (void)ifp_changed; /* XXX gcc */ 1514 #endif 1515 out: 1516 ifa_release(ifa, &psref_ifa); 1517 if_put(new_ifp, &psref_new_ifp); 1518 if_put(ifp, &psref_ifp); 1519 1520 RT_UNLOCK(); 1521 1522 return error; 1523 } 1524 1525 static void 1526 rt_maskedcopy(const struct sockaddr *src, struct sockaddr *dst, 1527 const struct sockaddr *netmask) 1528 { 1529 const char *netmaskp = &netmask->sa_data[0], 1530 *srcp = &src->sa_data[0]; 1531 char *dstp = &dst->sa_data[0]; 1532 const char *maskend = (char *)dst + MIN(netmask->sa_len, src->sa_len); 1533 const char *srcend = (char *)dst + src->sa_len; 1534 1535 dst->sa_len = src->sa_len; 1536 dst->sa_family = src->sa_family; 1537 1538 while (dstp < maskend) 1539 *dstp++ = *srcp++ & *netmaskp++; 1540 if (dstp < srcend) 1541 memset(dstp, 0, (size_t)(srcend - dstp)); 1542 } 1543 1544 /* 1545 * Inform the routing socket of a route change. 1546 */ 1547 void 1548 rt_newmsg(const int cmd, const struct rtentry *rt) 1549 { 1550 struct rt_addrinfo info; 1551 1552 memset((void *)&info, 0, sizeof(info)); 1553 info.rti_info[RTAX_DST] = rt_getkey(rt); 1554 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 1555 info.rti_info[RTAX_NETMASK] = rt_mask(rt); 1556 if (rt->rt_ifp) { 1557 info.rti_info[RTAX_IFP] = rt->rt_ifp->if_dl->ifa_addr; 1558 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; 1559 } 1560 1561 rt_missmsg(cmd, &info, rt->rt_flags, 0); 1562 } 1563 1564 /* 1565 * Inform the routing socket of a route change for RTF_DYNAMIC. 1566 */ 1567 void 1568 rt_newmsg_dynamic(const int cmd, const struct rtentry *rt) 1569 { 1570 struct rt_addrinfo info; 1571 struct sockaddr *gateway = rt->rt_gateway; 1572 1573 if (gateway == NULL) 1574 return; 1575 1576 switch(gateway->sa_family) { 1577 #ifdef INET 1578 case AF_INET: { 1579 extern bool icmp_dynamic_rt_msg; 1580 if (!icmp_dynamic_rt_msg) 1581 return; 1582 break; 1583 } 1584 #endif 1585 #ifdef INET6 1586 case AF_INET6: { 1587 extern bool icmp6_dynamic_rt_msg; 1588 if (!icmp6_dynamic_rt_msg) 1589 return; 1590 break; 1591 } 1592 #endif 1593 default: 1594 return; 1595 } 1596 1597 memset((void *)&info, 0, sizeof(info)); 1598 info.rti_info[RTAX_DST] = rt_getkey(rt); 1599 info.rti_info[RTAX_GATEWAY] = gateway; 1600 info.rti_info[RTAX_NETMASK] = rt_mask(rt); 1601 if (rt->rt_ifp) { 1602 info.rti_info[RTAX_IFP] = rt->rt_ifp->if_dl->ifa_addr; 1603 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; 1604 } 1605 1606 rt_missmsg(cmd, &info, rt->rt_flags, 0); 1607 } 1608 1609 /* 1610 * Set up or tear down a routing table entry, normally 1611 * for an interface. 1612 */ 1613 int 1614 rtinit(struct ifaddr *ifa, int cmd, int flags) 1615 { 1616 struct rtentry *rt; 1617 struct sockaddr *dst, *odst; 1618 struct sockaddr_storage maskeddst; 1619 struct rtentry *nrt = NULL; 1620 int error; 1621 struct rt_addrinfo info; 1622 1623 dst = flags & RTF_HOST ? ifa->ifa_dstaddr : ifa->ifa_addr; 1624 if (cmd == RTM_DELETE) { 1625 if ((flags & RTF_HOST) == 0 && ifa->ifa_netmask) { 1626 /* Delete subnet route for this interface */ 1627 odst = dst; 1628 dst = (struct sockaddr *)&maskeddst; 1629 rt_maskedcopy(odst, dst, ifa->ifa_netmask); 1630 } 1631 if ((rt = rtalloc1(dst, 0)) != NULL) { 1632 if (rt->rt_ifa != ifa) { 1633 rt_unref(rt); 1634 return (flags & RTF_HOST) ? EHOSTUNREACH 1635 : ENETUNREACH; 1636 } 1637 rt_unref(rt); 1638 } 1639 } 1640 memset(&info, 0, sizeof(info)); 1641 info.rti_ifa = ifa; 1642 info.rti_flags = flags | ifa->ifa_flags | RTF_DONTCHANGEIFA; 1643 info.rti_info[RTAX_DST] = dst; 1644 info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr; 1645 1646 /* 1647 * XXX here, it seems that we are assuming that ifa_netmask is NULL 1648 * for RTF_HOST. bsdi4 passes NULL explicitly (via intermediate 1649 * variable) when RTF_HOST is 1. still not sure if i can safely 1650 * change it to meet bsdi4 behavior. 1651 */ 1652 if (cmd != RTM_LLINFO_UPD) 1653 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; 1654 error = rtrequest1((cmd == RTM_LLINFO_UPD) ? RTM_GET : cmd, &info, 1655 &nrt); 1656 if (error != 0) 1657 return error; 1658 1659 rt = nrt; 1660 RT_REFCNT_TRACE(rt); 1661 switch (cmd) { 1662 case RTM_DELETE: 1663 rt_newmsg(cmd, rt); 1664 rt_free(rt); 1665 break; 1666 case RTM_LLINFO_UPD: 1667 if (cmd == RTM_LLINFO_UPD && ifa->ifa_rtrequest != NULL) 1668 ifa->ifa_rtrequest(RTM_LLINFO_UPD, rt, &info); 1669 rt_newmsg(RTM_CHANGE, rt); 1670 rt_unref(rt); 1671 break; 1672 case RTM_ADD: 1673 KASSERT(rt->rt_ifa == ifa); 1674 rt_newmsg(cmd, rt); 1675 rt_unref(rt); 1676 RT_REFCNT_TRACE(rt); 1677 break; 1678 } 1679 return error; 1680 } 1681 1682 /* 1683 * Create a local route entry for the address. 1684 * Announce the addition of the address and the route to the routing socket. 1685 */ 1686 int 1687 rt_ifa_addlocal(struct ifaddr *ifa) 1688 { 1689 struct rtentry *rt; 1690 int e; 1691 1692 /* If there is no loopback entry, allocate one. */ 1693 rt = rtalloc1(ifa->ifa_addr, 0); 1694 #ifdef RT_DEBUG 1695 if (rt != NULL) 1696 dump_rt(rt); 1697 #endif 1698 if (rt == NULL || (rt->rt_flags & RTF_HOST) == 0 || 1699 (rt->rt_ifp->if_flags & IFF_LOOPBACK) == 0) 1700 { 1701 struct rt_addrinfo info; 1702 struct rtentry *nrt; 1703 1704 memset(&info, 0, sizeof(info)); 1705 info.rti_flags = RTF_HOST | RTF_LOCAL | RTF_DONTCHANGEIFA; 1706 info.rti_info[RTAX_DST] = ifa->ifa_addr; 1707 info.rti_info[RTAX_GATEWAY] = 1708 (const struct sockaddr *)ifa->ifa_ifp->if_sadl; 1709 info.rti_ifa = ifa; 1710 nrt = NULL; 1711 e = rtrequest1(RTM_ADD, &info, &nrt); 1712 rt_addrmsg_rt(RTM_ADD, ifa, e, nrt); 1713 if (nrt != NULL) { 1714 KASSERT(nrt->rt_ifa == ifa); 1715 #ifdef RT_DEBUG 1716 dump_rt(nrt); 1717 #endif 1718 rt_unref(nrt); 1719 RT_REFCNT_TRACE(nrt); 1720 } 1721 } else { 1722 e = 0; 1723 rt_addrmsg(RTM_NEWADDR, ifa); 1724 } 1725 if (rt != NULL) 1726 rt_unref(rt); 1727 return e; 1728 } 1729 1730 /* 1731 * Remove the local route entry for the address. 1732 * Announce the removal of the address and the route to the routing socket. 1733 */ 1734 int 1735 rt_ifa_remlocal(struct ifaddr *ifa, struct ifaddr *alt_ifa) 1736 { 1737 struct rtentry *rt; 1738 int e = 0; 1739 1740 rt = rtalloc1(ifa->ifa_addr, 0); 1741 1742 /* 1743 * Before deleting, check if a corresponding loopbacked 1744 * host route surely exists. With this check, we can avoid 1745 * deleting an interface direct route whose destination is 1746 * the same as the address being removed. This can happen 1747 * when removing a subnet-router anycast address on an 1748 * interface attached to a shared medium. 1749 */ 1750 if (rt != NULL && 1751 (rt->rt_flags & RTF_HOST) && 1752 (rt->rt_ifp->if_flags & IFF_LOOPBACK)) 1753 { 1754 /* If we cannot replace the route's ifaddr with the equivalent 1755 * ifaddr of another interface, I believe it is safest to 1756 * delete the route. 1757 */ 1758 if (alt_ifa == NULL) { 1759 e = rtdeletemsg(rt); 1760 if (e == 0) { 1761 rt_unref(rt); 1762 rt_free(rt); 1763 rt = NULL; 1764 } 1765 rt_addrmsg(RTM_DELADDR, ifa); 1766 } else { 1767 #ifdef NET_MPSAFE 1768 int error = rt_update_prepare(rt); 1769 if (error == 0) { 1770 rt_replace_ifa(rt, alt_ifa); 1771 rt_update_finish(rt); 1772 } else { 1773 /* 1774 * If error != 0, the rtentry is being 1775 * destroyed, so doing nothing doesn't 1776 * matter. 1777 */ 1778 } 1779 #else 1780 rt_replace_ifa(rt, alt_ifa); 1781 #endif 1782 rt_newmsg(RTM_CHANGE, rt); 1783 } 1784 } else 1785 rt_addrmsg(RTM_DELADDR, ifa); 1786 if (rt != NULL) 1787 rt_unref(rt); 1788 return e; 1789 } 1790 1791 /* 1792 * Route timer routines. These routes allow functions to be called 1793 * for various routes at any time. This is useful in supporting 1794 * path MTU discovery and redirect route deletion. 1795 * 1796 * This is similar to some BSDI internal functions, but it provides 1797 * for multiple queues for efficiency's sake... 1798 */ 1799 1800 LIST_HEAD(, rttimer_queue) rttimer_queue_head; 1801 static int rt_init_done = 0; 1802 1803 /* 1804 * Some subtle order problems with domain initialization mean that 1805 * we cannot count on this being run from rt_init before various 1806 * protocol initializations are done. Therefore, we make sure 1807 * that this is run when the first queue is added... 1808 */ 1809 1810 static void rt_timer_work(struct work *, void *); 1811 1812 static void 1813 rt_timer_init(void) 1814 { 1815 int error; 1816 1817 assert(rt_init_done == 0); 1818 1819 /* XXX should be in rt_init */ 1820 rw_init(&rt_lock); 1821 1822 LIST_INIT(&rttimer_queue_head); 1823 callout_init(&rt_timer_ch, CALLOUT_MPSAFE); 1824 error = workqueue_create(&rt_timer_wq, "rt_timer", 1825 rt_timer_work, NULL, PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE); 1826 if (error) 1827 panic("%s: workqueue_create failed (%d)\n", __func__, error); 1828 callout_reset(&rt_timer_ch, hz, rt_timer_timer, NULL); 1829 rt_init_done = 1; 1830 } 1831 1832 struct rttimer_queue * 1833 rt_timer_queue_create(u_int timeout) 1834 { 1835 struct rttimer_queue *rtq; 1836 1837 if (rt_init_done == 0) 1838 rt_timer_init(); 1839 1840 R_Malloc(rtq, struct rttimer_queue *, sizeof *rtq); 1841 if (rtq == NULL) 1842 return NULL; 1843 memset(rtq, 0, sizeof(*rtq)); 1844 1845 rtq->rtq_timeout = timeout; 1846 TAILQ_INIT(&rtq->rtq_head); 1847 RT_WLOCK(); 1848 LIST_INSERT_HEAD(&rttimer_queue_head, rtq, rtq_link); 1849 RT_UNLOCK(); 1850 1851 return rtq; 1852 } 1853 1854 void 1855 rt_timer_queue_change(struct rttimer_queue *rtq, long timeout) 1856 { 1857 1858 rtq->rtq_timeout = timeout; 1859 } 1860 1861 static void 1862 rt_timer_queue_remove_all(struct rttimer_queue *rtq) 1863 { 1864 struct rttimer *r; 1865 1866 RT_ASSERT_WLOCK(); 1867 1868 while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL) { 1869 LIST_REMOVE(r, rtt_link); 1870 TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next); 1871 rt_ref(r->rtt_rt); /* XXX */ 1872 RT_REFCNT_TRACE(r->rtt_rt); 1873 RT_UNLOCK(); 1874 (*r->rtt_func)(r->rtt_rt, r); 1875 pool_put(&rttimer_pool, r); 1876 RT_WLOCK(); 1877 if (rtq->rtq_count > 0) 1878 rtq->rtq_count--; 1879 else 1880 printf("rt_timer_queue_remove_all: " 1881 "rtq_count reached 0\n"); 1882 } 1883 } 1884 1885 void 1886 rt_timer_queue_destroy(struct rttimer_queue *rtq) 1887 { 1888 1889 RT_WLOCK(); 1890 rt_timer_queue_remove_all(rtq); 1891 LIST_REMOVE(rtq, rtq_link); 1892 RT_UNLOCK(); 1893 1894 /* 1895 * Caller is responsible for freeing the rttimer_queue structure. 1896 */ 1897 } 1898 1899 unsigned long 1900 rt_timer_count(struct rttimer_queue *rtq) 1901 { 1902 return rtq->rtq_count; 1903 } 1904 1905 static void 1906 rt_timer_remove_all(struct rtentry *rt) 1907 { 1908 struct rttimer *r; 1909 1910 RT_WLOCK(); 1911 while ((r = LIST_FIRST(&rt->rt_timer)) != NULL) { 1912 LIST_REMOVE(r, rtt_link); 1913 TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next); 1914 if (r->rtt_queue->rtq_count > 0) 1915 r->rtt_queue->rtq_count--; 1916 else 1917 printf("rt_timer_remove_all: rtq_count reached 0\n"); 1918 pool_put(&rttimer_pool, r); 1919 } 1920 RT_UNLOCK(); 1921 } 1922 1923 int 1924 rt_timer_add(struct rtentry *rt, 1925 void (*func)(struct rtentry *, struct rttimer *), 1926 struct rttimer_queue *queue) 1927 { 1928 struct rttimer *r; 1929 1930 KASSERT(func != NULL); 1931 RT_WLOCK(); 1932 /* 1933 * If there's already a timer with this action, destroy it before 1934 * we add a new one. 1935 */ 1936 LIST_FOREACH(r, &rt->rt_timer, rtt_link) { 1937 if (r->rtt_func == func) 1938 break; 1939 } 1940 if (r != NULL) { 1941 LIST_REMOVE(r, rtt_link); 1942 TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next); 1943 if (r->rtt_queue->rtq_count > 0) 1944 r->rtt_queue->rtq_count--; 1945 else 1946 printf("rt_timer_add: rtq_count reached 0\n"); 1947 } else { 1948 r = pool_get(&rttimer_pool, PR_NOWAIT); 1949 if (r == NULL) { 1950 RT_UNLOCK(); 1951 return ENOBUFS; 1952 } 1953 } 1954 1955 memset(r, 0, sizeof(*r)); 1956 1957 r->rtt_rt = rt; 1958 r->rtt_time = time_uptime; 1959 r->rtt_func = func; 1960 r->rtt_queue = queue; 1961 LIST_INSERT_HEAD(&rt->rt_timer, r, rtt_link); 1962 TAILQ_INSERT_TAIL(&queue->rtq_head, r, rtt_next); 1963 r->rtt_queue->rtq_count++; 1964 1965 RT_UNLOCK(); 1966 1967 return 0; 1968 } 1969 1970 static void 1971 rt_timer_work(struct work *wk, void *arg) 1972 { 1973 struct rttimer_queue *rtq; 1974 struct rttimer *r; 1975 1976 RT_WLOCK(); 1977 LIST_FOREACH(rtq, &rttimer_queue_head, rtq_link) { 1978 while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL && 1979 (r->rtt_time + rtq->rtq_timeout) < time_uptime) { 1980 LIST_REMOVE(r, rtt_link); 1981 TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next); 1982 /* 1983 * Take a reference to avoid the rtentry is freed 1984 * accidentally after RT_UNLOCK. The callback 1985 * (rtt_func) must rt_unref it by itself. 1986 */ 1987 rt_ref(r->rtt_rt); 1988 RT_REFCNT_TRACE(r->rtt_rt); 1989 RT_UNLOCK(); 1990 (*r->rtt_func)(r->rtt_rt, r); 1991 pool_put(&rttimer_pool, r); 1992 RT_WLOCK(); 1993 if (rtq->rtq_count > 0) 1994 rtq->rtq_count--; 1995 else 1996 printf("rt_timer_timer: rtq_count reached 0\n"); 1997 } 1998 } 1999 RT_UNLOCK(); 2000 2001 callout_reset(&rt_timer_ch, hz, rt_timer_timer, NULL); 2002 } 2003 2004 static void 2005 rt_timer_timer(void *arg) 2006 { 2007 2008 workqueue_enqueue(rt_timer_wq, &rt_timer_wk, NULL); 2009 } 2010 2011 static struct rtentry * 2012 _rtcache_init(struct route *ro, int flag) 2013 { 2014 struct rtentry *rt; 2015 2016 rtcache_invariants(ro); 2017 KASSERT(ro->_ro_rt == NULL); 2018 2019 if (rtcache_getdst(ro) == NULL) 2020 return NULL; 2021 rt = rtalloc1(rtcache_getdst(ro), flag); 2022 if (rt != NULL) { 2023 RT_RLOCK(); 2024 if (ISSET(rt->rt_flags, RTF_UP)) { 2025 ro->_ro_rt = rt; 2026 ro->ro_rtcache_generation = rtcache_generation; 2027 rtcache_ref(rt, ro); 2028 } 2029 RT_UNLOCK(); 2030 rt_unref(rt); 2031 } 2032 2033 rtcache_invariants(ro); 2034 return ro->_ro_rt; 2035 } 2036 2037 struct rtentry * 2038 rtcache_init(struct route *ro) 2039 { 2040 2041 return _rtcache_init(ro, 1); 2042 } 2043 2044 struct rtentry * 2045 rtcache_init_noclone(struct route *ro) 2046 { 2047 2048 return _rtcache_init(ro, 0); 2049 } 2050 2051 struct rtentry * 2052 rtcache_update(struct route *ro, int clone) 2053 { 2054 2055 ro->_ro_rt = NULL; 2056 return _rtcache_init(ro, clone); 2057 } 2058 2059 void 2060 rtcache_copy(struct route *new_ro, struct route *old_ro) 2061 { 2062 struct rtentry *rt; 2063 int ret; 2064 2065 KASSERT(new_ro != old_ro); 2066 rtcache_invariants(new_ro); 2067 rtcache_invariants(old_ro); 2068 2069 rt = rtcache_validate(old_ro); 2070 2071 if (rtcache_getdst(old_ro) == NULL) 2072 goto out; 2073 ret = rtcache_setdst(new_ro, rtcache_getdst(old_ro)); 2074 if (ret != 0) 2075 goto out; 2076 2077 RT_RLOCK(); 2078 new_ro->_ro_rt = rt; 2079 new_ro->ro_rtcache_generation = rtcache_generation; 2080 RT_UNLOCK(); 2081 rtcache_invariants(new_ro); 2082 out: 2083 rtcache_unref(rt, old_ro); 2084 return; 2085 } 2086 2087 #if defined(RT_DEBUG) && defined(NET_MPSAFE) 2088 static void 2089 rtcache_trace(const char *func, struct rtentry *rt, struct route *ro) 2090 { 2091 char dst[64]; 2092 2093 sockaddr_format(ro->ro_sa, dst, 64); 2094 printf("trace: %s:\tdst=%s cpu=%d lwp=%p psref=%p target=%p\n", func, dst, 2095 cpu_index(curcpu()), curlwp, &ro->ro_psref, &rt->rt_psref); 2096 } 2097 #define RTCACHE_PSREF_TRACE(rt, ro) rtcache_trace(__func__, (rt), (ro)) 2098 #else 2099 #define RTCACHE_PSREF_TRACE(rt, ro) do {} while (0) 2100 #endif 2101 2102 static void 2103 rtcache_ref(struct rtentry *rt, struct route *ro) 2104 { 2105 2106 KASSERT(rt != NULL); 2107 2108 #ifdef NET_MPSAFE 2109 RTCACHE_PSREF_TRACE(rt, ro); 2110 ro->ro_bound = curlwp_bind(); 2111 /* XXX Use a real caller's address */ 2112 PSREF_DEBUG_FILL_RETURN_ADDRESS(&ro->ro_psref); 2113 psref_acquire(&ro->ro_psref, &rt->rt_psref, rt_psref_class); 2114 #endif 2115 } 2116 2117 void 2118 rtcache_unref(struct rtentry *rt, struct route *ro) 2119 { 2120 2121 if (rt == NULL) 2122 return; 2123 2124 #ifdef NET_MPSAFE 2125 psref_release(&ro->ro_psref, &rt->rt_psref, rt_psref_class); 2126 curlwp_bindx(ro->ro_bound); 2127 RTCACHE_PSREF_TRACE(rt, ro); 2128 #endif 2129 } 2130 2131 struct rtentry * 2132 rtcache_validate(struct route *ro) 2133 { 2134 struct rtentry *rt = NULL; 2135 2136 #ifdef NET_MPSAFE 2137 retry: 2138 #endif 2139 rtcache_invariants(ro); 2140 RT_RLOCK(); 2141 if (ro->ro_rtcache_generation != rtcache_generation) { 2142 /* The cache is invalidated */ 2143 rt = NULL; 2144 goto out; 2145 } 2146 2147 rt = ro->_ro_rt; 2148 if (rt == NULL) 2149 goto out; 2150 2151 if ((rt->rt_flags & RTF_UP) == 0) { 2152 rt = NULL; 2153 goto out; 2154 } 2155 #ifdef NET_MPSAFE 2156 if (ISSET(rt->rt_flags, RTF_UPDATING)) { 2157 if (rt_wait_ok()) { 2158 RT_UNLOCK(); 2159 2160 /* We can wait until the update is complete */ 2161 rt_update_wait(); 2162 goto retry; 2163 } else { 2164 rt = NULL; 2165 } 2166 } else 2167 #endif 2168 rtcache_ref(rt, ro); 2169 out: 2170 RT_UNLOCK(); 2171 return rt; 2172 } 2173 2174 struct rtentry * 2175 rtcache_lookup2(struct route *ro, const struct sockaddr *dst, 2176 int clone, int *hitp) 2177 { 2178 const struct sockaddr *odst; 2179 struct rtentry *rt = NULL; 2180 2181 odst = rtcache_getdst(ro); 2182 if (odst == NULL) 2183 goto miss; 2184 2185 if (sockaddr_cmp(odst, dst) != 0) { 2186 rtcache_free(ro); 2187 goto miss; 2188 } 2189 2190 rt = rtcache_validate(ro); 2191 if (rt == NULL) { 2192 ro->_ro_rt = NULL; 2193 goto miss; 2194 } 2195 2196 rtcache_invariants(ro); 2197 2198 if (hitp != NULL) 2199 *hitp = 1; 2200 return rt; 2201 miss: 2202 if (hitp != NULL) 2203 *hitp = 0; 2204 if (rtcache_setdst(ro, dst) == 0) 2205 rt = _rtcache_init(ro, clone); 2206 2207 rtcache_invariants(ro); 2208 2209 return rt; 2210 } 2211 2212 void 2213 rtcache_free(struct route *ro) 2214 { 2215 2216 ro->_ro_rt = NULL; 2217 if (ro->ro_sa != NULL) { 2218 sockaddr_free(ro->ro_sa); 2219 ro->ro_sa = NULL; 2220 } 2221 rtcache_invariants(ro); 2222 } 2223 2224 int 2225 rtcache_setdst(struct route *ro, const struct sockaddr *sa) 2226 { 2227 KASSERT(sa != NULL); 2228 2229 rtcache_invariants(ro); 2230 if (ro->ro_sa != NULL) { 2231 if (ro->ro_sa->sa_family == sa->sa_family) { 2232 ro->_ro_rt = NULL; 2233 sockaddr_copy(ro->ro_sa, ro->ro_sa->sa_len, sa); 2234 rtcache_invariants(ro); 2235 return 0; 2236 } 2237 /* free ro_sa, wrong family */ 2238 rtcache_free(ro); 2239 } 2240 2241 KASSERT(ro->_ro_rt == NULL); 2242 2243 if ((ro->ro_sa = sockaddr_dup(sa, M_ZERO | M_NOWAIT)) == NULL) { 2244 rtcache_invariants(ro); 2245 return ENOMEM; 2246 } 2247 rtcache_invariants(ro); 2248 return 0; 2249 } 2250 2251 static void 2252 rtcache_percpu_init_cpu(void *p, void *arg __unused, struct cpu_info *ci __unused) 2253 { 2254 struct route **rop = p; 2255 2256 /* 2257 * We can't have struct route as percpu data because it can be destroyed 2258 * over a memory enlargement processing of percpu. 2259 */ 2260 *rop = kmem_zalloc(sizeof(**rop), KM_SLEEP); 2261 } 2262 2263 percpu_t * 2264 rtcache_percpu_alloc(void) 2265 { 2266 2267 return percpu_create(sizeof(struct route *), 2268 rtcache_percpu_init_cpu, NULL, NULL); 2269 } 2270 2271 const struct sockaddr * 2272 rt_settag(struct rtentry *rt, const struct sockaddr *tag) 2273 { 2274 if (rt->rt_tag != tag) { 2275 if (rt->rt_tag != NULL) 2276 sockaddr_free(rt->rt_tag); 2277 rt->rt_tag = sockaddr_dup(tag, M_ZERO | M_NOWAIT); 2278 } 2279 return rt->rt_tag; 2280 } 2281 2282 struct sockaddr * 2283 rt_gettag(const struct rtentry *rt) 2284 { 2285 return rt->rt_tag; 2286 } 2287 2288 int 2289 rt_check_reject_route(const struct rtentry *rt, const struct ifnet *ifp) 2290 { 2291 2292 if ((rt->rt_flags & RTF_REJECT) != 0) { 2293 /* Mimic looutput */ 2294 if (ifp->if_flags & IFF_LOOPBACK) 2295 return (rt->rt_flags & RTF_HOST) ? 2296 EHOSTUNREACH : ENETUNREACH; 2297 else if (rt->rt_rmx.rmx_expire == 0 || 2298 time_uptime < rt->rt_rmx.rmx_expire) 2299 return (rt->rt_flags & RTF_GATEWAY) ? 2300 EHOSTUNREACH : EHOSTDOWN; 2301 } 2302 2303 return 0; 2304 } 2305 2306 void 2307 rt_delete_matched_entries(sa_family_t family, int (*f)(struct rtentry *, void *), 2308 void *v, bool notify) 2309 { 2310 2311 for (;;) { 2312 int s; 2313 int error; 2314 struct rtentry *rt, *retrt = NULL; 2315 2316 RT_RLOCK(); 2317 s = splsoftnet(); 2318 rt = rtbl_search_matched_entry(family, f, v); 2319 if (rt == NULL) { 2320 splx(s); 2321 RT_UNLOCK(); 2322 return; 2323 } 2324 rt_ref(rt); 2325 RT_REFCNT_TRACE(rt); 2326 splx(s); 2327 RT_UNLOCK(); 2328 2329 error = rtrequest(RTM_DELETE, rt_getkey(rt), rt->rt_gateway, 2330 rt_mask(rt), rt->rt_flags, &retrt); 2331 if (error == 0) { 2332 KASSERT(retrt == rt); 2333 KASSERT((retrt->rt_flags & RTF_UP) == 0); 2334 if (notify) 2335 rt_newmsg(RTM_DELETE, retrt); 2336 retrt->rt_ifp = NULL; 2337 rt_unref(rt); 2338 RT_REFCNT_TRACE(rt); 2339 rt_free(retrt); 2340 } else if (error == ESRCH) { 2341 /* Someone deleted the entry already. */ 2342 rt_unref(rt); 2343 RT_REFCNT_TRACE(rt); 2344 } else { 2345 log(LOG_ERR, "%s: unable to delete rtentry @ %p, " 2346 "error = %d\n", rt->rt_ifp->if_xname, rt, error); 2347 /* XXX how to treat this case? */ 2348 } 2349 } 2350 } 2351 2352 static int 2353 rt_walktree_locked(sa_family_t family, int (*f)(struct rtentry *, void *), 2354 void *v) 2355 { 2356 2357 return rtbl_walktree(family, f, v); 2358 } 2359 2360 void 2361 rt_replace_ifa_matched_entries(sa_family_t family, 2362 int (*f)(struct rtentry *, void *), void *v, struct ifaddr *ifa) 2363 { 2364 2365 for (;;) { 2366 int s; 2367 #ifdef NET_MPSAFE 2368 int error; 2369 #endif 2370 struct rtentry *rt; 2371 2372 RT_RLOCK(); 2373 s = splsoftnet(); 2374 rt = rtbl_search_matched_entry(family, f, v); 2375 if (rt == NULL) { 2376 splx(s); 2377 RT_UNLOCK(); 2378 return; 2379 } 2380 rt_ref(rt); 2381 RT_REFCNT_TRACE(rt); 2382 splx(s); 2383 RT_UNLOCK(); 2384 2385 #ifdef NET_MPSAFE 2386 error = rt_update_prepare(rt); 2387 if (error == 0) { 2388 rt_replace_ifa(rt, ifa); 2389 rt_update_finish(rt); 2390 rt_newmsg(RTM_CHANGE, rt); 2391 } else { 2392 /* 2393 * If error != 0, the rtentry is being 2394 * destroyed, so doing nothing doesn't 2395 * matter. 2396 */ 2397 } 2398 #else 2399 rt_replace_ifa(rt, ifa); 2400 rt_newmsg(RTM_CHANGE, rt); 2401 #endif 2402 rt_unref(rt); 2403 RT_REFCNT_TRACE(rt); 2404 } 2405 } 2406 2407 int 2408 rt_walktree(sa_family_t family, int (*f)(struct rtentry *, void *), void *v) 2409 { 2410 int error; 2411 2412 RT_RLOCK(); 2413 error = rt_walktree_locked(family, f, v); 2414 RT_UNLOCK(); 2415 2416 return error; 2417 } 2418 2419 #ifdef DDB 2420 2421 #include <machine/db_machdep.h> 2422 #include <ddb/db_interface.h> 2423 #include <ddb/db_output.h> 2424 2425 #define rt_expire rt_rmx.rmx_expire 2426 2427 static void 2428 db_print_sa(const struct sockaddr *sa) 2429 { 2430 int len; 2431 const u_char *p; 2432 2433 if (sa == NULL) { 2434 db_printf("[NULL]"); 2435 return; 2436 } 2437 2438 p = (const u_char *)sa; 2439 len = sa->sa_len; 2440 db_printf("["); 2441 while (len > 0) { 2442 db_printf("%d", *p); 2443 p++; len--; 2444 if (len) db_printf(","); 2445 } 2446 db_printf("]\n"); 2447 } 2448 2449 static void 2450 db_print_ifa(struct ifaddr *ifa) 2451 { 2452 if (ifa == NULL) 2453 return; 2454 db_printf(" ifa_addr="); 2455 db_print_sa(ifa->ifa_addr); 2456 db_printf(" ifa_dsta="); 2457 db_print_sa(ifa->ifa_dstaddr); 2458 db_printf(" ifa_mask="); 2459 db_print_sa(ifa->ifa_netmask); 2460 db_printf(" flags=0x%x,refcnt=%d,metric=%d\n", 2461 ifa->ifa_flags, 2462 ifa->ifa_refcnt, 2463 ifa->ifa_metric); 2464 } 2465 2466 /* 2467 * Function to pass to rt_walktree(). 2468 * Return non-zero error to abort walk. 2469 */ 2470 static int 2471 db_show_rtentry(struct rtentry *rt, void *w) 2472 { 2473 db_printf("rtentry=%p", rt); 2474 2475 db_printf(" flags=0x%x refcnt=%d use=%"PRId64" expire=%"PRId64"\n", 2476 rt->rt_flags, rt->rt_refcnt, 2477 rt->rt_use, (uint64_t)rt->rt_expire); 2478 2479 db_printf(" key="); db_print_sa(rt_getkey(rt)); 2480 db_printf(" mask="); db_print_sa(rt_mask(rt)); 2481 db_printf(" gw="); db_print_sa(rt->rt_gateway); 2482 2483 db_printf(" ifp=%p ", rt->rt_ifp); 2484 if (rt->rt_ifp) 2485 db_printf("(%s)", rt->rt_ifp->if_xname); 2486 else 2487 db_printf("(NULL)"); 2488 2489 db_printf(" ifa=%p\n", rt->rt_ifa); 2490 db_print_ifa(rt->rt_ifa); 2491 2492 db_printf(" gwroute=%p llinfo=%p\n", 2493 rt->rt_gwroute, rt->rt_llinfo); 2494 2495 return 0; 2496 } 2497 2498 /* 2499 * Function to print all the route trees. 2500 * Use this from ddb: "show routes" 2501 */ 2502 void 2503 db_show_routes(db_expr_t addr, bool have_addr, 2504 db_expr_t count, const char *modif) 2505 { 2506 2507 /* Taking RT_LOCK will fail if LOCKDEBUG is enabled. */ 2508 rt_walktree_locked(AF_INET, db_show_rtentry, NULL); 2509 } 2510 #endif 2511