1 /* $NetBSD: route.c,v 1.229 2020/04/08 03:37:14 knakahara Exp $ */ 2 3 /*- 4 * Copyright (c) 1998, 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Kevin M. Lahey of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 35 * All rights reserved. 36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. Redistributions of source code must retain the above copyright 41 * notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. Neither the name of the project nor the names of its contributors 46 * may be used to endorse or promote products derived from this software 47 * without specific prior written permission. 48 * 49 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 52 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 59 * SUCH DAMAGE. 60 */ 61 62 /* 63 * Copyright (c) 1980, 1986, 1991, 1993 64 * The Regents of the University of California. All rights reserved. 
65 * 66 * Redistribution and use in source and binary forms, with or without 67 * modification, are permitted provided that the following conditions 68 * are met: 69 * 1. Redistributions of source code must retain the above copyright 70 * notice, this list of conditions and the following disclaimer. 71 * 2. Redistributions in binary form must reproduce the above copyright 72 * notice, this list of conditions and the following disclaimer in the 73 * documentation and/or other materials provided with the distribution. 74 * 3. Neither the name of the University nor the names of its contributors 75 * may be used to endorse or promote products derived from this software 76 * without specific prior written permission. 77 * 78 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 79 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 80 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 81 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 82 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 83 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 84 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 85 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 86 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 87 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 88 * SUCH DAMAGE. 89 * 90 * @(#)route.c 8.3 (Berkeley) 1/9/95 91 */ 92 93 #ifdef _KERNEL_OPT 94 #include "opt_inet.h" 95 #include "opt_route.h" 96 #include "opt_net_mpsafe.h" 97 #endif 98 99 #include <sys/cdefs.h> 100 __KERNEL_RCSID(0, "$NetBSD: route.c,v 1.229 2020/04/08 03:37:14 knakahara Exp $"); 101 102 #include <sys/param.h> 103 #ifdef RTFLUSH_DEBUG 104 #include <sys/sysctl.h> 105 #endif 106 #include <sys/systm.h> 107 #include <sys/callout.h> 108 #include <sys/proc.h> 109 #include <sys/mbuf.h> 110 #include <sys/socket.h> 111 #include <sys/socketvar.h> 112 #include <sys/domain.h> 113 #include <sys/kernel.h> 114 #include <sys/ioctl.h> 115 #include <sys/pool.h> 116 #include <sys/kauth.h> 117 #include <sys/workqueue.h> 118 #include <sys/syslog.h> 119 #include <sys/rwlock.h> 120 #include <sys/mutex.h> 121 #include <sys/cpu.h> 122 #include <sys/kmem.h> 123 124 #include <net/if.h> 125 #include <net/if_dl.h> 126 #include <net/route.h> 127 #if defined(INET) || defined(INET6) 128 #include <net/if_llatbl.h> 129 #endif 130 131 #include <netinet/in.h> 132 #include <netinet/in_var.h> 133 134 #define PRESERVED_RTF (RTF_UP | RTF_GATEWAY | RTF_HOST | RTF_DONE | RTF_MASK) 135 136 #ifdef RTFLUSH_DEBUG 137 #define rtcache_debug() __predict_false(_rtcache_debug) 138 #else /* RTFLUSH_DEBUG */ 139 #define rtcache_debug() 0 140 #endif /* RTFLUSH_DEBUG */ 141 142 #ifdef RT_DEBUG 143 #define RT_REFCNT_TRACE(rt) printf("%s:%d: rt=%p refcnt=%d\n", \ 144 __func__, __LINE__, (rt), (rt)->rt_refcnt) 145 #else 146 #define RT_REFCNT_TRACE(rt) do {} while (0) 147 #endif 148 149 #ifdef RT_DEBUG 150 #define dlog(level, fmt, args...) log(level, fmt, ##args) 151 #else 152 #define dlog(level, fmt, args...) 
do {} while (0) 153 #endif 154 155 struct rtstat rtstat; 156 157 static int rttrash; /* routes not in table but not freed */ 158 159 static struct pool rtentry_pool; 160 static struct pool rttimer_pool; 161 162 static struct callout rt_timer_ch; /* callout for rt_timer_timer() */ 163 static struct workqueue *rt_timer_wq; 164 static struct work rt_timer_wk; 165 166 static void rt_timer_init(void); 167 static void rt_timer_queue_remove_all(struct rttimer_queue *); 168 static void rt_timer_remove_all(struct rtentry *); 169 static void rt_timer_timer(void *); 170 171 /* 172 * Locking notes: 173 * - The routing table is protected by a global rwlock 174 * - API: RT_RLOCK and friends 175 * - rtcaches are NOT protected by the framework 176 * - Callers must guarantee a rtcache isn't accessed simultaneously 177 * - How the constraint is guranteed in the wild 178 * - Protect a rtcache by a mutex (e.g., inp_route) 179 * - Make rtcache per-CPU and allow only accesses from softint 180 * (e.g., ipforward_rt_percpu) 181 * - References to a rtentry is managed by reference counting and psref 182 * - Reference counting is used for temporal reference when a rtentry 183 * is fetched from the routing table 184 * - psref is used for temporal reference when a rtentry is fetched 185 * from a rtcache 186 * - struct route (rtcache) has struct psref, so we cannot obtain 187 * a reference twice on the same struct route 188 * - Before destroying or updating a rtentry, we have to wait for 189 * all references left (see below for details) 190 * - APIs 191 * - An obtained rtentry via rtalloc1 or rtrequest* must be 192 * unreferenced by rt_unref 193 * - An obtained rtentry via rtcache_* must be unreferenced by 194 * rtcache_unref 195 * - TODO: once we get a lockless routing table, we should use only 196 * psref for rtentries 197 * - rtentry destruction 198 * - A rtentry is destroyed (freed) only when we call rtrequest(RTM_DELETE) 199 * - If a caller of rtrequest grabs a reference of a rtentry, the caller 200 * has a responsibility to destroy the rtentry by itself by calling 201 * rt_free 202 * - If not, rtrequest itself does that 203 * - If rt_free is called in softint, the actual destruction routine is 204 * deferred to a workqueue 205 * - rtentry update 206 * - When updating a rtentry, RTF_UPDATING flag is set 207 * - If a rtentry is set RTF_UPDATING, fetching the rtentry from 208 * the routing table or a rtcache results in either of the following 209 * cases: 210 * - if the caller runs in softint, the caller fails to fetch 211 * - otherwise, the caller waits for the update completed and retries 212 * to fetch (probably succeed to fetch for the second time) 213 * - rtcache invalidation 214 * - There is a global generation counter that is incremented when 215 * any routes have been added or deleted 216 * - When a rtcache caches a rtentry into itself, it also stores 217 * a snapshot of the generation counter 218 * - If the snapshot equals to the global counter, the cache is valid, 219 * otherwise the cache is invalidated 220 */ 221 222 /* 223 * Global lock for the routing table. 
224 */ 225 static krwlock_t rt_lock __cacheline_aligned; 226 #ifdef NET_MPSAFE 227 #define RT_RLOCK() rw_enter(&rt_lock, RW_READER) 228 #define RT_WLOCK() rw_enter(&rt_lock, RW_WRITER) 229 #define RT_UNLOCK() rw_exit(&rt_lock) 230 #define RT_WLOCKED() rw_write_held(&rt_lock) 231 #define RT_ASSERT_WLOCK() KASSERT(rw_write_held(&rt_lock)) 232 #else 233 #define RT_RLOCK() do {} while (0) 234 #define RT_WLOCK() do {} while (0) 235 #define RT_UNLOCK() do {} while (0) 236 #define RT_WLOCKED() true 237 #define RT_ASSERT_WLOCK() do {} while (0) 238 #endif 239 240 static uint64_t rtcache_generation; 241 242 /* 243 * mutex and cv that are used to wait for references to a rtentry left 244 * before updating the rtentry. 245 */ 246 static struct { 247 kmutex_t lock; 248 kcondvar_t cv; 249 bool ongoing; 250 const struct lwp *lwp; 251 } rt_update_global __cacheline_aligned; 252 253 /* 254 * A workqueue and stuff that are used to defer the destruction routine 255 * of rtentries. 256 */ 257 static struct { 258 struct workqueue *wq; 259 struct work wk; 260 kmutex_t lock; 261 SLIST_HEAD(, rtentry) queue; 262 bool enqueued; 263 } rt_free_global __cacheline_aligned; 264 265 /* psref for rtentry */ 266 static struct psref_class *rt_psref_class __read_mostly; 267 268 #ifdef RTFLUSH_DEBUG 269 static int _rtcache_debug = 0; 270 #endif /* RTFLUSH_DEBUG */ 271 272 static kauth_listener_t route_listener; 273 274 static int rtdeletemsg(struct rtentry *); 275 276 static void rt_maskedcopy(const struct sockaddr *, 277 struct sockaddr *, const struct sockaddr *); 278 279 static void rtcache_invalidate(void); 280 281 static void rt_ref(struct rtentry *); 282 283 static struct rtentry * 284 rtalloc1_locked(const struct sockaddr *, int, bool, bool); 285 286 static struct ifaddr *rt_getifa(struct rt_addrinfo *, struct psref *); 287 static struct ifnet *rt_getifp(struct rt_addrinfo *, struct psref *); 288 static struct ifaddr *ifa_ifwithroute_psref(int, const struct sockaddr *, 289 const struct sockaddr *, struct psref *); 290 291 static void rtcache_ref(struct rtentry *, struct route *); 292 293 #ifdef NET_MPSAFE 294 static void rt_update_wait(void); 295 #endif 296 297 static bool rt_wait_ok(void); 298 static void rt_wait_refcnt(const char *, struct rtentry *, int); 299 static void rt_wait_psref(struct rtentry *); 300 301 #ifdef DDB 302 static void db_print_sa(const struct sockaddr *); 303 static void db_print_ifa(struct ifaddr *); 304 static int db_show_rtentry(struct rtentry *, void *); 305 #endif 306 307 #ifdef RTFLUSH_DEBUG 308 static void sysctl_net_rtcache_setup(struct sysctllog **); 309 static void 310 sysctl_net_rtcache_setup(struct sysctllog **clog) 311 { 312 const struct sysctlnode *rnode; 313 314 if (sysctl_createv(clog, 0, NULL, &rnode, CTLFLAG_PERMANENT, 315 CTLTYPE_NODE, 316 "rtcache", SYSCTL_DESCR("Route cache related settings"), 317 NULL, 0, NULL, 0, CTL_NET, CTL_CREATE, CTL_EOL) != 0) 318 return; 319 if (sysctl_createv(clog, 0, &rnode, &rnode, 320 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, 321 "debug", SYSCTL_DESCR("Debug route caches"), 322 NULL, 0, &_rtcache_debug, 0, CTL_CREATE, CTL_EOL) != 0) 323 return; 324 } 325 #endif /* RTFLUSH_DEBUG */ 326 327 static inline void 328 rt_destroy(struct rtentry *rt) 329 { 330 if (rt->_rt_key != NULL) 331 sockaddr_free(rt->_rt_key); 332 if (rt->rt_gateway != NULL) 333 sockaddr_free(rt->rt_gateway); 334 if (rt_gettag(rt) != NULL) 335 sockaddr_free(rt_gettag(rt)); 336 rt->_rt_key = rt->rt_gateway = rt->rt_tag = NULL; 337 } 338 339 static inline const struct 
sockaddr * 340 rt_setkey(struct rtentry *rt, const struct sockaddr *key, int flags) 341 { 342 if (rt->_rt_key == key) 343 goto out; 344 345 if (rt->_rt_key != NULL) 346 sockaddr_free(rt->_rt_key); 347 rt->_rt_key = sockaddr_dup(key, flags); 348 out: 349 rt->rt_nodes->rn_key = (const char *)rt->_rt_key; 350 return rt->_rt_key; 351 } 352 353 struct ifaddr * 354 rt_get_ifa(struct rtentry *rt) 355 { 356 struct ifaddr *ifa; 357 358 ifa = rt->rt_ifa; 359 if (ifa->ifa_getifa == NULL) 360 return ifa; 361 #if 0 362 else if (ifa->ifa_seqno != NULL && *ifa->ifa_seqno == rt->rt_ifa_seqno) 363 return ifa; 364 #endif 365 else { 366 ifa = (*ifa->ifa_getifa)(ifa, rt_getkey(rt)); 367 if (ifa == NULL) 368 return NULL; 369 rt_replace_ifa(rt, ifa); 370 return ifa; 371 } 372 } 373 374 static void 375 rt_set_ifa1(struct rtentry *rt, struct ifaddr *ifa) 376 { 377 rt->rt_ifa = ifa; 378 if (ifa->ifa_seqno != NULL) 379 rt->rt_ifa_seqno = *ifa->ifa_seqno; 380 } 381 382 /* 383 * Is this route the connected route for the ifa? 384 */ 385 static int 386 rt_ifa_connected(const struct rtentry *rt, const struct ifaddr *ifa) 387 { 388 const struct sockaddr *key, *dst, *odst; 389 struct sockaddr_storage maskeddst; 390 391 key = rt_getkey(rt); 392 dst = rt->rt_flags & RTF_HOST ? ifa->ifa_dstaddr : ifa->ifa_addr; 393 if (dst == NULL || 394 dst->sa_family != key->sa_family || 395 dst->sa_len != key->sa_len) 396 return 0; 397 if ((rt->rt_flags & RTF_HOST) == 0 && ifa->ifa_netmask) { 398 odst = dst; 399 dst = (struct sockaddr *)&maskeddst; 400 rt_maskedcopy(odst, (struct sockaddr *)&maskeddst, 401 ifa->ifa_netmask); 402 } 403 return (memcmp(dst, key, dst->sa_len) == 0); 404 } 405 406 void 407 rt_replace_ifa(struct rtentry *rt, struct ifaddr *ifa) 408 { 409 struct ifaddr *old; 410 411 if (rt->rt_ifa == ifa) 412 return; 413 414 if (rt->rt_ifa != ifa && 415 rt->rt_ifa->ifa_flags & IFA_ROUTE && 416 rt_ifa_connected(rt, rt->rt_ifa)) 417 { 418 RT_DPRINTF("rt->_rt_key = %p, ifa = %p, " 419 "replace deleted IFA_ROUTE\n", 420 (void *)rt->_rt_key, (void *)rt->rt_ifa); 421 rt->rt_ifa->ifa_flags &= ~IFA_ROUTE; 422 if (rt_ifa_connected(rt, ifa)) { 423 RT_DPRINTF("rt->_rt_key = %p, ifa = %p, " 424 "replace added IFA_ROUTE\n", 425 (void *)rt->_rt_key, (void *)ifa); 426 ifa->ifa_flags |= IFA_ROUTE; 427 } 428 } 429 430 ifaref(ifa); 431 old = rt->rt_ifa; 432 rt_set_ifa1(rt, ifa); 433 ifafree(old); 434 } 435 436 static void 437 rt_set_ifa(struct rtentry *rt, struct ifaddr *ifa) 438 { 439 ifaref(ifa); 440 rt_set_ifa1(rt, ifa); 441 } 442 443 static int 444 route_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie, 445 void *arg0, void *arg1, void *arg2, void *arg3) 446 { 447 struct rt_msghdr *rtm; 448 int result; 449 450 result = KAUTH_RESULT_DEFER; 451 rtm = arg1; 452 453 if (action != KAUTH_NETWORK_ROUTE) 454 return result; 455 456 if (rtm->rtm_type == RTM_GET) 457 result = KAUTH_RESULT_ALLOW; 458 459 return result; 460 } 461 462 static void rt_free_work(struct work *, void *); 463 464 void 465 rt_init(void) 466 { 467 int error; 468 469 #ifdef RTFLUSH_DEBUG 470 sysctl_net_rtcache_setup(NULL); 471 #endif 472 473 mutex_init(&rt_free_global.lock, MUTEX_DEFAULT, IPL_SOFTNET); 474 SLIST_INIT(&rt_free_global.queue); 475 rt_free_global.enqueued = false; 476 477 rt_psref_class = psref_class_create("rtentry", IPL_SOFTNET); 478 479 error = workqueue_create(&rt_free_global.wq, "rt_free", 480 rt_free_work, NULL, PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE); 481 if (error) 482 panic("%s: workqueue_create failed (%d)\n", __func__, error); 483 484 
mutex_init(&rt_update_global.lock, MUTEX_DEFAULT, IPL_SOFTNET); 485 cv_init(&rt_update_global.cv, "rt_update"); 486 487 pool_init(&rtentry_pool, sizeof(struct rtentry), 0, 0, 0, "rtentpl", 488 NULL, IPL_SOFTNET); 489 pool_init(&rttimer_pool, sizeof(struct rttimer), 0, 0, 0, "rttmrpl", 490 NULL, IPL_SOFTNET); 491 492 rn_init(); /* initialize all zeroes, all ones, mask table */ 493 rtbl_init(); 494 495 route_listener = kauth_listen_scope(KAUTH_SCOPE_NETWORK, 496 route_listener_cb, NULL); 497 } 498 499 static void 500 rtcache_invalidate(void) 501 { 502 503 RT_ASSERT_WLOCK(); 504 505 if (rtcache_debug()) 506 printf("%s: enter\n", __func__); 507 508 rtcache_generation++; 509 } 510 511 #ifdef RT_DEBUG 512 static void 513 dump_rt(const struct rtentry *rt) 514 { 515 char buf[512]; 516 517 log(LOG_DEBUG, "rt: "); 518 log(LOG_DEBUG, "p=%p ", rt); 519 if (rt->_rt_key == NULL) { 520 log(LOG_DEBUG, "dst=(NULL) "); 521 } else { 522 sockaddr_format(rt->_rt_key, buf, sizeof(buf)); 523 log(LOG_DEBUG, "dst=%s ", buf); 524 } 525 if (rt->rt_gateway == NULL) { 526 log(LOG_DEBUG, "gw=(NULL) "); 527 } else { 528 sockaddr_format(rt->_rt_key, buf, sizeof(buf)); 529 log(LOG_DEBUG, "gw=%s ", buf); 530 } 531 log(LOG_DEBUG, "flags=%x ", rt->rt_flags); 532 if (rt->rt_ifp == NULL) { 533 log(LOG_DEBUG, "if=(NULL) "); 534 } else { 535 log(LOG_DEBUG, "if=%s ", rt->rt_ifp->if_xname); 536 } 537 log(LOG_DEBUG, "\n"); 538 } 539 #endif /* RT_DEBUG */ 540 541 /* 542 * Packet routing routines. If success, refcnt of a returned rtentry 543 * will be incremented. The caller has to rtfree it by itself. 544 */ 545 struct rtentry * 546 rtalloc1_locked(const struct sockaddr *dst, int report, bool wait_ok, 547 bool wlock) 548 { 549 rtbl_t *rtbl; 550 struct rtentry *rt; 551 int s; 552 553 #ifdef NET_MPSAFE 554 retry: 555 #endif 556 s = splsoftnet(); 557 rtbl = rt_gettable(dst->sa_family); 558 if (rtbl == NULL) 559 goto miss; 560 561 rt = rt_matchaddr(rtbl, dst); 562 if (rt == NULL) 563 goto miss; 564 565 if (!ISSET(rt->rt_flags, RTF_UP)) 566 goto miss; 567 568 #ifdef NET_MPSAFE 569 if (ISSET(rt->rt_flags, RTF_UPDATING) && 570 /* XXX updater should be always able to acquire */ 571 curlwp != rt_update_global.lwp) { 572 if (!wait_ok || !rt_wait_ok()) 573 goto miss; 574 RT_UNLOCK(); 575 splx(s); 576 577 /* We can wait until the update is complete */ 578 rt_update_wait(); 579 580 if (wlock) 581 RT_WLOCK(); 582 else 583 RT_RLOCK(); 584 goto retry; 585 } 586 #endif /* NET_MPSAFE */ 587 588 rt_ref(rt); 589 RT_REFCNT_TRACE(rt); 590 591 splx(s); 592 return rt; 593 miss: 594 rtstat.rts_unreach++; 595 if (report) { 596 struct rt_addrinfo info; 597 598 memset(&info, 0, sizeof(info)); 599 info.rti_info[RTAX_DST] = dst; 600 rt_missmsg(RTM_MISS, &info, 0, 0); 601 } 602 splx(s); 603 return NULL; 604 } 605 606 struct rtentry * 607 rtalloc1(const struct sockaddr *dst, int report) 608 { 609 struct rtentry *rt; 610 611 RT_RLOCK(); 612 rt = rtalloc1_locked(dst, report, true, false); 613 RT_UNLOCK(); 614 615 return rt; 616 } 617 618 static void 619 rt_ref(struct rtentry *rt) 620 { 621 622 KASSERTMSG(rt->rt_refcnt >= 0, "rt_refcnt=%d", rt->rt_refcnt); 623 atomic_inc_uint(&rt->rt_refcnt); 624 } 625 626 void 627 rt_unref(struct rtentry *rt) 628 { 629 630 KASSERT(rt != NULL); 631 KASSERTMSG(rt->rt_refcnt > 0, "refcnt=%d", rt->rt_refcnt); 632 633 atomic_dec_uint(&rt->rt_refcnt); 634 if (!ISSET(rt->rt_flags, RTF_UP) || ISSET(rt->rt_flags, RTF_UPDATING)) { 635 mutex_enter(&rt_free_global.lock); 636 cv_broadcast(&rt->rt_cv); 637 mutex_exit(&rt_free_global.lock); 638 } 
639 } 640 641 static bool 642 rt_wait_ok(void) 643 { 644 645 KASSERT(!cpu_intr_p()); 646 return !cpu_softintr_p(); 647 } 648 649 void 650 rt_wait_refcnt(const char *title, struct rtentry *rt, int cnt) 651 { 652 mutex_enter(&rt_free_global.lock); 653 while (rt->rt_refcnt > cnt) { 654 dlog(LOG_DEBUG, "%s: %s waiting (refcnt=%d)\n", 655 __func__, title, rt->rt_refcnt); 656 cv_wait(&rt->rt_cv, &rt_free_global.lock); 657 dlog(LOG_DEBUG, "%s: %s waited (refcnt=%d)\n", 658 __func__, title, rt->rt_refcnt); 659 } 660 mutex_exit(&rt_free_global.lock); 661 } 662 663 void 664 rt_wait_psref(struct rtentry *rt) 665 { 666 667 psref_target_destroy(&rt->rt_psref, rt_psref_class); 668 psref_target_init(&rt->rt_psref, rt_psref_class); 669 } 670 671 static void 672 _rt_free(struct rtentry *rt) 673 { 674 struct ifaddr *ifa; 675 676 /* 677 * Need to avoid a deadlock on rt_wait_refcnt of update 678 * and a conflict on psref_target_destroy of update. 679 */ 680 #ifdef NET_MPSAFE 681 rt_update_wait(); 682 #endif 683 684 RT_REFCNT_TRACE(rt); 685 KASSERTMSG(rt->rt_refcnt >= 0, "refcnt=%d", rt->rt_refcnt); 686 rt_wait_refcnt("free", rt, 0); 687 #ifdef NET_MPSAFE 688 psref_target_destroy(&rt->rt_psref, rt_psref_class); 689 #endif 690 691 rt_assert_inactive(rt); 692 rttrash--; 693 ifa = rt->rt_ifa; 694 rt->rt_ifa = NULL; 695 ifafree(ifa); 696 rt->rt_ifp = NULL; 697 cv_destroy(&rt->rt_cv); 698 rt_destroy(rt); 699 pool_put(&rtentry_pool, rt); 700 } 701 702 static void 703 rt_free_work(struct work *wk, void *arg) 704 { 705 706 for (;;) { 707 struct rtentry *rt; 708 709 mutex_enter(&rt_free_global.lock); 710 if ((rt = SLIST_FIRST(&rt_free_global.queue)) == NULL) { 711 rt_free_global.enqueued = false; 712 mutex_exit(&rt_free_global.lock); 713 return; 714 } 715 SLIST_REMOVE_HEAD(&rt_free_global.queue, rt_free); 716 mutex_exit(&rt_free_global.lock); 717 atomic_dec_uint(&rt->rt_refcnt); 718 _rt_free(rt); 719 } 720 } 721 722 void 723 rt_free(struct rtentry *rt) 724 { 725 726 KASSERTMSG(rt->rt_refcnt > 0, "rt_refcnt=%d", rt->rt_refcnt); 727 if (rt_wait_ok()) { 728 atomic_dec_uint(&rt->rt_refcnt); 729 _rt_free(rt); 730 return; 731 } 732 733 mutex_enter(&rt_free_global.lock); 734 /* No need to add a reference here. */ 735 SLIST_INSERT_HEAD(&rt_free_global.queue, rt, rt_free); 736 if (!rt_free_global.enqueued) { 737 workqueue_enqueue(rt_free_global.wq, &rt_free_global.wk, NULL); 738 rt_free_global.enqueued = true; 739 } 740 mutex_exit(&rt_free_global.lock); 741 } 742 743 #ifdef NET_MPSAFE 744 static void 745 rt_update_wait(void) 746 { 747 748 mutex_enter(&rt_update_global.lock); 749 while (rt_update_global.ongoing) { 750 dlog(LOG_DEBUG, "%s: waiting lwp=%p\n", __func__, curlwp); 751 cv_wait(&rt_update_global.cv, &rt_update_global.lock); 752 dlog(LOG_DEBUG, "%s: waited lwp=%p\n", __func__, curlwp); 753 } 754 mutex_exit(&rt_update_global.lock); 755 } 756 #endif 757 758 int 759 rt_update_prepare(struct rtentry *rt) 760 { 761 762 dlog(LOG_DEBUG, "%s: updating rt=%p lwp=%p\n", __func__, rt, curlwp); 763 764 RT_WLOCK(); 765 /* If the entry is being destroyed, don't proceed the update. 
*/ 766 if (!ISSET(rt->rt_flags, RTF_UP)) { 767 RT_UNLOCK(); 768 return ESRCH; 769 } 770 rt->rt_flags |= RTF_UPDATING; 771 RT_UNLOCK(); 772 773 mutex_enter(&rt_update_global.lock); 774 while (rt_update_global.ongoing) { 775 dlog(LOG_DEBUG, "%s: waiting ongoing updating rt=%p lwp=%p\n", 776 __func__, rt, curlwp); 777 cv_wait(&rt_update_global.cv, &rt_update_global.lock); 778 dlog(LOG_DEBUG, "%s: waited ongoing updating rt=%p lwp=%p\n", 779 __func__, rt, curlwp); 780 } 781 rt_update_global.ongoing = true; 782 /* XXX need it to avoid rt_update_wait by updater itself. */ 783 rt_update_global.lwp = curlwp; 784 mutex_exit(&rt_update_global.lock); 785 786 rt_wait_refcnt("update", rt, 1); 787 rt_wait_psref(rt); 788 789 return 0; 790 } 791 792 void 793 rt_update_finish(struct rtentry *rt) 794 { 795 796 RT_WLOCK(); 797 rt->rt_flags &= ~RTF_UPDATING; 798 RT_UNLOCK(); 799 800 mutex_enter(&rt_update_global.lock); 801 rt_update_global.ongoing = false; 802 rt_update_global.lwp = NULL; 803 cv_broadcast(&rt_update_global.cv); 804 mutex_exit(&rt_update_global.lock); 805 806 dlog(LOG_DEBUG, "%s: updated rt=%p lwp=%p\n", __func__, rt, curlwp); 807 } 808 809 /* 810 * Force a routing table entry to the specified 811 * destination to go through the given gateway. 812 * Normally called as a result of a routing redirect 813 * message from the network layer. 814 * 815 * N.B.: must be called at splsoftnet 816 */ 817 void 818 rtredirect(const struct sockaddr *dst, const struct sockaddr *gateway, 819 const struct sockaddr *netmask, int flags, const struct sockaddr *src, 820 struct rtentry **rtp) 821 { 822 struct rtentry *rt; 823 int error = 0; 824 uint64_t *stat = NULL; 825 struct rt_addrinfo info; 826 struct ifaddr *ifa; 827 struct psref psref; 828 829 /* verify the gateway is directly reachable */ 830 if ((ifa = ifa_ifwithnet_psref(gateway, &psref)) == NULL) { 831 error = ENETUNREACH; 832 goto out; 833 } 834 rt = rtalloc1(dst, 0); 835 /* 836 * If the redirect isn't from our current router for this dst, 837 * it's either old or wrong. If it redirects us to ourselves, 838 * we have a routing loop, perhaps as a result of an interface 839 * going down recently. 840 */ 841 if (!(flags & RTF_DONE) && rt && 842 (sockaddr_cmp(src, rt->rt_gateway) != 0 || rt->rt_ifa != ifa)) 843 error = EINVAL; 844 else { 845 int s = pserialize_read_enter(); 846 struct ifaddr *_ifa; 847 848 _ifa = ifa_ifwithaddr(gateway); 849 if (_ifa != NULL) 850 error = EHOSTUNREACH; 851 pserialize_read_exit(s); 852 } 853 if (error) 854 goto done; 855 /* 856 * Create a new entry if we just got back a wildcard entry 857 * or the lookup failed. This is necessary for hosts 858 * which use routing redirects generated by smart gateways 859 * to dynamically build the routing tables. 860 */ 861 if (rt == NULL || (rt_mask(rt) && rt_mask(rt)->sa_len < 2)) 862 goto create; 863 /* 864 * Don't listen to the redirect if it's 865 * for a route to an interface. 866 */ 867 if (rt->rt_flags & RTF_GATEWAY) { 868 if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) { 869 /* 870 * Changing from route to net => route to host. 871 * Create new route, rather than smashing route to net. 
872 */ 873 create: 874 if (rt != NULL) 875 rt_unref(rt); 876 flags |= RTF_GATEWAY | RTF_DYNAMIC; 877 memset(&info, 0, sizeof(info)); 878 info.rti_info[RTAX_DST] = dst; 879 info.rti_info[RTAX_GATEWAY] = gateway; 880 info.rti_info[RTAX_NETMASK] = netmask; 881 info.rti_ifa = ifa; 882 info.rti_flags = flags; 883 rt = NULL; 884 error = rtrequest1(RTM_ADD, &info, &rt); 885 if (rt != NULL) 886 flags = rt->rt_flags; 887 stat = &rtstat.rts_dynamic; 888 } else { 889 /* 890 * Smash the current notion of the gateway to 891 * this destination. Should check about netmask!!! 892 */ 893 #ifdef NET_MPSAFE 894 KASSERT(!cpu_softintr_p()); 895 896 error = rt_update_prepare(rt); 897 if (error == 0) { 898 #endif 899 RT_WLOCK(); 900 error = rt_setgate(rt, gateway); 901 if (error == 0) { 902 rt->rt_flags |= RTF_MODIFIED; 903 flags |= RTF_MODIFIED; 904 } 905 RT_UNLOCK(); 906 #ifdef NET_MPSAFE 907 rt_update_finish(rt); 908 } else { 909 /* 910 * If error != 0, the rtentry is being 911 * destroyed, so doing nothing doesn't 912 * matter. 913 */ 914 } 915 #endif 916 stat = &rtstat.rts_newgateway; 917 } 918 } else 919 error = EHOSTUNREACH; 920 done: 921 if (rt) { 922 if (rtp != NULL && !error) 923 *rtp = rt; 924 else 925 rt_unref(rt); 926 } 927 out: 928 if (error) 929 rtstat.rts_badredirect++; 930 else if (stat != NULL) 931 (*stat)++; 932 memset(&info, 0, sizeof(info)); 933 info.rti_info[RTAX_DST] = dst; 934 info.rti_info[RTAX_GATEWAY] = gateway; 935 info.rti_info[RTAX_NETMASK] = netmask; 936 info.rti_info[RTAX_AUTHOR] = src; 937 rt_missmsg(RTM_REDIRECT, &info, flags, error); 938 ifa_release(ifa, &psref); 939 } 940 941 /* 942 * Delete a route and generate a message. 943 * It doesn't free a passed rt. 944 */ 945 static int 946 rtdeletemsg(struct rtentry *rt) 947 { 948 int error; 949 struct rt_addrinfo info; 950 struct rtentry *retrt; 951 952 /* 953 * Request the new route so that the entry is not actually 954 * deleted. That will allow the information being reported to 955 * be accurate (and consistent with route_output()). 956 */ 957 memset(&info, 0, sizeof(info)); 958 info.rti_info[RTAX_DST] = rt_getkey(rt); 959 info.rti_info[RTAX_NETMASK] = rt_mask(rt); 960 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 961 info.rti_flags = rt->rt_flags; 962 error = rtrequest1(RTM_DELETE, &info, &retrt); 963 964 rt_missmsg(RTM_DELETE, &info, info.rti_flags, error); 965 966 return error; 967 } 968 969 static struct ifaddr * 970 ifa_ifwithroute_psref(int flags, const struct sockaddr *dst, 971 const struct sockaddr *gateway, struct psref *psref) 972 { 973 struct ifaddr *ifa = NULL; 974 975 if ((flags & RTF_GATEWAY) == 0) { 976 /* 977 * If we are adding a route to an interface, 978 * and the interface is a pt to pt link 979 * we should search for the destination 980 * as our clue to the interface. Otherwise 981 * we can use the local address. 982 */ 983 if ((flags & RTF_HOST) && gateway->sa_family != AF_LINK) 984 ifa = ifa_ifwithdstaddr_psref(dst, psref); 985 if (ifa == NULL) 986 ifa = ifa_ifwithaddr_psref(gateway, psref); 987 } else { 988 /* 989 * If we are adding a route to a remote net 990 * or host, the gateway may still be on the 991 * other end of a pt to pt link. 
992 */ 993 ifa = ifa_ifwithdstaddr_psref(gateway, psref); 994 } 995 if (ifa == NULL) 996 ifa = ifa_ifwithnet_psref(gateway, psref); 997 if (ifa == NULL) { 998 int s; 999 struct rtentry *rt; 1000 1001 rt = rtalloc1_locked(gateway, 0, true, true); 1002 if (rt == NULL) 1003 return NULL; 1004 if (rt->rt_flags & RTF_GATEWAY) { 1005 rt_unref(rt); 1006 return NULL; 1007 } 1008 /* 1009 * Just in case. May not need to do this workaround. 1010 * Revisit when working on rtentry MP-ification. 1011 */ 1012 s = pserialize_read_enter(); 1013 IFADDR_READER_FOREACH(ifa, rt->rt_ifp) { 1014 if (ifa == rt->rt_ifa) 1015 break; 1016 } 1017 if (ifa != NULL) 1018 ifa_acquire(ifa, psref); 1019 pserialize_read_exit(s); 1020 rt_unref(rt); 1021 if (ifa == NULL) 1022 return NULL; 1023 } 1024 if (ifa->ifa_addr->sa_family != dst->sa_family) { 1025 struct ifaddr *nifa; 1026 int s; 1027 1028 s = pserialize_read_enter(); 1029 nifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp); 1030 if (nifa != NULL) { 1031 ifa_release(ifa, psref); 1032 ifa_acquire(nifa, psref); 1033 ifa = nifa; 1034 } 1035 pserialize_read_exit(s); 1036 } 1037 return ifa; 1038 } 1039 1040 /* 1041 * If it suceeds and ret_nrt isn't NULL, refcnt of ret_nrt is incremented. 1042 * The caller has to rtfree it by itself. 1043 */ 1044 int 1045 rtrequest(int req, const struct sockaddr *dst, const struct sockaddr *gateway, 1046 const struct sockaddr *netmask, int flags, struct rtentry **ret_nrt) 1047 { 1048 struct rt_addrinfo info; 1049 1050 memset(&info, 0, sizeof(info)); 1051 info.rti_flags = flags; 1052 info.rti_info[RTAX_DST] = dst; 1053 info.rti_info[RTAX_GATEWAY] = gateway; 1054 info.rti_info[RTAX_NETMASK] = netmask; 1055 return rtrequest1(req, &info, ret_nrt); 1056 } 1057 1058 /* 1059 * It's a utility function to add/remove a route to/from the routing table 1060 * and tell user processes the addition/removal on success. 
1061 */ 1062 int 1063 rtrequest_newmsg(const int req, const struct sockaddr *dst, 1064 const struct sockaddr *gateway, const struct sockaddr *netmask, 1065 const int flags) 1066 { 1067 int error; 1068 struct rtentry *ret_nrt = NULL; 1069 1070 KASSERT(req == RTM_ADD || req == RTM_DELETE); 1071 1072 error = rtrequest(req, dst, gateway, netmask, flags, &ret_nrt); 1073 if (error != 0) 1074 return error; 1075 1076 KASSERT(ret_nrt != NULL); 1077 1078 rt_newmsg(req, ret_nrt); /* tell user process */ 1079 if (req == RTM_DELETE) 1080 rt_free(ret_nrt); 1081 else 1082 rt_unref(ret_nrt); 1083 1084 return 0; 1085 } 1086 1087 static struct ifnet * 1088 rt_getifp(struct rt_addrinfo *info, struct psref *psref) 1089 { 1090 const struct sockaddr *ifpaddr = info->rti_info[RTAX_IFP]; 1091 1092 if (info->rti_ifp != NULL) 1093 return NULL; 1094 /* 1095 * ifp may be specified by sockaddr_dl when protocol address 1096 * is ambiguous 1097 */ 1098 if (ifpaddr != NULL && ifpaddr->sa_family == AF_LINK) { 1099 struct ifaddr *ifa; 1100 int s = pserialize_read_enter(); 1101 1102 ifa = ifa_ifwithnet(ifpaddr); 1103 if (ifa != NULL) 1104 info->rti_ifp = if_get_byindex(ifa->ifa_ifp->if_index, 1105 psref); 1106 pserialize_read_exit(s); 1107 } 1108 1109 return info->rti_ifp; 1110 } 1111 1112 static struct ifaddr * 1113 rt_getifa(struct rt_addrinfo *info, struct psref *psref) 1114 { 1115 struct ifaddr *ifa = NULL; 1116 const struct sockaddr *dst = info->rti_info[RTAX_DST]; 1117 const struct sockaddr *gateway = info->rti_info[RTAX_GATEWAY]; 1118 const struct sockaddr *ifaaddr = info->rti_info[RTAX_IFA]; 1119 int flags = info->rti_flags; 1120 const struct sockaddr *sa; 1121 1122 if (info->rti_ifa == NULL && ifaaddr != NULL) { 1123 ifa = ifa_ifwithaddr_psref(ifaaddr, psref); 1124 if (ifa != NULL) 1125 goto got; 1126 } 1127 1128 sa = ifaaddr != NULL ? ifaaddr : 1129 (gateway != NULL ? gateway : dst); 1130 if (sa != NULL && info->rti_ifp != NULL) 1131 ifa = ifaof_ifpforaddr_psref(sa, info->rti_ifp, psref); 1132 else if (dst != NULL && gateway != NULL) 1133 ifa = ifa_ifwithroute_psref(flags, dst, gateway, psref); 1134 else if (sa != NULL) 1135 ifa = ifa_ifwithroute_psref(flags, sa, sa, psref); 1136 if (ifa == NULL) 1137 return NULL; 1138 got: 1139 if (ifa->ifa_getifa != NULL) { 1140 /* FIXME ifa_getifa is NOMPSAFE */ 1141 ifa = (*ifa->ifa_getifa)(ifa, dst); 1142 if (ifa == NULL) 1143 return NULL; 1144 ifa_acquire(ifa, psref); 1145 } 1146 info->rti_ifa = ifa; 1147 if (info->rti_ifp == NULL) 1148 info->rti_ifp = ifa->ifa_ifp; 1149 return ifa; 1150 } 1151 1152 /* 1153 * If it suceeds and ret_nrt isn't NULL, refcnt of ret_nrt is incremented. 1154 * The caller has to rtfree it by itself. 
1155 */ 1156 int 1157 rtrequest1(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt) 1158 { 1159 int s = splsoftnet(), ss; 1160 int error = 0, rc; 1161 struct rtentry *rt; 1162 rtbl_t *rtbl; 1163 struct ifaddr *ifa = NULL; 1164 struct sockaddr_storage maskeddst; 1165 const struct sockaddr *dst = info->rti_info[RTAX_DST]; 1166 const struct sockaddr *gateway = info->rti_info[RTAX_GATEWAY]; 1167 const struct sockaddr *netmask = info->rti_info[RTAX_NETMASK]; 1168 int flags = info->rti_flags; 1169 struct psref psref_ifp, psref_ifa; 1170 int bound = 0; 1171 struct ifnet *ifp = NULL; 1172 bool need_to_release_ifa = true; 1173 bool need_unlock = true; 1174 #define senderr(x) { error = x ; goto bad; } 1175 1176 RT_WLOCK(); 1177 1178 bound = curlwp_bind(); 1179 if ((rtbl = rt_gettable(dst->sa_family)) == NULL) 1180 senderr(ESRCH); 1181 if (flags & RTF_HOST) 1182 netmask = NULL; 1183 switch (req) { 1184 case RTM_DELETE: 1185 if (netmask) { 1186 rt_maskedcopy(dst, (struct sockaddr *)&maskeddst, 1187 netmask); 1188 dst = (struct sockaddr *)&maskeddst; 1189 } 1190 if ((rt = rt_lookup(rtbl, dst, netmask)) == NULL) 1191 senderr(ESRCH); 1192 if ((rt = rt_deladdr(rtbl, dst, netmask)) == NULL) 1193 senderr(ESRCH); 1194 rt->rt_flags &= ~RTF_UP; 1195 ifa = rt->rt_ifa; 1196 if (ifa->ifa_flags & IFA_ROUTE && 1197 rt_ifa_connected(rt, ifa)) { 1198 RT_DPRINTF("rt->_rt_key = %p, ifa = %p, " 1199 "deleted IFA_ROUTE\n", 1200 (void *)rt->_rt_key, (void *)ifa); 1201 ifa->ifa_flags &= ~IFA_ROUTE; 1202 } 1203 if (ifa->ifa_rtrequest) 1204 ifa->ifa_rtrequest(RTM_DELETE, rt, info); 1205 ifa = NULL; 1206 rttrash++; 1207 if (ret_nrt) { 1208 *ret_nrt = rt; 1209 rt_ref(rt); 1210 RT_REFCNT_TRACE(rt); 1211 } 1212 rtcache_invalidate(); 1213 RT_UNLOCK(); 1214 need_unlock = false; 1215 rt_timer_remove_all(rt); 1216 #if defined(INET) || defined(INET6) 1217 if (netmask != NULL) 1218 lltable_prefix_free(dst->sa_family, dst, netmask, 0); 1219 #endif 1220 if (ret_nrt == NULL) { 1221 /* Adjust the refcount */ 1222 rt_ref(rt); 1223 RT_REFCNT_TRACE(rt); 1224 rt_free(rt); 1225 } 1226 break; 1227 1228 case RTM_ADD: 1229 if (info->rti_ifa == NULL) { 1230 ifp = rt_getifp(info, &psref_ifp); 1231 ifa = rt_getifa(info, &psref_ifa); 1232 if (ifa == NULL) 1233 senderr(ENETUNREACH); 1234 } else { 1235 /* Caller should have a reference of ifa */ 1236 ifa = info->rti_ifa; 1237 need_to_release_ifa = false; 1238 } 1239 rt = pool_get(&rtentry_pool, PR_NOWAIT); 1240 if (rt == NULL) 1241 senderr(ENOBUFS); 1242 memset(rt, 0, sizeof(*rt)); 1243 rt->rt_flags = RTF_UP | (flags & ~RTF_DONTCHANGEIFA); 1244 LIST_INIT(&rt->rt_timer); 1245 1246 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1247 if (netmask) { 1248 rt_maskedcopy(dst, (struct sockaddr *)&maskeddst, 1249 netmask); 1250 rt_setkey(rt, (struct sockaddr *)&maskeddst, M_NOWAIT); 1251 } else { 1252 rt_setkey(rt, dst, M_NOWAIT); 1253 } 1254 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1255 if (rt_getkey(rt) == NULL || 1256 rt_setgate(rt, gateway) != 0) { 1257 pool_put(&rtentry_pool, rt); 1258 senderr(ENOBUFS); 1259 } 1260 1261 rt_set_ifa(rt, ifa); 1262 if (info->rti_info[RTAX_TAG] != NULL) { 1263 const struct sockaddr *tag; 1264 tag = rt_settag(rt, info->rti_info[RTAX_TAG]); 1265 if (tag == NULL) 1266 senderr(ENOBUFS); 1267 } 1268 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1269 1270 ss = pserialize_read_enter(); 1271 if (info->rti_info[RTAX_IFP] != NULL) { 1272 struct ifaddr *ifa2; 1273 ifa2 = ifa_ifwithnet(info->rti_info[RTAX_IFP]); 1274 if (ifa2 != NULL) 1275 
rt->rt_ifp = ifa2->ifa_ifp; 1276 else 1277 rt->rt_ifp = ifa->ifa_ifp; 1278 } else 1279 rt->rt_ifp = ifa->ifa_ifp; 1280 pserialize_read_exit(ss); 1281 cv_init(&rt->rt_cv, "rtentry"); 1282 psref_target_init(&rt->rt_psref, rt_psref_class); 1283 1284 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1285 rc = rt_addaddr(rtbl, rt, netmask); 1286 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1287 if (rc != 0) { 1288 ifafree(ifa); /* for rt_set_ifa above */ 1289 cv_destroy(&rt->rt_cv); 1290 rt_destroy(rt); 1291 pool_put(&rtentry_pool, rt); 1292 senderr(rc); 1293 } 1294 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1295 if (ifa->ifa_rtrequest) 1296 ifa->ifa_rtrequest(req, rt, info); 1297 if (need_to_release_ifa) 1298 ifa_release(ifa, &psref_ifa); 1299 ifa = NULL; 1300 if_put(ifp, &psref_ifp); 1301 ifp = NULL; 1302 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1303 if (ret_nrt) { 1304 *ret_nrt = rt; 1305 rt_ref(rt); 1306 RT_REFCNT_TRACE(rt); 1307 } 1308 rtcache_invalidate(); 1309 RT_UNLOCK(); 1310 need_unlock = false; 1311 break; 1312 case RTM_GET: 1313 if (netmask != NULL) { 1314 rt_maskedcopy(dst, (struct sockaddr *)&maskeddst, 1315 netmask); 1316 dst = (struct sockaddr *)&maskeddst; 1317 } 1318 if ((rt = rt_lookup(rtbl, dst, netmask)) == NULL) 1319 senderr(ESRCH); 1320 if (ret_nrt != NULL) { 1321 *ret_nrt = rt; 1322 rt_ref(rt); 1323 RT_REFCNT_TRACE(rt); 1324 } 1325 break; 1326 } 1327 bad: 1328 if (need_to_release_ifa) 1329 ifa_release(ifa, &psref_ifa); 1330 if_put(ifp, &psref_ifp); 1331 curlwp_bindx(bound); 1332 if (need_unlock) 1333 RT_UNLOCK(); 1334 splx(s); 1335 return error; 1336 } 1337 1338 int 1339 rt_setgate(struct rtentry *rt, const struct sockaddr *gate) 1340 { 1341 struct sockaddr *new, *old; 1342 1343 KASSERT(RT_WLOCKED()); 1344 KASSERT(rt->_rt_key != NULL); 1345 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1346 1347 new = sockaddr_dup(gate, M_ZERO | M_NOWAIT); 1348 if (new == NULL) 1349 return ENOMEM; 1350 1351 old = rt->rt_gateway; 1352 rt->rt_gateway = new; 1353 if (old != NULL) 1354 sockaddr_free(old); 1355 1356 KASSERT(rt->_rt_key != NULL); 1357 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1358 1359 if (rt->rt_flags & RTF_GATEWAY) { 1360 struct rtentry *gwrt; 1361 1362 gwrt = rtalloc1_locked(gate, 1, false, true); 1363 /* 1364 * If we switched gateways, grab the MTU from the new 1365 * gateway route if the current MTU, if the current MTU is 1366 * greater than the MTU of gateway. 1367 * Note that, if the MTU of gateway is 0, we will reset the 1368 * MTU of the route to run PMTUD again from scratch. 
XXX 1369 */ 1370 if (gwrt != NULL) { 1371 KASSERT(gwrt->_rt_key != NULL); 1372 RT_DPRINTF("gwrt->_rt_key = %p\n", gwrt->_rt_key); 1373 if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0 && 1374 rt->rt_rmx.rmx_mtu && 1375 rt->rt_rmx.rmx_mtu > gwrt->rt_rmx.rmx_mtu) { 1376 rt->rt_rmx.rmx_mtu = gwrt->rt_rmx.rmx_mtu; 1377 } 1378 rt_unref(gwrt); 1379 } 1380 } 1381 KASSERT(rt->_rt_key != NULL); 1382 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1383 return 0; 1384 } 1385 1386 static struct ifaddr * 1387 rt_update_get_ifa(const struct rt_addrinfo *info, const struct rtentry *rt, 1388 struct ifnet **ifp, struct psref *psref_ifp, struct psref *psref) 1389 { 1390 struct ifaddr *ifa = NULL; 1391 1392 *ifp = NULL; 1393 if (info->rti_info[RTAX_IFP] != NULL) { 1394 ifa = ifa_ifwithnet_psref(info->rti_info[RTAX_IFP], psref); 1395 if (ifa == NULL) 1396 goto next; 1397 *ifp = ifa->ifa_ifp; 1398 if_acquire(*ifp, psref_ifp); 1399 if (info->rti_info[RTAX_IFA] == NULL && 1400 info->rti_info[RTAX_GATEWAY] == NULL) 1401 goto out; 1402 ifa_release(ifa, psref); 1403 if (info->rti_info[RTAX_IFA] == NULL) { 1404 /* route change <dst> <gw> -ifp <if> */ 1405 ifa = ifaof_ifpforaddr_psref( 1406 info->rti_info[RTAX_GATEWAY], *ifp, psref); 1407 } else { 1408 /* route change <dst> -ifp <if> -ifa <addr> */ 1409 ifa = ifa_ifwithaddr_psref(info->rti_info[RTAX_IFA], 1410 psref); 1411 if (ifa != NULL) 1412 goto out; 1413 ifa = ifaof_ifpforaddr_psref(info->rti_info[RTAX_IFA], 1414 *ifp, psref); 1415 } 1416 goto out; 1417 } 1418 next: 1419 if (info->rti_info[RTAX_IFA] != NULL) { 1420 /* route change <dst> <gw> -ifa <addr> */ 1421 ifa = ifa_ifwithaddr_psref(info->rti_info[RTAX_IFA], psref); 1422 if (ifa != NULL) 1423 goto out; 1424 } 1425 if (info->rti_info[RTAX_GATEWAY] != NULL) { 1426 /* route change <dst> <gw> */ 1427 ifa = ifa_ifwithroute_psref(rt->rt_flags, rt_getkey(rt), 1428 info->rti_info[RTAX_GATEWAY], psref); 1429 } 1430 out: 1431 if (ifa != NULL && *ifp == NULL) { 1432 *ifp = ifa->ifa_ifp; 1433 if_acquire(*ifp, psref_ifp); 1434 } 1435 if (ifa == NULL && *ifp != NULL) { 1436 if_put(*ifp, psref_ifp); 1437 *ifp = NULL; 1438 } 1439 return ifa; 1440 } 1441 1442 int 1443 rt_update(struct rtentry *rt, struct rt_addrinfo *info, void *rtm) 1444 { 1445 int error = 0; 1446 struct ifnet *ifp = NULL, *new_ifp = NULL; 1447 struct ifaddr *ifa = NULL, *new_ifa; 1448 struct psref psref_ifa, psref_new_ifa, psref_ifp, psref_new_ifp; 1449 bool newgw, ifp_changed = false; 1450 1451 RT_WLOCK(); 1452 /* 1453 * New gateway could require new ifaddr, ifp; 1454 * flags may also be different; ifp may be specified 1455 * by ll sockaddr when protocol address is ambiguous 1456 */ 1457 newgw = info->rti_info[RTAX_GATEWAY] != NULL && 1458 sockaddr_cmp(info->rti_info[RTAX_GATEWAY], rt->rt_gateway) != 0; 1459 1460 if (newgw || info->rti_info[RTAX_IFP] != NULL || 1461 info->rti_info[RTAX_IFA] != NULL) { 1462 ifp = rt_getifp(info, &psref_ifp); 1463 /* info refers ifp so we need to keep a reference */ 1464 ifa = rt_getifa(info, &psref_ifa); 1465 if (ifa == NULL) { 1466 error = ENETUNREACH; 1467 goto out; 1468 } 1469 } 1470 if (newgw) { 1471 error = rt_setgate(rt, info->rti_info[RTAX_GATEWAY]); 1472 if (error != 0) 1473 goto out; 1474 } 1475 if (info->rti_info[RTAX_TAG]) { 1476 const struct sockaddr *tag; 1477 tag = rt_settag(rt, info->rti_info[RTAX_TAG]); 1478 if (tag == NULL) { 1479 error = ENOBUFS; 1480 goto out; 1481 } 1482 } 1483 /* 1484 * New gateway could require new ifaddr, ifp; 1485 * flags may also be different; ifp may be specified 1486 * by ll 
sockaddr when protocol address is ambiguous 1487 */ 1488 new_ifa = rt_update_get_ifa(info, rt, &new_ifp, &psref_new_ifp, 1489 &psref_new_ifa); 1490 if (new_ifa != NULL) { 1491 ifa_release(ifa, &psref_ifa); 1492 ifa = new_ifa; 1493 } 1494 if (ifa) { 1495 struct ifaddr *oifa = rt->rt_ifa; 1496 if (oifa != ifa && !ifa_is_destroying(ifa) && 1497 new_ifp != NULL && !if_is_deactivated(new_ifp)) { 1498 if (oifa && oifa->ifa_rtrequest) 1499 oifa->ifa_rtrequest(RTM_DELETE, rt, info); 1500 rt_replace_ifa(rt, ifa); 1501 rt->rt_ifp = new_ifp; 1502 ifp_changed = true; 1503 } 1504 if (new_ifa == NULL) 1505 ifa_release(ifa, &psref_ifa); 1506 /* To avoid ifa_release below */ 1507 ifa = NULL; 1508 } 1509 ifa_release(new_ifa, &psref_new_ifa); 1510 if (new_ifp && rt->rt_ifp != new_ifp && !if_is_deactivated(new_ifp)) { 1511 rt->rt_ifp = new_ifp; 1512 ifp_changed = true; 1513 } 1514 rt_setmetrics(rtm, rt); 1515 if (rt->rt_flags != info->rti_flags) { 1516 rt->rt_flags = (info->rti_flags & ~PRESERVED_RTF) | 1517 (rt->rt_flags & PRESERVED_RTF); 1518 } 1519 if (rt->rt_ifa->ifa_rtrequest) 1520 rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, info); 1521 #if defined(INET) || defined(INET6) 1522 if (ifp_changed && rt_mask(rt) != NULL) 1523 lltable_prefix_free(rt_getkey(rt)->sa_family, rt_getkey(rt), 1524 rt_mask(rt), 0); 1525 #else 1526 (void)ifp_changed; /* XXX gcc */ 1527 #endif 1528 out: 1529 ifa_release(ifa, &psref_ifa); 1530 if_put(new_ifp, &psref_new_ifp); 1531 if_put(ifp, &psref_ifp); 1532 1533 RT_UNLOCK(); 1534 1535 return error; 1536 } 1537 1538 static void 1539 rt_maskedcopy(const struct sockaddr *src, struct sockaddr *dst, 1540 const struct sockaddr *netmask) 1541 { 1542 const char *netmaskp = &netmask->sa_data[0], 1543 *srcp = &src->sa_data[0]; 1544 char *dstp = &dst->sa_data[0]; 1545 const char *maskend = (char *)dst + MIN(netmask->sa_len, src->sa_len); 1546 const char *srcend = (char *)dst + src->sa_len; 1547 1548 dst->sa_len = src->sa_len; 1549 dst->sa_family = src->sa_family; 1550 1551 while (dstp < maskend) 1552 *dstp++ = *srcp++ & *netmaskp++; 1553 if (dstp < srcend) 1554 memset(dstp, 0, (size_t)(srcend - dstp)); 1555 } 1556 1557 /* 1558 * Inform the routing socket of a route change. 1559 */ 1560 void 1561 rt_newmsg(const int cmd, const struct rtentry *rt) 1562 { 1563 struct rt_addrinfo info; 1564 1565 memset((void *)&info, 0, sizeof(info)); 1566 info.rti_info[RTAX_DST] = rt_getkey(rt); 1567 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 1568 info.rti_info[RTAX_NETMASK] = rt_mask(rt); 1569 if (rt->rt_ifp) { 1570 info.rti_info[RTAX_IFP] = rt->rt_ifp->if_dl->ifa_addr; 1571 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; 1572 } 1573 1574 rt_missmsg(cmd, &info, rt->rt_flags, 0); 1575 } 1576 1577 /* 1578 * Set up or tear down a routing table entry, normally 1579 * for an interface. 1580 */ 1581 int 1582 rtinit(struct ifaddr *ifa, int cmd, int flags) 1583 { 1584 struct rtentry *rt; 1585 struct sockaddr *dst, *odst; 1586 struct sockaddr_storage maskeddst; 1587 struct rtentry *nrt = NULL; 1588 int error; 1589 struct rt_addrinfo info; 1590 1591 dst = flags & RTF_HOST ? ifa->ifa_dstaddr : ifa->ifa_addr; 1592 if (cmd == RTM_DELETE) { 1593 if ((flags & RTF_HOST) == 0 && ifa->ifa_netmask) { 1594 /* Delete subnet route for this interface */ 1595 odst = dst; 1596 dst = (struct sockaddr *)&maskeddst; 1597 rt_maskedcopy(odst, dst, ifa->ifa_netmask); 1598 } 1599 if ((rt = rtalloc1(dst, 0)) != NULL) { 1600 if (rt->rt_ifa != ifa) { 1601 rt_unref(rt); 1602 return (flags & RTF_HOST) ? 
EHOSTUNREACH 1603 : ENETUNREACH; 1604 } 1605 rt_unref(rt); 1606 } 1607 } 1608 memset(&info, 0, sizeof(info)); 1609 info.rti_ifa = ifa; 1610 info.rti_flags = flags | ifa->ifa_flags | RTF_DONTCHANGEIFA; 1611 info.rti_info[RTAX_DST] = dst; 1612 info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr; 1613 1614 /* 1615 * XXX here, it seems that we are assuming that ifa_netmask is NULL 1616 * for RTF_HOST. bsdi4 passes NULL explicitly (via intermediate 1617 * variable) when RTF_HOST is 1. still not sure if i can safely 1618 * change it to meet bsdi4 behavior. 1619 */ 1620 if (cmd != RTM_LLINFO_UPD) 1621 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; 1622 error = rtrequest1((cmd == RTM_LLINFO_UPD) ? RTM_GET : cmd, &info, 1623 &nrt); 1624 if (error != 0) 1625 return error; 1626 1627 rt = nrt; 1628 RT_REFCNT_TRACE(rt); 1629 switch (cmd) { 1630 case RTM_DELETE: 1631 rt_newmsg(cmd, rt); 1632 rt_free(rt); 1633 break; 1634 case RTM_LLINFO_UPD: 1635 if (cmd == RTM_LLINFO_UPD && ifa->ifa_rtrequest != NULL) 1636 ifa->ifa_rtrequest(RTM_LLINFO_UPD, rt, &info); 1637 rt_newmsg(RTM_CHANGE, rt); 1638 rt_unref(rt); 1639 break; 1640 case RTM_ADD: 1641 KASSERT(rt->rt_ifa == ifa); 1642 rt_newmsg(cmd, rt); 1643 rt_unref(rt); 1644 RT_REFCNT_TRACE(rt); 1645 break; 1646 } 1647 return error; 1648 } 1649 1650 /* 1651 * Create a local route entry for the address. 1652 * Announce the addition of the address and the route to the routing socket. 1653 */ 1654 int 1655 rt_ifa_addlocal(struct ifaddr *ifa) 1656 { 1657 struct rtentry *rt; 1658 int e; 1659 1660 /* If there is no loopback entry, allocate one. */ 1661 rt = rtalloc1(ifa->ifa_addr, 0); 1662 #ifdef RT_DEBUG 1663 if (rt != NULL) 1664 dump_rt(rt); 1665 #endif 1666 if (rt == NULL || (rt->rt_flags & RTF_HOST) == 0 || 1667 (rt->rt_ifp->if_flags & IFF_LOOPBACK) == 0) 1668 { 1669 struct rt_addrinfo info; 1670 struct rtentry *nrt; 1671 1672 memset(&info, 0, sizeof(info)); 1673 info.rti_flags = RTF_HOST | RTF_LOCAL | RTF_DONTCHANGEIFA; 1674 info.rti_info[RTAX_DST] = ifa->ifa_addr; 1675 info.rti_info[RTAX_GATEWAY] = 1676 (const struct sockaddr *)ifa->ifa_ifp->if_sadl; 1677 info.rti_ifa = ifa; 1678 nrt = NULL; 1679 e = rtrequest1(RTM_ADD, &info, &nrt); 1680 rt_addrmsg_rt(RTM_ADD, ifa, e, nrt); 1681 if (nrt != NULL) { 1682 KASSERT(nrt->rt_ifa == ifa); 1683 #ifdef RT_DEBUG 1684 dump_rt(nrt); 1685 #endif 1686 rt_unref(nrt); 1687 RT_REFCNT_TRACE(nrt); 1688 } 1689 } else { 1690 e = 0; 1691 rt_addrmsg(RTM_NEWADDR, ifa); 1692 } 1693 if (rt != NULL) 1694 rt_unref(rt); 1695 return e; 1696 } 1697 1698 /* 1699 * Remove the local route entry for the address. 1700 * Announce the removal of the address and the route to the routing socket. 1701 */ 1702 int 1703 rt_ifa_remlocal(struct ifaddr *ifa, struct ifaddr *alt_ifa) 1704 { 1705 struct rtentry *rt; 1706 int e = 0; 1707 1708 rt = rtalloc1(ifa->ifa_addr, 0); 1709 1710 /* 1711 * Before deleting, check if a corresponding loopbacked 1712 * host route surely exists. With this check, we can avoid 1713 * deleting an interface direct route whose destination is 1714 * the same as the address being removed. This can happen 1715 * when removing a subnet-router anycast address on an 1716 * interface attached to a shared medium. 1717 */ 1718 if (rt != NULL && 1719 (rt->rt_flags & RTF_HOST) && 1720 (rt->rt_ifp->if_flags & IFF_LOOPBACK)) 1721 { 1722 /* If we cannot replace the route's ifaddr with the equivalent 1723 * ifaddr of another interface, I believe it is safest to 1724 * delete the route. 
1725 */ 1726 if (alt_ifa == NULL) { 1727 e = rtdeletemsg(rt); 1728 if (e == 0) { 1729 rt_unref(rt); 1730 rt_free(rt); 1731 rt = NULL; 1732 } 1733 rt_addrmsg(RTM_DELADDR, ifa); 1734 } else { 1735 #ifdef NET_MPSAFE 1736 int error = rt_update_prepare(rt); 1737 if (error == 0) { 1738 rt_replace_ifa(rt, alt_ifa); 1739 rt_update_finish(rt); 1740 } else { 1741 /* 1742 * If error != 0, the rtentry is being 1743 * destroyed, so doing nothing doesn't 1744 * matter. 1745 */ 1746 } 1747 #else 1748 rt_replace_ifa(rt, alt_ifa); 1749 #endif 1750 rt_newmsg(RTM_CHANGE, rt); 1751 } 1752 } else 1753 rt_addrmsg(RTM_DELADDR, ifa); 1754 if (rt != NULL) 1755 rt_unref(rt); 1756 return e; 1757 } 1758 1759 /* 1760 * Route timer routines. These routes allow functions to be called 1761 * for various routes at any time. This is useful in supporting 1762 * path MTU discovery and redirect route deletion. 1763 * 1764 * This is similar to some BSDI internal functions, but it provides 1765 * for multiple queues for efficiency's sake... 1766 */ 1767 1768 LIST_HEAD(, rttimer_queue) rttimer_queue_head; 1769 static int rt_init_done = 0; 1770 1771 /* 1772 * Some subtle order problems with domain initialization mean that 1773 * we cannot count on this being run from rt_init before various 1774 * protocol initializations are done. Therefore, we make sure 1775 * that this is run when the first queue is added... 1776 */ 1777 1778 static void rt_timer_work(struct work *, void *); 1779 1780 static void 1781 rt_timer_init(void) 1782 { 1783 int error; 1784 1785 assert(rt_init_done == 0); 1786 1787 /* XXX should be in rt_init */ 1788 rw_init(&rt_lock); 1789 1790 LIST_INIT(&rttimer_queue_head); 1791 callout_init(&rt_timer_ch, CALLOUT_MPSAFE); 1792 error = workqueue_create(&rt_timer_wq, "rt_timer", 1793 rt_timer_work, NULL, PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE); 1794 if (error) 1795 panic("%s: workqueue_create failed (%d)\n", __func__, error); 1796 callout_reset(&rt_timer_ch, hz, rt_timer_timer, NULL); 1797 rt_init_done = 1; 1798 } 1799 1800 struct rttimer_queue * 1801 rt_timer_queue_create(u_int timeout) 1802 { 1803 struct rttimer_queue *rtq; 1804 1805 if (rt_init_done == 0) 1806 rt_timer_init(); 1807 1808 R_Malloc(rtq, struct rttimer_queue *, sizeof *rtq); 1809 if (rtq == NULL) 1810 return NULL; 1811 memset(rtq, 0, sizeof(*rtq)); 1812 1813 rtq->rtq_timeout = timeout; 1814 TAILQ_INIT(&rtq->rtq_head); 1815 RT_WLOCK(); 1816 LIST_INSERT_HEAD(&rttimer_queue_head, rtq, rtq_link); 1817 RT_UNLOCK(); 1818 1819 return rtq; 1820 } 1821 1822 void 1823 rt_timer_queue_change(struct rttimer_queue *rtq, long timeout) 1824 { 1825 1826 rtq->rtq_timeout = timeout; 1827 } 1828 1829 static void 1830 rt_timer_queue_remove_all(struct rttimer_queue *rtq) 1831 { 1832 struct rttimer *r; 1833 1834 RT_ASSERT_WLOCK(); 1835 1836 while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL) { 1837 LIST_REMOVE(r, rtt_link); 1838 TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next); 1839 rt_ref(r->rtt_rt); /* XXX */ 1840 RT_REFCNT_TRACE(r->rtt_rt); 1841 RT_UNLOCK(); 1842 (*r->rtt_func)(r->rtt_rt, r); 1843 pool_put(&rttimer_pool, r); 1844 RT_WLOCK(); 1845 if (rtq->rtq_count > 0) 1846 rtq->rtq_count--; 1847 else 1848 printf("rt_timer_queue_remove_all: " 1849 "rtq_count reached 0\n"); 1850 } 1851 } 1852 1853 void 1854 rt_timer_queue_destroy(struct rttimer_queue *rtq) 1855 { 1856 1857 RT_WLOCK(); 1858 rt_timer_queue_remove_all(rtq); 1859 LIST_REMOVE(rtq, rtq_link); 1860 RT_UNLOCK(); 1861 1862 /* 1863 * Caller is responsible for freeing the rttimer_queue structure. 
1864 */ 1865 } 1866 1867 unsigned long 1868 rt_timer_count(struct rttimer_queue *rtq) 1869 { 1870 return rtq->rtq_count; 1871 } 1872 1873 static void 1874 rt_timer_remove_all(struct rtentry *rt) 1875 { 1876 struct rttimer *r; 1877 1878 RT_WLOCK(); 1879 while ((r = LIST_FIRST(&rt->rt_timer)) != NULL) { 1880 LIST_REMOVE(r, rtt_link); 1881 TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next); 1882 if (r->rtt_queue->rtq_count > 0) 1883 r->rtt_queue->rtq_count--; 1884 else 1885 printf("rt_timer_remove_all: rtq_count reached 0\n"); 1886 pool_put(&rttimer_pool, r); 1887 } 1888 RT_UNLOCK(); 1889 } 1890 1891 int 1892 rt_timer_add(struct rtentry *rt, 1893 void (*func)(struct rtentry *, struct rttimer *), 1894 struct rttimer_queue *queue) 1895 { 1896 struct rttimer *r; 1897 1898 KASSERT(func != NULL); 1899 RT_WLOCK(); 1900 /* 1901 * If there's already a timer with this action, destroy it before 1902 * we add a new one. 1903 */ 1904 LIST_FOREACH(r, &rt->rt_timer, rtt_link) { 1905 if (r->rtt_func == func) 1906 break; 1907 } 1908 if (r != NULL) { 1909 LIST_REMOVE(r, rtt_link); 1910 TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next); 1911 if (r->rtt_queue->rtq_count > 0) 1912 r->rtt_queue->rtq_count--; 1913 else 1914 printf("rt_timer_add: rtq_count reached 0\n"); 1915 } else { 1916 r = pool_get(&rttimer_pool, PR_NOWAIT); 1917 if (r == NULL) { 1918 RT_UNLOCK(); 1919 return ENOBUFS; 1920 } 1921 } 1922 1923 memset(r, 0, sizeof(*r)); 1924 1925 r->rtt_rt = rt; 1926 r->rtt_time = time_uptime; 1927 r->rtt_func = func; 1928 r->rtt_queue = queue; 1929 LIST_INSERT_HEAD(&rt->rt_timer, r, rtt_link); 1930 TAILQ_INSERT_TAIL(&queue->rtq_head, r, rtt_next); 1931 r->rtt_queue->rtq_count++; 1932 1933 RT_UNLOCK(); 1934 1935 return 0; 1936 } 1937 1938 static void 1939 rt_timer_work(struct work *wk, void *arg) 1940 { 1941 struct rttimer_queue *rtq; 1942 struct rttimer *r; 1943 1944 RT_WLOCK(); 1945 LIST_FOREACH(rtq, &rttimer_queue_head, rtq_link) { 1946 while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL && 1947 (r->rtt_time + rtq->rtq_timeout) < time_uptime) { 1948 LIST_REMOVE(r, rtt_link); 1949 TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next); 1950 /* 1951 * Take a reference to avoid the rtentry is freed 1952 * accidentally after RT_UNLOCK. The callback 1953 * (rtt_func) must rt_unref it by itself. 

static void
rt_timer_work(struct work *wk, void *arg)
{
	struct rttimer_queue *rtq;
	struct rttimer *r;

	RT_WLOCK();
	LIST_FOREACH(rtq, &rttimer_queue_head, rtq_link) {
		while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL &&
		    (r->rtt_time + rtq->rtq_timeout) < time_uptime) {
			LIST_REMOVE(r, rtt_link);
			TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
			/*
			 * Take a reference to prevent the rtentry from
			 * being freed accidentally after RT_UNLOCK.  The
			 * callback (rtt_func) must rt_unref it by itself.
			 */
			rt_ref(r->rtt_rt);
			RT_REFCNT_TRACE(r->rtt_rt);
			RT_UNLOCK();
			(*r->rtt_func)(r->rtt_rt, r);
			pool_put(&rttimer_pool, r);
			RT_WLOCK();
			if (rtq->rtq_count > 0)
				rtq->rtq_count--;
			else
				printf("rt_timer_work: rtq_count reached 0\n");
		}
	}
	RT_UNLOCK();

	callout_reset(&rt_timer_ch, hz, rt_timer_timer, NULL);
}

static void
rt_timer_timer(void *arg)
{

	workqueue_enqueue(rt_timer_wq, &rt_timer_wk, NULL);
}

static struct rtentry *
_rtcache_init(struct route *ro, int flag)
{
	struct rtentry *rt;

	rtcache_invariants(ro);
	KASSERT(ro->_ro_rt == NULL);

	if (rtcache_getdst(ro) == NULL)
		return NULL;
	rt = rtalloc1(rtcache_getdst(ro), flag);
	if (rt != NULL) {
		RT_RLOCK();
		if (ISSET(rt->rt_flags, RTF_UP)) {
			ro->_ro_rt = rt;
			ro->ro_rtcache_generation = rtcache_generation;
			rtcache_ref(rt, ro);
		}
		RT_UNLOCK();
		rt_unref(rt);
	}

	rtcache_invariants(ro);
	return ro->_ro_rt;
}

struct rtentry *
rtcache_init(struct route *ro)
{

	return _rtcache_init(ro, 1);
}

struct rtentry *
rtcache_init_noclone(struct route *ro)
{

	return _rtcache_init(ro, 0);
}

struct rtentry *
rtcache_update(struct route *ro, int clone)
{

	ro->_ro_rt = NULL;
	return _rtcache_init(ro, clone);
}

void
rtcache_copy(struct route *new_ro, struct route *old_ro)
{
	struct rtentry *rt;
	int ret;

	KASSERT(new_ro != old_ro);
	rtcache_invariants(new_ro);
	rtcache_invariants(old_ro);

	rt = rtcache_validate(old_ro);

	if (rtcache_getdst(old_ro) == NULL)
		goto out;
	ret = rtcache_setdst(new_ro, rtcache_getdst(old_ro));
	if (ret != 0)
		goto out;

	RT_RLOCK();
	new_ro->_ro_rt = rt;
	new_ro->ro_rtcache_generation = rtcache_generation;
	RT_UNLOCK();
	rtcache_invariants(new_ro);
out:
	rtcache_unref(rt, old_ro);
	return;
}

#if defined(RT_DEBUG) && defined(NET_MPSAFE)
static void
rtcache_trace(const char *func, struct rtentry *rt, struct route *ro)
{
	char dst[64];

	sockaddr_format(ro->ro_sa, dst, 64);
	printf("trace: %s:\tdst=%s cpu=%d lwp=%p psref=%p target=%p\n", func, dst,
	    cpu_index(curcpu()), curlwp, &ro->ro_psref, &rt->rt_psref);
}
#define RTCACHE_PSREF_TRACE(rt, ro)	rtcache_trace(__func__, (rt), (ro))
#else
#define RTCACHE_PSREF_TRACE(rt, ro)	do {} while (0)
#endif

static void
rtcache_ref(struct rtentry *rt, struct route *ro)
{

	KASSERT(rt != NULL);

#ifdef NET_MPSAFE
	RTCACHE_PSREF_TRACE(rt, ro);
	ro->ro_bound = curlwp_bind();
	/* XXX Use a real caller's address */
	PSREF_DEBUG_FILL_RETURN_ADDRESS(&ro->ro_psref);
	psref_acquire(&ro->ro_psref, &rt->rt_psref, rt_psref_class);
#endif
}

void
rtcache_unref(struct rtentry *rt, struct route *ro)
{

	if (rt == NULL)
		return;

#ifdef NET_MPSAFE
	psref_release(&ro->ro_psref, &rt->rt_psref, rt_psref_class);
	curlwp_bindx(ro->ro_bound);
	RTCACHE_PSREF_TRACE(rt, ro);
#endif
}
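/*
 * The remaining rtcache functions implement the consumer side of the
 * route cache.  A sketch of the typical send-path usage, kept under
 * #if 0 with hypothetical names; rtcache_lookup2() is defined below,
 * and in a NET_MPSAFE kernel the returned rtentry is protected by a
 * psref that must be released with rtcache_unref() when the caller
 * is done.
 */
#if 0
static int
example_output(struct route *ro, const struct sockaddr *dst)
{
	struct rtentry *rt;
	int hit;

	rt = rtcache_lookup2(ro, dst, 1, &hit);
	if (rt == NULL)
		return EHOSTUNREACH;	/* no usable route */

	/* ...transmit via rt->rt_ifp... */

	rtcache_unref(rt, ro);
	return 0;
}
#endif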

struct rtentry *
rtcache_validate(struct route *ro)
{
	struct rtentry *rt = NULL;

#ifdef NET_MPSAFE
retry:
#endif
	rtcache_invariants(ro);
	RT_RLOCK();
	if (ro->ro_rtcache_generation != rtcache_generation) {
		/* The cache is invalidated */
		rt = NULL;
		goto out;
	}

	rt = ro->_ro_rt;
	if (rt == NULL)
		goto out;

	if ((rt->rt_flags & RTF_UP) == 0) {
		rt = NULL;
		goto out;
	}
#ifdef NET_MPSAFE
	if (ISSET(rt->rt_flags, RTF_UPDATING)) {
		if (rt_wait_ok()) {
			RT_UNLOCK();

			/* We can wait until the update is complete */
			rt_update_wait();
			goto retry;
		} else {
			rt = NULL;
		}
	} else
#endif
		rtcache_ref(rt, ro);
out:
	RT_UNLOCK();
	return rt;
}

struct rtentry *
rtcache_lookup2(struct route *ro, const struct sockaddr *dst,
    int clone, int *hitp)
{
	const struct sockaddr *odst;
	struct rtentry *rt = NULL;

	odst = rtcache_getdst(ro);
	if (odst == NULL)
		goto miss;

	if (sockaddr_cmp(odst, dst) != 0) {
		rtcache_free(ro);
		goto miss;
	}

	rt = rtcache_validate(ro);
	if (rt == NULL) {
		ro->_ro_rt = NULL;
		goto miss;
	}

	rtcache_invariants(ro);

	if (hitp != NULL)
		*hitp = 1;
	return rt;
miss:
	if (hitp != NULL)
		*hitp = 0;
	if (rtcache_setdst(ro, dst) == 0)
		rt = _rtcache_init(ro, clone);

	rtcache_invariants(ro);

	return rt;
}

void
rtcache_free(struct route *ro)
{

	ro->_ro_rt = NULL;
	if (ro->ro_sa != NULL) {
		sockaddr_free(ro->ro_sa);
		ro->ro_sa = NULL;
	}
	rtcache_invariants(ro);
}

int
rtcache_setdst(struct route *ro, const struct sockaddr *sa)
{
	KASSERT(sa != NULL);

	rtcache_invariants(ro);
	if (ro->ro_sa != NULL) {
		if (ro->ro_sa->sa_family == sa->sa_family) {
			ro->_ro_rt = NULL;
			sockaddr_copy(ro->ro_sa, ro->ro_sa->sa_len, sa);
			rtcache_invariants(ro);
			return 0;
		}
		/* free ro_sa, wrong family */
		rtcache_free(ro);
	}

	KASSERT(ro->_ro_rt == NULL);

	if ((ro->ro_sa = sockaddr_dup(sa, M_ZERO | M_NOWAIT)) == NULL) {
		rtcache_invariants(ro);
		return ENOMEM;
	}
	rtcache_invariants(ro);
	return 0;
}

static void
rtcache_percpu_init_cpu(void *p, void *arg __unused, struct cpu_info *ci __unused)
{
	struct route **rop = p;

	/*
	 * We can't keep struct route itself as percpu data because
	 * percpu storage may be re-allocated (destroying its contents)
	 * when the percpu subsystem enlarges its backing memory, so we
	 * store a pointer to a separately allocated struct route instead.
	 */
	*rop = kmem_zalloc(sizeof(**rop), KM_SLEEP);
}

percpu_t *
rtcache_percpu_alloc(void)
{

	return percpu_create(sizeof(struct route *),
	    rtcache_percpu_init_cpu, NULL, NULL);
}
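/*
 * A sketch of consuming such a per-CPU cache, kept under #if 0.
 * percpu_getref()/percpu_putref() are the generic percpu(9)
 * interface; percpu_getref() also prevents the LWP from migrating to
 * another CPU while the reference is held.  The extra pointer
 * dereference follows from the layout chosen in
 * rtcache_percpu_init_cpu() above; example_output() refers to the
 * hypothetical sketch earlier in this file.
 */
#if 0
static int
example_percpu_output(percpu_t *ro_percpu, const struct sockaddr *dst)
{
	struct route *ro;
	int error;

	ro = *(struct route **)percpu_getref(ro_percpu);
	error = example_output(ro, dst);
	percpu_putref(ro_percpu);

	return error;
}
#endif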

const struct sockaddr *
rt_settag(struct rtentry *rt, const struct sockaddr *tag)
{
	if (rt->rt_tag != tag) {
		if (rt->rt_tag != NULL)
			sockaddr_free(rt->rt_tag);
		rt->rt_tag = sockaddr_dup(tag, M_ZERO | M_NOWAIT);
	}
	return rt->rt_tag;
}

struct sockaddr *
rt_gettag(const struct rtentry *rt)
{
	return rt->rt_tag;
}

int
rt_check_reject_route(const struct rtentry *rt, const struct ifnet *ifp)
{

	if ((rt->rt_flags & RTF_REJECT) != 0) {
		/* Mimic looutput */
		if (ifp->if_flags & IFF_LOOPBACK)
			return (rt->rt_flags & RTF_HOST) ?
			    EHOSTUNREACH : ENETUNREACH;
		else if (rt->rt_rmx.rmx_expire == 0 ||
		    time_uptime < rt->rt_rmx.rmx_expire)
			return (rt->rt_flags & RTF_GATEWAY) ?
			    EHOSTUNREACH : EHOSTDOWN;
	}

	return 0;
}

void
rt_delete_matched_entries(sa_family_t family, int (*f)(struct rtentry *, void *),
    void *v)
{

	for (;;) {
		int s;
		int error;
		struct rtentry *rt, *retrt = NULL;

		RT_RLOCK();
		s = splsoftnet();
		rt = rtbl_search_matched_entry(family, f, v);
		if (rt == NULL) {
			splx(s);
			RT_UNLOCK();
			return;
		}
		rt_ref(rt);
		splx(s);
		RT_UNLOCK();

		error = rtrequest(RTM_DELETE, rt_getkey(rt), rt->rt_gateway,
		    rt_mask(rt), rt->rt_flags, &retrt);
		if (error == 0) {
			KASSERT(retrt == rt);
			KASSERT((retrt->rt_flags & RTF_UP) == 0);
			retrt->rt_ifp = NULL;
			rt_unref(rt);
			rt_free(retrt);
		} else if (error == ESRCH) {
			/* Someone deleted the entry already. */
			rt_unref(rt);
		} else {
			log(LOG_ERR, "%s: unable to delete rtentry @ %p, "
			    "error = %d\n", rt->rt_ifp->if_xname, rt, error);
			/* XXX how to treat this case? */
		}
	}
}

static int
rt_walktree_locked(sa_family_t family, int (*f)(struct rtentry *, void *),
    void *v)
{

	return rtbl_walktree(family, f, v);
}

int
rt_walktree(sa_family_t family, int (*f)(struct rtentry *, void *), void *v)
{
	int error;

	RT_RLOCK();
	error = rt_walktree_locked(family, f, v);
	RT_UNLOCK();

	return error;
}
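/*
 * Both rt_delete_matched_entries() and rt_walktree() take a callback
 * of type int (*)(struct rtentry *, void *), but the return value
 * means different things: for the former, non-zero marks an entry
 * for deletion; for the latter, non-zero is an error that aborts the
 * walk (see db_show_rtentry() below).  A sketch of a matcher, kept
 * under #if 0 with hypothetical names; interface detach uses a
 * matcher of this shape to purge an interface's routes.
 */
#if 0
static int
example_match_ifp(struct rtentry *rt, void *v)
{
	const struct ifnet *ifp = v;

	return rt->rt_ifp == ifp;
}

static void
example_purge_ifp_routes(struct ifnet *ifp)
{

	/* Delete every IPv4 route bound to ifp. */
	rt_delete_matched_entries(AF_INET, example_match_ifp, ifp);
}
#endif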

#ifdef DDB

#include <machine/db_machdep.h>
#include <ddb/db_interface.h>
#include <ddb/db_output.h>

#define	rt_expire rt_rmx.rmx_expire

static void
db_print_sa(const struct sockaddr *sa)
{
	int len;
	const u_char *p;

	if (sa == NULL) {
		db_printf("[NULL]");
		return;
	}

	p = (const u_char *)sa;
	len = sa->sa_len;
	db_printf("[");
	while (len > 0) {
		db_printf("%d", *p);
		p++; len--;
		if (len) db_printf(",");
	}
	db_printf("]\n");
}

static void
db_print_ifa(struct ifaddr *ifa)
{
	if (ifa == NULL)
		return;
	db_printf(" ifa_addr=");
	db_print_sa(ifa->ifa_addr);
	db_printf(" ifa_dsta=");
	db_print_sa(ifa->ifa_dstaddr);
	db_printf(" ifa_mask=");
	db_print_sa(ifa->ifa_netmask);
	db_printf(" flags=0x%x,refcnt=%d,metric=%d\n",
	    ifa->ifa_flags,
	    ifa->ifa_refcnt,
	    ifa->ifa_metric);
}

/*
 * Function to pass to rt_walktree().
 * Return non-zero error to abort walk.
 */
static int
db_show_rtentry(struct rtentry *rt, void *w)
{
	db_printf("rtentry=%p", rt);

	db_printf(" flags=0x%x refcnt=%d use=%"PRId64" expire=%"PRId64"\n",
	    rt->rt_flags, rt->rt_refcnt,
	    rt->rt_use, (uint64_t)rt->rt_expire);

	db_printf(" key="); db_print_sa(rt_getkey(rt));
	db_printf(" mask="); db_print_sa(rt_mask(rt));
	db_printf(" gw="); db_print_sa(rt->rt_gateway);

	db_printf(" ifp=%p ", rt->rt_ifp);
	if (rt->rt_ifp)
		db_printf("(%s)", rt->rt_ifp->if_xname);
	else
		db_printf("(NULL)");

	db_printf(" ifa=%p\n", rt->rt_ifa);
	db_print_ifa(rt->rt_ifa);

	db_printf(" gwroute=%p llinfo=%p\n",
	    rt->rt_gwroute, rt->rt_llinfo);

	return 0;
}

/*
 * Function to print all the route trees.
 * Use this from ddb:  "show routes"
 */
void
db_show_routes(db_expr_t addr, bool have_addr,
    db_expr_t count, const char *modif)
{

	/* Taking RT_LOCK will fail if LOCKDEBUG is enabled. */
	rt_walktree_locked(AF_INET, db_show_rtentry, NULL);
}
#endif