1 /* $NetBSD: route.c,v 1.219 2019/05/17 03:34:26 ozaki-r Exp $ */ 2 3 /*- 4 * Copyright (c) 1998, 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Kevin M. Lahey of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 35 * All rights reserved. 36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. 
Redistributions of source code must retain the above copyright 41 * notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. Neither the name of the project nor the names of its contributors 46 * may be used to endorse or promote products derived from this software 47 * without specific prior written permission. 48 * 49 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 52 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 59 * SUCH DAMAGE. 60 */ 61 62 /* 63 * Copyright (c) 1980, 1986, 1991, 1993 64 * The Regents of the University of California. All rights reserved. 65 * 66 * Redistribution and use in source and binary forms, with or without 67 * modification, are permitted provided that the following conditions 68 * are met: 69 * 1. Redistributions of source code must retain the above copyright 70 * notice, this list of conditions and the following disclaimer. 71 * 2. Redistributions in binary form must reproduce the above copyright 72 * notice, this list of conditions and the following disclaimer in the 73 * documentation and/or other materials provided with the distribution. 
74 * 3. Neither the name of the University nor the names of its contributors 75 * may be used to endorse or promote products derived from this software 76 * without specific prior written permission. 77 * 78 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 79 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 80 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 81 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 82 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 83 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 84 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 85 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 86 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 87 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 88 * SUCH DAMAGE. 89 * 90 * @(#)route.c 8.3 (Berkeley) 1/9/95 91 */ 92 93 #ifdef _KERNEL_OPT 94 #include "opt_inet.h" 95 #include "opt_route.h" 96 #include "opt_net_mpsafe.h" 97 #endif 98 99 #include <sys/cdefs.h> 100 __KERNEL_RCSID(0, "$NetBSD: route.c,v 1.219 2019/05/17 03:34:26 ozaki-r Exp $"); 101 102 #include <sys/param.h> 103 #ifdef RTFLUSH_DEBUG 104 #include <sys/sysctl.h> 105 #endif 106 #include <sys/systm.h> 107 #include <sys/callout.h> 108 #include <sys/proc.h> 109 #include <sys/mbuf.h> 110 #include <sys/socket.h> 111 #include <sys/socketvar.h> 112 #include <sys/domain.h> 113 #include <sys/kernel.h> 114 #include <sys/ioctl.h> 115 #include <sys/pool.h> 116 #include <sys/kauth.h> 117 #include <sys/workqueue.h> 118 #include <sys/syslog.h> 119 #include <sys/rwlock.h> 120 #include <sys/mutex.h> 121 #include <sys/cpu.h> 122 123 #include <net/if.h> 124 #include <net/if_dl.h> 125 #include <net/route.h> 126 #if defined(INET) || defined(INET6) 127 #include <net/if_llatbl.h> 128 #endif 129 130 #include 
<netinet/in.h> 131 #include <netinet/in_var.h> 132 133 #define PRESERVED_RTF (RTF_UP | RTF_GATEWAY | RTF_HOST | RTF_DONE | RTF_MASK) 134 135 #ifdef RTFLUSH_DEBUG 136 #define rtcache_debug() __predict_false(_rtcache_debug) 137 #else /* RTFLUSH_DEBUG */ 138 #define rtcache_debug() 0 139 #endif /* RTFLUSH_DEBUG */ 140 141 #ifdef RT_DEBUG 142 #define RT_REFCNT_TRACE(rt) printf("%s:%d: rt=%p refcnt=%d\n", \ 143 __func__, __LINE__, (rt), (rt)->rt_refcnt) 144 #else 145 #define RT_REFCNT_TRACE(rt) do {} while (0) 146 #endif 147 148 #ifdef RT_DEBUG 149 #define dlog(level, fmt, args...) log(level, fmt, ##args) 150 #else 151 #define dlog(level, fmt, args...) do {} while (0) 152 #endif 153 154 struct rtstat rtstat; 155 156 static int rttrash; /* routes not in table but not freed */ 157 158 static struct pool rtentry_pool; 159 static struct pool rttimer_pool; 160 161 static struct callout rt_timer_ch; /* callout for rt_timer_timer() */ 162 static struct workqueue *rt_timer_wq; 163 static struct work rt_timer_wk; 164 165 static void rt_timer_init(void); 166 static void rt_timer_queue_remove_all(struct rttimer_queue *); 167 static void rt_timer_remove_all(struct rtentry *); 168 static void rt_timer_timer(void *); 169 170 /* 171 * Locking notes: 172 * - The routing table is protected by a global rwlock 173 * - API: RT_RLOCK and friends 174 * - rtcaches are NOT protected by the framework 175 * - Callers must guarantee a rtcache isn't accessed simultaneously 176 * - How the constraint is guranteed in the wild 177 * - Protect a rtcache by a mutex (e.g., inp_route) 178 * - Make rtcache per-CPU and allow only accesses from softint 179 * (e.g., ipforward_rt_percpu) 180 * - References to a rtentry is managed by reference counting and psref 181 * - Reference couting is used for temporal reference when a rtentry 182 * is fetched from the routing table 183 * - psref is used for temporal reference when a rtentry is fetched 184 * from a rtcache 185 * - struct route (rtcache) has struct psref, 
so we cannot obtain
 *       a reference twice on the same struct route
 *   - Before destroying or updating a rtentry, we have to wait for
 *     all references left (see below for details)
 *   - APIs
 *     - An obtained rtentry via rtalloc1 or rtrequest* must be
 *       unreferenced by rt_unref
 *     - An obtained rtentry via rtcache_* must be unreferenced by
 *       rtcache_unref
 *   - TODO: once we get a lockless routing table, we should use only
 *     psref for rtentries
 * - rtentry destruction
 *   - A rtentry is destroyed (freed) only when we call rtrequest(RTM_DELETE)
 *   - If a caller of rtrequest grabs a reference of a rtentry, the caller
 *     has a responsibility to destroy the rtentry by itself by calling
 *     rt_free
 *     - If not, rtrequest itself does that
 *   - If rt_free is called in softint, the actual destruction routine is
 *     deferred to a workqueue
 * - rtentry update
 *   - When updating a rtentry, RTF_UPDATING flag is set
 *   - If a rtentry is set RTF_UPDATING, fetching the rtentry from
 *     the routing table or a rtcache results in either of the following
 *     cases:
 *     - if the caller runs in softint, the caller fails to fetch
 *     - otherwise, the caller waits for the update completed and retries
 *       to fetch (probably succeed to fetch for the second time)
 * - rtcache invalidation
 *   - There is a global generation counter that is incremented when
 *     any routes have been added or deleted
 *   - When a rtcache caches a rtentry into itself, it also stores
 *     a snapshot of the generation counter
 *   - If the snapshot equals to the global counter, the cache is valid,
 *     otherwise the cache is invalidated
 */

/*
 * Global lock for the routing table.
223 */ 224 static krwlock_t rt_lock __cacheline_aligned; 225 #ifdef NET_MPSAFE 226 #define RT_RLOCK() rw_enter(&rt_lock, RW_READER) 227 #define RT_WLOCK() rw_enter(&rt_lock, RW_WRITER) 228 #define RT_UNLOCK() rw_exit(&rt_lock) 229 #define RT_WLOCKED() rw_write_held(&rt_lock) 230 #define RT_ASSERT_WLOCK() KASSERT(rw_write_held(&rt_lock)) 231 #else 232 #define RT_RLOCK() do {} while (0) 233 #define RT_WLOCK() do {} while (0) 234 #define RT_UNLOCK() do {} while (0) 235 #define RT_WLOCKED() true 236 #define RT_ASSERT_WLOCK() do {} while (0) 237 #endif 238 239 static uint64_t rtcache_generation; 240 241 /* 242 * mutex and cv that are used to wait for references to a rtentry left 243 * before updating the rtentry. 244 */ 245 static struct { 246 kmutex_t lock; 247 kcondvar_t cv; 248 bool ongoing; 249 const struct lwp *lwp; 250 } rt_update_global __cacheline_aligned; 251 252 /* 253 * A workqueue and stuff that are used to defer the destruction routine 254 * of rtentries. 255 */ 256 static struct { 257 struct workqueue *wq; 258 struct work wk; 259 kmutex_t lock; 260 SLIST_HEAD(, rtentry) queue; 261 bool enqueued; 262 } rt_free_global __cacheline_aligned; 263 264 /* psref for rtentry */ 265 static struct psref_class *rt_psref_class __read_mostly; 266 267 #ifdef RTFLUSH_DEBUG 268 static int _rtcache_debug = 0; 269 #endif /* RTFLUSH_DEBUG */ 270 271 static kauth_listener_t route_listener; 272 273 static int rtdeletemsg(struct rtentry *); 274 275 static void rt_maskedcopy(const struct sockaddr *, 276 struct sockaddr *, const struct sockaddr *); 277 278 static void rtcache_invalidate(void); 279 280 static void rt_ref(struct rtentry *); 281 282 static struct rtentry * 283 rtalloc1_locked(const struct sockaddr *, int, bool, bool); 284 285 static struct ifaddr *rt_getifa(struct rt_addrinfo *, struct psref *); 286 static struct ifnet *rt_getifp(struct rt_addrinfo *, struct psref *); 287 static struct ifaddr *ifa_ifwithroute_psref(int, const struct sockaddr *, 288 const struct 
sockaddr *, struct psref *);

static void rtcache_ref(struct rtentry *, struct route *);

#ifdef NET_MPSAFE
static void rt_update_wait(void);
#endif

static bool rt_wait_ok(void);
static void rt_wait_refcnt(const char *, struct rtentry *, int);
static void rt_wait_psref(struct rtentry *);

#ifdef DDB
static void db_print_sa(const struct sockaddr *);
static void db_print_ifa(struct ifaddr *);
static int db_show_rtentry(struct rtentry *, void *);
#endif

#ifdef RTFLUSH_DEBUG
static void sysctl_net_rtcache_setup(struct sysctllog **);
/*
 * Create the net.rtcache sysctl subtree and its "debug" knob, which
 * toggles the file-scope _rtcache_debug flag read via rtcache_debug().
 * Errors are silently ignored; the knobs are purely diagnostic.
 */
static void
sysctl_net_rtcache_setup(struct sysctllog **clog)
{
	const struct sysctlnode *rnode;

	if (sysctl_createv(clog, 0, NULL, &rnode, CTLFLAG_PERMANENT,
	    CTLTYPE_NODE,
	    "rtcache", SYSCTL_DESCR("Route cache related settings"),
	    NULL, 0, NULL, 0, CTL_NET, CTL_CREATE, CTL_EOL) != 0)
		return;
	if (sysctl_createv(clog, 0, &rnode, &rnode,
	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT,
	    "debug", SYSCTL_DESCR("Debug route caches"),
	    NULL, 0, &_rtcache_debug, 0, CTL_CREATE, CTL_EOL) != 0)
		return;
}
#endif /* RTFLUSH_DEBUG */

/*
 * Free the sockaddrs owned by a rtentry (key, gateway, tag) and NULL
 * out the pointers.  The rtentry itself is not freed here.
 */
static inline void
rt_destroy(struct rtentry *rt)
{
	if (rt->_rt_key != NULL)
		sockaddr_free(rt->_rt_key);
	if (rt->rt_gateway != NULL)
		sockaddr_free(rt->rt_gateway);
	if (rt_gettag(rt) != NULL)
		sockaddr_free(rt_gettag(rt));
	rt->_rt_key = rt->rt_gateway = rt->rt_tag = NULL;
}

/*
 * Replace the rtentry's key with a private copy of `key' (allocated with
 * `flags', e.g. M_NOWAIT) and keep the radix node's rn_key in sync.
 * Returns the new key, or NULL if sockaddr_dup failed.
 */
static inline const struct sockaddr *
rt_setkey(struct rtentry *rt, const struct sockaddr *key, int flags)
{
	/* Same pointer: nothing to copy, but still refresh rn_key below. */
	if (rt->_rt_key == key)
		goto out;

	if (rt->_rt_key != NULL)
		sockaddr_free(rt->_rt_key);
	rt->_rt_key = sockaddr_dup(key, flags);
out:
	rt->rt_nodes->rn_key = (const char *)rt->_rt_key;
	return rt->_rt_key;
}

/*
 * Return the ifa associated with the route, letting the address family
 * override it via the ifa_getifa hook (may replace rt->rt_ifa as a side
 * effect).  Returns NULL only if the hook yields no address.
 */
struct ifaddr *
rt_get_ifa(struct rtentry *rt)
{
	struct ifaddr *ifa;

	if ((ifa = rt->rt_ifa) == NULL)
		return ifa;
	else if (ifa->ifa_getifa == NULL)
		return ifa;
#if 0
	else if (ifa->ifa_seqno != NULL && *ifa->ifa_seqno == rt->rt_ifa_seqno)
		return ifa;
#endif
	else {
		ifa = (*ifa->ifa_getifa)(ifa, rt_getkey(rt));
		if (ifa == NULL)
			return NULL;
		rt_replace_ifa(rt, ifa);
		return ifa;
	}
}

/*
 * Store `ifa' into the rtentry and snapshot its sequence number so a
 * later change of the ifa can be detected.  Caller manages references.
 */
static void
rt_set_ifa1(struct rtentry *rt, struct ifaddr *ifa)
{
	rt->rt_ifa = ifa;
	if (ifa->ifa_seqno != NULL)
		rt->rt_ifa_seqno = *ifa->ifa_seqno;
}

/*
 * Is this route the connected route for the ifa?
 */
static int
rt_ifa_connected(const struct rtentry *rt, const struct ifaddr *ifa)
{
	const struct sockaddr *key, *dst, *odst;
	struct sockaddr_storage maskeddst;

	key = rt_getkey(rt);
	/* Host routes are keyed by the peer address on p2p interfaces. */
	dst = rt->rt_flags & RTF_HOST ? ifa->ifa_dstaddr : ifa->ifa_addr;
	if (dst == NULL ||
	    dst->sa_family != key->sa_family ||
	    dst->sa_len != key->sa_len)
		return 0;
	/* For network routes, compare under the ifa's netmask. */
	if ((rt->rt_flags & RTF_HOST) == 0 && ifa->ifa_netmask) {
		odst = dst;
		dst = (struct sockaddr *)&maskeddst;
		rt_maskedcopy(odst, (struct sockaddr *)&maskeddst,
		    ifa->ifa_netmask);
	}
	return (memcmp(dst, key, dst->sa_len) == 0);
}

/*
 * Swap the rtentry's ifa for a new one, migrating the IFA_ROUTE marker
 * from the old ifa to the new one when this route is the connected
 * route.  Takes a reference on `ifa' and drops the one on the old ifa.
 */
void
rt_replace_ifa(struct rtentry *rt, struct ifaddr *ifa)
{
	struct ifaddr *old;

	if (rt->rt_ifa == ifa)
		return;

	if (rt->rt_ifa &&
	    rt->rt_ifa != ifa &&
	    rt->rt_ifa->ifa_flags & IFA_ROUTE &&
	    rt_ifa_connected(rt, rt->rt_ifa))
	{
		RT_DPRINTF("rt->_rt_key = %p, ifa = %p, "
		    "replace deleted IFA_ROUTE\n",
		    (void *)rt->_rt_key, (void *)rt->rt_ifa);
		rt->rt_ifa->ifa_flags &= ~IFA_ROUTE;
		if (rt_ifa_connected(rt, ifa)) {
			RT_DPRINTF("rt->_rt_key = %p, ifa = %p, "
			    "replace added IFA_ROUTE\n",
			    (void *)rt->_rt_key, (void *)ifa);
			ifa->ifa_flags |= IFA_ROUTE;
		}
	}

	ifaref(ifa);
	old = rt->rt_ifa;
	rt_set_ifa1(rt, ifa);
	ifafree(old);
}
436 437 static void 438 rt_set_ifa(struct rtentry *rt, struct ifaddr *ifa) 439 { 440 ifaref(ifa); 441 rt_set_ifa1(rt, ifa); 442 } 443 444 static int 445 route_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie, 446 void *arg0, void *arg1, void *arg2, void *arg3) 447 { 448 struct rt_msghdr *rtm; 449 int result; 450 451 result = KAUTH_RESULT_DEFER; 452 rtm = arg1; 453 454 if (action != KAUTH_NETWORK_ROUTE) 455 return result; 456 457 if (rtm->rtm_type == RTM_GET) 458 result = KAUTH_RESULT_ALLOW; 459 460 return result; 461 } 462 463 static void rt_free_work(struct work *, void *); 464 465 void 466 rt_init(void) 467 { 468 int error; 469 470 #ifdef RTFLUSH_DEBUG 471 sysctl_net_rtcache_setup(NULL); 472 #endif 473 474 mutex_init(&rt_free_global.lock, MUTEX_DEFAULT, IPL_SOFTNET); 475 SLIST_INIT(&rt_free_global.queue); 476 rt_free_global.enqueued = false; 477 478 rt_psref_class = psref_class_create("rtentry", IPL_SOFTNET); 479 480 error = workqueue_create(&rt_free_global.wq, "rt_free", 481 rt_free_work, NULL, PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE); 482 if (error) 483 panic("%s: workqueue_create failed (%d)\n", __func__, error); 484 485 mutex_init(&rt_update_global.lock, MUTEX_DEFAULT, IPL_SOFTNET); 486 cv_init(&rt_update_global.cv, "rt_update"); 487 488 pool_init(&rtentry_pool, sizeof(struct rtentry), 0, 0, 0, "rtentpl", 489 NULL, IPL_SOFTNET); 490 pool_init(&rttimer_pool, sizeof(struct rttimer), 0, 0, 0, "rttmrpl", 491 NULL, IPL_SOFTNET); 492 493 rn_init(); /* initialize all zeroes, all ones, mask table */ 494 rtbl_init(); 495 496 route_listener = kauth_listen_scope(KAUTH_SCOPE_NETWORK, 497 route_listener_cb, NULL); 498 } 499 500 static void 501 rtcache_invalidate(void) 502 { 503 504 RT_ASSERT_WLOCK(); 505 506 if (rtcache_debug()) 507 printf("%s: enter\n", __func__); 508 509 rtcache_generation++; 510 } 511 512 #ifdef RT_DEBUG 513 static void 514 dump_rt(const struct rtentry *rt) 515 { 516 char buf[512]; 517 518 log(LOG_DEBUG, "rt: "); 519 log(LOG_DEBUG, "p=%p 
", rt); 520 if (rt->_rt_key == NULL) { 521 log(LOG_DEBUG, "dst=(NULL) "); 522 } else { 523 sockaddr_format(rt->_rt_key, buf, sizeof(buf)); 524 log(LOG_DEBUG, "dst=%s ", buf); 525 } 526 if (rt->rt_gateway == NULL) { 527 log(LOG_DEBUG, "gw=(NULL) "); 528 } else { 529 sockaddr_format(rt->_rt_key, buf, sizeof(buf)); 530 log(LOG_DEBUG, "gw=%s ", buf); 531 } 532 log(LOG_DEBUG, "flags=%x ", rt->rt_flags); 533 if (rt->rt_ifp == NULL) { 534 log(LOG_DEBUG, "if=(NULL) "); 535 } else { 536 log(LOG_DEBUG, "if=%s ", rt->rt_ifp->if_xname); 537 } 538 log(LOG_DEBUG, "\n"); 539 } 540 #endif /* RT_DEBUG */ 541 542 /* 543 * Packet routing routines. If success, refcnt of a returned rtentry 544 * will be incremented. The caller has to rtfree it by itself. 545 */ 546 struct rtentry * 547 rtalloc1_locked(const struct sockaddr *dst, int report, bool wait_ok, 548 bool wlock) 549 { 550 rtbl_t *rtbl; 551 struct rtentry *rt; 552 int s; 553 554 #ifdef NET_MPSAFE 555 retry: 556 #endif 557 s = splsoftnet(); 558 rtbl = rt_gettable(dst->sa_family); 559 if (rtbl == NULL) 560 goto miss; 561 562 rt = rt_matchaddr(rtbl, dst); 563 if (rt == NULL) 564 goto miss; 565 566 if (!ISSET(rt->rt_flags, RTF_UP)) 567 goto miss; 568 569 #ifdef NET_MPSAFE 570 if (ISSET(rt->rt_flags, RTF_UPDATING) && 571 /* XXX updater should be always able to acquire */ 572 curlwp != rt_update_global.lwp) { 573 if (!wait_ok || !rt_wait_ok()) 574 goto miss; 575 RT_UNLOCK(); 576 splx(s); 577 578 /* We can wait until the update is complete */ 579 rt_update_wait(); 580 581 if (wlock) 582 RT_WLOCK(); 583 else 584 RT_RLOCK(); 585 goto retry; 586 } 587 #endif /* NET_MPSAFE */ 588 589 rt_ref(rt); 590 RT_REFCNT_TRACE(rt); 591 592 splx(s); 593 return rt; 594 miss: 595 rtstat.rts_unreach++; 596 if (report) { 597 struct rt_addrinfo info; 598 599 memset(&info, 0, sizeof(info)); 600 info.rti_info[RTAX_DST] = dst; 601 rt_missmsg(RTM_MISS, &info, 0, 0); 602 } 603 splx(s); 604 return NULL; 605 } 606 607 struct rtentry * 608 rtalloc1(const struct 
sockaddr *dst, int report) 609 { 610 struct rtentry *rt; 611 612 RT_RLOCK(); 613 rt = rtalloc1_locked(dst, report, true, false); 614 RT_UNLOCK(); 615 616 return rt; 617 } 618 619 static void 620 rt_ref(struct rtentry *rt) 621 { 622 623 KASSERT(rt->rt_refcnt >= 0); 624 atomic_inc_uint(&rt->rt_refcnt); 625 } 626 627 void 628 rt_unref(struct rtentry *rt) 629 { 630 631 KASSERT(rt != NULL); 632 KASSERTMSG(rt->rt_refcnt > 0, "refcnt=%d", rt->rt_refcnt); 633 634 atomic_dec_uint(&rt->rt_refcnt); 635 if (!ISSET(rt->rt_flags, RTF_UP) || ISSET(rt->rt_flags, RTF_UPDATING)) { 636 mutex_enter(&rt_free_global.lock); 637 cv_broadcast(&rt->rt_cv); 638 mutex_exit(&rt_free_global.lock); 639 } 640 } 641 642 static bool 643 rt_wait_ok(void) 644 { 645 646 KASSERT(!cpu_intr_p()); 647 return !cpu_softintr_p(); 648 } 649 650 void 651 rt_wait_refcnt(const char *title, struct rtentry *rt, int cnt) 652 { 653 mutex_enter(&rt_free_global.lock); 654 while (rt->rt_refcnt > cnt) { 655 dlog(LOG_DEBUG, "%s: %s waiting (refcnt=%d)\n", 656 __func__, title, rt->rt_refcnt); 657 cv_wait(&rt->rt_cv, &rt_free_global.lock); 658 dlog(LOG_DEBUG, "%s: %s waited (refcnt=%d)\n", 659 __func__, title, rt->rt_refcnt); 660 } 661 mutex_exit(&rt_free_global.lock); 662 } 663 664 void 665 rt_wait_psref(struct rtentry *rt) 666 { 667 668 psref_target_destroy(&rt->rt_psref, rt_psref_class); 669 psref_target_init(&rt->rt_psref, rt_psref_class); 670 } 671 672 static void 673 _rt_free(struct rtentry *rt) 674 { 675 struct ifaddr *ifa; 676 677 /* 678 * Need to avoid a deadlock on rt_wait_refcnt of update 679 * and a conflict on psref_target_destroy of update. 
680 */ 681 #ifdef NET_MPSAFE 682 rt_update_wait(); 683 #endif 684 685 RT_REFCNT_TRACE(rt); 686 KASSERTMSG(rt->rt_refcnt >= 0, "refcnt=%d", rt->rt_refcnt); 687 rt_wait_refcnt("free", rt, 0); 688 #ifdef NET_MPSAFE 689 psref_target_destroy(&rt->rt_psref, rt_psref_class); 690 #endif 691 692 rt_assert_inactive(rt); 693 rttrash--; 694 ifa = rt->rt_ifa; 695 rt->rt_ifa = NULL; 696 ifafree(ifa); 697 rt->rt_ifp = NULL; 698 cv_destroy(&rt->rt_cv); 699 rt_destroy(rt); 700 pool_put(&rtentry_pool, rt); 701 } 702 703 static void 704 rt_free_work(struct work *wk, void *arg) 705 { 706 707 for (;;) { 708 struct rtentry *rt; 709 710 mutex_enter(&rt_free_global.lock); 711 if ((rt = SLIST_FIRST(&rt_free_global.queue)) == NULL) { 712 rt_free_global.enqueued = false; 713 mutex_exit(&rt_free_global.lock); 714 return; 715 } 716 SLIST_REMOVE_HEAD(&rt_free_global.queue, rt_free); 717 mutex_exit(&rt_free_global.lock); 718 atomic_dec_uint(&rt->rt_refcnt); 719 _rt_free(rt); 720 } 721 } 722 723 void 724 rt_free(struct rtentry *rt) 725 { 726 727 KASSERT(rt->rt_refcnt > 0); 728 if (rt_wait_ok()) { 729 atomic_dec_uint(&rt->rt_refcnt); 730 _rt_free(rt); 731 return; 732 } 733 734 mutex_enter(&rt_free_global.lock); 735 /* No need to add a reference here. 
*/ 736 SLIST_INSERT_HEAD(&rt_free_global.queue, rt, rt_free); 737 if (!rt_free_global.enqueued) { 738 workqueue_enqueue(rt_free_global.wq, &rt_free_global.wk, NULL); 739 rt_free_global.enqueued = true; 740 } 741 mutex_exit(&rt_free_global.lock); 742 } 743 744 #ifdef NET_MPSAFE 745 static void 746 rt_update_wait(void) 747 { 748 749 mutex_enter(&rt_update_global.lock); 750 while (rt_update_global.ongoing) { 751 dlog(LOG_DEBUG, "%s: waiting lwp=%p\n", __func__, curlwp); 752 cv_wait(&rt_update_global.cv, &rt_update_global.lock); 753 dlog(LOG_DEBUG, "%s: waited lwp=%p\n", __func__, curlwp); 754 } 755 mutex_exit(&rt_update_global.lock); 756 } 757 #endif 758 759 int 760 rt_update_prepare(struct rtentry *rt) 761 { 762 763 dlog(LOG_DEBUG, "%s: updating rt=%p lwp=%p\n", __func__, rt, curlwp); 764 765 RT_WLOCK(); 766 /* If the entry is being destroyed, don't proceed the update. */ 767 if (!ISSET(rt->rt_flags, RTF_UP)) { 768 RT_UNLOCK(); 769 return ESRCH; 770 } 771 rt->rt_flags |= RTF_UPDATING; 772 RT_UNLOCK(); 773 774 mutex_enter(&rt_update_global.lock); 775 while (rt_update_global.ongoing) { 776 dlog(LOG_DEBUG, "%s: waiting ongoing updating rt=%p lwp=%p\n", 777 __func__, rt, curlwp); 778 cv_wait(&rt_update_global.cv, &rt_update_global.lock); 779 dlog(LOG_DEBUG, "%s: waited ongoing updating rt=%p lwp=%p\n", 780 __func__, rt, curlwp); 781 } 782 rt_update_global.ongoing = true; 783 /* XXX need it to avoid rt_update_wait by updater itself. 
*/ 784 rt_update_global.lwp = curlwp; 785 mutex_exit(&rt_update_global.lock); 786 787 rt_wait_refcnt("update", rt, 1); 788 rt_wait_psref(rt); 789 790 return 0; 791 } 792 793 void 794 rt_update_finish(struct rtentry *rt) 795 { 796 797 RT_WLOCK(); 798 rt->rt_flags &= ~RTF_UPDATING; 799 RT_UNLOCK(); 800 801 mutex_enter(&rt_update_global.lock); 802 rt_update_global.ongoing = false; 803 rt_update_global.lwp = NULL; 804 cv_broadcast(&rt_update_global.cv); 805 mutex_exit(&rt_update_global.lock); 806 807 dlog(LOG_DEBUG, "%s: updated rt=%p lwp=%p\n", __func__, rt, curlwp); 808 } 809 810 /* 811 * Force a routing table entry to the specified 812 * destination to go through the given gateway. 813 * Normally called as a result of a routing redirect 814 * message from the network layer. 815 * 816 * N.B.: must be called at splsoftnet 817 */ 818 void 819 rtredirect(const struct sockaddr *dst, const struct sockaddr *gateway, 820 const struct sockaddr *netmask, int flags, const struct sockaddr *src, 821 struct rtentry **rtp) 822 { 823 struct rtentry *rt; 824 int error = 0; 825 uint64_t *stat = NULL; 826 struct rt_addrinfo info; 827 struct ifaddr *ifa; 828 struct psref psref; 829 830 /* verify the gateway is directly reachable */ 831 if ((ifa = ifa_ifwithnet_psref(gateway, &psref)) == NULL) { 832 error = ENETUNREACH; 833 goto out; 834 } 835 rt = rtalloc1(dst, 0); 836 /* 837 * If the redirect isn't from our current router for this dst, 838 * it's either old or wrong. If it redirects us to ourselves, 839 * we have a routing loop, perhaps as a result of an interface 840 * going down recently. 
841 */ 842 if (!(flags & RTF_DONE) && rt && 843 (sockaddr_cmp(src, rt->rt_gateway) != 0 || rt->rt_ifa != ifa)) 844 error = EINVAL; 845 else { 846 int s = pserialize_read_enter(); 847 struct ifaddr *_ifa; 848 849 _ifa = ifa_ifwithaddr(gateway); 850 if (_ifa != NULL) 851 error = EHOSTUNREACH; 852 pserialize_read_exit(s); 853 } 854 if (error) 855 goto done; 856 /* 857 * Create a new entry if we just got back a wildcard entry 858 * or the lookup failed. This is necessary for hosts 859 * which use routing redirects generated by smart gateways 860 * to dynamically build the routing tables. 861 */ 862 if (rt == NULL || (rt_mask(rt) && rt_mask(rt)->sa_len < 2)) 863 goto create; 864 /* 865 * Don't listen to the redirect if it's 866 * for a route to an interface. 867 */ 868 if (rt->rt_flags & RTF_GATEWAY) { 869 if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) { 870 /* 871 * Changing from route to net => route to host. 872 * Create new route, rather than smashing route to net. 873 */ 874 create: 875 if (rt != NULL) 876 rt_unref(rt); 877 flags |= RTF_GATEWAY | RTF_DYNAMIC; 878 memset(&info, 0, sizeof(info)); 879 info.rti_info[RTAX_DST] = dst; 880 info.rti_info[RTAX_GATEWAY] = gateway; 881 info.rti_info[RTAX_NETMASK] = netmask; 882 info.rti_ifa = ifa; 883 info.rti_flags = flags; 884 rt = NULL; 885 error = rtrequest1(RTM_ADD, &info, &rt); 886 if (rt != NULL) 887 flags = rt->rt_flags; 888 stat = &rtstat.rts_dynamic; 889 } else { 890 /* 891 * Smash the current notion of the gateway to 892 * this destination. Should check about netmask!!! 
893 */ 894 #ifdef NET_MPSAFE 895 KASSERT(!cpu_softintr_p()); 896 897 error = rt_update_prepare(rt); 898 if (error == 0) { 899 #endif 900 RT_WLOCK(); 901 error = rt_setgate(rt, gateway); 902 if (error == 0) { 903 rt->rt_flags |= RTF_MODIFIED; 904 flags |= RTF_MODIFIED; 905 } 906 RT_UNLOCK(); 907 #ifdef NET_MPSAFE 908 rt_update_finish(rt); 909 } else { 910 /* 911 * If error != 0, the rtentry is being 912 * destroyed, so doing nothing doesn't 913 * matter. 914 */ 915 } 916 #endif 917 stat = &rtstat.rts_newgateway; 918 } 919 } else 920 error = EHOSTUNREACH; 921 done: 922 if (rt) { 923 if (rtp != NULL && !error) 924 *rtp = rt; 925 else 926 rt_unref(rt); 927 } 928 out: 929 if (error) 930 rtstat.rts_badredirect++; 931 else if (stat != NULL) 932 (*stat)++; 933 memset(&info, 0, sizeof(info)); 934 info.rti_info[RTAX_DST] = dst; 935 info.rti_info[RTAX_GATEWAY] = gateway; 936 info.rti_info[RTAX_NETMASK] = netmask; 937 info.rti_info[RTAX_AUTHOR] = src; 938 rt_missmsg(RTM_REDIRECT, &info, flags, error); 939 ifa_release(ifa, &psref); 940 } 941 942 /* 943 * Delete a route and generate a message. 944 * It doesn't free a passed rt. 945 */ 946 static int 947 rtdeletemsg(struct rtentry *rt) 948 { 949 int error; 950 struct rt_addrinfo info; 951 struct rtentry *retrt; 952 953 /* 954 * Request the new route so that the entry is not actually 955 * deleted. That will allow the information being reported to 956 * be accurate (and consistent with route_output()). 
957 */ 958 memset(&info, 0, sizeof(info)); 959 info.rti_info[RTAX_DST] = rt_getkey(rt); 960 info.rti_info[RTAX_NETMASK] = rt_mask(rt); 961 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 962 info.rti_flags = rt->rt_flags; 963 error = rtrequest1(RTM_DELETE, &info, &retrt); 964 965 rt_missmsg(RTM_DELETE, &info, info.rti_flags, error); 966 967 return error; 968 } 969 970 static struct ifaddr * 971 ifa_ifwithroute_psref(int flags, const struct sockaddr *dst, 972 const struct sockaddr *gateway, struct psref *psref) 973 { 974 struct ifaddr *ifa = NULL; 975 976 if ((flags & RTF_GATEWAY) == 0) { 977 /* 978 * If we are adding a route to an interface, 979 * and the interface is a pt to pt link 980 * we should search for the destination 981 * as our clue to the interface. Otherwise 982 * we can use the local address. 983 */ 984 if ((flags & RTF_HOST) && gateway->sa_family != AF_LINK) 985 ifa = ifa_ifwithdstaddr_psref(dst, psref); 986 if (ifa == NULL) 987 ifa = ifa_ifwithaddr_psref(gateway, psref); 988 } else { 989 /* 990 * If we are adding a route to a remote net 991 * or host, the gateway may still be on the 992 * other end of a pt to pt link. 993 */ 994 ifa = ifa_ifwithdstaddr_psref(gateway, psref); 995 } 996 if (ifa == NULL) 997 ifa = ifa_ifwithnet_psref(gateway, psref); 998 if (ifa == NULL) { 999 int s; 1000 struct rtentry *rt; 1001 1002 rt = rtalloc1_locked(gateway, 0, true, true); 1003 if (rt == NULL) 1004 return NULL; 1005 if (rt->rt_flags & RTF_GATEWAY) { 1006 rt_unref(rt); 1007 return NULL; 1008 } 1009 /* 1010 * Just in case. May not need to do this workaround. 1011 * Revisit when working on rtentry MP-ification. 
1012 */ 1013 s = pserialize_read_enter(); 1014 IFADDR_READER_FOREACH(ifa, rt->rt_ifp) { 1015 if (ifa == rt->rt_ifa) 1016 break; 1017 } 1018 if (ifa != NULL) 1019 ifa_acquire(ifa, psref); 1020 pserialize_read_exit(s); 1021 rt_unref(rt); 1022 if (ifa == NULL) 1023 return NULL; 1024 } 1025 if (ifa->ifa_addr->sa_family != dst->sa_family) { 1026 struct ifaddr *nifa; 1027 int s; 1028 1029 s = pserialize_read_enter(); 1030 nifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp); 1031 if (nifa != NULL) { 1032 ifa_release(ifa, psref); 1033 ifa_acquire(nifa, psref); 1034 ifa = nifa; 1035 } 1036 pserialize_read_exit(s); 1037 } 1038 return ifa; 1039 } 1040 1041 /* 1042 * If it suceeds and ret_nrt isn't NULL, refcnt of ret_nrt is incremented. 1043 * The caller has to rtfree it by itself. 1044 */ 1045 int 1046 rtrequest(int req, const struct sockaddr *dst, const struct sockaddr *gateway, 1047 const struct sockaddr *netmask, int flags, struct rtentry **ret_nrt) 1048 { 1049 struct rt_addrinfo info; 1050 1051 memset(&info, 0, sizeof(info)); 1052 info.rti_flags = flags; 1053 info.rti_info[RTAX_DST] = dst; 1054 info.rti_info[RTAX_GATEWAY] = gateway; 1055 info.rti_info[RTAX_NETMASK] = netmask; 1056 return rtrequest1(req, &info, ret_nrt); 1057 } 1058 1059 /* 1060 * It's a utility function to add/remove a route to/from the routing table 1061 * and tell user processes the addition/removal on success. 
 */
int
rtrequest_newmsg(const int req, const struct sockaddr *dst,
	const struct sockaddr *gateway, const struct sockaddr *netmask,
	const int flags)
{
	int error;
	struct rtentry *ret_nrt = NULL;

	KASSERT(req == RTM_ADD || req == RTM_DELETE);

	error = rtrequest(req, dst, gateway, netmask, flags, &ret_nrt);
	if (error != 0)
		return error;

	KASSERT(ret_nrt != NULL);

	rt_newmsg(req, ret_nrt); /* tell user process */
	/*
	 * On RTM_DELETE we hold the last reference to the detached entry,
	 * so schedule its destruction; on RTM_ADD the table keeps its own
	 * reference and we merely drop ours.
	 */
	if (req == RTM_DELETE)
		rt_free(ret_nrt);
	else
		rt_unref(ret_nrt);

	return 0;
}

/*
 * Resolve the ifnet named by info's RTAX_IFP sockaddr (AF_LINK only).
 * On success info->rti_ifp is set and returned with a psref reference
 * the caller must release with if_put().  Returns NULL if rti_ifp was
 * already set or no matching interface is found.
 */
static struct ifnet *
rt_getifp(struct rt_addrinfo *info, struct psref *psref)
{
	const struct sockaddr *ifpaddr = info->rti_info[RTAX_IFP];

	if (info->rti_ifp != NULL)
		return NULL;
	/*
	 * ifp may be specified by sockaddr_dl when protocol address
	 * is ambiguous
	 */
	if (ifpaddr != NULL && ifpaddr->sa_family == AF_LINK) {
		struct ifaddr *ifa;
		int s = pserialize_read_enter();

		ifa = ifa_ifwithnet(ifpaddr);
		if (ifa != NULL)
			info->rti_ifp = if_get_byindex(ifa->ifa_ifp->if_index,
			    psref);
		pserialize_read_exit(s);
	}

	return info->rti_ifp;
}

/*
 * Choose an ifaddr for the route described by info, trying in order:
 * the explicit RTAX_IFA address, an address on rti_ifp matching
 * ifa/gateway/dst, then a route-based lookup.  On success info->rti_ifa
 * (and rti_ifp if unset) are filled in and the ifaddr is returned with
 * a psref reference held for the caller.
 */
static struct ifaddr *
rt_getifa(struct rt_addrinfo *info, struct psref *psref)
{
	struct ifaddr *ifa = NULL;
	const struct sockaddr *dst = info->rti_info[RTAX_DST];
	const struct sockaddr *gateway = info->rti_info[RTAX_GATEWAY];
	const struct sockaddr *ifaaddr = info->rti_info[RTAX_IFA];
	int flags = info->rti_flags;
	const struct sockaddr *sa;

	if (info->rti_ifa == NULL && ifaaddr != NULL) {
		ifa = ifa_ifwithaddr_psref(ifaaddr, psref);
		if (ifa != NULL)
			goto got;
	}

	/* Fall back to the most specific address we were given. */
	sa = ifaaddr != NULL ? ifaaddr :
	    (gateway != NULL ? gateway : dst);
	if (sa != NULL && info->rti_ifp != NULL)
		ifa = ifaof_ifpforaddr_psref(sa, info->rti_ifp, psref);
	else if (dst != NULL && gateway != NULL)
		ifa = ifa_ifwithroute_psref(flags, dst, gateway, psref);
	else if (sa != NULL)
		ifa = ifa_ifwithroute_psref(flags, sa, sa, psref);
	if (ifa == NULL)
		return NULL;
got:
	if (ifa->ifa_getifa != NULL) {
		/* FIXME ifa_getifa is NOMPSAFE */
		ifa = (*ifa->ifa_getifa)(ifa, dst);
		if (ifa == NULL)
			return NULL;
		/* Take a reference on the substituted ifaddr as well. */
		ifa_acquire(ifa, psref);
	}
	info->rti_ifa = ifa;
	if (info->rti_ifp == NULL)
		info->rti_ifp = ifa->ifa_ifp;
	return ifa;
}

/*
 * Core routing-table request handler (RTM_ADD/RTM_DELETE/RTM_GET).
 * If it succeeds and ret_nrt isn't NULL, refcnt of ret_nrt is incremented.
 * The caller has to rtfree it by itself.
 */
int
rtrequest1(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt)
{
	int s = splsoftnet(), ss;
	int error = 0, rc;
	struct rtentry *rt;
	rtbl_t *rtbl;
	struct ifaddr *ifa = NULL;
	struct sockaddr_storage maskeddst;
	const struct sockaddr *dst = info->rti_info[RTAX_DST];
	const struct sockaddr *gateway = info->rti_info[RTAX_GATEWAY];
	const struct sockaddr *netmask = info->rti_info[RTAX_NETMASK];
	int flags = info->rti_flags;
	struct psref psref_ifp, psref_ifa;
	int bound = 0;
	struct ifnet *ifp = NULL;
	bool need_to_release_ifa = true;
	bool need_unlock = true;
#define senderr(x) { error = x ; goto bad; }

	RT_WLOCK();

	bound = curlwp_bind();
	if ((rtbl = rt_gettable(dst->sa_family)) == NULL)
		senderr(ESRCH);
	if (flags & RTF_HOST)
		netmask = NULL;	/* host routes carry no mask */
	switch (req) {
	case RTM_DELETE:
		if (netmask) {
			rt_maskedcopy(dst, (struct sockaddr *)&maskeddst,
			    netmask);
			dst = (struct sockaddr *)&maskeddst;
		}
		if ((rt = rt_lookup(rtbl, dst, netmask)) == NULL)
			senderr(ESRCH);
		if ((rt = rt_deladdr(rtbl, dst, netmask)) == NULL)
			senderr(ESRCH);
		rt->rt_flags &= ~RTF_UP;
		if ((ifa = rt->rt_ifa)) {
			if (ifa->ifa_flags & IFA_ROUTE &&
			    rt_ifa_connected(rt, ifa)) {
				RT_DPRINTF("rt->_rt_key = %p, ifa = %p, "
				    "deleted IFA_ROUTE\n",
				    (void *)rt->_rt_key, (void *)ifa);
				ifa->ifa_flags &= ~IFA_ROUTE;
			}
			if (ifa->ifa_rtrequest)
				ifa->ifa_rtrequest(RTM_DELETE, rt, info);
			/* Prevent the release in the "bad" path below. */
			ifa = NULL;
		}
		rttrash++;
		if (ret_nrt) {
			*ret_nrt = rt;
			rt_ref(rt);
			RT_REFCNT_TRACE(rt);
		}
		rtcache_invalidate();
		RT_UNLOCK();
		need_unlock = false;
		rt_timer_remove_all(rt);
#if defined(INET) || defined(INET6)
		if (netmask != NULL)
			lltable_prefix_free(dst->sa_family, dst, netmask, 0);
#endif
		if (ret_nrt == NULL) {
			/* Adjust the refcount */
			rt_ref(rt);
			RT_REFCNT_TRACE(rt);
			rt_free(rt);
		}
		break;

	case RTM_ADD:
		if (info->rti_ifa == NULL) {
			ifp = rt_getifp(info, &psref_ifp);
			ifa = rt_getifa(info, &psref_ifa);
			if (ifa == NULL)
				senderr(ENETUNREACH);
		} else {
			/* Caller should have a reference of ifa */
			ifa = info->rti_ifa;
			need_to_release_ifa = false;
		}
		rt = pool_get(&rtentry_pool, PR_NOWAIT);
		if (rt == NULL)
			senderr(ENOBUFS);
		memset(rt, 0, sizeof(*rt));
		rt->rt_flags = RTF_UP | (flags & ~RTF_DONTCHANGEIFA);
		LIST_INIT(&rt->rt_timer);

		RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
		if (netmask) {
			rt_maskedcopy(dst, (struct sockaddr *)&maskeddst,
			    netmask);
			rt_setkey(rt, (struct sockaddr *)&maskeddst, M_NOWAIT);
		} else {
			rt_setkey(rt, dst, M_NOWAIT);
		}
		RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
		if (rt_getkey(rt) == NULL ||
		    rt_setgate(rt, gateway) != 0) {
			pool_put(&rtentry_pool, rt);
			senderr(ENOBUFS);
		}

		rt_set_ifa(rt, ifa);
		if (info->rti_info[RTAX_TAG] != NULL) {
			const struct sockaddr *tag;
			tag = rt_settag(rt, info->rti_info[RTAX_TAG]);
			/*
			 * NOTE(review): unlike the rt_setgate failure path
			 * above, this path does not pool_put(rt), so the
			 * freshly allocated rtentry appears to be leaked on
			 * tag allocation failure — verify upstream.
			 */
			if (tag == NULL)
				senderr(ENOBUFS);
		}
		RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);

		ss = pserialize_read_enter();
		if (info->rti_info[RTAX_IFP] != NULL) {
			struct ifaddr *ifa2;
			ifa2 = ifa_ifwithnet(info->rti_info[RTAX_IFP]);
			if (ifa2 != NULL)
				rt->rt_ifp = ifa2->ifa_ifp;
			else
				rt->rt_ifp = ifa->ifa_ifp;
		} else
			rt->rt_ifp = ifa->ifa_ifp;
		pserialize_read_exit(ss);
		cv_init(&rt->rt_cv, "rtentry");
		psref_target_init(&rt->rt_psref, rt_psref_class);

		RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
		rc = rt_addaddr(rtbl, rt, netmask);
		RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
		if (rc != 0) {
			ifafree(ifa); /* for rt_set_ifa above */
			cv_destroy(&rt->rt_cv);
			rt_destroy(rt);
			pool_put(&rtentry_pool, rt);
			senderr(rc);
		}
		RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
		if (ifa->ifa_rtrequest)
			ifa->ifa_rtrequest(req, rt, info);
		if (need_to_release_ifa)
			ifa_release(ifa, &psref_ifa);
		ifa = NULL;
		if_put(ifp, &psref_ifp);
		ifp = NULL;
		RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
		if (ret_nrt) {
			*ret_nrt = rt;
			rt_ref(rt);
			RT_REFCNT_TRACE(rt);
		}
		rtcache_invalidate();
		RT_UNLOCK();
		need_unlock = false;
		break;
	case RTM_GET:
		if (netmask != NULL) {
			rt_maskedcopy(dst, (struct sockaddr *)&maskeddst,
			    netmask);
			dst = (struct sockaddr *)&maskeddst;
		}
		if ((rt = rt_lookup(rtbl, dst, netmask)) == NULL)
			senderr(ESRCH);
		if (ret_nrt != NULL) {
			*ret_nrt = rt;
			rt_ref(rt);
			RT_REFCNT_TRACE(rt);
		}
		break;
	}
bad:
	if (need_to_release_ifa)
		ifa_release(ifa, &psref_ifa);
	if_put(ifp, &psref_ifp);
	curlwp_bindx(bound);
	if (need_unlock)
		RT_UNLOCK();
	splx(s);
	return error;
}

/*
 * Replace rt's gateway with a private copy of gate.  Caller must hold
 * the routing table write lock.  Returns 0 or ENOMEM.
 */
int
rt_setgate(struct rtentry *rt, const struct sockaddr *gate)
{
	struct sockaddr *new, *old;

	KASSERT(RT_WLOCKED());
	KASSERT(rt->_rt_key != NULL);
	RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);

	new = sockaddr_dup(gate, M_ZERO | M_NOWAIT);
	if (new == NULL)
		return ENOMEM;

	old = rt->rt_gateway;
	rt->rt_gateway = new;
	if (old != NULL)
		sockaddr_free(old);

	KASSERT(rt->_rt_key != NULL);
	RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);

	if (rt->rt_flags & RTF_GATEWAY) {
		struct rtentry *gwrt;

		gwrt = rtalloc1_locked(gate, 1, false, true);
		/*
		 * If we switched gateways, grab the MTU from the new
		 * gateway route if the current MTU is greater than the
		 * MTU of the gateway.
		 * Note that, if the MTU of gateway is 0, we will reset the
		 * MTU of the route to run PMTUD again from scratch. XXX
		 */
		if (gwrt != NULL) {
			KASSERT(gwrt->_rt_key != NULL);
			RT_DPRINTF("gwrt->_rt_key = %p\n", gwrt->_rt_key);
			if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0 &&
			    rt->rt_rmx.rmx_mtu &&
			    rt->rt_rmx.rmx_mtu > gwrt->rt_rmx.rmx_mtu) {
				rt->rt_rmx.rmx_mtu = gwrt->rt_rmx.rmx_mtu;
			}
			rt_unref(gwrt);
		}
	}
	KASSERT(rt->_rt_key != NULL);
	RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
	return 0;
}

/*
 * For a route-change request, pick the ifaddr (and its ifnet) that the
 * updated route should use, honoring explicit -ifp/-ifa/gateway options
 * in that order.  On success both *ifp and the returned ifaddr carry
 * psref references for the caller; on failure both are NULL/released.
 */
static struct ifaddr *
rt_update_get_ifa(const struct rt_addrinfo info, const struct rtentry *rt,
    struct ifnet **ifp, struct psref *psref_ifp, struct psref *psref)
{
	struct ifaddr *ifa = NULL;

	*ifp = NULL;
	if (info.rti_info[RTAX_IFP] != NULL) {
		ifa = ifa_ifwithnet_psref(info.rti_info[RTAX_IFP], psref);
		if (ifa == NULL)
			goto next;
		*ifp = ifa->ifa_ifp;
		if_acquire(*ifp, psref_ifp);
		if (info.rti_info[RTAX_IFA] == NULL &&
		    info.rti_info[RTAX_GATEWAY] == NULL)
			goto next;
		ifa_release(ifa, psref);
		if (info.rti_info[RTAX_IFA] == NULL) {
			/* route change <dst> <gw> -ifp <if> */
			ifa = ifaof_ifpforaddr_psref(info.rti_info[RTAX_GATEWAY],
			    *ifp, psref);
		} else {
			/* route change <dst> -ifp <if> -ifa <addr> */
			ifa = ifa_ifwithaddr_psref(info.rti_info[RTAX_IFA], psref);
			if (ifa != NULL)
				goto out;
			ifa = ifaof_ifpforaddr_psref(info.rti_info[RTAX_IFA],
			    *ifp, psref);
		}
		goto out;
	}
next:
	if (info.rti_info[RTAX_IFA] != NULL) {
		/* route change <dst> <gw> -ifa <addr> */
		ifa = ifa_ifwithaddr_psref(info.rti_info[RTAX_IFA], psref);
		if (ifa != NULL)
			goto out;
	}
	if (info.rti_info[RTAX_GATEWAY] != NULL) {
		/* route change <dst> <gw> */
		ifa = ifa_ifwithroute_psref(rt->rt_flags, rt_getkey(rt),
		    info.rti_info[RTAX_GATEWAY], psref);
	}
out:
	/* Keep ifa and ifp references consistent: both or neither. */
	if (ifa != NULL && *ifp == NULL) {
		*ifp = ifa->ifa_ifp;
		if_acquire(*ifp, psref_ifp);
	}
	if (ifa == NULL && *ifp != NULL) {
		if_put(*ifp, psref_ifp);
		*ifp = NULL;
	}
	return ifa;
}

/*
 * Apply a route-change request (new gateway, tag, ifaddr/ifp, metrics,
 * flags) to an existing rtentry.  rtm is the request message passed
 * through to rt_setmetrics().  Returns 0 or an errno.
 */
int
rt_update(struct rtentry *rt, struct rt_addrinfo *info, void *rtm)
{
	int error = 0;
	struct ifnet *ifp = NULL, *new_ifp = NULL;
	struct ifaddr *ifa = NULL, *new_ifa;
	struct psref psref_ifa, psref_new_ifa, psref_ifp, psref_new_ifp;
	bool newgw, ifp_changed = false;

	RT_WLOCK();
	/*
	 * New gateway could require new ifaddr, ifp;
	 * flags may also be different; ifp may be specified
	 * by ll sockaddr when protocol address is ambiguous
	 */
	newgw = info->rti_info[RTAX_GATEWAY] != NULL &&
	    sockaddr_cmp(info->rti_info[RTAX_GATEWAY], rt->rt_gateway) != 0;

	if (newgw || info->rti_info[RTAX_IFP] != NULL ||
	    info->rti_info[RTAX_IFA] != NULL) {
		ifp = rt_getifp(info, &psref_ifp);
		/* info refers ifp so we need to keep a reference */
		ifa = rt_getifa(info, &psref_ifa);
		if (ifa == NULL) {
			error = ENETUNREACH;
			goto out;
		}
	}
	if (newgw) {
		error = rt_setgate(rt, info->rti_info[RTAX_GATEWAY]);
		if (error != 0)
			goto out;
	}
	if (info->rti_info[RTAX_TAG]) {
		const struct sockaddr *tag;
		tag = rt_settag(rt, info->rti_info[RTAX_TAG]);
		if (tag == NULL) {
			error = ENOBUFS;
			goto out;
		}
	}
	/*
	 * New gateway could require new ifaddr, ifp;
	 * flags may also be different; ifp may be specified
	 * by ll sockaddr when protocol address is ambiguous
	 */
	new_ifa = rt_update_get_ifa(*info, rt, &new_ifp, &psref_new_ifp,
	    &psref_new_ifa);
	if (new_ifa != NULL) {
		ifa_release(ifa, &psref_ifa);
		ifa = new_ifa;
	}
	if (ifa) {
		struct ifaddr *oifa = rt->rt_ifa;
		if (oifa != ifa && !ifa_is_destroying(ifa) &&
		    new_ifp != NULL && !if_is_deactivated(new_ifp)) {
			if (oifa && oifa->ifa_rtrequest)
				oifa->ifa_rtrequest(RTM_DELETE, rt, info);
			rt_replace_ifa(rt, ifa);
			rt->rt_ifp = new_ifp;
			ifp_changed = true;
		}
		if (new_ifa == NULL)
			ifa_release(ifa, &psref_ifa);
		/* To avoid ifa_release below */
		ifa = NULL;
	}
	ifa_release(new_ifa, &psref_new_ifa);
	if (new_ifp && rt->rt_ifp != new_ifp && !if_is_deactivated(new_ifp)) {
		rt->rt_ifp = new_ifp;
		ifp_changed = true;
	}
	rt_setmetrics(rtm, rt);
	if (rt->rt_flags != info->rti_flags) {
		rt->rt_flags = (info->rti_flags & ~PRESERVED_RTF) |
		    (rt->rt_flags & PRESERVED_RTF);
	}
	if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest)
		rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, info);
#if defined(INET) || defined(INET6)
	/* Stale L2 entries would point to the old interface; flush them. */
	if (ifp_changed && rt_mask(rt) != NULL)
		lltable_prefix_free(rt_getkey(rt)->sa_family, rt_getkey(rt),
		    rt_mask(rt), 0);
#else
	(void)ifp_changed; /* XXX gcc */
#endif
out:
	ifa_release(ifa, &psref_ifa);
	if_put(new_ifp, &psref_new_ifp);
	if_put(ifp, &psref_ifp);

	RT_UNLOCK();

	return error;
}

/*
 * Copy src into dst, ANDing the address bytes with netmask and zeroing
 * anything beyond the mask.  Used to canonicalize network-route keys.
 */
static void
rt_maskedcopy(const struct sockaddr *src, struct sockaddr *dst,
	const struct sockaddr *netmask)
{
	const char *netmaskp = &netmask->sa_data[0],
	    *srcp = &src->sa_data[0];
	char *dstp = &dst->sa_data[0];
	const char *maskend = (char *)dst + MIN(netmask->sa_len, src->sa_len);
	const char *srcend = (char *)dst + src->sa_len;

	dst->sa_len = src->sa_len;
	dst->sa_family = src->sa_family;

	while (dstp < maskend)
		*dstp++ = *srcp++ & *netmaskp++;
	if (dstp < srcend)
		memset(dstp, 0, (size_t)(srcend - dstp));
}

/*
 * Inform the routing socket of a route change.
 */
void
rt_newmsg(const int cmd, const struct rtentry *rt)
{
	struct rt_addrinfo info;

	memset((void *)&info, 0, sizeof(info));
	info.rti_info[RTAX_DST] = rt_getkey(rt);
	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
	info.rti_info[RTAX_NETMASK] = rt_mask(rt);
	if (rt->rt_ifp) {
		info.rti_info[RTAX_IFP] = rt->rt_ifp->if_dl->ifa_addr;
		info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
	}

	rt_missmsg(cmd, &info, rt->rt_flags, 0);
}

/*
 * Set up or tear down a routing table entry, normally
 * for an interface.
 */
int
rtinit(struct ifaddr *ifa, int cmd, int flags)
{
	struct rtentry *rt;
	struct sockaddr *dst, *odst;
	struct sockaddr_storage maskeddst;
	struct rtentry *nrt = NULL;
	int error;
	struct rt_addrinfo info;

	dst = flags & RTF_HOST ? ifa->ifa_dstaddr : ifa->ifa_addr;
	if (cmd == RTM_DELETE) {
		if ((flags & RTF_HOST) == 0 && ifa->ifa_netmask) {
			/* Delete subnet route for this interface */
			odst = dst;
			dst = (struct sockaddr *)&maskeddst;
			rt_maskedcopy(odst, dst, ifa->ifa_netmask);
		}
		/* Refuse to delete a route that belongs to another ifaddr. */
		if ((rt = rtalloc1(dst, 0)) != NULL) {
			if (rt->rt_ifa != ifa) {
				rt_unref(rt);
				return (flags & RTF_HOST) ? EHOSTUNREACH
				    : ENETUNREACH;
			}
			rt_unref(rt);
		}
	}
	memset(&info, 0, sizeof(info));
	info.rti_ifa = ifa;
	info.rti_flags = flags | ifa->ifa_flags | RTF_DONTCHANGEIFA;
	info.rti_info[RTAX_DST] = dst;
	info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;

	/*
	 * XXX here, it seems that we are assuming that ifa_netmask is NULL
	 * for RTF_HOST. bsdi4 passes NULL explicitly (via intermediate
	 * variable) when RTF_HOST is 1. still not sure if i can safely
	 * change it to meet bsdi4 behavior.
	 */
	if (cmd != RTM_LLINFO_UPD)
		info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
	error = rtrequest1((cmd == RTM_LLINFO_UPD) ? RTM_GET : cmd, &info,
	    &nrt);
	if (error != 0)
		return error;

	rt = nrt;
	RT_REFCNT_TRACE(rt);
	switch (cmd) {
	case RTM_DELETE:
		rt_newmsg(cmd, rt);
		rt_free(rt);
		break;
	case RTM_LLINFO_UPD:
		/* NOTE(review): the cmd == RTM_LLINFO_UPD test is redundant
		 * inside this case of the switch on cmd. */
		if (cmd == RTM_LLINFO_UPD && ifa->ifa_rtrequest != NULL)
			ifa->ifa_rtrequest(RTM_LLINFO_UPD, rt, &info);
		rt_newmsg(RTM_CHANGE, rt);
		rt_unref(rt);
		break;
	case RTM_ADD:
		KASSERT(rt->rt_ifa == ifa);
		rt_newmsg(cmd, rt);
		rt_unref(rt);
		RT_REFCNT_TRACE(rt);
		break;
	}
	return error;
}

/*
 * Create a local route entry for the address.
 * Announce the addition of the address and the route to the routing socket.
 */
int
rt_ifa_addlocal(struct ifaddr *ifa)
{
	struct rtentry *rt;
	int e;

	/* If there is no loopback entry, allocate one. */
	rt = rtalloc1(ifa->ifa_addr, 0);
#ifdef RT_DEBUG
	if (rt != NULL)
		dump_rt(rt);
#endif
	if (rt == NULL || (rt->rt_flags & RTF_HOST) == 0 ||
	    (rt->rt_ifp->if_flags & IFF_LOOPBACK) == 0)
	{
		struct rt_addrinfo info;
		struct rtentry *nrt;

		memset(&info, 0, sizeof(info));
		info.rti_flags = RTF_HOST | RTF_LOCAL | RTF_DONTCHANGEIFA;
		info.rti_info[RTAX_DST] = ifa->ifa_addr;
		info.rti_info[RTAX_GATEWAY] =
		    (const struct sockaddr *)ifa->ifa_ifp->if_sadl;
		info.rti_ifa = ifa;
		nrt = NULL;
		e = rtrequest1(RTM_ADD, &info, &nrt);
		rt_addrmsg_rt(RTM_ADD, ifa, e, nrt);
		if (nrt != NULL) {
			KASSERT(nrt->rt_ifa == ifa);
#ifdef RT_DEBUG
			dump_rt(nrt);
#endif
			rt_unref(nrt);
			RT_REFCNT_TRACE(nrt);
		}
	} else {
		/* A suitable loopback host route already exists. */
		e = 0;
		rt_addrmsg(RTM_NEWADDR, ifa);
	}
	if (rt != NULL)
		rt_unref(rt);
	return e;
}

/*
 * Remove the local route entry for the address.
 * Announce the removal of the address and the route to the routing socket.
 */
int
rt_ifa_remlocal(struct ifaddr *ifa, struct ifaddr *alt_ifa)
{
	struct rtentry *rt;
	int e = 0;

	rt = rtalloc1(ifa->ifa_addr, 0);

	/*
	 * Before deleting, check if a corresponding loopbacked
	 * host route surely exists. With this check, we can avoid
	 * deleting an interface direct route whose destination is
	 * the same as the address being removed. This can happen
	 * when removing a subnet-router anycast address on an
	 * interface attached to a shared medium.
	 */
	if (rt != NULL &&
	    (rt->rt_flags & RTF_HOST) &&
	    (rt->rt_ifp->if_flags & IFF_LOOPBACK))
	{
		/* If we cannot replace the route's ifaddr with the equivalent
		 * ifaddr of another interface, I believe it is safest to
		 * delete the route.
		 */
		if (alt_ifa == NULL) {
			e = rtdeletemsg(rt);
			if (e == 0) {
				rt_unref(rt);
				rt_free(rt);
				rt = NULL;
			}
			rt_addrmsg(RTM_DELADDR, ifa);
		} else {
#ifdef NET_MPSAFE
			int error = rt_update_prepare(rt);
			if (error == 0) {
				rt_replace_ifa(rt, alt_ifa);
				rt_update_finish(rt);
			} else {
				/*
				 * If error != 0, the rtentry is being
				 * destroyed, so doing nothing doesn't
				 * matter.
				 */
			}
#else
			rt_replace_ifa(rt, alt_ifa);
#endif
			rt_newmsg(RTM_CHANGE, rt);
		}
	} else
		rt_addrmsg(RTM_DELADDR, ifa);
	if (rt != NULL)
		rt_unref(rt);
	return e;
}

/*
 * Route timer routines.  These routes allow functions to be called
 * for various routes at any time.  This is useful in supporting
 * path MTU discovery and redirect route deletion.
 *
 * This is similar to some BSDI internal functions, but it provides
 * for multiple queues for efficiency's sake...
 */

LIST_HEAD(, rttimer_queue) rttimer_queue_head;
static int rt_init_done = 0;

/*
 * Some subtle order problems with domain initialization mean that
 * we cannot count on this being run from rt_init before various
 * protocol initializations are done.  Therefore, we make sure
 * that this is run when the first queue is added...
 */

static void rt_timer_work(struct work *, void *);

/* One-time initialization of the route-timer machinery (lock, queue
 * list, callout and workqueue).  Called lazily from
 * rt_timer_queue_create(). */
static void
rt_timer_init(void)
{
	int error;

	assert(rt_init_done == 0);

	/* XXX should be in rt_init */
	rw_init(&rt_lock);

	LIST_INIT(&rttimer_queue_head);
	callout_init(&rt_timer_ch, CALLOUT_MPSAFE);
	error = workqueue_create(&rt_timer_wq, "rt_timer",
	    rt_timer_work, NULL, PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE);
	if (error)
		panic("%s: workqueue_create failed (%d)\n", __func__, error);
	callout_reset(&rt_timer_ch, hz, rt_timer_timer, NULL);
	rt_init_done = 1;
}

/* Allocate a new timer queue whose entries fire `timeout' seconds after
 * they are added.  Returns NULL on allocation failure. */
struct rttimer_queue *
rt_timer_queue_create(u_int timeout)
{
	struct rttimer_queue *rtq;

	if (rt_init_done == 0)
		rt_timer_init();

	R_Malloc(rtq, struct rttimer_queue *, sizeof *rtq);
	if (rtq == NULL)
		return NULL;
	memset(rtq, 0, sizeof(*rtq));

	rtq->rtq_timeout = timeout;
	TAILQ_INIT(&rtq->rtq_head);
	RT_WLOCK();
	LIST_INSERT_HEAD(&rttimer_queue_head, rtq, rtq_link);
	RT_UNLOCK();

	return rtq;
}

/* Change the timeout of an existing queue; affects subsequent firing
 * checks only. */
void
rt_timer_queue_change(struct rttimer_queue *rtq, long timeout)
{

	rtq->rtq_timeout = timeout;
}

/* Fire and remove every timer on the queue.  Called with the write lock
 * held; the lock is dropped around each callback invocation. */
static void
rt_timer_queue_remove_all(struct rttimer_queue *rtq)
{
	struct rttimer *r;

	RT_ASSERT_WLOCK();

	while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL) {
		LIST_REMOVE(r, rtt_link);
		TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
		/* The callback (rtt_func) is expected to drop this ref. */
		rt_ref(r->rtt_rt); /* XXX */
		RT_REFCNT_TRACE(r->rtt_rt);
		RT_UNLOCK();
		(*r->rtt_func)(r->rtt_rt, r);
		pool_put(&rttimer_pool, r);
		RT_WLOCK();
		if (rtq->rtq_count > 0)
			rtq->rtq_count--;
		else
			printf("rt_timer_queue_remove_all: "
			    "rtq_count reached 0\n");
	}
}

/* Drain and unlink a timer queue.  The structure itself is not freed. */
void
rt_timer_queue_destroy(struct rttimer_queue *rtq)
{

	RT_WLOCK();
	rt_timer_queue_remove_all(rtq);
	LIST_REMOVE(rtq, rtq_link);
	RT_UNLOCK();

	/*
	 * Caller is responsible for freeing the rttimer_queue structure.
	 */
}

/* Number of timers currently on the queue (unlocked read). */
unsigned long
rt_timer_count(struct rttimer_queue *rtq)
{
	return rtq->rtq_count;
}

/* Discard (without firing) all timers attached to a route, e.g. when
 * the route is deleted. */
static void
rt_timer_remove_all(struct rtentry *rt)
{
	struct rttimer *r;

	RT_WLOCK();
	while ((r = LIST_FIRST(&rt->rt_timer)) != NULL) {
		LIST_REMOVE(r, rtt_link);
		TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next);
		if (r->rtt_queue->rtq_count > 0)
			r->rtt_queue->rtq_count--;
		else
			printf("rt_timer_remove_all: rtq_count reached 0\n");
		pool_put(&rttimer_pool, r);
	}
	RT_UNLOCK();
}

/* Arm a timer on `queue' that will invoke func(rt, timer) when it
 * expires.  An existing timer on rt with the same func is replaced.
 * Returns 0 or ENOBUFS. */
int
rt_timer_add(struct rtentry *rt,
	void (*func)(struct rtentry *, struct rttimer *),
	struct rttimer_queue *queue)
{
	struct rttimer *r;

	KASSERT(func != NULL);
	RT_WLOCK();
	/*
	 * If there's already a timer with this action, destroy it before
	 * we add a new one.
	 */
	LIST_FOREACH(r, &rt->rt_timer, rtt_link) {
		if (r->rtt_func == func)
			break;
	}
	if (r != NULL) {
		/* Reuse the old rttimer allocation for the new timer. */
		LIST_REMOVE(r, rtt_link);
		TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next);
		if (r->rtt_queue->rtq_count > 0)
			r->rtt_queue->rtq_count--;
		else
			printf("rt_timer_add: rtq_count reached 0\n");
	} else {
		r = pool_get(&rttimer_pool, PR_NOWAIT);
		if (r == NULL) {
			RT_UNLOCK();
			return ENOBUFS;
		}
	}

	memset(r, 0, sizeof(*r));

	r->rtt_rt = rt;
	r->rtt_time = time_uptime;
	r->rtt_func = func;
	r->rtt_queue = queue;
	LIST_INSERT_HEAD(&rt->rt_timer, r, rtt_link);
	TAILQ_INSERT_TAIL(&queue->rtq_head, r, rtt_next);
	r->rtt_queue->rtq_count++;

	RT_UNLOCK();

	return 0;
}

/* Workqueue handler: fire every expired timer on every queue, then
 * re-arm the periodic callout. */
static void
rt_timer_work(struct work *wk, void *arg)
{
	struct rttimer_queue *rtq;
	struct rttimer *r;

	RT_WLOCK();
	LIST_FOREACH(rtq, &rttimer_queue_head, rtq_link) {
		while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL &&
		    (r->rtt_time + rtq->rtq_timeout) < time_uptime) {
			LIST_REMOVE(r, rtt_link);
			TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
			/*
			 * Take a reference to avoid the rtentry is freed
			 * accidentally after RT_UNLOCK.  The callback
			 * (rtt_func) must rt_unref it by itself.
			 */
			rt_ref(r->rtt_rt);
			RT_REFCNT_TRACE(r->rtt_rt);
			RT_UNLOCK();
			(*r->rtt_func)(r->rtt_rt, r);
			pool_put(&rttimer_pool, r);
			RT_WLOCK();
			if (rtq->rtq_count > 0)
				rtq->rtq_count--;
			else
				printf("rt_timer_timer: rtq_count reached 0\n");
		}
	}
	RT_UNLOCK();

	callout_reset(&rt_timer_ch, hz, rt_timer_timer, NULL);
}

/* Callout handler: defer the actual timer scan to the workqueue so it
 * can take the (sleepable) routing lock. */
static void
rt_timer_timer(void *arg)
{

	workqueue_enqueue(rt_timer_wq, &rt_timer_wk, NULL);
}

/* Fill a route cache from its destination: look the route up and, if
 * it is RTF_UP, cache it together with the current generation number.
 * `flag' is passed to rtalloc1 (1 = allow cloning). */
static struct rtentry *
_rtcache_init(struct route *ro, int flag)
{
	struct rtentry *rt;

	rtcache_invariants(ro);
	KASSERT(ro->_ro_rt == NULL);

	if (rtcache_getdst(ro) == NULL)
		return NULL;
	rt = rtalloc1(rtcache_getdst(ro), flag);
	if (rt != NULL) {
		RT_RLOCK();
		if (ISSET(rt->rt_flags, RTF_UP)) {
			ro->_ro_rt = rt;
			ro->ro_rtcache_generation = rtcache_generation;
			rtcache_ref(rt, ro);
		}
		RT_UNLOCK();
		rt_unref(rt);
	}

	rtcache_invariants(ro);
	return ro->_ro_rt;
}

struct rtentry *
rtcache_init(struct route *ro)
{

	return _rtcache_init(ro, 1);
}

struct rtentry *
rtcache_init_noclone(struct route *ro)
{

	return _rtcache_init(ro, 0);
}

/* Drop any cached route and redo the lookup for the same destination. */
struct rtentry *
rtcache_update(struct route *ro, int clone)
{

	ro->_ro_rt = NULL;
	return _rtcache_init(ro, clone);
}

/* Copy old_ro's destination and cached route into new_ro. */
void
rtcache_copy(struct route *new_ro, struct route *old_ro)
{
	struct rtentry *rt;
	int ret;

	KASSERT(new_ro != old_ro);
	rtcache_invariants(new_ro);
	rtcache_invariants(old_ro);

	rt = rtcache_validate(old_ro);

	if (rtcache_getdst(old_ro) == NULL)
		goto out;
	ret = rtcache_setdst(new_ro, rtcache_getdst(old_ro));
	if (ret != 0)
		goto out;

	RT_RLOCK();
	new_ro->_ro_rt = rt;
	new_ro->ro_rtcache_generation = rtcache_generation;
	RT_UNLOCK();
	rtcache_invariants(new_ro);
out:
	rtcache_unref(rt, old_ro);
	return;
}

#if defined(RT_DEBUG) && defined(NET_MPSAFE)
static void
rtcache_trace(const char *func, struct rtentry *rt, struct route *ro)
{
	char dst[64];

	sockaddr_format(ro->ro_sa, dst, 64);
	printf("trace: %s:\tdst=%s cpu=%d lwp=%p psref=%p target=%p\n", func, dst,
	    cpu_index(curcpu()), curlwp, &ro->ro_psref, &rt->rt_psref);
}
#define RTCACHE_PSREF_TRACE(rt, ro)	rtcache_trace(__func__, (rt), (ro))
#else
#define RTCACHE_PSREF_TRACE(rt, ro)	do {} while (0)
#endif

/* Acquire a psref on the cached rtentry on behalf of ro (NET_MPSAFE
 * only; otherwise a no-op). */
static void
rtcache_ref(struct rtentry *rt, struct route *ro)
{

	KASSERT(rt != NULL);

#ifdef NET_MPSAFE
	RTCACHE_PSREF_TRACE(rt, ro);
	ro->ro_bound = curlwp_bind();
	/* XXX Use a real caller's address */
	PSREF_DEBUG_FILL_RETURN_ADDRESS(&ro->ro_psref);
	psref_acquire(&ro->ro_psref, &rt->rt_psref, rt_psref_class);
#endif
}

void
rtcache_unref(struct rtentry *rt, struct route *ro)
{

	if (rt == NULL)
		return;

#ifdef NET_MPSAFE
	psref_release(&ro->ro_psref, &rt->rt_psref, rt_psref_class);
	curlwp_bindx(ro->ro_bound);
	RTCACHE_PSREF_TRACE(rt, ro);
#endif
}

/*
 * Return the cached rtentry if the cache is still valid (generation
 * matches and the route is RTF_UP), with a reference held; otherwise
 * NULL.  May wait for a concurrent route update under NET_MPSAFE.
 */
struct rtentry *
rtcache_validate(struct route *ro)
{
	struct rtentry *rt = NULL;

#ifdef NET_MPSAFE
retry:
#endif
	rtcache_invariants(ro);
	RT_RLOCK();
	if (ro->ro_rtcache_generation != rtcache_generation) {
		/* The cache is invalidated */
		rt = NULL;
		goto out;
	}

	rt = ro->_ro_rt;
	if (rt == NULL)
		goto out;

	if ((rt->rt_flags & RTF_UP) == 0) {
		rt = NULL;
		goto out;
	}
#ifdef NET_MPSAFE
	if (ISSET(rt->rt_flags, RTF_UPDATING)) {
		if (rt_wait_ok()) {
			RT_UNLOCK();

			/* We can wait until the update is complete */
			rt_update_wait();
			goto retry;
		} else {
			rt = NULL;
		}
	} else
#endif
		rtcache_ref(rt, ro);
out:
	RT_UNLOCK();
	return rt;
}

/*
 * Look up dst via the cache: reuse the cached route when the cached
 * destination matches and is still valid, otherwise redo the lookup.
 * *hitp reports whether the cache was hit.
 */
struct rtentry *
rtcache_lookup2(struct route *ro, const struct sockaddr *dst,
    int clone, int *hitp)
{
	const struct sockaddr *odst;
	struct rtentry *rt = NULL;

	odst = rtcache_getdst(ro);
	if (odst == NULL)
		goto miss;

	if (sockaddr_cmp(odst, dst) != 0) {
		rtcache_free(ro);
		goto miss;
	}

	rt = rtcache_validate(ro);
	if (rt == NULL) {
		ro->_ro_rt = NULL;
		goto miss;
	}

	rtcache_invariants(ro);

	if (hitp != NULL)
		*hitp = 1;
	return rt;
miss:
	if (hitp != NULL)
		*hitp = 0;
	if (rtcache_setdst(ro, dst) == 0)
		rt = _rtcache_init(ro, clone);

	rtcache_invariants(ro);

	return rt;
}

/* Empty the cache: forget the route and free the stored destination. */
void
rtcache_free(struct route *ro)
{

	ro->_ro_rt = NULL;
	if (ro->ro_sa != NULL) {
		sockaddr_free(ro->ro_sa);
		ro->ro_sa = NULL;
	}
	rtcache_invariants(ro);
}

/* Store a copy of sa as the cache's destination, invalidating any
 * cached route.  Returns 0 or ENOMEM. */
int
rtcache_setdst(struct route *ro, const struct sockaddr *sa)
{
	KASSERT(sa != NULL);

	rtcache_invariants(ro);
	if (ro->ro_sa != NULL) {
		if (ro->ro_sa->sa_family == sa->sa_family) {
			/* Same family: overwrite in place. */
			ro->_ro_rt = NULL;
			sockaddr_copy(ro->ro_sa, ro->ro_sa->sa_len, sa);
			rtcache_invariants(ro);
			return 0;
		}
		/* free ro_sa, wrong family */
		rtcache_free(ro);
	}

	KASSERT(ro->_ro_rt == NULL);

	if ((ro->ro_sa = sockaddr_dup(sa, M_ZERO | M_NOWAIT)) == NULL) {
		rtcache_invariants(ro);
		return ENOMEM;
	}
	rtcache_invariants(ro);
	return 0;
}

/* Attach a private copy of tag to the route, replacing any previous
 * tag.  Returns the stored tag, or NULL on allocation failure. */
const struct sockaddr *
rt_settag(struct rtentry *rt, const struct sockaddr *tag)
{
	if (rt->rt_tag != tag) {
		if (rt->rt_tag != NULL)
			sockaddr_free(rt->rt_tag);
		rt->rt_tag = sockaddr_dup(tag, M_ZERO | M_NOWAIT);
	}
	return rt->rt_tag;
}

struct sockaddr *
rt_gettag(const struct rtentry *rt)
{
	return rt->rt_tag;
}

/* Map an RTF_REJECT route to the errno the caller should return, or 0
 * if the packet may be sent. */
int
rt_check_reject_route(const struct rtentry *rt, const struct ifnet *ifp)
{

	if ((rt->rt_flags & RTF_REJECT) != 0) {
		/* Mimic looutput */
		if (ifp->if_flags & IFF_LOOPBACK)
			return (rt->rt_flags & RTF_HOST) ?
			    EHOSTUNREACH : ENETUNREACH;
		else if (rt->rt_rmx.rmx_expire == 0 ||
		    time_uptime < rt->rt_rmx.rmx_expire)
			return (rt->rt_flags & RTF_GATEWAY) ?
			    EHOSTUNREACH : EHOSTDOWN;
	}

	return 0;
}

/* Repeatedly find and delete every route of `family' for which f(rt, v)
 * matches.  The table is re-searched from scratch after each deletion
 * because the lock is dropped for rtrequest(). */
void
rt_delete_matched_entries(sa_family_t family, int (*f)(struct rtentry *, void *),
    void *v)
{

	for (;;) {
		int s;
		int error;
		struct rtentry *rt, *retrt = NULL;

		RT_RLOCK();
		s = splsoftnet();
		rt = rtbl_search_matched_entry(family, f, v);
		if (rt == NULL) {
			splx(s);
			RT_UNLOCK();
			return;
		}
		rt_ref(rt);
		splx(s);
		RT_UNLOCK();

		error = rtrequest(RTM_DELETE, rt_getkey(rt), rt->rt_gateway,
		    rt_mask(rt), rt->rt_flags, &retrt);
		if (error == 0) {
			KASSERT(retrt == rt);
			KASSERT((retrt->rt_flags & RTF_UP) == 0);
			retrt->rt_ifp = NULL;
			rt_unref(rt);
			rt_free(retrt);
		} else if (error == ESRCH) {
			/* Someone deleted the entry already. */
			rt_unref(rt);
		} else {
			log(LOG_ERR, "%s: unable to delete rtentry @ %p, "
			    "error = %d\n", rt->rt_ifp->if_xname, rt, error);
			/* XXX how to treat this case? */
		}
	}
}

/* Walk all routes of `family', calling f(rt, v) for each; caller holds
 * the routing lock. */
static int
rt_walktree_locked(sa_family_t family, int (*f)(struct rtentry *, void *),
    void *v)
{

	return rtbl_walktree(family, f, v);
}

/* Locked wrapper around rt_walktree_locked(). */
int
rt_walktree(sa_family_t family, int (*f)(struct rtentry *, void *), void *v)
{
	int error;

	RT_RLOCK();
	error = rt_walktree_locked(family, f, v);
	RT_UNLOCK();

	return error;
}

#ifdef DDB

#include <machine/db_machdep.h>
#include <ddb/db_interface.h>
#include <ddb/db_output.h>

#define	rt_expire rt_rmx.rmx_expire

/* Dump a sockaddr as a bracketed list of byte values. */
static void
db_print_sa(const struct sockaddr *sa)
{
	int len;
	const u_char *p;

	if (sa == NULL) {
		db_printf("[NULL]");
		return;
	}

	p = (const u_char *)sa;
	len = sa->sa_len;
	db_printf("[");
	while (len > 0) {
		db_printf("%d", *p);
		p++; len--;
		if (len) db_printf(",");
	}
	db_printf("]\n");
}

/* Dump the addresses and flags of an ifaddr. */
static void
db_print_ifa(struct ifaddr *ifa)
{
	if (ifa == NULL)
		return;
	db_printf("  ifa_addr=");
	db_print_sa(ifa->ifa_addr);
	db_printf("  ifa_dsta=");
	db_print_sa(ifa->ifa_dstaddr);
	db_printf("  ifa_mask=");
	db_print_sa(ifa->ifa_netmask);
	db_printf("  flags=0x%x,refcnt=%d,metric=%d\n",
	    ifa->ifa_flags,
	    ifa->ifa_refcnt,
	    ifa->ifa_metric);
}

/*
 * Function to pass to rt_walktree().
 * Return non-zero error to abort walk.
2366 */ 2367 static int 2368 db_show_rtentry(struct rtentry *rt, void *w) 2369 { 2370 db_printf("rtentry=%p", rt); 2371 2372 db_printf(" flags=0x%x refcnt=%d use=%"PRId64" expire=%"PRId64"\n", 2373 rt->rt_flags, rt->rt_refcnt, 2374 rt->rt_use, (uint64_t)rt->rt_expire); 2375 2376 db_printf(" key="); db_print_sa(rt_getkey(rt)); 2377 db_printf(" mask="); db_print_sa(rt_mask(rt)); 2378 db_printf(" gw="); db_print_sa(rt->rt_gateway); 2379 2380 db_printf(" ifp=%p ", rt->rt_ifp); 2381 if (rt->rt_ifp) 2382 db_printf("(%s)", rt->rt_ifp->if_xname); 2383 else 2384 db_printf("(NULL)"); 2385 2386 db_printf(" ifa=%p\n", rt->rt_ifa); 2387 db_print_ifa(rt->rt_ifa); 2388 2389 db_printf(" gwroute=%p llinfo=%p\n", 2390 rt->rt_gwroute, rt->rt_llinfo); 2391 2392 return 0; 2393 } 2394 2395 /* 2396 * Function to print all the route trees. 2397 * Use this from ddb: "show routes" 2398 */ 2399 void 2400 db_show_routes(db_expr_t addr, bool have_addr, 2401 db_expr_t count, const char *modif) 2402 { 2403 2404 /* Taking RT_LOCK will fail if LOCKDEBUG is enabled. */ 2405 rt_walktree_locked(AF_INET, db_show_rtentry, NULL); 2406 } 2407 #endif 2408