1 /* $NetBSD: route.c,v 1.225 2019/10/03 03:10:02 knakahara Exp $ */ 2 3 /*- 4 * Copyright (c) 1998, 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Kevin M. Lahey of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 35 * All rights reserved. 36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. 
Redistributions of source code must retain the above copyright 41 * notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. Neither the name of the project nor the names of its contributors 46 * may be used to endorse or promote products derived from this software 47 * without specific prior written permission. 48 * 49 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 52 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 59 * SUCH DAMAGE. 60 */ 61 62 /* 63 * Copyright (c) 1980, 1986, 1991, 1993 64 * The Regents of the University of California. All rights reserved. 65 * 66 * Redistribution and use in source and binary forms, with or without 67 * modification, are permitted provided that the following conditions 68 * are met: 69 * 1. Redistributions of source code must retain the above copyright 70 * notice, this list of conditions and the following disclaimer. 71 * 2. Redistributions in binary form must reproduce the above copyright 72 * notice, this list of conditions and the following disclaimer in the 73 * documentation and/or other materials provided with the distribution. 
74 * 3. Neither the name of the University nor the names of its contributors 75 * may be used to endorse or promote products derived from this software 76 * without specific prior written permission. 77 * 78 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 79 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 80 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 81 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 82 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 83 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 84 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 85 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 86 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 87 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 88 * SUCH DAMAGE. 89 * 90 * @(#)route.c 8.3 (Berkeley) 1/9/95 91 */ 92 93 #ifdef _KERNEL_OPT 94 #include "opt_inet.h" 95 #include "opt_route.h" 96 #include "opt_net_mpsafe.h" 97 #endif 98 99 #include <sys/cdefs.h> 100 __KERNEL_RCSID(0, "$NetBSD: route.c,v 1.225 2019/10/03 03:10:02 knakahara Exp $"); 101 102 #include <sys/param.h> 103 #ifdef RTFLUSH_DEBUG 104 #include <sys/sysctl.h> 105 #endif 106 #include <sys/systm.h> 107 #include <sys/callout.h> 108 #include <sys/proc.h> 109 #include <sys/mbuf.h> 110 #include <sys/socket.h> 111 #include <sys/socketvar.h> 112 #include <sys/domain.h> 113 #include <sys/kernel.h> 114 #include <sys/ioctl.h> 115 #include <sys/pool.h> 116 #include <sys/kauth.h> 117 #include <sys/workqueue.h> 118 #include <sys/syslog.h> 119 #include <sys/rwlock.h> 120 #include <sys/mutex.h> 121 #include <sys/cpu.h> 122 #include <sys/kmem.h> 123 124 #include <net/if.h> 125 #include <net/if_dl.h> 126 #include <net/route.h> 127 #if defined(INET) || defined(INET6) 128 #include <net/if_llatbl.h> 
129 #endif 130 131 #include <netinet/in.h> 132 #include <netinet/in_var.h> 133 134 #define PRESERVED_RTF (RTF_UP | RTF_GATEWAY | RTF_HOST | RTF_DONE | RTF_MASK) 135 136 #ifdef RTFLUSH_DEBUG 137 #define rtcache_debug() __predict_false(_rtcache_debug) 138 #else /* RTFLUSH_DEBUG */ 139 #define rtcache_debug() 0 140 #endif /* RTFLUSH_DEBUG */ 141 142 #ifdef RT_DEBUG 143 #define RT_REFCNT_TRACE(rt) printf("%s:%d: rt=%p refcnt=%d\n", \ 144 __func__, __LINE__, (rt), (rt)->rt_refcnt) 145 #else 146 #define RT_REFCNT_TRACE(rt) do {} while (0) 147 #endif 148 149 #ifdef RT_DEBUG 150 #define dlog(level, fmt, args...) log(level, fmt, ##args) 151 #else 152 #define dlog(level, fmt, args...) do {} while (0) 153 #endif 154 155 struct rtstat rtstat; 156 157 static int rttrash; /* routes not in table but not freed */ 158 159 static struct pool rtentry_pool; 160 static struct pool rttimer_pool; 161 162 static struct callout rt_timer_ch; /* callout for rt_timer_timer() */ 163 static struct workqueue *rt_timer_wq; 164 static struct work rt_timer_wk; 165 166 static void rt_timer_init(void); 167 static void rt_timer_queue_remove_all(struct rttimer_queue *); 168 static void rt_timer_remove_all(struct rtentry *); 169 static void rt_timer_timer(void *); 170 171 /* 172 * Locking notes: 173 * - The routing table is protected by a global rwlock 174 * - API: RT_RLOCK and friends 175 * - rtcaches are NOT protected by the framework 176 * - Callers must guarantee a rtcache isn't accessed simultaneously 177 * - How the constraint is guaranteed in the wild 178 * - Protect a rtcache by a mutex (e.g., inp_route) 179 * - Make rtcache per-CPU and allow only accesses from softint 180 * (e.g., ipforward_rt_percpu) 181 * - References to a rtentry are managed by reference counting and psref 182 * - Reference counting is used for temporary reference when a rtentry 183 * is fetched from the routing table 184 * - psref is used for temporary reference when a rtentry is fetched 185 * from a rtcache 186 * - struct route
(rtcache) has struct psref, so we cannot obtain 187 * a reference twice on the same struct route 188 * - Before destroying or updating a rtentry, we have to wait for 189 * all references to be released (see below for details) 190 * - APIs 191 * - An obtained rtentry via rtalloc1 or rtrequest* must be 192 * unreferenced by rt_unref 193 * - An obtained rtentry via rtcache_* must be unreferenced by 194 * rtcache_unref 195 * - TODO: once we get a lockless routing table, we should use only 196 * psref for rtentries 197 * - rtentry destruction 198 * - A rtentry is destroyed (freed) only when we call rtrequest(RTM_DELETE) 199 * - If a caller of rtrequest grabs a reference of a rtentry, the caller 200 * has a responsibility to destroy the rtentry by itself by calling 201 * rt_free 202 * - If not, rtrequest itself does that 203 * - If rt_free is called in softint, the actual destruction routine is 204 * deferred to a workqueue 205 * - rtentry update 206 * - When updating a rtentry, RTF_UPDATING flag is set 207 * - If a rtentry is set RTF_UPDATING, fetching the rtentry from 208 * the routing table or a rtcache results in either of the following 209 * cases: 210 * - if the caller runs in softint, the caller fails to fetch 211 * - otherwise, the caller waits for the update to complete and retries 212 * the fetch (which probably succeeds the second time) 213 * - rtcache invalidation 214 * - There is a global generation counter that is incremented when 215 * any routes have been added or deleted 216 * - When a rtcache caches a rtentry into itself, it also stores 217 * a snapshot of the generation counter 218 * - If the snapshot equals the global counter, the cache is valid, 219 * otherwise the cache is invalidated 220 */ 221 222 /* 223 * Global lock for the routing table.
224 */ 225 static krwlock_t rt_lock __cacheline_aligned; 226 #ifdef NET_MPSAFE 227 #define RT_RLOCK() rw_enter(&rt_lock, RW_READER) 228 #define RT_WLOCK() rw_enter(&rt_lock, RW_WRITER) 229 #define RT_UNLOCK() rw_exit(&rt_lock) 230 #define RT_WLOCKED() rw_write_held(&rt_lock) 231 #define RT_ASSERT_WLOCK() KASSERT(rw_write_held(&rt_lock)) 232 #else 233 #define RT_RLOCK() do {} while (0) 234 #define RT_WLOCK() do {} while (0) 235 #define RT_UNLOCK() do {} while (0) 236 #define RT_WLOCKED() true 237 #define RT_ASSERT_WLOCK() do {} while (0) 238 #endif 239 240 static uint64_t rtcache_generation; 241 242 /* 243 * mutex and cv that are used to wait for references to a rtentry left 244 * before updating the rtentry. 245 */ 246 static struct { 247 kmutex_t lock; 248 kcondvar_t cv; 249 bool ongoing; 250 const struct lwp *lwp; 251 } rt_update_global __cacheline_aligned; 252 253 /* 254 * A workqueue and stuff that are used to defer the destruction routine 255 * of rtentries. 256 */ 257 static struct { 258 struct workqueue *wq; 259 struct work wk; 260 kmutex_t lock; 261 SLIST_HEAD(, rtentry) queue; 262 bool enqueued; 263 } rt_free_global __cacheline_aligned; 264 265 /* psref for rtentry */ 266 static struct psref_class *rt_psref_class __read_mostly; 267 268 #ifdef RTFLUSH_DEBUG 269 static int _rtcache_debug = 0; 270 #endif /* RTFLUSH_DEBUG */ 271 272 static kauth_listener_t route_listener; 273 274 static int rtdeletemsg(struct rtentry *); 275 276 static void rt_maskedcopy(const struct sockaddr *, 277 struct sockaddr *, const struct sockaddr *); 278 279 static void rtcache_invalidate(void); 280 281 static void rt_ref(struct rtentry *); 282 283 static struct rtentry * 284 rtalloc1_locked(const struct sockaddr *, int, bool, bool); 285 286 static struct ifaddr *rt_getifa(struct rt_addrinfo *, struct psref *); 287 static struct ifnet *rt_getifp(struct rt_addrinfo *, struct psref *); 288 static struct ifaddr *ifa_ifwithroute_psref(int, const struct sockaddr *, 289 const struct 
sockaddr *, struct psref *); 290 291 static void rtcache_ref(struct rtentry *, struct route *); 292 293 #ifdef NET_MPSAFE 294 static void rt_update_wait(void); 295 #endif 296 297 static bool rt_wait_ok(void); 298 static void rt_wait_refcnt(const char *, struct rtentry *, int); 299 static void rt_wait_psref(struct rtentry *); 300 301 #ifdef DDB 302 static void db_print_sa(const struct sockaddr *); 303 static void db_print_ifa(struct ifaddr *); 304 static int db_show_rtentry(struct rtentry *, void *); 305 #endif 306 307 #ifdef RTFLUSH_DEBUG 308 static void sysctl_net_rtcache_setup(struct sysctllog **); 309 static void 310 sysctl_net_rtcache_setup(struct sysctllog **clog) 311 { 312 const struct sysctlnode *rnode; 313 314 if (sysctl_createv(clog, 0, NULL, &rnode, CTLFLAG_PERMANENT, 315 CTLTYPE_NODE, 316 "rtcache", SYSCTL_DESCR("Route cache related settings"), 317 NULL, 0, NULL, 0, CTL_NET, CTL_CREATE, CTL_EOL) != 0) 318 return; 319 if (sysctl_createv(clog, 0, &rnode, &rnode, 320 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, 321 "debug", SYSCTL_DESCR("Debug route caches"), 322 NULL, 0, &_rtcache_debug, 0, CTL_CREATE, CTL_EOL) != 0) 323 return; 324 } 325 #endif /* RTFLUSH_DEBUG */ 326 327 static inline void 328 rt_destroy(struct rtentry *rt) 329 { 330 if (rt->_rt_key != NULL) 331 sockaddr_free(rt->_rt_key); 332 if (rt->rt_gateway != NULL) 333 sockaddr_free(rt->rt_gateway); 334 if (rt_gettag(rt) != NULL) 335 sockaddr_free(rt_gettag(rt)); 336 rt->_rt_key = rt->rt_gateway = rt->rt_tag = NULL; 337 } 338 339 static inline const struct sockaddr * 340 rt_setkey(struct rtentry *rt, const struct sockaddr *key, int flags) 341 { 342 if (rt->_rt_key == key) 343 goto out; 344 345 if (rt->_rt_key != NULL) 346 sockaddr_free(rt->_rt_key); 347 rt->_rt_key = sockaddr_dup(key, flags); 348 out: 349 rt->rt_nodes->rn_key = (const char *)rt->_rt_key; 350 return rt->_rt_key; 351 } 352 353 struct ifaddr * 354 rt_get_ifa(struct rtentry *rt) 355 { 356 struct ifaddr *ifa; 357 358 if ((ifa = 
rt->rt_ifa) == NULL) 359 return ifa; 360 else if (ifa->ifa_getifa == NULL) 361 return ifa; 362 #if 0 363 else if (ifa->ifa_seqno != NULL && *ifa->ifa_seqno == rt->rt_ifa_seqno) 364 return ifa; 365 #endif 366 else { 367 ifa = (*ifa->ifa_getifa)(ifa, rt_getkey(rt)); 368 if (ifa == NULL) 369 return NULL; 370 rt_replace_ifa(rt, ifa); 371 return ifa; 372 } 373 } 374 375 static void 376 rt_set_ifa1(struct rtentry *rt, struct ifaddr *ifa) 377 { 378 rt->rt_ifa = ifa; 379 if (ifa->ifa_seqno != NULL) 380 rt->rt_ifa_seqno = *ifa->ifa_seqno; 381 } 382 383 /* 384 * Is this route the connected route for the ifa? 385 */ 386 static int 387 rt_ifa_connected(const struct rtentry *rt, const struct ifaddr *ifa) 388 { 389 const struct sockaddr *key, *dst, *odst; 390 struct sockaddr_storage maskeddst; 391 392 key = rt_getkey(rt); 393 dst = rt->rt_flags & RTF_HOST ? ifa->ifa_dstaddr : ifa->ifa_addr; 394 if (dst == NULL || 395 dst->sa_family != key->sa_family || 396 dst->sa_len != key->sa_len) 397 return 0; 398 if ((rt->rt_flags & RTF_HOST) == 0 && ifa->ifa_netmask) { 399 odst = dst; 400 dst = (struct sockaddr *)&maskeddst; 401 rt_maskedcopy(odst, (struct sockaddr *)&maskeddst, 402 ifa->ifa_netmask); 403 } 404 return (memcmp(dst, key, dst->sa_len) == 0); 405 } 406 407 void 408 rt_replace_ifa(struct rtentry *rt, struct ifaddr *ifa) 409 { 410 struct ifaddr *old; 411 412 if (rt->rt_ifa == ifa) 413 return; 414 415 if (rt->rt_ifa && 416 rt->rt_ifa != ifa && 417 rt->rt_ifa->ifa_flags & IFA_ROUTE && 418 rt_ifa_connected(rt, rt->rt_ifa)) 419 { 420 RT_DPRINTF("rt->_rt_key = %p, ifa = %p, " 421 "replace deleted IFA_ROUTE\n", 422 (void *)rt->_rt_key, (void *)rt->rt_ifa); 423 rt->rt_ifa->ifa_flags &= ~IFA_ROUTE; 424 if (rt_ifa_connected(rt, ifa)) { 425 RT_DPRINTF("rt->_rt_key = %p, ifa = %p, " 426 "replace added IFA_ROUTE\n", 427 (void *)rt->_rt_key, (void *)ifa); 428 ifa->ifa_flags |= IFA_ROUTE; 429 } 430 } 431 432 ifaref(ifa); 433 old = rt->rt_ifa; 434 rt_set_ifa1(rt, ifa); 435 ifafree(old); 436 } 
437 438 static void 439 rt_set_ifa(struct rtentry *rt, struct ifaddr *ifa) 440 { 441 ifaref(ifa); 442 rt_set_ifa1(rt, ifa); 443 } 444 445 static int 446 route_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie, 447 void *arg0, void *arg1, void *arg2, void *arg3) 448 { 449 struct rt_msghdr *rtm; 450 int result; 451 452 result = KAUTH_RESULT_DEFER; 453 rtm = arg1; 454 455 if (action != KAUTH_NETWORK_ROUTE) 456 return result; 457 458 if (rtm->rtm_type == RTM_GET) 459 result = KAUTH_RESULT_ALLOW; 460 461 return result; 462 } 463 464 static void rt_free_work(struct work *, void *); 465 466 void 467 rt_init(void) 468 { 469 int error; 470 471 #ifdef RTFLUSH_DEBUG 472 sysctl_net_rtcache_setup(NULL); 473 #endif 474 475 mutex_init(&rt_free_global.lock, MUTEX_DEFAULT, IPL_SOFTNET); 476 SLIST_INIT(&rt_free_global.queue); 477 rt_free_global.enqueued = false; 478 479 rt_psref_class = psref_class_create("rtentry", IPL_SOFTNET); 480 481 error = workqueue_create(&rt_free_global.wq, "rt_free", 482 rt_free_work, NULL, PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE); 483 if (error) 484 panic("%s: workqueue_create failed (%d)\n", __func__, error); 485 486 mutex_init(&rt_update_global.lock, MUTEX_DEFAULT, IPL_SOFTNET); 487 cv_init(&rt_update_global.cv, "rt_update"); 488 489 pool_init(&rtentry_pool, sizeof(struct rtentry), 0, 0, 0, "rtentpl", 490 NULL, IPL_SOFTNET); 491 pool_init(&rttimer_pool, sizeof(struct rttimer), 0, 0, 0, "rttmrpl", 492 NULL, IPL_SOFTNET); 493 494 rn_init(); /* initialize all zeroes, all ones, mask table */ 495 rtbl_init(); 496 497 route_listener = kauth_listen_scope(KAUTH_SCOPE_NETWORK, 498 route_listener_cb, NULL); 499 } 500 501 static void 502 rtcache_invalidate(void) 503 { 504 505 RT_ASSERT_WLOCK(); 506 507 if (rtcache_debug()) 508 printf("%s: enter\n", __func__); 509 510 rtcache_generation++; 511 } 512 513 #ifdef RT_DEBUG 514 static void 515 dump_rt(const struct rtentry *rt) 516 { 517 char buf[512]; 518 519 log(LOG_DEBUG, "rt: "); 520 log(LOG_DEBUG, "p=%p 
", rt); 521 if (rt->_rt_key == NULL) { 522 log(LOG_DEBUG, "dst=(NULL) "); 523 } else { 524 sockaddr_format(rt->_rt_key, buf, sizeof(buf)); 525 log(LOG_DEBUG, "dst=%s ", buf); 526 } 527 if (rt->rt_gateway == NULL) { 528 log(LOG_DEBUG, "gw=(NULL) "); 529 } else { 530 sockaddr_format(rt->_rt_key, buf, sizeof(buf)); 531 log(LOG_DEBUG, "gw=%s ", buf); 532 } 533 log(LOG_DEBUG, "flags=%x ", rt->rt_flags); 534 if (rt->rt_ifp == NULL) { 535 log(LOG_DEBUG, "if=(NULL) "); 536 } else { 537 log(LOG_DEBUG, "if=%s ", rt->rt_ifp->if_xname); 538 } 539 log(LOG_DEBUG, "\n"); 540 } 541 #endif /* RT_DEBUG */ 542 543 /* 544 * Packet routing routines. If success, refcnt of a returned rtentry 545 * will be incremented. The caller has to rtfree it by itself. 546 */ 547 struct rtentry * 548 rtalloc1_locked(const struct sockaddr *dst, int report, bool wait_ok, 549 bool wlock) 550 { 551 rtbl_t *rtbl; 552 struct rtentry *rt; 553 int s; 554 555 #ifdef NET_MPSAFE 556 retry: 557 #endif 558 s = splsoftnet(); 559 rtbl = rt_gettable(dst->sa_family); 560 if (rtbl == NULL) 561 goto miss; 562 563 rt = rt_matchaddr(rtbl, dst); 564 if (rt == NULL) 565 goto miss; 566 567 if (!ISSET(rt->rt_flags, RTF_UP)) 568 goto miss; 569 570 #ifdef NET_MPSAFE 571 if (ISSET(rt->rt_flags, RTF_UPDATING) && 572 /* XXX updater should be always able to acquire */ 573 curlwp != rt_update_global.lwp) { 574 if (!wait_ok || !rt_wait_ok()) 575 goto miss; 576 RT_UNLOCK(); 577 splx(s); 578 579 /* We can wait until the update is complete */ 580 rt_update_wait(); 581 582 if (wlock) 583 RT_WLOCK(); 584 else 585 RT_RLOCK(); 586 goto retry; 587 } 588 #endif /* NET_MPSAFE */ 589 590 rt_ref(rt); 591 RT_REFCNT_TRACE(rt); 592 593 splx(s); 594 return rt; 595 miss: 596 rtstat.rts_unreach++; 597 if (report) { 598 struct rt_addrinfo info; 599 600 memset(&info, 0, sizeof(info)); 601 info.rti_info[RTAX_DST] = dst; 602 rt_missmsg(RTM_MISS, &info, 0, 0); 603 } 604 splx(s); 605 return NULL; 606 } 607 608 struct rtentry * 609 rtalloc1(const struct 
sockaddr *dst, int report) 610 { 611 struct rtentry *rt; 612 613 RT_RLOCK(); 614 rt = rtalloc1_locked(dst, report, true, false); 615 RT_UNLOCK(); 616 617 return rt; 618 } 619 620 static void 621 rt_ref(struct rtentry *rt) 622 { 623 624 KASSERTMSG(rt->rt_refcnt >= 0, "rt_refcnt=%d", rt->rt_refcnt); 625 atomic_inc_uint(&rt->rt_refcnt); 626 } 627 628 void 629 rt_unref(struct rtentry *rt) 630 { 631 632 KASSERT(rt != NULL); 633 KASSERTMSG(rt->rt_refcnt > 0, "refcnt=%d", rt->rt_refcnt); 634 635 atomic_dec_uint(&rt->rt_refcnt); 636 if (!ISSET(rt->rt_flags, RTF_UP) || ISSET(rt->rt_flags, RTF_UPDATING)) { 637 mutex_enter(&rt_free_global.lock); 638 cv_broadcast(&rt->rt_cv); 639 mutex_exit(&rt_free_global.lock); 640 } 641 } 642 643 static bool 644 rt_wait_ok(void) 645 { 646 647 KASSERT(!cpu_intr_p()); 648 return !cpu_softintr_p(); 649 } 650 651 void 652 rt_wait_refcnt(const char *title, struct rtentry *rt, int cnt) 653 { 654 mutex_enter(&rt_free_global.lock); 655 while (rt->rt_refcnt > cnt) { 656 dlog(LOG_DEBUG, "%s: %s waiting (refcnt=%d)\n", 657 __func__, title, rt->rt_refcnt); 658 cv_wait(&rt->rt_cv, &rt_free_global.lock); 659 dlog(LOG_DEBUG, "%s: %s waited (refcnt=%d)\n", 660 __func__, title, rt->rt_refcnt); 661 } 662 mutex_exit(&rt_free_global.lock); 663 } 664 665 void 666 rt_wait_psref(struct rtentry *rt) 667 { 668 669 psref_target_destroy(&rt->rt_psref, rt_psref_class); 670 psref_target_init(&rt->rt_psref, rt_psref_class); 671 } 672 673 static void 674 _rt_free(struct rtentry *rt) 675 { 676 struct ifaddr *ifa; 677 678 /* 679 * Need to avoid a deadlock on rt_wait_refcnt of update 680 * and a conflict on psref_target_destroy of update. 
681 */ 682 #ifdef NET_MPSAFE 683 rt_update_wait(); 684 #endif 685 686 RT_REFCNT_TRACE(rt); 687 KASSERTMSG(rt->rt_refcnt >= 0, "refcnt=%d", rt->rt_refcnt); 688 rt_wait_refcnt("free", rt, 0); 689 #ifdef NET_MPSAFE 690 psref_target_destroy(&rt->rt_psref, rt_psref_class); 691 #endif 692 693 rt_assert_inactive(rt); 694 rttrash--; 695 ifa = rt->rt_ifa; 696 rt->rt_ifa = NULL; 697 ifafree(ifa); 698 rt->rt_ifp = NULL; 699 cv_destroy(&rt->rt_cv); 700 rt_destroy(rt); 701 pool_put(&rtentry_pool, rt); 702 } 703 704 static void 705 rt_free_work(struct work *wk, void *arg) 706 { 707 708 for (;;) { 709 struct rtentry *rt; 710 711 mutex_enter(&rt_free_global.lock); 712 if ((rt = SLIST_FIRST(&rt_free_global.queue)) == NULL) { 713 rt_free_global.enqueued = false; 714 mutex_exit(&rt_free_global.lock); 715 return; 716 } 717 SLIST_REMOVE_HEAD(&rt_free_global.queue, rt_free); 718 mutex_exit(&rt_free_global.lock); 719 atomic_dec_uint(&rt->rt_refcnt); 720 _rt_free(rt); 721 } 722 } 723 724 void 725 rt_free(struct rtentry *rt) 726 { 727 728 KASSERTMSG(rt->rt_refcnt > 0, "rt_refcnt=%d", rt->rt_refcnt); 729 if (rt_wait_ok()) { 730 atomic_dec_uint(&rt->rt_refcnt); 731 _rt_free(rt); 732 return; 733 } 734 735 mutex_enter(&rt_free_global.lock); 736 /* No need to add a reference here. 
*/ 737 SLIST_INSERT_HEAD(&rt_free_global.queue, rt, rt_free); 738 if (!rt_free_global.enqueued) { 739 workqueue_enqueue(rt_free_global.wq, &rt_free_global.wk, NULL); 740 rt_free_global.enqueued = true; 741 } 742 mutex_exit(&rt_free_global.lock); 743 } 744 745 #ifdef NET_MPSAFE 746 static void 747 rt_update_wait(void) 748 { 749 750 mutex_enter(&rt_update_global.lock); 751 while (rt_update_global.ongoing) { 752 dlog(LOG_DEBUG, "%s: waiting lwp=%p\n", __func__, curlwp); 753 cv_wait(&rt_update_global.cv, &rt_update_global.lock); 754 dlog(LOG_DEBUG, "%s: waited lwp=%p\n", __func__, curlwp); 755 } 756 mutex_exit(&rt_update_global.lock); 757 } 758 #endif 759 760 int 761 rt_update_prepare(struct rtentry *rt) 762 { 763 764 dlog(LOG_DEBUG, "%s: updating rt=%p lwp=%p\n", __func__, rt, curlwp); 765 766 RT_WLOCK(); 767 /* If the entry is being destroyed, don't proceed the update. */ 768 if (!ISSET(rt->rt_flags, RTF_UP)) { 769 RT_UNLOCK(); 770 return ESRCH; 771 } 772 rt->rt_flags |= RTF_UPDATING; 773 RT_UNLOCK(); 774 775 mutex_enter(&rt_update_global.lock); 776 while (rt_update_global.ongoing) { 777 dlog(LOG_DEBUG, "%s: waiting ongoing updating rt=%p lwp=%p\n", 778 __func__, rt, curlwp); 779 cv_wait(&rt_update_global.cv, &rt_update_global.lock); 780 dlog(LOG_DEBUG, "%s: waited ongoing updating rt=%p lwp=%p\n", 781 __func__, rt, curlwp); 782 } 783 rt_update_global.ongoing = true; 784 /* XXX need it to avoid rt_update_wait by updater itself. 
*/ 785 rt_update_global.lwp = curlwp; 786 mutex_exit(&rt_update_global.lock); 787 788 rt_wait_refcnt("update", rt, 1); 789 rt_wait_psref(rt); 790 791 return 0; 792 } 793 794 void 795 rt_update_finish(struct rtentry *rt) 796 { 797 798 RT_WLOCK(); 799 rt->rt_flags &= ~RTF_UPDATING; 800 RT_UNLOCK(); 801 802 mutex_enter(&rt_update_global.lock); 803 rt_update_global.ongoing = false; 804 rt_update_global.lwp = NULL; 805 cv_broadcast(&rt_update_global.cv); 806 mutex_exit(&rt_update_global.lock); 807 808 dlog(LOG_DEBUG, "%s: updated rt=%p lwp=%p\n", __func__, rt, curlwp); 809 } 810 811 /* 812 * Force a routing table entry to the specified 813 * destination to go through the given gateway. 814 * Normally called as a result of a routing redirect 815 * message from the network layer. 816 * 817 * N.B.: must be called at splsoftnet 818 */ 819 void 820 rtredirect(const struct sockaddr *dst, const struct sockaddr *gateway, 821 const struct sockaddr *netmask, int flags, const struct sockaddr *src, 822 struct rtentry **rtp) 823 { 824 struct rtentry *rt; 825 int error = 0; 826 uint64_t *stat = NULL; 827 struct rt_addrinfo info; 828 struct ifaddr *ifa; 829 struct psref psref; 830 831 /* verify the gateway is directly reachable */ 832 if ((ifa = ifa_ifwithnet_psref(gateway, &psref)) == NULL) { 833 error = ENETUNREACH; 834 goto out; 835 } 836 rt = rtalloc1(dst, 0); 837 /* 838 * If the redirect isn't from our current router for this dst, 839 * it's either old or wrong. If it redirects us to ourselves, 840 * we have a routing loop, perhaps as a result of an interface 841 * going down recently. 
842 */ 843 if (!(flags & RTF_DONE) && rt && 844 (sockaddr_cmp(src, rt->rt_gateway) != 0 || rt->rt_ifa != ifa)) 845 error = EINVAL; 846 else { 847 int s = pserialize_read_enter(); 848 struct ifaddr *_ifa; 849 850 _ifa = ifa_ifwithaddr(gateway); 851 if (_ifa != NULL) 852 error = EHOSTUNREACH; 853 pserialize_read_exit(s); 854 } 855 if (error) 856 goto done; 857 /* 858 * Create a new entry if we just got back a wildcard entry 859 * or the lookup failed. This is necessary for hosts 860 * which use routing redirects generated by smart gateways 861 * to dynamically build the routing tables. 862 */ 863 if (rt == NULL || (rt_mask(rt) && rt_mask(rt)->sa_len < 2)) 864 goto create; 865 /* 866 * Don't listen to the redirect if it's 867 * for a route to an interface. 868 */ 869 if (rt->rt_flags & RTF_GATEWAY) { 870 if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) { 871 /* 872 * Changing from route to net => route to host. 873 * Create new route, rather than smashing route to net. 874 */ 875 create: 876 if (rt != NULL) 877 rt_unref(rt); 878 flags |= RTF_GATEWAY | RTF_DYNAMIC; 879 memset(&info, 0, sizeof(info)); 880 info.rti_info[RTAX_DST] = dst; 881 info.rti_info[RTAX_GATEWAY] = gateway; 882 info.rti_info[RTAX_NETMASK] = netmask; 883 info.rti_ifa = ifa; 884 info.rti_flags = flags; 885 rt = NULL; 886 error = rtrequest1(RTM_ADD, &info, &rt); 887 if (rt != NULL) 888 flags = rt->rt_flags; 889 stat = &rtstat.rts_dynamic; 890 } else { 891 /* 892 * Smash the current notion of the gateway to 893 * this destination. Should check about netmask!!! 
894 */ 895 #ifdef NET_MPSAFE 896 KASSERT(!cpu_softintr_p()); 897 898 error = rt_update_prepare(rt); 899 if (error == 0) { 900 #endif 901 RT_WLOCK(); 902 error = rt_setgate(rt, gateway); 903 if (error == 0) { 904 rt->rt_flags |= RTF_MODIFIED; 905 flags |= RTF_MODIFIED; 906 } 907 RT_UNLOCK(); 908 #ifdef NET_MPSAFE 909 rt_update_finish(rt); 910 } else { 911 /* 912 * If error != 0, the rtentry is being 913 * destroyed, so doing nothing doesn't 914 * matter. 915 */ 916 } 917 #endif 918 stat = &rtstat.rts_newgateway; 919 } 920 } else 921 error = EHOSTUNREACH; 922 done: 923 if (rt) { 924 if (rtp != NULL && !error) 925 *rtp = rt; 926 else 927 rt_unref(rt); 928 } 929 out: 930 if (error) 931 rtstat.rts_badredirect++; 932 else if (stat != NULL) 933 (*stat)++; 934 memset(&info, 0, sizeof(info)); 935 info.rti_info[RTAX_DST] = dst; 936 info.rti_info[RTAX_GATEWAY] = gateway; 937 info.rti_info[RTAX_NETMASK] = netmask; 938 info.rti_info[RTAX_AUTHOR] = src; 939 rt_missmsg(RTM_REDIRECT, &info, flags, error); 940 ifa_release(ifa, &psref); 941 } 942 943 /* 944 * Delete a route and generate a message. 945 * It doesn't free a passed rt. 946 */ 947 static int 948 rtdeletemsg(struct rtentry *rt) 949 { 950 int error; 951 struct rt_addrinfo info; 952 struct rtentry *retrt; 953 954 /* 955 * Request the new route so that the entry is not actually 956 * deleted. That will allow the information being reported to 957 * be accurate (and consistent with route_output()). 
958 */ 959 memset(&info, 0, sizeof(info)); 960 info.rti_info[RTAX_DST] = rt_getkey(rt); 961 info.rti_info[RTAX_NETMASK] = rt_mask(rt); 962 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 963 info.rti_flags = rt->rt_flags; 964 error = rtrequest1(RTM_DELETE, &info, &retrt); 965 966 rt_missmsg(RTM_DELETE, &info, info.rti_flags, error); 967 968 return error; 969 } 970 971 static struct ifaddr * 972 ifa_ifwithroute_psref(int flags, const struct sockaddr *dst, 973 const struct sockaddr *gateway, struct psref *psref) 974 { 975 struct ifaddr *ifa = NULL; 976 977 if ((flags & RTF_GATEWAY) == 0) { 978 /* 979 * If we are adding a route to an interface, 980 * and the interface is a pt to pt link 981 * we should search for the destination 982 * as our clue to the interface. Otherwise 983 * we can use the local address. 984 */ 985 if ((flags & RTF_HOST) && gateway->sa_family != AF_LINK) 986 ifa = ifa_ifwithdstaddr_psref(dst, psref); 987 if (ifa == NULL) 988 ifa = ifa_ifwithaddr_psref(gateway, psref); 989 } else { 990 /* 991 * If we are adding a route to a remote net 992 * or host, the gateway may still be on the 993 * other end of a pt to pt link. 994 */ 995 ifa = ifa_ifwithdstaddr_psref(gateway, psref); 996 } 997 if (ifa == NULL) 998 ifa = ifa_ifwithnet_psref(gateway, psref); 999 if (ifa == NULL) { 1000 int s; 1001 struct rtentry *rt; 1002 1003 rt = rtalloc1_locked(gateway, 0, true, true); 1004 if (rt == NULL) 1005 return NULL; 1006 if (rt->rt_flags & RTF_GATEWAY) { 1007 rt_unref(rt); 1008 return NULL; 1009 } 1010 /* 1011 * Just in case. May not need to do this workaround. 1012 * Revisit when working on rtentry MP-ification. 
1013 */ 1014 s = pserialize_read_enter(); 1015 IFADDR_READER_FOREACH(ifa, rt->rt_ifp) { 1016 if (ifa == rt->rt_ifa) 1017 break; 1018 } 1019 if (ifa != NULL) 1020 ifa_acquire(ifa, psref); 1021 pserialize_read_exit(s); 1022 rt_unref(rt); 1023 if (ifa == NULL) 1024 return NULL; 1025 } 1026 if (ifa->ifa_addr->sa_family != dst->sa_family) { 1027 struct ifaddr *nifa; 1028 int s; 1029 1030 s = pserialize_read_enter(); 1031 nifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp); 1032 if (nifa != NULL) { 1033 ifa_release(ifa, psref); 1034 ifa_acquire(nifa, psref); 1035 ifa = nifa; 1036 } 1037 pserialize_read_exit(s); 1038 } 1039 return ifa; 1040 } 1041 1042 /* 1043 * If it suceeds and ret_nrt isn't NULL, refcnt of ret_nrt is incremented. 1044 * The caller has to rtfree it by itself. 1045 */ 1046 int 1047 rtrequest(int req, const struct sockaddr *dst, const struct sockaddr *gateway, 1048 const struct sockaddr *netmask, int flags, struct rtentry **ret_nrt) 1049 { 1050 struct rt_addrinfo info; 1051 1052 memset(&info, 0, sizeof(info)); 1053 info.rti_flags = flags; 1054 info.rti_info[RTAX_DST] = dst; 1055 info.rti_info[RTAX_GATEWAY] = gateway; 1056 info.rti_info[RTAX_NETMASK] = netmask; 1057 return rtrequest1(req, &info, ret_nrt); 1058 } 1059 1060 /* 1061 * It's a utility function to add/remove a route to/from the routing table 1062 * and tell user processes the addition/removal on success. 
1063 */ 1064 int 1065 rtrequest_newmsg(const int req, const struct sockaddr *dst, 1066 const struct sockaddr *gateway, const struct sockaddr *netmask, 1067 const int flags) 1068 { 1069 int error; 1070 struct rtentry *ret_nrt = NULL; 1071 1072 KASSERT(req == RTM_ADD || req == RTM_DELETE); 1073 1074 error = rtrequest(req, dst, gateway, netmask, flags, &ret_nrt); 1075 if (error != 0) 1076 return error; 1077 1078 KASSERT(ret_nrt != NULL); 1079 1080 rt_newmsg(req, ret_nrt); /* tell user process */ 1081 if (req == RTM_DELETE) 1082 rt_free(ret_nrt); 1083 else 1084 rt_unref(ret_nrt); 1085 1086 return 0; 1087 } 1088 1089 static struct ifnet * 1090 rt_getifp(struct rt_addrinfo *info, struct psref *psref) 1091 { 1092 const struct sockaddr *ifpaddr = info->rti_info[RTAX_IFP]; 1093 1094 if (info->rti_ifp != NULL) 1095 return NULL; 1096 /* 1097 * ifp may be specified by sockaddr_dl when protocol address 1098 * is ambiguous 1099 */ 1100 if (ifpaddr != NULL && ifpaddr->sa_family == AF_LINK) { 1101 struct ifaddr *ifa; 1102 int s = pserialize_read_enter(); 1103 1104 ifa = ifa_ifwithnet(ifpaddr); 1105 if (ifa != NULL) 1106 info->rti_ifp = if_get_byindex(ifa->ifa_ifp->if_index, 1107 psref); 1108 pserialize_read_exit(s); 1109 } 1110 1111 return info->rti_ifp; 1112 } 1113 1114 static struct ifaddr * 1115 rt_getifa(struct rt_addrinfo *info, struct psref *psref) 1116 { 1117 struct ifaddr *ifa = NULL; 1118 const struct sockaddr *dst = info->rti_info[RTAX_DST]; 1119 const struct sockaddr *gateway = info->rti_info[RTAX_GATEWAY]; 1120 const struct sockaddr *ifaaddr = info->rti_info[RTAX_IFA]; 1121 int flags = info->rti_flags; 1122 const struct sockaddr *sa; 1123 1124 if (info->rti_ifa == NULL && ifaaddr != NULL) { 1125 ifa = ifa_ifwithaddr_psref(ifaaddr, psref); 1126 if (ifa != NULL) 1127 goto got; 1128 } 1129 1130 sa = ifaaddr != NULL ? ifaaddr : 1131 (gateway != NULL ? 
	    gateway : dst);
	if (sa != NULL && info->rti_ifp != NULL)
		ifa = ifaof_ifpforaddr_psref(sa, info->rti_ifp, psref);
	else if (dst != NULL && gateway != NULL)
		ifa = ifa_ifwithroute_psref(flags, dst, gateway, psref);
	else if (sa != NULL)
		ifa = ifa_ifwithroute_psref(flags, sa, sa, psref);
	if (ifa == NULL)
		return NULL;
got:
	/* Let the address substitute another ifaddr for this destination. */
	if (ifa->ifa_getifa != NULL) {
		/* FIXME ifa_getifa is NOMPSAFE */
		ifa = (*ifa->ifa_getifa)(ifa, dst);
		if (ifa == NULL)
			return NULL;
		ifa_acquire(ifa, psref);
	}
	info->rti_ifa = ifa;
	if (info->rti_ifp == NULL)
		info->rti_ifp = ifa->ifa_ifp;
	return ifa;
}

/*
 * Carry out a routing table request (RTM_ADD, RTM_DELETE or RTM_GET)
 * described by info.  Returns 0 or an errno value.
 *
 * If it succeeds and ret_nrt isn't NULL, refcnt of ret_nrt is incremented.
 * The caller has to rtfree it by itself.
 */
int
rtrequest1(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt)
{
	int s = splsoftnet(), ss;
	int error = 0, rc;
	struct rtentry *rt;
	rtbl_t *rtbl;
	struct ifaddr *ifa = NULL;
	struct sockaddr_storage maskeddst;
	const struct sockaddr *dst = info->rti_info[RTAX_DST];
	const struct sockaddr *gateway = info->rti_info[RTAX_GATEWAY];
	const struct sockaddr *netmask = info->rti_info[RTAX_NETMASK];
	int flags = info->rti_flags;
	struct psref psref_ifp, psref_ifa;
	int bound = 0;
	struct ifnet *ifp = NULL;
	/* Cleared when the caller supplied (and owns) info->rti_ifa. */
	bool need_to_release_ifa = true;
	/* Cleared once a case has already dropped the table lock itself. */
	bool need_unlock = true;
/* Record the error and jump to the common exit path. */
#define senderr(x) { error = x ; goto bad; }

	RT_WLOCK();

	bound = curlwp_bind();
	if ((rtbl = rt_gettable(dst->sa_family)) == NULL)
		senderr(ESRCH);
	/* Host routes carry no netmask. */
	if (flags & RTF_HOST)
		netmask = NULL;
	switch (req) {
	case RTM_DELETE:
		/* Look the route up by its masked destination. */
		if (netmask) {
			rt_maskedcopy(dst, (struct sockaddr *)&maskeddst,
			    netmask);
			dst = (struct sockaddr *)&maskeddst;
		}
		if ((rt = rt_lookup(rtbl, dst, netmask)) == NULL)
			senderr(ESRCH);
		if ((rt = rt_deladdr(rtbl, dst,
		    netmask)) == NULL)
			senderr(ESRCH);
		/* The entry has left the table: mark it down. */
		rt->rt_flags &= ~RTF_UP;
		if ((ifa = rt->rt_ifa)) {
			if (ifa->ifa_flags & IFA_ROUTE &&
			    rt_ifa_connected(rt, ifa)) {
				RT_DPRINTF("rt->_rt_key = %p, ifa = %p, "
				    "deleted IFA_ROUTE\n",
				    (void *)rt->_rt_key, (void *)ifa);
				ifa->ifa_flags &= ~IFA_ROUTE;
			}
			if (ifa->ifa_rtrequest)
				ifa->ifa_rtrequest(RTM_DELETE, rt, info);
			/* Not acquired here, so keep the exit path off it. */
			ifa = NULL;
		}
		rttrash++;
		if (ret_nrt) {
			*ret_nrt = rt;
			rt_ref(rt);
			RT_REFCNT_TRACE(rt);
		}
		rtcache_invalidate();
		/* rt_timer_remove_all() below takes the write lock itself. */
		RT_UNLOCK();
		need_unlock = false;
		rt_timer_remove_all(rt);
#if defined(INET) || defined(INET6)
		if (netmask != NULL)
			lltable_prefix_free(dst->sa_family, dst, netmask, 0);
#endif
		if (ret_nrt == NULL) {
			/* Adjust the refcount */
			rt_ref(rt);
			RT_REFCNT_TRACE(rt);
			rt_free(rt);
		}
		break;

	case RTM_ADD:
		if (info->rti_ifa == NULL) {
			/* Resolve ifp/ifa ourselves; both are released below. */
			ifp = rt_getifp(info, &psref_ifp);
			ifa = rt_getifa(info, &psref_ifa);
			if (ifa == NULL)
				senderr(ENETUNREACH);
		} else {
			/* Caller should have a reference of ifa */
			ifa = info->rti_ifa;
			need_to_release_ifa = false;
		}
		rt = pool_get(&rtentry_pool, PR_NOWAIT);
		if (rt == NULL)
			senderr(ENOBUFS);
		memset(rt, 0, sizeof(*rt));
		rt->rt_flags = RTF_UP | (flags & ~RTF_DONTCHANGEIFA);
		LIST_INIT(&rt->rt_timer);

		RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
		/* The key is the destination with the netmask applied. */
		if (netmask) {
			rt_maskedcopy(dst, (struct sockaddr *)&maskeddst,
			    netmask);
			rt_setkey(rt, (struct sockaddr *)&maskeddst, M_NOWAIT);
		} else {
			rt_setkey(rt, dst, M_NOWAIT);
		}
		RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
		if (rt_getkey(rt) == NULL ||
		    rt_setgate(rt, gateway) != 0) {
			pool_put(&rtentry_pool, rt);
			senderr(ENOBUFS);
		}

		rt_set_ifa(rt, ifa);
		if (info->rti_info[RTAX_TAG] != NULL) {
			const
			    struct sockaddr *tag;
			tag = rt_settag(rt, info->rti_info[RTAX_TAG]);
			/*
			 * NOTE(review): rt is neither destroyed nor returned
			 * to rtentry_pool on this path, unlike the
			 * rt_addaddr() failure below -- looks like a leak;
			 * confirm.
			 */
			if (tag == NULL)
				senderr(ENOBUFS);
		}
		RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);

		/* An explicit RTAX_IFP wins over the ifaddr's interface. */
		ss = pserialize_read_enter();
		if (info->rti_info[RTAX_IFP] != NULL) {
			struct ifaddr *ifa2;
			ifa2 = ifa_ifwithnet(info->rti_info[RTAX_IFP]);
			if (ifa2 != NULL)
				rt->rt_ifp = ifa2->ifa_ifp;
			else
				rt->rt_ifp = ifa->ifa_ifp;
		} else
			rt->rt_ifp = ifa->ifa_ifp;
		pserialize_read_exit(ss);
		cv_init(&rt->rt_cv, "rtentry");
		psref_target_init(&rt->rt_psref, rt_psref_class);

		RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
		rc = rt_addaddr(rtbl, rt, netmask);
		RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
		if (rc != 0) {
			/* Insertion failed: undo everything set up above. */
			ifafree(ifa); /* for rt_set_ifa above */
			cv_destroy(&rt->rt_cv);
			rt_destroy(rt);
			pool_put(&rtentry_pool, rt);
			senderr(rc);
		}
		RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
		if (ifa->ifa_rtrequest)
			ifa->ifa_rtrequest(req, rt, info);
		/* Release here and clear, so the exit path sees NULL. */
		if (need_to_release_ifa)
			ifa_release(ifa, &psref_ifa);
		ifa = NULL;
		if_put(ifp, &psref_ifp);
		ifp = NULL;
		RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
		if (ret_nrt) {
			*ret_nrt = rt;
			rt_ref(rt);
			RT_REFCNT_TRACE(rt);
		}
		rtcache_invalidate();
		RT_UNLOCK();
		need_unlock = false;
		break;
	case RTM_GET:
		if (netmask != NULL) {
			rt_maskedcopy(dst, (struct sockaddr *)&maskeddst,
			    netmask);
			dst = (struct sockaddr *)&maskeddst;
		}
		if ((rt = rt_lookup(rtbl, dst, netmask)) == NULL)
			senderr(ESRCH);
		if (ret_nrt != NULL) {
			*ret_nrt = rt;
			rt_ref(rt);
			RT_REFCNT_TRACE(rt);
		}
		break;
	}
bad:
	/* Common exit, reached on success as well as through senderr(). */
	if (need_to_release_ifa)
		ifa_release(ifa, &psref_ifa);
	if_put(ifp, &psref_ifp);
	curlwp_bindx(bound);
	if (need_unlock)
		RT_UNLOCK();
	splx(s);
	return error;
}

/*
 * Replace the gateway address of rt with a copy of gate.  The routing
 * table must be write-locked by the caller.  For RTF_GATEWAY routes the
 * MTU of rt may additionally be lowered to that of the route towards the
 * new gateway.
 *
 * Returns 0 on success or ENOMEM when the copy cannot be allocated, in
 * which case the old gateway is left in place.
 */
int
rt_setgate(struct rtentry *rt, const struct sockaddr *gate)
{
	struct sockaddr *new, *old;

	KASSERT(RT_WLOCKED());
	KASSERT(rt->_rt_key != NULL);
	RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);

	/* Allocate first so a failure cannot lose the old gateway. */
	new = sockaddr_dup(gate, M_ZERO | M_NOWAIT);
	if (new == NULL)
		return ENOMEM;

	old = rt->rt_gateway;
	rt->rt_gateway = new;
	if (old != NULL)
		sockaddr_free(old);

	KASSERT(rt->_rt_key != NULL);
	RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);

	if (rt->rt_flags & RTF_GATEWAY) {
		struct rtentry *gwrt;

		gwrt = rtalloc1_locked(gate, 1, false, true);
		/*
		 * If we switched gateways, grab the MTU from the new
		 * gateway route if the current MTU is greater than the
		 * MTU of gateway.
		 * Note that, if the MTU of gateway is 0, we will reset the
		 * MTU of the route to run PMTUD again from scratch. XXX
		 */
		if (gwrt != NULL) {
			KASSERT(gwrt->_rt_key != NULL);
			RT_DPRINTF("gwrt->_rt_key = %p\n", gwrt->_rt_key);
			if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0 &&
			    rt->rt_rmx.rmx_mtu &&
			    rt->rt_rmx.rmx_mtu > gwrt->rt_rmx.rmx_mtu) {
				rt->rt_rmx.rmx_mtu = gwrt->rt_rmx.rmx_mtu;
			}
			rt_unref(gwrt);
		}
	}
	KASSERT(rt->_rt_key != NULL);
	RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
	return 0;
}

/*
 * Work out which ifaddr and interface a changed route should use from
 * the RTAX_IFP, RTAX_IFA and RTAX_GATEWAY entries of info.
 *
 * On return *ifp is the chosen interface, acquired with psref_ifp, or
 * NULL.  The returned ifaddr, which may be NULL, is referenced through
 * psref; whenever NULL is returned *ifp is NULL as well.
 */
static struct ifaddr *
rt_update_get_ifa(const struct rt_addrinfo *info, const struct rtentry *rt,
    struct ifnet **ifp, struct psref *psref_ifp, struct psref *psref)
{
	struct ifaddr *ifa = NULL;

	*ifp = NULL;
	if (info->rti_info[RTAX_IFP] != NULL) {
		ifa = ifa_ifwithnet_psref(info->rti_info[RTAX_IFP], psref);
		if (ifa == NULL)
			goto next;
		*ifp = ifa->ifa_ifp;
		if_acquire(*ifp, psref_ifp);
		/* Only -ifp was given: keep the address found for it. */
		if (info->rti_info[RTAX_IFA] == NULL &&
		    info->rti_info[RTAX_GATEWAY] == NULL)
			goto out;
		/* A better address is looked up on *ifp below. */
		ifa_release(ifa, psref);
		if (info->rti_info[RTAX_IFA] == NULL) {
			/* route change <dst> <gw> -ifp <if> */
			ifa = ifaof_ifpforaddr_psref(
			    info->rti_info[RTAX_GATEWAY], *ifp, psref);
		} else {
			/* route change <dst> -ifp <if> -ifa <addr> */
			ifa = ifa_ifwithaddr_psref(info->rti_info[RTAX_IFA],
			    psref);
			if (ifa != NULL)
				goto out;
			ifa = ifaof_ifpforaddr_psref(info->rti_info[RTAX_IFA],
			    *ifp, psref);
		}
		goto out;
	}
next:
	if (info->rti_info[RTAX_IFA] != NULL) {
		/* route change <dst> <gw> -ifa <addr> */
		ifa = ifa_ifwithaddr_psref(info->rti_info[RTAX_IFA], psref);
		if (ifa != NULL)
			goto out;
	}
	if (info->rti_info[RTAX_GATEWAY] != NULL) {
		/* route change <dst> <gw> */
		ifa = ifa_ifwithroute_psref(rt->rt_flags, rt_getkey(rt),
		    info->rti_info[RTAX_GATEWAY], psref);
	}
out:
	/* Keep the pair consistent: both set, or both NULL. */
	if (ifa != NULL && *ifp == NULL) {
		*ifp = ifa->ifa_ifp;
		if_acquire(*ifp, psref_ifp);
	}
	if (ifa == NULL && *ifp != NULL) {
		if_put(*ifp, psref_ifp);
		*ifp = NULL;
	}
	return ifa;
}

/*
 * Apply the changes described by info (gateway, tag, interface address
 * and interface, flags) and the metrics taken from rtm to the existing
 * route rt, under the routing table write lock.
 * Returns 0 or an errno value.
 */
int
rt_update(struct rtentry *rt, struct rt_addrinfo *info, void *rtm)
{
	int error = 0;
	struct ifnet *ifp = NULL, *new_ifp = NULL;
	struct ifaddr *ifa = NULL, *new_ifa;
	struct psref psref_ifa, psref_new_ifa, psref_ifp, psref_new_ifp;
	bool newgw, ifp_changed = false;

	RT_WLOCK();
	/*
	 * New gateway could require new ifaddr, ifp;
	 * flags may also be different; ifp may be specified
	 * by ll sockaddr when protocol address is ambiguous
	 */
	newgw = info->rti_info[RTAX_GATEWAY] != NULL &&
	    sockaddr_cmp(info->rti_info[RTAX_GATEWAY], rt->rt_gateway) != 0;

	if (newgw || info->rti_info[RTAX_IFP] != NULL ||
	    info->rti_info[RTAX_IFA] != NULL) {
		ifp = rt_getifp(info, &psref_ifp);
		/* info refers ifp so we need to keep a reference */
		ifa =
		    rt_getifa(info, &psref_ifa);
		if (ifa == NULL) {
			error = ENETUNREACH;
			goto out;
		}
	}
	if (newgw) {
		error = rt_setgate(rt, info->rti_info[RTAX_GATEWAY]);
		if (error != 0)
			goto out;
	}
	if (info->rti_info[RTAX_TAG]) {
		const struct sockaddr *tag;
		tag = rt_settag(rt, info->rti_info[RTAX_TAG]);
		if (tag == NULL) {
			error = ENOBUFS;
			goto out;
		}
	}
	/*
	 * New gateway could require new ifaddr, ifp;
	 * flags may also be different; ifp may be specified
	 * by ll sockaddr when protocol address is ambiguous
	 */
	new_ifa = rt_update_get_ifa(info, rt, &new_ifp, &psref_new_ifp,
	    &psref_new_ifa);
	/* Prefer the freshly resolved address over the rt_getifa() one. */
	if (new_ifa != NULL) {
		ifa_release(ifa, &psref_ifa);
		ifa = new_ifa;
	}
	if (ifa) {
		struct ifaddr *oifa = rt->rt_ifa;
		/* Switch only to a live address on a live interface. */
		if (oifa != ifa && !ifa_is_destroying(ifa) &&
		    new_ifp != NULL && !if_is_deactivated(new_ifp)) {
			if (oifa && oifa->ifa_rtrequest)
				oifa->ifa_rtrequest(RTM_DELETE, rt, info);
			rt_replace_ifa(rt, ifa);
			rt->rt_ifp = new_ifp;
			ifp_changed = true;
		}
		/* Here ifa still holds the psref_ifa reference. */
		if (new_ifa == NULL)
			ifa_release(ifa, &psref_ifa);
		/* To avoid ifa_release below */
		ifa = NULL;
	}
	ifa_release(new_ifa, &psref_new_ifa);
	if (new_ifp && rt->rt_ifp != new_ifp && !if_is_deactivated(new_ifp)) {
		rt->rt_ifp = new_ifp;
		ifp_changed = true;
	}
	rt_setmetrics(rtm, rt);
	/* Take the new flags but keep the PRESERVED_RTF bits of the route. */
	if (rt->rt_flags != info->rti_flags) {
		rt->rt_flags = (info->rti_flags & ~PRESERVED_RTF) |
		    (rt->rt_flags & PRESERVED_RTF);
	}
	if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest)
		rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, info);
#if defined(INET) || defined(INET6)
	if (ifp_changed && rt_mask(rt) != NULL)
		lltable_prefix_free(rt_getkey(rt)->sa_family, rt_getkey(rt),
		    rt_mask(rt), 0);
#else
	(void)ifp_changed; /* XXX gcc */
#endif
out:
	ifa_release(ifa, &psref_ifa);
	if_put(new_ifp,
&psref_new_ifp); 1534 if_put(ifp, &psref_ifp); 1535 1536 RT_UNLOCK(); 1537 1538 return error; 1539 } 1540 1541 static void 1542 rt_maskedcopy(const struct sockaddr *src, struct sockaddr *dst, 1543 const struct sockaddr *netmask) 1544 { 1545 const char *netmaskp = &netmask->sa_data[0], 1546 *srcp = &src->sa_data[0]; 1547 char *dstp = &dst->sa_data[0]; 1548 const char *maskend = (char *)dst + MIN(netmask->sa_len, src->sa_len); 1549 const char *srcend = (char *)dst + src->sa_len; 1550 1551 dst->sa_len = src->sa_len; 1552 dst->sa_family = src->sa_family; 1553 1554 while (dstp < maskend) 1555 *dstp++ = *srcp++ & *netmaskp++; 1556 if (dstp < srcend) 1557 memset(dstp, 0, (size_t)(srcend - dstp)); 1558 } 1559 1560 /* 1561 * Inform the routing socket of a route change. 1562 */ 1563 void 1564 rt_newmsg(const int cmd, const struct rtentry *rt) 1565 { 1566 struct rt_addrinfo info; 1567 1568 memset((void *)&info, 0, sizeof(info)); 1569 info.rti_info[RTAX_DST] = rt_getkey(rt); 1570 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 1571 info.rti_info[RTAX_NETMASK] = rt_mask(rt); 1572 if (rt->rt_ifp) { 1573 info.rti_info[RTAX_IFP] = rt->rt_ifp->if_dl->ifa_addr; 1574 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; 1575 } 1576 1577 rt_missmsg(cmd, &info, rt->rt_flags, 0); 1578 } 1579 1580 /* 1581 * Set up or tear down a routing table entry, normally 1582 * for an interface. 1583 */ 1584 int 1585 rtinit(struct ifaddr *ifa, int cmd, int flags) 1586 { 1587 struct rtentry *rt; 1588 struct sockaddr *dst, *odst; 1589 struct sockaddr_storage maskeddst; 1590 struct rtentry *nrt = NULL; 1591 int error; 1592 struct rt_addrinfo info; 1593 1594 dst = flags & RTF_HOST ? 
ifa->ifa_dstaddr : ifa->ifa_addr; 1595 if (cmd == RTM_DELETE) { 1596 if ((flags & RTF_HOST) == 0 && ifa->ifa_netmask) { 1597 /* Delete subnet route for this interface */ 1598 odst = dst; 1599 dst = (struct sockaddr *)&maskeddst; 1600 rt_maskedcopy(odst, dst, ifa->ifa_netmask); 1601 } 1602 if ((rt = rtalloc1(dst, 0)) != NULL) { 1603 if (rt->rt_ifa != ifa) { 1604 rt_unref(rt); 1605 return (flags & RTF_HOST) ? EHOSTUNREACH 1606 : ENETUNREACH; 1607 } 1608 rt_unref(rt); 1609 } 1610 } 1611 memset(&info, 0, sizeof(info)); 1612 info.rti_ifa = ifa; 1613 info.rti_flags = flags | ifa->ifa_flags | RTF_DONTCHANGEIFA; 1614 info.rti_info[RTAX_DST] = dst; 1615 info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr; 1616 1617 /* 1618 * XXX here, it seems that we are assuming that ifa_netmask is NULL 1619 * for RTF_HOST. bsdi4 passes NULL explicitly (via intermediate 1620 * variable) when RTF_HOST is 1. still not sure if i can safely 1621 * change it to meet bsdi4 behavior. 1622 */ 1623 if (cmd != RTM_LLINFO_UPD) 1624 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; 1625 error = rtrequest1((cmd == RTM_LLINFO_UPD) ? RTM_GET : cmd, &info, 1626 &nrt); 1627 if (error != 0) 1628 return error; 1629 1630 rt = nrt; 1631 RT_REFCNT_TRACE(rt); 1632 switch (cmd) { 1633 case RTM_DELETE: 1634 rt_newmsg(cmd, rt); 1635 rt_free(rt); 1636 break; 1637 case RTM_LLINFO_UPD: 1638 if (cmd == RTM_LLINFO_UPD && ifa->ifa_rtrequest != NULL) 1639 ifa->ifa_rtrequest(RTM_LLINFO_UPD, rt, &info); 1640 rt_newmsg(RTM_CHANGE, rt); 1641 rt_unref(rt); 1642 break; 1643 case RTM_ADD: 1644 KASSERT(rt->rt_ifa == ifa); 1645 rt_newmsg(cmd, rt); 1646 rt_unref(rt); 1647 RT_REFCNT_TRACE(rt); 1648 break; 1649 } 1650 return error; 1651 } 1652 1653 /* 1654 * Create a local route entry for the address. 1655 * Announce the addition of the address and the route to the routing socket. 1656 */ 1657 int 1658 rt_ifa_addlocal(struct ifaddr *ifa) 1659 { 1660 struct rtentry *rt; 1661 int e; 1662 1663 /* If there is no loopback entry, allocate one. 
	 */
	rt = rtalloc1(ifa->ifa_addr, 0);
#ifdef RT_DEBUG
	if (rt != NULL)
		dump_rt(rt);
#endif
	/* Anything but an existing loopback host route needs a new entry. */
	if (rt == NULL || (rt->rt_flags & RTF_HOST) == 0 ||
	    (rt->rt_ifp->if_flags & IFF_LOOPBACK) == 0)
	{
		struct rt_addrinfo info;
		struct rtentry *nrt;

		memset(&info, 0, sizeof(info));
		info.rti_flags = RTF_HOST | RTF_LOCAL | RTF_DONTCHANGEIFA;
		info.rti_info[RTAX_DST] = ifa->ifa_addr;
		info.rti_info[RTAX_GATEWAY] =
		    (const struct sockaddr *)ifa->ifa_ifp->if_sadl;
		info.rti_ifa = ifa;
		nrt = NULL;
		e = rtrequest1(RTM_ADD, &info, &nrt);
		/* Announced whether or not the request succeeded. */
		rt_addrmsg_rt(RTM_ADD, ifa, e, nrt);
		if (nrt != NULL) {
			KASSERT(nrt->rt_ifa == ifa);
#ifdef RT_DEBUG
			dump_rt(nrt);
#endif
			rt_unref(nrt);
			RT_REFCNT_TRACE(nrt);
		}
	} else {
		e = 0;
		rt_addrmsg(RTM_NEWADDR, ifa);
	}
	/* Drop the reference taken by rtalloc1() above. */
	if (rt != NULL)
		rt_unref(rt);
	return e;
}

/*
 * Remove the local route entry for the address.
 * Announce the removal of the address and the route to the routing socket.
 */
int
rt_ifa_remlocal(struct ifaddr *ifa, struct ifaddr *alt_ifa)
{
	struct rtentry *rt;
	int e = 0;

	rt = rtalloc1(ifa->ifa_addr, 0);

	/*
	 * Before deleting, check if a corresponding loopbacked
	 * host route surely exists.  With this check, we can avoid
	 * deleting an interface direct route whose destination is
	 * the same as the address being removed.  This can happen
	 * when removing a subnet-router anycast address on an
	 * interface attached to a shared medium.
	 */
	if (rt != NULL &&
	    (rt->rt_flags & RTF_HOST) &&
	    (rt->rt_ifp->if_flags & IFF_LOOPBACK))
	{
		/* If we cannot replace the route's ifaddr with the equivalent
		 * ifaddr of another interface, I believe it is safest to
		 * delete the route.
		 */
		if (alt_ifa == NULL) {
			e = rtdeletemsg(rt);
			if (e == 0) {
				/* Drop our lookup reference, then free. */
				rt_unref(rt);
				rt_free(rt);
				rt = NULL;
			}
			rt_addrmsg(RTM_DELADDR, ifa);
		} else {
#ifdef NET_MPSAFE
			int error = rt_update_prepare(rt);
			if (error == 0) {
				rt_replace_ifa(rt, alt_ifa);
				rt_update_finish(rt);
			} else {
				/*
				 * If error != 0, the rtentry is being
				 * destroyed, so doing nothing doesn't
				 * matter.
				 */
			}
#else
			rt_replace_ifa(rt, alt_ifa);
#endif
			rt_newmsg(RTM_CHANGE, rt);
		}
	} else
		rt_addrmsg(RTM_DELADDR, ifa);
	/* rt is NULL here if the route was deleted and freed above. */
	if (rt != NULL)
		rt_unref(rt);
	return e;
}

/*
 * Route timer routines.  These routes allow functions to be called
 * for various routes at any time.  This is useful in supporting
 * path MTU discovery and redirect route deletion.
 *
 * This is similar to some BSDI internal functions, but it provides
 * for multiple queues for efficiency's sake...
 */

/* All timer queues created by rt_timer_queue_create(). */
LIST_HEAD(, rttimer_queue) rttimer_queue_head;
/* Set to 1 once rt_timer_init() has run. */
static int rt_init_done = 0;

/*
 * Some subtle order problems with domain initialization mean that
 * we cannot count on this being run from rt_init before various
 * protocol initializations are done.  Therefore, we make sure
 * that this is run when the first queue is added...
 */

static void rt_timer_work(struct work *, void *);

/*
 * One-time setup of the route timer machinery: the lock, the queue list,
 * the workqueue that runs rt_timer_work() and the callout that triggers
 * it after hz ticks.  Panics if the workqueue cannot be created.
 */
static void
rt_timer_init(void)
{
	int error;

	assert(rt_init_done == 0);

	/* XXX should be in rt_init */
	rw_init(&rt_lock);

	LIST_INIT(&rttimer_queue_head);
	callout_init(&rt_timer_ch, CALLOUT_MPSAFE);
	error = workqueue_create(&rt_timer_wq, "rt_timer",
	    rt_timer_work, NULL, PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE);
	if (error)
		panic("%s: workqueue_create failed (%d)\n", __func__, error);
	callout_reset(&rt_timer_ch, hz, rt_timer_timer, NULL);
	rt_init_done = 1;
}

/*
 * Allocate a timer queue with the given timeout and link it into the
 * global list of queues.  Initializes the timer machinery on first use.
 * Returns NULL if the queue cannot be allocated.
 */
struct rttimer_queue *
rt_timer_queue_create(u_int timeout)
{
	struct rttimer_queue *rtq;

	if (rt_init_done == 0)
		rt_timer_init();

	R_Malloc(rtq, struct rttimer_queue *, sizeof *rtq);
	if (rtq == NULL)
		return NULL;
	memset(rtq, 0, sizeof(*rtq));

	rtq->rtq_timeout = timeout;
	TAILQ_INIT(&rtq->rtq_head);
	RT_WLOCK();
	LIST_INSERT_HEAD(&rttimer_queue_head, rtq, rtq_link);
	RT_UNLOCK();

	return rtq;
}

/*
 * Change the timeout of an existing queue.  No lock is taken here.
 */
void
rt_timer_queue_change(struct rttimer_queue *rtq, long timeout)
{

	rtq->rtq_timeout = timeout;
}

/*
 * Run the callback of every timer still on rtq and free the timers.
 * Must be called with the write lock held; the lock is dropped around
 * each callback and re-taken afterwards.
 */
static void
rt_timer_queue_remove_all(struct rttimer_queue *rtq)
{
	struct rttimer *r;

	RT_ASSERT_WLOCK();

	while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL) {
		LIST_REMOVE(r, rtt_link);
		TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
		rt_ref(r->rtt_rt); /* XXX */
		RT_REFCNT_TRACE(r->rtt_rt);
		RT_UNLOCK();
		(*r->rtt_func)(r->rtt_rt, r);
		pool_put(&rttimer_pool, r);
		RT_WLOCK();
		if (rtq->rtq_count > 0)
			rtq->rtq_count--;
		else
			printf("rt_timer_queue_remove_all: "
			    "rtq_count reached 0\n");
	}
}

/*
 * Fire all timers of rtq and unlink the queue from the global list.
 */
void
rt_timer_queue_destroy(struct rttimer_queue *rtq)
{

	RT_WLOCK();
	rt_timer_queue_remove_all(rtq);
	LIST_REMOVE(rtq, rtq_link);
	RT_UNLOCK();

	/*
	 * Caller is responsible for freeing the rttimer_queue structure.
	 */
}

/*
 * Return the number of timers currently accounted to rtq.
 */
unsigned long
rt_timer_count(struct rttimer_queue *rtq)
{
	return rtq->rtq_count;
}

/*
 * Discard every timer attached to rt without running its callback.
 * Takes the write lock itself, so callers must not hold it.
 */
static void
rt_timer_remove_all(struct rtentry *rt)
{
	struct rttimer *r;

	RT_WLOCK();
	while ((r = LIST_FIRST(&rt->rt_timer)) != NULL) {
		/* Unlink from both the route's list and the queue. */
		LIST_REMOVE(r, rtt_link);
		TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next);
		if (r->rtt_queue->rtq_count > 0)
			r->rtt_queue->rtq_count--;
		else
			printf("rt_timer_remove_all: rtq_count reached 0\n");
		pool_put(&rttimer_pool, r);
	}
	RT_UNLOCK();
}

/*
 * Arm a timer on rt that will call func once the timeout of queue has
 * elapsed.  A timer already registered on rt with the same func is
 * re-used and restarted.  Returns 0, or ENOBUFS if no timer can be
 * allocated.
 */
int
rt_timer_add(struct rtentry *rt,
	void (*func)(struct rtentry *, struct rttimer *),
	struct rttimer_queue *queue)
{
	struct rttimer *r;

	KASSERT(func != NULL);
	RT_WLOCK();
	/*
	 * If there's already a timer with this action, destroy it before
	 * we add a new one.
	 */
	LIST_FOREACH(r, &rt->rt_timer, rtt_link) {
		if (r->rtt_func == func)
			break;
	}
	if (r != NULL) {
		/* Take the existing timer off its lists and recycle it. */
		LIST_REMOVE(r, rtt_link);
		TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next);
		if (r->rtt_queue->rtq_count > 0)
			r->rtt_queue->rtq_count--;
		else
			printf("rt_timer_add: rtq_count reached 0\n");
	} else {
		r = pool_get(&rttimer_pool, PR_NOWAIT);
		if (r == NULL) {
			RT_UNLOCK();
			return ENOBUFS;
		}
	}

	memset(r, 0, sizeof(*r));

	r->rtt_rt = rt;
	/* The timer runs from now on. */
	r->rtt_time = time_uptime;
	r->rtt_func = func;
	r->rtt_queue = queue;
	LIST_INSERT_HEAD(&rt->rt_timer, r, rtt_link);
	TAILQ_INSERT_TAIL(&queue->rtq_head, r, rtt_next);
	r->rtt_queue->rtq_count++;

	RT_UNLOCK();

	return 0;
}

/*
 * Workqueue handler: run the callbacks of all expired timers on every
 * queue, then re-arm the callout for the next pass.
 */
static void
rt_timer_work(struct work *wk, void *arg)
{
	struct rttimer_queue *rtq;
	struct rttimer *r;

	RT_WLOCK();
	LIST_FOREACH(rtq, &rttimer_queue_head, rtq_link) {
		/* New timers go to the tail, so stop at the first live one. */
		while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL &&
		    (r->rtt_time + rtq->rtq_timeout) < time_uptime) {
			LIST_REMOVE(r, rtt_link);
			TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
			/*
			 * Take a reference to avoid the rtentry is freed
			 * accidentally after RT_UNLOCK.  The callback
			 * (rtt_func) must rt_unref it by itself.
			 */
			rt_ref(r->rtt_rt);
			RT_REFCNT_TRACE(r->rtt_rt);
			RT_UNLOCK();
			(*r->rtt_func)(r->rtt_rt, r);
			pool_put(&rttimer_pool, r);
			RT_WLOCK();
			if (rtq->rtq_count > 0)
				rtq->rtq_count--;
			else
				printf("rt_timer_timer: rtq_count reached 0\n");
		}
	}
	RT_UNLOCK();

	callout_reset(&rt_timer_ch, hz, rt_timer_timer, NULL);
}

/*
 * Callout handler: hand the actual timer processing to the workqueue.
 */
static void
rt_timer_timer(void *arg)
{

	workqueue_enqueue(rt_timer_wq, &rt_timer_wk, NULL);
}

/*
 * Look up the route for the destination stored in ro and cache it.
 * flag is passed on to rtalloc1().  The cache must be empty on entry.
 * Returns the cached route, or NULL if there is no destination, no route,
 * or the route found is not RTF_UP.
 */
static struct rtentry *
_rtcache_init(struct route *ro, int flag)
{
	struct rtentry *rt;

	rtcache_invariants(ro);
	KASSERT(ro->_ro_rt == NULL);

	if (rtcache_getdst(ro) == NULL)
		return NULL;
	rt = rtalloc1(rtcache_getdst(ro), flag);
	if (rt != NULL) {
		RT_RLOCK();
		if (ISSET(rt->rt_flags, RTF_UP)) {
			ro->_ro_rt = rt;
			ro->ro_rtcache_generation = rtcache_generation;
			rtcache_ref(rt, ro);
		}
		RT_UNLOCK();
		/* Drop the rtalloc1() reference. */
		rt_unref(rt);
	}

	rtcache_invariants(ro);
	return ro->_ro_rt;
}

/*
 * Fill the cache, passing 1 as the rtalloc1() flag.
 */
struct rtentry *
rtcache_init(struct route *ro)
{

	return _rtcache_init(ro, 1);
}

/*
 * Fill the cache, passing 0 as the rtalloc1() flag.
 */
struct rtentry *
rtcache_init_noclone(struct route *ro)
{

	return _rtcache_init(ro, 0);
}

/*
 * Forget the cached route and look it up again.
 */
struct rtentry *
rtcache_update(struct route *ro, int clone)
{

	ro->_ro_rt = NULL;
	return _rtcache_init(ro, clone);
}

/*
 * Copy the destination and, if still valid, the cached route of old_ro
 * into new_ro.  new_ro is left without a route if old_ro has no
 * destination or the destination cannot be set.
 */
void
rtcache_copy(struct route *new_ro, struct route *old_ro)
{
	struct rtentry *rt;
	int ret;

	KASSERT(new_ro != old_ro);
	rtcache_invariants(new_ro);
	rtcache_invariants(old_ro);

	rt = rtcache_validate(old_ro);

	if (rtcache_getdst(old_ro) == NULL)
		goto out;
	ret = rtcache_setdst(new_ro, rtcache_getdst(old_ro));
	if (ret != 0)
		goto out;

	RT_RLOCK();
	new_ro->_ro_rt = rt;
	new_ro->ro_rtcache_generation = rtcache_generation;
	RT_UNLOCK();
	rtcache_invariants(new_ro);
out:
	/* Release what rtcache_validate() took; a NULL rt is a no-op. */
	rtcache_unref(rt, old_ro);
	return;
}

#if defined(RT_DEBUG) && defined(NET_MPSAFE)
/*
 * Debug aid: print which destination, CPU, LWP and psref objects are
 * involved in a cache reference operation.
 */
static void
rtcache_trace(const char *func, struct rtentry *rt, struct route *ro)
{
	char dst[64];

	sockaddr_format(ro->ro_sa, dst, 64);
	printf("trace: %s:\tdst=%s cpu=%d lwp=%p psref=%p target=%p\n", func, dst,
	    cpu_index(curcpu()), curlwp, &ro->ro_psref, &rt->rt_psref);
}
#define RTCACHE_PSREF_TRACE(rt, ro)	rtcache_trace(__func__, (rt), (ro))
#else
#define RTCACHE_PSREF_TRACE(rt, ro)	do {} while (0)
#endif

/*
 * Take a passive reference on rt on behalf of the route cache ro.
 * Does nothing unless NET_MPSAFE is defined.
 */
static void
rtcache_ref(struct rtentry *rt, struct route *ro)
{

	KASSERT(rt != NULL);

#ifdef NET_MPSAFE
	RTCACHE_PSREF_TRACE(rt, ro);
	/* The binding is undone by curlwp_bindx() in rtcache_unref(). */
	ro->ro_bound = curlwp_bind();
	/* XXX Use a real caller's address */
	PSREF_DEBUG_FILL_RETURN_ADDRESS(&ro->ro_psref);
	psref_acquire(&ro->ro_psref, &rt->rt_psref, rt_psref_class);
#endif
}

/*
 * Release the reference taken by rtcache_ref().  A NULL rt is ignored.
 */
void
rtcache_unref(struct rtentry *rt, struct route *ro)
{

	if (rt == NULL)
		return;

#ifdef NET_MPSAFE
	psref_release(&ro->ro_psref, &rt->rt_psref, rt_psref_class);
	curlwp_bindx(ro->ro_bound);
	RTCACHE_PSREF_TRACE(rt, ro);
#endif
}

/*
 * Return the route cached in ro, with a reference taken through
 * rtcache_ref(), if the cache is still current and the route is up.
 * Returns NULL otherwise.
 */
struct rtentry *
rtcache_validate(struct route *ro)
{
	struct rtentry *rt = NULL;

#ifdef NET_MPSAFE
retry:
#endif
	rtcache_invariants(ro);
	RT_RLOCK();
	if (ro->ro_rtcache_generation != rtcache_generation) {
		/* The cache is invalidated */
		rt = NULL;
		goto out;
	}

	rt = ro->_ro_rt;
	if (rt == NULL)
		goto out;

	if ((rt->rt_flags & RTF_UP) == 0) {
		rt = NULL;
		goto out;
	}
#ifdef NET_MPSAFE
	if (ISSET(rt->rt_flags, RTF_UPDATING)) {
		if (rt_wait_ok()) {
			RT_UNLOCK();

			/* We
			   can wait until the update is complete */
			rt_update_wait();
			goto retry;
		} else {
			rt = NULL;
		}
	} else
#endif
		rtcache_ref(rt, ro);
out:
	RT_UNLOCK();
	return rt;
}

/*
 * Return the route for dst, using the cache in ro when it already holds
 * a valid route for the same destination and refilling it otherwise.
 * When hitp is not NULL, *hitp is set to 1 on a cache hit and 0 on a miss.
 * clone is passed on to the lookup performed on a miss.
 */
struct rtentry *
rtcache_lookup2(struct route *ro, const struct sockaddr *dst,
    int clone, int *hitp)
{
	const struct sockaddr *odst;
	struct rtentry *rt = NULL;

	odst = rtcache_getdst(ro);
	if (odst == NULL)
		goto miss;

	/* A different destination makes the whole cache useless. */
	if (sockaddr_cmp(odst, dst) != 0) {
		rtcache_free(ro);
		goto miss;
	}

	rt = rtcache_validate(ro);
	if (rt == NULL) {
		ro->_ro_rt = NULL;
		goto miss;
	}

	rtcache_invariants(ro);

	if (hitp != NULL)
		*hitp = 1;
	return rt;
miss:
	if (hitp != NULL)
		*hitp = 0;
	if (rtcache_setdst(ro, dst) == 0)
		rt = _rtcache_init(ro, clone);

	rtcache_invariants(ro);

	return rt;
}

/*
 * Empty the cache: forget the route and free the stored destination.
 */
void
rtcache_free(struct route *ro)
{

	ro->_ro_rt = NULL;
	if (ro->ro_sa != NULL) {
		sockaddr_free(ro->ro_sa);
		ro->ro_sa = NULL;
	}
	rtcache_invariants(ro);
}

/*
 * Set the destination of the cache to a copy of sa and drop any cached
 * route.  Returns 0, or ENOMEM if the copy cannot be allocated.
 */
int
rtcache_setdst(struct route *ro, const struct sockaddr *sa)
{
	KASSERT(sa != NULL);

	rtcache_invariants(ro);
	if (ro->ro_sa != NULL) {
		/* Same family: overwrite the existing sockaddr in place. */
		if (ro->ro_sa->sa_family == sa->sa_family) {
			ro->_ro_rt = NULL;
			sockaddr_copy(ro->ro_sa, ro->ro_sa->sa_len, sa);
			rtcache_invariants(ro);
			return 0;
		}
		/* free ro_sa, wrong family */
		rtcache_free(ro);
	}

	KASSERT(ro->_ro_rt == NULL);

	if ((ro->ro_sa = sockaddr_dup(sa, M_ZERO | M_NOWAIT)) == NULL) {
		rtcache_invariants(ro);
		return ENOMEM;
	}
	rtcache_invariants(ro);
	return 0;
}

/*
 * percpu_foreach() callback: allocate the zeroed struct route that the
 * per-CPU slot p points to.
 */
static void
rtcache_percpu_init_cpu(void *p, void *arg __unused, struct cpu_info *ci __unused)
{
	struct route **rop = p;

	/*
	 * We can't
have struct route as percpu data because it can be destroyed 2229 * over a memory enlargement processing of percpu. 2230 */ 2231 *rop = kmem_zalloc(sizeof(**rop), KM_SLEEP); 2232 } 2233 2234 percpu_t * 2235 rtcache_percpu_alloc(void) 2236 { 2237 percpu_t *pc; 2238 2239 pc = percpu_alloc(sizeof(struct route *)); 2240 percpu_foreach(pc, rtcache_percpu_init_cpu, NULL); 2241 2242 return pc; 2243 } 2244 2245 const struct sockaddr * 2246 rt_settag(struct rtentry *rt, const struct sockaddr *tag) 2247 { 2248 if (rt->rt_tag != tag) { 2249 if (rt->rt_tag != NULL) 2250 sockaddr_free(rt->rt_tag); 2251 rt->rt_tag = sockaddr_dup(tag, M_ZERO | M_NOWAIT); 2252 } 2253 return rt->rt_tag; 2254 } 2255 2256 struct sockaddr * 2257 rt_gettag(const struct rtentry *rt) 2258 { 2259 return rt->rt_tag; 2260 } 2261 2262 int 2263 rt_check_reject_route(const struct rtentry *rt, const struct ifnet *ifp) 2264 { 2265 2266 if ((rt->rt_flags & RTF_REJECT) != 0) { 2267 /* Mimic looutput */ 2268 if (ifp->if_flags & IFF_LOOPBACK) 2269 return (rt->rt_flags & RTF_HOST) ? 2270 EHOSTUNREACH : ENETUNREACH; 2271 else if (rt->rt_rmx.rmx_expire == 0 || 2272 time_uptime < rt->rt_rmx.rmx_expire) 2273 return (rt->rt_flags & RTF_GATEWAY) ? 
			    EHOSTUNREACH : EHOSTDOWN;
	}

	return 0;
}

/*
 * Delete every route of the given address family for which f(rt, v)
 * selects the entry, one entry per iteration: the match is searched
 * under the read lock and then removed through rtrequest(RTM_DELETE).
 * Returns once no matching entry is left.
 */
void
rt_delete_matched_entries(sa_family_t family, int (*f)(struct rtentry *, void *),
    void *v)
{

	for (;;) {
		int s;
		int error;
		struct rtentry *rt, *retrt = NULL;

		RT_RLOCK();
		s = splsoftnet();
		rt = rtbl_search_matched_entry(family, f, v);
		if (rt == NULL) {
			splx(s);
			RT_UNLOCK();
			return;
		}
		/* Keep the entry alive across the unlocked deletion below. */
		rt_ref(rt);
		splx(s);
		RT_UNLOCK();

		error = rtrequest(RTM_DELETE, rt_getkey(rt), rt->rt_gateway,
		    rt_mask(rt), rt->rt_flags, &retrt);
		if (error == 0) {
			KASSERT(retrt == rt);
			KASSERT((retrt->rt_flags & RTF_UP) == 0);
			retrt->rt_ifp = NULL;
			/* One reference is ours, one came from rtrequest(). */
			rt_unref(rt);
			rt_free(retrt);
		} else if (error == ESRCH) {
			/* Someone deleted the entry already. */
			rt_unref(rt);
		} else {
			/*
			 * NOTE(review): the reference taken by rt_ref()
			 * above does not appear to be dropped on this path,
			 * and the next iteration will presumably find the
			 * same entry again -- confirm.
			 */
			log(LOG_ERR, "%s: unable to delete rtentry @ %p, "
			    "error = %d\n", rt->rt_ifp->if_xname, rt, error);
			/* XXX how to treat this case?
*/ 2317 } 2318 } 2319 } 2320 2321 static int 2322 rt_walktree_locked(sa_family_t family, int (*f)(struct rtentry *, void *), 2323 void *v) 2324 { 2325 2326 return rtbl_walktree(family, f, v); 2327 } 2328 2329 int 2330 rt_walktree(sa_family_t family, int (*f)(struct rtentry *, void *), void *v) 2331 { 2332 int error; 2333 2334 RT_RLOCK(); 2335 error = rt_walktree_locked(family, f, v); 2336 RT_UNLOCK(); 2337 2338 return error; 2339 } 2340 2341 #ifdef DDB 2342 2343 #include <machine/db_machdep.h> 2344 #include <ddb/db_interface.h> 2345 #include <ddb/db_output.h> 2346 2347 #define rt_expire rt_rmx.rmx_expire 2348 2349 static void 2350 db_print_sa(const struct sockaddr *sa) 2351 { 2352 int len; 2353 const u_char *p; 2354 2355 if (sa == NULL) { 2356 db_printf("[NULL]"); 2357 return; 2358 } 2359 2360 p = (const u_char *)sa; 2361 len = sa->sa_len; 2362 db_printf("["); 2363 while (len > 0) { 2364 db_printf("%d", *p); 2365 p++; len--; 2366 if (len) db_printf(","); 2367 } 2368 db_printf("]\n"); 2369 } 2370 2371 static void 2372 db_print_ifa(struct ifaddr *ifa) 2373 { 2374 if (ifa == NULL) 2375 return; 2376 db_printf(" ifa_addr="); 2377 db_print_sa(ifa->ifa_addr); 2378 db_printf(" ifa_dsta="); 2379 db_print_sa(ifa->ifa_dstaddr); 2380 db_printf(" ifa_mask="); 2381 db_print_sa(ifa->ifa_netmask); 2382 db_printf(" flags=0x%x,refcnt=%d,metric=%d\n", 2383 ifa->ifa_flags, 2384 ifa->ifa_refcnt, 2385 ifa->ifa_metric); 2386 } 2387 2388 /* 2389 * Function to pass to rt_walktree(). 2390 * Return non-zero error to abort walk. 
2391 */ 2392 static int 2393 db_show_rtentry(struct rtentry *rt, void *w) 2394 { 2395 db_printf("rtentry=%p", rt); 2396 2397 db_printf(" flags=0x%x refcnt=%d use=%"PRId64" expire=%"PRId64"\n", 2398 rt->rt_flags, rt->rt_refcnt, 2399 rt->rt_use, (uint64_t)rt->rt_expire); 2400 2401 db_printf(" key="); db_print_sa(rt_getkey(rt)); 2402 db_printf(" mask="); db_print_sa(rt_mask(rt)); 2403 db_printf(" gw="); db_print_sa(rt->rt_gateway); 2404 2405 db_printf(" ifp=%p ", rt->rt_ifp); 2406 if (rt->rt_ifp) 2407 db_printf("(%s)", rt->rt_ifp->if_xname); 2408 else 2409 db_printf("(NULL)"); 2410 2411 db_printf(" ifa=%p\n", rt->rt_ifa); 2412 db_print_ifa(rt->rt_ifa); 2413 2414 db_printf(" gwroute=%p llinfo=%p\n", 2415 rt->rt_gwroute, rt->rt_llinfo); 2416 2417 return 0; 2418 } 2419 2420 /* 2421 * Function to print all the route trees. 2422 * Use this from ddb: "show routes" 2423 */ 2424 void 2425 db_show_routes(db_expr_t addr, bool have_addr, 2426 db_expr_t count, const char *modif) 2427 { 2428 2429 /* Taking RT_LOCK will fail if LOCKDEBUG is enabled. */ 2430 rt_walktree_locked(AF_INET, db_show_rtentry, NULL); 2431 } 2432 #endif 2433