1 /* $NetBSD: route.c,v 1.203 2018/01/09 19:52:29 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 1998, 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Kevin M. Lahey of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 35 * All rights reserved. 36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. Redistributions of source code must retain the above copyright 41 * notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. Neither the name of the project nor the names of its contributors 46 * may be used to endorse or promote products derived from this software 47 * without specific prior written permission. 48 * 49 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 52 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 59 * SUCH DAMAGE. 60 */ 61 62 /* 63 * Copyright (c) 1980, 1986, 1991, 1993 64 * The Regents of the University of California. All rights reserved. 65 * 66 * Redistribution and use in source and binary forms, with or without 67 * modification, are permitted provided that the following conditions 68 * are met: 69 * 1. Redistributions of source code must retain the above copyright 70 * notice, this list of conditions and the following disclaimer. 71 * 2. Redistributions in binary form must reproduce the above copyright 72 * notice, this list of conditions and the following disclaimer in the 73 * documentation and/or other materials provided with the distribution. 74 * 3. Neither the name of the University nor the names of its contributors 75 * may be used to endorse or promote products derived from this software 76 * without specific prior written permission. 77 * 78 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 79 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 80 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 81 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 82 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 83 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 84 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 85 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 86 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 87 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 88 * SUCH DAMAGE. 89 * 90 * @(#)route.c 8.3 (Berkeley) 1/9/95 91 */ 92 93 #ifdef _KERNEL_OPT 94 #include "opt_inet.h" 95 #include "opt_route.h" 96 #include "opt_net_mpsafe.h" 97 #endif 98 99 #include <sys/cdefs.h> 100 __KERNEL_RCSID(0, "$NetBSD: route.c,v 1.203 2018/01/09 19:52:29 christos Exp $"); 101 102 #include <sys/param.h> 103 #ifdef RTFLUSH_DEBUG 104 #include <sys/sysctl.h> 105 #endif 106 #include <sys/systm.h> 107 #include <sys/callout.h> 108 #include <sys/proc.h> 109 #include <sys/mbuf.h> 110 #include <sys/socket.h> 111 #include <sys/socketvar.h> 112 #include <sys/domain.h> 113 #include <sys/kernel.h> 114 #include <sys/ioctl.h> 115 #include <sys/pool.h> 116 #include <sys/kauth.h> 117 #include <sys/workqueue.h> 118 #include <sys/syslog.h> 119 #include <sys/rwlock.h> 120 #include <sys/mutex.h> 121 #include <sys/cpu.h> 122 123 #include <net/if.h> 124 #include <net/if_dl.h> 125 #include <net/route.h> 126 #if defined(INET) || defined(INET6) 127 #include <net/if_llatbl.h> 128 #endif 129 130 #include <netinet/in.h> 131 #include <netinet/in_var.h> 132 133 #ifdef RTFLUSH_DEBUG 134 #define rtcache_debug() __predict_false(_rtcache_debug) 135 #else /* RTFLUSH_DEBUG */ 136 #define rtcache_debug() 0 137 #endif /* RTFLUSH_DEBUG */ 138 139 #ifdef RT_DEBUG 140 #define RT_REFCNT_TRACE(rt) printf("%s:%d: rt=%p refcnt=%d\n", \ 141 __func__, __LINE__, (rt), (rt)->rt_refcnt) 142 #else 143 #define RT_REFCNT_TRACE(rt) do {} while (0) 144 #endif 145 146 #ifdef DEBUG 147 #define dlog(level, fmt, args...) log(level, fmt, ##args) 148 #else 149 #define dlog(level, fmt, args...) do {} while (0) 150 #endif 151 152 struct rtstat rtstat; 153 154 static int rttrash; /* routes not in table but not freed */ 155 156 static struct pool rtentry_pool; 157 static struct pool rttimer_pool; 158 159 static struct callout rt_timer_ch; /* callout for rt_timer_timer() */ 160 static struct workqueue *rt_timer_wq; 161 static struct work rt_timer_wk; 162 163 static void rt_timer_init(void); 164 static void rt_timer_queue_remove_all(struct rttimer_queue *); 165 static void rt_timer_remove_all(struct rtentry *); 166 static void rt_timer_timer(void *); 167 168 /* 169 * Locking notes: 170 * - The routing table is protected by a global rwlock 171 * - API: RT_RLOCK and friends 172 * - rtcaches are NOT protected by the framework 173 * - Callers must guarantee a rtcache isn't accessed simultaneously 174 * - How the constraint is guranteed in the wild 175 * - Protect a rtcache by a mutex (e.g., inp_route) 176 * - Make rtcache per-CPU and allow only accesses from softint 177 * (e.g., ipforward_rt_percpu) 178 * - References to a rtentry is managed by reference counting and psref 179 * - Reference couting is used for temporal reference when a rtentry 180 * is fetched from the routing table 181 * - psref is used for temporal reference when a rtentry is fetched 182 * from a rtcache 183 * - struct route (rtcache) has struct psref, so we cannot obtain 184 * a reference twice on the same struct route 185 * - Befere destroying or updating a rtentry, we have to wait for 186 * all references left (see below for details) 187 * - APIs 188 * - An obtained rtentry via rtalloc1 or rtrequest* must be 189 * unreferenced by rt_unref 190 * - An obtained rtentry via rtcache_* must be unreferenced by 191 * rtcache_unref 192 * - TODO: once we get a lockless routing table, we should use only 193 * psref for rtentries 194 * - rtentry destruction 195 * - A rtentry is destroyed (freed) only when we call rtrequest(RTM_DELETE) 196 * - If a caller of rtrequest grabs a reference of a rtentry, the caller 197 * has a responsibility to destroy the rtentry by itself by calling 198 * rt_free 199 * - If not, rtrequest itself does that 200 * - If rt_free is called in softint, the actual destruction routine is 201 * deferred to a workqueue 202 * - rtentry update 203 * - When updating a rtentry, RTF_UPDATING flag is set 204 * - If a rtentry is set RTF_UPDATING, fetching the rtentry from 205 * the routing table or a rtcache results in either of the following 206 * cases: 207 * - if the caller runs in softint, the caller fails to fetch 208 * - otherwise, the caller waits for the update completed and retries 209 * to fetch (probably succeed to fetch for the second time) 210 * - rtcache invalidation 211 * - There is a global generation counter that is incremented when 212 * any routes have been added or deleted 213 * - When a rtcache caches a rtentry into itself, it also stores 214 * a snapshot of the generation counter 215 * - If the snapshot equals to the global counter, the cache is valid, 216 * otherwise the cache is invalidated 217 */ 218 219 /* 220 * Global lock for the routing table. 221 */ 222 static krwlock_t rt_lock __cacheline_aligned; 223 #ifdef NET_MPSAFE 224 #define RT_RLOCK() rw_enter(&rt_lock, RW_READER) 225 #define RT_WLOCK() rw_enter(&rt_lock, RW_WRITER) 226 #define RT_UNLOCK() rw_exit(&rt_lock) 227 #define RT_LOCKED() rw_lock_held(&rt_lock) 228 #define RT_ASSERT_WLOCK() KASSERT(rw_write_held(&rt_lock)) 229 #else 230 #define RT_RLOCK() do {} while (0) 231 #define RT_WLOCK() do {} while (0) 232 #define RT_UNLOCK() do {} while (0) 233 #define RT_LOCKED() false 234 #define RT_ASSERT_WLOCK() do {} while (0) 235 #endif 236 237 static uint64_t rtcache_generation; 238 239 /* 240 * mutex and cv that are used to wait for references to a rtentry left 241 * before updating the rtentry. 242 */ 243 static struct { 244 kmutex_t lock; 245 kcondvar_t cv; 246 bool ongoing; 247 const struct lwp *lwp; 248 } rt_update_global __cacheline_aligned; 249 250 /* 251 * A workqueue and stuff that are used to defer the destruction routine 252 * of rtentries. 253 */ 254 static struct { 255 struct workqueue *wq; 256 struct work wk; 257 kmutex_t lock; 258 SLIST_HEAD(, rtentry) queue; 259 } rt_free_global __cacheline_aligned; 260 261 /* psref for rtentry */ 262 static struct psref_class *rt_psref_class __read_mostly; 263 264 #ifdef RTFLUSH_DEBUG 265 static int _rtcache_debug = 0; 266 #endif /* RTFLUSH_DEBUG */ 267 268 static kauth_listener_t route_listener; 269 270 static int rtdeletemsg(struct rtentry *); 271 272 static void rt_maskedcopy(const struct sockaddr *, 273 struct sockaddr *, const struct sockaddr *); 274 275 static void rtcache_invalidate(void); 276 277 static void rt_ref(struct rtentry *); 278 279 static struct rtentry * 280 rtalloc1_locked(const struct sockaddr *, int, bool, bool); 281 282 static void rtcache_ref(struct rtentry *, struct route *); 283 284 #ifdef NET_MPSAFE 285 static void rt_update_wait(void); 286 #endif 287 288 static bool rt_wait_ok(void); 289 static void rt_wait_refcnt(const char *, struct rtentry *, int); 290 static void rt_wait_psref(struct rtentry *); 291 292 #ifdef DDB 293 static void db_print_sa(const struct sockaddr *); 294 static void db_print_ifa(struct ifaddr *); 295 static int db_show_rtentry(struct rtentry *, void *); 296 #endif 297 298 #ifdef RTFLUSH_DEBUG 299 static void sysctl_net_rtcache_setup(struct sysctllog **); 300 static void 301 sysctl_net_rtcache_setup(struct sysctllog **clog) 302 { 303 const struct sysctlnode *rnode; 304 305 if (sysctl_createv(clog, 0, NULL, &rnode, CTLFLAG_PERMANENT, 306 CTLTYPE_NODE, 307 "rtcache", SYSCTL_DESCR("Route cache related settings"), 308 NULL, 0, NULL, 0, CTL_NET, CTL_CREATE, CTL_EOL) != 0) 309 return; 310 if (sysctl_createv(clog, 0, &rnode, &rnode, 311 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, 312 "debug", SYSCTL_DESCR("Debug route caches"), 313 NULL, 0, &_rtcache_debug, 0, CTL_CREATE, CTL_EOL) != 0) 314 return; 315 } 316 #endif /* RTFLUSH_DEBUG */ 317 318 static inline void 319 rt_destroy(struct rtentry *rt) 320 { 321 if (rt->_rt_key != NULL) 322 sockaddr_free(rt->_rt_key); 323 if (rt->rt_gateway != NULL) 324 sockaddr_free(rt->rt_gateway); 325 if (rt_gettag(rt) != NULL) 326 sockaddr_free(rt_gettag(rt)); 327 rt->_rt_key = rt->rt_gateway = rt->rt_tag = NULL; 328 } 329 330 static inline const struct sockaddr * 331 rt_setkey(struct rtentry *rt, const struct sockaddr *key, int flags) 332 { 333 if (rt->_rt_key == key) 334 goto out; 335 336 if (rt->_rt_key != NULL) 337 sockaddr_free(rt->_rt_key); 338 rt->_rt_key = sockaddr_dup(key, flags); 339 out: 340 rt->rt_nodes->rn_key = (const char *)rt->_rt_key; 341 return rt->_rt_key; 342 } 343 344 struct ifaddr * 345 rt_get_ifa(struct rtentry *rt) 346 { 347 struct ifaddr *ifa; 348 349 if ((ifa = rt->rt_ifa) == NULL) 350 return ifa; 351 else if (ifa->ifa_getifa == NULL) 352 return ifa; 353 #if 0 354 else if (ifa->ifa_seqno != NULL && *ifa->ifa_seqno == rt->rt_ifa_seqno) 355 return ifa; 356 #endif 357 else { 358 ifa = (*ifa->ifa_getifa)(ifa, rt_getkey(rt)); 359 if (ifa == NULL) 360 return NULL; 361 rt_replace_ifa(rt, ifa); 362 return ifa; 363 } 364 } 365 366 static void 367 rt_set_ifa1(struct rtentry *rt, struct ifaddr *ifa) 368 { 369 rt->rt_ifa = ifa; 370 if (ifa->ifa_seqno != NULL) 371 rt->rt_ifa_seqno = *ifa->ifa_seqno; 372 } 373 374 /* 375 * Is this route the connected route for the ifa? 376 */ 377 static int 378 rt_ifa_connected(const struct rtentry *rt, const struct ifaddr *ifa) 379 { 380 const struct sockaddr *key, *dst, *odst; 381 struct sockaddr_storage maskeddst; 382 383 key = rt_getkey(rt); 384 dst = rt->rt_flags & RTF_HOST ? ifa->ifa_dstaddr : ifa->ifa_addr; 385 if (dst == NULL || 386 dst->sa_family != key->sa_family || 387 dst->sa_len != key->sa_len) 388 return 0; 389 if ((rt->rt_flags & RTF_HOST) == 0 && ifa->ifa_netmask) { 390 odst = dst; 391 dst = (struct sockaddr *)&maskeddst; 392 rt_maskedcopy(odst, (struct sockaddr *)&maskeddst, 393 ifa->ifa_netmask); 394 } 395 return (memcmp(dst, key, dst->sa_len) == 0); 396 } 397 398 void 399 rt_replace_ifa(struct rtentry *rt, struct ifaddr *ifa) 400 { 401 if (rt->rt_ifa && 402 rt->rt_ifa != ifa && 403 rt->rt_ifa->ifa_flags & IFA_ROUTE && 404 rt_ifa_connected(rt, rt->rt_ifa)) 405 { 406 RT_DPRINTF("rt->_rt_key = %p, ifa = %p, " 407 "replace deleted IFA_ROUTE\n", 408 (void *)rt->_rt_key, (void *)rt->rt_ifa); 409 rt->rt_ifa->ifa_flags &= ~IFA_ROUTE; 410 if (rt_ifa_connected(rt, ifa)) { 411 RT_DPRINTF("rt->_rt_key = %p, ifa = %p, " 412 "replace added IFA_ROUTE\n", 413 (void *)rt->_rt_key, (void *)ifa); 414 ifa->ifa_flags |= IFA_ROUTE; 415 } 416 } 417 418 ifaref(ifa); 419 ifafree(rt->rt_ifa); 420 rt_set_ifa1(rt, ifa); 421 } 422 423 static void 424 rt_set_ifa(struct rtentry *rt, struct ifaddr *ifa) 425 { 426 ifaref(ifa); 427 rt_set_ifa1(rt, ifa); 428 } 429 430 static int 431 route_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie, 432 void *arg0, void *arg1, void *arg2, void *arg3) 433 { 434 struct rt_msghdr *rtm; 435 int result; 436 437 result = KAUTH_RESULT_DEFER; 438 rtm = arg1; 439 440 if (action != KAUTH_NETWORK_ROUTE) 441 return result; 442 443 if (rtm->rtm_type == RTM_GET) 444 result = KAUTH_RESULT_ALLOW; 445 446 return result; 447 } 448 449 static void rt_free_work(struct work *, void *); 450 451 void 452 rt_init(void) 453 { 454 int error; 455 456 #ifdef RTFLUSH_DEBUG 457 sysctl_net_rtcache_setup(NULL); 458 #endif 459 460 mutex_init(&rt_free_global.lock, MUTEX_DEFAULT, IPL_SOFTNET); 461 SLIST_INIT(&rt_free_global.queue); 462 463 rt_psref_class = psref_class_create("rtentry", IPL_SOFTNET); 464 465 error = workqueue_create(&rt_free_global.wq, "rt_free", 466 rt_free_work, NULL, PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE); 467 if (error) 468 panic("%s: workqueue_create failed (%d)\n", __func__, error); 469 470 mutex_init(&rt_update_global.lock, MUTEX_DEFAULT, IPL_SOFTNET); 471 cv_init(&rt_update_global.cv, "rt_update"); 472 473 pool_init(&rtentry_pool, sizeof(struct rtentry), 0, 0, 0, "rtentpl", 474 NULL, IPL_SOFTNET); 475 pool_init(&rttimer_pool, sizeof(struct rttimer), 0, 0, 0, "rttmrpl", 476 NULL, IPL_SOFTNET); 477 478 rn_init(); /* initialize all zeroes, all ones, mask table */ 479 rtbl_init(); 480 481 route_listener = kauth_listen_scope(KAUTH_SCOPE_NETWORK, 482 route_listener_cb, NULL); 483 } 484 485 static void 486 rtcache_invalidate(void) 487 { 488 489 RT_ASSERT_WLOCK(); 490 491 if (rtcache_debug()) 492 printf("%s: enter\n", __func__); 493 494 rtcache_generation++; 495 } 496 497 #ifdef RT_DEBUG 498 static void 499 dump_rt(const struct rtentry *rt) 500 { 501 char buf[512]; 502 503 aprint_normal("rt: "); 504 aprint_normal("p=%p ", rt); 505 if (rt->_rt_key == NULL) { 506 aprint_normal("dst=(NULL) "); 507 } else { 508 sockaddr_format(rt->_rt_key, buf, sizeof(buf)); 509 aprint_normal("dst=%s ", buf); 510 } 511 if (rt->rt_gateway == NULL) { 512 aprint_normal("gw=(NULL) "); 513 } else { 514 sockaddr_format(rt->_rt_key, buf, sizeof(buf)); 515 aprint_normal("gw=%s ", buf); 516 } 517 aprint_normal("flags=%x ", rt->rt_flags); 518 if (rt->rt_ifp == NULL) { 519 aprint_normal("if=(NULL) "); 520 } else { 521 aprint_normal("if=%s ", rt->rt_ifp->if_xname); 522 } 523 aprint_normal("\n"); 524 } 525 #endif /* RT_DEBUG */ 526 527 /* 528 * Packet routing routines. If success, refcnt of a returned rtentry 529 * will be incremented. The caller has to rtfree it by itself. 530 */ 531 struct rtentry * 532 rtalloc1_locked(const struct sockaddr *dst, int report, bool wait_ok, 533 bool wlock) 534 { 535 rtbl_t *rtbl; 536 struct rtentry *rt; 537 int s; 538 539 #ifdef NET_MPSAFE 540 retry: 541 #endif 542 s = splsoftnet(); 543 rtbl = rt_gettable(dst->sa_family); 544 if (rtbl == NULL) 545 goto miss; 546 547 rt = rt_matchaddr(rtbl, dst); 548 if (rt == NULL) 549 goto miss; 550 551 if (!ISSET(rt->rt_flags, RTF_UP)) 552 goto miss; 553 554 #ifdef NET_MPSAFE 555 if (ISSET(rt->rt_flags, RTF_UPDATING) && 556 /* XXX updater should be always able to acquire */ 557 curlwp != rt_update_global.lwp) { 558 if (!wait_ok || !rt_wait_ok()) 559 goto miss; 560 RT_UNLOCK(); 561 splx(s); 562 563 /* We can wait until the update is complete */ 564 rt_update_wait(); 565 566 if (wlock) 567 RT_WLOCK(); 568 else 569 RT_RLOCK(); 570 goto retry; 571 } 572 #endif /* NET_MPSAFE */ 573 574 rt_ref(rt); 575 RT_REFCNT_TRACE(rt); 576 577 splx(s); 578 return rt; 579 miss: 580 rtstat.rts_unreach++; 581 if (report) { 582 struct rt_addrinfo info; 583 584 memset(&info, 0, sizeof(info)); 585 info.rti_info[RTAX_DST] = dst; 586 rt_missmsg(RTM_MISS, &info, 0, 0); 587 } 588 splx(s); 589 return NULL; 590 } 591 592 struct rtentry * 593 rtalloc1(const struct sockaddr *dst, int report) 594 { 595 struct rtentry *rt; 596 597 RT_RLOCK(); 598 rt = rtalloc1_locked(dst, report, true, false); 599 RT_UNLOCK(); 600 601 return rt; 602 } 603 604 static void 605 rt_ref(struct rtentry *rt) 606 { 607 608 KASSERT(rt->rt_refcnt >= 0); 609 atomic_inc_uint(&rt->rt_refcnt); 610 } 611 612 void 613 rt_unref(struct rtentry *rt) 614 { 615 616 KASSERT(rt != NULL); 617 KASSERTMSG(rt->rt_refcnt > 0, "refcnt=%d", rt->rt_refcnt); 618 619 atomic_dec_uint(&rt->rt_refcnt); 620 if (!ISSET(rt->rt_flags, RTF_UP) || ISSET(rt->rt_flags, RTF_UPDATING)) { 621 mutex_enter(&rt_free_global.lock); 622 cv_broadcast(&rt->rt_cv); 623 mutex_exit(&rt_free_global.lock); 624 } 625 } 626 627 static bool 628 rt_wait_ok(void) 629 { 630 631 KASSERT(!cpu_intr_p()); 632 return !cpu_softintr_p(); 633 } 634 635 void 636 rt_wait_refcnt(const char *title, struct rtentry *rt, int cnt) 637 { 638 mutex_enter(&rt_free_global.lock); 639 while (rt->rt_refcnt > cnt) { 640 dlog(LOG_DEBUG, "%s: %s waiting (refcnt=%d)\n", 641 __func__, title, rt->rt_refcnt); 642 cv_wait(&rt->rt_cv, &rt_free_global.lock); 643 dlog(LOG_DEBUG, "%s: %s waited (refcnt=%d)\n", 644 __func__, title, rt->rt_refcnt); 645 } 646 mutex_exit(&rt_free_global.lock); 647 } 648 649 void 650 rt_wait_psref(struct rtentry *rt) 651 { 652 653 psref_target_destroy(&rt->rt_psref, rt_psref_class); 654 psref_target_init(&rt->rt_psref, rt_psref_class); 655 } 656 657 static void 658 _rt_free(struct rtentry *rt) 659 { 660 struct ifaddr *ifa; 661 662 /* 663 * Need to avoid a deadlock on rt_wait_refcnt of update 664 * and a conflict on psref_target_destroy of update. 665 */ 666 #ifdef NET_MPSAFE 667 rt_update_wait(); 668 #endif 669 670 RT_REFCNT_TRACE(rt); 671 KASSERTMSG(rt->rt_refcnt >= 0, "refcnt=%d", rt->rt_refcnt); 672 rt_wait_refcnt("free", rt, 0); 673 #ifdef NET_MPSAFE 674 psref_target_destroy(&rt->rt_psref, rt_psref_class); 675 #endif 676 677 rt_assert_inactive(rt); 678 rttrash--; 679 ifa = rt->rt_ifa; 680 rt->rt_ifa = NULL; 681 ifafree(ifa); 682 rt->rt_ifp = NULL; 683 cv_destroy(&rt->rt_cv); 684 rt_destroy(rt); 685 pool_put(&rtentry_pool, rt); 686 } 687 688 static void 689 rt_free_work(struct work *wk, void *arg) 690 { 691 692 for (;;) { 693 struct rtentry *rt; 694 695 mutex_enter(&rt_free_global.lock); 696 if ((rt = SLIST_FIRST(&rt_free_global.queue)) == NULL) { 697 mutex_exit(&rt_free_global.lock); 698 return; 699 } 700 SLIST_REMOVE_HEAD(&rt_free_global.queue, rt_free); 701 mutex_exit(&rt_free_global.lock); 702 atomic_dec_uint(&rt->rt_refcnt); 703 _rt_free(rt); 704 } 705 } 706 707 void 708 rt_free(struct rtentry *rt) 709 { 710 711 KASSERT(rt->rt_refcnt > 0); 712 if (rt_wait_ok()) { 713 atomic_dec_uint(&rt->rt_refcnt); 714 _rt_free(rt); 715 return; 716 } 717 718 mutex_enter(&rt_free_global.lock); 719 rt_ref(rt); 720 SLIST_INSERT_HEAD(&rt_free_global.queue, rt, rt_free); 721 mutex_exit(&rt_free_global.lock); 722 workqueue_enqueue(rt_free_global.wq, &rt_free_global.wk, NULL); 723 } 724 725 #ifdef NET_MPSAFE 726 static void 727 rt_update_wait(void) 728 { 729 730 mutex_enter(&rt_update_global.lock); 731 while (rt_update_global.ongoing) { 732 dlog(LOG_DEBUG, "%s: waiting lwp=%p\n", __func__, curlwp); 733 cv_wait(&rt_update_global.cv, &rt_update_global.lock); 734 dlog(LOG_DEBUG, "%s: waited lwp=%p\n", __func__, curlwp); 735 } 736 mutex_exit(&rt_update_global.lock); 737 } 738 #endif 739 740 int 741 rt_update_prepare(struct rtentry *rt) 742 { 743 744 dlog(LOG_DEBUG, "%s: updating rt=%p lwp=%p\n", __func__, rt, curlwp); 745 746 RT_WLOCK(); 747 /* If the entry is being destroyed, don't proceed the update. */ 748 if (!ISSET(rt->rt_flags, RTF_UP)) { 749 RT_UNLOCK(); 750 return -1; 751 } 752 rt->rt_flags |= RTF_UPDATING; 753 RT_UNLOCK(); 754 755 mutex_enter(&rt_update_global.lock); 756 while (rt_update_global.ongoing) { 757 dlog(LOG_DEBUG, "%s: waiting ongoing updating rt=%p lwp=%p\n", 758 __func__, rt, curlwp); 759 cv_wait(&rt_update_global.cv, &rt_update_global.lock); 760 dlog(LOG_DEBUG, "%s: waited ongoing updating rt=%p lwp=%p\n", 761 __func__, rt, curlwp); 762 } 763 rt_update_global.ongoing = true; 764 /* XXX need it to avoid rt_update_wait by updater itself. */ 765 rt_update_global.lwp = curlwp; 766 mutex_exit(&rt_update_global.lock); 767 768 rt_wait_refcnt("update", rt, 1); 769 rt_wait_psref(rt); 770 771 return 0; 772 } 773 774 void 775 rt_update_finish(struct rtentry *rt) 776 { 777 778 RT_WLOCK(); 779 rt->rt_flags &= ~RTF_UPDATING; 780 RT_UNLOCK(); 781 782 mutex_enter(&rt_update_global.lock); 783 rt_update_global.ongoing = false; 784 rt_update_global.lwp = NULL; 785 cv_broadcast(&rt_update_global.cv); 786 mutex_exit(&rt_update_global.lock); 787 788 dlog(LOG_DEBUG, "%s: updated rt=%p lwp=%p\n", __func__, rt, curlwp); 789 } 790 791 /* 792 * Force a routing table entry to the specified 793 * destination to go through the given gateway. 794 * Normally called as a result of a routing redirect 795 * message from the network layer. 796 * 797 * N.B.: must be called at splsoftnet 798 */ 799 void 800 rtredirect(const struct sockaddr *dst, const struct sockaddr *gateway, 801 const struct sockaddr *netmask, int flags, const struct sockaddr *src, 802 struct rtentry **rtp) 803 { 804 struct rtentry *rt; 805 int error = 0; 806 uint64_t *stat = NULL; 807 struct rt_addrinfo info; 808 struct ifaddr *ifa; 809 struct psref psref; 810 811 /* verify the gateway is directly reachable */ 812 if ((ifa = ifa_ifwithnet_psref(gateway, &psref)) == NULL) { 813 error = ENETUNREACH; 814 goto out; 815 } 816 rt = rtalloc1(dst, 0); 817 /* 818 * If the redirect isn't from our current router for this dst, 819 * it's either old or wrong. If it redirects us to ourselves, 820 * we have a routing loop, perhaps as a result of an interface 821 * going down recently. 822 */ 823 if (!(flags & RTF_DONE) && rt && 824 (sockaddr_cmp(src, rt->rt_gateway) != 0 || rt->rt_ifa != ifa)) 825 error = EINVAL; 826 else { 827 int s = pserialize_read_enter(); 828 struct ifaddr *_ifa; 829 830 _ifa = ifa_ifwithaddr(gateway); 831 if (_ifa != NULL) 832 error = EHOSTUNREACH; 833 pserialize_read_exit(s); 834 } 835 if (error) 836 goto done; 837 /* 838 * Create a new entry if we just got back a wildcard entry 839 * or the lookup failed. This is necessary for hosts 840 * which use routing redirects generated by smart gateways 841 * to dynamically build the routing tables. 842 */ 843 if (rt == NULL || (rt_mask(rt) && rt_mask(rt)->sa_len < 2)) 844 goto create; 845 /* 846 * Don't listen to the redirect if it's 847 * for a route to an interface. 848 */ 849 if (rt->rt_flags & RTF_GATEWAY) { 850 if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) { 851 /* 852 * Changing from route to net => route to host. 853 * Create new route, rather than smashing route to net. 854 */ 855 create: 856 if (rt != NULL) 857 rt_unref(rt); 858 flags |= RTF_GATEWAY | RTF_DYNAMIC; 859 memset(&info, 0, sizeof(info)); 860 info.rti_info[RTAX_DST] = dst; 861 info.rti_info[RTAX_GATEWAY] = gateway; 862 info.rti_info[RTAX_NETMASK] = netmask; 863 info.rti_ifa = ifa; 864 info.rti_flags = flags; 865 rt = NULL; 866 error = rtrequest1(RTM_ADD, &info, &rt); 867 if (rt != NULL) 868 flags = rt->rt_flags; 869 stat = &rtstat.rts_dynamic; 870 } else { 871 /* 872 * Smash the current notion of the gateway to 873 * this destination. Should check about netmask!!! 874 */ 875 #ifdef NET_MPSAFE 876 KASSERT(!cpu_softintr_p()); 877 878 error = rt_update_prepare(rt); 879 if (error == 0) { 880 #endif 881 error = rt_setgate(rt, gateway); 882 if (error == 0) { 883 rt->rt_flags |= RTF_MODIFIED; 884 flags |= RTF_MODIFIED; 885 } 886 #ifdef NET_MPSAFE 887 rt_update_finish(rt); 888 } else { 889 /* 890 * If error != 0, the rtentry is being 891 * destroyed, so doing nothing doesn't 892 * matter. 893 */ 894 } 895 #endif 896 stat = &rtstat.rts_newgateway; 897 } 898 } else 899 error = EHOSTUNREACH; 900 done: 901 if (rt) { 902 if (rtp != NULL && !error) 903 *rtp = rt; 904 else 905 rt_unref(rt); 906 } 907 out: 908 if (error) 909 rtstat.rts_badredirect++; 910 else if (stat != NULL) 911 (*stat)++; 912 memset(&info, 0, sizeof(info)); 913 info.rti_info[RTAX_DST] = dst; 914 info.rti_info[RTAX_GATEWAY] = gateway; 915 info.rti_info[RTAX_NETMASK] = netmask; 916 info.rti_info[RTAX_AUTHOR] = src; 917 rt_missmsg(RTM_REDIRECT, &info, flags, error); 918 ifa_release(ifa, &psref); 919 } 920 921 /* 922 * Delete a route and generate a message. 923 * It doesn't free a passed rt. 924 */ 925 static int 926 rtdeletemsg(struct rtentry *rt) 927 { 928 int error; 929 struct rt_addrinfo info; 930 struct rtentry *retrt; 931 932 /* 933 * Request the new route so that the entry is not actually 934 * deleted. That will allow the information being reported to 935 * be accurate (and consistent with route_output()). 936 */ 937 memset(&info, 0, sizeof(info)); 938 info.rti_info[RTAX_DST] = rt_getkey(rt); 939 info.rti_info[RTAX_NETMASK] = rt_mask(rt); 940 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 941 info.rti_flags = rt->rt_flags; 942 error = rtrequest1(RTM_DELETE, &info, &retrt); 943 944 rt_missmsg(RTM_DELETE, &info, info.rti_flags, error); 945 946 return error; 947 } 948 949 struct ifaddr * 950 ifa_ifwithroute_psref(int flags, const struct sockaddr *dst, 951 const struct sockaddr *gateway, struct psref *psref) 952 { 953 struct ifaddr *ifa = NULL; 954 955 if ((flags & RTF_GATEWAY) == 0) { 956 /* 957 * If we are adding a route to an interface, 958 * and the interface is a pt to pt link 959 * we should search for the destination 960 * as our clue to the interface. Otherwise 961 * we can use the local address. 962 */ 963 if ((flags & RTF_HOST) && gateway->sa_family != AF_LINK) 964 ifa = ifa_ifwithdstaddr_psref(dst, psref); 965 if (ifa == NULL) 966 ifa = ifa_ifwithaddr_psref(gateway, psref); 967 } else { 968 /* 969 * If we are adding a route to a remote net 970 * or host, the gateway may still be on the 971 * other end of a pt to pt link. 972 */ 973 ifa = ifa_ifwithdstaddr_psref(gateway, psref); 974 } 975 if (ifa == NULL) 976 ifa = ifa_ifwithnet_psref(gateway, psref); 977 if (ifa == NULL) { 978 int s; 979 struct rtentry *rt; 980 981 /* XXX we cannot call rtalloc1 if holding the rt lock */ 982 if (RT_LOCKED()) 983 rt = rtalloc1_locked(gateway, 0, true, true); 984 else 985 rt = rtalloc1(gateway, 0); 986 if (rt == NULL) 987 return NULL; 988 if (rt->rt_flags & RTF_GATEWAY) { 989 rt_unref(rt); 990 return NULL; 991 } 992 /* 993 * Just in case. May not need to do this workaround. 994 * Revisit when working on rtentry MP-ification. 995 */ 996 s = pserialize_read_enter(); 997 IFADDR_READER_FOREACH(ifa, rt->rt_ifp) { 998 if (ifa == rt->rt_ifa) 999 break; 1000 } 1001 if (ifa != NULL) 1002 ifa_acquire(ifa, psref); 1003 pserialize_read_exit(s); 1004 rt_unref(rt); 1005 if (ifa == NULL) 1006 return NULL; 1007 } 1008 if (ifa->ifa_addr->sa_family != dst->sa_family) { 1009 struct ifaddr *nifa; 1010 int s; 1011 1012 s = pserialize_read_enter(); 1013 nifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp); 1014 if (nifa != NULL) { 1015 ifa_release(ifa, psref); 1016 ifa_acquire(nifa, psref); 1017 ifa = nifa; 1018 } 1019 pserialize_read_exit(s); 1020 } 1021 return ifa; 1022 } 1023 1024 /* 1025 * If it suceeds and ret_nrt isn't NULL, refcnt of ret_nrt is incremented. 1026 * The caller has to rtfree it by itself. 1027 */ 1028 int 1029 rtrequest(int req, const struct sockaddr *dst, const struct sockaddr *gateway, 1030 const struct sockaddr *netmask, int flags, struct rtentry **ret_nrt) 1031 { 1032 struct rt_addrinfo info; 1033 1034 memset(&info, 0, sizeof(info)); 1035 info.rti_flags = flags; 1036 info.rti_info[RTAX_DST] = dst; 1037 info.rti_info[RTAX_GATEWAY] = gateway; 1038 info.rti_info[RTAX_NETMASK] = netmask; 1039 return rtrequest1(req, &info, ret_nrt); 1040 } 1041 1042 /* 1043 * It's a utility function to add/remove a route to/from the routing table 1044 * and tell user processes the addition/removal on success. 1045 */ 1046 int 1047 rtrequest_newmsg(const int req, const struct sockaddr *dst, 1048 const struct sockaddr *gateway, const struct sockaddr *netmask, 1049 const int flags) 1050 { 1051 int error; 1052 struct rtentry *ret_nrt = NULL; 1053 1054 KASSERT(req == RTM_ADD || req == RTM_DELETE); 1055 1056 error = rtrequest(req, dst, gateway, netmask, flags, &ret_nrt); 1057 if (error != 0) 1058 return error; 1059 1060 KASSERT(ret_nrt != NULL); 1061 1062 rt_newmsg(req, ret_nrt); /* tell user process */ 1063 if (req == RTM_DELETE) 1064 rt_free(ret_nrt); 1065 else 1066 rt_unref(ret_nrt); 1067 1068 return 0; 1069 } 1070 1071 struct ifnet * 1072 rt_getifp(struct rt_addrinfo *info, struct psref *psref) 1073 { 1074 const struct sockaddr *ifpaddr = info->rti_info[RTAX_IFP]; 1075 1076 if (info->rti_ifp != NULL) 1077 return NULL; 1078 /* 1079 * ifp may be specified by sockaddr_dl when protocol address 1080 * is ambiguous 1081 */ 1082 if (ifpaddr != NULL && ifpaddr->sa_family == AF_LINK) { 1083 struct ifaddr *ifa; 1084 int s = pserialize_read_enter(); 1085 1086 ifa = ifa_ifwithnet(ifpaddr); 1087 if (ifa != NULL) 1088 info->rti_ifp = if_get_byindex(ifa->ifa_ifp->if_index, 1089 psref); 1090 pserialize_read_exit(s); 1091 } 1092 1093 return info->rti_ifp; 1094 } 1095 1096 struct ifaddr * 1097 rt_getifa(struct rt_addrinfo *info, struct psref *psref) 1098 { 1099 struct ifaddr *ifa = NULL; 1100 const struct sockaddr *dst = info->rti_info[RTAX_DST]; 1101 const struct sockaddr *gateway = info->rti_info[RTAX_GATEWAY]; 1102 const struct sockaddr *ifaaddr = info->rti_info[RTAX_IFA]; 1103 int flags = info->rti_flags; 1104 const struct sockaddr *sa; 1105 1106 if (info->rti_ifa == NULL && ifaaddr != NULL) { 1107 ifa = ifa_ifwithaddr_psref(ifaaddr, psref); 1108 if (ifa != NULL) 1109 goto got; 1110 } 1111 1112 sa = ifaaddr != NULL ? ifaaddr : 1113 (gateway != NULL ? gateway : dst); 1114 if (sa != NULL && info->rti_ifp != NULL) 1115 ifa = ifaof_ifpforaddr_psref(sa, info->rti_ifp, psref); 1116 else if (dst != NULL && gateway != NULL) 1117 ifa = ifa_ifwithroute_psref(flags, dst, gateway, psref); 1118 else if (sa != NULL) 1119 ifa = ifa_ifwithroute_psref(flags, sa, sa, psref); 1120 if (ifa == NULL) 1121 return NULL; 1122 got: 1123 if (ifa->ifa_getifa != NULL) { 1124 /* FIXME ifa_getifa is NOMPSAFE */ 1125 ifa = (*ifa->ifa_getifa)(ifa, dst); 1126 if (ifa == NULL) 1127 return NULL; 1128 ifa_acquire(ifa, psref); 1129 } 1130 info->rti_ifa = ifa; 1131 if (info->rti_ifp == NULL) 1132 info->rti_ifp = ifa->ifa_ifp; 1133 return ifa; 1134 } 1135 1136 /* 1137 * If it suceeds and ret_nrt isn't NULL, refcnt of ret_nrt is incremented. 1138 * The caller has to rtfree it by itself. 1139 */ 1140 int 1141 rtrequest1(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt) 1142 { 1143 int s = splsoftnet(), ss; 1144 int error = 0, rc; 1145 struct rtentry *rt; 1146 rtbl_t *rtbl; 1147 struct ifaddr *ifa = NULL; 1148 struct sockaddr_storage maskeddst; 1149 const struct sockaddr *dst = info->rti_info[RTAX_DST]; 1150 const struct sockaddr *gateway = info->rti_info[RTAX_GATEWAY]; 1151 const struct sockaddr *netmask = info->rti_info[RTAX_NETMASK]; 1152 int flags = info->rti_flags; 1153 struct psref psref_ifp, psref_ifa; 1154 int bound = 0; 1155 struct ifnet *ifp = NULL; 1156 bool need_to_release_ifa = true; 1157 bool need_unlock = true; 1158 #define senderr(x) { error = x ; goto bad; } 1159 1160 RT_WLOCK(); 1161 1162 bound = curlwp_bind(); 1163 if ((rtbl = rt_gettable(dst->sa_family)) == NULL) 1164 senderr(ESRCH); 1165 if (flags & RTF_HOST) 1166 netmask = NULL; 1167 switch (req) { 1168 case RTM_DELETE: 1169 if (netmask) { 1170 rt_maskedcopy(dst, (struct sockaddr *)&maskeddst, 1171 netmask); 1172 dst = (struct sockaddr *)&maskeddst; 1173 } 1174 if ((rt = rt_lookup(rtbl, dst, netmask)) == NULL) 1175 senderr(ESRCH); 1176 if ((rt = rt_deladdr(rtbl, dst, netmask)) == NULL) 1177 senderr(ESRCH); 1178 rt->rt_flags &= ~RTF_UP; 1179 if ((ifa = rt->rt_ifa)) { 1180 if (ifa->ifa_flags & IFA_ROUTE && 1181 rt_ifa_connected(rt, ifa)) { 1182 RT_DPRINTF("rt->_rt_key = %p, ifa = %p, " 1183 "deleted IFA_ROUTE\n", 1184 (void *)rt->_rt_key, (void *)ifa); 1185 ifa->ifa_flags &= ~IFA_ROUTE; 1186 } 1187 if (ifa->ifa_rtrequest) 1188 ifa->ifa_rtrequest(RTM_DELETE, rt, info); 1189 ifa = NULL; 1190 } 1191 rttrash++; 1192 if (ret_nrt) { 1193 *ret_nrt = rt; 1194 rt_ref(rt); 1195 RT_REFCNT_TRACE(rt); 1196 } 1197 rtcache_invalidate(); 1198 RT_UNLOCK(); 1199 need_unlock = false; 1200 rt_timer_remove_all(rt); 1201 #if defined(INET) || defined(INET6) 1202 if (netmask != NULL) 1203 lltable_prefix_free(dst->sa_family, dst, netmask, 0); 1204 #endif 1205 if (ret_nrt == NULL) { 1206 /* Adjust the refcount */ 1207 rt_ref(rt); 1208 RT_REFCNT_TRACE(rt); 1209 rt_free(rt); 1210 } 1211 break; 1212 1213 case RTM_ADD: 1214 if (info->rti_ifa == NULL) { 1215 ifp = rt_getifp(info, &psref_ifp); 1216 ifa = rt_getifa(info, &psref_ifa); 1217 if (ifa == NULL) 1218 senderr(ENETUNREACH); 1219 } else { 1220 /* Caller should have a reference of ifa */ 1221 ifa = info->rti_ifa; 1222 need_to_release_ifa = false; 1223 } 1224 rt = pool_get(&rtentry_pool, PR_NOWAIT); 1225 if (rt == NULL) 1226 senderr(ENOBUFS); 1227 memset(rt, 0, sizeof(*rt)); 1228 rt->rt_flags = RTF_UP | flags; 1229 LIST_INIT(&rt->rt_timer); 1230 1231 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1232 if (netmask) { 1233 rt_maskedcopy(dst, (struct sockaddr *)&maskeddst, 1234 netmask); 1235 rt_setkey(rt, (struct sockaddr *)&maskeddst, M_NOWAIT); 1236 } else { 1237 rt_setkey(rt, dst, M_NOWAIT); 1238 } 1239 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1240 if (rt_getkey(rt) == NULL || 1241 rt_setgate(rt, gateway) != 0) { 1242 pool_put(&rtentry_pool, rt); 1243 senderr(ENOBUFS); 1244 } 1245 1246 rt_set_ifa(rt, ifa); 1247 if (info->rti_info[RTAX_TAG] != NULL) { 1248 const struct sockaddr *tag; 1249 tag = rt_settag(rt, info->rti_info[RTAX_TAG]); 1250 if (tag == NULL) 1251 senderr(ENOBUFS); 1252 } 1253 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1254 1255 ss = pserialize_read_enter(); 1256 if (info->rti_info[RTAX_IFP] != NULL) { 1257 struct ifaddr *ifa2; 1258 ifa2 = ifa_ifwithnet(info->rti_info[RTAX_IFP]); 1259 if (ifa2 != NULL) 1260 rt->rt_ifp = ifa2->ifa_ifp; 1261 else 1262 rt->rt_ifp = ifa->ifa_ifp; 1263 } else 1264 rt->rt_ifp = ifa->ifa_ifp; 1265 pserialize_read_exit(ss); 1266 cv_init(&rt->rt_cv, "rtentry"); 1267 psref_target_init(&rt->rt_psref, rt_psref_class); 1268 1269 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1270 rc = rt_addaddr(rtbl, rt, netmask); 1271 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1272 if (rc != 0) { 1273 ifafree(ifa); /* for rt_set_ifa above */ 1274 cv_destroy(&rt->rt_cv); 1275 rt_destroy(rt); 1276 pool_put(&rtentry_pool, rt); 1277 senderr(rc); 1278 } 1279 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1280 if (ifa->ifa_rtrequest) 1281 ifa->ifa_rtrequest(req, rt, info); 1282 if (need_to_release_ifa) 1283 ifa_release(ifa, &psref_ifa); 1284 ifa = NULL; 1285 if_put(ifp, &psref_ifp); 1286 ifp = NULL; 1287 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1288 if (ret_nrt) { 1289 *ret_nrt = rt; 1290 rt_ref(rt); 1291 RT_REFCNT_TRACE(rt); 1292 } 1293 rtcache_invalidate(); 1294 RT_UNLOCK(); 1295 need_unlock = false; 1296 break; 1297 case RTM_GET: 1298 if (netmask != NULL) { 1299 rt_maskedcopy(dst, (struct sockaddr *)&maskeddst, 1300 netmask); 1301 dst = (struct sockaddr *)&maskeddst; 1302 } 1303 if ((rt = rt_lookup(rtbl, dst, netmask)) == NULL) 1304 senderr(ESRCH); 1305 if (ret_nrt != NULL) { 1306 *ret_nrt = rt; 1307 rt_ref(rt); 1308 RT_REFCNT_TRACE(rt); 1309 } 1310 break; 1311 } 1312 bad: 1313 if (need_to_release_ifa) 1314 ifa_release(ifa, &psref_ifa); 1315 if_put(ifp, &psref_ifp); 1316 curlwp_bindx(bound); 1317 if (need_unlock) 1318 RT_UNLOCK(); 1319 splx(s); 1320 return error; 1321 } 1322 1323 int 1324 rt_setgate(struct rtentry *rt, const struct sockaddr *gate) 1325 { 1326 struct sockaddr *new, *old; 1327 1328 KASSERT(rt->_rt_key != NULL); 1329 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1330 1331 new = sockaddr_dup(gate, M_ZERO | M_NOWAIT); 1332 if (new == NULL) 1333 return ENOMEM; 1334 1335 old = rt->rt_gateway; 1336 rt->rt_gateway = new; 1337 if (old != NULL) 1338 sockaddr_free(old); 1339 1340 KASSERT(rt->_rt_key != NULL); 1341 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1342 1343 if (rt->rt_flags & RTF_GATEWAY) { 1344 struct rtentry *gwrt; 1345 1346 /* XXX we cannot call rtalloc1 if holding the rt lock */ 1347 if (RT_LOCKED()) 1348 gwrt = rtalloc1_locked(gate, 1, false, true); 1349 else 1350 gwrt = rtalloc1(gate, 1); 1351 /* 1352 * If we switched gateways, grab the MTU from the new 1353 * gateway route if the current MTU, if the current MTU is 1354 * greater than the MTU of gateway. 1355 * Note that, if the MTU of gateway is 0, we will reset the 1356 * MTU of the route to run PMTUD again from scratch. XXX 1357 */ 1358 if (gwrt != NULL) { 1359 KASSERT(gwrt->_rt_key != NULL); 1360 RT_DPRINTF("gwrt->_rt_key = %p\n", gwrt->_rt_key); 1361 if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0 && 1362 rt->rt_rmx.rmx_mtu && 1363 rt->rt_rmx.rmx_mtu > gwrt->rt_rmx.rmx_mtu) { 1364 rt->rt_rmx.rmx_mtu = gwrt->rt_rmx.rmx_mtu; 1365 } 1366 rt_unref(gwrt); 1367 } 1368 } 1369 KASSERT(rt->_rt_key != NULL); 1370 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1371 return 0; 1372 } 1373 1374 static void 1375 rt_maskedcopy(const struct sockaddr *src, struct sockaddr *dst, 1376 const struct sockaddr *netmask) 1377 { 1378 const char *netmaskp = &netmask->sa_data[0], 1379 *srcp = &src->sa_data[0]; 1380 char *dstp = &dst->sa_data[0]; 1381 const char *maskend = (char *)dst + MIN(netmask->sa_len, src->sa_len); 1382 const char *srcend = (char *)dst + src->sa_len; 1383 1384 dst->sa_len = src->sa_len; 1385 dst->sa_family = src->sa_family; 1386 1387 while (dstp < maskend) 1388 *dstp++ = *srcp++ & *netmaskp++; 1389 if (dstp < srcend) 1390 memset(dstp, 0, (size_t)(srcend - dstp)); 1391 } 1392 1393 /* 1394 * Inform the routing socket of a route change. 1395 */ 1396 void 1397 rt_newmsg(const int cmd, const struct rtentry *rt) 1398 { 1399 struct rt_addrinfo info; 1400 1401 memset((void *)&info, 0, sizeof(info)); 1402 info.rti_info[RTAX_DST] = rt_getkey(rt); 1403 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 1404 info.rti_info[RTAX_NETMASK] = rt_mask(rt); 1405 if (rt->rt_ifp) { 1406 info.rti_info[RTAX_IFP] = rt->rt_ifp->if_dl->ifa_addr; 1407 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; 1408 } 1409 1410 rt_missmsg(cmd, &info, rt->rt_flags, 0); 1411 } 1412 1413 /* 1414 * Set up or tear down a routing table entry, normally 1415 * for an interface. 1416 */ 1417 int 1418 rtinit(struct ifaddr *ifa, int cmd, int flags) 1419 { 1420 struct rtentry *rt; 1421 struct sockaddr *dst, *odst; 1422 struct sockaddr_storage maskeddst; 1423 struct rtentry *nrt = NULL; 1424 int error; 1425 struct rt_addrinfo info; 1426 1427 dst = flags & RTF_HOST ? ifa->ifa_dstaddr : ifa->ifa_addr; 1428 if (cmd == RTM_DELETE) { 1429 if ((flags & RTF_HOST) == 0 && ifa->ifa_netmask) { 1430 /* Delete subnet route for this interface */ 1431 odst = dst; 1432 dst = (struct sockaddr *)&maskeddst; 1433 rt_maskedcopy(odst, dst, ifa->ifa_netmask); 1434 } 1435 if ((rt = rtalloc1(dst, 0)) != NULL) { 1436 if (rt->rt_ifa != ifa) { 1437 rt_unref(rt); 1438 return (flags & RTF_HOST) ? EHOSTUNREACH 1439 : ENETUNREACH; 1440 } 1441 rt_unref(rt); 1442 } 1443 } 1444 memset(&info, 0, sizeof(info)); 1445 info.rti_ifa = ifa; 1446 info.rti_flags = flags | ifa->ifa_flags; 1447 info.rti_info[RTAX_DST] = dst; 1448 info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr; 1449 1450 /* 1451 * XXX here, it seems that we are assuming that ifa_netmask is NULL 1452 * for RTF_HOST. bsdi4 passes NULL explicitly (via intermediate 1453 * variable) when RTF_HOST is 1. still not sure if i can safely 1454 * change it to meet bsdi4 behavior. 1455 */ 1456 if (cmd != RTM_LLINFO_UPD) 1457 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; 1458 error = rtrequest1((cmd == RTM_LLINFO_UPD) ? RTM_GET : cmd, &info, 1459 &nrt); 1460 if (error != 0) 1461 return error; 1462 1463 rt = nrt; 1464 RT_REFCNT_TRACE(rt); 1465 switch (cmd) { 1466 case RTM_DELETE: 1467 rt_newmsg(cmd, rt); 1468 rt_free(rt); 1469 break; 1470 case RTM_LLINFO_UPD: 1471 if (cmd == RTM_LLINFO_UPD && ifa->ifa_rtrequest != NULL) 1472 ifa->ifa_rtrequest(RTM_LLINFO_UPD, rt, &info); 1473 rt_newmsg(RTM_CHANGE, rt); 1474 rt_unref(rt); 1475 break; 1476 case RTM_ADD: 1477 /* 1478 * XXX it looks just reverting rt_ifa replaced by ifa_rtrequest 1479 * called via rtrequest1. Can we just prevent the replacement 1480 * somehow and remove the following code? And also doesn't 1481 * calling ifa_rtrequest(RTM_ADD) replace rt_ifa again? 1482 */ 1483 if (rt->rt_ifa != ifa) { 1484 printf("rtinit: wrong ifa (%p) was (%p)\n", ifa, 1485 rt->rt_ifa); 1486 #ifdef NET_MPSAFE 1487 KASSERT(!cpu_softintr_p()); 1488 1489 error = rt_update_prepare(rt); 1490 if (error == 0) { 1491 #endif 1492 if (rt->rt_ifa->ifa_rtrequest != NULL) { 1493 rt->rt_ifa->ifa_rtrequest(RTM_DELETE, 1494 rt, &info); 1495 } 1496 rt_replace_ifa(rt, ifa); 1497 rt->rt_ifp = ifa->ifa_ifp; 1498 if (ifa->ifa_rtrequest != NULL) 1499 ifa->ifa_rtrequest(RTM_ADD, rt, &info); 1500 #ifdef NET_MPSAFE 1501 rt_update_finish(rt); 1502 } else { 1503 /* 1504 * If error != 0, the rtentry is being 1505 * destroyed, so doing nothing doesn't 1506 * matter. 1507 */ 1508 } 1509 #endif 1510 } 1511 rt_newmsg(cmd, rt); 1512 rt_unref(rt); 1513 RT_REFCNT_TRACE(rt); 1514 break; 1515 } 1516 return error; 1517 } 1518 1519 /* 1520 * Create a local route entry for the address. 1521 * Announce the addition of the address and the route to the routing socket. 1522 */ 1523 int 1524 rt_ifa_addlocal(struct ifaddr *ifa) 1525 { 1526 struct rtentry *rt; 1527 int e; 1528 1529 /* If there is no loopback entry, allocate one. */ 1530 rt = rtalloc1(ifa->ifa_addr, 0); 1531 #ifdef RT_DEBUG 1532 if (rt != NULL) 1533 dump_rt(rt); 1534 #endif 1535 if (rt == NULL || (rt->rt_flags & RTF_HOST) == 0 || 1536 (rt->rt_ifp->if_flags & IFF_LOOPBACK) == 0) 1537 { 1538 struct rt_addrinfo info; 1539 struct rtentry *nrt; 1540 1541 memset(&info, 0, sizeof(info)); 1542 info.rti_flags = RTF_HOST | RTF_LOCAL; 1543 info.rti_info[RTAX_DST] = ifa->ifa_addr; 1544 info.rti_info[RTAX_GATEWAY] = 1545 (const struct sockaddr *)ifa->ifa_ifp->if_sadl; 1546 info.rti_ifa = ifa; 1547 nrt = NULL; 1548 e = rtrequest1(RTM_ADD, &info, &nrt); 1549 if (nrt && ifa != nrt->rt_ifa) 1550 rt_replace_ifa(nrt, ifa); 1551 rt_newaddrmsg(RTM_ADD, ifa, e, nrt); 1552 if (nrt != NULL) { 1553 #ifdef RT_DEBUG 1554 dump_rt(nrt); 1555 #endif 1556 rt_unref(nrt); 1557 RT_REFCNT_TRACE(nrt); 1558 } 1559 } else { 1560 e = 0; 1561 rt_newaddrmsg(RTM_NEWADDR, ifa, 0, NULL); 1562 } 1563 if (rt != NULL) 1564 rt_unref(rt); 1565 return e; 1566 } 1567 1568 /* 1569 * Remove the local route entry for the address. 1570 * Announce the removal of the address and the route to the routing socket. 1571 */ 1572 int 1573 rt_ifa_remlocal(struct ifaddr *ifa, struct ifaddr *alt_ifa) 1574 { 1575 struct rtentry *rt; 1576 int e = 0; 1577 1578 rt = rtalloc1(ifa->ifa_addr, 0); 1579 1580 /* 1581 * Before deleting, check if a corresponding loopbacked 1582 * host route surely exists. With this check, we can avoid 1583 * deleting an interface direct route whose destination is 1584 * the same as the address being removed. This can happen 1585 * when removing a subnet-router anycast address on an 1586 * interface attached to a shared medium. 1587 */ 1588 if (rt != NULL && 1589 (rt->rt_flags & RTF_HOST) && 1590 (rt->rt_ifp->if_flags & IFF_LOOPBACK)) 1591 { 1592 /* If we cannot replace the route's ifaddr with the equivalent 1593 * ifaddr of another interface, I believe it is safest to 1594 * delete the route. 1595 */ 1596 if (alt_ifa == NULL) { 1597 e = rtdeletemsg(rt); 1598 if (e == 0) { 1599 rt_unref(rt); 1600 rt_free(rt); 1601 rt = NULL; 1602 } 1603 rt_newaddrmsg(RTM_DELADDR, ifa, 0, NULL); 1604 } else { 1605 rt_replace_ifa(rt, alt_ifa); 1606 rt_newmsg(RTM_CHANGE, rt); 1607 } 1608 } else 1609 rt_newaddrmsg(RTM_DELADDR, ifa, 0, NULL); 1610 if (rt != NULL) 1611 rt_unref(rt); 1612 return e; 1613 } 1614 1615 /* 1616 * Route timer routines. These routes allow functions to be called 1617 * for various routes at any time. This is useful in supporting 1618 * path MTU discovery and redirect route deletion. 1619 * 1620 * This is similar to some BSDI internal functions, but it provides 1621 * for multiple queues for efficiency's sake... 1622 */ 1623 1624 LIST_HEAD(, rttimer_queue) rttimer_queue_head; 1625 static int rt_init_done = 0; 1626 1627 /* 1628 * Some subtle order problems with domain initialization mean that 1629 * we cannot count on this being run from rt_init before various 1630 * protocol initializations are done. Therefore, we make sure 1631 * that this is run when the first queue is added... 1632 */ 1633 1634 static void rt_timer_work(struct work *, void *); 1635 1636 static void 1637 rt_timer_init(void) 1638 { 1639 int error; 1640 1641 assert(rt_init_done == 0); 1642 1643 /* XXX should be in rt_init */ 1644 rw_init(&rt_lock); 1645 1646 LIST_INIT(&rttimer_queue_head); 1647 callout_init(&rt_timer_ch, CALLOUT_MPSAFE); 1648 error = workqueue_create(&rt_timer_wq, "rt_timer", 1649 rt_timer_work, NULL, PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE); 1650 if (error) 1651 panic("%s: workqueue_create failed (%d)\n", __func__, error); 1652 callout_reset(&rt_timer_ch, hz, rt_timer_timer, NULL); 1653 rt_init_done = 1; 1654 } 1655 1656 struct rttimer_queue * 1657 rt_timer_queue_create(u_int timeout) 1658 { 1659 struct rttimer_queue *rtq; 1660 1661 if (rt_init_done == 0) 1662 rt_timer_init(); 1663 1664 R_Malloc(rtq, struct rttimer_queue *, sizeof *rtq); 1665 if (rtq == NULL) 1666 return NULL; 1667 memset(rtq, 0, sizeof(*rtq)); 1668 1669 rtq->rtq_timeout = timeout; 1670 TAILQ_INIT(&rtq->rtq_head); 1671 RT_WLOCK(); 1672 LIST_INSERT_HEAD(&rttimer_queue_head, rtq, rtq_link); 1673 RT_UNLOCK(); 1674 1675 return rtq; 1676 } 1677 1678 void 1679 rt_timer_queue_change(struct rttimer_queue *rtq, long timeout) 1680 { 1681 1682 rtq->rtq_timeout = timeout; 1683 } 1684 1685 static void 1686 rt_timer_queue_remove_all(struct rttimer_queue *rtq) 1687 { 1688 struct rttimer *r; 1689 1690 RT_ASSERT_WLOCK(); 1691 1692 while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL) { 1693 LIST_REMOVE(r, rtt_link); 1694 TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next); 1695 rt_ref(r->rtt_rt); /* XXX */ 1696 RT_REFCNT_TRACE(r->rtt_rt); 1697 RT_UNLOCK(); 1698 (*r->rtt_func)(r->rtt_rt, r); 1699 pool_put(&rttimer_pool, r); 1700 RT_WLOCK(); 1701 if (rtq->rtq_count > 0) 1702 rtq->rtq_count--; 1703 else 1704 printf("rt_timer_queue_remove_all: " 1705 "rtq_count reached 0\n"); 1706 } 1707 } 1708 1709 void 1710 rt_timer_queue_destroy(struct rttimer_queue *rtq) 1711 { 1712 1713 RT_WLOCK(); 1714 rt_timer_queue_remove_all(rtq); 1715 LIST_REMOVE(rtq, rtq_link); 1716 RT_UNLOCK(); 1717 1718 /* 1719 * Caller is responsible for freeing the rttimer_queue structure. 1720 */ 1721 } 1722 1723 unsigned long 1724 rt_timer_count(struct rttimer_queue *rtq) 1725 { 1726 return rtq->rtq_count; 1727 } 1728 1729 static void 1730 rt_timer_remove_all(struct rtentry *rt) 1731 { 1732 struct rttimer *r; 1733 1734 RT_WLOCK(); 1735 while ((r = LIST_FIRST(&rt->rt_timer)) != NULL) { 1736 LIST_REMOVE(r, rtt_link); 1737 TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next); 1738 if (r->rtt_queue->rtq_count > 0) 1739 r->rtt_queue->rtq_count--; 1740 else 1741 printf("rt_timer_remove_all: rtq_count reached 0\n"); 1742 pool_put(&rttimer_pool, r); 1743 } 1744 RT_UNLOCK(); 1745 } 1746 1747 int 1748 rt_timer_add(struct rtentry *rt, 1749 void (*func)(struct rtentry *, struct rttimer *), 1750 struct rttimer_queue *queue) 1751 { 1752 struct rttimer *r; 1753 1754 KASSERT(func != NULL); 1755 RT_WLOCK(); 1756 /* 1757 * If there's already a timer with this action, destroy it before 1758 * we add a new one. 1759 */ 1760 LIST_FOREACH(r, &rt->rt_timer, rtt_link) { 1761 if (r->rtt_func == func) 1762 break; 1763 } 1764 if (r != NULL) { 1765 LIST_REMOVE(r, rtt_link); 1766 TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next); 1767 if (r->rtt_queue->rtq_count > 0) 1768 r->rtt_queue->rtq_count--; 1769 else 1770 printf("rt_timer_add: rtq_count reached 0\n"); 1771 } else { 1772 r = pool_get(&rttimer_pool, PR_NOWAIT); 1773 if (r == NULL) { 1774 RT_UNLOCK(); 1775 return ENOBUFS; 1776 } 1777 } 1778 1779 memset(r, 0, sizeof(*r)); 1780 1781 r->rtt_rt = rt; 1782 r->rtt_time = time_uptime; 1783 r->rtt_func = func; 1784 r->rtt_queue = queue; 1785 LIST_INSERT_HEAD(&rt->rt_timer, r, rtt_link); 1786 TAILQ_INSERT_TAIL(&queue->rtq_head, r, rtt_next); 1787 r->rtt_queue->rtq_count++; 1788 1789 RT_UNLOCK(); 1790 1791 return 0; 1792 } 1793 1794 static void 1795 rt_timer_work(struct work *wk, void *arg) 1796 { 1797 struct rttimer_queue *rtq; 1798 struct rttimer *r; 1799 1800 RT_WLOCK(); 1801 LIST_FOREACH(rtq, &rttimer_queue_head, rtq_link) { 1802 while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL && 1803 (r->rtt_time + rtq->rtq_timeout) < time_uptime) { 1804 LIST_REMOVE(r, rtt_link); 1805 TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next); 1806 rt_ref(r->rtt_rt); /* XXX */ 1807 RT_REFCNT_TRACE(r->rtt_rt); 1808 RT_UNLOCK(); 1809 (*r->rtt_func)(r->rtt_rt, r); 1810 pool_put(&rttimer_pool, r); 1811 RT_WLOCK(); 1812 if (rtq->rtq_count > 0) 1813 rtq->rtq_count--; 1814 else 1815 printf("rt_timer_timer: rtq_count reached 0\n"); 1816 } 1817 } 1818 RT_UNLOCK(); 1819 1820 callout_reset(&rt_timer_ch, hz, rt_timer_timer, NULL); 1821 } 1822 1823 static void 1824 rt_timer_timer(void *arg) 1825 { 1826 1827 workqueue_enqueue(rt_timer_wq, &rt_timer_wk, NULL); 1828 } 1829 1830 static struct rtentry * 1831 _rtcache_init(struct route *ro, int flag) 1832 { 1833 struct rtentry *rt; 1834 1835 rtcache_invariants(ro); 1836 KASSERT(ro->_ro_rt == NULL); 1837 1838 if (rtcache_getdst(ro) == NULL) 1839 return NULL; 1840 rt = rtalloc1(rtcache_getdst(ro), flag); 1841 if (rt != NULL) { 1842 RT_RLOCK(); 1843 if (ISSET(rt->rt_flags, RTF_UP)) { 1844 ro->_ro_rt = rt; 1845 ro->ro_rtcache_generation = rtcache_generation; 1846 rtcache_ref(rt, ro); 1847 } 1848 RT_UNLOCK(); 1849 rt_unref(rt); 1850 } 1851 1852 rtcache_invariants(ro); 1853 return ro->_ro_rt; 1854 } 1855 1856 struct rtentry * 1857 rtcache_init(struct route *ro) 1858 { 1859 1860 return _rtcache_init(ro, 1); 1861 } 1862 1863 struct rtentry * 1864 rtcache_init_noclone(struct route *ro) 1865 { 1866 1867 return _rtcache_init(ro, 0); 1868 } 1869 1870 struct rtentry * 1871 rtcache_update(struct route *ro, int clone) 1872 { 1873 1874 ro->_ro_rt = NULL; 1875 return _rtcache_init(ro, clone); 1876 } 1877 1878 void 1879 rtcache_copy(struct route *new_ro, struct route *old_ro) 1880 { 1881 struct rtentry *rt; 1882 int ret; 1883 1884 KASSERT(new_ro != old_ro); 1885 rtcache_invariants(new_ro); 1886 rtcache_invariants(old_ro); 1887 1888 rt = rtcache_validate(old_ro); 1889 1890 if (rtcache_getdst(old_ro) == NULL) 1891 goto out; 1892 ret = rtcache_setdst(new_ro, rtcache_getdst(old_ro)); 1893 if (ret != 0) 1894 goto out; 1895 1896 RT_RLOCK(); 1897 new_ro->_ro_rt = rt; 1898 new_ro->ro_rtcache_generation = rtcache_generation; 1899 RT_UNLOCK(); 1900 rtcache_invariants(new_ro); 1901 out: 1902 rtcache_unref(rt, old_ro); 1903 return; 1904 } 1905 1906 #if defined(RT_DEBUG) && defined(NET_MPSAFE) 1907 static void 1908 rtcache_trace(const char *func, struct rtentry *rt, struct route *ro) 1909 { 1910 char dst[64]; 1911 1912 sockaddr_format(ro->ro_sa, dst, 64); 1913 printf("trace: %s:\tdst=%s cpu=%d lwp=%p psref=%p target=%p\n", func, dst, 1914 cpu_index(curcpu()), curlwp, &ro->ro_psref, &rt->rt_psref); 1915 } 1916 #define RTCACHE_PSREF_TRACE(rt, ro) rtcache_trace(__func__, (rt), (ro)) 1917 #else 1918 #define RTCACHE_PSREF_TRACE(rt, ro) do {} while (0) 1919 #endif 1920 1921 static void 1922 rtcache_ref(struct rtentry *rt, struct route *ro) 1923 { 1924 1925 KASSERT(rt != NULL); 1926 1927 #ifdef NET_MPSAFE 1928 RTCACHE_PSREF_TRACE(rt, ro); 1929 ro->ro_bound = curlwp_bind(); 1930 psref_acquire(&ro->ro_psref, &rt->rt_psref, rt_psref_class); 1931 #endif 1932 } 1933 1934 void 1935 rtcache_unref(struct rtentry *rt, struct route *ro) 1936 { 1937 1938 if (rt == NULL) 1939 return; 1940 1941 #ifdef NET_MPSAFE 1942 psref_release(&ro->ro_psref, &rt->rt_psref, rt_psref_class); 1943 curlwp_bindx(ro->ro_bound); 1944 RTCACHE_PSREF_TRACE(rt, ro); 1945 #endif 1946 } 1947 1948 struct rtentry * 1949 rtcache_validate(struct route *ro) 1950 { 1951 struct rtentry *rt = NULL; 1952 1953 #ifdef NET_MPSAFE 1954 retry: 1955 #endif 1956 rtcache_invariants(ro); 1957 RT_RLOCK(); 1958 if (ro->ro_rtcache_generation != rtcache_generation) { 1959 /* The cache is invalidated */ 1960 rt = NULL; 1961 goto out; 1962 } 1963 1964 rt = ro->_ro_rt; 1965 if (rt == NULL) 1966 goto out; 1967 1968 if ((rt->rt_flags & RTF_UP) == 0) { 1969 rt = NULL; 1970 goto out; 1971 } 1972 #ifdef NET_MPSAFE 1973 if (ISSET(rt->rt_flags, RTF_UPDATING)) { 1974 if (rt_wait_ok()) { 1975 RT_UNLOCK(); 1976 1977 /* We can wait until the update is complete */ 1978 rt_update_wait(); 1979 goto retry; 1980 } else { 1981 rt = NULL; 1982 } 1983 } else 1984 #endif 1985 rtcache_ref(rt, ro); 1986 out: 1987 RT_UNLOCK(); 1988 return rt; 1989 } 1990 1991 struct rtentry * 1992 rtcache_lookup2(struct route *ro, const struct sockaddr *dst, 1993 int clone, int *hitp) 1994 { 1995 const struct sockaddr *odst; 1996 struct rtentry *rt = NULL; 1997 1998 odst = rtcache_getdst(ro); 1999 if (odst == NULL) 2000 goto miss; 2001 2002 if (sockaddr_cmp(odst, dst) != 0) { 2003 rtcache_free(ro); 2004 goto miss; 2005 } 2006 2007 rt = rtcache_validate(ro); 2008 if (rt == NULL) { 2009 ro->_ro_rt = NULL; 2010 goto miss; 2011 } 2012 2013 rtcache_invariants(ro); 2014 2015 if (hitp != NULL) 2016 *hitp = 1; 2017 return rt; 2018 miss: 2019 if (hitp != NULL) 2020 *hitp = 0; 2021 if (rtcache_setdst(ro, dst) == 0) 2022 rt = _rtcache_init(ro, clone); 2023 2024 rtcache_invariants(ro); 2025 2026 return rt; 2027 } 2028 2029 void 2030 rtcache_free(struct route *ro) 2031 { 2032 2033 ro->_ro_rt = NULL; 2034 if (ro->ro_sa != NULL) { 2035 sockaddr_free(ro->ro_sa); 2036 ro->ro_sa = NULL; 2037 } 2038 rtcache_invariants(ro); 2039 } 2040 2041 int 2042 rtcache_setdst(struct route *ro, const struct sockaddr *sa) 2043 { 2044 KASSERT(sa != NULL); 2045 2046 rtcache_invariants(ro); 2047 if (ro->ro_sa != NULL) { 2048 if (ro->ro_sa->sa_family == sa->sa_family) { 2049 ro->_ro_rt = NULL; 2050 sockaddr_copy(ro->ro_sa, ro->ro_sa->sa_len, sa); 2051 rtcache_invariants(ro); 2052 return 0; 2053 } 2054 /* free ro_sa, wrong family */ 2055 rtcache_free(ro); 2056 } 2057 2058 KASSERT(ro->_ro_rt == NULL); 2059 2060 if ((ro->ro_sa = sockaddr_dup(sa, M_ZERO | M_NOWAIT)) == NULL) { 2061 rtcache_invariants(ro); 2062 return ENOMEM; 2063 } 2064 rtcache_invariants(ro); 2065 return 0; 2066 } 2067 2068 const struct sockaddr * 2069 rt_settag(struct rtentry *rt, const struct sockaddr *tag) 2070 { 2071 if (rt->rt_tag != tag) { 2072 if (rt->rt_tag != NULL) 2073 sockaddr_free(rt->rt_tag); 2074 rt->rt_tag = sockaddr_dup(tag, M_ZERO | M_NOWAIT); 2075 } 2076 return rt->rt_tag; 2077 } 2078 2079 struct sockaddr * 2080 rt_gettag(const struct rtentry *rt) 2081 { 2082 return rt->rt_tag; 2083 } 2084 2085 int 2086 rt_check_reject_route(const struct rtentry *rt, const struct ifnet *ifp) 2087 { 2088 2089 if ((rt->rt_flags & RTF_REJECT) != 0) { 2090 /* Mimic looutput */ 2091 if (ifp->if_flags & IFF_LOOPBACK) 2092 return (rt->rt_flags & RTF_HOST) ? 2093 EHOSTUNREACH : ENETUNREACH; 2094 else if (rt->rt_rmx.rmx_expire == 0 || 2095 time_uptime < rt->rt_rmx.rmx_expire) 2096 return (rt->rt_flags & RTF_GATEWAY) ? 2097 EHOSTUNREACH : EHOSTDOWN; 2098 } 2099 2100 return 0; 2101 } 2102 2103 void 2104 rt_delete_matched_entries(sa_family_t family, int (*f)(struct rtentry *, void *), 2105 void *v) 2106 { 2107 2108 for (;;) { 2109 int s; 2110 int error; 2111 struct rtentry *rt, *retrt = NULL; 2112 2113 RT_RLOCK(); 2114 s = splsoftnet(); 2115 rt = rtbl_search_matched_entry(family, f, v); 2116 if (rt == NULL) { 2117 splx(s); 2118 RT_UNLOCK(); 2119 return; 2120 } 2121 rt->rt_refcnt++; 2122 splx(s); 2123 RT_UNLOCK(); 2124 2125 error = rtrequest(RTM_DELETE, rt_getkey(rt), rt->rt_gateway, 2126 rt_mask(rt), rt->rt_flags, &retrt); 2127 if (error == 0) { 2128 KASSERT(retrt == rt); 2129 KASSERT((retrt->rt_flags & RTF_UP) == 0); 2130 retrt->rt_ifp = NULL; 2131 rt_unref(rt); 2132 rt_free(retrt); 2133 } else if (error == ESRCH) { 2134 /* Someone deleted the entry already. */ 2135 rt_unref(rt); 2136 } else { 2137 log(LOG_ERR, "%s: unable to delete rtentry @ %p, " 2138 "error = %d\n", rt->rt_ifp->if_xname, rt, error); 2139 /* XXX how to treat this case? */ 2140 } 2141 } 2142 } 2143 2144 int 2145 rt_walktree(sa_family_t family, int (*f)(struct rtentry *, void *), void *v) 2146 { 2147 int error; 2148 2149 RT_RLOCK(); 2150 error = rtbl_walktree(family, f, v); 2151 RT_UNLOCK(); 2152 2153 return error; 2154 } 2155 2156 #ifdef DDB 2157 2158 #include <machine/db_machdep.h> 2159 #include <ddb/db_interface.h> 2160 #include <ddb/db_output.h> 2161 2162 #define rt_expire rt_rmx.rmx_expire 2163 2164 static void 2165 db_print_sa(const struct sockaddr *sa) 2166 { 2167 int len; 2168 const u_char *p; 2169 2170 if (sa == NULL) { 2171 db_printf("[NULL]"); 2172 return; 2173 } 2174 2175 p = (const u_char *)sa; 2176 len = sa->sa_len; 2177 db_printf("["); 2178 while (len > 0) { 2179 db_printf("%d", *p); 2180 p++; len--; 2181 if (len) db_printf(","); 2182 } 2183 db_printf("]\n"); 2184 } 2185 2186 static void 2187 db_print_ifa(struct ifaddr *ifa) 2188 { 2189 if (ifa == NULL) 2190 return; 2191 db_printf(" ifa_addr="); 2192 db_print_sa(ifa->ifa_addr); 2193 db_printf(" ifa_dsta="); 2194 db_print_sa(ifa->ifa_dstaddr); 2195 db_printf(" ifa_mask="); 2196 db_print_sa(ifa->ifa_netmask); 2197 db_printf(" flags=0x%x,refcnt=%d,metric=%d\n", 2198 ifa->ifa_flags, 2199 ifa->ifa_refcnt, 2200 ifa->ifa_metric); 2201 } 2202 2203 /* 2204 * Function to pass to rt_walktree(). 2205 * Return non-zero error to abort walk. 2206 */ 2207 static int 2208 db_show_rtentry(struct rtentry *rt, void *w) 2209 { 2210 db_printf("rtentry=%p", rt); 2211 2212 db_printf(" flags=0x%x refcnt=%d use=%"PRId64" expire=%"PRId64"\n", 2213 rt->rt_flags, rt->rt_refcnt, 2214 rt->rt_use, (uint64_t)rt->rt_expire); 2215 2216 db_printf(" key="); db_print_sa(rt_getkey(rt)); 2217 db_printf(" mask="); db_print_sa(rt_mask(rt)); 2218 db_printf(" gw="); db_print_sa(rt->rt_gateway); 2219 2220 db_printf(" ifp=%p ", rt->rt_ifp); 2221 if (rt->rt_ifp) 2222 db_printf("(%s)", rt->rt_ifp->if_xname); 2223 else 2224 db_printf("(NULL)"); 2225 2226 db_printf(" ifa=%p\n", rt->rt_ifa); 2227 db_print_ifa(rt->rt_ifa); 2228 2229 db_printf(" gwroute=%p llinfo=%p\n", 2230 rt->rt_gwroute, rt->rt_llinfo); 2231 2232 return 0; 2233 } 2234 2235 /* 2236 * Function to print all the route trees. 2237 * Use this from ddb: "show routes" 2238 */ 2239 void 2240 db_show_routes(db_expr_t addr, bool have_addr, 2241 db_expr_t count, const char *modif) 2242 { 2243 rt_walktree(AF_INET, db_show_rtentry, NULL); 2244 } 2245 #endif 2246