1 /* $NetBSD: route.c,v 1.206 2018/01/30 11:01:04 ozaki-r Exp $ */ 2 3 /*- 4 * Copyright (c) 1998, 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Kevin M. Lahey of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 35 * All rights reserved. 36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. 
Redistributions of source code must retain the above copyright 41 * notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. Neither the name of the project nor the names of its contributors 46 * may be used to endorse or promote products derived from this software 47 * without specific prior written permission. 48 * 49 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 52 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 59 * SUCH DAMAGE. 60 */ 61 62 /* 63 * Copyright (c) 1980, 1986, 1991, 1993 64 * The Regents of the University of California. All rights reserved. 65 * 66 * Redistribution and use in source and binary forms, with or without 67 * modification, are permitted provided that the following conditions 68 * are met: 69 * 1. Redistributions of source code must retain the above copyright 70 * notice, this list of conditions and the following disclaimer. 71 * 2. Redistributions in binary form must reproduce the above copyright 72 * notice, this list of conditions and the following disclaimer in the 73 * documentation and/or other materials provided with the distribution. 
74 * 3. Neither the name of the University nor the names of its contributors 75 * may be used to endorse or promote products derived from this software 76 * without specific prior written permission. 77 * 78 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 79 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 80 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 81 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 82 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 83 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 84 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 85 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 86 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 87 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 88 * SUCH DAMAGE. 89 * 90 * @(#)route.c 8.3 (Berkeley) 1/9/95 91 */ 92 93 #ifdef _KERNEL_OPT 94 #include "opt_inet.h" 95 #include "opt_route.h" 96 #include "opt_net_mpsafe.h" 97 #endif 98 99 #include <sys/cdefs.h> 100 __KERNEL_RCSID(0, "$NetBSD: route.c,v 1.206 2018/01/30 11:01:04 ozaki-r Exp $"); 101 102 #include <sys/param.h> 103 #ifdef RTFLUSH_DEBUG 104 #include <sys/sysctl.h> 105 #endif 106 #include <sys/systm.h> 107 #include <sys/callout.h> 108 #include <sys/proc.h> 109 #include <sys/mbuf.h> 110 #include <sys/socket.h> 111 #include <sys/socketvar.h> 112 #include <sys/domain.h> 113 #include <sys/kernel.h> 114 #include <sys/ioctl.h> 115 #include <sys/pool.h> 116 #include <sys/kauth.h> 117 #include <sys/workqueue.h> 118 #include <sys/syslog.h> 119 #include <sys/rwlock.h> 120 #include <sys/mutex.h> 121 #include <sys/cpu.h> 122 123 #include <net/if.h> 124 #include <net/if_dl.h> 125 #include <net/route.h> 126 #if defined(INET) || defined(INET6) 127 #include <net/if_llatbl.h> 128 #endif 129 130 #include 
<netinet/in.h> 131 #include <netinet/in_var.h> 132 133 #ifdef RTFLUSH_DEBUG 134 #define rtcache_debug() __predict_false(_rtcache_debug) 135 #else /* RTFLUSH_DEBUG */ 136 #define rtcache_debug() 0 137 #endif /* RTFLUSH_DEBUG */ 138 139 #ifdef RT_DEBUG 140 #define RT_REFCNT_TRACE(rt) printf("%s:%d: rt=%p refcnt=%d\n", \ 141 __func__, __LINE__, (rt), (rt)->rt_refcnt) 142 #else 143 #define RT_REFCNT_TRACE(rt) do {} while (0) 144 #endif 145 146 #ifdef RT_DEBUG 147 #define dlog(level, fmt, args...) log(level, fmt, ##args) 148 #else 149 #define dlog(level, fmt, args...) do {} while (0) 150 #endif 151 152 struct rtstat rtstat; 153 154 static int rttrash; /* routes not in table but not freed */ 155 156 static struct pool rtentry_pool; 157 static struct pool rttimer_pool; 158 159 static struct callout rt_timer_ch; /* callout for rt_timer_timer() */ 160 static struct workqueue *rt_timer_wq; 161 static struct work rt_timer_wk; 162 163 static void rt_timer_init(void); 164 static void rt_timer_queue_remove_all(struct rttimer_queue *); 165 static void rt_timer_remove_all(struct rtentry *); 166 static void rt_timer_timer(void *); 167 168 /* 169 * Locking notes: 170 * - The routing table is protected by a global rwlock 171 * - API: RT_RLOCK and friends 172 * - rtcaches are NOT protected by the framework 173 * - Callers must guarantee a rtcache isn't accessed simultaneously 174 * - How the constraint is guranteed in the wild 175 * - Protect a rtcache by a mutex (e.g., inp_route) 176 * - Make rtcache per-CPU and allow only accesses from softint 177 * (e.g., ipforward_rt_percpu) 178 * - References to a rtentry is managed by reference counting and psref 179 * - Reference couting is used for temporal reference when a rtentry 180 * is fetched from the routing table 181 * - psref is used for temporal reference when a rtentry is fetched 182 * from a rtcache 183 * - struct route (rtcache) has struct psref, so we cannot obtain 184 * a reference twice on the same struct route 185 * - Befere 
destroying or updating a rtentry, we have to wait for 186 * all references left (see below for details) 187 * - APIs 188 * - An obtained rtentry via rtalloc1 or rtrequest* must be 189 * unreferenced by rt_unref 190 * - An obtained rtentry via rtcache_* must be unreferenced by 191 * rtcache_unref 192 * - TODO: once we get a lockless routing table, we should use only 193 * psref for rtentries 194 * - rtentry destruction 195 * - A rtentry is destroyed (freed) only when we call rtrequest(RTM_DELETE) 196 * - If a caller of rtrequest grabs a reference of a rtentry, the caller 197 * has a responsibility to destroy the rtentry by itself by calling 198 * rt_free 199 * - If not, rtrequest itself does that 200 * - If rt_free is called in softint, the actual destruction routine is 201 * deferred to a workqueue 202 * - rtentry update 203 * - When updating a rtentry, RTF_UPDATING flag is set 204 * - If a rtentry is set RTF_UPDATING, fetching the rtentry from 205 * the routing table or a rtcache results in either of the following 206 * cases: 207 * - if the caller runs in softint, the caller fails to fetch 208 * - otherwise, the caller waits for the update completed and retries 209 * to fetch (probably succeed to fetch for the second time) 210 * - rtcache invalidation 211 * - There is a global generation counter that is incremented when 212 * any routes have been added or deleted 213 * - When a rtcache caches a rtentry into itself, it also stores 214 * a snapshot of the generation counter 215 * - If the snapshot equals to the global counter, the cache is valid, 216 * otherwise the cache is invalidated 217 */ 218 219 /* 220 * Global lock for the routing table. 
221 */ 222 static krwlock_t rt_lock __cacheline_aligned; 223 #ifdef NET_MPSAFE 224 #define RT_RLOCK() rw_enter(&rt_lock, RW_READER) 225 #define RT_WLOCK() rw_enter(&rt_lock, RW_WRITER) 226 #define RT_UNLOCK() rw_exit(&rt_lock) 227 #define RT_LOCKED() rw_lock_held(&rt_lock) 228 #define RT_ASSERT_WLOCK() KASSERT(rw_write_held(&rt_lock)) 229 #else 230 #define RT_RLOCK() do {} while (0) 231 #define RT_WLOCK() do {} while (0) 232 #define RT_UNLOCK() do {} while (0) 233 #define RT_LOCKED() false 234 #define RT_ASSERT_WLOCK() do {} while (0) 235 #endif 236 237 static uint64_t rtcache_generation; 238 239 /* 240 * mutex and cv that are used to wait for references to a rtentry left 241 * before updating the rtentry. 242 */ 243 static struct { 244 kmutex_t lock; 245 kcondvar_t cv; 246 bool ongoing; 247 const struct lwp *lwp; 248 } rt_update_global __cacheline_aligned; 249 250 /* 251 * A workqueue and stuff that are used to defer the destruction routine 252 * of rtentries. 253 */ 254 static struct { 255 struct workqueue *wq; 256 struct work wk; 257 kmutex_t lock; 258 SLIST_HEAD(, rtentry) queue; 259 bool enqueued; 260 } rt_free_global __cacheline_aligned; 261 262 /* psref for rtentry */ 263 static struct psref_class *rt_psref_class __read_mostly; 264 265 #ifdef RTFLUSH_DEBUG 266 static int _rtcache_debug = 0; 267 #endif /* RTFLUSH_DEBUG */ 268 269 static kauth_listener_t route_listener; 270 271 static int rtdeletemsg(struct rtentry *); 272 273 static void rt_maskedcopy(const struct sockaddr *, 274 struct sockaddr *, const struct sockaddr *); 275 276 static void rtcache_invalidate(void); 277 278 static void rt_ref(struct rtentry *); 279 280 static struct rtentry * 281 rtalloc1_locked(const struct sockaddr *, int, bool, bool); 282 283 static void rtcache_ref(struct rtentry *, struct route *); 284 285 #ifdef NET_MPSAFE 286 static void rt_update_wait(void); 287 #endif 288 289 static bool rt_wait_ok(void); 290 static void rt_wait_refcnt(const char *, struct rtentry *, int); 291 
static void rt_wait_psref(struct rtentry *); 292 293 #ifdef DDB 294 static void db_print_sa(const struct sockaddr *); 295 static void db_print_ifa(struct ifaddr *); 296 static int db_show_rtentry(struct rtentry *, void *); 297 #endif 298 299 #ifdef RTFLUSH_DEBUG 300 static void sysctl_net_rtcache_setup(struct sysctllog **); 301 static void 302 sysctl_net_rtcache_setup(struct sysctllog **clog) 303 { 304 const struct sysctlnode *rnode; 305 306 if (sysctl_createv(clog, 0, NULL, &rnode, CTLFLAG_PERMANENT, 307 CTLTYPE_NODE, 308 "rtcache", SYSCTL_DESCR("Route cache related settings"), 309 NULL, 0, NULL, 0, CTL_NET, CTL_CREATE, CTL_EOL) != 0) 310 return; 311 if (sysctl_createv(clog, 0, &rnode, &rnode, 312 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, 313 "debug", SYSCTL_DESCR("Debug route caches"), 314 NULL, 0, &_rtcache_debug, 0, CTL_CREATE, CTL_EOL) != 0) 315 return; 316 } 317 #endif /* RTFLUSH_DEBUG */ 318 319 static inline void 320 rt_destroy(struct rtentry *rt) 321 { 322 if (rt->_rt_key != NULL) 323 sockaddr_free(rt->_rt_key); 324 if (rt->rt_gateway != NULL) 325 sockaddr_free(rt->rt_gateway); 326 if (rt_gettag(rt) != NULL) 327 sockaddr_free(rt_gettag(rt)); 328 rt->_rt_key = rt->rt_gateway = rt->rt_tag = NULL; 329 } 330 331 static inline const struct sockaddr * 332 rt_setkey(struct rtentry *rt, const struct sockaddr *key, int flags) 333 { 334 if (rt->_rt_key == key) 335 goto out; 336 337 if (rt->_rt_key != NULL) 338 sockaddr_free(rt->_rt_key); 339 rt->_rt_key = sockaddr_dup(key, flags); 340 out: 341 rt->rt_nodes->rn_key = (const char *)rt->_rt_key; 342 return rt->_rt_key; 343 } 344 345 struct ifaddr * 346 rt_get_ifa(struct rtentry *rt) 347 { 348 struct ifaddr *ifa; 349 350 if ((ifa = rt->rt_ifa) == NULL) 351 return ifa; 352 else if (ifa->ifa_getifa == NULL) 353 return ifa; 354 #if 0 355 else if (ifa->ifa_seqno != NULL && *ifa->ifa_seqno == rt->rt_ifa_seqno) 356 return ifa; 357 #endif 358 else { 359 ifa = (*ifa->ifa_getifa)(ifa, rt_getkey(rt)); 360 if (ifa == NULL) 
361 return NULL; 362 rt_replace_ifa(rt, ifa); 363 return ifa; 364 } 365 } 366 367 static void 368 rt_set_ifa1(struct rtentry *rt, struct ifaddr *ifa) 369 { 370 rt->rt_ifa = ifa; 371 if (ifa->ifa_seqno != NULL) 372 rt->rt_ifa_seqno = *ifa->ifa_seqno; 373 } 374 375 /* 376 * Is this route the connected route for the ifa? 377 */ 378 static int 379 rt_ifa_connected(const struct rtentry *rt, const struct ifaddr *ifa) 380 { 381 const struct sockaddr *key, *dst, *odst; 382 struct sockaddr_storage maskeddst; 383 384 key = rt_getkey(rt); 385 dst = rt->rt_flags & RTF_HOST ? ifa->ifa_dstaddr : ifa->ifa_addr; 386 if (dst == NULL || 387 dst->sa_family != key->sa_family || 388 dst->sa_len != key->sa_len) 389 return 0; 390 if ((rt->rt_flags & RTF_HOST) == 0 && ifa->ifa_netmask) { 391 odst = dst; 392 dst = (struct sockaddr *)&maskeddst; 393 rt_maskedcopy(odst, (struct sockaddr *)&maskeddst, 394 ifa->ifa_netmask); 395 } 396 return (memcmp(dst, key, dst->sa_len) == 0); 397 } 398 399 void 400 rt_replace_ifa(struct rtentry *rt, struct ifaddr *ifa) 401 { 402 if (rt->rt_ifa && 403 rt->rt_ifa != ifa && 404 rt->rt_ifa->ifa_flags & IFA_ROUTE && 405 rt_ifa_connected(rt, rt->rt_ifa)) 406 { 407 RT_DPRINTF("rt->_rt_key = %p, ifa = %p, " 408 "replace deleted IFA_ROUTE\n", 409 (void *)rt->_rt_key, (void *)rt->rt_ifa); 410 rt->rt_ifa->ifa_flags &= ~IFA_ROUTE; 411 if (rt_ifa_connected(rt, ifa)) { 412 RT_DPRINTF("rt->_rt_key = %p, ifa = %p, " 413 "replace added IFA_ROUTE\n", 414 (void *)rt->_rt_key, (void *)ifa); 415 ifa->ifa_flags |= IFA_ROUTE; 416 } 417 } 418 419 ifaref(ifa); 420 ifafree(rt->rt_ifa); 421 rt_set_ifa1(rt, ifa); 422 } 423 424 static void 425 rt_set_ifa(struct rtentry *rt, struct ifaddr *ifa) 426 { 427 ifaref(ifa); 428 rt_set_ifa1(rt, ifa); 429 } 430 431 static int 432 route_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie, 433 void *arg0, void *arg1, void *arg2, void *arg3) 434 { 435 struct rt_msghdr *rtm; 436 int result; 437 438 result = KAUTH_RESULT_DEFER; 439 rtm 
= arg1; 440 441 if (action != KAUTH_NETWORK_ROUTE) 442 return result; 443 444 if (rtm->rtm_type == RTM_GET) 445 result = KAUTH_RESULT_ALLOW; 446 447 return result; 448 } 449 450 static void rt_free_work(struct work *, void *); 451 452 void 453 rt_init(void) 454 { 455 int error; 456 457 #ifdef RTFLUSH_DEBUG 458 sysctl_net_rtcache_setup(NULL); 459 #endif 460 461 mutex_init(&rt_free_global.lock, MUTEX_DEFAULT, IPL_SOFTNET); 462 SLIST_INIT(&rt_free_global.queue); 463 rt_free_global.enqueued = false; 464 465 rt_psref_class = psref_class_create("rtentry", IPL_SOFTNET); 466 467 error = workqueue_create(&rt_free_global.wq, "rt_free", 468 rt_free_work, NULL, PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE); 469 if (error) 470 panic("%s: workqueue_create failed (%d)\n", __func__, error); 471 472 mutex_init(&rt_update_global.lock, MUTEX_DEFAULT, IPL_SOFTNET); 473 cv_init(&rt_update_global.cv, "rt_update"); 474 475 pool_init(&rtentry_pool, sizeof(struct rtentry), 0, 0, 0, "rtentpl", 476 NULL, IPL_SOFTNET); 477 pool_init(&rttimer_pool, sizeof(struct rttimer), 0, 0, 0, "rttmrpl", 478 NULL, IPL_SOFTNET); 479 480 rn_init(); /* initialize all zeroes, all ones, mask table */ 481 rtbl_init(); 482 483 route_listener = kauth_listen_scope(KAUTH_SCOPE_NETWORK, 484 route_listener_cb, NULL); 485 } 486 487 static void 488 rtcache_invalidate(void) 489 { 490 491 RT_ASSERT_WLOCK(); 492 493 if (rtcache_debug()) 494 printf("%s: enter\n", __func__); 495 496 rtcache_generation++; 497 } 498 499 #ifdef RT_DEBUG 500 static void 501 dump_rt(const struct rtentry *rt) 502 { 503 char buf[512]; 504 505 aprint_normal("rt: "); 506 aprint_normal("p=%p ", rt); 507 if (rt->_rt_key == NULL) { 508 aprint_normal("dst=(NULL) "); 509 } else { 510 sockaddr_format(rt->_rt_key, buf, sizeof(buf)); 511 aprint_normal("dst=%s ", buf); 512 } 513 if (rt->rt_gateway == NULL) { 514 aprint_normal("gw=(NULL) "); 515 } else { 516 sockaddr_format(rt->_rt_key, buf, sizeof(buf)); 517 aprint_normal("gw=%s ", buf); 518 } 519 
aprint_normal("flags=%x ", rt->rt_flags); 520 if (rt->rt_ifp == NULL) { 521 aprint_normal("if=(NULL) "); 522 } else { 523 aprint_normal("if=%s ", rt->rt_ifp->if_xname); 524 } 525 aprint_normal("\n"); 526 } 527 #endif /* RT_DEBUG */ 528 529 /* 530 * Packet routing routines. If success, refcnt of a returned rtentry 531 * will be incremented. The caller has to rtfree it by itself. 532 */ 533 struct rtentry * 534 rtalloc1_locked(const struct sockaddr *dst, int report, bool wait_ok, 535 bool wlock) 536 { 537 rtbl_t *rtbl; 538 struct rtentry *rt; 539 int s; 540 541 #ifdef NET_MPSAFE 542 retry: 543 #endif 544 s = splsoftnet(); 545 rtbl = rt_gettable(dst->sa_family); 546 if (rtbl == NULL) 547 goto miss; 548 549 rt = rt_matchaddr(rtbl, dst); 550 if (rt == NULL) 551 goto miss; 552 553 if (!ISSET(rt->rt_flags, RTF_UP)) 554 goto miss; 555 556 #ifdef NET_MPSAFE 557 if (ISSET(rt->rt_flags, RTF_UPDATING) && 558 /* XXX updater should be always able to acquire */ 559 curlwp != rt_update_global.lwp) { 560 if (!wait_ok || !rt_wait_ok()) 561 goto miss; 562 RT_UNLOCK(); 563 splx(s); 564 565 /* We can wait until the update is complete */ 566 rt_update_wait(); 567 568 if (wlock) 569 RT_WLOCK(); 570 else 571 RT_RLOCK(); 572 goto retry; 573 } 574 #endif /* NET_MPSAFE */ 575 576 rt_ref(rt); 577 RT_REFCNT_TRACE(rt); 578 579 splx(s); 580 return rt; 581 miss: 582 rtstat.rts_unreach++; 583 if (report) { 584 struct rt_addrinfo info; 585 586 memset(&info, 0, sizeof(info)); 587 info.rti_info[RTAX_DST] = dst; 588 rt_missmsg(RTM_MISS, &info, 0, 0); 589 } 590 splx(s); 591 return NULL; 592 } 593 594 struct rtentry * 595 rtalloc1(const struct sockaddr *dst, int report) 596 { 597 struct rtentry *rt; 598 599 RT_RLOCK(); 600 rt = rtalloc1_locked(dst, report, true, false); 601 RT_UNLOCK(); 602 603 return rt; 604 } 605 606 static void 607 rt_ref(struct rtentry *rt) 608 { 609 610 KASSERT(rt->rt_refcnt >= 0); 611 atomic_inc_uint(&rt->rt_refcnt); 612 } 613 614 void 615 rt_unref(struct rtentry *rt) 616 { 617 
618 KASSERT(rt != NULL); 619 KASSERTMSG(rt->rt_refcnt > 0, "refcnt=%d", rt->rt_refcnt); 620 621 atomic_dec_uint(&rt->rt_refcnt); 622 if (!ISSET(rt->rt_flags, RTF_UP) || ISSET(rt->rt_flags, RTF_UPDATING)) { 623 mutex_enter(&rt_free_global.lock); 624 cv_broadcast(&rt->rt_cv); 625 mutex_exit(&rt_free_global.lock); 626 } 627 } 628 629 static bool 630 rt_wait_ok(void) 631 { 632 633 KASSERT(!cpu_intr_p()); 634 return !cpu_softintr_p(); 635 } 636 637 void 638 rt_wait_refcnt(const char *title, struct rtentry *rt, int cnt) 639 { 640 mutex_enter(&rt_free_global.lock); 641 while (rt->rt_refcnt > cnt) { 642 dlog(LOG_DEBUG, "%s: %s waiting (refcnt=%d)\n", 643 __func__, title, rt->rt_refcnt); 644 cv_wait(&rt->rt_cv, &rt_free_global.lock); 645 dlog(LOG_DEBUG, "%s: %s waited (refcnt=%d)\n", 646 __func__, title, rt->rt_refcnt); 647 } 648 mutex_exit(&rt_free_global.lock); 649 } 650 651 void 652 rt_wait_psref(struct rtentry *rt) 653 { 654 655 psref_target_destroy(&rt->rt_psref, rt_psref_class); 656 psref_target_init(&rt->rt_psref, rt_psref_class); 657 } 658 659 static void 660 _rt_free(struct rtentry *rt) 661 { 662 struct ifaddr *ifa; 663 664 /* 665 * Need to avoid a deadlock on rt_wait_refcnt of update 666 * and a conflict on psref_target_destroy of update. 
667 */ 668 #ifdef NET_MPSAFE 669 rt_update_wait(); 670 #endif 671 672 RT_REFCNT_TRACE(rt); 673 KASSERTMSG(rt->rt_refcnt >= 0, "refcnt=%d", rt->rt_refcnt); 674 rt_wait_refcnt("free", rt, 0); 675 #ifdef NET_MPSAFE 676 psref_target_destroy(&rt->rt_psref, rt_psref_class); 677 #endif 678 679 rt_assert_inactive(rt); 680 rttrash--; 681 ifa = rt->rt_ifa; 682 rt->rt_ifa = NULL; 683 ifafree(ifa); 684 rt->rt_ifp = NULL; 685 cv_destroy(&rt->rt_cv); 686 rt_destroy(rt); 687 pool_put(&rtentry_pool, rt); 688 } 689 690 static void 691 rt_free_work(struct work *wk, void *arg) 692 { 693 694 for (;;) { 695 struct rtentry *rt; 696 697 mutex_enter(&rt_free_global.lock); 698 rt_free_global.enqueued = false; 699 if ((rt = SLIST_FIRST(&rt_free_global.queue)) == NULL) { 700 mutex_exit(&rt_free_global.lock); 701 return; 702 } 703 SLIST_REMOVE_HEAD(&rt_free_global.queue, rt_free); 704 mutex_exit(&rt_free_global.lock); 705 atomic_dec_uint(&rt->rt_refcnt); 706 _rt_free(rt); 707 } 708 } 709 710 void 711 rt_free(struct rtentry *rt) 712 { 713 714 KASSERT(rt->rt_refcnt > 0); 715 if (rt_wait_ok()) { 716 atomic_dec_uint(&rt->rt_refcnt); 717 _rt_free(rt); 718 return; 719 } 720 721 mutex_enter(&rt_free_global.lock); 722 rt_ref(rt); 723 SLIST_INSERT_HEAD(&rt_free_global.queue, rt, rt_free); 724 if (!rt_free_global.enqueued) { 725 workqueue_enqueue(rt_free_global.wq, &rt_free_global.wk, NULL); 726 rt_free_global.enqueued = true; 727 } 728 mutex_exit(&rt_free_global.lock); 729 } 730 731 #ifdef NET_MPSAFE 732 static void 733 rt_update_wait(void) 734 { 735 736 mutex_enter(&rt_update_global.lock); 737 while (rt_update_global.ongoing) { 738 dlog(LOG_DEBUG, "%s: waiting lwp=%p\n", __func__, curlwp); 739 cv_wait(&rt_update_global.cv, &rt_update_global.lock); 740 dlog(LOG_DEBUG, "%s: waited lwp=%p\n", __func__, curlwp); 741 } 742 mutex_exit(&rt_update_global.lock); 743 } 744 #endif 745 746 int 747 rt_update_prepare(struct rtentry *rt) 748 { 749 750 dlog(LOG_DEBUG, "%s: updating rt=%p lwp=%p\n", __func__, rt, 
curlwp); 751 752 RT_WLOCK(); 753 /* If the entry is being destroyed, don't proceed the update. */ 754 if (!ISSET(rt->rt_flags, RTF_UP)) { 755 RT_UNLOCK(); 756 return ESRCH; 757 } 758 rt->rt_flags |= RTF_UPDATING; 759 RT_UNLOCK(); 760 761 mutex_enter(&rt_update_global.lock); 762 while (rt_update_global.ongoing) { 763 dlog(LOG_DEBUG, "%s: waiting ongoing updating rt=%p lwp=%p\n", 764 __func__, rt, curlwp); 765 cv_wait(&rt_update_global.cv, &rt_update_global.lock); 766 dlog(LOG_DEBUG, "%s: waited ongoing updating rt=%p lwp=%p\n", 767 __func__, rt, curlwp); 768 } 769 rt_update_global.ongoing = true; 770 /* XXX need it to avoid rt_update_wait by updater itself. */ 771 rt_update_global.lwp = curlwp; 772 mutex_exit(&rt_update_global.lock); 773 774 rt_wait_refcnt("update", rt, 1); 775 rt_wait_psref(rt); 776 777 return 0; 778 } 779 780 void 781 rt_update_finish(struct rtentry *rt) 782 { 783 784 RT_WLOCK(); 785 rt->rt_flags &= ~RTF_UPDATING; 786 RT_UNLOCK(); 787 788 mutex_enter(&rt_update_global.lock); 789 rt_update_global.ongoing = false; 790 rt_update_global.lwp = NULL; 791 cv_broadcast(&rt_update_global.cv); 792 mutex_exit(&rt_update_global.lock); 793 794 dlog(LOG_DEBUG, "%s: updated rt=%p lwp=%p\n", __func__, rt, curlwp); 795 } 796 797 /* 798 * Force a routing table entry to the specified 799 * destination to go through the given gateway. 800 * Normally called as a result of a routing redirect 801 * message from the network layer. 
802 * 803 * N.B.: must be called at splsoftnet 804 */ 805 void 806 rtredirect(const struct sockaddr *dst, const struct sockaddr *gateway, 807 const struct sockaddr *netmask, int flags, const struct sockaddr *src, 808 struct rtentry **rtp) 809 { 810 struct rtentry *rt; 811 int error = 0; 812 uint64_t *stat = NULL; 813 struct rt_addrinfo info; 814 struct ifaddr *ifa; 815 struct psref psref; 816 817 /* verify the gateway is directly reachable */ 818 if ((ifa = ifa_ifwithnet_psref(gateway, &psref)) == NULL) { 819 error = ENETUNREACH; 820 goto out; 821 } 822 rt = rtalloc1(dst, 0); 823 /* 824 * If the redirect isn't from our current router for this dst, 825 * it's either old or wrong. If it redirects us to ourselves, 826 * we have a routing loop, perhaps as a result of an interface 827 * going down recently. 828 */ 829 if (!(flags & RTF_DONE) && rt && 830 (sockaddr_cmp(src, rt->rt_gateway) != 0 || rt->rt_ifa != ifa)) 831 error = EINVAL; 832 else { 833 int s = pserialize_read_enter(); 834 struct ifaddr *_ifa; 835 836 _ifa = ifa_ifwithaddr(gateway); 837 if (_ifa != NULL) 838 error = EHOSTUNREACH; 839 pserialize_read_exit(s); 840 } 841 if (error) 842 goto done; 843 /* 844 * Create a new entry if we just got back a wildcard entry 845 * or the lookup failed. This is necessary for hosts 846 * which use routing redirects generated by smart gateways 847 * to dynamically build the routing tables. 848 */ 849 if (rt == NULL || (rt_mask(rt) && rt_mask(rt)->sa_len < 2)) 850 goto create; 851 /* 852 * Don't listen to the redirect if it's 853 * for a route to an interface. 854 */ 855 if (rt->rt_flags & RTF_GATEWAY) { 856 if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) { 857 /* 858 * Changing from route to net => route to host. 859 * Create new route, rather than smashing route to net. 
860 */ 861 create: 862 if (rt != NULL) 863 rt_unref(rt); 864 flags |= RTF_GATEWAY | RTF_DYNAMIC; 865 memset(&info, 0, sizeof(info)); 866 info.rti_info[RTAX_DST] = dst; 867 info.rti_info[RTAX_GATEWAY] = gateway; 868 info.rti_info[RTAX_NETMASK] = netmask; 869 info.rti_ifa = ifa; 870 info.rti_flags = flags; 871 rt = NULL; 872 error = rtrequest1(RTM_ADD, &info, &rt); 873 if (rt != NULL) 874 flags = rt->rt_flags; 875 stat = &rtstat.rts_dynamic; 876 } else { 877 /* 878 * Smash the current notion of the gateway to 879 * this destination. Should check about netmask!!! 880 */ 881 #ifdef NET_MPSAFE 882 KASSERT(!cpu_softintr_p()); 883 884 error = rt_update_prepare(rt); 885 if (error == 0) { 886 #endif 887 error = rt_setgate(rt, gateway); 888 if (error == 0) { 889 rt->rt_flags |= RTF_MODIFIED; 890 flags |= RTF_MODIFIED; 891 } 892 #ifdef NET_MPSAFE 893 rt_update_finish(rt); 894 } else { 895 /* 896 * If error != 0, the rtentry is being 897 * destroyed, so doing nothing doesn't 898 * matter. 899 */ 900 } 901 #endif 902 stat = &rtstat.rts_newgateway; 903 } 904 } else 905 error = EHOSTUNREACH; 906 done: 907 if (rt) { 908 if (rtp != NULL && !error) 909 *rtp = rt; 910 else 911 rt_unref(rt); 912 } 913 out: 914 if (error) 915 rtstat.rts_badredirect++; 916 else if (stat != NULL) 917 (*stat)++; 918 memset(&info, 0, sizeof(info)); 919 info.rti_info[RTAX_DST] = dst; 920 info.rti_info[RTAX_GATEWAY] = gateway; 921 info.rti_info[RTAX_NETMASK] = netmask; 922 info.rti_info[RTAX_AUTHOR] = src; 923 rt_missmsg(RTM_REDIRECT, &info, flags, error); 924 ifa_release(ifa, &psref); 925 } 926 927 /* 928 * Delete a route and generate a message. 929 * It doesn't free a passed rt. 930 */ 931 static int 932 rtdeletemsg(struct rtentry *rt) 933 { 934 int error; 935 struct rt_addrinfo info; 936 struct rtentry *retrt; 937 938 /* 939 * Request the new route so that the entry is not actually 940 * deleted. That will allow the information being reported to 941 * be accurate (and consistent with route_output()). 
942 */ 943 memset(&info, 0, sizeof(info)); 944 info.rti_info[RTAX_DST] = rt_getkey(rt); 945 info.rti_info[RTAX_NETMASK] = rt_mask(rt); 946 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 947 info.rti_flags = rt->rt_flags; 948 error = rtrequest1(RTM_DELETE, &info, &retrt); 949 950 rt_missmsg(RTM_DELETE, &info, info.rti_flags, error); 951 952 return error; 953 } 954 955 struct ifaddr * 956 ifa_ifwithroute_psref(int flags, const struct sockaddr *dst, 957 const struct sockaddr *gateway, struct psref *psref) 958 { 959 struct ifaddr *ifa = NULL; 960 961 if ((flags & RTF_GATEWAY) == 0) { 962 /* 963 * If we are adding a route to an interface, 964 * and the interface is a pt to pt link 965 * we should search for the destination 966 * as our clue to the interface. Otherwise 967 * we can use the local address. 968 */ 969 if ((flags & RTF_HOST) && gateway->sa_family != AF_LINK) 970 ifa = ifa_ifwithdstaddr_psref(dst, psref); 971 if (ifa == NULL) 972 ifa = ifa_ifwithaddr_psref(gateway, psref); 973 } else { 974 /* 975 * If we are adding a route to a remote net 976 * or host, the gateway may still be on the 977 * other end of a pt to pt link. 978 */ 979 ifa = ifa_ifwithdstaddr_psref(gateway, psref); 980 } 981 if (ifa == NULL) 982 ifa = ifa_ifwithnet_psref(gateway, psref); 983 if (ifa == NULL) { 984 int s; 985 struct rtentry *rt; 986 987 /* XXX we cannot call rtalloc1 if holding the rt lock */ 988 if (RT_LOCKED()) 989 rt = rtalloc1_locked(gateway, 0, true, true); 990 else 991 rt = rtalloc1(gateway, 0); 992 if (rt == NULL) 993 return NULL; 994 if (rt->rt_flags & RTF_GATEWAY) { 995 rt_unref(rt); 996 return NULL; 997 } 998 /* 999 * Just in case. May not need to do this workaround. 1000 * Revisit when working on rtentry MP-ification. 
	 */
		s = pserialize_read_enter();
		/*
		 * Re-find rt_ifa on rt_ifp's address list so we only
		 * take a psref on an ifaddr that is still attached.
		 */
		IFADDR_READER_FOREACH(ifa, rt->rt_ifp) {
			if (ifa == rt->rt_ifa)
				break;
		}
		if (ifa != NULL)
			ifa_acquire(ifa, psref);
		pserialize_read_exit(s);
		rt_unref(rt);
		if (ifa == NULL)
			return NULL;
	}
	if (ifa->ifa_addr->sa_family != dst->sa_family) {
		struct ifaddr *nifa;
		int s;

		/*
		 * Prefer an address of the same family as dst on the
		 * same interface, if one exists; swap the psref over.
		 */
		s = pserialize_read_enter();
		nifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
		if (nifa != NULL) {
			ifa_release(ifa, psref);
			ifa_acquire(nifa, psref);
			ifa = nifa;
		}
		pserialize_read_exit(s);
	}
	return ifa;
}

/*
 * If it succeeds and ret_nrt isn't NULL, the reference count of ret_nrt
 * is incremented.  The caller has to rtfree it by itself.
 */
int
rtrequest(int req, const struct sockaddr *dst, const struct sockaddr *gateway,
	const struct sockaddr *netmask, int flags, struct rtentry **ret_nrt)
{
	struct rt_addrinfo info;

	/* Convenience wrapper: pack the sockaddrs into rt_addrinfo. */
	memset(&info, 0, sizeof(info));
	info.rti_flags = flags;
	info.rti_info[RTAX_DST] = dst;
	info.rti_info[RTAX_GATEWAY] = gateway;
	info.rti_info[RTAX_NETMASK] = netmask;
	return rtrequest1(req, &info, ret_nrt);
}

/*
 * It's a utility function to add/remove a route to/from the routing table
 * and tell user processes the addition/removal on success.
 */
int
rtrequest_newmsg(const int req, const struct sockaddr *dst,
	const struct sockaddr *gateway, const struct sockaddr *netmask,
	const int flags)
{
	int error;
	struct rtentry *ret_nrt = NULL;

	KASSERT(req == RTM_ADD || req == RTM_DELETE);

	error = rtrequest(req, dst, gateway, netmask, flags, &ret_nrt);
	if (error != 0)
		return error;

	KASSERT(ret_nrt != NULL);

	rt_newmsg(req, ret_nrt);	/* tell user process */
	/*
	 * For RTM_DELETE, drop the route for good with rt_free;
	 * otherwise just release the reference taken by rtrequest.
	 */
	if (req == RTM_DELETE)
		rt_free(ret_nrt);
	else
		rt_unref(ret_nrt);

	return 0;
}

/*
 * Resolve the ifnet named by info's RTAX_IFP sockaddr, if any.
 * Returns NULL if info->rti_ifp is already set or no AF_LINK sockaddr
 * was supplied; otherwise stores the looked-up ifp in info->rti_ifp
 * (with a psref held by the caller via *psref) and returns it.
 */
struct ifnet *
rt_getifp(struct rt_addrinfo *info, struct psref *psref)
{
	const struct sockaddr *ifpaddr = info->rti_info[RTAX_IFP];

	if (info->rti_ifp != NULL)
		return NULL;
	/*
	 * ifp may be specified by sockaddr_dl when protocol address
	 * is ambiguous
	 */
	if (ifpaddr != NULL && ifpaddr->sa_family == AF_LINK) {
		struct ifaddr *ifa;
		int s = pserialize_read_enter();

		ifa = ifa_ifwithnet(ifpaddr);
		if (ifa != NULL)
			info->rti_ifp = if_get_byindex(ifa->ifa_ifp->if_index,
			    psref);
		pserialize_read_exit(s);
	}

	return info->rti_ifp;
}

/*
 * Choose an ifaddr for the request described by info: try the explicit
 * RTAX_IFA sockaddr first, then fall back to route-based lookups.
 * On success the chosen ifa is stored in info->rti_ifa (psref held) and
 * info->rti_ifp is filled in if it was not set.  Returns NULL on failure.
 */
struct ifaddr *
rt_getifa(struct rt_addrinfo *info, struct psref *psref)
{
	struct ifaddr *ifa = NULL;
	const struct sockaddr *dst = info->rti_info[RTAX_DST];
	const struct sockaddr *gateway = info->rti_info[RTAX_GATEWAY];
	const struct sockaddr *ifaaddr = info->rti_info[RTAX_IFA];
	int flags = info->rti_flags;
	const struct sockaddr *sa;

	if (info->rti_ifa == NULL && ifaaddr != NULL) {
		ifa = ifa_ifwithaddr_psref(ifaaddr, psref);
		if (ifa != NULL)
			goto got;
	}

	/* Fall back: ifa address, else gateway, else destination. */
	sa = ifaaddr != NULL ? ifaaddr :
	    (gateway != NULL ? gateway : dst);
	if (sa != NULL && info->rti_ifp != NULL)
		ifa = ifaof_ifpforaddr_psref(sa, info->rti_ifp, psref);
	else if (dst != NULL && gateway != NULL)
		ifa = ifa_ifwithroute_psref(flags, dst, gateway, psref);
	else if (sa != NULL)
		ifa = ifa_ifwithroute_psref(flags, sa, sa, psref);
	if (ifa == NULL)
		return NULL;
got:
	if (ifa->ifa_getifa != NULL) {
		/* FIXME ifa_getifa is NOMPSAFE */
		ifa = (*ifa->ifa_getifa)(ifa, dst);
		if (ifa == NULL)
			return NULL;
		/* The replacement ifa also needs a psref for the caller. */
		ifa_acquire(ifa, psref);
	}
	info->rti_ifa = ifa;
	if (info->rti_ifp == NULL)
		info->rti_ifp = ifa->ifa_ifp;
	return ifa;
}

/*
 * Add (RTM_ADD), delete (RTM_DELETE) or look up (RTM_GET) a routing
 * table entry described by info.
 * If it succeeds and ret_nrt isn't NULL, the reference count of ret_nrt
 * is incremented.  The caller has to rtfree it by itself.
 */
int
rtrequest1(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt)
{
	int s = splsoftnet(), ss;
	int error = 0, rc;
	struct rtentry *rt;
	rtbl_t *rtbl;
	struct ifaddr *ifa = NULL;
	struct sockaddr_storage maskeddst;
	const struct sockaddr *dst = info->rti_info[RTAX_DST];
	const struct sockaddr *gateway = info->rti_info[RTAX_GATEWAY];
	const struct sockaddr *netmask = info->rti_info[RTAX_NETMASK];
	int flags = info->rti_flags;
	struct psref psref_ifp, psref_ifa;
	int bound = 0;
	struct ifnet *ifp = NULL;
	bool need_to_release_ifa = true;
	bool need_unlock = true;
/* Record the error and bail out through the common cleanup path. */
#define senderr(x) { error = x ; goto bad; }

	RT_WLOCK();

	bound = curlwp_bind();
	if ((rtbl = rt_gettable(dst->sa_family)) == NULL)
		senderr(ESRCH);
	/* Host routes carry no netmask. */
	if (flags & RTF_HOST)
		netmask = NULL;
	switch (req) {
	case RTM_DELETE:
		if (netmask) {
			/* Look up by the masked destination. */
			rt_maskedcopy(dst, (struct sockaddr *)&maskeddst,
			    netmask);
			dst = (struct sockaddr *)&maskeddst;
		}
		if ((rt = rt_lookup(rtbl, dst, netmask)) == NULL)
			senderr(ESRCH);
		if ((rt = rt_deladdr(rtbl, dst, netmask)) == NULL)
			senderr(ESRCH);
		rt->rt_flags &= ~RTF_UP;
		if ((ifa = rt->rt_ifa)) {
			if (ifa->ifa_flags & IFA_ROUTE &&
			    rt_ifa_connected(rt, ifa)) {
				RT_DPRINTF("rt->_rt_key = %p, ifa = %p, "
				    "deleted IFA_ROUTE\n",
				    (void *)rt->_rt_key, (void *)ifa);
				ifa->ifa_flags &= ~IFA_ROUTE;
			}
			if (ifa->ifa_rtrequest)
				ifa->ifa_rtrequest(RTM_DELETE, rt, info);
			ifa = NULL;
		}
		rttrash++;
		if (ret_nrt) {
			*ret_nrt = rt;
			rt_ref(rt);
			RT_REFCNT_TRACE(rt);
		}
		rtcache_invalidate();
		/*
		 * Drop the lock before rt_timer_remove_all and
		 * lltable_prefix_free, which take it themselves or sleep.
		 */
		RT_UNLOCK();
		need_unlock = false;
		rt_timer_remove_all(rt);
#if defined(INET) || defined(INET6)
		if (netmask != NULL)
			lltable_prefix_free(dst->sa_family, dst, netmask, 0);
#endif
		if (ret_nrt == NULL) {
			/* Adjust the refcount */
			rt_ref(rt);
			RT_REFCNT_TRACE(rt);
			rt_free(rt);
		}
		break;

	case RTM_ADD:
		if (info->rti_ifa == NULL) {
			ifp = rt_getifp(info, &psref_ifp);
			ifa = rt_getifa(info, &psref_ifa);
			if (ifa == NULL)
				senderr(ENETUNREACH);
		} else {
			/* Caller should have a reference of ifa */
			ifa = info->rti_ifa;
			need_to_release_ifa = false;
		}
		rt = pool_get(&rtentry_pool, PR_NOWAIT);
		if (rt == NULL)
			senderr(ENOBUFS);
		memset(rt, 0, sizeof(*rt));
		rt->rt_flags = RTF_UP | flags;
		LIST_INIT(&rt->rt_timer);

		RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
		if (netmask) {
			rt_maskedcopy(dst, (struct sockaddr *)&maskeddst,
			    netmask);
			rt_setkey(rt, (struct sockaddr *)&maskeddst, M_NOWAIT);
		} else {
			rt_setkey(rt, dst, M_NOWAIT);
		}
		RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
		if (rt_getkey(rt) == NULL ||
		    rt_setgate(rt, gateway) != 0) {
			pool_put(&rtentry_pool, rt);
			senderr(ENOBUFS);
		}

		rt_set_ifa(rt, ifa);
		if (info->rti_info[RTAX_TAG] != NULL) {
			const struct sockaddr *tag;
			tag = rt_settag(rt, info->rti_info[RTAX_TAG]);
			if (tag == NULL)
				senderr(ENOBUFS);
		}
		RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);

		/*
		 * An explicit RTAX_IFP sockaddr may override which
		 * interface the route points at.
		 */
		ss = pserialize_read_enter();
		if (info->rti_info[RTAX_IFP] != NULL) {
			struct ifaddr *ifa2;
			ifa2 = ifa_ifwithnet(info->rti_info[RTAX_IFP]);
			if (ifa2 != NULL)
				rt->rt_ifp = ifa2->ifa_ifp;
			else
				rt->rt_ifp = ifa->ifa_ifp;
		} else
			rt->rt_ifp = ifa->ifa_ifp;
		pserialize_read_exit(ss);
		cv_init(&rt->rt_cv, "rtentry");
		psref_target_init(&rt->rt_psref, rt_psref_class);

		RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
		rc = rt_addaddr(rtbl, rt, netmask);
		RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
		if (rc != 0) {
			/* Undo everything built up above for this entry. */
			ifafree(ifa); /* for rt_set_ifa above */
			cv_destroy(&rt->rt_cv);
			rt_destroy(rt);
			pool_put(&rtentry_pool, rt);
			senderr(rc);
		}
		RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
		if (ifa->ifa_rtrequest)
			ifa->ifa_rtrequest(req, rt, info);
		if (need_to_release_ifa)
			ifa_release(ifa, &psref_ifa);
		ifa = NULL;
		if_put(ifp, &psref_ifp);
		ifp = NULL;
		RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
		if (ret_nrt) {
			*ret_nrt = rt;
			rt_ref(rt);
			RT_REFCNT_TRACE(rt);
		}
		rtcache_invalidate();
		RT_UNLOCK();
		need_unlock = false;
		break;
	case RTM_GET:
		if (netmask != NULL) {
			rt_maskedcopy(dst, (struct sockaddr *)&maskeddst,
			    netmask);
			dst = (struct sockaddr *)&maskeddst;
		}
		if ((rt = rt_lookup(rtbl, dst, netmask)) == NULL)
			senderr(ESRCH);
		if (ret_nrt != NULL) {
			*ret_nrt = rt;
			rt_ref(rt);
			RT_REFCNT_TRACE(rt);
		}
		break;
	}
bad:
	/* Common exit: release references and the lock if still held. */
	if (need_to_release_ifa)
		ifa_release(ifa, &psref_ifa);
	if_put(ifp, &psref_ifp);
	curlwp_bindx(bound);
	if (need_unlock)
		RT_UNLOCK();
	splx(s);
	return error;
}

/*
 * Replace rt's gateway sockaddr with a copy of gate.
 * For RTF_GATEWAY routes, also look up the gateway's own route and
 * clamp this route's MTU to the gateway route's MTU.
 * Returns 0 on success or ENOMEM if the sockaddr copy fails.
 */
int
rt_setgate(struct rtentry *rt, const struct sockaddr *gate)
{
	struct sockaddr *new, *old;

	KASSERT(rt->_rt_key != NULL);
	RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);

	new = sockaddr_dup(gate, M_ZERO | M_NOWAIT);
	if (new == NULL)
		return ENOMEM;

	/* Swap in the new gateway before freeing the old one. */
	old = rt->rt_gateway;
	rt->rt_gateway = new;
	if (old != NULL)
		sockaddr_free(old);

	KASSERT(rt->_rt_key != NULL);
	RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);

	if (rt->rt_flags & RTF_GATEWAY) {
		struct rtentry *gwrt;

		/* XXX we cannot call rtalloc1 if holding the rt lock */
		if (RT_LOCKED())
			gwrt = rtalloc1_locked(gate, 1, false, true);
		else
			gwrt = rtalloc1(gate, 1);
		/*
		 * If we switched gateways, grab the MTU from the new
		 * gateway route if the current MTU is greater than the
		 * MTU of the gateway.
		 * Note that, if the MTU of gateway is 0, we will reset the
		 * MTU of the route to run PMTUD again from scratch. XXX
		 */
		if (gwrt != NULL) {
			KASSERT(gwrt->_rt_key != NULL);
			RT_DPRINTF("gwrt->_rt_key = %p\n", gwrt->_rt_key);
			if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0 &&
			    rt->rt_rmx.rmx_mtu &&
			    rt->rt_rmx.rmx_mtu > gwrt->rt_rmx.rmx_mtu) {
				rt->rt_rmx.rmx_mtu = gwrt->rt_rmx.rmx_mtu;
			}
			rt_unref(gwrt);
		}
	}
	KASSERT(rt->_rt_key != NULL);
	RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
	return 0;
}

/*
 * Copy src into dst with each sa_data byte ANDed against netmask.
 * Bytes of dst beyond the netmask's coverage are zeroed.
 * Note the bounds (maskend/srcend) are computed from the start of the
 * sockaddr while copying starts at sa_data, so the sa_len/sa_family
 * header bytes are excluded from the masked copy.
 */
static void
rt_maskedcopy(const struct sockaddr *src, struct sockaddr *dst,
	const struct sockaddr *netmask)
{
	const char *netmaskp = &netmask->sa_data[0],
	    *srcp = &src->sa_data[0];
	char *dstp = &dst->sa_data[0];
	const char *maskend = (char *)dst + MIN(netmask->sa_len, src->sa_len);
	const char *srcend = (char *)dst + src->sa_len;

	dst->sa_len = src->sa_len;
	dst->sa_family = src->sa_family;

	while (dstp < maskend)
		*dstp++ = *srcp++ & *netmaskp++;
	if (dstp < srcend)
		memset(dstp, 0, (size_t)(srcend - dstp));
}

/*
 * Inform the routing socket of a route change.
 */
void
rt_newmsg(const int cmd, const struct rtentry *rt)
{
	struct rt_addrinfo info;

	memset((void *)&info, 0, sizeof(info));
	info.rti_info[RTAX_DST] = rt_getkey(rt);
	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
	info.rti_info[RTAX_NETMASK] = rt_mask(rt);
	if (rt->rt_ifp) {
		info.rti_info[RTAX_IFP] = rt->rt_ifp->if_dl->ifa_addr;
		info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
	}

	rt_missmsg(cmd, &info, rt->rt_flags, 0);
}

/*
 * Set up or tear down a routing table entry, normally
 * for an interface.
 */
int
rtinit(struct ifaddr *ifa, int cmd, int flags)
{
	struct rtentry *rt;
	struct sockaddr *dst, *odst;
	struct sockaddr_storage maskeddst;
	struct rtentry *nrt = NULL;
	int error;
	struct rt_addrinfo info;

	dst = flags & RTF_HOST ? ifa->ifa_dstaddr : ifa->ifa_addr;
	if (cmd == RTM_DELETE) {
		if ((flags & RTF_HOST) == 0 && ifa->ifa_netmask) {
			/* Delete subnet route for this interface */
			odst = dst;
			dst = (struct sockaddr *)&maskeddst;
			rt_maskedcopy(odst, dst, ifa->ifa_netmask);
		}
		/* Refuse to delete a route that belongs to another ifa. */
		if ((rt = rtalloc1(dst, 0)) != NULL) {
			if (rt->rt_ifa != ifa) {
				rt_unref(rt);
				return (flags & RTF_HOST) ? EHOSTUNREACH
				    : ENETUNREACH;
			}
			rt_unref(rt);
		}
	}
	memset(&info, 0, sizeof(info));
	info.rti_ifa = ifa;
	info.rti_flags = flags | ifa->ifa_flags;
	info.rti_info[RTAX_DST] = dst;
	info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;

	/*
	 * XXX here, it seems that we are assuming that ifa_netmask is NULL
	 * for RTF_HOST.  bsdi4 passes NULL explicitly (via intermediate
	 * variable) when RTF_HOST is 1.  still not sure if i can safely
	 * change it to meet bsdi4 behavior.
	 */
	if (cmd != RTM_LLINFO_UPD)
		info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
	/* RTM_LLINFO_UPD only looks up the entry; it doesn't add one. */
	error = rtrequest1((cmd == RTM_LLINFO_UPD) ? RTM_GET : cmd, &info,
	    &nrt);
	if (error != 0)
		return error;

	rt = nrt;
	RT_REFCNT_TRACE(rt);
	switch (cmd) {
	case RTM_DELETE:
		rt_newmsg(cmd, rt);
		rt_free(rt);
		break;
	case RTM_LLINFO_UPD:
		/*
		 * NOTE(review): the cmd == RTM_LLINFO_UPD re-check below
		 * is redundant inside this case; it is always true here.
		 */
		if (cmd == RTM_LLINFO_UPD && ifa->ifa_rtrequest != NULL)
			ifa->ifa_rtrequest(RTM_LLINFO_UPD, rt, &info);
		rt_newmsg(RTM_CHANGE, rt);
		rt_unref(rt);
		break;
	case RTM_ADD:
		/*
		 * XXX it looks just reverting rt_ifa replaced by ifa_rtrequest
		 * called via rtrequest1.  Can we just prevent the replacement
		 * somehow and remove the following code?  And also doesn't
		 * calling ifa_rtrequest(RTM_ADD) replace rt_ifa again?
		 */
		if (rt->rt_ifa != ifa) {
			printf("rtinit: wrong ifa (%p) was (%p)\n", ifa,
			    rt->rt_ifa);
#ifdef NET_MPSAFE
			KASSERT(!cpu_softintr_p());

			error = rt_update_prepare(rt);
			if (error == 0) {
#endif
				if (rt->rt_ifa->ifa_rtrequest != NULL) {
					rt->rt_ifa->ifa_rtrequest(RTM_DELETE,
					    rt, &info);
				}
				rt_replace_ifa(rt, ifa);
				rt->rt_ifp = ifa->ifa_ifp;
				if (ifa->ifa_rtrequest != NULL)
					ifa->ifa_rtrequest(RTM_ADD, rt, &info);
#ifdef NET_MPSAFE
				rt_update_finish(rt);
			} else {
				/*
				 * If error != 0, the rtentry is being
				 * destroyed, so doing nothing doesn't
				 * matter.
				 */
			}
#endif
		}
		rt_newmsg(cmd, rt);
		rt_unref(rt);
		RT_REFCNT_TRACE(rt);
		break;
	}
	return error;
}

/*
 * Create a local route entry for the address.
 * Announce the addition of the address and the route to the routing socket.
 */
int
rt_ifa_addlocal(struct ifaddr *ifa)
{
	struct rtentry *rt;
	int e;

	/* If there is no loopback entry, allocate one. */
	rt = rtalloc1(ifa->ifa_addr, 0);
#ifdef RT_DEBUG
	if (rt != NULL)
		dump_rt(rt);
#endif
	if (rt == NULL || (rt->rt_flags & RTF_HOST) == 0 ||
	    (rt->rt_ifp->if_flags & IFF_LOOPBACK) == 0)
	{
		struct rt_addrinfo info;
		struct rtentry *nrt;

		/* Add a RTF_HOST|RTF_LOCAL route via the link sockaddr. */
		memset(&info, 0, sizeof(info));
		info.rti_flags = RTF_HOST | RTF_LOCAL;
		info.rti_info[RTAX_DST] = ifa->ifa_addr;
		info.rti_info[RTAX_GATEWAY] =
		    (const struct sockaddr *)ifa->ifa_ifp->if_sadl;
		info.rti_ifa = ifa;
		nrt = NULL;
		e = rtrequest1(RTM_ADD, &info, &nrt);
		if (nrt && ifa != nrt->rt_ifa)
			rt_replace_ifa(nrt, ifa);
		rt_newaddrmsg(RTM_ADD, ifa, e, nrt);
		if (nrt != NULL) {
#ifdef RT_DEBUG
			dump_rt(nrt);
#endif
			rt_unref(nrt);
			RT_REFCNT_TRACE(nrt);
		}
	} else {
		/* A suitable loopback host route already exists. */
		e = 0;
		rt_newaddrmsg(RTM_NEWADDR, ifa, 0, NULL);
	}
	if (rt != NULL)
		rt_unref(rt);
	return e;
}

/*
 * Remove the local route entry for the address.
 * Announce the removal of the address and the route to the routing socket.
 */
int
rt_ifa_remlocal(struct ifaddr *ifa, struct ifaddr *alt_ifa)
{
	struct rtentry *rt;
	int e = 0;

	rt = rtalloc1(ifa->ifa_addr, 0);

	/*
	 * Before deleting, check if a corresponding loopbacked
	 * host route surely exists.  With this check, we can avoid
	 * deleting an interface direct route whose destination is
	 * the same as the address being removed.  This can happen
	 * when removing a subnet-router anycast address on an
	 * interface attached to a shared medium.
	 */
	if (rt != NULL &&
	    (rt->rt_flags & RTF_HOST) &&
	    (rt->rt_ifp->if_flags & IFF_LOOPBACK))
	{
		/* If we cannot replace the route's ifaddr with the equivalent
		 * ifaddr of another interface, I believe it is safest to
		 * delete the route.
		 */
		if (alt_ifa == NULL) {
			e = rtdeletemsg(rt);
			if (e == 0) {
				rt_unref(rt);
				rt_free(rt);
				rt = NULL;
			}
			rt_newaddrmsg(RTM_DELADDR, ifa, 0, NULL);
		} else {
			/* Hand the route over to the alternative ifaddr. */
			rt_replace_ifa(rt, alt_ifa);
			rt_newmsg(RTM_CHANGE, rt);
		}
	} else
		rt_newaddrmsg(RTM_DELADDR, ifa, 0, NULL);
	if (rt != NULL)
		rt_unref(rt);
	return e;
}

/*
 * Route timer routines.  These routes allow functions to be called
 * for various routes at any time.  This is useful in supporting
 * path MTU discovery and redirect route deletion.
 *
 * This is similar to some BSDI internal functions, but it provides
 * for multiple queues for efficiency's sake...
 */

/* All active rttimer queues, protected by the RT lock. */
LIST_HEAD(, rttimer_queue) rttimer_queue_head;
/* Nonzero once rt_timer_init has run. */
static int rt_init_done = 0;

/*
 * Some subtle order problems with domain initialization mean that
 * we cannot count on this being run from rt_init before various
 * protocol initializations are done.  Therefore, we make sure
 * that this is run when the first queue is added...
 */

static void rt_timer_work(struct work *, void *);

/*
 * One-time initialization of the route timer machinery: the RT lock,
 * the queue list, the periodic callout and its backing workqueue.
 * Called lazily from rt_timer_queue_create.
 */
static void
rt_timer_init(void)
{
	int error;

	assert(rt_init_done == 0);

	/* XXX should be in rt_init */
	rw_init(&rt_lock);

	LIST_INIT(&rttimer_queue_head);
	callout_init(&rt_timer_ch, CALLOUT_MPSAFE);
	error = workqueue_create(&rt_timer_wq, "rt_timer",
	    rt_timer_work, NULL, PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE);
	if (error)
		panic("%s: workqueue_create failed (%d)\n", __func__, error);
	callout_reset(&rt_timer_ch, hz, rt_timer_timer, NULL);
	rt_init_done = 1;
}

/*
 * Allocate a new rttimer queue with the given timeout (in seconds of
 * uptime) and link it onto the global queue list.
 * Returns NULL if allocation fails.
 */
struct rttimer_queue *
rt_timer_queue_create(u_int timeout)
{
	struct rttimer_queue *rtq;

	if (rt_init_done == 0)
		rt_timer_init();

	R_Malloc(rtq, struct rttimer_queue *, sizeof *rtq);
	if (rtq == NULL)
		return NULL;
	memset(rtq, 0, sizeof(*rtq));

	rtq->rtq_timeout = timeout;
	TAILQ_INIT(&rtq->rtq_head);
	RT_WLOCK();
	LIST_INSERT_HEAD(&rttimer_queue_head, rtq, rtq_link);
	RT_UNLOCK();

	return rtq;
}

/* Change a queue's timeout; existing timers are re-evaluated lazily. */
void
rt_timer_queue_change(struct rttimer_queue *rtq, long timeout)
{

	rtq->rtq_timeout = timeout;
}

/*
 * Fire and discard every timer on the queue.  The RT lock is dropped
 * around each callback invocation and reacquired afterwards, so the
 * queue may be mutated concurrently between iterations.
 */
static void
rt_timer_queue_remove_all(struct rttimer_queue *rtq)
{
	struct rttimer *r;

	RT_ASSERT_WLOCK();

	while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL) {
		LIST_REMOVE(r, rtt_link);
		TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
		rt_ref(r->rtt_rt); /* XXX */
		RT_REFCNT_TRACE(r->rtt_rt);
		RT_UNLOCK();
		(*r->rtt_func)(r->rtt_rt, r);
		pool_put(&rttimer_pool, r);
		RT_WLOCK();
		if (rtq->rtq_count > 0)
			rtq->rtq_count--;
		else
			printf("rt_timer_queue_remove_all: "
			    "rtq_count reached 0\n");
	}
}

/*
 * Drain a queue and unlink it from the global list.
 * The rttimer_queue structure itself is NOT freed here.
 */
void
rt_timer_queue_destroy(struct rttimer_queue *rtq)
{

	RT_WLOCK();
	rt_timer_queue_remove_all(rtq);
	LIST_REMOVE(rtq, rtq_link);
	RT_UNLOCK();

	/*
	 * Caller is responsible for freeing the rttimer_queue structure.
	 */
}

/* Number of timers currently on the queue (unlocked read). */
unsigned long
rt_timer_count(struct rttimer_queue *rtq)
{
	return rtq->rtq_count;
}

/*
 * Discard (without firing) all timers attached to a route, e.g. when
 * the route is being deleted.
 */
static void
rt_timer_remove_all(struct rtentry *rt)
{
	struct rttimer *r;

	RT_WLOCK();
	while ((r = LIST_FIRST(&rt->rt_timer)) != NULL) {
		LIST_REMOVE(r, rtt_link);
		TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next);
		if (r->rtt_queue->rtq_count > 0)
			r->rtt_queue->rtq_count--;
		else
			printf("rt_timer_remove_all: rtq_count reached 0\n");
		pool_put(&rttimer_pool, r);
	}
	RT_UNLOCK();
}

/*
 * Arm a timer on rt that will invoke func when queue's timeout expires.
 * An existing timer with the same func is recycled rather than
 * duplicated.  Returns 0 or ENOBUFS.
 */
int
rt_timer_add(struct rtentry *rt,
	void (*func)(struct rtentry *, struct rttimer *),
	struct rttimer_queue *queue)
{
	struct rttimer *r;

	KASSERT(func != NULL);
	RT_WLOCK();
	/*
	 * If there's already a timer with this action, destroy it before
	 * we add a new one.
	 */
	LIST_FOREACH(r, &rt->rt_timer, rtt_link) {
		if (r->rtt_func == func)
			break;
	}
	if (r != NULL) {
		/* Reuse the old rttimer after unlinking it. */
		LIST_REMOVE(r, rtt_link);
		TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next);
		if (r->rtt_queue->rtq_count > 0)
			r->rtt_queue->rtq_count--;
		else
			printf("rt_timer_add: rtq_count reached 0\n");
	} else {
		r = pool_get(&rttimer_pool, PR_NOWAIT);
		if (r == NULL) {
			RT_UNLOCK();
			return ENOBUFS;
		}
	}

	memset(r, 0, sizeof(*r));

	r->rtt_rt = rt;
	r->rtt_time = time_uptime;
	r->rtt_func = func;
	r->rtt_queue = queue;
	LIST_INSERT_HEAD(&rt->rt_timer, r, rtt_link);
	TAILQ_INSERT_TAIL(&queue->rtq_head, r, rtt_next);
	r->rtt_queue->rtq_count++;

	RT_UNLOCK();

	return 0;
}

/*
 * Workqueue body: walk every queue and fire expired timers.  The RT
 * lock is dropped around each callback, then the callout is re-armed.
 */
static void
rt_timer_work(struct work *wk, void *arg)
{
	struct rttimer_queue *rtq;
	struct rttimer *r;

	RT_WLOCK();
	LIST_FOREACH(rtq, &rttimer_queue_head, rtq_link) {
		while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL &&
		    (r->rtt_time + rtq->rtq_timeout) < time_uptime) {
			LIST_REMOVE(r, rtt_link);
			TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
			rt_ref(r->rtt_rt); /* XXX */
			RT_REFCNT_TRACE(r->rtt_rt);
			RT_UNLOCK();
			(*r->rtt_func)(r->rtt_rt, r);
			pool_put(&rttimer_pool, r);
			RT_WLOCK();
			if (rtq->rtq_count > 0)
				rtq->rtq_count--;
			else
				printf("rt_timer_timer: rtq_count reached 0\n");
		}
	}
	RT_UNLOCK();

	callout_reset(&rt_timer_ch, hz, rt_timer_timer, NULL);
}

/* Callout handler: defer the actual work to the workqueue. */
static void
rt_timer_timer(void *arg)
{

	workqueue_enqueue(rt_timer_wq, &rt_timer_wk, NULL);
}

/*
 * Populate an empty route cache from its stored destination.
 * Only RTF_UP routes are cached; the cache generation is recorded so
 * rtcache_validate can detect global invalidation later.
 */
static struct rtentry *
_rtcache_init(struct route *ro, int flag)
{
	struct rtentry *rt;

	rtcache_invariants(ro);
	KASSERT(ro->_ro_rt == NULL);

	if (rtcache_getdst(ro) == NULL)
		return NULL;
	rt = rtalloc1(rtcache_getdst(ro), flag);
	if (rt != NULL) {
		RT_RLOCK();
		if (ISSET(rt->rt_flags, RTF_UP)) {
			ro->_ro_rt = rt;
			ro->ro_rtcache_generation = rtcache_generation;
			rtcache_ref(rt, ro);
		}
		RT_UNLOCK();
		rt_unref(rt);
	}

	rtcache_invariants(ro);
	return ro->_ro_rt;
}

struct rtentry *
rtcache_init(struct route *ro)
{

	return _rtcache_init(ro, 1);
}

struct rtentry *
rtcache_init_noclone(struct route *ro)
{

	return _rtcache_init(ro, 0);
}

/* Discard the cached route and re-resolve from the stored destination. */
struct rtentry *
rtcache_update(struct route *ro, int clone)
{

	ro->_ro_rt = NULL;
	return _rtcache_init(ro, clone);
}

/*
 * Copy old_ro's destination and cached route into new_ro.
 * The reference taken on the old route is released before returning.
 */
void
rtcache_copy(struct route *new_ro, struct route *old_ro)
{
	struct rtentry *rt;
	int ret;

	KASSERT(new_ro != old_ro);
	rtcache_invariants(new_ro);
	rtcache_invariants(old_ro);

	rt = rtcache_validate(old_ro);

	if (rtcache_getdst(old_ro) == NULL)
		goto out;
	ret = rtcache_setdst(new_ro, rtcache_getdst(old_ro));
	if (ret != 0)
		goto out;

	RT_RLOCK();
	new_ro->_ro_rt = rt;
	new_ro->ro_rtcache_generation = rtcache_generation;
	RT_UNLOCK();
	rtcache_invariants(new_ro);
out:
	rtcache_unref(rt, old_ro);
	return;
}

#if defined(RT_DEBUG) && defined(NET_MPSAFE)
/* Debug-only trace of psref acquire/release on a route cache. */
static void
rtcache_trace(const char *func, struct rtentry *rt, struct route *ro)
{
	char dst[64];

	sockaddr_format(ro->ro_sa, dst, 64);
	printf("trace: %s:\tdst=%s cpu=%d lwp=%p psref=%p target=%p\n", func, dst,
	    cpu_index(curcpu()), curlwp, &ro->ro_psref, &rt->rt_psref);
}
#define RTCACHE_PSREF_TRACE(rt, ro)	rtcache_trace(__func__, (rt), (ro))
#else
#define RTCACHE_PSREF_TRACE(rt, ro)	do {} while (0)
#endif

/* Take a psref on the cached route (NET_MPSAFE only; no-op otherwise). */
static void
rtcache_ref(struct rtentry *rt, struct route *ro)
{

	KASSERT(rt != NULL);

#ifdef NET_MPSAFE
	RTCACHE_PSREF_TRACE(rt, ro);
	ro->ro_bound = curlwp_bind();
	psref_acquire(&ro->ro_psref, &rt->rt_psref, rt_psref_class);
#endif
}

/* Release the psref taken by rtcache_ref; safe to call with rt == NULL. */
void
rtcache_unref(struct rtentry *rt, struct route *ro)
{

	if (rt == NULL)
		return;

#ifdef NET_MPSAFE
	psref_release(&ro->ro_psref, &rt->rt_psref, rt_psref_class);
	curlwp_bindx(ro->ro_bound);
	RTCACHE_PSREF_TRACE(rt, ro);
#endif
}

/*
 * Return the cached route if it is still valid (generation matches,
 * route is RTF_UP), taking a reference for the caller; otherwise NULL.
 * Under NET_MPSAFE, waits out a concurrent RTF_UPDATING when allowed.
 */
struct rtentry *
rtcache_validate(struct route *ro)
{
	struct rtentry *rt = NULL;

#ifdef NET_MPSAFE
retry:
#endif
	rtcache_invariants(ro);
	RT_RLOCK();
	if (ro->ro_rtcache_generation != rtcache_generation) {
		/* The cache is invalidated */
		rt = NULL;
		goto out;
	}

	rt = ro->_ro_rt;
	if (rt == NULL)
		goto out;

	if ((rt->rt_flags & RTF_UP) == 0) {
		rt = NULL;
		goto out;
	}
#ifdef NET_MPSAFE
	if (ISSET(rt->rt_flags, RTF_UPDATING)) {
		if (rt_wait_ok()) {
			RT_UNLOCK();

			/* We can wait until the update is complete */
			rt_update_wait();
			goto retry;
		} else {
			rt = NULL;
		}
	} else
#endif
		rtcache_ref(rt, ro);
out:
	RT_UNLOCK();
	return rt;
}

/*
 * Look up dst via the route cache: a hit returns the cached route,
 * a miss re-targets the cache at dst and resolves it afresh.
 * *hitp (if non-NULL) reports whether the cache was hit.
 */
struct rtentry *
rtcache_lookup2(struct route *ro, const struct sockaddr *dst,
	int clone, int *hitp)
{
	const struct sockaddr *odst;
	struct rtentry *rt = NULL;

	odst = rtcache_getdst(ro);
	if (odst == NULL)
		goto miss;

	if (sockaddr_cmp(odst, dst) != 0) {
		rtcache_free(ro);
		goto miss;
	}

	rt = rtcache_validate(ro);
	if (rt == NULL) {
		ro->_ro_rt = NULL;
		goto miss;
	}

	rtcache_invariants(ro);

	if (hitp != NULL)
		*hitp = 1;
	return rt;
miss:
	if (hitp != NULL)
		*hitp = 0;
	if (rtcache_setdst(ro, dst) == 0)
		rt = _rtcache_init(ro, clone);

	rtcache_invariants(ro);

	return rt;
}

/* Empty the cache: drop the cached route and free the destination. */
void
rtcache_free(struct route *ro)
{

	ro->_ro_rt = NULL;
	if (ro->ro_sa != NULL) {
		sockaddr_free(ro->ro_sa);
		ro->ro_sa = NULL;
	}
	rtcache_invariants(ro);
}

/*
 * Set the cache's destination to a copy of sa, reusing the existing
 * sockaddr storage when the family matches.  Returns 0 or ENOMEM.
 */
int
rtcache_setdst(struct route *ro, const struct sockaddr *sa)
{
	KASSERT(sa != NULL);

	rtcache_invariants(ro);
	if (ro->ro_sa != NULL) {
		if (ro->ro_sa->sa_family == sa->sa_family) {
			/* Same family: overwrite in place. */
			ro->_ro_rt = NULL;
			sockaddr_copy(ro->ro_sa, ro->ro_sa->sa_len, sa);
			rtcache_invariants(ro);
			return 0;
		}
		/* free ro_sa, wrong family */
		rtcache_free(ro);
	}

	KASSERT(ro->_ro_rt == NULL);

	if ((ro->ro_sa = sockaddr_dup(sa, M_ZERO | M_NOWAIT)) == NULL) {
		rtcache_invariants(ro);
		return ENOMEM;
	}
	rtcache_invariants(ro);
	return 0;
}

/*
 * Attach a copy of tag to the route, replacing any previous tag.
 * Returns the stored tag, or NULL if duplication failed.
 */
const struct sockaddr *
rt_settag(struct rtentry *rt, const struct sockaddr *tag)
{
	if (rt->rt_tag != tag) {
		if (rt->rt_tag != NULL)
			sockaddr_free(rt->rt_tag);
		rt->rt_tag = sockaddr_dup(tag, M_ZERO | M_NOWAIT);
	}
	return rt->rt_tag;
}

struct sockaddr *
rt_gettag(const struct rtentry *rt)
{
	return rt->rt_tag;
}

/*
 * Return the error an RTF_REJECT route should produce for output over
 * ifp (mimicking looutput), or 0 if the route is usable.
 */
int
rt_check_reject_route(const struct rtentry *rt, const struct ifnet *ifp)
{

	if ((rt->rt_flags & RTF_REJECT) != 0) {
		/* Mimic looutput */
		if (ifp->if_flags & IFF_LOOPBACK)
			return (rt->rt_flags & RTF_HOST) ?
			    EHOSTUNREACH : ENETUNREACH;
		else if (rt->rt_rmx.rmx_expire == 0 ||
		    time_uptime < rt->rt_rmx.rmx_expire)
			return (rt->rt_flags & RTF_GATEWAY) ?
			    EHOSTUNREACH : EHOSTDOWN;
	}

	return 0;
}

/*
 * Repeatedly find an entry in family's table matched by predicate f
 * and delete it, until no match remains.  Each deletion is done with
 * the RT lock released, via rtrequest(RTM_DELETE).
 */
void
rt_delete_matched_entries(sa_family_t family, int (*f)(struct rtentry *, void *),
	void *v)
{

	for (;;) {
		int s;
		int error;
		struct rtentry *rt, *retrt = NULL;

		RT_RLOCK();
		s = splsoftnet();
		rt = rtbl_search_matched_entry(family, f, v);
		if (rt == NULL) {
			splx(s);
			RT_UNLOCK();
			return;
		}
		/* Pin the entry before dropping the lock. */
		rt->rt_refcnt++;
		splx(s);
		RT_UNLOCK();

		error = rtrequest(RTM_DELETE, rt_getkey(rt), rt->rt_gateway,
		    rt_mask(rt), rt->rt_flags, &retrt);
		if (error == 0) {
			KASSERT(retrt == rt);
			KASSERT((retrt->rt_flags & RTF_UP) == 0);
			retrt->rt_ifp = NULL;
			rt_unref(rt);
			rt_free(retrt);
		} else if (error == ESRCH) {
			/* Someone deleted the entry already. */
			rt_unref(rt);
		} else {
			log(LOG_ERR, "%s: unable to delete rtentry @ %p, "
			    "error = %d\n", rt->rt_ifp->if_xname, rt, error);
			/* XXX how to treat this case? */
		}
	}
}

/* Walk family's routing table under the RT read lock, calling f per entry. */
int
rt_walktree(sa_family_t family, int (*f)(struct rtentry *, void *), void *v)
{
	int error;

	RT_RLOCK();
	error = rtbl_walktree(family, f, v);
	RT_UNLOCK();

	return error;
}

#ifdef DDB

#include <machine/db_machdep.h>
#include <ddb/db_interface.h>
#include <ddb/db_output.h>

#define rt_expire rt_rmx.rmx_expire

/* Dump a sockaddr as a comma-separated list of raw bytes. */
static void
db_print_sa(const struct sockaddr *sa)
{
	int len;
	const u_char *p;

	if (sa == NULL) {
		db_printf("[NULL]");
		return;
	}

	p = (const u_char *)sa;
	len = sa->sa_len;
	db_printf("[");
	while (len > 0) {
		db_printf("%d", *p);
		p++; len--;
		if (len) db_printf(",");
	}
	db_printf("]\n");
}

/* Dump the addresses and flags of an ifaddr. */
static void
db_print_ifa(struct ifaddr *ifa)
{
	if (ifa == NULL)
		return;
	db_printf("  ifa_addr=");
	db_print_sa(ifa->ifa_addr);
	db_printf("  ifa_dsta=");
	db_print_sa(ifa->ifa_dstaddr);
	db_printf("  ifa_mask=");
	db_print_sa(ifa->ifa_netmask);
	db_printf("  flags=0x%x,refcnt=%d,metric=%d\n",
	    ifa->ifa_flags,
	    ifa->ifa_refcnt,
	    ifa->ifa_metric);
}

/*
 * Function to pass to rt_walktree().
 * Return non-zero error to abort walk.
 */
static int
db_show_rtentry(struct rtentry *rt, void *w)
{
	db_printf("rtentry=%p", rt);

	db_printf(" flags=0x%x refcnt=%d use=%"PRId64" expire=%"PRId64"\n",
	    rt->rt_flags, rt->rt_refcnt,
	    rt->rt_use, (uint64_t)rt->rt_expire);

	db_printf(" key="); db_print_sa(rt_getkey(rt));
	db_printf(" mask="); db_print_sa(rt_mask(rt));
	db_printf(" gw="); db_print_sa(rt->rt_gateway);

	db_printf(" ifp=%p ", rt->rt_ifp);
	if (rt->rt_ifp)
		db_printf("(%s)", rt->rt_ifp->if_xname);
	else
		db_printf("(NULL)");

	db_printf(" ifa=%p\n", rt->rt_ifa);
	db_print_ifa(rt->rt_ifa);

	db_printf(" gwroute=%p llinfo=%p\n",
	    rt->rt_gwroute, rt->rt_llinfo);

	/* Always continue the walk. */
	return 0;
}

/*
 * Function to print all the route trees.
 * Use this from ddb: "show routes"
 */
void
db_show_routes(db_expr_t addr, bool have_addr,
	db_expr_t count, const char *modif)
{
	rt_walktree(AF_INET, db_show_rtentry, NULL);
}
#endif