1 /* $NetBSD: route.c,v 1.185 2016/12/21 04:01:57 ozaki-r Exp $ */ 2 3 /*- 4 * Copyright (c) 1998, 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Kevin M. Lahey of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 35 * All rights reserved. 36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. 
Redistributions of source code must retain the above copyright 41 * notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. Neither the name of the project nor the names of its contributors 46 * may be used to endorse or promote products derived from this software 47 * without specific prior written permission. 48 * 49 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 52 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 59 * SUCH DAMAGE. 60 */ 61 62 /* 63 * Copyright (c) 1980, 1986, 1991, 1993 64 * The Regents of the University of California. All rights reserved. 65 * 66 * Redistribution and use in source and binary forms, with or without 67 * modification, are permitted provided that the following conditions 68 * are met: 69 * 1. Redistributions of source code must retain the above copyright 70 * notice, this list of conditions and the following disclaimer. 71 * 2. Redistributions in binary form must reproduce the above copyright 72 * notice, this list of conditions and the following disclaimer in the 73 * documentation and/or other materials provided with the distribution. 
74 * 3. Neither the name of the University nor the names of its contributors 75 * may be used to endorse or promote products derived from this software 76 * without specific prior written permission. 77 * 78 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 79 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 80 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 81 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 82 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 83 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 84 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 85 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 86 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 87 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 88 * SUCH DAMAGE. 89 * 90 * @(#)route.c 8.3 (Berkeley) 1/9/95 91 */ 92 93 #ifdef _KERNEL_OPT 94 #include "opt_inet.h" 95 #include "opt_route.h" 96 #include "opt_net_mpsafe.h" 97 #endif 98 99 #include <sys/cdefs.h> 100 __KERNEL_RCSID(0, "$NetBSD: route.c,v 1.185 2016/12/21 04:01:57 ozaki-r Exp $"); 101 102 #include <sys/param.h> 103 #ifdef RTFLUSH_DEBUG 104 #include <sys/sysctl.h> 105 #endif 106 #include <sys/systm.h> 107 #include <sys/callout.h> 108 #include <sys/proc.h> 109 #include <sys/mbuf.h> 110 #include <sys/socket.h> 111 #include <sys/socketvar.h> 112 #include <sys/domain.h> 113 #include <sys/protosw.h> 114 #include <sys/kernel.h> 115 #include <sys/ioctl.h> 116 #include <sys/pool.h> 117 #include <sys/kauth.h> 118 #include <sys/workqueue.h> 119 #include <sys/syslog.h> 120 #include <sys/rwlock.h> 121 #include <sys/mutex.h> 122 #include <sys/cpu.h> 123 124 #include <net/if.h> 125 #include <net/if_dl.h> 126 #include <net/route.h> 127 128 #include <netinet/in.h> 129 #include <netinet/in_var.h> 130 131 
#ifdef RTFLUSH_DEBUG
#define	rtcache_debug() __predict_false(_rtcache_debug)
#else /* RTFLUSH_DEBUG */
#define	rtcache_debug() 0
#endif /* RTFLUSH_DEBUG */

/* Trace refcount transitions of a rtentry when RT_DEBUG is enabled. */
#ifdef RT_DEBUG
#define RT_REFCNT_TRACE(rt)	printf("%s:%d: rt=%p refcnt=%d\n", \
	__func__, __LINE__, (rt), (rt)->rt_refcnt)
#else
#define RT_REFCNT_TRACE(rt)	do {} while (0)
#endif

/* Debug-only logging; compiled out entirely unless DEBUG is defined. */
#ifdef DEBUG
#define dlog(level, fmt, args...)	log(level, fmt, ##args)
#else
#define dlog(level, fmt, args...)	do {} while (0)
#endif

struct rtstat rtstat;

static int rttrash;		/* routes not in table but not freed */

static struct pool rtentry_pool;	/* allocator for struct rtentry */
static struct pool rttimer_pool;	/* allocator for struct rttimer */

static struct callout rt_timer_ch;	/* callout for rt_timer_timer() */
static struct workqueue *rt_timer_wq;
static struct work rt_timer_wk;

static void	rt_timer_init(void);
static void	rt_timer_queue_remove_all(struct rttimer_queue *);
static void	rt_timer_remove_all(struct rtentry *);
static void	rt_timer_timer(void *);

/*
 * Locking notes:
 * - The routing table is protected by a global rwlock
 *   - API: RT_RLOCK and friends
 * - rtcaches are protected by a global rwlock
 *   - API: RTCACHE_RLOCK and friends
 * - References to a rtentry is managed by reference counting and psref
 *   - Reference counting is used for temporary reference when a rtentry
 *     is fetched from the routing table
 *   - psref is used for temporary reference when a rtentry is fetched
 *     from a rtcache
 *     - struct route (rtcache) has struct psref, so we cannot obtain
 *       a reference twice on the same struct route
 *   - Before destroying or updating a rtentry, we have to wait for
 *     all references left (see below for details)
 *   - APIs
 *     - An obtained rtentry via rtalloc1 or rtrequest* must be
 *       unreferenced by rt_unref
 *     - An obtained rtentry via rtcache_* must be unreferenced by
 *       rtcache_unref
 *   - TODO: once we get a lockless routing table, we should use only
 *     psref for rtentries
 * - rtentry destruction
 *   - A rtentry is destroyed (freed) only when we call rtrequest(RTM_DELETE)
 *   - If a caller of rtrequest grabs a reference of a rtentry, the caller
 *     has a responsibility to destroy the rtentry by itself by calling
 *     rt_free
 *     - If not, rtrequest itself does that
 *   - If rt_free is called in softint, the actual destruction routine is
 *     deferred to a workqueue
 * - rtentry update
 *   - When updating a rtentry, RTF_UPDATING flag is set
 *   - If a rtentry is set RTF_UPDATING, fetching the rtentry from
 *     the routing table or a rtcache results in either of the following
 *     cases:
 *     - if the caller runs in softint, the caller fails to fetch
 *     - otherwise, the caller waits until the update is complete and
 *       retries the fetch (the second attempt will probably succeed)
 */

/*
 * Global locks for the routing table and rtcaches.
 * Locking order: rtcache_lock => rt_lock
 */
static krwlock_t		rt_lock __cacheline_aligned;
#ifdef NET_MPSAFE
#define RT_RLOCK()		rw_enter(&rt_lock, RW_READER)
#define RT_WLOCK()		rw_enter(&rt_lock, RW_WRITER)
#define RT_UNLOCK()		rw_exit(&rt_lock)
#define RT_LOCKED()		rw_lock_held(&rt_lock)
#define	RT_ASSERT_WLOCK()	KASSERT(rw_write_held(&rt_lock))
#else
/* Without NET_MPSAFE the kernel relies on splsoftnet; locks are no-ops. */
#define RT_RLOCK()		do {} while (0)
#define RT_WLOCK()		do {} while (0)
#define RT_UNLOCK()		do {} while (0)
#define RT_LOCKED()		false
#define	RT_ASSERT_WLOCK()	do {} while (0)
#endif

static krwlock_t		rtcache_lock __cacheline_aligned;
#ifdef NET_MPSAFE
#define RTCACHE_RLOCK()		rw_enter(&rtcache_lock, RW_READER)
#define RTCACHE_WLOCK()		rw_enter(&rtcache_lock, RW_WRITER)
#define RTCACHE_UNLOCK()	rw_exit(&rtcache_lock)
#define	RTCACHE_ASSERT_WLOCK()	KASSERT(rw_write_held(&rtcache_lock))
#define	RTCACHE_WLOCKED()	rw_write_held(&rtcache_lock)
#else
#define RTCACHE_RLOCK()		do {} while (0)
#define RTCACHE_WLOCK()		do {} while (0)
#define RTCACHE_UNLOCK()	do {} while (0)
#define	RTCACHE_ASSERT_WLOCK()	do {} while (0)
#define	RTCACHE_WLOCKED()	false
#endif

/*
 * mutex and cv that are used to wait for references to a rtentry left
 * before updating the rtentry.
 */
static struct {
	kmutex_t		lock;
	kcondvar_t		cv;
	bool			ongoing;	/* an update is in progress */
	const struct lwp	*lwp;		/* the updater lwp */
} rt_update_global __cacheline_aligned;

/*
 * A workqueue and stuff that are used to defer the destruction routine
 * of rtentries.
254 */ 255 static struct { 256 struct workqueue *wq; 257 struct work wk; 258 kmutex_t lock; 259 struct rtentry *queue[10]; 260 } rt_free_global __cacheline_aligned; 261 262 /* psref for rtentry */ 263 static struct psref_class *rt_psref_class __read_mostly; 264 265 #ifdef RTFLUSH_DEBUG 266 static int _rtcache_debug = 0; 267 #endif /* RTFLUSH_DEBUG */ 268 269 static kauth_listener_t route_listener; 270 271 static int rtdeletemsg(struct rtentry *); 272 static void rtflushall(int); 273 274 static void rt_maskedcopy(const struct sockaddr *, 275 struct sockaddr *, const struct sockaddr *); 276 277 static void rtcache_clear(struct route *); 278 static void rtcache_clear_rtentry(int, struct rtentry *); 279 static void rtcache_invalidate(struct dom_rtlist *); 280 281 static void rt_ref(struct rtentry *); 282 283 static struct rtentry * 284 rtalloc1_locked(const struct sockaddr *, int, bool); 285 static struct rtentry * 286 rtcache_validate_locked(struct route *); 287 static void rtcache_free_locked(struct route *); 288 static int rtcache_setdst_locked(struct route *, const struct sockaddr *); 289 290 static void rtcache_ref(struct rtentry *, struct route *); 291 292 static void rt_update_wait(void); 293 294 static bool rt_wait_ok(void); 295 static void rt_wait_refcnt(const char *, struct rtentry *, int); 296 static void rt_wait_psref(struct rtentry *); 297 298 #ifdef DDB 299 static void db_print_sa(const struct sockaddr *); 300 static void db_print_ifa(struct ifaddr *); 301 static int db_show_rtentry(struct rtentry *, void *); 302 #endif 303 304 #ifdef RTFLUSH_DEBUG 305 static void sysctl_net_rtcache_setup(struct sysctllog **); 306 static void 307 sysctl_net_rtcache_setup(struct sysctllog **clog) 308 { 309 const struct sysctlnode *rnode; 310 311 if (sysctl_createv(clog, 0, NULL, &rnode, CTLFLAG_PERMANENT, 312 CTLTYPE_NODE, 313 "rtcache", SYSCTL_DESCR("Route cache related settings"), 314 NULL, 0, NULL, 0, CTL_NET, CTL_CREATE, CTL_EOL) != 0) 315 return; 316 if 
(sysctl_createv(clog, 0, &rnode, &rnode, 317 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, 318 "debug", SYSCTL_DESCR("Debug route caches"), 319 NULL, 0, &_rtcache_debug, 0, CTL_CREATE, CTL_EOL) != 0) 320 return; 321 } 322 #endif /* RTFLUSH_DEBUG */ 323 324 static inline void 325 rt_destroy(struct rtentry *rt) 326 { 327 if (rt->_rt_key != NULL) 328 sockaddr_free(rt->_rt_key); 329 if (rt->rt_gateway != NULL) 330 sockaddr_free(rt->rt_gateway); 331 if (rt_gettag(rt) != NULL) 332 sockaddr_free(rt_gettag(rt)); 333 rt->_rt_key = rt->rt_gateway = rt->rt_tag = NULL; 334 } 335 336 static inline const struct sockaddr * 337 rt_setkey(struct rtentry *rt, const struct sockaddr *key, int flags) 338 { 339 if (rt->_rt_key == key) 340 goto out; 341 342 if (rt->_rt_key != NULL) 343 sockaddr_free(rt->_rt_key); 344 rt->_rt_key = sockaddr_dup(key, flags); 345 out: 346 rt->rt_nodes->rn_key = (const char *)rt->_rt_key; 347 return rt->_rt_key; 348 } 349 350 struct ifaddr * 351 rt_get_ifa(struct rtentry *rt) 352 { 353 struct ifaddr *ifa; 354 355 if ((ifa = rt->rt_ifa) == NULL) 356 return ifa; 357 else if (ifa->ifa_getifa == NULL) 358 return ifa; 359 #if 0 360 else if (ifa->ifa_seqno != NULL && *ifa->ifa_seqno == rt->rt_ifa_seqno) 361 return ifa; 362 #endif 363 else { 364 ifa = (*ifa->ifa_getifa)(ifa, rt_getkey(rt)); 365 if (ifa == NULL) 366 return NULL; 367 rt_replace_ifa(rt, ifa); 368 return ifa; 369 } 370 } 371 372 static void 373 rt_set_ifa1(struct rtentry *rt, struct ifaddr *ifa) 374 { 375 rt->rt_ifa = ifa; 376 if (ifa->ifa_seqno != NULL) 377 rt->rt_ifa_seqno = *ifa->ifa_seqno; 378 } 379 380 /* 381 * Is this route the connected route for the ifa? 382 */ 383 static int 384 rt_ifa_connected(const struct rtentry *rt, const struct ifaddr *ifa) 385 { 386 const struct sockaddr *key, *dst, *odst; 387 struct sockaddr_storage maskeddst; 388 389 key = rt_getkey(rt); 390 dst = rt->rt_flags & RTF_HOST ? 
ifa->ifa_dstaddr : ifa->ifa_addr; 391 if (dst == NULL || 392 dst->sa_family != key->sa_family || 393 dst->sa_len != key->sa_len) 394 return 0; 395 if ((rt->rt_flags & RTF_HOST) == 0 && ifa->ifa_netmask) { 396 odst = dst; 397 dst = (struct sockaddr *)&maskeddst; 398 rt_maskedcopy(odst, (struct sockaddr *)&maskeddst, 399 ifa->ifa_netmask); 400 } 401 return (memcmp(dst, key, dst->sa_len) == 0); 402 } 403 404 void 405 rt_replace_ifa(struct rtentry *rt, struct ifaddr *ifa) 406 { 407 if (rt->rt_ifa && 408 rt->rt_ifa != ifa && 409 rt->rt_ifa->ifa_flags & IFA_ROUTE && 410 rt_ifa_connected(rt, rt->rt_ifa)) 411 { 412 RT_DPRINTF("rt->_rt_key = %p, ifa = %p, " 413 "replace deleted IFA_ROUTE\n", 414 (void *)rt->_rt_key, (void *)rt->rt_ifa); 415 rt->rt_ifa->ifa_flags &= ~IFA_ROUTE; 416 if (rt_ifa_connected(rt, ifa)) { 417 RT_DPRINTF("rt->_rt_key = %p, ifa = %p, " 418 "replace added IFA_ROUTE\n", 419 (void *)rt->_rt_key, (void *)ifa); 420 ifa->ifa_flags |= IFA_ROUTE; 421 } 422 } 423 424 ifaref(ifa); 425 ifafree(rt->rt_ifa); 426 rt_set_ifa1(rt, ifa); 427 } 428 429 static void 430 rt_set_ifa(struct rtentry *rt, struct ifaddr *ifa) 431 { 432 ifaref(ifa); 433 rt_set_ifa1(rt, ifa); 434 } 435 436 static int 437 route_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie, 438 void *arg0, void *arg1, void *arg2, void *arg3) 439 { 440 struct rt_msghdr *rtm; 441 int result; 442 443 result = KAUTH_RESULT_DEFER; 444 rtm = arg1; 445 446 if (action != KAUTH_NETWORK_ROUTE) 447 return result; 448 449 if (rtm->rtm_type == RTM_GET) 450 result = KAUTH_RESULT_ALLOW; 451 452 return result; 453 } 454 455 static void rt_free_work(struct work *, void *); 456 457 void 458 rt_init(void) 459 { 460 int error; 461 462 #ifdef RTFLUSH_DEBUG 463 sysctl_net_rtcache_setup(NULL); 464 #endif 465 466 mutex_init(&rt_free_global.lock, MUTEX_DEFAULT, IPL_SOFTNET); 467 rt_psref_class = psref_class_create("rtentry", IPL_SOFTNET); 468 469 error = workqueue_create(&rt_free_global.wq, "rt_free", 470 
rt_free_work, NULL, PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE); 471 if (error) 472 panic("%s: workqueue_create failed (%d)\n", __func__, error); 473 474 mutex_init(&rt_update_global.lock, MUTEX_DEFAULT, IPL_SOFTNET); 475 cv_init(&rt_update_global.cv, "rt_update"); 476 477 pool_init(&rtentry_pool, sizeof(struct rtentry), 0, 0, 0, "rtentpl", 478 NULL, IPL_SOFTNET); 479 pool_init(&rttimer_pool, sizeof(struct rttimer), 0, 0, 0, "rttmrpl", 480 NULL, IPL_SOFTNET); 481 482 rn_init(); /* initialize all zeroes, all ones, mask table */ 483 rtbl_init(); 484 485 route_listener = kauth_listen_scope(KAUTH_SCOPE_NETWORK, 486 route_listener_cb, NULL); 487 } 488 489 static void 490 rtflushall(int family) 491 { 492 struct domain *dom; 493 494 if (rtcache_debug()) 495 printf("%s: enter\n", __func__); 496 497 if ((dom = pffinddomain(family)) == NULL) 498 return; 499 500 RTCACHE_WLOCK(); 501 rtcache_invalidate(&dom->dom_rtcache); 502 RTCACHE_UNLOCK(); 503 } 504 505 static void 506 rtcache(struct route *ro) 507 { 508 struct domain *dom; 509 510 RTCACHE_ASSERT_WLOCK(); 511 512 rtcache_invariants(ro); 513 KASSERT(ro->_ro_rt != NULL); 514 KASSERT(ro->ro_invalid == false); 515 KASSERT(rtcache_getdst(ro) != NULL); 516 517 if ((dom = pffinddomain(rtcache_getdst(ro)->sa_family)) == NULL) 518 return; 519 520 LIST_INSERT_HEAD(&dom->dom_rtcache, ro, ro_rtcache_next); 521 rtcache_invariants(ro); 522 } 523 524 #ifdef RT_DEBUG 525 static void 526 dump_rt(const struct rtentry *rt) 527 { 528 char buf[512]; 529 530 aprint_normal("rt: "); 531 aprint_normal("p=%p ", rt); 532 if (rt->_rt_key == NULL) { 533 aprint_normal("dst=(NULL) "); 534 } else { 535 sockaddr_format(rt->_rt_key, buf, sizeof(buf)); 536 aprint_normal("dst=%s ", buf); 537 } 538 if (rt->rt_gateway == NULL) { 539 aprint_normal("gw=(NULL) "); 540 } else { 541 sockaddr_format(rt->_rt_key, buf, sizeof(buf)); 542 aprint_normal("gw=%s ", buf); 543 } 544 aprint_normal("flags=%x ", rt->rt_flags); 545 if (rt->rt_ifp == NULL) { 546 aprint_normal("if=(NULL) 
"); 547 } else { 548 aprint_normal("if=%s ", rt->rt_ifp->if_xname); 549 } 550 aprint_normal("\n"); 551 } 552 #endif /* RT_DEBUG */ 553 554 /* 555 * Packet routing routines. If success, refcnt of a returned rtentry 556 * will be incremented. The caller has to rtfree it by itself. 557 */ 558 struct rtentry * 559 rtalloc1_locked(const struct sockaddr *dst, int report, bool wait_ok) 560 { 561 rtbl_t *rtbl; 562 struct rtentry *rt; 563 int s; 564 565 retry: 566 s = splsoftnet(); 567 rtbl = rt_gettable(dst->sa_family); 568 if (rtbl == NULL) 569 goto miss; 570 571 rt = rt_matchaddr(rtbl, dst); 572 if (rt == NULL) 573 goto miss; 574 575 if (!ISSET(rt->rt_flags, RTF_UP)) 576 goto miss; 577 578 if (ISSET(rt->rt_flags, RTF_UPDATING) && 579 /* XXX updater should be always able to acquire */ 580 curlwp != rt_update_global.lwp) { 581 bool need_lock = false; 582 if (!wait_ok || !rt_wait_ok()) 583 goto miss; 584 RT_UNLOCK(); 585 splx(s); 586 587 /* XXX need more proper solution */ 588 if (RTCACHE_WLOCKED()) { 589 RTCACHE_UNLOCK(); 590 need_lock = true; 591 } 592 593 /* We can wait until the update is complete */ 594 rt_update_wait(); 595 596 if (need_lock) 597 RTCACHE_WLOCK(); 598 goto retry; 599 } 600 601 rt_ref(rt); 602 RT_REFCNT_TRACE(rt); 603 604 splx(s); 605 return rt; 606 miss: 607 rtstat.rts_unreach++; 608 if (report) { 609 struct rt_addrinfo info; 610 611 memset(&info, 0, sizeof(info)); 612 info.rti_info[RTAX_DST] = dst; 613 rt_missmsg(RTM_MISS, &info, 0, 0); 614 } 615 splx(s); 616 return NULL; 617 } 618 619 struct rtentry * 620 rtalloc1(const struct sockaddr *dst, int report) 621 { 622 struct rtentry *rt; 623 624 RT_RLOCK(); 625 rt = rtalloc1_locked(dst, report, true); 626 RT_UNLOCK(); 627 628 return rt; 629 } 630 631 static void 632 rt_ref(struct rtentry *rt) 633 { 634 635 KASSERT(rt->rt_refcnt >= 0); 636 atomic_inc_uint(&rt->rt_refcnt); 637 } 638 639 void 640 rt_unref(struct rtentry *rt) 641 { 642 643 KASSERT(rt != NULL); 644 KASSERTMSG(rt->rt_refcnt > 0, "refcnt=%d", 
rt->rt_refcnt); 645 646 atomic_dec_uint(&rt->rt_refcnt); 647 if (!ISSET(rt->rt_flags, RTF_UP) || ISSET(rt->rt_flags, RTF_UPDATING)) { 648 mutex_enter(&rt_free_global.lock); 649 cv_broadcast(&rt->rt_cv); 650 mutex_exit(&rt_free_global.lock); 651 } 652 } 653 654 static bool 655 rt_wait_ok(void) 656 { 657 658 KASSERT(!cpu_intr_p()); 659 return !cpu_softintr_p(); 660 } 661 662 void 663 rt_wait_refcnt(const char *title, struct rtentry *rt, int cnt) 664 { 665 mutex_enter(&rt_free_global.lock); 666 while (rt->rt_refcnt > cnt) { 667 dlog(LOG_DEBUG, "%s: %s waiting (refcnt=%d)\n", 668 __func__, title, rt->rt_refcnt); 669 cv_wait(&rt->rt_cv, &rt_free_global.lock); 670 dlog(LOG_DEBUG, "%s: %s waited (refcnt=%d)\n", 671 __func__, title, rt->rt_refcnt); 672 } 673 mutex_exit(&rt_free_global.lock); 674 } 675 676 void 677 rt_wait_psref(struct rtentry *rt) 678 { 679 680 psref_target_destroy(&rt->rt_psref, rt_psref_class); 681 psref_target_init(&rt->rt_psref, rt_psref_class); 682 } 683 684 static void 685 _rt_free(struct rtentry *rt) 686 { 687 struct ifaddr *ifa; 688 689 /* 690 * Need to avoid a deadlock on rt_wait_refcnt of update 691 * and a conflict on psref_target_destroy of update. 
692 */ 693 rt_update_wait(); 694 695 RT_REFCNT_TRACE(rt); 696 KASSERTMSG(rt->rt_refcnt >= 0, "refcnt=%d", rt->rt_refcnt); 697 rt_wait_refcnt("free", rt, 0); 698 #ifdef NET_MPSAFE 699 psref_target_destroy(&rt->rt_psref, rt_psref_class); 700 #endif 701 702 rt_assert_inactive(rt); 703 rttrash--; 704 ifa = rt->rt_ifa; 705 rt->rt_ifa = NULL; 706 ifafree(ifa); 707 rt->rt_ifp = NULL; 708 cv_destroy(&rt->rt_cv); 709 rt_destroy(rt); 710 pool_put(&rtentry_pool, rt); 711 } 712 713 static void 714 rt_free_work(struct work *wk, void *arg) 715 { 716 int i; 717 struct rtentry *rt; 718 719 restart: 720 mutex_enter(&rt_free_global.lock); 721 for (i = 0; i < sizeof(rt_free_global.queue); i++) { 722 if (rt_free_global.queue[i] == NULL) 723 continue; 724 rt = rt_free_global.queue[i]; 725 rt_free_global.queue[i] = NULL; 726 mutex_exit(&rt_free_global.lock); 727 728 atomic_dec_uint(&rt->rt_refcnt); 729 _rt_free(rt); 730 goto restart; 731 } 732 mutex_exit(&rt_free_global.lock); 733 } 734 735 void 736 rt_free(struct rtentry *rt) 737 { 738 739 KASSERT(rt->rt_refcnt > 0); 740 if (!rt_wait_ok()) { 741 int i; 742 mutex_enter(&rt_free_global.lock); 743 for (i = 0; i < sizeof(rt_free_global.queue); i++) { 744 if (rt_free_global.queue[i] == NULL) { 745 rt_free_global.queue[i] = rt; 746 break; 747 } 748 } 749 KASSERT(i < sizeof(rt_free_global.queue)); 750 rt_ref(rt); 751 mutex_exit(&rt_free_global.lock); 752 workqueue_enqueue(rt_free_global.wq, &rt_free_global.wk, NULL); 753 } else { 754 atomic_dec_uint(&rt->rt_refcnt); 755 _rt_free(rt); 756 } 757 } 758 759 static void 760 rt_update_wait(void) 761 { 762 763 mutex_enter(&rt_update_global.lock); 764 while (rt_update_global.ongoing) { 765 dlog(LOG_DEBUG, "%s: waiting lwp=%p\n", __func__, curlwp); 766 cv_wait(&rt_update_global.cv, &rt_update_global.lock); 767 dlog(LOG_DEBUG, "%s: waited lwp=%p\n", __func__, curlwp); 768 } 769 mutex_exit(&rt_update_global.lock); 770 } 771 772 int 773 rt_update_prepare(struct rtentry *rt) 774 { 775 776 dlog(LOG_DEBUG, 
"%s: updating rt=%p lwp=%p\n", __func__, rt, curlwp); 777 778 RTCACHE_WLOCK(); 779 RT_WLOCK(); 780 /* If the entry is being destroyed, don't proceed the update. */ 781 if (!ISSET(rt->rt_flags, RTF_UP)) { 782 RT_UNLOCK(); 783 RTCACHE_UNLOCK(); 784 return -1; 785 } 786 rt->rt_flags |= RTF_UPDATING; 787 RT_UNLOCK(); 788 RTCACHE_UNLOCK(); 789 790 mutex_enter(&rt_update_global.lock); 791 while (rt_update_global.ongoing) { 792 dlog(LOG_DEBUG, "%s: waiting ongoing updating rt=%p lwp=%p\n", 793 __func__, rt, curlwp); 794 cv_wait(&rt_update_global.cv, &rt_update_global.lock); 795 dlog(LOG_DEBUG, "%s: waited ongoing updating rt=%p lwp=%p\n", 796 __func__, rt, curlwp); 797 } 798 rt_update_global.ongoing = true; 799 /* XXX need it to avoid rt_update_wait by updater itself. */ 800 rt_update_global.lwp = curlwp; 801 mutex_exit(&rt_update_global.lock); 802 803 rt_wait_refcnt("update", rt, 1); 804 rt_wait_psref(rt); 805 806 return 0; 807 } 808 809 void 810 rt_update_finish(struct rtentry *rt) 811 { 812 813 RTCACHE_WLOCK(); 814 RT_WLOCK(); 815 rt->rt_flags &= ~RTF_UPDATING; 816 RT_UNLOCK(); 817 RTCACHE_UNLOCK(); 818 819 mutex_enter(&rt_update_global.lock); 820 rt_update_global.ongoing = false; 821 rt_update_global.lwp = NULL; 822 cv_broadcast(&rt_update_global.cv); 823 mutex_exit(&rt_update_global.lock); 824 825 dlog(LOG_DEBUG, "%s: updated rt=%p lwp=%p\n", __func__, rt, curlwp); 826 } 827 828 /* 829 * Force a routing table entry to the specified 830 * destination to go through the given gateway. 831 * Normally called as a result of a routing redirect 832 * message from the network layer. 
 *
 * N.B.: must be called at splsoftnet
 */
void
rtredirect(const struct sockaddr *dst, const struct sockaddr *gateway,
	const struct sockaddr *netmask, int flags, const struct sockaddr *src,
	struct rtentry **rtp)
{
	struct rtentry *rt;
	int error = 0;
	uint64_t *stat = NULL;
	struct rt_addrinfo info;
	struct ifaddr *ifa;
	struct psref psref;

	/* verify the gateway is directly reachable */
	if ((ifa = ifa_ifwithnet_psref(gateway, &psref)) == NULL) {
		error = ENETUNREACH;
		goto out;
	}
	rt = rtalloc1(dst, 0);
	/*
	 * If the redirect isn't from our current router for this dst,
	 * it's either old or wrong.  If it redirects us to ourselves,
	 * we have a routing loop, perhaps as a result of an interface
	 * going down recently.
	 */
	if (!(flags & RTF_DONE) && rt &&
	    (sockaddr_cmp(src, rt->rt_gateway) != 0 || rt->rt_ifa != ifa))
		error = EINVAL;
	else {
		int s = pserialize_read_enter();
		struct ifaddr *_ifa;

		/* Redirecting to one of our own addresses is a loop. */
		_ifa = ifa_ifwithaddr(gateway);
		if (_ifa != NULL)
			error = EHOSTUNREACH;
		pserialize_read_exit(s);
	}
	if (error)
		goto done;
	/*
	 * Create a new entry if we just got back a wildcard entry
	 * or the lookup failed.  This is necessary for hosts
	 * which use routing redirects generated by smart gateways
	 * to dynamically build the routing tables.
	 */
	if (rt == NULL || (rt_mask(rt) && rt_mask(rt)->sa_len < 2))
		goto create;
	/*
	 * Don't listen to the redirect if it's
	 * for a route to an interface.
	 */
	if (rt->rt_flags & RTF_GATEWAY) {
		if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) {
			/*
			 * Changing from route to net => route to host.
			 * Create new route, rather than smashing route to net.
			 */
		create:
			if (rt != NULL)
				rt_unref(rt);
			flags |= RTF_GATEWAY | RTF_DYNAMIC;
			memset(&info, 0, sizeof(info));
			info.rti_info[RTAX_DST] = dst;
			info.rti_info[RTAX_GATEWAY] = gateway;
			info.rti_info[RTAX_NETMASK] = netmask;
			info.rti_ifa = ifa;
			info.rti_flags = flags;
			rt = NULL;
			error = rtrequest1(RTM_ADD, &info, &rt);
			if (rt != NULL)
				flags = rt->rt_flags;
			stat = &rtstat.rts_dynamic;
		} else {
			/*
			 * Smash the current notion of the gateway to
			 * this destination.  Should check about netmask!!!
			 */
			/*
			 * FIXME NOMPSAFE: the rtentry is updated while
			 * references to it may still exist.
			 */
			error = rt_setgate(rt, gateway);
			if (error == 0) {
				rt->rt_flags |= RTF_MODIFIED;
				flags |= RTF_MODIFIED;
			}
			stat = &rtstat.rts_newgateway;
		}
	} else
		error = EHOSTUNREACH;
done:
	if (rt) {
		/* Hand the referenced rtentry to the caller on success. */
		if (rtp != NULL && !error)
			*rtp = rt;
		else
			rt_unref(rt);
	}
out:
	if (error)
		rtstat.rts_badredirect++;
	else if (stat != NULL)
		(*stat)++;
	memset(&info, 0, sizeof(info));
	info.rti_info[RTAX_DST] = dst;
	info.rti_info[RTAX_GATEWAY] = gateway;
	info.rti_info[RTAX_NETMASK] = netmask;
	info.rti_info[RTAX_AUTHOR] = src;
	rt_missmsg(RTM_REDIRECT, &info, flags, error);
	ifa_release(ifa, &psref);
}

/*
 * Delete a route and generate a message.
 * It doesn't free a passed rt.
 */
static int
rtdeletemsg(struct rtentry *rt)
{
	int error;
	struct rt_addrinfo info;
	struct rtentry *retrt;

	/*
	 * Request the new route so that the entry is not actually
	 * deleted.  That will allow the information being reported to
	 * be accurate (and consistent with route_output()).
	 *
	 * NOTE(review): retrt comes back referenced from rtrequest1 and is
	 * not released here — presumably the caller's own reference and
	 * rt_free handle the entry's lifetime; verify against callers.
	 */
	memset(&info, 0, sizeof(info));
	info.rti_info[RTAX_DST] = rt_getkey(rt);
	info.rti_info[RTAX_NETMASK] = rt_mask(rt);
	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
	info.rti_flags = rt->rt_flags;
	error = rtrequest1(RTM_DELETE, &info, &retrt);

	rt_missmsg(RTM_DELETE, &info, info.rti_flags, error);

	return error;
}

/*
 * Find the ifaddr to attach to a route with the given flags, destination
 * and gateway.  On success the returned ifaddr is held via @psref and
 * must be released with ifa_release.
 */
struct ifaddr *
ifa_ifwithroute_psref(int flags, const struct sockaddr *dst,
	const struct sockaddr *gateway, struct psref *psref)
{
	struct ifaddr *ifa = NULL;

	if ((flags & RTF_GATEWAY) == 0) {
		/*
		 * If we are adding a route to an interface,
		 * and the interface is a pt to pt link
		 * we should search for the destination
		 * as our clue to the interface.  Otherwise
		 * we can use the local address.
		 */
		if ((flags & RTF_HOST) && gateway->sa_family != AF_LINK)
			ifa = ifa_ifwithdstaddr_psref(dst, psref);
		if (ifa == NULL)
			ifa = ifa_ifwithaddr_psref(gateway, psref);
	} else {
		/*
		 * If we are adding a route to a remote net
		 * or host, the gateway may still be on the
		 * other end of a pt to pt link.
		 */
		ifa = ifa_ifwithdstaddr_psref(gateway, psref);
	}
	if (ifa == NULL)
		ifa = ifa_ifwithnet_psref(gateway, psref);
	if (ifa == NULL) {
		/* Last resort: take the ifaddr of an existing route. */
		int s;
		struct rtentry *rt;

		rt = rtalloc1(dst, 0);
		if (rt == NULL)
			return NULL;
		/*
		 * Just in case. May not need to do this workaround.
		 * Revisit when working on rtentry MP-ification.
		 */
		s = pserialize_read_enter();
		IFADDR_READER_FOREACH(ifa, rt->rt_ifp) {
			if (ifa == rt->rt_ifa)
				break;
		}
		if (ifa != NULL)
			ifa_acquire(ifa, psref);
		pserialize_read_exit(s);
		rt_unref(rt);
		if (ifa == NULL)
			return NULL;
	}
	if (ifa->ifa_addr->sa_family != dst->sa_family) {
		/* Prefer an address of the destination's family. */
		struct ifaddr *nifa;
		int s;

		s = pserialize_read_enter();
		nifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
		if (nifa != NULL) {
			ifa_release(ifa, psref);
			ifa_acquire(nifa, psref);
			ifa = nifa;
		}
		pserialize_read_exit(s);
	}
	return ifa;
}

/*
 * If it succeeds and ret_nrt isn't NULL, refcnt of ret_nrt is incremented.
 * The caller has to release it (rt_unref / rt_free) by itself.
 */
int
rtrequest(int req, const struct sockaddr *dst, const struct sockaddr *gateway,
	const struct sockaddr *netmask, int flags, struct rtentry **ret_nrt)
{
	struct rt_addrinfo info;

	memset(&info, 0, sizeof(info));
	info.rti_flags = flags;
	info.rti_info[RTAX_DST] = dst;
	info.rti_info[RTAX_GATEWAY] = gateway;
	info.rti_info[RTAX_NETMASK] = netmask;
	return rtrequest1(req, &info, ret_nrt);
}

/*
 * It's a utility function to add/remove a route to/from the routing table
 * and tell user processes the addition/removal on success.
1062 */ 1063 int 1064 rtrequest_newmsg(const int req, const struct sockaddr *dst, 1065 const struct sockaddr *gateway, const struct sockaddr *netmask, 1066 const int flags) 1067 { 1068 int error; 1069 struct rtentry *ret_nrt = NULL; 1070 1071 KASSERT(req == RTM_ADD || req == RTM_DELETE); 1072 1073 error = rtrequest(req, dst, gateway, netmask, flags, &ret_nrt); 1074 if (error != 0) 1075 return error; 1076 1077 KASSERT(ret_nrt != NULL); 1078 1079 rt_newmsg(req, ret_nrt); /* tell user process */ 1080 if (req == RTM_DELETE) 1081 rt_free(ret_nrt); 1082 else 1083 rt_unref(ret_nrt); 1084 1085 return 0; 1086 } 1087 1088 struct ifnet * 1089 rt_getifp(struct rt_addrinfo *info, struct psref *psref) 1090 { 1091 const struct sockaddr *ifpaddr = info->rti_info[RTAX_IFP]; 1092 1093 if (info->rti_ifp != NULL) 1094 return NULL; 1095 /* 1096 * ifp may be specified by sockaddr_dl when protocol address 1097 * is ambiguous 1098 */ 1099 if (ifpaddr != NULL && ifpaddr->sa_family == AF_LINK) { 1100 struct ifaddr *ifa; 1101 int s = pserialize_read_enter(); 1102 1103 ifa = ifa_ifwithnet(ifpaddr); 1104 if (ifa != NULL) 1105 info->rti_ifp = if_get_byindex(ifa->ifa_ifp->if_index, 1106 psref); 1107 pserialize_read_exit(s); 1108 } 1109 1110 return info->rti_ifp; 1111 } 1112 1113 struct ifaddr * 1114 rt_getifa(struct rt_addrinfo *info, struct psref *psref) 1115 { 1116 struct ifaddr *ifa = NULL; 1117 const struct sockaddr *dst = info->rti_info[RTAX_DST]; 1118 const struct sockaddr *gateway = info->rti_info[RTAX_GATEWAY]; 1119 const struct sockaddr *ifaaddr = info->rti_info[RTAX_IFA]; 1120 int flags = info->rti_flags; 1121 const struct sockaddr *sa; 1122 1123 if (info->rti_ifa == NULL && ifaaddr != NULL) { 1124 ifa = ifa_ifwithaddr_psref(ifaaddr, psref); 1125 if (ifa != NULL) 1126 goto got; 1127 } 1128 1129 sa = ifaaddr != NULL ? ifaaddr : 1130 (gateway != NULL ? 
gateway : dst); 1131 if (sa != NULL && info->rti_ifp != NULL) 1132 ifa = ifaof_ifpforaddr_psref(sa, info->rti_ifp, psref); 1133 else if (dst != NULL && gateway != NULL) 1134 ifa = ifa_ifwithroute_psref(flags, dst, gateway, psref); 1135 else if (sa != NULL) 1136 ifa = ifa_ifwithroute_psref(flags, sa, sa, psref); 1137 if (ifa == NULL) 1138 return NULL; 1139 got: 1140 if (ifa->ifa_getifa != NULL) { 1141 /* FIXME NOMPSAFE */ 1142 ifa = (*ifa->ifa_getifa)(ifa, dst); 1143 if (ifa == NULL) 1144 return NULL; 1145 ifa_acquire(ifa, psref); 1146 } 1147 info->rti_ifa = ifa; 1148 if (info->rti_ifp == NULL) 1149 info->rti_ifp = ifa->ifa_ifp; 1150 return ifa; 1151 } 1152 1153 /* 1154 * If it suceeds and ret_nrt isn't NULL, refcnt of ret_nrt is incremented. 1155 * The caller has to rtfree it by itself. 1156 */ 1157 int 1158 rtrequest1(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt) 1159 { 1160 int s = splsoftnet(), ss; 1161 int error = 0, rc; 1162 struct rtentry *rt; 1163 rtbl_t *rtbl; 1164 struct ifaddr *ifa = NULL, *ifa2 = NULL; 1165 struct sockaddr_storage maskeddst; 1166 const struct sockaddr *dst = info->rti_info[RTAX_DST]; 1167 const struct sockaddr *gateway = info->rti_info[RTAX_GATEWAY]; 1168 const struct sockaddr *netmask = info->rti_info[RTAX_NETMASK]; 1169 int flags = info->rti_flags; 1170 struct psref psref_ifp, psref_ifa; 1171 int bound = 0; 1172 struct ifnet *ifp = NULL; 1173 bool need_to_release_ifa = true; 1174 bool need_unlock = true; 1175 #define senderr(x) { error = x ; goto bad; } 1176 1177 RT_WLOCK(); 1178 1179 bound = curlwp_bind(); 1180 if ((rtbl = rt_gettable(dst->sa_family)) == NULL) 1181 senderr(ESRCH); 1182 if (flags & RTF_HOST) 1183 netmask = NULL; 1184 switch (req) { 1185 case RTM_DELETE: 1186 if (netmask) { 1187 rt_maskedcopy(dst, (struct sockaddr *)&maskeddst, 1188 netmask); 1189 dst = (struct sockaddr *)&maskeddst; 1190 } 1191 if ((rt = rt_lookup(rtbl, dst, netmask)) == NULL) 1192 senderr(ESRCH); 1193 if ((rt = rt_deladdr(rtbl, dst, 
netmask)) == NULL) 1194 senderr(ESRCH); 1195 rt->rt_flags &= ~RTF_UP; 1196 if ((ifa = rt->rt_ifa)) { 1197 if (ifa->ifa_flags & IFA_ROUTE && 1198 rt_ifa_connected(rt, ifa)) { 1199 RT_DPRINTF("rt->_rt_key = %p, ifa = %p, " 1200 "deleted IFA_ROUTE\n", 1201 (void *)rt->_rt_key, (void *)ifa); 1202 ifa->ifa_flags &= ~IFA_ROUTE; 1203 } 1204 if (ifa->ifa_rtrequest) 1205 ifa->ifa_rtrequest(RTM_DELETE, rt, info); 1206 ifa = NULL; 1207 } 1208 rttrash++; 1209 if (ret_nrt) { 1210 *ret_nrt = rt; 1211 rt_ref(rt); 1212 RT_REFCNT_TRACE(rt); 1213 } 1214 RT_UNLOCK(); 1215 need_unlock = false; 1216 rt_timer_remove_all(rt); 1217 rtcache_clear_rtentry(dst->sa_family, rt); 1218 if (ret_nrt == NULL) { 1219 /* Adjust the refcount */ 1220 rt_ref(rt); 1221 RT_REFCNT_TRACE(rt); 1222 rt_free(rt); 1223 } 1224 break; 1225 1226 case RTM_ADD: 1227 if (info->rti_ifa == NULL) { 1228 ifp = rt_getifp(info, &psref_ifp); 1229 ifa = rt_getifa(info, &psref_ifa); 1230 if (ifa == NULL) 1231 senderr(ENETUNREACH); 1232 } else { 1233 /* Caller should have a reference of ifa */ 1234 ifa = info->rti_ifa; 1235 need_to_release_ifa = false; 1236 } 1237 rt = pool_get(&rtentry_pool, PR_NOWAIT); 1238 if (rt == NULL) 1239 senderr(ENOBUFS); 1240 memset(rt, 0, sizeof(*rt)); 1241 rt->rt_flags = RTF_UP | flags; 1242 LIST_INIT(&rt->rt_timer); 1243 1244 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1245 if (netmask) { 1246 rt_maskedcopy(dst, (struct sockaddr *)&maskeddst, 1247 netmask); 1248 rt_setkey(rt, (struct sockaddr *)&maskeddst, M_NOWAIT); 1249 } else { 1250 rt_setkey(rt, dst, M_NOWAIT); 1251 } 1252 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1253 if (rt_getkey(rt) == NULL || 1254 rt_setgate(rt, gateway) != 0) { 1255 pool_put(&rtentry_pool, rt); 1256 senderr(ENOBUFS); 1257 } 1258 1259 rt_set_ifa(rt, ifa); 1260 if (info->rti_info[RTAX_TAG] != NULL) { 1261 const struct sockaddr *tag; 1262 tag = rt_settag(rt, info->rti_info[RTAX_TAG]); 1263 if (tag == NULL) 1264 senderr(ENOBUFS); 1265 } 1266 
RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1267 1268 ss = pserialize_read_enter(); 1269 if (info->rti_info[RTAX_IFP] != NULL) { 1270 ifa2 = ifa_ifwithnet(info->rti_info[RTAX_IFP]); 1271 if (ifa2 != NULL) 1272 rt->rt_ifp = ifa2->ifa_ifp; 1273 else 1274 rt->rt_ifp = ifa->ifa_ifp; 1275 } else 1276 rt->rt_ifp = ifa->ifa_ifp; 1277 pserialize_read_exit(ss); 1278 cv_init(&rt->rt_cv, "rtentry"); 1279 psref_target_init(&rt->rt_psref, rt_psref_class); 1280 1281 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1282 rc = rt_addaddr(rtbl, rt, netmask); 1283 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1284 if (rc != 0) { 1285 ifafree(ifa); /* for rt_set_ifa above */ 1286 cv_destroy(&rt->rt_cv); 1287 rt_destroy(rt); 1288 pool_put(&rtentry_pool, rt); 1289 senderr(rc); 1290 } 1291 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1292 if (ifa->ifa_rtrequest) 1293 ifa->ifa_rtrequest(req, rt, info); 1294 if (need_to_release_ifa) 1295 ifa_release(ifa, &psref_ifa); 1296 ifa = NULL; 1297 if_put(ifp, &psref_ifp); 1298 ifp = NULL; 1299 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1300 if (ret_nrt) { 1301 *ret_nrt = rt; 1302 rt_ref(rt); 1303 RT_REFCNT_TRACE(rt); 1304 } 1305 RT_UNLOCK(); 1306 need_unlock = false; 1307 rtflushall(dst->sa_family); 1308 break; 1309 case RTM_GET: 1310 if (netmask != NULL) { 1311 rt_maskedcopy(dst, (struct sockaddr *)&maskeddst, 1312 netmask); 1313 dst = (struct sockaddr *)&maskeddst; 1314 } 1315 if ((rt = rt_lookup(rtbl, dst, netmask)) == NULL) 1316 senderr(ESRCH); 1317 if (ret_nrt != NULL) { 1318 *ret_nrt = rt; 1319 rt_ref(rt); 1320 RT_REFCNT_TRACE(rt); 1321 } 1322 break; 1323 } 1324 bad: 1325 if (need_to_release_ifa) 1326 ifa_release(ifa, &psref_ifa); 1327 if_put(ifp, &psref_ifp); 1328 curlwp_bindx(bound); 1329 if (need_unlock) 1330 RT_UNLOCK(); 1331 splx(s); 1332 return error; 1333 } 1334 1335 int 1336 rt_setgate(struct rtentry *rt, const struct sockaddr *gate) 1337 { 1338 struct sockaddr *new, *old; 1339 1340 
	KASSERT(rt->_rt_key != NULL);
	RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);

	new = sockaddr_dup(gate, M_ZERO | M_NOWAIT);
	if (new == NULL)
		return ENOMEM;

	/* Swap in the duplicated gateway and free the old one, if any. */
	old = rt->rt_gateway;
	rt->rt_gateway = new;
	if (old != NULL)
		sockaddr_free(old);

	KASSERT(rt->_rt_key != NULL);
	RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);

	if (rt->rt_flags & RTF_GATEWAY) {
		struct rtentry *gwrt;

		/* XXX we cannot call rtalloc1 if holding the rt lock */
		if (RT_LOCKED())
			gwrt = rtalloc1_locked(gate, 1, false);
		else
			gwrt = rtalloc1(gate, 1);
		/*
		 * If we switched gateways, grab the MTU from the new
		 * gateway route if the current MTU is greater than the
		 * MTU of the gateway.
		 * Note that, if the MTU of gateway is 0, we will reset the
		 * MTU of the route to run PMTUD again from scratch. XXX
		 */
		if (gwrt != NULL) {
			KASSERT(gwrt->_rt_key != NULL);
			RT_DPRINTF("gwrt->_rt_key = %p\n", gwrt->_rt_key);
			if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0 &&
			    rt->rt_rmx.rmx_mtu &&
			    rt->rt_rmx.rmx_mtu > gwrt->rt_rmx.rmx_mtu) {
				rt->rt_rmx.rmx_mtu = gwrt->rt_rmx.rmx_mtu;
			}
			rt_unref(gwrt);
		}
	}
	KASSERT(rt->_rt_key != NULL);
	RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
	return 0;
}

/*
 * Copy src into dst, applying netmask: each data byte of dst is the
 * bitwise AND of the corresponding bytes of src and netmask, up to the
 * shorter of the two sa_len values; remaining bytes of dst up to
 * src->sa_len are zeroed.  sa_len and sa_family are copied from src.
 */
static void
rt_maskedcopy(const struct sockaddr *src, struct sockaddr *dst,
	const struct sockaddr *netmask)
{
	const char *netmaskp = &netmask->sa_data[0],
		*srcp = &src->sa_data[0];
	char *dstp = &dst->sa_data[0];
	/* Bounds are computed relative to dst, which receives the copy. */
	const char *maskend = (char *)dst + MIN(netmask->sa_len, src->sa_len);
	const char *srcend = (char *)dst + src->sa_len;

	dst->sa_len = src->sa_len;
	dst->sa_family = src->sa_family;

	while (dstp < maskend)
		*dstp++ = *srcp++ & *netmaskp++;
	if (dstp < srcend)
		memset(dstp, 0, (size_t)(srcend - dstp));
}

/*
 * Inform the routing socket of a route change.
 */
void
rt_newmsg(const int cmd, const struct rtentry *rt)
{
	struct rt_addrinfo info;

	memset((void *)&info, 0, sizeof(info));
	info.rti_info[RTAX_DST] = rt_getkey(rt);
	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
	info.rti_info[RTAX_NETMASK] = rt_mask(rt);
	if (rt->rt_ifp) {
		info.rti_info[RTAX_IFP] = rt->rt_ifp->if_dl->ifa_addr;
		info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
	}

	rt_missmsg(cmd, &info, rt->rt_flags, 0);
}

/*
 * Set up or tear down a routing table entry, normally
 * for an interface.
 */
int
rtinit(struct ifaddr *ifa, int cmd, int flags)
{
	struct rtentry *rt;
	struct sockaddr *dst, *odst;
	struct sockaddr_storage maskeddst;
	struct rtentry *nrt = NULL;
	int error;
	struct rt_addrinfo info;

	dst = flags & RTF_HOST ? ifa->ifa_dstaddr : ifa->ifa_addr;
	if (cmd == RTM_DELETE) {
		if ((flags & RTF_HOST) == 0 && ifa->ifa_netmask) {
			/* Delete subnet route for this interface */
			odst = dst;
			dst = (struct sockaddr *)&maskeddst;
			rt_maskedcopy(odst, dst, ifa->ifa_netmask);
		}
		/* Refuse to delete a route that belongs to another ifaddr. */
		if ((rt = rtalloc1(dst, 0)) != NULL) {
			if (rt->rt_ifa != ifa) {
				rt_unref(rt);
				return (flags & RTF_HOST) ? EHOSTUNREACH
							: ENETUNREACH;
			}
			rt_unref(rt);
		}
	}
	memset(&info, 0, sizeof(info));
	info.rti_ifa = ifa;
	info.rti_flags = flags | ifa->ifa_flags;
	info.rti_info[RTAX_DST] = dst;
	info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;

	/*
	 * XXX here, it seems that we are assuming that ifa_netmask is NULL
	 * for RTF_HOST.  bsdi4 passes NULL explicitly (via intermediate
	 * variable) when RTF_HOST is 1.  still not sure if i can safely
	 * change it to meet bsdi4 behavior.
	 */
	if (cmd != RTM_LLINFO_UPD)
		info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
	/* RTM_LLINFO_UPD looks up an existing entry rather than adding one. */
	error = rtrequest1((cmd == RTM_LLINFO_UPD) ? RTM_GET : cmd, &info,
	    &nrt);
	if (error != 0)
		return error;

	rt = nrt;
	RT_REFCNT_TRACE(rt);
	switch (cmd) {
	case RTM_DELETE:
		rt_newmsg(cmd, rt);
		rt_free(rt);
		break;
	case RTM_LLINFO_UPD:
		if (cmd == RTM_LLINFO_UPD && ifa->ifa_rtrequest != NULL)
			ifa->ifa_rtrequest(RTM_LLINFO_UPD, rt, &info);
		rt_newmsg(RTM_CHANGE, rt);
		rt_unref(rt);
		break;
	case RTM_ADD:
		/*
		 * FIXME NOMPSAFE: the rtentry is updated with the existence
		 * of references of it.
		 */
		/*
		 * XXX it looks just reverting rt_ifa replaced by ifa_rtrequest
		 * called via rtrequest1. Can we just prevent the replacement
		 * somehow and remove the following code? And also doesn't
		 * calling ifa_rtrequest(RTM_ADD) replace rt_ifa again?
		 */
		if (rt->rt_ifa != ifa) {
			printf("rtinit: wrong ifa (%p) was (%p)\n", ifa,
			    rt->rt_ifa);
			if (rt->rt_ifa->ifa_rtrequest != NULL) {
				rt->rt_ifa->ifa_rtrequest(RTM_DELETE, rt,
				    &info);
			}
			rt_replace_ifa(rt, ifa);
			rt->rt_ifp = ifa->ifa_ifp;
			if (ifa->ifa_rtrequest != NULL)
				ifa->ifa_rtrequest(RTM_ADD, rt, &info);
		}
		rt_newmsg(cmd, rt);
		rt_unref(rt);
		RT_REFCNT_TRACE(rt);
		break;
	}
	return error;
}

/*
 * Create a local route entry for the address.
 * Announce the addition of the address and the route to the routing socket.
 */
int
rt_ifa_addlocal(struct ifaddr *ifa)
{
	struct rtentry *rt;
	int e;

	/* If there is no loopback entry, allocate one.
*/ 1530 rt = rtalloc1(ifa->ifa_addr, 0); 1531 #ifdef RT_DEBUG 1532 if (rt != NULL) 1533 dump_rt(rt); 1534 #endif 1535 if (rt == NULL || (rt->rt_flags & RTF_HOST) == 0 || 1536 (rt->rt_ifp->if_flags & IFF_LOOPBACK) == 0) 1537 { 1538 struct rt_addrinfo info; 1539 struct rtentry *nrt; 1540 1541 memset(&info, 0, sizeof(info)); 1542 info.rti_flags = RTF_HOST | RTF_LOCAL; 1543 if (!(ifa->ifa_ifp->if_flags & (IFF_LOOPBACK|IFF_POINTOPOINT))) 1544 info.rti_flags |= RTF_LLDATA; 1545 info.rti_info[RTAX_DST] = ifa->ifa_addr; 1546 info.rti_info[RTAX_GATEWAY] = 1547 (const struct sockaddr *)ifa->ifa_ifp->if_sadl; 1548 info.rti_ifa = ifa; 1549 nrt = NULL; 1550 e = rtrequest1(RTM_ADD, &info, &nrt); 1551 if (nrt && ifa != nrt->rt_ifa) 1552 rt_replace_ifa(nrt, ifa); 1553 rt_newaddrmsg(RTM_ADD, ifa, e, nrt); 1554 if (nrt != NULL) { 1555 #ifdef RT_DEBUG 1556 dump_rt(nrt); 1557 #endif 1558 rt_unref(nrt); 1559 RT_REFCNT_TRACE(nrt); 1560 } 1561 } else { 1562 e = 0; 1563 rt_newaddrmsg(RTM_NEWADDR, ifa, 0, NULL); 1564 } 1565 if (rt != NULL) 1566 rt_unref(rt); 1567 return e; 1568 } 1569 1570 /* 1571 * Remove the local route entry for the address. 1572 * Announce the removal of the address and the route to the routing socket. 1573 */ 1574 int 1575 rt_ifa_remlocal(struct ifaddr *ifa, struct ifaddr *alt_ifa) 1576 { 1577 struct rtentry *rt; 1578 int e = 0; 1579 1580 rt = rtalloc1(ifa->ifa_addr, 0); 1581 1582 /* 1583 * Before deleting, check if a corresponding loopbacked 1584 * host route surely exists. With this check, we can avoid 1585 * deleting an interface direct route whose destination is 1586 * the same as the address being removed. This can happen 1587 * when removing a subnet-router anycast address on an 1588 * interface attached to a shared medium. 
1589 */ 1590 if (rt != NULL && 1591 (rt->rt_flags & RTF_HOST) && 1592 (rt->rt_ifp->if_flags & IFF_LOOPBACK)) 1593 { 1594 /* If we cannot replace the route's ifaddr with the equivalent 1595 * ifaddr of another interface, I believe it is safest to 1596 * delete the route. 1597 */ 1598 if (alt_ifa == NULL) { 1599 e = rtdeletemsg(rt); 1600 if (e == 0) { 1601 rt_unref(rt); 1602 rt_free(rt); 1603 rt = NULL; 1604 } 1605 rt_newaddrmsg(RTM_DELADDR, ifa, 0, NULL); 1606 } else { 1607 rt_replace_ifa(rt, alt_ifa); 1608 rt_newmsg(RTM_CHANGE, rt); 1609 } 1610 } else 1611 rt_newaddrmsg(RTM_DELADDR, ifa, 0, NULL); 1612 if (rt != NULL) 1613 rt_unref(rt); 1614 return e; 1615 } 1616 1617 /* 1618 * Route timer routines. These routes allow functions to be called 1619 * for various routes at any time. This is useful in supporting 1620 * path MTU discovery and redirect route deletion. 1621 * 1622 * This is similar to some BSDI internal functions, but it provides 1623 * for multiple queues for efficiency's sake... 1624 */ 1625 1626 LIST_HEAD(, rttimer_queue) rttimer_queue_head; 1627 static int rt_init_done = 0; 1628 1629 /* 1630 * Some subtle order problems with domain initialization mean that 1631 * we cannot count on this being run from rt_init before various 1632 * protocol initializations are done. Therefore, we make sure 1633 * that this is run when the first queue is added... 
1634 */ 1635 1636 static void rt_timer_work(struct work *, void *); 1637 1638 static void 1639 rt_timer_init(void) 1640 { 1641 int error; 1642 1643 assert(rt_init_done == 0); 1644 1645 /* XXX should be in rt_init */ 1646 rw_init(&rt_lock); 1647 rw_init(&rtcache_lock); 1648 1649 LIST_INIT(&rttimer_queue_head); 1650 callout_init(&rt_timer_ch, CALLOUT_MPSAFE); 1651 error = workqueue_create(&rt_timer_wq, "rt_timer", 1652 rt_timer_work, NULL, PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE); 1653 if (error) 1654 panic("%s: workqueue_create failed (%d)\n", __func__, error); 1655 callout_reset(&rt_timer_ch, hz, rt_timer_timer, NULL); 1656 rt_init_done = 1; 1657 } 1658 1659 struct rttimer_queue * 1660 rt_timer_queue_create(u_int timeout) 1661 { 1662 struct rttimer_queue *rtq; 1663 1664 if (rt_init_done == 0) 1665 rt_timer_init(); 1666 1667 R_Malloc(rtq, struct rttimer_queue *, sizeof *rtq); 1668 if (rtq == NULL) 1669 return NULL; 1670 memset(rtq, 0, sizeof(*rtq)); 1671 1672 rtq->rtq_timeout = timeout; 1673 TAILQ_INIT(&rtq->rtq_head); 1674 RT_WLOCK(); 1675 LIST_INSERT_HEAD(&rttimer_queue_head, rtq, rtq_link); 1676 RT_UNLOCK(); 1677 1678 return rtq; 1679 } 1680 1681 void 1682 rt_timer_queue_change(struct rttimer_queue *rtq, long timeout) 1683 { 1684 1685 rtq->rtq_timeout = timeout; 1686 } 1687 1688 static void 1689 rt_timer_queue_remove_all(struct rttimer_queue *rtq) 1690 { 1691 struct rttimer *r; 1692 1693 RT_ASSERT_WLOCK(); 1694 1695 while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL) { 1696 LIST_REMOVE(r, rtt_link); 1697 TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next); 1698 rt_ref(r->rtt_rt); /* XXX */ 1699 RT_REFCNT_TRACE(r->rtt_rt); 1700 RT_UNLOCK(); 1701 (*r->rtt_func)(r->rtt_rt, r); 1702 pool_put(&rttimer_pool, r); 1703 RT_WLOCK(); 1704 if (rtq->rtq_count > 0) 1705 rtq->rtq_count--; 1706 else 1707 printf("rt_timer_queue_remove_all: " 1708 "rtq_count reached 0\n"); 1709 } 1710 } 1711 1712 void 1713 rt_timer_queue_destroy(struct rttimer_queue *rtq) 1714 { 1715 1716 RT_WLOCK(); 1717 
rt_timer_queue_remove_all(rtq); 1718 LIST_REMOVE(rtq, rtq_link); 1719 RT_UNLOCK(); 1720 1721 /* 1722 * Caller is responsible for freeing the rttimer_queue structure. 1723 */ 1724 } 1725 1726 unsigned long 1727 rt_timer_count(struct rttimer_queue *rtq) 1728 { 1729 return rtq->rtq_count; 1730 } 1731 1732 static void 1733 rt_timer_remove_all(struct rtentry *rt) 1734 { 1735 struct rttimer *r; 1736 1737 RT_WLOCK(); 1738 while ((r = LIST_FIRST(&rt->rt_timer)) != NULL) { 1739 LIST_REMOVE(r, rtt_link); 1740 TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next); 1741 if (r->rtt_queue->rtq_count > 0) 1742 r->rtt_queue->rtq_count--; 1743 else 1744 printf("rt_timer_remove_all: rtq_count reached 0\n"); 1745 pool_put(&rttimer_pool, r); 1746 } 1747 RT_UNLOCK(); 1748 } 1749 1750 int 1751 rt_timer_add(struct rtentry *rt, 1752 void (*func)(struct rtentry *, struct rttimer *), 1753 struct rttimer_queue *queue) 1754 { 1755 struct rttimer *r; 1756 1757 KASSERT(func != NULL); 1758 RT_WLOCK(); 1759 /* 1760 * If there's already a timer with this action, destroy it before 1761 * we add a new one. 
1762 */ 1763 LIST_FOREACH(r, &rt->rt_timer, rtt_link) { 1764 if (r->rtt_func == func) 1765 break; 1766 } 1767 if (r != NULL) { 1768 LIST_REMOVE(r, rtt_link); 1769 TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next); 1770 if (r->rtt_queue->rtq_count > 0) 1771 r->rtt_queue->rtq_count--; 1772 else 1773 printf("rt_timer_add: rtq_count reached 0\n"); 1774 } else { 1775 r = pool_get(&rttimer_pool, PR_NOWAIT); 1776 if (r == NULL) { 1777 RT_UNLOCK(); 1778 return ENOBUFS; 1779 } 1780 } 1781 1782 memset(r, 0, sizeof(*r)); 1783 1784 r->rtt_rt = rt; 1785 r->rtt_time = time_uptime; 1786 r->rtt_func = func; 1787 r->rtt_queue = queue; 1788 LIST_INSERT_HEAD(&rt->rt_timer, r, rtt_link); 1789 TAILQ_INSERT_TAIL(&queue->rtq_head, r, rtt_next); 1790 r->rtt_queue->rtq_count++; 1791 1792 RT_UNLOCK(); 1793 1794 return 0; 1795 } 1796 1797 static void 1798 rt_timer_work(struct work *wk, void *arg) 1799 { 1800 struct rttimer_queue *rtq; 1801 struct rttimer *r; 1802 1803 RT_WLOCK(); 1804 LIST_FOREACH(rtq, &rttimer_queue_head, rtq_link) { 1805 while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL && 1806 (r->rtt_time + rtq->rtq_timeout) < time_uptime) { 1807 LIST_REMOVE(r, rtt_link); 1808 TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next); 1809 rt_ref(r->rtt_rt); /* XXX */ 1810 RT_REFCNT_TRACE(r->rtt_rt); 1811 RT_UNLOCK(); 1812 (*r->rtt_func)(r->rtt_rt, r); 1813 pool_put(&rttimer_pool, r); 1814 RT_WLOCK(); 1815 if (rtq->rtq_count > 0) 1816 rtq->rtq_count--; 1817 else 1818 printf("rt_timer_timer: rtq_count reached 0\n"); 1819 } 1820 } 1821 RT_UNLOCK(); 1822 1823 callout_reset(&rt_timer_ch, hz, rt_timer_timer, NULL); 1824 } 1825 1826 static void 1827 rt_timer_timer(void *arg) 1828 { 1829 1830 workqueue_enqueue(rt_timer_wq, &rt_timer_wk, NULL); 1831 } 1832 1833 static struct rtentry * 1834 _rtcache_init(struct route *ro, int flag) 1835 { 1836 struct rtentry *rt; 1837 1838 rtcache_invariants(ro); 1839 KASSERT(ro->_ro_rt == NULL); 1840 RTCACHE_ASSERT_WLOCK(); 1841 1842 if (rtcache_getdst(ro) == NULL) 1843 return 
NULL; 1844 ro->ro_invalid = false; 1845 rt = rtalloc1(rtcache_getdst(ro), flag); 1846 if (rt != NULL && ISSET(rt->rt_flags, RTF_UP)) { 1847 ro->_ro_rt = rt; 1848 KASSERT(!ISSET(rt->rt_flags, RTF_UPDATING)); 1849 rtcache_ref(rt, ro); 1850 rt_unref(rt); 1851 rtcache(ro); 1852 } else if (rt != NULL) 1853 rt_unref(rt); 1854 1855 rtcache_invariants(ro); 1856 return ro->_ro_rt; 1857 } 1858 1859 struct rtentry * 1860 rtcache_init(struct route *ro) 1861 { 1862 struct rtentry *rt; 1863 RTCACHE_WLOCK(); 1864 rt = _rtcache_init(ro, 1); 1865 RTCACHE_UNLOCK(); 1866 return rt; 1867 } 1868 1869 struct rtentry * 1870 rtcache_init_noclone(struct route *ro) 1871 { 1872 struct rtentry *rt; 1873 RTCACHE_WLOCK(); 1874 rt = _rtcache_init(ro, 0); 1875 RTCACHE_UNLOCK(); 1876 return rt; 1877 } 1878 1879 struct rtentry * 1880 rtcache_update(struct route *ro, int clone) 1881 { 1882 struct rtentry *rt; 1883 RTCACHE_WLOCK(); 1884 rtcache_clear(ro); 1885 rt = _rtcache_init(ro, clone); 1886 RTCACHE_UNLOCK(); 1887 return rt; 1888 } 1889 1890 void 1891 rtcache_copy(struct route *new_ro, struct route *old_ro) 1892 { 1893 struct rtentry *rt; 1894 int ret; 1895 1896 KASSERT(new_ro != old_ro); 1897 rtcache_invariants(new_ro); 1898 rtcache_invariants(old_ro); 1899 1900 rt = rtcache_validate(old_ro); 1901 1902 if (rtcache_getdst(old_ro) == NULL) 1903 goto out; 1904 ret = rtcache_setdst(new_ro, rtcache_getdst(old_ro)); 1905 if (ret != 0) 1906 goto out; 1907 1908 RTCACHE_WLOCK(); 1909 new_ro->ro_invalid = false; 1910 if ((new_ro->_ro_rt = rt) != NULL) 1911 rtcache(new_ro); 1912 rtcache_invariants(new_ro); 1913 RTCACHE_UNLOCK(); 1914 out: 1915 rtcache_unref(rt, old_ro); 1916 return; 1917 } 1918 1919 static struct dom_rtlist invalid_routes = LIST_HEAD_INITIALIZER(dom_rtlist); 1920 1921 #if defined(RT_DEBUG) && defined(NET_MPSAFE) 1922 static void 1923 rtcache_trace(const char *func, struct rtentry *rt, struct route *ro) 1924 { 1925 char dst[64]; 1926 1927 sockaddr_format(ro->ro_sa, dst, 64); 1928 
printf("trace: %s:\tdst=%s cpu=%d lwp=%p psref=%p target=%p\n", func, dst, 1929 cpu_index(curcpu()), curlwp, &ro->ro_psref, &rt->rt_psref); 1930 } 1931 #define RTCACHE_PSREF_TRACE(rt, ro) rtcache_trace(__func__, (rt), (ro)) 1932 #else 1933 #define RTCACHE_PSREF_TRACE(rt, ro) do {} while (0) 1934 #endif 1935 1936 static void 1937 rtcache_ref(struct rtentry *rt, struct route *ro) 1938 { 1939 1940 KASSERT(rt != NULL); 1941 1942 #ifdef NET_MPSAFE 1943 RTCACHE_PSREF_TRACE(rt, ro); 1944 ro->ro_bound = curlwp_bind(); 1945 psref_acquire(&ro->ro_psref, &rt->rt_psref, rt_psref_class); 1946 #endif 1947 } 1948 1949 void 1950 rtcache_unref(struct rtentry *rt, struct route *ro) 1951 { 1952 1953 if (rt == NULL) 1954 return; 1955 1956 #ifdef NET_MPSAFE 1957 psref_release(&ro->ro_psref, &rt->rt_psref, rt_psref_class); 1958 curlwp_bindx(ro->ro_bound); 1959 RTCACHE_PSREF_TRACE(rt, ro); 1960 #endif 1961 } 1962 1963 static struct rtentry * 1964 rtcache_validate_locked(struct route *ro) 1965 { 1966 struct rtentry *rt = NULL; 1967 1968 retry: 1969 rt = ro->_ro_rt; 1970 rtcache_invariants(ro); 1971 1972 if (ro->ro_invalid) { 1973 rt = NULL; 1974 goto out; 1975 } 1976 1977 RT_RLOCK(); 1978 if (rt != NULL && (rt->rt_flags & RTF_UP) != 0 && rt->rt_ifp != NULL) { 1979 if (ISSET(rt->rt_flags, RTF_UPDATING)) { 1980 if (rt_wait_ok()) { 1981 RT_UNLOCK(); 1982 RTCACHE_UNLOCK(); 1983 /* We can wait until the update is complete */ 1984 rt_update_wait(); 1985 RTCACHE_RLOCK(); 1986 goto retry; 1987 } else { 1988 rt = NULL; 1989 } 1990 } else 1991 rtcache_ref(rt, ro); 1992 } else 1993 rt = NULL; 1994 RT_UNLOCK(); 1995 out: 1996 return rt; 1997 } 1998 1999 struct rtentry * 2000 rtcache_validate(struct route *ro) 2001 { 2002 struct rtentry *rt; 2003 2004 RTCACHE_RLOCK(); 2005 rt = rtcache_validate_locked(ro); 2006 RTCACHE_UNLOCK(); 2007 return rt; 2008 } 2009 2010 static void 2011 rtcache_invalidate(struct dom_rtlist *rtlist) 2012 { 2013 struct route *ro; 2014 2015 RTCACHE_ASSERT_WLOCK(); 2016 2017 while 
((ro = LIST_FIRST(rtlist)) != NULL) { 2018 rtcache_invariants(ro); 2019 KASSERT(ro->_ro_rt != NULL); 2020 ro->ro_invalid = true; 2021 LIST_REMOVE(ro, ro_rtcache_next); 2022 LIST_INSERT_HEAD(&invalid_routes, ro, ro_rtcache_next); 2023 rtcache_invariants(ro); 2024 } 2025 } 2026 2027 static void 2028 rtcache_clear_rtentry(int family, struct rtentry *rt) 2029 { 2030 struct domain *dom; 2031 struct route *ro, *nro; 2032 2033 if ((dom = pffinddomain(family)) == NULL) 2034 return; 2035 2036 RTCACHE_WLOCK(); 2037 LIST_FOREACH_SAFE(ro, &dom->dom_rtcache, ro_rtcache_next, nro) { 2038 if (ro->_ro_rt == rt) 2039 rtcache_clear(ro); 2040 } 2041 RTCACHE_UNLOCK(); 2042 } 2043 2044 static void 2045 rtcache_clear(struct route *ro) 2046 { 2047 2048 RTCACHE_ASSERT_WLOCK(); 2049 2050 rtcache_invariants(ro); 2051 if (ro->_ro_rt == NULL) 2052 return; 2053 2054 LIST_REMOVE(ro, ro_rtcache_next); 2055 2056 ro->_ro_rt = NULL; 2057 ro->ro_invalid = false; 2058 rtcache_invariants(ro); 2059 } 2060 2061 struct rtentry * 2062 rtcache_lookup2(struct route *ro, const struct sockaddr *dst, 2063 int clone, int *hitp) 2064 { 2065 const struct sockaddr *odst; 2066 struct rtentry *rt = NULL; 2067 2068 RTCACHE_RLOCK(); 2069 odst = rtcache_getdst(ro); 2070 if (odst == NULL) { 2071 RTCACHE_UNLOCK(); 2072 RTCACHE_WLOCK(); 2073 goto miss; 2074 } 2075 2076 if (sockaddr_cmp(odst, dst) != 0) { 2077 RTCACHE_UNLOCK(); 2078 RTCACHE_WLOCK(); 2079 rtcache_free_locked(ro); 2080 goto miss; 2081 } 2082 2083 rt = rtcache_validate_locked(ro); 2084 if (rt == NULL) { 2085 RTCACHE_UNLOCK(); 2086 RTCACHE_WLOCK(); 2087 rtcache_clear(ro); 2088 goto miss; 2089 } 2090 2091 rtcache_invariants(ro); 2092 2093 RTCACHE_UNLOCK(); 2094 if (hitp != NULL) 2095 *hitp = 1; 2096 return rt; 2097 miss: 2098 if (hitp != NULL) 2099 *hitp = 0; 2100 if (rtcache_setdst_locked(ro, dst) == 0) 2101 rt = _rtcache_init(ro, clone); 2102 2103 rtcache_invariants(ro); 2104 2105 RTCACHE_UNLOCK(); 2106 return rt; 2107 } 2108 2109 static void 2110 
rtcache_free_locked(struct route *ro)
{

	RTCACHE_ASSERT_WLOCK();
	rtcache_clear(ro);
	/* Also drop the cached destination sockaddr, if any. */
	if (ro->ro_sa != NULL) {
		sockaddr_free(ro->ro_sa);
		ro->ro_sa = NULL;
	}
	rtcache_invariants(ro);
}

/*
 * Empty a route cache: clear the cached rtentry and free ro_sa.
 * Locked wrapper around rtcache_free_locked().
 */
void
rtcache_free(struct route *ro)
{

	RTCACHE_WLOCK();
	rtcache_free_locked(ro);
	RTCACHE_UNLOCK();
}

/*
 * Set the destination address of a route cache.  When the new address
 * has the same family as the cached one, ro_sa is reused in place;
 * otherwise the old state is freed and sa is duplicated.
 * Returns 0 on success or ENOMEM if the duplication fails.
 */
static int
rtcache_setdst_locked(struct route *ro, const struct sockaddr *sa)
{
	KASSERT(sa != NULL);

	RTCACHE_ASSERT_WLOCK();

	rtcache_invariants(ro);
	if (ro->ro_sa != NULL) {
		if (ro->ro_sa->sa_family == sa->sa_family) {
			/* Same family: drop the cached route, copy in place. */
			rtcache_clear(ro);
			sockaddr_copy(ro->ro_sa, ro->ro_sa->sa_len, sa);
			rtcache_invariants(ro);
			return 0;
		}
		/* free ro_sa, wrong family */
		rtcache_free_locked(ro);
	}

	KASSERT(ro->_ro_rt == NULL);

	if ((ro->ro_sa = sockaddr_dup(sa, M_ZERO | M_NOWAIT)) == NULL) {
		rtcache_invariants(ro);
		return ENOMEM;
	}
	rtcache_invariants(ro);
	return 0;
}

int
rtcache_setdst(struct route *ro, const struct sockaddr *sa)
{
	int error;

	RTCACHE_WLOCK();
	error = rtcache_setdst_locked(ro, sa);
	RTCACHE_UNLOCK();

	return error;
}

/*
 * Replace the route's tag with a duplicate of tag (no-op when the same
 * sockaddr is passed back in).  Returns the stored tag, or NULL if the
 * duplication failed.
 */
const struct sockaddr *
rt_settag(struct rtentry *rt, const struct sockaddr *tag)
{
	if (rt->rt_tag != tag) {
		if (rt->rt_tag != NULL)
			sockaddr_free(rt->rt_tag);
		rt->rt_tag = sockaddr_dup(tag, M_ZERO | M_NOWAIT);
	}
	return rt->rt_tag;
}

struct sockaddr *
rt_gettag(const struct rtentry *rt)
{
	return rt->rt_tag;
}

/*
 * Return an errno describing why an RTF_REJECT route refuses traffic,
 * or 0 when the route is not rejecting.
 */
int
rt_check_reject_route(const struct rtentry *rt, const struct ifnet *ifp)
{

	if ((rt->rt_flags & RTF_REJECT) != 0) {
		/* Mimic looutput */
		if (ifp->if_flags & IFF_LOOPBACK)
			return (rt->rt_flags & RTF_HOST) ?
2197 EHOSTUNREACH : ENETUNREACH; 2198 else if (rt->rt_rmx.rmx_expire == 0 || 2199 time_uptime < rt->rt_rmx.rmx_expire) 2200 return (rt->rt_flags & RTF_GATEWAY) ? 2201 EHOSTUNREACH : EHOSTDOWN; 2202 } 2203 2204 return 0; 2205 } 2206 2207 void 2208 rt_delete_matched_entries(sa_family_t family, int (*f)(struct rtentry *, void *), 2209 void *v) 2210 { 2211 2212 for (;;) { 2213 int s; 2214 int error; 2215 struct rtentry *rt, *retrt = NULL; 2216 2217 RT_RLOCK(); 2218 s = splsoftnet(); 2219 rt = rtbl_search_matched_entry(family, f, v); 2220 if (rt == NULL) { 2221 splx(s); 2222 RT_UNLOCK(); 2223 return; 2224 } 2225 rt->rt_refcnt++; 2226 splx(s); 2227 RT_UNLOCK(); 2228 2229 error = rtrequest(RTM_DELETE, rt_getkey(rt), rt->rt_gateway, 2230 rt_mask(rt), rt->rt_flags, &retrt); 2231 if (error == 0) { 2232 KASSERT(retrt == rt); 2233 KASSERT((retrt->rt_flags & RTF_UP) == 0); 2234 retrt->rt_ifp = NULL; 2235 rt_unref(rt); 2236 rt_free(retrt); 2237 } else if (error == ESRCH) { 2238 /* Someone deleted the entry already. */ 2239 rt_unref(rt); 2240 } else { 2241 log(LOG_ERR, "%s: unable to delete rtentry @ %p, " 2242 "error = %d\n", rt->rt_ifp->if_xname, rt, error); 2243 /* XXX how to treat this case? 
			 */
		}
	}
}

/*
 * Walk the routing table of the given family, invoking f(rt, v) on each
 * entry under the routing table read lock.  Returns the first non-zero
 * value returned by f, or 0.
 */
int
rt_walktree(sa_family_t family, int (*f)(struct rtentry *, void *), void *v)
{
	int error;

	RT_RLOCK();
	error = rtbl_walktree(family, f, v);
	RT_UNLOCK();

	return error;
}

#ifdef DDB

#include <machine/db_machdep.h>
#include <ddb/db_interface.h>
#include <ddb/db_output.h>

#define rt_expire rt_rmx.rmx_expire

/* Print a sockaddr as a bracketed, comma-separated list of byte values. */
static void
db_print_sa(const struct sockaddr *sa)
{
	int len;
	const u_char *p;

	if (sa == NULL) {
		db_printf("[NULL]");
		return;
	}

	p = (const u_char *)sa;
	len = sa->sa_len;
	db_printf("[");
	while (len > 0) {
		db_printf("%d", *p);
		p++; len--;
		if (len) db_printf(",");
	}
	db_printf("]\n");
}

/* Print an ifaddr's addresses, flags, refcnt and metric; NULL is a no-op. */
static void
db_print_ifa(struct ifaddr *ifa)
{
	if (ifa == NULL)
		return;
	db_printf(" ifa_addr=");
	db_print_sa(ifa->ifa_addr);
	db_printf(" ifa_dsta=");
	db_print_sa(ifa->ifa_dstaddr);
	db_printf(" ifa_mask=");
	db_print_sa(ifa->ifa_netmask);
	db_printf(" flags=0x%x,refcnt=%d,metric=%d\n",
	    ifa->ifa_flags,
	    ifa->ifa_refcnt,
	    ifa->ifa_metric);
}

/*
 * Function to pass to rt_walktree().
 * Return non-zero error to abort walk.
 */
static int
db_show_rtentry(struct rtentry *rt, void *w)
{
	db_printf("rtentry=%p", rt);

	db_printf(" flags=0x%x refcnt=%d use=%"PRId64" expire=%"PRId64"\n",
	    rt->rt_flags, rt->rt_refcnt,
	    rt->rt_use, (uint64_t)rt->rt_expire);

	db_printf(" key="); db_print_sa(rt_getkey(rt));
	db_printf(" mask="); db_print_sa(rt_mask(rt));
	db_printf(" gw="); db_print_sa(rt->rt_gateway);

	db_printf(" ifp=%p ", rt->rt_ifp);
	if (rt->rt_ifp)
		db_printf("(%s)", rt->rt_ifp->if_xname);
	else
		db_printf("(NULL)");

	db_printf(" ifa=%p\n", rt->rt_ifa);
	db_print_ifa(rt->rt_ifa);

	db_printf(" gwroute=%p llinfo=%p\n",
	    rt->rt_gwroute, rt->rt_llinfo);

	return 0;
}

/*
 * Function to print all the route trees.
 * Use this from ddb: "show routes"
 */
void
db_show_routes(db_expr_t addr, bool have_addr,
	db_expr_t count, const char *modif)
{
	rt_walktree(AF_INET, db_show_rtentry, NULL);
}
#endif