1 /* $NetBSD: route.c,v 1.190 2017/02/10 13:48:06 ozaki-r Exp $ */ 2 3 /*- 4 * Copyright (c) 1998, 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Kevin M. Lahey of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 35 * All rights reserved. 36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. 
Redistributions of source code must retain the above copyright 41 * notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. Neither the name of the project nor the names of its contributors 46 * may be used to endorse or promote products derived from this software 47 * without specific prior written permission. 48 * 49 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 52 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 59 * SUCH DAMAGE. 60 */ 61 62 /* 63 * Copyright (c) 1980, 1986, 1991, 1993 64 * The Regents of the University of California. All rights reserved. 65 * 66 * Redistribution and use in source and binary forms, with or without 67 * modification, are permitted provided that the following conditions 68 * are met: 69 * 1. Redistributions of source code must retain the above copyright 70 * notice, this list of conditions and the following disclaimer. 71 * 2. Redistributions in binary form must reproduce the above copyright 72 * notice, this list of conditions and the following disclaimer in the 73 * documentation and/or other materials provided with the distribution. 
74 * 3. Neither the name of the University nor the names of its contributors 75 * may be used to endorse or promote products derived from this software 76 * without specific prior written permission. 77 * 78 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 79 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 80 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 81 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 82 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 83 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 84 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 85 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 86 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 87 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 88 * SUCH DAMAGE. 89 * 90 * @(#)route.c 8.3 (Berkeley) 1/9/95 91 */ 92 93 #ifdef _KERNEL_OPT 94 #include "opt_inet.h" 95 #include "opt_route.h" 96 #include "opt_net_mpsafe.h" 97 #endif 98 99 #include <sys/cdefs.h> 100 __KERNEL_RCSID(0, "$NetBSD: route.c,v 1.190 2017/02/10 13:48:06 ozaki-r Exp $"); 101 102 #include <sys/param.h> 103 #ifdef RTFLUSH_DEBUG 104 #include <sys/sysctl.h> 105 #endif 106 #include <sys/systm.h> 107 #include <sys/callout.h> 108 #include <sys/proc.h> 109 #include <sys/mbuf.h> 110 #include <sys/socket.h> 111 #include <sys/socketvar.h> 112 #include <sys/domain.h> 113 #include <sys/kernel.h> 114 #include <sys/ioctl.h> 115 #include <sys/pool.h> 116 #include <sys/kauth.h> 117 #include <sys/workqueue.h> 118 #include <sys/syslog.h> 119 #include <sys/rwlock.h> 120 #include <sys/mutex.h> 121 #include <sys/cpu.h> 122 123 #include <net/if.h> 124 #include <net/if_dl.h> 125 #include <net/route.h> 126 127 #include <netinet/in.h> 128 #include <netinet/in_var.h> 129 130 #ifdef RTFLUSH_DEBUG 131 
#define rtcache_debug() __predict_false(_rtcache_debug)
#else /* RTFLUSH_DEBUG */
#define rtcache_debug() 0
#endif /* RTFLUSH_DEBUG */

/* Trace rtentry refcnt transitions; compiles away without RT_DEBUG. */
#ifdef RT_DEBUG
#define RT_REFCNT_TRACE(rt)	printf("%s:%d: rt=%p refcnt=%d\n", \
    __func__, __LINE__, (rt), (rt)->rt_refcnt)
#else
#define RT_REFCNT_TRACE(rt)	do {} while (0)
#endif

/* Debug logging; a no-op in non-DEBUG kernels. */
#ifdef DEBUG
#define dlog(level, fmt, args...)	log(level, fmt, ##args)
#else
#define dlog(level, fmt, args...)	do {} while (0)
#endif

struct rtstat rtstat;

static int rttrash;		/* routes not in table but not freed */

/* Backing pools for rtentry and rttimer allocations. */
static struct pool rtentry_pool;
static struct pool rttimer_pool;

static struct callout rt_timer_ch; /* callout for rt_timer_timer() */
static struct workqueue *rt_timer_wq;
static struct work rt_timer_wk;

static void rt_timer_init(void);
static void rt_timer_queue_remove_all(struct rttimer_queue *);
static void rt_timer_remove_all(struct rtentry *);
static void rt_timer_timer(void *);

/*
 * Locking notes:
 * - The routing table is protected by a global rwlock
 *   - API: RT_RLOCK and friends
 * - rtcaches are protected by a global rwlock
 *   - API: RTCACHE_RLOCK and friends
 * - References to a rtentry is managed by reference counting and psref
 *   - Reference counting is used for temporal reference when a rtentry
 *     is fetched from the routing table
 *   - psref is used for temporal reference when a rtentry is fetched
 *     from a rtcache
 *     - struct route (rtcache) has struct psref, so we cannot obtain
 *       a reference twice on the same struct route
 *   - Before destroying or updating a rtentry, we have to wait for
 *     all references left (see below for details)
 *   - APIs
 *     - An obtained rtentry via rtalloc1 or rtrequest* must be
 *       unreferenced by rt_unref
 *     - An obtained rtentry via rtcache_* must be unreferenced by
 *       rtcache_unref
 *   - TODO: once we get a lockless routing table, we should use only
 *     psref for rtentries
 * - rtentry destruction
 *   - A rtentry is destroyed (freed) only when we call rtrequest(RTM_DELETE)
 *   - If a caller of rtrequest grabs a reference of a rtentry, the caller
 *     has a responsibility to destroy the rtentry by itself by calling
 *     rt_free
 *     - If not, rtrequest itself does that
 *   - If rt_free is called in softint, the actual destruction routine is
 *     deferred to a workqueue
 * - rtentry update
 *   - When updating a rtentry, RTF_UPDATING flag is set
 *   - If a rtentry is set RTF_UPDATING, fetching the rtentry from
 *     the routing table or a rtcache results in either of the following
 *     cases:
 *     - if the caller runs in softint, the caller fails to fetch
 *     - otherwise, the caller waits for the update completed and retries
 *       to fetch (probably succeed to fetch for the second time)
 */

/*
 * Global locks for the routing table and rtcaches.
 * Locking order: rtcache_lock => rt_lock
 */
static krwlock_t rt_lock __cacheline_aligned;
#ifdef NET_MPSAFE
#define RT_RLOCK()		rw_enter(&rt_lock, RW_READER)
#define RT_WLOCK()		rw_enter(&rt_lock, RW_WRITER)
#define RT_UNLOCK()		rw_exit(&rt_lock)
#define RT_LOCKED()		rw_lock_held(&rt_lock)
#define RT_ASSERT_WLOCK()	KASSERT(rw_write_held(&rt_lock))
#else
/* Non-MPSAFE kernels rely on the legacy spl discipline instead. */
#define RT_RLOCK()		do {} while (0)
#define RT_WLOCK()		do {} while (0)
#define RT_UNLOCK()		do {} while (0)
#define RT_LOCKED()		false
#define RT_ASSERT_WLOCK()	do {} while (0)
#endif

static krwlock_t rtcache_lock __cacheline_aligned;
#ifdef NET_MPSAFE
#define RTCACHE_RLOCK()		rw_enter(&rtcache_lock, RW_READER)
#define RTCACHE_WLOCK()		rw_enter(&rtcache_lock, RW_WRITER)
#define RTCACHE_UNLOCK()	rw_exit(&rtcache_lock)
#define RTCACHE_ASSERT_WLOCK()	KASSERT(rw_write_held(&rtcache_lock))
#define RTCACHE_WLOCKED()	rw_write_held(&rtcache_lock)
#else
#define RTCACHE_RLOCK()		do {} while (0)
#define RTCACHE_WLOCK()		do {} while (0)
#define RTCACHE_UNLOCK()	do {} while (0)
#define RTCACHE_ASSERT_WLOCK()	do {} while (0)
#define RTCACHE_WLOCKED()	false
#endif

/*
 * mutex and cv that are used to wait for references to a rtentry left
 * before updating the rtentry.
 */
static struct {
	kmutex_t lock;
	kcondvar_t cv;
	bool ongoing;		/* an update is in progress */
	const struct lwp *lwp;	/* the updater, to let it bypass the wait */
} rt_update_global __cacheline_aligned;

/*
 * A workqueue and stuff that are used to defer the destruction routine
 * of rtentries (rt_free called in softint context).
 */
static struct {
	struct workqueue *wq;
	struct work wk;
	kmutex_t lock;		/* protects queue[] */
	struct rtentry *queue[10];
} rt_free_global __cacheline_aligned;

/* psref for rtentry */
static struct psref_class *rt_psref_class __read_mostly;

#ifdef RTFLUSH_DEBUG
static int _rtcache_debug = 0;
#endif /* RTFLUSH_DEBUG */

static kauth_listener_t route_listener;

static int rtdeletemsg(struct rtentry *);
static void rtflushall(int);

static void rt_maskedcopy(const struct sockaddr *,
    struct sockaddr *, const struct sockaddr *);

static void rtcache_clear(struct route *);
static void rtcache_clear_rtentry(int, struct rtentry *);
static void rtcache_invalidate(struct dom_rtlist *);

static void rt_ref(struct rtentry *);

static struct rtentry *
    rtalloc1_locked(const struct sockaddr *, int, bool);
static struct rtentry *
    rtcache_validate_locked(struct route *);
static void rtcache_free_locked(struct route *);
static int rtcache_setdst_locked(struct route *, const struct sockaddr *);

static void rtcache_ref(struct rtentry *, struct route *);

#ifdef NET_MPSAFE
static void rt_update_wait(void);
#endif

static bool rt_wait_ok(void);
static void rt_wait_refcnt(const char *, struct rtentry *, int);
static void rt_wait_psref(struct
rtentry *); 298 299 #ifdef DDB 300 static void db_print_sa(const struct sockaddr *); 301 static void db_print_ifa(struct ifaddr *); 302 static int db_show_rtentry(struct rtentry *, void *); 303 #endif 304 305 #ifdef RTFLUSH_DEBUG 306 static void sysctl_net_rtcache_setup(struct sysctllog **); 307 static void 308 sysctl_net_rtcache_setup(struct sysctllog **clog) 309 { 310 const struct sysctlnode *rnode; 311 312 if (sysctl_createv(clog, 0, NULL, &rnode, CTLFLAG_PERMANENT, 313 CTLTYPE_NODE, 314 "rtcache", SYSCTL_DESCR("Route cache related settings"), 315 NULL, 0, NULL, 0, CTL_NET, CTL_CREATE, CTL_EOL) != 0) 316 return; 317 if (sysctl_createv(clog, 0, &rnode, &rnode, 318 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, 319 "debug", SYSCTL_DESCR("Debug route caches"), 320 NULL, 0, &_rtcache_debug, 0, CTL_CREATE, CTL_EOL) != 0) 321 return; 322 } 323 #endif /* RTFLUSH_DEBUG */ 324 325 static inline void 326 rt_destroy(struct rtentry *rt) 327 { 328 if (rt->_rt_key != NULL) 329 sockaddr_free(rt->_rt_key); 330 if (rt->rt_gateway != NULL) 331 sockaddr_free(rt->rt_gateway); 332 if (rt_gettag(rt) != NULL) 333 sockaddr_free(rt_gettag(rt)); 334 rt->_rt_key = rt->rt_gateway = rt->rt_tag = NULL; 335 } 336 337 static inline const struct sockaddr * 338 rt_setkey(struct rtentry *rt, const struct sockaddr *key, int flags) 339 { 340 if (rt->_rt_key == key) 341 goto out; 342 343 if (rt->_rt_key != NULL) 344 sockaddr_free(rt->_rt_key); 345 rt->_rt_key = sockaddr_dup(key, flags); 346 out: 347 rt->rt_nodes->rn_key = (const char *)rt->_rt_key; 348 return rt->_rt_key; 349 } 350 351 struct ifaddr * 352 rt_get_ifa(struct rtentry *rt) 353 { 354 struct ifaddr *ifa; 355 356 if ((ifa = rt->rt_ifa) == NULL) 357 return ifa; 358 else if (ifa->ifa_getifa == NULL) 359 return ifa; 360 #if 0 361 else if (ifa->ifa_seqno != NULL && *ifa->ifa_seqno == rt->rt_ifa_seqno) 362 return ifa; 363 #endif 364 else { 365 ifa = (*ifa->ifa_getifa)(ifa, rt_getkey(rt)); 366 if (ifa == NULL) 367 return NULL; 368 
rt_replace_ifa(rt, ifa); 369 return ifa; 370 } 371 } 372 373 static void 374 rt_set_ifa1(struct rtentry *rt, struct ifaddr *ifa) 375 { 376 rt->rt_ifa = ifa; 377 if (ifa->ifa_seqno != NULL) 378 rt->rt_ifa_seqno = *ifa->ifa_seqno; 379 } 380 381 /* 382 * Is this route the connected route for the ifa? 383 */ 384 static int 385 rt_ifa_connected(const struct rtentry *rt, const struct ifaddr *ifa) 386 { 387 const struct sockaddr *key, *dst, *odst; 388 struct sockaddr_storage maskeddst; 389 390 key = rt_getkey(rt); 391 dst = rt->rt_flags & RTF_HOST ? ifa->ifa_dstaddr : ifa->ifa_addr; 392 if (dst == NULL || 393 dst->sa_family != key->sa_family || 394 dst->sa_len != key->sa_len) 395 return 0; 396 if ((rt->rt_flags & RTF_HOST) == 0 && ifa->ifa_netmask) { 397 odst = dst; 398 dst = (struct sockaddr *)&maskeddst; 399 rt_maskedcopy(odst, (struct sockaddr *)&maskeddst, 400 ifa->ifa_netmask); 401 } 402 return (memcmp(dst, key, dst->sa_len) == 0); 403 } 404 405 void 406 rt_replace_ifa(struct rtentry *rt, struct ifaddr *ifa) 407 { 408 if (rt->rt_ifa && 409 rt->rt_ifa != ifa && 410 rt->rt_ifa->ifa_flags & IFA_ROUTE && 411 rt_ifa_connected(rt, rt->rt_ifa)) 412 { 413 RT_DPRINTF("rt->_rt_key = %p, ifa = %p, " 414 "replace deleted IFA_ROUTE\n", 415 (void *)rt->_rt_key, (void *)rt->rt_ifa); 416 rt->rt_ifa->ifa_flags &= ~IFA_ROUTE; 417 if (rt_ifa_connected(rt, ifa)) { 418 RT_DPRINTF("rt->_rt_key = %p, ifa = %p, " 419 "replace added IFA_ROUTE\n", 420 (void *)rt->_rt_key, (void *)ifa); 421 ifa->ifa_flags |= IFA_ROUTE; 422 } 423 } 424 425 ifaref(ifa); 426 ifafree(rt->rt_ifa); 427 rt_set_ifa1(rt, ifa); 428 } 429 430 static void 431 rt_set_ifa(struct rtentry *rt, struct ifaddr *ifa) 432 { 433 ifaref(ifa); 434 rt_set_ifa1(rt, ifa); 435 } 436 437 static int 438 route_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie, 439 void *arg0, void *arg1, void *arg2, void *arg3) 440 { 441 struct rt_msghdr *rtm; 442 int result; 443 444 result = KAUTH_RESULT_DEFER; 445 rtm = arg1; 446 447 if 
(action != KAUTH_NETWORK_ROUTE) 448 return result; 449 450 if (rtm->rtm_type == RTM_GET) 451 result = KAUTH_RESULT_ALLOW; 452 453 return result; 454 } 455 456 static void rt_free_work(struct work *, void *); 457 458 void 459 rt_init(void) 460 { 461 int error; 462 463 #ifdef RTFLUSH_DEBUG 464 sysctl_net_rtcache_setup(NULL); 465 #endif 466 467 mutex_init(&rt_free_global.lock, MUTEX_DEFAULT, IPL_SOFTNET); 468 rt_psref_class = psref_class_create("rtentry", IPL_SOFTNET); 469 470 error = workqueue_create(&rt_free_global.wq, "rt_free", 471 rt_free_work, NULL, PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE); 472 if (error) 473 panic("%s: workqueue_create failed (%d)\n", __func__, error); 474 475 mutex_init(&rt_update_global.lock, MUTEX_DEFAULT, IPL_SOFTNET); 476 cv_init(&rt_update_global.cv, "rt_update"); 477 478 pool_init(&rtentry_pool, sizeof(struct rtentry), 0, 0, 0, "rtentpl", 479 NULL, IPL_SOFTNET); 480 pool_init(&rttimer_pool, sizeof(struct rttimer), 0, 0, 0, "rttmrpl", 481 NULL, IPL_SOFTNET); 482 483 rn_init(); /* initialize all zeroes, all ones, mask table */ 484 rtbl_init(); 485 486 route_listener = kauth_listen_scope(KAUTH_SCOPE_NETWORK, 487 route_listener_cb, NULL); 488 } 489 490 static void 491 rtflushall(int family) 492 { 493 struct domain *dom; 494 495 if (rtcache_debug()) 496 printf("%s: enter\n", __func__); 497 498 if ((dom = pffinddomain(family)) == NULL) 499 return; 500 501 RTCACHE_WLOCK(); 502 rtcache_invalidate(&dom->dom_rtcache); 503 RTCACHE_UNLOCK(); 504 } 505 506 static void 507 rtcache(struct route *ro) 508 { 509 struct domain *dom; 510 511 RTCACHE_ASSERT_WLOCK(); 512 513 rtcache_invariants(ro); 514 KASSERT(ro->_ro_rt != NULL); 515 KASSERT(ro->ro_invalid == false); 516 KASSERT(rtcache_getdst(ro) != NULL); 517 518 if ((dom = pffinddomain(rtcache_getdst(ro)->sa_family)) == NULL) 519 return; 520 521 LIST_INSERT_HEAD(&dom->dom_rtcache, ro, ro_rtcache_next); 522 rtcache_invariants(ro); 523 } 524 525 #ifdef RT_DEBUG 526 static void 527 dump_rt(const struct rtentry 
*rt) 528 { 529 char buf[512]; 530 531 aprint_normal("rt: "); 532 aprint_normal("p=%p ", rt); 533 if (rt->_rt_key == NULL) { 534 aprint_normal("dst=(NULL) "); 535 } else { 536 sockaddr_format(rt->_rt_key, buf, sizeof(buf)); 537 aprint_normal("dst=%s ", buf); 538 } 539 if (rt->rt_gateway == NULL) { 540 aprint_normal("gw=(NULL) "); 541 } else { 542 sockaddr_format(rt->_rt_key, buf, sizeof(buf)); 543 aprint_normal("gw=%s ", buf); 544 } 545 aprint_normal("flags=%x ", rt->rt_flags); 546 if (rt->rt_ifp == NULL) { 547 aprint_normal("if=(NULL) "); 548 } else { 549 aprint_normal("if=%s ", rt->rt_ifp->if_xname); 550 } 551 aprint_normal("\n"); 552 } 553 #endif /* RT_DEBUG */ 554 555 /* 556 * Packet routing routines. If success, refcnt of a returned rtentry 557 * will be incremented. The caller has to rtfree it by itself. 558 */ 559 struct rtentry * 560 rtalloc1_locked(const struct sockaddr *dst, int report, bool wait_ok) 561 { 562 rtbl_t *rtbl; 563 struct rtentry *rt; 564 int s; 565 566 #ifdef NET_MPSAFE 567 retry: 568 #endif 569 s = splsoftnet(); 570 rtbl = rt_gettable(dst->sa_family); 571 if (rtbl == NULL) 572 goto miss; 573 574 rt = rt_matchaddr(rtbl, dst); 575 if (rt == NULL) 576 goto miss; 577 578 if (!ISSET(rt->rt_flags, RTF_UP)) 579 goto miss; 580 581 #ifdef NET_MPSAFE 582 if (ISSET(rt->rt_flags, RTF_UPDATING) && 583 /* XXX updater should be always able to acquire */ 584 curlwp != rt_update_global.lwp) { 585 bool need_lock = false; 586 if (!wait_ok || !rt_wait_ok()) 587 goto miss; 588 RT_UNLOCK(); 589 splx(s); 590 591 /* XXX need more proper solution */ 592 if (RTCACHE_WLOCKED()) { 593 RTCACHE_UNLOCK(); 594 need_lock = true; 595 } 596 597 /* We can wait until the update is complete */ 598 rt_update_wait(); 599 600 if (need_lock) 601 RTCACHE_WLOCK(); 602 goto retry; 603 } 604 #endif /* NET_MPSAFE */ 605 606 rt_ref(rt); 607 RT_REFCNT_TRACE(rt); 608 609 splx(s); 610 return rt; 611 miss: 612 rtstat.rts_unreach++; 613 if (report) { 614 struct rt_addrinfo info; 615 616 
memset(&info, 0, sizeof(info)); 617 info.rti_info[RTAX_DST] = dst; 618 rt_missmsg(RTM_MISS, &info, 0, 0); 619 } 620 splx(s); 621 return NULL; 622 } 623 624 struct rtentry * 625 rtalloc1(const struct sockaddr *dst, int report) 626 { 627 struct rtentry *rt; 628 629 RT_RLOCK(); 630 rt = rtalloc1_locked(dst, report, true); 631 RT_UNLOCK(); 632 633 return rt; 634 } 635 636 static void 637 rt_ref(struct rtentry *rt) 638 { 639 640 KASSERT(rt->rt_refcnt >= 0); 641 atomic_inc_uint(&rt->rt_refcnt); 642 } 643 644 void 645 rt_unref(struct rtentry *rt) 646 { 647 648 KASSERT(rt != NULL); 649 KASSERTMSG(rt->rt_refcnt > 0, "refcnt=%d", rt->rt_refcnt); 650 651 atomic_dec_uint(&rt->rt_refcnt); 652 if (!ISSET(rt->rt_flags, RTF_UP) || ISSET(rt->rt_flags, RTF_UPDATING)) { 653 mutex_enter(&rt_free_global.lock); 654 cv_broadcast(&rt->rt_cv); 655 mutex_exit(&rt_free_global.lock); 656 } 657 } 658 659 static bool 660 rt_wait_ok(void) 661 { 662 663 KASSERT(!cpu_intr_p()); 664 return !cpu_softintr_p(); 665 } 666 667 void 668 rt_wait_refcnt(const char *title, struct rtentry *rt, int cnt) 669 { 670 mutex_enter(&rt_free_global.lock); 671 while (rt->rt_refcnt > cnt) { 672 dlog(LOG_DEBUG, "%s: %s waiting (refcnt=%d)\n", 673 __func__, title, rt->rt_refcnt); 674 cv_wait(&rt->rt_cv, &rt_free_global.lock); 675 dlog(LOG_DEBUG, "%s: %s waited (refcnt=%d)\n", 676 __func__, title, rt->rt_refcnt); 677 } 678 mutex_exit(&rt_free_global.lock); 679 } 680 681 void 682 rt_wait_psref(struct rtentry *rt) 683 { 684 685 psref_target_destroy(&rt->rt_psref, rt_psref_class); 686 psref_target_init(&rt->rt_psref, rt_psref_class); 687 } 688 689 static void 690 _rt_free(struct rtentry *rt) 691 { 692 struct ifaddr *ifa; 693 694 /* 695 * Need to avoid a deadlock on rt_wait_refcnt of update 696 * and a conflict on psref_target_destroy of update. 
697 */ 698 #ifdef NET_MPSAFE 699 rt_update_wait(); 700 #endif 701 702 RT_REFCNT_TRACE(rt); 703 KASSERTMSG(rt->rt_refcnt >= 0, "refcnt=%d", rt->rt_refcnt); 704 rt_wait_refcnt("free", rt, 0); 705 #ifdef NET_MPSAFE 706 psref_target_destroy(&rt->rt_psref, rt_psref_class); 707 #endif 708 709 rt_assert_inactive(rt); 710 rttrash--; 711 ifa = rt->rt_ifa; 712 rt->rt_ifa = NULL; 713 ifafree(ifa); 714 rt->rt_ifp = NULL; 715 cv_destroy(&rt->rt_cv); 716 rt_destroy(rt); 717 pool_put(&rtentry_pool, rt); 718 } 719 720 static void 721 rt_free_work(struct work *wk, void *arg) 722 { 723 int i; 724 struct rtentry *rt; 725 726 restart: 727 mutex_enter(&rt_free_global.lock); 728 for (i = 0; i < sizeof(rt_free_global.queue); i++) { 729 if (rt_free_global.queue[i] == NULL) 730 continue; 731 rt = rt_free_global.queue[i]; 732 rt_free_global.queue[i] = NULL; 733 mutex_exit(&rt_free_global.lock); 734 735 atomic_dec_uint(&rt->rt_refcnt); 736 _rt_free(rt); 737 goto restart; 738 } 739 mutex_exit(&rt_free_global.lock); 740 } 741 742 void 743 rt_free(struct rtentry *rt) 744 { 745 746 KASSERT(rt->rt_refcnt > 0); 747 if (!rt_wait_ok()) { 748 int i; 749 mutex_enter(&rt_free_global.lock); 750 for (i = 0; i < sizeof(rt_free_global.queue); i++) { 751 if (rt_free_global.queue[i] == NULL) { 752 rt_free_global.queue[i] = rt; 753 break; 754 } 755 } 756 KASSERT(i < sizeof(rt_free_global.queue)); 757 rt_ref(rt); 758 mutex_exit(&rt_free_global.lock); 759 workqueue_enqueue(rt_free_global.wq, &rt_free_global.wk, NULL); 760 } else { 761 atomic_dec_uint(&rt->rt_refcnt); 762 _rt_free(rt); 763 } 764 } 765 766 #ifdef NET_MPSAFE 767 static void 768 rt_update_wait(void) 769 { 770 771 mutex_enter(&rt_update_global.lock); 772 while (rt_update_global.ongoing) { 773 dlog(LOG_DEBUG, "%s: waiting lwp=%p\n", __func__, curlwp); 774 cv_wait(&rt_update_global.cv, &rt_update_global.lock); 775 dlog(LOG_DEBUG, "%s: waited lwp=%p\n", __func__, curlwp); 776 } 777 mutex_exit(&rt_update_global.lock); 778 } 779 #endif 780 781 int 782 
rt_update_prepare(struct rtentry *rt)
{

	dlog(LOG_DEBUG, "%s: updating rt=%p lwp=%p\n", __func__, rt, curlwp);

	/*
	 * Mark the entry RTF_UPDATING so new fetchers either wait or fail,
	 * then become the (single) global updater and wait for existing
	 * references (one refcnt is the caller's own) and psref readers.
	 */
	RTCACHE_WLOCK();
	RT_WLOCK();
	/* If the entry is being destroyed, don't proceed the update. */
	if (!ISSET(rt->rt_flags, RTF_UP)) {
		RT_UNLOCK();
		RTCACHE_UNLOCK();
		return -1;
	}
	rt->rt_flags |= RTF_UPDATING;
	RT_UNLOCK();
	RTCACHE_UNLOCK();

	mutex_enter(&rt_update_global.lock);
	while (rt_update_global.ongoing) {
		dlog(LOG_DEBUG, "%s: waiting ongoing updating rt=%p lwp=%p\n",
		    __func__, rt, curlwp);
		cv_wait(&rt_update_global.cv, &rt_update_global.lock);
		dlog(LOG_DEBUG, "%s: waited ongoing updating rt=%p lwp=%p\n",
		    __func__, rt, curlwp);
	}
	rt_update_global.ongoing = true;
	/* XXX need it to avoid rt_update_wait by updater itself. */
	rt_update_global.lwp = curlwp;
	mutex_exit(&rt_update_global.lock);

	rt_wait_refcnt("update", rt, 1);
	rt_wait_psref(rt);

	return 0;
}

/* Finish an update started by rt_update_prepare: clear RTF_UPDATING
 * and wake anyone blocked in rt_update_wait. */
void
rt_update_finish(struct rtentry *rt)
{

	RTCACHE_WLOCK();
	RT_WLOCK();
	rt->rt_flags &= ~RTF_UPDATING;
	RT_UNLOCK();
	RTCACHE_UNLOCK();

	mutex_enter(&rt_update_global.lock);
	rt_update_global.ongoing = false;
	rt_update_global.lwp = NULL;
	cv_broadcast(&rt_update_global.cv);
	mutex_exit(&rt_update_global.lock);

	dlog(LOG_DEBUG, "%s: updated rt=%p lwp=%p\n", __func__, rt, curlwp);
}

/*
 * Force a routing table entry to the specified
 * destination to go through the given gateway.
 * Normally called as a result of a routing redirect
 * message from the network layer.
 *
 * N.B.: must be called at splsoftnet
 */
void
rtredirect(const struct sockaddr *dst, const struct sockaddr *gateway,
	const struct sockaddr *netmask, int flags, const struct sockaddr *src,
	struct rtentry **rtp)
{
	struct rtentry *rt;
	int error = 0;
	uint64_t *stat = NULL;
	struct rt_addrinfo info;
	struct ifaddr *ifa;
	struct psref psref;

	/* verify the gateway is directly reachable */
	if ((ifa = ifa_ifwithnet_psref(gateway, &psref)) == NULL) {
		error = ENETUNREACH;
		goto out;
	}
	rt = rtalloc1(dst, 0);
	/*
	 * If the redirect isn't from our current router for this dst,
	 * it's either old or wrong.  If it redirects us to ourselves,
	 * we have a routing loop, perhaps as a result of an interface
	 * going down recently.
	 */
	if (!(flags & RTF_DONE) && rt &&
	    (sockaddr_cmp(src, rt->rt_gateway) != 0 || rt->rt_ifa != ifa))
		error = EINVAL;
	else {
		int s = pserialize_read_enter();
		struct ifaddr *_ifa;

		/* a redirect to one of our own addresses is a loop */
		_ifa = ifa_ifwithaddr(gateway);
		if (_ifa != NULL)
			error = EHOSTUNREACH;
		pserialize_read_exit(s);
	}
	if (error)
		goto done;
	/*
	 * Create a new entry if we just got back a wildcard entry
	 * or the lookup failed.  This is necessary for hosts
	 * which use routing redirects generated by smart gateways
	 * to dynamically build the routing tables.
	 */
	if (rt == NULL || (rt_mask(rt) && rt_mask(rt)->sa_len < 2))
		goto create;
	/*
	 * Don't listen to the redirect if it's
	 * for a route to an interface.
	 */
	if (rt->rt_flags & RTF_GATEWAY) {
		if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) {
			/*
			 * Changing from route to net => route to host.
			 * Create new route, rather than smashing route to net.
			 */
create:
			if (rt != NULL)
				rt_unref(rt);
			flags |= RTF_GATEWAY | RTF_DYNAMIC;
			memset(&info, 0, sizeof(info));
			info.rti_info[RTAX_DST] = dst;
			info.rti_info[RTAX_GATEWAY] = gateway;
			info.rti_info[RTAX_NETMASK] = netmask;
			info.rti_ifa = ifa;
			info.rti_flags = flags;
			rt = NULL;
			error = rtrequest1(RTM_ADD, &info, &rt);
			if (rt != NULL)
				flags = rt->rt_flags;
			stat = &rtstat.rts_dynamic;
		} else {
			/*
			 * Smash the current notion of the gateway to
			 * this destination.  Should check about netmask!!!
			 */
#ifdef NET_MPSAFE
			KASSERT(!cpu_softintr_p());

			error = rt_update_prepare(rt);
			if (error == 0) {
#endif
				error = rt_setgate(rt, gateway);
				if (error == 0) {
					rt->rt_flags |= RTF_MODIFIED;
					flags |= RTF_MODIFIED;
				}
#ifdef NET_MPSAFE
				rt_update_finish(rt);
			} else {
				/*
				 * If error != 0, the rtentry is being
				 * destroyed, so doing nothing doesn't
				 * matter.
				 */
			}
#endif
			stat = &rtstat.rts_newgateway;
		}
	} else
		error = EHOSTUNREACH;
done:
	if (rt) {
		if (rtp != NULL && !error)
			*rtp = rt;
		else
			rt_unref(rt);
	}
out:
	if (error)
		rtstat.rts_badredirect++;
	else if (stat != NULL)
		(*stat)++;
	memset(&info, 0, sizeof(info));
	info.rti_info[RTAX_DST] = dst;
	info.rti_info[RTAX_GATEWAY] = gateway;
	info.rti_info[RTAX_NETMASK] = netmask;
	info.rti_info[RTAX_AUTHOR] = src;
	rt_missmsg(RTM_REDIRECT, &info, flags, error);
	ifa_release(ifa, &psref);
}

/*
 * Delete a route and generate a message.
 * It doesn't free a passed rt.
 */
static int
rtdeletemsg(struct rtentry *rt)
{
	int error;
	struct rt_addrinfo info;
	struct rtentry *retrt;

	/*
	 * Request the new route so that the entry is not actually
	 * deleted.  That will allow the information being reported to
	 * be accurate (and consistent with route_output()).
	 */
	memset(&info, 0, sizeof(info));
	info.rti_info[RTAX_DST] = rt_getkey(rt);
	info.rti_info[RTAX_NETMASK] = rt_mask(rt);
	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
	info.rti_flags = rt->rt_flags;
	error = rtrequest1(RTM_DELETE, &info, &retrt);

	rt_missmsg(RTM_DELETE, &info, info.rti_flags, error);

	return error;
}

/*
 * Find the ifaddr to use for a route with the given flags, destination
 * and gateway; the returned ifaddr is psref'ed into *psref and must be
 * released by the caller.
 */
struct ifaddr *
ifa_ifwithroute_psref(int flags, const struct sockaddr *dst,
	const struct sockaddr *gateway, struct psref *psref)
{
	struct ifaddr *ifa = NULL;

	if ((flags & RTF_GATEWAY) == 0) {
		/*
		 * If we are adding a route to an interface,
		 * and the interface is a pt to pt link
		 * we should search for the destination
		 * as our clue to the interface.  Otherwise
		 * we can use the local address.
		 */
		if ((flags & RTF_HOST) && gateway->sa_family != AF_LINK)
			ifa = ifa_ifwithdstaddr_psref(dst, psref);
		if (ifa == NULL)
			ifa = ifa_ifwithaddr_psref(gateway, psref);
	} else {
		/*
		 * If we are adding a route to a remote net
		 * or host, the gateway may still be on the
		 * other end of a pt to pt link.
		 */
		ifa = ifa_ifwithdstaddr_psref(gateway, psref);
	}
	if (ifa == NULL)
		ifa = ifa_ifwithnet_psref(gateway, psref);
	if (ifa == NULL) {
		/* last resort: take the ifa of the route to dst */
		int s;
		struct rtentry *rt;

		/* XXX we cannot call rtalloc1 if holding the rt lock */
		if (RT_LOCKED())
			rt = rtalloc1_locked(dst, 0, true);
		else
			rt = rtalloc1(dst, 0);
		if (rt == NULL)
			return NULL;
		/*
		 * Just in case. May not need to do this workaround.
		 * Revisit when working on rtentry MP-ification.
		 */
		s = pserialize_read_enter();
		IFADDR_READER_FOREACH(ifa, rt->rt_ifp) {
			if (ifa == rt->rt_ifa)
				break;
		}
		if (ifa != NULL)
			ifa_acquire(ifa, psref);
		pserialize_read_exit(s);
		rt_unref(rt);
		if (ifa == NULL)
			return NULL;
	}
	if (ifa->ifa_addr->sa_family != dst->sa_family) {
		/* prefer an ifaddr of dst's family on the same interface */
		struct ifaddr *nifa;
		int s;

		s = pserialize_read_enter();
		nifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
		if (nifa != NULL) {
			ifa_release(ifa, psref);
			ifa_acquire(nifa, psref);
			ifa = nifa;
		}
		pserialize_read_exit(s);
	}
	return ifa;
}

/*
 * If it succeeds and ret_nrt isn't NULL, refcnt of ret_nrt is incremented.
 * The caller has to rtfree it by itself.
 */
int
rtrequest(int req, const struct sockaddr *dst, const struct sockaddr *gateway,
	const struct sockaddr *netmask, int flags, struct rtentry **ret_nrt)
{
	struct rt_addrinfo info;

	memset(&info, 0, sizeof(info));
	info.rti_flags = flags;
	info.rti_info[RTAX_DST] = dst;
	info.rti_info[RTAX_GATEWAY] = gateway;
	info.rti_info[RTAX_NETMASK] = netmask;
	return rtrequest1(req, &info, ret_nrt);
}

/*
 * It's a utility function to add/remove a route to/from the routing table
 * and tell user processes the addition/removal on success.
 */
int
rtrequest_newmsg(const int req, const struct sockaddr *dst,
	const struct sockaddr *gateway, const struct sockaddr *netmask,
	const int flags)
{
	int error;
	struct rtentry *ret_nrt = NULL;

	KASSERT(req == RTM_ADD || req == RTM_DELETE);

	error = rtrequest(req, dst, gateway, netmask, flags, &ret_nrt);
	if (error != 0)
		return error;

	KASSERT(ret_nrt != NULL);

	rt_newmsg(req, ret_nrt); /* tell user process */
	/* A deleted route is released for good; an added one just unref'ed. */
	if (req == RTM_DELETE)
		rt_free(ret_nrt);
	else
		rt_unref(ret_nrt);

	return 0;
}

/*
 * Resolve the ifnet named by info's RTAX_IFP sockaddr (AF_LINK), if any.
 * On success info->rti_ifp is set and referenced through psref.
 * Returns NULL if info->rti_ifp was already set or no match was found.
 */
struct ifnet *
rt_getifp(struct rt_addrinfo *info, struct psref *psref)
{
	const struct sockaddr *ifpaddr = info->rti_info[RTAX_IFP];

	if (info->rti_ifp != NULL)
		return NULL;
	/*
	 * ifp may be specified by sockaddr_dl when protocol address
	 * is ambiguous
	 */
	if (ifpaddr != NULL && ifpaddr->sa_family == AF_LINK) {
		struct ifaddr *ifa;
		int s = pserialize_read_enter();

		ifa = ifa_ifwithnet(ifpaddr);
		if (ifa != NULL)
			info->rti_ifp = if_get_byindex(ifa->ifa_ifp->if_index,
			    psref);
		pserialize_read_exit(s);
	}

	return info->rti_ifp;
}

/*
 * Choose the ifaddr for the route described by info, trying RTAX_IFA,
 * then the supplied ifp, then dst/gateway heuristics.  On success
 * info->rti_ifa (and, if unset, info->rti_ifp) is filled in and the
 * ifaddr is referenced through psref.
 */
struct ifaddr *
rt_getifa(struct rt_addrinfo *info, struct psref *psref)
{
	struct ifaddr *ifa = NULL;
	const struct sockaddr *dst = info->rti_info[RTAX_DST];
	const struct sockaddr *gateway = info->rti_info[RTAX_GATEWAY];
	const struct sockaddr *ifaaddr = info->rti_info[RTAX_IFA];
	int flags = info->rti_flags;
	const struct sockaddr *sa;

	/* An explicit interface address wins if it matches. */
	if (info->rti_ifa == NULL && ifaaddr != NULL) {
		ifa = ifa_ifwithaddr_psref(ifaaddr, psref);
		if (ifa != NULL)
			goto got;
	}

	/* Fall back on ifaaddr, then gateway, then dst as the search key. */
	sa = ifaaddr != NULL ? ifaaddr :
	    (gateway != NULL ? gateway : dst);
	if (sa != NULL && info->rti_ifp != NULL)
		ifa = ifaof_ifpforaddr_psref(sa, info->rti_ifp, psref);
	else if (dst != NULL && gateway != NULL)
		ifa = ifa_ifwithroute_psref(flags, dst, gateway, psref);
	else if (sa != NULL)
		ifa = ifa_ifwithroute_psref(flags, sa, sa, psref);
	if (ifa == NULL)
		return NULL;
got:
	if (ifa->ifa_getifa != NULL) {
		/* FIXME NOMPSAFE */
		ifa = (*ifa->ifa_getifa)(ifa, dst);
		if (ifa == NULL)
			return NULL;
		ifa_acquire(ifa, psref);
	}
	info->rti_ifa = ifa;
	if (info->rti_ifp == NULL)
		info->rti_ifp = ifa->ifa_ifp;
	return ifa;
}

/*
 * If it succeeds and ret_nrt isn't NULL, refcnt of ret_nrt is incremented.
 * The caller has to rtfree it by itself.
 */
int
rtrequest1(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt)
{
	int s = splsoftnet(), ss;
	int error = 0, rc;
	struct rtentry *rt;
	rtbl_t *rtbl;
	struct ifaddr *ifa = NULL, *ifa2 = NULL;
	struct sockaddr_storage maskeddst;
	const struct sockaddr *dst = info->rti_info[RTAX_DST];
	const struct sockaddr *gateway = info->rti_info[RTAX_GATEWAY];
	const struct sockaddr *netmask = info->rti_info[RTAX_NETMASK];
	int flags = info->rti_flags;
	struct psref psref_ifp, psref_ifa;
	int bound = 0;
	struct ifnet *ifp = NULL;
	bool need_to_release_ifa = true;
	bool need_unlock = true;
#define senderr(x) { error = x ; goto bad; }

	RT_WLOCK();

	bound = curlwp_bind();
	if ((rtbl = rt_gettable(dst->sa_family)) == NULL)
		senderr(ESRCH);
	/* A host route carries no netmask. */
	if (flags & RTF_HOST)
		netmask = NULL;
	switch (req) {
	case RTM_DELETE:
		if (netmask) {
			rt_maskedcopy(dst, (struct sockaddr *)&maskeddst,
			    netmask);
			dst = (struct sockaddr *)&maskeddst;
		}
		if ((rt = rt_lookup(rtbl, dst, netmask)) == NULL)
			senderr(ESRCH);
		if ((rt = rt_deladdr(rtbl, dst,
		    netmask)) == NULL)
			senderr(ESRCH);
		rt->rt_flags &= ~RTF_UP;
		if ((ifa = rt->rt_ifa)) {
			if (ifa->ifa_flags & IFA_ROUTE &&
			    rt_ifa_connected(rt, ifa)) {
				RT_DPRINTF("rt->_rt_key = %p, ifa = %p, "
				    "deleted IFA_ROUTE\n",
				    (void *)rt->_rt_key, (void *)ifa);
				ifa->ifa_flags &= ~IFA_ROUTE;
			}
			if (ifa->ifa_rtrequest)
				ifa->ifa_rtrequest(RTM_DELETE, rt, info);
			ifa = NULL;
		}
		rttrash++;
		if (ret_nrt) {
			/* Hand the deleted entry back, referenced. */
			*ret_nrt = rt;
			rt_ref(rt);
			RT_REFCNT_TRACE(rt);
		}
		/* Drop the lock before timer/cache cleanup. */
		RT_UNLOCK();
		need_unlock = false;
		rt_timer_remove_all(rt);
		rtcache_clear_rtentry(dst->sa_family, rt);
		if (ret_nrt == NULL) {
			/* Adjust the refcount */
			rt_ref(rt);
			RT_REFCNT_TRACE(rt);
			rt_free(rt);
		}
		break;

	case RTM_ADD:
		if (info->rti_ifa == NULL) {
			ifp = rt_getifp(info, &psref_ifp);
			ifa = rt_getifa(info, &psref_ifa);
			if (ifa == NULL)
				senderr(ENETUNREACH);
		} else {
			/* Caller should have a reference of ifa */
			ifa = info->rti_ifa;
			need_to_release_ifa = false;
		}
		rt = pool_get(&rtentry_pool, PR_NOWAIT);
		if (rt == NULL)
			senderr(ENOBUFS);
		memset(rt, 0, sizeof(*rt));
		rt->rt_flags = RTF_UP | flags;
		LIST_INIT(&rt->rt_timer);

		RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
		/* Key is dst masked with netmask (or dst as-is for hosts). */
		if (netmask) {
			rt_maskedcopy(dst, (struct sockaddr *)&maskeddst,
			    netmask);
			rt_setkey(rt, (struct sockaddr *)&maskeddst, M_NOWAIT);
		} else {
			rt_setkey(rt, dst, M_NOWAIT);
		}
		RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
		if (rt_getkey(rt) == NULL ||
		    rt_setgate(rt, gateway) != 0) {
			pool_put(&rtentry_pool, rt);
			senderr(ENOBUFS);
		}

		rt_set_ifa(rt, ifa);
		if (info->rti_info[RTAX_TAG] != NULL) {
			const struct sockaddr *tag;
			tag = rt_settag(rt, info->rti_info[RTAX_TAG]);
			if (tag == NULL)
				senderr(ENOBUFS);
		}
		RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);

		/* RTAX_IFP, if supplied, overrides the ifa's interface. */
		ss = pserialize_read_enter();
		if (info->rti_info[RTAX_IFP] != NULL) {
			ifa2 = ifa_ifwithnet(info->rti_info[RTAX_IFP]);
			if (ifa2 != NULL)
				rt->rt_ifp = ifa2->ifa_ifp;
			else
				rt->rt_ifp = ifa->ifa_ifp;
		} else
			rt->rt_ifp = ifa->ifa_ifp;
		pserialize_read_exit(ss);
		cv_init(&rt->rt_cv, "rtentry");
		psref_target_init(&rt->rt_psref, rt_psref_class);

		RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
		rc = rt_addaddr(rtbl, rt, netmask);
		RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
		if (rc != 0) {
			ifafree(ifa); /* for rt_set_ifa above */
			cv_destroy(&rt->rt_cv);
			rt_destroy(rt);
			pool_put(&rtentry_pool, rt);
			senderr(rc);
		}
		RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
		if (ifa->ifa_rtrequest)
			ifa->ifa_rtrequest(req, rt, info);
		if (need_to_release_ifa)
			ifa_release(ifa, &psref_ifa);
		ifa = NULL;
		if_put(ifp, &psref_ifp);
		ifp = NULL;
		RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
		if (ret_nrt) {
			*ret_nrt = rt;
			rt_ref(rt);
			RT_REFCNT_TRACE(rt);
		}
		RT_UNLOCK();
		need_unlock = false;
		/* Invalidate cached routes of this family. */
		rtflushall(dst->sa_family);
		break;
	case RTM_GET:
		if (netmask != NULL) {
			rt_maskedcopy(dst, (struct sockaddr *)&maskeddst,
			    netmask);
			dst = (struct sockaddr *)&maskeddst;
		}
		if ((rt = rt_lookup(rtbl, dst, netmask)) == NULL)
			senderr(ESRCH);
		if (ret_nrt != NULL) {
			*ret_nrt = rt;
			rt_ref(rt);
			RT_REFCNT_TRACE(rt);
		}
		break;
	}
bad:
	/* Common exit: drop references, lock and spl as needed. */
	if (need_to_release_ifa)
		ifa_release(ifa, &psref_ifa);
	if_put(ifp, &psref_ifp);
	curlwp_bindx(bound);
	if (need_unlock)
		RT_UNLOCK();
	splx(s);
	return error;
}

/*
 * Replace rt's gateway address with a private copy of gate.
 * For RTF_GATEWAY routes, also inherit a smaller MTU from the route
 * to the gateway.  Returns 0 or ENOMEM.
 */
int
rt_setgate(struct rtentry *rt, const struct sockaddr *gate)
{
	struct sockaddr *new, *old;

	KASSERT(rt->_rt_key != NULL);
	RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);

	new = sockaddr_dup(gate, M_ZERO | M_NOWAIT);
	if (new == NULL)
		return ENOMEM;

	/* Install the copy before freeing the old one. */
	old = rt->rt_gateway;
	rt->rt_gateway = new;
	if (old != NULL)
		sockaddr_free(old);

	KASSERT(rt->_rt_key != NULL);
	RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);

	if (rt->rt_flags & RTF_GATEWAY) {
		struct rtentry *gwrt;

		/* XXX we cannot call rtalloc1 if holding the rt lock */
		if (RT_LOCKED())
			gwrt = rtalloc1_locked(gate, 1, false);
		else
			gwrt = rtalloc1(gate, 1);
		/*
		 * If we switched gateways, grab the MTU from the new
		 * gateway route if the current MTU is greater than the
		 * MTU of the gateway.
		 * Note that, if the MTU of gateway is 0, we will reset the
		 * MTU of the route to run PMTUD again from scratch. XXX
		 */
		if (gwrt != NULL) {
			KASSERT(gwrt->_rt_key != NULL);
			RT_DPRINTF("gwrt->_rt_key = %p\n", gwrt->_rt_key);
			if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0 &&
			    rt->rt_rmx.rmx_mtu &&
			    rt->rt_rmx.rmx_mtu > gwrt->rt_rmx.rmx_mtu) {
				rt->rt_rmx.rmx_mtu = gwrt->rt_rmx.rmx_mtu;
			}
			rt_unref(gwrt);
		}
	}
	KASSERT(rt->_rt_key != NULL);
	RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
	return 0;
}

/*
 * Copy src into dst, ANDing the address bytes with netmask and zeroing
 * whatever of src's length extends beyond the mask.
 */
static void
rt_maskedcopy(const struct sockaddr *src, struct sockaddr *dst,
	const struct sockaddr *netmask)
{
	const char *netmaskp = &netmask->sa_data[0],
	    *srcp = &src->sa_data[0];
	char *dstp = &dst->sa_data[0];
	const char *maskend = (char *)dst + MIN(netmask->sa_len, src->sa_len);
	const char *srcend = (char *)dst + src->sa_len;

	dst->sa_len = src->sa_len;
	dst->sa_family = src->sa_family;

	while (dstp < maskend)
		*dstp++ = *srcp++ & *netmaskp++;
	if (dstp < srcend)
		memset(dstp, 0, (size_t)(srcend - dstp));
}
1429 1430 /* 1431 * Inform the routing socket of a route change. 1432 */ 1433 void 1434 rt_newmsg(const int cmd, const struct rtentry *rt) 1435 { 1436 struct rt_addrinfo info; 1437 1438 memset((void *)&info, 0, sizeof(info)); 1439 info.rti_info[RTAX_DST] = rt_getkey(rt); 1440 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 1441 info.rti_info[RTAX_NETMASK] = rt_mask(rt); 1442 if (rt->rt_ifp) { 1443 info.rti_info[RTAX_IFP] = rt->rt_ifp->if_dl->ifa_addr; 1444 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; 1445 } 1446 1447 rt_missmsg(cmd, &info, rt->rt_flags, 0); 1448 } 1449 1450 /* 1451 * Set up or tear down a routing table entry, normally 1452 * for an interface. 1453 */ 1454 int 1455 rtinit(struct ifaddr *ifa, int cmd, int flags) 1456 { 1457 struct rtentry *rt; 1458 struct sockaddr *dst, *odst; 1459 struct sockaddr_storage maskeddst; 1460 struct rtentry *nrt = NULL; 1461 int error; 1462 struct rt_addrinfo info; 1463 1464 dst = flags & RTF_HOST ? ifa->ifa_dstaddr : ifa->ifa_addr; 1465 if (cmd == RTM_DELETE) { 1466 if ((flags & RTF_HOST) == 0 && ifa->ifa_netmask) { 1467 /* Delete subnet route for this interface */ 1468 odst = dst; 1469 dst = (struct sockaddr *)&maskeddst; 1470 rt_maskedcopy(odst, dst, ifa->ifa_netmask); 1471 } 1472 if ((rt = rtalloc1(dst, 0)) != NULL) { 1473 if (rt->rt_ifa != ifa) { 1474 rt_unref(rt); 1475 return (flags & RTF_HOST) ? EHOSTUNREACH 1476 : ENETUNREACH; 1477 } 1478 rt_unref(rt); 1479 } 1480 } 1481 memset(&info, 0, sizeof(info)); 1482 info.rti_ifa = ifa; 1483 info.rti_flags = flags | ifa->ifa_flags; 1484 info.rti_info[RTAX_DST] = dst; 1485 info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr; 1486 1487 /* 1488 * XXX here, it seems that we are assuming that ifa_netmask is NULL 1489 * for RTF_HOST. bsdi4 passes NULL explicitly (via intermediate 1490 * variable) when RTF_HOST is 1. still not sure if i can safely 1491 * change it to meet bsdi4 behavior. 
1492 */ 1493 if (cmd != RTM_LLINFO_UPD) 1494 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; 1495 error = rtrequest1((cmd == RTM_LLINFO_UPD) ? RTM_GET : cmd, &info, 1496 &nrt); 1497 if (error != 0) 1498 return error; 1499 1500 rt = nrt; 1501 RT_REFCNT_TRACE(rt); 1502 switch (cmd) { 1503 case RTM_DELETE: 1504 rt_newmsg(cmd, rt); 1505 rt_free(rt); 1506 break; 1507 case RTM_LLINFO_UPD: 1508 if (cmd == RTM_LLINFO_UPD && ifa->ifa_rtrequest != NULL) 1509 ifa->ifa_rtrequest(RTM_LLINFO_UPD, rt, &info); 1510 rt_newmsg(RTM_CHANGE, rt); 1511 rt_unref(rt); 1512 break; 1513 case RTM_ADD: 1514 /* 1515 * FIXME NOMPSAFE: the rtentry is updated with the existence 1516 * of refeferences of it. 1517 */ 1518 /* 1519 * XXX it looks just reverting rt_ifa replaced by ifa_rtrequest 1520 * called via rtrequest1. Can we just prevent the replacement 1521 * somehow and remove the following code? And also doesn't 1522 * calling ifa_rtrequest(RTM_ADD) replace rt_ifa again? 1523 */ 1524 if (rt->rt_ifa != ifa) { 1525 printf("rtinit: wrong ifa (%p) was (%p)\n", ifa, 1526 rt->rt_ifa); 1527 if (rt->rt_ifa->ifa_rtrequest != NULL) { 1528 rt->rt_ifa->ifa_rtrequest(RTM_DELETE, rt, 1529 &info); 1530 } 1531 rt_replace_ifa(rt, ifa); 1532 rt->rt_ifp = ifa->ifa_ifp; 1533 if (ifa->ifa_rtrequest != NULL) 1534 ifa->ifa_rtrequest(RTM_ADD, rt, &info); 1535 } 1536 rt_newmsg(cmd, rt); 1537 rt_unref(rt); 1538 RT_REFCNT_TRACE(rt); 1539 break; 1540 } 1541 return error; 1542 } 1543 1544 /* 1545 * Create a local route entry for the address. 1546 * Announce the addition of the address and the route to the routing socket. 1547 */ 1548 int 1549 rt_ifa_addlocal(struct ifaddr *ifa) 1550 { 1551 struct rtentry *rt; 1552 int e; 1553 1554 /* If there is no loopback entry, allocate one. 
 */
	rt = rtalloc1(ifa->ifa_addr, 0);
#ifdef RT_DEBUG
	if (rt != NULL)
		dump_rt(rt);
#endif
	/* Add a local route only if no loopback host route exists yet. */
	if (rt == NULL || (rt->rt_flags & RTF_HOST) == 0 ||
	    (rt->rt_ifp->if_flags & IFF_LOOPBACK) == 0)
	{
		struct rt_addrinfo info;
		struct rtentry *nrt;

		memset(&info, 0, sizeof(info));
		info.rti_flags = RTF_HOST | RTF_LOCAL;
		if (!(ifa->ifa_ifp->if_flags & (IFF_LOOPBACK|IFF_POINTOPOINT)))
			info.rti_flags |= RTF_LLDATA;
		info.rti_info[RTAX_DST] = ifa->ifa_addr;
		info.rti_info[RTAX_GATEWAY] =
		    (const struct sockaddr *)ifa->ifa_ifp->if_sadl;
		info.rti_ifa = ifa;
		nrt = NULL;
		e = rtrequest1(RTM_ADD, &info, &nrt);
		if (nrt && ifa != nrt->rt_ifa)
			rt_replace_ifa(nrt, ifa);
		rt_newaddrmsg(RTM_ADD, ifa, e, nrt);
		if (nrt != NULL) {
#ifdef RT_DEBUG
			dump_rt(nrt);
#endif
			rt_unref(nrt);
			RT_REFCNT_TRACE(nrt);
		}
	} else {
		/* Route already present; only announce the address. */
		e = 0;
		rt_newaddrmsg(RTM_NEWADDR, ifa, 0, NULL);
	}
	if (rt != NULL)
		rt_unref(rt);
	return e;
}

/*
 * Remove the local route entry for the address.
 * Announce the removal of the address and the route to the routing socket.
 */
int
rt_ifa_remlocal(struct ifaddr *ifa, struct ifaddr *alt_ifa)
{
	struct rtentry *rt;
	int e = 0;

	rt = rtalloc1(ifa->ifa_addr, 0);

	/*
	 * Before deleting, check if a corresponding loopbacked
	 * host route surely exists.  With this check, we can avoid
	 * deleting an interface direct route whose destination is
	 * the same as the address being removed.  This can happen
	 * when removing a subnet-router anycast address on an
	 * interface attached to a shared medium.
	 */
	if (rt != NULL &&
	    (rt->rt_flags & RTF_HOST) &&
	    (rt->rt_ifp->if_flags & IFF_LOOPBACK))
	{
		/* If we cannot replace the route's ifaddr with the equivalent
		 * ifaddr of another interface, I believe it is safest to
		 * delete the route.
		 */
		if (alt_ifa == NULL) {
			e = rtdeletemsg(rt);
			if (e == 0) {
				rt_unref(rt);
				rt_free(rt);
				rt = NULL;
			}
			rt_newaddrmsg(RTM_DELADDR, ifa, 0, NULL);
		} else {
			rt_replace_ifa(rt, alt_ifa);
			rt_newmsg(RTM_CHANGE, rt);
		}
	} else
		rt_newaddrmsg(RTM_DELADDR, ifa, 0, NULL);
	if (rt != NULL)
		rt_unref(rt);
	return e;
}

/*
 * Route timer routines.  These routines allow functions to be called
 * for various routes at any time.  This is useful in supporting
 * path MTU discovery and redirect route deletion.
 *
 * This is similar to some BSDI internal functions, but it provides
 * for multiple queues for efficiency's sake...
 */

/* All active rttimer queues, protected by the rt lock. */
LIST_HEAD(, rttimer_queue) rttimer_queue_head;
static int rt_init_done = 0;

/*
 * Some subtle order problems with domain initialization mean that
 * we cannot count on this being run from rt_init before various
 * protocol initializations are done.  Therefore, we make sure
 * that this is run when the first queue is added...
 */

static void rt_timer_work(struct work *, void *);

/*
 * One-time initialization of the route timer machinery: locks, the
 * queue list, the callout and its backing workqueue.  Called lazily
 * from rt_timer_queue_create().
 */
static void
rt_timer_init(void)
{
	int error;

	assert(rt_init_done == 0);

	/* XXX should be in rt_init */
	rw_init(&rt_lock);
	rw_init(&rtcache_lock);

	LIST_INIT(&rttimer_queue_head);
	callout_init(&rt_timer_ch, CALLOUT_MPSAFE);
	error = workqueue_create(&rt_timer_wq, "rt_timer",
	    rt_timer_work, NULL, PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE);
	if (error)
		panic("%s: workqueue_create failed (%d)\n", __func__, error);
	callout_reset(&rt_timer_ch, hz, rt_timer_timer, NULL);
	rt_init_done = 1;
}

/*
 * Allocate a new timer queue with the given timeout (seconds) and link
 * it onto the global queue list.  Returns NULL on allocation failure.
 */
struct rttimer_queue *
rt_timer_queue_create(u_int timeout)
{
	struct rttimer_queue *rtq;

	if (rt_init_done == 0)
		rt_timer_init();

	R_Malloc(rtq, struct rttimer_queue *, sizeof *rtq);
	if (rtq == NULL)
		return NULL;
	memset(rtq, 0, sizeof(*rtq));

	rtq->rtq_timeout = timeout;
	TAILQ_INIT(&rtq->rtq_head);
	RT_WLOCK();
	LIST_INSERT_HEAD(&rttimer_queue_head, rtq, rtq_link);
	RT_UNLOCK();

	return rtq;
}

/* Change the queue's timeout; existing entries keep their start time. */
void
rt_timer_queue_change(struct rttimer_queue *rtq, long timeout)
{

	rtq->rtq_timeout = timeout;
}

/*
 * Fire and discard every timer on the queue.  Called with the rt write
 * lock held; the lock is dropped around each callback invocation.
 */
static void
rt_timer_queue_remove_all(struct rttimer_queue *rtq)
{
	struct rttimer *r;

	RT_ASSERT_WLOCK();

	while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL) {
		LIST_REMOVE(r, rtt_link);
		TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
		rt_ref(r->rtt_rt); /* XXX */
		RT_REFCNT_TRACE(r->rtt_rt);
		RT_UNLOCK();
		(*r->rtt_func)(r->rtt_rt, r);
		pool_put(&rttimer_pool, r);
		RT_WLOCK();
		if (rtq->rtq_count > 0)
			rtq->rtq_count--;
		else
			printf("rt_timer_queue_remove_all: "
			    "rtq_count reached 0\n");
	}
}

/* Drain the queue and unlink it from the global list. */
void
rt_timer_queue_destroy(struct rttimer_queue *rtq)
{

	RT_WLOCK();
	rt_timer_queue_remove_all(rtq);
	LIST_REMOVE(rtq, rtq_link);
	RT_UNLOCK();

	/*
	 * Caller is responsible for freeing the rttimer_queue structure.
	 */
}

/* Number of timers currently on the queue. */
unsigned long
rt_timer_count(struct rttimer_queue *rtq)
{
	return rtq->rtq_count;
}

/*
 * Detach and free every timer attached to rt without invoking the
 * callbacks.  Used when the route itself is being deleted.
 */
static void
rt_timer_remove_all(struct rtentry *rt)
{
	struct rttimer *r;

	RT_WLOCK();
	while ((r = LIST_FIRST(&rt->rt_timer)) != NULL) {
		LIST_REMOVE(r, rtt_link);
		TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next);
		if (r->rtt_queue->rtq_count > 0)
			r->rtt_queue->rtq_count--;
		else
			printf("rt_timer_remove_all: rtq_count reached 0\n");
		pool_put(&rttimer_pool, r);
	}
	RT_UNLOCK();
}

/*
 * Arm func to run for rt after queue's timeout expires.  Reuses an
 * existing timer with the same callback if present.  Returns 0 or
 * ENOBUFS.
 */
int
rt_timer_add(struct rtentry *rt,
	void (*func)(struct rtentry *, struct rttimer *),
	struct rttimer_queue *queue)
{
	struct rttimer *r;

	KASSERT(func != NULL);
	RT_WLOCK();
	/*
	 * If there's already a timer with this action, destroy it before
	 * we add a new one.
	 */
	LIST_FOREACH(r, &rt->rt_timer, rtt_link) {
		if (r->rtt_func == func)
			break;
	}
	if (r != NULL) {
		/* Recycle the old timer record in place. */
		LIST_REMOVE(r, rtt_link);
		TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next);
		if (r->rtt_queue->rtq_count > 0)
			r->rtt_queue->rtq_count--;
		else
			printf("rt_timer_add: rtq_count reached 0\n");
	} else {
		r = pool_get(&rttimer_pool, PR_NOWAIT);
		if (r == NULL) {
			RT_UNLOCK();
			return ENOBUFS;
		}
	}

	memset(r, 0, sizeof(*r));

	r->rtt_rt = rt;
	r->rtt_time = time_uptime;
	r->rtt_func = func;
	r->rtt_queue = queue;
	LIST_INSERT_HEAD(&rt->rt_timer, r, rtt_link);
	TAILQ_INSERT_TAIL(&queue->rtq_head, r, rtt_next);
	r->rtt_queue->rtq_count++;

	RT_UNLOCK();

	return 0;
}

/*
 * Workqueue handler: fire every expired timer on every queue, then
 * re-arm the one-second callout.  The rt lock is dropped around each
 * callback invocation.
 */
static void
rt_timer_work(struct work *wk, void *arg)
{
	struct rttimer_queue *rtq;
	struct rttimer *r;

	RT_WLOCK();
	LIST_FOREACH(rtq, &rttimer_queue_head, rtq_link) {
		while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL &&
		    (r->rtt_time + rtq->rtq_timeout) < time_uptime) {
			LIST_REMOVE(r, rtt_link);
			TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
			rt_ref(r->rtt_rt); /* XXX */
			RT_REFCNT_TRACE(r->rtt_rt);
			RT_UNLOCK();
			(*r->rtt_func)(r->rtt_rt, r);
			pool_put(&rttimer_pool, r);
			RT_WLOCK();
			if (rtq->rtq_count > 0)
				rtq->rtq_count--;
			else
				/* message keeps its historical name */
				printf("rt_timer_timer: rtq_count reached 0\n");
		}
	}
	RT_UNLOCK();

	callout_reset(&rt_timer_ch, hz, rt_timer_timer, NULL);
}

/* Callout handler: defer the actual scan to the workqueue. */
static void
rt_timer_timer(void *arg)
{

	workqueue_enqueue(rt_timer_wq, &rt_timer_wk, NULL);
}

/*
 * Fill the route cache ro by looking up its destination; flag is
 * passed through to rtalloc1.  Requires the rtcache write lock.
 * Returns the cached rtentry or NULL.
 */
static struct rtentry *
_rtcache_init(struct route *ro, int flag)
{
	struct rtentry *rt;

	rtcache_invariants(ro);
	KASSERT(ro->_ro_rt == NULL);
	RTCACHE_ASSERT_WLOCK();

	if (rtcache_getdst(ro) == NULL)
		return NULL;
	ro->ro_invalid = false;
	rt = rtalloc1(rtcache_getdst(ro), flag);
	if (rt != NULL && ISSET(rt->rt_flags, RTF_UP)) {
		ro->_ro_rt = rt;
		KASSERT(!ISSET(rt->rt_flags, RTF_UPDATING));
		rtcache_ref(rt, ro);
		rt_unref(rt);
		rtcache(ro);
	} else if (rt != NULL)
		rt_unref(rt);

	rtcache_invariants(ro);
	return ro->_ro_rt;
}

/* Initialize the cache, allowing cloning (flag 1). */
struct rtentry *
rtcache_init(struct route *ro)
{
	struct rtentry *rt;
	RTCACHE_WLOCK();
	rt = _rtcache_init(ro, 1);
	RTCACHE_UNLOCK();
	return rt;
}

/* Initialize the cache without cloning (flag 0). */
struct rtentry *
rtcache_init_noclone(struct route *ro)
{
	struct rtentry *rt;
	RTCACHE_WLOCK();
	rt = _rtcache_init(ro, 0);
	RTCACHE_UNLOCK();
	return rt;
}

/* Drop the cached route and look it up again. */
struct rtentry *
rtcache_update(struct route *ro, int clone)
{
	struct rtentry *rt;
	RTCACHE_WLOCK();
	rtcache_clear(ro);
	rt = _rtcache_init(ro, clone);
	RTCACHE_UNLOCK();
	return rt;
}

/*
 * Copy old_ro's destination and validated route into new_ro.
 * The two route caches must be distinct.
 */
void
rtcache_copy(struct route *new_ro, struct route *old_ro)
{
	struct rtentry *rt;
	int ret;

	KASSERT(new_ro != old_ro);
	rtcache_invariants(new_ro);
	rtcache_invariants(old_ro);

	rt = rtcache_validate(old_ro);

	if (rtcache_getdst(old_ro) == NULL)
		goto out;
	ret = rtcache_setdst(new_ro, rtcache_getdst(old_ro));
	if (ret != 0)
		goto out;

	RTCACHE_WLOCK();
	new_ro->ro_invalid = false;
	if ((new_ro->_ro_rt = rt) != NULL)
		rtcache(new_ro);
	rtcache_invariants(new_ro);
	RTCACHE_UNLOCK();
out:
	rtcache_unref(rt, old_ro);
	return;
}

/* Caches whose route was invalidated are parked on this list. */
static struct dom_rtlist invalid_routes = LIST_HEAD_INITIALIZER(dom_rtlist);

#if defined(RT_DEBUG) && defined(NET_MPSAFE)
/* Debug aid: trace psref acquire/release on a route cache. */
static void
rtcache_trace(const char *func, struct rtentry *rt, struct route *ro)
{
	char dst[64];

	sockaddr_format(ro->ro_sa, dst, 64);
	printf("trace: %s:\tdst=%s cpu=%d lwp=%p psref=%p target=%p\n", func, dst,
	    cpu_index(curcpu()), curlwp, &ro->ro_psref, &rt->rt_psref);
}
#define RTCACHE_PSREF_TRACE(rt, ro)	rtcache_trace(__func__, (rt), (ro))
#else
#define RTCACHE_PSREF_TRACE(rt, ro)	do {} while (0)
#endif

/* Take a psref on rt on behalf of ro (NET_MPSAFE only). */
static void
rtcache_ref(struct rtentry *rt, struct route *ro)
{

	KASSERT(rt != NULL);

#ifdef NET_MPSAFE
	RTCACHE_PSREF_TRACE(rt, ro);
	ro->ro_bound = curlwp_bind();
	psref_acquire(&ro->ro_psref, &rt->rt_psref, rt_psref_class);
#endif
}

/* Release the psref taken by rtcache_ref(); NULL rt is a no-op. */
void
rtcache_unref(struct rtentry *rt, struct route *ro)
{

	if (rt == NULL)
		return;

#ifdef NET_MPSAFE
	psref_release(&ro->ro_psref, &rt->rt_psref, rt_psref_class);
	curlwp_bindx(ro->ro_bound);
	RTCACHE_PSREF_TRACE(rt, ro);
#endif
}

/*
 * Return the cached route if it is still valid (RTF_UP, has an ifp,
 * cache not invalidated), referenced for the caller; otherwise NULL.
 * Called with the rtcache read lock held; may temporarily drop it to
 * wait out an in-progress route update (NET_MPSAFE).
 */
static struct rtentry *
rtcache_validate_locked(struct route *ro)
{
	struct rtentry *rt = NULL;

#ifdef NET_MPSAFE
retry:
#endif
	rt = ro->_ro_rt;
	rtcache_invariants(ro);

	if (ro->ro_invalid) {
		rt = NULL;
		goto out;
	}

	RT_RLOCK();
	if (rt != NULL && (rt->rt_flags & RTF_UP) != 0 && rt->rt_ifp != NULL) {
#ifdef NET_MPSAFE
		if (ISSET(rt->rt_flags, RTF_UPDATING)) {
			if (rt_wait_ok()) {
				RT_UNLOCK();
				RTCACHE_UNLOCK();
				/* We can wait until the update is complete */
				rt_update_wait();
				RTCACHE_RLOCK();
				goto retry;
			} else {
				rt = NULL;
			}
		} else
#endif
			rtcache_ref(rt, ro);
	} else
		rt = NULL;
	RT_UNLOCK();
out:
	return rt;
}

/* Locked wrapper around rtcache_validate_locked(). */
struct rtentry *
rtcache_validate(struct route *ro)
{
	struct rtentry *rt;

	RTCACHE_RLOCK();
	rt = rtcache_validate_locked(ro);
	RTCACHE_UNLOCK();
	return rt;
}

/*
 * Mark every cache on rtlist invalid and move it to the
 * invalid_routes list.  Requires the rtcache write lock.
 */
static void
rtcache_invalidate(struct dom_rtlist *rtlist)
{
	struct route *ro;

	RTCACHE_ASSERT_WLOCK();

	while ((ro = LIST_FIRST(rtlist)) != NULL) {
		rtcache_invariants(ro);
		KASSERT(ro->_ro_rt != NULL);
		ro->ro_invalid = true;
		LIST_REMOVE(ro, ro_rtcache_next);
		LIST_INSERT_HEAD(&invalid_routes, ro, ro_rtcache_next);
		rtcache_invariants(ro);
	}
}

/*
 * Purge rt from every route cache of the given address family.
 * Used when a route is deleted from the table.
 */
static void
rtcache_clear_rtentry(int family, struct rtentry *rt)
{
	struct domain *dom;
	struct route *ro, *nro;

	if ((dom = pffinddomain(family)) == NULL)
		return;

	RTCACHE_WLOCK();
	LIST_FOREACH_SAFE(ro, &dom->dom_rtcache, ro_rtcache_next, nro) {
		if (ro->_ro_rt == rt)
			rtcache_clear(ro);
	}
	RTCACHE_UNLOCK();
}

/*
 * Detach ro from its domain's cache list and forget its route.
 * The destination sockaddr is kept.  Requires the rtcache write lock.
 */
static void
rtcache_clear(struct route *ro)
{

	RTCACHE_ASSERT_WLOCK();

	rtcache_invariants(ro);
	if (ro->_ro_rt == NULL)
		return;

	LIST_REMOVE(ro, ro_rtcache_next);

	ro->_ro_rt = NULL;
	ro->ro_invalid = false;
	rtcache_invariants(ro);
}

/*
 * Look up dst via the cache: on a hit (same destination, still valid)
 * return the cached route; on a miss re-seed the cache with dst and
 * do a fresh lookup.  *hitp (if non-NULL) reports hit (1) or miss (0).
 */
struct rtentry *
rtcache_lookup2(struct route *ro, const struct sockaddr *dst,
    int clone, int *hitp)
{
	const struct sockaddr *odst;
	struct rtentry *rt = NULL;

	RTCACHE_RLOCK();
	odst = rtcache_getdst(ro);
	if (odst == NULL) {
		/* upgrade to the write lock for the miss path */
		RTCACHE_UNLOCK();
		RTCACHE_WLOCK();
		goto miss;
	}

	if (sockaddr_cmp(odst, dst) != 0) {
		/* cached destination differs: discard and redo */
		RTCACHE_UNLOCK();
		RTCACHE_WLOCK();
		rtcache_free_locked(ro);
		goto miss;
	}

	rt = rtcache_validate_locked(ro);
	if (rt == NULL) {
		/* stale entry: clear it and redo */
		RTCACHE_UNLOCK();
		RTCACHE_WLOCK();
		rtcache_clear(ro);
		goto miss;
	}

	rtcache_invariants(ro);

	RTCACHE_UNLOCK();
	if (hitp != NULL)
		*hitp = 1;
	return rt;
miss:
	if (hitp != NULL)
		*hitp = 0;
	if (rtcache_setdst_locked(ro, dst) == 0)
		rt = _rtcache_init(ro, clone);

	rtcache_invariants(ro);

	RTCACHE_UNLOCK();
	return rt;
}

/* Clear the cache and release its destination sockaddr. */
static void
rtcache_free_locked(struct route *ro)
{

	RTCACHE_ASSERT_WLOCK();
	rtcache_clear(ro);
	if (ro->ro_sa != NULL) {
		sockaddr_free(ro->ro_sa);
		ro->ro_sa = NULL;
	}
	rtcache_invariants(ro);
}

/* Locked wrapper around rtcache_free_locked(). */
void
rtcache_free(struct route *ro)
{

	RTCACHE_WLOCK();
	rtcache_free_locked(ro);
	RTCACHE_UNLOCK();
}

/*
 * Set ro's destination to a copy of sa, reusing the existing sockaddr
 * storage when the family matches.  Returns 0 or ENOMEM.
 */
static int
rtcache_setdst_locked(struct route *ro, const struct sockaddr *sa)
{
	KASSERT(sa != NULL);

	RTCACHE_ASSERT_WLOCK();

	rtcache_invariants(ro);
	if (ro->ro_sa != NULL) {
		if (ro->ro_sa->sa_family == sa->sa_family) {
			rtcache_clear(ro);
			sockaddr_copy(ro->ro_sa, ro->ro_sa->sa_len, sa);
			rtcache_invariants(ro);
			return 0;
		}
		/* free ro_sa, wrong family */
		rtcache_free_locked(ro);
	}

	KASSERT(ro->_ro_rt == NULL);

	if ((ro->ro_sa = sockaddr_dup(sa, M_ZERO | M_NOWAIT)) == NULL) {
		rtcache_invariants(ro);
		return ENOMEM;
	}
	rtcache_invariants(ro);
	return 0;
}

/* Locked wrapper around rtcache_setdst_locked(). */
int
rtcache_setdst(struct route *ro, const struct sockaddr *sa)
{
	int error;

	RTCACHE_WLOCK();
	error = rtcache_setdst_locked(ro, sa);
	RTCACHE_UNLOCK();

	return error;
}

/*
 * Attach a copy of tag to rt, replacing any previous tag.
 * Returns the stored tag, or NULL if duplication failed.
 */
const struct sockaddr *
rt_settag(struct rtentry *rt, const struct sockaddr *tag)
{
	if (rt->rt_tag != tag) {
		if (rt->rt_tag != NULL)
			sockaddr_free(rt->rt_tag);
		rt->rt_tag = sockaddr_dup(tag, M_ZERO | M_NOWAIT);
	}
	return rt->rt_tag;
}

/* Return rt's tag sockaddr (may be NULL). */
struct sockaddr *
rt_gettag(const struct rtentry *rt)
{
	return rt->rt_tag;
}

/*
 * For an RTF_REJECT route, return the errno a sender should see
 * (0 otherwise).  An expired non-loopback reject route is not
 * enforced.
 */
int
rt_check_reject_route(const struct rtentry *rt, const struct ifnet *ifp)
{

	if ((rt->rt_flags & RTF_REJECT) != 0) {
		/* Mimic looutput */
		if (ifp->if_flags & IFF_LOOPBACK)
			return
			    (rt->rt_flags & RTF_HOST) ?
				EHOSTUNREACH : ENETUNREACH;
		else if (rt->rt_rmx.rmx_expire == 0 ||
		    time_uptime < rt->rt_rmx.rmx_expire)
			return (rt->rt_flags & RTF_GATEWAY) ?
			    EHOSTUNREACH : EHOSTDOWN;
	}

	return 0;
}

/*
 * Delete every route of the given family for which f(rt, v) matches.
 * The table is re-searched from scratch after each deletion since the
 * locks are dropped to perform it.
 */
void
rt_delete_matched_entries(sa_family_t family, int (*f)(struct rtentry *, void *),
    void *v)
{

	for (;;) {
		int s;
		int error;
		struct rtentry *rt, *retrt = NULL;

		RT_RLOCK();
		s = splsoftnet();
		rt = rtbl_search_matched_entry(family, f, v);
		if (rt == NULL) {
			/* no more matches */
			splx(s);
			RT_UNLOCK();
			return;
		}
		/* pin the entry before dropping the lock */
		rt->rt_refcnt++;
		splx(s);
		RT_UNLOCK();

		error = rtrequest(RTM_DELETE, rt_getkey(rt), rt->rt_gateway,
		    rt_mask(rt), rt->rt_flags, &retrt);
		if (error == 0) {
			KASSERT(retrt == rt);
			KASSERT((retrt->rt_flags & RTF_UP) == 0);
			retrt->rt_ifp = NULL;
			rt_unref(rt);
			rt_free(retrt);
		} else if (error == ESRCH) {
			/* Someone deleted the entry already. */
			rt_unref(rt);
		} else {
			log(LOG_ERR, "%s: unable to delete rtentry @ %p, "
			    "error = %d\n", rt->rt_ifp->if_xname, rt, error);
			/* XXX how to treat this case? */
		}
	}
}

/*
 * Apply f(rt, v) to every route of the given family under the rt read
 * lock; a non-zero return from f aborts the walk and is returned.
 */
int
rt_walktree(sa_family_t family, int (*f)(struct rtentry *, void *), void *v)
{
	int error;

	RT_RLOCK();
	error = rtbl_walktree(family, f, v);
	RT_UNLOCK();

	return error;
}

#ifdef DDB

#include <machine/db_machdep.h>
#include <ddb/db_interface.h>
#include <ddb/db_output.h>

#define rt_expire rt_rmx.rmx_expire

/* Print a sockaddr as a bracketed list of its raw bytes. */
static void
db_print_sa(const struct sockaddr *sa)
{
	int len;
	const u_char *p;

	if (sa == NULL) {
		db_printf("[NULL]");
		return;
	}

	p = (const u_char *)sa;
	len = sa->sa_len;
	db_printf("[");
	while (len > 0) {
		db_printf("%d", *p);
		p++; len--;
		if (len) db_printf(",");
	}
	db_printf("]\n");
}

/* Print the addresses and flags of an ifaddr (NULL is a no-op). */
static void
db_print_ifa(struct ifaddr *ifa)
{
	if (ifa == NULL)
		return;
	db_printf(" ifa_addr=");
	db_print_sa(ifa->ifa_addr);
	db_printf(" ifa_dsta=");
	db_print_sa(ifa->ifa_dstaddr);
	db_printf(" ifa_mask=");
	db_print_sa(ifa->ifa_netmask);
	db_printf(" flags=0x%x,refcnt=%d,metric=%d\n",
	    ifa->ifa_flags,
	    ifa->ifa_refcnt,
	    ifa->ifa_metric);
}

/*
 * Function to pass to rt_walktree().
 * Return non-zero error to abort walk.
 */
static int
db_show_rtentry(struct rtentry *rt, void *w)
{
	db_printf("rtentry=%p", rt);

	db_printf(" flags=0x%x refcnt=%d use=%"PRId64" expire=%"PRId64"\n",
	    rt->rt_flags, rt->rt_refcnt,
	    rt->rt_use, (uint64_t)rt->rt_expire);

	db_printf(" key="); db_print_sa(rt_getkey(rt));
	db_printf(" mask="); db_print_sa(rt_mask(rt));
	db_printf(" gw="); db_print_sa(rt->rt_gateway);

	db_printf(" ifp=%p ", rt->rt_ifp);
	if (rt->rt_ifp)
		db_printf("(%s)", rt->rt_ifp->if_xname);
	else
		db_printf("(NULL)");

	db_printf(" ifa=%p\n", rt->rt_ifa);
	db_print_ifa(rt->rt_ifa);

	db_printf(" gwroute=%p llinfo=%p\n",
	    rt->rt_gwroute, rt->rt_llinfo);

	return 0;
}

/*
 * Function to print all the route trees.
 * Use this from ddb:  "show routes"
 */
void
db_show_routes(db_expr_t addr, bool have_addr,
    db_expr_t count, const char *modif)
{
	rt_walktree(AF_INET, db_show_rtentry, NULL);
}
#endif