1 /* $NetBSD: route.c,v 1.211 2018/07/12 02:26:04 ozaki-r Exp $ */ 2 3 /*- 4 * Copyright (c) 1998, 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Kevin M. Lahey of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 35 * All rights reserved. 36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. 
Redistributions of source code must retain the above copyright 41 * notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. Neither the name of the project nor the names of its contributors 46 * may be used to endorse or promote products derived from this software 47 * without specific prior written permission. 48 * 49 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 52 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 59 * SUCH DAMAGE. 60 */ 61 62 /* 63 * Copyright (c) 1980, 1986, 1991, 1993 64 * The Regents of the University of California. All rights reserved. 65 * 66 * Redistribution and use in source and binary forms, with or without 67 * modification, are permitted provided that the following conditions 68 * are met: 69 * 1. Redistributions of source code must retain the above copyright 70 * notice, this list of conditions and the following disclaimer. 71 * 2. Redistributions in binary form must reproduce the above copyright 72 * notice, this list of conditions and the following disclaimer in the 73 * documentation and/or other materials provided with the distribution. 
74 * 3. Neither the name of the University nor the names of its contributors 75 * may be used to endorse or promote products derived from this software 76 * without specific prior written permission. 77 * 78 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 79 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 80 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 81 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 82 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 83 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 84 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 85 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 86 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 87 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 88 * SUCH DAMAGE. 89 * 90 * @(#)route.c 8.3 (Berkeley) 1/9/95 91 */ 92 93 #ifdef _KERNEL_OPT 94 #include "opt_inet.h" 95 #include "opt_route.h" 96 #include "opt_net_mpsafe.h" 97 #endif 98 99 #include <sys/cdefs.h> 100 __KERNEL_RCSID(0, "$NetBSD: route.c,v 1.211 2018/07/12 02:26:04 ozaki-r Exp $"); 101 102 #include <sys/param.h> 103 #ifdef RTFLUSH_DEBUG 104 #include <sys/sysctl.h> 105 #endif 106 #include <sys/systm.h> 107 #include <sys/callout.h> 108 #include <sys/proc.h> 109 #include <sys/mbuf.h> 110 #include <sys/socket.h> 111 #include <sys/socketvar.h> 112 #include <sys/domain.h> 113 #include <sys/kernel.h> 114 #include <sys/ioctl.h> 115 #include <sys/pool.h> 116 #include <sys/kauth.h> 117 #include <sys/workqueue.h> 118 #include <sys/syslog.h> 119 #include <sys/rwlock.h> 120 #include <sys/mutex.h> 121 #include <sys/cpu.h> 122 123 #include <net/if.h> 124 #include <net/if_dl.h> 125 #include <net/route.h> 126 #if defined(INET) || defined(INET6) 127 #include <net/if_llatbl.h> 128 #endif 129 130 #include 
<netinet/in.h> 131 #include <netinet/in_var.h> 132 133 #define PRESERVED_RTF (RTF_UP | RTF_GATEWAY | RTF_HOST | RTF_DONE | RTF_MASK) 134 135 #ifdef RTFLUSH_DEBUG 136 #define rtcache_debug() __predict_false(_rtcache_debug) 137 #else /* RTFLUSH_DEBUG */ 138 #define rtcache_debug() 0 139 #endif /* RTFLUSH_DEBUG */ 140 141 #ifdef RT_DEBUG 142 #define RT_REFCNT_TRACE(rt) printf("%s:%d: rt=%p refcnt=%d\n", \ 143 __func__, __LINE__, (rt), (rt)->rt_refcnt) 144 #else 145 #define RT_REFCNT_TRACE(rt) do {} while (0) 146 #endif 147 148 #ifdef RT_DEBUG 149 #define dlog(level, fmt, args...) log(level, fmt, ##args) 150 #else 151 #define dlog(level, fmt, args...) do {} while (0) 152 #endif 153 154 struct rtstat rtstat; 155 156 static int rttrash; /* routes not in table but not freed */ 157 158 static struct pool rtentry_pool; 159 static struct pool rttimer_pool; 160 161 static struct callout rt_timer_ch; /* callout for rt_timer_timer() */ 162 static struct workqueue *rt_timer_wq; 163 static struct work rt_timer_wk; 164 165 static void rt_timer_init(void); 166 static void rt_timer_queue_remove_all(struct rttimer_queue *); 167 static void rt_timer_remove_all(struct rtentry *); 168 static void rt_timer_timer(void *); 169 170 /* 171 * Locking notes: 172 * - The routing table is protected by a global rwlock 173 * - API: RT_RLOCK and friends 174 * - rtcaches are NOT protected by the framework 175 * - Callers must guarantee a rtcache isn't accessed simultaneously 176 * - How the constraint is guranteed in the wild 177 * - Protect a rtcache by a mutex (e.g., inp_route) 178 * - Make rtcache per-CPU and allow only accesses from softint 179 * (e.g., ipforward_rt_percpu) 180 * - References to a rtentry is managed by reference counting and psref 181 * - Reference couting is used for temporal reference when a rtentry 182 * is fetched from the routing table 183 * - psref is used for temporal reference when a rtentry is fetched 184 * from a rtcache 185 * - struct route (rtcache) has struct psref, 
so we cannot obtain 186 * a reference twice on the same struct route 187 * - Befere destroying or updating a rtentry, we have to wait for 188 * all references left (see below for details) 189 * - APIs 190 * - An obtained rtentry via rtalloc1 or rtrequest* must be 191 * unreferenced by rt_unref 192 * - An obtained rtentry via rtcache_* must be unreferenced by 193 * rtcache_unref 194 * - TODO: once we get a lockless routing table, we should use only 195 * psref for rtentries 196 * - rtentry destruction 197 * - A rtentry is destroyed (freed) only when we call rtrequest(RTM_DELETE) 198 * - If a caller of rtrequest grabs a reference of a rtentry, the caller 199 * has a responsibility to destroy the rtentry by itself by calling 200 * rt_free 201 * - If not, rtrequest itself does that 202 * - If rt_free is called in softint, the actual destruction routine is 203 * deferred to a workqueue 204 * - rtentry update 205 * - When updating a rtentry, RTF_UPDATING flag is set 206 * - If a rtentry is set RTF_UPDATING, fetching the rtentry from 207 * the routing table or a rtcache results in either of the following 208 * cases: 209 * - if the caller runs in softint, the caller fails to fetch 210 * - otherwise, the caller waits for the update completed and retries 211 * to fetch (probably succeed to fetch for the second time) 212 * - rtcache invalidation 213 * - There is a global generation counter that is incremented when 214 * any routes have been added or deleted 215 * - When a rtcache caches a rtentry into itself, it also stores 216 * a snapshot of the generation counter 217 * - If the snapshot equals to the global counter, the cache is valid, 218 * otherwise the cache is invalidated 219 */ 220 221 /* 222 * Global lock for the routing table. 
223 */ 224 static krwlock_t rt_lock __cacheline_aligned; 225 #ifdef NET_MPSAFE 226 #define RT_RLOCK() rw_enter(&rt_lock, RW_READER) 227 #define RT_WLOCK() rw_enter(&rt_lock, RW_WRITER) 228 #define RT_UNLOCK() rw_exit(&rt_lock) 229 #define RT_WLOCKED() rw_write_held(&rt_lock) 230 #define RT_ASSERT_WLOCK() KASSERT(rw_write_held(&rt_lock)) 231 #else 232 #define RT_RLOCK() do {} while (0) 233 #define RT_WLOCK() do {} while (0) 234 #define RT_UNLOCK() do {} while (0) 235 #define RT_WLOCKED() true 236 #define RT_ASSERT_WLOCK() do {} while (0) 237 #endif 238 239 static uint64_t rtcache_generation; 240 241 /* 242 * mutex and cv that are used to wait for references to a rtentry left 243 * before updating the rtentry. 244 */ 245 static struct { 246 kmutex_t lock; 247 kcondvar_t cv; 248 bool ongoing; 249 const struct lwp *lwp; 250 } rt_update_global __cacheline_aligned; 251 252 /* 253 * A workqueue and stuff that are used to defer the destruction routine 254 * of rtentries. 255 */ 256 static struct { 257 struct workqueue *wq; 258 struct work wk; 259 kmutex_t lock; 260 SLIST_HEAD(, rtentry) queue; 261 bool enqueued; 262 } rt_free_global __cacheline_aligned; 263 264 /* psref for rtentry */ 265 static struct psref_class *rt_psref_class __read_mostly; 266 267 #ifdef RTFLUSH_DEBUG 268 static int _rtcache_debug = 0; 269 #endif /* RTFLUSH_DEBUG */ 270 271 static kauth_listener_t route_listener; 272 273 static int rtdeletemsg(struct rtentry *); 274 275 static void rt_maskedcopy(const struct sockaddr *, 276 struct sockaddr *, const struct sockaddr *); 277 278 static void rtcache_invalidate(void); 279 280 static void rt_ref(struct rtentry *); 281 282 static struct rtentry * 283 rtalloc1_locked(const struct sockaddr *, int, bool, bool); 284 285 static struct ifaddr *rt_getifa(struct rt_addrinfo *, struct psref *); 286 static struct ifnet *rt_getifp(struct rt_addrinfo *, struct psref *); 287 static struct ifaddr *ifa_ifwithroute_psref(int, const struct sockaddr *, 288 const struct 
sockaddr *, struct psref *);

static void rtcache_ref(struct rtentry *, struct route *);

#ifdef NET_MPSAFE
static void rt_update_wait(void);
#endif

static bool rt_wait_ok(void);
static void rt_wait_refcnt(const char *, struct rtentry *, int);
static void rt_wait_psref(struct rtentry *);

#ifdef DDB
static void db_print_sa(const struct sockaddr *);
static void db_print_ifa(struct ifaddr *);
static int db_show_rtentry(struct rtentry *, void *);
#endif

#ifdef RTFLUSH_DEBUG
static void sysctl_net_rtcache_setup(struct sysctllog **);
/*
 * Create the net.rtcache sysctl subtree with a single read-write
 * "debug" knob backed by _rtcache_debug.  Failures are silently
 * ignored; the knob simply does not appear.
 */
static void
sysctl_net_rtcache_setup(struct sysctllog **clog)
{
	const struct sysctlnode *rnode;

	if (sysctl_createv(clog, 0, NULL, &rnode, CTLFLAG_PERMANENT,
	    CTLTYPE_NODE,
	    "rtcache", SYSCTL_DESCR("Route cache related settings"),
	    NULL, 0, NULL, 0, CTL_NET, CTL_CREATE, CTL_EOL) != 0)
		return;
	if (sysctl_createv(clog, 0, &rnode, &rnode,
	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT,
	    "debug", SYSCTL_DESCR("Debug route caches"),
	    NULL, 0, &_rtcache_debug, 0, CTL_CREATE, CTL_EOL) != 0)
		return;
}
#endif /* RTFLUSH_DEBUG */

/*
 * Free the sockaddrs owned by the rtentry (key, gateway and tag, each
 * allocated via sockaddr_dup/sockaddr_alloc) and NULL the pointers so a
 * double free is impossible.  Does not free the rtentry itself.
 */
static inline void
rt_destroy(struct rtentry *rt)
{
	if (rt->_rt_key != NULL)
		sockaddr_free(rt->_rt_key);
	if (rt->rt_gateway != NULL)
		sockaddr_free(rt->rt_gateway);
	if (rt_gettag(rt) != NULL)
		sockaddr_free(rt_gettag(rt));
	rt->_rt_key = rt->rt_gateway = rt->rt_tag = NULL;
}

/*
 * Replace the rtentry's key with a private copy of `key' (flags are the
 * sockaddr_dup allocation flags, e.g. M_NOWAIT) and mirror the pointer
 * into the radix node.  Returns the new key, or NULL if sockaddr_dup
 * failed.  Passing the rtentry's current key is a harmless no-op.
 */
static inline const struct sockaddr *
rt_setkey(struct rtentry *rt, const struct sockaddr *key, int flags)
{
	if (rt->_rt_key == key)
		goto out;

	if (rt->_rt_key != NULL)
		sockaddr_free(rt->_rt_key);
	rt->_rt_key = sockaddr_dup(key, flags);
out:
	/* Keep the radix-tree node's key pointer in sync with ours. */
	rt->rt_nodes->rn_key = (const char *)rt->_rt_key;
	return rt->_rt_key;
}

/*
 * Return the ifaddr for the route.  If the ifaddr provides an
 * ifa_getifa hook, consult it (it may return a different, preferred
 * ifaddr for this destination) and install its answer on the route.
 * May return NULL if the hook yields nothing.
 */
struct ifaddr *
rt_get_ifa(struct rtentry *rt)
{
	struct ifaddr *ifa;

	if ((ifa = rt->rt_ifa) == NULL)
		return ifa;
	else if (ifa->ifa_getifa == NULL)
		return ifa;
#if 0
	else if (ifa->ifa_seqno != NULL && *ifa->ifa_seqno == rt->rt_ifa_seqno)
		return ifa;
#endif
	else {
		ifa = (*ifa->ifa_getifa)(ifa, rt_getkey(rt));
		if (ifa == NULL)
			return NULL;
		rt_replace_ifa(rt, ifa);
		return ifa;
	}
}

/*
 * Point the route at `ifa' and snapshot the ifaddr's sequence number
 * (used by the disabled seqno check in rt_get_ifa above).  Caller is
 * responsible for reference counting.
 */
static void
rt_set_ifa1(struct rtentry *rt, struct ifaddr *ifa)
{
	rt->rt_ifa = ifa;
	if (ifa->ifa_seqno != NULL)
		rt->rt_ifa_seqno = *ifa->ifa_seqno;
}

/*
 * Is this route the connected route for the ifa?
 */
static int
rt_ifa_connected(const struct rtentry *rt, const struct ifaddr *ifa)
{
	const struct sockaddr *key, *dst, *odst;
	struct sockaddr_storage maskeddst;

	key = rt_getkey(rt);
	/* Host routes compare against the peer address, nets against ours. */
	dst = rt->rt_flags & RTF_HOST ? ifa->ifa_dstaddr : ifa->ifa_addr;
	if (dst == NULL ||
	    dst->sa_family != key->sa_family ||
	    dst->sa_len != key->sa_len)
		return 0;
	if ((rt->rt_flags & RTF_HOST) == 0 && ifa->ifa_netmask) {
		/* Mask the address so it compares equal to the route key. */
		odst = dst;
		dst = (struct sockaddr *)&maskeddst;
		rt_maskedcopy(odst, (struct sockaddr *)&maskeddst,
		    ifa->ifa_netmask);
	}
	return (memcmp(dst, key, dst->sa_len) == 0);
}

/*
 * Swap the route's ifaddr for `ifa', migrating the IFA_ROUTE marker
 * (which records "this ifaddr owns a connected route") from the old
 * ifaddr to the new one when the route is the connected route for both.
 * Takes a reference on the new ifaddr and drops one on the old.
 */
void
rt_replace_ifa(struct rtentry *rt, struct ifaddr *ifa)
{
	if (rt->rt_ifa &&
	    rt->rt_ifa != ifa &&
	    rt->rt_ifa->ifa_flags & IFA_ROUTE &&
	    rt_ifa_connected(rt, rt->rt_ifa))
	{
		RT_DPRINTF("rt->_rt_key = %p, ifa = %p, "
		    "replace deleted IFA_ROUTE\n",
		    (void *)rt->_rt_key, (void *)rt->rt_ifa);
		rt->rt_ifa->ifa_flags &= ~IFA_ROUTE;
		if (rt_ifa_connected(rt, ifa)) {
			RT_DPRINTF("rt->_rt_key = %p, ifa = %p, "
			    "replace added IFA_ROUTE\n",
			    (void *)rt->_rt_key, (void *)ifa);
			ifa->ifa_flags |= IFA_ROUTE;
		}
	}

	ifaref(ifa);
	ifafree(rt->rt_ifa);
	rt_set_ifa1(rt, ifa);
}

/*
 * Initial assignment of an ifaddr to a route: take a reference and
 * install it.  Unlike rt_replace_ifa this assumes no previous ifaddr.
 */
static void
rt_set_ifa(struct rtentry *rt, struct ifaddr *ifa)
{
ifaref(ifa); 435 rt_set_ifa1(rt, ifa); 436 } 437 438 static int 439 route_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie, 440 void *arg0, void *arg1, void *arg2, void *arg3) 441 { 442 struct rt_msghdr *rtm; 443 int result; 444 445 result = KAUTH_RESULT_DEFER; 446 rtm = arg1; 447 448 if (action != KAUTH_NETWORK_ROUTE) 449 return result; 450 451 if (rtm->rtm_type == RTM_GET) 452 result = KAUTH_RESULT_ALLOW; 453 454 return result; 455 } 456 457 static void rt_free_work(struct work *, void *); 458 459 void 460 rt_init(void) 461 { 462 int error; 463 464 #ifdef RTFLUSH_DEBUG 465 sysctl_net_rtcache_setup(NULL); 466 #endif 467 468 mutex_init(&rt_free_global.lock, MUTEX_DEFAULT, IPL_SOFTNET); 469 SLIST_INIT(&rt_free_global.queue); 470 rt_free_global.enqueued = false; 471 472 rt_psref_class = psref_class_create("rtentry", IPL_SOFTNET); 473 474 error = workqueue_create(&rt_free_global.wq, "rt_free", 475 rt_free_work, NULL, PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE); 476 if (error) 477 panic("%s: workqueue_create failed (%d)\n", __func__, error); 478 479 mutex_init(&rt_update_global.lock, MUTEX_DEFAULT, IPL_SOFTNET); 480 cv_init(&rt_update_global.cv, "rt_update"); 481 482 pool_init(&rtentry_pool, sizeof(struct rtentry), 0, 0, 0, "rtentpl", 483 NULL, IPL_SOFTNET); 484 pool_init(&rttimer_pool, sizeof(struct rttimer), 0, 0, 0, "rttmrpl", 485 NULL, IPL_SOFTNET); 486 487 rn_init(); /* initialize all zeroes, all ones, mask table */ 488 rtbl_init(); 489 490 route_listener = kauth_listen_scope(KAUTH_SCOPE_NETWORK, 491 route_listener_cb, NULL); 492 } 493 494 static void 495 rtcache_invalidate(void) 496 { 497 498 RT_ASSERT_WLOCK(); 499 500 if (rtcache_debug()) 501 printf("%s: enter\n", __func__); 502 503 rtcache_generation++; 504 } 505 506 #ifdef RT_DEBUG 507 static void 508 dump_rt(const struct rtentry *rt) 509 { 510 char buf[512]; 511 512 log(LOG_DEBUG, "rt: "); 513 log(LOG_DEBUG, "p=%p ", rt); 514 if (rt->_rt_key == NULL) { 515 log(LOG_DEBUG, "dst=(NULL) "); 516 } else 
{ 517 sockaddr_format(rt->_rt_key, buf, sizeof(buf)); 518 log(LOG_DEBUG, "dst=%s ", buf); 519 } 520 if (rt->rt_gateway == NULL) { 521 log(LOG_DEBUG, "gw=(NULL) "); 522 } else { 523 sockaddr_format(rt->_rt_key, buf, sizeof(buf)); 524 log(LOG_DEBUG, "gw=%s ", buf); 525 } 526 log(LOG_DEBUG, "flags=%x ", rt->rt_flags); 527 if (rt->rt_ifp == NULL) { 528 log(LOG_DEBUG, "if=(NULL) "); 529 } else { 530 log(LOG_DEBUG, "if=%s ", rt->rt_ifp->if_xname); 531 } 532 log(LOG_DEBUG, "\n"); 533 } 534 #endif /* RT_DEBUG */ 535 536 /* 537 * Packet routing routines. If success, refcnt of a returned rtentry 538 * will be incremented. The caller has to rtfree it by itself. 539 */ 540 struct rtentry * 541 rtalloc1_locked(const struct sockaddr *dst, int report, bool wait_ok, 542 bool wlock) 543 { 544 rtbl_t *rtbl; 545 struct rtentry *rt; 546 int s; 547 548 #ifdef NET_MPSAFE 549 retry: 550 #endif 551 s = splsoftnet(); 552 rtbl = rt_gettable(dst->sa_family); 553 if (rtbl == NULL) 554 goto miss; 555 556 rt = rt_matchaddr(rtbl, dst); 557 if (rt == NULL) 558 goto miss; 559 560 if (!ISSET(rt->rt_flags, RTF_UP)) 561 goto miss; 562 563 #ifdef NET_MPSAFE 564 if (ISSET(rt->rt_flags, RTF_UPDATING) && 565 /* XXX updater should be always able to acquire */ 566 curlwp != rt_update_global.lwp) { 567 if (!wait_ok || !rt_wait_ok()) 568 goto miss; 569 RT_UNLOCK(); 570 splx(s); 571 572 /* We can wait until the update is complete */ 573 rt_update_wait(); 574 575 if (wlock) 576 RT_WLOCK(); 577 else 578 RT_RLOCK(); 579 goto retry; 580 } 581 #endif /* NET_MPSAFE */ 582 583 rt_ref(rt); 584 RT_REFCNT_TRACE(rt); 585 586 splx(s); 587 return rt; 588 miss: 589 rtstat.rts_unreach++; 590 if (report) { 591 struct rt_addrinfo info; 592 593 memset(&info, 0, sizeof(info)); 594 info.rti_info[RTAX_DST] = dst; 595 rt_missmsg(RTM_MISS, &info, 0, 0); 596 } 597 splx(s); 598 return NULL; 599 } 600 601 struct rtentry * 602 rtalloc1(const struct sockaddr *dst, int report) 603 { 604 struct rtentry *rt; 605 606 RT_RLOCK(); 607 rt = 
rtalloc1_locked(dst, report, true, false); 608 RT_UNLOCK(); 609 610 return rt; 611 } 612 613 static void 614 rt_ref(struct rtentry *rt) 615 { 616 617 KASSERT(rt->rt_refcnt >= 0); 618 atomic_inc_uint(&rt->rt_refcnt); 619 } 620 621 void 622 rt_unref(struct rtentry *rt) 623 { 624 625 KASSERT(rt != NULL); 626 KASSERTMSG(rt->rt_refcnt > 0, "refcnt=%d", rt->rt_refcnt); 627 628 atomic_dec_uint(&rt->rt_refcnt); 629 if (!ISSET(rt->rt_flags, RTF_UP) || ISSET(rt->rt_flags, RTF_UPDATING)) { 630 mutex_enter(&rt_free_global.lock); 631 cv_broadcast(&rt->rt_cv); 632 mutex_exit(&rt_free_global.lock); 633 } 634 } 635 636 static bool 637 rt_wait_ok(void) 638 { 639 640 KASSERT(!cpu_intr_p()); 641 return !cpu_softintr_p(); 642 } 643 644 void 645 rt_wait_refcnt(const char *title, struct rtentry *rt, int cnt) 646 { 647 mutex_enter(&rt_free_global.lock); 648 while (rt->rt_refcnt > cnt) { 649 dlog(LOG_DEBUG, "%s: %s waiting (refcnt=%d)\n", 650 __func__, title, rt->rt_refcnt); 651 cv_wait(&rt->rt_cv, &rt_free_global.lock); 652 dlog(LOG_DEBUG, "%s: %s waited (refcnt=%d)\n", 653 __func__, title, rt->rt_refcnt); 654 } 655 mutex_exit(&rt_free_global.lock); 656 } 657 658 void 659 rt_wait_psref(struct rtentry *rt) 660 { 661 662 psref_target_destroy(&rt->rt_psref, rt_psref_class); 663 psref_target_init(&rt->rt_psref, rt_psref_class); 664 } 665 666 static void 667 _rt_free(struct rtentry *rt) 668 { 669 struct ifaddr *ifa; 670 671 /* 672 * Need to avoid a deadlock on rt_wait_refcnt of update 673 * and a conflict on psref_target_destroy of update. 
	 */
#ifdef NET_MPSAFE
	rt_update_wait();
#endif

	RT_REFCNT_TRACE(rt);
	KASSERTMSG(rt->rt_refcnt >= 0, "refcnt=%d", rt->rt_refcnt);
	/* Block until every counted reference is gone. */
	rt_wait_refcnt("free", rt, 0);
#ifdef NET_MPSAFE
	psref_target_destroy(&rt->rt_psref, rt_psref_class);
#endif

	rt_assert_inactive(rt);
	rttrash--;
	ifa = rt->rt_ifa;
	rt->rt_ifa = NULL;
	ifafree(ifa);
	rt->rt_ifp = NULL;
	cv_destroy(&rt->rt_cv);
	rt_destroy(rt);
	pool_put(&rtentry_pool, rt);
}

/*
 * Workqueue handler: drain rt_free_global.queue, destroying each
 * queued route in a sleepable context.  The extra reference taken by
 * rt_free when enqueuing is dropped here before _rt_free.
 */
static void
rt_free_work(struct work *wk, void *arg)
{

	for (;;) {
		struct rtentry *rt;

		mutex_enter(&rt_free_global.lock);
		/* Reset first so a concurrent rt_free re-enqueues the work. */
		rt_free_global.enqueued = false;
		if ((rt = SLIST_FIRST(&rt_free_global.queue)) == NULL) {
			mutex_exit(&rt_free_global.lock);
			return;
		}
		SLIST_REMOVE_HEAD(&rt_free_global.queue, rt_free);
		mutex_exit(&rt_free_global.lock);
		atomic_dec_uint(&rt->rt_refcnt);
		_rt_free(rt);
	}
}

/*
 * Release the caller's reference and destroy the route.  If called
 * from a context that cannot sleep (softint), the destruction is
 * deferred to the rt_free workqueue instead.
 */
void
rt_free(struct rtentry *rt)
{

	KASSERT(rt->rt_refcnt > 0);
	if (rt_wait_ok()) {
		atomic_dec_uint(&rt->rt_refcnt);
		_rt_free(rt);
		return;
	}

	mutex_enter(&rt_free_global.lock);
	/* Hold an extra reference until rt_free_work picks it up. */
	rt_ref(rt);
	SLIST_INSERT_HEAD(&rt_free_global.queue, rt, rt_free);
	if (!rt_free_global.enqueued) {
		workqueue_enqueue(rt_free_global.wq, &rt_free_global.wk, NULL);
		rt_free_global.enqueued = true;
	}
	mutex_exit(&rt_free_global.lock);
}

#ifdef NET_MPSAFE
/*
 * Block until no route update is in progress (rt_update_global.ongoing
 * cleared by rt_update_finish).
 */
static void
rt_update_wait(void)
{

	mutex_enter(&rt_update_global.lock);
	while (rt_update_global.ongoing) {
		dlog(LOG_DEBUG, "%s: waiting lwp=%p\n", __func__, curlwp);
		cv_wait(&rt_update_global.cv, &rt_update_global.lock);
		dlog(LOG_DEBUG, "%s: waited lwp=%p\n", __func__, curlwp);
	}
	mutex_exit(&rt_update_global.lock);
}
#endif

/*
 * Begin updating a route: mark it RTF_UPDATING, become the single
 * global updater, and wait until all other references (count > 1; the
 * caller holds one) and psref readers have drained.  Returns ESRCH if
 * the route is already being destroyed.  Pair with rt_update_finish.
 */
int
rt_update_prepare(struct rtentry *rt)
{

	dlog(LOG_DEBUG, "%s: updating rt=%p lwp=%p\n", __func__, rt, curlwp);

	RT_WLOCK();
	/* If the entry is being destroyed, don't proceed the update. */
	if (!ISSET(rt->rt_flags, RTF_UP)) {
		RT_UNLOCK();
		return ESRCH;
	}
	rt->rt_flags |= RTF_UPDATING;
	RT_UNLOCK();

	mutex_enter(&rt_update_global.lock);
	while (rt_update_global.ongoing) {
		dlog(LOG_DEBUG, "%s: waiting ongoing updating rt=%p lwp=%p\n",
		    __func__, rt, curlwp);
		cv_wait(&rt_update_global.cv, &rt_update_global.lock);
		dlog(LOG_DEBUG, "%s: waited ongoing updating rt=%p lwp=%p\n",
		    __func__, rt, curlwp);
	}
	rt_update_global.ongoing = true;
	/* XXX need it to avoid rt_update_wait by updater itself. */
	rt_update_global.lwp = curlwp;
	mutex_exit(&rt_update_global.lock);

	rt_wait_refcnt("update", rt, 1);
	rt_wait_psref(rt);

	return 0;
}

/*
 * End a route update started by rt_update_prepare: clear RTF_UPDATING
 * and wake all waiters (lookups blocked in rt_update_wait).
 */
void
rt_update_finish(struct rtentry *rt)
{

	RT_WLOCK();
	rt->rt_flags &= ~RTF_UPDATING;
	RT_UNLOCK();

	mutex_enter(&rt_update_global.lock);
	rt_update_global.ongoing = false;
	rt_update_global.lwp = NULL;
	cv_broadcast(&rt_update_global.cv);
	mutex_exit(&rt_update_global.lock);

	dlog(LOG_DEBUG, "%s: updated rt=%p lwp=%p\n", __func__, rt, curlwp);
}

/*
 * Force a routing table entry to the specified
 * destination to go through the given gateway.
 * Normally called as a result of a routing redirect
 * message from the network layer.
 *
 * N.B.: must be called at splsoftnet
 */
void
rtredirect(const struct sockaddr *dst, const struct sockaddr *gateway,
    const struct sockaddr *netmask, int flags, const struct sockaddr *src,
    struct rtentry **rtp)
{
	struct rtentry *rt;
	int error = 0;
	uint64_t *stat = NULL;
	struct rt_addrinfo info;
	struct ifaddr *ifa;
	struct psref psref;

	/* verify the gateway is directly reachable */
	if ((ifa = ifa_ifwithnet_psref(gateway, &psref)) == NULL) {
		error = ENETUNREACH;
		goto out;
	}
	rt = rtalloc1(dst, 0);
	/*
	 * If the redirect isn't from our current router for this dst,
	 * it's either old or wrong.  If it redirects us to ourselves,
	 * we have a routing loop, perhaps as a result of an interface
	 * going down recently.
	 */
	if (!(flags & RTF_DONE) && rt &&
	    (sockaddr_cmp(src, rt->rt_gateway) != 0 || rt->rt_ifa != ifa))
		error = EINVAL;
	else {
		int s = pserialize_read_enter();
		struct ifaddr *_ifa;

		/* Redirecting to one of our own addresses is a loop. */
		_ifa = ifa_ifwithaddr(gateway);
		if (_ifa != NULL)
			error = EHOSTUNREACH;
		pserialize_read_exit(s);
	}
	if (error)
		goto done;
	/*
	 * Create a new entry if we just got back a wildcard entry
	 * or the lookup failed.  This is necessary for hosts
	 * which use routing redirects generated by smart gateways
	 * to dynamically build the routing tables.
	 */
	if (rt == NULL || (rt_mask(rt) && rt_mask(rt)->sa_len < 2))
		goto create;
	/*
	 * Don't listen to the redirect if it's
	 * for a route to an interface.
	 */
	if (rt->rt_flags & RTF_GATEWAY) {
		if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) {
			/*
			 * Changing from route to net => route to host.
			 * Create new route, rather than smashing route to net.
			 */
create:
			if (rt != NULL)
				rt_unref(rt);
			flags |= RTF_GATEWAY | RTF_DYNAMIC;
			memset(&info, 0, sizeof(info));
			info.rti_info[RTAX_DST] = dst;
			info.rti_info[RTAX_GATEWAY] = gateway;
			info.rti_info[RTAX_NETMASK] = netmask;
			info.rti_ifa = ifa;
			info.rti_flags = flags;
			rt = NULL;
			error = rtrequest1(RTM_ADD, &info, &rt);
			if (rt != NULL)
				flags = rt->rt_flags;
			stat = &rtstat.rts_dynamic;
		} else {
			/*
			 * Smash the current notion of the gateway to
			 * this destination.  Should check about netmask!!!
			 */
#ifdef NET_MPSAFE
			KASSERT(!cpu_softintr_p());

			error = rt_update_prepare(rt);
			if (error == 0) {
#endif
				RT_WLOCK();
				error = rt_setgate(rt, gateway);
				if (error == 0) {
					rt->rt_flags |= RTF_MODIFIED;
					flags |= RTF_MODIFIED;
				}
				RT_UNLOCK();
#ifdef NET_MPSAFE
				rt_update_finish(rt);
			} else {
				/*
				 * If error != 0, the rtentry is being
				 * destroyed, so doing nothing doesn't
				 * matter.
				 */
			}
#endif
			stat = &rtstat.rts_newgateway;
		}
	} else
		error = EHOSTUNREACH;
done:
	if (rt) {
		if (rtp != NULL && !error)
			*rtp = rt;	/* hand the reference to the caller */
		else
			rt_unref(rt);
	}
out:
	if (error)
		rtstat.rts_badredirect++;
	else if (stat != NULL)
		(*stat)++;
	memset(&info, 0, sizeof(info));
	info.rti_info[RTAX_DST] = dst;
	info.rti_info[RTAX_GATEWAY] = gateway;
	info.rti_info[RTAX_NETMASK] = netmask;
	info.rti_info[RTAX_AUTHOR] = src;
	rt_missmsg(RTM_REDIRECT, &info, flags, error);
	ifa_release(ifa, &psref);
}

/*
 * Delete a route and generate a message.
 * It doesn't free a passed rt.
 */
static int
rtdeletemsg(struct rtentry *rt)
{
	int error;
	struct rt_addrinfo info;
	struct rtentry *retrt;

	/*
	 * Request the new route so that the entry is not actually
	 * deleted.  That will allow the information being reported to
	 * be accurate (and consistent with route_output()).
	 */
	memset(&info, 0, sizeof(info));
	info.rti_info[RTAX_DST] = rt_getkey(rt);
	info.rti_info[RTAX_NETMASK] = rt_mask(rt);
	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
	info.rti_flags = rt->rt_flags;
	error = rtrequest1(RTM_DELETE, &info, &retrt);

	rt_missmsg(RTM_DELETE, &info, info.rti_flags, error);

	return error;
}

/*
 * Find the ifaddr to attach to a route toward `dst' via `gateway'.
 * Returns a psref-held ifaddr (release with ifa_release) or NULL.
 */
static struct ifaddr *
ifa_ifwithroute_psref(int flags, const struct sockaddr *dst,
    const struct sockaddr *gateway, struct psref *psref)
{
	struct ifaddr *ifa = NULL;

	if ((flags & RTF_GATEWAY) == 0) {
		/*
		 * If we are adding a route to an interface,
		 * and the interface is a pt to pt link
		 * we should search for the destination
		 * as our clue to the interface.  Otherwise
		 * we can use the local address.
		 */
		if ((flags & RTF_HOST) && gateway->sa_family != AF_LINK)
			ifa = ifa_ifwithdstaddr_psref(dst, psref);
		if (ifa == NULL)
			ifa = ifa_ifwithaddr_psref(gateway, psref);
	} else {
		/*
		 * If we are adding a route to a remote net
		 * or host, the gateway may still be on the
		 * other end of a pt to pt link.
		 */
		ifa = ifa_ifwithdstaddr_psref(gateway, psref);
	}
	if (ifa == NULL)
		ifa = ifa_ifwithnet_psref(gateway, psref);
	if (ifa == NULL) {
		/* Last resort: route to the gateway and use its ifaddr. */
		int s;
		struct rtentry *rt;

		rt = rtalloc1_locked(gateway, 0, true, true);
		if (rt == NULL)
			return NULL;
		/* The gateway must be reached directly, not via another gw. */
		if (rt->rt_flags & RTF_GATEWAY) {
			rt_unref(rt);
			return NULL;
		}
		/*
		 * Just in case. May not need to do this workaround.
		 * Revisit when working on rtentry MP-ification.
		 */
		s = pserialize_read_enter();
		IFADDR_READER_FOREACH(ifa, rt->rt_ifp) {
			if (ifa == rt->rt_ifa)
				break;
		}
		if (ifa != NULL)
			ifa_acquire(ifa, psref);
		pserialize_read_exit(s);
		rt_unref(rt);
		if (ifa == NULL)
			return NULL;
	}
	if (ifa->ifa_addr->sa_family != dst->sa_family) {
		/* Prefer an ifaddr of the destination's address family. */
		struct ifaddr *nifa;
		int s;

		s = pserialize_read_enter();
		nifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
		if (nifa != NULL) {
			ifa_release(ifa, psref);
			ifa_acquire(nifa, psref);
			ifa = nifa;
		}
		pserialize_read_exit(s);
	}
	return ifa;
}

/*
 * If it succeeds and ret_nrt isn't NULL, refcnt of ret_nrt is incremented.
 * The caller has to rtfree it by itself.
 */
int
rtrequest(int req, const struct sockaddr *dst, const struct sockaddr *gateway,
    const struct sockaddr *netmask, int flags, struct rtentry **ret_nrt)
{
	struct rt_addrinfo info;

	memset(&info, 0, sizeof(info));
	info.rti_flags = flags;
	info.rti_info[RTAX_DST] = dst;
	info.rti_info[RTAX_GATEWAY] = gateway;
	info.rti_info[RTAX_NETMASK] = netmask;
	return rtrequest1(req, &info, ret_nrt);
}

/*
 * It's a utility function to add/remove a route to/from the routing table
 * and tell user processes the addition/removal on success.
 */
int
rtrequest_newmsg(const int req, const struct sockaddr *dst,
    const struct sockaddr *gateway, const struct sockaddr *netmask,
    const int flags)
{
	int error;
	struct rtentry *ret_nrt = NULL;

	/* Only additions and deletions are announced this way. */
	KASSERT(req == RTM_ADD || req == RTM_DELETE);

	error = rtrequest(req, dst, gateway, netmask, flags, &ret_nrt);
	if (error != 0)
		return error;

	KASSERT(ret_nrt != NULL);

	rt_newmsg(req, ret_nrt);	/* tell user process */
	/* A deleted route must be destroyed; an added one just unreferenced. */
	if (req == RTM_DELETE)
		rt_free(ret_nrt);
	else
		rt_unref(ret_nrt);

	return 0;
}

/*
 * Resolve the interface for a route request from RTAX_IFP, if the
 * caller supplied one.  Returns a psref-held ifnet or NULL.
 */
static struct ifnet *
rt_getifp(struct rt_addrinfo *info, struct psref *psref)
{
	const struct sockaddr *ifpaddr = info->rti_info[RTAX_IFP];

	if (info->rti_ifp != NULL)
		return NULL;
	/*
	 * ifp may be specified by sockaddr_dl when protocol address
	 * is ambiguous
	 */
	if (ifpaddr != NULL && ifpaddr->sa_family == AF_LINK) {
		struct ifaddr *ifa;
		int s = pserialize_read_enter();

		ifa = ifa_ifwithnet(ifpaddr);
		if (ifa != NULL)
			info->rti_ifp = if_get_byindex(ifa->ifa_ifp->if_index,
			    psref);
		pserialize_read_exit(s);
	}

	return info->rti_ifp;
}

/*
 * Resolve the ifaddr for a route request, trying in order: the
 * explicit RTAX_IFA address, an ifaddr on the supplied ifp, and the
 * generic ifa_ifwithroute lookup.  On success fills info->rti_ifa
 * (and rti_ifp if unset) and returns the psref-held ifaddr.
 */
static struct ifaddr *
rt_getifa(struct rt_addrinfo *info, struct psref *psref)
{
	struct ifaddr *ifa = NULL;
	const struct sockaddr *dst = info->rti_info[RTAX_DST];
	const struct sockaddr *gateway = info->rti_info[RTAX_GATEWAY];
	const struct sockaddr *ifaaddr = info->rti_info[RTAX_IFA];
	int flags = info->rti_flags;
	const struct sockaddr *sa;

	if (info->rti_ifa == NULL && ifaaddr != NULL) {
		ifa = ifa_ifwithaddr_psref(ifaaddr, psref);
		if (ifa != NULL)
			goto got;
	}

	/* Pick the most specific address available as the search key. */
	sa = ifaaddr != NULL ? ifaaddr :
	    (gateway != NULL ?
gateway : dst); 1125 if (sa != NULL && info->rti_ifp != NULL) 1126 ifa = ifaof_ifpforaddr_psref(sa, info->rti_ifp, psref); 1127 else if (dst != NULL && gateway != NULL) 1128 ifa = ifa_ifwithroute_psref(flags, dst, gateway, psref); 1129 else if (sa != NULL) 1130 ifa = ifa_ifwithroute_psref(flags, sa, sa, psref); 1131 if (ifa == NULL) 1132 return NULL; 1133 got: 1134 if (ifa->ifa_getifa != NULL) { 1135 /* FIXME ifa_getifa is NOMPSAFE */ 1136 ifa = (*ifa->ifa_getifa)(ifa, dst); 1137 if (ifa == NULL) 1138 return NULL; 1139 ifa_acquire(ifa, psref); 1140 } 1141 info->rti_ifa = ifa; 1142 if (info->rti_ifp == NULL) 1143 info->rti_ifp = ifa->ifa_ifp; 1144 return ifa; 1145 } 1146 1147 /* 1148 * If it suceeds and ret_nrt isn't NULL, refcnt of ret_nrt is incremented. 1149 * The caller has to rtfree it by itself. 1150 */ 1151 int 1152 rtrequest1(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt) 1153 { 1154 int s = splsoftnet(), ss; 1155 int error = 0, rc; 1156 struct rtentry *rt; 1157 rtbl_t *rtbl; 1158 struct ifaddr *ifa = NULL; 1159 struct sockaddr_storage maskeddst; 1160 const struct sockaddr *dst = info->rti_info[RTAX_DST]; 1161 const struct sockaddr *gateway = info->rti_info[RTAX_GATEWAY]; 1162 const struct sockaddr *netmask = info->rti_info[RTAX_NETMASK]; 1163 int flags = info->rti_flags; 1164 struct psref psref_ifp, psref_ifa; 1165 int bound = 0; 1166 struct ifnet *ifp = NULL; 1167 bool need_to_release_ifa = true; 1168 bool need_unlock = true; 1169 #define senderr(x) { error = x ; goto bad; } 1170 1171 RT_WLOCK(); 1172 1173 bound = curlwp_bind(); 1174 if ((rtbl = rt_gettable(dst->sa_family)) == NULL) 1175 senderr(ESRCH); 1176 if (flags & RTF_HOST) 1177 netmask = NULL; 1178 switch (req) { 1179 case RTM_DELETE: 1180 if (netmask) { 1181 rt_maskedcopy(dst, (struct sockaddr *)&maskeddst, 1182 netmask); 1183 dst = (struct sockaddr *)&maskeddst; 1184 } 1185 if ((rt = rt_lookup(rtbl, dst, netmask)) == NULL) 1186 senderr(ESRCH); 1187 if ((rt = rt_deladdr(rtbl, dst, 
netmask)) == NULL) 1188 senderr(ESRCH); 1189 rt->rt_flags &= ~RTF_UP; 1190 if ((ifa = rt->rt_ifa)) { 1191 if (ifa->ifa_flags & IFA_ROUTE && 1192 rt_ifa_connected(rt, ifa)) { 1193 RT_DPRINTF("rt->_rt_key = %p, ifa = %p, " 1194 "deleted IFA_ROUTE\n", 1195 (void *)rt->_rt_key, (void *)ifa); 1196 ifa->ifa_flags &= ~IFA_ROUTE; 1197 } 1198 if (ifa->ifa_rtrequest) 1199 ifa->ifa_rtrequest(RTM_DELETE, rt, info); 1200 ifa = NULL; 1201 } 1202 rttrash++; 1203 if (ret_nrt) { 1204 *ret_nrt = rt; 1205 rt_ref(rt); 1206 RT_REFCNT_TRACE(rt); 1207 } 1208 rtcache_invalidate(); 1209 RT_UNLOCK(); 1210 need_unlock = false; 1211 rt_timer_remove_all(rt); 1212 #if defined(INET) || defined(INET6) 1213 if (netmask != NULL) 1214 lltable_prefix_free(dst->sa_family, dst, netmask, 0); 1215 #endif 1216 if (ret_nrt == NULL) { 1217 /* Adjust the refcount */ 1218 rt_ref(rt); 1219 RT_REFCNT_TRACE(rt); 1220 rt_free(rt); 1221 } 1222 break; 1223 1224 case RTM_ADD: 1225 if (info->rti_ifa == NULL) { 1226 ifp = rt_getifp(info, &psref_ifp); 1227 ifa = rt_getifa(info, &psref_ifa); 1228 if (ifa == NULL) 1229 senderr(ENETUNREACH); 1230 } else { 1231 /* Caller should have a reference of ifa */ 1232 ifa = info->rti_ifa; 1233 need_to_release_ifa = false; 1234 } 1235 rt = pool_get(&rtentry_pool, PR_NOWAIT); 1236 if (rt == NULL) 1237 senderr(ENOBUFS); 1238 memset(rt, 0, sizeof(*rt)); 1239 rt->rt_flags = RTF_UP | flags; 1240 LIST_INIT(&rt->rt_timer); 1241 1242 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1243 if (netmask) { 1244 rt_maskedcopy(dst, (struct sockaddr *)&maskeddst, 1245 netmask); 1246 rt_setkey(rt, (struct sockaddr *)&maskeddst, M_NOWAIT); 1247 } else { 1248 rt_setkey(rt, dst, M_NOWAIT); 1249 } 1250 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1251 if (rt_getkey(rt) == NULL || 1252 rt_setgate(rt, gateway) != 0) { 1253 pool_put(&rtentry_pool, rt); 1254 senderr(ENOBUFS); 1255 } 1256 1257 rt_set_ifa(rt, ifa); 1258 if (info->rti_info[RTAX_TAG] != NULL) { 1259 const struct sockaddr *tag; 
1260 tag = rt_settag(rt, info->rti_info[RTAX_TAG]); 1261 if (tag == NULL) 1262 senderr(ENOBUFS); 1263 } 1264 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1265 1266 ss = pserialize_read_enter(); 1267 if (info->rti_info[RTAX_IFP] != NULL) { 1268 struct ifaddr *ifa2; 1269 ifa2 = ifa_ifwithnet(info->rti_info[RTAX_IFP]); 1270 if (ifa2 != NULL) 1271 rt->rt_ifp = ifa2->ifa_ifp; 1272 else 1273 rt->rt_ifp = ifa->ifa_ifp; 1274 } else 1275 rt->rt_ifp = ifa->ifa_ifp; 1276 pserialize_read_exit(ss); 1277 cv_init(&rt->rt_cv, "rtentry"); 1278 psref_target_init(&rt->rt_psref, rt_psref_class); 1279 1280 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1281 rc = rt_addaddr(rtbl, rt, netmask); 1282 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1283 if (rc != 0) { 1284 ifafree(ifa); /* for rt_set_ifa above */ 1285 cv_destroy(&rt->rt_cv); 1286 rt_destroy(rt); 1287 pool_put(&rtentry_pool, rt); 1288 senderr(rc); 1289 } 1290 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1291 if (ifa->ifa_rtrequest) 1292 ifa->ifa_rtrequest(req, rt, info); 1293 if (need_to_release_ifa) 1294 ifa_release(ifa, &psref_ifa); 1295 ifa = NULL; 1296 if_put(ifp, &psref_ifp); 1297 ifp = NULL; 1298 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1299 if (ret_nrt) { 1300 *ret_nrt = rt; 1301 rt_ref(rt); 1302 RT_REFCNT_TRACE(rt); 1303 } 1304 rtcache_invalidate(); 1305 RT_UNLOCK(); 1306 need_unlock = false; 1307 break; 1308 case RTM_GET: 1309 if (netmask != NULL) { 1310 rt_maskedcopy(dst, (struct sockaddr *)&maskeddst, 1311 netmask); 1312 dst = (struct sockaddr *)&maskeddst; 1313 } 1314 if ((rt = rt_lookup(rtbl, dst, netmask)) == NULL) 1315 senderr(ESRCH); 1316 if (ret_nrt != NULL) { 1317 *ret_nrt = rt; 1318 rt_ref(rt); 1319 RT_REFCNT_TRACE(rt); 1320 } 1321 break; 1322 } 1323 bad: 1324 if (need_to_release_ifa) 1325 ifa_release(ifa, &psref_ifa); 1326 if_put(ifp, &psref_ifp); 1327 curlwp_bindx(bound); 1328 if (need_unlock) 1329 RT_UNLOCK(); 1330 splx(s); 1331 return error; 1332 } 1333 
/*
 * Replace rt's gateway with a private copy of gate.  If the route is
 * RTF_GATEWAY, also clamp the route MTU down to the gateway route's MTU
 * (unless RTV_MTU is locked).  Caller must hold the routing table
 * write lock.  Returns 0 or ENOMEM.
 */
int
rt_setgate(struct rtentry *rt, const struct sockaddr *gate)
{
	struct sockaddr *new, *old;

	KASSERT(RT_WLOCKED());
	KASSERT(rt->_rt_key != NULL);
	RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);

	new = sockaddr_dup(gate, M_ZERO | M_NOWAIT);
	if (new == NULL)
		return ENOMEM;

	old = rt->rt_gateway;
	rt->rt_gateway = new;
	if (old != NULL)
		sockaddr_free(old);

	KASSERT(rt->_rt_key != NULL);
	RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);

	if (rt->rt_flags & RTF_GATEWAY) {
		struct rtentry *gwrt;

		gwrt = rtalloc1_locked(gate, 1, false, true);
		/*
		 * If we switched gateways, grab the MTU from the new
		 * gateway route if the current MTU is greater than the
		 * MTU of the gateway.
		 * Note that, if the MTU of gateway is 0, we will reset the
		 * MTU of the route to run PMTUD again from scratch. XXX
		 */
		if (gwrt != NULL) {
			KASSERT(gwrt->_rt_key != NULL);
			RT_DPRINTF("gwrt->_rt_key = %p\n", gwrt->_rt_key);
			if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0 &&
			    rt->rt_rmx.rmx_mtu &&
			    rt->rt_rmx.rmx_mtu > gwrt->rt_rmx.rmx_mtu) {
				rt->rt_rmx.rmx_mtu = gwrt->rt_rmx.rmx_mtu;
			}
			rt_unref(gwrt);
		}
	}
	KASSERT(rt->_rt_key != NULL);
	RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
	return 0;
}

/*
 * Choose the ifaddr (and its ifnet) a route change should bind to, based
 * on which of RTAX_IFP / RTAX_IFA / RTAX_GATEWAY were supplied.  On
 * success the returned ifaddr has a psref held and *ifp is referenced via
 * psref_ifp; on failure NULL is returned and *ifp is reset to NULL.
 */
static struct ifaddr *
rt_update_get_ifa(const struct rt_addrinfo info, const struct rtentry *rt,
    struct ifnet **ifp, struct psref *psref_ifp, struct psref *psref)
{
	struct ifaddr *ifa = NULL;

	*ifp = NULL;
	if (info.rti_info[RTAX_IFP] != NULL) {
		ifa = ifa_ifwithnet_psref(info.rti_info[RTAX_IFP], psref);
		if (ifa == NULL)
			goto next;
		*ifp = ifa->ifa_ifp;
		if_acquire(*ifp, psref_ifp);
		if (info.rti_info[RTAX_IFA] == NULL &&
		    info.rti_info[RTAX_GATEWAY] == NULL)
			goto next;
		/* Drop the RTAX_IFP match; refine by IFA/GATEWAY below */
		ifa_release(ifa, psref);
		if (info.rti_info[RTAX_IFA] == NULL) {
			/* route change <dst> <gw> -ifp <if> */
			ifa = ifaof_ifpforaddr_psref(info.rti_info[RTAX_GATEWAY],
			    *ifp, psref);
		} else {
			/* route change <dst> -ifp <if> -ifa <addr> */
			ifa = ifa_ifwithaddr_psref(info.rti_info[RTAX_IFA], psref);
			if (ifa != NULL)
				goto out;
			ifa = ifaof_ifpforaddr_psref(info.rti_info[RTAX_IFA],
			    *ifp, psref);
		}
		goto out;
	}
next:
	if (info.rti_info[RTAX_IFA] != NULL) {
		/* route change <dst> <gw> -ifa <addr> */
		ifa = ifa_ifwithaddr_psref(info.rti_info[RTAX_IFA], psref);
		if (ifa != NULL)
			goto out;
	}
	if (info.rti_info[RTAX_GATEWAY] != NULL) {
		/* route change <dst> <gw> */
		ifa = ifa_ifwithroute_psref(rt->rt_flags, rt_getkey(rt),
		    info.rti_info[RTAX_GATEWAY], psref);
	}
out:
	/* Keep *ifp and ifa consistent: both referenced, or neither */
	if (ifa != NULL && *ifp == NULL) {
		*ifp = ifa->ifa_ifp;
		if_acquire(*ifp, psref_ifp);
	}
	if (ifa == NULL && *ifp != NULL) {
		if_put(*ifp, psref_ifp);
		*ifp = NULL;
	}
	return ifa;
}

/*
 * Apply a RTM_CHANGE-style update (gateway, tag, ifaddr/ifp binding,
 * metrics and flags) to an existing route.  Takes the routing table
 * write lock for the duration.  rtm is the raw message passed through
 * to rt_setmetrics().
 */
int
rt_update(struct rtentry *rt, struct rt_addrinfo *info, void *rtm)
{
	int error = 0;
	struct ifnet *ifp = NULL, *new_ifp = NULL;
	struct ifaddr *ifa = NULL, *new_ifa;
	struct psref psref_ifa, psref_new_ifa, psref_ifp, psref_new_ifp;
	bool newgw, ifp_changed = false;

	RT_WLOCK();
	/*
	 * New gateway could require new ifaddr, ifp;
	 * flags may also be different; ifp may be specified
	 * by ll sockaddr when protocol address is ambiguous
	 */
	newgw = info->rti_info[RTAX_GATEWAY] != NULL &&
	    sockaddr_cmp(info->rti_info[RTAX_GATEWAY], rt->rt_gateway) != 0;

	if (newgw || info->rti_info[RTAX_IFP] != NULL ||
	    info->rti_info[RTAX_IFA] != NULL) {
		ifp = rt_getifp(info, &psref_ifp);
		/* info refers ifp so we need to keep a reference */
		ifa = rt_getifa(info, &psref_ifa);
		if (ifa == NULL) {
			error = ENETUNREACH;
			goto out;
		}
	}
	if (newgw) {
		error = rt_setgate(rt, info->rti_info[RTAX_GATEWAY]);
		if (error != 0)
			goto out;
	}
	if (info->rti_info[RTAX_TAG]) {
		const struct sockaddr *tag;
		tag = rt_settag(rt, info->rti_info[RTAX_TAG]);
		if (tag == NULL) {
			error = ENOBUFS;
			goto out;
		}
	}
	/*
	 * New gateway could require new ifaddr, ifp;
	 * flags may also be different; ifp may be specified
	 * by ll sockaddr when protocol address is ambiguous
	 */
	new_ifa = rt_update_get_ifa(*info, rt, &new_ifp, &psref_new_ifp,
	    &psref_new_ifa);
	if (new_ifa != NULL) {
		ifa_release(ifa, &psref_ifa);
		ifa = new_ifa;
	}
	if (ifa) {
		struct ifaddr *oifa = rt->rt_ifa;
		/* Rebind only to a live ifaddr on a live interface */
		if (oifa != ifa && !ifa_is_destroying(ifa) &&
		    new_ifp != NULL && !if_is_deactivated(new_ifp)) {
			if (oifa && oifa->ifa_rtrequest)
				oifa->ifa_rtrequest(RTM_DELETE, rt, info);
			rt_replace_ifa(rt, ifa);
			rt->rt_ifp = new_ifp;
			ifp_changed = true;
		}
		if (new_ifa == NULL)
			ifa_release(ifa, &psref_ifa);
	}
	ifa_release(new_ifa, &psref_new_ifa);
	if (new_ifp && rt->rt_ifp != new_ifp && !if_is_deactivated(new_ifp)) {
		rt->rt_ifp = new_ifp;
		ifp_changed = true;
	}
	rt_setmetrics(rtm, rt);
	if (rt->rt_flags != info->rti_flags) {
		/* Caller-supplied flags win, except the preserved set */
		rt->rt_flags = (info->rti_flags & ~PRESERVED_RTF) |
		    (rt->rt_flags & PRESERVED_RTF);
	}
	if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest)
		rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, info);
#if defined(INET) || defined(INET6)
	/* Interface changed: drop stale L2 entries under this prefix */
	if (ifp_changed && rt_mask(rt) != NULL)
		lltable_prefix_free(rt_getkey(rt)->sa_family, rt_getkey(rt),
		    rt_mask(rt), 0);
#else
	(void)ifp_changed; /* XXX gcc */
#endif
out:
	if_put(new_ifp, &psref_new_ifp);
	if_put(ifp, &psref_ifp);

	RT_UNLOCK();

	return error;
}

/*
 * Copy src into dst, ANDing the address bytes with netmask; bytes beyond
 * the mask (up to src's length) are zeroed.  Note sa_len counts from the
 * start of the sockaddr, hence the (char *)dst base for the end pointers.
 */
static void
rt_maskedcopy(const struct sockaddr *src, struct sockaddr *dst,
	const struct sockaddr *netmask)
{
	const char *netmaskp = &netmask->sa_data[0],
	           *srcp = &src->sa_data[0];
	char *dstp = &dst->sa_data[0];
	const char *maskend = (char *)dst + MIN(netmask->sa_len, src->sa_len);
	const char *srcend = (char *)dst + src->sa_len;

	dst->sa_len = src->sa_len;
	dst->sa_family = src->sa_family;

	while (dstp < maskend)
		*dstp++ = *srcp++ & *netmaskp++;
	if (dstp < srcend)
		memset(dstp, 0, (size_t)(srcend - dstp));
}

/*
 * Inform the routing socket of a route change.
 */
void
rt_newmsg(const int cmd, const struct rtentry *rt)
{
	struct rt_addrinfo info;

	memset((void *)&info, 0, sizeof(info));
	info.rti_info[RTAX_DST] = rt_getkey(rt);
	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
	info.rti_info[RTAX_NETMASK] = rt_mask(rt);
	if (rt->rt_ifp) {
		info.rti_info[RTAX_IFP] = rt->rt_ifp->if_dl->ifa_addr;
		info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
	}

	rt_missmsg(cmd, &info, rt->rt_flags, 0);
}

/*
 * Set up or tear down a routing table entry, normally
 * for an interface.
 */
int
rtinit(struct ifaddr *ifa, int cmd, int flags)
{
	struct rtentry *rt;
	struct sockaddr *dst, *odst;
	struct sockaddr_storage maskeddst;
	struct rtentry *nrt = NULL;
	int error;
	struct rt_addrinfo info;

	dst = flags & RTF_HOST ? ifa->ifa_dstaddr : ifa->ifa_addr;
	if (cmd == RTM_DELETE) {
		if ((flags & RTF_HOST) == 0 && ifa->ifa_netmask) {
			/* Delete subnet route for this interface */
			odst = dst;
			dst = (struct sockaddr *)&maskeddst;
			rt_maskedcopy(odst, dst, ifa->ifa_netmask);
		}
		/* Refuse to delete a route owned by another ifaddr */
		if ((rt = rtalloc1(dst, 0)) != NULL) {
			if (rt->rt_ifa != ifa) {
				rt_unref(rt);
				return (flags & RTF_HOST) ? EHOSTUNREACH
							: ENETUNREACH;
			}
			rt_unref(rt);
		}
	}
	memset(&info, 0, sizeof(info));
	info.rti_ifa = ifa;
	info.rti_flags = flags | ifa->ifa_flags;
	info.rti_info[RTAX_DST] = dst;
	info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;

	/*
	 * XXX here, it seems that we are assuming that ifa_netmask is NULL
	 * for RTF_HOST.  bsdi4 passes NULL explicitly (via intermediate
	 * variable) when RTF_HOST is 1.  still not sure if i can safely
	 * change it to meet bsdi4 behavior.
	 */
	if (cmd != RTM_LLINFO_UPD)
		info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
	/* LLINFO_UPD only looks the entry up; it must not modify the table */
	error = rtrequest1((cmd == RTM_LLINFO_UPD) ? RTM_GET : cmd, &info,
	    &nrt);
	if (error != 0)
		return error;

	rt = nrt;
	RT_REFCNT_TRACE(rt);
	switch (cmd) {
	case RTM_DELETE:
		rt_newmsg(cmd, rt);
		rt_free(rt);
		break;
	case RTM_LLINFO_UPD:
		if (cmd == RTM_LLINFO_UPD && ifa->ifa_rtrequest != NULL)
			ifa->ifa_rtrequest(RTM_LLINFO_UPD, rt, &info);
		rt_newmsg(RTM_CHANGE, rt);
		rt_unref(rt);
		break;
	case RTM_ADD:
		/*
		 * XXX it looks just reverting rt_ifa replaced by ifa_rtrequest
		 * called via rtrequest1. Can we just prevent the replacement
		 * somehow and remove the following code? And also doesn't
		 * calling ifa_rtrequest(RTM_ADD) replace rt_ifa again?
		 */
		if (rt->rt_ifa != ifa) {
			printf("rtinit: wrong ifa (%p) was (%p)\n", ifa,
			    rt->rt_ifa);
#ifdef NET_MPSAFE
			KASSERT(!cpu_softintr_p());

			error = rt_update_prepare(rt);
			if (error == 0) {
#endif
				if (rt->rt_ifa->ifa_rtrequest != NULL) {
					rt->rt_ifa->ifa_rtrequest(RTM_DELETE,
					    rt, &info);
				}
				rt_replace_ifa(rt, ifa);
				rt->rt_ifp = ifa->ifa_ifp;
				if (ifa->ifa_rtrequest != NULL)
					ifa->ifa_rtrequest(RTM_ADD, rt, &info);
#ifdef NET_MPSAFE
				rt_update_finish(rt);
			} else {
				/*
				 * If error != 0, the rtentry is being
				 * destroyed, so doing nothing doesn't
				 * matter.
				 */
			}
#endif
		}
		rt_newmsg(cmd, rt);
		rt_unref(rt);
		RT_REFCNT_TRACE(rt);
		break;
	}
	return error;
}

/*
 * Create a local route entry for the address.
 * Announce the addition of the address and the route to the routing socket.
 */
int
rt_ifa_addlocal(struct ifaddr *ifa)
{
	struct rtentry *rt;
	int e;

	/* If there is no loopback entry, allocate one. */
	rt = rtalloc1(ifa->ifa_addr, 0);
#ifdef RT_DEBUG
	if (rt != NULL)
		dump_rt(rt);
#endif
	if (rt == NULL || (rt->rt_flags & RTF_HOST) == 0 ||
	    (rt->rt_ifp->if_flags & IFF_LOOPBACK) == 0)
	{
		struct rt_addrinfo info;
		struct rtentry *nrt;

		memset(&info, 0, sizeof(info));
		info.rti_flags = RTF_HOST | RTF_LOCAL;
		info.rti_info[RTAX_DST] = ifa->ifa_addr;
		info.rti_info[RTAX_GATEWAY] =
		    (const struct sockaddr *)ifa->ifa_ifp->if_sadl;
		info.rti_ifa = ifa;
		nrt = NULL;
		e = rtrequest1(RTM_ADD, &info, &nrt);
		/* ifa_rtrequest may have rebound rt_ifa; put ours back */
		if (nrt && ifa != nrt->rt_ifa)
			rt_replace_ifa(nrt, ifa);
		rt_newaddrmsg(RTM_ADD, ifa, e, nrt);
		if (nrt != NULL) {
#ifdef RT_DEBUG
			dump_rt(nrt);
#endif
			rt_unref(nrt);
			RT_REFCNT_TRACE(nrt);
		}
	} else {
		/* A suitable loopback host route already exists */
		e = 0;
		rt_newaddrmsg(RTM_NEWADDR, ifa, 0, NULL);
	}
	if (rt != NULL)
		rt_unref(rt);
	return e;
}

/*
 * Remove the local route entry for the address.
 * Announce the removal of the address and the route to the routing socket.
 */
int
rt_ifa_remlocal(struct ifaddr *ifa, struct ifaddr *alt_ifa)
{
	struct rtentry *rt;
	int e = 0;

	rt = rtalloc1(ifa->ifa_addr, 0);

	/*
	 * Before deleting, check if a corresponding loopbacked
	 * host route surely exists.  With this check, we can avoid
	 * deleting an interface direct route whose destination is
	 * the same as the address being removed.  This can happen
	 * when removing a subnet-router anycast address on an
	 * interface attached to a shared medium.
	 */
	if (rt != NULL &&
	    (rt->rt_flags & RTF_HOST) &&
	    (rt->rt_ifp->if_flags & IFF_LOOPBACK))
	{
		/* If we cannot replace the route's ifaddr with the equivalent
		 * ifaddr of another interface, I believe it is safest to
		 * delete the route.
		 */
		if (alt_ifa == NULL) {
			e = rtdeletemsg(rt);
			if (e == 0) {
				rt_unref(rt);
				rt_free(rt);
				rt = NULL;
			}
			rt_newaddrmsg(RTM_DELADDR, ifa, 0, NULL);
		} else {
			rt_replace_ifa(rt, alt_ifa);
			rt_newmsg(RTM_CHANGE, rt);
		}
	} else
		rt_newaddrmsg(RTM_DELADDR, ifa, 0, NULL);
	if (rt != NULL)
		rt_unref(rt);
	return e;
}

/*
 * Route timer routines.  These routines allow functions to be called
 * for various routes at any time.  This is useful in supporting
 * path MTU discovery and redirect route deletion.
 *
 * This is similar to some BSDI internal functions, but it provides
 * for multiple queues for efficiency's sake...
 */

LIST_HEAD(, rttimer_queue) rttimer_queue_head;
static int rt_init_done = 0;

/*
 * Some subtle order problems with domain initialization mean that
 * we cannot count on this being run from rt_init before various
 * protocol initializations are done.  Therefore, we make sure
 * that this is run when the first queue is added...
 */

static void rt_timer_work(struct work *, void *);

/* One-time lazy initialization of the route-timer machinery. */
static void
rt_timer_init(void)
{
	int error;

	assert(rt_init_done == 0);

	/* XXX should be in rt_init */
	rw_init(&rt_lock);

	LIST_INIT(&rttimer_queue_head);
	callout_init(&rt_timer_ch, CALLOUT_MPSAFE);
	error = workqueue_create(&rt_timer_wq, "rt_timer",
	    rt_timer_work, NULL, PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE);
	if (error)
		panic("%s: workqueue_create failed (%d)\n", __func__, error);
	callout_reset(&rt_timer_ch, hz, rt_timer_timer, NULL);
	rt_init_done = 1;
}

/*
 * Allocate a new timer queue whose entries fire `timeout' seconds after
 * being added.  Returns NULL on allocation failure.
 */
struct rttimer_queue *
rt_timer_queue_create(u_int timeout)
{
	struct rttimer_queue *rtq;

	if (rt_init_done == 0)
		rt_timer_init();

	R_Malloc(rtq, struct rttimer_queue *, sizeof *rtq);
	if (rtq == NULL)
		return NULL;
	memset(rtq, 0, sizeof(*rtq));

	rtq->rtq_timeout = timeout;
	TAILQ_INIT(&rtq->rtq_head);
	RT_WLOCK();
	LIST_INSERT_HEAD(&rttimer_queue_head, rtq, rtq_link);
	RT_UNLOCK();

	return rtq;
}

/* Change the timeout of an existing queue (affects future expirations). */
void
rt_timer_queue_change(struct rttimer_queue *rtq, long timeout)
{

	rtq->rtq_timeout = timeout;
}

/*
 * Fire and remove every timer on the queue.  Drops the write lock around
 * each callback (the entry holds a temporary route reference meanwhile).
 */
static void
rt_timer_queue_remove_all(struct rttimer_queue *rtq)
{
	struct rttimer *r;

	RT_ASSERT_WLOCK();

	while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL) {
		LIST_REMOVE(r, rtt_link);
		TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
		rt_ref(r->rtt_rt); /* XXX */
		RT_REFCNT_TRACE(r->rtt_rt);
		RT_UNLOCK();
		(*r->rtt_func)(r->rtt_rt, r);
		pool_put(&rttimer_pool, r);
		RT_WLOCK();
		if (rtq->rtq_count > 0)
			rtq->rtq_count--;
		else
			printf("rt_timer_queue_remove_all: "
			    "rtq_count reached 0\n");
	}
}

/* Drain the queue and unlink it; caller frees the structure itself. */
void
rt_timer_queue_destroy(struct rttimer_queue *rtq)
{

	RT_WLOCK();
	rt_timer_queue_remove_all(rtq);
	LIST_REMOVE(rtq, rtq_link);
	RT_UNLOCK();

	/*
	 * Caller is responsible for freeing the rttimer_queue structure.
	 */
}

/* Number of pending timers on the queue (unlocked snapshot). */
unsigned long
rt_timer_count(struct rttimer_queue *rtq)
{
	return rtq->rtq_count;
}

/* Discard (without firing) all timers attached to a route. */
static void
rt_timer_remove_all(struct rtentry *rt)
{
	struct rttimer *r;

	RT_WLOCK();
	while ((r = LIST_FIRST(&rt->rt_timer)) != NULL) {
		LIST_REMOVE(r, rtt_link);
		TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next);
		if (r->rtt_queue->rtq_count > 0)
			r->rtt_queue->rtq_count--;
		else
			printf("rt_timer_remove_all: rtq_count reached 0\n");
		pool_put(&rttimer_pool, r);
	}
	RT_UNLOCK();
}

/*
 * Arm `func' to run for rt on the given queue after its timeout.  An
 * existing timer for the same (rt, func) pair is recycled.  Returns 0
 * or ENOBUFS.
 */
int
rt_timer_add(struct rtentry *rt,
	void (*func)(struct rtentry *, struct rttimer *),
	struct rttimer_queue *queue)
{
	struct rttimer *r;

	KASSERT(func != NULL);
	RT_WLOCK();
	/*
	 * If there's already a timer with this action, destroy it before
	 * we add a new one.
	 */
	LIST_FOREACH(r, &rt->rt_timer, rtt_link) {
		if (r->rtt_func == func)
			break;
	}
	if (r != NULL) {
		LIST_REMOVE(r, rtt_link);
		TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next);
		if (r->rtt_queue->rtq_count > 0)
			r->rtt_queue->rtq_count--;
		else
			printf("rt_timer_add: rtq_count reached 0\n");
	} else {
		r = pool_get(&rttimer_pool, PR_NOWAIT);
		if (r == NULL) {
			RT_UNLOCK();
			return ENOBUFS;
		}
	}

	memset(r, 0, sizeof(*r));

	r->rtt_rt = rt;
	r->rtt_time = time_uptime;
	r->rtt_func = func;
	r->rtt_queue = queue;
	LIST_INSERT_HEAD(&rt->rt_timer, r, rtt_link);
	TAILQ_INSERT_TAIL(&queue->rtq_head, r, rtt_next);
	r->rtt_queue->rtq_count++;

	RT_UNLOCK();

	return 0;
}

/*
 * Workqueue body: walk every queue and fire the timers that have
 * expired, then re-arm the periodic callout.
 */
static void
rt_timer_work(struct work *wk, void *arg)
{
	struct rttimer_queue *rtq;
	struct rttimer *r;

	RT_WLOCK();
	LIST_FOREACH(rtq, &rttimer_queue_head, rtq_link) {
		while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL &&
		    (r->rtt_time + rtq->rtq_timeout) < time_uptime) {
			LIST_REMOVE(r, rtt_link);
			TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
			/*
			 * Take a reference to avoid the rtentry is freed
			 * accidentally after RT_UNLOCK.  The callback
			 * (rtt_func) must rt_unref it by itself.
			 */
			rt_ref(r->rtt_rt);
			RT_REFCNT_TRACE(r->rtt_rt);
			RT_UNLOCK();
			(*r->rtt_func)(r->rtt_rt, r);
			pool_put(&rttimer_pool, r);
			RT_WLOCK();
			if (rtq->rtq_count > 0)
				rtq->rtq_count--;
			else
				printf("rt_timer_timer: rtq_count reached 0\n");
		}
	}
	RT_UNLOCK();

	callout_reset(&rt_timer_ch, hz, rt_timer_timer, NULL);
}

/* Callout handler: defer the real work to the workqueue. */
static void
rt_timer_timer(void *arg)
{

	workqueue_enqueue(rt_timer_wq, &rt_timer_wk, NULL);
}

/*
 * Look the cache's destination up in the routing table and, if the
 * resulting route is up, install it in the cache.  `flag' selects
 * cloning (passed to rtalloc1).  Returns the cached route or NULL.
 */
static struct rtentry *
_rtcache_init(struct route *ro, int flag)
{
	struct rtentry *rt;

	rtcache_invariants(ro);
	KASSERT(ro->_ro_rt == NULL);

	if (rtcache_getdst(ro) == NULL)
		return NULL;
	rt = rtalloc1(rtcache_getdst(ro), flag);
	if (rt != NULL) {
		RT_RLOCK();
		if (ISSET(rt->rt_flags, RTF_UP)) {
			ro->_ro_rt = rt;
			ro->ro_rtcache_generation = rtcache_generation;
			rtcache_ref(rt, ro);
		}
		RT_UNLOCK();
		rt_unref(rt);
	}

	rtcache_invariants(ro);
	return ro->_ro_rt;
}

/* Initialize a route cache, allowing route cloning. */
struct rtentry *
rtcache_init(struct route *ro)
{

	return _rtcache_init(ro, 1);
}

/* Initialize a route cache without cloning. */
struct rtentry *
rtcache_init_noclone(struct route *ro)
{

	return _rtcache_init(ro, 0);
}

/* Discard the cached route and look it up again. */
struct rtentry *
rtcache_update(struct route *ro, int clone)
{

	ro->_ro_rt = NULL;
	return _rtcache_init(ro, clone);
}

/*
 * Duplicate old_ro into new_ro: copy the destination and share the
 * cached route (new_ro does not take its own psref here).
 */
void
rtcache_copy(struct route *new_ro, struct route *old_ro)
{
	struct rtentry *rt;
	int ret;

	KASSERT(new_ro != old_ro);
	rtcache_invariants(new_ro);
	rtcache_invariants(old_ro);

	rt = rtcache_validate(old_ro);

	if (rtcache_getdst(old_ro) == NULL)
		goto out;
	ret = rtcache_setdst(new_ro, rtcache_getdst(old_ro));
	if (ret != 0)
		goto out;

	RT_RLOCK();
	new_ro->_ro_rt = rt;
	new_ro->ro_rtcache_generation = rtcache_generation;
	RT_UNLOCK();
	rtcache_invariants(new_ro);
out:
	rtcache_unref(rt, old_ro);
	return;
}

#if defined(RT_DEBUG) && defined(NET_MPSAFE)
/* Debug-only: log psref acquire/release against a cached route. */
static void
rtcache_trace(const char *func, struct rtentry *rt, struct route *ro)
{
	char dst[64];

	sockaddr_format(ro->ro_sa, dst, 64);
	printf("trace: %s:\tdst=%s cpu=%d lwp=%p psref=%p target=%p\n", func, dst,
	    cpu_index(curcpu()), curlwp, &ro->ro_psref, &rt->rt_psref);
}
#define RTCACHE_PSREF_TRACE(rt, ro)	rtcache_trace(__func__, (rt), (ro))
#else
#define RTCACHE_PSREF_TRACE(rt, ro)	do {} while (0)
#endif

/* Take a psref on the cached route (NET_MPSAFE only; no-op otherwise). */
static void
rtcache_ref(struct rtentry *rt, struct route *ro)
{

	KASSERT(rt != NULL);

#ifdef NET_MPSAFE
	RTCACHE_PSREF_TRACE(rt, ro);
	ro->ro_bound = curlwp_bind();
	psref_acquire(&ro->ro_psref, &rt->rt_psref, rt_psref_class);
#endif
}

/* Release the psref taken by rtcache_ref; NULL rt is a no-op. */
void
rtcache_unref(struct rtentry *rt, struct route *ro)
{

	if (rt == NULL)
		return;

#ifdef NET_MPSAFE
	psref_release(&ro->ro_psref, &rt->rt_psref, rt_psref_class);
	curlwp_bindx(ro->ro_bound);
	RTCACHE_PSREF_TRACE(rt, ro);
#endif
}

/*
 * Return the cached route if it is still valid (generation matches and
 * the route is RTF_UP), referenced via rtcache_ref; otherwise NULL.
 * Under NET_MPSAFE an RTF_UPDATING route is waited on when sleeping is
 * allowed.
 */
struct rtentry *
rtcache_validate(struct route *ro)
{
	struct rtentry *rt = NULL;

#ifdef NET_MPSAFE
retry:
#endif
	rtcache_invariants(ro);
	RT_RLOCK();
	if (ro->ro_rtcache_generation != rtcache_generation) {
		/* The cache is invalidated */
		rt = NULL;
		goto out;
	}

	rt = ro->_ro_rt;
	if (rt == NULL)
		goto out;

	if ((rt->rt_flags & RTF_UP) == 0) {
		rt = NULL;
		goto out;
	}
#ifdef NET_MPSAFE
	if (ISSET(rt->rt_flags, RTF_UPDATING)) {
		if (rt_wait_ok()) {
			RT_UNLOCK();

			/* We can wait until the update is complete */
			rt_update_wait();
			goto retry;
		} else {
			rt = NULL;
		}
	} else
#endif
		rtcache_ref(rt, ro);
out:
	RT_UNLOCK();
	return rt;
}

/*
 * Cache-aware lookup: if the cache already holds a valid route for dst,
 * return it (*hitp = 1); otherwise re-seed the cache for dst (*hitp = 0).
 */
struct rtentry *
rtcache_lookup2(struct route *ro, const struct sockaddr *dst,
    int clone, int *hitp)
{
	const struct sockaddr *odst;
	struct rtentry *rt = NULL;

	odst = rtcache_getdst(ro);
	if (odst == NULL)
		goto miss;

	if (sockaddr_cmp(odst, dst) != 0) {
		rtcache_free(ro);
		goto miss;
	}

	rt = rtcache_validate(ro);
	if (rt == NULL) {
		ro->_ro_rt = NULL;
		goto miss;
	}

	rtcache_invariants(ro);

	if (hitp != NULL)
		*hitp = 1;
	return rt;
miss:
	if (hitp != NULL)
		*hitp = 0;
	if (rtcache_setdst(ro, dst) == 0)
		rt = _rtcache_init(ro, clone);

	rtcache_invariants(ro);

	return rt;
}

/* Drop the cached route and free the stored destination sockaddr. */
void
rtcache_free(struct route *ro)
{

	ro->_ro_rt = NULL;
	if (ro->ro_sa != NULL) {
		sockaddr_free(ro->ro_sa);
		ro->ro_sa = NULL;
	}
	rtcache_invariants(ro);
}

/*
 * Set the cache's destination, reusing the existing sockaddr storage
 * when the address family matches.  Returns 0 or ENOMEM.
 */
int
rtcache_setdst(struct route *ro, const struct sockaddr *sa)
{
	KASSERT(sa != NULL);

	rtcache_invariants(ro);
	if (ro->ro_sa != NULL) {
		if (ro->ro_sa->sa_family == sa->sa_family) {
			ro->_ro_rt = NULL;
			sockaddr_copy(ro->ro_sa, ro->ro_sa->sa_len, sa);
			rtcache_invariants(ro);
			return 0;
		}
		/* free ro_sa, wrong family */
		rtcache_free(ro);
	}

	KASSERT(ro->_ro_rt == NULL);

	if ((ro->ro_sa = sockaddr_dup(sa, M_ZERO | M_NOWAIT)) == NULL) {
		rtcache_invariants(ro);
		return ENOMEM;
	}
	rtcache_invariants(ro);
	return 0;
}

/*
 * Attach a copy of `tag' to the route, replacing any previous tag.
 * Returns the new tag, or NULL if the duplication failed.
 */
const struct sockaddr *
rt_settag(struct rtentry *rt, const struct sockaddr *tag)
{
	if (rt->rt_tag != tag) {
		if (rt->rt_tag != NULL)
			sockaddr_free(rt->rt_tag);
		rt->rt_tag = sockaddr_dup(tag, M_ZERO | M_NOWAIT);
	}
	return rt->rt_tag;
}

/* Return the route's tag (may be NULL). */
struct sockaddr *
rt_gettag(const struct rtentry *rt)
{
	return rt->rt_tag;
}

/*
 * Map an RTF_REJECT route to the errno an output path should return,
 * or 0 if the route is usable.  Mimics looutput's behavior.
 */
int
rt_check_reject_route(const struct rtentry *rt, const struct ifnet *ifp)
{

	if ((rt->rt_flags & RTF_REJECT) != 0) {
		/* Mimic looutput */
		if (ifp->if_flags & IFF_LOOPBACK)
			return (rt->rt_flags & RTF_HOST) ?
			    EHOSTUNREACH : ENETUNREACH;
		else if (rt->rt_rmx.rmx_expire == 0 ||
		    time_uptime < rt->rt_rmx.rmx_expire)
			return (rt->rt_flags & RTF_GATEWAY) ?
			    EHOSTUNREACH : EHOSTDOWN;
	}

	return 0;
}

/*
 * Repeatedly find a route in `family' matching predicate f and delete
 * it, until no match remains.  Each deletion happens outside the read
 * lock with a reference held on the entry.
 */
void
rt_delete_matched_entries(sa_family_t family, int (*f)(struct rtentry *, void *),
    void *v)
{

	for (;;) {
		int s;
		int error;
		struct rtentry *rt, *retrt = NULL;

		RT_RLOCK();
		s = splsoftnet();
		rt = rtbl_search_matched_entry(family, f, v);
		if (rt == NULL) {
			splx(s);
			RT_UNLOCK();
			return;
		}
		rt_ref(rt);
		splx(s);
		RT_UNLOCK();

		error = rtrequest(RTM_DELETE, rt_getkey(rt), rt->rt_gateway,
		    rt_mask(rt), rt->rt_flags, &retrt);
		if (error == 0) {
			KASSERT(retrt == rt);
			KASSERT((retrt->rt_flags & RTF_UP) == 0);
			retrt->rt_ifp = NULL;
			rt_unref(rt);
			rt_free(retrt);
		} else if (error == ESRCH) {
			/* Someone deleted the entry already. */
			rt_unref(rt);
		} else {
			log(LOG_ERR, "%s: unable to delete rtentry @ %p, "
			    "error = %d\n", rt->rt_ifp->if_xname, rt, error);
			/* XXX how to treat this case? */
		}
	}
}

/* Walk the routing table of `family' with the lock already held. */
static int
rt_walktree_locked(sa_family_t family, int (*f)(struct rtentry *, void *),
    void *v)
{

	return rtbl_walktree(family, f, v);
}

/* Walk the routing table of `family', taking the read lock. */
int
rt_walktree(sa_family_t family, int (*f)(struct rtentry *, void *), void *v)
{
	int error;

	RT_RLOCK();
	error = rt_walktree_locked(family, f, v);
	RT_UNLOCK();

	return error;
}

#ifdef DDB

#include <machine/db_machdep.h>
#include <ddb/db_interface.h>
#include <ddb/db_output.h>

#define	rt_expire rt_rmx.rmx_expire

/* Dump a sockaddr as a raw byte list for ddb. */
static void
db_print_sa(const struct sockaddr *sa)
{
	int len;
	const u_char *p;

	if (sa == NULL) {
		db_printf("[NULL]");
		return;
	}

	p = (const u_char *)sa;
	len = sa->sa_len;
	db_printf("[");
	while (len > 0) {
		db_printf("%d", *p);
		p++; len--;
		if (len) db_printf(",");
	}
	db_printf("]\n");
}

/* Dump the interesting fields of an ifaddr for ddb. */
static void
db_print_ifa(struct ifaddr *ifa)
{
	if (ifa == NULL)
		return;
	db_printf("  ifa_addr=");
	db_print_sa(ifa->ifa_addr);
	db_printf("  ifa_dsta=");
	db_print_sa(ifa->ifa_dstaddr);
	db_printf("  ifa_mask=");
	db_print_sa(ifa->ifa_netmask);
	db_printf("  flags=0x%x,refcnt=%d,metric=%d\n",
	    ifa->ifa_flags,
	    ifa->ifa_refcnt,
	    ifa->ifa_metric);
}

/*
 * Function to pass to rt_walktree().
 * Return non-zero error to abort walk.
 */
static int
db_show_rtentry(struct rtentry *rt, void *w)
{
	db_printf("rtentry=%p", rt);

	db_printf(" flags=0x%x refcnt=%d use=%"PRId64" expire=%"PRId64"\n",
	    rt->rt_flags, rt->rt_refcnt,
	    rt->rt_use, (uint64_t)rt->rt_expire);

	db_printf(" key="); db_print_sa(rt_getkey(rt));
	db_printf(" mask="); db_print_sa(rt_mask(rt));
	db_printf(" gw="); db_print_sa(rt->rt_gateway);

	db_printf(" ifp=%p ", rt->rt_ifp);
	if (rt->rt_ifp)
		db_printf("(%s)", rt->rt_ifp->if_xname);
	else
		db_printf("(NULL)");

	db_printf(" ifa=%p\n", rt->rt_ifa);
	db_print_ifa(rt->rt_ifa);

	db_printf(" gwroute=%p llinfo=%p\n",
	    rt->rt_gwroute, rt->rt_llinfo);

	return 0;
}

/*
 * Function to print all the route trees.
 * Use this from ddb:  "show routes"
 */
void
db_show_routes(db_expr_t addr, bool have_addr,
    db_expr_t count, const char *modif)
{

	/* Taking RT_LOCK will fail if LOCKDEBUG is enabled. */
	rt_walktree_locked(AF_INET, db_show_rtentry, NULL);
}
#endif