1 /* $NetBSD: route.c,v 1.201 2017/09/25 04:15:33 ozaki-r Exp $ */ 2 3 /*- 4 * Copyright (c) 1998, 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Kevin M. Lahey of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 35 * All rights reserved. 36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. 
Redistributions of source code must retain the above copyright 41 * notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. Neither the name of the project nor the names of its contributors 46 * may be used to endorse or promote products derived from this software 47 * without specific prior written permission. 48 * 49 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 52 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 59 * SUCH DAMAGE. 60 */ 61 62 /* 63 * Copyright (c) 1980, 1986, 1991, 1993 64 * The Regents of the University of California. All rights reserved. 65 * 66 * Redistribution and use in source and binary forms, with or without 67 * modification, are permitted provided that the following conditions 68 * are met: 69 * 1. Redistributions of source code must retain the above copyright 70 * notice, this list of conditions and the following disclaimer. 71 * 2. Redistributions in binary form must reproduce the above copyright 72 * notice, this list of conditions and the following disclaimer in the 73 * documentation and/or other materials provided with the distribution. 
74 * 3. Neither the name of the University nor the names of its contributors 75 * may be used to endorse or promote products derived from this software 76 * without specific prior written permission. 77 * 78 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 79 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 80 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 81 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 82 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 83 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 84 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 85 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 86 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 87 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 88 * SUCH DAMAGE. 89 * 90 * @(#)route.c 8.3 (Berkeley) 1/9/95 91 */ 92 93 #ifdef _KERNEL_OPT 94 #include "opt_inet.h" 95 #include "opt_route.h" 96 #include "opt_net_mpsafe.h" 97 #endif 98 99 #include <sys/cdefs.h> 100 __KERNEL_RCSID(0, "$NetBSD: route.c,v 1.201 2017/09/25 04:15:33 ozaki-r Exp $"); 101 102 #include <sys/param.h> 103 #ifdef RTFLUSH_DEBUG 104 #include <sys/sysctl.h> 105 #endif 106 #include <sys/systm.h> 107 #include <sys/callout.h> 108 #include <sys/proc.h> 109 #include <sys/mbuf.h> 110 #include <sys/socket.h> 111 #include <sys/socketvar.h> 112 #include <sys/domain.h> 113 #include <sys/kernel.h> 114 #include <sys/ioctl.h> 115 #include <sys/pool.h> 116 #include <sys/kauth.h> 117 #include <sys/workqueue.h> 118 #include <sys/syslog.h> 119 #include <sys/rwlock.h> 120 #include <sys/mutex.h> 121 #include <sys/cpu.h> 122 123 #include <net/if.h> 124 #include <net/if_dl.h> 125 #include <net/route.h> 126 #if defined(INET) || defined(INET6) 127 #include <net/if_llatbl.h> 128 #endif 129 130 #include 
<netinet/in.h> 131 #include <netinet/in_var.h> 132 133 #ifdef RTFLUSH_DEBUG 134 #define rtcache_debug() __predict_false(_rtcache_debug) 135 #else /* RTFLUSH_DEBUG */ 136 #define rtcache_debug() 0 137 #endif /* RTFLUSH_DEBUG */ 138 139 #ifdef RT_DEBUG 140 #define RT_REFCNT_TRACE(rt) printf("%s:%d: rt=%p refcnt=%d\n", \ 141 __func__, __LINE__, (rt), (rt)->rt_refcnt) 142 #else 143 #define RT_REFCNT_TRACE(rt) do {} while (0) 144 #endif 145 146 #ifdef DEBUG 147 #define dlog(level, fmt, args...) log(level, fmt, ##args) 148 #else 149 #define dlog(level, fmt, args...) do {} while (0) 150 #endif 151 152 struct rtstat rtstat; 153 154 static int rttrash; /* routes not in table but not freed */ 155 156 static struct pool rtentry_pool; 157 static struct pool rttimer_pool; 158 159 static struct callout rt_timer_ch; /* callout for rt_timer_timer() */ 160 static struct workqueue *rt_timer_wq; 161 static struct work rt_timer_wk; 162 163 static void rt_timer_init(void); 164 static void rt_timer_queue_remove_all(struct rttimer_queue *); 165 static void rt_timer_remove_all(struct rtentry *); 166 static void rt_timer_timer(void *); 167 168 /* 169 * Locking notes: 170 * - The routing table is protected by a global rwlock 171 * - API: RT_RLOCK and friends 172 * - rtcaches are NOT protected by the framework 173 * - Callers must guarantee a rtcache isn't accessed simultaneously 174 * - How the constraint is guranteed in the wild 175 * - Protect a rtcache by a mutex (e.g., inp_route) 176 * - Make rtcache per-CPU and allow only accesses from softint 177 * (e.g., ipforward_rt_percpu) 178 * - References to a rtentry is managed by reference counting and psref 179 * - Reference couting is used for temporal reference when a rtentry 180 * is fetched from the routing table 181 * - psref is used for temporal reference when a rtentry is fetched 182 * from a rtcache 183 * - struct route (rtcache) has struct psref, so we cannot obtain 184 * a reference twice on the same struct route 185 * - Befere 
destroying or updating a rtentry, we have to wait for 186 * all references left (see below for details) 187 * - APIs 188 * - An obtained rtentry via rtalloc1 or rtrequest* must be 189 * unreferenced by rt_unref 190 * - An obtained rtentry via rtcache_* must be unreferenced by 191 * rtcache_unref 192 * - TODO: once we get a lockless routing table, we should use only 193 * psref for rtentries 194 * - rtentry destruction 195 * - A rtentry is destroyed (freed) only when we call rtrequest(RTM_DELETE) 196 * - If a caller of rtrequest grabs a reference of a rtentry, the caller 197 * has a responsibility to destroy the rtentry by itself by calling 198 * rt_free 199 * - If not, rtrequest itself does that 200 * - If rt_free is called in softint, the actual destruction routine is 201 * deferred to a workqueue 202 * - rtentry update 203 * - When updating a rtentry, RTF_UPDATING flag is set 204 * - If a rtentry is set RTF_UPDATING, fetching the rtentry from 205 * the routing table or a rtcache results in either of the following 206 * cases: 207 * - if the caller runs in softint, the caller fails to fetch 208 * - otherwise, the caller waits for the update completed and retries 209 * to fetch (probably succeed to fetch for the second time) 210 * - rtcache invalidation 211 * - There is a global generation counter that is incremented when 212 * any routes have been added or deleted 213 * - When a rtcache caches a rtentry into itself, it also stores 214 * a snapshot of the generation counter 215 * - If the snapshot equals to the global counter, the cache is valid, 216 * otherwise the cache is invalidated 217 */ 218 219 /* 220 * Global lock for the routing table. 
221 */ 222 static krwlock_t rt_lock __cacheline_aligned; 223 #ifdef NET_MPSAFE 224 #define RT_RLOCK() rw_enter(&rt_lock, RW_READER) 225 #define RT_WLOCK() rw_enter(&rt_lock, RW_WRITER) 226 #define RT_UNLOCK() rw_exit(&rt_lock) 227 #define RT_LOCKED() rw_lock_held(&rt_lock) 228 #define RT_ASSERT_WLOCK() KASSERT(rw_write_held(&rt_lock)) 229 #else 230 #define RT_RLOCK() do {} while (0) 231 #define RT_WLOCK() do {} while (0) 232 #define RT_UNLOCK() do {} while (0) 233 #define RT_LOCKED() false 234 #define RT_ASSERT_WLOCK() do {} while (0) 235 #endif 236 237 static uint64_t rtcache_generation; 238 239 /* 240 * mutex and cv that are used to wait for references to a rtentry left 241 * before updating the rtentry. 242 */ 243 static struct { 244 kmutex_t lock; 245 kcondvar_t cv; 246 bool ongoing; 247 const struct lwp *lwp; 248 } rt_update_global __cacheline_aligned; 249 250 /* 251 * A workqueue and stuff that are used to defer the destruction routine 252 * of rtentries. 253 */ 254 static struct { 255 struct workqueue *wq; 256 struct work wk; 257 kmutex_t lock; 258 struct rtentry *queue[10]; 259 } rt_free_global __cacheline_aligned; 260 261 /* psref for rtentry */ 262 static struct psref_class *rt_psref_class __read_mostly; 263 264 #ifdef RTFLUSH_DEBUG 265 static int _rtcache_debug = 0; 266 #endif /* RTFLUSH_DEBUG */ 267 268 static kauth_listener_t route_listener; 269 270 static int rtdeletemsg(struct rtentry *); 271 272 static void rt_maskedcopy(const struct sockaddr *, 273 struct sockaddr *, const struct sockaddr *); 274 275 static void rtcache_invalidate(void); 276 277 static void rt_ref(struct rtentry *); 278 279 static struct rtentry * 280 rtalloc1_locked(const struct sockaddr *, int, bool, bool); 281 282 static void rtcache_ref(struct rtentry *, struct route *); 283 284 #ifdef NET_MPSAFE 285 static void rt_update_wait(void); 286 #endif 287 288 static bool rt_wait_ok(void); 289 static void rt_wait_refcnt(const char *, struct rtentry *, int); 290 static void 
rt_wait_psref(struct rtentry *);

#ifdef DDB
static void db_print_sa(const struct sockaddr *);
static void db_print_ifa(struct ifaddr *);
static int db_show_rtentry(struct rtentry *, void *);
#endif

#ifdef RTFLUSH_DEBUG
static void sysctl_net_rtcache_setup(struct sysctllog **);
/*
 * Create the net.rtcache sysctl subtree with a single read-write
 * "debug" knob backed by _rtcache_debug.
 */
static void
sysctl_net_rtcache_setup(struct sysctllog **clog)
{
	const struct sysctlnode *rnode;

	if (sysctl_createv(clog, 0, NULL, &rnode, CTLFLAG_PERMANENT,
	    CTLTYPE_NODE,
	    "rtcache", SYSCTL_DESCR("Route cache related settings"),
	    NULL, 0, NULL, 0, CTL_NET, CTL_CREATE, CTL_EOL) != 0)
		return;
	if (sysctl_createv(clog, 0, &rnode, &rnode,
	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT,
	    "debug", SYSCTL_DESCR("Debug route caches"),
	    NULL, 0, &_rtcache_debug, 0, CTL_CREATE, CTL_EOL) != 0)
		return;
}
#endif /* RTFLUSH_DEBUG */

/*
 * Release the sockaddrs held by rt (key, gateway, tag) and NULL the
 * pointers.  Does not free the rtentry itself; see _rt_free for that.
 */
static inline void
rt_destroy(struct rtentry *rt)
{
	if (rt->_rt_key != NULL)
		sockaddr_free(rt->_rt_key);
	if (rt->rt_gateway != NULL)
		sockaddr_free(rt->rt_gateway);
	if (rt_gettag(rt) != NULL)
		sockaddr_free(rt_gettag(rt));
	rt->_rt_key = rt->rt_gateway = rt->rt_tag = NULL;
}

/*
 * Install a copy of `key' as the rtentry's key and point the radix
 * node at it.  `flags' is passed to sockaddr_dup (M_NOWAIT/M_WAITOK),
 * so the result can be NULL on allocation failure — callers check
 * rt_getkey() afterwards.
 */
static inline const struct sockaddr *
rt_setkey(struct rtentry *rt, const struct sockaddr *key, int flags)
{
	/* Same sockaddr already installed: just refresh the radix key. */
	if (rt->_rt_key == key)
		goto out;

	if (rt->_rt_key != NULL)
		sockaddr_free(rt->_rt_key);
	rt->_rt_key = sockaddr_dup(key, flags);
out:
	rt->rt_nodes->rn_key = (const char *)rt->_rt_key;
	return rt->_rt_key;
}

/*
 * Return the ifaddr associated with rt, giving the address family a
 * chance to substitute a different one via the ifa_getifa hook (in
 * which case rt is re-pointed at the new ifaddr).
 */
struct ifaddr *
rt_get_ifa(struct rtentry *rt)
{
	struct ifaddr *ifa;

	if ((ifa = rt->rt_ifa) == NULL)
		return ifa;
	else if (ifa->ifa_getifa == NULL)
		return ifa;
#if 0
	else if (ifa->ifa_seqno != NULL && *ifa->ifa_seqno == rt->rt_ifa_seqno)
		return ifa;
#endif
	else {
		ifa = (*ifa->ifa_getifa)(ifa, rt_getkey(rt));
		if (ifa == NULL)
			return NULL;
		rt_replace_ifa(rt, ifa);
		return ifa;
	}
}

/*
 * Point rt at ifa and snapshot the ifaddr's sequence number.
 * Reference counting is the caller's job (see rt_set_ifa/rt_replace_ifa).
 */
static void
rt_set_ifa1(struct rtentry *rt, struct ifaddr *ifa)
{
	rt->rt_ifa = ifa;
	if (ifa->ifa_seqno != NULL)
		rt->rt_ifa_seqno = *ifa->ifa_seqno;
}

/*
 * Is this route the connected route for the ifa?
 */
static int
rt_ifa_connected(const struct rtentry *rt, const struct ifaddr *ifa)
{
	const struct sockaddr *key, *dst, *odst;
	struct sockaddr_storage maskeddst;

	key = rt_getkey(rt);
	dst = rt->rt_flags & RTF_HOST ? ifa->ifa_dstaddr : ifa->ifa_addr;
	if (dst == NULL ||
	    dst->sa_family != key->sa_family ||
	    dst->sa_len != key->sa_len)
		return 0;
	/* For a network route, compare under the ifa's netmask. */
	if ((rt->rt_flags & RTF_HOST) == 0 && ifa->ifa_netmask) {
		odst = dst;
		dst = (struct sockaddr *)&maskeddst;
		rt_maskedcopy(odst, (struct sockaddr *)&maskeddst,
		    ifa->ifa_netmask);
	}
	return (memcmp(dst, key, dst->sa_len) == 0);
}

/*
 * Replace rt's ifaddr with ifa, migrating the IFA_ROUTE marker from
 * the old ifaddr to the new one when rt is the connected route.
 * Takes a reference on ifa and drops the one held on the old ifaddr.
 */
void
rt_replace_ifa(struct rtentry *rt, struct ifaddr *ifa)
{
	if (rt->rt_ifa &&
	    rt->rt_ifa != ifa &&
	    rt->rt_ifa->ifa_flags & IFA_ROUTE &&
	    rt_ifa_connected(rt, rt->rt_ifa))
	{
		RT_DPRINTF("rt->_rt_key = %p, ifa = %p, "
		    "replace deleted IFA_ROUTE\n",
		    (void *)rt->_rt_key, (void *)rt->rt_ifa);
		rt->rt_ifa->ifa_flags &= ~IFA_ROUTE;
		if (rt_ifa_connected(rt, ifa)) {
			RT_DPRINTF("rt->_rt_key = %p, ifa = %p, "
			    "replace added IFA_ROUTE\n",
			    (void *)rt->_rt_key, (void *)ifa);
			ifa->ifa_flags |= IFA_ROUTE;
		}
	}

	ifaref(ifa);
	ifafree(rt->rt_ifa);
	rt_set_ifa1(rt, ifa);
}

/* Initial assignment of an ifaddr to rt; takes a reference on ifa. */
static void
rt_set_ifa(struct rtentry *rt, struct ifaddr *ifa)
{
	ifaref(ifa);
	rt_set_ifa1(rt, ifa);
}

/*
 * kauth(9) listener for KAUTH_NETWORK_ROUTE requests: allow read-only
 * RTM_GET unconditionally, defer everything else to other listeners.
 */
static int
route_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
    void *arg0, void *arg1, void *arg2, void *arg3)
{
	struct rt_msghdr *rtm;
	int result;

	result = KAUTH_RESULT_DEFER;
	rtm = arg1;
439 440 if (action != KAUTH_NETWORK_ROUTE) 441 return result; 442 443 if (rtm->rtm_type == RTM_GET) 444 result = KAUTH_RESULT_ALLOW; 445 446 return result; 447 } 448 449 static void rt_free_work(struct work *, void *); 450 451 void 452 rt_init(void) 453 { 454 int error; 455 456 #ifdef RTFLUSH_DEBUG 457 sysctl_net_rtcache_setup(NULL); 458 #endif 459 460 mutex_init(&rt_free_global.lock, MUTEX_DEFAULT, IPL_SOFTNET); 461 rt_psref_class = psref_class_create("rtentry", IPL_SOFTNET); 462 463 error = workqueue_create(&rt_free_global.wq, "rt_free", 464 rt_free_work, NULL, PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE); 465 if (error) 466 panic("%s: workqueue_create failed (%d)\n", __func__, error); 467 468 mutex_init(&rt_update_global.lock, MUTEX_DEFAULT, IPL_SOFTNET); 469 cv_init(&rt_update_global.cv, "rt_update"); 470 471 pool_init(&rtentry_pool, sizeof(struct rtentry), 0, 0, 0, "rtentpl", 472 NULL, IPL_SOFTNET); 473 pool_init(&rttimer_pool, sizeof(struct rttimer), 0, 0, 0, "rttmrpl", 474 NULL, IPL_SOFTNET); 475 476 rn_init(); /* initialize all zeroes, all ones, mask table */ 477 rtbl_init(); 478 479 route_listener = kauth_listen_scope(KAUTH_SCOPE_NETWORK, 480 route_listener_cb, NULL); 481 } 482 483 static void 484 rtcache_invalidate(void) 485 { 486 487 RT_ASSERT_WLOCK(); 488 489 if (rtcache_debug()) 490 printf("%s: enter\n", __func__); 491 492 rtcache_generation++; 493 } 494 495 #ifdef RT_DEBUG 496 static void 497 dump_rt(const struct rtentry *rt) 498 { 499 char buf[512]; 500 501 aprint_normal("rt: "); 502 aprint_normal("p=%p ", rt); 503 if (rt->_rt_key == NULL) { 504 aprint_normal("dst=(NULL) "); 505 } else { 506 sockaddr_format(rt->_rt_key, buf, sizeof(buf)); 507 aprint_normal("dst=%s ", buf); 508 } 509 if (rt->rt_gateway == NULL) { 510 aprint_normal("gw=(NULL) "); 511 } else { 512 sockaddr_format(rt->_rt_key, buf, sizeof(buf)); 513 aprint_normal("gw=%s ", buf); 514 } 515 aprint_normal("flags=%x ", rt->rt_flags); 516 if (rt->rt_ifp == NULL) { 517 aprint_normal("if=(NULL) "); 518 
	} else {
		aprint_normal("if=%s ", rt->rt_ifp->if_xname);
	}
	aprint_normal("\n");
}
#endif /* RT_DEBUG */

/*
 * Packet routing routines.  On success the refcnt of the returned
 * rtentry has been incremented; the caller must release it with
 * rt_unref (or rt_free) by itself.
 */
struct rtentry *
rtalloc1_locked(const struct sockaddr *dst, int report, bool wait_ok,
    bool wlock)
{
	rtbl_t *rtbl;
	struct rtentry *rt;
	int s;

#ifdef NET_MPSAFE
retry:
#endif
	s = splsoftnet();
	rtbl = rt_gettable(dst->sa_family);
	if (rtbl == NULL)
		goto miss;

	rt = rt_matchaddr(rtbl, dst);
	if (rt == NULL)
		goto miss;

	if (!ISSET(rt->rt_flags, RTF_UP))
		goto miss;

#ifdef NET_MPSAFE
	/*
	 * The entry is being modified.  Unless we are the updater
	 * ourselves, drop the lock, wait for the update to finish and
	 * look the route up again (softint context cannot wait and
	 * treats this as a miss).
	 */
	if (ISSET(rt->rt_flags, RTF_UPDATING) &&
	    /* XXX updater should be always able to acquire */
	    curlwp != rt_update_global.lwp) {
		if (!wait_ok || !rt_wait_ok())
			goto miss;
		RT_UNLOCK();
		splx(s);

		/* We can wait until the update is complete */
		rt_update_wait();

		/* Re-take the lock in the mode the caller held it. */
		if (wlock)
			RT_WLOCK();
		else
			RT_RLOCK();
		goto retry;
	}
#endif /* NET_MPSAFE */

	rt_ref(rt);
	RT_REFCNT_TRACE(rt);

	splx(s);
	return rt;
miss:
	rtstat.rts_unreach++;
	if (report) {
		struct rt_addrinfo info;

		memset(&info, 0, sizeof(info));
		info.rti_info[RTAX_DST] = dst;
		rt_missmsg(RTM_MISS, &info, 0, 0);
	}
	splx(s);
	return NULL;
}

/*
 * Unlocked wrapper around rtalloc1_locked: takes the read lock for
 * the duration of the lookup.
 */
struct rtentry *
rtalloc1(const struct sockaddr *dst, int report)
{
	struct rtentry *rt;

	RT_RLOCK();
	rt = rtalloc1_locked(dst, report, true, false);
	RT_UNLOCK();

	return rt;
}

/* Take a counted reference on rt. */
static void
rt_ref(struct rtentry *rt)
{

	KASSERT(rt->rt_refcnt >= 0);
	atomic_inc_uint(&rt->rt_refcnt);
}

/*
 * Drop a counted reference on rt.  If the entry is dying or being
 * updated, wake up anyone in rt_wait_refcnt waiting for references
 * to drain.
 */
void
rt_unref(struct rtentry *rt)
{

	KASSERT(rt != NULL);
	KASSERTMSG(rt->rt_refcnt > 0, "refcnt=%d", rt->rt_refcnt);

	atomic_dec_uint(&rt->rt_refcnt);
	if (!ISSET(rt->rt_flags, RTF_UP) || ISSET(rt->rt_flags, RTF_UPDATING)) {
		mutex_enter(&rt_free_global.lock);
		cv_broadcast(&rt->rt_cv);
		mutex_exit(&rt_free_global.lock);
	}
}

/* True if the current context may sleep (not hard or soft interrupt). */
static bool
rt_wait_ok(void)
{

	KASSERT(!cpu_intr_p());
	return !cpu_softintr_p();
}

/* Sleep until rt's reference count has drained down to cnt. */
void
rt_wait_refcnt(const char *title, struct rtentry *rt, int cnt)
{
	mutex_enter(&rt_free_global.lock);
	while (rt->rt_refcnt > cnt) {
		dlog(LOG_DEBUG, "%s: %s waiting (refcnt=%d)\n",
		    __func__, title, rt->rt_refcnt);
		cv_wait(&rt->rt_cv, &rt_free_global.lock);
		dlog(LOG_DEBUG, "%s: %s waited (refcnt=%d)\n",
		    __func__, title, rt->rt_refcnt);
	}
	mutex_exit(&rt_free_global.lock);
}

/*
 * Wait for all psref references to rt to be released, then re-arm the
 * psref target so the entry can be handed out again.
 */
void
rt_wait_psref(struct rtentry *rt)
{

	psref_target_destroy(&rt->rt_psref, rt_psref_class);
	psref_target_init(&rt->rt_psref, rt_psref_class);
}

/*
 * Actually destroy a dead rtentry: drain references, release the
 * ifaddr and sockaddrs, and return the entry to the pool.
 */
static void
_rt_free(struct rtentry *rt)
{
	struct ifaddr *ifa;

	/*
	 * Need to avoid a deadlock on rt_wait_refcnt of update
	 * and a conflict on psref_target_destroy of update.
663 */ 664 #ifdef NET_MPSAFE 665 rt_update_wait(); 666 #endif 667 668 RT_REFCNT_TRACE(rt); 669 KASSERTMSG(rt->rt_refcnt >= 0, "refcnt=%d", rt->rt_refcnt); 670 rt_wait_refcnt("free", rt, 0); 671 #ifdef NET_MPSAFE 672 psref_target_destroy(&rt->rt_psref, rt_psref_class); 673 #endif 674 675 rt_assert_inactive(rt); 676 rttrash--; 677 ifa = rt->rt_ifa; 678 rt->rt_ifa = NULL; 679 ifafree(ifa); 680 rt->rt_ifp = NULL; 681 cv_destroy(&rt->rt_cv); 682 rt_destroy(rt); 683 pool_put(&rtentry_pool, rt); 684 } 685 686 static void 687 rt_free_work(struct work *wk, void *arg) 688 { 689 int i; 690 struct rtentry *rt; 691 692 restart: 693 mutex_enter(&rt_free_global.lock); 694 for (i = 0; i < sizeof(rt_free_global.queue); i++) { 695 if (rt_free_global.queue[i] == NULL) 696 continue; 697 rt = rt_free_global.queue[i]; 698 rt_free_global.queue[i] = NULL; 699 mutex_exit(&rt_free_global.lock); 700 701 atomic_dec_uint(&rt->rt_refcnt); 702 _rt_free(rt); 703 goto restart; 704 } 705 mutex_exit(&rt_free_global.lock); 706 } 707 708 void 709 rt_free(struct rtentry *rt) 710 { 711 712 KASSERT(rt->rt_refcnt > 0); 713 if (!rt_wait_ok()) { 714 int i; 715 mutex_enter(&rt_free_global.lock); 716 for (i = 0; i < sizeof(rt_free_global.queue); i++) { 717 if (rt_free_global.queue[i] == NULL) { 718 rt_free_global.queue[i] = rt; 719 break; 720 } 721 } 722 KASSERT(i < sizeof(rt_free_global.queue)); 723 rt_ref(rt); 724 mutex_exit(&rt_free_global.lock); 725 workqueue_enqueue(rt_free_global.wq, &rt_free_global.wk, NULL); 726 } else { 727 atomic_dec_uint(&rt->rt_refcnt); 728 _rt_free(rt); 729 } 730 } 731 732 #ifdef NET_MPSAFE 733 static void 734 rt_update_wait(void) 735 { 736 737 mutex_enter(&rt_update_global.lock); 738 while (rt_update_global.ongoing) { 739 dlog(LOG_DEBUG, "%s: waiting lwp=%p\n", __func__, curlwp); 740 cv_wait(&rt_update_global.cv, &rt_update_global.lock); 741 dlog(LOG_DEBUG, "%s: waited lwp=%p\n", __func__, curlwp); 742 } 743 mutex_exit(&rt_update_global.lock); 744 } 745 #endif 746 747 int 748 
rt_update_prepare(struct rtentry *rt)
{

	dlog(LOG_DEBUG, "%s: updating rt=%p lwp=%p\n", __func__, rt, curlwp);

	RT_WLOCK();
	/* If the entry is being destroyed, don't proceed the update. */
	if (!ISSET(rt->rt_flags, RTF_UP)) {
		RT_UNLOCK();
		return -1;
	}
	/* Make subsequent lookups wait (or miss) until the update ends. */
	rt->rt_flags |= RTF_UPDATING;
	RT_UNLOCK();

	/* Serialize with any other in-progress update. */
	mutex_enter(&rt_update_global.lock);
	while (rt_update_global.ongoing) {
		dlog(LOG_DEBUG, "%s: waiting ongoing updating rt=%p lwp=%p\n",
		    __func__, rt, curlwp);
		cv_wait(&rt_update_global.cv, &rt_update_global.lock);
		dlog(LOG_DEBUG, "%s: waited ongoing updating rt=%p lwp=%p\n",
		    __func__, rt, curlwp);
	}
	rt_update_global.ongoing = true;
	/* XXX need it to avoid rt_update_wait by updater itself. */
	rt_update_global.lwp = curlwp;
	mutex_exit(&rt_update_global.lock);

	/* Wait until only the caller's reference (1) and psrefs remain. */
	rt_wait_refcnt("update", rt, 1);
	rt_wait_psref(rt);

	return 0;
}

/*
 * End an update started with rt_update_prepare: clear RTF_UPDATING
 * and wake every waiter (lookups and other would-be updaters).
 */
void
rt_update_finish(struct rtentry *rt)
{

	RT_WLOCK();
	rt->rt_flags &= ~RTF_UPDATING;
	RT_UNLOCK();

	mutex_enter(&rt_update_global.lock);
	rt_update_global.ongoing = false;
	rt_update_global.lwp = NULL;
	cv_broadcast(&rt_update_global.cv);
	mutex_exit(&rt_update_global.lock);

	dlog(LOG_DEBUG, "%s: updated rt=%p lwp=%p\n", __func__, rt, curlwp);
}

/*
 * Force a routing table entry to the specified
 * destination to go through the given gateway.
 * Normally called as a result of a routing redirect
 * message from the network layer.
 *
 * N.B.: must be called at splsoftnet
 */
void
rtredirect(const struct sockaddr *dst, const struct sockaddr *gateway,
    const struct sockaddr *netmask, int flags, const struct sockaddr *src,
    struct rtentry **rtp)
{
	struct rtentry *rt;
	int error = 0;
	uint64_t *stat = NULL;
	struct rt_addrinfo info;
	struct ifaddr *ifa;
	struct psref psref;

	/* verify the gateway is directly reachable */
	if ((ifa = ifa_ifwithnet_psref(gateway, &psref)) == NULL) {
		error = ENETUNREACH;
		goto out;
	}
	rt = rtalloc1(dst, 0);
	/*
	 * If the redirect isn't from our current router for this dst,
	 * it's either old or wrong.  If it redirects us to ourselves,
	 * we have a routing loop, perhaps as a result of an interface
	 * going down recently.
	 */
	if (!(flags & RTF_DONE) && rt &&
	    (sockaddr_cmp(src, rt->rt_gateway) != 0 || rt->rt_ifa != ifa))
		error = EINVAL;
	else {
		int s = pserialize_read_enter();
		struct ifaddr *_ifa;

		/* Reject a redirect that points back at one of our own
		 * addresses — that would be a routing loop. */
		_ifa = ifa_ifwithaddr(gateway);
		if (_ifa != NULL)
			error = EHOSTUNREACH;
		pserialize_read_exit(s);
	}
	if (error)
		goto done;
	/*
	 * Create a new entry if we just got back a wildcard entry
	 * or the lookup failed.  This is necessary for hosts
	 * which use routing redirects generated by smart gateways
	 * to dynamically build the routing tables.
	 */
	if (rt == NULL || (rt_mask(rt) && rt_mask(rt)->sa_len < 2))
		goto create;
	/*
	 * Don't listen to the redirect if it's
	 * for a route to an interface.
	 */
	if (rt->rt_flags & RTF_GATEWAY) {
		if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) {
			/*
			 * Changing from route to net => route to host.
			 * Create new route, rather than smashing route to net.
			 */
create:
			if (rt != NULL)
				rt_unref(rt);
			flags |= RTF_GATEWAY | RTF_DYNAMIC;
			memset(&info, 0, sizeof(info));
			info.rti_info[RTAX_DST] = dst;
			info.rti_info[RTAX_GATEWAY] = gateway;
			info.rti_info[RTAX_NETMASK] = netmask;
			info.rti_ifa = ifa;
			info.rti_flags = flags;
			rt = NULL;
			error = rtrequest1(RTM_ADD, &info, &rt);
			if (rt != NULL)
				flags = rt->rt_flags;
			stat = &rtstat.rts_dynamic;
		} else {
			/*
			 * Smash the current notion of the gateway to
			 * this destination.  Should check about netmask!!!
			 */
#ifdef NET_MPSAFE
			KASSERT(!cpu_softintr_p());

			error = rt_update_prepare(rt);
			if (error == 0) {
#endif
				error = rt_setgate(rt, gateway);
				if (error == 0) {
					rt->rt_flags |= RTF_MODIFIED;
					flags |= RTF_MODIFIED;
				}
#ifdef NET_MPSAFE
				rt_update_finish(rt);
			} else {
				/*
				 * If error != 0, the rtentry is being
				 * destroyed, so doing nothing doesn't
				 * matter.
				 */
			}
#endif
			stat = &rtstat.rts_newgateway;
		}
	} else
		error = EHOSTUNREACH;
done:
	if (rt) {
		/* Hand the route to the caller on success, else drop it. */
		if (rtp != NULL && !error)
			*rtp = rt;
		else
			rt_unref(rt);
	}
out:
	if (error)
		rtstat.rts_badredirect++;
	else if (stat != NULL)
		(*stat)++;
	/* Report the outcome on the routing socket in either case. */
	memset(&info, 0, sizeof(info));
	info.rti_info[RTAX_DST] = dst;
	info.rti_info[RTAX_GATEWAY] = gateway;
	info.rti_info[RTAX_NETMASK] = netmask;
	info.rti_info[RTAX_AUTHOR] = src;
	rt_missmsg(RTM_REDIRECT, &info, flags, error);
	ifa_release(ifa, &psref);
}

/*
 * Delete a route and generate a message.
 * It doesn't free a passed rt.
 */
static int
rtdeletemsg(struct rtentry *rt)
{
	int error;
	struct rt_addrinfo info;
	struct rtentry *retrt;

	/*
	 * Request the new route so that the entry is not actually
	 * deleted.  That will allow the information being reported to
	 * be accurate (and consistent with route_output()).
	 */
	memset(&info, 0, sizeof(info));
	info.rti_info[RTAX_DST] = rt_getkey(rt);
	info.rti_info[RTAX_NETMASK] = rt_mask(rt);
	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
	info.rti_flags = rt->rt_flags;
	error = rtrequest1(RTM_DELETE, &info, &retrt);

	rt_missmsg(RTM_DELETE, &info, info.rti_flags, error);

	return error;
}

/*
 * Find the ifaddr to attach to a route toward dst via gateway.
 * On success the returned ifaddr holds a psref reference in *psref;
 * the caller must release it with ifa_release.
 */
struct ifaddr *
ifa_ifwithroute_psref(int flags, const struct sockaddr *dst,
    const struct sockaddr *gateway, struct psref *psref)
{
	struct ifaddr *ifa = NULL;

	if ((flags & RTF_GATEWAY) == 0) {
		/*
		 * If we are adding a route to an interface,
		 * and the interface is a pt to pt link
		 * we should search for the destination
		 * as our clue to the interface.  Otherwise
		 * we can use the local address.
		 */
		if ((flags & RTF_HOST) && gateway->sa_family != AF_LINK)
			ifa = ifa_ifwithdstaddr_psref(dst, psref);
		if (ifa == NULL)
			ifa = ifa_ifwithaddr_psref(gateway, psref);
	} else {
		/*
		 * If we are adding a route to a remote net
		 * or host, the gateway may still be on the
		 * other end of a pt to pt link.
		 */
		ifa = ifa_ifwithdstaddr_psref(gateway, psref);
	}
	if (ifa == NULL)
		ifa = ifa_ifwithnet_psref(gateway, psref);
	if (ifa == NULL) {
		/* Last resort: route toward the gateway itself and use
		 * that route's interface address. */
		int s;
		struct rtentry *rt;

		/* XXX we cannot call rtalloc1 if holding the rt lock */
		if (RT_LOCKED())
			rt = rtalloc1_locked(gateway, 0, true, true);
		else
			rt = rtalloc1(gateway, 0);
		if (rt == NULL)
			return NULL;
		/* A gateway reachable only through another gateway is
		 * of no use here. */
		if (rt->rt_flags & RTF_GATEWAY) {
			rt_unref(rt);
			return NULL;
		}
		/*
		 * Just in case. May not need to do this workaround.
		 * Revisit when working on rtentry MP-ification.
		 */
		s = pserialize_read_enter();
		IFADDR_READER_FOREACH(ifa, rt->rt_ifp) {
			if (ifa == rt->rt_ifa)
				break;
		}
		if (ifa != NULL)
			ifa_acquire(ifa, psref);
		pserialize_read_exit(s);
		rt_unref(rt);
		if (ifa == NULL)
			return NULL;
	}
	/* Prefer an address of dst's family on the same interface. */
	if (ifa->ifa_addr->sa_family != dst->sa_family) {
		struct ifaddr *nifa;
		int s;

		s = pserialize_read_enter();
		nifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
		if (nifa != NULL) {
			ifa_release(ifa, psref);
			ifa_acquire(nifa, psref);
			ifa = nifa;
		}
		pserialize_read_exit(s);
	}
	return ifa;
}

/*
 * If it succeeds and ret_nrt isn't NULL, refcnt of ret_nrt is incremented.
 * The caller has to rtfree it by itself.
 */
int
rtrequest(int req, const struct sockaddr *dst, const struct sockaddr *gateway,
    const struct sockaddr *netmask, int flags, struct rtentry **ret_nrt)
{
	struct rt_addrinfo info;

	memset(&info, 0, sizeof(info));
	info.rti_flags = flags;
	info.rti_info[RTAX_DST] = dst;
	info.rti_info[RTAX_GATEWAY] = gateway;
	info.rti_info[RTAX_NETMASK] = netmask;
	return rtrequest1(req, &info, ret_nrt);
}

/*
 * It's a utility function to add/remove a route to/from the routing table
 * and tell user processes the addition/removal on success.
 */
/*
 * Add (RTM_ADD) or delete (RTM_DELETE) a route and, on success, tell
 * user processes about it via the routing socket.
 * Returns 0 on success or an errno from rtrequest().
 */
int
rtrequest_newmsg(const int req, const struct sockaddr *dst,
	const struct sockaddr *gateway, const struct sockaddr *netmask,
	const int flags)
{
	int error;
	struct rtentry *ret_nrt = NULL;

	KASSERT(req == RTM_ADD || req == RTM_DELETE);

	error = rtrequest(req, dst, gateway, netmask, flags, &ret_nrt);
	if (error != 0)
		return error;

	KASSERT(ret_nrt != NULL);

	rt_newmsg(req, ret_nrt);	/* tell user process */
	/*
	 * rtrequest() handed back a reference.  A deleted entry is gone
	 * from the table, so destroy it with rt_free(); otherwise just
	 * drop our reference.
	 */
	if (req == RTM_DELETE)
		rt_free(ret_nrt);
	else
		rt_unref(ret_nrt);

	return 0;
}

/*
 * Resolve the interface for a routing request.  If the caller did not
 * already set info->rti_ifp, try to derive it from the RTAX_IFP
 * sockaddr (an AF_LINK sockaddr_dl).  On success the ifnet is held via
 * psref and also cached in info->rti_ifp.
 */
struct ifnet *
rt_getifp(struct rt_addrinfo *info, struct psref *psref)
{
	const struct sockaddr *ifpaddr = info->rti_info[RTAX_IFP];

	if (info->rti_ifp != NULL)
		return NULL;
	/*
	 * ifp may be specified by sockaddr_dl when protocol address
	 * is ambiguous
	 */
	if (ifpaddr != NULL && ifpaddr->sa_family == AF_LINK) {
		struct ifaddr *ifa;
		int s = pserialize_read_enter();

		ifa = ifa_ifwithnet(ifpaddr);
		if (ifa != NULL)
			info->rti_ifp = if_get_byindex(ifa->ifa_ifp->if_index,
			    psref);
		pserialize_read_exit(s);
	}

	return info->rti_ifp;
}

/*
 * Choose the ifaddr to associate with a route.  Preference order: an
 * exact match on RTAX_IFA, then an address on the already-chosen
 * interface, then whatever interface can reach dst/gateway.  The
 * returned ifaddr is held via psref and cached in info->rti_ifa; the
 * interface is cached in info->rti_ifp if not set yet.
 */
struct ifaddr *
rt_getifa(struct rt_addrinfo *info, struct psref *psref)
{
	struct ifaddr *ifa = NULL;
	const struct sockaddr *dst = info->rti_info[RTAX_DST];
	const struct sockaddr *gateway = info->rti_info[RTAX_GATEWAY];
	const struct sockaddr *ifaaddr = info->rti_info[RTAX_IFA];
	int flags = info->rti_flags;
	const struct sockaddr *sa;

	if (info->rti_ifa == NULL && ifaaddr != NULL) {
		ifa = ifa_ifwithaddr_psref(ifaaddr, psref);
		if (ifa != NULL)
			goto got;
	}

	sa = ifaaddr != NULL ? ifaaddr :
	    (gateway != NULL ? gateway : dst);
	if (sa != NULL && info->rti_ifp != NULL)
		ifa = ifaof_ifpforaddr_psref(sa, info->rti_ifp, psref);
	else if (dst != NULL && gateway != NULL)
		ifa = ifa_ifwithroute_psref(flags, dst, gateway, psref);
	else if (sa != NULL)
		ifa = ifa_ifwithroute_psref(flags, sa, sa, psref);
	if (ifa == NULL)
		return NULL;
got:
	if (ifa->ifa_getifa != NULL) {
		/* FIXME ifa_getifa is NOMPSAFE */
		ifa = (*ifa->ifa_getifa)(ifa, dst);
		if (ifa == NULL)
			return NULL;
		/* the new ifa needs its own reference */
		ifa_acquire(ifa, psref);
	}
	info->rti_ifa = ifa;
	if (info->rti_ifp == NULL)
		info->rti_ifp = ifa->ifa_ifp;
	return ifa;
}

/*
 * If it succeeds and ret_nrt isn't NULL, refcnt of ret_nrt is incremented.
 * The caller has to rtfree it by itself.
 */
int
rtrequest1(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt)
{
	int s = splsoftnet(), ss;
	int error = 0, rc;
	struct rtentry *rt;
	rtbl_t *rtbl;
	struct ifaddr *ifa = NULL;
	struct sockaddr_storage maskeddst;
	const struct sockaddr *dst = info->rti_info[RTAX_DST];
	const struct sockaddr *gateway = info->rti_info[RTAX_GATEWAY];
	const struct sockaddr *netmask = info->rti_info[RTAX_NETMASK];
	int flags = info->rti_flags;
	struct psref psref_ifp, psref_ifa;
	int bound = 0;
	struct ifnet *ifp = NULL;
	bool need_to_release_ifa = true;
	bool need_unlock = true;
#define senderr(x) { error = x ; goto bad; }

	RT_WLOCK();

	bound = curlwp_bind();
	if ((rtbl = rt_gettable(dst->sa_family)) == NULL)
		senderr(ESRCH);
	if (flags & RTF_HOST)
		netmask = NULL;
	switch (req) {
	case RTM_DELETE:
		if (netmask) {
			rt_maskedcopy(dst, (struct sockaddr *)&maskeddst,
			    netmask);
			dst = (struct sockaddr *)&maskeddst;
		}
		if ((rt = rt_lookup(rtbl, dst, netmask)) == NULL)
			senderr(ESRCH);
		if ((rt = rt_deladdr(rtbl, dst, netmask)) == NULL)
			senderr(ESRCH);
		rt->rt_flags &= ~RTF_UP;
		if ((ifa = rt->rt_ifa)) {
			if (ifa->ifa_flags & IFA_ROUTE &&
			    rt_ifa_connected(rt, ifa)) {
				RT_DPRINTF("rt->_rt_key = %p, ifa = %p, "
				    "deleted IFA_ROUTE\n",
				    (void *)rt->_rt_key, (void *)ifa);
				ifa->ifa_flags &= ~IFA_ROUTE;
			}
			if (ifa->ifa_rtrequest)
				ifa->ifa_rtrequest(RTM_DELETE, rt, info);
			ifa = NULL;
		}
		rttrash++;
		if (ret_nrt) {
			*ret_nrt = rt;
			rt_ref(rt);
			RT_REFCNT_TRACE(rt);
		}
		rtcache_invalidate();
		/* drop the lock before rt_timer_remove_all/rt_free below */
		RT_UNLOCK();
		need_unlock = false;
		rt_timer_remove_all(rt);
#if defined(INET) || defined(INET6)
		if (netmask != NULL)
			lltable_prefix_free(dst->sa_family, dst, netmask, 0);
#endif
		if (ret_nrt == NULL) {
			/* Adjust the refcount */
			rt_ref(rt);
			RT_REFCNT_TRACE(rt);
			rt_free(rt);
		}
		break;

	case RTM_ADD:
		if (info->rti_ifa == NULL) {
			ifp = rt_getifp(info, &psref_ifp);
			ifa = rt_getifa(info, &psref_ifa);
			if (ifa == NULL)
				senderr(ENETUNREACH);
		} else {
			/* Caller should have a reference of ifa */
			ifa = info->rti_ifa;
			need_to_release_ifa = false;
		}
		rt = pool_get(&rtentry_pool, PR_NOWAIT);
		if (rt == NULL)
			senderr(ENOBUFS);
		memset(rt, 0, sizeof(*rt));
		rt->rt_flags = RTF_UP | flags;
		LIST_INIT(&rt->rt_timer);

		RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
		if (netmask) {
			rt_maskedcopy(dst, (struct sockaddr *)&maskeddst,
			    netmask);
			rt_setkey(rt, (struct sockaddr *)&maskeddst, M_NOWAIT);
		} else {
			rt_setkey(rt, dst, M_NOWAIT);
		}
		RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
		if (rt_getkey(rt) == NULL ||
		    rt_setgate(rt, gateway) != 0) {
			pool_put(&rtentry_pool, rt);
			senderr(ENOBUFS);
		}

		rt_set_ifa(rt, ifa);
		if (info->rti_info[RTAX_TAG] != NULL) {
			const struct sockaddr *tag;
			tag = rt_settag(rt, info->rti_info[RTAX_TAG]);
			if (tag == NULL)
				senderr(ENOBUFS);
		}
		RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);

		/*
		 * The interface named by RTAX_IFP, if any, overrides the
		 * one implied by the chosen ifaddr.
		 */
		ss = pserialize_read_enter();
		if (info->rti_info[RTAX_IFP] != NULL) {
			struct ifaddr *ifa2;
			ifa2 = ifa_ifwithnet(info->rti_info[RTAX_IFP]);
			if (ifa2 != NULL)
				rt->rt_ifp = ifa2->ifa_ifp;
			else
				rt->rt_ifp = ifa->ifa_ifp;
		} else
			rt->rt_ifp = ifa->ifa_ifp;
		pserialize_read_exit(ss);
		cv_init(&rt->rt_cv, "rtentry");
		psref_target_init(&rt->rt_psref, rt_psref_class);

		RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
		rc = rt_addaddr(rtbl, rt, netmask);
		RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
		if (rc != 0) {
			ifafree(ifa); /* for rt_set_ifa above */
			cv_destroy(&rt->rt_cv);
			rt_destroy(rt);
			pool_put(&rtentry_pool, rt);
			senderr(rc);
		}
		RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
		if (ifa->ifa_rtrequest)
			ifa->ifa_rtrequest(req, rt, info);
		if (need_to_release_ifa)
			ifa_release(ifa, &psref_ifa);
		ifa = NULL;
		if_put(ifp, &psref_ifp);
		ifp = NULL;
		RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
		if (ret_nrt) {
			*ret_nrt = rt;
			rt_ref(rt);
			RT_REFCNT_TRACE(rt);
		}
		rtcache_invalidate();
		RT_UNLOCK();
		need_unlock = false;
		break;
	case RTM_GET:
		if (netmask != NULL) {
			rt_maskedcopy(dst, (struct sockaddr *)&maskeddst,
			    netmask);
			dst = (struct sockaddr *)&maskeddst;
		}
		if ((rt = rt_lookup(rtbl, dst, netmask)) == NULL)
			senderr(ESRCH);
		if (ret_nrt != NULL) {
			*ret_nrt = rt;
			rt_ref(rt);
			RT_REFCNT_TRACE(rt);
		}
		break;
	}
bad:
	if (need_to_release_ifa)
		ifa_release(ifa, &psref_ifa);
	if_put(ifp, &psref_ifp);
	curlwp_bindx(bound);
	if (need_unlock)
		RT_UNLOCK();
	splx(s);
	return error;
}
/*
 * Replace the gateway of a route.  The old gateway sockaddr, if any,
 * is freed.  For an indirect (RTF_GATEWAY) route, look up the route to
 * the new gateway and clamp this route's MTU to the gateway route's.
 * Returns 0 on success or ENOMEM.
 */
int
rt_setgate(struct rtentry *rt, const struct sockaddr *gate)
{
	struct sockaddr *new, *old;

	KASSERT(rt->_rt_key != NULL);
	RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);

	new = sockaddr_dup(gate, M_ZERO | M_NOWAIT);
	if (new == NULL)
		return ENOMEM;

	old = rt->rt_gateway;
	rt->rt_gateway = new;
	if (old != NULL)
		sockaddr_free(old);

	KASSERT(rt->_rt_key != NULL);
	RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);

	if (rt->rt_flags & RTF_GATEWAY) {
		struct rtentry *gwrt;

		/* XXX we cannot call rtalloc1 if holding the rt lock */
		if (RT_LOCKED())
			gwrt = rtalloc1_locked(gate, 1, false, true);
		else
			gwrt = rtalloc1(gate, 1);
		/*
		 * If we switched gateways, grab the MTU from the new
		 * gateway route if the current MTU is greater than the
		 * MTU of the gateway.
		 * Note that, if the MTU of gateway is 0, we will reset the
		 * MTU of the route to run PMTUD again from scratch. XXX
		 */
		if (gwrt != NULL) {
			KASSERT(gwrt->_rt_key != NULL);
			RT_DPRINTF("gwrt->_rt_key = %p\n", gwrt->_rt_key);
			if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0 &&
			    rt->rt_rmx.rmx_mtu &&
			    rt->rt_rmx.rmx_mtu > gwrt->rt_rmx.rmx_mtu) {
				rt->rt_rmx.rmx_mtu = gwrt->rt_rmx.rmx_mtu;
			}
			rt_unref(gwrt);
		}
	}
	KASSERT(rt->_rt_key != NULL);
	RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
	return 0;
}

/*
 * Copy src into dst with each data byte ANDed with the corresponding
 * netmask byte, for as many bytes as both src and the netmask cover;
 * any remaining bytes of dst (up to src->sa_len) are zeroed.
 * sa_len and sa_family are taken from src.
 */
static void
rt_maskedcopy(const struct sockaddr *src, struct sockaddr *dst,
	const struct sockaddr *netmask)
{
	const char *netmaskp = &netmask->sa_data[0],
	    *srcp = &src->sa_data[0];
	char *dstp = &dst->sa_data[0];
	/* end pointers are relative to dst, which dstp walks */
	const char *maskend = (char *)dst + MIN(netmask->sa_len, src->sa_len);
	const char *srcend = (char *)dst + src->sa_len;

	dst->sa_len = src->sa_len;
	dst->sa_family = src->sa_family;

	while (dstp < maskend)
		*dstp++ = *srcp++ & *netmaskp++;
	if (dstp < srcend)
		memset(dstp, 0, (size_t)(srcend - dstp));
}

/*
 * Inform the routing socket of a route change.
 */
void
rt_newmsg(const int cmd, const struct rtentry *rt)
{
	struct rt_addrinfo info;

	memset((void *)&info, 0, sizeof(info));
	info.rti_info[RTAX_DST] = rt_getkey(rt);
	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
	info.rti_info[RTAX_NETMASK] = rt_mask(rt);
	if (rt->rt_ifp) {
		info.rti_info[RTAX_IFP] = rt->rt_ifp->if_dl->ifa_addr;
		info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
	}

	rt_missmsg(cmd, &info, rt->rt_flags, 0);
}

/*
 * Set up or tear down a routing table entry, normally
 * for an interface.
1423 */ 1424 int 1425 rtinit(struct ifaddr *ifa, int cmd, int flags) 1426 { 1427 struct rtentry *rt; 1428 struct sockaddr *dst, *odst; 1429 struct sockaddr_storage maskeddst; 1430 struct rtentry *nrt = NULL; 1431 int error; 1432 struct rt_addrinfo info; 1433 1434 dst = flags & RTF_HOST ? ifa->ifa_dstaddr : ifa->ifa_addr; 1435 if (cmd == RTM_DELETE) { 1436 if ((flags & RTF_HOST) == 0 && ifa->ifa_netmask) { 1437 /* Delete subnet route for this interface */ 1438 odst = dst; 1439 dst = (struct sockaddr *)&maskeddst; 1440 rt_maskedcopy(odst, dst, ifa->ifa_netmask); 1441 } 1442 if ((rt = rtalloc1(dst, 0)) != NULL) { 1443 if (rt->rt_ifa != ifa) { 1444 rt_unref(rt); 1445 return (flags & RTF_HOST) ? EHOSTUNREACH 1446 : ENETUNREACH; 1447 } 1448 rt_unref(rt); 1449 } 1450 } 1451 memset(&info, 0, sizeof(info)); 1452 info.rti_ifa = ifa; 1453 info.rti_flags = flags | ifa->ifa_flags; 1454 info.rti_info[RTAX_DST] = dst; 1455 info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr; 1456 1457 /* 1458 * XXX here, it seems that we are assuming that ifa_netmask is NULL 1459 * for RTF_HOST. bsdi4 passes NULL explicitly (via intermediate 1460 * variable) when RTF_HOST is 1. still not sure if i can safely 1461 * change it to meet bsdi4 behavior. 1462 */ 1463 if (cmd != RTM_LLINFO_UPD) 1464 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; 1465 error = rtrequest1((cmd == RTM_LLINFO_UPD) ? RTM_GET : cmd, &info, 1466 &nrt); 1467 if (error != 0) 1468 return error; 1469 1470 rt = nrt; 1471 RT_REFCNT_TRACE(rt); 1472 switch (cmd) { 1473 case RTM_DELETE: 1474 rt_newmsg(cmd, rt); 1475 rt_free(rt); 1476 break; 1477 case RTM_LLINFO_UPD: 1478 if (cmd == RTM_LLINFO_UPD && ifa->ifa_rtrequest != NULL) 1479 ifa->ifa_rtrequest(RTM_LLINFO_UPD, rt, &info); 1480 rt_newmsg(RTM_CHANGE, rt); 1481 rt_unref(rt); 1482 break; 1483 case RTM_ADD: 1484 /* 1485 * XXX it looks just reverting rt_ifa replaced by ifa_rtrequest 1486 * called via rtrequest1. Can we just prevent the replacement 1487 * somehow and remove the following code? 
And also doesn't 1488 * calling ifa_rtrequest(RTM_ADD) replace rt_ifa again? 1489 */ 1490 if (rt->rt_ifa != ifa) { 1491 printf("rtinit: wrong ifa (%p) was (%p)\n", ifa, 1492 rt->rt_ifa); 1493 #ifdef NET_MPSAFE 1494 KASSERT(!cpu_softintr_p()); 1495 1496 error = rt_update_prepare(rt); 1497 if (error == 0) { 1498 #endif 1499 if (rt->rt_ifa->ifa_rtrequest != NULL) { 1500 rt->rt_ifa->ifa_rtrequest(RTM_DELETE, 1501 rt, &info); 1502 } 1503 rt_replace_ifa(rt, ifa); 1504 rt->rt_ifp = ifa->ifa_ifp; 1505 if (ifa->ifa_rtrequest != NULL) 1506 ifa->ifa_rtrequest(RTM_ADD, rt, &info); 1507 #ifdef NET_MPSAFE 1508 rt_update_finish(rt); 1509 } else { 1510 /* 1511 * If error != 0, the rtentry is being 1512 * destroyed, so doing nothing doesn't 1513 * matter. 1514 */ 1515 } 1516 #endif 1517 } 1518 rt_newmsg(cmd, rt); 1519 rt_unref(rt); 1520 RT_REFCNT_TRACE(rt); 1521 break; 1522 } 1523 return error; 1524 } 1525 1526 /* 1527 * Create a local route entry for the address. 1528 * Announce the addition of the address and the route to the routing socket. 1529 */ 1530 int 1531 rt_ifa_addlocal(struct ifaddr *ifa) 1532 { 1533 struct rtentry *rt; 1534 int e; 1535 1536 /* If there is no loopback entry, allocate one. 
*/ 1537 rt = rtalloc1(ifa->ifa_addr, 0); 1538 #ifdef RT_DEBUG 1539 if (rt != NULL) 1540 dump_rt(rt); 1541 #endif 1542 if (rt == NULL || (rt->rt_flags & RTF_HOST) == 0 || 1543 (rt->rt_ifp->if_flags & IFF_LOOPBACK) == 0) 1544 { 1545 struct rt_addrinfo info; 1546 struct rtentry *nrt; 1547 1548 memset(&info, 0, sizeof(info)); 1549 info.rti_flags = RTF_HOST | RTF_LOCAL; 1550 info.rti_info[RTAX_DST] = ifa->ifa_addr; 1551 info.rti_info[RTAX_GATEWAY] = 1552 (const struct sockaddr *)ifa->ifa_ifp->if_sadl; 1553 info.rti_ifa = ifa; 1554 nrt = NULL; 1555 e = rtrequest1(RTM_ADD, &info, &nrt); 1556 if (nrt && ifa != nrt->rt_ifa) 1557 rt_replace_ifa(nrt, ifa); 1558 rt_newaddrmsg(RTM_ADD, ifa, e, nrt); 1559 if (nrt != NULL) { 1560 #ifdef RT_DEBUG 1561 dump_rt(nrt); 1562 #endif 1563 rt_unref(nrt); 1564 RT_REFCNT_TRACE(nrt); 1565 } 1566 } else { 1567 e = 0; 1568 rt_newaddrmsg(RTM_NEWADDR, ifa, 0, NULL); 1569 } 1570 if (rt != NULL) 1571 rt_unref(rt); 1572 return e; 1573 } 1574 1575 /* 1576 * Remove the local route entry for the address. 1577 * Announce the removal of the address and the route to the routing socket. 1578 */ 1579 int 1580 rt_ifa_remlocal(struct ifaddr *ifa, struct ifaddr *alt_ifa) 1581 { 1582 struct rtentry *rt; 1583 int e = 0; 1584 1585 rt = rtalloc1(ifa->ifa_addr, 0); 1586 1587 /* 1588 * Before deleting, check if a corresponding loopbacked 1589 * host route surely exists. With this check, we can avoid 1590 * deleting an interface direct route whose destination is 1591 * the same as the address being removed. This can happen 1592 * when removing a subnet-router anycast address on an 1593 * interface attached to a shared medium. 1594 */ 1595 if (rt != NULL && 1596 (rt->rt_flags & RTF_HOST) && 1597 (rt->rt_ifp->if_flags & IFF_LOOPBACK)) 1598 { 1599 /* If we cannot replace the route's ifaddr with the equivalent 1600 * ifaddr of another interface, I believe it is safest to 1601 * delete the route. 
1602 */ 1603 if (alt_ifa == NULL) { 1604 e = rtdeletemsg(rt); 1605 if (e == 0) { 1606 rt_unref(rt); 1607 rt_free(rt); 1608 rt = NULL; 1609 } 1610 rt_newaddrmsg(RTM_DELADDR, ifa, 0, NULL); 1611 } else { 1612 rt_replace_ifa(rt, alt_ifa); 1613 rt_newmsg(RTM_CHANGE, rt); 1614 } 1615 } else 1616 rt_newaddrmsg(RTM_DELADDR, ifa, 0, NULL); 1617 if (rt != NULL) 1618 rt_unref(rt); 1619 return e; 1620 } 1621 1622 /* 1623 * Route timer routines. These routes allow functions to be called 1624 * for various routes at any time. This is useful in supporting 1625 * path MTU discovery and redirect route deletion. 1626 * 1627 * This is similar to some BSDI internal functions, but it provides 1628 * for multiple queues for efficiency's sake... 1629 */ 1630 1631 LIST_HEAD(, rttimer_queue) rttimer_queue_head; 1632 static int rt_init_done = 0; 1633 1634 /* 1635 * Some subtle order problems with domain initialization mean that 1636 * we cannot count on this being run from rt_init before various 1637 * protocol initializations are done. Therefore, we make sure 1638 * that this is run when the first queue is added... 
 */

static void rt_timer_work(struct work *, void *);

/*
 * One-time initialization of the route timer machinery: the rt lock,
 * the global timer queue list, the periodic callout and the softint
 * workqueue.  Called lazily from rt_timer_queue_create().
 */
static void
rt_timer_init(void)
{
	int error;

	assert(rt_init_done == 0);

	/* XXX should be in rt_init */
	rw_init(&rt_lock);

	LIST_INIT(&rttimer_queue_head);
	callout_init(&rt_timer_ch, CALLOUT_MPSAFE);
	error = workqueue_create(&rt_timer_wq, "rt_timer",
	    rt_timer_work, NULL, PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE);
	if (error)
		panic("%s: workqueue_create failed (%d)\n", __func__, error);
	callout_reset(&rt_timer_ch, hz, rt_timer_timer, NULL);
	rt_init_done = 1;
}

/*
 * Allocate a timer queue whose entries fire 'timeout' seconds after
 * being added.  Returns NULL on allocation failure.
 */
struct rttimer_queue *
rt_timer_queue_create(u_int timeout)
{
	struct rttimer_queue *rtq;

	if (rt_init_done == 0)
		rt_timer_init();

	R_Malloc(rtq, struct rttimer_queue *, sizeof *rtq);
	if (rtq == NULL)
		return NULL;
	memset(rtq, 0, sizeof(*rtq));

	rtq->rtq_timeout = timeout;
	TAILQ_INIT(&rtq->rtq_head);
	RT_WLOCK();
	LIST_INSERT_HEAD(&rttimer_queue_head, rtq, rtq_link);
	RT_UNLOCK();

	return rtq;
}

/*
 * Change the timeout of a queue; pending entries keep their original
 * insertion time and are re-evaluated against the new timeout.
 */
void
rt_timer_queue_change(struct rttimer_queue *rtq, long timeout)
{

	rtq->rtq_timeout = timeout;
}

/*
 * Fire and remove every entry of a queue.  Called with the RT write
 * lock held; the lock is dropped around each callback because rtt_func
 * may take it again.
 */
static void
rt_timer_queue_remove_all(struct rttimer_queue *rtq)
{
	struct rttimer *r;

	RT_ASSERT_WLOCK();

	while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL) {
		LIST_REMOVE(r, rtt_link);
		TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
		rt_ref(r->rtt_rt); /* XXX */
		RT_REFCNT_TRACE(r->rtt_rt);
		RT_UNLOCK();
		(*r->rtt_func)(r->rtt_rt, r);
		pool_put(&rttimer_pool, r);
		RT_WLOCK();
		if (rtq->rtq_count > 0)
			rtq->rtq_count--;
		else
			printf("rt_timer_queue_remove_all: "
			    "rtq_count reached 0\n");
	}
}

/*
 * Drain a queue and unlink it from the global queue list.
 */
void
rt_timer_queue_destroy(struct rttimer_queue *rtq)
{

	RT_WLOCK();
	rt_timer_queue_remove_all(rtq);
	LIST_REMOVE(rtq, rtq_link);
	RT_UNLOCK();

	/*
	 * Caller is responsible for freeing the rttimer_queue structure.
	 */
}

/* Return the number of pending entries on the queue (unlocked read). */
unsigned long
rt_timer_count(struct rttimer_queue *rtq)
{
	return rtq->rtq_count;
}

/*
 * Discard all timers attached to a route without running their
 * callbacks, e.g. when the route is being deleted.
 */
static void
rt_timer_remove_all(struct rtentry *rt)
{
	struct rttimer *r;

	RT_WLOCK();
	while ((r = LIST_FIRST(&rt->rt_timer)) != NULL) {
		LIST_REMOVE(r, rtt_link);
		TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next);
		if (r->rtt_queue->rtq_count > 0)
			r->rtt_queue->rtq_count--;
		else
			printf("rt_timer_remove_all: rtq_count reached 0\n");
		pool_put(&rttimer_pool, r);
	}
	RT_UNLOCK();
}

/*
 * Arm (or re-arm) a timer that calls 'func' on 'rt' once the queue's
 * timeout has elapsed.  A route carries at most one timer per callback
 * function; an existing one is recycled.  Returns 0 or ENOBUFS.
 */
int
rt_timer_add(struct rtentry *rt,
	void (*func)(struct rtentry *, struct rttimer *),
	struct rttimer_queue *queue)
{
	struct rttimer *r;

	KASSERT(func != NULL);
	RT_WLOCK();
	/*
	 * If there's already a timer with this action, destroy it before
	 * we add a new one.
	 */
	LIST_FOREACH(r, &rt->rt_timer, rtt_link) {
		if (r->rtt_func == func)
			break;
	}
	if (r != NULL) {
		LIST_REMOVE(r, rtt_link);
		TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next);
		if (r->rtt_queue->rtq_count > 0)
			r->rtt_queue->rtq_count--;
		else
			printf("rt_timer_add: rtq_count reached 0\n");
	} else {
		r = pool_get(&rttimer_pool, PR_NOWAIT);
		if (r == NULL) {
			RT_UNLOCK();
			return ENOBUFS;
		}
	}

	memset(r, 0, sizeof(*r));

	r->rtt_rt = rt;
	r->rtt_time = time_uptime;
	r->rtt_func = func;
	r->rtt_queue = queue;
	LIST_INSERT_HEAD(&rt->rt_timer, r, rtt_link);
	TAILQ_INSERT_TAIL(&queue->rtq_head, r, rtt_next);
	r->rtt_queue->rtq_count++;

	RT_UNLOCK();

	return 0;
}

/*
 * Workqueue handler: run the callbacks of all expired timers on every
 * queue (dropping the RT lock around each callback), then re-arm the
 * periodic callout.
 */
static void
rt_timer_work(struct work *wk, void *arg)
{
	struct rttimer_queue *rtq;
	struct rttimer *r;

	RT_WLOCK();
	LIST_FOREACH(rtq, &rttimer_queue_head, rtq_link) {
		while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL &&
		    (r->rtt_time + rtq->rtq_timeout) < time_uptime) {
			LIST_REMOVE(r, rtt_link);
			TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
			rt_ref(r->rtt_rt); /* XXX */
			RT_REFCNT_TRACE(r->rtt_rt);
			RT_UNLOCK();
			(*r->rtt_func)(r->rtt_rt, r);
			pool_put(&rttimer_pool, r);
			RT_WLOCK();
			if (rtq->rtq_count > 0)
				rtq->rtq_count--;
			else
				/* historical message name kept as-is */
				printf("rt_timer_timer: rtq_count reached 0\n");
		}
	}
	RT_UNLOCK();

	callout_reset(&rt_timer_ch, hz, rt_timer_timer, NULL);
}

/* Callout handler: defer timer processing to the workqueue. */
static void
rt_timer_timer(void *arg)
{

	workqueue_enqueue(rt_timer_wq, &rt_timer_wk, NULL);
}

/*
 * Resolve the cache's destination and remember the route if it is
 * usable (RTF_UP).  'flag' is passed to rtalloc1 (non-zero allows
 * cloning).  Returns the cached rtentry or NULL.
 */
static struct rtentry *
_rtcache_init(struct route *ro, int flag)
{
	struct rtentry *rt;

	rtcache_invariants(ro);
	KASSERT(ro->_ro_rt == NULL);

	if (rtcache_getdst(ro) == NULL)
		return NULL;
	rt = rtalloc1(rtcache_getdst(ro), flag);
	if (rt != NULL) {
		RT_RLOCK();
		if (ISSET(rt->rt_flags, RTF_UP)) {
			ro->_ro_rt = rt;
			ro->ro_rtcache_generation = rtcache_generation;
			rtcache_ref(rt, ro);
		}
		RT_UNLOCK();
		rt_unref(rt);
	}

	rtcache_invariants(ro);
	return ro->_ro_rt;
}

struct rtentry *
rtcache_init(struct route *ro)
{

	return _rtcache_init(ro, 1);
}

struct rtentry *
rtcache_init_noclone(struct route *ro)
{

	return _rtcache_init(ro, 0);
}

/* Re-resolve the cached destination after invalidation. */
struct rtentry *
rtcache_update(struct route *ro, int clone)
{

	ro->_ro_rt = NULL;
	return _rtcache_init(ro, clone);
}

/*
 * Copy the destination and cached route of old_ro into new_ro.
 */
void
rtcache_copy(struct route *new_ro, struct route *old_ro)
{
	struct rtentry *rt;
	int ret;

	KASSERT(new_ro != old_ro);
	rtcache_invariants(new_ro);
	rtcache_invariants(old_ro);

	rt = rtcache_validate(old_ro);

	if (rtcache_getdst(old_ro) == NULL)
		goto out;
	ret = rtcache_setdst(new_ro, rtcache_getdst(old_ro));
	if (ret != 0)
		goto out;

	RT_RLOCK();
	new_ro->_ro_rt = rt;
	new_ro->ro_rtcache_generation = rtcache_generation;
	RT_UNLOCK();
	rtcache_invariants(new_ro);
out:
	rtcache_unref(rt, old_ro);
	return;
}

#if defined(RT_DEBUG) && defined(NET_MPSAFE)
static void
rtcache_trace(const char *func, struct rtentry *rt, struct route *ro)
{
	char dst[64];

	sockaddr_format(ro->ro_sa, dst, 64);
	printf("trace: %s:\tdst=%s cpu=%d lwp=%p psref=%p target=%p\n", func, dst,
	    cpu_index(curcpu()), curlwp, &ro->ro_psref, &rt->rt_psref);
}
#define RTCACHE_PSREF_TRACE(rt, ro)	rtcache_trace(__func__, (rt), (ro))
#else
#define RTCACHE_PSREF_TRACE(rt, ro)	do {} while (0)
#endif

/* Take a psref on the cached route (NET_MPSAFE kernels only). */
static void
rtcache_ref(struct rtentry *rt, struct route *ro)
{

	KASSERT(rt != NULL);

#ifdef NET_MPSAFE
	RTCACHE_PSREF_TRACE(rt, ro);
	ro->ro_bound = curlwp_bind();
	psref_acquire(&ro->ro_psref, &rt->rt_psref, rt_psref_class);
#endif
}

void
rtcache_unref(struct rtentry *rt, struct route *ro)
{

	if (rt == NULL)
		return;

#ifdef NET_MPSAFE
	psref_release(&ro->ro_psref, &rt->rt_psref, rt_psref_class);
	curlwp_bindx(ro->ro_bound);
	RTCACHE_PSREF_TRACE(rt, ro);
#endif
}

/*
 * Return the cached route with a reference taken if it is still valid
 * (cache generation matches and the route is RTF_UP); otherwise NULL.
 * May wait for a concurrent route update (RTF_UPDATING) to finish.
 */
struct rtentry *
rtcache_validate(struct route *ro)
{
	struct rtentry *rt = NULL;

#ifdef NET_MPSAFE
retry:
#endif
	rtcache_invariants(ro);
	RT_RLOCK();
	if (ro->ro_rtcache_generation != rtcache_generation) {
		/* The cache is invalidated */
		rt = NULL;
		goto out;
	}

	rt = ro->_ro_rt;
	if (rt == NULL)
		goto out;

	if ((rt->rt_flags & RTF_UP) == 0) {
		rt = NULL;
		goto out;
	}
#ifdef NET_MPSAFE
	if (ISSET(rt->rt_flags, RTF_UPDATING)) {
		if (rt_wait_ok()) {
			RT_UNLOCK();

			/* We can wait until the update is complete */
			rt_update_wait();
			goto retry;
		} else {
			rt = NULL;
		}
	} else
#endif
		rtcache_ref(rt, ro);
out:
	RT_UNLOCK();
	return rt;
}

/*
 * Look up dst through the cache: reuse the cached route on a hit,
 * otherwise re-point the cache at dst and resolve it.  *hitp, if
 * non-NULL, reports whether the cache was hit.
 */
struct rtentry *
rtcache_lookup2(struct route *ro, const struct sockaddr *dst,
	int clone, int *hitp)
{
	const struct sockaddr *odst;
	struct rtentry *rt = NULL;

	odst = rtcache_getdst(ro);
	if (odst == NULL)
		goto miss;

	if (sockaddr_cmp(odst, dst) != 0) {
		rtcache_free(ro);
		goto miss;
	}

	rt = rtcache_validate(ro);
	if (rt == NULL) {
		ro->_ro_rt = NULL;
		goto miss;
	}

	rtcache_invariants(ro);

	if (hitp != NULL)
		*hitp = 1;
	return rt;
miss:
	if (hitp != NULL)
		*hitp = 0;
	if (rtcache_setdst(ro, dst) == 0)
		rt = _rtcache_init(ro, clone);

	rtcache_invariants(ro);

	return rt;
}

/* Empty the cache: drop the cached route and free the destination. */
void
rtcache_free(struct route *ro)
{

	ro->_ro_rt = NULL;
	if (ro->ro_sa != NULL) {
		sockaddr_free(ro->ro_sa);
		ro->ro_sa = NULL;
	}
	rtcache_invariants(ro);
}

/*
 * Set the cache's destination, reusing the existing sockaddr storage
 * when the address family matches.  Returns 0 or ENOMEM.
 */
int
rtcache_setdst(struct route *ro, const struct sockaddr *sa)
{
	KASSERT(sa != NULL);

	rtcache_invariants(ro);
	if (ro->ro_sa != NULL) {
		if (ro->ro_sa->sa_family == sa->sa_family) {
			ro->_ro_rt = NULL;
			sockaddr_copy(ro->ro_sa, ro->ro_sa->sa_len, sa);
			rtcache_invariants(ro);
			return 0;
		}
		/* free ro_sa, wrong family */
		rtcache_free(ro);
	}

	KASSERT(ro->_ro_rt == NULL);

	if ((ro->ro_sa = sockaddr_dup(sa, M_ZERO | M_NOWAIT)) == NULL) {
		rtcache_invariants(ro);
		return ENOMEM;
	}
	rtcache_invariants(ro);
	return 0;
}

/*
 * Attach a routing tag to a route, replacing any previous one.
 * Returns the duplicated tag, or NULL if the allocation failed.
 */
const struct sockaddr *
rt_settag(struct rtentry *rt, const struct sockaddr *tag)
{
	if (rt->rt_tag != tag) {
		if (rt->rt_tag != NULL)
			sockaddr_free(rt->rt_tag);
		rt->rt_tag = sockaddr_dup(tag, M_ZERO | M_NOWAIT);
	}
	return rt->rt_tag;
}

struct sockaddr *
rt_gettag(const struct rtentry *rt)
{
	return rt->rt_tag;
}

/*
 * Return an errno mimicking looutput's treatment of an RTF_REJECT
 * route, or 0 if the route does not (or no longer) rejects traffic.
 */
int
rt_check_reject_route(const struct rtentry *rt, const struct ifnet *ifp)
{

	if ((rt->rt_flags & RTF_REJECT) != 0) {
		/* Mimic looutput */
		if (ifp->if_flags & IFF_LOOPBACK)
			return (rt->rt_flags & RTF_HOST) ?
			    EHOSTUNREACH : ENETUNREACH;
		else if (rt->rt_rmx.rmx_expire == 0 ||
		    time_uptime < rt->rt_rmx.rmx_expire)
			return (rt->rt_flags & RTF_GATEWAY) ?
			    EHOSTUNREACH : EHOSTDOWN;
	}

	return 0;
}

/*
 * Delete every route of the given family for which f(rt, v) returns
 * non-zero.  The RT lock is not held across rtrequest(); the entry is
 * pinned with an extra reference instead.
 */
void
rt_delete_matched_entries(sa_family_t family, int (*f)(struct rtentry *, void *),
    void *v)
{

	for (;;) {
		int s;
		int error;
		struct rtentry *rt, *retrt = NULL;

		RT_RLOCK();
		s = splsoftnet();
		rt = rtbl_search_matched_entry(family, f, v);
		if (rt == NULL) {
			splx(s);
			RT_UNLOCK();
			return;
		}
		rt->rt_refcnt++;
		splx(s);
		RT_UNLOCK();

		error = rtrequest(RTM_DELETE, rt_getkey(rt), rt->rt_gateway,
		    rt_mask(rt), rt->rt_flags, &retrt);
		if (error == 0) {
			KASSERT(retrt == rt);
			KASSERT((retrt->rt_flags & RTF_UP) == 0);
			retrt->rt_ifp = NULL;
			rt_unref(rt);
			rt_free(retrt);
		} else if (error == ESRCH) {
			/* Someone deleted the entry already. */
			rt_unref(rt);
		} else {
			log(LOG_ERR, "%s: unable to delete rtentry @ %p, "
			    "error = %d\n", rt->rt_ifp->if_xname, rt, error);
			/* XXX how to treat this case? */
		}
	}
}

/* Walk all routes of a family under the RT read lock. */
int
rt_walktree(sa_family_t family, int (*f)(struct rtentry *, void *), void *v)
{
	int error;

	RT_RLOCK();
	error = rtbl_walktree(family, f, v);
	RT_UNLOCK();

	return error;
}

#ifdef DDB

#include <machine/db_machdep.h>
#include <ddb/db_interface.h>
#include <ddb/db_output.h>

#define rt_expire rt_rmx.rmx_expire

/* Print a sockaddr byte by byte as "[b0,b1,...]" for ddb. */
static void
db_print_sa(const struct sockaddr *sa)
{
	int len;
	const u_char *p;

	if (sa == NULL) {
		db_printf("[NULL]");
		return;
	}

	p = (const u_char *)sa;
	len = sa->sa_len;
	db_printf("[");
	while (len > 0) {
		db_printf("%d", *p);
		p++; len--;
		if (len) db_printf(",");
	}
	db_printf("]\n");
}

/* Print the addresses, flags and refcount of an ifaddr for ddb. */
static void
db_print_ifa(struct ifaddr *ifa)
{
	if (ifa == NULL)
		return;
	db_printf("  ifa_addr=");
	db_print_sa(ifa->ifa_addr);
	db_printf("  ifa_dsta=");
	db_print_sa(ifa->ifa_dstaddr);
	db_printf("  ifa_mask=");
	db_print_sa(ifa->ifa_netmask);
	db_printf("  flags=0x%x,refcnt=%d,metric=%d\n",
	    ifa->ifa_flags,
	    ifa->ifa_refcnt,
	    ifa->ifa_metric);
}

/*
 * Function to pass to rt_walktree().
 * Return non-zero error to abort walk.
 */
/* Dump one rtentry for ddb; 'w' is unused.  Always returns 0 (keep walking). */
static int
db_show_rtentry(struct rtentry *rt, void *w)
{
	db_printf("rtentry=%p", rt);

	db_printf(" flags=0x%x refcnt=%d use=%"PRId64" expire=%"PRId64"\n",
	    rt->rt_flags, rt->rt_refcnt,
	    rt->rt_use, (uint64_t)rt->rt_expire);

	db_printf(" key="); db_print_sa(rt_getkey(rt));
	db_printf(" mask="); db_print_sa(rt_mask(rt));
	db_printf(" gw="); db_print_sa(rt->rt_gateway);

	db_printf(" ifp=%p ", rt->rt_ifp);
	if (rt->rt_ifp)
		db_printf("(%s)", rt->rt_ifp->if_xname);
	else
		db_printf("(NULL)");

	db_printf(" ifa=%p\n", rt->rt_ifa);
	db_print_ifa(rt->rt_ifa);

	db_printf(" gwroute=%p llinfo=%p\n",
	    rt->rt_gwroute, rt->rt_llinfo);

	return 0;
}

/*
 * Function to print all the route trees.
 * Use this from ddb:  "show routes"
 */
void
db_show_routes(db_expr_t addr, bool have_addr,
    db_expr_t count, const char *modif)
{
	/* XXX only the AF_INET table is dumped */
	rt_walktree(AF_INET, db_show_rtentry, NULL);
}
#endif