/*	$OpenBSD: route.c,v 1.412 2022/06/28 10:01:13 bluhm Exp $	*/
/*	$NetBSD: route.c,v 1.14 1996/02/13 22:00:46 christos Exp $	*/

/*
 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the project nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Copyright (c) 1980, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)route.c	8.2 (Berkeley) 11/15/93
 */

/*
 * @(#)COPYRIGHT	1.1 (NRL) 17 January 1995
 *
 * NRL grants permission for redistribution and use in source and binary
 * forms, with or without modification, of the software and documentation
 * created at NRL provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgements:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 *	This product includes software developed at the Information
 *	Technology Division, US Naval Research Laboratory.
 * 4. Neither the name of the NRL nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
 * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL NRL OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * The views and conclusions contained in the software and documentation
 * are those of the authors and should not be interpreted as representing
 * official policies, either expressed or implied, of the US Naval
 * Research Laboratory (NRL).
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/timeout.h>
#include <sys/domain.h>
#include <sys/ioctl.h>
#include <sys/kernel.h>
#include <sys/queue.h>
#include <sys/pool.h>
#include <sys/atomic.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/route.h>

#include <netinet/in.h>
#include <netinet/ip_var.h>
#include <netinet/in_var.h>

#ifdef INET6
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet6/in6_var.h>
#endif

#ifdef MPLS
#include <netmpls/mpls.h>
#endif

#ifdef BFD
#include <net/bfd.h>
#endif

#define ROUNDUP(a) (a>0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
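
/*
 * Illustrative arithmetic, assuming sizeof(long) == 8 (LP64):
 * ROUNDUP(1) == 8, ROUNDUP(8) == 8, ROUNDUP(9) == 16 and ROUNDUP(0) == 8,
 * i.e. sockaddr lengths are rounded up to the next multiple of a long,
 * with a minimum of one long.
 */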

/* Give some jitter to hash, to avoid synchronization between routers. */
static uint32_t		rt_hashjitter;

extern unsigned int	rtmap_limit;

struct cpumem	*rtcounters;
int		 rttrash;	/* routes not in table but not freed */
int		 ifatrash;	/* ifas not in ifp list but not free */

struct pool	rtentry_pool;	/* pool for rtentry structures */
struct pool	rttimer_pool;	/* pool for rttimer structures */

int	rt_setgwroute(struct rtentry *, u_int);
void	rt_putgwroute(struct rtentry *);
int	rtflushclone1(struct rtentry *, void *, u_int);
int	rtflushclone(struct rtentry *, unsigned int);
int	rt_ifa_purge_walker(struct rtentry *, void *, unsigned int);
struct rtentry *rt_match(struct sockaddr *, uint32_t *, int, unsigned int);
int	rt_clone(struct rtentry **, struct sockaddr *, unsigned int);
struct sockaddr *rt_plentosa(sa_family_t, int, struct sockaddr_in6 *);
static int rt_copysa(struct sockaddr *, struct sockaddr *, struct sockaddr **);

#ifdef DDB
void	db_print_sa(struct sockaddr *);
void	db_print_ifa(struct ifaddr *);
int	db_show_rtentry(struct rtentry *, void *, unsigned int);
#endif

#define	LABELID_MAX	50000

struct rt_label {
	TAILQ_ENTRY(rt_label)	rtl_entry;
	char			rtl_name[RTLABEL_LEN];
	u_int16_t		rtl_id;
	int			rtl_ref;
};

TAILQ_HEAD(rt_labels, rt_label)	rt_labels = TAILQ_HEAD_INITIALIZER(rt_labels);

void
route_init(void)
{
	rtcounters = counters_alloc(rts_ncounters);

	pool_init(&rtentry_pool, sizeof(struct rtentry), 0, IPL_MPFLOOR, 0,
	    "rtentry", NULL);

	while (rt_hashjitter == 0)
		rt_hashjitter = arc4random();

#ifdef BFD
	bfdinit();
#endif
}

/*
 * Returns 1 if the (cached) ``rt'' entry is still valid, 0 otherwise.
 */
int
rtisvalid(struct rtentry *rt)
{
	if (rt == NULL)
		return (0);

	if (!ISSET(rt->rt_flags, RTF_UP))
		return (0);

	if (ISSET(rt->rt_flags, RTF_GATEWAY)) {
		KASSERT(rt->rt_gwroute != NULL);
		KASSERT(!ISSET(rt->rt_gwroute->rt_flags, RTF_GATEWAY));
		if (!ISSET(rt->rt_gwroute->rt_flags, RTF_UP))
			return (0);
	}

	return (1);
}

/*
 * Do the actual lookup for rtalloc(9); do not use directly!
 *
 * Return the best matching entry for the destination ``dst''.
 *
 * "RT_RESOLVE" means that a corresponding L2 entry should
 * be added to the routing table and resolved (via ARP or
 * NDP), if it does not exist.
 */
struct rtentry *
rt_match(struct sockaddr *dst, uint32_t *src, int flags, unsigned int tableid)
{
	struct rtentry *rt = NULL;

	rt = rtable_match(tableid, dst, src);
	if (rt == NULL) {
		rtstat_inc(rts_unreach);
		return (NULL);
	}

	if (ISSET(rt->rt_flags, RTF_CLONING) && ISSET(flags, RT_RESOLVE))
		rt_clone(&rt, dst, tableid);

	rt->rt_use++;
	return (rt);
}
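
/*
 * Illustrative caller pattern only (compare rtredirect() below): a
 * referenced entry is returned on success, so every lookup must be
 * paired with rtfree(9):
 *
 *	rt = rtalloc(dst, RT_RESOLVE, rtableid);
 *	if (!rtisvalid(rt)) {
 *		rtfree(rt);
 *		return (EHOSTUNREACH);
 *	}
 *	... use rt ...
 *	rtfree(rt);
 */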

int
rt_clone(struct rtentry **rtp, struct sockaddr *dst, unsigned int rtableid)
{
	struct rt_addrinfo	 info;
	struct rtentry		*rt = *rtp;
	int			 error = 0;

	memset(&info, 0, sizeof(info));
	info.rti_info[RTAX_DST] = dst;

	/*
	 * The priority of a cloned route should be different
	 * to avoid conflicts with /32 cloning routes.
	 *
	 * It should also be higher to let the ARP layer find
	 * cloned routes instead of the cloning one.
	 */
	KERNEL_LOCK();
	error = rtrequest(RTM_RESOLVE, &info, rt->rt_priority - 1, &rt,
	    rtableid);
	KERNEL_UNLOCK();
	if (error) {
		rtm_miss(RTM_MISS, &info, 0, RTP_NONE, 0, error, rtableid);
	} else {
		/* Inform listeners of the new route */
		rtm_send(rt, RTM_ADD, 0, rtableid);
		rtfree(*rtp);
		*rtp = rt;
	}
	return (error);
}

/*
 * Originated from bridge_hash() in if_bridge.c
 */
#define mix(a, b, c) do {						\
	a -= b; a -= c; a ^= (c >> 13);					\
	b -= c; b -= a; b ^= (a << 8);					\
	c -= a; c -= b; c ^= (b >> 13);					\
	a -= b; a -= c; a ^= (c >> 12);					\
	b -= c; b -= a; b ^= (a << 16);					\
	c -= a; c -= b; c ^= (b >> 5);					\
	a -= b; a -= c; a ^= (c >> 3);					\
	b -= c; b -= a; b ^= (a << 10);					\
	c -= a; c -= b; c ^= (b >> 15);					\
} while (0)

int
rt_hash(struct rtentry *rt, struct sockaddr *dst, uint32_t *src)
{
	uint32_t a, b, c;

	if (src == NULL || !rtisvalid(rt) || !ISSET(rt->rt_flags, RTF_MPATH))
		return (-1);

	a = b = 0x9e3779b9;
	c = rt_hashjitter;

	switch (dst->sa_family) {
	case AF_INET:
	    {
		struct sockaddr_in *sin;

		if (!ipmultipath)
			return (-1);

		sin = satosin(dst);
		a += sin->sin_addr.s_addr;
		b += src[0];
		mix(a, b, c);
		break;
	    }
#ifdef INET6
	case AF_INET6:
	    {
		struct sockaddr_in6 *sin6;

		if (!ip6_multipath)
			return (-1);

		sin6 = satosin6(dst);
		a += sin6->sin6_addr.s6_addr32[0];
		b += sin6->sin6_addr.s6_addr32[2];
		c += src[0];
		mix(a, b, c);
		a += sin6->sin6_addr.s6_addr32[1];
		b += sin6->sin6_addr.s6_addr32[3];
		c += src[1];
		mix(a, b, c);
		a += sin6->sin6_addr.s6_addr32[2];
		b += sin6->sin6_addr.s6_addr32[1];
		c += src[2];
		mix(a, b, c);
		a += sin6->sin6_addr.s6_addr32[3];
		b += sin6->sin6_addr.s6_addr32[0];
		c += src[3];
		mix(a, b, c);
		break;
	    }
#endif /* INET6 */
	}

	return (c & 0xffff);
}
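
/*
 * Note (informational): the 16-bit hash computed above is only used to
 * spread flows across RTF_MPATH routes during lookup; -1 tells the
 * caller that multipath selection is disabled or not applicable and
 * that the first matching route should be used as-is.
 */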

/*
 * Allocate a route, potentially using multipath to select the peer.
 */
struct rtentry *
rtalloc_mpath(struct sockaddr *dst, uint32_t *src, unsigned int rtableid)
{
	return (rt_match(dst, src, RT_RESOLVE, rtableid));
}

/*
 * Look in the routing table for the best matching entry for
 * ``dst''.
 *
 * If a route with a gateway is found and its next hop is no
 * longer valid, try to cache it.
 */
struct rtentry *
rtalloc(struct sockaddr *dst, int flags, unsigned int rtableid)
{
	return (rt_match(dst, NULL, flags, rtableid));
}

/*
 * Cache the route entry corresponding to a reachable next hop in
 * the gateway entry ``rt''.
 */
int
rt_setgwroute(struct rtentry *rt, u_int rtableid)
{
	struct rtentry *prt, *nhrt;
	unsigned int rdomain = rtable_l2(rtableid);
	int error;

	NET_ASSERT_LOCKED();

	KASSERT(ISSET(rt->rt_flags, RTF_GATEWAY));

	/* If we cannot find a valid next hop, bail. */
	nhrt = rt_match(rt->rt_gateway, NULL, RT_RESOLVE, rdomain);
	if (nhrt == NULL)
		return (ENOENT);

	/* Next hop entry must be on the same interface. */
	if (nhrt->rt_ifidx != rt->rt_ifidx) {
		struct sockaddr_in6	sa_mask;

		if (!ISSET(nhrt->rt_flags, RTF_LLINFO) ||
		    !ISSET(nhrt->rt_flags, RTF_CLONED)) {
			rtfree(nhrt);
			return (EHOSTUNREACH);
		}

		/*
		 * We found a L2 entry, so we might have multiple
		 * RTF_CLONING routes for the same subnet.  Query
		 * the first route of the multipath chain and iterate
		 * until we find the correct one.
		 */
		prt = rtable_lookup(rdomain, rt_key(nhrt->rt_parent),
		    rt_plen2mask(nhrt->rt_parent, &sa_mask), NULL, RTP_ANY);
		rtfree(nhrt);

		while (prt != NULL && prt->rt_ifidx != rt->rt_ifidx)
			prt = rtable_iterate(prt);

		/* We found nothing or a non-cloning MPATH route. */
		if (prt == NULL || !ISSET(prt->rt_flags, RTF_CLONING)) {
			rtfree(prt);
			return (EHOSTUNREACH);
		}

		error = rt_clone(&prt, rt->rt_gateway, rdomain);
		if (error) {
			rtfree(prt);
			return (error);
		}
		nhrt = prt;
	}

	/*
	 * Next hop must be reachable; this also prevents rtentry
	 * loops, for example when rt->rt_gwroute points to rt.
	 */
	if (ISSET(nhrt->rt_flags, RTF_CLONING|RTF_GATEWAY)) {
		rtfree(nhrt);
		return (ENETUNREACH);
	}

	/* Next hop is valid, so remove the possible old cache. */
	rt_putgwroute(rt);
	KASSERT(rt->rt_gwroute == NULL);

	/*
	 * If the MTU of next hop is 0, this will reset the MTU of the
	 * route to run PMTUD again from scratch.
	 */
	if (!ISSET(rt->rt_locks, RTV_MTU) && (rt->rt_mtu > nhrt->rt_mtu))
		rt->rt_mtu = nhrt->rt_mtu;

	/*
	 * To avoid reference counting problems when writing link-layer
	 * addresses in an outgoing packet, we ensure that the lifetime
	 * of a cached entry is greater than the lifetime of the
	 * gateway entries pointing to it.
	 */
	nhrt->rt_flags |= RTF_CACHED;
	nhrt->rt_cachecnt++;

	rt->rt_gwroute = nhrt;

	return (0);
}

/*
 * Invalidate the cached route entry of the gateway entry ``rt''.
 */
void
rt_putgwroute(struct rtentry *rt)
{
	struct rtentry *nhrt = rt->rt_gwroute;

	NET_ASSERT_LOCKED();

	if (!ISSET(rt->rt_flags, RTF_GATEWAY) || nhrt == NULL)
		return;

	KASSERT(ISSET(nhrt->rt_flags, RTF_CACHED));
	KASSERT(nhrt->rt_cachecnt > 0);

	--nhrt->rt_cachecnt;
	if (nhrt->rt_cachecnt == 0)
		nhrt->rt_flags &= ~RTF_CACHED;

	rtfree(rt->rt_gwroute);
	rt->rt_gwroute = NULL;
}

void
rtref(struct rtentry *rt)
{
	refcnt_take(&rt->rt_refcnt);
}

void
rtfree(struct rtentry *rt)
{
	if (rt == NULL)
		return;

	if (refcnt_rele(&rt->rt_refcnt) == 0)
		return;

	KASSERT(!ISSET(rt->rt_flags, RTF_UP));
	KASSERT(!RT_ROOT(rt));
	atomic_dec_int(&rttrash);

	KERNEL_LOCK();
	rt_timer_remove_all(rt);
	ifafree(rt->rt_ifa);
	rtlabel_unref(rt->rt_labelid);
#ifdef MPLS
	rt_mpls_clear(rt);
#endif
	free(rt->rt_gateway, M_RTABLE, ROUNDUP(rt->rt_gateway->sa_len));
	free(rt_key(rt), M_RTABLE, rt_key(rt)->sa_len);
	KERNEL_UNLOCK();

	pool_put(&rtentry_pool, rt);
}

void
ifafree(struct ifaddr *ifa)
{
	if (ifa == NULL)
		panic("ifafree");
	if (ifa->ifa_refcnt == 0) {
		ifatrash--;
		free(ifa, M_IFADDR, 0);
	} else
		ifa->ifa_refcnt--;
}
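
/*
 * Relationship maintained by rt_setgwroute()/rt_putgwroute() above,
 * shown for illustration:
 *
 *	gateway rtentry (RTF_GATEWAY)
 *	  rt_gwroute ---> next-hop rtentry (RTF_CACHED, rt_cachecnt > 0)
 *
 * Several gateway routes may share one cached next hop; the entry only
 * loses RTF_CACHED once rt_putgwroute() drops rt_cachecnt back to zero.
 */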

/*
 * Force a routing table entry to the specified
 * destination to go through the given gateway.
 * Normally called as a result of a routing redirect
 * message from the network layer.
 */
void
rtredirect(struct sockaddr *dst, struct sockaddr *gateway,
    struct sockaddr *src, struct rtentry **rtp, unsigned int rdomain)
{
	struct rtentry		*rt;
	int			 error = 0;
	enum rtstat_counters	 stat = rts_ncounters;
	struct rt_addrinfo	 info;
	struct ifaddr		*ifa;
	unsigned int		 ifidx = 0;
	int			 flags = RTF_GATEWAY|RTF_HOST;
	uint8_t			 prio = RTP_NONE;

	NET_ASSERT_LOCKED();

	/* verify the gateway is directly reachable */
	rt = rtalloc(gateway, 0, rdomain);
	if (!rtisvalid(rt) || ISSET(rt->rt_flags, RTF_GATEWAY)) {
		rtfree(rt);
		error = ENETUNREACH;
		goto out;
	}
	ifidx = rt->rt_ifidx;
	ifa = rt->rt_ifa;
	rtfree(rt);
	rt = NULL;

	rt = rtable_lookup(rdomain, dst, NULL, NULL, RTP_ANY);
	/*
	 * If the redirect isn't from our current router for this dst,
	 * it's either old or wrong.  If it redirects us to ourselves,
	 * we have a routing loop, perhaps as a result of an interface
	 * going down recently.
	 */
#define	equal(a1, a2) \
	((a1)->sa_len == (a2)->sa_len && \
	 bcmp((caddr_t)(a1), (caddr_t)(a2), (a1)->sa_len) == 0)
	if (rt != NULL && (!equal(src, rt->rt_gateway) || rt->rt_ifa != ifa))
		error = EINVAL;
	else if (ifa_ifwithaddr(gateway, rdomain) != NULL ||
	    (gateway->sa_family == AF_INET &&
	    in_broadcast(satosin(gateway)->sin_addr, rdomain)))
		error = EHOSTUNREACH;
	if (error)
		goto done;
	/*
	 * Create a new entry if we just got back a wildcard entry
	 * or the lookup failed.  This is necessary for hosts
	 * which use routing redirects generated by smart gateways
	 * to dynamically build the routing tables.
	 */
	if (rt == NULL)
		goto create;
	/*
	 * Don't listen to the redirect if it's
	 * for a route to an interface.
	 */
	if (ISSET(rt->rt_flags, RTF_GATEWAY)) {
		if (!ISSET(rt->rt_flags, RTF_HOST)) {
			/*
			 * Changing from route to net => route to host.
			 * Create new route, rather than smashing route to net.
			 */
create:
			rtfree(rt);
			flags |= RTF_DYNAMIC;
			bzero(&info, sizeof(info));
			info.rti_info[RTAX_DST] = dst;
			info.rti_info[RTAX_GATEWAY] = gateway;
			info.rti_ifa = ifa;
			info.rti_flags = flags;
			rt = NULL;
			error = rtrequest(RTM_ADD, &info, RTP_DEFAULT, &rt,
			    rdomain);
			if (error == 0) {
				flags = rt->rt_flags;
				prio = rt->rt_priority;
			}
			stat = rts_dynamic;
		} else {
			/*
			 * Smash the current notion of the gateway to
			 * this destination.  Should check about netmask!!!
			 */
			rt->rt_flags |= RTF_MODIFIED;
			flags |= RTF_MODIFIED;
			prio = rt->rt_priority;
			stat = rts_newgateway;
			rt_setgate(rt, gateway, rdomain);
		}
	} else
		error = EHOSTUNREACH;
done:
	if (rt) {
		if (rtp && !error)
			*rtp = rt;
		else
			rtfree(rt);
	}
out:
	if (error)
		rtstat_inc(rts_badredirect);
	else if (stat != rts_ncounters)
		rtstat_inc(stat);
	bzero((caddr_t)&info, sizeof(info));
	info.rti_info[RTAX_DST] = dst;
	info.rti_info[RTAX_GATEWAY] = gateway;
	info.rti_info[RTAX_AUTHOR] = src;
	rtm_miss(RTM_REDIRECT, &info, flags, prio, ifidx, error, rdomain);
}

/*
 * Delete a route and generate a message.
 */
int
rtdeletemsg(struct rtentry *rt, struct ifnet *ifp, u_int tableid)
{
	int			error;
	struct rt_addrinfo	info;
	struct sockaddr_rtlabel	sa_rl;
	struct sockaddr_in6	sa_mask;

	KASSERT(rt->rt_ifidx == ifp->if_index);

	/*
	 * Request the new route so that the entry is not actually
	 * deleted.  That will allow the information being reported to
	 * be accurate (and consistent with route_output()).
	 */
	memset(&info, 0, sizeof(info));
	info.rti_info[RTAX_DST] = rt_key(rt);
	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
	if (!ISSET(rt->rt_flags, RTF_HOST))
		info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask);
	info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl);
	info.rti_flags = rt->rt_flags;
	info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
	info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
	error = rtrequest_delete(&info, rt->rt_priority, ifp, &rt, tableid);
	rtm_miss(RTM_DELETE, &info, info.rti_flags, rt->rt_priority,
	    rt->rt_ifidx, error, tableid);
	if (error == 0)
		rtfree(rt);
	return (error);
}

static inline int
rtequal(struct rtentry *a, struct rtentry *b)
{
	if (a == b)
		return 1;

	if (memcmp(rt_key(a), rt_key(b), rt_key(a)->sa_len) == 0 &&
	    rt_plen(a) == rt_plen(b))
		return 1;
	else
		return 0;
}

int
rtflushclone1(struct rtentry *rt, void *arg, u_int id)
{
	struct rtentry *cloningrt = arg;
	struct ifnet *ifp;

	if (!ISSET(rt->rt_flags, RTF_CLONED))
		return 0;

	/* Cached routes must stay alive as long as their parents are alive. */
	if (ISSET(rt->rt_flags, RTF_CACHED) && (rt->rt_parent != cloningrt))
		return 0;

	if (!rtequal(rt->rt_parent, cloningrt))
		return 0;
	/*
	 * This happens when an interface with a RTF_CLONING route is
	 * being detached.  In this case it's safe to bail because all
	 * the routes are being purged by rt_ifa_purge().
	 */
	ifp = if_get(rt->rt_ifidx);
	if (ifp == NULL)
		return 0;

	if_put(ifp);
	return EEXIST;
}

int
rtflushclone(struct rtentry *parent, unsigned int rtableid)
{
	struct rtentry *rt = NULL;
	struct ifnet *ifp;
	int error;

#ifdef DIAGNOSTIC
	if (!parent || (parent->rt_flags & RTF_CLONING) == 0)
		panic("rtflushclone: called with a non-cloning route");
#endif

	do {
		error = rtable_walk(rtableid, rt_key(parent)->sa_family, &rt,
		    rtflushclone1, parent);
		if (rt != NULL && error == EEXIST) {
			ifp = if_get(rt->rt_ifidx);
			if (ifp == NULL) {
				error = EAGAIN;
			} else {
				error = rtdeletemsg(rt, ifp, rtableid);
				if (error == 0)
					error = EAGAIN;
				if_put(ifp);
			}
		}
		rtfree(rt);
		rt = NULL;
	} while (error == EAGAIN);

	return error;
}
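
/*
 * Walker protocol used by rtflushclone() above and by rt_ifa_purge()
 * and rt_if_track() below: a callback returns EEXIST to make
 * rtable_walk() stop and hand the offending route back to its caller,
 * which deletes it and restarts the walk (EAGAIN) until no matching
 * route is left.
 */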

int
rtrequest_delete(struct rt_addrinfo *info, u_int8_t prio, struct ifnet *ifp,
    struct rtentry **ret_nrt, u_int tableid)
{
	struct rtentry	*rt;
	int		 error;

	NET_ASSERT_LOCKED();

	if (!rtable_exists(tableid))
		return (EAFNOSUPPORT);
	rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
	    info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY], prio);
	if (rt == NULL)
		return (ESRCH);

	/* Make sure that's the route the caller wants to delete. */
	if (ifp != NULL && ifp->if_index != rt->rt_ifidx) {
		rtfree(rt);
		return (ESRCH);
	}

#ifdef BFD
	if (ISSET(rt->rt_flags, RTF_BFD))
		bfdclear(rt);
#endif

	error = rtable_delete(tableid, info->rti_info[RTAX_DST],
	    info->rti_info[RTAX_NETMASK], rt);
	if (error != 0) {
		rtfree(rt);
		return (ESRCH);
	}

	/* Release next hop cache before flushing cloned entries. */
	rt_putgwroute(rt);

	/* Clean up any cloned children. */
	if (ISSET(rt->rt_flags, RTF_CLONING))
		rtflushclone(rt, tableid);

	rtfree(rt->rt_parent);
	rt->rt_parent = NULL;

	rt->rt_flags &= ~RTF_UP;

	KASSERT(ifp->if_index == rt->rt_ifidx);
	ifp->if_rtrequest(ifp, RTM_DELETE, rt);

	atomic_inc_int(&rttrash);

	if (ret_nrt != NULL)
		*ret_nrt = rt;
	else
		rtfree(rt);

	return (0);
}

int
rtrequest(int req, struct rt_addrinfo *info, u_int8_t prio,
    struct rtentry **ret_nrt, u_int tableid)
{
	struct ifnet		*ifp;
	struct rtentry		*rt, *crt;
	struct ifaddr		*ifa;
	struct sockaddr		*ndst;
	struct sockaddr_rtlabel	*sa_rl, sa_rl2;
	struct sockaddr_dl	 sa_dl = { sizeof(sa_dl), AF_LINK };
	int			 error;

	NET_ASSERT_LOCKED();

	if (!rtable_exists(tableid))
		return (EAFNOSUPPORT);
	if (info->rti_flags & RTF_HOST)
		info->rti_info[RTAX_NETMASK] = NULL;
	switch (req) {
	case RTM_DELETE:
		return (EINVAL);

	case RTM_RESOLVE:
		if (ret_nrt == NULL || (rt = *ret_nrt) == NULL)
			return (EINVAL);
		if ((rt->rt_flags & RTF_CLONING) == 0)
			return (EINVAL);
		KASSERT(rt->rt_ifa->ifa_ifp != NULL);
		info->rti_ifa = rt->rt_ifa;
		info->rti_flags = rt->rt_flags | (RTF_CLONED|RTF_HOST);
		info->rti_flags &= ~(RTF_CLONING|RTF_CONNECTED|RTF_STATIC);
		info->rti_info[RTAX_GATEWAY] = sdltosa(&sa_dl);
		info->rti_info[RTAX_LABEL] =
		    rtlabel_id2sa(rt->rt_labelid, &sa_rl2);
		/* FALLTHROUGH */

	case RTM_ADD:
		if (info->rti_ifa == NULL)
			return (EINVAL);
		ifa = info->rti_ifa;
		ifp = ifa->ifa_ifp;
		if (prio == 0)
			prio = ifp->if_priority + RTP_STATIC;

		error = rt_copysa(info->rti_info[RTAX_DST],
		    info->rti_info[RTAX_NETMASK], &ndst);
		if (error)
			return (error);

		rt = pool_get(&rtentry_pool, PR_NOWAIT | PR_ZERO);
		if (rt == NULL) {
			free(ndst, M_RTABLE, ndst->sa_len);
			return (ENOBUFS);
		}

		refcnt_init(&rt->rt_refcnt);
		rt->rt_flags = info->rti_flags | RTF_UP;
		rt->rt_priority = prio;	/* init routing priority */
		LIST_INIT(&rt->rt_timer);

		/* Check the link state if the table supports it. */
		if (rtable_mpath_capable(tableid, ndst->sa_family) &&
		    !ISSET(rt->rt_flags, RTF_LOCAL) &&
		    (!LINK_STATE_IS_UP(ifp->if_link_state) ||
		    !ISSET(ifp->if_flags, IFF_UP))) {
			rt->rt_flags &= ~RTF_UP;
			rt->rt_priority |= RTP_DOWN;
		}

		if (info->rti_info[RTAX_LABEL] != NULL) {
			sa_rl = (struct sockaddr_rtlabel *)
			    info->rti_info[RTAX_LABEL];
			rt->rt_labelid = rtlabel_name2id(sa_rl->sr_label);
		}

#ifdef MPLS
		/* We have to allocate additional space for MPLS info. */
		if (info->rti_flags & RTF_MPLS &&
		    (info->rti_info[RTAX_SRC] != NULL ||
		    info->rti_info[RTAX_DST]->sa_family == AF_MPLS)) {
			error = rt_mpls_set(rt, info->rti_info[RTAX_SRC],
			    info->rti_mpls);
			if (error) {
				free(ndst, M_RTABLE, ndst->sa_len);
				pool_put(&rtentry_pool, rt);
				return (error);
			}
		} else
			rt_mpls_clear(rt);
#endif

		ifa->ifa_refcnt++;
		rt->rt_ifa = ifa;
		rt->rt_ifidx = ifp->if_index;
		/*
		 * Copy metrics and a back pointer from the cloned
		 * route's parent.
		 */
		if (ISSET(rt->rt_flags, RTF_CLONED)) {
			rtref(*ret_nrt);
			rt->rt_parent = *ret_nrt;
			rt->rt_rmx = (*ret_nrt)->rt_rmx;
		}

		/*
		 * We must set rt->rt_gateway before adding ``rt'' to
		 * the routing table because the radix MPATH code uses
		 * it to (re)order routes.
		 */
		if ((error = rt_setgate(rt, info->rti_info[RTAX_GATEWAY],
		    tableid))) {
			ifafree(ifa);
			rtfree(rt->rt_parent);
			rt_putgwroute(rt);
			free(rt->rt_gateway, M_RTABLE,
			    ROUNDUP(rt->rt_gateway->sa_len));
			free(ndst, M_RTABLE, ndst->sa_len);
			pool_put(&rtentry_pool, rt);
			return (error);
		}

		error = rtable_insert(tableid, ndst,
		    info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY],
		    rt->rt_priority, rt);
		if (error != 0 &&
		    (crt = rtable_match(tableid, ndst, NULL)) != NULL) {
			/* overwrite cloned route */
			if (ISSET(crt->rt_flags, RTF_CLONED) &&
			    !ISSET(crt->rt_flags, RTF_CACHED)) {
				struct ifnet *cifp;

				cifp = if_get(crt->rt_ifidx);
				KASSERT(cifp != NULL);
				rtdeletemsg(crt, cifp, tableid);
				if_put(cifp);

				error = rtable_insert(tableid, ndst,
				    info->rti_info[RTAX_NETMASK],
				    info->rti_info[RTAX_GATEWAY],
				    rt->rt_priority, rt);
			}
			rtfree(crt);
		}
		if (error != 0) {
			ifafree(ifa);
			rtfree(rt->rt_parent);
			rt_putgwroute(rt);
			free(rt->rt_gateway, M_RTABLE,
			    ROUNDUP(rt->rt_gateway->sa_len));
			free(ndst, M_RTABLE, ndst->sa_len);
			pool_put(&rtentry_pool, rt);
			return (EEXIST);
		}
		ifp->if_rtrequest(ifp, req, rt);

		if_group_routechange(info->rti_info[RTAX_DST],
		    info->rti_info[RTAX_NETMASK]);

		if (ret_nrt != NULL)
			*ret_nrt = rt;
		else
			rtfree(rt);
		break;
	}

	return (0);
}

int
rt_setgate(struct rtentry *rt, struct sockaddr *gate, u_int rtableid)
{
	int glen = ROUNDUP(gate->sa_len);
	struct sockaddr *sa;

	if (rt->rt_gateway == NULL || glen != ROUNDUP(rt->rt_gateway->sa_len)) {
		sa = malloc(glen, M_RTABLE, M_NOWAIT);
		if (sa == NULL)
			return (ENOBUFS);
		if (rt->rt_gateway != NULL) {
			free(rt->rt_gateway, M_RTABLE,
			    ROUNDUP(rt->rt_gateway->sa_len));
		}
		rt->rt_gateway = sa;
	}
	memmove(rt->rt_gateway, gate, glen);

	if (ISSET(rt->rt_flags, RTF_GATEWAY))
		return (rt_setgwroute(rt, rtableid));

	return (0);
}

/*
 * Return the route entry containing the next hop link-layer
 * address corresponding to ``rt''.
 */
struct rtentry *
rt_getll(struct rtentry *rt)
{
	if (ISSET(rt->rt_flags, RTF_GATEWAY)) {
		KASSERT(rt->rt_gwroute != NULL);
		return (rt->rt_gwroute);
	}

	return (rt);
}

void
rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst,
    struct sockaddr *netmask)
{
	u_char	*cp1 = (u_char *)src;
	u_char	*cp2 = (u_char *)dst;
	u_char	*cp3 = (u_char *)netmask;
	u_char	*cplim = cp2 + *cp3;
	u_char	*cplim2 = cp2 + *cp1;

	*cp2++ = *cp1++; *cp2++ = *cp1++; /* copies sa_len & sa_family */
	cp3 += 2;
	if (cplim > cplim2)
		cplim = cplim2;
	while (cp2 < cplim)
		*cp2++ = *cp1++ & *cp3++;
	if (cp2 < cplim2)
		bzero(cp2, cplim2 - cp2);
}
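
/*
 * Worked example (illustrative): masking the sockaddr_in destination
 * 192.0.2.7 with the netmask 255.255.255.0 stores 192.0.2.0 in
 * ``dst''; bytes beyond the netmask's sa_len are zeroed.
 */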

/*
 * Allocate a new sockaddr structure, based on the user-supplied src
 * and mask, that is usable for the routing table.
 */
static int
rt_copysa(struct sockaddr *src, struct sockaddr *mask, struct sockaddr **dst)
{
	static const u_char maskarray[] = {
	    0x0, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe };
	struct sockaddr		*ndst;
	const struct domain	*dp;
	u_char			*csrc, *cdst;
	int			 i, plen;

	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_rtoffset == 0)
			continue;
		if (src->sa_family == dp->dom_family)
			break;
	}
	if (dp == NULL)
		return (EAFNOSUPPORT);

	if (src->sa_len < dp->dom_sasize)
		return (EINVAL);

	plen = rtable_satoplen(src->sa_family, mask);
	if (plen == -1)
		return (EINVAL);

	ndst = malloc(dp->dom_sasize, M_RTABLE, M_NOWAIT|M_ZERO);
	if (ndst == NULL)
		return (ENOBUFS);

	ndst->sa_family = src->sa_family;
	ndst->sa_len = dp->dom_sasize;

	csrc = (u_char *)src + dp->dom_rtoffset;
	cdst = (u_char *)ndst + dp->dom_rtoffset;

	memcpy(cdst, csrc, plen / 8);
	if (plen % 8 != 0)
		cdst[plen / 8] = csrc[plen / 8] & maskarray[plen % 8];

	*dst = ndst;
	return (0);
}
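
/*
 * The partial-byte masking above, by example: for an IPv4 /20 prefix
 * the first two address bytes are copied verbatim (20 / 8 == 2) and
 * the third byte is ANDed with maskarray[20 % 8] == maskarray[4],
 * i.e. 0xf0.
 */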

int
rt_ifa_add(struct ifaddr *ifa, int flags, struct sockaddr *dst,
    unsigned int rdomain)
{
	struct ifnet		*ifp = ifa->ifa_ifp;
	struct rtentry		*rt;
	struct sockaddr_rtlabel	 sa_rl;
	struct rt_addrinfo	 info;
	uint8_t			 prio = ifp->if_priority + RTP_STATIC;
	int			 error;

	KASSERT(rdomain == rtable_l2(rdomain));

	memset(&info, 0, sizeof(info));
	info.rti_ifa = ifa;
	info.rti_flags = flags;
	info.rti_info[RTAX_DST] = dst;
	if (flags & RTF_LLINFO)
		info.rti_info[RTAX_GATEWAY] = sdltosa(ifp->if_sadl);
	else
		info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
	info.rti_info[RTAX_LABEL] = rtlabel_id2sa(ifp->if_rtlabelid, &sa_rl);

#ifdef MPLS
	if ((flags & RTF_MPLS) == RTF_MPLS)
		info.rti_mpls = MPLS_OP_POP;
#endif /* MPLS */

	if ((flags & RTF_HOST) == 0)
		info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;

	if (flags & (RTF_LOCAL|RTF_BROADCAST))
		prio = RTP_LOCAL;

	if (flags & RTF_CONNECTED)
		prio = ifp->if_priority + RTP_CONNECTED;

	error = rtrequest(RTM_ADD, &info, prio, &rt, rdomain);
	if (error == 0) {
		/*
		 * A local route is created for every address configured
		 * on an interface, so use this information to notify
		 * userland that a new address has been added.
		 */
		if (flags & RTF_LOCAL)
			rtm_addr(RTM_NEWADDR, ifa);
		rtm_send(rt, RTM_ADD, 0, rdomain);
		rtfree(rt);
	}
	return (error);
}

int
rt_ifa_del(struct ifaddr *ifa, int flags, struct sockaddr *dst,
    unsigned int rdomain)
{
	struct ifnet		*ifp = ifa->ifa_ifp;
	struct rtentry		*rt;
	struct mbuf		*m = NULL;
	struct sockaddr		*deldst;
	struct rt_addrinfo	 info;
	struct sockaddr_rtlabel	 sa_rl;
	uint8_t			 prio = ifp->if_priority + RTP_STATIC;
	int			 error;

	KASSERT(rdomain == rtable_l2(rdomain));

	if ((flags & RTF_HOST) == 0 && ifa->ifa_netmask) {
		m = m_get(M_DONTWAIT, MT_SONAME);
		if (m == NULL)
			return (ENOBUFS);
		deldst = mtod(m, struct sockaddr *);
		rt_maskedcopy(dst, deldst, ifa->ifa_netmask);
		dst = deldst;
	}

	memset(&info, 0, sizeof(info));
	info.rti_ifa = ifa;
	info.rti_flags = flags;
	info.rti_info[RTAX_DST] = dst;
	if ((flags & RTF_LLINFO) == 0)
		info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
	info.rti_info[RTAX_LABEL] = rtlabel_id2sa(ifp->if_rtlabelid, &sa_rl);

	if ((flags & RTF_HOST) == 0)
		info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;

	if (flags & (RTF_LOCAL|RTF_BROADCAST))
		prio = RTP_LOCAL;

	if (flags & RTF_CONNECTED)
		prio = ifp->if_priority + RTP_CONNECTED;

	rtable_clearsource(rdomain, ifa->ifa_addr);
	error = rtrequest_delete(&info, prio, ifp, &rt, rdomain);
	if (error == 0) {
		rtm_send(rt, RTM_DELETE, 0, rdomain);
		if (flags & RTF_LOCAL)
			rtm_addr(RTM_DELADDR, ifa);
		rtfree(rt);
	}
	m_free(m);

	return (error);
}
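
/*
 * Note: the MT_SONAME mbuf above is plain scratch storage.  Deleting
 * a non-host route requires the masked destination, so the address is
 * masked into the mbuf data area and the mbuf is freed again once the
 * request has been processed.
 */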

/*
 * Add ifa's address as a local rtentry.
 */
int
rt_ifa_addlocal(struct ifaddr *ifa)
{
	struct ifnet *ifp = ifa->ifa_ifp;
	struct rtentry *rt;
	u_int flags = RTF_HOST|RTF_LOCAL;
	int error = 0;

	/*
	 * If the configured address corresponds to the magical "any"
	 * address, do not add a local route entry because that might
	 * corrupt the routing tree which uses this value for the
	 * default routes.
	 */
	switch (ifa->ifa_addr->sa_family) {
	case AF_INET:
		if (satosin(ifa->ifa_addr)->sin_addr.s_addr == INADDR_ANY)
			return (0);
		break;
#ifdef INET6
	case AF_INET6:
		if (IN6_ARE_ADDR_EQUAL(&satosin6(ifa->ifa_addr)->sin6_addr,
		    &in6addr_any))
			return (0);
		break;
#endif
	default:
		break;
	}

	if (!ISSET(ifp->if_flags, (IFF_LOOPBACK|IFF_POINTOPOINT)))
		flags |= RTF_LLINFO;

	/* If there is no local entry, allocate one. */
	rt = rtalloc(ifa->ifa_addr, 0, ifp->if_rdomain);
	if (rt == NULL || ISSET(rt->rt_flags, flags) != flags) {
		error = rt_ifa_add(ifa, flags | RTF_MPATH, ifa->ifa_addr,
		    ifp->if_rdomain);
	}
	rtfree(rt);

	return (error);
}

/*
 * Remove local rtentry of ifa's address if it exists.
 */
int
rt_ifa_dellocal(struct ifaddr *ifa)
{
	struct ifnet *ifp = ifa->ifa_ifp;
	struct rtentry *rt;
	u_int flags = RTF_HOST|RTF_LOCAL;
	int error = 0;

	/*
	 * We do not add local routes for such addresses, so do not
	 * bother removing them.
	 */
	switch (ifa->ifa_addr->sa_family) {
	case AF_INET:
		if (satosin(ifa->ifa_addr)->sin_addr.s_addr == INADDR_ANY)
			return (0);
		break;
#ifdef INET6
	case AF_INET6:
		if (IN6_ARE_ADDR_EQUAL(&satosin6(ifa->ifa_addr)->sin6_addr,
		    &in6addr_any))
			return (0);
		break;
#endif
	default:
		break;
	}

	if (!ISSET(ifp->if_flags, (IFF_LOOPBACK|IFF_POINTOPOINT)))
		flags |= RTF_LLINFO;

	/*
	 * Before deleting, check that a corresponding local host
	 * route actually exists.  With this check, we can avoid
	 * deleting an interface direct route whose destination is
	 * the same as the address being removed.  This can happen
	 * when removing a subnet-router anycast address on an
	 * interface attached to a shared medium.
	 */
	rt = rtalloc(ifa->ifa_addr, 0, ifp->if_rdomain);
	if (rt != NULL && ISSET(rt->rt_flags, flags) == flags) {
		error = rt_ifa_del(ifa, flags, ifa->ifa_addr,
		    ifp->if_rdomain);
	}
	rtfree(rt);

	return (error);
}

/*
 * Remove all routes attached to ``ifa''.
 */
void
rt_ifa_purge(struct ifaddr *ifa)
{
	struct ifnet		*ifp = ifa->ifa_ifp;
	struct rtentry		*rt = NULL;
	unsigned int		 rtableid;
	int			 error, af = ifa->ifa_addr->sa_family;

	KASSERT(ifp != NULL);

	for (rtableid = 0; rtableid < rtmap_limit; rtableid++) {
		/* skip rtables that are not in the rdomain of the ifp */
		if (rtable_l2(rtableid) != ifp->if_rdomain)
			continue;

		do {
			error = rtable_walk(rtableid, af, &rt,
			    rt_ifa_purge_walker, ifa);
			if (rt != NULL && error == EEXIST) {
				error = rtdeletemsg(rt, ifp, rtableid);
				if (error == 0)
					error = EAGAIN;
			}
			rtfree(rt);
			rt = NULL;
		} while (error == EAGAIN);

		if (error == EAFNOSUPPORT)
			error = 0;

		if (error)
			break;
	}
}

int
rt_ifa_purge_walker(struct rtentry *rt, void *vifa, unsigned int rtableid)
{
	struct ifaddr *ifa = vifa;

	if (rt->rt_ifa == ifa)
		return EEXIST;

	return 0;
}

/*
 * Route timer routines.  These routines allow functions to be called
 * for various routes at any time.  This is useful in supporting
 * path MTU discovery and redirect route deletion.
 *
 * This is similar to some BSDI internal functions, but it provides
 * for multiple queues for efficiency's sake...
 */

struct mutex	rttimer_mtx;

struct rttimer {
	TAILQ_ENTRY(rttimer)	rtt_next;	/* [T] entry on timer queue */
	LIST_ENTRY(rttimer)	rtt_link;	/* [T] timers per rtentry */
	struct timeout		rtt_timeout;	/* [I] timeout for this entry */
	struct rttimer_queue	*rtt_queue;	/* [I] back pointer to queue */
	struct rtentry		*rtt_rt;	/* [T] back pointer to route */
	time_t			rtt_expire;	/* [I] rt expire time */
	u_int			rtt_tableid;	/* [I] rtable id of rtt_rt */
};

#define RTTIMER_CALLOUT(r) {						\
	if (r->rtt_queue->rtq_func != NULL) {				\
		(*r->rtt_queue->rtq_func)(r->rtt_rt, r->rtt_tableid);	\
	} else {							\
		struct ifnet *ifp;					\
									\
		ifp = if_get(r->rtt_rt->rt_ifidx);			\
		if (ifp != NULL &&					\
		    (r->rtt_rt->rt_flags & (RTF_DYNAMIC|RTF_HOST)) ==	\
		    (RTF_DYNAMIC|RTF_HOST))				\
			rtdeletemsg(r->rtt_rt, ifp, r->rtt_tableid);	\
		if_put(ifp);						\
	}								\
}

/*
 * Some subtle order problems with domain initialization mean that
 * we cannot count on this being run from rt_init before various
 * protocol initializations are done.  Therefore, we make sure
 * that this is run when the first queue is added...
 */

void
rt_timer_init(void)
{
	pool_init(&rttimer_pool, sizeof(struct rttimer), 0,
	    IPL_MPFLOOR, 0, "rttmr", NULL);
	mtx_init(&rttimer_mtx, IPL_MPFLOOR);
}

void
rt_timer_queue_init(struct rttimer_queue *rtq, int timeout,
    void (*func)(struct rtentry *, u_int))
{
	rtq->rtq_timeout = timeout;
	rtq->rtq_count = 0;
	rtq->rtq_func = func;
	TAILQ_INIT(&rtq->rtq_head);
}

void
rt_timer_queue_change(struct rttimer_queue *rtq, int timeout)
{
	mtx_enter(&rttimer_mtx);
	rtq->rtq_timeout = timeout;
	mtx_leave(&rttimer_mtx);
}

void
rt_timer_queue_flush(struct rttimer_queue *rtq)
{
	struct rttimer		*r;
	TAILQ_HEAD(, rttimer)	 rttlist;

	NET_ASSERT_LOCKED();

	TAILQ_INIT(&rttlist);
	mtx_enter(&rttimer_mtx);
	while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL) {
		LIST_REMOVE(r, rtt_link);
		TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
		TAILQ_INSERT_TAIL(&rttlist, r, rtt_next);
		KASSERT(rtq->rtq_count > 0);
		rtq->rtq_count--;
	}
	mtx_leave(&rttimer_mtx);

	while ((r = TAILQ_FIRST(&rttlist)) != NULL) {
		TAILQ_REMOVE(&rttlist, r, rtt_next);
		RTTIMER_CALLOUT(r);
		pool_put(&rttimer_pool, r);
	}
}

unsigned long
rt_timer_queue_count(struct rttimer_queue *rtq)
{
	return (rtq->rtq_count);
}
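
/*
 * Illustrative usage sketch; ``myq'' and ``my_expire_cb'' are
 * hypothetical names, the pattern is the one used by e.g. path MTU
 * discovery:
 *
 *	struct rttimer_queue myq;
 *	void my_expire_cb(struct rtentry *, u_int);
 *
 *	rt_timer_queue_init(&myq, 60, my_expire_cb);
 *	...
 *	rt_timer_add(rt, &myq, rtableid);
 *
 * Roughly 60 seconds later my_expire_cb(rt, rtableid) runs once under
 * the net lock; with a NULL callback RTTIMER_CALLOUT() instead deletes
 * RTF_DYNAMIC|RTF_HOST routes (see above).
 */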

static inline struct rttimer *
rt_timer_unlink(struct rttimer *r)
{
	MUTEX_ASSERT_LOCKED(&rttimer_mtx);

	LIST_REMOVE(r, rtt_link);
	r->rtt_rt = NULL;

	if (timeout_del(&r->rtt_timeout) == 0) {
		/* timeout fired, so rt_timer_timer will do the cleanup */
		return NULL;
	}

	TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next);
	KASSERT(r->rtt_queue->rtq_count > 0);
	r->rtt_queue->rtq_count--;
	return r;
}

void
rt_timer_remove_all(struct rtentry *rt)
{
	struct rttimer		*r;
	TAILQ_HEAD(, rttimer)	 rttlist;

	TAILQ_INIT(&rttlist);
	mtx_enter(&rttimer_mtx);
	while ((r = LIST_FIRST(&rt->rt_timer)) != NULL) {
		r = rt_timer_unlink(r);
		if (r != NULL)
			TAILQ_INSERT_TAIL(&rttlist, r, rtt_next);
	}
	mtx_leave(&rttimer_mtx);

	while ((r = TAILQ_FIRST(&rttlist)) != NULL) {
		TAILQ_REMOVE(&rttlist, r, rtt_next);
		pool_put(&rttimer_pool, r);
	}
}

time_t
rt_timer_get_expire(const struct rtentry *rt)
{
	const struct rttimer	*r;
	time_t			 expire = 0;

	mtx_enter(&rttimer_mtx);
	LIST_FOREACH(r, &rt->rt_timer, rtt_link) {
		if (expire == 0 || expire > r->rtt_expire)
			expire = r->rtt_expire;
	}
	mtx_leave(&rttimer_mtx);

	return expire;
}

int
rt_timer_add(struct rtentry *rt, struct rttimer_queue *queue, u_int rtableid)
{
	struct rttimer	*r, *rnew;

	rnew = pool_get(&rttimer_pool, PR_NOWAIT | PR_ZERO);
	if (rnew == NULL)
		return (ENOBUFS);

	rnew->rtt_rt = rt;
	rnew->rtt_queue = queue;
	rnew->rtt_tableid = rtableid;
	rnew->rtt_expire = getuptime() + queue->rtq_timeout;
	timeout_set_proc(&rnew->rtt_timeout, rt_timer_timer, rnew);

	mtx_enter(&rttimer_mtx);
	/*
	 * If there's already a timer with this action, destroy it before
	 * we add a new one.
	 */
	LIST_FOREACH(r, &rt->rt_timer, rtt_link) {
		if (r->rtt_queue == queue) {
			r = rt_timer_unlink(r);
			break;	/* only one per list, so we can quit... */
		}
	}

	LIST_INSERT_HEAD(&rt->rt_timer, rnew, rtt_link);
	TAILQ_INSERT_TAIL(&queue->rtq_head, rnew, rtt_next);
	timeout_add_sec(&rnew->rtt_timeout, queue->rtq_timeout);
	rnew->rtt_queue->rtq_count++;
	mtx_leave(&rttimer_mtx);

	if (r != NULL)
		pool_put(&rttimer_pool, r);

	return (0);
}

void
rt_timer_timer(void *arg)
{
	struct rttimer		*r = arg;
	struct rttimer_queue	*rtq = r->rtt_queue;

	NET_LOCK();
	mtx_enter(&rttimer_mtx);

	if (r->rtt_rt != NULL)
		LIST_REMOVE(r, rtt_link);
	TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
	KASSERT(rtq->rtq_count > 0);
	rtq->rtq_count--;

	mtx_leave(&rttimer_mtx);

	if (r->rtt_rt != NULL)
		RTTIMER_CALLOUT(r);
	NET_UNLOCK();

	pool_put(&rttimer_pool, r);
}

#ifdef MPLS
int
rt_mpls_set(struct rtentry *rt, struct sockaddr *src, uint8_t op)
{
	struct sockaddr_mpls	*psa_mpls = (struct sockaddr_mpls *)src;
	struct rt_mpls		*rt_mpls;

	if (psa_mpls == NULL && op != MPLS_OP_POP)
		return (EOPNOTSUPP);
	if (psa_mpls != NULL && psa_mpls->smpls_len != sizeof(*psa_mpls))
		return (EINVAL);
	if (psa_mpls != NULL && psa_mpls->smpls_family != AF_MPLS)
		return (EAFNOSUPPORT);

	rt->rt_llinfo = malloc(sizeof(struct rt_mpls), M_TEMP, M_NOWAIT|M_ZERO);
	if (rt->rt_llinfo == NULL)
		return (ENOMEM);

	rt_mpls = (struct rt_mpls *)rt->rt_llinfo;
	if (psa_mpls != NULL)
		rt_mpls->mpls_label = psa_mpls->smpls_label;
	rt_mpls->mpls_operation = op;
	/* XXX: set experimental bits */
	rt->rt_flags |= RTF_MPLS;

	return (0);
}

void
rt_mpls_clear(struct rtentry *rt)
{
	if (rt->rt_llinfo != NULL && rt->rt_flags & RTF_MPLS) {
		free(rt->rt_llinfo, M_TEMP, sizeof(struct rt_mpls));
		rt->rt_llinfo = NULL;
	}
	rt->rt_flags &= ~RTF_MPLS;
}
#endif
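
/*
 * Note that MPLS routes reuse rt_llinfo, which otherwise carries a
 * route's link-layer (ARP/ND) state; the RTF_MPLS flag tells the two
 * uses apart, which is why rt_mpls_clear() checks it before freeing.
 */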

u_int16_t
rtlabel_name2id(char *name)
{
	struct rt_label		*label, *p;
	u_int16_t		 new_id = 1;

	if (!name[0])
		return (0);

	TAILQ_FOREACH(label, &rt_labels, rtl_entry)
		if (strcmp(name, label->rtl_name) == 0) {
			label->rtl_ref++;
			return (label->rtl_id);
		}

	/*
	 * to avoid fragmentation, we do a linear search from the beginning
	 * and take the first free slot we find.  if there is none or the
	 * list is empty, append a new entry at the end.
	 */
	TAILQ_FOREACH(p, &rt_labels, rtl_entry) {
		if (p->rtl_id != new_id)
			break;
		new_id = p->rtl_id + 1;
	}
	if (new_id > LABELID_MAX)
		return (0);

	label = malloc(sizeof(*label), M_RTABLE, M_NOWAIT|M_ZERO);
	if (label == NULL)
		return (0);
	strlcpy(label->rtl_name, name, sizeof(label->rtl_name));
	label->rtl_id = new_id;
	label->rtl_ref++;

	if (p != NULL)	/* insert new entry before p */
		TAILQ_INSERT_BEFORE(p, label, rtl_entry);
	else		/* either list empty or no free slot in between */
		TAILQ_INSERT_TAIL(&rt_labels, label, rtl_entry);

	return (label->rtl_id);
}

const char *
rtlabel_id2name(u_int16_t id)
{
	struct rt_label	*label;

	TAILQ_FOREACH(label, &rt_labels, rtl_entry)
		if (label->rtl_id == id)
			return (label->rtl_name);

	return (NULL);
}

struct sockaddr *
rtlabel_id2sa(u_int16_t labelid, struct sockaddr_rtlabel *sa_rl)
{
	const char	*label;

	if (labelid == 0 || (label = rtlabel_id2name(labelid)) == NULL)
		return (NULL);

	bzero(sa_rl, sizeof(*sa_rl));
	sa_rl->sr_len = sizeof(*sa_rl);
	sa_rl->sr_family = AF_UNSPEC;
	strlcpy(sa_rl->sr_label, label, sizeof(sa_rl->sr_label));

	return ((struct sockaddr *)sa_rl);
}

void
rtlabel_unref(u_int16_t id)
{
	struct rt_label	*p, *next;

	if (id == 0)
		return;

	TAILQ_FOREACH_SAFE(p, &rt_labels, rtl_entry, next) {
		if (id == p->rtl_id) {
			if (--p->rtl_ref == 0) {
				TAILQ_REMOVE(&rt_labels, p, rtl_entry);
				free(p, M_RTABLE, sizeof(*p));
			}
			break;
		}
	}
}

int
rt_if_track(struct ifnet *ifp)
{
	unsigned int rtableid;
	struct rtentry *rt = NULL;
	int i, error = 0;

	for (rtableid = 0; rtableid < rtmap_limit; rtableid++) {
		/* skip rtables that are not in the rdomain of the ifp */
		if (rtable_l2(rtableid) != ifp->if_rdomain)
			continue;
		for (i = 1; i <= AF_MAX; i++) {
			if (!rtable_mpath_capable(rtableid, i))
				continue;

			do {
				error = rtable_walk(rtableid, i, &rt,
				    rt_if_linkstate_change, ifp);
				if (rt != NULL && error == EEXIST) {
					error = rtdeletemsg(rt, ifp, rtableid);
					if (error == 0)
						error = EAGAIN;
				}
				rtfree(rt);
				rt = NULL;
			} while (error == EAGAIN);

			if (error == EAFNOSUPPORT)
				error = 0;

			if (error)
				break;
		}
	}

	return (error);
}

int
rt_if_linkstate_change(struct rtentry *rt, void *arg, u_int id)
{
	struct ifnet *ifp = arg;
	struct sockaddr_in6 sa_mask;
	int error;

	if (rt->rt_ifidx != ifp->if_index)
		return (0);

	/* Local routes are always usable. */
	if (rt->rt_flags & RTF_LOCAL) {
		rt->rt_flags |= RTF_UP;
		return (0);
	}

	if (LINK_STATE_IS_UP(ifp->if_link_state) && ifp->if_flags & IFF_UP) {
		if (ISSET(rt->rt_flags, RTF_UP))
			return (0);

		/* bring route up */
		rt->rt_flags |= RTF_UP;
		error = rtable_mpath_reprio(id, rt_key(rt), rt_plen(rt),
		    rt->rt_priority & RTP_MASK, rt);
	} else {
		/*
		 * Remove redirected and cloned routes (mainly ARP)
		 * from down interfaces so we have a chance to get
		 * new routes from a better source.
		 */
		if (ISSET(rt->rt_flags, RTF_CLONED|RTF_DYNAMIC) &&
		    !ISSET(rt->rt_flags, RTF_CACHED|RTF_BFD)) {
			return (EEXIST);
		}

		if (!ISSET(rt->rt_flags, RTF_UP))
			return (0);

		/* take route down */
		rt->rt_flags &= ~RTF_UP;
		error = rtable_mpath_reprio(id, rt_key(rt), rt_plen(rt),
		    rt->rt_priority | RTP_DOWN, rt);
	}
	if_group_routechange(rt_key(rt), rt_plen2mask(rt, &sa_mask));

	return (error);
}

struct sockaddr *
rt_plentosa(sa_family_t af, int plen, struct sockaddr_in6 *sa_mask)
{
	struct sockaddr_in	*sin = (struct sockaddr_in *)sa_mask;
#ifdef INET6
	struct sockaddr_in6	*sin6 = (struct sockaddr_in6 *)sa_mask;
#endif

	KASSERT(plen >= 0 || plen == -1);

	if (plen == -1)
		return (NULL);

	memset(sa_mask, 0, sizeof(*sa_mask));

	switch (af) {
	case AF_INET:
		sin->sin_family = AF_INET;
		sin->sin_len = sizeof(struct sockaddr_in);
		in_prefixlen2mask(&sin->sin_addr, plen);
		break;
#ifdef INET6
	case AF_INET6:
		sin6->sin6_family = AF_INET6;
		sin6->sin6_len = sizeof(struct sockaddr_in6);
		in6_prefixlen2mask(&sin6->sin6_addr, plen);
		break;
#endif /* INET6 */
	default:
		return (NULL);
	}

	return ((struct sockaddr *)sa_mask);
}

struct sockaddr *
rt_plen2mask(struct rtentry *rt, struct sockaddr_in6 *sa_mask)
{
	return (rt_plentosa(rt_key(rt)->sa_family, rt_plen(rt), sa_mask));
}

#ifdef DDB
#include <machine/db_machdep.h>
#include <ddb/db_output.h>

void
db_print_sa(struct sockaddr *sa)
{
	int len;
	u_char *p;

	if (sa == NULL) {
		db_printf("[NULL]");
		return;
	}

	p = (u_char *)sa;
	len = sa->sa_len;
	db_printf("[");
	while (len > 0) {
		db_printf("%d", *p);
		p++;
		len--;
		if (len)
			db_printf(",");
	}
	db_printf("]\n");
}

void
db_print_ifa(struct ifaddr *ifa)
{
	if (ifa == NULL)
		return;
	db_printf(" ifa_addr=");
	db_print_sa(ifa->ifa_addr);
	db_printf(" ifa_dsta=");
	db_print_sa(ifa->ifa_dstaddr);
	db_printf(" ifa_mask=");
	db_print_sa(ifa->ifa_netmask);
	db_printf(" flags=0x%x, refcnt=%d, metric=%d\n",
	    ifa->ifa_flags, ifa->ifa_refcnt, ifa->ifa_metric);
}

/*
 * Function to pass to rtable_walk().
 * Return non-zero error to abort walk.
 */
int
db_show_rtentry(struct rtentry *rt, void *w, unsigned int id)
{
	db_printf("rtentry=%p", rt);

	db_printf(" flags=0x%x refcnt=%u use=%llu expire=%lld rtableid=%u\n",
	    rt->rt_flags, rt->rt_refcnt.r_refs, rt->rt_use, rt->rt_expire, id);

	db_printf(" key="); db_print_sa(rt_key(rt));
	db_printf(" plen=%d", rt_plen(rt));
	db_printf(" gw="); db_print_sa(rt->rt_gateway);
	db_printf(" ifidx=%u ", rt->rt_ifidx);
	db_printf(" ifa=%p\n", rt->rt_ifa);
	db_print_ifa(rt->rt_ifa);

	db_printf(" gwroute=%p llinfo=%p\n", rt->rt_gwroute, rt->rt_llinfo);
	return (0);
}

/*
 * Function to print all the route trees.
 * Use this from ddb: "call db_show_arptab"
 */
int
db_show_arptab(void)
{
	db_printf("Route tree for AF_INET\n");
	rtable_walk(0, AF_INET, NULL, db_show_rtentry, NULL);
	return (0);
}
#endif /* DDB */