1 /* $OpenBSD: route.c,v 1.416 2023/01/28 10:17:16 mvs Exp $ */ 2 /* $NetBSD: route.c,v 1.14 1996/02/13 22:00:46 christos Exp $ */ 3 4 /* 5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the project nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (c) 1980, 1986, 1991, 1993 35 * The Regents of the University of California. All rights reserved. 
36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. Redistributions of source code must retain the above copyright 41 * notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. Neither the name of the University nor the names of its contributors 46 * may be used to endorse or promote products derived from this software 47 * without specific prior written permission. 48 * 49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 59 * SUCH DAMAGE. 60 * 61 * @(#)route.c 8.2 (Berkeley) 11/15/93 62 */ 63 64 /* 65 * @(#)COPYRIGHT 1.1 (NRL) 17 January 1995 66 * 67 * NRL grants permission for redistribution and use in source and binary 68 * forms, with or without modification, of the software and documentation 69 * created at NRL provided that the following conditions are met: 70 * 71 * 1. Redistributions of source code must retain the above copyright 72 * notice, this list of conditions and the following disclaimer. 
73 * 2. Redistributions in binary form must reproduce the above copyright 74 * notice, this list of conditions and the following disclaimer in the 75 * documentation and/or other materials provided with the distribution. 76 * 3. All advertising materials mentioning features or use of this software 77 * must display the following acknowledgements: 78 * This product includes software developed by the University of 79 * California, Berkeley and its contributors. 80 * This product includes software developed at the Information 81 * Technology Division, US Naval Research Laboratory. 82 * 4. Neither the name of the NRL nor the names of its contributors 83 * may be used to endorse or promote products derived from this software 84 * without specific prior written permission. 85 * 86 * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS 87 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 88 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 89 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR 90 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 91 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 92 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 93 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 94 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 95 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 96 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 97 * 98 * The views and conclusions contained in the software and documentation 99 * are those of the authors and should not be interpreted as representing 100 * official policies, either expressed or implied, of the US Naval 101 * Research Laboratory (NRL). 
102 */ 103 104 #include <sys/param.h> 105 #include <sys/systm.h> 106 #include <sys/mbuf.h> 107 #include <sys/socket.h> 108 #include <sys/socketvar.h> 109 #include <sys/timeout.h> 110 #include <sys/domain.h> 111 #include <sys/ioctl.h> 112 #include <sys/kernel.h> 113 #include <sys/queue.h> 114 #include <sys/pool.h> 115 #include <sys/atomic.h> 116 117 #include <net/if.h> 118 #include <net/if_var.h> 119 #include <net/if_dl.h> 120 #include <net/route.h> 121 122 #include <netinet/in.h> 123 #include <netinet/ip_var.h> 124 #include <netinet/in_var.h> 125 126 #ifdef INET6 127 #include <netinet/ip6.h> 128 #include <netinet6/ip6_var.h> 129 #include <netinet6/in6_var.h> 130 #endif 131 132 #ifdef MPLS 133 #include <netmpls/mpls.h> 134 #endif 135 136 #ifdef BFD 137 #include <net/bfd.h> 138 #endif 139 140 #define ROUNDUP(a) (a>0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long)) 141 142 /* Give some jitter to hash, to avoid synchronization between routers. */ 143 static uint32_t rt_hashjitter; 144 145 extern unsigned int rtmap_limit; 146 147 struct cpumem * rtcounters; 148 int rttrash; /* routes not in table but not freed */ 149 150 struct pool rtentry_pool; /* pool for rtentry structures */ 151 struct pool rttimer_pool; /* pool for rttimer structures */ 152 153 int rt_setgwroute(struct rtentry *, u_int); 154 void rt_putgwroute(struct rtentry *); 155 int rtflushclone1(struct rtentry *, void *, u_int); 156 int rtflushclone(struct rtentry *, unsigned int); 157 int rt_ifa_purge_walker(struct rtentry *, void *, unsigned int); 158 struct rtentry *rt_match(struct sockaddr *, uint32_t *, int, unsigned int); 159 int rt_clone(struct rtentry **, struct sockaddr *, unsigned int); 160 struct sockaddr *rt_plentosa(sa_family_t, int, struct sockaddr_in6 *); 161 static int rt_copysa(struct sockaddr *, struct sockaddr *, struct sockaddr **); 162 163 #define LABELID_MAX 50000 164 165 struct rt_label { 166 TAILQ_ENTRY(rt_label) rtl_entry; 167 char rtl_name[RTLABEL_LEN]; 168 u_int16_t rtl_id; 
169 int rtl_ref; 170 }; 171 172 TAILQ_HEAD(rt_labels, rt_label) rt_labels = TAILQ_HEAD_INITIALIZER(rt_labels); 173 174 void 175 route_init(void) 176 { 177 rtcounters = counters_alloc(rts_ncounters); 178 179 pool_init(&rtentry_pool, sizeof(struct rtentry), 0, IPL_MPFLOOR, 0, 180 "rtentry", NULL); 181 182 while (rt_hashjitter == 0) 183 rt_hashjitter = arc4random(); 184 185 #ifdef BFD 186 bfdinit(); 187 #endif 188 } 189 190 /* 191 * Returns 1 if the (cached) ``rt'' entry is still valid, 0 otherwise. 192 */ 193 int 194 rtisvalid(struct rtentry *rt) 195 { 196 if (rt == NULL) 197 return (0); 198 199 if (!ISSET(rt->rt_flags, RTF_UP)) 200 return (0); 201 202 if (ISSET(rt->rt_flags, RTF_GATEWAY)) { 203 KASSERT(rt->rt_gwroute != NULL); 204 KASSERT(!ISSET(rt->rt_gwroute->rt_flags, RTF_GATEWAY)); 205 if (!ISSET(rt->rt_gwroute->rt_flags, RTF_UP)) 206 return (0); 207 } 208 209 return (1); 210 } 211 212 /* 213 * Do the actual lookup for rtalloc(9), do not use directly! 214 * 215 * Return the best matching entry for the destination ``dst''. 216 * 217 * "RT_RESOLVE" means that a corresponding L2 entry should 218 * be added to the routing table and resolved (via ARP or 219 * NDP), if it does not exist. 220 */ 221 struct rtentry * 222 rt_match(struct sockaddr *dst, uint32_t *src, int flags, unsigned int tableid) 223 { 224 struct rtentry *rt = NULL; 225 226 rt = rtable_match(tableid, dst, src); 227 if (rt == NULL) { 228 rtstat_inc(rts_unreach); 229 return (NULL); 230 } 231 232 if (ISSET(rt->rt_flags, RTF_CLONING) && ISSET(flags, RT_RESOLVE)) 233 rt_clone(&rt, dst, tableid); 234 235 rt->rt_use++; 236 return (rt); 237 } 238 239 int 240 rt_clone(struct rtentry **rtp, struct sockaddr *dst, unsigned int rtableid) 241 { 242 struct rt_addrinfo info; 243 struct rtentry *rt = *rtp; 244 int error = 0; 245 246 memset(&info, 0, sizeof(info)); 247 info.rti_info[RTAX_DST] = dst; 248 249 /* 250 * The priority of cloned route should be different 251 * to avoid conflict with /32 cloning routes. 
252 * 253 * It should also be higher to let the ARP layer find 254 * cloned routes instead of the cloning one. 255 */ 256 KERNEL_LOCK(); 257 error = rtrequest(RTM_RESOLVE, &info, rt->rt_priority - 1, &rt, 258 rtableid); 259 KERNEL_UNLOCK(); 260 if (error) { 261 rtm_miss(RTM_MISS, &info, 0, RTP_NONE, 0, error, rtableid); 262 } else { 263 /* Inform listeners of the new route */ 264 rtm_send(rt, RTM_ADD, 0, rtableid); 265 rtfree(*rtp); 266 *rtp = rt; 267 } 268 return (error); 269 } 270 271 /* 272 * Originated from bridge_hash() in if_bridge.c 273 */ 274 #define mix(a, b, c) do { \ 275 a -= b; a -= c; a ^= (c >> 13); \ 276 b -= c; b -= a; b ^= (a << 8); \ 277 c -= a; c -= b; c ^= (b >> 13); \ 278 a -= b; a -= c; a ^= (c >> 12); \ 279 b -= c; b -= a; b ^= (a << 16); \ 280 c -= a; c -= b; c ^= (b >> 5); \ 281 a -= b; a -= c; a ^= (c >> 3); \ 282 b -= c; b -= a; b ^= (a << 10); \ 283 c -= a; c -= b; c ^= (b >> 15); \ 284 } while (0) 285 286 int 287 rt_hash(struct rtentry *rt, struct sockaddr *dst, uint32_t *src) 288 { 289 uint32_t a, b, c; 290 291 if (src == NULL || !rtisvalid(rt) || !ISSET(rt->rt_flags, RTF_MPATH)) 292 return (-1); 293 294 a = b = 0x9e3779b9; 295 c = rt_hashjitter; 296 297 switch (dst->sa_family) { 298 case AF_INET: 299 { 300 struct sockaddr_in *sin; 301 302 if (!ipmultipath) 303 return (-1); 304 305 sin = satosin(dst); 306 a += sin->sin_addr.s_addr; 307 b += src[0]; 308 mix(a, b, c); 309 break; 310 } 311 #ifdef INET6 312 case AF_INET6: 313 { 314 struct sockaddr_in6 *sin6; 315 316 if (!ip6_multipath) 317 return (-1); 318 319 sin6 = satosin6(dst); 320 a += sin6->sin6_addr.s6_addr32[0]; 321 b += sin6->sin6_addr.s6_addr32[2]; 322 c += src[0]; 323 mix(a, b, c); 324 a += sin6->sin6_addr.s6_addr32[1]; 325 b += sin6->sin6_addr.s6_addr32[3]; 326 c += src[1]; 327 mix(a, b, c); 328 a += sin6->sin6_addr.s6_addr32[2]; 329 b += sin6->sin6_addr.s6_addr32[1]; 330 c += src[2]; 331 mix(a, b, c); 332 a += sin6->sin6_addr.s6_addr32[3]; 333 b += 
sin6->sin6_addr.s6_addr32[0]; 334 c += src[3]; 335 mix(a, b, c); 336 break; 337 } 338 #endif /* INET6 */ 339 } 340 341 return (c & 0xffff); 342 } 343 344 /* 345 * Allocate a route, potentially using multipath to select the peer. 346 */ 347 struct rtentry * 348 rtalloc_mpath(struct sockaddr *dst, uint32_t *src, unsigned int rtableid) 349 { 350 return (rt_match(dst, src, RT_RESOLVE, rtableid)); 351 } 352 353 /* 354 * Look in the routing table for the best matching entry for 355 * ``dst''. 356 * 357 * If a route with a gateway is found and its next hop is no 358 * longer valid, try to cache it. 359 */ 360 struct rtentry * 361 rtalloc(struct sockaddr *dst, int flags, unsigned int rtableid) 362 { 363 return (rt_match(dst, NULL, flags, rtableid)); 364 } 365 366 /* 367 * Cache the route entry corresponding to a reachable next hop in 368 * the gateway entry ``rt''. 369 */ 370 int 371 rt_setgwroute(struct rtentry *rt, u_int rtableid) 372 { 373 struct rtentry *prt, *nhrt; 374 unsigned int rdomain = rtable_l2(rtableid); 375 int error; 376 377 NET_ASSERT_LOCKED(); 378 379 KASSERT(ISSET(rt->rt_flags, RTF_GATEWAY)); 380 381 /* If we cannot find a valid next hop bail. */ 382 nhrt = rt_match(rt->rt_gateway, NULL, RT_RESOLVE, rdomain); 383 if (nhrt == NULL) 384 return (ENOENT); 385 386 /* Next hop entry must be on the same interface. */ 387 if (nhrt->rt_ifidx != rt->rt_ifidx) { 388 struct sockaddr_in6 sa_mask; 389 390 if (!ISSET(nhrt->rt_flags, RTF_LLINFO) || 391 !ISSET(nhrt->rt_flags, RTF_CLONED)) { 392 rtfree(nhrt); 393 return (EHOSTUNREACH); 394 } 395 396 /* 397 * We found a L2 entry, so we might have multiple 398 * RTF_CLONING routes for the same subnet. Query 399 * the first route of the multipath chain and iterate 400 * until we find the correct one. 
401 */ 402 prt = rtable_lookup(rdomain, rt_key(nhrt->rt_parent), 403 rt_plen2mask(nhrt->rt_parent, &sa_mask), NULL, RTP_ANY); 404 rtfree(nhrt); 405 406 while (prt != NULL && prt->rt_ifidx != rt->rt_ifidx) 407 prt = rtable_iterate(prt); 408 409 /* We found nothing or a non-cloning MPATH route. */ 410 if (prt == NULL || !ISSET(prt->rt_flags, RTF_CLONING)) { 411 rtfree(prt); 412 return (EHOSTUNREACH); 413 } 414 415 error = rt_clone(&prt, rt->rt_gateway, rdomain); 416 if (error) { 417 rtfree(prt); 418 return (error); 419 } 420 nhrt = prt; 421 } 422 423 /* 424 * Next hop must be reachable, this also prevents rtentry 425 * loops for example when rt->rt_gwroute points to rt. 426 */ 427 if (ISSET(nhrt->rt_flags, RTF_CLONING|RTF_GATEWAY)) { 428 rtfree(nhrt); 429 return (ENETUNREACH); 430 } 431 432 /* Next hop is valid so remove possible old cache. */ 433 rt_putgwroute(rt); 434 KASSERT(rt->rt_gwroute == NULL); 435 436 /* 437 * If the MTU of next hop is 0, this will reset the MTU of the 438 * route to run PMTUD again from scratch. 439 */ 440 if (!ISSET(rt->rt_locks, RTV_MTU) && (rt->rt_mtu > nhrt->rt_mtu)) 441 rt->rt_mtu = nhrt->rt_mtu; 442 443 /* 444 * To avoid reference counting problems when writing link-layer 445 * addresses in an outgoing packet, we ensure that the lifetime 446 * of a cached entry is greater than the bigger lifetime of the 447 * gateway entries it is pointed by. 448 */ 449 nhrt->rt_flags |= RTF_CACHED; 450 nhrt->rt_cachecnt++; 451 452 rt->rt_gwroute = nhrt; 453 454 return (0); 455 } 456 457 /* 458 * Invalidate the cached route entry of the gateway entry ``rt''. 
 */
void
rt_putgwroute(struct rtentry *rt)
{
	struct rtentry *nhrt = rt->rt_gwroute;

	NET_ASSERT_LOCKED();

	/* Nothing cached on non-gateway routes. */
	if (!ISSET(rt->rt_flags, RTF_GATEWAY) || nhrt == NULL)
		return;

	KASSERT(ISSET(nhrt->rt_flags, RTF_CACHED));
	KASSERT(nhrt->rt_cachecnt > 0);

	/* Clear RTF_CACHED once no gateway entry points at ``nhrt''. */
	--nhrt->rt_cachecnt;
	if (nhrt->rt_cachecnt == 0)
		nhrt->rt_flags &= ~RTF_CACHED;

	rtfree(rt->rt_gwroute);
	rt->rt_gwroute = NULL;
}

/* Take a reference on ``rt''. */
void
rtref(struct rtentry *rt)
{
	refcnt_take(&rt->rt_refcnt);
}

/*
 * Release a reference on ``rt''.  When the last reference is dropped
 * the entry must already be out of the table (!RTF_UP, not a root),
 * its attached state (timers, ifa, label, MPLS info, gateway and key
 * sockaddrs) is torn down and the rtentry is returned to its pool.
 */
void
rtfree(struct rtentry *rt)
{
	if (rt == NULL)
		return;

	if (refcnt_rele(&rt->rt_refcnt) == 0)
		return;

	KASSERT(!ISSET(rt->rt_flags, RTF_UP));
	KASSERT(!RT_ROOT(rt));
	atomic_dec_int(&rttrash);

	/* The cleanup helpers below still rely on the kernel lock. */
	KERNEL_LOCK();
	rt_timer_remove_all(rt);
	ifafree(rt->rt_ifa);
	rtlabel_unref(rt->rt_labelid);
#ifdef MPLS
	rt_mpls_clear(rt);
#endif
	free(rt->rt_gateway, M_RTABLE, ROUNDUP(rt->rt_gateway->sa_len));
	free(rt_key(rt), M_RTABLE, rt_key(rt)->sa_len);
	KERNEL_UNLOCK();

	pool_put(&rtentry_pool, rt);
}

/* Take a reference on ``ifa'' and return it, for caller convenience. */
struct ifaddr *
ifaref(struct ifaddr *ifa)
{
	refcnt_take(&ifa->ifa_refcnt);
	return ifa;
}

/* Release a reference on ``ifa''; free it when the last one is gone. */
void
ifafree(struct ifaddr *ifa)
{
	if (refcnt_rele(&ifa->ifa_refcnt) == 0)
		return;
	free(ifa, M_IFADDR, 0);
}

/*
 * Force a routing table entry to the specified
 * destination to go through the given gateway.
 * Normally called as a result of a routing redirect
 * message from the network layer.
 */
void
rtredirect(struct sockaddr *dst, struct sockaddr *gateway,
    struct sockaddr *src, struct rtentry **rtp, unsigned int rdomain)
{
	struct rtentry *rt;
	int error = 0;
	enum rtstat_counters stat = rts_ncounters;
	struct rt_addrinfo info;
	struct ifaddr *ifa;
	unsigned int ifidx = 0;
	int flags = RTF_GATEWAY|RTF_HOST;
	uint8_t prio = RTP_NONE;

	NET_ASSERT_LOCKED();

	/* verify the gateway is directly reachable */
	rt = rtalloc(gateway, 0, rdomain);
	if (!rtisvalid(rt) || ISSET(rt->rt_flags, RTF_GATEWAY)) {
		rtfree(rt);
		error = ENETUNREACH;
		goto out;
	}
	ifidx = rt->rt_ifidx;
	ifa = rt->rt_ifa;
	rtfree(rt);
	rt = NULL;

	rt = rtable_lookup(rdomain, dst, NULL, NULL, RTP_ANY);
	/*
	 * If the redirect isn't from our current router for this dst,
	 * it's either old or wrong.  If it redirects us to ourselves,
	 * we have a routing loop, perhaps as a result of an interface
	 * going down recently.
	 */
#define	equal(a1, a2) \
	((a1)->sa_len == (a2)->sa_len && \
	 bcmp((caddr_t)(a1), (caddr_t)(a2), (a1)->sa_len) == 0)
	if (rt != NULL && (!equal(src, rt->rt_gateway) || rt->rt_ifa != ifa))
		error = EINVAL;
	else if (ifa_ifwithaddr(gateway, rdomain) != NULL ||
	    (gateway->sa_family == AF_INET &&
	    in_broadcast(satosin(gateway)->sin_addr, rdomain)))
		error = EHOSTUNREACH;
	if (error)
		goto done;
	/*
	 * Create a new entry if we just got back a wildcard entry
	 * or the lookup failed.  This is necessary for hosts
	 * which use routing redirects generated by smart gateways
	 * to dynamically build the routing tables.
	 */
	if (rt == NULL)
		goto create;
	/*
	 * Don't listen to the redirect if it's
	 * for a route to an interface.
	 */
	if (ISSET(rt->rt_flags, RTF_GATEWAY)) {
		if (!ISSET(rt->rt_flags, RTF_HOST)) {
			/*
			 * Changing from route to net => route to host.
			 * Create new route, rather than smashing route to net.
			 */
create:
			rtfree(rt);
			flags |= RTF_DYNAMIC;
			bzero(&info, sizeof(info));
			info.rti_info[RTAX_DST] = dst;
			info.rti_info[RTAX_GATEWAY] = gateway;
			info.rti_ifa = ifa;
			info.rti_flags = flags;
			rt = NULL;
			error = rtrequest(RTM_ADD, &info, RTP_DEFAULT, &rt,
			    rdomain);
			if (error == 0) {
				flags = rt->rt_flags;
				prio = rt->rt_priority;
			}
			stat = rts_dynamic;
		} else {
			/*
			 * Smash the current notion of the gateway to
			 * this destination.  Should check about netmask!!!
			 */
			rt->rt_flags |= RTF_MODIFIED;
			flags |= RTF_MODIFIED;
			prio = rt->rt_priority;
			stat = rts_newgateway;
			rt_setgate(rt, gateway, rdomain);
		}
	} else
		error = EHOSTUNREACH;
done:
	if (rt) {
		/* Hand the entry to the caller only on success. */
		if (rtp && !error)
			*rtp = rt;
		else
			rtfree(rt);
	}
out:
	if (error)
		rtstat_inc(rts_badredirect);
	else if (stat != rts_ncounters)
		rtstat_inc(stat);
	/* Always report the redirect to routing socket listeners. */
	bzero((caddr_t)&info, sizeof(info));
	info.rti_info[RTAX_DST] = dst;
	info.rti_info[RTAX_GATEWAY] = gateway;
	info.rti_info[RTAX_AUTHOR] = src;
	rtm_miss(RTM_REDIRECT, &info, flags, prio, ifidx, error, rdomain);
}

/*
 * Delete a route and generate a message
 */
int
rtdeletemsg(struct rtentry *rt, struct ifnet *ifp, u_int tableid)
{
	int			error;
	struct rt_addrinfo	info;
	struct sockaddr_rtlabel sa_rl;
	struct sockaddr_in6	sa_mask;

	KASSERT(rt->rt_ifidx == ifp->if_index);

	/*
	 * Request the new route so that the entry is not actually
	 * deleted.  That will allow the information being reported to
	 * be accurate (and consistent with route_output()).
	 */
	memset(&info, 0, sizeof(info));
	info.rti_info[RTAX_DST] = rt_key(rt);
	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
	if (!ISSET(rt->rt_flags, RTF_HOST))
		info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask);
	info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl);
	info.rti_flags = rt->rt_flags;
	info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
	info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
	error = rtrequest_delete(&info, rt->rt_priority, ifp, &rt, tableid);
	rtm_miss(RTM_DELETE, &info, info.rti_flags, rt->rt_priority,
	    rt->rt_ifidx, error, tableid);
	/* Drop the reference returned by rtrequest_delete(). */
	if (error == 0)
		rtfree(rt);
	return (error);
}

/*
 * Two routes are considered equal when they are the same object or
 * share both key and prefix length.
 */
static inline int
rtequal(struct rtentry *a, struct rtentry *b)
{
	if (a == b)
		return 1;

	if (memcmp(rt_key(a), rt_key(b), rt_key(a)->sa_len) == 0 &&
	    rt_plen(a) == rt_plen(b))
		return 1;
	else
		return 0;
}

/*
 * rtable_walk() callback: return EEXIST (stop the walk) for every
 * cloned child of the cloning route ``arg'' that should be deleted,
 * 0 to keep walking.
 */
int
rtflushclone1(struct rtentry *rt, void *arg, u_int id)
{
	struct rtentry *cloningrt = arg;
	struct ifnet *ifp;

	if (!ISSET(rt->rt_flags, RTF_CLONED))
		return 0;

	/* Cached route must stay alive as long as their parent are alive. */
	if (ISSET(rt->rt_flags, RTF_CACHED) && (rt->rt_parent != cloningrt))
		return 0;

	if (!rtequal(rt->rt_parent, cloningrt))
		return 0;
	/*
	 * This happens when an interface with a RTF_CLONING route is
	 * being detached.  In this case it's safe to bail because all
	 * the routes are being purged by rt_ifa_purge().
	 */
	ifp = if_get(rt->rt_ifidx);
	if (ifp == NULL)
		return 0;

	if_put(ifp);
	return EEXIST;
}

/*
 * Delete all the cloned children of the cloning route ``parent'' in
 * table ``rtableid''.  The walk is restarted (EAGAIN) after each
 * deletion because rtdeletemsg() modifies the table being walked.
 */
int
rtflushclone(struct rtentry *parent, unsigned int rtableid)
{
	struct rtentry *rt = NULL;
	struct ifnet *ifp;
	int error;

#ifdef DIAGNOSTIC
	if (!parent || (parent->rt_flags & RTF_CLONING) == 0)
		panic("rtflushclone: called with a non-cloning route");
#endif

	do {
		error = rtable_walk(rtableid, rt_key(parent)->sa_family, &rt,
		    rtflushclone1, parent);
		if (rt != NULL && error == EEXIST) {
			ifp = if_get(rt->rt_ifidx);
			if (ifp == NULL) {
				error = EAGAIN;
			} else {
				error = rtdeletemsg(rt, ifp, rtableid);
				if (error == 0)
					error = EAGAIN;
				if_put(ifp);
			}
		}
		rtfree(rt);
		rt = NULL;
	} while (error == EAGAIN);

	return error;

}

/*
 * Remove the route described by ``info'' from table ``tableid''.
 * On success the deleted (now !RTF_UP) entry is returned through
 * ``ret_nrt'' with its reference, or freed if the caller passed NULL.
 */
int
rtrequest_delete(struct rt_addrinfo *info, u_int8_t prio, struct ifnet *ifp,
    struct rtentry **ret_nrt, u_int tableid)
{
	struct rtentry	*rt;
	int		 error;

	NET_ASSERT_LOCKED();

	if (!rtable_exists(tableid))
		return (EAFNOSUPPORT);
	rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
	    info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY], prio);
	if (rt == NULL)
		return (ESRCH);

	/* Make sure that's the route the caller want to delete. */
	if (ifp != NULL && ifp->if_index != rt->rt_ifidx) {
		rtfree(rt);
		return (ESRCH);
	}

#ifdef BFD
	if (ISSET(rt->rt_flags, RTF_BFD))
		bfdclear(rt);
#endif

	error = rtable_delete(tableid, info->rti_info[RTAX_DST],
	    info->rti_info[RTAX_NETMASK], rt);
	if (error != 0) {
		rtfree(rt);
		return (ESRCH);
	}

	/* Release next hop cache before flushing cloned entries. */
	rt_putgwroute(rt);

	/* Clean up any cloned children. */
	if (ISSET(rt->rt_flags, RTF_CLONING))
		rtflushclone(rt, tableid);

	rtfree(rt->rt_parent);
	rt->rt_parent = NULL;

	rt->rt_flags &= ~RTF_UP;

	/*
	 * NOTE(review): ``ifp'' is only NULL-checked above; the KASSERT
	 * and if_rtrequest() below dereference it unconditionally, so
	 * callers appear expected to always pass a non-NULL ifp — confirm.
	 */
	KASSERT(ifp->if_index == rt->rt_ifidx);
	ifp->if_rtrequest(ifp, RTM_DELETE, rt);

	atomic_inc_int(&rttrash);

	if (ret_nrt != NULL)
		*ret_nrt = rt;
	else
		rtfree(rt);

	return (0);
}

/*
 * Add (RTM_ADD) or clone (RTM_RESOLVE) a route described by ``info''
 * in table ``tableid''.  RTM_DELETE is rejected, use
 * rtrequest_delete().  On success the new entry is returned through
 * ``ret_nrt'' with a reference for the caller, or freed if the caller
 * passed NULL.  For RTM_RESOLVE, ``*ret_nrt'' is the RTF_CLONING
 * parent on entry.
 */
int
rtrequest(int req, struct rt_addrinfo *info, u_int8_t prio,
    struct rtentry **ret_nrt, u_int tableid)
{
	struct ifnet		*ifp;
	struct rtentry		*rt, *crt;
	struct ifaddr		*ifa;
	struct sockaddr		*ndst;
	struct sockaddr_rtlabel	*sa_rl, sa_rl2;
	struct sockaddr_dl	 sa_dl = { sizeof(sa_dl), AF_LINK };
	int			 error;

	NET_ASSERT_LOCKED();

	if (!rtable_exists(tableid))
		return (EAFNOSUPPORT);
	if (info->rti_flags & RTF_HOST)
		info->rti_info[RTAX_NETMASK] = NULL;
	switch (req) {
	case RTM_DELETE:
		return (EINVAL);

	case RTM_RESOLVE:
		if (ret_nrt == NULL || (rt = *ret_nrt) == NULL)
			return (EINVAL);
		if ((rt->rt_flags & RTF_CLONING) == 0)
			return (EINVAL);
		KASSERT(rt->rt_ifa->ifa_ifp != NULL);
		info->rti_ifa = rt->rt_ifa;
		/* Derive the child's flags from the cloning parent. */
		info->rti_flags = rt->rt_flags | (RTF_CLONED|RTF_HOST);
		info->rti_flags &= ~(RTF_CLONING|RTF_CONNECTED|RTF_STATIC);
		info->rti_info[RTAX_GATEWAY] = sdltosa(&sa_dl);
		info->rti_info[RTAX_LABEL] =
		    rtlabel_id2sa(rt->rt_labelid, &sa_rl2);
		/* FALLTHROUGH */

	case RTM_ADD:
		if (info->rti_ifa == NULL)
			return (EINVAL);
		ifa = info->rti_ifa;
		ifp = ifa->ifa_ifp;
		if (prio == 0)
			prio = ifp->if_priority + RTP_STATIC;

		/* Allocate the destination key used by the table. */
		error = rt_copysa(info->rti_info[RTAX_DST],
		    info->rti_info[RTAX_NETMASK], &ndst);
		if (error)
			return (error);

		rt = pool_get(&rtentry_pool, PR_NOWAIT | PR_ZERO);
		if (rt == NULL) {
			free(ndst, M_RTABLE, ndst->sa_len);
			return (ENOBUFS);
		}

		refcnt_init(&rt->rt_refcnt);
		rt->rt_flags = info->rti_flags | RTF_UP;
		rt->rt_priority = prio;	/* init routing priority */
		LIST_INIT(&rt->rt_timer);

		/* Check the link state if the table supports it. */
		if (rtable_mpath_capable(tableid, ndst->sa_family) &&
		    !ISSET(rt->rt_flags, RTF_LOCAL) &&
		    (!LINK_STATE_IS_UP(ifp->if_link_state) ||
		    !ISSET(ifp->if_flags, IFF_UP))) {
			rt->rt_flags &= ~RTF_UP;
			rt->rt_priority |= RTP_DOWN;
		}

		if (info->rti_info[RTAX_LABEL] != NULL) {
			sa_rl = (struct sockaddr_rtlabel *)
			    info->rti_info[RTAX_LABEL];
			rt->rt_labelid = rtlabel_name2id(sa_rl->sr_label);
		}

#ifdef MPLS
		/* We have to allocate additional space for MPLS infos */
		if (info->rti_flags & RTF_MPLS &&
		    (info->rti_info[RTAX_SRC] != NULL ||
		    info->rti_info[RTAX_DST]->sa_family == AF_MPLS)) {
			error = rt_mpls_set(rt, info->rti_info[RTAX_SRC],
			    info->rti_mpls);
			if (error) {
				free(ndst, M_RTABLE, ndst->sa_len);
				pool_put(&rtentry_pool, rt);
				return (error);
			}
		} else
			rt_mpls_clear(rt);
#endif

		rt->rt_ifa = ifaref(ifa);
		rt->rt_ifidx = ifp->if_index;
		/*
		 * Copy metrics and a back pointer from the cloned
		 * route's parent.
		 */
		if (ISSET(rt->rt_flags, RTF_CLONED)) {
			rtref(*ret_nrt);
			rt->rt_parent = *ret_nrt;
			rt->rt_rmx = (*ret_nrt)->rt_rmx;
		}

		/*
		 * We must set rt->rt_gateway before adding ``rt'' to
		 * the routing table because the radix MPATH code use
		 * it to (re)order routes.
		 */
		if ((error = rt_setgate(rt, info->rti_info[RTAX_GATEWAY],
		    tableid))) {
			/* Unwind everything acquired so far. */
			ifafree(ifa);
			rtfree(rt->rt_parent);
			rt_putgwroute(rt);
			free(rt->rt_gateway, M_RTABLE,
			    ROUNDUP(rt->rt_gateway->sa_len));
			free(ndst, M_RTABLE, ndst->sa_len);
			pool_put(&rtentry_pool, rt);
			return (error);
		}

		error = rtable_insert(tableid, ndst,
		    info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY],
		    rt->rt_priority, rt);
		if (error != 0 &&
		    (crt = rtable_match(tableid, ndst, NULL)) != NULL) {
			/* overwrite cloned route */
			if (ISSET(crt->rt_flags, RTF_CLONED) &&
			    !ISSET(crt->rt_flags, RTF_CACHED)) {
				struct ifnet *cifp;

				cifp = if_get(crt->rt_ifidx);
				KASSERT(cifp != NULL);
				rtdeletemsg(crt, cifp, tableid);
				if_put(cifp);

				error = rtable_insert(tableid, ndst,
				    info->rti_info[RTAX_NETMASK],
				    info->rti_info[RTAX_GATEWAY],
				    rt->rt_priority, rt);
			}
			rtfree(crt);
		}
		if (error != 0) {
			/* Insertion failed for good: unwind and bail. */
			ifafree(ifa);
			rtfree(rt->rt_parent);
			rt_putgwroute(rt);
			free(rt->rt_gateway, M_RTABLE,
			    ROUNDUP(rt->rt_gateway->sa_len));
			free(ndst, M_RTABLE, ndst->sa_len);
			pool_put(&rtentry_pool, rt);
			return (EEXIST);
		}
		ifp->if_rtrequest(ifp, req, rt);

		if_group_routechange(info->rti_info[RTAX_DST],
		    info->rti_info[RTAX_NETMASK]);

		if (ret_nrt != NULL)
			*ret_nrt = rt;
		else
			rtfree(rt);
		break;
	}

	return (0);
}

/*
 * Install a copy of ``gate'' as the gateway of ``rt'', reallocating
 * rt_gateway when the rounded size changes.  For RTF_GATEWAY routes
 * also (re)cache the next hop via rt_setgwroute().
 */
int
rt_setgate(struct rtentry *rt, struct sockaddr *gate, u_int rtableid)
{
	int glen = ROUNDUP(gate->sa_len);
	struct sockaddr *sa;

	if (rt->rt_gateway == NULL || glen != ROUNDUP(rt->rt_gateway->sa_len)) {
		sa = malloc(glen, M_RTABLE, M_NOWAIT);
		if (sa == NULL)
			return (ENOBUFS);
		if (rt->rt_gateway != NULL) {
			free(rt->rt_gateway, M_RTABLE,
			    ROUNDUP(rt->rt_gateway->sa_len));
		}
		rt->rt_gateway = sa;
	}
	memmove(rt->rt_gateway, gate, glen);

	if (ISSET(rt->rt_flags, RTF_GATEWAY))
		return (rt_setgwroute(rt, rtableid));

	return (0);
}

/*
 * Return the route entry containing the next hop link-layer
 * address corresponding to ``rt''.
 */
struct rtentry *
rt_getll(struct rtentry *rt)
{
	if (ISSET(rt->rt_flags, RTF_GATEWAY)) {
		KASSERT(rt->rt_gwroute != NULL);
		return (rt->rt_gwroute);
	}

	return (rt);
}

/*
 * Copy ``src'' into ``dst'' masked by ``netmask'': the sa_len and
 * sa_family bytes are copied verbatim, the remaining bytes are ANDed
 * with the netmask and any tail beyond the netmask is zeroed.
 */
void
rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst,
    struct sockaddr *netmask)
{
	u_char	*cp1 = (u_char *)src;
	u_char	*cp2 = (u_char *)dst;
	u_char	*cp3 = (u_char *)netmask;
	u_char	*cplim = cp2 + *cp3;
	u_char	*cplim2 = cp2 + *cp1;

	*cp2++ = *cp1++; *cp2++ = *cp1++; /* copies sa_len & sa_family */
	cp3 += 2;
	if (cplim > cplim2)
		cplim = cplim2;
	while (cp2 < cplim)
		*cp2++ = *cp1++ & *cp3++;
	if (cp2 < cplim2)
		bzero(cp2, cplim2 - cp2);
}

/*
 * allocate new sockaddr structure based on the user supplied src and mask
 * that is useable for the routing table.
 */
static int
rt_copysa(struct sockaddr *src, struct sockaddr *mask, struct sockaddr **dst)
{
	/* maskarray[r] masks the top ``r'' bits of the final partial byte. */
	static const u_char maskarray[] = {
	    0x0, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe };
	struct sockaddr *ndst;
	const struct domain *dp;
	u_char *csrc, *cdst;
	int i, plen;

	/* Find the domain matching src's address family. */
	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_rtoffset == 0)
			continue;
		if (src->sa_family == dp->dom_family)
			break;
	}
	if (dp == NULL)
		return (EAFNOSUPPORT);

	if (src->sa_len < dp->dom_sasize)
		return (EINVAL);

	plen = rtable_satoplen(src->sa_family, mask);
	if (plen == -1)
		return (EINVAL);

	/* Caller owns and must free() the returned sockaddr. */
	ndst = malloc(dp->dom_sasize, M_RTABLE, M_NOWAIT|M_ZERO);
	if (ndst == NULL)
		return (ENOBUFS);

	ndst->sa_family = src->sa_family;
	ndst->sa_len = dp->dom_sasize;

	csrc = (u_char *)src + dp->dom_rtoffset;
	cdst = (u_char *)ndst + dp->dom_rtoffset;

	/* Copy whole bytes of the prefix, then mask the partial byte. */
	memcpy(cdst, csrc, plen / 8);
	if (plen % 8 != 0)
		cdst[plen / 8] = csrc[plen / 8] & maskarray[plen % 8];

	*dst = ndst;
	return (0);
}

/*
 * Install a route for address ``dst'' through ``ifa'' in routing
 * table ``rdomain'' and report it on the routing socket.
 * Returns 0 or an errno from rtrequest().
 */
int
rt_ifa_add(struct ifaddr *ifa, int flags, struct sockaddr *dst,
    unsigned int rdomain)
{
	struct ifnet *ifp = ifa->ifa_ifp;
	struct rtentry *rt;
	struct sockaddr_rtlabel sa_rl;
	struct rt_addrinfo info;
	uint8_t prio = ifp->if_priority + RTP_STATIC;
	int error;

	KASSERT(rdomain == rtable_l2(rdomain));

	memset(&info, 0, sizeof(info));
	info.rti_ifa = ifa;
	info.rti_flags = flags;
	info.rti_info[RTAX_DST] = dst;
	/* Link-level routes use the interface's own lladdr as gateway. */
	if (flags & RTF_LLINFO)
		info.rti_info[RTAX_GATEWAY] = sdltosa(ifp->if_sadl);
	else
		info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
	info.rti_info[RTAX_LABEL] = rtlabel_id2sa(ifp->if_rtlabelid, &sa_rl);

#ifdef MPLS
	if ((flags & RTF_MPLS) == RTF_MPLS)
		info.rti_mpls = MPLS_OP_POP;
#endif /* MPLS */

	if ((flags & RTF_HOST) == 0)
		info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;

	if (flags & (RTF_LOCAL|RTF_BROADCAST))
		prio = RTP_LOCAL;

	if (flags & RTF_CONNECTED)
		prio = ifp->if_priority + RTP_CONNECTED;

	error = rtrequest(RTM_ADD, &info, prio, &rt, rdomain);
	if (error == 0) {
		/*
		 * A local route is created for every address configured
		 * on an interface, so use this information to notify
		 * userland that a new address has been added.
		 */
		if (flags & RTF_LOCAL)
			rtm_addr(RTM_NEWADDR, ifa);
		rtm_send(rt, RTM_ADD, 0, rdomain);
		rtfree(rt);
	}
	return (error);
}

/*
 * Remove the route for address ``dst'' through ``ifa'' from routing
 * table ``rdomain'' and report the deletion on the routing socket.
 * Returns 0 or an errno from rtrequest_delete().
 */
int
rt_ifa_del(struct ifaddr *ifa, int flags, struct sockaddr *dst,
    unsigned int rdomain)
{
	struct ifnet *ifp = ifa->ifa_ifp;
	struct rtentry *rt;
	struct mbuf *m = NULL;
	struct sockaddr *deldst;
	struct rt_addrinfo info;
	struct sockaddr_rtlabel sa_rl;
	uint8_t prio = ifp->if_priority + RTP_STATIC;
	int error;

	KASSERT(rdomain == rtable_l2(rdomain));

	/*
	 * For network routes delete the masked destination, built in a
	 * temporary mbuf which is freed below on all paths.
	 */
	if ((flags & RTF_HOST) == 0 && ifa->ifa_netmask) {
		m = m_get(M_DONTWAIT, MT_SONAME);
		if (m == NULL)
			return (ENOBUFS);
		deldst = mtod(m, struct sockaddr *);
		rt_maskedcopy(dst, deldst, ifa->ifa_netmask);
		dst = deldst;
	}

	memset(&info, 0, sizeof(info));
	info.rti_ifa = ifa;
	info.rti_flags = flags;
	info.rti_info[RTAX_DST] = dst;
	if ((flags & RTF_LLINFO) == 0)
		info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
	info.rti_info[RTAX_LABEL] = rtlabel_id2sa(ifp->if_rtlabelid, &sa_rl);

	if ((flags & RTF_HOST) == 0)
		info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;

	if (flags & (RTF_LOCAL|RTF_BROADCAST))
		prio = RTP_LOCAL;

	if (flags & RTF_CONNECTED)
		prio = ifp->if_priority + RTP_CONNECTED;

	rtable_clearsource(rdomain, ifa->ifa_addr);
	error = rtrequest_delete(&info, prio, ifp, &rt, rdomain);
	if (error == 0) {
		rtm_send(rt, RTM_DELETE, 0, rdomain);
		if (flags & RTF_LOCAL)
			rtm_addr(RTM_DELADDR, ifa);
		rtfree(rt);
	}
	m_free(m);	/* m_free(NULL) is a no-op */

	return (error);
}

/*
 * Add ifa's address as a local rtentry.
 */
int
rt_ifa_addlocal(struct ifaddr *ifa)
{
	struct ifnet *ifp = ifa->ifa_ifp;
	struct rtentry *rt;
	u_int flags = RTF_HOST|RTF_LOCAL;
	int error = 0;

	/*
	 * If the configured address correspond to the magical "any"
	 * address do not add a local route entry because that might
	 * corrupt the routing tree which uses this value for the
	 * default routes.
	 */
	switch (ifa->ifa_addr->sa_family) {
	case AF_INET:
		if (satosin(ifa->ifa_addr)->sin_addr.s_addr == INADDR_ANY)
			return (0);
		break;
#ifdef INET6
	case AF_INET6:
		if (IN6_ARE_ADDR_EQUAL(&satosin6(ifa->ifa_addr)->sin6_addr,
		    &in6addr_any))
			return (0);
		break;
#endif
	default:
		break;
	}

	if (!ISSET(ifp->if_flags, (IFF_LOOPBACK|IFF_POINTOPOINT)))
		flags |= RTF_LLINFO;

	/* If there is no local entry, allocate one. */
	rt = rtalloc(ifa->ifa_addr, 0, ifp->if_rdomain);
	if (rt == NULL || ISSET(rt->rt_flags, flags) != flags) {
		error = rt_ifa_add(ifa, flags | RTF_MPATH, ifa->ifa_addr,
		    ifp->if_rdomain);
	}
	rtfree(rt);	/* rtfree(NULL) is a no-op */

	return (error);
}

/*
 * Remove local rtentry of ifa's address if it exists.
 */
int
rt_ifa_dellocal(struct ifaddr *ifa)
{
	struct ifnet *ifp = ifa->ifa_ifp;
	struct rtentry *rt;
	u_int flags = RTF_HOST|RTF_LOCAL;
	int error = 0;

	/*
	 * We do not add local routes for such address, so do not bother
	 * removing them.
	 */
	switch (ifa->ifa_addr->sa_family) {
	case AF_INET:
		if (satosin(ifa->ifa_addr)->sin_addr.s_addr == INADDR_ANY)
			return (0);
		break;
#ifdef INET6
	case AF_INET6:
		if (IN6_ARE_ADDR_EQUAL(&satosin6(ifa->ifa_addr)->sin6_addr,
		    &in6addr_any))
			return (0);
		break;
#endif
	default:
		break;
	}

	if (!ISSET(ifp->if_flags, (IFF_LOOPBACK|IFF_POINTOPOINT)))
		flags |= RTF_LLINFO;

	/*
	 * Before deleting, check if a corresponding local host
	 * route surely exists.  With this check, we can avoid to
	 * delete an interface direct route whose destination is same
	 * as the address being removed.  This can happen when removing
	 * a subnet-router anycast address on an interface attached
	 * to a shared medium.
	 */
	rt = rtalloc(ifa->ifa_addr, 0, ifp->if_rdomain);
	if (rt != NULL && ISSET(rt->rt_flags, flags) == flags) {
		error = rt_ifa_del(ifa, flags, ifa->ifa_addr,
		    ifp->if_rdomain);
	}
	rtfree(rt);

	return (error);
}

/*
 * Remove all addresses attached to ``ifa''.
 */
void
rt_ifa_purge(struct ifaddr *ifa)
{
	struct ifnet *ifp = ifa->ifa_ifp;
	struct rtentry *rt = NULL;
	unsigned int rtableid;
	int error, af = ifa->ifa_addr->sa_family;

	KASSERT(ifp != NULL);

	for (rtableid = 0; rtableid < rtmap_limit; rtableid++) {
		/* skip rtables that are not in the rdomain of the ifp */
		if (rtable_l2(rtableid) != ifp->if_rdomain)
			continue;

		/*
		 * The walker returns EEXIST with the matching route held
		 * in ``rt''; delete it and restart the walk (EAGAIN) until
		 * no more routes reference ``ifa''.
		 */
		do {
			error = rtable_walk(rtableid, af, &rt,
			    rt_ifa_purge_walker, ifa);
			if (rt != NULL && error == EEXIST) {
				error = rtdeletemsg(rt, ifp, rtableid);
				if (error == 0)
					error = EAGAIN;
			}
			rtfree(rt);
			rt = NULL;
		} while (error == EAGAIN);

		/* No routing table for this af in this rtable: not fatal. */
		if (error == EAFNOSUPPORT)
			error = 0;

		if (error)
			break;
	}
}

/*
 * rtable_walk() callback: flag (via EEXIST) any route using ``vifa''
 * so rt_ifa_purge() can delete it.
 */
int
rt_ifa_purge_walker(struct rtentry *rt, void *vifa, unsigned int rtableid)
{
	struct ifaddr *ifa = vifa;

	if (rt->rt_ifa == ifa)
		return EEXIST;

	return 0;
}

/*
 * Route timer routines.  These routes allow functions to be called
 * for various routes at any time.  This is useful in supporting
 * path MTU discovery and redirect route deletion.
 *
 * This is similar to some BSDI internal functions, but it provides
 * for multiple queues for efficiency's sake...
 */

/* Serializes access to all [T] fields of struct rttimer below. */
struct mutex rttimer_mtx;

/*
 * One pending timer action on a route.
 * NOTE(review): [T]/[I] follow this file's locking-annotation style —
 * presumably [T] = protected by rttimer_mtx and [I] = immutable after
 * initialization; confirm against the file's locking legend.
 */
struct rttimer {
	TAILQ_ENTRY(rttimer)	rtt_next;	/* [T] entry on timer queue */
	LIST_ENTRY(rttimer)	rtt_link;	/* [T] timers per rtentry */
	struct timeout		rtt_timeout;	/* [I] timeout for this entry */
	struct rttimer_queue	*rtt_queue;	/* [I] back pointer to queue */
	struct rtentry		*rtt_rt;	/* [T] back pointer to route */
	time_t			rtt_expire;	/* [I] rt expire time */
	u_int			rtt_tableid;	/* [I] rtable id of rtt_rt */
};

/*
 * Run the queue's action on timer ``r''.  Without an explicit action,
 * the default is to delete dynamic host routes (e.g. redirects).
 */
#define RTTIMER_CALLOUT(r)	{					\
	if (r->rtt_queue->rtq_func != NULL) {				\
		(*r->rtt_queue->rtq_func)(r->rtt_rt, r->rtt_tableid);	\
	} else {							\
		struct ifnet *ifp;					\
									\
		ifp = if_get(r->rtt_rt->rt_ifidx);			\
		if (ifp != NULL &&					\
		    (r->rtt_rt->rt_flags & (RTF_DYNAMIC|RTF_HOST)) ==	\
		    (RTF_DYNAMIC|RTF_HOST))				\
			rtdeletemsg(r->rtt_rt, ifp, r->rtt_tableid);	\
		if_put(ifp);						\
	}								\
}

/*
 * Some subtle order problems with domain initialization mean that
 * we cannot count on this being run from rt_init before various
 * protocol initializations are done.  Therefore, we make sure
 * that this is run when the first queue is added...
 */

/* One-time initialization of the timer pool and its mutex. */
void
rt_timer_init(void)
{
	pool_init(&rttimer_pool, sizeof(struct rttimer), 0,
	    IPL_MPFLOOR, 0, "rttmr", NULL);
	mtx_init(&rttimer_mtx, IPL_MPFLOOR);
}

/* Initialize a timer queue with its per-queue timeout and action. */
void
rt_timer_queue_init(struct rttimer_queue *rtq, int timeout,
    void (*func)(struct rtentry *, u_int))
{
	rtq->rtq_timeout = timeout;
	rtq->rtq_count = 0;
	rtq->rtq_func = func;
	TAILQ_INIT(&rtq->rtq_head);
}

/*
 * Change the queue's timeout.  Only affects timers added afterwards;
 * already-armed timeouts keep their original deadline.
 */
void
rt_timer_queue_change(struct rttimer_queue *rtq, int timeout)
{
	mtx_enter(&rttimer_mtx);
	rtq->rtq_timeout = timeout;
	mtx_leave(&rttimer_mtx);
}

/*
 * Fire and destroy every timer on ``rtq''.  The entries are moved to
 * a local list under the mutex, then the callouts run unlocked.
 */
void
rt_timer_queue_flush(struct rttimer_queue *rtq)
{
	struct rttimer *r;
	TAILQ_HEAD(, rttimer) rttlist;

	NET_ASSERT_LOCKED();

	TAILQ_INIT(&rttlist);
	mtx_enter(&rttimer_mtx);
	while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL) {
		LIST_REMOVE(r, rtt_link);
		TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
		TAILQ_INSERT_TAIL(&rttlist, r, rtt_next);
		KASSERT(rtq->rtq_count > 0);
		rtq->rtq_count--;
	}
	mtx_leave(&rttimer_mtx);

	/* Callouts may sleep or take other locks: run without the mutex. */
	while ((r = TAILQ_FIRST(&rttlist)) != NULL) {
		TAILQ_REMOVE(&rttlist, r, rtt_next);
		RTTIMER_CALLOUT(r);
		pool_put(&rttimer_pool, r);
	}
}

/* Return the number of timers currently on ``rtq''. */
unsigned long
rt_timer_queue_count(struct rttimer_queue *rtq)
{
	return (rtq->rtq_count);
}

/*
 * Detach ``r'' from its route and queue.  Returns ``r'' if the caller
 * now owns it, or NULL if its timeout already fired and rt_timer_timer()
 * will free it instead.
 */
static inline struct rttimer *
rt_timer_unlink(struct rttimer *r)
{
	MUTEX_ASSERT_LOCKED(&rttimer_mtx);

	LIST_REMOVE(r, rtt_link);
	r->rtt_rt = NULL;

	if (timeout_del(&r->rtt_timeout) == 0) {
		/* timeout fired, so rt_timer_timer will do the cleanup */
		return NULL;
	}

	TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next);
	KASSERT(r->rtt_queue->rtq_count > 0);
	r->rtt_queue->rtq_count--;
	return r;
}

/* Remove and free all timers attached to route ``rt''. */
void
rt_timer_remove_all(struct rtentry *rt)
{
	struct rttimer *r;
	TAILQ_HEAD(, rttimer) rttlist;

	TAILQ_INIT(&rttlist);
	mtx_enter(&rttimer_mtx);
	while ((r = LIST_FIRST(&rt->rt_timer)) != NULL) {
		/* NULL result means the timeout is mid-fire; it frees itself. */
		r = rt_timer_unlink(r);
		if (r != NULL)
			TAILQ_INSERT_TAIL(&rttlist, r, rtt_next);
	}
	mtx_leave(&rttimer_mtx);

	while ((r = TAILQ_FIRST(&rttlist)) != NULL) {
		TAILQ_REMOVE(&rttlist, r, rtt_next);
		pool_put(&rttimer_pool, r);
	}
}

/* Return the earliest expire time of rt's timers, or 0 if none. */
time_t
rt_timer_get_expire(const struct rtentry *rt)
{
	const struct rttimer *r;
	time_t expire = 0;

	mtx_enter(&rttimer_mtx);
	LIST_FOREACH(r, &rt->rt_timer, rtt_link) {
		if (expire == 0 || expire > r->rtt_expire)
			expire = r->rtt_expire;
	}
	mtx_leave(&rttimer_mtx);

	return expire;
}

/*
 * Arm a timer on ``rt'' for ``queue''.  At most one timer per
 * (route, queue) pair: any existing one is replaced.
 */
int
rt_timer_add(struct rtentry *rt, struct rttimer_queue *queue, u_int rtableid)
{
	struct rttimer *r, *rnew;

	rnew = pool_get(&rttimer_pool, PR_NOWAIT | PR_ZERO);
	if (rnew == NULL)
		return (ENOBUFS);

	rnew->rtt_rt = rt;
	rnew->rtt_queue = queue;
	rnew->rtt_tableid = rtableid;
	rnew->rtt_expire = getuptime() + queue->rtq_timeout;
	timeout_set_proc(&rnew->rtt_timeout, rt_timer_timer, rnew);

	mtx_enter(&rttimer_mtx);
	/*
	 * If there's already a timer with this action, destroy it before
	 * we add a new one.
	 */
	LIST_FOREACH(r, &rt->rt_timer, rtt_link) {
		if (r->rtt_queue == queue) {
			r = rt_timer_unlink(r);
			break;	/* only one per list, so we can quit... */
		}
	}

	LIST_INSERT_HEAD(&rt->rt_timer, rnew, rtt_link);
	TAILQ_INSERT_TAIL(&queue->rtq_head, rnew, rtt_next);
	timeout_add_sec(&rnew->rtt_timeout, queue->rtq_timeout);
	rnew->rtt_queue->rtq_count++;
	mtx_leave(&rttimer_mtx);

	/* Free the replaced timer (if any) outside the mutex. */
	if (r != NULL)
		pool_put(&rttimer_pool, r);

	return (0);
}

/*
 * Timeout handler: run the queue's action on the route (unless the
 * timer was unlinked concurrently, rtt_rt == NULL) and free the timer.
 */
void
rt_timer_timer(void *arg)
{
	struct rttimer *r = arg;
	struct rttimer_queue *rtq = r->rtt_queue;

	NET_LOCK();
	mtx_enter(&rttimer_mtx);

	if (r->rtt_rt != NULL)
		LIST_REMOVE(r, rtt_link);
	TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
	KASSERT(rtq->rtq_count > 0);
	rtq->rtq_count--;

	mtx_leave(&rttimer_mtx);

	if (r->rtt_rt != NULL)
		RTTIMER_CALLOUT(r);
	NET_UNLOCK();

	pool_put(&rttimer_pool, r);
}

#ifdef MPLS
/*
 * Attach MPLS state (label and operation ``op'') to ``rt'' in
 * rt_llinfo and set RTF_MPLS.  ``src'' may be NULL only for POP.
 */
int
rt_mpls_set(struct rtentry *rt, struct sockaddr *src, uint8_t op)
{
	struct sockaddr_mpls *psa_mpls = (struct sockaddr_mpls *)src;
	struct rt_mpls *rt_mpls;

	if (psa_mpls == NULL && op != MPLS_OP_POP)
		return (EOPNOTSUPP);
	if (psa_mpls != NULL && psa_mpls->smpls_len != sizeof(*psa_mpls))
		return (EINVAL);
	if (psa_mpls != NULL && psa_mpls->smpls_family != AF_MPLS)
		return (EAFNOSUPPORT);

	rt->rt_llinfo = malloc(sizeof(struct rt_mpls), M_TEMP, M_NOWAIT|M_ZERO);
	if (rt->rt_llinfo == NULL)
		return (ENOMEM);

	rt_mpls = (struct rt_mpls *)rt->rt_llinfo;
	if (psa_mpls != NULL)
		rt_mpls->mpls_label = psa_mpls->smpls_label;
	rt_mpls->mpls_operation = op;
	/* XXX: set experimental bits */
	rt->rt_flags |= RTF_MPLS;

	return (0);
}

/* Release rt's MPLS state and clear RTF_MPLS. */
void
rt_mpls_clear(struct rtentry *rt)
{
	if (rt->rt_llinfo != NULL && rt->rt_flags & RTF_MPLS) {
		free(rt->rt_llinfo, M_TEMP, sizeof(struct rt_mpls));
		rt->rt_llinfo = NULL;
	}
	rt->rt_flags &= ~RTF_MPLS;
}
#endif
/*
 * Map a route label name to its numeric id, creating a new entry (and
 * taking a reference) if the name is unknown.  Returns 0 on failure or
 * for the empty name.  Release with rtlabel_unref().
 */
u_int16_t
rtlabel_name2id(char *name)
{
	struct rt_label *label, *p;
	u_int16_t new_id = 1;

	if (!name[0])
		return (0);

	TAILQ_FOREACH(label, &rt_labels, rtl_entry)
		if (strcmp(name, label->rtl_name) == 0) {
			label->rtl_ref++;
			return (label->rtl_id);
		}

	/*
	 * to avoid fragmentation, we do a linear search from the beginning
	 * and take the first free slot we find. if there is none or the list
	 * is empty, append a new entry at the end.
	 */
	TAILQ_FOREACH(p, &rt_labels, rtl_entry) {
		if (p->rtl_id != new_id)
			break;
		new_id = p->rtl_id + 1;
	}
	if (new_id > LABELID_MAX)
		return (0);

	label = malloc(sizeof(*label), M_RTABLE, M_NOWAIT|M_ZERO);
	if (label == NULL)
		return (0);
	strlcpy(label->rtl_name, name, sizeof(label->rtl_name));
	label->rtl_id = new_id;
	label->rtl_ref++;

	if (p != NULL)	/* insert new entry before p */
		TAILQ_INSERT_BEFORE(p, label, rtl_entry);
	else		/* either list empty or no free slot in between */
		TAILQ_INSERT_TAIL(&rt_labels, label, rtl_entry);

	return (label->rtl_id);
}

/* Return the name for label ``id'', or NULL if it is not registered. */
const char *
rtlabel_id2name(u_int16_t id)
{
	struct rt_label *label;

	TAILQ_FOREACH(label, &rt_labels, rtl_entry)
		if (label->rtl_id == id)
			return (label->rtl_name);

	return (NULL);
}

/*
 * Fill caller-provided ``sa_rl'' with a sockaddr_rtlabel for
 * ``labelid''.  Returns NULL for id 0 or an unknown label.
 */
struct sockaddr *
rtlabel_id2sa(u_int16_t labelid, struct sockaddr_rtlabel *sa_rl)
{
	const char *label;

	if (labelid == 0 || (label = rtlabel_id2name(labelid)) == NULL)
		return (NULL);

	bzero(sa_rl, sizeof(*sa_rl));
	sa_rl->sr_len = sizeof(*sa_rl);
	sa_rl->sr_family = AF_UNSPEC;
	strlcpy(sa_rl->sr_label, label, sizeof(sa_rl->sr_label));

	return ((struct sockaddr *)sa_rl);
}

/* Drop a reference on label ``id''; free the entry on the last one. */
void
rtlabel_unref(u_int16_t id)
{
	struct rt_label *p, *next;

	if (id == 0)
		return;

	TAILQ_FOREACH_SAFE(p, &rt_labels, rtl_entry, next) {
		if (id == p->rtl_id) {
			if (--p->rtl_ref == 0) {
				TAILQ_REMOVE(&rt_labels, p, rtl_entry);
				free(p, M_RTABLE, sizeof(*p));
			}
			break;
		}
	}
}

/*
 * Update every route using ``ifp'' after a link-state change, walking
 * all mpath-capable tables in the interface's rdomain.  Routes flagged
 * EEXIST by the walker are deleted and the walk restarted.
 */
int
rt_if_track(struct ifnet *ifp)
{
	unsigned int rtableid;
	struct rtentry *rt = NULL;
	int i, error = 0;

	for (rtableid = 0; rtableid < rtmap_limit; rtableid++) {
		/* skip rtables that are not in the rdomain of the ifp */
		if (rtable_l2(rtableid) != ifp->if_rdomain)
			continue;
		for (i = 1; i <= AF_MAX; i++) {
			if (!rtable_mpath_capable(rtableid, i))
				continue;

			do {
				error = rtable_walk(rtableid, i, &rt,
				    rt_if_linkstate_change, ifp);
				if (rt != NULL && error == EEXIST) {
					error = rtdeletemsg(rt, ifp, rtableid);
					if (error == 0)
						error = EAGAIN;
				}
				rtfree(rt);
				rt = NULL;
			} while (error == EAGAIN);

			if (error == EAFNOSUPPORT)
				error = 0;

			if (error)
				break;
		}
	}

	return (error);
}

/*
 * rtable_walk() callback: bring rt's RTF_UP flag and priority in sync
 * with ifp's link state, or return EEXIST to have cloned/redirect
 * routes on a down interface deleted by the caller.
 */
int
rt_if_linkstate_change(struct rtentry *rt, void *arg, u_int id)
{
	struct ifnet *ifp = arg;
	struct sockaddr_in6 sa_mask;
	int error;

	if (rt->rt_ifidx != ifp->if_index)
		return (0);

	/* Local routes are always usable. */
	if (rt->rt_flags & RTF_LOCAL) {
		rt->rt_flags |= RTF_UP;
		return (0);
	}

	if (LINK_STATE_IS_UP(ifp->if_link_state) && ifp->if_flags & IFF_UP) {
		if (ISSET(rt->rt_flags, RTF_UP))
			return (0);

		/* bring route up */
		rt->rt_flags |= RTF_UP;
		error = rtable_mpath_reprio(id, rt_key(rt), rt_plen(rt),
		    rt->rt_priority & RTP_MASK, rt);
	} else {
		/*
		 * Remove redirected and cloned routes (mainly ARP)
		 * from down interfaces so we have a chance to get
		 * new routes from a better source.
		 */
		if (ISSET(rt->rt_flags, RTF_CLONED|RTF_DYNAMIC) &&
		    !ISSET(rt->rt_flags, RTF_CACHED|RTF_BFD)) {
			return (EEXIST);
		}

		if (!ISSET(rt->rt_flags, RTF_UP))
			return (0);

		/* take route down */
		rt->rt_flags &= ~RTF_UP;
		error = rtable_mpath_reprio(id, rt_key(rt), rt_plen(rt),
		    rt->rt_priority | RTP_DOWN, rt);
	}
	if_group_routechange(rt_key(rt), rt_plen2mask(rt, &sa_mask));

	return (error);
}

/*
 * Build a netmask sockaddr for prefix length ``plen'' of family ``af''
 * in caller-provided storage (sized for the larger sockaddr_in6).
 * Returns NULL for plen == -1 or an unsupported family.
 */
struct sockaddr *
rt_plentosa(sa_family_t af, int plen, struct sockaddr_in6 *sa_mask)
{
	struct sockaddr_in *sin = (struct sockaddr_in *)sa_mask;
#ifdef INET6
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa_mask;
#endif

	KASSERT(plen >= 0 || plen == -1);

	if (plen == -1)
		return (NULL);

	memset(sa_mask, 0, sizeof(*sa_mask));

	switch (af) {
	case AF_INET:
		sin->sin_family = AF_INET;
		sin->sin_len = sizeof(struct sockaddr_in);
		in_prefixlen2mask(&sin->sin_addr, plen);
		break;
#ifdef INET6
	case AF_INET6:
		sin6->sin6_family = AF_INET6;
		sin6->sin6_len = sizeof(struct sockaddr_in6);
		in6_prefixlen2mask(&sin6->sin6_addr, plen);
		break;
#endif /* INET6 */
	default:
		return (NULL);
	}

	return ((struct sockaddr *)sa_mask);
}

/* Netmask sockaddr for rt's own prefix length; see rt_plentosa(). */
struct sockaddr *
rt_plen2mask(struct rtentry *rt, struct sockaddr_in6 *sa_mask)
{
	return (rt_plentosa(rt_key(rt)->sa_family, rt_plen(rt), sa_mask));
}

#ifdef DDB
#include <machine/db_machdep.h>
#include <ddb/db_output.h>

void db_print_sa(struct sockaddr *);
void db_print_ifa(struct ifaddr *);

/* Dump a sockaddr as a raw byte list for the kernel debugger. */
void
db_print_sa(struct sockaddr *sa)
{
	int len;
	u_char *p;

	if (sa == NULL) {
		db_printf("[NULL]");
		return;
	}

	p = (u_char *)sa;
	len = sa->sa_len;
	db_printf("[");
	while (len > 0) {
		db_printf("%d", *p);
		p++;
		len--;
		if (len)
			db_printf(",");
	}
	db_printf("]\n");
}

/* Dump an ifaddr's addresses and bookkeeping for the kernel debugger. */
void
db_print_ifa(struct ifaddr *ifa)
{
	if (ifa == NULL)
		return;
	db_printf(" ifa_addr=");
	db_print_sa(ifa->ifa_addr);
	db_printf(" ifa_dsta=");
	db_print_sa(ifa->ifa_dstaddr);
	db_printf(" ifa_mask=");
	db_print_sa(ifa->ifa_netmask);
	db_printf(" flags=0x%x, refcnt=%u, metric=%d\n",
	    ifa->ifa_flags, ifa->ifa_refcnt.r_refs, ifa->ifa_metric);
}

/*
 * Function to pass to rtable_walk().
 * Return non-zero error to abort walk.
 */
int
db_show_rtentry(struct rtentry *rt, void *w, unsigned int id)
{
	db_printf("rtentry=%p", rt);

	db_printf(" flags=0x%x refcnt=%u use=%llu expire=%lld\n",
	    rt->rt_flags, rt->rt_refcnt.r_refs, rt->rt_use, rt->rt_expire);

	db_printf(" key="); db_print_sa(rt_key(rt));
	db_printf(" plen=%d", rt_plen(rt));
	db_printf(" gw="); db_print_sa(rt->rt_gateway);
	db_printf(" ifidx=%u ", rt->rt_ifidx);
	db_printf(" ifa=%p\n", rt->rt_ifa);
	db_print_ifa(rt->rt_ifa);

	db_printf(" gwroute=%p llinfo=%p priority=%d\n",
	    rt->rt_gwroute, rt->rt_llinfo, rt->rt_priority);
	return (0);
}

/*
 * Function to print all the route trees.
 */
int
db_show_rtable(int af, unsigned int rtableid)
{
	db_printf("Route tree for af %d, rtableid %u\n", af, rtableid);
	rtable_walk(rtableid, af, NULL, db_show_rtentry, NULL);
	return (0);
}
#endif /* DDB */