1 /* $OpenBSD: rtable.c,v 1.87 2024/04/09 12:53:08 claudio Exp $ */ 2 3 /* 4 * Copyright (c) 2014-2016 Martin Pieuchot 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 #ifndef _KERNEL 20 #include "kern_compat.h" 21 #else 22 #include <sys/param.h> 23 #include <sys/systm.h> 24 #include <sys/socket.h> 25 #include <sys/malloc.h> 26 #include <sys/queue.h> 27 #include <sys/domain.h> 28 #include <sys/srp.h> 29 #endif 30 31 #include <net/rtable.h> 32 #include <net/route.h> 33 #include <net/art.h> 34 35 /* 36 * Structures used by rtable_get() to retrieve the corresponding 37 * routing table for a given pair of ``af'' and ``rtableid''. 38 * 39 * Note that once allocated routing table heads are never freed. 40 * This way we do not need to reference count them. 41 * 42 * afmap rtmap/dommp 43 * ----------- --------- ----- 44 * | 0 |--------> | 0 | 0 | ... | 0 | Array mapping rtableid (=index) 45 * ----------- --------- ----- to rdomain/loopback (=value). 46 * | AF_INET |. 47 * ----------- `. .---------. .---------. 48 * ... `----> | rtable0 | ... | rtableN | Array of pointers for 49 * ----------- '---------' '---------' IPv4 routing tables 50 * | AF_MPLS | indexed by ``rtableid''. 
 * -----------
 */
struct srp	  *afmap;
uint8_t		   af2idx[AF_MAX+1];	/* To only allocate supported AF */
uint8_t		   af2idx_max;

/* Array of routing table pointers. */
struct rtmap {
	unsigned int	   limit;	/* number of slots in ``tbl'' */
	void		 **tbl;		/* one table pointer per rtableid */
};

/*
 * Array of rtableid -> rdomain mapping.
 *
 * Only used for the first index as described above.
 */
struct dommp {
	unsigned int	   limit;	/* number of slots in ``value'' */
	/*
	 * Array to get the routing domain and loopback interface related to
	 * a routing table. Format:
	 *
	 * 8 unused bits | 16 bits for loopback index | 8 bits for rdomain
	 */
	unsigned int	  *value;
};

/* Current size of every per-AF map; they all grow in lockstep. */
unsigned int	   rtmap_limit = 0;

void		   rtmap_init(void);
void		   rtmap_grow(unsigned int, sa_family_t);
void		   rtmap_dtor(void *, void *);

/* Garbage collector freeing old rtmaps once all SRP readers are done. */
struct srp_gc	   rtmap_gc = SRP_GC_INITIALIZER(rtmap_dtor, NULL);

void		   rtable_init_backend(void);
void		  *rtable_alloc(unsigned int, unsigned int, unsigned int);
void		  *rtable_get(unsigned int, sa_family_t);

/*
 * Allocate the initial (size 1) table map for every domain providing a
 * routing table, plus the rtableid->rdomain map stored at index 0.
 */
void
rtmap_init(void)
{
	const struct domain	*dp;
	int			 i;

	/* Start with a single table for every domain that requires it. */
	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_rtoffset == 0)
			continue;

		rtmap_grow(1, dp->dom_family);
	}

	/* Initialize the rtableid->rdomain mapping table. */
	rtmap_grow(1, 0);

	rtmap_limit = 1;
}

/*
 * Grow the size of the array of routing table for AF ``af'' to ``nlimit''.
 */
void
rtmap_grow(unsigned int nlimit, sa_family_t af)
{
	struct rtmap	*map, *nmap;
	int		 i;

	KERNEL_ASSERT_LOCKED();

	KASSERT(nlimit > rtmap_limit);

	/* Build the bigger map, copy existing entries, then publish it. */
	nmap = malloc(sizeof(*nmap), M_RTABLE, M_WAITOK);
	nmap->limit = nlimit;
	nmap->tbl = mallocarray(nlimit, sizeof(*nmap[0].tbl), M_RTABLE,
	    M_WAITOK|M_ZERO);

	map = srp_get_locked(&afmap[af2idx[af]]);
	if (map != NULL) {
		KASSERT(map->limit == rtmap_limit);

		for (i = 0; i < map->limit; i++)
			nmap->tbl[i] = map->tbl[i];
	}

	/* The old map is freed by rtmap_dtor() once readers are gone. */
	srp_update_locked(&rtmap_gc, &afmap[af2idx[af]], nmap);
}

/*
 * SRP garbage collector callback: free a superseded rtmap.
 */
void
rtmap_dtor(void *null, void *xmap)
{
	struct rtmap	*map = xmap;

	/*
	 * doesn't need to be serialized since this is the last reference
	 * to this map. there's nothing to race against.
	 */
	free(map->tbl, M_RTABLE, map->limit * sizeof(*map[0].tbl));
	free(map, M_RTABLE, sizeof(*map));
}

/*
 * One-time initialization of the rtable layer: build the AF-to-index
 * mapping, initialize the backend and create routing table 0.
 */
void
rtable_init(void)
{
	const struct domain	*dp;
	int			 i;

	/* Index 0 of afmap is reused for a "struct dommp". */
	KASSERT(sizeof(struct rtmap) == sizeof(struct dommp));

	/* We use index 0 for the rtable/rdomain map. */
	af2idx_max = 1;
	memset(af2idx, 0, sizeof(af2idx));

	/*
	 * Compute the maximum supported key length in case the routing
	 * table backend needs it.
	 */
	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_rtoffset == 0)
			continue;

		af2idx[dp->dom_family] = af2idx_max++;
	}
	rtable_init_backend();

	/*
	 * Allocate AF-to-id table now that we know how many AFs this
	 * kernel supports.
	 */
	afmap = mallocarray(af2idx_max + 1, sizeof(*afmap), M_RTABLE,
	    M_WAITOK|M_ZERO);

	rtmap_init();

	if (rtable_add(0) != 0)
		panic("unable to create default routing table");

	rt_timer_init();
}

/*
 * Create routing table ``id'' for every supported address family,
 * growing the per-AF maps if needed.  Returns 0 if the table already
 * exists or was created, EINVAL for an out-of-range id, ENOMEM on
 * allocation failure.
 */
int
rtable_add(unsigned int id)
{
	const struct domain	*dp;
	void			*tbl;
	struct rtmap		*map;
	struct dommp		*dmm;
	sa_family_t		 af;
	unsigned int		 off, alen;
	int			 i, error = 0;

	if (id > RT_TABLEID_MAX)
		return (EINVAL);

	KERNEL_LOCK();

	if (rtable_exists(id))
		goto out;

	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_rtoffset == 0)
			continue;

		af = dp->dom_family;
		off = dp->dom_rtoffset;
		alen = dp->dom_maxplen;

		if (id >= rtmap_limit)
			rtmap_grow(id + 1, af);

		tbl = rtable_alloc(id, alen, off);
		if (tbl == NULL) {
			error = ENOMEM;
			goto out;
		}

		map = srp_get_locked(&afmap[af2idx[af]]);
		map->tbl[id] = tbl;
	}

	/* Reflect possible growth. */
	if (id >= rtmap_limit) {
		rtmap_grow(id + 1, 0);
		rtmap_limit = id + 1;
	}

	/* Use main rtable/rdomain by default.
	 */
	dmm = srp_get_locked(&afmap[0]);
	dmm->value[id] = 0;
out:
	KERNEL_UNLOCK();

	return (error);
}

/*
 * Return the routing table of family ``af'' for ``rtableid'', or NULL
 * if the AF is unsupported or the id is out of range.  Safe to call
 * without locks: the map is read under an SRP reference.
 */
void *
rtable_get(unsigned int rtableid, sa_family_t af)
{
	struct rtmap	*map;
	void		*tbl = NULL;
	struct srp_ref	 sr;

	if (af >= nitems(af2idx) || af2idx[af] == 0)
		return (NULL);

	map = srp_enter(&sr, &afmap[af2idx[af]]);
	if (rtableid < map->limit)
		tbl = map->tbl[rtableid];
	srp_leave(&sr);

	return (tbl);
}

/*
 * Return 1 if ``rtableid'' has a table for at least one AF, 0 otherwise.
 */
int
rtable_exists(unsigned int rtableid)
{
	const struct domain	*dp;
	void			*tbl;
	int			 i;

	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_rtoffset == 0)
			continue;

		tbl = rtable_get(rtableid, dp->dom_family);
		if (tbl != NULL)
			return (1);
	}

	return (0);
}

/*
 * Return 1 if every per-AF table of ``rtableid'' holds no route.
 */
int
rtable_empty(unsigned int rtableid)
{
	const struct domain	*dp;
	int			 i;
	struct art_root		*tbl;

	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_rtoffset == 0)
			continue;

		tbl = rtable_get(rtableid, dp->dom_family);
		if (tbl == NULL)
			continue;
		/* A non-NULL ART root reference means the table has entries. */
		if (tbl->ar_root.ref != NULL)
			return (0);
	}

	return (1);
}

/*
 * Return the routing domain of ``rtableid'' (0 if unknown).
 */
unsigned int
rtable_l2(unsigned int rtableid)
{
	struct dommp	*dmm;
	unsigned int	 rdomain = 0;
	struct srp_ref	 sr;

	dmm = srp_enter(&sr, &afmap[0]);
	if (rtableid < dmm->limit)
		rdomain = (dmm->value[rtableid] & RT_TABLEID_MASK);
	srp_leave(&sr);

	return (rdomain);
}

/*
 * Return the loopback interface index of ``rtableid'' (0 if unknown).
 */
unsigned int
rtable_loindex(unsigned int rtableid)
{
	struct dommp	*dmm;
	unsigned int	 loifidx = 0;
	struct srp_ref	 sr;

	dmm = srp_enter(&sr, &afmap[0]);
	if (rtableid < dmm->limit)
		loifidx = (dmm->value[rtableid] >> RT_TABLEID_BITS);
	srp_leave(&sr);

	return (loifidx);
}

/*
 * Associate routing domain ``rdomain'' and loopback interface
 * ``loifidx'' with routing table ``rtableid''.
 */
void
rtable_l2set(unsigned int rtableid, unsigned int rdomain, unsigned int loifidx)
{
	struct dommp	*dmm;
	unsigned int	 value;

	KERNEL_ASSERT_LOCKED();

	/* Both tables must exist before they can be linked. */
	if (!rtable_exists(rtableid) || !rtable_exists(rdomain))
		return;

	/* Pack rdomain and loopback ifindex as described in "struct dommp". */
	value = (rdomain & RT_TABLEID_MASK) | (loifidx << RT_TABLEID_BITS);

	dmm = srp_get_locked(&afmap[0]);
	dmm->value[rtableid] = value;
}


static inline const uint8_t	*satoaddr(struct art_root *,
				     const struct sockaddr *);

int	an_match(struct art_node *, const struct sockaddr *, int);
void	rtentry_ref(void *, void *);
void	rtentry_unref(void *, void *);

void	rtable_mpath_insert(struct art_node *, struct rtentry *);

/* Reference-counting callbacks for the SRP lists of multipath routes. */
struct srpl_rc rt_rc = SRPL_RC_INITIALIZER(rtentry_ref, rtentry_unref, NULL);

void
rtable_init_backend(void)
{
	art_init();
}

/*
 * Allocate an ART table for ``rtableid'' with key length ``alen'' bits
 * located ``off'' bytes inside the sockaddr.
 */
void *
rtable_alloc(unsigned int rtableid, unsigned int alen, unsigned int off)
{
	return (art_alloc(rtableid, alen, off));
}

/*
 * Set the preferred source address for ``af'' in ``rtableid''.
 */
int
rtable_setsource(unsigned int rtableid, int af, struct sockaddr *src)
{
	struct art_root		*ar;

	NET_ASSERT_LOCKED_EXCLUSIVE();

	if ((ar = rtable_get(rtableid, af)) == NULL)
		return (EAFNOSUPPORT);

	ar->ar_source = src;

	return (0);
}

/*
 * Return the preferred source address for ``af'' in ``rtableid'',
 * or NULL if none is set.
 */
struct sockaddr *
rtable_getsource(unsigned int rtableid, int af)
{
	struct art_root		*ar;

	NET_ASSERT_LOCKED();

	ar = rtable_get(rtableid, af);
	if (ar == NULL)
		return (NULL);

	return (ar->ar_source);
}

/*
 * Clear the preferred source address of ``rtableid'' if it matches
 * ``src'' exactly.
 */
void
rtable_clearsource(unsigned int rtableid, struct sockaddr *src)
{
	struct sockaddr	*addr;

	addr = rtable_getsource(rtableid, src->sa_family);
	if (addr && (addr->sa_len == src->sa_len)) {
		if (memcmp(src, addr, addr->sa_len) == 0) {
			rtable_setsource(rtableid, src->sa_family, NULL);
		}
	}
}

/*
 * Return a route matching (``dst'', ``mask'') and optionally ``gateway''
 * and ``prio''.  A NULL mask requests a longest-prefix match instead of
 * a perfect match.  The returned route is referenced; caller must rtfree().
 */
struct rtentry *
rtable_lookup(unsigned int rtableid, const struct sockaddr *dst,
    const struct sockaddr *mask, const struct sockaddr *gateway, uint8_t prio)
{
	struct art_root
			*ar;
	struct art_node			*an;
	struct rtentry			*rt = NULL;
	struct srp_ref			 sr, nsr;
	const uint8_t			*addr;
	int				 plen;

	ar = rtable_get(rtableid, dst->sa_family);
	if (ar == NULL)
		return (NULL);

	addr = satoaddr(ar, dst);

	/* No need for a perfect match. */
	if (mask == NULL) {
		an = art_match(ar, addr, &nsr);
		if (an == NULL)
			goto out;
	} else {
		plen = rtable_satoplen(dst->sa_family, mask);
		if (plen == -1)
			return (NULL);

		an = art_lookup(ar, addr, plen, &nsr);

		/* Make sure we've got a perfect match. */
		if (!an_match(an, dst, plen))
			goto out;
	}

	/* Pick the first entry matching the priority/gateway filters. */
	SRPL_FOREACH(rt, &sr, &an->an_rtlist, rt_next) {
		if (prio != RTP_ANY &&
		    (rt->rt_priority & RTP_MASK) != (prio & RTP_MASK))
			continue;

		if (gateway == NULL)
			break;

		if (rt->rt_gateway->sa_len == gateway->sa_len &&
		    memcmp(rt->rt_gateway, gateway, gateway->sa_len) == 0)
			break;
	}
	if (rt != NULL)
		rtref(rt);

	SRPL_LEAVE(&sr);
out:
	srp_leave(&nsr);

	return (rt);
}

/*
 * Longest-prefix-match lookup for ``dst''.  ``src'' is used, if
 * non-NULL, by rt_hash() for multipath gateway selection.  The returned
 * route is referenced; caller must rtfree().
 */
struct rtentry *
rtable_match(unsigned int rtableid, const struct sockaddr *dst, uint32_t *src)
{
	struct art_root			*ar;
	struct art_node			*an;
	struct rtentry			*rt = NULL;
	struct srp_ref			 sr, nsr;
	const uint8_t			*addr;
	int				 hash;

	ar = rtable_get(rtableid, dst->sa_family);
	if (ar == NULL)
		return (NULL);

	addr = satoaddr(ar, dst);

	an = art_match(ar, addr, &nsr);
	if (an == NULL)
		goto out;

	/* Take a reference on the first route before leaving the list. */
	rt = SRPL_FIRST(&sr, &an->an_rtlist);
	if (rt == NULL) {
		SRPL_LEAVE(&sr);
		goto out;
	}
	rtref(rt);
	SRPL_LEAVE(&sr);

	/* Gateway selection by Hash-Threshold (RFC 2992) */
	if ((hash = rt_hash(rt, dst, src)) != -1) {
		struct rtentry		*mrt;
		int			 threshold, npaths = 0;

		KASSERT(hash <= 0xffff);

		SRPL_FOREACH(mrt, &sr, &an->an_rtlist, rt_next) {
			/* Only count nexthops with the same priority.
			 */
			if (mrt->rt_priority == rt->rt_priority)
				npaths++;
		}
		SRPL_LEAVE(&sr);

		/* npaths >= 1 here since ``rt'' itself was counted. */
		threshold = (0xffff / npaths) + 1;

		/*
		 * we have no protection against concurrent modification of the
		 * route list attached to the node, so we won't necessarily
		 * have the same number of routes.  for most modifications,
		 * we'll pick a route that we wouldn't have if we only saw the
		 * list before or after the change.  if we were going to use
		 * the last available route, but it got removed, we'll hit
		 * the end of the list and then pick the first route.
		 */

		mrt = SRPL_FIRST(&sr, &an->an_rtlist);
		while (hash > threshold && mrt != NULL) {
			if (mrt->rt_priority == rt->rt_priority)
				hash -= threshold;
			mrt = SRPL_FOLLOW(&sr, mrt, rt_next);
		}

		if (mrt != NULL) {
			rtref(mrt);
			rtfree(rt);
			rt = mrt;
		}
		SRPL_LEAVE(&sr);
	}
out:
	srp_leave(&nsr);
	return (rt);
}

/*
 * Insert ``rt'' for (``dst'', ``mask'', ``gateway'', ``prio'') into
 * the table.  Returns 0 on success, EEXIST for a duplicate entry,
 * EAFNOSUPPORT/EINVAL for bad arguments, ENOBUFS/ESRCH on backend
 * failures.
 */
int
rtable_insert(unsigned int rtableid, struct sockaddr *dst,
    const struct sockaddr *mask, const struct sockaddr *gateway, uint8_t prio,
    struct rtentry *rt)
{
	struct rtentry			*mrt;
	struct srp_ref			 sr;
	struct art_root			*ar;
	struct art_node			*an, *prev;
	const uint8_t			*addr;
	int				 plen;
	unsigned int			 rt_flags;
	int				 error = 0;

	ar = rtable_get(rtableid, dst->sa_family);
	if (ar == NULL)
		return (EAFNOSUPPORT);

	addr = satoaddr(ar, dst);
	plen = rtable_satoplen(dst->sa_family, mask);
	if (plen == -1)
		return (EINVAL);

	rtref(rt);	/* guarantee rtfree won't do anything during insert */
	rw_enter_write(&ar->ar_lock);

	/* Do not permit exactly the same dst/mask/gw pair.
*/ 574 an = art_lookup(ar, addr, plen, &sr); 575 srp_leave(&sr); /* an can't go away while we have the lock */ 576 if (an_match(an, dst, plen)) { 577 struct rtentry *mrt; 578 int mpathok = ISSET(rt->rt_flags, RTF_MPATH); 579 580 SRPL_FOREACH_LOCKED(mrt, &an->an_rtlist, rt_next) { 581 if (prio != RTP_ANY && 582 (mrt->rt_priority & RTP_MASK) != (prio & RTP_MASK)) 583 continue; 584 585 if (!mpathok || 586 (mrt->rt_gateway->sa_len == gateway->sa_len && 587 memcmp(mrt->rt_gateway, gateway, 588 gateway->sa_len) == 0)) { 589 error = EEXIST; 590 goto leave; 591 } 592 } 593 } 594 595 an = art_get(plen); 596 if (an == NULL) { 597 error = ENOBUFS; 598 goto leave; 599 } 600 601 /* prepare for immediate operation if insert succeeds */ 602 rt_flags = rt->rt_flags; 603 rt->rt_flags &= ~RTF_MPATH; 604 rt->rt_dest = dst; 605 rt->rt_plen = plen; 606 SRPL_INSERT_HEAD_LOCKED(&rt_rc, &an->an_rtlist, rt, rt_next); 607 608 prev = art_insert(ar, an, addr, plen); 609 if (prev != an) { 610 SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry, 611 rt_next); 612 rt->rt_flags = rt_flags; 613 art_put(an); 614 615 if (prev == NULL) { 616 error = ESRCH; 617 goto leave; 618 } 619 620 an = prev; 621 622 mrt = SRPL_FIRST_LOCKED(&an->an_rtlist); 623 KASSERT(mrt != NULL); 624 KASSERT((rt->rt_flags & RTF_MPATH) || mrt->rt_priority != prio); 625 626 /* 627 * An ART node with the same destination/netmask already 628 * exists, MPATH conflict must have been already checked. 629 */ 630 if (rt->rt_flags & RTF_MPATH) { 631 /* 632 * Only keep the RTF_MPATH flag if two routes have 633 * the same gateway. 634 */ 635 rt->rt_flags &= ~RTF_MPATH; 636 SRPL_FOREACH_LOCKED(mrt, &an->an_rtlist, rt_next) { 637 if (mrt->rt_priority == prio) { 638 mrt->rt_flags |= RTF_MPATH; 639 rt->rt_flags |= RTF_MPATH; 640 } 641 } 642 } 643 644 /* Put newly inserted entry at the right place. 
	 */
		rtable_mpath_insert(an, rt);
	}
leave:
	rw_exit_write(&ar->ar_lock);
	rtfree(rt);
	return (error);
}

/*
 * Remove ``rt'' matching (``dst'', ``mask'') from the table.  Returns 0
 * on success, EAFNOSUPPORT for an unsupported family, EINVAL for a bad
 * mask, ESRCH if no matching node exists.
 */
int
rtable_delete(unsigned int rtableid, const struct sockaddr *dst,
    const struct sockaddr *mask, struct rtentry *rt)
{
	struct art_root			*ar;
	struct art_node			*an;
	struct srp_ref			 sr;
	const uint8_t			*addr;
	int				 plen;
	struct rtentry			*mrt;
	int				 npaths = 0;
	int				 error = 0;

	ar = rtable_get(rtableid, dst->sa_family);
	if (ar == NULL)
		return (EAFNOSUPPORT);

	addr = satoaddr(ar, dst);
	plen = rtable_satoplen(dst->sa_family, mask);
	if (plen == -1)
		return (EINVAL);

	rtref(rt);	/* guarantee rtfree won't do anything under ar_lock */
	rw_enter_write(&ar->ar_lock);
	an = art_lookup(ar, addr, plen, &sr);
	srp_leave(&sr);	/* an can't go away while we have the lock */

	/* Make sure we've got a perfect match. */
	if (!an_match(an, dst, plen)) {
		error = ESRCH;
		goto leave;
	}

	/*
	 * If other multipath route entries are still attached to
	 * this ART node we only have to unlink it.
689 */ 690 SRPL_FOREACH_LOCKED(mrt, &an->an_rtlist, rt_next) 691 npaths++; 692 693 if (npaths > 1) { 694 KASSERT(refcnt_read(&rt->rt_refcnt) >= 1); 695 SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry, 696 rt_next); 697 698 mrt = SRPL_FIRST_LOCKED(&an->an_rtlist); 699 if (npaths == 2) 700 mrt->rt_flags &= ~RTF_MPATH; 701 702 goto leave; 703 } 704 705 if (art_delete(ar, an, addr, plen) == NULL) 706 panic("art_delete failed to find node %p", an); 707 708 KASSERT(refcnt_read(&rt->rt_refcnt) >= 1); 709 SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry, rt_next); 710 art_put(an); 711 712 leave: 713 rw_exit_write(&ar->ar_lock); 714 rtfree(rt); 715 716 return (error); 717 } 718 719 struct rtable_walk_cookie { 720 int (*rwc_func)(struct rtentry *, void *, unsigned int); 721 void *rwc_arg; 722 struct rtentry **rwc_prt; 723 unsigned int rwc_rid; 724 }; 725 726 /* 727 * Helper for rtable_walk to keep the ART code free from any "struct rtentry". 728 */ 729 int 730 rtable_walk_helper(struct art_node *an, void *xrwc) 731 { 732 struct srp_ref sr; 733 struct rtable_walk_cookie *rwc = xrwc; 734 struct rtentry *rt; 735 int error = 0; 736 737 SRPL_FOREACH(rt, &sr, &an->an_rtlist, rt_next) { 738 error = (*rwc->rwc_func)(rt, rwc->rwc_arg, rwc->rwc_rid); 739 if (error != 0) 740 break; 741 } 742 if (rwc->rwc_prt != NULL && rt != NULL) { 743 rtref(rt); 744 *rwc->rwc_prt = rt; 745 } 746 SRPL_LEAVE(&sr); 747 748 return (error); 749 } 750 751 int 752 rtable_walk(unsigned int rtableid, sa_family_t af, struct rtentry **prt, 753 int (*func)(struct rtentry *, void *, unsigned int), void *arg) 754 { 755 struct art_root *ar; 756 struct rtable_walk_cookie rwc; 757 int error; 758 759 ar = rtable_get(rtableid, af); 760 if (ar == NULL) 761 return (EAFNOSUPPORT); 762 763 rwc.rwc_func = func; 764 rwc.rwc_arg = arg; 765 rwc.rwc_prt = prt; 766 rwc.rwc_rid = rtableid; 767 768 error = art_walk(ar, rtable_walk_helper, &rwc); 769 770 return (error); 771 } 772 773 struct rtentry * 774 
rtable_iterate(struct rtentry *rt0)
{
	struct rtentry	*rt = NULL;
	struct srp_ref	 sr;

	/*
	 * Step to the next multipath entry after ``rt0''; take a new
	 * reference and drop the caller's reference on ``rt0''.
	 */
	rt = SRPL_NEXT(&sr, rt0, rt_next);
	if (rt != NULL)
		rtref(rt);
	SRPL_LEAVE(&sr);
	rtfree(rt0);
	return (rt);
}

/*
 * The ART backend always supports multipath routes.
 */
int
rtable_mpath_capable(unsigned int rtableid, sa_family_t af)
{
	return (1);
}

/*
 * Change the priority of ``rt'' to ``prio'' and re-sort it inside its
 * node's multipath list.  Returns EAGAIN when the entry was moved (so
 * callers can restart their iteration), ESRCH if the node is missing.
 */
int
rtable_mpath_reprio(unsigned int rtableid, struct sockaddr *dst,
    int plen, uint8_t prio, struct rtentry *rt)
{
	struct art_root			*ar;
	struct art_node			*an;
	struct srp_ref			 sr;
	const uint8_t			*addr;
	int				 error = 0;

	ar = rtable_get(rtableid, dst->sa_family);
	if (ar == NULL)
		return (EAFNOSUPPORT);

	addr = satoaddr(ar, dst);

	rw_enter_write(&ar->ar_lock);
	an = art_lookup(ar, addr, plen, &sr);
	srp_leave(&sr);	/* an can't go away while we have the lock */

	/* Make sure we've got a perfect match. */
	if (!an_match(an, dst, plen)) {
		error = ESRCH;
	} else if (SRPL_FIRST_LOCKED(&an->an_rtlist) == rt &&
	    SRPL_NEXT_LOCKED(rt, rt_next) == NULL) {
		/*
		 * If there's only one entry on the list do not go
		 * through an insert/remove cycle.  This is done to
		 * guarantee that ``an->an_rtlist'' is never empty
		 * when a node is in the tree.
		 */
		rt->rt_priority = prio;
	} else {
		rtref(rt); /* keep rt alive in between remove and insert */
		SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist,
		    rt, rtentry, rt_next);
		rt->rt_priority = prio;
		rtable_mpath_insert(an, rt);
		rtfree(rt);
		error = EAGAIN;
	}
	rw_exit_write(&ar->ar_lock);

	return (error);
}

/*
 * Insert ``rt'' into the multipath list of ``an'' keeping the list
 * sorted by ascending priority.  Caller holds the table's write lock.
 */
void
rtable_mpath_insert(struct art_node *an, struct rtentry *rt)
{
	struct rtentry			*mrt, *prt = NULL;
	uint8_t				 prio = rt->rt_priority;

	if ((mrt = SRPL_FIRST_LOCKED(&an->an_rtlist)) == NULL) {
		SRPL_INSERT_HEAD_LOCKED(&rt_rc, &an->an_rtlist, rt, rt_next);
		return;
	}

	/* Iterate until we find the route to be placed after ``rt''. */
	while (mrt->rt_priority <= prio && SRPL_NEXT_LOCKED(mrt, rt_next)) {
		prt = mrt;
		mrt = SRPL_NEXT_LOCKED(mrt, rt_next);
	}

	if (mrt->rt_priority <= prio) {
		/* All existing entries sort before ``rt'': append. */
		SRPL_INSERT_AFTER_LOCKED(&rt_rc, mrt, rt, rt_next);
	} else if (prt != NULL) {
		/* Place ``rt'' between ``prt'' and ``mrt''. */
		SRPL_INSERT_AFTER_LOCKED(&rt_rc, prt, rt, rt_next);
	} else {
		/* ``rt'' sorts before the current head. */
		SRPL_INSERT_HEAD_LOCKED(&rt_rc, &an->an_rtlist, rt, rt_next);
	}
}

/*
 * Returns 1 if ``an'' perfectly matches (``dst'', ``plen''), 0 otherwise.
 */
int
an_match(struct art_node *an, const struct sockaddr *dst, int plen)
{
	struct rtentry			*rt;
	struct srp_ref			 sr;
	int				 match;

	if (an == NULL || an->an_plen != plen)
		return (0);

	/* All routes on a node share the destination; compare the first. */
	rt = SRPL_FIRST(&sr, &an->an_rtlist);
	match = (rt != NULL && memcmp(rt->rt_dest, dst, dst->sa_len) == 0);
	SRPL_LEAVE(&sr);

	return (match);
}

/* srpl_rc callback: take a reference on a list element. */
void
rtentry_ref(void *null, void *xrt)
{
	struct rtentry *rt = xrt;

	rtref(rt);
}

/* srpl_rc callback: drop a reference on a list element. */
void
rtentry_unref(void *null, void *xrt)
{
	struct rtentry *rt = xrt;

	rtfree(rt);
}

/*
 * Return a pointer to the address (key).
This is an heritage from the 903 * BSD radix tree needed to skip the non-address fields from the flavor 904 * of "struct sockaddr" used by this routing table. 905 */ 906 static inline const uint8_t * 907 satoaddr(struct art_root *at, const struct sockaddr *sa) 908 { 909 return (((const uint8_t *)sa) + at->ar_off); 910 } 911 912 /* 913 * Return the prefix length of a mask. 914 */ 915 int 916 rtable_satoplen(sa_family_t af, const struct sockaddr *mask) 917 { 918 const struct domain *dp; 919 uint8_t *ap, *ep; 920 int mlen, plen = 0; 921 int i; 922 923 for (i = 0; (dp = domains[i]) != NULL; i++) { 924 if (dp->dom_rtoffset == 0) 925 continue; 926 927 if (af == dp->dom_family) 928 break; 929 } 930 if (dp == NULL) 931 return (-1); 932 933 /* Host route */ 934 if (mask == NULL) 935 return (dp->dom_maxplen); 936 937 mlen = mask->sa_len; 938 939 /* Default route */ 940 if (mlen == 0) 941 return (0); 942 943 ap = (uint8_t *)((uint8_t *)mask) + dp->dom_rtoffset; 944 ep = (uint8_t *)((uint8_t *)mask) + mlen; 945 if (ap > ep) 946 return (-1); 947 948 /* Trim trailing zeroes. */ 949 while (ap < ep && ep[-1] == 0) 950 ep--; 951 952 if (ap == ep) 953 return (0); 954 955 /* "Beauty" adapted from sbin/route/show.c ... */ 956 while (ap < ep) { 957 switch (*ap++) { 958 case 0xff: 959 plen += 8; 960 break; 961 case 0xfe: 962 plen += 7; 963 goto out; 964 case 0xfc: 965 plen += 6; 966 goto out; 967 case 0xf8: 968 plen += 5; 969 goto out; 970 case 0xf0: 971 plen += 4; 972 goto out; 973 case 0xe0: 974 plen += 3; 975 goto out; 976 case 0xc0: 977 plen += 2; 978 goto out; 979 case 0x80: 980 plen += 1; 981 goto out; 982 default: 983 /* Non contiguous mask. */ 984 return (-1); 985 } 986 } 987 988 out: 989 if (plen > dp->dom_maxplen || ap != ep) 990 return -1; 991 992 return (plen); 993 } 994