/*	$OpenBSD: rtable.c,v 1.75 2021/05/25 22:45:09 bluhm Exp $ */

/*
 * Copyright (c) 2014-2016 Martin Pieuchot
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#ifndef _KERNEL
#include "kern_compat.h"
#else
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/socket.h>
#include <sys/malloc.h>
#include <sys/queue.h>
#include <sys/domain.h>
#include <sys/srp.h>
#endif

#include <net/rtable.h>
#include <net/route.h>

/*
 * Structures used by rtable_get() to retrieve the corresponding
 * routing table for a given pair of ``af'' and ``rtableid''.
 *
 * Note that once allocated routing table heads are never freed.
 * This way we do not need to reference count them.
 *
 *    afmap                rtmap/dommp
 *   -----------          ---------     -----
 *   |    0    |--------> | 0 | 0 | ... | 0 |   Array mapping rtableid (=index)
 *   -----------          ---------     -----   to rdomain/loopback (=value).
 *   | AF_INET |.
 *   ----------- `.        .---------.     .---------.
 *       ...        `----> | rtable0 | ... | rtableN |  Array of pointers for
 *   -----------           '---------'     '---------'  IPv4 routing tables
 *   | AF_MPLS |                                         indexed by ``rtableid''.
 *   -----------
 */
struct srp *afmap;
uint8_t af2idx[AF_MAX+1];	/* To only allocate supported AF */
uint8_t af2idx_max;

/* Array of routing table pointers. */
struct rtmap {
	unsigned int limit;
	void **tbl;
};

/*
 * Array of rtableid -> rdomain mapping.
 *
 * Only used for the first index as described above.
 */
struct dommp {
	unsigned int limit;
	/*
	 * Array to get the routing domain and loopback interface related to
	 * a routing table. Format:
	 *
	 * 8 unused bits | 16 bits for loopback index | 8 bits for rdomain
	 */
	unsigned int *value;
};
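
/*
 * For example, a dommp ``value'' of 0x00000203 associates a routing
 * table with rdomain 3 and the loopback interface with index 2.
 */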

unsigned int rtmap_limit = 0;

void rtmap_init(void);
void rtmap_grow(unsigned int, sa_family_t);
void rtmap_dtor(void *, void *);

struct srp_gc rtmap_gc = SRP_GC_INITIALIZER(rtmap_dtor, NULL);

void rtable_init_backend(void);
void *rtable_alloc(unsigned int, unsigned int, unsigned int);
void *rtable_get(unsigned int, sa_family_t);

void
rtmap_init(void)
{
	const struct domain *dp;
	int i;

	/* Start with a single table for every domain that requires it. */
	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_rtoffset == 0)
			continue;

		rtmap_grow(1, dp->dom_family);
	}

	/* Initialize the rtableid->rdomain mapping table. */
	rtmap_grow(1, 0);

	rtmap_limit = 1;
}

/*
 * Grow the size of the array of routing tables for AF ``af'' to ``nlimit''.
 */
void
rtmap_grow(unsigned int nlimit, sa_family_t af)
{
	struct rtmap *map, *nmap;
	int i;

	KERNEL_ASSERT_LOCKED();

	KASSERT(nlimit > rtmap_limit);

	nmap = malloc(sizeof(*nmap), M_RTABLE, M_WAITOK);
	nmap->limit = nlimit;
	nmap->tbl = mallocarray(nlimit, sizeof(*nmap[0].tbl), M_RTABLE,
	    M_WAITOK|M_ZERO);

	map = srp_get_locked(&afmap[af2idx[af]]);
	if (map != NULL) {
		KASSERT(map->limit == rtmap_limit);

		for (i = 0; i < map->limit; i++)
			nmap->tbl[i] = map->tbl[i];
	}

	srp_update_locked(&rtmap_gc, &afmap[af2idx[af]], nmap);
}

void
rtmap_dtor(void *null, void *xmap)
{
	struct rtmap *map = xmap;

	/*
	 * doesn't need to be serialized since this is the last reference
	 * to this map. there's nothing to race against.
	 */
	free(map->tbl, M_RTABLE, map->limit * sizeof(*map[0].tbl));
	free(map, M_RTABLE, sizeof(*map));
}

void
rtable_init(void)
{
	const struct domain *dp;
	int i;

	KASSERT(sizeof(struct rtmap) == sizeof(struct dommp));

	/* We use index 0 for the rtable/rdomain map. */
	af2idx_max = 1;
	memset(af2idx, 0, sizeof(af2idx));

	/*
	 * Compute the maximum supported key length in case the routing
	 * table backend needs it.
	 */
	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_rtoffset == 0)
			continue;

		af2idx[dp->dom_family] = af2idx_max++;
	}
	rtable_init_backend();

	/*
	 * Allocate AF-to-id table now that we know how many AFs this
	 * kernel supports.
	 */
	afmap = mallocarray(af2idx_max + 1, sizeof(*afmap), M_RTABLE,
	    M_WAITOK|M_ZERO);

	rtmap_init();

	if (rtable_add(0) != 0)
		panic("unable to create default routing table");
}
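
/*
 * Create a routing table for id ``id'' in every domain that needs one
 * and map the new table to the main rdomain by default.  Returns 0 on
 * success or if the table already exists.
 */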
int
rtable_add(unsigned int id)
{
	const struct domain *dp;
	void *tbl;
	struct rtmap *map;
	struct dommp *dmm;
	sa_family_t af;
	unsigned int off, alen;
	int i, error = 0;

	if (id > RT_TABLEID_MAX)
		return (EINVAL);

	KERNEL_LOCK();

	if (rtable_exists(id))
		goto out;

	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_rtoffset == 0)
			continue;

		af = dp->dom_family;
		off = dp->dom_rtoffset;
		alen = dp->dom_maxplen;

		if (id >= rtmap_limit)
			rtmap_grow(id + 1, af);

		tbl = rtable_alloc(id, alen, off);
		if (tbl == NULL) {
			error = ENOMEM;
			goto out;
		}

		map = srp_get_locked(&afmap[af2idx[af]]);
		map->tbl[id] = tbl;
	}

	/* Reflect possible growth. */
	if (id >= rtmap_limit) {
		rtmap_grow(id + 1, 0);
		rtmap_limit = id + 1;
	}

	/* Use main rtable/rdomain by default. */
	dmm = srp_get_locked(&afmap[0]);
	dmm->value[id] = 0;
out:
	KERNEL_UNLOCK();

	return (error);
}

void *
rtable_get(unsigned int rtableid, sa_family_t af)
{
	struct rtmap *map;
	void *tbl = NULL;
	struct srp_ref sr;

	if (af >= nitems(af2idx) || af2idx[af] == 0)
		return (NULL);

	map = srp_enter(&sr, &afmap[af2idx[af]]);
	if (rtableid < map->limit)
		tbl = map->tbl[rtableid];
	srp_leave(&sr);

	return (tbl);
}

int
rtable_exists(unsigned int rtableid)
{
	const struct domain *dp;
	void *tbl;
	int i;

	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_rtoffset == 0)
			continue;

		tbl = rtable_get(rtableid, dp->dom_family);
		if (tbl != NULL)
			return (1);
	}

	return (0);
}

int
rtable_empty(unsigned int rtableid)
{
	const struct domain *dp;
	int i;
	struct art_root *tbl;

	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_rtoffset == 0)
			continue;

		tbl = rtable_get(rtableid, dp->dom_family);
		if (tbl == NULL)
			continue;
		if (tbl->ar_root.ref != NULL)
			return (0);
	}

	return (1);
}

unsigned int
rtable_l2(unsigned int rtableid)
{
	struct dommp *dmm;
	unsigned int rdomain = 0;
	struct srp_ref sr;

	dmm = srp_enter(&sr, &afmap[0]);
	if (rtableid < dmm->limit)
		rdomain = (dmm->value[rtableid] & RT_TABLEID_MASK);
	srp_leave(&sr);

	return (rdomain);
}

unsigned int
rtable_loindex(unsigned int rtableid)
{
	struct dommp *dmm;
	unsigned int loifidx = 0;
	struct srp_ref sr;

	dmm = srp_enter(&sr, &afmap[0]);
	if (rtableid < dmm->limit)
		loifidx = (dmm->value[rtableid] >> RT_TABLEID_BITS);
	srp_leave(&sr);

	return (loifidx);
}

void
rtable_l2set(unsigned int rtableid, unsigned int rdomain, unsigned int loifidx)
{
	struct dommp *dmm;
	unsigned int value;

	KERNEL_ASSERT_LOCKED();

	if (!rtable_exists(rtableid) || !rtable_exists(rdomain))
		return;

	value = (rdomain & RT_TABLEID_MASK) | (loifidx << RT_TABLEID_BITS);

	dmm = srp_get_locked(&afmap[0]);
	dmm->value[rtableid] = value;
}


static inline uint8_t *satoaddr(struct art_root *, struct sockaddr *);

int an_match(struct art_node *, struct sockaddr *, int);
void rtentry_ref(void *, void *);
void rtentry_unref(void *, void *);

void rtable_mpath_insert(struct art_node *, struct rtentry *);

struct srpl_rc rt_rc = SRPL_RC_INITIALIZER(rtentry_ref, rtentry_unref, NULL);

void
rtable_init_backend(void)
{
	art_init();
}

void *
rtable_alloc(unsigned int rtableid, unsigned int alen, unsigned int off)
{
	return (art_alloc(rtableid, alen, off));
}

int
rtable_setsource(unsigned int rtableid, int af, struct sockaddr *src)
{
	struct art_root *ar;

	if ((ar = rtable_get(rtableid, af)) == NULL)
		return (EAFNOSUPPORT);

	ar->source = src;

	return (0);
}

struct sockaddr *
rtable_getsource(unsigned int rtableid, int af)
{
	struct art_root *ar;

	ar = rtable_get(rtableid, af);
	if (ar == NULL)
		return (NULL);

	return (ar->source);
}

void
rtable_clearsource(unsigned int rtableid, struct sockaddr *src)
{
	struct sockaddr *addr;

	addr = rtable_getsource(rtableid, src->sa_family);
	if (addr && (addr->sa_len == src->sa_len)) {
		if (memcmp(src, addr, addr->sa_len) == 0) {
			rtable_setsource(rtableid, src->sa_family, NULL);
		}
	}
}
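
/*
 * Return the route matching ``dst''.  If a ``mask'' is given the entry
 * must match the destination/prefix length exactly, otherwise the
 * longest matching prefix is used.  ``gateway'' and ``prio'' further
 * restrict which multipath entry is picked.  The returned route is
 * referenced and must be released with rtfree().
 */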
struct rtentry *
rtable_lookup(unsigned int rtableid, struct sockaddr *dst,
    struct sockaddr *mask, struct sockaddr *gateway, uint8_t prio)
{
	struct art_root *ar;
	struct art_node *an;
	struct rtentry *rt = NULL;
	struct srp_ref sr, nsr;
	uint8_t *addr;
	int plen;

	ar = rtable_get(rtableid, dst->sa_family);
	if (ar == NULL)
		return (NULL);

	addr = satoaddr(ar, dst);

	/* No need for a perfect match. */
	if (mask == NULL) {
		an = art_match(ar, addr, &nsr);
		if (an == NULL)
			goto out;
	} else {
		plen = rtable_satoplen(dst->sa_family, mask);
		if (plen == -1)
			return (NULL);

		an = art_lookup(ar, addr, plen, &nsr);

		/* Make sure we've got a perfect match. */
		if (!an_match(an, dst, plen))
			goto out;
	}

	SRPL_FOREACH(rt, &sr, &an->an_rtlist, rt_next) {
		if (prio != RTP_ANY &&
		    (rt->rt_priority & RTP_MASK) != (prio & RTP_MASK))
			continue;

		if (gateway == NULL)
			break;

		if (rt->rt_gateway->sa_len == gateway->sa_len &&
		    memcmp(rt->rt_gateway, gateway, gateway->sa_len) == 0)
			break;
	}
	if (rt != NULL)
		rtref(rt);

	SRPL_LEAVE(&sr);
out:
	srp_leave(&nsr);

	return (rt);
}

struct rtentry *
rtable_match(unsigned int rtableid, struct sockaddr *dst, uint32_t *src)
{
	struct art_root *ar;
	struct art_node *an;
	struct rtentry *rt = NULL;
	struct srp_ref sr, nsr;
	uint8_t *addr;
	int hash;

	ar = rtable_get(rtableid, dst->sa_family);
	if (ar == NULL)
		return (NULL);

	addr = satoaddr(ar, dst);

	an = art_match(ar, addr, &nsr);
	if (an == NULL)
		goto out;

	rt = SRPL_FIRST(&sr, &an->an_rtlist);
	rtref(rt);
	SRPL_LEAVE(&sr);

	/* Gateway selection by Hash-Threshold (RFC 2992) */
	if ((hash = rt_hash(rt, dst, src)) != -1) {
		struct rtentry *mrt;
		int threshold, npaths = 0;

		KASSERT(hash <= 0xffff);

		SRPL_FOREACH(mrt, &sr, &an->an_rtlist, rt_next) {
			/* Only count nexthops with the same priority. */
			if (mrt->rt_priority == rt->rt_priority)
				npaths++;
		}
		SRPL_LEAVE(&sr);

		threshold = (0xffff / npaths) + 1;

		/*
		 * we have no protection against concurrent modification of the
		 * route list attached to the node, so we won't necessarily
		 * have the same number of routes. for most modifications,
		 * we'll pick a route that we wouldn't have if we only saw the
		 * list before or after the change. if we were going to use
		 * the last available route, but it got removed, we'll hit
		 * the end of the list and then pick the first route.
		 */
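
		/*
		 * For example, with two nexthops of equal priority the
		 * threshold is 0x8000: hashes up to 0x8000 keep the first
		 * route, larger hashes select the second one.
		 */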

		mrt = SRPL_FIRST(&sr, &an->an_rtlist);
		while (hash > threshold && mrt != NULL) {
			if (mrt->rt_priority == rt->rt_priority)
				hash -= threshold;
			mrt = SRPL_FOLLOW(&sr, mrt, rt_next);
		}

		if (mrt != NULL) {
			rtref(mrt);
			rtfree(rt);
			rt = mrt;
		}
		SRPL_LEAVE(&sr);
	}
out:
	srp_leave(&nsr);
	return (rt);
}

int
rtable_insert(unsigned int rtableid, struct sockaddr *dst,
    struct sockaddr *mask, struct sockaddr *gateway, uint8_t prio,
    struct rtentry *rt)
{
	struct rtentry *mrt;
	struct srp_ref sr;
	struct art_root *ar;
	struct art_node *an, *prev;
	uint8_t *addr;
	int plen;
	unsigned int rt_flags;
	int error = 0;

	ar = rtable_get(rtableid, dst->sa_family);
	if (ar == NULL)
		return (EAFNOSUPPORT);

	addr = satoaddr(ar, dst);
	plen = rtable_satoplen(dst->sa_family, mask);
	if (plen == -1)
		return (EINVAL);

	rtref(rt); /* guarantee rtfree won't do anything during insert */
	rw_enter_write(&ar->ar_lock);

	/* Do not permit exactly the same dst/mask/gw pair. */
	an = art_lookup(ar, addr, plen, &sr);
	srp_leave(&sr); /* an can't go away while we have the lock */
	if (an_match(an, dst, plen)) {
		struct rtentry *mrt;
		int mpathok = ISSET(rt->rt_flags, RTF_MPATH);

		SRPL_FOREACH_LOCKED(mrt, &an->an_rtlist, rt_next) {
			if (prio != RTP_ANY &&
			    (mrt->rt_priority & RTP_MASK) != (prio & RTP_MASK))
				continue;

			if (!mpathok ||
			    (mrt->rt_gateway->sa_len == gateway->sa_len &&
			    !memcmp(mrt->rt_gateway, gateway, gateway->sa_len))){
				error = EEXIST;
				goto leave;
			}
		}
	}

	an = art_get(dst, plen);
	if (an == NULL) {
		error = ENOBUFS;
		goto leave;
	}

	/* prepare for immediate operation if insert succeeds */
	rt_flags = rt->rt_flags;
	rt->rt_flags &= ~RTF_MPATH;
	rt->rt_dest = dst;
	rt->rt_plen = plen;
	SRPL_INSERT_HEAD_LOCKED(&rt_rc, &an->an_rtlist, rt, rt_next);

	prev = art_insert(ar, an, addr, plen);
	if (prev != an) {
		SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry,
		    rt_next);
		rt->rt_flags = rt_flags;
		art_put(an);

		if (prev == NULL) {
			error = ESRCH;
			goto leave;
		}

		an = prev;

		mrt = SRPL_FIRST_LOCKED(&an->an_rtlist);
		KASSERT(mrt != NULL);
		KASSERT((rt->rt_flags & RTF_MPATH) || mrt->rt_priority != prio);

		/*
		 * An ART node with the same destination/netmask already
		 * exists; any MPATH conflict has been checked above.
		 */
		if (rt->rt_flags & RTF_MPATH) {
			/*
			 * Only keep the RTF_MPATH flag if two routes have
			 * the same gateway.
			 */
			rt->rt_flags &= ~RTF_MPATH;
			SRPL_FOREACH_LOCKED(mrt, &an->an_rtlist, rt_next) {
				if (mrt->rt_priority == prio) {
					mrt->rt_flags |= RTF_MPATH;
					rt->rt_flags |= RTF_MPATH;
				}
			}
		}

		/* Put newly inserted entry at the right place. */
		rtable_mpath_insert(an, rt);
	}
leave:
	rw_exit_write(&ar->ar_lock);
	rtfree(rt);
	return (error);
}
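
/*
 * Remove ``rt'' from the table ``rtableid''.  The ART node is only
 * deleted once the last route attached to it goes away.
 */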
int
rtable_delete(unsigned int rtableid, struct sockaddr *dst,
    struct sockaddr *mask, struct rtentry *rt)
{
	struct art_root *ar;
	struct art_node *an;
	struct srp_ref sr;
	uint8_t *addr;
	int plen;
	struct rtentry *mrt;
	int npaths = 0;
	int error = 0;

	ar = rtable_get(rtableid, dst->sa_family);
	if (ar == NULL)
		return (EAFNOSUPPORT);

	addr = satoaddr(ar, dst);
	plen = rtable_satoplen(dst->sa_family, mask);
	if (plen == -1)
		return (EINVAL);

	rtref(rt); /* guarantee rtfree won't do anything under ar_lock */
	rw_enter_write(&ar->ar_lock);
	an = art_lookup(ar, addr, plen, &sr);
	srp_leave(&sr); /* an can't go away while we have the lock */

	/* Make sure we've got a perfect match. */
	if (!an_match(an, dst, plen)) {
		error = ESRCH;
		goto leave;
	}

	/*
	 * If other multipath route entries are still attached to
	 * this ART node we only have to unlink it.
	 */
	SRPL_FOREACH_LOCKED(mrt, &an->an_rtlist, rt_next)
		npaths++;

	if (npaths > 1) {
		KASSERT(rt->rt_refcnt >= 1);
		SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry,
		    rt_next);

		mrt = SRPL_FIRST_LOCKED(&an->an_rtlist);
		if (npaths == 2)
			mrt->rt_flags &= ~RTF_MPATH;

		goto leave;
	}

	if (art_delete(ar, an, addr, plen) == NULL)
		panic("art_delete failed to find node %p", an);

	KASSERT(rt->rt_refcnt >= 1);
	SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry, rt_next);
	art_put(an);

leave:
	rw_exit_write(&ar->ar_lock);
	rtfree(rt);

	return (error);
}

struct rtable_walk_cookie {
	int (*rwc_func)(struct rtentry *, void *, unsigned int);
	void *rwc_arg;
	struct rtentry **rwc_prt;
	unsigned int rwc_rid;
};

/*
 * Helper for rtable_walk to keep the ART code free from any "struct rtentry".
 */
int
rtable_walk_helper(struct art_node *an, void *xrwc)
{
	struct srp_ref sr;
	struct rtable_walk_cookie *rwc = xrwc;
	struct rtentry *rt;
	int error = 0;

	SRPL_FOREACH(rt, &sr, &an->an_rtlist, rt_next) {
		error = (*rwc->rwc_func)(rt, rwc->rwc_arg, rwc->rwc_rid);
		if (error != 0)
			break;
	}
	if (rwc->rwc_prt != NULL && rt != NULL) {
		rtref(rt);
		*rwc->rwc_prt = rt;
	}
	SRPL_LEAVE(&sr);

	return (error);
}
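
/*
 * Call ``func'' for every route of address family ``af'' in table
 * ``rtableid''.  The walk stops as soon as ``func'' returns a non-zero
 * value; if ``prt'' is non-NULL a reference to the route that stopped
 * the walk is returned through it.
 */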
int
rtable_walk(unsigned int rtableid, sa_family_t af, struct rtentry **prt,
    int (*func)(struct rtentry *, void *, unsigned int), void *arg)
{
	struct art_root *ar;
	struct rtable_walk_cookie rwc;
	int error;

	ar = rtable_get(rtableid, af);
	if (ar == NULL)
		return (EAFNOSUPPORT);

	rwc.rwc_func = func;
	rwc.rwc_arg = arg;
	rwc.rwc_prt = prt;
	rwc.rwc_rid = rtableid;

	error = art_walk(ar, rtable_walk_helper, &rwc);

	return (error);
}

struct rtentry *
rtable_iterate(struct rtentry *rt0)
{
	struct rtentry *rt = NULL;
	struct srp_ref sr;

	rt = SRPL_NEXT(&sr, rt0, rt_next);
	if (rt != NULL)
		rtref(rt);
	SRPL_LEAVE(&sr);
	rtfree(rt0);
	return (rt);
}

int
rtable_mpath_capable(unsigned int rtableid, sa_family_t af)
{
	return (1);
}

int
rtable_mpath_reprio(unsigned int rtableid, struct sockaddr *dst,
    int plen, uint8_t prio, struct rtentry *rt)
{
	struct art_root *ar;
	struct art_node *an;
	struct srp_ref sr;
	uint8_t *addr;
	int error = 0;

	ar = rtable_get(rtableid, dst->sa_family);
	if (ar == NULL)
		return (EAFNOSUPPORT);

	addr = satoaddr(ar, dst);

	rw_enter_write(&ar->ar_lock);
	an = art_lookup(ar, addr, plen, &sr);
	srp_leave(&sr); /* an can't go away while we have the lock */

	/* Make sure we've got a perfect match. */
	if (!an_match(an, dst, plen)) {
		error = ESRCH;
	} else if (SRPL_FIRST_LOCKED(&an->an_rtlist) == rt &&
	    SRPL_NEXT_LOCKED(rt, rt_next) == NULL) {
		/*
		 * If there's only one entry on the list do not go
		 * through an insert/remove cycle.  This is done to
		 * guarantee that ``an->an_rtlist'' is never empty
		 * when a node is in the tree.
		 */
		rt->rt_priority = prio;
	} else {
		rtref(rt); /* keep rt alive in between remove and insert */
		SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist,
		    rt, rtentry, rt_next);
		rt->rt_priority = prio;
		rtable_mpath_insert(an, rt);
		rtfree(rt);
		error = EAGAIN;
	}
	rw_exit_write(&ar->ar_lock);

	return (error);
}
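
/*
 * Keep ``an_rtlist'' sorted by ascending priority: a newly inserted
 * route is placed after any existing route of equal priority.
 */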
void
rtable_mpath_insert(struct art_node *an, struct rtentry *rt)
{
	struct rtentry *mrt, *prt = NULL;
	uint8_t prio = rt->rt_priority;

	if ((mrt = SRPL_FIRST_LOCKED(&an->an_rtlist)) == NULL) {
		SRPL_INSERT_HEAD_LOCKED(&rt_rc, &an->an_rtlist, rt, rt_next);
		return;
	}

	/* Iterate until we find the route to be placed after ``rt''. */
	while (mrt->rt_priority <= prio && SRPL_NEXT_LOCKED(mrt, rt_next)) {
		prt = mrt;
		mrt = SRPL_NEXT_LOCKED(mrt, rt_next);
	}

	if (mrt->rt_priority <= prio) {
		SRPL_INSERT_AFTER_LOCKED(&rt_rc, mrt, rt, rt_next);
	} else if (prt != NULL) {
		SRPL_INSERT_AFTER_LOCKED(&rt_rc, prt, rt, rt_next);
	} else {
		SRPL_INSERT_HEAD_LOCKED(&rt_rc, &an->an_rtlist, rt, rt_next);
	}
}

/*
 * Returns 1 if ``an'' perfectly matches (``dst'', ``plen''), 0 otherwise.
 */
int
an_match(struct art_node *an, struct sockaddr *dst, int plen)
{
	struct rtentry *rt;
	struct srp_ref sr;
	int match;

	if (an == NULL || an->an_plen != plen)
		return (0);

	rt = SRPL_FIRST(&sr, &an->an_rtlist);
	match = (memcmp(rt->rt_dest, dst, dst->sa_len) == 0);
	SRPL_LEAVE(&sr);

	return (match);
}

void
rtentry_ref(void *null, void *xrt)
{
	struct rtentry *rt = xrt;

	rtref(rt);
}

void
rtentry_unref(void *null, void *xrt)
{
	struct rtentry *rt = xrt;

	rtfree(rt);
}

/*
 * Return a pointer to the address (key).  This is a heritage from the
 * BSD radix tree needed to skip the non-address fields of the flavor
 * of "struct sockaddr" used by this routing table.
 */
static inline uint8_t *
satoaddr(struct art_root *at, struct sockaddr *sa)
{
	return (((uint8_t *)sa) + at->ar_off);
}

/*
 * Return the prefix length of a mask.
 */
int
rtable_satoplen(sa_family_t af, struct sockaddr *mask)
{
	const struct domain *dp;
	uint8_t *ap, *ep;
	int mlen, plen = 0;
	int i;

	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_rtoffset == 0)
			continue;

		if (af == dp->dom_family)
			break;
	}
	if (dp == NULL)
		return (-1);

	/* Host route */
	if (mask == NULL)
		return (dp->dom_maxplen);

	mlen = mask->sa_len;

	/* Default route */
	if (mlen == 0)
		return (0);

	ap = (uint8_t *)((uint8_t *)mask) + dp->dom_rtoffset;
	ep = (uint8_t *)((uint8_t *)mask) + mlen;
	if (ap > ep)
		return (-1);

	/* Trim trailing zeroes. */
	while (ap < ep && ep[-1] == 0)
		ep--;

	if (ap == ep)
		return (0);

	/* "Beauty" adapted from sbin/route/show.c ... */
	while (ap < ep) {
		switch (*ap++) {
		case 0xff:
			plen += 8;
			break;
		case 0xfe:
			plen += 7;
			goto out;
		case 0xfc:
			plen += 6;
			goto out;
		case 0xf8:
			plen += 5;
			goto out;
		case 0xf0:
			plen += 4;
			goto out;
		case 0xe0:
			plen += 3;
			goto out;
		case 0xc0:
			plen += 2;
			goto out;
		case 0x80:
			plen += 1;
			goto out;
		default:
			/* Non-contiguous mask. */
			return (-1);
		}
	}

out:
	if (plen > dp->dom_maxplen || ap != ep)
		return (-1);

	return (plen);
}