/*	$OpenBSD: rtable.c,v 1.72 2020/11/07 09:51:40 denis Exp $ */

/*
 * Copyright (c) 2014-2016 Martin Pieuchot
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#ifndef _KERNEL
#include "kern_compat.h"
#else
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/socket.h>
#include <sys/malloc.h>
#include <sys/queue.h>
#include <sys/domain.h>
#include <sys/srp.h>
#endif

#include <net/rtable.h>
#include <net/route.h>

/*
 * Structures used by rtable_get() to retrieve the corresponding
 * routing table for a given pair of ``af'' and ``rtableid''.
 *
 * Note that once allocated routing table heads are never freed.
 * This way we do not need to reference count them.
 *
 *	afmap		    rtmap/dommp
 *   -----------           ---------     -----
 *   |   0     |--------> | 0 | 0 | ... | 0 |	Array mapping rtableid (=index)
 *   -----------           ---------     -----	to rdomain/loopback (=value).
 *   | AF_INET |.
 *   ----------- `.       .---------.     .---------.
 *       ...       `----> | rtable0 | ... | rtableN |	Array of pointers for
 *   -----------          '---------'     '---------'	IPv4 routing tables
 *   | AF_MPLS |					indexed by ``rtableid''.
 *   -----------
 */
struct srp	*afmap;
uint8_t		 af2idx[AF_MAX+1];	/* To only allocate supported AF */
uint8_t		 af2idx_max;

/* Array of routing table pointers. */
struct rtmap {
	unsigned int	  limit;
	void		**tbl;
};

/*
 * Array of rtableid -> rdomain mapping.
 *
 * Only used for the first index as described above.
 */
struct dommp {
	unsigned int	 limit;
	/*
	 * Array to get the routing domain and loopback interface related to
	 * a routing table. Format:
	 *
	 * 8 unused bits | 16 bits for loopback index | 8 bits for rdomain
	 */
	unsigned int	*value;
};

unsigned int	 rtmap_limit = 0;

void		 rtmap_init(void);
void		 rtmap_grow(unsigned int, sa_family_t);
void		 rtmap_dtor(void *, void *);

struct srp_gc	 rtmap_gc = SRP_GC_INITIALIZER(rtmap_dtor, NULL);

void		 rtable_init_backend(void);
void		*rtable_alloc(unsigned int, unsigned int, unsigned int);
void		*rtable_get(unsigned int, sa_family_t);

void
rtmap_init(void)
{
	struct domain	*dp;
	int		 i;

	/* Start with a single table for every domain that requires it. */
	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_rtoffset == 0)
			continue;

		rtmap_grow(1, dp->dom_family);
	}

	/* Initialize the rtableid->rdomain mapping table. */
	rtmap_grow(1, 0);

	rtmap_limit = 1;
}

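/*
 * Growth example (hypothetical sequence, nothing in this file triggers
 * it by itself): on a kernel where only rtable 0 exists
 * (rtmap_limit == 1), a later rtable_add(3) ends up calling
 * rtmap_grow(4, af) for every supported AF, then rtmap_grow(4, 0) for
 * the rtableid->rdomain map, and finally sets rtmap_limit to 4.
 * Slots 1 and 2 stay NULL until those rtableids are added themselves.
 */
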
/*
 * Grow the size of the array of routing tables for AF ``af'' to ``nlimit''.
 */
void
rtmap_grow(unsigned int nlimit, sa_family_t af)
{
	struct rtmap	*map, *nmap;
	int		 i;

	KERNEL_ASSERT_LOCKED();

	KASSERT(nlimit > rtmap_limit);

	nmap = malloc(sizeof(*nmap), M_RTABLE, M_WAITOK);
	nmap->limit = nlimit;
	nmap->tbl = mallocarray(nlimit, sizeof(*nmap[0].tbl), M_RTABLE,
	    M_WAITOK|M_ZERO);

	map = srp_get_locked(&afmap[af2idx[af]]);
	if (map != NULL) {
		KASSERT(map->limit == rtmap_limit);

		for (i = 0; i < map->limit; i++)
			nmap->tbl[i] = map->tbl[i];
	}

	srp_update_locked(&rtmap_gc, &afmap[af2idx[af]], nmap);
}

void
rtmap_dtor(void *null, void *xmap)
{
	struct rtmap	*map = xmap;

	/*
	 * doesn't need to be serialized since this is the last reference
	 * to this map. there's nothing to race against.
	 */
	free(map->tbl, M_RTABLE, map->limit * sizeof(*map[0].tbl));
	free(map, M_RTABLE, sizeof(*map));
}

void
rtable_init(void)
{
	struct domain	*dp;
	int		 i;

	KASSERT(sizeof(struct rtmap) == sizeof(struct dommp));

	/* We use index 0 for the rtable/rdomain map. */
	af2idx_max = 1;
	memset(af2idx, 0, sizeof(af2idx));

	/*
	 * Assign a map index to every address family that supports
	 * routing, i.e. every domain with a non-zero dom_rtoffset.
	 */
	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_rtoffset == 0)
			continue;

		af2idx[dp->dom_family] = af2idx_max++;
	}
	rtable_init_backend();

	/*
	 * Allocate AF-to-id table now that we know how many AFs this
	 * kernel supports.
	 */
	afmap = mallocarray(af2idx_max + 1, sizeof(*afmap), M_RTABLE,
	    M_WAITOK|M_ZERO);

	rtmap_init();

	if (rtable_add(0) != 0)
		panic("unable to create default routing table");
}

int
rtable_add(unsigned int id)
{
	struct domain	*dp;
	void		*tbl;
	struct rtmap	*map;
	struct dommp	*dmm;
	sa_family_t	 af;
	unsigned int	 off, alen;
	int		 i;

	KERNEL_ASSERT_LOCKED();

	if (id > RT_TABLEID_MAX)
		return (EINVAL);

	if (rtable_exists(id))
		return (EEXIST);

	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_rtoffset == 0)
			continue;

		af = dp->dom_family;
		off = dp->dom_rtoffset;
		alen = dp->dom_maxplen;

		if (id >= rtmap_limit)
			rtmap_grow(id + 1, af);

		tbl = rtable_alloc(id, alen, off);
		if (tbl == NULL)
			return (ENOMEM);

		map = srp_get_locked(&afmap[af2idx[af]]);
		map->tbl[id] = tbl;
	}

	/* Reflect possible growth. */
	if (id >= rtmap_limit) {
		rtmap_grow(id + 1, 0);
		rtmap_limit = id + 1;
	}

	/* Use main rtable/rdomain by default. */
	dmm = srp_get_locked(&afmap[0]);
	dmm->value[id] = 0;

	return (0);
}

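/*
 * Typical usage sketch (error handling of a real caller omitted, the
 * rtableid 5 is only an example):
 *
 *	error = rtable_add(5);
 *	if (error == 0)
 *		tbl = rtable_get(5, AF_INET);
 *
 * rtable_get() returns NULL both for AFs without routing support and
 * for rtableids that have not been added yet.
 */
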
void *
rtable_get(unsigned int rtableid, sa_family_t af)
{
	struct rtmap	*map;
	void		*tbl = NULL;
	struct srp_ref	 sr;

	if (af >= nitems(af2idx) || af2idx[af] == 0)
		return (NULL);

	map = srp_enter(&sr, &afmap[af2idx[af]]);
	if (rtableid < map->limit)
		tbl = map->tbl[rtableid];
	srp_leave(&sr);

	return (tbl);
}

int
rtable_exists(unsigned int rtableid)
{
	struct domain	*dp;
	void		*tbl;
	int		 i;

	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_rtoffset == 0)
			continue;

		tbl = rtable_get(rtableid, dp->dom_family);
		if (tbl != NULL)
			return (1);
	}

	return (0);
}

int
rtable_empty(unsigned int rtableid)
{
	struct domain	*dp;
	int		 i;
	struct art_root	*tbl;

	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_rtoffset == 0)
			continue;

		tbl = rtable_get(rtableid, dp->dom_family);
		if (tbl == NULL)
			continue;
		if (tbl->ar_root.ref != NULL)
			return (0);
	}

	return (1);
}

unsigned int
rtable_l2(unsigned int rtableid)
{
	struct dommp	*dmm;
	unsigned int	 rdomain = 0;
	struct srp_ref	 sr;

	dmm = srp_enter(&sr, &afmap[0]);
	if (rtableid < dmm->limit)
		rdomain = (dmm->value[rtableid] & RT_TABLEID_MASK);
	srp_leave(&sr);

	return (rdomain);
}

unsigned int
rtable_loindex(unsigned int rtableid)
{
	struct dommp	*dmm;
	unsigned int	 loifidx = 0;
	struct srp_ref	 sr;

	dmm = srp_enter(&sr, &afmap[0]);
	if (rtableid < dmm->limit)
		loifidx = (dmm->value[rtableid] >> RT_TABLEID_BITS);
	srp_leave(&sr);

	return (loifidx);
}

void
rtable_l2set(unsigned int rtableid, unsigned int rdomain, unsigned int loifidx)
{
	struct dommp	*dmm;
	unsigned int	 value;

	KERNEL_ASSERT_LOCKED();

	if (!rtable_exists(rtableid) || !rtable_exists(rdomain))
		return;

	value = (rdomain & RT_TABLEID_MASK) | (loifidx << RT_TABLEID_BITS);

	dmm = srp_get_locked(&afmap[0]);
	dmm->value[rtableid] = value;
}


static inline uint8_t	*satoaddr(struct art_root *, struct sockaddr *);

int	an_match(struct art_node *, struct sockaddr *, int);
void	rtentry_ref(void *, void *);
void	rtentry_unref(void *, void *);

void	rtable_mpath_insert(struct art_node *, struct rtentry *);

struct srpl_rc rt_rc = SRPL_RC_INITIALIZER(rtentry_ref, rtentry_unref, NULL);

void
rtable_init_backend(void)
{
	art_init();
}

void *
rtable_alloc(unsigned int rtableid, unsigned int alen, unsigned int off)
{
	return (art_alloc(rtableid, alen, off));
}

int
rtable_setsource(unsigned int rtableid, int af, struct sockaddr *src)
{
	struct art_root	*ar;

	if ((ar = rtable_get(rtableid, af)) == NULL)
		return (EAFNOSUPPORT);

	ar->source = src;

	return (0);
}

struct sockaddr *
rtable_getsource(unsigned int rtableid, int af)
{
	struct art_root	*ar;

	ar = rtable_get(rtableid, af);
	if (ar == NULL)
		return (NULL);

	return (ar->source);
}

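/*
 * Note that rtable_setsource() stores the caller's pointer as-is, the
 * address is not copied.  A minimal usage sketch (``sa'' stands for a
 * hypothetical, caller-owned sockaddr describing the preferred IPv4
 * source address):
 *
 *	error = rtable_setsource(0, AF_INET, sa);
 *	...
 *	rtable_clearsource(0, sa);
 */
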
void
rtable_clearsource(unsigned int rtableid, struct sockaddr *src)
{
	struct sockaddr	*addr;

	addr = rtable_getsource(rtableid, src->sa_family);
	if (addr && (addr->sa_len == src->sa_len)) {
		if (memcmp(src, addr, addr->sa_len) == 0) {
			rtable_setsource(rtableid, src->sa_family, NULL);
		}
	}
}

struct rtentry *
rtable_lookup(unsigned int rtableid, struct sockaddr *dst,
    struct sockaddr *mask, struct sockaddr *gateway, uint8_t prio)
{
	struct art_root	*ar;
	struct art_node	*an;
	struct rtentry	*rt = NULL;
	struct srp_ref	 sr, nsr;
	uint8_t		*addr;
	int		 plen;

	ar = rtable_get(rtableid, dst->sa_family);
	if (ar == NULL)
		return (NULL);

	addr = satoaddr(ar, dst);

	/* No need for a perfect match. */
	if (mask == NULL) {
		an = art_match(ar, addr, &nsr);
		if (an == NULL)
			goto out;
	} else {
		plen = rtable_satoplen(dst->sa_family, mask);
		if (plen == -1)
			return (NULL);

		an = art_lookup(ar, addr, plen, &nsr);

		/* Make sure we've got a perfect match. */
		if (!an_match(an, dst, plen))
			goto out;
	}

	SRPL_FOREACH(rt, &sr, &an->an_rtlist, rt_next) {
		if (prio != RTP_ANY &&
		    (rt->rt_priority & RTP_MASK) != (prio & RTP_MASK))
			continue;

		if (gateway == NULL)
			break;

		if (rt->rt_gateway->sa_len == gateway->sa_len &&
		    memcmp(rt->rt_gateway, gateway, gateway->sa_len) == 0)
			break;
	}
	if (rt != NULL)
		rtref(rt);

	SRPL_LEAVE(&sr);
out:
	srp_leave(&nsr);

	return (rt);
}

struct rtentry *
rtable_match(unsigned int rtableid, struct sockaddr *dst, uint32_t *src)
{
	struct art_root	*ar;
	struct art_node	*an;
	struct rtentry	*rt = NULL;
	struct srp_ref	 sr, nsr;
	uint8_t		*addr;
	int		 hash;

	ar = rtable_get(rtableid, dst->sa_family);
	if (ar == NULL)
		return (NULL);

	addr = satoaddr(ar, dst);

	an = art_match(ar, addr, &nsr);
	if (an == NULL)
		goto out;

	rt = SRPL_FIRST(&sr, &an->an_rtlist);
	rtref(rt);
	SRPL_LEAVE(&sr);

	/* Gateway selection by Hash-Threshold (RFC 2992) */
	if ((hash = rt_hash(rt, dst, src)) != -1) {
		struct rtentry	*mrt;
		int		 threshold, npaths = 0;

		KASSERT(hash <= 0xffff);

		SRPL_FOREACH(mrt, &sr, &an->an_rtlist, rt_next) {
			/* Only count nexthops with the same priority. */
			if (mrt->rt_priority == rt->rt_priority)
				npaths++;
		}
		SRPL_LEAVE(&sr);

		threshold = (0xffff / npaths) + 1;

		/*
		 * we have no protection against concurrent modification of the
		 * route list attached to the node, so we won't necessarily
		 * have the same number of routes.  for most modifications,
		 * we'll pick a route that we wouldn't have if we only saw the
		 * list before or after the change.  if we were going to use
		 * the last available route, but it got removed, we'll hit
		 * the end of the list and then pick the first route.
		 */

		mrt = SRPL_FIRST(&sr, &an->an_rtlist);
		while (hash > threshold && mrt != NULL) {
			if (mrt->rt_priority == rt->rt_priority)
				hash -= threshold;
			mrt = SRPL_FOLLOW(&sr, mrt, rt_next);
		}

		if (mrt != NULL) {
			rtref(mrt);
			rtfree(rt);
			rt = mrt;
		}
		SRPL_LEAVE(&sr);
	}
out:
	srp_leave(&nsr);
	return (rt);
}

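/*
 * Worked example for the Hash-Threshold selection above (numbers are
 * illustrative only): with three nexthops of equal priority the
 * threshold is 0xffff / 3 + 1 == 21846.  A flow hashing to 5000 keeps
 * the first route, one hashing to 30000 walks past it and picks the
 * second, and one hashing to 60000 ends up on the third.  Each nexthop
 * thus receives roughly a third of the flows while all packets of a
 * given flow keep using the same nexthop.
 */
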
int
rtable_insert(unsigned int rtableid, struct sockaddr *dst,
    struct sockaddr *mask, struct sockaddr *gateway, uint8_t prio,
    struct rtentry *rt)
{
	struct rtentry	*mrt;
	struct srp_ref	 sr;
	struct art_root	*ar;
	struct art_node	*an, *prev;
	uint8_t		*addr;
	int		 plen;
	unsigned int	 rt_flags;
	int		 error = 0;

	ar = rtable_get(rtableid, dst->sa_family);
	if (ar == NULL)
		return (EAFNOSUPPORT);

	addr = satoaddr(ar, dst);
	plen = rtable_satoplen(dst->sa_family, mask);
	if (plen == -1)
		return (EINVAL);

	rtref(rt); /* guarantee rtfree won't do anything during insert */
	rw_enter_write(&ar->ar_lock);

	/* Do not permit exactly the same dst/mask/gw pair. */
	an = art_lookup(ar, addr, plen, &sr);
	srp_leave(&sr); /* an can't go away while we have the lock */
	if (an_match(an, dst, plen)) {
		struct rtentry	*mrt;
		int		 mpathok = ISSET(rt->rt_flags, RTF_MPATH);

		SRPL_FOREACH_LOCKED(mrt, &an->an_rtlist, rt_next) {
			if (prio != RTP_ANY &&
			    (mrt->rt_priority & RTP_MASK) != (prio & RTP_MASK))
				continue;

			if (!mpathok ||
			    (mrt->rt_gateway->sa_len == gateway->sa_len &&
			    !memcmp(mrt->rt_gateway, gateway, gateway->sa_len))){
				error = EEXIST;
				goto leave;
			}
		}
	}

	an = art_get(dst, plen);
	if (an == NULL) {
		error = ENOBUFS;
		goto leave;
	}

	/* prepare for immediate operation if insert succeeds */
	rt_flags = rt->rt_flags;
	rt->rt_flags &= ~RTF_MPATH;
	rt->rt_dest = dst;
	rt->rt_plen = plen;
	SRPL_INSERT_HEAD_LOCKED(&rt_rc, &an->an_rtlist, rt, rt_next);

	prev = art_insert(ar, an, addr, plen);
	if (prev != an) {
		SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry,
		    rt_next);
		rt->rt_flags = rt_flags;
		art_put(an);

		if (prev == NULL) {
			error = ESRCH;
			goto leave;
		}

		an = prev;

		mrt = SRPL_FIRST_LOCKED(&an->an_rtlist);
		KASSERT(mrt != NULL);
		KASSERT((rt->rt_flags & RTF_MPATH) || mrt->rt_priority != prio);

		/*
		 * An ART node with the same destination/netmask already
		 * exists, MPATH conflict must have been already checked.
		 */
		if (rt->rt_flags & RTF_MPATH) {
			/*
			 * Only keep the RTF_MPATH flag if two routes have
			 * the same gateway.
			 */
			rt->rt_flags &= ~RTF_MPATH;
			SRPL_FOREACH_LOCKED(mrt, &an->an_rtlist, rt_next) {
				if (mrt->rt_priority == prio) {
					mrt->rt_flags |= RTF_MPATH;
					rt->rt_flags |= RTF_MPATH;
				}
			}
		}

		/* Put newly inserted entry at the right place. */
		rtable_mpath_insert(an, rt);
	}
leave:
	rw_exit_write(&ar->ar_lock);
	rtfree(rt);
	return (error);
}

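/*
 * Multipath behaviour of rtable_insert() in a nutshell (prefix and
 * gateways are illustrative): inserting 192.168.1.0/24 twice with
 * RTF_MPATH set, the same priority and two different gateways
 * succeeds, and both entries end up carrying RTF_MPATH on the shared
 * ART node.  Re-inserting a gateway that is already present at that
 * priority, or inserting without RTF_MPATH on top of an existing entry
 * of the same priority, fails with EEXIST.
 */
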
int
rtable_delete(unsigned int rtableid, struct sockaddr *dst,
    struct sockaddr *mask, struct rtentry *rt)
{
	struct art_root	*ar;
	struct art_node	*an;
	struct srp_ref	 sr;
	uint8_t		*addr;
	int		 plen;
	struct rtentry	*mrt;
	int		 npaths = 0;
	int		 error = 0;

	ar = rtable_get(rtableid, dst->sa_family);
	if (ar == NULL)
		return (EAFNOSUPPORT);

	addr = satoaddr(ar, dst);
	plen = rtable_satoplen(dst->sa_family, mask);
	if (plen == -1)
		return (EINVAL);

	rtref(rt); /* guarantee rtfree won't do anything under ar_lock */
	rw_enter_write(&ar->ar_lock);
	an = art_lookup(ar, addr, plen, &sr);
	srp_leave(&sr); /* an can't go away while we have the lock */

	/* Make sure we've got a perfect match. */
	if (!an_match(an, dst, plen)) {
		error = ESRCH;
		goto leave;
	}

	/*
	 * If other multipath route entries are still attached to
	 * this ART node we only have to unlink it.
	 */
	SRPL_FOREACH_LOCKED(mrt, &an->an_rtlist, rt_next)
		npaths++;

	if (npaths > 1) {
		KASSERT(rt->rt_refcnt >= 1);
		SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry,
		    rt_next);

		mrt = SRPL_FIRST_LOCKED(&an->an_rtlist);
		if (npaths == 2)
			mrt->rt_flags &= ~RTF_MPATH;

		goto leave;
	}

	if (art_delete(ar, an, addr, plen) == NULL)
		panic("art_delete failed to find node %p", an);

	KASSERT(rt->rt_refcnt >= 1);
	SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry, rt_next);
	art_put(an);

leave:
	rw_exit_write(&ar->ar_lock);
	rtfree(rt);

	return (error);
}

struct rtable_walk_cookie {
	int		(*rwc_func)(struct rtentry *, void *, unsigned int);
	void		 *rwc_arg;
	struct rtentry	**rwc_prt;
	unsigned int	  rwc_rid;
};

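/*
 * A walk callback has the signature stored in ``rwc_func'': it is
 * handed each rtentry, the caller's argument and the rtable id, and a
 * non-zero return value aborts the walk and is passed back to the
 * caller.  Minimal sketch (the callback name and body are
 * hypothetical):
 *
 *	int
 *	count_routes(struct rtentry *rt, void *arg, unsigned int rid)
 *	{
 *		(*(unsigned int *)arg)++;
 *		return (0);
 *	}
 *
 *	unsigned int n = 0;
 *	error = rtable_walk(0, AF_INET, NULL, count_routes, &n);
 */
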
/*
 * Helper for rtable_walk to keep the ART code free from any
 * "struct rtentry".
 */
int
rtable_walk_helper(struct art_node *an, void *xrwc)
{
	struct srp_ref			 sr;
	struct rtable_walk_cookie	*rwc = xrwc;
	struct rtentry			*rt;
	int				 error = 0;

	SRPL_FOREACH(rt, &sr, &an->an_rtlist, rt_next) {
		error = (*rwc->rwc_func)(rt, rwc->rwc_arg, rwc->rwc_rid);
		if (error != 0)
			break;
	}
	if (rwc->rwc_prt != NULL && rt != NULL) {
		rtref(rt);
		*rwc->rwc_prt = rt;
	}
	SRPL_LEAVE(&sr);

	return (error);
}

int
rtable_walk(unsigned int rtableid, sa_family_t af, struct rtentry **prt,
    int (*func)(struct rtentry *, void *, unsigned int), void *arg)
{
	struct art_root			*ar;
	struct rtable_walk_cookie	 rwc;
	int				 error;

	ar = rtable_get(rtableid, af);
	if (ar == NULL)
		return (EAFNOSUPPORT);

	rwc.rwc_func = func;
	rwc.rwc_arg = arg;
	rwc.rwc_prt = prt;
	rwc.rwc_rid = rtableid;

	error = art_walk(ar, rtable_walk_helper, &rwc);

	return (error);
}

struct rtentry *
rtable_iterate(struct rtentry *rt0)
{
	struct rtentry	*rt = NULL;
	struct srp_ref	 sr;

	rt = SRPL_NEXT(&sr, rt0, rt_next);
	if (rt != NULL)
		rtref(rt);
	SRPL_LEAVE(&sr);
	rtfree(rt0);
	return (rt);
}

int
rtable_mpath_capable(unsigned int rtableid, sa_family_t af)
{
	return (1);
}

int
rtable_mpath_reprio(unsigned int rtableid, struct sockaddr *dst,
    int plen, uint8_t prio, struct rtentry *rt)
{
	struct art_root	*ar;
	struct art_node	*an;
	struct srp_ref	 sr;
	uint8_t		*addr;
	int		 error = 0;

	ar = rtable_get(rtableid, dst->sa_family);
	if (ar == NULL)
		return (EAFNOSUPPORT);

	addr = satoaddr(ar, dst);

	rw_enter_write(&ar->ar_lock);
	an = art_lookup(ar, addr, plen, &sr);
	srp_leave(&sr); /* an can't go away while we have the lock */

	/* Make sure we've got a perfect match. */
	if (!an_match(an, dst, plen)) {
		error = ESRCH;
	} else if (SRPL_FIRST_LOCKED(&an->an_rtlist) == rt &&
	    SRPL_NEXT_LOCKED(rt, rt_next) == NULL) {
		/*
		 * If there's only one entry on the list do not go
		 * through an insert/remove cycle.  This is done to
		 * guarantee that ``an->an_rtlist'' is never empty
		 * when a node is in the tree.
		 */
		rt->rt_priority = prio;
	} else {
		rtref(rt); /* keep rt alive in between remove and insert */
		SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist,
		    rt, rtentry, rt_next);
		rt->rt_priority = prio;
		rtable_mpath_insert(an, rt);
		rtfree(rt);
		error = EAGAIN;
	}
	rw_exit_write(&ar->ar_lock);

	return (error);
}

void
rtable_mpath_insert(struct art_node *an, struct rtentry *rt)
{
	struct rtentry	*mrt, *prt = NULL;
	uint8_t		 prio = rt->rt_priority;

	if ((mrt = SRPL_FIRST_LOCKED(&an->an_rtlist)) == NULL) {
		SRPL_INSERT_HEAD_LOCKED(&rt_rc, &an->an_rtlist, rt, rt_next);
		return;
	}

	/* Iterate until we find the route to be placed after ``rt''. */
	while (mrt->rt_priority <= prio && SRPL_NEXT_LOCKED(mrt, rt_next)) {
		prt = mrt;
		mrt = SRPL_NEXT_LOCKED(mrt, rt_next);
	}

	if (mrt->rt_priority <= prio) {
		SRPL_INSERT_AFTER_LOCKED(&rt_rc, mrt, rt, rt_next);
	} else if (prt != NULL) {
		SRPL_INSERT_AFTER_LOCKED(&rt_rc, prt, rt, rt_next);
	} else {
		SRPL_INSERT_HEAD_LOCKED(&rt_rc, &an->an_rtlist, rt, rt_next);
	}
}

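/*
 * Ordering example for rtable_mpath_insert() (priority values are made
 * up): given an existing list 8 -> 8 -> 32, a new entry of priority 16
 * is linked after the second entry, yielding 8 -> 8 -> 16 -> 32, and a
 * new entry of priority 4 becomes the new head.  Entries of equal
 * priority therefore stay adjacent and the numerically lowest
 * priorities sit at the front, which is what rtable_match() picks
 * first.
 */
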
/*
 * Returns 1 if ``an'' perfectly matches (``dst'', ``plen''), 0 otherwise.
 */
int
an_match(struct art_node *an, struct sockaddr *dst, int plen)
{
	struct rtentry	*rt;
	struct srp_ref	 sr;
	int		 match;

	if (an == NULL || an->an_plen != plen)
		return (0);

	rt = SRPL_FIRST(&sr, &an->an_rtlist);
	match = (memcmp(rt->rt_dest, dst, dst->sa_len) == 0);
	SRPL_LEAVE(&sr);

	return (match);
}

void
rtentry_ref(void *null, void *xrt)
{
	struct rtentry	*rt = xrt;

	rtref(rt);
}

void
rtentry_unref(void *null, void *xrt)
{
	struct rtentry	*rt = xrt;

	rtfree(rt);
}

/*
 * Return a pointer to the address (key).  This is a legacy of the
 * BSD radix tree needed to skip the non-address fields from the flavor
 * of "struct sockaddr" used by this routing table.
 */
static inline uint8_t *
satoaddr(struct art_root *at, struct sockaddr *sa)
{
	return (((uint8_t *)sa) + at->ar_off);
}

/*
 * Return the prefix length of a mask.
 */
int
rtable_satoplen(sa_family_t af, struct sockaddr *mask)
{
	struct domain	*dp;
	uint8_t		*ap, *ep;
	int		 mlen, plen = 0;
	int		 i;

	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_rtoffset == 0)
			continue;

		if (af == dp->dom_family)
			break;
	}
	if (dp == NULL)
		return (-1);

	/* Host route */
	if (mask == NULL)
		return (dp->dom_maxplen);

	mlen = mask->sa_len;

	/* Default route */
	if (mlen == 0)
		return (0);

	ap = (uint8_t *)((uint8_t *)mask) + dp->dom_rtoffset;
	ep = (uint8_t *)((uint8_t *)mask) + mlen;
	if (ap > ep)
		return (-1);

	/* Trim trailing zeroes. */
	while (ap < ep && ep[-1] == 0)
		ep--;

	if (ap == ep)
		return (0);

	/* "Beauty" adapted from sbin/route/show.c ... */
	while (ap < ep) {
		switch (*ap++) {
		case 0xff:
			plen += 8;
			break;
		case 0xfe:
			plen += 7;
			goto out;
		case 0xfc:
			plen += 6;
			goto out;
		case 0xf8:
			plen += 5;
			goto out;
		case 0xf0:
			plen += 4;
			goto out;
		case 0xe0:
			plen += 3;
			goto out;
		case 0xc0:
			plen += 2;
			goto out;
		case 0x80:
			plen += 1;
			goto out;
		default:
			/* Non-contiguous mask. */
			return (-1);
		}
	}

out:
	if (plen > dp->dom_maxplen || ap != ep)
		return (-1);

	return (plen);
}

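/*
 * Example of the conversion above with an ordinary IPv4 netmask: a
 * sockaddr carrying 255.255.252.0 yields the key bytes
 * 0xff 0xff 0xfc 0x00.  The trailing zero byte is trimmed, the two
 * 0xff bytes contribute 8 bits each and 0xfc contributes 6 more, so
 * rtable_satoplen(AF_INET, mask) returns 22.  A non-contiguous mask
 * such as 255.0.255.0 hits the default case and returns -1.
 */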