1 /* $OpenBSD: rtable.c,v 1.58 2017/02/28 09:50:13 mpi Exp $ */ 2 3 /* 4 * Copyright (c) 2014-2016 Martin Pieuchot 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 #ifndef _KERNEL 20 #include "kern_compat.h" 21 #else 22 #include <sys/param.h> 23 #include <sys/systm.h> 24 #include <sys/socket.h> 25 #include <sys/malloc.h> 26 #include <sys/queue.h> 27 #include <sys/domain.h> 28 #include <sys/srp.h> 29 #endif 30 31 #include <net/rtable.h> 32 #include <net/route.h> 33 34 /* 35 * Structures used by rtable_get() to retrieve the corresponding 36 * routing table for a given pair of ``af'' and ``rtableid''. 37 * 38 * Note that once allocated routing table heads are never freed. 39 * This way we do not need to reference count them. 40 * 41 * afmap rtmap/dommp 42 * ----------- --------- ----- 43 * | 0 |--------> | 0 | 0 | ... | 0 | Array mapping rtableid (=index) 44 * ----------- --------- ----- to rdomain/loopback (=value). 45 * | AF_INET |. 46 * ----------- `. .---------. .---------. 47 * ... `----> | rtable0 | ... | rtableN | Array of pointers for 48 * ----------- '---------' '---------' IPv4 routing tables 49 * | AF_MPLS | indexed by ``rtableid''. 
 * -----------
 */

/*
 * Per-AF array of SRP-protected pointers.  Slot 0 holds the
 * rtableid -> rdomain/loopback map (struct dommp); slot af2idx[af]
 * holds the array of routing-table pointers (struct rtmap) for ``af''.
 */
struct srp	*afmap;
uint8_t		 af2idx[AF_MAX+1];	/* To only allocate supported AF */
uint8_t		 af2idx_max;		/* next free slot in ``afmap'' */

/* Array of routing table pointers. */
struct rtmap {
	unsigned int	  limit;	/* number of entries in ``tbl'' */
	void		**tbl;		/* tbl[rtableid] = backend pointer */
};

/*
 * Array of rtableid -> rdomain mapping.
 *
 * Only used for the first index as described above.
 */
struct dommp {
	unsigned int	 limit;		/* number of entries in ``value'' */
	/*
	 * Array to get the routing domain and loopback interface related to
	 * a routing table. Format:
	 *
	 * 8 unused bits | 16 bits for loopback index | 8 bits for rdomain
	 */
	unsigned int	*value;
};

/* Current size of every per-AF rtmap (they all grow in lockstep). */
unsigned int	 rtmap_limit = 0;

void		 rtmap_init(void);
void		 rtmap_grow(unsigned int, sa_family_t);
void		 rtmap_dtor(void *, void *);

/* SRP garbage collector calling rtmap_dtor() on retired rtmaps. */
struct srp_gc	 rtmap_gc = SRP_GC_INITIALIZER(rtmap_dtor, NULL);

void		 rtable_init_backend(unsigned int);
void		*rtable_alloc(unsigned int, unsigned int, unsigned int);
void		*rtable_get(unsigned int, sa_family_t);

/*
 * Allocate the initial (single entry) routing-table array for every
 * domain that needs one, plus the rtableid->rdomain map stored at
 * index 0 of ``afmap''.  Called from rtable_init() at boot; runs with
 * the kernel lock held (rtmap_grow() asserts it).
 */
void
rtmap_init(void)
{
	struct domain	*dp;
	int		 i;

	/* Start with a single table for every domain that requires it. */
	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_rtoffset == 0)
			continue;

		rtmap_grow(1, dp->dom_family);
	}

	/* Initialize the rtableid->rdomain mapping table. */
	rtmap_grow(1, 0);

	rtmap_limit = 1;
}

/*
 * Grow the size of the array of routing table for AF ``af'' to ``nlimit''.
 */
void
rtmap_grow(unsigned int nlimit, sa_family_t af)
{
	struct rtmap	*map, *nmap;
	int		 i;

	KERNEL_ASSERT_LOCKED();

	KASSERT(nlimit > rtmap_limit);

	/* Build the bigger replacement array first... */
	nmap = malloc(sizeof(*nmap), M_RTABLE, M_WAITOK);
	nmap->limit = nlimit;
	nmap->tbl = mallocarray(nlimit, sizeof(*nmap[0].tbl), M_RTABLE,
	    M_WAITOK|M_ZERO);

	/* ...copy the existing table pointers into it... */
	map = srp_get_locked(&afmap[af2idx[af]]);
	if (map != NULL) {
		KASSERT(map->limit == rtmap_limit);

		/* NOTE(review): ``i'' is signed vs. unsigned ``map->limit''. */
		for (i = 0; i < map->limit; i++)
			nmap->tbl[i] = map->tbl[i];
	}

	/* ...then publish; the old map is freed via rtmap_gc/rtmap_dtor(). */
	srp_update_locked(&rtmap_gc, &afmap[af2idx[af]], nmap);
}

/*
 * SRP garbage-collection callback: free a retired rtmap once the last
 * reader is gone.
 */
void
rtmap_dtor(void *null, void *xmap)
{
	struct rtmap	*map = xmap;

	/*
	 * doesn't need to be serialized since this is the last reference
	 * to this map.  there's nothing to race against.
	 */
	free(map->tbl, M_RTABLE, map->limit * sizeof(*map[0].tbl));
	free(map, M_RTABLE, sizeof(*map));
}

/*
 * Boot-time initialization of the rtable layer: build the AF->slot
 * mapping, compute the maximum key length for the backend, allocate
 * ``afmap'' and create the default routing table (id 0).
 */
void
rtable_init(void)
{
	struct domain	*dp;
	unsigned int	 keylen = 0;
	int		 i;

	/* Both map flavors share slot 0 of ``afmap''; they must alias. */
	KASSERT(sizeof(struct rtmap) == sizeof(struct dommp));

	/* We use index 0 for the rtable/rdomain map. */
	af2idx_max = 1;
	memset(af2idx, 0, sizeof(af2idx));

	/*
	 * Compute the maximum supported key length in case the routing
	 * table backend needs it.
	 */
	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_rtoffset == 0)
			continue;

		af2idx[dp->dom_family] = af2idx_max++;
		if (dp->dom_rtkeylen > keylen)
			keylen = dp->dom_rtkeylen;

	}
	rtable_init_backend(keylen);

	/*
	 * Allocate AF-to-id table now that we know how many AFs this
	 * kernel supports.
	 */
	afmap = mallocarray(af2idx_max + 1, sizeof(*afmap), M_RTABLE,
	    M_WAITOK|M_ZERO);

	rtmap_init();

	if (rtable_add(0) != 0)
		panic("unable to create default routing table");
}

/*
 * Create routing table ``id'' for every AF that supports routing,
 * growing the per-AF arrays as needed.
 *
 * Returns 0 on success, EINVAL if ``id'' is out of range, EEXIST if
 * the table already exists, or ENOMEM.
 */
int
rtable_add(unsigned int id)
{
	struct domain	*dp;
	void		*tbl;
	struct rtmap	*map;
	struct dommp	*dmm;
	sa_family_t	 af;
	unsigned int	 off, alen;
	int		 i;

	KERNEL_ASSERT_LOCKED();

	if (id > RT_TABLEID_MAX)
		return (EINVAL);

	if (rtable_exists(id))
		return (EEXIST);

	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_rtoffset == 0)
			continue;

		af = dp->dom_family;
		off = dp->dom_rtoffset;
		alen = dp->dom_maxplen;

		if (id >= rtmap_limit)
			rtmap_grow(id + 1, af);

		/*
		 * NOTE(review): on ENOMEM below, tables already created
		 * for earlier AFs stay in place and ``rtmap_limit'' is
		 * left untouched — confirm this partial state is benign
		 * for a retry.
		 */
		tbl = rtable_alloc(id, alen, off);
		if (tbl == NULL)
			return (ENOMEM);

		map = srp_get_locked(&afmap[af2idx[af]]);
		map->tbl[id] = tbl;
	}

	/* Reflect possible growth. */
	if (id >= rtmap_limit) {
		rtmap_grow(id + 1, 0);
		rtmap_limit = id + 1;
	}

	/* Use main rtable/rdomain by default. */
	dmm = srp_get_locked(&afmap[0]);
	dmm->value[id] = 0;

	return (0);
}

/*
 * Return the backend pointer for (``rtableid'', ``af''), or NULL if the
 * AF is unsupported or the id is out of range.  Lock-free: the map is
 * read through SRP.
 */
void *
rtable_get(unsigned int rtableid, sa_family_t af)
{
	struct rtmap	*map;
	void		*tbl = NULL;
	struct srp_ref	 sr;

	if (af >= nitems(af2idx) || af2idx[af] == 0)
		return (NULL);

	map = srp_enter(&sr, &afmap[af2idx[af]]);
	if (rtableid < map->limit)
		tbl = map->tbl[rtableid];
	srp_leave(&sr);

	return (tbl);
}

/*
 * Return 1 if ``rtableid'' exists for at least one AF, 0 otherwise.
 */
int
rtable_exists(unsigned int rtableid)
{
	struct domain	*dp;
	void		*tbl;
	int		 i;

	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_rtoffset == 0)
			continue;

		tbl = rtable_get(rtableid, dp->dom_family);
		if (tbl != NULL)
			return (1);
	}

	return (0);
}

/*
 * Return the routing domain of ``rtableid'' (0 if out of range).
 */
unsigned int
rtable_l2(unsigned int rtableid)
{
	struct dommp	*dmm;
	unsigned int	 rdomain = 0;
	struct srp_ref	 sr;

	dmm = srp_enter(&sr, &afmap[0]);
	if (rtableid < dmm->limit)
		rdomain = (dmm->value[rtableid] & RT_TABLEID_MASK);
	srp_leave(&sr);

	return (rdomain);
}

/*
 * Return the loopback interface index bound to ``rtableid'' (0 if out
 * of range).
 */
unsigned int
rtable_loindex(unsigned int rtableid)
{
	struct dommp	*dmm;
	unsigned int	 loifidx = 0;
	struct srp_ref	 sr;

	dmm = srp_enter(&sr, &afmap[0]);
	if (rtableid < dmm->limit)
		loifidx = (dmm->value[rtableid] >> RT_TABLEID_BITS);
	srp_leave(&sr);

	return (loifidx);
}

/*
 * Bind routing domain ``rdomain'' and loopback interface ``loifidx''
 * to ``rtableid''.  Both tables must exist; silently does nothing
 * otherwise.
 */
void
rtable_l2set(unsigned int rtableid, unsigned int rdomain, unsigned int loifidx)
{
	struct dommp	*dmm;
	unsigned int	 value;

	KERNEL_ASSERT_LOCKED();

	if (!rtable_exists(rtableid) || !rtable_exists(rdomain))
		return;

	/* Pack rdomain and loopback ifindex as described in struct dommp. */
	value = (rdomain & RT_TABLEID_MASK) | (loifidx << RT_TABLEID_BITS);

	dmm = srp_get_locked(&afmap[0]);
	dmm->value[rtableid] = value;
}

#ifndef ART
/*
 * Radix (BSD patricia trie) backend.
 */
void
rtable_init_backend(unsigned int keylen)
{
	rn_init(keylen);	/* initialize all zeroes, all ones, mask
				    table */
}

/*
 * Allocate a radix tree head for ``rtableid''.  ``alen'' is unused by
 * this backend; ``off'' is the bit offset of the key inside the
 * sockaddr.  Returns NULL on failure.
 */
void *
rtable_alloc(unsigned int rtableid, unsigned int alen, unsigned int off)
{
	struct radix_node_head	*rnh = NULL;

	if (rn_inithead((void **)&rnh, off)) {
		rnh->rnh_rtableid = rtableid;
	}

	return (rnh);
}

/*
 * Exact-match lookup.  ``gateway'' and ``prio'' are ignored by the
 * radix backend (no multipath).  Returns a referenced rtentry or NULL.
 */
struct rtentry *
rtable_lookup(unsigned int rtableid, struct sockaddr *dst,
    struct sockaddr *mask, struct sockaddr *gateway, uint8_t prio)
{
	struct radix_node_head	*rnh;
	struct radix_node	*rn;
	struct rtentry		*rt;

	rnh = rtable_get(rtableid, dst->sa_family);
	if (rnh == NULL)
		return (NULL);

	rn = rn_lookup(dst, mask, rnh);
	if (rn == NULL || (rn->rn_flags & RNF_ROOT) != 0)
		return (NULL);

	rt = ((struct rtentry *)rn);

	rtref(rt);
	return (rt);
}

/*
 * Longest-prefix match for ``dst''.  ``src'' is unused by the radix
 * backend.  Returns a referenced rtentry or NULL.
 */
struct rtentry *
rtable_match(unsigned int rtableid, struct sockaddr *dst, uint32_t *src)
{
	struct radix_node_head	*rnh;
	struct radix_node	*rn;
	struct rtentry		*rt = NULL;

	rnh = rtable_get(rtableid, dst->sa_family);
	if (rnh == NULL)
		return (NULL);

	/* rn_match() walks the shared trie; serialize with the big lock. */
	KERNEL_LOCK();
	rn = rn_match(dst, rnh);
	if (rn == NULL || (rn->rn_flags & RNF_ROOT) != 0)
		goto out;

	rt = ((struct rtentry *)rn);
	rtref(rt);
out:
	KERNEL_UNLOCK();
	return (rt);
}

/*
 * Insert ``rt'' for ``dst''/``mask''.  ``gateway'' is unused here;
 * ``prio'' is passed through to rn_addroute().  Takes a reference on
 * ``rt'' when inserted.
 *
 * NOTE(review): a duplicate dst/mask also yields ESRCH here, while the
 * ART backend reports EEXIST — confirm callers cope with either.
 */
int
rtable_insert(unsigned int rtableid, struct sockaddr *dst,
    struct sockaddr *mask, struct sockaddr *gateway, uint8_t prio,
    struct rtentry *rt)
{
	struct radix_node_head	*rnh;
	struct radix_node	*rn = (struct radix_node *)rt;

	rnh = rtable_get(rtableid, dst->sa_family);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	rn = rn_addroute(dst, mask, rnh, rn, prio);
	if (rn == NULL)
		return (ESRCH);

	rt = ((struct rtentry *)rn);
	rtref(rt);

	return (0);
}

/*
 * Remove ``rt'' from the tree and drop the reference insertion took.
 */
int
rtable_delete(unsigned int rtableid, struct sockaddr *dst,
    struct sockaddr *mask, struct rtentry *rt)
{
	struct radix_node_head	*rnh;
	struct radix_node	*rn = (struct radix_node *)rt;

	rnh = rtable_get(rtableid, dst->sa_family);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	rn = rn_delete(dst, mask, rnh, rn);
	if (rn == NULL)
		return (ESRCH);

	if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT))
		panic("active node flags=%x", rn->rn_flags);

	rt = ((struct rtentry *)rn);
	rtfree(rt);

	return (0);
}

/*
 * Invoke ``func'' on every route of (``rtableid'', ``af'').  The walk
 * restarts from scratch whenever the callback returns EAGAIN.
 */
int
rtable_walk(unsigned int rtableid, sa_family_t af,
    int (*func)(struct rtentry *, void *, unsigned int), void *arg)
{
	struct radix_node_head	*rnh;
	/*
	 * NOTE(review): this function-pointer cast relies on rtentry
	 * beginning with its radix nodes — verify layout assumption.
	 */
	int (*f)(struct radix_node *, void *, unsigned int) = (void *)func;
	int error;

	rnh = rtable_get(rtableid, af);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	while ((error = rn_walktree(rnh, f, arg)) == EAGAIN)
		continue;

	return (error);
}

/*
 * No multipath in the radix backend: release ``rt0'' and stop.
 */
struct rtentry *
rtable_iterate(struct rtentry *rt0)
{
	rtfree(rt0);
	return (NULL);
}

#ifndef SMALL_KERNEL
/* Radix backend never supports multipath. */
int
rtable_mpath_capable(unsigned int rtableid, sa_family_t af)
{
	return (0);
}

/* Nothing to reprioritize without multipath; always succeeds. */
int
rtable_mpath_reprio(unsigned int rtableid, struct sockaddr *dst,
    struct sockaddr *mask, uint8_t prio, struct rtentry *rt)
{
	return (0);
}
#endif /* SMALL_KERNEL */

#else /* ART */

static inline uint8_t	*satoaddr(struct art_root *, struct sockaddr *);

int	 an_match(struct art_node *, struct sockaddr *, int);
void	 rtentry_ref(void *, void *);
void	 rtentry_unref(void *, void *);

#ifndef SMALL_KERNEL
void	 rtable_mpath_insert(struct art_node *, struct rtentry *);
#endif

/* SRP-list refcount hooks keeping rtentries alive for lock-free readers. */
struct srpl_rc rt_rc = SRPL_RC_INITIALIZER(rtentry_ref, rtentry_unref, NULL);

/*
 * ART backend initialization.  The key length computed by rtable_init()
 * is not needed by ART.
 */
void
rtable_init_backend(unsigned int keylen)
{
	art_init();
}

/*
 * Allocate an ART root for ``rtableid'' with address length ``alen''
 * (in bits) and key offset ``off''.
 */
void *
rtable_alloc(unsigned int rtableid, unsigned int alen, unsigned int off)
{
	return (art_alloc(rtableid, alen, off));
}

/*
 * Exact-match lookup in the ART backend.  A NULL ``mask'' matches the
 * node regardless of prefix length; otherwise a perfect dst/plen match
 * is required.  On multipath kernels ``gateway''/``prio'' select one
 * entry among the node's route list.  Returns a referenced rtentry or
 * NULL.
 */
struct rtentry *
rtable_lookup(unsigned int rtableid, struct sockaddr *dst,
    struct sockaddr *mask, struct sockaddr *gateway, uint8_t prio)
{
	struct art_root		*ar;
	struct art_node		*an;
	struct rtentry		*rt = NULL;
	struct srp_ref		 sr, nsr;
	uint8_t			*addr;
	int			 plen;

	ar = rtable_get(rtableid, dst->sa_family);
	if (ar == NULL)
		return (NULL);

	addr = satoaddr(ar, dst);

	/* No need for a perfect match. */
	if (mask == NULL) {
		an = art_match(ar, addr, &nsr);
		if (an == NULL)
			goto out;
	} else {
		plen = rtable_satoplen(dst->sa_family, mask);
		if (plen == -1)
			return (NULL);

		an = art_lookup(ar, addr, plen, &nsr);

		/* Make sure we've got a perfect match. */
		if (!an_match(an, dst, plen))
			goto out;
	}

#ifdef SMALL_KERNEL
	rt = SRPL_FIRST(&sr, &an->an_rtlist);
#else
	SRPL_FOREACH(rt, &sr, &an->an_rtlist, rt_next) {
		if (prio != RTP_ANY &&
		    (rt->rt_priority & RTP_MASK) != (prio & RTP_MASK))
			continue;

		if (gateway == NULL)
			break;

		if (rt->rt_gateway->sa_len == gateway->sa_len &&
		    memcmp(rt->rt_gateway, gateway, gateway->sa_len) == 0)
			break;
	}
#endif /* SMALL_KERNEL */
	if (rt != NULL)
		rtref(rt);

	SRPL_LEAVE(&sr);
out:
	srp_leave(&nsr);

	return (rt);
}

/*
 * Longest-prefix match for ``dst''.  On multipath kernels the nexthop
 * is picked by hash-threshold (RFC 2992) using ``src''.  Returns a
 * referenced rtentry or NULL.
 */
struct rtentry *
rtable_match(unsigned int rtableid, struct sockaddr *dst, uint32_t *src)
{
	struct art_root		*ar;
	struct art_node		*an;
	struct rtentry		*rt = NULL;
	struct srp_ref		 sr, nsr;
	uint8_t			*addr;
#ifndef SMALL_KERNEL
	int			 hash;
#endif /* SMALL_KERNEL */

	ar = rtable_get(rtableid, dst->sa_family);
	if (ar == NULL)
		return (NULL);

	addr = satoaddr(ar, dst);

	an = art_match(ar, addr, &nsr);
	if (an == NULL)
		goto out;

	/*
	 * NOTE(review): assumes a matched node always carries at least
	 * one route; rtref() would fault on an empty list — confirm the
	 * invariant holds against concurrent deletion.
	 */
	rt = SRPL_FIRST(&sr, &an->an_rtlist);
	rtref(rt);
	SRPL_LEAVE(&sr);

#ifndef SMALL_KERNEL
	/* Gateway selection by Hash-Threshold (RFC 2992) */
	if ((hash = rt_hash(rt, dst, src)) != -1) {
		struct rtentry		*mrt;
		int			 threshold, npaths = 0;

		KASSERT(hash <= 0xffff);

		SRPL_FOREACH(mrt, &sr, &an->an_rtlist, rt_next) {
			/* Only count nexthops with the same priority. */
			if (mrt->rt_priority == rt->rt_priority)
				npaths++;
		}
		SRPL_LEAVE(&sr);

		threshold = (0xffff / npaths) + 1;

		/*
		 * we have no protection against concurrent modification of the
		 * route list attached to the node, so we won't necessarily
		 * have the same number of routes.  for most modifications,
		 * we'll pick a route that we wouldn't have if we only saw the
		 * list before or after the change.  if we were going to use
		 * the last available route, but it got removed, we'll hit
		 * the end of the list and then pick the first route.
		 */

		mrt = SRPL_FIRST(&sr, &an->an_rtlist);
		while (hash > threshold && mrt != NULL) {
			if (mrt->rt_priority == rt->rt_priority)
				hash -= threshold;
			mrt = SRPL_FOLLOW(&sr, mrt, rt_next);
		}

		if (mrt != NULL) {
			rtref(mrt);
			rtfree(rt);
			rt = mrt;
		}
		SRPL_LEAVE(&sr);
	}
#endif /* SMALL_KERNEL */
out:
	srp_leave(&nsr);
	return (rt);
}

/*
 * Insert ``rt'' for dst/mask/gateway at priority ``prio''.
 *
 * Returns 0 on success; EAFNOSUPPORT, EINVAL (bad mask), EEXIST
 * (duplicate or, on SMALL_KERNEL, any conflicting node), ENOBUFS or
 * ESRCH (node vanished during insertion).
 */
int
rtable_insert(unsigned int rtableid, struct sockaddr *dst,
    struct sockaddr *mask, struct sockaddr *gateway, uint8_t prio,
    struct rtentry *rt)
{
#ifndef SMALL_KERNEL
	struct rtentry		*mrt;
	struct srp_ref		 sr;
#endif /* SMALL_KERNEL */
	struct art_root		*ar;
	struct art_node		*an, *prev;
	uint8_t			*addr;
	int			 plen;
	unsigned int		 rt_flags;
	int			 error = 0;

	ar = rtable_get(rtableid, dst->sa_family);
	if (ar == NULL)
		return (EAFNOSUPPORT);

	addr = satoaddr(ar, dst);
	plen = rtable_satoplen(dst->sa_family, mask);
	if (plen == -1)
		return (EINVAL);

	rtref(rt); /* guarantee rtfree won't do anything during insert */
	rw_enter_write(&ar->ar_lock);

#ifndef SMALL_KERNEL
	/* Do not permit exactly the same dst/mask/gw pair. */
	an = art_lookup(ar, addr, plen, &sr);
	srp_leave(&sr); /* an can't go away while we have the lock */
	if (an_match(an, dst, plen)) {
		/* NOTE(review): this ``mrt'' shadows the outer declaration. */
		struct rtentry	*mrt;
		int		 mpathok = ISSET(rt->rt_flags, RTF_MPATH);

		SRPL_FOREACH_LOCKED(mrt, &an->an_rtlist, rt_next) {
			if (prio != RTP_ANY &&
			    (mrt->rt_priority & RTP_MASK) != (prio & RTP_MASK))
				continue;

			if (!mpathok ||
			    (mrt->rt_gateway->sa_len == gateway->sa_len &&
			    !memcmp(mrt->rt_gateway, gateway, gateway->sa_len))){
				error = EEXIST;
				goto leave;
			}
		}
	}
#endif /* SMALL_KERNEL */

	an = art_get(dst, plen);
	if (an == NULL) {
		error = ENOBUFS;
		goto leave;
	}

	/* prepare for immediate operation if insert succeeds */
	rt_flags = rt->rt_flags;
	rt->rt_flags &= ~RTF_MPATH;
	rt->rt_dest = dst;
	rt->rt_plen = plen;
	SRPL_INSERT_HEAD_LOCKED(&rt_rc, &an->an_rtlist, rt, rt_next);

	prev = art_insert(ar, an, addr, plen);
	if (prev != an) {
		/* A node already exists (or insertion failed): roll back. */
		SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry,
		    rt_next);
		rt->rt_flags = rt_flags;
		art_put(an);

		if (prev == NULL) {
			error = ESRCH;
			goto leave;
		}

#ifndef SMALL_KERNEL
		an = prev;

		mrt = SRPL_FIRST_LOCKED(&an->an_rtlist);
		KASSERT(mrt != NULL);
		KASSERT((rt->rt_flags & RTF_MPATH) || mrt->rt_priority != prio);

		/*
		 * An ART node with the same destination/netmask already
		 * exists, MPATH conflict must have been already checked.
		 */
		if (rt->rt_flags & RTF_MPATH) {
			/*
			 * Only keep the RTF_MPATH flag if two routes have
			 * the same gateway.
			 */
			rt->rt_flags &= ~RTF_MPATH;
			SRPL_FOREACH_LOCKED(mrt, &an->an_rtlist, rt_next) {
				if (mrt->rt_priority == prio) {
					mrt->rt_flags |= RTF_MPATH;
					rt->rt_flags |= RTF_MPATH;
				}
			}
		}

		/* Put newly inserted entry at the right place. */
		rtable_mpath_insert(an, rt);
#else
		error = EEXIST;
#endif /* SMALL_KERNEL */
	}
leave:
	rw_exit_write(&ar->ar_lock);
	rtfree(rt);
	return (error);
}

/*
 * Remove ``rt'' (matching dst/mask exactly) from the table.  The ART
 * node itself is only freed when its last route is removed.
 */
int
rtable_delete(unsigned int rtableid, struct sockaddr *dst,
    struct sockaddr *mask, struct rtentry *rt)
{
	struct art_root		*ar;
	struct art_node		*an;
	struct srp_ref		 sr;
	uint8_t			*addr;
	int			 plen;
#ifndef SMALL_KERNEL
	struct rtentry		*mrt;
	int			 npaths = 0;
#endif /* SMALL_KERNEL */
	int			 error = 0;

	ar = rtable_get(rtableid, dst->sa_family);
	if (ar == NULL)
		return (EAFNOSUPPORT);

	addr = satoaddr(ar, dst);
	/* NOTE(review): a -1 (invalid mask) result is not checked here. */
	plen = rtable_satoplen(dst->sa_family, mask);

	rtref(rt); /* guarantee rtfree won't do anything under ar_lock */
	rw_enter_write(&ar->ar_lock);
	an = art_lookup(ar, addr, plen, &sr);
	srp_leave(&sr); /* an can't go away while we have the lock */

	/* Make sure we've got a perfect match. */
	if (!an_match(an, dst, plen)) {
		error = ESRCH;
		goto leave;
	}

#ifndef SMALL_KERNEL
	/*
	 * If other multipath route entries are still attached to
	 * this ART node we only have to unlink it.
	 */
	SRPL_FOREACH_LOCKED(mrt, &an->an_rtlist, rt_next)
		npaths++;

	if (npaths > 1) {
		KASSERT(rt->rt_refcnt >= 1);
		SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry,
		    rt_next);

		/* The last sibling of a pair is no longer multipath. */
		mrt = SRPL_FIRST_LOCKED(&an->an_rtlist);
		if (npaths == 2)
			mrt->rt_flags &= ~RTF_MPATH;

		goto leave;
	}
#endif /* SMALL_KERNEL */

	if (art_delete(ar, an, addr, plen) == NULL)
		panic("art_delete failed to find node %p", an);

	KASSERT(rt->rt_refcnt >= 1);
	SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry, rt_next);
	art_put(an);

leave:
	rw_exit_write(&ar->ar_lock);
	rtfree(rt);

	return (error);
}

/* Per-walk state passed through the ART walker. */
struct rtable_walk_cookie {
	int	(*rwc_func)(struct rtentry *, void *, unsigned int);
	void	 *rwc_arg;
	unsigned int	 rwc_rid;
};

/*
 * Helper for rtable_walk to keep the ART code free from any "struct rtentry".
 */
int
rtable_walk_helper(struct art_node *an, void *xrwc)
{
	struct srp_ref			 sr;
	struct rtable_walk_cookie	*rwc = xrwc;
	struct rtentry			*rt;
	int				 error = 0;

	SRPL_FOREACH(rt, &sr, &an->an_rtlist, rt_next) {
		if ((error = (*rwc->rwc_func)(rt, rwc->rwc_arg, rwc->rwc_rid)))
			break;
	}
	SRPL_LEAVE(&sr);

	return (error);
}

/*
 * Invoke ``func'' on every route of (``rtableid'', ``af'').  The walk
 * restarts from scratch whenever the callback returns EAGAIN.
 */
int
rtable_walk(unsigned int rtableid, sa_family_t af,
    int (*func)(struct rtentry *, void *, unsigned int), void *arg)
{
	struct art_root			*ar;
	struct rtable_walk_cookie	 rwc;
	int				 error;

	ar = rtable_get(rtableid, af);
	if (ar == NULL)
		return (EAFNOSUPPORT);

	rwc.rwc_func = func;
	rwc.rwc_arg = arg;
	rwc.rwc_rid = rtableid;

	while ((error = art_walk(ar, rtable_walk_helper, &rwc)) == EAGAIN)
		continue;

	return (error);
}

/*
 * Hand over ``rt0''s reference and return a referenced pointer to the
 * next multipath sibling, or NULL at the end of the list (always NULL
 * on SMALL_KERNEL).
 */
struct rtentry *
rtable_iterate(struct rtentry *rt0)
{
	struct rtentry *rt = NULL;
#ifndef SMALL_KERNEL
	struct srp_ref sr;

	rt = SRPL_NEXT(&sr, rt0, rt_next);
	if (rt != NULL)
		rtref(rt);
	SRPL_LEAVE(&sr);
#endif /* SMALL_KERNEL */
	rtfree(rt0);
	return (rt);
}

#ifndef SMALL_KERNEL
/* ART always supports multipath. */
int
rtable_mpath_capable(unsigned int rtableid, sa_family_t af)
{
	return (1);
}

/*
 * Change the priority of ``rt'' and re-sort it inside its node's route
 * list.  Returns 0 or ESRCH if no node matches dst/mask exactly.
 */
int
rtable_mpath_reprio(unsigned int rtableid, struct sockaddr *dst,
    struct sockaddr *mask, uint8_t prio, struct rtentry *rt)
{
	struct art_root		*ar;
	struct art_node		*an;
	struct srp_ref		 sr;
	uint8_t			*addr;
	int			 plen;
	int			 error = 0;

	ar = rtable_get(rtableid, dst->sa_family);
	if (ar == NULL)
		return (EAFNOSUPPORT);

	addr = satoaddr(ar, dst);
	/* NOTE(review): a -1 (invalid mask) result is not checked here. */
	plen = rtable_satoplen(dst->sa_family, mask);

	rw_enter_write(&ar->ar_lock);
	an = art_lookup(ar, addr, plen, &sr);
	srp_leave(&sr); /* an can't go away while we have the lock */

	/* Make sure we've got a perfect match. */
	if (!an_match(an, dst, plen))
		error = ESRCH;
	else {
		rtref(rt); /* keep rt alive in between remove and insert */
		SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist,
		    rt, rtentry, rt_next);
		rt->rt_priority = prio;
		rtable_mpath_insert(an, rt);
		rtfree(rt);
	}
	rw_exit_write(&ar->ar_lock);

	return (error);
}

/*
 * Insert ``rt'' into ``an''s route list, keeping the list sorted by
 * ascending priority value (smaller value = higher priority first).
 * Caller holds ar_lock.
 */
void
rtable_mpath_insert(struct art_node *an, struct rtentry *rt)
{
	struct rtentry			*mrt, *prt = NULL;
	uint8_t				 prio = rt->rt_priority;

	if ((mrt = SRPL_FIRST_LOCKED(&an->an_rtlist)) != NULL) {
		/*
		 * Select the order of the MPATH routes.
		 */
		while (SRPL_NEXT_LOCKED(mrt, rt_next) != NULL) {
			if (mrt->rt_priority > prio)
				break;
			prt = mrt;
			mrt = SRPL_NEXT_LOCKED(mrt, rt_next);
		}

		if (mrt->rt_priority > prio) {
			/*
			 * ``rt'' has a higher (smaller) priority than
			 * ``mrt'' so put it before in the list.
			 */
			if (prt != NULL) {
				SRPL_INSERT_AFTER_LOCKED(&rt_rc, prt, rt,
				    rt_next);
			} else {
				SRPL_INSERT_HEAD_LOCKED(&rt_rc, &an->an_rtlist,
				    rt, rt_next);
			}
		} else {
			SRPL_INSERT_AFTER_LOCKED(&rt_rc, mrt, rt, rt_next);
		}
	} else {
		SRPL_INSERT_HEAD_LOCKED(&rt_rc, &an->an_rtlist, rt, rt_next);
	}
}
#endif /* SMALL_KERNEL */

/*
 * Returns 1 if ``an'' perfectly matches (``dst'', ``plen''), 0 otherwise.
 */
int
an_match(struct art_node *an, struct sockaddr *dst, int plen)
{
	struct rtentry		*rt;
	struct srp_ref		 sr;
	int			 match;

	if (an == NULL || an->an_plen != plen)
		return (0);

	/*
	 * NOTE(review): assumes the node's route list is non-empty;
	 * memcmp() would fault on a NULL first entry — confirm.
	 */
	rt = SRPL_FIRST(&sr, &an->an_rtlist);
	match = (memcmp(rt->rt_dest, dst, dst->sa_len) == 0);
	SRPL_LEAVE(&sr);

	return (match);
}

/* SRP-list insertion callback: account a reader/list reference. */
void
rtentry_ref(void *null, void *xrt)
{
	struct rtentry *rt = xrt;

	rtref(rt);
}

/* SRP-list removal callback: drop the list's reference. */
void
rtentry_unref(void *null, void *xrt)
{
	struct rtentry *rt = xrt;

	rtfree(rt);
}

/*
 * Return a pointer to the address (key).  This is an heritage from the
 * BSD radix tree needed to skip the non-address fields from the flavor
 * of "struct sockaddr" used by this routing table.
 */
static inline uint8_t *
satoaddr(struct art_root *at, struct sockaddr *sa)
{
	return (((uint8_t *)sa) + at->ar_off);
}
#endif /* ART */

/*
 * Return the prefix length of a mask.
1015 */ 1016 int 1017 rtable_satoplen(sa_family_t af, struct sockaddr *mask) 1018 { 1019 struct domain *dp; 1020 uint8_t *ap, *ep; 1021 int mlen, plen = 0; 1022 int i; 1023 1024 for (i = 0; (dp = domains[i]) != NULL; i++) { 1025 if (dp->dom_rtoffset == 0) 1026 continue; 1027 1028 if (af == dp->dom_family) 1029 break; 1030 } 1031 if (dp == NULL) 1032 return (-1); 1033 1034 /* Host route */ 1035 if (mask == NULL) 1036 return (dp->dom_maxplen); 1037 1038 mlen = mask->sa_len; 1039 1040 /* Default route */ 1041 if (mlen == 0) 1042 return (0); 1043 1044 ap = (uint8_t *)((uint8_t *)mask) + dp->dom_rtoffset; 1045 ep = (uint8_t *)((uint8_t *)mask) + mlen; 1046 if (ap > ep) 1047 return (-1); 1048 1049 if (ap == ep) 1050 return (0); 1051 1052 /* "Beauty" adapted from sbin/route/show.c ... */ 1053 while (ap < ep) { 1054 switch (*ap) { 1055 case 0xff: 1056 plen += 8; 1057 ap++; 1058 break; 1059 case 0xfe: 1060 plen += 7; 1061 ap++; 1062 goto out; 1063 case 0xfc: 1064 plen += 6; 1065 ap++; 1066 goto out; 1067 case 0xf8: 1068 plen += 5; 1069 ap++; 1070 goto out; 1071 case 0xf0: 1072 plen += 4; 1073 ap++; 1074 goto out; 1075 case 0xe0: 1076 plen += 3; 1077 ap++; 1078 goto out; 1079 case 0xc0: 1080 plen += 2; 1081 ap++; 1082 goto out; 1083 case 0x80: 1084 plen += 1; 1085 ap++; 1086 goto out; 1087 case 0x00: 1088 goto out; 1089 default: 1090 /* Non contiguous mask. */ 1091 return (-1); 1092 } 1093 1094 } 1095 1096 out: 1097 #ifdef DIAGNOSTIC 1098 for (; ap < ep; ap++) { 1099 if (*ap != 0x00) 1100 return (-1); 1101 } 1102 #endif /* DIAGNOSTIC */ 1103 1104 return (plen); 1105 } 1106