/*	$OpenBSD: rtable.c,v 1.52 2016/09/07 09:36:49 mpi Exp $ */

/*
 * Copyright (c) 2014-2015 Martin Pieuchot
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#ifndef _KERNEL
#include "kern_compat.h"
#else
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/socket.h>
#include <sys/malloc.h>
#include <sys/queue.h>
#include <sys/domain.h>
#include <sys/srp.h>
#endif

#include <net/rtable.h>
#include <net/route.h>

/*
 * Structures used by rtable_get() to retrieve the corresponding
 * routing table for a given pair of ``af'' and ``rtableid''.
 *
 * Note that once allocated, routing table heads are never freed.
 * This way we do not need to reference count them.
 *
 *	afmap		    rtmap/dommp
 *   -----------          ---------     -----
 *   |   0     |--------> | 0 | 0 | ... | 0 |	Array mapping rtableid (=index)
 *   -----------          ---------     -----	to rdomain (=value).
 *   | AF_INET |.
 *   ----------- `.       .---------.     .---------.
 *       ...       `----> | rtable0 | ... | rtableN |	Array of pointers for
 *   -----------          '---------'     '---------'	IPv4 routing tables
 *   | AF_MPLS |					indexed by ``rtableid''.
 *   -----------
 */
struct srp	 *afmap;
uint8_t		  af2idx[AF_MAX+1];	/* To only allocate supported AF */
uint8_t		  af2idx_max;

/* Array of routing table pointers. */
struct rtmap {
	unsigned int	  limit;
	void		**tbl;
};

/* Array of rtableid -> rdomain mapping. */
struct dommp {
	unsigned int	  limit;
	unsigned int	 *dom;
};

unsigned int	  rtmap_limit = 0;

void		  rtmap_init(void);
void		  rtmap_grow(unsigned int, sa_family_t);
void		  rtmap_dtor(void *, void *);

struct srp_gc	  rtmap_gc = SRP_GC_INITIALIZER(rtmap_dtor, NULL);

void		  rtable_init_backend(unsigned int);
void		 *rtable_alloc(unsigned int, unsigned int, unsigned int);
void		 *rtable_get(unsigned int, sa_family_t);

void
rtmap_init(void)
{
	struct domain	*dp;
	int		 i;

	/* Start with a single table for every domain that requires it. */
	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_rtoffset == 0)
			continue;

		rtmap_grow(1, dp->dom_family);
	}

	/* Initialize the rtableid->rdomain mapping table. */
	rtmap_grow(1, 0);

	rtmap_limit = 1;
}
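/*
 * rtmap_grow() below follows a copy-and-swap pattern: allocate a larger
 * array, copy the existing table pointers, then publish the new map with
 * srp_update_locked().  Readers keep seeing the old map until they leave
 * their SRP section, after which the garbage collector calls rtmap_dtor()
 * on it.  A minimal sketch of the read side (hypothetical ``af''/``id''):
 *
 *	struct srp_ref sr;
 *	struct rtmap *map = srp_enter(&sr, &afmap[af2idx[af]]);
 *	void *tbl = (id < map->limit) ? map->tbl[id] : NULL;
 *	srp_leave(&sr);
 */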
/*
 * Grow the size of the array of routing tables for AF ``af'' to ``nlimit''.
 */
void
rtmap_grow(unsigned int nlimit, sa_family_t af)
{
	struct rtmap	*map, *nmap;
	int		 i;

	KERNEL_ASSERT_LOCKED();

	KASSERT(nlimit > rtmap_limit);

	nmap = malloc(sizeof(*nmap), M_RTABLE, M_WAITOK);
	nmap->limit = nlimit;
	nmap->tbl = mallocarray(nlimit, sizeof(*nmap[0].tbl), M_RTABLE,
	    M_WAITOK|M_ZERO);

	map = srp_get_locked(&afmap[af2idx[af]]);
	if (map != NULL) {
		KASSERT(map->limit == rtmap_limit);

		for (i = 0; i < map->limit; i++)
			nmap->tbl[i] = map->tbl[i];
	}

	srp_update_locked(&rtmap_gc, &afmap[af2idx[af]], nmap);
}

void
rtmap_dtor(void *null, void *xmap)
{
	struct rtmap	*map = xmap;

	/*
	 * doesn't need to be serialized since this is the last reference
	 * to this map. there's nothing to race against.
	 */
	free(map->tbl, M_RTABLE, map->limit * sizeof(*map[0].tbl));
	free(map, M_RTABLE, sizeof(*map));
}

void
rtable_init(void)
{
	struct domain	*dp;
	unsigned int	 keylen = 0;
	int		 i;

	/* We use index 0 for the rtable/rdomain map. */
	af2idx_max = 1;
	memset(af2idx, 0, sizeof(af2idx));

	/*
	 * Compute the maximum supported key length in case the routing
	 * table backend needs it.
	 */
	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_rtoffset == 0)
			continue;

		af2idx[dp->dom_family] = af2idx_max++;
		if (dp->dom_rtkeylen > keylen)
			keylen = dp->dom_rtkeylen;
	}
	rtable_init_backend(keylen);

	/*
	 * Allocate AF-to-id table now that we know how many AFs this
	 * kernel supports.
	 */
	afmap = mallocarray(af2idx_max + 1, sizeof(*afmap), M_RTABLE,
	    M_WAITOK|M_ZERO);

	rtmap_init();
}

int
rtable_add(unsigned int id)
{
	struct domain	*dp;
	void		*tbl;
	struct rtmap	*map;
	struct dommp	*dmm;
	sa_family_t	 af;
	unsigned int	 off, alen;
	int		 i;

	KERNEL_ASSERT_LOCKED();

	if (id > RT_TABLEID_MAX)
		return (EINVAL);

	if (rtable_exists(id))
		return (EEXIST);

	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_rtoffset == 0)
			continue;

		af = dp->dom_family;
		off = dp->dom_rtoffset;
		alen = dp->dom_maxplen;

		if (id >= rtmap_limit)
			rtmap_grow(id + 1, af);

		tbl = rtable_alloc(id, alen, off);
		if (tbl == NULL)
			return (ENOMEM);

		map = srp_get_locked(&afmap[af2idx[af]]);
		map->tbl[id] = tbl;
	}

	/* Reflect possible growth. */
	if (id >= rtmap_limit) {
		rtmap_grow(id + 1, 0);
		rtmap_limit = id + 1;
	}

	/* Use main rtable/rdomain by default. */
	dmm = srp_get_locked(&afmap[0]);
	dmm->dom[id] = 0;

	return (0);
}
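/*
 * rtable_get() below is the lockless read side of the maps built by
 * rtable_add().  A hedged usage sketch (rtable id 0 assumed to exist):
 *
 *	void *tbl = rtable_get(0, AF_INET);
 *	if (tbl == NULL)
 *		... AF not supported or table not yet created ...
 */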
void *
rtable_get(unsigned int rtableid, sa_family_t af)
{
	struct rtmap	*map;
	void		*tbl = NULL;
	struct srp_ref	 sr;

	if (af >= nitems(af2idx) || af2idx[af] == 0)
		return (NULL);

	map = srp_enter(&sr, &afmap[af2idx[af]]);
	if (rtableid < map->limit)
		tbl = map->tbl[rtableid];
	srp_leave(&sr);

	return (tbl);
}

int
rtable_exists(unsigned int rtableid)
{
	struct domain	*dp;
	void		*tbl;
	int		 i;

	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_rtoffset == 0)
			continue;

		tbl = rtable_get(rtableid, dp->dom_family);
		if (tbl != NULL)
			return (1);
	}

	return (0);
}

unsigned int
rtable_l2(unsigned int rtableid)
{
	struct dommp	*dmm;
	unsigned int	 rdomain = 0;
	struct srp_ref	 sr;

	dmm = srp_enter(&sr, &afmap[0]);
	if (rtableid < dmm->limit)
		rdomain = dmm->dom[rtableid];
	srp_leave(&sr);

	return (rdomain);
}

void
rtable_l2set(unsigned int rtableid, unsigned int rdomain)
{
	struct dommp	*dmm;

	KERNEL_ASSERT_LOCKED();

	if (!rtable_exists(rtableid) || !rtable_exists(rdomain))
		return;

	dmm = srp_get_locked(&afmap[0]);
	dmm->dom[rtableid] = rdomain;
}

#ifndef ART
void
rtable_init_backend(unsigned int keylen)
{
	rn_init(keylen);	/* initialize all zeroes, all ones, mask table */
}

void *
rtable_alloc(unsigned int rtableid, unsigned int alen, unsigned int off)
{
	struct radix_node_head *rnh = NULL;

	if (rn_inithead((void **)&rnh, off)) {
#ifndef SMALL_KERNEL
		rnh->rnh_multipath = 1;
#endif /* SMALL_KERNEL */
		rnh->rnh_rtableid = rtableid;
	}

	return (rnh);
}

struct rtentry *
rtable_lookup(unsigned int rtableid, struct sockaddr *dst,
    struct sockaddr *mask, struct sockaddr *gateway, uint8_t prio)
{
	struct radix_node_head	*rnh;
	struct radix_node	*rn;
	struct rtentry		*rt;

	rnh = rtable_get(rtableid, dst->sa_family);
	if (rnh == NULL)
		return (NULL);

	rn = rn_lookup(dst, mask, rnh);
	if (rn == NULL || (rn->rn_flags & RNF_ROOT) != 0)
		return (NULL);

	rt = ((struct rtentry *)rn);

#ifndef SMALL_KERNEL
	if (rnh->rnh_multipath) {
		rt = rt_mpath_matchgate(rt, gateway, prio);
		if (rt == NULL)
			return (NULL);
	}
#endif /* !SMALL_KERNEL */

	rtref(rt);
	return (rt);
}

struct rtentry *
rtable_match(unsigned int rtableid, struct sockaddr *dst, uint32_t *src)
{
	struct radix_node_head	*rnh;
	struct radix_node	*rn;
	struct rtentry		*rt = NULL;
#ifndef SMALL_KERNEL
	int			 hash;
#endif /* SMALL_KERNEL */

	rnh = rtable_get(rtableid, dst->sa_family);
	if (rnh == NULL)
		return (NULL);

	KERNEL_LOCK();
	rn = rn_match(dst, rnh);
	if (rn == NULL || (rn->rn_flags & RNF_ROOT) != 0)
		goto out;

	rt = ((struct rtentry *)rn);
	rtref(rt);

#ifndef SMALL_KERNEL
	/* Gateway selection by Hash-Threshold (RFC 2992) */
	if ((hash = rt_hash(rt, dst, src)) != -1) {
		struct rtentry		*mrt = rt;
		int			 threshold, npaths = 1;

		KASSERT(hash <= 0xffff);

		/* Take an extra ref: rtable_iterate() drops one per step. */
		rtref(rt);

		while ((mrt = rtable_iterate(mrt)) != NULL)
			npaths++;

		threshold = (0xffff / npaths) + 1;
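		/*
		 * Worked example (illustrative values): with three
		 * equal-priority paths, threshold = 0xffff / 3 + 1 = 21846,
		 * so hashes 0-21846 keep the first route, 21847-43692 skip
		 * to the second one, and the remainder fall through to the
		 * third one.
		 */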
		mrt = rt;
		while (hash > threshold && mrt != NULL) {
			/* stay within the multipath routes */
			mrt = rtable_iterate(mrt);
			hash -= threshold;
		}

		/* if gw selection fails, use the first match (default) */
		if (mrt != NULL) {
			rtfree(rt);
			rt = mrt;
		}
	}
#endif /* SMALL_KERNEL */
out:
	KERNEL_UNLOCK();
	return (rt);
}

int
rtable_insert(unsigned int rtableid, struct sockaddr *dst,
    struct sockaddr *mask, struct sockaddr *gateway, uint8_t prio,
    struct rtentry *rt)
{
	struct radix_node_head	*rnh;
	struct radix_node	*rn = (struct radix_node *)rt;

	rnh = rtable_get(rtableid, dst->sa_family);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

#ifndef SMALL_KERNEL
	if (rnh->rnh_multipath) {
		/* Do not permit exactly the same dst/mask/gw pair. */
		if (rt_mpath_conflict(rnh, dst, mask, gateway, prio,
		    ISSET(rt->rt_flags, RTF_MPATH))) {
			return (EEXIST);
		}
	}
#endif /* SMALL_KERNEL */

	rn = rn_addroute(dst, mask, rnh, rn, prio);
	if (rn == NULL)
		return (ESRCH);

	rt = ((struct rtentry *)rn);
	rtref(rt);

	return (0);
}

int
rtable_delete(unsigned int rtableid, struct sockaddr *dst,
    struct sockaddr *mask, struct rtentry *rt)
{
	struct radix_node_head	*rnh;
	struct radix_node	*rn = (struct radix_node *)rt;

	rnh = rtable_get(rtableid, dst->sa_family);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	rn = rn_delete(dst, mask, rnh, rn);
	if (rn == NULL)
		return (ESRCH);

	if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT))
		panic("active node flags=%x", rn->rn_flags);

	rt = ((struct rtentry *)rn);
	rtfree(rt);

	return (0);
}

int
rtable_walk(unsigned int rtableid, sa_family_t af,
    int (*func)(struct rtentry *, void *, unsigned int), void *arg)
{
	struct radix_node_head	*rnh;
	int (*f)(struct radix_node *, void *, unsigned int) = (void *)func;
	int error;

	rnh = rtable_get(rtableid, af);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	while ((error = rn_walktree(rnh, f, arg)) == EAGAIN)
		continue;

	return (error);
}

struct rtentry *
rtable_iterate(struct rtentry *rt0)
{
#ifndef SMALL_KERNEL
	struct radix_node	*rn = (struct radix_node *)rt0;
	struct rtentry		*rt;

	rt = (struct rtentry *)rn_mpath_next(rn, RMP_MODE_ACTIVE);
	if (rt != NULL)
		rtref(rt);
	rtfree(rt0);

	return (rt);
#else
	rtfree(rt0);
	return (NULL);
#endif /* SMALL_KERNEL */
}

#ifndef SMALL_KERNEL
int
rtable_mpath_capable(unsigned int rtableid, sa_family_t af)
{
	struct radix_node_head	*rnh;
	int mpath;

	rnh = rtable_get(rtableid, af);
	if (rnh == NULL)
		return (0);

	mpath = rnh->rnh_multipath;
	return (mpath);
}

int
rtable_mpath_reprio(unsigned int rtableid, struct sockaddr *dst,
    struct sockaddr *mask, uint8_t prio, struct rtentry *rt)
{
	struct radix_node	*rn = (struct radix_node *)rt;

	rn_mpath_reprio(rn, prio);

	return (0);
}
#endif /* SMALL_KERNEL */

#else /* ART */

static inline uint8_t	*satoaddr(struct art_root *, struct sockaddr *);

void	 rtentry_ref(void *, void *);
void	 rtentry_unref(void *, void *);

#ifndef SMALL_KERNEL
void	 rtable_mpath_insert(struct art_node *, struct rtentry *);
#endif

struct srpl_rc rt_rc = SRPL_RC_INITIALIZER(rtentry_ref, rtentry_unref, NULL);
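/*
 * In the ART backend every art_node carries an SRP list (``an_rtlist'')
 * of the rtentries sharing its prefix.  Lookups walk that list under
 * SRPL_ENTER() without taking a lock; modifications serialize on the
 * per-table ``ar_lock'' write lock.
 */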
void
rtable_init_backend(unsigned int keylen)
{
	art_init();
}

void *
rtable_alloc(unsigned int rtableid, unsigned int alen, unsigned int off)
{
	return (art_alloc(rtableid, alen, off));
}

struct rtentry *
rtable_lookup(unsigned int rtableid, struct sockaddr *dst,
    struct sockaddr *mask, struct sockaddr *gateway, uint8_t prio)
{
	struct art_root			*ar;
	struct art_node			*an;
	struct rtentry			*rt = NULL;
	struct srp_ref			 sr, nsr;
	uint8_t				*addr;
	int				 plen;

	ar = rtable_get(rtableid, dst->sa_family);
	if (ar == NULL)
		return (NULL);

	addr = satoaddr(ar, dst);

	/* No need for a perfect match. */
	if (mask == NULL) {
		an = art_match(ar, addr, &nsr);
		if (an == NULL)
			goto out;
	} else {
		plen = rtable_satoplen(dst->sa_family, mask);
		if (plen == -1)
			return (NULL);

		an = art_lookup(ar, addr, plen, &nsr);

		/* Make sure we've got a perfect match. */
		if (an == NULL || an->an_plen != plen ||
		    memcmp(an->an_dst, dst, dst->sa_len))
			goto out;
	}

#ifdef SMALL_KERNEL
	rt = SRPL_ENTER(&sr, &an->an_rtlist);
#else
	SRPL_FOREACH(rt, &sr, &an->an_rtlist, rt_next) {
		if (prio != RTP_ANY &&
		    (rt->rt_priority & RTP_MASK) != (prio & RTP_MASK))
			continue;

		if (gateway == NULL)
			break;

		if (rt->rt_gateway->sa_len == gateway->sa_len &&
		    memcmp(rt->rt_gateway, gateway, gateway->sa_len) == 0)
			break;
	}
#endif /* SMALL_KERNEL */
	if (rt != NULL)
		rtref(rt);

	SRPL_LEAVE(&sr);
out:
	srp_leave(&nsr);

	return (rt);
}
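/*
 * Note on the two ART primitives used in this file: art_lookup()
 * returns the node for an exact prefix, as in rtable_lookup() above,
 * while art_match() performs a longest-prefix match and backs
 * rtable_match() below.
 */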
struct rtentry *
rtable_match(unsigned int rtableid, struct sockaddr *dst, uint32_t *src)
{
	struct art_root			*ar;
	struct art_node			*an;
	struct rtentry			*rt = NULL;
	struct srp_ref			 sr, nsr;
	uint8_t				*addr;
#ifndef SMALL_KERNEL
	int				 hash;
#endif /* SMALL_KERNEL */

	ar = rtable_get(rtableid, dst->sa_family);
	if (ar == NULL)
		return (NULL);

	addr = satoaddr(ar, dst);

	an = art_match(ar, addr, &nsr);
	if (an == NULL)
		goto out;

	rt = SRPL_ENTER(&sr, &an->an_rtlist);
	rtref(rt);
	SRPL_LEAVE(&sr);

#ifndef SMALL_KERNEL
	/* Gateway selection by Hash-Threshold (RFC 2992) */
	if ((hash = rt_hash(rt, dst, src)) != -1) {
		struct rtentry		*mrt;
		int			 threshold, npaths = 0;

		KASSERT(hash <= 0xffff);

		SRPL_FOREACH(mrt, &sr, &an->an_rtlist, rt_next) {
			/* Only count nexthops with the same priority. */
			if (mrt->rt_priority == rt->rt_priority)
				npaths++;
		}
		SRPL_LEAVE(&sr);

		threshold = (0xffff / npaths) + 1;

		/*
		 * we have no protection against concurrent modification of
		 * the route list attached to the node, so we won't
		 * necessarily have the same number of routes.  for most
		 * modifications, we'll pick a route that we wouldn't have
		 * if we only saw the list before or after the change.  if
		 * we were going to use the last available route, but it got
		 * removed, we'll hit the end of the list and then pick the
		 * first route.
		 */
		mrt = SRPL_ENTER(&sr, &an->an_rtlist);
		while (hash > threshold && mrt != NULL) {
			if (mrt->rt_priority == rt->rt_priority)
				hash -= threshold;
			mrt = SRPL_NEXT(&sr, mrt, rt_next);
		}

		if (mrt != NULL) {
			rtref(mrt);
			rtfree(rt);
			rt = mrt;
		}
		SRPL_LEAVE(&sr);
	}
#endif /* SMALL_KERNEL */
out:
	srp_leave(&nsr);
	return (rt);
}

int
rtable_insert(unsigned int rtableid, struct sockaddr *dst,
    struct sockaddr *mask, struct sockaddr *gateway, uint8_t prio,
    struct rtentry *rt)
{
#ifndef SMALL_KERNEL
	struct rtentry			*mrt;
	struct srp_ref			 sr;
#endif /* SMALL_KERNEL */
	struct art_root			*ar;
	struct art_node			*an, *prev;
	uint8_t				*addr;
	int				 plen;
	unsigned int			 rt_flags;
	int				 error = 0;

	ar = rtable_get(rtableid, dst->sa_family);
	if (ar == NULL)
		return (EAFNOSUPPORT);

	addr = satoaddr(ar, dst);
	plen = rtable_satoplen(dst->sa_family, mask);
	if (plen == -1)
		return (EINVAL);

	rtref(rt);	/* guarantee rtfree won't do anything during insert */
	rw_enter_write(&ar->ar_lock);

#ifndef SMALL_KERNEL
	/* Do not permit exactly the same dst/mask/gw pair. */
	an = art_lookup(ar, addr, plen, &sr);
	srp_leave(&sr);	/* an can't go away while we have the lock */
	if (an != NULL && an->an_plen == plen &&
	    !memcmp(an->an_dst, dst, dst->sa_len)) {
		struct rtentry	*mrt;
		int		 mpathok = ISSET(rt->rt_flags, RTF_MPATH);

		SRPL_FOREACH_LOCKED(mrt, &an->an_rtlist, rt_next) {
			if (prio != RTP_ANY &&
			    (mrt->rt_priority & RTP_MASK) != (prio & RTP_MASK))
				continue;

			if (!mpathok ||
			    (mrt->rt_gateway->sa_len == gateway->sa_len &&
			    !memcmp(mrt->rt_gateway, gateway, gateway->sa_len))){
				error = EEXIST;
				goto leave;
			}
		}
	}
#endif /* SMALL_KERNEL */

	an = art_get(dst, plen);
	if (an == NULL) {
		error = ENOBUFS;
		goto leave;
	}

	/* prepare for immediate operation if insert succeeds */
	rt_flags = rt->rt_flags;
	rt->rt_flags &= ~RTF_MPATH;
	rt->rt_dest = dst;
	rt->rt_plen = plen;
	SRPL_INSERT_HEAD_LOCKED(&rt_rc, &an->an_rtlist, rt, rt_next);

	prev = art_insert(ar, an, addr, plen);
	if (prev != an) {
		SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry,
		    rt_next);
		rt->rt_flags = rt_flags;
		art_put(an);

		if (prev == NULL) {
			error = ESRCH;
			goto leave;
		}

#ifndef SMALL_KERNEL
		an = prev;

		mrt = SRPL_FIRST_LOCKED(&an->an_rtlist);
		KASSERT(mrt != NULL);
		KASSERT((rt->rt_flags & RTF_MPATH) || mrt->rt_priority != prio);

		/*
		 * An ART node with the same destination/netmask already
		 * exists, MPATH conflict must have been already checked.
		 */
		if (rt->rt_flags & RTF_MPATH) {
			/*
			 * Only keep the RTF_MPATH flag if another route
			 * with the same priority is attached to the node.
			 */
			rt->rt_flags &= ~RTF_MPATH;
			SRPL_FOREACH_LOCKED(mrt, &an->an_rtlist, rt_next) {
				if (mrt->rt_priority == prio) {
					mrt->rt_flags |= RTF_MPATH;
					rt->rt_flags |= RTF_MPATH;
				}
			}
		}
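		/*
		 * Illustration (hypothetical priorities): if the node
		 * already holds a route at priority 8 and the new
		 * RTF_MPATH route also has priority 8, both end up
		 * flagged RTF_MPATH; with no equal-priority sibling the
		 * new route loses the flag again.
		 */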
		/* Put newly inserted entry at the right place. */
		rtable_mpath_insert(an, rt);
#else
		error = EEXIST;
#endif /* SMALL_KERNEL */
	}
leave:
	rw_exit_write(&ar->ar_lock);
	rtfree(rt);
	return (error);
}

int
rtable_delete(unsigned int rtableid, struct sockaddr *dst,
    struct sockaddr *mask, struct rtentry *rt)
{
	struct art_root			*ar;
	struct art_node			*an;
	struct srp_ref			 sr;
	uint8_t				*addr;
	int				 plen;
#ifndef SMALL_KERNEL
	struct rtentry			*mrt;
	int				 npaths = 0;
#endif /* SMALL_KERNEL */
	int				 error = 0;

	ar = rtable_get(rtableid, dst->sa_family);
	if (ar == NULL)
		return (EAFNOSUPPORT);

	addr = satoaddr(ar, dst);
	plen = rtable_satoplen(dst->sa_family, mask);

	rtref(rt);	/* guarantee rtfree won't do anything under ar_lock */
	rw_enter_write(&ar->ar_lock);
	an = art_lookup(ar, addr, plen, &sr);
	srp_leave(&sr);	/* an can't go away while we have the lock */

	/* Make sure we've got a perfect match. */
	if (an == NULL || an->an_plen != plen ||
	    memcmp(an->an_dst, dst, dst->sa_len)) {
		error = ESRCH;
		goto leave;
	}

#ifndef SMALL_KERNEL
	/*
	 * If other multipath route entries are still attached to
	 * this ART node we only have to unlink it.
	 */
	SRPL_FOREACH_LOCKED(mrt, &an->an_rtlist, rt_next)
		npaths++;

	if (npaths > 1) {
		KASSERT(rt->rt_refcnt >= 1);
		SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry,
		    rt_next);

		mrt = SRPL_FIRST_LOCKED(&an->an_rtlist);
		an->an_dst = mrt->rt_dest;
		if (npaths == 2)
			mrt->rt_flags &= ~RTF_MPATH;

		goto leave;
	}
#endif /* SMALL_KERNEL */

	if (art_delete(ar, an, addr, plen) == NULL)
		panic("art_delete failed to find node %p", an);

	KASSERT(rt->rt_refcnt >= 1);
	SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry, rt_next);
	art_put(an);

leave:
	rw_exit_write(&ar->ar_lock);
	rtfree(rt);

	return (error);
}

struct rtable_walk_cookie {
	int		(*rwc_func)(struct rtentry *, void *, unsigned int);
	void		 *rwc_arg;
	unsigned int	  rwc_rid;
};

/*
 * Helper for rtable_walk to keep the ART code free from any
 * "struct rtentry".
 */
int
rtable_walk_helper(struct art_node *an, void *xrwc)
{
	struct srp_ref			 sr;
	struct rtable_walk_cookie	*rwc = xrwc;
	struct rtentry			*rt;
	int				 error = 0;

	SRPL_FOREACH(rt, &sr, &an->an_rtlist, rt_next) {
		if ((error = (*rwc->rwc_func)(rt, rwc->rwc_arg, rwc->rwc_rid)))
			break;
	}
	SRPL_LEAVE(&sr);

	return (error);
}
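/*
 * Hypothetical walker callback illustrating the rtable_walk()
 * contract (a non-zero return stops the walk; this helper and the
 * counter are illustrative only, not part of the kernel):
 *
 *	int
 *	count_route(struct rtentry *rt, void *arg, unsigned int rid)
 *	{
 *		(*(unsigned int *)arg)++;
 *		return (0);
 *	}
 *
 *	unsigned int count = 0;
 *	error = rtable_walk(0, AF_INET, count_route, &count);
 */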
int
rtable_walk(unsigned int rtableid, sa_family_t af,
    int (*func)(struct rtentry *, void *, unsigned int), void *arg)
{
	struct art_root			*ar;
	struct rtable_walk_cookie	 rwc;
	int				 error;

	ar = rtable_get(rtableid, af);
	if (ar == NULL)
		return (EAFNOSUPPORT);

	rwc.rwc_func = func;
	rwc.rwc_arg = arg;
	rwc.rwc_rid = rtableid;

	while ((error = art_walk(ar, rtable_walk_helper, &rwc)) == EAGAIN)
		continue;

	return (error);
}

struct rtentry *
rtable_iterate(struct rtentry *rt0)
{
#ifndef SMALL_KERNEL
	struct rtentry *rt;

	KERNEL_ASSERT_LOCKED();

	rt = SRPL_NEXT_LOCKED(rt0, rt_next);
	if (rt != NULL)
		rtref(rt);
	rtfree(rt0);

	return (rt);
#else
	rtfree(rt0);
	return (NULL);
#endif /* SMALL_KERNEL */
}

#ifndef SMALL_KERNEL
int
rtable_mpath_capable(unsigned int rtableid, sa_family_t af)
{
	return (1);
}

int
rtable_mpath_reprio(unsigned int rtableid, struct sockaddr *dst,
    struct sockaddr *mask, uint8_t prio, struct rtentry *rt)
{
	struct art_root			*ar;
	struct art_node			*an;
	struct srp_ref			 sr;
	uint8_t				*addr;
	int				 plen;
	int				 error = 0;

	ar = rtable_get(rtableid, dst->sa_family);
	if (ar == NULL)
		return (EAFNOSUPPORT);

	addr = satoaddr(ar, dst);
	plen = rtable_satoplen(dst->sa_family, mask);

	rw_enter_write(&ar->ar_lock);
	an = art_lookup(ar, addr, plen, &sr);
	srp_leave(&sr);	/* an can't go away while we have the lock */

	/* Make sure we've got a perfect match. */
	if (an == NULL || an->an_plen != plen ||
	    memcmp(an->an_dst, dst, dst->sa_len))
		error = ESRCH;
	else {
		rtref(rt);	/* keep rt alive in between remove and insert */
		SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist,
		    rt, rtentry, rt_next);
		rt->rt_priority = prio;
		rtable_mpath_insert(an, rt);
		rtfree(rt);
	}
	rw_exit_write(&ar->ar_lock);

	return (error);
}
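/*
 * rtable_mpath_insert() keeps ``an_rtlist'' sorted by ascending
 * rt_priority, smaller value meaning higher priority: with existing
 * priorities 8 and 56 (illustrative values), a new route at priority
 * 16 is linked between them.
 */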
void
rtable_mpath_insert(struct art_node *an, struct rtentry *rt)
{
	struct rtentry			*mrt, *prt = NULL;
	uint8_t				 prio = rt->rt_priority;

	if ((mrt = SRPL_FIRST_LOCKED(&an->an_rtlist)) != NULL) {
		/*
		 * Select the order of the MPATH routes.
		 */
		while (SRPL_NEXT_LOCKED(mrt, rt_next) != NULL) {
			if (mrt->rt_priority > prio)
				break;
			prt = mrt;
			mrt = SRPL_NEXT_LOCKED(mrt, rt_next);
		}

		if (mrt->rt_priority > prio) {
			/*
			 * ``rt'' has a higher (smaller) priority than
			 * ``mrt'', so put it before in the list.
			 */
			if (prt != NULL) {
				SRPL_INSERT_AFTER_LOCKED(&rt_rc, prt, rt,
				    rt_next);
			} else {
				SRPL_INSERT_HEAD_LOCKED(&rt_rc, &an->an_rtlist,
				    rt, rt_next);
			}
		} else {
			SRPL_INSERT_AFTER_LOCKED(&rt_rc, mrt, rt, rt_next);
		}
	} else {
		SRPL_INSERT_HEAD_LOCKED(&rt_rc, &an->an_rtlist, rt, rt_next);
	}
}
#endif /* SMALL_KERNEL */

void
rtentry_ref(void *null, void *xrt)
{
	struct rtentry *rt = xrt;

	rtref(rt);
}

void
rtentry_unref(void *null, void *xrt)
{
	struct rtentry *rt = xrt;

	rtfree(rt);
}

/*
 * Return a pointer to the address (key).  This is a heritage from the
 * BSD radix tree needed to skip the non-address fields of the flavor
 * of "struct sockaddr" used by this routing table.
 */
static inline uint8_t *
satoaddr(struct art_root *at, struct sockaddr *sa)
{
	return (((uint8_t *)sa) + at->ar_off);
}
#endif /* ART */

/*
 * Return the prefix length of a mask.
 */
int
rtable_satoplen(sa_family_t af, struct sockaddr *mask)
{
	struct domain	*dp;
	uint8_t		*ap, *ep;
	int		 mlen, plen = 0;
	int		 i;

	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_rtoffset == 0)
			continue;

		if (af == dp->dom_family)
			break;
	}
	if (dp == NULL)
		return (-1);

	/* Host route */
	if (mask == NULL)
		return (dp->dom_maxplen);

	mlen = mask->sa_len;

	/* Default route */
	if (mlen == 0)
		return (0);

	ap = (uint8_t *)mask + dp->dom_rtoffset;
	ep = (uint8_t *)mask + mlen;
	if (ap > ep)
		return (-1);

	if (ap == ep)
		return (0);

	/*
	 * "Beauty" adapted from sbin/route/show.c: count contiguous one
	 * bits, e.g. a 255.255.252.0 mask is ff.ff.fc.00 and yields
	 * 8 + 8 + 6 = 22.
	 */
	while (ap < ep) {
		switch (*ap) {
		case 0xff:
			plen += 8;
			ap++;
			break;
		case 0xfe:
			plen += 7;
			ap++;
			goto out;
		case 0xfc:
			plen += 6;
			ap++;
			goto out;
		case 0xf8:
			plen += 5;
			ap++;
			goto out;
		case 0xf0:
			plen += 4;
			ap++;
			goto out;
		case 0xe0:
			plen += 3;
			ap++;
			goto out;
		case 0xc0:
			plen += 2;
			ap++;
			goto out;
		case 0x80:
			plen += 1;
			ap++;
			goto out;
		case 0x00:
			goto out;
		default:
			/* Non-contiguous mask. */
			return (-1);
		}
	}

out:
#ifdef DIAGNOSTIC
	for (; ap < ep; ap++) {
		if (*ap != 0x00)
			return (-1);
	}
#endif /* DIAGNOSTIC */

	return (plen);
}