/*	$OpenBSD: rtable.c,v 1.50 2016/07/19 10:51:44 mpi Exp $ */

/*
 * Copyright (c) 2014-2015 Martin Pieuchot
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#ifndef _KERNEL
#include "kern_compat.h"
#else
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/socket.h>
#include <sys/malloc.h>
#include <sys/queue.h>
#include <sys/domain.h>
#include <sys/srp.h>
#endif

#include <net/rtable.h>
#include <net/route.h>

/*
 * Structures used by rtable_get() to retrieve the corresponding
 * routing table for a given pair of ``af'' and ``rtableid''.
 *
 * Note that once allocated, routing table heads are never freed.
 * This way we do not need to reference count them.
 *
 *      afmap               rtmap/dommp
 *   -----------          ---------     -----
 *   |    0    |--------> | 0 | 0 | ... | 0 |   Array mapping rtableid (=index)
 *   -----------          ---------     -----   to rdomain (=value).
 *   | AF_INET |.
 *   ----------- `.       .---------.     .---------.
 *       ...       `----> | rtable0 | ... | rtableN |   Array of pointers for
 *   -----------          '---------'     '---------'   IPv4 routing tables
 *   | AF_MPLS |                                        indexed by ``rtableid''.
 *   -----------
 */
struct srp      *afmap;
uint8_t          af2idx[AF_MAX+1];      /* To only allocate supported AF */
uint8_t          af2idx_max;

/* Array of routing table pointers. */
struct rtmap {
        unsigned int     limit;
        void           **tbl;
};

/* Array of rtableid -> rdomain mapping. */
struct dommp {
        unsigned int     limit;
        unsigned int    *dom;
};

unsigned int     rtmap_limit = 0;

void             rtmap_init(void);
void             rtmap_grow(unsigned int, sa_family_t);
void             rtmap_dtor(void *, void *);

struct srp_gc    rtmap_gc = SRP_GC_INITIALIZER(rtmap_dtor, NULL);

void             rtable_init_backend(unsigned int);
void            *rtable_alloc(unsigned int, unsigned int, unsigned int);
void            *rtable_get(unsigned int, sa_family_t);

void
rtmap_init(void)
{
        struct domain   *dp;
        int              i;

        /* Start with a single table for every domain that requires it. */
        for (i = 0; (dp = domains[i]) != NULL; i++) {
                if (dp->dom_rtoffset == 0)
                        continue;

                rtmap_grow(1, dp->dom_family);
        }

        /* Initialize the rtableid->rdomain mapping table. */
        rtmap_grow(1, 0);

        rtmap_limit = 1;
}
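
/*
 * Illustrative sketch of the two-level layout above (an assumption-free
 * restatement of rtable_get() below, not compiled code): finding the
 * IPv4 table for rtable 2 dereferences both levels:
 *
 *	map = srp_enter(&sr, &afmap[af2idx[AF_INET]]);
 *	tbl = (2 < map->limit) ? map->tbl[2] : NULL;
 *	srp_leave(&sr);
 *
 * Slot 0 of ``afmap'' holds the rtableid -> rdomain map instead of
 * routing tables, which is why af2idx_max starts at 1 in rtable_init().
 */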

/*
 * Grow the size of the array of routing tables for AF ``af'' to ``nlimit''.
 */
void
rtmap_grow(unsigned int nlimit, sa_family_t af)
{
        struct rtmap    *map, *nmap;
        int              i;

        KERNEL_ASSERT_LOCKED();

        KASSERT(nlimit > rtmap_limit);

        nmap = malloc(sizeof(*nmap), M_RTABLE, M_WAITOK);
        nmap->limit = nlimit;
        nmap->tbl = mallocarray(nlimit, sizeof(*nmap[0].tbl), M_RTABLE,
            M_WAITOK|M_ZERO);

        map = srp_get_locked(&afmap[af2idx[af]]);
        if (map != NULL) {
                KASSERT(map->limit == rtmap_limit);

                for (i = 0; i < map->limit; i++)
                        nmap->tbl[i] = map->tbl[i];
        }

        srp_update_locked(&rtmap_gc, &afmap[af2idx[af]], nmap);
}

void
rtmap_dtor(void *null, void *xmap)
{
        struct rtmap    *map = xmap;

        /*
         * Doesn't need to be serialized since this is the last reference
         * to this map.  There's nothing to race against.
         */
        free(map->tbl, M_RTABLE, map->limit * sizeof(*map[0].tbl));
        free(map, M_RTABLE, sizeof(*map));
}

void
rtable_init(void)
{
        struct domain   *dp;
        unsigned int     keylen = 0;
        int              i;

        /* We use index 0 for the rtable/rdomain map. */
        af2idx_max = 1;
        memset(af2idx, 0, sizeof(af2idx));

        /*
         * Compute the maximum supported key length in case the routing
         * table backend needs it.
         */
        for (i = 0; (dp = domains[i]) != NULL; i++) {
                if (dp->dom_rtoffset == 0)
                        continue;

                af2idx[dp->dom_family] = af2idx_max++;
                if (dp->dom_rtkeylen > keylen)
                        keylen = dp->dom_rtkeylen;
        }
        rtable_init_backend(keylen);

        /*
         * Allocate the AF-to-id table now that we know how many AFs this
         * kernel supports.
         */
        afmap = mallocarray(af2idx_max + 1, sizeof(*afmap), M_RTABLE,
            M_WAITOK|M_ZERO);

        rtmap_init();
}

int
rtable_add(unsigned int id)
{
        struct domain   *dp;
        void            *tbl;
        struct rtmap    *map;
        struct dommp    *dmm;
        sa_family_t      af;
        unsigned int     off, alen;
        int              i;

        KERNEL_ASSERT_LOCKED();

        if (id > RT_TABLEID_MAX)
                return (EINVAL);

        if (rtable_exists(id))
                return (EEXIST);

        for (i = 0; (dp = domains[i]) != NULL; i++) {
                if (dp->dom_rtoffset == 0)
                        continue;

                af = dp->dom_family;
                off = dp->dom_rtoffset;
                alen = dp->dom_maxplen;

                if (id >= rtmap_limit)
                        rtmap_grow(id + 1, af);

                tbl = rtable_alloc(id, alen, off);
                if (tbl == NULL)
                        return (ENOMEM);

                map = srp_get_locked(&afmap[af2idx[af]]);
                map->tbl[id] = tbl;
        }

        /* Reflect possible growth. */
        if (id >= rtmap_limit) {
                rtmap_grow(id + 1, 0);
                rtmap_limit = id + 1;
        }

        /* Use main rtable/rdomain by default. */
        dmm = srp_get_locked(&afmap[0]);
        dmm->dom[id] = 0;

        return (0);
}
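
/*
 * Worked example for the function above: calling rtable_add(5) when only
 * rtable 0 exists grows each per-AF array (and the rdomain map) to a
 * limit of 6, allocates one routing table per supported AF at index 5
 * and leaves the new rtable attached to rdomain 0 by default.
 */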

void *
rtable_get(unsigned int rtableid, sa_family_t af)
{
        struct rtmap    *map;
        void            *tbl = NULL;
        struct srp_ref   sr;

        if (af >= nitems(af2idx) || af2idx[af] == 0)
                return (NULL);

        map = srp_enter(&sr, &afmap[af2idx[af]]);
        if (rtableid < map->limit)
                tbl = map->tbl[rtableid];
        srp_leave(&sr);

        return (tbl);
}

int
rtable_exists(unsigned int rtableid)
{
        struct domain   *dp;
        void            *tbl;
        int              i;

        for (i = 0; (dp = domains[i]) != NULL; i++) {
                if (dp->dom_rtoffset == 0)
                        continue;

                tbl = rtable_get(rtableid, dp->dom_family);
                if (tbl != NULL)
                        return (1);
        }

        return (0);
}

unsigned int
rtable_l2(unsigned int rtableid)
{
        struct dommp    *dmm;
        unsigned int     rdomain = 0;
        struct srp_ref   sr;

        dmm = srp_enter(&sr, &afmap[0]);
        if (rtableid < dmm->limit)
                rdomain = dmm->dom[rtableid];
        srp_leave(&sr);

        return (rdomain);
}

void
rtable_l2set(unsigned int rtableid, unsigned int rdomain)
{
        struct dommp    *dmm;

        KERNEL_ASSERT_LOCKED();

        if (!rtable_exists(rtableid) || !rtable_exists(rdomain))
                return;

        dmm = srp_get_locked(&afmap[0]);
        dmm->dom[rtableid] = rdomain;
}

#ifndef ART
void
rtable_init_backend(unsigned int keylen)
{
        rn_init(keylen);        /* initialize all zeroes, all ones, mask table */
}

void *
rtable_alloc(unsigned int rtableid, unsigned int alen, unsigned int off)
{
        struct radix_node_head *rnh = NULL;

        if (rn_inithead((void **)&rnh, off)) {
#ifndef SMALL_KERNEL
                rnh->rnh_multipath = 1;
#endif /* SMALL_KERNEL */
                rnh->rnh_rtableid = rtableid;
        }

        return (rnh);
}

struct rtentry *
rtable_lookup(unsigned int rtableid, struct sockaddr *dst,
    struct sockaddr *mask, struct sockaddr *gateway, uint8_t prio)
{
        struct radix_node_head  *rnh;
        struct radix_node       *rn;
        struct rtentry          *rt;

        rnh = rtable_get(rtableid, dst->sa_family);
        if (rnh == NULL)
                return (NULL);

        rn = rn_lookup(dst, mask, rnh);
        if (rn == NULL || (rn->rn_flags & RNF_ROOT) != 0)
                return (NULL);

        rt = ((struct rtentry *)rn);

#ifndef SMALL_KERNEL
        if (rnh->rnh_multipath) {
                rt = rt_mpath_matchgate(rt, gateway, prio);
                if (rt == NULL)
                        return (NULL);
        }
#endif /* !SMALL_KERNEL */

        rtref(rt);
        return (rt);
}
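
/*
 * Note the contrast with rtable_match() below: rn_lookup() only returns
 * an entry whose destination/netmask pair is exactly the one given
 * (e.g. 192.168.1.0/24), while rn_match() does a longest-prefix match
 * on a destination address and may fall back to a less specific route,
 * ultimately the default route.
 */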

struct rtentry *
rtable_match(unsigned int rtableid, struct sockaddr *dst, uint32_t *src)
{
        struct radix_node_head  *rnh;
        struct radix_node       *rn;
        struct rtentry          *rt = NULL;
#ifndef SMALL_KERNEL
        int                      hash;
#endif /* SMALL_KERNEL */

        rnh = rtable_get(rtableid, dst->sa_family);
        if (rnh == NULL)
                return (NULL);

        KERNEL_LOCK();
        rn = rn_match(dst, rnh);
        if (rn == NULL || (rn->rn_flags & RNF_ROOT) != 0)
                goto out;

        rt = ((struct rtentry *)rn);
        rtref(rt);

#ifndef SMALL_KERNEL
        /* Gateway selection by Hash-Threshold (RFC 2992) */
        if ((hash = rt_hash(rt, dst, src)) != -1) {
                struct rtentry          *mrt = rt;
                int                      threshold, npaths = 1;

                KASSERT(hash <= 0xffff);

                while ((mrt = rtable_mpath_next(mrt)) != NULL)
                        npaths++;

                threshold = (0xffff / npaths) + 1;

                mrt = rt;
                while (hash > threshold && mrt != NULL) {
                        /* stay within the multipath routes */
                        mrt = rtable_mpath_next(mrt);
                        hash -= threshold;
                }

                /* if gw selection fails, use the first match (default) */
                if (mrt != NULL) {
                        rtref(mrt);
                        rtfree(rt);
                        rt = mrt;
                }
        }
#endif /* SMALL_KERNEL */
out:
        KERNEL_UNLOCK();
        return (rt);
}

int
rtable_insert(unsigned int rtableid, struct sockaddr *dst,
    struct sockaddr *mask, struct sockaddr *gateway, uint8_t prio,
    struct rtentry *rt)
{
        struct radix_node_head  *rnh;
        struct radix_node       *rn = (struct radix_node *)rt;

        rnh = rtable_get(rtableid, dst->sa_family);
        if (rnh == NULL)
                return (EAFNOSUPPORT);

#ifndef SMALL_KERNEL
        if (rnh->rnh_multipath) {
                /* Do not permit exactly the same dst/mask/gw pair. */
                if (rt_mpath_conflict(rnh, dst, mask, gateway, prio,
                    ISSET(rt->rt_flags, RTF_MPATH))) {
                        return (EEXIST);
                }
        }
#endif /* SMALL_KERNEL */

        rn = rn_addroute(dst, mask, rnh, rn, prio);
        if (rn == NULL)
                return (ESRCH);

        rt = ((struct rtentry *)rn);
        rtref(rt);

        return (0);
}

int
rtable_delete(unsigned int rtableid, struct sockaddr *dst,
    struct sockaddr *mask, struct rtentry *rt)
{
        struct radix_node_head  *rnh;
        struct radix_node       *rn = (struct radix_node *)rt;

        rnh = rtable_get(rtableid, dst->sa_family);
        if (rnh == NULL)
                return (EAFNOSUPPORT);

        rn = rn_delete(dst, mask, rnh, rn);
        if (rn == NULL)
                return (ESRCH);

        if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT))
                panic("active node flags=%x", rn->rn_flags);

        rt = ((struct rtentry *)rn);
        rtfree(rt);

        return (0);
}

int
rtable_walk(unsigned int rtableid, sa_family_t af,
    int (*func)(struct rtentry *, void *, unsigned int), void *arg)
{
        struct radix_node_head  *rnh;
        int (*f)(struct radix_node *, void *, unsigned int) = (void *)func;
        int error;

        rnh = rtable_get(rtableid, af);
        if (rnh == NULL)
                return (EAFNOSUPPORT);

        while ((error = rn_walktree(rnh, f, arg)) == EAGAIN)
                continue;

        return (error);
}

#ifndef SMALL_KERNEL
int
rtable_mpath_capable(unsigned int rtableid, sa_family_t af)
{
        struct radix_node_head  *rnh;
        int mpath;

        rnh = rtable_get(rtableid, af);
        if (rnh == NULL)
                return (0);

        mpath = rnh->rnh_multipath;
        return (mpath);
}

int
rtable_mpath_reprio(unsigned int rtableid, struct sockaddr *dst,
    struct sockaddr *mask, uint8_t prio, struct rtentry *rt)
{
        struct radix_node       *rn = (struct radix_node *)rt;

        rn_mpath_reprio(rn, prio);

        return (0);
}

struct rtentry *
rtable_mpath_next(struct rtentry *rt)
{
        struct radix_node       *rn = (struct radix_node *)rt;

        return ((struct rtentry *)rn_mpath_next(rn, RMP_MODE_ACTIVE));
}
#endif /* SMALL_KERNEL */
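
/*
 * The ART backend below provides the same set of entry points as the
 * radix backend above; which implementation is compiled in is selected
 * by the ART define.
 */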

#else /* ART */

static inline uint8_t   *satoaddr(struct art_root *, struct sockaddr *);

void    rtentry_ref(void *, void *);
void    rtentry_unref(void *, void *);

struct srpl_rc rt_rc = SRPL_RC_INITIALIZER(rtentry_ref, rtentry_unref, NULL);

void
rtable_init_backend(unsigned int keylen)
{
        art_init();
}

void *
rtable_alloc(unsigned int rtableid, unsigned int alen, unsigned int off)
{
        return (art_alloc(rtableid, alen, off));
}

struct rtentry *
rtable_lookup(unsigned int rtableid, struct sockaddr *dst,
    struct sockaddr *mask, struct sockaddr *gateway, uint8_t prio)
{
        struct art_root         *ar;
        struct art_node         *an;
        struct rtentry          *rt = NULL;
        struct srp_ref           sr, nsr;
        uint8_t                 *addr;
        int                      plen;

        ar = rtable_get(rtableid, dst->sa_family);
        if (ar == NULL)
                return (NULL);

        addr = satoaddr(ar, dst);

        /* No need for a perfect match. */
        if (mask == NULL) {
                an = art_match(ar, addr, &nsr);
                if (an == NULL)
                        goto out;
        } else {
                plen = rtable_satoplen(dst->sa_family, mask);
                if (plen == -1)
                        return (NULL);

                an = art_lookup(ar, addr, plen, &nsr);

                /* Make sure we've got a perfect match. */
                if (an == NULL || an->an_plen != plen ||
                    memcmp(an->an_dst, dst, dst->sa_len))
                        goto out;
        }

#ifdef SMALL_KERNEL
        rt = SRPL_ENTER(&sr, &an->an_rtlist);
#else
        SRPL_FOREACH(rt, &sr, &an->an_rtlist, rt_next) {
                if (prio != RTP_ANY &&
                    (rt->rt_priority & RTP_MASK) != (prio & RTP_MASK))
                        continue;

                if (gateway == NULL)
                        break;

                if (rt->rt_gateway->sa_len == gateway->sa_len &&
                    memcmp(rt->rt_gateway, gateway, gateway->sa_len) == 0)
                        break;
        }
#endif /* SMALL_KERNEL */
        if (rt != NULL)
                rtref(rt);

        SRPL_LEAVE(&sr);
out:
        srp_leave(&nsr);

        return (rt);
}
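
/*
 * For example, a lookup of 192.168.1.0 with a 255.255.255.0 netmask
 * translates into art_lookup(ar, addr, 24, &nsr), whereas passing a
 * NULL mask asks art_match() for the longest-prefix match instead.
 */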

struct rtentry *
rtable_match(unsigned int rtableid, struct sockaddr *dst, uint32_t *src)
{
        struct art_root         *ar;
        struct art_node         *an;
        struct rtentry          *rt = NULL;
        struct srp_ref           sr, nsr;
        uint8_t                 *addr;
#ifndef SMALL_KERNEL
        int                      hash;
#endif /* SMALL_KERNEL */

        ar = rtable_get(rtableid, dst->sa_family);
        if (ar == NULL)
                return (NULL);

        addr = satoaddr(ar, dst);

        an = art_match(ar, addr, &nsr);
        if (an == NULL)
                goto out;

        rt = SRPL_ENTER(&sr, &an->an_rtlist);
        rtref(rt);
        SRPL_LEAVE(&sr);

#ifndef SMALL_KERNEL
        /* Gateway selection by Hash-Threshold (RFC 2992) */
        if ((hash = rt_hash(rt, dst, src)) != -1) {
                struct rtentry          *mrt;
                int                      threshold, npaths = 0;

                KASSERT(hash <= 0xffff);

                SRPL_FOREACH(mrt, &sr, &an->an_rtlist, rt_next) {
                        /* Only count nexthops with the same priority. */
                        if (mrt->rt_priority == rt->rt_priority)
                                npaths++;
                }
                SRPL_LEAVE(&sr);

                threshold = (0xffff / npaths) + 1;

                /*
                 * we have no protection against concurrent modification of the
                 * route list attached to the node, so we won't necessarily
                 * have the same number of routes.  for most modifications,
                 * we'll pick a route that we wouldn't have if we only saw the
                 * list before or after the change.  if we were going to use
                 * the last available route, but it got removed, we'll hit
                 * the end of the list and then pick the first route.
                 */

                mrt = SRPL_ENTER(&sr, &an->an_rtlist);
                while (hash > threshold && mrt != NULL) {
                        if (mrt->rt_priority == rt->rt_priority)
                                hash -= threshold;
                        mrt = SRPL_NEXT(&sr, mrt, rt_next);
                }

                if (mrt != NULL) {
                        rtref(mrt);
                        rtfree(rt);
                        rt = mrt;
                }
                SRPL_LEAVE(&sr);
        }
#endif /* SMALL_KERNEL */
out:
        srp_leave(&nsr);
        return (rt);
}
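
/*
 * Hash-Threshold example for the gateway selection above, assuming four
 * paths of equal priority: threshold = (0xffff / 4) + 1 = 0x4000, so a
 * flow hashing to 0x9c3a skips two gateways (0x9c3a - 2 * 0x4000 =
 * 0x1c3a <= 0x4000) and is forwarded via the third route in the list.
 */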

int
rtable_insert(unsigned int rtableid, struct sockaddr *dst,
    struct sockaddr *mask, struct sockaddr *gateway, uint8_t prio,
    struct rtentry *rt)
{
#ifndef SMALL_KERNEL
        struct rtentry          *mrt;
        struct srp_ref           sr;
#endif /* SMALL_KERNEL */
        struct art_root         *ar;
        struct art_node         *an, *prev;
        uint8_t                 *addr;
        int                      plen;
        unsigned int             rt_flags;
        int                      error = 0;

        KERNEL_ASSERT_LOCKED();

        ar = rtable_get(rtableid, dst->sa_family);
        if (ar == NULL)
                return (EAFNOSUPPORT);

        addr = satoaddr(ar, dst);
        plen = rtable_satoplen(dst->sa_family, mask);
        if (plen == -1)
                return (EINVAL);

        rtref(rt); /* guarantee rtfree won't do anything during insert */

#ifndef SMALL_KERNEL
        /* Do not permit exactly the same dst/mask/gw pair. */
        an = art_lookup(ar, addr, plen, &sr);
        srp_leave(&sr); /* an can't go away while we have the lock */
        if (an != NULL && an->an_plen == plen &&
            !memcmp(an->an_dst, dst, dst->sa_len)) {
                struct rtentry  *mrt;
                int              mpathok = ISSET(rt->rt_flags, RTF_MPATH);

                SRPL_FOREACH_LOCKED(mrt, &an->an_rtlist, rt_next) {
                        if (prio != RTP_ANY &&
                            (mrt->rt_priority & RTP_MASK) != (prio & RTP_MASK))
                                continue;

                        if (!mpathok ||
                            (mrt->rt_gateway->sa_len == gateway->sa_len &&
                            !memcmp(mrt->rt_gateway, gateway, gateway->sa_len))){
                                error = EEXIST;
                                goto leave;
                        }
                }
        }
#endif /* SMALL_KERNEL */

        an = art_get(dst, plen);
        if (an == NULL) {
                error = ENOBUFS;
                goto leave;
        }

        /* prepare for immediate operation if insert succeeds */
        rt_flags = rt->rt_flags;
        rt->rt_flags &= ~RTF_MPATH;
        rt->rt_dest = dst;
        rt->rt_plen = plen;
        SRPL_INSERT_HEAD_LOCKED(&rt_rc, &an->an_rtlist, rt, rt_next);

        prev = art_insert(ar, an, addr, plen);
        if (prev != an) {
                SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry,
                    rt_next);
                rt->rt_flags = rt_flags;
                art_put(an);

                if (prev == NULL) {
                        error = ESRCH;
                        goto leave;
                }

#ifndef SMALL_KERNEL
                an = prev;

                mrt = SRPL_FIRST_LOCKED(&an->an_rtlist);
                KASSERT(mrt != NULL);
                KASSERT((rt->rt_flags & RTF_MPATH) || mrt->rt_priority != prio);

                /*
                 * An ART node with the same destination/netmask already
                 * exists, the MPATH conflict must already have been checked.
                 */
                if (rt->rt_flags & RTF_MPATH) {
                        /*
                         * Only keep the RTF_MPATH flag if two routes have
                         * the same priority.
                         */
                        rt->rt_flags &= ~RTF_MPATH;
                        SRPL_FOREACH_LOCKED(mrt, &an->an_rtlist, rt_next) {
                                if (mrt->rt_priority == prio) {
                                        mrt->rt_flags |= RTF_MPATH;
                                        rt->rt_flags |= RTF_MPATH;
                                }
                        }
                }

                SRPL_INSERT_HEAD_LOCKED(&rt_rc, &an->an_rtlist, rt, rt_next);

                /* Put the newly inserted entry at the right place. */
                rtable_mpath_reprio(rtableid, dst, mask, rt->rt_priority, rt);
#else
                error = EEXIST;
#endif /* SMALL_KERNEL */
        }
leave:
        rtfree(rt);
        return (error);
}

int
rtable_delete(unsigned int rtableid, struct sockaddr *dst,
    struct sockaddr *mask, struct rtentry *rt)
{
        struct art_root         *ar;
        struct art_node         *an;
        struct srp_ref           sr;
        uint8_t                 *addr;
        int                      plen;
#ifndef SMALL_KERNEL
        struct rtentry          *mrt;
        int                      npaths = 0;
#endif /* SMALL_KERNEL */

        ar = rtable_get(rtableid, dst->sa_family);
        if (ar == NULL)
                return (EAFNOSUPPORT);

        addr = satoaddr(ar, dst);
        plen = rtable_satoplen(dst->sa_family, mask);

        KERNEL_ASSERT_LOCKED();

        an = art_lookup(ar, addr, plen, &sr);
        srp_leave(&sr); /* an can't go away while we have the lock */

        /* Make sure we've got a perfect match. */
        if (an == NULL || an->an_plen != plen ||
            memcmp(an->an_dst, dst, dst->sa_len))
                return (ESRCH);

#ifndef SMALL_KERNEL
        /*
         * If other multipath route entries are still attached to
         * this ART node we only have to unlink it.
         */
        SRPL_FOREACH_LOCKED(mrt, &an->an_rtlist, rt_next)
                npaths++;

        if (npaths > 1) {
                KASSERT(rt->rt_refcnt >= 1);
                SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry,
                    rt_next);

                mrt = SRPL_FIRST_LOCKED(&an->an_rtlist);
                an->an_dst = mrt->rt_dest;
                if (npaths == 2)
                        mrt->rt_flags &= ~RTF_MPATH;
                return (0);
        }
#endif /* SMALL_KERNEL */

        if (art_delete(ar, an, addr, plen) == NULL)
                return (ESRCH);

        KASSERT(rt->rt_refcnt >= 1);
        SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry, rt_next);

        art_put(an);
        return (0);
}
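
/*
 * In rtable_insert() above, art_insert() returning a node other than
 * ``an'' means the prefix was already present (or, for NULL, that the
 * insertion failed): the tentatively linked route is unlinked again
 * and, in the multipath case, moved onto the existing node's list.
 * Symmetrically, rtable_delete() only unlinks the rtentry as long as
 * other multipath routes still share the ART node.
 */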

struct rtable_walk_cookie {
        int             (*rwc_func)(struct rtentry *, void *, unsigned int);
        void             *rwc_arg;
        unsigned int      rwc_rid;
};

/*
 * Helper for rtable_walk to keep the ART code free from any "struct rtentry".
 */
int
rtable_walk_helper(struct art_node *an, void *xrwc)
{
        struct srp_ref                   sr;
        struct rtable_walk_cookie      *rwc = xrwc;
        struct rtentry                  *rt;
        int                              error = 0;

        SRPL_FOREACH(rt, &sr, &an->an_rtlist, rt_next) {
                if ((error = (*rwc->rwc_func)(rt, rwc->rwc_arg, rwc->rwc_rid)))
                        break;
        }
        SRPL_LEAVE(&sr);

        return (error);
}

int
rtable_walk(unsigned int rtableid, sa_family_t af,
    int (*func)(struct rtentry *, void *, unsigned int), void *arg)
{
        struct art_root                 *ar;
        struct rtable_walk_cookie       rwc;
        int                              error;

        ar = rtable_get(rtableid, af);
        if (ar == NULL)
                return (EAFNOSUPPORT);

        rwc.rwc_func = func;
        rwc.rwc_arg = arg;
        rwc.rwc_rid = rtableid;

        while ((error = art_walk(ar, rtable_walk_helper, &rwc)) == EAGAIN)
                continue;

        return (error);
}

#ifndef SMALL_KERNEL
int
rtable_mpath_capable(unsigned int rtableid, sa_family_t af)
{
        return (1);
}

int
rtable_mpath_reprio(unsigned int rtableid, struct sockaddr *dst,
    struct sockaddr *mask, uint8_t prio, struct rtentry *rt)
{
        struct art_root         *ar;
        struct art_node         *an;
        struct srp_ref           sr;
        uint8_t                 *addr;
        int                      plen;
        struct rtentry          *mrt, *prt = NULL;

        ar = rtable_get(rtableid, dst->sa_family);
        if (ar == NULL)
                return (EAFNOSUPPORT);

        addr = satoaddr(ar, dst);
        plen = rtable_satoplen(dst->sa_family, mask);

        KERNEL_ASSERT_LOCKED();

        an = art_lookup(ar, addr, plen, &sr);
        srp_leave(&sr); /* an can't go away while we have the lock */

        /* Make sure we've got a perfect match. */
        if (an == NULL || an->an_plen != plen ||
            memcmp(an->an_dst, dst, dst->sa_len))
                return (ESRCH);

        rtref(rt); /* keep rt alive in between remove and add */
        SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry, rt_next);
        rt->rt_priority = prio;

        if ((mrt = SRPL_FIRST_LOCKED(&an->an_rtlist)) != NULL) {
                /*
                 * Select the order of the MPATH routes.
                 */
                while (SRPL_NEXT_LOCKED(mrt, rt_next) != NULL) {
                        if (mrt->rt_priority > prio)
                                break;
                        prt = mrt;
                        mrt = SRPL_NEXT_LOCKED(mrt, rt_next);
                }

                if (mrt->rt_priority > prio) {
                        /*
                         * ``rt'' has a higher (smaller) priority than
                         * ``mrt'', so insert it before ``mrt'' in the list.
                         */
                        if (prt != NULL) {
                                SRPL_INSERT_AFTER_LOCKED(&rt_rc, prt, rt,
                                    rt_next);
                        } else {
                                SRPL_INSERT_HEAD_LOCKED(&rt_rc, &an->an_rtlist,
                                    rt, rt_next);
                        }
                } else {
                        SRPL_INSERT_AFTER_LOCKED(&rt_rc, mrt, rt, rt_next);
                }
        } else {
                SRPL_INSERT_HEAD_LOCKED(&rt_rc, &an->an_rtlist, rt, rt_next);
        }
        rtfree(rt);

        return (0);
}

struct rtentry *
rtable_mpath_next(struct rtentry *rt)
{
        KERNEL_ASSERT_LOCKED();
        return (SRPL_NEXT_LOCKED(rt, rt_next));
}
#endif /* SMALL_KERNEL */

void
rtentry_ref(void *null, void *xrt)
{
        struct rtentry *rt = xrt;

        rtref(rt);
}

void
rtentry_unref(void *null, void *xrt)
{
        struct rtentry *rt = xrt;

        rtfree(rt);
}
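
/*
 * The two helpers above are tied to every SRP list of routes through
 * ``rt_rc'': the list code uses them to hold a reference on each
 * rtentry while it sits on an ``an_rtlist'' and to drop it only once
 * the last SRP reader is done with the entry.
 */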
993 */ 994 static inline uint8_t * 995 satoaddr(struct art_root *at, struct sockaddr *sa) 996 { 997 return (((uint8_t *)sa) + at->ar_off); 998 } 999 #endif /* ART */ 1000 1001 /* 1002 * Return the prefix length of a mask. 1003 */ 1004 int 1005 rtable_satoplen(sa_family_t af, struct sockaddr *mask) 1006 { 1007 struct domain *dp; 1008 uint8_t *ap, *ep; 1009 int mlen, plen = 0; 1010 int i; 1011 1012 for (i = 0; (dp = domains[i]) != NULL; i++) { 1013 if (dp->dom_rtoffset == 0) 1014 continue; 1015 1016 if (af == dp->dom_family) 1017 break; 1018 } 1019 if (dp == NULL) 1020 return (-1); 1021 1022 /* Host route */ 1023 if (mask == NULL) 1024 return (dp->dom_maxplen); 1025 1026 mlen = mask->sa_len; 1027 1028 /* Default route */ 1029 if (mlen == 0) 1030 return (0); 1031 1032 ap = (uint8_t *)((uint8_t *)mask) + dp->dom_rtoffset; 1033 ep = (uint8_t *)((uint8_t *)mask) + mlen; 1034 if (ap > ep) 1035 return (-1); 1036 1037 if (ap == ep) 1038 return (0); 1039 1040 /* "Beauty" adapted from sbin/route/show.c ... */ 1041 while (ap < ep) { 1042 switch (*ap) { 1043 case 0xff: 1044 plen += 8; 1045 ap++; 1046 break; 1047 case 0xfe: 1048 plen += 7; 1049 ap++; 1050 goto out; 1051 case 0xfc: 1052 plen += 6; 1053 ap++; 1054 goto out; 1055 case 0xf8: 1056 plen += 5; 1057 ap++; 1058 goto out; 1059 case 0xf0: 1060 plen += 4; 1061 ap++; 1062 goto out; 1063 case 0xe0: 1064 plen += 3; 1065 ap++; 1066 goto out; 1067 case 0xc0: 1068 plen += 2; 1069 ap++; 1070 goto out; 1071 case 0x80: 1072 plen += 1; 1073 ap++; 1074 goto out; 1075 case 0x00: 1076 goto out; 1077 default: 1078 /* Non contiguous mask. */ 1079 return (-1); 1080 } 1081 1082 } 1083 1084 out: 1085 #ifdef DIAGNOSTIC 1086 for (; ap < ep; ap++) { 1087 if (*ap != 0x00) 1088 return (-1); 1089 } 1090 #endif /* DIAGNOSTIC */ 1091 1092 return (plen); 1093 } 1094