/*	$OpenBSD: rde_rib.c,v 1.97 2008/11/21 17:41:22 claudio Exp $ */

/*
 * Copyright (c) 2003, 2004 Claudio Jeker <claudio@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/types.h>
#include <sys/queue.h>
#include <sys/hash.h>

#include <stdlib.h>
#include <string.h>

#include "bgpd.h"
#include "rde.h"

/*
 * BGP RIB -- Routing Information Base
 *
 * The RIB is built with one aspect in mind: speed -- more precisely,
 * update speed.  Therefore one thing needs to be avoided at all cost:
 * long table walks.  This is achieved by heavily linking the different
 * parts together.
 */

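/*
 * Editorial sketch (not part of the original file): because every object
 * is linked to its parents and siblings, per-peer operations need no walk
 * of the full prefix table.  For example, all prefixes announced by one
 * peer can be visited like this:
 *
 *	struct rde_aspath	*asp;
 *	struct prefix		*p;
 *
 *	LIST_FOREACH(asp, &peer->path_h, peer_l)
 *		LIST_FOREACH(p, &asp->prefix_h, path_l)
 *			...
 */
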
/* used to bump correct prefix counters */
#define PREFIX_COUNT(x, f, op)				\
	do {						\
		if (f & F_LOCAL)			\
			(x)->prefix_cnt += (op);	\
		if (f & F_ORIGINAL)			\
			(x)->adjrib_cnt += (op);	\
	} while (0)

/* path specific functions */

static void	path_link(struct rde_aspath *, struct rde_peer *);

struct path_table pathtable;

/* XXX the hash should also include communities and the other attrs */
#define PATH_HASH(x)						\
	&pathtable.path_hashtbl[hash32_buf((x)->data, (x)->len, HASHINIT) & \
	    pathtable.path_hashmask]

void
path_init(u_int32_t hashsize)
{
	u_int32_t	hs, i;

	for (hs = 1; hs < hashsize; hs <<= 1)
		;
	pathtable.path_hashtbl = calloc(hs, sizeof(struct aspath_head));
	if (pathtable.path_hashtbl == NULL)
		fatal("path_init");

	for (i = 0; i < hs; i++)
		LIST_INIT(&pathtable.path_hashtbl[i]);

	pathtable.path_hashmask = hs - 1;
}

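/*
 * Editorial note (not part of the original file): path_init() rounds the
 * configured hash size up to the next power of two so that PATH_HASH can
 * turn hash32_buf() into a table index with a single AND.  For example a
 * requested size of 1000 yields hs = 1024 and path_hashmask = 0x3ff.
 */
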
void
path_shutdown(void)
{
	u_int32_t	i;

	for (i = 0; i <= pathtable.path_hashmask; i++)
		if (!LIST_EMPTY(&pathtable.path_hashtbl[i]))
			log_warnx("path_shutdown: non-free table");

	free(pathtable.path_hashtbl);
}

void
path_update(struct rde_peer *peer, struct rde_aspath *nasp,
    struct bgpd_addr *prefix, int prefixlen, u_int32_t flags)
{
	struct rde_aspath	*asp;
	struct prefix		*p, *oldp = NULL;

	if (flags & F_LOCAL) {
		rde_send_pftable(nasp->pftableid, prefix, prefixlen, 0);
		rde_send_pftable_commit();
	}

	/*
	 * First try to find a prefix in the specified RIB or in the
	 * Adj-RIB-In.  This works because the Local-RIB has precedence
	 * over the Adj-RIB-In.  In the end this saves us some additional
	 * lookups.
	 */
	if ((p = prefix_get(peer, prefix, prefixlen, flags | F_ORIGINAL)) !=
	    NULL) {
		do {
			if (path_compare(nasp, p->aspath) == 0) {
				if ((p->flags & flags) == 0) {
					if (oldp != NULL) {
						asp = oldp->aspath;
						prefix_destroy(oldp);
						if (path_empty(asp))
							path_destroy(asp);
					}
					p->flags |= flags;
					PREFIX_COUNT(p->aspath, flags, 1);
					PREFIX_COUNT(peer, flags, 1);

					/* re-evaluate prefix */
					LIST_REMOVE(p, prefix_l);
					prefix_evaluate(p, p->prefix);
				}
				/* update last change */
				p->lastchange = time(NULL);
				return;
			}
			/*
			 * If the prefix is not already part of the
			 * Adj-RIB-In, do a lookup in there.  But keep the
			 * original prefix around so that it can be removed
			 * later.
			 */
			if (p->flags & F_ORIGINAL)
				break;
			oldp = p;
			p = prefix_get(peer, prefix, prefixlen, F_ORIGINAL);
		} while (p != NULL);
	}

	/* Do not try to move a prefix that is in the wrong RIB. */
	if (p == NULL || (p->flags & flags) == 0)
		p = oldp;

	/*
	 * Either the prefix does not exist or the path changed.
	 * In both cases look up the new aspath to make sure it is not
	 * already in the RIB.
	 */
	if ((asp = path_lookup(nasp, peer)) == NULL) {
		/* path not available, create and link a new one */
		asp = path_copy(nasp);
		path_link(asp, peer);
	}

	/* If the prefix was found, move it; else add it to the aspath. */
	if (p != NULL)
		prefix_move(asp, p, flags);
	else
		prefix_add(asp, prefix, prefixlen, flags);
}

int
path_compare(struct rde_aspath *a, struct rde_aspath *b)
{
	int	r;

	if (a->origin > b->origin)
		return (1);
	if (a->origin < b->origin)
		return (-1);
	if ((a->flags & ~F_ATTR_LINKED) > (b->flags & ~F_ATTR_LINKED))
		return (1);
	if ((a->flags & ~F_ATTR_LINKED) < (b->flags & ~F_ATTR_LINKED))
		return (-1);
	if (a->med > b->med)
		return (1);
	if (a->med < b->med)
		return (-1);
	if (a->lpref > b->lpref)
		return (1);
	if (a->lpref < b->lpref)
		return (-1);
	if (a->weight > b->weight)
		return (1);
	if (a->weight < b->weight)
		return (-1);
	if (a->rtlabelid > b->rtlabelid)
		return (1);
	if (a->rtlabelid < b->rtlabelid)
		return (-1);
	if (a->pftableid > b->pftableid)
		return (1);
	if (a->pftableid < b->pftableid)
		return (-1);

	r = aspath_compare(a->aspath, b->aspath);
	if (r == 0)
		r = nexthop_compare(a->nexthop, b->nexthop);
	if (r > 0)
		return (1);
	if (r < 0)
		return (-1);

	return (attr_compare(a, b));
}

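/*
 * Editorial note (not part of the original file): path_compare() is not
 * a preference ranking but an arbitrary total order over all attributes.
 * The cheap integer comparisons run first so that the expensive
 * aspath_compare() and attr_compare() calls are only reached for nearly
 * identical paths.  path_lookup() below relies on a return value of 0
 * meaning "identical attributes", which lets multiple prefixes share a
 * single rde_aspath object.
 */
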
struct rde_aspath *
path_lookup(struct rde_aspath *aspath, struct rde_peer *peer)
{
	struct aspath_head	*head;
	struct rde_aspath	*asp;

	head = PATH_HASH(aspath->aspath);

	LIST_FOREACH(asp, head, path_l) {
		if (peer == asp->peer && path_compare(aspath, asp) == 0)
			return (asp);
	}
	return (NULL);
}

void
path_remove(struct rde_aspath *asp)
{
	struct prefix		*p;
	struct bgpd_addr	 addr;

	while ((p = LIST_FIRST(&asp->prefix_h)) != NULL) {
		/* commit is done in peer_down() */
		pt_getaddr(p->prefix, &addr);
		if (p->flags & F_LOCAL)
			rde_send_pftable(p->aspath->pftableid, &addr,
			    p->prefix->prefixlen, 1);

		prefix_destroy(p);
	}
	path_destroy(asp);
}

/* called whenever an aspath is no longer linked to any prefix */
void
path_destroy(struct rde_aspath *asp)
{
	/* path_destroy can only unlink and free empty rde_aspath */
	if (asp->prefix_cnt != 0 || asp->active_cnt != 0 ||
	    asp->adjrib_cnt != 0)
		log_warnx("path_destroy: prefix count out of sync");

	nexthop_unlink(asp);
	LIST_REMOVE(asp, path_l);
	LIST_REMOVE(asp, peer_l);
	asp->peer = NULL;
	asp->nexthop = NULL;
	asp->flags &= ~F_ATTR_LINKED;

	path_put(asp);
}

int
path_empty(struct rde_aspath *asp)
{
	return (LIST_EMPTY(&asp->prefix_h));
}

/*
 * The path object is linked into multiple lists for fast access.
 * These are peer_l, path_l and nexthop_l.
 * peer_l:    list of all aspaths that belong to that peer
 * path_l:    hash list to find paths quickly
 * nexthop_l: list of all aspaths with an equal exit nexthop
 */
static void
path_link(struct rde_aspath *asp, struct rde_peer *peer)
{
	struct aspath_head	*head;

	head = PATH_HASH(asp->aspath);

	LIST_INSERT_HEAD(head, asp, path_l);
	LIST_INSERT_HEAD(&peer->path_h, asp, peer_l);
	asp->peer = peer;
	nexthop_link(asp);
	asp->flags |= F_ATTR_LINKED;
}

/*
 * copy asp to a new UNLINKED aspath, mainly for filtering
 */
struct rde_aspath *
path_copy(struct rde_aspath *asp)
{
	struct rde_aspath	*nasp;

	nasp = path_get();
	nasp->aspath = asp->aspath;
	if (nasp->aspath != NULL) {
		nasp->aspath->refcnt++;
		rdemem.aspath_refs++;
	}
	nasp->nexthop = asp->nexthop;
	nasp->med = asp->med;
	nasp->lpref = asp->lpref;
	nasp->weight = asp->weight;
	nasp->origin = asp->origin;
	nasp->rtlabelid = asp->rtlabelid;
	rtlabel_ref(nasp->rtlabelid);
	nasp->pftableid = asp->pftableid;
	pftable_ref(nasp->pftableid);

	nasp->flags = asp->flags & ~F_ATTR_LINKED;
	attr_copy(nasp, asp);

	return (nasp);
}

/* alloc and initialize new entry. May not fail. */
struct rde_aspath *
path_get(void)
{
	struct rde_aspath	*asp;

	asp = calloc(1, sizeof(*asp));
	if (asp == NULL)
		fatal("path_get");
	rdemem.path_cnt++;

	LIST_INIT(&asp->prefix_h);
	asp->origin = ORIGIN_INCOMPLETE;
	asp->lpref = DEFAULT_LPREF;
	/* med = 0 */
	/* weight = 0 */
	/* rtlabel = 0 */

	return (asp);
}

/* free an unlinked element */
void
path_put(struct rde_aspath *asp)
{
	if (asp == NULL)
		return;

	if (asp->flags & F_ATTR_LINKED)
		fatalx("path_put: linked object");

	rtlabel_unref(asp->rtlabelid);
	pftable_unref(asp->pftableid);
	aspath_put(asp->aspath);
	attr_freeall(asp);
	rdemem.path_cnt--;
	free(asp);
}

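/*
 * Editorial sketch (not part of the original file): the typical aspath
 * life cycle as driven by path_update() above:
 *
 *	asp = path_copy(nasp);		-- unlinked, refcounted copy
 *	path_link(asp, peer);		-- enter hash, peer and nexthop lists
 *	...prefixes are added to and removed from asp...
 *	if (path_empty(asp))
 *		path_destroy(asp);	-- unlink, then path_put() frees
 */
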
/* prefix specific functions */

static struct prefix	*prefix_alloc(void);
static void		 prefix_free(struct prefix *);
static void		 prefix_link(struct prefix *, struct pt_entry *,
			     struct rde_aspath *, u_int32_t);
static void		 prefix_unlink(struct prefix *);

int
prefix_compare(const struct bgpd_addr *a, const struct bgpd_addr *b,
    int prefixlen)
{
	in_addr_t	mask, aa, ba;
	int		i;
	u_int8_t	m;

	if (a->af != b->af)
		return (a->af - b->af);

	switch (a->af) {
	case AF_INET:
		if (prefixlen > 32)
			fatalx("prefix_compare: bad IPv4 prefixlen");
		mask = htonl(prefixlen2mask(prefixlen));
		aa = ntohl(a->v4.s_addr & mask);
		ba = ntohl(b->v4.s_addr & mask);
		if (aa != ba)
			return (aa > ba ? 1 : -1);
		return (0);
	case AF_INET6:
		if (prefixlen > 128)
			fatalx("prefix_compare: bad IPv6 prefixlen");
		for (i = 0; i < prefixlen / 8; i++)
			if (a->v6.s6_addr[i] != b->v6.s6_addr[i])
				return (a->v6.s6_addr[i] - b->v6.s6_addr[i]);
		i = prefixlen % 8;
		if (i) {
			m = 0xff00 >> i;
			if ((a->v6.s6_addr[prefixlen / 8] & m) !=
			    (b->v6.s6_addr[prefixlen / 8] & m))
				return ((a->v6.s6_addr[prefixlen / 8] & m) -
				    (b->v6.s6_addr[prefixlen / 8] & m));
		}
		return (0);
	default:
		fatalx("prefix_compare: unknown af");
	}
	return (-1);
}

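/*
 * Editorial example (not part of the original file): for an IPv6
 * prefixlen that is not a multiple of 8 only the leading bits of the
 * last byte are compared.  E.g. prefixlen 37: i = 37 % 8 = 5, so
 * m = 0xff00 >> 5 = 0x07f8, which the u_int8_t m truncates to 0xf8,
 * i.e. the top five bits of address byte 37 / 8 = 4.
 */
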
/*
 * search for the specified prefix of a peer. Returns NULL if not found.
 */
struct prefix *
prefix_get(struct rde_peer *peer, struct bgpd_addr *prefix, int prefixlen,
    u_int32_t flags)
{
	struct pt_entry	*pte;

	pte = pt_get(prefix, prefixlen);
	if (pte == NULL)
		return (NULL);
	return (prefix_bypeer(pte, peer, flags));
}

/*
 * Adds or updates a prefix.
 */
struct pt_entry *
prefix_add(struct rde_aspath *asp, struct bgpd_addr *prefix, int prefixlen,
    u_int32_t flags)
{
	struct prefix	*p;
	struct pt_entry	*pte;

	pte = pt_get(prefix, prefixlen);
	if (pte == NULL)
		pte = pt_add(prefix, prefixlen);

	p = prefix_bypeer(pte, asp->peer, flags);
	if (p == NULL) {
		p = prefix_alloc();
		prefix_link(p, pte, asp, flags);
	} else {
		if (p->aspath != asp)
			/* prefix belongs to a different aspath so move */
			return (prefix_move(asp, p, flags));
		p->lastchange = time(NULL);
	}

	return (pte);
}

/*
 * Move the prefix to the specified aspath, remove the old aspath if empty.
 */
struct pt_entry *
prefix_move(struct rde_aspath *asp, struct prefix *p, u_int32_t flags)
{
	struct prefix		*np;
	struct rde_aspath	*oasp;

	if (asp->peer != p->aspath->peer)
		fatalx("prefix_move: cross peer move");

	/* create new prefix node */
	np = prefix_alloc();
	np->aspath = asp;
	/* peer and prefix pointers are still equal */
	np->prefix = p->prefix;
	np->lastchange = time(NULL);
	np->flags = flags;

	/* add to new aspath */
	LIST_INSERT_HEAD(&asp->prefix_h, np, path_l);
	PREFIX_COUNT(asp, flags, 1);
	/*
	 * no need to update the peer prefix count because we are only
	 * moving the prefix without changing the peer
	 */

	/*
	 * Fiddle around with the flags.  If p->flags is not equal to
	 * flags, the old prefix p must not be removed; instead p->flags
	 * needs to be adjusted.
	 */
	if (p->flags != flags) {
		if ((p->flags & flags) == 0)
			fatalx("prefix_move: "
			    "prefix is not part of desired RIB");

		p->flags &= ~flags;
		PREFIX_COUNT(p->aspath, flags, -1);
		/* as before, the peer count needs no update for a move */

		/* redo the route decision for p */
		LIST_REMOVE(p, prefix_l);
		/* If the prefix is the active one remove it first. */
		if (p == p->prefix->active)
			prefix_evaluate(NULL, p->prefix);
		prefix_evaluate(p, p->prefix);

		/* and now for np */
		prefix_evaluate(np, np->prefix);

		return (np->prefix);
	}

	/*
	 * First kick the old prefix node out of the prefix list,
	 * afterwards run the route decision for the new prefix node.
	 * Because of this only one update is generated if the prefix
	 * was active.  This is safe because we create a new prefix
	 * and so the change is noticed by prefix_evaluate().
	 */
	LIST_REMOVE(p, prefix_l);
	prefix_evaluate(np, np->prefix);

	/* remove old prefix node */
	oasp = p->aspath;
	LIST_REMOVE(p, path_l);
	PREFIX_COUNT(oasp, flags, -1);
	/* as before, the peer count needs no update for a move */

	/* destroy all references to other objects and free the old prefix */
	p->aspath = NULL;
	p->prefix = NULL;
	prefix_free(p);

	/* destroy old path if empty */
	if (path_empty(oasp))
		path_destroy(oasp);

	return (np->prefix);
}

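/*
 * Editorial example (not part of the original file): a prefix present in
 * both the Adj-RIB-In and the Local-RIB carries F_ORIGINAL|F_LOCAL.  If
 * a filter change moves only the Local-RIB copy to a new aspath,
 * prefix_move() runs with flags == F_LOCAL: the old prefix node keeps
 * F_ORIGINAL and is re-evaluated while the new node takes over F_LOCAL.
 */
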
/*
 * Removes a prefix from all lists.  If the parent objects -- path or
 * pt_entry -- become empty, remove them too.
 */
void
prefix_remove(struct rde_peer *peer, struct bgpd_addr *prefix, int prefixlen,
    u_int32_t flags)
{
	struct prefix		*p;
	struct pt_entry		*pte;
	struct rde_aspath	*asp;

	pte = pt_get(prefix, prefixlen);
	if (pte == NULL)	/* got a dummy withdraw request */
		return;

	p = prefix_bypeer(pte, peer, flags);
	if (p == NULL)		/* got a dummy withdraw request */
		return;

	asp = p->aspath;

	if (p->flags & F_LOCAL) {
		/* only prefixes in the local RIB were pushed into pf */
		rde_send_pftable(asp->pftableid, prefix, prefixlen, 1);
		rde_send_pftable_commit();
	}

	/* if the prefix belongs to more than one RIB, remove one instance */
	if (p->flags != flags) {
		p->flags &= ~flags;

		PREFIX_COUNT(p->aspath, flags, -1);
		PREFIX_COUNT(peer, flags, -1);

		/* redo the route decision for p */
		LIST_REMOVE(p, prefix_l);
		/* If the prefix is the active one remove it first. */
		if (p == p->prefix->active)
			prefix_evaluate(NULL, p->prefix);
		prefix_evaluate(p, p->prefix);
		return;
	}

	prefix_unlink(p);
	prefix_free(p);

	if (pt_empty(pte))
		pt_remove(pte);
	if (path_empty(asp))
		path_destroy(asp);
}

/* dump a prefix into the specified buffer */
int
prefix_write(u_char *buf, int len, struct bgpd_addr *prefix, u_int8_t plen)
{
	int	totlen;

	if (prefix->af != AF_INET && prefix->af != AF_INET6)
		return (-1);

	totlen = PREFIX_SIZE(plen);

	if (totlen > len)
		return (-1);
	*buf++ = plen;
	memcpy(buf, &prefix->ba, totlen - 1);
	return (totlen);
}

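/*
 * Editorial example (not part of the original file), assuming
 * PREFIX_SIZE(plen) expands to (plen + 7) / 8 + 1: prefix_write() emits
 * the BGP NLRI wire format, one length octet followed by just enough
 * address octets to cover plen bits.  A /19 prefix therefore occupies
 * 4 bytes: the length 19 plus the first 3 address bytes.
 */
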
/*
 * Searches the prefix list of the specified pt_entry for a prefix entry
 * belonging to the given peer.  Returns NULL if no match is found.
 */
struct prefix *
prefix_bypeer(struct pt_entry *pte, struct rde_peer *peer, u_int32_t flags)
{
	struct prefix	*p;

	LIST_FOREACH(p, &pte->prefix_h, prefix_l) {
		if (p->aspath->peer == peer && p->flags & flags)
			return (p);
	}
	return (NULL);
}

void
prefix_updateall(struct rde_aspath *asp, enum nexthop_state state,
    enum nexthop_state oldstate)
{
	struct prefix	*p;

	if (rde_noevaluate())
		/* if the decision process is turned off this is a no-op */
		return;

	LIST_FOREACH(p, &asp->prefix_h, path_l) {
		/*
		 * Skip non-Local-RIB nodes, only Local-RIB prefixes are
		 * eligible.  Both F_LOCAL and F_ORIGINAL may be set.
		 */
		if (!(p->flags & F_LOCAL))
			continue;

		if (oldstate == state && state == NEXTHOP_REACH) {
			/*
			 * The state of the nexthop did not change.  The
			 * only thing that may have changed is the
			 * true_nexthop or other internal information.
			 * This will not change the routing decision so
			 * shortcut here.
			 */
			if (p == p->prefix->active)
				rde_send_kroute(p, NULL);
			continue;
		}

		/* redo the route decision */
		LIST_REMOVE(p, prefix_l);
		/*
		 * If the prefix is the active one remove it first.
		 * This has to be done because we can not detect when
		 * the active prefix changes its state.  In this case
		 * we know that this is a withdrawal and so the second
		 * prefix_evaluate() will generate no update because
		 * the nexthop is unreachable or ineligible.
		 */
		if (p == p->prefix->active)
			prefix_evaluate(NULL, p->prefix);
		prefix_evaluate(p, p->prefix);
	}
}

/* kill a prefix. Only called by path_remove and path_update. */
void
prefix_destroy(struct prefix *p)
{
	struct pt_entry	*pte;

	pte = p->prefix;
	prefix_unlink(p);
	prefix_free(p);

	if (pt_empty(pte))
		pt_remove(pte);
}

/*
 * helper function to clean up the connected networks after a reload
 */
void
prefix_network_clean(struct rde_peer *peer, time_t reloadtime)
{
	struct rde_aspath	*asp, *xasp;
	struct prefix		*p, *xp;
	struct pt_entry		*pte;

	for (asp = LIST_FIRST(&peer->path_h); asp != NULL; asp = xasp) {
		xasp = LIST_NEXT(asp, peer_l);
		for (p = LIST_FIRST(&asp->prefix_h); p != NULL; p = xp) {
			xp = LIST_NEXT(p, path_l);
			if (reloadtime > p->lastchange) {
				pte = p->prefix;
				prefix_unlink(p);
				prefix_free(p);

				if (pt_empty(pte))
					pt_remove(pte);
			}
		}
		if (path_empty(asp))
			path_destroy(asp);
	}
}

/*
 * Link a prefix into the different parent objects.
 */
static void
prefix_link(struct prefix *pref, struct pt_entry *pte, struct rde_aspath *asp,
    u_int32_t flags)
{
	LIST_INSERT_HEAD(&asp->prefix_h, pref, path_l);
	PREFIX_COUNT(asp, flags, 1);
	PREFIX_COUNT(asp->peer, flags, 1);

	pref->aspath = asp;
	pref->prefix = pte;
	pref->lastchange = time(NULL);
	pref->flags = flags;

	/* make route decision */
	prefix_evaluate(pref, pte);
}

/*
 * Unlink a prefix from the different parent objects.
 */
static void
prefix_unlink(struct prefix *pref)
{
	/* make route decision */
	LIST_REMOVE(pref, prefix_l);
	prefix_evaluate(NULL, pref->prefix);

	LIST_REMOVE(pref, path_l);
	PREFIX_COUNT(pref->aspath, pref->flags, -1);
	PREFIX_COUNT(pref->aspath->peer, pref->flags, -1);

	/* destroy all references to other objects */
	pref->aspath = NULL;
	pref->prefix = NULL;

	/*
	 * It's the caller's duty to remove empty aspath and pt_entry
	 * structures.  Freeing the unlinked prefix is the caller's duty
	 * as well.
	 */
}

/* alloc and bzero new entry. May not fail. */
static struct prefix *
prefix_alloc(void)
{
	struct prefix	*p;

	p = calloc(1, sizeof(*p));
	if (p == NULL)
		fatal("prefix_alloc");
	rdemem.prefix_cnt++;
	return (p);
}

/* free an unlinked entry */
static void
prefix_free(struct prefix *pref)
{
	rdemem.prefix_cnt--;
	free(pref);
}

/*
 * nexthop functions
 */
struct nexthop_head	*nexthop_hash(struct bgpd_addr *);
struct nexthop		*nexthop_lookup(struct bgpd_addr *);

/*
 * In BGP there exist two nexthops: the exit nexthop which was announced
 * via BGP and the true nexthop which is used in the FIB -- the forwarding
 * information base, a.k.a. the kernel routing table.  When sending updates
 * it is even more confusing.  In IBGP we pass the unmodified exit nexthop
 * to the neighbors while in EBGP normally the address of the router is
 * sent.  The exit nexthop may be passed to the external neighbor if the
 * neighbor and the exit nexthop reside in the same subnet -- directly
 * connected.
 */

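/*
 * Editorial example (not part of the original file): if an IBGP peer
 * announces 10.0.0.0/8 with exit nexthop 192.0.2.1 and the kernel
 * reaches 192.0.2.1 via gateway 198.51.100.254, the true nexthop
 * installed in the FIB is 198.51.100.254 while updates passed on to
 * IBGP neighbors still carry the exit nexthop 192.0.2.1.
 */
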
struct nexthop_table {
	LIST_HEAD(nexthop_head, nexthop)	*nexthop_hashtbl;
	u_int32_t				 nexthop_hashmask;
} nexthoptable;

void
nexthop_init(u_int32_t hashsize)
{
	u_int32_t	 hs, i;

	for (hs = 1; hs < hashsize; hs <<= 1)
		;
	nexthoptable.nexthop_hashtbl = calloc(hs,
	    sizeof(struct nexthop_head));
	if (nexthoptable.nexthop_hashtbl == NULL)
		fatal("nexthop_init");

	for (i = 0; i < hs; i++)
		LIST_INIT(&nexthoptable.nexthop_hashtbl[i]);

	nexthoptable.nexthop_hashmask = hs - 1;
}

void
nexthop_shutdown(void)
{
	u_int32_t	 i;
	struct nexthop	*nh, *nnh;

	for (i = 0; i <= nexthoptable.nexthop_hashmask; i++) {
		for (nh = LIST_FIRST(&nexthoptable.nexthop_hashtbl[i]);
		    nh != NULL; nh = nnh) {
			nnh = LIST_NEXT(nh, nexthop_l);
			nh->state = NEXTHOP_UNREACH;
			(void)nexthop_delete(nh);
		}
		if (!LIST_EMPTY(&nexthoptable.nexthop_hashtbl[i]))
			log_warnx("nexthop_shutdown: non-free table");
	}

	free(nexthoptable.nexthop_hashtbl);
}

void
nexthop_update(struct kroute_nexthop *msg)
{
	struct nexthop		*nh;
	struct rde_aspath	*asp;
	enum nexthop_state	 oldstate;

	nh = nexthop_lookup(&msg->nexthop);
	if (nh == NULL) {
		log_warnx("nexthop_update: non-existent nexthop %s",
		    log_addr(&msg->nexthop));
		return;
	}

	if (nexthop_delete(nh))
		/* nexthop no longer used */
		return;

	oldstate = nh->state;
	if (msg->valid)
		nh->state = NEXTHOP_REACH;
	else
		nh->state = NEXTHOP_UNREACH;

	if (msg->connected) {
		nh->flags |= NEXTHOP_CONNECTED;
		memcpy(&nh->true_nexthop, &nh->exit_nexthop,
		    sizeof(nh->true_nexthop));
	} else
		memcpy(&nh->true_nexthop, &msg->gateway,
		    sizeof(nh->true_nexthop));

	switch (msg->nexthop.af) {
	case AF_INET:
		nh->nexthop_netlen = msg->kr.kr4.prefixlen;
		nh->nexthop_net.af = AF_INET;
		nh->nexthop_net.v4.s_addr = msg->kr.kr4.prefix.s_addr;
		break;
	case AF_INET6:
		nh->nexthop_netlen = msg->kr.kr6.prefixlen;
		nh->nexthop_net.af = AF_INET6;
		memcpy(&nh->nexthop_net.v6, &msg->kr.kr6.prefix,
		    sizeof(struct in6_addr));
		break;
	default:
		fatalx("nexthop_update: unknown af");
	}

	if (rde_noevaluate())
		/*
		 * if the decision process is turned off there is no need
		 * for the aspath list walk
		 */
		return;

	LIST_FOREACH(asp, &nh->path_h, nexthop_l) {
		prefix_updateall(asp, nh->state, oldstate);
	}
}

void
nexthop_modify(struct rde_aspath *asp, struct bgpd_addr *nexthop,
    enum action_types type, sa_family_t af)
{
	struct nexthop	*nh;

	if (type == ACTION_SET_NEXTHOP_REJECT) {
		asp->flags |= F_NEXTHOP_REJECT;
		return;
	}
	if (type == ACTION_SET_NEXTHOP_BLACKHOLE) {
		asp->flags |= F_NEXTHOP_BLACKHOLE;
		return;
	}
	if (type == ACTION_SET_NEXTHOP_NOMODIFY) {
		asp->flags |= F_NEXTHOP_NOMODIFY;
		return;
	}
	if (type == ACTION_SET_NEXTHOP_SELF) {
		asp->flags |= F_NEXTHOP_SELF;
		return;
	}
	if (af != nexthop->af)
		return;

	nh = nexthop_get(nexthop);
	if (asp->flags & F_ATTR_LINKED)
		nexthop_unlink(asp);
	asp->nexthop = nh;
	if (asp->flags & F_ATTR_LINKED)
		nexthop_link(asp);
}

void
nexthop_link(struct rde_aspath *asp)
{
	if (asp->nexthop == NULL)
		return;

	LIST_INSERT_HEAD(&asp->nexthop->path_h, asp, nexthop_l);
}

void
nexthop_unlink(struct rde_aspath *asp)
{
	struct nexthop	*nh;

	if (asp->nexthop == NULL)
		return;

	LIST_REMOVE(asp, nexthop_l);

	/* see if the list is now empty */
	nh = asp->nexthop;
	asp->nexthop = NULL;

	(void)nexthop_delete(nh);
}

int
nexthop_delete(struct nexthop *nh)
{
	/* nexthop still used by some other aspath */
	if (!LIST_EMPTY(&nh->path_h))
		return (0);

	/* either pinned or in a state where it may not be deleted */
	if (nh->refcnt > 0 || nh->state == NEXTHOP_LOOKUP)
		return (0);

	LIST_REMOVE(nh, nexthop_l);
	rde_send_nexthop(&nh->exit_nexthop, 0);

	rdemem.nexthop_cnt--;
	free(nh);
	return (1);
}

struct nexthop *
nexthop_get(struct bgpd_addr *nexthop)
{
	struct nexthop	*nh;

	nh = nexthop_lookup(nexthop);
	if (nh == NULL) {
		nh = calloc(1, sizeof(*nh));
		if (nh == NULL)
			fatal("nexthop_get");
		rdemem.nexthop_cnt++;

		LIST_INIT(&nh->path_h);
		nh->state = NEXTHOP_LOOKUP;
		nh->exit_nexthop = *nexthop;
		LIST_INSERT_HEAD(nexthop_hash(nexthop), nh, nexthop_l);

		rde_send_nexthop(&nh->exit_nexthop, 1);
	}

	return (nh);
}

int
nexthop_compare(struct nexthop *na, struct nexthop *nb)
{
	struct bgpd_addr	*a, *b;

	if (na == nb)
		return (0);
	if (na == NULL)
		return (-1);
	if (nb == NULL)
		return (1);

	a = &na->exit_nexthop;
	b = &nb->exit_nexthop;

	if (a->af != b->af)
		return (a->af - b->af);

	switch (a->af) {
	case AF_INET:
		if (ntohl(a->v4.s_addr) > ntohl(b->v4.s_addr))
			return (1);
		if (ntohl(a->v4.s_addr) < ntohl(b->v4.s_addr))
			return (-1);
		return (0);
	case AF_INET6:
		return (memcmp(&a->v6, &b->v6, sizeof(struct in6_addr)));
	default:
		fatalx("nexthop_compare: unknown af");
	}
	return (-1);
}

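/*
 * Editorial note (not part of the original file): nexthop_get() creates
 * new nexthops in NEXTHOP_LOOKUP state and asks for kernel route
 * resolution via rde_send_nexthop().  Until nexthop_update() delivers
 * the answer the entry must stay around, which is why nexthop_delete()
 * refuses to free nexthops in NEXTHOP_LOOKUP state or with refcnt > 0.
 */
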
struct nexthop *
nexthop_lookup(struct bgpd_addr *nexthop)
{
	struct nexthop	*nh;

	LIST_FOREACH(nh, nexthop_hash(nexthop), nexthop_l) {
		if (memcmp(&nh->exit_nexthop, nexthop,
		    sizeof(struct bgpd_addr)) == 0)
			return (nh);
	}
	return (NULL);
}

struct nexthop_head *
nexthop_hash(struct bgpd_addr *nexthop)
{
	u_int32_t	 h = 0;

	switch (nexthop->af) {
	case AF_INET:
		h = (AF_INET ^ ntohl(nexthop->v4.s_addr) ^
		    ntohl(nexthop->v4.s_addr) >> 13) &
		    nexthoptable.nexthop_hashmask;
		break;
	case AF_INET6:
		h = hash32_buf(nexthop->v6.s6_addr, sizeof(struct in6_addr),
		    HASHINIT) & nexthoptable.nexthop_hashmask;
		break;
	default:
		fatalx("nexthop_hash: unsupported AF");
	}
	return (&nexthoptable.nexthop_hashtbl[h]);
}
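
/*
 * Editorial note (not part of the original file): for IPv4 the hash
 * folds the address into itself shifted right by 13 bits so that both
 * network and host bits influence the bucket.  E.g. 192.0.2.1
 * (0xc0000201) hashes to (AF_INET ^ 0xc0000201 ^ 0x00060000) &
 * nexthop_hashmask.
 */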