/*	$OpenBSD: rde_rib.c,v 1.138 2013/08/14 20:34:27 claudio Exp $ */

/*
 * Copyright (c) 2003, 2004 Claudio Jeker <claudio@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/types.h>
#include <sys/queue.h>
#include <sys/hash.h>

#include <stdlib.h>
#include <string.h>

#include "bgpd.h"
#include "rde.h"

/*
 * BGP RIB -- Routing Information Base
 *
 * The RIB is built with one aspect in mind: speed -- actually update speed.
 * Therefore one thing needs to be absolutely avoided: long table walks.
 * This is achieved by heavily linking the different parts together.
 */
u_int16_t rib_size;
struct rib *ribs;

LIST_HEAD(, rib_context) rib_dump_h = LIST_HEAD_INITIALIZER(rib_dump_h);

struct rib_entry *rib_add(struct rib *, struct bgpd_addr *, int);
int rib_compare(const struct rib_entry *, const struct rib_entry *);
void rib_remove(struct rib_entry *);
int rib_empty(struct rib_entry *);
struct rib_entry *rib_restart(struct rib_context *);

RB_PROTOTYPE(rib_tree, rib_entry, rib_e, rib_compare);
RB_GENERATE(rib_tree, rib_entry, rib_e, rib_compare);


/* RIB specific functions */
u_int16_t
rib_new(char *name, u_int rtableid, u_int16_t flags)
{
	struct rib	*xribs;
	size_t		 newsize;
	u_int16_t	 id;

	for (id = 0; id < rib_size; id++) {
		if (*ribs[id].name == '\0')
			break;
	}

	if (id == RIB_FAILED)
		fatalx("rib_new: trying to use reserved id");

	if (id >= rib_size) {
		newsize = sizeof(struct rib) * (id + 1);
		if ((xribs = realloc(ribs, newsize)) == NULL) {
			/* XXX this is not clever */
			fatal("rib_new");
		}
		ribs = xribs;
		rib_size = id + 1;
	}

	bzero(&ribs[id], sizeof(struct rib));
	strlcpy(ribs[id].name, name, sizeof(ribs[id].name));
	RB_INIT(&ribs[id].rib);
	ribs[id].state = RECONF_REINIT;
	ribs[id].id = id;
	ribs[id].flags = flags;
	ribs[id].rtableid = rtableid;

	ribs[id].in_rules = calloc(1, sizeof(struct filter_head));
	if (ribs[id].in_rules == NULL)
		fatal(NULL);
	TAILQ_INIT(ribs[id].in_rules);

	return (id);
}

u_int16_t
rib_find(char *name)
{
	u_int16_t id;

	if (name == NULL || *name == '\0')
		return (1);	/* no name returns the Loc-RIB */

	for (id = 0; id < rib_size; id++) {
		if (!strcmp(ribs[id].name, name))
			return (id);
	}

	return (RIB_FAILED);
}
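
/*
 * Illustrative sketch (not part of this file): RIBs are created once at
 * config time and later referenced by name.  A reload path could look
 * roughly like this -- name and rtableid stand in for values from the
 * parsed config:
 *
 *	u_int16_t id;
 *
 *	if ((id = rib_find(name)) == RIB_FAILED)
 *		id = rib_new(name, rtableid, 0);
 *	ribs[id].state = RECONF_KEEP;
 *
 * The real reconfiguration logic lives in the RDE's config handling.
 */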

void
rib_free(struct rib *rib)
{
	struct rib_context *ctx, *next;
	struct rib_entry *re, *xre;
	struct prefix *p, *np;

	/* abort pending rib_dumps */
	for (ctx = LIST_FIRST(&rib_dump_h); ctx != NULL; ctx = next) {
		next = LIST_NEXT(ctx, entry);
		if (ctx->ctx_rib == rib) {
			re = ctx->ctx_re;
			re->flags &= ~F_RIB_ENTRYLOCK;
			LIST_REMOVE(ctx, entry);
			if (ctx->ctx_done)
				ctx->ctx_done(ctx->ctx_arg);
			else
				free(ctx);
		}
	}

	for (re = RB_MIN(rib_tree, &rib->rib); re != NULL; re = xre) {
		xre = RB_NEXT(rib_tree, &rib->rib, re);

		/*
		 * Removing the prefixes is tricky because the last one
		 * will remove the rib_entry as well and because we do
		 * an empty check in prefix_destroy() it is not possible to
		 * use the default for loop.
		 */
		while ((p = LIST_FIRST(&re->prefix_h))) {
			np = LIST_NEXT(p, rib_l);
			if (p->aspath->pftableid) {
				struct bgpd_addr addr;

				pt_getaddr(p->prefix, &addr);
				/* Commit is done in peer_down() */
				rde_send_pftable(p->aspath->pftableid, &addr,
				    p->prefix->prefixlen, 1);
			}
			prefix_destroy(p);
			if (np == NULL)
				break;
		}
	}
	rde_free_filter(rib->in_rules_tmp);
	rde_free_filter(rib->in_rules);
	bzero(rib, sizeof(struct rib));
}

int
rib_compare(const struct rib_entry *a, const struct rib_entry *b)
{
	return (pt_prefix_cmp(a->prefix, b->prefix));
}

struct rib_entry *
rib_get(struct rib *rib, struct bgpd_addr *prefix, int prefixlen)
{
	struct rib_entry xre;
	struct pt_entry	*pte;

	pte = pt_fill(prefix, prefixlen);
	bzero(&xre, sizeof(xre));
	xre.prefix = pte;

	return (RB_FIND(rib_tree, &rib->rib, &xre));
}

struct rib_entry *
rib_lookup(struct rib *rib, struct bgpd_addr *addr)
{
	struct rib_entry *re;
	int		 i;

	switch (addr->aid) {
	case AID_INET:
	case AID_VPN_IPv4:
		for (i = 32; i >= 0; i--) {
			re = rib_get(rib, addr, i);
			if (re != NULL)
				return (re);
		}
		break;
	case AID_INET6:
		for (i = 128; i >= 0; i--) {
			re = rib_get(rib, addr, i);
			if (re != NULL)
				return (re);
		}
		break;
	default:
		fatalx("rib_lookup: unknown af");
	}
	return (NULL);
}
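
/*
 * Illustrative sketch (not part of this file): rib_lookup() performs a
 * longest-prefix match by probing prefix lengths from most to least
 * specific.  With 10.0.0.0/8 and 10.1.0.0/16 in the RIB, a lookup of
 * 10.1.2.3 probes /32, /31, ... and stops at /16:
 *
 *	struct bgpd_addr addr;
 *	struct rib_entry *re;
 *
 *	bzero(&addr, sizeof(addr));
 *	addr.aid = AID_INET;
 *	inet_pton(AF_INET, "10.1.2.3", &addr.v4);
 *	re = rib_lookup(&ribs[1], &addr);	// matches 10.1.0.0/16
 */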

struct rib_entry *
rib_add(struct rib *rib, struct bgpd_addr *prefix, int prefixlen)
{
	struct pt_entry	*pte;
	struct rib_entry *re;

	pte = pt_get(prefix, prefixlen);
	if (pte == NULL)
		pte = pt_add(prefix, prefixlen);

	if ((re = calloc(1, sizeof(*re))) == NULL)
		fatal("rib_add");

	LIST_INIT(&re->prefix_h);
	re->prefix = pte;
	re->flags = rib->flags;
	re->ribid = rib->id;

	if (RB_INSERT(rib_tree, &rib->rib, re) != NULL) {
		log_warnx("rib_add: insert failed");
		free(re);
		return (NULL);
	}

	pt_ref(pte);

	rdemem.rib_cnt++;

	return (re);
}

void
rib_remove(struct rib_entry *re)
{
	if (!rib_empty(re))
		fatalx("rib_remove: entry not empty");

	if (re->flags & F_RIB_ENTRYLOCK)
		/* entry is locked, don't free it. */
		return;

	pt_unref(re->prefix);
	if (pt_empty(re->prefix))
		pt_remove(re->prefix);

	if (RB_REMOVE(rib_tree, &ribs[re->ribid].rib, re) == NULL)
		log_warnx("rib_remove: remove failed.");

	free(re);
	rdemem.rib_cnt--;
}

int
rib_empty(struct rib_entry *re)
{
	return LIST_EMPTY(&re->prefix_h);
}

void
rib_dump(struct rib *rib, void (*upcall)(struct rib_entry *, void *),
    void *arg, u_int8_t aid)
{
	struct rib_context *ctx;

	if ((ctx = calloc(1, sizeof(*ctx))) == NULL)
		fatal("rib_dump");
	ctx->ctx_rib = rib;
	ctx->ctx_upcall = upcall;
	ctx->ctx_arg = arg;
	ctx->ctx_aid = aid;
	rib_dump_r(ctx);
}

void
rib_dump_r(struct rib_context *ctx)
{
	struct rib_entry *re;
	unsigned int	 i;

	if (ctx->ctx_re == NULL) {
		re = RB_MIN(rib_tree, &ctx->ctx_rib->rib);
		LIST_INSERT_HEAD(&rib_dump_h, ctx, entry);
	} else
		re = rib_restart(ctx);

	for (i = 0; re != NULL; re = RB_NEXT(rib_tree, unused, re)) {
		if (ctx->ctx_aid != AID_UNSPEC &&
		    ctx->ctx_aid != re->prefix->aid)
			continue;
		if (ctx->ctx_count && i++ >= ctx->ctx_count &&
		    (re->flags & F_RIB_ENTRYLOCK) == 0) {
			/* store and lock last element */
			ctx->ctx_re = re;
			re->flags |= F_RIB_ENTRYLOCK;
			return;
		}
		ctx->ctx_upcall(re, ctx->ctx_arg);
	}

	LIST_REMOVE(ctx, entry);
	if (ctx->ctx_done)
		ctx->ctx_done(ctx->ctx_arg);
	else
		free(ctx);
}

struct rib_entry *
rib_restart(struct rib_context *ctx)
{
	struct rib_entry *re;

	re = ctx->ctx_re;
	re->flags &= ~F_RIB_ENTRYLOCK;

	/* find first non empty element */
	while (re && rib_empty(re))
		re = RB_NEXT(rib_tree, unused, re);

	/* free the previously locked rib element if empty */
	if (rib_empty(ctx->ctx_re))
		rib_remove(ctx->ctx_re);
	ctx->ctx_re = NULL;
	return (re);
}

void
rib_dump_runner(void)
{
	struct rib_context *ctx, *next;

	for (ctx = LIST_FIRST(&rib_dump_h); ctx != NULL; ctx = next) {
		next = LIST_NEXT(ctx, entry);
		rib_dump_r(ctx);
	}
}

int
rib_dump_pending(void)
{
	return (!LIST_EMPTY(&rib_dump_h));
}
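
/*
 * Illustrative sketch (not part of this file): a chunked dump of all
 * IPv4 entries.  Setting ctx_count makes rib_dump_r() suspend after
 * that many upcalls; rib_dump_runner() resumes it from the main loop
 * until the walk completes and ctx_done fires.  show_entry() and
 * dump_done() are hypothetical callbacks:
 *
 *	struct rib_context *ctx;
 *
 *	if ((ctx = calloc(1, sizeof(*ctx))) == NULL)
 *		fatal("dump");
 *	ctx->ctx_rib = &ribs[1];
 *	ctx->ctx_upcall = show_entry;
 *	ctx->ctx_done = dump_done;
 *	ctx->ctx_aid = AID_INET;
 *	ctx->ctx_count = 100;		// suspend every 100 entries
 *	rib_dump_r(ctx);
 */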

/* used to bump correct prefix counters */
#define PREFIX_COUNT(x, op)			\
	do {					\
		(x)->prefix_cnt += (op);	\
	} while (0)

/* path specific functions */

static void	path_link(struct rde_aspath *, struct rde_peer *);

struct path_table pathtable;

/* XXX the hash should also include communities and the other attrs */
#define PATH_HASH(x)				\
	&pathtable.path_hashtbl[hash32_buf((x)->data, (x)->len, HASHINIT) & \
	    pathtable.path_hashmask]

void
path_init(u_int32_t hashsize)
{
	u_int32_t hs, i;

	for (hs = 1; hs < hashsize; hs <<= 1)
		;
	pathtable.path_hashtbl = calloc(hs, sizeof(struct aspath_head));
	if (pathtable.path_hashtbl == NULL)
		fatal("path_init");

	for (i = 0; i < hs; i++)
		LIST_INIT(&pathtable.path_hashtbl[i]);

	pathtable.path_hashmask = hs - 1;
}

void
path_shutdown(void)
{
	u_int32_t i;

	for (i = 0; i <= pathtable.path_hashmask; i++)
		if (!LIST_EMPTY(&pathtable.path_hashtbl[i]))
			log_warnx("path_shutdown: free non-free table");

	free(pathtable.path_hashtbl);
}
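
/*
 * Illustrative note (not part of this file): path_init() rounds the
 * configured size up to the next power of two so PATH_HASH can mask
 * instead of dividing.  E.g. hashsize 1000 yields hs = 1024 and
 * path_hashmask = 0x3ff:
 *
 *	path_init(1000);
 *	head = PATH_HASH(asp->aspath);	// hash32_buf(...) & 0x3ff
 */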

int
path_update(struct rib *rib, struct rde_peer *peer, struct rde_aspath *nasp,
    struct bgpd_addr *prefix, int prefixlen)
{
	struct rde_aspath	*asp;
	struct prefix		*p;

	if (nasp->pftableid) {
		rde_send_pftable(nasp->pftableid, prefix, prefixlen, 0);
		rde_send_pftable_commit();
	}

	/*
	 * First try to find a prefix in the specified RIB.
	 */
	if ((p = prefix_get(rib, peer, prefix, prefixlen, 0)) != NULL) {
		if (path_compare(nasp, p->aspath) == 0) {
			/* no change, update last change */
			p->lastchange = time(NULL);
			return (0);
		}
	}

	/*
	 * Either the prefix does not exist or the path changed.
	 * In both cases lookup the new aspath to make sure it is not
	 * already in the RIB.
	 */
	if ((asp = path_lookup(nasp, peer)) == NULL) {
		/* Path not available, create and link a new one. */
		asp = path_copy(nasp);
		path_link(asp, peer);
	}

	/* If the prefix was found move it else add it to the aspath. */
	if (p != NULL)
		prefix_move(asp, p);
	else
		return (prefix_add(rib, asp, prefix, prefixlen));
	return (0);
}

int
path_compare(struct rde_aspath *a, struct rde_aspath *b)
{
	int r;

	if (a->origin > b->origin)
		return (1);
	if (a->origin < b->origin)
		return (-1);
	if ((a->flags & ~F_ATTR_LINKED) > (b->flags & ~F_ATTR_LINKED))
		return (1);
	if ((a->flags & ~F_ATTR_LINKED) < (b->flags & ~F_ATTR_LINKED))
		return (-1);
	if (a->med > b->med)
		return (1);
	if (a->med < b->med)
		return (-1);
	if (a->lpref > b->lpref)
		return (1);
	if (a->lpref < b->lpref)
		return (-1);
	if (a->weight > b->weight)
		return (1);
	if (a->weight < b->weight)
		return (-1);
	if (a->rtlabelid > b->rtlabelid)
		return (1);
	if (a->rtlabelid < b->rtlabelid)
		return (-1);
	if (a->pftableid > b->pftableid)
		return (1);
	if (a->pftableid < b->pftableid)
		return (-1);

	r = aspath_compare(a->aspath, b->aspath);
	if (r == 0)
		r = nexthop_compare(a->nexthop, b->nexthop);
	if (r > 0)
		return (1);
	if (r < 0)
		return (-1);

	return (attr_compare(a, b));
}

struct rde_aspath *
path_lookup(struct rde_aspath *aspath, struct rde_peer *peer)
{
	struct aspath_head	*head;
	struct rde_aspath	*asp;

	head = PATH_HASH(aspath->aspath);

	LIST_FOREACH(asp, head, path_l) {
		if (peer == asp->peer && path_compare(aspath, asp) == 0)
			return (asp);
	}
	return (NULL);
}

void
path_remove(struct rde_aspath *asp)
{
	struct prefix *p, *np;

	for (p = LIST_FIRST(&asp->prefix_h); p != NULL; p = np) {
		np = LIST_NEXT(p, path_l);
		if (asp->pftableid) {
			struct bgpd_addr addr;

			pt_getaddr(p->prefix, &addr);
			/* Commit is done in peer_down() */
			rde_send_pftable(p->aspath->pftableid, &addr,
			    p->prefix->prefixlen, 1);
		}
		prefix_destroy(p);
	}
}

/*
 * Remove all stale routes, or, if staletime is 0, remove all routes for
 * the specified AID.
 */
u_int32_t
path_remove_stale(struct rde_aspath *asp, u_int8_t aid)
{
	struct prefix	*p, *np;
	time_t		 staletime;
	u_int32_t	 rprefixes;

	rprefixes = 0;
	staletime = asp->peer->staletime[aid];
	for (p = LIST_FIRST(&asp->prefix_h); p != NULL; p = np) {
		np = LIST_NEXT(p, path_l);
		if (p->prefix->aid != aid)
			continue;

		if (staletime && p->lastchange > staletime)
			continue;

		if (asp->pftableid) {
			struct bgpd_addr addr;

			pt_getaddr(p->prefix, &addr);
			/* Commit is done in peer_flush() */
			rde_send_pftable(p->aspath->pftableid, &addr,
			    p->prefix->prefixlen, 1);
		}

		/* only count Adj-RIB-In */
		if (p->rib->ribid == 0)
			rprefixes++;

		prefix_destroy(p);
	}
	return (rprefixes);
}
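
/*
 * Illustrative sketch (not part of this file): path_remove_stale() does
 * the work of a graceful-restart flush.  A peer_flush()-style sweep --
 * names are placeholders here -- walks the per-peer path list; paths
 * that become empty are reaped by prefix_destroy() itself:
 *
 *	rprefixes = 0;
 *	for (asp = LIST_FIRST(&peer->path_h); asp != NULL; asp = nasp) {
 *		nasp = LIST_NEXT(asp, peer_l);
 *		rprefixes += path_remove_stale(asp, aid);
 *	}
 */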

/* this function is only called by prefix_remove and path_remove */
void
path_destroy(struct rde_aspath *asp)
{
	/* path_destroy can only unlink and free empty rde_aspath */
	if (asp->prefix_cnt != 0 || asp->active_cnt != 0)
		log_warnx("path_destroy: prefix count out of sync");

	nexthop_unlink(asp);
	LIST_REMOVE(asp, path_l);
	LIST_REMOVE(asp, peer_l);
	asp->peer = NULL;
	asp->nexthop = NULL;
	asp->flags &= ~F_ATTR_LINKED;

	path_put(asp);
}

int
path_empty(struct rde_aspath *asp)
{
	return LIST_EMPTY(&asp->prefix_h);
}

/*
 * The path object is linked into multiple lists for fast access.
 * These are peer_l, path_l and nexthop_l.
 * peer_l: list of all aspaths that belong to that peer
 * path_l: hash list to find paths quickly
 * nexthop_l: list of all aspaths with an equal exit nexthop
 */
static void
path_link(struct rde_aspath *asp, struct rde_peer *peer)
{
	struct aspath_head *head;

	head = PATH_HASH(asp->aspath);

	LIST_INSERT_HEAD(head, asp, path_l);
	LIST_INSERT_HEAD(&peer->path_h, asp, peer_l);
	asp->peer = peer;
	nexthop_link(asp);
	asp->flags |= F_ATTR_LINKED;
}

/*
 * Copy asp to a new UNLINKED one, mainly for filtering.
 */
struct rde_aspath *
path_copy(struct rde_aspath *asp)
{
	struct rde_aspath *nasp;

	nasp = path_get();
	nasp->aspath = asp->aspath;
	if (nasp->aspath != NULL) {
		nasp->aspath->refcnt++;
		rdemem.aspath_refs++;
	}
	nasp->nexthop = asp->nexthop;
	nasp->med = asp->med;
	nasp->lpref = asp->lpref;
	nasp->weight = asp->weight;
	nasp->origin = asp->origin;
	nasp->rtlabelid = asp->rtlabelid;
	rtlabel_ref(nasp->rtlabelid);
	nasp->pftableid = asp->pftableid;
	pftable_ref(nasp->pftableid);

	nasp->flags = asp->flags & ~F_ATTR_LINKED;
	attr_copy(nasp, asp);

	return (nasp);
}

/* alloc and initialize new entry. May not fail. */
struct rde_aspath *
path_get(void)
{
	struct rde_aspath *asp;

	asp = calloc(1, sizeof(*asp));
	if (asp == NULL)
		fatal("path_get");
	rdemem.path_cnt++;

	LIST_INIT(&asp->prefix_h);
	asp->origin = ORIGIN_INCOMPLETE;
	asp->lpref = DEFAULT_LPREF;
	/* med = 0 */
	/* weight = 0 */
	/* rtlabel = 0 */

	return (asp);
}

/* free an unlinked element */
void
path_put(struct rde_aspath *asp)
{
	if (asp == NULL)
		return;

	if (asp->flags & F_ATTR_LINKED)
		fatalx("path_put: linked object");

	rtlabel_unref(asp->rtlabelid);
	pftable_unref(asp->pftableid);
	aspath_put(asp->aspath);
	attr_freeall(asp);
	rdemem.path_cnt--;
	free(asp);
}
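
/*
 * Illustrative sketch (not part of this file): the typical rde_aspath
 * life cycle around filtering, mirroring what path_update() does.  A
 * scratch copy is filtered, an identical linked path is reused if one
 * exists, and the scratch copy is dropped again -- path_put() fatals
 * on anything still linked.  peer_asp is a placeholder:
 *
 *	struct rde_aspath *scratch, *asp;
 *
 *	scratch = path_copy(peer_asp);	// unlinked working copy
 *	... apply filter set actions to scratch ...
 *	if ((asp = path_lookup(scratch, peer)) == NULL) {
 *		asp = path_copy(scratch);
 *		path_link(asp, peer);	// now owned by the RIB
 *	}
 *	path_put(scratch);		// scratch copy is gone
 */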

/* prefix specific functions */

static struct prefix	*prefix_alloc(void);
static void		 prefix_free(struct prefix *);
static void		 prefix_link(struct prefix *, struct rib_entry *,
			     struct rde_aspath *);
static void		 prefix_unlink(struct prefix *);

/*
 * search for specified prefix of a peer. Returns NULL if not found.
 */
struct prefix *
prefix_get(struct rib *rib, struct rde_peer *peer, struct bgpd_addr *prefix,
    int prefixlen, u_int32_t flags)
{
	struct rib_entry *re;

	re = rib_get(rib, prefix, prefixlen);
	if (re == NULL)
		return (NULL);
	return (prefix_bypeer(re, peer, flags));
}

/*
 * Adds or updates a prefix.
 */
int
prefix_add(struct rib *rib, struct rde_aspath *asp, struct bgpd_addr *prefix,
    int prefixlen)
{
	struct prefix		*p;
	struct rib_entry	*re;

	re = rib_get(rib, prefix, prefixlen);
	if (re == NULL)
		re = rib_add(rib, prefix, prefixlen);

	p = prefix_bypeer(re, asp->peer, asp->flags);
	if (p == NULL) {
		p = prefix_alloc();
		prefix_link(p, re, asp);
		return (1);
	} else {
		if (p->aspath != asp) {
			/* prefix belongs to a different aspath so move */
			prefix_move(asp, p);
		} else
			p->lastchange = time(NULL);
		return (0);
	}
}

/*
 * Move the prefix to the specified aspath, remove the old asp if it
 * becomes empty.
 */
void
prefix_move(struct rde_aspath *asp, struct prefix *p)
{
	struct prefix		*np;
	struct rde_aspath	*oasp;

	if (asp->peer != p->aspath->peer)
		fatalx("prefix_move: cross peer move");

	/* create new prefix node */
	np = prefix_alloc();
	np->aspath = asp;
	/* peer and prefix pointers are still equal */
	np->prefix = p->prefix;
	np->rib = p->rib;
	np->lastchange = time(NULL);

	/* add to new as path */
	LIST_INSERT_HEAD(&asp->prefix_h, np, path_l);
	PREFIX_COUNT(asp, 1);
	/*
	 * no need to update the peer prefix count because we are only moving
	 * the prefix without changing the peer.
	 */

	/*
	 * First kick the old prefix node out of the prefix list,
	 * afterwards run the route decision for the new prefix node.
	 * Because of this only one update is generated if the prefix
	 * was active.
	 * This is safe because we create a new prefix and so the change
	 * is noticed by prefix_evaluate().
	 */
	LIST_REMOVE(p, rib_l);
	prefix_evaluate(np, np->rib);

	/* remove old prefix node */
	oasp = p->aspath;
	LIST_REMOVE(p, path_l);
	PREFIX_COUNT(oasp, -1);
	/* as before peer count needs no update because of move */

	/* destroy all references to other objects and free the old prefix */
	p->aspath = NULL;
	p->prefix = NULL;
	p->rib = NULL;
	prefix_free(p);

	/* destroy old path if empty */
	if (path_empty(oasp))
		path_destroy(oasp);
}

/*
 * Removes a prefix from all lists. If the parent objects -- path or
 * pt_entry -- become empty remove them too.
 */
int
prefix_remove(struct rib *rib, struct rde_peer *peer, struct bgpd_addr *prefix,
    int prefixlen, u_int32_t flags)
{
	struct prefix		*p;
	struct rib_entry	*re;
	struct rde_aspath	*asp;

	re = rib_get(rib, prefix, prefixlen);
	if (re == NULL)		/* Got a dummy withdrawn request */
		return (0);

	p = prefix_bypeer(re, peer, flags);
	if (p == NULL)		/* Got a dummy withdrawn request. */
		return (0);

	asp = p->aspath;

	if (asp->pftableid) {
		/* only prefixes in the local RIB were pushed into pf */
		rde_send_pftable(asp->pftableid, prefix, prefixlen, 1);
		rde_send_pftable_commit();
	}

	prefix_destroy(p);

	return (1);
}

/* dump a prefix into specified buffer */
int
prefix_write(u_char *buf, int len, struct bgpd_addr *prefix, u_int8_t plen)
{
	int	totlen, psize;

	switch (prefix->aid) {
	case AID_INET:
	case AID_INET6:
		totlen = PREFIX_SIZE(plen);

		if (totlen > len)
			return (-1);
		*buf++ = plen;
		memcpy(buf, &prefix->ba, totlen - 1);
		return (totlen);
	case AID_VPN_IPv4:
		totlen = PREFIX_SIZE(plen) + sizeof(prefix->vpn4.rd) +
		    prefix->vpn4.labellen;
		/*
		 * Remember the address byte count before plen is grown
		 * to cover the label stack and the RD as well.
		 */
		psize = PREFIX_SIZE(plen) - 1;
		plen += (sizeof(prefix->vpn4.rd) + prefix->vpn4.labellen) * 8;

		if (totlen > len)
			return (-1);
		*buf++ = plen;
		memcpy(buf, &prefix->vpn4.labelstack, prefix->vpn4.labellen);
		buf += prefix->vpn4.labellen;
		memcpy(buf, &prefix->vpn4.rd, sizeof(prefix->vpn4.rd));
		buf += sizeof(prefix->vpn4.rd);
		memcpy(buf, &prefix->vpn4.addr, psize);
		return (totlen);
	default:
		return (-1);
	}
}
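
/*
 * Worked example (informational): for 10.1.2.0/24 prefix_write()
 * emits PREFIX_SIZE(24) = 4 bytes -- one length octet followed by
 * only the significant address octets:
 *
 *	0x18 0x0a 0x01 0x02
 *
 * For AID_VPN_IPv4 the length octet counts the label stack and the
 * route distinguisher too, so a /24 with one 3-byte label becomes
 * 24 + (3 + 8) * 8 = 112 bits on the wire.
 */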

int
prefix_writebuf(struct ibuf *buf, struct bgpd_addr *prefix, u_int8_t plen)
{
	int	 totlen;
	void	*bptr;

	switch (prefix->aid) {
	case AID_INET:
	case AID_INET6:
		totlen = PREFIX_SIZE(plen);
		break;
	case AID_VPN_IPv4:
		totlen = PREFIX_SIZE(plen) + sizeof(prefix->vpn4.rd) +
		    prefix->vpn4.labellen;
		break;
	default:
		return (-1);
	}

	if ((bptr = ibuf_reserve(buf, totlen)) == NULL)
		return (-1);
	if (prefix_write(bptr, totlen, prefix, plen) == -1)
		return (-1);
	return (0);
}

/*
 * Searches the prefix list of the specified rib_entry for a prefix entry
 * belonging to peer "peer". Returns NULL if no match is found.
 */
struct prefix *
prefix_bypeer(struct rib_entry *re, struct rde_peer *peer, u_int32_t flags)
{
	struct prefix *p;

	LIST_FOREACH(p, &re->prefix_h, rib_l) {
		if (p->aspath->peer != peer)
			continue;
		if (p->aspath->flags & flags &&
		    (flags & F_ANN_DYNAMIC) !=
		    (p->aspath->flags & F_ANN_DYNAMIC))
			continue;
		return (p);
	}
	return (NULL);
}

void
prefix_updateall(struct rde_aspath *asp, enum nexthop_state state,
    enum nexthop_state oldstate)
{
	struct prefix *p;

	LIST_FOREACH(p, &asp->prefix_h, path_l) {
		/*
		 * skip non-local RIBs or RIBs that are flagged as noeval.
		 */
		if (p->rib->flags & F_RIB_NOEVALUATE)
			continue;

		if (oldstate == state && state == NEXTHOP_REACH) {
			/*
			 * The state of the nexthop did not change. The only
			 * thing that may have changed is the true_nexthop
			 * or other internal info. This will not change
			 * the routing decision so shortcut here.
			 */
			if ((p->rib->flags & F_RIB_NOFIB) == 0 &&
			    p == p->rib->active)
				rde_send_kroute(p, NULL, p->rib->ribid);
			continue;
		}

		/* redo the route decision */
		LIST_REMOVE(p, rib_l);
		/*
		 * If the prefix is the active one remove it first,
		 * this has to be done because we can not detect when
		 * the active prefix changes its state. In this case
		 * we know that this is a withdrawal and so the second
		 * prefix_evaluate() will generate no update because
		 * the nexthop is unreachable or ineligible.
		 */
		if (p == p->rib->active)
			prefix_evaluate(NULL, p->rib);
		prefix_evaluate(p, p->rib);
	}
}

/* kill a prefix. */
void
prefix_destroy(struct prefix *p)
{
	struct rde_aspath *asp;

	asp = p->aspath;
	prefix_unlink(p);
	prefix_free(p);

	if (path_empty(asp))
		path_destroy(asp);
}

/*
 * helper function to clean up the connected networks after a reload
 */
void
prefix_network_clean(struct rde_peer *peer, time_t reloadtime, u_int32_t flags)
{
	struct rde_aspath	*asp, *xasp;
	struct prefix		*p, *xp;

	for (asp = LIST_FIRST(&peer->path_h); asp != NULL; asp = xasp) {
		xasp = LIST_NEXT(asp, peer_l);
		if ((asp->flags & F_ANN_DYNAMIC) != flags)
			continue;
		for (p = LIST_FIRST(&asp->prefix_h); p != NULL; p = xp) {
			xp = LIST_NEXT(p, path_l);
			if (reloadtime > p->lastchange) {
				prefix_unlink(p);
				prefix_free(p);
			}
		}
		if (path_empty(asp))
			path_destroy(asp);
	}
}

/*
 * Link a prefix into the different parent objects.
 */
static void
prefix_link(struct prefix *pref, struct rib_entry *re, struct rde_aspath *asp)
{
	LIST_INSERT_HEAD(&asp->prefix_h, pref, path_l);
	PREFIX_COUNT(asp, 1);

	pref->aspath = asp;
	pref->rib = re;
	pref->prefix = re->prefix;
	pt_ref(pref->prefix);
	pref->lastchange = time(NULL);

	/* make route decision */
	prefix_evaluate(pref, re);
}
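
/*
 * Informational note: prefix_link() and prefix_unlink() are symmetric
 * around the decision process.  Linking evaluates the new prefix
 * against its entry (a possible update); unlinking first removes the
 * prefix from the entry's list and then evaluates with NULL, which
 * turns an active prefix into a withdrawal.  A withdraw of the active
 * prefix therefore boils down to:
 *
 *	LIST_REMOVE(pref, rib_l);
 *	prefix_evaluate(NULL, re);	// re-elect or withdraw
 */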

/*
 * Unlink a prefix from the different parent objects.
 */
static void
prefix_unlink(struct prefix *pref)
{
	struct rib_entry *re = pref->rib;

	/* make route decision */
	LIST_REMOVE(pref, rib_l);
	prefix_evaluate(NULL, re);

	LIST_REMOVE(pref, path_l);
	PREFIX_COUNT(pref->aspath, -1);

	pt_unref(pref->prefix);
	if (pt_empty(pref->prefix))
		pt_remove(pref->prefix);
	if (rib_empty(re))
		rib_remove(re);

	/* destroy all references to other objects */
	pref->aspath = NULL;
	pref->prefix = NULL;
	pref->rib = NULL;

	/*
	 * It's the caller's duty to remove empty aspath structures.
	 * Also freeing the unlinked prefix is the caller's duty.
	 */
}

/* alloc and bzero new entry. May not fail. */
static struct prefix *
prefix_alloc(void)
{
	struct prefix *p;

	p = calloc(1, sizeof(*p));
	if (p == NULL)
		fatal("prefix_alloc");
	rdemem.prefix_cnt++;
	return (p);
}

/* free an unlinked entry */
static void
prefix_free(struct prefix *pref)
{
	rdemem.prefix_cnt--;
	free(pref);
}

/*
 * nexthop functions
 */
struct nexthop_head	*nexthop_hash(struct bgpd_addr *);
struct nexthop		*nexthop_lookup(struct bgpd_addr *);

/*
 * In BGP there exist two nexthops: the exit nexthop which was announced via
 * BGP and the true nexthop which is used in the FIB -- the forwarding
 * information base, a.k.a. the kernel routing table. When sending updates
 * it is even more confusing. In IBGP we pass the unmodified exit nexthop
 * to the neighbors while in EBGP normally the address of the router is
 * sent. The exit nexthop may be passed to the external neighbor if the
 * neighbor and the exit nexthop reside in the same subnet -- directly
 * connected.
 */
struct nexthop_table {
	LIST_HEAD(nexthop_head, nexthop)	*nexthop_hashtbl;
	u_int32_t				 nexthop_hashmask;
} nexthoptable;
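
/*
 * Worked example (informational): an EBGP peer announces 192.0.2.1 as
 * nexthop but the kernel reaches 192.0.2.1 via the gateway 10.0.0.1.
 * Then exit_nexthop stays 192.0.2.1 (what BGP talks about) while
 * true_nexthop becomes 10.0.0.1 (what goes into the FIB).  Only if
 * the kroute lookup reports the nexthop as directly connected are the
 * two the same, which is exactly what nexthop_update() below does.
 */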

void
nexthop_init(u_int32_t hashsize)
{
	u_int32_t hs, i;

	for (hs = 1; hs < hashsize; hs <<= 1)
		;
	nexthoptable.nexthop_hashtbl = calloc(hs, sizeof(struct nexthop_head));
	if (nexthoptable.nexthop_hashtbl == NULL)
		fatal("nexthop_init");

	for (i = 0; i < hs; i++)
		LIST_INIT(&nexthoptable.nexthop_hashtbl[i]);

	nexthoptable.nexthop_hashmask = hs - 1;
}

void
nexthop_shutdown(void)
{
	u_int32_t	 i;
	struct nexthop	*nh, *nnh;

	for (i = 0; i <= nexthoptable.nexthop_hashmask; i++) {
		for (nh = LIST_FIRST(&nexthoptable.nexthop_hashtbl[i]);
		    nh != NULL; nh = nnh) {
			nnh = LIST_NEXT(nh, nexthop_l);
			nh->state = NEXTHOP_UNREACH;
			(void)nexthop_delete(nh);
		}
		if (!LIST_EMPTY(&nexthoptable.nexthop_hashtbl[i]))
			log_warnx("nexthop_shutdown: non-free table");
	}

	free(nexthoptable.nexthop_hashtbl);
}

void
nexthop_update(struct kroute_nexthop *msg)
{
	struct nexthop		*nh;
	struct rde_aspath	*asp;
	enum nexthop_state	 oldstate;

	nh = nexthop_lookup(&msg->nexthop);
	if (nh == NULL) {
		log_warnx("nexthop_update: non-existent nexthop %s",
		    log_addr(&msg->nexthop));
		return;
	}

	oldstate = nh->state;
	if (msg->valid)
		nh->state = NEXTHOP_REACH;
	else
		nh->state = NEXTHOP_UNREACH;

	if (msg->connected) {
		nh->flags |= NEXTHOP_CONNECTED;
		memcpy(&nh->true_nexthop, &nh->exit_nexthop,
		    sizeof(nh->true_nexthop));
	} else
		memcpy(&nh->true_nexthop, &msg->gateway,
		    sizeof(nh->true_nexthop));

	memcpy(&nh->nexthop_net, &msg->net,
	    sizeof(nh->nexthop_net));
	nh->nexthop_netlen = msg->netlen;

	if (nexthop_delete(nh))
		/* nexthop no longer used */
		return;

	if (rde_noevaluate())
		/*
		 * if the decision process is turned off there is no need
		 * for the aspath list walk.
		 */
		return;

	LIST_FOREACH(asp, &nh->path_h, nexthop_l) {
		prefix_updateall(asp, nh->state, oldstate);
	}
}
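
/*
 * Illustrative sketch (not part of this file): the nexthop state
 * machine is driven by messages from the parent process.  A new
 * nexthop starts in NEXTHOP_LOOKUP and is registered via
 * rde_send_nexthop(); the answer comes back as a kroute_nexthop and
 * is fed into nexthop_update(), roughly:
 *
 *	nh = nexthop_get(&addr);	// state NEXTHOP_LOOKUP, asks parent
 *	... parent answers with a struct kroute_nexthop msg ...
 *	nexthop_update(&msg);		// REACH/UNREACH, updates prefixes
 */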

void
nexthop_modify(struct rde_aspath *asp, struct bgpd_addr *nexthop,
    enum action_types type, u_int8_t aid)
{
	struct nexthop *nh;

	if (type == ACTION_SET_NEXTHOP && aid != nexthop->aid)
		return;

	asp->flags &= ~F_NEXTHOP_MASK;
	switch (type) {
	case ACTION_SET_NEXTHOP_REJECT:
		asp->flags |= F_NEXTHOP_REJECT;
		break;
	case ACTION_SET_NEXTHOP_BLACKHOLE:
		asp->flags |= F_NEXTHOP_BLACKHOLE;
		break;
	case ACTION_SET_NEXTHOP_NOMODIFY:
		asp->flags |= F_NEXTHOP_NOMODIFY;
		break;
	case ACTION_SET_NEXTHOP_SELF:
		asp->flags |= F_NEXTHOP_SELF;
		break;
	case ACTION_SET_NEXTHOP:
		nh = nexthop_get(nexthop);
		if (asp->flags & F_ATTR_LINKED)
			nexthop_unlink(asp);
		asp->nexthop = nh;
		if (asp->flags & F_ATTR_LINKED)
			nexthop_link(asp);
		break;
	default:
		break;
	}
}

void
nexthop_link(struct rde_aspath *asp)
{
	if (asp->nexthop == NULL)
		return;

	LIST_INSERT_HEAD(&asp->nexthop->path_h, asp, nexthop_l);
}

void
nexthop_unlink(struct rde_aspath *asp)
{
	struct nexthop *nh;

	if (asp->nexthop == NULL)
		return;

	LIST_REMOVE(asp, nexthop_l);

	/* see if list is empty */
	nh = asp->nexthop;
	asp->nexthop = NULL;

	(void)nexthop_delete(nh);
}

int
nexthop_delete(struct nexthop *nh)
{
	/* nexthop still used by some other aspath */
	if (!LIST_EMPTY(&nh->path_h))
		return (0);

	/* either pinned or in a state where it may not be deleted */
	if (nh->refcnt > 0 || nh->state == NEXTHOP_LOOKUP)
		return (0);

	LIST_REMOVE(nh, nexthop_l);
	rde_send_nexthop(&nh->exit_nexthop, 0);

	rdemem.nexthop_cnt--;
	free(nh);
	return (1);
}

struct nexthop *
nexthop_get(struct bgpd_addr *nexthop)
{
	struct nexthop *nh;

	nh = nexthop_lookup(nexthop);
	if (nh == NULL) {
		nh = calloc(1, sizeof(*nh));
		if (nh == NULL)
			fatal("nexthop_get");
		rdemem.nexthop_cnt++;

		LIST_INIT(&nh->path_h);
		nh->state = NEXTHOP_LOOKUP;
		nh->exit_nexthop = *nexthop;
		LIST_INSERT_HEAD(nexthop_hash(nexthop), nh, nexthop_l);

		rde_send_nexthop(&nh->exit_nexthop, 1);
	}

	return (nh);
}
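
/*
 * Informational note: nexthop_delete() refusing NEXTHOP_LOOKUP matters
 * for the unlink path.  If the last aspath drops a nexthop before the
 * parent has answered the lookup, nexthop_unlink() leaves the object
 * in place; the pending nexthop_update() then calls nexthop_delete()
 * again and frees it once the answer has arrived:
 *
 *	nexthop_unlink(asp);	// last user gone, state still LOOKUP
 *	... kroute_nexthop answer arrives ...
 *	nexthop_update(&msg);	// nexthop_delete() now succeeds
 */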

int
nexthop_compare(struct nexthop *na, struct nexthop *nb)
{
	struct bgpd_addr	*a, *b;

	if (na == nb)
		return (0);
	if (na == NULL)
		return (-1);
	if (nb == NULL)
		return (1);

	a = &na->exit_nexthop;
	b = &nb->exit_nexthop;

	if (a->aid != b->aid)
		return (a->aid - b->aid);

	switch (a->aid) {
	case AID_INET:
		if (ntohl(a->v4.s_addr) > ntohl(b->v4.s_addr))
			return (1);
		if (ntohl(a->v4.s_addr) < ntohl(b->v4.s_addr))
			return (-1);
		return (0);
	case AID_INET6:
		return (memcmp(&a->v6, &b->v6, sizeof(struct in6_addr)));
	default:
		fatalx("nexthop_compare: unknown af");
	}
	return (-1);
}

struct nexthop *
nexthop_lookup(struct bgpd_addr *nexthop)
{
	struct nexthop *nh;

	LIST_FOREACH(nh, nexthop_hash(nexthop), nexthop_l) {
		if (memcmp(&nh->exit_nexthop, nexthop,
		    sizeof(struct bgpd_addr)) == 0)
			return (nh);
	}
	return (NULL);
}

struct nexthop_head *
nexthop_hash(struct bgpd_addr *nexthop)
{
	u_int32_t h = 0;

	switch (nexthop->aid) {
	case AID_INET:
		h = (AF_INET ^ ntohl(nexthop->v4.s_addr) ^
		    ntohl(nexthop->v4.s_addr) >> 13) &
		    nexthoptable.nexthop_hashmask;
		break;
	case AID_INET6:
		h = hash32_buf(&nexthop->v6, sizeof(struct in6_addr),
		    HASHINIT) & nexthoptable.nexthop_hashmask;
		break;
	default:
		fatalx("nexthop_hash: unsupported AF");
	}
	return (&nexthoptable.nexthop_hashtbl[h]);
}
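
/*
 * Worked example (informational): for an IPv4 nexthop 192.0.2.1
 * (0xc0000201), nexthop_hash() folds the address with its upper bits,
 * AF_INET ^ 0xc0000201 ^ (0xc0000201 >> 13), and masks the result
 * with nexthop_hashmask, so the bucket only depends on the address
 * and the table size chosen in nexthop_init().
 */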