/*	$OpenBSD: rde_rib.c,v 1.133 2012/07/01 11:55:13 sthen Exp $ */

/*
 * Copyright (c) 2003, 2004 Claudio Jeker <claudio@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/types.h>
#include <sys/queue.h>
#include <sys/hash.h>

#include <stdlib.h>
#include <string.h>

#include "bgpd.h"
#include "rde.h"

/*
 * BGP RIB -- Routing Information Base
 *
 * The RIB is built with one aspect in mind: speed -- more precisely, update
 * speed.  Long table walks therefore need to be avoided at all cost.
 * This is achieved by heavily linking the different parts together.
 */
u_int16_t rib_size;
struct rib *ribs;

LIST_HEAD(, rib_context) rib_dump_h = LIST_HEAD_INITIALIZER(rib_dump_h);

struct rib_entry *rib_add(struct rib *, struct bgpd_addr *, int);
int rib_compare(const struct rib_entry *, const struct rib_entry *);
void rib_remove(struct rib_entry *);
int rib_empty(struct rib_entry *);
struct rib_entry *rib_restart(struct rib_context *);

RB_PROTOTYPE(rib_tree, rib_entry, rib_e, rib_compare);
RB_GENERATE(rib_tree, rib_entry, rib_e, rib_compare);


/* RIB specific functions */
u_int16_t
rib_new(char *name, u_int rtableid, u_int16_t flags)
{
	struct rib	*xribs;
	size_t		 newsize;
	u_int16_t	 id;

	for (id = 0; id < rib_size; id++) {
		if (*ribs[id].name == '\0')
			break;
	}

	if (id == RIB_FAILED)
		fatalx("rib_new: trying to use reserved id");

	if (id >= rib_size) {
		newsize = sizeof(struct rib) * (id + 1);
		if ((xribs = realloc(ribs, newsize)) == NULL) {
			/* XXX this is not clever */
			fatal("rib_new");
		}
		ribs = xribs;
		rib_size = id + 1;
	}

	bzero(&ribs[id], sizeof(struct rib));
	strlcpy(ribs[id].name, name, sizeof(ribs[id].name));
	RB_INIT(&ribs[id].rib);
	ribs[id].state = RECONF_REINIT;
	ribs[id].id = id;
	ribs[id].flags = flags;
	ribs[id].rtableid = rtableid;

	return (id);
}

u_int16_t
rib_find(char *name)
{
	u_int16_t	 id;

	if (name == NULL || *name == '\0')
		return (1);	/* XXX */

	for (id = 0; id < rib_size; id++) {
		if (!strcmp(ribs[id].name, name))
			return (id);
	}

	return (RIB_FAILED);
}

void
rib_free(struct rib *rib)
{
	struct rib_context	*ctx, *next;
	struct rib_entry	*re, *xre;
	struct prefix		*p, *np;

	for (ctx = LIST_FIRST(&rib_dump_h); ctx != NULL; ctx = next) {
		next = LIST_NEXT(ctx, entry);
		if (ctx->ctx_rib == rib) {
			re = ctx->ctx_re;
			re->flags &= ~F_RIB_ENTRYLOCK;
			LIST_REMOVE(ctx, entry);
			if (ctx->ctx_done)
				ctx->ctx_done(ctx->ctx_arg);
			else
				free(ctx);
		}
	}

	for (re = RB_MIN(rib_tree, &rib->rib); re != NULL; re = xre) {
		xre = RB_NEXT(rib_tree, &rib->rib, re);

		/*
		 * Removing the prefixes is tricky because the last one
		 * will remove the rib_entry as well, and because we do
		 * an empty check in prefix_destroy() it is not possible
		 * to use the default for loop.
		 */
		while ((p = LIST_FIRST(&re->prefix_h))) {
			np = LIST_NEXT(p, rib_l);
			if (p->aspath->pftableid) {
				struct bgpd_addr addr;

				pt_getaddr(p->prefix, &addr);
				/* Commit is done in peer_down() */
				rde_send_pftable(p->aspath->pftableid, &addr,
				    p->prefix->prefixlen, 1);
			}
			prefix_destroy(p);
			if (np == NULL)
				break;
		}
	}
	bzero(rib, sizeof(struct rib));
}

int
rib_compare(const struct rib_entry *a, const struct rib_entry *b)
{
	return (pt_prefix_cmp(a->prefix, b->prefix));
}

struct rib_entry *
rib_get(struct rib *rib, struct bgpd_addr *prefix, int prefixlen)
{
	struct rib_entry	 xre;
	struct pt_entry		*pte;

	pte = pt_fill(prefix, prefixlen);
	bzero(&xre, sizeof(xre));
	xre.prefix = pte;

	return (RB_FIND(rib_tree, &rib->rib, &xre));
}

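/*
 * Added note: rib_lookup() below performs a longest-prefix match by
 * probing rib_get() from the most specific prefix length down to 0.
 * For example, an AID_INET query for 192.0.2.1 checks 192.0.2.1/32,
 * then /31, and so on, returning the most specific entry found, or
 * NULL if nothing matches at all.
 */
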
struct rib_entry *
rib_lookup(struct rib *rib, struct bgpd_addr *addr)
{
	struct rib_entry	*re;
	int			 i;

	switch (addr->aid) {
	case AID_INET:
	case AID_VPN_IPv4:
		for (i = 32; i >= 0; i--) {
			re = rib_get(rib, addr, i);
			if (re != NULL)
				return (re);
		}
		break;
	case AID_INET6:
		for (i = 128; i >= 0; i--) {
			re = rib_get(rib, addr, i);
			if (re != NULL)
				return (re);
		}
		break;
	default:
		fatalx("rib_lookup: unknown af");
	}
	return (NULL);
}


struct rib_entry *
rib_add(struct rib *rib, struct bgpd_addr *prefix, int prefixlen)
{
	struct pt_entry		*pte;
	struct rib_entry	*re;

	pte = pt_get(prefix, prefixlen);
	if (pte == NULL)
		pte = pt_add(prefix, prefixlen);

	if ((re = calloc(1, sizeof(*re))) == NULL)
		fatal("rib_add");

	LIST_INIT(&re->prefix_h);
	re->prefix = pte;
	re->flags = rib->flags;
	re->ribid = rib->id;

	if (RB_INSERT(rib_tree, &rib->rib, re) != NULL) {
		log_warnx("rib_add: insert failed");
		free(re);
		return (NULL);
	}

	pt_ref(pte);

	rdemem.rib_cnt++;

	return (re);
}

void
rib_remove(struct rib_entry *re)
{
	if (!rib_empty(re))
		fatalx("rib_remove: entry not empty");

	if (re->flags & F_RIB_ENTRYLOCK)
		/* entry is locked, don't free it. */
		return;

	pt_unref(re->prefix);
	if (pt_empty(re->prefix))
		pt_remove(re->prefix);

	if (RB_REMOVE(rib_tree, &ribs[re->ribid].rib, re) == NULL)
		log_warnx("rib_remove: remove failed.");

	free(re);
	rdemem.rib_cnt--;
}

int
rib_empty(struct rib_entry *re)
{
	return LIST_EMPTY(&re->prefix_h);
}

void
rib_dump(struct rib *rib, void (*upcall)(struct rib_entry *, void *),
    void *arg, u_int8_t aid)
{
	struct rib_context	*ctx;

	if ((ctx = calloc(1, sizeof(*ctx))) == NULL)
		fatal("rib_dump");
	ctx->ctx_rib = rib;
	ctx->ctx_upcall = upcall;
	ctx->ctx_arg = arg;
	ctx->ctx_aid = aid;
	rib_dump_r(ctx);
}

void
rib_dump_r(struct rib_context *ctx)
{
	struct rib_entry	*re;
	unsigned int		 i;

	if (ctx->ctx_re == NULL) {
		re = RB_MIN(rib_tree, &ctx->ctx_rib->rib);
		LIST_INSERT_HEAD(&rib_dump_h, ctx, entry);
	} else
		re = rib_restart(ctx);

	for (i = 0; re != NULL; re = RB_NEXT(rib_tree, unused, re)) {
		if (ctx->ctx_aid != AID_UNSPEC &&
		    ctx->ctx_aid != re->prefix->aid)
			continue;
		if (ctx->ctx_count && i++ >= ctx->ctx_count &&
		    (re->flags & F_RIB_ENTRYLOCK) == 0) {
			/* store and lock last element */
			ctx->ctx_re = re;
			re->flags |= F_RIB_ENTRYLOCK;
			return;
		}
		ctx->ctx_upcall(re, ctx->ctx_arg);
	}

	LIST_REMOVE(ctx, entry);
	if (ctx->ctx_done)
		ctx->ctx_done(ctx->ctx_arg);
	else
		free(ctx);
}

struct rib_entry *
rib_restart(struct rib_context *ctx)
{
	struct rib_entry	*re;

	re = ctx->ctx_re;
	re->flags &= ~F_RIB_ENTRYLOCK;

	/* find first non empty element */
	while (re && rib_empty(re))
		re = RB_NEXT(rib_tree, unused, re);

	/* free the previously locked rib element if empty */
	if (rib_empty(ctx->ctx_re))
		rib_remove(ctx->ctx_re);
	ctx->ctx_re = NULL;
	return (re);
}

void
rib_dump_runner(void)
{
	struct rib_context	*ctx, *next;

	for (ctx = LIST_FIRST(&rib_dump_h); ctx != NULL; ctx = next) {
		next = LIST_NEXT(ctx, entry);
		rib_dump_r(ctx);
	}
}

int
rib_dump_pending(void)
{
	return (!LIST_EMPTY(&rib_dump_h));
}

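/*
 * Sketch of how a caller can drive the dump machinery above; this is an
 * illustrative addition, not part of the original file, and the two
 * "example_*" names are hypothetical.  Since rib_dump() zeroes the
 * context, ctx_count is 0 and the walk completes before rib_dump()
 * returns, so passing a stack variable as argument is safe.  Callers that
 * need a restartable walk instead fill in their own rib_context (with a
 * non-zero ctx_count) and call rib_dump_r(); pending contexts are then
 * continued by rib_dump_runner().
 */
#if 0
static void
example_count_upcall(struct rib_entry *re, void *arg)
{
	u_int32_t	*cnt = arg;

	(*cnt)++;
}

static u_int32_t
example_count_rib(struct rib *rib)
{
	u_int32_t	 cnt = 0;

	/* count every entry, regardless of address family */
	rib_dump(rib, example_count_upcall, &cnt, AID_UNSPEC);
	return (cnt);
}
#endif
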
/* used to bump correct prefix counters */
#define PREFIX_COUNT(x, op)			\
	do {					\
		(x)->prefix_cnt += (op);	\
	} while (0)

/* path specific functions */

static void	path_link(struct rde_aspath *, struct rde_peer *);

struct path_table pathtable;

/* XXX the hash should also include communities and the other attrs */
#define PATH_HASH(x)				\
	&pathtable.path_hashtbl[hash32_buf((x)->data, (x)->len, HASHINIT) & \
	    pathtable.path_hashmask]

void
path_init(u_int32_t hashsize)
{
	u_int32_t	 hs, i;

	for (hs = 1; hs < hashsize; hs <<= 1)
		;
	pathtable.path_hashtbl = calloc(hs, sizeof(struct aspath_head));
	if (pathtable.path_hashtbl == NULL)
		fatal("path_init");

	for (i = 0; i < hs; i++)
		LIST_INIT(&pathtable.path_hashtbl[i]);

	pathtable.path_hashmask = hs - 1;
}

void
path_shutdown(void)
{
	u_int32_t	 i;

	for (i = 0; i <= pathtable.path_hashmask; i++)
		if (!LIST_EMPTY(&pathtable.path_hashtbl[i]))
			log_warnx("path_shutdown: free non-free table");

	free(pathtable.path_hashtbl);
}

int
path_update(struct rib *rib, struct rde_peer *peer, struct rde_aspath *nasp,
    struct bgpd_addr *prefix, int prefixlen)
{
	struct rde_aspath	*asp;
	struct prefix		*p;

	if (nasp->pftableid) {
		rde_send_pftable(nasp->pftableid, prefix, prefixlen, 0);
		rde_send_pftable_commit();
	}

	/*
	 * First try to find a prefix in the specified RIB.
	 */
	if ((p = prefix_get(rib, peer, prefix, prefixlen, 0)) != NULL) {
		if (path_compare(nasp, p->aspath) == 0) {
			/* no change, update last change */
			p->lastchange = time(NULL);
			return (0);
		}
	}

	/*
	 * Either the prefix does not exist or the path changed.
	 * In both cases lookup the new aspath to make sure it is not
	 * already in the RIB.
	 */
	if ((asp = path_lookup(nasp, peer)) == NULL) {
		/* Path not available, create and link a new one. */
		asp = path_copy(nasp);
		path_link(asp, peer);
	}

	/* If the prefix was found move it else add it to the aspath. */
	if (p != NULL)
		prefix_move(asp, p);
	else
		return (prefix_add(rib, asp, prefix, prefixlen));
	return (0);
}

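/*
 * Added note: path_compare() defines a total order over path attribute
 * sets so that path_lookup() can locate an existing, identical
 * rde_aspath in its hash bucket and path_update() can detect unchanged
 * announcements.  It only tests for equality and a stable order;
 * ranking routes against each other is the job of the decision process
 * invoked through prefix_evaluate().
 */
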
int
path_compare(struct rde_aspath *a, struct rde_aspath *b)
{
	int	 r;

	if (a->origin > b->origin)
		return (1);
	if (a->origin < b->origin)
		return (-1);
	if ((a->flags & ~F_ATTR_LINKED) > (b->flags & ~F_ATTR_LINKED))
		return (1);
	if ((a->flags & ~F_ATTR_LINKED) < (b->flags & ~F_ATTR_LINKED))
		return (-1);
	if (a->med > b->med)
		return (1);
	if (a->med < b->med)
		return (-1);
	if (a->lpref > b->lpref)
		return (1);
	if (a->lpref < b->lpref)
		return (-1);
	if (a->weight > b->weight)
		return (1);
	if (a->weight < b->weight)
		return (-1);
	if (a->rtlabelid > b->rtlabelid)
		return (1);
	if (a->rtlabelid < b->rtlabelid)
		return (-1);
	if (a->pftableid > b->pftableid)
		return (1);
	if (a->pftableid < b->pftableid)
		return (-1);

	r = aspath_compare(a->aspath, b->aspath);
	if (r == 0)
		r = nexthop_compare(a->nexthop, b->nexthop);
	if (r > 0)
		return (1);
	if (r < 0)
		return (-1);

	return (attr_compare(a, b));
}

struct rde_aspath *
path_lookup(struct rde_aspath *aspath, struct rde_peer *peer)
{
	struct aspath_head	*head;
	struct rde_aspath	*asp;

	head = PATH_HASH(aspath->aspath);

	LIST_FOREACH(asp, head, path_l) {
		if (peer == asp->peer && path_compare(aspath, asp) == 0)
			return (asp);
	}
	return (NULL);
}

void
path_remove(struct rde_aspath *asp)
{
	struct prefix	*p, *np;

	for (p = LIST_FIRST(&asp->prefix_h); p != NULL; p = np) {
		np = LIST_NEXT(p, path_l);
		if (asp->pftableid) {
			struct bgpd_addr addr;

			pt_getaddr(p->prefix, &addr);
			/* Commit is done in peer_down() */
			rde_send_pftable(p->aspath->pftableid, &addr,
			    p->prefix->prefixlen, 1);
		}
		prefix_destroy(p);
	}
}

/* this function is only called by prefix_remove and path_remove */
void
path_destroy(struct rde_aspath *asp)
{
	/* path_destroy can only unlink and free empty rde_aspath */
	if (asp->prefix_cnt != 0 || asp->active_cnt != 0)
		log_warnx("path_destroy: prefix count out of sync");

	nexthop_unlink(asp);
	LIST_REMOVE(asp, path_l);
	LIST_REMOVE(asp, peer_l);
	asp->peer = NULL;
	asp->nexthop = NULL;
	asp->flags &= ~F_ATTR_LINKED;

	path_put(asp);
}

int
path_empty(struct rde_aspath *asp)
{
	return LIST_EMPTY(&asp->prefix_h);
}

/*
 * The path object is linked into multiple lists for fast access.
 * These are peer_l, path_l and nexthop_l.
 * peer_l:    list of all aspaths that belong to that peer
 * path_l:    hash list to find paths quickly
 * nexthop_l: list of all aspaths with an equal exit nexthop
 * (A usage sketch follows below.)
 */
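/*
 * Illustrative sketch (an addition, not part of the original file; the
 * function name is hypothetical): the lists make a walk over everything
 * a single peer announced cheap, without touching the RB tree at all.
 * prefix_network_clean() below uses the same two-level walk, with
 * LIST_NEXT kept ahead of the iterator so that entries may be removed.
 */
#if 0
static u_int32_t
example_count_peer_prefixes(struct rde_peer *peer)
{
	struct rde_aspath	*asp;
	struct prefix		*p;
	u_int32_t		 cnt = 0;

	LIST_FOREACH(asp, &peer->path_h, peer_l)	/* all paths of peer */
		LIST_FOREACH(p, &asp->prefix_h, path_l)	/* their prefixes */
			cnt++;
	return (cnt);
}
#endif
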
static void
path_link(struct rde_aspath *asp, struct rde_peer *peer)
{
	struct aspath_head	*head;

	head = PATH_HASH(asp->aspath);

	LIST_INSERT_HEAD(head, asp, path_l);
	LIST_INSERT_HEAD(&peer->path_h, asp, peer_l);
	asp->peer = peer;
	nexthop_link(asp);
	asp->flags |= F_ATTR_LINKED;
}

/*
 * copy asp to a new UNLINKED one, mainly for filtering
 */
struct rde_aspath *
path_copy(struct rde_aspath *asp)
{
	struct rde_aspath	*nasp;

	nasp = path_get();
	nasp->aspath = asp->aspath;
	if (nasp->aspath != NULL) {
		nasp->aspath->refcnt++;
		rdemem.aspath_refs++;
	}
	nasp->nexthop = asp->nexthop;
	nasp->med = asp->med;
	nasp->lpref = asp->lpref;
	nasp->weight = asp->weight;
	nasp->origin = asp->origin;
	nasp->rtlabelid = asp->rtlabelid;
	rtlabel_ref(nasp->rtlabelid);
	nasp->pftableid = asp->pftableid;
	pftable_ref(nasp->pftableid);

	nasp->flags = asp->flags & ~F_ATTR_LINKED;
	attr_copy(nasp, asp);

	return (nasp);
}

/* alloc and initialize new entry. May not fail. */
struct rde_aspath *
path_get(void)
{
	struct rde_aspath	*asp;

	asp = calloc(1, sizeof(*asp));
	if (asp == NULL)
		fatal("path_alloc");
	rdemem.path_cnt++;

	LIST_INIT(&asp->prefix_h);
	asp->origin = ORIGIN_INCOMPLETE;
	asp->lpref = DEFAULT_LPREF;
	/* med = 0 */
	/* weight = 0 */
	/* rtlabel = 0 */

	return (asp);
}

/* free an unlinked element */
void
path_put(struct rde_aspath *asp)
{
	if (asp == NULL)
		return;

	if (asp->flags & F_ATTR_LINKED)
		fatalx("path_put: linked object");

	rtlabel_unref(asp->rtlabelid);
	pftable_unref(asp->pftableid);
	aspath_put(asp->aspath);
	attr_freeall(asp);
	rdemem.path_cnt--;
	free(asp);
}

/* prefix specific functions */

static struct prefix	*prefix_alloc(void);
static void		 prefix_free(struct prefix *);
static void		 prefix_link(struct prefix *, struct rib_entry *,
			     struct rde_aspath *);
static void		 prefix_unlink(struct prefix *);

/*
 * search for specified prefix of a peer. Returns NULL if not found.
 */
struct prefix *
prefix_get(struct rib *rib, struct rde_peer *peer, struct bgpd_addr *prefix,
    int prefixlen, u_int32_t flags)
{
	struct rib_entry	*re;

	re = rib_get(rib, prefix, prefixlen);
	if (re == NULL)
		return (NULL);
	return (prefix_bypeer(re, peer, flags));
}

/*
 * Adds or updates a prefix.
 */
int
prefix_add(struct rib *rib, struct rde_aspath *asp, struct bgpd_addr *prefix,
    int prefixlen)
{
	struct prefix		*p;
	struct rib_entry	*re;

	re = rib_get(rib, prefix, prefixlen);
	if (re == NULL)
		re = rib_add(rib, prefix, prefixlen);

	p = prefix_bypeer(re, asp->peer, asp->flags);
	if (p == NULL) {
		p = prefix_alloc();
		prefix_link(p, re, asp);
		return (1);
	} else {
		if (p->aspath != asp) {
			/* prefix belongs to a different aspath so move */
			prefix_move(asp, p);
		} else
			p->lastchange = time(NULL);
		return (0);
	}
}

/*
 * Move the prefix to the specified aspath; remove the old aspath if it
 * becomes empty.
 */
void
prefix_move(struct rde_aspath *asp, struct prefix *p)
{
	struct prefix		*np;
	struct rde_aspath	*oasp;

	if (asp->peer != p->aspath->peer)
		fatalx("prefix_move: cross peer move");

	/* create new prefix node */
	np = prefix_alloc();
	np->aspath = asp;
	/* peer and prefix pointers are still equal */
	np->prefix = p->prefix;
	np->rib = p->rib;
	np->lastchange = time(NULL);

	/* add to new as path */
	LIST_INSERT_HEAD(&asp->prefix_h, np, path_l);
	PREFIX_COUNT(asp, 1);
	/*
	 * no need to update the peer prefix count because we are only moving
	 * the prefix without changing the peer.
	 */

	/*
	 * First kick the old prefix node out of the prefix list,
	 * afterwards run the route decision for the new prefix node.
	 * Because of this only one update is generated if the prefix
	 * was active.
	 * This is safe because we create a new prefix and so the change
	 * is noticed by prefix_evaluate().
	 */
	LIST_REMOVE(p, rib_l);
	prefix_evaluate(np, np->rib);

	/* remove old prefix node */
	oasp = p->aspath;
	LIST_REMOVE(p, path_l);
	PREFIX_COUNT(oasp, -1);
	/* as before, the peer count needs no update because of the move */

	/* destroy all references to other objects and free the old prefix */
	p->aspath = NULL;
	p->prefix = NULL;
	p->rib = NULL;
	prefix_free(p);

	/* destroy old path if empty */
	if (path_empty(oasp))
		path_destroy(oasp);
}

/*
 * Removes a prefix from all lists. If the parent objects -- path or
 * pt_entry -- become empty remove them too.
 */
int
prefix_remove(struct rib *rib, struct rde_peer *peer, struct bgpd_addr *prefix,
    int prefixlen, u_int32_t flags)
{
	struct prefix		*p;
	struct rib_entry	*re;
	struct rde_aspath	*asp;

	re = rib_get(rib, prefix, prefixlen);
	if (re == NULL)		/* Got a dummy withdrawn request */
		return (0);

	p = prefix_bypeer(re, peer, flags);
	if (p == NULL)		/* Got a dummy withdrawn request. */
		return (0);

	asp = p->aspath;

	if (asp->pftableid) {
		/* only prefixes in the local RIB were pushed into pf */
		rde_send_pftable(asp->pftableid, prefix, prefixlen, 1);
		rde_send_pftable_commit();
	}

	prefix_destroy(p);

	return (1);
}

/* dump a prefix into specified buffer */
int
prefix_write(u_char *buf, int len, struct bgpd_addr *prefix, u_int8_t plen)
{
	int	totlen, psize;

	switch (prefix->aid) {
	case AID_INET:
	case AID_INET6:
		totlen = PREFIX_SIZE(plen);

		if (totlen > len)
			return (-1);
		*buf++ = plen;
		memcpy(buf, &prefix->ba, totlen - 1);
		return (totlen);
	case AID_VPN_IPv4:
		totlen = PREFIX_SIZE(plen) + sizeof(prefix->vpn4.rd) +
		    prefix->vpn4.labellen;
		/*
		 * The size of the address part must be taken before plen
		 * is extended to cover the label stack and the RD, else
		 * the final memcpy would copy too many bytes.
		 */
		psize = PREFIX_SIZE(plen) - 1;
		plen += (sizeof(prefix->vpn4.rd) + prefix->vpn4.labellen) * 8;

		if (totlen > len)
			return (-1);
		*buf++ = plen;
		memcpy(buf, &prefix->vpn4.labelstack, prefix->vpn4.labellen);
		buf += prefix->vpn4.labellen;
		memcpy(buf, &prefix->vpn4.rd, sizeof(prefix->vpn4.rd));
		buf += sizeof(prefix->vpn4.rd);
		memcpy(buf, &prefix->vpn4.addr, psize);
		return (totlen);
	default:
		return (-1);
	}
}

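/*
 * Worked example for prefix_write() (an added illustration; it assumes
 * PREFIX_SIZE(plen) evaluates to the count of significant address bytes
 * plus one, as the AID_INET case above relies on): encoding 192.0.2.0/24
 * gives totlen = 4 and the bytes
 *
 *	0x18 0xc0 0x00 0x02
 *
 * i.e. the prefix length followed by only the significant address bytes.
 * In the AID_VPN_IPv4 case the length byte additionally covers the label
 * stack and the route distinguisher, which is why plen is grown by eight
 * bits per label/RD byte before being written.
 */
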
int
prefix_writebuf(struct ibuf *buf, struct bgpd_addr *prefix, u_int8_t plen)
{
	int	 totlen;
	void	*bptr;

	switch (prefix->aid) {
	case AID_INET:
	case AID_INET6:
		totlen = PREFIX_SIZE(plen);
		break;
	case AID_VPN_IPv4:
		totlen = PREFIX_SIZE(plen) + sizeof(prefix->vpn4.rd) +
		    prefix->vpn4.labellen;
		break;
	default:
		return (-1);
	}

	if ((bptr = ibuf_reserve(buf, totlen)) == NULL)
		return (-1);
	if (prefix_write(bptr, totlen, prefix, plen) == -1)
		return (-1);
	return (0);
}

/*
 * Searches in the prefix list of specified pt_entry for a prefix entry
 * belonging to the peer peer. Returns NULL if no match found.
 */
struct prefix *
prefix_bypeer(struct rib_entry *re, struct rde_peer *peer, u_int32_t flags)
{
	struct prefix	*p;

	LIST_FOREACH(p, &re->prefix_h, rib_l) {
		if (p->aspath->peer != peer)
			continue;
		if (p->aspath->flags & flags &&
		    (flags & F_ANN_DYNAMIC) !=
		    (p->aspath->flags & F_ANN_DYNAMIC))
			continue;
		return (p);
	}
	return (NULL);
}

void
prefix_updateall(struct rde_aspath *asp, enum nexthop_state state,
    enum nexthop_state oldstate)
{
	struct prefix	*p;

	LIST_FOREACH(p, &asp->prefix_h, path_l) {
		/*
		 * Skip RIBs that are flagged as noevaluate.
		 */
		if (p->rib->flags & F_RIB_NOEVALUATE)
			continue;

		if (oldstate == state && state == NEXTHOP_REACH) {
			/*
			 * The state of the nexthop did not change. The only
			 * thing that may have changed is the true_nexthop
			 * or other internal info. This will not change
			 * the routing decision so shortcut here.
			 */
			if ((p->rib->flags & F_RIB_NOFIB) == 0 &&
			    p == p->rib->active)
				rde_send_kroute(p, NULL, p->rib->ribid);
			continue;
		}

		/* redo the route decision */
		LIST_REMOVE(p, rib_l);
		/*
		 * If the prefix is the active one remove it first;
		 * this has to be done because we can not detect when
		 * the active prefix changes its state. In this case
		 * we know that this is a withdrawal and so the second
		 * prefix_evaluate() will generate no update because
		 * the nexthop is unreachable or ineligible.
		 */
		if (p == p->rib->active)
			prefix_evaluate(NULL, p->rib);
		prefix_evaluate(p, p->rib);
	}
}

/* kill a prefix. */
void
prefix_destroy(struct prefix *p)
{
	struct rde_aspath	*asp;

	asp = p->aspath;
	prefix_unlink(p);
	prefix_free(p);

	if (path_empty(asp))
		path_destroy(asp);
}

/*
 * helper function to clean up the connected networks after a reload
 */
void
prefix_network_clean(struct rde_peer *peer, time_t reloadtime, u_int32_t flags)
{
	struct rde_aspath	*asp, *xasp;
	struct prefix		*p, *xp;

	for (asp = LIST_FIRST(&peer->path_h); asp != NULL; asp = xasp) {
		xasp = LIST_NEXT(asp, peer_l);
		if ((asp->flags & F_ANN_DYNAMIC) != flags)
			continue;
		for (p = LIST_FIRST(&asp->prefix_h); p != NULL; p = xp) {
			xp = LIST_NEXT(p, path_l);
			if (reloadtime > p->lastchange) {
				prefix_unlink(p);
				prefix_free(p);
			}
		}
		if (path_empty(asp))
			path_destroy(asp);
	}
}

/*
 * Link a prefix into the different parent objects.
 */
static void
prefix_link(struct prefix *pref, struct rib_entry *re, struct rde_aspath *asp)
{
	LIST_INSERT_HEAD(&asp->prefix_h, pref, path_l);
	PREFIX_COUNT(asp, 1);

	pref->aspath = asp;
	pref->rib = re;
	pref->prefix = re->prefix;
	pt_ref(pref->prefix);
	pref->lastchange = time(NULL);

	/* make route decision */
	prefix_evaluate(pref, re);
}

/*
 * Unlink a prefix from the different parent objects.
 */
static void
prefix_unlink(struct prefix *pref)
{
	struct rib_entry	*re = pref->rib;

	/* make route decision */
	LIST_REMOVE(pref, rib_l);
	prefix_evaluate(NULL, re);

	LIST_REMOVE(pref, path_l);
	PREFIX_COUNT(pref->aspath, -1);

	pt_unref(pref->prefix);
	if (pt_empty(pref->prefix))
		pt_remove(pref->prefix);
	if (rib_empty(re))
		rib_remove(re);

	/* destroy all references to other objects */
	pref->aspath = NULL;
	pref->prefix = NULL;
	pref->rib = NULL;

	/*
	 * It's the caller's duty to remove empty aspath structures.
	 * Also freeing the unlinked prefix is the caller's duty.
	 */
}

/* alloc and bzero new entry. May not fail. */
static struct prefix *
prefix_alloc(void)
{
	struct prefix	*p;

	p = calloc(1, sizeof(*p));
	if (p == NULL)
		fatal("prefix_alloc");
	rdemem.prefix_cnt++;
	return p;
}

/* free an unlinked entry */
static void
prefix_free(struct prefix *pref)
{
	rdemem.prefix_cnt--;
	free(pref);
}

/*
 * nexthop functions
 */
struct nexthop_head	*nexthop_hash(struct bgpd_addr *);
struct nexthop		*nexthop_lookup(struct bgpd_addr *);

/*
 * In BGP there exist two nexthops: the exit nexthop which was announced via
 * BGP and the true nexthop which is used in the FIB -- the forwarding
 * information base, a.k.a. the kernel routing table. When sending updates
 * it is even more confusing. In IBGP we pass the unmodified exit nexthop
 * to the neighbors while in EBGP normally the address of the router is
 * sent. The exit nexthop may be passed to the external neighbor if the
 * neighbor and the exit nexthop reside in the same subnet -- directly
 * connected.
 */

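/*
 * Added example: if a peer announces a route with nexthop 192.0.2.9, then
 * 192.0.2.9 is the exit nexthop.  Should the kernel reach that address
 * via the gateway 10.0.0.1, then 10.0.0.1 becomes the true nexthop that
 * ends up in the FIB entry; see the msg->gateway handling in
 * nexthop_update() below.  (Addresses are illustrative.)
 */
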
struct nexthop_table {
	LIST_HEAD(nexthop_head, nexthop)	*nexthop_hashtbl;
	u_int32_t				 nexthop_hashmask;
} nexthoptable;

void
nexthop_init(u_int32_t hashsize)
{
	u_int32_t	 hs, i;

	for (hs = 1; hs < hashsize; hs <<= 1)
		;
	nexthoptable.nexthop_hashtbl = calloc(hs, sizeof(struct nexthop_head));
	if (nexthoptable.nexthop_hashtbl == NULL)
		fatal("nexthop_init");

	for (i = 0; i < hs; i++)
		LIST_INIT(&nexthoptable.nexthop_hashtbl[i]);

	nexthoptable.nexthop_hashmask = hs - 1;
}

void
nexthop_shutdown(void)
{
	u_int32_t	 i;
	struct nexthop	*nh, *nnh;

	for (i = 0; i <= nexthoptable.nexthop_hashmask; i++) {
		for (nh = LIST_FIRST(&nexthoptable.nexthop_hashtbl[i]);
		    nh != NULL; nh = nnh) {
			nnh = LIST_NEXT(nh, nexthop_l);
			nh->state = NEXTHOP_UNREACH;
			(void)nexthop_delete(nh);
		}
		if (!LIST_EMPTY(&nexthoptable.nexthop_hashtbl[i]))
			log_warnx("nexthop_shutdown: non-free table");
	}

	free(nexthoptable.nexthop_hashtbl);
}

void
nexthop_update(struct kroute_nexthop *msg)
{
	struct nexthop		*nh;
	struct rde_aspath	*asp;
	enum nexthop_state	 oldstate;

	nh = nexthop_lookup(&msg->nexthop);
	if (nh == NULL) {
		log_warnx("nexthop_update: non-existent nexthop %s",
		    log_addr(&msg->nexthop));
		return;
	}

	oldstate = nh->state;
	if (msg->valid)
		nh->state = NEXTHOP_REACH;
	else
		nh->state = NEXTHOP_UNREACH;

	if (msg->connected) {
		nh->flags |= NEXTHOP_CONNECTED;
		memcpy(&nh->true_nexthop, &nh->exit_nexthop,
		    sizeof(nh->true_nexthop));
	} else
		memcpy(&nh->true_nexthop, &msg->gateway,
		    sizeof(nh->true_nexthop));

	memcpy(&nh->nexthop_net, &msg->net,
	    sizeof(nh->nexthop_net));
	nh->nexthop_netlen = msg->netlen;

	if (nexthop_delete(nh))
		/* nexthop no longer used */
		return;

	if (rde_noevaluate())
		/*
		 * if the decision process is turned off there is no need
		 * for the aspath list walk.
		 */
		return;

	LIST_FOREACH(asp, &nh->path_h, nexthop_l) {
		prefix_updateall(asp, nh->state, oldstate);
	}
}

void
nexthop_modify(struct rde_aspath *asp, struct bgpd_addr *nexthop,
    enum action_types type, u_int8_t aid)
{
	struct nexthop	*nh;

	if (type == ACTION_SET_NEXTHOP && aid != nexthop->aid)
		return;

	asp->flags &= ~F_NEXTHOP_MASK;
	switch (type) {
	case ACTION_SET_NEXTHOP_REJECT:
		asp->flags |= F_NEXTHOP_REJECT;
		break;
	case ACTION_SET_NEXTHOP_BLACKHOLE:
		asp->flags |= F_NEXTHOP_BLACKHOLE;
		break;
	case ACTION_SET_NEXTHOP_NOMODIFY:
		asp->flags |= F_NEXTHOP_NOMODIFY;
		break;
	case ACTION_SET_NEXTHOP_SELF:
		asp->flags |= F_NEXTHOP_SELF;
		break;
	case ACTION_SET_NEXTHOP:
		nh = nexthop_get(nexthop);
		if (asp->flags & F_ATTR_LINKED)
			nexthop_unlink(asp);
		asp->nexthop = nh;
		if (asp->flags & F_ATTR_LINKED)
			nexthop_link(asp);
		break;
	default:
		break;
	}
}

void
nexthop_link(struct rde_aspath *asp)
{
	if (asp->nexthop == NULL)
		return;

	LIST_INSERT_HEAD(&asp->nexthop->path_h, asp, nexthop_l);
}

void
nexthop_unlink(struct rde_aspath *asp)
{
	struct nexthop	*nh;

	if (asp->nexthop == NULL)
		return;

	LIST_REMOVE(asp, nexthop_l);

	/* see if list is empty */
	nh = asp->nexthop;
	asp->nexthop = NULL;

	(void)nexthop_delete(nh);
}

int
nexthop_delete(struct nexthop *nh)
{
	/* nexthop still used by some other aspath */
	if (!LIST_EMPTY(&nh->path_h))
		return (0);

	/* either pinned or in a state where it may not be deleted */
	if (nh->refcnt > 0 || nh->state == NEXTHOP_LOOKUP)
		return (0);

	LIST_REMOVE(nh, nexthop_l);
	rde_send_nexthop(&nh->exit_nexthop, 0);

	rdemem.nexthop_cnt--;
	free(nh);
	return (1);
}

struct nexthop *
nexthop_get(struct bgpd_addr *nexthop)
{
	struct nexthop	*nh;

	nh = nexthop_lookup(nexthop);
	if (nh == NULL) {
		nh = calloc(1, sizeof(*nh));
		if (nh == NULL)
			fatal("nexthop_alloc");
		rdemem.nexthop_cnt++;

		LIST_INIT(&nh->path_h);
		nh->state = NEXTHOP_LOOKUP;
		nh->exit_nexthop = *nexthop;
		LIST_INSERT_HEAD(nexthop_hash(nexthop), nh,
		    nexthop_l);

		rde_send_nexthop(&nh->exit_nexthop, 1);
	}

	return (nh);
}

int
nexthop_compare(struct nexthop *na, struct nexthop *nb)
{
	struct bgpd_addr	*a, *b;

	if (na == nb)
		return (0);
	if (na == NULL)
		return (-1);
	if (nb == NULL)
		return (1);

	a = &na->exit_nexthop;
	b = &nb->exit_nexthop;

	if (a->aid != b->aid)
		return (a->aid - b->aid);

	switch (a->aid) {
	case AID_INET:
		if (ntohl(a->v4.s_addr) > ntohl(b->v4.s_addr))
			return (1);
		if (ntohl(a->v4.s_addr) < ntohl(b->v4.s_addr))
			return (-1);
		return (0);
	case AID_INET6:
		return (memcmp(&a->v6, &b->v6, sizeof(struct in6_addr)));
	default:
		fatalx("nexthop_cmp: unknown af");
	}
	return (-1);
}

struct nexthop *
nexthop_lookup(struct bgpd_addr *nexthop)
{
	struct nexthop	*nh;

	LIST_FOREACH(nh, nexthop_hash(nexthop), nexthop_l) {
		if (memcmp(&nh->exit_nexthop, nexthop,
		    sizeof(struct bgpd_addr)) == 0)
			return (nh);
	}
	return (NULL);
}

struct nexthop_head *
nexthop_hash(struct bgpd_addr *nexthop)
{
	u_int32_t	 h = 0;

	switch (nexthop->aid) {
	case AID_INET:
		h = (AF_INET ^ ntohl(nexthop->v4.s_addr) ^
		    ntohl(nexthop->v4.s_addr) >> 13) &
		    nexthoptable.nexthop_hashmask;
		break;
	case AID_INET6:
		h = hash32_buf(&nexthop->v6, sizeof(struct in6_addr),
		    HASHINIT) & nexthoptable.nexthop_hashmask;
		break;
	default:
		fatalx("nexthop_hash: unsupported AF");
	}
	return (&nexthoptable.nexthop_hashtbl[h]);
}