1 /* $OpenBSD: rde_rib.c,v 1.128 2011/01/14 20:07:00 henning Exp $ */ 2 3 /* 4 * Copyright (c) 2003, 2004 Claudio Jeker <claudio@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 #include <sys/types.h> 20 #include <sys/queue.h> 21 #include <sys/hash.h> 22 23 #include <stdlib.h> 24 #include <string.h> 25 26 #include "bgpd.h" 27 #include "rde.h" 28 29 /* 30 * BGP RIB -- Routing Information Base 31 * 32 * The RIB is build with one aspect in mind. Speed -- actually update speed. 33 * Therefore one thing needs to be absolutely avoided, long table walks. 34 * This is achieved by heavily linking the different parts together. 35 */ 36 u_int16_t rib_size; 37 struct rib *ribs; 38 39 LIST_HEAD(, rib_context) rib_dump_h = LIST_HEAD_INITIALIZER(rib_dump_h); 40 41 struct rib_entry *rib_add(struct rib *, struct bgpd_addr *, int); 42 int rib_compare(const struct rib_entry *, const struct rib_entry *); 43 void rib_remove(struct rib_entry *); 44 int rib_empty(struct rib_entry *); 45 struct rib_entry *rib_restart(struct rib_context *); 46 47 RB_PROTOTYPE(rib_tree, rib_entry, rib_e, rib_compare); 48 RB_GENERATE(rib_tree, rib_entry, rib_e, rib_compare); 49 50 51 /* RIB specific functions */ 52 u_int16_t 53 rib_new(char *name, u_int rtableid, u_int16_t flags) 54 { 55 struct rib *xribs; 56 size_t newsize; 57 u_int16_t id; 58 59 for (id = 0; id < rib_size; id++) { 60 if (*ribs[id].name == '\0') 61 break; 62 } 63 64 if (id == RIB_FAILED) 65 fatalx("rib_new: trying to use reserved id"); 66 67 if (id >= rib_size) { 68 newsize = sizeof(struct rib) * (id + 1); 69 if ((xribs = realloc(ribs, newsize)) == NULL) { 70 /* XXX this is not clever */ 71 fatal("rib_add"); 72 } 73 ribs = xribs; 74 rib_size = id + 1; 75 } 76 77 bzero(&ribs[id], sizeof(struct rib)); 78 strlcpy(ribs[id].name, name, sizeof(ribs[id].name)); 79 RB_INIT(&ribs[id].rib); 80 ribs[id].state = RECONF_REINIT; 81 ribs[id].id = id; 82 ribs[id].flags = flags; 83 ribs[id].rtableid = rtableid; 84 85 return (id); 86 } 87 88 u_int16_t 89 rib_find(char *name) 90 { 91 u_int16_t id; 92 93 if (name == NULL || *name == '\0') 94 return (1); /* XXX */ 95 96 for (id = 0; id < rib_size; id++) { 97 if (!strcmp(ribs[id].name, name)) 98 return (id); 99 } 100 101 return (RIB_FAILED); 102 } 103 104 void 105 rib_free(struct rib *rib) 106 { 107 struct rib_context *ctx, *next; 108 struct rib_entry *re, *xre; 109 struct prefix *p, *np; 110 111 for (ctx = LIST_FIRST(&rib_dump_h); ctx != NULL; ctx = next) { 112 next = LIST_NEXT(ctx, entry); 113 if (ctx->ctx_rib == rib) { 114 re = ctx->ctx_re; 115 re->flags &= ~F_RIB_ENTRYLOCK; 116 LIST_REMOVE(ctx, entry); 117 if (ctx->ctx_done) 118 ctx->ctx_done(ctx->ctx_arg); 119 else 120 free(ctx); 121 } 122 } 123 124 for (re = RB_MIN(rib_tree, &rib->rib); re != NULL; re = xre) { 125 xre = RB_NEXT(rib_tree, &rib->rib, re); 126 127 /* 128 * Removing the prefixes is tricky because the last one 129 * will remove the rib_entry as well and at because we do 130 * a empty check in prefix_destroy() it is not possible to 131 * use the default for loop. 132 */ 133 while ((p = LIST_FIRST(&re->prefix_h))) { 134 np = LIST_NEXT(p, rib_l); 135 if (p->aspath->pftableid) { 136 struct bgpd_addr addr; 137 138 pt_getaddr(p->prefix, &addr); 139 /* Commit is done in peer_down() */ 140 rde_send_pftable(p->aspath->pftableid, &addr, 141 p->prefix->prefixlen, 1); 142 } 143 prefix_destroy(p); 144 if (np == NULL) 145 break; 146 } 147 } 148 bzero(rib, sizeof(struct rib)); 149 } 150 151 int 152 rib_compare(const struct rib_entry *a, const struct rib_entry *b) 153 { 154 return (pt_prefix_cmp(a->prefix, b->prefix)); 155 } 156 157 struct rib_entry * 158 rib_get(struct rib *rib, struct bgpd_addr *prefix, int prefixlen) 159 { 160 struct rib_entry xre; 161 struct pt_entry *pte; 162 163 pte = pt_fill(prefix, prefixlen); 164 bzero(&xre, sizeof(xre)); 165 xre.prefix = pte; 166 167 return (RB_FIND(rib_tree, &rib->rib, &xre)); 168 } 169 170 struct rib_entry * 171 rib_lookup(struct rib *rib, struct bgpd_addr *addr) 172 { 173 struct rib_entry *re; 174 int i; 175 176 switch (addr->aid) { 177 case AID_INET: 178 case AID_VPN_IPv4: 179 for (i = 32; i >= 0; i--) { 180 re = rib_get(rib, addr, i); 181 if (re != NULL) 182 return (re); 183 } 184 break; 185 case AID_INET6: 186 for (i = 128; i >= 0; i--) { 187 re = rib_get(rib, addr, i); 188 if (re != NULL) 189 return (re); 190 } 191 break; 192 default: 193 fatalx("rib_lookup: unknown af"); 194 } 195 return (NULL); 196 } 197 198 199 struct rib_entry * 200 rib_add(struct rib *rib, struct bgpd_addr *prefix, int prefixlen) 201 { 202 struct pt_entry *pte; 203 struct rib_entry *re; 204 205 pte = pt_get(prefix, prefixlen); 206 if (pte == NULL) 207 pte = pt_add(prefix, prefixlen); 208 209 if ((re = calloc(1, sizeof(*re))) == NULL) 210 fatal("rib_add"); 211 212 LIST_INIT(&re->prefix_h); 213 re->prefix = pte; 214 re->flags = rib->flags; 215 re->ribid = rib->id; 216 217 if (RB_INSERT(rib_tree, &rib->rib, re) != NULL) { 218 log_warnx("rib_add: insert failed"); 219 free(re); 220 return (NULL); 221 } 222 223 pt_ref(pte); 224 225 rdemem.rib_cnt++; 226 227 return (re); 228 } 229 230 void 231 rib_remove(struct rib_entry *re) 232 { 233 if (!rib_empty(re)) 234 fatalx("rib_remove: entry not empty"); 235 236 if (re->flags & F_RIB_ENTRYLOCK) 237 /* entry is locked, don't free it. */ 238 return; 239 240 pt_unref(re->prefix); 241 if (pt_empty(re->prefix)) 242 pt_remove(re->prefix); 243 244 if (RB_REMOVE(rib_tree, &ribs[re->ribid].rib, re) == NULL) 245 log_warnx("rib_remove: remove failed."); 246 247 free(re); 248 rdemem.rib_cnt--; 249 } 250 251 int 252 rib_empty(struct rib_entry *re) 253 { 254 return LIST_EMPTY(&re->prefix_h); 255 } 256 257 void 258 rib_dump(struct rib *rib, void (*upcall)(struct rib_entry *, void *), 259 void *arg, u_int8_t aid) 260 { 261 struct rib_context *ctx; 262 263 if ((ctx = calloc(1, sizeof(*ctx))) == NULL) 264 fatal("rib_dump"); 265 ctx->ctx_rib = rib; 266 ctx->ctx_upcall = upcall; 267 ctx->ctx_arg = arg; 268 ctx->ctx_aid = aid; 269 rib_dump_r(ctx); 270 } 271 272 void 273 rib_dump_r(struct rib_context *ctx) 274 { 275 struct rib_entry *re; 276 unsigned int i; 277 278 if (ctx->ctx_re == NULL) { 279 re = RB_MIN(rib_tree, &ctx->ctx_rib->rib); 280 LIST_INSERT_HEAD(&rib_dump_h, ctx, entry); 281 } else 282 re = rib_restart(ctx); 283 284 for (i = 0; re != NULL; re = RB_NEXT(rib_tree, unused, re)) { 285 if (ctx->ctx_aid != AID_UNSPEC && 286 ctx->ctx_aid != re->prefix->aid) 287 continue; 288 if (ctx->ctx_count && i++ >= ctx->ctx_count && 289 (re->flags & F_RIB_ENTRYLOCK) == 0) { 290 /* store and lock last element */ 291 ctx->ctx_re = re; 292 re->flags |= F_RIB_ENTRYLOCK; 293 return; 294 } 295 ctx->ctx_upcall(re, ctx->ctx_arg); 296 } 297 298 LIST_REMOVE(ctx, entry); 299 if (ctx->ctx_done) 300 ctx->ctx_done(ctx->ctx_arg); 301 else 302 free(ctx); 303 } 304 305 struct rib_entry * 306 rib_restart(struct rib_context *ctx) 307 { 308 struct rib_entry *re; 309 310 re = ctx->ctx_re; 311 re->flags &= ~F_RIB_ENTRYLOCK; 312 313 /* find first non empty element */ 314 while (re && rib_empty(re)) 315 re = RB_NEXT(rib_tree, unused, re); 316 317 /* free the previously locked rib element if empty */ 318 if (rib_empty(ctx->ctx_re)) 319 rib_remove(ctx->ctx_re); 320 ctx->ctx_re = NULL; 321 return (re); 322 } 323 324 void 325 rib_dump_runner(void) 326 { 327 struct rib_context *ctx, *next; 328 329 for (ctx = LIST_FIRST(&rib_dump_h); ctx != NULL; ctx = next) { 330 next = LIST_NEXT(ctx, entry); 331 rib_dump_r(ctx); 332 } 333 } 334 335 int 336 rib_dump_pending(void) 337 { 338 return (!LIST_EMPTY(&rib_dump_h)); 339 } 340 341 /* used to bump correct prefix counters */ 342 #define PREFIX_COUNT(x, op) \ 343 do { \ 344 (x)->prefix_cnt += (op); \ 345 } while (0) 346 347 /* path specific functions */ 348 349 static void path_link(struct rde_aspath *, struct rde_peer *); 350 351 struct path_table pathtable; 352 353 /* XXX the hash should also include communities and the other attrs */ 354 #define PATH_HASH(x) \ 355 &pathtable.path_hashtbl[hash32_buf((x)->data, (x)->len, HASHINIT) & \ 356 pathtable.path_hashmask] 357 358 void 359 path_init(u_int32_t hashsize) 360 { 361 u_int32_t hs, i; 362 363 for (hs = 1; hs < hashsize; hs <<= 1) 364 ; 365 pathtable.path_hashtbl = calloc(hs, sizeof(struct aspath_head)); 366 if (pathtable.path_hashtbl == NULL) 367 fatal("path_init"); 368 369 for (i = 0; i < hs; i++) 370 LIST_INIT(&pathtable.path_hashtbl[i]); 371 372 pathtable.path_hashmask = hs - 1; 373 } 374 375 void 376 path_shutdown(void) 377 { 378 u_int32_t i; 379 380 for (i = 0; i <= pathtable.path_hashmask; i++) 381 if (!LIST_EMPTY(&pathtable.path_hashtbl[i])) 382 log_warnx("path_free: free non-free table"); 383 384 free(pathtable.path_hashtbl); 385 } 386 387 int 388 path_update(struct rib *rib, struct rde_peer *peer, struct rde_aspath *nasp, 389 struct bgpd_addr *prefix, int prefixlen) 390 { 391 struct rde_aspath *asp; 392 struct prefix *p; 393 394 if (nasp->pftableid) { 395 rde_send_pftable(nasp->pftableid, prefix, prefixlen, 0); 396 rde_send_pftable_commit(); 397 } 398 399 /* 400 * First try to find a prefix in the specified RIB. 401 */ 402 if ((p = prefix_get(rib, peer, prefix, prefixlen, 0)) != NULL) { 403 if (path_compare(nasp, p->aspath) == 0) { 404 /* no change, update last change */ 405 p->lastchange = time(NULL); 406 return (0); 407 } 408 } 409 410 /* 411 * Either the prefix does not exist or the path changed. 412 * In both cases lookup the new aspath to make sure it is not 413 * already in the RIB. 414 */ 415 if ((asp = path_lookup(nasp, peer)) == NULL) { 416 /* Path not available, create and link a new one. */ 417 asp = path_copy(nasp); 418 path_link(asp, peer); 419 } 420 421 /* If the prefix was found move it else add it to the aspath. */ 422 if (p != NULL) 423 prefix_move(asp, p); 424 else 425 return (prefix_add(rib, asp, prefix, prefixlen)); 426 return (0); 427 } 428 429 int 430 path_compare(struct rde_aspath *a, struct rde_aspath *b) 431 { 432 int r; 433 434 if (a->origin > b->origin) 435 return (1); 436 if (a->origin < b->origin) 437 return (-1); 438 if ((a->flags & ~F_ATTR_LINKED) > (b->flags & ~F_ATTR_LINKED)) 439 return (1); 440 if ((a->flags & ~F_ATTR_LINKED) < (b->flags & ~F_ATTR_LINKED)) 441 return (-1); 442 if (a->med > b->med) 443 return (1); 444 if (a->med < b->med) 445 return (-1); 446 if (a->lpref > b->lpref) 447 return (1); 448 if (a->lpref < b->lpref) 449 return (-1); 450 if (a->weight > b->weight) 451 return (1); 452 if (a->weight < b->weight) 453 return (-1); 454 if (a->rtlabelid > b->rtlabelid) 455 return (1); 456 if (a->rtlabelid < b->rtlabelid) 457 return (-1); 458 if (a->pftableid > b->pftableid) 459 return (1); 460 if (a->pftableid < b->pftableid) 461 return (-1); 462 463 r = aspath_compare(a->aspath, b->aspath); 464 if (r == 0) 465 r = nexthop_compare(a->nexthop, b->nexthop); 466 if (r > 0) 467 return (1); 468 if (r < 0) 469 return (-1); 470 471 return (attr_compare(a, b)); 472 } 473 474 struct rde_aspath * 475 path_lookup(struct rde_aspath *aspath, struct rde_peer *peer) 476 { 477 struct aspath_head *head; 478 struct rde_aspath *asp; 479 480 head = PATH_HASH(aspath->aspath); 481 482 LIST_FOREACH(asp, head, path_l) { 483 if (peer == asp->peer && path_compare(aspath, asp) == 0) 484 return (asp); 485 } 486 return (NULL); 487 } 488 489 void 490 path_remove(struct rde_aspath *asp) 491 { 492 struct prefix *p, *np; 493 494 for (p = LIST_FIRST(&asp->prefix_h); p != NULL; p = np) { 495 np = LIST_NEXT(p, path_l); 496 if (asp->pftableid) { 497 struct bgpd_addr addr; 498 499 pt_getaddr(p->prefix, &addr); 500 /* Commit is done in peer_down() */ 501 rde_send_pftable(p->aspath->pftableid, &addr, 502 p->prefix->prefixlen, 1); 503 } 504 prefix_destroy(p); 505 } 506 } 507 508 /* this function is only called by prefix_remove and path_remove */ 509 void 510 path_destroy(struct rde_aspath *asp) 511 { 512 /* path_destroy can only unlink and free empty rde_aspath */ 513 if (asp->prefix_cnt != 0 || asp->active_cnt != 0) 514 log_warnx("path_destroy: prefix count out of sync"); 515 516 nexthop_unlink(asp); 517 LIST_REMOVE(asp, path_l); 518 LIST_REMOVE(asp, peer_l); 519 asp->peer = NULL; 520 asp->nexthop = NULL; 521 asp->flags &= ~F_ATTR_LINKED; 522 523 path_put(asp); 524 } 525 526 int 527 path_empty(struct rde_aspath *asp) 528 { 529 return LIST_EMPTY(&asp->prefix_h); 530 } 531 532 /* 533 * the path object is linked into multiple lists for fast access. 534 * These are peer_l, path_l and nexthop_l. 535 * peer_l: list of all aspaths that belong to that peer 536 * path_l: hash list to find paths quickly 537 * nexthop_l: list of all aspaths with an equal exit nexthop 538 */ 539 static void 540 path_link(struct rde_aspath *asp, struct rde_peer *peer) 541 { 542 struct aspath_head *head; 543 544 head = PATH_HASH(asp->aspath); 545 546 LIST_INSERT_HEAD(head, asp, path_l); 547 LIST_INSERT_HEAD(&peer->path_h, asp, peer_l); 548 asp->peer = peer; 549 nexthop_link(asp); 550 asp->flags |= F_ATTR_LINKED; 551 } 552 553 /* 554 * copy asp to a new UNLINKED one mainly for filtering 555 */ 556 struct rde_aspath * 557 path_copy(struct rde_aspath *asp) 558 { 559 struct rde_aspath *nasp; 560 561 nasp = path_get(); 562 nasp->aspath = asp->aspath; 563 if (nasp->aspath != NULL) { 564 nasp->aspath->refcnt++; 565 rdemem.aspath_refs++; 566 } 567 nasp->nexthop = asp->nexthop; 568 nasp->med = asp->med; 569 nasp->lpref = asp->lpref; 570 nasp->weight = asp->weight; 571 nasp->origin = asp->origin; 572 nasp->rtlabelid = asp->rtlabelid; 573 rtlabel_ref(nasp->rtlabelid); 574 nasp->pftableid = asp->pftableid; 575 pftable_ref(nasp->pftableid); 576 577 nasp->flags = asp->flags & ~F_ATTR_LINKED; 578 attr_copy(nasp, asp); 579 580 return (nasp); 581 } 582 583 /* alloc and initialize new entry. May not fail. */ 584 struct rde_aspath * 585 path_get(void) 586 { 587 struct rde_aspath *asp; 588 589 asp = calloc(1, sizeof(*asp)); 590 if (asp == NULL) 591 fatal("path_alloc"); 592 rdemem.path_cnt++; 593 594 LIST_INIT(&asp->prefix_h); 595 asp->origin = ORIGIN_INCOMPLETE; 596 asp->lpref = DEFAULT_LPREF; 597 /* med = 0 */ 598 /* weight = 0 */ 599 /* rtlabel = 0 */ 600 601 return (asp); 602 } 603 604 /* free an unlinked element */ 605 void 606 path_put(struct rde_aspath *asp) 607 { 608 if (asp == NULL) 609 return; 610 611 if (asp->flags & F_ATTR_LINKED) 612 fatalx("path_put: linked object"); 613 614 rtlabel_unref(asp->rtlabelid); 615 pftable_unref(asp->pftableid); 616 aspath_put(asp->aspath); 617 attr_freeall(asp); 618 rdemem.path_cnt--; 619 free(asp); 620 } 621 622 /* prefix specific functions */ 623 624 static struct prefix *prefix_alloc(void); 625 static void prefix_free(struct prefix *); 626 static void prefix_link(struct prefix *, struct rib_entry *, 627 struct rde_aspath *); 628 static void prefix_unlink(struct prefix *); 629 630 int 631 prefix_compare(const struct bgpd_addr *a, const struct bgpd_addr *b, 632 int prefixlen) 633 { 634 in_addr_t mask, aa, ba; 635 int i; 636 u_int8_t m; 637 638 if (a->aid != b->aid) 639 return (a->aid - b->aid); 640 641 switch (a->aid) { 642 case AID_INET: 643 if (prefixlen > 32) 644 fatalx("prefix_cmp: bad IPv4 prefixlen"); 645 mask = htonl(prefixlen2mask(prefixlen)); 646 aa = ntohl(a->v4.s_addr & mask); 647 ba = ntohl(b->v4.s_addr & mask); 648 if (aa != ba) 649 return (aa - ba); 650 return (0); 651 case AID_INET6: 652 if (prefixlen > 128) 653 fatalx("prefix_cmp: bad IPv6 prefixlen"); 654 for (i = 0; i < prefixlen / 8; i++) 655 if (a->v6.s6_addr[i] != b->v6.s6_addr[i]) 656 return (a->v6.s6_addr[i] - b->v6.s6_addr[i]); 657 i = prefixlen % 8; 658 if (i) { 659 m = 0xff00 >> i; 660 if ((a->v6.s6_addr[prefixlen / 8] & m) != 661 (b->v6.s6_addr[prefixlen / 8] & m)) 662 return ((a->v6.s6_addr[prefixlen / 8] & m) - 663 (b->v6.s6_addr[prefixlen / 8] & m)); 664 } 665 return (0); 666 case AID_VPN_IPv4: 667 if (prefixlen > 32) 668 fatalx("prefix_cmp: bad IPv4 VPN prefixlen"); 669 if (betoh64(a->vpn4.rd) > betoh64(b->vpn4.rd)) 670 return (1); 671 if (betoh64(a->vpn4.rd) < betoh64(b->vpn4.rd)) 672 return (-1); 673 mask = htonl(prefixlen2mask(prefixlen)); 674 aa = ntohl(a->vpn4.addr.s_addr & mask); 675 ba = ntohl(b->vpn4.addr.s_addr & mask); 676 if (aa != ba) 677 return (aa - ba); 678 if (a->vpn4.labellen > b->vpn4.labellen) 679 return (1); 680 if (a->vpn4.labellen < b->vpn4.labellen) 681 return (-1); 682 return (memcmp(a->vpn4.labelstack, b->vpn4.labelstack, 683 a->vpn4.labellen)); 684 default: 685 fatalx("prefix_cmp: unknown af"); 686 } 687 return (-1); 688 } 689 690 /* 691 * search for specified prefix of a peer. Returns NULL if not found. 692 */ 693 struct prefix * 694 prefix_get(struct rib *rib, struct rde_peer *peer, struct bgpd_addr *prefix, 695 int prefixlen, u_int32_t flags) 696 { 697 struct rib_entry *re; 698 699 re = rib_get(rib, prefix, prefixlen); 700 if (re == NULL) 701 return (NULL); 702 return (prefix_bypeer(re, peer, flags)); 703 } 704 705 /* 706 * Adds or updates a prefix. 707 */ 708 int 709 prefix_add(struct rib *rib, struct rde_aspath *asp, struct bgpd_addr *prefix, 710 int prefixlen) 711 712 { 713 struct prefix *p; 714 struct rib_entry *re; 715 716 re = rib_get(rib, prefix, prefixlen); 717 if (re == NULL) 718 re = rib_add(rib, prefix, prefixlen); 719 720 p = prefix_bypeer(re, asp->peer, asp->flags); 721 if (p == NULL) { 722 p = prefix_alloc(); 723 prefix_link(p, re, asp); 724 return (1); 725 } else { 726 if (p->aspath != asp) { 727 /* prefix belongs to a different aspath so move */ 728 prefix_move(asp, p); 729 } else 730 p->lastchange = time(NULL); 731 return (0); 732 } 733 } 734 735 /* 736 * Move the prefix to the specified as path, removes the old asp if needed. 737 */ 738 void 739 prefix_move(struct rde_aspath *asp, struct prefix *p) 740 { 741 struct prefix *np; 742 struct rde_aspath *oasp; 743 744 if (asp->peer != p->aspath->peer) 745 fatalx("prefix_move: cross peer move"); 746 747 /* create new prefix node */ 748 np = prefix_alloc(); 749 np->aspath = asp; 750 /* peer and prefix pointers are still equal */ 751 np->prefix = p->prefix; 752 np->rib = p->rib; 753 np->lastchange = time(NULL); 754 755 /* add to new as path */ 756 LIST_INSERT_HEAD(&asp->prefix_h, np, path_l); 757 PREFIX_COUNT(asp, 1); 758 /* 759 * no need to update the peer prefix count because we are only moving 760 * the prefix without changing the peer. 761 */ 762 763 /* 764 * First kick the old prefix node out of the prefix list, 765 * afterwards run the route decision for new prefix node. 766 * Because of this only one update is generated if the prefix 767 * was active. 768 * This is save because we create a new prefix and so the change 769 * is noticed by prefix_evaluate(). 770 */ 771 LIST_REMOVE(p, rib_l); 772 prefix_evaluate(np, np->rib); 773 774 /* remove old prefix node */ 775 oasp = p->aspath; 776 LIST_REMOVE(p, path_l); 777 PREFIX_COUNT(oasp, -1); 778 /* as before peer count needs no update because of move */ 779 780 /* destroy all references to other objects and free the old prefix */ 781 p->aspath = NULL; 782 p->prefix = NULL; 783 p->rib = NULL; 784 prefix_free(p); 785 786 /* destroy old path if empty */ 787 if (path_empty(oasp)) 788 path_destroy(oasp); 789 } 790 791 /* 792 * Removes a prefix from all lists. If the parent objects -- path or 793 * pt_entry -- become empty remove them too. 794 */ 795 int 796 prefix_remove(struct rib *rib, struct rde_peer *peer, struct bgpd_addr *prefix, 797 int prefixlen, u_int32_t flags) 798 { 799 struct prefix *p; 800 struct rib_entry *re; 801 struct rde_aspath *asp; 802 803 re = rib_get(rib, prefix, prefixlen); 804 if (re == NULL) /* Got a dummy withdrawn request */ 805 return (0); 806 807 p = prefix_bypeer(re, peer, flags); 808 if (p == NULL) /* Got a dummy withdrawn request. */ 809 return (0); 810 811 asp = p->aspath; 812 813 if (asp->pftableid) { 814 /* only prefixes in the local RIB were pushed into pf */ 815 rde_send_pftable(asp->pftableid, prefix, prefixlen, 1); 816 rde_send_pftable_commit(); 817 } 818 819 prefix_destroy(p); 820 821 return (1); 822 } 823 824 /* dump a prefix into specified buffer */ 825 int 826 prefix_write(u_char *buf, int len, struct bgpd_addr *prefix, u_int8_t plen) 827 { 828 int totlen; 829 830 switch (prefix->aid) { 831 case AID_INET: 832 case AID_INET6: 833 totlen = PREFIX_SIZE(plen); 834 835 if (totlen > len) 836 return (-1); 837 *buf++ = plen; 838 memcpy(buf, &prefix->ba, totlen - 1); 839 return (totlen); 840 case AID_VPN_IPv4: 841 totlen = PREFIX_SIZE(plen) + sizeof(prefix->vpn4.rd) + 842 prefix->vpn4.labellen; 843 plen += (sizeof(prefix->vpn4.rd) + prefix->vpn4.labellen) * 8; 844 845 if (totlen > len) 846 return (-1); 847 *buf++ = plen; 848 memcpy(buf, &prefix->vpn4.labelstack, prefix->vpn4.labellen); 849 buf += prefix->vpn4.labellen; 850 memcpy(buf, &prefix->vpn4.rd, sizeof(prefix->vpn4.rd)); 851 buf += sizeof(prefix->vpn4.rd); 852 memcpy(buf, &prefix->vpn4.addr, PREFIX_SIZE(plen) - 1); 853 return (totlen); 854 default: 855 return (-1); 856 } 857 } 858 859 /* 860 * Searches in the prefix list of specified pt_entry for a prefix entry 861 * belonging to the peer peer. Returns NULL if no match found. 862 */ 863 struct prefix * 864 prefix_bypeer(struct rib_entry *re, struct rde_peer *peer, u_int32_t flags) 865 { 866 struct prefix *p; 867 868 LIST_FOREACH(p, &re->prefix_h, rib_l) { 869 if (p->aspath->peer != peer) 870 continue; 871 if (p->aspath->flags & flags && 872 (flags & F_ANN_DYNAMIC) != 873 (p->aspath->flags & F_ANN_DYNAMIC)) 874 continue; 875 return (p); 876 } 877 return (NULL); 878 } 879 880 void 881 prefix_updateall(struct rde_aspath *asp, enum nexthop_state state, 882 enum nexthop_state oldstate) 883 { 884 struct prefix *p; 885 886 LIST_FOREACH(p, &asp->prefix_h, path_l) { 887 /* 888 * skip non local-RIBs or RIBs that are flagged as noeval. 889 */ 890 if (p->rib->flags & F_RIB_NOEVALUATE) 891 continue; 892 893 if (oldstate == state && state == NEXTHOP_REACH) { 894 /* 895 * The state of the nexthop did not change. The only 896 * thing that may have changed is the true_nexthop 897 * or other internal infos. This will not change 898 * the routing decision so shortcut here. 899 */ 900 if ((p->rib->flags & F_RIB_NOFIB) == 0 && 901 p == p->rib->active) 902 rde_send_kroute(p, NULL, p->rib->ribid); 903 continue; 904 } 905 906 /* redo the route decision */ 907 LIST_REMOVE(p, rib_l); 908 /* 909 * If the prefix is the active one remove it first, 910 * this has to be done because we can not detect when 911 * the active prefix changes its state. In this case 912 * we know that this is a withdrawl and so the second 913 * prefix_evaluate() will generate no update because 914 * the nexthop is unreachable or ineligible. 915 */ 916 if (p == p->rib->active) 917 prefix_evaluate(NULL, p->rib); 918 prefix_evaluate(p, p->rib); 919 } 920 } 921 922 /* kill a prefix. */ 923 void 924 prefix_destroy(struct prefix *p) 925 { 926 struct rde_aspath *asp; 927 928 asp = p->aspath; 929 prefix_unlink(p); 930 prefix_free(p); 931 932 if (path_empty(asp)) 933 path_destroy(asp); 934 } 935 936 /* 937 * helper function to clean up the connected networks after a reload 938 */ 939 void 940 prefix_network_clean(struct rde_peer *peer, time_t reloadtime, u_int32_t flags) 941 { 942 struct rde_aspath *asp, *xasp; 943 struct prefix *p, *xp; 944 945 for (asp = LIST_FIRST(&peer->path_h); asp != NULL; asp = xasp) { 946 xasp = LIST_NEXT(asp, peer_l); 947 if ((asp->flags & F_ANN_DYNAMIC) == flags) 948 continue; 949 for (p = LIST_FIRST(&asp->prefix_h); p != NULL; p = xp) { 950 xp = LIST_NEXT(p, path_l); 951 if (reloadtime > p->lastchange) { 952 prefix_unlink(p); 953 prefix_free(p); 954 } 955 } 956 if (path_empty(asp)) 957 path_destroy(asp); 958 } 959 } 960 961 /* 962 * Link a prefix into the different parent objects. 963 */ 964 static void 965 prefix_link(struct prefix *pref, struct rib_entry *re, struct rde_aspath *asp) 966 { 967 LIST_INSERT_HEAD(&asp->prefix_h, pref, path_l); 968 PREFIX_COUNT(asp, 1); 969 970 pref->aspath = asp; 971 pref->rib = re; 972 pref->prefix = re->prefix; 973 pt_ref(pref->prefix); 974 pref->lastchange = time(NULL); 975 976 /* make route decision */ 977 prefix_evaluate(pref, re); 978 } 979 980 /* 981 * Unlink a prefix from the different parent objects. 982 */ 983 static void 984 prefix_unlink(struct prefix *pref) 985 { 986 struct rib_entry *re = pref->rib; 987 988 /* make route decision */ 989 LIST_REMOVE(pref, rib_l); 990 prefix_evaluate(NULL, re); 991 992 LIST_REMOVE(pref, path_l); 993 PREFIX_COUNT(pref->aspath, -1); 994 995 pt_unref(pref->prefix); 996 if (pt_empty(pref->prefix)) 997 pt_remove(pref->prefix); 998 if (rib_empty(re)) 999 rib_remove(re); 1000 1001 /* destroy all references to other objects */ 1002 pref->aspath = NULL; 1003 pref->prefix = NULL; 1004 pref->rib = NULL; 1005 1006 /* 1007 * It's the caller's duty to remove empty aspath structures. 1008 * Also freeing the unlinked prefix is the caller's duty. 1009 */ 1010 } 1011 1012 /* alloc and bzero new entry. May not fail. */ 1013 static struct prefix * 1014 prefix_alloc(void) 1015 { 1016 struct prefix *p; 1017 1018 p = calloc(1, sizeof(*p)); 1019 if (p == NULL) 1020 fatal("prefix_alloc"); 1021 rdemem.prefix_cnt++; 1022 return p; 1023 } 1024 1025 /* free a unlinked entry */ 1026 static void 1027 prefix_free(struct prefix *pref) 1028 { 1029 rdemem.prefix_cnt--; 1030 free(pref); 1031 } 1032 1033 /* 1034 * nexthop functions 1035 */ 1036 struct nexthop_head *nexthop_hash(struct bgpd_addr *); 1037 struct nexthop *nexthop_lookup(struct bgpd_addr *); 1038 1039 /* 1040 * In BGP there exist two nexthops: the exit nexthop which was announced via 1041 * BGP and the true nexthop which is used in the FIB -- forward information 1042 * base a.k.a kernel routing table. When sending updates it is even more 1043 * confusing. In IBGP we pass the unmodified exit nexthop to the neighbors 1044 * while in EBGP normally the address of the router is sent. The exit nexthop 1045 * may be passed to the external neighbor if the neighbor and the exit nexthop 1046 * reside in the same subnet -- directly connected. 1047 */ 1048 struct nexthop_table { 1049 LIST_HEAD(nexthop_head, nexthop) *nexthop_hashtbl; 1050 u_int32_t nexthop_hashmask; 1051 } nexthoptable; 1052 1053 void 1054 nexthop_init(u_int32_t hashsize) 1055 { 1056 u_int32_t hs, i; 1057 1058 for (hs = 1; hs < hashsize; hs <<= 1) 1059 ; 1060 nexthoptable.nexthop_hashtbl = calloc(hs, sizeof(struct nexthop_table)); 1061 if (nexthoptable.nexthop_hashtbl == NULL) 1062 fatal("nextop_init"); 1063 1064 for (i = 0; i < hs; i++) 1065 LIST_INIT(&nexthoptable.nexthop_hashtbl[i]); 1066 1067 nexthoptable.nexthop_hashmask = hs - 1; 1068 } 1069 1070 void 1071 nexthop_shutdown(void) 1072 { 1073 u_int32_t i; 1074 struct nexthop *nh, *nnh; 1075 1076 for (i = 0; i <= nexthoptable.nexthop_hashmask; i++) { 1077 for (nh = LIST_FIRST(&nexthoptable.nexthop_hashtbl[i]); 1078 nh != NULL; nh = nnh) { 1079 nnh = LIST_NEXT(nh, nexthop_l); 1080 nh->state = NEXTHOP_UNREACH; 1081 (void)nexthop_delete(nh); 1082 } 1083 if (!LIST_EMPTY(&nexthoptable.nexthop_hashtbl[i])) 1084 log_warnx("nexthop_shutdown: non-free table"); 1085 } 1086 1087 free(nexthoptable.nexthop_hashtbl); 1088 } 1089 1090 void 1091 nexthop_update(struct kroute_nexthop *msg) 1092 { 1093 struct nexthop *nh; 1094 struct rde_aspath *asp; 1095 enum nexthop_state oldstate; 1096 1097 nh = nexthop_lookup(&msg->nexthop); 1098 if (nh == NULL) { 1099 log_warnx("nexthop_update: non-existent nexthop %s", 1100 log_addr(&msg->nexthop)); 1101 return; 1102 } 1103 1104 oldstate = nh->state; 1105 if (msg->valid) 1106 nh->state = NEXTHOP_REACH; 1107 else 1108 nh->state = NEXTHOP_UNREACH; 1109 1110 if (msg->connected) { 1111 nh->flags |= NEXTHOP_CONNECTED; 1112 memcpy(&nh->true_nexthop, &nh->exit_nexthop, 1113 sizeof(nh->true_nexthop)); 1114 } else 1115 memcpy(&nh->true_nexthop, &msg->gateway, 1116 sizeof(nh->true_nexthop)); 1117 1118 memcpy(&nh->nexthop_net, &msg->net, 1119 sizeof(nh->nexthop_net)); 1120 nh->nexthop_netlen = msg->netlen; 1121 1122 if (nexthop_delete(nh)) 1123 /* nexthop no longer used */ 1124 return; 1125 1126 if (rde_noevaluate()) 1127 /* 1128 * if the decision process is turned off there is no need 1129 * for the aspath list walk. 1130 */ 1131 return; 1132 1133 LIST_FOREACH(asp, &nh->path_h, nexthop_l) { 1134 prefix_updateall(asp, nh->state, oldstate); 1135 } 1136 } 1137 1138 void 1139 nexthop_modify(struct rde_aspath *asp, struct bgpd_addr *nexthop, 1140 enum action_types type, u_int8_t aid) 1141 { 1142 struct nexthop *nh; 1143 1144 if (type == ACTION_SET_NEXTHOP_REJECT) { 1145 asp->flags |= F_NEXTHOP_REJECT; 1146 return; 1147 } 1148 if (type == ACTION_SET_NEXTHOP_BLACKHOLE) { 1149 asp->flags |= F_NEXTHOP_BLACKHOLE; 1150 return; 1151 } 1152 if (type == ACTION_SET_NEXTHOP_NOMODIFY) { 1153 asp->flags |= F_NEXTHOP_NOMODIFY; 1154 return; 1155 } 1156 if (type == ACTION_SET_NEXTHOP_SELF) { 1157 asp->flags |= F_NEXTHOP_SELF; 1158 return; 1159 } 1160 if (aid != nexthop->aid) 1161 return; 1162 1163 nh = nexthop_get(nexthop); 1164 if (asp->flags & F_ATTR_LINKED) 1165 nexthop_unlink(asp); 1166 asp->nexthop = nh; 1167 if (asp->flags & F_ATTR_LINKED) 1168 nexthop_link(asp); 1169 } 1170 1171 void 1172 nexthop_link(struct rde_aspath *asp) 1173 { 1174 if (asp->nexthop == NULL) 1175 return; 1176 1177 LIST_INSERT_HEAD(&asp->nexthop->path_h, asp, nexthop_l); 1178 } 1179 1180 void 1181 nexthop_unlink(struct rde_aspath *asp) 1182 { 1183 struct nexthop *nh; 1184 1185 if (asp->nexthop == NULL) 1186 return; 1187 1188 LIST_REMOVE(asp, nexthop_l); 1189 1190 /* see if list is empty */ 1191 nh = asp->nexthop; 1192 asp->nexthop = NULL; 1193 1194 (void)nexthop_delete(nh); 1195 } 1196 1197 int 1198 nexthop_delete(struct nexthop *nh) 1199 { 1200 /* nexthop still used by some other aspath */ 1201 if (!LIST_EMPTY(&nh->path_h)) 1202 return (0); 1203 1204 /* either pinned or in a state where it may not be deleted */ 1205 if (nh->refcnt > 0 || nh->state == NEXTHOP_LOOKUP) 1206 return (0); 1207 1208 LIST_REMOVE(nh, nexthop_l); 1209 rde_send_nexthop(&nh->exit_nexthop, 0); 1210 1211 rdemem.nexthop_cnt--; 1212 free(nh); 1213 return (1); 1214 } 1215 1216 struct nexthop * 1217 nexthop_get(struct bgpd_addr *nexthop) 1218 { 1219 struct nexthop *nh; 1220 1221 nh = nexthop_lookup(nexthop); 1222 if (nh == NULL) { 1223 nh = calloc(1, sizeof(*nh)); 1224 if (nh == NULL) 1225 fatal("nexthop_alloc"); 1226 rdemem.nexthop_cnt++; 1227 1228 LIST_INIT(&nh->path_h); 1229 nh->state = NEXTHOP_LOOKUP; 1230 nh->exit_nexthop = *nexthop; 1231 LIST_INSERT_HEAD(nexthop_hash(nexthop), nh, 1232 nexthop_l); 1233 1234 rde_send_nexthop(&nh->exit_nexthop, 1); 1235 } 1236 1237 return (nh); 1238 } 1239 1240 int 1241 nexthop_compare(struct nexthop *na, struct nexthop *nb) 1242 { 1243 struct bgpd_addr *a, *b; 1244 1245 if (na == nb) 1246 return (0); 1247 if (na == NULL) 1248 return (-1); 1249 if (nb == NULL) 1250 return (1); 1251 1252 a = &na->exit_nexthop; 1253 b = &nb->exit_nexthop; 1254 1255 if (a->aid != b->aid) 1256 return (a->aid - b->aid); 1257 1258 switch (a->aid) { 1259 case AID_INET: 1260 if (ntohl(a->v4.s_addr) > ntohl(b->v4.s_addr)) 1261 return (1); 1262 if (ntohl(a->v4.s_addr) < ntohl(b->v4.s_addr)) 1263 return (-1); 1264 return (0); 1265 case AID_INET6: 1266 return (memcmp(&a->v6, &b->v6, sizeof(struct in6_addr))); 1267 default: 1268 fatalx("nexthop_cmp: unknown af"); 1269 } 1270 return (-1); 1271 } 1272 1273 struct nexthop * 1274 nexthop_lookup(struct bgpd_addr *nexthop) 1275 { 1276 struct nexthop *nh; 1277 1278 LIST_FOREACH(nh, nexthop_hash(nexthop), nexthop_l) { 1279 if (memcmp(&nh->exit_nexthop, nexthop, 1280 sizeof(struct bgpd_addr)) == 0) 1281 return (nh); 1282 } 1283 return (NULL); 1284 } 1285 1286 struct nexthop_head * 1287 nexthop_hash(struct bgpd_addr *nexthop) 1288 { 1289 u_int32_t h = 0; 1290 1291 switch (nexthop->aid) { 1292 case AID_INET: 1293 h = (AF_INET ^ ntohl(nexthop->v4.s_addr) ^ 1294 ntohl(nexthop->v4.s_addr) >> 13) & 1295 nexthoptable.nexthop_hashmask; 1296 break; 1297 case AID_INET6: 1298 h = hash32_buf(&nexthop->v6, sizeof(struct in6_addr), 1299 HASHINIT) & nexthoptable.nexthop_hashmask; 1300 break; 1301 default: 1302 fatalx("nexthop_hash: unsupported AF"); 1303 } 1304 return (&nexthoptable.nexthop_hashtbl[h]); 1305 } 1306 1307