1*56202f8dSclaudio /* $OpenBSD: rde_rib.c,v 1.267 2025/01/14 12:24:23 claudio Exp $ */ 2a16c0992Shenning 3a16c0992Shenning /* 4e854144bSclaudio * Copyright (c) 2003, 2004 Claudio Jeker <claudio@openbsd.org> 5a16c0992Shenning * 6a16c0992Shenning * Permission to use, copy, modify, and distribute this software for any 7a16c0992Shenning * purpose with or without fee is hereby granted, provided that the above 8a16c0992Shenning * copyright notice and this permission notice appear in all copies. 9a16c0992Shenning * 10a16c0992Shenning * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11a16c0992Shenning * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12a16c0992Shenning * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13a16c0992Shenning * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14a16c0992Shenning * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15a16c0992Shenning * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16a16c0992Shenning * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17a16c0992Shenning */ 18a16c0992Shenning 19a16c0992Shenning #include <sys/types.h> 20a16c0992Shenning #include <sys/queue.h> 21a16c0992Shenning 224670f09dSclaudio #include <limits.h> 23a16c0992Shenning #include <stdlib.h> 24ad2ef90aSderaadt #include <string.h> 25579e3f2dSguenther #include <time.h> 26a16c0992Shenning 27a16c0992Shenning #include "bgpd.h" 28a16c0992Shenning #include "rde.h" 295e3f6f95Sbenno #include "log.h" 30a16c0992Shenning 31a16c0992Shenning /* 32a16c0992Shenning * BGP RIB -- Routing Information Base 33a16c0992Shenning * 340d031d06Sderaadt * The RIB is build with one aspect in mind. Speed -- actually update speed. 35c5508ee4Sclaudio * Therefore one thing needs to be absolutely avoided, long table walks. 3604f2231bShenning * This is achieved by heavily linking the different parts together. 37a16c0992Shenning */ 3839386878Sclaudio uint16_t rib_size; 398283d0ceSclaudio struct rib **ribs; 40dd2a9ed2Sclaudio struct rib flowrib = { .id = 1, .tree = RB_INITIALIZER(&flowrib.tree) }; 4186f6dc6eSclaudio 425bb86053Sclaudio struct rib_entry *rib_add(struct rib *, struct pt_entry *); 43bb8b30dfSclaudio static inline int rib_compare(const struct rib_entry *, 44bb8b30dfSclaudio const struct rib_entry *); 4586f6dc6eSclaudio void rib_remove(struct rib_entry *); 4686f6dc6eSclaudio int rib_empty(struct rib_entry *); 4739386878Sclaudio static void rib_dump_abort(uint16_t); 4886f6dc6eSclaudio 492d4ddadbSclaudio RB_PROTOTYPE(rib_tree, rib_entry, rib_e, rib_compare); 502d4ddadbSclaudio RB_GENERATE(rib_tree, rib_entry, rib_e, rib_compare); 5186f6dc6eSclaudio 52bb8b30dfSclaudio struct rib_context { 53bb8b30dfSclaudio LIST_ENTRY(rib_context) entry; 54bb8b30dfSclaudio struct rib_entry *ctx_re; 557b2cffc2Sclaudio struct prefix *ctx_p; 5639386878Sclaudio uint32_t ctx_id; 577b2cffc2Sclaudio void (*ctx_rib_call)(struct rib_entry *, void *); 587b2cffc2Sclaudio void (*ctx_prefix_call)(struct prefix *, void *); 5939386878Sclaudio void (*ctx_done)(void *, uint8_t); 60bb8b30dfSclaudio int (*ctx_throttle)(void *); 61bb8b30dfSclaudio void *ctx_arg; 6276c3be87Sclaudio struct bgpd_addr ctx_subtree; 63bb8b30dfSclaudio unsigned int ctx_count; 6439386878Sclaudio uint8_t ctx_aid; 6576c3be87Sclaudio uint8_t ctx_subtreelen; 66bb8b30dfSclaudio }; 67bb8b30dfSclaudio LIST_HEAD(, rib_context) rib_dumps = LIST_HEAD_INITIALIZER(rib_dumps); 68bb8b30dfSclaudio 697b2cffc2Sclaudio static void prefix_dump_r(struct rib_context *); 7054494a53Sclaudio 71bb8b30dfSclaudio static inline struct rib_entry * 72f7305028Sclaudio re_lock(struct rib_entry *re) 73f7305028Sclaudio { 74bb8b30dfSclaudio if (re->lock != 0) 75bb8b30dfSclaudio log_warnx("%s: entry already locked", __func__); 76bb8b30dfSclaudio re->lock = 1; 77bb8b30dfSclaudio return re; 78f7305028Sclaudio } 79f7305028Sclaudio 80bb8b30dfSclaudio static inline struct rib_entry * 81f7305028Sclaudio re_unlock(struct rib_entry *re) 82f7305028Sclaudio { 83bb8b30dfSclaudio if (re->lock == 0) 84bb8b30dfSclaudio log_warnx("%s: entry already unlocked", __func__); 85bb8b30dfSclaudio re->lock = 0; 86bb8b30dfSclaudio return re; 87f7305028Sclaudio } 88f7305028Sclaudio 89f7305028Sclaudio static inline int 90f7305028Sclaudio re_is_locked(struct rib_entry *re) 91f7305028Sclaudio { 92bb8b30dfSclaudio return (re->lock != 0); 93f7305028Sclaudio } 94f7305028Sclaudio 95091e7b27Sclaudio static inline struct prefix * 96091e7b27Sclaudio prefix_lock(struct prefix *p) 97091e7b27Sclaudio { 98091e7b27Sclaudio if (p->flags & PREFIX_FLAG_LOCKED) 99091e7b27Sclaudio fatalx("%s: locking locked prefix", __func__); 100091e7b27Sclaudio p->flags |= PREFIX_FLAG_LOCKED; 101091e7b27Sclaudio return p; 102091e7b27Sclaudio } 103091e7b27Sclaudio 104091e7b27Sclaudio static inline struct prefix * 105091e7b27Sclaudio prefix_unlock(struct prefix *p) 106091e7b27Sclaudio { 107091e7b27Sclaudio if ((p->flags & PREFIX_FLAG_LOCKED) == 0) 108091e7b27Sclaudio fatalx("%s: unlocking unlocked prefix", __func__); 109091e7b27Sclaudio p->flags &= ~PREFIX_FLAG_LOCKED; 110091e7b27Sclaudio return p; 111091e7b27Sclaudio } 112091e7b27Sclaudio 113091e7b27Sclaudio static inline int 114091e7b27Sclaudio prefix_is_locked(struct prefix *p) 115091e7b27Sclaudio { 116091e7b27Sclaudio return (p->flags & PREFIX_FLAG_LOCKED) != 0; 117091e7b27Sclaudio } 118091e7b27Sclaudio 119091e7b27Sclaudio static inline int 120091e7b27Sclaudio prefix_is_dead(struct prefix *p) 121091e7b27Sclaudio { 122091e7b27Sclaudio return (p->flags & PREFIX_FLAG_DEAD) != 0; 123091e7b27Sclaudio } 124091e7b27Sclaudio 1257498412bSclaudio static inline struct rib_tree * 1267498412bSclaudio rib_tree(struct rib *rib) 1277498412bSclaudio { 1287498412bSclaudio return (&rib->tree); 1297498412bSclaudio } 13086f6dc6eSclaudio 131bb8b30dfSclaudio static inline int 132bb8b30dfSclaudio rib_compare(const struct rib_entry *a, const struct rib_entry *b) 133bb8b30dfSclaudio { 134bb8b30dfSclaudio return (pt_prefix_cmp(a->prefix, b->prefix)); 135bb8b30dfSclaudio } 136bb8b30dfSclaudio 13786f6dc6eSclaudio /* RIB specific functions */ 138a7fc929cSclaudio struct rib * 13939386878Sclaudio rib_new(char *name, u_int rtableid, uint16_t flags) 14086f6dc6eSclaudio { 1418283d0ceSclaudio struct rib *new; 14239386878Sclaudio uint16_t id; 14386f6dc6eSclaudio 144da23cf14Sclaudio for (id = 0; id < rib_size; id++) { 1458283d0ceSclaudio if (ribs[id] == NULL) 146da23cf14Sclaudio break; 147da23cf14Sclaudio } 148da23cf14Sclaudio 14986f6dc6eSclaudio if (id >= rib_size) { 1508283d0ceSclaudio if ((ribs = recallocarray(ribs, id, id + 8, 1519b1abd73Sjsg sizeof(struct rib *))) == NULL) 152bb8b30dfSclaudio fatal(NULL); 1538283d0ceSclaudio rib_size = id + 8; 15486f6dc6eSclaudio } 15586f6dc6eSclaudio 1568283d0ceSclaudio if ((new = calloc(1, sizeof(*new))) == NULL) 15728b060e1Sclaudio fatal(NULL); 1588283d0ceSclaudio 1598283d0ceSclaudio strlcpy(new->name, name, sizeof(new->name)); 1608283d0ceSclaudio RB_INIT(rib_tree(new)); 1618283d0ceSclaudio new->state = RECONF_REINIT; 1628283d0ceSclaudio new->id = id; 1638283d0ceSclaudio new->flags = flags; 1648283d0ceSclaudio new->rtableid = rtableid; 1658283d0ceSclaudio 1668283d0ceSclaudio new->in_rules = calloc(1, sizeof(struct filter_head)); 1678283d0ceSclaudio if (new->in_rules == NULL) 1688283d0ceSclaudio fatal(NULL); 1698283d0ceSclaudio TAILQ_INIT(new->in_rules); 1708283d0ceSclaudio 1718283d0ceSclaudio ribs[id] = new; 17228b060e1Sclaudio 173bb8b30dfSclaudio log_debug("%s: %s -> %u", __func__, name, id); 1748283d0ceSclaudio return (new); 17586f6dc6eSclaudio } 17686f6dc6eSclaudio 177e8068d41Sclaudio /* 178e8068d41Sclaudio * This function is only called when the FIB information of a RIB changed. 179e8068d41Sclaudio * It will flush the FIB if there was one previously and change the fibstate 180e8068d41Sclaudio * from RECONF_NONE (nothing to do) to either RECONF_RELOAD (reload the FIB) 181e8068d41Sclaudio * or RECONF_REINIT (rerun the route decision process for every element) 182e8068d41Sclaudio * depending on the new flags. 183e8068d41Sclaudio */ 1842970de9eSclaudio int 185e8068d41Sclaudio rib_update(struct rib *rib) 186e8068d41Sclaudio { 187e8068d41Sclaudio /* flush fib first if there was one */ 188e8068d41Sclaudio if ((rib->flags & (F_RIB_NOFIB | F_RIB_NOEVALUATE)) == 0) 189e8068d41Sclaudio rde_send_kroute_flush(rib); 190e8068d41Sclaudio 191e8068d41Sclaudio /* if no evaluate changes then a full reinit is needed */ 192e8068d41Sclaudio if ((rib->flags & F_RIB_NOEVALUATE) != 193e8068d41Sclaudio (rib->flags_tmp & F_RIB_NOEVALUATE)) 194879ec5f7Sclaudio rib->fibstate = RECONF_REINIT; 195e8068d41Sclaudio 196e8068d41Sclaudio rib->flags = rib->flags_tmp; 197e8068d41Sclaudio rib->rtableid = rib->rtableid_tmp; 198e8068d41Sclaudio 199e8068d41Sclaudio /* reload fib if there is no reinit pending and there will be a fib */ 200879ec5f7Sclaudio if (rib->fibstate != RECONF_REINIT && 201e8068d41Sclaudio (rib->flags & (F_RIB_NOFIB | F_RIB_NOEVALUATE)) == 0) 202879ec5f7Sclaudio rib->fibstate = RECONF_RELOAD; 2032970de9eSclaudio 2042970de9eSclaudio return (rib->fibstate == RECONF_REINIT); 205e8068d41Sclaudio } 206e8068d41Sclaudio 207a7fc929cSclaudio struct rib * 20839386878Sclaudio rib_byid(uint16_t id) 209bb8b30dfSclaudio { 2108283d0ceSclaudio if (id == RIB_NOTFOUND || id >= rib_size || ribs[id] == NULL) 211bb8b30dfSclaudio return NULL; 2128283d0ceSclaudio return ribs[id]; 213bb8b30dfSclaudio } 214bb8b30dfSclaudio 21539386878Sclaudio uint16_t 2160a930d87Sclaudio rib_find(char *name) 2170a930d87Sclaudio { 21839386878Sclaudio uint16_t id; 2190a930d87Sclaudio 22062894d82Sclaudio /* no name returns the first Loc-RIB */ 2210a930d87Sclaudio if (name == NULL || *name == '\0') 222bb8b30dfSclaudio return RIB_LOC_START; 2230a930d87Sclaudio 2240a930d87Sclaudio for (id = 0; id < rib_size; id++) { 2258283d0ceSclaudio if (ribs[id] != NULL && !strcmp(ribs[id]->name, name)) 226bb8b30dfSclaudio return id; 2270a930d87Sclaudio } 2280a930d87Sclaudio 229bb8b30dfSclaudio return RIB_NOTFOUND; 2300a930d87Sclaudio } 2310a930d87Sclaudio 23286f6dc6eSclaudio void 233a7fc929cSclaudio rib_free(struct rib *rib) 23486f6dc6eSclaudio { 23526eea3d2Sclaudio struct rib_entry *re, *xre; 236e8068d41Sclaudio struct prefix *p; 23726eea3d2Sclaudio 238bb8b30dfSclaudio rib_dump_abort(rib->id); 239bb8b30dfSclaudio 240e8068d41Sclaudio /* 241e8068d41Sclaudio * flush the rib, disable route evaluation and fib sync to speed up 242e8068d41Sclaudio * the prefix removal. Nothing depends on this data anymore. 243e8068d41Sclaudio */ 244e8068d41Sclaudio if ((rib->flags & (F_RIB_NOFIB | F_RIB_NOEVALUATE)) == 0) 245e8068d41Sclaudio rde_send_kroute_flush(rib); 246e8068d41Sclaudio rib->flags |= F_RIB_NOEVALUATE | F_RIB_NOFIB; 247e8068d41Sclaudio 248a7fc929cSclaudio for (re = RB_MIN(rib_tree, rib_tree(rib)); re != NULL; re = xre) { 249a7fc929cSclaudio xre = RB_NEXT(rib_tree, rib_tree(rib), re); 25026eea3d2Sclaudio 25125e17ce8Sclaudio /* 25225e17ce8Sclaudio * Removing the prefixes is tricky because the last one 25328b060e1Sclaudio * will remove the rib_entry as well and because we do 25428b060e1Sclaudio * an empty check in prefix_destroy() it is not possible to 25525e17ce8Sclaudio * use the default for loop. 25625e17ce8Sclaudio */ 257df08e9a0Sclaudio while ((p = TAILQ_FIRST(&re->prefix_h))) { 2582b6d2161Sclaudio struct rde_aspath *asp = prefix_aspath(p); 259b58d89daSclaudio if (asp && asp->pftableid) 260b58d89daSclaudio rde_pftable_del(asp->pftableid, p); 26126eea3d2Sclaudio prefix_destroy(p); 26226eea3d2Sclaudio } 26326eea3d2Sclaudio } 264862a2562Sclaudio if (rib->id <= RIB_LOC_START) 265862a2562Sclaudio return; /* never remove the default ribs */ 266879ec5f7Sclaudio filterlist_free(rib->in_rules_tmp); 267879ec5f7Sclaudio filterlist_free(rib->in_rules); 2688283d0ceSclaudio ribs[rib->id] = NULL; 2698283d0ceSclaudio free(rib); 27086f6dc6eSclaudio } 27186f6dc6eSclaudio 272c6e494ddSclaudio void 273c6e494ddSclaudio rib_shutdown(void) 274c6e494ddSclaudio { 2758283d0ceSclaudio struct rib *rib; 27639386878Sclaudio uint16_t id; 277c6e494ddSclaudio 278c6e494ddSclaudio for (id = 0; id < rib_size; id++) { 2798283d0ceSclaudio rib = rib_byid(id); 2808283d0ceSclaudio if (rib == NULL) 281c6e494ddSclaudio continue; 2828283d0ceSclaudio if (!RB_EMPTY(rib_tree(ribs[id]))) { 283c6e494ddSclaudio log_warnx("%s: rib %s is not empty", __func__, 2848283d0ceSclaudio ribs[id]->name); 285e8068d41Sclaudio } 2868283d0ceSclaudio rib_free(ribs[id]); 287c6e494ddSclaudio } 288e8d21d8aSclaudio for (id = 0; id <= RIB_LOC_START; id++) { 2898283d0ceSclaudio rib = rib_byid(id); 2908283d0ceSclaudio if (rib == NULL) 2918283d0ceSclaudio continue; 2928283d0ceSclaudio filterlist_free(rib->in_rules_tmp); 2938283d0ceSclaudio filterlist_free(rib->in_rules); 2948283d0ceSclaudio ribs[id] = NULL; 2958283d0ceSclaudio free(rib); 296e8d21d8aSclaudio } 297e8d21d8aSclaudio free(ribs); 298c6e494ddSclaudio } 299c6e494ddSclaudio 30086f6dc6eSclaudio struct rib_entry * 3015bb86053Sclaudio rib_get(struct rib *rib, struct pt_entry *pte) 30286f6dc6eSclaudio { 303bb8b30dfSclaudio struct rib_entry xre, *re; 30486f6dc6eSclaudio 3057b2cffc2Sclaudio memset(&xre, 0, sizeof(xre)); 3065bb86053Sclaudio xre.prefix = pte; 30786f6dc6eSclaudio 308bb8b30dfSclaudio re = RB_FIND(rib_tree, rib_tree(rib), &xre); 309bb8b30dfSclaudio if (re && re->rib_id != rib->id) 310bb8b30dfSclaudio fatalx("%s: Unexpected RIB %u != %u.", __func__, 311bb8b30dfSclaudio re->rib_id, rib->id); 312bb8b30dfSclaudio return re; 31386f6dc6eSclaudio } 31486f6dc6eSclaudio 31586f6dc6eSclaudio struct rib_entry * 3165bb86053Sclaudio rib_get_addr(struct rib *rib, struct bgpd_addr *prefix, int prefixlen) 3175bb86053Sclaudio { 3185bb86053Sclaudio return rib_get(rib, pt_fill(prefix, prefixlen)); 3195bb86053Sclaudio } 3205bb86053Sclaudio 3215bb86053Sclaudio struct rib_entry * 3227b2cffc2Sclaudio rib_match(struct rib *rib, struct bgpd_addr *addr) 32386f6dc6eSclaudio { 32486f6dc6eSclaudio struct rib_entry *re; 32586f6dc6eSclaudio int i; 32686f6dc6eSclaudio 327d6c2e4e8Sclaudio switch (addr->aid) { 328d6c2e4e8Sclaudio case AID_INET: 32915d8de66Sclaudio case AID_VPN_IPv4: 33086f6dc6eSclaudio for (i = 32; i >= 0; i--) { 3315bb86053Sclaudio re = rib_get_addr(rib, addr, i); 33286f6dc6eSclaudio if (re != NULL) 33386f6dc6eSclaudio return (re); 33486f6dc6eSclaudio } 33586f6dc6eSclaudio break; 336d6c2e4e8Sclaudio case AID_INET6: 337290f96faSdenis case AID_VPN_IPv6: 33886f6dc6eSclaudio for (i = 128; i >= 0; i--) { 3395bb86053Sclaudio re = rib_get_addr(rib, addr, i); 34086f6dc6eSclaudio if (re != NULL) 34186f6dc6eSclaudio return (re); 34286f6dc6eSclaudio } 34386f6dc6eSclaudio break; 34486f6dc6eSclaudio default: 3457b2cffc2Sclaudio fatalx("%s: unknown af", __func__); 34686f6dc6eSclaudio } 34786f6dc6eSclaudio return (NULL); 34886f6dc6eSclaudio } 34986f6dc6eSclaudio 35086f6dc6eSclaudio 35186f6dc6eSclaudio struct rib_entry * 3525bb86053Sclaudio rib_add(struct rib *rib, struct pt_entry *pte) 35386f6dc6eSclaudio { 35486f6dc6eSclaudio struct rib_entry *re; 35586f6dc6eSclaudio 35686f6dc6eSclaudio if ((re = calloc(1, sizeof(*re))) == NULL) 35786f6dc6eSclaudio fatal("rib_add"); 35886f6dc6eSclaudio 359df08e9a0Sclaudio TAILQ_INIT(&re->prefix_h); 360f8509b38Sclaudio re->prefix = pt_ref(pte); 361bb8b30dfSclaudio re->rib_id = rib->id; 36286f6dc6eSclaudio 3637498412bSclaudio if (RB_INSERT(rib_tree, rib_tree(rib), re) != NULL) { 36486f6dc6eSclaudio log_warnx("rib_add: insert failed"); 36566cce50aShenning free(re); 36686f6dc6eSclaudio return (NULL); 36786f6dc6eSclaudio } 36886f6dc6eSclaudio 36986f6dc6eSclaudio 37086f6dc6eSclaudio rdemem.rib_cnt++; 37186f6dc6eSclaudio 37286f6dc6eSclaudio return (re); 37386f6dc6eSclaudio } 37486f6dc6eSclaudio 37586f6dc6eSclaudio void 37686f6dc6eSclaudio rib_remove(struct rib_entry *re) 37786f6dc6eSclaudio { 37886f6dc6eSclaudio if (!rib_empty(re)) 37986f6dc6eSclaudio fatalx("rib_remove: entry not empty"); 38086f6dc6eSclaudio 381f7305028Sclaudio if (re_is_locked(re)) 382e3b5d516Sclaudio /* entry is locked, don't free it. */ 383e3b5d516Sclaudio return; 384e3b5d516Sclaudio 385e2c0fe86Sclaudio pt_unref(re->prefix); 38686f6dc6eSclaudio 387f7305028Sclaudio if (RB_REMOVE(rib_tree, rib_tree(re_rib(re)), re) == NULL) 38886f6dc6eSclaudio log_warnx("rib_remove: remove failed."); 38986f6dc6eSclaudio 39086f6dc6eSclaudio free(re); 39186f6dc6eSclaudio rdemem.rib_cnt--; 39286f6dc6eSclaudio } 39386f6dc6eSclaudio 39486f6dc6eSclaudio int 39586f6dc6eSclaudio rib_empty(struct rib_entry *re) 39686f6dc6eSclaudio { 397df08e9a0Sclaudio return TAILQ_EMPTY(&re->prefix_h); 39886f6dc6eSclaudio } 39986f6dc6eSclaudio 400bb8b30dfSclaudio static struct rib_entry * 40186f6dc6eSclaudio rib_restart(struct rib_context *ctx) 40286f6dc6eSclaudio { 40376c3be87Sclaudio struct rib_entry *re = NULL; 40486f6dc6eSclaudio 40576c3be87Sclaudio if (ctx->ctx_re) 406091e7b27Sclaudio re = re_unlock(ctx->ctx_re); 40786f6dc6eSclaudio 408e3b5d516Sclaudio /* find first non empty element */ 409a2629ab6Sclaudio while (re && rib_empty(re)) 4102d4ddadbSclaudio re = RB_NEXT(rib_tree, unused, re); 41186f6dc6eSclaudio 412e3b5d516Sclaudio /* free the previously locked rib element if empty */ 41376c3be87Sclaudio if (ctx->ctx_re && rib_empty(ctx->ctx_re)) 414e3b5d516Sclaudio rib_remove(ctx->ctx_re); 415e3b5d516Sclaudio ctx->ctx_re = NULL; 416e3b5d516Sclaudio return (re); 41786f6dc6eSclaudio } 418a16c0992Shenning 419bb8b30dfSclaudio static void 420bb8b30dfSclaudio rib_dump_r(struct rib_context *ctx) 421bb8b30dfSclaudio { 422f80c17aaSclaudio struct rib_entry *re, *next; 423bb8b30dfSclaudio struct rib *rib; 424bb8b30dfSclaudio unsigned int i; 425bb8b30dfSclaudio 4267b2cffc2Sclaudio rib = rib_byid(ctx->ctx_id); 427bb8b30dfSclaudio if (rib == NULL) 4287b2cffc2Sclaudio fatalx("%s: rib id %u gone", __func__, ctx->ctx_id); 429bb8b30dfSclaudio 43076c3be87Sclaudio if (ctx->ctx_re == NULL && ctx->ctx_subtree.aid == AID_UNSPEC) 431bb8b30dfSclaudio re = RB_MIN(rib_tree, rib_tree(rib)); 432bb8b30dfSclaudio else 433bb8b30dfSclaudio re = rib_restart(ctx); 434bb8b30dfSclaudio 435f80c17aaSclaudio for (i = 0; re != NULL; re = next) { 436f80c17aaSclaudio next = RB_NEXT(rib_tree, unused, re); 4377b2cffc2Sclaudio if (re->rib_id != ctx->ctx_id) 438bb8b30dfSclaudio fatalx("%s: Unexpected RIB %u != %u.", __func__, 4397b2cffc2Sclaudio re->rib_id, ctx->ctx_id); 440bb8b30dfSclaudio if (ctx->ctx_aid != AID_UNSPEC && 441bb8b30dfSclaudio ctx->ctx_aid != re->prefix->aid) 442bb8b30dfSclaudio continue; 44376c3be87Sclaudio if (ctx->ctx_subtree.aid != AID_UNSPEC) { 44476c3be87Sclaudio struct bgpd_addr addr; 44576c3be87Sclaudio pt_getaddr(re->prefix, &addr); 44676c3be87Sclaudio if (prefix_compare(&ctx->ctx_subtree, &addr, 44776c3be87Sclaudio ctx->ctx_subtreelen) != 0) 44876c3be87Sclaudio /* left subtree, walk is done */ 44976c3be87Sclaudio break; 45076c3be87Sclaudio } 451bb8b30dfSclaudio if (ctx->ctx_count && i++ >= ctx->ctx_count && 452bb8b30dfSclaudio !re_is_locked(re)) { 453bb8b30dfSclaudio /* store and lock last element */ 454091e7b27Sclaudio ctx->ctx_re = re_lock(re); 455bb8b30dfSclaudio return; 456bb8b30dfSclaudio } 4577b2cffc2Sclaudio ctx->ctx_rib_call(re, ctx->ctx_arg); 458bb8b30dfSclaudio } 459bb8b30dfSclaudio 460bb8b30dfSclaudio if (ctx->ctx_done) 461bb8b30dfSclaudio ctx->ctx_done(ctx->ctx_arg, ctx->ctx_aid); 462bb8b30dfSclaudio LIST_REMOVE(ctx, entry); 463bb8b30dfSclaudio free(ctx); 464bb8b30dfSclaudio } 465bb8b30dfSclaudio 466bb8b30dfSclaudio int 467bb8b30dfSclaudio rib_dump_pending(void) 468bb8b30dfSclaudio { 469bb8b30dfSclaudio struct rib_context *ctx; 470bb8b30dfSclaudio 471bb8b30dfSclaudio /* return true if at least one context is not throttled */ 472bb8b30dfSclaudio LIST_FOREACH(ctx, &rib_dumps, entry) { 473bb8b30dfSclaudio if (ctx->ctx_throttle && ctx->ctx_throttle(ctx->ctx_arg)) 474bb8b30dfSclaudio continue; 475bb8b30dfSclaudio return 1; 476bb8b30dfSclaudio } 477bb8b30dfSclaudio return 0; 478bb8b30dfSclaudio } 479bb8b30dfSclaudio 480bb8b30dfSclaudio void 481bb8b30dfSclaudio rib_dump_runner(void) 482bb8b30dfSclaudio { 483bb8b30dfSclaudio struct rib_context *ctx, *next; 484bb8b30dfSclaudio 485bb8b30dfSclaudio LIST_FOREACH_SAFE(ctx, &rib_dumps, entry, next) { 486bb8b30dfSclaudio if (ctx->ctx_throttle && ctx->ctx_throttle(ctx->ctx_arg)) 487bb8b30dfSclaudio continue; 4887b2cffc2Sclaudio if (ctx->ctx_rib_call != NULL) 489bb8b30dfSclaudio rib_dump_r(ctx); 4907b2cffc2Sclaudio else 4917b2cffc2Sclaudio prefix_dump_r(ctx); 492bb8b30dfSclaudio } 493bb8b30dfSclaudio } 494bb8b30dfSclaudio 495bb8b30dfSclaudio static void 49639386878Sclaudio rib_dump_abort(uint16_t id) 497bb8b30dfSclaudio { 498bb8b30dfSclaudio struct rib_context *ctx, *next; 499bb8b30dfSclaudio 500bb8b30dfSclaudio LIST_FOREACH_SAFE(ctx, &rib_dumps, entry, next) { 5017b2cffc2Sclaudio if (id != ctx->ctx_id) 502bb8b30dfSclaudio continue; 503bb8b30dfSclaudio if (ctx->ctx_done) 504bb8b30dfSclaudio ctx->ctx_done(ctx->ctx_arg, ctx->ctx_aid); 505091e7b27Sclaudio if (ctx->ctx_re && rib_empty(re_unlock(ctx->ctx_re))) 506091e7b27Sclaudio rib_remove(ctx->ctx_re); 507091e7b27Sclaudio if (ctx->ctx_p && prefix_is_dead(prefix_unlock(ctx->ctx_p))) 508091e7b27Sclaudio prefix_adjout_destroy(ctx->ctx_p); 509091e7b27Sclaudio LIST_REMOVE(ctx, entry); 510091e7b27Sclaudio free(ctx); 511091e7b27Sclaudio } 512091e7b27Sclaudio } 513091e7b27Sclaudio 514091e7b27Sclaudio void 515091e7b27Sclaudio rib_dump_terminate(void *arg) 516091e7b27Sclaudio { 517091e7b27Sclaudio struct rib_context *ctx, *next; 518091e7b27Sclaudio 519091e7b27Sclaudio LIST_FOREACH_SAFE(ctx, &rib_dumps, entry, next) { 520091e7b27Sclaudio if (ctx->ctx_arg != arg) 521091e7b27Sclaudio continue; 522091e7b27Sclaudio if (ctx->ctx_done) 523091e7b27Sclaudio ctx->ctx_done(ctx->ctx_arg, ctx->ctx_aid); 524091e7b27Sclaudio if (ctx->ctx_re && rib_empty(re_unlock(ctx->ctx_re))) 525091e7b27Sclaudio rib_remove(ctx->ctx_re); 526091e7b27Sclaudio if (ctx->ctx_p && prefix_is_dead(prefix_unlock(ctx->ctx_p))) 527091e7b27Sclaudio prefix_adjout_destroy(ctx->ctx_p); 528bb8b30dfSclaudio LIST_REMOVE(ctx, entry); 529bb8b30dfSclaudio free(ctx); 530bb8b30dfSclaudio } 531bb8b30dfSclaudio } 532bb8b30dfSclaudio 533bb8b30dfSclaudio int 53439386878Sclaudio rib_dump_new(uint16_t id, uint8_t aid, unsigned int count, void *arg, 53539386878Sclaudio void (*upcall)(struct rib_entry *, void *), void (*done)(void *, uint8_t), 536bb8b30dfSclaudio int (*throttle)(void *)) 537bb8b30dfSclaudio { 538bb8b30dfSclaudio struct rib_context *ctx; 539bb8b30dfSclaudio 540bb8b30dfSclaudio if ((ctx = calloc(1, sizeof(*ctx))) == NULL) 541bb8b30dfSclaudio return -1; 5427b2cffc2Sclaudio ctx->ctx_id = id; 543bb8b30dfSclaudio ctx->ctx_aid = aid; 544bb8b30dfSclaudio ctx->ctx_count = count; 545bb8b30dfSclaudio ctx->ctx_arg = arg; 5467b2cffc2Sclaudio ctx->ctx_rib_call = upcall; 547bb8b30dfSclaudio ctx->ctx_done = done; 548bb8b30dfSclaudio ctx->ctx_throttle = throttle; 549bb8b30dfSclaudio 550bb8b30dfSclaudio LIST_INSERT_HEAD(&rib_dumps, ctx, entry); 551bb8b30dfSclaudio 552f80c17aaSclaudio /* requested a sync traversal */ 553f80c17aaSclaudio if (count == 0) 554f80c17aaSclaudio rib_dump_r(ctx); 555f80c17aaSclaudio 556bb8b30dfSclaudio return 0; 557bb8b30dfSclaudio } 558bb8b30dfSclaudio 55976c3be87Sclaudio int 56076c3be87Sclaudio rib_dump_subtree(uint16_t id, struct bgpd_addr *subtree, uint8_t subtreelen, 56176c3be87Sclaudio unsigned int count, void *arg, void (*upcall)(struct rib_entry *, void *), 56276c3be87Sclaudio void (*done)(void *, uint8_t), int (*throttle)(void *)) 56376c3be87Sclaudio { 56476c3be87Sclaudio struct rib_context *ctx; 56576c3be87Sclaudio struct rib_entry xre; 56676c3be87Sclaudio 56776c3be87Sclaudio if ((ctx = calloc(1, sizeof(*ctx))) == NULL) 56876c3be87Sclaudio return -1; 56976c3be87Sclaudio ctx->ctx_id = id; 57076c3be87Sclaudio ctx->ctx_aid = subtree->aid; 57176c3be87Sclaudio ctx->ctx_count = count; 57276c3be87Sclaudio ctx->ctx_arg = arg; 57376c3be87Sclaudio ctx->ctx_rib_call = upcall; 57476c3be87Sclaudio ctx->ctx_done = done; 57576c3be87Sclaudio ctx->ctx_throttle = throttle; 57676c3be87Sclaudio ctx->ctx_subtree = *subtree; 57776c3be87Sclaudio ctx->ctx_subtreelen = subtreelen; 57876c3be87Sclaudio 57976c3be87Sclaudio LIST_INSERT_HEAD(&rib_dumps, ctx, entry); 58076c3be87Sclaudio 58176c3be87Sclaudio /* lookup start of subtree */ 58276c3be87Sclaudio memset(&xre, 0, sizeof(xre)); 58376c3be87Sclaudio xre.prefix = pt_fill(subtree, subtreelen); 58476c3be87Sclaudio ctx->ctx_re = RB_NFIND(rib_tree, rib_tree(rib_byid(id)), &xre); 58576c3be87Sclaudio if (ctx->ctx_re) 58676c3be87Sclaudio re_lock(ctx->ctx_re); 58776c3be87Sclaudio 58876c3be87Sclaudio /* requested a sync traversal */ 58976c3be87Sclaudio if (count == 0) 59076c3be87Sclaudio rib_dump_r(ctx); 59176c3be87Sclaudio 59276c3be87Sclaudio return 0; 59376c3be87Sclaudio } 59476c3be87Sclaudio 595a16c0992Shenning /* path specific functions */ 596a16c0992Shenning 597e1484d64Sclaudio static struct rde_aspath *path_lookup(struct rde_aspath *); 598e1484d64Sclaudio static void path_link(struct rde_aspath *); 599e1484d64Sclaudio static void path_unlink(struct rde_aspath *); 600a16c0992Shenning 601b1dd0a10Sclaudio static inline int 602af6b2e43Sclaudio path_compare(struct rde_aspath *a, struct rde_aspath *b) 603af6b2e43Sclaudio { 604af6b2e43Sclaudio int r; 605af6b2e43Sclaudio 60654494a53Sclaudio if (a == NULL && b == NULL) 60754494a53Sclaudio return (0); 60854494a53Sclaudio else if (b == NULL) 60954494a53Sclaudio return (1); 61054494a53Sclaudio else if (a == NULL) 61154494a53Sclaudio return (-1); 612bb8f17e7Sclaudio if ((a->flags & ~F_ATTR_LINKED) > (b->flags & ~F_ATTR_LINKED)) 61354494a53Sclaudio return (1); 614bb8f17e7Sclaudio if ((a->flags & ~F_ATTR_LINKED) < (b->flags & ~F_ATTR_LINKED)) 61554494a53Sclaudio return (-1); 616af6b2e43Sclaudio if (a->origin > b->origin) 617af6b2e43Sclaudio return (1); 618af6b2e43Sclaudio if (a->origin < b->origin) 619af6b2e43Sclaudio return (-1); 620af6b2e43Sclaudio if (a->med > b->med) 621af6b2e43Sclaudio return (1); 622af6b2e43Sclaudio if (a->med < b->med) 623af6b2e43Sclaudio return (-1); 624af6b2e43Sclaudio if (a->lpref > b->lpref) 625af6b2e43Sclaudio return (1); 626af6b2e43Sclaudio if (a->lpref < b->lpref) 627af6b2e43Sclaudio return (-1); 628aa5d92feSclaudio if (a->weight > b->weight) 629aa5d92feSclaudio return (1); 630aa5d92feSclaudio if (a->weight < b->weight) 631aa5d92feSclaudio return (-1); 63215aea9d4Sclaudio if (a->rtlabelid > b->rtlabelid) 63315aea9d4Sclaudio return (1); 63415aea9d4Sclaudio if (a->rtlabelid < b->rtlabelid) 63515aea9d4Sclaudio return (-1); 636e2691ddeSclaudio if (a->pftableid > b->pftableid) 637e2691ddeSclaudio return (1); 638e2691ddeSclaudio if (a->pftableid < b->pftableid) 639e2691ddeSclaudio return (-1); 640af6b2e43Sclaudio 641f8fade75Sclaudio /* no need to check aspa_state or aspa_generation */ 642f8fade75Sclaudio 643af6b2e43Sclaudio r = aspath_compare(a->aspath, b->aspath); 644af6b2e43Sclaudio if (r > 0) 645af6b2e43Sclaudio return (1); 646af6b2e43Sclaudio if (r < 0) 647af6b2e43Sclaudio return (-1); 648af6b2e43Sclaudio 64955e823c7Sclaudio return (attr_compare(a, b)); 650a16c0992Shenning } 651a16c0992Shenning 652b1dd0a10Sclaudio RB_HEAD(path_tree, rde_aspath) pathtable = RB_INITIALIZER(&pathtable); 653b1dd0a10Sclaudio RB_GENERATE_STATIC(path_tree, rde_aspath, entry, path_compare); 654b1dd0a10Sclaudio 655b1dd0a10Sclaudio static inline struct rde_aspath * 656b1dd0a10Sclaudio path_ref(struct rde_aspath *asp) 65737c2ba46Sclaudio { 658b1dd0a10Sclaudio if ((asp->flags & F_ATTR_LINKED) == 0) 659b1dd0a10Sclaudio fatalx("%s: unlinked object", __func__); 660b1dd0a10Sclaudio asp->refcnt++; 661b1dd0a10Sclaudio rdemem.path_refs++; 66237c2ba46Sclaudio 663b1dd0a10Sclaudio return asp; 664b1dd0a10Sclaudio } 66537c2ba46Sclaudio 666b1dd0a10Sclaudio static inline void 667b1dd0a10Sclaudio path_unref(struct rde_aspath *asp) 668b1dd0a10Sclaudio { 669b1dd0a10Sclaudio if (asp == NULL) 670b1dd0a10Sclaudio return; 671b1dd0a10Sclaudio if ((asp->flags & F_ATTR_LINKED) == 0) 672b1dd0a10Sclaudio fatalx("%s: unlinked object", __func__); 673b1dd0a10Sclaudio asp->refcnt--; 674b1dd0a10Sclaudio rdemem.path_refs--; 675b1dd0a10Sclaudio if (asp->refcnt <= 0) 676b1dd0a10Sclaudio path_unlink(asp); 677b1dd0a10Sclaudio } 67837c2ba46Sclaudio 679b1dd0a10Sclaudio void 680b1dd0a10Sclaudio path_shutdown(void) 681b1dd0a10Sclaudio { 682b1dd0a10Sclaudio if (!RB_EMPTY(&pathtable)) 683b1dd0a10Sclaudio log_warnx("path_free: free non-free table"); 68437c2ba46Sclaudio } 68537c2ba46Sclaudio 68654494a53Sclaudio static struct rde_aspath * 687e1484d64Sclaudio path_lookup(struct rde_aspath *aspath) 688a16c0992Shenning { 689b1dd0a10Sclaudio return (RB_FIND(path_tree, &pathtable, aspath)); 690a16c0992Shenning } 691a16c0992Shenning 692e54b0439Sclaudio /* 6934ac780caSclaudio * Link this aspath into the global table. 694e1484d64Sclaudio * The asp had to be alloced with path_get. 695a16c0992Shenning */ 696a16c0992Shenning static void 697e1484d64Sclaudio path_link(struct rde_aspath *asp) 698a16c0992Shenning { 699b1dd0a10Sclaudio if (RB_INSERT(path_tree, &pathtable, asp) != NULL) 700b1dd0a10Sclaudio fatalx("%s: already linked object", __func__); 701710df094Sclaudio asp->flags |= F_ATTR_LINKED; 702a16c0992Shenning } 703a16c0992Shenning 704af6b2e43Sclaudio /* 705e1484d64Sclaudio * This function can only be called when all prefix have been removed first. 706e1484d64Sclaudio * Normally this happens directly out of the prefix removal functions. 707e1484d64Sclaudio */ 708e1484d64Sclaudio static void 709e1484d64Sclaudio path_unlink(struct rde_aspath *asp) 710e1484d64Sclaudio { 711be474b4fSclaudio if (asp == NULL) 712be474b4fSclaudio return; 713be474b4fSclaudio 714e1484d64Sclaudio /* make sure no reference is hold for this rde_aspath */ 715e2c0fe86Sclaudio if (asp->refcnt != 0) 716e2c0fe86Sclaudio fatalx("%s: still holds references", __func__); 717e1484d64Sclaudio 718b1dd0a10Sclaudio RB_REMOVE(path_tree, &pathtable, asp); 719e1484d64Sclaudio asp->flags &= ~F_ATTR_LINKED; 720e1484d64Sclaudio 721e1484d64Sclaudio path_put(asp); 722e1484d64Sclaudio } 723e1484d64Sclaudio 724e1484d64Sclaudio /* 7255d2e648fSclaudio * Copy asp to a new UNLINKED aspath. 7265d2e648fSclaudio * On dst either path_get() or path_prep() had to be called before. 727af6b2e43Sclaudio */ 728af6b2e43Sclaudio struct rde_aspath * 7295d2e648fSclaudio path_copy(struct rde_aspath *dst, const struct rde_aspath *src) 730a16c0992Shenning { 7313487a040Sclaudio dst->aspath = aspath_copy(src->aspath); 732b7c7f0a9Sclaudio dst->refcnt = 0; 733b7c7f0a9Sclaudio dst->flags = src->flags & ~F_ATTR_LINKED; 734b7c7f0a9Sclaudio 7355d2e648fSclaudio dst->med = src->med; 7365d2e648fSclaudio dst->lpref = src->lpref; 7375d2e648fSclaudio dst->weight = src->weight; 7385d2e648fSclaudio dst->rtlabelid = rtlabel_ref(src->rtlabelid); 7395d2e648fSclaudio dst->pftableid = pftable_ref(src->pftableid); 740b7c7f0a9Sclaudio dst->origin = src->origin; 741e1484d64Sclaudio 7425d2e648fSclaudio attr_copy(dst, src); 743af6b2e43Sclaudio 7445d2e648fSclaudio return (dst); 745a16c0992Shenning } 746a16c0992Shenning 7473a50f0a9Sjmc /* initialize or prepare an aspath for use */ 748af6b2e43Sclaudio struct rde_aspath * 7495d2e648fSclaudio path_prep(struct rde_aspath *asp) 750a16c0992Shenning { 7515d2e648fSclaudio memset(asp, 0, sizeof(*asp)); 752af6b2e43Sclaudio asp->origin = ORIGIN_INCOMPLETE; 753af6b2e43Sclaudio asp->lpref = DEFAULT_LPREF; 754af6b2e43Sclaudio 755af6b2e43Sclaudio return (asp); 756a16c0992Shenning } 757a16c0992Shenning 7585d2e648fSclaudio /* alloc and initialize new entry. May not fail. */ 7595d2e648fSclaudio struct rde_aspath * 7605d2e648fSclaudio path_get(void) 7615d2e648fSclaudio { 7625d2e648fSclaudio struct rde_aspath *asp; 7635d2e648fSclaudio 7645d2e648fSclaudio asp = malloc(sizeof(*asp)); 7655d2e648fSclaudio if (asp == NULL) 7665d2e648fSclaudio fatal("path_get"); 7675d2e648fSclaudio rdemem.path_cnt++; 7685d2e648fSclaudio 7695d2e648fSclaudio return (path_prep(asp)); 7705d2e648fSclaudio } 7715d2e648fSclaudio 772e1484d64Sclaudio /* clean up an asp after use (frees all references to sub-objects) */ 773af6b2e43Sclaudio void 774effde4cfSclaudio path_clean(struct rde_aspath *asp) 775a16c0992Shenning { 776af6b2e43Sclaudio if (asp->flags & F_ATTR_LINKED) 777e1484d64Sclaudio fatalx("%s: linked object", __func__); 778af6b2e43Sclaudio 77915aea9d4Sclaudio rtlabel_unref(asp->rtlabelid); 780b5dbd344Sclaudio pftable_unref(asp->pftableid); 781af6b2e43Sclaudio aspath_put(asp->aspath); 7823487a040Sclaudio asp->aspath = NULL; 78355e823c7Sclaudio attr_freeall(asp); 784effde4cfSclaudio } 785effde4cfSclaudio 786effde4cfSclaudio /* free an unlinked element */ 787effde4cfSclaudio void 788effde4cfSclaudio path_put(struct rde_aspath *asp) 789effde4cfSclaudio { 790effde4cfSclaudio if (asp == NULL) 791effde4cfSclaudio return; 792effde4cfSclaudio 793effde4cfSclaudio path_clean(asp); 794effde4cfSclaudio 7956b5f6ff8Sclaudio rdemem.path_cnt--; 796a16c0992Shenning free(asp); 797a16c0992Shenning } 798a16c0992Shenning 799a16c0992Shenning /* prefix specific functions */ 800a16c0992Shenning 801bba39c78Sclaudio static int prefix_add(struct bgpd_addr *, int, struct rib *, 802ca69ff0dSclaudio struct rde_peer *, uint32_t, uint32_t, struct rde_aspath *, 803bba39c78Sclaudio struct rde_community *, struct nexthop *, 80489ee02f7Sclaudio uint8_t, uint8_t, int); 805bba39c78Sclaudio static int prefix_move(struct prefix *, struct rde_peer *, 806bba39c78Sclaudio struct rde_aspath *, struct rde_community *, 80789ee02f7Sclaudio struct nexthop *, uint8_t, uint8_t, int); 808bba39c78Sclaudio 80986f6dc6eSclaudio static void prefix_link(struct prefix *, struct rib_entry *, 810ca69ff0dSclaudio struct pt_entry *, struct rde_peer *, uint32_t, uint32_t, 81135dccef6Sclaudio struct rde_aspath *, struct rde_community *, 81235dccef6Sclaudio struct nexthop *, uint8_t, uint8_t); 813a16c0992Shenning static void prefix_unlink(struct prefix *); 814bba39c78Sclaudio 815be474b4fSclaudio static struct prefix *prefix_alloc(void); 816be474b4fSclaudio static void prefix_free(struct prefix *); 817be474b4fSclaudio 818be474b4fSclaudio /* RB tree comparison function */ 819be474b4fSclaudio static inline int 820307d7537Sclaudio prefix_index_cmp(struct prefix *a, struct prefix *b) 821307d7537Sclaudio { 822307d7537Sclaudio int r; 823307d7537Sclaudio r = pt_prefix_cmp(a->pt, b->pt); 824307d7537Sclaudio if (r != 0) 825307d7537Sclaudio return r; 826307d7537Sclaudio 827307d7537Sclaudio if (a->path_id_tx > b->path_id_tx) 828307d7537Sclaudio return 1; 829307d7537Sclaudio if (a->path_id_tx < b->path_id_tx) 830307d7537Sclaudio return -1; 831307d7537Sclaudio return 0; 832307d7537Sclaudio } 833307d7537Sclaudio 834307d7537Sclaudio static inline int 835be474b4fSclaudio prefix_cmp(struct prefix *a, struct prefix *b) 836be474b4fSclaudio { 83765ab7798Sclaudio if ((a->flags & PREFIX_FLAG_EOR) != (b->flags & PREFIX_FLAG_EOR)) 83865ab7798Sclaudio return (a->flags & PREFIX_FLAG_EOR) ? 1 : -1; 83965ab7798Sclaudio /* if EOR marker no need to check the rest */ 84065ab7798Sclaudio if (a->flags & PREFIX_FLAG_EOR) 841f8509b38Sclaudio return 0; 842f8509b38Sclaudio 843be474b4fSclaudio if (a->aspath != b->aspath) 844be474b4fSclaudio return (a->aspath > b->aspath ? 1 : -1); 845e7adcfeaSclaudio if (a->communities != b->communities) 846e7adcfeaSclaudio return (a->communities > b->communities ? 1 : -1); 847be474b4fSclaudio if (a->nexthop != b->nexthop) 848be474b4fSclaudio return (a->nexthop > b->nexthop ? 1 : -1); 849be474b4fSclaudio if (a->nhflags != b->nhflags) 850be474b4fSclaudio return (a->nhflags > b->nhflags ? 1 : -1); 851307d7537Sclaudio return prefix_index_cmp(a, b); 8527b2cffc2Sclaudio } 8537b2cffc2Sclaudio 8547b2cffc2Sclaudio RB_GENERATE(prefix_tree, prefix, entry.tree.update, prefix_cmp) 8557b2cffc2Sclaudio RB_GENERATE_STATIC(prefix_index, prefix, entry.tree.index, prefix_index_cmp) 856a16c0992Shenning 857a16c0992Shenning /* 858307d7537Sclaudio * Search for specified prefix of a peer. Returns NULL if not found. 859a16c0992Shenning */ 860a16c0992Shenning struct prefix * 86139386878Sclaudio prefix_get(struct rib *rib, struct rde_peer *peer, uint32_t path_id, 86229b527fbSclaudio struct bgpd_addr *prefix, int prefixlen) 863a16c0992Shenning { 86486f6dc6eSclaudio struct rib_entry *re; 865a16c0992Shenning 8665bb86053Sclaudio re = rib_get_addr(rib, prefix, prefixlen); 86786f6dc6eSclaudio if (re == NULL) 868af6b2e43Sclaudio return (NULL); 86929b527fbSclaudio return (prefix_bypeer(re, peer, path_id)); 870a16c0992Shenning } 871a16c0992Shenning 872a16c0992Shenning /* 873307d7537Sclaudio * Search for specified prefix in the peer prefix_index. 874307d7537Sclaudio * Returns NULL if not found. 8757b2cffc2Sclaudio */ 8767b2cffc2Sclaudio struct prefix * 8774efb02eeSclaudio prefix_adjout_get(struct rde_peer *peer, uint32_t path_id_tx, 8781e6dccedSclaudio struct pt_entry *pte) 8797b2cffc2Sclaudio { 8807b2cffc2Sclaudio struct prefix xp; 8817b2cffc2Sclaudio 8827b2cffc2Sclaudio memset(&xp, 0, sizeof(xp)); 8831e6dccedSclaudio xp.pt = pte; 8844efb02eeSclaudio xp.path_id_tx = path_id_tx; 8857b2cffc2Sclaudio 8867b2cffc2Sclaudio return RB_FIND(prefix_index, &peer->adj_rib_out, &xp); 8877b2cffc2Sclaudio } 8887b2cffc2Sclaudio 889307d7537Sclaudio /* 890307d7537Sclaudio * Lookup a prefix without considering path_id in the peer prefix_index. 891307d7537Sclaudio * Returns NULL if not found. 892307d7537Sclaudio */ 8937b2cffc2Sclaudio struct prefix * 8941e6dccedSclaudio prefix_adjout_first(struct rde_peer *peer, struct pt_entry *pte) 895307d7537Sclaudio { 896307d7537Sclaudio struct prefix xp, *np; 897307d7537Sclaudio 898307d7537Sclaudio memset(&xp, 0, sizeof(xp)); 8991e6dccedSclaudio xp.pt = pte; 900307d7537Sclaudio 901307d7537Sclaudio np = RB_NFIND(prefix_index, &peer->adj_rib_out, &xp); 90235dccef6Sclaudio if (np == NULL || pt_prefix_cmp(np->pt, xp.pt) != 0) 903307d7537Sclaudio return NULL; 904307d7537Sclaudio return np; 905307d7537Sclaudio } 906307d7537Sclaudio 90735dccef6Sclaudio /* 90835dccef6Sclaudio * Return next prefix after a lookup that is actually an update. 90935dccef6Sclaudio */ 910307d7537Sclaudio struct prefix * 911307d7537Sclaudio prefix_adjout_next(struct rde_peer *peer, struct prefix *p) 912307d7537Sclaudio { 913307d7537Sclaudio struct prefix *np; 914307d7537Sclaudio 915307d7537Sclaudio np = RB_NEXT(prefix_index, &peer->adj_rib_out, p); 91635dccef6Sclaudio if (np == NULL || np->pt != p->pt) 917307d7537Sclaudio return NULL; 918307d7537Sclaudio return np; 919307d7537Sclaudio } 920307d7537Sclaudio 921307d7537Sclaudio /* 9221e6dccedSclaudio * Lookup addr/prefixlen in the peer prefix_index. Returns first match. 9231e6dccedSclaudio * Returns NULL if not found. 9241e6dccedSclaudio */ 9251e6dccedSclaudio struct prefix * 9261e6dccedSclaudio prefix_adjout_lookup(struct rde_peer *peer, struct bgpd_addr *addr, int plen) 9271e6dccedSclaudio { 9281e6dccedSclaudio return prefix_adjout_first(peer, pt_fill(addr, plen)); 9291e6dccedSclaudio } 9301e6dccedSclaudio 9311e6dccedSclaudio /* 932307d7537Sclaudio * Lookup addr in the peer prefix_index. Returns first match. 933307d7537Sclaudio * Returns NULL if not found. 934307d7537Sclaudio */ 935307d7537Sclaudio struct prefix * 936307d7537Sclaudio prefix_adjout_match(struct rde_peer *peer, struct bgpd_addr *addr) 9377b2cffc2Sclaudio { 9387b2cffc2Sclaudio struct prefix *p; 9397b2cffc2Sclaudio int i; 9407b2cffc2Sclaudio 9417b2cffc2Sclaudio switch (addr->aid) { 9427b2cffc2Sclaudio case AID_INET: 9437b2cffc2Sclaudio case AID_VPN_IPv4: 9447b2cffc2Sclaudio for (i = 32; i >= 0; i--) { 945307d7537Sclaudio p = prefix_adjout_lookup(peer, addr, i); 9467b2cffc2Sclaudio if (p != NULL) 9477b2cffc2Sclaudio return p; 9487b2cffc2Sclaudio } 9497b2cffc2Sclaudio break; 9507b2cffc2Sclaudio case AID_INET6: 9517b2cffc2Sclaudio case AID_VPN_IPv6: 9527b2cffc2Sclaudio for (i = 128; i >= 0; i--) { 953307d7537Sclaudio p = prefix_adjout_lookup(peer, addr, i); 9547b2cffc2Sclaudio if (p != NULL) 9557b2cffc2Sclaudio return p; 9567b2cffc2Sclaudio } 9577b2cffc2Sclaudio break; 9587b2cffc2Sclaudio default: 9597b2cffc2Sclaudio fatalx("%s: unknown af", __func__); 9607b2cffc2Sclaudio } 9617b2cffc2Sclaudio return NULL; 9627b2cffc2Sclaudio } 9637b2cffc2Sclaudio 9647b2cffc2Sclaudio /* 965bba39c78Sclaudio * Update a prefix. 966bba39c78Sclaudio * Return 1 if prefix was newly added, 0 if it was just changed. 967bba39c78Sclaudio */ 968bba39c78Sclaudio int 96939386878Sclaudio prefix_update(struct rib *rib, struct rde_peer *peer, uint32_t path_id, 97089ee02f7Sclaudio uint32_t path_id_tx, struct filterstate *state, int filtered, 97189ee02f7Sclaudio struct bgpd_addr *prefix, int prefixlen) 972bba39c78Sclaudio { 973bba39c78Sclaudio struct rde_aspath *asp, *nasp = &state->aspath; 974bba39c78Sclaudio struct rde_community *comm, *ncomm = &state->communities; 975bba39c78Sclaudio struct prefix *p; 976bba39c78Sclaudio 977bba39c78Sclaudio /* 978bba39c78Sclaudio * First try to find a prefix in the specified RIB. 979bba39c78Sclaudio */ 98029b527fbSclaudio if ((p = prefix_get(rib, peer, path_id, prefix, prefixlen)) != NULL) { 981ca69ff0dSclaudio if (path_id_tx != p->path_id_tx) 982ca69ff0dSclaudio fatalx("path_id mismatch"); 983bba39c78Sclaudio if (prefix_nexthop(p) == state->nexthop && 984bba39c78Sclaudio prefix_nhflags(p) == state->nhflags && 985bba39c78Sclaudio communities_equal(ncomm, prefix_communities(p)) && 986bba39c78Sclaudio path_compare(nasp, prefix_aspath(p)) == 0) { 987bba39c78Sclaudio /* no change, update last change */ 98803e83acfSclaudio p->lastchange = getmonotime(); 989c85bce7bSclaudio p->validation_state = state->vstate; 99089ee02f7Sclaudio if (filtered) 99189ee02f7Sclaudio p->flags |= PREFIX_FLAG_FILTERED; 99289ee02f7Sclaudio else 99389ee02f7Sclaudio p->flags &= ~PREFIX_FLAG_FILTERED; 994bba39c78Sclaudio return (0); 995bba39c78Sclaudio } 996bba39c78Sclaudio } 997bba39c78Sclaudio 998bba39c78Sclaudio /* 999bba39c78Sclaudio * Either the prefix does not exist or the path changed. 1000bba39c78Sclaudio * In both cases lookup the new aspath to make sure it is not 1001bba39c78Sclaudio * already in the RIB. 1002bba39c78Sclaudio */ 1003bba39c78Sclaudio if ((asp = path_lookup(nasp)) == NULL) { 1004bba39c78Sclaudio /* Path not available, create and link a new one. */ 1005bba39c78Sclaudio asp = path_copy(path_get(), nasp); 1006bba39c78Sclaudio path_link(asp); 1007bba39c78Sclaudio } 1008bba39c78Sclaudio 1009bba39c78Sclaudio if ((comm = communities_lookup(ncomm)) == NULL) { 1010bba39c78Sclaudio /* Communities not available, create and link a new one. */ 1011bba39c78Sclaudio comm = communities_link(ncomm); 1012bba39c78Sclaudio } 1013bba39c78Sclaudio 101444975fa5Sclaudio /* If the prefix was found move it else add it to the RIB. */ 1015bba39c78Sclaudio if (p != NULL) 1016bba39c78Sclaudio return (prefix_move(p, peer, asp, comm, state->nexthop, 101789ee02f7Sclaudio state->nhflags, state->vstate, filtered)); 1018bba39c78Sclaudio else 1019ca69ff0dSclaudio return (prefix_add(prefix, prefixlen, rib, peer, path_id, 1020ca69ff0dSclaudio path_id_tx, asp, comm, state->nexthop, state->nhflags, 102189ee02f7Sclaudio state->vstate, filtered)); 1022bba39c78Sclaudio } 1023bba39c78Sclaudio 1024bba39c78Sclaudio /* 1025ea79069bSclaudio * Adds or updates a prefix. 1026a16c0992Shenning */ 10276f1dba6eSclaudio static int 102840222badSclaudio prefix_add(struct bgpd_addr *prefix, int prefixlen, struct rib *rib, 1029ca69ff0dSclaudio struct rde_peer *peer, uint32_t path_id, uint32_t path_id_tx, 1030ca69ff0dSclaudio struct rde_aspath *asp, struct rde_community *comm, 103189ee02f7Sclaudio struct nexthop *nexthop, uint8_t nhflags, uint8_t vstate, int filtered) 1032a16c0992Shenning { 10335bb86053Sclaudio struct pt_entry *pte; 1034a16c0992Shenning struct prefix *p; 103586f6dc6eSclaudio struct rib_entry *re; 1036a16c0992Shenning 10375bb86053Sclaudio pte = pt_get(prefix, prefixlen); 10385bb86053Sclaudio if (pte == NULL) 10395bb86053Sclaudio pte = pt_add(prefix, prefixlen); 10405bb86053Sclaudio re = rib_get(rib, pte); 104186f6dc6eSclaudio if (re == NULL) 10425bb86053Sclaudio re = rib_add(rib, pte); 1043af6b2e43Sclaudio 1044a16c0992Shenning p = prefix_alloc(); 1045ca69ff0dSclaudio prefix_link(p, re, re->prefix, peer, path_id, path_id_tx, asp, comm, 1046ca69ff0dSclaudio nexthop, nhflags, vstate); 104735dccef6Sclaudio 104889ee02f7Sclaudio if (filtered) 104989ee02f7Sclaudio p->flags |= PREFIX_FLAG_FILTERED; 105089ee02f7Sclaudio 105135dccef6Sclaudio /* add possible pftable reference form aspath */ 105235dccef6Sclaudio if (asp && asp->pftableid) 105335dccef6Sclaudio rde_pftable_add(asp->pftableid, p); 105435dccef6Sclaudio /* make route decision */ 105535dccef6Sclaudio prefix_evaluate(re, p, NULL); 1056e529331eSclaudio return (1); 1057a16c0992Shenning } 1058a16c0992Shenning 1059a16c0992Shenning /* 1060a16c0992Shenning * Move the prefix to the specified as path, removes the old asp if needed. 1061a16c0992Shenning */ 10626f1dba6eSclaudio static int 106340222badSclaudio prefix_move(struct prefix *p, struct rde_peer *peer, 1064e7adcfeaSclaudio struct rde_aspath *asp, struct rde_community *comm, 106589ee02f7Sclaudio struct nexthop *nexthop, uint8_t nhflags, uint8_t vstate, int filtered) 1066a16c0992Shenning { 1067a16c0992Shenning struct prefix *np; 1068a16c0992Shenning 10692b7b6505Sclaudio if (p->flags & PREFIX_FLAG_ADJOUT) 10702b7b6505Sclaudio fatalx("%s: prefix with PREFIX_FLAG_ADJOUT hit", __func__); 10712b7b6505Sclaudio 107240222badSclaudio if (peer != prefix_peer(p)) 1073abb644cbSclaudio fatalx("prefix_move: cross peer move"); 1074a16c0992Shenning 107575d3d5fbSclaudio /* add new prefix node */ 1076a16c0992Shenning np = prefix_alloc(); 1077ca69ff0dSclaudio prefix_link(np, prefix_re(p), p->pt, peer, p->path_id, p->path_id_tx, 1078ca69ff0dSclaudio asp, comm, nexthop, nhflags, vstate); 1079bb8f17e7Sclaudio 108089ee02f7Sclaudio if (filtered) 108189ee02f7Sclaudio np->flags |= PREFIX_FLAG_FILTERED; 108289ee02f7Sclaudio 1083b58d89daSclaudio /* add possible pftable reference from new aspath */ 1084b58d89daSclaudio if (asp && asp->pftableid) 1085b58d89daSclaudio rde_pftable_add(asp->pftableid, np); 1086b58d89daSclaudio 10871c446d86Sclaudio /* 10881c446d86Sclaudio * no need to update the peer prefix count because we are only moving 10891c446d86Sclaudio * the prefix without changing the peer. 10901c446d86Sclaudio */ 10912b7b6505Sclaudio prefix_evaluate(prefix_re(np), np, p); 1092a16c0992Shenning 109335dccef6Sclaudio /* remove possible pftable reference from old path */ 1094b58d89daSclaudio if (p->aspath && p->aspath->pftableid) 1095b58d89daSclaudio rde_pftable_del(p->aspath->pftableid, p); 1096b58d89daSclaudio 109775d3d5fbSclaudio /* remove old prefix node */ 109835dccef6Sclaudio prefix_unlink(p); 1099a16c0992Shenning prefix_free(p); 1100a16c0992Shenning 110140222badSclaudio return (0); 1102a16c0992Shenning } 1103a16c0992Shenning 1104a16c0992Shenning /* 1105227b4aedSclaudio * Removes a prefix from the specified RIB. If the parent objects -- rib_entry 1106227b4aedSclaudio * or pt_entry -- become empty remove them too. 1107a16c0992Shenning */ 1108e529331eSclaudio int 110939386878Sclaudio prefix_withdraw(struct rib *rib, struct rde_peer *peer, uint32_t path_id, 1110227b4aedSclaudio struct bgpd_addr *prefix, int prefixlen) 1111a16c0992Shenning { 1112a16c0992Shenning struct prefix *p; 1113a16c0992Shenning struct rde_aspath *asp; 1114a16c0992Shenning 111529b527fbSclaudio p = prefix_get(rib, peer, path_id, prefix, prefixlen); 1116a16c0992Shenning if (p == NULL) /* Got a dummy withdrawn request. */ 1117e529331eSclaudio return (0); 1118a16c0992Shenning 11192b7b6505Sclaudio if (p->flags & PREFIX_FLAG_ADJOUT) 11202b7b6505Sclaudio fatalx("%s: prefix with PREFIX_FLAG_ADJOUT hit", __func__); 11212b6d2161Sclaudio asp = prefix_aspath(p); 11223dd3015eSclaudio if (asp && asp->pftableid) 112314468147Sclaudio /* only prefixes in the local RIB were pushed into pf */ 1124b58d89daSclaudio rde_pftable_del(asp->pftableid, p); 11255c150859Sclaudio 112603e02783Sclaudio prefix_destroy(p); 1127e529331eSclaudio 1128e529331eSclaudio return (1); 1129a16c0992Shenning } 1130a16c0992Shenning 1131be474b4fSclaudio /* 1132dd2a9ed2Sclaudio * Special functions for flowspec until full integration is available. 1133dd2a9ed2Sclaudio * This just directly feeds the prefixes into the Adj-RIB-Out bypassing 1134dd2a9ed2Sclaudio * Adj-RIB-In and Loc-RIB for now. 1135dd2a9ed2Sclaudio */ 1136dd2a9ed2Sclaudio int 1137dd2a9ed2Sclaudio prefix_flowspec_update(struct rde_peer *peer, struct filterstate *state, 1138dd2a9ed2Sclaudio struct pt_entry *pte, uint32_t path_id_tx) 1139dd2a9ed2Sclaudio { 1140dd2a9ed2Sclaudio struct rde_aspath *asp, *nasp; 1141dd2a9ed2Sclaudio struct rde_community *comm, *ncomm; 1142dd2a9ed2Sclaudio struct rib_entry *re; 1143dd2a9ed2Sclaudio struct prefix *new, *old; 1144dd2a9ed2Sclaudio 1145dd2a9ed2Sclaudio re = rib_get(&flowrib, pte); 1146dd2a9ed2Sclaudio if (re == NULL) 1147dd2a9ed2Sclaudio re = rib_add(&flowrib, pte); 1148dd2a9ed2Sclaudio 1149dd2a9ed2Sclaudio old = prefix_bypeer(re, peer, 0); 1150dd2a9ed2Sclaudio new = prefix_alloc(); 1151dd2a9ed2Sclaudio 1152dd2a9ed2Sclaudio nasp = &state->aspath; 1153dd2a9ed2Sclaudio ncomm = &state->communities; 1154dd2a9ed2Sclaudio if ((asp = path_lookup(nasp)) == NULL) { 1155dd2a9ed2Sclaudio /* Path not available, create and link a new one. */ 1156dd2a9ed2Sclaudio asp = path_copy(path_get(), nasp); 1157dd2a9ed2Sclaudio path_link(asp); 1158dd2a9ed2Sclaudio } 1159dd2a9ed2Sclaudio if ((comm = communities_lookup(ncomm)) == NULL) { 1160dd2a9ed2Sclaudio /* Communities not available, create and link a new one. */ 1161dd2a9ed2Sclaudio comm = communities_link(ncomm); 1162dd2a9ed2Sclaudio } 1163dd2a9ed2Sclaudio 1164dd2a9ed2Sclaudio prefix_link(new, re, re->prefix, peer, 0, path_id_tx, asp, comm, 1165dd2a9ed2Sclaudio NULL, 0, 0); 1166dd2a9ed2Sclaudio TAILQ_INSERT_HEAD(&re->prefix_h, new, entry.list.rib); 1167dd2a9ed2Sclaudio 1168dd2a9ed2Sclaudio rde_generate_updates(re, new, old, EVAL_DEFAULT); 1169dd2a9ed2Sclaudio 1170dd2a9ed2Sclaudio if (old != NULL) { 1171dd2a9ed2Sclaudio TAILQ_REMOVE(&re->prefix_h, old, entry.list.rib); 1172dd2a9ed2Sclaudio prefix_unlink(old); 1173dd2a9ed2Sclaudio prefix_free(old); 1174dd2a9ed2Sclaudio return 0; 1175dd2a9ed2Sclaudio } 1176dd2a9ed2Sclaudio return 1; 1177dd2a9ed2Sclaudio } 1178dd2a9ed2Sclaudio 1179dd2a9ed2Sclaudio /* 1180dd2a9ed2Sclaudio * Remove a possible flowspec prefix from all Adj-RIB-Outs. 1181dd2a9ed2Sclaudio */ 1182dd2a9ed2Sclaudio int 1183dd2a9ed2Sclaudio prefix_flowspec_withdraw(struct rde_peer *peer, struct pt_entry *pte) 1184dd2a9ed2Sclaudio { 1185dd2a9ed2Sclaudio struct rib_entry *re; 1186dd2a9ed2Sclaudio struct prefix *p; 1187dd2a9ed2Sclaudio 1188dd2a9ed2Sclaudio re = rib_get(&flowrib, pte); 1189dd2a9ed2Sclaudio if (re == NULL) 1190dd2a9ed2Sclaudio return 0; 1191dd2a9ed2Sclaudio p = prefix_bypeer(re, peer, 0); 1192dd2a9ed2Sclaudio if (p == NULL) 1193dd2a9ed2Sclaudio return 0; 1194dd2a9ed2Sclaudio rde_generate_updates(re, NULL, p, EVAL_DEFAULT); 1195dd2a9ed2Sclaudio TAILQ_REMOVE(&re->prefix_h, p, entry.list.rib); 1196dd2a9ed2Sclaudio prefix_unlink(p); 1197dd2a9ed2Sclaudio prefix_free(p); 1198dd2a9ed2Sclaudio return 1; 1199dd2a9ed2Sclaudio } 1200dd2a9ed2Sclaudio 1201dd2a9ed2Sclaudio /* 1202dd2a9ed2Sclaudio * Push all flowspec rules into a newly available Adj-RIB-Out. 1203dd2a9ed2Sclaudio */ 1204dd2a9ed2Sclaudio void 1205dd2a9ed2Sclaudio prefix_flowspec_dump(uint8_t aid, void *arg, 1206dd2a9ed2Sclaudio void (*call)(struct rib_entry *, void *), void (*done)(void *, uint8_t)) 1207dd2a9ed2Sclaudio { 1208dd2a9ed2Sclaudio struct rib_entry *re, *next; 1209dd2a9ed2Sclaudio 1210692f884eSclaudio RB_FOREACH_SAFE(re, rib_tree, rib_tree(&flowrib), next) { 1211692f884eSclaudio if (aid != AID_UNSPEC && aid != re->prefix->aid) 1212692f884eSclaudio continue; 1213dd2a9ed2Sclaudio call(re, arg); 1214692f884eSclaudio } 1215dd2a9ed2Sclaudio if (done != NULL) 1216dd2a9ed2Sclaudio done(arg, aid); 1217dd2a9ed2Sclaudio } 1218dd2a9ed2Sclaudio 1219dd2a9ed2Sclaudio /* 1220be474b4fSclaudio * Insert an End-of-RIB marker into the update queue. 1221be474b4fSclaudio */ 1222be474b4fSclaudio void 122339386878Sclaudio prefix_add_eor(struct rde_peer *peer, uint8_t aid) 1224be474b4fSclaudio { 1225be474b4fSclaudio struct prefix *p; 1226be474b4fSclaudio 1227be474b4fSclaudio p = prefix_alloc(); 122865ab7798Sclaudio p->flags = PREFIX_FLAG_ADJOUT | PREFIX_FLAG_UPDATE | PREFIX_FLAG_EOR; 1229be474b4fSclaudio if (RB_INSERT(prefix_tree, &peer->updates[aid], p) != NULL) 1230be474b4fSclaudio /* no need to add if EoR marker already present */ 1231be474b4fSclaudio prefix_free(p); 12327b2cffc2Sclaudio /* EOR marker is not inserted into the adj_rib_out index */ 1233be474b4fSclaudio } 1234be474b4fSclaudio 1235be474b4fSclaudio /* 1236be474b4fSclaudio * Put a prefix from the Adj-RIB-Out onto the update queue. 1237be474b4fSclaudio */ 12383c4e1154Sclaudio void 12394efb02eeSclaudio prefix_adjout_update(struct prefix *p, struct rde_peer *peer, 12401e6dccedSclaudio struct filterstate *state, struct pt_entry *pte, uint32_t path_id_tx) 1241be474b4fSclaudio { 12427b2cffc2Sclaudio struct rde_aspath *asp; 12437b2cffc2Sclaudio struct rde_community *comm; 1244be474b4fSclaudio 12454efb02eeSclaudio if (p == NULL) { 12467b2cffc2Sclaudio p = prefix_alloc(); 12473a50f0a9Sjmc /* initially mark DEAD so code below is skipped */ 1248353bf614Sclaudio p->flags |= PREFIX_FLAG_ADJOUT | PREFIX_FLAG_DEAD; 12497b2cffc2Sclaudio 12501e6dccedSclaudio p->pt = pt_ref(pte); 12517b2cffc2Sclaudio p->peer = peer; 12524efb02eeSclaudio p->path_id_tx = path_id_tx; 12537b2cffc2Sclaudio 12547b2cffc2Sclaudio if (RB_INSERT(prefix_index, &peer->adj_rib_out, p) != NULL) 12557b2cffc2Sclaudio fatalx("%s: RB index invariant violated", __func__); 12567b2cffc2Sclaudio } 12577b2cffc2Sclaudio 1258353bf614Sclaudio if ((p->flags & PREFIX_FLAG_ADJOUT) == 0) 1259353bf614Sclaudio fatalx("%s: prefix without PREFIX_FLAG_ADJOUT hit", __func__); 1260353bf614Sclaudio if ((p->flags & (PREFIX_FLAG_WITHDRAW | PREFIX_FLAG_DEAD)) == 0) { 12614efb02eeSclaudio /* 12624efb02eeSclaudio * XXX for now treat a different path_id_tx like different 12634efb02eeSclaudio * attributes and force out an update. It is unclear how 12644efb02eeSclaudio * common it is to have equivalent updates from alternative 12654efb02eeSclaudio * paths. 12664efb02eeSclaudio */ 12674efb02eeSclaudio if (p->path_id_tx == path_id_tx && 12684efb02eeSclaudio prefix_nhflags(p) == state->nhflags && 1269353bf614Sclaudio prefix_nexthop(p) == state->nexthop && 1270353bf614Sclaudio communities_equal(&state->communities, 1271353bf614Sclaudio prefix_communities(p)) && 1272353bf614Sclaudio path_compare(&state->aspath, prefix_aspath(p)) == 0) { 1273353bf614Sclaudio /* nothing changed */ 1274c85bce7bSclaudio p->validation_state = state->vstate; 1275353bf614Sclaudio p->lastchange = getmonotime(); 1276*56202f8dSclaudio p->flags &= ~PREFIX_FLAG_STALE; 1277353bf614Sclaudio return; 1278353bf614Sclaudio } 1279353bf614Sclaudio 1280353bf614Sclaudio /* if pending update unhook it before it is unlinked */ 1281353bf614Sclaudio if (p->flags & PREFIX_FLAG_UPDATE) { 12821e6dccedSclaudio RB_REMOVE(prefix_tree, &peer->updates[pte->aid], p); 128382625ff8Sclaudio peer->stats.pending_update--; 1284353bf614Sclaudio } 1285353bf614Sclaudio 1286353bf614Sclaudio /* unlink prefix so it can be relinked below */ 1287353bf614Sclaudio prefix_unlink(p); 128882625ff8Sclaudio peer->stats.prefix_out_cnt--; 1289353bf614Sclaudio } 1290353bf614Sclaudio if (p->flags & PREFIX_FLAG_WITHDRAW) { 12911e6dccedSclaudio RB_REMOVE(prefix_tree, &peer->withdraws[pte->aid], p); 129282625ff8Sclaudio peer->stats.pending_withdraw--; 1293353bf614Sclaudio } 1294353bf614Sclaudio 1295353bf614Sclaudio /* nothing needs to be done for PREFIX_FLAG_DEAD and STALE */ 1296353bf614Sclaudio p->flags &= ~PREFIX_FLAG_MASK; 1297353bf614Sclaudio 12984efb02eeSclaudio /* update path_id_tx now that the prefix is unlinked */ 12994efb02eeSclaudio if (p->path_id_tx != path_id_tx) { 13004efb02eeSclaudio /* path_id_tx is part of the index so remove and re-insert p */ 13014efb02eeSclaudio RB_REMOVE(prefix_index, &peer->adj_rib_out, p); 13024efb02eeSclaudio p->path_id_tx = path_id_tx; 13034efb02eeSclaudio if (RB_INSERT(prefix_index, &peer->adj_rib_out, p) != NULL) 13044efb02eeSclaudio fatalx("%s: RB index invariant violated", __func__); 13054efb02eeSclaudio } 13064efb02eeSclaudio 13077b2cffc2Sclaudio if ((asp = path_lookup(&state->aspath)) == NULL) { 13087b2cffc2Sclaudio /* Path not available, create and link a new one. */ 13097b2cffc2Sclaudio asp = path_copy(path_get(), &state->aspath); 13107b2cffc2Sclaudio path_link(asp); 13117b2cffc2Sclaudio } 13127b2cffc2Sclaudio 13137b2cffc2Sclaudio if ((comm = communities_lookup(&state->communities)) == NULL) { 13147b2cffc2Sclaudio /* Communities not available, create and link a new one. */ 13157b2cffc2Sclaudio comm = communities_link(&state->communities); 13167b2cffc2Sclaudio } 13177b2cffc2Sclaudio 13184efb02eeSclaudio prefix_link(p, NULL, p->pt, peer, 0, p->path_id_tx, asp, comm, 1319c85bce7bSclaudio state->nexthop, state->nhflags, state->vstate); 132082625ff8Sclaudio peer->stats.prefix_out_cnt++; 13217b2cffc2Sclaudio 13227b2cffc2Sclaudio if (p->flags & PREFIX_FLAG_MASK) 1323be474b4fSclaudio fatalx("%s: bad flags %x", __func__, p->flags); 13246df2f818Sclaudio if (peer_is_up(peer)) { 13257b2cffc2Sclaudio p->flags |= PREFIX_FLAG_UPDATE; 13261e6dccedSclaudio if (RB_INSERT(prefix_tree, &peer->updates[pte->aid], p) != NULL) 1327be474b4fSclaudio fatalx("%s: RB tree invariant violated", __func__); 132882625ff8Sclaudio peer->stats.pending_update++; 1329be474b4fSclaudio } 13306df2f818Sclaudio } 1331be474b4fSclaudio 1332be474b4fSclaudio /* 1333be474b4fSclaudio * Withdraw a prefix from the Adj-RIB-Out, this unlinks the aspath but leaves 1334be474b4fSclaudio * the prefix in the RIB linked to the peer withdraw list. 1335be474b4fSclaudio */ 1336957262a2Sclaudio void 1337957262a2Sclaudio prefix_adjout_withdraw(struct prefix *p) 1338be474b4fSclaudio { 1339957262a2Sclaudio struct rde_peer *peer = prefix_peer(p); 1340be474b4fSclaudio 13412b7b6505Sclaudio if ((p->flags & PREFIX_FLAG_ADJOUT) == 0) 13422b7b6505Sclaudio fatalx("%s: prefix without PREFIX_FLAG_ADJOUT hit", __func__); 13432b7b6505Sclaudio 1344957262a2Sclaudio /* already a withdraw, shortcut */ 1345957262a2Sclaudio if (p->flags & PREFIX_FLAG_WITHDRAW) { 1346957262a2Sclaudio p->lastchange = getmonotime(); 1347*56202f8dSclaudio p->flags &= ~PREFIX_FLAG_STALE; 1348957262a2Sclaudio return; 1349957262a2Sclaudio } 13507ef8321bSclaudio /* pending update just got withdrawn */ 1351957262a2Sclaudio if (p->flags & PREFIX_FLAG_UPDATE) { 13527ef8321bSclaudio RB_REMOVE(prefix_tree, &peer->updates[p->pt->aid], p); 135382625ff8Sclaudio peer->stats.pending_update--; 1354957262a2Sclaudio } 135535dccef6Sclaudio /* unlink prefix if it was linked (not a withdraw or dead) */ 1356957262a2Sclaudio if ((p->flags & (PREFIX_FLAG_WITHDRAW | PREFIX_FLAG_DEAD)) == 0) { 135735dccef6Sclaudio prefix_unlink(p); 135882625ff8Sclaudio peer->stats.prefix_out_cnt--; 1359957262a2Sclaudio } 136035dccef6Sclaudio 13617ef8321bSclaudio /* nothing needs to be done for PREFIX_FLAG_DEAD and STALE */ 13627ef8321bSclaudio p->flags &= ~PREFIX_FLAG_MASK; 136303e83acfSclaudio p->lastchange = getmonotime(); 1364be474b4fSclaudio 13656df2f818Sclaudio if (peer_is_up(peer)) { 13667b2cffc2Sclaudio p->flags |= PREFIX_FLAG_WITHDRAW; 13676df2f818Sclaudio if (RB_INSERT(prefix_tree, &peer->withdraws[p->pt->aid], 13686df2f818Sclaudio p) != NULL) 1369be474b4fSclaudio fatalx("%s: RB tree invariant violated", __func__); 137082625ff8Sclaudio peer->stats.pending_withdraw++; 13716df2f818Sclaudio } else { 13726df2f818Sclaudio /* mark prefix dead to skip unlink on destroy */ 13736df2f818Sclaudio p->flags |= PREFIX_FLAG_DEAD; 13746df2f818Sclaudio prefix_adjout_destroy(p); 13756df2f818Sclaudio } 1376be474b4fSclaudio } 1377be474b4fSclaudio 13787b2cffc2Sclaudio void 13797b2cffc2Sclaudio prefix_adjout_destroy(struct prefix *p) 13807b2cffc2Sclaudio { 13817b2cffc2Sclaudio struct rde_peer *peer = prefix_peer(p); 13827b2cffc2Sclaudio 13832b7b6505Sclaudio if ((p->flags & PREFIX_FLAG_ADJOUT) == 0) 13842b7b6505Sclaudio fatalx("%s: prefix without PREFIX_FLAG_ADJOUT hit", __func__); 13852b7b6505Sclaudio 138665ab7798Sclaudio if (p->flags & PREFIX_FLAG_EOR) { 13877b2cffc2Sclaudio /* EOR marker is not linked in the index */ 13887b2cffc2Sclaudio prefix_free(p); 13897b2cffc2Sclaudio return; 13907b2cffc2Sclaudio } 13917b2cffc2Sclaudio 1392957262a2Sclaudio if (p->flags & PREFIX_FLAG_WITHDRAW) { 13937b2cffc2Sclaudio RB_REMOVE(prefix_tree, &peer->withdraws[p->pt->aid], p); 139482625ff8Sclaudio peer->stats.pending_withdraw--; 1395957262a2Sclaudio } 1396957262a2Sclaudio if (p->flags & PREFIX_FLAG_UPDATE) { 13977b2cffc2Sclaudio RB_REMOVE(prefix_tree, &peer->updates[p->pt->aid], p); 139882625ff8Sclaudio peer->stats.pending_update--; 1399957262a2Sclaudio } 140035dccef6Sclaudio /* unlink prefix if it was linked (not a withdraw or dead) */ 1401957262a2Sclaudio if ((p->flags & (PREFIX_FLAG_WITHDRAW | PREFIX_FLAG_DEAD)) == 0) { 140235dccef6Sclaudio prefix_unlink(p); 140382625ff8Sclaudio peer->stats.prefix_out_cnt--; 1404957262a2Sclaudio } 140535dccef6Sclaudio 14067ef8321bSclaudio /* nothing needs to be done for PREFIX_FLAG_DEAD and STALE */ 14077b2cffc2Sclaudio p->flags &= ~PREFIX_FLAG_MASK; 14087b2cffc2Sclaudio 14097b2cffc2Sclaudio if (prefix_is_locked(p)) { 141035dccef6Sclaudio /* mark prefix dead but leave it for prefix_restart */ 14117b2cffc2Sclaudio p->flags |= PREFIX_FLAG_DEAD; 141235dccef6Sclaudio } else { 14137b2cffc2Sclaudio RB_REMOVE(prefix_index, &peer->adj_rib_out, p); 141435dccef6Sclaudio /* remove the last prefix reference before free */ 141535dccef6Sclaudio pt_unref(p->pt); 14167b2cffc2Sclaudio prefix_free(p); 14177b2cffc2Sclaudio } 141835dccef6Sclaudio } 14197b2cffc2Sclaudio 1420442a0320Sclaudio void 1421442a0320Sclaudio prefix_adjout_flush_pending(struct rde_peer *peer) 1422442a0320Sclaudio { 1423442a0320Sclaudio struct prefix *p, *np; 1424442a0320Sclaudio uint8_t aid; 1425442a0320Sclaudio 1426442a0320Sclaudio for (aid = AID_MIN; aid < AID_MAX; aid++) { 1427442a0320Sclaudio RB_FOREACH_SAFE(p, prefix_tree, &peer->withdraws[aid], np) { 1428442a0320Sclaudio prefix_adjout_destroy(p); 1429442a0320Sclaudio } 1430442a0320Sclaudio RB_FOREACH_SAFE(p, prefix_tree, &peer->updates[aid], np) { 1431442a0320Sclaudio p->flags &= ~PREFIX_FLAG_UPDATE; 1432442a0320Sclaudio RB_REMOVE(prefix_tree, &peer->updates[aid], p); 1433442a0320Sclaudio peer->stats.pending_update--; 1434442a0320Sclaudio } 1435442a0320Sclaudio } 1436442a0320Sclaudio } 1437442a0320Sclaudio 1438f6662524Sclaudio int 1439f6662524Sclaudio prefix_adjout_reaper(struct rde_peer *peer) 1440f6662524Sclaudio { 1441f6662524Sclaudio struct prefix *p, *np; 1442f6662524Sclaudio int count = RDE_REAPER_ROUNDS; 1443f6662524Sclaudio 1444f6662524Sclaudio RB_FOREACH_SAFE(p, prefix_index, &peer->adj_rib_out, np) { 1445f6662524Sclaudio prefix_adjout_destroy(p); 1446f6662524Sclaudio if (count-- <= 0) 1447f6662524Sclaudio return 0; 1448f6662524Sclaudio } 1449f6662524Sclaudio return 1; 1450f6662524Sclaudio } 1451f6662524Sclaudio 145205c83d2bSclaudio static struct prefix * 145305c83d2bSclaudio prefix_restart(struct rib_context *ctx) 145405c83d2bSclaudio { 145576c3be87Sclaudio struct prefix *p = NULL; 145605c83d2bSclaudio 145776c3be87Sclaudio if (ctx->ctx_p) 145805c83d2bSclaudio p = prefix_unlock(ctx->ctx_p); 145905c83d2bSclaudio 146076c3be87Sclaudio if (p && prefix_is_dead(p)) { 146105c83d2bSclaudio struct prefix *next; 146205c83d2bSclaudio 146305c83d2bSclaudio next = RB_NEXT(prefix_index, unused, p); 146405c83d2bSclaudio prefix_adjout_destroy(p); 146505c83d2bSclaudio p = next; 146605c83d2bSclaudio } 146705c83d2bSclaudio ctx->ctx_p = NULL; 146805c83d2bSclaudio return p; 146905c83d2bSclaudio } 147005c83d2bSclaudio 14717b2cffc2Sclaudio static void 14727b2cffc2Sclaudio prefix_dump_r(struct rib_context *ctx) 14737b2cffc2Sclaudio { 14747b2cffc2Sclaudio struct prefix *p, *next; 14757b2cffc2Sclaudio struct rde_peer *peer; 14767b2cffc2Sclaudio unsigned int i; 14777b2cffc2Sclaudio 14787b2cffc2Sclaudio if ((peer = peer_get(ctx->ctx_id)) == NULL) 14797b2cffc2Sclaudio goto done; 14807b2cffc2Sclaudio 148176c3be87Sclaudio if (ctx->ctx_p == NULL && ctx->ctx_subtree.aid == AID_UNSPEC) 14827b2cffc2Sclaudio p = RB_MIN(prefix_index, &peer->adj_rib_out); 14837b2cffc2Sclaudio else 14847b2cffc2Sclaudio p = prefix_restart(ctx); 14857b2cffc2Sclaudio 14867b2cffc2Sclaudio for (i = 0; p != NULL; p = next) { 14877b2cffc2Sclaudio next = RB_NEXT(prefix_index, unused, p); 14887b2cffc2Sclaudio if (prefix_is_dead(p)) 14897b2cffc2Sclaudio continue; 14907b2cffc2Sclaudio if (ctx->ctx_aid != AID_UNSPEC && 14917b2cffc2Sclaudio ctx->ctx_aid != p->pt->aid) 14927b2cffc2Sclaudio continue; 149376c3be87Sclaudio if (ctx->ctx_subtree.aid != AID_UNSPEC) { 149476c3be87Sclaudio struct bgpd_addr addr; 149576c3be87Sclaudio pt_getaddr(p->pt, &addr); 149676c3be87Sclaudio if (prefix_compare(&ctx->ctx_subtree, &addr, 149776c3be87Sclaudio ctx->ctx_subtreelen) != 0) 149876c3be87Sclaudio /* left subtree, walk is done */ 149976c3be87Sclaudio break; 150076c3be87Sclaudio } 15017b2cffc2Sclaudio if (ctx->ctx_count && i++ >= ctx->ctx_count && 15027b2cffc2Sclaudio !prefix_is_locked(p)) { 15037b2cffc2Sclaudio /* store and lock last element */ 1504091e7b27Sclaudio ctx->ctx_p = prefix_lock(p); 15057b2cffc2Sclaudio return; 15067b2cffc2Sclaudio } 15077b2cffc2Sclaudio ctx->ctx_prefix_call(p, ctx->ctx_arg); 15087b2cffc2Sclaudio } 15097b2cffc2Sclaudio 15107b2cffc2Sclaudio done: 15117b2cffc2Sclaudio if (ctx->ctx_done) 15127b2cffc2Sclaudio ctx->ctx_done(ctx->ctx_arg, ctx->ctx_aid); 15137b2cffc2Sclaudio LIST_REMOVE(ctx, entry); 15147b2cffc2Sclaudio free(ctx); 15157b2cffc2Sclaudio } 15167b2cffc2Sclaudio 15177b2cffc2Sclaudio int 151839386878Sclaudio prefix_dump_new(struct rde_peer *peer, uint8_t aid, unsigned int count, 15197b2cffc2Sclaudio void *arg, void (*upcall)(struct prefix *, void *), 152039386878Sclaudio void (*done)(void *, uint8_t), int (*throttle)(void *)) 15217b2cffc2Sclaudio { 15227b2cffc2Sclaudio struct rib_context *ctx; 15237b2cffc2Sclaudio 15247b2cffc2Sclaudio if ((ctx = calloc(1, sizeof(*ctx))) == NULL) 15257b2cffc2Sclaudio return -1; 15267b2cffc2Sclaudio ctx->ctx_id = peer->conf.id; 15277b2cffc2Sclaudio ctx->ctx_aid = aid; 15287b2cffc2Sclaudio ctx->ctx_count = count; 15297b2cffc2Sclaudio ctx->ctx_arg = arg; 15307b2cffc2Sclaudio ctx->ctx_prefix_call = upcall; 15317b2cffc2Sclaudio ctx->ctx_done = done; 15327b2cffc2Sclaudio ctx->ctx_throttle = throttle; 15337b2cffc2Sclaudio 15347b2cffc2Sclaudio LIST_INSERT_HEAD(&rib_dumps, ctx, entry); 15357b2cffc2Sclaudio 15367b2cffc2Sclaudio /* requested a sync traversal */ 15377b2cffc2Sclaudio if (count == 0) 15387b2cffc2Sclaudio prefix_dump_r(ctx); 15397b2cffc2Sclaudio 15407b2cffc2Sclaudio return 0; 15417b2cffc2Sclaudio } 1542be474b4fSclaudio 154376c3be87Sclaudio int 154476c3be87Sclaudio prefix_dump_subtree(struct rde_peer *peer, struct bgpd_addr *subtree, 154576c3be87Sclaudio uint8_t subtreelen, unsigned int count, void *arg, 154676c3be87Sclaudio void (*upcall)(struct prefix *, void *), void (*done)(void *, uint8_t), 154776c3be87Sclaudio int (*throttle)(void *)) 154876c3be87Sclaudio { 154976c3be87Sclaudio struct rib_context *ctx; 155076c3be87Sclaudio struct prefix xp; 155176c3be87Sclaudio 155276c3be87Sclaudio if ((ctx = calloc(1, sizeof(*ctx))) == NULL) 155376c3be87Sclaudio return -1; 155476c3be87Sclaudio ctx->ctx_id = peer->conf.id; 155576c3be87Sclaudio ctx->ctx_aid = subtree->aid; 155676c3be87Sclaudio ctx->ctx_count = count; 155776c3be87Sclaudio ctx->ctx_arg = arg; 155876c3be87Sclaudio ctx->ctx_prefix_call = upcall; 155976c3be87Sclaudio ctx->ctx_done = done; 156076c3be87Sclaudio ctx->ctx_throttle = throttle; 156176c3be87Sclaudio ctx->ctx_subtree = *subtree; 156276c3be87Sclaudio ctx->ctx_subtreelen = subtreelen; 156376c3be87Sclaudio 156476c3be87Sclaudio LIST_INSERT_HEAD(&rib_dumps, ctx, entry); 156576c3be87Sclaudio 156676c3be87Sclaudio /* lookup start of subtree */ 156776c3be87Sclaudio memset(&xp, 0, sizeof(xp)); 156876c3be87Sclaudio xp.pt = pt_fill(subtree, subtreelen); 156976c3be87Sclaudio ctx->ctx_p = RB_NFIND(prefix_index, &peer->adj_rib_out, &xp); 157076c3be87Sclaudio if (ctx->ctx_p) 157176c3be87Sclaudio prefix_lock(ctx->ctx_p); 157276c3be87Sclaudio 157376c3be87Sclaudio /* requested a sync traversal */ 157476c3be87Sclaudio if (count == 0) 157576c3be87Sclaudio prefix_dump_r(ctx); 157676c3be87Sclaudio 157776c3be87Sclaudio return 0; 157876c3be87Sclaudio } 157976c3be87Sclaudio 1580a16c0992Shenning /* 1581494ab9efSclaudio * Searches in the prefix list of specified rib_entry for a prefix entry 1582a16c0992Shenning * belonging to the peer peer. Returns NULL if no match found. 1583a16c0992Shenning */ 1584a16c0992Shenning struct prefix * 158539386878Sclaudio prefix_bypeer(struct rib_entry *re, struct rde_peer *peer, uint32_t path_id) 1586a16c0992Shenning { 1587a16c0992Shenning struct prefix *p; 1588a16c0992Shenning 1589df08e9a0Sclaudio TAILQ_FOREACH(p, &re->prefix_h, entry.list.rib) 159029b527fbSclaudio if (prefix_peer(p) == peer && p->path_id == path_id) 1591abb644cbSclaudio return (p); 1592abb644cbSclaudio return (NULL); 1593a16c0992Shenning } 1594a16c0992Shenning 159503e02783Sclaudio /* kill a prefix. */ 1596a16c0992Shenning void 1597a16c0992Shenning prefix_destroy(struct prefix *p) 1598a16c0992Shenning { 15997b2cffc2Sclaudio /* make route decision */ 16002b7b6505Sclaudio prefix_evaluate(prefix_re(p), NULL, p); 16017b2cffc2Sclaudio 1602a16c0992Shenning prefix_unlink(p); 1603a16c0992Shenning prefix_free(p); 1604a16c0992Shenning } 1605a16c0992Shenning 1606a16c0992Shenning /* 1607a16c0992Shenning * Link a prefix into the different parent objects. 1608a16c0992Shenning */ 1609a16c0992Shenning static void 161035dccef6Sclaudio prefix_link(struct prefix *p, struct rib_entry *re, struct pt_entry *pt, 1611ca69ff0dSclaudio struct rde_peer *peer, uint32_t path_id, uint32_t path_id_tx, 1612ca69ff0dSclaudio struct rde_aspath *asp, struct rde_community *comm, 1613ca69ff0dSclaudio struct nexthop *nexthop, uint8_t nhflags, uint8_t vstate) 1614a16c0992Shenning { 161535dccef6Sclaudio if (re) 16162b7b6505Sclaudio p->entry.list.re = re; 16173f1c6356Sclaudio p->aspath = path_ref(asp); 1618e2c0fe86Sclaudio p->communities = communities_ref(comm); 16193f1c6356Sclaudio p->peer = peer; 162035dccef6Sclaudio p->pt = pt_ref(pt); 162129b527fbSclaudio p->path_id = path_id; 1622ca69ff0dSclaudio p->path_id_tx = path_id_tx; 16233f1c6356Sclaudio p->validation_state = vstate; 16243f1c6356Sclaudio p->nhflags = nhflags; 1625e7adcfeaSclaudio p->nexthop = nexthop_ref(nexthop); 1626be474b4fSclaudio nexthop_link(p); 162703e83acfSclaudio p->lastchange = getmonotime(); 1628a16c0992Shenning } 1629a16c0992Shenning 1630a16c0992Shenning /* 1631a16c0992Shenning * Unlink a prefix from the different parent objects. 1632a16c0992Shenning */ 1633a16c0992Shenning static void 1634be474b4fSclaudio prefix_unlink(struct prefix *p) 1635a16c0992Shenning { 16362b7b6505Sclaudio struct rib_entry *re = prefix_re(p); 1637be474b4fSclaudio 1638a16c0992Shenning /* destroy all references to other objects */ 163935dccef6Sclaudio /* remove nexthop ref ... */ 1640be474b4fSclaudio nexthop_unlink(p); 1641e2c0fe86Sclaudio nexthop_unref(p->nexthop); 1642be474b4fSclaudio p->nexthop = NULL; 164335dccef6Sclaudio p->nhflags = 0; 164435dccef6Sclaudio /* ... communities ... */ 164535dccef6Sclaudio communities_unref(p->communities); 164635dccef6Sclaudio p->communities = NULL; 164735dccef6Sclaudio /* and unlink from aspath */ 164835dccef6Sclaudio path_unref(p->aspath); 1649be474b4fSclaudio p->aspath = NULL; 1650a16c0992Shenning 16517b2cffc2Sclaudio if (re && rib_empty(re)) 16523f1c6356Sclaudio rib_remove(re); 16533f1c6356Sclaudio 165435dccef6Sclaudio pt_unref(p->pt); 1655a16c0992Shenning } 1656a16c0992Shenning 16577b2cffc2Sclaudio /* alloc and zero new entry. May not fail. */ 1658a16c0992Shenning static struct prefix * 1659a16c0992Shenning prefix_alloc(void) 1660a16c0992Shenning { 1661a16c0992Shenning struct prefix *p; 1662a16c0992Shenning 1663a16c0992Shenning p = calloc(1, sizeof(*p)); 1664a16c0992Shenning if (p == NULL) 1665f87a5020Shenning fatal("prefix_alloc"); 16666b5f6ff8Sclaudio rdemem.prefix_cnt++; 1667a16c0992Shenning return p; 1668a16c0992Shenning } 1669a16c0992Shenning 1670a16c0992Shenning /* free a unlinked entry */ 1671a16c0992Shenning static void 1672be474b4fSclaudio prefix_free(struct prefix *p) 1673a16c0992Shenning { 16746b5f6ff8Sclaudio rdemem.prefix_cnt--; 1675be474b4fSclaudio free(p); 1676a16c0992Shenning } 1677a16c0992Shenning 1678f8fa1458Sclaudio /* 1679f8fa1458Sclaudio * nexthop functions 1680f8fa1458Sclaudio */ 1681af6b2e43Sclaudio struct nexthop *nexthop_lookup(struct bgpd_addr *); 1682a16c0992Shenning 16836b713a0bSclaudio /* 16846b713a0bSclaudio * In BGP there exist two nexthops: the exit nexthop which was announced via 16856b713a0bSclaudio * BGP and the true nexthop which is used in the FIB -- forward information 16866b713a0bSclaudio * base a.k.a kernel routing table. When sending updates it is even more 16876b713a0bSclaudio * confusing. In IBGP we pass the unmodified exit nexthop to the neighbors 1688c5508ee4Sclaudio * while in EBGP normally the address of the router is sent. The exit nexthop 16896b713a0bSclaudio * may be passed to the external neighbor if the neighbor and the exit nexthop 16906b713a0bSclaudio * reside in the same subnet -- directly connected. 16916b713a0bSclaudio */ 1692a16c0992Shenning 1693c03dc58fSclaudio TAILQ_HEAD(nexthop_queue, nexthop) nexthop_runners = 1694c03dc58fSclaudio TAILQ_HEAD_INITIALIZER(nexthop_runners); 1695f0cb2ce5Stedu 1696c03dc58fSclaudio RB_HEAD(nexthop_tree, nexthop) nexthoptable = 1697c03dc58fSclaudio RB_INITIALIZER(&nexthoptree); 1698cb510c7bSclaudio 1699cb510c7bSclaudio static inline int nexthop_cmp(struct nexthop *, struct nexthop *); 1700cb510c7bSclaudio 1701cb510c7bSclaudio RB_GENERATE_STATIC(nexthop_tree, nexthop, entry, nexthop_cmp); 1702a16c0992Shenning 1703a16c0992Shenning void 17046f08c262Sclaudio nexthop_shutdown(void) 17056f08c262Sclaudio { 1706f1c683b3Sclaudio struct nexthop *nh, *nnh; 17076f08c262Sclaudio 1708c03dc58fSclaudio RB_FOREACH_SAFE(nh, nexthop_tree, &nexthoptable, nnh) { 1709f1c683b3Sclaudio nh->state = NEXTHOP_UNREACH; 1710e2c0fe86Sclaudio nexthop_unref(nh); 1711f1c683b3Sclaudio } 1712c03dc58fSclaudio 1713c03dc58fSclaudio RB_FOREACH(nh, nexthop_tree, &nexthoptable) { 1714c03dc58fSclaudio log_warnx("%s: nexthop %s refcnt %d", __func__, 1715ff377642Sclaudio log_addr(&nh->exit_nexthop), nh->refcnt); 1716ff377642Sclaudio } 1717f1c683b3Sclaudio } 17186f08c262Sclaudio 17193f1c6356Sclaudio int 17203f1c6356Sclaudio nexthop_pending(void) 17213f1c6356Sclaudio { 17223f1c6356Sclaudio return !TAILQ_EMPTY(&nexthop_runners); 17233f1c6356Sclaudio } 17243f1c6356Sclaudio 17253f1c6356Sclaudio void 17263f1c6356Sclaudio nexthop_runner(void) 17273f1c6356Sclaudio { 17283f1c6356Sclaudio struct nexthop *nh; 17293f1c6356Sclaudio struct prefix *p; 173039386878Sclaudio uint32_t j; 17313f1c6356Sclaudio 17323f1c6356Sclaudio nh = TAILQ_FIRST(&nexthop_runners); 17333f1c6356Sclaudio if (nh == NULL) 17343f1c6356Sclaudio return; 17353f1c6356Sclaudio 17363f1c6356Sclaudio /* remove from runnner queue */ 17373f1c6356Sclaudio TAILQ_REMOVE(&nexthop_runners, nh, runner_l); 17383f1c6356Sclaudio 17393f1c6356Sclaudio p = nh->next_prefix; 17403f1c6356Sclaudio for (j = 0; p != NULL && j < RDE_RUNNER_ROUNDS; j++) { 174113d31ce9Sclaudio prefix_evaluate_nexthop(p, nh->state, nh->oldstate); 17427b2cffc2Sclaudio p = LIST_NEXT(p, entry.list.nexthop); 17433f1c6356Sclaudio } 17443f1c6356Sclaudio 17453f1c6356Sclaudio /* prep for next run, if not finished readd to tail of queue */ 17463f1c6356Sclaudio nh->next_prefix = p; 17473f1c6356Sclaudio if (p != NULL) 17483f1c6356Sclaudio TAILQ_INSERT_TAIL(&nexthop_runners, nh, runner_l); 17493f1c6356Sclaudio else 17503f1c6356Sclaudio log_debug("nexthop %s update finished", 17513f1c6356Sclaudio log_addr(&nh->exit_nexthop)); 17523f1c6356Sclaudio } 17533f1c6356Sclaudio 17546f08c262Sclaudio void 17556b713a0bSclaudio nexthop_update(struct kroute_nexthop *msg) 1756a16c0992Shenning { 1757a16c0992Shenning struct nexthop *nh; 1758a16c0992Shenning 1759af6b2e43Sclaudio nh = nexthop_lookup(&msg->nexthop); 17606b713a0bSclaudio if (nh == NULL) { 1761f1c683b3Sclaudio log_warnx("nexthop_update: non-existent nexthop %s", 1762f1c683b3Sclaudio log_addr(&msg->nexthop)); 17636b713a0bSclaudio return; 17646b713a0bSclaudio } 17656b713a0bSclaudio 17663f1c6356Sclaudio nh->oldstate = nh->state; 176742328f4dSclaudio if (msg->valid) 176842328f4dSclaudio nh->state = NEXTHOP_REACH; 176942328f4dSclaudio else 177042328f4dSclaudio nh->state = NEXTHOP_UNREACH; 177142328f4dSclaudio 17723f1c6356Sclaudio if (nh->oldstate == NEXTHOP_LOOKUP) 17733f1c6356Sclaudio /* drop reference which was hold during the lookup */ 1774e2c0fe86Sclaudio if (nexthop_unref(nh)) 17753f1c6356Sclaudio return; /* nh lost last ref, no work left */ 17763f1c6356Sclaudio 177779ec894cSclaudio if (nh->next_prefix) { 17783f1c6356Sclaudio /* 17793f1c6356Sclaudio * If nexthop_runner() is not finished with this nexthop 17803f1c6356Sclaudio * then ensure that all prefixes are updated by setting 17813f1c6356Sclaudio * the oldstate to NEXTHOP_FLAPPED. 17823f1c6356Sclaudio */ 17833f1c6356Sclaudio nh->oldstate = NEXTHOP_FLAPPED; 178479ec894cSclaudio TAILQ_REMOVE(&nexthop_runners, nh, runner_l); 178579ec894cSclaudio } 17866b713a0bSclaudio 178759a54550Sclaudio if (msg->connected) 1788af6b2e43Sclaudio nh->flags |= NEXTHOP_CONNECTED; 1789f8124012Shenning 179059a54550Sclaudio nh->true_nexthop = msg->gateway; 179147bbf013Sclaudio nh->nexthop_net = msg->net; 1792d24b9087Sclaudio nh->nexthop_netlen = msg->netlen; 17937802328aSclaudio 17943f1c6356Sclaudio nh->next_prefix = LIST_FIRST(&nh->prefix_h); 1795099b8a26Sclaudio if (nh->next_prefix != NULL) { 17963f1c6356Sclaudio TAILQ_INSERT_HEAD(&nexthop_runners, nh, runner_l); 1797099b8a26Sclaudio log_debug("nexthop %s update starting", 1798099b8a26Sclaudio log_addr(&nh->exit_nexthop)); 1799099b8a26Sclaudio } 1800a16c0992Shenning } 1801a16c0992Shenning 1802af6b2e43Sclaudio void 180339386878Sclaudio nexthop_modify(struct nexthop *setnh, enum action_types type, uint8_t aid, 180439386878Sclaudio struct nexthop **nexthop, uint8_t *flags) 1805a16c0992Shenning { 180669797618Sclaudio switch (type) { 180769797618Sclaudio case ACTION_SET_NEXTHOP_REJECT: 180840222badSclaudio *flags = NEXTHOP_REJECT; 180969797618Sclaudio break; 181069797618Sclaudio case ACTION_SET_NEXTHOP_BLACKHOLE: 181140222badSclaudio *flags = NEXTHOP_BLACKHOLE; 181269797618Sclaudio break; 181369797618Sclaudio case ACTION_SET_NEXTHOP_NOMODIFY: 181440222badSclaudio *flags = NEXTHOP_NOMODIFY; 181569797618Sclaudio break; 181669797618Sclaudio case ACTION_SET_NEXTHOP_SELF: 181740222badSclaudio *flags = NEXTHOP_SELF; 181869797618Sclaudio break; 181923676e2aSclaudio case ACTION_SET_NEXTHOP_REF: 18208b79bb18Sclaudio /* 18218b79bb18Sclaudio * it is possible that a prefix matches but has the wrong 18228b79bb18Sclaudio * address family for the set nexthop. In this case ignore it. 18238b79bb18Sclaudio */ 18248b79bb18Sclaudio if (aid != setnh->exit_nexthop.aid) 18258b79bb18Sclaudio break; 1826e2c0fe86Sclaudio nexthop_unref(*nexthop); 18278b79bb18Sclaudio *nexthop = nexthop_ref(setnh); 182840222badSclaudio *flags = 0; 182969797618Sclaudio break; 183069797618Sclaudio default: 183169797618Sclaudio break; 183269797618Sclaudio } 1833af6b2e43Sclaudio } 1834af6b2e43Sclaudio 1835af6b2e43Sclaudio void 183640222badSclaudio nexthop_link(struct prefix *p) 1837af6b2e43Sclaudio { 183813d31ce9Sclaudio p->nhflags &= ~NEXTHOP_VALID; 183913d31ce9Sclaudio 184013d31ce9Sclaudio if (p->flags & PREFIX_FLAG_ADJOUT) { 184113d31ce9Sclaudio /* All nexthops are valid in Adj-RIB-Out */ 184213d31ce9Sclaudio p->nhflags |= NEXTHOP_VALID; 1843af6b2e43Sclaudio return; 184413d31ce9Sclaudio } 1845af6b2e43Sclaudio 18463f1c6356Sclaudio /* no need to link prefixes in RIBs that have no decision process */ 18472b7b6505Sclaudio if (re_rib(prefix_re(p))->flags & F_RIB_NOEVALUATE) 18483f1c6356Sclaudio return; 18493f1c6356Sclaudio 185013d31ce9Sclaudio /* self-announce networks use nexthop NULL and are valid as well */ 185113d31ce9Sclaudio if (p->nexthop == NULL || p->nexthop->state == NEXTHOP_REACH) 185213d31ce9Sclaudio p->nhflags |= NEXTHOP_VALID; 185313d31ce9Sclaudio 185413d31ce9Sclaudio if (p->nexthop == NULL) 185513d31ce9Sclaudio return; 18567b2cffc2Sclaudio p->flags |= PREFIX_NEXTHOP_LINKED; 18577b2cffc2Sclaudio LIST_INSERT_HEAD(&p->nexthop->prefix_h, p, entry.list.nexthop); 1858af6b2e43Sclaudio } 1859af6b2e43Sclaudio 1860af6b2e43Sclaudio void 186140222badSclaudio nexthop_unlink(struct prefix *p) 1862af6b2e43Sclaudio { 186313d31ce9Sclaudio p->nhflags &= ~NEXTHOP_VALID; 186413d31ce9Sclaudio 18657b2cffc2Sclaudio if (p->nexthop == NULL || (p->flags & PREFIX_NEXTHOP_LINKED) == 0) 18663f1c6356Sclaudio return; 18673f1c6356Sclaudio 186879ec894cSclaudio if (p == p->nexthop->next_prefix) { 18697b2cffc2Sclaudio p->nexthop->next_prefix = LIST_NEXT(p, entry.list.nexthop); 187079ec894cSclaudio /* remove nexthop from list if no prefixes left to update */ 1871099b8a26Sclaudio if (p->nexthop->next_prefix == NULL) { 187279ec894cSclaudio TAILQ_REMOVE(&nexthop_runners, p->nexthop, runner_l); 1873099b8a26Sclaudio log_debug("nexthop %s update finished", 1874099b8a26Sclaudio log_addr(&p->nexthop->exit_nexthop)); 1875099b8a26Sclaudio } 187679ec894cSclaudio } 18773f1c6356Sclaudio 18787b2cffc2Sclaudio p->flags &= ~PREFIX_NEXTHOP_LINKED; 18797b2cffc2Sclaudio LIST_REMOVE(p, entry.list.nexthop); 1880af6b2e43Sclaudio } 1881af6b2e43Sclaudio 1882af6b2e43Sclaudio struct nexthop * 1883cdf934b4Sclaudio nexthop_get(struct bgpd_addr *nexthop) 1884af6b2e43Sclaudio { 1885af6b2e43Sclaudio struct nexthop *nh; 1886af6b2e43Sclaudio 1887af6b2e43Sclaudio nh = nexthop_lookup(nexthop); 1888af6b2e43Sclaudio if (nh == NULL) { 1889a16c0992Shenning nh = calloc(1, sizeof(*nh)); 1890a16c0992Shenning if (nh == NULL) 1891cb510c7bSclaudio fatal("nexthop_get"); 18926b5f6ff8Sclaudio rdemem.nexthop_cnt++; 1893af6b2e43Sclaudio 189440222badSclaudio LIST_INIT(&nh->prefix_h); 1895af6b2e43Sclaudio nh->state = NEXTHOP_LOOKUP; 1896ff377642Sclaudio nexthop_ref(nh); /* take reference for lookup */ 1897af6b2e43Sclaudio nh->exit_nexthop = *nexthop; 1898c03dc58fSclaudio if (RB_INSERT(nexthop_tree, &nexthoptable, nh) != NULL) 1899c03dc58fSclaudio fatalx("nexthop tree inconsistency"); 1900af6b2e43Sclaudio 1901cdf934b4Sclaudio rde_send_nexthop(&nh->exit_nexthop, 1); 1902a16c0992Shenning } 1903a16c0992Shenning 1904ff377642Sclaudio return nexthop_ref(nh); 1905ff377642Sclaudio } 1906ff377642Sclaudio 1907ff377642Sclaudio struct nexthop * 1908ff377642Sclaudio nexthop_ref(struct nexthop *nexthop) 1909ff377642Sclaudio { 1910ff377642Sclaudio if (nexthop) 1911ff377642Sclaudio nexthop->refcnt++; 1912ff377642Sclaudio return (nexthop); 1913ff377642Sclaudio } 1914ff377642Sclaudio 1915ff377642Sclaudio int 1916e2c0fe86Sclaudio nexthop_unref(struct nexthop *nh) 1917ff377642Sclaudio { 1918ff377642Sclaudio if (nh == NULL) 1919ff377642Sclaudio return (0); 1920ff377642Sclaudio if (--nh->refcnt > 0) 1921ff377642Sclaudio return (0); 1922ff377642Sclaudio 1923ff377642Sclaudio /* sanity check */ 192440222badSclaudio if (!LIST_EMPTY(&nh->prefix_h) || nh->state == NEXTHOP_LOOKUP) 19253f1c6356Sclaudio fatalx("%s: refcnt error", __func__); 19263f1c6356Sclaudio 19273f1c6356Sclaudio /* is nexthop update running? impossible, that is a refcnt error */ 19283f1c6356Sclaudio if (nh->next_prefix) 19293f1c6356Sclaudio fatalx("%s: next_prefix not NULL", __func__); 1930ff377642Sclaudio 1931c03dc58fSclaudio RB_REMOVE(nexthop_tree, &nexthoptable, nh); 1932ff377642Sclaudio rde_send_nexthop(&nh->exit_nexthop, 0); 1933ff377642Sclaudio 1934ff377642Sclaudio rdemem.nexthop_cnt--; 1935ff377642Sclaudio free(nh); 1936ff377642Sclaudio return (1); 1937af6b2e43Sclaudio } 1938a16c0992Shenning 1939cb510c7bSclaudio static inline int 1940cb510c7bSclaudio nexthop_cmp(struct nexthop *na, struct nexthop *nb) 1941af6b2e43Sclaudio { 1942af6b2e43Sclaudio struct bgpd_addr *a, *b; 1943af6b2e43Sclaudio 194413aebe71Sclaudio if (na == nb) 1945af6b2e43Sclaudio return (0); 1946af6b2e43Sclaudio if (na == NULL) 1947af6b2e43Sclaudio return (-1); 1948af6b2e43Sclaudio if (nb == NULL) 1949af6b2e43Sclaudio return (1); 1950af6b2e43Sclaudio 1951af6b2e43Sclaudio a = &na->exit_nexthop; 1952af6b2e43Sclaudio b = &nb->exit_nexthop; 1953af6b2e43Sclaudio 1954d6c2e4e8Sclaudio if (a->aid != b->aid) 1955d6c2e4e8Sclaudio return (a->aid - b->aid); 1956af6b2e43Sclaudio 1957d6c2e4e8Sclaudio switch (a->aid) { 1958d6c2e4e8Sclaudio case AID_INET: 1959af6b2e43Sclaudio if (ntohl(a->v4.s_addr) > ntohl(b->v4.s_addr)) 1960af6b2e43Sclaudio return (1); 1961af6b2e43Sclaudio if (ntohl(a->v4.s_addr) < ntohl(b->v4.s_addr)) 1962af6b2e43Sclaudio return (-1); 1963af6b2e43Sclaudio return (0); 1964d6c2e4e8Sclaudio case AID_INET6: 1965af6b2e43Sclaudio return (memcmp(&a->v6, &b->v6, sizeof(struct in6_addr))); 1966af6b2e43Sclaudio default: 1967cf5008fdSclaudio fatalx("nexthop_cmp: %s is unsupported", aid2str(a->aid)); 1968af6b2e43Sclaudio } 1969af6b2e43Sclaudio return (-1); 1970af6b2e43Sclaudio } 1971af6b2e43Sclaudio 1972af6b2e43Sclaudio struct nexthop * 1973af6b2e43Sclaudio nexthop_lookup(struct bgpd_addr *nexthop) 1974af6b2e43Sclaudio { 1975c03dc58fSclaudio struct nexthop needle = { 0 }; 1976af6b2e43Sclaudio 1977c03dc58fSclaudio needle.exit_nexthop = *nexthop; 1978c03dc58fSclaudio return RB_FIND(nexthop_tree, &nexthoptable, &needle); 1979a16c0992Shenning } 1980