1 /* $OpenBSD: rde_decide.c,v 1.52 2009/04/23 19:23:27 claudio Exp $ */ 2 3 /* 4 * Copyright (c) 2003, 2004 Claudio Jeker <claudio@openbsd.org> 5 * Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org> 6 * 7 * Permission to use, copy, modify, and distribute this software for any 8 * purpose with or without fee is hereby granted, provided that the above 9 * copyright notice and this permission notice appear in all copies. 10 * 11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18 */ 19 20 #include <sys/types.h> 21 #include <sys/queue.h> 22 23 #include <string.h> 24 25 #include "bgpd.h" 26 #include "rde.h" 27 28 int prefix_cmp(struct prefix *, struct prefix *); 29 /* 30 * Decision Engine RFC implementation: 31 * Phase 1: 32 * - calculate LOCAL_PREF if needed -- EBGP or IGP learnt routes 33 * - IBGP routes may either use LOCAL_PREF or the local system computes 34 * the degree of preference 35 * - If the route is ineligible, the route MAY NOT serve as an input to 36 * the next phase of route selection 37 * - if the route is eligible the computed value MUST be used as the 38 * LOCAL_PREF value in any IBGP readvertisement 39 * 40 * Phase 2: 41 * - If the NEXT_HOP attribute of a BGP route depicts an address that is 42 * not resolvable the BGP route MUST be excluded from the Phase 2 decision 43 * function. 44 * - If the AS_PATH attribute of a BGP route contains an AS loop, the BGP 45 * route should be excluded from the Phase 2 decision function. 46 * - The local BGP speaker identifies the route that has: 47 * a) the highest degree of preference of any route to the same set 48 * of destinations 49 * b) is the only route to that destination 50 * c) is selected as a result of the Phase 2 tie breaking rules 51 * - The local speaker MUST determine the immediate next-hop address from 52 * the NEXT_HOP attribute of the selected route. 53 * - If either the immediate next hop or the IGP cost to the NEXT_HOP changes, 54 * Phase 2 Route Selection MUST be performed again. 55 * 56 * Route Resolvability Condition 57 * - A route Rte1, referencing only the intermediate network address, is 58 * considered resolvable if the Routing Table contains at least one 59 * resolvable route Rte2 that matches Rte1's intermediate network address 60 * and is not recursively resolved through Rte1. 61 * - Routes referencing interfaces are considered resolvable if the state of 62 * the referenced interface is up and IP processing is enabled. 63 * 64 * Breaking Ties (Phase 2) 65 * 1. Remove from consideration all routes which are not tied for having the 66 * smallest number of AS numbers present in their AS_PATH attributes. 67 * Note, that when counting this number, an AS_SET counts as 1 68 * 2. Remove from consideration all routes which are not tied for having the 69 * lowest Origin number in their Origin attribute. 70 * 3. Remove from consideration routes with less-preferred MULTI_EXIT_DISC 71 * attributes. MULTI_EXIT_DISC is only comparable between routes learned 72 * from the same neighboring AS. 73 * 4. If at least one of the candidate routes was received via EBGP, 74 * remove from consideration all routes which were received via IBGP. 75 * 5. Remove from consideration any routes with less-preferred interior cost. 76 * If the NEXT_HOP hop for a route is reachable, but no cost can be 77 * determined, then this step should be skipped. 78 * 6. Remove from consideration all routes other than the route that was 79 * advertised by the BGP speaker whose BGP Identifier has the lowest value. 80 * 7. Prefer the route received from the lowest peer address. 81 * 82 * Phase 3: Route Dissemination 83 * - All routes in the Loc-RIB are processed into Adj-RIBs-Out according 84 * to configured policy. A route SHALL NOT be installed in the Adj-Rib-Out 85 * unless the destination and NEXT_HOP described by this route may be 86 * forwarded appropriately by the Routing Table. 87 */ 88 89 /* 90 * Decision Engine OUR implementation: 91 * Our implementation has only one RIB. The filtering is done first. The 92 * filtering calculates the preference and stores it in LOCAL_PREF (Phase 1). 93 * Ineligible routes are flagged as ineligible via nexthop_add(). 94 * Phase 3 is done together with Phase 2. 95 * In following cases a prefix needs to be reevaluated: 96 * - update of a prefix (path_update) 97 * - withdraw of a prefix (prefix_remove) 98 * - state change of the nexthop (nexthop-{in}validate) 99 * - state change of session (session down) 100 */ 101 102 /* 103 * Compare two prefixes with equal pt_entry. Returns an integer greater than or 104 * less than 0, according to whether the prefix p1 is more or less preferred 105 * than the prefix p2. p1 should be used for the new prefix and p2 for a 106 * already added prefix. 107 */ 108 int 109 prefix_cmp(struct prefix *p1, struct prefix *p2) 110 { 111 struct rde_aspath *asp1, *asp2; 112 113 if (p1 == NULL) 114 return (-1); 115 if (p2 == NULL) 116 return (1); 117 118 /* only prefixes in the Local-RIB are eligible */ 119 if (!(p1->flags & F_LOCAL)) 120 return (-1); 121 if (!(p2->flags & F_LOCAL)) 122 return (1); 123 124 asp1 = p1->aspath; 125 asp2 = p2->aspath; 126 127 /* only loop free pathes are eligible */ 128 if (asp1->flags & F_ATTR_LOOP) 129 return (-1); 130 if (asp2->flags & F_ATTR_LOOP) 131 return (1); 132 133 /* 1. check if prefix is eligible a.k.a reachable */ 134 if (asp2->nexthop != NULL && asp2->nexthop->state != NEXTHOP_REACH) 135 return (1); 136 if (asp1->nexthop != NULL && asp1->nexthop->state != NEXTHOP_REACH) 137 return (-1); 138 139 /* 2. preference of prefix, bigger is better */ 140 if ((asp1->lpref - asp2->lpref) != 0) 141 return (asp1->lpref - asp2->lpref); 142 143 /* 3. aspath count, the shorter the better */ 144 if ((asp2->aspath->ascnt - asp1->aspath->ascnt) != 0) 145 return (asp2->aspath->ascnt - asp1->aspath->ascnt); 146 147 /* 4. origin, the lower the better */ 148 if ((asp2->origin - asp1->origin) != 0) 149 return (asp2->origin - asp1->origin); 150 151 /* 5. MED decision, only comparable between the same neighboring AS */ 152 if (rde_decisionflags() & BGPD_FLAG_DECISION_MED_ALWAYS || 153 aspath_neighbor(asp1->aspath) == aspath_neighbor(asp2->aspath)) 154 /* lowest value wins */ 155 if ((asp2->med - asp1->med) != 0) 156 return (asp2->med - asp1->med); 157 158 /* 159 * 6. EBGP is cooler than IBGP 160 * It is absolutely important that the ebgp value in peer_config.ebgp 161 * is bigger than all other ones (IBGP, confederations) 162 */ 163 if ((asp1->peer->conf.ebgp - asp2->peer->conf.ebgp) != 0) { 164 if (asp1->peer->conf.ebgp == 1) /* p1 is EBGP other is lower */ 165 return 1; 166 else if (asp2->peer->conf.ebgp == 1) /* p2 is EBGP */ 167 return -1; 168 } 169 170 /* 171 * 7. local tie-breaker, this weight is here to tip equal long AS 172 * paths in one or the other direction. It happens more and more 173 * that AS paths are equally long and so traffic engineering needs 174 * a metric that weights a prefix at a very late stage in the 175 * decision process. 176 */ 177 if ((asp1->weight - asp2->weight) != 0) 178 return (asp1->weight - asp2->weight); 179 180 /* 8. nexthop costs. NOT YET -> IGNORE */ 181 182 /* 183 * 9. older route (more stable) wins but only if route-age 184 * evaluation is enabled. 185 */ 186 if (rde_decisionflags() & BGPD_FLAG_DECISION_ROUTEAGE) 187 if ((p2->lastchange - p1->lastchange) != 0) 188 return (p2->lastchange - p1->lastchange); 189 190 /* 10. lowest BGP Id wins */ 191 if ((p2->aspath->peer->remote_bgpid - 192 p1->aspath->peer->remote_bgpid) != 0) 193 return (p2->aspath->peer->remote_bgpid - 194 p1->aspath->peer->remote_bgpid); 195 196 /* 11. lowest peer address wins (IPv4 is better than IPv6) */ 197 if (memcmp(&p1->aspath->peer->remote_addr, 198 &p2->aspath->peer->remote_addr, 199 sizeof(p1->aspath->peer->remote_addr)) != 0) 200 return (-memcmp(&p1->aspath->peer->remote_addr, 201 &p2->aspath->peer->remote_addr, 202 sizeof(p1->aspath->peer->remote_addr))); 203 204 /* 12. for announced prefixes prefer dynamic routes */ 205 if ((p1->flags & F_ANN_DYNAMIC) != (p2->flags & F_ANN_DYNAMIC)) { 206 if (p1->flags & F_ANN_DYNAMIC) 207 return (1); 208 else 209 return (-1); 210 } 211 212 fatalx("Uh, oh a politician in the decision process"); 213 /* NOTREACHED */ 214 } 215 216 /* 217 * Find the correct place to insert the prefix in the prefix list. 218 * If the active prefix has changed we need to send an update. 219 * The to evaluate prefix must not be in the prefix list. 220 */ 221 void 222 prefix_evaluate(struct prefix *p, struct pt_entry *pte) 223 { 224 struct prefix *xp; 225 226 if (rde_noevaluate()) { 227 /* decision process is turned off */ 228 if (p != NULL) 229 LIST_INSERT_HEAD(&pte->prefix_h, p, prefix_l); 230 if (pte->active != NULL) { 231 pte->active->aspath->active_cnt--; 232 pte->active = NULL; 233 } 234 return; 235 } 236 237 if (p != NULL) { 238 if (LIST_EMPTY(&pte->prefix_h)) 239 LIST_INSERT_HEAD(&pte->prefix_h, p, prefix_l); 240 else { 241 LIST_FOREACH(xp, &pte->prefix_h, prefix_l) 242 if (prefix_cmp(p, xp) > 0) { 243 LIST_INSERT_BEFORE(xp, p, prefix_l); 244 break; 245 } else if (LIST_NEXT(xp, prefix_l) == NULL) { 246 /* if xp last element ... */ 247 LIST_INSERT_AFTER(xp, p, prefix_l); 248 break; 249 } 250 } 251 } 252 253 xp = LIST_FIRST(&pte->prefix_h); 254 if (xp == NULL || !(xp->flags & F_LOCAL) || 255 xp->aspath->flags & F_ATTR_LOOP || 256 (xp->aspath->nexthop != NULL && 257 xp->aspath->nexthop->state != NEXTHOP_REACH)) 258 /* xp is ineligible */ 259 xp = NULL; 260 261 if (pte->active != xp) { 262 /* need to generate an update */ 263 if (pte->active != NULL) 264 pte->active->aspath->active_cnt--; 265 266 /* 267 * Send update with remove for pte->active and add for xp 268 * but remember that xp may be NULL aka ineligible. 269 * Additional decision may be made by the called functions. 270 */ 271 rde_generate_updates(xp, pte->active); 272 rde_send_kroute(xp, pte->active); 273 274 pte->active = xp; 275 if (xp != NULL) 276 xp->aspath->active_cnt++; 277 } 278 } 279