/*	$OpenBSD: rde_update.c,v 1.174 2025/01/13 13:50:34 claudio Exp $ */

/*
 * Copyright (c) 2004 Claudio Jeker <claudio@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
#include <sys/types.h>
#include <sys/queue.h>
#include <sys/tree.h>

#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>

#include "bgpd.h"
#include "session.h"
#include "rde.h"
#include "log.h"

enum up_state {
	UP_OK,
	UP_ERR_LIMIT,
	UP_FILTERED,
	UP_EXCLUDED,
};

static struct community	comm_no_advertise = {
	.flags = COMMUNITY_TYPE_BASIC,
	.data1 = COMMUNITY_WELLKNOWN,
	.data2 = COMMUNITY_NO_ADVERTISE
};
static struct community	comm_no_export = {
	.flags = COMMUNITY_TYPE_BASIC,
	.data1 = COMMUNITY_WELLKNOWN,
	.data2 = COMMUNITY_NO_EXPORT
};
static struct community	comm_no_expsubconfed = {
	.flags = COMMUNITY_TYPE_BASIC,
	.data1 = COMMUNITY_WELLKNOWN,
	.data2 = COMMUNITY_NO_EXPSUBCONFED
};

static void	up_prep_adjout(struct rde_peer *, struct filterstate *,
		    uint8_t);

static int
up_test_update(struct rde_peer *peer, struct prefix *p)
{
	struct rde_aspath *asp;
	struct rde_community *comm;
	struct rde_peer *frompeer;

	frompeer = prefix_peer(p);
	asp = prefix_aspath(p);
	comm = prefix_communities(p);

	if (asp == NULL || asp->flags & F_ATTR_PARSE_ERR)
		fatalx("try to send out a botched path");
	if (asp->flags & (F_ATTR_LOOP | F_ATTR_OTC_LEAK))
		fatalx("try to send out a looped path");

	if (peer == frompeer)
		/* Do not send routes back to sender */
		return (0);

	if (!frompeer->conf.ebgp && !peer->conf.ebgp) {
		/*
		 * route reflector redistribution rules:
		 * 1. if announce is set -> announce
		 * 2. from non-client, to non-client -> no
		 * 3. from client, to non-client -> yes
		 * 4. from non-client, to client -> yes
		 * 5. from client, to client -> yes
		 */
		if (frompeer->conf.reflector_client == 0 &&
		    peer->conf.reflector_client == 0 &&
		    (asp->flags & F_PREFIX_ANNOUNCED) == 0)
			/* Do not redistribute updates to ibgp peers */
			return (0);
	}

	/*
	 * With "transparent-as yes" set do not filter based on
	 * well-known communities. Instead pass them on to the client.
	 */
	if (peer->flags & PEERFLAG_TRANS_AS)
		return (1);

	/* well-known communities */
	if (community_match(comm, &comm_no_advertise, NULL))
		return (0);
	if (peer->conf.ebgp) {
		if (community_match(comm, &comm_no_export, NULL))
			return (0);
		if (community_match(comm, &comm_no_expsubconfed, NULL))
			return (0);
	}

	return (1);
}

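/*
 * Background note (editor's summary of RFC 9234): every session is
 * configured with a local role (provider, customer, peer, rs or
 * rs-client) and the transitive Only-to-Customer (OTC) attribute marks
 * routes that may only propagate towards customers. Routes carrying
 * OTC are therefore not sent towards providers, lateral peers or
 * route servers, which is what the checks below implement.
 */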
/* RFC9234 open policy handling */
static int
up_enforce_open_policy(struct rde_peer *peer, struct filterstate *state,
    uint8_t aid)
{
	/* only for IPv4 and IPv6 unicast */
	if (aid != AID_INET && aid != AID_INET6)
		return 0;

	/*
	 * do not propagate (consider it filtered) if OTC is present and
	 * local role is peer, customer or rs-client.
	 */
	if (peer->role == ROLE_PEER || peer->role == ROLE_CUSTOMER ||
	    peer->role == ROLE_RS_CLIENT)
		if (state->aspath.flags & F_ATTR_OTC)
			return 1;

	/*
	 * add OTC attribute if not present towards peers, customers and
	 * rs-clients (local roles peer, provider, rs).
	 */
	if (peer->role == ROLE_PEER || peer->role == ROLE_PROVIDER ||
	    peer->role == ROLE_RS)
		if ((state->aspath.flags & F_ATTR_OTC) == 0) {
			uint32_t tmp;

			tmp = htonl(peer->conf.local_as);
			if (attr_optadd(&state->aspath,
			    ATTR_OPTIONAL|ATTR_TRANSITIVE, ATTR_OTC,
			    &tmp, sizeof(tmp)) == -1)
				log_peer_warnx(&peer->conf,
				    "failed to add OTC attribute");
			state->aspath.flags |= F_ATTR_OTC;
		}

	return 0;
}

/*
 * Process a single prefix by passing it through the various filter stages
 * and if not filtered out update the Adj-RIB-Out. A caller that has not
 * looked up the Adj-RIB-Out entry may pass (void *)-1 for p, in which
 * case it is looked up here. Returns:
 * - UP_OK if prefix was added
 * - UP_ERR_LIMIT if the peer outbound prefix limit was reached
 * - UP_FILTERED if prefix was filtered out
 * - UP_EXCLUDED if prefix was excluded because of up_test_update()
 */
static enum up_state
up_process_prefix(struct rde_peer *peer, struct prefix *new, struct prefix *p)
{
	struct filterstate state;
	struct bgpd_addr addr;
	int excluded = 0;

	/*
	 * up_test_update() needs to run before the output filters,
	 * else the well-known communities won't work properly:
	 * the output filters would not be able to add well-known
	 * communities.
	 */
	if (!up_test_update(peer, new))
		excluded = 1;

	rde_filterstate_prep(&state, new);
	pt_getaddr(new->pt, &addr);
	if (rde_filter(peer->out_rules, peer, prefix_peer(new), &addr,
	    new->pt->prefixlen, &state) == ACTION_DENY) {
		rde_filterstate_clean(&state);
		return UP_FILTERED;
	}

	/* Open Policy Check: acts like an output filter */
	if (up_enforce_open_policy(peer, &state, new->pt->aid)) {
		rde_filterstate_clean(&state);
		return UP_FILTERED;
	}

	if (excluded) {
		rde_filterstate_clean(&state);
		return UP_EXCLUDED;
	}

	/* from here on we know this is an update */
	if (p == (void *)-1)
		p = prefix_adjout_get(peer, new->path_id_tx, new->pt);

	up_prep_adjout(peer, &state, new->pt->aid);
	prefix_adjout_update(p, peer, &state, new->pt, new->path_id_tx);
	rde_filterstate_clean(&state);

	/* max prefix checker outbound */
	if (peer->conf.max_out_prefix &&
	    peer->stats.prefix_out_cnt > peer->conf.max_out_prefix) {
		log_peer_warnx(&peer->conf,
		    "outbound prefix limit reached (>%u/%u)",
		    peer->stats.prefix_out_cnt, peer->conf.max_out_prefix);
		rde_update_err(peer, ERR_CEASE,
		    ERR_CEASE_MAX_SENT_PREFIX, NULL);
		return UP_ERR_LIMIT;
	}

	return UP_OK;
}

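/*
 * Generate an update for the peer's Adj-RIB-Out for the classic single
 * path (non add-path) case: the best path is tried first and, with
 * PEERFLAG_EVALUATE_ALL set, the following eligible paths are tried as
 * long as the previous one was filtered. If nothing can be announced
 * the old prefix is withdrawn.
 */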
void
up_generate_updates(struct rde_peer *peer, struct rib_entry *re)
{
	struct prefix *new, *p;

	p = prefix_adjout_first(peer, re->prefix);

	new = prefix_best(re);
	while (new != NULL) {
		switch (up_process_prefix(peer, new, p)) {
		case UP_OK:
		case UP_ERR_LIMIT:
			return;
		case UP_FILTERED:
			if (peer->flags & PEERFLAG_EVALUATE_ALL) {
				new = TAILQ_NEXT(new, entry.list.rib);
				if (new != NULL && prefix_eligible(new))
					continue;
			}
			goto done;
		case UP_EXCLUDED:
			goto done;
		}
	}

done:
	/* withdraw prefix */
	if (p != NULL)
		prefix_adjout_withdraw(p);
}

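/*
 * Note on the limits used below: peer->eval.maxpaths caps the total
 * number of paths announced per prefix, while peer->eval.extrapaths
 * caps only those paths falling outside the class selected by the
 * eval mode (best, ecmp or as-wide); a value of 0 disables the
 * respective limit.
 */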
/*
 * Generate updates for the add-path send case. Depending on the
 * peer eval settings prefixes are selected and distributed.
 * This highly depends on the Adj-RIB-Out to handle prefixes with no
 * changes gracefully. It may be possible to improve the API so that
 * less churn is needed.
 */
void
up_generate_addpath(struct rde_peer *peer, struct rib_entry *re)
{
	struct prefix *head, *new, *p;
	int maxpaths = 0, extrapaths = 0, extra;
	int checkmode = 1;

	head = prefix_adjout_first(peer, re->prefix);

	/* mark all paths as stale */
	for (p = head; p != NULL; p = prefix_adjout_next(peer, p))
		p->flags |= PREFIX_FLAG_STALE;

	/* update paths */
	new = prefix_best(re);
	while (new != NULL) {
		/* check limits and stop when a limit is reached */
		if (peer->eval.maxpaths != 0 &&
		    maxpaths >= peer->eval.maxpaths)
			break;
		if (peer->eval.extrapaths != 0 &&
		    extrapaths >= peer->eval.extrapaths)
			break;

		extra = 1;
		if (checkmode) {
			switch (peer->eval.mode) {
			case ADDPATH_EVAL_BEST:
				if (new->dmetric == PREFIX_DMETRIC_BEST)
					extra = 0;
				else
					checkmode = 0;
				break;
			case ADDPATH_EVAL_ECMP:
				if (new->dmetric == PREFIX_DMETRIC_BEST ||
				    new->dmetric == PREFIX_DMETRIC_ECMP)
					extra = 0;
				else
					checkmode = 0;
				break;
			case ADDPATH_EVAL_AS_WIDE:
				if (new->dmetric == PREFIX_DMETRIC_BEST ||
				    new->dmetric == PREFIX_DMETRIC_ECMP ||
				    new->dmetric == PREFIX_DMETRIC_AS_WIDE)
					extra = 0;
				else
					checkmode = 0;
				break;
			case ADDPATH_EVAL_ALL:
				/* nothing to check */
				checkmode = 0;
				break;
			default:
				fatalx("unknown add-path eval mode");
			}
		}

		switch (up_process_prefix(peer, new, (void *)-1)) {
		case UP_OK:
			maxpaths++;
			extrapaths += extra;
			break;
		case UP_FILTERED:
		case UP_EXCLUDED:
			break;
		case UP_ERR_LIMIT:
			/* just give up */
			return;
		}

		/* only allow valid prefixes */
		new = TAILQ_NEXT(new, entry.list.rib);
		if (new == NULL || !prefix_eligible(new))
			break;
	}

	/* withdraw stale paths */
	for (p = head; p != NULL; p = prefix_adjout_next(peer, p)) {
		if (p->flags & PREFIX_FLAG_STALE)
			prefix_adjout_withdraw(p);
	}
}

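/*
 * Like up_generate_addpath() above, the full resync case below uses a
 * mark-and-sweep idiom: all Adj-RIB-Out entries are first marked
 * PREFIX_FLAG_STALE, re-announced entries are refreshed (dropping the
 * mark again) and whatever is still stale afterwards gets withdrawn.
 */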
/*
 * Generate updates for the add-path send all case. Since all prefixes
 * are distributed just remove old and add new.
 */
void
up_generate_addpath_all(struct rde_peer *peer, struct rib_entry *re,
    struct prefix *new, struct prefix *old)
{
	struct prefix *p, *head = NULL;
	int all = 0;

	/*
	 * if old and new are NULL then insert all prefixes from best,
	 * clearing old routes in the process
	 */
	if (old == NULL && new == NULL) {
		/* mark all paths as stale */
		head = prefix_adjout_first(peer, re->prefix);
		for (p = head; p != NULL; p = prefix_adjout_next(peer, p))
			p->flags |= PREFIX_FLAG_STALE;

		new = prefix_best(re);
		all = 1;
	}

	if (new != NULL && !prefix_eligible(new)) {
		/* only allow valid prefixes */
		new = NULL;
	}

	if (old != NULL) {
		/* withdraw stale paths */
		p = prefix_adjout_get(peer, old->path_id_tx, old->pt);
		if (p != NULL)
			prefix_adjout_withdraw(p);
	}

	/* add new path (or multiple if all is set) */
	while (new != NULL) {
		switch (up_process_prefix(peer, new, (void *)-1)) {
		case UP_OK:
		case UP_FILTERED:
		case UP_EXCLUDED:
			break;
		case UP_ERR_LIMIT:
			/* just give up */
			return;
		}

		if (!all)
			break;

		/* only allow valid prefixes */
		new = TAILQ_NEXT(new, entry.list.rib);
		if (new == NULL || !prefix_eligible(new))
			break;
	}

	if (all) {
		/* withdraw stale paths */
		for (p = head; p != NULL; p = prefix_adjout_next(peer, p)) {
			if (p->flags & PREFIX_FLAG_STALE)
				prefix_adjout_withdraw(p);
		}
	}
}

/* send a default route to the specified peer */
void
up_generate_default(struct rde_peer *peer, uint8_t aid)
{
	extern struct rde_peer *peerself;
	struct filterstate state;
	struct rde_aspath *asp;
	struct prefix *p;
	struct pt_entry *pte;
	struct bgpd_addr addr;

	if (peer->capa.mp[aid] == 0)
		return;

	rde_filterstate_init(&state);
	asp = &state.aspath;
	asp->aspath = aspath_get(NULL, 0);
	asp->origin = ORIGIN_IGP;
	rde_filterstate_set_vstate(&state, ROA_NOTFOUND, ASPA_NEVER_KNOWN);
	/* the other default values are OK, nexthop is once again NULL */

	/*
	 * XXX apply default overrides. Not yet possible, mainly a parse.y
	 * problem.
	 */
	/* rde_apply_set(asp, peerself, peerself, set, af); */

	memset(&addr, 0, sizeof(addr));
	addr.aid = aid;
	p = prefix_adjout_lookup(peer, &addr, 0);

	/* outbound filter as usual */
	if (rde_filter(peer->out_rules, peer, peerself, &addr, 0, &state) ==
	    ACTION_DENY) {
		rde_filterstate_clean(&state);
		return;
	}

	up_prep_adjout(peer, &state, addr.aid);
	/* can't use pt_fill here since prefix_adjout_update keeps a ref */
	pte = pt_get(&addr, 0);
	if (pte == NULL)
		pte = pt_add(&addr, 0);
	prefix_adjout_update(p, peer, &state, pte, 0);
	rde_filterstate_clean(&state);

	/* max prefix checker outbound */
	if (peer->conf.max_out_prefix &&
	    peer->stats.prefix_out_cnt > peer->conf.max_out_prefix) {
		log_peer_warnx(&peer->conf,
		    "outbound prefix limit reached (>%u/%u)",
		    peer->stats.prefix_out_cnt, peer->conf.max_out_prefix);
		rde_update_err(peer, ERR_CEASE,
		    ERR_CEASE_MAX_SENT_PREFIX, NULL);
	}
}

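/*
 * Select the nexthop to announce to the peer, or NULL if the AID
 * carries no nexthop. nexthop-self always wins; for ibgp the nexthop
 * is left alone unless it points at the peer itself; for directly
 * connected ebgp sessions section 5.1.3 of RFC 4271 applies; for ebgp
 * multihop NEXTHOP_NOMODIFY may preserve the original nexthop.
 */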
static struct bgpd_addr *
up_get_nexthop(struct rde_peer *peer, struct filterstate *state, uint8_t aid)
{
	struct bgpd_addr *peer_local = NULL;

	switch (aid) {
	case AID_INET:
	case AID_VPN_IPv4:
		if (peer_has_ext_nexthop(peer, aid) &&
		    peer->remote_addr.aid == AID_INET6)
			peer_local = &peer->local_v6_addr;
		else if (peer->local_v4_addr.aid == AID_INET)
			peer_local = &peer->local_v4_addr;
		break;
	case AID_INET6:
	case AID_VPN_IPv6:
		if (peer->local_v6_addr.aid == AID_INET6)
			peer_local = &peer->local_v6_addr;
		break;
	case AID_FLOWSPECv4:
	case AID_FLOWSPECv6:
		/* flowspec has no nexthop */
		return (NULL);
	default:
		fatalx("%s, bad AID %s", __func__, aid2str(aid));
	}

	if (state->nhflags & NEXTHOP_SELF) {
		/*
		 * Forcing the nexthop to self is always possible
		 * and has precedence over other flags.
		 */
		return (peer_local);
	} else if (!peer->conf.ebgp) {
		/*
		 * in the ibgp case the nexthop is normally not
		 * modified unless it points at the peer itself.
		 */
		if (state->nexthop == NULL) {
			/* announced networks without explicit nexthop set */
			return (peer_local);
		}
		/*
		 * per RFC: if remote peer address is equal to the nexthop set
		 * the nexthop to our local address. This reduces the risk of
		 * routing loops. This overrides NEXTHOP_NOMODIFY.
		 */
		if (memcmp(&state->nexthop->exit_nexthop,
		    &peer->remote_addr, sizeof(peer->remote_addr)) == 0) {
			return (peer_local);
		}
		return (&state->nexthop->exit_nexthop);
	} else if (peer->conf.distance == 1) {
		/*
		 * In the ebgp directly connected case never send
		 * out a nexthop that is outside of the connected
		 * network of the peer. No matter what flags are
		 * set. This follows section 5.1.3 of RFC 4271.
		 * So checking whether the nexthop is in the same
		 * net is enough here.
		 */
		if (state->nexthop != NULL &&
		    state->nexthop->flags & NEXTHOP_CONNECTED &&
		    prefix_compare(&peer->remote_addr,
		    &state->nexthop->nexthop_net,
		    state->nexthop->nexthop_netlen) == 0) {
			/* nexthop and peer are in the same net */
			return (&state->nexthop->exit_nexthop);
		}
		return (peer_local);
	} else {
		/*
		 * For ebgp multihop make it possible to overrule
		 * the sent nexthop by setting NEXTHOP_NOMODIFY.
		 * Similar to the ibgp case there is no same net check
		 * needed but still ensure that the nexthop is not
		 * pointing to the peer itself.
		 */
		if (state->nhflags & NEXTHOP_NOMODIFY &&
		    state->nexthop != NULL &&
		    memcmp(&state->nexthop->exit_nexthop,
		    &peer->remote_addr, sizeof(peer->remote_addr)) != 0) {
			/* no modify flag set and nexthop not peer addr */
			return (&state->nexthop->exit_nexthop);
		}
		return (peer_local);
	}
}

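/*
 * Prepare the filter state for insertion into the Adj-RIB-Out:
 * prepend the local AS for eBGP sessions (unless transparent-as is
 * set) and replace the nexthop with the one selected by
 * up_get_nexthop().
 */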
static void
up_prep_adjout(struct rde_peer *peer, struct filterstate *state, uint8_t aid)
{
	struct bgpd_addr *nexthop;
	struct nexthop *nh = NULL;
	u_char *np;
	uint16_t nl;

	/* prepend local AS number for eBGP sessions. */
	if (peer->conf.ebgp && (peer->flags & PEERFLAG_TRANS_AS) == 0) {
		uint32_t prep_as = peer->conf.local_as;
		np = aspath_prepend(state->aspath.aspath, prep_as, 1, &nl);
		aspath_put(state->aspath.aspath);
		state->aspath.aspath = aspath_get(np, nl);
		free(np);
	}

	/* update nexthop */
	nexthop = up_get_nexthop(peer, state, aid);
	if (nexthop != NULL)
		nh = nexthop_get(nexthop);
	nexthop_unref(state->nexthop);
	state->nexthop = nh;
	state->nhflags = 0;
}

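/*
 * Write the path attributes for this path into buf, in ascending
 * attribute type order as the UPDATE message requires. On the wire
 * each attribute consists of flags (1 byte), type (1 byte) and a one
 * or two byte length (two when ATTR_EXTLEN is set), followed by the
 * value. Returns 0 on success, -1 if an attribute could not be
 * written.
 */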
static int
up_generate_attr(struct ibuf *buf, struct rde_peer *peer,
    struct rde_aspath *asp, struct rde_community *comm, struct nexthop *nh,
    uint8_t aid)
{
	struct attr *oa = NULL, *newaggr = NULL;
	u_char *pdata;
	uint32_t tmp32;
	int flags, neednewpath = 0, rv;
	uint16_t plen;
	uint8_t oalen = 0, type;

	if (asp->others_len > 0)
		oa = asp->others[oalen++];

	/* dump attributes in ascending order */
	for (type = ATTR_ORIGIN; type < 255; type++) {
		while (oa && oa->type < type) {
			if (oalen < asp->others_len)
				oa = asp->others[oalen++];
			else
				oa = NULL;
		}

		switch (type) {
		/*
		 * Attributes stored in rde_aspath
		 */
		case ATTR_ORIGIN:
			if (attr_writebuf(buf, ATTR_WELL_KNOWN,
			    ATTR_ORIGIN, &asp->origin, 1) == -1)
				return -1;
			break;
		case ATTR_ASPATH:
			plen = aspath_length(asp->aspath);
			pdata = aspath_dump(asp->aspath);

			if (!peer_has_as4byte(peer))
				pdata = aspath_deflate(pdata, &plen,
				    &neednewpath);
			rv = attr_writebuf(buf, ATTR_WELL_KNOWN,
			    ATTR_ASPATH, pdata, plen);
			if (!peer_has_as4byte(peer))
				free(pdata);

			if (rv == -1)
				return -1;
			break;
		case ATTR_NEXTHOP:
			switch (aid) {
			case AID_INET:
				if (nh == NULL)
					return -1;
				if (nh->exit_nexthop.aid != AID_INET) {
					if (peer_has_ext_nexthop(peer, aid))
						break;
					return -1;
				}
				if (attr_writebuf(buf, ATTR_WELL_KNOWN,
				    ATTR_NEXTHOP, &nh->exit_nexthop.v4,
				    sizeof(nh->exit_nexthop.v4)) == -1)
					return -1;
				break;
			default:
				break;
			}
			break;
		case ATTR_MED:
			/*
			 * The old MED from other peers MUST not be announced
			 * to others unless the MED is originating from us or
			 * the peer is an IBGP one. The only exception is
			 * routers with "transparent-as yes" set.
			 */
			if (asp->flags & F_ATTR_MED && (!peer->conf.ebgp ||
			    asp->flags & F_ATTR_MED_ANNOUNCE ||
			    peer->flags & PEERFLAG_TRANS_AS)) {
				tmp32 = htonl(asp->med);
				if (attr_writebuf(buf, ATTR_OPTIONAL,
				    ATTR_MED, &tmp32, 4) == -1)
					return -1;
			}
			break;
		case ATTR_LOCALPREF:
			if (!peer->conf.ebgp) {
				/* local preference, only valid for ibgp */
				tmp32 = htonl(asp->lpref);
				if (attr_writebuf(buf, ATTR_WELL_KNOWN,
				    ATTR_LOCALPREF, &tmp32, 4) == -1)
					return -1;
			}
			break;
		/*
		 * Communities are stored in struct rde_community
		 */
		case ATTR_COMMUNITIES:
		case ATTR_EXT_COMMUNITIES:
		case ATTR_LARGE_COMMUNITIES:
			if (community_writebuf(comm, type, peer->conf.ebgp,
			    buf) == -1)
				return -1;
			break;
		/*
		 * NEW to OLD conversion when sending stuff to a 2byte AS peer
		 */
		case ATTR_AS4_PATH:
			if (neednewpath) {
				plen = aspath_length(asp->aspath);
				pdata = aspath_dump(asp->aspath);

				flags = ATTR_OPTIONAL|ATTR_TRANSITIVE;
				if (!(asp->flags & F_PREFIX_ANNOUNCED))
					flags |= ATTR_PARTIAL;
				if (plen != 0)
					if (attr_writebuf(buf, flags,
					    ATTR_AS4_PATH, pdata, plen) == -1)
						return -1;
			}
			break;
		case ATTR_AS4_AGGREGATOR:
			if (newaggr) {
				flags = ATTR_OPTIONAL|ATTR_TRANSITIVE;
				if (!(asp->flags & F_PREFIX_ANNOUNCED))
					flags |= ATTR_PARTIAL;
				if (attr_writebuf(buf, flags,
				    ATTR_AS4_AGGREGATOR, newaggr->data,
				    newaggr->len) == -1)
					return -1;
			}
			break;
		/*
		 * multiprotocol attributes are handled elsewhere
		 */
		case ATTR_MP_REACH_NLRI:
		case ATTR_MP_UNREACH_NLRI:
			break;
		/*
		 * dump all other path attributes. Following rules apply:
		 *  1. well-known attrs: ATTR_ATOMIC_AGGREGATE and
		 *     ATTR_AGGREGATOR pass unmodified (enforce flags
		 *     to correct values). Actually ATTR_AGGREGATOR may be
		 *     deflated for OLD 2-byte peers.
		 *  2. non-transitive attrs: don't re-announce to ebgp peers
		 *  3. transitive known attrs: announce unmodified
		 *  4. transitive unknown attrs: set partial bit and
		 *     re-announce
		 */
		case ATTR_ATOMIC_AGGREGATE:
			if (oa == NULL || oa->type != type)
				break;
			if (attr_writebuf(buf, ATTR_WELL_KNOWN,
			    ATTR_ATOMIC_AGGREGATE, NULL, 0) == -1)
				return -1;
			break;
		case ATTR_AGGREGATOR:
			if (oa == NULL || oa->type != type)
				break;
			if ((!(oa->flags & ATTR_TRANSITIVE)) &&
			    peer->conf.ebgp)
				break;
			if (!peer_has_as4byte(peer)) {
				/* need to deflate the aggregator */
				uint8_t	t[6];
				uint16_t tas;

				if ((!(oa->flags & ATTR_TRANSITIVE)) &&
				    peer->conf.ebgp)
					break;

				memcpy(&tmp32, oa->data, sizeof(tmp32));
				if (ntohl(tmp32) > USHRT_MAX) {
					tas = htons(AS_TRANS);
					newaggr = oa;
				} else
					tas = htons(ntohl(tmp32));

				memcpy(t, &tas, sizeof(tas));
				memcpy(t + sizeof(tas),
				    oa->data + sizeof(tmp32),
				    oa->len - sizeof(tmp32));
				if (attr_writebuf(buf, oa->flags,
				    oa->type, &t, sizeof(t)) == -1)
					return -1;
			} else {
				if (attr_writebuf(buf, oa->flags, oa->type,
				    oa->data, oa->len) == -1)
					return -1;
			}
			break;
		case ATTR_ORIGINATOR_ID:
		case ATTR_CLUSTER_LIST:
		case ATTR_OTC:
			if (oa == NULL || oa->type != type)
				break;
			if ((!(oa->flags & ATTR_TRANSITIVE)) &&
			    peer->conf.ebgp)
				break;
			if (attr_writebuf(buf, oa->flags, oa->type,
			    oa->data, oa->len) == -1)
				return -1;
			break;
		default:
			if (oa == NULL && type >= ATTR_FIRST_UNKNOWN)
				/* there is no attribute left to dump */
				return (0);

			if (oa == NULL || oa->type != type)
				break;
			/* unknown attribute */
			if (!(oa->flags & ATTR_TRANSITIVE)) {
				/*
				 * RFC 1771:
				 * Unrecognized non-transitive optional
				 * attributes must be quietly ignored and
				 * not passed along to other BGP peers.
				 */
				break;
			}
			if (attr_writebuf(buf, oa->flags | ATTR_PARTIAL,
			    oa->type, oa->data, oa->len) == -1)
				return -1;
		}
	}
	return 0;
}

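/*
 * An End-of-RIB marker (RFC 4724) is an empty UPDATE message: for
 * IPv4 unicast one with no withdrawn routes and no path attributes,
 * for all other AFI/SAFI pairs one containing only an empty
 * MP_UNREACH_NLRI attribute.
 */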
/*
 * Check if the pending element is an EoR marker. If so remove it from
 * the tree and return 1.
 */
int
up_is_eor(struct rde_peer *peer, uint8_t aid)
{
	struct prefix *p;

	p = RB_MIN(prefix_tree, &peer->updates[aid]);
	if (p != NULL && (p->flags & PREFIX_FLAG_EOR)) {
		/*
		 * Need to remove eor from update tree because
		 * prefix_adjout_destroy() can't handle that.
		 */
		RB_REMOVE(prefix_tree, &peer->updates[aid], p);
		p->flags &= ~PREFIX_FLAG_UPDATE;
		prefix_adjout_destroy(p);
		return 1;
	}
	return 0;
}

/* minimal buffer size > withdraw len + attr len + attr hdr + afi/safi */
#define MIN_UPDATE_LEN	16

static void
up_prefix_free(struct prefix_tree *prefix_head, struct prefix *p,
    struct rde_peer *peer, int withdraw)
{
	if (withdraw) {
		/* prefix no longer needed, remove it */
		prefix_adjout_destroy(p);
		peer->stats.prefix_sent_withdraw++;
	} else {
		/* prefix still in Adj-RIB-Out, keep it */
		RB_REMOVE(prefix_tree, prefix_head, p);
		p->flags &= ~PREFIX_FLAG_UPDATE;
		peer->stats.pending_update--;
		peer->stats.prefix_sent_update++;
	}
}

/*
 * Write prefixes to the buffer until either there is no more space or
 * the next prefix no longer shares the same path attributes.
 * Returns -1 if no prefix was written, else 0.
 */
static int
up_dump_prefix(struct ibuf *buf, struct prefix_tree *prefix_head,
    struct rde_peer *peer, int withdraw)
{
	struct prefix *p, *np;
	int done = 0, has_ap = -1, rv = -1;

	RB_FOREACH_SAFE(p, prefix_tree, prefix_head, np) {
		if (has_ap == -1)
			has_ap = peer_has_add_path(peer, p->pt->aid,
			    CAPA_AP_SEND);
		if (pt_writebuf(buf, p->pt, withdraw, has_ap, p->path_id_tx) ==
		    -1)
			break;

		/* make sure we only dump prefixes which belong together */
		if (np == NULL ||
		    np->aspath != p->aspath ||
		    np->communities != p->communities ||
		    np->nexthop != p->nexthop ||
		    np->nhflags != p->nhflags ||
		    (np->flags & PREFIX_FLAG_EOR))
			done = 1;

		rv = 0;
		up_prefix_free(prefix_head, p, peer, withdraw);
		if (done)
			break;
	}
	return rv;
}

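/*
 * Write the MP_REACH_NLRI attribute (RFC 4760) for this AID into buf:
 * AFI (2 bytes), SAFI (1 byte), nexthop length (1 byte), the nexthop
 * itself, one reserved zero byte and then the NLRI. For the VPN SAFIs
 * the nexthop is prefixed with an 8-byte zero route distinguisher,
 * which is included in the nexthop length.
 */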
static int
up_generate_mp_reach(struct ibuf *buf, struct rde_peer *peer,
    struct nexthop *nh, uint8_t aid)
{
	struct bgpd_addr *nexthop;
	size_t off, nhoff;
	uint16_t len, afi;
	uint8_t safi;

	/* attribute header, defaulting to extended length one */
	if (ibuf_add_n8(buf, ATTR_OPTIONAL | ATTR_EXTLEN) == -1)
		return -1;
	if (ibuf_add_n8(buf, ATTR_MP_REACH_NLRI) == -1)
		return -1;
	off = ibuf_size(buf);
	if (ibuf_add_zero(buf, sizeof(len)) == -1)
		return -1;

	if (aid2afi(aid, &afi, &safi))
		fatalx("up_generate_mp_reach: bad AID");

	/* AFI + SAFI + NH LEN + NH + Reserved */
	if (ibuf_add_n16(buf, afi) == -1)
		return -1;
	if (ibuf_add_n8(buf, safi) == -1)
		return -1;
	nhoff = ibuf_size(buf);
	if (ibuf_add_zero(buf, 1) == -1)
		return -1;

	if (aid == AID_VPN_IPv4 || aid == AID_VPN_IPv6) {
		/* write zero rd */
		if (ibuf_add_zero(buf, sizeof(uint64_t)) == -1)
			return -1;
	}

	switch (aid) {
	case AID_INET:
	case AID_VPN_IPv4:
		if (nh == NULL)
			return -1;
		nexthop = &nh->exit_nexthop;
		/* AID_INET must only use this path with an IPv6 nexthop */
		if (nexthop->aid == AID_INET && aid != AID_INET) {
			if (ibuf_add(buf, &nexthop->v4,
			    sizeof(nexthop->v4)) == -1)
				return -1;
			break;
		} else if (nexthop->aid == AID_INET6 &&
		    peer_has_ext_nexthop(peer, aid)) {
			if (ibuf_add(buf, &nexthop->v6,
			    sizeof(nexthop->v6)) == -1)
				return -1;
		} else {
			/* can't encode nexthop, give up and withdraw prefix */
			return -1;
		}
		break;
	case AID_INET6:
	case AID_VPN_IPv6:
		if (nh == NULL)
			return -1;
		nexthop = &nh->exit_nexthop;
		if (ibuf_add(buf, &nexthop->v6, sizeof(nexthop->v6)) == -1)
			return -1;
		break;
	case AID_FLOWSPECv4:
	case AID_FLOWSPECv6:
		/* no NH */
		break;
	default:
		fatalx("up_generate_mp_reach: unknown AID");
	}

	/* update nexthop len */
	len = ibuf_size(buf) - nhoff - 1;
	if (ibuf_set_n8(buf, nhoff, len) == -1)
		return -1;

	if (ibuf_add_zero(buf, 1) == -1)	/* Reserved must be 0 */
		return -1;

	if (up_dump_prefix(buf, &peer->updates[aid], peer, 0) == -1)
		/* no prefixes written, fail update */
		return -1;

	/* update MP_REACH attribute length field */
	len = ibuf_size(buf) - off - sizeof(len);
	if (ibuf_set_n16(buf, off, len) == -1)
		return -1;

	return 0;
}

/*
 * Generate UPDATE message containing either just withdraws or updates.
 * UPDATE messages are constructed like this:
 *
 *	+-----------------------------------------------------+
 *	| Withdrawn Routes Length (2 octets)                  |
 *	+-----------------------------------------------------+
 *	| Withdrawn Routes (variable)                         |
 *	+-----------------------------------------------------+
 *	| Total Path Attribute Length (2 octets)              |
 *	+-----------------------------------------------------+
 *	| Path Attributes (variable)                          |
 *	+-----------------------------------------------------+
 *	| Network Layer Reachability Information (variable)   |
 *	+-----------------------------------------------------+
 *
 * Multiprotocol messages use MP_REACH_NLRI and MP_UNREACH_NLRI;
 * the latter will be the only path attribute in a message.
 */

/*
 * Write UPDATE message for withdrawn routes. The size of buf limits
 * how many routes can be added. Returns the new message buffer, or
 * NULL on error, which includes generating an empty withdraw message.
 */
struct ibuf *
up_dump_withdraws(struct rde_peer *peer, uint8_t aid)
{
	struct ibuf *buf;
	size_t off, pkgsize = MAX_PKTSIZE;
	uint16_t afi, len;
	uint8_t safi;

	if (peer_has_ext_msg(peer))
		pkgsize = MAX_EXT_PKTSIZE;

	if ((buf = ibuf_dynamic(4, pkgsize - MSGSIZE_HEADER)) == NULL)
		goto fail;

	/* reserve space for the withdrawn routes length field */
	off = ibuf_size(buf);
	if (ibuf_add_zero(buf, sizeof(len)) == -1)
		goto fail;

	if (aid != AID_INET) {
		/* reserve space for 2-byte path attribute length */
		off = ibuf_size(buf);
		if (ibuf_add_zero(buf, sizeof(len)) == -1)
			goto fail;

		/* attribute header, defaulting to extended length one */
		if (ibuf_add_n8(buf, ATTR_OPTIONAL | ATTR_EXTLEN) == -1)
			goto fail;
		if (ibuf_add_n8(buf, ATTR_MP_UNREACH_NLRI) == -1)
			goto fail;
		if (ibuf_add_zero(buf, sizeof(len)) == -1)
			goto fail;

		/* afi & safi */
		if (aid2afi(aid, &afi, &safi))
			fatalx("%s: bad AID", __func__);
		if (ibuf_add_n16(buf, afi) == -1)
			goto fail;
		if (ibuf_add_n8(buf, safi) == -1)
			goto fail;
	}

	if (up_dump_prefix(buf, &peer->withdraws[aid], peer, 1) == -1)
		goto fail;

	/* update length field (either withdrawn routes or attribute length) */
	len = ibuf_size(buf) - off - sizeof(len);
	if (ibuf_set_n16(buf, off, len) == -1)
		goto fail;

	if (aid != AID_INET) {
		/* write MP_UNREACH_NLRI attribute length (always extended) */
		len -= 4; /* skip attribute header */
		if (ibuf_set_n16(buf, off + sizeof(len) + 2, len) == -1)
			goto fail;
	} else {
		/* no extra attributes so set attribute len to 0 */
		if (ibuf_add_zero(buf, sizeof(len)) == -1) {
			goto fail;
		}
	}

	return buf;

fail:
	/* something went horribly wrong */
	log_peer_warn(&peer->conf, "generating withdraw failed, peer desynced");
	ibuf_free(buf);
	return NULL;
}

/*
 * Withdraw a single prefix after an error.
 */
static struct ibuf *
up_dump_withdraw_one(struct rde_peer *peer, struct prefix *p, struct ibuf *buf)
{
	size_t off;
	int has_ap;
	uint16_t afi, len;
	uint8_t safi;

	/* reset the buffer and start fresh */
	ibuf_truncate(buf, 0);

	/* reserve space for the withdrawn routes length field */
	off = ibuf_size(buf);
	if (ibuf_add_zero(buf, sizeof(len)) == -1)
		goto fail;

	if (p->pt->aid != AID_INET) {
		/* reserve space for 2-byte path attribute length */
		off = ibuf_size(buf);
		if (ibuf_add_zero(buf, sizeof(len)) == -1)
			goto fail;

		/* attribute header, defaulting to extended length one */
		if (ibuf_add_n8(buf, ATTR_OPTIONAL | ATTR_EXTLEN) == -1)
			goto fail;
		if (ibuf_add_n8(buf, ATTR_MP_UNREACH_NLRI) == -1)
			goto fail;
		if (ibuf_add_zero(buf, sizeof(len)) == -1)
			goto fail;

		/* afi & safi */
		if (aid2afi(p->pt->aid, &afi, &safi))
			fatalx("%s: bad AID", __func__);
		if (ibuf_add_n16(buf, afi) == -1)
			goto fail;
		if (ibuf_add_n8(buf, safi) == -1)
			goto fail;
	}

	has_ap = peer_has_add_path(peer, p->pt->aid, CAPA_AP_SEND);
	if (pt_writebuf(buf, p->pt, 1, has_ap, p->path_id_tx) == -1)
		goto fail;

	/* update length field (either withdrawn routes or attribute length) */
	len = ibuf_size(buf) - off - sizeof(len);
	if (ibuf_set_n16(buf, off, len) == -1)
		goto fail;

	if (p->pt->aid != AID_INET) {
		/* write MP_UNREACH_NLRI attribute length (always extended) */
		len -= 4; /* skip attribute header */
		if (ibuf_set_n16(buf, off + sizeof(len) + 2, len) == -1)
			goto fail;
	} else {
		/* no extra attributes so set attribute len to 0 */
		if (ibuf_add_zero(buf, sizeof(len)) == -1) {
			goto fail;
		}
	}

	return buf;

fail:
	/* something went horribly wrong */
	log_peer_warn(&peer->conf, "generating withdraw failed, peer desynced");
	ibuf_free(buf);
	return NULL;
}

/*
 * Write UPDATE message for changed and added routes. The size of buf limits
 * how many routes can be added. The function first dumps the path attributes
 * and then tries to add as many prefixes using these attributes.
 * Returns the new message buffer, or NULL on error, which includes
 * producing an empty message.
 */
struct ibuf *
up_dump_update(struct rde_peer *peer, uint8_t aid)
{
	struct ibuf *buf;
	struct bgpd_addr addr;
	struct prefix *p;
	size_t off, pkgsize = MAX_PKTSIZE;
	uint16_t len;
	int force_ip4mp = 0;

	p = RB_MIN(prefix_tree, &peer->updates[aid]);
	if (p == NULL)
		return NULL;

	if (peer_has_ext_msg(peer))
		pkgsize = MAX_EXT_PKTSIZE;

	if (aid == AID_INET && peer_has_ext_nexthop(peer, AID_INET)) {
		struct nexthop *nh = prefix_nexthop(p);
		if (nh != NULL && nh->exit_nexthop.aid == AID_INET6)
			force_ip4mp = 1;
	}

	if ((buf = ibuf_dynamic(4, pkgsize - MSGSIZE_HEADER)) == NULL)
		goto fail;

	/* withdrawn routes length field is 0 */
	if (ibuf_add_zero(buf, sizeof(len)) == -1)
		goto fail;

	/* reserve space for 2-byte path attribute length */
	off = ibuf_size(buf);
	if (ibuf_add_zero(buf, sizeof(len)) == -1)
		goto fail;

	if (up_generate_attr(buf, peer, prefix_aspath(p),
	    prefix_communities(p), prefix_nexthop(p), aid) == -1)
		goto drop;

	if (aid != AID_INET || force_ip4mp) {
		/* write mp attribute including nlri */

		/*
		 * RFC 7606 wants this to be first but then we need
		 * to use multiple buffers with adjusted length to
		 * merge the attributes together in reverse order of
		 * creation.
		 */
		if (up_generate_mp_reach(buf, peer, prefix_nexthop(p), aid) ==
		    -1)
			goto drop;
	}

	/* update attribute length field */
	len = ibuf_size(buf) - off - sizeof(len);
	if (ibuf_set_n16(buf, off, len) == -1)
		goto fail;

	if (aid == AID_INET && !force_ip4mp) {
		/* last but not least dump the IPv4 nlri */
		if (up_dump_prefix(buf, &peer->updates[aid], peer, 0) == -1)
			goto drop;
	}

	return buf;

drop:
	/* Not enough space. Drop current prefix, it will never fit. */
	p = RB_MIN(prefix_tree, &peer->updates[aid]);
	pt_getaddr(p->pt, &addr);
	log_peer_warnx(&peer->conf, "generating update failed, "
	    "prefix %s/%d dropped", log_addr(&addr), p->pt->prefixlen);

	up_prefix_free(&peer->updates[aid], p, peer, 0);
	return up_dump_withdraw_one(peer, p, buf);

fail:
	/* something went horribly wrong */
	log_peer_warn(&peer->conf, "generating update failed, peer desynced");
	ibuf_free(buf);
	return NULL;
}