1 /* $OpenBSD: rde.c,v 1.520 2021/05/06 09:18:54 claudio Exp $ */ 2 3 /* 4 * Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org> 5 * Copyright (c) 2016 Job Snijders <job@instituut.net> 6 * Copyright (c) 2016 Peter Hessler <phessler@openbsd.org> 7 * Copyright (c) 2018 Sebastian Benoit <benno@openbsd.org> 8 * 9 * Permission to use, copy, modify, and distribute this software for any 10 * purpose with or without fee is hereby granted, provided that the above 11 * copyright notice and this permission notice appear in all copies. 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 14 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 15 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 16 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 17 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 18 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 19 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 20 */ 21 22 #include <sys/types.h> 23 #include <sys/time.h> 24 #include <sys/resource.h> 25 26 #include <errno.h> 27 #include <pwd.h> 28 #include <poll.h> 29 #include <signal.h> 30 #include <stdio.h> 31 #include <stdlib.h> 32 #include <string.h> 33 #include <syslog.h> 34 #include <unistd.h> 35 36 #include "bgpd.h" 37 #include "rde.h" 38 #include "session.h" 39 #include "log.h" 40 41 #define PFD_PIPE_MAIN 0 42 #define PFD_PIPE_SESSION 1 43 #define PFD_PIPE_SESSION_CTL 2 44 #define PFD_PIPE_ROA 3 45 #define PFD_PIPE_COUNT 4 46 47 void rde_sighdlr(int); 48 void rde_dispatch_imsg_session(struct imsgbuf *); 49 void rde_dispatch_imsg_parent(struct imsgbuf *); 50 void rde_dispatch_imsg_rtr(struct imsgbuf *); 51 void rde_dispatch_imsg_peer(struct rde_peer *, void *); 52 void rde_update_dispatch(struct rde_peer *, struct imsg *); 53 int rde_update_update(struct rde_peer *, struct filterstate *, 54 struct bgpd_addr *, u_int8_t); 55 void rde_update_withdraw(struct rde_peer *, struct bgpd_addr *, 56 u_int8_t); 57 int rde_attr_parse(u_char *, u_int16_t, struct rde_peer *, 58 struct filterstate *, struct mpattr *); 59 int rde_attr_add(struct filterstate *, u_char *, u_int16_t); 60 u_int8_t rde_attr_missing(struct rde_aspath *, int, u_int16_t); 61 int rde_get_mp_nexthop(u_char *, u_int16_t, u_int8_t, 62 struct filterstate *); 63 void rde_as4byte_fixup(struct rde_peer *, struct rde_aspath *); 64 void rde_reflector(struct rde_peer *, struct rde_aspath *); 65 66 void rde_dump_ctx_new(struct ctl_show_rib_request *, pid_t, 67 enum imsg_type); 68 void rde_dump_ctx_throttle(pid_t, int); 69 void rde_dump_ctx_terminate(pid_t); 70 void rde_dump_mrt_new(struct mrt *, pid_t, int); 71 72 int rde_l3vpn_import(struct rde_community *, struct l3vpn *); 73 static void rde_commit_pftable(void); 74 void rde_reload_done(void); 75 static void rde_softreconfig_in_done(void *, u_int8_t); 76 static void rde_softreconfig_out_done(void *, u_int8_t); 77 static void rde_softreconfig_done(void); 78 static void rde_softreconfig_out(struct rib_entry *, void *); 79 static void rde_softreconfig_in(struct rib_entry *, void *); 80 static void rde_softreconfig_sync_reeval(struct rib_entry *, void *); 81 static void rde_softreconfig_sync_fib(struct rib_entry *, void *); 82 static void rde_softreconfig_sync_done(void *, u_int8_t); 83 static void rde_roa_reload(void); 84 static int rde_no_as_set(struct rde_peer *); 85 int rde_update_queue_pending(void); 86 void rde_update_queue_runner(void); 87 void rde_update6_queue_runner(u_int8_t); 88 struct rde_prefixset *rde_find_prefixset(char *, struct rde_prefixset_head *); 89 void rde_mark_prefixsets_dirty(struct rde_prefixset_head *, 90 struct rde_prefixset_head *); 91 u_int8_t rde_roa_validity(struct rde_prefixset *, 92 struct bgpd_addr *, u_int8_t, u_int32_t); 93 94 static void rde_peer_recv_eor(struct rde_peer *, u_int8_t); 95 static void rde_peer_send_eor(struct rde_peer *, u_int8_t); 96 97 void network_add(struct network_config *, struct filterstate *); 98 void network_delete(struct network_config *); 99 static void network_dump_upcall(struct rib_entry *, void *); 100 static void network_flush_upcall(struct rib_entry *, void *); 101 102 void rde_shutdown(void); 103 int ovs_match(struct prefix *, u_int32_t); 104 105 static struct imsgbuf *ibuf_se; 106 static struct imsgbuf *ibuf_se_ctl; 107 static struct imsgbuf *ibuf_rtr; 108 static struct imsgbuf *ibuf_main; 109 static struct bgpd_config *conf, *nconf; 110 static struct rde_prefixset rde_roa, roa_new; 111 112 volatile sig_atomic_t rde_quit = 0; 113 struct filter_head *out_rules, *out_rules_tmp; 114 struct rde_memstats rdemem; 115 int softreconfig; 116 117 extern struct rde_peer_head peerlist; 118 extern struct rde_peer *peerself; 119 120 struct rde_dump_ctx { 121 LIST_ENTRY(rde_dump_ctx) entry; 122 struct ctl_show_rib_request req; 123 u_int32_t peerid; 124 u_int8_t throttled; 125 }; 126 127 LIST_HEAD(, rde_dump_ctx) rde_dump_h = LIST_HEAD_INITIALIZER(rde_dump_h); 128 129 struct rde_mrt_ctx { 130 LIST_ENTRY(rde_mrt_ctx) entry; 131 struct mrt mrt; 132 }; 133 134 LIST_HEAD(, rde_mrt_ctx) rde_mrts = LIST_HEAD_INITIALIZER(rde_mrts); 135 u_int rde_mrt_cnt; 136 137 void 138 rde_sighdlr(int sig) 139 { 140 switch (sig) { 141 case SIGINT: 142 case SIGTERM: 143 rde_quit = 1; 144 break; 145 } 146 } 147 148 u_int32_t peerhashsize = 1024; 149 u_int32_t pathhashsize = 128 * 1024; 150 u_int32_t attrhashsize = 16 * 1024; 151 u_int32_t nexthophashsize = 1024; 152 153 void 154 rde_main(int debug, int verbose) 155 { 156 struct passwd *pw; 157 struct pollfd *pfd = NULL; 158 struct rde_mrt_ctx *mctx, *xmctx; 159 void *newp; 160 u_int pfd_elms = 0, i, j; 161 int timeout; 162 u_int8_t aid; 163 164 log_init(debug, LOG_DAEMON); 165 log_setverbose(verbose); 166 167 log_procinit(log_procnames[PROC_RDE]); 168 169 if ((pw = getpwnam(BGPD_USER)) == NULL) 170 fatal("getpwnam"); 171 172 if (chroot(pw->pw_dir) == -1) 173 fatal("chroot"); 174 if (chdir("/") == -1) 175 fatal("chdir(\"/\")"); 176 177 setproctitle("route decision engine"); 178 179 if (setgroups(1, &pw->pw_gid) || 180 setresgid(pw->pw_gid, pw->pw_gid, pw->pw_gid) || 181 setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid)) 182 fatal("can't drop privileges"); 183 184 if (pledge("stdio recvfd", NULL) == -1) 185 fatal("pledge"); 186 187 signal(SIGTERM, rde_sighdlr); 188 signal(SIGINT, rde_sighdlr); 189 signal(SIGPIPE, SIG_IGN); 190 signal(SIGHUP, SIG_IGN); 191 signal(SIGALRM, SIG_IGN); 192 signal(SIGUSR1, SIG_IGN); 193 194 if ((ibuf_main = malloc(sizeof(struct imsgbuf))) == NULL) 195 fatal(NULL); 196 imsg_init(ibuf_main, 3); 197 198 /* initialize the RIB structures */ 199 pt_init(); 200 path_init(pathhashsize); 201 aspath_init(pathhashsize); 202 communities_init(attrhashsize); 203 attr_init(attrhashsize); 204 nexthop_init(nexthophashsize); 205 peer_init(peerhashsize); 206 207 /* make sure the default RIBs are setup */ 208 rib_new("Adj-RIB-In", 0, F_RIB_NOFIB | F_RIB_NOEVALUATE); 209 210 out_rules = calloc(1, sizeof(struct filter_head)); 211 if (out_rules == NULL) 212 fatal(NULL); 213 TAILQ_INIT(out_rules); 214 215 conf = new_config(); 216 log_info("route decision engine ready"); 217 218 while (rde_quit == 0) { 219 if (pfd_elms < PFD_PIPE_COUNT + rde_mrt_cnt) { 220 if ((newp = reallocarray(pfd, 221 PFD_PIPE_COUNT + rde_mrt_cnt, 222 sizeof(struct pollfd))) == NULL) { 223 /* panic for now */ 224 log_warn("could not resize pfd from %u -> %u" 225 " entries", pfd_elms, PFD_PIPE_COUNT + 226 rde_mrt_cnt); 227 fatalx("exiting"); 228 } 229 pfd = newp; 230 pfd_elms = PFD_PIPE_COUNT + rde_mrt_cnt; 231 } 232 timeout = -1; 233 bzero(pfd, sizeof(struct pollfd) * pfd_elms); 234 235 set_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main); 236 set_pollfd(&pfd[PFD_PIPE_SESSION], ibuf_se); 237 set_pollfd(&pfd[PFD_PIPE_SESSION_CTL], ibuf_se_ctl); 238 set_pollfd(&pfd[PFD_PIPE_ROA], ibuf_rtr); 239 240 i = PFD_PIPE_COUNT; 241 for (mctx = LIST_FIRST(&rde_mrts); mctx != 0; mctx = xmctx) { 242 xmctx = LIST_NEXT(mctx, entry); 243 244 if (i >= pfd_elms) 245 fatalx("poll pfd too small"); 246 if (mctx->mrt.wbuf.queued) { 247 pfd[i].fd = mctx->mrt.wbuf.fd; 248 pfd[i].events = POLLOUT; 249 i++; 250 } else if (mctx->mrt.state == MRT_STATE_REMOVE) { 251 close(mctx->mrt.wbuf.fd); 252 LIST_REMOVE(mctx, entry); 253 free(mctx); 254 rde_mrt_cnt--; 255 } 256 } 257 258 if (rib_dump_pending() || rde_update_queue_pending() || 259 nexthop_pending() || peer_imsg_pending()) 260 timeout = 0; 261 262 if (poll(pfd, i, timeout) == -1) { 263 if (errno != EINTR) 264 fatal("poll error"); 265 continue; 266 } 267 268 if (handle_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main) == -1) 269 fatalx("Lost connection to parent"); 270 else 271 rde_dispatch_imsg_parent(ibuf_main); 272 273 if (handle_pollfd(&pfd[PFD_PIPE_SESSION], ibuf_se) == -1) { 274 log_warnx("RDE: Lost connection to SE"); 275 msgbuf_clear(&ibuf_se->w); 276 free(ibuf_se); 277 ibuf_se = NULL; 278 } else 279 rde_dispatch_imsg_session(ibuf_se); 280 281 if (handle_pollfd(&pfd[PFD_PIPE_SESSION_CTL], ibuf_se_ctl) == 282 -1) { 283 log_warnx("RDE: Lost connection to SE control"); 284 msgbuf_clear(&ibuf_se_ctl->w); 285 free(ibuf_se_ctl); 286 ibuf_se_ctl = NULL; 287 } else 288 rde_dispatch_imsg_session(ibuf_se_ctl); 289 290 if (handle_pollfd(&pfd[PFD_PIPE_ROA], ibuf_rtr) == -1) { 291 log_warnx("RDE: Lost connection to ROA"); 292 msgbuf_clear(&ibuf_rtr->w); 293 free(ibuf_rtr); 294 ibuf_rtr = NULL; 295 } else 296 rde_dispatch_imsg_rtr(ibuf_rtr); 297 298 for (j = PFD_PIPE_COUNT, mctx = LIST_FIRST(&rde_mrts); 299 j < i && mctx != 0; j++) { 300 if (pfd[j].fd == mctx->mrt.wbuf.fd && 301 pfd[j].revents & POLLOUT) 302 mrt_write(&mctx->mrt); 303 mctx = LIST_NEXT(mctx, entry); 304 } 305 306 peer_foreach(rde_dispatch_imsg_peer, NULL); 307 rib_dump_runner(); 308 nexthop_runner(); 309 if (ibuf_se && ibuf_se->w.queued < SESS_MSG_HIGH_MARK) { 310 rde_update_queue_runner(); 311 for (aid = AID_INET6; aid < AID_MAX; aid++) 312 rde_update6_queue_runner(aid); 313 } 314 /* commit pftable once per poll loop */ 315 rde_commit_pftable(); 316 } 317 318 /* do not clean up on shutdown on production, it takes ages. */ 319 if (debug) 320 rde_shutdown(); 321 322 free_config(conf); 323 free(pfd); 324 325 /* close pipes */ 326 if (ibuf_se) { 327 msgbuf_clear(&ibuf_se->w); 328 close(ibuf_se->fd); 329 free(ibuf_se); 330 } 331 if (ibuf_se_ctl) { 332 msgbuf_clear(&ibuf_se_ctl->w); 333 close(ibuf_se_ctl->fd); 334 free(ibuf_se_ctl); 335 } 336 msgbuf_clear(&ibuf_main->w); 337 close(ibuf_main->fd); 338 free(ibuf_main); 339 340 while ((mctx = LIST_FIRST(&rde_mrts)) != NULL) { 341 msgbuf_clear(&mctx->mrt.wbuf); 342 close(mctx->mrt.wbuf.fd); 343 LIST_REMOVE(mctx, entry); 344 free(mctx); 345 } 346 347 log_info("route decision engine exiting"); 348 exit(0); 349 } 350 351 struct network_config netconf_s, netconf_p; 352 struct filterstate netconf_state; 353 struct filter_set_head session_set = TAILQ_HEAD_INITIALIZER(session_set); 354 struct filter_set_head parent_set = TAILQ_HEAD_INITIALIZER(parent_set); 355 356 void 357 rde_dispatch_imsg_session(struct imsgbuf *ibuf) 358 { 359 struct imsg imsg; 360 struct peer p; 361 struct peer_config pconf; 362 struct ctl_show_set cset; 363 struct ctl_show_rib csr; 364 struct ctl_show_rib_request req; 365 struct rde_peer *peer; 366 struct rde_aspath *asp; 367 struct rde_hashstats rdehash; 368 struct filter_set *s; 369 struct as_set *aset; 370 struct rde_prefixset *pset; 371 u_int8_t *asdata; 372 ssize_t n; 373 size_t aslen; 374 int verbose; 375 u_int16_t len; 376 377 while (ibuf) { 378 if ((n = imsg_get(ibuf, &imsg)) == -1) 379 fatal("rde_dispatch_imsg_session: imsg_get error"); 380 if (n == 0) 381 break; 382 383 switch (imsg.hdr.type) { 384 case IMSG_UPDATE: 385 case IMSG_SESSION_UP: 386 case IMSG_SESSION_DOWN: 387 case IMSG_SESSION_STALE: 388 case IMSG_SESSION_FLUSH: 389 case IMSG_SESSION_RESTARTED: 390 case IMSG_REFRESH: 391 if ((peer = peer_get(imsg.hdr.peerid)) == NULL) { 392 log_warnx("rde_dispatch: unknown peer id %d", 393 imsg.hdr.peerid); 394 break; 395 } 396 peer_imsg_push(peer, &imsg); 397 break; 398 case IMSG_SESSION_ADD: 399 if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(pconf)) 400 fatalx("incorrect size of session request"); 401 memcpy(&pconf, imsg.data, sizeof(pconf)); 402 peer_add(imsg.hdr.peerid, &pconf); 403 break; 404 case IMSG_NETWORK_ADD: 405 if (imsg.hdr.len - IMSG_HEADER_SIZE != 406 sizeof(struct network_config)) { 407 log_warnx("rde_dispatch: wrong imsg len"); 408 break; 409 } 410 memcpy(&netconf_s, imsg.data, sizeof(netconf_s)); 411 TAILQ_INIT(&netconf_s.attrset); 412 rde_filterstate_prep(&netconf_state, NULL, NULL, NULL, 413 0); 414 asp = &netconf_state.aspath; 415 asp->aspath = aspath_get(NULL, 0); 416 asp->origin = ORIGIN_IGP; 417 asp->flags = F_ATTR_ORIGIN | F_ATTR_ASPATH | 418 F_ATTR_LOCALPREF | F_PREFIX_ANNOUNCED | 419 F_ANN_DYNAMIC; 420 break; 421 case IMSG_NETWORK_ASPATH: 422 if (imsg.hdr.len - IMSG_HEADER_SIZE < 423 sizeof(csr)) { 424 log_warnx("rde_dispatch: wrong imsg len"); 425 bzero(&netconf_s, sizeof(netconf_s)); 426 break; 427 } 428 aslen = imsg.hdr.len - IMSG_HEADER_SIZE - sizeof(csr); 429 asdata = imsg.data; 430 asdata += sizeof(struct ctl_show_rib); 431 memcpy(&csr, imsg.data, sizeof(csr)); 432 asp = &netconf_state.aspath; 433 asp->lpref = csr.local_pref; 434 asp->med = csr.med; 435 asp->weight = csr.weight; 436 asp->flags = csr.flags; 437 asp->origin = csr.origin; 438 asp->flags |= F_PREFIX_ANNOUNCED | F_ANN_DYNAMIC; 439 aspath_put(asp->aspath); 440 asp->aspath = aspath_get(asdata, aslen); 441 break; 442 case IMSG_NETWORK_ATTR: 443 if (imsg.hdr.len <= IMSG_HEADER_SIZE) { 444 log_warnx("rde_dispatch: wrong imsg len"); 445 break; 446 } 447 /* parse optional path attributes */ 448 len = imsg.hdr.len - IMSG_HEADER_SIZE; 449 if (rde_attr_add(&netconf_state, imsg.data, 450 len) == -1) { 451 log_warnx("rde_dispatch: bad network " 452 "attribute"); 453 rde_filterstate_clean(&netconf_state); 454 bzero(&netconf_s, sizeof(netconf_s)); 455 break; 456 } 457 break; 458 case IMSG_NETWORK_DONE: 459 if (imsg.hdr.len != IMSG_HEADER_SIZE) { 460 log_warnx("rde_dispatch: wrong imsg len"); 461 break; 462 } 463 TAILQ_CONCAT(&netconf_s.attrset, &session_set, entry); 464 switch (netconf_s.prefix.aid) { 465 case AID_INET: 466 if (netconf_s.prefixlen > 32) 467 goto badnet; 468 network_add(&netconf_s, &netconf_state); 469 break; 470 case AID_INET6: 471 if (netconf_s.prefixlen > 128) 472 goto badnet; 473 network_add(&netconf_s, &netconf_state); 474 break; 475 case 0: 476 /* something failed beforehands */ 477 break; 478 default: 479 badnet: 480 log_warnx("request to insert invalid network"); 481 break; 482 } 483 rde_filterstate_clean(&netconf_state); 484 break; 485 case IMSG_NETWORK_REMOVE: 486 if (imsg.hdr.len - IMSG_HEADER_SIZE != 487 sizeof(struct network_config)) { 488 log_warnx("rde_dispatch: wrong imsg len"); 489 break; 490 } 491 memcpy(&netconf_s, imsg.data, sizeof(netconf_s)); 492 TAILQ_INIT(&netconf_s.attrset); 493 494 switch (netconf_s.prefix.aid) { 495 case AID_INET: 496 if (netconf_s.prefixlen > 32) 497 goto badnetdel; 498 network_delete(&netconf_s); 499 break; 500 case AID_INET6: 501 if (netconf_s.prefixlen > 128) 502 goto badnetdel; 503 network_delete(&netconf_s); 504 break; 505 default: 506 badnetdel: 507 log_warnx("request to remove invalid network"); 508 break; 509 } 510 break; 511 case IMSG_NETWORK_FLUSH: 512 if (imsg.hdr.len != IMSG_HEADER_SIZE) { 513 log_warnx("rde_dispatch: wrong imsg len"); 514 break; 515 } 516 if (rib_dump_new(RIB_ADJ_IN, AID_UNSPEC, 517 RDE_RUNNER_ROUNDS, peerself, network_flush_upcall, 518 NULL, NULL) == -1) 519 log_warn("rde_dispatch: IMSG_NETWORK_FLUSH"); 520 break; 521 case IMSG_FILTER_SET: 522 if (imsg.hdr.len - IMSG_HEADER_SIZE != 523 sizeof(struct filter_set)) { 524 log_warnx("rde_dispatch: wrong imsg len"); 525 break; 526 } 527 if ((s = malloc(sizeof(struct filter_set))) == NULL) 528 fatal(NULL); 529 memcpy(s, imsg.data, sizeof(struct filter_set)); 530 if (s->type == ACTION_SET_NEXTHOP) { 531 s->action.nh_ref = 532 nexthop_get(&s->action.nexthop); 533 s->type = ACTION_SET_NEXTHOP_REF; 534 } 535 TAILQ_INSERT_TAIL(&session_set, s, entry); 536 break; 537 case IMSG_CTL_SHOW_NETWORK: 538 case IMSG_CTL_SHOW_RIB: 539 case IMSG_CTL_SHOW_RIB_PREFIX: 540 if (imsg.hdr.len != IMSG_HEADER_SIZE + sizeof(req)) { 541 log_warnx("rde_dispatch: wrong imsg len"); 542 break; 543 } 544 memcpy(&req, imsg.data, sizeof(req)); 545 rde_dump_ctx_new(&req, imsg.hdr.pid, imsg.hdr.type); 546 break; 547 case IMSG_CTL_SHOW_NEIGHBOR: 548 if (imsg.hdr.len - IMSG_HEADER_SIZE != 549 sizeof(struct peer)) { 550 log_warnx("rde_dispatch: wrong imsg len"); 551 break; 552 } 553 memcpy(&p, imsg.data, sizeof(struct peer)); 554 peer = peer_get(p.conf.id); 555 if (peer != NULL) { 556 p.stats.prefix_cnt = peer->prefix_cnt; 557 p.stats.prefix_out_cnt = peer->prefix_out_cnt; 558 p.stats.prefix_rcvd_update = 559 peer->prefix_rcvd_update; 560 p.stats.prefix_rcvd_withdraw = 561 peer->prefix_rcvd_withdraw; 562 p.stats.prefix_rcvd_eor = 563 peer->prefix_rcvd_eor; 564 p.stats.prefix_sent_update = 565 peer->prefix_sent_update; 566 p.stats.prefix_sent_withdraw = 567 peer->prefix_sent_withdraw; 568 p.stats.prefix_sent_eor = 569 peer->prefix_sent_eor; 570 } 571 imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_NEIGHBOR, 0, 572 imsg.hdr.pid, -1, &p, sizeof(struct peer)); 573 break; 574 case IMSG_CTL_SHOW_RIB_MEM: 575 imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_RIB_MEM, 0, 576 imsg.hdr.pid, -1, &rdemem, sizeof(rdemem)); 577 path_hash_stats(&rdehash); 578 imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_RIB_HASH, 0, 579 imsg.hdr.pid, -1, &rdehash, sizeof(rdehash)); 580 aspath_hash_stats(&rdehash); 581 imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_RIB_HASH, 0, 582 imsg.hdr.pid, -1, &rdehash, sizeof(rdehash)); 583 communities_hash_stats(&rdehash); 584 imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_RIB_HASH, 0, 585 imsg.hdr.pid, -1, &rdehash, sizeof(rdehash)); 586 attr_hash_stats(&rdehash); 587 imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_RIB_HASH, 0, 588 imsg.hdr.pid, -1, &rdehash, sizeof(rdehash)); 589 imsg_compose(ibuf_se_ctl, IMSG_CTL_END, 0, imsg.hdr.pid, 590 -1, NULL, 0); 591 break; 592 case IMSG_CTL_SHOW_SET: 593 /* first roa set */ 594 pset = &rde_roa; 595 memset(&cset, 0, sizeof(cset)); 596 cset.type = ROA_SET; 597 strlcpy(cset.name, "RPKI ROA", sizeof(cset.name)); 598 cset.lastchange = pset->lastchange; 599 cset.v4_cnt = pset->th.v4_cnt; 600 cset.v6_cnt = pset->th.v6_cnt; 601 imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_SET, 0, 602 imsg.hdr.pid, -1, &cset, sizeof(cset)); 603 604 SIMPLEQ_FOREACH(aset, &conf->as_sets, entry) { 605 memset(&cset, 0, sizeof(cset)); 606 cset.type = ASNUM_SET; 607 strlcpy(cset.name, aset->name, 608 sizeof(cset.name)); 609 cset.lastchange = aset->lastchange; 610 cset.as_cnt = set_nmemb(aset->set); 611 imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_SET, 0, 612 imsg.hdr.pid, -1, &cset, sizeof(cset)); 613 } 614 SIMPLEQ_FOREACH(pset, &conf->rde_prefixsets, entry) { 615 memset(&cset, 0, sizeof(cset)); 616 cset.type = PREFIX_SET; 617 strlcpy(cset.name, pset->name, 618 sizeof(cset.name)); 619 cset.lastchange = pset->lastchange; 620 cset.v4_cnt = pset->th.v4_cnt; 621 cset.v6_cnt = pset->th.v6_cnt; 622 imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_SET, 0, 623 imsg.hdr.pid, -1, &cset, sizeof(cset)); 624 } 625 SIMPLEQ_FOREACH(pset, &conf->rde_originsets, entry) { 626 memset(&cset, 0, sizeof(cset)); 627 cset.type = ORIGIN_SET; 628 strlcpy(cset.name, pset->name, 629 sizeof(cset.name)); 630 cset.lastchange = pset->lastchange; 631 cset.v4_cnt = pset->th.v4_cnt; 632 cset.v6_cnt = pset->th.v6_cnt; 633 imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_SET, 0, 634 imsg.hdr.pid, -1, &cset, sizeof(cset)); 635 } 636 imsg_compose(ibuf_se_ctl, IMSG_CTL_END, 0, imsg.hdr.pid, 637 -1, NULL, 0); 638 break; 639 case IMSG_CTL_LOG_VERBOSE: 640 /* already checked by SE */ 641 memcpy(&verbose, imsg.data, sizeof(verbose)); 642 log_setverbose(verbose); 643 break; 644 case IMSG_CTL_END: 645 imsg_compose(ibuf_se_ctl, IMSG_CTL_END, 0, imsg.hdr.pid, 646 -1, NULL, 0); 647 break; 648 case IMSG_CTL_TERMINATE: 649 rde_dump_ctx_terminate(imsg.hdr.pid); 650 break; 651 case IMSG_XON: 652 if (imsg.hdr.peerid) { 653 peer = peer_get(imsg.hdr.peerid); 654 if (peer) 655 peer->throttled = 0; 656 } else { 657 rde_dump_ctx_throttle(imsg.hdr.pid, 0); 658 } 659 break; 660 case IMSG_XOFF: 661 if (imsg.hdr.peerid) { 662 peer = peer_get(imsg.hdr.peerid); 663 if (peer) 664 peer->throttled = 1; 665 } else { 666 rde_dump_ctx_throttle(imsg.hdr.pid, 1); 667 } 668 break; 669 case IMSG_RECONF_DRAIN: 670 imsg_compose(ibuf_se, IMSG_RECONF_DRAIN, 0, 0, 671 -1, NULL, 0); 672 break; 673 default: 674 break; 675 } 676 imsg_free(&imsg); 677 } 678 } 679 680 void 681 rde_dispatch_imsg_parent(struct imsgbuf *ibuf) 682 { 683 static struct rde_prefixset *last_prefixset; 684 static struct as_set *last_as_set; 685 static struct l3vpn *vpn; 686 struct imsg imsg; 687 struct mrt xmrt; 688 struct roa roa; 689 struct rde_rib rr; 690 struct filterstate state; 691 struct imsgbuf *i; 692 struct filter_head *nr; 693 struct filter_rule *r; 694 struct filter_set *s; 695 struct rib *rib; 696 struct rde_prefixset *ps; 697 struct rde_aspath *asp; 698 struct prefixset_item psi; 699 char *name; 700 size_t nmemb; 701 int n, fd, rv; 702 u_int16_t rid; 703 704 while (ibuf) { 705 if ((n = imsg_get(ibuf, &imsg)) == -1) 706 fatal("rde_dispatch_imsg_parent: imsg_get error"); 707 if (n == 0) 708 break; 709 710 switch (imsg.hdr.type) { 711 case IMSG_SOCKET_CONN: 712 case IMSG_SOCKET_CONN_CTL: 713 case IMSG_SOCKET_CONN_RTR: 714 if ((fd = imsg.fd) == -1) { 715 log_warnx("expected to receive imsg fd " 716 "but didn't receive any"); 717 break; 718 } 719 if ((i = malloc(sizeof(struct imsgbuf))) == NULL) 720 fatal(NULL); 721 imsg_init(i, fd); 722 switch (imsg.hdr.type) { 723 case IMSG_SOCKET_CONN: 724 if (ibuf_se) { 725 log_warnx("Unexpected imsg connection " 726 "to SE received"); 727 msgbuf_clear(&ibuf_se->w); 728 free(ibuf_se); 729 } 730 ibuf_se = i; 731 break; 732 case IMSG_SOCKET_CONN_CTL: 733 if (ibuf_se_ctl) { 734 log_warnx("Unexpected imsg ctl " 735 "connection to SE received"); 736 msgbuf_clear(&ibuf_se_ctl->w); 737 free(ibuf_se_ctl); 738 } 739 ibuf_se_ctl = i; 740 break; 741 case IMSG_SOCKET_CONN_RTR: 742 if (ibuf_rtr) { 743 log_warnx("Unexpected imsg ctl " 744 "connection to ROA received"); 745 msgbuf_clear(&ibuf_rtr->w); 746 free(ibuf_rtr); 747 } 748 ibuf_rtr = i; 749 break; 750 } 751 break; 752 case IMSG_NETWORK_ADD: 753 if (imsg.hdr.len - IMSG_HEADER_SIZE != 754 sizeof(struct network_config)) { 755 log_warnx("rde_dispatch: wrong imsg len"); 756 break; 757 } 758 memcpy(&netconf_p, imsg.data, sizeof(netconf_p)); 759 TAILQ_INIT(&netconf_p.attrset); 760 break; 761 case IMSG_NETWORK_DONE: 762 TAILQ_CONCAT(&netconf_p.attrset, &parent_set, entry); 763 764 rde_filterstate_prep(&state, NULL, NULL, NULL, 0); 765 asp = &state.aspath; 766 asp->aspath = aspath_get(NULL, 0); 767 asp->origin = ORIGIN_IGP; 768 asp->flags = F_ATTR_ORIGIN | F_ATTR_ASPATH | 769 F_ATTR_LOCALPREF | F_PREFIX_ANNOUNCED; 770 771 network_add(&netconf_p, &state); 772 rde_filterstate_clean(&state); 773 break; 774 case IMSG_NETWORK_REMOVE: 775 if (imsg.hdr.len - IMSG_HEADER_SIZE != 776 sizeof(struct network_config)) { 777 log_warnx("rde_dispatch: wrong imsg len"); 778 break; 779 } 780 memcpy(&netconf_p, imsg.data, sizeof(netconf_p)); 781 TAILQ_INIT(&netconf_p.attrset); 782 network_delete(&netconf_p); 783 break; 784 case IMSG_RECONF_CONF: 785 if (imsg.hdr.len - IMSG_HEADER_SIZE != 786 sizeof(struct bgpd_config)) 787 fatalx("IMSG_RECONF_CONF bad len"); 788 out_rules_tmp = calloc(1, sizeof(struct filter_head)); 789 if (out_rules_tmp == NULL) 790 fatal(NULL); 791 TAILQ_INIT(out_rules_tmp); 792 nconf = new_config(); 793 copy_config(nconf, imsg.data); 794 795 for (rid = 0; rid < rib_size; rid++) { 796 if ((rib = rib_byid(rid)) == NULL) 797 continue; 798 rib->state = RECONF_DELETE; 799 rib->fibstate = RECONF_NONE; 800 } 801 break; 802 case IMSG_RECONF_RIB: 803 if (imsg.hdr.len - IMSG_HEADER_SIZE != 804 sizeof(struct rde_rib)) 805 fatalx("IMSG_RECONF_RIB bad len"); 806 memcpy(&rr, imsg.data, sizeof(rr)); 807 rib = rib_byid(rib_find(rr.name)); 808 if (rib == NULL) { 809 rib = rib_new(rr.name, rr.rtableid, rr.flags); 810 } else if (rib->flags == rr.flags && 811 rib->rtableid == rr.rtableid) { 812 /* no change to rib apart from filters */ 813 rib->state = RECONF_KEEP; 814 } else { 815 /* reload rib because somehing changed */ 816 rib->flags_tmp = rr.flags; 817 rib->rtableid_tmp = rr.rtableid; 818 rib->state = RECONF_RELOAD; 819 } 820 break; 821 case IMSG_RECONF_FILTER: 822 if (imsg.hdr.len - IMSG_HEADER_SIZE != 823 sizeof(struct filter_rule)) 824 fatalx("IMSG_RECONF_FILTER bad len"); 825 if ((r = malloc(sizeof(struct filter_rule))) == NULL) 826 fatal(NULL); 827 memcpy(r, imsg.data, sizeof(struct filter_rule)); 828 if (r->match.prefixset.name[0] != '\0') { 829 r->match.prefixset.ps = 830 rde_find_prefixset(r->match.prefixset.name, 831 &nconf->rde_prefixsets); 832 if (r->match.prefixset.ps == NULL) 833 log_warnx("%s: no prefixset for %s", 834 __func__, r->match.prefixset.name); 835 } 836 if (r->match.originset.name[0] != '\0') { 837 r->match.originset.ps = 838 rde_find_prefixset(r->match.originset.name, 839 &nconf->rde_originsets); 840 if (r->match.originset.ps == NULL) 841 log_warnx("%s: no origin-set for %s", 842 __func__, r->match.originset.name); 843 } 844 if (r->match.as.flags & AS_FLAG_AS_SET_NAME) { 845 struct as_set * aset; 846 847 aset = as_sets_lookup(&nconf->as_sets, 848 r->match.as.name); 849 if (aset == NULL) { 850 log_warnx("%s: no as-set for %s", 851 __func__, r->match.as.name); 852 } else { 853 r->match.as.flags = AS_FLAG_AS_SET; 854 r->match.as.aset = aset; 855 } 856 } 857 TAILQ_INIT(&r->set); 858 TAILQ_CONCAT(&r->set, &parent_set, entry); 859 if ((rib = rib_byid(rib_find(r->rib))) == NULL) { 860 log_warnx("IMSG_RECONF_FILTER: filter rule " 861 "for nonexistent rib %s", r->rib); 862 free(r); 863 break; 864 } 865 r->peer.ribid = rib->id; 866 if (r->dir == DIR_IN) { 867 nr = rib->in_rules_tmp; 868 if (nr == NULL) { 869 nr = calloc(1, 870 sizeof(struct filter_head)); 871 if (nr == NULL) 872 fatal(NULL); 873 TAILQ_INIT(nr); 874 rib->in_rules_tmp = nr; 875 } 876 TAILQ_INSERT_TAIL(nr, r, entry); 877 } else 878 TAILQ_INSERT_TAIL(out_rules_tmp, r, entry); 879 break; 880 case IMSG_RECONF_PREFIX_SET: 881 case IMSG_RECONF_ORIGIN_SET: 882 if (imsg.hdr.len - IMSG_HEADER_SIZE != 883 sizeof(ps->name)) 884 fatalx("IMSG_RECONF_PREFIX_SET bad len"); 885 ps = calloc(1, sizeof(struct rde_prefixset)); 886 if (ps == NULL) 887 fatal(NULL); 888 memcpy(ps->name, imsg.data, sizeof(ps->name)); 889 if (imsg.hdr.type == IMSG_RECONF_ORIGIN_SET) { 890 SIMPLEQ_INSERT_TAIL(&nconf->rde_originsets, ps, 891 entry); 892 } else { 893 SIMPLEQ_INSERT_TAIL(&nconf->rde_prefixsets, ps, 894 entry); 895 } 896 last_prefixset = ps; 897 break; 898 case IMSG_RECONF_ROA_ITEM: 899 if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(roa)) 900 fatalx("IMSG_RECONF_ROA_ITEM bad len"); 901 memcpy(&roa, imsg.data, sizeof(roa)); 902 rv = trie_roa_add(&last_prefixset->th, &roa); 903 break; 904 case IMSG_RECONF_PREFIX_SET_ITEM: 905 if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(psi)) 906 fatalx("IMSG_RECONF_PREFIX_SET_ITEM bad len"); 907 memcpy(&psi, imsg.data, sizeof(psi)); 908 if (last_prefixset == NULL) 909 fatalx("King Bula has no prefixset"); 910 rv = trie_add(&last_prefixset->th, 911 &psi.p.addr, psi.p.len, 912 psi.p.len_min, psi.p.len_max); 913 if (rv == -1) 914 log_warnx("trie_add(%s) %s/%u failed", 915 last_prefixset->name, log_addr(&psi.p.addr), 916 psi.p.len); 917 break; 918 case IMSG_RECONF_AS_SET: 919 if (imsg.hdr.len - IMSG_HEADER_SIZE != 920 sizeof(nmemb) + SET_NAME_LEN) 921 fatalx("IMSG_RECONF_AS_SET bad len"); 922 memcpy(&nmemb, imsg.data, sizeof(nmemb)); 923 name = (char *)imsg.data + sizeof(nmemb); 924 if (as_sets_lookup(&nconf->as_sets, name) != NULL) 925 fatalx("duplicate as-set %s", name); 926 last_as_set = as_sets_new(&nconf->as_sets, name, nmemb, 927 sizeof(u_int32_t)); 928 break; 929 case IMSG_RECONF_AS_SET_ITEMS: 930 nmemb = imsg.hdr.len - IMSG_HEADER_SIZE; 931 nmemb /= sizeof(u_int32_t); 932 if (set_add(last_as_set->set, imsg.data, nmemb) != 0) 933 fatal(NULL); 934 break; 935 case IMSG_RECONF_AS_SET_DONE: 936 set_prep(last_as_set->set); 937 last_as_set = NULL; 938 break; 939 case IMSG_RECONF_VPN: 940 if (imsg.hdr.len - IMSG_HEADER_SIZE != 941 sizeof(struct l3vpn)) 942 fatalx("IMSG_RECONF_VPN bad len"); 943 if ((vpn = malloc(sizeof(struct l3vpn))) == NULL) 944 fatal(NULL); 945 memcpy(vpn, imsg.data, sizeof(struct l3vpn)); 946 TAILQ_INIT(&vpn->import); 947 TAILQ_INIT(&vpn->export); 948 TAILQ_INIT(&vpn->net_l); 949 SIMPLEQ_INSERT_TAIL(&nconf->l3vpns, vpn, entry); 950 break; 951 case IMSG_RECONF_VPN_EXPORT: 952 if (vpn == NULL) { 953 log_warnx("rde_dispatch_imsg_parent: " 954 "IMSG_RECONF_VPN_EXPORT unexpected"); 955 break; 956 } 957 TAILQ_CONCAT(&vpn->export, &parent_set, entry); 958 break; 959 case IMSG_RECONF_VPN_IMPORT: 960 if (vpn == NULL) { 961 log_warnx("rde_dispatch_imsg_parent: " 962 "IMSG_RECONF_VPN_IMPORT unexpected"); 963 break; 964 } 965 TAILQ_CONCAT(&vpn->import, &parent_set, entry); 966 break; 967 case IMSG_RECONF_VPN_DONE: 968 break; 969 case IMSG_RECONF_DRAIN: 970 imsg_compose(ibuf_main, IMSG_RECONF_DRAIN, 0, 0, 971 -1, NULL, 0); 972 break; 973 case IMSG_RECONF_DONE: 974 if (nconf == NULL) 975 fatalx("got IMSG_RECONF_DONE but no config"); 976 last_prefixset = NULL; 977 978 rde_reload_done(); 979 break; 980 case IMSG_NEXTHOP_UPDATE: 981 nexthop_update(imsg.data); 982 break; 983 case IMSG_FILTER_SET: 984 if (imsg.hdr.len > IMSG_HEADER_SIZE + 985 sizeof(struct filter_set)) 986 fatalx("IMSG_FILTER_SET bad len"); 987 if ((s = malloc(sizeof(struct filter_set))) == NULL) 988 fatal(NULL); 989 memcpy(s, imsg.data, sizeof(struct filter_set)); 990 if (s->type == ACTION_SET_NEXTHOP) { 991 s->action.nh_ref = 992 nexthop_get(&s->action.nexthop); 993 s->type = ACTION_SET_NEXTHOP_REF; 994 } 995 TAILQ_INSERT_TAIL(&parent_set, s, entry); 996 break; 997 case IMSG_MRT_OPEN: 998 case IMSG_MRT_REOPEN: 999 if (imsg.hdr.len > IMSG_HEADER_SIZE + 1000 sizeof(struct mrt)) { 1001 log_warnx("wrong imsg len"); 1002 break; 1003 } 1004 memcpy(&xmrt, imsg.data, sizeof(xmrt)); 1005 if ((fd = imsg.fd) == -1) 1006 log_warnx("expected to receive fd for mrt dump " 1007 "but didn't receive any"); 1008 else if (xmrt.type == MRT_TABLE_DUMP || 1009 xmrt.type == MRT_TABLE_DUMP_MP || 1010 xmrt.type == MRT_TABLE_DUMP_V2) { 1011 rde_dump_mrt_new(&xmrt, imsg.hdr.pid, fd); 1012 } else 1013 close(fd); 1014 break; 1015 case IMSG_MRT_CLOSE: 1016 /* ignore end message because a dump is atomic */ 1017 break; 1018 default: 1019 fatalx("unhandled IMSG %u", imsg.hdr.type); 1020 } 1021 imsg_free(&imsg); 1022 } 1023 } 1024 1025 void 1026 rde_dispatch_imsg_rtr(struct imsgbuf *ibuf) 1027 { 1028 struct imsg imsg; 1029 struct roa roa; 1030 int n; 1031 1032 while (ibuf) { 1033 if ((n = imsg_get(ibuf, &imsg)) == -1) 1034 fatal("rde_dispatch_imsg_parent: imsg_get error"); 1035 if (n == 0) 1036 break; 1037 1038 switch (imsg.hdr.type) { 1039 case IMSG_RECONF_ROA_SET: 1040 /* start of update */ 1041 break; 1042 case IMSG_RECONF_ROA_ITEM: 1043 if (imsg.hdr.len - IMSG_HEADER_SIZE != 1044 sizeof(roa)) 1045 fatalx("IMSG_RECONF_ROA_ITEM bad len"); 1046 memcpy(&roa, imsg.data, sizeof(roa)); 1047 if (trie_roa_add(&roa_new.th, &roa) != 0) { 1048 struct bgpd_addr p = { 1049 .aid = roa.aid, 1050 .v6 = roa.prefix.inet6 1051 }; 1052 log_warnx("trie_roa_add %s/%u failed", 1053 log_addr(&p), roa.prefixlen); 1054 } 1055 break; 1056 case IMSG_RECONF_DONE: 1057 /* end of update */ 1058 rde_roa_reload(); 1059 break; 1060 } 1061 imsg_free(&imsg); 1062 } 1063 } 1064 1065 void 1066 rde_dispatch_imsg_peer(struct rde_peer *peer, void *bula) 1067 { 1068 struct session_up sup; 1069 struct imsg imsg; 1070 u_int8_t aid; 1071 1072 if (!peer_imsg_pop(peer, &imsg)) 1073 return; 1074 1075 switch (imsg.hdr.type) { 1076 case IMSG_UPDATE: 1077 rde_update_dispatch(peer, &imsg); 1078 break; 1079 case IMSG_SESSION_UP: 1080 if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(sup)) 1081 fatalx("incorrect size of session request"); 1082 memcpy(&sup, imsg.data, sizeof(sup)); 1083 if (peer_up(peer, &sup) == -1) { 1084 peer->state = PEER_DOWN; 1085 imsg_compose(ibuf_se, IMSG_SESSION_DOWN, peer->conf.id, 1086 0, -1, NULL, 0); 1087 } 1088 break; 1089 case IMSG_SESSION_DOWN: 1090 peer_down(peer, NULL); 1091 break; 1092 case IMSG_SESSION_STALE: 1093 case IMSG_SESSION_FLUSH: 1094 case IMSG_SESSION_RESTARTED: 1095 case IMSG_REFRESH: 1096 if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(aid)) { 1097 log_warnx("%s: wrong imsg len", __func__); 1098 break; 1099 } 1100 memcpy(&aid, imsg.data, sizeof(aid)); 1101 if (aid >= AID_MAX) { 1102 log_warnx("%s: bad AID", __func__); 1103 break; 1104 } 1105 1106 switch (imsg.hdr.type) { 1107 case IMSG_SESSION_STALE: 1108 peer_stale(peer, aid); 1109 break; 1110 case IMSG_SESSION_FLUSH: 1111 peer_flush(peer, aid, peer->staletime[aid]); 1112 break; 1113 case IMSG_SESSION_RESTARTED: 1114 if (peer->staletime[aid]) 1115 peer_flush(peer, aid, peer->staletime[aid]); 1116 break; 1117 case IMSG_REFRESH: 1118 peer_dump(peer, aid); 1119 break; 1120 } 1121 break; 1122 default: 1123 log_warnx("%s: unhandled imsg type %d", __func__, 1124 imsg.hdr.type); 1125 break; 1126 } 1127 1128 imsg_free(&imsg); 1129 } 1130 1131 /* handle routing updates from the session engine. */ 1132 void 1133 rde_update_dispatch(struct rde_peer *peer, struct imsg *imsg) 1134 { 1135 struct filterstate state; 1136 struct bgpd_addr prefix; 1137 struct mpattr mpa; 1138 u_char *p, *mpp = NULL; 1139 int pos = 0; 1140 u_int16_t afi, len, mplen; 1141 u_int16_t withdrawn_len; 1142 u_int16_t attrpath_len; 1143 u_int16_t nlri_len; 1144 u_int8_t aid, prefixlen, safi, subtype; 1145 u_int32_t fas; 1146 1147 p = imsg->data; 1148 1149 if (imsg->hdr.len < IMSG_HEADER_SIZE + 2) { 1150 rde_update_err(peer, ERR_UPDATE, ERR_UPD_ATTRLIST, NULL, 0); 1151 return; 1152 } 1153 1154 memcpy(&len, p, 2); 1155 withdrawn_len = ntohs(len); 1156 p += 2; 1157 if (imsg->hdr.len < IMSG_HEADER_SIZE + 2 + withdrawn_len + 2) { 1158 rde_update_err(peer, ERR_UPDATE, ERR_UPD_ATTRLIST, NULL, 0); 1159 return; 1160 } 1161 1162 p += withdrawn_len; 1163 memcpy(&len, p, 2); 1164 attrpath_len = len = ntohs(len); 1165 p += 2; 1166 if (imsg->hdr.len < 1167 IMSG_HEADER_SIZE + 2 + withdrawn_len + 2 + attrpath_len) { 1168 rde_update_err(peer, ERR_UPDATE, ERR_UPD_ATTRLIST, NULL, 0); 1169 return; 1170 } 1171 1172 nlri_len = 1173 imsg->hdr.len - IMSG_HEADER_SIZE - 4 - withdrawn_len - attrpath_len; 1174 1175 if (attrpath_len == 0) { 1176 /* 0 = no NLRI information in this message */ 1177 if (nlri_len != 0) { 1178 /* crap at end of update which should not be there */ 1179 rde_update_err(peer, ERR_UPDATE, 1180 ERR_UPD_ATTRLIST, NULL, 0); 1181 return; 1182 } 1183 if (withdrawn_len == 0) { 1184 /* EoR marker */ 1185 rde_peer_recv_eor(peer, AID_INET); 1186 return; 1187 } 1188 } 1189 1190 bzero(&mpa, sizeof(mpa)); 1191 rde_filterstate_prep(&state, NULL, NULL, NULL, 0); 1192 if (attrpath_len != 0) { /* 0 = no NLRI information in this message */ 1193 /* parse path attributes */ 1194 while (len > 0) { 1195 if ((pos = rde_attr_parse(p, len, peer, &state, 1196 &mpa)) < 0) 1197 goto done; 1198 p += pos; 1199 len -= pos; 1200 } 1201 1202 /* check for missing but necessary attributes */ 1203 if ((subtype = rde_attr_missing(&state.aspath, peer->conf.ebgp, 1204 nlri_len))) { 1205 rde_update_err(peer, ERR_UPDATE, ERR_UPD_MISSNG_WK_ATTR, 1206 &subtype, sizeof(u_int8_t)); 1207 goto done; 1208 } 1209 1210 rde_as4byte_fixup(peer, &state.aspath); 1211 1212 /* enforce remote AS if requested */ 1213 if (state.aspath.flags & F_ATTR_ASPATH && 1214 peer->conf.enforce_as == ENFORCE_AS_ON) { 1215 fas = aspath_neighbor(state.aspath.aspath); 1216 if (peer->conf.remote_as != fas) { 1217 log_peer_warnx(&peer->conf, "bad path, " 1218 "starting with %s, " 1219 "enforce neighbor-as enabled", log_as(fas)); 1220 rde_update_err(peer, ERR_UPDATE, ERR_UPD_ASPATH, 1221 NULL, 0); 1222 goto done; 1223 } 1224 } 1225 1226 /* aspath needs to be loop free. This is not a hard error. */ 1227 if (state.aspath.flags & F_ATTR_ASPATH && 1228 peer->conf.ebgp && 1229 peer->conf.enforce_local_as == ENFORCE_AS_ON && 1230 !aspath_loopfree(state.aspath.aspath, peer->conf.local_as)) 1231 state.aspath.flags |= F_ATTR_LOOP; 1232 1233 rde_reflector(peer, &state.aspath); 1234 } 1235 1236 p = imsg->data; 1237 len = withdrawn_len; 1238 p += 2; 1239 1240 /* withdraw prefix */ 1241 while (len > 0) { 1242 if ((pos = nlri_get_prefix(p, len, &prefix, 1243 &prefixlen)) == -1) { 1244 /* 1245 * the RFC does not mention what we should do in 1246 * this case. Let's do the same as in the NLRI case. 1247 */ 1248 log_peer_warnx(&peer->conf, "bad withdraw prefix"); 1249 rde_update_err(peer, ERR_UPDATE, ERR_UPD_NETWORK, 1250 NULL, 0); 1251 goto done; 1252 } 1253 p += pos; 1254 len -= pos; 1255 1256 if (peer->capa.mp[AID_INET] == 0) { 1257 log_peer_warnx(&peer->conf, 1258 "bad withdraw, %s disabled", aid2str(AID_INET)); 1259 rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR, 1260 NULL, 0); 1261 goto done; 1262 } 1263 1264 rde_update_withdraw(peer, &prefix, prefixlen); 1265 } 1266 1267 /* withdraw MP_UNREACH_NLRI if available */ 1268 if (mpa.unreach_len != 0) { 1269 mpp = mpa.unreach; 1270 mplen = mpa.unreach_len; 1271 memcpy(&afi, mpp, 2); 1272 mpp += 2; 1273 mplen -= 2; 1274 afi = ntohs(afi); 1275 safi = *mpp++; 1276 mplen--; 1277 1278 if (afi2aid(afi, safi, &aid) == -1) { 1279 log_peer_warnx(&peer->conf, 1280 "bad AFI/SAFI pair in withdraw"); 1281 rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR, 1282 NULL, 0); 1283 goto done; 1284 } 1285 1286 if (peer->capa.mp[aid] == 0) { 1287 log_peer_warnx(&peer->conf, 1288 "bad withdraw, %s disabled", aid2str(aid)); 1289 rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR, 1290 NULL, 0); 1291 goto done; 1292 } 1293 1294 if ((state.aspath.flags & ~F_ATTR_MP_UNREACH) == 0 && 1295 mplen == 0) { 1296 /* EoR marker */ 1297 rde_peer_recv_eor(peer, aid); 1298 } 1299 1300 switch (aid) { 1301 case AID_INET6: 1302 while (mplen > 0) { 1303 if ((pos = nlri_get_prefix6(mpp, mplen, 1304 &prefix, &prefixlen)) == -1) { 1305 log_peer_warnx(&peer->conf, 1306 "bad IPv6 withdraw prefix"); 1307 rde_update_err(peer, ERR_UPDATE, 1308 ERR_UPD_OPTATTR, 1309 mpa.unreach, mpa.unreach_len); 1310 goto done; 1311 } 1312 mpp += pos; 1313 mplen -= pos; 1314 1315 rde_update_withdraw(peer, &prefix, prefixlen); 1316 } 1317 break; 1318 case AID_VPN_IPv4: 1319 while (mplen > 0) { 1320 if ((pos = nlri_get_vpn4(mpp, mplen, 1321 &prefix, &prefixlen, 1)) == -1) { 1322 log_peer_warnx(&peer->conf, 1323 "bad VPNv4 withdraw prefix"); 1324 rde_update_err(peer, ERR_UPDATE, 1325 ERR_UPD_OPTATTR, 1326 mpa.unreach, mpa.unreach_len); 1327 goto done; 1328 } 1329 mpp += pos; 1330 mplen -= pos; 1331 1332 rde_update_withdraw(peer, &prefix, prefixlen); 1333 } 1334 break; 1335 case AID_VPN_IPv6: 1336 while (mplen > 0) { 1337 if ((pos = nlri_get_vpn6(mpp, mplen, 1338 &prefix, &prefixlen, 1)) == -1) { 1339 log_peer_warnx(&peer->conf, 1340 "bad VPNv6 withdraw prefix"); 1341 rde_update_err(peer, ERR_UPDATE, 1342 ERR_UPD_OPTATTR, mpa.unreach, 1343 mpa.unreach_len); 1344 goto done; 1345 } 1346 mpp += pos; 1347 mplen -= pos; 1348 1349 rde_update_withdraw(peer, &prefix, prefixlen); 1350 } 1351 break; 1352 default: 1353 /* silently ignore unsupported multiprotocol AF */ 1354 break; 1355 } 1356 1357 if ((state.aspath.flags & ~F_ATTR_MP_UNREACH) == 0) 1358 goto done; 1359 } 1360 1361 /* shift to NLRI information */ 1362 p += 2 + attrpath_len; 1363 1364 /* parse nlri prefix */ 1365 while (nlri_len > 0) { 1366 if ((pos = nlri_get_prefix(p, nlri_len, &prefix, 1367 &prefixlen)) == -1) { 1368 log_peer_warnx(&peer->conf, "bad nlri prefix"); 1369 rde_update_err(peer, ERR_UPDATE, ERR_UPD_NETWORK, 1370 NULL, 0); 1371 goto done; 1372 } 1373 p += pos; 1374 nlri_len -= pos; 1375 1376 if (peer->capa.mp[AID_INET] == 0) { 1377 log_peer_warnx(&peer->conf, 1378 "bad update, %s disabled", aid2str(AID_INET)); 1379 rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR, 1380 NULL, 0); 1381 goto done; 1382 } 1383 1384 if (rde_update_update(peer, &state, &prefix, prefixlen) == -1) 1385 goto done; 1386 1387 } 1388 1389 /* add MP_REACH_NLRI if available */ 1390 if (mpa.reach_len != 0) { 1391 mpp = mpa.reach; 1392 mplen = mpa.reach_len; 1393 memcpy(&afi, mpp, 2); 1394 mpp += 2; 1395 mplen -= 2; 1396 afi = ntohs(afi); 1397 safi = *mpp++; 1398 mplen--; 1399 1400 if (afi2aid(afi, safi, &aid) == -1) { 1401 log_peer_warnx(&peer->conf, 1402 "bad AFI/SAFI pair in update"); 1403 rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR, 1404 NULL, 0); 1405 goto done; 1406 } 1407 1408 if (peer->capa.mp[aid] == 0) { 1409 log_peer_warnx(&peer->conf, 1410 "bad update, %s disabled", aid2str(aid)); 1411 rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR, 1412 NULL, 0); 1413 goto done; 1414 } 1415 1416 /* unlock the previously locked nexthop, it is no longer used */ 1417 nexthop_unref(state.nexthop); 1418 state.nexthop = NULL; 1419 if ((pos = rde_get_mp_nexthop(mpp, mplen, aid, &state)) == -1) { 1420 log_peer_warnx(&peer->conf, "bad nlri nexthop"); 1421 rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR, 1422 mpa.reach, mpa.reach_len); 1423 goto done; 1424 } 1425 mpp += pos; 1426 mplen -= pos; 1427 1428 switch (aid) { 1429 case AID_INET6: 1430 while (mplen > 0) { 1431 if ((pos = nlri_get_prefix6(mpp, mplen, 1432 &prefix, &prefixlen)) == -1) { 1433 log_peer_warnx(&peer->conf, 1434 "bad IPv6 nlri prefix"); 1435 rde_update_err(peer, ERR_UPDATE, 1436 ERR_UPD_OPTATTR, 1437 mpa.reach, mpa.reach_len); 1438 goto done; 1439 } 1440 mpp += pos; 1441 mplen -= pos; 1442 1443 if (rde_update_update(peer, &state, &prefix, 1444 prefixlen) == -1) 1445 goto done; 1446 } 1447 break; 1448 case AID_VPN_IPv4: 1449 while (mplen > 0) { 1450 if ((pos = nlri_get_vpn4(mpp, mplen, 1451 &prefix, &prefixlen, 0)) == -1) { 1452 log_peer_warnx(&peer->conf, 1453 "bad VPNv4 nlri prefix"); 1454 rde_update_err(peer, ERR_UPDATE, 1455 ERR_UPD_OPTATTR, 1456 mpa.reach, mpa.reach_len); 1457 goto done; 1458 } 1459 mpp += pos; 1460 mplen -= pos; 1461 1462 if (rde_update_update(peer, &state, &prefix, 1463 prefixlen) == -1) 1464 goto done; 1465 } 1466 break; 1467 case AID_VPN_IPv6: 1468 while (mplen > 0) { 1469 if ((pos = nlri_get_vpn6(mpp, mplen, 1470 &prefix, &prefixlen, 0)) == -1) { 1471 log_peer_warnx(&peer->conf, 1472 "bad VPNv6 nlri prefix"); 1473 rde_update_err(peer, ERR_UPDATE, 1474 ERR_UPD_OPTATTR, 1475 mpa.reach, mpa.reach_len); 1476 goto done; 1477 } 1478 mpp += pos; 1479 mplen -= pos; 1480 1481 if (rde_update_update(peer, &state, &prefix, 1482 prefixlen) == -1) 1483 goto done; 1484 } 1485 break; 1486 default: 1487 /* silently ignore unsupported multiprotocol AF */ 1488 break; 1489 } 1490 } 1491 1492 done: 1493 rde_filterstate_clean(&state); 1494 } 1495 1496 int 1497 rde_update_update(struct rde_peer *peer, struct filterstate *in, 1498 struct bgpd_addr *prefix, u_int8_t prefixlen) 1499 { 1500 struct filterstate state; 1501 enum filter_actions action; 1502 u_int8_t vstate; 1503 u_int16_t i; 1504 const char *wmsg = "filtered, withdraw"; 1505 1506 peer->prefix_rcvd_update++; 1507 vstate = rde_roa_validity(&rde_roa, prefix, prefixlen, 1508 aspath_origin(in->aspath.aspath)); 1509 1510 /* add original path to the Adj-RIB-In */ 1511 if (prefix_update(rib_byid(RIB_ADJ_IN), peer, in, prefix, prefixlen, 1512 vstate) == 1) 1513 peer->prefix_cnt++; 1514 1515 /* max prefix checker */ 1516 if (peer->conf.max_prefix && peer->prefix_cnt > peer->conf.max_prefix) { 1517 log_peer_warnx(&peer->conf, "prefix limit reached (>%u/%u)", 1518 peer->prefix_cnt, peer->conf.max_prefix); 1519 rde_update_err(peer, ERR_CEASE, ERR_CEASE_MAX_PREFIX, NULL, 0); 1520 return (-1); 1521 } 1522 1523 if (in->aspath.flags & F_ATTR_PARSE_ERR) 1524 wmsg = "path invalid, withdraw"; 1525 1526 for (i = RIB_LOC_START; i < rib_size; i++) { 1527 struct rib *rib = rib_byid(i); 1528 if (rib == NULL) 1529 continue; 1530 rde_filterstate_prep(&state, &in->aspath, &in->communities, 1531 in->nexthop, in->nhflags); 1532 /* input filter */ 1533 action = rde_filter(rib->in_rules, peer, peer, prefix, 1534 prefixlen, vstate, &state); 1535 1536 if (action == ACTION_ALLOW) { 1537 rde_update_log("update", i, peer, 1538 &state.nexthop->exit_nexthop, prefix, 1539 prefixlen); 1540 prefix_update(rib, peer, &state, prefix, 1541 prefixlen, vstate); 1542 } else if (prefix_withdraw(rib, peer, prefix, 1543 prefixlen)) { 1544 rde_update_log(wmsg, i, peer, 1545 NULL, prefix, prefixlen); 1546 } 1547 1548 /* clear state */ 1549 rde_filterstate_clean(&state); 1550 } 1551 return (0); 1552 } 1553 1554 void 1555 rde_update_withdraw(struct rde_peer *peer, struct bgpd_addr *prefix, 1556 u_int8_t prefixlen) 1557 { 1558 u_int16_t i; 1559 1560 for (i = RIB_LOC_START; i < rib_size; i++) { 1561 struct rib *rib = rib_byid(i); 1562 if (rib == NULL) 1563 continue; 1564 if (prefix_withdraw(rib, peer, prefix, prefixlen)) 1565 rde_update_log("withdraw", i, peer, NULL, prefix, 1566 prefixlen); 1567 } 1568 1569 /* remove original path form the Adj-RIB-In */ 1570 if (prefix_withdraw(rib_byid(RIB_ADJ_IN), peer, prefix, prefixlen)) 1571 peer->prefix_cnt--; 1572 1573 peer->prefix_rcvd_withdraw++; 1574 } 1575 1576 /* 1577 * BGP UPDATE parser functions 1578 */ 1579 1580 /* attribute parser specific makros */ 1581 #define UPD_READ(t, p, plen, n) \ 1582 do { \ 1583 memcpy(t, p, n); \ 1584 p += n; \ 1585 plen += n; \ 1586 } while (0) 1587 1588 #define CHECK_FLAGS(s, t, m) \ 1589 (((s) & ~(ATTR_DEFMASK | (m))) == (t)) 1590 1591 int 1592 rde_attr_parse(u_char *p, u_int16_t len, struct rde_peer *peer, 1593 struct filterstate *state, struct mpattr *mpa) 1594 { 1595 struct bgpd_addr nexthop; 1596 struct rde_aspath *a = &state->aspath; 1597 u_char *op = p, *npath; 1598 u_int32_t tmp32, zero = 0; 1599 int error; 1600 u_int16_t attr_len, nlen; 1601 u_int16_t plen = 0; 1602 u_int8_t flags; 1603 u_int8_t type; 1604 u_int8_t tmp8; 1605 1606 if (len < 3) { 1607 bad_len: 1608 rde_update_err(peer, ERR_UPDATE, ERR_UPD_ATTRLEN, op, len); 1609 return (-1); 1610 } 1611 1612 UPD_READ(&flags, p, plen, 1); 1613 UPD_READ(&type, p, plen, 1); 1614 1615 if (flags & ATTR_EXTLEN) { 1616 if (len - plen < 2) 1617 goto bad_len; 1618 UPD_READ(&attr_len, p, plen, 2); 1619 attr_len = ntohs(attr_len); 1620 } else { 1621 UPD_READ(&tmp8, p, plen, 1); 1622 attr_len = tmp8; 1623 } 1624 1625 if (len - plen < attr_len) 1626 goto bad_len; 1627 1628 /* adjust len to the actual attribute size including header */ 1629 len = plen + attr_len; 1630 1631 switch (type) { 1632 case ATTR_UNDEF: 1633 /* ignore and drop path attributes with a type code of 0 */ 1634 plen += attr_len; 1635 break; 1636 case ATTR_ORIGIN: 1637 if (attr_len != 1) 1638 goto bad_len; 1639 1640 if (!CHECK_FLAGS(flags, ATTR_WELL_KNOWN, 0)) { 1641 bad_flags: 1642 rde_update_err(peer, ERR_UPDATE, ERR_UPD_ATTRFLAGS, 1643 op, len); 1644 return (-1); 1645 } 1646 1647 UPD_READ(&a->origin, p, plen, 1); 1648 if (a->origin > ORIGIN_INCOMPLETE) { 1649 rde_update_err(peer, ERR_UPDATE, ERR_UPD_ORIGIN, 1650 op, len); 1651 return (-1); 1652 } 1653 if (a->flags & F_ATTR_ORIGIN) 1654 goto bad_list; 1655 a->flags |= F_ATTR_ORIGIN; 1656 break; 1657 case ATTR_ASPATH: 1658 if (!CHECK_FLAGS(flags, ATTR_WELL_KNOWN, 0)) 1659 goto bad_flags; 1660 error = aspath_verify(p, attr_len, rde_as4byte(peer), 1661 rde_no_as_set(peer)); 1662 if (error == AS_ERR_SOFT) { 1663 /* 1664 * soft errors like unexpected segment types are 1665 * not considered fatal and the path is just 1666 * marked invalid. 1667 */ 1668 a->flags |= F_ATTR_PARSE_ERR; 1669 } else if (error != 0) { 1670 rde_update_err(peer, ERR_UPDATE, ERR_UPD_ASPATH, 1671 NULL, 0); 1672 return (-1); 1673 } 1674 if (a->flags & F_ATTR_ASPATH) 1675 goto bad_list; 1676 if (rde_as4byte(peer)) { 1677 npath = p; 1678 nlen = attr_len; 1679 } else { 1680 npath = aspath_inflate(p, attr_len, &nlen); 1681 if (npath == NULL) 1682 fatal("aspath_inflate"); 1683 } 1684 if (error == AS_ERR_SOFT) { 1685 char *str; 1686 1687 aspath_asprint(&str, npath, nlen); 1688 log_peer_warnx(&peer->conf, "bad ASPATH %s, " 1689 "path invalidated and prefix withdrawn", 1690 str ? str : "(bad aspath)"); 1691 free(str); 1692 } 1693 a->flags |= F_ATTR_ASPATH; 1694 a->aspath = aspath_get(npath, nlen); 1695 if (npath != p) 1696 free(npath); 1697 plen += attr_len; 1698 break; 1699 case ATTR_NEXTHOP: 1700 if (attr_len != 4) 1701 goto bad_len; 1702 if (!CHECK_FLAGS(flags, ATTR_WELL_KNOWN, 0)) 1703 goto bad_flags; 1704 if (a->flags & F_ATTR_NEXTHOP) 1705 goto bad_list; 1706 a->flags |= F_ATTR_NEXTHOP; 1707 1708 bzero(&nexthop, sizeof(nexthop)); 1709 nexthop.aid = AID_INET; 1710 UPD_READ(&nexthop.v4.s_addr, p, plen, 4); 1711 /* 1712 * Check if the nexthop is a valid IP address. We consider 1713 * multicast and experimental addresses as invalid. 1714 */ 1715 tmp32 = ntohl(nexthop.v4.s_addr); 1716 if (IN_MULTICAST(tmp32) || IN_BADCLASS(tmp32)) { 1717 rde_update_err(peer, ERR_UPDATE, ERR_UPD_NEXTHOP, 1718 op, len); 1719 return (-1); 1720 } 1721 nexthop_unref(state->nexthop); /* just to be sure */ 1722 state->nexthop = nexthop_get(&nexthop); 1723 break; 1724 case ATTR_MED: 1725 if (attr_len != 4) 1726 goto bad_len; 1727 if (!CHECK_FLAGS(flags, ATTR_OPTIONAL, 0)) 1728 goto bad_flags; 1729 if (a->flags & F_ATTR_MED) 1730 goto bad_list; 1731 a->flags |= F_ATTR_MED; 1732 1733 UPD_READ(&tmp32, p, plen, 4); 1734 a->med = ntohl(tmp32); 1735 break; 1736 case ATTR_LOCALPREF: 1737 if (attr_len != 4) 1738 goto bad_len; 1739 if (!CHECK_FLAGS(flags, ATTR_WELL_KNOWN, 0)) 1740 goto bad_flags; 1741 if (peer->conf.ebgp) { 1742 /* ignore local-pref attr on non ibgp peers */ 1743 plen += 4; 1744 break; 1745 } 1746 if (a->flags & F_ATTR_LOCALPREF) 1747 goto bad_list; 1748 a->flags |= F_ATTR_LOCALPREF; 1749 1750 UPD_READ(&tmp32, p, plen, 4); 1751 a->lpref = ntohl(tmp32); 1752 break; 1753 case ATTR_ATOMIC_AGGREGATE: 1754 if (attr_len != 0) 1755 goto bad_len; 1756 if (!CHECK_FLAGS(flags, ATTR_WELL_KNOWN, 0)) 1757 goto bad_flags; 1758 goto optattr; 1759 case ATTR_AGGREGATOR: 1760 if ((!rde_as4byte(peer) && attr_len != 6) || 1761 (rde_as4byte(peer) && attr_len != 8)) { 1762 /* 1763 * ignore attribute in case of error as per 1764 * RFC 7606 1765 */ 1766 log_peer_warnx(&peer->conf, "bad AGGREGATOR, " 1767 "partial attribute ignored"); 1768 plen += attr_len; 1769 break; 1770 } 1771 if (!CHECK_FLAGS(flags, ATTR_OPTIONAL|ATTR_TRANSITIVE, 1772 ATTR_PARTIAL)) 1773 goto bad_flags; 1774 if (!rde_as4byte(peer)) { 1775 /* need to inflate aggregator AS to 4-byte */ 1776 u_char t[8]; 1777 t[0] = t[1] = 0; 1778 UPD_READ(&t[2], p, plen, 2); 1779 UPD_READ(&t[4], p, plen, 4); 1780 if (memcmp(t, &zero, sizeof(u_int32_t)) == 0) { 1781 /* As per RFC7606 use "attribute discard". */ 1782 log_peer_warnx(&peer->conf, "bad AGGREGATOR, " 1783 "AS 0 not allowed, attribute discarded"); 1784 break; 1785 } 1786 if (attr_optadd(a, flags, type, t, 1787 sizeof(t)) == -1) 1788 goto bad_list; 1789 break; 1790 } 1791 /* 4-byte ready server take the default route */ 1792 if (memcmp(p, &zero, sizeof(u_int32_t)) == 0) { 1793 /* As per RFC7606 use "attribute discard" here. */ 1794 char *pfmt = log_fmt_peer(&peer->conf); 1795 log_debug("%s: bad AGGREGATOR, " 1796 "AS 0 not allowed, attribute discarded", pfmt); 1797 free(pfmt); 1798 plen += attr_len; 1799 break; 1800 } 1801 goto optattr; 1802 case ATTR_COMMUNITIES: 1803 if (!CHECK_FLAGS(flags, ATTR_OPTIONAL|ATTR_TRANSITIVE, 1804 ATTR_PARTIAL)) 1805 goto bad_flags; 1806 if (community_add(&state->communities, flags, p, 1807 attr_len) == -1) { 1808 /* 1809 * mark update as bad and withdraw all routes as per 1810 * RFC 7606 1811 */ 1812 a->flags |= F_ATTR_PARSE_ERR; 1813 log_peer_warnx(&peer->conf, "bad COMMUNITIES, " 1814 "path invalidated and prefix withdrawn"); 1815 break; 1816 } 1817 plen += attr_len; 1818 break; 1819 case ATTR_LARGE_COMMUNITIES: 1820 if (!CHECK_FLAGS(flags, ATTR_OPTIONAL|ATTR_TRANSITIVE, 1821 ATTR_PARTIAL)) 1822 goto bad_flags; 1823 if (community_large_add(&state->communities, flags, p, 1824 attr_len) == -1) { 1825 /* 1826 * mark update as bad and withdraw all routes as per 1827 * RFC 7606 1828 */ 1829 a->flags |= F_ATTR_PARSE_ERR; 1830 log_peer_warnx(&peer->conf, "bad LARGE COMMUNITIES, " 1831 "path invalidated and prefix withdrawn"); 1832 break; 1833 } 1834 plen += attr_len; 1835 break; 1836 case ATTR_EXT_COMMUNITIES: 1837 if (!CHECK_FLAGS(flags, ATTR_OPTIONAL|ATTR_TRANSITIVE, 1838 ATTR_PARTIAL)) 1839 goto bad_flags; 1840 if (community_ext_add(&state->communities, flags, p, 1841 attr_len) == -1) { 1842 /* 1843 * mark update as bad and withdraw all routes as per 1844 * RFC 7606 1845 */ 1846 a->flags |= F_ATTR_PARSE_ERR; 1847 log_peer_warnx(&peer->conf, "bad EXT_COMMUNITIES, " 1848 "path invalidated and prefix withdrawn"); 1849 break; 1850 } 1851 plen += attr_len; 1852 break; 1853 case ATTR_ORIGINATOR_ID: 1854 if (attr_len != 4) 1855 goto bad_len; 1856 if (!CHECK_FLAGS(flags, ATTR_OPTIONAL, 0)) 1857 goto bad_flags; 1858 goto optattr; 1859 case ATTR_CLUSTER_LIST: 1860 if (attr_len % 4 != 0) 1861 goto bad_len; 1862 if (!CHECK_FLAGS(flags, ATTR_OPTIONAL, 0)) 1863 goto bad_flags; 1864 goto optattr; 1865 case ATTR_MP_REACH_NLRI: 1866 if (attr_len < 4) 1867 goto bad_len; 1868 if (!CHECK_FLAGS(flags, ATTR_OPTIONAL, 0)) 1869 goto bad_flags; 1870 /* the validity is checked in rde_update_dispatch() */ 1871 if (a->flags & F_ATTR_MP_REACH) 1872 goto bad_list; 1873 a->flags |= F_ATTR_MP_REACH; 1874 1875 mpa->reach = p; 1876 mpa->reach_len = attr_len; 1877 plen += attr_len; 1878 break; 1879 case ATTR_MP_UNREACH_NLRI: 1880 if (attr_len < 3) 1881 goto bad_len; 1882 if (!CHECK_FLAGS(flags, ATTR_OPTIONAL, 0)) 1883 goto bad_flags; 1884 /* the validity is checked in rde_update_dispatch() */ 1885 if (a->flags & F_ATTR_MP_UNREACH) 1886 goto bad_list; 1887 a->flags |= F_ATTR_MP_UNREACH; 1888 1889 mpa->unreach = p; 1890 mpa->unreach_len = attr_len; 1891 plen += attr_len; 1892 break; 1893 case ATTR_AS4_AGGREGATOR: 1894 if (attr_len != 8) { 1895 /* see ATTR_AGGREGATOR ... */ 1896 if ((flags & ATTR_PARTIAL) == 0) 1897 goto bad_len; 1898 log_peer_warnx(&peer->conf, "bad AS4_AGGREGATOR, " 1899 "partial attribute ignored"); 1900 plen += attr_len; 1901 break; 1902 } 1903 if (!CHECK_FLAGS(flags, ATTR_OPTIONAL|ATTR_TRANSITIVE, 1904 ATTR_PARTIAL)) 1905 goto bad_flags; 1906 if (memcmp(p, &zero, sizeof(u_int32_t)) == 0) { 1907 /* As per RFC6793 use "attribute discard" here. */ 1908 log_peer_warnx(&peer->conf, "bad AS4_AGGREGATOR, " 1909 "AS 0 not allowed, attribute discarded"); 1910 plen += attr_len; 1911 break; 1912 } 1913 a->flags |= F_ATTR_AS4BYTE_NEW; 1914 goto optattr; 1915 case ATTR_AS4_PATH: 1916 if (!CHECK_FLAGS(flags, ATTR_OPTIONAL|ATTR_TRANSITIVE, 1917 ATTR_PARTIAL)) 1918 goto bad_flags; 1919 if ((error = aspath_verify(p, attr_len, 1, 1920 rde_no_as_set(peer))) != 0) { 1921 /* 1922 * XXX RFC does not specify how to handle errors. 1923 * XXX Instead of dropping the session because of a 1924 * XXX bad path just mark the full update as having 1925 * XXX a parse error which makes the update no longer 1926 * XXX eligible and will not be considered for routing 1927 * XXX or redistribution. 1928 * XXX We follow draft-ietf-idr-optional-transitive 1929 * XXX by looking at the partial bit. 1930 * XXX Consider soft errors similar to a partial attr. 1931 */ 1932 if (flags & ATTR_PARTIAL || error == AS_ERR_SOFT) { 1933 a->flags |= F_ATTR_PARSE_ERR; 1934 log_peer_warnx(&peer->conf, "bad AS4_PATH, " 1935 "path invalidated and prefix withdrawn"); 1936 goto optattr; 1937 } else { 1938 rde_update_err(peer, ERR_UPDATE, ERR_UPD_ASPATH, 1939 NULL, 0); 1940 return (-1); 1941 } 1942 } 1943 a->flags |= F_ATTR_AS4BYTE_NEW; 1944 goto optattr; 1945 default: 1946 if ((flags & ATTR_OPTIONAL) == 0) { 1947 rde_update_err(peer, ERR_UPDATE, ERR_UPD_UNKNWN_WK_ATTR, 1948 op, len); 1949 return (-1); 1950 } 1951 optattr: 1952 if (attr_optadd(a, flags, type, p, attr_len) == -1) { 1953 bad_list: 1954 rde_update_err(peer, ERR_UPDATE, ERR_UPD_ATTRLIST, 1955 NULL, 0); 1956 return (-1); 1957 } 1958 1959 plen += attr_len; 1960 break; 1961 } 1962 1963 return (plen); 1964 } 1965 1966 int 1967 rde_attr_add(struct filterstate *state, u_char *p, u_int16_t len) 1968 { 1969 u_int16_t attr_len; 1970 u_int16_t plen = 0; 1971 u_int8_t flags; 1972 u_int8_t type; 1973 u_int8_t tmp8; 1974 1975 if (len < 3) 1976 return (-1); 1977 1978 UPD_READ(&flags, p, plen, 1); 1979 UPD_READ(&type, p, plen, 1); 1980 1981 if (flags & ATTR_EXTLEN) { 1982 if (len - plen < 2) 1983 return (-1); 1984 UPD_READ(&attr_len, p, plen, 2); 1985 attr_len = ntohs(attr_len); 1986 } else { 1987 UPD_READ(&tmp8, p, plen, 1); 1988 attr_len = tmp8; 1989 } 1990 1991 if (len - plen < attr_len) 1992 return (-1); 1993 1994 switch (type) { 1995 case ATTR_COMMUNITIES: 1996 return community_add(&state->communities, flags, p, attr_len); 1997 case ATTR_LARGE_COMMUNITIES: 1998 return community_large_add(&state->communities, flags, p, 1999 attr_len); 2000 case ATTR_EXT_COMMUNITIES: 2001 return community_ext_add(&state->communities, flags, p, 2002 attr_len); 2003 } 2004 2005 if (attr_optadd(&state->aspath, flags, type, p, attr_len) == -1) 2006 return (-1); 2007 return (0); 2008 } 2009 2010 #undef UPD_READ 2011 #undef CHECK_FLAGS 2012 2013 u_int8_t 2014 rde_attr_missing(struct rde_aspath *a, int ebgp, u_int16_t nlrilen) 2015 { 2016 /* ATTR_MP_UNREACH_NLRI may be sent alone */ 2017 if (nlrilen == 0 && a->flags & F_ATTR_MP_UNREACH && 2018 (a->flags & F_ATTR_MP_REACH) == 0) 2019 return (0); 2020 2021 if ((a->flags & F_ATTR_ORIGIN) == 0) 2022 return (ATTR_ORIGIN); 2023 if ((a->flags & F_ATTR_ASPATH) == 0) 2024 return (ATTR_ASPATH); 2025 if ((a->flags & F_ATTR_MP_REACH) == 0 && 2026 (a->flags & F_ATTR_NEXTHOP) == 0) 2027 return (ATTR_NEXTHOP); 2028 if (!ebgp) 2029 if ((a->flags & F_ATTR_LOCALPREF) == 0) 2030 return (ATTR_LOCALPREF); 2031 return (0); 2032 } 2033 2034 int 2035 rde_get_mp_nexthop(u_char *data, u_int16_t len, u_int8_t aid, 2036 struct filterstate *state) 2037 { 2038 struct bgpd_addr nexthop; 2039 u_int8_t totlen, nhlen; 2040 2041 if (len == 0) 2042 return (-1); 2043 2044 nhlen = *data++; 2045 totlen = 1; 2046 len--; 2047 2048 if (nhlen > len) 2049 return (-1); 2050 2051 bzero(&nexthop, sizeof(nexthop)); 2052 nexthop.aid = aid; 2053 switch (aid) { 2054 case AID_INET6: 2055 /* 2056 * RFC2545 describes that there may be a link-local 2057 * address carried in nexthop. Yikes! 2058 * This is not only silly, it is wrong and we just ignore 2059 * this link-local nexthop. The bgpd session doesn't run 2060 * over the link-local address so why should all other 2061 * traffic. 2062 */ 2063 if (nhlen != 16 && nhlen != 32) { 2064 log_warnx("bad multiprotocol nexthop, bad size"); 2065 return (-1); 2066 } 2067 memcpy(&nexthop.v6.s6_addr, data, 16); 2068 break; 2069 case AID_VPN_IPv6: 2070 if (nhlen != 24) { 2071 log_warnx("bad multiprotocol nexthop, bad size %d", 2072 nhlen); 2073 return (-1); 2074 } 2075 memcpy(&nexthop.v6, data + sizeof(u_int64_t), 2076 sizeof(nexthop.v6)); 2077 nexthop.aid = AID_INET6; 2078 break; 2079 case AID_VPN_IPv4: 2080 /* 2081 * Neither RFC4364 nor RFC3107 specify the format of the 2082 * nexthop in an explicit way. The quality of RFC went down 2083 * the toilet the larger the number got. 2084 * RFC4364 is very confusing about VPN-IPv4 address and the 2085 * VPN-IPv4 prefix that carries also a MPLS label. 2086 * So the nexthop is a 12-byte address with a 64bit RD and 2087 * an IPv4 address following. In the nexthop case the RD can 2088 * be ignored. 2089 * Since the nexthop has to be in the main IPv4 table just 2090 * create an AID_INET nexthop. So we don't need to handle 2091 * AID_VPN_IPv4 in nexthop and kroute. 2092 */ 2093 if (nhlen != 12) { 2094 log_warnx("bad multiprotocol nexthop, bad size"); 2095 return (-1); 2096 } 2097 nexthop.aid = AID_INET; 2098 memcpy(&nexthop.v4, data + sizeof(u_int64_t), 2099 sizeof(nexthop.v4)); 2100 break; 2101 default: 2102 log_warnx("bad multiprotocol nexthop, bad AID"); 2103 return (-1); 2104 } 2105 2106 nexthop_unref(state->nexthop); /* just to be sure */ 2107 state->nexthop = nexthop_get(&nexthop); 2108 2109 /* ignore reserved (old SNPA) field as per RFC4760 */ 2110 totlen += nhlen + 1; 2111 data += nhlen + 1; 2112 2113 return (totlen); 2114 } 2115 2116 void 2117 rde_update_err(struct rde_peer *peer, u_int8_t error, u_int8_t suberr, 2118 void *data, u_int16_t size) 2119 { 2120 struct ibuf *wbuf; 2121 2122 if ((wbuf = imsg_create(ibuf_se, IMSG_UPDATE_ERR, peer->conf.id, 0, 2123 size + sizeof(error) + sizeof(suberr))) == NULL) 2124 fatal("%s %d imsg_create error", __func__, __LINE__); 2125 if (imsg_add(wbuf, &error, sizeof(error)) == -1 || 2126 imsg_add(wbuf, &suberr, sizeof(suberr)) == -1 || 2127 imsg_add(wbuf, data, size) == -1) 2128 fatal("%s %d imsg_add error", __func__, __LINE__); 2129 imsg_close(ibuf_se, wbuf); 2130 peer->state = PEER_ERR; 2131 } 2132 2133 void 2134 rde_update_log(const char *message, u_int16_t rid, 2135 const struct rde_peer *peer, const struct bgpd_addr *next, 2136 const struct bgpd_addr *prefix, u_int8_t prefixlen) 2137 { 2138 char *l = NULL; 2139 char *n = NULL; 2140 char *p = NULL; 2141 2142 if (!((conf->log & BGPD_LOG_UPDATES) || 2143 (peer->flags & PEERFLAG_LOG_UPDATES))) 2144 return; 2145 2146 if (next != NULL) 2147 if (asprintf(&n, " via %s", log_addr(next)) == -1) 2148 n = NULL; 2149 if (asprintf(&p, "%s/%u", log_addr(prefix), prefixlen) == -1) 2150 p = NULL; 2151 l = log_fmt_peer(&peer->conf); 2152 log_info("Rib %s: %s AS%s: %s %s%s", rib_byid(rid)->name, 2153 l, log_as(peer->conf.remote_as), message, 2154 p ? p : "out of memory", n ? n : ""); 2155 2156 free(l); 2157 free(n); 2158 free(p); 2159 } 2160 2161 /* 2162 * 4-Byte ASN helper function. 2163 * Two scenarios need to be considered: 2164 * - NEW session with NEW attributes present -> just remove the attributes 2165 * - OLD session with NEW attributes present -> try to merge them 2166 */ 2167 void 2168 rde_as4byte_fixup(struct rde_peer *peer, struct rde_aspath *a) 2169 { 2170 struct attr *nasp, *naggr, *oaggr; 2171 u_int32_t as; 2172 2173 /* 2174 * if either ATTR_AS4_AGGREGATOR or ATTR_AS4_PATH is present 2175 * try to fixup the attributes. 2176 * Do not fixup if F_ATTR_PARSE_ERR is set. 2177 */ 2178 if (!(a->flags & F_ATTR_AS4BYTE_NEW) || a->flags & F_ATTR_PARSE_ERR) 2179 return; 2180 2181 /* first get the attributes */ 2182 nasp = attr_optget(a, ATTR_AS4_PATH); 2183 naggr = attr_optget(a, ATTR_AS4_AGGREGATOR); 2184 2185 if (rde_as4byte(peer)) { 2186 /* NEW session using 4-byte ASNs */ 2187 if (nasp) { 2188 log_peer_warnx(&peer->conf, "uses 4-byte ASN " 2189 "but sent AS4_PATH attribute."); 2190 attr_free(a, nasp); 2191 } 2192 if (naggr) { 2193 log_peer_warnx(&peer->conf, "uses 4-byte ASN " 2194 "but sent AS4_AGGREGATOR attribute."); 2195 attr_free(a, naggr); 2196 } 2197 return; 2198 } 2199 /* OLD session using 2-byte ASNs */ 2200 /* try to merge the new attributes into the old ones */ 2201 if ((oaggr = attr_optget(a, ATTR_AGGREGATOR))) { 2202 memcpy(&as, oaggr->data, sizeof(as)); 2203 if (ntohl(as) != AS_TRANS) { 2204 /* per RFC ignore AS4_PATH and AS4_AGGREGATOR */ 2205 if (nasp) 2206 attr_free(a, nasp); 2207 if (naggr) 2208 attr_free(a, naggr); 2209 return; 2210 } 2211 if (naggr) { 2212 /* switch over to new AGGREGATOR */ 2213 attr_free(a, oaggr); 2214 if (attr_optadd(a, ATTR_OPTIONAL | ATTR_TRANSITIVE, 2215 ATTR_AGGREGATOR, naggr->data, naggr->len)) 2216 fatalx("attr_optadd failed but impossible"); 2217 } 2218 } 2219 /* there is no need for AS4_AGGREGATOR any more */ 2220 if (naggr) 2221 attr_free(a, naggr); 2222 2223 /* merge AS4_PATH with ASPATH */ 2224 if (nasp) 2225 aspath_merge(a, nasp); 2226 } 2227 2228 2229 /* 2230 * route reflector helper function 2231 */ 2232 void 2233 rde_reflector(struct rde_peer *peer, struct rde_aspath *asp) 2234 { 2235 struct attr *a; 2236 u_int8_t *p; 2237 u_int16_t len; 2238 u_int32_t id; 2239 2240 /* do not consider updates with parse errors */ 2241 if (asp->flags & F_ATTR_PARSE_ERR) 2242 return; 2243 2244 /* check for originator id if eq router_id drop */ 2245 if ((a = attr_optget(asp, ATTR_ORIGINATOR_ID)) != NULL) { 2246 if (memcmp(&conf->bgpid, a->data, sizeof(conf->bgpid)) == 0) { 2247 /* this is coming from myself */ 2248 asp->flags |= F_ATTR_LOOP; 2249 return; 2250 } 2251 } else if (conf->flags & BGPD_FLAG_REFLECTOR) { 2252 if (peer->conf.ebgp) 2253 id = conf->bgpid; 2254 else 2255 id = htonl(peer->remote_bgpid); 2256 if (attr_optadd(asp, ATTR_OPTIONAL, ATTR_ORIGINATOR_ID, 2257 &id, sizeof(u_int32_t)) == -1) 2258 fatalx("attr_optadd failed but impossible"); 2259 } 2260 2261 /* check for own id in the cluster list */ 2262 if (conf->flags & BGPD_FLAG_REFLECTOR) { 2263 if ((a = attr_optget(asp, ATTR_CLUSTER_LIST)) != NULL) { 2264 for (len = 0; len < a->len; 2265 len += sizeof(conf->clusterid)) 2266 /* check if coming from my cluster */ 2267 if (memcmp(&conf->clusterid, a->data + len, 2268 sizeof(conf->clusterid)) == 0) { 2269 asp->flags |= F_ATTR_LOOP; 2270 return; 2271 } 2272 2273 /* prepend own clusterid by replacing attribute */ 2274 len = a->len + sizeof(conf->clusterid); 2275 if (len < a->len) 2276 fatalx("rde_reflector: cluster-list overflow"); 2277 if ((p = malloc(len)) == NULL) 2278 fatal("rde_reflector"); 2279 memcpy(p, &conf->clusterid, sizeof(conf->clusterid)); 2280 memcpy(p + sizeof(conf->clusterid), a->data, a->len); 2281 attr_free(asp, a); 2282 if (attr_optadd(asp, ATTR_OPTIONAL, ATTR_CLUSTER_LIST, 2283 p, len) == -1) 2284 fatalx("attr_optadd failed but impossible"); 2285 free(p); 2286 } else if (attr_optadd(asp, ATTR_OPTIONAL, ATTR_CLUSTER_LIST, 2287 &conf->clusterid, sizeof(conf->clusterid)) == -1) 2288 fatalx("attr_optadd failed but impossible"); 2289 } 2290 } 2291 2292 /* 2293 * control specific functions 2294 */ 2295 static void 2296 rde_dump_rib_as(struct prefix *p, struct rde_aspath *asp, pid_t pid, int flags) 2297 { 2298 struct ctl_show_rib rib; 2299 struct ibuf *wbuf; 2300 struct attr *a; 2301 struct nexthop *nexthop; 2302 void *bp; 2303 time_t staletime; 2304 size_t aslen; 2305 u_int8_t l; 2306 2307 nexthop = prefix_nexthop(p); 2308 bzero(&rib, sizeof(rib)); 2309 rib.age = getmonotime() - p->lastchange; 2310 rib.local_pref = asp->lpref; 2311 rib.med = asp->med; 2312 rib.weight = asp->weight; 2313 strlcpy(rib.descr, prefix_peer(p)->conf.descr, sizeof(rib.descr)); 2314 memcpy(&rib.remote_addr, &prefix_peer(p)->remote_addr, 2315 sizeof(rib.remote_addr)); 2316 rib.remote_id = prefix_peer(p)->remote_bgpid; 2317 if (nexthop != NULL) { 2318 memcpy(&rib.true_nexthop, &nexthop->true_nexthop, 2319 sizeof(rib.true_nexthop)); 2320 memcpy(&rib.exit_nexthop, &nexthop->exit_nexthop, 2321 sizeof(rib.exit_nexthop)); 2322 } else { 2323 /* announced network may have a NULL nexthop */ 2324 bzero(&rib.true_nexthop, sizeof(rib.true_nexthop)); 2325 bzero(&rib.exit_nexthop, sizeof(rib.exit_nexthop)); 2326 rib.true_nexthop.aid = p->pt->aid; 2327 rib.exit_nexthop.aid = p->pt->aid; 2328 } 2329 pt_getaddr(p->pt, &rib.prefix); 2330 rib.prefixlen = p->pt->prefixlen; 2331 rib.origin = asp->origin; 2332 rib.validation_state = p->validation_state; 2333 rib.flags = 0; 2334 if (p->re != NULL && p->re->active == p) 2335 rib.flags |= F_PREF_ACTIVE; 2336 if (!prefix_peer(p)->conf.ebgp) 2337 rib.flags |= F_PREF_INTERNAL; 2338 if (asp->flags & F_PREFIX_ANNOUNCED) 2339 rib.flags |= F_PREF_ANNOUNCE; 2340 if (nexthop == NULL || nexthop->state == NEXTHOP_REACH) 2341 rib.flags |= F_PREF_ELIGIBLE; 2342 if (asp->flags & F_ATTR_LOOP) 2343 rib.flags &= ~F_PREF_ELIGIBLE; 2344 if (asp->flags & F_ATTR_PARSE_ERR) 2345 rib.flags |= F_PREF_INVALID; 2346 staletime = prefix_peer(p)->staletime[p->pt->aid]; 2347 if (staletime && p->lastchange <= staletime) 2348 rib.flags |= F_PREF_STALE; 2349 aslen = aspath_length(asp->aspath); 2350 2351 if ((wbuf = imsg_create(ibuf_se_ctl, IMSG_CTL_SHOW_RIB, 0, pid, 2352 sizeof(rib) + aslen)) == NULL) 2353 return; 2354 if (imsg_add(wbuf, &rib, sizeof(rib)) == -1 || 2355 imsg_add(wbuf, aspath_dump(asp->aspath), aslen) == -1) 2356 return; 2357 imsg_close(ibuf_se_ctl, wbuf); 2358 2359 if (flags & F_CTL_DETAIL) { 2360 struct rde_community *comm = prefix_communities(p); 2361 size_t len = comm->nentries * sizeof(struct community); 2362 if (comm->nentries > 0) { 2363 if ((wbuf = imsg_create(ibuf_se_ctl, 2364 IMSG_CTL_SHOW_RIB_COMMUNITIES, 0, pid, 2365 len)) == NULL) 2366 return; 2367 if ((bp = ibuf_reserve(wbuf, len)) == NULL) { 2368 ibuf_free(wbuf); 2369 return; 2370 } 2371 memcpy(bp, comm->communities, len); 2372 imsg_close(ibuf_se_ctl, wbuf); 2373 } 2374 for (l = 0; l < asp->others_len; l++) { 2375 if ((a = asp->others[l]) == NULL) 2376 break; 2377 if ((wbuf = imsg_create(ibuf_se_ctl, 2378 IMSG_CTL_SHOW_RIB_ATTR, 0, pid, 2379 attr_optlen(a))) == NULL) 2380 return; 2381 if ((bp = ibuf_reserve(wbuf, attr_optlen(a))) == NULL) { 2382 ibuf_free(wbuf); 2383 return; 2384 } 2385 if (attr_write(bp, attr_optlen(a), a->flags, 2386 a->type, a->data, a->len) == -1) { 2387 ibuf_free(wbuf); 2388 return; 2389 } 2390 imsg_close(ibuf_se_ctl, wbuf); 2391 } 2392 } 2393 } 2394 2395 int 2396 rde_match_peer(struct rde_peer *p, struct ctl_neighbor *n) 2397 { 2398 char *s; 2399 2400 if (n && n->addr.aid) { 2401 if (memcmp(&p->conf.remote_addr, &n->addr, 2402 sizeof(p->conf.remote_addr))) 2403 return 0; 2404 } else if (n && n->descr[0]) { 2405 s = n->is_group ? p->conf.group : p->conf.descr; 2406 if (strcmp(s, n->descr)) 2407 return 0; 2408 } 2409 return 1; 2410 } 2411 2412 static void 2413 rde_dump_filter(struct prefix *p, struct ctl_show_rib_request *req) 2414 { 2415 struct rde_aspath *asp; 2416 2417 if (!rde_match_peer(prefix_peer(p), &req->neighbor)) 2418 return; 2419 2420 asp = prefix_aspath(p); 2421 if (asp == NULL) /* skip pending withdraw in Adj-RIB-Out */ 2422 return; 2423 if ((req->flags & F_CTL_ACTIVE) && p->re->active != p) 2424 return; 2425 if ((req->flags & F_CTL_INVALID) && 2426 (asp->flags & F_ATTR_PARSE_ERR) == 0) 2427 return; 2428 if (req->as.type != AS_UNDEF && 2429 !aspath_match(asp->aspath, &req->as, 0)) 2430 return; 2431 if (req->community.flags != 0) { 2432 if (!community_match(prefix_communities(p), &req->community, 2433 NULL)) 2434 return; 2435 } 2436 if (!ovs_match(p, req->flags)) 2437 return; 2438 rde_dump_rib_as(p, asp, req->pid, req->flags); 2439 } 2440 2441 static void 2442 rde_dump_upcall(struct rib_entry *re, void *ptr) 2443 { 2444 struct rde_dump_ctx *ctx = ptr; 2445 struct prefix *p; 2446 2447 LIST_FOREACH(p, &re->prefix_h, entry.list.rib) 2448 rde_dump_filter(p, &ctx->req); 2449 } 2450 2451 static void 2452 rde_dump_prefix_upcall(struct rib_entry *re, void *ptr) 2453 { 2454 struct rde_dump_ctx *ctx = ptr; 2455 struct prefix *p; 2456 struct pt_entry *pt; 2457 struct bgpd_addr addr; 2458 2459 pt = re->prefix; 2460 pt_getaddr(pt, &addr); 2461 if (addr.aid != ctx->req.prefix.aid) 2462 return; 2463 if (ctx->req.flags & F_LONGER) { 2464 if (ctx->req.prefixlen > pt->prefixlen) 2465 return; 2466 if (!prefix_compare(&ctx->req.prefix, &addr, 2467 ctx->req.prefixlen)) 2468 LIST_FOREACH(p, &re->prefix_h, entry.list.rib) 2469 rde_dump_filter(p, &ctx->req); 2470 } else { 2471 if (ctx->req.prefixlen < pt->prefixlen) 2472 return; 2473 if (!prefix_compare(&addr, &ctx->req.prefix, 2474 pt->prefixlen)) 2475 LIST_FOREACH(p, &re->prefix_h, entry.list.rib) 2476 rde_dump_filter(p, &ctx->req); 2477 } 2478 } 2479 2480 static void 2481 rde_dump_adjout_upcall(struct prefix *p, void *ptr) 2482 { 2483 struct rde_dump_ctx *ctx = ptr; 2484 2485 if (p->flags & (PREFIX_FLAG_WITHDRAW | PREFIX_FLAG_DEAD)) 2486 return; 2487 rde_dump_filter(p, &ctx->req); 2488 } 2489 2490 static void 2491 rde_dump_adjout_prefix_upcall(struct prefix *p, void *ptr) 2492 { 2493 struct rde_dump_ctx *ctx = ptr; 2494 struct bgpd_addr addr; 2495 2496 if (p->flags & (PREFIX_FLAG_WITHDRAW | PREFIX_FLAG_DEAD)) 2497 return; 2498 2499 pt_getaddr(p->pt, &addr); 2500 if (addr.aid != ctx->req.prefix.aid) 2501 return; 2502 if (ctx->req.flags & F_LONGER) { 2503 if (ctx->req.prefixlen > p->pt->prefixlen) 2504 return; 2505 if (!prefix_compare(&ctx->req.prefix, &addr, 2506 ctx->req.prefixlen)) 2507 rde_dump_filter(p, &ctx->req); 2508 } else { 2509 if (ctx->req.prefixlen < p->pt->prefixlen) 2510 return; 2511 if (!prefix_compare(&addr, &ctx->req.prefix, 2512 p->pt->prefixlen)) 2513 rde_dump_filter(p, &ctx->req); 2514 } 2515 } 2516 2517 static int 2518 rde_dump_throttled(void *arg) 2519 { 2520 struct rde_dump_ctx *ctx = arg; 2521 2522 return (ctx->throttled != 0); 2523 } 2524 2525 static void 2526 rde_dump_done(void *arg, u_int8_t aid) 2527 { 2528 struct rde_dump_ctx *ctx = arg; 2529 struct rde_peer *peer; 2530 u_int error; 2531 2532 if (ctx->req.flags & F_CTL_ADJ_OUT) { 2533 peer = peer_match(&ctx->req.neighbor, ctx->peerid); 2534 if (peer == NULL) 2535 goto done; 2536 ctx->peerid = peer->conf.id; 2537 switch (ctx->req.type) { 2538 case IMSG_CTL_SHOW_RIB: 2539 if (prefix_dump_new(peer, ctx->req.aid, 2540 CTL_MSG_HIGH_MARK, ctx, rde_dump_adjout_upcall, 2541 rde_dump_done, rde_dump_throttled) == -1) 2542 goto nomem; 2543 break; 2544 case IMSG_CTL_SHOW_RIB_PREFIX: 2545 if (prefix_dump_new(peer, ctx->req.aid, 2546 CTL_MSG_HIGH_MARK, ctx, 2547 rde_dump_adjout_prefix_upcall, 2548 rde_dump_done, rde_dump_throttled) == -1) 2549 goto nomem; 2550 break; 2551 default: 2552 fatalx("%s: unsupported imsg type", __func__); 2553 } 2554 return; 2555 } 2556 done: 2557 imsg_compose(ibuf_se_ctl, IMSG_CTL_END, 0, ctx->req.pid, -1, NULL, 0); 2558 LIST_REMOVE(ctx, entry); 2559 free(ctx); 2560 return; 2561 2562 nomem: 2563 log_warn(__func__); 2564 error = CTL_RES_NOMEM; 2565 imsg_compose(ibuf_se_ctl, IMSG_CTL_RESULT, 0, ctx->req.pid, -1, &error, 2566 sizeof(error)); 2567 return; 2568 } 2569 2570 void 2571 rde_dump_ctx_new(struct ctl_show_rib_request *req, pid_t pid, 2572 enum imsg_type type) 2573 { 2574 struct rde_dump_ctx *ctx; 2575 struct rib_entry *re; 2576 struct prefix *p; 2577 u_int error; 2578 u_int8_t hostplen; 2579 u_int16_t rid; 2580 2581 if ((ctx = calloc(1, sizeof(*ctx))) == NULL) { 2582 nomem: 2583 log_warn(__func__); 2584 error = CTL_RES_NOMEM; 2585 imsg_compose(ibuf_se_ctl, IMSG_CTL_RESULT, 0, pid, -1, &error, 2586 sizeof(error)); 2587 return; 2588 } 2589 2590 memcpy(&ctx->req, req, sizeof(struct ctl_show_rib_request)); 2591 ctx->req.pid = pid; 2592 ctx->req.type = type; 2593 2594 if (req->flags & (F_CTL_ADJ_IN | F_CTL_INVALID)) { 2595 rid = RIB_ADJ_IN; 2596 } else if (req->flags & F_CTL_ADJ_OUT) { 2597 struct rde_peer *peer; 2598 2599 peer = peer_match(&req->neighbor, 0); 2600 if (peer == NULL) { 2601 error = CTL_RES_NOSUCHPEER; 2602 imsg_compose(ibuf_se_ctl, IMSG_CTL_RESULT, 0, pid, -1, 2603 &error, sizeof(error)); 2604 free(ctx); 2605 return; 2606 } 2607 ctx->peerid = peer->conf.id; 2608 switch (ctx->req.type) { 2609 case IMSG_CTL_SHOW_RIB: 2610 if (prefix_dump_new(peer, ctx->req.aid, 2611 CTL_MSG_HIGH_MARK, ctx, rde_dump_adjout_upcall, 2612 rde_dump_done, rde_dump_throttled) == -1) 2613 goto nomem; 2614 break; 2615 case IMSG_CTL_SHOW_RIB_PREFIX: 2616 if (req->flags & (F_LONGER|F_SHORTER)) { 2617 if (prefix_dump_new(peer, ctx->req.aid, 2618 CTL_MSG_HIGH_MARK, ctx, 2619 rde_dump_adjout_prefix_upcall, 2620 rde_dump_done, rde_dump_throttled) == -1) 2621 goto nomem; 2622 break; 2623 } 2624 switch (req->prefix.aid) { 2625 case AID_INET: 2626 case AID_VPN_IPv4: 2627 hostplen = 32; 2628 break; 2629 case AID_INET6: 2630 case AID_VPN_IPv6: 2631 hostplen = 128; 2632 break; 2633 default: 2634 fatalx("%s: unknown af", __func__); 2635 } 2636 2637 do { 2638 if (req->prefixlen == hostplen) 2639 p = prefix_match(peer, &req->prefix); 2640 else 2641 p = prefix_lookup(peer, &req->prefix, 2642 req->prefixlen); 2643 if (p) 2644 rde_dump_adjout_upcall(p, ctx); 2645 } while ((peer = peer_match(&req->neighbor, 2646 peer->conf.id))); 2647 2648 imsg_compose(ibuf_se_ctl, IMSG_CTL_END, 0, ctx->req.pid, 2649 -1, NULL, 0); 2650 free(ctx); 2651 return; 2652 default: 2653 fatalx("%s: unsupported imsg type", __func__); 2654 } 2655 2656 LIST_INSERT_HEAD(&rde_dump_h, ctx, entry); 2657 return; 2658 } else if ((rid = rib_find(req->rib)) == RIB_NOTFOUND) { 2659 log_warnx("%s: no such rib %s", __func__, req->rib); 2660 error = CTL_RES_NOSUCHRIB; 2661 imsg_compose(ibuf_se_ctl, IMSG_CTL_RESULT, 0, pid, -1, &error, 2662 sizeof(error)); 2663 free(ctx); 2664 return; 2665 } 2666 2667 switch (ctx->req.type) { 2668 case IMSG_CTL_SHOW_NETWORK: 2669 if (rib_dump_new(rid, ctx->req.aid, CTL_MSG_HIGH_MARK, ctx, 2670 network_dump_upcall, rde_dump_done, 2671 rde_dump_throttled) == -1) 2672 goto nomem; 2673 break; 2674 case IMSG_CTL_SHOW_RIB: 2675 if (rib_dump_new(rid, ctx->req.aid, CTL_MSG_HIGH_MARK, ctx, 2676 rde_dump_upcall, rde_dump_done, rde_dump_throttled) == -1) 2677 goto nomem; 2678 break; 2679 case IMSG_CTL_SHOW_RIB_PREFIX: 2680 if (req->flags & (F_LONGER|F_SHORTER)) { 2681 if (rib_dump_new(rid, ctx->req.aid, 2682 CTL_MSG_HIGH_MARK, ctx, rde_dump_prefix_upcall, 2683 rde_dump_done, rde_dump_throttled) == -1) 2684 goto nomem; 2685 break; 2686 } 2687 switch (req->prefix.aid) { 2688 case AID_INET: 2689 case AID_VPN_IPv4: 2690 hostplen = 32; 2691 break; 2692 case AID_INET6: 2693 case AID_VPN_IPv6: 2694 hostplen = 128; 2695 break; 2696 default: 2697 fatalx("%s: unknown af", __func__); 2698 } 2699 if (req->prefixlen == hostplen) 2700 re = rib_match(rib_byid(rid), &req->prefix); 2701 else 2702 re = rib_get(rib_byid(rid), &req->prefix, 2703 req->prefixlen); 2704 if (re) 2705 rde_dump_upcall(re, ctx); 2706 imsg_compose(ibuf_se_ctl, IMSG_CTL_END, 0, ctx->req.pid, 2707 -1, NULL, 0); 2708 free(ctx); 2709 return; 2710 default: 2711 fatalx("%s: unsupported imsg type", __func__); 2712 } 2713 LIST_INSERT_HEAD(&rde_dump_h, ctx, entry); 2714 } 2715 2716 void 2717 rde_dump_ctx_throttle(pid_t pid, int throttle) 2718 { 2719 struct rde_dump_ctx *ctx; 2720 2721 LIST_FOREACH(ctx, &rde_dump_h, entry) { 2722 if (ctx->req.pid == pid) { 2723 ctx->throttled = throttle; 2724 return; 2725 } 2726 } 2727 } 2728 2729 void 2730 rde_dump_ctx_terminate(pid_t pid) 2731 { 2732 struct rde_dump_ctx *ctx; 2733 2734 LIST_FOREACH(ctx, &rde_dump_h, entry) { 2735 if (ctx->req.pid == pid) { 2736 rib_dump_terminate(ctx); 2737 return; 2738 } 2739 } 2740 } 2741 2742 static int 2743 rde_mrt_throttled(void *arg) 2744 { 2745 struct mrt *mrt = arg; 2746 2747 return (mrt->wbuf.queued > SESS_MSG_LOW_MARK); 2748 } 2749 2750 static void 2751 rde_mrt_done(void *ptr, u_int8_t aid) 2752 { 2753 mrt_done(ptr); 2754 } 2755 2756 void 2757 rde_dump_mrt_new(struct mrt *mrt, pid_t pid, int fd) 2758 { 2759 struct rde_mrt_ctx *ctx; 2760 u_int16_t rid; 2761 2762 if ((ctx = calloc(1, sizeof(*ctx))) == NULL) { 2763 log_warn("rde_dump_mrt_new"); 2764 return; 2765 } 2766 memcpy(&ctx->mrt, mrt, sizeof(struct mrt)); 2767 TAILQ_INIT(&ctx->mrt.wbuf.bufs); 2768 ctx->mrt.wbuf.fd = fd; 2769 ctx->mrt.state = MRT_STATE_RUNNING; 2770 rid = rib_find(ctx->mrt.rib); 2771 if (rid == RIB_NOTFOUND) { 2772 log_warnx("non existing RIB %s for mrt dump", ctx->mrt.rib); 2773 free(ctx); 2774 return; 2775 } 2776 2777 if (ctx->mrt.type == MRT_TABLE_DUMP_V2) 2778 mrt_dump_v2_hdr(&ctx->mrt, conf, &peerlist); 2779 2780 if (rib_dump_new(rid, AID_UNSPEC, CTL_MSG_HIGH_MARK, &ctx->mrt, 2781 mrt_dump_upcall, rde_mrt_done, rde_mrt_throttled) == -1) 2782 fatal("%s: rib_dump_new", __func__); 2783 2784 LIST_INSERT_HEAD(&rde_mrts, ctx, entry); 2785 rde_mrt_cnt++; 2786 } 2787 2788 /* 2789 * kroute specific functions 2790 */ 2791 int 2792 rde_l3vpn_import(struct rde_community *comm, struct l3vpn *rd) 2793 { 2794 struct filter_set *s; 2795 2796 TAILQ_FOREACH(s, &rd->import, entry) { 2797 if (community_match(comm, &s->action.community, 0)) 2798 return (1); 2799 } 2800 return (0); 2801 } 2802 2803 void 2804 rde_send_kroute_flush(struct rib *rib) 2805 { 2806 if (imsg_compose(ibuf_main, IMSG_KROUTE_FLUSH, rib->rtableid, 0, -1, 2807 NULL, 0) == -1) 2808 fatal("%s %d imsg_compose error", __func__, __LINE__); 2809 } 2810 2811 void 2812 rde_send_kroute(struct rib *rib, struct prefix *new, struct prefix *old) 2813 { 2814 struct kroute_full kr; 2815 struct bgpd_addr addr; 2816 struct prefix *p; 2817 struct rde_aspath *asp; 2818 struct l3vpn *vpn; 2819 enum imsg_type type; 2820 2821 /* 2822 * Make sure that self announce prefixes are not committed to the 2823 * FIB. If both prefixes are unreachable no update is needed. 2824 */ 2825 if ((old == NULL || prefix_aspath(old)->flags & F_PREFIX_ANNOUNCED) && 2826 (new == NULL || prefix_aspath(new)->flags & F_PREFIX_ANNOUNCED)) 2827 return; 2828 2829 if (new == NULL || prefix_aspath(new)->flags & F_PREFIX_ANNOUNCED) { 2830 type = IMSG_KROUTE_DELETE; 2831 p = old; 2832 } else { 2833 type = IMSG_KROUTE_CHANGE; 2834 p = new; 2835 } 2836 2837 asp = prefix_aspath(p); 2838 pt_getaddr(p->pt, &addr); 2839 bzero(&kr, sizeof(kr)); 2840 memcpy(&kr.prefix, &addr, sizeof(kr.prefix)); 2841 kr.prefixlen = p->pt->prefixlen; 2842 if (prefix_nhflags(p) == NEXTHOP_REJECT) 2843 kr.flags |= F_REJECT; 2844 if (prefix_nhflags(p) == NEXTHOP_BLACKHOLE) 2845 kr.flags |= F_BLACKHOLE; 2846 if (type == IMSG_KROUTE_CHANGE) 2847 memcpy(&kr.nexthop, &prefix_nexthop(p)->true_nexthop, 2848 sizeof(kr.nexthop)); 2849 strlcpy(kr.label, rtlabel_id2name(asp->rtlabelid), sizeof(kr.label)); 2850 2851 switch (addr.aid) { 2852 case AID_VPN_IPv4: 2853 case AID_VPN_IPv6: 2854 if (!(rib->flags & F_RIB_LOCAL)) 2855 /* not Loc-RIB, no update for VPNs */ 2856 break; 2857 2858 SIMPLEQ_FOREACH(vpn, &conf->l3vpns, entry) { 2859 if (!rde_l3vpn_import(prefix_communities(p), vpn)) 2860 continue; 2861 /* must send exit_nexthop so that correct MPLS tunnel 2862 * is chosen 2863 */ 2864 if (type == IMSG_KROUTE_CHANGE) 2865 memcpy(&kr.nexthop, 2866 &prefix_nexthop(p)->exit_nexthop, 2867 sizeof(kr.nexthop)); 2868 /* XXX not ideal but this will change */ 2869 kr.ifindex = if_nametoindex(vpn->ifmpe); 2870 if (imsg_compose(ibuf_main, type, vpn->rtableid, 0, -1, 2871 &kr, sizeof(kr)) == -1) 2872 fatal("%s %d imsg_compose error", __func__, 2873 __LINE__); 2874 } 2875 break; 2876 default: 2877 if (imsg_compose(ibuf_main, type, rib->rtableid, 0, -1, 2878 &kr, sizeof(kr)) == -1) 2879 fatal("%s %d imsg_compose error", __func__, __LINE__); 2880 break; 2881 } 2882 } 2883 2884 /* 2885 * update specific functions 2886 */ 2887 static int rde_eval_all; 2888 2889 int 2890 rde_evaluate_all(void) 2891 { 2892 return rde_eval_all; 2893 } 2894 2895 void 2896 rde_generate_updates(struct rib *rib, struct prefix *new, struct prefix *old, 2897 int eval_all) 2898 { 2899 struct rde_peer *peer; 2900 u_int8_t aid; 2901 2902 /* 2903 * If old is != NULL we know it was active and should be removed. 2904 * If new is != NULL we know it is reachable and then we should 2905 * generate an update. 2906 */ 2907 if (old == NULL && new == NULL) 2908 return; 2909 2910 if (!eval_all && (rib->flags & F_RIB_NOFIB) == 0) 2911 rde_send_kroute(rib, new, old); 2912 2913 if (new) 2914 aid = new->pt->aid; 2915 else 2916 aid = old->pt->aid; 2917 2918 rde_eval_all = 0; 2919 LIST_FOREACH(peer, &peerlist, peer_l) { 2920 /* skip ourself */ 2921 if (peer == peerself) 2922 continue; 2923 if (peer->state != PEER_UP) 2924 continue; 2925 /* handle evaluate all, keep track if it is needed */ 2926 if (peer->flags & PEERFLAG_EVALUATE_ALL) 2927 rde_eval_all = 1; 2928 else if (eval_all) 2929 /* skip default peers if the best path didn't change */ 2930 continue; 2931 /* skip peers using a different rib */ 2932 if (peer->loc_rib_id != rib->id) 2933 continue; 2934 /* check if peer actually supports the address family */ 2935 if (peer->capa.mp[aid] == 0) 2936 continue; 2937 /* skip peers with special export types */ 2938 if (peer->export_type == EXPORT_NONE || 2939 peer->export_type == EXPORT_DEFAULT_ROUTE) 2940 continue; 2941 2942 up_generate_updates(out_rules, peer, new, old); 2943 } 2944 } 2945 2946 static void 2947 rde_up_flush_upcall(struct prefix *p, void *ptr) 2948 { 2949 up_generate_updates(out_rules, prefix_peer(p), NULL, p); 2950 } 2951 2952 u_char queue_buf[4096]; 2953 2954 int 2955 rde_update_queue_pending(void) 2956 { 2957 struct rde_peer *peer; 2958 u_int8_t aid; 2959 2960 if (ibuf_se && ibuf_se->w.queued >= SESS_MSG_HIGH_MARK) 2961 return 0; 2962 2963 LIST_FOREACH(peer, &peerlist, peer_l) { 2964 if (peer->conf.id == 0) 2965 continue; 2966 if (peer->state != PEER_UP) 2967 continue; 2968 if (peer->throttled) 2969 continue; 2970 for (aid = 0; aid < AID_MAX; aid++) { 2971 if (!RB_EMPTY(&peer->updates[aid]) || 2972 !RB_EMPTY(&peer->withdraws[aid])) 2973 return 1; 2974 } 2975 } 2976 return 0; 2977 } 2978 2979 void 2980 rde_update_queue_runner(void) 2981 { 2982 struct rde_peer *peer; 2983 int r, sent, max = RDE_RUNNER_ROUNDS, eor; 2984 u_int16_t len, wpos; 2985 2986 len = sizeof(queue_buf) - MSGSIZE_HEADER; 2987 do { 2988 sent = 0; 2989 LIST_FOREACH(peer, &peerlist, peer_l) { 2990 if (peer->conf.id == 0) 2991 continue; 2992 if (peer->state != PEER_UP) 2993 continue; 2994 if (peer->throttled) 2995 continue; 2996 eor = 0; 2997 wpos = 0; 2998 /* first withdraws, save 2 bytes for path attributes */ 2999 if ((r = up_dump_withdraws(queue_buf, len - 2, peer, 3000 AID_INET)) == -1) 3001 continue; 3002 wpos += r; 3003 3004 /* now bgp path attributes unless it is the EoR mark */ 3005 if (up_is_eor(peer, AID_INET)) { 3006 eor = 1; 3007 bzero(queue_buf + wpos, 2); 3008 wpos += 2; 3009 } else { 3010 r = up_dump_attrnlri(queue_buf + wpos, 3011 len - wpos, peer); 3012 wpos += r; 3013 } 3014 3015 /* finally send message to SE */ 3016 if (wpos > 4) { 3017 if (imsg_compose(ibuf_se, IMSG_UPDATE, 3018 peer->conf.id, 0, -1, queue_buf, 3019 wpos) == -1) 3020 fatal("%s %d imsg_compose error", 3021 __func__, __LINE__); 3022 sent++; 3023 } 3024 if (eor) 3025 rde_peer_send_eor(peer, AID_INET); 3026 } 3027 max -= sent; 3028 } while (sent != 0 && max > 0); 3029 } 3030 3031 void 3032 rde_update6_queue_runner(u_int8_t aid) 3033 { 3034 struct rde_peer *peer; 3035 int r, sent, max = RDE_RUNNER_ROUNDS / 2; 3036 u_int16_t len; 3037 3038 /* first withdraws ... */ 3039 do { 3040 sent = 0; 3041 LIST_FOREACH(peer, &peerlist, peer_l) { 3042 if (peer->conf.id == 0) 3043 continue; 3044 if (peer->state != PEER_UP) 3045 continue; 3046 if (peer->throttled) 3047 continue; 3048 len = sizeof(queue_buf) - MSGSIZE_HEADER; 3049 r = up_dump_mp_unreach(queue_buf, len, peer, aid); 3050 if (r == -1) 3051 continue; 3052 /* finally send message to SE */ 3053 if (imsg_compose(ibuf_se, IMSG_UPDATE, peer->conf.id, 3054 0, -1, queue_buf, r) == -1) 3055 fatal("%s %d imsg_compose error", __func__, 3056 __LINE__); 3057 sent++; 3058 } 3059 max -= sent; 3060 } while (sent != 0 && max > 0); 3061 3062 /* ... then updates */ 3063 max = RDE_RUNNER_ROUNDS / 2; 3064 do { 3065 sent = 0; 3066 LIST_FOREACH(peer, &peerlist, peer_l) { 3067 if (peer->conf.id == 0) 3068 continue; 3069 if (peer->state != PEER_UP) 3070 continue; 3071 if (peer->throttled) 3072 continue; 3073 len = sizeof(queue_buf) - MSGSIZE_HEADER; 3074 if (up_is_eor(peer, aid)) { 3075 rde_peer_send_eor(peer, aid); 3076 continue; 3077 } 3078 r = up_dump_mp_reach(queue_buf, len, peer, aid); 3079 if (r == 0) 3080 continue; 3081 3082 /* finally send message to SE */ 3083 if (imsg_compose(ibuf_se, IMSG_UPDATE, peer->conf.id, 3084 0, -1, queue_buf, r) == -1) 3085 fatal("%s %d imsg_compose error", __func__, 3086 __LINE__); 3087 sent++; 3088 } 3089 max -= sent; 3090 } while (sent != 0 && max > 0); 3091 } 3092 3093 /* 3094 * pf table specific functions 3095 */ 3096 struct rde_pftable_node { 3097 RB_ENTRY(rde_pftable_node) entry; 3098 struct pt_entry *prefix; 3099 int refcnt; 3100 u_int16_t id; 3101 }; 3102 RB_HEAD(rde_pftable_tree, rde_pftable_node); 3103 3104 static inline int 3105 rde_pftable_cmp(struct rde_pftable_node *a, struct rde_pftable_node *b) 3106 { 3107 if (a->prefix > b->prefix) 3108 return 1; 3109 if (a->prefix < b->prefix) 3110 return -1; 3111 return (a->id - b->id); 3112 } 3113 3114 RB_GENERATE_STATIC(rde_pftable_tree, rde_pftable_node, entry, rde_pftable_cmp); 3115 3116 struct rde_pftable_tree pftable_tree = RB_INITIALIZER(&pftable_tree); 3117 int need_commit; 3118 3119 static void 3120 rde_pftable_send(u_int16_t id, struct pt_entry *pt, int del) 3121 { 3122 struct pftable_msg pfm; 3123 3124 if (id == 0) 3125 return; 3126 3127 /* do not run while cleaning up */ 3128 if (rde_quit) 3129 return; 3130 3131 bzero(&pfm, sizeof(pfm)); 3132 strlcpy(pfm.pftable, pftable_id2name(id), sizeof(pfm.pftable)); 3133 pt_getaddr(pt, &pfm.addr); 3134 pfm.len = pt->prefixlen; 3135 3136 if (imsg_compose(ibuf_main, 3137 del ? IMSG_PFTABLE_REMOVE : IMSG_PFTABLE_ADD, 3138 0, 0, -1, &pfm, sizeof(pfm)) == -1) 3139 fatal("%s %d imsg_compose error", __func__, __LINE__); 3140 3141 need_commit = 1; 3142 } 3143 3144 void 3145 rde_pftable_add(u_int16_t id, struct prefix *p) 3146 { 3147 struct rde_pftable_node *pfn, node; 3148 3149 memset(&node, 0, sizeof(node)); 3150 node.prefix = p->pt; 3151 node.id = id; 3152 3153 pfn = RB_FIND(rde_pftable_tree, &pftable_tree, &node); 3154 if (pfn == NULL) { 3155 if ((pfn = calloc(1, sizeof(*pfn))) == NULL) 3156 fatal("%s", __func__); 3157 pfn->prefix = pt_ref(p->pt); 3158 pfn->id = id; 3159 3160 if (RB_INSERT(rde_pftable_tree, &pftable_tree, pfn) != NULL) 3161 fatalx("%s: tree corrupt", __func__); 3162 3163 rde_pftable_send(id, p->pt, 0); 3164 } 3165 pfn->refcnt++; 3166 } 3167 3168 void 3169 rde_pftable_del(u_int16_t id, struct prefix *p) 3170 { 3171 struct rde_pftable_node *pfn, node; 3172 3173 memset(&node, 0, sizeof(node)); 3174 node.prefix = p->pt; 3175 node.id = id; 3176 3177 pfn = RB_FIND(rde_pftable_tree, &pftable_tree, &node); 3178 if (pfn == NULL) 3179 return; 3180 3181 if (--pfn->refcnt <= 0) { 3182 rde_pftable_send(id, p->pt, 1); 3183 3184 if (RB_REMOVE(rde_pftable_tree, &pftable_tree, pfn) == NULL) 3185 fatalx("%s: tree corrupt", __func__); 3186 3187 pt_unref(pfn->prefix); 3188 free(pfn); 3189 } 3190 } 3191 3192 void 3193 rde_commit_pftable(void) 3194 { 3195 /* do not run while cleaning up */ 3196 if (rde_quit) 3197 return; 3198 3199 if (!need_commit) 3200 return; 3201 3202 if (imsg_compose(ibuf_main, IMSG_PFTABLE_COMMIT, 0, 0, -1, NULL, 0) == 3203 -1) 3204 fatal("%s %d imsg_compose error", __func__, __LINE__); 3205 3206 need_commit = 0; 3207 } 3208 3209 /* 3210 * nexthop specific functions 3211 */ 3212 void 3213 rde_send_nexthop(struct bgpd_addr *next, int insert) 3214 { 3215 int type; 3216 3217 if (insert) 3218 type = IMSG_NEXTHOP_ADD; 3219 else 3220 type = IMSG_NEXTHOP_REMOVE; 3221 3222 if (imsg_compose(ibuf_main, type, 0, 0, -1, next, 3223 sizeof(struct bgpd_addr)) == -1) 3224 fatal("%s %d imsg_compose error", __func__, __LINE__); 3225 } 3226 3227 /* 3228 * soft reconfig specific functions 3229 */ 3230 void 3231 rde_reload_done(void) 3232 { 3233 struct rde_peer *peer; 3234 struct filter_head *fh; 3235 struct rde_prefixset_head prefixsets_old; 3236 struct rde_prefixset_head originsets_old; 3237 struct as_set_head as_sets_old; 3238 u_int16_t rid; 3239 int reload = 0; 3240 3241 softreconfig = 0; 3242 3243 SIMPLEQ_INIT(&prefixsets_old); 3244 SIMPLEQ_INIT(&originsets_old); 3245 SIMPLEQ_INIT(&as_sets_old); 3246 SIMPLEQ_CONCAT(&prefixsets_old, &conf->rde_prefixsets); 3247 SIMPLEQ_CONCAT(&originsets_old, &conf->rde_originsets); 3248 SIMPLEQ_CONCAT(&as_sets_old, &conf->as_sets); 3249 3250 /* merge the main config */ 3251 copy_config(conf, nconf); 3252 3253 /* need to copy the sets and roa table and clear them in nconf */ 3254 SIMPLEQ_CONCAT(&conf->rde_prefixsets, &nconf->rde_prefixsets); 3255 SIMPLEQ_CONCAT(&conf->rde_originsets, &nconf->rde_originsets); 3256 SIMPLEQ_CONCAT(&conf->as_sets, &nconf->as_sets); 3257 3258 /* apply new set of l3vpn, sync will be done later */ 3259 free_l3vpns(&conf->l3vpns); 3260 SIMPLEQ_CONCAT(&conf->l3vpns, &nconf->l3vpns); 3261 /* XXX WHERE IS THE SYNC ??? */ 3262 3263 free_config(nconf); 3264 nconf = NULL; 3265 3266 /* sync peerself with conf */ 3267 peerself->remote_bgpid = ntohl(conf->bgpid); 3268 peerself->conf.local_as = conf->as; 3269 peerself->conf.remote_as = conf->as; 3270 peerself->conf.remote_addr.aid = AID_INET; 3271 peerself->conf.remote_addr.v4.s_addr = conf->bgpid; 3272 peerself->conf.remote_masklen = 32; 3273 peerself->short_as = conf->short_as; 3274 3275 rde_mark_prefixsets_dirty(&prefixsets_old, &conf->rde_prefixsets); 3276 rde_mark_prefixsets_dirty(&originsets_old, &conf->rde_originsets); 3277 as_sets_mark_dirty(&as_sets_old, &conf->as_sets); 3278 3279 /* 3280 * make the new filter rules the active one but keep the old for 3281 * softrconfig. This is needed so that changes happening are using 3282 * the right filters. 3283 */ 3284 fh = out_rules; 3285 out_rules = out_rules_tmp; 3286 out_rules_tmp = fh; 3287 3288 rde_filter_calc_skip_steps(out_rules); 3289 3290 /* check if filter changed */ 3291 LIST_FOREACH(peer, &peerlist, peer_l) { 3292 if (peer->conf.id == 0) 3293 continue; 3294 peer->reconf_out = 0; 3295 peer->reconf_rib = 0; 3296 if (peer->export_type != peer->conf.export_type) { 3297 log_peer_info(&peer->conf, "export type change, " 3298 "reloading"); 3299 peer->reconf_rib = 1; 3300 } 3301 if ((peer->flags & PEERFLAG_EVALUATE_ALL) != 3302 (peer->conf.flags & PEERFLAG_EVALUATE_ALL)) { 3303 log_peer_info(&peer->conf, "rde evaluate change, " 3304 "reloading"); 3305 peer->reconf_rib = 1; 3306 } 3307 if ((peer->flags & PEERFLAG_TRANS_AS) != 3308 (peer->conf.flags & PEERFLAG_TRANS_AS)) { 3309 log_peer_info(&peer->conf, "transparent-as change, " 3310 "reloading"); 3311 peer->reconf_rib = 1; 3312 } 3313 if (peer->loc_rib_id != rib_find(peer->conf.rib)) { 3314 log_peer_info(&peer->conf, "rib change, reloading"); 3315 peer->loc_rib_id = rib_find(peer->conf.rib); 3316 if (peer->loc_rib_id == RIB_NOTFOUND) 3317 fatalx("King Bula's peer met an unknown RIB"); 3318 peer->reconf_rib = 1; 3319 } 3320 peer->export_type = peer->conf.export_type; 3321 peer->flags = peer->conf.flags; 3322 3323 if (peer->reconf_rib) { 3324 if (prefix_dump_new(peer, AID_UNSPEC, 3325 RDE_RUNNER_ROUNDS, NULL, rde_up_flush_upcall, 3326 rde_softreconfig_in_done, NULL) == -1) 3327 fatal("%s: prefix_dump_new", __func__); 3328 log_peer_info(&peer->conf, "flushing Adj-RIB-Out"); 3329 softreconfig++; /* account for the running flush */ 3330 continue; 3331 } 3332 if (!rde_filter_equal(out_rules, out_rules_tmp, peer)) { 3333 char *p = log_fmt_peer(&peer->conf); 3334 log_debug("out filter change: reloading peer %s", p); 3335 free(p); 3336 peer->reconf_out = 1; 3337 } 3338 } 3339 /* bring ribs in sync */ 3340 for (rid = 0; rid < rib_size; rid++) { 3341 struct rib *rib = rib_byid(rid); 3342 if (rib == NULL) 3343 continue; 3344 rde_filter_calc_skip_steps(rib->in_rules_tmp); 3345 3346 /* flip rules, make new active */ 3347 fh = rib->in_rules; 3348 rib->in_rules = rib->in_rules_tmp; 3349 rib->in_rules_tmp = fh; 3350 3351 switch (rib->state) { 3352 case RECONF_DELETE: 3353 rib_free(rib); 3354 break; 3355 case RECONF_RELOAD: 3356 rib_update(rib); 3357 rib->state = RECONF_KEEP; 3358 /* FALLTHROUGH */ 3359 case RECONF_KEEP: 3360 if (rde_filter_equal(rib->in_rules, 3361 rib->in_rules_tmp, NULL)) 3362 /* rib is in sync */ 3363 break; 3364 log_debug("in filter change: reloading RIB %s", 3365 rib->name); 3366 rib->state = RECONF_RELOAD; 3367 reload++; 3368 break; 3369 case RECONF_REINIT: 3370 /* new rib */ 3371 rib->state = RECONF_RELOAD; 3372 reload++; 3373 break; 3374 case RECONF_NONE: 3375 break; 3376 } 3377 filterlist_free(rib->in_rules_tmp); 3378 rib->in_rules_tmp = NULL; 3379 } 3380 3381 filterlist_free(out_rules_tmp); 3382 out_rules_tmp = NULL; 3383 /* old filters removed, free all sets */ 3384 free_rde_prefixsets(&prefixsets_old); 3385 free_rde_prefixsets(&originsets_old); 3386 as_sets_free(&as_sets_old); 3387 3388 log_info("RDE reconfigured"); 3389 3390 softreconfig++; 3391 if (reload > 0) { 3392 if (rib_dump_new(RIB_ADJ_IN, AID_UNSPEC, RDE_RUNNER_ROUNDS, 3393 NULL, rde_softreconfig_in, rde_softreconfig_in_done, 3394 NULL) == -1) 3395 fatal("%s: rib_dump_new", __func__); 3396 log_info("running softreconfig in"); 3397 } else { 3398 rde_softreconfig_in_done((void *)1, AID_UNSPEC); 3399 } 3400 } 3401 3402 static void 3403 rde_softreconfig_in_done(void *arg, u_int8_t dummy) 3404 { 3405 struct rde_peer *peer; 3406 u_int16_t i; 3407 3408 softreconfig--; 3409 /* one guy done but other dumps are still running */ 3410 if (softreconfig > 0) 3411 return; 3412 3413 if (arg == NULL) 3414 log_info("softreconfig in done"); 3415 3416 /* now do the Adj-RIB-Out sync and a possible FIB sync */ 3417 softreconfig = 0; 3418 for (i = 0; i < rib_size; i++) { 3419 struct rib *rib = rib_byid(i); 3420 if (rib == NULL) 3421 continue; 3422 rib->state = RECONF_NONE; 3423 if (rib->fibstate == RECONF_RELOAD) { 3424 if (rib_dump_new(i, AID_UNSPEC, RDE_RUNNER_ROUNDS, 3425 rib, rde_softreconfig_sync_fib, 3426 rde_softreconfig_sync_done, NULL) == -1) 3427 fatal("%s: rib_dump_new", __func__); 3428 softreconfig++; 3429 log_info("starting fib sync for rib %s", 3430 rib->name); 3431 } else if (rib->fibstate == RECONF_REINIT) { 3432 if (rib_dump_new(i, AID_UNSPEC, RDE_RUNNER_ROUNDS, 3433 rib, rde_softreconfig_sync_reeval, 3434 rde_softreconfig_sync_done, NULL) == -1) 3435 fatal("%s: rib_dump_new", __func__); 3436 softreconfig++; 3437 log_info("starting re-evaluation of rib %s", 3438 rib->name); 3439 } 3440 } 3441 3442 LIST_FOREACH(peer, &peerlist, peer_l) { 3443 u_int8_t aid; 3444 3445 if (peer->reconf_out) { 3446 if (peer->export_type == EXPORT_NONE) { 3447 /* nothing to do here */ 3448 peer->reconf_out = 0; 3449 } else if (peer->export_type == EXPORT_DEFAULT_ROUTE) { 3450 /* just resend the default route */ 3451 for (aid = 0; aid < AID_MAX; aid++) { 3452 if (peer->capa.mp[aid]) 3453 up_generate_default(out_rules, 3454 peer, aid); 3455 } 3456 peer->reconf_out = 0; 3457 } else 3458 rib_byid(peer->loc_rib_id)->state = 3459 RECONF_RELOAD; 3460 } else if (peer->reconf_rib) { 3461 /* dump the full table to neighbors that changed rib */ 3462 for (aid = 0; aid < AID_MAX; aid++) { 3463 if (peer->capa.mp[aid]) 3464 peer_dump(peer, aid); 3465 } 3466 } 3467 } 3468 3469 for (i = 0; i < rib_size; i++) { 3470 struct rib *rib = rib_byid(i); 3471 if (rib == NULL) 3472 continue; 3473 if (rib->state == RECONF_RELOAD) { 3474 if (rib_dump_new(i, AID_UNSPEC, RDE_RUNNER_ROUNDS, 3475 rib, rde_softreconfig_out, 3476 rde_softreconfig_out_done, NULL) == -1) 3477 fatal("%s: rib_dump_new", __func__); 3478 softreconfig++; 3479 log_info("starting softreconfig out for rib %s", 3480 rib->name); 3481 } 3482 } 3483 3484 /* if nothing to do move to last stage */ 3485 if (softreconfig == 0) 3486 rde_softreconfig_done(); 3487 } 3488 3489 static void 3490 rde_softreconfig_out_done(void *arg, u_int8_t aid) 3491 { 3492 struct rib *rib = arg; 3493 3494 /* this RIB dump is done */ 3495 log_info("softreconfig out done for %s", rib->name); 3496 3497 /* check if other dumps are still running */ 3498 if (--softreconfig == 0) 3499 rde_softreconfig_done(); 3500 } 3501 3502 static void 3503 rde_softreconfig_done(void) 3504 { 3505 u_int16_t i; 3506 3507 for (i = 0; i < rib_size; i++) { 3508 struct rib *rib = rib_byid(i); 3509 if (rib == NULL) 3510 continue; 3511 rib->state = RECONF_NONE; 3512 } 3513 3514 log_info("RDE soft reconfiguration done"); 3515 imsg_compose(ibuf_main, IMSG_RECONF_DONE, 0, 0, 3516 -1, NULL, 0); 3517 } 3518 3519 static void 3520 rde_softreconfig_in(struct rib_entry *re, void *bula) 3521 { 3522 struct filterstate state; 3523 struct rib *rib; 3524 struct prefix *p; 3525 struct pt_entry *pt; 3526 struct rde_peer *peer; 3527 struct rde_aspath *asp; 3528 enum filter_actions action; 3529 struct bgpd_addr prefix; 3530 u_int16_t i; 3531 3532 pt = re->prefix; 3533 pt_getaddr(pt, &prefix); 3534 LIST_FOREACH(p, &re->prefix_h, entry.list.rib) { 3535 asp = prefix_aspath(p); 3536 peer = prefix_peer(p); 3537 3538 /* skip announced networks, they are never filtered */ 3539 if (asp->flags & F_PREFIX_ANNOUNCED) 3540 continue; 3541 3542 for (i = RIB_LOC_START; i < rib_size; i++) { 3543 rib = rib_byid(i); 3544 if (rib == NULL) 3545 continue; 3546 3547 if (rib->state != RECONF_RELOAD) 3548 continue; 3549 3550 rde_filterstate_prep(&state, asp, prefix_communities(p), 3551 prefix_nexthop(p), prefix_nhflags(p)); 3552 action = rde_filter(rib->in_rules, peer, peer, &prefix, 3553 pt->prefixlen, p->validation_state, &state); 3554 3555 if (action == ACTION_ALLOW) { 3556 /* update Local-RIB */ 3557 prefix_update(rib, peer, &state, &prefix, 3558 pt->prefixlen, p->validation_state); 3559 } else if (action == ACTION_DENY) { 3560 /* remove from Local-RIB */ 3561 prefix_withdraw(rib, peer, &prefix, 3562 pt->prefixlen); 3563 } 3564 3565 rde_filterstate_clean(&state); 3566 } 3567 } 3568 } 3569 3570 static void 3571 rde_softreconfig_out(struct rib_entry *re, void *bula) 3572 { 3573 struct prefix *p = re->active; 3574 struct rde_peer *peer; 3575 3576 if (p == NULL) 3577 /* no valid path for prefix */ 3578 return; 3579 3580 LIST_FOREACH(peer, &peerlist, peer_l) { 3581 if (peer->loc_rib_id == re->rib_id && peer->reconf_out) 3582 /* Regenerate all updates. */ 3583 up_generate_updates(out_rules, peer, p, p); 3584 } 3585 } 3586 3587 static void 3588 rde_softreconfig_sync_reeval(struct rib_entry *re, void *arg) 3589 { 3590 struct prefix_list prefixes; 3591 struct prefix *p, *next; 3592 struct rib *rib = arg; 3593 3594 if (rib->flags & F_RIB_NOEVALUATE) { 3595 /* 3596 * evaluation process is turned off 3597 * so remove all prefixes from adj-rib-out 3598 * also unlink nexthop if it was linked 3599 */ 3600 LIST_FOREACH(p, &re->prefix_h, entry.list.rib) { 3601 if (p->flags & PREFIX_NEXTHOP_LINKED) 3602 nexthop_unlink(p); 3603 } 3604 if (re->active) { 3605 rde_generate_updates(rib, NULL, re->active, 0); 3606 re->active = NULL; 3607 } 3608 return; 3609 } 3610 3611 /* evaluation process is turned on, so evaluate all prefixes again */ 3612 re->active = NULL; 3613 prefixes = re->prefix_h; 3614 LIST_INIT(&re->prefix_h); 3615 3616 LIST_FOREACH_SAFE(p, &prefixes, entry.list.rib, next) { 3617 /* need to re-link the nexthop if not already linked */ 3618 if ((p->flags & PREFIX_NEXTHOP_LINKED) == 0) 3619 nexthop_link(p); 3620 prefix_evaluate(re, p, p); 3621 } 3622 } 3623 3624 static void 3625 rde_softreconfig_sync_fib(struct rib_entry *re, void *bula) 3626 { 3627 if (re->active) 3628 rde_send_kroute(re_rib(re), re->active, NULL); 3629 } 3630 3631 static void 3632 rde_softreconfig_sync_done(void *arg, u_int8_t aid) 3633 { 3634 struct rib *rib = arg; 3635 3636 /* this RIB dump is done */ 3637 if (rib->fibstate == RECONF_RELOAD) 3638 log_info("fib sync done for %s", rib->name); 3639 else 3640 log_info("re-evaluation done for %s", rib->name); 3641 rib->fibstate = RECONF_NONE; 3642 3643 /* check if other dumps are still running */ 3644 if (--softreconfig == 0) 3645 rde_softreconfig_done(); 3646 } 3647 3648 /* 3649 * ROA specific functions. The roa set is updated independent of the config 3650 * so this runs outside of the softreconfig handlers. 3651 */ 3652 static void 3653 rde_roa_softreload(struct rib_entry *re, void *bula) 3654 { 3655 struct filterstate state; 3656 struct rib *rib; 3657 struct prefix *p; 3658 struct pt_entry *pt; 3659 struct rde_peer *peer; 3660 struct rde_aspath *asp; 3661 enum filter_actions action; 3662 struct bgpd_addr prefix; 3663 u_int8_t vstate; 3664 u_int16_t i; 3665 3666 pt = re->prefix; 3667 pt_getaddr(pt, &prefix); 3668 LIST_FOREACH(p, &re->prefix_h, entry.list.rib) { 3669 asp = prefix_aspath(p); 3670 peer = prefix_peer(p); 3671 3672 /* ROA validation state update */ 3673 vstate = rde_roa_validity(&rde_roa, 3674 &prefix, pt->prefixlen, aspath_origin(asp->aspath)); 3675 if (vstate == p->validation_state) 3676 continue; 3677 p->validation_state = vstate; 3678 3679 /* skip announced networks, they are never filtered */ 3680 if (asp->flags & F_PREFIX_ANNOUNCED) 3681 continue; 3682 3683 for (i = RIB_LOC_START; i < rib_size; i++) { 3684 rib = rib_byid(i); 3685 if (rib == NULL) 3686 continue; 3687 3688 rde_filterstate_prep(&state, asp, prefix_communities(p), 3689 prefix_nexthop(p), prefix_nhflags(p)); 3690 action = rde_filter(rib->in_rules, peer, peer, &prefix, 3691 pt->prefixlen, p->validation_state, &state); 3692 3693 if (action == ACTION_ALLOW) { 3694 /* update Local-RIB */ 3695 prefix_update(rib, peer, &state, &prefix, 3696 pt->prefixlen, p->validation_state); 3697 } else if (action == ACTION_DENY) { 3698 /* remove from Local-RIB */ 3699 prefix_withdraw(rib, peer, &prefix, 3700 pt->prefixlen); 3701 } 3702 3703 rde_filterstate_clean(&state); 3704 } 3705 } 3706 } 3707 3708 static void 3709 rde_roa_softreload_done(void *arg, u_int8_t aid) 3710 { 3711 /* the roa update is done */ 3712 log_info("ROA softreload done"); 3713 } 3714 3715 static void 3716 rde_roa_reload(void) 3717 { 3718 struct rde_prefixset roa_old; 3719 3720 roa_old = rde_roa; 3721 rde_roa = roa_new; 3722 memset(&roa_new, 0, sizeof(roa_new)); 3723 3724 /* check if roa changed */ 3725 if (trie_equal(&rde_roa.th, &roa_old.th)) { 3726 rde_roa.lastchange = roa_old.lastchange; 3727 trie_free(&roa_old.th); /* old roa no longer needed */ 3728 return; 3729 } 3730 3731 rde_roa.lastchange = getmonotime(); 3732 trie_free(&roa_old.th); /* old roa no longer needed */ 3733 3734 log_debug("ROA change: reloading Adj-RIB-In"); 3735 if (rib_dump_new(RIB_ADJ_IN, AID_UNSPEC, RDE_RUNNER_ROUNDS, 3736 rib_byid(RIB_ADJ_IN), rde_roa_softreload, 3737 rde_roa_softreload_done, NULL) == -1) 3738 fatal("%s: rib_dump_new", __func__); 3739 } 3740 3741 /* 3742 * generic helper function 3743 */ 3744 u_int32_t 3745 rde_local_as(void) 3746 { 3747 return (conf->as); 3748 } 3749 3750 int 3751 rde_decisionflags(void) 3752 { 3753 return (conf->flags & BGPD_FLAG_DECISION_MASK); 3754 } 3755 3756 int 3757 rde_as4byte(struct rde_peer *peer) 3758 { 3759 return (peer->capa.as4byte); 3760 } 3761 3762 static int 3763 rde_no_as_set(struct rde_peer *peer) 3764 { 3765 return (peer->flags & PEERFLAG_NO_AS_SET); 3766 } 3767 3768 /* End-of-RIB marker, RFC 4724 */ 3769 static void 3770 rde_peer_recv_eor(struct rde_peer *peer, u_int8_t aid) 3771 { 3772 peer->prefix_rcvd_eor++; 3773 3774 /* 3775 * First notify SE to avert a possible race with the restart timeout. 3776 * If the timeout fires before this imsg is processed by the SE it will 3777 * result in the same operation since the timeout issues a FLUSH which 3778 * does the same as the RESTARTED action (flushing stale routes). 3779 * The logic in the SE is so that only one of FLUSH or RESTARTED will 3780 * be sent back to the RDE and so peer_flush is only called once. 3781 */ 3782 if (imsg_compose(ibuf_se, IMSG_SESSION_RESTARTED, peer->conf.id, 3783 0, -1, &aid, sizeof(aid)) == -1) 3784 fatal("imsg_compose error while receiving EoR"); 3785 3786 log_peer_info(&peer->conf, "received %s EOR marker", 3787 aid2str(aid)); 3788 } 3789 3790 static void 3791 rde_peer_send_eor(struct rde_peer *peer, u_int8_t aid) 3792 { 3793 u_int16_t afi; 3794 u_int8_t safi; 3795 3796 peer->prefix_sent_eor++; 3797 3798 if (aid == AID_INET) { 3799 u_char null[4]; 3800 3801 bzero(&null, 4); 3802 if (imsg_compose(ibuf_se, IMSG_UPDATE, peer->conf.id, 3803 0, -1, &null, 4) == -1) 3804 fatal("imsg_compose error while sending EoR"); 3805 } else { 3806 u_int16_t i; 3807 u_char buf[10]; 3808 3809 if (aid2afi(aid, &afi, &safi) == -1) 3810 fatalx("peer_send_eor: bad AID"); 3811 3812 i = 0; /* v4 withdrawn len */ 3813 bcopy(&i, &buf[0], sizeof(i)); 3814 i = htons(6); /* path attr len */ 3815 bcopy(&i, &buf[2], sizeof(i)); 3816 buf[4] = ATTR_OPTIONAL; 3817 buf[5] = ATTR_MP_UNREACH_NLRI; 3818 buf[6] = 3; /* withdrawn len */ 3819 i = htons(afi); 3820 bcopy(&i, &buf[7], sizeof(i)); 3821 buf[9] = safi; 3822 3823 if (imsg_compose(ibuf_se, IMSG_UPDATE, peer->conf.id, 3824 0, -1, &buf, 10) == -1) 3825 fatal("%s %d imsg_compose error in peer_send_eor", 3826 __func__, __LINE__); 3827 } 3828 3829 log_peer_info(&peer->conf, "sending %s EOR marker", 3830 aid2str(aid)); 3831 } 3832 3833 /* 3834 * network announcement stuff 3835 */ 3836 void 3837 network_add(struct network_config *nc, struct filterstate *state) 3838 { 3839 struct l3vpn *vpn; 3840 struct filter_set_head *vpnset = NULL; 3841 struct in_addr prefix4; 3842 struct in6_addr prefix6; 3843 u_int8_t vstate; 3844 u_int16_t i; 3845 3846 if (nc->rd != 0) { 3847 SIMPLEQ_FOREACH(vpn, &conf->l3vpns, entry) { 3848 if (vpn->rd != nc->rd) 3849 continue; 3850 switch (nc->prefix.aid) { 3851 case AID_INET: 3852 prefix4 = nc->prefix.v4; 3853 memset(&nc->prefix, 0, sizeof(nc->prefix)); 3854 nc->prefix.aid = AID_VPN_IPv4; 3855 nc->prefix.rd = vpn->rd; 3856 nc->prefix.v4 = prefix4; 3857 nc->prefix.labellen = 3; 3858 nc->prefix.labelstack[0] = 3859 (vpn->label >> 12) & 0xff; 3860 nc->prefix.labelstack[1] = 3861 (vpn->label >> 4) & 0xff; 3862 nc->prefix.labelstack[2] = 3863 (vpn->label << 4) & 0xf0; 3864 nc->prefix.labelstack[2] |= BGP_MPLS_BOS; 3865 vpnset = &vpn->export; 3866 break; 3867 case AID_INET6: 3868 prefix6 = nc->prefix.v6; 3869 memset(&nc->prefix, 0, sizeof(nc->prefix)); 3870 nc->prefix.aid = AID_VPN_IPv6; 3871 nc->prefix.rd = vpn->rd; 3872 nc->prefix.v6 = prefix6; 3873 nc->prefix.labellen = 3; 3874 nc->prefix.labelstack[0] = 3875 (vpn->label >> 12) & 0xff; 3876 nc->prefix.labelstack[1] = 3877 (vpn->label >> 4) & 0xff; 3878 nc->prefix.labelstack[2] = 3879 (vpn->label << 4) & 0xf0; 3880 nc->prefix.labelstack[2] |= BGP_MPLS_BOS; 3881 vpnset = &vpn->export; 3882 break; 3883 default: 3884 log_warnx("unable to VPNize prefix"); 3885 filterset_free(&nc->attrset); 3886 return; 3887 } 3888 break; 3889 } 3890 if (vpn == NULL) { 3891 log_warnx("network_add: " 3892 "prefix %s/%u in non-existing l3vpn %s", 3893 log_addr(&nc->prefix), nc->prefixlen, 3894 log_rd(nc->rd)); 3895 return; 3896 } 3897 } 3898 3899 rde_apply_set(&nc->attrset, peerself, peerself, state, nc->prefix.aid); 3900 if (vpnset) 3901 rde_apply_set(vpnset, peerself, peerself, state, 3902 nc->prefix.aid); 3903 3904 vstate = rde_roa_validity(&rde_roa, &nc->prefix, 3905 nc->prefixlen, aspath_origin(state->aspath.aspath)); 3906 if (prefix_update(rib_byid(RIB_ADJ_IN), peerself, state, &nc->prefix, 3907 nc->prefixlen, vstate) == 1) 3908 peerself->prefix_cnt++; 3909 for (i = RIB_LOC_START; i < rib_size; i++) { 3910 struct rib *rib = rib_byid(i); 3911 if (rib == NULL) 3912 continue; 3913 rde_update_log("announce", i, peerself, 3914 state->nexthop ? &state->nexthop->exit_nexthop : NULL, 3915 &nc->prefix, nc->prefixlen); 3916 prefix_update(rib, peerself, state, &nc->prefix, 3917 nc->prefixlen, vstate); 3918 } 3919 filterset_free(&nc->attrset); 3920 } 3921 3922 void 3923 network_delete(struct network_config *nc) 3924 { 3925 struct l3vpn *vpn; 3926 struct in_addr prefix4; 3927 struct in6_addr prefix6; 3928 u_int32_t i; 3929 3930 if (nc->rd) { 3931 SIMPLEQ_FOREACH(vpn, &conf->l3vpns, entry) { 3932 if (vpn->rd != nc->rd) 3933 continue; 3934 switch (nc->prefix.aid) { 3935 case AID_INET: 3936 prefix4 = nc->prefix.v4; 3937 memset(&nc->prefix, 0, sizeof(nc->prefix)); 3938 nc->prefix.aid = AID_VPN_IPv4; 3939 nc->prefix.rd = vpn->rd; 3940 nc->prefix.v4 = prefix4; 3941 nc->prefix.labellen = 3; 3942 nc->prefix.labelstack[0] = 3943 (vpn->label >> 12) & 0xff; 3944 nc->prefix.labelstack[1] = 3945 (vpn->label >> 4) & 0xff; 3946 nc->prefix.labelstack[2] = 3947 (vpn->label << 4) & 0xf0; 3948 nc->prefix.labelstack[2] |= BGP_MPLS_BOS; 3949 break; 3950 case AID_INET6: 3951 prefix6 = nc->prefix.v6; 3952 memset(&nc->prefix, 0, sizeof(nc->prefix)); 3953 nc->prefix.aid = AID_VPN_IPv6; 3954 nc->prefix.rd = vpn->rd; 3955 nc->prefix.v6 = prefix6; 3956 nc->prefix.labellen = 3; 3957 nc->prefix.labelstack[0] = 3958 (vpn->label >> 12) & 0xff; 3959 nc->prefix.labelstack[1] = 3960 (vpn->label >> 4) & 0xff; 3961 nc->prefix.labelstack[2] = 3962 (vpn->label << 4) & 0xf0; 3963 nc->prefix.labelstack[2] |= BGP_MPLS_BOS; 3964 break; 3965 default: 3966 log_warnx("unable to VPNize prefix"); 3967 return; 3968 } 3969 } 3970 } 3971 3972 for (i = RIB_LOC_START; i < rib_size; i++) { 3973 struct rib *rib = rib_byid(i); 3974 if (rib == NULL) 3975 continue; 3976 if (prefix_withdraw(rib, peerself, &nc->prefix, 3977 nc->prefixlen)) 3978 rde_update_log("withdraw announce", i, peerself, 3979 NULL, &nc->prefix, nc->prefixlen); 3980 } 3981 if (prefix_withdraw(rib_byid(RIB_ADJ_IN), peerself, &nc->prefix, 3982 nc->prefixlen)) 3983 peerself->prefix_cnt--; 3984 } 3985 3986 static void 3987 network_dump_upcall(struct rib_entry *re, void *ptr) 3988 { 3989 struct prefix *p; 3990 struct rde_aspath *asp; 3991 struct kroute_full k; 3992 struct bgpd_addr addr; 3993 struct rde_dump_ctx *ctx = ptr; 3994 3995 LIST_FOREACH(p, &re->prefix_h, entry.list.rib) { 3996 asp = prefix_aspath(p); 3997 if (!(asp->flags & F_PREFIX_ANNOUNCED)) 3998 continue; 3999 pt_getaddr(p->pt, &addr); 4000 4001 bzero(&k, sizeof(k)); 4002 memcpy(&k.prefix, &addr, sizeof(k.prefix)); 4003 if (prefix_nexthop(p) == NULL || 4004 prefix_nexthop(p)->state != NEXTHOP_REACH) 4005 k.nexthop.aid = k.prefix.aid; 4006 else 4007 memcpy(&k.nexthop, &prefix_nexthop(p)->true_nexthop, 4008 sizeof(k.nexthop)); 4009 k.prefixlen = p->pt->prefixlen; 4010 k.flags = F_KERNEL; 4011 if ((asp->flags & F_ANN_DYNAMIC) == 0) 4012 k.flags = F_STATIC; 4013 if (imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_NETWORK, 0, 4014 ctx->req.pid, -1, &k, sizeof(k)) == -1) 4015 log_warnx("network_dump_upcall: " 4016 "imsg_compose error"); 4017 } 4018 } 4019 4020 static void 4021 network_flush_upcall(struct rib_entry *re, void *ptr) 4022 { 4023 struct rde_peer *peer = ptr; 4024 struct bgpd_addr addr; 4025 struct prefix *p; 4026 u_int32_t i; 4027 u_int8_t prefixlen; 4028 4029 p = prefix_bypeer(re, peer); 4030 if (p == NULL) 4031 return; 4032 if ((prefix_aspath(p)->flags & F_ANN_DYNAMIC) != F_ANN_DYNAMIC) 4033 return; 4034 4035 pt_getaddr(re->prefix, &addr); 4036 prefixlen = re->prefix->prefixlen; 4037 4038 for (i = RIB_LOC_START; i < rib_size; i++) { 4039 struct rib *rib = rib_byid(i); 4040 if (rib == NULL) 4041 continue; 4042 if (prefix_withdraw(rib, peer, &addr, prefixlen) == 1) 4043 rde_update_log("flush announce", i, peer, 4044 NULL, &addr, prefixlen); 4045 } 4046 4047 if (prefix_withdraw(rib_byid(RIB_ADJ_IN), peer, &addr, 4048 prefixlen) == 1) 4049 peer->prefix_cnt--; 4050 } 4051 4052 /* clean up */ 4053 void 4054 rde_shutdown(void) 4055 { 4056 /* 4057 * the decision process is turned off if rde_quit = 1 and 4058 * rde_shutdown depends on this. 4059 */ 4060 4061 /* First all peers go down */ 4062 peer_foreach(peer_down, NULL); 4063 4064 /* free filters */ 4065 filterlist_free(out_rules); 4066 filterlist_free(out_rules_tmp); 4067 4068 /* kill the VPN configs */ 4069 free_l3vpns(&conf->l3vpns); 4070 4071 /* now check everything */ 4072 rib_shutdown(); 4073 nexthop_shutdown(); 4074 path_shutdown(); 4075 aspath_shutdown(); 4076 attr_shutdown(); 4077 pt_shutdown(); 4078 peer_shutdown(); 4079 } 4080 4081 struct rde_prefixset * 4082 rde_find_prefixset(char *name, struct rde_prefixset_head *p) 4083 { 4084 struct rde_prefixset *ps; 4085 4086 SIMPLEQ_FOREACH(ps, p, entry) { 4087 if (!strcmp(ps->name, name)) 4088 return (ps); 4089 } 4090 return (NULL); 4091 } 4092 4093 void 4094 rde_mark_prefixsets_dirty(struct rde_prefixset_head *psold, 4095 struct rde_prefixset_head *psnew) 4096 { 4097 struct rde_prefixset *new, *old; 4098 4099 SIMPLEQ_FOREACH(new, psnew, entry) { 4100 if ((psold == NULL) || 4101 (old = rde_find_prefixset(new->name, psold)) == NULL) { 4102 new->dirty = 1; 4103 new->lastchange = getmonotime(); 4104 } else { 4105 if (trie_equal(&new->th, &old->th) == 0) { 4106 new->dirty = 1; 4107 new->lastchange = getmonotime(); 4108 } else 4109 new->lastchange = old->lastchange; 4110 } 4111 } 4112 } 4113 4114 u_int8_t 4115 rde_roa_validity(struct rde_prefixset *ps, struct bgpd_addr *prefix, 4116 u_int8_t plen, u_int32_t as) 4117 { 4118 int r; 4119 4120 r = trie_roa_check(&ps->th, prefix, plen, as); 4121 return (r & ROA_MASK); 4122 } 4123 4124 int 4125 ovs_match(struct prefix *p, u_int32_t flag) 4126 { 4127 if (flag & (F_CTL_OVS_VALID|F_CTL_OVS_INVALID|F_CTL_OVS_NOTFOUND)) { 4128 switch (prefix_vstate(p)) { 4129 case ROA_VALID: 4130 if (!(flag & F_CTL_OVS_VALID)) 4131 return 0; 4132 break; 4133 case ROA_INVALID: 4134 if (!(flag & F_CTL_OVS_INVALID)) 4135 return 0; 4136 break; 4137 case ROA_NOTFOUND: 4138 if (!(flag & F_CTL_OVS_NOTFOUND)) 4139 return 0; 4140 break; 4141 default: 4142 break; 4143 } 4144 } 4145 4146 return 1; 4147 } 4148