/*	$OpenBSD: rde.c,v 1.350 2016/09/03 16:22:17 renato Exp $ */

/*
 * Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/types.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/resource.h>

#include <errno.h>
#include <ifaddrs.h>
#include <pwd.h>
#include <poll.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <err.h>

#include "bgpd.h"
#include "mrt.h"
#include "rde.h"
#include "session.h"

#define PFD_PIPE_MAIN		0
#define PFD_PIPE_SESSION	1
#define PFD_PIPE_SESSION_CTL	2
#define PFD_PIPE_COUNT		3

void		 rde_sighdlr(int);
void		 rde_dispatch_imsg_session(struct imsgbuf *);
void		 rde_dispatch_imsg_parent(struct imsgbuf *);
int		 rde_update_dispatch(struct imsg *);
void		 rde_update_update(struct rde_peer *, struct rde_aspath *,
		     struct bgpd_addr *, u_int8_t);
void		 rde_update_withdraw(struct rde_peer *, struct bgpd_addr *,
		     u_int8_t);
int		 rde_attr_parse(u_char *, u_int16_t, struct rde_peer *,
		     struct rde_aspath *, struct mpattr *);
int		 rde_attr_add(struct rde_aspath *, u_char *, u_int16_t);
u_int8_t	 rde_attr_missing(struct rde_aspath *, int, u_int16_t);
int		 rde_get_mp_nexthop(u_char *, u_int16_t, u_int8_t,
		     struct rde_aspath *);
int		 rde_update_extract_prefix(u_char *, u_int16_t, void *,
		     u_int8_t, u_int8_t);
int		 rde_update_get_prefix(u_char *, u_int16_t, struct bgpd_addr *,
		     u_int8_t *);
int		 rde_update_get_prefix6(u_char *, u_int16_t, struct bgpd_addr *,
		     u_int8_t *);
int		 rde_update_get_vpn4(u_char *, u_int16_t, struct bgpd_addr *,
		     u_int8_t *);
void		 rde_update_err(struct rde_peer *, u_int8_t, u_int8_t,
		     void *, u_int16_t);
void		 rde_update_log(const char *, u_int16_t,
		     const struct rde_peer *, const struct bgpd_addr *,
		     const struct bgpd_addr *, u_int8_t);
void		 rde_as4byte_fixup(struct rde_peer *, struct rde_aspath *);
void		 rde_reflector(struct rde_peer *, struct rde_aspath *);

void		 rde_dump_rib_as(struct prefix *, struct rde_aspath *, pid_t,
		     int);
void		 rde_dump_filter(struct prefix *,
		     struct ctl_show_rib_request *);
void		 rde_dump_filterout(struct rde_peer *, struct prefix *,
		     struct ctl_show_rib_request *);
void		 rde_dump_upcall(struct rib_entry *, void *);
void		 rde_dump_prefix_upcall(struct rib_entry *, void *);
void		 rde_dump_ctx_new(struct ctl_show_rib_request *, pid_t,
		     enum imsg_type);
void		 rde_dump_mrt_new(struct mrt *, pid_t, int);
void		 rde_dump_done(void *);

int		 rde_rdomain_import(struct rde_aspath *, struct rdomain *);
void		 rde_reload_done(void);
void		 rde_softreconfig_out(struct rib_entry *, void *);
void		 rde_softreconfig_in(struct rib_entry *, void *);
void		 rde_softreconfig_unload_peer(struct rib_entry *, void *);
void		 rde_up_dump_upcall(struct rib_entry *, void *);
void		 rde_update_queue_runner(void);
void		 rde_update6_queue_runner(u_int8_t);

void		 peer_init(u_int32_t);
void		 peer_shutdown(void);
int		 peer_localaddrs(struct rde_peer *, struct bgpd_addr *);
struct rde_peer	*peer_add(u_int32_t, struct peer_config *);
struct rde_peer	*peer_get(u_int32_t);
void		 peer_up(u_int32_t, struct session_up *);
void		 peer_down(u_int32_t);
void		 peer_flush(struct rde_peer *, u_int8_t);
void		 peer_stale(u_int32_t, u_int8_t);
void		 peer_recv_eor(struct rde_peer *, u_int8_t);
void		 peer_dump(u_int32_t, u_int8_t);
void		 peer_send_eor(struct rde_peer *, u_int8_t);

void		 network_add(struct network_config *, int);
void		 network_delete(struct network_config *, int);
void		 network_dump_upcall(struct rib_entry *, void *);

void		 rde_shutdown(void);
int		 sa_cmp(struct bgpd_addr *, struct sockaddr *);

volatile sig_atomic_t	 rde_quit = 0;
struct bgpd_config	*conf, *nconf;
time_t			 reloadtime;
struct rde_peer_head	 peerlist;
struct rde_peer		*peerself;
struct filter_head	*out_rules, *out_rules_tmp;
struct rdomain_head	*rdomains_l, *newdomains;
struct imsgbuf		*ibuf_se;
struct imsgbuf		*ibuf_se_ctl;
struct imsgbuf		*ibuf_main;
struct rde_memstats	 rdemem;

struct rde_dump_ctx {
	struct rib_context		ribctx;
	struct ctl_show_rib_request	req;
	sa_family_t			af;
};

struct rde_mrt_ctx {
	struct mrt		mrt;
	struct rib_context	ribctx;
	LIST_ENTRY(rde_mrt_ctx)	entry;
};

LIST_HEAD(, rde_mrt_ctx) rde_mrts = LIST_HEAD_INITIALIZER(rde_mrts);
u_int rde_mrt_cnt;

void
rde_sighdlr(int sig)
{
	switch (sig) {
	case SIGINT:
	case SIGTERM:
		rde_quit = 1;
		break;
	}
}

u_int32_t	peerhashsize = 64;
u_int32_t	pathhashsize = 1024;
u_int32_t	attrhashsize = 512;
u_int32_t	nexthophashsize = 64;
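/*
 * A short orientation note: rde_main() below is the entry point of the
 * route decision engine process.  It chroots to BGPD_USER's home
 * directory, drops privileges and then services three imsg pipes:
 * PFD_PIPE_MAIN to the parent (inherited on fd 3), plus
 * PFD_PIPE_SESSION and PFD_PIPE_SESSION_CTL to the session engine,
 * which arrive later via IMSG_SOCKET_CONN and IMSG_SOCKET_CONN_CTL.
 * File descriptors of active MRT table dumps are polled after these
 * three fixed slots.
 */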
void
rde_main(int debug, int verbose)
{
	struct passwd		*pw;
	struct pollfd		*pfd = NULL;
	struct rde_mrt_ctx	*mctx, *xmctx;
	void			*newp;
	u_int			 pfd_elms = 0, i, j;
	int			 timeout;
	u_int8_t		 aid;

	bgpd_process = PROC_RDE;
	log_procname = log_procnames[bgpd_process];

	log_init(debug);
	log_verbose(verbose);

	if ((pw = getpwnam(BGPD_USER)) == NULL)
		fatal("getpwnam");

	if (chroot(pw->pw_dir) == -1)
		fatal("chroot");
	if (chdir("/") == -1)
		fatal("chdir(\"/\")");

	setproctitle("route decision engine");

	if (setgroups(1, &pw->pw_gid) ||
	    setresgid(pw->pw_gid, pw->pw_gid, pw->pw_gid) ||
	    setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid))
		fatal("can't drop privileges");

	if (pledge("stdio route recvfd", NULL) == -1)
		fatal("pledge");

	signal(SIGTERM, rde_sighdlr);
	signal(SIGINT, rde_sighdlr);
	signal(SIGPIPE, SIG_IGN);
	signal(SIGHUP, SIG_IGN);
	signal(SIGALRM, SIG_IGN);
	signal(SIGUSR1, SIG_IGN);

	/* initialize the RIB structures */
	if ((ibuf_main = malloc(sizeof(struct imsgbuf))) == NULL)
		fatal(NULL);
	imsg_init(ibuf_main, 3);

	pt_init();
	path_init(pathhashsize);
	aspath_init(pathhashsize);
	attr_init(attrhashsize);
	nexthop_init(nexthophashsize);
	peer_init(peerhashsize);

	out_rules = calloc(1, sizeof(struct filter_head));
	if (out_rules == NULL)
		fatal(NULL);
	TAILQ_INIT(out_rules);
	rdomains_l = calloc(1, sizeof(struct rdomain_head));
	if (rdomains_l == NULL)
		fatal(NULL);
	SIMPLEQ_INIT(rdomains_l);
	if ((conf = calloc(1, sizeof(struct bgpd_config))) == NULL)
		fatal(NULL);
	log_info("route decision engine ready");
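	/*
	 * Main event loop: the pollfd array is grown on demand so the
	 * three fixed pipe slots are followed by one slot per MRT dump
	 * with queued output, poll() blocks forever unless a rib dump
	 * is pending, and whatever became readable or writable is then
	 * dispatched below.
	 */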
	while (rde_quit == 0) {
		if (pfd_elms < PFD_PIPE_COUNT + rde_mrt_cnt) {
			if ((newp = reallocarray(pfd,
			    PFD_PIPE_COUNT + rde_mrt_cnt,
			    sizeof(struct pollfd))) == NULL) {
				/* panic for now */
				log_warn("could not resize pfd from %u -> %u"
				    " entries", pfd_elms, PFD_PIPE_COUNT +
				    rde_mrt_cnt);
				fatalx("exiting");
			}
			pfd = newp;
			pfd_elms = PFD_PIPE_COUNT + rde_mrt_cnt;
		}
		timeout = INFTIM;
		bzero(pfd, sizeof(struct pollfd) * pfd_elms);

		set_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main);
		set_pollfd(&pfd[PFD_PIPE_SESSION], ibuf_se);
		set_pollfd(&pfd[PFD_PIPE_SESSION_CTL], ibuf_se_ctl);

		if (rib_dump_pending() &&
		    ibuf_se_ctl && ibuf_se_ctl->w.queued == 0)
			timeout = 0;

		i = PFD_PIPE_COUNT;
		for (mctx = LIST_FIRST(&rde_mrts); mctx != 0; mctx = xmctx) {
			xmctx = LIST_NEXT(mctx, entry);
			if (mctx->mrt.wbuf.queued) {
				pfd[i].fd = mctx->mrt.wbuf.fd;
				pfd[i].events = POLLOUT;
				i++;
			} else if (mctx->mrt.state == MRT_STATE_REMOVE) {
				close(mctx->mrt.wbuf.fd);
				LIST_REMOVE(&mctx->ribctx, entry);
				LIST_REMOVE(mctx, entry);
				free(mctx);
				rde_mrt_cnt--;
			}
		}

		if (poll(pfd, i, timeout) == -1) {
			if (errno != EINTR)
				fatal("poll error");
			continue;
		}

		if (handle_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main) == -1)
			fatalx("Lost connection to parent");
		else
			rde_dispatch_imsg_parent(ibuf_main);

		if (handle_pollfd(&pfd[PFD_PIPE_SESSION], ibuf_se) == -1) {
			log_warnx("RDE: Lost connection to SE");
			msgbuf_clear(&ibuf_se->w);
			free(ibuf_se);
			ibuf_se = NULL;
		} else
			rde_dispatch_imsg_session(ibuf_se);

		if (handle_pollfd(&pfd[PFD_PIPE_SESSION_CTL], ibuf_se_ctl) ==
		    -1) {
			log_warnx("RDE: Lost connection to SE control");
			msgbuf_clear(&ibuf_se_ctl->w);
			free(ibuf_se_ctl);
			ibuf_se_ctl = NULL;
		} else
			rde_dispatch_imsg_session(ibuf_se_ctl);

		for (j = PFD_PIPE_COUNT, mctx = LIST_FIRST(&rde_mrts);
		    j < i && mctx != 0; j++) {
			if (pfd[j].fd == mctx->mrt.wbuf.fd &&
			    pfd[j].revents & POLLOUT)
				mrt_write(&mctx->mrt);
			mctx = LIST_NEXT(mctx, entry);
		}

		rde_update_queue_runner();
		for (aid = AID_INET6; aid < AID_MAX; aid++)
			rde_update6_queue_runner(aid);
		if (rib_dump_pending() &&
		    ibuf_se_ctl && ibuf_se_ctl->w.queued <= 10)
			rib_dump_runner();
	}

	/* close pipes */
	if (ibuf_se) {
		msgbuf_clear(&ibuf_se->w);
		close(ibuf_se->fd);
		free(ibuf_se);
	}
	if (ibuf_se_ctl) {
		msgbuf_clear(&ibuf_se_ctl->w);
		close(ibuf_se_ctl->fd);
		free(ibuf_se_ctl);
	}
	msgbuf_clear(&ibuf_main->w);
	close(ibuf_main->fd);
	free(ibuf_main);

	/* do not clean up on shutdown on production, it takes ages. */
	if (debug)
		rde_shutdown();

	while ((mctx = LIST_FIRST(&rde_mrts)) != NULL) {
		msgbuf_clear(&mctx->mrt.wbuf);
		close(mctx->mrt.wbuf.fd);
		LIST_REMOVE(&mctx->ribctx, entry);
		LIST_REMOVE(mctx, entry);
		free(mctx);
	}

	log_info("route decision engine exiting");
	exit(0);
}

struct network_config	 netconf_s, netconf_p;
struct filter_set_head	*session_set, *parent_set;
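/*
 * Both dispatch loops below drain one imsg at a time.  Every imsg
 * carries a header with type, total length (including IMSG_HEADER_SIZE),
 * the peer id and the sender pid, which is why fixed-size payloads are
 * validated with "imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(...)"
 * before being copied out with memcpy().
 */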
void
rde_dispatch_imsg_session(struct imsgbuf *ibuf)
{
	struct imsg		 imsg;
	struct peer		 p;
	struct peer_config	 pconf;
	struct session_up	 sup;
	struct ctl_show_rib	 csr;
	struct ctl_show_rib_request	req;
	struct rde_peer		*peer;
	struct rde_aspath	*asp;
	struct filter_set	*s;
	struct nexthop		*nh;
	u_int8_t		*asdata;
	ssize_t			 n;
	int			 verbose;
	u_int16_t		 len;
	u_int8_t		 aid;

	while (ibuf) {
		if ((n = imsg_get(ibuf, &imsg)) == -1)
			fatal("rde_dispatch_imsg_session: imsg_get error");
		if (n == 0)
			break;

		switch (imsg.hdr.type) {
		case IMSG_UPDATE:
			rde_update_dispatch(&imsg);
			break;
		case IMSG_SESSION_ADD:
			if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(pconf))
				fatalx("incorrect size of session request");
			memcpy(&pconf, imsg.data, sizeof(pconf));
			peer_add(imsg.hdr.peerid, &pconf);
			break;
		case IMSG_SESSION_UP:
			if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(sup))
				fatalx("incorrect size of session request");
			memcpy(&sup, imsg.data, sizeof(sup));
			peer_up(imsg.hdr.peerid, &sup);
			break;
		case IMSG_SESSION_DOWN:
			peer_down(imsg.hdr.peerid);
			break;
		case IMSG_SESSION_STALE:
			if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(aid)) {
				log_warnx("rde_dispatch: wrong imsg len");
				break;
			}
			memcpy(&aid, imsg.data, sizeof(aid));
			if (aid >= AID_MAX)
				fatalx("IMSG_SESSION_STALE: bad AID");
			peer_stale(imsg.hdr.peerid, aid);
			break;
		case IMSG_SESSION_FLUSH:
			if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(aid)) {
				log_warnx("rde_dispatch: wrong imsg len");
				break;
			}
			memcpy(&aid, imsg.data, sizeof(aid));
			if (aid >= AID_MAX)
				fatalx("IMSG_SESSION_FLUSH: bad AID");
			if ((peer = peer_get(imsg.hdr.peerid)) == NULL) {
				log_warnx("rde_dispatch: unknown peer id %d",
				    imsg.hdr.peerid);
				break;
			}
			peer_flush(peer, aid);
			break;
		case IMSG_SESSION_RESTARTED:
			if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(aid)) {
				log_warnx("rde_dispatch: wrong imsg len");
				break;
			}
			memcpy(&aid, imsg.data, sizeof(aid));
			if (aid >= AID_MAX)
				fatalx("IMSG_SESSION_RESTARTED: bad AID");
			if ((peer = peer_get(imsg.hdr.peerid)) == NULL) {
				log_warnx("rde_dispatch: unknown peer id %d",
				    imsg.hdr.peerid);
				break;
			}
			if (peer->staletime[aid])
				peer_flush(peer, aid);
			break;
		case IMSG_REFRESH:
			if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(aid)) {
				log_warnx("rde_dispatch: wrong imsg len");
				break;
			}
			memcpy(&aid, imsg.data, sizeof(aid));
			if (aid >= AID_MAX)
				fatalx("IMSG_REFRESH: bad AID");
			peer_dump(imsg.hdr.peerid, aid);
			break;
		case IMSG_NETWORK_ADD:
			if (imsg.hdr.len - IMSG_HEADER_SIZE !=
			    sizeof(struct network_config)) {
				log_warnx("rde_dispatch: wrong imsg len");
				break;
			}
			memcpy(&netconf_s, imsg.data, sizeof(netconf_s));
			TAILQ_INIT(&netconf_s.attrset);
			session_set = &netconf_s.attrset;
			break;
		case IMSG_NETWORK_ASPATH:
			if (imsg.hdr.len - IMSG_HEADER_SIZE <
			    sizeof(struct ctl_show_rib)) {
				log_warnx("rde_dispatch: wrong imsg len");
				bzero(&netconf_s, sizeof(netconf_s));
				break;
			}
			asdata = imsg.data;
			asdata += sizeof(struct ctl_show_rib);
			memcpy(&csr, imsg.data, sizeof(csr));
			if (csr.aspath_len + sizeof(csr) > imsg.hdr.len -
			    IMSG_HEADER_SIZE) {
				log_warnx("rde_dispatch: wrong aspath len");
				bzero(&netconf_s, sizeof(netconf_s));
				break;
			}
			asp = path_get();
			asp->lpref = csr.local_pref;
			asp->med = csr.med;
			asp->weight = csr.weight;
			asp->flags = csr.flags;
			asp->origin = csr.origin;
			asp->flags |= F_PREFIX_ANNOUNCED | F_ANN_DYNAMIC;
			asp->aspath = aspath_get(asdata, csr.aspath_len);
			netconf_s.asp = asp;
			break;
		case IMSG_NETWORK_ATTR:
			if (imsg.hdr.len <= IMSG_HEADER_SIZE) {
				log_warnx("rde_dispatch: wrong imsg len");
				break;
			}
			/* parse path attributes */
			len = imsg.hdr.len - IMSG_HEADER_SIZE;
			asp = netconf_s.asp;
			if (rde_attr_add(asp, imsg.data, len) == -1) {
				log_warnx("rde_dispatch: bad network "
				    "attribute");
				path_put(asp);
				bzero(&netconf_s, sizeof(netconf_s));
				break;
			}
			break;
		case IMSG_NETWORK_DONE:
			if (imsg.hdr.len != IMSG_HEADER_SIZE) {
				log_warnx("rde_dispatch: wrong imsg len");
				break;
			}
			session_set = NULL;
			switch (netconf_s.prefix.aid) {
			case AID_INET:
				if (netconf_s.prefixlen > 32)
					goto badnet;
				network_add(&netconf_s, 0);
				break;
			case AID_INET6:
				if (netconf_s.prefixlen > 128)
					goto badnet;
				network_add(&netconf_s, 0);
				break;
			case 0:
				/* something failed beforehand */
				break;
			default:
badnet:
				log_warnx("rde_dispatch: bad network");
				break;
			}
			break;
		case IMSG_NETWORK_REMOVE:
			if (imsg.hdr.len - IMSG_HEADER_SIZE !=
			    sizeof(struct network_config)) {
				log_warnx("rde_dispatch: wrong imsg len");
				break;
			}
			memcpy(&netconf_s, imsg.data, sizeof(netconf_s));
			TAILQ_INIT(&netconf_s.attrset);
			network_delete(&netconf_s, 0);
			break;
		case IMSG_NETWORK_FLUSH:
			if (imsg.hdr.len != IMSG_HEADER_SIZE) {
				log_warnx("rde_dispatch: wrong imsg len");
				break;
			}
			prefix_network_clean(peerself, time(NULL),
			    F_ANN_DYNAMIC);
			break;
		case IMSG_FILTER_SET:
			if (imsg.hdr.len - IMSG_HEADER_SIZE !=
			    sizeof(struct filter_set)) {
				log_warnx("rde_dispatch: wrong imsg len");
				break;
			}
			if (session_set == NULL) {
				log_warnx("rde_dispatch: "
				    "IMSG_FILTER_SET unexpected");
				break;
			}
			if ((s = malloc(sizeof(struct filter_set))) == NULL)
				fatal(NULL);
			memcpy(s, imsg.data, sizeof(struct filter_set));
			TAILQ_INSERT_TAIL(session_set, s, entry);

			if (s->type == ACTION_SET_NEXTHOP) {
				nh = nexthop_get(&s->action.nexthop);
				nh->refcnt++;
			}
			break;
		case IMSG_CTL_SHOW_NETWORK:
		case IMSG_CTL_SHOW_RIB:
		case IMSG_CTL_SHOW_RIB_AS:
		case IMSG_CTL_SHOW_RIB_COMMUNITY:
		case IMSG_CTL_SHOW_RIB_PREFIX:
			if (imsg.hdr.len != IMSG_HEADER_SIZE + sizeof(req)) {
				log_warnx("rde_dispatch: wrong imsg len");
				break;
			}
			memcpy(&req, imsg.data, sizeof(req));
			rde_dump_ctx_new(&req, imsg.hdr.pid, imsg.hdr.type);
			break;
		case IMSG_CTL_SHOW_NEIGHBOR:
			if (imsg.hdr.len - IMSG_HEADER_SIZE !=
			    sizeof(struct peer)) {
				log_warnx("rde_dispatch: wrong imsg len");
				break;
			}
			memcpy(&p, imsg.data, sizeof(struct peer));
			peer = peer_get(p.conf.id);
			if (peer != NULL) {
				p.stats.prefix_cnt = peer->prefix_cnt;
				p.stats.prefix_rcvd_update =
				    peer->prefix_rcvd_update;
				p.stats.prefix_rcvd_withdraw =
				    peer->prefix_rcvd_withdraw;
				p.stats.prefix_rcvd_eor =
				    peer->prefix_rcvd_eor;
				p.stats.prefix_sent_update =
				    peer->prefix_sent_update;
				p.stats.prefix_sent_withdraw =
				    peer->prefix_sent_withdraw;
				p.stats.prefix_sent_eor =
				    peer->prefix_sent_eor;
			}
			imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_NEIGHBOR, 0,
			    imsg.hdr.pid, -1, &p, sizeof(struct peer));
			break;
		case IMSG_CTL_END:
			imsg_compose(ibuf_se_ctl, IMSG_CTL_END, 0, imsg.hdr.pid,
			    -1, NULL, 0);
			break;
		case IMSG_CTL_SHOW_RIB_MEM:
			imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_RIB_MEM, 0,
			    imsg.hdr.pid, -1, &rdemem, sizeof(rdemem));
			break;
		case IMSG_CTL_LOG_VERBOSE:
			/* already checked by SE */
			memcpy(&verbose, imsg.data, sizeof(verbose));
			log_verbose(verbose);
			break;
		default:
			break;
		}
		imsg_free(&imsg);
	}
}
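/*
 * Messages from the parent process: the pipes to the SE, dynamically
 * announced networks, reconfiguration state, nexthop updates and MRT
 * dump requests.  Note how a reload is accumulated piecewise in
 * out_rules_tmp, the per-RIB in_rules_tmp lists and newdomains until
 * IMSG_RECONF_DONE finally triggers rde_reload_done().
 */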
void
rde_dispatch_imsg_parent(struct imsgbuf *ibuf)
{
	static struct rdomain	*rd;
	struct imsg		 imsg;
	struct mrt		 xmrt;
	struct rde_rib		 rn;
	struct imsgbuf		*i;
	struct filter_head	*nr;
	struct filter_rule	*r;
	struct filter_set	*s;
	struct nexthop		*nh;
	int			 n, fd;
	u_int16_t		 rid;

	while (ibuf) {
		if ((n = imsg_get(ibuf, &imsg)) == -1)
			fatal("rde_dispatch_imsg_parent: imsg_get error");
		if (n == 0)
			break;

		switch (imsg.hdr.type) {
		case IMSG_SOCKET_CONN:
		case IMSG_SOCKET_CONN_CTL:
			if ((fd = imsg.fd) == -1) {
				log_warnx("expected to receive imsg fd to "
				    "SE but didn't receive any");
				break;
			}
			if ((i = malloc(sizeof(struct imsgbuf))) == NULL)
				fatal(NULL);
			imsg_init(i, fd);
			if (imsg.hdr.type == IMSG_SOCKET_CONN) {
				if (ibuf_se) {
					log_warnx("Unexpected imsg connection "
					    "to SE received");
					msgbuf_clear(&ibuf_se->w);
					free(ibuf_se);
				}
				ibuf_se = i;
			} else {
				if (ibuf_se_ctl) {
					log_warnx("Unexpected imsg ctl "
					    "connection to SE received");
					msgbuf_clear(&ibuf_se_ctl->w);
					free(ibuf_se_ctl);
				}
				ibuf_se_ctl = i;
			}
			break;
		case IMSG_NETWORK_ADD:
			if (imsg.hdr.len - IMSG_HEADER_SIZE !=
			    sizeof(struct network_config)) {
				log_warnx("rde_dispatch: wrong imsg len");
				break;
			}
			memcpy(&netconf_p, imsg.data, sizeof(netconf_p));
			TAILQ_INIT(&netconf_p.attrset);
			parent_set = &netconf_p.attrset;
			break;
		case IMSG_NETWORK_DONE:
			parent_set = NULL;
			network_add(&netconf_p, 1);
			break;
		case IMSG_NETWORK_REMOVE:
			if (imsg.hdr.len - IMSG_HEADER_SIZE !=
			    sizeof(struct network_config)) {
				log_warnx("rde_dispatch: wrong imsg len");
				break;
			}
			memcpy(&netconf_p, imsg.data, sizeof(netconf_p));
			TAILQ_INIT(&netconf_p.attrset);
			network_delete(&netconf_p, 1);
			break;
		case IMSG_RECONF_CONF:
			if (imsg.hdr.len - IMSG_HEADER_SIZE !=
			    sizeof(struct bgpd_config))
				fatalx("IMSG_RECONF_CONF bad len");
			reloadtime = time(NULL);
			out_rules_tmp = calloc(1, sizeof(struct filter_head));
			if (out_rules_tmp == NULL)
				fatal(NULL);
			TAILQ_INIT(out_rules_tmp);
			newdomains = calloc(1, sizeof(struct rdomain_head));
			if (newdomains == NULL)
				fatal(NULL);
			SIMPLEQ_INIT(newdomains);
			if ((nconf = malloc(sizeof(struct bgpd_config))) ==
			    NULL)
				fatal(NULL);
			memcpy(nconf, imsg.data, sizeof(struct bgpd_config));
			for (rid = 0; rid < rib_size; rid++) {
				if (*ribs[rid].name == '\0')
					break;
				ribs[rid].state = RECONF_DELETE;
			}
			break;
		case IMSG_RECONF_RIB:
			if (imsg.hdr.len - IMSG_HEADER_SIZE !=
			    sizeof(struct rde_rib))
				fatalx("IMSG_RECONF_RIB bad len");
			memcpy(&rn, imsg.data, sizeof(rn));
			rid = rib_find(rn.name);
			if (rid == RIB_FAILED)
				rib_new(rn.name, rn.rtableid, rn.flags);
			else if (ribs[rid].rtableid != rn.rtableid ||
			    (ribs[rid].flags & F_RIB_HASNOFIB) !=
			    (rn.flags & F_RIB_HASNOFIB)) {
				struct filter_head	*in_rules;
				/*
				 * Big hammer in the F_RIB_HASNOFIB case but
				 * not often enough used to optimise it more.
				 * Need to save the filters so that they're not
				 * lost.
				 */
				in_rules = ribs[rid].in_rules;
				ribs[rid].in_rules = NULL;
				rib_free(&ribs[rid]);
				rib_new(rn.name, rn.rtableid, rn.flags);
				ribs[rid].in_rules = in_rules;
			} else
				ribs[rid].state = RECONF_KEEP;
			break;
		case IMSG_RECONF_FILTER:
			if (imsg.hdr.len - IMSG_HEADER_SIZE !=
			    sizeof(struct filter_rule))
				fatalx("IMSG_RECONF_FILTER bad len");
			if ((r = malloc(sizeof(struct filter_rule))) == NULL)
				fatal(NULL);
			memcpy(r, imsg.data, sizeof(struct filter_rule));
			TAILQ_INIT(&r->set);
			if ((r->peer.ribid = rib_find(r->rib)) == RIB_FAILED) {
				log_warnx("IMSG_RECONF_FILTER: filter rule "
				    "for nonexistent rib %s", r->rib);
				parent_set = NULL;
				free(r);
				break;
			}
			parent_set = &r->set;
			if (r->dir == DIR_IN) {
				nr = ribs[r->peer.ribid].in_rules_tmp;
				if (nr == NULL) {
					nr = calloc(1,
					    sizeof(struct filter_head));
					if (nr == NULL)
						fatal(NULL);
					TAILQ_INIT(nr);
					ribs[r->peer.ribid].in_rules_tmp = nr;
				}
				TAILQ_INSERT_TAIL(nr, r, entry);
			} else
				TAILQ_INSERT_TAIL(out_rules_tmp, r, entry);
			break;
		case IMSG_RECONF_RDOMAIN:
			if (imsg.hdr.len - IMSG_HEADER_SIZE !=
			    sizeof(struct rdomain))
				fatalx("IMSG_RECONF_RDOMAIN bad len");
			if ((rd = malloc(sizeof(struct rdomain))) == NULL)
				fatal(NULL);
			memcpy(rd, imsg.data, sizeof(struct rdomain));
			TAILQ_INIT(&rd->import);
			TAILQ_INIT(&rd->export);
			SIMPLEQ_INSERT_TAIL(newdomains, rd, entry);
			break;
		case IMSG_RECONF_RDOMAIN_EXPORT:
			if (rd == NULL) {
				log_warnx("rde_dispatch_imsg_parent: "
				    "IMSG_RECONF_RDOMAIN_EXPORT unexpected");
				break;
			}
			parent_set = &rd->export;
			break;
		case IMSG_RECONF_RDOMAIN_IMPORT:
			if (rd == NULL) {
				log_warnx("rde_dispatch_imsg_parent: "
				    "IMSG_RECONF_RDOMAIN_IMPORT unexpected");
				break;
			}
			parent_set = &rd->import;
			break;
		case IMSG_RECONF_RDOMAIN_DONE:
			parent_set = NULL;
			break;
		case IMSG_RECONF_DONE:
			if (nconf == NULL)
				fatalx("got IMSG_RECONF_DONE but no config");
			parent_set = NULL;

			rde_reload_done();
			break;
		case IMSG_NEXTHOP_UPDATE:
			nexthop_update(imsg.data);
			break;
		case IMSG_FILTER_SET:
			if (imsg.hdr.len > IMSG_HEADER_SIZE +
			    sizeof(struct filter_set))
				fatalx("IMSG_FILTER_SET bad len");
			if (parent_set == NULL) {
				log_warnx("rde_dispatch_imsg_parent: "
				    "IMSG_FILTER_SET unexpected");
				break;
			}
			if ((s = malloc(sizeof(struct filter_set))) == NULL)
				fatal(NULL);
			memcpy(s, imsg.data, sizeof(struct filter_set));
			TAILQ_INSERT_TAIL(parent_set, s, entry);

			if (s->type == ACTION_SET_NEXTHOP) {
				nh = nexthop_get(&s->action.nexthop);
				nh->refcnt++;
			}
			break;
		case IMSG_MRT_OPEN:
		case IMSG_MRT_REOPEN:
			if (imsg.hdr.len > IMSG_HEADER_SIZE +
			    sizeof(struct mrt)) {
				log_warnx("wrong imsg len");
				break;
			}
			memcpy(&xmrt, imsg.data, sizeof(xmrt));
			if ((fd = imsg.fd) == -1)
				log_warnx("expected to receive fd for mrt dump "
				    "but didn't receive any");
			else if (xmrt.type == MRT_TABLE_DUMP ||
			    xmrt.type == MRT_TABLE_DUMP_MP ||
			    xmrt.type == MRT_TABLE_DUMP_V2) {
				rde_dump_mrt_new(&xmrt, imsg.hdr.pid, fd);
			} else
				close(fd);
			break;
		case IMSG_MRT_CLOSE:
			/* ignore end message because a dump is atomic */
			break;
		default:
			break;
		}
		imsg_free(&imsg);
	}
}
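/*
 * Layout of the BGP UPDATE body parsed below (RFC 4271):
 *
 *	2 bytes withdrawn routes length
 *	n bytes withdrawn routes
 *	2 bytes total path attribute length
 *	n bytes path attributes
 *	n bytes NLRI (length implicit from the message size)
 *
 * The imsg payload handed over by the SE is exactly this body, which
 * is why nlri_len is computed from imsg->hdr.len minus the header, the
 * two length fields and the withdrawn and attribute sections.
 */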
/* handle routing updates from the session engine. */
int
rde_update_dispatch(struct imsg *imsg)
{
	struct bgpd_addr	 prefix;
	struct mpattr		 mpa;
	struct rde_peer		*peer;
	struct rde_aspath	*asp = NULL;
	u_char			*p, *mpp = NULL;
	int			 error = -1, pos = 0;
	u_int16_t		 afi, len, mplen;
	u_int16_t		 withdrawn_len;
	u_int16_t		 attrpath_len;
	u_int16_t		 nlri_len;
	u_int8_t		 aid, prefixlen, safi, subtype;
	u_int32_t		 fas;

	peer = peer_get(imsg->hdr.peerid);
	if (peer == NULL)	/* unknown peer, cannot happen */
		return (-1);
	if (peer->state != PEER_UP)
		return (-1);	/* peer is not yet up, cannot happen */

	p = imsg->data;

	if (imsg->hdr.len < IMSG_HEADER_SIZE + 2) {
		rde_update_err(peer, ERR_UPDATE, ERR_UPD_ATTRLIST, NULL, 0);
		return (-1);
	}

	memcpy(&len, p, 2);
	withdrawn_len = ntohs(len);
	p += 2;
	if (imsg->hdr.len < IMSG_HEADER_SIZE + 2 + withdrawn_len + 2) {
		rde_update_err(peer, ERR_UPDATE, ERR_UPD_ATTRLIST, NULL, 0);
		return (-1);
	}

	p += withdrawn_len;
	memcpy(&len, p, 2);
	attrpath_len = len = ntohs(len);
	p += 2;
	if (imsg->hdr.len <
	    IMSG_HEADER_SIZE + 2 + withdrawn_len + 2 + attrpath_len) {
		rde_update_err(peer, ERR_UPDATE, ERR_UPD_ATTRLIST, NULL, 0);
		return (-1);
	}

	nlri_len =
	    imsg->hdr.len - IMSG_HEADER_SIZE - 4 - withdrawn_len - attrpath_len;
	bzero(&mpa, sizeof(mpa));

	if (attrpath_len != 0) { /* 0 = no NLRI information in this message */
		/* parse path attributes */
		asp = path_get();
		while (len > 0) {
			if ((pos = rde_attr_parse(p, len, peer, asp,
			    &mpa)) < 0)
				goto done;
			p += pos;
			len -= pos;
		}

		/* check for missing but necessary attributes */
		if ((subtype = rde_attr_missing(asp, peer->conf.ebgp,
		    nlri_len))) {
			rde_update_err(peer, ERR_UPDATE, ERR_UPD_MISSNG_WK_ATTR,
			    &subtype, sizeof(u_int8_t));
			goto done;
		}

		rde_as4byte_fixup(peer, asp);

		/* enforce remote AS if requested */
		if (asp->flags & F_ATTR_ASPATH &&
		    peer->conf.enforce_as == ENFORCE_AS_ON) {
			fas = aspath_neighbor(asp->aspath);
			if (peer->conf.remote_as != fas) {
				log_peer_warnx(&peer->conf, "bad path, "
				    "starting with %s, "
				    "enforce neighbor-as enabled", log_as(fas));
				rde_update_err(peer, ERR_UPDATE, ERR_UPD_ASPATH,
				    NULL, 0);
				goto done;
			}
		}

		rde_reflector(peer, asp);
	}

	p = imsg->data;
	len = withdrawn_len;
	p += 2;
	/* withdraw prefix */
	while (len > 0) {
		if ((pos = rde_update_get_prefix(p, len, &prefix,
		    &prefixlen)) == -1) {
			/*
			 * the RFC does not mention what we should do in
			 * this case. Let's do the same as in the NLRI case.
			 */
			log_peer_warnx(&peer->conf, "bad withdraw prefix");
			rde_update_err(peer, ERR_UPDATE, ERR_UPD_NETWORK,
			    NULL, 0);
			goto done;
		}
		if (prefixlen > 32) {
			log_peer_warnx(&peer->conf, "bad withdraw prefix");
			rde_update_err(peer, ERR_UPDATE, ERR_UPD_NETWORK,
			    NULL, 0);
			goto done;
		}

		p += pos;
		len -= pos;

		if (peer->capa.mp[AID_INET] == 0) {
			log_peer_warnx(&peer->conf,
			    "bad withdraw, %s disabled", aid2str(AID_INET));
			rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR,
			    NULL, 0);
			goto done;
		}

		rde_update_withdraw(peer, &prefix, prefixlen);
	}

	if (attrpath_len == 0) {
		/* 0 = no NLRI information in this message */
		if (nlri_len != 0) {
			/* crap at end of update which should not be there */
			rde_update_err(peer, ERR_UPDATE,
			    ERR_UPD_ATTRLIST, NULL, 0);
			return (-1);
		}
		if (withdrawn_len == 0) {
			/* EoR marker */
			peer_recv_eor(peer, AID_INET);
		}
		return (0);
	}
	/* withdraw MP_UNREACH_NLRI if available */
	if (mpa.unreach_len != 0) {
		mpp = mpa.unreach;
		mplen = mpa.unreach_len;
		memcpy(&afi, mpp, 2);
		mpp += 2;
		mplen -= 2;
		afi = ntohs(afi);
		safi = *mpp++;
		mplen--;

		if (afi2aid(afi, safi, &aid) == -1) {
			log_peer_warnx(&peer->conf,
			    "bad AFI/SAFI pair in withdraw");
			rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR,
			    NULL, 0);
			goto done;
		}

		if (peer->capa.mp[aid] == 0) {
			log_peer_warnx(&peer->conf,
			    "bad withdraw, %s disabled", aid2str(aid));
			rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR,
			    NULL, 0);
			goto done;
		}

		if ((asp->flags & ~F_ATTR_MP_UNREACH) == 0 && mplen == 0) {
			/* EoR marker */
			peer_recv_eor(peer, aid);
		}

		switch (aid) {
		case AID_INET6:
			while (mplen > 0) {
				if ((pos = rde_update_get_prefix6(mpp, mplen,
				    &prefix, &prefixlen)) == -1) {
					log_peer_warnx(&peer->conf,
					    "bad IPv6 withdraw prefix");
					rde_update_err(peer, ERR_UPDATE,
					    ERR_UPD_OPTATTR,
					    mpa.unreach, mpa.unreach_len);
					goto done;
				}
				if (prefixlen > 128) {
					log_peer_warnx(&peer->conf,
					    "bad IPv6 withdraw prefix");
					rde_update_err(peer, ERR_UPDATE,
					    ERR_UPD_OPTATTR,
					    mpa.unreach, mpa.unreach_len);
					goto done;
				}

				mpp += pos;
				mplen -= pos;

				rde_update_withdraw(peer, &prefix, prefixlen);
			}
			break;
		case AID_VPN_IPv4:
			while (mplen > 0) {
				if ((pos = rde_update_get_vpn4(mpp, mplen,
				    &prefix, &prefixlen)) == -1) {
					log_peer_warnx(&peer->conf,
					    "bad VPNv4 withdraw prefix");
					rde_update_err(peer, ERR_UPDATE,
					    ERR_UPD_OPTATTR,
					    mpa.unreach, mpa.unreach_len);
					goto done;
				}
				if (prefixlen > 32) {
					log_peer_warnx(&peer->conf,
					    "bad VPNv4 withdraw prefix");
					rde_update_err(peer, ERR_UPDATE,
					    ERR_UPD_OPTATTR,
					    mpa.unreach, mpa.unreach_len);
					goto done;
				}

				mpp += pos;
				mplen -= pos;

				rde_update_withdraw(peer, &prefix, prefixlen);
			}
			break;
		default:
			/* silently ignore unsupported multiprotocol AF */
			break;
		}

		if ((asp->flags & ~F_ATTR_MP_UNREACH) == 0) {
			error = 0;
			goto done;
		}
	}

	/* shift to NLRI information */
	p += 2 + attrpath_len;

	/* aspath needs to be loop free. Note that this is not a hard error. */
	if (peer->conf.ebgp && !aspath_loopfree(asp->aspath, conf->as))
		asp->flags |= F_ATTR_LOOP;

	/* parse nlri prefix */
	while (nlri_len > 0) {
		if ((pos = rde_update_get_prefix(p, nlri_len, &prefix,
		    &prefixlen)) == -1) {
			log_peer_warnx(&peer->conf, "bad nlri prefix");
			rde_update_err(peer, ERR_UPDATE, ERR_UPD_NETWORK,
			    NULL, 0);
			goto done;
		}
		if (prefixlen > 32) {
			log_peer_warnx(&peer->conf, "bad nlri prefix");
			rde_update_err(peer, ERR_UPDATE, ERR_UPD_NETWORK,
			    NULL, 0);
			goto done;
		}

		p += pos;
		nlri_len -= pos;

		if (peer->capa.mp[AID_INET] == 0) {
			log_peer_warnx(&peer->conf,
			    "bad update, %s disabled", aid2str(AID_INET));
			rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR,
			    NULL, 0);
			goto done;
		}

		rde_update_update(peer, asp, &prefix, prefixlen);

		/* max prefix checker */
		if (peer->conf.max_prefix &&
		    peer->prefix_cnt >= peer->conf.max_prefix) {
			log_peer_warnx(&peer->conf, "prefix limit reached"
			    " (>%u/%u)", peer->prefix_cnt,
			    peer->conf.max_prefix);
			rde_update_err(peer, ERR_CEASE, ERR_CEASE_MAX_PREFIX,
			    NULL, 0);
			goto done;
		}
	}
	/* add MP_REACH_NLRI if available */
	if (mpa.reach_len != 0) {
		mpp = mpa.reach;
		mplen = mpa.reach_len;
		memcpy(&afi, mpp, 2);
		mpp += 2;
		mplen -= 2;
		afi = ntohs(afi);
		safi = *mpp++;
		mplen--;

		if (afi2aid(afi, safi, &aid) == -1) {
			log_peer_warnx(&peer->conf,
			    "bad AFI/SAFI pair in update");
			rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR,
			    NULL, 0);
			goto done;
		}

		if (peer->capa.mp[aid] == 0) {
			log_peer_warnx(&peer->conf,
			    "bad update, %s disabled", aid2str(aid));
			rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR,
			    NULL, 0);
			goto done;
		}

		/*
		 * this works because asp is not linked.
		 * But first unlock the previously locked nexthop.
		 */
		if (asp->nexthop) {
			asp->nexthop->refcnt--;
			(void)nexthop_delete(asp->nexthop);
			asp->nexthop = NULL;
		}
		if ((pos = rde_get_mp_nexthop(mpp, mplen, aid, asp)) == -1) {
			log_peer_warnx(&peer->conf, "bad nlri prefix");
			rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR,
			    mpa.reach, mpa.reach_len);
			goto done;
		}
		mpp += pos;
		mplen -= pos;

		switch (aid) {
		case AID_INET6:
			while (mplen > 0) {
				if ((pos = rde_update_get_prefix6(mpp, mplen,
				    &prefix, &prefixlen)) == -1) {
					log_peer_warnx(&peer->conf,
					    "bad IPv6 nlri prefix");
					rde_update_err(peer, ERR_UPDATE,
					    ERR_UPD_OPTATTR,
					    mpa.reach, mpa.reach_len);
					goto done;
				}
				if (prefixlen > 128) {
					rde_update_err(peer, ERR_UPDATE,
					    ERR_UPD_OPTATTR,
					    mpa.reach, mpa.reach_len);
					goto done;
				}

				mpp += pos;
				mplen -= pos;

				rde_update_update(peer, asp, &prefix,
				    prefixlen);

				/* max prefix checker */
				if (peer->conf.max_prefix &&
				    peer->prefix_cnt >= peer->conf.max_prefix) {
					log_peer_warnx(&peer->conf,
					    "prefix limit reached"
					    " (>%u/%u)", peer->prefix_cnt,
					    peer->conf.max_prefix);
					rde_update_err(peer, ERR_CEASE,
					    ERR_CEASE_MAX_PREFIX, NULL, 0);
					goto done;
				}
			}
			break;
		case AID_VPN_IPv4:
			while (mplen > 0) {
				if ((pos = rde_update_get_vpn4(mpp, mplen,
				    &prefix, &prefixlen)) == -1) {
					log_peer_warnx(&peer->conf,
					    "bad VPNv4 nlri prefix");
					rde_update_err(peer, ERR_UPDATE,
					    ERR_UPD_OPTATTR,
					    mpa.reach, mpa.reach_len);
					goto done;
				}
				if (prefixlen > 32) {
					rde_update_err(peer, ERR_UPDATE,
					    ERR_UPD_OPTATTR,
					    mpa.reach, mpa.reach_len);
					goto done;
				}

				mpp += pos;
				mplen -= pos;

				rde_update_update(peer, asp, &prefix,
				    prefixlen);

				/* max prefix checker */
				if (peer->conf.max_prefix &&
				    peer->prefix_cnt >= peer->conf.max_prefix) {
					log_peer_warnx(&peer->conf,
					    "prefix limit reached"
					    " (>%u/%u)", peer->prefix_cnt,
					    peer->conf.max_prefix);
					rde_update_err(peer, ERR_CEASE,
					    ERR_CEASE_MAX_PREFIX, NULL, 0);
					goto done;
				}
			}
			break;
		default:
			/* silently ignore unsupported multiprotocol AF */
			break;
		}
	}

done:
	if (attrpath_len != 0) {
		/* unlock the previously locked entry */
		if (asp->nexthop) {
			asp->nexthop->refcnt--;
			(void)nexthop_delete(asp->nexthop);
		}
		/* free allocated attribute memory that is no longer used */
		path_put(asp);
	}

	return (error);
}
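/*
 * A single accepted update is fanned out to every RIB: ribs[0] is the
 * unfiltered Adj-RIB-In (only fed when softreconfig_in is enabled) and
 * every further RIB runs its own input filter, which may rewrite the
 * path into a modified copy (fasp) before path_update() links it.
 */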
void
rde_update_update(struct rde_peer *peer, struct rde_aspath *asp,
    struct bgpd_addr *prefix, u_int8_t prefixlen)
{
	struct rde_aspath	*fasp;
	enum filter_actions	 action;
	int			 r = 0, f = 0;
	u_int16_t		 i;

	peer->prefix_rcvd_update++;
	/* add original path to the Adj-RIB-In */
	if (peer->conf.softreconfig_in)
		r += path_update(&ribs[0], peer, asp, prefix, prefixlen);

	for (i = 1; i < rib_size; i++) {
		if (*ribs[i].name == '\0')
			break;
		/* input filter */
		action = rde_filter(ribs[i].in_rules, &fasp, peer, asp, prefix,
		    prefixlen, peer);

		if (fasp == NULL)
			fasp = asp;

		if (action == ACTION_ALLOW) {
			rde_update_log("update", i, peer,
			    &fasp->nexthop->exit_nexthop, prefix, prefixlen);
			r += path_update(&ribs[i], peer, fasp, prefix,
			    prefixlen);
		} else if (prefix_remove(&ribs[i], peer, prefix, prefixlen,
		    0)) {
			rde_update_log("filtered withdraw", i, peer,
			    NULL, prefix, prefixlen);
			f++;
		}

		/* free modified aspath */
		if (fasp != asp)
			path_put(fasp);
	}

	if (r)
		peer->prefix_cnt++;
	else if (f)
		peer->prefix_cnt--;
}

void
rde_update_withdraw(struct rde_peer *peer, struct bgpd_addr *prefix,
    u_int8_t prefixlen)
{
	int		r = 0;
	u_int16_t	i;

	peer->prefix_rcvd_withdraw++;

	for (i = rib_size - 1; ; i--) {
		if (*ribs[i].name == '\0')
			break;
		if (prefix_remove(&ribs[i], peer, prefix, prefixlen, 0)) {
			rde_update_log("withdraw", i, peer, NULL, prefix,
			    prefixlen);
			r++;
		}
		if (i == 0)
			break;
	}

	if (r)
		peer->prefix_cnt--;
}

/*
 * BGP UPDATE parser functions
 */

/* attribute parser specific macros */
#define UPD_READ(t, p, plen, n) \
	do { \
		memcpy(t, p, n); \
		p += n; \
		plen += n; \
	} while (0)

#define CHECK_FLAGS(s, t, m)	\
	(((s) & ~(ATTR_DEFMASK | (m))) == (t))
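/*
 * Each path attribute starts with a two byte header of flags and type,
 * followed by a one byte length, or a two byte length if ATTR_EXTLEN is
 * set in the flags.  For example, a plain ORIGIN attribute arrives on
 * the wire as 0x40 0x01 0x01 <origin>: well-known transitive flags,
 * type 1, length 1, one value byte.
 */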
int
rde_attr_parse(u_char *p, u_int16_t len, struct rde_peer *peer,
    struct rde_aspath *a, struct mpattr *mpa)
{
	struct bgpd_addr nexthop;
	u_char		*op = p, *npath;
	u_int32_t	 tmp32;
	int		 error;
	u_int16_t	 attr_len, nlen;
	u_int16_t	 plen = 0;
	u_int8_t	 flags;
	u_int8_t	 type;
	u_int8_t	 tmp8;

	if (len < 3) {
bad_len:
		rde_update_err(peer, ERR_UPDATE, ERR_UPD_ATTRLEN, op, len);
		return (-1);
	}

	UPD_READ(&flags, p, plen, 1);
	UPD_READ(&type, p, plen, 1);

	if (flags & ATTR_EXTLEN) {
		if (len - plen < 2)
			goto bad_len;
		UPD_READ(&attr_len, p, plen, 2);
		attr_len = ntohs(attr_len);
	} else {
		UPD_READ(&tmp8, p, plen, 1);
		attr_len = tmp8;
	}

	if (len - plen < attr_len)
		goto bad_len;

	/* adjust len to the actual attribute size including header */
	len = plen + attr_len;

	switch (type) {
	case ATTR_UNDEF:
		/* ignore and drop path attributes with a type code of 0 */
		plen += attr_len;
		break;
	case ATTR_ORIGIN:
		if (attr_len != 1)
			goto bad_len;

		if (!CHECK_FLAGS(flags, ATTR_WELL_KNOWN, 0)) {
bad_flags:
			rde_update_err(peer, ERR_UPDATE, ERR_UPD_ATTRFLAGS,
			    op, len);
			return (-1);
		}

		UPD_READ(&a->origin, p, plen, 1);
		if (a->origin > ORIGIN_INCOMPLETE) {
			rde_update_err(peer, ERR_UPDATE, ERR_UPD_ORIGIN,
			    op, len);
			return (-1);
		}
		if (a->flags & F_ATTR_ORIGIN)
			goto bad_list;
		a->flags |= F_ATTR_ORIGIN;
		break;
	case ATTR_ASPATH:
		if (!CHECK_FLAGS(flags, ATTR_WELL_KNOWN, 0))
			goto bad_flags;
		error = aspath_verify(p, attr_len, rde_as4byte(peer));
		if (error == AS_ERR_SOFT) {
			/*
			 * soft errors like unexpected segment types are
			 * not considered fatal and the path is just
			 * marked invalid.
			 */
			a->flags |= F_ATTR_PARSE_ERR;
			log_peer_warnx(&peer->conf, "bad ASPATH, "
			    "path invalidated and prefix withdrawn");
		} else if (error != 0) {
			rde_update_err(peer, ERR_UPDATE, ERR_UPD_ASPATH,
			    NULL, 0);
			return (-1);
		}
		if (a->flags & F_ATTR_ASPATH)
			goto bad_list;
		if (rde_as4byte(peer)) {
			npath = p;
			nlen = attr_len;
		} else
			npath = aspath_inflate(p, attr_len, &nlen);
		a->flags |= F_ATTR_ASPATH;
		a->aspath = aspath_get(npath, nlen);
		if (npath != p)
			free(npath);
		plen += attr_len;
		break;
	case ATTR_NEXTHOP:
		if (attr_len != 4)
			goto bad_len;
		if (!CHECK_FLAGS(flags, ATTR_WELL_KNOWN, 0))
			goto bad_flags;
		if (a->flags & F_ATTR_NEXTHOP)
			goto bad_list;
		a->flags |= F_ATTR_NEXTHOP;

		bzero(&nexthop, sizeof(nexthop));
		nexthop.aid = AID_INET;
		UPD_READ(&nexthop.v4.s_addr, p, plen, 4);
		/*
		 * Check if the nexthop is a valid IP address. We consider
		 * multicast and experimental addresses as invalid.
		 */
		tmp32 = ntohl(nexthop.v4.s_addr);
		if (IN_MULTICAST(tmp32) || IN_BADCLASS(tmp32)) {
			rde_update_err(peer, ERR_UPDATE, ERR_UPD_NETWORK,
			    op, len);
			return (-1);
		}
		a->nexthop = nexthop_get(&nexthop);
		/*
		 * lock the nexthop because it is not yet linked else
		 * withdraws may remove this nexthop which in turn would
		 * cause a use after free error.
		 */
		a->nexthop->refcnt++;
		break;
	case ATTR_MED:
		if (attr_len != 4)
			goto bad_len;
		if (!CHECK_FLAGS(flags, ATTR_OPTIONAL, 0))
			goto bad_flags;
		if (a->flags & F_ATTR_MED)
			goto bad_list;
		a->flags |= F_ATTR_MED;

		UPD_READ(&tmp32, p, plen, 4);
		a->med = ntohl(tmp32);
		break;
	case ATTR_LOCALPREF:
		if (attr_len != 4)
			goto bad_len;
		if (!CHECK_FLAGS(flags, ATTR_WELL_KNOWN, 0))
			goto bad_flags;
		if (peer->conf.ebgp) {
			/* ignore local-pref attr on non ibgp peers */
			plen += 4;
			break;
		}
		if (a->flags & F_ATTR_LOCALPREF)
			goto bad_list;
		a->flags |= F_ATTR_LOCALPREF;

		UPD_READ(&tmp32, p, plen, 4);
		a->lpref = ntohl(tmp32);
		break;
	case ATTR_ATOMIC_AGGREGATE:
		if (attr_len != 0)
			goto bad_len;
		if (!CHECK_FLAGS(flags, ATTR_WELL_KNOWN, 0))
			goto bad_flags;
		goto optattr;
	case ATTR_AGGREGATOR:
		if ((!rde_as4byte(peer) && attr_len != 6) ||
		    (rde_as4byte(peer) && attr_len != 8)) {
			/*
			 * ignore attribute in case of error as per
			 * draft-ietf-idr-optional-transitive-00.txt
			 * but only if partial bit is set
			 */
			if ((flags & ATTR_PARTIAL) == 0)
				goto bad_len;
			log_peer_warnx(&peer->conf, "bad AGGREGATOR, "
			    "partial attribute ignored");
			plen += attr_len;
			break;
		}
		if (!CHECK_FLAGS(flags, ATTR_OPTIONAL|ATTR_TRANSITIVE,
		    ATTR_PARTIAL))
			goto bad_flags;
		if (!rde_as4byte(peer)) {
			/* need to inflate aggregator AS to 4-byte */
			u_char	t[8];
			t[0] = t[1] = 0;
			UPD_READ(&t[2], p, plen, 2);
			UPD_READ(&t[4], p, plen, 4);
			if (attr_optadd(a, flags, type, t,
			    sizeof(t)) == -1)
				goto bad_list;
			break;
		}
		/* 4-byte capable peers take the default route */
		goto optattr;
	case ATTR_COMMUNITIES:
		if (attr_len % 4 != 0) {
			/*
			 * mark update as bad and withdraw all routes as per
			 * draft-ietf-idr-optional-transitive-00.txt
			 * but only if partial bit is set
			 */
			if ((flags & ATTR_PARTIAL) == 0)
				goto bad_len;
			a->flags |= F_ATTR_PARSE_ERR;
			log_peer_warnx(&peer->conf, "bad COMMUNITIES, "
			    "path invalidated and prefix withdrawn");
		}
		if (!CHECK_FLAGS(flags, ATTR_OPTIONAL|ATTR_TRANSITIVE,
		    ATTR_PARTIAL))
			goto bad_flags;
		goto optattr;
	case ATTR_EXT_COMMUNITIES:
		if (attr_len % 8 != 0) {
			/*
			 * mark update as bad and withdraw all routes as per
			 * draft-ietf-idr-optional-transitive-00.txt
			 * but only if partial bit is set
			 */
			if ((flags & ATTR_PARTIAL) == 0)
				goto bad_len;
			a->flags |= F_ATTR_PARSE_ERR;
			log_peer_warnx(&peer->conf, "bad EXT_COMMUNITIES, "
			    "path invalidated and prefix withdrawn");
		}
		if (!CHECK_FLAGS(flags, ATTR_OPTIONAL|ATTR_TRANSITIVE,
		    ATTR_PARTIAL))
			goto bad_flags;
		goto optattr;
	case ATTR_ORIGINATOR_ID:
		if (attr_len != 4)
			goto bad_len;
		if (!CHECK_FLAGS(flags, ATTR_OPTIONAL, 0))
			goto bad_flags;
		goto optattr;
	case ATTR_CLUSTER_LIST:
		if (attr_len % 4 != 0)
			goto bad_len;
		if (!CHECK_FLAGS(flags, ATTR_OPTIONAL, 0))
			goto bad_flags;
		goto optattr;
	case ATTR_MP_REACH_NLRI:
		if (attr_len < 4)
			goto bad_len;
		if (!CHECK_FLAGS(flags, ATTR_OPTIONAL, 0))
			goto bad_flags;
		/* the validity is checked in rde_update_dispatch() */
		if (a->flags & F_ATTR_MP_REACH)
			goto bad_list;
		a->flags |= F_ATTR_MP_REACH;

		mpa->reach = p;
		mpa->reach_len = attr_len;
		plen += attr_len;
		break;
	case ATTR_MP_UNREACH_NLRI:
		if (attr_len < 3)
			goto bad_len;
		if (!CHECK_FLAGS(flags, ATTR_OPTIONAL, 0))
			goto bad_flags;
		/* the validity is checked in rde_update_dispatch() */
		if (a->flags & F_ATTR_MP_UNREACH)
			goto bad_list;
		a->flags |= F_ATTR_MP_UNREACH;

		mpa->unreach = p;
		mpa->unreach_len = attr_len;
		plen += attr_len;
		break;
	case ATTR_AS4_AGGREGATOR:
		if (attr_len != 8) {
			/* see ATTR_AGGREGATOR ... */
			if ((flags & ATTR_PARTIAL) == 0)
				goto bad_len;
			log_peer_warnx(&peer->conf, "bad AS4_AGGREGATOR, "
			    "partial attribute ignored");
			plen += attr_len;
			break;
		}
		if (!CHECK_FLAGS(flags, ATTR_OPTIONAL|ATTR_TRANSITIVE,
		    ATTR_PARTIAL))
			goto bad_flags;
		a->flags |= F_ATTR_AS4BYTE_NEW;
		goto optattr;
	case ATTR_AS4_PATH:
		if (!CHECK_FLAGS(flags, ATTR_OPTIONAL|ATTR_TRANSITIVE,
		    ATTR_PARTIAL))
			goto bad_flags;
		if ((error = aspath_verify(p, attr_len, 1)) != 0) {
			/*
			 * XXX RFC does not specify how to handle errors.
			 * XXX Instead of dropping the session because of a
			 * XXX bad path just mark the full update as having
			 * XXX a parse error which makes the update no longer
			 * XXX eligible and will not be considered for routing
			 * XXX or redistribution.
			 * XXX We follow draft-ietf-idr-optional-transitive
			 * XXX by looking at the partial bit.
			 * XXX Consider soft errors similar to a partial attr.
			 */
			if (flags & ATTR_PARTIAL || error == AS_ERR_SOFT) {
				a->flags |= F_ATTR_PARSE_ERR;
				log_peer_warnx(&peer->conf, "bad AS4_PATH, "
				    "path invalidated and prefix withdrawn");
				goto optattr;
			} else {
				rde_update_err(peer, ERR_UPDATE, ERR_UPD_ASPATH,
				    NULL, 0);
				return (-1);
			}
		}
		a->flags |= F_ATTR_AS4BYTE_NEW;
		goto optattr;
	default:
		if ((flags & ATTR_OPTIONAL) == 0) {
			rde_update_err(peer, ERR_UPDATE, ERR_UPD_UNKNWN_WK_ATTR,
			    op, len);
			return (-1);
		}
optattr:
		if (attr_optadd(a, flags, type, p, attr_len) == -1) {
bad_list:
			rde_update_err(peer, ERR_UPDATE, ERR_UPD_ATTRLIST,
			    NULL, 0);
			return (-1);
		}

		plen += attr_len;
		break;
	}

	return (plen);
}

int
rde_attr_add(struct rde_aspath *a, u_char *p, u_int16_t len)
{
	u_int16_t	 attr_len;
	u_int16_t	 plen = 0;
	u_int8_t	 flags;
	u_int8_t	 type;
	u_int8_t	 tmp8;

	if (a == NULL)		/* no aspath, nothing to do */
		return (0);
	if (len < 3)
		return (-1);

	UPD_READ(&flags, p, plen, 1);
	UPD_READ(&type, p, plen, 1);

	if (flags & ATTR_EXTLEN) {
		if (len - plen < 2)
			return (-1);
		UPD_READ(&attr_len, p, plen, 2);
		attr_len = ntohs(attr_len);
	} else {
		UPD_READ(&tmp8, p, plen, 1);
		attr_len = tmp8;
	}

	if (len - plen < attr_len)
		return (-1);

	if (attr_optadd(a, flags, type, p, attr_len) == -1)
		return (-1);
	return (0);
}

#undef UPD_READ
#undef CHECK_FLAGS

u_int8_t
rde_attr_missing(struct rde_aspath *a, int ebgp, u_int16_t nlrilen)
{
	/* ATTR_MP_UNREACH_NLRI may be sent alone */
	if (nlrilen == 0 && a->flags & F_ATTR_MP_UNREACH &&
	    (a->flags & F_ATTR_MP_REACH) == 0)
		return (0);

	if ((a->flags & F_ATTR_ORIGIN) == 0)
		return (ATTR_ORIGIN);
	if ((a->flags & F_ATTR_ASPATH) == 0)
		return (ATTR_ASPATH);
	if ((a->flags & F_ATTR_MP_REACH) == 0 &&
	    (a->flags & F_ATTR_NEXTHOP) == 0)
		return (ATTR_NEXTHOP);
	if (!ebgp)
		if ((a->flags & F_ATTR_LOCALPREF) == 0)
			return (ATTR_LOCALPREF);
	return (0);
}
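/*
 * The value of an MP_REACH_NLRI attribute starts with a 2-byte AFI and
 * a 1-byte SAFI, which rde_update_dispatch() consumes before calling
 * here.  What remains is parsed below: a 1-byte nexthop length, the
 * nexthop itself, one reserved byte (the historic SNPA count, RFC 4760)
 * and then the NLRI.
 */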
int
rde_get_mp_nexthop(u_char *data, u_int16_t len, u_int8_t aid,
    struct rde_aspath *asp)
{
	struct bgpd_addr	nexthop;
	u_int8_t		totlen, nhlen;

	if (len == 0)
		return (-1);

	nhlen = *data++;
	totlen = 1;
	len--;

	if (nhlen > len)
		return (-1);

	bzero(&nexthop, sizeof(nexthop));
	nexthop.aid = aid;
	switch (aid) {
	case AID_INET6:
		/*
		 * RFC2545 describes that there may be a link-local
		 * address carried in nexthop. Yikes!
		 * This is not only silly, it is wrong and we just ignore
		 * this link-local nexthop. The bgpd session doesn't run
		 * over the link-local address so why should all other
		 * traffic.
		 */
		if (nhlen != 16 && nhlen != 32) {
			log_warnx("bad multiprotocol nexthop, bad size");
			return (-1);
		}
		memcpy(&nexthop.v6.s6_addr, data, 16);
		break;
	case AID_VPN_IPv4:
		/*
		 * Neither RFC4364 nor RFC3107 specify the format of the
		 * nexthop in an explicit way. The quality of the RFCs went
		 * down the toilet the larger the numbers got.
		 * RFC4364 is very confusing about the VPN-IPv4 address and
		 * the VPN-IPv4 prefix that also carries an MPLS label.
		 * So the nexthop is a 12-byte address with a 64bit RD and
		 * an IPv4 address following. In the nexthop case the RD can
		 * be ignored.
		 * Since the nexthop has to be in the main IPv4 table just
		 * create an AID_INET nexthop. So we don't need to handle
		 * AID_VPN_IPv4 in nexthop and kroute.
		 */
		if (nhlen != 12) {
			log_warnx("bad multiprotocol nexthop, bad size");
			return (-1);
		}
		data += sizeof(u_int64_t);
		nexthop.aid = AID_INET;
		memcpy(&nexthop.v4, data, sizeof(nexthop.v4));
		break;
	default:
		log_warnx("bad multiprotocol nexthop, bad AID");
		return (-1);
	}

	asp->nexthop = nexthop_get(&nexthop);
	/*
	 * lock the nexthop because it is not yet linked else
	 * withdraws may remove this nexthop which in turn would
	 * cause a use after free error.
	 */
	asp->nexthop->refcnt++;

	/* ignore reserved (old SNPA) field as per RFC4760 */
	totlen += nhlen + 1;
	data += nhlen + 1;

	return (totlen);
}
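/*
 * NLRI prefixes are encoded as a length in bits followed by the
 * minimum number of address bytes, with any trailing bits masked off
 * via the addrmask table below.  For example, 192.0.2.0/24 arrives as
 * the four bytes 0x18 0xc0 0x00 0x02.
 */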
int
rde_update_extract_prefix(u_char *p, u_int16_t len, void *va,
    u_int8_t pfxlen, u_int8_t max)
{
	static u_char addrmask[] = {
	    0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff };
	u_char		*a = va;
	int		 i;
	u_int16_t	 plen = 0;

	for (i = 0; pfxlen && i < max; i++) {
		if (len <= plen)
			return (-1);
		if (pfxlen < 8) {
			a[i] = *p++ & addrmask[pfxlen];
			plen++;
			break;
		} else {
			a[i] = *p++;
			plen++;
			pfxlen -= 8;
		}
	}
	return (plen);
}

int
rde_update_get_prefix(u_char *p, u_int16_t len, struct bgpd_addr *prefix,
    u_int8_t *prefixlen)
{
	u_int8_t	 pfxlen;
	int		 plen;

	if (len < 1)
		return (-1);

	pfxlen = *p++;
	len--;

	bzero(prefix, sizeof(struct bgpd_addr));
	prefix->aid = AID_INET;
	*prefixlen = pfxlen;

	if ((plen = rde_update_extract_prefix(p, len, &prefix->v4, pfxlen,
	    sizeof(prefix->v4))) == -1)
		return (-1);

	return (plen + 1);	/* pfxlen needs to be added */
}

int
rde_update_get_prefix6(u_char *p, u_int16_t len, struct bgpd_addr *prefix,
    u_int8_t *prefixlen)
{
	int		plen;
	u_int8_t	pfxlen;

	if (len < 1)
		return (-1);

	pfxlen = *p++;
	len--;

	bzero(prefix, sizeof(struct bgpd_addr));
	prefix->aid = AID_INET6;
	*prefixlen = pfxlen;

	if ((plen = rde_update_extract_prefix(p, len, &prefix->v6, pfxlen,
	    sizeof(prefix->v6))) == -1)
		return (-1);

	return (plen + 1);	/* pfxlen needs to be added */
}
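/*
 * A VPN-IPv4 NLRI (RFC 4364) prepends an MPLS label stack (3 bytes per
 * label, the bottom-of-stack bit ends the stack) and an 8-byte route
 * distinguisher to the IPv4 prefix.  The advertised prefix length
 * covers labels and RD as well, so it is reduced by 24 bits per label
 * and 64 bits for the RD before the address bytes are extracted.
 */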
int
rde_update_get_vpn4(u_char *p, u_int16_t len, struct bgpd_addr *prefix,
    u_int8_t *prefixlen)
{
	int		 rv, done = 0;
	u_int8_t	 pfxlen;
	u_int16_t	 plen;

	if (len < 1)
		return (-1);

	memcpy(&pfxlen, p, 1);
	p += 1;
	plen = 1;

	bzero(prefix, sizeof(struct bgpd_addr));

	/* label stack */
	do {
		if (len - plen < 3 || pfxlen < 3 * 8)
			return (-1);
		if (prefix->vpn4.labellen + 3U >
		    sizeof(prefix->vpn4.labelstack))
			return (-1);
		prefix->vpn4.labelstack[prefix->vpn4.labellen++] = *p++;
		prefix->vpn4.labelstack[prefix->vpn4.labellen++] = *p++;
		prefix->vpn4.labelstack[prefix->vpn4.labellen] = *p++;
		if (prefix->vpn4.labelstack[prefix->vpn4.labellen] &
		    BGP_MPLS_BOS)
			done = 1;
		prefix->vpn4.labellen++;
		plen += 3;
		pfxlen -= 3 * 8;
	} while (!done);

	/* RD */
	if (len - plen < (int)sizeof(u_int64_t) ||
	    pfxlen < sizeof(u_int64_t) * 8)
		return (-1);
	memcpy(&prefix->vpn4.rd, p, sizeof(u_int64_t));
	pfxlen -= sizeof(u_int64_t) * 8;
	p += sizeof(u_int64_t);
	plen += sizeof(u_int64_t);

	/* prefix */
	prefix->aid = AID_VPN_IPv4;
	*prefixlen = pfxlen;

	if ((rv = rde_update_extract_prefix(p, len, &prefix->vpn4.addr,
	    pfxlen, sizeof(prefix->vpn4.addr))) == -1)
		return (-1);

	return (plen + rv);
}

void
rde_update_err(struct rde_peer *peer, u_int8_t error, u_int8_t suberr,
    void *data, u_int16_t size)
{
	struct ibuf	*wbuf;

	if ((wbuf = imsg_create(ibuf_se, IMSG_UPDATE_ERR, peer->conf.id, 0,
	    size + sizeof(error) + sizeof(suberr))) == NULL)
		fatal("%s %d imsg_create error", __func__, __LINE__);
	if (imsg_add(wbuf, &error, sizeof(error)) == -1 ||
	    imsg_add(wbuf, &suberr, sizeof(suberr)) == -1 ||
	    imsg_add(wbuf, data, size) == -1)
		fatal("%s %d imsg_add error", __func__, __LINE__);
	imsg_close(ibuf_se, wbuf);
	peer->state = PEER_ERR;
}

void
rde_update_log(const char *message, u_int16_t rid,
    const struct rde_peer *peer, const struct bgpd_addr *next,
    const struct bgpd_addr *prefix, u_int8_t prefixlen)
{
	char		*l = NULL;
	char		*n = NULL;
	char		*p = NULL;

	if (!((conf->log & BGPD_LOG_UPDATES) ||
	    (peer->conf.flags & PEERFLAG_LOG_UPDATES)))
		return;

	if (next != NULL)
		if (asprintf(&n, " via %s", log_addr(next)) == -1)
			n = NULL;
	if (asprintf(&p, "%s/%u", log_addr(prefix), prefixlen) == -1)
		p = NULL;
	l = log_fmt_peer(&peer->conf);
	log_info("Rib %s: %s AS%s: %s %s%s", ribs[rid].name,
	    l, log_as(peer->conf.remote_as), message,
	    p ? p : "out of memory", n ? n : "");

	free(l);
	free(n);
	free(p);
}
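/*
 * A worked example for the merge case handled below (per RFC 6793): a
 * 2-byte session peer that aggregated as AS 196608 cannot encode that
 * ASN in AGGREGATOR, so it sends AS_TRANS (23456) there and carries the
 * real ASN in AS4_AGGREGATOR.  Seeing AS_TRANS in the old attribute is
 * what makes the new attributes authoritative.
 */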
/*
 * 4-Byte ASN helper function.
 * Two scenarios need to be considered:
 * - NEW session with NEW attributes present -> just remove the attributes
 * - OLD session with NEW attributes present -> try to merge them
 */
void
rde_as4byte_fixup(struct rde_peer *peer, struct rde_aspath *a)
{
	struct attr	*nasp, *naggr, *oaggr;
	u_int32_t	 as;

	/*
	 * if either ATTR_AS4_AGGREGATOR or ATTR_AS4_PATH is present
	 * try to fixup the attributes.
	 * Do not fixup if F_ATTR_PARSE_ERR is set.
	 */
	if (!(a->flags & F_ATTR_AS4BYTE_NEW) || a->flags & F_ATTR_PARSE_ERR)
		return;

	/* first get the attributes */
	nasp = attr_optget(a, ATTR_AS4_PATH);
	naggr = attr_optget(a, ATTR_AS4_AGGREGATOR);

	if (rde_as4byte(peer)) {
		/* NEW session using 4-byte ASNs */
		if (nasp) {
			log_peer_warnx(&peer->conf, "uses 4-byte ASN "
			    "but sent AS4_PATH attribute.");
			attr_free(a, nasp);
		}
		if (naggr) {
			log_peer_warnx(&peer->conf, "uses 4-byte ASN "
			    "but sent AS4_AGGREGATOR attribute.");
			attr_free(a, naggr);
		}
		return;
	}
	/* OLD session using 2-byte ASNs */
	/* try to merge the new attributes into the old ones */
	if ((oaggr = attr_optget(a, ATTR_AGGREGATOR))) {
		memcpy(&as, oaggr->data, sizeof(as));
		if (ntohl(as) != AS_TRANS) {
			/* per RFC ignore AS4_PATH and AS4_AGGREGATOR */
			if (nasp)
				attr_free(a, nasp);
			if (naggr)
				attr_free(a, naggr);
			return;
		}
		if (naggr) {
			/* switch over to new AGGREGATOR */
			attr_free(a, oaggr);
			if (attr_optadd(a, ATTR_OPTIONAL | ATTR_TRANSITIVE,
			    ATTR_AGGREGATOR, naggr->data, naggr->len))
				fatalx("attr_optadd failed but impossible");
		}
	}
	/* there is no need for AS4_AGGREGATOR any more */
	if (naggr)
		attr_free(a, naggr);

	/* merge AS4_PATH with ASPATH */
	if (nasp)
		aspath_merge(a, nasp);
}

/*
 * route reflector helper function
 */
void
rde_reflector(struct rde_peer *peer, struct rde_aspath *asp)
{
	struct attr	*a;
	u_int8_t	*p;
	u_int16_t	 len;
	u_int32_t	 id;

	/* do not consider updates with parse errors */
	if (asp->flags & F_ATTR_PARSE_ERR)
		return;

	/* check for originator id if eq router_id drop */
	if ((a = attr_optget(asp, ATTR_ORIGINATOR_ID)) != NULL) {
		if (memcmp(&conf->bgpid, a->data, sizeof(conf->bgpid)) == 0) {
			/* this is coming from myself */
			asp->flags |= F_ATTR_LOOP;
			return;
		}
	} else if (conf->flags & BGPD_FLAG_REFLECTOR) {
		if (peer->conf.ebgp)
			id = conf->bgpid;
		else
			id = htonl(peer->remote_bgpid);
		if (attr_optadd(asp, ATTR_OPTIONAL, ATTR_ORIGINATOR_ID,
		    &id, sizeof(u_int32_t)) == -1)
			fatalx("attr_optadd failed but impossible");
	}

	/* check for own id in the cluster list */
	if (conf->flags & BGPD_FLAG_REFLECTOR) {
		if ((a = attr_optget(asp, ATTR_CLUSTER_LIST)) != NULL) {
			for (len = 0; len < a->len;
			    len += sizeof(conf->clusterid))
				/* check if coming from my cluster */
				if (memcmp(&conf->clusterid, a->data + len,
				    sizeof(conf->clusterid)) == 0) {
					asp->flags |= F_ATTR_LOOP;
					return;
				}

			/* prepend own clusterid by replacing attribute */
			len = a->len + sizeof(conf->clusterid);
			if (len < a->len)
				fatalx("rde_reflector: cluster-list overflow");
			if ((p = malloc(len)) == NULL)
				fatal("rde_reflector");
			memcpy(p, &conf->clusterid, sizeof(conf->clusterid));
			memcpy(p + sizeof(conf->clusterid), a->data, a->len);
			attr_free(asp, a);
			if (attr_optadd(asp, ATTR_OPTIONAL, ATTR_CLUSTER_LIST,
			    p, len) == -1)
				fatalx("attr_optadd failed but impossible");
			free(p);
		} else if (attr_optadd(asp, ATTR_OPTIONAL, ATTR_CLUSTER_LIST,
		    &conf->clusterid, sizeof(conf->clusterid)) == -1)
			fatalx("attr_optadd failed but impossible");
	}
}
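/*
 * Control replies can be huge, so rib dumps are not written in one go:
 * rde_dump_ctx_new() registers a rib_context that the main event loop
 * advances via rib_dump_runner() in RDE_RUNNER_ROUNDS sized chunks,
 * and only while the control pipe has (nearly) drained.
 */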

/*
 * control specific functions
 */
void
rde_dump_rib_as(struct prefix *p, struct rde_aspath *asp, pid_t pid, int flags)
{
	struct ctl_show_rib	 rib;
	struct ibuf		*wbuf;
	struct attr		*a;
	void			*bp;
	time_t			 staletime;
	u_int8_t		 l;

	bzero(&rib, sizeof(rib));
	rib.lastchange = p->lastchange;
	rib.local_pref = asp->lpref;
	rib.med = asp->med;
	rib.weight = asp->weight;
	strlcpy(rib.descr, asp->peer->conf.descr, sizeof(rib.descr));
	memcpy(&rib.remote_addr, &asp->peer->remote_addr,
	    sizeof(rib.remote_addr));
	rib.remote_id = asp->peer->remote_bgpid;
	if (asp->nexthop != NULL) {
		memcpy(&rib.true_nexthop, &asp->nexthop->true_nexthop,
		    sizeof(rib.true_nexthop));
		memcpy(&rib.exit_nexthop, &asp->nexthop->exit_nexthop,
		    sizeof(rib.exit_nexthop));
	} else {
		/* announced network may have a NULL nexthop */
		bzero(&rib.true_nexthop, sizeof(rib.true_nexthop));
		bzero(&rib.exit_nexthop, sizeof(rib.exit_nexthop));
		rib.true_nexthop.aid = p->prefix->aid;
		rib.exit_nexthop.aid = p->prefix->aid;
	}
	pt_getaddr(p->prefix, &rib.prefix);
	rib.prefixlen = p->prefix->prefixlen;
	rib.origin = asp->origin;
	rib.flags = 0;
	if (p->rib->active == p)
		rib.flags |= F_PREF_ACTIVE;
	if (!asp->peer->conf.ebgp)
		rib.flags |= F_PREF_INTERNAL;
	if (asp->flags & F_PREFIX_ANNOUNCED)
		rib.flags |= F_PREF_ANNOUNCE;
	if (asp->nexthop == NULL || asp->nexthop->state == NEXTHOP_REACH)
		rib.flags |= F_PREF_ELIGIBLE;
	if (asp->flags & F_ATTR_LOOP)
		rib.flags &= ~F_PREF_ELIGIBLE;
	staletime = asp->peer->staletime[p->prefix->aid];
	if (staletime && p->lastchange <= staletime)
		rib.flags |= F_PREF_STALE;
	rib.aspath_len = aspath_length(asp->aspath);

	if ((wbuf = imsg_create(ibuf_se_ctl, IMSG_CTL_SHOW_RIB, 0, pid,
	    sizeof(rib) + rib.aspath_len)) == NULL)
		return;
	if (imsg_add(wbuf, &rib, sizeof(rib)) == -1 ||
	    imsg_add(wbuf, aspath_dump(asp->aspath),
	    rib.aspath_len) == -1)
		return;
	imsg_close(ibuf_se_ctl, wbuf);

	if (flags & F_CTL_DETAIL)
		for (l = 0; l < asp->others_len; l++) {
			if ((a = asp->others[l]) == NULL)
				break;
			if ((wbuf = imsg_create(ibuf_se_ctl,
			    IMSG_CTL_SHOW_RIB_ATTR, 0, pid,
			    attr_optlen(a))) == NULL)
				return;
			if ((bp = ibuf_reserve(wbuf, attr_optlen(a))) == NULL) {
				ibuf_free(wbuf);
				return;
			}
			if (attr_write(bp, attr_optlen(a), a->flags,
			    a->type, a->data, a->len) == -1) {
				ibuf_free(wbuf);
				return;
			}
			imsg_close(ibuf_se_ctl, wbuf);
		}
}

void
rde_dump_filterout(struct rde_peer *peer, struct prefix *p,
    struct ctl_show_rib_request *req)
{
	struct bgpd_addr	 addr;
	struct rde_aspath	*asp;
	enum filter_actions	 a;

	if (up_test_update(peer, p) != 1)
		return;

	pt_getaddr(p->prefix, &addr);
	a = rde_filter(out_rules, &asp, peer, p->aspath, &addr,
	    p->prefix->prefixlen, p->aspath->peer);
	if (asp)
		asp->peer = p->aspath->peer;
	else
		asp = p->aspath;

	if (a == ACTION_ALLOW)
		rde_dump_rib_as(p, asp, req->pid, req->flags);

	if (asp != p->aspath)
		path_put(asp);
}
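
/*
 * rde_dump_filter() below dispatches a single prefix against a control
 * request: Adj-RIB-In and Loc-RIB queries match on peer id, AS path,
 * community and the F_CTL_ACTIVE flag, while Adj-RIB-Out queries only
 * look at the active prefix and re-run the output filters via
 * rde_dump_filterout(), since no Adj-RIB-Out is stored verbatim.
 */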

void
rde_dump_filter(struct prefix *p, struct ctl_show_rib_request *req)
{
	struct rde_peer		*peer;

	if (req->flags & F_CTL_ADJ_IN ||
	    !(req->flags & (F_CTL_ADJ_IN|F_CTL_ADJ_OUT))) {
		if (req->peerid && req->peerid != p->aspath->peer->conf.id)
			return;
		if (req->type == IMSG_CTL_SHOW_RIB_AS &&
		    !aspath_match(p->aspath->aspath->data,
		    p->aspath->aspath->len, &req->as, req->as.as))
			return;
		if (req->type == IMSG_CTL_SHOW_RIB_COMMUNITY &&
		    !community_match(p->aspath, req->community.as,
		    req->community.type))
			return;
		if ((req->flags & F_CTL_ACTIVE) && p->rib->active != p)
			return;
		rde_dump_rib_as(p, p->aspath, req->pid, req->flags);
	} else if (req->flags & F_CTL_ADJ_OUT) {
		if (p->rib->active != p)
			/* only consider the active prefix */
			return;
		if (req->peerid) {
			if ((peer = peer_get(req->peerid)) != NULL)
				rde_dump_filterout(peer, p, req);
			return;
		}
	}
}

void
rde_dump_upcall(struct rib_entry *re, void *ptr)
{
	struct prefix		*p;
	struct rde_dump_ctx	*ctx = ptr;

	LIST_FOREACH(p, &re->prefix_h, rib_l)
		rde_dump_filter(p, &ctx->req);
}

void
rde_dump_prefix_upcall(struct rib_entry *re, void *ptr)
{
	struct rde_dump_ctx	*ctx = ptr;
	struct prefix		*p;
	struct pt_entry		*pt;
	struct bgpd_addr	 addr;

	pt = re->prefix;
	pt_getaddr(pt, &addr);
	if (addr.aid != ctx->req.prefix.aid)
		return;
	if (ctx->req.prefixlen > pt->prefixlen)
		return;
	if (!prefix_compare(&ctx->req.prefix, &addr, ctx->req.prefixlen))
		LIST_FOREACH(p, &re->prefix_h, rib_l)
			rde_dump_filter(p, &ctx->req);
}

void
rde_dump_ctx_new(struct ctl_show_rib_request *req, pid_t pid,
    enum imsg_type type)
{
	struct rde_dump_ctx	*ctx;
	struct rib_entry	*re;
	u_int			 error;
	u_int16_t		 id;
	u_int8_t		 hostplen;

	if ((ctx = calloc(1, sizeof(*ctx))) == NULL) {
		log_warn("rde_dump_ctx_new");
		error = CTL_RES_NOMEM;
		imsg_compose(ibuf_se_ctl, IMSG_CTL_RESULT, 0, pid, -1, &error,
		    sizeof(error));
		return;
	}
	if ((id = rib_find(req->rib)) == RIB_FAILED) {
		log_warnx("rde_dump_ctx_new: no such rib %s", req->rib);
		error = CTL_RES_NOSUCHPEER;
		imsg_compose(ibuf_se_ctl, IMSG_CTL_RESULT, 0, pid, -1, &error,
		    sizeof(error));
		free(ctx);
		return;
	}

	memcpy(&ctx->req, req, sizeof(struct ctl_show_rib_request));
	ctx->req.pid = pid;
	ctx->req.type = type;
	ctx->ribctx.ctx_count = RDE_RUNNER_ROUNDS;
	ctx->ribctx.ctx_rib = &ribs[id];
	switch (ctx->req.type) {
	case IMSG_CTL_SHOW_NETWORK:
		ctx->ribctx.ctx_upcall = network_dump_upcall;
		break;
	case IMSG_CTL_SHOW_RIB:
	case IMSG_CTL_SHOW_RIB_AS:
	case IMSG_CTL_SHOW_RIB_COMMUNITY:
		ctx->ribctx.ctx_upcall = rde_dump_upcall;
		break;
	case IMSG_CTL_SHOW_RIB_PREFIX:
		if (req->flags & F_LONGER) {
			ctx->ribctx.ctx_upcall = rde_dump_prefix_upcall;
			break;
		}
		switch (req->prefix.aid) {
		case AID_INET:
		case AID_VPN_IPv4:
			hostplen = 32;
			break;
		case AID_INET6:
			hostplen = 128;
			break;
		default:
			fatalx("rde_dump_ctx_new: unknown af");
		}
		if (req->prefixlen == hostplen)
			re = rib_lookup(&ribs[id], &req->prefix);
		else
			re = rib_get(&ribs[id], &req->prefix, req->prefixlen);
		if (re)
			rde_dump_upcall(re, ctx);
		rde_dump_done(ctx);
		return;
	default:
		fatalx("rde_dump_ctx_new: unsupported imsg type");
	}
	ctx->ribctx.ctx_done = rde_dump_done;
	ctx->ribctx.ctx_arg = ctx;
	ctx->ribctx.ctx_aid = ctx->req.aid;
	rib_dump_r(&ctx->ribctx);
}
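
/*
 * Full-table dumps do not run to completion in one go: the context set
 * up above is driven by rib_dump_r(), which handles RDE_RUNNER_ROUNDS
 * rib entries per invocation and then returns to the main event loop,
 * keeping the RDE responsive to UPDATEs while bgpctl drains its output.
 * Only host-width prefix lookups are answered directly.
 */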

void
rde_dump_done(void *arg)
{
	struct rde_dump_ctx	*ctx = arg;

	imsg_compose(ibuf_se_ctl, IMSG_CTL_END, 0, ctx->req.pid,
	    -1, NULL, 0);
	free(ctx);
}

void
rde_dump_mrt_new(struct mrt *mrt, pid_t pid, int fd)
{
	struct rde_mrt_ctx	*ctx;
	u_int16_t		 id;

	if ((ctx = calloc(1, sizeof(*ctx))) == NULL) {
		log_warn("rde_dump_mrt_new");
		return;
	}
	memcpy(&ctx->mrt, mrt, sizeof(struct mrt));
	TAILQ_INIT(&ctx->mrt.wbuf.bufs);
	ctx->mrt.wbuf.fd = fd;
	ctx->mrt.state = MRT_STATE_RUNNING;
	id = rib_find(ctx->mrt.rib);
	if (id == RIB_FAILED) {
		log_warnx("non-existent RIB %s for mrt dump", ctx->mrt.rib);
		free(ctx);
		return;
	}

	if (ctx->mrt.type == MRT_TABLE_DUMP_V2)
		mrt_dump_v2_hdr(&ctx->mrt, conf, &peerlist);

	ctx->ribctx.ctx_count = RDE_RUNNER_ROUNDS;
	ctx->ribctx.ctx_rib = &ribs[id];
	ctx->ribctx.ctx_upcall = mrt_dump_upcall;
	ctx->ribctx.ctx_done = mrt_done;
	ctx->ribctx.ctx_arg = &ctx->mrt;
	ctx->ribctx.ctx_aid = AID_UNSPEC;
	LIST_INSERT_HEAD(&rde_mrts, ctx, entry);
	rde_mrt_cnt++;
	rib_dump_r(&ctx->ribctx);
}

/*
 * kroute specific functions
 */
int
rde_rdomain_import(struct rde_aspath *asp, struct rdomain *rd)
{
	struct filter_set	*s;

	TAILQ_FOREACH(s, &rd->import, entry) {
		if (community_ext_match(asp, &s->action.ext_community, 0))
			return (1);
	}
	return (0);
}
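
/*
 * A VPN route is imported into an rdomain when it carries at least one
 * of the route-target extended communities in the rdomain's import set
 * (the import-target filters from bgpd.conf).  E.g. a route tagged with
 * rt 65000:42 matches an rdomain importing that same route target.
 */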

void
rde_send_kroute(struct prefix *new, struct prefix *old, u_int16_t ribid)
{
	struct kroute_full	 kr;
	struct bgpd_addr	 addr;
	struct prefix		*p;
	struct rdomain		*rd;
	enum imsg_type		 type;

	/*
	 * Make sure that self-announced prefixes are not committed to the
	 * FIB.  If both prefixes are unreachable no update is needed.
	 */
	if ((old == NULL || old->aspath->flags & F_PREFIX_ANNOUNCED) &&
	    (new == NULL || new->aspath->flags & F_PREFIX_ANNOUNCED))
		return;

	if (new == NULL || new->aspath->flags & F_PREFIX_ANNOUNCED) {
		type = IMSG_KROUTE_DELETE;
		p = old;
	} else {
		type = IMSG_KROUTE_CHANGE;
		p = new;
	}

	pt_getaddr(p->prefix, &addr);
	bzero(&kr, sizeof(kr));
	memcpy(&kr.prefix, &addr, sizeof(kr.prefix));
	kr.prefixlen = p->prefix->prefixlen;
	if (p->aspath->flags & F_NEXTHOP_REJECT)
		kr.flags |= F_REJECT;
	if (p->aspath->flags & F_NEXTHOP_BLACKHOLE)
		kr.flags |= F_BLACKHOLE;
	if (type == IMSG_KROUTE_CHANGE)
		memcpy(&kr.nexthop, &p->aspath->nexthop->true_nexthop,
		    sizeof(kr.nexthop));
	strlcpy(kr.label, rtlabel_id2name(p->aspath->rtlabelid),
	    sizeof(kr.label));

	switch (addr.aid) {
	case AID_VPN_IPv4:
		if (ribid != 1)
			/* not Loc-RIB, no update for VPNs */
			break;

		SIMPLEQ_FOREACH(rd, rdomains_l, entry) {
			if (!rde_rdomain_import(p->aspath, rd))
				continue;
			/*
			 * must send exit_nexthop so that the correct
			 * MPLS tunnel is chosen
			 */
			if (type == IMSG_KROUTE_CHANGE)
				memcpy(&kr.nexthop,
				    &p->aspath->nexthop->exit_nexthop,
				    sizeof(kr.nexthop));
			if (imsg_compose(ibuf_main, type, rd->rtableid, 0, -1,
			    &kr, sizeof(kr)) == -1)
				fatal("%s %d imsg_compose error", __func__,
				    __LINE__);
		}
		break;
	default:
		if (imsg_compose(ibuf_main, type, ribs[ribid].rtableid, 0, -1,
		    &kr, sizeof(kr)) == -1)
			fatal("%s %d imsg_compose error", __func__, __LINE__);
		break;
	}
}
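
/*
 * Note the nexthop choice above: the plain FIB is loaded with the
 * recursively resolved true_nexthop, while VPN routes are installed
 * with the exit_nexthop so the kernel selects the MPLS tunnel towards
 * the BGP border router instead of the directly connected IGP hop.
 */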

/*
 * pf table specific functions
 */
void
rde_send_pftable(u_int16_t id, struct bgpd_addr *addr,
    u_int8_t len, int del)
{
	struct pftable_msg pfm;

	if (id == 0)
		return;

	/* do not run while cleaning up */
	if (rde_quit)
		return;

	bzero(&pfm, sizeof(pfm));
	strlcpy(pfm.pftable, pftable_id2name(id), sizeof(pfm.pftable));
	memcpy(&pfm.addr, addr, sizeof(pfm.addr));
	pfm.len = len;

	if (imsg_compose(ibuf_main,
	    del ? IMSG_PFTABLE_REMOVE : IMSG_PFTABLE_ADD,
	    0, 0, -1, &pfm, sizeof(pfm)) == -1)
		fatal("%s %d imsg_compose error", __func__, __LINE__);
}

void
rde_send_pftable_commit(void)
{
	/* do not run while cleaning up */
	if (rde_quit)
		return;

	if (imsg_compose(ibuf_main, IMSG_PFTABLE_COMMIT, 0, 0, -1, NULL, 0) ==
	    -1)
		fatal("%s %d imsg_compose error", __func__, __LINE__);
}

/*
 * nexthop specific functions
 */
void
rde_send_nexthop(struct bgpd_addr *next, int valid)
{
	int			 type;

	if (valid)
		type = IMSG_NEXTHOP_ADD;
	else
		type = IMSG_NEXTHOP_REMOVE;

	if (imsg_compose(ibuf_main, type, 0, 0, -1, next,
	    sizeof(struct bgpd_addr)) == -1)
		fatal("%s %d imsg_compose error", __func__, __LINE__);
}
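
/*
 * IMSG_NEXTHOP_ADD asks the parent process to track reachability of a
 * nexthop in the kernel routing table.  The answer arrives
 * asynchronously as a nexthop update and toggles the nexthop between
 * NEXTHOP_REACH and NEXTHOP_UNREACH, which in turn re-evaluates the
 * prefixes depending on it.
 */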

/*
 * soft reconfig specific functions
 */
void
rde_reload_done(void)
{
	struct rdomain		*rd;
	struct rde_peer		*peer;
	struct filter_head	*fh;
	u_int16_t		 rid;

	/* first merge the main config */
	if ((nconf->flags & BGPD_FLAG_NO_EVALUATE)
	    != (conf->flags & BGPD_FLAG_NO_EVALUATE)) {
		log_warnx("change to/from route-collector mode ignored");
		if (conf->flags & BGPD_FLAG_NO_EVALUATE)
			nconf->flags |= BGPD_FLAG_NO_EVALUATE;
		else
			nconf->flags &= ~BGPD_FLAG_NO_EVALUATE;
	}
	memcpy(conf, nconf, sizeof(struct bgpd_config));
	conf->listen_addrs = NULL;
	conf->csock = NULL;
	conf->rcsock = NULL;
	free(nconf);
	nconf = NULL;

	/* sync peerself with conf */
	peerself->remote_bgpid = ntohl(conf->bgpid);
	peerself->conf.local_as = conf->as;
	peerself->conf.remote_as = conf->as;
	peerself->short_as = conf->short_as;

	/* apply the new set of rdomains, sync will be done later */
	while ((rd = SIMPLEQ_FIRST(rdomains_l)) != NULL) {
		SIMPLEQ_REMOVE_HEAD(rdomains_l, entry);
		filterset_free(&rd->import);
		filterset_free(&rd->export);
		free(rd);
	}
	free(rdomains_l);
	rdomains_l = newdomains;
	/* XXX WHERE IS THE SYNC ??? */

	rde_filter_calc_skip_steps(out_rules_tmp);

	/*
	 * Make the new filter rules the active ones but keep the old for
	 * softreconfig.  This is needed so that changes happening are using
	 * the right filters.
	 */
	fh = out_rules;
	out_rules = out_rules_tmp;
	out_rules_tmp = fh;

	/* check if filter changed */
	LIST_FOREACH(peer, &peerlist, peer_l) {
		if (peer->conf.id == 0)
			continue;
		peer->reconf_out = 0;
		peer->reconf_rib = 0;
		if (peer->ribid != rib_find(peer->conf.rib)) {
			rib_dump(&ribs[peer->ribid],
			    rde_softreconfig_unload_peer, peer, AID_UNSPEC);
			peer->ribid = rib_find(peer->conf.rib);
			if (peer->ribid == RIB_FAILED)
				fatalx("King Bula's peer met an unknown RIB");
			peer->reconf_rib = 1;
			continue;
		}
		if (peer->conf.softreconfig_out &&
		    !rde_filter_equal(out_rules, out_rules_tmp, peer)) {
			peer->reconf_out = 1;
		}
	}
	/* bring ribs in sync */
	for (rid = 0; rid < rib_size; rid++) {
		if (*ribs[rid].name == '\0')
			continue;
		rde_filter_calc_skip_steps(ribs[rid].in_rules_tmp);

		/* flip rules, make new active */
		fh = ribs[rid].in_rules;
		ribs[rid].in_rules = ribs[rid].in_rules_tmp;
		ribs[rid].in_rules_tmp = fh;

		switch (ribs[rid].state) {
		case RECONF_DELETE:
			rib_free(&ribs[rid]);
			break;
		case RECONF_KEEP:
			if (rde_filter_equal(ribs[rid].in_rules,
			    ribs[rid].in_rules_tmp, NULL))
				/* rib is in sync */
				break;
			ribs[rid].state = RECONF_RELOAD;
			/* FALLTHROUGH */
		case RECONF_REINIT:
			rib_dump(&ribs[0], rde_softreconfig_in, &ribs[rid],
			    AID_UNSPEC);
			break;
		case RECONF_RELOAD:
			log_warnx("Bad rib reload state");
			/* FALLTHROUGH */
		case RECONF_NONE:
			break;
		}
	}
	LIST_FOREACH(peer, &peerlist, peer_l) {
		if (peer->reconf_out)
			rib_dump(&ribs[peer->ribid], rde_softreconfig_out,
			    peer, AID_UNSPEC);
		else if (peer->reconf_rib)
			/* dump the full table to neighbors that changed rib */
			peer_dump(peer->conf.id, AID_UNSPEC);
	}
	filterlist_free(out_rules_tmp);
	out_rules_tmp = NULL;
	for (rid = 0; rid < rib_size; rid++) {
		if (*ribs[rid].name == '\0')
			continue;
		filterlist_free(ribs[rid].in_rules_tmp);
		ribs[rid].in_rules_tmp = NULL;
		ribs[rid].state = RECONF_NONE;
	}

	log_info("RDE reconfigured");
	imsg_compose(ibuf_main, IMSG_RECONF_DONE, 0, 0,
	    -1, NULL, 0);
}
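
/*
 * Soft reconfiguration never flushes a RIB wholesale.  The walkers
 * below evaluate every prefix of the Adj-RIB-In against both the old
 * and the new rule set and apply only the difference, so an unchanged
 * filter verdict causes neither RIB churn nor UPDATE traffic.
 */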

void
rde_softreconfig_in(struct rib_entry *re, void *ptr)
{
	struct rib		*rib = ptr;
	struct prefix		*p, *np;
	struct pt_entry		*pt;
	struct rde_peer		*peer;
	struct rde_aspath	*asp, *oasp, *nasp;
	enum filter_actions	 oa, na;
	struct bgpd_addr	 addr;

	pt = re->prefix;
	pt_getaddr(pt, &addr);
	for (p = LIST_FIRST(&re->prefix_h); p != NULL; p = np) {
		/*
		 * prefix_remove() and path_update() may change the object
		 * so cache the values.
		 */
		np = LIST_NEXT(p, rib_l);
		asp = p->aspath;
		peer = asp->peer;

		/* check if the prefix changed */
		if (rib->state == RECONF_RELOAD) {
			oa = rde_filter(rib->in_rules_tmp, &oasp, peer,
			    asp, &addr, pt->prefixlen, peer);
			oasp = oasp != NULL ? oasp : asp;
		} else {
			/* make sure we update everything for RECONF_REINIT */
			oa = ACTION_DENY;
			oasp = asp;
		}
		na = rde_filter(rib->in_rules, &nasp, peer, asp,
		    &addr, pt->prefixlen, peer);
		nasp = nasp != NULL ? nasp : asp;

		/* go through all 4 possible combinations */
		/* if (oa == ACTION_DENY && na == ACTION_DENY) */
		/*	nothing to do */
		if (oa == ACTION_DENY && na == ACTION_ALLOW) {
			/* update Local-RIB */
			path_update(rib, peer, nasp, &addr, pt->prefixlen);
		} else if (oa == ACTION_ALLOW && na == ACTION_DENY) {
			/* remove from Local-RIB */
			prefix_remove(rib, peer, &addr, pt->prefixlen, 0);
		} else if (oa == ACTION_ALLOW && na == ACTION_ALLOW) {
			if (path_compare(nasp, oasp) != 0)
				/* send update */
				path_update(rib, peer, nasp, &addr,
				    pt->prefixlen);
		}

		if (oasp != asp)
			path_put(oasp);
		if (nasp != asp)
			path_put(nasp);
	}
}

void
rde_softreconfig_out(struct rib_entry *re, void *ptr)
{
	struct prefix		*p = re->active;
	struct pt_entry		*pt;
	struct rde_peer		*peer = ptr;
	struct rde_aspath	*oasp, *nasp;
	enum filter_actions	 oa, na;
	struct bgpd_addr	 addr;

	if (peer->conf.id == 0)
		fatalx("King Bula troubled by bad peer");

	if (p == NULL)
		return;

	pt = re->prefix;
	pt_getaddr(pt, &addr);

	if (up_test_update(peer, p) != 1)
		return;

	oa = rde_filter(out_rules_tmp, &oasp, peer, p->aspath,
	    &addr, pt->prefixlen, p->aspath->peer);
	na = rde_filter(out_rules, &nasp, peer, p->aspath,
	    &addr, pt->prefixlen, p->aspath->peer);
	oasp = oasp != NULL ? oasp : p->aspath;
	nasp = nasp != NULL ? nasp : p->aspath;

	/* go through all 4 possible combinations */
	/* if (oa == ACTION_DENY && na == ACTION_DENY) */
	/*	nothing to do */
	if (oa == ACTION_DENY && na == ACTION_ALLOW) {
		/* send update */
		up_generate(peer, nasp, &addr, pt->prefixlen);
	} else if (oa == ACTION_ALLOW && na == ACTION_DENY) {
		/* send withdraw */
		up_generate(peer, NULL, &addr, pt->prefixlen);
	} else if (oa == ACTION_ALLOW && na == ACTION_ALLOW) {
		/* send update if the path attributes changed */
		if (path_compare(nasp, oasp) != 0)
			up_generate(peer, nasp, &addr, pt->prefixlen);
	}

	if (oasp != p->aspath)
		path_put(oasp);
	if (nasp != p->aspath)
		path_put(nasp);
}
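
/*
 * In both walkers above the old/new filter verdicts map to actions as
 *
 *	old DENY  -> new DENY :	nothing to do
 *	old DENY  -> new ALLOW:	add to RIB / announce
 *	old ALLOW -> new DENY :	remove from RIB / withdraw
 *	old ALLOW -> new ALLOW:	update only if the path attributes
 *				changed (path_compare() != 0)
 */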

void
rde_softreconfig_unload_peer(struct rib_entry *re, void *ptr)
{
	struct rde_peer		*peer = ptr;
	struct prefix		*p = re->active;
	struct pt_entry		*pt;
	struct rde_aspath	*oasp;
	enum filter_actions	 oa;
	struct bgpd_addr	 addr;

	pt = re->prefix;
	pt_getaddr(pt, &addr);

	/* check if the prefix was announced */
	if (up_test_update(peer, p) != 1)
		return;

	oa = rde_filter(out_rules_tmp, &oasp, peer, p->aspath,
	    &addr, pt->prefixlen, p->aspath->peer);
	oasp = oasp != NULL ? oasp : p->aspath;

	if (oa == ACTION_DENY)
		/* nothing to do */
		goto done;

	/* send withdraw */
	up_generate(peer, NULL, &addr, pt->prefixlen);
done:
	if (oasp != p->aspath)
		path_put(oasp);
}

/*
 * update specific functions
 */
u_char	queue_buf[4096];

void
rde_up_dump_upcall(struct rib_entry *re, void *ptr)
{
	struct rde_peer		*peer = ptr;

	if (re->ribid != peer->ribid)
		fatalx("King Bula: monstrous evil horror.");
	if (re->active == NULL)
		return;
	up_generate_updates(out_rules, peer, re->active, NULL);
}

void
rde_generate_updates(u_int16_t ribid, struct prefix *new, struct prefix *old)
{
	struct rde_peer		*peer;

	/*
	 * If old is != NULL we know it was active and should be removed.
	 * If new is != NULL we know it is reachable and then we should
	 * generate an update.
	 */
	if (old == NULL && new == NULL)
		return;

	LIST_FOREACH(peer, &peerlist, peer_l) {
		if (peer->conf.id == 0)
			continue;
		if (peer->ribid != ribid)
			continue;
		if (peer->state != PEER_UP)
			continue;
		up_generate_updates(out_rules, peer, new, old);
	}
}

void
rde_update_queue_runner(void)
{
	struct rde_peer		*peer;
	int			 r, sent, max = RDE_RUNNER_ROUNDS, eor = 0;
	u_int16_t		 len, wd_len, wpos;

	len = sizeof(queue_buf) - MSGSIZE_HEADER;
	do {
		sent = 0;
		LIST_FOREACH(peer, &peerlist, peer_l) {
			if (peer->conf.id == 0)
				continue;
			if (peer->state != PEER_UP)
				continue;
			/* first withdraws */
			wpos = 2; /* reserve space for the length field */
			r = up_dump_prefix(queue_buf + wpos, len - wpos - 2,
			    &peer->withdraws[AID_INET], peer);
			wd_len = r;
			/* write the withdrawn routes length field */
			wd_len = htons(wd_len);
			memcpy(queue_buf, &wd_len, 2);
			wpos += r;

			/* now bgp path attributes */
			r = up_dump_attrnlri(queue_buf + wpos, len - wpos,
			    peer);
			switch (r) {
			case -1:
				eor = 1;
				if (wd_len == 0) {
					/* no withdraws queued, just send EoR */
					peer_send_eor(peer, AID_INET);
					continue;
				}
				break;
			case 2:
				if (wd_len == 0) {
					/*
					 * No packet to send.  No withdraws and
					 * no path attributes.  Skip.
					 */
					continue;
				}
				/* FALLTHROUGH */
			default:
				wpos += r;
				break;
			}

			/* finally send the message to the SE */
			if (imsg_compose(ibuf_se, IMSG_UPDATE, peer->conf.id,
			    0, -1, queue_buf, wpos) == -1)
				fatal("%s %d imsg_compose error", __func__,
				    __LINE__);
			sent++;
			if (eor) {
				eor = 0;
				peer_send_eor(peer, AID_INET);
			}
		}
		max -= sent;
	} while (sent != 0 && max > 0);
}
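
/*
 * queue_buf above holds the body of one IPv4 UPDATE message; the SE
 * prepends the fixed 19 byte BGP header (hence MSGSIZE_HEADER).  The
 * layout assembled by the runner is
 *
 *	withdrawn routes length		2 bytes (written last)
 *	withdrawn routes		variable
 *	total path attribute length	2 bytes
 *	path attributes + NLRI		variable
 *
 * which is why wpos starts at 2 and up_dump_prefix() is offered
 * len - wpos - 2: room must remain for the attribute length field.
 */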

void
rde_update6_queue_runner(u_int8_t aid)
{
	struct rde_peer		*peer;
	u_char			*b;
	int			 r, sent, max = RDE_RUNNER_ROUNDS / 2;
	u_int16_t		 len;

	/* first withdraws ... */
	do {
		sent = 0;
		LIST_FOREACH(peer, &peerlist, peer_l) {
			if (peer->conf.id == 0)
				continue;
			if (peer->state != PEER_UP)
				continue;
			len = sizeof(queue_buf) - MSGSIZE_HEADER;
			b = up_dump_mp_unreach(queue_buf, &len, peer, aid);

			if (b == NULL)
				continue;
			/* finally send the message to the SE */
			if (imsg_compose(ibuf_se, IMSG_UPDATE, peer->conf.id,
			    0, -1, b, len) == -1)
				fatal("%s %d imsg_compose error", __func__,
				    __LINE__);
			sent++;
		}
		max -= sent;
	} while (sent != 0 && max > 0);

	/* ... then updates */
	max = RDE_RUNNER_ROUNDS / 2;
	do {
		sent = 0;
		LIST_FOREACH(peer, &peerlist, peer_l) {
			if (peer->conf.id == 0)
				continue;
			if (peer->state != PEER_UP)
				continue;
			len = sizeof(queue_buf) - MSGSIZE_HEADER;
			r = up_dump_mp_reach(queue_buf, &len, peer, aid);
			switch (r) {
			case -2:
				continue;
			case -1:
				peer_send_eor(peer, aid);
				continue;
			default:
				b = queue_buf + r;
				break;
			}

			/* finally send the message to the SE */
			if (imsg_compose(ibuf_se, IMSG_UPDATE, peer->conf.id,
			    0, -1, b, len) == -1)
				fatal("%s %d imsg_compose error", __func__,
				    __LINE__);
			sent++;
		}
		max -= sent;
	} while (sent != 0 && max > 0);
}

/*
 * generic helper function
 */
u_int32_t
rde_local_as(void)
{
	return (conf->as);
}

int
rde_noevaluate(void)
{
	/* do not run while cleaning up */
	if (rde_quit)
		return (1);

	return (conf->flags & BGPD_FLAG_NO_EVALUATE);
}

int
rde_decisionflags(void)
{
	return (conf->flags & BGPD_FLAG_DECISION_MASK);
}

int
rde_as4byte(struct rde_peer *peer)
{
	return (peer->capa.as4byte);
}

/*
 * peer functions
 */
struct peer_table {
	struct rde_peer_head	*peer_hashtbl;
	u_int32_t		 peer_hashmask;
} peertable;

#define PEER_HASH(x)		\
	&peertable.peer_hashtbl[(x) & peertable.peer_hashmask]

void
peer_init(u_int32_t hashsize)
{
	struct peer_config pc;
	u_int32_t	 hs, i;

	/* round the table size up to the next power of two */
	for (hs = 1; hs < hashsize; hs <<= 1)
		;
	peertable.peer_hashtbl = calloc(hs, sizeof(struct rde_peer_head));
	if (peertable.peer_hashtbl == NULL)
		fatal("peer_init");

	for (i = 0; i < hs; i++)
		LIST_INIT(&peertable.peer_hashtbl[i]);
	LIST_INIT(&peerlist);

	peertable.peer_hashmask = hs - 1;

	bzero(&pc, sizeof(pc));
	snprintf(pc.descr, sizeof(pc.descr), "LOCAL");

	peerself = peer_add(0, &pc);
	if (peerself == NULL)
		fatalx("peer_init add self");

	peerself->state = PEER_UP;
}

void
peer_shutdown(void)
{
	u_int32_t	 i;

	for (i = 0; i <= peertable.peer_hashmask; i++)
		if (!LIST_EMPTY(&peertable.peer_hashtbl[i]))
			log_warnx("peer_free: free non-free table");

	free(peertable.peer_hashtbl);
}

struct rde_peer *
peer_get(u_int32_t id)
{
	struct rde_peer_head	*head;
	struct rde_peer		*peer;

	head = PEER_HASH(id);

	LIST_FOREACH(peer, head, hash_l) {
		if (peer->conf.id == id)
			return (peer);
	}
	return (NULL);
}
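
/*
 * peer_init() rounds the table size up to a power of two, so the modulo
 * in PEER_HASH() degenerates to a mask: with hashsize 1024 the mask is
 * 0x3ff and peer id 4242 lands in bucket 4242 & 0x3ff = 146.
 */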

struct rde_peer *
peer_add(u_int32_t id, struct peer_config *p_conf)
{
	struct rde_peer_head	*head;
	struct rde_peer		*peer;

	if ((peer = peer_get(id))) {
		memcpy(&peer->conf, p_conf, sizeof(struct peer_config));
		return (NULL);
	}

	peer = calloc(1, sizeof(struct rde_peer));
	if (peer == NULL)
		fatal("peer_add");

	LIST_INIT(&peer->path_h);
	memcpy(&peer->conf, p_conf, sizeof(struct peer_config));
	peer->remote_bgpid = 0;
	peer->ribid = rib_find(peer->conf.rib);
	if (peer->ribid == RIB_FAILED)
		fatalx("King Bula's new peer met an unknown RIB");
	peer->state = PEER_NONE;
	up_init(peer);

	head = PEER_HASH(id);

	LIST_INSERT_HEAD(head, peer, hash_l);
	LIST_INSERT_HEAD(&peerlist, peer, peer_l);

	return (peer);
}

int
peer_localaddrs(struct rde_peer *peer, struct bgpd_addr *laddr)
{
	struct ifaddrs	*ifap, *ifa, *match;

	if (getifaddrs(&ifap) == -1)
		fatal("getifaddrs");

	for (match = ifap; match != NULL; match = match->ifa_next)
		if (sa_cmp(laddr, match->ifa_addr) == 0)
			break;

	if (match == NULL) {
		log_warnx("peer_localaddrs: local address not found");
		freeifaddrs(ifap);
		return (-1);
	}

	for (ifa = ifap; ifa != NULL; ifa = ifa->ifa_next) {
		if (ifa->ifa_addr->sa_family == AF_INET &&
		    strcmp(ifa->ifa_name, match->ifa_name) == 0) {
			if (ifa->ifa_addr->sa_family ==
			    match->ifa_addr->sa_family)
				ifa = match;
			sa2addr(ifa->ifa_addr, &peer->local_v4_addr);
			break;
		}
	}
	for (ifa = ifap; ifa != NULL; ifa = ifa->ifa_next) {
		if (ifa->ifa_addr->sa_family == AF_INET6 &&
		    strcmp(ifa->ifa_name, match->ifa_name) == 0) {
			/*
			 * only accept global scope addresses unless
			 * explicitly specified.
			 */
			if (ifa->ifa_addr->sa_family ==
			    match->ifa_addr->sa_family)
				ifa = match;
			else if (IN6_IS_ADDR_LINKLOCAL(
			    &((struct sockaddr_in6 *)ifa->
			    ifa_addr)->sin6_addr) ||
			    IN6_IS_ADDR_SITELOCAL(
			    &((struct sockaddr_in6 *)ifa->
			    ifa_addr)->sin6_addr))
				continue;
			sa2addr(ifa->ifa_addr, &peer->local_v6_addr);
			break;
		}
	}

	freeifaddrs(ifap);
	return (0);
}
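
/*
 * The addresses discovered above are the ones later used when bgpd
 * announces itself as nexthop: one IPv4 and one IPv6 address on the
 * interface carrying the session, preferring the session's own local
 * address when the family matches and skipping IPv6 link- and
 * site-local addresses otherwise.
 */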

void
peer_up(u_int32_t id, struct session_up *sup)
{
	struct rde_peer	*peer;
	u_int8_t	 i;

	peer = peer_get(id);
	if (peer == NULL) {
		log_warnx("peer_up: unknown peer id %u", id);
		return;
	}

	if (peer->state != PEER_DOWN && peer->state != PEER_NONE &&
	    peer->state != PEER_UP) {
		/*
		 * There is a race condition when doing PEER_ERR -> PEER_DOWN.
		 * So just do a full reset of the peer here.
		 */
		for (i = 0; i < AID_MAX; i++) {
			peer->staletime[i] = 0;
			peer_flush(peer, i);
		}
		up_down(peer);
		peer->prefix_cnt = 0;
		peer->state = PEER_DOWN;
	}
	peer->remote_bgpid = ntohl(sup->remote_bgpid);
	peer->short_as = sup->short_as;
	memcpy(&peer->remote_addr, &sup->remote_addr,
	    sizeof(peer->remote_addr));
	memcpy(&peer->capa, &sup->capa, sizeof(peer->capa));

	if (peer_localaddrs(peer, &sup->local_addr)) {
		peer->state = PEER_DOWN;
		imsg_compose(ibuf_se, IMSG_SESSION_DOWN, id, 0, -1, NULL, 0);
		return;
	}

	peer->state = PEER_UP;
	up_init(peer);

	if (rde_noevaluate())
		/*
		 * No need to dump the table to the peer, there are no
		 * active prefixes anyway.  This is a speed-up hack.
		 */
		return;

	for (i = 0; i < AID_MAX; i++) {
		if (peer->capa.mp[i])
			peer_dump(id, i);
	}
}

void
peer_down(u_int32_t id)
{
	struct rde_peer		*peer;
	struct rde_aspath	*asp, *nasp;

	peer = peer_get(id);
	if (peer == NULL) {
		log_warnx("peer_down: unknown peer id %u", id);
		return;
	}
	peer->remote_bgpid = 0;
	peer->state = PEER_DOWN;
	up_down(peer);

	/* walk through the per peer RIB list and remove all prefixes. */
	for (asp = LIST_FIRST(&peer->path_h); asp != NULL; asp = nasp) {
		nasp = LIST_NEXT(asp, peer_l);
		path_remove(asp);
	}
	LIST_INIT(&peer->path_h);
	peer->prefix_cnt = 0;

	/* deletions are performed in path_remove() */
	rde_send_pftable_commit();

	LIST_REMOVE(peer, hash_l);
	LIST_REMOVE(peer, peer_l);
	free(peer);
}

/*
 * Flush all routes older than staletime.  If staletime is 0 all routes
 * will be flushed.
 */
void
peer_flush(struct rde_peer *peer, u_int8_t aid)
{
	struct rde_aspath	*asp, *nasp;
	u_int32_t		 rprefixes;

	rprefixes = 0;
	/* walk through the per peer RIB list and remove all stale prefixes. */
	for (asp = LIST_FIRST(&peer->path_h); asp != NULL; asp = nasp) {
		nasp = LIST_NEXT(asp, peer_l);
		rprefixes += path_remove_stale(asp, aid);
	}

	/* deletions are performed in path_remove() */
	rde_send_pftable_commit();

	/* flushed, no need to keep the staletime */
	peer->staletime[aid] = 0;

	if (peer->prefix_cnt > rprefixes)
		peer->prefix_cnt -= rprefixes;
	else
		peer->prefix_cnt = 0;
}

void
peer_stale(u_int32_t id, u_int8_t aid)
{
	struct rde_peer	*peer;
	time_t		 now;

	peer = peer_get(id);
	if (peer == NULL) {
		log_warnx("peer_stale: unknown peer id %u", id);
		return;
	}

	/* flush the now even staler routes out */
	if (peer->staletime[aid])
		peer_flush(peer, aid);
	peer->staletime[aid] = now = time(NULL);

	/* make sure new prefixes start on a higher timestamp */
	do {
		sleep(1);
	} while (now >= time(NULL));
}

void
peer_dump(u_int32_t id, u_int8_t aid)
{
	struct rde_peer	*peer;

	peer = peer_get(id);
	if (peer == NULL) {
		log_warnx("peer_dump: unknown peer id %u", id);
		return;
	}

	if (peer->conf.announce_type == ANNOUNCE_DEFAULT_ROUTE)
		up_generate_default(out_rules, peer, aid);
	else
		rib_dump(&ribs[peer->ribid], rde_up_dump_upcall, peer, aid);
	if (peer->capa.grestart.restart)
		up_generate_marker(peer, aid);
}
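
/*
 * Graceful restart (RFC 4724) staleness is purely timestamp based:
 * peer_stale() records the current time and everything whose lastchange
 * is not newer stays marked stale until the restarting peer refreshes
 * it or peer_flush() removes it.  The sleep(1) loop guarantees that
 * re-learned prefixes get a strictly larger timestamp even on a clock
 * with one second granularity.
 */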

/* End-of-RIB marker, RFC 4724 */
void
peer_recv_eor(struct rde_peer *peer, u_int8_t aid)
{
	peer->prefix_rcvd_eor++;

	/*
	 * First notify the SE to avert a possible race with the restart
	 * timeout.  If the timeout fires before this imsg is processed by
	 * the SE it will result in the same operation since the timeout
	 * issues a FLUSH which does the same as the RESTARTED action
	 * (flushing stale routes).  The logic in the SE is so that only
	 * one of FLUSH or RESTARTED will be sent back to the RDE and so
	 * peer_flush is only called once.
	 */
	if (imsg_compose(ibuf_se, IMSG_SESSION_RESTARTED, peer->conf.id,
	    0, -1, &aid, sizeof(aid)) == -1)
		fatal("%s %d imsg_compose error", __func__, __LINE__);
}

void
peer_send_eor(struct rde_peer *peer, u_int8_t aid)
{
	u_int16_t	 afi;
	u_int8_t	 safi;

	peer->prefix_sent_eor++;

	if (aid == AID_INET) {
		u_char null[4];

		bzero(&null, 4);
		if (imsg_compose(ibuf_se, IMSG_UPDATE, peer->conf.id,
		    0, -1, &null, 4) == -1)
			fatal("%s %d imsg_compose error in peer_send_eor",
			    __func__, __LINE__);
	} else {
		u_int16_t	i;
		u_char		buf[10];

		if (aid2afi(aid, &afi, &safi) == -1)
			fatalx("peer_send_eor: bad AID");

		i = 0;	/* v4 withdrawn len */
		bcopy(&i, &buf[0], sizeof(i));
		i = htons(6);	/* path attr len */
		bcopy(&i, &buf[2], sizeof(i));
		buf[4] = ATTR_OPTIONAL;
		buf[5] = ATTR_MP_UNREACH_NLRI;
		buf[6] = 3;	/* withdrawn len */
		i = htons(afi);
		bcopy(&i, &buf[7], sizeof(i));
		buf[9] = safi;

		if (imsg_compose(ibuf_se, IMSG_UPDATE, peer->conf.id,
		    0, -1, &buf, 10) == -1)
			fatal("%s %d imsg_compose error in peer_send_eor",
			    __func__, __LINE__);
	}
}

/*
 * network announcement stuff
 */
void
network_add(struct network_config *nc, int flagstatic)
{
	struct rdomain		*rd;
	struct rde_aspath	*asp;
	struct filter_set_head	*vpnset = NULL;
	in_addr_t		 prefix4;
	u_int16_t		 i;

	if (nc->rtableid) {
		SIMPLEQ_FOREACH(rd, rdomains_l, entry) {
			if (rd->rtableid != nc->rtableid)
				continue;
			switch (nc->prefix.aid) {
			case AID_INET:
				prefix4 = nc->prefix.v4.s_addr;
				bzero(&nc->prefix, sizeof(nc->prefix));
				nc->prefix.aid = AID_VPN_IPv4;
				nc->prefix.vpn4.rd = rd->rd;
				nc->prefix.vpn4.addr.s_addr = prefix4;
				nc->prefix.vpn4.labellen = 3;
				nc->prefix.vpn4.labelstack[0] =
				    (rd->label >> 12) & 0xff;
				nc->prefix.vpn4.labelstack[1] =
				    (rd->label >> 4) & 0xff;
				nc->prefix.vpn4.labelstack[2] =
				    (rd->label << 4) & 0xf0;
				nc->prefix.vpn4.labelstack[2] |= BGP_MPLS_BOS;
				vpnset = &rd->export;
				break;
			default:
				log_warnx("unable to VPNize prefix");
				filterset_free(&nc->attrset);
				return;
			}
			break;
		}
		if (rd == NULL) {
			log_warnx("network_add: "
			    "prefix %s/%u in non-existent rdomain %u",
			    log_addr(&nc->prefix), nc->prefixlen, nc->rtableid);
			return;
		}
	}

	if (nc->type == NETWORK_MRTCLONE) {
		asp = nc->asp;
	} else {
		asp = path_get();
		asp->aspath = aspath_get(NULL, 0);
		asp->origin = ORIGIN_IGP;
		asp->flags = F_ATTR_ORIGIN | F_ATTR_ASPATH |
		    F_ATTR_LOCALPREF | F_PREFIX_ANNOUNCED;
		/* the nexthop is unset unless a default set overrides it */
	}
	if (!flagstatic)
		asp->flags |= F_ANN_DYNAMIC;
	rde_apply_set(asp, &nc->attrset, nc->prefix.aid, peerself, peerself);
	if (vpnset)
		rde_apply_set(asp, vpnset, nc->prefix.aid, peerself, peerself);
	for (i = 1; i < rib_size; i++) {
		if (*ribs[i].name == '\0')
			break;
		path_update(&ribs[i], peerself, asp, &nc->prefix,
		    nc->prefixlen);
	}
	path_put(asp);
	filterset_free(&nc->attrset);
}
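
/*
 * The 3-byte label stack entry packs the 20-bit MPLS label left
 * aligned, with the bottom-of-stack bit in the lowest bit.  E.g.
 * rd->label = 42 (0x0002a) encodes as labelstack[] =
 * { 0x00, 0x02, 0xa0 | BGP_MPLS_BOS }, i.e. 00 02 a1 on the wire.
 */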

void
network_delete(struct network_config *nc, int flagstatic)
{
	struct rdomain	*rd;
	in_addr_t	 prefix4;
	u_int32_t	 flags = F_PREFIX_ANNOUNCED;
	u_int32_t	 i;

	if (!flagstatic)
		flags |= F_ANN_DYNAMIC;

	if (nc->rtableid) {
		SIMPLEQ_FOREACH(rd, rdomains_l, entry) {
			if (rd->rtableid != nc->rtableid)
				continue;
			switch (nc->prefix.aid) {
			case AID_INET:
				prefix4 = nc->prefix.v4.s_addr;
				bzero(&nc->prefix, sizeof(nc->prefix));
				nc->prefix.aid = AID_VPN_IPv4;
				nc->prefix.vpn4.rd = rd->rd;
				nc->prefix.vpn4.addr.s_addr = prefix4;
				nc->prefix.vpn4.labellen = 3;
				nc->prefix.vpn4.labelstack[0] =
				    (rd->label >> 12) & 0xff;
				nc->prefix.vpn4.labelstack[1] =
				    (rd->label >> 4) & 0xff;
				nc->prefix.vpn4.labelstack[2] =
				    (rd->label << 4) & 0xf0;
				nc->prefix.vpn4.labelstack[2] |= BGP_MPLS_BOS;
				break;
			default:
				log_warnx("unable to VPNize prefix");
				return;
			}
		}
	}

	for (i = rib_size - 1; i > 0; i--) {
		if (*ribs[i].name == '\0')
			break;
		prefix_remove(&ribs[i], peerself, &nc->prefix, nc->prefixlen,
		    flags);
	}
}

void
network_dump_upcall(struct rib_entry *re, void *ptr)
{
	struct prefix		*p;
	struct kroute_full	 k;
	struct bgpd_addr	 addr;
	struct rde_dump_ctx	*ctx = ptr;

	LIST_FOREACH(p, &re->prefix_h, rib_l) {
		if (!(p->aspath->flags & F_PREFIX_ANNOUNCED))
			continue;
		pt_getaddr(p->prefix, &addr);

		bzero(&k, sizeof(k));
		memcpy(&k.prefix, &addr, sizeof(k.prefix));
		if (p->aspath->nexthop == NULL ||
		    p->aspath->nexthop->state != NEXTHOP_REACH)
			k.nexthop.aid = k.prefix.aid;
		else
			memcpy(&k.nexthop, &p->aspath->nexthop->true_nexthop,
			    sizeof(k.nexthop));
		k.prefixlen = p->prefix->prefixlen;
		k.flags = F_KERNEL;
		if ((p->aspath->flags & F_ANN_DYNAMIC) == 0)
			k.flags = F_STATIC;
		if (imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_NETWORK, 0,
		    ctx->req.pid, -1, &k, sizeof(k)) == -1)
			log_warnx("network_dump_upcall: "
			    "imsg_compose error");
	}
}

/* clean up */
void
rde_shutdown(void)
{
	struct rde_peer	*p;
	u_int32_t	 i;

	/*
	 * The decision process is turned off if rde_quit = 1 and
	 * rde_shutdown depends on this.
	 */

	/*
	 * All peers go down.
	 */
	for (i = 0; i <= peertable.peer_hashmask; i++)
		while ((p = LIST_FIRST(&peertable.peer_hashtbl[i])) != NULL)
			peer_down(p->conf.id);

	/* free filters */
	filterlist_free(out_rules);
	for (i = 0; i < rib_size; i++) {
		if (*ribs[i].name == '\0')
			break;
		filterlist_free(ribs[i].in_rules);
	}

	nexthop_shutdown();
	path_shutdown();
	aspath_shutdown();
	attr_shutdown();
	pt_shutdown();
	peer_shutdown();
}

int
sa_cmp(struct bgpd_addr *a, struct sockaddr *b)
{
	struct sockaddr_in	*in_b;
	struct sockaddr_in6	*in6_b;

	if (aid2af(a->aid) != b->sa_family)
		return (1);

	switch (b->sa_family) {
	case AF_INET:
		in_b = (struct sockaddr_in *)b;
		if (a->v4.s_addr != in_b->sin_addr.s_addr)
			return (1);
		break;
	case AF_INET6:
		in6_b = (struct sockaddr_in6 *)b;
#ifdef __KAME__
		/* directly stolen from sbin/ifconfig/ifconfig.c */
		if (IN6_IS_ADDR_LINKLOCAL(&in6_b->sin6_addr)) {
			in6_b->sin6_scope_id =
			    ntohs(*(u_int16_t *)&in6_b->sin6_addr.s6_addr[2]);
			in6_b->sin6_addr.s6_addr[2] =
			    in6_b->sin6_addr.s6_addr[3] = 0;
		}
#endif
		if (bcmp(&a->v6, &in6_b->sin6_addr,
		    sizeof(struct in6_addr)))
			return (1);
		break;
	default:
		fatal("king bula sez: unknown address family");
		/* NOTREACHED */
	}

	return (0);
}