1 /* $OpenBSD: session.c,v 1.508 2025/01/22 12:19:47 claudio Exp $ */ 2 3 /* 4 * Copyright (c) 2003, 2004, 2005 Henning Brauer <henning@openbsd.org> 5 * Copyright (c) 2017 Peter van Dijk <peter.van.dijk@powerdns.com> 6 * 7 * Permission to use, copy, modify, and distribute this software for any 8 * purpose with or without fee is hereby granted, provided that the above 9 * copyright notice and this permission notice appear in all copies. 10 * 11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18 */ 19 20 #include <sys/types.h> 21 22 #include <sys/mman.h> 23 #include <sys/socket.h> 24 #include <sys/time.h> 25 #include <sys/resource.h> 26 #include <sys/un.h> 27 #include <netinet/in.h> 28 #include <netinet/ip.h> 29 #include <netinet/tcp.h> 30 #include <arpa/inet.h> 31 #include <limits.h> 32 33 #include <err.h> 34 #include <errno.h> 35 #include <fcntl.h> 36 #include <ifaddrs.h> 37 #include <poll.h> 38 #include <pwd.h> 39 #include <signal.h> 40 #include <stdio.h> 41 #include <stdlib.h> 42 #include <string.h> 43 #include <syslog.h> 44 #include <unistd.h> 45 46 #include "bgpd.h" 47 #include "session.h" 48 #include "log.h" 49 50 #define PFD_PIPE_MAIN 0 51 #define PFD_PIPE_ROUTE 1 52 #define PFD_PIPE_ROUTE_CTL 2 53 #define PFD_SOCK_CTL 3 54 #define PFD_SOCK_RCTL 4 55 #define PFD_LISTENERS_START 5 56 57 void session_sighdlr(int); 58 int setup_listeners(u_int *); 59 void init_peer(struct peer *); 60 void start_timer_holdtime(struct peer *); 61 void start_timer_sendholdtime(struct peer *); 62 void start_timer_keepalive(struct peer *); 63 void session_close_connection(struct peer *); 64 void change_state(struct peer *, enum session_state, enum session_events); 65 int session_setup_socket(struct peer *); 66 void session_accept(int); 67 int session_connect(struct peer *); 68 void session_tcp_established(struct peer *); 69 int session_capa_add(struct ibuf *, uint8_t, uint8_t); 70 struct ibuf *session_newmsg(enum msg_type, uint16_t); 71 void session_sendmsg(struct ibuf *, struct peer *, enum msg_type); 72 void session_open(struct peer *); 73 void session_keepalive(struct peer *); 74 void session_update(uint32_t, struct ibuf *); 75 void session_notification(struct peer *, uint8_t, uint8_t, struct ibuf *); 76 void session_notification_data(struct peer *, uint8_t, uint8_t, void *, 77 size_t); 78 void session_rrefresh(struct peer *, uint8_t, uint8_t); 79 int session_graceful_restart(struct peer *); 80 int session_graceful_stop(struct peer *); 81 int session_dispatch_msg(struct pollfd *, struct peer *); 82 void session_process_msg(struct peer *); 83 struct ibuf *parse_header(struct ibuf *, void *, int *); 84 int parse_open(struct peer *, struct ibuf *); 85 int parse_update(struct peer *, struct ibuf *); 86 int parse_rrefresh(struct peer *, struct ibuf *); 87 void parse_notification(struct peer *, struct ibuf *); 88 int parse_capabilities(struct peer *, struct ibuf *, uint32_t *); 89 int capa_neg_calc(struct peer *); 90 void session_dispatch_imsg(struct imsgbuf *, int, u_int *); 91 void session_up(struct peer *); 92 void session_down(struct peer *); 93 int imsg_rde(int, uint32_t, void *, uint16_t); 94 void session_demote(struct peer *, int); 95 void merge_peers(struct bgpd_config *, struct bgpd_config *); 96 97 int la_cmp(struct listen_addr *, struct listen_addr *); 98 void session_template_clone(struct peer *, struct sockaddr *, 99 uint32_t, uint32_t); 100 int session_match_mask(struct peer *, struct bgpd_addr *); 101 102 static struct bgpd_config *conf, *nconf; 103 static struct imsgbuf *ibuf_rde; 104 static struct imsgbuf *ibuf_rde_ctl; 105 static struct imsgbuf *ibuf_main; 106 107 struct bgpd_sysdep sysdep; 108 volatile sig_atomic_t session_quit; 109 int pending_reconf; 110 int csock = -1, rcsock = -1; 111 u_int peer_cnt; 112 113 struct mrt_head mrthead; 114 time_t pauseaccept; 115 116 static const uint8_t marker[MSGSIZE_HEADER_MARKER] = { 117 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 118 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 119 }; 120 121 static inline int 122 peer_compare(const struct peer *a, const struct peer *b) 123 { 124 return a->conf.id - b->conf.id; 125 } 126 127 RB_GENERATE(peer_head, peer, entry, peer_compare); 128 129 void 130 session_sighdlr(int sig) 131 { 132 switch (sig) { 133 case SIGINT: 134 case SIGTERM: 135 session_quit = 1; 136 break; 137 } 138 } 139 140 int 141 setup_listeners(u_int *la_cnt) 142 { 143 int ttl = 255; 144 struct listen_addr *la; 145 u_int cnt = 0; 146 147 TAILQ_FOREACH(la, conf->listen_addrs, entry) { 148 la->reconf = RECONF_NONE; 149 cnt++; 150 151 if (la->flags & LISTENER_LISTENING) 152 continue; 153 154 if (la->fd == -1) { 155 log_warn("cannot establish listener on %s: invalid fd", 156 log_sockaddr((struct sockaddr *)&la->sa, 157 la->sa_len)); 158 continue; 159 } 160 161 if (tcp_md5_prep_listener(la, &conf->peers) == -1) 162 fatal("tcp_md5_prep_listener"); 163 164 /* set ttl to 255 so that ttl-security works */ 165 if (la->sa.ss_family == AF_INET && setsockopt(la->fd, 166 IPPROTO_IP, IP_TTL, &ttl, sizeof(ttl)) == -1) { 167 log_warn("setup_listeners setsockopt TTL"); 168 continue; 169 } 170 if (la->sa.ss_family == AF_INET6 && setsockopt(la->fd, 171 IPPROTO_IPV6, IPV6_UNICAST_HOPS, &ttl, sizeof(ttl)) == -1) { 172 log_warn("setup_listeners setsockopt hoplimit"); 173 continue; 174 } 175 176 if (listen(la->fd, MAX_BACKLOG)) { 177 close(la->fd); 178 fatal("listen"); 179 } 180 181 la->flags |= LISTENER_LISTENING; 182 183 log_info("listening on %s", 184 log_sockaddr((struct sockaddr *)&la->sa, la->sa_len)); 185 } 186 187 *la_cnt = cnt; 188 189 return (0); 190 } 191 192 void 193 session_main(int debug, int verbose) 194 { 195 int timeout; 196 unsigned int i, j, idx_peers, idx_listeners, idx_mrts; 197 u_int pfd_elms = 0, peer_l_elms = 0, mrt_l_elms = 0; 198 u_int listener_cnt, ctl_cnt, mrt_cnt; 199 u_int new_cnt; 200 struct passwd *pw; 201 struct peer *p, **peer_l = NULL, *next; 202 struct mrt *m, *xm, **mrt_l = NULL; 203 struct pollfd *pfd = NULL; 204 struct listen_addr *la; 205 void *newp; 206 time_t now; 207 short events; 208 209 log_init(debug, LOG_DAEMON); 210 log_setverbose(verbose); 211 212 log_procinit(log_procnames[PROC_SE]); 213 214 if ((pw = getpwnam(BGPD_USER)) == NULL) 215 fatal(NULL); 216 217 if (chroot(pw->pw_dir) == -1) 218 fatal("chroot"); 219 if (chdir("/") == -1) 220 fatal("chdir(\"/\")"); 221 222 setproctitle("session engine"); 223 224 if (setgroups(1, &pw->pw_gid) || 225 setresgid(pw->pw_gid, pw->pw_gid, pw->pw_gid) || 226 setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid)) 227 fatal("can't drop privileges"); 228 229 if (pledge("stdio inet recvfd", NULL) == -1) 230 fatal("pledge"); 231 232 signal(SIGTERM, session_sighdlr); 233 signal(SIGINT, session_sighdlr); 234 signal(SIGPIPE, SIG_IGN); 235 signal(SIGHUP, SIG_IGN); 236 signal(SIGALRM, SIG_IGN); 237 signal(SIGUSR1, SIG_IGN); 238 239 if ((ibuf_main = malloc(sizeof(struct imsgbuf))) == NULL) 240 fatal(NULL); 241 if (imsgbuf_init(ibuf_main, 3) == -1 || 242 imsgbuf_set_maxsize(ibuf_main, MAX_BGPD_IMSGSIZE) == -1) 243 fatal(NULL); 244 imsgbuf_allow_fdpass(ibuf_main); 245 246 LIST_INIT(&mrthead); 247 listener_cnt = 0; 248 peer_cnt = 0; 249 ctl_cnt = 0; 250 251 conf = new_config(); 252 log_info("session engine ready"); 253 254 while (session_quit == 0) { 255 /* check for peers to be initialized or deleted */ 256 if (!pending_reconf) { 257 RB_FOREACH_SAFE(p, peer_head, &conf->peers, next) { 258 /* new peer that needs init? */ 259 if (p->state == STATE_NONE) 260 init_peer(p); 261 262 /* deletion due? */ 263 if (p->reconf_action == RECONF_DELETE) { 264 if (p->demoted) 265 session_demote(p, -1); 266 p->conf.demote_group[0] = 0; 267 session_stop(p, ERR_CEASE_PEER_UNCONF, 268 NULL); 269 timer_remove_all(&p->timers); 270 tcp_md5_del_listener(conf, p); 271 if (imsg_rde(IMSG_SESSION_DELETE, 272 p->conf.id, NULL, 0) == -1) 273 fatalx("imsg_compose error"); 274 msgbuf_free(p->wbuf); 275 RB_REMOVE(peer_head, &conf->peers, p); 276 log_peer_warnx(&p->conf, "removed"); 277 free(p); 278 peer_cnt--; 279 continue; 280 } 281 p->reconf_action = RECONF_NONE; 282 } 283 } 284 285 if (peer_cnt > peer_l_elms) { 286 if ((newp = reallocarray(peer_l, peer_cnt, 287 sizeof(struct peer *))) == NULL) { 288 /* panic for now */ 289 log_warn("could not resize peer_l from %u -> %u" 290 " entries", peer_l_elms, peer_cnt); 291 fatalx("exiting"); 292 } 293 peer_l = newp; 294 peer_l_elms = peer_cnt; 295 } 296 297 mrt_cnt = 0; 298 for (m = LIST_FIRST(&mrthead); m != NULL; m = xm) { 299 xm = LIST_NEXT(m, entry); 300 if (m->state == MRT_STATE_REMOVE) { 301 mrt_clean(m); 302 LIST_REMOVE(m, entry); 303 free(m); 304 continue; 305 } 306 if (msgbuf_queuelen(m->wbuf) > 0) 307 mrt_cnt++; 308 } 309 310 if (mrt_cnt > mrt_l_elms) { 311 if ((newp = reallocarray(mrt_l, mrt_cnt, 312 sizeof(struct mrt *))) == NULL) { 313 /* panic for now */ 314 log_warn("could not resize mrt_l from %u -> %u" 315 " entries", mrt_l_elms, mrt_cnt); 316 fatalx("exiting"); 317 } 318 mrt_l = newp; 319 mrt_l_elms = mrt_cnt; 320 } 321 322 new_cnt = PFD_LISTENERS_START + listener_cnt + peer_cnt + 323 ctl_cnt + mrt_cnt; 324 if (new_cnt > pfd_elms) { 325 if ((newp = reallocarray(pfd, new_cnt, 326 sizeof(struct pollfd))) == NULL) { 327 /* panic for now */ 328 log_warn("could not resize pfd from %u -> %u" 329 " entries", pfd_elms, new_cnt); 330 fatalx("exiting"); 331 } 332 pfd = newp; 333 pfd_elms = new_cnt; 334 } 335 336 memset(pfd, 0, sizeof(struct pollfd) * pfd_elms); 337 338 set_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main); 339 set_pollfd(&pfd[PFD_PIPE_ROUTE], ibuf_rde); 340 set_pollfd(&pfd[PFD_PIPE_ROUTE_CTL], ibuf_rde_ctl); 341 342 if (pauseaccept == 0) { 343 pfd[PFD_SOCK_CTL].fd = csock; 344 pfd[PFD_SOCK_CTL].events = POLLIN; 345 pfd[PFD_SOCK_RCTL].fd = rcsock; 346 pfd[PFD_SOCK_RCTL].events = POLLIN; 347 } else { 348 pfd[PFD_SOCK_CTL].fd = -1; 349 pfd[PFD_SOCK_RCTL].fd = -1; 350 } 351 352 i = PFD_LISTENERS_START; 353 TAILQ_FOREACH(la, conf->listen_addrs, entry) { 354 if (pauseaccept == 0) { 355 pfd[i].fd = la->fd; 356 pfd[i].events = POLLIN; 357 } else 358 pfd[i].fd = -1; 359 i++; 360 } 361 idx_listeners = i; 362 timeout = 240; /* loop every 240s at least */ 363 364 now = getmonotime(); 365 RB_FOREACH(p, peer_head, &conf->peers) { 366 time_t nextaction; 367 struct timer *pt; 368 369 /* check timers */ 370 if ((pt = timer_nextisdue(&p->timers, now)) != NULL) { 371 switch (pt->type) { 372 case Timer_Hold: 373 bgp_fsm(p, EVNT_TIMER_HOLDTIME, NULL); 374 break; 375 case Timer_SendHold: 376 bgp_fsm(p, EVNT_TIMER_SENDHOLD, NULL); 377 break; 378 case Timer_ConnectRetry: 379 bgp_fsm(p, EVNT_TIMER_CONNRETRY, NULL); 380 break; 381 case Timer_Keepalive: 382 bgp_fsm(p, EVNT_TIMER_KEEPALIVE, NULL); 383 break; 384 case Timer_IdleHold: 385 bgp_fsm(p, EVNT_START, NULL); 386 break; 387 case Timer_IdleHoldReset: 388 p->IdleHoldTime = 389 INTERVAL_IDLE_HOLD_INITIAL; 390 p->errcnt = 0; 391 timer_stop(&p->timers, 392 Timer_IdleHoldReset); 393 break; 394 case Timer_CarpUndemote: 395 timer_stop(&p->timers, 396 Timer_CarpUndemote); 397 if (p->demoted && 398 p->state == STATE_ESTABLISHED) 399 session_demote(p, -1); 400 break; 401 case Timer_RestartTimeout: 402 timer_stop(&p->timers, 403 Timer_RestartTimeout); 404 session_graceful_stop(p); 405 break; 406 case Timer_SessionDown: 407 timer_stop(&p->timers, 408 Timer_SessionDown); 409 410 if (imsg_rde(IMSG_SESSION_DELETE, 411 p->conf.id, NULL, 0) == -1) 412 fatalx("imsg_compose error"); 413 p->rdesession = 0; 414 415 /* finally delete this cloned peer */ 416 if (p->template) 417 p->reconf_action = 418 RECONF_DELETE; 419 break; 420 default: 421 fatalx("King Bula lost in time"); 422 } 423 } 424 if ((nextaction = timer_nextduein(&p->timers, 425 now)) != -1 && nextaction < timeout) 426 timeout = nextaction; 427 428 /* are we waiting for a write? */ 429 events = POLLIN; 430 if (msgbuf_queuelen(p->wbuf) > 0 || 431 p->state == STATE_CONNECT) 432 events |= POLLOUT; 433 /* is there still work to do? */ 434 if (p->rpending) 435 timeout = 0; 436 437 /* poll events */ 438 if (p->fd != -1 && events != 0) { 439 pfd[i].fd = p->fd; 440 pfd[i].events = events; 441 peer_l[i - idx_listeners] = p; 442 i++; 443 } 444 } 445 446 idx_peers = i; 447 448 LIST_FOREACH(m, &mrthead, entry) 449 if (msgbuf_queuelen(m->wbuf) > 0) { 450 pfd[i].fd = m->fd; 451 pfd[i].events = POLLOUT; 452 mrt_l[i - idx_peers] = m; 453 i++; 454 } 455 456 idx_mrts = i; 457 458 i += control_fill_pfds(pfd + i, pfd_elms -i); 459 460 if (i > pfd_elms) 461 fatalx("poll pfd overflow"); 462 463 if (pauseaccept && timeout > 1) 464 timeout = 1; 465 if (timeout < 0) 466 timeout = 0; 467 if (poll(pfd, i, timeout * 1000) == -1) { 468 if (errno == EINTR) 469 continue; 470 fatal("poll error"); 471 } 472 473 /* 474 * If we previously saw fd exhaustion, we stop accept() 475 * for 1 second to throttle the accept() loop. 476 */ 477 if (pauseaccept && getmonotime() > pauseaccept + 1) 478 pauseaccept = 0; 479 480 if (handle_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main) == -1) { 481 log_warnx("SE: Lost connection to parent"); 482 session_quit = 1; 483 continue; 484 } else 485 session_dispatch_imsg(ibuf_main, PFD_PIPE_MAIN, 486 &listener_cnt); 487 488 if (handle_pollfd(&pfd[PFD_PIPE_ROUTE], ibuf_rde) == -1) { 489 log_warnx("SE: Lost connection to RDE"); 490 imsgbuf_clear(ibuf_rde); 491 free(ibuf_rde); 492 ibuf_rde = NULL; 493 } else 494 session_dispatch_imsg(ibuf_rde, PFD_PIPE_ROUTE, 495 &listener_cnt); 496 497 if (handle_pollfd(&pfd[PFD_PIPE_ROUTE_CTL], ibuf_rde_ctl) == 498 -1) { 499 log_warnx("SE: Lost connection to RDE control"); 500 imsgbuf_clear(ibuf_rde_ctl); 501 free(ibuf_rde_ctl); 502 ibuf_rde_ctl = NULL; 503 } else 504 session_dispatch_imsg(ibuf_rde_ctl, PFD_PIPE_ROUTE_CTL, 505 &listener_cnt); 506 507 if (pfd[PFD_SOCK_CTL].revents & POLLIN) 508 ctl_cnt += control_accept(csock, 0); 509 510 if (pfd[PFD_SOCK_RCTL].revents & POLLIN) 511 ctl_cnt += control_accept(rcsock, 1); 512 513 for (j = PFD_LISTENERS_START; j < idx_listeners; j++) 514 if (pfd[j].revents & POLLIN) 515 session_accept(pfd[j].fd); 516 517 for (; j < idx_peers; j++) 518 session_dispatch_msg(&pfd[j], 519 peer_l[j - idx_listeners]); 520 521 RB_FOREACH(p, peer_head, &conf->peers) 522 session_process_msg(p); 523 524 for (; j < idx_mrts; j++) 525 if (pfd[j].revents & POLLOUT) 526 mrt_write(mrt_l[j - idx_peers]); 527 528 for (; j < i; j++) 529 ctl_cnt -= control_dispatch_msg(&pfd[j], &conf->peers); 530 } 531 532 RB_FOREACH_SAFE(p, peer_head, &conf->peers, next) { 533 session_stop(p, ERR_CEASE_ADMIN_DOWN, "bgpd shutting down"); 534 timer_remove_all(&p->timers); 535 tcp_md5_del_listener(conf, p); 536 RB_REMOVE(peer_head, &conf->peers, p); 537 free(p); 538 } 539 540 while ((m = LIST_FIRST(&mrthead)) != NULL) { 541 mrt_clean(m); 542 LIST_REMOVE(m, entry); 543 free(m); 544 } 545 546 free_config(conf); 547 free(peer_l); 548 free(mrt_l); 549 free(pfd); 550 551 /* close pipes */ 552 if (ibuf_rde) { 553 imsgbuf_write(ibuf_rde); 554 imsgbuf_clear(ibuf_rde); 555 close(ibuf_rde->fd); 556 free(ibuf_rde); 557 } 558 if (ibuf_rde_ctl) { 559 imsgbuf_clear(ibuf_rde_ctl); 560 close(ibuf_rde_ctl->fd); 561 free(ibuf_rde_ctl); 562 } 563 imsgbuf_write(ibuf_main); 564 imsgbuf_clear(ibuf_main); 565 close(ibuf_main->fd); 566 free(ibuf_main); 567 568 control_shutdown(csock); 569 control_shutdown(rcsock); 570 log_info("session engine exiting"); 571 exit(0); 572 } 573 574 void 575 init_peer(struct peer *p) 576 { 577 TAILQ_INIT(&p->timers); 578 p->fd = -1; 579 if (p->wbuf != NULL) 580 fatalx("%s: msgbuf already set", __func__); 581 if ((p->wbuf = msgbuf_new_reader(MSGSIZE_HEADER, parse_header, p)) == 582 NULL) 583 fatal(NULL); 584 585 if (p->conf.if_depend[0]) 586 imsg_compose(ibuf_main, IMSG_SESSION_DEPENDON, 0, 0, -1, 587 p->conf.if_depend, sizeof(p->conf.if_depend)); 588 else 589 p->depend_ok = 1; 590 591 peer_cnt++; 592 593 change_state(p, STATE_IDLE, EVNT_NONE); 594 if (p->conf.down) 595 timer_stop(&p->timers, Timer_IdleHold); /* no autostart */ 596 else 597 timer_set(&p->timers, Timer_IdleHold, SESSION_CLEAR_DELAY); 598 599 p->stats.last_updown = getmonotime(); 600 601 /* 602 * on startup, demote if requested. 603 * do not handle new peers. they must reach ESTABLISHED beforehand. 604 * peers added at runtime have reconf_action set to RECONF_REINIT. 605 */ 606 if (p->reconf_action != RECONF_REINIT && p->conf.demote_group[0]) 607 session_demote(p, +1); 608 } 609 610 void 611 bgp_fsm(struct peer *peer, enum session_events event, struct ibuf *msg) 612 { 613 switch (peer->state) { 614 case STATE_NONE: 615 /* nothing */ 616 break; 617 case STATE_IDLE: 618 switch (event) { 619 case EVNT_START: 620 timer_stop(&peer->timers, Timer_Hold); 621 timer_stop(&peer->timers, Timer_SendHold); 622 timer_stop(&peer->timers, Timer_Keepalive); 623 timer_stop(&peer->timers, Timer_IdleHold); 624 625 if (!peer->depend_ok) 626 timer_stop(&peer->timers, Timer_ConnectRetry); 627 else if (peer->passive || peer->conf.passive || 628 peer->conf.template) { 629 change_state(peer, STATE_ACTIVE, event); 630 timer_stop(&peer->timers, Timer_ConnectRetry); 631 } else { 632 change_state(peer, STATE_CONNECT, event); 633 timer_set(&peer->timers, Timer_ConnectRetry, 634 conf->connectretry); 635 session_connect(peer); 636 } 637 peer->passive = 0; 638 break; 639 case EVNT_STOP: 640 timer_stop(&peer->timers, Timer_IdleHold); 641 break; 642 default: 643 /* ignore */ 644 break; 645 } 646 break; 647 case STATE_CONNECT: 648 switch (event) { 649 case EVNT_START: 650 /* ignore */ 651 break; 652 case EVNT_CON_OPEN: 653 session_tcp_established(peer); 654 session_open(peer); 655 timer_stop(&peer->timers, Timer_ConnectRetry); 656 peer->holdtime = INTERVAL_HOLD_INITIAL; 657 start_timer_holdtime(peer); 658 change_state(peer, STATE_OPENSENT, event); 659 break; 660 case EVNT_CON_OPENFAIL: 661 timer_set(&peer->timers, Timer_ConnectRetry, 662 conf->connectretry); 663 session_close_connection(peer); 664 change_state(peer, STATE_ACTIVE, event); 665 break; 666 case EVNT_TIMER_CONNRETRY: 667 timer_set(&peer->timers, Timer_ConnectRetry, 668 conf->connectretry); 669 session_connect(peer); 670 break; 671 default: 672 change_state(peer, STATE_IDLE, event); 673 break; 674 } 675 break; 676 case STATE_ACTIVE: 677 switch (event) { 678 case EVNT_START: 679 /* ignore */ 680 break; 681 case EVNT_CON_OPEN: 682 session_tcp_established(peer); 683 session_open(peer); 684 timer_stop(&peer->timers, Timer_ConnectRetry); 685 peer->holdtime = INTERVAL_HOLD_INITIAL; 686 start_timer_holdtime(peer); 687 change_state(peer, STATE_OPENSENT, event); 688 break; 689 case EVNT_CON_OPENFAIL: 690 timer_set(&peer->timers, Timer_ConnectRetry, 691 conf->connectretry); 692 session_close_connection(peer); 693 change_state(peer, STATE_ACTIVE, event); 694 break; 695 case EVNT_TIMER_CONNRETRY: 696 timer_set(&peer->timers, Timer_ConnectRetry, 697 peer->holdtime); 698 change_state(peer, STATE_CONNECT, event); 699 session_connect(peer); 700 break; 701 default: 702 change_state(peer, STATE_IDLE, event); 703 break; 704 } 705 break; 706 case STATE_OPENSENT: 707 switch (event) { 708 case EVNT_START: 709 /* ignore */ 710 break; 711 case EVNT_STOP: 712 change_state(peer, STATE_IDLE, event); 713 break; 714 case EVNT_CON_CLOSED: 715 session_close_connection(peer); 716 timer_set(&peer->timers, Timer_ConnectRetry, 717 conf->connectretry); 718 change_state(peer, STATE_ACTIVE, event); 719 break; 720 case EVNT_CON_FATAL: 721 change_state(peer, STATE_IDLE, event); 722 break; 723 case EVNT_TIMER_HOLDTIME: 724 session_notification(peer, ERR_HOLDTIMEREXPIRED, 725 0, NULL); 726 change_state(peer, STATE_IDLE, event); 727 break; 728 case EVNT_TIMER_SENDHOLD: 729 session_notification(peer, ERR_SENDHOLDTIMEREXPIRED, 730 0, NULL); 731 change_state(peer, STATE_IDLE, event); 732 break; 733 case EVNT_RCVD_OPEN: 734 /* parse_open calls change_state itself on failure */ 735 if (parse_open(peer, msg)) 736 break; 737 session_keepalive(peer); 738 change_state(peer, STATE_OPENCONFIRM, event); 739 break; 740 case EVNT_RCVD_NOTIFICATION: 741 parse_notification(peer, msg); 742 break; 743 default: 744 session_notification(peer, 745 ERR_FSM, ERR_FSM_UNEX_OPENSENT, NULL); 746 change_state(peer, STATE_IDLE, event); 747 break; 748 } 749 break; 750 case STATE_OPENCONFIRM: 751 switch (event) { 752 case EVNT_START: 753 /* ignore */ 754 break; 755 case EVNT_STOP: 756 change_state(peer, STATE_IDLE, event); 757 break; 758 case EVNT_CON_CLOSED: 759 case EVNT_CON_FATAL: 760 change_state(peer, STATE_IDLE, event); 761 break; 762 case EVNT_TIMER_HOLDTIME: 763 session_notification(peer, ERR_HOLDTIMEREXPIRED, 764 0, NULL); 765 change_state(peer, STATE_IDLE, event); 766 break; 767 case EVNT_TIMER_SENDHOLD: 768 session_notification(peer, ERR_SENDHOLDTIMEREXPIRED, 769 0, NULL); 770 change_state(peer, STATE_IDLE, event); 771 break; 772 case EVNT_TIMER_KEEPALIVE: 773 session_keepalive(peer); 774 break; 775 case EVNT_RCVD_KEEPALIVE: 776 start_timer_holdtime(peer); 777 change_state(peer, STATE_ESTABLISHED, event); 778 break; 779 case EVNT_RCVD_NOTIFICATION: 780 parse_notification(peer, msg); 781 break; 782 default: 783 session_notification(peer, 784 ERR_FSM, ERR_FSM_UNEX_OPENCONFIRM, NULL); 785 change_state(peer, STATE_IDLE, event); 786 break; 787 } 788 break; 789 case STATE_ESTABLISHED: 790 switch (event) { 791 case EVNT_START: 792 /* ignore */ 793 break; 794 case EVNT_STOP: 795 change_state(peer, STATE_IDLE, event); 796 break; 797 case EVNT_CON_CLOSED: 798 case EVNT_CON_FATAL: 799 change_state(peer, STATE_IDLE, event); 800 break; 801 case EVNT_TIMER_HOLDTIME: 802 session_notification(peer, ERR_HOLDTIMEREXPIRED, 803 0, NULL); 804 change_state(peer, STATE_IDLE, event); 805 break; 806 case EVNT_TIMER_SENDHOLD: 807 session_notification(peer, ERR_SENDHOLDTIMEREXPIRED, 808 0, NULL); 809 change_state(peer, STATE_IDLE, event); 810 break; 811 case EVNT_TIMER_KEEPALIVE: 812 session_keepalive(peer); 813 break; 814 case EVNT_RCVD_KEEPALIVE: 815 start_timer_holdtime(peer); 816 break; 817 case EVNT_RCVD_UPDATE: 818 start_timer_holdtime(peer); 819 if (parse_update(peer, msg)) 820 change_state(peer, STATE_IDLE, event); 821 else 822 start_timer_holdtime(peer); 823 break; 824 case EVNT_RCVD_NOTIFICATION: 825 parse_notification(peer, msg); 826 break; 827 default: 828 session_notification(peer, 829 ERR_FSM, ERR_FSM_UNEX_ESTABLISHED, NULL); 830 change_state(peer, STATE_IDLE, event); 831 break; 832 } 833 break; 834 } 835 } 836 837 void 838 start_timer_holdtime(struct peer *peer) 839 { 840 if (peer->holdtime > 0) 841 timer_set(&peer->timers, Timer_Hold, peer->holdtime); 842 else 843 timer_stop(&peer->timers, Timer_Hold); 844 } 845 846 void 847 start_timer_sendholdtime(struct peer *peer) 848 { 849 uint16_t holdtime = INTERVAL_HOLD; 850 851 if (peer->holdtime > INTERVAL_HOLD) 852 holdtime = peer->holdtime; 853 854 if (peer->holdtime > 0) 855 timer_set(&peer->timers, Timer_SendHold, holdtime); 856 else 857 timer_stop(&peer->timers, Timer_SendHold); 858 } 859 860 void 861 start_timer_keepalive(struct peer *peer) 862 { 863 if (peer->holdtime > 0) 864 timer_set(&peer->timers, Timer_Keepalive, peer->holdtime / 3); 865 else 866 timer_stop(&peer->timers, Timer_Keepalive); 867 } 868 869 void 870 session_close_connection(struct peer *peer) 871 { 872 if (peer->fd != -1) { 873 close(peer->fd); 874 pauseaccept = 0; 875 } 876 peer->fd = -1; 877 } 878 879 void 880 change_state(struct peer *peer, enum session_state state, 881 enum session_events event) 882 { 883 struct mrt *mrt; 884 885 switch (state) { 886 case STATE_IDLE: 887 /* carp demotion first. new peers handled in init_peer */ 888 if (peer->state == STATE_ESTABLISHED && 889 peer->conf.demote_group[0] && !peer->demoted) 890 session_demote(peer, +1); 891 892 /* 893 * try to write out what's buffered (maybe a notification), 894 * don't bother if it fails 895 */ 896 if (peer->state >= STATE_OPENSENT && 897 msgbuf_queuelen(peer->wbuf) > 0) 898 ibuf_write(peer->fd, peer->wbuf); 899 900 /* 901 * we must start the timer for the next EVNT_START 902 * if we are coming here due to an error and the 903 * session was not established successfully before, the 904 * starttimerinterval needs to be exponentially increased 905 */ 906 if (peer->IdleHoldTime == 0) 907 peer->IdleHoldTime = INTERVAL_IDLE_HOLD_INITIAL; 908 peer->holdtime = INTERVAL_HOLD_INITIAL; 909 timer_stop(&peer->timers, Timer_ConnectRetry); 910 timer_stop(&peer->timers, Timer_Keepalive); 911 timer_stop(&peer->timers, Timer_Hold); 912 timer_stop(&peer->timers, Timer_SendHold); 913 timer_stop(&peer->timers, Timer_IdleHold); 914 timer_stop(&peer->timers, Timer_IdleHoldReset); 915 session_close_connection(peer); 916 msgbuf_clear(peer->wbuf); 917 peer->rpending = 0; 918 memset(&peer->capa.peer, 0, sizeof(peer->capa.peer)); 919 if (!peer->template) 920 imsg_compose(ibuf_main, IMSG_PFKEY_RELOAD, 921 peer->conf.id, 0, -1, NULL, 0); 922 923 if (peer->state == STATE_ESTABLISHED) { 924 if (peer->capa.neg.grestart.restart == 2 && 925 (event == EVNT_CON_CLOSED || 926 event == EVNT_CON_FATAL || 927 (peer->capa.neg.grestart.grnotification && 928 (event == EVNT_RCVD_GRACE_NOTIFICATION || 929 event == EVNT_TIMER_HOLDTIME || 930 event == EVNT_TIMER_SENDHOLD)))) { 931 /* don't punish graceful restart */ 932 timer_set(&peer->timers, Timer_IdleHold, 0); 933 session_graceful_restart(peer); 934 } else if (event != EVNT_STOP) { 935 timer_set(&peer->timers, Timer_IdleHold, 936 peer->IdleHoldTime); 937 if (event != EVNT_NONE && 938 peer->IdleHoldTime < MAX_IDLE_HOLD/2) 939 peer->IdleHoldTime *= 2; 940 session_down(peer); 941 } else { 942 session_down(peer); 943 } 944 } else if (event != EVNT_STOP) { 945 timer_set(&peer->timers, Timer_IdleHold, 946 peer->IdleHoldTime); 947 if (event != EVNT_NONE && 948 peer->IdleHoldTime < MAX_IDLE_HOLD / 2) 949 peer->IdleHoldTime *= 2; 950 } 951 952 if (peer->state == STATE_NONE || 953 peer->state == STATE_ESTABLISHED) { 954 /* initialize capability negotiation structures */ 955 memcpy(&peer->capa.ann, &peer->conf.capabilities, 956 sizeof(peer->capa.ann)); 957 } 958 break; 959 case STATE_CONNECT: 960 if (peer->state == STATE_ESTABLISHED && 961 peer->capa.neg.grestart.restart == 2) { 962 /* do the graceful restart dance */ 963 session_graceful_restart(peer); 964 peer->holdtime = INTERVAL_HOLD_INITIAL; 965 timer_stop(&peer->timers, Timer_ConnectRetry); 966 timer_stop(&peer->timers, Timer_Keepalive); 967 timer_stop(&peer->timers, Timer_Hold); 968 timer_stop(&peer->timers, Timer_SendHold); 969 timer_stop(&peer->timers, Timer_IdleHold); 970 timer_stop(&peer->timers, Timer_IdleHoldReset); 971 session_close_connection(peer); 972 msgbuf_clear(peer->wbuf); 973 memset(&peer->capa.peer, 0, sizeof(peer->capa.peer)); 974 } 975 break; 976 case STATE_ACTIVE: 977 if (!peer->template) 978 imsg_compose(ibuf_main, IMSG_PFKEY_RELOAD, 979 peer->conf.id, 0, -1, NULL, 0); 980 break; 981 case STATE_OPENSENT: 982 break; 983 case STATE_OPENCONFIRM: 984 break; 985 case STATE_ESTABLISHED: 986 timer_set(&peer->timers, Timer_IdleHoldReset, 987 peer->IdleHoldTime); 988 if (peer->demoted) 989 timer_set(&peer->timers, Timer_CarpUndemote, 990 INTERVAL_HOLD_DEMOTED); 991 session_up(peer); 992 break; 993 default: /* something seriously fucked */ 994 break; 995 } 996 997 log_statechange(peer, state, event); 998 LIST_FOREACH(mrt, &mrthead, entry) { 999 if (!(mrt->type == MRT_ALL_IN || mrt->type == MRT_ALL_OUT)) 1000 continue; 1001 if ((mrt->peer_id == 0 && mrt->group_id == 0) || 1002 mrt->peer_id == peer->conf.id || (mrt->group_id != 0 && 1003 mrt->group_id == peer->conf.groupid)) 1004 mrt_dump_state(mrt, peer->state, state, peer); 1005 } 1006 peer->prev_state = peer->state; 1007 peer->state = state; 1008 } 1009 1010 void 1011 session_accept(int listenfd) 1012 { 1013 int connfd; 1014 socklen_t len; 1015 struct sockaddr_storage cliaddr; 1016 struct peer *p = NULL; 1017 1018 len = sizeof(cliaddr); 1019 if ((connfd = accept4(listenfd, 1020 (struct sockaddr *)&cliaddr, &len, 1021 SOCK_CLOEXEC | SOCK_NONBLOCK)) == -1) { 1022 if (errno == ENFILE || errno == EMFILE) 1023 pauseaccept = getmonotime(); 1024 else if (errno != EWOULDBLOCK && errno != EINTR && 1025 errno != ECONNABORTED) 1026 log_warn("accept"); 1027 return; 1028 } 1029 1030 p = getpeerbyip(conf, (struct sockaddr *)&cliaddr); 1031 1032 if (p != NULL && p->state == STATE_IDLE && p->errcnt < 2) { 1033 if (timer_running(&p->timers, Timer_IdleHold, NULL)) { 1034 /* fast reconnect after clear */ 1035 p->passive = 1; 1036 bgp_fsm(p, EVNT_START, NULL); 1037 } 1038 } 1039 1040 if (p != NULL && 1041 (p->state == STATE_CONNECT || p->state == STATE_ACTIVE)) { 1042 if (p->fd != -1) { 1043 if (p->state == STATE_CONNECT) 1044 session_close_connection(p); 1045 else { 1046 close(connfd); 1047 return; 1048 } 1049 } 1050 1051 open: 1052 if (p->auth_conf.method != AUTH_NONE && sysdep.no_pfkey) { 1053 log_peer_warnx(&p->conf, 1054 "ipsec or md5sig configured but not available"); 1055 close(connfd); 1056 return; 1057 } 1058 1059 if (tcp_md5_check(connfd, &p->auth_conf) == -1) { 1060 log_peer_warn(&p->conf, "check md5sig"); 1061 close(connfd); 1062 return; 1063 } 1064 p->fd = connfd; 1065 if (session_setup_socket(p)) { 1066 close(connfd); 1067 return; 1068 } 1069 bgp_fsm(p, EVNT_CON_OPEN, NULL); 1070 return; 1071 } else if (p != NULL && p->state == STATE_ESTABLISHED && 1072 p->capa.neg.grestart.restart == 2) { 1073 /* first do the graceful restart dance */ 1074 change_state(p, STATE_CONNECT, EVNT_CON_CLOSED); 1075 /* then do part of the open dance */ 1076 goto open; 1077 } else { 1078 log_conn_attempt(p, (struct sockaddr *)&cliaddr, len); 1079 close(connfd); 1080 } 1081 } 1082 1083 int 1084 session_connect(struct peer *peer) 1085 { 1086 struct sockaddr *sa; 1087 struct bgpd_addr *bind_addr; 1088 socklen_t sa_len; 1089 1090 /* 1091 * we do not need the overcomplicated collision detection RFC 1771 1092 * describes; we simply make sure there is only ever one concurrent 1093 * tcp connection per peer. 1094 */ 1095 if (peer->fd != -1) 1096 return (-1); 1097 1098 if ((peer->fd = socket(aid2af(peer->conf.remote_addr.aid), 1099 SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, IPPROTO_TCP)) == -1) { 1100 log_peer_warn(&peer->conf, "session_connect socket"); 1101 bgp_fsm(peer, EVNT_CON_OPENFAIL, NULL); 1102 return (-1); 1103 } 1104 1105 if (peer->auth_conf.method != AUTH_NONE && sysdep.no_pfkey) { 1106 log_peer_warnx(&peer->conf, 1107 "ipsec or md5sig configured but not available"); 1108 bgp_fsm(peer, EVNT_CON_OPENFAIL, NULL); 1109 return (-1); 1110 } 1111 1112 if (tcp_md5_set(peer->fd, &peer->auth_conf, 1113 &peer->conf.remote_addr) == -1) 1114 log_peer_warn(&peer->conf, "setting md5sig"); 1115 1116 /* if local-address is set we need to bind() */ 1117 bind_addr = session_localaddr(peer); 1118 if ((sa = addr2sa(bind_addr, 0, &sa_len)) != NULL) { 1119 if (bind(peer->fd, sa, sa_len) == -1) { 1120 log_peer_warn(&peer->conf, "session_connect bind"); 1121 bgp_fsm(peer, EVNT_CON_OPENFAIL, NULL); 1122 return (-1); 1123 } 1124 } 1125 1126 if (session_setup_socket(peer)) { 1127 bgp_fsm(peer, EVNT_CON_OPENFAIL, NULL); 1128 return (-1); 1129 } 1130 1131 sa = addr2sa(&peer->conf.remote_addr, peer->conf.remote_port, &sa_len); 1132 if (connect(peer->fd, sa, sa_len) == -1) { 1133 if (errno != EINPROGRESS) { 1134 if (errno != peer->lasterr) 1135 log_peer_warn(&peer->conf, "connect"); 1136 peer->lasterr = errno; 1137 bgp_fsm(peer, EVNT_CON_OPENFAIL, NULL); 1138 return (-1); 1139 } 1140 } else 1141 bgp_fsm(peer, EVNT_CON_OPEN, NULL); 1142 1143 return (0); 1144 } 1145 1146 int 1147 session_setup_socket(struct peer *p) 1148 { 1149 int ttl = p->conf.distance; 1150 int pre = IPTOS_PREC_INTERNETCONTROL; 1151 int nodelay = 1; 1152 int bsize; 1153 1154 switch (p->conf.remote_addr.aid) { 1155 case AID_INET: 1156 /* set precedence, see RFC 1771 appendix 5 */ 1157 if (setsockopt(p->fd, IPPROTO_IP, IP_TOS, &pre, sizeof(pre)) == 1158 -1) { 1159 log_peer_warn(&p->conf, 1160 "session_setup_socket setsockopt TOS"); 1161 return (-1); 1162 } 1163 1164 if (p->conf.ebgp) { 1165 /* 1166 * set TTL to foreign router's distance 1167 * 1=direct n=multihop with ttlsec, we always use 255 1168 */ 1169 if (p->conf.ttlsec) { 1170 ttl = 256 - p->conf.distance; 1171 if (setsockopt(p->fd, IPPROTO_IP, IP_MINTTL, 1172 &ttl, sizeof(ttl)) == -1) { 1173 log_peer_warn(&p->conf, 1174 "session_setup_socket: " 1175 "setsockopt MINTTL"); 1176 return (-1); 1177 } 1178 ttl = 255; 1179 } 1180 1181 if (setsockopt(p->fd, IPPROTO_IP, IP_TTL, &ttl, 1182 sizeof(ttl)) == -1) { 1183 log_peer_warn(&p->conf, 1184 "session_setup_socket setsockopt TTL"); 1185 return (-1); 1186 } 1187 } 1188 break; 1189 case AID_INET6: 1190 if (setsockopt(p->fd, IPPROTO_IPV6, IPV6_TCLASS, &pre, 1191 sizeof(pre)) == -1) { 1192 log_peer_warn(&p->conf, "session_setup_socket " 1193 "setsockopt TCLASS"); 1194 return (-1); 1195 } 1196 1197 if (p->conf.ebgp) { 1198 /* 1199 * set hoplimit to foreign router's distance 1200 * 1=direct n=multihop with ttlsec, we always use 255 1201 */ 1202 if (p->conf.ttlsec) { 1203 ttl = 256 - p->conf.distance; 1204 if (setsockopt(p->fd, IPPROTO_IPV6, 1205 IPV6_MINHOPCOUNT, &ttl, sizeof(ttl)) 1206 == -1) { 1207 log_peer_warn(&p->conf, 1208 "session_setup_socket: " 1209 "setsockopt MINHOPCOUNT"); 1210 return (-1); 1211 } 1212 ttl = 255; 1213 } 1214 if (setsockopt(p->fd, IPPROTO_IPV6, IPV6_UNICAST_HOPS, 1215 &ttl, sizeof(ttl)) == -1) { 1216 log_peer_warn(&p->conf, 1217 "session_setup_socket setsockopt hoplimit"); 1218 return (-1); 1219 } 1220 } 1221 break; 1222 } 1223 1224 /* set TCP_NODELAY */ 1225 if (setsockopt(p->fd, IPPROTO_TCP, TCP_NODELAY, &nodelay, 1226 sizeof(nodelay)) == -1) { 1227 log_peer_warn(&p->conf, 1228 "session_setup_socket setsockopt TCP_NODELAY"); 1229 return (-1); 1230 } 1231 1232 /* limit bufsize. no biggie if it fails */ 1233 bsize = 65535; 1234 setsockopt(p->fd, SOL_SOCKET, SO_RCVBUF, &bsize, sizeof(bsize)); 1235 setsockopt(p->fd, SOL_SOCKET, SO_SNDBUF, &bsize, sizeof(bsize)); 1236 1237 return (0); 1238 } 1239 1240 /* 1241 * compare the bgpd_addr with the sockaddr by converting the latter into 1242 * a bgpd_addr. Return true if the two are equal, including any scope 1243 */ 1244 static int 1245 sa_equal(struct bgpd_addr *ba, struct sockaddr *b) 1246 { 1247 struct bgpd_addr bb; 1248 1249 sa2addr(b, &bb, NULL); 1250 return (memcmp(ba, &bb, sizeof(*ba)) == 0); 1251 } 1252 1253 static void 1254 get_alternate_addr(struct bgpd_addr *local, struct bgpd_addr *remote, 1255 struct bgpd_addr *alt, unsigned int *scope) 1256 { 1257 struct ifaddrs *ifap, *ifa, *match; 1258 int connected = 0; 1259 u_int8_t plen; 1260 1261 if (getifaddrs(&ifap) == -1) 1262 fatal("getifaddrs"); 1263 1264 for (match = ifap; match != NULL; match = match->ifa_next) { 1265 if (match->ifa_addr == NULL) 1266 continue; 1267 if (match->ifa_addr->sa_family != AF_INET && 1268 match->ifa_addr->sa_family != AF_INET6) 1269 continue; 1270 if (sa_equal(local, match->ifa_addr)) { 1271 if (remote->aid == AID_INET6 && 1272 IN6_IS_ADDR_LINKLOCAL(&remote->v6)) { 1273 /* IPv6 LLA are by definition connected */ 1274 connected = 1; 1275 } else if (match->ifa_flags & IFF_POINTOPOINT && 1276 match->ifa_dstaddr != NULL) { 1277 if (sa_equal(remote, match->ifa_dstaddr)) 1278 connected = 1; 1279 } else if (match->ifa_netmask != NULL) { 1280 plen = mask2prefixlen( 1281 match->ifa_addr->sa_family, 1282 match->ifa_netmask); 1283 if (prefix_compare(local, remote, plen) == 0) 1284 connected = 1; 1285 } 1286 break; 1287 } 1288 } 1289 1290 if (match == NULL) { 1291 log_warnx("%s: local address not found", __func__); 1292 return; 1293 } 1294 if (connected) 1295 *scope = if_nametoindex(match->ifa_name); 1296 else 1297 *scope = 0; 1298 1299 switch (local->aid) { 1300 case AID_INET6: 1301 for (ifa = ifap; ifa != NULL; ifa = ifa->ifa_next) { 1302 if (ifa->ifa_addr != NULL && 1303 ifa->ifa_addr->sa_family == AF_INET && 1304 strcmp(ifa->ifa_name, match->ifa_name) == 0) { 1305 sa2addr(ifa->ifa_addr, alt, NULL); 1306 break; 1307 } 1308 } 1309 break; 1310 case AID_INET: 1311 for (ifa = ifap; ifa != NULL; ifa = ifa->ifa_next) { 1312 if (ifa->ifa_addr != NULL && 1313 ifa->ifa_addr->sa_family == AF_INET6 && 1314 strcmp(ifa->ifa_name, match->ifa_name) == 0) { 1315 struct sockaddr_in6 *s = 1316 (struct sockaddr_in6 *)ifa->ifa_addr; 1317 1318 /* only accept global scope addresses */ 1319 if (IN6_IS_ADDR_LINKLOCAL(&s->sin6_addr) || 1320 IN6_IS_ADDR_SITELOCAL(&s->sin6_addr)) 1321 continue; 1322 sa2addr(ifa->ifa_addr, alt, NULL); 1323 break; 1324 } 1325 } 1326 break; 1327 default: 1328 log_warnx("%s: unsupported address family %s", __func__, 1329 aid2str(local->aid)); 1330 break; 1331 } 1332 1333 freeifaddrs(ifap); 1334 } 1335 1336 void 1337 session_tcp_established(struct peer *peer) 1338 { 1339 struct sockaddr_storage ss; 1340 socklen_t len; 1341 1342 len = sizeof(ss); 1343 if (getsockname(peer->fd, (struct sockaddr *)&ss, &len) == -1) 1344 log_warn("getsockname"); 1345 sa2addr((struct sockaddr *)&ss, &peer->local, &peer->local_port); 1346 len = sizeof(ss); 1347 if (getpeername(peer->fd, (struct sockaddr *)&ss, &len) == -1) 1348 log_warn("getpeername"); 1349 sa2addr((struct sockaddr *)&ss, &peer->remote, &peer->remote_port); 1350 1351 get_alternate_addr(&peer->local, &peer->remote, &peer->local_alt, 1352 &peer->if_scope); 1353 } 1354 1355 int 1356 session_capa_add(struct ibuf *opb, uint8_t capa_code, uint8_t capa_len) 1357 { 1358 int errs = 0; 1359 1360 errs += ibuf_add_n8(opb, capa_code); 1361 errs += ibuf_add_n8(opb, capa_len); 1362 return (errs); 1363 } 1364 1365 static int 1366 session_capa_add_mp(struct ibuf *buf, uint8_t aid) 1367 { 1368 uint16_t afi; 1369 uint8_t safi; 1370 int errs = 0; 1371 1372 if (aid2afi(aid, &afi, &safi) == -1) { 1373 log_warn("%s: bad AID", __func__); 1374 return (-1); 1375 } 1376 1377 errs += ibuf_add_n16(buf, afi); 1378 errs += ibuf_add_zero(buf, 1); 1379 errs += ibuf_add_n8(buf, safi); 1380 1381 return (errs); 1382 } 1383 1384 static int 1385 session_capa_add_afi(struct ibuf *b, uint8_t aid, uint8_t flags) 1386 { 1387 int errs = 0; 1388 uint16_t afi; 1389 uint8_t safi; 1390 1391 if (aid2afi(aid, &afi, &safi)) { 1392 log_warn("%s: bad AID", __func__); 1393 return (-1); 1394 } 1395 1396 errs += ibuf_add_n16(b, afi); 1397 errs += ibuf_add_n8(b, safi); 1398 errs += ibuf_add_n8(b, flags); 1399 1400 return (errs); 1401 } 1402 1403 static int 1404 session_capa_add_ext_nh(struct ibuf *b, uint8_t aid) 1405 { 1406 int errs = 0; 1407 uint16_t afi; 1408 uint8_t safi; 1409 1410 if (aid2afi(aid, &afi, &safi)) { 1411 log_warn("%s: bad AID", __func__); 1412 return (-1); 1413 } 1414 1415 errs += ibuf_add_n16(b, afi); 1416 errs += ibuf_add_n16(b, safi); 1417 errs += ibuf_add_n16(b, AFI_IPv6); 1418 1419 return (errs); 1420 } 1421 1422 struct ibuf * 1423 session_newmsg(enum msg_type msgtype, uint16_t len) 1424 { 1425 struct ibuf *buf; 1426 int errs = 0; 1427 1428 if ((buf = ibuf_open(len)) == NULL) 1429 return (NULL); 1430 1431 errs += ibuf_add(buf, marker, sizeof(marker)); 1432 errs += ibuf_add_n16(buf, len); 1433 errs += ibuf_add_n8(buf, msgtype); 1434 1435 if (errs) { 1436 ibuf_free(buf); 1437 return (NULL); 1438 } 1439 1440 return (buf); 1441 } 1442 1443 void 1444 session_sendmsg(struct ibuf *msg, struct peer *p, enum msg_type msgtype) 1445 { 1446 struct mrt *mrt; 1447 1448 LIST_FOREACH(mrt, &mrthead, entry) { 1449 if (!(mrt->type == MRT_ALL_OUT || (msgtype == MSG_UPDATE && 1450 mrt->type == MRT_UPDATE_OUT))) 1451 continue; 1452 if ((mrt->peer_id == 0 && mrt->group_id == 0) || 1453 mrt->peer_id == p->conf.id || (mrt->group_id != 0 && 1454 mrt->group_id == p->conf.groupid)) 1455 mrt_dump_bgp_msg(mrt, msg, p, msgtype); 1456 } 1457 1458 ibuf_close(p->wbuf, msg); 1459 if (!p->throttled && msgbuf_queuelen(p->wbuf) > SESS_MSG_HIGH_MARK) { 1460 if (imsg_rde(IMSG_XOFF, p->conf.id, NULL, 0) == -1) 1461 log_peer_warn(&p->conf, "imsg_compose XOFF"); 1462 else 1463 p->throttled = 1; 1464 } 1465 } 1466 1467 /* 1468 * Translate between internal roles and the value expected by RFC 9234. 1469 */ 1470 static uint8_t 1471 role2capa(enum role role) 1472 { 1473 switch (role) { 1474 case ROLE_CUSTOMER: 1475 return CAPA_ROLE_CUSTOMER; 1476 case ROLE_PROVIDER: 1477 return CAPA_ROLE_PROVIDER; 1478 case ROLE_RS: 1479 return CAPA_ROLE_RS; 1480 case ROLE_RS_CLIENT: 1481 return CAPA_ROLE_RS_CLIENT; 1482 case ROLE_PEER: 1483 return CAPA_ROLE_PEER; 1484 default: 1485 fatalx("Unsupported role for role capability"); 1486 } 1487 } 1488 1489 static enum role 1490 capa2role(uint8_t val) 1491 { 1492 switch (val) { 1493 case CAPA_ROLE_PROVIDER: 1494 return ROLE_PROVIDER; 1495 case CAPA_ROLE_RS: 1496 return ROLE_RS; 1497 case CAPA_ROLE_RS_CLIENT: 1498 return ROLE_RS_CLIENT; 1499 case CAPA_ROLE_CUSTOMER: 1500 return ROLE_CUSTOMER; 1501 case CAPA_ROLE_PEER: 1502 return ROLE_PEER; 1503 default: 1504 return ROLE_NONE; 1505 } 1506 } 1507 1508 void 1509 session_open(struct peer *p) 1510 { 1511 struct ibuf *buf, *opb; 1512 size_t len, optparamlen; 1513 uint16_t holdtime; 1514 uint8_t i; 1515 int errs = 0, extlen = 0; 1516 int mpcapa = 0; 1517 1518 1519 if ((opb = ibuf_dynamic(0, MAX_PKTSIZE - MSGSIZE_OPEN_MIN - 6)) == 1520 NULL) { 1521 bgp_fsm(p, EVNT_CON_FATAL, NULL); 1522 return; 1523 } 1524 1525 /* multiprotocol extensions, RFC 4760 */ 1526 for (i = AID_MIN; i < AID_MAX; i++) 1527 if (p->capa.ann.mp[i]) { /* 4 bytes data */ 1528 errs += session_capa_add(opb, CAPA_MP, 4); 1529 errs += session_capa_add_mp(opb, i); 1530 mpcapa++; 1531 } 1532 1533 /* route refresh, RFC 2918 */ 1534 if (p->capa.ann.refresh) /* no data */ 1535 errs += session_capa_add(opb, CAPA_REFRESH, 0); 1536 1537 /* extended nexthop encoding, RFC 8950 */ 1538 if (p->capa.ann.ext_nh[AID_INET]) { 1539 uint8_t enhlen = 0; 1540 1541 if (p->capa.ann.mp[AID_INET]) 1542 enhlen += 6; 1543 if (p->capa.ann.mp[AID_VPN_IPv4]) 1544 enhlen += 6; 1545 errs += session_capa_add(opb, CAPA_EXT_NEXTHOP, enhlen); 1546 if (p->capa.ann.mp[AID_INET]) 1547 errs += session_capa_add_ext_nh(opb, AID_INET); 1548 if (p->capa.ann.mp[AID_VPN_IPv4]) 1549 errs += session_capa_add_ext_nh(opb, AID_VPN_IPv4); 1550 } 1551 1552 /* extended message support, RFC 8654 */ 1553 if (p->capa.ann.ext_msg) /* no data */ 1554 errs += session_capa_add(opb, CAPA_EXT_MSG, 0); 1555 1556 /* BGP open policy, RFC 9234, only for ebgp sessions */ 1557 if (p->conf.ebgp && p->capa.ann.policy && 1558 p->conf.role != ROLE_NONE && 1559 (p->capa.ann.mp[AID_INET] || p->capa.ann.mp[AID_INET6] || 1560 mpcapa == 0)) { 1561 errs += session_capa_add(opb, CAPA_ROLE, 1); 1562 errs += ibuf_add_n8(opb, role2capa(p->conf.role)); 1563 } 1564 1565 /* graceful restart and End-of-RIB marker, RFC 4724 */ 1566 if (p->capa.ann.grestart.restart) { 1567 int rst = 0; 1568 uint16_t hdr = 0; 1569 1570 for (i = AID_MIN; i < AID_MAX; i++) { 1571 if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING) 1572 rst++; 1573 } 1574 1575 /* Only set the R-flag if no graceful restart is ongoing */ 1576 if (!rst) 1577 hdr |= CAPA_GR_R_FLAG; 1578 if (p->capa.ann.grestart.grnotification) 1579 hdr |= CAPA_GR_N_FLAG; 1580 errs += session_capa_add(opb, CAPA_RESTART, sizeof(hdr)); 1581 errs += ibuf_add_n16(opb, hdr); 1582 } 1583 1584 /* 4-bytes AS numbers, RFC6793 */ 1585 if (p->capa.ann.as4byte) { /* 4 bytes data */ 1586 errs += session_capa_add(opb, CAPA_AS4BYTE, sizeof(uint32_t)); 1587 errs += ibuf_add_n32(opb, p->conf.local_as); 1588 } 1589 1590 /* advertisement of multiple paths, RFC7911 */ 1591 if (p->capa.ann.add_path[AID_MIN]) { /* variable */ 1592 uint8_t aplen; 1593 1594 if (mpcapa) 1595 aplen = 4 * mpcapa; 1596 else /* AID_INET */ 1597 aplen = 4; 1598 errs += session_capa_add(opb, CAPA_ADD_PATH, aplen); 1599 if (mpcapa) { 1600 for (i = AID_MIN; i < AID_MAX; i++) { 1601 if (p->capa.ann.mp[i]) { 1602 errs += session_capa_add_afi(opb, 1603 i, p->capa.ann.add_path[i] & 1604 CAPA_AP_MASK); 1605 } 1606 } 1607 } else { /* AID_INET */ 1608 errs += session_capa_add_afi(opb, AID_INET, 1609 p->capa.ann.add_path[AID_INET] & CAPA_AP_MASK); 1610 } 1611 } 1612 1613 /* enhanced route-refresh, RFC7313 */ 1614 if (p->capa.ann.enhanced_rr) /* no data */ 1615 errs += session_capa_add(opb, CAPA_ENHANCED_RR, 0); 1616 1617 if (errs) { 1618 ibuf_free(opb); 1619 bgp_fsm(p, EVNT_CON_FATAL, NULL); 1620 return; 1621 } 1622 1623 optparamlen = ibuf_size(opb); 1624 len = MSGSIZE_OPEN_MIN + optparamlen; 1625 if (optparamlen == 0) { 1626 /* nothing */ 1627 } else if (optparamlen + 2 >= 255) { 1628 /* RFC9072: use 255 as magic size and request extra header */ 1629 optparamlen = 255; 1630 extlen = 1; 1631 /* 3 byte OPT_PARAM_EXT_LEN and OPT_PARAM_CAPABILITIES */ 1632 len += 2 * 3; 1633 } else { 1634 /* regular capabilities header */ 1635 optparamlen += 2; 1636 len += 2; 1637 } 1638 1639 if ((buf = session_newmsg(MSG_OPEN, len)) == NULL) { 1640 ibuf_free(opb); 1641 bgp_fsm(p, EVNT_CON_FATAL, NULL); 1642 return; 1643 } 1644 1645 if (p->conf.holdtime) 1646 holdtime = p->conf.holdtime; 1647 else 1648 holdtime = conf->holdtime; 1649 1650 errs += ibuf_add_n8(buf, 4); 1651 errs += ibuf_add_n16(buf, p->conf.local_short_as); 1652 errs += ibuf_add_n16(buf, holdtime); 1653 /* is already in network byte order */ 1654 errs += ibuf_add_n32(buf, conf->bgpid); 1655 errs += ibuf_add_n8(buf, optparamlen); 1656 1657 if (extlen) { 1658 /* RFC9072 extra header which spans over the capabilities hdr */ 1659 errs += ibuf_add_n8(buf, OPT_PARAM_EXT_LEN); 1660 errs += ibuf_add_n16(buf, ibuf_size(opb) + 1 + 2); 1661 } 1662 1663 if (optparamlen) { 1664 errs += ibuf_add_n8(buf, OPT_PARAM_CAPABILITIES); 1665 1666 if (extlen) { 1667 /* RFC9072: 2-byte extended length */ 1668 errs += ibuf_add_n16(buf, ibuf_size(opb)); 1669 } else { 1670 errs += ibuf_add_n8(buf, ibuf_size(opb)); 1671 } 1672 errs += ibuf_add_ibuf(buf, opb); 1673 } 1674 1675 ibuf_free(opb); 1676 1677 if (errs) { 1678 ibuf_free(buf); 1679 bgp_fsm(p, EVNT_CON_FATAL, NULL); 1680 return; 1681 } 1682 1683 session_sendmsg(buf, p, MSG_OPEN); 1684 p->stats.msg_sent_open++; 1685 } 1686 1687 void 1688 session_keepalive(struct peer *p) 1689 { 1690 struct ibuf *buf; 1691 1692 if ((buf = session_newmsg(MSG_KEEPALIVE, MSGSIZE_KEEPALIVE)) == NULL) { 1693 bgp_fsm(p, EVNT_CON_FATAL, NULL); 1694 return; 1695 } 1696 1697 session_sendmsg(buf, p, MSG_KEEPALIVE); 1698 start_timer_keepalive(p); 1699 p->stats.msg_sent_keepalive++; 1700 } 1701 1702 void 1703 session_update(uint32_t peerid, struct ibuf *ibuf) 1704 { 1705 struct peer *p; 1706 struct ibuf *buf; 1707 size_t len, maxsize = MAX_PKTSIZE; 1708 1709 if ((p = getpeerbyid(conf, peerid)) == NULL) { 1710 log_warnx("%s: no such peer: id=%u", __func__, peerid); 1711 return; 1712 } 1713 1714 if (p->state != STATE_ESTABLISHED) 1715 return; 1716 1717 if (p->capa.neg.ext_msg) 1718 maxsize = MAX_EXT_PKTSIZE; 1719 len = ibuf_size(ibuf); 1720 if (len < MSGSIZE_UPDATE_MIN - MSGSIZE_HEADER || 1721 len > maxsize - MSGSIZE_HEADER) { 1722 log_peer_warnx(&p->conf, "bad UDPATE from RDE"); 1723 return; 1724 } 1725 1726 if ((buf = session_newmsg(MSG_UPDATE, MSGSIZE_HEADER + len)) == NULL) { 1727 bgp_fsm(p, EVNT_CON_FATAL, NULL); 1728 return; 1729 } 1730 1731 if (ibuf_add_ibuf(buf, ibuf)) { 1732 ibuf_free(buf); 1733 bgp_fsm(p, EVNT_CON_FATAL, NULL); 1734 return; 1735 } 1736 1737 session_sendmsg(buf, p, MSG_UPDATE); 1738 start_timer_keepalive(p); 1739 p->stats.msg_sent_update++; 1740 } 1741 1742 static int 1743 session_req_hard_reset(enum err_codes errcode, uint8_t subcode) 1744 { 1745 switch (errcode) { 1746 case ERR_HEADER: 1747 case ERR_OPEN: 1748 case ERR_UPDATE: 1749 case ERR_FSM: 1750 case ERR_RREFRESH: 1751 /* 1752 * Protocol errors trigger a hard reset. The peer 1753 * is not trustworthy and so there is no realistic 1754 * hope that forwarding can continue. 1755 */ 1756 return 1; 1757 case ERR_HOLDTIMEREXPIRED: 1758 case ERR_SENDHOLDTIMEREXPIRED: 1759 /* Keep forwarding and hope the other side is back soon. */ 1760 return 0; 1761 case ERR_CEASE: 1762 switch (subcode) { 1763 case ERR_CEASE_CONN_REJECT: 1764 case ERR_CEASE_OTHER_CHANGE: 1765 case ERR_CEASE_COLLISION: 1766 case ERR_CEASE_RSRC_EXHAUST: 1767 /* Per RFC8538 suggestion make these graceful. */ 1768 return 0; 1769 } 1770 return 1; 1771 } 1772 } 1773 1774 void 1775 session_notification_data(struct peer *p, uint8_t errcode, uint8_t subcode, 1776 void *data, size_t datalen) 1777 { 1778 struct ibuf ibuf; 1779 1780 ibuf_from_buffer(&ibuf, data, datalen); 1781 session_notification(p, errcode, subcode, &ibuf); 1782 } 1783 1784 void 1785 session_notification(struct peer *p, uint8_t errcode, uint8_t subcode, 1786 struct ibuf *ibuf) 1787 { 1788 struct ibuf *buf; 1789 const char *reason = "sending"; 1790 int errs = 0, need_hard_reset = 0; 1791 size_t datalen = 0; 1792 1793 switch (p->state) { 1794 case STATE_OPENSENT: 1795 case STATE_OPENCONFIRM: 1796 case STATE_ESTABLISHED: 1797 break; 1798 default: 1799 /* session not open, no need to send notification */ 1800 log_notification(p, errcode, subcode, ibuf, "dropping"); 1801 return; 1802 } 1803 1804 if (p->capa.neg.grestart.grnotification) { 1805 if (session_req_hard_reset(errcode, subcode)) { 1806 need_hard_reset = 1; 1807 datalen += 2; 1808 reason = "sending hard-reset"; 1809 } else { 1810 reason = "sending graceful"; 1811 } 1812 } 1813 1814 log_notification(p, errcode, subcode, ibuf, reason); 1815 1816 /* cap to maximum size */ 1817 if (ibuf != NULL) { 1818 if (ibuf_size(ibuf) > 1819 MAX_PKTSIZE - MSGSIZE_NOTIFICATION_MIN - datalen) { 1820 log_peer_warnx(&p->conf, 1821 "oversized notification, data trunkated"); 1822 ibuf_truncate(ibuf, MAX_PKTSIZE - 1823 MSGSIZE_NOTIFICATION_MIN - datalen); 1824 } 1825 datalen += ibuf_size(ibuf); 1826 } 1827 1828 if ((buf = session_newmsg(MSG_NOTIFICATION, 1829 MSGSIZE_NOTIFICATION_MIN + datalen)) == NULL) { 1830 bgp_fsm(p, EVNT_CON_FATAL, NULL); 1831 return; 1832 } 1833 1834 if (need_hard_reset) { 1835 errs += ibuf_add_n8(buf, ERR_CEASE); 1836 errs += ibuf_add_n8(buf, ERR_CEASE_HARD_RESET); 1837 } 1838 1839 errs += ibuf_add_n8(buf, errcode); 1840 errs += ibuf_add_n8(buf, subcode); 1841 1842 if (ibuf != NULL) 1843 errs += ibuf_add_ibuf(buf, ibuf); 1844 1845 if (errs) { 1846 ibuf_free(buf); 1847 bgp_fsm(p, EVNT_CON_FATAL, NULL); 1848 return; 1849 } 1850 1851 session_sendmsg(buf, p, MSG_NOTIFICATION); 1852 p->stats.msg_sent_notification++; 1853 p->stats.last_sent_errcode = errcode; 1854 p->stats.last_sent_suberr = subcode; 1855 } 1856 1857 int 1858 session_neighbor_rrefresh(struct peer *p) 1859 { 1860 uint8_t i; 1861 1862 if (!(p->capa.neg.refresh || p->capa.neg.enhanced_rr)) 1863 return (-1); 1864 1865 for (i = AID_MIN; i < AID_MAX; i++) { 1866 if (p->capa.neg.mp[i] != 0) 1867 session_rrefresh(p, i, ROUTE_REFRESH_REQUEST); 1868 } 1869 1870 return (0); 1871 } 1872 1873 void 1874 session_rrefresh(struct peer *p, uint8_t aid, uint8_t subtype) 1875 { 1876 struct ibuf *buf; 1877 int errs = 0; 1878 uint16_t afi; 1879 uint8_t safi; 1880 1881 switch (subtype) { 1882 case ROUTE_REFRESH_REQUEST: 1883 p->stats.refresh_sent_req++; 1884 break; 1885 case ROUTE_REFRESH_BEGIN_RR: 1886 case ROUTE_REFRESH_END_RR: 1887 /* requires enhanced route refresh */ 1888 if (!p->capa.neg.enhanced_rr) 1889 return; 1890 if (subtype == ROUTE_REFRESH_BEGIN_RR) 1891 p->stats.refresh_sent_borr++; 1892 else 1893 p->stats.refresh_sent_eorr++; 1894 break; 1895 default: 1896 fatalx("session_rrefresh: bad subtype %d", subtype); 1897 } 1898 1899 if (aid2afi(aid, &afi, &safi) == -1) 1900 fatalx("session_rrefresh: bad afi/safi pair"); 1901 1902 if ((buf = session_newmsg(MSG_RREFRESH, MSGSIZE_RREFRESH)) == NULL) { 1903 bgp_fsm(p, EVNT_CON_FATAL, NULL); 1904 return; 1905 } 1906 1907 errs += ibuf_add_n16(buf, afi); 1908 errs += ibuf_add_n8(buf, subtype); 1909 errs += ibuf_add_n8(buf, safi); 1910 1911 if (errs) { 1912 ibuf_free(buf); 1913 bgp_fsm(p, EVNT_CON_FATAL, NULL); 1914 return; 1915 } 1916 1917 session_sendmsg(buf, p, MSG_RREFRESH); 1918 p->stats.msg_sent_rrefresh++; 1919 } 1920 1921 int 1922 session_graceful_restart(struct peer *p) 1923 { 1924 uint8_t i; 1925 uint16_t staletime = conf->staletime; 1926 1927 if (p->conf.staletime) 1928 staletime = p->conf.staletime; 1929 1930 /* RFC 8538: enforce configurable upper bound of the stale timer */ 1931 if (staletime > p->capa.neg.grestart.timeout) 1932 staletime = p->capa.neg.grestart.timeout; 1933 timer_set(&p->timers, Timer_RestartTimeout, staletime); 1934 1935 for (i = AID_MIN; i < AID_MAX; i++) { 1936 if (p->capa.neg.grestart.flags[i] & CAPA_GR_PRESENT) { 1937 if (imsg_rde(IMSG_SESSION_STALE, p->conf.id, 1938 &i, sizeof(i)) == -1) 1939 return (-1); 1940 log_peer_warnx(&p->conf, 1941 "graceful restart of %s, keeping routes", 1942 aid2str(i)); 1943 p->capa.neg.grestart.flags[i] |= CAPA_GR_RESTARTING; 1944 } else if (p->capa.neg.mp[i]) { 1945 if (imsg_rde(IMSG_SESSION_NOGRACE, p->conf.id, 1946 &i, sizeof(i)) == -1) 1947 return (-1); 1948 log_peer_warnx(&p->conf, 1949 "graceful restart of %s, flushing routes", 1950 aid2str(i)); 1951 } 1952 } 1953 return (0); 1954 } 1955 1956 int 1957 session_graceful_stop(struct peer *p) 1958 { 1959 uint8_t i; 1960 1961 for (i = AID_MIN; i < AID_MAX; i++) { 1962 /* 1963 * Only flush if the peer is restarting and the timeout fired. 1964 * In all other cases the session was already flushed when the 1965 * session went down or when the new open message was parsed. 1966 */ 1967 if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING) { 1968 log_peer_warnx(&p->conf, "graceful restart of %s, " 1969 "time-out, flushing", aid2str(i)); 1970 if (imsg_rde(IMSG_SESSION_FLUSH, p->conf.id, 1971 &i, sizeof(i)) == -1) 1972 return (-1); 1973 } 1974 p->capa.neg.grestart.flags[i] &= ~CAPA_GR_RESTARTING; 1975 } 1976 return (0); 1977 } 1978 1979 int 1980 session_dispatch_msg(struct pollfd *pfd, struct peer *p) 1981 { 1982 socklen_t len; 1983 int error; 1984 1985 if (p->state == STATE_CONNECT) { 1986 if (pfd->revents & POLLOUT) { 1987 if (pfd->revents & POLLIN) { 1988 /* error occurred */ 1989 len = sizeof(error); 1990 if (getsockopt(pfd->fd, SOL_SOCKET, SO_ERROR, 1991 &error, &len) == -1 || error) { 1992 if (error) 1993 errno = error; 1994 if (errno != p->lasterr) { 1995 log_peer_warn(&p->conf, 1996 "socket error"); 1997 p->lasterr = errno; 1998 } 1999 bgp_fsm(p, EVNT_CON_OPENFAIL, NULL); 2000 return (1); 2001 } 2002 } 2003 bgp_fsm(p, EVNT_CON_OPEN, NULL); 2004 return (1); 2005 } 2006 if (pfd->revents & POLLHUP) { 2007 bgp_fsm(p, EVNT_CON_OPENFAIL, NULL); 2008 return (1); 2009 } 2010 if (pfd->revents & (POLLERR|POLLNVAL)) { 2011 bgp_fsm(p, EVNT_CON_FATAL, NULL); 2012 return (1); 2013 } 2014 return (0); 2015 } 2016 2017 if (pfd->revents & POLLHUP) { 2018 bgp_fsm(p, EVNT_CON_CLOSED, NULL); 2019 return (1); 2020 } 2021 if (pfd->revents & (POLLERR|POLLNVAL)) { 2022 bgp_fsm(p, EVNT_CON_FATAL, NULL); 2023 return (1); 2024 } 2025 2026 if (pfd->revents & POLLOUT && msgbuf_queuelen(p->wbuf) > 0) { 2027 if (ibuf_write(p->fd, p->wbuf) == -1) { 2028 if (errno == EPIPE) 2029 log_peer_warnx(&p->conf, "Connection closed"); 2030 else 2031 log_peer_warn(&p->conf, "write error"); 2032 bgp_fsm(p, EVNT_CON_FATAL, NULL); 2033 return (1); 2034 } 2035 p->stats.last_write = getmonotime(); 2036 start_timer_sendholdtime(p); 2037 if (p->throttled && 2038 msgbuf_queuelen(p->wbuf) < SESS_MSG_LOW_MARK) { 2039 if (imsg_rde(IMSG_XON, p->conf.id, NULL, 0) == -1) 2040 log_peer_warn(&p->conf, "imsg_compose XON"); 2041 else 2042 p->throttled = 0; 2043 } 2044 if (!(pfd->revents & POLLIN)) 2045 return (1); 2046 } 2047 2048 if (p->fd != -1 && pfd->revents & POLLIN) { 2049 switch (ibuf_read(p->fd, p->wbuf)) { 2050 case -1: 2051 if (p->state == STATE_IDLE) 2052 /* error already handled before */ 2053 return (1); 2054 log_peer_warn(&p->conf, "read error"); 2055 bgp_fsm(p, EVNT_CON_FATAL, NULL); 2056 return (1); 2057 case 0: 2058 bgp_fsm(p, EVNT_CON_CLOSED, NULL); 2059 return (1); 2060 } 2061 p->stats.last_read = getmonotime(); 2062 return (1); 2063 } 2064 return (0); 2065 } 2066 2067 void 2068 session_process_msg(struct peer *p) 2069 { 2070 struct ibuf *msg; 2071 struct mrt *mrt; 2072 int processed = 0; 2073 uint8_t msgtype; 2074 2075 p->rpending = 0; 2076 if (p->wbuf == NULL) 2077 return; 2078 2079 /* 2080 * session might drop to IDLE -> all buffers are flushed 2081 */ 2082 while ((msg = msgbuf_get(p->wbuf)) != NULL) { 2083 /* skip msg header and extract type */ 2084 if (ibuf_skip(msg, MSGSIZE_HEADER_MARKER) == -1 || 2085 ibuf_skip(msg, sizeof(uint16_t)) == -1 || 2086 ibuf_get_n8(msg, &msgtype) == -1) { 2087 log_peer_warn(&p->conf, "process message failed"); 2088 bgp_fsm(p, EVNT_CON_FATAL, NULL); 2089 ibuf_free(msg); 2090 return; 2091 } 2092 ibuf_rewind(msg); 2093 2094 /* dump to MRT as soon as we have a full packet */ 2095 LIST_FOREACH(mrt, &mrthead, entry) { 2096 if (!(mrt->type == MRT_ALL_IN || 2097 (msgtype == MSG_UPDATE && 2098 mrt->type == MRT_UPDATE_IN))) 2099 continue; 2100 if ((mrt->peer_id == 0 && mrt->group_id == 0) || 2101 mrt->peer_id == p->conf.id || (mrt->group_id != 0 && 2102 mrt->group_id == p->conf.groupid)) 2103 mrt_dump_bgp_msg(mrt, msg, p, msgtype); 2104 } 2105 2106 ibuf_skip(msg, MSGSIZE_HEADER); 2107 2108 switch (msgtype) { 2109 case MSG_OPEN: 2110 bgp_fsm(p, EVNT_RCVD_OPEN, msg); 2111 p->stats.msg_rcvd_open++; 2112 break; 2113 case MSG_UPDATE: 2114 bgp_fsm(p, EVNT_RCVD_UPDATE, msg); 2115 p->stats.msg_rcvd_update++; 2116 break; 2117 case MSG_NOTIFICATION: 2118 bgp_fsm(p, EVNT_RCVD_NOTIFICATION, msg); 2119 p->stats.msg_rcvd_notification++; 2120 break; 2121 case MSG_KEEPALIVE: 2122 bgp_fsm(p, EVNT_RCVD_KEEPALIVE, msg); 2123 p->stats.msg_rcvd_keepalive++; 2124 break; 2125 case MSG_RREFRESH: 2126 parse_rrefresh(p, msg); 2127 p->stats.msg_rcvd_rrefresh++; 2128 break; 2129 default: /* cannot happen */ 2130 session_notification_data(p, ERR_HEADER, ERR_HDR_TYPE, 2131 &msgtype, 1); 2132 log_peer_warnx(&p->conf, 2133 "received message with unknown type %u", msgtype); 2134 bgp_fsm(p, EVNT_CON_FATAL, NULL); 2135 } 2136 ibuf_free(msg); 2137 if (++processed > MSG_PROCESS_LIMIT) { 2138 p->rpending = 1; 2139 break; 2140 } 2141 } 2142 } 2143 2144 struct ibuf * 2145 parse_header(struct ibuf *msg, void *arg, int *fd) 2146 { 2147 struct peer *peer = arg; 2148 struct ibuf *b; 2149 u_char m[MSGSIZE_HEADER_MARKER]; 2150 uint16_t len, maxlen = MAX_PKTSIZE; 2151 uint8_t type; 2152 2153 if (ibuf_get(msg, m, sizeof(m)) == -1 || 2154 ibuf_get_n16(msg, &len) == -1 || 2155 ibuf_get_n8(msg, &type) == -1) 2156 return (NULL); 2157 /* caller MUST make sure we are getting 19 bytes! */ 2158 if (memcmp(m, marker, sizeof(marker))) { 2159 log_peer_warnx(&peer->conf, "sync error"); 2160 session_notification(peer, ERR_HEADER, ERR_HDR_SYNC, NULL); 2161 bgp_fsm(peer, EVNT_CON_FATAL, NULL); 2162 errno = EINVAL; 2163 return (NULL); 2164 } 2165 2166 if (peer->capa.ann.ext_msg) 2167 maxlen = MAX_EXT_PKTSIZE; 2168 2169 if (len < MSGSIZE_HEADER || len > maxlen) { 2170 log_peer_warnx(&peer->conf, 2171 "received message: illegal length: %u byte", len); 2172 goto badlen; 2173 } 2174 2175 switch (type) { 2176 case MSG_OPEN: 2177 if (len < MSGSIZE_OPEN_MIN || len > MAX_PKTSIZE) { 2178 log_peer_warnx(&peer->conf, 2179 "received OPEN: illegal len: %u byte", len); 2180 goto badlen; 2181 } 2182 break; 2183 case MSG_NOTIFICATION: 2184 if (len < MSGSIZE_NOTIFICATION_MIN) { 2185 log_peer_warnx(&peer->conf, 2186 "received NOTIFICATION: illegal len: %u byte", len); 2187 goto badlen; 2188 } 2189 break; 2190 case MSG_UPDATE: 2191 if (len < MSGSIZE_UPDATE_MIN) { 2192 log_peer_warnx(&peer->conf, 2193 "received UPDATE: illegal len: %u byte", len); 2194 goto badlen; 2195 } 2196 break; 2197 case MSG_KEEPALIVE: 2198 if (len != MSGSIZE_KEEPALIVE) { 2199 log_peer_warnx(&peer->conf, 2200 "received KEEPALIVE: illegal len: %u byte", len); 2201 goto badlen; 2202 } 2203 break; 2204 case MSG_RREFRESH: 2205 if (len < MSGSIZE_RREFRESH_MIN) { 2206 log_peer_warnx(&peer->conf, 2207 "received RREFRESH: illegal len: %u byte", len); 2208 goto badlen; 2209 } 2210 break; 2211 default: 2212 log_peer_warnx(&peer->conf, 2213 "received msg with unknown type %u", type); 2214 session_notification_data(peer, ERR_HEADER, ERR_HDR_TYPE, 2215 &type, sizeof(type)); 2216 bgp_fsm(peer, EVNT_CON_FATAL, NULL); 2217 errno = EINVAL; 2218 return (NULL); 2219 } 2220 2221 if ((b = ibuf_open(len)) == NULL) 2222 return (NULL); 2223 return (b); 2224 2225 badlen: 2226 len = htons(len); 2227 session_notification_data(peer, ERR_HEADER, ERR_HDR_LEN, 2228 &len, sizeof(len)); 2229 bgp_fsm(peer, EVNT_CON_FATAL, NULL); 2230 errno = ERANGE; 2231 return (NULL); 2232 } 2233 2234 int 2235 parse_open(struct peer *peer, struct ibuf *msg) 2236 { 2237 uint8_t version, rversion; 2238 uint16_t short_as; 2239 uint16_t holdtime, myholdtime; 2240 uint32_t as, bgpid; 2241 uint8_t optparamlen; 2242 2243 if (ibuf_get_n8(msg, &version) == -1 || 2244 ibuf_get_n16(msg, &short_as) == -1 || 2245 ibuf_get_n16(msg, &holdtime) == -1 || 2246 ibuf_get_n32(msg, &bgpid) == -1 || 2247 ibuf_get_n8(msg, &optparamlen) == -1) 2248 goto bad_len; 2249 2250 if (version != BGP_VERSION) { 2251 log_peer_warnx(&peer->conf, 2252 "peer wants unrecognized version %u", version); 2253 if (version > BGP_VERSION) 2254 rversion = version - BGP_VERSION; 2255 else 2256 rversion = BGP_VERSION; 2257 session_notification_data(peer, ERR_OPEN, ERR_OPEN_VERSION, 2258 &rversion, sizeof(rversion)); 2259 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2260 return (-1); 2261 } 2262 2263 as = peer->short_as = short_as; 2264 if (as == 0) { 2265 log_peer_warnx(&peer->conf, 2266 "peer requests unacceptable AS %u", as); 2267 session_notification(peer, ERR_OPEN, ERR_OPEN_AS, NULL); 2268 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2269 return (-1); 2270 } 2271 2272 if (holdtime && holdtime < peer->conf.min_holdtime) { 2273 log_peer_warnx(&peer->conf, 2274 "peer requests unacceptable holdtime %u", holdtime); 2275 session_notification(peer, ERR_OPEN, ERR_OPEN_HOLDTIME, NULL); 2276 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2277 return (-1); 2278 } 2279 2280 myholdtime = peer->conf.holdtime; 2281 if (!myholdtime) 2282 myholdtime = conf->holdtime; 2283 if (holdtime < myholdtime) 2284 peer->holdtime = holdtime; 2285 else 2286 peer->holdtime = myholdtime; 2287 2288 /* check bgpid for validity - just disallow 0 */ 2289 if (bgpid == 0) { 2290 log_peer_warnx(&peer->conf, "peer BGPID 0 unacceptable"); 2291 session_notification(peer, ERR_OPEN, ERR_OPEN_BGPID, NULL); 2292 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2293 return (-1); 2294 } 2295 peer->remote_bgpid = bgpid; 2296 2297 if (optparamlen != 0) { 2298 struct ibuf oparams, op; 2299 uint8_t ext_type, op_type; 2300 uint16_t ext_len, op_len; 2301 2302 ibuf_from_ibuf(&oparams, msg); 2303 2304 /* check for RFC9072 encoding */ 2305 if (ibuf_get_n8(&oparams, &ext_type) == -1) 2306 goto bad_len; 2307 if (ext_type == OPT_PARAM_EXT_LEN) { 2308 if (ibuf_get_n16(&oparams, &ext_len) == -1) 2309 goto bad_len; 2310 /* skip RFC9072 header */ 2311 if (ibuf_skip(msg, 3) == -1) 2312 goto bad_len; 2313 } else { 2314 ext_len = optparamlen; 2315 ibuf_rewind(&oparams); 2316 } 2317 2318 if (ibuf_truncate(&oparams, ext_len) == -1 || 2319 ibuf_skip(msg, ext_len) == -1) 2320 goto bad_len; 2321 2322 while (ibuf_size(&oparams) > 0) { 2323 if (ibuf_get_n8(&oparams, &op_type) == -1) 2324 goto bad_len; 2325 2326 if (ext_type == OPT_PARAM_EXT_LEN) { 2327 if (ibuf_get_n16(&oparams, &op_len) == -1) 2328 goto bad_len; 2329 } else { 2330 uint8_t tmp; 2331 if (ibuf_get_n8(&oparams, &tmp) == -1) 2332 goto bad_len; 2333 op_len = tmp; 2334 } 2335 2336 if (ibuf_get_ibuf(&oparams, op_len, &op) == -1) 2337 goto bad_len; 2338 2339 switch (op_type) { 2340 case OPT_PARAM_CAPABILITIES: /* RFC 3392 */ 2341 if (parse_capabilities(peer, &op, &as) == -1) { 2342 session_notification(peer, ERR_OPEN, 0, 2343 NULL); 2344 change_state(peer, STATE_IDLE, 2345 EVNT_RCVD_OPEN); 2346 return (-1); 2347 } 2348 break; 2349 case OPT_PARAM_AUTH: /* deprecated */ 2350 default: 2351 /* 2352 * unsupported type 2353 * the RFCs tell us to leave the data section 2354 * empty and notify the peer with ERR_OPEN, 2355 * ERR_OPEN_OPT. How the peer should know 2356 * _which_ optional parameter we don't support 2357 * is beyond me. 2358 */ 2359 log_peer_warnx(&peer->conf, 2360 "received OPEN message with unsupported " 2361 "optional parameter: type %u", op_type); 2362 session_notification(peer, ERR_OPEN, 2363 ERR_OPEN_OPT, NULL); 2364 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2365 return (-1); 2366 } 2367 } 2368 } 2369 2370 if (ibuf_size(msg) != 0) { 2371 bad_len: 2372 log_peer_warnx(&peer->conf, 2373 "corrupt OPEN message received: length mismatch"); 2374 session_notification(peer, ERR_OPEN, 0, NULL); 2375 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2376 return (-1); 2377 } 2378 2379 /* 2380 * if remote-as is zero and it's a cloned neighbor, accept any 2381 * but only on the first connect, after that the remote-as needs 2382 * to remain the same. 2383 */ 2384 if (peer->template && !peer->conf.remote_as && as != AS_TRANS) { 2385 peer->conf.remote_as = as; 2386 peer->conf.ebgp = (peer->conf.remote_as != peer->conf.local_as); 2387 if (!peer->conf.ebgp) 2388 /* force enforce_as off for iBGP sessions */ 2389 peer->conf.enforce_as = ENFORCE_AS_OFF; 2390 } 2391 2392 if (peer->conf.remote_as != as) { 2393 log_peer_warnx(&peer->conf, "peer sent wrong AS %s", 2394 log_as(as)); 2395 session_notification(peer, ERR_OPEN, ERR_OPEN_AS, NULL); 2396 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2397 return (-1); 2398 } 2399 2400 /* on iBGP sessions check for bgpid collision */ 2401 if (!peer->conf.ebgp && peer->remote_bgpid == conf->bgpid) { 2402 struct in_addr ina; 2403 ina.s_addr = htonl(bgpid); 2404 log_peer_warnx(&peer->conf, "peer BGPID %s conflicts with ours", 2405 inet_ntoa(ina)); 2406 session_notification(peer, ERR_OPEN, ERR_OPEN_BGPID, NULL); 2407 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2408 return (-1); 2409 } 2410 2411 if (capa_neg_calc(peer) == -1) { 2412 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2413 return (-1); 2414 } 2415 2416 return (0); 2417 } 2418 2419 int 2420 parse_update(struct peer *peer, struct ibuf *msg) 2421 { 2422 /* 2423 * we pass the message verbatim to the rde. 2424 * in case of errors the whole session is reset with a 2425 * notification anyway, we only need to know the peer 2426 */ 2427 if (imsg_rde(IMSG_UPDATE, peer->conf.id, ibuf_data(msg), 2428 ibuf_size(msg)) == -1) 2429 return (-1); 2430 2431 return (0); 2432 } 2433 2434 int 2435 parse_rrefresh(struct peer *peer, struct ibuf *msg) 2436 { 2437 struct route_refresh rr; 2438 uint16_t afi, datalen; 2439 uint8_t aid, safi, subtype; 2440 2441 datalen = ibuf_size(msg) + MSGSIZE_HEADER; 2442 2443 if (ibuf_get_n16(msg, &afi) == -1 || 2444 ibuf_get_n8(msg, &subtype) == -1 || 2445 ibuf_get_n8(msg, &safi) == -1) { 2446 /* minimum size checked in session_process_msg() */ 2447 fatalx("%s: message too small", __func__); 2448 } 2449 2450 /* check subtype if peer announced enhanced route refresh */ 2451 if (peer->capa.neg.enhanced_rr) { 2452 switch (subtype) { 2453 case ROUTE_REFRESH_REQUEST: 2454 /* no ORF support, so no oversized RREFRESH msgs */ 2455 if (datalen != MSGSIZE_RREFRESH) { 2456 log_peer_warnx(&peer->conf, 2457 "received RREFRESH: illegal len: %u byte", 2458 datalen); 2459 datalen = htons(datalen); 2460 session_notification_data(peer, ERR_HEADER, 2461 ERR_HDR_LEN, &datalen, sizeof(datalen)); 2462 bgp_fsm(peer, EVNT_CON_FATAL, NULL); 2463 return (-1); 2464 } 2465 peer->stats.refresh_rcvd_req++; 2466 break; 2467 case ROUTE_REFRESH_BEGIN_RR: 2468 case ROUTE_REFRESH_END_RR: 2469 /* special handling for RFC7313 */ 2470 if (datalen != MSGSIZE_RREFRESH) { 2471 log_peer_warnx(&peer->conf, 2472 "received RREFRESH: illegal len: %u byte", 2473 datalen); 2474 ibuf_rewind(msg); 2475 session_notification(peer, ERR_RREFRESH, 2476 ERR_RR_INV_LEN, msg); 2477 bgp_fsm(peer, EVNT_CON_FATAL, NULL); 2478 return (-1); 2479 } 2480 if (subtype == ROUTE_REFRESH_BEGIN_RR) 2481 peer->stats.refresh_rcvd_borr++; 2482 else 2483 peer->stats.refresh_rcvd_eorr++; 2484 break; 2485 default: 2486 log_peer_warnx(&peer->conf, "peer sent bad refresh, " 2487 "bad subtype %d", subtype); 2488 return (0); 2489 } 2490 } else { 2491 /* force subtype to default */ 2492 subtype = ROUTE_REFRESH_REQUEST; 2493 peer->stats.refresh_rcvd_req++; 2494 } 2495 2496 /* afi/safi unchecked - unrecognized values will be ignored anyway */ 2497 if (afi2aid(afi, safi, &aid) == -1) { 2498 log_peer_warnx(&peer->conf, "peer sent bad refresh, " 2499 "invalid afi/safi pair"); 2500 return (0); 2501 } 2502 2503 if (!peer->capa.neg.refresh && !peer->capa.neg.enhanced_rr) { 2504 log_peer_warnx(&peer->conf, "peer sent unexpected refresh"); 2505 return (0); 2506 } 2507 2508 rr.aid = aid; 2509 rr.subtype = subtype; 2510 2511 if (imsg_rde(IMSG_REFRESH, peer->conf.id, &rr, sizeof(rr)) == -1) 2512 return (-1); 2513 2514 return (0); 2515 } 2516 2517 void 2518 parse_notification(struct peer *peer, struct ibuf *msg) 2519 { 2520 const char *reason = "received"; 2521 uint8_t errcode, subcode; 2522 uint8_t reason_len; 2523 enum session_events event = EVNT_RCVD_NOTIFICATION; 2524 2525 if (ibuf_get_n8(msg, &errcode) == -1 || 2526 ibuf_get_n8(msg, &subcode) == -1) { 2527 log_peer_warnx(&peer->conf, "received bad notification"); 2528 goto done; 2529 } 2530 2531 /* RFC8538: check for hard-reset or graceful notification */ 2532 if (peer->capa.neg.grestart.grnotification) { 2533 if (errcode == ERR_CEASE && subcode == ERR_CEASE_HARD_RESET) { 2534 if (ibuf_get_n8(msg, &errcode) == -1 || 2535 ibuf_get_n8(msg, &subcode) == -1) { 2536 log_peer_warnx(&peer->conf, 2537 "received bad hard-reset notification"); 2538 goto done; 2539 } 2540 reason = "received hard-reset"; 2541 } else { 2542 reason = "received graceful"; 2543 event = EVNT_RCVD_GRACE_NOTIFICATION; 2544 } 2545 } 2546 2547 peer->errcnt++; 2548 peer->stats.last_rcvd_errcode = errcode; 2549 peer->stats.last_rcvd_suberr = subcode; 2550 2551 log_notification(peer, errcode, subcode, msg, reason); 2552 2553 CTASSERT(sizeof(peer->stats.last_reason) > UINT8_MAX); 2554 memset(peer->stats.last_reason, 0, sizeof(peer->stats.last_reason)); 2555 if (errcode == ERR_CEASE && 2556 (subcode == ERR_CEASE_ADMIN_DOWN || 2557 subcode == ERR_CEASE_ADMIN_RESET)) { 2558 /* check if shutdown reason is included */ 2559 if (ibuf_get_n8(msg, &reason_len) != -1 && reason_len != 0) { 2560 if (ibuf_get(msg, peer->stats.last_reason, 2561 reason_len) == -1) 2562 log_peer_warnx(&peer->conf, 2563 "received truncated shutdown reason"); 2564 } 2565 } 2566 2567 done: 2568 change_state(peer, STATE_IDLE, event); 2569 } 2570 2571 int 2572 parse_capabilities(struct peer *peer, struct ibuf *buf, uint32_t *as) 2573 { 2574 struct ibuf capabuf; 2575 uint16_t afi, nhafi, gr_header; 2576 uint8_t capa_code, capa_len; 2577 uint8_t safi, aid, role, flags; 2578 2579 while (ibuf_size(buf) > 0) { 2580 if (ibuf_get_n8(buf, &capa_code) == -1 || 2581 ibuf_get_n8(buf, &capa_len) == -1) { 2582 log_peer_warnx(&peer->conf, "Bad capabilities attr " 2583 "length: too short"); 2584 return (-1); 2585 } 2586 if (ibuf_get_ibuf(buf, capa_len, &capabuf) == -1) { 2587 log_peer_warnx(&peer->conf, 2588 "Received bad capabilities attr length: " 2589 "len %zu smaller than capa_len %u", 2590 ibuf_size(buf), capa_len); 2591 return (-1); 2592 } 2593 2594 switch (capa_code) { 2595 case CAPA_MP: /* RFC 4760 */ 2596 if (capa_len != 4 || 2597 ibuf_get_n16(&capabuf, &afi) == -1 || 2598 ibuf_skip(&capabuf, 1) == -1 || 2599 ibuf_get_n8(&capabuf, &safi) == -1) { 2600 log_peer_warnx(&peer->conf, 2601 "Received bad multi protocol capability"); 2602 break; 2603 } 2604 if (afi2aid(afi, safi, &aid) == -1) { 2605 log_peer_warnx(&peer->conf, 2606 "Received multi protocol capability: " 2607 " unknown AFI %u, safi %u pair", 2608 afi, safi); 2609 peer->capa.peer.mp[AID_UNSPEC] = 1; 2610 break; 2611 } 2612 peer->capa.peer.mp[aid] = 1; 2613 break; 2614 case CAPA_REFRESH: 2615 peer->capa.peer.refresh = 1; 2616 break; 2617 case CAPA_EXT_NEXTHOP: 2618 while (ibuf_size(&capabuf) > 0) { 2619 uint16_t tmp16; 2620 if (ibuf_get_n16(&capabuf, &afi) == -1 || 2621 ibuf_get_n16(&capabuf, &tmp16) == -1 || 2622 ibuf_get_n16(&capabuf, &nhafi) == -1) { 2623 log_peer_warnx(&peer->conf, 2624 "Received bad %s capability", 2625 log_capability(CAPA_EXT_NEXTHOP)); 2626 memset(peer->capa.peer.ext_nh, 0, 2627 sizeof(peer->capa.peer.ext_nh)); 2628 break; 2629 } 2630 safi = tmp16; 2631 if (afi2aid(afi, safi, &aid) == -1 || 2632 !(aid == AID_INET || aid == AID_VPN_IPv4)) { 2633 log_peer_warnx(&peer->conf, 2634 "Received %s capability: " 2635 " unsupported AFI %u, safi %u pair", 2636 log_capability(CAPA_EXT_NEXTHOP), 2637 afi, safi); 2638 continue; 2639 } 2640 if (nhafi != AFI_IPv6) { 2641 log_peer_warnx(&peer->conf, 2642 "Received %s capability: " 2643 " unsupported nexthop AFI %u", 2644 log_capability(CAPA_EXT_NEXTHOP), 2645 nhafi); 2646 continue; 2647 } 2648 peer->capa.peer.ext_nh[aid] = 1; 2649 } 2650 break; 2651 case CAPA_EXT_MSG: 2652 peer->capa.peer.ext_msg = 1; 2653 break; 2654 case CAPA_ROLE: 2655 if (capa_len != 1 || 2656 ibuf_get_n8(&capabuf, &role) == -1) { 2657 log_peer_warnx(&peer->conf, 2658 "Received bad role capability"); 2659 break; 2660 } 2661 if (!peer->conf.ebgp) { 2662 log_peer_warnx(&peer->conf, 2663 "Received role capability on iBGP session"); 2664 break; 2665 } 2666 peer->capa.peer.policy = 1; 2667 peer->remote_role = capa2role(role); 2668 break; 2669 case CAPA_RESTART: 2670 if (capa_len == 2) { 2671 /* peer only supports EoR marker */ 2672 peer->capa.peer.grestart.restart = 1; 2673 peer->capa.peer.grestart.timeout = 0; 2674 break; 2675 } else if (capa_len % 4 != 2) { 2676 log_peer_warnx(&peer->conf, 2677 "Bad graceful restart capability"); 2678 peer->capa.peer.grestart.restart = 0; 2679 peer->capa.peer.grestart.timeout = 0; 2680 break; 2681 } 2682 2683 if (ibuf_get_n16(&capabuf, &gr_header) == -1) { 2684 bad_gr_restart: 2685 log_peer_warnx(&peer->conf, 2686 "Bad graceful restart capability"); 2687 peer->capa.peer.grestart.restart = 0; 2688 peer->capa.peer.grestart.timeout = 0; 2689 break; 2690 } 2691 2692 peer->capa.peer.grestart.timeout = 2693 gr_header & CAPA_GR_TIMEMASK; 2694 if (peer->capa.peer.grestart.timeout == 0) { 2695 log_peer_warnx(&peer->conf, "Received " 2696 "graceful restart with zero timeout"); 2697 peer->capa.peer.grestart.restart = 0; 2698 break; 2699 } 2700 2701 while (ibuf_size(&capabuf) > 0) { 2702 if (ibuf_get_n16(&capabuf, &afi) == -1 || 2703 ibuf_get_n8(&capabuf, &safi) == -1 || 2704 ibuf_get_n8(&capabuf, &flags) == -1) 2705 goto bad_gr_restart; 2706 if (afi2aid(afi, safi, &aid) == -1) { 2707 log_peer_warnx(&peer->conf, 2708 "Received graceful restart capa: " 2709 " unknown AFI %u, safi %u pair", 2710 afi, safi); 2711 continue; 2712 } 2713 peer->capa.peer.grestart.flags[aid] |= 2714 CAPA_GR_PRESENT; 2715 if (flags & CAPA_GR_F_FLAG) 2716 peer->capa.peer.grestart.flags[aid] |= 2717 CAPA_GR_FORWARD; 2718 if (gr_header & CAPA_GR_R_FLAG) 2719 peer->capa.peer.grestart.flags[aid] |= 2720 CAPA_GR_RESTART; 2721 peer->capa.peer.grestart.restart = 2; 2722 } 2723 if (gr_header & CAPA_GR_N_FLAG) 2724 peer->capa.peer.grestart.grnotification = 1; 2725 break; 2726 case CAPA_AS4BYTE: 2727 if (capa_len != 4 || 2728 ibuf_get_n32(&capabuf, as) == -1) { 2729 log_peer_warnx(&peer->conf, 2730 "Received bad AS4BYTE capability"); 2731 peer->capa.peer.as4byte = 0; 2732 break; 2733 } 2734 if (*as == 0) { 2735 log_peer_warnx(&peer->conf, 2736 "peer requests unacceptable AS %u", *as); 2737 session_notification(peer, ERR_OPEN, 2738 ERR_OPEN_AS, NULL); 2739 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2740 return (-1); 2741 } 2742 peer->capa.peer.as4byte = 1; 2743 break; 2744 case CAPA_ADD_PATH: 2745 if (capa_len % 4 != 0) { 2746 bad_add_path: 2747 log_peer_warnx(&peer->conf, 2748 "Received bad ADD-PATH capability"); 2749 memset(peer->capa.peer.add_path, 0, 2750 sizeof(peer->capa.peer.add_path)); 2751 break; 2752 } 2753 while (ibuf_size(&capabuf) > 0) { 2754 if (ibuf_get_n16(&capabuf, &afi) == -1 || 2755 ibuf_get_n8(&capabuf, &safi) == -1 || 2756 ibuf_get_n8(&capabuf, &flags) == -1) 2757 goto bad_add_path; 2758 if (afi2aid(afi, safi, &aid) == -1) { 2759 log_peer_warnx(&peer->conf, 2760 "Received ADD-PATH capa: " 2761 " unknown AFI %u, safi %u pair", 2762 afi, safi); 2763 memset(peer->capa.peer.add_path, 0, 2764 sizeof(peer->capa.peer.add_path)); 2765 break; 2766 } 2767 if (flags & ~CAPA_AP_BIDIR) { 2768 log_peer_warnx(&peer->conf, 2769 "Received ADD-PATH capa: " 2770 " bad flags %x", flags); 2771 memset(peer->capa.peer.add_path, 0, 2772 sizeof(peer->capa.peer.add_path)); 2773 break; 2774 } 2775 peer->capa.peer.add_path[aid] = flags; 2776 } 2777 break; 2778 case CAPA_ENHANCED_RR: 2779 peer->capa.peer.enhanced_rr = 1; 2780 break; 2781 default: 2782 break; 2783 } 2784 } 2785 2786 return (0); 2787 } 2788 2789 int 2790 capa_neg_calc(struct peer *p) 2791 { 2792 struct ibuf *ebuf; 2793 uint8_t i, hasmp = 0, capa_code, capa_len, capa_aid = 0; 2794 2795 /* a capability is accepted only if both sides announced it */ 2796 2797 p->capa.neg.refresh = 2798 (p->capa.ann.refresh && p->capa.peer.refresh) != 0; 2799 p->capa.neg.enhanced_rr = 2800 (p->capa.ann.enhanced_rr && p->capa.peer.enhanced_rr) != 0; 2801 p->capa.neg.as4byte = 2802 (p->capa.ann.as4byte && p->capa.peer.as4byte) != 0; 2803 p->capa.neg.ext_msg = 2804 (p->capa.ann.ext_msg && p->capa.peer.ext_msg) != 0; 2805 2806 /* MP: both side must agree on the AFI,SAFI pair */ 2807 if (p->capa.peer.mp[AID_UNSPEC]) 2808 hasmp = 1; 2809 for (i = AID_MIN; i < AID_MAX; i++) { 2810 if (p->capa.ann.mp[i] && p->capa.peer.mp[i]) 2811 p->capa.neg.mp[i] = 1; 2812 else 2813 p->capa.neg.mp[i] = 0; 2814 if (p->capa.ann.mp[i] || p->capa.peer.mp[i]) 2815 hasmp = 1; 2816 } 2817 /* if no MP capability present default to IPv4 unicast mode */ 2818 if (!hasmp) 2819 p->capa.neg.mp[AID_INET] = 1; 2820 2821 /* 2822 * graceful restart: the peer capabilities are of interest here. 2823 * It is necessary to compare the new values with the previous ones 2824 * and act accordingly. AFI/SAFI that are not part in the MP capability 2825 * are treated as not being present. 2826 * Also make sure that a flush happens if the session stopped 2827 * supporting graceful restart. 2828 */ 2829 2830 for (i = AID_MIN; i < AID_MAX; i++) { 2831 int8_t negflags; 2832 2833 /* disable GR if the AFI/SAFI is not present */ 2834 if ((p->capa.peer.grestart.flags[i] & CAPA_GR_PRESENT && 2835 p->capa.neg.mp[i] == 0)) 2836 p->capa.peer.grestart.flags[i] = 0; /* disable */ 2837 /* look at current GR state and decide what to do */ 2838 negflags = p->capa.neg.grestart.flags[i]; 2839 p->capa.neg.grestart.flags[i] = p->capa.peer.grestart.flags[i]; 2840 if (negflags & CAPA_GR_RESTARTING) { 2841 if (p->capa.ann.grestart.restart != 0 && 2842 p->capa.peer.grestart.flags[i] & CAPA_GR_FORWARD) { 2843 p->capa.neg.grestart.flags[i] |= 2844 CAPA_GR_RESTARTING; 2845 } else { 2846 if (imsg_rde(IMSG_SESSION_FLUSH, p->conf.id, 2847 &i, sizeof(i)) == -1) { 2848 log_peer_warnx(&p->conf, 2849 "imsg send failed"); 2850 return (-1); 2851 } 2852 log_peer_warnx(&p->conf, "graceful restart of " 2853 "%s, not restarted, flushing", aid2str(i)); 2854 } 2855 } 2856 } 2857 p->capa.neg.grestart.timeout = p->capa.peer.grestart.timeout; 2858 p->capa.neg.grestart.restart = p->capa.peer.grestart.restart; 2859 if (p->capa.ann.grestart.restart == 0) 2860 p->capa.neg.grestart.restart = 0; 2861 2862 /* RFC 8538 graceful notification: both sides need to agree */ 2863 p->capa.neg.grestart.grnotification = 2864 (p->capa.ann.grestart.grnotification && 2865 p->capa.peer.grestart.grnotification) != 0; 2866 2867 /* RFC 8950 extended nexthop encoding: both sides need to agree */ 2868 memset(p->capa.neg.add_path, 0, sizeof(p->capa.neg.add_path)); 2869 for (i = AID_MIN; i < AID_MAX; i++) { 2870 if (p->capa.neg.mp[i] == 0) 2871 continue; 2872 if (p->capa.ann.ext_nh[i] && p->capa.peer.ext_nh[i]) { 2873 p->capa.neg.ext_nh[i] = 1; 2874 } 2875 } 2876 2877 /* 2878 * ADD-PATH: set only those bits where both sides agree. 2879 * For this compare our send bit with the recv bit from the peer 2880 * and vice versa. 2881 * The flags are stored from this systems view point. 2882 * At index 0 the flags are set if any per-AID flag is set. 2883 */ 2884 memset(p->capa.neg.add_path, 0, sizeof(p->capa.neg.add_path)); 2885 for (i = AID_MIN; i < AID_MAX; i++) { 2886 if (p->capa.neg.mp[i] == 0) 2887 continue; 2888 if ((p->capa.ann.add_path[i] & CAPA_AP_RECV) && 2889 (p->capa.peer.add_path[i] & CAPA_AP_SEND)) { 2890 p->capa.neg.add_path[i] |= CAPA_AP_RECV; 2891 p->capa.neg.add_path[0] |= CAPA_AP_RECV; 2892 } 2893 if ((p->capa.ann.add_path[i] & CAPA_AP_SEND) && 2894 (p->capa.peer.add_path[i] & CAPA_AP_RECV)) { 2895 p->capa.neg.add_path[i] |= CAPA_AP_SEND; 2896 p->capa.neg.add_path[0] |= CAPA_AP_SEND; 2897 } 2898 } 2899 2900 /* 2901 * Open policy: check that the policy is sensible. 2902 * 2903 * Make sure that the roles match and set the negotiated capability 2904 * to the role of the peer. So the RDE can inject the OTC attribute. 2905 * See RFC 9234, section 4.2. 2906 * These checks should only happen on ebgp sessions. 2907 */ 2908 if (p->capa.ann.policy != 0 && p->capa.peer.policy != 0 && 2909 p->conf.ebgp) { 2910 switch (p->conf.role) { 2911 case ROLE_PROVIDER: 2912 if (p->remote_role != ROLE_CUSTOMER) 2913 goto policyfail; 2914 break; 2915 case ROLE_RS: 2916 if (p->remote_role != ROLE_RS_CLIENT) 2917 goto policyfail; 2918 break; 2919 case ROLE_RS_CLIENT: 2920 if (p->remote_role != ROLE_RS) 2921 goto policyfail; 2922 break; 2923 case ROLE_CUSTOMER: 2924 if (p->remote_role != ROLE_PROVIDER) 2925 goto policyfail; 2926 break; 2927 case ROLE_PEER: 2928 if (p->remote_role != ROLE_PEER) 2929 goto policyfail; 2930 break; 2931 default: 2932 policyfail: 2933 log_peer_warnx(&p->conf, "open policy role mismatch: " 2934 "our role %s, their role %s", 2935 log_policy(p->conf.role), 2936 log_policy(p->remote_role)); 2937 session_notification(p, ERR_OPEN, ERR_OPEN_ROLE, NULL); 2938 return (-1); 2939 } 2940 p->capa.neg.policy = 1; 2941 } 2942 2943 /* enforce presence of open policy role capability */ 2944 if (p->capa.ann.policy == 2 && p->capa.peer.policy == 0 && 2945 p->conf.ebgp) { 2946 log_peer_warnx(&p->conf, "open policy role enforced but " 2947 "not present"); 2948 session_notification(p, ERR_OPEN, ERR_OPEN_ROLE, NULL); 2949 return (-1); 2950 } 2951 2952 /* enforce presence of other capabilities */ 2953 if (p->capa.ann.refresh == 2 && p->capa.neg.refresh == 0) { 2954 capa_code = CAPA_REFRESH; 2955 capa_len = 0; 2956 goto fail; 2957 } 2958 /* enforce presence of other capabilities */ 2959 if (p->capa.ann.ext_msg == 2 && p->capa.neg.ext_msg == 0) { 2960 capa_code = CAPA_EXT_MSG; 2961 capa_len = 0; 2962 goto fail; 2963 } 2964 if (p->capa.ann.enhanced_rr == 2 && p->capa.neg.enhanced_rr == 0) { 2965 capa_code = CAPA_ENHANCED_RR; 2966 capa_len = 0; 2967 goto fail; 2968 } 2969 if (p->capa.ann.as4byte == 2 && p->capa.neg.as4byte == 0) { 2970 capa_code = CAPA_AS4BYTE; 2971 capa_len = 4; 2972 goto fail; 2973 } 2974 if (p->capa.ann.grestart.restart == 2 && 2975 p->capa.neg.grestart.restart == 0) { 2976 capa_code = CAPA_RESTART; 2977 capa_len = 2; 2978 goto fail; 2979 } 2980 for (i = AID_MIN; i < AID_MAX; i++) { 2981 if (p->capa.ann.mp[i] == 2 && p->capa.neg.mp[i] == 0) { 2982 capa_code = CAPA_MP; 2983 capa_len = 4; 2984 capa_aid = i; 2985 goto fail; 2986 } 2987 } 2988 2989 for (i = AID_MIN; i < AID_MAX; i++) { 2990 if (p->capa.neg.mp[i] == 0) 2991 continue; 2992 if ((p->capa.ann.add_path[i] & CAPA_AP_RECV_ENFORCE) && 2993 (p->capa.neg.add_path[i] & CAPA_AP_RECV) == 0) { 2994 capa_code = CAPA_ADD_PATH; 2995 capa_len = 4; 2996 capa_aid = i; 2997 goto fail; 2998 } 2999 if ((p->capa.ann.add_path[i] & CAPA_AP_SEND_ENFORCE) && 3000 (p->capa.neg.add_path[i] & CAPA_AP_SEND) == 0) { 3001 capa_code = CAPA_ADD_PATH; 3002 capa_len = 4; 3003 capa_aid = i; 3004 goto fail; 3005 } 3006 } 3007 3008 for (i = AID_MIN; i < AID_MAX; i++) { 3009 if (p->capa.neg.mp[i] == 0) 3010 continue; 3011 if (p->capa.ann.ext_nh[i] == 2 && 3012 p->capa.neg.ext_nh[i] == 0) { 3013 capa_code = CAPA_EXT_NEXTHOP; 3014 capa_len = 6; 3015 capa_aid = i; 3016 goto fail; 3017 } 3018 } 3019 return (0); 3020 3021 fail: 3022 if ((ebuf = ibuf_dynamic(2, 256)) == NULL) 3023 return (-1); 3024 /* best effort, no problem if it fails */ 3025 session_capa_add(ebuf, capa_code, capa_len); 3026 if (capa_code == CAPA_MP) 3027 session_capa_add_mp(ebuf, capa_aid); 3028 else if (capa_code == CAPA_ADD_PATH) 3029 session_capa_add_afi(ebuf, capa_aid, 0); 3030 else if (capa_code == CAPA_EXT_NEXTHOP) 3031 session_capa_add_ext_nh(ebuf, capa_aid); 3032 else if (capa_len > 0) 3033 ibuf_add_zero(ebuf, capa_len); 3034 3035 session_notification(p, ERR_OPEN, ERR_OPEN_CAPA, ebuf); 3036 ibuf_free(ebuf); 3037 return (-1); 3038 } 3039 3040 void 3041 session_dispatch_imsg(struct imsgbuf *imsgbuf, int idx, u_int *listener_cnt) 3042 { 3043 struct imsg imsg; 3044 struct ibuf ibuf; 3045 struct mrt xmrt; 3046 struct route_refresh rr; 3047 struct mrt *mrt; 3048 struct imsgbuf *i; 3049 struct peer *p; 3050 struct listen_addr *la, *next, nla; 3051 struct session_dependon sdon; 3052 struct bgpd_config tconf; 3053 uint32_t peerid; 3054 int n, fd, depend_ok, restricted; 3055 uint16_t t; 3056 uint8_t aid, errcode, subcode; 3057 3058 while (imsgbuf) { 3059 if ((n = imsg_get(imsgbuf, &imsg)) == -1) 3060 fatal("session_dispatch_imsg: imsg_get error"); 3061 3062 if (n == 0) 3063 break; 3064 3065 peerid = imsg_get_id(&imsg); 3066 switch (imsg_get_type(&imsg)) { 3067 case IMSG_SOCKET_CONN: 3068 case IMSG_SOCKET_CONN_CTL: 3069 if (idx != PFD_PIPE_MAIN) 3070 fatalx("reconf request not from parent"); 3071 if ((fd = imsg_get_fd(&imsg)) == -1) { 3072 log_warnx("expected to receive imsg fd to " 3073 "RDE but didn't receive any"); 3074 break; 3075 } 3076 if ((i = malloc(sizeof(struct imsgbuf))) == NULL) 3077 fatal(NULL); 3078 if (imsgbuf_init(i, fd) == -1 || 3079 imsgbuf_set_maxsize(i, MAX_BGPD_IMSGSIZE) == -1) 3080 fatal(NULL); 3081 if (imsg_get_type(&imsg) == IMSG_SOCKET_CONN) { 3082 if (ibuf_rde) { 3083 log_warnx("Unexpected imsg connection " 3084 "to RDE received"); 3085 imsgbuf_clear(ibuf_rde); 3086 free(ibuf_rde); 3087 } 3088 ibuf_rde = i; 3089 } else { 3090 if (ibuf_rde_ctl) { 3091 log_warnx("Unexpected imsg ctl " 3092 "connection to RDE received"); 3093 imsgbuf_clear(ibuf_rde_ctl); 3094 free(ibuf_rde_ctl); 3095 } 3096 ibuf_rde_ctl = i; 3097 } 3098 break; 3099 case IMSG_RECONF_CONF: 3100 if (idx != PFD_PIPE_MAIN) 3101 fatalx("reconf request not from parent"); 3102 if (imsg_get_data(&imsg, &tconf, sizeof(tconf)) == -1) 3103 fatal("imsg_get_data"); 3104 3105 nconf = new_config(); 3106 copy_config(nconf, &tconf); 3107 pending_reconf = 1; 3108 break; 3109 case IMSG_RECONF_PEER: 3110 if (idx != PFD_PIPE_MAIN) 3111 fatalx("reconf request not from parent"); 3112 if ((p = calloc(1, sizeof(struct peer))) == NULL) 3113 fatal("new_peer"); 3114 if (imsg_get_data(&imsg, &p->conf, sizeof(p->conf)) == 3115 -1) 3116 fatal("imsg_get_data"); 3117 p->state = p->prev_state = STATE_NONE; 3118 p->reconf_action = RECONF_REINIT; 3119 if (RB_INSERT(peer_head, &nconf->peers, p) != NULL) 3120 fatalx("%s: peer tree is corrupt", __func__); 3121 break; 3122 case IMSG_RECONF_PEER_AUTH: 3123 if (idx != PFD_PIPE_MAIN) 3124 fatalx("reconf request not from parent"); 3125 if ((p = getpeerbyid(nconf, peerid)) == NULL) { 3126 log_warnx("no such peer: id=%u", peerid); 3127 break; 3128 } 3129 if (pfkey_recv_conf(p, &imsg) == -1) 3130 fatal("pfkey_recv_conf"); 3131 break; 3132 case IMSG_RECONF_LISTENER: 3133 if (idx != PFD_PIPE_MAIN) 3134 fatalx("reconf request not from parent"); 3135 if (nconf == NULL) 3136 fatalx("IMSG_RECONF_LISTENER but no config"); 3137 if (imsg_get_data(&imsg, &nla, sizeof(nla)) == -1) 3138 fatal("imsg_get_data"); 3139 TAILQ_FOREACH(la, conf->listen_addrs, entry) 3140 if (!la_cmp(la, &nla)) 3141 break; 3142 3143 if (la == NULL) { 3144 if (nla.reconf != RECONF_REINIT) 3145 fatalx("king bula sez: " 3146 "expected REINIT"); 3147 3148 if ((nla.fd = imsg_get_fd(&imsg)) == -1) 3149 log_warnx("expected to receive fd for " 3150 "%s but didn't receive any", 3151 log_sockaddr((struct sockaddr *) 3152 &nla.sa, nla.sa_len)); 3153 3154 la = calloc(1, sizeof(struct listen_addr)); 3155 if (la == NULL) 3156 fatal(NULL); 3157 memcpy(&la->sa, &nla.sa, sizeof(la->sa)); 3158 la->flags = nla.flags; 3159 la->fd = nla.fd; 3160 la->reconf = RECONF_REINIT; 3161 TAILQ_INSERT_TAIL(nconf->listen_addrs, la, 3162 entry); 3163 } else { 3164 if (nla.reconf != RECONF_KEEP) 3165 fatalx("king bula sez: expected KEEP"); 3166 la->reconf = RECONF_KEEP; 3167 } 3168 3169 break; 3170 case IMSG_RECONF_CTRL: 3171 if (idx != PFD_PIPE_MAIN) 3172 fatalx("reconf request not from parent"); 3173 3174 if (imsg_get_data(&imsg, &restricted, 3175 sizeof(restricted)) == -1) 3176 fatal("imsg_get_data"); 3177 if ((fd = imsg_get_fd(&imsg)) == -1) { 3178 log_warnx("expected to receive fd for control " 3179 "socket but didn't receive any"); 3180 break; 3181 } 3182 if (restricted) { 3183 control_shutdown(rcsock); 3184 rcsock = fd; 3185 } else { 3186 control_shutdown(csock); 3187 csock = fd; 3188 } 3189 break; 3190 case IMSG_RECONF_DRAIN: 3191 switch (idx) { 3192 case PFD_PIPE_ROUTE: 3193 if (nconf != NULL) 3194 fatalx("got unexpected %s from RDE", 3195 "IMSG_RECONF_DONE"); 3196 imsg_compose(ibuf_main, IMSG_RECONF_DONE, 0, 0, 3197 -1, NULL, 0); 3198 break; 3199 case PFD_PIPE_MAIN: 3200 if (nconf == NULL) 3201 fatalx("got unexpected %s from parent", 3202 "IMSG_RECONF_DONE"); 3203 imsg_compose(ibuf_main, IMSG_RECONF_DRAIN, 0, 0, 3204 -1, NULL, 0); 3205 break; 3206 default: 3207 fatalx("reconf request not from parent or RDE"); 3208 } 3209 break; 3210 case IMSG_RECONF_DONE: 3211 if (idx != PFD_PIPE_MAIN) 3212 fatalx("reconf request not from parent"); 3213 if (nconf == NULL) 3214 fatalx("got IMSG_RECONF_DONE but no config"); 3215 copy_config(conf, nconf); 3216 merge_peers(conf, nconf); 3217 3218 /* delete old listeners */ 3219 TAILQ_FOREACH_SAFE(la, conf->listen_addrs, entry, 3220 next) { 3221 if (la->reconf == RECONF_NONE) { 3222 log_info("not listening on %s any more", 3223 log_sockaddr((struct sockaddr *) 3224 &la->sa, la->sa_len)); 3225 TAILQ_REMOVE(conf->listen_addrs, la, 3226 entry); 3227 close(la->fd); 3228 free(la); 3229 } 3230 } 3231 3232 /* add new listeners */ 3233 TAILQ_CONCAT(conf->listen_addrs, nconf->listen_addrs, 3234 entry); 3235 3236 setup_listeners(listener_cnt); 3237 free_config(nconf); 3238 nconf = NULL; 3239 pending_reconf = 0; 3240 log_info("SE reconfigured"); 3241 /* 3242 * IMSG_RECONF_DONE is sent when the RDE drained 3243 * the peer config sent in merge_peers(). 3244 */ 3245 break; 3246 case IMSG_SESSION_DEPENDON: 3247 if (idx != PFD_PIPE_MAIN) 3248 fatalx("IFINFO message not from parent"); 3249 if (imsg_get_data(&imsg, &sdon, sizeof(sdon)) == -1) 3250 fatalx("DEPENDON imsg with wrong len"); 3251 depend_ok = sdon.depend_state; 3252 3253 RB_FOREACH(p, peer_head, &conf->peers) 3254 if (!strcmp(p->conf.if_depend, sdon.ifname)) { 3255 if (depend_ok && !p->depend_ok) { 3256 p->depend_ok = depend_ok; 3257 bgp_fsm(p, EVNT_START, NULL); 3258 } else if (!depend_ok && p->depend_ok) { 3259 p->depend_ok = depend_ok; 3260 session_stop(p, 3261 ERR_CEASE_OTHER_CHANGE, 3262 NULL); 3263 } 3264 } 3265 break; 3266 case IMSG_MRT_OPEN: 3267 case IMSG_MRT_REOPEN: 3268 if (idx != PFD_PIPE_MAIN) 3269 fatalx("mrt request not from parent"); 3270 if (imsg_get_data(&imsg, &xmrt, sizeof(xmrt)) == -1) { 3271 log_warnx("mrt open, wrong imsg len"); 3272 break; 3273 } 3274 3275 if ((xmrt.fd = imsg_get_fd(&imsg)) == -1) { 3276 log_warnx("expected to receive fd for mrt dump " 3277 "but didn't receive any"); 3278 break; 3279 } 3280 3281 mrt = mrt_get(&mrthead, &xmrt); 3282 if (mrt == NULL) { 3283 /* new dump */ 3284 mrt = calloc(1, sizeof(struct mrt)); 3285 if (mrt == NULL) 3286 fatal("session_dispatch_imsg"); 3287 memcpy(mrt, &xmrt, sizeof(struct mrt)); 3288 if ((mrt->wbuf = msgbuf_new()) == NULL) 3289 fatal("session_dispatch_imsg"); 3290 LIST_INSERT_HEAD(&mrthead, mrt, entry); 3291 } else { 3292 /* old dump reopened */ 3293 close(mrt->fd); 3294 } 3295 mrt->fd = xmrt.fd; 3296 break; 3297 case IMSG_MRT_CLOSE: 3298 if (idx != PFD_PIPE_MAIN) 3299 fatalx("mrt request not from parent"); 3300 if (imsg_get_data(&imsg, &xmrt, sizeof(xmrt)) == -1) { 3301 log_warnx("mrt close, wrong imsg len"); 3302 break; 3303 } 3304 3305 mrt = mrt_get(&mrthead, &xmrt); 3306 if (mrt != NULL) 3307 mrt_done(mrt); 3308 break; 3309 case IMSG_CTL_KROUTE: 3310 case IMSG_CTL_KROUTE_ADDR: 3311 case IMSG_CTL_SHOW_NEXTHOP: 3312 case IMSG_CTL_SHOW_INTERFACE: 3313 case IMSG_CTL_SHOW_FIB_TABLES: 3314 case IMSG_CTL_SHOW_RTR: 3315 case IMSG_CTL_SHOW_TIMER: 3316 if (idx != PFD_PIPE_MAIN) 3317 fatalx("ctl kroute request not from parent"); 3318 control_imsg_relay(&imsg, NULL); 3319 break; 3320 case IMSG_CTL_SHOW_NEIGHBOR: 3321 if (idx != PFD_PIPE_ROUTE_CTL) 3322 fatalx("ctl rib request not from RDE"); 3323 p = getpeerbyid(conf, peerid); 3324 control_imsg_relay(&imsg, p); 3325 break; 3326 case IMSG_CTL_SHOW_RIB: 3327 case IMSG_CTL_SHOW_RIB_PREFIX: 3328 case IMSG_CTL_SHOW_RIB_COMMUNITIES: 3329 case IMSG_CTL_SHOW_RIB_ATTR: 3330 case IMSG_CTL_SHOW_RIB_MEM: 3331 case IMSG_CTL_SHOW_NETWORK: 3332 case IMSG_CTL_SHOW_FLOWSPEC: 3333 case IMSG_CTL_SHOW_SET: 3334 if (idx != PFD_PIPE_ROUTE_CTL) 3335 fatalx("ctl rib request not from RDE"); 3336 control_imsg_relay(&imsg, NULL); 3337 break; 3338 case IMSG_CTL_END: 3339 case IMSG_CTL_RESULT: 3340 control_imsg_relay(&imsg, NULL); 3341 break; 3342 case IMSG_UPDATE: 3343 if (idx != PFD_PIPE_ROUTE) 3344 fatalx("update request not from RDE"); 3345 if (imsg_get_ibuf(&imsg, &ibuf) == -1) 3346 log_warn("RDE sent invalid update"); 3347 else 3348 session_update(peerid, &ibuf); 3349 break; 3350 case IMSG_UPDATE_ERR: 3351 if (idx != PFD_PIPE_ROUTE) 3352 fatalx("update request not from RDE"); 3353 if ((p = getpeerbyid(conf, peerid)) == NULL) { 3354 log_warnx("no such peer: id=%u", peerid); 3355 break; 3356 } 3357 if (imsg_get_ibuf(&imsg, &ibuf) == -1 || 3358 ibuf_get_n8(&ibuf, &errcode) == -1 || 3359 ibuf_get_n8(&ibuf, &subcode) == -1) { 3360 log_warnx("RDE sent invalid notification"); 3361 break; 3362 } 3363 3364 session_notification(p, errcode, subcode, &ibuf); 3365 switch (errcode) { 3366 case ERR_CEASE: 3367 switch (subcode) { 3368 case ERR_CEASE_MAX_PREFIX: 3369 case ERR_CEASE_MAX_SENT_PREFIX: 3370 t = p->conf.max_out_prefix_restart; 3371 if (subcode == ERR_CEASE_MAX_PREFIX) 3372 t = p->conf.max_prefix_restart; 3373 3374 bgp_fsm(p, EVNT_STOP, NULL); 3375 if (t) 3376 timer_set(&p->timers, 3377 Timer_IdleHold, 60 * t); 3378 break; 3379 default: 3380 bgp_fsm(p, EVNT_CON_FATAL, NULL); 3381 break; 3382 } 3383 break; 3384 default: 3385 bgp_fsm(p, EVNT_CON_FATAL, NULL); 3386 break; 3387 } 3388 break; 3389 case IMSG_REFRESH: 3390 if (idx != PFD_PIPE_ROUTE) 3391 fatalx("route refresh request not from RDE"); 3392 if (imsg_get_data(&imsg, &rr, sizeof(rr)) == -1) { 3393 log_warnx("RDE sent invalid refresh msg"); 3394 break; 3395 } 3396 if ((p = getpeerbyid(conf, peerid)) == NULL) { 3397 log_warnx("no such peer: id=%u", peerid); 3398 break; 3399 } 3400 if (rr.aid < AID_MIN || rr.aid >= AID_MAX) 3401 fatalx("IMSG_REFRESH: bad AID"); 3402 session_rrefresh(p, rr.aid, rr.subtype); 3403 break; 3404 case IMSG_SESSION_RESTARTED: 3405 if (idx != PFD_PIPE_ROUTE) 3406 fatalx("session restart not from RDE"); 3407 if (imsg_get_data(&imsg, &aid, sizeof(aid)) == -1) { 3408 log_warnx("RDE sent invalid restart msg"); 3409 break; 3410 } 3411 if ((p = getpeerbyid(conf, peerid)) == NULL) { 3412 log_warnx("no such peer: id=%u", peerid); 3413 break; 3414 } 3415 if (aid < AID_MIN || aid >= AID_MAX) 3416 fatalx("IMSG_SESSION_RESTARTED: bad AID"); 3417 if (p->capa.neg.grestart.flags[aid] & 3418 CAPA_GR_RESTARTING) { 3419 log_peer_warnx(&p->conf, 3420 "graceful restart of %s finished", 3421 aid2str(aid)); 3422 p->capa.neg.grestart.flags[aid] &= 3423 ~CAPA_GR_RESTARTING; 3424 timer_stop(&p->timers, Timer_RestartTimeout); 3425 3426 /* signal back to RDE to cleanup stale routes */ 3427 if (imsg_rde(IMSG_SESSION_RESTARTED, 3428 peerid, &aid, sizeof(aid)) == -1) 3429 fatal("imsg_compose: " 3430 "IMSG_SESSION_RESTARTED"); 3431 } 3432 break; 3433 default: 3434 break; 3435 } 3436 imsg_free(&imsg); 3437 } 3438 } 3439 3440 int 3441 la_cmp(struct listen_addr *a, struct listen_addr *b) 3442 { 3443 struct sockaddr_in *in_a, *in_b; 3444 struct sockaddr_in6 *in6_a, *in6_b; 3445 3446 if (a->sa.ss_family != b->sa.ss_family) 3447 return (1); 3448 3449 switch (a->sa.ss_family) { 3450 case AF_INET: 3451 in_a = (struct sockaddr_in *)&a->sa; 3452 in_b = (struct sockaddr_in *)&b->sa; 3453 if (in_a->sin_addr.s_addr != in_b->sin_addr.s_addr) 3454 return (1); 3455 if (in_a->sin_port != in_b->sin_port) 3456 return (1); 3457 break; 3458 case AF_INET6: 3459 in6_a = (struct sockaddr_in6 *)&a->sa; 3460 in6_b = (struct sockaddr_in6 *)&b->sa; 3461 if (memcmp(&in6_a->sin6_addr, &in6_b->sin6_addr, 3462 sizeof(struct in6_addr))) 3463 return (1); 3464 if (in6_a->sin6_port != in6_b->sin6_port) 3465 return (1); 3466 break; 3467 default: 3468 fatal("king bula sez: unknown address family"); 3469 /* NOTREACHED */ 3470 } 3471 3472 return (0); 3473 } 3474 3475 struct peer * 3476 getpeerbydesc(struct bgpd_config *c, const char *descr) 3477 { 3478 struct peer *p, *res = NULL; 3479 int match = 0; 3480 3481 RB_FOREACH(p, peer_head, &c->peers) 3482 if (!strcmp(p->conf.descr, descr)) { 3483 res = p; 3484 match++; 3485 } 3486 3487 if (match > 1) 3488 log_info("neighbor description \"%s\" not unique, request " 3489 "aborted", descr); 3490 3491 if (match == 1) 3492 return (res); 3493 else 3494 return (NULL); 3495 } 3496 3497 struct peer * 3498 getpeerbyip(struct bgpd_config *c, struct sockaddr *ip) 3499 { 3500 struct bgpd_addr addr; 3501 struct peer *p, *newpeer, *loose = NULL; 3502 uint32_t id; 3503 3504 sa2addr(ip, &addr, NULL); 3505 3506 /* we might want a more effective way to find peers by IP */ 3507 RB_FOREACH(p, peer_head, &c->peers) 3508 if (!p->conf.template && 3509 !memcmp(&addr, &p->conf.remote_addr, sizeof(addr))) 3510 return (p); 3511 3512 /* try template matching */ 3513 RB_FOREACH(p, peer_head, &c->peers) 3514 if (p->conf.template && 3515 p->conf.remote_addr.aid == addr.aid && 3516 session_match_mask(p, &addr)) 3517 if (loose == NULL || loose->conf.remote_masklen < 3518 p->conf.remote_masklen) 3519 loose = p; 3520 3521 if (loose != NULL) { 3522 /* clone */ 3523 if ((newpeer = malloc(sizeof(struct peer))) == NULL) 3524 fatal(NULL); 3525 memcpy(newpeer, loose, sizeof(struct peer)); 3526 for (id = PEER_ID_DYN_MAX; id > PEER_ID_STATIC_MAX; id--) { 3527 if (getpeerbyid(c, id) == NULL) /* we found a free id */ 3528 break; 3529 } 3530 newpeer->template = loose; 3531 session_template_clone(newpeer, ip, id, 0); 3532 newpeer->state = newpeer->prev_state = STATE_NONE; 3533 newpeer->reconf_action = RECONF_KEEP; 3534 newpeer->rpending = 0; 3535 newpeer->wbuf = NULL; 3536 init_peer(newpeer); 3537 /* start delete timer, it is stopped when session goes up. */ 3538 timer_set(&newpeer->timers, Timer_SessionDown, 3539 INTERVAL_SESSION_DOWN); 3540 bgp_fsm(newpeer, EVNT_START, NULL); 3541 if (RB_INSERT(peer_head, &c->peers, newpeer) != NULL) 3542 fatalx("%s: peer tree is corrupt", __func__); 3543 return (newpeer); 3544 } 3545 3546 return (NULL); 3547 } 3548 3549 struct peer * 3550 getpeerbyid(struct bgpd_config *c, uint32_t peerid) 3551 { 3552 static struct peer lookup; 3553 3554 lookup.conf.id = peerid; 3555 3556 return RB_FIND(peer_head, &c->peers, &lookup); 3557 } 3558 3559 int 3560 peer_matched(struct peer *p, struct ctl_neighbor *n) 3561 { 3562 char *s; 3563 3564 if (n && n->addr.aid) { 3565 if (memcmp(&p->conf.remote_addr, &n->addr, 3566 sizeof(p->conf.remote_addr))) 3567 return 0; 3568 } else if (n && n->descr[0]) { 3569 s = n->is_group ? p->conf.group : p->conf.descr; 3570 /* cannot trust n->descr to be properly terminated */ 3571 if (strncmp(s, n->descr, sizeof(n->descr))) 3572 return 0; 3573 } 3574 return 1; 3575 } 3576 3577 void 3578 session_template_clone(struct peer *p, struct sockaddr *ip, uint32_t id, 3579 uint32_t as) 3580 { 3581 struct bgpd_addr remote_addr; 3582 3583 if (ip) 3584 sa2addr(ip, &remote_addr, NULL); 3585 else 3586 memcpy(&remote_addr, &p->conf.remote_addr, sizeof(remote_addr)); 3587 3588 memcpy(&p->conf, &p->template->conf, sizeof(struct peer_config)); 3589 3590 p->conf.id = id; 3591 3592 if (as) { 3593 p->conf.remote_as = as; 3594 p->conf.ebgp = (p->conf.remote_as != p->conf.local_as); 3595 if (!p->conf.ebgp) 3596 /* force enforce_as off for iBGP sessions */ 3597 p->conf.enforce_as = ENFORCE_AS_OFF; 3598 } 3599 3600 memcpy(&p->conf.remote_addr, &remote_addr, sizeof(remote_addr)); 3601 switch (p->conf.remote_addr.aid) { 3602 case AID_INET: 3603 p->conf.remote_masklen = 32; 3604 break; 3605 case AID_INET6: 3606 p->conf.remote_masklen = 128; 3607 break; 3608 } 3609 p->conf.template = 0; 3610 } 3611 3612 int 3613 session_match_mask(struct peer *p, struct bgpd_addr *a) 3614 { 3615 struct bgpd_addr masked; 3616 3617 applymask(&masked, a, p->conf.remote_masklen); 3618 if (memcmp(&masked, &p->conf.remote_addr, sizeof(masked)) == 0) 3619 return (1); 3620 return (0); 3621 } 3622 3623 void 3624 session_down(struct peer *peer) 3625 { 3626 memset(&peer->capa.neg, 0, sizeof(peer->capa.neg)); 3627 peer->stats.last_updown = getmonotime(); 3628 3629 timer_set(&peer->timers, Timer_SessionDown, INTERVAL_SESSION_DOWN); 3630 3631 /* 3632 * session_down is called in the exit code path so check 3633 * if the RDE is still around, if not there is no need to 3634 * send the message. 3635 */ 3636 if (ibuf_rde == NULL) 3637 return; 3638 if (imsg_rde(IMSG_SESSION_DOWN, peer->conf.id, NULL, 0) == -1) 3639 fatalx("imsg_compose error"); 3640 } 3641 3642 void 3643 session_up(struct peer *p) 3644 { 3645 struct session_up sup; 3646 3647 /* clear last errors, now that the session is up */ 3648 p->stats.last_sent_errcode = 0; 3649 p->stats.last_sent_suberr = 0; 3650 p->stats.last_rcvd_errcode = 0; 3651 p->stats.last_rcvd_suberr = 0; 3652 memset(p->stats.last_reason, 0, sizeof(p->stats.last_reason)); 3653 3654 timer_stop(&p->timers, Timer_SessionDown); 3655 3656 if (!p->rdesession) { 3657 /* inform rde about new peer */ 3658 if (imsg_rde(IMSG_SESSION_ADD, p->conf.id, 3659 &p->conf, sizeof(p->conf)) == -1) 3660 fatalx("imsg_compose error"); 3661 p->rdesession = 1; 3662 } 3663 3664 if (p->local.aid == AID_INET) { 3665 sup.local_v4_addr = p->local; 3666 sup.local_v6_addr = p->local_alt; 3667 } else { 3668 sup.local_v6_addr = p->local; 3669 sup.local_v4_addr = p->local_alt; 3670 } 3671 sup.remote_addr = p->remote; 3672 sup.if_scope = p->if_scope; 3673 3674 sup.remote_bgpid = p->remote_bgpid; 3675 sup.short_as = p->short_as; 3676 memcpy(&sup.capa, &p->capa.neg, sizeof(sup.capa)); 3677 p->stats.last_updown = getmonotime(); 3678 if (imsg_rde(IMSG_SESSION_UP, p->conf.id, &sup, sizeof(sup)) == -1) 3679 fatalx("imsg_compose error"); 3680 } 3681 3682 int 3683 imsg_ctl_parent(struct imsg *imsg) 3684 { 3685 return imsg_forward(ibuf_main, imsg); 3686 } 3687 3688 int 3689 imsg_ctl_rde(struct imsg *imsg) 3690 { 3691 if (ibuf_rde_ctl == NULL) 3692 return (0); 3693 /* 3694 * Use control socket to talk to RDE to bypass the queue of the 3695 * regular imsg socket. 3696 */ 3697 return imsg_forward(ibuf_rde_ctl, imsg); 3698 } 3699 3700 int 3701 imsg_ctl_rde_msg(int type, uint32_t peerid, pid_t pid) 3702 { 3703 if (ibuf_rde_ctl == NULL) 3704 return (0); 3705 3706 /* 3707 * Use control socket to talk to RDE to bypass the queue of the 3708 * regular imsg socket. 3709 */ 3710 return imsg_compose(ibuf_rde_ctl, type, peerid, pid, -1, NULL, 0); 3711 } 3712 3713 int 3714 imsg_rde(int type, uint32_t peerid, void *data, uint16_t datalen) 3715 { 3716 if (ibuf_rde == NULL) 3717 return (0); 3718 3719 return imsg_compose(ibuf_rde, type, peerid, 0, -1, data, datalen); 3720 } 3721 3722 void 3723 session_demote(struct peer *p, int level) 3724 { 3725 struct demote_msg msg; 3726 3727 strlcpy(msg.demote_group, p->conf.demote_group, 3728 sizeof(msg.demote_group)); 3729 msg.level = level; 3730 if (imsg_compose(ibuf_main, IMSG_DEMOTE, p->conf.id, 0, -1, 3731 &msg, sizeof(msg)) == -1) 3732 fatalx("imsg_compose error"); 3733 3734 p->demoted += level; 3735 } 3736 3737 void 3738 session_stop(struct peer *peer, uint8_t subcode, const char *reason) 3739 { 3740 struct ibuf *ibuf; 3741 3742 if (reason != NULL) 3743 strlcpy(peer->conf.reason, reason, sizeof(peer->conf.reason)); 3744 3745 ibuf = ibuf_dynamic(0, REASON_LEN); 3746 3747 if ((subcode == ERR_CEASE_ADMIN_DOWN || 3748 subcode == ERR_CEASE_ADMIN_RESET) && 3749 reason != NULL && *reason != '\0' && 3750 ibuf != NULL) { 3751 if (ibuf_add_n8(ibuf, strlen(reason)) == -1 || 3752 ibuf_add(ibuf, reason, strlen(reason))) { 3753 log_peer_warnx(&peer->conf, 3754 "trying to send overly long shutdown reason"); 3755 ibuf_free(ibuf); 3756 ibuf = NULL; 3757 } 3758 } 3759 switch (peer->state) { 3760 case STATE_OPENSENT: 3761 case STATE_OPENCONFIRM: 3762 case STATE_ESTABLISHED: 3763 session_notification(peer, ERR_CEASE, subcode, ibuf); 3764 break; 3765 default: 3766 /* session not open, no need to send notification */ 3767 if (subcode >= sizeof(suberr_cease_names) / sizeof(char *) || 3768 suberr_cease_names[subcode] == NULL) 3769 log_peer_warnx(&peer->conf, "session stop: %s, " 3770 "unknown subcode %u", errnames[ERR_CEASE], subcode); 3771 else 3772 log_peer_warnx(&peer->conf, "session stop: %s, %s", 3773 errnames[ERR_CEASE], suberr_cease_names[subcode]); 3774 break; 3775 } 3776 ibuf_free(ibuf); 3777 bgp_fsm(peer, EVNT_STOP, NULL); 3778 } 3779 3780 struct bgpd_addr * 3781 session_localaddr(struct peer *p) 3782 { 3783 switch (p->conf.remote_addr.aid) { 3784 case AID_INET: 3785 return &p->conf.local_addr_v4; 3786 case AID_INET6: 3787 return &p->conf.local_addr_v6; 3788 } 3789 fatalx("Unknown AID in %s", __func__); 3790 } 3791 3792 void 3793 merge_peers(struct bgpd_config *c, struct bgpd_config *nc) 3794 { 3795 struct peer *p, *np, *next; 3796 3797 RB_FOREACH(p, peer_head, &c->peers) { 3798 /* templates are handled specially */ 3799 if (p->template != NULL) 3800 continue; 3801 np = getpeerbyid(nc, p->conf.id); 3802 if (np == NULL) { 3803 p->reconf_action = RECONF_DELETE; 3804 continue; 3805 } 3806 3807 /* peer no longer uses TCP MD5SIG so deconfigure */ 3808 if (p->auth_conf.method == AUTH_MD5SIG && 3809 np->auth_conf.method != AUTH_MD5SIG) 3810 tcp_md5_del_listener(c, p); 3811 else if (np->auth_conf.method == AUTH_MD5SIG) 3812 tcp_md5_add_listener(c, np); 3813 3814 memcpy(&p->conf, &np->conf, sizeof(p->conf)); 3815 memcpy(&p->auth_conf, &np->auth_conf, sizeof(p->auth_conf)); 3816 RB_REMOVE(peer_head, &nc->peers, np); 3817 free(np); 3818 3819 p->reconf_action = RECONF_KEEP; 3820 3821 /* had demotion, is demoted, demote removed? */ 3822 if (p->demoted && !p->conf.demote_group[0]) 3823 session_demote(p, -1); 3824 3825 /* if session is not open then refresh pfkey data */ 3826 if (p->state < STATE_OPENSENT && !p->template) 3827 imsg_compose(ibuf_main, IMSG_PFKEY_RELOAD, 3828 p->conf.id, 0, -1, NULL, 0); 3829 3830 /* 3831 * If the session is established or the SessionDown timer is 3832 * running sync with the RDE 3833 */ 3834 if (p->rdesession) { 3835 if (imsg_rde(IMSG_SESSION_ADD, p->conf.id, 3836 &p->conf, sizeof(struct peer_config)) == -1) 3837 fatalx("imsg_compose error"); 3838 } 3839 3840 /* apply the config to all clones of a template */ 3841 if (p->conf.template) { 3842 struct peer *xp; 3843 RB_FOREACH(xp, peer_head, &c->peers) { 3844 if (xp->template != p) 3845 continue; 3846 session_template_clone(xp, NULL, xp->conf.id, 3847 xp->conf.remote_as); 3848 3849 if (p->rdesession) { 3850 if (imsg_rde(IMSG_SESSION_ADD, 3851 xp->conf.id, &xp->conf, 3852 sizeof(xp->conf)) == -1) 3853 fatalx("imsg_compose error"); 3854 } 3855 } 3856 } 3857 } 3858 3859 if (imsg_rde(IMSG_RECONF_DRAIN, 0, NULL, 0) == -1) 3860 fatalx("imsg_compose error"); 3861 3862 /* pfkeys of new peers already loaded by the parent process */ 3863 RB_FOREACH_SAFE(np, peer_head, &nc->peers, next) { 3864 RB_REMOVE(peer_head, &nc->peers, np); 3865 if (RB_INSERT(peer_head, &c->peers, np) != NULL) 3866 fatalx("%s: peer tree is corrupt", __func__); 3867 if (np->auth_conf.method == AUTH_MD5SIG) 3868 tcp_md5_add_listener(c, np); 3869 } 3870 } 3871