1 /* $OpenBSD: session.c,v 1.431 2022/07/18 13:56:41 claudio Exp $ */ 2 3 /* 4 * Copyright (c) 2003, 2004, 2005 Henning Brauer <henning@openbsd.org> 5 * Copyright (c) 2017 Peter van Dijk <peter.van.dijk@powerdns.com> 6 * 7 * Permission to use, copy, modify, and distribute this software for any 8 * purpose with or without fee is hereby granted, provided that the above 9 * copyright notice and this permission notice appear in all copies. 10 * 11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18 */ 19 20 #include <sys/types.h> 21 22 #include <sys/mman.h> 23 #include <sys/socket.h> 24 #include <sys/time.h> 25 #include <sys/resource.h> 26 #include <sys/un.h> 27 #include <netinet/in.h> 28 #include <netinet/ip.h> 29 #include <netinet/tcp.h> 30 #include <arpa/inet.h> 31 #include <limits.h> 32 33 #include <err.h> 34 #include <errno.h> 35 #include <fcntl.h> 36 #include <ifaddrs.h> 37 #include <poll.h> 38 #include <pwd.h> 39 #include <signal.h> 40 #include <stdio.h> 41 #include <stdlib.h> 42 #include <string.h> 43 #include <syslog.h> 44 #include <unistd.h> 45 46 #include "bgpd.h" 47 #include "session.h" 48 #include "log.h" 49 50 #define PFD_PIPE_MAIN 0 51 #define PFD_PIPE_ROUTE 1 52 #define PFD_PIPE_ROUTE_CTL 2 53 #define PFD_SOCK_CTL 3 54 #define PFD_SOCK_RCTL 4 55 #define PFD_LISTENERS_START 5 56 57 void session_sighdlr(int); 58 int setup_listeners(u_int *); 59 void init_peer(struct peer *); 60 void start_timer_holdtime(struct peer *); 61 void start_timer_keepalive(struct peer *); 62 void session_close_connection(struct peer *); 63 void change_state(struct peer *, enum session_state, enum session_events); 64 int session_setup_socket(struct peer *); 65 void session_accept(int); 66 int session_connect(struct peer *); 67 void session_tcp_established(struct peer *); 68 void session_capa_ann_none(struct peer *); 69 int session_capa_add(struct ibuf *, uint8_t, uint8_t); 70 int session_capa_add_mp(struct ibuf *, uint8_t); 71 int session_capa_add_afi(struct peer *, struct ibuf *, uint8_t, uint8_t); 72 struct bgp_msg *session_newmsg(enum msg_type, uint16_t); 73 int session_sendmsg(struct bgp_msg *, struct peer *); 74 void session_open(struct peer *); 75 void session_keepalive(struct peer *); 76 void session_update(uint32_t, void *, size_t); 77 void session_notification(struct peer *, uint8_t, uint8_t, void *, 78 ssize_t); 79 void session_rrefresh(struct peer *, uint8_t, uint8_t); 80 int session_graceful_restart(struct peer *); 81 int session_graceful_stop(struct peer *); 82 int session_dispatch_msg(struct pollfd *, struct peer *); 83 void session_process_msg(struct peer *); 84 int parse_header(struct peer *, u_char *, uint16_t *, uint8_t *); 85 int parse_open(struct peer *); 86 int parse_update(struct peer *); 87 int parse_rrefresh(struct peer *); 88 int parse_notification(struct peer *); 89 int parse_capabilities(struct peer *, u_char *, uint16_t, uint32_t *); 90 int capa_neg_calc(struct peer *, uint8_t *); 91 void session_dispatch_imsg(struct imsgbuf *, int, u_int *); 92 void session_up(struct peer *); 93 void session_down(struct peer *); 94 int imsg_rde(int, uint32_t, void *, uint16_t); 95 void session_demote(struct peer *, int); 96 void merge_peers(struct bgpd_config *, struct bgpd_config *); 97 98 int la_cmp(struct listen_addr *, struct listen_addr *); 99 void session_template_clone(struct peer *, struct sockaddr *, 100 uint32_t, uint32_t); 101 int session_match_mask(struct peer *, struct bgpd_addr *); 102 103 static struct bgpd_config *conf, *nconf; 104 static struct imsgbuf *ibuf_rde; 105 static struct imsgbuf *ibuf_rde_ctl; 106 static struct imsgbuf *ibuf_main; 107 108 struct bgpd_sysdep sysdep; 109 volatile sig_atomic_t session_quit; 110 int pending_reconf; 111 int csock = -1, rcsock = -1; 112 u_int peer_cnt; 113 114 struct mrt_head mrthead; 115 time_t pauseaccept; 116 117 static inline int 118 peer_compare(const struct peer *a, const struct peer *b) 119 { 120 return a->conf.id - b->conf.id; 121 } 122 123 RB_GENERATE(peer_head, peer, entry, peer_compare); 124 125 void 126 session_sighdlr(int sig) 127 { 128 switch (sig) { 129 case SIGINT: 130 case SIGTERM: 131 session_quit = 1; 132 break; 133 } 134 } 135 136 int 137 setup_listeners(u_int *la_cnt) 138 { 139 int ttl = 255; 140 struct listen_addr *la; 141 u_int cnt = 0; 142 143 TAILQ_FOREACH(la, conf->listen_addrs, entry) { 144 la->reconf = RECONF_NONE; 145 cnt++; 146 147 if (la->flags & LISTENER_LISTENING) 148 continue; 149 150 if (la->fd == -1) { 151 log_warn("cannot establish listener on %s: invalid fd", 152 log_sockaddr((struct sockaddr *)&la->sa, 153 la->sa_len)); 154 continue; 155 } 156 157 if (tcp_md5_prep_listener(la, &conf->peers) == -1) 158 fatal("tcp_md5_prep_listener"); 159 160 /* set ttl to 255 so that ttl-security works */ 161 if (la->sa.ss_family == AF_INET && setsockopt(la->fd, 162 IPPROTO_IP, IP_TTL, &ttl, sizeof(ttl)) == -1) { 163 log_warn("setup_listeners setsockopt TTL"); 164 continue; 165 } 166 if (la->sa.ss_family == AF_INET6 && setsockopt(la->fd, 167 IPPROTO_IPV6, IPV6_UNICAST_HOPS, &ttl, sizeof(ttl)) == -1) { 168 log_warn("setup_listeners setsockopt hoplimit"); 169 continue; 170 } 171 172 if (listen(la->fd, MAX_BACKLOG)) { 173 close(la->fd); 174 fatal("listen"); 175 } 176 177 la->flags |= LISTENER_LISTENING; 178 179 log_info("listening on %s", 180 log_sockaddr((struct sockaddr *)&la->sa, la->sa_len)); 181 } 182 183 *la_cnt = cnt; 184 185 return (0); 186 } 187 188 void 189 session_main(int debug, int verbose) 190 { 191 int timeout; 192 unsigned int i, j, idx_peers, idx_listeners, idx_mrts; 193 u_int pfd_elms = 0, peer_l_elms = 0, mrt_l_elms = 0; 194 u_int listener_cnt, ctl_cnt, mrt_cnt; 195 u_int new_cnt; 196 struct passwd *pw; 197 struct peer *p, **peer_l = NULL, *next; 198 struct mrt *m, *xm, **mrt_l = NULL; 199 struct pollfd *pfd = NULL; 200 struct listen_addr *la; 201 void *newp; 202 time_t now; 203 short events; 204 205 log_init(debug, LOG_DAEMON); 206 log_setverbose(verbose); 207 208 log_procinit(log_procnames[PROC_SE]); 209 210 if ((pw = getpwnam(BGPD_USER)) == NULL) 211 fatal(NULL); 212 213 if (chroot(pw->pw_dir) == -1) 214 fatal("chroot"); 215 if (chdir("/") == -1) 216 fatal("chdir(\"/\")"); 217 218 setproctitle("session engine"); 219 220 if (setgroups(1, &pw->pw_gid) || 221 setresgid(pw->pw_gid, pw->pw_gid, pw->pw_gid) || 222 setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid)) 223 fatal("can't drop privileges"); 224 225 if (pledge("stdio inet recvfd", NULL) == -1) 226 fatal("pledge"); 227 228 signal(SIGTERM, session_sighdlr); 229 signal(SIGINT, session_sighdlr); 230 signal(SIGPIPE, SIG_IGN); 231 signal(SIGHUP, SIG_IGN); 232 signal(SIGALRM, SIG_IGN); 233 signal(SIGUSR1, SIG_IGN); 234 235 if ((ibuf_main = malloc(sizeof(struct imsgbuf))) == NULL) 236 fatal(NULL); 237 imsg_init(ibuf_main, 3); 238 239 LIST_INIT(&mrthead); 240 listener_cnt = 0; 241 peer_cnt = 0; 242 ctl_cnt = 0; 243 244 conf = new_config(); 245 log_info("session engine ready"); 246 247 while (session_quit == 0) { 248 /* check for peers to be initialized or deleted */ 249 if (!pending_reconf) { 250 RB_FOREACH_SAFE(p, peer_head, &conf->peers, next) { 251 /* cloned peer that idled out? */ 252 if (p->template && (p->state == STATE_IDLE || 253 p->state == STATE_ACTIVE) && 254 getmonotime() - p->stats.last_updown >= 255 INTERVAL_HOLD_CLONED) 256 p->reconf_action = RECONF_DELETE; 257 258 /* new peer that needs init? */ 259 if (p->state == STATE_NONE) 260 init_peer(p); 261 262 /* reinit due? */ 263 if (p->reconf_action == RECONF_REINIT) { 264 session_stop(p, ERR_CEASE_ADMIN_RESET); 265 if (!p->conf.down) 266 timer_set(&p->timers, 267 Timer_IdleHold, 0); 268 } 269 270 /* deletion due? */ 271 if (p->reconf_action == RECONF_DELETE) { 272 if (p->demoted) 273 session_demote(p, -1); 274 p->conf.demote_group[0] = 0; 275 session_stop(p, ERR_CEASE_PEER_UNCONF); 276 timer_remove_all(&p->timers); 277 tcp_md5_del_listener(conf, p); 278 log_peer_warnx(&p->conf, "removed"); 279 RB_REMOVE(peer_head, &conf->peers, p); 280 free(p); 281 peer_cnt--; 282 continue; 283 } 284 p->reconf_action = RECONF_NONE; 285 } 286 } 287 288 if (peer_cnt > peer_l_elms) { 289 if ((newp = reallocarray(peer_l, peer_cnt, 290 sizeof(struct peer *))) == NULL) { 291 /* panic for now */ 292 log_warn("could not resize peer_l from %u -> %u" 293 " entries", peer_l_elms, peer_cnt); 294 fatalx("exiting"); 295 } 296 peer_l = newp; 297 peer_l_elms = peer_cnt; 298 } 299 300 mrt_cnt = 0; 301 for (m = LIST_FIRST(&mrthead); m != NULL; m = xm) { 302 xm = LIST_NEXT(m, entry); 303 if (m->state == MRT_STATE_REMOVE) { 304 mrt_clean(m); 305 LIST_REMOVE(m, entry); 306 free(m); 307 continue; 308 } 309 if (m->wbuf.queued) 310 mrt_cnt++; 311 } 312 313 if (mrt_cnt > mrt_l_elms) { 314 if ((newp = reallocarray(mrt_l, mrt_cnt, 315 sizeof(struct mrt *))) == NULL) { 316 /* panic for now */ 317 log_warn("could not resize mrt_l from %u -> %u" 318 " entries", mrt_l_elms, mrt_cnt); 319 fatalx("exiting"); 320 } 321 mrt_l = newp; 322 mrt_l_elms = mrt_cnt; 323 } 324 325 new_cnt = PFD_LISTENERS_START + listener_cnt + peer_cnt + 326 ctl_cnt + mrt_cnt; 327 if (new_cnt > pfd_elms) { 328 if ((newp = reallocarray(pfd, new_cnt, 329 sizeof(struct pollfd))) == NULL) { 330 /* panic for now */ 331 log_warn("could not resize pfd from %u -> %u" 332 " entries", pfd_elms, new_cnt); 333 fatalx("exiting"); 334 } 335 pfd = newp; 336 pfd_elms = new_cnt; 337 } 338 339 bzero(pfd, sizeof(struct pollfd) * pfd_elms); 340 341 set_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main); 342 set_pollfd(&pfd[PFD_PIPE_ROUTE], ibuf_rde); 343 set_pollfd(&pfd[PFD_PIPE_ROUTE_CTL], ibuf_rde_ctl); 344 345 if (pauseaccept == 0) { 346 pfd[PFD_SOCK_CTL].fd = csock; 347 pfd[PFD_SOCK_CTL].events = POLLIN; 348 pfd[PFD_SOCK_RCTL].fd = rcsock; 349 pfd[PFD_SOCK_RCTL].events = POLLIN; 350 } else { 351 pfd[PFD_SOCK_CTL].fd = -1; 352 pfd[PFD_SOCK_RCTL].fd = -1; 353 } 354 355 i = PFD_LISTENERS_START; 356 TAILQ_FOREACH(la, conf->listen_addrs, entry) { 357 if (pauseaccept == 0) { 358 pfd[i].fd = la->fd; 359 pfd[i].events = POLLIN; 360 } else 361 pfd[i].fd = -1; 362 i++; 363 } 364 idx_listeners = i; 365 timeout = 240; /* loop every 240s at least */ 366 367 now = getmonotime(); 368 RB_FOREACH(p, peer_head, &conf->peers) { 369 time_t nextaction; 370 struct timer *pt; 371 372 /* check timers */ 373 if ((pt = timer_nextisdue(&p->timers, now)) != NULL) { 374 switch (pt->type) { 375 case Timer_Hold: 376 bgp_fsm(p, EVNT_TIMER_HOLDTIME); 377 break; 378 case Timer_SendHold: 379 bgp_fsm(p, EVNT_TIMER_SENDHOLD); 380 break; 381 case Timer_ConnectRetry: 382 bgp_fsm(p, EVNT_TIMER_CONNRETRY); 383 break; 384 case Timer_Keepalive: 385 bgp_fsm(p, EVNT_TIMER_KEEPALIVE); 386 break; 387 case Timer_IdleHold: 388 bgp_fsm(p, EVNT_START); 389 break; 390 case Timer_IdleHoldReset: 391 p->IdleHoldTime = 392 INTERVAL_IDLE_HOLD_INITIAL; 393 p->errcnt = 0; 394 timer_stop(&p->timers, 395 Timer_IdleHoldReset); 396 break; 397 case Timer_CarpUndemote: 398 timer_stop(&p->timers, 399 Timer_CarpUndemote); 400 if (p->demoted && 401 p->state == STATE_ESTABLISHED) 402 session_demote(p, -1); 403 break; 404 case Timer_RestartTimeout: 405 timer_stop(&p->timers, 406 Timer_RestartTimeout); 407 session_graceful_stop(p); 408 break; 409 default: 410 fatalx("King Bula lost in time"); 411 } 412 } 413 if ((nextaction = timer_nextduein(&p->timers, 414 now)) != -1 && nextaction < timeout) 415 timeout = nextaction; 416 417 /* are we waiting for a write? */ 418 events = POLLIN; 419 if (p->wbuf.queued > 0 || p->state == STATE_CONNECT) 420 events |= POLLOUT; 421 /* is there still work to do? */ 422 if (p->rpending && p->rbuf && p->rbuf->wpos) 423 timeout = 0; 424 425 /* poll events */ 426 if (p->fd != -1 && events != 0) { 427 pfd[i].fd = p->fd; 428 pfd[i].events = events; 429 peer_l[i - idx_listeners] = p; 430 i++; 431 } 432 } 433 434 idx_peers = i; 435 436 LIST_FOREACH(m, &mrthead, entry) 437 if (m->wbuf.queued) { 438 pfd[i].fd = m->wbuf.fd; 439 pfd[i].events = POLLOUT; 440 mrt_l[i - idx_peers] = m; 441 i++; 442 } 443 444 idx_mrts = i; 445 446 i += control_fill_pfds(pfd + i, pfd_elms -i); 447 448 if (i > pfd_elms) 449 fatalx("poll pfd overflow"); 450 451 if (pauseaccept && timeout > 1) 452 timeout = 1; 453 if (timeout < 0) 454 timeout = 0; 455 if (poll(pfd, i, timeout * 1000) == -1) { 456 if (errno == EINTR) 457 continue; 458 fatal("poll error"); 459 } 460 461 /* 462 * If we previously saw fd exhaustion, we stop accept() 463 * for 1 second to throttle the accept() loop. 464 */ 465 if (pauseaccept && getmonotime() > pauseaccept + 1) 466 pauseaccept = 0; 467 468 if (handle_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main) == -1) { 469 log_warnx("SE: Lost connection to parent"); 470 session_quit = 1; 471 continue; 472 } else 473 session_dispatch_imsg(ibuf_main, PFD_PIPE_MAIN, 474 &listener_cnt); 475 476 if (handle_pollfd(&pfd[PFD_PIPE_ROUTE], ibuf_rde) == -1) { 477 log_warnx("SE: Lost connection to RDE"); 478 msgbuf_clear(&ibuf_rde->w); 479 free(ibuf_rde); 480 ibuf_rde = NULL; 481 } else 482 session_dispatch_imsg(ibuf_rde, PFD_PIPE_ROUTE, 483 &listener_cnt); 484 485 if (handle_pollfd(&pfd[PFD_PIPE_ROUTE_CTL], ibuf_rde_ctl) == 486 -1) { 487 log_warnx("SE: Lost connection to RDE control"); 488 msgbuf_clear(&ibuf_rde_ctl->w); 489 free(ibuf_rde_ctl); 490 ibuf_rde_ctl = NULL; 491 } else 492 session_dispatch_imsg(ibuf_rde_ctl, PFD_PIPE_ROUTE_CTL, 493 &listener_cnt); 494 495 if (pfd[PFD_SOCK_CTL].revents & POLLIN) 496 ctl_cnt += control_accept(csock, 0); 497 498 if (pfd[PFD_SOCK_RCTL].revents & POLLIN) 499 ctl_cnt += control_accept(rcsock, 1); 500 501 for (j = PFD_LISTENERS_START; j < idx_listeners; j++) 502 if (pfd[j].revents & POLLIN) 503 session_accept(pfd[j].fd); 504 505 for (; j < idx_peers; j++) 506 session_dispatch_msg(&pfd[j], 507 peer_l[j - idx_listeners]); 508 509 RB_FOREACH(p, peer_head, &conf->peers) 510 if (p->rbuf && p->rbuf->wpos) 511 session_process_msg(p); 512 513 for (; j < idx_mrts; j++) 514 if (pfd[j].revents & POLLOUT) 515 mrt_write(mrt_l[j - idx_peers]); 516 517 for (; j < i; j++) 518 ctl_cnt -= control_dispatch_msg(&pfd[j], &conf->peers); 519 } 520 521 RB_FOREACH_SAFE(p, peer_head, &conf->peers, next) { 522 RB_REMOVE(peer_head, &conf->peers, p); 523 strlcpy(p->conf.reason, 524 "bgpd shutting down", 525 sizeof(p->conf.reason)); 526 session_stop(p, ERR_CEASE_ADMIN_DOWN); 527 timer_remove_all(&p->timers); 528 free(p); 529 } 530 531 while ((m = LIST_FIRST(&mrthead)) != NULL) { 532 mrt_clean(m); 533 LIST_REMOVE(m, entry); 534 free(m); 535 } 536 537 free_config(conf); 538 free(peer_l); 539 free(mrt_l); 540 free(pfd); 541 542 /* close pipes */ 543 if (ibuf_rde) { 544 msgbuf_write(&ibuf_rde->w); 545 msgbuf_clear(&ibuf_rde->w); 546 close(ibuf_rde->fd); 547 free(ibuf_rde); 548 } 549 if (ibuf_rde_ctl) { 550 msgbuf_clear(&ibuf_rde_ctl->w); 551 close(ibuf_rde_ctl->fd); 552 free(ibuf_rde_ctl); 553 } 554 msgbuf_write(&ibuf_main->w); 555 msgbuf_clear(&ibuf_main->w); 556 close(ibuf_main->fd); 557 free(ibuf_main); 558 559 control_shutdown(csock); 560 control_shutdown(rcsock); 561 log_info("session engine exiting"); 562 exit(0); 563 } 564 565 void 566 init_peer(struct peer *p) 567 { 568 TAILQ_INIT(&p->timers); 569 p->fd = p->wbuf.fd = -1; 570 571 if (p->conf.if_depend[0]) 572 imsg_compose(ibuf_main, IMSG_SESSION_DEPENDON, 0, 0, -1, 573 p->conf.if_depend, sizeof(p->conf.if_depend)); 574 else 575 p->depend_ok = 1; 576 577 peer_cnt++; 578 579 change_state(p, STATE_IDLE, EVNT_NONE); 580 if (p->conf.down) 581 timer_stop(&p->timers, Timer_IdleHold); /* no autostart */ 582 else 583 timer_set(&p->timers, Timer_IdleHold, 0); /* start ASAP */ 584 585 /* 586 * on startup, demote if requested. 587 * do not handle new peers. they must reach ESTABLISHED beforehands. 588 * peers added at runtime have reconf_action set to RECONF_REINIT. 589 */ 590 if (p->reconf_action != RECONF_REINIT && p->conf.demote_group[0]) 591 session_demote(p, +1); 592 } 593 594 void 595 bgp_fsm(struct peer *peer, enum session_events event) 596 { 597 switch (peer->state) { 598 case STATE_NONE: 599 /* nothing */ 600 break; 601 case STATE_IDLE: 602 switch (event) { 603 case EVNT_START: 604 timer_stop(&peer->timers, Timer_Hold); 605 timer_stop(&peer->timers, Timer_SendHold); 606 timer_stop(&peer->timers, Timer_Keepalive); 607 timer_stop(&peer->timers, Timer_IdleHold); 608 609 /* allocate read buffer */ 610 peer->rbuf = calloc(1, sizeof(struct ibuf_read)); 611 if (peer->rbuf == NULL) 612 fatal(NULL); 613 614 /* init write buffer */ 615 msgbuf_init(&peer->wbuf); 616 617 peer->stats.last_sent_errcode = 0; 618 peer->stats.last_sent_suberr = 0; 619 peer->stats.last_rcvd_errcode = 0; 620 peer->stats.last_rcvd_suberr = 0; 621 622 if (!peer->depend_ok) 623 timer_stop(&peer->timers, Timer_ConnectRetry); 624 else if (peer->passive || peer->conf.passive || 625 peer->conf.template) { 626 change_state(peer, STATE_ACTIVE, event); 627 timer_stop(&peer->timers, Timer_ConnectRetry); 628 } else { 629 change_state(peer, STATE_CONNECT, event); 630 timer_set(&peer->timers, Timer_ConnectRetry, 631 conf->connectretry); 632 session_connect(peer); 633 } 634 peer->passive = 0; 635 break; 636 default: 637 /* ignore */ 638 break; 639 } 640 break; 641 case STATE_CONNECT: 642 switch (event) { 643 case EVNT_START: 644 /* ignore */ 645 break; 646 case EVNT_CON_OPEN: 647 session_tcp_established(peer); 648 session_open(peer); 649 timer_stop(&peer->timers, Timer_ConnectRetry); 650 peer->holdtime = INTERVAL_HOLD_INITIAL; 651 start_timer_holdtime(peer); 652 change_state(peer, STATE_OPENSENT, event); 653 break; 654 case EVNT_CON_OPENFAIL: 655 timer_set(&peer->timers, Timer_ConnectRetry, 656 conf->connectretry); 657 session_close_connection(peer); 658 change_state(peer, STATE_ACTIVE, event); 659 break; 660 case EVNT_TIMER_CONNRETRY: 661 timer_set(&peer->timers, Timer_ConnectRetry, 662 conf->connectretry); 663 session_connect(peer); 664 break; 665 default: 666 change_state(peer, STATE_IDLE, event); 667 break; 668 } 669 break; 670 case STATE_ACTIVE: 671 switch (event) { 672 case EVNT_START: 673 /* ignore */ 674 break; 675 case EVNT_CON_OPEN: 676 session_tcp_established(peer); 677 session_open(peer); 678 timer_stop(&peer->timers, Timer_ConnectRetry); 679 peer->holdtime = INTERVAL_HOLD_INITIAL; 680 start_timer_holdtime(peer); 681 change_state(peer, STATE_OPENSENT, event); 682 break; 683 case EVNT_CON_OPENFAIL: 684 timer_set(&peer->timers, Timer_ConnectRetry, 685 conf->connectretry); 686 session_close_connection(peer); 687 change_state(peer, STATE_ACTIVE, event); 688 break; 689 case EVNT_TIMER_CONNRETRY: 690 timer_set(&peer->timers, Timer_ConnectRetry, 691 peer->holdtime); 692 change_state(peer, STATE_CONNECT, event); 693 session_connect(peer); 694 break; 695 default: 696 change_state(peer, STATE_IDLE, event); 697 break; 698 } 699 break; 700 case STATE_OPENSENT: 701 switch (event) { 702 case EVNT_START: 703 /* ignore */ 704 break; 705 case EVNT_STOP: 706 change_state(peer, STATE_IDLE, event); 707 break; 708 case EVNT_CON_CLOSED: 709 session_close_connection(peer); 710 timer_set(&peer->timers, Timer_ConnectRetry, 711 conf->connectretry); 712 change_state(peer, STATE_ACTIVE, event); 713 break; 714 case EVNT_CON_FATAL: 715 change_state(peer, STATE_IDLE, event); 716 break; 717 case EVNT_TIMER_HOLDTIME: 718 case EVNT_TIMER_SENDHOLD: 719 session_notification(peer, ERR_HOLDTIMEREXPIRED, 720 0, NULL, 0); 721 change_state(peer, STATE_IDLE, event); 722 break; 723 case EVNT_RCVD_OPEN: 724 /* parse_open calls change_state itself on failure */ 725 if (parse_open(peer)) 726 break; 727 session_keepalive(peer); 728 change_state(peer, STATE_OPENCONFIRM, event); 729 break; 730 case EVNT_RCVD_NOTIFICATION: 731 if (parse_notification(peer)) { 732 change_state(peer, STATE_IDLE, event); 733 /* don't punish, capa negotiation */ 734 timer_set(&peer->timers, Timer_IdleHold, 0); 735 peer->IdleHoldTime /= 2; 736 } else 737 change_state(peer, STATE_IDLE, event); 738 break; 739 default: 740 session_notification(peer, 741 ERR_FSM, ERR_FSM_UNEX_OPENSENT, NULL, 0); 742 change_state(peer, STATE_IDLE, event); 743 break; 744 } 745 break; 746 case STATE_OPENCONFIRM: 747 switch (event) { 748 case EVNT_START: 749 /* ignore */ 750 break; 751 case EVNT_STOP: 752 change_state(peer, STATE_IDLE, event); 753 break; 754 case EVNT_CON_CLOSED: 755 case EVNT_CON_FATAL: 756 change_state(peer, STATE_IDLE, event); 757 break; 758 case EVNT_TIMER_HOLDTIME: 759 case EVNT_TIMER_SENDHOLD: 760 session_notification(peer, ERR_HOLDTIMEREXPIRED, 761 0, NULL, 0); 762 change_state(peer, STATE_IDLE, event); 763 break; 764 case EVNT_TIMER_KEEPALIVE: 765 session_keepalive(peer); 766 break; 767 case EVNT_RCVD_KEEPALIVE: 768 start_timer_holdtime(peer); 769 change_state(peer, STATE_ESTABLISHED, event); 770 break; 771 case EVNT_RCVD_NOTIFICATION: 772 parse_notification(peer); 773 change_state(peer, STATE_IDLE, event); 774 break; 775 default: 776 session_notification(peer, 777 ERR_FSM, ERR_FSM_UNEX_OPENCONFIRM, NULL, 0); 778 change_state(peer, STATE_IDLE, event); 779 break; 780 } 781 break; 782 case STATE_ESTABLISHED: 783 switch (event) { 784 case EVNT_START: 785 /* ignore */ 786 break; 787 case EVNT_STOP: 788 change_state(peer, STATE_IDLE, event); 789 break; 790 case EVNT_CON_CLOSED: 791 case EVNT_CON_FATAL: 792 change_state(peer, STATE_IDLE, event); 793 break; 794 case EVNT_TIMER_HOLDTIME: 795 case EVNT_TIMER_SENDHOLD: 796 session_notification(peer, ERR_HOLDTIMEREXPIRED, 797 0, NULL, 0); 798 change_state(peer, STATE_IDLE, event); 799 break; 800 case EVNT_TIMER_KEEPALIVE: 801 session_keepalive(peer); 802 break; 803 case EVNT_RCVD_KEEPALIVE: 804 start_timer_holdtime(peer); 805 break; 806 case EVNT_RCVD_UPDATE: 807 start_timer_holdtime(peer); 808 if (parse_update(peer)) 809 change_state(peer, STATE_IDLE, event); 810 else 811 start_timer_holdtime(peer); 812 break; 813 case EVNT_RCVD_NOTIFICATION: 814 parse_notification(peer); 815 change_state(peer, STATE_IDLE, event); 816 break; 817 default: 818 session_notification(peer, 819 ERR_FSM, ERR_FSM_UNEX_ESTABLISHED, NULL, 0); 820 change_state(peer, STATE_IDLE, event); 821 break; 822 } 823 break; 824 } 825 } 826 827 void 828 start_timer_holdtime(struct peer *peer) 829 { 830 if (peer->holdtime > 0) 831 timer_set(&peer->timers, Timer_Hold, peer->holdtime); 832 else 833 timer_stop(&peer->timers, Timer_Hold); 834 } 835 836 void 837 start_timer_keepalive(struct peer *peer) 838 { 839 if (peer->holdtime > 0) 840 timer_set(&peer->timers, Timer_Keepalive, peer->holdtime / 3); 841 else 842 timer_stop(&peer->timers, Timer_Keepalive); 843 } 844 845 void 846 session_close_connection(struct peer *peer) 847 { 848 if (peer->fd != -1) { 849 close(peer->fd); 850 pauseaccept = 0; 851 } 852 peer->fd = peer->wbuf.fd = -1; 853 } 854 855 void 856 change_state(struct peer *peer, enum session_state state, 857 enum session_events event) 858 { 859 struct mrt *mrt; 860 861 switch (state) { 862 case STATE_IDLE: 863 /* carp demotion first. new peers handled in init_peer */ 864 if (peer->state == STATE_ESTABLISHED && 865 peer->conf.demote_group[0] && !peer->demoted) 866 session_demote(peer, +1); 867 868 /* 869 * try to write out what's buffered (maybe a notification), 870 * don't bother if it fails 871 */ 872 if (peer->state >= STATE_OPENSENT && peer->wbuf.queued) 873 msgbuf_write(&peer->wbuf); 874 875 /* 876 * we must start the timer for the next EVNT_START 877 * if we are coming here due to an error and the 878 * session was not established successfully before, the 879 * starttimerinterval needs to be exponentially increased 880 */ 881 if (peer->IdleHoldTime == 0) 882 peer->IdleHoldTime = INTERVAL_IDLE_HOLD_INITIAL; 883 peer->holdtime = INTERVAL_HOLD_INITIAL; 884 timer_stop(&peer->timers, Timer_ConnectRetry); 885 timer_stop(&peer->timers, Timer_Keepalive); 886 timer_stop(&peer->timers, Timer_Hold); 887 timer_stop(&peer->timers, Timer_SendHold); 888 timer_stop(&peer->timers, Timer_IdleHold); 889 timer_stop(&peer->timers, Timer_IdleHoldReset); 890 session_close_connection(peer); 891 msgbuf_clear(&peer->wbuf); 892 free(peer->rbuf); 893 peer->rbuf = NULL; 894 peer->rpending = 0; 895 bzero(&peer->capa.peer, sizeof(peer->capa.peer)); 896 if (!peer->template) 897 imsg_compose(ibuf_main, IMSG_PFKEY_RELOAD, 898 peer->conf.id, 0, -1, NULL, 0); 899 900 if (event != EVNT_STOP) { 901 timer_set(&peer->timers, Timer_IdleHold, 902 peer->IdleHoldTime); 903 if (event != EVNT_NONE && 904 peer->IdleHoldTime < MAX_IDLE_HOLD/2) 905 peer->IdleHoldTime *= 2; 906 } 907 if (peer->state == STATE_ESTABLISHED) { 908 if (peer->capa.neg.grestart.restart == 2 && 909 (event == EVNT_CON_CLOSED || 910 event == EVNT_CON_FATAL)) { 911 /* don't punish graceful restart */ 912 timer_set(&peer->timers, Timer_IdleHold, 0); 913 peer->IdleHoldTime /= 2; 914 session_graceful_restart(peer); 915 } else 916 session_down(peer); 917 } 918 if (peer->state == STATE_NONE || 919 peer->state == STATE_ESTABLISHED) { 920 /* initialize capability negotiation structures */ 921 memcpy(&peer->capa.ann, &peer->conf.capabilities, 922 sizeof(peer->capa.ann)); 923 if (!peer->conf.announce_capa) 924 session_capa_ann_none(peer); 925 } 926 break; 927 case STATE_CONNECT: 928 if (peer->state == STATE_ESTABLISHED && 929 peer->capa.neg.grestart.restart == 2) { 930 /* do the graceful restart dance */ 931 session_graceful_restart(peer); 932 peer->holdtime = INTERVAL_HOLD_INITIAL; 933 timer_stop(&peer->timers, Timer_ConnectRetry); 934 timer_stop(&peer->timers, Timer_Keepalive); 935 timer_stop(&peer->timers, Timer_Hold); 936 timer_stop(&peer->timers, Timer_SendHold); 937 timer_stop(&peer->timers, Timer_IdleHold); 938 timer_stop(&peer->timers, Timer_IdleHoldReset); 939 session_close_connection(peer); 940 msgbuf_clear(&peer->wbuf); 941 bzero(&peer->capa.peer, sizeof(peer->capa.peer)); 942 } 943 break; 944 case STATE_ACTIVE: 945 if (!peer->template) 946 imsg_compose(ibuf_main, IMSG_PFKEY_RELOAD, 947 peer->conf.id, 0, -1, NULL, 0); 948 break; 949 case STATE_OPENSENT: 950 break; 951 case STATE_OPENCONFIRM: 952 break; 953 case STATE_ESTABLISHED: 954 timer_set(&peer->timers, Timer_IdleHoldReset, 955 peer->IdleHoldTime); 956 if (peer->demoted) 957 timer_set(&peer->timers, Timer_CarpUndemote, 958 INTERVAL_HOLD_DEMOTED); 959 session_up(peer); 960 break; 961 default: /* something seriously fucked */ 962 break; 963 } 964 965 log_statechange(peer, state, event); 966 LIST_FOREACH(mrt, &mrthead, entry) { 967 if (!(mrt->type == MRT_ALL_IN || mrt->type == MRT_ALL_OUT)) 968 continue; 969 if ((mrt->peer_id == 0 && mrt->group_id == 0) || 970 mrt->peer_id == peer->conf.id || (mrt->group_id != 0 && 971 mrt->group_id == peer->conf.groupid)) 972 mrt_dump_state(mrt, peer->state, state, peer); 973 } 974 peer->prev_state = peer->state; 975 peer->state = state; 976 } 977 978 void 979 session_accept(int listenfd) 980 { 981 int connfd; 982 socklen_t len; 983 struct sockaddr_storage cliaddr; 984 struct peer *p = NULL; 985 986 len = sizeof(cliaddr); 987 if ((connfd = accept4(listenfd, 988 (struct sockaddr *)&cliaddr, &len, 989 SOCK_CLOEXEC | SOCK_NONBLOCK)) == -1) { 990 if (errno == ENFILE || errno == EMFILE) 991 pauseaccept = getmonotime(); 992 else if (errno != EWOULDBLOCK && errno != EINTR && 993 errno != ECONNABORTED) 994 log_warn("accept"); 995 return; 996 } 997 998 p = getpeerbyip(conf, (struct sockaddr *)&cliaddr); 999 1000 if (p != NULL && p->state == STATE_IDLE && p->errcnt < 2) { 1001 if (timer_running(&p->timers, Timer_IdleHold, NULL)) { 1002 /* fast reconnect after clear */ 1003 p->passive = 1; 1004 bgp_fsm(p, EVNT_START); 1005 } 1006 } 1007 1008 if (p != NULL && 1009 (p->state == STATE_CONNECT || p->state == STATE_ACTIVE)) { 1010 if (p->fd != -1) { 1011 if (p->state == STATE_CONNECT) 1012 session_close_connection(p); 1013 else { 1014 close(connfd); 1015 return; 1016 } 1017 } 1018 1019 open: 1020 if (p->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) { 1021 log_peer_warnx(&p->conf, 1022 "ipsec or md5sig configured but not available"); 1023 close(connfd); 1024 return; 1025 } 1026 1027 if (tcp_md5_check(connfd, p) == -1) { 1028 close(connfd); 1029 return; 1030 } 1031 p->fd = p->wbuf.fd = connfd; 1032 if (session_setup_socket(p)) { 1033 close(connfd); 1034 return; 1035 } 1036 bgp_fsm(p, EVNT_CON_OPEN); 1037 return; 1038 } else if (p != NULL && p->state == STATE_ESTABLISHED && 1039 p->capa.neg.grestart.restart == 2) { 1040 /* first do the graceful restart dance */ 1041 change_state(p, STATE_CONNECT, EVNT_CON_CLOSED); 1042 /* then do part of the open dance */ 1043 goto open; 1044 } else { 1045 log_conn_attempt(p, (struct sockaddr *)&cliaddr, len); 1046 close(connfd); 1047 } 1048 } 1049 1050 int 1051 session_connect(struct peer *peer) 1052 { 1053 struct sockaddr *sa; 1054 struct bgpd_addr *bind_addr = NULL; 1055 socklen_t sa_len; 1056 1057 /* 1058 * we do not need the overcomplicated collision detection RFC 1771 1059 * describes; we simply make sure there is only ever one concurrent 1060 * tcp connection per peer. 1061 */ 1062 if (peer->fd != -1) 1063 return (-1); 1064 1065 if ((peer->fd = socket(aid2af(peer->conf.remote_addr.aid), 1066 SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, IPPROTO_TCP)) == -1) { 1067 log_peer_warn(&peer->conf, "session_connect socket"); 1068 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1069 return (-1); 1070 } 1071 1072 if (peer->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) { 1073 log_peer_warnx(&peer->conf, 1074 "ipsec or md5sig configured but not available"); 1075 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1076 return (-1); 1077 } 1078 1079 tcp_md5_set(peer->fd, peer); 1080 peer->wbuf.fd = peer->fd; 1081 1082 /* if local-address is set we need to bind() */ 1083 switch (peer->conf.remote_addr.aid) { 1084 case AID_INET: 1085 bind_addr = &peer->conf.local_addr_v4; 1086 break; 1087 case AID_INET6: 1088 bind_addr = &peer->conf.local_addr_v6; 1089 break; 1090 } 1091 if ((sa = addr2sa(bind_addr, 0, &sa_len)) != NULL) { 1092 if (bind(peer->fd, sa, sa_len) == -1) { 1093 log_peer_warn(&peer->conf, "session_connect bind"); 1094 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1095 return (-1); 1096 } 1097 } 1098 1099 if (session_setup_socket(peer)) { 1100 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1101 return (-1); 1102 } 1103 1104 sa = addr2sa(&peer->conf.remote_addr, peer->conf.remote_port, &sa_len); 1105 if (connect(peer->fd, sa, sa_len) == -1) { 1106 if (errno != EINPROGRESS) { 1107 if (errno != peer->lasterr) 1108 log_peer_warn(&peer->conf, "connect"); 1109 peer->lasterr = errno; 1110 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1111 return (-1); 1112 } 1113 } else 1114 bgp_fsm(peer, EVNT_CON_OPEN); 1115 1116 return (0); 1117 } 1118 1119 int 1120 session_setup_socket(struct peer *p) 1121 { 1122 int ttl = p->conf.distance; 1123 int pre = IPTOS_PREC_INTERNETCONTROL; 1124 int nodelay = 1; 1125 int bsize; 1126 1127 switch (p->conf.remote_addr.aid) { 1128 case AID_INET: 1129 /* set precedence, see RFC 1771 appendix 5 */ 1130 if (setsockopt(p->fd, IPPROTO_IP, IP_TOS, &pre, sizeof(pre)) == 1131 -1) { 1132 log_peer_warn(&p->conf, 1133 "session_setup_socket setsockopt TOS"); 1134 return (-1); 1135 } 1136 1137 if (p->conf.ebgp) { 1138 /* 1139 * set TTL to foreign router's distance 1140 * 1=direct n=multihop with ttlsec, we always use 255 1141 */ 1142 if (p->conf.ttlsec) { 1143 ttl = 256 - p->conf.distance; 1144 if (setsockopt(p->fd, IPPROTO_IP, IP_MINTTL, 1145 &ttl, sizeof(ttl)) == -1) { 1146 log_peer_warn(&p->conf, 1147 "session_setup_socket: " 1148 "setsockopt MINTTL"); 1149 return (-1); 1150 } 1151 ttl = 255; 1152 } 1153 1154 if (setsockopt(p->fd, IPPROTO_IP, IP_TTL, &ttl, 1155 sizeof(ttl)) == -1) { 1156 log_peer_warn(&p->conf, 1157 "session_setup_socket setsockopt TTL"); 1158 return (-1); 1159 } 1160 } 1161 break; 1162 case AID_INET6: 1163 if (p->conf.ebgp) { 1164 /* 1165 * set hoplimit to foreign router's distance 1166 * 1=direct n=multihop with ttlsec, we always use 255 1167 */ 1168 if (p->conf.ttlsec) { 1169 ttl = 256 - p->conf.distance; 1170 if (setsockopt(p->fd, IPPROTO_IPV6, 1171 IPV6_MINHOPCOUNT, &ttl, sizeof(ttl)) 1172 == -1) { 1173 log_peer_warn(&p->conf, 1174 "session_setup_socket: " 1175 "setsockopt MINHOPCOUNT"); 1176 return (-1); 1177 } 1178 ttl = 255; 1179 } 1180 if (setsockopt(p->fd, IPPROTO_IPV6, IPV6_UNICAST_HOPS, 1181 &ttl, sizeof(ttl)) == -1) { 1182 log_peer_warn(&p->conf, 1183 "session_setup_socket setsockopt hoplimit"); 1184 return (-1); 1185 } 1186 } 1187 break; 1188 } 1189 1190 /* set TCP_NODELAY */ 1191 if (setsockopt(p->fd, IPPROTO_TCP, TCP_NODELAY, &nodelay, 1192 sizeof(nodelay)) == -1) { 1193 log_peer_warn(&p->conf, 1194 "session_setup_socket setsockopt TCP_NODELAY"); 1195 return (-1); 1196 } 1197 1198 /* only increase bufsize (and thus window) if md5 or ipsec is in use */ 1199 if (p->conf.auth.method != AUTH_NONE) { 1200 /* try to increase bufsize. no biggie if it fails */ 1201 bsize = 65535; 1202 while (bsize > 8192 && 1203 setsockopt(p->fd, SOL_SOCKET, SO_RCVBUF, &bsize, 1204 sizeof(bsize)) == -1 && errno != EINVAL) 1205 bsize /= 2; 1206 bsize = 65535; 1207 while (bsize > 8192 && 1208 setsockopt(p->fd, SOL_SOCKET, SO_SNDBUF, &bsize, 1209 sizeof(bsize)) == -1 && errno != EINVAL) 1210 bsize /= 2; 1211 } 1212 1213 return (0); 1214 } 1215 1216 /* compare two sockaddrs by converting them into bgpd_addr */ 1217 static int 1218 sa_cmp(struct sockaddr *a, struct sockaddr *b) 1219 { 1220 struct bgpd_addr ba, bb; 1221 1222 sa2addr(a, &ba, NULL); 1223 sa2addr(b, &bb, NULL); 1224 1225 return (memcmp(&ba, &bb, sizeof(ba)) == 0); 1226 } 1227 1228 static void 1229 get_alternate_addr(struct sockaddr *sa, struct bgpd_addr *alt) 1230 { 1231 struct ifaddrs *ifap, *ifa, *match; 1232 1233 if (getifaddrs(&ifap) == -1) 1234 fatal("getifaddrs"); 1235 1236 for (match = ifap; match != NULL; match = match->ifa_next) 1237 if (match->ifa_addr != NULL && 1238 sa_cmp(sa, match->ifa_addr) == 0) 1239 break; 1240 1241 if (match == NULL) { 1242 log_warnx("%s: local address not found", __func__); 1243 return; 1244 } 1245 1246 switch (sa->sa_family) { 1247 case AF_INET6: 1248 for (ifa = ifap; ifa != NULL; ifa = ifa->ifa_next) { 1249 if (ifa->ifa_addr != NULL && 1250 ifa->ifa_addr->sa_family == AF_INET && 1251 strcmp(ifa->ifa_name, match->ifa_name) == 0) { 1252 sa2addr(ifa->ifa_addr, alt, NULL); 1253 break; 1254 } 1255 } 1256 break; 1257 case AF_INET: 1258 for (ifa = ifap; ifa != NULL; ifa = ifa->ifa_next) { 1259 if (ifa->ifa_addr != NULL && 1260 ifa->ifa_addr->sa_family == AF_INET6 && 1261 strcmp(ifa->ifa_name, match->ifa_name) == 0) { 1262 struct sockaddr_in6 *s = 1263 (struct sockaddr_in6 *)ifa->ifa_addr; 1264 1265 /* only accept global scope addresses */ 1266 if (IN6_IS_ADDR_LINKLOCAL(&s->sin6_addr) || 1267 IN6_IS_ADDR_SITELOCAL(&s->sin6_addr)) 1268 continue; 1269 sa2addr(ifa->ifa_addr, alt, NULL); 1270 break; 1271 } 1272 } 1273 break; 1274 default: 1275 log_warnx("%s: unsupported address family %d", __func__, 1276 sa->sa_family); 1277 break; 1278 } 1279 1280 freeifaddrs(ifap); 1281 } 1282 1283 void 1284 session_tcp_established(struct peer *peer) 1285 { 1286 struct sockaddr_storage ss; 1287 socklen_t len; 1288 1289 len = sizeof(ss); 1290 if (getsockname(peer->fd, (struct sockaddr *)&ss, &len) == -1) 1291 log_warn("getsockname"); 1292 sa2addr((struct sockaddr *)&ss, &peer->local, &peer->local_port); 1293 get_alternate_addr((struct sockaddr *)&ss, &peer->local_alt); 1294 len = sizeof(ss); 1295 if (getpeername(peer->fd, (struct sockaddr *)&ss, &len) == -1) 1296 log_warn("getpeername"); 1297 sa2addr((struct sockaddr *)&ss, &peer->remote, &peer->remote_port); 1298 } 1299 1300 void 1301 session_capa_ann_none(struct peer *peer) 1302 { 1303 bzero(&peer->capa.ann, sizeof(peer->capa.ann)); 1304 } 1305 1306 int 1307 session_capa_add(struct ibuf *opb, uint8_t capa_code, uint8_t capa_len) 1308 { 1309 int errs = 0; 1310 1311 errs += ibuf_add(opb, &capa_code, sizeof(capa_code)); 1312 errs += ibuf_add(opb, &capa_len, sizeof(capa_len)); 1313 return (errs); 1314 } 1315 1316 int 1317 session_capa_add_mp(struct ibuf *buf, uint8_t aid) 1318 { 1319 uint8_t safi, pad = 0; 1320 uint16_t afi; 1321 int errs = 0; 1322 1323 if (aid2afi(aid, &afi, &safi) == -1) 1324 fatalx("session_capa_add_mp: bad afi/safi pair"); 1325 afi = htons(afi); 1326 errs += ibuf_add(buf, &afi, sizeof(afi)); 1327 errs += ibuf_add(buf, &pad, sizeof(pad)); 1328 errs += ibuf_add(buf, &safi, sizeof(safi)); 1329 1330 return (errs); 1331 } 1332 1333 int 1334 session_capa_add_afi(struct peer *p, struct ibuf *b, uint8_t aid, 1335 uint8_t flags) 1336 { 1337 u_int errs = 0; 1338 uint16_t afi; 1339 uint8_t safi; 1340 1341 if (aid2afi(aid, &afi, &safi)) { 1342 log_warn("session_capa_add_afi: bad AID"); 1343 return (1); 1344 } 1345 1346 afi = htons(afi); 1347 errs += ibuf_add(b, &afi, sizeof(afi)); 1348 errs += ibuf_add(b, &safi, sizeof(safi)); 1349 errs += ibuf_add(b, &flags, sizeof(flags)); 1350 1351 return (errs); 1352 } 1353 1354 struct bgp_msg * 1355 session_newmsg(enum msg_type msgtype, uint16_t len) 1356 { 1357 struct bgp_msg *msg; 1358 struct msg_header hdr; 1359 struct ibuf *buf; 1360 int errs = 0; 1361 1362 memset(&hdr.marker, 0xff, sizeof(hdr.marker)); 1363 hdr.len = htons(len); 1364 hdr.type = msgtype; 1365 1366 if ((buf = ibuf_open(len)) == NULL) 1367 return (NULL); 1368 1369 errs += ibuf_add(buf, &hdr.marker, sizeof(hdr.marker)); 1370 errs += ibuf_add(buf, &hdr.len, sizeof(hdr.len)); 1371 errs += ibuf_add(buf, &hdr.type, sizeof(hdr.type)); 1372 1373 if (errs || (msg = calloc(1, sizeof(*msg))) == NULL) { 1374 ibuf_free(buf); 1375 return (NULL); 1376 } 1377 1378 msg->buf = buf; 1379 msg->type = msgtype; 1380 msg->len = len; 1381 1382 return (msg); 1383 } 1384 1385 int 1386 session_sendmsg(struct bgp_msg *msg, struct peer *p) 1387 { 1388 struct mrt *mrt; 1389 1390 LIST_FOREACH(mrt, &mrthead, entry) { 1391 if (!(mrt->type == MRT_ALL_OUT || (msg->type == UPDATE && 1392 mrt->type == MRT_UPDATE_OUT))) 1393 continue; 1394 if ((mrt->peer_id == 0 && mrt->group_id == 0) || 1395 mrt->peer_id == p->conf.id || (mrt->group_id != 0 && 1396 mrt->group_id == p->conf.groupid)) 1397 mrt_dump_bgp_msg(mrt, msg->buf->buf, msg->len, p, 1398 msg->type); 1399 } 1400 1401 ibuf_close(&p->wbuf, msg->buf); 1402 if (!p->throttled && p->wbuf.queued > SESS_MSG_HIGH_MARK) { 1403 if (imsg_rde(IMSG_XOFF, p->conf.id, NULL, 0) == -1) 1404 log_peer_warn(&p->conf, "imsg_compose XOFF"); 1405 else 1406 p->throttled = 1; 1407 } 1408 1409 free(msg); 1410 return (0); 1411 } 1412 1413 void 1414 session_open(struct peer *p) 1415 { 1416 struct bgp_msg *buf; 1417 struct ibuf *opb; 1418 struct msg_open msg; 1419 uint16_t len, optparamlen = 0; 1420 uint8_t i, op_type; 1421 int errs = 0, extlen = 0; 1422 int mpcapa = 0; 1423 1424 1425 if ((opb = ibuf_dynamic(0, UINT16_MAX - 3)) == NULL) { 1426 bgp_fsm(p, EVNT_CON_FATAL); 1427 return; 1428 } 1429 1430 /* multiprotocol extensions, RFC 4760 */ 1431 for (i = 0; i < AID_MAX; i++) 1432 if (p->capa.ann.mp[i]) { /* 4 bytes data */ 1433 errs += session_capa_add(opb, CAPA_MP, 4); 1434 errs += session_capa_add_mp(opb, i); 1435 mpcapa++; 1436 } 1437 1438 /* route refresh, RFC 2918 */ 1439 if (p->capa.ann.refresh) /* no data */ 1440 errs += session_capa_add(opb, CAPA_REFRESH, 0); 1441 1442 /* BGP open policy, RFC 9234 */ 1443 if (p->capa.ann.role_ena) { 1444 errs += session_capa_add(opb, CAPA_ROLE, 1); 1445 errs += ibuf_add(opb, &p->capa.ann.role, 1); 1446 } 1447 1448 /* graceful restart and End-of-RIB marker, RFC 4724 */ 1449 if (p->capa.ann.grestart.restart) { 1450 int rst = 0; 1451 uint16_t hdr = 0; 1452 1453 for (i = 0; i < AID_MAX; i++) { 1454 if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING) 1455 rst++; 1456 } 1457 1458 /* Only set the R-flag if no graceful restart is ongoing */ 1459 if (!rst) 1460 hdr |= CAPA_GR_R_FLAG; 1461 hdr = htons(hdr); 1462 1463 errs += session_capa_add(opb, CAPA_RESTART, sizeof(hdr)); 1464 errs += ibuf_add(opb, &hdr, sizeof(hdr)); 1465 } 1466 1467 /* 4-bytes AS numbers, RFC6793 */ 1468 if (p->capa.ann.as4byte) { /* 4 bytes data */ 1469 uint32_t nas; 1470 1471 nas = htonl(p->conf.local_as); 1472 errs += session_capa_add(opb, CAPA_AS4BYTE, sizeof(nas)); 1473 errs += ibuf_add(opb, &nas, sizeof(nas)); 1474 } 1475 1476 /* advertisement of multiple paths, RFC7911 */ 1477 if (p->capa.ann.add_path[0]) { /* variable */ 1478 uint8_t aplen; 1479 1480 if (mpcapa) 1481 aplen = 4 * mpcapa; 1482 else /* AID_INET */ 1483 aplen = 4; 1484 errs += session_capa_add(opb, CAPA_ADD_PATH, aplen); 1485 if (mpcapa) { 1486 for (i = AID_MIN; i < AID_MAX; i++) { 1487 if (p->capa.ann.mp[i]) { 1488 errs += session_capa_add_afi(p, opb, 1489 i, p->capa.ann.add_path[i]); 1490 } 1491 } 1492 } else { /* AID_INET */ 1493 errs += session_capa_add_afi(p, opb, AID_INET, 1494 p->capa.ann.add_path[AID_INET]); 1495 } 1496 } 1497 1498 /* enhanced route-refresh, RFC7313 */ 1499 if (p->capa.ann.enhanced_rr) /* no data */ 1500 errs += session_capa_add(opb, CAPA_ENHANCED_RR, 0); 1501 1502 optparamlen = ibuf_size(opb); 1503 if (optparamlen == 0) { 1504 /* nothing */ 1505 } else if (optparamlen + 2 >= 255) { 1506 /* RFC9072: 2 byte lenght instead of 1 + 3 byte extra header */ 1507 optparamlen += sizeof(op_type) + 2 + 3; 1508 msg.optparamlen = 255; 1509 extlen = 1; 1510 } else { 1511 optparamlen += sizeof(op_type) + 1; 1512 msg.optparamlen = optparamlen; 1513 } 1514 1515 len = MSGSIZE_OPEN_MIN + optparamlen; 1516 if (errs || (buf = session_newmsg(OPEN, len)) == NULL) { 1517 ibuf_free(opb); 1518 bgp_fsm(p, EVNT_CON_FATAL); 1519 return; 1520 } 1521 1522 msg.version = 4; 1523 msg.myas = htons(p->conf.local_short_as); 1524 if (p->conf.holdtime) 1525 msg.holdtime = htons(p->conf.holdtime); 1526 else 1527 msg.holdtime = htons(conf->holdtime); 1528 msg.bgpid = conf->bgpid; /* is already in network byte order */ 1529 1530 errs += ibuf_add(buf->buf, &msg.version, sizeof(msg.version)); 1531 errs += ibuf_add(buf->buf, &msg.myas, sizeof(msg.myas)); 1532 errs += ibuf_add(buf->buf, &msg.holdtime, sizeof(msg.holdtime)); 1533 errs += ibuf_add(buf->buf, &msg.bgpid, sizeof(msg.bgpid)); 1534 errs += ibuf_add(buf->buf, &msg.optparamlen, 1); 1535 1536 if (extlen) { 1537 /* write RFC9072 extra header */ 1538 uint16_t op_extlen = htons(optparamlen - 3); 1539 op_type = OPT_PARAM_EXT_LEN; 1540 errs += ibuf_add(buf->buf, &op_type, 1); 1541 errs += ibuf_add(buf->buf, &op_extlen, 2); 1542 } 1543 1544 if (optparamlen) { 1545 op_type = OPT_PARAM_CAPABILITIES; 1546 errs += ibuf_add(buf->buf, &op_type, sizeof(op_type)); 1547 1548 optparamlen = ibuf_size(opb); 1549 if (extlen) { 1550 /* RFC9072: 2-byte extended length */ 1551 uint16_t op_extlen = htons(optparamlen); 1552 errs += ibuf_add(buf->buf, &op_extlen, 2); 1553 } else { 1554 uint8_t op_len = optparamlen; 1555 errs += ibuf_add(buf->buf, &op_len, 1); 1556 } 1557 errs += ibuf_add(buf->buf, opb->buf, ibuf_size(opb)); 1558 } 1559 1560 ibuf_free(opb); 1561 1562 if (errs) { 1563 ibuf_free(buf->buf); 1564 free(buf); 1565 bgp_fsm(p, EVNT_CON_FATAL); 1566 return; 1567 } 1568 1569 if (session_sendmsg(buf, p) == -1) { 1570 bgp_fsm(p, EVNT_CON_FATAL); 1571 return; 1572 } 1573 1574 p->stats.msg_sent_open++; 1575 } 1576 1577 void 1578 session_keepalive(struct peer *p) 1579 { 1580 struct bgp_msg *buf; 1581 1582 if ((buf = session_newmsg(KEEPALIVE, MSGSIZE_KEEPALIVE)) == NULL || 1583 session_sendmsg(buf, p) == -1) { 1584 bgp_fsm(p, EVNT_CON_FATAL); 1585 return; 1586 } 1587 1588 start_timer_keepalive(p); 1589 p->stats.msg_sent_keepalive++; 1590 } 1591 1592 void 1593 session_update(uint32_t peerid, void *data, size_t datalen) 1594 { 1595 struct peer *p; 1596 struct bgp_msg *buf; 1597 1598 if ((p = getpeerbyid(conf, peerid)) == NULL) { 1599 log_warnx("no such peer: id=%u", peerid); 1600 return; 1601 } 1602 1603 if (p->state != STATE_ESTABLISHED) 1604 return; 1605 1606 if ((buf = session_newmsg(UPDATE, MSGSIZE_HEADER + datalen)) == NULL) { 1607 bgp_fsm(p, EVNT_CON_FATAL); 1608 return; 1609 } 1610 1611 if (ibuf_add(buf->buf, data, datalen)) { 1612 ibuf_free(buf->buf); 1613 free(buf); 1614 bgp_fsm(p, EVNT_CON_FATAL); 1615 return; 1616 } 1617 1618 if (session_sendmsg(buf, p) == -1) { 1619 bgp_fsm(p, EVNT_CON_FATAL); 1620 return; 1621 } 1622 1623 start_timer_keepalive(p); 1624 p->stats.msg_sent_update++; 1625 } 1626 1627 void 1628 session_notification(struct peer *p, uint8_t errcode, uint8_t subcode, 1629 void *data, ssize_t datalen) 1630 { 1631 struct bgp_msg *buf; 1632 int errs = 0; 1633 1634 if (p->stats.last_sent_errcode) /* some notification already sent */ 1635 return; 1636 1637 log_notification(p, errcode, subcode, data, datalen, "sending"); 1638 1639 /* cap to maximum size */ 1640 if (datalen > MAX_PKTSIZE - MSGSIZE_NOTIFICATION_MIN) { 1641 log_peer_warnx(&p->conf, 1642 "oversized notification, data trunkated"); 1643 datalen = MAX_PKTSIZE - MSGSIZE_NOTIFICATION_MIN; 1644 } 1645 1646 if ((buf = session_newmsg(NOTIFICATION, 1647 MSGSIZE_NOTIFICATION_MIN + datalen)) == NULL) { 1648 bgp_fsm(p, EVNT_CON_FATAL); 1649 return; 1650 } 1651 1652 errs += ibuf_add(buf->buf, &errcode, sizeof(errcode)); 1653 errs += ibuf_add(buf->buf, &subcode, sizeof(subcode)); 1654 1655 if (datalen > 0) 1656 errs += ibuf_add(buf->buf, data, datalen); 1657 1658 if (errs) { 1659 ibuf_free(buf->buf); 1660 free(buf); 1661 bgp_fsm(p, EVNT_CON_FATAL); 1662 return; 1663 } 1664 1665 if (session_sendmsg(buf, p) == -1) { 1666 bgp_fsm(p, EVNT_CON_FATAL); 1667 return; 1668 } 1669 1670 p->stats.msg_sent_notification++; 1671 p->stats.last_sent_errcode = errcode; 1672 p->stats.last_sent_suberr = subcode; 1673 } 1674 1675 int 1676 session_neighbor_rrefresh(struct peer *p) 1677 { 1678 uint8_t i; 1679 1680 if (!(p->capa.neg.refresh || p->capa.neg.enhanced_rr)) 1681 return (-1); 1682 1683 for (i = 0; i < AID_MAX; i++) { 1684 if (p->capa.neg.mp[i] != 0) 1685 session_rrefresh(p, i, ROUTE_REFRESH_REQUEST); 1686 } 1687 1688 return (0); 1689 } 1690 1691 void 1692 session_rrefresh(struct peer *p, uint8_t aid, uint8_t subtype) 1693 { 1694 struct bgp_msg *buf; 1695 int errs = 0; 1696 uint16_t afi; 1697 uint8_t safi; 1698 1699 switch (subtype) { 1700 case ROUTE_REFRESH_REQUEST: 1701 p->stats.refresh_sent_req++; 1702 break; 1703 case ROUTE_REFRESH_BEGIN_RR: 1704 case ROUTE_REFRESH_END_RR: 1705 /* requires enhanced route refresh */ 1706 if (!p->capa.neg.enhanced_rr) 1707 return; 1708 if (subtype == ROUTE_REFRESH_BEGIN_RR) 1709 p->stats.refresh_sent_borr++; 1710 else 1711 p->stats.refresh_sent_eorr++; 1712 break; 1713 default: 1714 fatalx("session_rrefresh: bad subtype %d", subtype); 1715 } 1716 1717 if (aid2afi(aid, &afi, &safi) == -1) 1718 fatalx("session_rrefresh: bad afi/safi pair"); 1719 1720 if ((buf = session_newmsg(RREFRESH, MSGSIZE_RREFRESH)) == NULL) { 1721 bgp_fsm(p, EVNT_CON_FATAL); 1722 return; 1723 } 1724 1725 afi = htons(afi); 1726 errs += ibuf_add(buf->buf, &afi, sizeof(afi)); 1727 errs += ibuf_add(buf->buf, &subtype, sizeof(subtype)); 1728 errs += ibuf_add(buf->buf, &safi, sizeof(safi)); 1729 1730 if (errs) { 1731 ibuf_free(buf->buf); 1732 free(buf); 1733 bgp_fsm(p, EVNT_CON_FATAL); 1734 return; 1735 } 1736 1737 if (session_sendmsg(buf, p) == -1) { 1738 bgp_fsm(p, EVNT_CON_FATAL); 1739 return; 1740 } 1741 1742 p->stats.msg_sent_rrefresh++; 1743 } 1744 1745 int 1746 session_graceful_restart(struct peer *p) 1747 { 1748 uint8_t i; 1749 1750 timer_set(&p->timers, Timer_RestartTimeout, 1751 p->capa.neg.grestart.timeout); 1752 1753 for (i = 0; i < AID_MAX; i++) { 1754 if (p->capa.neg.grestart.flags[i] & CAPA_GR_PRESENT) { 1755 if (imsg_rde(IMSG_SESSION_STALE, p->conf.id, 1756 &i, sizeof(i)) == -1) 1757 return (-1); 1758 log_peer_warnx(&p->conf, 1759 "graceful restart of %s, keeping routes", 1760 aid2str(i)); 1761 p->capa.neg.grestart.flags[i] |= CAPA_GR_RESTARTING; 1762 } else if (p->capa.neg.mp[i]) { 1763 if (imsg_rde(IMSG_SESSION_FLUSH, p->conf.id, 1764 &i, sizeof(i)) == -1) 1765 return (-1); 1766 log_peer_warnx(&p->conf, 1767 "graceful restart of %s, flushing routes", 1768 aid2str(i)); 1769 } 1770 } 1771 return (0); 1772 } 1773 1774 int 1775 session_graceful_stop(struct peer *p) 1776 { 1777 uint8_t i; 1778 1779 for (i = 0; i < AID_MAX; i++) { 1780 /* 1781 * Only flush if the peer is restarting and the timeout fired. 1782 * In all other cases the session was already flushed when the 1783 * session went down or when the new open message was parsed. 1784 */ 1785 if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING) { 1786 log_peer_warnx(&p->conf, "graceful restart of %s, " 1787 "time-out, flushing", aid2str(i)); 1788 if (imsg_rde(IMSG_SESSION_FLUSH, p->conf.id, 1789 &i, sizeof(i)) == -1) 1790 return (-1); 1791 } 1792 p->capa.neg.grestart.flags[i] &= ~CAPA_GR_RESTARTING; 1793 } 1794 return (0); 1795 } 1796 1797 int 1798 session_dispatch_msg(struct pollfd *pfd, struct peer *p) 1799 { 1800 ssize_t n; 1801 socklen_t len; 1802 int error; 1803 1804 if (p->state == STATE_CONNECT) { 1805 if (pfd->revents & POLLOUT) { 1806 if (pfd->revents & POLLIN) { 1807 /* error occurred */ 1808 len = sizeof(error); 1809 if (getsockopt(pfd->fd, SOL_SOCKET, SO_ERROR, 1810 &error, &len) == -1 || error) { 1811 if (error) 1812 errno = error; 1813 if (errno != p->lasterr) { 1814 log_peer_warn(&p->conf, 1815 "socket error"); 1816 p->lasterr = errno; 1817 } 1818 bgp_fsm(p, EVNT_CON_OPENFAIL); 1819 return (1); 1820 } 1821 } 1822 bgp_fsm(p, EVNT_CON_OPEN); 1823 return (1); 1824 } 1825 if (pfd->revents & POLLHUP) { 1826 bgp_fsm(p, EVNT_CON_OPENFAIL); 1827 return (1); 1828 } 1829 if (pfd->revents & (POLLERR|POLLNVAL)) { 1830 bgp_fsm(p, EVNT_CON_FATAL); 1831 return (1); 1832 } 1833 return (0); 1834 } 1835 1836 if (pfd->revents & POLLHUP) { 1837 bgp_fsm(p, EVNT_CON_CLOSED); 1838 return (1); 1839 } 1840 if (pfd->revents & (POLLERR|POLLNVAL)) { 1841 bgp_fsm(p, EVNT_CON_FATAL); 1842 return (1); 1843 } 1844 1845 if (pfd->revents & POLLOUT && p->wbuf.queued) { 1846 if ((error = msgbuf_write(&p->wbuf)) <= 0 && errno != EAGAIN) { 1847 if (error == 0) 1848 log_peer_warnx(&p->conf, "Connection closed"); 1849 else if (error == -1) 1850 log_peer_warn(&p->conf, "write error"); 1851 bgp_fsm(p, EVNT_CON_FATAL); 1852 return (1); 1853 } 1854 p->stats.last_write = getmonotime(); 1855 if (p->holdtime > 0) 1856 timer_set(&p->timers, Timer_SendHold, 1857 p->holdtime < INTERVAL_HOLD ? INTERVAL_HOLD : 1858 p->holdtime); 1859 if (p->throttled && p->wbuf.queued < SESS_MSG_LOW_MARK) { 1860 if (imsg_rde(IMSG_XON, p->conf.id, NULL, 0) == -1) 1861 log_peer_warn(&p->conf, "imsg_compose XON"); 1862 else 1863 p->throttled = 0; 1864 } 1865 if (!(pfd->revents & POLLIN)) 1866 return (1); 1867 } 1868 1869 if (p->rbuf && pfd->revents & POLLIN) { 1870 if ((n = read(p->fd, p->rbuf->buf + p->rbuf->wpos, 1871 sizeof(p->rbuf->buf) - p->rbuf->wpos)) == -1) { 1872 if (errno != EINTR && errno != EAGAIN) { 1873 log_peer_warn(&p->conf, "read error"); 1874 bgp_fsm(p, EVNT_CON_FATAL); 1875 } 1876 return (1); 1877 } 1878 if (n == 0) { /* connection closed */ 1879 bgp_fsm(p, EVNT_CON_CLOSED); 1880 return (1); 1881 } 1882 1883 p->rbuf->wpos += n; 1884 p->stats.last_read = getmonotime(); 1885 return (1); 1886 } 1887 return (0); 1888 } 1889 1890 void 1891 session_process_msg(struct peer *p) 1892 { 1893 struct mrt *mrt; 1894 ssize_t rpos, av, left; 1895 int processed = 0; 1896 uint16_t msglen; 1897 uint8_t msgtype; 1898 1899 rpos = 0; 1900 av = p->rbuf->wpos; 1901 p->rpending = 0; 1902 1903 /* 1904 * session might drop to IDLE -> buffers deallocated 1905 * we MUST check rbuf != NULL before use 1906 */ 1907 for (;;) { 1908 if (p->rbuf == NULL) 1909 return; 1910 if (rpos + MSGSIZE_HEADER > av) 1911 break; 1912 if (parse_header(p, p->rbuf->buf + rpos, &msglen, 1913 &msgtype) == -1) 1914 return; 1915 if (rpos + msglen > av) 1916 break; 1917 p->rbuf->rptr = p->rbuf->buf + rpos; 1918 1919 /* dump to MRT as soon as we have a full packet */ 1920 LIST_FOREACH(mrt, &mrthead, entry) { 1921 if (!(mrt->type == MRT_ALL_IN || (msgtype == UPDATE && 1922 mrt->type == MRT_UPDATE_IN))) 1923 continue; 1924 if ((mrt->peer_id == 0 && mrt->group_id == 0) || 1925 mrt->peer_id == p->conf.id || (mrt->group_id != 0 && 1926 mrt->group_id == p->conf.groupid)) 1927 mrt_dump_bgp_msg(mrt, p->rbuf->rptr, msglen, p, 1928 msgtype); 1929 } 1930 1931 switch (msgtype) { 1932 case OPEN: 1933 bgp_fsm(p, EVNT_RCVD_OPEN); 1934 p->stats.msg_rcvd_open++; 1935 break; 1936 case UPDATE: 1937 bgp_fsm(p, EVNT_RCVD_UPDATE); 1938 p->stats.msg_rcvd_update++; 1939 break; 1940 case NOTIFICATION: 1941 bgp_fsm(p, EVNT_RCVD_NOTIFICATION); 1942 p->stats.msg_rcvd_notification++; 1943 break; 1944 case KEEPALIVE: 1945 bgp_fsm(p, EVNT_RCVD_KEEPALIVE); 1946 p->stats.msg_rcvd_keepalive++; 1947 break; 1948 case RREFRESH: 1949 parse_rrefresh(p); 1950 p->stats.msg_rcvd_rrefresh++; 1951 break; 1952 default: /* cannot happen */ 1953 session_notification(p, ERR_HEADER, ERR_HDR_TYPE, 1954 &msgtype, 1); 1955 log_warnx("received message with unknown type %u", 1956 msgtype); 1957 bgp_fsm(p, EVNT_CON_FATAL); 1958 } 1959 rpos += msglen; 1960 if (++processed > MSG_PROCESS_LIMIT) { 1961 p->rpending = 1; 1962 break; 1963 } 1964 } 1965 1966 if (rpos < av) { 1967 left = av - rpos; 1968 memmove(&p->rbuf->buf, p->rbuf->buf + rpos, left); 1969 p->rbuf->wpos = left; 1970 } else 1971 p->rbuf->wpos = 0; 1972 } 1973 1974 int 1975 parse_header(struct peer *peer, u_char *data, uint16_t *len, uint8_t *type) 1976 { 1977 u_char *p; 1978 uint16_t olen; 1979 static const uint8_t marker[MSGSIZE_HEADER_MARKER] = { 0xff, 0xff, 1980 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 1981 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; 1982 1983 /* caller MUST make sure we are getting 19 bytes! */ 1984 p = data; 1985 if (memcmp(p, marker, sizeof(marker))) { 1986 log_peer_warnx(&peer->conf, "sync error"); 1987 session_notification(peer, ERR_HEADER, ERR_HDR_SYNC, NULL, 0); 1988 bgp_fsm(peer, EVNT_CON_FATAL); 1989 return (-1); 1990 } 1991 p += MSGSIZE_HEADER_MARKER; 1992 1993 memcpy(&olen, p, 2); 1994 *len = ntohs(olen); 1995 p += 2; 1996 memcpy(type, p, 1); 1997 1998 if (*len < MSGSIZE_HEADER || *len > MAX_PKTSIZE) { 1999 log_peer_warnx(&peer->conf, 2000 "received message: illegal length: %u byte", *len); 2001 session_notification(peer, ERR_HEADER, ERR_HDR_LEN, 2002 &olen, sizeof(olen)); 2003 bgp_fsm(peer, EVNT_CON_FATAL); 2004 return (-1); 2005 } 2006 2007 switch (*type) { 2008 case OPEN: 2009 if (*len < MSGSIZE_OPEN_MIN) { 2010 log_peer_warnx(&peer->conf, 2011 "received OPEN: illegal len: %u byte", *len); 2012 session_notification(peer, ERR_HEADER, ERR_HDR_LEN, 2013 &olen, sizeof(olen)); 2014 bgp_fsm(peer, EVNT_CON_FATAL); 2015 return (-1); 2016 } 2017 break; 2018 case NOTIFICATION: 2019 if (*len < MSGSIZE_NOTIFICATION_MIN) { 2020 log_peer_warnx(&peer->conf, 2021 "received NOTIFICATION: illegal len: %u byte", 2022 *len); 2023 session_notification(peer, ERR_HEADER, ERR_HDR_LEN, 2024 &olen, sizeof(olen)); 2025 bgp_fsm(peer, EVNT_CON_FATAL); 2026 return (-1); 2027 } 2028 break; 2029 case UPDATE: 2030 if (*len < MSGSIZE_UPDATE_MIN) { 2031 log_peer_warnx(&peer->conf, 2032 "received UPDATE: illegal len: %u byte", *len); 2033 session_notification(peer, ERR_HEADER, ERR_HDR_LEN, 2034 &olen, sizeof(olen)); 2035 bgp_fsm(peer, EVNT_CON_FATAL); 2036 return (-1); 2037 } 2038 break; 2039 case KEEPALIVE: 2040 if (*len != MSGSIZE_KEEPALIVE) { 2041 log_peer_warnx(&peer->conf, 2042 "received KEEPALIVE: illegal len: %u byte", *len); 2043 session_notification(peer, ERR_HEADER, ERR_HDR_LEN, 2044 &olen, sizeof(olen)); 2045 bgp_fsm(peer, EVNT_CON_FATAL); 2046 return (-1); 2047 } 2048 break; 2049 case RREFRESH: 2050 if (*len < MSGSIZE_RREFRESH_MIN) { 2051 log_peer_warnx(&peer->conf, 2052 "received RREFRESH: illegal len: %u byte", *len); 2053 session_notification(peer, ERR_HEADER, ERR_HDR_LEN, 2054 &olen, sizeof(olen)); 2055 bgp_fsm(peer, EVNT_CON_FATAL); 2056 return (-1); 2057 } 2058 break; 2059 default: 2060 log_peer_warnx(&peer->conf, 2061 "received msg with unknown type %u", *type); 2062 session_notification(peer, ERR_HEADER, ERR_HDR_TYPE, 2063 type, 1); 2064 bgp_fsm(peer, EVNT_CON_FATAL); 2065 return (-1); 2066 } 2067 return (0); 2068 } 2069 2070 int 2071 parse_open(struct peer *peer) 2072 { 2073 u_char *p, *op_val; 2074 uint8_t version, rversion; 2075 uint16_t short_as, msglen; 2076 uint16_t holdtime, oholdtime, myholdtime; 2077 uint32_t as, bgpid; 2078 uint16_t optparamlen, extlen, plen, op_len; 2079 uint8_t op_type, suberr = 0; 2080 2081 p = peer->rbuf->rptr; 2082 p += MSGSIZE_HEADER_MARKER; 2083 memcpy(&msglen, p, sizeof(msglen)); 2084 msglen = ntohs(msglen); 2085 2086 p = peer->rbuf->rptr; 2087 p += MSGSIZE_HEADER; /* header is already checked */ 2088 2089 memcpy(&version, p, sizeof(version)); 2090 p += sizeof(version); 2091 2092 if (version != BGP_VERSION) { 2093 log_peer_warnx(&peer->conf, 2094 "peer wants unrecognized version %u", version); 2095 if (version > BGP_VERSION) 2096 rversion = version - BGP_VERSION; 2097 else 2098 rversion = BGP_VERSION; 2099 session_notification(peer, ERR_OPEN, ERR_OPEN_VERSION, 2100 &rversion, sizeof(rversion)); 2101 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2102 return (-1); 2103 } 2104 2105 memcpy(&short_as, p, sizeof(short_as)); 2106 p += sizeof(short_as); 2107 as = peer->short_as = ntohs(short_as); 2108 if (as == 0) { 2109 log_peer_warnx(&peer->conf, 2110 "peer requests unacceptable AS %u", as); 2111 session_notification(peer, ERR_OPEN, ERR_OPEN_AS, 2112 NULL, 0); 2113 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2114 return (-1); 2115 } 2116 2117 memcpy(&oholdtime, p, sizeof(oholdtime)); 2118 p += sizeof(oholdtime); 2119 2120 holdtime = ntohs(oholdtime); 2121 if (holdtime && holdtime < peer->conf.min_holdtime) { 2122 log_peer_warnx(&peer->conf, 2123 "peer requests unacceptable holdtime %u", holdtime); 2124 session_notification(peer, ERR_OPEN, ERR_OPEN_HOLDTIME, 2125 NULL, 0); 2126 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2127 return (-1); 2128 } 2129 2130 myholdtime = peer->conf.holdtime; 2131 if (!myholdtime) 2132 myholdtime = conf->holdtime; 2133 if (holdtime < myholdtime) 2134 peer->holdtime = holdtime; 2135 else 2136 peer->holdtime = myholdtime; 2137 2138 memcpy(&bgpid, p, sizeof(bgpid)); 2139 p += sizeof(bgpid); 2140 2141 /* check bgpid for validity - just disallow 0 */ 2142 if (ntohl(bgpid) == 0) { 2143 log_peer_warnx(&peer->conf, "peer BGPID %u unacceptable", 2144 ntohl(bgpid)); 2145 session_notification(peer, ERR_OPEN, ERR_OPEN_BGPID, 2146 NULL, 0); 2147 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2148 return (-1); 2149 } 2150 peer->remote_bgpid = bgpid; 2151 2152 extlen = 0; 2153 optparamlen = *p++; 2154 2155 if (optparamlen == 0) { 2156 if (msglen != MSGSIZE_OPEN_MIN) { 2157 bad_len: 2158 log_peer_warnx(&peer->conf, 2159 "corrupt OPEN message received: length mismatch"); 2160 session_notification(peer, ERR_OPEN, 0, NULL, 0); 2161 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2162 return (-1); 2163 } 2164 } else { 2165 if (msglen < MSGSIZE_OPEN_MIN + 1) 2166 goto bad_len; 2167 2168 op_type = *p; 2169 if (op_type == OPT_PARAM_EXT_LEN) { 2170 p++; 2171 memcpy(&optparamlen, p, sizeof(optparamlen)); 2172 optparamlen = ntohs(optparamlen); 2173 p += sizeof(optparamlen); 2174 extlen = 1; 2175 } 2176 2177 /* RFC9020 encoding has 3 extra bytes */ 2178 if (optparamlen + 3 * extlen != msglen - MSGSIZE_OPEN_MIN) 2179 goto bad_len; 2180 } 2181 2182 plen = optparamlen; 2183 while (plen > 0) { 2184 if (plen < 2 + extlen) 2185 goto bad_len; 2186 2187 memcpy(&op_type, p, sizeof(op_type)); 2188 p += sizeof(op_type); 2189 plen -= sizeof(op_type); 2190 if (!extlen) { 2191 op_len = *p++; 2192 plen--; 2193 } else { 2194 memcpy(&op_len, p, sizeof(op_len)); 2195 op_len = ntohs(op_len); 2196 p += sizeof(op_len); 2197 plen -= sizeof(op_len); 2198 } 2199 if (op_len > 0) { 2200 if (plen < op_len) 2201 goto bad_len; 2202 op_val = p; 2203 p += op_len; 2204 plen -= op_len; 2205 } else 2206 op_val = NULL; 2207 2208 switch (op_type) { 2209 case OPT_PARAM_CAPABILITIES: /* RFC 3392 */ 2210 if (parse_capabilities(peer, op_val, op_len, 2211 &as) == -1) { 2212 session_notification(peer, ERR_OPEN, 0, 2213 NULL, 0); 2214 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2215 return (-1); 2216 } 2217 break; 2218 case OPT_PARAM_AUTH: /* deprecated */ 2219 default: 2220 /* 2221 * unsupported type 2222 * the RFCs tell us to leave the data section empty 2223 * and notify the peer with ERR_OPEN, ERR_OPEN_OPT. 2224 * How the peer should know _which_ optional parameter 2225 * we don't support is beyond me. 2226 */ 2227 log_peer_warnx(&peer->conf, 2228 "received OPEN message with unsupported optional " 2229 "parameter: type %u", op_type); 2230 session_notification(peer, ERR_OPEN, ERR_OPEN_OPT, 2231 NULL, 0); 2232 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2233 /* no punish */ 2234 timer_set(&peer->timers, Timer_IdleHold, 0); 2235 peer->IdleHoldTime /= 2; 2236 return (-1); 2237 } 2238 } 2239 2240 /* if remote-as is zero and it's a cloned neighbor, accept any */ 2241 if (peer->template && !peer->conf.remote_as && as != AS_TRANS) { 2242 peer->conf.remote_as = as; 2243 peer->conf.ebgp = (peer->conf.remote_as != peer->conf.local_as); 2244 if (!peer->conf.ebgp) 2245 /* force enforce_as off for iBGP sessions */ 2246 peer->conf.enforce_as = ENFORCE_AS_OFF; 2247 } 2248 2249 if (peer->conf.remote_as != as) { 2250 log_peer_warnx(&peer->conf, "peer sent wrong AS %s", 2251 log_as(as)); 2252 session_notification(peer, ERR_OPEN, ERR_OPEN_AS, NULL, 0); 2253 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2254 return (-1); 2255 } 2256 2257 /* on iBGP sessions check for bgpid collision */ 2258 if (!peer->conf.ebgp && peer->remote_bgpid == conf->bgpid) { 2259 log_peer_warnx(&peer->conf, "peer BGPID %u conflicts with ours", 2260 ntohl(bgpid)); 2261 session_notification(peer, ERR_OPEN, ERR_OPEN_BGPID, 2262 NULL, 0); 2263 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2264 return (-1); 2265 } 2266 2267 if (capa_neg_calc(peer, &suberr) == -1) { 2268 session_notification(peer, ERR_OPEN, suberr, NULL, 0); 2269 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2270 return (-1); 2271 } 2272 2273 return (0); 2274 } 2275 2276 int 2277 parse_update(struct peer *peer) 2278 { 2279 u_char *p; 2280 uint16_t datalen; 2281 2282 /* 2283 * we pass the message verbatim to the rde. 2284 * in case of errors the whole session is reset with a 2285 * notification anyway, we only need to know the peer 2286 */ 2287 p = peer->rbuf->rptr; 2288 p += MSGSIZE_HEADER_MARKER; 2289 memcpy(&datalen, p, sizeof(datalen)); 2290 datalen = ntohs(datalen); 2291 2292 p = peer->rbuf->rptr; 2293 p += MSGSIZE_HEADER; /* header is already checked */ 2294 datalen -= MSGSIZE_HEADER; 2295 2296 if (imsg_rde(IMSG_UPDATE, peer->conf.id, p, datalen) == -1) 2297 return (-1); 2298 2299 return (0); 2300 } 2301 2302 int 2303 parse_rrefresh(struct peer *peer) 2304 { 2305 struct route_refresh rr; 2306 uint16_t afi, datalen; 2307 uint8_t aid, safi, subtype; 2308 u_char *p; 2309 2310 p = peer->rbuf->rptr; 2311 p += MSGSIZE_HEADER_MARKER; 2312 memcpy(&datalen, p, sizeof(datalen)); 2313 datalen = ntohs(datalen); 2314 2315 p = peer->rbuf->rptr; 2316 p += MSGSIZE_HEADER; /* header is already checked */ 2317 2318 /* 2319 * We could check if we actually announced the capability but 2320 * as long as the message is correctly encoded we don't care. 2321 */ 2322 2323 /* afi, 2 byte */ 2324 memcpy(&afi, p, sizeof(afi)); 2325 afi = ntohs(afi); 2326 p += 2; 2327 /* subtype, 1 byte */ 2328 subtype = *p; 2329 p += 1; 2330 /* safi, 1 byte */ 2331 safi = *p; 2332 2333 /* check subtype if peer announced enhanced route refresh */ 2334 if (peer->capa.neg.enhanced_rr) { 2335 switch (subtype) { 2336 case ROUTE_REFRESH_REQUEST: 2337 /* no ORF support, so no oversized RREFRESH msgs */ 2338 if (datalen != MSGSIZE_RREFRESH) { 2339 log_peer_warnx(&peer->conf, 2340 "received RREFRESH: illegal len: %u byte", 2341 datalen); 2342 datalen = htons(datalen); 2343 session_notification(peer, ERR_HEADER, 2344 ERR_HDR_LEN, &datalen, sizeof(datalen)); 2345 bgp_fsm(peer, EVNT_CON_FATAL); 2346 return (-1); 2347 } 2348 peer->stats.refresh_rcvd_req++; 2349 break; 2350 case ROUTE_REFRESH_BEGIN_RR: 2351 case ROUTE_REFRESH_END_RR: 2352 /* special handling for RFC7313 */ 2353 if (datalen != MSGSIZE_RREFRESH) { 2354 log_peer_warnx(&peer->conf, 2355 "received RREFRESH: illegal len: %u byte", 2356 datalen); 2357 p = peer->rbuf->rptr; 2358 p += MSGSIZE_HEADER; 2359 datalen -= MSGSIZE_HEADER; 2360 session_notification(peer, ERR_RREFRESH, 2361 ERR_RR_INV_LEN, p, datalen); 2362 bgp_fsm(peer, EVNT_CON_FATAL); 2363 return (-1); 2364 } 2365 if (subtype == ROUTE_REFRESH_BEGIN_RR) 2366 peer->stats.refresh_rcvd_borr++; 2367 else 2368 peer->stats.refresh_rcvd_eorr++; 2369 break; 2370 default: 2371 log_peer_warnx(&peer->conf, "peer sent bad refresh, " 2372 "bad subtype %d", subtype); 2373 return (0); 2374 } 2375 } else { 2376 /* force subtype to default */ 2377 subtype = ROUTE_REFRESH_REQUEST; 2378 peer->stats.refresh_rcvd_req++; 2379 } 2380 2381 /* afi/safi unchecked - unrecognized values will be ignored anyway */ 2382 if (afi2aid(afi, safi, &aid) == -1) { 2383 log_peer_warnx(&peer->conf, "peer sent bad refresh, " 2384 "invalid afi/safi pair"); 2385 return (0); 2386 } 2387 2388 if (!peer->capa.neg.refresh && !peer->capa.neg.enhanced_rr) { 2389 log_peer_warnx(&peer->conf, "peer sent unexpected refresh"); 2390 return (0); 2391 } 2392 2393 rr.aid = aid; 2394 rr.subtype = subtype; 2395 2396 if (imsg_rde(IMSG_REFRESH, peer->conf.id, &rr, sizeof(rr)) == -1) 2397 return (-1); 2398 2399 return (0); 2400 } 2401 2402 int 2403 parse_notification(struct peer *peer) 2404 { 2405 u_char *p; 2406 uint16_t datalen; 2407 uint8_t errcode; 2408 uint8_t subcode; 2409 uint8_t capa_code; 2410 uint8_t capa_len; 2411 size_t reason_len; 2412 uint8_t i; 2413 2414 /* just log */ 2415 p = peer->rbuf->rptr; 2416 p += MSGSIZE_HEADER_MARKER; 2417 memcpy(&datalen, p, sizeof(datalen)); 2418 datalen = ntohs(datalen); 2419 2420 p = peer->rbuf->rptr; 2421 p += MSGSIZE_HEADER; /* header is already checked */ 2422 datalen -= MSGSIZE_HEADER; 2423 2424 memcpy(&errcode, p, sizeof(errcode)); 2425 p += sizeof(errcode); 2426 datalen -= sizeof(errcode); 2427 2428 memcpy(&subcode, p, sizeof(subcode)); 2429 p += sizeof(subcode); 2430 datalen -= sizeof(subcode); 2431 2432 log_notification(peer, errcode, subcode, p, datalen, "received"); 2433 peer->errcnt++; 2434 peer->stats.last_rcvd_errcode = errcode; 2435 peer->stats.last_rcvd_suberr = subcode; 2436 2437 if (errcode == ERR_OPEN && subcode == ERR_OPEN_CAPA) { 2438 if (datalen == 0) { /* zebra likes to send those.. humbug */ 2439 log_peer_warnx(&peer->conf, "received \"unsupported " 2440 "capability\" notification without data part, " 2441 "disabling capability announcements altogether"); 2442 session_capa_ann_none(peer); 2443 } 2444 2445 while (datalen > 0) { 2446 if (datalen < 2) { 2447 log_peer_warnx(&peer->conf, 2448 "parse_notification: " 2449 "expect len >= 2, len is %u", datalen); 2450 return (-1); 2451 } 2452 memcpy(&capa_code, p, sizeof(capa_code)); 2453 p += sizeof(capa_code); 2454 datalen -= sizeof(capa_code); 2455 memcpy(&capa_len, p, sizeof(capa_len)); 2456 p += sizeof(capa_len); 2457 datalen -= sizeof(capa_len); 2458 if (datalen < capa_len) { 2459 log_peer_warnx(&peer->conf, 2460 "parse_notification: capa_len %u exceeds " 2461 "remaining msg length %u", capa_len, 2462 datalen); 2463 return (-1); 2464 } 2465 p += capa_len; 2466 datalen -= capa_len; 2467 switch (capa_code) { 2468 case CAPA_MP: 2469 for (i = 0; i < AID_MAX; i++) 2470 peer->capa.ann.mp[i] = 0; 2471 log_peer_warnx(&peer->conf, 2472 "disabling multiprotocol capability"); 2473 break; 2474 case CAPA_REFRESH: 2475 peer->capa.ann.refresh = 0; 2476 log_peer_warnx(&peer->conf, 2477 "disabling route refresh capability"); 2478 break; 2479 case CAPA_RESTART: 2480 peer->capa.ann.grestart.restart = 0; 2481 log_peer_warnx(&peer->conf, 2482 "disabling restart capability"); 2483 break; 2484 case CAPA_AS4BYTE: 2485 peer->capa.ann.as4byte = 0; 2486 log_peer_warnx(&peer->conf, 2487 "disabling 4-byte AS num capability"); 2488 break; 2489 case CAPA_ADD_PATH: 2490 memset(peer->capa.ann.add_path, 0, 2491 sizeof(peer->capa.ann.add_path)); 2492 log_peer_warnx(&peer->conf, 2493 "disabling ADD-PATH capability"); 2494 break; 2495 case CAPA_ENHANCED_RR: 2496 peer->capa.ann.enhanced_rr = 0; 2497 log_peer_warnx(&peer->conf, 2498 "disabling enhanced route refresh " 2499 "capability"); 2500 break; 2501 default: /* should not happen... */ 2502 log_peer_warnx(&peer->conf, "received " 2503 "\"unsupported capability\" notification " 2504 "for unknown capability %u, disabling " 2505 "capability announcements altogether", 2506 capa_code); 2507 session_capa_ann_none(peer); 2508 break; 2509 } 2510 } 2511 2512 return (1); 2513 } 2514 2515 if (errcode == ERR_OPEN && subcode == ERR_OPEN_OPT) { 2516 session_capa_ann_none(peer); 2517 return (1); 2518 } 2519 2520 if (errcode == ERR_CEASE && 2521 (subcode == ERR_CEASE_ADMIN_DOWN || 2522 subcode == ERR_CEASE_ADMIN_RESET)) { 2523 if (datalen > 1) { 2524 reason_len = *p++; 2525 datalen--; 2526 if (datalen < reason_len) { 2527 log_peer_warnx(&peer->conf, 2528 "received truncated shutdown reason"); 2529 return (0); 2530 } 2531 if (reason_len > REASON_LEN - 1) { 2532 log_peer_warnx(&peer->conf, 2533 "received overly long shutdown reason"); 2534 return (0); 2535 } 2536 memcpy(peer->stats.last_reason, p, reason_len); 2537 peer->stats.last_reason[reason_len] = '\0'; 2538 log_peer_warnx(&peer->conf, 2539 "received shutdown reason: \"%s\"", 2540 log_reason(peer->stats.last_reason)); 2541 p += reason_len; 2542 datalen -= reason_len; 2543 } 2544 } 2545 2546 return (0); 2547 } 2548 2549 int 2550 parse_capabilities(struct peer *peer, u_char *d, uint16_t dlen, uint32_t *as) 2551 { 2552 u_char *capa_val; 2553 uint32_t remote_as; 2554 uint16_t len; 2555 uint16_t afi; 2556 uint16_t gr_header; 2557 uint8_t safi; 2558 uint8_t aid; 2559 uint8_t flags; 2560 uint8_t capa_code; 2561 uint8_t capa_len; 2562 uint8_t i; 2563 2564 len = dlen; 2565 while (len > 0) { 2566 if (len < 2) { 2567 log_peer_warnx(&peer->conf, "Bad capabilities attr " 2568 "length: %u, too short", len); 2569 return (-1); 2570 } 2571 memcpy(&capa_code, d, sizeof(capa_code)); 2572 d += sizeof(capa_code); 2573 len -= sizeof(capa_code); 2574 memcpy(&capa_len, d, sizeof(capa_len)); 2575 d += sizeof(capa_len); 2576 len -= sizeof(capa_len); 2577 if (capa_len > 0) { 2578 if (len < capa_len) { 2579 log_peer_warnx(&peer->conf, 2580 "Bad capabilities attr length: " 2581 "len %u smaller than capa_len %u", 2582 len, capa_len); 2583 return (-1); 2584 } 2585 capa_val = d; 2586 d += capa_len; 2587 len -= capa_len; 2588 } else 2589 capa_val = NULL; 2590 2591 switch (capa_code) { 2592 case CAPA_MP: /* RFC 4760 */ 2593 if (capa_len != 4) { 2594 log_peer_warnx(&peer->conf, 2595 "Bad multi protocol capability length: " 2596 "%u", capa_len); 2597 break; 2598 } 2599 memcpy(&afi, capa_val, sizeof(afi)); 2600 afi = ntohs(afi); 2601 memcpy(&safi, capa_val + 3, sizeof(safi)); 2602 if (afi2aid(afi, safi, &aid) == -1) { 2603 log_peer_warnx(&peer->conf, 2604 "Received multi protocol capability: " 2605 " unknown AFI %u, safi %u pair", 2606 afi, safi); 2607 break; 2608 } 2609 peer->capa.peer.mp[aid] = 1; 2610 break; 2611 case CAPA_REFRESH: 2612 peer->capa.peer.refresh = 1; 2613 break; 2614 case CAPA_ROLE: 2615 if (capa_len != 1) { 2616 log_peer_warnx(&peer->conf, 2617 "Bad open policy capability length: " 2618 "%u", capa_len); 2619 break; 2620 } 2621 peer->capa.peer.role_ena = 1; 2622 peer->capa.peer.role = *capa_val; 2623 break; 2624 case CAPA_RESTART: 2625 if (capa_len == 2) { 2626 /* peer only supports EoR marker */ 2627 peer->capa.peer.grestart.restart = 1; 2628 peer->capa.peer.grestart.timeout = 0; 2629 break; 2630 } else if (capa_len % 4 != 2) { 2631 log_peer_warnx(&peer->conf, 2632 "Bad graceful restart capability length: " 2633 "%u", capa_len); 2634 peer->capa.peer.grestart.restart = 0; 2635 peer->capa.peer.grestart.timeout = 0; 2636 break; 2637 } 2638 2639 memcpy(&gr_header, capa_val, sizeof(gr_header)); 2640 gr_header = ntohs(gr_header); 2641 peer->capa.peer.grestart.timeout = 2642 gr_header & CAPA_GR_TIMEMASK; 2643 if (peer->capa.peer.grestart.timeout == 0) { 2644 log_peer_warnx(&peer->conf, "Received " 2645 "graceful restart timeout is zero"); 2646 peer->capa.peer.grestart.restart = 0; 2647 break; 2648 } 2649 2650 for (i = 2; i <= capa_len - 4; i += 4) { 2651 memcpy(&afi, capa_val + i, sizeof(afi)); 2652 afi = ntohs(afi); 2653 safi = capa_val[i + 2]; 2654 flags = capa_val[i + 3]; 2655 if (afi2aid(afi, safi, &aid) == -1) { 2656 log_peer_warnx(&peer->conf, 2657 "Received graceful restart capa: " 2658 " unknown AFI %u, safi %u pair", 2659 afi, safi); 2660 continue; 2661 } 2662 peer->capa.peer.grestart.flags[aid] |= 2663 CAPA_GR_PRESENT; 2664 if (flags & CAPA_GR_F_FLAG) 2665 peer->capa.peer.grestart.flags[aid] |= 2666 CAPA_GR_FORWARD; 2667 if (gr_header & CAPA_GR_R_FLAG) 2668 peer->capa.peer.grestart.flags[aid] |= 2669 CAPA_GR_RESTART; 2670 peer->capa.peer.grestart.restart = 2; 2671 } 2672 break; 2673 case CAPA_AS4BYTE: 2674 if (capa_len != 4) { 2675 log_peer_warnx(&peer->conf, 2676 "Bad AS4BYTE capability length: " 2677 "%u", capa_len); 2678 peer->capa.peer.as4byte = 0; 2679 break; 2680 } 2681 memcpy(&remote_as, capa_val, sizeof(remote_as)); 2682 *as = ntohl(remote_as); 2683 if (*as == 0) { 2684 log_peer_warnx(&peer->conf, 2685 "peer requests unacceptable AS %u", *as); 2686 session_notification(peer, ERR_OPEN, 2687 ERR_OPEN_AS, NULL, 0); 2688 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2689 return (-1); 2690 } 2691 peer->capa.peer.as4byte = 1; 2692 break; 2693 case CAPA_ADD_PATH: 2694 if (capa_len % 4 != 0) { 2695 log_peer_warnx(&peer->conf, 2696 "Bad ADD-PATH capability length: " 2697 "%u", capa_len); 2698 memset(peer->capa.peer.add_path, 0, 2699 sizeof(peer->capa.peer.add_path)); 2700 break; 2701 } 2702 for (i = 0; i <= capa_len - 4; i += 4) { 2703 memcpy(&afi, capa_val + i, sizeof(afi)); 2704 afi = ntohs(afi); 2705 safi = capa_val[i + 2]; 2706 flags = capa_val[i + 3]; 2707 if (afi2aid(afi, safi, &aid) == -1) { 2708 log_peer_warnx(&peer->conf, 2709 "Received ADD-PATH capa: " 2710 " unknown AFI %u, safi %u pair", 2711 afi, safi); 2712 memset(peer->capa.peer.add_path, 0, 2713 sizeof(peer->capa.peer.add_path)); 2714 break; 2715 } 2716 if (flags & ~CAPA_AP_BIDIR) { 2717 log_peer_warnx(&peer->conf, 2718 "Received ADD-PATH capa: " 2719 " bad flags %x", flags); 2720 memset(peer->capa.peer.add_path, 0, 2721 sizeof(peer->capa.peer.add_path)); 2722 break; 2723 } 2724 peer->capa.peer.add_path[aid] = flags; 2725 } 2726 break; 2727 case CAPA_ENHANCED_RR: 2728 peer->capa.peer.enhanced_rr = 1; 2729 break; 2730 default: 2731 break; 2732 } 2733 } 2734 2735 return (0); 2736 } 2737 2738 int 2739 capa_neg_calc(struct peer *p, uint8_t *suberr) 2740 { 2741 uint8_t i, hasmp = 0; 2742 2743 /* a capability is accepted only if both sides announced it */ 2744 2745 p->capa.neg.refresh = 2746 (p->capa.ann.refresh && p->capa.peer.refresh) != 0; 2747 p->capa.neg.enhanced_rr = 2748 (p->capa.ann.enhanced_rr && p->capa.peer.enhanced_rr) != 0; 2749 2750 p->capa.neg.as4byte = 2751 (p->capa.ann.as4byte && p->capa.peer.as4byte) != 0; 2752 2753 /* MP: both side must agree on the AFI,SAFI pair */ 2754 for (i = 0; i < AID_MAX; i++) { 2755 if (p->capa.ann.mp[i] && p->capa.peer.mp[i]) 2756 p->capa.neg.mp[i] = 1; 2757 else 2758 p->capa.neg.mp[i] = 0; 2759 if (p->capa.ann.mp[i]) 2760 hasmp = 1; 2761 } 2762 /* if no MP capability present default to IPv4 unicast mode */ 2763 if (!hasmp) 2764 p->capa.neg.mp[AID_INET] = 1; 2765 2766 /* 2767 * graceful restart: the peer capabilities are of interest here. 2768 * It is necessary to compare the new values with the previous ones 2769 * and act acordingly. AFI/SAFI that are not part in the MP capability 2770 * are treated as not being present. 2771 * Also make sure that a flush happens if the session stopped 2772 * supporting graceful restart. 2773 */ 2774 2775 for (i = 0; i < AID_MAX; i++) { 2776 int8_t negflags; 2777 2778 /* disable GR if the AFI/SAFI is not present */ 2779 if ((p->capa.peer.grestart.flags[i] & CAPA_GR_PRESENT && 2780 p->capa.neg.mp[i] == 0)) 2781 p->capa.peer.grestart.flags[i] = 0; /* disable */ 2782 /* look at current GR state and decide what to do */ 2783 negflags = p->capa.neg.grestart.flags[i]; 2784 p->capa.neg.grestart.flags[i] = p->capa.peer.grestart.flags[i]; 2785 if (negflags & CAPA_GR_RESTARTING) { 2786 if (p->capa.ann.grestart.restart != 0 && 2787 p->capa.peer.grestart.flags[i] & CAPA_GR_FORWARD) { 2788 p->capa.neg.grestart.flags[i] |= 2789 CAPA_GR_RESTARTING; 2790 } else { 2791 if (imsg_rde(IMSG_SESSION_FLUSH, p->conf.id, 2792 &i, sizeof(i)) == -1) { 2793 log_peer_warnx(&p->conf, 2794 "imsg send failed"); 2795 return (-1); 2796 } 2797 log_peer_warnx(&p->conf, "graceful restart of " 2798 "%s, not restarted, flushing", aid2str(i)); 2799 } 2800 } 2801 } 2802 p->capa.neg.grestart.timeout = p->capa.peer.grestart.timeout; 2803 p->capa.neg.grestart.restart = p->capa.peer.grestart.restart; 2804 if (p->capa.ann.grestart.restart == 0) 2805 p->capa.neg.grestart.restart = 0; 2806 2807 2808 /* 2809 * ADD-PATH: set only those bits where both sides agree. 2810 * For this compare our send bit with the recv bit from the peer 2811 * and vice versa. 2812 * The flags are stored from this systems view point. 2813 */ 2814 memset(p->capa.neg.add_path, 0, sizeof(p->capa.neg.add_path)); 2815 if (p->capa.ann.add_path[0]) { 2816 for (i = AID_MIN; i < AID_MAX; i++) { 2817 if ((p->capa.ann.add_path[i] & CAPA_AP_RECV) && 2818 (p->capa.peer.add_path[i] & CAPA_AP_SEND)) { 2819 p->capa.neg.add_path[i] |= CAPA_AP_RECV; 2820 p->capa.neg.add_path[0] |= CAPA_AP_RECV; 2821 } 2822 if ((p->capa.ann.add_path[i] & CAPA_AP_SEND) && 2823 (p->capa.peer.add_path[i] & CAPA_AP_RECV)) { 2824 p->capa.neg.add_path[i] |= CAPA_AP_SEND; 2825 p->capa.neg.add_path[0] |= CAPA_AP_SEND; 2826 } 2827 } 2828 } 2829 2830 /* 2831 * Open policy: check that the policy is sensible. 2832 * 2833 * Make sure that the roles match and set the negotiated capability 2834 * to the role of the peer. So the RDE can inject the OTC attribute. 2835 * See RFC 9234, section 4.2. 2836 */ 2837 if (p->capa.ann.role_ena != 0 && p->capa.peer.role_ena != 0) { 2838 switch (p->capa.ann.role) { 2839 case CAPA_ROLE_PROVIDER: 2840 if (p->capa.peer.role != CAPA_ROLE_CUSTOMER) 2841 goto fail; 2842 break; 2843 case CAPA_ROLE_RS: 2844 if (p->capa.peer.role != CAPA_ROLE_RS_CLIENT) 2845 goto fail; 2846 break; 2847 case CAPA_ROLE_RS_CLIENT: 2848 if (p->capa.peer.role != CAPA_ROLE_RS) 2849 goto fail; 2850 break; 2851 case CAPA_ROLE_CUSTOMER: 2852 if (p->capa.peer.role != CAPA_ROLE_PROVIDER) 2853 goto fail; 2854 break; 2855 case CAPA_ROLE_PEER: 2856 if (p->capa.peer.role != CAPA_ROLE_PEER) 2857 goto fail; 2858 break; 2859 default: 2860 fail: 2861 log_peer_warnx(&p->conf, "open policy role mismatch: " 2862 "%s vs %s", log_policy(p->capa.ann.role), 2863 log_policy(p->capa.peer.role)); 2864 *suberr = ERR_OPEN_ROLE; 2865 return (-1); 2866 } 2867 p->capa.neg.role_ena = 1; 2868 p->capa.neg.role = p->capa.peer.role; 2869 } else if (p->capa.ann.role_ena == 2) { 2870 /* enforce presence of open policy role capability */ 2871 log_peer_warnx(&p->conf, "open policy role enforced but " 2872 "not present"); 2873 *suberr = ERR_OPEN_ROLE; 2874 return (-1); 2875 } 2876 2877 return (0); 2878 } 2879 2880 void 2881 session_dispatch_imsg(struct imsgbuf *ibuf, int idx, u_int *listener_cnt) 2882 { 2883 struct imsg imsg; 2884 struct mrt xmrt; 2885 struct route_refresh rr; 2886 struct mrt *mrt; 2887 struct imsgbuf *i; 2888 struct peer *p; 2889 struct listen_addr *la, *nla; 2890 struct session_dependon *sdon; 2891 u_char *data; 2892 int n, fd, depend_ok, restricted; 2893 uint16_t t; 2894 uint8_t aid, errcode, subcode; 2895 2896 while (ibuf) { 2897 if ((n = imsg_get(ibuf, &imsg)) == -1) 2898 fatal("session_dispatch_imsg: imsg_get error"); 2899 2900 if (n == 0) 2901 break; 2902 2903 switch (imsg.hdr.type) { 2904 case IMSG_SOCKET_CONN: 2905 case IMSG_SOCKET_CONN_CTL: 2906 if (idx != PFD_PIPE_MAIN) 2907 fatalx("reconf request not from parent"); 2908 if ((fd = imsg.fd) == -1) { 2909 log_warnx("expected to receive imsg fd to " 2910 "RDE but didn't receive any"); 2911 break; 2912 } 2913 if ((i = malloc(sizeof(struct imsgbuf))) == NULL) 2914 fatal(NULL); 2915 imsg_init(i, fd); 2916 if (imsg.hdr.type == IMSG_SOCKET_CONN) { 2917 if (ibuf_rde) { 2918 log_warnx("Unexpected imsg connection " 2919 "to RDE received"); 2920 msgbuf_clear(&ibuf_rde->w); 2921 free(ibuf_rde); 2922 } 2923 ibuf_rde = i; 2924 } else { 2925 if (ibuf_rde_ctl) { 2926 log_warnx("Unexpected imsg ctl " 2927 "connection to RDE received"); 2928 msgbuf_clear(&ibuf_rde_ctl->w); 2929 free(ibuf_rde_ctl); 2930 } 2931 ibuf_rde_ctl = i; 2932 } 2933 break; 2934 case IMSG_RECONF_CONF: 2935 if (idx != PFD_PIPE_MAIN) 2936 fatalx("reconf request not from parent"); 2937 nconf = new_config(); 2938 2939 copy_config(nconf, imsg.data); 2940 pending_reconf = 1; 2941 break; 2942 case IMSG_RECONF_PEER: 2943 if (idx != PFD_PIPE_MAIN) 2944 fatalx("reconf request not from parent"); 2945 if ((p = calloc(1, sizeof(struct peer))) == NULL) 2946 fatal("new_peer"); 2947 memcpy(&p->conf, imsg.data, sizeof(struct peer_config)); 2948 p->state = p->prev_state = STATE_NONE; 2949 p->reconf_action = RECONF_REINIT; 2950 if (RB_INSERT(peer_head, &nconf->peers, p) != NULL) 2951 fatalx("%s: peer tree is corrupt", __func__); 2952 break; 2953 case IMSG_RECONF_LISTENER: 2954 if (idx != PFD_PIPE_MAIN) 2955 fatalx("reconf request not from parent"); 2956 if (nconf == NULL) 2957 fatalx("IMSG_RECONF_LISTENER but no config"); 2958 nla = imsg.data; 2959 TAILQ_FOREACH(la, conf->listen_addrs, entry) 2960 if (!la_cmp(la, nla)) 2961 break; 2962 2963 if (la == NULL) { 2964 if (nla->reconf != RECONF_REINIT) 2965 fatalx("king bula sez: " 2966 "expected REINIT"); 2967 2968 if ((nla->fd = imsg.fd) == -1) 2969 log_warnx("expected to receive fd for " 2970 "%s but didn't receive any", 2971 log_sockaddr((struct sockaddr *) 2972 &nla->sa, nla->sa_len)); 2973 2974 la = calloc(1, sizeof(struct listen_addr)); 2975 if (la == NULL) 2976 fatal(NULL); 2977 memcpy(&la->sa, &nla->sa, sizeof(la->sa)); 2978 la->flags = nla->flags; 2979 la->fd = nla->fd; 2980 la->reconf = RECONF_REINIT; 2981 TAILQ_INSERT_TAIL(nconf->listen_addrs, la, 2982 entry); 2983 } else { 2984 if (nla->reconf != RECONF_KEEP) 2985 fatalx("king bula sez: expected KEEP"); 2986 la->reconf = RECONF_KEEP; 2987 } 2988 2989 break; 2990 case IMSG_RECONF_CTRL: 2991 if (idx != PFD_PIPE_MAIN) 2992 fatalx("reconf request not from parent"); 2993 if (imsg.hdr.len != IMSG_HEADER_SIZE + 2994 sizeof(restricted)) 2995 fatalx("RECONF_CTRL imsg with wrong len"); 2996 memcpy(&restricted, imsg.data, sizeof(restricted)); 2997 if (imsg.fd == -1) { 2998 log_warnx("expected to receive fd for control " 2999 "socket but didn't receive any"); 3000 break; 3001 } 3002 if (restricted) { 3003 control_shutdown(rcsock); 3004 rcsock = imsg.fd; 3005 } else { 3006 control_shutdown(csock); 3007 csock = imsg.fd; 3008 } 3009 break; 3010 case IMSG_RECONF_DRAIN: 3011 switch (idx) { 3012 case PFD_PIPE_ROUTE: 3013 if (nconf != NULL) 3014 fatalx("got unexpected %s from RDE", 3015 "IMSG_RECONF_DONE"); 3016 imsg_compose(ibuf_main, IMSG_RECONF_DONE, 0, 0, 3017 -1, NULL, 0); 3018 break; 3019 case PFD_PIPE_MAIN: 3020 if (nconf == NULL) 3021 fatalx("got unexpected %s from parent", 3022 "IMSG_RECONF_DONE"); 3023 imsg_compose(ibuf_main, IMSG_RECONF_DRAIN, 0, 0, 3024 -1, NULL, 0); 3025 break; 3026 default: 3027 fatalx("reconf request not from parent or RDE"); 3028 } 3029 break; 3030 case IMSG_RECONF_DONE: 3031 if (idx != PFD_PIPE_MAIN) 3032 fatalx("reconf request not from parent"); 3033 if (nconf == NULL) 3034 fatalx("got IMSG_RECONF_DONE but no config"); 3035 copy_config(conf, nconf); 3036 merge_peers(conf, nconf); 3037 3038 /* delete old listeners */ 3039 for (la = TAILQ_FIRST(conf->listen_addrs); la != NULL; 3040 la = nla) { 3041 nla = TAILQ_NEXT(la, entry); 3042 if (la->reconf == RECONF_NONE) { 3043 log_info("not listening on %s any more", 3044 log_sockaddr((struct sockaddr *) 3045 &la->sa, la->sa_len)); 3046 TAILQ_REMOVE(conf->listen_addrs, la, 3047 entry); 3048 close(la->fd); 3049 free(la); 3050 } 3051 } 3052 3053 /* add new listeners */ 3054 TAILQ_CONCAT(conf->listen_addrs, nconf->listen_addrs, 3055 entry); 3056 3057 setup_listeners(listener_cnt); 3058 free_config(nconf); 3059 nconf = NULL; 3060 pending_reconf = 0; 3061 log_info("SE reconfigured"); 3062 /* 3063 * IMSG_RECONF_DONE is sent when the RDE drained 3064 * the peer config sent in merge_peers(). 3065 */ 3066 break; 3067 case IMSG_SESSION_DEPENDON: 3068 if (idx != PFD_PIPE_MAIN) 3069 fatalx("IFINFO message not from parent"); 3070 if (imsg.hdr.len != IMSG_HEADER_SIZE + 3071 sizeof(struct session_dependon)) 3072 fatalx("DEPENDON imsg with wrong len"); 3073 sdon = imsg.data; 3074 depend_ok = sdon->depend_state; 3075 3076 RB_FOREACH(p, peer_head, &conf->peers) 3077 if (!strcmp(p->conf.if_depend, sdon->ifname)) { 3078 if (depend_ok && !p->depend_ok) { 3079 p->depend_ok = depend_ok; 3080 bgp_fsm(p, EVNT_START); 3081 } else if (!depend_ok && p->depend_ok) { 3082 p->depend_ok = depend_ok; 3083 session_stop(p, 3084 ERR_CEASE_OTHER_CHANGE); 3085 } 3086 } 3087 break; 3088 case IMSG_MRT_OPEN: 3089 case IMSG_MRT_REOPEN: 3090 if (imsg.hdr.len > IMSG_HEADER_SIZE + 3091 sizeof(struct mrt)) { 3092 log_warnx("wrong imsg len"); 3093 break; 3094 } 3095 3096 memcpy(&xmrt, imsg.data, sizeof(struct mrt)); 3097 if ((xmrt.wbuf.fd = imsg.fd) == -1) 3098 log_warnx("expected to receive fd for mrt dump " 3099 "but didn't receive any"); 3100 3101 mrt = mrt_get(&mrthead, &xmrt); 3102 if (mrt == NULL) { 3103 /* new dump */ 3104 mrt = calloc(1, sizeof(struct mrt)); 3105 if (mrt == NULL) 3106 fatal("session_dispatch_imsg"); 3107 memcpy(mrt, &xmrt, sizeof(struct mrt)); 3108 TAILQ_INIT(&mrt->wbuf.bufs); 3109 LIST_INSERT_HEAD(&mrthead, mrt, entry); 3110 } else { 3111 /* old dump reopened */ 3112 close(mrt->wbuf.fd); 3113 mrt->wbuf.fd = xmrt.wbuf.fd; 3114 } 3115 break; 3116 case IMSG_MRT_CLOSE: 3117 if (imsg.hdr.len > IMSG_HEADER_SIZE + 3118 sizeof(struct mrt)) { 3119 log_warnx("wrong imsg len"); 3120 break; 3121 } 3122 3123 memcpy(&xmrt, imsg.data, sizeof(struct mrt)); 3124 mrt = mrt_get(&mrthead, &xmrt); 3125 if (mrt != NULL) 3126 mrt_done(mrt); 3127 break; 3128 case IMSG_CTL_KROUTE: 3129 case IMSG_CTL_KROUTE_ADDR: 3130 case IMSG_CTL_SHOW_NEXTHOP: 3131 case IMSG_CTL_SHOW_INTERFACE: 3132 case IMSG_CTL_SHOW_FIB_TABLES: 3133 case IMSG_CTL_SHOW_RTR: 3134 case IMSG_CTL_SHOW_TIMER: 3135 if (idx != PFD_PIPE_MAIN) 3136 fatalx("ctl kroute request not from parent"); 3137 control_imsg_relay(&imsg); 3138 break; 3139 case IMSG_CTL_SHOW_RIB: 3140 case IMSG_CTL_SHOW_RIB_PREFIX: 3141 case IMSG_CTL_SHOW_RIB_COMMUNITIES: 3142 case IMSG_CTL_SHOW_RIB_ATTR: 3143 case IMSG_CTL_SHOW_RIB_MEM: 3144 case IMSG_CTL_SHOW_RIB_HASH: 3145 case IMSG_CTL_SHOW_NETWORK: 3146 case IMSG_CTL_SHOW_NEIGHBOR: 3147 case IMSG_CTL_SHOW_SET: 3148 if (idx != PFD_PIPE_ROUTE_CTL) 3149 fatalx("ctl rib request not from RDE"); 3150 control_imsg_relay(&imsg); 3151 break; 3152 case IMSG_CTL_END: 3153 case IMSG_CTL_RESULT: 3154 control_imsg_relay(&imsg); 3155 break; 3156 case IMSG_UPDATE: 3157 if (idx != PFD_PIPE_ROUTE) 3158 fatalx("update request not from RDE"); 3159 if (imsg.hdr.len > IMSG_HEADER_SIZE + 3160 MAX_PKTSIZE - MSGSIZE_HEADER || 3161 imsg.hdr.len < IMSG_HEADER_SIZE + 3162 MSGSIZE_UPDATE_MIN - MSGSIZE_HEADER) 3163 log_warnx("RDE sent invalid update"); 3164 else 3165 session_update(imsg.hdr.peerid, imsg.data, 3166 imsg.hdr.len - IMSG_HEADER_SIZE); 3167 break; 3168 case IMSG_UPDATE_ERR: 3169 if (idx != PFD_PIPE_ROUTE) 3170 fatalx("update request not from RDE"); 3171 if (imsg.hdr.len < IMSG_HEADER_SIZE + 2) { 3172 log_warnx("RDE sent invalid notification"); 3173 break; 3174 } 3175 if ((p = getpeerbyid(conf, imsg.hdr.peerid)) == NULL) { 3176 log_warnx("no such peer: id=%u", 3177 imsg.hdr.peerid); 3178 break; 3179 } 3180 data = imsg.data; 3181 errcode = *data++; 3182 subcode = *data++; 3183 3184 if (imsg.hdr.len == IMSG_HEADER_SIZE + 2) 3185 data = NULL; 3186 3187 session_notification(p, errcode, subcode, 3188 data, imsg.hdr.len - IMSG_HEADER_SIZE - 2); 3189 switch (errcode) { 3190 case ERR_CEASE: 3191 switch (subcode) { 3192 case ERR_CEASE_MAX_PREFIX: 3193 case ERR_CEASE_MAX_SENT_PREFIX: 3194 t = p->conf.max_out_prefix_restart; 3195 if (subcode == ERR_CEASE_MAX_PREFIX) 3196 t = p->conf.max_prefix_restart; 3197 3198 bgp_fsm(p, EVNT_STOP); 3199 if (t) 3200 timer_set(&p->timers, 3201 Timer_IdleHold, 60 * t); 3202 break; 3203 default: 3204 bgp_fsm(p, EVNT_CON_FATAL); 3205 break; 3206 } 3207 break; 3208 default: 3209 bgp_fsm(p, EVNT_CON_FATAL); 3210 break; 3211 } 3212 break; 3213 case IMSG_REFRESH: 3214 if (idx != PFD_PIPE_ROUTE) 3215 fatalx("route refresh request not from RDE"); 3216 if (imsg.hdr.len < IMSG_HEADER_SIZE + sizeof(rr)) { 3217 log_warnx("RDE sent invalid refresh msg"); 3218 break; 3219 } 3220 if ((p = getpeerbyid(conf, imsg.hdr.peerid)) == NULL) { 3221 log_warnx("no such peer: id=%u", 3222 imsg.hdr.peerid); 3223 break; 3224 } 3225 memcpy(&rr, imsg.data, sizeof(rr)); 3226 if (rr.aid >= AID_MAX) 3227 fatalx("IMSG_REFRESH: bad AID"); 3228 session_rrefresh(p, rr.aid, rr.subtype); 3229 break; 3230 case IMSG_SESSION_RESTARTED: 3231 if (idx != PFD_PIPE_ROUTE) 3232 fatalx("update request not from RDE"); 3233 if (imsg.hdr.len < IMSG_HEADER_SIZE + sizeof(aid)) { 3234 log_warnx("RDE sent invalid restart msg"); 3235 break; 3236 } 3237 if ((p = getpeerbyid(conf, imsg.hdr.peerid)) == NULL) { 3238 log_warnx("no such peer: id=%u", 3239 imsg.hdr.peerid); 3240 break; 3241 } 3242 memcpy(&aid, imsg.data, sizeof(aid)); 3243 if (aid >= AID_MAX) 3244 fatalx("IMSG_SESSION_RESTARTED: bad AID"); 3245 if (p->capa.neg.grestart.flags[aid] & 3246 CAPA_GR_RESTARTING) { 3247 log_peer_warnx(&p->conf, 3248 "graceful restart of %s finished", 3249 aid2str(aid)); 3250 p->capa.neg.grestart.flags[aid] &= 3251 ~CAPA_GR_RESTARTING; 3252 timer_stop(&p->timers, Timer_RestartTimeout); 3253 3254 /* signal back to RDE to cleanup stale routes */ 3255 if (imsg_rde(IMSG_SESSION_RESTARTED, 3256 imsg.hdr.peerid, &aid, sizeof(aid)) == -1) 3257 fatal("imsg_compose: " 3258 "IMSG_SESSION_RESTARTED"); 3259 } 3260 break; 3261 case IMSG_SESSION_DOWN: 3262 if (idx != PFD_PIPE_ROUTE) 3263 fatalx("update request not from RDE"); 3264 if ((p = getpeerbyid(conf, imsg.hdr.peerid)) == NULL) { 3265 log_warnx("no such peer: id=%u", 3266 imsg.hdr.peerid); 3267 break; 3268 } 3269 session_stop(p, ERR_CEASE_ADMIN_DOWN); 3270 break; 3271 default: 3272 break; 3273 } 3274 imsg_free(&imsg); 3275 } 3276 } 3277 3278 int 3279 la_cmp(struct listen_addr *a, struct listen_addr *b) 3280 { 3281 struct sockaddr_in *in_a, *in_b; 3282 struct sockaddr_in6 *in6_a, *in6_b; 3283 3284 if (a->sa.ss_family != b->sa.ss_family) 3285 return (1); 3286 3287 switch (a->sa.ss_family) { 3288 case AF_INET: 3289 in_a = (struct sockaddr_in *)&a->sa; 3290 in_b = (struct sockaddr_in *)&b->sa; 3291 if (in_a->sin_addr.s_addr != in_b->sin_addr.s_addr) 3292 return (1); 3293 if (in_a->sin_port != in_b->sin_port) 3294 return (1); 3295 break; 3296 case AF_INET6: 3297 in6_a = (struct sockaddr_in6 *)&a->sa; 3298 in6_b = (struct sockaddr_in6 *)&b->sa; 3299 if (bcmp(&in6_a->sin6_addr, &in6_b->sin6_addr, 3300 sizeof(struct in6_addr))) 3301 return (1); 3302 if (in6_a->sin6_port != in6_b->sin6_port) 3303 return (1); 3304 break; 3305 default: 3306 fatal("king bula sez: unknown address family"); 3307 /* NOTREACHED */ 3308 } 3309 3310 return (0); 3311 } 3312 3313 struct peer * 3314 getpeerbydesc(struct bgpd_config *c, const char *descr) 3315 { 3316 struct peer *p, *res = NULL; 3317 int match = 0; 3318 3319 RB_FOREACH(p, peer_head, &c->peers) 3320 if (!strcmp(p->conf.descr, descr)) { 3321 res = p; 3322 match++; 3323 } 3324 3325 if (match > 1) 3326 log_info("neighbor description \"%s\" not unique, request " 3327 "aborted", descr); 3328 3329 if (match == 1) 3330 return (res); 3331 else 3332 return (NULL); 3333 } 3334 3335 struct peer * 3336 getpeerbyip(struct bgpd_config *c, struct sockaddr *ip) 3337 { 3338 struct bgpd_addr addr; 3339 struct peer *p, *newpeer, *loose = NULL; 3340 uint32_t id; 3341 3342 sa2addr(ip, &addr, NULL); 3343 3344 /* we might want a more effective way to find peers by IP */ 3345 RB_FOREACH(p, peer_head, &c->peers) 3346 if (!p->conf.template && 3347 !memcmp(&addr, &p->conf.remote_addr, sizeof(addr))) 3348 return (p); 3349 3350 /* try template matching */ 3351 RB_FOREACH(p, peer_head, &c->peers) 3352 if (p->conf.template && 3353 p->conf.remote_addr.aid == addr.aid && 3354 session_match_mask(p, &addr)) 3355 if (loose == NULL || loose->conf.remote_masklen < 3356 p->conf.remote_masklen) 3357 loose = p; 3358 3359 if (loose != NULL) { 3360 /* clone */ 3361 if ((newpeer = malloc(sizeof(struct peer))) == NULL) 3362 fatal(NULL); 3363 memcpy(newpeer, loose, sizeof(struct peer)); 3364 for (id = PEER_ID_DYN_MAX; id > PEER_ID_STATIC_MAX; id--) { 3365 if (getpeerbyid(c, id) == NULL) /* we found a free id */ 3366 break; 3367 } 3368 newpeer->template = loose; 3369 session_template_clone(newpeer, ip, id, 0); 3370 newpeer->state = newpeer->prev_state = STATE_NONE; 3371 newpeer->reconf_action = RECONF_KEEP; 3372 newpeer->rbuf = NULL; 3373 newpeer->rpending = 0; 3374 init_peer(newpeer); 3375 bgp_fsm(newpeer, EVNT_START); 3376 if (RB_INSERT(peer_head, &c->peers, newpeer) != NULL) 3377 fatalx("%s: peer tree is corrupt", __func__); 3378 return (newpeer); 3379 } 3380 3381 return (NULL); 3382 } 3383 3384 struct peer * 3385 getpeerbyid(struct bgpd_config *c, uint32_t peerid) 3386 { 3387 static struct peer lookup; 3388 3389 lookup.conf.id = peerid; 3390 3391 return RB_FIND(peer_head, &c->peers, &lookup); 3392 } 3393 3394 int 3395 peer_matched(struct peer *p, struct ctl_neighbor *n) 3396 { 3397 char *s; 3398 3399 if (n && n->addr.aid) { 3400 if (memcmp(&p->conf.remote_addr, &n->addr, 3401 sizeof(p->conf.remote_addr))) 3402 return 0; 3403 } else if (n && n->descr[0]) { 3404 s = n->is_group ? p->conf.group : p->conf.descr; 3405 if (strcmp(s, n->descr)) 3406 return 0; 3407 } 3408 return 1; 3409 } 3410 3411 void 3412 session_template_clone(struct peer *p, struct sockaddr *ip, uint32_t id, 3413 uint32_t as) 3414 { 3415 struct bgpd_addr remote_addr; 3416 3417 if (ip) 3418 sa2addr(ip, &remote_addr, NULL); 3419 else 3420 memcpy(&remote_addr, &p->conf.remote_addr, sizeof(remote_addr)); 3421 3422 memcpy(&p->conf, &p->template->conf, sizeof(struct peer_config)); 3423 3424 p->conf.id = id; 3425 3426 if (as) { 3427 p->conf.remote_as = as; 3428 p->conf.ebgp = (p->conf.remote_as != p->conf.local_as); 3429 if (!p->conf.ebgp) 3430 /* force enforce_as off for iBGP sessions */ 3431 p->conf.enforce_as = ENFORCE_AS_OFF; 3432 } 3433 3434 memcpy(&p->conf.remote_addr, &remote_addr, sizeof(remote_addr)); 3435 switch (p->conf.remote_addr.aid) { 3436 case AID_INET: 3437 p->conf.remote_masklen = 32; 3438 break; 3439 case AID_INET6: 3440 p->conf.remote_masklen = 128; 3441 break; 3442 } 3443 p->conf.template = 0; 3444 } 3445 3446 int 3447 session_match_mask(struct peer *p, struct bgpd_addr *a) 3448 { 3449 struct bgpd_addr masked; 3450 3451 applymask(&masked, a, p->conf.remote_masklen); 3452 if (memcmp(&masked, &p->conf.remote_addr, sizeof(masked)) == 0) 3453 return (1); 3454 return (0); 3455 } 3456 3457 void 3458 session_down(struct peer *peer) 3459 { 3460 bzero(&peer->capa.neg, sizeof(peer->capa.neg)); 3461 peer->stats.last_updown = getmonotime(); 3462 /* 3463 * session_down is called in the exit code path so check 3464 * if the RDE is still around, if not there is no need to 3465 * send the message. 3466 */ 3467 if (ibuf_rde == NULL) 3468 return; 3469 if (imsg_rde(IMSG_SESSION_DOWN, peer->conf.id, NULL, 0) == -1) 3470 fatalx("imsg_compose error"); 3471 } 3472 3473 void 3474 session_up(struct peer *p) 3475 { 3476 struct session_up sup; 3477 3478 if (imsg_rde(IMSG_SESSION_ADD, p->conf.id, 3479 &p->conf, sizeof(p->conf)) == -1) 3480 fatalx("imsg_compose error"); 3481 3482 if (p->local.aid == AID_INET) { 3483 sup.local_v4_addr = p->local; 3484 sup.local_v6_addr = p->local_alt; 3485 } else { 3486 sup.local_v6_addr = p->local; 3487 sup.local_v4_addr = p->local_alt; 3488 } 3489 sup.remote_addr = p->remote; 3490 3491 sup.remote_bgpid = p->remote_bgpid; 3492 sup.short_as = p->short_as; 3493 memcpy(&sup.capa, &p->capa.neg, sizeof(sup.capa)); 3494 p->stats.last_updown = getmonotime(); 3495 if (imsg_rde(IMSG_SESSION_UP, p->conf.id, &sup, sizeof(sup)) == -1) 3496 fatalx("imsg_compose error"); 3497 } 3498 3499 int 3500 imsg_ctl_parent(int type, uint32_t peerid, pid_t pid, void *data, 3501 uint16_t datalen) 3502 { 3503 return (imsg_compose(ibuf_main, type, peerid, pid, -1, data, datalen)); 3504 } 3505 3506 int 3507 imsg_ctl_rde(int type, pid_t pid, void *data, uint16_t datalen) 3508 { 3509 if (ibuf_rde_ctl == NULL) 3510 return (0); 3511 3512 /* 3513 * Use control socket to talk to RDE to bypass the queue of the 3514 * regular imsg socket. 3515 */ 3516 return (imsg_compose(ibuf_rde_ctl, type, 0, pid, -1, data, datalen)); 3517 } 3518 3519 int 3520 imsg_rde(int type, uint32_t peerid, void *data, uint16_t datalen) 3521 { 3522 if (ibuf_rde == NULL) 3523 return (0); 3524 3525 return (imsg_compose(ibuf_rde, type, peerid, 0, -1, data, datalen)); 3526 } 3527 3528 void 3529 session_demote(struct peer *p, int level) 3530 { 3531 struct demote_msg msg; 3532 3533 strlcpy(msg.demote_group, p->conf.demote_group, 3534 sizeof(msg.demote_group)); 3535 msg.level = level; 3536 if (imsg_compose(ibuf_main, IMSG_DEMOTE, p->conf.id, 0, -1, 3537 &msg, sizeof(msg)) == -1) 3538 fatalx("imsg_compose error"); 3539 3540 p->demoted += level; 3541 } 3542 3543 void 3544 session_stop(struct peer *peer, uint8_t subcode) 3545 { 3546 char data[REASON_LEN]; 3547 size_t datalen; 3548 size_t reason_len; 3549 char *communication; 3550 3551 datalen = 0; 3552 communication = peer->conf.reason; 3553 3554 if ((subcode == ERR_CEASE_ADMIN_DOWN || 3555 subcode == ERR_CEASE_ADMIN_RESET) 3556 && communication && *communication) { 3557 reason_len = strlen(communication); 3558 if (reason_len > REASON_LEN - 1) { 3559 log_peer_warnx(&peer->conf, 3560 "trying to send overly long shutdown reason"); 3561 } else { 3562 data[0] = reason_len; 3563 datalen = reason_len + sizeof(data[0]); 3564 memcpy(data + 1, communication, reason_len); 3565 } 3566 } 3567 switch (peer->state) { 3568 case STATE_OPENSENT: 3569 case STATE_OPENCONFIRM: 3570 case STATE_ESTABLISHED: 3571 session_notification(peer, ERR_CEASE, subcode, data, datalen); 3572 break; 3573 default: 3574 /* session not open, no need to send notification */ 3575 break; 3576 } 3577 bgp_fsm(peer, EVNT_STOP); 3578 } 3579 3580 void 3581 merge_peers(struct bgpd_config *c, struct bgpd_config *nc) 3582 { 3583 struct peer *p, *np, *next; 3584 3585 RB_FOREACH(p, peer_head, &c->peers) { 3586 /* templates are handled specially */ 3587 if (p->template != NULL) 3588 continue; 3589 np = getpeerbyid(nc, p->conf.id); 3590 if (np == NULL) { 3591 p->reconf_action = RECONF_DELETE; 3592 continue; 3593 } 3594 3595 /* peer no longer uses TCP MD5SIG so deconfigure */ 3596 if (p->conf.auth.method == AUTH_MD5SIG && 3597 np->conf.auth.method != AUTH_MD5SIG) 3598 tcp_md5_del_listener(c, p); 3599 else if (np->conf.auth.method == AUTH_MD5SIG) 3600 tcp_md5_add_listener(c, np); 3601 3602 memcpy(&p->conf, &np->conf, sizeof(p->conf)); 3603 RB_REMOVE(peer_head, &nc->peers, np); 3604 free(np); 3605 3606 p->reconf_action = RECONF_KEEP; 3607 3608 /* had demotion, is demoted, demote removed? */ 3609 if (p->demoted && !p->conf.demote_group[0]) 3610 session_demote(p, -1); 3611 3612 /* if session is not open then refresh pfkey data */ 3613 if (p->state < STATE_OPENSENT && !p->template) 3614 imsg_compose(ibuf_main, IMSG_PFKEY_RELOAD, 3615 p->conf.id, 0, -1, NULL, 0); 3616 3617 /* sync the RDE in case we keep the peer */ 3618 if (imsg_rde(IMSG_SESSION_ADD, p->conf.id, 3619 &p->conf, sizeof(struct peer_config)) == -1) 3620 fatalx("imsg_compose error"); 3621 3622 /* apply the config to all clones of a template */ 3623 if (p->conf.template) { 3624 struct peer *xp; 3625 RB_FOREACH(xp, peer_head, &c->peers) { 3626 if (xp->template != p) 3627 continue; 3628 session_template_clone(xp, NULL, xp->conf.id, 3629 xp->conf.remote_as); 3630 if (imsg_rde(IMSG_SESSION_ADD, xp->conf.id, 3631 &xp->conf, sizeof(xp->conf)) == -1) 3632 fatalx("imsg_compose error"); 3633 } 3634 } 3635 } 3636 3637 if (imsg_rde(IMSG_RECONF_DRAIN, 0, NULL, 0) == -1) 3638 fatalx("imsg_compose error"); 3639 3640 /* pfkeys of new peers already loaded by the parent process */ 3641 RB_FOREACH_SAFE(np, peer_head, &nc->peers, next) { 3642 RB_REMOVE(peer_head, &nc->peers, np); 3643 if (RB_INSERT(peer_head, &c->peers, np) != NULL) 3644 fatalx("%s: peer tree is corrupt", __func__); 3645 if (np->conf.auth.method == AUTH_MD5SIG) 3646 tcp_md5_add_listener(c, np); 3647 } 3648 } 3649