1 /* $OpenBSD: session.c,v 1.455 2023/11/07 11:18:35 claudio Exp $ */ 2 3 /* 4 * Copyright (c) 2003, 2004, 2005 Henning Brauer <henning@openbsd.org> 5 * Copyright (c) 2017 Peter van Dijk <peter.van.dijk@powerdns.com> 6 * 7 * Permission to use, copy, modify, and distribute this software for any 8 * purpose with or without fee is hereby granted, provided that the above 9 * copyright notice and this permission notice appear in all copies. 10 * 11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18 */ 19 20 #include <sys/types.h> 21 22 #include <sys/mman.h> 23 #include <sys/socket.h> 24 #include <sys/time.h> 25 #include <sys/resource.h> 26 #include <sys/un.h> 27 #include <netinet/in.h> 28 #include <netinet/ip.h> 29 #include <netinet/tcp.h> 30 #include <arpa/inet.h> 31 #include <limits.h> 32 33 #include <err.h> 34 #include <errno.h> 35 #include <fcntl.h> 36 #include <ifaddrs.h> 37 #include <poll.h> 38 #include <pwd.h> 39 #include <signal.h> 40 #include <stdio.h> 41 #include <stdlib.h> 42 #include <string.h> 43 #include <syslog.h> 44 #include <unistd.h> 45 46 #include "bgpd.h" 47 #include "session.h" 48 #include "log.h" 49 50 #define PFD_PIPE_MAIN 0 51 #define PFD_PIPE_ROUTE 1 52 #define PFD_PIPE_ROUTE_CTL 2 53 #define PFD_SOCK_CTL 3 54 #define PFD_SOCK_RCTL 4 55 #define PFD_LISTENERS_START 5 56 57 void session_sighdlr(int); 58 int setup_listeners(u_int *); 59 void init_peer(struct peer *); 60 void start_timer_holdtime(struct peer *); 61 void start_timer_keepalive(struct peer *); 62 void 
session_close_connection(struct peer *); 63 void change_state(struct peer *, enum session_state, enum session_events); 64 int session_setup_socket(struct peer *); 65 void session_accept(int); 66 int session_connect(struct peer *); 67 void session_tcp_established(struct peer *); 68 void session_capa_ann_none(struct peer *); 69 int session_capa_add(struct ibuf *, uint8_t, uint8_t); 70 int session_capa_add_mp(struct ibuf *, uint8_t); 71 int session_capa_add_afi(struct peer *, struct ibuf *, uint8_t, uint8_t); 72 struct bgp_msg *session_newmsg(enum msg_type, uint16_t); 73 int session_sendmsg(struct bgp_msg *, struct peer *); 74 void session_open(struct peer *); 75 void session_keepalive(struct peer *); 76 void session_update(uint32_t, void *, size_t); 77 void session_notification(struct peer *, uint8_t, uint8_t, void *, 78 ssize_t); 79 void session_rrefresh(struct peer *, uint8_t, uint8_t); 80 int session_graceful_restart(struct peer *); 81 int session_graceful_stop(struct peer *); 82 int session_dispatch_msg(struct pollfd *, struct peer *); 83 void session_process_msg(struct peer *); 84 int parse_header(struct peer *, u_char *, uint16_t *, uint8_t *); 85 int parse_open(struct peer *); 86 int parse_update(struct peer *); 87 int parse_rrefresh(struct peer *); 88 int parse_notification(struct peer *); 89 int parse_capabilities(struct peer *, u_char *, uint16_t, uint32_t *); 90 int capa_neg_calc(struct peer *, uint8_t *); 91 void session_dispatch_imsg(struct imsgbuf *, int, u_int *); 92 void session_up(struct peer *); 93 void session_down(struct peer *); 94 int imsg_rde(int, uint32_t, void *, uint16_t); 95 void session_demote(struct peer *, int); 96 void merge_peers(struct bgpd_config *, struct bgpd_config *); 97 98 int la_cmp(struct listen_addr *, struct listen_addr *); 99 void session_template_clone(struct peer *, struct sockaddr *, 100 uint32_t, uint32_t); 101 int session_match_mask(struct peer *, struct bgpd_addr *); 102 103 static struct bgpd_config *conf, *nconf; 
104 static struct imsgbuf *ibuf_rde; 105 static struct imsgbuf *ibuf_rde_ctl; 106 static struct imsgbuf *ibuf_main; 107 108 struct bgpd_sysdep sysdep; 109 volatile sig_atomic_t session_quit; 110 int pending_reconf; 111 int csock = -1, rcsock = -1; 112 u_int peer_cnt; 113 114 struct mrt_head mrthead; 115 time_t pauseaccept; 116 117 static inline int 118 peer_compare(const struct peer *a, const struct peer *b) 119 { 120 return a->conf.id - b->conf.id; 121 } 122 123 RB_GENERATE(peer_head, peer, entry, peer_compare); 124 125 void 126 session_sighdlr(int sig) 127 { 128 switch (sig) { 129 case SIGINT: 130 case SIGTERM: 131 session_quit = 1; 132 break; 133 } 134 } 135 136 int 137 setup_listeners(u_int *la_cnt) 138 { 139 int ttl = 255; 140 struct listen_addr *la; 141 u_int cnt = 0; 142 143 TAILQ_FOREACH(la, conf->listen_addrs, entry) { 144 la->reconf = RECONF_NONE; 145 cnt++; 146 147 if (la->flags & LISTENER_LISTENING) 148 continue; 149 150 if (la->fd == -1) { 151 log_warn("cannot establish listener on %s: invalid fd", 152 log_sockaddr((struct sockaddr *)&la->sa, 153 la->sa_len)); 154 continue; 155 } 156 157 if (tcp_md5_prep_listener(la, &conf->peers) == -1) 158 fatal("tcp_md5_prep_listener"); 159 160 /* set ttl to 255 so that ttl-security works */ 161 if (la->sa.ss_family == AF_INET && setsockopt(la->fd, 162 IPPROTO_IP, IP_TTL, &ttl, sizeof(ttl)) == -1) { 163 log_warn("setup_listeners setsockopt TTL"); 164 continue; 165 } 166 if (la->sa.ss_family == AF_INET6 && setsockopt(la->fd, 167 IPPROTO_IPV6, IPV6_UNICAST_HOPS, &ttl, sizeof(ttl)) == -1) { 168 log_warn("setup_listeners setsockopt hoplimit"); 169 continue; 170 } 171 172 if (listen(la->fd, MAX_BACKLOG)) { 173 close(la->fd); 174 fatal("listen"); 175 } 176 177 la->flags |= LISTENER_LISTENING; 178 179 log_info("listening on %s", 180 log_sockaddr((struct sockaddr *)&la->sa, la->sa_len)); 181 } 182 183 *la_cnt = cnt; 184 185 return (0); 186 } 187 188 void 189 session_main(int debug, int verbose) 190 { 191 int timeout; 192 
unsigned int i, j, idx_peers, idx_listeners, idx_mrts; 193 u_int pfd_elms = 0, peer_l_elms = 0, mrt_l_elms = 0; 194 u_int listener_cnt, ctl_cnt, mrt_cnt; 195 u_int new_cnt; 196 struct passwd *pw; 197 struct peer *p, **peer_l = NULL, *next; 198 struct mrt *m, *xm, **mrt_l = NULL; 199 struct pollfd *pfd = NULL; 200 struct listen_addr *la; 201 void *newp; 202 time_t now; 203 short events; 204 205 log_init(debug, LOG_DAEMON); 206 log_setverbose(verbose); 207 208 log_procinit(log_procnames[PROC_SE]); 209 210 if ((pw = getpwnam(BGPD_USER)) == NULL) 211 fatal(NULL); 212 213 if (chroot(pw->pw_dir) == -1) 214 fatal("chroot"); 215 if (chdir("/") == -1) 216 fatal("chdir(\"/\")"); 217 218 setproctitle("session engine"); 219 220 if (setgroups(1, &pw->pw_gid) || 221 setresgid(pw->pw_gid, pw->pw_gid, pw->pw_gid) || 222 setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid)) 223 fatal("can't drop privileges"); 224 225 if (pledge("stdio inet recvfd", NULL) == -1) 226 fatal("pledge"); 227 228 signal(SIGTERM, session_sighdlr); 229 signal(SIGINT, session_sighdlr); 230 signal(SIGPIPE, SIG_IGN); 231 signal(SIGHUP, SIG_IGN); 232 signal(SIGALRM, SIG_IGN); 233 signal(SIGUSR1, SIG_IGN); 234 235 if ((ibuf_main = malloc(sizeof(struct imsgbuf))) == NULL) 236 fatal(NULL); 237 imsg_init(ibuf_main, 3); 238 239 LIST_INIT(&mrthead); 240 listener_cnt = 0; 241 peer_cnt = 0; 242 ctl_cnt = 0; 243 244 conf = new_config(); 245 log_info("session engine ready"); 246 247 while (session_quit == 0) { 248 /* check for peers to be initialized or deleted */ 249 if (!pending_reconf) { 250 RB_FOREACH_SAFE(p, peer_head, &conf->peers, next) { 251 /* cloned peer that idled out? */ 252 if (p->template && (p->state == STATE_IDLE || 253 p->state == STATE_ACTIVE) && 254 getmonotime() - p->stats.last_updown >= 255 INTERVAL_HOLD_CLONED) 256 p->reconf_action = RECONF_DELETE; 257 258 /* new peer that needs init? */ 259 if (p->state == STATE_NONE) 260 init_peer(p); 261 262 /* deletion due? 
*/ 263 if (p->reconf_action == RECONF_DELETE) { 264 if (p->demoted) 265 session_demote(p, -1); 266 p->conf.demote_group[0] = 0; 267 session_stop(p, ERR_CEASE_PEER_UNCONF); 268 timer_remove_all(&p->timers); 269 tcp_md5_del_listener(conf, p); 270 log_peer_warnx(&p->conf, "removed"); 271 RB_REMOVE(peer_head, &conf->peers, p); 272 free(p); 273 peer_cnt--; 274 continue; 275 } 276 p->reconf_action = RECONF_NONE; 277 } 278 } 279 280 if (peer_cnt > peer_l_elms) { 281 if ((newp = reallocarray(peer_l, peer_cnt, 282 sizeof(struct peer *))) == NULL) { 283 /* panic for now */ 284 log_warn("could not resize peer_l from %u -> %u" 285 " entries", peer_l_elms, peer_cnt); 286 fatalx("exiting"); 287 } 288 peer_l = newp; 289 peer_l_elms = peer_cnt; 290 } 291 292 mrt_cnt = 0; 293 for (m = LIST_FIRST(&mrthead); m != NULL; m = xm) { 294 xm = LIST_NEXT(m, entry); 295 if (m->state == MRT_STATE_REMOVE) { 296 mrt_clean(m); 297 LIST_REMOVE(m, entry); 298 free(m); 299 continue; 300 } 301 if (m->wbuf.queued) 302 mrt_cnt++; 303 } 304 305 if (mrt_cnt > mrt_l_elms) { 306 if ((newp = reallocarray(mrt_l, mrt_cnt, 307 sizeof(struct mrt *))) == NULL) { 308 /* panic for now */ 309 log_warn("could not resize mrt_l from %u -> %u" 310 " entries", mrt_l_elms, mrt_cnt); 311 fatalx("exiting"); 312 } 313 mrt_l = newp; 314 mrt_l_elms = mrt_cnt; 315 } 316 317 new_cnt = PFD_LISTENERS_START + listener_cnt + peer_cnt + 318 ctl_cnt + mrt_cnt; 319 if (new_cnt > pfd_elms) { 320 if ((newp = reallocarray(pfd, new_cnt, 321 sizeof(struct pollfd))) == NULL) { 322 /* panic for now */ 323 log_warn("could not resize pfd from %u -> %u" 324 " entries", pfd_elms, new_cnt); 325 fatalx("exiting"); 326 } 327 pfd = newp; 328 pfd_elms = new_cnt; 329 } 330 331 memset(pfd, 0, sizeof(struct pollfd) * pfd_elms); 332 333 set_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main); 334 set_pollfd(&pfd[PFD_PIPE_ROUTE], ibuf_rde); 335 set_pollfd(&pfd[PFD_PIPE_ROUTE_CTL], ibuf_rde_ctl); 336 337 if (pauseaccept == 0) { 338 pfd[PFD_SOCK_CTL].fd = csock; 339 
pfd[PFD_SOCK_CTL].events = POLLIN; 340 pfd[PFD_SOCK_RCTL].fd = rcsock; 341 pfd[PFD_SOCK_RCTL].events = POLLIN; 342 } else { 343 pfd[PFD_SOCK_CTL].fd = -1; 344 pfd[PFD_SOCK_RCTL].fd = -1; 345 } 346 347 i = PFD_LISTENERS_START; 348 TAILQ_FOREACH(la, conf->listen_addrs, entry) { 349 if (pauseaccept == 0) { 350 pfd[i].fd = la->fd; 351 pfd[i].events = POLLIN; 352 } else 353 pfd[i].fd = -1; 354 i++; 355 } 356 idx_listeners = i; 357 timeout = 240; /* loop every 240s at least */ 358 359 now = getmonotime(); 360 RB_FOREACH(p, peer_head, &conf->peers) { 361 time_t nextaction; 362 struct timer *pt; 363 364 /* check timers */ 365 if ((pt = timer_nextisdue(&p->timers, now)) != NULL) { 366 switch (pt->type) { 367 case Timer_Hold: 368 bgp_fsm(p, EVNT_TIMER_HOLDTIME); 369 break; 370 case Timer_SendHold: 371 bgp_fsm(p, EVNT_TIMER_SENDHOLD); 372 break; 373 case Timer_ConnectRetry: 374 bgp_fsm(p, EVNT_TIMER_CONNRETRY); 375 break; 376 case Timer_Keepalive: 377 bgp_fsm(p, EVNT_TIMER_KEEPALIVE); 378 break; 379 case Timer_IdleHold: 380 bgp_fsm(p, EVNT_START); 381 break; 382 case Timer_IdleHoldReset: 383 p->IdleHoldTime = 384 INTERVAL_IDLE_HOLD_INITIAL; 385 p->errcnt = 0; 386 timer_stop(&p->timers, 387 Timer_IdleHoldReset); 388 break; 389 case Timer_CarpUndemote: 390 timer_stop(&p->timers, 391 Timer_CarpUndemote); 392 if (p->demoted && 393 p->state == STATE_ESTABLISHED) 394 session_demote(p, -1); 395 break; 396 case Timer_RestartTimeout: 397 timer_stop(&p->timers, 398 Timer_RestartTimeout); 399 session_graceful_stop(p); 400 break; 401 default: 402 fatalx("King Bula lost in time"); 403 } 404 } 405 if ((nextaction = timer_nextduein(&p->timers, 406 now)) != -1 && nextaction < timeout) 407 timeout = nextaction; 408 409 /* are we waiting for a write? */ 410 events = POLLIN; 411 if (p->wbuf.queued > 0 || p->state == STATE_CONNECT) 412 events |= POLLOUT; 413 /* is there still work to do? 
*/ 414 if (p->rpending && p->rbuf && p->rbuf->wpos) 415 timeout = 0; 416 417 /* poll events */ 418 if (p->fd != -1 && events != 0) { 419 pfd[i].fd = p->fd; 420 pfd[i].events = events; 421 peer_l[i - idx_listeners] = p; 422 i++; 423 } 424 } 425 426 idx_peers = i; 427 428 LIST_FOREACH(m, &mrthead, entry) 429 if (m->wbuf.queued) { 430 pfd[i].fd = m->wbuf.fd; 431 pfd[i].events = POLLOUT; 432 mrt_l[i - idx_peers] = m; 433 i++; 434 } 435 436 idx_mrts = i; 437 438 i += control_fill_pfds(pfd + i, pfd_elms -i); 439 440 if (i > pfd_elms) 441 fatalx("poll pfd overflow"); 442 443 if (pauseaccept && timeout > 1) 444 timeout = 1; 445 if (timeout < 0) 446 timeout = 0; 447 if (poll(pfd, i, timeout * 1000) == -1) { 448 if (errno == EINTR) 449 continue; 450 fatal("poll error"); 451 } 452 453 /* 454 * If we previously saw fd exhaustion, we stop accept() 455 * for 1 second to throttle the accept() loop. 456 */ 457 if (pauseaccept && getmonotime() > pauseaccept + 1) 458 pauseaccept = 0; 459 460 if (handle_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main) == -1) { 461 log_warnx("SE: Lost connection to parent"); 462 session_quit = 1; 463 continue; 464 } else 465 session_dispatch_imsg(ibuf_main, PFD_PIPE_MAIN, 466 &listener_cnt); 467 468 if (handle_pollfd(&pfd[PFD_PIPE_ROUTE], ibuf_rde) == -1) { 469 log_warnx("SE: Lost connection to RDE"); 470 msgbuf_clear(&ibuf_rde->w); 471 free(ibuf_rde); 472 ibuf_rde = NULL; 473 } else 474 session_dispatch_imsg(ibuf_rde, PFD_PIPE_ROUTE, 475 &listener_cnt); 476 477 if (handle_pollfd(&pfd[PFD_PIPE_ROUTE_CTL], ibuf_rde_ctl) == 478 -1) { 479 log_warnx("SE: Lost connection to RDE control"); 480 msgbuf_clear(&ibuf_rde_ctl->w); 481 free(ibuf_rde_ctl); 482 ibuf_rde_ctl = NULL; 483 } else 484 session_dispatch_imsg(ibuf_rde_ctl, PFD_PIPE_ROUTE_CTL, 485 &listener_cnt); 486 487 if (pfd[PFD_SOCK_CTL].revents & POLLIN) 488 ctl_cnt += control_accept(csock, 0); 489 490 if (pfd[PFD_SOCK_RCTL].revents & POLLIN) 491 ctl_cnt += control_accept(rcsock, 1); 492 493 for (j = 
PFD_LISTENERS_START; j < idx_listeners; j++) 494 if (pfd[j].revents & POLLIN) 495 session_accept(pfd[j].fd); 496 497 for (; j < idx_peers; j++) 498 session_dispatch_msg(&pfd[j], 499 peer_l[j - idx_listeners]); 500 501 RB_FOREACH(p, peer_head, &conf->peers) 502 if (p->rbuf && p->rbuf->wpos) 503 session_process_msg(p); 504 505 for (; j < idx_mrts; j++) 506 if (pfd[j].revents & POLLOUT) 507 mrt_write(mrt_l[j - idx_peers]); 508 509 for (; j < i; j++) 510 ctl_cnt -= control_dispatch_msg(&pfd[j], &conf->peers); 511 } 512 513 RB_FOREACH_SAFE(p, peer_head, &conf->peers, next) { 514 RB_REMOVE(peer_head, &conf->peers, p); 515 strlcpy(p->conf.reason, 516 "bgpd shutting down", 517 sizeof(p->conf.reason)); 518 session_stop(p, ERR_CEASE_ADMIN_DOWN); 519 timer_remove_all(&p->timers); 520 free(p); 521 } 522 523 while ((m = LIST_FIRST(&mrthead)) != NULL) { 524 mrt_clean(m); 525 LIST_REMOVE(m, entry); 526 free(m); 527 } 528 529 free_config(conf); 530 free(peer_l); 531 free(mrt_l); 532 free(pfd); 533 534 /* close pipes */ 535 if (ibuf_rde) { 536 msgbuf_write(&ibuf_rde->w); 537 msgbuf_clear(&ibuf_rde->w); 538 close(ibuf_rde->fd); 539 free(ibuf_rde); 540 } 541 if (ibuf_rde_ctl) { 542 msgbuf_clear(&ibuf_rde_ctl->w); 543 close(ibuf_rde_ctl->fd); 544 free(ibuf_rde_ctl); 545 } 546 msgbuf_write(&ibuf_main->w); 547 msgbuf_clear(&ibuf_main->w); 548 close(ibuf_main->fd); 549 free(ibuf_main); 550 551 control_shutdown(csock); 552 control_shutdown(rcsock); 553 log_info("session engine exiting"); 554 exit(0); 555 } 556 557 void 558 init_peer(struct peer *p) 559 { 560 TAILQ_INIT(&p->timers); 561 p->fd = p->wbuf.fd = -1; 562 563 if (p->conf.if_depend[0]) 564 imsg_compose(ibuf_main, IMSG_SESSION_DEPENDON, 0, 0, -1, 565 p->conf.if_depend, sizeof(p->conf.if_depend)); 566 else 567 p->depend_ok = 1; 568 569 peer_cnt++; 570 571 change_state(p, STATE_IDLE, EVNT_NONE); 572 if (p->conf.down) 573 timer_stop(&p->timers, Timer_IdleHold); /* no autostart */ 574 else 575 timer_set(&p->timers, Timer_IdleHold, 
SESSION_CLEAR_DELAY); 576 577 p->stats.last_updown = getmonotime(); 578 579 /* 580 * on startup, demote if requested. 581 * do not handle new peers. they must reach ESTABLISHED beforehand. 582 * peers added at runtime have reconf_action set to RECONF_REINIT. 583 */ 584 if (p->reconf_action != RECONF_REINIT && p->conf.demote_group[0]) 585 session_demote(p, +1); 586 } 587 588 void 589 bgp_fsm(struct peer *peer, enum session_events event) 590 { 591 switch (peer->state) { 592 case STATE_NONE: 593 /* nothing */ 594 break; 595 case STATE_IDLE: 596 switch (event) { 597 case EVNT_START: 598 timer_stop(&peer->timers, Timer_Hold); 599 timer_stop(&peer->timers, Timer_SendHold); 600 timer_stop(&peer->timers, Timer_Keepalive); 601 timer_stop(&peer->timers, Timer_IdleHold); 602 603 /* allocate read buffer */ 604 peer->rbuf = calloc(1, sizeof(struct ibuf_read)); 605 if (peer->rbuf == NULL) 606 fatal(NULL); 607 608 /* init write buffer */ 609 msgbuf_init(&peer->wbuf); 610 611 peer->stats.last_sent_errcode = 0; 612 peer->stats.last_sent_suberr = 0; 613 peer->stats.last_rcvd_errcode = 0; 614 peer->stats.last_rcvd_suberr = 0; 615 616 if (!peer->depend_ok) 617 timer_stop(&peer->timers, Timer_ConnectRetry); 618 else if (peer->passive || peer->conf.passive || 619 peer->conf.template) { 620 change_state(peer, STATE_ACTIVE, event); 621 timer_stop(&peer->timers, Timer_ConnectRetry); 622 } else { 623 change_state(peer, STATE_CONNECT, event); 624 timer_set(&peer->timers, Timer_ConnectRetry, 625 conf->connectretry); 626 session_connect(peer); 627 } 628 peer->passive = 0; 629 break; 630 default: 631 /* ignore */ 632 break; 633 } 634 break; 635 case STATE_CONNECT: 636 switch (event) { 637 case EVNT_START: 638 /* ignore */ 639 break; 640 case EVNT_CON_OPEN: 641 session_tcp_established(peer); 642 session_open(peer); 643 timer_stop(&peer->timers, Timer_ConnectRetry); 644 peer->holdtime = INTERVAL_HOLD_INITIAL; 645 start_timer_holdtime(peer); 646 change_state(peer, STATE_OPENSENT, event); 647 
break; 648 case EVNT_CON_OPENFAIL: 649 timer_set(&peer->timers, Timer_ConnectRetry, 650 conf->connectretry); 651 session_close_connection(peer); 652 change_state(peer, STATE_ACTIVE, event); 653 break; 654 case EVNT_TIMER_CONNRETRY: 655 timer_set(&peer->timers, Timer_ConnectRetry, 656 conf->connectretry); 657 session_connect(peer); 658 break; 659 default: 660 change_state(peer, STATE_IDLE, event); 661 break; 662 } 663 break; 664 case STATE_ACTIVE: 665 switch (event) { 666 case EVNT_START: 667 /* ignore */ 668 break; 669 case EVNT_CON_OPEN: 670 session_tcp_established(peer); 671 session_open(peer); 672 timer_stop(&peer->timers, Timer_ConnectRetry); 673 peer->holdtime = INTERVAL_HOLD_INITIAL; 674 start_timer_holdtime(peer); 675 change_state(peer, STATE_OPENSENT, event); 676 break; 677 case EVNT_CON_OPENFAIL: 678 timer_set(&peer->timers, Timer_ConnectRetry, 679 conf->connectretry); 680 session_close_connection(peer); 681 change_state(peer, STATE_ACTIVE, event); 682 break; 683 case EVNT_TIMER_CONNRETRY: 684 timer_set(&peer->timers, Timer_ConnectRetry, 685 peer->holdtime); 686 change_state(peer, STATE_CONNECT, event); 687 session_connect(peer); 688 break; 689 default: 690 change_state(peer, STATE_IDLE, event); 691 break; 692 } 693 break; 694 case STATE_OPENSENT: 695 switch (event) { 696 case EVNT_START: 697 /* ignore */ 698 break; 699 case EVNT_STOP: 700 change_state(peer, STATE_IDLE, event); 701 break; 702 case EVNT_CON_CLOSED: 703 session_close_connection(peer); 704 timer_set(&peer->timers, Timer_ConnectRetry, 705 conf->connectretry); 706 change_state(peer, STATE_ACTIVE, event); 707 break; 708 case EVNT_CON_FATAL: 709 change_state(peer, STATE_IDLE, event); 710 break; 711 case EVNT_TIMER_HOLDTIME: 712 case EVNT_TIMER_SENDHOLD: 713 session_notification(peer, ERR_HOLDTIMEREXPIRED, 714 0, NULL, 0); 715 change_state(peer, STATE_IDLE, event); 716 break; 717 case EVNT_RCVD_OPEN: 718 /* parse_open calls change_state itself on failure */ 719 if (parse_open(peer)) 720 break; 721 
session_keepalive(peer); 722 change_state(peer, STATE_OPENCONFIRM, event); 723 break; 724 case EVNT_RCVD_NOTIFICATION: 725 if (parse_notification(peer)) { 726 change_state(peer, STATE_IDLE, event); 727 /* don't punish, capa negotiation */ 728 timer_set(&peer->timers, Timer_IdleHold, 0); 729 peer->IdleHoldTime /= 2; 730 } else 731 change_state(peer, STATE_IDLE, event); 732 break; 733 default: 734 session_notification(peer, 735 ERR_FSM, ERR_FSM_UNEX_OPENSENT, NULL, 0); 736 change_state(peer, STATE_IDLE, event); 737 break; 738 } 739 break; 740 case STATE_OPENCONFIRM: 741 switch (event) { 742 case EVNT_START: 743 /* ignore */ 744 break; 745 case EVNT_STOP: 746 change_state(peer, STATE_IDLE, event); 747 break; 748 case EVNT_CON_CLOSED: 749 case EVNT_CON_FATAL: 750 change_state(peer, STATE_IDLE, event); 751 break; 752 case EVNT_TIMER_HOLDTIME: 753 case EVNT_TIMER_SENDHOLD: 754 session_notification(peer, ERR_HOLDTIMEREXPIRED, 755 0, NULL, 0); 756 change_state(peer, STATE_IDLE, event); 757 break; 758 case EVNT_TIMER_KEEPALIVE: 759 session_keepalive(peer); 760 break; 761 case EVNT_RCVD_KEEPALIVE: 762 start_timer_holdtime(peer); 763 change_state(peer, STATE_ESTABLISHED, event); 764 break; 765 case EVNT_RCVD_NOTIFICATION: 766 parse_notification(peer); 767 change_state(peer, STATE_IDLE, event); 768 break; 769 default: 770 session_notification(peer, 771 ERR_FSM, ERR_FSM_UNEX_OPENCONFIRM, NULL, 0); 772 change_state(peer, STATE_IDLE, event); 773 break; 774 } 775 break; 776 case STATE_ESTABLISHED: 777 switch (event) { 778 case EVNT_START: 779 /* ignore */ 780 break; 781 case EVNT_STOP: 782 change_state(peer, STATE_IDLE, event); 783 break; 784 case EVNT_CON_CLOSED: 785 case EVNT_CON_FATAL: 786 change_state(peer, STATE_IDLE, event); 787 break; 788 case EVNT_TIMER_HOLDTIME: 789 case EVNT_TIMER_SENDHOLD: 790 session_notification(peer, ERR_HOLDTIMEREXPIRED, 791 0, NULL, 0); 792 change_state(peer, STATE_IDLE, event); 793 break; 794 case EVNT_TIMER_KEEPALIVE: 795 session_keepalive(peer); 
796 break; 797 case EVNT_RCVD_KEEPALIVE: 798 start_timer_holdtime(peer); 799 break; 800 case EVNT_RCVD_UPDATE: 801 start_timer_holdtime(peer); 802 if (parse_update(peer)) 803 change_state(peer, STATE_IDLE, event); 804 else 805 start_timer_holdtime(peer); 806 break; 807 case EVNT_RCVD_NOTIFICATION: 808 parse_notification(peer); 809 change_state(peer, STATE_IDLE, event); 810 break; 811 default: 812 session_notification(peer, 813 ERR_FSM, ERR_FSM_UNEX_ESTABLISHED, NULL, 0); 814 change_state(peer, STATE_IDLE, event); 815 break; 816 } 817 break; 818 } 819 } 820 821 void 822 start_timer_holdtime(struct peer *peer) 823 { 824 if (peer->holdtime > 0) 825 timer_set(&peer->timers, Timer_Hold, peer->holdtime); 826 else 827 timer_stop(&peer->timers, Timer_Hold); 828 } 829 830 void 831 start_timer_keepalive(struct peer *peer) 832 { 833 if (peer->holdtime > 0) 834 timer_set(&peer->timers, Timer_Keepalive, peer->holdtime / 3); 835 else 836 timer_stop(&peer->timers, Timer_Keepalive); 837 } 838 839 void 840 session_close_connection(struct peer *peer) 841 { 842 if (peer->fd != -1) { 843 close(peer->fd); 844 pauseaccept = 0; 845 } 846 peer->fd = peer->wbuf.fd = -1; 847 } 848 849 void 850 change_state(struct peer *peer, enum session_state state, 851 enum session_events event) 852 { 853 struct mrt *mrt; 854 855 switch (state) { 856 case STATE_IDLE: 857 /* carp demotion first. 
new peers handled in init_peer */ 858 if (peer->state == STATE_ESTABLISHED && 859 peer->conf.demote_group[0] && !peer->demoted) 860 session_demote(peer, +1); 861 862 /* 863 * try to write out what's buffered (maybe a notification), 864 * don't bother if it fails 865 */ 866 if (peer->state >= STATE_OPENSENT && peer->wbuf.queued) 867 msgbuf_write(&peer->wbuf); 868 869 /* 870 * we must start the timer for the next EVNT_START 871 * if we are coming here due to an error and the 872 * session was not established successfully before, the 873 * starttimerinterval needs to be exponentially increased 874 */ 875 if (peer->IdleHoldTime == 0) 876 peer->IdleHoldTime = INTERVAL_IDLE_HOLD_INITIAL; 877 peer->holdtime = INTERVAL_HOLD_INITIAL; 878 timer_stop(&peer->timers, Timer_ConnectRetry); 879 timer_stop(&peer->timers, Timer_Keepalive); 880 timer_stop(&peer->timers, Timer_Hold); 881 timer_stop(&peer->timers, Timer_SendHold); 882 timer_stop(&peer->timers, Timer_IdleHold); 883 timer_stop(&peer->timers, Timer_IdleHoldReset); 884 session_close_connection(peer); 885 msgbuf_clear(&peer->wbuf); 886 free(peer->rbuf); 887 peer->rbuf = NULL; 888 peer->rpending = 0; 889 memset(&peer->capa.peer, 0, sizeof(peer->capa.peer)); 890 if (!peer->template) 891 imsg_compose(ibuf_main, IMSG_PFKEY_RELOAD, 892 peer->conf.id, 0, -1, NULL, 0); 893 894 if (event != EVNT_STOP) { 895 timer_set(&peer->timers, Timer_IdleHold, 896 peer->IdleHoldTime); 897 if (event != EVNT_NONE && 898 peer->IdleHoldTime < MAX_IDLE_HOLD/2) 899 peer->IdleHoldTime *= 2; 900 } 901 if (peer->state == STATE_ESTABLISHED) { 902 if (peer->capa.neg.grestart.restart == 2 && 903 (event == EVNT_CON_CLOSED || 904 event == EVNT_CON_FATAL)) { 905 /* don't punish graceful restart */ 906 timer_set(&peer->timers, Timer_IdleHold, 0); 907 peer->IdleHoldTime /= 2; 908 session_graceful_restart(peer); 909 } else 910 session_down(peer); 911 } 912 if (peer->state == STATE_NONE || 913 peer->state == STATE_ESTABLISHED) { 914 /* initialize capability 
negotiation structures */ 915 memcpy(&peer->capa.ann, &peer->conf.capabilities, 916 sizeof(peer->capa.ann)); 917 if (!peer->conf.announce_capa) 918 session_capa_ann_none(peer); 919 } 920 break; 921 case STATE_CONNECT: 922 if (peer->state == STATE_ESTABLISHED && 923 peer->capa.neg.grestart.restart == 2) { 924 /* do the graceful restart dance */ 925 session_graceful_restart(peer); 926 peer->holdtime = INTERVAL_HOLD_INITIAL; 927 timer_stop(&peer->timers, Timer_ConnectRetry); 928 timer_stop(&peer->timers, Timer_Keepalive); 929 timer_stop(&peer->timers, Timer_Hold); 930 timer_stop(&peer->timers, Timer_SendHold); 931 timer_stop(&peer->timers, Timer_IdleHold); 932 timer_stop(&peer->timers, Timer_IdleHoldReset); 933 session_close_connection(peer); 934 msgbuf_clear(&peer->wbuf); 935 memset(&peer->capa.peer, 0, sizeof(peer->capa.peer)); 936 } 937 break; 938 case STATE_ACTIVE: 939 if (!peer->template) 940 imsg_compose(ibuf_main, IMSG_PFKEY_RELOAD, 941 peer->conf.id, 0, -1, NULL, 0); 942 break; 943 case STATE_OPENSENT: 944 break; 945 case STATE_OPENCONFIRM: 946 break; 947 case STATE_ESTABLISHED: 948 timer_set(&peer->timers, Timer_IdleHoldReset, 949 peer->IdleHoldTime); 950 if (peer->demoted) 951 timer_set(&peer->timers, Timer_CarpUndemote, 952 INTERVAL_HOLD_DEMOTED); 953 session_up(peer); 954 break; 955 default: /* something seriously fucked */ 956 break; 957 } 958 959 log_statechange(peer, state, event); 960 LIST_FOREACH(mrt, &mrthead, entry) { 961 if (!(mrt->type == MRT_ALL_IN || mrt->type == MRT_ALL_OUT)) 962 continue; 963 if ((mrt->peer_id == 0 && mrt->group_id == 0) || 964 mrt->peer_id == peer->conf.id || (mrt->group_id != 0 && 965 mrt->group_id == peer->conf.groupid)) 966 mrt_dump_state(mrt, peer->state, state, peer); 967 } 968 peer->prev_state = peer->state; 969 peer->state = state; 970 } 971 972 void 973 session_accept(int listenfd) 974 { 975 int connfd; 976 socklen_t len; 977 struct sockaddr_storage cliaddr; 978 struct peer *p = NULL; 979 980 len = sizeof(cliaddr); 
981 if ((connfd = accept4(listenfd, 982 (struct sockaddr *)&cliaddr, &len, 983 SOCK_CLOEXEC | SOCK_NONBLOCK)) == -1) { 984 if (errno == ENFILE || errno == EMFILE) 985 pauseaccept = getmonotime(); 986 else if (errno != EWOULDBLOCK && errno != EINTR && 987 errno != ECONNABORTED) 988 log_warn("accept"); 989 return; 990 } 991 992 p = getpeerbyip(conf, (struct sockaddr *)&cliaddr); 993 994 if (p != NULL && p->state == STATE_IDLE && p->errcnt < 2) { 995 if (timer_running(&p->timers, Timer_IdleHold, NULL)) { 996 /* fast reconnect after clear */ 997 p->passive = 1; 998 bgp_fsm(p, EVNT_START); 999 } 1000 } 1001 1002 if (p != NULL && 1003 (p->state == STATE_CONNECT || p->state == STATE_ACTIVE)) { 1004 if (p->fd != -1) { 1005 if (p->state == STATE_CONNECT) 1006 session_close_connection(p); 1007 else { 1008 close(connfd); 1009 return; 1010 } 1011 } 1012 1013 open: 1014 if (p->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) { 1015 log_peer_warnx(&p->conf, 1016 "ipsec or md5sig configured but not available"); 1017 close(connfd); 1018 return; 1019 } 1020 1021 if (tcp_md5_check(connfd, p) == -1) { 1022 close(connfd); 1023 return; 1024 } 1025 p->fd = p->wbuf.fd = connfd; 1026 if (session_setup_socket(p)) { 1027 close(connfd); 1028 return; 1029 } 1030 bgp_fsm(p, EVNT_CON_OPEN); 1031 return; 1032 } else if (p != NULL && p->state == STATE_ESTABLISHED && 1033 p->capa.neg.grestart.restart == 2) { 1034 /* first do the graceful restart dance */ 1035 change_state(p, STATE_CONNECT, EVNT_CON_CLOSED); 1036 /* then do part of the open dance */ 1037 goto open; 1038 } else { 1039 log_conn_attempt(p, (struct sockaddr *)&cliaddr, len); 1040 close(connfd); 1041 } 1042 } 1043 1044 int 1045 session_connect(struct peer *peer) 1046 { 1047 struct sockaddr *sa; 1048 struct bgpd_addr *bind_addr = NULL; 1049 socklen_t sa_len; 1050 1051 /* 1052 * we do not need the overcomplicated collision detection RFC 1771 1053 * describes; we simply make sure there is only ever one concurrent 1054 * tcp connection per 
peer. 1055 */ 1056 if (peer->fd != -1) 1057 return (-1); 1058 1059 if ((peer->fd = socket(aid2af(peer->conf.remote_addr.aid), 1060 SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, IPPROTO_TCP)) == -1) { 1061 log_peer_warn(&peer->conf, "session_connect socket"); 1062 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1063 return (-1); 1064 } 1065 1066 if (peer->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) { 1067 log_peer_warnx(&peer->conf, 1068 "ipsec or md5sig configured but not available"); 1069 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1070 return (-1); 1071 } 1072 1073 tcp_md5_set(peer->fd, peer); 1074 peer->wbuf.fd = peer->fd; 1075 1076 /* if local-address is set we need to bind() */ 1077 switch (peer->conf.remote_addr.aid) { 1078 case AID_INET: 1079 bind_addr = &peer->conf.local_addr_v4; 1080 break; 1081 case AID_INET6: 1082 bind_addr = &peer->conf.local_addr_v6; 1083 break; 1084 } 1085 if ((sa = addr2sa(bind_addr, 0, &sa_len)) != NULL) { 1086 if (bind(peer->fd, sa, sa_len) == -1) { 1087 log_peer_warn(&peer->conf, "session_connect bind"); 1088 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1089 return (-1); 1090 } 1091 } 1092 1093 if (session_setup_socket(peer)) { 1094 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1095 return (-1); 1096 } 1097 1098 sa = addr2sa(&peer->conf.remote_addr, peer->conf.remote_port, &sa_len); 1099 if (connect(peer->fd, sa, sa_len) == -1) { 1100 if (errno != EINPROGRESS) { 1101 if (errno != peer->lasterr) 1102 log_peer_warn(&peer->conf, "connect"); 1103 peer->lasterr = errno; 1104 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1105 return (-1); 1106 } 1107 } else 1108 bgp_fsm(peer, EVNT_CON_OPEN); 1109 1110 return (0); 1111 } 1112 1113 int 1114 session_setup_socket(struct peer *p) 1115 { 1116 int ttl = p->conf.distance; 1117 int pre = IPTOS_PREC_INTERNETCONTROL; 1118 int nodelay = 1; 1119 int bsize; 1120 1121 switch (p->conf.remote_addr.aid) { 1122 case AID_INET: 1123 /* set precedence, see RFC 1771 appendix 5 */ 1124 if (setsockopt(p->fd, IPPROTO_IP, IP_TOS, &pre, sizeof(pre)) == 1125 -1) { 1126 
log_peer_warn(&p->conf, 1127 "session_setup_socket setsockopt TOS"); 1128 return (-1); 1129 } 1130 1131 if (p->conf.ebgp) { 1132 /* 1133 * set TTL to foreign router's distance 1134 * 1=direct n=multihop with ttlsec, we always use 255 1135 */ 1136 if (p->conf.ttlsec) { 1137 ttl = 256 - p->conf.distance; 1138 if (setsockopt(p->fd, IPPROTO_IP, IP_MINTTL, 1139 &ttl, sizeof(ttl)) == -1) { 1140 log_peer_warn(&p->conf, 1141 "session_setup_socket: " 1142 "setsockopt MINTTL"); 1143 return (-1); 1144 } 1145 ttl = 255; 1146 } 1147 1148 if (setsockopt(p->fd, IPPROTO_IP, IP_TTL, &ttl, 1149 sizeof(ttl)) == -1) { 1150 log_peer_warn(&p->conf, 1151 "session_setup_socket setsockopt TTL"); 1152 return (-1); 1153 } 1154 } 1155 break; 1156 case AID_INET6: 1157 if (p->conf.ebgp) { 1158 /* 1159 * set hoplimit to foreign router's distance 1160 * 1=direct n=multihop with ttlsec, we always use 255 1161 */ 1162 if (p->conf.ttlsec) { 1163 ttl = 256 - p->conf.distance; 1164 if (setsockopt(p->fd, IPPROTO_IPV6, 1165 IPV6_MINHOPCOUNT, &ttl, sizeof(ttl)) 1166 == -1) { 1167 log_peer_warn(&p->conf, 1168 "session_setup_socket: " 1169 "setsockopt MINHOPCOUNT"); 1170 return (-1); 1171 } 1172 ttl = 255; 1173 } 1174 if (setsockopt(p->fd, IPPROTO_IPV6, IPV6_UNICAST_HOPS, 1175 &ttl, sizeof(ttl)) == -1) { 1176 log_peer_warn(&p->conf, 1177 "session_setup_socket setsockopt hoplimit"); 1178 return (-1); 1179 } 1180 } 1181 break; 1182 } 1183 1184 /* set TCP_NODELAY */ 1185 if (setsockopt(p->fd, IPPROTO_TCP, TCP_NODELAY, &nodelay, 1186 sizeof(nodelay)) == -1) { 1187 log_peer_warn(&p->conf, 1188 "session_setup_socket setsockopt TCP_NODELAY"); 1189 return (-1); 1190 } 1191 1192 /* limit bufsize. 
no biggie if it fails */ 1193 bsize = 65535; 1194 while (bsize > 8192 && setsockopt(p->fd, SOL_SOCKET, SO_RCVBUF, 1195 &bsize, sizeof(bsize)) == -1 && errno != EINVAL) 1196 bsize /= 2; 1197 bsize = 65535; 1198 while (bsize > 8192 && setsockopt(p->fd, SOL_SOCKET, SO_SNDBUF, 1199 &bsize, sizeof(bsize)) == -1 && errno != EINVAL) 1200 bsize /= 2; 1201 1202 return (0); 1203 } 1204 1205 /* 1206 * compare the bgpd_addr with the sockaddr by converting the latter into 1207 * a bgpd_addr. Return true if the two are equal, including any scope 1208 */ 1209 static int 1210 sa_equal(struct bgpd_addr *ba, struct sockaddr *b) 1211 { 1212 struct bgpd_addr bb; 1213 1214 sa2addr(b, &bb, NULL); 1215 return (memcmp(ba, &bb, sizeof(*ba)) == 0); 1216 } 1217 1218 static void 1219 get_alternate_addr(struct bgpd_addr *local, struct bgpd_addr *remote, 1220 struct bgpd_addr *alt, unsigned int *scope) 1221 { 1222 struct ifaddrs *ifap, *ifa, *match; 1223 int connected = 0; 1224 u_int8_t plen; 1225 1226 if (getifaddrs(&ifap) == -1) 1227 fatal("getifaddrs"); 1228 1229 for (match = ifap; match != NULL; match = match->ifa_next) { 1230 if (match->ifa_addr == NULL) 1231 continue; 1232 if (match->ifa_addr->sa_family != AF_INET && 1233 match->ifa_addr->sa_family != AF_INET6) 1234 continue; 1235 if (sa_equal(local, match->ifa_addr)) { 1236 if (match->ifa_flags & IFF_POINTOPOINT && 1237 match->ifa_dstaddr) { 1238 if (sa_equal(remote, match->ifa_dstaddr)) 1239 connected = 1; 1240 } else if (match->ifa_netmask) { 1241 plen = mask2prefixlen( 1242 match->ifa_addr->sa_family, 1243 match->ifa_netmask); 1244 if (prefix_compare(local, remote, plen) == 0) 1245 connected = 1; 1246 } 1247 break; 1248 } 1249 } 1250 1251 if (match == NULL) { 1252 log_warnx("%s: local address not found", __func__); 1253 return; 1254 } 1255 if (connected) 1256 *scope = if_nametoindex(match->ifa_name); 1257 else 1258 *scope = 0; 1259 1260 switch (local->aid) { 1261 case AID_INET6: 1262 for (ifa = ifap; ifa != NULL; ifa = ifa->ifa_next) 
{ 1263 if (ifa->ifa_addr != NULL && 1264 ifa->ifa_addr->sa_family == AF_INET && 1265 strcmp(ifa->ifa_name, match->ifa_name) == 0) { 1266 sa2addr(ifa->ifa_addr, alt, NULL); 1267 break; 1268 } 1269 } 1270 break; 1271 case AID_INET: 1272 for (ifa = ifap; ifa != NULL; ifa = ifa->ifa_next) { 1273 if (ifa->ifa_addr != NULL && 1274 ifa->ifa_addr->sa_family == AF_INET6 && 1275 strcmp(ifa->ifa_name, match->ifa_name) == 0) { 1276 struct sockaddr_in6 *s = 1277 (struct sockaddr_in6 *)ifa->ifa_addr; 1278 1279 /* only accept global scope addresses */ 1280 if (IN6_IS_ADDR_LINKLOCAL(&s->sin6_addr) || 1281 IN6_IS_ADDR_SITELOCAL(&s->sin6_addr)) 1282 continue; 1283 sa2addr(ifa->ifa_addr, alt, NULL); 1284 break; 1285 } 1286 } 1287 break; 1288 default: 1289 log_warnx("%s: unsupported address family %s", __func__, 1290 aid2str(local->aid)); 1291 break; 1292 } 1293 1294 freeifaddrs(ifap); 1295 } 1296 1297 void 1298 session_tcp_established(struct peer *peer) 1299 { 1300 struct sockaddr_storage ss; 1301 socklen_t len; 1302 1303 len = sizeof(ss); 1304 if (getsockname(peer->fd, (struct sockaddr *)&ss, &len) == -1) 1305 log_warn("getsockname"); 1306 sa2addr((struct sockaddr *)&ss, &peer->local, &peer->local_port); 1307 len = sizeof(ss); 1308 if (getpeername(peer->fd, (struct sockaddr *)&ss, &len) == -1) 1309 log_warn("getpeername"); 1310 sa2addr((struct sockaddr *)&ss, &peer->remote, &peer->remote_port); 1311 1312 get_alternate_addr(&peer->local, &peer->remote, &peer->local_alt, 1313 &peer->if_scope); 1314 } 1315 1316 void 1317 session_capa_ann_none(struct peer *peer) 1318 { 1319 memset(&peer->capa.ann, 0, sizeof(peer->capa.ann)); 1320 } 1321 1322 int 1323 session_capa_add(struct ibuf *opb, uint8_t capa_code, uint8_t capa_len) 1324 { 1325 int errs = 0; 1326 1327 errs += ibuf_add_n8(opb, capa_code); 1328 errs += ibuf_add_n8(opb, capa_len); 1329 return (errs); 1330 } 1331 1332 int 1333 session_capa_add_mp(struct ibuf *buf, uint8_t aid) 1334 { 1335 uint16_t afi; 1336 uint8_t safi; 1337 int errs 
/*
 * Append one per-address-family capability tuple to b: the 16-bit AFI
 * and 8-bit SAFI that aid translates to, followed by the 8-bit flags.
 *
 * The peer argument is not used by this function.
 *
 * Returns -1 (after logging) if aid cannot be translated to an AFI/SAFI
 * pair.  Otherwise returns the sum of the ibuf_add_*() results; callers
 * in this file treat any non-zero value as failure.
 */
int
session_capa_add_afi(struct peer *p, struct ibuf *b, uint8_t aid,
    uint8_t flags)
{
	/*
	 * Signed on purpose: the value is returned as int, so the
	 * accumulator must not go through an unsigned round trip.
	 */
	int		errs = 0;
	uint16_t	afi;
	uint8_t		safi;

	if (aid2afi(aid, &afi, &safi)) {
		log_warn("%s: bad AID", __func__);
		return (-1);
	}

	errs += ibuf_add_n16(b, afi);
	errs += ibuf_add_n8(b, safi);
	errs += ibuf_add_n8(b, flags);

	return (errs);
}
/*
 * Build and queue the BGP OPEN message for peer p.
 *
 * The announced capabilities (p->capa.ann) are first serialized into a
 * scratch buffer (opb).  Its size decides how the optional parameters are
 * framed: not at all when there are no capabilities, with the regular
 * 2-byte capabilities header, or with the RFC 9072 extended-length
 * encoding when the regular one-byte length would reach 255.  The OPEN
 * message itself is then assembled and handed to session_sendmsg().
 *
 * Any allocation or buffer error raises EVNT_CON_FATAL on the peer and
 * returns without sending.  On success p->stats.msg_sent_open is bumped.
 */
void
session_open(struct peer *p)
{
	struct bgp_msg		*buf;
	struct ibuf		*opb;
	size_t			 len, optparamlen;
	uint16_t		 holdtime;
	uint8_t			 i;
	int			 errs = 0, extlen = 0;
	int			 mpcapa = 0;


	/* scratch buffer collecting the encoded capabilities */
	if ((opb = ibuf_dynamic(0, UINT16_MAX - 3)) == NULL) {
		bgp_fsm(p, EVNT_CON_FATAL);
		return;
	}

	/* multiprotocol extensions, RFC 4760 */
	for (i = 0; i < AID_MAX; i++)
		if (p->capa.ann.mp[i]) {	/* 4 bytes data */
			errs += session_capa_add(opb, CAPA_MP, 4);
			errs += session_capa_add_mp(opb, i);
			/* remembered: the role and add-path encodings depend on it */
			mpcapa++;
		}

	/* route refresh, RFC 2918 */
	if (p->capa.ann.refresh)	/* no data */
		errs += session_capa_add(opb, CAPA_REFRESH, 0);

	/* BGP open policy, RFC 9234, only for ebgp sessions */
	if (p->conf.ebgp && p->capa.ann.policy &&
	    p->conf.role != ROLE_NONE &&
	    (p->capa.ann.mp[AID_INET] || p->capa.ann.mp[AID_INET6] ||
	    mpcapa == 0)) {
		errs += session_capa_add(opb, CAPA_ROLE, 1);
		errs += ibuf_add_n8(opb, role2capa(p->conf.role));
	}

	/* graceful restart and End-of-RIB marker, RFC 4724 */
	if (p->capa.ann.grestart.restart) {
		int		rst = 0;
		uint16_t	hdr = 0;

		/* count address families still flagged as restarting */
		for (i = 0; i < AID_MAX; i++) {
			if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING)
				rst++;
		}

		/* Only set the R-flag if no graceful restart is ongoing */
		if (!rst)
			hdr |= CAPA_GR_R_FLAG;
		errs += session_capa_add(opb, CAPA_RESTART, sizeof(hdr));
		errs += ibuf_add_n16(opb, hdr);
	}

	/* 4-bytes AS numbers, RFC6793 */
	if (p->capa.ann.as4byte) {	/* 4 bytes data */
		errs += session_capa_add(opb, CAPA_AS4BYTE, sizeof(uint32_t));
		errs += ibuf_add_n32(opb, p->conf.local_as);
	}

	/* advertisement of multiple paths, RFC7911 */
	if (p->capa.ann.add_path[0]) {	/* variable */
		uint8_t	aplen;

		/* one 4-byte tuple per announced family, or one for AID_INET */
		if (mpcapa)
			aplen = 4 * mpcapa;
		else	/* AID_INET */
			aplen = 4;
		errs += session_capa_add(opb, CAPA_ADD_PATH, aplen);
		if (mpcapa) {
			for (i = AID_MIN; i < AID_MAX; i++) {
				if (p->capa.ann.mp[i]) {
					errs += session_capa_add_afi(p, opb,
					    i, p->capa.ann.add_path[i]);
				}
			}
		} else {	/* AID_INET */
			errs += session_capa_add_afi(p, opb, AID_INET,
			    p->capa.ann.add_path[AID_INET]);
		}
	}

	/* enhanced route-refresh, RFC7313 */
	if (p->capa.ann.enhanced_rr)	/* no data */
		errs += session_capa_add(opb, CAPA_ENHANCED_RR, 0);

	/* any failed append above invalidates the whole capability set */
	if (errs) {
		ibuf_free(opb);
		bgp_fsm(p, EVNT_CON_FATAL);
		return;
	}

	/*
	 * From here on optparamlen is the value of the one-byte length
	 * field of the OPEN message, and len the total message size.
	 */
	optparamlen = ibuf_size(opb);
	len = MSGSIZE_OPEN_MIN + optparamlen;
	if (optparamlen == 0) {
		/* nothing */
	} else if (optparamlen + 2 >= 255) {
		/* RFC9072: use 255 as magic size and request extra header */
		optparamlen = 255;
		extlen = 1;
		/* 3 byte OPT_PARAM_EXT_LEN and OPT_PARAM_CAPABILITIES */
		len += 2 * 3;
	} else {
		/* regular capabilities header */
		optparamlen += 2;
		len += 2;
	}

	if ((buf = session_newmsg(OPEN, len)) == NULL) {
		ibuf_free(opb);
		bgp_fsm(p, EVNT_CON_FATAL);
		return;
	}

	/* the per-peer holdtime wins over the global default when set */
	if (p->conf.holdtime)
		holdtime = p->conf.holdtime;
	else
		holdtime = conf->holdtime;

	/* fixed part: version, 2-byte AS, holdtime, BGP ID, opt param len */
	errs += ibuf_add_n8(buf->buf, 4);
	errs += ibuf_add_n16(buf->buf, p->conf.local_short_as);
	errs += ibuf_add_n16(buf->buf, holdtime);
	/* is already in network byte order */
	errs += ibuf_add(buf->buf, &conf->bgpid, sizeof(conf->bgpid));
	errs += ibuf_add_n8(buf->buf, optparamlen);

	if (extlen) {
		/* RFC9072 extra header which spans over the capabilities hdr */
		errs += ibuf_add_n8(buf->buf, OPT_PARAM_EXT_LEN);
		errs += ibuf_add_n16(buf->buf, ibuf_size(opb) + 1 + 2);
	}

	if (optparamlen) {
		errs += ibuf_add_n8(buf->buf, OPT_PARAM_CAPABILITIES);

		if (extlen) {
			/* RFC9072: 2-byte extended length */
			errs += ibuf_add_n16(buf->buf, ibuf_size(opb));
		} else {
			errs += ibuf_add_n8(buf->buf, ibuf_size(opb));
		}
		errs += ibuf_add_buf(buf->buf, opb);
	}

	/* the capabilities were appended to the message above; drop the scratch buffer */
	ibuf_free(opb);

	if (errs) {
		ibuf_free(buf->buf);
		free(buf);
		bgp_fsm(p, EVNT_CON_FATAL);
		return;
	}

	if (session_sendmsg(buf, p) == -1) {
		bgp_fsm(p, EVNT_CON_FATAL);
		return;
	}

	p->stats.msg_sent_open++;
}
peer *p; 1650 struct bgp_msg *buf; 1651 1652 if ((p = getpeerbyid(conf, peerid)) == NULL) { 1653 log_warnx("no such peer: id=%u", peerid); 1654 return; 1655 } 1656 1657 if (p->state != STATE_ESTABLISHED) 1658 return; 1659 1660 if ((buf = session_newmsg(UPDATE, MSGSIZE_HEADER + datalen)) == NULL) { 1661 bgp_fsm(p, EVNT_CON_FATAL); 1662 return; 1663 } 1664 1665 if (ibuf_add(buf->buf, data, datalen)) { 1666 ibuf_free(buf->buf); 1667 free(buf); 1668 bgp_fsm(p, EVNT_CON_FATAL); 1669 return; 1670 } 1671 1672 if (session_sendmsg(buf, p) == -1) { 1673 bgp_fsm(p, EVNT_CON_FATAL); 1674 return; 1675 } 1676 1677 start_timer_keepalive(p); 1678 p->stats.msg_sent_update++; 1679 } 1680 1681 void 1682 session_notification(struct peer *p, uint8_t errcode, uint8_t subcode, 1683 void *data, ssize_t datalen) 1684 { 1685 struct bgp_msg *buf; 1686 int errs = 0; 1687 1688 if (p->stats.last_sent_errcode) /* some notification already sent */ 1689 return; 1690 1691 log_notification(p, errcode, subcode, data, datalen, "sending"); 1692 1693 /* cap to maximum size */ 1694 if (datalen > MAX_PKTSIZE - MSGSIZE_NOTIFICATION_MIN) { 1695 log_peer_warnx(&p->conf, 1696 "oversized notification, data trunkated"); 1697 datalen = MAX_PKTSIZE - MSGSIZE_NOTIFICATION_MIN; 1698 } 1699 1700 if ((buf = session_newmsg(NOTIFICATION, 1701 MSGSIZE_NOTIFICATION_MIN + datalen)) == NULL) { 1702 bgp_fsm(p, EVNT_CON_FATAL); 1703 return; 1704 } 1705 1706 errs += ibuf_add_n8(buf->buf, errcode); 1707 errs += ibuf_add_n8(buf->buf, subcode); 1708 1709 if (datalen > 0) 1710 errs += ibuf_add(buf->buf, data, datalen); 1711 1712 if (errs) { 1713 ibuf_free(buf->buf); 1714 free(buf); 1715 bgp_fsm(p, EVNT_CON_FATAL); 1716 return; 1717 } 1718 1719 if (session_sendmsg(buf, p) == -1) { 1720 bgp_fsm(p, EVNT_CON_FATAL); 1721 return; 1722 } 1723 1724 p->stats.msg_sent_notification++; 1725 p->stats.last_sent_errcode = errcode; 1726 p->stats.last_sent_suberr = subcode; 1727 } 1728 1729 int 1730 session_neighbor_rrefresh(struct peer *p) 1731 
{ 1732 uint8_t i; 1733 1734 if (!(p->capa.neg.refresh || p->capa.neg.enhanced_rr)) 1735 return (-1); 1736 1737 for (i = 0; i < AID_MAX; i++) { 1738 if (p->capa.neg.mp[i] != 0) 1739 session_rrefresh(p, i, ROUTE_REFRESH_REQUEST); 1740 } 1741 1742 return (0); 1743 } 1744 1745 void 1746 session_rrefresh(struct peer *p, uint8_t aid, uint8_t subtype) 1747 { 1748 struct bgp_msg *buf; 1749 int errs = 0; 1750 uint16_t afi; 1751 uint8_t safi; 1752 1753 switch (subtype) { 1754 case ROUTE_REFRESH_REQUEST: 1755 p->stats.refresh_sent_req++; 1756 break; 1757 case ROUTE_REFRESH_BEGIN_RR: 1758 case ROUTE_REFRESH_END_RR: 1759 /* requires enhanced route refresh */ 1760 if (!p->capa.neg.enhanced_rr) 1761 return; 1762 if (subtype == ROUTE_REFRESH_BEGIN_RR) 1763 p->stats.refresh_sent_borr++; 1764 else 1765 p->stats.refresh_sent_eorr++; 1766 break; 1767 default: 1768 fatalx("session_rrefresh: bad subtype %d", subtype); 1769 } 1770 1771 if (aid2afi(aid, &afi, &safi) == -1) 1772 fatalx("session_rrefresh: bad afi/safi pair"); 1773 1774 if ((buf = session_newmsg(RREFRESH, MSGSIZE_RREFRESH)) == NULL) { 1775 bgp_fsm(p, EVNT_CON_FATAL); 1776 return; 1777 } 1778 1779 errs += ibuf_add_n16(buf->buf, afi); 1780 errs += ibuf_add_n8(buf->buf, subtype); 1781 errs += ibuf_add_n8(buf->buf, safi); 1782 1783 if (errs) { 1784 ibuf_free(buf->buf); 1785 free(buf); 1786 bgp_fsm(p, EVNT_CON_FATAL); 1787 return; 1788 } 1789 1790 if (session_sendmsg(buf, p) == -1) { 1791 bgp_fsm(p, EVNT_CON_FATAL); 1792 return; 1793 } 1794 1795 p->stats.msg_sent_rrefresh++; 1796 } 1797 1798 int 1799 session_graceful_restart(struct peer *p) 1800 { 1801 uint8_t i; 1802 1803 timer_set(&p->timers, Timer_RestartTimeout, 1804 p->capa.neg.grestart.timeout); 1805 1806 for (i = 0; i < AID_MAX; i++) { 1807 if (p->capa.neg.grestart.flags[i] & CAPA_GR_PRESENT) { 1808 if (imsg_rde(IMSG_SESSION_STALE, p->conf.id, 1809 &i, sizeof(i)) == -1) 1810 return (-1); 1811 log_peer_warnx(&p->conf, 1812 "graceful restart of %s, keeping routes", 1813 
aid2str(i)); 1814 p->capa.neg.grestart.flags[i] |= CAPA_GR_RESTARTING; 1815 } else if (p->capa.neg.mp[i]) { 1816 if (imsg_rde(IMSG_SESSION_NOGRACE, p->conf.id, 1817 &i, sizeof(i)) == -1) 1818 return (-1); 1819 log_peer_warnx(&p->conf, 1820 "graceful restart of %s, flushing routes", 1821 aid2str(i)); 1822 } 1823 } 1824 return (0); 1825 } 1826 1827 int 1828 session_graceful_stop(struct peer *p) 1829 { 1830 uint8_t i; 1831 1832 for (i = 0; i < AID_MAX; i++) { 1833 /* 1834 * Only flush if the peer is restarting and the timeout fired. 1835 * In all other cases the session was already flushed when the 1836 * session went down or when the new open message was parsed. 1837 */ 1838 if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING) { 1839 log_peer_warnx(&p->conf, "graceful restart of %s, " 1840 "time-out, flushing", aid2str(i)); 1841 if (imsg_rde(IMSG_SESSION_FLUSH, p->conf.id, 1842 &i, sizeof(i)) == -1) 1843 return (-1); 1844 } 1845 p->capa.neg.grestart.flags[i] &= ~CAPA_GR_RESTARTING; 1846 } 1847 return (0); 1848 } 1849 1850 int 1851 session_dispatch_msg(struct pollfd *pfd, struct peer *p) 1852 { 1853 ssize_t n; 1854 socklen_t len; 1855 int error; 1856 1857 if (p->state == STATE_CONNECT) { 1858 if (pfd->revents & POLLOUT) { 1859 if (pfd->revents & POLLIN) { 1860 /* error occurred */ 1861 len = sizeof(error); 1862 if (getsockopt(pfd->fd, SOL_SOCKET, SO_ERROR, 1863 &error, &len) == -1 || error) { 1864 if (error) 1865 errno = error; 1866 if (errno != p->lasterr) { 1867 log_peer_warn(&p->conf, 1868 "socket error"); 1869 p->lasterr = errno; 1870 } 1871 bgp_fsm(p, EVNT_CON_OPENFAIL); 1872 return (1); 1873 } 1874 } 1875 bgp_fsm(p, EVNT_CON_OPEN); 1876 return (1); 1877 } 1878 if (pfd->revents & POLLHUP) { 1879 bgp_fsm(p, EVNT_CON_OPENFAIL); 1880 return (1); 1881 } 1882 if (pfd->revents & (POLLERR|POLLNVAL)) { 1883 bgp_fsm(p, EVNT_CON_FATAL); 1884 return (1); 1885 } 1886 return (0); 1887 } 1888 1889 if (pfd->revents & POLLHUP) { 1890 bgp_fsm(p, EVNT_CON_CLOSED); 1891 return 
(1); 1892 } 1893 if (pfd->revents & (POLLERR|POLLNVAL)) { 1894 bgp_fsm(p, EVNT_CON_FATAL); 1895 return (1); 1896 } 1897 1898 if (pfd->revents & POLLOUT && p->wbuf.queued) { 1899 if ((error = msgbuf_write(&p->wbuf)) <= 0 && errno != EAGAIN) { 1900 if (error == 0) 1901 log_peer_warnx(&p->conf, "Connection closed"); 1902 else if (error == -1) 1903 log_peer_warn(&p->conf, "write error"); 1904 bgp_fsm(p, EVNT_CON_FATAL); 1905 return (1); 1906 } 1907 p->stats.last_write = getmonotime(); 1908 if (p->holdtime > 0) 1909 timer_set(&p->timers, Timer_SendHold, 1910 p->holdtime < INTERVAL_HOLD ? INTERVAL_HOLD : 1911 p->holdtime); 1912 if (p->throttled && p->wbuf.queued < SESS_MSG_LOW_MARK) { 1913 if (imsg_rde(IMSG_XON, p->conf.id, NULL, 0) == -1) 1914 log_peer_warn(&p->conf, "imsg_compose XON"); 1915 else 1916 p->throttled = 0; 1917 } 1918 if (!(pfd->revents & POLLIN)) 1919 return (1); 1920 } 1921 1922 if (p->rbuf && pfd->revents & POLLIN) { 1923 if ((n = read(p->fd, p->rbuf->buf + p->rbuf->wpos, 1924 sizeof(p->rbuf->buf) - p->rbuf->wpos)) == -1) { 1925 if (errno != EINTR && errno != EAGAIN) { 1926 log_peer_warn(&p->conf, "read error"); 1927 bgp_fsm(p, EVNT_CON_FATAL); 1928 } 1929 return (1); 1930 } 1931 if (n == 0) { /* connection closed */ 1932 bgp_fsm(p, EVNT_CON_CLOSED); 1933 return (1); 1934 } 1935 1936 p->rbuf->wpos += n; 1937 p->stats.last_read = getmonotime(); 1938 return (1); 1939 } 1940 return (0); 1941 } 1942 1943 void 1944 session_process_msg(struct peer *p) 1945 { 1946 struct mrt *mrt; 1947 ssize_t rpos, av, left; 1948 int processed = 0; 1949 uint16_t msglen; 1950 uint8_t msgtype; 1951 1952 rpos = 0; 1953 av = p->rbuf->wpos; 1954 p->rpending = 0; 1955 1956 /* 1957 * session might drop to IDLE -> buffers deallocated 1958 * we MUST check rbuf != NULL before use 1959 */ 1960 for (;;) { 1961 if (p->rbuf == NULL) 1962 return; 1963 if (rpos + MSGSIZE_HEADER > av) 1964 break; 1965 if (parse_header(p, p->rbuf->buf + rpos, &msglen, 1966 &msgtype) == -1) 1967 return; 1968 if 
(rpos + msglen > av) 1969 break; 1970 p->rbuf->rptr = p->rbuf->buf + rpos; 1971 1972 /* dump to MRT as soon as we have a full packet */ 1973 LIST_FOREACH(mrt, &mrthead, entry) { 1974 if (!(mrt->type == MRT_ALL_IN || (msgtype == UPDATE && 1975 mrt->type == MRT_UPDATE_IN))) 1976 continue; 1977 if ((mrt->peer_id == 0 && mrt->group_id == 0) || 1978 mrt->peer_id == p->conf.id || (mrt->group_id != 0 && 1979 mrt->group_id == p->conf.groupid)) 1980 mrt_dump_bgp_msg(mrt, p->rbuf->rptr, msglen, p, 1981 msgtype); 1982 } 1983 1984 switch (msgtype) { 1985 case OPEN: 1986 bgp_fsm(p, EVNT_RCVD_OPEN); 1987 p->stats.msg_rcvd_open++; 1988 break; 1989 case UPDATE: 1990 bgp_fsm(p, EVNT_RCVD_UPDATE); 1991 p->stats.msg_rcvd_update++; 1992 break; 1993 case NOTIFICATION: 1994 bgp_fsm(p, EVNT_RCVD_NOTIFICATION); 1995 p->stats.msg_rcvd_notification++; 1996 break; 1997 case KEEPALIVE: 1998 bgp_fsm(p, EVNT_RCVD_KEEPALIVE); 1999 p->stats.msg_rcvd_keepalive++; 2000 break; 2001 case RREFRESH: 2002 parse_rrefresh(p); 2003 p->stats.msg_rcvd_rrefresh++; 2004 break; 2005 default: /* cannot happen */ 2006 session_notification(p, ERR_HEADER, ERR_HDR_TYPE, 2007 &msgtype, 1); 2008 log_warnx("received message with unknown type %u", 2009 msgtype); 2010 bgp_fsm(p, EVNT_CON_FATAL); 2011 } 2012 rpos += msglen; 2013 if (++processed > MSG_PROCESS_LIMIT) { 2014 p->rpending = 1; 2015 break; 2016 } 2017 } 2018 2019 if (p->rbuf == NULL) 2020 return; 2021 if (rpos < av) { 2022 left = av - rpos; 2023 memmove(&p->rbuf->buf, p->rbuf->buf + rpos, left); 2024 p->rbuf->wpos = left; 2025 } else 2026 p->rbuf->wpos = 0; 2027 } 2028 2029 int 2030 parse_header(struct peer *peer, u_char *data, uint16_t *len, uint8_t *type) 2031 { 2032 u_char *p; 2033 uint16_t olen; 2034 static const uint8_t marker[MSGSIZE_HEADER_MARKER] = { 0xff, 0xff, 2035 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 2036 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; 2037 2038 /* caller MUST make sure we are getting 19 bytes! 
*/ 2039 p = data; 2040 if (memcmp(p, marker, sizeof(marker))) { 2041 log_peer_warnx(&peer->conf, "sync error"); 2042 session_notification(peer, ERR_HEADER, ERR_HDR_SYNC, NULL, 0); 2043 bgp_fsm(peer, EVNT_CON_FATAL); 2044 return (-1); 2045 } 2046 p += MSGSIZE_HEADER_MARKER; 2047 2048 memcpy(&olen, p, 2); 2049 *len = ntohs(olen); 2050 p += 2; 2051 memcpy(type, p, 1); 2052 2053 if (*len < MSGSIZE_HEADER || *len > MAX_PKTSIZE) { 2054 log_peer_warnx(&peer->conf, 2055 "received message: illegal length: %u byte", *len); 2056 session_notification(peer, ERR_HEADER, ERR_HDR_LEN, 2057 &olen, sizeof(olen)); 2058 bgp_fsm(peer, EVNT_CON_FATAL); 2059 return (-1); 2060 } 2061 2062 switch (*type) { 2063 case OPEN: 2064 if (*len < MSGSIZE_OPEN_MIN) { 2065 log_peer_warnx(&peer->conf, 2066 "received OPEN: illegal len: %u byte", *len); 2067 session_notification(peer, ERR_HEADER, ERR_HDR_LEN, 2068 &olen, sizeof(olen)); 2069 bgp_fsm(peer, EVNT_CON_FATAL); 2070 return (-1); 2071 } 2072 break; 2073 case NOTIFICATION: 2074 if (*len < MSGSIZE_NOTIFICATION_MIN) { 2075 log_peer_warnx(&peer->conf, 2076 "received NOTIFICATION: illegal len: %u byte", 2077 *len); 2078 session_notification(peer, ERR_HEADER, ERR_HDR_LEN, 2079 &olen, sizeof(olen)); 2080 bgp_fsm(peer, EVNT_CON_FATAL); 2081 return (-1); 2082 } 2083 break; 2084 case UPDATE: 2085 if (*len < MSGSIZE_UPDATE_MIN) { 2086 log_peer_warnx(&peer->conf, 2087 "received UPDATE: illegal len: %u byte", *len); 2088 session_notification(peer, ERR_HEADER, ERR_HDR_LEN, 2089 &olen, sizeof(olen)); 2090 bgp_fsm(peer, EVNT_CON_FATAL); 2091 return (-1); 2092 } 2093 break; 2094 case KEEPALIVE: 2095 if (*len != MSGSIZE_KEEPALIVE) { 2096 log_peer_warnx(&peer->conf, 2097 "received KEEPALIVE: illegal len: %u byte", *len); 2098 session_notification(peer, ERR_HEADER, ERR_HDR_LEN, 2099 &olen, sizeof(olen)); 2100 bgp_fsm(peer, EVNT_CON_FATAL); 2101 return (-1); 2102 } 2103 break; 2104 case RREFRESH: 2105 if (*len < MSGSIZE_RREFRESH_MIN) { 2106 
log_peer_warnx(&peer->conf, 2107 "received RREFRESH: illegal len: %u byte", *len); 2108 session_notification(peer, ERR_HEADER, ERR_HDR_LEN, 2109 &olen, sizeof(olen)); 2110 bgp_fsm(peer, EVNT_CON_FATAL); 2111 return (-1); 2112 } 2113 break; 2114 default: 2115 log_peer_warnx(&peer->conf, 2116 "received msg with unknown type %u", *type); 2117 session_notification(peer, ERR_HEADER, ERR_HDR_TYPE, 2118 type, 1); 2119 bgp_fsm(peer, EVNT_CON_FATAL); 2120 return (-1); 2121 } 2122 return (0); 2123 } 2124 2125 int 2126 parse_open(struct peer *peer) 2127 { 2128 u_char *p, *op_val; 2129 uint8_t version, rversion; 2130 uint16_t short_as, msglen; 2131 uint16_t holdtime, oholdtime, myholdtime; 2132 uint32_t as, bgpid; 2133 uint16_t optparamlen, extlen, plen, op_len; 2134 uint8_t op_type, suberr = 0; 2135 2136 p = peer->rbuf->rptr; 2137 p += MSGSIZE_HEADER_MARKER; 2138 memcpy(&msglen, p, sizeof(msglen)); 2139 msglen = ntohs(msglen); 2140 2141 p = peer->rbuf->rptr; 2142 p += MSGSIZE_HEADER; /* header is already checked */ 2143 2144 memcpy(&version, p, sizeof(version)); 2145 p += sizeof(version); 2146 2147 if (version != BGP_VERSION) { 2148 log_peer_warnx(&peer->conf, 2149 "peer wants unrecognized version %u", version); 2150 if (version > BGP_VERSION) 2151 rversion = version - BGP_VERSION; 2152 else 2153 rversion = BGP_VERSION; 2154 session_notification(peer, ERR_OPEN, ERR_OPEN_VERSION, 2155 &rversion, sizeof(rversion)); 2156 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2157 return (-1); 2158 } 2159 2160 memcpy(&short_as, p, sizeof(short_as)); 2161 p += sizeof(short_as); 2162 as = peer->short_as = ntohs(short_as); 2163 if (as == 0) { 2164 log_peer_warnx(&peer->conf, 2165 "peer requests unacceptable AS %u", as); 2166 session_notification(peer, ERR_OPEN, ERR_OPEN_AS, 2167 NULL, 0); 2168 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2169 return (-1); 2170 } 2171 2172 memcpy(&oholdtime, p, sizeof(oholdtime)); 2173 p += sizeof(oholdtime); 2174 2175 holdtime = ntohs(oholdtime); 2176 
if (holdtime && holdtime < peer->conf.min_holdtime) { 2177 log_peer_warnx(&peer->conf, 2178 "peer requests unacceptable holdtime %u", holdtime); 2179 session_notification(peer, ERR_OPEN, ERR_OPEN_HOLDTIME, 2180 NULL, 0); 2181 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2182 return (-1); 2183 } 2184 2185 myholdtime = peer->conf.holdtime; 2186 if (!myholdtime) 2187 myholdtime = conf->holdtime; 2188 if (holdtime < myholdtime) 2189 peer->holdtime = holdtime; 2190 else 2191 peer->holdtime = myholdtime; 2192 2193 memcpy(&bgpid, p, sizeof(bgpid)); 2194 p += sizeof(bgpid); 2195 2196 /* check bgpid for validity - just disallow 0 */ 2197 if (ntohl(bgpid) == 0) { 2198 log_peer_warnx(&peer->conf, "peer BGPID %u unacceptable", 2199 ntohl(bgpid)); 2200 session_notification(peer, ERR_OPEN, ERR_OPEN_BGPID, 2201 NULL, 0); 2202 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2203 return (-1); 2204 } 2205 peer->remote_bgpid = bgpid; 2206 2207 extlen = 0; 2208 optparamlen = *p++; 2209 2210 if (optparamlen == 0) { 2211 if (msglen != MSGSIZE_OPEN_MIN) { 2212 bad_len: 2213 log_peer_warnx(&peer->conf, 2214 "corrupt OPEN message received: length mismatch"); 2215 session_notification(peer, ERR_OPEN, 0, NULL, 0); 2216 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2217 return (-1); 2218 } 2219 } else { 2220 if (msglen < MSGSIZE_OPEN_MIN + 1) 2221 goto bad_len; 2222 2223 op_type = *p; 2224 if (op_type == OPT_PARAM_EXT_LEN) { 2225 p++; 2226 memcpy(&optparamlen, p, sizeof(optparamlen)); 2227 optparamlen = ntohs(optparamlen); 2228 p += sizeof(optparamlen); 2229 extlen = 1; 2230 } 2231 2232 /* RFC9020 encoding has 3 extra bytes */ 2233 if (optparamlen + 3 * extlen != msglen - MSGSIZE_OPEN_MIN) 2234 goto bad_len; 2235 } 2236 2237 plen = optparamlen; 2238 while (plen > 0) { 2239 if (plen < 2 + extlen) 2240 goto bad_len; 2241 2242 memcpy(&op_type, p, sizeof(op_type)); 2243 p += sizeof(op_type); 2244 plen -= sizeof(op_type); 2245 if (!extlen) { 2246 op_len = *p++; 2247 plen--; 2248 } else { 2249 
memcpy(&op_len, p, sizeof(op_len)); 2250 op_len = ntohs(op_len); 2251 p += sizeof(op_len); 2252 plen -= sizeof(op_len); 2253 } 2254 if (op_len > 0) { 2255 if (plen < op_len) 2256 goto bad_len; 2257 op_val = p; 2258 p += op_len; 2259 plen -= op_len; 2260 } else 2261 op_val = NULL; 2262 2263 switch (op_type) { 2264 case OPT_PARAM_CAPABILITIES: /* RFC 3392 */ 2265 if (parse_capabilities(peer, op_val, op_len, 2266 &as) == -1) { 2267 session_notification(peer, ERR_OPEN, 0, 2268 NULL, 0); 2269 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2270 return (-1); 2271 } 2272 break; 2273 case OPT_PARAM_AUTH: /* deprecated */ 2274 default: 2275 /* 2276 * unsupported type 2277 * the RFCs tell us to leave the data section empty 2278 * and notify the peer with ERR_OPEN, ERR_OPEN_OPT. 2279 * How the peer should know _which_ optional parameter 2280 * we don't support is beyond me. 2281 */ 2282 log_peer_warnx(&peer->conf, 2283 "received OPEN message with unsupported optional " 2284 "parameter: type %u", op_type); 2285 session_notification(peer, ERR_OPEN, ERR_OPEN_OPT, 2286 NULL, 0); 2287 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2288 /* no punish */ 2289 timer_set(&peer->timers, Timer_IdleHold, 0); 2290 peer->IdleHoldTime /= 2; 2291 return (-1); 2292 } 2293 } 2294 2295 /* if remote-as is zero and it's a cloned neighbor, accept any */ 2296 if (peer->template && !peer->conf.remote_as && as != AS_TRANS) { 2297 peer->conf.remote_as = as; 2298 peer->conf.ebgp = (peer->conf.remote_as != peer->conf.local_as); 2299 if (!peer->conf.ebgp) 2300 /* force enforce_as off for iBGP sessions */ 2301 peer->conf.enforce_as = ENFORCE_AS_OFF; 2302 } 2303 2304 if (peer->conf.remote_as != as) { 2305 log_peer_warnx(&peer->conf, "peer sent wrong AS %s", 2306 log_as(as)); 2307 session_notification(peer, ERR_OPEN, ERR_OPEN_AS, NULL, 0); 2308 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2309 return (-1); 2310 } 2311 2312 /* on iBGP sessions check for bgpid collision */ 2313 if (!peer->conf.ebgp && 
peer->remote_bgpid == conf->bgpid) { 2314 log_peer_warnx(&peer->conf, "peer BGPID %u conflicts with ours", 2315 ntohl(bgpid)); 2316 session_notification(peer, ERR_OPEN, ERR_OPEN_BGPID, 2317 NULL, 0); 2318 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2319 return (-1); 2320 } 2321 2322 if (capa_neg_calc(peer, &suberr) == -1) { 2323 session_notification(peer, ERR_OPEN, suberr, NULL, 0); 2324 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2325 return (-1); 2326 } 2327 2328 return (0); 2329 } 2330 2331 int 2332 parse_update(struct peer *peer) 2333 { 2334 u_char *p; 2335 uint16_t datalen; 2336 2337 /* 2338 * we pass the message verbatim to the rde. 2339 * in case of errors the whole session is reset with a 2340 * notification anyway, we only need to know the peer 2341 */ 2342 p = peer->rbuf->rptr; 2343 p += MSGSIZE_HEADER_MARKER; 2344 memcpy(&datalen, p, sizeof(datalen)); 2345 datalen = ntohs(datalen); 2346 2347 p = peer->rbuf->rptr; 2348 p += MSGSIZE_HEADER; /* header is already checked */ 2349 datalen -= MSGSIZE_HEADER; 2350 2351 if (imsg_rde(IMSG_UPDATE, peer->conf.id, p, datalen) == -1) 2352 return (-1); 2353 2354 return (0); 2355 } 2356 2357 int 2358 parse_rrefresh(struct peer *peer) 2359 { 2360 struct route_refresh rr; 2361 uint16_t afi, datalen; 2362 uint8_t aid, safi, subtype; 2363 u_char *p; 2364 2365 p = peer->rbuf->rptr; 2366 p += MSGSIZE_HEADER_MARKER; 2367 memcpy(&datalen, p, sizeof(datalen)); 2368 datalen = ntohs(datalen); 2369 2370 p = peer->rbuf->rptr; 2371 p += MSGSIZE_HEADER; /* header is already checked */ 2372 2373 /* 2374 * We could check if we actually announced the capability but 2375 * as long as the message is correctly encoded we don't care. 
2376 */ 2377 2378 /* afi, 2 byte */ 2379 memcpy(&afi, p, sizeof(afi)); 2380 afi = ntohs(afi); 2381 p += 2; 2382 /* subtype, 1 byte */ 2383 subtype = *p; 2384 p += 1; 2385 /* safi, 1 byte */ 2386 safi = *p; 2387 2388 /* check subtype if peer announced enhanced route refresh */ 2389 if (peer->capa.neg.enhanced_rr) { 2390 switch (subtype) { 2391 case ROUTE_REFRESH_REQUEST: 2392 /* no ORF support, so no oversized RREFRESH msgs */ 2393 if (datalen != MSGSIZE_RREFRESH) { 2394 log_peer_warnx(&peer->conf, 2395 "received RREFRESH: illegal len: %u byte", 2396 datalen); 2397 datalen = htons(datalen); 2398 session_notification(peer, ERR_HEADER, 2399 ERR_HDR_LEN, &datalen, sizeof(datalen)); 2400 bgp_fsm(peer, EVNT_CON_FATAL); 2401 return (-1); 2402 } 2403 peer->stats.refresh_rcvd_req++; 2404 break; 2405 case ROUTE_REFRESH_BEGIN_RR: 2406 case ROUTE_REFRESH_END_RR: 2407 /* special handling for RFC7313 */ 2408 if (datalen != MSGSIZE_RREFRESH) { 2409 log_peer_warnx(&peer->conf, 2410 "received RREFRESH: illegal len: %u byte", 2411 datalen); 2412 p = peer->rbuf->rptr; 2413 p += MSGSIZE_HEADER; 2414 datalen -= MSGSIZE_HEADER; 2415 session_notification(peer, ERR_RREFRESH, 2416 ERR_RR_INV_LEN, p, datalen); 2417 bgp_fsm(peer, EVNT_CON_FATAL); 2418 return (-1); 2419 } 2420 if (subtype == ROUTE_REFRESH_BEGIN_RR) 2421 peer->stats.refresh_rcvd_borr++; 2422 else 2423 peer->stats.refresh_rcvd_eorr++; 2424 break; 2425 default: 2426 log_peer_warnx(&peer->conf, "peer sent bad refresh, " 2427 "bad subtype %d", subtype); 2428 return (0); 2429 } 2430 } else { 2431 /* force subtype to default */ 2432 subtype = ROUTE_REFRESH_REQUEST; 2433 peer->stats.refresh_rcvd_req++; 2434 } 2435 2436 /* afi/safi unchecked - unrecognized values will be ignored anyway */ 2437 if (afi2aid(afi, safi, &aid) == -1) { 2438 log_peer_warnx(&peer->conf, "peer sent bad refresh, " 2439 "invalid afi/safi pair"); 2440 return (0); 2441 } 2442 2443 if (!peer->capa.neg.refresh && !peer->capa.neg.enhanced_rr) { 2444 
log_peer_warnx(&peer->conf, "peer sent unexpected refresh"); 2445 return (0); 2446 } 2447 2448 rr.aid = aid; 2449 rr.subtype = subtype; 2450 2451 if (imsg_rde(IMSG_REFRESH, peer->conf.id, &rr, sizeof(rr)) == -1) 2452 return (-1); 2453 2454 return (0); 2455 } 2456 2457 int 2458 parse_notification(struct peer *peer) 2459 { 2460 u_char *p; 2461 uint16_t datalen; 2462 uint8_t errcode; 2463 uint8_t subcode; 2464 uint8_t capa_code; 2465 uint8_t capa_len; 2466 size_t reason_len; 2467 uint8_t i; 2468 2469 /* just log */ 2470 p = peer->rbuf->rptr; 2471 p += MSGSIZE_HEADER_MARKER; 2472 memcpy(&datalen, p, sizeof(datalen)); 2473 datalen = ntohs(datalen); 2474 2475 p = peer->rbuf->rptr; 2476 p += MSGSIZE_HEADER; /* header is already checked */ 2477 datalen -= MSGSIZE_HEADER; 2478 2479 memcpy(&errcode, p, sizeof(errcode)); 2480 p += sizeof(errcode); 2481 datalen -= sizeof(errcode); 2482 2483 memcpy(&subcode, p, sizeof(subcode)); 2484 p += sizeof(subcode); 2485 datalen -= sizeof(subcode); 2486 2487 log_notification(peer, errcode, subcode, p, datalen, "received"); 2488 peer->errcnt++; 2489 peer->stats.last_rcvd_errcode = errcode; 2490 peer->stats.last_rcvd_suberr = subcode; 2491 2492 if (errcode == ERR_OPEN && subcode == ERR_OPEN_CAPA) { 2493 if (datalen == 0) { /* zebra likes to send those.. 
humbug */ 2494 log_peer_warnx(&peer->conf, "received \"unsupported " 2495 "capability\" notification without data part, " 2496 "disabling capability announcements altogether"); 2497 session_capa_ann_none(peer); 2498 } 2499 2500 while (datalen > 0) { 2501 if (datalen < 2) { 2502 log_peer_warnx(&peer->conf, 2503 "parse_notification: " 2504 "expect len >= 2, len is %u", datalen); 2505 return (-1); 2506 } 2507 memcpy(&capa_code, p, sizeof(capa_code)); 2508 p += sizeof(capa_code); 2509 datalen -= sizeof(capa_code); 2510 memcpy(&capa_len, p, sizeof(capa_len)); 2511 p += sizeof(capa_len); 2512 datalen -= sizeof(capa_len); 2513 if (datalen < capa_len) { 2514 log_peer_warnx(&peer->conf, 2515 "parse_notification: capa_len %u exceeds " 2516 "remaining msg length %u", capa_len, 2517 datalen); 2518 return (-1); 2519 } 2520 p += capa_len; 2521 datalen -= capa_len; 2522 switch (capa_code) { 2523 case CAPA_MP: 2524 for (i = 0; i < AID_MAX; i++) 2525 peer->capa.ann.mp[i] = 0; 2526 log_peer_warnx(&peer->conf, 2527 "disabling multiprotocol capability"); 2528 break; 2529 case CAPA_REFRESH: 2530 peer->capa.ann.refresh = 0; 2531 log_peer_warnx(&peer->conf, 2532 "disabling route refresh capability"); 2533 break; 2534 case CAPA_ROLE: 2535 if (peer->capa.ann.policy == 1) { 2536 peer->capa.ann.policy = 0; 2537 log_peer_warnx(&peer->conf, 2538 "disabling role capability"); 2539 } else { 2540 log_peer_warnx(&peer->conf, 2541 "role capability enforced, " 2542 "not disabling"); 2543 } 2544 break; 2545 case CAPA_RESTART: 2546 peer->capa.ann.grestart.restart = 0; 2547 log_peer_warnx(&peer->conf, 2548 "disabling restart capability"); 2549 break; 2550 case CAPA_AS4BYTE: 2551 peer->capa.ann.as4byte = 0; 2552 log_peer_warnx(&peer->conf, 2553 "disabling 4-byte AS num capability"); 2554 break; 2555 case CAPA_ADD_PATH: 2556 memset(peer->capa.ann.add_path, 0, 2557 sizeof(peer->capa.ann.add_path)); 2558 log_peer_warnx(&peer->conf, 2559 "disabling ADD-PATH capability"); 2560 break; 2561 case 
CAPA_ENHANCED_RR: 2562 peer->capa.ann.enhanced_rr = 0; 2563 log_peer_warnx(&peer->conf, 2564 "disabling enhanced route refresh " 2565 "capability"); 2566 break; 2567 default: /* should not happen... */ 2568 log_peer_warnx(&peer->conf, "received " 2569 "\"unsupported capability\" notification " 2570 "for unknown capability %u, disabling " 2571 "capability announcements altogether", 2572 capa_code); 2573 session_capa_ann_none(peer); 2574 break; 2575 } 2576 } 2577 2578 return (1); 2579 } 2580 2581 if (errcode == ERR_OPEN && subcode == ERR_OPEN_OPT) { 2582 session_capa_ann_none(peer); 2583 return (1); 2584 } 2585 2586 if (errcode == ERR_CEASE && 2587 (subcode == ERR_CEASE_ADMIN_DOWN || 2588 subcode == ERR_CEASE_ADMIN_RESET)) { 2589 if (datalen > 1) { 2590 reason_len = *p++; 2591 datalen--; 2592 if (datalen < reason_len) { 2593 log_peer_warnx(&peer->conf, 2594 "received truncated shutdown reason"); 2595 return (0); 2596 } 2597 if (reason_len > REASON_LEN - 1) { 2598 log_peer_warnx(&peer->conf, 2599 "received overly long shutdown reason"); 2600 return (0); 2601 } 2602 memcpy(peer->stats.last_reason, p, reason_len); 2603 peer->stats.last_reason[reason_len] = '\0'; 2604 log_peer_warnx(&peer->conf, 2605 "received shutdown reason: \"%s\"", 2606 log_reason(peer->stats.last_reason)); 2607 p += reason_len; 2608 datalen -= reason_len; 2609 } 2610 } 2611 2612 return (0); 2613 } 2614 2615 int 2616 parse_capabilities(struct peer *peer, u_char *d, uint16_t dlen, uint32_t *as) 2617 { 2618 u_char *capa_val; 2619 uint32_t remote_as; 2620 uint16_t len; 2621 uint16_t afi; 2622 uint16_t gr_header; 2623 uint8_t safi; 2624 uint8_t aid; 2625 uint8_t flags; 2626 uint8_t capa_code; 2627 uint8_t capa_len; 2628 uint8_t i; 2629 2630 len = dlen; 2631 while (len > 0) { 2632 if (len < 2) { 2633 log_peer_warnx(&peer->conf, "Bad capabilities attr " 2634 "length: %u, too short", len); 2635 return (-1); 2636 } 2637 memcpy(&capa_code, d, sizeof(capa_code)); 2638 d += sizeof(capa_code); 2639 len -= 
sizeof(capa_code); 2640 memcpy(&capa_len, d, sizeof(capa_len)); 2641 d += sizeof(capa_len); 2642 len -= sizeof(capa_len); 2643 if (capa_len > 0) { 2644 if (len < capa_len) { 2645 log_peer_warnx(&peer->conf, 2646 "Bad capabilities attr length: " 2647 "len %u smaller than capa_len %u", 2648 len, capa_len); 2649 return (-1); 2650 } 2651 capa_val = d; 2652 d += capa_len; 2653 len -= capa_len; 2654 } else 2655 capa_val = NULL; 2656 2657 switch (capa_code) { 2658 case CAPA_MP: /* RFC 4760 */ 2659 if (capa_len != 4) { 2660 log_peer_warnx(&peer->conf, 2661 "Bad multi protocol capability length: " 2662 "%u", capa_len); 2663 break; 2664 } 2665 memcpy(&afi, capa_val, sizeof(afi)); 2666 afi = ntohs(afi); 2667 memcpy(&safi, capa_val + 3, sizeof(safi)); 2668 if (afi2aid(afi, safi, &aid) == -1) { 2669 log_peer_warnx(&peer->conf, 2670 "Received multi protocol capability: " 2671 " unknown AFI %u, safi %u pair", 2672 afi, safi); 2673 break; 2674 } 2675 peer->capa.peer.mp[aid] = 1; 2676 break; 2677 case CAPA_REFRESH: 2678 peer->capa.peer.refresh = 1; 2679 break; 2680 case CAPA_ROLE: 2681 if (capa_len != 1) { 2682 log_peer_warnx(&peer->conf, 2683 "Bad role capability length: %u", capa_len); 2684 break; 2685 } 2686 if (!peer->conf.ebgp) { 2687 log_peer_warnx(&peer->conf, 2688 "Received role capability on iBGP session"); 2689 break; 2690 } 2691 peer->capa.peer.policy = 1; 2692 peer->remote_role = capa2role(*capa_val); 2693 break; 2694 case CAPA_RESTART: 2695 if (capa_len == 2) { 2696 /* peer only supports EoR marker */ 2697 peer->capa.peer.grestart.restart = 1; 2698 peer->capa.peer.grestart.timeout = 0; 2699 break; 2700 } else if (capa_len % 4 != 2) { 2701 log_peer_warnx(&peer->conf, 2702 "Bad graceful restart capability length: " 2703 "%u", capa_len); 2704 peer->capa.peer.grestart.restart = 0; 2705 peer->capa.peer.grestart.timeout = 0; 2706 break; 2707 } 2708 2709 memcpy(&gr_header, capa_val, sizeof(gr_header)); 2710 gr_header = ntohs(gr_header); 2711 peer->capa.peer.grestart.timeout = 
2712 gr_header & CAPA_GR_TIMEMASK; 2713 if (peer->capa.peer.grestart.timeout == 0) { 2714 log_peer_warnx(&peer->conf, "Received " 2715 "graceful restart timeout is zero"); 2716 peer->capa.peer.grestart.restart = 0; 2717 break; 2718 } 2719 2720 for (i = 2; i <= capa_len - 4; i += 4) { 2721 memcpy(&afi, capa_val + i, sizeof(afi)); 2722 afi = ntohs(afi); 2723 safi = capa_val[i + 2]; 2724 flags = capa_val[i + 3]; 2725 if (afi2aid(afi, safi, &aid) == -1) { 2726 log_peer_warnx(&peer->conf, 2727 "Received graceful restart capa: " 2728 " unknown AFI %u, safi %u pair", 2729 afi, safi); 2730 continue; 2731 } 2732 peer->capa.peer.grestart.flags[aid] |= 2733 CAPA_GR_PRESENT; 2734 if (flags & CAPA_GR_F_FLAG) 2735 peer->capa.peer.grestart.flags[aid] |= 2736 CAPA_GR_FORWARD; 2737 if (gr_header & CAPA_GR_R_FLAG) 2738 peer->capa.peer.grestart.flags[aid] |= 2739 CAPA_GR_RESTART; 2740 peer->capa.peer.grestart.restart = 2; 2741 } 2742 break; 2743 case CAPA_AS4BYTE: 2744 if (capa_len != 4) { 2745 log_peer_warnx(&peer->conf, 2746 "Bad AS4BYTE capability length: " 2747 "%u", capa_len); 2748 peer->capa.peer.as4byte = 0; 2749 break; 2750 } 2751 memcpy(&remote_as, capa_val, sizeof(remote_as)); 2752 *as = ntohl(remote_as); 2753 if (*as == 0) { 2754 log_peer_warnx(&peer->conf, 2755 "peer requests unacceptable AS %u", *as); 2756 session_notification(peer, ERR_OPEN, 2757 ERR_OPEN_AS, NULL, 0); 2758 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2759 return (-1); 2760 } 2761 peer->capa.peer.as4byte = 1; 2762 break; 2763 case CAPA_ADD_PATH: 2764 if (capa_len % 4 != 0) { 2765 log_peer_warnx(&peer->conf, 2766 "Bad ADD-PATH capability length: " 2767 "%u", capa_len); 2768 memset(peer->capa.peer.add_path, 0, 2769 sizeof(peer->capa.peer.add_path)); 2770 break; 2771 } 2772 for (i = 0; i <= capa_len - 4; i += 4) { 2773 memcpy(&afi, capa_val + i, sizeof(afi)); 2774 afi = ntohs(afi); 2775 safi = capa_val[i + 2]; 2776 flags = capa_val[i + 3]; 2777 if (afi2aid(afi, safi, &aid) == -1) { 2778 
log_peer_warnx(&peer->conf, 2779 "Received ADD-PATH capa: " 2780 " unknown AFI %u, safi %u pair", 2781 afi, safi); 2782 memset(peer->capa.peer.add_path, 0, 2783 sizeof(peer->capa.peer.add_path)); 2784 break; 2785 } 2786 if (flags & ~CAPA_AP_BIDIR) { 2787 log_peer_warnx(&peer->conf, 2788 "Received ADD-PATH capa: " 2789 " bad flags %x", flags); 2790 memset(peer->capa.peer.add_path, 0, 2791 sizeof(peer->capa.peer.add_path)); 2792 break; 2793 } 2794 peer->capa.peer.add_path[aid] = flags; 2795 } 2796 break; 2797 case CAPA_ENHANCED_RR: 2798 peer->capa.peer.enhanced_rr = 1; 2799 break; 2800 default: 2801 break; 2802 } 2803 } 2804 2805 return (0); 2806 } 2807 2808 int 2809 capa_neg_calc(struct peer *p, uint8_t *suberr) 2810 { 2811 uint8_t i, hasmp = 0; 2812 2813 /* a capability is accepted only if both sides announced it */ 2814 2815 p->capa.neg.refresh = 2816 (p->capa.ann.refresh && p->capa.peer.refresh) != 0; 2817 p->capa.neg.enhanced_rr = 2818 (p->capa.ann.enhanced_rr && p->capa.peer.enhanced_rr) != 0; 2819 2820 p->capa.neg.as4byte = 2821 (p->capa.ann.as4byte && p->capa.peer.as4byte) != 0; 2822 2823 /* MP: both side must agree on the AFI,SAFI pair */ 2824 for (i = 0; i < AID_MAX; i++) { 2825 if (p->capa.ann.mp[i] && p->capa.peer.mp[i]) 2826 p->capa.neg.mp[i] = 1; 2827 else 2828 p->capa.neg.mp[i] = 0; 2829 if (p->capa.ann.mp[i]) 2830 hasmp = 1; 2831 } 2832 /* if no MP capability present default to IPv4 unicast mode */ 2833 if (!hasmp) 2834 p->capa.neg.mp[AID_INET] = 1; 2835 2836 /* 2837 * graceful restart: the peer capabilities are of interest here. 2838 * It is necessary to compare the new values with the previous ones 2839 * and act accordingly. AFI/SAFI that are not part in the MP capability 2840 * are treated as not being present. 2841 * Also make sure that a flush happens if the session stopped 2842 * supporting graceful restart. 
2843 */ 2844 2845 for (i = 0; i < AID_MAX; i++) { 2846 int8_t negflags; 2847 2848 /* disable GR if the AFI/SAFI is not present */ 2849 if ((p->capa.peer.grestart.flags[i] & CAPA_GR_PRESENT && 2850 p->capa.neg.mp[i] == 0)) 2851 p->capa.peer.grestart.flags[i] = 0; /* disable */ 2852 /* look at current GR state and decide what to do */ 2853 negflags = p->capa.neg.grestart.flags[i]; 2854 p->capa.neg.grestart.flags[i] = p->capa.peer.grestart.flags[i]; 2855 if (negflags & CAPA_GR_RESTARTING) { 2856 if (p->capa.ann.grestart.restart != 0 && 2857 p->capa.peer.grestart.flags[i] & CAPA_GR_FORWARD) { 2858 p->capa.neg.grestart.flags[i] |= 2859 CAPA_GR_RESTARTING; 2860 } else { 2861 if (imsg_rde(IMSG_SESSION_FLUSH, p->conf.id, 2862 &i, sizeof(i)) == -1) { 2863 log_peer_warnx(&p->conf, 2864 "imsg send failed"); 2865 return (-1); 2866 } 2867 log_peer_warnx(&p->conf, "graceful restart of " 2868 "%s, not restarted, flushing", aid2str(i)); 2869 } 2870 } 2871 } 2872 p->capa.neg.grestart.timeout = p->capa.peer.grestart.timeout; 2873 p->capa.neg.grestart.restart = p->capa.peer.grestart.restart; 2874 if (p->capa.ann.grestart.restart == 0) 2875 p->capa.neg.grestart.restart = 0; 2876 2877 2878 /* 2879 * ADD-PATH: set only those bits where both sides agree. 2880 * For this compare our send bit with the recv bit from the peer 2881 * and vice versa. 2882 * The flags are stored from this systems view point. 
2883 */ 2884 memset(p->capa.neg.add_path, 0, sizeof(p->capa.neg.add_path)); 2885 if (p->capa.ann.add_path[0]) { 2886 for (i = AID_MIN; i < AID_MAX; i++) { 2887 if ((p->capa.ann.add_path[i] & CAPA_AP_RECV) && 2888 (p->capa.peer.add_path[i] & CAPA_AP_SEND)) { 2889 p->capa.neg.add_path[i] |= CAPA_AP_RECV; 2890 p->capa.neg.add_path[0] |= CAPA_AP_RECV; 2891 } 2892 if ((p->capa.ann.add_path[i] & CAPA_AP_SEND) && 2893 (p->capa.peer.add_path[i] & CAPA_AP_RECV)) { 2894 p->capa.neg.add_path[i] |= CAPA_AP_SEND; 2895 p->capa.neg.add_path[0] |= CAPA_AP_SEND; 2896 } 2897 } 2898 } 2899 2900 /* 2901 * Open policy: check that the policy is sensible. 2902 * 2903 * Make sure that the roles match and set the negotiated capability 2904 * to the role of the peer. So the RDE can inject the OTC attribute. 2905 * See RFC 9234, section 4.2. 2906 * These checks should only happen on ebgp sessions. 2907 */ 2908 if (p->capa.ann.policy != 0 && p->capa.peer.policy != 0 && 2909 p->conf.ebgp) { 2910 switch (p->conf.role) { 2911 case ROLE_PROVIDER: 2912 if (p->remote_role != ROLE_CUSTOMER) 2913 goto fail; 2914 break; 2915 case ROLE_RS: 2916 if (p->remote_role != ROLE_RS_CLIENT) 2917 goto fail; 2918 break; 2919 case ROLE_RS_CLIENT: 2920 if (p->remote_role != ROLE_RS) 2921 goto fail; 2922 break; 2923 case ROLE_CUSTOMER: 2924 if (p->remote_role != ROLE_PROVIDER) 2925 goto fail; 2926 break; 2927 case ROLE_PEER: 2928 if (p->remote_role != ROLE_PEER) 2929 goto fail; 2930 break; 2931 default: 2932 fail: 2933 log_peer_warnx(&p->conf, "open policy role mismatch: " 2934 "our role %s, their role %s", 2935 log_policy(p->conf.role), 2936 log_policy(p->remote_role)); 2937 *suberr = ERR_OPEN_ROLE; 2938 return (-1); 2939 } 2940 p->capa.neg.policy = 1; 2941 } else if (p->capa.ann.policy == 2 && p->conf.ebgp) { 2942 /* enforce presence of open policy role capability */ 2943 log_peer_warnx(&p->conf, "open policy role enforced but " 2944 "not present"); 2945 *suberr = ERR_OPEN_ROLE; 2946 return (-1); 2947 } 2948 2949 
return (0); 2950 } 2951 2952 void 2953 session_dispatch_imsg(struct imsgbuf *imsgbuf, int idx, u_int *listener_cnt) 2954 { 2955 struct imsg imsg; 2956 struct mrt xmrt; 2957 struct route_refresh rr; 2958 struct mrt *mrt; 2959 struct imsgbuf *i; 2960 struct peer *p; 2961 struct listen_addr *la, *nla; 2962 struct session_dependon *sdon; 2963 u_char *data; 2964 int n, fd, depend_ok, restricted; 2965 uint16_t t; 2966 uint8_t aid, errcode, subcode; 2967 2968 while (imsgbuf) { 2969 if ((n = imsg_get(imsgbuf, &imsg)) == -1) 2970 fatal("session_dispatch_imsg: imsg_get error"); 2971 2972 if (n == 0) 2973 break; 2974 2975 switch (imsg.hdr.type) { 2976 case IMSG_SOCKET_CONN: 2977 case IMSG_SOCKET_CONN_CTL: 2978 if (idx != PFD_PIPE_MAIN) 2979 fatalx("reconf request not from parent"); 2980 if ((fd = imsg.fd) == -1) { 2981 log_warnx("expected to receive imsg fd to " 2982 "RDE but didn't receive any"); 2983 break; 2984 } 2985 if ((i = malloc(sizeof(struct imsgbuf))) == NULL) 2986 fatal(NULL); 2987 imsg_init(i, fd); 2988 if (imsg.hdr.type == IMSG_SOCKET_CONN) { 2989 if (ibuf_rde) { 2990 log_warnx("Unexpected imsg connection " 2991 "to RDE received"); 2992 msgbuf_clear(&ibuf_rde->w); 2993 free(ibuf_rde); 2994 } 2995 ibuf_rde = i; 2996 } else { 2997 if (ibuf_rde_ctl) { 2998 log_warnx("Unexpected imsg ctl " 2999 "connection to RDE received"); 3000 msgbuf_clear(&ibuf_rde_ctl->w); 3001 free(ibuf_rde_ctl); 3002 } 3003 ibuf_rde_ctl = i; 3004 } 3005 break; 3006 case IMSG_RECONF_CONF: 3007 if (idx != PFD_PIPE_MAIN) 3008 fatalx("reconf request not from parent"); 3009 nconf = new_config(); 3010 3011 copy_config(nconf, imsg.data); 3012 pending_reconf = 1; 3013 break; 3014 case IMSG_RECONF_PEER: 3015 if (idx != PFD_PIPE_MAIN) 3016 fatalx("reconf request not from parent"); 3017 if ((p = calloc(1, sizeof(struct peer))) == NULL) 3018 fatal("new_peer"); 3019 memcpy(&p->conf, imsg.data, sizeof(struct peer_config)); 3020 p->state = p->prev_state = STATE_NONE; 3021 p->reconf_action = RECONF_REINIT; 
3022 if (RB_INSERT(peer_head, &nconf->peers, p) != NULL) 3023 fatalx("%s: peer tree is corrupt", __func__); 3024 break; 3025 case IMSG_RECONF_LISTENER: 3026 if (idx != PFD_PIPE_MAIN) 3027 fatalx("reconf request not from parent"); 3028 if (nconf == NULL) 3029 fatalx("IMSG_RECONF_LISTENER but no config"); 3030 nla = imsg.data; 3031 TAILQ_FOREACH(la, conf->listen_addrs, entry) 3032 if (!la_cmp(la, nla)) 3033 break; 3034 3035 if (la == NULL) { 3036 if (nla->reconf != RECONF_REINIT) 3037 fatalx("king bula sez: " 3038 "expected REINIT"); 3039 3040 if ((nla->fd = imsg.fd) == -1) 3041 log_warnx("expected to receive fd for " 3042 "%s but didn't receive any", 3043 log_sockaddr((struct sockaddr *) 3044 &nla->sa, nla->sa_len)); 3045 3046 la = calloc(1, sizeof(struct listen_addr)); 3047 if (la == NULL) 3048 fatal(NULL); 3049 memcpy(&la->sa, &nla->sa, sizeof(la->sa)); 3050 la->flags = nla->flags; 3051 la->fd = nla->fd; 3052 la->reconf = RECONF_REINIT; 3053 TAILQ_INSERT_TAIL(nconf->listen_addrs, la, 3054 entry); 3055 } else { 3056 if (nla->reconf != RECONF_KEEP) 3057 fatalx("king bula sez: expected KEEP"); 3058 la->reconf = RECONF_KEEP; 3059 } 3060 3061 break; 3062 case IMSG_RECONF_CTRL: 3063 if (idx != PFD_PIPE_MAIN) 3064 fatalx("reconf request not from parent"); 3065 if (imsg.hdr.len != IMSG_HEADER_SIZE + 3066 sizeof(restricted)) 3067 fatalx("RECONF_CTRL imsg with wrong len"); 3068 memcpy(&restricted, imsg.data, sizeof(restricted)); 3069 if (imsg.fd == -1) { 3070 log_warnx("expected to receive fd for control " 3071 "socket but didn't receive any"); 3072 break; 3073 } 3074 if (restricted) { 3075 control_shutdown(rcsock); 3076 rcsock = imsg.fd; 3077 } else { 3078 control_shutdown(csock); 3079 csock = imsg.fd; 3080 } 3081 break; 3082 case IMSG_RECONF_DRAIN: 3083 switch (idx) { 3084 case PFD_PIPE_ROUTE: 3085 if (nconf != NULL) 3086 fatalx("got unexpected %s from RDE", 3087 "IMSG_RECONF_DONE"); 3088 imsg_compose(ibuf_main, IMSG_RECONF_DONE, 0, 0, 3089 -1, NULL, 0); 3090 break; 3091 
case PFD_PIPE_MAIN: 3092 if (nconf == NULL) 3093 fatalx("got unexpected %s from parent", 3094 "IMSG_RECONF_DONE"); 3095 imsg_compose(ibuf_main, IMSG_RECONF_DRAIN, 0, 0, 3096 -1, NULL, 0); 3097 break; 3098 default: 3099 fatalx("reconf request not from parent or RDE"); 3100 } 3101 break; 3102 case IMSG_RECONF_DONE: 3103 if (idx != PFD_PIPE_MAIN) 3104 fatalx("reconf request not from parent"); 3105 if (nconf == NULL) 3106 fatalx("got IMSG_RECONF_DONE but no config"); 3107 copy_config(conf, nconf); 3108 merge_peers(conf, nconf); 3109 3110 /* delete old listeners */ 3111 for (la = TAILQ_FIRST(conf->listen_addrs); la != NULL; 3112 la = nla) { 3113 nla = TAILQ_NEXT(la, entry); 3114 if (la->reconf == RECONF_NONE) { 3115 log_info("not listening on %s any more", 3116 log_sockaddr((struct sockaddr *) 3117 &la->sa, la->sa_len)); 3118 TAILQ_REMOVE(conf->listen_addrs, la, 3119 entry); 3120 close(la->fd); 3121 free(la); 3122 } 3123 } 3124 3125 /* add new listeners */ 3126 TAILQ_CONCAT(conf->listen_addrs, nconf->listen_addrs, 3127 entry); 3128 3129 setup_listeners(listener_cnt); 3130 free_config(nconf); 3131 nconf = NULL; 3132 pending_reconf = 0; 3133 log_info("SE reconfigured"); 3134 /* 3135 * IMSG_RECONF_DONE is sent when the RDE drained 3136 * the peer config sent in merge_peers(). 
3137 */ 3138 break; 3139 case IMSG_SESSION_DEPENDON: 3140 if (idx != PFD_PIPE_MAIN) 3141 fatalx("IFINFO message not from parent"); 3142 if (imsg.hdr.len != IMSG_HEADER_SIZE + 3143 sizeof(struct session_dependon)) 3144 fatalx("DEPENDON imsg with wrong len"); 3145 sdon = imsg.data; 3146 depend_ok = sdon->depend_state; 3147 3148 RB_FOREACH(p, peer_head, &conf->peers) 3149 if (!strcmp(p->conf.if_depend, sdon->ifname)) { 3150 if (depend_ok && !p->depend_ok) { 3151 p->depend_ok = depend_ok; 3152 bgp_fsm(p, EVNT_START); 3153 } else if (!depend_ok && p->depend_ok) { 3154 p->depend_ok = depend_ok; 3155 session_stop(p, 3156 ERR_CEASE_OTHER_CHANGE); 3157 } 3158 } 3159 break; 3160 case IMSG_MRT_OPEN: 3161 case IMSG_MRT_REOPEN: 3162 if (imsg.hdr.len > IMSG_HEADER_SIZE + 3163 sizeof(struct mrt)) { 3164 log_warnx("wrong imsg len"); 3165 break; 3166 } 3167 3168 memcpy(&xmrt, imsg.data, sizeof(struct mrt)); 3169 if ((xmrt.wbuf.fd = imsg.fd) == -1) 3170 log_warnx("expected to receive fd for mrt dump " 3171 "but didn't receive any"); 3172 3173 mrt = mrt_get(&mrthead, &xmrt); 3174 if (mrt == NULL) { 3175 /* new dump */ 3176 mrt = calloc(1, sizeof(struct mrt)); 3177 if (mrt == NULL) 3178 fatal("session_dispatch_imsg"); 3179 memcpy(mrt, &xmrt, sizeof(struct mrt)); 3180 TAILQ_INIT(&mrt->wbuf.bufs); 3181 LIST_INSERT_HEAD(&mrthead, mrt, entry); 3182 } else { 3183 /* old dump reopened */ 3184 close(mrt->wbuf.fd); 3185 mrt->wbuf.fd = xmrt.wbuf.fd; 3186 } 3187 break; 3188 case IMSG_MRT_CLOSE: 3189 if (imsg.hdr.len > IMSG_HEADER_SIZE + 3190 sizeof(struct mrt)) { 3191 log_warnx("wrong imsg len"); 3192 break; 3193 } 3194 3195 memcpy(&xmrt, imsg.data, sizeof(struct mrt)); 3196 mrt = mrt_get(&mrthead, &xmrt); 3197 if (mrt != NULL) 3198 mrt_done(mrt); 3199 break; 3200 case IMSG_CTL_KROUTE: 3201 case IMSG_CTL_KROUTE_ADDR: 3202 case IMSG_CTL_SHOW_NEXTHOP: 3203 case IMSG_CTL_SHOW_INTERFACE: 3204 case IMSG_CTL_SHOW_FIB_TABLES: 3205 case IMSG_CTL_SHOW_RTR: 3206 case IMSG_CTL_SHOW_TIMER: 3207 if (idx != 
PFD_PIPE_MAIN) 3208 fatalx("ctl kroute request not from parent"); 3209 control_imsg_relay(&imsg, NULL); 3210 break; 3211 case IMSG_CTL_SHOW_NEIGHBOR: 3212 if (idx != PFD_PIPE_ROUTE_CTL) 3213 fatalx("ctl rib request not from RDE"); 3214 p = getpeerbyid(conf, imsg.hdr.peerid); 3215 control_imsg_relay(&imsg, p); 3216 break; 3217 case IMSG_CTL_SHOW_RIB: 3218 case IMSG_CTL_SHOW_RIB_PREFIX: 3219 case IMSG_CTL_SHOW_RIB_COMMUNITIES: 3220 case IMSG_CTL_SHOW_RIB_ATTR: 3221 case IMSG_CTL_SHOW_RIB_MEM: 3222 case IMSG_CTL_SHOW_NETWORK: 3223 case IMSG_CTL_SHOW_FLOWSPEC: 3224 case IMSG_CTL_SHOW_SET: 3225 if (idx != PFD_PIPE_ROUTE_CTL) 3226 fatalx("ctl rib request not from RDE"); 3227 control_imsg_relay(&imsg, NULL); 3228 break; 3229 case IMSG_CTL_END: 3230 case IMSG_CTL_RESULT: 3231 control_imsg_relay(&imsg, NULL); 3232 break; 3233 case IMSG_UPDATE: 3234 if (idx != PFD_PIPE_ROUTE) 3235 fatalx("update request not from RDE"); 3236 if (imsg.hdr.len > IMSG_HEADER_SIZE + 3237 MAX_PKTSIZE - MSGSIZE_HEADER || 3238 imsg.hdr.len < IMSG_HEADER_SIZE + 3239 MSGSIZE_UPDATE_MIN - MSGSIZE_HEADER) 3240 log_warnx("RDE sent invalid update"); 3241 else 3242 session_update(imsg.hdr.peerid, imsg.data, 3243 imsg.hdr.len - IMSG_HEADER_SIZE); 3244 break; 3245 case IMSG_UPDATE_ERR: 3246 if (idx != PFD_PIPE_ROUTE) 3247 fatalx("update request not from RDE"); 3248 if (imsg.hdr.len < IMSG_HEADER_SIZE + 2) { 3249 log_warnx("RDE sent invalid notification"); 3250 break; 3251 } 3252 if ((p = getpeerbyid(conf, imsg.hdr.peerid)) == NULL) { 3253 log_warnx("no such peer: id=%u", 3254 imsg.hdr.peerid); 3255 break; 3256 } 3257 data = imsg.data; 3258 errcode = *data++; 3259 subcode = *data++; 3260 3261 if (imsg.hdr.len == IMSG_HEADER_SIZE + 2) 3262 data = NULL; 3263 3264 session_notification(p, errcode, subcode, 3265 data, imsg.hdr.len - IMSG_HEADER_SIZE - 2); 3266 switch (errcode) { 3267 case ERR_CEASE: 3268 switch (subcode) { 3269 case ERR_CEASE_MAX_PREFIX: 3270 case ERR_CEASE_MAX_SENT_PREFIX: 3271 t = 
p->conf.max_out_prefix_restart; 3272 if (subcode == ERR_CEASE_MAX_PREFIX) 3273 t = p->conf.max_prefix_restart; 3274 3275 bgp_fsm(p, EVNT_STOP); 3276 if (t) 3277 timer_set(&p->timers, 3278 Timer_IdleHold, 60 * t); 3279 break; 3280 default: 3281 bgp_fsm(p, EVNT_CON_FATAL); 3282 break; 3283 } 3284 break; 3285 default: 3286 bgp_fsm(p, EVNT_CON_FATAL); 3287 break; 3288 } 3289 break; 3290 case IMSG_REFRESH: 3291 if (idx != PFD_PIPE_ROUTE) 3292 fatalx("route refresh request not from RDE"); 3293 if (imsg.hdr.len < IMSG_HEADER_SIZE + sizeof(rr)) { 3294 log_warnx("RDE sent invalid refresh msg"); 3295 break; 3296 } 3297 if ((p = getpeerbyid(conf, imsg.hdr.peerid)) == NULL) { 3298 log_warnx("no such peer: id=%u", 3299 imsg.hdr.peerid); 3300 break; 3301 } 3302 memcpy(&rr, imsg.data, sizeof(rr)); 3303 if (rr.aid >= AID_MAX) 3304 fatalx("IMSG_REFRESH: bad AID"); 3305 session_rrefresh(p, rr.aid, rr.subtype); 3306 break; 3307 case IMSG_SESSION_RESTARTED: 3308 if (idx != PFD_PIPE_ROUTE) 3309 fatalx("update request not from RDE"); 3310 if (imsg.hdr.len < IMSG_HEADER_SIZE + sizeof(aid)) { 3311 log_warnx("RDE sent invalid restart msg"); 3312 break; 3313 } 3314 if ((p = getpeerbyid(conf, imsg.hdr.peerid)) == NULL) { 3315 log_warnx("no such peer: id=%u", 3316 imsg.hdr.peerid); 3317 break; 3318 } 3319 memcpy(&aid, imsg.data, sizeof(aid)); 3320 if (aid >= AID_MAX) 3321 fatalx("IMSG_SESSION_RESTARTED: bad AID"); 3322 if (p->capa.neg.grestart.flags[aid] & 3323 CAPA_GR_RESTARTING) { 3324 log_peer_warnx(&p->conf, 3325 "graceful restart of %s finished", 3326 aid2str(aid)); 3327 p->capa.neg.grestart.flags[aid] &= 3328 ~CAPA_GR_RESTARTING; 3329 timer_stop(&p->timers, Timer_RestartTimeout); 3330 3331 /* signal back to RDE to cleanup stale routes */ 3332 if (imsg_rde(IMSG_SESSION_RESTARTED, 3333 imsg.hdr.peerid, &aid, sizeof(aid)) == -1) 3334 fatal("imsg_compose: " 3335 "IMSG_SESSION_RESTARTED"); 3336 } 3337 break; 3338 case IMSG_SESSION_DOWN: 3339 if (idx != PFD_PIPE_ROUTE) 3340 fatalx("update 
request not from RDE"); 3341 if ((p = getpeerbyid(conf, imsg.hdr.peerid)) == NULL) { 3342 log_warnx("no such peer: id=%u", 3343 imsg.hdr.peerid); 3344 break; 3345 } 3346 session_stop(p, ERR_CEASE_ADMIN_DOWN); 3347 break; 3348 default: 3349 break; 3350 } 3351 imsg_free(&imsg); 3352 } 3353 } 3354 3355 int 3356 la_cmp(struct listen_addr *a, struct listen_addr *b) 3357 { 3358 struct sockaddr_in *in_a, *in_b; 3359 struct sockaddr_in6 *in6_a, *in6_b; 3360 3361 if (a->sa.ss_family != b->sa.ss_family) 3362 return (1); 3363 3364 switch (a->sa.ss_family) { 3365 case AF_INET: 3366 in_a = (struct sockaddr_in *)&a->sa; 3367 in_b = (struct sockaddr_in *)&b->sa; 3368 if (in_a->sin_addr.s_addr != in_b->sin_addr.s_addr) 3369 return (1); 3370 if (in_a->sin_port != in_b->sin_port) 3371 return (1); 3372 break; 3373 case AF_INET6: 3374 in6_a = (struct sockaddr_in6 *)&a->sa; 3375 in6_b = (struct sockaddr_in6 *)&b->sa; 3376 if (memcmp(&in6_a->sin6_addr, &in6_b->sin6_addr, 3377 sizeof(struct in6_addr))) 3378 return (1); 3379 if (in6_a->sin6_port != in6_b->sin6_port) 3380 return (1); 3381 break; 3382 default: 3383 fatal("king bula sez: unknown address family"); 3384 /* NOTREACHED */ 3385 } 3386 3387 return (0); 3388 } 3389 3390 struct peer * 3391 getpeerbydesc(struct bgpd_config *c, const char *descr) 3392 { 3393 struct peer *p, *res = NULL; 3394 int match = 0; 3395 3396 RB_FOREACH(p, peer_head, &c->peers) 3397 if (!strcmp(p->conf.descr, descr)) { 3398 res = p; 3399 match++; 3400 } 3401 3402 if (match > 1) 3403 log_info("neighbor description \"%s\" not unique, request " 3404 "aborted", descr); 3405 3406 if (match == 1) 3407 return (res); 3408 else 3409 return (NULL); 3410 } 3411 3412 struct peer * 3413 getpeerbyip(struct bgpd_config *c, struct sockaddr *ip) 3414 { 3415 struct bgpd_addr addr; 3416 struct peer *p, *newpeer, *loose = NULL; 3417 uint32_t id; 3418 3419 sa2addr(ip, &addr, NULL); 3420 3421 /* we might want a more effective way to find peers by IP */ 3422 RB_FOREACH(p, peer_head, 
&c->peers) 3423 if (!p->conf.template && 3424 !memcmp(&addr, &p->conf.remote_addr, sizeof(addr))) 3425 return (p); 3426 3427 /* try template matching */ 3428 RB_FOREACH(p, peer_head, &c->peers) 3429 if (p->conf.template && 3430 p->conf.remote_addr.aid == addr.aid && 3431 session_match_mask(p, &addr)) 3432 if (loose == NULL || loose->conf.remote_masklen < 3433 p->conf.remote_masklen) 3434 loose = p; 3435 3436 if (loose != NULL) { 3437 /* clone */ 3438 if ((newpeer = malloc(sizeof(struct peer))) == NULL) 3439 fatal(NULL); 3440 memcpy(newpeer, loose, sizeof(struct peer)); 3441 for (id = PEER_ID_DYN_MAX; id > PEER_ID_STATIC_MAX; id--) { 3442 if (getpeerbyid(c, id) == NULL) /* we found a free id */ 3443 break; 3444 } 3445 newpeer->template = loose; 3446 session_template_clone(newpeer, ip, id, 0); 3447 newpeer->state = newpeer->prev_state = STATE_NONE; 3448 newpeer->reconf_action = RECONF_KEEP; 3449 newpeer->rbuf = NULL; 3450 newpeer->rpending = 0; 3451 init_peer(newpeer); 3452 bgp_fsm(newpeer, EVNT_START); 3453 if (RB_INSERT(peer_head, &c->peers, newpeer) != NULL) 3454 fatalx("%s: peer tree is corrupt", __func__); 3455 return (newpeer); 3456 } 3457 3458 return (NULL); 3459 } 3460 3461 struct peer * 3462 getpeerbyid(struct bgpd_config *c, uint32_t peerid) 3463 { 3464 static struct peer lookup; 3465 3466 lookup.conf.id = peerid; 3467 3468 return RB_FIND(peer_head, &c->peers, &lookup); 3469 } 3470 3471 int 3472 peer_matched(struct peer *p, struct ctl_neighbor *n) 3473 { 3474 char *s; 3475 3476 if (n && n->addr.aid) { 3477 if (memcmp(&p->conf.remote_addr, &n->addr, 3478 sizeof(p->conf.remote_addr))) 3479 return 0; 3480 } else if (n && n->descr[0]) { 3481 s = n->is_group ? 
p->conf.group : p->conf.descr; 3482 /* cannot trust n->descr to be properly terminated */ 3483 if (strncmp(s, n->descr, sizeof(n->descr))) 3484 return 0; 3485 } 3486 return 1; 3487 } 3488 3489 void 3490 session_template_clone(struct peer *p, struct sockaddr *ip, uint32_t id, 3491 uint32_t as) 3492 { 3493 struct bgpd_addr remote_addr; 3494 3495 if (ip) 3496 sa2addr(ip, &remote_addr, NULL); 3497 else 3498 memcpy(&remote_addr, &p->conf.remote_addr, sizeof(remote_addr)); 3499 3500 memcpy(&p->conf, &p->template->conf, sizeof(struct peer_config)); 3501 3502 p->conf.id = id; 3503 3504 if (as) { 3505 p->conf.remote_as = as; 3506 p->conf.ebgp = (p->conf.remote_as != p->conf.local_as); 3507 if (!p->conf.ebgp) 3508 /* force enforce_as off for iBGP sessions */ 3509 p->conf.enforce_as = ENFORCE_AS_OFF; 3510 } 3511 3512 memcpy(&p->conf.remote_addr, &remote_addr, sizeof(remote_addr)); 3513 switch (p->conf.remote_addr.aid) { 3514 case AID_INET: 3515 p->conf.remote_masklen = 32; 3516 break; 3517 case AID_INET6: 3518 p->conf.remote_masklen = 128; 3519 break; 3520 } 3521 p->conf.template = 0; 3522 } 3523 3524 int 3525 session_match_mask(struct peer *p, struct bgpd_addr *a) 3526 { 3527 struct bgpd_addr masked; 3528 3529 applymask(&masked, a, p->conf.remote_masklen); 3530 if (memcmp(&masked, &p->conf.remote_addr, sizeof(masked)) == 0) 3531 return (1); 3532 return (0); 3533 } 3534 3535 void 3536 session_down(struct peer *peer) 3537 { 3538 memset(&peer->capa.neg, 0, sizeof(peer->capa.neg)); 3539 peer->stats.last_updown = getmonotime(); 3540 /* 3541 * session_down is called in the exit code path so check 3542 * if the RDE is still around, if not there is no need to 3543 * send the message. 
3544 */ 3545 if (ibuf_rde == NULL) 3546 return; 3547 if (imsg_rde(IMSG_SESSION_DOWN, peer->conf.id, NULL, 0) == -1) 3548 fatalx("imsg_compose error"); 3549 } 3550 3551 void 3552 session_up(struct peer *p) 3553 { 3554 struct session_up sup; 3555 3556 if (imsg_rde(IMSG_SESSION_ADD, p->conf.id, 3557 &p->conf, sizeof(p->conf)) == -1) 3558 fatalx("imsg_compose error"); 3559 3560 if (p->local.aid == AID_INET) { 3561 sup.local_v4_addr = p->local; 3562 sup.local_v6_addr = p->local_alt; 3563 } else { 3564 sup.local_v6_addr = p->local; 3565 sup.local_v4_addr = p->local_alt; 3566 } 3567 sup.remote_addr = p->remote; 3568 sup.if_scope = p->if_scope; 3569 3570 sup.remote_bgpid = p->remote_bgpid; 3571 sup.short_as = p->short_as; 3572 memcpy(&sup.capa, &p->capa.neg, sizeof(sup.capa)); 3573 p->stats.last_updown = getmonotime(); 3574 if (imsg_rde(IMSG_SESSION_UP, p->conf.id, &sup, sizeof(sup)) == -1) 3575 fatalx("imsg_compose error"); 3576 } 3577 3578 int 3579 imsg_ctl_parent(int type, uint32_t peerid, pid_t pid, void *data, 3580 uint16_t datalen) 3581 { 3582 return imsg_compose(ibuf_main, type, peerid, pid, -1, data, datalen); 3583 } 3584 3585 int 3586 imsg_ctl_rde(int type, uint32_t peerid, pid_t pid, void *data, uint16_t datalen) 3587 { 3588 if (ibuf_rde_ctl == NULL) 3589 return (0); 3590 3591 /* 3592 * Use control socket to talk to RDE to bypass the queue of the 3593 * regular imsg socket. 
3594 */ 3595 return imsg_compose(ibuf_rde_ctl, type, peerid, pid, -1, data, datalen); 3596 } 3597 3598 int 3599 imsg_rde(int type, uint32_t peerid, void *data, uint16_t datalen) 3600 { 3601 if (ibuf_rde == NULL) 3602 return (0); 3603 3604 return imsg_compose(ibuf_rde, type, peerid, 0, -1, data, datalen); 3605 } 3606 3607 void 3608 session_demote(struct peer *p, int level) 3609 { 3610 struct demote_msg msg; 3611 3612 strlcpy(msg.demote_group, p->conf.demote_group, 3613 sizeof(msg.demote_group)); 3614 msg.level = level; 3615 if (imsg_compose(ibuf_main, IMSG_DEMOTE, p->conf.id, 0, -1, 3616 &msg, sizeof(msg)) == -1) 3617 fatalx("imsg_compose error"); 3618 3619 p->demoted += level; 3620 } 3621 3622 void 3623 session_stop(struct peer *peer, uint8_t subcode) 3624 { 3625 char data[REASON_LEN]; 3626 size_t datalen; 3627 size_t reason_len; 3628 char *communication; 3629 3630 datalen = 0; 3631 communication = peer->conf.reason; 3632 3633 if ((subcode == ERR_CEASE_ADMIN_DOWN || 3634 subcode == ERR_CEASE_ADMIN_RESET) 3635 && communication && *communication) { 3636 reason_len = strlen(communication); 3637 if (reason_len > REASON_LEN - 1) { 3638 log_peer_warnx(&peer->conf, 3639 "trying to send overly long shutdown reason"); 3640 } else { 3641 data[0] = reason_len; 3642 datalen = reason_len + sizeof(data[0]); 3643 memcpy(data + 1, communication, reason_len); 3644 } 3645 } 3646 switch (peer->state) { 3647 case STATE_OPENSENT: 3648 case STATE_OPENCONFIRM: 3649 case STATE_ESTABLISHED: 3650 session_notification(peer, ERR_CEASE, subcode, data, datalen); 3651 break; 3652 default: 3653 /* session not open, no need to send notification */ 3654 break; 3655 } 3656 bgp_fsm(peer, EVNT_STOP); 3657 } 3658 3659 void 3660 merge_peers(struct bgpd_config *c, struct bgpd_config *nc) 3661 { 3662 struct peer *p, *np, *next; 3663 3664 RB_FOREACH(p, peer_head, &c->peers) { 3665 /* templates are handled specially */ 3666 if (p->template != NULL) 3667 continue; 3668 np = getpeerbyid(nc, p->conf.id); 3669 
if (np == NULL) { 3670 p->reconf_action = RECONF_DELETE; 3671 continue; 3672 } 3673 3674 /* peer no longer uses TCP MD5SIG so deconfigure */ 3675 if (p->conf.auth.method == AUTH_MD5SIG && 3676 np->conf.auth.method != AUTH_MD5SIG) 3677 tcp_md5_del_listener(c, p); 3678 else if (np->conf.auth.method == AUTH_MD5SIG) 3679 tcp_md5_add_listener(c, np); 3680 3681 memcpy(&p->conf, &np->conf, sizeof(p->conf)); 3682 RB_REMOVE(peer_head, &nc->peers, np); 3683 free(np); 3684 3685 p->reconf_action = RECONF_KEEP; 3686 3687 /* had demotion, is demoted, demote removed? */ 3688 if (p->demoted && !p->conf.demote_group[0]) 3689 session_demote(p, -1); 3690 3691 /* if session is not open then refresh pfkey data */ 3692 if (p->state < STATE_OPENSENT && !p->template) 3693 imsg_compose(ibuf_main, IMSG_PFKEY_RELOAD, 3694 p->conf.id, 0, -1, NULL, 0); 3695 3696 /* sync the RDE in case we keep the peer */ 3697 if (imsg_rde(IMSG_SESSION_ADD, p->conf.id, 3698 &p->conf, sizeof(struct peer_config)) == -1) 3699 fatalx("imsg_compose error"); 3700 3701 /* apply the config to all clones of a template */ 3702 if (p->conf.template) { 3703 struct peer *xp; 3704 RB_FOREACH(xp, peer_head, &c->peers) { 3705 if (xp->template != p) 3706 continue; 3707 session_template_clone(xp, NULL, xp->conf.id, 3708 xp->conf.remote_as); 3709 if (imsg_rde(IMSG_SESSION_ADD, xp->conf.id, 3710 &xp->conf, sizeof(xp->conf)) == -1) 3711 fatalx("imsg_compose error"); 3712 } 3713 } 3714 } 3715 3716 if (imsg_rde(IMSG_RECONF_DRAIN, 0, NULL, 0) == -1) 3717 fatalx("imsg_compose error"); 3718 3719 /* pfkeys of new peers already loaded by the parent process */ 3720 RB_FOREACH_SAFE(np, peer_head, &nc->peers, next) { 3721 RB_REMOVE(peer_head, &nc->peers, np); 3722 if (RB_INSERT(peer_head, &c->peers, np) != NULL) 3723 fatalx("%s: peer tree is corrupt", __func__); 3724 if (np->conf.auth.method == AUTH_MD5SIG) 3725 tcp_md5_add_listener(c, np); 3726 } 3727 } 3728