1 /* $OpenBSD: session.c,v 1.439 2023/01/04 14:33:30 claudio Exp $ */ 2 3 /* 4 * Copyright (c) 2003, 2004, 2005 Henning Brauer <henning@openbsd.org> 5 * Copyright (c) 2017 Peter van Dijk <peter.van.dijk@powerdns.com> 6 * 7 * Permission to use, copy, modify, and distribute this software for any 8 * purpose with or without fee is hereby granted, provided that the above 9 * copyright notice and this permission notice appear in all copies. 10 * 11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18 */ 19 20 #include <sys/types.h> 21 22 #include <sys/mman.h> 23 #include <sys/socket.h> 24 #include <sys/time.h> 25 #include <sys/resource.h> 26 #include <sys/un.h> 27 #include <netinet/in.h> 28 #include <netinet/ip.h> 29 #include <netinet/tcp.h> 30 #include <arpa/inet.h> 31 #include <limits.h> 32 33 #include <err.h> 34 #include <errno.h> 35 #include <fcntl.h> 36 #include <ifaddrs.h> 37 #include <poll.h> 38 #include <pwd.h> 39 #include <signal.h> 40 #include <stdio.h> 41 #include <stdlib.h> 42 #include <string.h> 43 #include <syslog.h> 44 #include <unistd.h> 45 46 #include "bgpd.h" 47 #include "session.h" 48 #include "log.h" 49 50 #define PFD_PIPE_MAIN 0 51 #define PFD_PIPE_ROUTE 1 52 #define PFD_PIPE_ROUTE_CTL 2 53 #define PFD_SOCK_CTL 3 54 #define PFD_SOCK_RCTL 4 55 #define PFD_LISTENERS_START 5 56 57 void session_sighdlr(int); 58 int setup_listeners(u_int *); 59 void init_peer(struct peer *); 60 void start_timer_holdtime(struct peer *); 61 void start_timer_keepalive(struct peer *); 62 void 
session_close_connection(struct peer *); 63 void change_state(struct peer *, enum session_state, enum session_events); 64 int session_setup_socket(struct peer *); 65 void session_accept(int); 66 int session_connect(struct peer *); 67 void session_tcp_established(struct peer *); 68 void session_capa_ann_none(struct peer *); 69 int session_capa_add(struct ibuf *, uint8_t, uint8_t); 70 int session_capa_add_mp(struct ibuf *, uint8_t); 71 int session_capa_add_afi(struct peer *, struct ibuf *, uint8_t, uint8_t); 72 struct bgp_msg *session_newmsg(enum msg_type, uint16_t); 73 int session_sendmsg(struct bgp_msg *, struct peer *); 74 void session_open(struct peer *); 75 void session_keepalive(struct peer *); 76 void session_update(uint32_t, void *, size_t); 77 void session_notification(struct peer *, uint8_t, uint8_t, void *, 78 ssize_t); 79 void session_rrefresh(struct peer *, uint8_t, uint8_t); 80 int session_graceful_restart(struct peer *); 81 int session_graceful_stop(struct peer *); 82 int session_dispatch_msg(struct pollfd *, struct peer *); 83 void session_process_msg(struct peer *); 84 int parse_header(struct peer *, u_char *, uint16_t *, uint8_t *); 85 int parse_open(struct peer *); 86 int parse_update(struct peer *); 87 int parse_rrefresh(struct peer *); 88 int parse_notification(struct peer *); 89 int parse_capabilities(struct peer *, u_char *, uint16_t, uint32_t *); 90 int capa_neg_calc(struct peer *, uint8_t *); 91 void session_dispatch_imsg(struct imsgbuf *, int, u_int *); 92 void session_up(struct peer *); 93 void session_down(struct peer *); 94 int imsg_rde(int, uint32_t, void *, uint16_t); 95 void session_demote(struct peer *, int); 96 void merge_peers(struct bgpd_config *, struct bgpd_config *); 97 98 int la_cmp(struct listen_addr *, struct listen_addr *); 99 void session_template_clone(struct peer *, struct sockaddr *, 100 uint32_t, uint32_t); 101 int session_match_mask(struct peer *, struct bgpd_addr *); 102 103 static struct bgpd_config *conf, *nconf; 
104 static struct imsgbuf *ibuf_rde; 105 static struct imsgbuf *ibuf_rde_ctl; 106 static struct imsgbuf *ibuf_main; 107 108 struct bgpd_sysdep sysdep; 109 volatile sig_atomic_t session_quit; 110 int pending_reconf; 111 int csock = -1, rcsock = -1; 112 u_int peer_cnt; 113 114 struct mrt_head mrthead; 115 time_t pauseaccept; 116 117 static inline int 118 peer_compare(const struct peer *a, const struct peer *b) 119 { 120 return a->conf.id - b->conf.id; 121 } 122 123 RB_GENERATE(peer_head, peer, entry, peer_compare); 124 125 void 126 session_sighdlr(int sig) 127 { 128 switch (sig) { 129 case SIGINT: 130 case SIGTERM: 131 session_quit = 1; 132 break; 133 } 134 } 135 136 int 137 setup_listeners(u_int *la_cnt) 138 { 139 int ttl = 255; 140 struct listen_addr *la; 141 u_int cnt = 0; 142 143 TAILQ_FOREACH(la, conf->listen_addrs, entry) { 144 la->reconf = RECONF_NONE; 145 cnt++; 146 147 if (la->flags & LISTENER_LISTENING) 148 continue; 149 150 if (la->fd == -1) { 151 log_warn("cannot establish listener on %s: invalid fd", 152 log_sockaddr((struct sockaddr *)&la->sa, 153 la->sa_len)); 154 continue; 155 } 156 157 if (tcp_md5_prep_listener(la, &conf->peers) == -1) 158 fatal("tcp_md5_prep_listener"); 159 160 /* set ttl to 255 so that ttl-security works */ 161 if (la->sa.ss_family == AF_INET && setsockopt(la->fd, 162 IPPROTO_IP, IP_TTL, &ttl, sizeof(ttl)) == -1) { 163 log_warn("setup_listeners setsockopt TTL"); 164 continue; 165 } 166 if (la->sa.ss_family == AF_INET6 && setsockopt(la->fd, 167 IPPROTO_IPV6, IPV6_UNICAST_HOPS, &ttl, sizeof(ttl)) == -1) { 168 log_warn("setup_listeners setsockopt hoplimit"); 169 continue; 170 } 171 172 if (listen(la->fd, MAX_BACKLOG)) { 173 close(la->fd); 174 fatal("listen"); 175 } 176 177 la->flags |= LISTENER_LISTENING; 178 179 log_info("listening on %s", 180 log_sockaddr((struct sockaddr *)&la->sa, la->sa_len)); 181 } 182 183 *la_cnt = cnt; 184 185 return (0); 186 } 187 188 void 189 session_main(int debug, int verbose) 190 { 191 int timeout; 192 
unsigned int i, j, idx_peers, idx_listeners, idx_mrts; 193 u_int pfd_elms = 0, peer_l_elms = 0, mrt_l_elms = 0; 194 u_int listener_cnt, ctl_cnt, mrt_cnt; 195 u_int new_cnt; 196 struct passwd *pw; 197 struct peer *p, **peer_l = NULL, *next; 198 struct mrt *m, *xm, **mrt_l = NULL; 199 struct pollfd *pfd = NULL; 200 struct listen_addr *la; 201 void *newp; 202 time_t now; 203 short events; 204 205 log_init(debug, LOG_DAEMON); 206 log_setverbose(verbose); 207 208 log_procinit(log_procnames[PROC_SE]); 209 210 if ((pw = getpwnam(BGPD_USER)) == NULL) 211 fatal(NULL); 212 213 if (chroot(pw->pw_dir) == -1) 214 fatal("chroot"); 215 if (chdir("/") == -1) 216 fatal("chdir(\"/\")"); 217 218 setproctitle("session engine"); 219 220 if (setgroups(1, &pw->pw_gid) || 221 setresgid(pw->pw_gid, pw->pw_gid, pw->pw_gid) || 222 setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid)) 223 fatal("can't drop privileges"); 224 225 if (pledge("stdio inet recvfd", NULL) == -1) 226 fatal("pledge"); 227 228 signal(SIGTERM, session_sighdlr); 229 signal(SIGINT, session_sighdlr); 230 signal(SIGPIPE, SIG_IGN); 231 signal(SIGHUP, SIG_IGN); 232 signal(SIGALRM, SIG_IGN); 233 signal(SIGUSR1, SIG_IGN); 234 235 if ((ibuf_main = malloc(sizeof(struct imsgbuf))) == NULL) 236 fatal(NULL); 237 imsg_init(ibuf_main, 3); 238 239 LIST_INIT(&mrthead); 240 listener_cnt = 0; 241 peer_cnt = 0; 242 ctl_cnt = 0; 243 244 conf = new_config(); 245 log_info("session engine ready"); 246 247 while (session_quit == 0) { 248 /* check for peers to be initialized or deleted */ 249 if (!pending_reconf) { 250 RB_FOREACH_SAFE(p, peer_head, &conf->peers, next) { 251 /* cloned peer that idled out? */ 252 if (p->template && (p->state == STATE_IDLE || 253 p->state == STATE_ACTIVE) && 254 getmonotime() - p->stats.last_updown >= 255 INTERVAL_HOLD_CLONED) 256 p->reconf_action = RECONF_DELETE; 257 258 /* new peer that needs init? */ 259 if (p->state == STATE_NONE) 260 init_peer(p); 261 262 /* reinit due? 
*/ 263 if (p->reconf_action == RECONF_REINIT) { 264 session_stop(p, ERR_CEASE_ADMIN_RESET); 265 if (!p->conf.down) 266 timer_set(&p->timers, 267 Timer_IdleHold, 0); 268 } 269 270 /* deletion due? */ 271 if (p->reconf_action == RECONF_DELETE) { 272 if (p->demoted) 273 session_demote(p, -1); 274 p->conf.demote_group[0] = 0; 275 session_stop(p, ERR_CEASE_PEER_UNCONF); 276 timer_remove_all(&p->timers); 277 tcp_md5_del_listener(conf, p); 278 log_peer_warnx(&p->conf, "removed"); 279 RB_REMOVE(peer_head, &conf->peers, p); 280 free(p); 281 peer_cnt--; 282 continue; 283 } 284 p->reconf_action = RECONF_NONE; 285 } 286 } 287 288 if (peer_cnt > peer_l_elms) { 289 if ((newp = reallocarray(peer_l, peer_cnt, 290 sizeof(struct peer *))) == NULL) { 291 /* panic for now */ 292 log_warn("could not resize peer_l from %u -> %u" 293 " entries", peer_l_elms, peer_cnt); 294 fatalx("exiting"); 295 } 296 peer_l = newp; 297 peer_l_elms = peer_cnt; 298 } 299 300 mrt_cnt = 0; 301 for (m = LIST_FIRST(&mrthead); m != NULL; m = xm) { 302 xm = LIST_NEXT(m, entry); 303 if (m->state == MRT_STATE_REMOVE) { 304 mrt_clean(m); 305 LIST_REMOVE(m, entry); 306 free(m); 307 continue; 308 } 309 if (m->wbuf.queued) 310 mrt_cnt++; 311 } 312 313 if (mrt_cnt > mrt_l_elms) { 314 if ((newp = reallocarray(mrt_l, mrt_cnt, 315 sizeof(struct mrt *))) == NULL) { 316 /* panic for now */ 317 log_warn("could not resize mrt_l from %u -> %u" 318 " entries", mrt_l_elms, mrt_cnt); 319 fatalx("exiting"); 320 } 321 mrt_l = newp; 322 mrt_l_elms = mrt_cnt; 323 } 324 325 new_cnt = PFD_LISTENERS_START + listener_cnt + peer_cnt + 326 ctl_cnt + mrt_cnt; 327 if (new_cnt > pfd_elms) { 328 if ((newp = reallocarray(pfd, new_cnt, 329 sizeof(struct pollfd))) == NULL) { 330 /* panic for now */ 331 log_warn("could not resize pfd from %u -> %u" 332 " entries", pfd_elms, new_cnt); 333 fatalx("exiting"); 334 } 335 pfd = newp; 336 pfd_elms = new_cnt; 337 } 338 339 memset(pfd, 0, sizeof(struct pollfd) * pfd_elms); 340 341 
set_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main); 342 set_pollfd(&pfd[PFD_PIPE_ROUTE], ibuf_rde); 343 set_pollfd(&pfd[PFD_PIPE_ROUTE_CTL], ibuf_rde_ctl); 344 345 if (pauseaccept == 0) { 346 pfd[PFD_SOCK_CTL].fd = csock; 347 pfd[PFD_SOCK_CTL].events = POLLIN; 348 pfd[PFD_SOCK_RCTL].fd = rcsock; 349 pfd[PFD_SOCK_RCTL].events = POLLIN; 350 } else { 351 pfd[PFD_SOCK_CTL].fd = -1; 352 pfd[PFD_SOCK_RCTL].fd = -1; 353 } 354 355 i = PFD_LISTENERS_START; 356 TAILQ_FOREACH(la, conf->listen_addrs, entry) { 357 if (pauseaccept == 0) { 358 pfd[i].fd = la->fd; 359 pfd[i].events = POLLIN; 360 } else 361 pfd[i].fd = -1; 362 i++; 363 } 364 idx_listeners = i; 365 timeout = 240; /* loop every 240s at least */ 366 367 now = getmonotime(); 368 RB_FOREACH(p, peer_head, &conf->peers) { 369 time_t nextaction; 370 struct timer *pt; 371 372 /* check timers */ 373 if ((pt = timer_nextisdue(&p->timers, now)) != NULL) { 374 switch (pt->type) { 375 case Timer_Hold: 376 bgp_fsm(p, EVNT_TIMER_HOLDTIME); 377 break; 378 case Timer_SendHold: 379 bgp_fsm(p, EVNT_TIMER_SENDHOLD); 380 break; 381 case Timer_ConnectRetry: 382 bgp_fsm(p, EVNT_TIMER_CONNRETRY); 383 break; 384 case Timer_Keepalive: 385 bgp_fsm(p, EVNT_TIMER_KEEPALIVE); 386 break; 387 case Timer_IdleHold: 388 bgp_fsm(p, EVNT_START); 389 break; 390 case Timer_IdleHoldReset: 391 p->IdleHoldTime = 392 INTERVAL_IDLE_HOLD_INITIAL; 393 p->errcnt = 0; 394 timer_stop(&p->timers, 395 Timer_IdleHoldReset); 396 break; 397 case Timer_CarpUndemote: 398 timer_stop(&p->timers, 399 Timer_CarpUndemote); 400 if (p->demoted && 401 p->state == STATE_ESTABLISHED) 402 session_demote(p, -1); 403 break; 404 case Timer_RestartTimeout: 405 timer_stop(&p->timers, 406 Timer_RestartTimeout); 407 session_graceful_stop(p); 408 break; 409 default: 410 fatalx("King Bula lost in time"); 411 } 412 } 413 if ((nextaction = timer_nextduein(&p->timers, 414 now)) != -1 && nextaction < timeout) 415 timeout = nextaction; 416 417 /* are we waiting for a write? 
*/ 418 events = POLLIN; 419 if (p->wbuf.queued > 0 || p->state == STATE_CONNECT) 420 events |= POLLOUT; 421 /* is there still work to do? */ 422 if (p->rpending && p->rbuf && p->rbuf->wpos) 423 timeout = 0; 424 425 /* poll events */ 426 if (p->fd != -1 && events != 0) { 427 pfd[i].fd = p->fd; 428 pfd[i].events = events; 429 peer_l[i - idx_listeners] = p; 430 i++; 431 } 432 } 433 434 idx_peers = i; 435 436 LIST_FOREACH(m, &mrthead, entry) 437 if (m->wbuf.queued) { 438 pfd[i].fd = m->wbuf.fd; 439 pfd[i].events = POLLOUT; 440 mrt_l[i - idx_peers] = m; 441 i++; 442 } 443 444 idx_mrts = i; 445 446 i += control_fill_pfds(pfd + i, pfd_elms -i); 447 448 if (i > pfd_elms) 449 fatalx("poll pfd overflow"); 450 451 if (pauseaccept && timeout > 1) 452 timeout = 1; 453 if (timeout < 0) 454 timeout = 0; 455 if (poll(pfd, i, timeout * 1000) == -1) { 456 if (errno == EINTR) 457 continue; 458 fatal("poll error"); 459 } 460 461 /* 462 * If we previously saw fd exhaustion, we stop accept() 463 * for 1 second to throttle the accept() loop. 
464 */ 465 if (pauseaccept && getmonotime() > pauseaccept + 1) 466 pauseaccept = 0; 467 468 if (handle_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main) == -1) { 469 log_warnx("SE: Lost connection to parent"); 470 session_quit = 1; 471 continue; 472 } else 473 session_dispatch_imsg(ibuf_main, PFD_PIPE_MAIN, 474 &listener_cnt); 475 476 if (handle_pollfd(&pfd[PFD_PIPE_ROUTE], ibuf_rde) == -1) { 477 log_warnx("SE: Lost connection to RDE"); 478 msgbuf_clear(&ibuf_rde->w); 479 free(ibuf_rde); 480 ibuf_rde = NULL; 481 } else 482 session_dispatch_imsg(ibuf_rde, PFD_PIPE_ROUTE, 483 &listener_cnt); 484 485 if (handle_pollfd(&pfd[PFD_PIPE_ROUTE_CTL], ibuf_rde_ctl) == 486 -1) { 487 log_warnx("SE: Lost connection to RDE control"); 488 msgbuf_clear(&ibuf_rde_ctl->w); 489 free(ibuf_rde_ctl); 490 ibuf_rde_ctl = NULL; 491 } else 492 session_dispatch_imsg(ibuf_rde_ctl, PFD_PIPE_ROUTE_CTL, 493 &listener_cnt); 494 495 if (pfd[PFD_SOCK_CTL].revents & POLLIN) 496 ctl_cnt += control_accept(csock, 0); 497 498 if (pfd[PFD_SOCK_RCTL].revents & POLLIN) 499 ctl_cnt += control_accept(rcsock, 1); 500 501 for (j = PFD_LISTENERS_START; j < idx_listeners; j++) 502 if (pfd[j].revents & POLLIN) 503 session_accept(pfd[j].fd); 504 505 for (; j < idx_peers; j++) 506 session_dispatch_msg(&pfd[j], 507 peer_l[j - idx_listeners]); 508 509 RB_FOREACH(p, peer_head, &conf->peers) 510 if (p->rbuf && p->rbuf->wpos) 511 session_process_msg(p); 512 513 for (; j < idx_mrts; j++) 514 if (pfd[j].revents & POLLOUT) 515 mrt_write(mrt_l[j - idx_peers]); 516 517 for (; j < i; j++) 518 ctl_cnt -= control_dispatch_msg(&pfd[j], &conf->peers); 519 } 520 521 RB_FOREACH_SAFE(p, peer_head, &conf->peers, next) { 522 RB_REMOVE(peer_head, &conf->peers, p); 523 strlcpy(p->conf.reason, 524 "bgpd shutting down", 525 sizeof(p->conf.reason)); 526 session_stop(p, ERR_CEASE_ADMIN_DOWN); 527 timer_remove_all(&p->timers); 528 free(p); 529 } 530 531 while ((m = LIST_FIRST(&mrthead)) != NULL) { 532 mrt_clean(m); 533 LIST_REMOVE(m, entry); 534 
free(m); 535 } 536 537 free_config(conf); 538 free(peer_l); 539 free(mrt_l); 540 free(pfd); 541 542 /* close pipes */ 543 if (ibuf_rde) { 544 msgbuf_write(&ibuf_rde->w); 545 msgbuf_clear(&ibuf_rde->w); 546 close(ibuf_rde->fd); 547 free(ibuf_rde); 548 } 549 if (ibuf_rde_ctl) { 550 msgbuf_clear(&ibuf_rde_ctl->w); 551 close(ibuf_rde_ctl->fd); 552 free(ibuf_rde_ctl); 553 } 554 msgbuf_write(&ibuf_main->w); 555 msgbuf_clear(&ibuf_main->w); 556 close(ibuf_main->fd); 557 free(ibuf_main); 558 559 control_shutdown(csock); 560 control_shutdown(rcsock); 561 log_info("session engine exiting"); 562 exit(0); 563 } 564 565 void 566 init_peer(struct peer *p) 567 { 568 TAILQ_INIT(&p->timers); 569 p->fd = p->wbuf.fd = -1; 570 571 if (p->conf.if_depend[0]) 572 imsg_compose(ibuf_main, IMSG_SESSION_DEPENDON, 0, 0, -1, 573 p->conf.if_depend, sizeof(p->conf.if_depend)); 574 else 575 p->depend_ok = 1; 576 577 peer_cnt++; 578 579 change_state(p, STATE_IDLE, EVNT_NONE); 580 if (p->conf.down) 581 timer_stop(&p->timers, Timer_IdleHold); /* no autostart */ 582 else 583 timer_set(&p->timers, Timer_IdleHold, 0); /* start ASAP */ 584 585 p->stats.last_updown = getmonotime(); 586 587 /* 588 * on startup, demote if requested. 589 * do not handle new peers. they must reach ESTABLISHED beforehand. 590 * peers added at runtime have reconf_action set to RECONF_REINIT. 
591 */ 592 if (p->reconf_action != RECONF_REINIT && p->conf.demote_group[0]) 593 session_demote(p, +1); 594 } 595 596 void 597 bgp_fsm(struct peer *peer, enum session_events event) 598 { 599 switch (peer->state) { 600 case STATE_NONE: 601 /* nothing */ 602 break; 603 case STATE_IDLE: 604 switch (event) { 605 case EVNT_START: 606 timer_stop(&peer->timers, Timer_Hold); 607 timer_stop(&peer->timers, Timer_SendHold); 608 timer_stop(&peer->timers, Timer_Keepalive); 609 timer_stop(&peer->timers, Timer_IdleHold); 610 611 /* allocate read buffer */ 612 peer->rbuf = calloc(1, sizeof(struct ibuf_read)); 613 if (peer->rbuf == NULL) 614 fatal(NULL); 615 616 /* init write buffer */ 617 msgbuf_init(&peer->wbuf); 618 619 peer->stats.last_sent_errcode = 0; 620 peer->stats.last_sent_suberr = 0; 621 peer->stats.last_rcvd_errcode = 0; 622 peer->stats.last_rcvd_suberr = 0; 623 624 if (!peer->depend_ok) 625 timer_stop(&peer->timers, Timer_ConnectRetry); 626 else if (peer->passive || peer->conf.passive || 627 peer->conf.template) { 628 change_state(peer, STATE_ACTIVE, event); 629 timer_stop(&peer->timers, Timer_ConnectRetry); 630 } else { 631 change_state(peer, STATE_CONNECT, event); 632 timer_set(&peer->timers, Timer_ConnectRetry, 633 conf->connectretry); 634 session_connect(peer); 635 } 636 peer->passive = 0; 637 break; 638 default: 639 /* ignore */ 640 break; 641 } 642 break; 643 case STATE_CONNECT: 644 switch (event) { 645 case EVNT_START: 646 /* ignore */ 647 break; 648 case EVNT_CON_OPEN: 649 session_tcp_established(peer); 650 session_open(peer); 651 timer_stop(&peer->timers, Timer_ConnectRetry); 652 peer->holdtime = INTERVAL_HOLD_INITIAL; 653 start_timer_holdtime(peer); 654 change_state(peer, STATE_OPENSENT, event); 655 break; 656 case EVNT_CON_OPENFAIL: 657 timer_set(&peer->timers, Timer_ConnectRetry, 658 conf->connectretry); 659 session_close_connection(peer); 660 change_state(peer, STATE_ACTIVE, event); 661 break; 662 case EVNT_TIMER_CONNRETRY: 663 timer_set(&peer->timers, 
Timer_ConnectRetry, 664 conf->connectretry); 665 session_connect(peer); 666 break; 667 default: 668 change_state(peer, STATE_IDLE, event); 669 break; 670 } 671 break; 672 case STATE_ACTIVE: 673 switch (event) { 674 case EVNT_START: 675 /* ignore */ 676 break; 677 case EVNT_CON_OPEN: 678 session_tcp_established(peer); 679 session_open(peer); 680 timer_stop(&peer->timers, Timer_ConnectRetry); 681 peer->holdtime = INTERVAL_HOLD_INITIAL; 682 start_timer_holdtime(peer); 683 change_state(peer, STATE_OPENSENT, event); 684 break; 685 case EVNT_CON_OPENFAIL: 686 timer_set(&peer->timers, Timer_ConnectRetry, 687 conf->connectretry); 688 session_close_connection(peer); 689 change_state(peer, STATE_ACTIVE, event); 690 break; 691 case EVNT_TIMER_CONNRETRY: 692 timer_set(&peer->timers, Timer_ConnectRetry, 693 peer->holdtime); 694 change_state(peer, STATE_CONNECT, event); 695 session_connect(peer); 696 break; 697 default: 698 change_state(peer, STATE_IDLE, event); 699 break; 700 } 701 break; 702 case STATE_OPENSENT: 703 switch (event) { 704 case EVNT_START: 705 /* ignore */ 706 break; 707 case EVNT_STOP: 708 change_state(peer, STATE_IDLE, event); 709 break; 710 case EVNT_CON_CLOSED: 711 session_close_connection(peer); 712 timer_set(&peer->timers, Timer_ConnectRetry, 713 conf->connectretry); 714 change_state(peer, STATE_ACTIVE, event); 715 break; 716 case EVNT_CON_FATAL: 717 change_state(peer, STATE_IDLE, event); 718 break; 719 case EVNT_TIMER_HOLDTIME: 720 case EVNT_TIMER_SENDHOLD: 721 session_notification(peer, ERR_HOLDTIMEREXPIRED, 722 0, NULL, 0); 723 change_state(peer, STATE_IDLE, event); 724 break; 725 case EVNT_RCVD_OPEN: 726 /* parse_open calls change_state itself on failure */ 727 if (parse_open(peer)) 728 break; 729 session_keepalive(peer); 730 change_state(peer, STATE_OPENCONFIRM, event); 731 break; 732 case EVNT_RCVD_NOTIFICATION: 733 if (parse_notification(peer)) { 734 change_state(peer, STATE_IDLE, event); 735 /* don't punish, capa negotiation */ 736 
timer_set(&peer->timers, Timer_IdleHold, 0); 737 peer->IdleHoldTime /= 2; 738 } else 739 change_state(peer, STATE_IDLE, event); 740 break; 741 default: 742 session_notification(peer, 743 ERR_FSM, ERR_FSM_UNEX_OPENSENT, NULL, 0); 744 change_state(peer, STATE_IDLE, event); 745 break; 746 } 747 break; 748 case STATE_OPENCONFIRM: 749 switch (event) { 750 case EVNT_START: 751 /* ignore */ 752 break; 753 case EVNT_STOP: 754 change_state(peer, STATE_IDLE, event); 755 break; 756 case EVNT_CON_CLOSED: 757 case EVNT_CON_FATAL: 758 change_state(peer, STATE_IDLE, event); 759 break; 760 case EVNT_TIMER_HOLDTIME: 761 case EVNT_TIMER_SENDHOLD: 762 session_notification(peer, ERR_HOLDTIMEREXPIRED, 763 0, NULL, 0); 764 change_state(peer, STATE_IDLE, event); 765 break; 766 case EVNT_TIMER_KEEPALIVE: 767 session_keepalive(peer); 768 break; 769 case EVNT_RCVD_KEEPALIVE: 770 start_timer_holdtime(peer); 771 change_state(peer, STATE_ESTABLISHED, event); 772 break; 773 case EVNT_RCVD_NOTIFICATION: 774 parse_notification(peer); 775 change_state(peer, STATE_IDLE, event); 776 break; 777 default: 778 session_notification(peer, 779 ERR_FSM, ERR_FSM_UNEX_OPENCONFIRM, NULL, 0); 780 change_state(peer, STATE_IDLE, event); 781 break; 782 } 783 break; 784 case STATE_ESTABLISHED: 785 switch (event) { 786 case EVNT_START: 787 /* ignore */ 788 break; 789 case EVNT_STOP: 790 change_state(peer, STATE_IDLE, event); 791 break; 792 case EVNT_CON_CLOSED: 793 case EVNT_CON_FATAL: 794 change_state(peer, STATE_IDLE, event); 795 break; 796 case EVNT_TIMER_HOLDTIME: 797 case EVNT_TIMER_SENDHOLD: 798 session_notification(peer, ERR_HOLDTIMEREXPIRED, 799 0, NULL, 0); 800 change_state(peer, STATE_IDLE, event); 801 break; 802 case EVNT_TIMER_KEEPALIVE: 803 session_keepalive(peer); 804 break; 805 case EVNT_RCVD_KEEPALIVE: 806 start_timer_holdtime(peer); 807 break; 808 case EVNT_RCVD_UPDATE: 809 start_timer_holdtime(peer); 810 if (parse_update(peer)) 811 change_state(peer, STATE_IDLE, event); 812 else 813 
start_timer_holdtime(peer); 814 break; 815 case EVNT_RCVD_NOTIFICATION: 816 parse_notification(peer); 817 change_state(peer, STATE_IDLE, event); 818 break; 819 default: 820 session_notification(peer, 821 ERR_FSM, ERR_FSM_UNEX_ESTABLISHED, NULL, 0); 822 change_state(peer, STATE_IDLE, event); 823 break; 824 } 825 break; 826 } 827 } 828 829 void 830 start_timer_holdtime(struct peer *peer) 831 { 832 if (peer->holdtime > 0) 833 timer_set(&peer->timers, Timer_Hold, peer->holdtime); 834 else 835 timer_stop(&peer->timers, Timer_Hold); 836 } 837 838 void 839 start_timer_keepalive(struct peer *peer) 840 { 841 if (peer->holdtime > 0) 842 timer_set(&peer->timers, Timer_Keepalive, peer->holdtime / 3); 843 else 844 timer_stop(&peer->timers, Timer_Keepalive); 845 } 846 847 void 848 session_close_connection(struct peer *peer) 849 { 850 if (peer->fd != -1) { 851 close(peer->fd); 852 pauseaccept = 0; 853 } 854 peer->fd = peer->wbuf.fd = -1; 855 } 856 857 void 858 change_state(struct peer *peer, enum session_state state, 859 enum session_events event) 860 { 861 struct mrt *mrt; 862 863 switch (state) { 864 case STATE_IDLE: 865 /* carp demotion first. 
new peers handled in init_peer */ 866 if (peer->state == STATE_ESTABLISHED && 867 peer->conf.demote_group[0] && !peer->demoted) 868 session_demote(peer, +1); 869 870 /* 871 * try to write out what's buffered (maybe a notification), 872 * don't bother if it fails 873 */ 874 if (peer->state >= STATE_OPENSENT && peer->wbuf.queued) 875 msgbuf_write(&peer->wbuf); 876 877 /* 878 * we must start the timer for the next EVNT_START 879 * if we are coming here due to an error and the 880 * session was not established successfully before, the 881 * starttimerinterval needs to be exponentially increased 882 */ 883 if (peer->IdleHoldTime == 0) 884 peer->IdleHoldTime = INTERVAL_IDLE_HOLD_INITIAL; 885 peer->holdtime = INTERVAL_HOLD_INITIAL; 886 timer_stop(&peer->timers, Timer_ConnectRetry); 887 timer_stop(&peer->timers, Timer_Keepalive); 888 timer_stop(&peer->timers, Timer_Hold); 889 timer_stop(&peer->timers, Timer_SendHold); 890 timer_stop(&peer->timers, Timer_IdleHold); 891 timer_stop(&peer->timers, Timer_IdleHoldReset); 892 session_close_connection(peer); 893 msgbuf_clear(&peer->wbuf); 894 free(peer->rbuf); 895 peer->rbuf = NULL; 896 peer->rpending = 0; 897 memset(&peer->capa.peer, 0, sizeof(peer->capa.peer)); 898 if (!peer->template) 899 imsg_compose(ibuf_main, IMSG_PFKEY_RELOAD, 900 peer->conf.id, 0, -1, NULL, 0); 901 902 if (event != EVNT_STOP) { 903 timer_set(&peer->timers, Timer_IdleHold, 904 peer->IdleHoldTime); 905 if (event != EVNT_NONE && 906 peer->IdleHoldTime < MAX_IDLE_HOLD/2) 907 peer->IdleHoldTime *= 2; 908 } 909 if (peer->state == STATE_ESTABLISHED) { 910 if (peer->capa.neg.grestart.restart == 2 && 911 (event == EVNT_CON_CLOSED || 912 event == EVNT_CON_FATAL)) { 913 /* don't punish graceful restart */ 914 timer_set(&peer->timers, Timer_IdleHold, 0); 915 peer->IdleHoldTime /= 2; 916 session_graceful_restart(peer); 917 } else 918 session_down(peer); 919 } 920 if (peer->state == STATE_NONE || 921 peer->state == STATE_ESTABLISHED) { 922 /* initialize capability 
negotiation structures */ 923 memcpy(&peer->capa.ann, &peer->conf.capabilities, 924 sizeof(peer->capa.ann)); 925 if (!peer->conf.announce_capa) 926 session_capa_ann_none(peer); 927 } 928 break; 929 case STATE_CONNECT: 930 if (peer->state == STATE_ESTABLISHED && 931 peer->capa.neg.grestart.restart == 2) { 932 /* do the graceful restart dance */ 933 session_graceful_restart(peer); 934 peer->holdtime = INTERVAL_HOLD_INITIAL; 935 timer_stop(&peer->timers, Timer_ConnectRetry); 936 timer_stop(&peer->timers, Timer_Keepalive); 937 timer_stop(&peer->timers, Timer_Hold); 938 timer_stop(&peer->timers, Timer_SendHold); 939 timer_stop(&peer->timers, Timer_IdleHold); 940 timer_stop(&peer->timers, Timer_IdleHoldReset); 941 session_close_connection(peer); 942 msgbuf_clear(&peer->wbuf); 943 memset(&peer->capa.peer, 0, sizeof(peer->capa.peer)); 944 } 945 break; 946 case STATE_ACTIVE: 947 if (!peer->template) 948 imsg_compose(ibuf_main, IMSG_PFKEY_RELOAD, 949 peer->conf.id, 0, -1, NULL, 0); 950 break; 951 case STATE_OPENSENT: 952 break; 953 case STATE_OPENCONFIRM: 954 break; 955 case STATE_ESTABLISHED: 956 timer_set(&peer->timers, Timer_IdleHoldReset, 957 peer->IdleHoldTime); 958 if (peer->demoted) 959 timer_set(&peer->timers, Timer_CarpUndemote, 960 INTERVAL_HOLD_DEMOTED); 961 session_up(peer); 962 break; 963 default: /* something seriously fucked */ 964 break; 965 } 966 967 log_statechange(peer, state, event); 968 LIST_FOREACH(mrt, &mrthead, entry) { 969 if (!(mrt->type == MRT_ALL_IN || mrt->type == MRT_ALL_OUT)) 970 continue; 971 if ((mrt->peer_id == 0 && mrt->group_id == 0) || 972 mrt->peer_id == peer->conf.id || (mrt->group_id != 0 && 973 mrt->group_id == peer->conf.groupid)) 974 mrt_dump_state(mrt, peer->state, state, peer); 975 } 976 peer->prev_state = peer->state; 977 peer->state = state; 978 } 979 980 void 981 session_accept(int listenfd) 982 { 983 int connfd; 984 socklen_t len; 985 struct sockaddr_storage cliaddr; 986 struct peer *p = NULL; 987 988 len = sizeof(cliaddr); 
989 if ((connfd = accept4(listenfd, 990 (struct sockaddr *)&cliaddr, &len, 991 SOCK_CLOEXEC | SOCK_NONBLOCK)) == -1) { 992 if (errno == ENFILE || errno == EMFILE) 993 pauseaccept = getmonotime(); 994 else if (errno != EWOULDBLOCK && errno != EINTR && 995 errno != ECONNABORTED) 996 log_warn("accept"); 997 return; 998 } 999 1000 p = getpeerbyip(conf, (struct sockaddr *)&cliaddr); 1001 1002 if (p != NULL && p->state == STATE_IDLE && p->errcnt < 2) { 1003 if (timer_running(&p->timers, Timer_IdleHold, NULL)) { 1004 /* fast reconnect after clear */ 1005 p->passive = 1; 1006 bgp_fsm(p, EVNT_START); 1007 } 1008 } 1009 1010 if (p != NULL && 1011 (p->state == STATE_CONNECT || p->state == STATE_ACTIVE)) { 1012 if (p->fd != -1) { 1013 if (p->state == STATE_CONNECT) 1014 session_close_connection(p); 1015 else { 1016 close(connfd); 1017 return; 1018 } 1019 } 1020 1021 open: 1022 if (p->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) { 1023 log_peer_warnx(&p->conf, 1024 "ipsec or md5sig configured but not available"); 1025 close(connfd); 1026 return; 1027 } 1028 1029 if (tcp_md5_check(connfd, p) == -1) { 1030 close(connfd); 1031 return; 1032 } 1033 p->fd = p->wbuf.fd = connfd; 1034 if (session_setup_socket(p)) { 1035 close(connfd); 1036 return; 1037 } 1038 bgp_fsm(p, EVNT_CON_OPEN); 1039 return; 1040 } else if (p != NULL && p->state == STATE_ESTABLISHED && 1041 p->capa.neg.grestart.restart == 2) { 1042 /* first do the graceful restart dance */ 1043 change_state(p, STATE_CONNECT, EVNT_CON_CLOSED); 1044 /* then do part of the open dance */ 1045 goto open; 1046 } else { 1047 log_conn_attempt(p, (struct sockaddr *)&cliaddr, len); 1048 close(connfd); 1049 } 1050 } 1051 1052 int 1053 session_connect(struct peer *peer) 1054 { 1055 struct sockaddr *sa; 1056 struct bgpd_addr *bind_addr = NULL; 1057 socklen_t sa_len; 1058 1059 /* 1060 * we do not need the overcomplicated collision detection RFC 1771 1061 * describes; we simply make sure there is only ever one concurrent 1062 * tcp 
connection per peer. 1063 */ 1064 if (peer->fd != -1) 1065 return (-1); 1066 1067 if ((peer->fd = socket(aid2af(peer->conf.remote_addr.aid), 1068 SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, IPPROTO_TCP)) == -1) { 1069 log_peer_warn(&peer->conf, "session_connect socket"); 1070 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1071 return (-1); 1072 } 1073 1074 if (peer->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) { 1075 log_peer_warnx(&peer->conf, 1076 "ipsec or md5sig configured but not available"); 1077 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1078 return (-1); 1079 } 1080 1081 tcp_md5_set(peer->fd, peer); 1082 peer->wbuf.fd = peer->fd; 1083 1084 /* if local-address is set we need to bind() */ 1085 switch (peer->conf.remote_addr.aid) { 1086 case AID_INET: 1087 bind_addr = &peer->conf.local_addr_v4; 1088 break; 1089 case AID_INET6: 1090 bind_addr = &peer->conf.local_addr_v6; 1091 break; 1092 } 1093 if ((sa = addr2sa(bind_addr, 0, &sa_len)) != NULL) { 1094 if (bind(peer->fd, sa, sa_len) == -1) { 1095 log_peer_warn(&peer->conf, "session_connect bind"); 1096 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1097 return (-1); 1098 } 1099 } 1100 1101 if (session_setup_socket(peer)) { 1102 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1103 return (-1); 1104 } 1105 1106 sa = addr2sa(&peer->conf.remote_addr, peer->conf.remote_port, &sa_len); 1107 if (connect(peer->fd, sa, sa_len) == -1) { 1108 if (errno != EINPROGRESS) { 1109 if (errno != peer->lasterr) 1110 log_peer_warn(&peer->conf, "connect"); 1111 peer->lasterr = errno; 1112 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1113 return (-1); 1114 } 1115 } else 1116 bgp_fsm(peer, EVNT_CON_OPEN); 1117 1118 return (0); 1119 } 1120 1121 int 1122 session_setup_socket(struct peer *p) 1123 { 1124 int ttl = p->conf.distance; 1125 int pre = IPTOS_PREC_INTERNETCONTROL; 1126 int nodelay = 1; 1127 int bsize; 1128 1129 switch (p->conf.remote_addr.aid) { 1130 case AID_INET: 1131 /* set precedence, see RFC 1771 appendix 5 */ 1132 if (setsockopt(p->fd, IPPROTO_IP, IP_TOS, &pre, sizeof(pre)) == 
1133 -1) { 1134 log_peer_warn(&p->conf, 1135 "session_setup_socket setsockopt TOS"); 1136 return (-1); 1137 } 1138 1139 if (p->conf.ebgp) { 1140 /* 1141 * set TTL to foreign router's distance 1142 * 1=direct n=multihop with ttlsec, we always use 255 1143 */ 1144 if (p->conf.ttlsec) { 1145 ttl = 256 - p->conf.distance; 1146 if (setsockopt(p->fd, IPPROTO_IP, IP_MINTTL, 1147 &ttl, sizeof(ttl)) == -1) { 1148 log_peer_warn(&p->conf, 1149 "session_setup_socket: " 1150 "setsockopt MINTTL"); 1151 return (-1); 1152 } 1153 ttl = 255; 1154 } 1155 1156 if (setsockopt(p->fd, IPPROTO_IP, IP_TTL, &ttl, 1157 sizeof(ttl)) == -1) { 1158 log_peer_warn(&p->conf, 1159 "session_setup_socket setsockopt TTL"); 1160 return (-1); 1161 } 1162 } 1163 break; 1164 case AID_INET6: 1165 if (p->conf.ebgp) { 1166 /* 1167 * set hoplimit to foreign router's distance 1168 * 1=direct n=multihop with ttlsec, we always use 255 1169 */ 1170 if (p->conf.ttlsec) { 1171 ttl = 256 - p->conf.distance; 1172 if (setsockopt(p->fd, IPPROTO_IPV6, 1173 IPV6_MINHOPCOUNT, &ttl, sizeof(ttl)) 1174 == -1) { 1175 log_peer_warn(&p->conf, 1176 "session_setup_socket: " 1177 "setsockopt MINHOPCOUNT"); 1178 return (-1); 1179 } 1180 ttl = 255; 1181 } 1182 if (setsockopt(p->fd, IPPROTO_IPV6, IPV6_UNICAST_HOPS, 1183 &ttl, sizeof(ttl)) == -1) { 1184 log_peer_warn(&p->conf, 1185 "session_setup_socket setsockopt hoplimit"); 1186 return (-1); 1187 } 1188 } 1189 break; 1190 } 1191 1192 /* set TCP_NODELAY */ 1193 if (setsockopt(p->fd, IPPROTO_TCP, TCP_NODELAY, &nodelay, 1194 sizeof(nodelay)) == -1) { 1195 log_peer_warn(&p->conf, 1196 "session_setup_socket setsockopt TCP_NODELAY"); 1197 return (-1); 1198 } 1199 1200 /* only increase bufsize (and thus window) if md5 or ipsec is in use */ 1201 if (p->conf.auth.method != AUTH_NONE) { 1202 /* try to increase bufsize. 
no biggie if it fails */ 1203 bsize = 65535; 1204 while (bsize > 8192 && 1205 setsockopt(p->fd, SOL_SOCKET, SO_RCVBUF, &bsize, 1206 sizeof(bsize)) == -1 && errno != EINVAL) 1207 bsize /= 2; 1208 bsize = 65535; 1209 while (bsize > 8192 && 1210 setsockopt(p->fd, SOL_SOCKET, SO_SNDBUF, &bsize, 1211 sizeof(bsize)) == -1 && errno != EINVAL) 1212 bsize /= 2; 1213 } 1214 1215 return (0); 1216 } 1217 1218 /* compare two sockaddrs by converting them into bgpd_addr */ 1219 static int 1220 sa_cmp(struct sockaddr *a, struct sockaddr *b) 1221 { 1222 struct bgpd_addr ba, bb; 1223 1224 sa2addr(a, &ba, NULL); 1225 sa2addr(b, &bb, NULL); 1226 1227 return (memcmp(&ba, &bb, sizeof(ba)) == 0); 1228 } 1229 1230 static void 1231 get_alternate_addr(struct sockaddr *sa, struct bgpd_addr *alt) 1232 { 1233 struct ifaddrs *ifap, *ifa, *match; 1234 1235 if (getifaddrs(&ifap) == -1) 1236 fatal("getifaddrs"); 1237 1238 for (match = ifap; match != NULL; match = match->ifa_next) 1239 if (match->ifa_addr != NULL && 1240 sa_cmp(sa, match->ifa_addr) == 0) 1241 break; 1242 1243 if (match == NULL) { 1244 log_warnx("%s: local address not found", __func__); 1245 return; 1246 } 1247 1248 switch (sa->sa_family) { 1249 case AF_INET6: 1250 for (ifa = ifap; ifa != NULL; ifa = ifa->ifa_next) { 1251 if (ifa->ifa_addr != NULL && 1252 ifa->ifa_addr->sa_family == AF_INET && 1253 strcmp(ifa->ifa_name, match->ifa_name) == 0) { 1254 sa2addr(ifa->ifa_addr, alt, NULL); 1255 break; 1256 } 1257 } 1258 break; 1259 case AF_INET: 1260 for (ifa = ifap; ifa != NULL; ifa = ifa->ifa_next) { 1261 if (ifa->ifa_addr != NULL && 1262 ifa->ifa_addr->sa_family == AF_INET6 && 1263 strcmp(ifa->ifa_name, match->ifa_name) == 0) { 1264 struct sockaddr_in6 *s = 1265 (struct sockaddr_in6 *)ifa->ifa_addr; 1266 1267 /* only accept global scope addresses */ 1268 if (IN6_IS_ADDR_LINKLOCAL(&s->sin6_addr) || 1269 IN6_IS_ADDR_SITELOCAL(&s->sin6_addr)) 1270 continue; 1271 sa2addr(ifa->ifa_addr, alt, NULL); 1272 break; 1273 } 1274 } 1275 break; 1276 
default: 1277 log_warnx("%s: unsupported address family %d", __func__, 1278 sa->sa_family); 1279 break; 1280 } 1281 1282 freeifaddrs(ifap); 1283 } 1284 1285 void 1286 session_tcp_established(struct peer *peer) 1287 { 1288 struct sockaddr_storage ss; 1289 socklen_t len; 1290 1291 len = sizeof(ss); 1292 if (getsockname(peer->fd, (struct sockaddr *)&ss, &len) == -1) 1293 log_warn("getsockname"); 1294 sa2addr((struct sockaddr *)&ss, &peer->local, &peer->local_port); 1295 get_alternate_addr((struct sockaddr *)&ss, &peer->local_alt); 1296 len = sizeof(ss); 1297 if (getpeername(peer->fd, (struct sockaddr *)&ss, &len) == -1) 1298 log_warn("getpeername"); 1299 sa2addr((struct sockaddr *)&ss, &peer->remote, &peer->remote_port); 1300 } 1301 1302 void 1303 session_capa_ann_none(struct peer *peer) 1304 { 1305 memset(&peer->capa.ann, 0, sizeof(peer->capa.ann)); 1306 } 1307 1308 int 1309 session_capa_add(struct ibuf *opb, uint8_t capa_code, uint8_t capa_len) 1310 { 1311 int errs = 0; 1312 1313 errs += ibuf_add(opb, &capa_code, sizeof(capa_code)); 1314 errs += ibuf_add(opb, &capa_len, sizeof(capa_len)); 1315 return (errs); 1316 } 1317 1318 int 1319 session_capa_add_mp(struct ibuf *buf, uint8_t aid) 1320 { 1321 uint8_t safi, pad = 0; 1322 uint16_t afi; 1323 int errs = 0; 1324 1325 if (aid2afi(aid, &afi, &safi) == -1) 1326 fatalx("session_capa_add_mp: bad afi/safi pair"); 1327 afi = htons(afi); 1328 errs += ibuf_add(buf, &afi, sizeof(afi)); 1329 errs += ibuf_add(buf, &pad, sizeof(pad)); 1330 errs += ibuf_add(buf, &safi, sizeof(safi)); 1331 1332 return (errs); 1333 } 1334 1335 int 1336 session_capa_add_afi(struct peer *p, struct ibuf *b, uint8_t aid, 1337 uint8_t flags) 1338 { 1339 u_int errs = 0; 1340 uint16_t afi; 1341 uint8_t safi; 1342 1343 if (aid2afi(aid, &afi, &safi)) { 1344 log_warn("session_capa_add_afi: bad AID"); 1345 return (1); 1346 } 1347 1348 afi = htons(afi); 1349 errs += ibuf_add(b, &afi, sizeof(afi)); 1350 errs += ibuf_add(b, &safi, sizeof(safi)); 1351 errs += 
ibuf_add(b, &flags, sizeof(flags)); 1352 1353 return (errs); 1354 } 1355 1356 struct bgp_msg * 1357 session_newmsg(enum msg_type msgtype, uint16_t len) 1358 { 1359 struct bgp_msg *msg; 1360 struct msg_header hdr; 1361 struct ibuf *buf; 1362 int errs = 0; 1363 1364 memset(&hdr.marker, 0xff, sizeof(hdr.marker)); 1365 hdr.len = htons(len); 1366 hdr.type = msgtype; 1367 1368 if ((buf = ibuf_open(len)) == NULL) 1369 return (NULL); 1370 1371 errs += ibuf_add(buf, &hdr.marker, sizeof(hdr.marker)); 1372 errs += ibuf_add(buf, &hdr.len, sizeof(hdr.len)); 1373 errs += ibuf_add(buf, &hdr.type, sizeof(hdr.type)); 1374 1375 if (errs || (msg = calloc(1, sizeof(*msg))) == NULL) { 1376 ibuf_free(buf); 1377 return (NULL); 1378 } 1379 1380 msg->buf = buf; 1381 msg->type = msgtype; 1382 msg->len = len; 1383 1384 return (msg); 1385 } 1386 1387 int 1388 session_sendmsg(struct bgp_msg *msg, struct peer *p) 1389 { 1390 struct mrt *mrt; 1391 1392 LIST_FOREACH(mrt, &mrthead, entry) { 1393 if (!(mrt->type == MRT_ALL_OUT || (msg->type == UPDATE && 1394 mrt->type == MRT_UPDATE_OUT))) 1395 continue; 1396 if ((mrt->peer_id == 0 && mrt->group_id == 0) || 1397 mrt->peer_id == p->conf.id || (mrt->group_id != 0 && 1398 mrt->group_id == p->conf.groupid)) 1399 mrt_dump_bgp_msg(mrt, msg->buf->buf, msg->len, p, 1400 msg->type); 1401 } 1402 1403 ibuf_close(&p->wbuf, msg->buf); 1404 if (!p->throttled && p->wbuf.queued > SESS_MSG_HIGH_MARK) { 1405 if (imsg_rde(IMSG_XOFF, p->conf.id, NULL, 0) == -1) 1406 log_peer_warn(&p->conf, "imsg_compose XOFF"); 1407 else 1408 p->throttled = 1; 1409 } 1410 1411 free(msg); 1412 return (0); 1413 } 1414 1415 /* 1416 * Translate between internal roles and the value expected by RFC 9234. 
1417 */ 1418 static uint8_t 1419 role2capa(enum role role) 1420 { 1421 switch (role) { 1422 case ROLE_CUSTOMER: 1423 return CAPA_ROLE_CUSTOMER; 1424 case ROLE_PROVIDER: 1425 return CAPA_ROLE_PROVIDER; 1426 case ROLE_RS: 1427 return CAPA_ROLE_RS; 1428 case ROLE_RS_CLIENT: 1429 return CAPA_ROLE_RS_CLIENT; 1430 case ROLE_PEER: 1431 return CAPA_ROLE_PEER; 1432 default: 1433 fatalx("Unsupported role for role capability"); 1434 } 1435 } 1436 1437 static enum role 1438 capa2role(uint8_t val) 1439 { 1440 switch (val) { 1441 case CAPA_ROLE_PROVIDER: 1442 return ROLE_PROVIDER; 1443 case CAPA_ROLE_RS: 1444 return ROLE_RS; 1445 case CAPA_ROLE_RS_CLIENT: 1446 return ROLE_RS_CLIENT; 1447 case CAPA_ROLE_CUSTOMER: 1448 return ROLE_CUSTOMER; 1449 case CAPA_ROLE_PEER: 1450 return ROLE_PEER; 1451 default: 1452 return ROLE_NONE; 1453 } 1454 } 1455 1456 void 1457 session_open(struct peer *p) 1458 { 1459 struct bgp_msg *buf; 1460 struct ibuf *opb; 1461 struct msg_open msg; 1462 uint16_t len, optparamlen = 0; 1463 uint8_t i, op_type; 1464 int errs = 0, extlen = 0; 1465 int mpcapa = 0; 1466 1467 1468 if ((opb = ibuf_dynamic(0, UINT16_MAX - 3)) == NULL) { 1469 bgp_fsm(p, EVNT_CON_FATAL); 1470 return; 1471 } 1472 1473 /* multiprotocol extensions, RFC 4760 */ 1474 for (i = 0; i < AID_MAX; i++) 1475 if (p->capa.ann.mp[i]) { /* 4 bytes data */ 1476 errs += session_capa_add(opb, CAPA_MP, 4); 1477 errs += session_capa_add_mp(opb, i); 1478 mpcapa++; 1479 } 1480 1481 /* route refresh, RFC 2918 */ 1482 if (p->capa.ann.refresh) /* no data */ 1483 errs += session_capa_add(opb, CAPA_REFRESH, 0); 1484 1485 /* BGP open policy, RFC 9234, only for ebgp sessions */ 1486 if (p->conf.ebgp && p->capa.ann.role_ena && 1487 p->capa.ann.role != ROLE_NONE) { 1488 uint8_t val; 1489 val = role2capa(p->capa.ann.role); 1490 errs += session_capa_add(opb, CAPA_ROLE, 1); 1491 errs += ibuf_add(opb, &val, 1); 1492 } 1493 1494 /* graceful restart and End-of-RIB marker, RFC 4724 */ 1495 if (p->capa.ann.grestart.restart) { 
1496 int rst = 0; 1497 uint16_t hdr = 0; 1498 1499 for (i = 0; i < AID_MAX; i++) { 1500 if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING) 1501 rst++; 1502 } 1503 1504 /* Only set the R-flag if no graceful restart is ongoing */ 1505 if (!rst) 1506 hdr |= CAPA_GR_R_FLAG; 1507 hdr = htons(hdr); 1508 1509 errs += session_capa_add(opb, CAPA_RESTART, sizeof(hdr)); 1510 errs += ibuf_add(opb, &hdr, sizeof(hdr)); 1511 } 1512 1513 /* 4-bytes AS numbers, RFC6793 */ 1514 if (p->capa.ann.as4byte) { /* 4 bytes data */ 1515 uint32_t nas; 1516 1517 nas = htonl(p->conf.local_as); 1518 errs += session_capa_add(opb, CAPA_AS4BYTE, sizeof(nas)); 1519 errs += ibuf_add(opb, &nas, sizeof(nas)); 1520 } 1521 1522 /* advertisement of multiple paths, RFC7911 */ 1523 if (p->capa.ann.add_path[0]) { /* variable */ 1524 uint8_t aplen; 1525 1526 if (mpcapa) 1527 aplen = 4 * mpcapa; 1528 else /* AID_INET */ 1529 aplen = 4; 1530 errs += session_capa_add(opb, CAPA_ADD_PATH, aplen); 1531 if (mpcapa) { 1532 for (i = AID_MIN; i < AID_MAX; i++) { 1533 if (p->capa.ann.mp[i]) { 1534 errs += session_capa_add_afi(p, opb, 1535 i, p->capa.ann.add_path[i]); 1536 } 1537 } 1538 } else { /* AID_INET */ 1539 errs += session_capa_add_afi(p, opb, AID_INET, 1540 p->capa.ann.add_path[AID_INET]); 1541 } 1542 } 1543 1544 /* enhanced route-refresh, RFC7313 */ 1545 if (p->capa.ann.enhanced_rr) /* no data */ 1546 errs += session_capa_add(opb, CAPA_ENHANCED_RR, 0); 1547 1548 optparamlen = ibuf_size(opb); 1549 if (optparamlen == 0) { 1550 /* nothing */ 1551 } else if (optparamlen + 2 >= 255) { 1552 /* RFC9072: 2 byte length instead of 1 + 3 byte extra header */ 1553 optparamlen += sizeof(op_type) + 2 + 3; 1554 msg.optparamlen = 255; 1555 extlen = 1; 1556 } else { 1557 optparamlen += sizeof(op_type) + 1; 1558 msg.optparamlen = optparamlen; 1559 } 1560 1561 len = MSGSIZE_OPEN_MIN + optparamlen; 1562 if (errs || (buf = session_newmsg(OPEN, len)) == NULL) { 1563 ibuf_free(opb); 1564 bgp_fsm(p, EVNT_CON_FATAL); 1565 return; 
1566 } 1567 1568 msg.version = 4; 1569 msg.myas = htons(p->conf.local_short_as); 1570 if (p->conf.holdtime) 1571 msg.holdtime = htons(p->conf.holdtime); 1572 else 1573 msg.holdtime = htons(conf->holdtime); 1574 msg.bgpid = conf->bgpid; /* is already in network byte order */ 1575 1576 errs += ibuf_add(buf->buf, &msg.version, sizeof(msg.version)); 1577 errs += ibuf_add(buf->buf, &msg.myas, sizeof(msg.myas)); 1578 errs += ibuf_add(buf->buf, &msg.holdtime, sizeof(msg.holdtime)); 1579 errs += ibuf_add(buf->buf, &msg.bgpid, sizeof(msg.bgpid)); 1580 errs += ibuf_add(buf->buf, &msg.optparamlen, 1); 1581 1582 if (extlen) { 1583 /* write RFC9072 extra header */ 1584 uint16_t op_extlen = htons(optparamlen - 3); 1585 op_type = OPT_PARAM_EXT_LEN; 1586 errs += ibuf_add(buf->buf, &op_type, 1); 1587 errs += ibuf_add(buf->buf, &op_extlen, 2); 1588 } 1589 1590 if (optparamlen) { 1591 op_type = OPT_PARAM_CAPABILITIES; 1592 errs += ibuf_add(buf->buf, &op_type, sizeof(op_type)); 1593 1594 optparamlen = ibuf_size(opb); 1595 if (extlen) { 1596 /* RFC9072: 2-byte extended length */ 1597 uint16_t op_extlen = htons(optparamlen); 1598 errs += ibuf_add(buf->buf, &op_extlen, 2); 1599 } else { 1600 uint8_t op_len = optparamlen; 1601 errs += ibuf_add(buf->buf, &op_len, 1); 1602 } 1603 errs += ibuf_add(buf->buf, opb->buf, ibuf_size(opb)); 1604 } 1605 1606 ibuf_free(opb); 1607 1608 if (errs) { 1609 ibuf_free(buf->buf); 1610 free(buf); 1611 bgp_fsm(p, EVNT_CON_FATAL); 1612 return; 1613 } 1614 1615 if (session_sendmsg(buf, p) == -1) { 1616 bgp_fsm(p, EVNT_CON_FATAL); 1617 return; 1618 } 1619 1620 p->stats.msg_sent_open++; 1621 } 1622 1623 void 1624 session_keepalive(struct peer *p) 1625 { 1626 struct bgp_msg *buf; 1627 1628 if ((buf = session_newmsg(KEEPALIVE, MSGSIZE_KEEPALIVE)) == NULL || 1629 session_sendmsg(buf, p) == -1) { 1630 bgp_fsm(p, EVNT_CON_FATAL); 1631 return; 1632 } 1633 1634 start_timer_keepalive(p); 1635 p->stats.msg_sent_keepalive++; 1636 } 1637 1638 void 1639 
session_update(uint32_t peerid, void *data, size_t datalen) 1640 { 1641 struct peer *p; 1642 struct bgp_msg *buf; 1643 1644 if ((p = getpeerbyid(conf, peerid)) == NULL) { 1645 log_warnx("no such peer: id=%u", peerid); 1646 return; 1647 } 1648 1649 if (p->state != STATE_ESTABLISHED) 1650 return; 1651 1652 if ((buf = session_newmsg(UPDATE, MSGSIZE_HEADER + datalen)) == NULL) { 1653 bgp_fsm(p, EVNT_CON_FATAL); 1654 return; 1655 } 1656 1657 if (ibuf_add(buf->buf, data, datalen)) { 1658 ibuf_free(buf->buf); 1659 free(buf); 1660 bgp_fsm(p, EVNT_CON_FATAL); 1661 return; 1662 } 1663 1664 if (session_sendmsg(buf, p) == -1) { 1665 bgp_fsm(p, EVNT_CON_FATAL); 1666 return; 1667 } 1668 1669 start_timer_keepalive(p); 1670 p->stats.msg_sent_update++; 1671 } 1672 1673 void 1674 session_notification(struct peer *p, uint8_t errcode, uint8_t subcode, 1675 void *data, ssize_t datalen) 1676 { 1677 struct bgp_msg *buf; 1678 int errs = 0; 1679 1680 if (p->stats.last_sent_errcode) /* some notification already sent */ 1681 return; 1682 1683 log_notification(p, errcode, subcode, data, datalen, "sending"); 1684 1685 /* cap to maximum size */ 1686 if (datalen > MAX_PKTSIZE - MSGSIZE_NOTIFICATION_MIN) { 1687 log_peer_warnx(&p->conf, 1688 "oversized notification, data trunkated"); 1689 datalen = MAX_PKTSIZE - MSGSIZE_NOTIFICATION_MIN; 1690 } 1691 1692 if ((buf = session_newmsg(NOTIFICATION, 1693 MSGSIZE_NOTIFICATION_MIN + datalen)) == NULL) { 1694 bgp_fsm(p, EVNT_CON_FATAL); 1695 return; 1696 } 1697 1698 errs += ibuf_add(buf->buf, &errcode, sizeof(errcode)); 1699 errs += ibuf_add(buf->buf, &subcode, sizeof(subcode)); 1700 1701 if (datalen > 0) 1702 errs += ibuf_add(buf->buf, data, datalen); 1703 1704 if (errs) { 1705 ibuf_free(buf->buf); 1706 free(buf); 1707 bgp_fsm(p, EVNT_CON_FATAL); 1708 return; 1709 } 1710 1711 if (session_sendmsg(buf, p) == -1) { 1712 bgp_fsm(p, EVNT_CON_FATAL); 1713 return; 1714 } 1715 1716 p->stats.msg_sent_notification++; 1717 p->stats.last_sent_errcode = errcode; 1718 
p->stats.last_sent_suberr = subcode; 1719 } 1720 1721 int 1722 session_neighbor_rrefresh(struct peer *p) 1723 { 1724 uint8_t i; 1725 1726 if (!(p->capa.neg.refresh || p->capa.neg.enhanced_rr)) 1727 return (-1); 1728 1729 for (i = 0; i < AID_MAX; i++) { 1730 if (p->capa.neg.mp[i] != 0) 1731 session_rrefresh(p, i, ROUTE_REFRESH_REQUEST); 1732 } 1733 1734 return (0); 1735 } 1736 1737 void 1738 session_rrefresh(struct peer *p, uint8_t aid, uint8_t subtype) 1739 { 1740 struct bgp_msg *buf; 1741 int errs = 0; 1742 uint16_t afi; 1743 uint8_t safi; 1744 1745 switch (subtype) { 1746 case ROUTE_REFRESH_REQUEST: 1747 p->stats.refresh_sent_req++; 1748 break; 1749 case ROUTE_REFRESH_BEGIN_RR: 1750 case ROUTE_REFRESH_END_RR: 1751 /* requires enhanced route refresh */ 1752 if (!p->capa.neg.enhanced_rr) 1753 return; 1754 if (subtype == ROUTE_REFRESH_BEGIN_RR) 1755 p->stats.refresh_sent_borr++; 1756 else 1757 p->stats.refresh_sent_eorr++; 1758 break; 1759 default: 1760 fatalx("session_rrefresh: bad subtype %d", subtype); 1761 } 1762 1763 if (aid2afi(aid, &afi, &safi) == -1) 1764 fatalx("session_rrefresh: bad afi/safi pair"); 1765 1766 if ((buf = session_newmsg(RREFRESH, MSGSIZE_RREFRESH)) == NULL) { 1767 bgp_fsm(p, EVNT_CON_FATAL); 1768 return; 1769 } 1770 1771 afi = htons(afi); 1772 errs += ibuf_add(buf->buf, &afi, sizeof(afi)); 1773 errs += ibuf_add(buf->buf, &subtype, sizeof(subtype)); 1774 errs += ibuf_add(buf->buf, &safi, sizeof(safi)); 1775 1776 if (errs) { 1777 ibuf_free(buf->buf); 1778 free(buf); 1779 bgp_fsm(p, EVNT_CON_FATAL); 1780 return; 1781 } 1782 1783 if (session_sendmsg(buf, p) == -1) { 1784 bgp_fsm(p, EVNT_CON_FATAL); 1785 return; 1786 } 1787 1788 p->stats.msg_sent_rrefresh++; 1789 } 1790 1791 int 1792 session_graceful_restart(struct peer *p) 1793 { 1794 uint8_t i; 1795 1796 timer_set(&p->timers, Timer_RestartTimeout, 1797 p->capa.neg.grestart.timeout); 1798 1799 for (i = 0; i < AID_MAX; i++) { 1800 if (p->capa.neg.grestart.flags[i] & CAPA_GR_PRESENT) { 1801 if 
(imsg_rde(IMSG_SESSION_STALE, p->conf.id, 1802 &i, sizeof(i)) == -1) 1803 return (-1); 1804 log_peer_warnx(&p->conf, 1805 "graceful restart of %s, keeping routes", 1806 aid2str(i)); 1807 p->capa.neg.grestart.flags[i] |= CAPA_GR_RESTARTING; 1808 } else if (p->capa.neg.mp[i]) { 1809 if (imsg_rde(IMSG_SESSION_NOGRACE, p->conf.id, 1810 &i, sizeof(i)) == -1) 1811 return (-1); 1812 log_peer_warnx(&p->conf, 1813 "graceful restart of %s, flushing routes", 1814 aid2str(i)); 1815 } 1816 } 1817 return (0); 1818 } 1819 1820 int 1821 session_graceful_stop(struct peer *p) 1822 { 1823 uint8_t i; 1824 1825 for (i = 0; i < AID_MAX; i++) { 1826 /* 1827 * Only flush if the peer is restarting and the timeout fired. 1828 * In all other cases the session was already flushed when the 1829 * session went down or when the new open message was parsed. 1830 */ 1831 if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING) { 1832 log_peer_warnx(&p->conf, "graceful restart of %s, " 1833 "time-out, flushing", aid2str(i)); 1834 if (imsg_rde(IMSG_SESSION_FLUSH, p->conf.id, 1835 &i, sizeof(i)) == -1) 1836 return (-1); 1837 } 1838 p->capa.neg.grestart.flags[i] &= ~CAPA_GR_RESTARTING; 1839 } 1840 return (0); 1841 } 1842 1843 int 1844 session_dispatch_msg(struct pollfd *pfd, struct peer *p) 1845 { 1846 ssize_t n; 1847 socklen_t len; 1848 int error; 1849 1850 if (p->state == STATE_CONNECT) { 1851 if (pfd->revents & POLLOUT) { 1852 if (pfd->revents & POLLIN) { 1853 /* error occurred */ 1854 len = sizeof(error); 1855 if (getsockopt(pfd->fd, SOL_SOCKET, SO_ERROR, 1856 &error, &len) == -1 || error) { 1857 if (error) 1858 errno = error; 1859 if (errno != p->lasterr) { 1860 log_peer_warn(&p->conf, 1861 "socket error"); 1862 p->lasterr = errno; 1863 } 1864 bgp_fsm(p, EVNT_CON_OPENFAIL); 1865 return (1); 1866 } 1867 } 1868 bgp_fsm(p, EVNT_CON_OPEN); 1869 return (1); 1870 } 1871 if (pfd->revents & POLLHUP) { 1872 bgp_fsm(p, EVNT_CON_OPENFAIL); 1873 return (1); 1874 } 1875 if (pfd->revents & (POLLERR|POLLNVAL)) { 
1876 bgp_fsm(p, EVNT_CON_FATAL); 1877 return (1); 1878 } 1879 return (0); 1880 } 1881 1882 if (pfd->revents & POLLHUP) { 1883 bgp_fsm(p, EVNT_CON_CLOSED); 1884 return (1); 1885 } 1886 if (pfd->revents & (POLLERR|POLLNVAL)) { 1887 bgp_fsm(p, EVNT_CON_FATAL); 1888 return (1); 1889 } 1890 1891 if (pfd->revents & POLLOUT && p->wbuf.queued) { 1892 if ((error = msgbuf_write(&p->wbuf)) <= 0 && errno != EAGAIN) { 1893 if (error == 0) 1894 log_peer_warnx(&p->conf, "Connection closed"); 1895 else if (error == -1) 1896 log_peer_warn(&p->conf, "write error"); 1897 bgp_fsm(p, EVNT_CON_FATAL); 1898 return (1); 1899 } 1900 p->stats.last_write = getmonotime(); 1901 if (p->holdtime > 0) 1902 timer_set(&p->timers, Timer_SendHold, 1903 p->holdtime < INTERVAL_HOLD ? INTERVAL_HOLD : 1904 p->holdtime); 1905 if (p->throttled && p->wbuf.queued < SESS_MSG_LOW_MARK) { 1906 if (imsg_rde(IMSG_XON, p->conf.id, NULL, 0) == -1) 1907 log_peer_warn(&p->conf, "imsg_compose XON"); 1908 else 1909 p->throttled = 0; 1910 } 1911 if (!(pfd->revents & POLLIN)) 1912 return (1); 1913 } 1914 1915 if (p->rbuf && pfd->revents & POLLIN) { 1916 if ((n = read(p->fd, p->rbuf->buf + p->rbuf->wpos, 1917 sizeof(p->rbuf->buf) - p->rbuf->wpos)) == -1) { 1918 if (errno != EINTR && errno != EAGAIN) { 1919 log_peer_warn(&p->conf, "read error"); 1920 bgp_fsm(p, EVNT_CON_FATAL); 1921 } 1922 return (1); 1923 } 1924 if (n == 0) { /* connection closed */ 1925 bgp_fsm(p, EVNT_CON_CLOSED); 1926 return (1); 1927 } 1928 1929 p->rbuf->wpos += n; 1930 p->stats.last_read = getmonotime(); 1931 return (1); 1932 } 1933 return (0); 1934 } 1935 1936 void 1937 session_process_msg(struct peer *p) 1938 { 1939 struct mrt *mrt; 1940 ssize_t rpos, av, left; 1941 int processed = 0; 1942 uint16_t msglen; 1943 uint8_t msgtype; 1944 1945 rpos = 0; 1946 av = p->rbuf->wpos; 1947 p->rpending = 0; 1948 1949 /* 1950 * session might drop to IDLE -> buffers deallocated 1951 * we MUST check rbuf != NULL before use 1952 */ 1953 for (;;) { 1954 if (p->rbuf 
== NULL) 1955 return; 1956 if (rpos + MSGSIZE_HEADER > av) 1957 break; 1958 if (parse_header(p, p->rbuf->buf + rpos, &msglen, 1959 &msgtype) == -1) 1960 return; 1961 if (rpos + msglen > av) 1962 break; 1963 p->rbuf->rptr = p->rbuf->buf + rpos; 1964 1965 /* dump to MRT as soon as we have a full packet */ 1966 LIST_FOREACH(mrt, &mrthead, entry) { 1967 if (!(mrt->type == MRT_ALL_IN || (msgtype == UPDATE && 1968 mrt->type == MRT_UPDATE_IN))) 1969 continue; 1970 if ((mrt->peer_id == 0 && mrt->group_id == 0) || 1971 mrt->peer_id == p->conf.id || (mrt->group_id != 0 && 1972 mrt->group_id == p->conf.groupid)) 1973 mrt_dump_bgp_msg(mrt, p->rbuf->rptr, msglen, p, 1974 msgtype); 1975 } 1976 1977 switch (msgtype) { 1978 case OPEN: 1979 bgp_fsm(p, EVNT_RCVD_OPEN); 1980 p->stats.msg_rcvd_open++; 1981 break; 1982 case UPDATE: 1983 bgp_fsm(p, EVNT_RCVD_UPDATE); 1984 p->stats.msg_rcvd_update++; 1985 break; 1986 case NOTIFICATION: 1987 bgp_fsm(p, EVNT_RCVD_NOTIFICATION); 1988 p->stats.msg_rcvd_notification++; 1989 break; 1990 case KEEPALIVE: 1991 bgp_fsm(p, EVNT_RCVD_KEEPALIVE); 1992 p->stats.msg_rcvd_keepalive++; 1993 break; 1994 case RREFRESH: 1995 parse_rrefresh(p); 1996 p->stats.msg_rcvd_rrefresh++; 1997 break; 1998 default: /* cannot happen */ 1999 session_notification(p, ERR_HEADER, ERR_HDR_TYPE, 2000 &msgtype, 1); 2001 log_warnx("received message with unknown type %u", 2002 msgtype); 2003 bgp_fsm(p, EVNT_CON_FATAL); 2004 } 2005 rpos += msglen; 2006 if (++processed > MSG_PROCESS_LIMIT) { 2007 p->rpending = 1; 2008 break; 2009 } 2010 } 2011 2012 if (rpos < av) { 2013 left = av - rpos; 2014 memmove(&p->rbuf->buf, p->rbuf->buf + rpos, left); 2015 p->rbuf->wpos = left; 2016 } else 2017 p->rbuf->wpos = 0; 2018 } 2019 2020 int 2021 parse_header(struct peer *peer, u_char *data, uint16_t *len, uint8_t *type) 2022 { 2023 u_char *p; 2024 uint16_t olen; 2025 static const uint8_t marker[MSGSIZE_HEADER_MARKER] = { 0xff, 0xff, 2026 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 2027 0xff, 0xff, 
0xff, 0xff, 0xff, 0xff, 0xff };

	/* caller MUST make sure we are getting 19 bytes! */
	p = data;
	if (memcmp(p, marker, sizeof(marker))) {
		log_peer_warnx(&peer->conf, "sync error");
		session_notification(peer, ERR_HEADER, ERR_HDR_SYNC, NULL, 0);
		bgp_fsm(peer, EVNT_CON_FATAL);
		return (-1);
	}
	p += MSGSIZE_HEADER_MARKER;

	/* olen keeps the length in network byte order for notifications */
	memcpy(&olen, p, 2);
	*len = ntohs(olen);
	p += 2;
	memcpy(type, p, 1);

	/* generic bounds first, per-type minimum lengths below */
	if (*len < MSGSIZE_HEADER || *len > MAX_PKTSIZE) {
		log_peer_warnx(&peer->conf,
		    "received message: illegal length: %u byte", *len);
		session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
		    &olen, sizeof(olen));
		bgp_fsm(peer, EVNT_CON_FATAL);
		return (-1);
	}

	switch (*type) {
	case OPEN:
		if (*len < MSGSIZE_OPEN_MIN) {
			log_peer_warnx(&peer->conf,
			    "received OPEN: illegal len: %u byte", *len);
			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
			    &olen, sizeof(olen));
			bgp_fsm(peer, EVNT_CON_FATAL);
			return (-1);
		}
		break;
	case NOTIFICATION:
		if (*len < MSGSIZE_NOTIFICATION_MIN) {
			log_peer_warnx(&peer->conf,
			    "received NOTIFICATION: illegal len: %u byte",
			    *len);
			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
			    &olen, sizeof(olen));
			bgp_fsm(peer, EVNT_CON_FATAL);
			return (-1);
		}
		break;
	case UPDATE:
		if (*len < MSGSIZE_UPDATE_MIN) {
			log_peer_warnx(&peer->conf,
			    "received UPDATE: illegal len: %u byte", *len);
			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
			    &olen, sizeof(olen));
			bgp_fsm(peer, EVNT_CON_FATAL);
			return (-1);
		}
		break;
	case KEEPALIVE:
		/* a KEEPALIVE has a fixed size */
		if (*len != MSGSIZE_KEEPALIVE) {
			log_peer_warnx(&peer->conf,
			    "received KEEPALIVE: illegal len: %u byte", *len);
			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
			    &olen, sizeof(olen));
			bgp_fsm(peer, EVNT_CON_FATAL);
			return (-1);
		}
		break;
	case RREFRESH:
		if (*len < MSGSIZE_RREFRESH_MIN) {
			log_peer_warnx(&peer->conf,
			    "received RREFRESH: illegal len: %u byte", *len);
			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
			    &olen, sizeof(olen));
			bgp_fsm(peer, EVNT_CON_FATAL);
			return (-1);
		}
		break;
	default:
		log_peer_warnx(&peer->conf,
		    "received msg with unknown type %u", *type);
		session_notification(peer, ERR_HEADER, ERR_HDR_TYPE,
		    type, 1);
		bgp_fsm(peer, EVNT_CON_FATAL);
		return (-1);
	}
	return (0);
}

/*
 * Parse the OPEN message at peer->rbuf->rptr.
 *
 * Checks the version, AS number, hold time and BGP identifier, walks the
 * optional parameters (classic or RFC 9072 extended length encoding) and
 * hands capability parameters to parse_capabilities().  The negotiated
 * hold time and the remote BGP ID are stored in the peer.
 *
 * On any error a NOTIFICATION is sent, the session is moved to
 * STATE_IDLE and -1 is returned.  Returns 0 if the OPEN is acceptable.
 */
int
parse_open(struct peer *peer)
{
	u_char		*p, *op_val;
	uint8_t		 version, rversion;
	uint16_t	 short_as, msglen;
	uint16_t	 holdtime, oholdtime, myholdtime;
	uint32_t	 as, bgpid;
	uint16_t	 optparamlen, extlen, plen, op_len;
	uint8_t		 op_type, suberr = 0;

	/* fetch the total message length from the header */
	p = peer->rbuf->rptr;
	p += MSGSIZE_HEADER_MARKER;
	memcpy(&msglen, p, sizeof(msglen));
	msglen = ntohs(msglen);

	p = peer->rbuf->rptr;
	p += MSGSIZE_HEADER;	/* header is already checked */

	memcpy(&version, p, sizeof(version));
	p += sizeof(version);

	if (version != BGP_VERSION) {
		log_peer_warnx(&peer->conf,
		    "peer wants unrecognized version %u", version);
		if (version > BGP_VERSION)
			rversion = version - BGP_VERSION;
		else
			rversion = BGP_VERSION;
		session_notification(peer, ERR_OPEN, ERR_OPEN_VERSION,
		    &rversion, sizeof(rversion));
		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
		return (-1);
	}

	/* 2 byte AS from the fixed part; parse_capabilities() may update as */
	memcpy(&short_as, p, sizeof(short_as));
	p += sizeof(short_as);
	as = peer->short_as = ntohs(short_as);
	if (as == 0) {
		log_peer_warnx(&peer->conf,
		    "peer requests unacceptable AS %u", as);
		session_notification(peer, ERR_OPEN, ERR_OPEN_AS,
		    NULL, 0);
		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
		return (-1);
	}

	memcpy(&oholdtime, p, sizeof(oholdtime));
	p += sizeof(oholdtime);

	/* a hold time of 0 is accepted, anything else must reach the minimum */
	holdtime = ntohs(oholdtime);
	if (holdtime && holdtime < peer->conf.min_holdtime) {
		log_peer_warnx(&peer->conf,
		    "peer requests unacceptable holdtime %u", holdtime);
		session_notification(peer, ERR_OPEN, ERR_OPEN_HOLDTIME,
		    NULL, 0);
		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
		return (-1);
	}

	/* use the smaller of the peer's and our own hold time */
	myholdtime = peer->conf.holdtime;
	if (!myholdtime)
		myholdtime = conf->holdtime;
	if (holdtime < myholdtime)
		peer->holdtime = holdtime;
	else
		peer->holdtime = myholdtime;

	memcpy(&bgpid, p, sizeof(bgpid));
	p += sizeof(bgpid);

	/* check bgpid for validity - just disallow 0 */
	if (ntohl(bgpid) == 0) {
		log_peer_warnx(&peer->conf, "peer BGPID %u unacceptable",
		    ntohl(bgpid));
		session_notification(peer, ERR_OPEN, ERR_OPEN_BGPID,
		    NULL, 0);
		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
		return (-1);
	}
	peer->remote_bgpid = bgpid;

	extlen = 0;
	optparamlen = *p++;

	if (optparamlen == 0) {
		if (msglen != MSGSIZE_OPEN_MIN) {
			/* shared error exit for all length mismatches below */
bad_len:
			log_peer_warnx(&peer->conf,
			    "corrupt OPEN message received: length mismatch");
			session_notification(peer, ERR_OPEN, 0, NULL, 0);
			change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
			return (-1);
		}
	} else {
		if (msglen < MSGSIZE_OPEN_MIN + 1)
			goto bad_len;

		/*
		 * An extended length marker as first parameter type means
		 * the real length follows as a 2 byte value.
		 */
		op_type = *p;
		if (op_type == OPT_PARAM_EXT_LEN) {
			p++;
			memcpy(&optparamlen, p, sizeof(optparamlen));
			optparamlen = ntohs(optparamlen);
			p += sizeof(optparamlen);
			extlen = 1;
		}

		/* RFC9072 encoding has 3 extra bytes */
		if (optparamlen + 3 * extlen != msglen - MSGSIZE_OPEN_MIN)
			goto bad_len;
	}

	/* walk the optional parameters; plen counts the bytes left */
	plen = optparamlen;
	while (plen > 0) {
		/* need at least type + length (2 byte length if extended) */
		if (plen < 2 + extlen)
			goto bad_len;

		memcpy(&op_type, p, sizeof(op_type));
		p += sizeof(op_type);
		plen -= sizeof(op_type);
		if (!extlen) {
			op_len = *p++;
			plen--;
		} else {
			memcpy(&op_len, p, sizeof(op_len));
			op_len = ntohs(op_len);
			p += sizeof(op_len);
			plen -= sizeof(op_len);
		}
		if (op_len > 0) {
			if (plen < op_len)
				goto bad_len;
			op_val = p;
			p += op_len;
			plen -= op_len;
		} else
			op_val = NULL;

		switch (op_type) {
		case OPT_PARAM_CAPABILITIES:		/* RFC 3392 */
			if (parse_capabilities(peer, op_val, op_len,
			    &as) == -1) {
				session_notification(peer, ERR_OPEN, 0,
				    NULL, 0);
				change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
				return (-1);
			}
			break;
		case OPT_PARAM_AUTH:			/* deprecated */
		default:
			/*
			 * unsupported type
			 * the RFCs tell us to leave the data section empty
			 * and notify the peer with ERR_OPEN, ERR_OPEN_OPT.
			 * How the peer should know _which_ optional parameter
			 * we don't support is beyond me.
			 */
			log_peer_warnx(&peer->conf,
			    "received OPEN message with unsupported optional "
			    "parameter: type %u", op_type);
			session_notification(peer, ERR_OPEN, ERR_OPEN_OPT,
			    NULL, 0);
			change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
			/* no punish */
			timer_set(&peer->timers, Timer_IdleHold, 0);
			peer->IdleHoldTime /= 2;
			return (-1);
		}
	}

	/* if remote-as is zero and it's a cloned neighbor, accept any */
	if (peer->template && !peer->conf.remote_as && as != AS_TRANS) {
		peer->conf.remote_as = as;
		peer->conf.ebgp = (peer->conf.remote_as != peer->conf.local_as);
		if (!peer->conf.ebgp)
			/* force enforce_as off for iBGP sessions */
			peer->conf.enforce_as = ENFORCE_AS_OFF;
	}

	if (peer->conf.remote_as != as) {
		log_peer_warnx(&peer->conf, "peer sent wrong AS %s",
		    log_as(as));
		session_notification(peer, ERR_OPEN, ERR_OPEN_AS, NULL, 0);
		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
		return (-1);
	}
/* on iBGP sessions check for bgpid collision */
	if (!peer->conf.ebgp && peer->remote_bgpid == conf->bgpid) {
		log_peer_warnx(&peer->conf, "peer BGPID %u conflicts with ours",
		    ntohl(bgpid));
		session_notification(peer, ERR_OPEN, ERR_OPEN_BGPID,
		    NULL, 0);
		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
		return (-1);
	}

	/* work out the negotiated capabilities; suberr is set on failure */
	if (capa_neg_calc(peer, &suberr) == -1) {
		session_notification(peer, ERR_OPEN, suberr, NULL, 0);
		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
		return (-1);
	}

	return (0);
}

/*
 * Forward the body of the UPDATE message at peer->rbuf->rptr (everything
 * after the message header) to the RDE.
 * Returns 0 on success, -1 if the message could not be passed on.
 */
int
parse_update(struct peer *peer)
{
	u_char		*p;
	uint16_t	 datalen;

	/*
	 * we pass the message verbatim to the rde.
	 * in case of errors the whole session is reset with a
	 * notification anyway, we only need to know the peer
	 */
	p = peer->rbuf->rptr;
	p += MSGSIZE_HEADER_MARKER;
	memcpy(&datalen, p, sizeof(datalen));
	datalen = ntohs(datalen);

	p = peer->rbuf->rptr;
	p += MSGSIZE_HEADER;	/* header is already checked */
	datalen -= MSGSIZE_HEADER;

	if (imsg_rde(IMSG_UPDATE, peer->conf.id, p, datalen) == -1)
		return (-1);

	return (0);
}

/*
 * Parse the ROUTE-REFRESH message at peer->rbuf->rptr and forward the
 * request (address family and subtype) to the RDE.
 *
 * If enhanced route refresh was negotiated the subtype is honoured and
 * the message length is checked; a bad length is answered with a
 * NOTIFICATION and is fatal for the session.  Without it every message
 * is treated as a plain refresh request.  Messages with an unknown
 * subtype or AFI/SAFI pair, or sent without a negotiated refresh
 * capability, are logged and ignored.
 *
 * Returns 0 if the message was handled or ignored, -1 on a fatal error
 * or if the request could not be passed to the RDE.
 */
int
parse_rrefresh(struct peer *peer)
{
	struct route_refresh rr;
	uint16_t afi, datalen;
	uint8_t aid, safi, subtype;
	u_char *p;

	/* total message length from the header */
	p = peer->rbuf->rptr;
	p += MSGSIZE_HEADER_MARKER;
	memcpy(&datalen, p, sizeof(datalen));
	datalen = ntohs(datalen);

	p = peer->rbuf->rptr;
	p += MSGSIZE_HEADER;	/* header is already checked */

	/*
	 * We could check if we actually announced the capability but
	 * as long as the message is correctly encoded we don't care.
	 */

	/* afi, 2 byte */
	memcpy(&afi, p, sizeof(afi));
	afi = ntohs(afi);
	p += 2;
	/* subtype, 1 byte */
	subtype = *p;
	p += 1;
	/* safi, 1 byte */
	safi = *p;

	/* check subtype if peer announced enhanced route refresh */
	if (peer->capa.neg.enhanced_rr) {
		switch (subtype) {
		case ROUTE_REFRESH_REQUEST:
			/* no ORF support, so no oversized RREFRESH msgs */
			if (datalen != MSGSIZE_RREFRESH) {
				log_peer_warnx(&peer->conf,
				    "received RREFRESH: illegal len: %u byte",
				    datalen);
				/* the notification carries the length in
				 * network byte order */
				datalen = htons(datalen);
				session_notification(peer, ERR_HEADER,
				    ERR_HDR_LEN, &datalen, sizeof(datalen));
				bgp_fsm(peer, EVNT_CON_FATAL);
				return (-1);
			}
			peer->stats.refresh_rcvd_req++;
			break;
		case ROUTE_REFRESH_BEGIN_RR:
		case ROUTE_REFRESH_END_RR:
			/* special handling for RFC7313 */
			if (datalen != MSGSIZE_RREFRESH) {
				log_peer_warnx(&peer->conf,
				    "received RREFRESH: illegal len: %u byte",
				    datalen);
				/* send the message body back as data */
				p = peer->rbuf->rptr;
				p += MSGSIZE_HEADER;
				datalen -= MSGSIZE_HEADER;
				session_notification(peer, ERR_RREFRESH,
				    ERR_RR_INV_LEN, p, datalen);
				bgp_fsm(peer, EVNT_CON_FATAL);
				return (-1);
			}
			if (subtype == ROUTE_REFRESH_BEGIN_RR)
				peer->stats.refresh_rcvd_borr++;
			else
				peer->stats.refresh_rcvd_eorr++;
			break;
		default:
			log_peer_warnx(&peer->conf, "peer sent bad refresh, "
			    "bad subtype %d", subtype);
			return (0);
		}
	} else {
		/* force subtype to default */
		subtype = ROUTE_REFRESH_REQUEST;
		peer->stats.refresh_rcvd_req++;
	}

	/* afi/safi unchecked - unrecognized values will be ignored anyway */
	if (afi2aid(afi, safi, &aid) == -1) {
		log_peer_warnx(&peer->conf, "peer sent bad refresh, "
		    "invalid afi/safi pair");
		return (0);
	}

	if (!peer->capa.neg.refresh && !peer->capa.neg.enhanced_rr) {
		log_peer_warnx(&peer->conf, "peer sent unexpected refresh");
		return (0);
	}

	rr.aid = aid;
	rr.subtype = subtype;

	if (imsg_rde(IMSG_REFRESH, peer->conf.id, &rr, sizeof(rr)) == -1)
		return (-1);

	return (0);
}

int
parse_notification(struct peer *peer)
{
	u_char		*p;
	uint16_t	 datalen;
	uint8_t		 errcode;
	uint8_t		 subcode;
	uint8_t		 capa_code;
	uint8_t		 capa_len;
	size_t		 reason_len;
	uint8_t		 i;

	/* just log */
	p = peer->rbuf->rptr;
	p += MSGSIZE_HEADER_MARKER;
	memcpy(&datalen, p, sizeof(datalen));
	datalen = ntohs(datalen);

	p = peer->rbuf->rptr;
	p += MSGSIZE_HEADER;	/* header is already checked */
	datalen -= MSGSIZE_HEADER;

	memcpy(&errcode, p, sizeof(errcode));
	p += sizeof(errcode);
	datalen -= sizeof(errcode);

	memcpy(&subcode, p, sizeof(subcode));
	p += sizeof(subcode);
	datalen -= sizeof(subcode);

	log_notification(peer, errcode, subcode, p, datalen, "received");
	peer->errcnt++;
	peer->stats.last_rcvd_errcode = errcode;
	peer->stats.last_rcvd_suberr = subcode;

	if (errcode == ERR_OPEN && subcode == ERR_OPEN_CAPA) {
		if (datalen == 0) {	/* zebra likes to send those..
humbug */ 2485 log_peer_warnx(&peer->conf, "received \"unsupported " 2486 "capability\" notification without data part, " 2487 "disabling capability announcements altogether"); 2488 session_capa_ann_none(peer); 2489 } 2490 2491 while (datalen > 0) { 2492 if (datalen < 2) { 2493 log_peer_warnx(&peer->conf, 2494 "parse_notification: " 2495 "expect len >= 2, len is %u", datalen); 2496 return (-1); 2497 } 2498 memcpy(&capa_code, p, sizeof(capa_code)); 2499 p += sizeof(capa_code); 2500 datalen -= sizeof(capa_code); 2501 memcpy(&capa_len, p, sizeof(capa_len)); 2502 p += sizeof(capa_len); 2503 datalen -= sizeof(capa_len); 2504 if (datalen < capa_len) { 2505 log_peer_warnx(&peer->conf, 2506 "parse_notification: capa_len %u exceeds " 2507 "remaining msg length %u", capa_len, 2508 datalen); 2509 return (-1); 2510 } 2511 p += capa_len; 2512 datalen -= capa_len; 2513 switch (capa_code) { 2514 case CAPA_MP: 2515 for (i = 0; i < AID_MAX; i++) 2516 peer->capa.ann.mp[i] = 0; 2517 log_peer_warnx(&peer->conf, 2518 "disabling multiprotocol capability"); 2519 break; 2520 case CAPA_REFRESH: 2521 peer->capa.ann.refresh = 0; 2522 log_peer_warnx(&peer->conf, 2523 "disabling route refresh capability"); 2524 break; 2525 case CAPA_ROLE: 2526 peer->capa.ann.role_ena = 0; 2527 log_peer_warnx(&peer->conf, 2528 "disabling role capability"); 2529 break; 2530 case CAPA_RESTART: 2531 peer->capa.ann.grestart.restart = 0; 2532 log_peer_warnx(&peer->conf, 2533 "disabling restart capability"); 2534 break; 2535 case CAPA_AS4BYTE: 2536 peer->capa.ann.as4byte = 0; 2537 log_peer_warnx(&peer->conf, 2538 "disabling 4-byte AS num capability"); 2539 break; 2540 case CAPA_ADD_PATH: 2541 memset(peer->capa.ann.add_path, 0, 2542 sizeof(peer->capa.ann.add_path)); 2543 log_peer_warnx(&peer->conf, 2544 "disabling ADD-PATH capability"); 2545 break; 2546 case CAPA_ENHANCED_RR: 2547 peer->capa.ann.enhanced_rr = 0; 2548 log_peer_warnx(&peer->conf, 2549 "disabling enhanced route refresh " 2550 "capability"); 2551 break; 
2552 default: /* should not happen... */ 2553 log_peer_warnx(&peer->conf, "received " 2554 "\"unsupported capability\" notification " 2555 "for unknown capability %u, disabling " 2556 "capability announcements altogether", 2557 capa_code); 2558 session_capa_ann_none(peer); 2559 break; 2560 } 2561 } 2562 2563 return (1); 2564 } 2565 2566 if (errcode == ERR_OPEN && subcode == ERR_OPEN_OPT) { 2567 session_capa_ann_none(peer); 2568 return (1); 2569 } 2570 2571 if (errcode == ERR_CEASE && 2572 (subcode == ERR_CEASE_ADMIN_DOWN || 2573 subcode == ERR_CEASE_ADMIN_RESET)) { 2574 if (datalen > 1) { 2575 reason_len = *p++; 2576 datalen--; 2577 if (datalen < reason_len) { 2578 log_peer_warnx(&peer->conf, 2579 "received truncated shutdown reason"); 2580 return (0); 2581 } 2582 if (reason_len > REASON_LEN - 1) { 2583 log_peer_warnx(&peer->conf, 2584 "received overly long shutdown reason"); 2585 return (0); 2586 } 2587 memcpy(peer->stats.last_reason, p, reason_len); 2588 peer->stats.last_reason[reason_len] = '\0'; 2589 log_peer_warnx(&peer->conf, 2590 "received shutdown reason: \"%s\"", 2591 log_reason(peer->stats.last_reason)); 2592 p += reason_len; 2593 datalen -= reason_len; 2594 } 2595 } 2596 2597 return (0); 2598 } 2599 2600 int 2601 parse_capabilities(struct peer *peer, u_char *d, uint16_t dlen, uint32_t *as) 2602 { 2603 u_char *capa_val; 2604 uint32_t remote_as; 2605 uint16_t len; 2606 uint16_t afi; 2607 uint16_t gr_header; 2608 uint8_t safi; 2609 uint8_t aid; 2610 uint8_t flags; 2611 uint8_t capa_code; 2612 uint8_t capa_len; 2613 uint8_t i; 2614 2615 len = dlen; 2616 while (len > 0) { 2617 if (len < 2) { 2618 log_peer_warnx(&peer->conf, "Bad capabilities attr " 2619 "length: %u, too short", len); 2620 return (-1); 2621 } 2622 memcpy(&capa_code, d, sizeof(capa_code)); 2623 d += sizeof(capa_code); 2624 len -= sizeof(capa_code); 2625 memcpy(&capa_len, d, sizeof(capa_len)); 2626 d += sizeof(capa_len); 2627 len -= sizeof(capa_len); 2628 if (capa_len > 0) { 2629 if (len < 
capa_len) { 2630 log_peer_warnx(&peer->conf, 2631 "Bad capabilities attr length: " 2632 "len %u smaller than capa_len %u", 2633 len, capa_len); 2634 return (-1); 2635 } 2636 capa_val = d; 2637 d += capa_len; 2638 len -= capa_len; 2639 } else 2640 capa_val = NULL; 2641 2642 switch (capa_code) { 2643 case CAPA_MP: /* RFC 4760 */ 2644 if (capa_len != 4) { 2645 log_peer_warnx(&peer->conf, 2646 "Bad multi protocol capability length: " 2647 "%u", capa_len); 2648 break; 2649 } 2650 memcpy(&afi, capa_val, sizeof(afi)); 2651 afi = ntohs(afi); 2652 memcpy(&safi, capa_val + 3, sizeof(safi)); 2653 if (afi2aid(afi, safi, &aid) == -1) { 2654 log_peer_warnx(&peer->conf, 2655 "Received multi protocol capability: " 2656 " unknown AFI %u, safi %u pair", 2657 afi, safi); 2658 break; 2659 } 2660 peer->capa.peer.mp[aid] = 1; 2661 break; 2662 case CAPA_REFRESH: 2663 peer->capa.peer.refresh = 1; 2664 break; 2665 case CAPA_ROLE: 2666 if (capa_len != 1) { 2667 log_peer_warnx(&peer->conf, 2668 "Bad role capability length: %u", capa_len); 2669 break; 2670 } 2671 if (!peer->conf.ebgp) 2672 log_peer_warnx(&peer->conf, 2673 "Received role capability on iBGP session"); 2674 peer->capa.peer.role_ena = 1; 2675 peer->capa.peer.role = capa2role(*capa_val); 2676 break; 2677 case CAPA_RESTART: 2678 if (capa_len == 2) { 2679 /* peer only supports EoR marker */ 2680 peer->capa.peer.grestart.restart = 1; 2681 peer->capa.peer.grestart.timeout = 0; 2682 break; 2683 } else if (capa_len % 4 != 2) { 2684 log_peer_warnx(&peer->conf, 2685 "Bad graceful restart capability length: " 2686 "%u", capa_len); 2687 peer->capa.peer.grestart.restart = 0; 2688 peer->capa.peer.grestart.timeout = 0; 2689 break; 2690 } 2691 2692 memcpy(&gr_header, capa_val, sizeof(gr_header)); 2693 gr_header = ntohs(gr_header); 2694 peer->capa.peer.grestart.timeout = 2695 gr_header & CAPA_GR_TIMEMASK; 2696 if (peer->capa.peer.grestart.timeout == 0) { 2697 log_peer_warnx(&peer->conf, "Received " 2698 "graceful restart timeout is zero"); 2699 
peer->capa.peer.grestart.restart = 0; 2700 break; 2701 } 2702 2703 for (i = 2; i <= capa_len - 4; i += 4) { 2704 memcpy(&afi, capa_val + i, sizeof(afi)); 2705 afi = ntohs(afi); 2706 safi = capa_val[i + 2]; 2707 flags = capa_val[i + 3]; 2708 if (afi2aid(afi, safi, &aid) == -1) { 2709 log_peer_warnx(&peer->conf, 2710 "Received graceful restart capa: " 2711 " unknown AFI %u, safi %u pair", 2712 afi, safi); 2713 continue; 2714 } 2715 peer->capa.peer.grestart.flags[aid] |= 2716 CAPA_GR_PRESENT; 2717 if (flags & CAPA_GR_F_FLAG) 2718 peer->capa.peer.grestart.flags[aid] |= 2719 CAPA_GR_FORWARD; 2720 if (gr_header & CAPA_GR_R_FLAG) 2721 peer->capa.peer.grestart.flags[aid] |= 2722 CAPA_GR_RESTART; 2723 peer->capa.peer.grestart.restart = 2; 2724 } 2725 break; 2726 case CAPA_AS4BYTE: 2727 if (capa_len != 4) { 2728 log_peer_warnx(&peer->conf, 2729 "Bad AS4BYTE capability length: " 2730 "%u", capa_len); 2731 peer->capa.peer.as4byte = 0; 2732 break; 2733 } 2734 memcpy(&remote_as, capa_val, sizeof(remote_as)); 2735 *as = ntohl(remote_as); 2736 if (*as == 0) { 2737 log_peer_warnx(&peer->conf, 2738 "peer requests unacceptable AS %u", *as); 2739 session_notification(peer, ERR_OPEN, 2740 ERR_OPEN_AS, NULL, 0); 2741 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2742 return (-1); 2743 } 2744 peer->capa.peer.as4byte = 1; 2745 break; 2746 case CAPA_ADD_PATH: 2747 if (capa_len % 4 != 0) { 2748 log_peer_warnx(&peer->conf, 2749 "Bad ADD-PATH capability length: " 2750 "%u", capa_len); 2751 memset(peer->capa.peer.add_path, 0, 2752 sizeof(peer->capa.peer.add_path)); 2753 break; 2754 } 2755 for (i = 0; i <= capa_len - 4; i += 4) { 2756 memcpy(&afi, capa_val + i, sizeof(afi)); 2757 afi = ntohs(afi); 2758 safi = capa_val[i + 2]; 2759 flags = capa_val[i + 3]; 2760 if (afi2aid(afi, safi, &aid) == -1) { 2761 log_peer_warnx(&peer->conf, 2762 "Received ADD-PATH capa: " 2763 " unknown AFI %u, safi %u pair", 2764 afi, safi); 2765 memset(peer->capa.peer.add_path, 0, 2766 
sizeof(peer->capa.peer.add_path)); 2767 break; 2768 } 2769 if (flags & ~CAPA_AP_BIDIR) { 2770 log_peer_warnx(&peer->conf, 2771 "Received ADD-PATH capa: " 2772 " bad flags %x", flags); 2773 memset(peer->capa.peer.add_path, 0, 2774 sizeof(peer->capa.peer.add_path)); 2775 break; 2776 } 2777 peer->capa.peer.add_path[aid] = flags; 2778 } 2779 break; 2780 case CAPA_ENHANCED_RR: 2781 peer->capa.peer.enhanced_rr = 1; 2782 break; 2783 default: 2784 break; 2785 } 2786 } 2787 2788 return (0); 2789 } 2790 2791 int 2792 capa_neg_calc(struct peer *p, uint8_t *suberr) 2793 { 2794 uint8_t i, hasmp = 0; 2795 2796 /* a capability is accepted only if both sides announced it */ 2797 2798 p->capa.neg.refresh = 2799 (p->capa.ann.refresh && p->capa.peer.refresh) != 0; 2800 p->capa.neg.enhanced_rr = 2801 (p->capa.ann.enhanced_rr && p->capa.peer.enhanced_rr) != 0; 2802 2803 p->capa.neg.as4byte = 2804 (p->capa.ann.as4byte && p->capa.peer.as4byte) != 0; 2805 2806 /* MP: both side must agree on the AFI,SAFI pair */ 2807 for (i = 0; i < AID_MAX; i++) { 2808 if (p->capa.ann.mp[i] && p->capa.peer.mp[i]) 2809 p->capa.neg.mp[i] = 1; 2810 else 2811 p->capa.neg.mp[i] = 0; 2812 if (p->capa.ann.mp[i]) 2813 hasmp = 1; 2814 } 2815 /* if no MP capability present default to IPv4 unicast mode */ 2816 if (!hasmp) 2817 p->capa.neg.mp[AID_INET] = 1; 2818 2819 /* 2820 * graceful restart: the peer capabilities are of interest here. 2821 * It is necessary to compare the new values with the previous ones 2822 * and act accordingly. AFI/SAFI that are not part in the MP capability 2823 * are treated as not being present. 2824 * Also make sure that a flush happens if the session stopped 2825 * supporting graceful restart. 
2826 */ 2827 2828 for (i = 0; i < AID_MAX; i++) { 2829 int8_t negflags; 2830 2831 /* disable GR if the AFI/SAFI is not present */ 2832 if ((p->capa.peer.grestart.flags[i] & CAPA_GR_PRESENT && 2833 p->capa.neg.mp[i] == 0)) 2834 p->capa.peer.grestart.flags[i] = 0; /* disable */ 2835 /* look at current GR state and decide what to do */ 2836 negflags = p->capa.neg.grestart.flags[i]; 2837 p->capa.neg.grestart.flags[i] = p->capa.peer.grestart.flags[i]; 2838 if (negflags & CAPA_GR_RESTARTING) { 2839 if (p->capa.ann.grestart.restart != 0 && 2840 p->capa.peer.grestart.flags[i] & CAPA_GR_FORWARD) { 2841 p->capa.neg.grestart.flags[i] |= 2842 CAPA_GR_RESTARTING; 2843 } else { 2844 if (imsg_rde(IMSG_SESSION_FLUSH, p->conf.id, 2845 &i, sizeof(i)) == -1) { 2846 log_peer_warnx(&p->conf, 2847 "imsg send failed"); 2848 return (-1); 2849 } 2850 log_peer_warnx(&p->conf, "graceful restart of " 2851 "%s, not restarted, flushing", aid2str(i)); 2852 } 2853 } 2854 } 2855 p->capa.neg.grestart.timeout = p->capa.peer.grestart.timeout; 2856 p->capa.neg.grestart.restart = p->capa.peer.grestart.restart; 2857 if (p->capa.ann.grestart.restart == 0) 2858 p->capa.neg.grestart.restart = 0; 2859 2860 2861 /* 2862 * ADD-PATH: set only those bits where both sides agree. 2863 * For this compare our send bit with the recv bit from the peer 2864 * and vice versa. 2865 * The flags are stored from this systems view point. 
2866 */ 2867 memset(p->capa.neg.add_path, 0, sizeof(p->capa.neg.add_path)); 2868 if (p->capa.ann.add_path[0]) { 2869 for (i = AID_MIN; i < AID_MAX; i++) { 2870 if ((p->capa.ann.add_path[i] & CAPA_AP_RECV) && 2871 (p->capa.peer.add_path[i] & CAPA_AP_SEND)) { 2872 p->capa.neg.add_path[i] |= CAPA_AP_RECV; 2873 p->capa.neg.add_path[0] |= CAPA_AP_RECV; 2874 } 2875 if ((p->capa.ann.add_path[i] & CAPA_AP_SEND) && 2876 (p->capa.peer.add_path[i] & CAPA_AP_RECV)) { 2877 p->capa.neg.add_path[i] |= CAPA_AP_SEND; 2878 p->capa.neg.add_path[0] |= CAPA_AP_SEND; 2879 } 2880 } 2881 } 2882 2883 /* 2884 * Open policy: check that the policy is sensible. 2885 * 2886 * Make sure that the roles match and set the negotiated capability 2887 * to the role of the peer. So the RDE can inject the OTC attribute. 2888 * See RFC 9234, section 4.2. 2889 * These checks should only happen on ebgp sessions. 2890 */ 2891 if (p->capa.ann.role_ena != 0 && p->capa.peer.role_ena != 0 && 2892 p->conf.ebgp) { 2893 switch (p->capa.ann.role) { 2894 case ROLE_PROVIDER: 2895 if (p->capa.peer.role != ROLE_CUSTOMER) 2896 goto fail; 2897 break; 2898 case ROLE_RS: 2899 if (p->capa.peer.role != ROLE_RS_CLIENT) 2900 goto fail; 2901 break; 2902 case ROLE_RS_CLIENT: 2903 if (p->capa.peer.role != ROLE_RS) 2904 goto fail; 2905 break; 2906 case ROLE_CUSTOMER: 2907 if (p->capa.peer.role != ROLE_PROVIDER) 2908 goto fail; 2909 break; 2910 case ROLE_PEER: 2911 if (p->capa.peer.role != ROLE_PEER) 2912 goto fail; 2913 break; 2914 default: 2915 fail: 2916 log_peer_warnx(&p->conf, "open policy role mismatch: " 2917 "%s vs %s", log_policy(p->capa.ann.role), 2918 log_policy(p->capa.peer.role)); 2919 *suberr = ERR_OPEN_ROLE; 2920 return (-1); 2921 } 2922 p->capa.neg.role_ena = 1; 2923 p->capa.neg.role = p->capa.peer.role; 2924 } else if (p->capa.ann.role_ena == 2 && p->conf.ebgp) { 2925 /* enforce presence of open policy role capability */ 2926 log_peer_warnx(&p->conf, "open policy role enforced but " 2927 "not present"); 2928 
*suberr = ERR_OPEN_ROLE; 2929 return (-1); 2930 } 2931 2932 return (0); 2933 } 2934 2935 void 2936 session_dispatch_imsg(struct imsgbuf *ibuf, int idx, u_int *listener_cnt) 2937 { 2938 struct imsg imsg; 2939 struct mrt xmrt; 2940 struct route_refresh rr; 2941 struct mrt *mrt; 2942 struct imsgbuf *i; 2943 struct peer *p; 2944 struct listen_addr *la, *nla; 2945 struct session_dependon *sdon; 2946 u_char *data; 2947 int n, fd, depend_ok, restricted; 2948 uint16_t t; 2949 uint8_t aid, errcode, subcode; 2950 2951 while (ibuf) { 2952 if ((n = imsg_get(ibuf, &imsg)) == -1) 2953 fatal("session_dispatch_imsg: imsg_get error"); 2954 2955 if (n == 0) 2956 break; 2957 2958 switch (imsg.hdr.type) { 2959 case IMSG_SOCKET_CONN: 2960 case IMSG_SOCKET_CONN_CTL: 2961 if (idx != PFD_PIPE_MAIN) 2962 fatalx("reconf request not from parent"); 2963 if ((fd = imsg.fd) == -1) { 2964 log_warnx("expected to receive imsg fd to " 2965 "RDE but didn't receive any"); 2966 break; 2967 } 2968 if ((i = malloc(sizeof(struct imsgbuf))) == NULL) 2969 fatal(NULL); 2970 imsg_init(i, fd); 2971 if (imsg.hdr.type == IMSG_SOCKET_CONN) { 2972 if (ibuf_rde) { 2973 log_warnx("Unexpected imsg connection " 2974 "to RDE received"); 2975 msgbuf_clear(&ibuf_rde->w); 2976 free(ibuf_rde); 2977 } 2978 ibuf_rde = i; 2979 } else { 2980 if (ibuf_rde_ctl) { 2981 log_warnx("Unexpected imsg ctl " 2982 "connection to RDE received"); 2983 msgbuf_clear(&ibuf_rde_ctl->w); 2984 free(ibuf_rde_ctl); 2985 } 2986 ibuf_rde_ctl = i; 2987 } 2988 break; 2989 case IMSG_RECONF_CONF: 2990 if (idx != PFD_PIPE_MAIN) 2991 fatalx("reconf request not from parent"); 2992 nconf = new_config(); 2993 2994 copy_config(nconf, imsg.data); 2995 pending_reconf = 1; 2996 break; 2997 case IMSG_RECONF_PEER: 2998 if (idx != PFD_PIPE_MAIN) 2999 fatalx("reconf request not from parent"); 3000 if ((p = calloc(1, sizeof(struct peer))) == NULL) 3001 fatal("new_peer"); 3002 memcpy(&p->conf, imsg.data, sizeof(struct peer_config)); 3003 p->state = p->prev_state = 
STATE_NONE; 3004 p->reconf_action = RECONF_REINIT; 3005 if (RB_INSERT(peer_head, &nconf->peers, p) != NULL) 3006 fatalx("%s: peer tree is corrupt", __func__); 3007 break; 3008 case IMSG_RECONF_LISTENER: 3009 if (idx != PFD_PIPE_MAIN) 3010 fatalx("reconf request not from parent"); 3011 if (nconf == NULL) 3012 fatalx("IMSG_RECONF_LISTENER but no config"); 3013 nla = imsg.data; 3014 TAILQ_FOREACH(la, conf->listen_addrs, entry) 3015 if (!la_cmp(la, nla)) 3016 break; 3017 3018 if (la == NULL) { 3019 if (nla->reconf != RECONF_REINIT) 3020 fatalx("king bula sez: " 3021 "expected REINIT"); 3022 3023 if ((nla->fd = imsg.fd) == -1) 3024 log_warnx("expected to receive fd for " 3025 "%s but didn't receive any", 3026 log_sockaddr((struct sockaddr *) 3027 &nla->sa, nla->sa_len)); 3028 3029 la = calloc(1, sizeof(struct listen_addr)); 3030 if (la == NULL) 3031 fatal(NULL); 3032 memcpy(&la->sa, &nla->sa, sizeof(la->sa)); 3033 la->flags = nla->flags; 3034 la->fd = nla->fd; 3035 la->reconf = RECONF_REINIT; 3036 TAILQ_INSERT_TAIL(nconf->listen_addrs, la, 3037 entry); 3038 } else { 3039 if (nla->reconf != RECONF_KEEP) 3040 fatalx("king bula sez: expected KEEP"); 3041 la->reconf = RECONF_KEEP; 3042 } 3043 3044 break; 3045 case IMSG_RECONF_CTRL: 3046 if (idx != PFD_PIPE_MAIN) 3047 fatalx("reconf request not from parent"); 3048 if (imsg.hdr.len != IMSG_HEADER_SIZE + 3049 sizeof(restricted)) 3050 fatalx("RECONF_CTRL imsg with wrong len"); 3051 memcpy(&restricted, imsg.data, sizeof(restricted)); 3052 if (imsg.fd == -1) { 3053 log_warnx("expected to receive fd for control " 3054 "socket but didn't receive any"); 3055 break; 3056 } 3057 if (restricted) { 3058 control_shutdown(rcsock); 3059 rcsock = imsg.fd; 3060 } else { 3061 control_shutdown(csock); 3062 csock = imsg.fd; 3063 } 3064 break; 3065 case IMSG_RECONF_DRAIN: 3066 switch (idx) { 3067 case PFD_PIPE_ROUTE: 3068 if (nconf != NULL) 3069 fatalx("got unexpected %s from RDE", 3070 "IMSG_RECONF_DONE"); 3071 imsg_compose(ibuf_main, 
IMSG_RECONF_DONE, 0, 0, 3072 -1, NULL, 0); 3073 break; 3074 case PFD_PIPE_MAIN: 3075 if (nconf == NULL) 3076 fatalx("got unexpected %s from parent", 3077 "IMSG_RECONF_DONE"); 3078 imsg_compose(ibuf_main, IMSG_RECONF_DRAIN, 0, 0, 3079 -1, NULL, 0); 3080 break; 3081 default: 3082 fatalx("reconf request not from parent or RDE"); 3083 } 3084 break; 3085 case IMSG_RECONF_DONE: 3086 if (idx != PFD_PIPE_MAIN) 3087 fatalx("reconf request not from parent"); 3088 if (nconf == NULL) 3089 fatalx("got IMSG_RECONF_DONE but no config"); 3090 copy_config(conf, nconf); 3091 merge_peers(conf, nconf); 3092 3093 /* delete old listeners */ 3094 for (la = TAILQ_FIRST(conf->listen_addrs); la != NULL; 3095 la = nla) { 3096 nla = TAILQ_NEXT(la, entry); 3097 if (la->reconf == RECONF_NONE) { 3098 log_info("not listening on %s any more", 3099 log_sockaddr((struct sockaddr *) 3100 &la->sa, la->sa_len)); 3101 TAILQ_REMOVE(conf->listen_addrs, la, 3102 entry); 3103 close(la->fd); 3104 free(la); 3105 } 3106 } 3107 3108 /* add new listeners */ 3109 TAILQ_CONCAT(conf->listen_addrs, nconf->listen_addrs, 3110 entry); 3111 3112 setup_listeners(listener_cnt); 3113 free_config(nconf); 3114 nconf = NULL; 3115 pending_reconf = 0; 3116 log_info("SE reconfigured"); 3117 /* 3118 * IMSG_RECONF_DONE is sent when the RDE drained 3119 * the peer config sent in merge_peers(). 
3120 */ 3121 break; 3122 case IMSG_SESSION_DEPENDON: 3123 if (idx != PFD_PIPE_MAIN) 3124 fatalx("IFINFO message not from parent"); 3125 if (imsg.hdr.len != IMSG_HEADER_SIZE + 3126 sizeof(struct session_dependon)) 3127 fatalx("DEPENDON imsg with wrong len"); 3128 sdon = imsg.data; 3129 depend_ok = sdon->depend_state; 3130 3131 RB_FOREACH(p, peer_head, &conf->peers) 3132 if (!strcmp(p->conf.if_depend, sdon->ifname)) { 3133 if (depend_ok && !p->depend_ok) { 3134 p->depend_ok = depend_ok; 3135 bgp_fsm(p, EVNT_START); 3136 } else if (!depend_ok && p->depend_ok) { 3137 p->depend_ok = depend_ok; 3138 session_stop(p, 3139 ERR_CEASE_OTHER_CHANGE); 3140 } 3141 } 3142 break; 3143 case IMSG_MRT_OPEN: 3144 case IMSG_MRT_REOPEN: 3145 if (imsg.hdr.len > IMSG_HEADER_SIZE + 3146 sizeof(struct mrt)) { 3147 log_warnx("wrong imsg len"); 3148 break; 3149 } 3150 3151 memcpy(&xmrt, imsg.data, sizeof(struct mrt)); 3152 if ((xmrt.wbuf.fd = imsg.fd) == -1) 3153 log_warnx("expected to receive fd for mrt dump " 3154 "but didn't receive any"); 3155 3156 mrt = mrt_get(&mrthead, &xmrt); 3157 if (mrt == NULL) { 3158 /* new dump */ 3159 mrt = calloc(1, sizeof(struct mrt)); 3160 if (mrt == NULL) 3161 fatal("session_dispatch_imsg"); 3162 memcpy(mrt, &xmrt, sizeof(struct mrt)); 3163 TAILQ_INIT(&mrt->wbuf.bufs); 3164 LIST_INSERT_HEAD(&mrthead, mrt, entry); 3165 } else { 3166 /* old dump reopened */ 3167 close(mrt->wbuf.fd); 3168 mrt->wbuf.fd = xmrt.wbuf.fd; 3169 } 3170 break; 3171 case IMSG_MRT_CLOSE: 3172 if (imsg.hdr.len > IMSG_HEADER_SIZE + 3173 sizeof(struct mrt)) { 3174 log_warnx("wrong imsg len"); 3175 break; 3176 } 3177 3178 memcpy(&xmrt, imsg.data, sizeof(struct mrt)); 3179 mrt = mrt_get(&mrthead, &xmrt); 3180 if (mrt != NULL) 3181 mrt_done(mrt); 3182 break; 3183 case IMSG_CTL_KROUTE: 3184 case IMSG_CTL_KROUTE_ADDR: 3185 case IMSG_CTL_SHOW_NEXTHOP: 3186 case IMSG_CTL_SHOW_INTERFACE: 3187 case IMSG_CTL_SHOW_FIB_TABLES: 3188 case IMSG_CTL_SHOW_RTR: 3189 case IMSG_CTL_SHOW_TIMER: 3190 if (idx != 
PFD_PIPE_MAIN) 3191 fatalx("ctl kroute request not from parent"); 3192 control_imsg_relay(&imsg); 3193 break; 3194 case IMSG_CTL_SHOW_RIB: 3195 case IMSG_CTL_SHOW_RIB_PREFIX: 3196 case IMSG_CTL_SHOW_RIB_COMMUNITIES: 3197 case IMSG_CTL_SHOW_RIB_ATTR: 3198 case IMSG_CTL_SHOW_RIB_MEM: 3199 case IMSG_CTL_SHOW_NETWORK: 3200 case IMSG_CTL_SHOW_NEIGHBOR: 3201 case IMSG_CTL_SHOW_SET: 3202 if (idx != PFD_PIPE_ROUTE_CTL) 3203 fatalx("ctl rib request not from RDE"); 3204 control_imsg_relay(&imsg); 3205 break; 3206 case IMSG_CTL_END: 3207 case IMSG_CTL_RESULT: 3208 control_imsg_relay(&imsg); 3209 break; 3210 case IMSG_UPDATE: 3211 if (idx != PFD_PIPE_ROUTE) 3212 fatalx("update request not from RDE"); 3213 if (imsg.hdr.len > IMSG_HEADER_SIZE + 3214 MAX_PKTSIZE - MSGSIZE_HEADER || 3215 imsg.hdr.len < IMSG_HEADER_SIZE + 3216 MSGSIZE_UPDATE_MIN - MSGSIZE_HEADER) 3217 log_warnx("RDE sent invalid update"); 3218 else 3219 session_update(imsg.hdr.peerid, imsg.data, 3220 imsg.hdr.len - IMSG_HEADER_SIZE); 3221 break; 3222 case IMSG_UPDATE_ERR: 3223 if (idx != PFD_PIPE_ROUTE) 3224 fatalx("update request not from RDE"); 3225 if (imsg.hdr.len < IMSG_HEADER_SIZE + 2) { 3226 log_warnx("RDE sent invalid notification"); 3227 break; 3228 } 3229 if ((p = getpeerbyid(conf, imsg.hdr.peerid)) == NULL) { 3230 log_warnx("no such peer: id=%u", 3231 imsg.hdr.peerid); 3232 break; 3233 } 3234 data = imsg.data; 3235 errcode = *data++; 3236 subcode = *data++; 3237 3238 if (imsg.hdr.len == IMSG_HEADER_SIZE + 2) 3239 data = NULL; 3240 3241 session_notification(p, errcode, subcode, 3242 data, imsg.hdr.len - IMSG_HEADER_SIZE - 2); 3243 switch (errcode) { 3244 case ERR_CEASE: 3245 switch (subcode) { 3246 case ERR_CEASE_MAX_PREFIX: 3247 case ERR_CEASE_MAX_SENT_PREFIX: 3248 t = p->conf.max_out_prefix_restart; 3249 if (subcode == ERR_CEASE_MAX_PREFIX) 3250 t = p->conf.max_prefix_restart; 3251 3252 bgp_fsm(p, EVNT_STOP); 3253 if (t) 3254 timer_set(&p->timers, 3255 Timer_IdleHold, 60 * t); 3256 break; 3257 default: 
3258 bgp_fsm(p, EVNT_CON_FATAL); 3259 break; 3260 } 3261 break; 3262 default: 3263 bgp_fsm(p, EVNT_CON_FATAL); 3264 break; 3265 } 3266 break; 3267 case IMSG_REFRESH: 3268 if (idx != PFD_PIPE_ROUTE) 3269 fatalx("route refresh request not from RDE"); 3270 if (imsg.hdr.len < IMSG_HEADER_SIZE + sizeof(rr)) { 3271 log_warnx("RDE sent invalid refresh msg"); 3272 break; 3273 } 3274 if ((p = getpeerbyid(conf, imsg.hdr.peerid)) == NULL) { 3275 log_warnx("no such peer: id=%u", 3276 imsg.hdr.peerid); 3277 break; 3278 } 3279 memcpy(&rr, imsg.data, sizeof(rr)); 3280 if (rr.aid >= AID_MAX) 3281 fatalx("IMSG_REFRESH: bad AID"); 3282 session_rrefresh(p, rr.aid, rr.subtype); 3283 break; 3284 case IMSG_SESSION_RESTARTED: 3285 if (idx != PFD_PIPE_ROUTE) 3286 fatalx("update request not from RDE"); 3287 if (imsg.hdr.len < IMSG_HEADER_SIZE + sizeof(aid)) { 3288 log_warnx("RDE sent invalid restart msg"); 3289 break; 3290 } 3291 if ((p = getpeerbyid(conf, imsg.hdr.peerid)) == NULL) { 3292 log_warnx("no such peer: id=%u", 3293 imsg.hdr.peerid); 3294 break; 3295 } 3296 memcpy(&aid, imsg.data, sizeof(aid)); 3297 if (aid >= AID_MAX) 3298 fatalx("IMSG_SESSION_RESTARTED: bad AID"); 3299 if (p->capa.neg.grestart.flags[aid] & 3300 CAPA_GR_RESTARTING) { 3301 log_peer_warnx(&p->conf, 3302 "graceful restart of %s finished", 3303 aid2str(aid)); 3304 p->capa.neg.grestart.flags[aid] &= 3305 ~CAPA_GR_RESTARTING; 3306 timer_stop(&p->timers, Timer_RestartTimeout); 3307 3308 /* signal back to RDE to cleanup stale routes */ 3309 if (imsg_rde(IMSG_SESSION_RESTARTED, 3310 imsg.hdr.peerid, &aid, sizeof(aid)) == -1) 3311 fatal("imsg_compose: " 3312 "IMSG_SESSION_RESTARTED"); 3313 } 3314 break; 3315 case IMSG_SESSION_DOWN: 3316 if (idx != PFD_PIPE_ROUTE) 3317 fatalx("update request not from RDE"); 3318 if ((p = getpeerbyid(conf, imsg.hdr.peerid)) == NULL) { 3319 log_warnx("no such peer: id=%u", 3320 imsg.hdr.peerid); 3321 break; 3322 } 3323 session_stop(p, ERR_CEASE_ADMIN_DOWN); 3324 break; 3325 default: 3326 
break; 3327 } 3328 imsg_free(&imsg); 3329 } 3330 } 3331 3332 int 3333 la_cmp(struct listen_addr *a, struct listen_addr *b) 3334 { 3335 struct sockaddr_in *in_a, *in_b; 3336 struct sockaddr_in6 *in6_a, *in6_b; 3337 3338 if (a->sa.ss_family != b->sa.ss_family) 3339 return (1); 3340 3341 switch (a->sa.ss_family) { 3342 case AF_INET: 3343 in_a = (struct sockaddr_in *)&a->sa; 3344 in_b = (struct sockaddr_in *)&b->sa; 3345 if (in_a->sin_addr.s_addr != in_b->sin_addr.s_addr) 3346 return (1); 3347 if (in_a->sin_port != in_b->sin_port) 3348 return (1); 3349 break; 3350 case AF_INET6: 3351 in6_a = (struct sockaddr_in6 *)&a->sa; 3352 in6_b = (struct sockaddr_in6 *)&b->sa; 3353 if (memcmp(&in6_a->sin6_addr, &in6_b->sin6_addr, 3354 sizeof(struct in6_addr))) 3355 return (1); 3356 if (in6_a->sin6_port != in6_b->sin6_port) 3357 return (1); 3358 break; 3359 default: 3360 fatal("king bula sez: unknown address family"); 3361 /* NOTREACHED */ 3362 } 3363 3364 return (0); 3365 } 3366 3367 struct peer * 3368 getpeerbydesc(struct bgpd_config *c, const char *descr) 3369 { 3370 struct peer *p, *res = NULL; 3371 int match = 0; 3372 3373 RB_FOREACH(p, peer_head, &c->peers) 3374 if (!strcmp(p->conf.descr, descr)) { 3375 res = p; 3376 match++; 3377 } 3378 3379 if (match > 1) 3380 log_info("neighbor description \"%s\" not unique, request " 3381 "aborted", descr); 3382 3383 if (match == 1) 3384 return (res); 3385 else 3386 return (NULL); 3387 } 3388 3389 struct peer * 3390 getpeerbyip(struct bgpd_config *c, struct sockaddr *ip) 3391 { 3392 struct bgpd_addr addr; 3393 struct peer *p, *newpeer, *loose = NULL; 3394 uint32_t id; 3395 3396 sa2addr(ip, &addr, NULL); 3397 3398 /* we might want a more effective way to find peers by IP */ 3399 RB_FOREACH(p, peer_head, &c->peers) 3400 if (!p->conf.template && 3401 !memcmp(&addr, &p->conf.remote_addr, sizeof(addr))) 3402 return (p); 3403 3404 /* try template matching */ 3405 RB_FOREACH(p, peer_head, &c->peers) 3406 if (p->conf.template && 3407 
p->conf.remote_addr.aid == addr.aid && 3408 session_match_mask(p, &addr)) 3409 if (loose == NULL || loose->conf.remote_masklen < 3410 p->conf.remote_masklen) 3411 loose = p; 3412 3413 if (loose != NULL) { 3414 /* clone */ 3415 if ((newpeer = malloc(sizeof(struct peer))) == NULL) 3416 fatal(NULL); 3417 memcpy(newpeer, loose, sizeof(struct peer)); 3418 for (id = PEER_ID_DYN_MAX; id > PEER_ID_STATIC_MAX; id--) { 3419 if (getpeerbyid(c, id) == NULL) /* we found a free id */ 3420 break; 3421 } 3422 newpeer->template = loose; 3423 session_template_clone(newpeer, ip, id, 0); 3424 newpeer->state = newpeer->prev_state = STATE_NONE; 3425 newpeer->reconf_action = RECONF_KEEP; 3426 newpeer->rbuf = NULL; 3427 newpeer->rpending = 0; 3428 init_peer(newpeer); 3429 bgp_fsm(newpeer, EVNT_START); 3430 if (RB_INSERT(peer_head, &c->peers, newpeer) != NULL) 3431 fatalx("%s: peer tree is corrupt", __func__); 3432 return (newpeer); 3433 } 3434 3435 return (NULL); 3436 } 3437 3438 struct peer * 3439 getpeerbyid(struct bgpd_config *c, uint32_t peerid) 3440 { 3441 static struct peer lookup; 3442 3443 lookup.conf.id = peerid; 3444 3445 return RB_FIND(peer_head, &c->peers, &lookup); 3446 } 3447 3448 int 3449 peer_matched(struct peer *p, struct ctl_neighbor *n) 3450 { 3451 char *s; 3452 3453 if (n && n->addr.aid) { 3454 if (memcmp(&p->conf.remote_addr, &n->addr, 3455 sizeof(p->conf.remote_addr))) 3456 return 0; 3457 } else if (n && n->descr[0]) { 3458 s = n->is_group ? 
p->conf.group : p->conf.descr; 3459 if (strcmp(s, n->descr)) 3460 return 0; 3461 } 3462 return 1; 3463 } 3464 3465 void 3466 session_template_clone(struct peer *p, struct sockaddr *ip, uint32_t id, 3467 uint32_t as) 3468 { 3469 struct bgpd_addr remote_addr; 3470 3471 if (ip) 3472 sa2addr(ip, &remote_addr, NULL); 3473 else 3474 memcpy(&remote_addr, &p->conf.remote_addr, sizeof(remote_addr)); 3475 3476 memcpy(&p->conf, &p->template->conf, sizeof(struct peer_config)); 3477 3478 p->conf.id = id; 3479 3480 if (as) { 3481 p->conf.remote_as = as; 3482 p->conf.ebgp = (p->conf.remote_as != p->conf.local_as); 3483 if (!p->conf.ebgp) 3484 /* force enforce_as off for iBGP sessions */ 3485 p->conf.enforce_as = ENFORCE_AS_OFF; 3486 } 3487 3488 memcpy(&p->conf.remote_addr, &remote_addr, sizeof(remote_addr)); 3489 switch (p->conf.remote_addr.aid) { 3490 case AID_INET: 3491 p->conf.remote_masklen = 32; 3492 break; 3493 case AID_INET6: 3494 p->conf.remote_masklen = 128; 3495 break; 3496 } 3497 p->conf.template = 0; 3498 } 3499 3500 int 3501 session_match_mask(struct peer *p, struct bgpd_addr *a) 3502 { 3503 struct bgpd_addr masked; 3504 3505 applymask(&masked, a, p->conf.remote_masklen); 3506 if (memcmp(&masked, &p->conf.remote_addr, sizeof(masked)) == 0) 3507 return (1); 3508 return (0); 3509 } 3510 3511 void 3512 session_down(struct peer *peer) 3513 { 3514 memset(&peer->capa.neg, 0, sizeof(peer->capa.neg)); 3515 peer->stats.last_updown = getmonotime(); 3516 /* 3517 * session_down is called in the exit code path so check 3518 * if the RDE is still around, if not there is no need to 3519 * send the message. 
3520 */ 3521 if (ibuf_rde == NULL) 3522 return; 3523 if (imsg_rde(IMSG_SESSION_DOWN, peer->conf.id, NULL, 0) == -1) 3524 fatalx("imsg_compose error"); 3525 } 3526 3527 void 3528 session_up(struct peer *p) 3529 { 3530 struct session_up sup; 3531 3532 if (imsg_rde(IMSG_SESSION_ADD, p->conf.id, 3533 &p->conf, sizeof(p->conf)) == -1) 3534 fatalx("imsg_compose error"); 3535 3536 if (p->local.aid == AID_INET) { 3537 sup.local_v4_addr = p->local; 3538 sup.local_v6_addr = p->local_alt; 3539 } else { 3540 sup.local_v6_addr = p->local; 3541 sup.local_v4_addr = p->local_alt; 3542 } 3543 sup.remote_addr = p->remote; 3544 3545 sup.remote_bgpid = p->remote_bgpid; 3546 sup.short_as = p->short_as; 3547 memcpy(&sup.capa, &p->capa.neg, sizeof(sup.capa)); 3548 p->stats.last_updown = getmonotime(); 3549 if (imsg_rde(IMSG_SESSION_UP, p->conf.id, &sup, sizeof(sup)) == -1) 3550 fatalx("imsg_compose error"); 3551 } 3552 3553 int 3554 imsg_ctl_parent(int type, uint32_t peerid, pid_t pid, void *data, 3555 uint16_t datalen) 3556 { 3557 return (imsg_compose(ibuf_main, type, peerid, pid, -1, data, datalen)); 3558 } 3559 3560 int 3561 imsg_ctl_rde(int type, pid_t pid, void *data, uint16_t datalen) 3562 { 3563 if (ibuf_rde_ctl == NULL) 3564 return (0); 3565 3566 /* 3567 * Use control socket to talk to RDE to bypass the queue of the 3568 * regular imsg socket. 
3569 */ 3570 return (imsg_compose(ibuf_rde_ctl, type, 0, pid, -1, data, datalen)); 3571 } 3572 3573 int 3574 imsg_rde(int type, uint32_t peerid, void *data, uint16_t datalen) 3575 { 3576 if (ibuf_rde == NULL) 3577 return (0); 3578 3579 return (imsg_compose(ibuf_rde, type, peerid, 0, -1, data, datalen)); 3580 } 3581 3582 void 3583 session_demote(struct peer *p, int level) 3584 { 3585 struct demote_msg msg; 3586 3587 strlcpy(msg.demote_group, p->conf.demote_group, 3588 sizeof(msg.demote_group)); 3589 msg.level = level; 3590 if (imsg_compose(ibuf_main, IMSG_DEMOTE, p->conf.id, 0, -1, 3591 &msg, sizeof(msg)) == -1) 3592 fatalx("imsg_compose error"); 3593 3594 p->demoted += level; 3595 } 3596 3597 void 3598 session_stop(struct peer *peer, uint8_t subcode) 3599 { 3600 char data[REASON_LEN]; 3601 size_t datalen; 3602 size_t reason_len; 3603 char *communication; 3604 3605 datalen = 0; 3606 communication = peer->conf.reason; 3607 3608 if ((subcode == ERR_CEASE_ADMIN_DOWN || 3609 subcode == ERR_CEASE_ADMIN_RESET) 3610 && communication && *communication) { 3611 reason_len = strlen(communication); 3612 if (reason_len > REASON_LEN - 1) { 3613 log_peer_warnx(&peer->conf, 3614 "trying to send overly long shutdown reason"); 3615 } else { 3616 data[0] = reason_len; 3617 datalen = reason_len + sizeof(data[0]); 3618 memcpy(data + 1, communication, reason_len); 3619 } 3620 } 3621 switch (peer->state) { 3622 case STATE_OPENSENT: 3623 case STATE_OPENCONFIRM: 3624 case STATE_ESTABLISHED: 3625 session_notification(peer, ERR_CEASE, subcode, data, datalen); 3626 break; 3627 default: 3628 /* session not open, no need to send notification */ 3629 break; 3630 } 3631 bgp_fsm(peer, EVNT_STOP); 3632 } 3633 3634 void 3635 merge_peers(struct bgpd_config *c, struct bgpd_config *nc) 3636 { 3637 struct peer *p, *np, *next; 3638 3639 RB_FOREACH(p, peer_head, &c->peers) { 3640 /* templates are handled specially */ 3641 if (p->template != NULL) 3642 continue; 3643 np = getpeerbyid(nc, p->conf.id); 3644 
if (np == NULL) { 3645 p->reconf_action = RECONF_DELETE; 3646 continue; 3647 } 3648 3649 /* peer no longer uses TCP MD5SIG so deconfigure */ 3650 if (p->conf.auth.method == AUTH_MD5SIG && 3651 np->conf.auth.method != AUTH_MD5SIG) 3652 tcp_md5_del_listener(c, p); 3653 else if (np->conf.auth.method == AUTH_MD5SIG) 3654 tcp_md5_add_listener(c, np); 3655 3656 memcpy(&p->conf, &np->conf, sizeof(p->conf)); 3657 RB_REMOVE(peer_head, &nc->peers, np); 3658 free(np); 3659 3660 p->reconf_action = RECONF_KEEP; 3661 3662 /* had demotion, is demoted, demote removed? */ 3663 if (p->demoted && !p->conf.demote_group[0]) 3664 session_demote(p, -1); 3665 3666 /* if session is not open then refresh pfkey data */ 3667 if (p->state < STATE_OPENSENT && !p->template) 3668 imsg_compose(ibuf_main, IMSG_PFKEY_RELOAD, 3669 p->conf.id, 0, -1, NULL, 0); 3670 3671 /* sync the RDE in case we keep the peer */ 3672 if (imsg_rde(IMSG_SESSION_ADD, p->conf.id, 3673 &p->conf, sizeof(struct peer_config)) == -1) 3674 fatalx("imsg_compose error"); 3675 3676 /* apply the config to all clones of a template */ 3677 if (p->conf.template) { 3678 struct peer *xp; 3679 RB_FOREACH(xp, peer_head, &c->peers) { 3680 if (xp->template != p) 3681 continue; 3682 session_template_clone(xp, NULL, xp->conf.id, 3683 xp->conf.remote_as); 3684 if (imsg_rde(IMSG_SESSION_ADD, xp->conf.id, 3685 &xp->conf, sizeof(xp->conf)) == -1) 3686 fatalx("imsg_compose error"); 3687 } 3688 } 3689 } 3690 3691 if (imsg_rde(IMSG_RECONF_DRAIN, 0, NULL, 0) == -1) 3692 fatalx("imsg_compose error"); 3693 3694 /* pfkeys of new peers already loaded by the parent process */ 3695 RB_FOREACH_SAFE(np, peer_head, &nc->peers, next) { 3696 RB_REMOVE(peer_head, &nc->peers, np); 3697 if (RB_INSERT(peer_head, &c->peers, np) != NULL) 3698 fatalx("%s: peer tree is corrupt", __func__); 3699 if (np->conf.auth.method == AUTH_MD5SIG) 3700 tcp_md5_add_listener(c, np); 3701 } 3702 } 3703