1 /* $OpenBSD: session.c,v 1.424 2021/09/03 07:48:24 claudio Exp $ */ 2 3 /* 4 * Copyright (c) 2003, 2004, 2005 Henning Brauer <henning@openbsd.org> 5 * Copyright (c) 2017 Peter van Dijk <peter.van.dijk@powerdns.com> 6 * 7 * Permission to use, copy, modify, and distribute this software for any 8 * purpose with or without fee is hereby granted, provided that the above 9 * copyright notice and this permission notice appear in all copies. 10 * 11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18 */ 19 20 #include <sys/types.h> 21 22 #include <sys/mman.h> 23 #include <sys/socket.h> 24 #include <sys/time.h> 25 #include <sys/resource.h> 26 #include <sys/un.h> 27 #include <netinet/in.h> 28 #include <netinet/ip.h> 29 #include <netinet/tcp.h> 30 #include <arpa/inet.h> 31 #include <limits.h> 32 33 #include <err.h> 34 #include <errno.h> 35 #include <fcntl.h> 36 #include <ifaddrs.h> 37 #include <poll.h> 38 #include <pwd.h> 39 #include <signal.h> 40 #include <stdio.h> 41 #include <stdlib.h> 42 #include <string.h> 43 #include <syslog.h> 44 #include <unistd.h> 45 46 #include "bgpd.h" 47 #include "session.h" 48 #include "log.h" 49 50 #define PFD_PIPE_MAIN 0 51 #define PFD_PIPE_ROUTE 1 52 #define PFD_PIPE_ROUTE_CTL 2 53 #define PFD_SOCK_CTL 3 54 #define PFD_SOCK_RCTL 4 55 #define PFD_LISTENERS_START 5 56 57 void session_sighdlr(int); 58 int setup_listeners(u_int *); 59 void init_peer(struct peer *); 60 void start_timer_holdtime(struct peer *); 61 void start_timer_keepalive(struct peer *); 62 void session_close_connection(struct peer *); 63 void change_state(struct peer *, enum session_state, enum session_events); 64 int session_setup_socket(struct peer *); 65 void session_accept(int); 66 int session_connect(struct peer *); 67 void session_tcp_established(struct peer *); 68 void session_capa_ann_none(struct peer *); 69 int session_capa_add(struct ibuf *, u_int8_t, u_int8_t); 70 int session_capa_add_mp(struct ibuf *, u_int8_t); 71 int session_capa_add_afi(struct peer *, struct ibuf *, u_int8_t, u_int8_t); 72 struct bgp_msg *session_newmsg(enum msg_type, u_int16_t); 73 int session_sendmsg(struct bgp_msg *, struct peer *); 74 void session_open(struct peer *); 75 void session_keepalive(struct peer *); 76 void session_update(u_int32_t, void *, size_t); 77 void session_notification(struct peer *, u_int8_t, u_int8_t, void *, 78 ssize_t); 79 void session_rrefresh(struct peer *, u_int8_t, u_int8_t); 80 int session_graceful_restart(struct peer *); 81 int session_graceful_stop(struct peer *); 82 int session_dispatch_msg(struct pollfd *, struct peer *); 83 void session_process_msg(struct peer *); 84 int parse_header(struct peer *, u_char *, u_int16_t *, u_int8_t *); 85 int parse_open(struct peer *); 86 int parse_update(struct peer *); 87 int parse_rrefresh(struct peer *); 88 int parse_notification(struct peer *); 89 int parse_capabilities(struct peer *, u_char *, u_int16_t, u_int32_t *); 90 int capa_neg_calc(struct peer *); 91 void session_dispatch_imsg(struct imsgbuf *, int, u_int *); 92 void session_up(struct peer *); 93 void session_down(struct peer *); 94 int imsg_rde(int, u_int32_t, void *, u_int16_t); 95 void session_demote(struct peer *, int); 96 void merge_peers(struct bgpd_config *, struct bgpd_config *); 97 98 int la_cmp(struct listen_addr *, struct listen_addr *); 99 void session_template_clone(struct peer *, struct sockaddr *, 100 u_int32_t, u_int32_t); 101 int session_match_mask(struct peer *, struct bgpd_addr *); 102 103 static struct bgpd_config *conf, *nconf; 104 static struct imsgbuf *ibuf_rde; 105 static struct imsgbuf *ibuf_rde_ctl; 106 static struct imsgbuf *ibuf_main; 107 108 struct bgpd_sysdep sysdep; 109 volatile sig_atomic_t session_quit; 110 int pending_reconf; 111 int csock = -1, rcsock = -1; 112 u_int peer_cnt; 113 114 struct mrt_head mrthead; 115 time_t pauseaccept; 116 117 static inline int 118 peer_compare(const struct peer *a, const struct peer *b) 119 { 120 return a->conf.id - b->conf.id; 121 } 122 123 RB_GENERATE(peer_head, peer, entry, peer_compare); 124 125 void 126 session_sighdlr(int sig) 127 { 128 switch (sig) { 129 case SIGINT: 130 case SIGTERM: 131 session_quit = 1; 132 break; 133 } 134 } 135 136 int 137 setup_listeners(u_int *la_cnt) 138 { 139 int ttl = 255; 140 struct listen_addr *la; 141 u_int cnt = 0; 142 143 TAILQ_FOREACH(la, conf->listen_addrs, entry) { 144 la->reconf = RECONF_NONE; 145 cnt++; 146 147 if (la->flags & LISTENER_LISTENING) 148 continue; 149 150 if (la->fd == -1) { 151 log_warn("cannot establish listener on %s: invalid fd", 152 log_sockaddr((struct sockaddr *)&la->sa, 153 la->sa_len)); 154 continue; 155 } 156 157 if (tcp_md5_prep_listener(la, &conf->peers) == -1) 158 fatal("tcp_md5_prep_listener"); 159 160 /* set ttl to 255 so that ttl-security works */ 161 if (la->sa.ss_family == AF_INET && setsockopt(la->fd, 162 IPPROTO_IP, IP_TTL, &ttl, sizeof(ttl)) == -1) { 163 log_warn("setup_listeners setsockopt TTL"); 164 continue; 165 } 166 if (la->sa.ss_family == AF_INET6 && setsockopt(la->fd, 167 IPPROTO_IPV6, IPV6_UNICAST_HOPS, &ttl, sizeof(ttl)) == -1) { 168 log_warn("setup_listeners setsockopt hoplimit"); 169 continue; 170 } 171 172 if (listen(la->fd, MAX_BACKLOG)) { 173 close(la->fd); 174 fatal("listen"); 175 } 176 177 la->flags |= LISTENER_LISTENING; 178 179 log_info("listening on %s", 180 log_sockaddr((struct sockaddr *)&la->sa, la->sa_len)); 181 } 182 183 *la_cnt = cnt; 184 185 return (0); 186 } 187 188 void 189 session_main(int debug, int verbose) 190 { 191 int timeout; 192 unsigned int i, j, idx_peers, idx_listeners, idx_mrts; 193 u_int pfd_elms = 0, peer_l_elms = 0, mrt_l_elms = 0; 194 u_int listener_cnt, ctl_cnt, mrt_cnt; 195 u_int new_cnt; 196 struct passwd *pw; 197 struct peer *p, **peer_l = NULL, *next; 198 struct mrt *m, *xm, **mrt_l = NULL; 199 struct pollfd *pfd = NULL; 200 struct listen_addr *la; 201 void *newp; 202 time_t now; 203 short events; 204 205 log_init(debug, LOG_DAEMON); 206 log_setverbose(verbose); 207 208 log_procinit(log_procnames[PROC_SE]); 209 210 if ((pw = getpwnam(BGPD_USER)) == NULL) 211 fatal(NULL); 212 213 if (chroot(pw->pw_dir) == -1) 214 fatal("chroot"); 215 if (chdir("/") == -1) 216 fatal("chdir(\"/\")"); 217 218 setproctitle("session engine"); 219 220 if (setgroups(1, &pw->pw_gid) || 221 setresgid(pw->pw_gid, pw->pw_gid, pw->pw_gid) || 222 setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid)) 223 fatal("can't drop privileges"); 224 225 if (pledge("stdio inet recvfd", NULL) == -1) 226 fatal("pledge"); 227 228 signal(SIGTERM, session_sighdlr); 229 signal(SIGINT, session_sighdlr); 230 signal(SIGPIPE, SIG_IGN); 231 signal(SIGHUP, SIG_IGN); 232 signal(SIGALRM, SIG_IGN); 233 signal(SIGUSR1, SIG_IGN); 234 235 if ((ibuf_main = malloc(sizeof(struct imsgbuf))) == NULL) 236 fatal(NULL); 237 imsg_init(ibuf_main, 3); 238 239 LIST_INIT(&mrthead); 240 listener_cnt = 0; 241 peer_cnt = 0; 242 ctl_cnt = 0; 243 244 conf = new_config(); 245 log_info("session engine ready"); 246 247 while (session_quit == 0) { 248 /* check for peers to be initialized or deleted */ 249 if (!pending_reconf) { 250 RB_FOREACH_SAFE(p, peer_head, &conf->peers, next) { 251 /* cloned peer that idled out? */ 252 if (p->template && (p->state == STATE_IDLE || 253 p->state == STATE_ACTIVE) && 254 getmonotime() - p->stats.last_updown >= 255 INTERVAL_HOLD_CLONED) 256 p->reconf_action = RECONF_DELETE; 257 258 /* new peer that needs init? */ 259 if (p->state == STATE_NONE) 260 init_peer(p); 261 262 /* reinit due? */ 263 if (p->reconf_action == RECONF_REINIT) { 264 session_stop(p, ERR_CEASE_ADMIN_RESET); 265 if (!p->conf.down) 266 timer_set(&p->timers, 267 Timer_IdleHold, 0); 268 } 269 270 /* deletion due? */ 271 if (p->reconf_action == RECONF_DELETE) { 272 if (p->demoted) 273 session_demote(p, -1); 274 p->conf.demote_group[0] = 0; 275 session_stop(p, ERR_CEASE_PEER_UNCONF); 276 timer_remove_all(&p->timers); 277 tcp_md5_del_listener(conf, p); 278 log_peer_warnx(&p->conf, "removed"); 279 RB_REMOVE(peer_head, &conf->peers, p); 280 free(p); 281 peer_cnt--; 282 continue; 283 } 284 p->reconf_action = RECONF_NONE; 285 } 286 } 287 288 if (peer_cnt > peer_l_elms) { 289 if ((newp = reallocarray(peer_l, peer_cnt, 290 sizeof(struct peer *))) == NULL) { 291 /* panic for now */ 292 log_warn("could not resize peer_l from %u -> %u" 293 " entries", peer_l_elms, peer_cnt); 294 fatalx("exiting"); 295 } 296 peer_l = newp; 297 peer_l_elms = peer_cnt; 298 } 299 300 mrt_cnt = 0; 301 for (m = LIST_FIRST(&mrthead); m != NULL; m = xm) { 302 xm = LIST_NEXT(m, entry); 303 if (m->state == MRT_STATE_REMOVE) { 304 mrt_clean(m); 305 LIST_REMOVE(m, entry); 306 free(m); 307 continue; 308 } 309 if (m->wbuf.queued) 310 mrt_cnt++; 311 } 312 313 if (mrt_cnt > mrt_l_elms) { 314 if ((newp = reallocarray(mrt_l, mrt_cnt, 315 sizeof(struct mrt *))) == NULL) { 316 /* panic for now */ 317 log_warn("could not resize mrt_l from %u -> %u" 318 " entries", mrt_l_elms, mrt_cnt); 319 fatalx("exiting"); 320 } 321 mrt_l = newp; 322 mrt_l_elms = mrt_cnt; 323 } 324 325 new_cnt = PFD_LISTENERS_START + listener_cnt + peer_cnt + 326 ctl_cnt + mrt_cnt; 327 if (new_cnt > pfd_elms) { 328 if ((newp = reallocarray(pfd, new_cnt, 329 sizeof(struct pollfd))) == NULL) { 330 /* panic for now */ 331 log_warn("could not resize pfd from %u -> %u" 332 " entries", pfd_elms, new_cnt); 333 fatalx("exiting"); 334 } 335 pfd = newp; 336 pfd_elms = new_cnt; 337 } 338 339 bzero(pfd, sizeof(struct pollfd) * pfd_elms); 340 341 set_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main); 342 set_pollfd(&pfd[PFD_PIPE_ROUTE], ibuf_rde); 343 set_pollfd(&pfd[PFD_PIPE_ROUTE_CTL], ibuf_rde_ctl); 344 345 if (pauseaccept == 0) { 346 pfd[PFD_SOCK_CTL].fd = csock; 347 pfd[PFD_SOCK_CTL].events = POLLIN; 348 pfd[PFD_SOCK_RCTL].fd = rcsock; 349 pfd[PFD_SOCK_RCTL].events = POLLIN; 350 } else { 351 pfd[PFD_SOCK_CTL].fd = -1; 352 pfd[PFD_SOCK_RCTL].fd = -1; 353 } 354 355 i = PFD_LISTENERS_START; 356 TAILQ_FOREACH(la, conf->listen_addrs, entry) { 357 if (pauseaccept == 0) { 358 pfd[i].fd = la->fd; 359 pfd[i].events = POLLIN; 360 } else 361 pfd[i].fd = -1; 362 i++; 363 } 364 idx_listeners = i; 365 timeout = 240; /* loop every 240s at least */ 366 367 now = getmonotime(); 368 RB_FOREACH(p, peer_head, &conf->peers) { 369 time_t nextaction; 370 struct timer *pt; 371 372 /* check timers */ 373 if ((pt = timer_nextisdue(&p->timers, now)) != NULL) { 374 switch (pt->type) { 375 case Timer_Hold: 376 bgp_fsm(p, EVNT_TIMER_HOLDTIME); 377 break; 378 case Timer_SendHold: 379 bgp_fsm(p, EVNT_TIMER_SENDHOLD); 380 break; 381 case Timer_ConnectRetry: 382 bgp_fsm(p, EVNT_TIMER_CONNRETRY); 383 break; 384 case Timer_Keepalive: 385 bgp_fsm(p, EVNT_TIMER_KEEPALIVE); 386 break; 387 case Timer_IdleHold: 388 bgp_fsm(p, EVNT_START); 389 break; 390 case Timer_IdleHoldReset: 391 p->IdleHoldTime = 392 INTERVAL_IDLE_HOLD_INITIAL; 393 p->errcnt = 0; 394 timer_stop(&p->timers, 395 Timer_IdleHoldReset); 396 break; 397 case Timer_CarpUndemote: 398 timer_stop(&p->timers, 399 Timer_CarpUndemote); 400 if (p->demoted && 401 p->state == STATE_ESTABLISHED) 402 session_demote(p, -1); 403 break; 404 case Timer_RestartTimeout: 405 timer_stop(&p->timers, 406 Timer_RestartTimeout); 407 session_graceful_stop(p); 408 break; 409 default: 410 fatalx("King Bula lost in time"); 411 } 412 } 413 if ((nextaction = timer_nextduein(&p->timers, 414 now)) != -1 && nextaction < timeout) 415 timeout = nextaction; 416 417 /* are we waiting for a write? */ 418 events = POLLIN; 419 if (p->wbuf.queued > 0 || p->state == STATE_CONNECT) 420 events |= POLLOUT; 421 /* is there still work to do? */ 422 if (p->rpending && p->rbuf && p->rbuf->wpos) 423 timeout = 0; 424 425 /* poll events */ 426 if (p->fd != -1 && events != 0) { 427 pfd[i].fd = p->fd; 428 pfd[i].events = events; 429 peer_l[i - idx_listeners] = p; 430 i++; 431 } 432 } 433 434 idx_peers = i; 435 436 LIST_FOREACH(m, &mrthead, entry) 437 if (m->wbuf.queued) { 438 pfd[i].fd = m->wbuf.fd; 439 pfd[i].events = POLLOUT; 440 mrt_l[i - idx_peers] = m; 441 i++; 442 } 443 444 idx_mrts = i; 445 446 i += control_fill_pfds(pfd + i, pfd_elms -i); 447 448 if (i > pfd_elms) 449 fatalx("poll pfd overflow"); 450 451 if (pauseaccept && timeout > 1) 452 timeout = 1; 453 if (timeout < 0) 454 timeout = 0; 455 if (poll(pfd, i, timeout * 1000) == -1) 456 if (errno != EINTR) 457 fatal("poll error"); 458 459 /* 460 * If we previously saw fd exhaustion, we stop accept() 461 * for 1 second to throttle the accept() loop. 462 */ 463 if (pauseaccept && getmonotime() > pauseaccept + 1) 464 pauseaccept = 0; 465 466 if (handle_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main) == -1) { 467 log_warnx("SE: Lost connection to parent"); 468 session_quit = 1; 469 continue; 470 } else 471 session_dispatch_imsg(ibuf_main, PFD_PIPE_MAIN, 472 &listener_cnt); 473 474 if (handle_pollfd(&pfd[PFD_PIPE_ROUTE], ibuf_rde) == -1) { 475 log_warnx("SE: Lost connection to RDE"); 476 msgbuf_clear(&ibuf_rde->w); 477 free(ibuf_rde); 478 ibuf_rde = NULL; 479 } else 480 session_dispatch_imsg(ibuf_rde, PFD_PIPE_ROUTE, 481 &listener_cnt); 482 483 if (handle_pollfd(&pfd[PFD_PIPE_ROUTE_CTL], ibuf_rde_ctl) == 484 -1) { 485 log_warnx("SE: Lost connection to RDE control"); 486 msgbuf_clear(&ibuf_rde_ctl->w); 487 free(ibuf_rde_ctl); 488 ibuf_rde_ctl = NULL; 489 } else 490 session_dispatch_imsg(ibuf_rde_ctl, PFD_PIPE_ROUTE_CTL, 491 &listener_cnt); 492 493 if (pfd[PFD_SOCK_CTL].revents & POLLIN) 494 ctl_cnt += control_accept(csock, 0); 495 496 if (pfd[PFD_SOCK_RCTL].revents & POLLIN) 497 ctl_cnt += control_accept(rcsock, 1); 498 499 for (j = PFD_LISTENERS_START; j < idx_listeners; j++) 500 if (pfd[j].revents & POLLIN) 501 session_accept(pfd[j].fd); 502 503 for (; j < idx_peers; j++) 504 session_dispatch_msg(&pfd[j], 505 peer_l[j - idx_listeners]); 506 507 RB_FOREACH(p, peer_head, &conf->peers) 508 if (p->rbuf && p->rbuf->wpos) 509 session_process_msg(p); 510 511 for (; j < idx_mrts; j++) 512 if (pfd[j].revents & POLLOUT) 513 mrt_write(mrt_l[j - idx_peers]); 514 515 for (; j < i; j++) 516 ctl_cnt -= control_dispatch_msg(&pfd[j], &conf->peers); 517 } 518 519 RB_FOREACH_SAFE(p, peer_head, &conf->peers, next) { 520 RB_REMOVE(peer_head, &conf->peers, p); 521 strlcpy(p->conf.reason, 522 "bgpd shutting down", 523 sizeof(p->conf.reason)); 524 session_stop(p, ERR_CEASE_ADMIN_DOWN); 525 timer_remove_all(&p->timers); 526 free(p); 527 } 528 529 while ((m = LIST_FIRST(&mrthead)) != NULL) { 530 mrt_clean(m); 531 LIST_REMOVE(m, entry); 532 free(m); 533 } 534 535 free_config(conf); 536 free(peer_l); 537 free(mrt_l); 538 free(pfd); 539 540 /* close pipes */ 541 if (ibuf_rde) { 542 msgbuf_write(&ibuf_rde->w); 543 msgbuf_clear(&ibuf_rde->w); 544 close(ibuf_rde->fd); 545 free(ibuf_rde); 546 } 547 if (ibuf_rde_ctl) { 548 msgbuf_clear(&ibuf_rde_ctl->w); 549 close(ibuf_rde_ctl->fd); 550 free(ibuf_rde_ctl); 551 } 552 msgbuf_write(&ibuf_main->w); 553 msgbuf_clear(&ibuf_main->w); 554 close(ibuf_main->fd); 555 free(ibuf_main); 556 557 control_shutdown(csock); 558 control_shutdown(rcsock); 559 log_info("session engine exiting"); 560 exit(0); 561 } 562 563 void 564 init_peer(struct peer *p) 565 { 566 TAILQ_INIT(&p->timers); 567 p->fd = p->wbuf.fd = -1; 568 569 if (p->conf.if_depend[0]) 570 imsg_compose(ibuf_main, IMSG_IFINFO, 0, 0, -1, 571 p->conf.if_depend, sizeof(p->conf.if_depend)); 572 else 573 p->depend_ok = 1; 574 575 peer_cnt++; 576 577 change_state(p, STATE_IDLE, EVNT_NONE); 578 if (p->conf.down) 579 timer_stop(&p->timers, Timer_IdleHold); /* no autostart */ 580 else 581 timer_set(&p->timers, Timer_IdleHold, 0); /* start ASAP */ 582 583 /* 584 * on startup, demote if requested. 585 * do not handle new peers. they must reach ESTABLISHED beforehands. 586 * peers added at runtime have reconf_action set to RECONF_REINIT. 587 */ 588 if (p->reconf_action != RECONF_REINIT && p->conf.demote_group[0]) 589 session_demote(p, +1); 590 } 591 592 void 593 bgp_fsm(struct peer *peer, enum session_events event) 594 { 595 switch (peer->state) { 596 case STATE_NONE: 597 /* nothing */ 598 break; 599 case STATE_IDLE: 600 switch (event) { 601 case EVNT_START: 602 timer_stop(&peer->timers, Timer_Hold); 603 timer_stop(&peer->timers, Timer_SendHold); 604 timer_stop(&peer->timers, Timer_Keepalive); 605 timer_stop(&peer->timers, Timer_IdleHold); 606 607 /* allocate read buffer */ 608 peer->rbuf = calloc(1, sizeof(struct ibuf_read)); 609 if (peer->rbuf == NULL) 610 fatal(NULL); 611 612 /* init write buffer */ 613 msgbuf_init(&peer->wbuf); 614 615 peer->stats.last_sent_errcode = 0; 616 peer->stats.last_sent_suberr = 0; 617 peer->stats.last_rcvd_errcode = 0; 618 peer->stats.last_rcvd_suberr = 0; 619 620 if (!peer->depend_ok) 621 timer_stop(&peer->timers, Timer_ConnectRetry); 622 else if (peer->passive || peer->conf.passive || 623 peer->conf.template) { 624 change_state(peer, STATE_ACTIVE, event); 625 timer_stop(&peer->timers, Timer_ConnectRetry); 626 } else { 627 change_state(peer, STATE_CONNECT, event); 628 timer_set(&peer->timers, Timer_ConnectRetry, 629 conf->connectretry); 630 session_connect(peer); 631 } 632 peer->passive = 0; 633 break; 634 default: 635 /* ignore */ 636 break; 637 } 638 break; 639 case STATE_CONNECT: 640 switch (event) { 641 case EVNT_START: 642 /* ignore */ 643 break; 644 case EVNT_CON_OPEN: 645 session_tcp_established(peer); 646 session_open(peer); 647 timer_stop(&peer->timers, Timer_ConnectRetry); 648 peer->holdtime = INTERVAL_HOLD_INITIAL; 649 start_timer_holdtime(peer); 650 change_state(peer, STATE_OPENSENT, event); 651 break; 652 case EVNT_CON_OPENFAIL: 653 timer_set(&peer->timers, Timer_ConnectRetry, 654 conf->connectretry); 655 session_close_connection(peer); 656 change_state(peer, STATE_ACTIVE, event); 657 break; 658 case EVNT_TIMER_CONNRETRY: 659 timer_set(&peer->timers, Timer_ConnectRetry, 660 conf->connectretry); 661 session_connect(peer); 662 break; 663 default: 664 change_state(peer, STATE_IDLE, event); 665 break; 666 } 667 break; 668 case STATE_ACTIVE: 669 switch (event) { 670 case EVNT_START: 671 /* ignore */ 672 break; 673 case EVNT_CON_OPEN: 674 session_tcp_established(peer); 675 session_open(peer); 676 timer_stop(&peer->timers, Timer_ConnectRetry); 677 peer->holdtime = INTERVAL_HOLD_INITIAL; 678 start_timer_holdtime(peer); 679 change_state(peer, STATE_OPENSENT, event); 680 break; 681 case EVNT_CON_OPENFAIL: 682 timer_set(&peer->timers, Timer_ConnectRetry, 683 conf->connectretry); 684 session_close_connection(peer); 685 change_state(peer, STATE_ACTIVE, event); 686 break; 687 case EVNT_TIMER_CONNRETRY: 688 timer_set(&peer->timers, Timer_ConnectRetry, 689 peer->holdtime); 690 change_state(peer, STATE_CONNECT, event); 691 session_connect(peer); 692 break; 693 default: 694 change_state(peer, STATE_IDLE, event); 695 break; 696 } 697 break; 698 case STATE_OPENSENT: 699 switch (event) { 700 case EVNT_START: 701 /* ignore */ 702 break; 703 case EVNT_STOP: 704 change_state(peer, STATE_IDLE, event); 705 break; 706 case EVNT_CON_CLOSED: 707 session_close_connection(peer); 708 timer_set(&peer->timers, Timer_ConnectRetry, 709 conf->connectretry); 710 change_state(peer, STATE_ACTIVE, event); 711 break; 712 case EVNT_CON_FATAL: 713 change_state(peer, STATE_IDLE, event); 714 break; 715 case EVNT_TIMER_HOLDTIME: 716 case EVNT_TIMER_SENDHOLD: 717 session_notification(peer, ERR_HOLDTIMEREXPIRED, 718 0, NULL, 0); 719 change_state(peer, STATE_IDLE, event); 720 break; 721 case EVNT_RCVD_OPEN: 722 /* parse_open calls change_state itself on failure */ 723 if (parse_open(peer)) 724 break; 725 session_keepalive(peer); 726 change_state(peer, STATE_OPENCONFIRM, event); 727 break; 728 case EVNT_RCVD_NOTIFICATION: 729 if (parse_notification(peer)) { 730 change_state(peer, STATE_IDLE, event); 731 /* don't punish, capa negotiation */ 732 timer_set(&peer->timers, Timer_IdleHold, 0); 733 peer->IdleHoldTime /= 2; 734 } else 735 change_state(peer, STATE_IDLE, event); 736 break; 737 default: 738 session_notification(peer, 739 ERR_FSM, ERR_FSM_UNEX_OPENSENT, NULL, 0); 740 change_state(peer, STATE_IDLE, event); 741 break; 742 } 743 break; 744 case STATE_OPENCONFIRM: 745 switch (event) { 746 case EVNT_START: 747 /* ignore */ 748 break; 749 case EVNT_STOP: 750 change_state(peer, STATE_IDLE, event); 751 break; 752 case EVNT_CON_CLOSED: 753 case EVNT_CON_FATAL: 754 change_state(peer, STATE_IDLE, event); 755 break; 756 case EVNT_TIMER_HOLDTIME: 757 case EVNT_TIMER_SENDHOLD: 758 session_notification(peer, ERR_HOLDTIMEREXPIRED, 759 0, NULL, 0); 760 change_state(peer, STATE_IDLE, event); 761 break; 762 case EVNT_TIMER_KEEPALIVE: 763 session_keepalive(peer); 764 break; 765 case EVNT_RCVD_KEEPALIVE: 766 start_timer_holdtime(peer); 767 change_state(peer, STATE_ESTABLISHED, event); 768 break; 769 case EVNT_RCVD_NOTIFICATION: 770 parse_notification(peer); 771 change_state(peer, STATE_IDLE, event); 772 break; 773 default: 774 session_notification(peer, 775 ERR_FSM, ERR_FSM_UNEX_OPENCONFIRM, NULL, 0); 776 change_state(peer, STATE_IDLE, event); 777 break; 778 } 779 break; 780 case STATE_ESTABLISHED: 781 switch (event) { 782 case EVNT_START: 783 /* ignore */ 784 break; 785 case EVNT_STOP: 786 change_state(peer, STATE_IDLE, event); 787 break; 788 case EVNT_CON_CLOSED: 789 case EVNT_CON_FATAL: 790 change_state(peer, STATE_IDLE, event); 791 break; 792 case EVNT_TIMER_HOLDTIME: 793 case EVNT_TIMER_SENDHOLD: 794 session_notification(peer, ERR_HOLDTIMEREXPIRED, 795 0, NULL, 0); 796 change_state(peer, STATE_IDLE, event); 797 break; 798 case EVNT_TIMER_KEEPALIVE: 799 session_keepalive(peer); 800 break; 801 case EVNT_RCVD_KEEPALIVE: 802 start_timer_holdtime(peer); 803 break; 804 case EVNT_RCVD_UPDATE: 805 start_timer_holdtime(peer); 806 if (parse_update(peer)) 807 change_state(peer, STATE_IDLE, event); 808 else 809 start_timer_holdtime(peer); 810 break; 811 case EVNT_RCVD_NOTIFICATION: 812 parse_notification(peer); 813 change_state(peer, STATE_IDLE, event); 814 break; 815 default: 816 session_notification(peer, 817 ERR_FSM, ERR_FSM_UNEX_ESTABLISHED, NULL, 0); 818 change_state(peer, STATE_IDLE, event); 819 break; 820 } 821 break; 822 } 823 } 824 825 void 826 start_timer_holdtime(struct peer *peer) 827 { 828 if (peer->holdtime > 0) 829 timer_set(&peer->timers, Timer_Hold, peer->holdtime); 830 else 831 timer_stop(&peer->timers, Timer_Hold); 832 } 833 834 void 835 start_timer_keepalive(struct peer *peer) 836 { 837 if (peer->holdtime > 0) 838 timer_set(&peer->timers, Timer_Keepalive, peer->holdtime / 3); 839 else 840 timer_stop(&peer->timers, Timer_Keepalive); 841 } 842 843 void 844 session_close_connection(struct peer *peer) 845 { 846 if (peer->fd != -1) { 847 close(peer->fd); 848 pauseaccept = 0; 849 } 850 peer->fd = peer->wbuf.fd = -1; 851 } 852 853 void 854 change_state(struct peer *peer, enum session_state state, 855 enum session_events event) 856 { 857 struct mrt *mrt; 858 859 switch (state) { 860 case STATE_IDLE: 861 /* carp demotion first. new peers handled in init_peer */ 862 if (peer->state == STATE_ESTABLISHED && 863 peer->conf.demote_group[0] && !peer->demoted) 864 session_demote(peer, +1); 865 866 /* 867 * try to write out what's buffered (maybe a notification), 868 * don't bother if it fails 869 */ 870 if (peer->state >= STATE_OPENSENT && peer->wbuf.queued) 871 msgbuf_write(&peer->wbuf); 872 873 /* 874 * we must start the timer for the next EVNT_START 875 * if we are coming here due to an error and the 876 * session was not established successfully before, the 877 * starttimerinterval needs to be exponentially increased 878 */ 879 if (peer->IdleHoldTime == 0) 880 peer->IdleHoldTime = INTERVAL_IDLE_HOLD_INITIAL; 881 peer->holdtime = INTERVAL_HOLD_INITIAL; 882 timer_stop(&peer->timers, Timer_ConnectRetry); 883 timer_stop(&peer->timers, Timer_Keepalive); 884 timer_stop(&peer->timers, Timer_Hold); 885 timer_stop(&peer->timers, Timer_SendHold); 886 timer_stop(&peer->timers, Timer_IdleHold); 887 timer_stop(&peer->timers, Timer_IdleHoldReset); 888 session_close_connection(peer); 889 msgbuf_clear(&peer->wbuf); 890 free(peer->rbuf); 891 peer->rbuf = NULL; 892 peer->rpending = 0; 893 bzero(&peer->capa.peer, sizeof(peer->capa.peer)); 894 if (!peer->template) 895 imsg_compose(ibuf_main, IMSG_PFKEY_RELOAD, 896 peer->conf.id, 0, -1, NULL, 0); 897 898 if (event != EVNT_STOP) { 899 timer_set(&peer->timers, Timer_IdleHold, 900 peer->IdleHoldTime); 901 if (event != EVNT_NONE && 902 peer->IdleHoldTime < MAX_IDLE_HOLD/2) 903 peer->IdleHoldTime *= 2; 904 } 905 if (peer->state == STATE_ESTABLISHED) { 906 if (peer->capa.neg.grestart.restart == 2 && 907 (event == EVNT_CON_CLOSED || 908 event == EVNT_CON_FATAL)) { 909 /* don't punish graceful restart */ 910 timer_set(&peer->timers, Timer_IdleHold, 0); 911 peer->IdleHoldTime /= 2; 912 session_graceful_restart(peer); 913 } else 914 session_down(peer); 915 } 916 if (peer->state == STATE_NONE || 917 peer->state == STATE_ESTABLISHED) { 918 /* initialize capability negotiation structures */ 919 memcpy(&peer->capa.ann, &peer->conf.capabilities, 920 sizeof(peer->capa.ann)); 921 if (!peer->conf.announce_capa) 922 session_capa_ann_none(peer); 923 } 924 break; 925 case STATE_CONNECT: 926 if (peer->state == STATE_ESTABLISHED && 927 peer->capa.neg.grestart.restart == 2) { 928 /* do the graceful restart dance */ 929 session_graceful_restart(peer); 930 peer->holdtime = INTERVAL_HOLD_INITIAL; 931 timer_stop(&peer->timers, Timer_ConnectRetry); 932 timer_stop(&peer->timers, Timer_Keepalive); 933 timer_stop(&peer->timers, Timer_Hold); 934 timer_stop(&peer->timers, Timer_SendHold); 935 timer_stop(&peer->timers, Timer_IdleHold); 936 timer_stop(&peer->timers, Timer_IdleHoldReset); 937 session_close_connection(peer); 938 msgbuf_clear(&peer->wbuf); 939 bzero(&peer->capa.peer, sizeof(peer->capa.peer)); 940 } 941 break; 942 case STATE_ACTIVE: 943 if (!peer->template) 944 imsg_compose(ibuf_main, IMSG_PFKEY_RELOAD, 945 peer->conf.id, 0, -1, NULL, 0); 946 break; 947 case STATE_OPENSENT: 948 break; 949 case STATE_OPENCONFIRM: 950 break; 951 case STATE_ESTABLISHED: 952 timer_set(&peer->timers, Timer_IdleHoldReset, 953 peer->IdleHoldTime); 954 if (peer->demoted) 955 timer_set(&peer->timers, Timer_CarpUndemote, 956 INTERVAL_HOLD_DEMOTED); 957 session_up(peer); 958 break; 959 default: /* something seriously fucked */ 960 break; 961 } 962 963 log_statechange(peer, state, event); 964 LIST_FOREACH(mrt, &mrthead, entry) { 965 if (!(mrt->type == MRT_ALL_IN || mrt->type == MRT_ALL_OUT)) 966 continue; 967 if ((mrt->peer_id == 0 && mrt->group_id == 0) || 968 mrt->peer_id == peer->conf.id || (mrt->group_id != 0 && 969 mrt->group_id == peer->conf.groupid)) 970 mrt_dump_state(mrt, peer->state, state, peer); 971 } 972 peer->prev_state = peer->state; 973 peer->state = state; 974 } 975 976 void 977 session_accept(int listenfd) 978 { 979 int connfd; 980 socklen_t len; 981 struct sockaddr_storage cliaddr; 982 struct peer *p = NULL; 983 984 len = sizeof(cliaddr); 985 if ((connfd = accept4(listenfd, 986 (struct sockaddr *)&cliaddr, &len, 987 SOCK_CLOEXEC | SOCK_NONBLOCK)) == -1) { 988 if (errno == ENFILE || errno == EMFILE) 989 pauseaccept = getmonotime(); 990 else if (errno != EWOULDBLOCK && errno != EINTR && 991 errno != ECONNABORTED) 992 log_warn("accept"); 993 return; 994 } 995 996 p = getpeerbyip(conf, (struct sockaddr *)&cliaddr); 997 998 if (p != NULL && p->state == STATE_IDLE && p->errcnt < 2) { 999 if (timer_running(&p->timers, Timer_IdleHold, NULL)) { 1000 /* fast reconnect after clear */ 1001 p->passive = 1; 1002 bgp_fsm(p, EVNT_START); 1003 } 1004 } 1005 1006 if (p != NULL && 1007 (p->state == STATE_CONNECT || p->state == STATE_ACTIVE)) { 1008 if (p->fd != -1) { 1009 if (p->state == STATE_CONNECT) 1010 session_close_connection(p); 1011 else { 1012 close(connfd); 1013 return; 1014 } 1015 } 1016 1017 open: 1018 if (p->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) { 1019 log_peer_warnx(&p->conf, 1020 "ipsec or md5sig configured but not available"); 1021 close(connfd); 1022 return; 1023 } 1024 1025 if (tcp_md5_check(connfd, p) == -1) { 1026 close(connfd); 1027 return; 1028 } 1029 p->fd = p->wbuf.fd = connfd; 1030 if (session_setup_socket(p)) { 1031 close(connfd); 1032 return; 1033 } 1034 bgp_fsm(p, EVNT_CON_OPEN); 1035 return; 1036 } else if (p != NULL && p->state == STATE_ESTABLISHED && 1037 p->capa.neg.grestart.restart == 2) { 1038 /* first do the graceful restart dance */ 1039 change_state(p, STATE_CONNECT, EVNT_CON_CLOSED); 1040 /* then do part of the open dance */ 1041 goto open; 1042 } else { 1043 log_conn_attempt(p, (struct sockaddr *)&cliaddr, len); 1044 close(connfd); 1045 } 1046 } 1047 1048 int 1049 session_connect(struct peer *peer) 1050 { 1051 struct sockaddr *sa; 1052 struct bgpd_addr *bind_addr = NULL; 1053 socklen_t sa_len; 1054 1055 /* 1056 * we do not need the overcomplicated collision detection RFC 1771 1057 * describes; we simply make sure there is only ever one concurrent 1058 * tcp connection per peer. 1059 */ 1060 if (peer->fd != -1) 1061 return (-1); 1062 1063 if ((peer->fd = socket(aid2af(peer->conf.remote_addr.aid), 1064 SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, IPPROTO_TCP)) == -1) { 1065 log_peer_warn(&peer->conf, "session_connect socket"); 1066 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1067 return (-1); 1068 } 1069 1070 if (peer->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) { 1071 log_peer_warnx(&peer->conf, 1072 "ipsec or md5sig configured but not available"); 1073 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1074 return (-1); 1075 } 1076 1077 tcp_md5_set(peer->fd, peer); 1078 peer->wbuf.fd = peer->fd; 1079 1080 /* if local-address is set we need to bind() */ 1081 switch (peer->conf.remote_addr.aid) { 1082 case AID_INET: 1083 bind_addr = &peer->conf.local_addr_v4; 1084 break; 1085 case AID_INET6: 1086 bind_addr = &peer->conf.local_addr_v6; 1087 break; 1088 } 1089 if ((sa = addr2sa(bind_addr, 0, &sa_len)) != NULL) { 1090 if (bind(peer->fd, sa, sa_len) == -1) { 1091 log_peer_warn(&peer->conf, "session_connect bind"); 1092 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1093 return (-1); 1094 } 1095 } 1096 1097 if (session_setup_socket(peer)) { 1098 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1099 return (-1); 1100 } 1101 1102 sa = addr2sa(&peer->conf.remote_addr, BGP_PORT, &sa_len); 1103 if (connect(peer->fd, sa, sa_len) == -1) { 1104 if (errno != EINPROGRESS) { 1105 if (errno != peer->lasterr) 1106 log_peer_warn(&peer->conf, "connect"); 1107 peer->lasterr = errno; 1108 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1109 return (-1); 1110 } 1111 } else 1112 bgp_fsm(peer, EVNT_CON_OPEN); 1113 1114 return (0); 1115 } 1116 1117 int 1118 session_setup_socket(struct peer *p) 1119 { 1120 int ttl = p->conf.distance; 1121 int pre = IPTOS_PREC_INTERNETCONTROL; 1122 int nodelay = 1; 1123 int bsize; 1124 1125 switch (p->conf.remote_addr.aid) { 1126 case AID_INET: 1127 /* set precedence, see RFC 1771 appendix 5 */ 1128 if (setsockopt(p->fd, IPPROTO_IP, IP_TOS, &pre, sizeof(pre)) == 1129 -1) { 1130 log_peer_warn(&p->conf, 1131 "session_setup_socket setsockopt TOS"); 1132 return (-1); 1133 } 1134 1135 if (p->conf.ebgp) { 1136 /* 1137 * set TTL to foreign router's distance 1138 * 1=direct n=multihop with ttlsec, we always use 255 1139 */ 1140 if (p->conf.ttlsec) { 1141 ttl = 256 - p->conf.distance; 1142 if (setsockopt(p->fd, IPPROTO_IP, IP_MINTTL, 1143 &ttl, sizeof(ttl)) == -1) { 1144 log_peer_warn(&p->conf, 1145 "session_setup_socket: " 1146 "setsockopt MINTTL"); 1147 return (-1); 1148 } 1149 ttl = 255; 1150 } 1151 1152 if (setsockopt(p->fd, IPPROTO_IP, IP_TTL, &ttl, 1153 sizeof(ttl)) == -1) { 1154 log_peer_warn(&p->conf, 1155 "session_setup_socket setsockopt TTL"); 1156 return (-1); 1157 } 1158 } 1159 break; 1160 case AID_INET6: 1161 if (p->conf.ebgp) { 1162 /* 1163 * set hoplimit to foreign router's distance 1164 * 1=direct n=multihop with ttlsec, we always use 255 1165 */ 1166 if (p->conf.ttlsec) { 1167 ttl = 256 - p->conf.distance; 1168 if (setsockopt(p->fd, IPPROTO_IPV6, 1169 IPV6_MINHOPCOUNT, &ttl, sizeof(ttl)) 1170 == -1) { 1171 log_peer_warn(&p->conf, 1172 "session_setup_socket: " 1173 "setsockopt MINHOPCOUNT"); 1174 return (-1); 1175 } 1176 ttl = 255; 1177 } 1178 if (setsockopt(p->fd, IPPROTO_IPV6, IPV6_UNICAST_HOPS, 1179 &ttl, sizeof(ttl)) == -1) { 1180 log_peer_warn(&p->conf, 1181 "session_setup_socket setsockopt hoplimit"); 1182 return (-1); 1183 } 1184 } 1185 break; 1186 } 1187 1188 /* set TCP_NODELAY */ 1189 if (setsockopt(p->fd, IPPROTO_TCP, TCP_NODELAY, &nodelay, 1190 sizeof(nodelay)) == -1) { 1191 log_peer_warn(&p->conf, 1192 "session_setup_socket setsockopt TCP_NODELAY"); 1193 return (-1); 1194 } 1195 1196 /* only increase bufsize (and thus window) if md5 or ipsec is in use */ 1197 if (p->conf.auth.method != AUTH_NONE) { 1198 /* try to increase bufsize. no biggie if it fails */ 1199 bsize = 65535; 1200 while (bsize > 8192 && 1201 setsockopt(p->fd, SOL_SOCKET, SO_RCVBUF, &bsize, 1202 sizeof(bsize)) == -1 && errno != EINVAL) 1203 bsize /= 2; 1204 bsize = 65535; 1205 while (bsize > 8192 && 1206 setsockopt(p->fd, SOL_SOCKET, SO_SNDBUF, &bsize, 1207 sizeof(bsize)) == -1 && errno != EINVAL) 1208 bsize /= 2; 1209 } 1210 1211 return (0); 1212 } 1213 1214 /* compare two sockaddrs by converting them into bgpd_addr */ 1215 static int 1216 sa_cmp(struct sockaddr *a, struct sockaddr *b) 1217 { 1218 struct bgpd_addr ba, bb; 1219 1220 sa2addr(a, &ba, NULL); 1221 sa2addr(b, &bb, NULL); 1222 1223 return (memcmp(&ba, &bb, sizeof(ba)) == 0); 1224 } 1225 1226 static void 1227 get_alternate_addr(struct sockaddr *sa, struct bgpd_addr *alt) 1228 { 1229 struct ifaddrs *ifap, *ifa, *match; 1230 1231 if (getifaddrs(&ifap) == -1) 1232 fatal("getifaddrs"); 1233 1234 for (match = ifap; match != NULL; match = match->ifa_next) 1235 if (match->ifa_addr != NULL && 1236 sa_cmp(sa, match->ifa_addr) == 0) 1237 break; 1238 1239 if (match == NULL) { 1240 log_warnx("%s: local address not found", __func__); 1241 return; 1242 } 1243 1244 switch (sa->sa_family) { 1245 case AF_INET6: 1246 for (ifa = ifap; ifa != NULL; ifa = ifa->ifa_next) { 1247 if (ifa->ifa_addr != NULL && 1248 ifa->ifa_addr->sa_family == AF_INET && 1249 strcmp(ifa->ifa_name, match->ifa_name) == 0) { 1250 sa2addr(ifa->ifa_addr, alt, NULL); 1251 break; 1252 } 1253 } 1254 break; 1255 case AF_INET: 1256 for (ifa = ifap; ifa != NULL; ifa = ifa->ifa_next) { 1257 if (ifa->ifa_addr != NULL && 1258 ifa->ifa_addr->sa_family == AF_INET6 && 1259 strcmp(ifa->ifa_name, match->ifa_name) == 0) { 1260 struct sockaddr_in6 *s = 1261 (struct sockaddr_in6 *)ifa->ifa_addr; 1262 1263 /* only accept global scope addresses */ 1264 if (IN6_IS_ADDR_LINKLOCAL(&s->sin6_addr) || 1265 IN6_IS_ADDR_SITELOCAL(&s->sin6_addr)) 1266 continue; 1267 sa2addr(ifa->ifa_addr, alt, NULL); 1268 break; 1269 } 1270 } 1271 break; 1272 default: 1273 log_warnx("%s: unsupported address family %d", __func__, 1274 sa->sa_family); 1275 break; 1276 } 1277 1278 freeifaddrs(ifap); 1279 } 1280 1281 void 1282 session_tcp_established(struct peer *peer) 1283 { 1284 struct sockaddr_storage ss; 1285 socklen_t len; 1286 1287 len = sizeof(ss); 1288 if (getsockname(peer->fd, (struct sockaddr *)&ss, &len) == -1) 1289 log_warn("getsockname"); 1290 sa2addr((struct sockaddr *)&ss, &peer->local, &peer->local_port); 1291 get_alternate_addr((struct sockaddr *)&ss, &peer->local_alt); 1292 len = sizeof(ss); 1293 if (getpeername(peer->fd, (struct sockaddr *)&ss, &len) == -1) 1294 log_warn("getpeername"); 1295 sa2addr((struct sockaddr *)&ss, &peer->remote, &peer->remote_port); 1296 } 1297 1298 void 1299 session_capa_ann_none(struct peer *peer) 1300 { 1301 bzero(&peer->capa.ann, sizeof(peer->capa.ann)); 1302 } 1303 1304 int 1305 session_capa_add(struct ibuf *opb, u_int8_t capa_code, u_int8_t capa_len) 1306 { 1307 int errs = 0; 1308 1309 errs += ibuf_add(opb, &capa_code, sizeof(capa_code)); 1310 errs += ibuf_add(opb, &capa_len, sizeof(capa_len)); 1311 return (errs); 1312 } 1313 1314 int 1315 session_capa_add_mp(struct ibuf *buf, u_int8_t aid) 1316 { 1317 u_int8_t safi, pad = 0; 1318 u_int16_t afi; 1319 int errs = 0; 1320 1321 if (aid2afi(aid, &afi, &safi) == -1) 1322 fatalx("session_capa_add_mp: bad afi/safi pair"); 1323 afi = htons(afi); 1324 errs += ibuf_add(buf, &afi, sizeof(afi)); 1325 errs += ibuf_add(buf, &pad, sizeof(pad)); 1326 errs += ibuf_add(buf, &safi, sizeof(safi)); 1327 1328 return (errs); 1329 } 1330 1331 int 1332 session_capa_add_afi(struct peer *p, struct ibuf *b, u_int8_t aid, 1333 u_int8_t flags) 1334 { 1335 u_int errs = 0; 1336 u_int16_t afi; 1337 u_int8_t safi; 1338 1339 if (aid2afi(aid, &afi, &safi)) { 1340 log_warn("session_capa_add_afi: bad AID"); 1341 return (1); 1342 } 1343 1344 afi = htons(afi); 1345 errs += ibuf_add(b, &afi, sizeof(afi)); 1346 errs += ibuf_add(b, &safi, sizeof(safi)); 1347 errs += ibuf_add(b, &flags, sizeof(flags)); 1348 1349 return (errs); 1350 } 1351 1352 struct bgp_msg * 1353 session_newmsg(enum msg_type msgtype, u_int16_t len) 1354 { 1355 struct bgp_msg *msg; 1356 struct msg_header hdr; 1357 struct ibuf *buf; 1358 int errs = 0; 1359 1360 memset(&hdr.marker, 0xff, sizeof(hdr.marker)); 1361 hdr.len = htons(len); 1362 hdr.type = msgtype; 1363 1364 if ((buf = ibuf_open(len)) == NULL) 1365 return (NULL); 1366 1367 errs += ibuf_add(buf, &hdr.marker, sizeof(hdr.marker)); 1368 errs += ibuf_add(buf, &hdr.len, sizeof(hdr.len)); 1369 errs += ibuf_add(buf, &hdr.type, sizeof(hdr.type)); 1370 1371 if (errs || (msg = calloc(1, sizeof(*msg))) == NULL) { 1372 ibuf_free(buf); 1373 return (NULL); 1374 } 1375 1376 msg->buf = buf; 1377 msg->type = msgtype; 1378 msg->len = len; 1379 1380 return (msg); 1381 } 1382 1383 int 1384 session_sendmsg(struct bgp_msg *msg, struct peer *p) 1385 { 1386 struct mrt *mrt; 1387 1388 LIST_FOREACH(mrt, &mrthead, entry) { 1389 if (!(mrt->type == MRT_ALL_OUT || (msg->type == UPDATE && 1390 mrt->type == MRT_UPDATE_OUT))) 1391 continue; 1392 if ((mrt->peer_id == 0 && mrt->group_id == 0) || 1393 mrt->peer_id == p->conf.id || (mrt->group_id != 0 && 1394 mrt->group_id == p->conf.groupid)) 1395 mrt_dump_bgp_msg(mrt, msg->buf->buf, msg->len, p, 1396 msg->type); 1397 } 1398 1399 ibuf_close(&p->wbuf, msg->buf); 1400 if (!p->throttled && p->wbuf.queued > SESS_MSG_HIGH_MARK) { 1401 if (imsg_rde(IMSG_XOFF, p->conf.id, NULL, 0) == -1) 1402 log_peer_warn(&p->conf, "imsg_compose XOFF"); 1403 else 1404 p->throttled = 1; 1405 } 1406 1407 free(msg); 1408 return (0); 1409 } 1410 1411 void 1412 session_open(struct peer *p) 1413 { 1414 struct bgp_msg *buf; 1415 struct ibuf *opb; 1416 struct msg_open msg; 1417 u_int16_t len, optparamlen = 0; 1418 u_int8_t i, op_type; 1419 int errs = 0, extlen = 0; 1420 int mpcapa = 0; 1421 1422 1423 if ((opb = ibuf_dynamic(0, UINT16_MAX - 3)) == NULL) { 1424 bgp_fsm(p, EVNT_CON_FATAL); 1425 return; 1426 } 1427 1428 /* multiprotocol extensions, RFC 4760 */ 1429 for (i = 0; i < AID_MAX; i++) 1430 if (p->capa.ann.mp[i]) { /* 4 bytes data */ 1431 errs += session_capa_add(opb, CAPA_MP, 4); 1432 errs += session_capa_add_mp(opb, i); 1433 mpcapa++; 1434 } 1435 1436 /* route refresh, RFC 2918 */ 1437 if (p->capa.ann.refresh) /* no data */ 1438 errs += session_capa_add(opb, CAPA_REFRESH, 0); 1439 1440 /* graceful restart and End-of-RIB marker, RFC 4724 */ 1441 if (p->capa.ann.grestart.restart) { 1442 int rst = 0; 1443 u_int16_t hdr = 0; 1444 1445 for (i = 0; i < AID_MAX; i++) { 1446 if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING) 1447 rst++; 1448 } 1449 1450 /* Only set the R-flag if no graceful restart is ongoing */ 1451 if (!rst) 1452 hdr |= CAPA_GR_R_FLAG; 1453 hdr = htons(hdr); 1454 1455 errs += session_capa_add(opb, CAPA_RESTART, sizeof(hdr)); 1456 errs += ibuf_add(opb, &hdr, sizeof(hdr)); 1457 } 1458 1459 /* 4-bytes AS numbers, RFC6793 */ 1460 if (p->capa.ann.as4byte) { /* 4 bytes data */ 1461 u_int32_t nas; 1462 1463 nas = htonl(p->conf.local_as); 1464 errs += session_capa_add(opb, CAPA_AS4BYTE, sizeof(nas)); 1465 errs += ibuf_add(opb, &nas, sizeof(nas)); 1466 } 1467 1468 /* advertisement of multiple paths, RFC7911 */ 1469 if (p->capa.ann.add_path[0]) { /* variable */ 1470 u_int8_t aplen; 1471 1472 if (mpcapa) 1473 aplen = 4 * mpcapa; 1474 else /* AID_INET */ 1475 aplen = 4; 1476 errs += session_capa_add(opb, CAPA_ADD_PATH, aplen); 1477 if (mpcapa) { 1478 for (i = AID_MIN; i < AID_MAX; i++) { 1479 if (p->capa.ann.mp[i]) { 1480 errs += session_capa_add_afi(p, opb, 1481 i, p->capa.ann.add_path[i]); 1482 } 1483 } 1484 } else { /* AID_INET */ 1485 errs += session_capa_add_afi(p, opb, AID_INET, 1486 p->capa.ann.add_path[AID_INET]); 1487 } 1488 } 1489 1490 /* enhanced route-refresh, RFC7313 */ 1491 if (p->capa.ann.enhanced_rr) /* no data */ 1492 errs += session_capa_add(opb, CAPA_ENHANCED_RR, 0); 1493 1494 optparamlen = ibuf_size(opb); 1495 if (optparamlen == 0) { 1496 /* nothing */ 1497 } else if (optparamlen + 2 >= 255) { 1498 /* RFC9072: 2 byte lenght instead of 1 + 3 byte extra header */ 1499 optparamlen += sizeof(op_type) + 2 + 3; 1500 msg.optparamlen = 255; 1501 extlen = 1; 1502 } else { 1503 optparamlen += sizeof(op_type) + 1; 1504 msg.optparamlen = optparamlen; 1505 } 1506 1507 len = MSGSIZE_OPEN_MIN + optparamlen; 1508 if (errs || (buf = session_newmsg(OPEN, len)) == NULL) { 1509 ibuf_free(opb); 1510 bgp_fsm(p, EVNT_CON_FATAL); 1511 return; 1512 } 1513 1514 msg.version = 4; 1515 msg.myas = htons(p->conf.local_short_as); 1516 if (p->conf.holdtime) 1517 msg.holdtime = htons(p->conf.holdtime); 1518 else 1519 msg.holdtime = htons(conf->holdtime); 1520 msg.bgpid = conf->bgpid; /* is already in network byte order */ 1521 1522 errs += ibuf_add(buf->buf, &msg.version, sizeof(msg.version)); 1523 errs += ibuf_add(buf->buf, &msg.myas, sizeof(msg.myas)); 1524 errs += ibuf_add(buf->buf, &msg.holdtime, sizeof(msg.holdtime)); 1525 errs += ibuf_add(buf->buf, &msg.bgpid, sizeof(msg.bgpid)); 1526 errs += ibuf_add(buf->buf, &msg.optparamlen, 1); 1527 1528 if (extlen) { 1529 /* write RFC9072 extra header */ 1530 u_int16_t op_extlen = htons(optparamlen - 3); 1531 op_type = OPT_PARAM_EXT_LEN; 1532 errs += ibuf_add(buf->buf, &op_type, 1); 1533 errs += ibuf_add(buf->buf, &op_extlen, 2); 1534 } 1535 1536 if (optparamlen) { 1537 op_type = OPT_PARAM_CAPABILITIES; 1538 errs += ibuf_add(buf->buf, &op_type, sizeof(op_type)); 1539 1540 optparamlen = ibuf_size(opb); 1541 if (extlen) { 1542 /* RFC9072: 2-byte extended length */ 1543 u_int16_t op_extlen = htons(optparamlen); 1544 errs += ibuf_add(buf->buf, &op_extlen, 2); 1545 } else { 1546 u_int8_t op_len = optparamlen; 1547 errs += ibuf_add(buf->buf, &op_len, 1); 1548 } 1549 errs += ibuf_add(buf->buf, opb->buf, ibuf_size(opb)); 1550 } 1551 1552 ibuf_free(opb); 1553 1554 if (errs) { 1555 ibuf_free(buf->buf); 1556 free(buf); 1557 bgp_fsm(p, EVNT_CON_FATAL); 1558 return; 1559 } 1560 1561 if (session_sendmsg(buf, p) == -1) { 1562 bgp_fsm(p, EVNT_CON_FATAL); 1563 return; 1564 } 1565 1566 p->stats.msg_sent_open++; 1567 } 1568 1569 void 1570 session_keepalive(struct peer *p) 1571 { 1572 struct bgp_msg *buf; 1573 1574 if ((buf = session_newmsg(KEEPALIVE, MSGSIZE_KEEPALIVE)) == NULL || 1575 session_sendmsg(buf, p) == -1) { 1576 bgp_fsm(p, EVNT_CON_FATAL); 1577 return; 1578 } 1579 1580 start_timer_keepalive(p); 1581 p->stats.msg_sent_keepalive++; 1582 } 1583 1584 void 1585 session_update(u_int32_t peerid, void *data, size_t datalen) 1586 { 1587 struct peer *p; 1588 struct bgp_msg *buf; 1589 1590 if ((p = getpeerbyid(conf, peerid)) == NULL) { 1591 log_warnx("no such peer: id=%u", peerid); 1592 return; 1593 } 1594 1595 if (p->state != STATE_ESTABLISHED) 1596 return; 1597 1598 if ((buf = session_newmsg(UPDATE, MSGSIZE_HEADER + datalen)) == NULL) { 1599 bgp_fsm(p, EVNT_CON_FATAL); 1600 return; 1601 } 1602 1603 if (ibuf_add(buf->buf, data, datalen)) { 1604 ibuf_free(buf->buf); 1605 free(buf); 1606 bgp_fsm(p, EVNT_CON_FATAL); 1607 return; 1608 } 1609 1610 if (session_sendmsg(buf, p) == -1) { 1611 bgp_fsm(p, EVNT_CON_FATAL); 1612 return; 1613 } 1614 1615 start_timer_keepalive(p); 1616 p->stats.msg_sent_update++; 1617 } 1618 1619 void 1620 session_notification(struct peer *p, u_int8_t errcode, u_int8_t subcode, 1621 void *data, ssize_t datalen) 1622 { 1623 struct bgp_msg *buf; 1624 int errs = 0; 1625 1626 if (p->stats.last_sent_errcode) /* some notification already sent */ 1627 return; 1628 1629 log_notification(p, errcode, subcode, data, datalen, "sending"); 1630 1631 /* cap to maximum size */ 1632 if (datalen > MAX_PKTSIZE - MSGSIZE_NOTIFICATION_MIN) { 1633 log_peer_warnx(&p->conf, 1634 "oversized notification, data trunkated"); 1635 datalen = MAX_PKTSIZE - MSGSIZE_NOTIFICATION_MIN; 1636 } 1637 1638 if ((buf = session_newmsg(NOTIFICATION, 1639 MSGSIZE_NOTIFICATION_MIN + datalen)) == NULL) { 1640 bgp_fsm(p, EVNT_CON_FATAL); 1641 return; 1642 } 1643 1644 errs += ibuf_add(buf->buf, &errcode, sizeof(errcode)); 1645 errs += ibuf_add(buf->buf, &subcode, sizeof(subcode)); 1646 1647 if (datalen > 0) 1648 errs += ibuf_add(buf->buf, data, datalen); 1649 1650 if (errs) { 1651 ibuf_free(buf->buf); 1652 free(buf); 1653 bgp_fsm(p, EVNT_CON_FATAL); 1654 return; 1655 } 1656 1657 if (session_sendmsg(buf, p) == -1) { 1658 bgp_fsm(p, EVNT_CON_FATAL); 1659 return; 1660 } 1661 1662 p->stats.msg_sent_notification++; 1663 p->stats.last_sent_errcode = errcode; 1664 p->stats.last_sent_suberr = subcode; 1665 } 1666 1667 int 1668 session_neighbor_rrefresh(struct peer *p) 1669 { 1670 u_int8_t i; 1671 1672 if (!(p->capa.neg.refresh || p->capa.neg.enhanced_rr)) 1673 return (-1); 1674 1675 for (i = 0; i < AID_MAX; i++) { 1676 if (p->capa.neg.mp[i] != 0) 1677 session_rrefresh(p, i, ROUTE_REFRESH_REQUEST); 1678 } 1679 1680 return (0); 1681 } 1682 1683 void 1684 session_rrefresh(struct peer *p, u_int8_t aid, u_int8_t subtype) 1685 { 1686 struct bgp_msg *buf; 1687 int errs = 0; 1688 u_int16_t afi; 1689 u_int8_t safi; 1690 1691 switch (subtype) { 1692 case ROUTE_REFRESH_REQUEST: 1693 p->stats.refresh_sent_req++; 1694 break; 1695 case ROUTE_REFRESH_BEGIN_RR: 1696 case ROUTE_REFRESH_END_RR: 1697 /* requires enhanced route refresh */ 1698 if (!p->capa.neg.enhanced_rr) 1699 return; 1700 if (subtype == ROUTE_REFRESH_BEGIN_RR) 1701 p->stats.refresh_sent_borr++; 1702 else 1703 p->stats.refresh_sent_eorr++; 1704 break; 1705 default: 1706 fatalx("session_rrefresh: bad subtype %d", subtype); 1707 } 1708 1709 if (aid2afi(aid, &afi, &safi) == -1) 1710 fatalx("session_rrefresh: bad afi/safi pair"); 1711 1712 if ((buf = session_newmsg(RREFRESH, MSGSIZE_RREFRESH)) == NULL) { 1713 bgp_fsm(p, EVNT_CON_FATAL); 1714 return; 1715 } 1716 1717 afi = htons(afi); 1718 errs += ibuf_add(buf->buf, &afi, sizeof(afi)); 1719 errs += ibuf_add(buf->buf, &subtype, sizeof(subtype)); 1720 errs += ibuf_add(buf->buf, &safi, sizeof(safi)); 1721 1722 if (errs) { 1723 ibuf_free(buf->buf); 1724 free(buf); 1725 bgp_fsm(p, EVNT_CON_FATAL); 1726 return; 1727 } 1728 1729 if (session_sendmsg(buf, p) == -1) { 1730 bgp_fsm(p, EVNT_CON_FATAL); 1731 return; 1732 } 1733 1734 p->stats.msg_sent_rrefresh++; 1735 } 1736 1737 int 1738 session_graceful_restart(struct peer *p) 1739 { 1740 u_int8_t i; 1741 1742 timer_set(&p->timers, Timer_RestartTimeout, 1743 p->capa.neg.grestart.timeout); 1744 1745 for (i = 0; i < AID_MAX; i++) { 1746 if (p->capa.neg.grestart.flags[i] & CAPA_GR_PRESENT) { 1747 if (imsg_rde(IMSG_SESSION_STALE, p->conf.id, 1748 &i, sizeof(i)) == -1) 1749 return (-1); 1750 log_peer_warnx(&p->conf, 1751 "graceful restart of %s, keeping routes", 1752 aid2str(i)); 1753 p->capa.neg.grestart.flags[i] |= CAPA_GR_RESTARTING; 1754 } else if (p->capa.neg.mp[i]) { 1755 if (imsg_rde(IMSG_SESSION_FLUSH, p->conf.id, 1756 &i, sizeof(i)) == -1) 1757 return (-1); 1758 log_peer_warnx(&p->conf, 1759 "graceful restart of %s, flushing routes", 1760 aid2str(i)); 1761 } 1762 } 1763 return (0); 1764 } 1765 1766 int 1767 session_graceful_stop(struct peer *p) 1768 { 1769 u_int8_t i; 1770 1771 for (i = 0; i < AID_MAX; i++) { 1772 /* 1773 * Only flush if the peer is restarting and the timeout fired. 1774 * In all other cases the session was already flushed when the 1775 * session went down or when the new open message was parsed. 1776 */ 1777 if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING) { 1778 log_peer_warnx(&p->conf, "graceful restart of %s, " 1779 "time-out, flushing", aid2str(i)); 1780 if (imsg_rde(IMSG_SESSION_FLUSH, p->conf.id, 1781 &i, sizeof(i)) == -1) 1782 return (-1); 1783 } 1784 p->capa.neg.grestart.flags[i] &= ~CAPA_GR_RESTARTING; 1785 } 1786 return (0); 1787 } 1788 1789 int 1790 session_dispatch_msg(struct pollfd *pfd, struct peer *p) 1791 { 1792 ssize_t n; 1793 socklen_t len; 1794 int error; 1795 1796 if (p->state == STATE_CONNECT) { 1797 if (pfd->revents & POLLOUT) { 1798 if (pfd->revents & POLLIN) { 1799 /* error occurred */ 1800 len = sizeof(error); 1801 if (getsockopt(pfd->fd, SOL_SOCKET, SO_ERROR, 1802 &error, &len) == -1 || error) { 1803 if (error) 1804 errno = error; 1805 if (errno != p->lasterr) { 1806 log_peer_warn(&p->conf, 1807 "socket error"); 1808 p->lasterr = errno; 1809 } 1810 bgp_fsm(p, EVNT_CON_OPENFAIL); 1811 return (1); 1812 } 1813 } 1814 bgp_fsm(p, EVNT_CON_OPEN); 1815 return (1); 1816 } 1817 if (pfd->revents & POLLHUP) { 1818 bgp_fsm(p, EVNT_CON_OPENFAIL); 1819 return (1); 1820 } 1821 if (pfd->revents & (POLLERR|POLLNVAL)) { 1822 bgp_fsm(p, EVNT_CON_FATAL); 1823 return (1); 1824 } 1825 return (0); 1826 } 1827 1828 if (pfd->revents & POLLHUP) { 1829 bgp_fsm(p, EVNT_CON_CLOSED); 1830 return (1); 1831 } 1832 if (pfd->revents & (POLLERR|POLLNVAL)) { 1833 bgp_fsm(p, EVNT_CON_FATAL); 1834 return (1); 1835 } 1836 1837 if (pfd->revents & POLLOUT && p->wbuf.queued) { 1838 if ((error = msgbuf_write(&p->wbuf)) <= 0 && errno != EAGAIN) { 1839 if (error == 0) 1840 log_peer_warnx(&p->conf, "Connection closed"); 1841 else if (error == -1) 1842 log_peer_warn(&p->conf, "write error"); 1843 bgp_fsm(p, EVNT_CON_FATAL); 1844 return (1); 1845 } 1846 p->stats.last_write = getmonotime(); 1847 if (p->holdtime > 0) 1848 timer_set(&p->timers, Timer_SendHold, 1849 p->holdtime < INTERVAL_HOLD ? INTERVAL_HOLD : 1850 p->holdtime); 1851 if (p->throttled && p->wbuf.queued < SESS_MSG_LOW_MARK) { 1852 if (imsg_rde(IMSG_XON, p->conf.id, NULL, 0) == -1) 1853 log_peer_warn(&p->conf, "imsg_compose XON"); 1854 else 1855 p->throttled = 0; 1856 } 1857 if (!(pfd->revents & POLLIN)) 1858 return (1); 1859 } 1860 1861 if (p->rbuf && pfd->revents & POLLIN) { 1862 if ((n = read(p->fd, p->rbuf->buf + p->rbuf->wpos, 1863 sizeof(p->rbuf->buf) - p->rbuf->wpos)) == -1) { 1864 if (errno != EINTR && errno != EAGAIN) { 1865 log_peer_warn(&p->conf, "read error"); 1866 bgp_fsm(p, EVNT_CON_FATAL); 1867 } 1868 return (1); 1869 } 1870 if (n == 0) { /* connection closed */ 1871 bgp_fsm(p, EVNT_CON_CLOSED); 1872 return (1); 1873 } 1874 1875 p->rbuf->wpos += n; 1876 p->stats.last_read = getmonotime(); 1877 return (1); 1878 } 1879 return (0); 1880 } 1881 1882 void 1883 session_process_msg(struct peer *p) 1884 { 1885 struct mrt *mrt; 1886 ssize_t rpos, av, left; 1887 int processed = 0; 1888 u_int16_t msglen; 1889 u_int8_t msgtype; 1890 1891 rpos = 0; 1892 av = p->rbuf->wpos; 1893 p->rpending = 0; 1894 1895 /* 1896 * session might drop to IDLE -> buffers deallocated 1897 * we MUST check rbuf != NULL before use 1898 */ 1899 for (;;) { 1900 if (p->rbuf == NULL) 1901 return; 1902 if (rpos + MSGSIZE_HEADER > av) 1903 break; 1904 if (parse_header(p, p->rbuf->buf + rpos, &msglen, 1905 &msgtype) == -1) 1906 return; 1907 if (rpos + msglen > av) 1908 break; 1909 p->rbuf->rptr = p->rbuf->buf + rpos; 1910 1911 /* dump to MRT as soon as we have a full packet */ 1912 LIST_FOREACH(mrt, &mrthead, entry) { 1913 if (!(mrt->type == MRT_ALL_IN || (msgtype == UPDATE && 1914 mrt->type == MRT_UPDATE_IN))) 1915 continue; 1916 if ((mrt->peer_id == 0 && mrt->group_id == 0) || 1917 mrt->peer_id == p->conf.id || (mrt->group_id != 0 && 1918 mrt->group_id == p->conf.groupid)) 1919 mrt_dump_bgp_msg(mrt, p->rbuf->rptr, msglen, p, 1920 msgtype); 1921 } 1922 1923 switch (msgtype) { 1924 case OPEN: 1925 bgp_fsm(p, EVNT_RCVD_OPEN); 1926 p->stats.msg_rcvd_open++; 1927 break; 1928 case UPDATE: 1929 bgp_fsm(p, EVNT_RCVD_UPDATE); 1930 p->stats.msg_rcvd_update++; 1931 break; 1932 case NOTIFICATION: 1933 bgp_fsm(p, EVNT_RCVD_NOTIFICATION); 1934 p->stats.msg_rcvd_notification++; 1935 break; 1936 case KEEPALIVE: 1937 bgp_fsm(p, EVNT_RCVD_KEEPALIVE); 1938 p->stats.msg_rcvd_keepalive++; 1939 break; 1940 case RREFRESH: 1941 parse_rrefresh(p); 1942 p->stats.msg_rcvd_rrefresh++; 1943 break; 1944 default: /* cannot happen */ 1945 session_notification(p, ERR_HEADER, ERR_HDR_TYPE, 1946 &msgtype, 1); 1947 log_warnx("received message with unknown type %u", 1948 msgtype); 1949 bgp_fsm(p, EVNT_CON_FATAL); 1950 } 1951 rpos += msglen; 1952 if (++processed > MSG_PROCESS_LIMIT) { 1953 p->rpending = 1; 1954 break; 1955 } 1956 } 1957 1958 if (rpos < av) { 1959 left = av - rpos; 1960 memmove(&p->rbuf->buf, p->rbuf->buf + rpos, left); 1961 p->rbuf->wpos = left; 1962 } else 1963 p->rbuf->wpos = 0; 1964 } 1965 1966 int 1967 parse_header(struct peer *peer, u_char *data, u_int16_t *len, u_int8_t *type) 1968 { 1969 u_char *p; 1970 u_int16_t olen; 1971 static const u_int8_t marker[MSGSIZE_HEADER_MARKER] = { 0xff, 0xff, 1972 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 1973 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; 1974 1975 /* caller MUST make sure we are getting 19 bytes! */ 1976 p = data; 1977 if (memcmp(p, marker, sizeof(marker))) { 1978 log_peer_warnx(&peer->conf, "sync error"); 1979 session_notification(peer, ERR_HEADER, ERR_HDR_SYNC, NULL, 0); 1980 bgp_fsm(peer, EVNT_CON_FATAL); 1981 return (-1); 1982 } 1983 p += MSGSIZE_HEADER_MARKER; 1984 1985 memcpy(&olen, p, 2); 1986 *len = ntohs(olen); 1987 p += 2; 1988 memcpy(type, p, 1); 1989 1990 if (*len < MSGSIZE_HEADER || *len > MAX_PKTSIZE) { 1991 log_peer_warnx(&peer->conf, 1992 "received message: illegal length: %u byte", *len); 1993 session_notification(peer, ERR_HEADER, ERR_HDR_LEN, 1994 &olen, sizeof(olen)); 1995 bgp_fsm(peer, EVNT_CON_FATAL); 1996 return (-1); 1997 } 1998 1999 switch (*type) { 2000 case OPEN: 2001 if (*len < MSGSIZE_OPEN_MIN) { 2002 log_peer_warnx(&peer->conf, 2003 "received OPEN: illegal len: %u byte", *len); 2004 session_notification(peer, ERR_HEADER, ERR_HDR_LEN, 2005 &olen, sizeof(olen)); 2006 bgp_fsm(peer, EVNT_CON_FATAL); 2007 return (-1); 2008 } 2009 break; 2010 case NOTIFICATION: 2011 if (*len < MSGSIZE_NOTIFICATION_MIN) { 2012 log_peer_warnx(&peer->conf, 2013 "received NOTIFICATION: illegal len: %u byte", 2014 *len); 2015 session_notification(peer, ERR_HEADER, ERR_HDR_LEN, 2016 &olen, sizeof(olen)); 2017 bgp_fsm(peer, EVNT_CON_FATAL); 2018 return (-1); 2019 } 2020 break; 2021 case UPDATE: 2022 if (*len < MSGSIZE_UPDATE_MIN) { 2023 log_peer_warnx(&peer->conf, 2024 "received UPDATE: illegal len: %u byte", *len); 2025 session_notification(peer, ERR_HEADER, ERR_HDR_LEN, 2026 &olen, sizeof(olen)); 2027 bgp_fsm(peer, EVNT_CON_FATAL); 2028 return (-1); 2029 } 2030 break; 2031 case KEEPALIVE: 2032 if (*len != MSGSIZE_KEEPALIVE) { 2033 log_peer_warnx(&peer->conf, 2034 "received KEEPALIVE: illegal len: %u byte", *len); 2035 session_notification(peer, ERR_HEADER, ERR_HDR_LEN, 2036 &olen, sizeof(olen)); 2037 bgp_fsm(peer, EVNT_CON_FATAL); 2038 return (-1); 2039 } 2040 break; 2041 case RREFRESH: 2042 if (*len < MSGSIZE_RREFRESH_MIN) { 2043 log_peer_warnx(&peer->conf, 2044 "received RREFRESH: illegal len: %u byte", *len); 2045 session_notification(peer, ERR_HEADER, ERR_HDR_LEN, 2046 &olen, sizeof(olen)); 2047 bgp_fsm(peer, EVNT_CON_FATAL); 2048 return (-1); 2049 } 2050 break; 2051 default: 2052 log_peer_warnx(&peer->conf, 2053 "received msg with unknown type %u", *type); 2054 session_notification(peer, ERR_HEADER, ERR_HDR_TYPE, 2055 type, 1); 2056 bgp_fsm(peer, EVNT_CON_FATAL); 2057 return (-1); 2058 } 2059 return (0); 2060 } 2061 2062 int 2063 parse_open(struct peer *peer) 2064 { 2065 u_char *p, *op_val; 2066 u_int8_t version, rversion; 2067 u_int16_t short_as, msglen; 2068 u_int16_t holdtime, oholdtime, myholdtime; 2069 u_int32_t as, bgpid; 2070 u_int16_t optparamlen, extlen, plen, op_len; 2071 u_int8_t op_type; 2072 2073 p = peer->rbuf->rptr; 2074 p += MSGSIZE_HEADER_MARKER; 2075 memcpy(&msglen, p, sizeof(msglen)); 2076 msglen = ntohs(msglen); 2077 2078 p = peer->rbuf->rptr; 2079 p += MSGSIZE_HEADER; /* header is already checked */ 2080 2081 memcpy(&version, p, sizeof(version)); 2082 p += sizeof(version); 2083 2084 if (version != BGP_VERSION) { 2085 log_peer_warnx(&peer->conf, 2086 "peer wants unrecognized version %u", version); 2087 if (version > BGP_VERSION) 2088 rversion = version - BGP_VERSION; 2089 else 2090 rversion = BGP_VERSION; 2091 session_notification(peer, ERR_OPEN, ERR_OPEN_VERSION, 2092 &rversion, sizeof(rversion)); 2093 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2094 return (-1); 2095 } 2096 2097 memcpy(&short_as, p, sizeof(short_as)); 2098 p += sizeof(short_as); 2099 as = peer->short_as = ntohs(short_as); 2100 if (as == 0) { 2101 log_peer_warnx(&peer->conf, 2102 "peer requests unacceptable AS %u", as); 2103 session_notification(peer, ERR_OPEN, ERR_OPEN_AS, 2104 NULL, 0); 2105 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2106 return (-1); 2107 } 2108 2109 memcpy(&oholdtime, p, sizeof(oholdtime)); 2110 p += sizeof(oholdtime); 2111 2112 holdtime = ntohs(oholdtime); 2113 if (holdtime && holdtime < peer->conf.min_holdtime) { 2114 log_peer_warnx(&peer->conf, 2115 "peer requests unacceptable holdtime %u", holdtime); 2116 session_notification(peer, ERR_OPEN, ERR_OPEN_HOLDTIME, 2117 NULL, 0); 2118 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2119 return (-1); 2120 } 2121 2122 myholdtime = peer->conf.holdtime; 2123 if (!myholdtime) 2124 myholdtime = conf->holdtime; 2125 if (holdtime < myholdtime) 2126 peer->holdtime = holdtime; 2127 else 2128 peer->holdtime = myholdtime; 2129 2130 memcpy(&bgpid, p, sizeof(bgpid)); 2131 p += sizeof(bgpid); 2132 2133 /* check bgpid for validity - just disallow 0 */ 2134 if (ntohl(bgpid) == 0) { 2135 log_peer_warnx(&peer->conf, "peer BGPID %u unacceptable", 2136 ntohl(bgpid)); 2137 session_notification(peer, ERR_OPEN, ERR_OPEN_BGPID, 2138 NULL, 0); 2139 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2140 return (-1); 2141 } 2142 peer->remote_bgpid = bgpid; 2143 2144 extlen = 0; 2145 optparamlen = *p++; 2146 2147 if (optparamlen == 0) { 2148 if (msglen != MSGSIZE_OPEN_MIN) { 2149 bad_len: 2150 log_peer_warnx(&peer->conf, 2151 "corrupt OPEN message received: length mismatch"); 2152 session_notification(peer, ERR_OPEN, 0, NULL, 0); 2153 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2154 return (-1); 2155 } 2156 } else { 2157 if (msglen < MSGSIZE_OPEN_MIN + 1) 2158 goto bad_len; 2159 2160 op_type = *p; 2161 if (op_type == OPT_PARAM_EXT_LEN) { 2162 p++; 2163 memcpy(&optparamlen, p, sizeof(optparamlen)); 2164 optparamlen = ntohs(optparamlen); 2165 p += sizeof(optparamlen); 2166 extlen = 1; 2167 } 2168 2169 /* RFC9020 encoding has 3 extra bytes */ 2170 if (optparamlen + 3 * extlen != msglen - MSGSIZE_OPEN_MIN) 2171 goto bad_len; 2172 } 2173 2174 plen = optparamlen; 2175 while (plen > 0) { 2176 if (plen < 2 + extlen) 2177 goto bad_len; 2178 2179 memcpy(&op_type, p, sizeof(op_type)); 2180 p += sizeof(op_type); 2181 plen -= sizeof(op_type); 2182 if (!extlen) { 2183 op_len = *p++; 2184 plen--; 2185 } else { 2186 memcpy(&op_len, p, sizeof(op_len)); 2187 op_len = ntohs(op_len); 2188 p += sizeof(op_len); 2189 plen -= sizeof(op_len); 2190 } 2191 if (op_len > 0) { 2192 if (plen < op_len) 2193 goto bad_len; 2194 op_val = p; 2195 p += op_len; 2196 plen -= op_len; 2197 } else 2198 op_val = NULL; 2199 2200 switch (op_type) { 2201 case OPT_PARAM_CAPABILITIES: /* RFC 3392 */ 2202 if (parse_capabilities(peer, op_val, op_len, 2203 &as) == -1) { 2204 session_notification(peer, ERR_OPEN, 0, 2205 NULL, 0); 2206 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2207 return (-1); 2208 } 2209 break; 2210 case OPT_PARAM_AUTH: /* deprecated */ 2211 default: 2212 /* 2213 * unsupported type 2214 * the RFCs tell us to leave the data section empty 2215 * and notify the peer with ERR_OPEN, ERR_OPEN_OPT. 2216 * How the peer should know _which_ optional parameter 2217 * we don't support is beyond me. 2218 */ 2219 log_peer_warnx(&peer->conf, 2220 "received OPEN message with unsupported optional " 2221 "parameter: type %u", op_type); 2222 session_notification(peer, ERR_OPEN, ERR_OPEN_OPT, 2223 NULL, 0); 2224 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2225 /* no punish */ 2226 timer_set(&peer->timers, Timer_IdleHold, 0); 2227 peer->IdleHoldTime /= 2; 2228 return (-1); 2229 } 2230 } 2231 2232 /* if remote-as is zero and it's a cloned neighbor, accept any */ 2233 if (peer->template && !peer->conf.remote_as && as != AS_TRANS) { 2234 peer->conf.remote_as = as; 2235 peer->conf.ebgp = (peer->conf.remote_as != peer->conf.local_as); 2236 if (!peer->conf.ebgp) 2237 /* force enforce_as off for iBGP sessions */ 2238 peer->conf.enforce_as = ENFORCE_AS_OFF; 2239 } 2240 2241 if (peer->conf.remote_as != as) { 2242 log_peer_warnx(&peer->conf, "peer sent wrong AS %s", 2243 log_as(as)); 2244 session_notification(peer, ERR_OPEN, ERR_OPEN_AS, NULL, 0); 2245 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2246 return (-1); 2247 } 2248 2249 /* on iBGP sessions check for bgpid collision */ 2250 if (!peer->conf.ebgp && peer->remote_bgpid == conf->bgpid) { 2251 log_peer_warnx(&peer->conf, "peer BGPID %u conflicts with ours", 2252 ntohl(bgpid)); 2253 session_notification(peer, ERR_OPEN, ERR_OPEN_BGPID, 2254 NULL, 0); 2255 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2256 return (-1); 2257 } 2258 2259 if (capa_neg_calc(peer) == -1) { 2260 log_peer_warnx(&peer->conf, 2261 "capability negotiation calculation failed"); 2262 session_notification(peer, ERR_OPEN, 0, NULL, 0); 2263 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2264 return (-1); 2265 } 2266 2267 return (0); 2268 } 2269 2270 int 2271 parse_update(struct peer *peer) 2272 { 2273 u_char *p; 2274 u_int16_t datalen; 2275 2276 /* 2277 * we pass the message verbatim to the rde. 2278 * in case of errors the whole session is reset with a 2279 * notification anyway, we only need to know the peer 2280 */ 2281 p = peer->rbuf->rptr; 2282 p += MSGSIZE_HEADER_MARKER; 2283 memcpy(&datalen, p, sizeof(datalen)); 2284 datalen = ntohs(datalen); 2285 2286 p = peer->rbuf->rptr; 2287 p += MSGSIZE_HEADER; /* header is already checked */ 2288 datalen -= MSGSIZE_HEADER; 2289 2290 if (imsg_rde(IMSG_UPDATE, peer->conf.id, p, datalen) == -1) 2291 return (-1); 2292 2293 return (0); 2294 } 2295 2296 int 2297 parse_rrefresh(struct peer *peer) 2298 { 2299 struct route_refresh rr; 2300 u_int16_t afi, datalen; 2301 u_int8_t aid, safi, subtype; 2302 u_char *p; 2303 2304 p = peer->rbuf->rptr; 2305 p += MSGSIZE_HEADER_MARKER; 2306 memcpy(&datalen, p, sizeof(datalen)); 2307 datalen = ntohs(datalen); 2308 2309 p = peer->rbuf->rptr; 2310 p += MSGSIZE_HEADER; /* header is already checked */ 2311 2312 /* 2313 * We could check if we actually announced the capability but 2314 * as long as the message is correctly encoded we don't care. 2315 */ 2316 2317 /* afi, 2 byte */ 2318 memcpy(&afi, p, sizeof(afi)); 2319 afi = ntohs(afi); 2320 p += 2; 2321 /* subtype, 1 byte */ 2322 subtype = *p; 2323 p += 1; 2324 /* safi, 1 byte */ 2325 safi = *p; 2326 2327 /* check subtype if peer announced enhanced route refresh */ 2328 if (peer->capa.neg.enhanced_rr) { 2329 switch (subtype) { 2330 case ROUTE_REFRESH_REQUEST: 2331 /* no ORF support, so no oversized RREFRESH msgs */ 2332 if (datalen != MSGSIZE_RREFRESH) { 2333 log_peer_warnx(&peer->conf, 2334 "received RREFRESH: illegal len: %u byte", 2335 datalen); 2336 datalen = htons(datalen); 2337 session_notification(peer, ERR_HEADER, 2338 ERR_HDR_LEN, &datalen, sizeof(datalen)); 2339 bgp_fsm(peer, EVNT_CON_FATAL); 2340 return (-1); 2341 } 2342 peer->stats.refresh_rcvd_req++; 2343 break; 2344 case ROUTE_REFRESH_BEGIN_RR: 2345 case ROUTE_REFRESH_END_RR: 2346 /* special handling for RFC7313 */ 2347 if (datalen != MSGSIZE_RREFRESH) { 2348 log_peer_warnx(&peer->conf, 2349 "received RREFRESH: illegal len: %u byte", 2350 datalen); 2351 p = peer->rbuf->rptr; 2352 p += MSGSIZE_HEADER; 2353 datalen -= MSGSIZE_HEADER; 2354 session_notification(peer, ERR_RREFRESH, 2355 ERR_RR_INV_LEN, p, datalen); 2356 bgp_fsm(peer, EVNT_CON_FATAL); 2357 return (-1); 2358 } 2359 if (subtype == ROUTE_REFRESH_BEGIN_RR) 2360 peer->stats.refresh_rcvd_borr++; 2361 else 2362 peer->stats.refresh_rcvd_eorr++; 2363 break; 2364 default: 2365 log_peer_warnx(&peer->conf, "peer sent bad refresh, " 2366 "bad subtype %d", subtype); 2367 return (0); 2368 } 2369 } else { 2370 /* force subtype to default */ 2371 subtype = ROUTE_REFRESH_REQUEST; 2372 peer->stats.refresh_rcvd_req++; 2373 } 2374 2375 /* afi/safi unchecked - unrecognized values will be ignored anyway */ 2376 if (afi2aid(afi, safi, &aid) == -1) { 2377 log_peer_warnx(&peer->conf, "peer sent bad refresh, " 2378 "invalid afi/safi pair"); 2379 return (0); 2380 } 2381 2382 if (!peer->capa.neg.refresh && !peer->capa.neg.enhanced_rr) { 2383 log_peer_warnx(&peer->conf, "peer sent unexpected refresh"); 2384 return (0); 2385 } 2386 2387 rr.aid = aid; 2388 rr.subtype = subtype; 2389 2390 if (imsg_rde(IMSG_REFRESH, peer->conf.id, &rr, sizeof(rr)) == -1) 2391 return (-1); 2392 2393 return (0); 2394 } 2395 2396 int 2397 parse_notification(struct peer *peer) 2398 { 2399 u_char *p; 2400 u_int16_t datalen; 2401 u_int8_t errcode; 2402 u_int8_t subcode; 2403 u_int8_t capa_code; 2404 u_int8_t capa_len; 2405 size_t reason_len; 2406 u_int8_t i; 2407 2408 /* just log */ 2409 p = peer->rbuf->rptr; 2410 p += MSGSIZE_HEADER_MARKER; 2411 memcpy(&datalen, p, sizeof(datalen)); 2412 datalen = ntohs(datalen); 2413 2414 p = peer->rbuf->rptr; 2415 p += MSGSIZE_HEADER; /* header is already checked */ 2416 datalen -= MSGSIZE_HEADER; 2417 2418 memcpy(&errcode, p, sizeof(errcode)); 2419 p += sizeof(errcode); 2420 datalen -= sizeof(errcode); 2421 2422 memcpy(&subcode, p, sizeof(subcode)); 2423 p += sizeof(subcode); 2424 datalen -= sizeof(subcode); 2425 2426 log_notification(peer, errcode, subcode, p, datalen, "received"); 2427 peer->errcnt++; 2428 peer->stats.last_rcvd_errcode = errcode; 2429 peer->stats.last_rcvd_suberr = subcode; 2430 2431 if (errcode == ERR_OPEN && subcode == ERR_OPEN_CAPA) { 2432 if (datalen == 0) { /* zebra likes to send those.. humbug */ 2433 log_peer_warnx(&peer->conf, "received \"unsupported " 2434 "capability\" notification without data part, " 2435 "disabling capability announcements altogether"); 2436 session_capa_ann_none(peer); 2437 } 2438 2439 while (datalen > 0) { 2440 if (datalen < 2) { 2441 log_peer_warnx(&peer->conf, 2442 "parse_notification: " 2443 "expect len >= 2, len is %u", datalen); 2444 return (-1); 2445 } 2446 memcpy(&capa_code, p, sizeof(capa_code)); 2447 p += sizeof(capa_code); 2448 datalen -= sizeof(capa_code); 2449 memcpy(&capa_len, p, sizeof(capa_len)); 2450 p += sizeof(capa_len); 2451 datalen -= sizeof(capa_len); 2452 if (datalen < capa_len) { 2453 log_peer_warnx(&peer->conf, 2454 "parse_notification: capa_len %u exceeds " 2455 "remaining msg length %u", capa_len, 2456 datalen); 2457 return (-1); 2458 } 2459 p += capa_len; 2460 datalen -= capa_len; 2461 switch (capa_code) { 2462 case CAPA_MP: 2463 for (i = 0; i < AID_MAX; i++) 2464 peer->capa.ann.mp[i] = 0; 2465 log_peer_warnx(&peer->conf, 2466 "disabling multiprotocol capability"); 2467 break; 2468 case CAPA_REFRESH: 2469 peer->capa.ann.refresh = 0; 2470 log_peer_warnx(&peer->conf, 2471 "disabling route refresh capability"); 2472 break; 2473 case CAPA_RESTART: 2474 peer->capa.ann.grestart.restart = 0; 2475 log_peer_warnx(&peer->conf, 2476 "disabling restart capability"); 2477 break; 2478 case CAPA_AS4BYTE: 2479 peer->capa.ann.as4byte = 0; 2480 log_peer_warnx(&peer->conf, 2481 "disabling 4-byte AS num capability"); 2482 break; 2483 case CAPA_ADD_PATH: 2484 memset(peer->capa.ann.add_path, 0, 2485 sizeof(peer->capa.ann.add_path)); 2486 log_peer_warnx(&peer->conf, 2487 "disabling ADD-PATH capability"); 2488 break; 2489 case CAPA_ENHANCED_RR: 2490 peer->capa.ann.enhanced_rr = 0; 2491 log_peer_warnx(&peer->conf, 2492 "disabling enhanced route refresh " 2493 "capability"); 2494 break; 2495 default: /* should not happen... */ 2496 log_peer_warnx(&peer->conf, "received " 2497 "\"unsupported capability\" notification " 2498 "for unknown capability %u, disabling " 2499 "capability announcements altogether", 2500 capa_code); 2501 session_capa_ann_none(peer); 2502 break; 2503 } 2504 } 2505 2506 return (1); 2507 } 2508 2509 if (errcode == ERR_OPEN && subcode == ERR_OPEN_OPT) { 2510 session_capa_ann_none(peer); 2511 return (1); 2512 } 2513 2514 if (errcode == ERR_CEASE && 2515 (subcode == ERR_CEASE_ADMIN_DOWN || 2516 subcode == ERR_CEASE_ADMIN_RESET)) { 2517 if (datalen > 1) { 2518 reason_len = *p++; 2519 datalen--; 2520 if (datalen < reason_len) { 2521 log_peer_warnx(&peer->conf, 2522 "received truncated shutdown reason"); 2523 return (0); 2524 } 2525 if (reason_len > REASON_LEN - 1) { 2526 log_peer_warnx(&peer->conf, 2527 "received overly long shutdown reason"); 2528 return (0); 2529 } 2530 memcpy(peer->stats.last_reason, p, reason_len); 2531 peer->stats.last_reason[reason_len] = '\0'; 2532 log_peer_warnx(&peer->conf, 2533 "received shutdown reason: \"%s\"", 2534 log_reason(peer->stats.last_reason)); 2535 p += reason_len; 2536 datalen -= reason_len; 2537 } 2538 } 2539 2540 return (0); 2541 } 2542 2543 int 2544 parse_capabilities(struct peer *peer, u_char *d, u_int16_t dlen, u_int32_t *as) 2545 { 2546 u_char *capa_val; 2547 u_int32_t remote_as; 2548 u_int16_t len; 2549 u_int16_t afi; 2550 u_int16_t gr_header; 2551 u_int8_t safi; 2552 u_int8_t aid; 2553 u_int8_t flags; 2554 u_int8_t capa_code; 2555 u_int8_t capa_len; 2556 u_int8_t i; 2557 2558 len = dlen; 2559 while (len > 0) { 2560 if (len < 2) { 2561 log_peer_warnx(&peer->conf, "Bad capabilities attr " 2562 "length: %u, too short", len); 2563 return (-1); 2564 } 2565 memcpy(&capa_code, d, sizeof(capa_code)); 2566 d += sizeof(capa_code); 2567 len -= sizeof(capa_code); 2568 memcpy(&capa_len, d, sizeof(capa_len)); 2569 d += sizeof(capa_len); 2570 len -= sizeof(capa_len); 2571 if (capa_len > 0) { 2572 if (len < capa_len) { 2573 log_peer_warnx(&peer->conf, 2574 "Bad capabilities attr length: " 2575 "len %u smaller than capa_len %u", 2576 len, capa_len); 2577 return (-1); 2578 } 2579 capa_val = d; 2580 d += capa_len; 2581 len -= capa_len; 2582 } else 2583 capa_val = NULL; 2584 2585 switch (capa_code) { 2586 case CAPA_MP: /* RFC 4760 */ 2587 if (capa_len != 4) { 2588 log_peer_warnx(&peer->conf, 2589 "Bad multi protocol capability length: " 2590 "%u", capa_len); 2591 break; 2592 } 2593 memcpy(&afi, capa_val, sizeof(afi)); 2594 afi = ntohs(afi); 2595 memcpy(&safi, capa_val + 3, sizeof(safi)); 2596 if (afi2aid(afi, safi, &aid) == -1) { 2597 log_peer_warnx(&peer->conf, 2598 "Received multi protocol capability: " 2599 " unknown AFI %u, safi %u pair", 2600 afi, safi); 2601 break; 2602 } 2603 peer->capa.peer.mp[aid] = 1; 2604 break; 2605 case CAPA_REFRESH: 2606 peer->capa.peer.refresh = 1; 2607 break; 2608 case CAPA_RESTART: 2609 if (capa_len == 2) { 2610 /* peer only supports EoR marker */ 2611 peer->capa.peer.grestart.restart = 1; 2612 peer->capa.peer.grestart.timeout = 0; 2613 break; 2614 } else if (capa_len % 4 != 2) { 2615 log_peer_warnx(&peer->conf, 2616 "Bad graceful restart capability length: " 2617 "%u", capa_len); 2618 peer->capa.peer.grestart.restart = 0; 2619 peer->capa.peer.grestart.timeout = 0; 2620 break; 2621 } 2622 2623 memcpy(&gr_header, capa_val, sizeof(gr_header)); 2624 gr_header = ntohs(gr_header); 2625 peer->capa.peer.grestart.timeout = 2626 gr_header & CAPA_GR_TIMEMASK; 2627 if (peer->capa.peer.grestart.timeout == 0) { 2628 log_peer_warnx(&peer->conf, "Received " 2629 "graceful restart timeout is zero"); 2630 peer->capa.peer.grestart.restart = 0; 2631 break; 2632 } 2633 2634 for (i = 2; i <= capa_len - 4; i += 4) { 2635 memcpy(&afi, capa_val + i, sizeof(afi)); 2636 afi = ntohs(afi); 2637 safi = capa_val[i + 2]; 2638 flags = capa_val[i + 3]; 2639 if (afi2aid(afi, safi, &aid) == -1) { 2640 log_peer_warnx(&peer->conf, 2641 "Received graceful restart capa: " 2642 " unknown AFI %u, safi %u pair", 2643 afi, safi); 2644 continue; 2645 } 2646 peer->capa.peer.grestart.flags[aid] |= 2647 CAPA_GR_PRESENT; 2648 if (flags & CAPA_GR_F_FLAG) 2649 peer->capa.peer.grestart.flags[aid] |= 2650 CAPA_GR_FORWARD; 2651 if (gr_header & CAPA_GR_R_FLAG) 2652 peer->capa.peer.grestart.flags[aid] |= 2653 CAPA_GR_RESTART; 2654 peer->capa.peer.grestart.restart = 2; 2655 } 2656 break; 2657 case CAPA_AS4BYTE: 2658 if (capa_len != 4) { 2659 log_peer_warnx(&peer->conf, 2660 "Bad AS4BYTE capability length: " 2661 "%u", capa_len); 2662 peer->capa.peer.as4byte = 0; 2663 break; 2664 } 2665 memcpy(&remote_as, capa_val, sizeof(remote_as)); 2666 *as = ntohl(remote_as); 2667 if (*as == 0) { 2668 log_peer_warnx(&peer->conf, 2669 "peer requests unacceptable AS %u", *as); 2670 session_notification(peer, ERR_OPEN, 2671 ERR_OPEN_AS, NULL, 0); 2672 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2673 return (-1); 2674 } 2675 peer->capa.peer.as4byte = 1; 2676 break; 2677 case CAPA_ADD_PATH: 2678 if (capa_len % 4 != 0) { 2679 log_peer_warnx(&peer->conf, 2680 "Bad ADD-PATH capability length: " 2681 "%u", capa_len); 2682 memset(peer->capa.peer.add_path, 0, 2683 sizeof(peer->capa.peer.add_path)); 2684 break; 2685 } 2686 for (i = 0; i <= capa_len - 4; i += 4) { 2687 memcpy(&afi, capa_val + i, sizeof(afi)); 2688 afi = ntohs(afi); 2689 safi = capa_val[i + 2]; 2690 flags = capa_val[i + 3]; 2691 if (afi2aid(afi, safi, &aid) == -1) { 2692 log_peer_warnx(&peer->conf, 2693 "Received ADD-PATH capa: " 2694 " unknown AFI %u, safi %u pair", 2695 afi, safi); 2696 memset(peer->capa.peer.add_path, 0, 2697 sizeof(peer->capa.peer.add_path)); 2698 break; 2699 } 2700 if (flags & ~CAPA_AP_BIDIR) { 2701 log_peer_warnx(&peer->conf, 2702 "Received ADD-PATH capa: " 2703 " bad flags %x", flags); 2704 memset(peer->capa.peer.add_path, 0, 2705 sizeof(peer->capa.peer.add_path)); 2706 break; 2707 } 2708 peer->capa.peer.add_path[aid] = flags; 2709 } 2710 break; 2711 case CAPA_ENHANCED_RR: 2712 peer->capa.peer.enhanced_rr = 1; 2713 break; 2714 default: 2715 break; 2716 } 2717 } 2718 2719 return (0); 2720 } 2721 2722 int 2723 capa_neg_calc(struct peer *p) 2724 { 2725 u_int8_t i, hasmp = 0; 2726 2727 /* a capability is accepted only if both sides announced it */ 2728 2729 p->capa.neg.refresh = 2730 (p->capa.ann.refresh && p->capa.peer.refresh) != 0; 2731 p->capa.neg.enhanced_rr = 2732 (p->capa.ann.enhanced_rr && p->capa.peer.enhanced_rr) != 0; 2733 2734 p->capa.neg.as4byte = 2735 (p->capa.ann.as4byte && p->capa.peer.as4byte) != 0; 2736 2737 /* MP: both side must agree on the AFI,SAFI pair */ 2738 for (i = 0; i < AID_MAX; i++) { 2739 if (p->capa.ann.mp[i] && p->capa.peer.mp[i]) 2740 p->capa.neg.mp[i] = 1; 2741 else 2742 p->capa.neg.mp[i] = 0; 2743 if (p->capa.ann.mp[i]) 2744 hasmp = 1; 2745 } 2746 /* if no MP capability present default to IPv4 unicast mode */ 2747 if (!hasmp) 2748 p->capa.neg.mp[AID_INET] = 1; 2749 2750 /* 2751 * graceful restart: the peer capabilities are of interest here. 2752 * It is necessary to compare the new values with the previous ones 2753 * and act acordingly. AFI/SAFI that are not part in the MP capability 2754 * are treated as not being present. 2755 * Also make sure that a flush happens if the session stopped 2756 * supporting graceful restart. 2757 */ 2758 2759 for (i = 0; i < AID_MAX; i++) { 2760 int8_t negflags; 2761 2762 /* disable GR if the AFI/SAFI is not present */ 2763 if ((p->capa.peer.grestart.flags[i] & CAPA_GR_PRESENT && 2764 p->capa.neg.mp[i] == 0)) 2765 p->capa.peer.grestart.flags[i] = 0; /* disable */ 2766 /* look at current GR state and decide what to do */ 2767 negflags = p->capa.neg.grestart.flags[i]; 2768 p->capa.neg.grestart.flags[i] = p->capa.peer.grestart.flags[i]; 2769 if (negflags & CAPA_GR_RESTARTING) { 2770 if (p->capa.ann.grestart.restart != 0 && 2771 p->capa.peer.grestart.flags[i] & CAPA_GR_FORWARD) { 2772 p->capa.neg.grestart.flags[i] |= 2773 CAPA_GR_RESTARTING; 2774 } else { 2775 if (imsg_rde(IMSG_SESSION_FLUSH, p->conf.id, 2776 &i, sizeof(i)) == -1) 2777 return (-1); 2778 log_peer_warnx(&p->conf, "graceful restart of " 2779 "%s, not restarted, flushing", aid2str(i)); 2780 } 2781 } 2782 } 2783 p->capa.neg.grestart.timeout = p->capa.peer.grestart.timeout; 2784 p->capa.neg.grestart.restart = p->capa.peer.grestart.restart; 2785 if (p->capa.ann.grestart.restart == 0) 2786 p->capa.neg.grestart.restart = 0; 2787 2788 2789 /* 2790 * ADD-PATH: set only those bits where both sides agree. 2791 * For this compare our send bit with the recv bit from the peer 2792 * and vice versa. 2793 * The flags are stored from this systems view point. 2794 */ 2795 memset(p->capa.neg.add_path, 0, sizeof(p->capa.neg.add_path)); 2796 if (p->capa.ann.add_path[0]) { 2797 for (i = AID_MIN; i < AID_MAX; i++) { 2798 if ((p->capa.ann.add_path[i] & CAPA_AP_RECV) && 2799 (p->capa.peer.add_path[i] & CAPA_AP_SEND)) { 2800 p->capa.neg.add_path[i] |= CAPA_AP_RECV; 2801 p->capa.neg.add_path[0] |= CAPA_AP_RECV; 2802 } 2803 if ((p->capa.ann.add_path[i] & CAPA_AP_SEND) && 2804 (p->capa.peer.add_path[i] & CAPA_AP_RECV)) { 2805 p->capa.neg.add_path[i] |= CAPA_AP_SEND; 2806 p->capa.neg.add_path[0] |= CAPA_AP_SEND; 2807 } 2808 } 2809 } 2810 2811 return (0); 2812 } 2813 2814 void 2815 session_dispatch_imsg(struct imsgbuf *ibuf, int idx, u_int *listener_cnt) 2816 { 2817 struct imsg imsg; 2818 struct mrt xmrt; 2819 struct route_refresh rr; 2820 struct mrt *mrt; 2821 struct imsgbuf *i; 2822 struct peer *p; 2823 struct listen_addr *la, *nla; 2824 struct kif *kif; 2825 u_char *data; 2826 int n, fd, depend_ok, restricted; 2827 u_int16_t t; 2828 u_int8_t aid, errcode, subcode; 2829 2830 while (ibuf) { 2831 if ((n = imsg_get(ibuf, &imsg)) == -1) 2832 fatal("session_dispatch_imsg: imsg_get error"); 2833 2834 if (n == 0) 2835 break; 2836 2837 switch (imsg.hdr.type) { 2838 case IMSG_SOCKET_CONN: 2839 case IMSG_SOCKET_CONN_CTL: 2840 if (idx != PFD_PIPE_MAIN) 2841 fatalx("reconf request not from parent"); 2842 if ((fd = imsg.fd) == -1) { 2843 log_warnx("expected to receive imsg fd to " 2844 "RDE but didn't receive any"); 2845 break; 2846 } 2847 if ((i = malloc(sizeof(struct imsgbuf))) == NULL) 2848 fatal(NULL); 2849 imsg_init(i, fd); 2850 if (imsg.hdr.type == IMSG_SOCKET_CONN) { 2851 if (ibuf_rde) { 2852 log_warnx("Unexpected imsg connection " 2853 "to RDE received"); 2854 msgbuf_clear(&ibuf_rde->w); 2855 free(ibuf_rde); 2856 } 2857 ibuf_rde = i; 2858 } else { 2859 if (ibuf_rde_ctl) { 2860 log_warnx("Unexpected imsg ctl " 2861 "connection to RDE received"); 2862 msgbuf_clear(&ibuf_rde_ctl->w); 2863 free(ibuf_rde_ctl); 2864 } 2865 ibuf_rde_ctl = i; 2866 } 2867 break; 2868 case IMSG_RECONF_CONF: 2869 if (idx != PFD_PIPE_MAIN) 2870 fatalx("reconf request not from parent"); 2871 nconf = new_config(); 2872 2873 copy_config(nconf, imsg.data); 2874 pending_reconf = 1; 2875 break; 2876 case IMSG_RECONF_PEER: 2877 if (idx != PFD_PIPE_MAIN) 2878 fatalx("reconf request not from parent"); 2879 if ((p = calloc(1, sizeof(struct peer))) == NULL) 2880 fatal("new_peer"); 2881 memcpy(&p->conf, imsg.data, sizeof(struct peer_config)); 2882 p->state = p->prev_state = STATE_NONE; 2883 p->reconf_action = RECONF_REINIT; 2884 if (RB_INSERT(peer_head, &nconf->peers, p) != NULL) 2885 fatalx("%s: peer tree is corrupt", __func__); 2886 break; 2887 case IMSG_RECONF_LISTENER: 2888 if (idx != PFD_PIPE_MAIN) 2889 fatalx("reconf request not from parent"); 2890 if (nconf == NULL) 2891 fatalx("IMSG_RECONF_LISTENER but no config"); 2892 nla = imsg.data; 2893 TAILQ_FOREACH(la, conf->listen_addrs, entry) 2894 if (!la_cmp(la, nla)) 2895 break; 2896 2897 if (la == NULL) { 2898 if (nla->reconf != RECONF_REINIT) 2899 fatalx("king bula sez: " 2900 "expected REINIT"); 2901 2902 if ((nla->fd = imsg.fd) == -1) 2903 log_warnx("expected to receive fd for " 2904 "%s but didn't receive any", 2905 log_sockaddr((struct sockaddr *) 2906 &nla->sa, nla->sa_len)); 2907 2908 la = calloc(1, sizeof(struct listen_addr)); 2909 if (la == NULL) 2910 fatal(NULL); 2911 memcpy(&la->sa, &nla->sa, sizeof(la->sa)); 2912 la->flags = nla->flags; 2913 la->fd = nla->fd; 2914 la->reconf = RECONF_REINIT; 2915 TAILQ_INSERT_TAIL(nconf->listen_addrs, la, 2916 entry); 2917 } else { 2918 if (nla->reconf != RECONF_KEEP) 2919 fatalx("king bula sez: expected KEEP"); 2920 la->reconf = RECONF_KEEP; 2921 } 2922 2923 break; 2924 case IMSG_RECONF_CTRL: 2925 if (idx != PFD_PIPE_MAIN) 2926 fatalx("reconf request not from parent"); 2927 if (imsg.hdr.len != IMSG_HEADER_SIZE + 2928 sizeof(restricted)) 2929 fatalx("IFINFO imsg with wrong len"); 2930 memcpy(&restricted, imsg.data, sizeof(restricted)); 2931 if (imsg.fd == -1) { 2932 log_warnx("expected to receive fd for control " 2933 "socket but didn't receive any"); 2934 break; 2935 } 2936 if (restricted) { 2937 control_shutdown(rcsock); 2938 rcsock = imsg.fd; 2939 } else { 2940 control_shutdown(csock); 2941 csock = imsg.fd; 2942 } 2943 break; 2944 case IMSG_RECONF_DRAIN: 2945 switch (idx) { 2946 case PFD_PIPE_ROUTE: 2947 if (nconf != NULL) 2948 fatalx("got unexpected %s from RDE", 2949 "IMSG_RECONF_DONE"); 2950 imsg_compose(ibuf_main, IMSG_RECONF_DONE, 0, 0, 2951 -1, NULL, 0); 2952 break; 2953 case PFD_PIPE_MAIN: 2954 if (nconf == NULL) 2955 fatalx("got unexpected %s from parent", 2956 "IMSG_RECONF_DONE"); 2957 imsg_compose(ibuf_main, IMSG_RECONF_DRAIN, 0, 0, 2958 -1, NULL, 0); 2959 break; 2960 default: 2961 fatalx("reconf request not from parent or RDE"); 2962 } 2963 break; 2964 case IMSG_RECONF_DONE: 2965 if (idx != PFD_PIPE_MAIN) 2966 fatalx("reconf request not from parent"); 2967 if (nconf == NULL) 2968 fatalx("got IMSG_RECONF_DONE but no config"); 2969 copy_config(conf, nconf); 2970 merge_peers(conf, nconf); 2971 2972 /* delete old listeners */ 2973 for (la = TAILQ_FIRST(conf->listen_addrs); la != NULL; 2974 la = nla) { 2975 nla = TAILQ_NEXT(la, entry); 2976 if (la->reconf == RECONF_NONE) { 2977 log_info("not listening on %s any more", 2978 log_sockaddr((struct sockaddr *) 2979 &la->sa, la->sa_len)); 2980 TAILQ_REMOVE(conf->listen_addrs, la, 2981 entry); 2982 close(la->fd); 2983 free(la); 2984 } 2985 } 2986 2987 /* add new listeners */ 2988 TAILQ_CONCAT(conf->listen_addrs, nconf->listen_addrs, 2989 entry); 2990 2991 setup_listeners(listener_cnt); 2992 free_config(nconf); 2993 nconf = NULL; 2994 pending_reconf = 0; 2995 log_info("SE reconfigured"); 2996 /* 2997 * IMSG_RECONF_DONE is sent when the RDE drained 2998 * the peer config sent in merge_peers(). 2999 */ 3000 break; 3001 case IMSG_IFINFO: 3002 if (idx != PFD_PIPE_MAIN) 3003 fatalx("IFINFO message not from parent"); 3004 if (imsg.hdr.len != IMSG_HEADER_SIZE + 3005 sizeof(struct kif)) 3006 fatalx("IFINFO imsg with wrong len"); 3007 kif = imsg.data; 3008 depend_ok = kif->depend_state; 3009 3010 RB_FOREACH(p, peer_head, &conf->peers) 3011 if (!strcmp(p->conf.if_depend, kif->ifname)) { 3012 if (depend_ok && !p->depend_ok) { 3013 p->depend_ok = depend_ok; 3014 bgp_fsm(p, EVNT_START); 3015 } else if (!depend_ok && p->depend_ok) { 3016 p->depend_ok = depend_ok; 3017 session_stop(p, 3018 ERR_CEASE_OTHER_CHANGE); 3019 } 3020 } 3021 break; 3022 case IMSG_MRT_OPEN: 3023 case IMSG_MRT_REOPEN: 3024 if (imsg.hdr.len > IMSG_HEADER_SIZE + 3025 sizeof(struct mrt)) { 3026 log_warnx("wrong imsg len"); 3027 break; 3028 } 3029 3030 memcpy(&xmrt, imsg.data, sizeof(struct mrt)); 3031 if ((xmrt.wbuf.fd = imsg.fd) == -1) 3032 log_warnx("expected to receive fd for mrt dump " 3033 "but didn't receive any"); 3034 3035 mrt = mrt_get(&mrthead, &xmrt); 3036 if (mrt == NULL) { 3037 /* new dump */ 3038 mrt = calloc(1, sizeof(struct mrt)); 3039 if (mrt == NULL) 3040 fatal("session_dispatch_imsg"); 3041 memcpy(mrt, &xmrt, sizeof(struct mrt)); 3042 TAILQ_INIT(&mrt->wbuf.bufs); 3043 LIST_INSERT_HEAD(&mrthead, mrt, entry); 3044 } else { 3045 /* old dump reopened */ 3046 close(mrt->wbuf.fd); 3047 mrt->wbuf.fd = xmrt.wbuf.fd; 3048 } 3049 break; 3050 case IMSG_MRT_CLOSE: 3051 if (imsg.hdr.len > IMSG_HEADER_SIZE + 3052 sizeof(struct mrt)) { 3053 log_warnx("wrong imsg len"); 3054 break; 3055 } 3056 3057 memcpy(&xmrt, imsg.data, sizeof(struct mrt)); 3058 mrt = mrt_get(&mrthead, &xmrt); 3059 if (mrt != NULL) 3060 mrt_done(mrt); 3061 break; 3062 case IMSG_CTL_KROUTE: 3063 case IMSG_CTL_KROUTE_ADDR: 3064 case IMSG_CTL_SHOW_NEXTHOP: 3065 case IMSG_CTL_SHOW_INTERFACE: 3066 case IMSG_CTL_SHOW_FIB_TABLES: 3067 case IMSG_CTL_SHOW_RTR: 3068 case IMSG_CTL_SHOW_TIMER: 3069 if (idx != PFD_PIPE_MAIN) 3070 fatalx("ctl kroute request not from parent"); 3071 control_imsg_relay(&imsg); 3072 break; 3073 case IMSG_CTL_SHOW_RIB: 3074 case IMSG_CTL_SHOW_RIB_PREFIX: 3075 case IMSG_CTL_SHOW_RIB_COMMUNITIES: 3076 case IMSG_CTL_SHOW_RIB_ATTR: 3077 case IMSG_CTL_SHOW_RIB_MEM: 3078 case IMSG_CTL_SHOW_RIB_HASH: 3079 case IMSG_CTL_SHOW_NETWORK: 3080 case IMSG_CTL_SHOW_NEIGHBOR: 3081 case IMSG_CTL_SHOW_SET: 3082 if (idx != PFD_PIPE_ROUTE_CTL) 3083 fatalx("ctl rib request not from RDE"); 3084 control_imsg_relay(&imsg); 3085 break; 3086 case IMSG_CTL_END: 3087 case IMSG_CTL_RESULT: 3088 control_imsg_relay(&imsg); 3089 break; 3090 case IMSG_UPDATE: 3091 if (idx != PFD_PIPE_ROUTE) 3092 fatalx("update request not from RDE"); 3093 if (imsg.hdr.len > IMSG_HEADER_SIZE + 3094 MAX_PKTSIZE - MSGSIZE_HEADER || 3095 imsg.hdr.len < IMSG_HEADER_SIZE + 3096 MSGSIZE_UPDATE_MIN - MSGSIZE_HEADER) 3097 log_warnx("RDE sent invalid update"); 3098 else 3099 session_update(imsg.hdr.peerid, imsg.data, 3100 imsg.hdr.len - IMSG_HEADER_SIZE); 3101 break; 3102 case IMSG_UPDATE_ERR: 3103 if (idx != PFD_PIPE_ROUTE) 3104 fatalx("update request not from RDE"); 3105 if (imsg.hdr.len < IMSG_HEADER_SIZE + 2) { 3106 log_warnx("RDE sent invalid notification"); 3107 break; 3108 } 3109 if ((p = getpeerbyid(conf, imsg.hdr.peerid)) == NULL) { 3110 log_warnx("no such peer: id=%u", 3111 imsg.hdr.peerid); 3112 break; 3113 } 3114 data = imsg.data; 3115 errcode = *data++; 3116 subcode = *data++; 3117 3118 if (imsg.hdr.len == IMSG_HEADER_SIZE + 2) 3119 data = NULL; 3120 3121 session_notification(p, errcode, subcode, 3122 data, imsg.hdr.len - IMSG_HEADER_SIZE - 2); 3123 switch (errcode) { 3124 case ERR_CEASE: 3125 switch (subcode) { 3126 case ERR_CEASE_MAX_PREFIX: 3127 case ERR_CEASE_MAX_SENT_PREFIX: 3128 t = p->conf.max_out_prefix_restart; 3129 if (subcode == ERR_CEASE_MAX_PREFIX) 3130 t = p->conf.max_prefix_restart; 3131 3132 bgp_fsm(p, EVNT_STOP); 3133 if (t) 3134 timer_set(&p->timers, 3135 Timer_IdleHold, 60 * t); 3136 break; 3137 default: 3138 bgp_fsm(p, EVNT_CON_FATAL); 3139 break; 3140 } 3141 break; 3142 default: 3143 bgp_fsm(p, EVNT_CON_FATAL); 3144 break; 3145 } 3146 break; 3147 case IMSG_REFRESH: 3148 if (idx != PFD_PIPE_ROUTE) 3149 fatalx("route refresh request not from RDE"); 3150 if (imsg.hdr.len < IMSG_HEADER_SIZE + sizeof(rr)) { 3151 log_warnx("RDE sent invalid refresh msg"); 3152 break; 3153 } 3154 if ((p = getpeerbyid(conf, imsg.hdr.peerid)) == NULL) { 3155 log_warnx("no such peer: id=%u", 3156 imsg.hdr.peerid); 3157 break; 3158 } 3159 memcpy(&rr, imsg.data, sizeof(rr)); 3160 if (rr.aid >= AID_MAX) 3161 fatalx("IMSG_REFRESH: bad AID"); 3162 session_rrefresh(p, rr.aid, rr.subtype); 3163 break; 3164 case IMSG_SESSION_RESTARTED: 3165 if (idx != PFD_PIPE_ROUTE) 3166 fatalx("update request not from RDE"); 3167 if (imsg.hdr.len < IMSG_HEADER_SIZE + sizeof(aid)) { 3168 log_warnx("RDE sent invalid restart msg"); 3169 break; 3170 } 3171 if ((p = getpeerbyid(conf, imsg.hdr.peerid)) == NULL) { 3172 log_warnx("no such peer: id=%u", 3173 imsg.hdr.peerid); 3174 break; 3175 } 3176 memcpy(&aid, imsg.data, sizeof(aid)); 3177 if (aid >= AID_MAX) 3178 fatalx("IMSG_SESSION_RESTARTED: bad AID"); 3179 if (p->capa.neg.grestart.flags[aid] & 3180 CAPA_GR_RESTARTING) { 3181 log_peer_warnx(&p->conf, 3182 "graceful restart of %s finished", 3183 aid2str(aid)); 3184 p->capa.neg.grestart.flags[aid] &= 3185 ~CAPA_GR_RESTARTING; 3186 timer_stop(&p->timers, Timer_RestartTimeout); 3187 3188 /* signal back to RDE to cleanup stale routes */ 3189 if (imsg_rde(IMSG_SESSION_RESTARTED, 3190 imsg.hdr.peerid, &aid, sizeof(aid)) == -1) 3191 fatal("imsg_compose: " 3192 "IMSG_SESSION_RESTARTED"); 3193 } 3194 break; 3195 case IMSG_SESSION_DOWN: 3196 if (idx != PFD_PIPE_ROUTE) 3197 fatalx("update request not from RDE"); 3198 if ((p = getpeerbyid(conf, imsg.hdr.peerid)) == NULL) { 3199 log_warnx("no such peer: id=%u", 3200 imsg.hdr.peerid); 3201 break; 3202 } 3203 session_stop(p, ERR_CEASE_ADMIN_DOWN); 3204 break; 3205 default: 3206 break; 3207 } 3208 imsg_free(&imsg); 3209 } 3210 } 3211 3212 int 3213 la_cmp(struct listen_addr *a, struct listen_addr *b) 3214 { 3215 struct sockaddr_in *in_a, *in_b; 3216 struct sockaddr_in6 *in6_a, *in6_b; 3217 3218 if (a->sa.ss_family != b->sa.ss_family) 3219 return (1); 3220 3221 switch (a->sa.ss_family) { 3222 case AF_INET: 3223 in_a = (struct sockaddr_in *)&a->sa; 3224 in_b = (struct sockaddr_in *)&b->sa; 3225 if (in_a->sin_addr.s_addr != in_b->sin_addr.s_addr) 3226 return (1); 3227 if (in_a->sin_port != in_b->sin_port) 3228 return (1); 3229 break; 3230 case AF_INET6: 3231 in6_a = (struct sockaddr_in6 *)&a->sa; 3232 in6_b = (struct sockaddr_in6 *)&b->sa; 3233 if (bcmp(&in6_a->sin6_addr, &in6_b->sin6_addr, 3234 sizeof(struct in6_addr))) 3235 return (1); 3236 if (in6_a->sin6_port != in6_b->sin6_port) 3237 return (1); 3238 break; 3239 default: 3240 fatal("king bula sez: unknown address family"); 3241 /* NOTREACHED */ 3242 } 3243 3244 return (0); 3245 } 3246 3247 struct peer * 3248 getpeerbydesc(struct bgpd_config *c, const char *descr) 3249 { 3250 struct peer *p, *res = NULL; 3251 int match = 0; 3252 3253 RB_FOREACH(p, peer_head, &c->peers) 3254 if (!strcmp(p->conf.descr, descr)) { 3255 res = p; 3256 match++; 3257 } 3258 3259 if (match > 1) 3260 log_info("neighbor description \"%s\" not unique, request " 3261 "aborted", descr); 3262 3263 if (match == 1) 3264 return (res); 3265 else 3266 return (NULL); 3267 } 3268 3269 struct peer * 3270 getpeerbyip(struct bgpd_config *c, struct sockaddr *ip) 3271 { 3272 struct bgpd_addr addr; 3273 struct peer *p, *newpeer, *loose = NULL; 3274 u_int32_t id; 3275 3276 sa2addr(ip, &addr, NULL); 3277 3278 /* we might want a more effective way to find peers by IP */ 3279 RB_FOREACH(p, peer_head, &c->peers) 3280 if (!p->conf.template && 3281 !memcmp(&addr, &p->conf.remote_addr, sizeof(addr))) 3282 return (p); 3283 3284 /* try template matching */ 3285 RB_FOREACH(p, peer_head, &c->peers) 3286 if (p->conf.template && 3287 p->conf.remote_addr.aid == addr.aid && 3288 session_match_mask(p, &addr)) 3289 if (loose == NULL || loose->conf.remote_masklen < 3290 p->conf.remote_masklen) 3291 loose = p; 3292 3293 if (loose != NULL) { 3294 /* clone */ 3295 if ((newpeer = malloc(sizeof(struct peer))) == NULL) 3296 fatal(NULL); 3297 memcpy(newpeer, loose, sizeof(struct peer)); 3298 for (id = PEER_ID_DYN_MAX; id > PEER_ID_STATIC_MAX; id--) { 3299 if (getpeerbyid(c, id) == NULL) /* we found a free id */ 3300 break; 3301 } 3302 newpeer->template = loose; 3303 session_template_clone(newpeer, ip, id, 0); 3304 newpeer->state = newpeer->prev_state = STATE_NONE; 3305 newpeer->reconf_action = RECONF_KEEP; 3306 newpeer->rbuf = NULL; 3307 newpeer->rpending = 0; 3308 init_peer(newpeer); 3309 bgp_fsm(newpeer, EVNT_START); 3310 if (RB_INSERT(peer_head, &c->peers, newpeer) != NULL) 3311 fatalx("%s: peer tree is corrupt", __func__); 3312 return (newpeer); 3313 } 3314 3315 return (NULL); 3316 } 3317 3318 struct peer * 3319 getpeerbyid(struct bgpd_config *c, u_int32_t peerid) 3320 { 3321 static struct peer lookup; 3322 3323 lookup.conf.id = peerid; 3324 3325 return RB_FIND(peer_head, &c->peers, &lookup); 3326 } 3327 3328 int 3329 peer_matched(struct peer *p, struct ctl_neighbor *n) 3330 { 3331 char *s; 3332 3333 if (n && n->addr.aid) { 3334 if (memcmp(&p->conf.remote_addr, &n->addr, 3335 sizeof(p->conf.remote_addr))) 3336 return 0; 3337 } else if (n && n->descr[0]) { 3338 s = n->is_group ? p->conf.group : p->conf.descr; 3339 if (strcmp(s, n->descr)) 3340 return 0; 3341 } 3342 return 1; 3343 } 3344 3345 void 3346 session_template_clone(struct peer *p, struct sockaddr *ip, u_int32_t id, 3347 u_int32_t as) 3348 { 3349 struct bgpd_addr remote_addr; 3350 3351 if (ip) 3352 sa2addr(ip, &remote_addr, NULL); 3353 else 3354 memcpy(&remote_addr, &p->conf.remote_addr, sizeof(remote_addr)); 3355 3356 memcpy(&p->conf, &p->template->conf, sizeof(struct peer_config)); 3357 3358 p->conf.id = id; 3359 3360 if (as) { 3361 p->conf.remote_as = as; 3362 p->conf.ebgp = (p->conf.remote_as != p->conf.local_as); 3363 if (!p->conf.ebgp) 3364 /* force enforce_as off for iBGP sessions */ 3365 p->conf.enforce_as = ENFORCE_AS_OFF; 3366 } 3367 3368 memcpy(&p->conf.remote_addr, &remote_addr, sizeof(remote_addr)); 3369 switch (p->conf.remote_addr.aid) { 3370 case AID_INET: 3371 p->conf.remote_masklen = 32; 3372 break; 3373 case AID_INET6: 3374 p->conf.remote_masklen = 128; 3375 break; 3376 } 3377 p->conf.template = 0; 3378 } 3379 3380 int 3381 session_match_mask(struct peer *p, struct bgpd_addr *a) 3382 { 3383 struct in_addr v4masked; 3384 struct in6_addr v6masked; 3385 3386 switch (p->conf.remote_addr.aid) { 3387 case AID_INET: 3388 inet4applymask(&v4masked, &a->v4, p->conf.remote_masklen); 3389 if (p->conf.remote_addr.v4.s_addr == v4masked.s_addr) 3390 return (1); 3391 return (0); 3392 case AID_INET6: 3393 inet6applymask(&v6masked, &a->v6, p->conf.remote_masklen); 3394 3395 if (memcmp(&v6masked, &p->conf.remote_addr.v6, 3396 sizeof(v6masked)) == 0) 3397 return (1); 3398 return (0); 3399 } 3400 return (0); 3401 } 3402 3403 void 3404 session_down(struct peer *peer) 3405 { 3406 bzero(&peer->capa.neg, sizeof(peer->capa.neg)); 3407 peer->stats.last_updown = getmonotime(); 3408 /* 3409 * session_down is called in the exit code path so check 3410 * if the RDE is still around, if not there is no need to 3411 * send the message. 3412 */ 3413 if (ibuf_rde == NULL) 3414 return; 3415 if (imsg_rde(IMSG_SESSION_DOWN, peer->conf.id, NULL, 0) == -1) 3416 fatalx("imsg_compose error"); 3417 } 3418 3419 void 3420 session_up(struct peer *p) 3421 { 3422 struct session_up sup; 3423 3424 if (imsg_rde(IMSG_SESSION_ADD, p->conf.id, 3425 &p->conf, sizeof(p->conf)) == -1) 3426 fatalx("imsg_compose error"); 3427 3428 if (p->local.aid == AID_INET) { 3429 sup.local_v4_addr = p->local; 3430 sup.local_v6_addr = p->local_alt; 3431 } else { 3432 sup.local_v6_addr = p->local; 3433 sup.local_v4_addr = p->local_alt; 3434 } 3435 sup.remote_addr = p->remote; 3436 3437 sup.remote_bgpid = p->remote_bgpid; 3438 sup.short_as = p->short_as; 3439 memcpy(&sup.capa, &p->capa.neg, sizeof(sup.capa)); 3440 p->stats.last_updown = getmonotime(); 3441 if (imsg_rde(IMSG_SESSION_UP, p->conf.id, &sup, sizeof(sup)) == -1) 3442 fatalx("imsg_compose error"); 3443 } 3444 3445 int 3446 imsg_ctl_parent(int type, u_int32_t peerid, pid_t pid, void *data, 3447 u_int16_t datalen) 3448 { 3449 return (imsg_compose(ibuf_main, type, peerid, pid, -1, data, datalen)); 3450 } 3451 3452 int 3453 imsg_ctl_rde(int type, pid_t pid, void *data, u_int16_t datalen) 3454 { 3455 if (ibuf_rde_ctl == NULL) { 3456 log_warnx("Can't send message %u to RDE, ctl pipe closed", 3457 type); 3458 return (0); 3459 } 3460 /* 3461 * Use control socket to talk to RDE to bypass the queue of the 3462 * regular imsg socket. 3463 */ 3464 return (imsg_compose(ibuf_rde_ctl, type, 0, pid, -1, data, datalen)); 3465 } 3466 3467 int 3468 imsg_rde(int type, uint32_t peerid, void *data, u_int16_t datalen) 3469 { 3470 if (ibuf_rde == NULL) { 3471 log_warnx("Can't send message %u to RDE, pipe closed", type); 3472 return (0); 3473 } 3474 3475 return (imsg_compose(ibuf_rde, type, peerid, 0, -1, data, datalen)); 3476 } 3477 3478 void 3479 session_demote(struct peer *p, int level) 3480 { 3481 struct demote_msg msg; 3482 3483 strlcpy(msg.demote_group, p->conf.demote_group, 3484 sizeof(msg.demote_group)); 3485 msg.level = level; 3486 if (imsg_compose(ibuf_main, IMSG_DEMOTE, p->conf.id, 0, -1, 3487 &msg, sizeof(msg)) == -1) 3488 fatalx("imsg_compose error"); 3489 3490 p->demoted += level; 3491 } 3492 3493 void 3494 session_stop(struct peer *peer, u_int8_t subcode) 3495 { 3496 char data[REASON_LEN]; 3497 size_t datalen; 3498 size_t reason_len; 3499 char *communication; 3500 3501 datalen = 0; 3502 communication = peer->conf.reason; 3503 3504 if ((subcode == ERR_CEASE_ADMIN_DOWN || 3505 subcode == ERR_CEASE_ADMIN_RESET) 3506 && communication && *communication) { 3507 reason_len = strlen(communication); 3508 if (reason_len > REASON_LEN - 1) { 3509 log_peer_warnx(&peer->conf, 3510 "trying to send overly long shutdown reason"); 3511 } else { 3512 data[0] = reason_len; 3513 datalen = reason_len + sizeof(data[0]); 3514 memcpy(data + 1, communication, reason_len); 3515 } 3516 } 3517 switch (peer->state) { 3518 case STATE_OPENSENT: 3519 case STATE_OPENCONFIRM: 3520 case STATE_ESTABLISHED: 3521 session_notification(peer, ERR_CEASE, subcode, data, datalen); 3522 break; 3523 default: 3524 /* session not open, no need to send notification */ 3525 break; 3526 } 3527 bgp_fsm(peer, EVNT_STOP); 3528 } 3529 3530 void 3531 merge_peers(struct bgpd_config *c, struct bgpd_config *nc) 3532 { 3533 struct peer *p, *np, *next; 3534 3535 RB_FOREACH(p, peer_head, &c->peers) { 3536 /* templates are handled specially */ 3537 if (p->template != NULL) 3538 continue; 3539 np = getpeerbyid(nc, p->conf.id); 3540 if (np == NULL) { 3541 p->reconf_action = RECONF_DELETE; 3542 continue; 3543 } 3544 3545 /* peer no longer uses TCP MD5SIG so deconfigure */ 3546 if (p->conf.auth.method == AUTH_MD5SIG && 3547 np->conf.auth.method != AUTH_MD5SIG) 3548 tcp_md5_del_listener(c, p); 3549 else if (np->conf.auth.method == AUTH_MD5SIG) 3550 tcp_md5_add_listener(c, np); 3551 3552 memcpy(&p->conf, &np->conf, sizeof(p->conf)); 3553 RB_REMOVE(peer_head, &nc->peers, np); 3554 free(np); 3555 3556 p->reconf_action = RECONF_KEEP; 3557 3558 /* had demotion, is demoted, demote removed? */ 3559 if (p->demoted && !p->conf.demote_group[0]) 3560 session_demote(p, -1); 3561 3562 /* if session is not open then refresh pfkey data */ 3563 if (p->state < STATE_OPENSENT && !p->template) 3564 imsg_compose(ibuf_main, IMSG_PFKEY_RELOAD, 3565 p->conf.id, 0, -1, NULL, 0); 3566 3567 /* sync the RDE in case we keep the peer */ 3568 if (imsg_rde(IMSG_SESSION_ADD, p->conf.id, 3569 &p->conf, sizeof(struct peer_config)) == -1) 3570 fatalx("imsg_compose error"); 3571 3572 /* apply the config to all clones of a template */ 3573 if (p->conf.template) { 3574 struct peer *xp; 3575 RB_FOREACH(xp, peer_head, &c->peers) { 3576 if (xp->template != p) 3577 continue; 3578 session_template_clone(xp, NULL, xp->conf.id, 3579 xp->conf.remote_as); 3580 if (imsg_rde(IMSG_SESSION_ADD, xp->conf.id, 3581 &xp->conf, sizeof(xp->conf)) == -1) 3582 fatalx("imsg_compose error"); 3583 } 3584 } 3585 } 3586 3587 if (imsg_rde(IMSG_RECONF_DRAIN, 0, NULL, 0) == -1) 3588 fatalx("imsg_compose error"); 3589 3590 /* pfkeys of new peers already loaded by the parent process */ 3591 RB_FOREACH_SAFE(np, peer_head, &nc->peers, next) { 3592 RB_REMOVE(peer_head, &nc->peers, np); 3593 if (RB_INSERT(peer_head, &c->peers, np) != NULL) 3594 fatalx("%s: peer tree is corrupt", __func__); 3595 if (np->conf.auth.method == AUTH_MD5SIG) 3596 tcp_md5_add_listener(c, np); 3597 } 3598 } 3599