1 /* $OpenBSD: session.c,v 1.386 2019/06/22 05:36:40 claudio Exp $ */ 2 3 /* 4 * Copyright (c) 2003, 2004, 2005 Henning Brauer <henning@openbsd.org> 5 * Copyright (c) 2017 Peter van Dijk <peter.van.dijk@powerdns.com> 6 * 7 * Permission to use, copy, modify, and distribute this software for any 8 * purpose with or without fee is hereby granted, provided that the above 9 * copyright notice and this permission notice appear in all copies. 10 * 11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18 */ 19 20 #include <sys/types.h> 21 22 #include <sys/mman.h> 23 #include <sys/socket.h> 24 #include <sys/time.h> 25 #include <sys/resource.h> 26 #include <sys/un.h> 27 #include <netinet/in.h> 28 #include <netinet/ip.h> 29 #include <netinet/tcp.h> 30 #include <arpa/inet.h> 31 #include <limits.h> 32 33 #include <err.h> 34 #include <errno.h> 35 #include <fcntl.h> 36 #include <poll.h> 37 #include <pwd.h> 38 #include <signal.h> 39 #include <stdio.h> 40 #include <stdlib.h> 41 #include <string.h> 42 #include <syslog.h> 43 #include <unistd.h> 44 45 #include "bgpd.h" 46 #include "mrt.h" 47 #include "session.h" 48 #include "log.h" 49 50 #define PFD_PIPE_MAIN 0 51 #define PFD_PIPE_ROUTE 1 52 #define PFD_PIPE_ROUTE_CTL 2 53 #define PFD_SOCK_CTL 3 54 #define PFD_SOCK_RCTL 4 55 #define PFD_LISTENERS_START 5 56 57 void session_sighdlr(int); 58 int setup_listeners(u_int *); 59 void init_peer(struct peer *); 60 void start_timer_holdtime(struct peer *); 61 void start_timer_keepalive(struct peer *); 62 void session_close_connection(struct peer *); 63 void change_state(struct peer *, enum session_state, enum session_events); 64 int session_setup_socket(struct peer *); 65 void session_accept(int); 66 int session_connect(struct peer *); 67 void session_tcp_established(struct peer *); 68 void session_capa_ann_none(struct peer *); 69 int session_capa_add(struct ibuf *, u_int8_t, u_int8_t); 70 int session_capa_add_mp(struct ibuf *, u_int8_t); 71 int session_capa_add_gr(struct peer *, struct ibuf *, u_int8_t); 72 struct bgp_msg *session_newmsg(enum msg_type, u_int16_t); 73 int session_sendmsg(struct bgp_msg *, struct peer *); 74 void session_open(struct peer *); 75 void session_keepalive(struct peer *); 76 void session_update(u_int32_t, void *, size_t); 77 void session_notification(struct peer *, u_int8_t, u_int8_t, void *, 78 ssize_t); 79 void session_rrefresh(struct peer *, u_int8_t); 80 int session_graceful_restart(struct peer *); 81 int session_graceful_stop(struct peer *); 82 int session_dispatch_msg(struct pollfd *, struct peer *); 83 void session_process_msg(struct peer *); 84 int parse_header(struct peer *, u_char *, u_int16_t *, u_int8_t *); 85 int parse_open(struct peer *); 86 int parse_update(struct peer *); 87 int parse_refresh(struct peer *); 88 int parse_notification(struct peer *); 89 int parse_capabilities(struct peer *, u_char *, u_int16_t, u_int32_t *); 90 int capa_neg_calc(struct peer *); 91 void session_dispatch_imsg(struct imsgbuf *, int, u_int *); 92 void session_up(struct peer *); 93 void session_down(struct peer *); 94 int imsg_rde(int, u_int32_t, void *, u_int16_t); 95 void session_demote(struct peer *, int); 96 void merge_peers(struct bgpd_config *, struct bgpd_config *); 97 98 int la_cmp(struct listen_addr *, struct listen_addr *); 99 void session_template_clone(struct peer *, struct sockaddr *, 100 u_int32_t, u_int32_t); 101 int session_match_mask(struct peer *, struct bgpd_addr *); 102 103 struct bgpd_config *conf, *nconf; 104 struct bgpd_sysdep sysdep; 105 volatile sig_atomic_t session_quit; 106 int pending_reconf; 107 int csock = -1, rcsock = -1; 108 u_int peer_cnt; 109 struct imsgbuf *ibuf_rde; 110 struct imsgbuf *ibuf_rde_ctl; 111 struct imsgbuf *ibuf_main; 112 113 struct mrt_head mrthead; 114 time_t pauseaccept; 115 116 static inline int 117 peer_compare(const struct peer *a, const struct peer *b) 118 { 119 return a->conf.id - b->conf.id; 120 } 121 122 RB_GENERATE(peer_head, peer, entry, peer_compare); 123 124 void 125 session_sighdlr(int sig) 126 { 127 switch (sig) { 128 case SIGINT: 129 case SIGTERM: 130 session_quit = 1; 131 break; 132 } 133 } 134 135 int 136 setup_listeners(u_int *la_cnt) 137 { 138 int ttl = 255; 139 struct listen_addr *la; 140 u_int cnt = 0; 141 142 TAILQ_FOREACH(la, conf->listen_addrs, entry) { 143 la->reconf = RECONF_NONE; 144 cnt++; 145 146 if (la->flags & LISTENER_LISTENING) 147 continue; 148 149 if (la->fd == -1) { 150 log_warn("cannot establish listener on %s: invalid fd", 151 log_sockaddr((struct sockaddr *)&la->sa, 152 la->sa_len)); 153 continue; 154 } 155 156 if (tcp_md5_listen(la->fd, &conf->peers) == -1) 157 fatal("tcp_md5_listen"); 158 159 /* set ttl to 255 so that ttl-security works */ 160 if (la->sa.ss_family == AF_INET && setsockopt(la->fd, 161 IPPROTO_IP, IP_TTL, &ttl, sizeof(ttl)) == -1) { 162 log_warn("setup_listeners setsockopt TTL"); 163 continue; 164 } 165 if (la->sa.ss_family == AF_INET6 && setsockopt(la->fd, 166 IPPROTO_IPV6, IPV6_UNICAST_HOPS, &ttl, sizeof(ttl)) == -1) { 167 log_warn("setup_listeners setsockopt hoplimit"); 168 continue; 169 } 170 171 if (listen(la->fd, MAX_BACKLOG)) { 172 close(la->fd); 173 fatal("listen"); 174 } 175 176 la->flags |= LISTENER_LISTENING; 177 178 log_info("listening on %s", 179 log_sockaddr((struct sockaddr *)&la->sa, la->sa_len)); 180 } 181 182 *la_cnt = cnt; 183 184 return (0); 185 } 186 187 void 188 session_main(int debug, int verbose) 189 { 190 int timeout; 191 unsigned int i, j, idx_peers, idx_listeners, idx_mrts; 192 u_int pfd_elms = 0, peer_l_elms = 0, mrt_l_elms = 0; 193 u_int listener_cnt, ctl_cnt, mrt_cnt; 194 u_int new_cnt; 195 struct passwd *pw; 196 struct peer *p, **peer_l = NULL, *next; 197 struct mrt *m, *xm, **mrt_l = NULL; 198 struct pollfd *pfd = NULL; 199 struct ctl_conn *ctl_conn; 200 struct listen_addr *la; 201 void *newp; 202 time_t now; 203 short events; 204 205 log_init(debug, LOG_DAEMON); 206 log_setverbose(verbose); 207 208 bgpd_process = PROC_SE; 209 log_procinit(log_procnames[bgpd_process]); 210 211 if ((pw = getpwnam(BGPD_USER)) == NULL) 212 fatal(NULL); 213 214 if (chroot(pw->pw_dir) == -1) 215 fatal("chroot"); 216 if (chdir("/") == -1) 217 fatal("chdir(\"/\")"); 218 219 setproctitle("session engine"); 220 221 if (setgroups(1, &pw->pw_gid) || 222 setresgid(pw->pw_gid, pw->pw_gid, pw->pw_gid) || 223 setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid)) 224 fatal("can't drop privileges"); 225 226 if (pledge("stdio inet recvfd", NULL) == -1) 227 fatal("pledge"); 228 229 signal(SIGTERM, session_sighdlr); 230 signal(SIGINT, session_sighdlr); 231 signal(SIGPIPE, SIG_IGN); 232 signal(SIGHUP, SIG_IGN); 233 signal(SIGALRM, SIG_IGN); 234 signal(SIGUSR1, SIG_IGN); 235 236 if ((ibuf_main = malloc(sizeof(struct imsgbuf))) == NULL) 237 fatal(NULL); 238 imsg_init(ibuf_main, 3); 239 240 TAILQ_INIT(&ctl_conns); 241 LIST_INIT(&mrthead); 242 listener_cnt = 0; 243 peer_cnt = 0; 244 ctl_cnt = 0; 245 246 conf = new_config(); 247 log_info("session engine ready"); 248 249 while (session_quit == 0) { 250 /* check for peers to be initialized or deleted */ 251 if (!pending_reconf) { 252 RB_FOREACH_SAFE(p, peer_head, &conf->peers, next) { 253 /* cloned peer that idled out? */ 254 if (p->template && (p->state == STATE_IDLE || 255 p->state == STATE_ACTIVE) && 256 time(NULL) - p->stats.last_updown >= 257 INTERVAL_HOLD_CLONED) 258 p->reconf_action = RECONF_DELETE; 259 260 /* new peer that needs init? */ 261 if (p->state == STATE_NONE) 262 init_peer(p); 263 264 /* reinit due? */ 265 if (p->reconf_action == RECONF_REINIT) { 266 session_stop(p, ERR_CEASE_ADMIN_RESET); 267 if (!p->conf.down) 268 timer_set(p, Timer_IdleHold, 0); 269 } 270 271 /* deletion due? */ 272 if (p->reconf_action == RECONF_DELETE) { 273 if (p->demoted) 274 session_demote(p, -1); 275 p->conf.demote_group[0] = 0; 276 session_stop(p, ERR_CEASE_PEER_UNCONF); 277 log_peer_warnx(&p->conf, "removed"); 278 RB_REMOVE(peer_head, &conf->peers, p); 279 timer_remove_all(p); 280 pfkey_remove(p); 281 free(p); 282 peer_cnt--; 283 continue; 284 } 285 p->reconf_action = RECONF_NONE; 286 } 287 } 288 289 if (peer_cnt > peer_l_elms) { 290 if ((newp = reallocarray(peer_l, peer_cnt, 291 sizeof(struct peer *))) == NULL) { 292 /* panic for now */ 293 log_warn("could not resize peer_l from %u -> %u" 294 " entries", peer_l_elms, peer_cnt); 295 fatalx("exiting"); 296 } 297 peer_l = newp; 298 peer_l_elms = peer_cnt; 299 } 300 301 mrt_cnt = 0; 302 for (m = LIST_FIRST(&mrthead); m != NULL; m = xm) { 303 xm = LIST_NEXT(m, entry); 304 if (m->state == MRT_STATE_REMOVE) { 305 mrt_clean(m); 306 LIST_REMOVE(m, entry); 307 free(m); 308 continue; 309 } 310 if (m->wbuf.queued) 311 mrt_cnt++; 312 } 313 314 if (mrt_cnt > mrt_l_elms) { 315 if ((newp = reallocarray(mrt_l, mrt_cnt, 316 sizeof(struct mrt *))) == NULL) { 317 /* panic for now */ 318 log_warn("could not resize mrt_l from %u -> %u" 319 " entries", mrt_l_elms, mrt_cnt); 320 fatalx("exiting"); 321 } 322 mrt_l = newp; 323 mrt_l_elms = mrt_cnt; 324 } 325 326 new_cnt = PFD_LISTENERS_START + listener_cnt + peer_cnt + 327 ctl_cnt + mrt_cnt; 328 if (new_cnt > pfd_elms) { 329 if ((newp = reallocarray(pfd, new_cnt, 330 sizeof(struct pollfd))) == NULL) { 331 /* panic for now */ 332 log_warn("could not resize pfd from %u -> %u" 333 " entries", pfd_elms, new_cnt); 334 fatalx("exiting"); 335 } 336 pfd = newp; 337 pfd_elms = new_cnt; 338 } 339 340 bzero(pfd, sizeof(struct pollfd) * pfd_elms); 341 342 set_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main); 343 set_pollfd(&pfd[PFD_PIPE_ROUTE], ibuf_rde); 344 set_pollfd(&pfd[PFD_PIPE_ROUTE_CTL], ibuf_rde_ctl); 345 346 if (pauseaccept == 0) { 347 pfd[PFD_SOCK_CTL].fd = csock; 348 pfd[PFD_SOCK_CTL].events = POLLIN; 349 pfd[PFD_SOCK_RCTL].fd = rcsock; 350 pfd[PFD_SOCK_RCTL].events = POLLIN; 351 } else { 352 pfd[PFD_SOCK_CTL].fd = -1; 353 pfd[PFD_SOCK_RCTL].fd = -1; 354 } 355 356 i = PFD_LISTENERS_START; 357 TAILQ_FOREACH(la, conf->listen_addrs, entry) { 358 if (pauseaccept == 0) { 359 pfd[i].fd = la->fd; 360 pfd[i].events = POLLIN; 361 } else 362 pfd[i].fd = -1; 363 i++; 364 } 365 idx_listeners = i; 366 timeout = 240; /* loop every 240s at least */ 367 368 now = getmonotime(); 369 RB_FOREACH(p, peer_head, &conf->peers) { 370 time_t nextaction; 371 struct peer_timer *pt; 372 373 /* check timers */ 374 if ((pt = timer_nextisdue(p, now)) != NULL) { 375 switch (pt->type) { 376 case Timer_Hold: 377 bgp_fsm(p, EVNT_TIMER_HOLDTIME); 378 break; 379 case Timer_ConnectRetry: 380 bgp_fsm(p, EVNT_TIMER_CONNRETRY); 381 break; 382 case Timer_Keepalive: 383 bgp_fsm(p, EVNT_TIMER_KEEPALIVE); 384 break; 385 case Timer_IdleHold: 386 bgp_fsm(p, EVNT_START); 387 break; 388 case Timer_IdleHoldReset: 389 p->IdleHoldTime /= 2; 390 if (p->IdleHoldTime <= 391 INTERVAL_IDLE_HOLD_INITIAL) { 392 p->IdleHoldTime = 393 INTERVAL_IDLE_HOLD_INITIAL; 394 timer_stop(p, 395 Timer_IdleHoldReset); 396 p->errcnt = 0; 397 } else 398 timer_set(p, 399 Timer_IdleHoldReset, 400 p->IdleHoldTime); 401 break; 402 case Timer_CarpUndemote: 403 timer_stop(p, Timer_CarpUndemote); 404 if (p->demoted && 405 p->state == STATE_ESTABLISHED) 406 session_demote(p, -1); 407 break; 408 case Timer_RestartTimeout: 409 timer_stop(p, Timer_RestartTimeout); 410 session_graceful_stop(p); 411 break; 412 default: 413 fatalx("King Bula lost in time"); 414 } 415 } 416 if ((nextaction = timer_nextduein(p, now)) != -1 && 417 nextaction < timeout) 418 timeout = nextaction; 419 420 /* are we waiting for a write? */ 421 events = POLLIN; 422 if (p->wbuf.queued > 0 || p->state == STATE_CONNECT) 423 events |= POLLOUT; 424 /* is there still work to do? */ 425 if (p->rpending) 426 timeout = 0; 427 428 /* poll events */ 429 if (p->fd != -1 && events != 0) { 430 pfd[i].fd = p->fd; 431 pfd[i].events = events; 432 peer_l[i - idx_listeners] = p; 433 i++; 434 } 435 } 436 437 idx_peers = i; 438 439 LIST_FOREACH(m, &mrthead, entry) 440 if (m->wbuf.queued) { 441 pfd[i].fd = m->wbuf.fd; 442 pfd[i].events = POLLOUT; 443 mrt_l[i - idx_peers] = m; 444 i++; 445 } 446 447 idx_mrts = i; 448 449 TAILQ_FOREACH(ctl_conn, &ctl_conns, entry) { 450 pfd[i].fd = ctl_conn->ibuf.fd; 451 pfd[i].events = POLLIN; 452 if (ctl_conn->ibuf.w.queued > 0) 453 pfd[i].events |= POLLOUT; 454 i++; 455 } 456 457 if (pauseaccept && timeout > 1) 458 timeout = 1; 459 if (timeout < 0) 460 timeout = 0; 461 if (poll(pfd, i, timeout * 1000) == -1) 462 if (errno != EINTR) 463 fatal("poll error"); 464 465 /* 466 * If we previously saw fd exhaustion, we stop accept() 467 * for 1 second to throttle the accept() loop. 468 */ 469 if (pauseaccept && getmonotime() > pauseaccept + 1) 470 pauseaccept = 0; 471 472 if (handle_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main) == -1) { 473 log_warnx("SE: Lost connection to parent"); 474 session_quit = 1; 475 continue; 476 } else 477 session_dispatch_imsg(ibuf_main, PFD_PIPE_MAIN, 478 &listener_cnt); 479 480 if (handle_pollfd(&pfd[PFD_PIPE_ROUTE], ibuf_rde) == -1) { 481 log_warnx("SE: Lost connection to RDE"); 482 msgbuf_clear(&ibuf_rde->w); 483 free(ibuf_rde); 484 ibuf_rde = NULL; 485 } else 486 session_dispatch_imsg(ibuf_rde, PFD_PIPE_ROUTE, 487 &listener_cnt); 488 489 if (handle_pollfd(&pfd[PFD_PIPE_ROUTE_CTL], ibuf_rde_ctl) == 490 -1) { 491 log_warnx("SE: Lost connection to RDE control"); 492 msgbuf_clear(&ibuf_rde_ctl->w); 493 free(ibuf_rde_ctl); 494 ibuf_rde_ctl = NULL; 495 } else 496 session_dispatch_imsg(ibuf_rde_ctl, PFD_PIPE_ROUTE_CTL, 497 &listener_cnt); 498 499 if (pfd[PFD_SOCK_CTL].revents & POLLIN) 500 ctl_cnt += control_accept(csock, 0); 501 502 if (pfd[PFD_SOCK_RCTL].revents & POLLIN) 503 ctl_cnt += control_accept(rcsock, 1); 504 505 for (j = PFD_LISTENERS_START; j < idx_listeners; j++) 506 if (pfd[j].revents & POLLIN) 507 session_accept(pfd[j].fd); 508 509 for (; j < idx_peers; j++) 510 session_dispatch_msg(&pfd[j], 511 peer_l[j - idx_listeners]); 512 513 RB_FOREACH(p, peer_head, &conf->peers) 514 if (p->rbuf && p->rbuf->wpos) 515 session_process_msg(p); 516 517 for (; j < idx_mrts; j++) 518 if (pfd[j].revents & POLLOUT) 519 mrt_write(mrt_l[j - idx_peers]); 520 521 for (; j < i; j++) 522 control_dispatch_msg(&pfd[j], &ctl_cnt, &conf->peers); 523 } 524 525 RB_FOREACH_SAFE(p, peer_head, &conf->peers, next) { 526 RB_REMOVE(peer_head, &conf->peers, p); 527 strlcpy(p->conf.shutcomm, 528 "bgpd shutting down", 529 sizeof(p->conf.shutcomm)); 530 session_stop(p, ERR_CEASE_ADMIN_DOWN); 531 timer_remove_all(p); 532 free(p); 533 } 534 535 while ((m = LIST_FIRST(&mrthead)) != NULL) { 536 mrt_clean(m); 537 LIST_REMOVE(m, entry); 538 free(m); 539 } 540 541 free_config(conf); 542 free(peer_l); 543 free(mrt_l); 544 free(pfd); 545 546 /* close pipes */ 547 if (ibuf_rde) { 548 msgbuf_write(&ibuf_rde->w); 549 msgbuf_clear(&ibuf_rde->w); 550 close(ibuf_rde->fd); 551 free(ibuf_rde); 552 } 553 if (ibuf_rde_ctl) { 554 msgbuf_clear(&ibuf_rde_ctl->w); 555 close(ibuf_rde_ctl->fd); 556 free(ibuf_rde_ctl); 557 } 558 msgbuf_write(&ibuf_main->w); 559 msgbuf_clear(&ibuf_main->w); 560 close(ibuf_main->fd); 561 free(ibuf_main); 562 563 control_shutdown(csock); 564 control_shutdown(rcsock); 565 log_info("session engine exiting"); 566 exit(0); 567 } 568 569 void 570 init_peer(struct peer *p) 571 { 572 TAILQ_INIT(&p->timers); 573 p->fd = p->wbuf.fd = -1; 574 575 if (p->conf.if_depend[0]) 576 imsg_compose(ibuf_main, IMSG_IFINFO, 0, 0, -1, 577 p->conf.if_depend, sizeof(p->conf.if_depend)); 578 else 579 p->depend_ok = 1; 580 581 peer_cnt++; 582 583 change_state(p, STATE_IDLE, EVNT_NONE); 584 if (p->conf.down) 585 timer_stop(p, Timer_IdleHold); /* no autostart */ 586 else 587 timer_set(p, Timer_IdleHold, 0); /* start ASAP */ 588 589 /* 590 * on startup, demote if requested. 591 * do not handle new peers. they must reach ESTABLISHED beforehands. 592 * peers added at runtime have reconf_action set to RECONF_REINIT. 593 */ 594 if (p->reconf_action != RECONF_REINIT && p->conf.demote_group[0]) 595 session_demote(p, +1); 596 } 597 598 void 599 bgp_fsm(struct peer *peer, enum session_events event) 600 { 601 switch (peer->state) { 602 case STATE_NONE: 603 /* nothing */ 604 break; 605 case STATE_IDLE: 606 switch (event) { 607 case EVNT_START: 608 timer_stop(peer, Timer_Hold); 609 timer_stop(peer, Timer_Keepalive); 610 timer_stop(peer, Timer_IdleHold); 611 612 /* allocate read buffer */ 613 peer->rbuf = calloc(1, sizeof(struct ibuf_read)); 614 if (peer->rbuf == NULL) 615 fatal(NULL); 616 617 /* init write buffer */ 618 msgbuf_init(&peer->wbuf); 619 620 peer->stats.last_sent_errcode = 0; 621 peer->stats.last_sent_suberr = 0; 622 623 if (!peer->depend_ok) 624 timer_stop(peer, Timer_ConnectRetry); 625 else if (peer->passive || peer->conf.passive || 626 peer->conf.template) { 627 change_state(peer, STATE_ACTIVE, event); 628 timer_stop(peer, Timer_ConnectRetry); 629 } else { 630 change_state(peer, STATE_CONNECT, event); 631 timer_set(peer, Timer_ConnectRetry, 632 conf->connectretry); 633 session_connect(peer); 634 } 635 peer->passive = 0; 636 break; 637 default: 638 /* ignore */ 639 break; 640 } 641 break; 642 case STATE_CONNECT: 643 switch (event) { 644 case EVNT_START: 645 /* ignore */ 646 break; 647 case EVNT_CON_OPEN: 648 session_tcp_established(peer); 649 session_open(peer); 650 timer_stop(peer, Timer_ConnectRetry); 651 peer->holdtime = INTERVAL_HOLD_INITIAL; 652 start_timer_holdtime(peer); 653 change_state(peer, STATE_OPENSENT, event); 654 break; 655 case EVNT_CON_OPENFAIL: 656 timer_set(peer, Timer_ConnectRetry, 657 conf->connectretry); 658 session_close_connection(peer); 659 change_state(peer, STATE_ACTIVE, event); 660 break; 661 case EVNT_TIMER_CONNRETRY: 662 timer_set(peer, Timer_ConnectRetry, 663 conf->connectretry); 664 session_connect(peer); 665 break; 666 default: 667 change_state(peer, STATE_IDLE, event); 668 break; 669 } 670 break; 671 case STATE_ACTIVE: 672 switch (event) { 673 case EVNT_START: 674 /* ignore */ 675 break; 676 case EVNT_CON_OPEN: 677 session_tcp_established(peer); 678 session_open(peer); 679 timer_stop(peer, Timer_ConnectRetry); 680 peer->holdtime = INTERVAL_HOLD_INITIAL; 681 start_timer_holdtime(peer); 682 change_state(peer, STATE_OPENSENT, event); 683 break; 684 case EVNT_CON_OPENFAIL: 685 timer_set(peer, Timer_ConnectRetry, 686 conf->connectretry); 687 session_close_connection(peer); 688 change_state(peer, STATE_ACTIVE, event); 689 break; 690 case EVNT_TIMER_CONNRETRY: 691 timer_set(peer, Timer_ConnectRetry, 692 peer->holdtime); 693 change_state(peer, STATE_CONNECT, event); 694 session_connect(peer); 695 break; 696 default: 697 change_state(peer, STATE_IDLE, event); 698 break; 699 } 700 break; 701 case STATE_OPENSENT: 702 switch (event) { 703 case EVNT_START: 704 /* ignore */ 705 break; 706 case EVNT_STOP: 707 change_state(peer, STATE_IDLE, event); 708 break; 709 case EVNT_CON_CLOSED: 710 session_close_connection(peer); 711 timer_set(peer, Timer_ConnectRetry, 712 conf->connectretry); 713 change_state(peer, STATE_ACTIVE, event); 714 break; 715 case EVNT_CON_FATAL: 716 change_state(peer, STATE_IDLE, event); 717 break; 718 case EVNT_TIMER_HOLDTIME: 719 session_notification(peer, ERR_HOLDTIMEREXPIRED, 720 0, NULL, 0); 721 change_state(peer, STATE_IDLE, event); 722 break; 723 case EVNT_RCVD_OPEN: 724 /* parse_open calls change_state itself on failure */ 725 if (parse_open(peer)) 726 break; 727 session_keepalive(peer); 728 change_state(peer, STATE_OPENCONFIRM, event); 729 break; 730 case EVNT_RCVD_NOTIFICATION: 731 if (parse_notification(peer)) { 732 change_state(peer, STATE_IDLE, event); 733 /* don't punish, capa negotiation */ 734 timer_set(peer, Timer_IdleHold, 0); 735 peer->IdleHoldTime /= 2; 736 } else 737 change_state(peer, STATE_IDLE, event); 738 break; 739 default: 740 session_notification(peer, 741 ERR_FSM, ERR_FSM_UNEX_OPENSENT, NULL, 0); 742 change_state(peer, STATE_IDLE, event); 743 break; 744 } 745 break; 746 case STATE_OPENCONFIRM: 747 switch (event) { 748 case EVNT_START: 749 /* ignore */ 750 break; 751 case EVNT_STOP: 752 change_state(peer, STATE_IDLE, event); 753 break; 754 case EVNT_CON_CLOSED: 755 case EVNT_CON_FATAL: 756 change_state(peer, STATE_IDLE, event); 757 break; 758 case EVNT_TIMER_HOLDTIME: 759 session_notification(peer, ERR_HOLDTIMEREXPIRED, 760 0, NULL, 0); 761 change_state(peer, STATE_IDLE, event); 762 break; 763 case EVNT_TIMER_KEEPALIVE: 764 session_keepalive(peer); 765 break; 766 case EVNT_RCVD_KEEPALIVE: 767 start_timer_holdtime(peer); 768 change_state(peer, STATE_ESTABLISHED, event); 769 break; 770 case EVNT_RCVD_NOTIFICATION: 771 parse_notification(peer); 772 change_state(peer, STATE_IDLE, event); 773 break; 774 default: 775 session_notification(peer, 776 ERR_FSM, ERR_FSM_UNEX_OPENCONFIRM, NULL, 0); 777 change_state(peer, STATE_IDLE, event); 778 break; 779 } 780 break; 781 case STATE_ESTABLISHED: 782 switch (event) { 783 case EVNT_START: 784 /* ignore */ 785 break; 786 case EVNT_STOP: 787 change_state(peer, STATE_IDLE, event); 788 break; 789 case EVNT_CON_CLOSED: 790 case EVNT_CON_FATAL: 791 change_state(peer, STATE_IDLE, event); 792 break; 793 case EVNT_TIMER_HOLDTIME: 794 session_notification(peer, ERR_HOLDTIMEREXPIRED, 795 0, NULL, 0); 796 change_state(peer, STATE_IDLE, event); 797 break; 798 case EVNT_TIMER_KEEPALIVE: 799 session_keepalive(peer); 800 break; 801 case EVNT_RCVD_KEEPALIVE: 802 start_timer_holdtime(peer); 803 break; 804 case EVNT_RCVD_UPDATE: 805 start_timer_holdtime(peer); 806 if (parse_update(peer)) 807 change_state(peer, STATE_IDLE, event); 808 else 809 start_timer_holdtime(peer); 810 break; 811 case EVNT_RCVD_NOTIFICATION: 812 parse_notification(peer); 813 change_state(peer, STATE_IDLE, event); 814 break; 815 default: 816 session_notification(peer, 817 ERR_FSM, ERR_FSM_UNEX_ESTABLISHED, NULL, 0); 818 change_state(peer, STATE_IDLE, event); 819 break; 820 } 821 break; 822 } 823 } 824 825 void 826 start_timer_holdtime(struct peer *peer) 827 { 828 if (peer->holdtime > 0) 829 timer_set(peer, Timer_Hold, peer->holdtime); 830 else 831 timer_stop(peer, Timer_Hold); 832 } 833 834 void 835 start_timer_keepalive(struct peer *peer) 836 { 837 if (peer->holdtime > 0) 838 timer_set(peer, Timer_Keepalive, peer->holdtime / 3); 839 else 840 timer_stop(peer, Timer_Keepalive); 841 } 842 843 void 844 session_close_connection(struct peer *peer) 845 { 846 if (peer->fd != -1) { 847 close(peer->fd); 848 pauseaccept = 0; 849 } 850 peer->fd = peer->wbuf.fd = -1; 851 } 852 853 void 854 change_state(struct peer *peer, enum session_state state, 855 enum session_events event) 856 { 857 struct mrt *mrt; 858 859 switch (state) { 860 case STATE_IDLE: 861 /* carp demotion first. new peers handled in init_peer */ 862 if (peer->state == STATE_ESTABLISHED && 863 peer->conf.demote_group[0] && !peer->demoted) 864 session_demote(peer, +1); 865 866 /* 867 * try to write out what's buffered (maybe a notification), 868 * don't bother if it fails 869 */ 870 if (peer->state >= STATE_OPENSENT && peer->wbuf.queued) 871 msgbuf_write(&peer->wbuf); 872 873 /* 874 * we must start the timer for the next EVNT_START 875 * if we are coming here due to an error and the 876 * session was not established successfully before, the 877 * starttimerinterval needs to be exponentially increased 878 */ 879 if (peer->IdleHoldTime == 0) 880 peer->IdleHoldTime = INTERVAL_IDLE_HOLD_INITIAL; 881 peer->holdtime = INTERVAL_HOLD_INITIAL; 882 timer_stop(peer, Timer_ConnectRetry); 883 timer_stop(peer, Timer_Keepalive); 884 timer_stop(peer, Timer_Hold); 885 timer_stop(peer, Timer_IdleHold); 886 timer_stop(peer, Timer_IdleHoldReset); 887 session_close_connection(peer); 888 msgbuf_clear(&peer->wbuf); 889 free(peer->rbuf); 890 peer->rbuf = NULL; 891 bzero(&peer->capa.peer, sizeof(peer->capa.peer)); 892 if (!peer->template) 893 imsg_compose(ibuf_main, IMSG_PFKEY_RELOAD, 894 peer->conf.id, 0, -1, NULL, 0); 895 896 if (event != EVNT_STOP) { 897 timer_set(peer, Timer_IdleHold, peer->IdleHoldTime); 898 if (event != EVNT_NONE && 899 peer->IdleHoldTime < MAX_IDLE_HOLD/2) 900 peer->IdleHoldTime *= 2; 901 } 902 if (peer->state == STATE_ESTABLISHED) { 903 if (peer->capa.neg.grestart.restart == 2 && 904 (event == EVNT_CON_CLOSED || 905 event == EVNT_CON_FATAL)) { 906 /* don't punish graceful restart */ 907 timer_set(peer, Timer_IdleHold, 0); 908 peer->IdleHoldTime /= 2; 909 session_graceful_restart(peer); 910 } else 911 session_down(peer); 912 } 913 if (peer->state == STATE_NONE || 914 peer->state == STATE_ESTABLISHED) { 915 /* initialize capability negotiation structures */ 916 memcpy(&peer->capa.ann, &peer->conf.capabilities, 917 sizeof(peer->capa.ann)); 918 if (!peer->conf.announce_capa) 919 session_capa_ann_none(peer); 920 } 921 break; 922 case STATE_CONNECT: 923 if (peer->state == STATE_ESTABLISHED && 924 peer->capa.neg.grestart.restart == 2) { 925 /* do the graceful restart dance */ 926 session_graceful_restart(peer); 927 peer->holdtime = INTERVAL_HOLD_INITIAL; 928 timer_stop(peer, Timer_ConnectRetry); 929 timer_stop(peer, Timer_Keepalive); 930 timer_stop(peer, Timer_Hold); 931 timer_stop(peer, Timer_IdleHold); 932 timer_stop(peer, Timer_IdleHoldReset); 933 session_close_connection(peer); 934 msgbuf_clear(&peer->wbuf); 935 bzero(&peer->capa.peer, sizeof(peer->capa.peer)); 936 } 937 break; 938 case STATE_ACTIVE: 939 if (!peer->template) 940 imsg_compose(ibuf_main, IMSG_PFKEY_RELOAD, 941 peer->conf.id, 0, -1, NULL, 0); 942 break; 943 case STATE_OPENSENT: 944 break; 945 case STATE_OPENCONFIRM: 946 break; 947 case STATE_ESTABLISHED: 948 timer_set(peer, Timer_IdleHoldReset, peer->IdleHoldTime); 949 if (peer->demoted) 950 timer_set(peer, Timer_CarpUndemote, 951 INTERVAL_HOLD_DEMOTED); 952 session_up(peer); 953 break; 954 default: /* something seriously fucked */ 955 break; 956 } 957 958 log_statechange(peer, state, event); 959 LIST_FOREACH(mrt, &mrthead, entry) { 960 if (!(mrt->type == MRT_ALL_IN || mrt->type == MRT_ALL_OUT)) 961 continue; 962 if ((mrt->peer_id == 0 && mrt->group_id == 0) || 963 mrt->peer_id == peer->conf.id || (mrt->group_id != 0 && 964 mrt->group_id == peer->conf.groupid)) 965 mrt_dump_state(mrt, peer->state, state, peer); 966 } 967 peer->prev_state = peer->state; 968 peer->state = state; 969 } 970 971 void 972 session_accept(int listenfd) 973 { 974 int connfd; 975 socklen_t len; 976 struct sockaddr_storage cliaddr; 977 struct peer *p = NULL; 978 979 len = sizeof(cliaddr); 980 if ((connfd = accept4(listenfd, 981 (struct sockaddr *)&cliaddr, &len, 982 SOCK_CLOEXEC | SOCK_NONBLOCK)) == -1) { 983 if (errno == ENFILE || errno == EMFILE) 984 pauseaccept = getmonotime(); 985 else if (errno != EWOULDBLOCK && errno != EINTR && 986 errno != ECONNABORTED) 987 log_warn("accept"); 988 return; 989 } 990 991 p = getpeerbyip(conf, (struct sockaddr *)&cliaddr); 992 993 if (p != NULL && p->state == STATE_IDLE && p->errcnt < 2) { 994 if (timer_running(p, Timer_IdleHold, NULL)) { 995 /* fast reconnect after clear */ 996 p->passive = 1; 997 bgp_fsm(p, EVNT_START); 998 } 999 } 1000 1001 if (p != NULL && 1002 (p->state == STATE_CONNECT || p->state == STATE_ACTIVE)) { 1003 if (p->fd != -1) { 1004 if (p->state == STATE_CONNECT) 1005 session_close_connection(p); 1006 else { 1007 close(connfd); 1008 return; 1009 } 1010 } 1011 1012 open: 1013 if (p->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) { 1014 log_peer_warnx(&p->conf, 1015 "ipsec or md5sig configured but not available"); 1016 close(connfd); 1017 return; 1018 } 1019 1020 if (tcp_md5_check(connfd, p) == -1) { 1021 close(connfd); 1022 return; 1023 } 1024 p->fd = p->wbuf.fd = connfd; 1025 if (session_setup_socket(p)) { 1026 close(connfd); 1027 return; 1028 } 1029 bgp_fsm(p, EVNT_CON_OPEN); 1030 return; 1031 } else if (p != NULL && p->state == STATE_ESTABLISHED && 1032 p->capa.neg.grestart.restart == 2) { 1033 /* first do the graceful restart dance */ 1034 change_state(p, STATE_CONNECT, EVNT_CON_CLOSED); 1035 /* then do part of the open dance */ 1036 goto open; 1037 } else { 1038 log_conn_attempt(p, (struct sockaddr *)&cliaddr, len); 1039 close(connfd); 1040 } 1041 } 1042 1043 int 1044 session_connect(struct peer *peer) 1045 { 1046 struct sockaddr *sa; 1047 socklen_t sa_len; 1048 1049 /* 1050 * we do not need the overcomplicated collision detection RFC 1771 1051 * describes; we simply make sure there is only ever one concurrent 1052 * tcp connection per peer. 1053 */ 1054 if (peer->fd != -1) 1055 return (-1); 1056 1057 if ((peer->fd = socket(aid2af(peer->conf.remote_addr.aid), 1058 SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, IPPROTO_TCP)) == -1) { 1059 log_peer_warn(&peer->conf, "session_connect socket"); 1060 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1061 return (-1); 1062 } 1063 1064 if (peer->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) { 1065 log_peer_warnx(&peer->conf, 1066 "ipsec or md5sig configured but not available"); 1067 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1068 return (-1); 1069 } 1070 1071 tcp_md5_set(peer->fd, peer); 1072 peer->wbuf.fd = peer->fd; 1073 1074 /* if update source is set we need to bind() */ 1075 if ((sa = addr2sa(&peer->conf.local_addr, 0, &sa_len)) != NULL) { 1076 if (bind(peer->fd, sa, sa_len) == -1) { 1077 log_peer_warn(&peer->conf, "session_connect bind"); 1078 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1079 return (-1); 1080 } 1081 } 1082 1083 if (session_setup_socket(peer)) { 1084 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1085 return (-1); 1086 } 1087 1088 sa = addr2sa(&peer->conf.remote_addr, BGP_PORT, &sa_len); 1089 if (connect(peer->fd, sa, sa_len) == -1) { 1090 if (errno != EINPROGRESS) { 1091 if (errno != peer->lasterr) 1092 log_peer_warn(&peer->conf, "connect"); 1093 peer->lasterr = errno; 1094 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1095 return (-1); 1096 } 1097 } else 1098 bgp_fsm(peer, EVNT_CON_OPEN); 1099 1100 return (0); 1101 } 1102 1103 int 1104 session_setup_socket(struct peer *p) 1105 { 1106 int ttl = p->conf.distance; 1107 int pre = IPTOS_PREC_INTERNETCONTROL; 1108 int nodelay = 1; 1109 int bsize; 1110 1111 switch (p->conf.remote_addr.aid) { 1112 case AID_INET: 1113 /* set precedence, see RFC 1771 appendix 5 */ 1114 if (setsockopt(p->fd, IPPROTO_IP, IP_TOS, &pre, sizeof(pre)) == 1115 -1) { 1116 log_peer_warn(&p->conf, 1117 "session_setup_socket setsockopt TOS"); 1118 return (-1); 1119 } 1120 1121 if (p->conf.ebgp) { 1122 /* 1123 * set TTL to foreign router's distance 1124 * 1=direct n=multihop with ttlsec, we always use 255 1125 */ 1126 if (p->conf.ttlsec) { 1127 ttl = 256 - p->conf.distance; 1128 if (setsockopt(p->fd, IPPROTO_IP, IP_MINTTL, 1129 &ttl, sizeof(ttl)) == -1) { 1130 log_peer_warn(&p->conf, 1131 "session_setup_socket: " 1132 "setsockopt MINTTL"); 1133 return (-1); 1134 } 1135 ttl = 255; 1136 } 1137 1138 if (setsockopt(p->fd, IPPROTO_IP, IP_TTL, &ttl, 1139 sizeof(ttl)) == -1) { 1140 log_peer_warn(&p->conf, 1141 "session_setup_socket setsockopt TTL"); 1142 return (-1); 1143 } 1144 } 1145 break; 1146 case AID_INET6: 1147 if (p->conf.ebgp) { 1148 /* 1149 * set hoplimit to foreign router's distance 1150 * 1=direct n=multihop with ttlsec, we always use 255 1151 */ 1152 if (p->conf.ttlsec) { 1153 ttl = 256 - p->conf.distance; 1154 if (setsockopt(p->fd, IPPROTO_IPV6, 1155 IPV6_MINHOPCOUNT, &ttl, sizeof(ttl)) 1156 == -1) { 1157 log_peer_warn(&p->conf, 1158 "session_setup_socket: " 1159 "setsockopt MINHOPCOUNT"); 1160 return (-1); 1161 } 1162 ttl = 255; 1163 } 1164 if (setsockopt(p->fd, IPPROTO_IPV6, IPV6_UNICAST_HOPS, 1165 &ttl, sizeof(ttl)) == -1) { 1166 log_peer_warn(&p->conf, 1167 "session_setup_socket setsockopt hoplimit"); 1168 return (-1); 1169 } 1170 } 1171 break; 1172 } 1173 1174 /* set TCP_NODELAY */ 1175 if (setsockopt(p->fd, IPPROTO_TCP, TCP_NODELAY, &nodelay, 1176 sizeof(nodelay)) == -1) { 1177 log_peer_warn(&p->conf, 1178 "session_setup_socket setsockopt TCP_NODELAY"); 1179 return (-1); 1180 } 1181 1182 /* only increase bufsize (and thus window) if md5 or ipsec is in use */ 1183 if (p->conf.auth.method != AUTH_NONE) { 1184 /* try to increase bufsize. no biggie if it fails */ 1185 bsize = 65535; 1186 while (bsize > 8192 && 1187 setsockopt(p->fd, SOL_SOCKET, SO_RCVBUF, &bsize, 1188 sizeof(bsize)) == -1 && errno != EINVAL) 1189 bsize /= 2; 1190 bsize = 65535; 1191 while (bsize > 8192 && 1192 setsockopt(p->fd, SOL_SOCKET, SO_SNDBUF, &bsize, 1193 sizeof(bsize)) == -1 && errno != EINVAL) 1194 bsize /= 2; 1195 } 1196 1197 return (0); 1198 } 1199 1200 void 1201 session_tcp_established(struct peer *peer) 1202 { 1203 struct sockaddr_storage ss; 1204 socklen_t len; 1205 1206 len = sizeof(ss); 1207 if (getsockname(peer->fd, (struct sockaddr *)&ss, &len) == -1) 1208 log_warn("getsockname"); 1209 sa2addr((struct sockaddr *)&ss, &peer->local, &peer->local_port); 1210 len = sizeof(ss); 1211 if (getpeername(peer->fd, (struct sockaddr *)&ss, &len) == -1) 1212 log_warn("getpeername"); 1213 sa2addr((struct sockaddr *)&ss, &peer->remote, &peer->remote_port); 1214 } 1215 1216 void 1217 session_capa_ann_none(struct peer *peer) 1218 { 1219 bzero(&peer->capa.ann, sizeof(peer->capa.ann)); 1220 } 1221 1222 int 1223 session_capa_add(struct ibuf *opb, u_int8_t capa_code, u_int8_t capa_len) 1224 { 1225 int errs = 0; 1226 1227 errs += ibuf_add(opb, &capa_code, sizeof(capa_code)); 1228 errs += ibuf_add(opb, &capa_len, sizeof(capa_len)); 1229 return (errs); 1230 } 1231 1232 int 1233 session_capa_add_mp(struct ibuf *buf, u_int8_t aid) 1234 { 1235 u_int8_t safi, pad = 0; 1236 u_int16_t afi; 1237 int errs = 0; 1238 1239 if (aid2afi(aid, &afi, &safi) == -1) 1240 fatalx("session_capa_add_mp: bad afi/safi pair"); 1241 afi = htons(afi); 1242 errs += ibuf_add(buf, &afi, sizeof(afi)); 1243 errs += ibuf_add(buf, &pad, sizeof(pad)); 1244 errs += ibuf_add(buf, &safi, sizeof(safi)); 1245 1246 return (errs); 1247 } 1248 1249 int 1250 session_capa_add_gr(struct peer *p, struct ibuf *b, u_int8_t aid) 1251 { 1252 u_int errs = 0; 1253 u_int16_t afi; 1254 u_int8_t flags, safi; 1255 1256 if (aid2afi(aid, &afi, &safi)) { 1257 log_warn("session_capa_add_gr: bad AID"); 1258 return (1); 1259 } 1260 if (p->capa.neg.grestart.flags[aid] & CAPA_GR_RESTARTING) 1261 flags = CAPA_GR_F_FLAG; 1262 else 1263 flags = 0; 1264 1265 afi = htons(afi); 1266 errs += ibuf_add(b, &afi, sizeof(afi)); 1267 errs += ibuf_add(b, &safi, sizeof(safi)); 1268 errs += ibuf_add(b, &flags, sizeof(flags)); 1269 1270 return (errs); 1271 } 1272 1273 struct bgp_msg * 1274 session_newmsg(enum msg_type msgtype, u_int16_t len) 1275 { 1276 struct bgp_msg *msg; 1277 struct msg_header hdr; 1278 struct ibuf *buf; 1279 int errs = 0; 1280 1281 memset(&hdr.marker, 0xff, sizeof(hdr.marker)); 1282 hdr.len = htons(len); 1283 hdr.type = msgtype; 1284 1285 if ((buf = ibuf_open(len)) == NULL) 1286 return (NULL); 1287 1288 errs += ibuf_add(buf, &hdr.marker, sizeof(hdr.marker)); 1289 errs += ibuf_add(buf, &hdr.len, sizeof(hdr.len)); 1290 errs += ibuf_add(buf, &hdr.type, sizeof(hdr.type)); 1291 1292 if (errs || (msg = calloc(1, sizeof(*msg))) == NULL) { 1293 ibuf_free(buf); 1294 return (NULL); 1295 } 1296 1297 msg->buf = buf; 1298 msg->type = msgtype; 1299 msg->len = len; 1300 1301 return (msg); 1302 } 1303 1304 int 1305 session_sendmsg(struct bgp_msg *msg, struct peer *p) 1306 { 1307 struct mrt *mrt; 1308 1309 LIST_FOREACH(mrt, &mrthead, entry) { 1310 if (!(mrt->type == MRT_ALL_OUT || (msg->type == UPDATE && 1311 mrt->type == MRT_UPDATE_OUT))) 1312 continue; 1313 if ((mrt->peer_id == 0 && mrt->group_id == 0) || 1314 mrt->peer_id == p->conf.id || (mrt->group_id != 0 && 1315 mrt->group_id == p->conf.groupid)) 1316 mrt_dump_bgp_msg(mrt, msg->buf->buf, msg->len, p); 1317 } 1318 1319 ibuf_close(&p->wbuf, msg->buf); 1320 if (!p->throttled && p->wbuf.queued > SESS_MSG_HIGH_MARK) { 1321 if (imsg_rde(IMSG_XOFF, p->conf.id, NULL, 0) == -1) 1322 log_peer_warn(&p->conf, "imsg_compose XOFF"); 1323 else 1324 p->throttled = 1; 1325 } 1326 1327 free(msg); 1328 return (0); 1329 } 1330 1331 void 1332 session_open(struct peer *p) 1333 { 1334 struct bgp_msg *buf; 1335 struct ibuf *opb; 1336 struct msg_open msg; 1337 u_int16_t len; 1338 u_int8_t i, op_type, optparamlen = 0; 1339 int errs = 0; 1340 int mpcapa = 0; 1341 1342 1343 if ((opb = ibuf_dynamic(0, UCHAR_MAX - sizeof(op_type) - 1344 sizeof(optparamlen))) == NULL) { 1345 bgp_fsm(p, EVNT_CON_FATAL); 1346 return; 1347 } 1348 1349 /* multiprotocol extensions, RFC 4760 */ 1350 for (i = 0; i < AID_MAX; i++) 1351 if (p->capa.ann.mp[i]) { /* 4 bytes data */ 1352 errs += session_capa_add(opb, CAPA_MP, 4); 1353 errs += session_capa_add_mp(opb, i); 1354 mpcapa++; 1355 } 1356 1357 /* route refresh, RFC 2918 */ 1358 if (p->capa.ann.refresh) /* no data */ 1359 errs += session_capa_add(opb, CAPA_REFRESH, 0); 1360 1361 /* graceful restart and End-of-RIB marker, RFC 4724 */ 1362 if (p->capa.ann.grestart.restart) { 1363 int rst = 0; 1364 u_int16_t hdr; 1365 u_int8_t grlen; 1366 1367 if (mpcapa) { 1368 grlen = 2 + 4 * mpcapa; 1369 for (i = 0; i < AID_MAX; i++) { 1370 if (p->capa.neg.grestart.flags[i] & 1371 CAPA_GR_RESTARTING) 1372 rst++; 1373 } 1374 } else { /* AID_INET */ 1375 grlen = 2 + 4; 1376 if (p->capa.neg.grestart.flags[AID_INET] & 1377 CAPA_GR_RESTARTING) 1378 rst++; 1379 } 1380 1381 hdr = conf->holdtime; /* default timeout */ 1382 /* if client does graceful restart don't set R flag */ 1383 if (!rst) 1384 hdr |= CAPA_GR_R_FLAG; 1385 hdr = htons(hdr); 1386 1387 errs += session_capa_add(opb, CAPA_RESTART, grlen); 1388 errs += ibuf_add(opb, &hdr, sizeof(hdr)); 1389 1390 if (mpcapa) { 1391 for (i = 0; i < AID_MAX; i++) { 1392 if (p->capa.ann.mp[i]) { 1393 errs += session_capa_add_gr(p, opb, i); 1394 } 1395 } 1396 } else { /* AID_INET */ 1397 errs += session_capa_add_gr(p, opb, AID_INET); 1398 } 1399 } 1400 1401 /* 4-bytes AS numbers, draft-ietf-idr-as4bytes-13 */ 1402 if (p->capa.ann.as4byte) { /* 4 bytes data */ 1403 u_int32_t nas; 1404 1405 nas = htonl(p->conf.local_as); 1406 errs += session_capa_add(opb, CAPA_AS4BYTE, sizeof(nas)); 1407 errs += ibuf_add(opb, &nas, sizeof(nas)); 1408 } 1409 1410 if (ibuf_size(opb)) 1411 optparamlen = ibuf_size(opb) + sizeof(op_type) + 1412 sizeof(optparamlen); 1413 1414 len = MSGSIZE_OPEN_MIN + optparamlen; 1415 if (errs || (buf = session_newmsg(OPEN, len)) == NULL) { 1416 ibuf_free(opb); 1417 bgp_fsm(p, EVNT_CON_FATAL); 1418 return; 1419 } 1420 1421 msg.version = 4; 1422 msg.myas = htons(p->conf.local_short_as); 1423 if (p->conf.holdtime) 1424 msg.holdtime = htons(p->conf.holdtime); 1425 else 1426 msg.holdtime = htons(conf->holdtime); 1427 msg.bgpid = conf->bgpid; /* is already in network byte order */ 1428 msg.optparamlen = optparamlen; 1429 1430 errs += ibuf_add(buf->buf, &msg.version, sizeof(msg.version)); 1431 errs += ibuf_add(buf->buf, &msg.myas, sizeof(msg.myas)); 1432 errs += ibuf_add(buf->buf, &msg.holdtime, sizeof(msg.holdtime)); 1433 errs += ibuf_add(buf->buf, &msg.bgpid, sizeof(msg.bgpid)); 1434 errs += ibuf_add(buf->buf, &msg.optparamlen, sizeof(msg.optparamlen)); 1435 1436 if (optparamlen) { 1437 op_type = OPT_PARAM_CAPABILITIES; 1438 optparamlen = ibuf_size(opb); 1439 errs += ibuf_add(buf->buf, &op_type, sizeof(op_type)); 1440 errs += ibuf_add(buf->buf, &optparamlen, sizeof(optparamlen)); 1441 errs += ibuf_add(buf->buf, opb->buf, ibuf_size(opb)); 1442 } 1443 1444 ibuf_free(opb); 1445 1446 if (errs) { 1447 ibuf_free(buf->buf); 1448 free(buf); 1449 bgp_fsm(p, EVNT_CON_FATAL); 1450 return; 1451 } 1452 1453 if (session_sendmsg(buf, p) == -1) { 1454 bgp_fsm(p, EVNT_CON_FATAL); 1455 return; 1456 } 1457 1458 p->stats.msg_sent_open++; 1459 } 1460 1461 void 1462 session_keepalive(struct peer *p) 1463 { 1464 struct bgp_msg *buf; 1465 1466 if ((buf = session_newmsg(KEEPALIVE, MSGSIZE_KEEPALIVE)) == NULL || 1467 session_sendmsg(buf, p) == -1) { 1468 bgp_fsm(p, EVNT_CON_FATAL); 1469 return; 1470 } 1471 1472 start_timer_keepalive(p); 1473 p->stats.msg_sent_keepalive++; 1474 } 1475 1476 void 1477 session_update(u_int32_t peerid, void *data, size_t datalen) 1478 { 1479 struct peer *p; 1480 struct bgp_msg *buf; 1481 1482 if ((p = getpeerbyid(conf, peerid)) == NULL) { 1483 log_warnx("no such peer: id=%u", peerid); 1484 return; 1485 } 1486 1487 if (p->state != STATE_ESTABLISHED) 1488 return; 1489 1490 if ((buf = session_newmsg(UPDATE, MSGSIZE_HEADER + datalen)) == NULL) { 1491 bgp_fsm(p, EVNT_CON_FATAL); 1492 return; 1493 } 1494 1495 if (ibuf_add(buf->buf, data, datalen)) { 1496 ibuf_free(buf->buf); 1497 free(buf); 1498 bgp_fsm(p, EVNT_CON_FATAL); 1499 return; 1500 } 1501 1502 if (session_sendmsg(buf, p) == -1) { 1503 bgp_fsm(p, EVNT_CON_FATAL); 1504 return; 1505 } 1506 1507 start_timer_keepalive(p); 1508 p->stats.msg_sent_update++; 1509 } 1510 1511 void 1512 session_notification(struct peer *p, u_int8_t errcode, u_int8_t subcode, 1513 void *data, ssize_t datalen) 1514 { 1515 struct bgp_msg *buf; 1516 int errs = 0; 1517 1518 if (p->stats.last_sent_errcode) /* some notification already sent */ 1519 return; 1520 1521 log_notification(p, errcode, subcode, data, datalen, "sending"); 1522 1523 if ((buf = session_newmsg(NOTIFICATION, 1524 MSGSIZE_NOTIFICATION_MIN + datalen)) == NULL) { 1525 bgp_fsm(p, EVNT_CON_FATAL); 1526 return; 1527 } 1528 1529 errs += ibuf_add(buf->buf, &errcode, sizeof(errcode)); 1530 errs += ibuf_add(buf->buf, &subcode, sizeof(subcode)); 1531 1532 if (datalen > 0) 1533 errs += ibuf_add(buf->buf, data, datalen); 1534 1535 if (errs) { 1536 ibuf_free(buf->buf); 1537 free(buf); 1538 bgp_fsm(p, EVNT_CON_FATAL); 1539 return; 1540 } 1541 1542 if (session_sendmsg(buf, p) == -1) { 1543 bgp_fsm(p, EVNT_CON_FATAL); 1544 return; 1545 } 1546 1547 p->stats.msg_sent_notification++; 1548 p->stats.last_sent_errcode = errcode; 1549 p->stats.last_sent_suberr = subcode; 1550 } 1551 1552 int 1553 session_neighbor_rrefresh(struct peer *p) 1554 { 1555 u_int8_t i; 1556 1557 if (!p->capa.peer.refresh) 1558 return (-1); 1559 1560 for (i = 0; i < AID_MAX; i++) { 1561 if (p->capa.peer.mp[i] != 0) 1562 session_rrefresh(p, i); 1563 } 1564 1565 return (0); 1566 } 1567 1568 void 1569 session_rrefresh(struct peer *p, u_int8_t aid) 1570 { 1571 struct bgp_msg *buf; 1572 int errs = 0; 1573 u_int16_t afi; 1574 u_int8_t safi, null8 = 0; 1575 1576 if (aid2afi(aid, &afi, &safi) == -1) 1577 fatalx("session_rrefresh: bad afi/safi pair"); 1578 1579 if ((buf = session_newmsg(RREFRESH, MSGSIZE_RREFRESH)) == NULL) { 1580 bgp_fsm(p, EVNT_CON_FATAL); 1581 return; 1582 } 1583 1584 afi = htons(afi); 1585 errs += ibuf_add(buf->buf, &afi, sizeof(afi)); 1586 errs += ibuf_add(buf->buf, &null8, sizeof(null8)); 1587 errs += ibuf_add(buf->buf, &safi, sizeof(safi)); 1588 1589 if (errs) { 1590 ibuf_free(buf->buf); 1591 free(buf); 1592 bgp_fsm(p, EVNT_CON_FATAL); 1593 return; 1594 } 1595 1596 if (session_sendmsg(buf, p) == -1) { 1597 bgp_fsm(p, EVNT_CON_FATAL); 1598 return; 1599 } 1600 1601 p->stats.msg_sent_rrefresh++; 1602 } 1603 1604 int 1605 session_graceful_restart(struct peer *p) 1606 { 1607 u_int8_t i; 1608 1609 timer_set(p, Timer_RestartTimeout, p->capa.neg.grestart.timeout); 1610 1611 for (i = 0; i < AID_MAX; i++) { 1612 if (p->capa.neg.grestart.flags[i] & CAPA_GR_PRESENT) { 1613 if (imsg_rde(IMSG_SESSION_STALE, p->conf.id, 1614 &i, sizeof(i)) == -1) 1615 return (-1); 1616 log_peer_warnx(&p->conf, 1617 "graceful restart of %s, keeping routes", 1618 aid2str(i)); 1619 p->capa.neg.grestart.flags[i] |= CAPA_GR_RESTARTING; 1620 } else if (p->capa.neg.mp[i]) { 1621 if (imsg_rde(IMSG_SESSION_FLUSH, p->conf.id, 1622 &i, sizeof(i)) == -1) 1623 return (-1); 1624 log_peer_warnx(&p->conf, 1625 "graceful restart of %s, flushing routes", 1626 aid2str(i)); 1627 } 1628 } 1629 return (0); 1630 } 1631 1632 int 1633 session_graceful_stop(struct peer *p) 1634 { 1635 u_int8_t i; 1636 1637 for (i = 0; i < AID_MAX; i++) { 1638 /* 1639 * Only flush if the peer is restarting and the timeout fired. 1640 * In all other cases the session was already flushed when the 1641 * session went down or when the new open message was parsed. 1642 */ 1643 if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING) { 1644 log_peer_warnx(&p->conf, "graceful restart of %s, " 1645 "time-out, flushing", aid2str(i)); 1646 if (imsg_rde(IMSG_SESSION_FLUSH, p->conf.id, 1647 &i, sizeof(i)) == -1) 1648 return (-1); 1649 } 1650 p->capa.neg.grestart.flags[i] &= ~CAPA_GR_RESTARTING; 1651 } 1652 return (0); 1653 } 1654 1655 int 1656 session_dispatch_msg(struct pollfd *pfd, struct peer *p) 1657 { 1658 ssize_t n; 1659 socklen_t len; 1660 int error; 1661 1662 if (p->state == STATE_CONNECT) { 1663 if (pfd->revents & POLLOUT) { 1664 if (pfd->revents & POLLIN) { 1665 /* error occurred */ 1666 len = sizeof(error); 1667 if (getsockopt(pfd->fd, SOL_SOCKET, SO_ERROR, 1668 &error, &len) == -1 || error) { 1669 if (error) 1670 errno = error; 1671 if (errno != p->lasterr) { 1672 log_peer_warn(&p->conf, 1673 "socket error"); 1674 p->lasterr = errno; 1675 } 1676 bgp_fsm(p, EVNT_CON_OPENFAIL); 1677 return (1); 1678 } 1679 } 1680 bgp_fsm(p, EVNT_CON_OPEN); 1681 return (1); 1682 } 1683 if (pfd->revents & POLLHUP) { 1684 bgp_fsm(p, EVNT_CON_OPENFAIL); 1685 return (1); 1686 } 1687 if (pfd->revents & (POLLERR|POLLNVAL)) { 1688 bgp_fsm(p, EVNT_CON_FATAL); 1689 return (1); 1690 } 1691 return (0); 1692 } 1693 1694 if (pfd->revents & POLLHUP) { 1695 bgp_fsm(p, EVNT_CON_CLOSED); 1696 return (1); 1697 } 1698 if (pfd->revents & (POLLERR|POLLNVAL)) { 1699 bgp_fsm(p, EVNT_CON_FATAL); 1700 return (1); 1701 } 1702 1703 if (pfd->revents & POLLOUT && p->wbuf.queued) { 1704 if ((error = msgbuf_write(&p->wbuf)) <= 0 && errno != EAGAIN) { 1705 if (error == 0) 1706 log_peer_warnx(&p->conf, "Connection closed"); 1707 else if (error == -1) 1708 log_peer_warn(&p->conf, "write error"); 1709 bgp_fsm(p, EVNT_CON_FATAL); 1710 return (1); 1711 } 1712 if (p->throttled && p->wbuf.queued < SESS_MSG_LOW_MARK) { 1713 if (imsg_rde(IMSG_XON, p->conf.id, NULL, 0) == -1) 1714 log_peer_warn(&p->conf, "imsg_compose XON"); 1715 else 1716 p->throttled = 0; 1717 } 1718 if (!(pfd->revents & POLLIN)) 1719 return (1); 1720 } 1721 1722 if (p->rbuf && pfd->revents & POLLIN) { 1723 if ((n = read(p->fd, p->rbuf->buf + p->rbuf->wpos, 1724 sizeof(p->rbuf->buf) - p->rbuf->wpos)) == -1) { 1725 if (errno != EINTR && errno != EAGAIN) { 1726 log_peer_warn(&p->conf, "read error"); 1727 bgp_fsm(p, EVNT_CON_FATAL); 1728 } 1729 return (1); 1730 } 1731 if (n == 0) { /* connection closed */ 1732 bgp_fsm(p, EVNT_CON_CLOSED); 1733 return (1); 1734 } 1735 1736 p->rbuf->wpos += n; 1737 p->stats.last_read = time(NULL); 1738 return (1); 1739 } 1740 return (0); 1741 } 1742 1743 void 1744 session_process_msg(struct peer *p) 1745 { 1746 struct mrt *mrt; 1747 ssize_t rpos, av, left; 1748 int processed = 0; 1749 u_int16_t msglen; 1750 u_int8_t msgtype; 1751 1752 rpos = 0; 1753 av = p->rbuf->wpos; 1754 p->rpending = 0; 1755 1756 /* 1757 * session might drop to IDLE -> buffers deallocated 1758 * we MUST check rbuf != NULL before use 1759 */ 1760 for (;;) { 1761 if (p->rbuf == NULL) 1762 return; 1763 if (rpos + MSGSIZE_HEADER > av) 1764 break; 1765 if (parse_header(p, p->rbuf->buf + rpos, &msglen, 1766 &msgtype) == -1) 1767 return; 1768 if (rpos + msglen > av) 1769 break; 1770 p->rbuf->rptr = p->rbuf->buf + rpos; 1771 1772 /* dump to MRT as soon as we have a full packet */ 1773 LIST_FOREACH(mrt, &mrthead, entry) { 1774 if (!(mrt->type == MRT_ALL_IN || (msgtype == UPDATE && 1775 mrt->type == MRT_UPDATE_IN))) 1776 continue; 1777 if ((mrt->peer_id == 0 && mrt->group_id == 0) || 1778 mrt->peer_id == p->conf.id || (mrt->group_id != 0 && 1779 mrt->group_id == p->conf.groupid)) 1780 mrt_dump_bgp_msg(mrt, p->rbuf->rptr, msglen, p); 1781 } 1782 1783 switch (msgtype) { 1784 case OPEN: 1785 bgp_fsm(p, EVNT_RCVD_OPEN); 1786 p->stats.msg_rcvd_open++; 1787 break; 1788 case UPDATE: 1789 bgp_fsm(p, EVNT_RCVD_UPDATE); 1790 p->stats.msg_rcvd_update++; 1791 break; 1792 case NOTIFICATION: 1793 bgp_fsm(p, EVNT_RCVD_NOTIFICATION); 1794 p->stats.msg_rcvd_notification++; 1795 break; 1796 case KEEPALIVE: 1797 bgp_fsm(p, EVNT_RCVD_KEEPALIVE); 1798 p->stats.msg_rcvd_keepalive++; 1799 break; 1800 case RREFRESH: 1801 parse_refresh(p); 1802 p->stats.msg_rcvd_rrefresh++; 1803 break; 1804 default: /* cannot happen */ 1805 session_notification(p, ERR_HEADER, ERR_HDR_TYPE, 1806 &msgtype, 1); 1807 log_warnx("received message with unknown type %u", 1808 msgtype); 1809 bgp_fsm(p, EVNT_CON_FATAL); 1810 } 1811 rpos += msglen; 1812 if (++processed > MSG_PROCESS_LIMIT) { 1813 p->rpending = 1; 1814 break; 1815 } 1816 } 1817 1818 if (rpos < av) { 1819 left = av - rpos; 1820 memmove(&p->rbuf->buf, p->rbuf->buf + rpos, left); 1821 p->rbuf->wpos = left; 1822 } else 1823 p->rbuf->wpos = 0; 1824 } 1825 1826 int 1827 parse_header(struct peer *peer, u_char *data, u_int16_t *len, u_int8_t *type) 1828 { 1829 u_char *p; 1830 u_int16_t olen; 1831 static const u_int8_t marker[MSGSIZE_HEADER_MARKER] = { 0xff, 0xff, 1832 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 1833 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; 1834 1835 /* caller MUST make sure we are getting 19 bytes! */ 1836 p = data; 1837 if (memcmp(p, marker, sizeof(marker))) { 1838 log_peer_warnx(&peer->conf, "sync error"); 1839 session_notification(peer, ERR_HEADER, ERR_HDR_SYNC, NULL, 0); 1840 bgp_fsm(peer, EVNT_CON_FATAL); 1841 return (-1); 1842 } 1843 p += MSGSIZE_HEADER_MARKER; 1844 1845 memcpy(&olen, p, 2); 1846 *len = ntohs(olen); 1847 p += 2; 1848 memcpy(type, p, 1); 1849 1850 if (*len < MSGSIZE_HEADER || *len > MAX_PKTSIZE) { 1851 log_peer_warnx(&peer->conf, 1852 "received message: illegal length: %u byte", *len); 1853 session_notification(peer, ERR_HEADER, ERR_HDR_LEN, 1854 &olen, sizeof(olen)); 1855 bgp_fsm(peer, EVNT_CON_FATAL); 1856 return (-1); 1857 } 1858 1859 switch (*type) { 1860 case OPEN: 1861 if (*len < MSGSIZE_OPEN_MIN) { 1862 log_peer_warnx(&peer->conf, 1863 "received OPEN: illegal len: %u byte", *len); 1864 session_notification(peer, ERR_HEADER, ERR_HDR_LEN, 1865 &olen, sizeof(olen)); 1866 bgp_fsm(peer, EVNT_CON_FATAL); 1867 return (-1); 1868 } 1869 break; 1870 case NOTIFICATION: 1871 if (*len < MSGSIZE_NOTIFICATION_MIN) { 1872 log_peer_warnx(&peer->conf, 1873 "received NOTIFICATION: illegal len: %u byte", 1874 *len); 1875 session_notification(peer, ERR_HEADER, ERR_HDR_LEN, 1876 &olen, sizeof(olen)); 1877 bgp_fsm(peer, EVNT_CON_FATAL); 1878 return (-1); 1879 } 1880 break; 1881 case UPDATE: 1882 if (*len < MSGSIZE_UPDATE_MIN) { 1883 log_peer_warnx(&peer->conf, 1884 "received UPDATE: illegal len: %u byte", *len); 1885 session_notification(peer, ERR_HEADER, ERR_HDR_LEN, 1886 &olen, sizeof(olen)); 1887 bgp_fsm(peer, EVNT_CON_FATAL); 1888 return (-1); 1889 } 1890 break; 1891 case KEEPALIVE: 1892 if (*len != MSGSIZE_KEEPALIVE) { 1893 log_peer_warnx(&peer->conf, 1894 "received KEEPALIVE: illegal len: %u byte", *len); 1895 session_notification(peer, ERR_HEADER, ERR_HDR_LEN, 1896 &olen, sizeof(olen)); 1897 bgp_fsm(peer, EVNT_CON_FATAL); 1898 return (-1); 1899 } 1900 break; 1901 case RREFRESH: 1902 if (*len != MSGSIZE_RREFRESH) { 1903 log_peer_warnx(&peer->conf, 1904 "received RREFRESH: illegal len: %u byte", *len); 1905 session_notification(peer, ERR_HEADER, ERR_HDR_LEN, 1906 &olen, sizeof(olen)); 1907 bgp_fsm(peer, EVNT_CON_FATAL); 1908 return (-1); 1909 } 1910 break; 1911 default: 1912 log_peer_warnx(&peer->conf, 1913 "received msg with unknown type %u", *type); 1914 session_notification(peer, ERR_HEADER, ERR_HDR_TYPE, 1915 type, 1); 1916 bgp_fsm(peer, EVNT_CON_FATAL); 1917 return (-1); 1918 } 1919 return (0); 1920 } 1921 1922 int 1923 parse_open(struct peer *peer) 1924 { 1925 u_char *p, *op_val; 1926 u_int8_t version, rversion; 1927 u_int16_t short_as, msglen; 1928 u_int16_t holdtime, oholdtime, myholdtime; 1929 u_int32_t as, bgpid; 1930 u_int8_t optparamlen, plen; 1931 u_int8_t op_type, op_len; 1932 1933 p = peer->rbuf->rptr; 1934 p += MSGSIZE_HEADER_MARKER; 1935 memcpy(&msglen, p, sizeof(msglen)); 1936 msglen = ntohs(msglen); 1937 1938 p = peer->rbuf->rptr; 1939 p += MSGSIZE_HEADER; /* header is already checked */ 1940 1941 memcpy(&version, p, sizeof(version)); 1942 p += sizeof(version); 1943 1944 if (version != BGP_VERSION) { 1945 log_peer_warnx(&peer->conf, 1946 "peer wants unrecognized version %u", version); 1947 if (version > BGP_VERSION) 1948 rversion = version - BGP_VERSION; 1949 else 1950 rversion = BGP_VERSION; 1951 session_notification(peer, ERR_OPEN, ERR_OPEN_VERSION, 1952 &rversion, sizeof(rversion)); 1953 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 1954 return (-1); 1955 } 1956 1957 memcpy(&short_as, p, sizeof(short_as)); 1958 p += sizeof(short_as); 1959 as = peer->short_as = ntohs(short_as); 1960 if (as == 0) { 1961 log_peer_warnx(&peer->conf, 1962 "peer requests unacceptable AS %u", as); 1963 session_notification(peer, ERR_OPEN, ERR_OPEN_AS, 1964 NULL, 0); 1965 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 1966 return (-1); 1967 } 1968 1969 memcpy(&oholdtime, p, sizeof(oholdtime)); 1970 p += sizeof(oholdtime); 1971 1972 holdtime = ntohs(oholdtime); 1973 if (holdtime && holdtime < peer->conf.min_holdtime) { 1974 log_peer_warnx(&peer->conf, 1975 "peer requests unacceptable holdtime %u", holdtime); 1976 session_notification(peer, ERR_OPEN, ERR_OPEN_HOLDTIME, 1977 NULL, 0); 1978 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 1979 return (-1); 1980 } 1981 1982 myholdtime = peer->conf.holdtime; 1983 if (!myholdtime) 1984 myholdtime = conf->holdtime; 1985 if (holdtime < myholdtime) 1986 peer->holdtime = holdtime; 1987 else 1988 peer->holdtime = myholdtime; 1989 1990 memcpy(&bgpid, p, sizeof(bgpid)); 1991 p += sizeof(bgpid); 1992 1993 /* check bgpid for validity - just disallow 0 */ 1994 if (ntohl(bgpid) == 0) { 1995 log_peer_warnx(&peer->conf, "peer BGPID %u unacceptable", 1996 ntohl(bgpid)); 1997 session_notification(peer, ERR_OPEN, ERR_OPEN_BGPID, 1998 NULL, 0); 1999 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2000 return (-1); 2001 } 2002 peer->remote_bgpid = bgpid; 2003 2004 memcpy(&optparamlen, p, sizeof(optparamlen)); 2005 p += sizeof(optparamlen); 2006 2007 if (optparamlen != msglen - MSGSIZE_OPEN_MIN) { 2008 log_peer_warnx(&peer->conf, 2009 "corrupt OPEN message received: length mismatch"); 2010 session_notification(peer, ERR_OPEN, 0, NULL, 0); 2011 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2012 return (-1); 2013 } 2014 2015 plen = optparamlen; 2016 while (plen > 0) { 2017 if (plen < 2) { 2018 log_peer_warnx(&peer->conf, 2019 "corrupt OPEN message received, len wrong"); 2020 session_notification(peer, ERR_OPEN, 0, NULL, 0); 2021 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2022 return (-1); 2023 } 2024 memcpy(&op_type, p, sizeof(op_type)); 2025 p += sizeof(op_type); 2026 plen -= sizeof(op_type); 2027 memcpy(&op_len, p, sizeof(op_len)); 2028 p += sizeof(op_len); 2029 plen -= sizeof(op_len); 2030 if (op_len > 0) { 2031 if (plen < op_len) { 2032 log_peer_warnx(&peer->conf, 2033 "corrupt OPEN message received, len wrong"); 2034 session_notification(peer, ERR_OPEN, 0, 2035 NULL, 0); 2036 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2037 return (-1); 2038 } 2039 op_val = p; 2040 p += op_len; 2041 plen -= op_len; 2042 } else 2043 op_val = NULL; 2044 2045 switch (op_type) { 2046 case OPT_PARAM_CAPABILITIES: /* RFC 3392 */ 2047 if (parse_capabilities(peer, op_val, op_len, 2048 &as) == -1) { 2049 session_notification(peer, ERR_OPEN, 0, 2050 NULL, 0); 2051 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2052 return (-1); 2053 } 2054 break; 2055 case OPT_PARAM_AUTH: /* deprecated */ 2056 default: 2057 /* 2058 * unsupported type 2059 * the RFCs tell us to leave the data section empty 2060 * and notify the peer with ERR_OPEN, ERR_OPEN_OPT. 2061 * How the peer should know _which_ optional parameter 2062 * we don't support is beyond me. 2063 */ 2064 log_peer_warnx(&peer->conf, 2065 "received OPEN message with unsupported optional " 2066 "parameter: type %u", op_type); 2067 session_notification(peer, ERR_OPEN, ERR_OPEN_OPT, 2068 NULL, 0); 2069 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2070 timer_set(peer, Timer_IdleHold, 0); /* no punish */ 2071 peer->IdleHoldTime /= 2; 2072 return (-1); 2073 } 2074 } 2075 2076 /* if remote-as is zero and it's a cloned neighbor, accept any */ 2077 if (peer->template && !peer->conf.remote_as && as != AS_TRANS) { 2078 peer->conf.remote_as = as; 2079 peer->conf.ebgp = (peer->conf.remote_as != peer->conf.local_as); 2080 if (!peer->conf.ebgp) 2081 /* force enforce_as off for iBGP sessions */ 2082 peer->conf.enforce_as = ENFORCE_AS_OFF; 2083 } 2084 2085 if (peer->conf.remote_as != as) { 2086 log_peer_warnx(&peer->conf, "peer sent wrong AS %s", 2087 log_as(as)); 2088 session_notification(peer, ERR_OPEN, ERR_OPEN_AS, NULL, 0); 2089 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2090 return (-1); 2091 } 2092 2093 if (capa_neg_calc(peer) == -1) { 2094 log_peer_warnx(&peer->conf, 2095 "capability negotiation calculation failed"); 2096 session_notification(peer, ERR_OPEN, 0, NULL, 0); 2097 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2098 return (-1); 2099 } 2100 2101 return (0); 2102 } 2103 2104 int 2105 parse_update(struct peer *peer) 2106 { 2107 u_char *p; 2108 u_int16_t datalen; 2109 2110 /* 2111 * we pass the message verbatim to the rde. 2112 * in case of errors the whole session is reset with a 2113 * notification anyway, we only need to know the peer 2114 */ 2115 p = peer->rbuf->rptr; 2116 p += MSGSIZE_HEADER_MARKER; 2117 memcpy(&datalen, p, sizeof(datalen)); 2118 datalen = ntohs(datalen); 2119 2120 p = peer->rbuf->rptr; 2121 p += MSGSIZE_HEADER; /* header is already checked */ 2122 datalen -= MSGSIZE_HEADER; 2123 2124 if (imsg_rde(IMSG_UPDATE, peer->conf.id, p, datalen) == -1) 2125 return (-1); 2126 2127 return (0); 2128 } 2129 2130 int 2131 parse_refresh(struct peer *peer) 2132 { 2133 u_char *p; 2134 u_int16_t afi; 2135 u_int8_t aid, safi; 2136 2137 p = peer->rbuf->rptr; 2138 p += MSGSIZE_HEADER; /* header is already checked */ 2139 2140 /* 2141 * We could check if we actually announced the capability but 2142 * as long as the message is correctly encoded we don't care. 2143 */ 2144 2145 /* afi, 2 byte */ 2146 memcpy(&afi, p, sizeof(afi)); 2147 afi = ntohs(afi); 2148 p += 2; 2149 /* reserved, 1 byte */ 2150 p += 1; 2151 /* safi, 1 byte */ 2152 memcpy(&safi, p, sizeof(safi)); 2153 2154 /* afi/safi unchecked - unrecognized values will be ignored anyway */ 2155 if (afi2aid(afi, safi, &aid) == -1) { 2156 log_peer_warnx(&peer->conf, "peer sent bad refresh, " 2157 "invalid afi/safi pair"); 2158 return (0); 2159 } 2160 2161 if (imsg_rde(IMSG_REFRESH, peer->conf.id, &aid, sizeof(aid)) == -1) 2162 return (-1); 2163 2164 return (0); 2165 } 2166 2167 int 2168 parse_notification(struct peer *peer) 2169 { 2170 u_char *p; 2171 u_int16_t datalen; 2172 u_int8_t errcode; 2173 u_int8_t subcode; 2174 u_int8_t capa_code; 2175 u_int8_t capa_len; 2176 size_t shutcomm_len; 2177 u_int8_t i; 2178 2179 /* just log */ 2180 p = peer->rbuf->rptr; 2181 p += MSGSIZE_HEADER_MARKER; 2182 memcpy(&datalen, p, sizeof(datalen)); 2183 datalen = ntohs(datalen); 2184 2185 p = peer->rbuf->rptr; 2186 p += MSGSIZE_HEADER; /* header is already checked */ 2187 datalen -= MSGSIZE_HEADER; 2188 2189 memcpy(&errcode, p, sizeof(errcode)); 2190 p += sizeof(errcode); 2191 datalen -= sizeof(errcode); 2192 2193 memcpy(&subcode, p, sizeof(subcode)); 2194 p += sizeof(subcode); 2195 datalen -= sizeof(subcode); 2196 2197 log_notification(peer, errcode, subcode, p, datalen, "received"); 2198 peer->errcnt++; 2199 2200 if (errcode == ERR_OPEN && subcode == ERR_OPEN_CAPA) { 2201 if (datalen == 0) { /* zebra likes to send those.. humbug */ 2202 log_peer_warnx(&peer->conf, "received \"unsupported " 2203 "capability\" notification without data part, " 2204 "disabling capability announcements altogether"); 2205 session_capa_ann_none(peer); 2206 } 2207 2208 while (datalen > 0) { 2209 if (datalen < 2) { 2210 log_peer_warnx(&peer->conf, 2211 "parse_notification: " 2212 "expect len >= 2, len is %u", datalen); 2213 return (-1); 2214 } 2215 memcpy(&capa_code, p, sizeof(capa_code)); 2216 p += sizeof(capa_code); 2217 datalen -= sizeof(capa_code); 2218 memcpy(&capa_len, p, sizeof(capa_len)); 2219 p += sizeof(capa_len); 2220 datalen -= sizeof(capa_len); 2221 if (datalen < capa_len) { 2222 log_peer_warnx(&peer->conf, 2223 "parse_notification: capa_len %u exceeds " 2224 "remaining msg length %u", capa_len, 2225 datalen); 2226 return (-1); 2227 } 2228 p += capa_len; 2229 datalen -= capa_len; 2230 switch (capa_code) { 2231 case CAPA_MP: 2232 for (i = 0; i < AID_MAX; i++) 2233 peer->capa.ann.mp[i] = 0; 2234 log_peer_warnx(&peer->conf, 2235 "disabling multiprotocol capability"); 2236 break; 2237 case CAPA_REFRESH: 2238 peer->capa.ann.refresh = 0; 2239 log_peer_warnx(&peer->conf, 2240 "disabling route refresh capability"); 2241 break; 2242 case CAPA_RESTART: 2243 peer->capa.ann.grestart.restart = 0; 2244 log_peer_warnx(&peer->conf, 2245 "disabling restart capability"); 2246 break; 2247 case CAPA_AS4BYTE: 2248 peer->capa.ann.as4byte = 0; 2249 log_peer_warnx(&peer->conf, 2250 "disabling 4-byte AS num capability"); 2251 break; 2252 default: /* should not happen... */ 2253 log_peer_warnx(&peer->conf, "received " 2254 "\"unsupported capability\" notification " 2255 "for unknown capability %u, disabling " 2256 "capability announcements altogether", 2257 capa_code); 2258 session_capa_ann_none(peer); 2259 break; 2260 } 2261 } 2262 2263 return (1); 2264 } 2265 2266 if (errcode == ERR_OPEN && subcode == ERR_OPEN_OPT) { 2267 session_capa_ann_none(peer); 2268 return (1); 2269 } 2270 2271 if (errcode == ERR_CEASE && 2272 (subcode == ERR_CEASE_ADMIN_DOWN || 2273 subcode == ERR_CEASE_ADMIN_RESET)) { 2274 if (datalen > 1) { 2275 shutcomm_len = *p++; 2276 datalen--; 2277 if (datalen < shutcomm_len) { 2278 log_peer_warnx(&peer->conf, 2279 "received truncated shutdown reason"); 2280 return (0); 2281 } 2282 if (shutcomm_len > SHUT_COMM_LEN - 1) { 2283 log_peer_warnx(&peer->conf, 2284 "received overly long shutdown reason"); 2285 return (0); 2286 } 2287 memcpy(peer->stats.last_shutcomm, p, shutcomm_len); 2288 peer->stats.last_shutcomm[shutcomm_len] = '\0'; 2289 log_peer_warnx(&peer->conf, 2290 "received shutdown reason: \"%s\"", 2291 log_shutcomm(peer->stats.last_shutcomm)); 2292 p += shutcomm_len; 2293 datalen -= shutcomm_len; 2294 } 2295 } 2296 2297 return (0); 2298 } 2299 2300 int 2301 parse_capabilities(struct peer *peer, u_char *d, u_int16_t dlen, u_int32_t *as) 2302 { 2303 u_char *capa_val; 2304 u_int32_t remote_as; 2305 u_int16_t len; 2306 u_int16_t afi; 2307 u_int16_t gr_header; 2308 u_int8_t safi; 2309 u_int8_t aid; 2310 u_int8_t gr_flags; 2311 u_int8_t capa_code; 2312 u_int8_t capa_len; 2313 u_int8_t i; 2314 2315 len = dlen; 2316 while (len > 0) { 2317 if (len < 2) { 2318 log_peer_warnx(&peer->conf, "Bad capabilities attr " 2319 "length: %u, too short", len); 2320 return (-1); 2321 } 2322 memcpy(&capa_code, d, sizeof(capa_code)); 2323 d += sizeof(capa_code); 2324 len -= sizeof(capa_code); 2325 memcpy(&capa_len, d, sizeof(capa_len)); 2326 d += sizeof(capa_len); 2327 len -= sizeof(capa_len); 2328 if (capa_len > 0) { 2329 if (len < capa_len) { 2330 log_peer_warnx(&peer->conf, 2331 "Bad capabilities attr length: " 2332 "len %u smaller than capa_len %u", 2333 len, capa_len); 2334 return (-1); 2335 } 2336 capa_val = d; 2337 d += capa_len; 2338 len -= capa_len; 2339 } else 2340 capa_val = NULL; 2341 2342 switch (capa_code) { 2343 case CAPA_MP: /* RFC 4760 */ 2344 if (capa_len != 4) { 2345 log_peer_warnx(&peer->conf, 2346 "Bad multi protocol capability length: " 2347 "%u", capa_len); 2348 break; 2349 } 2350 memcpy(&afi, capa_val, sizeof(afi)); 2351 afi = ntohs(afi); 2352 memcpy(&safi, capa_val + 3, sizeof(safi)); 2353 if (afi2aid(afi, safi, &aid) == -1) { 2354 log_peer_warnx(&peer->conf, 2355 "Received multi protocol capability: " 2356 " unknown AFI %u, safi %u pair", 2357 afi, safi); 2358 break; 2359 } 2360 peer->capa.peer.mp[aid] = 1; 2361 break; 2362 case CAPA_REFRESH: 2363 peer->capa.peer.refresh = 1; 2364 break; 2365 case CAPA_RESTART: 2366 if (capa_len == 2) { 2367 /* peer only supports EoR marker */ 2368 peer->capa.peer.grestart.restart = 1; 2369 peer->capa.peer.grestart.timeout = 0; 2370 break; 2371 } else if (capa_len % 4 != 2) { 2372 log_peer_warnx(&peer->conf, 2373 "Bad graceful restart capability length: " 2374 "%u", capa_len); 2375 peer->capa.peer.grestart.restart = 0; 2376 peer->capa.peer.grestart.timeout = 0; 2377 break; 2378 } 2379 2380 memcpy(&gr_header, capa_val, sizeof(gr_header)); 2381 gr_header = ntohs(gr_header); 2382 peer->capa.peer.grestart.timeout = 2383 gr_header & CAPA_GR_TIMEMASK; 2384 if (peer->capa.peer.grestart.timeout == 0) { 2385 log_peer_warnx(&peer->conf, "Received " 2386 "graceful restart timeout is zero"); 2387 peer->capa.peer.grestart.restart = 0; 2388 break; 2389 } 2390 2391 for (i = 2; i <= capa_len - 4; i += 4) { 2392 memcpy(&afi, capa_val + i, sizeof(afi)); 2393 afi = ntohs(afi); 2394 memcpy(&safi, capa_val + i + 2, sizeof(safi)); 2395 if (afi2aid(afi, safi, &aid) == -1) { 2396 log_peer_warnx(&peer->conf, 2397 "Received graceful restart capa: " 2398 " unknown AFI %u, safi %u pair", 2399 afi, safi); 2400 continue; 2401 } 2402 memcpy(&gr_flags, capa_val + i + 3, 2403 sizeof(gr_flags)); 2404 peer->capa.peer.grestart.flags[aid] |= 2405 CAPA_GR_PRESENT; 2406 if (gr_flags & CAPA_GR_F_FLAG) 2407 peer->capa.peer.grestart.flags[aid] |= 2408 CAPA_GR_FORWARD; 2409 if (gr_header & CAPA_GR_R_FLAG) 2410 peer->capa.peer.grestart.flags[aid] |= 2411 CAPA_GR_RESTART; 2412 peer->capa.peer.grestart.restart = 2; 2413 } 2414 break; 2415 case CAPA_AS4BYTE: 2416 if (capa_len != 4) { 2417 log_peer_warnx(&peer->conf, 2418 "Bad AS4BYTE capability length: " 2419 "%u", capa_len); 2420 peer->capa.peer.as4byte = 0; 2421 break; 2422 } 2423 memcpy(&remote_as, capa_val, sizeof(remote_as)); 2424 *as = ntohl(remote_as); 2425 if (*as == 0) { 2426 log_peer_warnx(&peer->conf, 2427 "peer requests unacceptable AS %u", *as); 2428 session_notification(peer, ERR_OPEN, 2429 ERR_OPEN_AS, NULL, 0); 2430 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2431 return (-1); 2432 } 2433 peer->capa.peer.as4byte = 1; 2434 break; 2435 default: 2436 break; 2437 } 2438 } 2439 2440 return (0); 2441 } 2442 2443 int 2444 capa_neg_calc(struct peer *p) 2445 { 2446 u_int8_t i, hasmp = 0; 2447 2448 /* refresh: does not realy matter here, use peer setting */ 2449 p->capa.neg.refresh = p->capa.peer.refresh; 2450 2451 /* as4byte: both side must announce capability */ 2452 if (p->capa.ann.as4byte && p->capa.peer.as4byte) 2453 p->capa.neg.as4byte = 1; 2454 else 2455 p->capa.neg.as4byte = 0; 2456 2457 /* MP: both side must announce capability */ 2458 for (i = 0; i < AID_MAX; i++) { 2459 if (p->capa.ann.mp[i] && p->capa.peer.mp[i]) { 2460 p->capa.neg.mp[i] = 1; 2461 hasmp = 1; 2462 } else 2463 p->capa.neg.mp[i] = 0; 2464 } 2465 /* if no MP capability present default to IPv4 unicast mode */ 2466 if (!hasmp) 2467 p->capa.neg.mp[AID_INET] = 1; 2468 2469 /* 2470 * graceful restart: only the peer capabilities are of interest here. 2471 * It is necessary to compare the new values with the previous ones 2472 * and act acordingly. AFI/SAFI that are not part in the MP capability 2473 * are treated as not being present. 2474 */ 2475 2476 for (i = 0; i < AID_MAX; i++) { 2477 int8_t negflags; 2478 2479 /* disable GR if the AFI/SAFI is not present */ 2480 if (p->capa.peer.grestart.flags[i] & CAPA_GR_PRESENT && 2481 p->capa.neg.mp[i] == 0) 2482 p->capa.peer.grestart.flags[i] = 0; /* disable */ 2483 /* look at current GR state and decide what to do */ 2484 negflags = p->capa.neg.grestart.flags[i]; 2485 p->capa.neg.grestart.flags[i] = p->capa.peer.grestart.flags[i]; 2486 if (negflags & CAPA_GR_RESTARTING) { 2487 if (!(p->capa.peer.grestart.flags[i] & 2488 CAPA_GR_FORWARD)) { 2489 if (imsg_rde(IMSG_SESSION_FLUSH, p->conf.id, 2490 &i, sizeof(i)) == -1) 2491 return (-1); 2492 log_peer_warnx(&p->conf, "graceful restart of " 2493 "%s, not restarted, flushing", aid2str(i)); 2494 } else 2495 p->capa.neg.grestart.flags[i] |= 2496 CAPA_GR_RESTARTING; 2497 } 2498 } 2499 p->capa.neg.grestart.timeout = p->capa.peer.grestart.timeout; 2500 p->capa.neg.grestart.restart = p->capa.peer.grestart.restart; 2501 2502 return (0); 2503 } 2504 2505 void 2506 session_dispatch_imsg(struct imsgbuf *ibuf, int idx, u_int *listener_cnt) 2507 { 2508 struct imsg imsg; 2509 struct mrt xmrt; 2510 struct mrt *mrt; 2511 struct imsgbuf *i; 2512 struct peer *p; 2513 struct listen_addr *la, *nla; 2514 struct kif *kif; 2515 u_char *data; 2516 int n, fd, depend_ok, restricted; 2517 u_int8_t aid, errcode, subcode; 2518 2519 while (ibuf) { 2520 if ((n = imsg_get(ibuf, &imsg)) == -1) 2521 fatal("session_dispatch_imsg: imsg_get error"); 2522 2523 if (n == 0) 2524 break; 2525 2526 switch (imsg.hdr.type) { 2527 case IMSG_SOCKET_CONN: 2528 case IMSG_SOCKET_CONN_CTL: 2529 if (idx != PFD_PIPE_MAIN) 2530 fatalx("reconf request not from parent"); 2531 if ((fd = imsg.fd) == -1) { 2532 log_warnx("expected to receive imsg fd to " 2533 "RDE but didn't receive any"); 2534 break; 2535 } 2536 if ((i = malloc(sizeof(struct imsgbuf))) == NULL) 2537 fatal(NULL); 2538 imsg_init(i, fd); 2539 if (imsg.hdr.type == IMSG_SOCKET_CONN) { 2540 if (ibuf_rde) { 2541 log_warnx("Unexpected imsg connection " 2542 "to RDE received"); 2543 msgbuf_clear(&ibuf_rde->w); 2544 free(ibuf_rde); 2545 } 2546 ibuf_rde = i; 2547 } else { 2548 if (ibuf_rde_ctl) { 2549 log_warnx("Unexpected imsg ctl " 2550 "connection to RDE received"); 2551 msgbuf_clear(&ibuf_rde_ctl->w); 2552 free(ibuf_rde_ctl); 2553 } 2554 ibuf_rde_ctl = i; 2555 } 2556 break; 2557 case IMSG_RECONF_CONF: 2558 if (idx != PFD_PIPE_MAIN) 2559 fatalx("reconf request not from parent"); 2560 nconf = new_config(); 2561 2562 copy_config(nconf, imsg.data); 2563 pending_reconf = 1; 2564 break; 2565 case IMSG_RECONF_PEER: 2566 if (idx != PFD_PIPE_MAIN) 2567 fatalx("reconf request not from parent"); 2568 if ((p = calloc(1, sizeof(struct peer))) == NULL) 2569 fatal("new_peer"); 2570 memcpy(&p->conf, imsg.data, sizeof(struct peer_config)); 2571 p->state = p->prev_state = STATE_NONE; 2572 p->reconf_action = RECONF_REINIT; 2573 if (RB_INSERT(peer_head, &nconf->peers, p) != NULL) 2574 fatalx("%s: peer tree is corrupt", __func__); 2575 break; 2576 case IMSG_RECONF_LISTENER: 2577 if (idx != PFD_PIPE_MAIN) 2578 fatalx("reconf request not from parent"); 2579 if (nconf == NULL) 2580 fatalx("IMSG_RECONF_LISTENER but no config"); 2581 nla = imsg.data; 2582 TAILQ_FOREACH(la, conf->listen_addrs, entry) 2583 if (!la_cmp(la, nla)) 2584 break; 2585 2586 if (la == NULL) { 2587 if (nla->reconf != RECONF_REINIT) 2588 fatalx("king bula sez: " 2589 "expected REINIT"); 2590 2591 if ((nla->fd = imsg.fd) == -1) 2592 log_warnx("expected to receive fd for " 2593 "%s but didn't receive any", 2594 log_sockaddr((struct sockaddr *) 2595 &nla->sa, nla->sa_len)); 2596 2597 la = calloc(1, sizeof(struct listen_addr)); 2598 if (la == NULL) 2599 fatal(NULL); 2600 memcpy(&la->sa, &nla->sa, sizeof(la->sa)); 2601 la->flags = nla->flags; 2602 la->fd = nla->fd; 2603 la->reconf = RECONF_REINIT; 2604 TAILQ_INSERT_TAIL(nconf->listen_addrs, la, 2605 entry); 2606 } else { 2607 if (nla->reconf != RECONF_KEEP) 2608 fatalx("king bula sez: expected KEEP"); 2609 la->reconf = RECONF_KEEP; 2610 } 2611 2612 break; 2613 case IMSG_RECONF_CTRL: 2614 if (idx != PFD_PIPE_MAIN) 2615 fatalx("reconf request not from parent"); 2616 if (imsg.hdr.len != IMSG_HEADER_SIZE + 2617 sizeof(restricted)) 2618 fatalx("IFINFO imsg with wrong len"); 2619 memcpy(&restricted, imsg.data, sizeof(restricted)); 2620 if (imsg.fd == -1) { 2621 log_warnx("expected to receive fd for control " 2622 "socket but didn't receive any"); 2623 break; 2624 } 2625 if (restricted) { 2626 control_shutdown(rcsock); 2627 rcsock = imsg.fd; 2628 } else { 2629 control_shutdown(csock); 2630 csock = imsg.fd; 2631 } 2632 break; 2633 case IMSG_RECONF_DRAIN: 2634 if (idx != PFD_PIPE_MAIN) 2635 fatalx("reconf request not from parent"); 2636 imsg_compose(ibuf_main, IMSG_RECONF_DRAIN, 0, 0, 2637 -1, NULL, 0); 2638 break; 2639 case IMSG_RECONF_DONE: 2640 if (idx != PFD_PIPE_MAIN) 2641 fatalx("reconf request not from parent"); 2642 if (nconf == NULL) 2643 fatalx("got IMSG_RECONF_DONE but no config"); 2644 copy_config(conf, nconf); 2645 merge_peers(conf, nconf); 2646 2647 /* delete old listeners */ 2648 for (la = TAILQ_FIRST(conf->listen_addrs); la != NULL; 2649 la = nla) { 2650 nla = TAILQ_NEXT(la, entry); 2651 if (la->reconf == RECONF_NONE) { 2652 log_info("not listening on %s any more", 2653 log_sockaddr((struct sockaddr *) 2654 &la->sa, la->sa_len)); 2655 TAILQ_REMOVE(conf->listen_addrs, la, 2656 entry); 2657 close(la->fd); 2658 free(la); 2659 } 2660 } 2661 2662 /* add new listeners */ 2663 while ((la = TAILQ_FIRST(nconf->listen_addrs)) != 2664 NULL) { 2665 TAILQ_REMOVE(nconf->listen_addrs, la, entry); 2666 TAILQ_INSERT_TAIL(conf->listen_addrs, la, 2667 entry); 2668 } 2669 2670 setup_listeners(listener_cnt); 2671 free_config(nconf); 2672 nconf = NULL; 2673 pending_reconf = 0; 2674 log_info("SE reconfigured"); 2675 imsg_compose(ibuf_main, IMSG_RECONF_DONE, 0, 0, 2676 -1, NULL, 0); 2677 break; 2678 case IMSG_IFINFO: 2679 if (idx != PFD_PIPE_MAIN) 2680 fatalx("IFINFO message not from parent"); 2681 if (imsg.hdr.len != IMSG_HEADER_SIZE + 2682 sizeof(struct kif)) 2683 fatalx("IFINFO imsg with wrong len"); 2684 kif = imsg.data; 2685 depend_ok = kif->depend_state; 2686 2687 RB_FOREACH(p, peer_head, &conf->peers) 2688 if (!strcmp(p->conf.if_depend, kif->ifname)) { 2689 if (depend_ok && !p->depend_ok) { 2690 p->depend_ok = depend_ok; 2691 bgp_fsm(p, EVNT_START); 2692 } else if (!depend_ok && p->depend_ok) { 2693 p->depend_ok = depend_ok; 2694 session_stop(p, 2695 ERR_CEASE_OTHER_CHANGE); 2696 } 2697 } 2698 break; 2699 case IMSG_MRT_OPEN: 2700 case IMSG_MRT_REOPEN: 2701 if (imsg.hdr.len > IMSG_HEADER_SIZE + 2702 sizeof(struct mrt)) { 2703 log_warnx("wrong imsg len"); 2704 break; 2705 } 2706 2707 memcpy(&xmrt, imsg.data, sizeof(struct mrt)); 2708 if ((xmrt.wbuf.fd = imsg.fd) == -1) 2709 log_warnx("expected to receive fd for mrt dump " 2710 "but didn't receive any"); 2711 2712 mrt = mrt_get(&mrthead, &xmrt); 2713 if (mrt == NULL) { 2714 /* new dump */ 2715 mrt = calloc(1, sizeof(struct mrt)); 2716 if (mrt == NULL) 2717 fatal("session_dispatch_imsg"); 2718 memcpy(mrt, &xmrt, sizeof(struct mrt)); 2719 TAILQ_INIT(&mrt->wbuf.bufs); 2720 LIST_INSERT_HEAD(&mrthead, mrt, entry); 2721 } else { 2722 /* old dump reopened */ 2723 close(mrt->wbuf.fd); 2724 mrt->wbuf.fd = xmrt.wbuf.fd; 2725 } 2726 break; 2727 case IMSG_MRT_CLOSE: 2728 if (imsg.hdr.len > IMSG_HEADER_SIZE + 2729 sizeof(struct mrt)) { 2730 log_warnx("wrong imsg len"); 2731 break; 2732 } 2733 2734 memcpy(&xmrt, imsg.data, sizeof(struct mrt)); 2735 mrt = mrt_get(&mrthead, &xmrt); 2736 if (mrt != NULL) 2737 mrt_done(mrt); 2738 break; 2739 case IMSG_CTL_KROUTE: 2740 case IMSG_CTL_KROUTE_ADDR: 2741 case IMSG_CTL_SHOW_NEXTHOP: 2742 case IMSG_CTL_SHOW_INTERFACE: 2743 case IMSG_CTL_SHOW_FIB_TABLES: 2744 if (idx != PFD_PIPE_MAIN) 2745 fatalx("ctl kroute request not from parent"); 2746 control_imsg_relay(&imsg); 2747 break; 2748 case IMSG_CTL_SHOW_RIB: 2749 case IMSG_CTL_SHOW_RIB_PREFIX: 2750 case IMSG_CTL_SHOW_RIB_COMMUNITIES: 2751 case IMSG_CTL_SHOW_RIB_ATTR: 2752 case IMSG_CTL_SHOW_RIB_MEM: 2753 case IMSG_CTL_SHOW_RIB_HASH: 2754 case IMSG_CTL_SHOW_NETWORK: 2755 case IMSG_CTL_SHOW_NEIGHBOR: 2756 if (idx != PFD_PIPE_ROUTE_CTL) 2757 fatalx("ctl rib request not from RDE"); 2758 control_imsg_relay(&imsg); 2759 break; 2760 case IMSG_CTL_END: 2761 case IMSG_CTL_RESULT: 2762 control_imsg_relay(&imsg); 2763 break; 2764 case IMSG_UPDATE: 2765 if (idx != PFD_PIPE_ROUTE) 2766 fatalx("update request not from RDE"); 2767 if (imsg.hdr.len > IMSG_HEADER_SIZE + 2768 MAX_PKTSIZE - MSGSIZE_HEADER || 2769 imsg.hdr.len < IMSG_HEADER_SIZE + 2770 MSGSIZE_UPDATE_MIN - MSGSIZE_HEADER) 2771 log_warnx("RDE sent invalid update"); 2772 else 2773 session_update(imsg.hdr.peerid, imsg.data, 2774 imsg.hdr.len - IMSG_HEADER_SIZE); 2775 break; 2776 case IMSG_UPDATE_ERR: 2777 if (idx != PFD_PIPE_ROUTE) 2778 fatalx("update request not from RDE"); 2779 if (imsg.hdr.len < IMSG_HEADER_SIZE + 2) { 2780 log_warnx("RDE sent invalid notification"); 2781 break; 2782 } 2783 if ((p = getpeerbyid(conf, imsg.hdr.peerid)) == NULL) { 2784 log_warnx("no such peer: id=%u", 2785 imsg.hdr.peerid); 2786 break; 2787 } 2788 data = imsg.data; 2789 errcode = *data++; 2790 subcode = *data++; 2791 2792 if (imsg.hdr.len == IMSG_HEADER_SIZE + 2) 2793 data = NULL; 2794 2795 session_notification(p, errcode, subcode, 2796 data, imsg.hdr.len - IMSG_HEADER_SIZE - 2); 2797 switch (errcode) { 2798 case ERR_CEASE: 2799 switch (subcode) { 2800 case ERR_CEASE_MAX_PREFIX: 2801 bgp_fsm(p, EVNT_STOP); 2802 if (p->conf.max_prefix_restart) 2803 timer_set(p, Timer_IdleHold, 60 * 2804 p->conf.max_prefix_restart); 2805 break; 2806 default: 2807 bgp_fsm(p, EVNT_CON_FATAL); 2808 break; 2809 } 2810 break; 2811 default: 2812 bgp_fsm(p, EVNT_CON_FATAL); 2813 break; 2814 } 2815 break; 2816 case IMSG_SESSION_RESTARTED: 2817 if (idx != PFD_PIPE_ROUTE) 2818 fatalx("update request not from RDE"); 2819 if (imsg.hdr.len < IMSG_HEADER_SIZE + sizeof(aid)) { 2820 log_warnx("RDE sent invalid restart msg"); 2821 break; 2822 } 2823 if ((p = getpeerbyid(conf, imsg.hdr.peerid)) == NULL) { 2824 log_warnx("no such peer: id=%u", 2825 imsg.hdr.peerid); 2826 break; 2827 } 2828 memcpy(&aid, imsg.data, sizeof(aid)); 2829 if (aid >= AID_MAX) 2830 fatalx("IMSG_SESSION_RESTARTED: bad AID"); 2831 if (p->capa.neg.grestart.flags[aid] & 2832 CAPA_GR_RESTARTING) { 2833 log_peer_warnx(&p->conf, 2834 "graceful restart of %s finished", 2835 aid2str(aid)); 2836 p->capa.neg.grestart.flags[aid] &= 2837 ~CAPA_GR_RESTARTING; 2838 timer_stop(p, Timer_RestartTimeout); 2839 2840 /* signal back to RDE to cleanup stale routes */ 2841 if (imsg_rde(IMSG_SESSION_RESTARTED, 2842 imsg.hdr.peerid, &aid, sizeof(aid)) == -1) 2843 fatal("imsg_compose: " 2844 "IMSG_SESSION_RESTARTED"); 2845 } 2846 break; 2847 case IMSG_SESSION_DOWN: 2848 if (idx != PFD_PIPE_ROUTE) 2849 fatalx("update request not from RDE"); 2850 if ((p = getpeerbyid(conf, imsg.hdr.peerid)) == NULL) { 2851 log_warnx("no such peer: id=%u", 2852 imsg.hdr.peerid); 2853 break; 2854 } 2855 session_stop(p, ERR_CEASE_ADMIN_DOWN); 2856 break; 2857 default: 2858 break; 2859 } 2860 imsg_free(&imsg); 2861 } 2862 } 2863 2864 int 2865 la_cmp(struct listen_addr *a, struct listen_addr *b) 2866 { 2867 struct sockaddr_in *in_a, *in_b; 2868 struct sockaddr_in6 *in6_a, *in6_b; 2869 2870 if (a->sa.ss_family != b->sa.ss_family) 2871 return (1); 2872 2873 switch (a->sa.ss_family) { 2874 case AF_INET: 2875 in_a = (struct sockaddr_in *)&a->sa; 2876 in_b = (struct sockaddr_in *)&b->sa; 2877 if (in_a->sin_addr.s_addr != in_b->sin_addr.s_addr) 2878 return (1); 2879 if (in_a->sin_port != in_b->sin_port) 2880 return (1); 2881 break; 2882 case AF_INET6: 2883 in6_a = (struct sockaddr_in6 *)&a->sa; 2884 in6_b = (struct sockaddr_in6 *)&b->sa; 2885 if (bcmp(&in6_a->sin6_addr, &in6_b->sin6_addr, 2886 sizeof(struct in6_addr))) 2887 return (1); 2888 if (in6_a->sin6_port != in6_b->sin6_port) 2889 return (1); 2890 break; 2891 default: 2892 fatal("king bula sez: unknown address family"); 2893 /* NOTREACHED */ 2894 } 2895 2896 return (0); 2897 } 2898 2899 struct peer * 2900 getpeerbydesc(struct bgpd_config *c, const char *descr) 2901 { 2902 struct peer *p, *res = NULL; 2903 int match = 0; 2904 2905 RB_FOREACH(p, peer_head, &conf->peers) 2906 if (!strcmp(p->conf.descr, descr)) { 2907 res = p; 2908 match++; 2909 } 2910 2911 if (match > 1) 2912 log_info("neighbor description \"%s\" not unique, request " 2913 "aborted", descr); 2914 2915 if (match == 1) 2916 return (res); 2917 else 2918 return (NULL); 2919 } 2920 2921 struct peer * 2922 getpeerbyip(struct bgpd_config *c, struct sockaddr *ip) 2923 { 2924 struct bgpd_addr addr; 2925 struct peer *p, *newpeer, *loose = NULL; 2926 u_int32_t id; 2927 2928 sa2addr(ip, &addr, NULL); 2929 2930 /* we might want a more effective way to find peers by IP */ 2931 RB_FOREACH(p, peer_head, &conf->peers) 2932 if (!p->conf.template && 2933 !memcmp(&addr, &p->conf.remote_addr, sizeof(addr))) 2934 return (p); 2935 2936 /* try template matching */ 2937 RB_FOREACH(p, peer_head, &conf->peers) 2938 if (p->conf.template && 2939 p->conf.remote_addr.aid == addr.aid && 2940 session_match_mask(p, &addr)) 2941 if (loose == NULL || loose->conf.remote_masklen < 2942 p->conf.remote_masklen) 2943 loose = p; 2944 2945 if (loose != NULL) { 2946 /* clone */ 2947 if ((newpeer = malloc(sizeof(struct peer))) == NULL) 2948 fatal(NULL); 2949 memcpy(newpeer, loose, sizeof(struct peer)); 2950 for (id = PEER_ID_DYN_MAX; id > PEER_ID_STATIC_MAX; id--) { 2951 RB_FOREACH(p, peer_head, &conf->peers) 2952 if (p->conf.id == id) 2953 break; 2954 if (p == NULL) /* we found a free id */ 2955 break; 2956 } 2957 newpeer->template = loose; 2958 session_template_clone(newpeer, ip, id, 0); 2959 newpeer->state = newpeer->prev_state = STATE_NONE; 2960 newpeer->reconf_action = RECONF_KEEP; 2961 newpeer->rbuf = NULL; 2962 init_peer(newpeer); 2963 bgp_fsm(newpeer, EVNT_START); 2964 if (RB_INSERT(peer_head, &c->peers, newpeer) != NULL) 2965 fatalx("%s: peer tree is corrupt", __func__); 2966 return (newpeer); 2967 } 2968 2969 return (NULL); 2970 } 2971 2972 struct peer * 2973 getpeerbyid(struct bgpd_config *c, u_int32_t peerid) 2974 { 2975 static struct peer lookup; 2976 2977 lookup.conf.id = peerid; 2978 2979 return RB_FIND(peer_head, &c->peers, &lookup); 2980 } 2981 2982 int 2983 peer_matched(struct peer *p, struct ctl_neighbor *n) 2984 { 2985 char *s; 2986 2987 if (n && n->addr.aid) { 2988 if (memcmp(&p->conf.remote_addr, &n->addr, 2989 sizeof(p->conf.remote_addr))) 2990 return 0; 2991 } else if (n && n->descr[0]) { 2992 s = n->is_group ? p->conf.group : p->conf.descr; 2993 if (strcmp(s, n->descr)) 2994 return 0; 2995 } 2996 return 1; 2997 } 2998 2999 void 3000 session_template_clone(struct peer *p, struct sockaddr *ip, u_int32_t id, 3001 u_int32_t as) 3002 { 3003 struct bgpd_addr remote_addr; 3004 3005 if (ip) 3006 sa2addr(ip, &remote_addr, NULL); 3007 else 3008 memcpy(&remote_addr, &p->conf.remote_addr, sizeof(remote_addr)); 3009 3010 memcpy(&p->conf, &p->template->conf, sizeof(struct peer_config)); 3011 3012 p->conf.id = id; 3013 3014 if (as) { 3015 p->conf.remote_as = as; 3016 p->conf.ebgp = (p->conf.remote_as != p->conf.local_as); 3017 if (!p->conf.ebgp) 3018 /* force enforce_as off for iBGP sessions */ 3019 p->conf.enforce_as = ENFORCE_AS_OFF; 3020 } 3021 3022 memcpy(&p->conf.remote_addr, &remote_addr, sizeof(remote_addr)); 3023 switch (p->conf.remote_addr.aid) { 3024 case AID_INET: 3025 p->conf.remote_masklen = 32; 3026 break; 3027 case AID_INET6: 3028 p->conf.remote_masklen = 128; 3029 break; 3030 } 3031 p->conf.template = 0; 3032 } 3033 3034 int 3035 session_match_mask(struct peer *p, struct bgpd_addr *a) 3036 { 3037 struct in_addr v4masked; 3038 struct in6_addr v6masked; 3039 3040 switch (p->conf.remote_addr.aid) { 3041 case AID_INET: 3042 inet4applymask(&v4masked, &a->v4, p->conf.remote_masklen); 3043 if (p->conf.remote_addr.v4.s_addr == v4masked.s_addr) 3044 return (1); 3045 return (0); 3046 case AID_INET6: 3047 inet6applymask(&v6masked, &a->v6, p->conf.remote_masklen); 3048 3049 if (memcmp(&v6masked, &p->conf.remote_addr.v6, 3050 sizeof(v6masked)) == 0) 3051 return (1); 3052 return (0); 3053 } 3054 return (0); 3055 } 3056 3057 void 3058 session_down(struct peer *peer) 3059 { 3060 bzero(&peer->capa.neg, sizeof(peer->capa.neg)); 3061 peer->stats.last_updown = time(NULL); 3062 /* 3063 * session_down is called in the exit code path so check 3064 * if the RDE is still around, if not there is no need to 3065 * send the message. 3066 */ 3067 if (ibuf_rde == NULL) 3068 return; 3069 if (imsg_rde(IMSG_SESSION_DOWN, peer->conf.id, NULL, 0) == -1) 3070 fatalx("imsg_compose error"); 3071 } 3072 3073 void 3074 session_up(struct peer *p) 3075 { 3076 struct session_up sup; 3077 3078 if (imsg_rde(IMSG_SESSION_ADD, p->conf.id, 3079 &p->conf, sizeof(p->conf)) == -1) 3080 fatalx("imsg_compose error"); 3081 3082 sup.local_addr = p->local; 3083 sup.remote_addr = p->remote; 3084 3085 sup.remote_bgpid = p->remote_bgpid; 3086 sup.short_as = p->short_as; 3087 memcpy(&sup.capa, &p->capa.neg, sizeof(sup.capa)); 3088 p->stats.last_updown = time(NULL); 3089 if (imsg_rde(IMSG_SESSION_UP, p->conf.id, &sup, sizeof(sup)) == -1) 3090 fatalx("imsg_compose error"); 3091 } 3092 3093 int 3094 imsg_ctl_parent(int type, u_int32_t peerid, pid_t pid, void *data, 3095 u_int16_t datalen) 3096 { 3097 return (imsg_compose(ibuf_main, type, peerid, pid, -1, data, datalen)); 3098 } 3099 3100 int 3101 imsg_ctl_rde(int type, pid_t pid, void *data, u_int16_t datalen) 3102 { 3103 if (ibuf_rde_ctl == NULL) { 3104 log_warnx("Can't send message %u to RDE, ctl pipe closed", 3105 type); 3106 return (0); 3107 } 3108 /* 3109 * Use control socket to talk to RDE to bypass the queue of the 3110 * regular imsg socket. 3111 */ 3112 return (imsg_compose(ibuf_rde_ctl, type, 0, pid, -1, data, datalen)); 3113 } 3114 3115 int 3116 imsg_rde(int type, uint32_t peerid, void *data, u_int16_t datalen) 3117 { 3118 if (ibuf_rde == NULL) { 3119 log_warnx("Can't send message %u to RDE, pipe closed", type); 3120 return (0); 3121 } 3122 3123 return (imsg_compose(ibuf_rde, type, peerid, 0, -1, data, datalen)); 3124 } 3125 3126 void 3127 session_demote(struct peer *p, int level) 3128 { 3129 struct demote_msg msg; 3130 3131 strlcpy(msg.demote_group, p->conf.demote_group, 3132 sizeof(msg.demote_group)); 3133 msg.level = level; 3134 if (imsg_compose(ibuf_main, IMSG_DEMOTE, p->conf.id, 0, -1, 3135 &msg, sizeof(msg)) == -1) 3136 fatalx("imsg_compose error"); 3137 3138 p->demoted += level; 3139 } 3140 3141 void 3142 session_stop(struct peer *peer, u_int8_t subcode) 3143 { 3144 char data[SHUT_COMM_LEN]; 3145 size_t datalen; 3146 size_t shutcomm_len; 3147 char *communication; 3148 3149 datalen = 0; 3150 communication = peer->conf.shutcomm; 3151 3152 if ((subcode == ERR_CEASE_ADMIN_DOWN || 3153 subcode == ERR_CEASE_ADMIN_RESET) 3154 && communication && *communication) { 3155 shutcomm_len = strlen(communication); 3156 if (shutcomm_len > SHUT_COMM_LEN - 1) { 3157 log_peer_warnx(&peer->conf, 3158 "trying to send overly long shutdown reason"); 3159 } else { 3160 data[0] = shutcomm_len; 3161 datalen = shutcomm_len + sizeof(data[0]); 3162 memcpy(data + 1, communication, shutcomm_len); 3163 } 3164 } 3165 switch (peer->state) { 3166 case STATE_OPENSENT: 3167 case STATE_OPENCONFIRM: 3168 case STATE_ESTABLISHED: 3169 session_notification(peer, ERR_CEASE, subcode, data, datalen); 3170 break; 3171 default: 3172 /* session not open, no need to send notification */ 3173 break; 3174 } 3175 bgp_fsm(peer, EVNT_STOP); 3176 } 3177 3178 void 3179 merge_peers(struct bgpd_config *c, struct bgpd_config *nc) 3180 { 3181 struct peer *p, *np, *next; 3182 3183 RB_FOREACH(p, peer_head, &conf->peers) { 3184 /* templates are handled specially */ 3185 if (p->template != NULL) 3186 continue; 3187 np = getpeerbyid(nc, p->conf.id); 3188 if (np == NULL) { 3189 p->reconf_action = RECONF_DELETE; 3190 continue; 3191 } 3192 3193 memcpy(&p->conf, &np->conf, sizeof(p->conf)); 3194 RB_REMOVE(peer_head, &nc->peers, np); 3195 free(np); 3196 3197 p->reconf_action = RECONF_KEEP; 3198 3199 /* had demotion, is demoted, demote removed? */ 3200 if (p->demoted && !p->conf.demote_group[0]) 3201 session_demote(p, -1); 3202 3203 /* if session is not open then refresh pfkey data */ 3204 if (p->state < STATE_OPENSENT && !p->template) 3205 imsg_compose(ibuf_main, IMSG_PFKEY_RELOAD, 3206 p->conf.id, 0, -1, NULL, 0); 3207 3208 /* sync the RDE in case we keep the peer */ 3209 if (imsg_rde(IMSG_SESSION_ADD, p->conf.id, 3210 &p->conf, sizeof(struct peer_config)) == -1) 3211 fatalx("imsg_compose error"); 3212 3213 /* apply the config to all clones of a template */ 3214 if (p->conf.template) { 3215 struct peer *xp; 3216 RB_FOREACH(xp, peer_head, &conf->peers) { 3217 if (xp->template != p) 3218 continue; 3219 session_template_clone(xp, NULL, xp->conf.id, 3220 xp->conf.remote_as); 3221 if (imsg_rde(IMSG_SESSION_ADD, xp->conf.id, 3222 &xp->conf, sizeof(xp->conf)) == -1) 3223 fatalx("imsg_compose error"); 3224 } 3225 } 3226 } 3227 3228 /* pfkeys of new peers already loaded by the parent process */ 3229 RB_FOREACH_SAFE(np, peer_head, &nc->peers, next) { 3230 RB_REMOVE(peer_head, &nc->peers, np); 3231 if (RB_INSERT(peer_head, &conf->peers, np) != NULL) 3232 fatalx("%s: peer tree is corrupt", __func__); 3233 } 3234 } 3235