1 /* $OpenBSD: session.c,v 1.379 2019/04/25 12:12:16 claudio Exp $ */ 2 3 /* 4 * Copyright (c) 2003, 2004, 2005 Henning Brauer <henning@openbsd.org> 5 * Copyright (c) 2017 Peter van Dijk <peter.van.dijk@powerdns.com> 6 * 7 * Permission to use, copy, modify, and distribute this software for any 8 * purpose with or without fee is hereby granted, provided that the above 9 * copyright notice and this permission notice appear in all copies. 10 * 11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18 */ 19 20 #include <sys/types.h> 21 22 #include <sys/mman.h> 23 #include <sys/socket.h> 24 #include <sys/time.h> 25 #include <sys/resource.h> 26 #include <sys/un.h> 27 #include <netinet/in.h> 28 #include <netinet/ip.h> 29 #include <netinet/tcp.h> 30 #include <arpa/inet.h> 31 #include <limits.h> 32 33 #include <err.h> 34 #include <errno.h> 35 #include <fcntl.h> 36 #include <poll.h> 37 #include <pwd.h> 38 #include <signal.h> 39 #include <stdio.h> 40 #include <stdlib.h> 41 #include <string.h> 42 #include <syslog.h> 43 #include <unistd.h> 44 45 #include "bgpd.h" 46 #include "mrt.h" 47 #include "session.h" 48 #include "log.h" 49 50 #define PFD_PIPE_MAIN 0 51 #define PFD_PIPE_ROUTE 1 52 #define PFD_PIPE_ROUTE_CTL 2 53 #define PFD_SOCK_CTL 3 54 #define PFD_SOCK_RCTL 4 55 #define PFD_SOCK_PFKEY 5 56 #define PFD_LISTENERS_START 6 57 58 void session_sighdlr(int); 59 int setup_listeners(u_int *); 60 void init_peer(struct peer *); 61 void start_timer_holdtime(struct peer *); 62 void start_timer_keepalive(struct peer *); 63 void session_close_connection(struct peer *); 64 void change_state(struct peer *, enum session_state, enum session_events); 65 int session_setup_socket(struct peer *); 66 void session_accept(int); 67 int session_connect(struct peer *); 68 void session_tcp_established(struct peer *); 69 void session_capa_ann_none(struct peer *); 70 int session_capa_add(struct ibuf *, u_int8_t, u_int8_t); 71 int session_capa_add_mp(struct ibuf *, u_int8_t); 72 int session_capa_add_gr(struct peer *, struct ibuf *, u_int8_t); 73 struct bgp_msg *session_newmsg(enum msg_type, u_int16_t); 74 int session_sendmsg(struct bgp_msg *, struct peer *); 75 void session_open(struct peer *); 76 void session_keepalive(struct peer *); 77 void session_update(u_int32_t, void *, size_t); 78 void session_notification(struct peer *, u_int8_t, u_int8_t, void *, 79 ssize_t); 80 void session_rrefresh(struct peer *, u_int8_t); 81 int session_graceful_restart(struct peer *); 82 int session_graceful_stop(struct peer *); 83 int session_dispatch_msg(struct pollfd *, struct peer *); 84 void session_process_msg(struct peer *); 85 int parse_header(struct peer *, u_char *, u_int16_t *, u_int8_t *); 86 int parse_open(struct peer *); 87 int parse_update(struct peer *); 88 int parse_refresh(struct peer *); 89 int parse_notification(struct peer *); 90 int parse_capabilities(struct peer *, u_char *, u_int16_t, u_int32_t *); 91 int capa_neg_calc(struct peer *); 92 void session_dispatch_imsg(struct imsgbuf *, int, u_int *); 93 void session_up(struct peer *); 94 void session_down(struct peer *); 95 int imsg_rde(int, u_int32_t, void *, u_int16_t); 96 void session_demote(struct peer *, int); 97 void merge_peers(struct bgpd_config *, struct bgpd_config *); 98 99 int la_cmp(struct listen_addr *, struct listen_addr *); 100 void session_template_clone(struct peer *, struct sockaddr *, 101 u_int32_t, u_int32_t); 102 int session_match_mask(struct peer *, struct bgpd_addr *); 103 104 struct bgpd_config *conf, *nconf; 105 struct bgpd_sysdep sysdep; 106 volatile sig_atomic_t session_quit; 107 int pending_reconf; 108 int csock = -1, rcsock = -1; 109 u_int peer_cnt; 110 struct imsgbuf *ibuf_rde; 111 struct imsgbuf *ibuf_rde_ctl; 112 struct imsgbuf *ibuf_main; 113 114 struct mrt_head mrthead; 115 time_t pauseaccept; 116 117 void 118 session_sighdlr(int sig) 119 { 120 switch (sig) { 121 case SIGINT: 122 case SIGTERM: 123 session_quit = 1; 124 break; 125 } 126 } 127 128 int 129 setup_listeners(u_int *la_cnt) 130 { 131 int ttl = 255; 132 int opt; 133 struct listen_addr *la; 134 u_int cnt = 0; 135 136 TAILQ_FOREACH(la, conf->listen_addrs, entry) { 137 la->reconf = RECONF_NONE; 138 cnt++; 139 140 if (la->flags & LISTENER_LISTENING) 141 continue; 142 143 if (la->fd == -1) { 144 log_warn("cannot establish listener on %s: invalid fd", 145 log_sockaddr((struct sockaddr *)&la->sa, 146 la->sa_len)); 147 continue; 148 } 149 150 opt = 1; 151 if (setsockopt(la->fd, IPPROTO_TCP, TCP_MD5SIG, 152 &opt, sizeof(opt)) == -1) { 153 if (errno == ENOPROTOOPT) { /* system w/o md5sig */ 154 log_warnx("md5sig not available, disabling"); 155 sysdep.no_md5sig = 1; 156 } else 157 fatal("setsockopt TCP_MD5SIG"); 158 } 159 160 /* set ttl to 255 so that ttl-security works */ 161 if (la->sa.ss_family == AF_INET && setsockopt(la->fd, 162 IPPROTO_IP, IP_TTL, &ttl, sizeof(ttl)) == -1) { 163 log_warn("setup_listeners setsockopt TTL"); 164 continue; 165 } 166 if (la->sa.ss_family == AF_INET6 && setsockopt(la->fd, 167 IPPROTO_IPV6, IPV6_UNICAST_HOPS, &ttl, sizeof(ttl)) == -1) { 168 log_warn("setup_listeners setsockopt hoplimit"); 169 continue; 170 } 171 172 if (listen(la->fd, MAX_BACKLOG)) { 173 close(la->fd); 174 fatal("listen"); 175 } 176 177 la->flags |= LISTENER_LISTENING; 178 179 log_info("listening on %s", 180 log_sockaddr((struct sockaddr *)&la->sa, la->sa_len)); 181 } 182 183 *la_cnt = cnt; 184 185 return (0); 186 } 187 188 void 189 session_main(int debug, int verbose) 190 { 191 int timeout, pfkeysock; 192 unsigned int i, j, idx_peers, idx_listeners, idx_mrts; 193 u_int pfd_elms = 0, peer_l_elms = 0, mrt_l_elms = 0; 194 u_int listener_cnt, ctl_cnt, mrt_cnt; 195 u_int new_cnt; 196 struct passwd *pw; 197 struct peer *p, **peer_l = NULL, *next; 198 struct mrt *m, *xm, **mrt_l = NULL; 199 struct pollfd *pfd = NULL; 200 struct ctl_conn *ctl_conn; 201 struct listen_addr *la; 202 void *newp; 203 short events; 204 205 log_init(debug, LOG_DAEMON); 206 log_setverbose(verbose); 207 208 bgpd_process = PROC_SE; 209 log_procinit(log_procnames[bgpd_process]); 210 211 if ((pw = getpwnam(BGPD_USER)) == NULL) 212 fatal(NULL); 213 214 if (chroot(pw->pw_dir) == -1) 215 fatal("chroot"); 216 if (chdir("/") == -1) 217 fatal("chdir(\"/\")"); 218 219 setproctitle("session engine"); 220 pfkeysock = pfkey_init(&sysdep); 221 222 if (setgroups(1, &pw->pw_gid) || 223 setresgid(pw->pw_gid, pw->pw_gid, pw->pw_gid) || 224 setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid)) 225 fatal("can't drop privileges"); 226 227 if (pledge("stdio inet recvfd", NULL) == -1) 228 fatal("pledge"); 229 230 signal(SIGTERM, session_sighdlr); 231 signal(SIGINT, session_sighdlr); 232 signal(SIGPIPE, SIG_IGN); 233 signal(SIGHUP, SIG_IGN); 234 signal(SIGALRM, SIG_IGN); 235 signal(SIGUSR1, SIG_IGN); 236 237 if ((ibuf_main = malloc(sizeof(struct imsgbuf))) == NULL) 238 fatal(NULL); 239 imsg_init(ibuf_main, 3); 240 241 TAILQ_INIT(&ctl_conns); 242 LIST_INIT(&mrthead); 243 listener_cnt = 0; 244 peer_cnt = 0; 245 ctl_cnt = 0; 246 247 conf = new_config(); 248 log_info("session engine ready"); 249 250 while (session_quit == 0) { 251 /* check for peers to be initialized or deleted */ 252 if (!pending_reconf) { 253 for (p = TAILQ_FIRST(&conf->peers); p != NULL; 254 p = next) { 255 next = TAILQ_NEXT(p, entry); 256 /* cloned peer that idled out? */ 257 if (p->template && (p->state == STATE_IDLE || 258 p->state == STATE_ACTIVE) && 259 time(NULL) - p->stats.last_updown >= 260 INTERVAL_HOLD_CLONED) 261 p->reconf_action = RECONF_DELETE; 262 263 /* new peer that needs init? */ 264 if (p->state == STATE_NONE) 265 init_peer(p); 266 267 /* reinit due? */ 268 if (p->reconf_action == RECONF_REINIT) { 269 session_stop(p, ERR_CEASE_ADMIN_RESET); 270 if (!p->conf.down) 271 timer_set(p, Timer_IdleHold, 0); 272 } 273 274 /* deletion due? */ 275 if (p->reconf_action == RECONF_DELETE) { 276 if (p->demoted) 277 session_demote(p, -1); 278 p->conf.demote_group[0] = 0; 279 session_stop(p, ERR_CEASE_PEER_UNCONF); 280 log_peer_warnx(&p->conf, "removed"); 281 TAILQ_REMOVE(&conf->peers, p, entry); 282 timer_remove_all(p); 283 pfkey_remove(p); 284 free(p); 285 peer_cnt--; 286 continue; 287 } 288 p->reconf_action = RECONF_NONE; 289 } 290 } 291 292 if (peer_cnt > peer_l_elms) { 293 if ((newp = reallocarray(peer_l, peer_cnt, 294 sizeof(struct peer *))) == NULL) { 295 /* panic for now */ 296 log_warn("could not resize peer_l from %u -> %u" 297 " entries", peer_l_elms, peer_cnt); 298 fatalx("exiting"); 299 } 300 peer_l = newp; 301 peer_l_elms = peer_cnt; 302 } 303 304 mrt_cnt = 0; 305 for (m = LIST_FIRST(&mrthead); m != NULL; m = xm) { 306 xm = LIST_NEXT(m, entry); 307 if (m->state == MRT_STATE_REMOVE) { 308 mrt_clean(m); 309 LIST_REMOVE(m, entry); 310 free(m); 311 continue; 312 } 313 if (m->wbuf.queued) 314 mrt_cnt++; 315 } 316 317 if (mrt_cnt > mrt_l_elms) { 318 if ((newp = reallocarray(mrt_l, mrt_cnt, 319 sizeof(struct mrt *))) == NULL) { 320 /* panic for now */ 321 log_warn("could not resize mrt_l from %u -> %u" 322 " entries", mrt_l_elms, mrt_cnt); 323 fatalx("exiting"); 324 } 325 mrt_l = newp; 326 mrt_l_elms = mrt_cnt; 327 } 328 329 new_cnt = PFD_LISTENERS_START + listener_cnt + peer_cnt + 330 ctl_cnt + mrt_cnt; 331 if (new_cnt > pfd_elms) { 332 if ((newp = reallocarray(pfd, new_cnt, 333 sizeof(struct pollfd))) == NULL) { 334 /* panic for now */ 335 log_warn("could not resize pfd from %u -> %u" 336 " entries", pfd_elms, new_cnt); 337 fatalx("exiting"); 338 } 339 pfd = newp; 340 pfd_elms = new_cnt; 341 } 342 343 bzero(pfd, sizeof(struct pollfd) * pfd_elms); 344 345 set_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main); 346 set_pollfd(&pfd[PFD_PIPE_ROUTE], ibuf_rde); 347 set_pollfd(&pfd[PFD_PIPE_ROUTE_CTL], ibuf_rde_ctl); 348 349 if (pauseaccept == 0) { 350 pfd[PFD_SOCK_CTL].fd = csock; 351 pfd[PFD_SOCK_CTL].events = POLLIN; 352 pfd[PFD_SOCK_RCTL].fd = rcsock; 353 pfd[PFD_SOCK_RCTL].events = POLLIN; 354 } else { 355 pfd[PFD_SOCK_CTL].fd = -1; 356 pfd[PFD_SOCK_RCTL].fd = -1; 357 } 358 pfd[PFD_SOCK_PFKEY].fd = pfkeysock; 359 pfd[PFD_SOCK_PFKEY].events = POLLIN; 360 361 i = PFD_LISTENERS_START; 362 TAILQ_FOREACH(la, conf->listen_addrs, entry) { 363 if (pauseaccept == 0) { 364 pfd[i].fd = la->fd; 365 pfd[i].events = POLLIN; 366 } else 367 pfd[i].fd = -1; 368 i++; 369 } 370 idx_listeners = i; 371 timeout = 240; /* loop every 240s at least */ 372 373 TAILQ_FOREACH(p, &conf->peers, entry) { 374 time_t nextaction; 375 struct peer_timer *pt; 376 377 /* check timers */ 378 if ((pt = timer_nextisdue(p)) != NULL) { 379 switch (pt->type) { 380 case Timer_Hold: 381 bgp_fsm(p, EVNT_TIMER_HOLDTIME); 382 break; 383 case Timer_ConnectRetry: 384 bgp_fsm(p, EVNT_TIMER_CONNRETRY); 385 break; 386 case Timer_Keepalive: 387 bgp_fsm(p, EVNT_TIMER_KEEPALIVE); 388 break; 389 case Timer_IdleHold: 390 bgp_fsm(p, EVNT_START); 391 break; 392 case Timer_IdleHoldReset: 393 p->IdleHoldTime /= 2; 394 if (p->IdleHoldTime <= 395 INTERVAL_IDLE_HOLD_INITIAL) { 396 p->IdleHoldTime = 397 INTERVAL_IDLE_HOLD_INITIAL; 398 timer_stop(p, 399 Timer_IdleHoldReset); 400 p->errcnt = 0; 401 } else 402 timer_set(p, 403 Timer_IdleHoldReset, 404 p->IdleHoldTime); 405 break; 406 case Timer_CarpUndemote: 407 timer_stop(p, Timer_CarpUndemote); 408 if (p->demoted && 409 p->state == STATE_ESTABLISHED) 410 session_demote(p, -1); 411 break; 412 case Timer_RestartTimeout: 413 timer_stop(p, Timer_RestartTimeout); 414 session_graceful_stop(p); 415 break; 416 default: 417 fatalx("King Bula lost in time"); 418 } 419 } 420 if ((nextaction = timer_nextduein(p)) != -1 && 421 nextaction < timeout) 422 timeout = nextaction; 423 424 /* are we waiting for a write? */ 425 events = POLLIN; 426 if (p->wbuf.queued > 0 || p->state == STATE_CONNECT) 427 events |= POLLOUT; 428 /* is there still work to do? */ 429 if (p->rpending) 430 timeout = 0; 431 432 /* poll events */ 433 if (p->fd != -1 && events != 0) { 434 pfd[i].fd = p->fd; 435 pfd[i].events = events; 436 peer_l[i - idx_listeners] = p; 437 i++; 438 } 439 } 440 441 idx_peers = i; 442 443 LIST_FOREACH(m, &mrthead, entry) 444 if (m->wbuf.queued) { 445 pfd[i].fd = m->wbuf.fd; 446 pfd[i].events = POLLOUT; 447 mrt_l[i - idx_peers] = m; 448 i++; 449 } 450 451 idx_mrts = i; 452 453 TAILQ_FOREACH(ctl_conn, &ctl_conns, entry) { 454 pfd[i].fd = ctl_conn->ibuf.fd; 455 pfd[i].events = POLLIN; 456 if (ctl_conn->ibuf.w.queued > 0) 457 pfd[i].events |= POLLOUT; 458 i++; 459 } 460 461 if (pauseaccept && timeout > 1) 462 timeout = 1; 463 if (timeout < 0) 464 timeout = 0; 465 if (poll(pfd, i, timeout * 1000) == -1) 466 if (errno != EINTR) 467 fatal("poll error"); 468 469 /* 470 * If we previously saw fd exhaustion, we stop accept() 471 * for 1 second to throttle the accept() loop. 472 */ 473 if (pauseaccept && getmonotime() > pauseaccept + 1) 474 pauseaccept = 0; 475 476 if (handle_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main) == -1) { 477 log_warnx("SE: Lost connection to parent"); 478 session_quit = 1; 479 continue; 480 } else 481 session_dispatch_imsg(ibuf_main, PFD_PIPE_MAIN, 482 &listener_cnt); 483 484 if (handle_pollfd(&pfd[PFD_PIPE_ROUTE], ibuf_rde) == -1) { 485 log_warnx("SE: Lost connection to RDE"); 486 msgbuf_clear(&ibuf_rde->w); 487 free(ibuf_rde); 488 ibuf_rde = NULL; 489 } else 490 session_dispatch_imsg(ibuf_rde, PFD_PIPE_ROUTE, 491 &listener_cnt); 492 493 if (handle_pollfd(&pfd[PFD_PIPE_ROUTE_CTL], ibuf_rde_ctl) == 494 -1) { 495 log_warnx("SE: Lost connection to RDE control"); 496 msgbuf_clear(&ibuf_rde_ctl->w); 497 free(ibuf_rde_ctl); 498 ibuf_rde_ctl = NULL; 499 } else 500 session_dispatch_imsg(ibuf_rde_ctl, PFD_PIPE_ROUTE_CTL, 501 &listener_cnt); 502 503 if (pfd[PFD_SOCK_CTL].revents & POLLIN) 504 ctl_cnt += control_accept(csock, 0); 505 506 if (pfd[PFD_SOCK_RCTL].revents & POLLIN) 507 ctl_cnt += control_accept(rcsock, 1); 508 509 if (pfd[PFD_SOCK_PFKEY].revents & POLLIN) { 510 if (pfkey_read(pfkeysock, NULL) == -1) { 511 log_warnx("pfkey_read failed, exiting..."); 512 session_quit = 1; 513 } 514 } 515 516 for (j = PFD_LISTENERS_START; j < idx_listeners; j++) 517 if (pfd[j].revents & POLLIN) 518 session_accept(pfd[j].fd); 519 520 for (; j < idx_peers; j++) 521 session_dispatch_msg(&pfd[j], 522 peer_l[j - idx_listeners]); 523 524 TAILQ_FOREACH(p, &conf->peers, entry) 525 if (p->rbuf && p->rbuf->wpos) 526 session_process_msg(p); 527 528 for (; j < idx_mrts; j++) 529 if (pfd[j].revents & POLLOUT) 530 mrt_write(mrt_l[j - idx_peers]); 531 532 for (; j < i; j++) 533 control_dispatch_msg(&pfd[j], &ctl_cnt, &conf->peers); 534 } 535 536 while ((p = TAILQ_FIRST(&conf->peers)) != NULL) { 537 TAILQ_REMOVE(&conf->peers, p, entry); 538 strlcpy(p->conf.shutcomm, 539 "bgpd shutting down", 540 sizeof(p->conf.shutcomm)); 541 session_stop(p, ERR_CEASE_ADMIN_DOWN); 542 timer_remove_all(p); 543 pfkey_remove(p); 544 free(p); 545 } 546 547 while ((m = LIST_FIRST(&mrthead)) != NULL) { 548 mrt_clean(m); 549 LIST_REMOVE(m, entry); 550 free(m); 551 } 552 553 free_config(conf); 554 free(peer_l); 555 free(mrt_l); 556 free(pfd); 557 558 /* close pipes */ 559 if (ibuf_rde) { 560 msgbuf_write(&ibuf_rde->w); 561 msgbuf_clear(&ibuf_rde->w); 562 close(ibuf_rde->fd); 563 free(ibuf_rde); 564 } 565 if (ibuf_rde_ctl) { 566 msgbuf_clear(&ibuf_rde_ctl->w); 567 close(ibuf_rde_ctl->fd); 568 free(ibuf_rde_ctl); 569 } 570 msgbuf_write(&ibuf_main->w); 571 msgbuf_clear(&ibuf_main->w); 572 close(ibuf_main->fd); 573 free(ibuf_main); 574 575 control_shutdown(csock); 576 control_shutdown(rcsock); 577 log_info("session engine exiting"); 578 exit(0); 579 } 580 581 void 582 init_peer(struct peer *p) 583 { 584 TAILQ_INIT(&p->timers); 585 p->fd = p->wbuf.fd = -1; 586 587 if (p->conf.if_depend[0]) 588 imsg_compose(ibuf_main, IMSG_IFINFO, 0, 0, -1, 589 p->conf.if_depend, sizeof(p->conf.if_depend)); 590 else 591 p->depend_ok = 1; 592 593 peer_cnt++; 594 595 change_state(p, STATE_IDLE, EVNT_NONE); 596 if (p->conf.down) 597 timer_stop(p, Timer_IdleHold); /* no autostart */ 598 else 599 timer_set(p, Timer_IdleHold, 0); /* start ASAP */ 600 601 /* 602 * on startup, demote if requested. 603 * do not handle new peers. they must reach ESTABLISHED beforehands. 604 * peers added at runtime have reconf_action set to RECONF_REINIT. 605 */ 606 if (p->reconf_action != RECONF_REINIT && p->conf.demote_group[0]) 607 session_demote(p, +1); 608 } 609 610 void 611 bgp_fsm(struct peer *peer, enum session_events event) 612 { 613 switch (peer->state) { 614 case STATE_NONE: 615 /* nothing */ 616 break; 617 case STATE_IDLE: 618 switch (event) { 619 case EVNT_START: 620 timer_stop(peer, Timer_Hold); 621 timer_stop(peer, Timer_Keepalive); 622 timer_stop(peer, Timer_IdleHold); 623 624 /* allocate read buffer */ 625 peer->rbuf = calloc(1, sizeof(struct ibuf_read)); 626 if (peer->rbuf == NULL) 627 fatal(NULL); 628 629 /* init write buffer */ 630 msgbuf_init(&peer->wbuf); 631 632 /* init pfkey - remove old if any, load new ones */ 633 pfkey_remove(peer); 634 if (pfkey_establish(peer) == -1) { 635 log_peer_warnx(&peer->conf, 636 "pfkey setup failed"); 637 return; 638 } 639 640 peer->stats.last_sent_errcode = 0; 641 peer->stats.last_sent_suberr = 0; 642 643 if (!peer->depend_ok) 644 timer_stop(peer, Timer_ConnectRetry); 645 else if (peer->passive || peer->conf.passive || 646 peer->conf.template) { 647 change_state(peer, STATE_ACTIVE, event); 648 timer_stop(peer, Timer_ConnectRetry); 649 } else { 650 change_state(peer, STATE_CONNECT, event); 651 timer_set(peer, Timer_ConnectRetry, 652 conf->connectretry); 653 session_connect(peer); 654 } 655 peer->passive = 0; 656 break; 657 default: 658 /* ignore */ 659 break; 660 } 661 break; 662 case STATE_CONNECT: 663 switch (event) { 664 case EVNT_START: 665 /* ignore */ 666 break; 667 case EVNT_CON_OPEN: 668 session_tcp_established(peer); 669 session_open(peer); 670 timer_stop(peer, Timer_ConnectRetry); 671 peer->holdtime = INTERVAL_HOLD_INITIAL; 672 start_timer_holdtime(peer); 673 change_state(peer, STATE_OPENSENT, event); 674 break; 675 case EVNT_CON_OPENFAIL: 676 timer_set(peer, Timer_ConnectRetry, 677 conf->connectretry); 678 session_close_connection(peer); 679 change_state(peer, STATE_ACTIVE, event); 680 break; 681 case EVNT_TIMER_CONNRETRY: 682 timer_set(peer, Timer_ConnectRetry, 683 conf->connectretry); 684 session_connect(peer); 685 break; 686 default: 687 change_state(peer, STATE_IDLE, event); 688 break; 689 } 690 break; 691 case STATE_ACTIVE: 692 switch (event) { 693 case EVNT_START: 694 /* ignore */ 695 break; 696 case EVNT_CON_OPEN: 697 session_tcp_established(peer); 698 session_open(peer); 699 timer_stop(peer, Timer_ConnectRetry); 700 peer->holdtime = INTERVAL_HOLD_INITIAL; 701 start_timer_holdtime(peer); 702 change_state(peer, STATE_OPENSENT, event); 703 break; 704 case EVNT_CON_OPENFAIL: 705 timer_set(peer, Timer_ConnectRetry, 706 conf->connectretry); 707 session_close_connection(peer); 708 change_state(peer, STATE_ACTIVE, event); 709 break; 710 case EVNT_TIMER_CONNRETRY: 711 timer_set(peer, Timer_ConnectRetry, 712 peer->holdtime); 713 change_state(peer, STATE_CONNECT, event); 714 session_connect(peer); 715 break; 716 default: 717 change_state(peer, STATE_IDLE, event); 718 break; 719 } 720 break; 721 case STATE_OPENSENT: 722 switch (event) { 723 case EVNT_START: 724 /* ignore */ 725 break; 726 case EVNT_STOP: 727 change_state(peer, STATE_IDLE, event); 728 break; 729 case EVNT_CON_CLOSED: 730 session_close_connection(peer); 731 timer_set(peer, Timer_ConnectRetry, 732 conf->connectretry); 733 change_state(peer, STATE_ACTIVE, event); 734 break; 735 case EVNT_CON_FATAL: 736 change_state(peer, STATE_IDLE, event); 737 break; 738 case EVNT_TIMER_HOLDTIME: 739 session_notification(peer, ERR_HOLDTIMEREXPIRED, 740 0, NULL, 0); 741 change_state(peer, STATE_IDLE, event); 742 break; 743 case EVNT_RCVD_OPEN: 744 /* parse_open calls change_state itself on failure */ 745 if (parse_open(peer)) 746 break; 747 session_keepalive(peer); 748 change_state(peer, STATE_OPENCONFIRM, event); 749 break; 750 case EVNT_RCVD_NOTIFICATION: 751 if (parse_notification(peer)) { 752 change_state(peer, STATE_IDLE, event); 753 /* don't punish, capa negotiation */ 754 timer_set(peer, Timer_IdleHold, 0); 755 peer->IdleHoldTime /= 2; 756 } else 757 change_state(peer, STATE_IDLE, event); 758 break; 759 default: 760 session_notification(peer, 761 ERR_FSM, ERR_FSM_UNEX_OPENSENT, NULL, 0); 762 change_state(peer, STATE_IDLE, event); 763 break; 764 } 765 break; 766 case STATE_OPENCONFIRM: 767 switch (event) { 768 case EVNT_START: 769 /* ignore */ 770 break; 771 case EVNT_STOP: 772 change_state(peer, STATE_IDLE, event); 773 break; 774 case EVNT_CON_CLOSED: 775 case EVNT_CON_FATAL: 776 change_state(peer, STATE_IDLE, event); 777 break; 778 case EVNT_TIMER_HOLDTIME: 779 session_notification(peer, ERR_HOLDTIMEREXPIRED, 780 0, NULL, 0); 781 change_state(peer, STATE_IDLE, event); 782 break; 783 case EVNT_TIMER_KEEPALIVE: 784 session_keepalive(peer); 785 break; 786 case EVNT_RCVD_KEEPALIVE: 787 start_timer_holdtime(peer); 788 change_state(peer, STATE_ESTABLISHED, event); 789 break; 790 case EVNT_RCVD_NOTIFICATION: 791 parse_notification(peer); 792 change_state(peer, STATE_IDLE, event); 793 break; 794 default: 795 session_notification(peer, 796 ERR_FSM, ERR_FSM_UNEX_OPENCONFIRM, NULL, 0); 797 change_state(peer, STATE_IDLE, event); 798 break; 799 } 800 break; 801 case STATE_ESTABLISHED: 802 switch (event) { 803 case EVNT_START: 804 /* ignore */ 805 break; 806 case EVNT_STOP: 807 change_state(peer, STATE_IDLE, event); 808 break; 809 case EVNT_CON_CLOSED: 810 case EVNT_CON_FATAL: 811 change_state(peer, STATE_IDLE, event); 812 break; 813 case EVNT_TIMER_HOLDTIME: 814 session_notification(peer, ERR_HOLDTIMEREXPIRED, 815 0, NULL, 0); 816 change_state(peer, STATE_IDLE, event); 817 break; 818 case EVNT_TIMER_KEEPALIVE: 819 session_keepalive(peer); 820 break; 821 case EVNT_RCVD_KEEPALIVE: 822 start_timer_holdtime(peer); 823 break; 824 case EVNT_RCVD_UPDATE: 825 start_timer_holdtime(peer); 826 if (parse_update(peer)) 827 change_state(peer, STATE_IDLE, event); 828 else 829 start_timer_holdtime(peer); 830 break; 831 case EVNT_RCVD_NOTIFICATION: 832 parse_notification(peer); 833 change_state(peer, STATE_IDLE, event); 834 break; 835 default: 836 session_notification(peer, 837 ERR_FSM, ERR_FSM_UNEX_ESTABLISHED, NULL, 0); 838 change_state(peer, STATE_IDLE, event); 839 break; 840 } 841 break; 842 } 843 } 844 845 void 846 start_timer_holdtime(struct peer *peer) 847 { 848 if (peer->holdtime > 0) 849 timer_set(peer, Timer_Hold, peer->holdtime); 850 else 851 timer_stop(peer, Timer_Hold); 852 } 853 854 void 855 start_timer_keepalive(struct peer *peer) 856 { 857 if (peer->holdtime > 0) 858 timer_set(peer, Timer_Keepalive, peer->holdtime / 3); 859 else 860 timer_stop(peer, Timer_Keepalive); 861 } 862 863 void 864 session_close_connection(struct peer *peer) 865 { 866 if (peer->fd != -1) { 867 close(peer->fd); 868 pauseaccept = 0; 869 } 870 peer->fd = peer->wbuf.fd = -1; 871 } 872 873 void 874 change_state(struct peer *peer, enum session_state state, 875 enum session_events event) 876 { 877 struct mrt *mrt; 878 879 switch (state) { 880 case STATE_IDLE: 881 /* carp demotion first. new peers handled in init_peer */ 882 if (peer->state == STATE_ESTABLISHED && 883 peer->conf.demote_group[0] && !peer->demoted) 884 session_demote(peer, +1); 885 886 /* 887 * try to write out what's buffered (maybe a notification), 888 * don't bother if it fails 889 */ 890 if (peer->state >= STATE_OPENSENT && peer->wbuf.queued) 891 msgbuf_write(&peer->wbuf); 892 893 /* 894 * we must start the timer for the next EVNT_START 895 * if we are coming here due to an error and the 896 * session was not established successfully before, the 897 * starttimerinterval needs to be exponentially increased 898 */ 899 if (peer->IdleHoldTime == 0) 900 peer->IdleHoldTime = INTERVAL_IDLE_HOLD_INITIAL; 901 peer->holdtime = INTERVAL_HOLD_INITIAL; 902 timer_stop(peer, Timer_ConnectRetry); 903 timer_stop(peer, Timer_Keepalive); 904 timer_stop(peer, Timer_Hold); 905 timer_stop(peer, Timer_IdleHold); 906 timer_stop(peer, Timer_IdleHoldReset); 907 session_close_connection(peer); 908 msgbuf_clear(&peer->wbuf); 909 free(peer->rbuf); 910 peer->rbuf = NULL; 911 bzero(&peer->capa.peer, sizeof(peer->capa.peer)); 912 913 if (event != EVNT_STOP) { 914 timer_set(peer, Timer_IdleHold, peer->IdleHoldTime); 915 if (event != EVNT_NONE && 916 peer->IdleHoldTime < MAX_IDLE_HOLD/2) 917 peer->IdleHoldTime *= 2; 918 } 919 if (peer->state == STATE_ESTABLISHED) { 920 if (peer->capa.neg.grestart.restart == 2 && 921 (event == EVNT_CON_CLOSED || 922 event == EVNT_CON_FATAL)) { 923 /* don't punish graceful restart */ 924 timer_set(peer, Timer_IdleHold, 0); 925 peer->IdleHoldTime /= 2; 926 session_graceful_restart(peer); 927 } else 928 session_down(peer); 929 } 930 if (peer->state == STATE_NONE || 931 peer->state == STATE_ESTABLISHED) { 932 /* initialize capability negotiation structures */ 933 memcpy(&peer->capa.ann, &peer->conf.capabilities, 934 sizeof(peer->capa.ann)); 935 if (!peer->conf.announce_capa) 936 session_capa_ann_none(peer); 937 } 938 break; 939 case STATE_CONNECT: 940 if (peer->state == STATE_ESTABLISHED && 941 peer->capa.neg.grestart.restart == 2) { 942 /* do the graceful restart dance */ 943 session_graceful_restart(peer); 944 peer->holdtime = INTERVAL_HOLD_INITIAL; 945 timer_stop(peer, Timer_ConnectRetry); 946 timer_stop(peer, Timer_Keepalive); 947 timer_stop(peer, Timer_Hold); 948 timer_stop(peer, Timer_IdleHold); 949 timer_stop(peer, Timer_IdleHoldReset); 950 session_close_connection(peer); 951 msgbuf_clear(&peer->wbuf); 952 bzero(&peer->capa.peer, sizeof(peer->capa.peer)); 953 } 954 break; 955 case STATE_ACTIVE: 956 break; 957 case STATE_OPENSENT: 958 break; 959 case STATE_OPENCONFIRM: 960 break; 961 case STATE_ESTABLISHED: 962 timer_set(peer, Timer_IdleHoldReset, peer->IdleHoldTime); 963 if (peer->demoted) 964 timer_set(peer, Timer_CarpUndemote, 965 INTERVAL_HOLD_DEMOTED); 966 session_up(peer); 967 break; 968 default: /* something seriously fucked */ 969 break; 970 } 971 972 log_statechange(peer, state, event); 973 LIST_FOREACH(mrt, &mrthead, entry) { 974 if (!(mrt->type == MRT_ALL_IN || mrt->type == MRT_ALL_OUT)) 975 continue; 976 if ((mrt->peer_id == 0 && mrt->group_id == 0) || 977 mrt->peer_id == peer->conf.id || (mrt->group_id != 0 && 978 mrt->group_id == peer->conf.groupid)) 979 mrt_dump_state(mrt, peer->state, state, peer); 980 } 981 peer->prev_state = peer->state; 982 peer->state = state; 983 } 984 985 void 986 session_accept(int listenfd) 987 { 988 int connfd; 989 int opt; 990 socklen_t len; 991 struct sockaddr_storage cliaddr; 992 struct peer *p = NULL; 993 994 len = sizeof(cliaddr); 995 if ((connfd = accept4(listenfd, 996 (struct sockaddr *)&cliaddr, &len, 997 SOCK_CLOEXEC | SOCK_NONBLOCK)) == -1) { 998 if (errno == ENFILE || errno == EMFILE) 999 pauseaccept = getmonotime(); 1000 else if (errno != EWOULDBLOCK && errno != EINTR && 1001 errno != ECONNABORTED) 1002 log_warn("accept"); 1003 return; 1004 } 1005 1006 p = getpeerbyip(conf, (struct sockaddr *)&cliaddr); 1007 1008 if (p != NULL && p->state == STATE_IDLE && p->errcnt < 2) { 1009 if (timer_running(p, Timer_IdleHold, NULL)) { 1010 /* fast reconnect after clear */ 1011 p->passive = 1; 1012 bgp_fsm(p, EVNT_START); 1013 } 1014 } 1015 1016 if (p != NULL && 1017 (p->state == STATE_CONNECT || p->state == STATE_ACTIVE)) { 1018 if (p->fd != -1) { 1019 if (p->state == STATE_CONNECT) 1020 session_close_connection(p); 1021 else { 1022 close(connfd); 1023 return; 1024 } 1025 } 1026 1027 open: 1028 if (p->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) { 1029 log_peer_warnx(&p->conf, 1030 "ipsec or md5sig configured but not available"); 1031 close(connfd); 1032 return; 1033 } 1034 1035 if (p->conf.auth.method == AUTH_MD5SIG) { 1036 if (sysdep.no_md5sig) { 1037 log_peer_warnx(&p->conf, 1038 "md5sig configured but not available"); 1039 close(connfd); 1040 return; 1041 } 1042 len = sizeof(opt); 1043 if (getsockopt(connfd, IPPROTO_TCP, TCP_MD5SIG, 1044 &opt, &len) == -1) 1045 fatal("getsockopt TCP_MD5SIG"); 1046 if (!opt) { /* non-md5'd connection! */ 1047 log_peer_warnx(&p->conf, 1048 "connection attempt without md5 signature"); 1049 close(connfd); 1050 return; 1051 } 1052 } 1053 p->fd = p->wbuf.fd = connfd; 1054 if (session_setup_socket(p)) { 1055 close(connfd); 1056 return; 1057 } 1058 bgp_fsm(p, EVNT_CON_OPEN); 1059 return; 1060 } else if (p != NULL && p->state == STATE_ESTABLISHED && 1061 p->capa.neg.grestart.restart == 2) { 1062 /* first do the graceful restart dance */ 1063 change_state(p, STATE_CONNECT, EVNT_CON_CLOSED); 1064 /* then do part of the open dance */ 1065 goto open; 1066 } else { 1067 log_conn_attempt(p, (struct sockaddr *)&cliaddr, len); 1068 close(connfd); 1069 } 1070 } 1071 1072 int 1073 session_connect(struct peer *peer) 1074 { 1075 int opt = 1; 1076 struct sockaddr *sa; 1077 socklen_t sa_len; 1078 1079 /* 1080 * we do not need the overcomplicated collision detection RFC 1771 1081 * describes; we simply make sure there is only ever one concurrent 1082 * tcp connection per peer. 1083 */ 1084 if (peer->fd != -1) 1085 return (-1); 1086 1087 if ((peer->fd = socket(aid2af(peer->conf.remote_addr.aid), 1088 SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, IPPROTO_TCP)) == -1) { 1089 log_peer_warn(&peer->conf, "session_connect socket"); 1090 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1091 return (-1); 1092 } 1093 1094 if (peer->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) { 1095 log_peer_warnx(&peer->conf, 1096 "ipsec or md5sig configured but not available"); 1097 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1098 return (-1); 1099 } 1100 1101 if (peer->conf.auth.method == AUTH_MD5SIG) { 1102 if (sysdep.no_md5sig) { 1103 log_peer_warnx(&peer->conf, 1104 "md5sig configured but not available"); 1105 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1106 return (-1); 1107 } 1108 if (setsockopt(peer->fd, IPPROTO_TCP, TCP_MD5SIG, 1109 &opt, sizeof(opt)) == -1) { 1110 log_peer_warn(&peer->conf, "setsockopt md5sig"); 1111 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1112 return (-1); 1113 } 1114 } 1115 peer->wbuf.fd = peer->fd; 1116 1117 /* if update source is set we need to bind() */ 1118 if ((sa = addr2sa(&peer->conf.local_addr, 0, &sa_len)) != NULL) { 1119 if (bind(peer->fd, sa, sa_len) == -1) { 1120 log_peer_warn(&peer->conf, "session_connect bind"); 1121 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1122 return (-1); 1123 } 1124 } 1125 1126 if (session_setup_socket(peer)) { 1127 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1128 return (-1); 1129 } 1130 1131 sa = addr2sa(&peer->conf.remote_addr, BGP_PORT, &sa_len); 1132 if (connect(peer->fd, sa, sa_len) == -1) { 1133 if (errno != EINPROGRESS) { 1134 if (errno != peer->lasterr) 1135 log_peer_warn(&peer->conf, "connect"); 1136 peer->lasterr = errno; 1137 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1138 return (-1); 1139 } 1140 } else 1141 bgp_fsm(peer, EVNT_CON_OPEN); 1142 1143 return (0); 1144 } 1145 1146 int 1147 session_setup_socket(struct peer *p) 1148 { 1149 int ttl = p->conf.distance; 1150 int pre = IPTOS_PREC_INTERNETCONTROL; 1151 int nodelay = 1; 1152 int bsize; 1153 1154 switch (p->conf.remote_addr.aid) { 1155 case AID_INET: 1156 /* set precedence, see RFC 1771 appendix 5 */ 1157 if (setsockopt(p->fd, IPPROTO_IP, IP_TOS, &pre, sizeof(pre)) == 1158 -1) { 1159 log_peer_warn(&p->conf, 1160 "session_setup_socket setsockopt TOS"); 1161 return (-1); 1162 } 1163 1164 if (p->conf.ebgp) { 1165 /* set TTL to foreign router's distance 1166 1=direct n=multihop with ttlsec, we always use 255 */ 1167 if (p->conf.ttlsec) { 1168 ttl = 256 - p->conf.distance; 1169 if (setsockopt(p->fd, IPPROTO_IP, IP_MINTTL, 1170 &ttl, sizeof(ttl)) == -1) { 1171 log_peer_warn(&p->conf, 1172 "session_setup_socket: " 1173 "setsockopt MINTTL"); 1174 return (-1); 1175 } 1176 ttl = 255; 1177 } 1178 1179 if (setsockopt(p->fd, IPPROTO_IP, IP_TTL, &ttl, 1180 sizeof(ttl)) == -1) { 1181 log_peer_warn(&p->conf, 1182 "session_setup_socket setsockopt TTL"); 1183 return (-1); 1184 } 1185 } 1186 break; 1187 case AID_INET6: 1188 if (p->conf.ebgp) { 1189 /* set hoplimit to foreign router's distance 1190 1=direct n=multihop with ttlsec, we always use 255 */ 1191 if (p->conf.ttlsec) { 1192 ttl = 256 - p->conf.distance; 1193 if (setsockopt(p->fd, IPPROTO_IPV6, 1194 IPV6_MINHOPCOUNT, &ttl, sizeof(ttl)) 1195 == -1) { 1196 log_peer_warn(&p->conf, 1197 "session_setup_socket: " 1198 "setsockopt MINHOPCOUNT"); 1199 return (-1); 1200 } 1201 ttl = 255; 1202 } 1203 if (setsockopt(p->fd, IPPROTO_IPV6, IPV6_UNICAST_HOPS, 1204 &ttl, sizeof(ttl)) == -1) { 1205 log_peer_warn(&p->conf, 1206 "session_setup_socket setsockopt hoplimit"); 1207 return (-1); 1208 } 1209 } 1210 break; 1211 } 1212 1213 /* set TCP_NODELAY */ 1214 if (setsockopt(p->fd, IPPROTO_TCP, TCP_NODELAY, &nodelay, 1215 sizeof(nodelay)) == -1) { 1216 log_peer_warn(&p->conf, 1217 "session_setup_socket setsockopt TCP_NODELAY"); 1218 return (-1); 1219 } 1220 1221 /* only increase bufsize (and thus window) if md5 or ipsec is in use */ 1222 if (p->conf.auth.method != AUTH_NONE) { 1223 /* try to increase bufsize. no biggie if it fails */ 1224 bsize = 65535; 1225 while (bsize > 8192 && 1226 setsockopt(p->fd, SOL_SOCKET, SO_RCVBUF, &bsize, 1227 sizeof(bsize)) == -1 && errno != EINVAL) 1228 bsize /= 2; 1229 bsize = 65535; 1230 while (bsize > 8192 && 1231 setsockopt(p->fd, SOL_SOCKET, SO_SNDBUF, &bsize, 1232 sizeof(bsize)) == -1 && errno != EINVAL) 1233 bsize /= 2; 1234 } 1235 1236 return (0); 1237 } 1238 1239 void 1240 session_tcp_established(struct peer *peer) 1241 { 1242 struct sockaddr_storage ss; 1243 socklen_t len; 1244 1245 len = sizeof(ss); 1246 if (getsockname(peer->fd, (struct sockaddr *)&ss, &len) == -1) 1247 log_warn("getsockname"); 1248 sa2addr((struct sockaddr *)&ss, &peer->local, &peer->local_port); 1249 len = sizeof(ss); 1250 if (getpeername(peer->fd, (struct sockaddr *)&ss, &len) == -1) 1251 log_warn("getpeername"); 1252 sa2addr((struct sockaddr *)&ss, &peer->remote, &peer->remote_port); 1253 } 1254 1255 void 1256 session_capa_ann_none(struct peer *peer) 1257 { 1258 bzero(&peer->capa.ann, sizeof(peer->capa.ann)); 1259 } 1260 1261 int 1262 session_capa_add(struct ibuf *opb, u_int8_t capa_code, u_int8_t capa_len) 1263 { 1264 int errs = 0; 1265 1266 errs += ibuf_add(opb, &capa_code, sizeof(capa_code)); 1267 errs += ibuf_add(opb, &capa_len, sizeof(capa_len)); 1268 return (errs); 1269 } 1270 1271 int 1272 session_capa_add_mp(struct ibuf *buf, u_int8_t aid) 1273 { 1274 u_int8_t safi, pad = 0; 1275 u_int16_t afi; 1276 int errs = 0; 1277 1278 if (aid2afi(aid, &afi, &safi) == -1) 1279 fatalx("session_capa_add_mp: bad afi/safi pair"); 1280 afi = htons(afi); 1281 errs += ibuf_add(buf, &afi, sizeof(afi)); 1282 errs += ibuf_add(buf, &pad, sizeof(pad)); 1283 errs += ibuf_add(buf, &safi, sizeof(safi)); 1284 1285 return (errs); 1286 } 1287 1288 int 1289 session_capa_add_gr(struct peer *p, struct ibuf *b, u_int8_t aid) 1290 { 1291 u_int errs = 0; 1292 u_int16_t afi; 1293 u_int8_t flags, safi; 1294 1295 if (aid2afi(aid, &afi, &safi)) { 1296 log_warn("session_capa_add_gr: bad AID"); 1297 return (1); 1298 } 1299 if (p->capa.neg.grestart.flags[aid] & CAPA_GR_RESTARTING) 1300 flags = CAPA_GR_F_FLAG; 1301 else 1302 flags = 0; 1303 1304 afi = htons(afi); 1305 errs += ibuf_add(b, &afi, sizeof(afi)); 1306 errs += ibuf_add(b, &safi, sizeof(safi)); 1307 errs += ibuf_add(b, &flags, sizeof(flags)); 1308 1309 return (errs); 1310 } 1311 1312 struct bgp_msg * 1313 session_newmsg(enum msg_type msgtype, u_int16_t len) 1314 { 1315 struct bgp_msg *msg; 1316 struct msg_header hdr; 1317 struct ibuf *buf; 1318 int errs = 0; 1319 1320 memset(&hdr.marker, 0xff, sizeof(hdr.marker)); 1321 hdr.len = htons(len); 1322 hdr.type = msgtype; 1323 1324 if ((buf = ibuf_open(len)) == NULL) 1325 return (NULL); 1326 1327 errs += ibuf_add(buf, &hdr.marker, sizeof(hdr.marker)); 1328 errs += ibuf_add(buf, &hdr.len, sizeof(hdr.len)); 1329 errs += ibuf_add(buf, &hdr.type, sizeof(hdr.type)); 1330 1331 if (errs || (msg = calloc(1, sizeof(*msg))) == NULL) { 1332 ibuf_free(buf); 1333 return (NULL); 1334 } 1335 1336 msg->buf = buf; 1337 msg->type = msgtype; 1338 msg->len = len; 1339 1340 return (msg); 1341 } 1342 1343 int 1344 session_sendmsg(struct bgp_msg *msg, struct peer *p) 1345 { 1346 struct mrt *mrt; 1347 1348 LIST_FOREACH(mrt, &mrthead, entry) { 1349 if (!(mrt->type == MRT_ALL_OUT || (msg->type == UPDATE && 1350 mrt->type == MRT_UPDATE_OUT))) 1351 continue; 1352 if ((mrt->peer_id == 0 && mrt->group_id == 0) || 1353 mrt->peer_id == p->conf.id || (mrt->group_id != 0 && 1354 mrt->group_id == p->conf.groupid)) 1355 mrt_dump_bgp_msg(mrt, msg->buf->buf, msg->len, p); 1356 } 1357 1358 ibuf_close(&p->wbuf, msg->buf); 1359 if (!p->throttled && p->wbuf.queued > SESS_MSG_HIGH_MARK) { 1360 if (imsg_rde(IMSG_XOFF, p->conf.id, NULL, 0) == -1) 1361 log_peer_warn(&p->conf, "imsg_compose XOFF"); 1362 else 1363 p->throttled = 1; 1364 } 1365 1366 free(msg); 1367 return (0); 1368 } 1369 1370 void 1371 session_open(struct peer *p) 1372 { 1373 struct bgp_msg *buf; 1374 struct ibuf *opb; 1375 struct msg_open msg; 1376 u_int16_t len; 1377 u_int8_t i, op_type, optparamlen = 0; 1378 int errs = 0; 1379 int mpcapa = 0; 1380 1381 1382 if ((opb = ibuf_dynamic(0, UCHAR_MAX - sizeof(op_type) - 1383 sizeof(optparamlen))) == NULL) { 1384 bgp_fsm(p, EVNT_CON_FATAL); 1385 return; 1386 } 1387 1388 /* multiprotocol extensions, RFC 4760 */ 1389 for (i = 0; i < AID_MAX; i++) 1390 if (p->capa.ann.mp[i]) { /* 4 bytes data */ 1391 errs += session_capa_add(opb, CAPA_MP, 4); 1392 errs += session_capa_add_mp(opb, i); 1393 mpcapa++; 1394 } 1395 1396 /* route refresh, RFC 2918 */ 1397 if (p->capa.ann.refresh) /* no data */ 1398 errs += session_capa_add(opb, CAPA_REFRESH, 0); 1399 1400 /* graceful restart and End-of-RIB marker, RFC 4724 */ 1401 if (p->capa.ann.grestart.restart) { 1402 int rst = 0; 1403 u_int16_t hdr; 1404 u_int8_t grlen; 1405 1406 if (mpcapa) { 1407 grlen = 2 + 4 * mpcapa; 1408 for (i = 0; i < AID_MAX; i++) { 1409 if (p->capa.neg.grestart.flags[i] & 1410 CAPA_GR_RESTARTING) 1411 rst++; 1412 } 1413 } else { /* AID_INET */ 1414 grlen = 2 + 4; 1415 if (p->capa.neg.grestart.flags[AID_INET] & 1416 CAPA_GR_RESTARTING) 1417 rst++; 1418 } 1419 1420 hdr = conf->holdtime; /* default timeout */ 1421 /* if client does graceful restart don't set R flag */ 1422 if (!rst) 1423 hdr |= CAPA_GR_R_FLAG; 1424 hdr = htons(hdr); 1425 1426 errs += session_capa_add(opb, CAPA_RESTART, grlen); 1427 errs += ibuf_add(opb, &hdr, sizeof(hdr)); 1428 1429 if (mpcapa) { 1430 for (i = 0; i < AID_MAX; i++) { 1431 if (p->capa.ann.mp[i]) { 1432 errs += session_capa_add_gr(p, opb, i); 1433 } 1434 } 1435 } else { /* AID_INET */ 1436 errs += session_capa_add_gr(p, opb, AID_INET); 1437 } 1438 } 1439 1440 /* 4-bytes AS numbers, draft-ietf-idr-as4bytes-13 */ 1441 if (p->capa.ann.as4byte) { /* 4 bytes data */ 1442 u_int32_t nas; 1443 1444 nas = htonl(p->conf.local_as); 1445 errs += session_capa_add(opb, CAPA_AS4BYTE, sizeof(nas)); 1446 errs += ibuf_add(opb, &nas, sizeof(nas)); 1447 } 1448 1449 if (ibuf_size(opb)) 1450 optparamlen = ibuf_size(opb) + sizeof(op_type) + 1451 sizeof(optparamlen); 1452 1453 len = MSGSIZE_OPEN_MIN + optparamlen; 1454 if (errs || (buf = session_newmsg(OPEN, len)) == NULL) { 1455 ibuf_free(opb); 1456 bgp_fsm(p, EVNT_CON_FATAL); 1457 return; 1458 } 1459 1460 msg.version = 4; 1461 msg.myas = htons(p->conf.local_short_as); 1462 if (p->conf.holdtime) 1463 msg.holdtime = htons(p->conf.holdtime); 1464 else 1465 msg.holdtime = htons(conf->holdtime); 1466 msg.bgpid = conf->bgpid; /* is already in network byte order */ 1467 msg.optparamlen = optparamlen; 1468 1469 errs += ibuf_add(buf->buf, &msg.version, sizeof(msg.version)); 1470 errs += ibuf_add(buf->buf, &msg.myas, sizeof(msg.myas)); 1471 errs += ibuf_add(buf->buf, &msg.holdtime, sizeof(msg.holdtime)); 1472 errs += ibuf_add(buf->buf, &msg.bgpid, sizeof(msg.bgpid)); 1473 errs += ibuf_add(buf->buf, &msg.optparamlen, sizeof(msg.optparamlen)); 1474 1475 if (optparamlen) { 1476 op_type = OPT_PARAM_CAPABILITIES; 1477 optparamlen = ibuf_size(opb); 1478 errs += ibuf_add(buf->buf, &op_type, sizeof(op_type)); 1479 errs += ibuf_add(buf->buf, &optparamlen, sizeof(optparamlen)); 1480 errs += ibuf_add(buf->buf, opb->buf, ibuf_size(opb)); 1481 } 1482 1483 ibuf_free(opb); 1484 1485 if (errs) { 1486 ibuf_free(buf->buf); 1487 free(buf); 1488 bgp_fsm(p, EVNT_CON_FATAL); 1489 return; 1490 } 1491 1492 if (session_sendmsg(buf, p) == -1) { 1493 bgp_fsm(p, EVNT_CON_FATAL); 1494 return; 1495 } 1496 1497 p->stats.msg_sent_open++; 1498 } 1499 1500 void 1501 session_keepalive(struct peer *p) 1502 { 1503 struct bgp_msg *buf; 1504 1505 if ((buf = session_newmsg(KEEPALIVE, MSGSIZE_KEEPALIVE)) == NULL || 1506 session_sendmsg(buf, p) == -1) { 1507 bgp_fsm(p, EVNT_CON_FATAL); 1508 return; 1509 } 1510 1511 start_timer_keepalive(p); 1512 p->stats.msg_sent_keepalive++; 1513 } 1514 1515 void 1516 session_update(u_int32_t peerid, void *data, size_t datalen) 1517 { 1518 struct peer *p; 1519 struct bgp_msg *buf; 1520 1521 if ((p = getpeerbyid(conf, peerid)) == NULL) { 1522 log_warnx("no such peer: id=%u", peerid); 1523 return; 1524 } 1525 1526 if (p->state != STATE_ESTABLISHED) 1527 return; 1528 1529 if ((buf = session_newmsg(UPDATE, MSGSIZE_HEADER + datalen)) == NULL) { 1530 bgp_fsm(p, EVNT_CON_FATAL); 1531 return; 1532 } 1533 1534 if (ibuf_add(buf->buf, data, datalen)) { 1535 ibuf_free(buf->buf); 1536 free(buf); 1537 bgp_fsm(p, EVNT_CON_FATAL); 1538 return; 1539 } 1540 1541 if (session_sendmsg(buf, p) == -1) { 1542 bgp_fsm(p, EVNT_CON_FATAL); 1543 return; 1544 } 1545 1546 start_timer_keepalive(p); 1547 p->stats.msg_sent_update++; 1548 } 1549 1550 void 1551 session_notification(struct peer *p, u_int8_t errcode, u_int8_t subcode, 1552 void *data, ssize_t datalen) 1553 { 1554 struct bgp_msg *buf; 1555 int errs = 0; 1556 1557 if (p->stats.last_sent_errcode) /* some notification already sent */ 1558 return; 1559 1560 log_notification(p, errcode, subcode, data, datalen, "sending"); 1561 1562 if ((buf = session_newmsg(NOTIFICATION, 1563 MSGSIZE_NOTIFICATION_MIN + datalen)) == NULL) { 1564 bgp_fsm(p, EVNT_CON_FATAL); 1565 return; 1566 } 1567 1568 errs += ibuf_add(buf->buf, &errcode, sizeof(errcode)); 1569 errs += ibuf_add(buf->buf, &subcode, sizeof(subcode)); 1570 1571 if (datalen > 0) 1572 errs += ibuf_add(buf->buf, data, datalen); 1573 1574 if (errs) { 1575 ibuf_free(buf->buf); 1576 free(buf); 1577 bgp_fsm(p, EVNT_CON_FATAL); 1578 return; 1579 } 1580 1581 if (session_sendmsg(buf, p) == -1) { 1582 bgp_fsm(p, EVNT_CON_FATAL); 1583 return; 1584 } 1585 1586 p->stats.msg_sent_notification++; 1587 p->stats.last_sent_errcode = errcode; 1588 p->stats.last_sent_suberr = subcode; 1589 } 1590 1591 int 1592 session_neighbor_rrefresh(struct peer *p) 1593 { 1594 u_int8_t i; 1595 1596 if (!p->capa.peer.refresh) 1597 return (-1); 1598 1599 for (i = 0; i < AID_MAX; i++) { 1600 if (p->capa.peer.mp[i] != 0) 1601 session_rrefresh(p, i); 1602 } 1603 1604 return (0); 1605 } 1606 1607 void 1608 session_rrefresh(struct peer *p, u_int8_t aid) 1609 { 1610 struct bgp_msg *buf; 1611 int errs = 0; 1612 u_int16_t afi; 1613 u_int8_t safi, null8 = 0; 1614 1615 if (aid2afi(aid, &afi, &safi) == -1) 1616 fatalx("session_rrefresh: bad afi/safi pair"); 1617 1618 if ((buf = session_newmsg(RREFRESH, MSGSIZE_RREFRESH)) == NULL) { 1619 bgp_fsm(p, EVNT_CON_FATAL); 1620 return; 1621 } 1622 1623 afi = htons(afi); 1624 errs += ibuf_add(buf->buf, &afi, sizeof(afi)); 1625 errs += ibuf_add(buf->buf, &null8, sizeof(null8)); 1626 errs += ibuf_add(buf->buf, &safi, sizeof(safi)); 1627 1628 if (errs) { 1629 ibuf_free(buf->buf); 1630 free(buf); 1631 bgp_fsm(p, EVNT_CON_FATAL); 1632 return; 1633 } 1634 1635 if (session_sendmsg(buf, p) == -1) { 1636 bgp_fsm(p, EVNT_CON_FATAL); 1637 return; 1638 } 1639 1640 p->stats.msg_sent_rrefresh++; 1641 } 1642 1643 int 1644 session_graceful_restart(struct peer *p) 1645 { 1646 u_int8_t i; 1647 1648 timer_set(p, Timer_RestartTimeout, p->capa.neg.grestart.timeout); 1649 1650 for (i = 0; i < AID_MAX; i++) { 1651 if (p->capa.neg.grestart.flags[i] & CAPA_GR_PRESENT) { 1652 if (imsg_rde(IMSG_SESSION_STALE, p->conf.id, 1653 &i, sizeof(i)) == -1) 1654 return (-1); 1655 log_peer_warnx(&p->conf, 1656 "graceful restart of %s, keeping routes", 1657 aid2str(i)); 1658 p->capa.neg.grestart.flags[i] |= CAPA_GR_RESTARTING; 1659 } else if (p->capa.neg.mp[i]) { 1660 if (imsg_rde(IMSG_SESSION_FLUSH, p->conf.id, 1661 &i, sizeof(i)) == -1) 1662 return (-1); 1663 log_peer_warnx(&p->conf, 1664 "graceful restart of %s, flushing routes", 1665 aid2str(i)); 1666 } 1667 } 1668 return (0); 1669 } 1670 1671 int 1672 session_graceful_stop(struct peer *p) 1673 { 1674 u_int8_t i; 1675 1676 for (i = 0; i < AID_MAX; i++) { 1677 /* 1678 * Only flush if the peer is restarting and the timeout fired. 1679 * In all other cases the session was already flushed when the 1680 * session went down or when the new open message was parsed. 1681 */ 1682 if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING) { 1683 log_peer_warnx(&p->conf, "graceful restart of %s, " 1684 "time-out, flushing", aid2str(i)); 1685 if (imsg_rde(IMSG_SESSION_FLUSH, p->conf.id, 1686 &i, sizeof(i)) == -1) 1687 return (-1); 1688 } 1689 p->capa.neg.grestart.flags[i] &= ~CAPA_GR_RESTARTING; 1690 } 1691 return (0); 1692 } 1693 1694 int 1695 session_dispatch_msg(struct pollfd *pfd, struct peer *p) 1696 { 1697 ssize_t n; 1698 socklen_t len; 1699 int error; 1700 1701 if (p->state == STATE_CONNECT) { 1702 if (pfd->revents & POLLOUT) { 1703 if (pfd->revents & POLLIN) { 1704 /* error occurred */ 1705 len = sizeof(error); 1706 if (getsockopt(pfd->fd, SOL_SOCKET, SO_ERROR, 1707 &error, &len) == -1 || error) { 1708 if (error) 1709 errno = error; 1710 if (errno != p->lasterr) { 1711 log_peer_warn(&p->conf, 1712 "socket error"); 1713 p->lasterr = errno; 1714 } 1715 bgp_fsm(p, EVNT_CON_OPENFAIL); 1716 return (1); 1717 } 1718 } 1719 bgp_fsm(p, EVNT_CON_OPEN); 1720 return (1); 1721 } 1722 if (pfd->revents & POLLHUP) { 1723 bgp_fsm(p, EVNT_CON_OPENFAIL); 1724 return (1); 1725 } 1726 if (pfd->revents & (POLLERR|POLLNVAL)) { 1727 bgp_fsm(p, EVNT_CON_FATAL); 1728 return (1); 1729 } 1730 return (0); 1731 } 1732 1733 if (pfd->revents & POLLHUP) { 1734 bgp_fsm(p, EVNT_CON_CLOSED); 1735 return (1); 1736 } 1737 if (pfd->revents & (POLLERR|POLLNVAL)) { 1738 bgp_fsm(p, EVNT_CON_FATAL); 1739 return (1); 1740 } 1741 1742 if (pfd->revents & POLLOUT && p->wbuf.queued) { 1743 if ((error = msgbuf_write(&p->wbuf)) <= 0 && errno != EAGAIN) { 1744 if (error == 0) 1745 log_peer_warnx(&p->conf, "Connection closed"); 1746 else if (error == -1) 1747 log_peer_warn(&p->conf, "write error"); 1748 bgp_fsm(p, EVNT_CON_FATAL); 1749 return (1); 1750 } 1751 if (p->throttled && p->wbuf.queued < SESS_MSG_LOW_MARK) { 1752 if (imsg_rde(IMSG_XON, p->conf.id, NULL, 0) == -1) 1753 log_peer_warn(&p->conf, "imsg_compose XON"); 1754 else 1755 p->throttled = 0; 1756 } 1757 if (!(pfd->revents & POLLIN)) 1758 return (1); 1759 } 1760 1761 if (p->rbuf && pfd->revents & POLLIN) { 1762 if ((n = read(p->fd, p->rbuf->buf + p->rbuf->wpos, 1763 sizeof(p->rbuf->buf) - p->rbuf->wpos)) == -1) { 1764 if (errno != EINTR && errno != EAGAIN) { 1765 log_peer_warn(&p->conf, "read error"); 1766 bgp_fsm(p, EVNT_CON_FATAL); 1767 } 1768 return (1); 1769 } 1770 if (n == 0) { /* connection closed */ 1771 bgp_fsm(p, EVNT_CON_CLOSED); 1772 return (1); 1773 } 1774 1775 p->rbuf->wpos += n; 1776 p->stats.last_read = time(NULL); 1777 return (1); 1778 } 1779 return (0); 1780 } 1781 1782 void 1783 session_process_msg(struct peer *p) 1784 { 1785 struct mrt *mrt; 1786 ssize_t rpos, av, left; 1787 int processed = 0; 1788 u_int16_t msglen; 1789 u_int8_t msgtype; 1790 1791 rpos = 0; 1792 av = p->rbuf->wpos; 1793 p->rpending = 0; 1794 1795 /* 1796 * session might drop to IDLE -> buffers deallocated 1797 * we MUST check rbuf != NULL before use 1798 */ 1799 for (;;) { 1800 if (p->rbuf == NULL) 1801 return; 1802 if (rpos + MSGSIZE_HEADER > av) 1803 break; 1804 if (parse_header(p, p->rbuf->buf + rpos, &msglen, 1805 &msgtype) == -1) 1806 return; 1807 if (rpos + msglen > av) 1808 break; 1809 p->rbuf->rptr = p->rbuf->buf + rpos; 1810 1811 /* dump to MRT as soon as we have a full packet */ 1812 LIST_FOREACH(mrt, &mrthead, entry) { 1813 if (!(mrt->type == MRT_ALL_IN || (msgtype == UPDATE && 1814 mrt->type == MRT_UPDATE_IN))) 1815 continue; 1816 if ((mrt->peer_id == 0 && mrt->group_id == 0) || 1817 mrt->peer_id == p->conf.id || (mrt->group_id != 0 && 1818 mrt->group_id == p->conf.groupid)) 1819 mrt_dump_bgp_msg(mrt, p->rbuf->rptr, msglen, p); 1820 } 1821 1822 switch (msgtype) { 1823 case OPEN: 1824 bgp_fsm(p, EVNT_RCVD_OPEN); 1825 p->stats.msg_rcvd_open++; 1826 break; 1827 case UPDATE: 1828 bgp_fsm(p, EVNT_RCVD_UPDATE); 1829 p->stats.msg_rcvd_update++; 1830 break; 1831 case NOTIFICATION: 1832 bgp_fsm(p, EVNT_RCVD_NOTIFICATION); 1833 p->stats.msg_rcvd_notification++; 1834 break; 1835 case KEEPALIVE: 1836 bgp_fsm(p, EVNT_RCVD_KEEPALIVE); 1837 p->stats.msg_rcvd_keepalive++; 1838 break; 1839 case RREFRESH: 1840 parse_refresh(p); 1841 p->stats.msg_rcvd_rrefresh++; 1842 break; 1843 default: /* cannot happen */ 1844 session_notification(p, ERR_HEADER, ERR_HDR_TYPE, 1845 &msgtype, 1); 1846 log_warnx("received message with unknown type %u", 1847 msgtype); 1848 bgp_fsm(p, EVNT_CON_FATAL); 1849 } 1850 rpos += msglen; 1851 if (++processed > MSG_PROCESS_LIMIT) { 1852 p->rpending = 1; 1853 break; 1854 } 1855 } 1856 1857 if (rpos < av) { 1858 left = av - rpos; 1859 memmove(&p->rbuf->buf, p->rbuf->buf + rpos, left); 1860 p->rbuf->wpos = left; 1861 } else 1862 p->rbuf->wpos = 0; 1863 } 1864 1865 int 1866 parse_header(struct peer *peer, u_char *data, u_int16_t *len, u_int8_t *type) 1867 { 1868 u_char *p; 1869 u_int16_t olen; 1870 static const u_int8_t marker[MSGSIZE_HEADER_MARKER] = { 0xff, 0xff, 1871 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 1872 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; 1873 1874 /* caller MUST make sure we are getting 19 bytes! */ 1875 p = data; 1876 if (memcmp(p, marker, sizeof(marker))) { 1877 log_peer_warnx(&peer->conf, "sync error"); 1878 session_notification(peer, ERR_HEADER, ERR_HDR_SYNC, NULL, 0); 1879 bgp_fsm(peer, EVNT_CON_FATAL); 1880 return (-1); 1881 } 1882 p += MSGSIZE_HEADER_MARKER; 1883 1884 memcpy(&olen, p, 2); 1885 *len = ntohs(olen); 1886 p += 2; 1887 memcpy(type, p, 1); 1888 1889 if (*len < MSGSIZE_HEADER || *len > MAX_PKTSIZE) { 1890 log_peer_warnx(&peer->conf, 1891 "received message: illegal length: %u byte", *len); 1892 session_notification(peer, ERR_HEADER, ERR_HDR_LEN, 1893 &olen, sizeof(olen)); 1894 bgp_fsm(peer, EVNT_CON_FATAL); 1895 return (-1); 1896 } 1897 1898 switch (*type) { 1899 case OPEN: 1900 if (*len < MSGSIZE_OPEN_MIN) { 1901 log_peer_warnx(&peer->conf, 1902 "received OPEN: illegal len: %u byte", *len); 1903 session_notification(peer, ERR_HEADER, ERR_HDR_LEN, 1904 &olen, sizeof(olen)); 1905 bgp_fsm(peer, EVNT_CON_FATAL); 1906 return (-1); 1907 } 1908 break; 1909 case NOTIFICATION: 1910 if (*len < MSGSIZE_NOTIFICATION_MIN) { 1911 log_peer_warnx(&peer->conf, 1912 "received NOTIFICATION: illegal len: %u byte", 1913 *len); 1914 session_notification(peer, ERR_HEADER, ERR_HDR_LEN, 1915 &olen, sizeof(olen)); 1916 bgp_fsm(peer, EVNT_CON_FATAL); 1917 return (-1); 1918 } 1919 break; 1920 case UPDATE: 1921 if (*len < MSGSIZE_UPDATE_MIN) { 1922 log_peer_warnx(&peer->conf, 1923 "received UPDATE: illegal len: %u byte", *len); 1924 session_notification(peer, ERR_HEADER, ERR_HDR_LEN, 1925 &olen, sizeof(olen)); 1926 bgp_fsm(peer, EVNT_CON_FATAL); 1927 return (-1); 1928 } 1929 break; 1930 case KEEPALIVE: 1931 if (*len != MSGSIZE_KEEPALIVE) { 1932 log_peer_warnx(&peer->conf, 1933 "received KEEPALIVE: illegal len: %u byte", *len); 1934 session_notification(peer, ERR_HEADER, ERR_HDR_LEN, 1935 &olen, sizeof(olen)); 1936 bgp_fsm(peer, EVNT_CON_FATAL); 1937 return (-1); 1938 } 1939 break; 1940 case RREFRESH: 1941 if (*len != MSGSIZE_RREFRESH) { 1942 log_peer_warnx(&peer->conf, 1943 "received RREFRESH: illegal len: %u byte", *len); 1944 session_notification(peer, ERR_HEADER, ERR_HDR_LEN, 1945 &olen, sizeof(olen)); 1946 bgp_fsm(peer, EVNT_CON_FATAL); 1947 return (-1); 1948 } 1949 break; 1950 default: 1951 log_peer_warnx(&peer->conf, 1952 "received msg with unknown type %u", *type); 1953 session_notification(peer, ERR_HEADER, ERR_HDR_TYPE, 1954 type, 1); 1955 bgp_fsm(peer, EVNT_CON_FATAL); 1956 return (-1); 1957 } 1958 return (0); 1959 } 1960 1961 int 1962 parse_open(struct peer *peer) 1963 { 1964 u_char *p, *op_val; 1965 u_int8_t version, rversion; 1966 u_int16_t short_as, msglen; 1967 u_int16_t holdtime, oholdtime, myholdtime; 1968 u_int32_t as, bgpid; 1969 u_int8_t optparamlen, plen; 1970 u_int8_t op_type, op_len; 1971 1972 p = peer->rbuf->rptr; 1973 p += MSGSIZE_HEADER_MARKER; 1974 memcpy(&msglen, p, sizeof(msglen)); 1975 msglen = ntohs(msglen); 1976 1977 p = peer->rbuf->rptr; 1978 p += MSGSIZE_HEADER; /* header is already checked */ 1979 1980 memcpy(&version, p, sizeof(version)); 1981 p += sizeof(version); 1982 1983 if (version != BGP_VERSION) { 1984 log_peer_warnx(&peer->conf, 1985 "peer wants unrecognized version %u", version); 1986 if (version > BGP_VERSION) 1987 rversion = version - BGP_VERSION; 1988 else 1989 rversion = BGP_VERSION; 1990 session_notification(peer, ERR_OPEN, ERR_OPEN_VERSION, 1991 &rversion, sizeof(rversion)); 1992 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 1993 return (-1); 1994 } 1995 1996 memcpy(&short_as, p, sizeof(short_as)); 1997 p += sizeof(short_as); 1998 as = peer->short_as = ntohs(short_as); 1999 if (as == 0) { 2000 log_peer_warnx(&peer->conf, 2001 "peer requests unacceptable AS %u", as); 2002 session_notification(peer, ERR_OPEN, ERR_OPEN_AS, 2003 NULL, 0); 2004 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2005 return (-1); 2006 } 2007 2008 memcpy(&oholdtime, p, sizeof(oholdtime)); 2009 p += sizeof(oholdtime); 2010 2011 holdtime = ntohs(oholdtime); 2012 if (holdtime && holdtime < peer->conf.min_holdtime) { 2013 log_peer_warnx(&peer->conf, 2014 "peer requests unacceptable holdtime %u", holdtime); 2015 session_notification(peer, ERR_OPEN, ERR_OPEN_HOLDTIME, 2016 NULL, 0); 2017 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2018 return (-1); 2019 } 2020 2021 myholdtime = peer->conf.holdtime; 2022 if (!myholdtime) 2023 myholdtime = conf->holdtime; 2024 if (holdtime < myholdtime) 2025 peer->holdtime = holdtime; 2026 else 2027 peer->holdtime = myholdtime; 2028 2029 memcpy(&bgpid, p, sizeof(bgpid)); 2030 p += sizeof(bgpid); 2031 2032 /* check bgpid for validity - just disallow 0 */ 2033 if (ntohl(bgpid) == 0) { 2034 log_peer_warnx(&peer->conf, "peer BGPID %u unacceptable", 2035 ntohl(bgpid)); 2036 session_notification(peer, ERR_OPEN, ERR_OPEN_BGPID, 2037 NULL, 0); 2038 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2039 return (-1); 2040 } 2041 peer->remote_bgpid = bgpid; 2042 2043 memcpy(&optparamlen, p, sizeof(optparamlen)); 2044 p += sizeof(optparamlen); 2045 2046 if (optparamlen != msglen - MSGSIZE_OPEN_MIN) { 2047 log_peer_warnx(&peer->conf, 2048 "corrupt OPEN message received: length mismatch"); 2049 session_notification(peer, ERR_OPEN, 0, NULL, 0); 2050 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2051 return (-1); 2052 } 2053 2054 plen = optparamlen; 2055 while (plen > 0) { 2056 if (plen < 2) { 2057 log_peer_warnx(&peer->conf, 2058 "corrupt OPEN message received, len wrong"); 2059 session_notification(peer, ERR_OPEN, 0, NULL, 0); 2060 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2061 return (-1); 2062 } 2063 memcpy(&op_type, p, sizeof(op_type)); 2064 p += sizeof(op_type); 2065 plen -= sizeof(op_type); 2066 memcpy(&op_len, p, sizeof(op_len)); 2067 p += sizeof(op_len); 2068 plen -= sizeof(op_len); 2069 if (op_len > 0) { 2070 if (plen < op_len) { 2071 log_peer_warnx(&peer->conf, 2072 "corrupt OPEN message received, len wrong"); 2073 session_notification(peer, ERR_OPEN, 0, 2074 NULL, 0); 2075 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2076 return (-1); 2077 } 2078 op_val = p; 2079 p += op_len; 2080 plen -= op_len; 2081 } else 2082 op_val = NULL; 2083 2084 switch (op_type) { 2085 case OPT_PARAM_CAPABILITIES: /* RFC 3392 */ 2086 if (parse_capabilities(peer, op_val, op_len, 2087 &as) == -1) { 2088 session_notification(peer, ERR_OPEN, 0, 2089 NULL, 0); 2090 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2091 return (-1); 2092 } 2093 break; 2094 case OPT_PARAM_AUTH: /* deprecated */ 2095 default: 2096 /* 2097 * unsupported type 2098 * the RFCs tell us to leave the data section empty 2099 * and notify the peer with ERR_OPEN, ERR_OPEN_OPT. 2100 * How the peer should know _which_ optional parameter 2101 * we don't support is beyond me. 2102 */ 2103 log_peer_warnx(&peer->conf, 2104 "received OPEN message with unsupported optional " 2105 "parameter: type %u", op_type); 2106 session_notification(peer, ERR_OPEN, ERR_OPEN_OPT, 2107 NULL, 0); 2108 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2109 timer_set(peer, Timer_IdleHold, 0); /* no punish */ 2110 peer->IdleHoldTime /= 2; 2111 return (-1); 2112 } 2113 } 2114 2115 /* if remote-as is zero and it's a cloned neighbor, accept any */ 2116 if (peer->template && !peer->conf.remote_as && as != AS_TRANS) { 2117 peer->conf.remote_as = as; 2118 peer->conf.ebgp = (peer->conf.remote_as != peer->conf.local_as); 2119 if (!peer->conf.ebgp) 2120 /* force enforce_as off for iBGP sessions */ 2121 peer->conf.enforce_as = ENFORCE_AS_OFF; 2122 } 2123 2124 if (peer->conf.remote_as != as) { 2125 log_peer_warnx(&peer->conf, "peer sent wrong AS %s", 2126 log_as(as)); 2127 session_notification(peer, ERR_OPEN, ERR_OPEN_AS, NULL, 0); 2128 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2129 return (-1); 2130 } 2131 2132 if (capa_neg_calc(peer) == -1) { 2133 log_peer_warnx(&peer->conf, 2134 "capability negotiation calculation failed"); 2135 session_notification(peer, ERR_OPEN, 0, NULL, 0); 2136 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2137 return (-1); 2138 } 2139 2140 return (0); 2141 } 2142 2143 int 2144 parse_update(struct peer *peer) 2145 { 2146 u_char *p; 2147 u_int16_t datalen; 2148 2149 /* 2150 * we pass the message verbatim to the rde. 2151 * in case of errors the whole session is reset with a 2152 * notification anyway, we only need to know the peer 2153 */ 2154 p = peer->rbuf->rptr; 2155 p += MSGSIZE_HEADER_MARKER; 2156 memcpy(&datalen, p, sizeof(datalen)); 2157 datalen = ntohs(datalen); 2158 2159 p = peer->rbuf->rptr; 2160 p += MSGSIZE_HEADER; /* header is already checked */ 2161 datalen -= MSGSIZE_HEADER; 2162 2163 if (imsg_rde(IMSG_UPDATE, peer->conf.id, p, datalen) == -1) 2164 return (-1); 2165 2166 return (0); 2167 } 2168 2169 int 2170 parse_refresh(struct peer *peer) 2171 { 2172 u_char *p; 2173 u_int16_t afi; 2174 u_int8_t aid, safi; 2175 2176 p = peer->rbuf->rptr; 2177 p += MSGSIZE_HEADER; /* header is already checked */ 2178 2179 /* 2180 * We could check if we actually announced the capability but 2181 * as long as the message is correctly encoded we don't care. 2182 */ 2183 2184 /* afi, 2 byte */ 2185 memcpy(&afi, p, sizeof(afi)); 2186 afi = ntohs(afi); 2187 p += 2; 2188 /* reserved, 1 byte */ 2189 p += 1; 2190 /* safi, 1 byte */ 2191 memcpy(&safi, p, sizeof(safi)); 2192 2193 /* afi/safi unchecked - unrecognized values will be ignored anyway */ 2194 if (afi2aid(afi, safi, &aid) == -1) { 2195 log_peer_warnx(&peer->conf, "peer sent bad refresh, " 2196 "invalid afi/safi pair"); 2197 return (0); 2198 } 2199 2200 if (imsg_rde(IMSG_REFRESH, peer->conf.id, &aid, sizeof(aid)) == -1) 2201 return (-1); 2202 2203 return (0); 2204 } 2205 2206 int 2207 parse_notification(struct peer *peer) 2208 { 2209 u_char *p; 2210 u_int16_t datalen; 2211 u_int8_t errcode; 2212 u_int8_t subcode; 2213 u_int8_t capa_code; 2214 u_int8_t capa_len; 2215 size_t shutcomm_len; 2216 u_int8_t i; 2217 2218 /* just log */ 2219 p = peer->rbuf->rptr; 2220 p += MSGSIZE_HEADER_MARKER; 2221 memcpy(&datalen, p, sizeof(datalen)); 2222 datalen = ntohs(datalen); 2223 2224 p = peer->rbuf->rptr; 2225 p += MSGSIZE_HEADER; /* header is already checked */ 2226 datalen -= MSGSIZE_HEADER; 2227 2228 memcpy(&errcode, p, sizeof(errcode)); 2229 p += sizeof(errcode); 2230 datalen -= sizeof(errcode); 2231 2232 memcpy(&subcode, p, sizeof(subcode)); 2233 p += sizeof(subcode); 2234 datalen -= sizeof(subcode); 2235 2236 log_notification(peer, errcode, subcode, p, datalen, "received"); 2237 peer->errcnt++; 2238 2239 if (errcode == ERR_OPEN && subcode == ERR_OPEN_CAPA) { 2240 if (datalen == 0) { /* zebra likes to send those.. humbug */ 2241 log_peer_warnx(&peer->conf, "received \"unsupported " 2242 "capability\" notification without data part, " 2243 "disabling capability announcements altogether"); 2244 session_capa_ann_none(peer); 2245 } 2246 2247 while (datalen > 0) { 2248 if (datalen < 2) { 2249 log_peer_warnx(&peer->conf, 2250 "parse_notification: " 2251 "expect len >= 2, len is %u", datalen); 2252 return (-1); 2253 } 2254 memcpy(&capa_code, p, sizeof(capa_code)); 2255 p += sizeof(capa_code); 2256 datalen -= sizeof(capa_code); 2257 memcpy(&capa_len, p, sizeof(capa_len)); 2258 p += sizeof(capa_len); 2259 datalen -= sizeof(capa_len); 2260 if (datalen < capa_len) { 2261 log_peer_warnx(&peer->conf, 2262 "parse_notification: capa_len %u exceeds " 2263 "remaining msg length %u", capa_len, 2264 datalen); 2265 return (-1); 2266 } 2267 p += capa_len; 2268 datalen -= capa_len; 2269 switch (capa_code) { 2270 case CAPA_MP: 2271 for (i = 0; i < AID_MAX; i++) 2272 peer->capa.ann.mp[i] = 0; 2273 log_peer_warnx(&peer->conf, 2274 "disabling multiprotocol capability"); 2275 break; 2276 case CAPA_REFRESH: 2277 peer->capa.ann.refresh = 0; 2278 log_peer_warnx(&peer->conf, 2279 "disabling route refresh capability"); 2280 break; 2281 case CAPA_RESTART: 2282 peer->capa.ann.grestart.restart = 0; 2283 log_peer_warnx(&peer->conf, 2284 "disabling restart capability"); 2285 break; 2286 case CAPA_AS4BYTE: 2287 peer->capa.ann.as4byte = 0; 2288 log_peer_warnx(&peer->conf, 2289 "disabling 4-byte AS num capability"); 2290 break; 2291 default: /* should not happen... */ 2292 log_peer_warnx(&peer->conf, "received " 2293 "\"unsupported capability\" notification " 2294 "for unknown capability %u, disabling " 2295 "capability announcements altogether", 2296 capa_code); 2297 session_capa_ann_none(peer); 2298 break; 2299 } 2300 } 2301 2302 return (1); 2303 } 2304 2305 if (errcode == ERR_OPEN && subcode == ERR_OPEN_OPT) { 2306 session_capa_ann_none(peer); 2307 return (1); 2308 } 2309 2310 if (errcode == ERR_CEASE && 2311 (subcode == ERR_CEASE_ADMIN_DOWN || 2312 subcode == ERR_CEASE_ADMIN_RESET)) { 2313 if (datalen > 1) { 2314 shutcomm_len = *p++; 2315 datalen--; 2316 if(datalen < shutcomm_len) { 2317 log_peer_warnx(&peer->conf, 2318 "received truncated shutdown reason"); 2319 return (0); 2320 } 2321 if (shutcomm_len > SHUT_COMM_LEN - 1) { 2322 log_peer_warnx(&peer->conf, 2323 "received overly long shutdown reason"); 2324 return (0); 2325 } 2326 memcpy(peer->stats.last_shutcomm, p, shutcomm_len); 2327 peer->stats.last_shutcomm[shutcomm_len] = '\0'; 2328 log_peer_warnx(&peer->conf, 2329 "received shutdown reason: \"%s\"", 2330 log_shutcomm(peer->stats.last_shutcomm)); 2331 p += shutcomm_len; 2332 datalen -= shutcomm_len; 2333 } 2334 } 2335 2336 return (0); 2337 } 2338 2339 int 2340 parse_capabilities(struct peer *peer, u_char *d, u_int16_t dlen, u_int32_t *as) 2341 { 2342 u_char *capa_val; 2343 u_int32_t remote_as; 2344 u_int16_t len; 2345 u_int16_t afi; 2346 u_int16_t gr_header; 2347 u_int8_t safi; 2348 u_int8_t aid; 2349 u_int8_t gr_flags; 2350 u_int8_t capa_code; 2351 u_int8_t capa_len; 2352 u_int8_t i; 2353 2354 len = dlen; 2355 while (len > 0) { 2356 if (len < 2) { 2357 log_peer_warnx(&peer->conf, "Bad capabilities attr " 2358 "length: %u, too short", len); 2359 return (-1); 2360 } 2361 memcpy(&capa_code, d, sizeof(capa_code)); 2362 d += sizeof(capa_code); 2363 len -= sizeof(capa_code); 2364 memcpy(&capa_len, d, sizeof(capa_len)); 2365 d += sizeof(capa_len); 2366 len -= sizeof(capa_len); 2367 if (capa_len > 0) { 2368 if (len < capa_len) { 2369 log_peer_warnx(&peer->conf, 2370 "Bad capabilities attr length: " 2371 "len %u smaller than capa_len %u", 2372 len, capa_len); 2373 return (-1); 2374 } 2375 capa_val = d; 2376 d += capa_len; 2377 len -= capa_len; 2378 } else 2379 capa_val = NULL; 2380 2381 switch (capa_code) { 2382 case CAPA_MP: /* RFC 4760 */ 2383 if (capa_len != 4) { 2384 log_peer_warnx(&peer->conf, 2385 "Bad multi protocol capability length: " 2386 "%u", capa_len); 2387 break; 2388 } 2389 memcpy(&afi, capa_val, sizeof(afi)); 2390 afi = ntohs(afi); 2391 memcpy(&safi, capa_val + 3, sizeof(safi)); 2392 if (afi2aid(afi, safi, &aid) == -1) { 2393 log_peer_warnx(&peer->conf, 2394 "Received multi protocol capability: " 2395 " unknown AFI %u, safi %u pair", 2396 afi, safi); 2397 break; 2398 } 2399 peer->capa.peer.mp[aid] = 1; 2400 break; 2401 case CAPA_REFRESH: 2402 peer->capa.peer.refresh = 1; 2403 break; 2404 case CAPA_RESTART: 2405 if (capa_len == 2) { 2406 /* peer only supports EoR marker */ 2407 peer->capa.peer.grestart.restart = 1; 2408 peer->capa.peer.grestart.timeout = 0; 2409 break; 2410 } else if (capa_len % 4 != 2) { 2411 log_peer_warnx(&peer->conf, 2412 "Bad graceful restart capability length: " 2413 "%u", capa_len); 2414 peer->capa.peer.grestart.restart = 0; 2415 peer->capa.peer.grestart.timeout = 0; 2416 break; 2417 } 2418 2419 memcpy(&gr_header, capa_val, sizeof(gr_header)); 2420 gr_header = ntohs(gr_header); 2421 peer->capa.peer.grestart.timeout = 2422 gr_header & CAPA_GR_TIMEMASK; 2423 if (peer->capa.peer.grestart.timeout == 0) { 2424 log_peer_warnx(&peer->conf, "Received " 2425 "graceful restart timeout is zero"); 2426 peer->capa.peer.grestart.restart = 0; 2427 break; 2428 } 2429 2430 for (i = 2; i <= capa_len - 4; i += 4) { 2431 memcpy(&afi, capa_val + i, sizeof(afi)); 2432 afi = ntohs(afi); 2433 memcpy(&safi, capa_val + i + 2, sizeof(safi)); 2434 if (afi2aid(afi, safi, &aid) == -1) { 2435 log_peer_warnx(&peer->conf, 2436 "Received graceful restart capa: " 2437 " unknown AFI %u, safi %u pair", 2438 afi, safi); 2439 continue; 2440 } 2441 memcpy(&gr_flags, capa_val + i + 3, 2442 sizeof(gr_flags)); 2443 peer->capa.peer.grestart.flags[aid] |= 2444 CAPA_GR_PRESENT; 2445 if (gr_flags & CAPA_GR_F_FLAG) 2446 peer->capa.peer.grestart.flags[aid] |= 2447 CAPA_GR_FORWARD; 2448 if (gr_header & CAPA_GR_R_FLAG) 2449 peer->capa.peer.grestart.flags[aid] |= 2450 CAPA_GR_RESTART; 2451 peer->capa.peer.grestart.restart = 2; 2452 } 2453 break; 2454 case CAPA_AS4BYTE: 2455 if (capa_len != 4) { 2456 log_peer_warnx(&peer->conf, 2457 "Bad AS4BYTE capability length: " 2458 "%u", capa_len); 2459 peer->capa.peer.as4byte = 0; 2460 break; 2461 } 2462 memcpy(&remote_as, capa_val, sizeof(remote_as)); 2463 *as = ntohl(remote_as); 2464 if (*as == 0) { 2465 log_peer_warnx(&peer->conf, 2466 "peer requests unacceptable AS %u", *as); 2467 session_notification(peer, ERR_OPEN, 2468 ERR_OPEN_AS, NULL, 0); 2469 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2470 return (-1); 2471 } 2472 peer->capa.peer.as4byte = 1; 2473 break; 2474 default: 2475 break; 2476 } 2477 } 2478 2479 return (0); 2480 } 2481 2482 int 2483 capa_neg_calc(struct peer *p) 2484 { 2485 u_int8_t i, hasmp = 0; 2486 2487 /* refresh: does not realy matter here, use peer setting */ 2488 p->capa.neg.refresh = p->capa.peer.refresh; 2489 2490 /* as4byte: both side must announce capability */ 2491 if (p->capa.ann.as4byte && p->capa.peer.as4byte) 2492 p->capa.neg.as4byte = 1; 2493 else 2494 p->capa.neg.as4byte = 0; 2495 2496 /* MP: both side must announce capability */ 2497 for (i = 0; i < AID_MAX; i++) { 2498 if (p->capa.ann.mp[i] && p->capa.peer.mp[i]) { 2499 p->capa.neg.mp[i] = 1; 2500 hasmp = 1; 2501 } else 2502 p->capa.neg.mp[i] = 0; 2503 } 2504 /* if no MP capability present default to IPv4 unicast mode */ 2505 if (!hasmp) 2506 p->capa.neg.mp[AID_INET] = 1; 2507 2508 /* 2509 * graceful restart: only the peer capabilities are of interest here. 2510 * It is necessary to compare the new values with the previous ones 2511 * and act acordingly. AFI/SAFI that are not part in the MP capability 2512 * are treated as not being present. 2513 */ 2514 2515 for (i = 0; i < AID_MAX; i++) { 2516 int8_t negflags; 2517 2518 /* disable GR if the AFI/SAFI is not present */ 2519 if (p->capa.peer.grestart.flags[i] & CAPA_GR_PRESENT && 2520 p->capa.neg.mp[i] == 0) 2521 p->capa.peer.grestart.flags[i] = 0; /* disable */ 2522 /* look at current GR state and decide what to do */ 2523 negflags = p->capa.neg.grestart.flags[i]; 2524 p->capa.neg.grestart.flags[i] = p->capa.peer.grestart.flags[i]; 2525 if (negflags & CAPA_GR_RESTARTING) { 2526 if (!(p->capa.peer.grestart.flags[i] & 2527 CAPA_GR_FORWARD)) { 2528 if (imsg_rde(IMSG_SESSION_FLUSH, p->conf.id, 2529 &i, sizeof(i)) == -1) 2530 return (-1); 2531 log_peer_warnx(&p->conf, "graceful restart of " 2532 "%s, not restarted, flushing", aid2str(i)); 2533 } else 2534 p->capa.neg.grestart.flags[i] |= 2535 CAPA_GR_RESTARTING; 2536 } 2537 } 2538 p->capa.neg.grestart.timeout = p->capa.peer.grestart.timeout; 2539 p->capa.neg.grestart.restart = p->capa.peer.grestart.restart; 2540 2541 return (0); 2542 } 2543 2544 void 2545 session_dispatch_imsg(struct imsgbuf *ibuf, int idx, u_int *listener_cnt) 2546 { 2547 struct imsg imsg; 2548 struct mrt xmrt; 2549 struct mrt *mrt; 2550 struct imsgbuf *i; 2551 struct peer *p; 2552 struct listen_addr *la, *nla; 2553 struct kif *kif; 2554 u_char *data; 2555 int n, fd, depend_ok, restricted; 2556 u_int8_t aid, errcode, subcode; 2557 2558 while (ibuf) { 2559 if ((n = imsg_get(ibuf, &imsg)) == -1) 2560 fatal("session_dispatch_imsg: imsg_get error"); 2561 2562 if (n == 0) 2563 break; 2564 2565 switch (imsg.hdr.type) { 2566 case IMSG_SOCKET_CONN: 2567 case IMSG_SOCKET_CONN_CTL: 2568 if (idx != PFD_PIPE_MAIN) 2569 fatalx("reconf request not from parent"); 2570 if ((fd = imsg.fd) == -1) { 2571 log_warnx("expected to receive imsg fd to " 2572 "RDE but didn't receive any"); 2573 break; 2574 } 2575 if ((i = malloc(sizeof(struct imsgbuf))) == NULL) 2576 fatal(NULL); 2577 imsg_init(i, fd); 2578 if (imsg.hdr.type == IMSG_SOCKET_CONN) { 2579 if (ibuf_rde) { 2580 log_warnx("Unexpected imsg connection " 2581 "to RDE received"); 2582 msgbuf_clear(&ibuf_rde->w); 2583 free(ibuf_rde); 2584 } 2585 ibuf_rde = i; 2586 } else { 2587 if (ibuf_rde_ctl) { 2588 log_warnx("Unexpected imsg ctl " 2589 "connection to RDE received"); 2590 msgbuf_clear(&ibuf_rde_ctl->w); 2591 free(ibuf_rde_ctl); 2592 } 2593 ibuf_rde_ctl = i; 2594 } 2595 break; 2596 case IMSG_RECONF_CONF: 2597 if (idx != PFD_PIPE_MAIN) 2598 fatalx("reconf request not from parent"); 2599 nconf = new_config(); 2600 2601 copy_config(nconf, imsg.data); 2602 pending_reconf = 1; 2603 break; 2604 case IMSG_RECONF_PEER: 2605 if (idx != PFD_PIPE_MAIN) 2606 fatalx("reconf request not from parent"); 2607 if ((p = calloc(1, sizeof(struct peer))) == NULL) 2608 fatal("new_peer"); 2609 memcpy(&p->conf, imsg.data, sizeof(struct peer_config)); 2610 p->state = p->prev_state = STATE_NONE; 2611 p->reconf_action = RECONF_REINIT; 2612 TAILQ_INSERT_TAIL(&nconf->peers, p, entry); 2613 break; 2614 case IMSG_RECONF_LISTENER: 2615 if (idx != PFD_PIPE_MAIN) 2616 fatalx("reconf request not from parent"); 2617 if (nconf == NULL) 2618 fatalx("IMSG_RECONF_LISTENER but no config"); 2619 nla = imsg.data; 2620 TAILQ_FOREACH(la, conf->listen_addrs, entry) 2621 if (!la_cmp(la, nla)) 2622 break; 2623 2624 if (la == NULL) { 2625 if (nla->reconf != RECONF_REINIT) 2626 fatalx("king bula sez: " 2627 "expected REINIT"); 2628 2629 if ((nla->fd = imsg.fd) == -1) 2630 log_warnx("expected to receive fd for " 2631 "%s but didn't receive any", 2632 log_sockaddr((struct sockaddr *) 2633 &nla->sa, nla->sa_len)); 2634 2635 la = calloc(1, sizeof(struct listen_addr)); 2636 if (la == NULL) 2637 fatal(NULL); 2638 memcpy(&la->sa, &nla->sa, sizeof(la->sa)); 2639 la->flags = nla->flags; 2640 la->fd = nla->fd; 2641 la->reconf = RECONF_REINIT; 2642 TAILQ_INSERT_TAIL(nconf->listen_addrs, la, 2643 entry); 2644 } else { 2645 if (nla->reconf != RECONF_KEEP) 2646 fatalx("king bula sez: expected KEEP"); 2647 la->reconf = RECONF_KEEP; 2648 } 2649 2650 break; 2651 case IMSG_RECONF_CTRL: 2652 if (idx != PFD_PIPE_MAIN) 2653 fatalx("reconf request not from parent"); 2654 if (imsg.hdr.len != IMSG_HEADER_SIZE + 2655 sizeof(restricted)) 2656 fatalx("IFINFO imsg with wrong len"); 2657 memcpy(&restricted, imsg.data, sizeof(restricted)); 2658 if (imsg.fd == -1) { 2659 log_warnx("expected to receive fd for control " 2660 "socket but didn't receive any"); 2661 break; 2662 } 2663 if (restricted) { 2664 control_shutdown(rcsock); 2665 rcsock = imsg.fd; 2666 } else { 2667 control_shutdown(csock); 2668 csock = imsg.fd; 2669 } 2670 break; 2671 case IMSG_RECONF_DRAIN: 2672 if (idx != PFD_PIPE_MAIN) 2673 fatalx("reconf request not from parent"); 2674 imsg_compose(ibuf_main, IMSG_RECONF_DRAIN, 0, 0, 2675 -1, NULL, 0); 2676 break; 2677 case IMSG_RECONF_DONE: 2678 if (idx != PFD_PIPE_MAIN) 2679 fatalx("reconf request not from parent"); 2680 if (nconf == NULL) 2681 fatalx("got IMSG_RECONF_DONE but no config"); 2682 copy_config(conf, nconf); 2683 merge_peers(conf, nconf); 2684 2685 /* delete old listeners */ 2686 for (la = TAILQ_FIRST(conf->listen_addrs); la != NULL; 2687 la = nla) { 2688 nla = TAILQ_NEXT(la, entry); 2689 if (la->reconf == RECONF_NONE) { 2690 log_info("not listening on %s any more", 2691 log_sockaddr((struct sockaddr *) 2692 &la->sa, la->sa_len)); 2693 TAILQ_REMOVE(conf->listen_addrs, la, 2694 entry); 2695 close(la->fd); 2696 free(la); 2697 } 2698 } 2699 2700 /* add new listeners */ 2701 while ((la = TAILQ_FIRST(nconf->listen_addrs)) != 2702 NULL) { 2703 TAILQ_REMOVE(nconf->listen_addrs, la, entry); 2704 TAILQ_INSERT_TAIL(conf->listen_addrs, la, 2705 entry); 2706 } 2707 2708 setup_listeners(listener_cnt); 2709 free_config(nconf); 2710 nconf = NULL; 2711 pending_reconf = 0; 2712 log_info("SE reconfigured"); 2713 imsg_compose(ibuf_main, IMSG_RECONF_DONE, 0, 0, 2714 -1, NULL, 0); 2715 break; 2716 case IMSG_IFINFO: 2717 if (idx != PFD_PIPE_MAIN) 2718 fatalx("IFINFO message not from parent"); 2719 if (imsg.hdr.len != IMSG_HEADER_SIZE + 2720 sizeof(struct kif)) 2721 fatalx("IFINFO imsg with wrong len"); 2722 kif = imsg.data; 2723 depend_ok = kif->depend_state; 2724 2725 TAILQ_FOREACH(p, &conf->peers, entry) 2726 if (!strcmp(p->conf.if_depend, kif->ifname)) { 2727 if (depend_ok && !p->depend_ok) { 2728 p->depend_ok = depend_ok; 2729 bgp_fsm(p, EVNT_START); 2730 } else if (!depend_ok && p->depend_ok) { 2731 p->depend_ok = depend_ok; 2732 session_stop(p, 2733 ERR_CEASE_OTHER_CHANGE); 2734 } 2735 } 2736 break; 2737 case IMSG_MRT_OPEN: 2738 case IMSG_MRT_REOPEN: 2739 if (imsg.hdr.len > IMSG_HEADER_SIZE + 2740 sizeof(struct mrt)) { 2741 log_warnx("wrong imsg len"); 2742 break; 2743 } 2744 2745 memcpy(&xmrt, imsg.data, sizeof(struct mrt)); 2746 if ((xmrt.wbuf.fd = imsg.fd) == -1) 2747 log_warnx("expected to receive fd for mrt dump " 2748 "but didn't receive any"); 2749 2750 mrt = mrt_get(&mrthead, &xmrt); 2751 if (mrt == NULL) { 2752 /* new dump */ 2753 mrt = calloc(1, sizeof(struct mrt)); 2754 if (mrt == NULL) 2755 fatal("session_dispatch_imsg"); 2756 memcpy(mrt, &xmrt, sizeof(struct mrt)); 2757 TAILQ_INIT(&mrt->wbuf.bufs); 2758 LIST_INSERT_HEAD(&mrthead, mrt, entry); 2759 } else { 2760 /* old dump reopened */ 2761 close(mrt->wbuf.fd); 2762 mrt->wbuf.fd = xmrt.wbuf.fd; 2763 } 2764 break; 2765 case IMSG_MRT_CLOSE: 2766 if (imsg.hdr.len > IMSG_HEADER_SIZE + 2767 sizeof(struct mrt)) { 2768 log_warnx("wrong imsg len"); 2769 break; 2770 } 2771 2772 memcpy(&xmrt, imsg.data, sizeof(struct mrt)); 2773 mrt = mrt_get(&mrthead, &xmrt); 2774 if (mrt != NULL) 2775 mrt_done(mrt); 2776 break; 2777 case IMSG_CTL_KROUTE: 2778 case IMSG_CTL_KROUTE_ADDR: 2779 case IMSG_CTL_SHOW_NEXTHOP: 2780 case IMSG_CTL_SHOW_INTERFACE: 2781 case IMSG_CTL_SHOW_FIB_TABLES: 2782 if (idx != PFD_PIPE_MAIN) 2783 fatalx("ctl kroute request not from parent"); 2784 control_imsg_relay(&imsg); 2785 break; 2786 case IMSG_CTL_SHOW_RIB: 2787 case IMSG_CTL_SHOW_RIB_PREFIX: 2788 case IMSG_CTL_SHOW_RIB_ATTR: 2789 case IMSG_CTL_SHOW_RIB_MEM: 2790 case IMSG_CTL_SHOW_RIB_HASH: 2791 case IMSG_CTL_SHOW_NETWORK: 2792 case IMSG_CTL_SHOW_NEIGHBOR: 2793 if (idx != PFD_PIPE_ROUTE_CTL) 2794 fatalx("ctl rib request not from RDE"); 2795 control_imsg_relay(&imsg); 2796 break; 2797 case IMSG_CTL_END: 2798 case IMSG_CTL_RESULT: 2799 control_imsg_relay(&imsg); 2800 break; 2801 case IMSG_UPDATE: 2802 if (idx != PFD_PIPE_ROUTE) 2803 fatalx("update request not from RDE"); 2804 if (imsg.hdr.len > IMSG_HEADER_SIZE + 2805 MAX_PKTSIZE - MSGSIZE_HEADER || 2806 imsg.hdr.len < IMSG_HEADER_SIZE + 2807 MSGSIZE_UPDATE_MIN - MSGSIZE_HEADER) 2808 log_warnx("RDE sent invalid update"); 2809 else 2810 session_update(imsg.hdr.peerid, imsg.data, 2811 imsg.hdr.len - IMSG_HEADER_SIZE); 2812 break; 2813 case IMSG_UPDATE_ERR: 2814 if (idx != PFD_PIPE_ROUTE) 2815 fatalx("update request not from RDE"); 2816 if (imsg.hdr.len < IMSG_HEADER_SIZE + 2) { 2817 log_warnx("RDE sent invalid notification"); 2818 break; 2819 } 2820 if ((p = getpeerbyid(conf, imsg.hdr.peerid)) == NULL) { 2821 log_warnx("no such peer: id=%u", 2822 imsg.hdr.peerid); 2823 break; 2824 } 2825 data = imsg.data; 2826 errcode = *data++; 2827 subcode = *data++; 2828 2829 if (imsg.hdr.len == IMSG_HEADER_SIZE + 2) 2830 data = NULL; 2831 2832 session_notification(p, errcode, subcode, 2833 data, imsg.hdr.len - IMSG_HEADER_SIZE - 2); 2834 switch (errcode) { 2835 case ERR_CEASE: 2836 switch (subcode) { 2837 case ERR_CEASE_MAX_PREFIX: 2838 bgp_fsm(p, EVNT_STOP); 2839 if (p->conf.max_prefix_restart) 2840 timer_set(p, Timer_IdleHold, 60 * 2841 p->conf.max_prefix_restart); 2842 break; 2843 default: 2844 bgp_fsm(p, EVNT_CON_FATAL); 2845 break; 2846 } 2847 break; 2848 default: 2849 bgp_fsm(p, EVNT_CON_FATAL); 2850 break; 2851 } 2852 break; 2853 case IMSG_SESSION_RESTARTED: 2854 if (idx != PFD_PIPE_ROUTE) 2855 fatalx("update request not from RDE"); 2856 if (imsg.hdr.len < IMSG_HEADER_SIZE + sizeof(aid)) { 2857 log_warnx("RDE sent invalid restart msg"); 2858 break; 2859 } 2860 if ((p = getpeerbyid(conf, imsg.hdr.peerid)) == NULL) { 2861 log_warnx("no such peer: id=%u", 2862 imsg.hdr.peerid); 2863 break; 2864 } 2865 memcpy(&aid, imsg.data, sizeof(aid)); 2866 if (aid >= AID_MAX) 2867 fatalx("IMSG_SESSION_RESTARTED: bad AID"); 2868 if (p->capa.neg.grestart.flags[aid] & 2869 CAPA_GR_RESTARTING) { 2870 log_peer_warnx(&p->conf, 2871 "graceful restart of %s finished", 2872 aid2str(aid)); 2873 p->capa.neg.grestart.flags[aid] &= 2874 ~CAPA_GR_RESTARTING; 2875 timer_stop(p, Timer_RestartTimeout); 2876 2877 /* signal back to RDE to cleanup stale routes */ 2878 if (imsg_rde(IMSG_SESSION_RESTARTED, 2879 imsg.hdr.peerid, &aid, sizeof(aid)) == -1) 2880 fatal("imsg_compose: " 2881 "IMSG_SESSION_RESTARTED"); 2882 } 2883 break; 2884 case IMSG_SESSION_DOWN: 2885 if (idx != PFD_PIPE_ROUTE) 2886 fatalx("update request not from RDE"); 2887 if ((p = getpeerbyid(conf, imsg.hdr.peerid)) == NULL) { 2888 log_warnx("no such peer: id=%u", 2889 imsg.hdr.peerid); 2890 break; 2891 } 2892 session_stop(p, ERR_CEASE_ADMIN_DOWN); 2893 break; 2894 default: 2895 break; 2896 } 2897 imsg_free(&imsg); 2898 } 2899 } 2900 2901 int 2902 la_cmp(struct listen_addr *a, struct listen_addr *b) 2903 { 2904 struct sockaddr_in *in_a, *in_b; 2905 struct sockaddr_in6 *in6_a, *in6_b; 2906 2907 if (a->sa.ss_family != b->sa.ss_family) 2908 return (1); 2909 2910 switch (a->sa.ss_family) { 2911 case AF_INET: 2912 in_a = (struct sockaddr_in *)&a->sa; 2913 in_b = (struct sockaddr_in *)&b->sa; 2914 if (in_a->sin_addr.s_addr != in_b->sin_addr.s_addr) 2915 return (1); 2916 if (in_a->sin_port != in_b->sin_port) 2917 return (1); 2918 break; 2919 case AF_INET6: 2920 in6_a = (struct sockaddr_in6 *)&a->sa; 2921 in6_b = (struct sockaddr_in6 *)&b->sa; 2922 if (bcmp(&in6_a->sin6_addr, &in6_b->sin6_addr, 2923 sizeof(struct in6_addr))) 2924 return (1); 2925 if (in6_a->sin6_port != in6_b->sin6_port) 2926 return (1); 2927 break; 2928 default: 2929 fatal("king bula sez: unknown address family"); 2930 /* NOTREACHED */ 2931 } 2932 2933 return (0); 2934 } 2935 2936 struct peer * 2937 getpeerbydesc(struct bgpd_config *c, const char *descr) 2938 { 2939 struct peer *p, *res = NULL; 2940 int match = 0; 2941 2942 TAILQ_FOREACH(p, &c->peers, entry) 2943 if (!strcmp(p->conf.descr, descr)) { 2944 res = p; 2945 match++; 2946 } 2947 2948 if (match > 1) 2949 log_info("neighbor description \"%s\" not unique, request " 2950 "aborted", descr); 2951 2952 if (match == 1) 2953 return (res); 2954 else 2955 return (NULL); 2956 } 2957 2958 struct peer * 2959 getpeerbyip(struct bgpd_config *c, struct sockaddr *ip) 2960 { 2961 struct bgpd_addr addr; 2962 struct peer *p, *newpeer, *loose = NULL; 2963 u_int32_t id; 2964 2965 sa2addr(ip, &addr, NULL); 2966 2967 /* we might want a more effective way to find peers by IP */ 2968 TAILQ_FOREACH(p, &c->peers, entry) 2969 if (!p->conf.template && 2970 !memcmp(&addr, &p->conf.remote_addr, sizeof(addr))) 2971 return (p); 2972 2973 /* try template matching */ 2974 TAILQ_FOREACH(p, &c->peers, entry) 2975 if (p->conf.template && 2976 p->conf.remote_addr.aid == addr.aid && 2977 session_match_mask(p, &addr)) 2978 if (loose == NULL || loose->conf.remote_masklen < 2979 p->conf.remote_masklen) 2980 loose = p; 2981 2982 if (loose != NULL) { 2983 /* clone */ 2984 if ((newpeer = malloc(sizeof(struct peer))) == NULL) 2985 fatal(NULL); 2986 memcpy(newpeer, loose, sizeof(struct peer)); 2987 for (id = UINT_MAX; id > UINT_MAX / 2; id--) { 2988 TAILQ_FOREACH(p, &c->peers, entry) 2989 if (p->conf.id == id) 2990 break; 2991 if (p == NULL) /* we found a free id */ 2992 break; 2993 } 2994 newpeer->template = loose; 2995 session_template_clone(newpeer, ip, id, 0); 2996 newpeer->state = newpeer->prev_state = STATE_NONE; 2997 newpeer->reconf_action = RECONF_KEEP; 2998 newpeer->rbuf = NULL; 2999 init_peer(newpeer); 3000 bgp_fsm(newpeer, EVNT_START); 3001 TAILQ_INSERT_TAIL(&c->peers, newpeer, entry); 3002 return (newpeer); 3003 } 3004 3005 return (NULL); 3006 } 3007 3008 struct peer * 3009 getpeerbyid(struct bgpd_config *c, u_int32_t peerid) 3010 { 3011 struct peer *p; 3012 3013 /* we might want a more effective way to find peers by id */ 3014 TAILQ_FOREACH(p, &c->peers, entry) 3015 if (p->conf.id == peerid) 3016 return (p); 3017 return (NULL); 3018 } 3019 3020 int 3021 peer_matched(struct peer *p, struct ctl_neighbor *n) 3022 { 3023 char *s; 3024 3025 if (n && n->addr.aid) { 3026 if (memcmp(&p->conf.remote_addr, &n->addr, 3027 sizeof(p->conf.remote_addr))) 3028 return 0; 3029 } else if (n && n->descr[0]) { 3030 s = n->is_group ? p->conf.group : p->conf.descr; 3031 if (strcmp(s, n->descr)) 3032 return 0; 3033 } 3034 return 1; 3035 } 3036 3037 void 3038 session_template_clone(struct peer *p, struct sockaddr *ip, u_int32_t id, 3039 u_int32_t as) 3040 { 3041 struct bgpd_addr remote_addr; 3042 3043 if (ip) 3044 sa2addr(ip, &remote_addr, NULL); 3045 else 3046 memcpy(&remote_addr, &p->conf.remote_addr, sizeof(remote_addr)); 3047 3048 memcpy(&p->conf, &p->template->conf, sizeof(struct peer_config)); 3049 3050 p->conf.id = id; 3051 3052 if (as) { 3053 p->conf.remote_as = as; 3054 p->conf.ebgp = (p->conf.remote_as != p->conf.local_as); 3055 if (!p->conf.ebgp) 3056 /* force enforce_as off for iBGP sessions */ 3057 p->conf.enforce_as = ENFORCE_AS_OFF; 3058 } 3059 3060 memcpy(&p->conf.remote_addr, &remote_addr, sizeof(remote_addr)); 3061 switch (p->conf.remote_addr.aid) { 3062 case AID_INET: 3063 p->conf.remote_masklen = 32; 3064 break; 3065 case AID_INET6: 3066 p->conf.remote_masklen = 128; 3067 break; 3068 } 3069 p->conf.template = 0; 3070 } 3071 3072 int 3073 session_match_mask(struct peer *p, struct bgpd_addr *a) 3074 { 3075 struct in_addr v4masked; 3076 struct in6_addr v6masked; 3077 3078 switch (p->conf.remote_addr.aid) { 3079 case AID_INET: 3080 inet4applymask(&v4masked, &a->v4, p->conf.remote_masklen); 3081 if (p->conf.remote_addr.v4.s_addr == v4masked.s_addr) 3082 return (1); 3083 return (0); 3084 case AID_INET6: 3085 inet6applymask(&v6masked, &a->v6, p->conf.remote_masklen); 3086 3087 if (memcmp(&v6masked, &p->conf.remote_addr.v6, 3088 sizeof(v6masked)) == 0) 3089 return (1); 3090 return (0); 3091 } 3092 return (0); 3093 } 3094 3095 void 3096 session_down(struct peer *peer) 3097 { 3098 bzero(&peer->capa.neg, sizeof(peer->capa.neg)); 3099 peer->stats.last_updown = time(NULL); 3100 /* 3101 * session_down is called in the exit code path so check 3102 * if the RDE is still around, if not there is no need to 3103 * send the message. 3104 */ 3105 if (ibuf_rde == NULL) 3106 return; 3107 if (imsg_rde(IMSG_SESSION_DOWN, peer->conf.id, NULL, 0) == -1) 3108 fatalx("imsg_compose error"); 3109 } 3110 3111 void 3112 session_up(struct peer *p) 3113 { 3114 struct session_up sup; 3115 3116 if (imsg_rde(IMSG_SESSION_ADD, p->conf.id, 3117 &p->conf, sizeof(p->conf)) == -1) 3118 fatalx("imsg_compose error"); 3119 3120 sup.local_addr = p->local; 3121 sup.remote_addr = p->remote; 3122 3123 sup.remote_bgpid = p->remote_bgpid; 3124 sup.short_as = p->short_as; 3125 memcpy(&sup.capa, &p->capa.neg, sizeof(sup.capa)); 3126 p->stats.last_updown = time(NULL); 3127 if (imsg_rde(IMSG_SESSION_UP, p->conf.id, &sup, sizeof(sup)) == -1) 3128 fatalx("imsg_compose error"); 3129 } 3130 3131 int 3132 imsg_ctl_parent(int type, u_int32_t peerid, pid_t pid, void *data, 3133 u_int16_t datalen) 3134 { 3135 return (imsg_compose(ibuf_main, type, peerid, pid, -1, data, datalen)); 3136 } 3137 3138 int 3139 imsg_ctl_rde(int type, pid_t pid, void *data, u_int16_t datalen) 3140 { 3141 if (ibuf_rde_ctl == NULL) { 3142 log_warnx("Can't send message %u to RDE, ctl pipe closed", 3143 type); 3144 return (0); 3145 } 3146 /* 3147 * Use control socket to talk to RDE to bypass the queue of the 3148 * regular imsg socket. 3149 */ 3150 return (imsg_compose(ibuf_rde_ctl, type, 0, pid, -1, data, datalen)); 3151 } 3152 3153 int 3154 imsg_rde(int type, uint32_t peerid, void *data, u_int16_t datalen) 3155 { 3156 if (ibuf_rde == NULL) { 3157 log_warnx("Can't send message %u to RDE, pipe closed", type); 3158 return (0); 3159 } 3160 3161 return (imsg_compose(ibuf_rde, type, peerid, 0, -1, data, datalen)); 3162 } 3163 3164 void 3165 session_demote(struct peer *p, int level) 3166 { 3167 struct demote_msg msg; 3168 3169 strlcpy(msg.demote_group, p->conf.demote_group, 3170 sizeof(msg.demote_group)); 3171 msg.level = level; 3172 if (imsg_compose(ibuf_main, IMSG_DEMOTE, p->conf.id, 0, -1, 3173 &msg, sizeof(msg)) == -1) 3174 fatalx("imsg_compose error"); 3175 3176 p->demoted += level; 3177 } 3178 3179 void 3180 session_stop(struct peer *peer, u_int8_t subcode) 3181 { 3182 char data[SHUT_COMM_LEN]; 3183 size_t datalen; 3184 size_t shutcomm_len; 3185 char *communication; 3186 3187 datalen = 0; 3188 communication = peer->conf.shutcomm; 3189 3190 if ((subcode == ERR_CEASE_ADMIN_DOWN || 3191 subcode == ERR_CEASE_ADMIN_RESET) 3192 && communication && *communication) { 3193 shutcomm_len = strlen(communication); 3194 if (shutcomm_len > SHUT_COMM_LEN - 1) { 3195 log_peer_warnx(&peer->conf, 3196 "trying to send overly long shutdown reason"); 3197 } else { 3198 data[0] = shutcomm_len; 3199 datalen = shutcomm_len + sizeof(data[0]); 3200 memcpy(data + 1, communication, shutcomm_len); 3201 } 3202 } 3203 switch (peer->state) { 3204 case STATE_OPENSENT: 3205 case STATE_OPENCONFIRM: 3206 case STATE_ESTABLISHED: 3207 session_notification(peer, ERR_CEASE, subcode, data, datalen); 3208 break; 3209 default: 3210 /* session not open, no need to send notification */ 3211 break; 3212 } 3213 bgp_fsm(peer, EVNT_STOP); 3214 } 3215 3216 void 3217 merge_peers(struct bgpd_config *c, struct bgpd_config *nc) 3218 { 3219 struct peer *p, *np; 3220 3221 TAILQ_FOREACH(p, &c->peers, entry) { 3222 /* templates are handled specially */ 3223 if (p->template != NULL) 3224 continue; 3225 np = getpeerbyid(nc, p->conf.id); 3226 if (np == NULL) { 3227 p->reconf_action = RECONF_DELETE; 3228 continue; 3229 } 3230 3231 memcpy(&p->conf, &np->conf, sizeof(p->conf)); 3232 TAILQ_REMOVE(&nc->peers, np, entry); 3233 free(np); 3234 3235 p->reconf_action = RECONF_KEEP; 3236 3237 /* had demotion, is demoted, demote removed? */ 3238 if (p->demoted && !p->conf.demote_group[0]) 3239 session_demote(p, -1); 3240 3241 /* sync the RDE in case we keep the peer */ 3242 if (imsg_rde(IMSG_SESSION_ADD, p->conf.id, 3243 &p->conf, sizeof(struct peer_config)) == -1) 3244 fatalx("imsg_compose error"); 3245 3246 /* apply the config to all clones of a template */ 3247 if (p->conf.template) { 3248 struct peer *xp; 3249 TAILQ_FOREACH(xp, &conf->peers, entry) { 3250 if (xp->template != p) 3251 continue; 3252 session_template_clone(xp, NULL, xp->conf.id, 3253 xp->conf.remote_as); 3254 if (imsg_rde(IMSG_SESSION_ADD, xp->conf.id, 3255 &xp->conf, sizeof(xp->conf)) == -1) 3256 fatalx("imsg_compose error"); 3257 } 3258 } 3259 } 3260 3261 TAILQ_CONCAT(&c->peers, &nc->peers, entry); 3262 } 3263