1 /* $OpenBSD: session.c,v 1.350 2016/07/21 10:13:58 claudio Exp $ */ 2 3 /* 4 * Copyright (c) 2003, 2004, 2005 Henning Brauer <henning@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 #include <sys/types.h> 20 21 #include <sys/mman.h> 22 #include <sys/socket.h> 23 #include <sys/time.h> 24 #include <sys/resource.h> 25 #include <sys/un.h> 26 #include <net/if_types.h> 27 #include <netinet/in.h> 28 #include <netinet/ip.h> 29 #include <netinet/tcp.h> 30 #include <arpa/inet.h> 31 #include <limits.h> 32 33 #include <err.h> 34 #include <errno.h> 35 #include <fcntl.h> 36 #include <poll.h> 37 #include <pwd.h> 38 #include <signal.h> 39 #include <stdio.h> 40 #include <stdlib.h> 41 #include <string.h> 42 #include <unistd.h> 43 44 #include "bgpd.h" 45 #include "mrt.h" 46 #include "session.h" 47 48 #define PFD_PIPE_MAIN 0 49 #define PFD_PIPE_ROUTE 1 50 #define PFD_PIPE_ROUTE_CTL 2 51 #define PFD_SOCK_CTL 3 52 #define PFD_SOCK_RCTL 4 53 #define PFD_SOCK_PFKEY 5 54 #define PFD_LISTENERS_START 6 55 56 void session_sighdlr(int); 57 int setup_listeners(u_int *); 58 void init_conf(struct bgpd_config *); 59 void init_peer(struct peer *); 60 void start_timer_holdtime(struct peer *); 61 void start_timer_keepalive(struct peer *); 62 void session_close_connection(struct peer *); 63 void change_state(struct peer *, enum session_state, enum session_events); 64 int session_setup_socket(struct peer *); 65 void session_accept(int); 66 int session_connect(struct peer *); 67 void session_tcp_established(struct peer *); 68 void session_capa_ann_none(struct peer *); 69 int session_capa_add(struct ibuf *, u_int8_t, u_int8_t); 70 int session_capa_add_mp(struct ibuf *, u_int8_t); 71 int session_capa_add_gr(struct peer *, struct ibuf *, u_int8_t); 72 struct bgp_msg *session_newmsg(enum msg_type, u_int16_t); 73 int session_sendmsg(struct bgp_msg *, struct peer *); 74 void session_open(struct peer *); 75 void session_keepalive(struct peer *); 76 void session_update(u_int32_t, void *, size_t); 77 void session_notification(struct peer *, u_int8_t, u_int8_t, void *, 78 ssize_t); 79 void session_rrefresh(struct peer *, u_int8_t); 80 int session_graceful_restart(struct peer *); 81 int session_graceful_stop(struct peer *); 82 int session_dispatch_msg(struct pollfd *, struct peer *); 83 int session_process_msg(struct peer *); 84 int parse_header(struct peer *, u_char *, u_int16_t *, u_int8_t *); 85 int parse_open(struct peer *); 86 int parse_update(struct peer *); 87 int parse_refresh(struct peer *); 88 int parse_notification(struct peer *); 89 int parse_capabilities(struct peer *, u_char *, u_int16_t, u_int32_t *); 90 int capa_neg_calc(struct peer *); 91 void session_dispatch_imsg(struct imsgbuf *, int, u_int *); 92 void session_up(struct peer *); 93 void session_down(struct peer *); 94 void session_demote(struct peer *, int); 95 96 int la_cmp(struct listen_addr *, struct listen_addr *); 97 struct peer *getpeerbyip(struct sockaddr *); 98 void session_template_clone(struct peer *, struct sockaddr *, 99 u_int32_t, u_int32_t); 100 int session_match_mask(struct peer *, struct bgpd_addr *); 101 struct peer *getpeerbyid(u_int32_t); 102 103 struct bgpd_config *conf, *nconf; 104 struct bgpd_sysdep sysdep; 105 struct peer *peers, *npeers; 106 volatile sig_atomic_t session_quit; 107 int pending_reconf; 108 int csock = -1, rcsock = -1; 109 u_int peer_cnt; 110 struct imsgbuf *ibuf_rde; 111 struct imsgbuf *ibuf_rde_ctl; 112 struct imsgbuf *ibuf_main; 113 114 struct mrt_head mrthead; 115 time_t pauseaccept; 116 117 void 118 session_sighdlr(int sig) 119 { 120 switch (sig) { 121 case SIGINT: 122 case SIGTERM: 123 session_quit = 1; 124 break; 125 } 126 } 127 128 int 129 setup_listeners(u_int *la_cnt) 130 { 131 int ttl = 255; 132 int opt; 133 struct listen_addr *la; 134 u_int cnt = 0; 135 136 TAILQ_FOREACH(la, conf->listen_addrs, entry) { 137 la->reconf = RECONF_NONE; 138 cnt++; 139 140 if (la->flags & LISTENER_LISTENING) 141 continue; 142 143 if (la->fd == -1) { 144 log_warn("cannot establish listener on %s: invalid fd", 145 log_sockaddr((struct sockaddr *)&la->sa)); 146 continue; 147 } 148 149 opt = 1; 150 if (setsockopt(la->fd, IPPROTO_TCP, TCP_MD5SIG, 151 &opt, sizeof(opt)) == -1) { 152 if (errno == ENOPROTOOPT) { /* system w/o md5sig */ 153 log_warnx("md5sig not available, disabling"); 154 sysdep.no_md5sig = 1; 155 } else 156 fatal("setsockopt TCP_MD5SIG"); 157 } 158 159 /* set ttl to 255 so that ttl-security works */ 160 if (la->sa.ss_family == AF_INET && setsockopt(la->fd, 161 IPPROTO_IP, IP_TTL, &ttl, sizeof(ttl)) == -1) { 162 log_warn("setup_listeners setsockopt TTL"); 163 continue; 164 } 165 if (la->sa.ss_family == AF_INET6 && setsockopt(la->fd, 166 IPPROTO_IPV6, IPV6_UNICAST_HOPS, &ttl, sizeof(ttl)) == -1) { 167 log_warn("setup_listeners setsockopt hoplimit"); 168 continue; 169 } 170 171 if (listen(la->fd, MAX_BACKLOG)) { 172 close(la->fd); 173 fatal("listen"); 174 } 175 176 la->flags |= LISTENER_LISTENING; 177 178 log_info("listening on %s", 179 log_sockaddr((struct sockaddr *)&la->sa)); 180 } 181 182 *la_cnt = cnt; 183 184 return (0); 185 } 186 187 void 188 session_main(int debug, int verbose) 189 { 190 int timeout, pfkeysock; 191 unsigned int i, j, idx_peers, idx_listeners, idx_mrts; 192 u_int pfd_elms = 0, peer_l_elms = 0, mrt_l_elms = 0; 193 u_int listener_cnt, ctl_cnt, mrt_cnt; 194 u_int new_cnt; 195 u_int32_t ctl_queued; 196 struct passwd *pw; 197 struct peer *p, **peer_l = NULL, *last, *next; 198 struct mrt *m, *xm, **mrt_l = NULL; 199 struct pollfd *pfd = NULL; 200 struct ctl_conn *ctl_conn; 201 struct listen_addr *la; 202 void *newp; 203 short events; 204 205 if ((pw = getpwnam(BGPD_USER)) == NULL) 206 fatal(NULL); 207 208 if (chroot(pw->pw_dir) == -1) 209 fatal("chroot"); 210 if (chdir("/") == -1) 211 fatal("chdir(\"/\")"); 212 213 setproctitle("session engine"); 214 bgpd_process = PROC_SE; 215 pfkeysock = pfkey_init(&sysdep); 216 217 if (setgroups(1, &pw->pw_gid) || 218 setresgid(pw->pw_gid, pw->pw_gid, pw->pw_gid) || 219 setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid)) 220 fatal("can't drop privileges"); 221 222 if (pledge("stdio inet recvfd", NULL) == -1) 223 fatal("pledge"); 224 225 signal(SIGTERM, session_sighdlr); 226 signal(SIGINT, session_sighdlr); 227 signal(SIGPIPE, SIG_IGN); 228 signal(SIGHUP, SIG_IGN); 229 signal(SIGALRM, SIG_IGN); 230 signal(SIGUSR1, SIG_IGN); 231 232 if ((ibuf_main = malloc(sizeof(struct imsgbuf))) == NULL) 233 fatal(NULL); 234 imsg_init(ibuf_main, 3); 235 236 TAILQ_INIT(&ctl_conns); 237 LIST_INIT(&mrthead); 238 listener_cnt = 0; 239 peer_cnt = 0; 240 ctl_cnt = 0; 241 242 if ((conf = calloc(1, sizeof(struct bgpd_config))) == NULL) 243 fatal(NULL); 244 if ((conf->listen_addrs = calloc(1, sizeof(struct listen_addrs))) == 245 NULL) 246 fatal(NULL); 247 TAILQ_INIT(conf->listen_addrs); 248 249 log_info("session engine ready"); 250 251 while (session_quit == 0) { 252 /* check for peers to be initialized or deleted */ 253 last = NULL; 254 if (!pending_reconf) { 255 for (p = peers; p != NULL; p = next) { 256 next = p->next; 257 /* cloned peer that idled out? */ 258 if (p->template && (p->state == STATE_IDLE || 259 p->state == STATE_ACTIVE) && 260 time(NULL) - p->stats.last_updown >= 261 INTERVAL_HOLD_CLONED) 262 p->conf.reconf_action = RECONF_DELETE; 263 264 /* new peer that needs init? */ 265 if (p->state == STATE_NONE) 266 init_peer(p); 267 268 /* reinit due? */ 269 if (p->conf.reconf_action == RECONF_REINIT) { 270 session_stop(p, ERR_CEASE_ADMIN_RESET); 271 if (!p->conf.down) 272 timer_set(p, Timer_IdleHold, 0); 273 } 274 275 /* deletion due? */ 276 if (p->conf.reconf_action == RECONF_DELETE) { 277 if (p->demoted) 278 session_demote(p, -1); 279 p->conf.demote_group[0] = 0; 280 session_stop(p, ERR_CEASE_PEER_UNCONF); 281 log_peer_warnx(&p->conf, "removed"); 282 if (last != NULL) 283 last->next = next; 284 else 285 peers = next; 286 timer_remove_all(p); 287 free(p); 288 peer_cnt--; 289 continue; 290 } 291 p->conf.reconf_action = RECONF_NONE; 292 last = p; 293 } 294 } 295 296 if (peer_cnt > peer_l_elms) { 297 if ((newp = reallocarray(peer_l, peer_cnt, 298 sizeof(struct peer *))) == NULL) { 299 /* panic for now */ 300 log_warn("could not resize peer_l from %u -> %u" 301 " entries", peer_l_elms, peer_cnt); 302 fatalx("exiting"); 303 } 304 peer_l = newp; 305 peer_l_elms = peer_cnt; 306 } 307 308 mrt_cnt = 0; 309 for (m = LIST_FIRST(&mrthead); m != NULL; m = xm) { 310 xm = LIST_NEXT(m, entry); 311 if (m->state == MRT_STATE_REMOVE) { 312 mrt_clean(m); 313 LIST_REMOVE(m, entry); 314 free(m); 315 continue; 316 } 317 if (m->wbuf.queued) 318 mrt_cnt++; 319 } 320 321 if (mrt_cnt > mrt_l_elms) { 322 if ((newp = reallocarray(mrt_l, mrt_cnt, 323 sizeof(struct mrt *))) == NULL) { 324 /* panic for now */ 325 log_warn("could not resize mrt_l from %u -> %u" 326 " entries", mrt_l_elms, mrt_cnt); 327 fatalx("exiting"); 328 } 329 mrt_l = newp; 330 mrt_l_elms = mrt_cnt; 331 } 332 333 new_cnt = PFD_LISTENERS_START + listener_cnt + peer_cnt + 334 ctl_cnt + mrt_cnt; 335 if (new_cnt > pfd_elms) { 336 if ((newp = reallocarray(pfd, new_cnt, 337 sizeof(struct pollfd))) == NULL) { 338 /* panic for now */ 339 log_warn("could not resize pfd from %u -> %u" 340 " entries", pfd_elms, new_cnt); 341 fatalx("exiting"); 342 } 343 pfd = newp; 344 pfd_elms = new_cnt; 345 } 346 347 bzero(pfd, sizeof(struct pollfd) * pfd_elms); 348 349 set_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main); 350 set_pollfd(&pfd[PFD_PIPE_ROUTE], ibuf_rde); 351 352 ctl_queued = 0; 353 TAILQ_FOREACH(ctl_conn, &ctl_conns, entry) 354 ctl_queued += ctl_conn->ibuf.w.queued; 355 356 /* 357 * Do not act as unlimited buffer. Don't read in more 358 * messages if the ctl sockets are getting full. 359 */ 360 if (ctl_queued < SESSION_CTL_QUEUE_MAX) 361 set_pollfd(&pfd[PFD_PIPE_ROUTE_CTL], ibuf_rde_ctl); 362 363 if (pauseaccept == 0) { 364 pfd[PFD_SOCK_CTL].fd = csock; 365 pfd[PFD_SOCK_CTL].events = POLLIN; 366 pfd[PFD_SOCK_RCTL].fd = rcsock; 367 pfd[PFD_SOCK_RCTL].events = POLLIN; 368 } else { 369 pfd[PFD_SOCK_CTL].fd = -1; 370 pfd[PFD_SOCK_RCTL].fd = -1; 371 } 372 pfd[PFD_SOCK_PFKEY].fd = pfkeysock; 373 pfd[PFD_SOCK_PFKEY].events = POLLIN; 374 375 i = PFD_LISTENERS_START; 376 TAILQ_FOREACH(la, conf->listen_addrs, entry) { 377 if (pauseaccept == 0) { 378 pfd[i].fd = la->fd; 379 pfd[i].events = POLLIN; 380 } else 381 pfd[i].fd = -1; 382 i++; 383 } 384 idx_listeners = i; 385 timeout = 240; /* loop every 240s at least */ 386 387 for (p = peers; p != NULL; p = p->next) { 388 time_t nextaction; 389 struct peer_timer *pt; 390 391 /* check timers */ 392 if ((pt = timer_nextisdue(p)) != NULL) { 393 switch (pt->type) { 394 case Timer_Hold: 395 bgp_fsm(p, EVNT_TIMER_HOLDTIME); 396 break; 397 case Timer_ConnectRetry: 398 bgp_fsm(p, EVNT_TIMER_CONNRETRY); 399 break; 400 case Timer_Keepalive: 401 bgp_fsm(p, EVNT_TIMER_KEEPALIVE); 402 break; 403 case Timer_IdleHold: 404 bgp_fsm(p, EVNT_START); 405 break; 406 case Timer_IdleHoldReset: 407 p->IdleHoldTime /= 2; 408 if (p->IdleHoldTime <= 409 INTERVAL_IDLE_HOLD_INITIAL) { 410 p->IdleHoldTime = 411 INTERVAL_IDLE_HOLD_INITIAL; 412 timer_stop(p, 413 Timer_IdleHoldReset); 414 p->errcnt = 0; 415 } else 416 timer_set(p, 417 Timer_IdleHoldReset, 418 p->IdleHoldTime); 419 break; 420 case Timer_CarpUndemote: 421 timer_stop(p, Timer_CarpUndemote); 422 if (p->demoted && 423 p->state == STATE_ESTABLISHED) 424 session_demote(p, -1); 425 break; 426 case Timer_RestartTimeout: 427 timer_stop(p, Timer_RestartTimeout); 428 session_graceful_stop(p); 429 break; 430 default: 431 fatalx("King Bula lost in time"); 432 } 433 } 434 if ((nextaction = timer_nextduein(p)) != -1 && 435 nextaction < timeout) 436 timeout = nextaction; 437 438 /* are we waiting for a write? */ 439 events = POLLIN; 440 if (p->wbuf.queued > 0 || p->state == STATE_CONNECT) 441 events |= POLLOUT; 442 /* is there still work to do? */ 443 if (p->rbuf && p->rbuf->wpos) 444 timeout = 0; 445 446 /* poll events */ 447 if (p->fd != -1 && events != 0) { 448 pfd[i].fd = p->fd; 449 pfd[i].events = events; 450 peer_l[i - idx_listeners] = p; 451 i++; 452 } 453 } 454 455 idx_peers = i; 456 457 LIST_FOREACH(m, &mrthead, entry) 458 if (m->wbuf.queued) { 459 pfd[i].fd = m->wbuf.fd; 460 pfd[i].events = POLLOUT; 461 mrt_l[i - idx_peers] = m; 462 i++; 463 } 464 465 idx_mrts = i; 466 467 TAILQ_FOREACH(ctl_conn, &ctl_conns, entry) { 468 pfd[i].fd = ctl_conn->ibuf.fd; 469 pfd[i].events = POLLIN; 470 if (ctl_conn->ibuf.w.queued > 0) 471 pfd[i].events |= POLLOUT; 472 i++; 473 } 474 475 if (pauseaccept && timeout > 1) 476 timeout = 1; 477 if (timeout < 0) 478 timeout = 0; 479 if (poll(pfd, i, timeout * 1000) == -1) 480 if (errno != EINTR) 481 fatal("poll error"); 482 483 /* 484 * If we previously saw fd exhaustion, we stop accept() 485 * for 1 second to throttle the accept() loop. 486 */ 487 if (pauseaccept && getmonotime() > pauseaccept + 1) 488 pauseaccept = 0; 489 490 if (handle_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main) == -1) { 491 log_warnx("SE: Lost connection to parent"); 492 session_quit = 1; 493 continue; 494 } else 495 session_dispatch_imsg(ibuf_main, PFD_PIPE_MAIN, 496 &listener_cnt); 497 498 if (handle_pollfd(&pfd[PFD_PIPE_ROUTE], ibuf_rde) == -1) { 499 log_warnx("SE: Lost connection to RDE"); 500 msgbuf_clear(&ibuf_rde->w); 501 free(ibuf_rde); 502 ibuf_rde = NULL; 503 } else 504 session_dispatch_imsg(ibuf_rde, PFD_PIPE_ROUTE, 505 &listener_cnt); 506 507 if (handle_pollfd(&pfd[PFD_PIPE_ROUTE_CTL], ibuf_rde_ctl) == 508 -1) { 509 log_warnx("SE: Lost connection to RDE control"); 510 msgbuf_clear(&ibuf_rde_ctl->w); 511 free(ibuf_rde_ctl); 512 ibuf_rde_ctl = NULL; 513 } else 514 session_dispatch_imsg(ibuf_rde_ctl, PFD_PIPE_ROUTE_CTL, 515 &listener_cnt); 516 517 if (pfd[PFD_SOCK_CTL].revents & POLLIN) 518 ctl_cnt += control_accept(csock, 0); 519 520 if (pfd[PFD_SOCK_RCTL].revents & POLLIN) 521 ctl_cnt += control_accept(rcsock, 1); 522 523 if (pfd[PFD_SOCK_PFKEY].revents & POLLIN) { 524 if (pfkey_read(pfkeysock, NULL) == -1) { 525 log_warnx("pfkey_read failed, exiting..."); 526 session_quit = 1; 527 } 528 } 529 530 for (j = PFD_LISTENERS_START; j < idx_listeners; j++) 531 if (pfd[j].revents & POLLIN) 532 session_accept(pfd[j].fd); 533 534 for (; j < idx_peers; j++) 535 session_dispatch_msg(&pfd[j], 536 peer_l[j - idx_listeners]); 537 538 for (p = peers; p != NULL; p = p->next) 539 if (p->rbuf && p->rbuf->wpos) 540 session_process_msg(p); 541 542 for (; j < idx_mrts; j++) 543 if (pfd[j].revents & POLLOUT) 544 mrt_write(mrt_l[j - idx_peers]); 545 546 for (; j < i; j++) 547 control_dispatch_msg(&pfd[j], &ctl_cnt); 548 } 549 550 while ((p = peers) != NULL) { 551 peers = p->next; 552 session_stop(p, ERR_CEASE_ADMIN_DOWN); 553 pfkey_remove(p); 554 free(p); 555 } 556 557 while ((m = LIST_FIRST(&mrthead)) != NULL) { 558 mrt_clean(m); 559 LIST_REMOVE(m, entry); 560 free(m); 561 } 562 563 while ((la = TAILQ_FIRST(conf->listen_addrs)) != NULL) { 564 TAILQ_REMOVE(conf->listen_addrs, la, entry); 565 free(la); 566 } 567 free(conf->listen_addrs); 568 free(peer_l); 569 free(mrt_l); 570 free(pfd); 571 572 msgbuf_write(&ibuf_rde->w); 573 msgbuf_clear(&ibuf_rde->w); 574 free(ibuf_rde); 575 msgbuf_write(&ibuf_main->w); 576 msgbuf_clear(&ibuf_main->w); 577 free(ibuf_main); 578 579 control_shutdown(csock); 580 control_shutdown(rcsock); 581 log_info("session engine exiting"); 582 _exit(0); 583 } 584 585 void 586 init_conf(struct bgpd_config *c) 587 { 588 if (!c->holdtime) 589 c->holdtime = INTERVAL_HOLD; 590 if (!c->connectretry) 591 c->connectretry = INTERVAL_CONNECTRETRY; 592 } 593 594 void 595 init_peer(struct peer *p) 596 { 597 TAILQ_INIT(&p->timers); 598 p->fd = p->wbuf.fd = -1; 599 600 if (p->conf.if_depend[0]) 601 imsg_compose(ibuf_main, IMSG_IFINFO, 0, 0, -1, 602 p->conf.if_depend, sizeof(p->conf.if_depend)); 603 else 604 p->depend_ok = 1; 605 606 peer_cnt++; 607 608 change_state(p, STATE_IDLE, EVNT_NONE); 609 if (p->conf.down) 610 timer_stop(p, Timer_IdleHold); /* no autostart */ 611 else 612 timer_set(p, Timer_IdleHold, 0); /* start ASAP */ 613 614 /* 615 * on startup, demote if requested. 616 * do not handle new peers. they must reach ESTABLISHED beforehands. 617 * peers added at runtime have reconf_action set to RECONF_REINIT. 618 */ 619 if (p->conf.reconf_action != RECONF_REINIT && p->conf.demote_group[0]) 620 session_demote(p, +1); 621 } 622 623 void 624 bgp_fsm(struct peer *peer, enum session_events event) 625 { 626 switch (peer->state) { 627 case STATE_NONE: 628 /* nothing */ 629 break; 630 case STATE_IDLE: 631 switch (event) { 632 case EVNT_START: 633 timer_stop(peer, Timer_Hold); 634 timer_stop(peer, Timer_Keepalive); 635 timer_stop(peer, Timer_IdleHold); 636 637 /* allocate read buffer */ 638 peer->rbuf = calloc(1, sizeof(struct ibuf_read)); 639 if (peer->rbuf == NULL) 640 fatal(NULL); 641 642 /* init write buffer */ 643 msgbuf_init(&peer->wbuf); 644 645 /* init pfkey - remove old if any, load new ones */ 646 pfkey_remove(peer); 647 if (pfkey_establish(peer) == -1) { 648 log_peer_warnx(&peer->conf, 649 "pfkey setup failed"); 650 return; 651 } 652 653 peer->stats.last_sent_errcode = 0; 654 peer->stats.last_sent_suberr = 0; 655 656 if (!peer->depend_ok) 657 timer_stop(peer, Timer_ConnectRetry); 658 else if (peer->passive || peer->conf.passive || 659 peer->conf.template) { 660 change_state(peer, STATE_ACTIVE, event); 661 timer_stop(peer, Timer_ConnectRetry); 662 } else { 663 change_state(peer, STATE_CONNECT, event); 664 timer_set(peer, Timer_ConnectRetry, 665 conf->connectretry); 666 session_connect(peer); 667 } 668 peer->passive = 0; 669 break; 670 default: 671 /* ignore */ 672 break; 673 } 674 break; 675 case STATE_CONNECT: 676 switch (event) { 677 case EVNT_START: 678 /* ignore */ 679 break; 680 case EVNT_CON_OPEN: 681 session_tcp_established(peer); 682 session_open(peer); 683 timer_stop(peer, Timer_ConnectRetry); 684 peer->holdtime = INTERVAL_HOLD_INITIAL; 685 start_timer_holdtime(peer); 686 change_state(peer, STATE_OPENSENT, event); 687 break; 688 case EVNT_CON_OPENFAIL: 689 timer_set(peer, Timer_ConnectRetry, 690 conf->connectretry); 691 session_close_connection(peer); 692 change_state(peer, STATE_ACTIVE, event); 693 break; 694 case EVNT_TIMER_CONNRETRY: 695 timer_set(peer, Timer_ConnectRetry, 696 conf->connectretry); 697 session_connect(peer); 698 break; 699 default: 700 change_state(peer, STATE_IDLE, event); 701 break; 702 } 703 break; 704 case STATE_ACTIVE: 705 switch (event) { 706 case EVNT_START: 707 /* ignore */ 708 break; 709 case EVNT_CON_OPEN: 710 session_tcp_established(peer); 711 session_open(peer); 712 timer_stop(peer, Timer_ConnectRetry); 713 peer->holdtime = INTERVAL_HOLD_INITIAL; 714 start_timer_holdtime(peer); 715 change_state(peer, STATE_OPENSENT, event); 716 break; 717 case EVNT_CON_OPENFAIL: 718 timer_set(peer, Timer_ConnectRetry, 719 conf->connectretry); 720 session_close_connection(peer); 721 change_state(peer, STATE_ACTIVE, event); 722 break; 723 case EVNT_TIMER_CONNRETRY: 724 timer_set(peer, Timer_ConnectRetry, 725 peer->holdtime); 726 change_state(peer, STATE_CONNECT, event); 727 session_connect(peer); 728 break; 729 default: 730 change_state(peer, STATE_IDLE, event); 731 break; 732 } 733 break; 734 case STATE_OPENSENT: 735 switch (event) { 736 case EVNT_START: 737 /* ignore */ 738 break; 739 case EVNT_STOP: 740 change_state(peer, STATE_IDLE, event); 741 break; 742 case EVNT_CON_CLOSED: 743 session_close_connection(peer); 744 timer_set(peer, Timer_ConnectRetry, 745 conf->connectretry); 746 change_state(peer, STATE_ACTIVE, event); 747 break; 748 case EVNT_CON_FATAL: 749 change_state(peer, STATE_IDLE, event); 750 break; 751 case EVNT_TIMER_HOLDTIME: 752 session_notification(peer, ERR_HOLDTIMEREXPIRED, 753 0, NULL, 0); 754 change_state(peer, STATE_IDLE, event); 755 break; 756 case EVNT_RCVD_OPEN: 757 /* parse_open calls change_state itself on failure */ 758 if (parse_open(peer)) 759 break; 760 session_keepalive(peer); 761 change_state(peer, STATE_OPENCONFIRM, event); 762 break; 763 case EVNT_RCVD_NOTIFICATION: 764 if (parse_notification(peer)) { 765 change_state(peer, STATE_IDLE, event); 766 /* don't punish, capa negotiation */ 767 timer_set(peer, Timer_IdleHold, 0); 768 peer->IdleHoldTime /= 2; 769 } else 770 change_state(peer, STATE_IDLE, event); 771 break; 772 default: 773 session_notification(peer, 774 ERR_FSM, ERR_FSM_UNEX_OPENSENT, NULL, 0); 775 change_state(peer, STATE_IDLE, event); 776 break; 777 } 778 break; 779 case STATE_OPENCONFIRM: 780 switch (event) { 781 case EVNT_START: 782 /* ignore */ 783 break; 784 case EVNT_STOP: 785 change_state(peer, STATE_IDLE, event); 786 break; 787 case EVNT_CON_CLOSED: 788 case EVNT_CON_FATAL: 789 change_state(peer, STATE_IDLE, event); 790 break; 791 case EVNT_TIMER_HOLDTIME: 792 session_notification(peer, ERR_HOLDTIMEREXPIRED, 793 0, NULL, 0); 794 change_state(peer, STATE_IDLE, event); 795 break; 796 case EVNT_TIMER_KEEPALIVE: 797 session_keepalive(peer); 798 break; 799 case EVNT_RCVD_KEEPALIVE: 800 start_timer_holdtime(peer); 801 change_state(peer, STATE_ESTABLISHED, event); 802 break; 803 case EVNT_RCVD_NOTIFICATION: 804 parse_notification(peer); 805 change_state(peer, STATE_IDLE, event); 806 break; 807 default: 808 session_notification(peer, 809 ERR_FSM, ERR_FSM_UNEX_OPENCONFIRM, NULL, 0); 810 change_state(peer, STATE_IDLE, event); 811 break; 812 } 813 break; 814 case STATE_ESTABLISHED: 815 switch (event) { 816 case EVNT_START: 817 /* ignore */ 818 break; 819 case EVNT_STOP: 820 change_state(peer, STATE_IDLE, event); 821 break; 822 case EVNT_CON_CLOSED: 823 case EVNT_CON_FATAL: 824 change_state(peer, STATE_IDLE, event); 825 break; 826 case EVNT_TIMER_HOLDTIME: 827 session_notification(peer, ERR_HOLDTIMEREXPIRED, 828 0, NULL, 0); 829 change_state(peer, STATE_IDLE, event); 830 break; 831 case EVNT_TIMER_KEEPALIVE: 832 session_keepalive(peer); 833 break; 834 case EVNT_RCVD_KEEPALIVE: 835 start_timer_holdtime(peer); 836 break; 837 case EVNT_RCVD_UPDATE: 838 start_timer_holdtime(peer); 839 if (parse_update(peer)) 840 change_state(peer, STATE_IDLE, event); 841 else 842 start_timer_holdtime(peer); 843 break; 844 case EVNT_RCVD_NOTIFICATION: 845 parse_notification(peer); 846 change_state(peer, STATE_IDLE, event); 847 break; 848 default: 849 session_notification(peer, 850 ERR_FSM, ERR_FSM_UNEX_ESTABLISHED, NULL, 0); 851 change_state(peer, STATE_IDLE, event); 852 break; 853 } 854 break; 855 } 856 } 857 858 void 859 start_timer_holdtime(struct peer *peer) 860 { 861 if (peer->holdtime > 0) 862 timer_set(peer, Timer_Hold, peer->holdtime); 863 else 864 timer_stop(peer, Timer_Hold); 865 } 866 867 void 868 start_timer_keepalive(struct peer *peer) 869 { 870 if (peer->holdtime > 0) 871 timer_set(peer, Timer_Keepalive, peer->holdtime / 3); 872 else 873 timer_stop(peer, Timer_Keepalive); 874 } 875 876 void 877 session_close_connection(struct peer *peer) 878 { 879 if (peer->fd != -1) { 880 close(peer->fd); 881 pauseaccept = 0; 882 } 883 peer->fd = peer->wbuf.fd = -1; 884 } 885 886 void 887 change_state(struct peer *peer, enum session_state state, 888 enum session_events event) 889 { 890 struct mrt *mrt; 891 892 switch (state) { 893 case STATE_IDLE: 894 /* carp demotion first. new peers handled in init_peer */ 895 if (peer->state == STATE_ESTABLISHED && 896 peer->conf.demote_group[0] && !peer->demoted) 897 session_demote(peer, +1); 898 899 /* 900 * try to write out what's buffered (maybe a notification), 901 * don't bother if it fails 902 */ 903 if (peer->state >= STATE_OPENSENT && peer->wbuf.queued) 904 msgbuf_write(&peer->wbuf); 905 906 /* 907 * we must start the timer for the next EVNT_START 908 * if we are coming here due to an error and the 909 * session was not established successfully before, the 910 * starttimerinterval needs to be exponentially increased 911 */ 912 if (peer->IdleHoldTime == 0) 913 peer->IdleHoldTime = INTERVAL_IDLE_HOLD_INITIAL; 914 peer->holdtime = INTERVAL_HOLD_INITIAL; 915 timer_stop(peer, Timer_ConnectRetry); 916 timer_stop(peer, Timer_Keepalive); 917 timer_stop(peer, Timer_Hold); 918 timer_stop(peer, Timer_IdleHold); 919 timer_stop(peer, Timer_IdleHoldReset); 920 session_close_connection(peer); 921 msgbuf_clear(&peer->wbuf); 922 free(peer->rbuf); 923 peer->rbuf = NULL; 924 bzero(&peer->capa.peer, sizeof(peer->capa.peer)); 925 926 if (event != EVNT_STOP) { 927 timer_set(peer, Timer_IdleHold, peer->IdleHoldTime); 928 if (event != EVNT_NONE && 929 peer->IdleHoldTime < MAX_IDLE_HOLD/2) 930 peer->IdleHoldTime *= 2; 931 } 932 if (peer->state == STATE_ESTABLISHED) { 933 if (peer->capa.neg.grestart.restart == 2 && 934 (event == EVNT_CON_CLOSED || 935 event == EVNT_CON_FATAL)) { 936 /* don't punish graceful restart */ 937 timer_set(peer, Timer_IdleHold, 0); 938 peer->IdleHoldTime /= 2; 939 session_graceful_restart(peer); 940 } else 941 session_down(peer); 942 } 943 if (peer->state == STATE_NONE || 944 peer->state == STATE_ESTABLISHED) { 945 /* initialize capability negotiation structures */ 946 memcpy(&peer->capa.ann, &peer->conf.capabilities, 947 sizeof(peer->capa.ann)); 948 if (!peer->conf.announce_capa) 949 session_capa_ann_none(peer); 950 } 951 break; 952 case STATE_CONNECT: 953 if (peer->state == STATE_ESTABLISHED && 954 peer->capa.neg.grestart.restart == 2) { 955 /* do the graceful restart dance */ 956 session_graceful_restart(peer); 957 peer->holdtime = INTERVAL_HOLD_INITIAL; 958 timer_stop(peer, Timer_ConnectRetry); 959 timer_stop(peer, Timer_Keepalive); 960 timer_stop(peer, Timer_Hold); 961 timer_stop(peer, Timer_IdleHold); 962 timer_stop(peer, Timer_IdleHoldReset); 963 session_close_connection(peer); 964 msgbuf_clear(&peer->wbuf); 965 bzero(&peer->capa.peer, sizeof(peer->capa.peer)); 966 } 967 break; 968 case STATE_ACTIVE: 969 break; 970 case STATE_OPENSENT: 971 break; 972 case STATE_OPENCONFIRM: 973 break; 974 case STATE_ESTABLISHED: 975 timer_set(peer, Timer_IdleHoldReset, peer->IdleHoldTime); 976 if (peer->demoted) 977 timer_set(peer, Timer_CarpUndemote, 978 INTERVAL_HOLD_DEMOTED); 979 session_up(peer); 980 break; 981 default: /* something seriously fucked */ 982 break; 983 } 984 985 log_statechange(peer, state, event); 986 LIST_FOREACH(mrt, &mrthead, entry) { 987 if (!(mrt->type == MRT_ALL_IN || mrt->type == MRT_ALL_OUT)) 988 continue; 989 if ((mrt->peer_id == 0 && mrt->group_id == 0) || 990 mrt->peer_id == peer->conf.id || (mrt->group_id != 0 && 991 mrt->group_id == peer->conf.groupid)) 992 mrt_dump_state(mrt, peer->state, state, peer); 993 } 994 peer->prev_state = peer->state; 995 peer->state = state; 996 } 997 998 void 999 session_accept(int listenfd) 1000 { 1001 int connfd; 1002 int opt; 1003 socklen_t len; 1004 struct sockaddr_storage cliaddr; 1005 struct peer *p = NULL; 1006 1007 len = sizeof(cliaddr); 1008 if ((connfd = accept4(listenfd, 1009 (struct sockaddr *)&cliaddr, &len, 1010 SOCK_CLOEXEC | SOCK_NONBLOCK)) == -1) { 1011 if (errno == ENFILE || errno == EMFILE) 1012 pauseaccept = getmonotime(); 1013 else if (errno != EWOULDBLOCK && errno != EINTR && 1014 errno != ECONNABORTED) 1015 log_warn("accept"); 1016 return; 1017 } 1018 1019 p = getpeerbyip((struct sockaddr *)&cliaddr); 1020 1021 if (p != NULL && p->state == STATE_IDLE && p->errcnt < 2) { 1022 if (timer_running(p, Timer_IdleHold, NULL)) { 1023 /* fast reconnect after clear */ 1024 p->passive = 1; 1025 bgp_fsm(p, EVNT_START); 1026 } 1027 } 1028 1029 if (p != NULL && 1030 (p->state == STATE_CONNECT || p->state == STATE_ACTIVE)) { 1031 if (p->fd != -1) { 1032 if (p->state == STATE_CONNECT) 1033 session_close_connection(p); 1034 else { 1035 close(connfd); 1036 return; 1037 } 1038 } 1039 1040 open: 1041 if (p->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) { 1042 log_peer_warnx(&p->conf, 1043 "ipsec or md5sig configured but not available"); 1044 close(connfd); 1045 return; 1046 } 1047 1048 if (p->conf.auth.method == AUTH_MD5SIG) { 1049 if (sysdep.no_md5sig) { 1050 log_peer_warnx(&p->conf, 1051 "md5sig configured but not available"); 1052 close(connfd); 1053 return; 1054 } 1055 len = sizeof(opt); 1056 if (getsockopt(connfd, IPPROTO_TCP, TCP_MD5SIG, 1057 &opt, &len) == -1) 1058 fatal("getsockopt TCP_MD5SIG"); 1059 if (!opt) { /* non-md5'd connection! */ 1060 log_peer_warnx(&p->conf, 1061 "connection attempt without md5 signature"); 1062 close(connfd); 1063 return; 1064 } 1065 } 1066 p->fd = p->wbuf.fd = connfd; 1067 if (session_setup_socket(p)) { 1068 close(connfd); 1069 return; 1070 } 1071 bgp_fsm(p, EVNT_CON_OPEN); 1072 return; 1073 } else if (p != NULL && p->state == STATE_ESTABLISHED && 1074 p->capa.neg.grestart.restart == 2) { 1075 /* first do the graceful restart dance */ 1076 change_state(p, STATE_CONNECT, EVNT_CON_CLOSED); 1077 /* then do part of the open dance */ 1078 goto open; 1079 } else { 1080 log_conn_attempt(p, (struct sockaddr *)&cliaddr); 1081 close(connfd); 1082 } 1083 } 1084 1085 int 1086 session_connect(struct peer *peer) 1087 { 1088 int opt = 1; 1089 struct sockaddr *sa; 1090 1091 /* 1092 * we do not need the overcomplicated collision detection RFC 1771 1093 * describes; we simply make sure there is only ever one concurrent 1094 * tcp connection per peer. 1095 */ 1096 if (peer->fd != -1) 1097 return (-1); 1098 1099 if ((peer->fd = socket(aid2af(peer->conf.remote_addr.aid), 1100 SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, IPPROTO_TCP)) == -1) { 1101 log_peer_warn(&peer->conf, "session_connect socket"); 1102 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1103 return (-1); 1104 } 1105 1106 if (peer->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) { 1107 log_peer_warnx(&peer->conf, 1108 "ipsec or md5sig configured but not available"); 1109 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1110 return (-1); 1111 } 1112 1113 if (peer->conf.auth.method == AUTH_MD5SIG) { 1114 if (sysdep.no_md5sig) { 1115 log_peer_warnx(&peer->conf, 1116 "md5sig configured but not available"); 1117 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1118 return (-1); 1119 } 1120 if (setsockopt(peer->fd, IPPROTO_TCP, TCP_MD5SIG, 1121 &opt, sizeof(opt)) == -1) { 1122 log_peer_warn(&peer->conf, "setsockopt md5sig"); 1123 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1124 return (-1); 1125 } 1126 } 1127 peer->wbuf.fd = peer->fd; 1128 1129 /* if update source is set we need to bind() */ 1130 if ((sa = addr2sa(&peer->conf.local_addr, 0)) != NULL) { 1131 if (bind(peer->fd, sa, sa->sa_len) == -1) { 1132 log_peer_warn(&peer->conf, "session_connect bind"); 1133 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1134 return (-1); 1135 } 1136 } 1137 1138 if (session_setup_socket(peer)) { 1139 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1140 return (-1); 1141 } 1142 1143 sa = addr2sa(&peer->conf.remote_addr, BGP_PORT); 1144 if (connect(peer->fd, sa, sa->sa_len) == -1) { 1145 if (errno != EINPROGRESS) { 1146 if (errno != peer->lasterr) 1147 log_peer_warn(&peer->conf, "connect"); 1148 peer->lasterr = errno; 1149 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1150 return (-1); 1151 } 1152 } else 1153 bgp_fsm(peer, EVNT_CON_OPEN); 1154 1155 return (0); 1156 } 1157 1158 int 1159 session_setup_socket(struct peer *p) 1160 { 1161 int ttl = p->conf.distance; 1162 int pre = IPTOS_PREC_INTERNETCONTROL; 1163 int nodelay = 1; 1164 int bsize; 1165 1166 switch (p->conf.remote_addr.aid) { 1167 case AID_INET: 1168 /* set precedence, see RFC 1771 appendix 5 */ 1169 if (setsockopt(p->fd, IPPROTO_IP, IP_TOS, &pre, sizeof(pre)) == 1170 -1) { 1171 log_peer_warn(&p->conf, 1172 "session_setup_socket setsockopt TOS"); 1173 return (-1); 1174 } 1175 1176 if (p->conf.ebgp) { 1177 /* set TTL to foreign router's distance 1178 1=direct n=multihop with ttlsec, we always use 255 */ 1179 if (p->conf.ttlsec) { 1180 ttl = 256 - p->conf.distance; 1181 if (setsockopt(p->fd, IPPROTO_IP, IP_MINTTL, 1182 &ttl, sizeof(ttl)) == -1) { 1183 log_peer_warn(&p->conf, 1184 "session_setup_socket: " 1185 "setsockopt MINTTL"); 1186 return (-1); 1187 } 1188 ttl = 255; 1189 } 1190 1191 if (setsockopt(p->fd, IPPROTO_IP, IP_TTL, &ttl, 1192 sizeof(ttl)) == -1) { 1193 log_peer_warn(&p->conf, 1194 "session_setup_socket setsockopt TTL"); 1195 return (-1); 1196 } 1197 } 1198 break; 1199 case AID_INET6: 1200 if (p->conf.ebgp) { 1201 /* set hoplimit to foreign router's distance 1202 1=direct n=multihop with ttlsec, we always use 255 */ 1203 if (p->conf.ttlsec) { 1204 ttl = 256 - p->conf.distance; 1205 if (setsockopt(p->fd, IPPROTO_IPV6, 1206 IPV6_MINHOPCOUNT, &ttl, sizeof(ttl)) 1207 == -1) { 1208 log_peer_warn(&p->conf, 1209 "session_setup_socket: " 1210 "setsockopt MINHOPCOUNT"); 1211 return (-1); 1212 } 1213 ttl = 255; 1214 } 1215 if (setsockopt(p->fd, IPPROTO_IPV6, IPV6_UNICAST_HOPS, 1216 &ttl, sizeof(ttl)) == -1) { 1217 log_peer_warn(&p->conf, 1218 "session_setup_socket setsockopt hoplimit"); 1219 return (-1); 1220 } 1221 } 1222 break; 1223 } 1224 1225 /* set TCP_NODELAY */ 1226 if (setsockopt(p->fd, IPPROTO_TCP, TCP_NODELAY, &nodelay, 1227 sizeof(nodelay)) == -1) { 1228 log_peer_warn(&p->conf, 1229 "session_setup_socket setsockopt TCP_NODELAY"); 1230 return (-1); 1231 } 1232 1233 /* only increase bufsize (and thus window) if md5 or ipsec is in use */ 1234 if (p->conf.auth.method != AUTH_NONE) { 1235 /* try to increase bufsize. no biggie if it fails */ 1236 bsize = 65535; 1237 while (bsize > 8192 && 1238 setsockopt(p->fd, SOL_SOCKET, SO_RCVBUF, &bsize, 1239 sizeof(bsize)) == -1 && errno != EINVAL) 1240 bsize /= 2; 1241 bsize = 65535; 1242 while (bsize > 8192 && 1243 setsockopt(p->fd, SOL_SOCKET, SO_SNDBUF, &bsize, 1244 sizeof(bsize)) == -1 && errno != EINVAL) 1245 bsize /= 2; 1246 } 1247 1248 return (0); 1249 } 1250 1251 void 1252 session_tcp_established(struct peer *peer) 1253 { 1254 socklen_t len; 1255 1256 len = sizeof(peer->sa_local); 1257 if (getsockname(peer->fd, (struct sockaddr *)&peer->sa_local, 1258 &len) == -1) 1259 log_warn("getsockname"); 1260 len = sizeof(peer->sa_remote); 1261 if (getpeername(peer->fd, (struct sockaddr *)&peer->sa_remote, 1262 &len) == -1) 1263 log_warn("getpeername"); 1264 } 1265 1266 void 1267 session_capa_ann_none(struct peer *peer) 1268 { 1269 bzero(&peer->capa.ann, sizeof(peer->capa.ann)); 1270 } 1271 1272 int 1273 session_capa_add(struct ibuf *opb, u_int8_t capa_code, u_int8_t capa_len) 1274 { 1275 int errs = 0; 1276 1277 errs += ibuf_add(opb, &capa_code, sizeof(capa_code)); 1278 errs += ibuf_add(opb, &capa_len, sizeof(capa_len)); 1279 return (errs); 1280 } 1281 1282 int 1283 session_capa_add_mp(struct ibuf *buf, u_int8_t aid) 1284 { 1285 u_int8_t safi, pad = 0; 1286 u_int16_t afi; 1287 int errs = 0; 1288 1289 if (aid2afi(aid, &afi, &safi) == -1) 1290 fatalx("session_capa_add_mp: bad afi/safi pair"); 1291 afi = htons(afi); 1292 errs += ibuf_add(buf, &afi, sizeof(afi)); 1293 errs += ibuf_add(buf, &pad, sizeof(pad)); 1294 errs += ibuf_add(buf, &safi, sizeof(safi)); 1295 1296 return (errs); 1297 } 1298 1299 int 1300 session_capa_add_gr(struct peer *p, struct ibuf *b, u_int8_t aid) 1301 { 1302 u_int errs = 0; 1303 u_int16_t afi; 1304 u_int8_t flags, safi; 1305 1306 if (aid2afi(aid, &afi, &safi)) { 1307 log_warn("session_capa_add_gr: bad AID"); 1308 return (1); 1309 } 1310 if (p->capa.neg.grestart.flags[aid] & CAPA_GR_RESTARTING) 1311 flags = CAPA_GR_F_FLAG; 1312 else 1313 flags = 0; 1314 1315 afi = htons(afi); 1316 errs += ibuf_add(b, &afi, sizeof(afi)); 1317 errs += ibuf_add(b, &safi, sizeof(safi)); 1318 errs += ibuf_add(b, &flags, sizeof(flags)); 1319 1320 return (errs); 1321 } 1322 1323 struct bgp_msg * 1324 session_newmsg(enum msg_type msgtype, u_int16_t len) 1325 { 1326 struct bgp_msg *msg; 1327 struct msg_header hdr; 1328 struct ibuf *buf; 1329 int errs = 0; 1330 1331 memset(&hdr.marker, 0xff, sizeof(hdr.marker)); 1332 hdr.len = htons(len); 1333 hdr.type = msgtype; 1334 1335 if ((buf = ibuf_open(len)) == NULL) 1336 return (NULL); 1337 1338 errs += ibuf_add(buf, &hdr.marker, sizeof(hdr.marker)); 1339 errs += ibuf_add(buf, &hdr.len, sizeof(hdr.len)); 1340 errs += ibuf_add(buf, &hdr.type, sizeof(hdr.type)); 1341 1342 if (errs || (msg = calloc(1, sizeof(*msg))) == NULL) { 1343 ibuf_free(buf); 1344 return (NULL); 1345 } 1346 1347 msg->buf = buf; 1348 msg->type = msgtype; 1349 msg->len = len; 1350 1351 return (msg); 1352 } 1353 1354 int 1355 session_sendmsg(struct bgp_msg *msg, struct peer *p) 1356 { 1357 struct mrt *mrt; 1358 1359 LIST_FOREACH(mrt, &mrthead, entry) { 1360 if (!(mrt->type == MRT_ALL_OUT || (msg->type == UPDATE && 1361 mrt->type == MRT_UPDATE_OUT))) 1362 continue; 1363 if ((mrt->peer_id == 0 && mrt->group_id == 0) || 1364 mrt->peer_id == p->conf.id || (mrt->group_id == 0 && 1365 mrt->group_id == p->conf.groupid)) 1366 mrt_dump_bgp_msg(mrt, msg->buf->buf, msg->len, p); 1367 } 1368 1369 ibuf_close(&p->wbuf, msg->buf); 1370 free(msg); 1371 return (0); 1372 } 1373 1374 void 1375 session_open(struct peer *p) 1376 { 1377 struct bgp_msg *buf; 1378 struct ibuf *opb; 1379 struct msg_open msg; 1380 u_int16_t len; 1381 u_int8_t i, op_type, optparamlen = 0; 1382 int errs = 0; 1383 int mpcapa = 0; 1384 1385 1386 if ((opb = ibuf_dynamic(0, UCHAR_MAX - sizeof(op_type) - 1387 sizeof(optparamlen))) == NULL) { 1388 bgp_fsm(p, EVNT_CON_FATAL); 1389 return; 1390 } 1391 1392 /* multiprotocol extensions, RFC 4760 */ 1393 for (i = 0; i < AID_MAX; i++) 1394 if (p->capa.ann.mp[i]) { /* 4 bytes data */ 1395 errs += session_capa_add(opb, CAPA_MP, 4); 1396 errs += session_capa_add_mp(opb, i); 1397 mpcapa++; 1398 } 1399 1400 /* route refresh, RFC 2918 */ 1401 if (p->capa.ann.refresh) /* no data */ 1402 errs += session_capa_add(opb, CAPA_REFRESH, 0); 1403 1404 /* graceful restart and End-of-RIB marker, RFC 4724 */ 1405 if (p->capa.ann.grestart.restart) { 1406 int rst = 0; 1407 u_int16_t hdr; 1408 u_int8_t grlen; 1409 1410 if (mpcapa) { 1411 grlen = 2 + 4 * mpcapa; 1412 for (i = 0; i < AID_MAX; i++) { 1413 if (p->capa.neg.grestart.flags[i] & 1414 CAPA_GR_RESTARTING) 1415 rst++; 1416 } 1417 } else { /* AID_INET */ 1418 grlen = 2 + 4; 1419 if (p->capa.neg.grestart.flags[AID_INET] & 1420 CAPA_GR_RESTARTING) 1421 rst++; 1422 } 1423 1424 hdr = conf->holdtime; /* default timeout */ 1425 /* if client does graceful restart don't set R flag */ 1426 if (!rst) 1427 hdr |= CAPA_GR_R_FLAG; 1428 hdr = htons(hdr); 1429 1430 errs += session_capa_add(opb, CAPA_RESTART, grlen); 1431 errs += ibuf_add(opb, &hdr, sizeof(hdr)); 1432 1433 if (mpcapa) { 1434 for (i = 0; i < AID_MAX; i++) { 1435 if (p->capa.ann.mp[i]) { 1436 errs += session_capa_add_gr(p, opb, i); 1437 } 1438 } 1439 } else { /* AID_INET */ 1440 errs += session_capa_add_gr(p, opb, AID_INET); 1441 } 1442 } 1443 1444 /* 4-bytes AS numbers, draft-ietf-idr-as4bytes-13 */ 1445 if (p->capa.ann.as4byte) { /* 4 bytes data */ 1446 u_int32_t nas; 1447 1448 nas = htonl(conf->as); 1449 errs += session_capa_add(opb, CAPA_AS4BYTE, sizeof(nas)); 1450 errs += ibuf_add(opb, &nas, sizeof(nas)); 1451 } 1452 1453 if (ibuf_size(opb)) 1454 optparamlen = ibuf_size(opb) + sizeof(op_type) + 1455 sizeof(optparamlen); 1456 1457 len = MSGSIZE_OPEN_MIN + optparamlen; 1458 if (errs || (buf = session_newmsg(OPEN, len)) == NULL) { 1459 ibuf_free(opb); 1460 bgp_fsm(p, EVNT_CON_FATAL); 1461 return; 1462 } 1463 1464 msg.version = 4; 1465 msg.myas = htons(conf->short_as); 1466 if (p->conf.holdtime) 1467 msg.holdtime = htons(p->conf.holdtime); 1468 else 1469 msg.holdtime = htons(conf->holdtime); 1470 msg.bgpid = conf->bgpid; /* is already in network byte order */ 1471 msg.optparamlen = optparamlen; 1472 1473 errs += ibuf_add(buf->buf, &msg.version, sizeof(msg.version)); 1474 errs += ibuf_add(buf->buf, &msg.myas, sizeof(msg.myas)); 1475 errs += ibuf_add(buf->buf, &msg.holdtime, sizeof(msg.holdtime)); 1476 errs += ibuf_add(buf->buf, &msg.bgpid, sizeof(msg.bgpid)); 1477 errs += ibuf_add(buf->buf, &msg.optparamlen, sizeof(msg.optparamlen)); 1478 1479 if (optparamlen) { 1480 op_type = OPT_PARAM_CAPABILITIES; 1481 optparamlen = ibuf_size(opb); 1482 errs += ibuf_add(buf->buf, &op_type, sizeof(op_type)); 1483 errs += ibuf_add(buf->buf, &optparamlen, sizeof(optparamlen)); 1484 errs += ibuf_add(buf->buf, opb->buf, ibuf_size(opb)); 1485 } 1486 1487 ibuf_free(opb); 1488 1489 if (errs) { 1490 ibuf_free(buf->buf); 1491 free(buf); 1492 bgp_fsm(p, EVNT_CON_FATAL); 1493 return; 1494 } 1495 1496 if (session_sendmsg(buf, p) == -1) { 1497 bgp_fsm(p, EVNT_CON_FATAL); 1498 return; 1499 } 1500 1501 p->stats.msg_sent_open++; 1502 } 1503 1504 void 1505 session_keepalive(struct peer *p) 1506 { 1507 struct bgp_msg *buf; 1508 1509 if ((buf = session_newmsg(KEEPALIVE, MSGSIZE_KEEPALIVE)) == NULL || 1510 session_sendmsg(buf, p) == -1) { 1511 bgp_fsm(p, EVNT_CON_FATAL); 1512 return; 1513 } 1514 1515 start_timer_keepalive(p); 1516 p->stats.msg_sent_keepalive++; 1517 } 1518 1519 void 1520 session_update(u_int32_t peerid, void *data, size_t datalen) 1521 { 1522 struct peer *p; 1523 struct bgp_msg *buf; 1524 1525 if ((p = getpeerbyid(peerid)) == NULL) { 1526 log_warnx("no such peer: id=%u", peerid); 1527 return; 1528 } 1529 1530 if (p->state != STATE_ESTABLISHED) 1531 return; 1532 1533 if ((buf = session_newmsg(UPDATE, MSGSIZE_HEADER + datalen)) == NULL) { 1534 bgp_fsm(p, EVNT_CON_FATAL); 1535 return; 1536 } 1537 1538 if (ibuf_add(buf->buf, data, datalen)) { 1539 ibuf_free(buf->buf); 1540 free(buf); 1541 bgp_fsm(p, EVNT_CON_FATAL); 1542 return; 1543 } 1544 1545 if (session_sendmsg(buf, p) == -1) { 1546 bgp_fsm(p, EVNT_CON_FATAL); 1547 return; 1548 } 1549 1550 start_timer_keepalive(p); 1551 p->stats.msg_sent_update++; 1552 } 1553 1554 void 1555 session_notification(struct peer *p, u_int8_t errcode, u_int8_t subcode, 1556 void *data, ssize_t datalen) 1557 { 1558 struct bgp_msg *buf; 1559 int errs = 0; 1560 1561 if (p->stats.last_sent_errcode) /* some notification already sent */ 1562 return; 1563 1564 log_notification(p, errcode, subcode, data, datalen, "sending"); 1565 1566 if ((buf = session_newmsg(NOTIFICATION, 1567 MSGSIZE_NOTIFICATION_MIN + datalen)) == NULL) { 1568 bgp_fsm(p, EVNT_CON_FATAL); 1569 return; 1570 } 1571 1572 errs += ibuf_add(buf->buf, &errcode, sizeof(errcode)); 1573 errs += ibuf_add(buf->buf, &subcode, sizeof(subcode)); 1574 1575 if (datalen > 0) 1576 errs += ibuf_add(buf->buf, data, datalen); 1577 1578 if (errs) { 1579 ibuf_free(buf->buf); 1580 free(buf); 1581 bgp_fsm(p, EVNT_CON_FATAL); 1582 return; 1583 } 1584 1585 if (session_sendmsg(buf, p) == -1) { 1586 bgp_fsm(p, EVNT_CON_FATAL); 1587 return; 1588 } 1589 1590 p->stats.msg_sent_notification++; 1591 p->stats.last_sent_errcode = errcode; 1592 p->stats.last_sent_suberr = subcode; 1593 } 1594 1595 int 1596 session_neighbor_rrefresh(struct peer *p) 1597 { 1598 u_int8_t i; 1599 1600 if (!p->capa.peer.refresh) 1601 return (-1); 1602 1603 for (i = 0; i < AID_MAX; i++) { 1604 if (p->capa.peer.mp[i] != 0) 1605 session_rrefresh(p, i); 1606 } 1607 1608 return (0); 1609 } 1610 1611 void 1612 session_rrefresh(struct peer *p, u_int8_t aid) 1613 { 1614 struct bgp_msg *buf; 1615 int errs = 0; 1616 u_int16_t afi; 1617 u_int8_t safi, null8 = 0; 1618 1619 if (aid2afi(aid, &afi, &safi) == -1) 1620 fatalx("session_rrefresh: bad afi/safi pair"); 1621 1622 if ((buf = session_newmsg(RREFRESH, MSGSIZE_RREFRESH)) == NULL) { 1623 bgp_fsm(p, EVNT_CON_FATAL); 1624 return; 1625 } 1626 1627 afi = htons(afi); 1628 errs += ibuf_add(buf->buf, &afi, sizeof(afi)); 1629 errs += ibuf_add(buf->buf, &null8, sizeof(null8)); 1630 errs += ibuf_add(buf->buf, &safi, sizeof(safi)); 1631 1632 if (errs) { 1633 ibuf_free(buf->buf); 1634 free(buf); 1635 bgp_fsm(p, EVNT_CON_FATAL); 1636 return; 1637 } 1638 1639 if (session_sendmsg(buf, p) == -1) { 1640 bgp_fsm(p, EVNT_CON_FATAL); 1641 return; 1642 } 1643 1644 p->stats.msg_sent_rrefresh++; 1645 } 1646 1647 int 1648 session_graceful_restart(struct peer *p) 1649 { 1650 u_int8_t i; 1651 1652 timer_set(p, Timer_RestartTimeout, p->capa.neg.grestart.timeout); 1653 1654 for (i = 0; i < AID_MAX; i++) { 1655 if (p->capa.neg.grestart.flags[i] & CAPA_GR_PRESENT) { 1656 if (imsg_compose(ibuf_rde, IMSG_SESSION_STALE, 1657 p->conf.id, 0, -1, &i, sizeof(i)) == -1) 1658 return (-1); 1659 log_peer_warnx(&p->conf, 1660 "graceful restart of %s, keeping routes", 1661 aid2str(i)); 1662 p->capa.neg.grestart.flags[i] |= CAPA_GR_RESTARTING; 1663 } else if (p->capa.neg.mp[i]) { 1664 if (imsg_compose(ibuf_rde, IMSG_SESSION_FLUSH, 1665 p->conf.id, 0, -1, &i, sizeof(i)) == -1) 1666 return (-1); 1667 log_peer_warnx(&p->conf, 1668 "graceful restart of %s, flushing routes", 1669 aid2str(i)); 1670 } 1671 } 1672 return (0); 1673 } 1674 1675 int 1676 session_graceful_stop(struct peer *p) 1677 { 1678 u_int8_t i; 1679 1680 for (i = 0; i < AID_MAX; i++) { 1681 /* 1682 * Only flush if the peer is restarting and the timeout fired. 1683 * In all other cases the session was already flushed when the 1684 * session went down or when the new open message was parsed. 1685 */ 1686 if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING) { 1687 log_peer_warnx(&p->conf, "graceful restart of %s, " 1688 "time-out, flushing", aid2str(i)); 1689 if (imsg_compose(ibuf_rde, IMSG_SESSION_FLUSH, 1690 p->conf.id, 0, -1, &i, sizeof(i)) == -1) 1691 return (-1); 1692 } 1693 p->capa.neg.grestart.flags[i] &= ~CAPA_GR_RESTARTING; 1694 } 1695 return (0); 1696 } 1697 1698 int 1699 session_dispatch_msg(struct pollfd *pfd, struct peer *p) 1700 { 1701 ssize_t n; 1702 socklen_t len; 1703 int error; 1704 1705 if (p->state == STATE_CONNECT) { 1706 if (pfd->revents & POLLOUT) { 1707 if (pfd->revents & POLLIN) { 1708 /* error occurred */ 1709 len = sizeof(error); 1710 if (getsockopt(pfd->fd, SOL_SOCKET, SO_ERROR, 1711 &error, &len) == -1 || error) { 1712 if (error) 1713 errno = error; 1714 if (errno != p->lasterr) { 1715 log_peer_warn(&p->conf, 1716 "socket error"); 1717 p->lasterr = errno; 1718 } 1719 bgp_fsm(p, EVNT_CON_OPENFAIL); 1720 return (1); 1721 } 1722 } 1723 bgp_fsm(p, EVNT_CON_OPEN); 1724 return (1); 1725 } 1726 if (pfd->revents & POLLHUP) { 1727 bgp_fsm(p, EVNT_CON_OPENFAIL); 1728 return (1); 1729 } 1730 if (pfd->revents & (POLLERR|POLLNVAL)) { 1731 bgp_fsm(p, EVNT_CON_FATAL); 1732 return (1); 1733 } 1734 return (0); 1735 } 1736 1737 if (pfd->revents & POLLHUP) { 1738 bgp_fsm(p, EVNT_CON_CLOSED); 1739 return (1); 1740 } 1741 if (pfd->revents & (POLLERR|POLLNVAL)) { 1742 bgp_fsm(p, EVNT_CON_FATAL); 1743 return (1); 1744 } 1745 1746 if (pfd->revents & POLLOUT && p->wbuf.queued) { 1747 if ((error = msgbuf_write(&p->wbuf)) <= 0 && errno != EAGAIN) { 1748 if (error == 0) 1749 log_peer_warnx(&p->conf, "Connection closed"); 1750 else if (error == -1) 1751 log_peer_warn(&p->conf, "write error"); 1752 bgp_fsm(p, EVNT_CON_FATAL); 1753 return (1); 1754 } 1755 if (!(pfd->revents & POLLIN)) 1756 return (1); 1757 } 1758 1759 if (p->rbuf && pfd->revents & POLLIN) { 1760 if ((n = read(p->fd, p->rbuf->buf + p->rbuf->wpos, 1761 sizeof(p->rbuf->buf) - p->rbuf->wpos)) == -1) { 1762 if (errno != EINTR && errno != EAGAIN) { 1763 log_peer_warn(&p->conf, "read error"); 1764 bgp_fsm(p, EVNT_CON_FATAL); 1765 } 1766 return (1); 1767 } 1768 if (n == 0) { /* connection closed */ 1769 bgp_fsm(p, EVNT_CON_CLOSED); 1770 return (1); 1771 } 1772 1773 p->rbuf->wpos += n; 1774 p->stats.last_read = time(NULL); 1775 return (1); 1776 } 1777 return (0); 1778 } 1779 1780 int 1781 session_process_msg(struct peer *p) 1782 { 1783 ssize_t rpos, av, left; 1784 int processed = 0; 1785 u_int16_t msglen; 1786 u_int8_t msgtype; 1787 1788 rpos = 0; 1789 av = p->rbuf->wpos; 1790 1791 /* 1792 * session might drop to IDLE -> buffers deallocated 1793 * we MUST check rbuf != NULL before use 1794 */ 1795 for (;;) { 1796 if (rpos + MSGSIZE_HEADER > av) 1797 break; 1798 if (p->rbuf == NULL) 1799 break; 1800 if (parse_header(p, p->rbuf->buf + rpos, &msglen, 1801 &msgtype) == -1) 1802 return (0); 1803 if (rpos + msglen > av) 1804 break; 1805 p->rbuf->rptr = p->rbuf->buf + rpos; 1806 1807 switch (msgtype) { 1808 case OPEN: 1809 bgp_fsm(p, EVNT_RCVD_OPEN); 1810 p->stats.msg_rcvd_open++; 1811 break; 1812 case UPDATE: 1813 bgp_fsm(p, EVNT_RCVD_UPDATE); 1814 p->stats.msg_rcvd_update++; 1815 break; 1816 case NOTIFICATION: 1817 bgp_fsm(p, EVNT_RCVD_NOTIFICATION); 1818 p->stats.msg_rcvd_notification++; 1819 break; 1820 case KEEPALIVE: 1821 bgp_fsm(p, EVNT_RCVD_KEEPALIVE); 1822 p->stats.msg_rcvd_keepalive++; 1823 break; 1824 case RREFRESH: 1825 parse_refresh(p); 1826 p->stats.msg_rcvd_rrefresh++; 1827 break; 1828 default: /* cannot happen */ 1829 session_notification(p, ERR_HEADER, ERR_HDR_TYPE, 1830 &msgtype, 1); 1831 log_warnx("received message with unknown type %u", 1832 msgtype); 1833 bgp_fsm(p, EVNT_CON_FATAL); 1834 } 1835 rpos += msglen; 1836 if (++processed > MSG_PROCESS_LIMIT) 1837 break; 1838 } 1839 if (p->rbuf == NULL) 1840 return (1); 1841 1842 if (rpos < av) { 1843 left = av - rpos; 1844 memmove(&p->rbuf->buf, p->rbuf->buf + rpos, left); 1845 p->rbuf->wpos = left; 1846 } else 1847 p->rbuf->wpos = 0; 1848 1849 return (1); 1850 } 1851 1852 int 1853 parse_header(struct peer *peer, u_char *data, u_int16_t *len, u_int8_t *type) 1854 { 1855 struct mrt *mrt; 1856 u_char *p; 1857 u_int16_t olen; 1858 static const u_int8_t marker[MSGSIZE_HEADER_MARKER] = { 0xff, 0xff, 1859 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 1860 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; 1861 1862 /* caller MUST make sure we are getting 19 bytes! */ 1863 p = data; 1864 if (memcmp(p, marker, sizeof(marker))) { 1865 log_peer_warnx(&peer->conf, "sync error"); 1866 session_notification(peer, ERR_HEADER, ERR_HDR_SYNC, NULL, 0); 1867 bgp_fsm(peer, EVNT_CON_FATAL); 1868 return (-1); 1869 } 1870 p += MSGSIZE_HEADER_MARKER; 1871 1872 memcpy(&olen, p, 2); 1873 *len = ntohs(olen); 1874 p += 2; 1875 memcpy(type, p, 1); 1876 1877 if (*len < MSGSIZE_HEADER || *len > MAX_PKTSIZE) { 1878 log_peer_warnx(&peer->conf, 1879 "received message: illegal length: %u byte", *len); 1880 session_notification(peer, ERR_HEADER, ERR_HDR_LEN, 1881 &olen, sizeof(olen)); 1882 bgp_fsm(peer, EVNT_CON_FATAL); 1883 return (-1); 1884 } 1885 1886 switch (*type) { 1887 case OPEN: 1888 if (*len < MSGSIZE_OPEN_MIN) { 1889 log_peer_warnx(&peer->conf, 1890 "received OPEN: illegal len: %u byte", *len); 1891 session_notification(peer, ERR_HEADER, ERR_HDR_LEN, 1892 &olen, sizeof(olen)); 1893 bgp_fsm(peer, EVNT_CON_FATAL); 1894 return (-1); 1895 } 1896 break; 1897 case NOTIFICATION: 1898 if (*len < MSGSIZE_NOTIFICATION_MIN) { 1899 log_peer_warnx(&peer->conf, 1900 "received NOTIFICATION: illegal len: %u byte", 1901 *len); 1902 session_notification(peer, ERR_HEADER, ERR_HDR_LEN, 1903 &olen, sizeof(olen)); 1904 bgp_fsm(peer, EVNT_CON_FATAL); 1905 return (-1); 1906 } 1907 break; 1908 case UPDATE: 1909 if (*len < MSGSIZE_UPDATE_MIN) { 1910 log_peer_warnx(&peer->conf, 1911 "received UPDATE: illegal len: %u byte", *len); 1912 session_notification(peer, ERR_HEADER, ERR_HDR_LEN, 1913 &olen, sizeof(olen)); 1914 bgp_fsm(peer, EVNT_CON_FATAL); 1915 return (-1); 1916 } 1917 break; 1918 case KEEPALIVE: 1919 if (*len != MSGSIZE_KEEPALIVE) { 1920 log_peer_warnx(&peer->conf, 1921 "received KEEPALIVE: illegal len: %u byte", *len); 1922 session_notification(peer, ERR_HEADER, ERR_HDR_LEN, 1923 &olen, sizeof(olen)); 1924 bgp_fsm(peer, EVNT_CON_FATAL); 1925 return (-1); 1926 } 1927 break; 1928 case RREFRESH: 1929 if (*len != MSGSIZE_RREFRESH) { 1930 log_peer_warnx(&peer->conf, 1931 "received RREFRESH: illegal len: %u byte", *len); 1932 session_notification(peer, ERR_HEADER, ERR_HDR_LEN, 1933 &olen, sizeof(olen)); 1934 bgp_fsm(peer, EVNT_CON_FATAL); 1935 return (-1); 1936 } 1937 break; 1938 default: 1939 log_peer_warnx(&peer->conf, 1940 "received msg with unknown type %u", *type); 1941 session_notification(peer, ERR_HEADER, ERR_HDR_TYPE, 1942 type, 1); 1943 bgp_fsm(peer, EVNT_CON_FATAL); 1944 return (-1); 1945 } 1946 LIST_FOREACH(mrt, &mrthead, entry) { 1947 if (!(mrt->type == MRT_ALL_IN || (*type == UPDATE && 1948 mrt->type == MRT_UPDATE_IN))) 1949 continue; 1950 if ((mrt->peer_id == 0 && mrt->group_id == 0) || 1951 mrt->peer_id == peer->conf.id || (mrt->group_id != 0 && 1952 mrt->group_id == peer->conf.groupid)) 1953 mrt_dump_bgp_msg(mrt, data, *len, peer); 1954 } 1955 return (0); 1956 } 1957 1958 int 1959 parse_open(struct peer *peer) 1960 { 1961 u_char *p, *op_val; 1962 u_int8_t version, rversion; 1963 u_int16_t short_as, msglen; 1964 u_int16_t holdtime, oholdtime, myholdtime; 1965 u_int32_t as, bgpid; 1966 u_int8_t optparamlen, plen; 1967 u_int8_t op_type, op_len; 1968 1969 p = peer->rbuf->rptr; 1970 p += MSGSIZE_HEADER_MARKER; 1971 memcpy(&msglen, p, sizeof(msglen)); 1972 msglen = ntohs(msglen); 1973 1974 p = peer->rbuf->rptr; 1975 p += MSGSIZE_HEADER; /* header is already checked */ 1976 1977 memcpy(&version, p, sizeof(version)); 1978 p += sizeof(version); 1979 1980 if (version != BGP_VERSION) { 1981 log_peer_warnx(&peer->conf, 1982 "peer wants unrecognized version %u", version); 1983 if (version > BGP_VERSION) 1984 rversion = version - BGP_VERSION; 1985 else 1986 rversion = BGP_VERSION; 1987 session_notification(peer, ERR_OPEN, ERR_OPEN_VERSION, 1988 &rversion, sizeof(rversion)); 1989 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 1990 return (-1); 1991 } 1992 1993 memcpy(&short_as, p, sizeof(short_as)); 1994 p += sizeof(short_as); 1995 as = peer->short_as = ntohs(short_as); 1996 1997 memcpy(&oholdtime, p, sizeof(oholdtime)); 1998 p += sizeof(oholdtime); 1999 2000 holdtime = ntohs(oholdtime); 2001 if (holdtime && holdtime < peer->conf.min_holdtime) { 2002 log_peer_warnx(&peer->conf, 2003 "peer requests unacceptable holdtime %u", holdtime); 2004 session_notification(peer, ERR_OPEN, ERR_OPEN_HOLDTIME, 2005 NULL, 0); 2006 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2007 return (-1); 2008 } 2009 2010 myholdtime = peer->conf.holdtime; 2011 if (!myholdtime) 2012 myholdtime = conf->holdtime; 2013 if (holdtime < myholdtime) 2014 peer->holdtime = holdtime; 2015 else 2016 peer->holdtime = myholdtime; 2017 2018 memcpy(&bgpid, p, sizeof(bgpid)); 2019 p += sizeof(bgpid); 2020 2021 /* check bgpid for validity - just disallow 0 */ 2022 if (ntohl(bgpid) == 0) { 2023 log_peer_warnx(&peer->conf, "peer BGPID %u unacceptable", 2024 ntohl(bgpid)); 2025 session_notification(peer, ERR_OPEN, ERR_OPEN_BGPID, 2026 NULL, 0); 2027 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2028 return (-1); 2029 } 2030 peer->remote_bgpid = bgpid; 2031 2032 memcpy(&optparamlen, p, sizeof(optparamlen)); 2033 p += sizeof(optparamlen); 2034 2035 if (optparamlen != msglen - MSGSIZE_OPEN_MIN) { 2036 log_peer_warnx(&peer->conf, 2037 "corrupt OPEN message received: length mismatch"); 2038 session_notification(peer, ERR_OPEN, 0, NULL, 0); 2039 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2040 return (-1); 2041 } 2042 2043 plen = optparamlen; 2044 while (plen > 0) { 2045 if (plen < 2) { 2046 log_peer_warnx(&peer->conf, 2047 "corrupt OPEN message received, len wrong"); 2048 session_notification(peer, ERR_OPEN, 0, NULL, 0); 2049 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2050 return (-1); 2051 } 2052 memcpy(&op_type, p, sizeof(op_type)); 2053 p += sizeof(op_type); 2054 plen -= sizeof(op_type); 2055 memcpy(&op_len, p, sizeof(op_len)); 2056 p += sizeof(op_len); 2057 plen -= sizeof(op_len); 2058 if (op_len > 0) { 2059 if (plen < op_len) { 2060 log_peer_warnx(&peer->conf, 2061 "corrupt OPEN message received, len wrong"); 2062 session_notification(peer, ERR_OPEN, 0, 2063 NULL, 0); 2064 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2065 return (-1); 2066 } 2067 op_val = p; 2068 p += op_len; 2069 plen -= op_len; 2070 } else 2071 op_val = NULL; 2072 2073 switch (op_type) { 2074 case OPT_PARAM_CAPABILITIES: /* RFC 3392 */ 2075 if (parse_capabilities(peer, op_val, op_len, 2076 &as) == -1) { 2077 session_notification(peer, ERR_OPEN, 0, 2078 NULL, 0); 2079 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2080 return (-1); 2081 } 2082 break; 2083 case OPT_PARAM_AUTH: /* deprecated */ 2084 default: 2085 /* 2086 * unsupported type 2087 * the RFCs tell us to leave the data section empty 2088 * and notify the peer with ERR_OPEN, ERR_OPEN_OPT. 2089 * How the peer should know _which_ optional parameter 2090 * we don't support is beyond me. 2091 */ 2092 log_peer_warnx(&peer->conf, 2093 "received OPEN message with unsupported optional " 2094 "parameter: type %u", op_type); 2095 session_notification(peer, ERR_OPEN, ERR_OPEN_OPT, 2096 NULL, 0); 2097 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2098 timer_set(peer, Timer_IdleHold, 0); /* no punish */ 2099 peer->IdleHoldTime /= 2; 2100 return (-1); 2101 } 2102 } 2103 2104 /* if remote-as is zero and it's a cloned neighbor, accept any */ 2105 if (peer->template && !peer->conf.remote_as && as != AS_TRANS) { 2106 peer->conf.remote_as = as; 2107 peer->conf.ebgp = (peer->conf.remote_as != conf->as); 2108 if (!peer->conf.ebgp) 2109 /* force enforce_as off for iBGP sessions */ 2110 peer->conf.enforce_as = ENFORCE_AS_OFF; 2111 } 2112 2113 if (peer->conf.remote_as != as) { 2114 log_peer_warnx(&peer->conf, "peer sent wrong AS %s", 2115 log_as(as)); 2116 session_notification(peer, ERR_OPEN, ERR_OPEN_AS, NULL, 0); 2117 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2118 return (-1); 2119 } 2120 2121 if (capa_neg_calc(peer) == -1) { 2122 log_peer_warnx(&peer->conf, 2123 "capability negotiation calculation failed"); 2124 session_notification(peer, ERR_OPEN, 0, NULL, 0); 2125 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2126 return (-1); 2127 } 2128 2129 return (0); 2130 } 2131 2132 int 2133 parse_update(struct peer *peer) 2134 { 2135 u_char *p; 2136 u_int16_t datalen; 2137 2138 /* 2139 * we pass the message verbatim to the rde. 2140 * in case of errors the whole session is reset with a 2141 * notification anyway, we only need to know the peer 2142 */ 2143 p = peer->rbuf->rptr; 2144 p += MSGSIZE_HEADER_MARKER; 2145 memcpy(&datalen, p, sizeof(datalen)); 2146 datalen = ntohs(datalen); 2147 2148 p = peer->rbuf->rptr; 2149 p += MSGSIZE_HEADER; /* header is already checked */ 2150 datalen -= MSGSIZE_HEADER; 2151 2152 if (imsg_compose(ibuf_rde, IMSG_UPDATE, peer->conf.id, 0, -1, p, 2153 datalen) == -1) 2154 return (-1); 2155 2156 return (0); 2157 } 2158 2159 int 2160 parse_refresh(struct peer *peer) 2161 { 2162 u_char *p; 2163 u_int16_t afi; 2164 u_int8_t aid, safi; 2165 2166 p = peer->rbuf->rptr; 2167 p += MSGSIZE_HEADER; /* header is already checked */ 2168 2169 /* 2170 * We could check if we actually announced the capability but 2171 * as long as the message is correctly encoded we don't care. 2172 */ 2173 2174 /* afi, 2 byte */ 2175 memcpy(&afi, p, sizeof(afi)); 2176 afi = ntohs(afi); 2177 p += 2; 2178 /* reserved, 1 byte */ 2179 p += 1; 2180 /* safi, 1 byte */ 2181 memcpy(&safi, p, sizeof(safi)); 2182 2183 /* afi/safi unchecked - unrecognized values will be ignored anyway */ 2184 if (afi2aid(afi, safi, &aid) == -1) { 2185 log_peer_warnx(&peer->conf, "peer sent bad refresh, " 2186 "invalid afi/safi pair"); 2187 return (0); 2188 } 2189 2190 if (imsg_compose(ibuf_rde, IMSG_REFRESH, peer->conf.id, 0, -1, &aid, 2191 sizeof(aid)) == -1) 2192 return (-1); 2193 2194 return (0); 2195 } 2196 2197 int 2198 parse_notification(struct peer *peer) 2199 { 2200 u_char *p; 2201 u_int16_t datalen; 2202 u_int8_t errcode; 2203 u_int8_t subcode; 2204 u_int8_t capa_code; 2205 u_int8_t capa_len; 2206 u_int8_t i; 2207 2208 /* just log */ 2209 p = peer->rbuf->rptr; 2210 p += MSGSIZE_HEADER_MARKER; 2211 memcpy(&datalen, p, sizeof(datalen)); 2212 datalen = ntohs(datalen); 2213 2214 p = peer->rbuf->rptr; 2215 p += MSGSIZE_HEADER; /* header is already checked */ 2216 datalen -= MSGSIZE_HEADER; 2217 2218 memcpy(&errcode, p, sizeof(errcode)); 2219 p += sizeof(errcode); 2220 datalen -= sizeof(errcode); 2221 2222 memcpy(&subcode, p, sizeof(subcode)); 2223 p += sizeof(subcode); 2224 datalen -= sizeof(subcode); 2225 2226 log_notification(peer, errcode, subcode, p, datalen, "received"); 2227 peer->errcnt++; 2228 2229 if (errcode == ERR_OPEN && subcode == ERR_OPEN_CAPA) { 2230 if (datalen == 0) { /* zebra likes to send those.. humbug */ 2231 log_peer_warnx(&peer->conf, "received \"unsupported " 2232 "capability\" notification without data part, " 2233 "disabling capability announcements altogether"); 2234 session_capa_ann_none(peer); 2235 } 2236 2237 while (datalen > 0) { 2238 if (datalen < 2) { 2239 log_peer_warnx(&peer->conf, 2240 "parse_notification: " 2241 "expect len >= 2, len is %u", datalen); 2242 return (-1); 2243 } 2244 memcpy(&capa_code, p, sizeof(capa_code)); 2245 p += sizeof(capa_code); 2246 datalen -= sizeof(capa_code); 2247 memcpy(&capa_len, p, sizeof(capa_len)); 2248 p += sizeof(capa_len); 2249 datalen -= sizeof(capa_len); 2250 if (datalen < capa_len) { 2251 log_peer_warnx(&peer->conf, 2252 "parse_notification: capa_len %u exceeds " 2253 "remaining msg length %u", capa_len, 2254 datalen); 2255 return (-1); 2256 } 2257 p += capa_len; 2258 datalen -= capa_len; 2259 switch (capa_code) { 2260 case CAPA_MP: 2261 for (i = 0; i < AID_MAX; i++) 2262 peer->capa.ann.mp[i] = 0; 2263 log_peer_warnx(&peer->conf, 2264 "disabling multiprotocol capability"); 2265 break; 2266 case CAPA_REFRESH: 2267 peer->capa.ann.refresh = 0; 2268 log_peer_warnx(&peer->conf, 2269 "disabling route refresh capability"); 2270 break; 2271 case CAPA_RESTART: 2272 peer->capa.ann.grestart.restart = 0; 2273 log_peer_warnx(&peer->conf, 2274 "disabling restart capability"); 2275 break; 2276 case CAPA_AS4BYTE: 2277 peer->capa.ann.as4byte = 0; 2278 log_peer_warnx(&peer->conf, 2279 "disabling 4-byte AS num capability"); 2280 break; 2281 default: /* should not happen... */ 2282 log_peer_warnx(&peer->conf, "received " 2283 "\"unsupported capability\" notification " 2284 "for unknown capability %u, disabling " 2285 "capability announcements altogether", 2286 capa_code); 2287 session_capa_ann_none(peer); 2288 break; 2289 } 2290 } 2291 2292 return (1); 2293 } 2294 2295 if (errcode == ERR_OPEN && subcode == ERR_OPEN_OPT) { 2296 session_capa_ann_none(peer); 2297 return (1); 2298 } 2299 2300 return (0); 2301 } 2302 2303 int 2304 parse_capabilities(struct peer *peer, u_char *d, u_int16_t dlen, u_int32_t *as) 2305 { 2306 u_char *capa_val; 2307 u_int32_t remote_as; 2308 u_int16_t len; 2309 u_int16_t afi; 2310 u_int16_t gr_header; 2311 u_int8_t safi; 2312 u_int8_t aid; 2313 u_int8_t gr_flags; 2314 u_int8_t capa_code; 2315 u_int8_t capa_len; 2316 u_int8_t i; 2317 2318 len = dlen; 2319 while (len > 0) { 2320 if (len < 2) { 2321 log_peer_warnx(&peer->conf, "Bad capabilities attr " 2322 "length: %u, too short", len); 2323 return (-1); 2324 } 2325 memcpy(&capa_code, d, sizeof(capa_code)); 2326 d += sizeof(capa_code); 2327 len -= sizeof(capa_code); 2328 memcpy(&capa_len, d, sizeof(capa_len)); 2329 d += sizeof(capa_len); 2330 len -= sizeof(capa_len); 2331 if (capa_len > 0) { 2332 if (len < capa_len) { 2333 log_peer_warnx(&peer->conf, 2334 "Bad capabilities attr length: " 2335 "len %u smaller than capa_len %u", 2336 len, capa_len); 2337 return (-1); 2338 } 2339 capa_val = d; 2340 d += capa_len; 2341 len -= capa_len; 2342 } else 2343 capa_val = NULL; 2344 2345 switch (capa_code) { 2346 case CAPA_MP: /* RFC 4760 */ 2347 if (capa_len != 4) { 2348 log_peer_warnx(&peer->conf, 2349 "Bad multi protocol capability length: " 2350 "%u", capa_len); 2351 break; 2352 } 2353 memcpy(&afi, capa_val, sizeof(afi)); 2354 afi = ntohs(afi); 2355 memcpy(&safi, capa_val + 3, sizeof(safi)); 2356 if (afi2aid(afi, safi, &aid) == -1) { 2357 log_peer_warnx(&peer->conf, 2358 "Received multi protocol capability: " 2359 " unknown AFI %u, safi %u pair", 2360 afi, safi); 2361 break; 2362 } 2363 peer->capa.peer.mp[aid] = 1; 2364 break; 2365 case CAPA_REFRESH: 2366 peer->capa.peer.refresh = 1; 2367 break; 2368 case CAPA_RESTART: 2369 if (capa_len == 2) { 2370 /* peer only supports EoR marker */ 2371 peer->capa.peer.grestart.restart = 1; 2372 peer->capa.peer.grestart.timeout = 0; 2373 break; 2374 } else if (capa_len % 4 != 2) { 2375 log_peer_warnx(&peer->conf, 2376 "Bad graceful restart capability length: " 2377 "%u", capa_len); 2378 peer->capa.peer.grestart.restart = 0; 2379 peer->capa.peer.grestart.timeout = 0; 2380 break; 2381 } 2382 2383 memcpy(&gr_header, capa_val, sizeof(gr_header)); 2384 gr_header = ntohs(gr_header); 2385 peer->capa.peer.grestart.timeout = 2386 gr_header & CAPA_GR_TIMEMASK; 2387 if (peer->capa.peer.grestart.timeout == 0) { 2388 log_peer_warnx(&peer->conf, "Received " 2389 "graceful restart timeout is zero"); 2390 peer->capa.peer.grestart.restart = 0; 2391 break; 2392 } 2393 2394 for (i = 2; i <= capa_len - 4; i += 4) { 2395 memcpy(&afi, capa_val + i, sizeof(afi)); 2396 afi = ntohs(afi); 2397 memcpy(&safi, capa_val + i + 2, sizeof(safi)); 2398 if (afi2aid(afi, safi, &aid) == -1) { 2399 log_peer_warnx(&peer->conf, 2400 "Received graceful restart capa: " 2401 " unknown AFI %u, safi %u pair", 2402 afi, safi); 2403 continue; 2404 } 2405 memcpy(&gr_flags, capa_val + i + 3, 2406 sizeof(gr_flags)); 2407 peer->capa.peer.grestart.flags[aid] |= 2408 CAPA_GR_PRESENT; 2409 if (gr_flags & CAPA_GR_F_FLAG) 2410 peer->capa.peer.grestart.flags[aid] |= 2411 CAPA_GR_FORWARD; 2412 if (gr_header & CAPA_GR_R_FLAG) 2413 peer->capa.peer.grestart.flags[aid] |= 2414 CAPA_GR_RESTART; 2415 peer->capa.peer.grestart.restart = 2; 2416 } 2417 break; 2418 case CAPA_AS4BYTE: 2419 if (capa_len != 4) { 2420 log_peer_warnx(&peer->conf, 2421 "Bad AS4BYTE capability length: " 2422 "%u", capa_len); 2423 peer->capa.peer.as4byte = 0; 2424 break; 2425 } 2426 memcpy(&remote_as, capa_val, sizeof(remote_as)); 2427 *as = ntohl(remote_as); 2428 peer->capa.peer.as4byte = 1; 2429 break; 2430 default: 2431 break; 2432 } 2433 } 2434 2435 return (0); 2436 } 2437 2438 int 2439 capa_neg_calc(struct peer *p) 2440 { 2441 u_int8_t i, hasmp = 0; 2442 2443 /* refresh: does not realy matter here, use peer setting */ 2444 p->capa.neg.refresh = p->capa.peer.refresh; 2445 2446 /* as4byte: both side must announce capability */ 2447 if (p->capa.ann.as4byte && p->capa.peer.as4byte) 2448 p->capa.neg.as4byte = 1; 2449 else 2450 p->capa.neg.as4byte = 0; 2451 2452 /* MP: both side must announce capability */ 2453 for (i = 0; i < AID_MAX; i++) { 2454 if (p->capa.ann.mp[i] && p->capa.peer.mp[i]) { 2455 p->capa.neg.mp[i] = 1; 2456 hasmp = 1; 2457 } else 2458 p->capa.neg.mp[i] = 0; 2459 } 2460 /* if no MP capability present default to IPv4 unicast mode */ 2461 if (!hasmp) 2462 p->capa.neg.mp[AID_INET] = 1; 2463 2464 /* 2465 * graceful restart: only the peer capabilities are of interest here. 2466 * It is necessary to compare the new values with the previous ones 2467 * and act acordingly. AFI/SAFI that are not part in the MP capability 2468 * are treated as not being present. 2469 */ 2470 2471 for (i = 0; i < AID_MAX; i++) { 2472 int8_t negflags; 2473 2474 /* disable GR if the AFI/SAFI is not present */ 2475 if (p->capa.peer.grestart.flags[i] & CAPA_GR_PRESENT && 2476 p->capa.neg.mp[i] == 0) 2477 p->capa.peer.grestart.flags[i] = 0; /* disable */ 2478 /* look at current GR state and decide what to do */ 2479 negflags = p->capa.neg.grestart.flags[i]; 2480 p->capa.neg.grestart.flags[i] = p->capa.peer.grestart.flags[i]; 2481 if (negflags & CAPA_GR_RESTARTING) { 2482 if (!(p->capa.peer.grestart.flags[i] & 2483 CAPA_GR_FORWARD)) { 2484 if (imsg_compose(ibuf_rde, IMSG_SESSION_FLUSH, 2485 p->conf.id, 0, -1, &i, sizeof(i)) == -1) 2486 return (-1); 2487 log_peer_warnx(&p->conf, "graceful restart of " 2488 "%s, not restarted, flushing", aid2str(i)); 2489 } else 2490 p->capa.neg.grestart.flags[i] |= 2491 CAPA_GR_RESTARTING; 2492 } 2493 } 2494 p->capa.neg.grestart.timeout = p->capa.peer.grestart.timeout; 2495 p->capa.neg.grestart.restart = p->capa.peer.grestart.restart; 2496 2497 return (0); 2498 } 2499 2500 void 2501 session_dispatch_imsg(struct imsgbuf *ibuf, int idx, u_int *listener_cnt) 2502 { 2503 struct imsg imsg; 2504 struct mrt xmrt; 2505 struct mrt *mrt; 2506 struct imsgbuf *i; 2507 struct peer_config *pconf; 2508 struct peer *p, *next; 2509 struct listen_addr *la, *nla; 2510 struct kif *kif; 2511 u_char *data; 2512 enum reconf_action reconf; 2513 int n, fd, depend_ok, restricted; 2514 u_int8_t aid, errcode, subcode; 2515 2516 while (ibuf) { 2517 if ((n = imsg_get(ibuf, &imsg)) == -1) 2518 fatal("session_dispatch_imsg: imsg_get error"); 2519 2520 if (n == 0) 2521 break; 2522 2523 switch (imsg.hdr.type) { 2524 case IMSG_SOCKET_CONN: 2525 case IMSG_SOCKET_CONN_CTL: 2526 if (idx != PFD_PIPE_MAIN) 2527 fatalx("reconf request not from parent"); 2528 if ((fd = imsg.fd) == -1) { 2529 log_warnx("expected to receive imsg fd to " 2530 "RDE but didn't receive any"); 2531 break; 2532 } 2533 if ((i = malloc(sizeof(struct imsgbuf))) == NULL) 2534 fatal(NULL); 2535 imsg_init(i, fd); 2536 if (imsg.hdr.type == IMSG_SOCKET_CONN) { 2537 if (ibuf_rde) { 2538 log_warnx("Unexpected imsg connection " 2539 "to RDE received"); 2540 msgbuf_clear(&ibuf_rde->w); 2541 free(ibuf_rde); 2542 } 2543 ibuf_rde = i; 2544 } else { 2545 if (ibuf_rde_ctl) { 2546 log_warnx("Unexpected imsg ctl " 2547 "connection to RDE received"); 2548 msgbuf_clear(&ibuf_rde_ctl->w); 2549 free(ibuf_rde_ctl); 2550 } 2551 ibuf_rde_ctl = i; 2552 } 2553 break; 2554 case IMSG_RECONF_CONF: 2555 if (idx != PFD_PIPE_MAIN) 2556 fatalx("reconf request not from parent"); 2557 if ((nconf = malloc(sizeof(struct bgpd_config))) == 2558 NULL) 2559 fatal(NULL); 2560 memcpy(nconf, imsg.data, sizeof(struct bgpd_config)); 2561 if ((nconf->listen_addrs = calloc(1, 2562 sizeof(struct listen_addrs))) == NULL) 2563 fatal(NULL); 2564 TAILQ_INIT(nconf->listen_addrs); 2565 npeers = NULL; 2566 init_conf(nconf); 2567 pending_reconf = 1; 2568 break; 2569 case IMSG_RECONF_PEER: 2570 if (idx != PFD_PIPE_MAIN) 2571 fatalx("reconf request not from parent"); 2572 pconf = imsg.data; 2573 p = getpeerbyaddr(&pconf->remote_addr); 2574 if (p == NULL) { 2575 if ((p = calloc(1, sizeof(struct peer))) == 2576 NULL) 2577 fatal("new_peer"); 2578 p->state = p->prev_state = STATE_NONE; 2579 p->next = npeers; 2580 npeers = p; 2581 reconf = RECONF_REINIT; 2582 } else 2583 reconf = RECONF_KEEP; 2584 2585 memcpy(&p->conf, pconf, sizeof(struct peer_config)); 2586 p->conf.reconf_action = reconf; 2587 2588 /* sync the RDE in case we keep the peer */ 2589 if (reconf == RECONF_KEEP) { 2590 if (imsg_compose(ibuf_rde, IMSG_SESSION_ADD, 2591 p->conf.id, 0, -1, &p->conf, 2592 sizeof(struct peer_config)) == -1) 2593 fatalx("imsg_compose error"); 2594 if (p->conf.template) { 2595 /* apply the conf to all clones */ 2596 struct peer *np; 2597 for (np = peers; np; np = np->next) { 2598 if (np->template != p) 2599 continue; 2600 session_template_clone(np, 2601 NULL, np->conf.id, 2602 np->conf.remote_as); 2603 if (imsg_compose(ibuf_rde, 2604 IMSG_SESSION_ADD, 2605 np->conf.id, 0, -1, 2606 &np->conf, 2607 sizeof(struct peer_config)) 2608 == -1) 2609 fatalx("imsg_compose error"); 2610 } 2611 } 2612 } 2613 break; 2614 case IMSG_RECONF_LISTENER: 2615 if (idx != PFD_PIPE_MAIN) 2616 fatalx("reconf request not from parent"); 2617 if (nconf == NULL) 2618 fatalx("IMSG_RECONF_LISTENER but no config"); 2619 nla = imsg.data; 2620 TAILQ_FOREACH(la, conf->listen_addrs, entry) 2621 if (!la_cmp(la, nla)) 2622 break; 2623 2624 if (la == NULL) { 2625 if (nla->reconf != RECONF_REINIT) 2626 fatalx("king bula sez: " 2627 "expected REINIT"); 2628 2629 if ((nla->fd = imsg.fd) == -1) 2630 log_warnx("expected to receive fd for " 2631 "%s but didn't receive any", 2632 log_sockaddr((struct sockaddr *) 2633 &nla->sa)); 2634 2635 la = calloc(1, sizeof(struct listen_addr)); 2636 if (la == NULL) 2637 fatal(NULL); 2638 memcpy(&la->sa, &nla->sa, sizeof(la->sa)); 2639 la->flags = nla->flags; 2640 la->fd = nla->fd; 2641 la->reconf = RECONF_REINIT; 2642 TAILQ_INSERT_TAIL(nconf->listen_addrs, la, 2643 entry); 2644 } else { 2645 if (nla->reconf != RECONF_KEEP) 2646 fatalx("king bula sez: expected KEEP"); 2647 la->reconf = RECONF_KEEP; 2648 } 2649 2650 break; 2651 case IMSG_RECONF_CTRL: 2652 if (idx != PFD_PIPE_MAIN) 2653 fatalx("reconf request not from parent"); 2654 if (imsg.hdr.len != IMSG_HEADER_SIZE + 2655 sizeof(restricted)) 2656 fatalx("IFINFO imsg with wrong len"); 2657 memcpy(&restricted, imsg.data, sizeof(restricted)); 2658 if (imsg.fd == -1) { 2659 log_warnx("expected to receive fd for control " 2660 "socket but didn't receive any"); 2661 break; 2662 } 2663 if (restricted) { 2664 control_shutdown(rcsock); 2665 rcsock = imsg.fd; 2666 } else { 2667 control_shutdown(csock); 2668 csock = imsg.fd; 2669 } 2670 break; 2671 case IMSG_RECONF_DONE: 2672 if (idx != PFD_PIPE_MAIN) 2673 fatalx("reconf request not from parent"); 2674 if (nconf == NULL) 2675 fatalx("got IMSG_RECONF_DONE but no config"); 2676 conf->flags = nconf->flags; 2677 conf->log = nconf->log; 2678 conf->bgpid = nconf->bgpid; 2679 conf->clusterid = nconf->clusterid; 2680 conf->as = nconf->as; 2681 conf->short_as = nconf->short_as; 2682 conf->holdtime = nconf->holdtime; 2683 conf->min_holdtime = nconf->min_holdtime; 2684 conf->connectretry = nconf->connectretry; 2685 2686 /* add new peers */ 2687 for (p = npeers; p != NULL; p = next) { 2688 next = p->next; 2689 p->next = peers; 2690 peers = p; 2691 } 2692 /* find ones that need attention */ 2693 for (p = peers; p != NULL; p = p->next) { 2694 /* needs to be deleted? */ 2695 if (p->conf.reconf_action == RECONF_NONE && 2696 !p->template) 2697 p->conf.reconf_action = RECONF_DELETE; 2698 /* had demotion, is demoted, demote removed? */ 2699 if (p->demoted && !p->conf.demote_group[0]) 2700 session_demote(p, -1); 2701 } 2702 2703 /* delete old listeners */ 2704 for (la = TAILQ_FIRST(conf->listen_addrs); la != NULL; 2705 la = nla) { 2706 nla = TAILQ_NEXT(la, entry); 2707 if (la->reconf == RECONF_NONE) { 2708 log_info("not listening on %s any more", 2709 log_sockaddr( 2710 (struct sockaddr *)&la->sa)); 2711 TAILQ_REMOVE(conf->listen_addrs, la, 2712 entry); 2713 close(la->fd); 2714 free(la); 2715 } 2716 } 2717 2718 /* add new listeners */ 2719 while ((la = TAILQ_FIRST(nconf->listen_addrs)) != 2720 NULL) { 2721 TAILQ_REMOVE(nconf->listen_addrs, la, entry); 2722 TAILQ_INSERT_TAIL(conf->listen_addrs, la, 2723 entry); 2724 } 2725 2726 setup_listeners(listener_cnt); 2727 free(nconf->listen_addrs); 2728 free(nconf); 2729 nconf = NULL; 2730 pending_reconf = 0; 2731 log_info("SE reconfigured"); 2732 imsg_compose(ibuf_main, IMSG_RECONF_DONE, 0, 0, 2733 -1, NULL, 0); 2734 break; 2735 case IMSG_IFINFO: 2736 if (idx != PFD_PIPE_MAIN) 2737 fatalx("IFINFO message not from parent"); 2738 if (imsg.hdr.len != IMSG_HEADER_SIZE + 2739 sizeof(struct kif)) 2740 fatalx("IFINFO imsg with wrong len"); 2741 kif = imsg.data; 2742 depend_ok = (kif->flags & IFF_UP) && 2743 LINK_STATE_IS_UP(kif->link_state); 2744 2745 for (p = peers; p != NULL; p = p->next) 2746 if (!strcmp(p->conf.if_depend, kif->ifname)) { 2747 if (depend_ok && !p->depend_ok) { 2748 p->depend_ok = depend_ok; 2749 bgp_fsm(p, EVNT_START); 2750 } else if (!depend_ok && p->depend_ok) { 2751 p->depend_ok = depend_ok; 2752 session_stop(p, 2753 ERR_CEASE_OTHER_CHANGE); 2754 } 2755 } 2756 break; 2757 case IMSG_MRT_OPEN: 2758 case IMSG_MRT_REOPEN: 2759 if (imsg.hdr.len > IMSG_HEADER_SIZE + 2760 sizeof(struct mrt)) { 2761 log_warnx("wrong imsg len"); 2762 break; 2763 } 2764 2765 memcpy(&xmrt, imsg.data, sizeof(struct mrt)); 2766 if ((xmrt.wbuf.fd = imsg.fd) == -1) 2767 log_warnx("expected to receive fd for mrt dump " 2768 "but didn't receive any"); 2769 2770 mrt = mrt_get(&mrthead, &xmrt); 2771 if (mrt == NULL) { 2772 /* new dump */ 2773 mrt = calloc(1, sizeof(struct mrt)); 2774 if (mrt == NULL) 2775 fatal("session_dispatch_imsg"); 2776 memcpy(mrt, &xmrt, sizeof(struct mrt)); 2777 TAILQ_INIT(&mrt->wbuf.bufs); 2778 LIST_INSERT_HEAD(&mrthead, mrt, entry); 2779 } else { 2780 /* old dump reopened */ 2781 close(mrt->wbuf.fd); 2782 mrt->wbuf.fd = xmrt.wbuf.fd; 2783 } 2784 break; 2785 case IMSG_MRT_CLOSE: 2786 if (imsg.hdr.len > IMSG_HEADER_SIZE + 2787 sizeof(struct mrt)) { 2788 log_warnx("wrong imsg len"); 2789 break; 2790 } 2791 2792 memcpy(&xmrt, imsg.data, sizeof(struct mrt)); 2793 mrt = mrt_get(&mrthead, &xmrt); 2794 if (mrt != NULL) 2795 mrt_done(mrt); 2796 break; 2797 case IMSG_CTL_KROUTE: 2798 case IMSG_CTL_KROUTE_ADDR: 2799 case IMSG_CTL_SHOW_NEXTHOP: 2800 case IMSG_CTL_SHOW_INTERFACE: 2801 case IMSG_CTL_SHOW_FIB_TABLES: 2802 if (idx != PFD_PIPE_MAIN) 2803 fatalx("ctl kroute request not from parent"); 2804 control_imsg_relay(&imsg); 2805 break; 2806 case IMSG_CTL_SHOW_RIB: 2807 case IMSG_CTL_SHOW_RIB_PREFIX: 2808 case IMSG_CTL_SHOW_RIB_ATTR: 2809 case IMSG_CTL_SHOW_RIB_MEM: 2810 case IMSG_CTL_SHOW_NETWORK: 2811 case IMSG_CTL_SHOW_NEIGHBOR: 2812 if (idx != PFD_PIPE_ROUTE_CTL) 2813 fatalx("ctl rib request not from RDE"); 2814 control_imsg_relay(&imsg); 2815 break; 2816 case IMSG_CTL_END: 2817 case IMSG_CTL_RESULT: 2818 control_imsg_relay(&imsg); 2819 break; 2820 case IMSG_UPDATE: 2821 if (idx != PFD_PIPE_ROUTE) 2822 fatalx("update request not from RDE"); 2823 if (imsg.hdr.len > IMSG_HEADER_SIZE + 2824 MAX_PKTSIZE - MSGSIZE_HEADER || 2825 imsg.hdr.len < IMSG_HEADER_SIZE + 2826 MSGSIZE_UPDATE_MIN - MSGSIZE_HEADER) 2827 log_warnx("RDE sent invalid update"); 2828 else 2829 session_update(imsg.hdr.peerid, imsg.data, 2830 imsg.hdr.len - IMSG_HEADER_SIZE); 2831 break; 2832 case IMSG_UPDATE_ERR: 2833 if (idx != PFD_PIPE_ROUTE) 2834 fatalx("update request not from RDE"); 2835 if (imsg.hdr.len < IMSG_HEADER_SIZE + 2) { 2836 log_warnx("RDE sent invalid notification"); 2837 break; 2838 } 2839 if ((p = getpeerbyid(imsg.hdr.peerid)) == NULL) { 2840 log_warnx("no such peer: id=%u", 2841 imsg.hdr.peerid); 2842 break; 2843 } 2844 data = imsg.data; 2845 errcode = *data++; 2846 subcode = *data++; 2847 2848 if (imsg.hdr.len == IMSG_HEADER_SIZE + 2) 2849 data = NULL; 2850 2851 session_notification(p, errcode, subcode, 2852 data, imsg.hdr.len - IMSG_HEADER_SIZE - 2); 2853 switch (errcode) { 2854 case ERR_CEASE: 2855 switch (subcode) { 2856 case ERR_CEASE_MAX_PREFIX: 2857 bgp_fsm(p, EVNT_STOP); 2858 if (p->conf.max_prefix_restart) 2859 timer_set(p, Timer_IdleHold, 60 * 2860 p->conf.max_prefix_restart); 2861 break; 2862 default: 2863 bgp_fsm(p, EVNT_CON_FATAL); 2864 break; 2865 } 2866 break; 2867 default: 2868 bgp_fsm(p, EVNT_CON_FATAL); 2869 break; 2870 } 2871 break; 2872 case IMSG_SESSION_RESTARTED: 2873 if (idx != PFD_PIPE_ROUTE) 2874 fatalx("update request not from RDE"); 2875 if (imsg.hdr.len < IMSG_HEADER_SIZE + sizeof(aid)) { 2876 log_warnx("RDE sent invalid restart msg"); 2877 break; 2878 } 2879 if ((p = getpeerbyid(imsg.hdr.peerid)) == NULL) { 2880 log_warnx("no such peer: id=%u", 2881 imsg.hdr.peerid); 2882 break; 2883 } 2884 memcpy(&aid, imsg.data, sizeof(aid)); 2885 if (aid >= AID_MAX) 2886 fatalx("IMSG_SESSION_RESTARTED: bad AID"); 2887 if (p->capa.neg.grestart.flags[aid] & 2888 CAPA_GR_RESTARTING) { 2889 log_peer_warnx(&p->conf, 2890 "graceful restart of %s finished", 2891 aid2str(aid)); 2892 p->capa.neg.grestart.flags[aid] &= 2893 ~CAPA_GR_RESTARTING; 2894 timer_stop(p, Timer_RestartTimeout); 2895 2896 /* signal back to RDE to cleanup stale routes */ 2897 if (imsg_compose(ibuf_rde, 2898 IMSG_SESSION_RESTARTED, imsg.hdr.peerid, 0, 2899 -1, &aid, sizeof(aid)) == -1) 2900 fatal("imsg_compose: " 2901 "IMSG_SESSION_RESTARTED"); 2902 } 2903 break; 2904 case IMSG_SESSION_DOWN: 2905 if (idx != PFD_PIPE_ROUTE) 2906 fatalx("update request not from RDE"); 2907 if ((p = getpeerbyid(imsg.hdr.peerid)) == NULL) { 2908 log_warnx("no such peer: id=%u", 2909 imsg.hdr.peerid); 2910 break; 2911 } 2912 session_stop(p, ERR_CEASE_ADMIN_DOWN); 2913 break; 2914 default: 2915 break; 2916 } 2917 imsg_free(&imsg); 2918 } 2919 } 2920 2921 int 2922 la_cmp(struct listen_addr *a, struct listen_addr *b) 2923 { 2924 struct sockaddr_in *in_a, *in_b; 2925 struct sockaddr_in6 *in6_a, *in6_b; 2926 2927 if (a->sa.ss_family != b->sa.ss_family) 2928 return (1); 2929 2930 switch (a->sa.ss_family) { 2931 case AF_INET: 2932 in_a = (struct sockaddr_in *)&a->sa; 2933 in_b = (struct sockaddr_in *)&b->sa; 2934 if (in_a->sin_addr.s_addr != in_b->sin_addr.s_addr) 2935 return (1); 2936 if (in_a->sin_port != in_b->sin_port) 2937 return (1); 2938 break; 2939 case AF_INET6: 2940 in6_a = (struct sockaddr_in6 *)&a->sa; 2941 in6_b = (struct sockaddr_in6 *)&b->sa; 2942 if (bcmp(&in6_a->sin6_addr, &in6_b->sin6_addr, 2943 sizeof(struct in6_addr))) 2944 return (1); 2945 if (in6_a->sin6_port != in6_b->sin6_port) 2946 return (1); 2947 break; 2948 default: 2949 fatal("king bula sez: unknown address family"); 2950 /* NOTREACHED */ 2951 } 2952 2953 return (0); 2954 } 2955 2956 struct peer * 2957 getpeerbyaddr(struct bgpd_addr *addr) 2958 { 2959 struct peer *p; 2960 2961 /* we might want a more effective way to find peers by IP */ 2962 for (p = peers; p != NULL && 2963 memcmp(&p->conf.remote_addr, addr, sizeof(p->conf.remote_addr)); 2964 p = p->next) 2965 ; /* nothing */ 2966 2967 return (p); 2968 } 2969 2970 struct peer * 2971 getpeerbydesc(const char *descr) 2972 { 2973 struct peer *p, *res = NULL; 2974 int match = 0; 2975 2976 for (p = peers; p != NULL; p = p->next) 2977 if (!strcmp(p->conf.descr, descr)) { 2978 res = p; 2979 match++; 2980 } 2981 2982 if (match > 1) 2983 log_info("neighbor description \"%s\" not unique, request " 2984 "aborted", descr); 2985 2986 if (match == 1) 2987 return (res); 2988 else 2989 return (NULL); 2990 } 2991 2992 struct peer * 2993 getpeerbyip(struct sockaddr *ip) 2994 { 2995 struct bgpd_addr addr; 2996 struct peer *p, *newpeer, *loose = NULL; 2997 u_int32_t id; 2998 2999 sa2addr(ip, &addr); 3000 3001 /* we might want a more effective way to find peers by IP */ 3002 for (p = peers; p != NULL; p = p->next) 3003 if (!p->conf.template && 3004 !memcmp(&addr, &p->conf.remote_addr, sizeof(addr))) 3005 return (p); 3006 3007 /* try template matching */ 3008 for (p = peers; p != NULL; p = p->next) 3009 if (p->conf.template && 3010 p->conf.remote_addr.aid == addr.aid && 3011 session_match_mask(p, &addr)) 3012 if (loose == NULL || loose->conf.remote_masklen < 3013 p->conf.remote_masklen) 3014 loose = p; 3015 3016 if (loose != NULL) { 3017 /* clone */ 3018 if ((newpeer = malloc(sizeof(struct peer))) == NULL) 3019 fatal(NULL); 3020 memcpy(newpeer, loose, sizeof(struct peer)); 3021 for (id = UINT_MAX; id > UINT_MAX / 2; id--) { 3022 for (p = peers; p != NULL && p->conf.id != id; 3023 p = p->next) 3024 ; /* nothing */ 3025 if (p == NULL) { /* we found a free id */ 3026 break; 3027 } 3028 } 3029 newpeer->template = loose; 3030 session_template_clone(newpeer, ip, id, 0); 3031 newpeer->state = newpeer->prev_state = STATE_NONE; 3032 newpeer->conf.reconf_action = RECONF_KEEP; 3033 newpeer->rbuf = NULL; 3034 init_peer(newpeer); 3035 bgp_fsm(newpeer, EVNT_START); 3036 newpeer->next = peers; 3037 peers = newpeer; 3038 return (newpeer); 3039 } 3040 3041 return (NULL); 3042 } 3043 3044 void 3045 session_template_clone(struct peer *p, struct sockaddr *ip, u_int32_t id, 3046 u_int32_t as) 3047 { 3048 struct bgpd_addr remote_addr; 3049 3050 if (ip) 3051 sa2addr(ip, &remote_addr); 3052 else 3053 memcpy(&remote_addr, &p->conf.remote_addr, sizeof(remote_addr)); 3054 3055 memcpy(&p->conf, &p->template->conf, sizeof(struct peer_config)); 3056 3057 p->conf.id = id; 3058 3059 if (as) { 3060 p->conf.remote_as = as; 3061 p->conf.ebgp = (p->conf.remote_as != conf->as); 3062 if (!p->conf.ebgp) 3063 /* force enforce_as off for iBGP sessions */ 3064 p->conf.enforce_as = ENFORCE_AS_OFF; 3065 } 3066 3067 memcpy(&p->conf.remote_addr, &remote_addr, sizeof(remote_addr)); 3068 switch (p->conf.remote_addr.aid) { 3069 case AID_INET: 3070 p->conf.remote_masklen = 32; 3071 break; 3072 case AID_INET6: 3073 p->conf.remote_masklen = 128; 3074 break; 3075 } 3076 p->conf.template = 0; 3077 } 3078 3079 int 3080 session_match_mask(struct peer *p, struct bgpd_addr *a) 3081 { 3082 in_addr_t v4mask; 3083 struct in6_addr masked; 3084 3085 switch (p->conf.remote_addr.aid) { 3086 case AID_INET: 3087 v4mask = htonl(prefixlen2mask(p->conf.remote_masklen)); 3088 if (p->conf.remote_addr.v4.s_addr == (a->v4.s_addr & v4mask)) 3089 return (1); 3090 return (0); 3091 case AID_INET6: 3092 inet6applymask(&masked, &a->v6, p->conf.remote_masklen); 3093 3094 if (!memcmp(&masked, &p->conf.remote_addr.v6, sizeof(masked))) 3095 return (1); 3096 return (0); 3097 } 3098 return (0); 3099 } 3100 3101 struct peer * 3102 getpeerbyid(u_int32_t peerid) 3103 { 3104 struct peer *p; 3105 3106 /* we might want a more effective way to find peers by IP */ 3107 for (p = peers; p != NULL && 3108 p->conf.id != peerid; p = p->next) 3109 ; /* nothing */ 3110 3111 return (p); 3112 } 3113 3114 void 3115 session_down(struct peer *peer) 3116 { 3117 bzero(&peer->capa.neg, sizeof(peer->capa.neg)); 3118 peer->stats.last_updown = time(NULL); 3119 if (imsg_compose(ibuf_rde, IMSG_SESSION_DOWN, peer->conf.id, 0, -1, 3120 NULL, 0) == -1) 3121 fatalx("imsg_compose error"); 3122 } 3123 3124 void 3125 session_up(struct peer *p) 3126 { 3127 struct session_up sup; 3128 3129 if (imsg_compose(ibuf_rde, IMSG_SESSION_ADD, p->conf.id, 0, -1, 3130 &p->conf, sizeof(p->conf)) == -1) 3131 fatalx("imsg_compose error"); 3132 3133 sa2addr((struct sockaddr *)&p->sa_local, &sup.local_addr); 3134 sa2addr((struct sockaddr *)&p->sa_remote, &sup.remote_addr); 3135 3136 sup.remote_bgpid = p->remote_bgpid; 3137 sup.short_as = p->short_as; 3138 memcpy(&sup.capa, &p->capa.neg, sizeof(sup.capa)); 3139 p->stats.last_updown = time(NULL); 3140 if (imsg_compose(ibuf_rde, IMSG_SESSION_UP, p->conf.id, 0, -1, 3141 &sup, sizeof(sup)) == -1) 3142 fatalx("imsg_compose error"); 3143 } 3144 3145 int 3146 imsg_ctl_parent(int type, u_int32_t peerid, pid_t pid, void *data, 3147 u_int16_t datalen) 3148 { 3149 return (imsg_compose(ibuf_main, type, peerid, pid, -1, data, datalen)); 3150 } 3151 3152 int 3153 imsg_ctl_rde(int type, pid_t pid, void *data, u_int16_t datalen) 3154 { 3155 /* 3156 * Use control socket to talk to RDE to bypass the queue of the 3157 * regular imsg socket. 3158 */ 3159 return (imsg_compose(ibuf_rde_ctl, type, 0, pid, -1, data, datalen)); 3160 } 3161 3162 void 3163 session_demote(struct peer *p, int level) 3164 { 3165 struct demote_msg msg; 3166 3167 strlcpy(msg.demote_group, p->conf.demote_group, 3168 sizeof(msg.demote_group)); 3169 msg.level = level; 3170 if (imsg_compose(ibuf_main, IMSG_DEMOTE, p->conf.id, 0, -1, 3171 &msg, sizeof(msg)) == -1) 3172 fatalx("imsg_compose error"); 3173 3174 p->demoted += level; 3175 } 3176 3177 void 3178 session_stop(struct peer *peer, u_int8_t subcode) 3179 { 3180 switch (peer->state) { 3181 case STATE_OPENSENT: 3182 case STATE_OPENCONFIRM: 3183 case STATE_ESTABLISHED: 3184 session_notification(peer, ERR_CEASE, subcode, NULL, 0); 3185 break; 3186 default: 3187 /* session not open, no need to send notification */ 3188 break; 3189 } 3190 bgp_fsm(peer, EVNT_STOP); 3191 } 3192