1 /* $OpenBSD: session.c,v 1.354 2016/09/03 16:22:17 renato Exp $ */ 2 3 /* 4 * Copyright (c) 2003, 2004, 2005 Henning Brauer <henning@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 #include <sys/types.h> 20 21 #include <sys/mman.h> 22 #include <sys/socket.h> 23 #include <sys/time.h> 24 #include <sys/resource.h> 25 #include <sys/un.h> 26 #include <net/if_types.h> 27 #include <netinet/in.h> 28 #include <netinet/ip.h> 29 #include <netinet/tcp.h> 30 #include <arpa/inet.h> 31 #include <limits.h> 32 33 #include <err.h> 34 #include <errno.h> 35 #include <fcntl.h> 36 #include <poll.h> 37 #include <pwd.h> 38 #include <signal.h> 39 #include <stdio.h> 40 #include <stdlib.h> 41 #include <string.h> 42 #include <unistd.h> 43 44 #include "bgpd.h" 45 #include "mrt.h" 46 #include "session.h" 47 48 #define PFD_PIPE_MAIN 0 49 #define PFD_PIPE_ROUTE 1 50 #define PFD_PIPE_ROUTE_CTL 2 51 #define PFD_SOCK_CTL 3 52 #define PFD_SOCK_RCTL 4 53 #define PFD_SOCK_PFKEY 5 54 #define PFD_LISTENERS_START 6 55 56 void session_sighdlr(int); 57 int setup_listeners(u_int *); 58 void init_conf(struct bgpd_config *); 59 void init_peer(struct peer *); 60 void start_timer_holdtime(struct peer *); 61 void start_timer_keepalive(struct peer *); 62 void session_close_connection(struct peer *); 
63 void change_state(struct peer *, enum session_state, enum session_events); 64 int session_setup_socket(struct peer *); 65 void session_accept(int); 66 int session_connect(struct peer *); 67 void session_tcp_established(struct peer *); 68 void session_capa_ann_none(struct peer *); 69 int session_capa_add(struct ibuf *, u_int8_t, u_int8_t); 70 int session_capa_add_mp(struct ibuf *, u_int8_t); 71 int session_capa_add_gr(struct peer *, struct ibuf *, u_int8_t); 72 struct bgp_msg *session_newmsg(enum msg_type, u_int16_t); 73 int session_sendmsg(struct bgp_msg *, struct peer *); 74 void session_open(struct peer *); 75 void session_keepalive(struct peer *); 76 void session_update(u_int32_t, void *, size_t); 77 void session_notification(struct peer *, u_int8_t, u_int8_t, void *, 78 ssize_t); 79 void session_rrefresh(struct peer *, u_int8_t); 80 int session_graceful_restart(struct peer *); 81 int session_graceful_stop(struct peer *); 82 int session_dispatch_msg(struct pollfd *, struct peer *); 83 int session_process_msg(struct peer *); 84 int parse_header(struct peer *, u_char *, u_int16_t *, u_int8_t *); 85 int parse_open(struct peer *); 86 int parse_update(struct peer *); 87 int parse_refresh(struct peer *); 88 int parse_notification(struct peer *); 89 int parse_capabilities(struct peer *, u_char *, u_int16_t, u_int32_t *); 90 int capa_neg_calc(struct peer *); 91 void session_dispatch_imsg(struct imsgbuf *, int, u_int *); 92 void session_up(struct peer *); 93 void session_down(struct peer *); 94 void session_demote(struct peer *, int); 95 96 int la_cmp(struct listen_addr *, struct listen_addr *); 97 struct peer *getpeerbyip(struct sockaddr *); 98 void session_template_clone(struct peer *, struct sockaddr *, 99 u_int32_t, u_int32_t); 100 int session_match_mask(struct peer *, struct bgpd_addr *); 101 struct peer *getpeerbyid(u_int32_t); 102 103 struct bgpd_config *conf, *nconf; 104 struct bgpd_sysdep sysdep; 105 struct peer *peers, *npeers; 106 volatile sig_atomic_t 
session_quit; 107 int pending_reconf; 108 int csock = -1, rcsock = -1; 109 u_int peer_cnt; 110 struct imsgbuf *ibuf_rde; 111 struct imsgbuf *ibuf_rde_ctl; 112 struct imsgbuf *ibuf_main; 113 114 struct mrt_head mrthead; 115 time_t pauseaccept; 116 117 void 118 session_sighdlr(int sig) 119 { 120 switch (sig) { 121 case SIGINT: 122 case SIGTERM: 123 session_quit = 1; 124 break; 125 } 126 } 127 128 int 129 setup_listeners(u_int *la_cnt) 130 { 131 int ttl = 255; 132 int opt; 133 struct listen_addr *la; 134 u_int cnt = 0; 135 136 TAILQ_FOREACH(la, conf->listen_addrs, entry) { 137 la->reconf = RECONF_NONE; 138 cnt++; 139 140 if (la->flags & LISTENER_LISTENING) 141 continue; 142 143 if (la->fd == -1) { 144 log_warn("cannot establish listener on %s: invalid fd", 145 log_sockaddr((struct sockaddr *)&la->sa)); 146 continue; 147 } 148 149 opt = 1; 150 if (setsockopt(la->fd, IPPROTO_TCP, TCP_MD5SIG, 151 &opt, sizeof(opt)) == -1) { 152 if (errno == ENOPROTOOPT) { /* system w/o md5sig */ 153 log_warnx("md5sig not available, disabling"); 154 sysdep.no_md5sig = 1; 155 } else 156 fatal("setsockopt TCP_MD5SIG"); 157 } 158 159 /* set ttl to 255 so that ttl-security works */ 160 if (la->sa.ss_family == AF_INET && setsockopt(la->fd, 161 IPPROTO_IP, IP_TTL, &ttl, sizeof(ttl)) == -1) { 162 log_warn("setup_listeners setsockopt TTL"); 163 continue; 164 } 165 if (la->sa.ss_family == AF_INET6 && setsockopt(la->fd, 166 IPPROTO_IPV6, IPV6_UNICAST_HOPS, &ttl, sizeof(ttl)) == -1) { 167 log_warn("setup_listeners setsockopt hoplimit"); 168 continue; 169 } 170 171 if (listen(la->fd, MAX_BACKLOG)) { 172 close(la->fd); 173 fatal("listen"); 174 } 175 176 la->flags |= LISTENER_LISTENING; 177 178 log_info("listening on %s", 179 log_sockaddr((struct sockaddr *)&la->sa)); 180 } 181 182 *la_cnt = cnt; 183 184 return (0); 185 } 186 187 void 188 session_main(int debug, int verbose) 189 { 190 int timeout, pfkeysock; 191 unsigned int i, j, idx_peers, idx_listeners, idx_mrts; 192 u_int pfd_elms = 0, peer_l_elms 
= 0, mrt_l_elms = 0; 193 u_int listener_cnt, ctl_cnt, mrt_cnt; 194 u_int new_cnt; 195 u_int32_t ctl_queued; 196 struct passwd *pw; 197 struct peer *p, **peer_l = NULL, *last, *next; 198 struct mrt *m, *xm, **mrt_l = NULL; 199 struct pollfd *pfd = NULL; 200 struct ctl_conn *ctl_conn; 201 struct listen_addr *la; 202 void *newp; 203 short events; 204 205 bgpd_process = PROC_SE; 206 log_procname = log_procnames[bgpd_process]; 207 208 log_init(debug); 209 log_verbose(verbose); 210 211 if ((pw = getpwnam(BGPD_USER)) == NULL) 212 fatal(NULL); 213 214 if (chroot(pw->pw_dir) == -1) 215 fatal("chroot"); 216 if (chdir("/") == -1) 217 fatal("chdir(\"/\")"); 218 219 setproctitle("session engine"); 220 pfkeysock = pfkey_init(&sysdep); 221 222 if (setgroups(1, &pw->pw_gid) || 223 setresgid(pw->pw_gid, pw->pw_gid, pw->pw_gid) || 224 setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid)) 225 fatal("can't drop privileges"); 226 227 if (pledge("stdio inet recvfd", NULL) == -1) 228 fatal("pledge"); 229 230 signal(SIGTERM, session_sighdlr); 231 signal(SIGINT, session_sighdlr); 232 signal(SIGPIPE, SIG_IGN); 233 signal(SIGHUP, SIG_IGN); 234 signal(SIGALRM, SIG_IGN); 235 signal(SIGUSR1, SIG_IGN); 236 237 if ((ibuf_main = malloc(sizeof(struct imsgbuf))) == NULL) 238 fatal(NULL); 239 imsg_init(ibuf_main, 3); 240 241 TAILQ_INIT(&ctl_conns); 242 LIST_INIT(&mrthead); 243 listener_cnt = 0; 244 peer_cnt = 0; 245 ctl_cnt = 0; 246 247 if ((conf = calloc(1, sizeof(struct bgpd_config))) == NULL) 248 fatal(NULL); 249 if ((conf->listen_addrs = calloc(1, sizeof(struct listen_addrs))) == 250 NULL) 251 fatal(NULL); 252 TAILQ_INIT(conf->listen_addrs); 253 254 log_info("session engine ready"); 255 256 while (session_quit == 0) { 257 /* check for peers to be initialized or deleted */ 258 last = NULL; 259 if (!pending_reconf) { 260 for (p = peers; p != NULL; p = next) { 261 next = p->next; 262 /* cloned peer that idled out? 
*/ 263 if (p->template && (p->state == STATE_IDLE || 264 p->state == STATE_ACTIVE) && 265 time(NULL) - p->stats.last_updown >= 266 INTERVAL_HOLD_CLONED) 267 p->conf.reconf_action = RECONF_DELETE; 268 269 /* new peer that needs init? */ 270 if (p->state == STATE_NONE) 271 init_peer(p); 272 273 /* reinit due? */ 274 if (p->conf.reconf_action == RECONF_REINIT) { 275 session_stop(p, ERR_CEASE_ADMIN_RESET); 276 if (!p->conf.down) 277 timer_set(p, Timer_IdleHold, 0); 278 } 279 280 /* deletion due? */ 281 if (p->conf.reconf_action == RECONF_DELETE) { 282 if (p->demoted) 283 session_demote(p, -1); 284 p->conf.demote_group[0] = 0; 285 session_stop(p, ERR_CEASE_PEER_UNCONF); 286 log_peer_warnx(&p->conf, "removed"); 287 if (last != NULL) 288 last->next = next; 289 else 290 peers = next; 291 timer_remove_all(p); 292 free(p); 293 peer_cnt--; 294 continue; 295 } 296 p->conf.reconf_action = RECONF_NONE; 297 last = p; 298 } 299 } 300 301 if (peer_cnt > peer_l_elms) { 302 if ((newp = reallocarray(peer_l, peer_cnt, 303 sizeof(struct peer *))) == NULL) { 304 /* panic for now */ 305 log_warn("could not resize peer_l from %u -> %u" 306 " entries", peer_l_elms, peer_cnt); 307 fatalx("exiting"); 308 } 309 peer_l = newp; 310 peer_l_elms = peer_cnt; 311 } 312 313 mrt_cnt = 0; 314 for (m = LIST_FIRST(&mrthead); m != NULL; m = xm) { 315 xm = LIST_NEXT(m, entry); 316 if (m->state == MRT_STATE_REMOVE) { 317 mrt_clean(m); 318 LIST_REMOVE(m, entry); 319 free(m); 320 continue; 321 } 322 if (m->wbuf.queued) 323 mrt_cnt++; 324 } 325 326 if (mrt_cnt > mrt_l_elms) { 327 if ((newp = reallocarray(mrt_l, mrt_cnt, 328 sizeof(struct mrt *))) == NULL) { 329 /* panic for now */ 330 log_warn("could not resize mrt_l from %u -> %u" 331 " entries", mrt_l_elms, mrt_cnt); 332 fatalx("exiting"); 333 } 334 mrt_l = newp; 335 mrt_l_elms = mrt_cnt; 336 } 337 338 new_cnt = PFD_LISTENERS_START + listener_cnt + peer_cnt + 339 ctl_cnt + mrt_cnt; 340 if (new_cnt > pfd_elms) { 341 if ((newp = reallocarray(pfd, new_cnt, 342 
sizeof(struct pollfd))) == NULL) { 343 /* panic for now */ 344 log_warn("could not resize pfd from %u -> %u" 345 " entries", pfd_elms, new_cnt); 346 fatalx("exiting"); 347 } 348 pfd = newp; 349 pfd_elms = new_cnt; 350 } 351 352 bzero(pfd, sizeof(struct pollfd) * pfd_elms); 353 354 set_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main); 355 set_pollfd(&pfd[PFD_PIPE_ROUTE], ibuf_rde); 356 357 ctl_queued = 0; 358 TAILQ_FOREACH(ctl_conn, &ctl_conns, entry) 359 ctl_queued += ctl_conn->ibuf.w.queued; 360 361 /* 362 * Do not act as unlimited buffer. Don't read in more 363 * messages if the ctl sockets are getting full. 364 */ 365 if (ctl_queued < SESSION_CTL_QUEUE_MAX) 366 set_pollfd(&pfd[PFD_PIPE_ROUTE_CTL], ibuf_rde_ctl); 367 368 if (pauseaccept == 0) { 369 pfd[PFD_SOCK_CTL].fd = csock; 370 pfd[PFD_SOCK_CTL].events = POLLIN; 371 pfd[PFD_SOCK_RCTL].fd = rcsock; 372 pfd[PFD_SOCK_RCTL].events = POLLIN; 373 } else { 374 pfd[PFD_SOCK_CTL].fd = -1; 375 pfd[PFD_SOCK_RCTL].fd = -1; 376 } 377 pfd[PFD_SOCK_PFKEY].fd = pfkeysock; 378 pfd[PFD_SOCK_PFKEY].events = POLLIN; 379 380 i = PFD_LISTENERS_START; 381 TAILQ_FOREACH(la, conf->listen_addrs, entry) { 382 if (pauseaccept == 0) { 383 pfd[i].fd = la->fd; 384 pfd[i].events = POLLIN; 385 } else 386 pfd[i].fd = -1; 387 i++; 388 } 389 idx_listeners = i; 390 timeout = 240; /* loop every 240s at least */ 391 392 for (p = peers; p != NULL; p = p->next) { 393 time_t nextaction; 394 struct peer_timer *pt; 395 396 /* check timers */ 397 if ((pt = timer_nextisdue(p)) != NULL) { 398 switch (pt->type) { 399 case Timer_Hold: 400 bgp_fsm(p, EVNT_TIMER_HOLDTIME); 401 break; 402 case Timer_ConnectRetry: 403 bgp_fsm(p, EVNT_TIMER_CONNRETRY); 404 break; 405 case Timer_Keepalive: 406 bgp_fsm(p, EVNT_TIMER_KEEPALIVE); 407 break; 408 case Timer_IdleHold: 409 bgp_fsm(p, EVNT_START); 410 break; 411 case Timer_IdleHoldReset: 412 p->IdleHoldTime /= 2; 413 if (p->IdleHoldTime <= 414 INTERVAL_IDLE_HOLD_INITIAL) { 415 p->IdleHoldTime = 416 INTERVAL_IDLE_HOLD_INITIAL; 417 
timer_stop(p, 418 Timer_IdleHoldReset); 419 p->errcnt = 0; 420 } else 421 timer_set(p, 422 Timer_IdleHoldReset, 423 p->IdleHoldTime); 424 break; 425 case Timer_CarpUndemote: 426 timer_stop(p, Timer_CarpUndemote); 427 if (p->demoted && 428 p->state == STATE_ESTABLISHED) 429 session_demote(p, -1); 430 break; 431 case Timer_RestartTimeout: 432 timer_stop(p, Timer_RestartTimeout); 433 session_graceful_stop(p); 434 break; 435 default: 436 fatalx("King Bula lost in time"); 437 } 438 } 439 if ((nextaction = timer_nextduein(p)) != -1 && 440 nextaction < timeout) 441 timeout = nextaction; 442 443 /* are we waiting for a write? */ 444 events = POLLIN; 445 if (p->wbuf.queued > 0 || p->state == STATE_CONNECT) 446 events |= POLLOUT; 447 /* is there still work to do? */ 448 if (p->rbuf && p->rbuf->wpos) 449 timeout = 0; 450 451 /* poll events */ 452 if (p->fd != -1 && events != 0) { 453 pfd[i].fd = p->fd; 454 pfd[i].events = events; 455 peer_l[i - idx_listeners] = p; 456 i++; 457 } 458 } 459 460 idx_peers = i; 461 462 LIST_FOREACH(m, &mrthead, entry) 463 if (m->wbuf.queued) { 464 pfd[i].fd = m->wbuf.fd; 465 pfd[i].events = POLLOUT; 466 mrt_l[i - idx_peers] = m; 467 i++; 468 } 469 470 idx_mrts = i; 471 472 TAILQ_FOREACH(ctl_conn, &ctl_conns, entry) { 473 pfd[i].fd = ctl_conn->ibuf.fd; 474 pfd[i].events = POLLIN; 475 if (ctl_conn->ibuf.w.queued > 0) 476 pfd[i].events |= POLLOUT; 477 i++; 478 } 479 480 if (pauseaccept && timeout > 1) 481 timeout = 1; 482 if (timeout < 0) 483 timeout = 0; 484 if (poll(pfd, i, timeout * 1000) == -1) 485 if (errno != EINTR) 486 fatal("poll error"); 487 488 /* 489 * If we previously saw fd exhaustion, we stop accept() 490 * for 1 second to throttle the accept() loop. 
491 */ 492 if (pauseaccept && getmonotime() > pauseaccept + 1) 493 pauseaccept = 0; 494 495 if (handle_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main) == -1) { 496 log_warnx("SE: Lost connection to parent"); 497 session_quit = 1; 498 continue; 499 } else 500 session_dispatch_imsg(ibuf_main, PFD_PIPE_MAIN, 501 &listener_cnt); 502 503 if (handle_pollfd(&pfd[PFD_PIPE_ROUTE], ibuf_rde) == -1) { 504 log_warnx("SE: Lost connection to RDE"); 505 msgbuf_clear(&ibuf_rde->w); 506 free(ibuf_rde); 507 ibuf_rde = NULL; 508 } else 509 session_dispatch_imsg(ibuf_rde, PFD_PIPE_ROUTE, 510 &listener_cnt); 511 512 if (handle_pollfd(&pfd[PFD_PIPE_ROUTE_CTL], ibuf_rde_ctl) == 513 -1) { 514 log_warnx("SE: Lost connection to RDE control"); 515 msgbuf_clear(&ibuf_rde_ctl->w); 516 free(ibuf_rde_ctl); 517 ibuf_rde_ctl = NULL; 518 } else 519 session_dispatch_imsg(ibuf_rde_ctl, PFD_PIPE_ROUTE_CTL, 520 &listener_cnt); 521 522 if (pfd[PFD_SOCK_CTL].revents & POLLIN) 523 ctl_cnt += control_accept(csock, 0); 524 525 if (pfd[PFD_SOCK_RCTL].revents & POLLIN) 526 ctl_cnt += control_accept(rcsock, 1); 527 528 if (pfd[PFD_SOCK_PFKEY].revents & POLLIN) { 529 if (pfkey_read(pfkeysock, NULL) == -1) { 530 log_warnx("pfkey_read failed, exiting..."); 531 session_quit = 1; 532 } 533 } 534 535 for (j = PFD_LISTENERS_START; j < idx_listeners; j++) 536 if (pfd[j].revents & POLLIN) 537 session_accept(pfd[j].fd); 538 539 for (; j < idx_peers; j++) 540 session_dispatch_msg(&pfd[j], 541 peer_l[j - idx_listeners]); 542 543 for (p = peers; p != NULL; p = p->next) 544 if (p->rbuf && p->rbuf->wpos) 545 session_process_msg(p); 546 547 for (; j < idx_mrts; j++) 548 if (pfd[j].revents & POLLOUT) 549 mrt_write(mrt_l[j - idx_peers]); 550 551 for (; j < i; j++) 552 control_dispatch_msg(&pfd[j], &ctl_cnt); 553 } 554 555 /* close pipes */ 556 if (ibuf_rde) { 557 msgbuf_write(&ibuf_rde->w); 558 msgbuf_clear(&ibuf_rde->w); 559 close(ibuf_rde->fd); 560 free(ibuf_rde); 561 } 562 if (ibuf_rde_ctl) { 563 msgbuf_clear(&ibuf_rde_ctl->w); 564 
close(ibuf_rde_ctl->fd); 565 free(ibuf_rde_ctl); 566 } 567 msgbuf_write(&ibuf_main->w); 568 msgbuf_clear(&ibuf_main->w); 569 close(ibuf_main->fd); 570 free(ibuf_main); 571 572 while ((p = peers) != NULL) { 573 peers = p->next; 574 session_stop(p, ERR_CEASE_ADMIN_DOWN); 575 pfkey_remove(p); 576 free(p); 577 } 578 579 while ((m = LIST_FIRST(&mrthead)) != NULL) { 580 mrt_clean(m); 581 LIST_REMOVE(m, entry); 582 free(m); 583 } 584 585 while ((la = TAILQ_FIRST(conf->listen_addrs)) != NULL) { 586 TAILQ_REMOVE(conf->listen_addrs, la, entry); 587 free(la); 588 } 589 free(conf->listen_addrs); 590 free(peer_l); 591 free(mrt_l); 592 free(pfd); 593 594 595 control_shutdown(csock); 596 control_shutdown(rcsock); 597 log_info("session engine exiting"); 598 exit(0); 599 } 600 601 void 602 init_conf(struct bgpd_config *c) 603 { 604 if (!c->holdtime) 605 c->holdtime = INTERVAL_HOLD; 606 if (!c->connectretry) 607 c->connectretry = INTERVAL_CONNECTRETRY; 608 } 609 610 void 611 init_peer(struct peer *p) 612 { 613 TAILQ_INIT(&p->timers); 614 p->fd = p->wbuf.fd = -1; 615 616 if (p->conf.if_depend[0]) 617 imsg_compose(ibuf_main, IMSG_IFINFO, 0, 0, -1, 618 p->conf.if_depend, sizeof(p->conf.if_depend)); 619 else 620 p->depend_ok = 1; 621 622 peer_cnt++; 623 624 change_state(p, STATE_IDLE, EVNT_NONE); 625 if (p->conf.down) 626 timer_stop(p, Timer_IdleHold); /* no autostart */ 627 else 628 timer_set(p, Timer_IdleHold, 0); /* start ASAP */ 629 630 /* 631 * on startup, demote if requested. 632 * do not handle new peers. they must reach ESTABLISHED beforehands. 633 * peers added at runtime have reconf_action set to RECONF_REINIT. 
634 */ 635 if (p->conf.reconf_action != RECONF_REINIT && p->conf.demote_group[0]) 636 session_demote(p, +1); 637 } 638 639 void 640 bgp_fsm(struct peer *peer, enum session_events event) 641 { 642 switch (peer->state) { 643 case STATE_NONE: 644 /* nothing */ 645 break; 646 case STATE_IDLE: 647 switch (event) { 648 case EVNT_START: 649 timer_stop(peer, Timer_Hold); 650 timer_stop(peer, Timer_Keepalive); 651 timer_stop(peer, Timer_IdleHold); 652 653 /* allocate read buffer */ 654 peer->rbuf = calloc(1, sizeof(struct ibuf_read)); 655 if (peer->rbuf == NULL) 656 fatal(NULL); 657 658 /* init write buffer */ 659 msgbuf_init(&peer->wbuf); 660 661 /* init pfkey - remove old if any, load new ones */ 662 pfkey_remove(peer); 663 if (pfkey_establish(peer) == -1) { 664 log_peer_warnx(&peer->conf, 665 "pfkey setup failed"); 666 return; 667 } 668 669 peer->stats.last_sent_errcode = 0; 670 peer->stats.last_sent_suberr = 0; 671 672 if (!peer->depend_ok) 673 timer_stop(peer, Timer_ConnectRetry); 674 else if (peer->passive || peer->conf.passive || 675 peer->conf.template) { 676 change_state(peer, STATE_ACTIVE, event); 677 timer_stop(peer, Timer_ConnectRetry); 678 } else { 679 change_state(peer, STATE_CONNECT, event); 680 timer_set(peer, Timer_ConnectRetry, 681 conf->connectretry); 682 session_connect(peer); 683 } 684 peer->passive = 0; 685 break; 686 default: 687 /* ignore */ 688 break; 689 } 690 break; 691 case STATE_CONNECT: 692 switch (event) { 693 case EVNT_START: 694 /* ignore */ 695 break; 696 case EVNT_CON_OPEN: 697 session_tcp_established(peer); 698 session_open(peer); 699 timer_stop(peer, Timer_ConnectRetry); 700 peer->holdtime = INTERVAL_HOLD_INITIAL; 701 start_timer_holdtime(peer); 702 change_state(peer, STATE_OPENSENT, event); 703 break; 704 case EVNT_CON_OPENFAIL: 705 timer_set(peer, Timer_ConnectRetry, 706 conf->connectretry); 707 session_close_connection(peer); 708 change_state(peer, STATE_ACTIVE, event); 709 break; 710 case EVNT_TIMER_CONNRETRY: 711 timer_set(peer, 
Timer_ConnectRetry, 712 conf->connectretry); 713 session_connect(peer); 714 break; 715 default: 716 change_state(peer, STATE_IDLE, event); 717 break; 718 } 719 break; 720 case STATE_ACTIVE: 721 switch (event) { 722 case EVNT_START: 723 /* ignore */ 724 break; 725 case EVNT_CON_OPEN: 726 session_tcp_established(peer); 727 session_open(peer); 728 timer_stop(peer, Timer_ConnectRetry); 729 peer->holdtime = INTERVAL_HOLD_INITIAL; 730 start_timer_holdtime(peer); 731 change_state(peer, STATE_OPENSENT, event); 732 break; 733 case EVNT_CON_OPENFAIL: 734 timer_set(peer, Timer_ConnectRetry, 735 conf->connectretry); 736 session_close_connection(peer); 737 change_state(peer, STATE_ACTIVE, event); 738 break; 739 case EVNT_TIMER_CONNRETRY: 740 timer_set(peer, Timer_ConnectRetry, 741 peer->holdtime); 742 change_state(peer, STATE_CONNECT, event); 743 session_connect(peer); 744 break; 745 default: 746 change_state(peer, STATE_IDLE, event); 747 break; 748 } 749 break; 750 case STATE_OPENSENT: 751 switch (event) { 752 case EVNT_START: 753 /* ignore */ 754 break; 755 case EVNT_STOP: 756 change_state(peer, STATE_IDLE, event); 757 break; 758 case EVNT_CON_CLOSED: 759 session_close_connection(peer); 760 timer_set(peer, Timer_ConnectRetry, 761 conf->connectretry); 762 change_state(peer, STATE_ACTIVE, event); 763 break; 764 case EVNT_CON_FATAL: 765 change_state(peer, STATE_IDLE, event); 766 break; 767 case EVNT_TIMER_HOLDTIME: 768 session_notification(peer, ERR_HOLDTIMEREXPIRED, 769 0, NULL, 0); 770 change_state(peer, STATE_IDLE, event); 771 break; 772 case EVNT_RCVD_OPEN: 773 /* parse_open calls change_state itself on failure */ 774 if (parse_open(peer)) 775 break; 776 session_keepalive(peer); 777 change_state(peer, STATE_OPENCONFIRM, event); 778 break; 779 case EVNT_RCVD_NOTIFICATION: 780 if (parse_notification(peer)) { 781 change_state(peer, STATE_IDLE, event); 782 /* don't punish, capa negotiation */ 783 timer_set(peer, Timer_IdleHold, 0); 784 peer->IdleHoldTime /= 2; 785 } else 786 
change_state(peer, STATE_IDLE, event); 787 break; 788 default: 789 session_notification(peer, 790 ERR_FSM, ERR_FSM_UNEX_OPENSENT, NULL, 0); 791 change_state(peer, STATE_IDLE, event); 792 break; 793 } 794 break; 795 case STATE_OPENCONFIRM: 796 switch (event) { 797 case EVNT_START: 798 /* ignore */ 799 break; 800 case EVNT_STOP: 801 change_state(peer, STATE_IDLE, event); 802 break; 803 case EVNT_CON_CLOSED: 804 case EVNT_CON_FATAL: 805 change_state(peer, STATE_IDLE, event); 806 break; 807 case EVNT_TIMER_HOLDTIME: 808 session_notification(peer, ERR_HOLDTIMEREXPIRED, 809 0, NULL, 0); 810 change_state(peer, STATE_IDLE, event); 811 break; 812 case EVNT_TIMER_KEEPALIVE: 813 session_keepalive(peer); 814 break; 815 case EVNT_RCVD_KEEPALIVE: 816 start_timer_holdtime(peer); 817 change_state(peer, STATE_ESTABLISHED, event); 818 break; 819 case EVNT_RCVD_NOTIFICATION: 820 parse_notification(peer); 821 change_state(peer, STATE_IDLE, event); 822 break; 823 default: 824 session_notification(peer, 825 ERR_FSM, ERR_FSM_UNEX_OPENCONFIRM, NULL, 0); 826 change_state(peer, STATE_IDLE, event); 827 break; 828 } 829 break; 830 case STATE_ESTABLISHED: 831 switch (event) { 832 case EVNT_START: 833 /* ignore */ 834 break; 835 case EVNT_STOP: 836 change_state(peer, STATE_IDLE, event); 837 break; 838 case EVNT_CON_CLOSED: 839 case EVNT_CON_FATAL: 840 change_state(peer, STATE_IDLE, event); 841 break; 842 case EVNT_TIMER_HOLDTIME: 843 session_notification(peer, ERR_HOLDTIMEREXPIRED, 844 0, NULL, 0); 845 change_state(peer, STATE_IDLE, event); 846 break; 847 case EVNT_TIMER_KEEPALIVE: 848 session_keepalive(peer); 849 break; 850 case EVNT_RCVD_KEEPALIVE: 851 start_timer_holdtime(peer); 852 break; 853 case EVNT_RCVD_UPDATE: 854 start_timer_holdtime(peer); 855 if (parse_update(peer)) 856 change_state(peer, STATE_IDLE, event); 857 else 858 start_timer_holdtime(peer); 859 break; 860 case EVNT_RCVD_NOTIFICATION: 861 parse_notification(peer); 862 change_state(peer, STATE_IDLE, event); 863 break; 864 
default: 865 session_notification(peer, 866 ERR_FSM, ERR_FSM_UNEX_ESTABLISHED, NULL, 0); 867 change_state(peer, STATE_IDLE, event); 868 break; 869 } 870 break; 871 } 872 } 873 874 void 875 start_timer_holdtime(struct peer *peer) 876 { 877 if (peer->holdtime > 0) 878 timer_set(peer, Timer_Hold, peer->holdtime); 879 else 880 timer_stop(peer, Timer_Hold); 881 } 882 883 void 884 start_timer_keepalive(struct peer *peer) 885 { 886 if (peer->holdtime > 0) 887 timer_set(peer, Timer_Keepalive, peer->holdtime / 3); 888 else 889 timer_stop(peer, Timer_Keepalive); 890 } 891 892 void 893 session_close_connection(struct peer *peer) 894 { 895 if (peer->fd != -1) { 896 close(peer->fd); 897 pauseaccept = 0; 898 } 899 peer->fd = peer->wbuf.fd = -1; 900 } 901 902 void 903 change_state(struct peer *peer, enum session_state state, 904 enum session_events event) 905 { 906 struct mrt *mrt; 907 908 switch (state) { 909 case STATE_IDLE: 910 /* carp demotion first. new peers handled in init_peer */ 911 if (peer->state == STATE_ESTABLISHED && 912 peer->conf.demote_group[0] && !peer->demoted) 913 session_demote(peer, +1); 914 915 /* 916 * try to write out what's buffered (maybe a notification), 917 * don't bother if it fails 918 */ 919 if (peer->state >= STATE_OPENSENT && peer->wbuf.queued) 920 msgbuf_write(&peer->wbuf); 921 922 /* 923 * we must start the timer for the next EVNT_START 924 * if we are coming here due to an error and the 925 * session was not established successfully before, the 926 * starttimerinterval needs to be exponentially increased 927 */ 928 if (peer->IdleHoldTime == 0) 929 peer->IdleHoldTime = INTERVAL_IDLE_HOLD_INITIAL; 930 peer->holdtime = INTERVAL_HOLD_INITIAL; 931 timer_stop(peer, Timer_ConnectRetry); 932 timer_stop(peer, Timer_Keepalive); 933 timer_stop(peer, Timer_Hold); 934 timer_stop(peer, Timer_IdleHold); 935 timer_stop(peer, Timer_IdleHoldReset); 936 session_close_connection(peer); 937 msgbuf_clear(&peer->wbuf); 938 free(peer->rbuf); 939 peer->rbuf = NULL; 
940 bzero(&peer->capa.peer, sizeof(peer->capa.peer)); 941 942 if (event != EVNT_STOP) { 943 timer_set(peer, Timer_IdleHold, peer->IdleHoldTime); 944 if (event != EVNT_NONE && 945 peer->IdleHoldTime < MAX_IDLE_HOLD/2) 946 peer->IdleHoldTime *= 2; 947 } 948 if (peer->state == STATE_ESTABLISHED) { 949 if (peer->capa.neg.grestart.restart == 2 && 950 (event == EVNT_CON_CLOSED || 951 event == EVNT_CON_FATAL)) { 952 /* don't punish graceful restart */ 953 timer_set(peer, Timer_IdleHold, 0); 954 peer->IdleHoldTime /= 2; 955 session_graceful_restart(peer); 956 } else 957 session_down(peer); 958 } 959 if (peer->state == STATE_NONE || 960 peer->state == STATE_ESTABLISHED) { 961 /* initialize capability negotiation structures */ 962 memcpy(&peer->capa.ann, &peer->conf.capabilities, 963 sizeof(peer->capa.ann)); 964 if (!peer->conf.announce_capa) 965 session_capa_ann_none(peer); 966 } 967 break; 968 case STATE_CONNECT: 969 if (peer->state == STATE_ESTABLISHED && 970 peer->capa.neg.grestart.restart == 2) { 971 /* do the graceful restart dance */ 972 session_graceful_restart(peer); 973 peer->holdtime = INTERVAL_HOLD_INITIAL; 974 timer_stop(peer, Timer_ConnectRetry); 975 timer_stop(peer, Timer_Keepalive); 976 timer_stop(peer, Timer_Hold); 977 timer_stop(peer, Timer_IdleHold); 978 timer_stop(peer, Timer_IdleHoldReset); 979 session_close_connection(peer); 980 msgbuf_clear(&peer->wbuf); 981 bzero(&peer->capa.peer, sizeof(peer->capa.peer)); 982 } 983 break; 984 case STATE_ACTIVE: 985 break; 986 case STATE_OPENSENT: 987 break; 988 case STATE_OPENCONFIRM: 989 break; 990 case STATE_ESTABLISHED: 991 timer_set(peer, Timer_IdleHoldReset, peer->IdleHoldTime); 992 if (peer->demoted) 993 timer_set(peer, Timer_CarpUndemote, 994 INTERVAL_HOLD_DEMOTED); 995 session_up(peer); 996 break; 997 default: /* something seriously fucked */ 998 break; 999 } 1000 1001 log_statechange(peer, state, event); 1002 LIST_FOREACH(mrt, &mrthead, entry) { 1003 if (!(mrt->type == MRT_ALL_IN || mrt->type == 
MRT_ALL_OUT)) 1004 continue; 1005 if ((mrt->peer_id == 0 && mrt->group_id == 0) || 1006 mrt->peer_id == peer->conf.id || (mrt->group_id != 0 && 1007 mrt->group_id == peer->conf.groupid)) 1008 mrt_dump_state(mrt, peer->state, state, peer); 1009 } 1010 peer->prev_state = peer->state; 1011 peer->state = state; 1012 } 1013 1014 void 1015 session_accept(int listenfd) 1016 { 1017 int connfd; 1018 int opt; 1019 socklen_t len; 1020 struct sockaddr_storage cliaddr; 1021 struct peer *p = NULL; 1022 1023 len = sizeof(cliaddr); 1024 if ((connfd = accept4(listenfd, 1025 (struct sockaddr *)&cliaddr, &len, 1026 SOCK_CLOEXEC | SOCK_NONBLOCK)) == -1) { 1027 if (errno == ENFILE || errno == EMFILE) 1028 pauseaccept = getmonotime(); 1029 else if (errno != EWOULDBLOCK && errno != EINTR && 1030 errno != ECONNABORTED) 1031 log_warn("accept"); 1032 return; 1033 } 1034 1035 p = getpeerbyip((struct sockaddr *)&cliaddr); 1036 1037 if (p != NULL && p->state == STATE_IDLE && p->errcnt < 2) { 1038 if (timer_running(p, Timer_IdleHold, NULL)) { 1039 /* fast reconnect after clear */ 1040 p->passive = 1; 1041 bgp_fsm(p, EVNT_START); 1042 } 1043 } 1044 1045 if (p != NULL && 1046 (p->state == STATE_CONNECT || p->state == STATE_ACTIVE)) { 1047 if (p->fd != -1) { 1048 if (p->state == STATE_CONNECT) 1049 session_close_connection(p); 1050 else { 1051 close(connfd); 1052 return; 1053 } 1054 } 1055 1056 open: 1057 if (p->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) { 1058 log_peer_warnx(&p->conf, 1059 "ipsec or md5sig configured but not available"); 1060 close(connfd); 1061 return; 1062 } 1063 1064 if (p->conf.auth.method == AUTH_MD5SIG) { 1065 if (sysdep.no_md5sig) { 1066 log_peer_warnx(&p->conf, 1067 "md5sig configured but not available"); 1068 close(connfd); 1069 return; 1070 } 1071 len = sizeof(opt); 1072 if (getsockopt(connfd, IPPROTO_TCP, TCP_MD5SIG, 1073 &opt, &len) == -1) 1074 fatal("getsockopt TCP_MD5SIG"); 1075 if (!opt) { /* non-md5'd connection! 
*/ 1076 log_peer_warnx(&p->conf, 1077 "connection attempt without md5 signature"); 1078 close(connfd); 1079 return; 1080 } 1081 } 1082 p->fd = p->wbuf.fd = connfd; 1083 if (session_setup_socket(p)) { 1084 close(connfd); 1085 return; 1086 } 1087 bgp_fsm(p, EVNT_CON_OPEN); 1088 return; 1089 } else if (p != NULL && p->state == STATE_ESTABLISHED && 1090 p->capa.neg.grestart.restart == 2) { 1091 /* first do the graceful restart dance */ 1092 change_state(p, STATE_CONNECT, EVNT_CON_CLOSED); 1093 /* then do part of the open dance */ 1094 goto open; 1095 } else { 1096 log_conn_attempt(p, (struct sockaddr *)&cliaddr); 1097 close(connfd); 1098 } 1099 } 1100 1101 int 1102 session_connect(struct peer *peer) 1103 { 1104 int opt = 1; 1105 struct sockaddr *sa; 1106 1107 /* 1108 * we do not need the overcomplicated collision detection RFC 1771 1109 * describes; we simply make sure there is only ever one concurrent 1110 * tcp connection per peer. 1111 */ 1112 if (peer->fd != -1) 1113 return (-1); 1114 1115 if ((peer->fd = socket(aid2af(peer->conf.remote_addr.aid), 1116 SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, IPPROTO_TCP)) == -1) { 1117 log_peer_warn(&peer->conf, "session_connect socket"); 1118 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1119 return (-1); 1120 } 1121 1122 if (peer->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) { 1123 log_peer_warnx(&peer->conf, 1124 "ipsec or md5sig configured but not available"); 1125 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1126 return (-1); 1127 } 1128 1129 if (peer->conf.auth.method == AUTH_MD5SIG) { 1130 if (sysdep.no_md5sig) { 1131 log_peer_warnx(&peer->conf, 1132 "md5sig configured but not available"); 1133 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1134 return (-1); 1135 } 1136 if (setsockopt(peer->fd, IPPROTO_TCP, TCP_MD5SIG, 1137 &opt, sizeof(opt)) == -1) { 1138 log_peer_warn(&peer->conf, "setsockopt md5sig"); 1139 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1140 return (-1); 1141 } 1142 } 1143 peer->wbuf.fd = peer->fd; 1144 1145 /* if update source is set we need to 
bind() */ 1146 if ((sa = addr2sa(&peer->conf.local_addr, 0)) != NULL) { 1147 if (bind(peer->fd, sa, sa->sa_len) == -1) { 1148 log_peer_warn(&peer->conf, "session_connect bind"); 1149 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1150 return (-1); 1151 } 1152 } 1153 1154 if (session_setup_socket(peer)) { 1155 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1156 return (-1); 1157 } 1158 1159 sa = addr2sa(&peer->conf.remote_addr, BGP_PORT); 1160 if (connect(peer->fd, sa, sa->sa_len) == -1) { 1161 if (errno != EINPROGRESS) { 1162 if (errno != peer->lasterr) 1163 log_peer_warn(&peer->conf, "connect"); 1164 peer->lasterr = errno; 1165 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1166 return (-1); 1167 } 1168 } else 1169 bgp_fsm(peer, EVNT_CON_OPEN); 1170 1171 return (0); 1172 } 1173 1174 int 1175 session_setup_socket(struct peer *p) 1176 { 1177 int ttl = p->conf.distance; 1178 int pre = IPTOS_PREC_INTERNETCONTROL; 1179 int nodelay = 1; 1180 int bsize; 1181 1182 switch (p->conf.remote_addr.aid) { 1183 case AID_INET: 1184 /* set precedence, see RFC 1771 appendix 5 */ 1185 if (setsockopt(p->fd, IPPROTO_IP, IP_TOS, &pre, sizeof(pre)) == 1186 -1) { 1187 log_peer_warn(&p->conf, 1188 "session_setup_socket setsockopt TOS"); 1189 return (-1); 1190 } 1191 1192 if (p->conf.ebgp) { 1193 /* set TTL to foreign router's distance 1194 1=direct n=multihop with ttlsec, we always use 255 */ 1195 if (p->conf.ttlsec) { 1196 ttl = 256 - p->conf.distance; 1197 if (setsockopt(p->fd, IPPROTO_IP, IP_MINTTL, 1198 &ttl, sizeof(ttl)) == -1) { 1199 log_peer_warn(&p->conf, 1200 "session_setup_socket: " 1201 "setsockopt MINTTL"); 1202 return (-1); 1203 } 1204 ttl = 255; 1205 } 1206 1207 if (setsockopt(p->fd, IPPROTO_IP, IP_TTL, &ttl, 1208 sizeof(ttl)) == -1) { 1209 log_peer_warn(&p->conf, 1210 "session_setup_socket setsockopt TTL"); 1211 return (-1); 1212 } 1213 } 1214 break; 1215 case AID_INET6: 1216 if (p->conf.ebgp) { 1217 /* set hoplimit to foreign router's distance 1218 1=direct n=multihop with ttlsec, we always use 255 */ 1219 if 
(p->conf.ttlsec) {
				ttl = 256 - p->conf.distance;
				if (setsockopt(p->fd, IPPROTO_IPV6,
				    IPV6_MINHOPCOUNT, &ttl, sizeof(ttl))
				    == -1) {
					log_peer_warn(&p->conf,
					    "session_setup_socket: "
					    "setsockopt MINHOPCOUNT");
					return (-1);
				}
				ttl = 255;
			}
			if (setsockopt(p->fd, IPPROTO_IPV6, IPV6_UNICAST_HOPS,
			    &ttl, sizeof(ttl)) == -1) {
				log_peer_warn(&p->conf,
				    "session_setup_socket setsockopt hoplimit");
				return (-1);
			}
		}
		break;
	}

	/* set TCP_NODELAY */
	if (setsockopt(p->fd, IPPROTO_TCP, TCP_NODELAY, &nodelay,
	    sizeof(nodelay)) == -1) {
		log_peer_warn(&p->conf,
		    "session_setup_socket setsockopt TCP_NODELAY");
		return (-1);
	}

	/* only increase bufsize (and thus window) if md5 or ipsec is in use */
	if (p->conf.auth.method != AUTH_NONE) {
		/* try to increase bufsize. no biggie if it fails */
		/* halve the request until it is accepted, EINVAL, or <= 8192 */
		bsize = 65535;
		while (bsize > 8192 &&
		    setsockopt(p->fd, SOL_SOCKET, SO_RCVBUF, &bsize,
		    sizeof(bsize)) == -1 && errno != EINVAL)
			bsize /= 2;
		bsize = 65535;
		while (bsize > 8192 &&
		    setsockopt(p->fd, SOL_SOCKET, SO_SNDBUF, &bsize,
		    sizeof(bsize)) == -1 && errno != EINVAL)
			bsize /= 2;
	}

	return (0);
}

/*
 * Record the local and remote socket addresses of the peer's connection
 * in peer->sa_local and peer->sa_remote.  Lookup failures are only logged.
 */
void
session_tcp_established(struct peer *peer)
{
	socklen_t	len;

	len = sizeof(peer->sa_local);
	if (getsockname(peer->fd, (struct sockaddr *)&peer->sa_local,
	    &len) == -1)
		log_warn("getsockname");
	len = sizeof(peer->sa_remote);
	if (getpeername(peer->fd, (struct sockaddr *)&peer->sa_remote,
	    &len) == -1)
		log_warn("getpeername");
}

/*
 * Zero the whole set of capabilities announced to this peer.
 */
void
session_capa_ann_none(struct peer *peer)
{
	bzero(&peer->capa.ann, sizeof(peer->capa.ann));
}

/*
 * Append a capability header (code byte, length byte) to opb.
 * Returns the sum of the ibuf_add() results; callers in this file treat
 * any non-zero value as failure.
 */
int
session_capa_add(struct ibuf *opb, u_int8_t capa_code, u_int8_t capa_len)
{
	int errs = 0;

	errs += ibuf_add(opb, &capa_code,
sizeof(capa_code)); 1294 errs += ibuf_add(opb, &capa_len, sizeof(capa_len)); 1295 return (errs); 1296 } 1297 1298 int 1299 session_capa_add_mp(struct ibuf *buf, u_int8_t aid) 1300 { 1301 u_int8_t safi, pad = 0; 1302 u_int16_t afi; 1303 int errs = 0; 1304 1305 if (aid2afi(aid, &afi, &safi) == -1) 1306 fatalx("session_capa_add_mp: bad afi/safi pair"); 1307 afi = htons(afi); 1308 errs += ibuf_add(buf, &afi, sizeof(afi)); 1309 errs += ibuf_add(buf, &pad, sizeof(pad)); 1310 errs += ibuf_add(buf, &safi, sizeof(safi)); 1311 1312 return (errs); 1313 } 1314 1315 int 1316 session_capa_add_gr(struct peer *p, struct ibuf *b, u_int8_t aid) 1317 { 1318 u_int errs = 0; 1319 u_int16_t afi; 1320 u_int8_t flags, safi; 1321 1322 if (aid2afi(aid, &afi, &safi)) { 1323 log_warn("session_capa_add_gr: bad AID"); 1324 return (1); 1325 } 1326 if (p->capa.neg.grestart.flags[aid] & CAPA_GR_RESTARTING) 1327 flags = CAPA_GR_F_FLAG; 1328 else 1329 flags = 0; 1330 1331 afi = htons(afi); 1332 errs += ibuf_add(b, &afi, sizeof(afi)); 1333 errs += ibuf_add(b, &safi, sizeof(safi)); 1334 errs += ibuf_add(b, &flags, sizeof(flags)); 1335 1336 return (errs); 1337 } 1338 1339 struct bgp_msg * 1340 session_newmsg(enum msg_type msgtype, u_int16_t len) 1341 { 1342 struct bgp_msg *msg; 1343 struct msg_header hdr; 1344 struct ibuf *buf; 1345 int errs = 0; 1346 1347 memset(&hdr.marker, 0xff, sizeof(hdr.marker)); 1348 hdr.len = htons(len); 1349 hdr.type = msgtype; 1350 1351 if ((buf = ibuf_open(len)) == NULL) 1352 return (NULL); 1353 1354 errs += ibuf_add(buf, &hdr.marker, sizeof(hdr.marker)); 1355 errs += ibuf_add(buf, &hdr.len, sizeof(hdr.len)); 1356 errs += ibuf_add(buf, &hdr.type, sizeof(hdr.type)); 1357 1358 if (errs || (msg = calloc(1, sizeof(*msg))) == NULL) { 1359 ibuf_free(buf); 1360 return (NULL); 1361 } 1362 1363 msg->buf = buf; 1364 msg->type = msgtype; 1365 msg->len = len; 1366 1367 return (msg); 1368 } 1369 1370 int 1371 session_sendmsg(struct bgp_msg *msg, struct peer *p) 1372 { 1373 struct mrt 
*mrt; 1374 1375 LIST_FOREACH(mrt, &mrthead, entry) { 1376 if (!(mrt->type == MRT_ALL_OUT || (msg->type == UPDATE && 1377 mrt->type == MRT_UPDATE_OUT))) 1378 continue; 1379 if ((mrt->peer_id == 0 && mrt->group_id == 0) || 1380 mrt->peer_id == p->conf.id || (mrt->group_id == 0 && 1381 mrt->group_id == p->conf.groupid)) 1382 mrt_dump_bgp_msg(mrt, msg->buf->buf, msg->len, p); 1383 } 1384 1385 ibuf_close(&p->wbuf, msg->buf); 1386 free(msg); 1387 return (0); 1388 } 1389 1390 void 1391 session_open(struct peer *p) 1392 { 1393 struct bgp_msg *buf; 1394 struct ibuf *opb; 1395 struct msg_open msg; 1396 u_int16_t len; 1397 u_int8_t i, op_type, optparamlen = 0; 1398 int errs = 0; 1399 int mpcapa = 0; 1400 1401 1402 if ((opb = ibuf_dynamic(0, UCHAR_MAX - sizeof(op_type) - 1403 sizeof(optparamlen))) == NULL) { 1404 bgp_fsm(p, EVNT_CON_FATAL); 1405 return; 1406 } 1407 1408 /* multiprotocol extensions, RFC 4760 */ 1409 for (i = 0; i < AID_MAX; i++) 1410 if (p->capa.ann.mp[i]) { /* 4 bytes data */ 1411 errs += session_capa_add(opb, CAPA_MP, 4); 1412 errs += session_capa_add_mp(opb, i); 1413 mpcapa++; 1414 } 1415 1416 /* route refresh, RFC 2918 */ 1417 if (p->capa.ann.refresh) /* no data */ 1418 errs += session_capa_add(opb, CAPA_REFRESH, 0); 1419 1420 /* graceful restart and End-of-RIB marker, RFC 4724 */ 1421 if (p->capa.ann.grestart.restart) { 1422 int rst = 0; 1423 u_int16_t hdr; 1424 u_int8_t grlen; 1425 1426 if (mpcapa) { 1427 grlen = 2 + 4 * mpcapa; 1428 for (i = 0; i < AID_MAX; i++) { 1429 if (p->capa.neg.grestart.flags[i] & 1430 CAPA_GR_RESTARTING) 1431 rst++; 1432 } 1433 } else { /* AID_INET */ 1434 grlen = 2 + 4; 1435 if (p->capa.neg.grestart.flags[AID_INET] & 1436 CAPA_GR_RESTARTING) 1437 rst++; 1438 } 1439 1440 hdr = conf->holdtime; /* default timeout */ 1441 /* if client does graceful restart don't set R flag */ 1442 if (!rst) 1443 hdr |= CAPA_GR_R_FLAG; 1444 hdr = htons(hdr); 1445 1446 errs += session_capa_add(opb, CAPA_RESTART, grlen); 1447 errs += ibuf_add(opb, 
&hdr, sizeof(hdr));

		/* one entry per announced AFI/SAFI, or IPv4 unicast only */
		if (mpcapa) {
			for (i = 0; i < AID_MAX; i++) {
				if (p->capa.ann.mp[i]) {
					errs += session_capa_add_gr(p, opb, i);
				}
			}
		} else {	/* AID_INET */
			errs += session_capa_add_gr(p, opb, AID_INET);
		}
	}

	/* 4-bytes AS numbers, draft-ietf-idr-as4bytes-13 */
	if (p->capa.ann.as4byte) {	/* 4 bytes data */
		u_int32_t	nas;

		nas = htonl(conf->as);
		errs += session_capa_add(opb, CAPA_AS4BYTE, sizeof(nas));
		errs += ibuf_add(opb, &nas, sizeof(nas));
	}

	/* the optional parameter adds a type byte and a length byte */
	if (ibuf_size(opb))
		optparamlen = ibuf_size(opb) + sizeof(op_type) +
		    sizeof(optparamlen);

	len = MSGSIZE_OPEN_MIN + optparamlen;
	if (errs || (buf = session_newmsg(OPEN, len)) == NULL) {
		ibuf_free(opb);
		bgp_fsm(p, EVNT_CON_FATAL);
		return;
	}

	msg.version = 4;
	msg.myas = htons(conf->short_as);
	/* a per-peer holdtime overrides the global one */
	if (p->conf.holdtime)
		msg.holdtime = htons(p->conf.holdtime);
	else
		msg.holdtime = htons(conf->holdtime);
	msg.bgpid = conf->bgpid;	/* is already in network byte order */
	msg.optparamlen = optparamlen;

	errs += ibuf_add(buf->buf, &msg.version, sizeof(msg.version));
	errs += ibuf_add(buf->buf, &msg.myas, sizeof(msg.myas));
	errs += ibuf_add(buf->buf, &msg.holdtime, sizeof(msg.holdtime));
	errs += ibuf_add(buf->buf, &msg.bgpid, sizeof(msg.bgpid));
	errs += ibuf_add(buf->buf, &msg.optparamlen, sizeof(msg.optparamlen));

	if (optparamlen) {
		op_type = OPT_PARAM_CAPABILITIES;
		/* from here on optparamlen is the length of the capabilities */
		optparamlen = ibuf_size(opb);
		errs += ibuf_add(buf->buf, &op_type, sizeof(op_type));
		errs += ibuf_add(buf->buf, &optparamlen, sizeof(optparamlen));
		errs += ibuf_add(buf->buf, opb->buf, ibuf_size(opb));
	}

	ibuf_free(opb);

	if (errs) {
		ibuf_free(buf->buf);
		free(buf);
		bgp_fsm(p, EVNT_CON_FATAL);
		return;
	}

	if (session_sendmsg(buf, p) == -1) {
		bgp_fsm(p, EVNT_CON_FATAL);
		return;
	}

	p->stats.msg_sent_open++;
}

/*
 * Send a KEEPALIVE to p and restart its keepalive timer.
 * Raises EVNT_CON_FATAL when the message cannot be built or sent.
 */
void
session_keepalive(struct peer *p)
{
	struct bgp_msg		*buf;

	if ((buf = session_newmsg(KEEPALIVE, MSGSIZE_KEEPALIVE)) == NULL ||
	    session_sendmsg(buf, p) == -1) {
		bgp_fsm(p, EVNT_CON_FATAL);
		return;
	}

	start_timer_keepalive(p);
	p->stats.msg_sent_keepalive++;
}

/*
 * Send datalen bytes of UPDATE payload to the peer with id peerid.
 * Unknown peers are logged; peers that are not in STATE_ESTABLISHED are
 * skipped silently.  A successful send also restarts the keepalive timer.
 */
void
session_update(u_int32_t peerid, void *data, size_t datalen)
{
	struct peer		*p;
	struct bgp_msg		*buf;

	if ((p = getpeerbyid(peerid)) == NULL) {
		log_warnx("no such peer: id=%u", peerid);
		return;
	}

	if (p->state != STATE_ESTABLISHED)
		return;

	if ((buf = session_newmsg(UPDATE, MSGSIZE_HEADER + datalen)) == NULL) {
		bgp_fsm(p, EVNT_CON_FATAL);
		return;
	}

	if (ibuf_add(buf->buf, data, datalen)) {
		ibuf_free(buf->buf);
		free(buf);
		bgp_fsm(p, EVNT_CON_FATAL);
		return;
	}

	if (session_sendmsg(buf, p) == -1) {
		bgp_fsm(p, EVNT_CON_FATAL);
		return;
	}

	start_timer_keepalive(p);
	p->stats.msg_sent_update++;
}

/*
 * Send a NOTIFICATION with errcode/subcode and optional data to p.
 * Only the first notification is sent: once last_sent_errcode is set,
 * further calls return without doing anything.
 */
void
session_notification(struct peer *p, u_int8_t errcode, u_int8_t subcode,
    void *data, ssize_t datalen)
{
	struct bgp_msg		*buf;
	int			 errs = 0;

	if (p->stats.last_sent_errcode)	/* some notification already sent */
		return;

	log_notification(p, errcode, subcode, data, datalen, "sending");

	if ((buf = session_newmsg(NOTIFICATION,
	    MSGSIZE_NOTIFICATION_MIN + datalen)) == NULL) {
		bgp_fsm(p, EVNT_CON_FATAL);
		return;
	}

	errs += ibuf_add(buf->buf, &errcode, sizeof(errcode));
	errs += ibuf_add(buf->buf, &subcode, sizeof(subcode));

	if (datalen > 0)
		errs += ibuf_add(buf->buf, data, datalen);

	if (errs) {
		ibuf_free(buf->buf);
		free(buf);
		bgp_fsm(p, EVNT_CON_FATAL);
		return;
} 1600 1601 if (session_sendmsg(buf, p) == -1) { 1602 bgp_fsm(p, EVNT_CON_FATAL); 1603 return; 1604 } 1605 1606 p->stats.msg_sent_notification++; 1607 p->stats.last_sent_errcode = errcode; 1608 p->stats.last_sent_suberr = subcode; 1609 } 1610 1611 int 1612 session_neighbor_rrefresh(struct peer *p) 1613 { 1614 u_int8_t i; 1615 1616 if (!p->capa.peer.refresh) 1617 return (-1); 1618 1619 for (i = 0; i < AID_MAX; i++) { 1620 if (p->capa.peer.mp[i] != 0) 1621 session_rrefresh(p, i); 1622 } 1623 1624 return (0); 1625 } 1626 1627 void 1628 session_rrefresh(struct peer *p, u_int8_t aid) 1629 { 1630 struct bgp_msg *buf; 1631 int errs = 0; 1632 u_int16_t afi; 1633 u_int8_t safi, null8 = 0; 1634 1635 if (aid2afi(aid, &afi, &safi) == -1) 1636 fatalx("session_rrefresh: bad afi/safi pair"); 1637 1638 if ((buf = session_newmsg(RREFRESH, MSGSIZE_RREFRESH)) == NULL) { 1639 bgp_fsm(p, EVNT_CON_FATAL); 1640 return; 1641 } 1642 1643 afi = htons(afi); 1644 errs += ibuf_add(buf->buf, &afi, sizeof(afi)); 1645 errs += ibuf_add(buf->buf, &null8, sizeof(null8)); 1646 errs += ibuf_add(buf->buf, &safi, sizeof(safi)); 1647 1648 if (errs) { 1649 ibuf_free(buf->buf); 1650 free(buf); 1651 bgp_fsm(p, EVNT_CON_FATAL); 1652 return; 1653 } 1654 1655 if (session_sendmsg(buf, p) == -1) { 1656 bgp_fsm(p, EVNT_CON_FATAL); 1657 return; 1658 } 1659 1660 p->stats.msg_sent_rrefresh++; 1661 } 1662 1663 int 1664 session_graceful_restart(struct peer *p) 1665 { 1666 u_int8_t i; 1667 1668 timer_set(p, Timer_RestartTimeout, p->capa.neg.grestart.timeout); 1669 1670 for (i = 0; i < AID_MAX; i++) { 1671 if (p->capa.neg.grestart.flags[i] & CAPA_GR_PRESENT) { 1672 if (imsg_compose(ibuf_rde, IMSG_SESSION_STALE, 1673 p->conf.id, 0, -1, &i, sizeof(i)) == -1) 1674 return (-1); 1675 log_peer_warnx(&p->conf, 1676 "graceful restart of %s, keeping routes", 1677 aid2str(i)); 1678 p->capa.neg.grestart.flags[i] |= CAPA_GR_RESTARTING; 1679 } else if (p->capa.neg.mp[i]) { 1680 if (imsg_compose(ibuf_rde, IMSG_SESSION_FLUSH, 1681 
p->conf.id, 0, -1, &i, sizeof(i)) == -1)
				return (-1);
			log_peer_warnx(&p->conf,
			    "graceful restart of %s, flushing routes",
			    aid2str(i));
		}
	}
	return (0);
}

/*
 * End graceful restart for p: flush every address family that is still
 * marked CAPA_GR_RESTARTING and clear that mark for all families.
 * Returns -1 when an imsg cannot be composed, 0 otherwise.
 */
int
session_graceful_stop(struct peer *p)
{
	u_int8_t	i;

	for (i = 0; i < AID_MAX; i++) {
		/*
		 * Only flush if the peer is restarting and the timeout fired.
		 * In all other cases the session was already flushed when the
		 * session went down or when the new open message was parsed.
		 */
		if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING) {
			log_peer_warnx(&p->conf, "graceful restart of %s, "
			    "time-out, flushing", aid2str(i));
			if (imsg_compose(ibuf_rde, IMSG_SESSION_FLUSH,
			    p->conf.id, 0, -1, &i, sizeof(i)) == -1)
				return (-1);
		}
		p->capa.neg.grestart.flags[i] &= ~CAPA_GR_RESTARTING;
	}
	return (0);
}

/*
 * Handle the poll events reported in pfd for peer p.
 * In STATE_CONNECT the events complete (or fail) the non-blocking
 * connect; in every other state queued output is written and available
 * input is read into p->rbuf.
 * Returns 1 when an event was handled and 0 when there was nothing to do.
 */
int
session_dispatch_msg(struct pollfd *pfd, struct peer *p)
{
	ssize_t		n;
	socklen_t	len;
	int		error;

	if (p->state == STATE_CONNECT) {
		if (pfd->revents & POLLOUT) {
			if (pfd->revents & POLLIN) {
				/* error occurred */
				len = sizeof(error);
				if (getsockopt(pfd->fd, SOL_SOCKET, SO_ERROR,
				    &error, &len) == -1 || error) {
					/* report the socket's pending error */
					if (error)
						errno = error;
					if (errno != p->lasterr) {
						log_peer_warn(&p->conf,
						    "socket error");
						p->lasterr = errno;
					}
					bgp_fsm(p, EVNT_CON_OPENFAIL);
					return (1);
				}
			}
			bgp_fsm(p, EVNT_CON_OPEN);
			return (1);
		}
		if (pfd->revents & POLLHUP) {
			bgp_fsm(p, EVNT_CON_OPENFAIL);
			return (1);
		}
		if (pfd->revents & (POLLERR|POLLNVAL)) {
			bgp_fsm(p, EVNT_CON_FATAL);
			return (1);
		}
		return (0);
	}

	if (pfd->revents & POLLHUP) {
		bgp_fsm(p, EVNT_CON_CLOSED);
		return (1);
	}
	if (pfd->revents & (POLLERR|POLLNVAL)) {
		bgp_fsm(p, EVNT_CON_FATAL);
		return (1);
	}

if (pfd->revents & POLLOUT && p->wbuf.queued) {
		/* EAGAIN is not an error: the remaining data stays queued */
		if ((error = msgbuf_write(&p->wbuf)) <= 0 && errno != EAGAIN) {
			if (error == 0)
				log_peer_warnx(&p->conf, "Connection closed");
			else if (error == -1)
				log_peer_warn(&p->conf, "write error");
			bgp_fsm(p, EVNT_CON_FATAL);
			return (1);
		}
		if (!(pfd->revents & POLLIN))
			return (1);
	}

	if (p->rbuf && pfd->revents & POLLIN) {
		/* append to whatever is already buffered */
		if ((n = read(p->fd, p->rbuf->buf + p->rbuf->wpos,
		    sizeof(p->rbuf->buf) - p->rbuf->wpos)) == -1) {
			if (errno != EINTR && errno != EAGAIN) {
				log_peer_warn(&p->conf, "read error");
				bgp_fsm(p, EVNT_CON_FATAL);
			}
			return (1);
		}
		if (n == 0) {	/* connection closed */
			bgp_fsm(p, EVNT_CON_CLOSED);
			return (1);
		}

		p->rbuf->wpos += n;
		p->stats.last_read = time(NULL);
		return (1);
	}
	return (0);
}

/*
 * Process the complete messages accumulated in p->rbuf, feeding each one
 * to the FSM according to its type.  The loop stops when less than a
 * full message is buffered, when the read buffer went away, or once the
 * processed count exceeds MSG_PROCESS_LIMIT.
 * Returns 0 when parse_header() rejects a message, 1 otherwise.
 */
int
session_process_msg(struct peer *p)
{
	ssize_t		rpos, av, left;
	int		processed = 0;
	u_int16_t	msglen;
	u_int8_t	msgtype;

	rpos = 0;
	av = p->rbuf->wpos;

	/*
	 * session might drop to IDLE -> buffers deallocated
	 * we MUST check rbuf != NULL before use
	 */
	for (;;) {
		if (rpos + MSGSIZE_HEADER > av)
			break;
		if (p->rbuf == NULL)
			break;
		if (parse_header(p, p->rbuf->buf + rpos, &msglen,
		    &msgtype) == -1)
			return (0);
		/* message not yet completely received */
		if (rpos + msglen > av)
			break;
		p->rbuf->rptr = p->rbuf->buf + rpos;

		switch (msgtype) {
		case OPEN:
			bgp_fsm(p, EVNT_RCVD_OPEN);
			p->stats.msg_rcvd_open++;
			break;
		case UPDATE:
			bgp_fsm(p, EVNT_RCVD_UPDATE);
			p->stats.msg_rcvd_update++;
			break;
		case NOTIFICATION:
			bgp_fsm(p, EVNT_RCVD_NOTIFICATION);
			p->stats.msg_rcvd_notification++;
			break;
		case KEEPALIVE:
			bgp_fsm(p, EVNT_RCVD_KEEPALIVE);
			p->stats.msg_rcvd_keepalive++;
			break;
		case RREFRESH:
parse_refresh(p);
			p->stats.msg_rcvd_rrefresh++;
			break;
		default:	/* cannot happen */
			session_notification(p, ERR_HEADER, ERR_HDR_TYPE,
			    &msgtype, 1);
			log_warnx("received message with unknown type %u",
			    msgtype);
			bgp_fsm(p, EVNT_CON_FATAL);
		}
		rpos += msglen;
		if (++processed > MSG_PROCESS_LIMIT)
			break;
	}
	if (p->rbuf == NULL)
		return (1);

	/* move an incomplete trailing message to the front of the buffer */
	if (rpos < av) {
		left = av - rpos;
		memmove(&p->rbuf->buf, p->rbuf->buf + rpos, left);
		p->rbuf->wpos = left;
	} else
		p->rbuf->wpos = 0;

	return (1);
}

/*
 * Validate the message header at data and return the message length in
 * *len (host byte order) and the message type in *type.
 * Checks the all-ones marker, the overall length bounds and the
 * type-specific length.  On any violation a NOTIFICATION is sent,
 * EVNT_CON_FATAL is raised and -1 is returned.  Accepted messages are
 * dumped to the matching incoming mrt before 0 is returned.
 */
int
parse_header(struct peer *peer, u_char *data, u_int16_t *len, u_int8_t *type)
{
	struct mrt		*mrt;
	u_char			*p;
	u_int16_t		 olen;
	static const u_int8_t	 marker[MSGSIZE_HEADER_MARKER] = { 0xff, 0xff,
				    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
				    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };

	/* caller MUST make sure we are getting 19 bytes! */
	p = data;
	if (memcmp(p, marker, sizeof(marker))) {
		log_peer_warnx(&peer->conf, "sync error");
		session_notification(peer, ERR_HEADER, ERR_HDR_SYNC, NULL, 0);
		bgp_fsm(peer, EVNT_CON_FATAL);
		return (-1);
	}
	p += MSGSIZE_HEADER_MARKER;

	/* olen stays in network byte order for the error notifications */
	memcpy(&olen, p, 2);
	*len = ntohs(olen);
	p += 2;
	memcpy(type, p, 1);

	if (*len < MSGSIZE_HEADER || *len > MAX_PKTSIZE) {
		log_peer_warnx(&peer->conf,
		    "received message: illegal length: %u byte", *len);
		session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
		    &olen, sizeof(olen));
		bgp_fsm(peer, EVNT_CON_FATAL);
		return (-1);
	}

	switch (*type) {
	case OPEN:
		if (*len < MSGSIZE_OPEN_MIN) {
			log_peer_warnx(&peer->conf,
			    "received OPEN: illegal len: %u byte", *len);
			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
			    &olen, sizeof(olen));
			bgp_fsm(peer, EVNT_CON_FATAL);
			return (-1);
		}
		break;
	case
NOTIFICATION:
		if (*len < MSGSIZE_NOTIFICATION_MIN) {
			log_peer_warnx(&peer->conf,
			    "received NOTIFICATION: illegal len: %u byte",
			    *len);
			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
			    &olen, sizeof(olen));
			bgp_fsm(peer, EVNT_CON_FATAL);
			return (-1);
		}
		break;
	case UPDATE:
		if (*len < MSGSIZE_UPDATE_MIN) {
			log_peer_warnx(&peer->conf,
			    "received UPDATE: illegal len: %u byte", *len);
			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
			    &olen, sizeof(olen));
			bgp_fsm(peer, EVNT_CON_FATAL);
			return (-1);
		}
		break;
	case KEEPALIVE:
		/* fixed-size message: any other length is an error */
		if (*len != MSGSIZE_KEEPALIVE) {
			log_peer_warnx(&peer->conf,
			    "received KEEPALIVE: illegal len: %u byte", *len);
			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
			    &olen, sizeof(olen));
			bgp_fsm(peer, EVNT_CON_FATAL);
			return (-1);
		}
		break;
	case RREFRESH:
		/* fixed-size message: any other length is an error */
		if (*len != MSGSIZE_RREFRESH) {
			log_peer_warnx(&peer->conf,
			    "received RREFRESH: illegal len: %u byte", *len);
			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
			    &olen, sizeof(olen));
			bgp_fsm(peer, EVNT_CON_FATAL);
			return (-1);
		}
		break;
	default:
		log_peer_warnx(&peer->conf,
		    "received msg with unknown type %u", *type);
		session_notification(peer, ERR_HEADER, ERR_HDR_TYPE,
		    type, 1);
		bgp_fsm(peer, EVNT_CON_FATAL);
		return (-1);
	}
	/*
	 * dump to every incoming mrt that is unfiltered, filtered on this
	 * peer, or filtered on this peer's (non-zero) group
	 */
	LIST_FOREACH(mrt, &mrthead, entry) {
		if (!(mrt->type == MRT_ALL_IN || (*type == UPDATE &&
		    mrt->type == MRT_UPDATE_IN)))
			continue;
		if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
		    mrt->peer_id == peer->conf.id || (mrt->group_id != 0 &&
		    mrt->group_id == peer->conf.groupid))
			mrt_dump_bgp_msg(mrt, data, *len, peer);
	}
	return (0);
}

/*
 * Parse the OPEN message at peer->rbuf->rptr: version, AS number,
 * holdtime, BGP identifier and the optional parameters.
 * On a rejected message a NOTIFICATION is sent, the peer is moved to
 * STATE_IDLE and -1 is returned; 0 is returned when the OPEN is accepted.
 */
int
parse_open(struct peer *peer)
{
	u_char		*p, *op_val;
	u_int8_t	 version, rversion;
	u_int16_t	 short_as, msglen;
	u_int16_t
holdtime, oholdtime, myholdtime;
	u_int32_t	 as, bgpid;
	u_int8_t	 optparamlen, plen;
	u_int8_t	 op_type, op_len;

	/* re-read the total message length from the header */
	p = peer->rbuf->rptr;
	p += MSGSIZE_HEADER_MARKER;
	memcpy(&msglen, p, sizeof(msglen));
	msglen = ntohs(msglen);

	p = peer->rbuf->rptr;
	p += MSGSIZE_HEADER;	/* header is already checked */

	memcpy(&version, p, sizeof(version));
	p += sizeof(version);

	if (version != BGP_VERSION) {
		log_peer_warnx(&peer->conf,
		    "peer wants unrecognized version %u", version);
		if (version > BGP_VERSION)
			rversion = version - BGP_VERSION;
		else
			rversion = BGP_VERSION;
		session_notification(peer, ERR_OPEN, ERR_OPEN_VERSION,
		    &rversion, sizeof(rversion));
		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
		return (-1);
	}

	/* 2-byte AS from the fixed header; a 4-byte AS capability may
	   replace it in parse_capabilities() */
	memcpy(&short_as, p, sizeof(short_as));
	p += sizeof(short_as);
	as = peer->short_as = ntohs(short_as);

	memcpy(&oholdtime, p, sizeof(oholdtime));
	p += sizeof(oholdtime);

	/* a holdtime of 0 is always accepted */
	holdtime = ntohs(oholdtime);
	if (holdtime && holdtime < peer->conf.min_holdtime) {
		log_peer_warnx(&peer->conf,
		    "peer requests unacceptable holdtime %u", holdtime);
		session_notification(peer, ERR_OPEN, ERR_OPEN_HOLDTIME,
		    NULL, 0);
		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
		return (-1);
	}

	/* use the smaller of the peer's holdtime and our own */
	myholdtime = peer->conf.holdtime;
	if (!myholdtime)
		myholdtime = conf->holdtime;
	if (holdtime < myholdtime)
		peer->holdtime = holdtime;
	else
		peer->holdtime = myholdtime;

	memcpy(&bgpid, p, sizeof(bgpid));
	p += sizeof(bgpid);

	/* check bgpid for validity - just disallow 0 */
	if (ntohl(bgpid) == 0) {
		log_peer_warnx(&peer->conf, "peer BGPID %u unacceptable",
		    ntohl(bgpid));
		session_notification(peer, ERR_OPEN, ERR_OPEN_BGPID,
		    NULL, 0);
		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
		return (-1);
	}
	peer->remote_bgpid = bgpid;

	memcpy(&optparamlen, p, sizeof(optparamlen));
	p += sizeof(optparamlen);

	/* the optional parameters must fill the rest of the message */
	if (optparamlen != msglen - MSGSIZE_OPEN_MIN) {
		log_peer_warnx(&peer->conf,
		    "corrupt OPEN message received: length mismatch");
		session_notification(peer, ERR_OPEN, 0, NULL, 0);
		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
		return (-1);
	}

	/* walk the optional parameters: type byte, length byte, value */
	plen = optparamlen;
	while (plen > 0) {
		if (plen < 2) {
			log_peer_warnx(&peer->conf,
			    "corrupt OPEN message received, len wrong");
			session_notification(peer, ERR_OPEN, 0, NULL, 0);
			change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
			return (-1);
		}
		memcpy(&op_type, p, sizeof(op_type));
		p += sizeof(op_type);
		plen -= sizeof(op_type);
		memcpy(&op_len, p, sizeof(op_len));
		p += sizeof(op_len);
		plen -= sizeof(op_len);
		if (op_len > 0) {
			if (plen < op_len) {
				log_peer_warnx(&peer->conf,
				    "corrupt OPEN message received, len wrong");
				session_notification(peer, ERR_OPEN, 0,
				    NULL, 0);
				change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
				return (-1);
			}
			op_val = p;
			p += op_len;
			plen -= op_len;
		} else
			op_val = NULL;

		switch (op_type) {
		case OPT_PARAM_CAPABILITIES:		/* RFC 3392 */
			if (parse_capabilities(peer, op_val, op_len,
			    &as) == -1) {
				session_notification(peer, ERR_OPEN, 0,
				    NULL, 0);
				change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
				return (-1);
			}
			break;
		case OPT_PARAM_AUTH:			/* deprecated */
		default:
			/*
			 * unsupported type
			 * the RFCs tell us to leave the data section empty
			 * and notify the peer with ERR_OPEN, ERR_OPEN_OPT.
			 * How the peer should know _which_ optional parameter
			 * we don't support is beyond me.
			 */
			log_peer_warnx(&peer->conf,
			    "received OPEN message with unsupported optional "
			    "parameter: type %u", op_type);
			session_notification(peer, ERR_OPEN, ERR_OPEN_OPT,
			    NULL, 0);
			change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
			timer_set(peer, Timer_IdleHold, 0);	/* no punish */
			peer->IdleHoldTime /= 2;
			return (-1);
		}
	}

	/* if remote-as is zero and it's a cloned neighbor, accept any */
	if (peer->template && !peer->conf.remote_as && as != AS_TRANS) {
		peer->conf.remote_as = as;
		peer->conf.ebgp = (peer->conf.remote_as != conf->as);
		if (!peer->conf.ebgp)
			/* force enforce_as off for iBGP sessions */
			peer->conf.enforce_as = ENFORCE_AS_OFF;
	}

	if (peer->conf.remote_as != as) {
		log_peer_warnx(&peer->conf, "peer sent wrong AS %s",
		    log_as(as));
		session_notification(peer, ERR_OPEN, ERR_OPEN_AS, NULL, 0);
		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
		return (-1);
	}

	if (capa_neg_calc(peer) == -1) {
		log_peer_warnx(&peer->conf,
		    "capability negotiation calculation failed");
		session_notification(peer, ERR_OPEN, 0, NULL, 0);
		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
		return (-1);
	}

	return (0);
}

/*
 * Forward the payload of the UPDATE message at peer->rbuf->rptr (the
 * message without its header) to the RDE.
 * Returns -1 when the imsg cannot be composed, 0 otherwise.
 */
int
parse_update(struct peer *peer)
{
	u_char		*p;
	u_int16_t	 datalen;

	/*
	 * we pass the message verbatim to the rde.
2156 * in case of errors the whole session is reset with a 2157 * notification anyway, we only need to know the peer 2158 */ 2159 p = peer->rbuf->rptr; 2160 p += MSGSIZE_HEADER_MARKER; 2161 memcpy(&datalen, p, sizeof(datalen)); 2162 datalen = ntohs(datalen); 2163 2164 p = peer->rbuf->rptr; 2165 p += MSGSIZE_HEADER; /* header is already checked */ 2166 datalen -= MSGSIZE_HEADER; 2167 2168 if (imsg_compose(ibuf_rde, IMSG_UPDATE, peer->conf.id, 0, -1, p, 2169 datalen) == -1) 2170 return (-1); 2171 2172 return (0); 2173 } 2174 2175 int 2176 parse_refresh(struct peer *peer) 2177 { 2178 u_char *p; 2179 u_int16_t afi; 2180 u_int8_t aid, safi; 2181 2182 p = peer->rbuf->rptr; 2183 p += MSGSIZE_HEADER; /* header is already checked */ 2184 2185 /* 2186 * We could check if we actually announced the capability but 2187 * as long as the message is correctly encoded we don't care. 2188 */ 2189 2190 /* afi, 2 byte */ 2191 memcpy(&afi, p, sizeof(afi)); 2192 afi = ntohs(afi); 2193 p += 2; 2194 /* reserved, 1 byte */ 2195 p += 1; 2196 /* safi, 1 byte */ 2197 memcpy(&safi, p, sizeof(safi)); 2198 2199 /* afi/safi unchecked - unrecognized values will be ignored anyway */ 2200 if (afi2aid(afi, safi, &aid) == -1) { 2201 log_peer_warnx(&peer->conf, "peer sent bad refresh, " 2202 "invalid afi/safi pair"); 2203 return (0); 2204 } 2205 2206 if (imsg_compose(ibuf_rde, IMSG_REFRESH, peer->conf.id, 0, -1, &aid, 2207 sizeof(aid)) == -1) 2208 return (-1); 2209 2210 return (0); 2211 } 2212 2213 int 2214 parse_notification(struct peer *peer) 2215 { 2216 u_char *p; 2217 u_int16_t datalen; 2218 u_int8_t errcode; 2219 u_int8_t subcode; 2220 u_int8_t capa_code; 2221 u_int8_t capa_len; 2222 u_int8_t i; 2223 2224 /* just log */ 2225 p = peer->rbuf->rptr; 2226 p += MSGSIZE_HEADER_MARKER; 2227 memcpy(&datalen, p, sizeof(datalen)); 2228 datalen = ntohs(datalen); 2229 2230 p = peer->rbuf->rptr; 2231 p += MSGSIZE_HEADER; /* header is already checked */ 2232 datalen -= MSGSIZE_HEADER; 2233 2234 memcpy(&errcode, 
p, sizeof(errcode));
	p += sizeof(errcode);
	datalen -= sizeof(errcode);

	memcpy(&subcode, p, sizeof(subcode));
	p += sizeof(subcode);
	datalen -= sizeof(subcode);

	/* p and datalen now describe the data part of the notification */
	log_notification(peer, errcode, subcode, p, datalen, "received");
	peer->errcnt++;

	if (errcode == ERR_OPEN && subcode == ERR_OPEN_CAPA) {
		if (datalen == 0) {	/* zebra likes to send those.. humbug */
			log_peer_warnx(&peer->conf, "received \"unsupported "
			    "capability\" notification without data part, "
			    "disabling capability announcements altogether");
			session_capa_ann_none(peer);
		}

		/* the data part is a list of (code, length, value) entries */
		while (datalen > 0) {
			if (datalen < 2) {
				log_peer_warnx(&peer->conf,
				    "parse_notification: "
				    "expect len >= 2, len is %u", datalen);
				return (-1);
			}
			memcpy(&capa_code, p, sizeof(capa_code));
			p += sizeof(capa_code);
			datalen -= sizeof(capa_code);
			memcpy(&capa_len, p, sizeof(capa_len));
			p += sizeof(capa_len);
			datalen -= sizeof(capa_len);
			if (datalen < capa_len) {
				log_peer_warnx(&peer->conf,
				    "parse_notification: capa_len %u exceeds "
				    "remaining msg length %u", capa_len,
				    datalen);
				return (-1);
			}
			/* the capability value itself is skipped */
			p += capa_len;
			datalen -= capa_len;
			switch (capa_code) {
			case CAPA_MP:
				for (i = 0; i < AID_MAX; i++)
					peer->capa.ann.mp[i] = 0;
				log_peer_warnx(&peer->conf,
				    "disabling multiprotocol capability");
				break;
			case CAPA_REFRESH:
				peer->capa.ann.refresh = 0;
				log_peer_warnx(&peer->conf,
				    "disabling route refresh capability");
				break;
			case CAPA_RESTART:
				peer->capa.ann.grestart.restart = 0;
				log_peer_warnx(&peer->conf,
				    "disabling restart capability");
				break;
			case CAPA_AS4BYTE:
				peer->capa.ann.as4byte = 0;
				log_peer_warnx(&peer->conf,
				    "disabling 4-byte AS num capability");
				break;
			default:	/* should not happen...
*/
				log_peer_warnx(&peer->conf, "received "
				    "\"unsupported capability\" notification "
				    "for unknown capability %u, disabling "
				    "capability announcements altogether",
				    capa_code);
				session_capa_ann_none(peer);
				break;
			}
		}

		return (1);
	}

	if (errcode == ERR_OPEN && subcode == ERR_OPEN_OPT) {
		session_capa_ann_none(peer);
		return (1);
	}

	return (0);
}

/*
 * Parse the dlen bytes of capabilities at d (the value of one
 * capabilities optional parameter) and record what the peer announced
 * in peer->capa.peer.  A 4-byte AS capability overwrites *as.
 * Capabilities with an unexpected length are logged and skipped, and
 * unknown capability codes are ignored.  Returns -1 only when the list
 * itself is truncated, 0 otherwise.
 */
int
parse_capabilities(struct peer *peer, u_char *d, u_int16_t dlen, u_int32_t *as)
{
	u_char		*capa_val;
	u_int32_t	 remote_as;
	u_int16_t	 len;
	u_int16_t	 afi;
	u_int16_t	 gr_header;
	u_int8_t	 safi;
	u_int8_t	 aid;
	u_int8_t	 gr_flags;
	u_int8_t	 capa_code;
	u_int8_t	 capa_len;
	u_int8_t	 i;

	len = dlen;
	while (len > 0) {
		if (len < 2) {
			log_peer_warnx(&peer->conf, "Bad capabilities attr "
			    "length: %u, too short", len);
			return (-1);
		}
		memcpy(&capa_code, d, sizeof(capa_code));
		d += sizeof(capa_code);
		len -= sizeof(capa_code);
		memcpy(&capa_len, d, sizeof(capa_len));
		d += sizeof(capa_len);
		len -= sizeof(capa_len);
		if (capa_len > 0) {
			if (len < capa_len) {
				log_peer_warnx(&peer->conf,
				    "Bad capabilities attr length: "
				    "len %u smaller than capa_len %u",
				    len, capa_len);
				return (-1);
			}
			capa_val = d;
			d += capa_len;
			len -= capa_len;
		} else
			capa_val = NULL;

		switch (capa_code) {
		case CAPA_MP:			/* RFC 4760 */
			if (capa_len != 4) {
				log_peer_warnx(&peer->conf,
				    "Bad multi protocol capability length: "
				    "%u", capa_len);
				break;
			}
			/* AFI (2 bytes), reserved (1 byte), SAFI (1 byte) */
			memcpy(&afi, capa_val, sizeof(afi));
			afi = ntohs(afi);
			memcpy(&safi, capa_val + 3, sizeof(safi));
			if (afi2aid(afi, safi, &aid) == -1) {
				log_peer_warnx(&peer->conf,
				    "Received multi protocol capability: "
				    " unknown AFI %u, safi %u pair",
				    afi, safi);
				break;
			}
			peer->capa.peer.mp[aid] = 1;
			break;
		case CAPA_REFRESH:
			peer->capa.peer.refresh = 1;
			break;
		case CAPA_RESTART:
			if (capa_len == 2) {
				/* peer only supports EoR marker */
				peer->capa.peer.grestart.restart = 1;
				peer->capa.peer.grestart.timeout = 0;
				break;
			} else if (capa_len % 4 != 2) {
				/* must be a 2 byte header plus 4 byte entries */
				log_peer_warnx(&peer->conf,
				    "Bad graceful restart capability length: "
				    "%u", capa_len);
				peer->capa.peer.grestart.restart = 0;
				peer->capa.peer.grestart.timeout = 0;
				break;
			}

			memcpy(&gr_header, capa_val, sizeof(gr_header));
			gr_header = ntohs(gr_header);
			peer->capa.peer.grestart.timeout =
			    gr_header & CAPA_GR_TIMEMASK;
			if (peer->capa.peer.grestart.timeout == 0) {
				log_peer_warnx(&peer->conf, "Received "
				    "graceful restart timeout is zero");
				peer->capa.peer.grestart.restart = 0;
				break;
			}

			/* per entry: AFI (2 bytes), SAFI (1 byte), flags (1 byte) */
			for (i = 2; i <= capa_len - 4; i += 4) {
				memcpy(&afi, capa_val + i, sizeof(afi));
				afi = ntohs(afi);
				memcpy(&safi, capa_val + i + 2, sizeof(safi));
				if (afi2aid(afi, safi, &aid) == -1) {
					log_peer_warnx(&peer->conf,
					    "Received graceful restart capa: "
					    " unknown AFI %u, safi %u pair",
					    afi, safi);
					continue;
				}
				memcpy(&gr_flags, capa_val + i + 3,
				    sizeof(gr_flags));
				peer->capa.peer.grestart.flags[aid] |=
				    CAPA_GR_PRESENT;
				if (gr_flags & CAPA_GR_F_FLAG)
					peer->capa.peer.grestart.flags[aid] |=
					    CAPA_GR_FORWARD;
				if (gr_header & CAPA_GR_R_FLAG)
					peer->capa.peer.grestart.flags[aid] |=
					    CAPA_GR_RESTART;
				peer->capa.peer.grestart.restart = 2;
			}
			break;
		case CAPA_AS4BYTE:
			if (capa_len != 4) {
				log_peer_warnx(&peer->conf,
				    "Bad AS4BYTE capability length: "
				    "%u", capa_len);
				peer->capa.peer.as4byte = 0;
				break;
			}
			memcpy(&remote_as, capa_val, sizeof(remote_as));
			*as = ntohl(remote_as);
			peer->capa.peer.as4byte = 1;
			break;
		default:
			break;
		}
	}

	return (0);
}

/*
 * Compute the negotiated capabilities (p->capa.neg) from what we
 * announced (p->capa.ann) and what the peer announced (p->capa.peer).
 * Returns -1 when an imsg to the RDE cannot be composed, 0 otherwise.
 */
int
capa_neg_calc(struct peer *p)
{
	u_int8_t	i, hasmp = 0;

	/* refresh: does not really matter here, use peer setting */
	p->capa.neg.refresh = p->capa.peer.refresh;

	/* as4byte: both side must announce capability */
	if (p->capa.ann.as4byte && p->capa.peer.as4byte)
		p->capa.neg.as4byte = 1;
	else
		p->capa.neg.as4byte = 0;

	/* MP: both side must announce capability */
	for (i = 0; i < AID_MAX; i++) {
		if (p->capa.ann.mp[i] && p->capa.peer.mp[i]) {
			p->capa.neg.mp[i] = 1;
			hasmp = 1;
		} else
			p->capa.neg.mp[i] = 0;
	}
	/* if no MP capability present default to IPv4 unicast mode */
	if (!hasmp)
		p->capa.neg.mp[AID_INET] = 1;

	/*
	 * graceful restart: only the peer capabilities are of interest here.
	 * It is necessary to compare the new values with the previous ones
	 * and act accordingly. AFI/SAFI that are not part in the MP capability
	 * are treated as not being present.
2485 */ 2486 2487 for (i = 0; i < AID_MAX; i++) { 2488 int8_t negflags; 2489 2490 /* disable GR if the AFI/SAFI is not present */ 2491 if (p->capa.peer.grestart.flags[i] & CAPA_GR_PRESENT && 2492 p->capa.neg.mp[i] == 0) 2493 p->capa.peer.grestart.flags[i] = 0; /* disable */ 2494 /* look at current GR state and decide what to do */ 2495 negflags = p->capa.neg.grestart.flags[i]; 2496 p->capa.neg.grestart.flags[i] = p->capa.peer.grestart.flags[i]; 2497 if (negflags & CAPA_GR_RESTARTING) { 2498 if (!(p->capa.peer.grestart.flags[i] & 2499 CAPA_GR_FORWARD)) { 2500 if (imsg_compose(ibuf_rde, IMSG_SESSION_FLUSH, 2501 p->conf.id, 0, -1, &i, sizeof(i)) == -1) 2502 return (-1); 2503 log_peer_warnx(&p->conf, "graceful restart of " 2504 "%s, not restarted, flushing", aid2str(i)); 2505 } else 2506 p->capa.neg.grestart.flags[i] |= 2507 CAPA_GR_RESTARTING; 2508 } 2509 } 2510 p->capa.neg.grestart.timeout = p->capa.peer.grestart.timeout; 2511 p->capa.neg.grestart.restart = p->capa.peer.grestart.restart; 2512 2513 return (0); 2514 } 2515 2516 void 2517 session_dispatch_imsg(struct imsgbuf *ibuf, int idx, u_int *listener_cnt) 2518 { 2519 struct imsg imsg; 2520 struct mrt xmrt; 2521 struct mrt *mrt; 2522 struct imsgbuf *i; 2523 struct peer_config *pconf; 2524 struct peer *p, *next; 2525 struct listen_addr *la, *nla; 2526 struct kif *kif; 2527 u_char *data; 2528 enum reconf_action reconf; 2529 int n, fd, depend_ok, restricted; 2530 u_int8_t aid, errcode, subcode; 2531 2532 while (ibuf) { 2533 if ((n = imsg_get(ibuf, &imsg)) == -1) 2534 fatal("session_dispatch_imsg: imsg_get error"); 2535 2536 if (n == 0) 2537 break; 2538 2539 switch (imsg.hdr.type) { 2540 case IMSG_SOCKET_CONN: 2541 case IMSG_SOCKET_CONN_CTL: 2542 if (idx != PFD_PIPE_MAIN) 2543 fatalx("reconf request not from parent"); 2544 if ((fd = imsg.fd) == -1) { 2545 log_warnx("expected to receive imsg fd to " 2546 "RDE but didn't receive any"); 2547 break; 2548 } 2549 if ((i = malloc(sizeof(struct imsgbuf))) == NULL) 2550 
fatal(NULL); 2551 imsg_init(i, fd); 2552 if (imsg.hdr.type == IMSG_SOCKET_CONN) { 2553 if (ibuf_rde) { 2554 log_warnx("Unexpected imsg connection " 2555 "to RDE received"); 2556 msgbuf_clear(&ibuf_rde->w); 2557 free(ibuf_rde); 2558 } 2559 ibuf_rde = i; 2560 } else { 2561 if (ibuf_rde_ctl) { 2562 log_warnx("Unexpected imsg ctl " 2563 "connection to RDE received"); 2564 msgbuf_clear(&ibuf_rde_ctl->w); 2565 free(ibuf_rde_ctl); 2566 } 2567 ibuf_rde_ctl = i; 2568 } 2569 break; 2570 case IMSG_RECONF_CONF: 2571 if (idx != PFD_PIPE_MAIN) 2572 fatalx("reconf request not from parent"); 2573 if ((nconf = malloc(sizeof(struct bgpd_config))) == 2574 NULL) 2575 fatal(NULL); 2576 memcpy(nconf, imsg.data, sizeof(struct bgpd_config)); 2577 if ((nconf->listen_addrs = calloc(1, 2578 sizeof(struct listen_addrs))) == NULL) 2579 fatal(NULL); 2580 TAILQ_INIT(nconf->listen_addrs); 2581 npeers = NULL; 2582 init_conf(nconf); 2583 pending_reconf = 1; 2584 break; 2585 case IMSG_RECONF_PEER: 2586 if (idx != PFD_PIPE_MAIN) 2587 fatalx("reconf request not from parent"); 2588 pconf = imsg.data; 2589 p = getpeerbyaddr(&pconf->remote_addr); 2590 if (p == NULL) { 2591 if ((p = calloc(1, sizeof(struct peer))) == 2592 NULL) 2593 fatal("new_peer"); 2594 p->state = p->prev_state = STATE_NONE; 2595 p->next = npeers; 2596 npeers = p; 2597 reconf = RECONF_REINIT; 2598 } else 2599 reconf = RECONF_KEEP; 2600 2601 memcpy(&p->conf, pconf, sizeof(struct peer_config)); 2602 p->conf.reconf_action = reconf; 2603 2604 /* sync the RDE in case we keep the peer */ 2605 if (reconf == RECONF_KEEP) { 2606 if (imsg_compose(ibuf_rde, IMSG_SESSION_ADD, 2607 p->conf.id, 0, -1, &p->conf, 2608 sizeof(struct peer_config)) == -1) 2609 fatalx("imsg_compose error"); 2610 if (p->conf.template) { 2611 /* apply the conf to all clones */ 2612 struct peer *np; 2613 for (np = peers; np; np = np->next) { 2614 if (np->template != p) 2615 continue; 2616 session_template_clone(np, 2617 NULL, np->conf.id, 2618 np->conf.remote_as); 2619 if 
(imsg_compose(ibuf_rde, 2620 IMSG_SESSION_ADD, 2621 np->conf.id, 0, -1, 2622 &np->conf, 2623 sizeof(struct peer_config)) 2624 == -1) 2625 fatalx("imsg_compose error"); 2626 } 2627 } 2628 } 2629 break; 2630 case IMSG_RECONF_LISTENER: 2631 if (idx != PFD_PIPE_MAIN) 2632 fatalx("reconf request not from parent"); 2633 if (nconf == NULL) 2634 fatalx("IMSG_RECONF_LISTENER but no config"); 2635 nla = imsg.data; 2636 TAILQ_FOREACH(la, conf->listen_addrs, entry) 2637 if (!la_cmp(la, nla)) 2638 break; 2639 2640 if (la == NULL) { 2641 if (nla->reconf != RECONF_REINIT) 2642 fatalx("king bula sez: " 2643 "expected REINIT"); 2644 2645 if ((nla->fd = imsg.fd) == -1) 2646 log_warnx("expected to receive fd for " 2647 "%s but didn't receive any", 2648 log_sockaddr((struct sockaddr *) 2649 &nla->sa)); 2650 2651 la = calloc(1, sizeof(struct listen_addr)); 2652 if (la == NULL) 2653 fatal(NULL); 2654 memcpy(&la->sa, &nla->sa, sizeof(la->sa)); 2655 la->flags = nla->flags; 2656 la->fd = nla->fd; 2657 la->reconf = RECONF_REINIT; 2658 TAILQ_INSERT_TAIL(nconf->listen_addrs, la, 2659 entry); 2660 } else { 2661 if (nla->reconf != RECONF_KEEP) 2662 fatalx("king bula sez: expected KEEP"); 2663 la->reconf = RECONF_KEEP; 2664 } 2665 2666 break; 2667 case IMSG_RECONF_CTRL: 2668 if (idx != PFD_PIPE_MAIN) 2669 fatalx("reconf request not from parent"); 2670 if (imsg.hdr.len != IMSG_HEADER_SIZE + 2671 sizeof(restricted)) 2672 fatalx("IFINFO imsg with wrong len"); 2673 memcpy(&restricted, imsg.data, sizeof(restricted)); 2674 if (imsg.fd == -1) { 2675 log_warnx("expected to receive fd for control " 2676 "socket but didn't receive any"); 2677 break; 2678 } 2679 if (restricted) { 2680 control_shutdown(rcsock); 2681 rcsock = imsg.fd; 2682 } else { 2683 control_shutdown(csock); 2684 csock = imsg.fd; 2685 } 2686 break; 2687 case IMSG_RECONF_DONE: 2688 if (idx != PFD_PIPE_MAIN) 2689 fatalx("reconf request not from parent"); 2690 if (nconf == NULL) 2691 fatalx("got IMSG_RECONF_DONE but no config"); 2692 
conf->flags = nconf->flags; 2693 conf->log = nconf->log; 2694 conf->bgpid = nconf->bgpid; 2695 conf->clusterid = nconf->clusterid; 2696 conf->as = nconf->as; 2697 conf->short_as = nconf->short_as; 2698 conf->holdtime = nconf->holdtime; 2699 conf->min_holdtime = nconf->min_holdtime; 2700 conf->connectretry = nconf->connectretry; 2701 2702 /* add new peers */ 2703 for (p = npeers; p != NULL; p = next) { 2704 next = p->next; 2705 p->next = peers; 2706 peers = p; 2707 } 2708 /* find ones that need attention */ 2709 for (p = peers; p != NULL; p = p->next) { 2710 /* needs to be deleted? */ 2711 if (p->conf.reconf_action == RECONF_NONE && 2712 !p->template) 2713 p->conf.reconf_action = RECONF_DELETE; 2714 /* had demotion, is demoted, demote removed? */ 2715 if (p->demoted && !p->conf.demote_group[0]) 2716 session_demote(p, -1); 2717 } 2718 2719 /* delete old listeners */ 2720 for (la = TAILQ_FIRST(conf->listen_addrs); la != NULL; 2721 la = nla) { 2722 nla = TAILQ_NEXT(la, entry); 2723 if (la->reconf == RECONF_NONE) { 2724 log_info("not listening on %s any more", 2725 log_sockaddr( 2726 (struct sockaddr *)&la->sa)); 2727 TAILQ_REMOVE(conf->listen_addrs, la, 2728 entry); 2729 close(la->fd); 2730 free(la); 2731 } 2732 } 2733 2734 /* add new listeners */ 2735 while ((la = TAILQ_FIRST(nconf->listen_addrs)) != 2736 NULL) { 2737 TAILQ_REMOVE(nconf->listen_addrs, la, entry); 2738 TAILQ_INSERT_TAIL(conf->listen_addrs, la, 2739 entry); 2740 } 2741 2742 setup_listeners(listener_cnt); 2743 free(nconf->listen_addrs); 2744 free(nconf); 2745 nconf = NULL; 2746 pending_reconf = 0; 2747 log_info("SE reconfigured"); 2748 imsg_compose(ibuf_main, IMSG_RECONF_DONE, 0, 0, 2749 -1, NULL, 0); 2750 break; 2751 case IMSG_IFINFO: 2752 if (idx != PFD_PIPE_MAIN) 2753 fatalx("IFINFO message not from parent"); 2754 if (imsg.hdr.len != IMSG_HEADER_SIZE + 2755 sizeof(struct kif)) 2756 fatalx("IFINFO imsg with wrong len"); 2757 kif = imsg.data; 2758 depend_ok = (kif->flags & IFF_UP) && 2759 
LINK_STATE_IS_UP(kif->link_state); 2760 2761 for (p = peers; p != NULL; p = p->next) 2762 if (!strcmp(p->conf.if_depend, kif->ifname)) { 2763 if (depend_ok && !p->depend_ok) { 2764 p->depend_ok = depend_ok; 2765 bgp_fsm(p, EVNT_START); 2766 } else if (!depend_ok && p->depend_ok) { 2767 p->depend_ok = depend_ok; 2768 session_stop(p, 2769 ERR_CEASE_OTHER_CHANGE); 2770 } 2771 } 2772 break; 2773 case IMSG_MRT_OPEN: 2774 case IMSG_MRT_REOPEN: 2775 if (imsg.hdr.len > IMSG_HEADER_SIZE + 2776 sizeof(struct mrt)) { 2777 log_warnx("wrong imsg len"); 2778 break; 2779 } 2780 2781 memcpy(&xmrt, imsg.data, sizeof(struct mrt)); 2782 if ((xmrt.wbuf.fd = imsg.fd) == -1) 2783 log_warnx("expected to receive fd for mrt dump " 2784 "but didn't receive any"); 2785 2786 mrt = mrt_get(&mrthead, &xmrt); 2787 if (mrt == NULL) { 2788 /* new dump */ 2789 mrt = calloc(1, sizeof(struct mrt)); 2790 if (mrt == NULL) 2791 fatal("session_dispatch_imsg"); 2792 memcpy(mrt, &xmrt, sizeof(struct mrt)); 2793 TAILQ_INIT(&mrt->wbuf.bufs); 2794 LIST_INSERT_HEAD(&mrthead, mrt, entry); 2795 } else { 2796 /* old dump reopened */ 2797 close(mrt->wbuf.fd); 2798 mrt->wbuf.fd = xmrt.wbuf.fd; 2799 } 2800 break; 2801 case IMSG_MRT_CLOSE: 2802 if (imsg.hdr.len > IMSG_HEADER_SIZE + 2803 sizeof(struct mrt)) { 2804 log_warnx("wrong imsg len"); 2805 break; 2806 } 2807 2808 memcpy(&xmrt, imsg.data, sizeof(struct mrt)); 2809 mrt = mrt_get(&mrthead, &xmrt); 2810 if (mrt != NULL) 2811 mrt_done(mrt); 2812 break; 2813 case IMSG_CTL_KROUTE: 2814 case IMSG_CTL_KROUTE_ADDR: 2815 case IMSG_CTL_SHOW_NEXTHOP: 2816 case IMSG_CTL_SHOW_INTERFACE: 2817 case IMSG_CTL_SHOW_FIB_TABLES: 2818 if (idx != PFD_PIPE_MAIN) 2819 fatalx("ctl kroute request not from parent"); 2820 control_imsg_relay(&imsg); 2821 break; 2822 case IMSG_CTL_SHOW_RIB: 2823 case IMSG_CTL_SHOW_RIB_PREFIX: 2824 case IMSG_CTL_SHOW_RIB_ATTR: 2825 case IMSG_CTL_SHOW_RIB_MEM: 2826 case IMSG_CTL_SHOW_NETWORK: 2827 case IMSG_CTL_SHOW_NEIGHBOR: 2828 if (idx != 
PFD_PIPE_ROUTE_CTL) 2829 fatalx("ctl rib request not from RDE"); 2830 control_imsg_relay(&imsg); 2831 break; 2832 case IMSG_CTL_END: 2833 case IMSG_CTL_RESULT: 2834 control_imsg_relay(&imsg); 2835 break; 2836 case IMSG_UPDATE: 2837 if (idx != PFD_PIPE_ROUTE) 2838 fatalx("update request not from RDE"); 2839 if (imsg.hdr.len > IMSG_HEADER_SIZE + 2840 MAX_PKTSIZE - MSGSIZE_HEADER || 2841 imsg.hdr.len < IMSG_HEADER_SIZE + 2842 MSGSIZE_UPDATE_MIN - MSGSIZE_HEADER) 2843 log_warnx("RDE sent invalid update"); 2844 else 2845 session_update(imsg.hdr.peerid, imsg.data, 2846 imsg.hdr.len - IMSG_HEADER_SIZE); 2847 break; 2848 case IMSG_UPDATE_ERR: 2849 if (idx != PFD_PIPE_ROUTE) 2850 fatalx("update request not from RDE"); 2851 if (imsg.hdr.len < IMSG_HEADER_SIZE + 2) { 2852 log_warnx("RDE sent invalid notification"); 2853 break; 2854 } 2855 if ((p = getpeerbyid(imsg.hdr.peerid)) == NULL) { 2856 log_warnx("no such peer: id=%u", 2857 imsg.hdr.peerid); 2858 break; 2859 } 2860 data = imsg.data; 2861 errcode = *data++; 2862 subcode = *data++; 2863 2864 if (imsg.hdr.len == IMSG_HEADER_SIZE + 2) 2865 data = NULL; 2866 2867 session_notification(p, errcode, subcode, 2868 data, imsg.hdr.len - IMSG_HEADER_SIZE - 2); 2869 switch (errcode) { 2870 case ERR_CEASE: 2871 switch (subcode) { 2872 case ERR_CEASE_MAX_PREFIX: 2873 bgp_fsm(p, EVNT_STOP); 2874 if (p->conf.max_prefix_restart) 2875 timer_set(p, Timer_IdleHold, 60 * 2876 p->conf.max_prefix_restart); 2877 break; 2878 default: 2879 bgp_fsm(p, EVNT_CON_FATAL); 2880 break; 2881 } 2882 break; 2883 default: 2884 bgp_fsm(p, EVNT_CON_FATAL); 2885 break; 2886 } 2887 break; 2888 case IMSG_SESSION_RESTARTED: 2889 if (idx != PFD_PIPE_ROUTE) 2890 fatalx("update request not from RDE"); 2891 if (imsg.hdr.len < IMSG_HEADER_SIZE + sizeof(aid)) { 2892 log_warnx("RDE sent invalid restart msg"); 2893 break; 2894 } 2895 if ((p = getpeerbyid(imsg.hdr.peerid)) == NULL) { 2896 log_warnx("no such peer: id=%u", 2897 imsg.hdr.peerid); 2898 break; 2899 } 2900 
memcpy(&aid, imsg.data, sizeof(aid)); 2901 if (aid >= AID_MAX) 2902 fatalx("IMSG_SESSION_RESTARTED: bad AID"); 2903 if (p->capa.neg.grestart.flags[aid] & 2904 CAPA_GR_RESTARTING) { 2905 log_peer_warnx(&p->conf, 2906 "graceful restart of %s finished", 2907 aid2str(aid)); 2908 p->capa.neg.grestart.flags[aid] &= 2909 ~CAPA_GR_RESTARTING; 2910 timer_stop(p, Timer_RestartTimeout); 2911 2912 /* signal back to RDE to cleanup stale routes */ 2913 if (imsg_compose(ibuf_rde, 2914 IMSG_SESSION_RESTARTED, imsg.hdr.peerid, 0, 2915 -1, &aid, sizeof(aid)) == -1) 2916 fatal("imsg_compose: " 2917 "IMSG_SESSION_RESTARTED"); 2918 } 2919 break; 2920 case IMSG_SESSION_DOWN: 2921 if (idx != PFD_PIPE_ROUTE) 2922 fatalx("update request not from RDE"); 2923 if ((p = getpeerbyid(imsg.hdr.peerid)) == NULL) { 2924 log_warnx("no such peer: id=%u", 2925 imsg.hdr.peerid); 2926 break; 2927 } 2928 session_stop(p, ERR_CEASE_ADMIN_DOWN); 2929 break; 2930 default: 2931 break; 2932 } 2933 imsg_free(&imsg); 2934 } 2935 } 2936 2937 int 2938 la_cmp(struct listen_addr *a, struct listen_addr *b) 2939 { 2940 struct sockaddr_in *in_a, *in_b; 2941 struct sockaddr_in6 *in6_a, *in6_b; 2942 2943 if (a->sa.ss_family != b->sa.ss_family) 2944 return (1); 2945 2946 switch (a->sa.ss_family) { 2947 case AF_INET: 2948 in_a = (struct sockaddr_in *)&a->sa; 2949 in_b = (struct sockaddr_in *)&b->sa; 2950 if (in_a->sin_addr.s_addr != in_b->sin_addr.s_addr) 2951 return (1); 2952 if (in_a->sin_port != in_b->sin_port) 2953 return (1); 2954 break; 2955 case AF_INET6: 2956 in6_a = (struct sockaddr_in6 *)&a->sa; 2957 in6_b = (struct sockaddr_in6 *)&b->sa; 2958 if (bcmp(&in6_a->sin6_addr, &in6_b->sin6_addr, 2959 sizeof(struct in6_addr))) 2960 return (1); 2961 if (in6_a->sin6_port != in6_b->sin6_port) 2962 return (1); 2963 break; 2964 default: 2965 fatal("king bula sez: unknown address family"); 2966 /* NOTREACHED */ 2967 } 2968 2969 return (0); 2970 } 2971 2972 struct peer * 2973 getpeerbyaddr(struct bgpd_addr *addr) 2974 { 2975 
struct peer *p; 2976 2977 /* we might want a more effective way to find peers by IP */ 2978 for (p = peers; p != NULL && 2979 memcmp(&p->conf.remote_addr, addr, sizeof(p->conf.remote_addr)); 2980 p = p->next) 2981 ; /* nothing */ 2982 2983 return (p); 2984 } 2985 2986 struct peer * 2987 getpeerbydesc(const char *descr) 2988 { 2989 struct peer *p, *res = NULL; 2990 int match = 0; 2991 2992 for (p = peers; p != NULL; p = p->next) 2993 if (!strcmp(p->conf.descr, descr)) { 2994 res = p; 2995 match++; 2996 } 2997 2998 if (match > 1) 2999 log_info("neighbor description \"%s\" not unique, request " 3000 "aborted", descr); 3001 3002 if (match == 1) 3003 return (res); 3004 else 3005 return (NULL); 3006 } 3007 3008 struct peer * 3009 getpeerbyip(struct sockaddr *ip) 3010 { 3011 struct bgpd_addr addr; 3012 struct peer *p, *newpeer, *loose = NULL; 3013 u_int32_t id; 3014 3015 sa2addr(ip, &addr); 3016 3017 /* we might want a more effective way to find peers by IP */ 3018 for (p = peers; p != NULL; p = p->next) 3019 if (!p->conf.template && 3020 !memcmp(&addr, &p->conf.remote_addr, sizeof(addr))) 3021 return (p); 3022 3023 /* try template matching */ 3024 for (p = peers; p != NULL; p = p->next) 3025 if (p->conf.template && 3026 p->conf.remote_addr.aid == addr.aid && 3027 session_match_mask(p, &addr)) 3028 if (loose == NULL || loose->conf.remote_masklen < 3029 p->conf.remote_masklen) 3030 loose = p; 3031 3032 if (loose != NULL) { 3033 /* clone */ 3034 if ((newpeer = malloc(sizeof(struct peer))) == NULL) 3035 fatal(NULL); 3036 memcpy(newpeer, loose, sizeof(struct peer)); 3037 for (id = UINT_MAX; id > UINT_MAX / 2; id--) { 3038 for (p = peers; p != NULL && p->conf.id != id; 3039 p = p->next) 3040 ; /* nothing */ 3041 if (p == NULL) { /* we found a free id */ 3042 break; 3043 } 3044 } 3045 newpeer->template = loose; 3046 session_template_clone(newpeer, ip, id, 0); 3047 newpeer->state = newpeer->prev_state = STATE_NONE; 3048 newpeer->conf.reconf_action = RECONF_KEEP; 3049 
newpeer->rbuf = NULL; 3050 init_peer(newpeer); 3051 bgp_fsm(newpeer, EVNT_START); 3052 newpeer->next = peers; 3053 peers = newpeer; 3054 return (newpeer); 3055 } 3056 3057 return (NULL); 3058 } 3059 3060 void 3061 session_template_clone(struct peer *p, struct sockaddr *ip, u_int32_t id, 3062 u_int32_t as) 3063 { 3064 struct bgpd_addr remote_addr; 3065 3066 if (ip) 3067 sa2addr(ip, &remote_addr); 3068 else 3069 memcpy(&remote_addr, &p->conf.remote_addr, sizeof(remote_addr)); 3070 3071 memcpy(&p->conf, &p->template->conf, sizeof(struct peer_config)); 3072 3073 p->conf.id = id; 3074 3075 if (as) { 3076 p->conf.remote_as = as; 3077 p->conf.ebgp = (p->conf.remote_as != conf->as); 3078 if (!p->conf.ebgp) 3079 /* force enforce_as off for iBGP sessions */ 3080 p->conf.enforce_as = ENFORCE_AS_OFF; 3081 } 3082 3083 memcpy(&p->conf.remote_addr, &remote_addr, sizeof(remote_addr)); 3084 switch (p->conf.remote_addr.aid) { 3085 case AID_INET: 3086 p->conf.remote_masklen = 32; 3087 break; 3088 case AID_INET6: 3089 p->conf.remote_masklen = 128; 3090 break; 3091 } 3092 p->conf.template = 0; 3093 } 3094 3095 int 3096 session_match_mask(struct peer *p, struct bgpd_addr *a) 3097 { 3098 in_addr_t v4mask; 3099 struct in6_addr masked; 3100 3101 switch (p->conf.remote_addr.aid) { 3102 case AID_INET: 3103 v4mask = htonl(prefixlen2mask(p->conf.remote_masklen)); 3104 if (p->conf.remote_addr.v4.s_addr == (a->v4.s_addr & v4mask)) 3105 return (1); 3106 return (0); 3107 case AID_INET6: 3108 inet6applymask(&masked, &a->v6, p->conf.remote_masklen); 3109 3110 if (!memcmp(&masked, &p->conf.remote_addr.v6, sizeof(masked))) 3111 return (1); 3112 return (0); 3113 } 3114 return (0); 3115 } 3116 3117 struct peer * 3118 getpeerbyid(u_int32_t peerid) 3119 { 3120 struct peer *p; 3121 3122 /* we might want a more effective way to find peers by IP */ 3123 for (p = peers; p != NULL && 3124 p->conf.id != peerid; p = p->next) 3125 ; /* nothing */ 3126 3127 return (p); 3128 } 3129 3130 void 3131 
session_down(struct peer *peer) 3132 { 3133 bzero(&peer->capa.neg, sizeof(peer->capa.neg)); 3134 peer->stats.last_updown = time(NULL); 3135 if (imsg_compose(ibuf_rde, IMSG_SESSION_DOWN, peer->conf.id, 0, -1, 3136 NULL, 0) == -1) 3137 fatalx("imsg_compose error"); 3138 } 3139 3140 void 3141 session_up(struct peer *p) 3142 { 3143 struct session_up sup; 3144 3145 if (imsg_compose(ibuf_rde, IMSG_SESSION_ADD, p->conf.id, 0, -1, 3146 &p->conf, sizeof(p->conf)) == -1) 3147 fatalx("imsg_compose error"); 3148 3149 sa2addr((struct sockaddr *)&p->sa_local, &sup.local_addr); 3150 sa2addr((struct sockaddr *)&p->sa_remote, &sup.remote_addr); 3151 3152 sup.remote_bgpid = p->remote_bgpid; 3153 sup.short_as = p->short_as; 3154 memcpy(&sup.capa, &p->capa.neg, sizeof(sup.capa)); 3155 p->stats.last_updown = time(NULL); 3156 if (imsg_compose(ibuf_rde, IMSG_SESSION_UP, p->conf.id, 0, -1, 3157 &sup, sizeof(sup)) == -1) 3158 fatalx("imsg_compose error"); 3159 } 3160 3161 int 3162 imsg_ctl_parent(int type, u_int32_t peerid, pid_t pid, void *data, 3163 u_int16_t datalen) 3164 { 3165 return (imsg_compose(ibuf_main, type, peerid, pid, -1, data, datalen)); 3166 } 3167 3168 int 3169 imsg_ctl_rde(int type, pid_t pid, void *data, u_int16_t datalen) 3170 { 3171 /* 3172 * Use control socket to talk to RDE to bypass the queue of the 3173 * regular imsg socket. 
3174 */ 3175 return (imsg_compose(ibuf_rde_ctl, type, 0, pid, -1, data, datalen)); 3176 } 3177 3178 void 3179 session_demote(struct peer *p, int level) 3180 { 3181 struct demote_msg msg; 3182 3183 strlcpy(msg.demote_group, p->conf.demote_group, 3184 sizeof(msg.demote_group)); 3185 msg.level = level; 3186 if (imsg_compose(ibuf_main, IMSG_DEMOTE, p->conf.id, 0, -1, 3187 &msg, sizeof(msg)) == -1) 3188 fatalx("imsg_compose error"); 3189 3190 p->demoted += level; 3191 } 3192 3193 void 3194 session_stop(struct peer *peer, u_int8_t subcode) 3195 { 3196 switch (peer->state) { 3197 case STATE_OPENSENT: 3198 case STATE_OPENCONFIRM: 3199 case STATE_ESTABLISHED: 3200 session_notification(peer, ERR_CEASE, subcode, NULL, 0); 3201 break; 3202 default: 3203 /* session not open, no need to send notification */ 3204 break; 3205 } 3206 bgp_fsm(peer, EVNT_STOP); 3207 } 3208