1 /* $OpenBSD: session.c,v 1.334 2014/01/22 04:08:08 claudio Exp $ */ 2 3 /* 4 * Copyright (c) 2003, 2004, 2005 Henning Brauer <henning@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 #include <sys/types.h> 20 21 #include <sys/mman.h> 22 #include <sys/socket.h> 23 #include <sys/time.h> 24 #include <sys/resource.h> 25 #include <sys/un.h> 26 #include <net/if_types.h> 27 #include <netinet/in.h> 28 #include <netinet/in_systm.h> 29 #include <netinet/ip.h> 30 #include <netinet/tcp.h> 31 #include <arpa/inet.h> 32 #include <limits.h> 33 34 #include <err.h> 35 #include <errno.h> 36 #include <fcntl.h> 37 #include <poll.h> 38 #include <pwd.h> 39 #include <signal.h> 40 #include <stdio.h> 41 #include <stdlib.h> 42 #include <string.h> 43 #include <unistd.h> 44 45 #include "bgpd.h" 46 #include "mrt.h" 47 #include "session.h" 48 49 #define PFD_PIPE_MAIN 0 50 #define PFD_PIPE_ROUTE 1 51 #define PFD_PIPE_ROUTE_CTL 2 52 #define PFD_SOCK_CTL 3 53 #define PFD_SOCK_RCTL 4 54 #define PFD_SOCK_PFKEY 5 55 #define PFD_LISTENERS_START 6 56 57 void session_sighdlr(int); 58 int setup_listeners(u_int *); 59 void init_conf(struct bgpd_config *); 60 void init_peer(struct peer *); 61 void start_timer_holdtime(struct peer *); 62 void start_timer_keepalive(struct peer *); 63 void session_close_connection(struct peer *); 64 void change_state(struct peer *, enum session_state, enum session_events); 65 int session_setup_socket(struct peer *); 66 void session_accept(int); 67 int session_connect(struct peer *); 68 void session_tcp_established(struct peer *); 69 void session_capa_ann_none(struct peer *); 70 int session_capa_add(struct ibuf *, u_int8_t, u_int8_t); 71 int session_capa_add_mp(struct ibuf *, u_int8_t); 72 int session_capa_add_gr(struct peer *, struct ibuf *, u_int8_t); 73 struct bgp_msg *session_newmsg(enum msg_type, u_int16_t); 74 int session_sendmsg(struct bgp_msg *, struct peer *); 75 void session_open(struct peer *); 76 void session_keepalive(struct peer *); 77 void session_update(u_int32_t, void *, size_t); 78 void session_notification(struct peer *, u_int8_t, u_int8_t, void *, 79 ssize_t); 80 void session_rrefresh(struct peer *, u_int8_t); 81 int session_graceful_restart(struct peer *); 82 int session_graceful_stop(struct peer *); 83 int session_dispatch_msg(struct pollfd *, struct peer *); 84 int session_process_msg(struct peer *); 85 int parse_header(struct peer *, u_char *, u_int16_t *, u_int8_t *); 86 int parse_open(struct peer *); 87 int parse_update(struct peer *); 88 int parse_refresh(struct peer *); 89 int parse_notification(struct peer *); 90 int parse_capabilities(struct peer *, u_char *, u_int16_t, u_int32_t *); 91 int capa_neg_calc(struct peer *); 92 void session_dispatch_imsg(struct imsgbuf *, int, u_int *); 93 void session_up(struct peer *); 94 void session_down(struct peer *); 95 void session_demote(struct peer *, int); 96 97 int la_cmp(struct listen_addr *, struct listen_addr *); 98 struct peer *getpeerbyip(struct sockaddr *); 99 void session_template_clone(struct peer *, struct sockaddr *, 100 u_int32_t, u_int32_t); 101 int session_match_mask(struct peer *, struct bgpd_addr *); 102 struct peer *getpeerbyid(u_int32_t); 103 104 struct bgpd_config *conf, *nconf; 105 struct bgpd_sysdep sysdep; 106 struct peer *peers, *npeers; 107 volatile sig_atomic_t session_quit; 108 int pending_reconf; 109 int csock = -1, rcsock = -1; 110 u_int peer_cnt; 111 struct imsgbuf *ibuf_rde; 112 struct imsgbuf *ibuf_rde_ctl; 113 struct imsgbuf *ibuf_main; 114 115 struct mrt_head mrthead; 116 time_t pauseaccept; 117 118 void 119 session_sighdlr(int sig) 120 { 121 switch (sig) { 122 case SIGINT: 123 case SIGTERM: 124 session_quit = 1; 125 break; 126 } 127 } 128 129 int 130 setup_listeners(u_int *la_cnt) 131 { 132 int ttl = 255; 133 int opt; 134 struct listen_addr *la; 135 u_int cnt = 0; 136 137 TAILQ_FOREACH(la, conf->listen_addrs, entry) { 138 la->reconf = RECONF_NONE; 139 cnt++; 140 141 if (la->flags & LISTENER_LISTENING) 142 continue; 143 144 if (la->fd == -1) { 145 log_warn("cannot establish listener on %s: invalid fd", 146 log_sockaddr((struct sockaddr *)&la->sa)); 147 continue; 148 } 149 150 opt = 1; 151 if (setsockopt(la->fd, IPPROTO_TCP, TCP_MD5SIG, 152 &opt, sizeof(opt)) == -1) { 153 if (errno == ENOPROTOOPT) { /* system w/o md5sig */ 154 log_warnx("md5sig not available, disabling"); 155 sysdep.no_md5sig = 1; 156 } else 157 fatal("setsockopt TCP_MD5SIG"); 158 } 159 160 /* set ttl to 255 so that ttl-security works */ 161 if (la->sa.ss_family == AF_INET && setsockopt(la->fd, 162 IPPROTO_IP, IP_TTL, &ttl, sizeof(ttl)) == -1) { 163 log_warn("setup_listeners setsockopt TTL"); 164 continue; 165 } 166 if (la->sa.ss_family == AF_INET6 && setsockopt(la->fd, 167 IPPROTO_IPV6, IPV6_UNICAST_HOPS, &ttl, sizeof(ttl)) == -1) { 168 log_warn("setup_listeners setsockopt hoplimit"); 169 continue; 170 } 171 172 session_socket_blockmode(la->fd, BM_NONBLOCK); 173 174 if (listen(la->fd, MAX_BACKLOG)) { 175 close(la->fd); 176 fatal("listen"); 177 } 178 179 la->flags |= LISTENER_LISTENING; 180 181 log_info("listening on %s", 182 log_sockaddr((struct sockaddr *)&la->sa)); 183 } 184 185 *la_cnt = cnt; 186 187 return (0); 188 } 189 190 pid_t 191 session_main(int pipe_m2s[2], int pipe_s2r[2], int pipe_m2r[2], 192 int pipe_s2rctl[2]) 193 { 194 int nfds, timeout, pfkeysock; 195 unsigned int i, j, idx_peers, idx_listeners, idx_mrts; 196 pid_t pid; 197 u_int pfd_elms = 0, peer_l_elms = 0, mrt_l_elms = 0; 198 u_int listener_cnt, ctl_cnt, mrt_cnt; 199 u_int new_cnt; 200 u_int32_t ctl_queued; 201 struct passwd *pw; 202 struct peer *p, **peer_l = NULL, *last, *next; 203 struct mrt *m, *xm, **mrt_l = NULL; 204 struct pollfd *pfd = NULL; 205 struct ctl_conn *ctl_conn; 206 struct listen_addr *la; 207 void *newp; 208 short events; 209 210 switch (pid = fork()) { 211 case -1: 212 fatal("cannot fork"); 213 case 0: 214 break; 215 default: 216 return (pid); 217 } 218 219 if ((pw = getpwnam(BGPD_USER)) == NULL) 220 fatal(NULL); 221 222 if (chroot(pw->pw_dir) == -1) 223 fatal("chroot"); 224 if (chdir("/") == -1) 225 fatal("chdir(\"/\")"); 226 227 setproctitle("session engine"); 228 bgpd_process = PROC_SE; 229 pfkeysock = pfkey_init(&sysdep); 230 231 if (setgroups(1, &pw->pw_gid) || 232 setresgid(pw->pw_gid, pw->pw_gid, pw->pw_gid) || 233 setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid)) 234 fatal("can't drop privileges"); 235 236 signal(SIGTERM, session_sighdlr); 237 signal(SIGINT, session_sighdlr); 238 signal(SIGPIPE, SIG_IGN); 239 signal(SIGHUP, SIG_IGN); 240 signal(SIGALRM, SIG_IGN); 241 signal(SIGUSR1, SIG_IGN); 242 243 close(pipe_m2s[0]); 244 close(pipe_s2r[1]); 245 close(pipe_s2rctl[1]); 246 close(pipe_m2r[0]); 247 close(pipe_m2r[1]); 248 if ((ibuf_rde = malloc(sizeof(struct imsgbuf))) == NULL || 249 (ibuf_rde_ctl = malloc(sizeof(struct imsgbuf))) == NULL || 250 (ibuf_main = malloc(sizeof(struct imsgbuf))) == NULL) 251 fatal(NULL); 252 imsg_init(ibuf_rde, pipe_s2r[0]); 253 imsg_init(ibuf_rde_ctl, pipe_s2rctl[0]); 254 imsg_init(ibuf_main, pipe_m2s[1]); 255 256 TAILQ_INIT(&ctl_conns); 257 LIST_INIT(&mrthead); 258 listener_cnt = 0; 259 peer_cnt = 0; 260 ctl_cnt = 0; 261 262 if ((conf = calloc(1, sizeof(struct bgpd_config))) == NULL) 263 fatal(NULL); 264 if ((conf->listen_addrs = calloc(1, sizeof(struct listen_addrs))) == 265 NULL) 266 fatal(NULL); 267 TAILQ_INIT(conf->listen_addrs); 268 269 log_info("session engine ready"); 270 271 while (session_quit == 0) { 272 /* check for peers to be initialized or deleted */ 273 last = NULL; 274 if (!pending_reconf) { 275 for (p = peers; p != NULL; p = next) { 276 next = p->next; 277 /* cloned peer that idled out? */ 278 if (p->template && (p->state == STATE_IDLE || 279 p->state == STATE_ACTIVE) && 280 time(NULL) - p->stats.last_updown >= 281 INTERVAL_HOLD_CLONED) 282 p->conf.reconf_action = RECONF_DELETE; 283 284 /* new peer that needs init? */ 285 if (p->state == STATE_NONE) 286 init_peer(p); 287 288 /* reinit due? */ 289 if (p->conf.reconf_action == RECONF_REINIT) { 290 session_stop(p, ERR_CEASE_ADMIN_RESET); 291 if (!p->conf.down) 292 timer_set(p, Timer_IdleHold, 0); 293 } 294 295 /* deletion due? */ 296 if (p->conf.reconf_action == RECONF_DELETE) { 297 if (p->demoted) 298 session_demote(p, -1); 299 p->conf.demote_group[0] = 0; 300 session_stop(p, ERR_CEASE_PEER_UNCONF); 301 log_peer_warnx(&p->conf, "removed"); 302 if (last != NULL) 303 last->next = next; 304 else 305 peers = next; 306 timer_remove_all(p); 307 free(p); 308 peer_cnt--; 309 continue; 310 } 311 p->conf.reconf_action = RECONF_NONE; 312 last = p; 313 } 314 } 315 316 if (peer_cnt > peer_l_elms) { 317 if ((newp = realloc(peer_l, sizeof(struct peer *) * 318 peer_cnt)) == NULL) { 319 /* panic for now */ 320 log_warn("could not resize peer_l from %u -> %u" 321 " entries", peer_l_elms, peer_cnt); 322 fatalx("exiting"); 323 } 324 peer_l = newp; 325 peer_l_elms = peer_cnt; 326 } 327 328 mrt_cnt = 0; 329 for (m = LIST_FIRST(&mrthead); m != NULL; m = xm) { 330 xm = LIST_NEXT(m, entry); 331 if (m->state == MRT_STATE_REMOVE) { 332 mrt_clean(m); 333 LIST_REMOVE(m, entry); 334 free(m); 335 continue; 336 } 337 if (m->wbuf.queued) 338 mrt_cnt++; 339 } 340 341 if (mrt_cnt > mrt_l_elms) { 342 if ((newp = realloc(mrt_l, sizeof(struct mrt *) * 343 mrt_cnt)) == NULL) { 344 /* panic for now */ 345 log_warn("could not resize mrt_l from %u -> %u" 346 " entries", mrt_l_elms, mrt_cnt); 347 fatalx("exiting"); 348 } 349 mrt_l = newp; 350 mrt_l_elms = mrt_cnt; 351 } 352 353 new_cnt = PFD_LISTENERS_START + listener_cnt + peer_cnt + 354 ctl_cnt + mrt_cnt; 355 if (new_cnt > pfd_elms) { 356 if ((newp = realloc(pfd, sizeof(struct pollfd) * 357 new_cnt)) == NULL) { 358 /* panic for now */ 359 log_warn("could not resize pfd from %u -> %u" 360 " entries", pfd_elms, new_cnt); 361 fatalx("exiting"); 362 } 363 pfd = newp; 364 pfd_elms = new_cnt; 365 } 366 367 bzero(pfd, sizeof(struct pollfd) * pfd_elms); 368 pfd[PFD_PIPE_MAIN].fd = ibuf_main->fd; 369 pfd[PFD_PIPE_MAIN].events = POLLIN; 370 if (ibuf_main->w.queued > 0) 371 pfd[PFD_PIPE_MAIN].events |= POLLOUT; 372 pfd[PFD_PIPE_ROUTE].fd = ibuf_rde->fd; 373 pfd[PFD_PIPE_ROUTE].events = POLLIN; 374 if (ibuf_rde->w.queued > 0) 375 pfd[PFD_PIPE_ROUTE].events |= POLLOUT; 376 377 ctl_queued = 0; 378 TAILQ_FOREACH(ctl_conn, &ctl_conns, entry) 379 ctl_queued += ctl_conn->ibuf.w.queued; 380 381 pfd[PFD_PIPE_ROUTE_CTL].fd = ibuf_rde_ctl->fd; 382 if (ctl_queued < SESSION_CTL_QUEUE_MAX) 383 /* 384 * Do not act as unlimited buffer. Don't read in more 385 * messages if the ctl sockets are getting full. 386 */ 387 pfd[PFD_PIPE_ROUTE_CTL].events = POLLIN; 388 if (pauseaccept == 0) { 389 pfd[PFD_SOCK_CTL].fd = csock; 390 pfd[PFD_SOCK_CTL].events = POLLIN; 391 pfd[PFD_SOCK_RCTL].fd = rcsock; 392 pfd[PFD_SOCK_RCTL].events = POLLIN; 393 } else { 394 pfd[PFD_SOCK_CTL].fd = -1; 395 pfd[PFD_SOCK_RCTL].fd = -1; 396 } 397 pfd[PFD_SOCK_PFKEY].fd = pfkeysock; 398 pfd[PFD_SOCK_PFKEY].events = POLLIN; 399 400 i = PFD_LISTENERS_START; 401 TAILQ_FOREACH(la, conf->listen_addrs, entry) { 402 if (pauseaccept == 0) { 403 pfd[i].fd = la->fd; 404 pfd[i].events = POLLIN; 405 } else 406 pfd[i].fd = -1; 407 i++; 408 } 409 idx_listeners = i; 410 timeout = 240; /* loop every 240s at least */ 411 412 for (p = peers; p != NULL; p = p->next) { 413 time_t nextaction; 414 struct peer_timer *pt; 415 416 /* check timers */ 417 if ((pt = timer_nextisdue(p)) != NULL) { 418 switch (pt->type) { 419 case Timer_Hold: 420 bgp_fsm(p, EVNT_TIMER_HOLDTIME); 421 break; 422 case Timer_ConnectRetry: 423 bgp_fsm(p, EVNT_TIMER_CONNRETRY); 424 break; 425 case Timer_Keepalive: 426 bgp_fsm(p, EVNT_TIMER_KEEPALIVE); 427 break; 428 case Timer_IdleHold: 429 bgp_fsm(p, EVNT_START); 430 break; 431 case Timer_IdleHoldReset: 432 p->IdleHoldTime /= 2; 433 if (p->IdleHoldTime <= 434 INTERVAL_IDLE_HOLD_INITIAL) { 435 p->IdleHoldTime = 436 INTERVAL_IDLE_HOLD_INITIAL; 437 timer_stop(p, 438 Timer_IdleHoldReset); 439 p->errcnt = 0; 440 } else 441 timer_set(p, 442 Timer_IdleHoldReset, 443 p->IdleHoldTime); 444 break; 445 case Timer_CarpUndemote: 446 timer_stop(p, Timer_CarpUndemote); 447 if (p->demoted && 448 p->state == STATE_ESTABLISHED) 449 session_demote(p, -1); 450 break; 451 case Timer_RestartTimeout: 452 timer_stop(p, Timer_RestartTimeout); 453 session_graceful_stop(p); 454 break; 455 default: 456 fatalx("King Bula lost in time"); 457 } 458 } 459 if ((nextaction = timer_nextduein(p)) != -1 && 460 nextaction < timeout) 461 timeout = nextaction; 462 463 /* are we waiting for a write? */ 464 events = POLLIN; 465 if (p->wbuf.queued > 0 || p->state == STATE_CONNECT) 466 events |= POLLOUT; 467 /* is there still work to do? */ 468 if (p->rbuf && p->rbuf->wpos) 469 timeout = 0; 470 471 /* poll events */ 472 if (p->fd != -1 && events != 0) { 473 pfd[i].fd = p->fd; 474 pfd[i].events = events; 475 peer_l[i - idx_listeners] = p; 476 i++; 477 } 478 } 479 480 idx_peers = i; 481 482 LIST_FOREACH(m, &mrthead, entry) 483 if (m->wbuf.queued) { 484 pfd[i].fd = m->wbuf.fd; 485 pfd[i].events = POLLOUT; 486 mrt_l[i - idx_peers] = m; 487 i++; 488 } 489 490 idx_mrts = i; 491 492 TAILQ_FOREACH(ctl_conn, &ctl_conns, entry) { 493 pfd[i].fd = ctl_conn->ibuf.fd; 494 pfd[i].events = POLLIN; 495 if (ctl_conn->ibuf.w.queued > 0) 496 pfd[i].events |= POLLOUT; 497 i++; 498 } 499 500 if (pauseaccept && timeout > 1) 501 timeout = 1; 502 if (timeout < 0) 503 timeout = 0; 504 if ((nfds = poll(pfd, i, timeout * 1000)) == -1) 505 if (errno != EINTR) 506 fatal("poll error"); 507 508 /* 509 * If we previously saw fd exhaustion, we stop accept() 510 * for 1 second to throttle the accept() loop. 511 */ 512 if (pauseaccept && getmonotime() > pauseaccept + 1) 513 pauseaccept = 0; 514 515 if (nfds > 0 && pfd[PFD_PIPE_MAIN].revents & POLLOUT) 516 if (msgbuf_write(&ibuf_main->w) <= 0 && errno != EAGAIN) 517 fatal("pipe write error"); 518 519 if (nfds > 0 && pfd[PFD_PIPE_MAIN].revents & POLLIN) { 520 nfds--; 521 session_dispatch_imsg(ibuf_main, PFD_PIPE_MAIN, 522 &listener_cnt); 523 } 524 525 if (nfds > 0 && pfd[PFD_PIPE_ROUTE].revents & POLLOUT) 526 if (msgbuf_write(&ibuf_rde->w) <= 0 && errno != EAGAIN) 527 fatal("pipe write error"); 528 529 if (nfds > 0 && pfd[PFD_PIPE_ROUTE].revents & POLLIN) { 530 nfds--; 531 session_dispatch_imsg(ibuf_rde, PFD_PIPE_ROUTE, 532 &listener_cnt); 533 } 534 535 if (nfds > 0 && pfd[PFD_PIPE_ROUTE_CTL].revents & POLLIN) { 536 nfds--; 537 session_dispatch_imsg(ibuf_rde_ctl, PFD_PIPE_ROUTE_CTL, 538 &listener_cnt); 539 } 540 541 if (nfds > 0 && pfd[PFD_SOCK_CTL].revents & POLLIN) { 542 nfds--; 543 ctl_cnt += control_accept(csock, 0); 544 } 545 546 if (nfds > 0 && pfd[PFD_SOCK_RCTL].revents & POLLIN) { 547 nfds--; 548 ctl_cnt += control_accept(rcsock, 1); 549 } 550 551 if (nfds > 0 && pfd[PFD_SOCK_PFKEY].revents & POLLIN) { 552 nfds--; 553 if (pfkey_read(pfkeysock, NULL) == -1) { 554 log_warnx("pfkey_read failed, exiting..."); 555 session_quit = 1; 556 } 557 } 558 559 for (j = PFD_LISTENERS_START; nfds > 0 && j < idx_listeners; 560 j++) 561 if (pfd[j].revents & POLLIN) { 562 nfds--; 563 session_accept(pfd[j].fd); 564 } 565 566 for (; nfds > 0 && j < idx_peers; j++) 567 nfds -= session_dispatch_msg(&pfd[j], 568 peer_l[j - idx_listeners]); 569 570 for (p = peers; p != NULL; p = p->next) 571 if (p->rbuf && p->rbuf->wpos) 572 session_process_msg(p); 573 574 for (; nfds > 0 && j < idx_mrts; j++) 575 if (pfd[j].revents & POLLOUT) { 576 nfds--; 577 mrt_write(mrt_l[j - idx_peers]); 578 } 579 580 for (; nfds > 0 && j < i; j++) 581 nfds -= control_dispatch_msg(&pfd[j], &ctl_cnt); 582 } 583 584 while ((p = peers) != NULL) { 585 peers = p->next; 586 session_stop(p, ERR_CEASE_ADMIN_DOWN); 587 pfkey_remove(p); 588 free(p); 589 } 590 591 while ((m = LIST_FIRST(&mrthead)) != NULL) { 592 mrt_clean(m); 593 LIST_REMOVE(m, entry); 594 free(m); 595 } 596 597 while ((la = TAILQ_FIRST(conf->listen_addrs)) != NULL) { 598 TAILQ_REMOVE(conf->listen_addrs, la, entry); 599 free(la); 600 } 601 free(conf->listen_addrs); 602 free(peer_l); 603 free(mrt_l); 604 free(pfd); 605 606 msgbuf_write(&ibuf_rde->w); 607 msgbuf_clear(&ibuf_rde->w); 608 free(ibuf_rde); 609 msgbuf_write(&ibuf_main->w); 610 msgbuf_clear(&ibuf_main->w); 611 free(ibuf_main); 612 613 control_shutdown(csock); 614 control_shutdown(rcsock); 615 log_info("session engine exiting"); 616 _exit(0); 617 } 618 619 void 620 init_conf(struct bgpd_config *c) 621 { 622 if (!c->holdtime) 623 c->holdtime = INTERVAL_HOLD; 624 if (!c->connectretry) 625 c->connectretry = INTERVAL_CONNECTRETRY; 626 } 627 628 void 629 init_peer(struct peer *p) 630 { 631 TAILQ_INIT(&p->timers); 632 p->fd = p->wbuf.fd = -1; 633 634 if (p->conf.if_depend[0]) 635 imsg_compose(ibuf_main, IMSG_IFINFO, 0, 0, -1, 636 p->conf.if_depend, sizeof(p->conf.if_depend)); 637 else 638 p->depend_ok = 1; 639 640 peer_cnt++; 641 642 change_state(p, STATE_IDLE, EVNT_NONE); 643 if (p->conf.down) 644 timer_stop(p, Timer_IdleHold); /* no autostart */ 645 else 646 timer_set(p, Timer_IdleHold, 0); /* start ASAP */ 647 648 /* 649 * on startup, demote if requested. 650 * do not handle new peers. they must reach ESTABLISHED beforehands. 651 * peers added at runtime have reconf_action set to RECONF_REINIT. 652 */ 653 if (p->conf.reconf_action != RECONF_REINIT && p->conf.demote_group[0]) 654 session_demote(p, +1); 655 } 656 657 void 658 bgp_fsm(struct peer *peer, enum session_events event) 659 { 660 switch (peer->state) { 661 case STATE_NONE: 662 /* nothing */ 663 break; 664 case STATE_IDLE: 665 switch (event) { 666 case EVNT_START: 667 timer_stop(peer, Timer_Hold); 668 timer_stop(peer, Timer_Keepalive); 669 timer_stop(peer, Timer_IdleHold); 670 671 /* allocate read buffer */ 672 peer->rbuf = calloc(1, sizeof(struct ibuf_read)); 673 if (peer->rbuf == NULL) 674 fatal(NULL); 675 676 /* init write buffer */ 677 msgbuf_init(&peer->wbuf); 678 679 /* init pfkey - remove old if any, load new ones */ 680 pfkey_remove(peer); 681 if (pfkey_establish(peer) == -1) { 682 log_peer_warnx(&peer->conf, 683 "pfkey setup failed"); 684 return; 685 } 686 687 peer->stats.last_sent_errcode = 0; 688 peer->stats.last_sent_suberr = 0; 689 690 if (!peer->depend_ok) 691 timer_stop(peer, Timer_ConnectRetry); 692 else if (peer->passive || peer->conf.passive || 693 peer->conf.template) { 694 change_state(peer, STATE_ACTIVE, event); 695 timer_stop(peer, Timer_ConnectRetry); 696 } else { 697 change_state(peer, STATE_CONNECT, event); 698 timer_set(peer, Timer_ConnectRetry, 699 conf->connectretry); 700 session_connect(peer); 701 } 702 peer->passive = 0; 703 break; 704 default: 705 /* ignore */ 706 break; 707 } 708 break; 709 case STATE_CONNECT: 710 switch (event) { 711 case EVNT_START: 712 /* ignore */ 713 break; 714 case EVNT_CON_OPEN: 715 session_tcp_established(peer); 716 session_open(peer); 717 timer_stop(peer, Timer_ConnectRetry); 718 peer->holdtime = INTERVAL_HOLD_INITIAL; 719 start_timer_holdtime(peer); 720 change_state(peer, STATE_OPENSENT, event); 721 break; 722 case EVNT_CON_OPENFAIL: 723 timer_set(peer, Timer_ConnectRetry, 724 conf->connectretry); 725 session_close_connection(peer); 726 change_state(peer, STATE_ACTIVE, event); 727 break; 728 case EVNT_TIMER_CONNRETRY: 729 timer_set(peer, Timer_ConnectRetry, 730 conf->connectretry); 731 session_connect(peer); 732 break; 733 default: 734 change_state(peer, STATE_IDLE, event); 735 break; 736 } 737 break; 738 case STATE_ACTIVE: 739 switch (event) { 740 case EVNT_START: 741 /* ignore */ 742 break; 743 case EVNT_CON_OPEN: 744 session_tcp_established(peer); 745 session_open(peer); 746 timer_stop(peer, Timer_ConnectRetry); 747 peer->holdtime = INTERVAL_HOLD_INITIAL; 748 start_timer_holdtime(peer); 749 change_state(peer, STATE_OPENSENT, event); 750 break; 751 case EVNT_CON_OPENFAIL: 752 timer_set(peer, Timer_ConnectRetry, 753 conf->connectretry); 754 session_close_connection(peer); 755 change_state(peer, STATE_ACTIVE, event); 756 break; 757 case EVNT_TIMER_CONNRETRY: 758 timer_set(peer, Timer_ConnectRetry, 759 peer->holdtime); 760 change_state(peer, STATE_CONNECT, event); 761 session_connect(peer); 762 break; 763 default: 764 change_state(peer, STATE_IDLE, event); 765 break; 766 } 767 break; 768 case STATE_OPENSENT: 769 switch (event) { 770 case EVNT_START: 771 /* ignore */ 772 break; 773 case EVNT_STOP: 774 change_state(peer, STATE_IDLE, event); 775 break; 776 case EVNT_CON_CLOSED: 777 session_close_connection(peer); 778 timer_set(peer, Timer_ConnectRetry, 779 conf->connectretry); 780 change_state(peer, STATE_ACTIVE, event); 781 break; 782 case EVNT_CON_FATAL: 783 change_state(peer, STATE_IDLE, event); 784 break; 785 case EVNT_TIMER_HOLDTIME: 786 session_notification(peer, ERR_HOLDTIMEREXPIRED, 787 0, NULL, 0); 788 change_state(peer, STATE_IDLE, event); 789 break; 790 case EVNT_RCVD_OPEN: 791 /* parse_open calls change_state itself on failure */ 792 if (parse_open(peer)) 793 break; 794 session_keepalive(peer); 795 change_state(peer, STATE_OPENCONFIRM, event); 796 break; 797 case EVNT_RCVD_NOTIFICATION: 798 if (parse_notification(peer)) { 799 change_state(peer, STATE_IDLE, event); 800 /* don't punish, capa negotiation */ 801 timer_set(peer, Timer_IdleHold, 0); 802 peer->IdleHoldTime /= 2; 803 } else 804 change_state(peer, STATE_IDLE, event); 805 break; 806 default: 807 session_notification(peer, 808 ERR_FSM, ERR_FSM_UNEX_OPENSENT, NULL, 0); 809 change_state(peer, STATE_IDLE, event); 810 break; 811 } 812 break; 813 case STATE_OPENCONFIRM: 814 switch (event) { 815 case EVNT_START: 816 /* ignore */ 817 break; 818 case EVNT_STOP: 819 change_state(peer, STATE_IDLE, event); 820 break; 821 case EVNT_CON_CLOSED: 822 case EVNT_CON_FATAL: 823 change_state(peer, STATE_IDLE, event); 824 break; 825 case EVNT_TIMER_HOLDTIME: 826 session_notification(peer, ERR_HOLDTIMEREXPIRED, 827 0, NULL, 0); 828 change_state(peer, STATE_IDLE, event); 829 break; 830 case EVNT_TIMER_KEEPALIVE: 831 session_keepalive(peer); 832 break; 833 case EVNT_RCVD_KEEPALIVE: 834 start_timer_holdtime(peer); 835 change_state(peer, STATE_ESTABLISHED, event); 836 break; 837 case EVNT_RCVD_NOTIFICATION: 838 parse_notification(peer); 839 change_state(peer, STATE_IDLE, event); 840 break; 841 default: 842 session_notification(peer, 843 ERR_FSM, ERR_FSM_UNEX_OPENCONFIRM, NULL, 0); 844 change_state(peer, STATE_IDLE, event); 845 break; 846 } 847 break; 848 case STATE_ESTABLISHED: 849 switch (event) { 850 case EVNT_START: 851 /* ignore */ 852 break; 853 case EVNT_STOP: 854 change_state(peer, STATE_IDLE, event); 855 break; 856 case EVNT_CON_CLOSED: 857 case EVNT_CON_FATAL: 858 change_state(peer, STATE_IDLE, event); 859 break; 860 case EVNT_TIMER_HOLDTIME: 861 session_notification(peer, ERR_HOLDTIMEREXPIRED, 862 0, NULL, 0); 863 change_state(peer, STATE_IDLE, event); 864 break; 865 case EVNT_TIMER_KEEPALIVE: 866 session_keepalive(peer); 867 break; 868 case EVNT_RCVD_KEEPALIVE: 869 start_timer_holdtime(peer); 870 break; 871 case EVNT_RCVD_UPDATE: 872 start_timer_holdtime(peer); 873 if (parse_update(peer)) 874 change_state(peer, STATE_IDLE, event); 875 else 876 start_timer_holdtime(peer); 877 break; 878 case EVNT_RCVD_NOTIFICATION: 879 parse_notification(peer); 880 change_state(peer, STATE_IDLE, event); 881 break; 882 default: 883 session_notification(peer, 884 ERR_FSM, ERR_FSM_UNEX_ESTABLISHED, NULL, 0); 885 change_state(peer, STATE_IDLE, event); 886 break; 887 } 888 break; 889 } 890 } 891 892 void 893 start_timer_holdtime(struct peer *peer) 894 { 895 if (peer->holdtime > 0) 896 timer_set(peer, Timer_Hold, peer->holdtime); 897 else 898 timer_stop(peer, Timer_Hold); 899 } 900 901 void 902 start_timer_keepalive(struct peer *peer) 903 { 904 if (peer->holdtime > 0) 905 timer_set(peer, Timer_Keepalive, peer->holdtime / 3); 906 else 907 timer_stop(peer, Timer_Keepalive); 908 } 909 910 void 911 session_close_connection(struct peer *peer) 912 { 913 if (peer->fd != -1) { 914 close(peer->fd); 915 pauseaccept = 0; 916 } 917 peer->fd = peer->wbuf.fd = -1; 918 } 919 920 void 921 change_state(struct peer *peer, enum session_state state, 922 enum session_events event) 923 { 924 struct mrt *mrt; 925 926 switch (state) { 927 case STATE_IDLE: 928 /* carp demotion first. new peers handled in init_peer */ 929 if (peer->state == STATE_ESTABLISHED && 930 peer->conf.demote_group[0] && !peer->demoted) 931 session_demote(peer, +1); 932 933 /* 934 * try to write out what's buffered (maybe a notification), 935 * don't bother if it fails 936 */ 937 if (peer->state >= STATE_OPENSENT && peer->wbuf.queued) 938 msgbuf_write(&peer->wbuf); 939 940 /* 941 * we must start the timer for the next EVNT_START 942 * if we are coming here due to an error and the 943 * session was not established successfully before, the 944 * starttimerinterval needs to be exponentially increased 945 */ 946 if (peer->IdleHoldTime == 0) 947 peer->IdleHoldTime = INTERVAL_IDLE_HOLD_INITIAL; 948 peer->holdtime = INTERVAL_HOLD_INITIAL; 949 timer_stop(peer, Timer_ConnectRetry); 950 timer_stop(peer, Timer_Keepalive); 951 timer_stop(peer, Timer_Hold); 952 timer_stop(peer, Timer_IdleHold); 953 timer_stop(peer, Timer_IdleHoldReset); 954 session_close_connection(peer); 955 msgbuf_clear(&peer->wbuf); 956 free(peer->rbuf); 957 peer->rbuf = NULL; 958 bzero(&peer->capa.peer, sizeof(peer->capa.peer)); 959 960 if (event != EVNT_STOP) { 961 timer_set(peer, Timer_IdleHold, peer->IdleHoldTime); 962 if (event != EVNT_NONE && 963 peer->IdleHoldTime < MAX_IDLE_HOLD/2) 964 peer->IdleHoldTime *= 2; 965 } 966 if (peer->state == STATE_ESTABLISHED) { 967 if (peer->capa.neg.grestart.restart == 2 && 968 (event == EVNT_CON_CLOSED || 969 event == EVNT_CON_FATAL)) { 970 /* don't punish graceful restart */ 971 timer_set(peer, Timer_IdleHold, 0); 972 peer->IdleHoldTime /= 2; 973 session_graceful_restart(peer); 974 } else 975 session_down(peer); 976 } 977 if (peer->state == STATE_NONE || 978 peer->state == STATE_ESTABLISHED) { 979 /* initialize capability negotiation structures */ 980 memcpy(&peer->capa.ann, &peer->conf.capabilities, 981 sizeof(peer->capa.ann)); 982 if (!peer->conf.announce_capa) 983 session_capa_ann_none(peer); 984 } 985 break; 986 case STATE_CONNECT: 987 if (peer->state == STATE_ESTABLISHED && 988 peer->capa.neg.grestart.restart == 2) { 989 /* do the graceful restart dance */ 990 session_graceful_restart(peer); 991 peer->holdtime = INTERVAL_HOLD_INITIAL; 992 timer_stop(peer, Timer_ConnectRetry); 993 timer_stop(peer, Timer_Keepalive); 994 timer_stop(peer, Timer_Hold); 995 timer_stop(peer, Timer_IdleHold); 996 timer_stop(peer, Timer_IdleHoldReset); 997 session_close_connection(peer); 998 msgbuf_clear(&peer->wbuf); 999 bzero(&peer->capa.peer, sizeof(peer->capa.peer)); 1000 } 1001 break; 1002 case STATE_ACTIVE: 1003 break; 1004 case STATE_OPENSENT: 1005 break; 1006 case STATE_OPENCONFIRM: 1007 break; 1008 case STATE_ESTABLISHED: 1009 timer_set(peer, Timer_IdleHoldReset, peer->IdleHoldTime); 1010 if (peer->demoted) 1011 timer_set(peer, Timer_CarpUndemote, 1012 INTERVAL_HOLD_DEMOTED); 1013 session_up(peer); 1014 break; 1015 default: /* something seriously fucked */ 1016 break; 1017 } 1018 1019 log_statechange(peer, state, event); 1020 LIST_FOREACH(mrt, &mrthead, entry) { 1021 if (!(mrt->type == MRT_ALL_IN || mrt->type == MRT_ALL_OUT)) 1022 continue; 1023 if ((mrt->peer_id == 0 && mrt->group_id == 0) || 1024 mrt->peer_id == peer->conf.id || (mrt->group_id != 0 && 1025 mrt->group_id == peer->conf.groupid)) 1026 mrt_dump_state(mrt, peer->state, state, peer); 1027 } 1028 peer->prev_state = peer->state; 1029 peer->state = state; 1030 } 1031 1032 void 1033 session_accept(int listenfd) 1034 { 1035 int connfd; 1036 int opt; 1037 socklen_t len; 1038 struct sockaddr_storage cliaddr; 1039 struct peer *p = NULL; 1040 1041 len = sizeof(cliaddr); 1042 if ((connfd = accept(listenfd, 1043 (struct sockaddr *)&cliaddr, &len)) == -1) { 1044 if (errno == ENFILE || errno == EMFILE) 1045 pauseaccept = getmonotime(); 1046 else if (errno != EWOULDBLOCK && errno != EINTR && 1047 errno != ECONNABORTED) 1048 log_warn("accept"); 1049 return; 1050 } 1051 1052 p = getpeerbyip((struct sockaddr *)&cliaddr); 1053 1054 if (p != NULL && p->state == STATE_IDLE && p->errcnt < 2) { 1055 if (timer_running(p, Timer_IdleHold, NULL)) { 1056 /* fast reconnect after clear */ 1057 p->passive = 1; 1058 bgp_fsm(p, EVNT_START); 1059 } 1060 } 1061 1062 if (p != NULL && 1063 (p->state == STATE_CONNECT || p->state == STATE_ACTIVE)) { 1064 if (p->fd != -1) { 1065 if (p->state == STATE_CONNECT) 1066 session_close_connection(p); 1067 else { 1068 close(connfd); 1069 return; 1070 } 1071 } 1072 1073 open: 1074 if (p->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) { 1075 log_peer_warnx(&p->conf, 1076 "ipsec or md5sig configured but not available"); 1077 close(connfd); 1078 return; 1079 } 1080 1081 if (p->conf.auth.method == AUTH_MD5SIG) { 1082 if (sysdep.no_md5sig) { 1083 log_peer_warnx(&p->conf, 1084 "md5sig configured but not available"); 1085 close(connfd); 1086 return; 1087 } 1088 len = sizeof(opt); 1089 if (getsockopt(connfd, IPPROTO_TCP, TCP_MD5SIG, 1090 &opt, &len) == -1) 1091 fatal("getsockopt TCP_MD5SIG"); 1092 if (!opt) { /* non-md5'd connection! */ 1093 log_peer_warnx(&p->conf, 1094 "connection attempt without md5 signature"); 1095 close(connfd); 1096 return; 1097 } 1098 } 1099 p->fd = p->wbuf.fd = connfd; 1100 if (session_setup_socket(p)) { 1101 close(connfd); 1102 return; 1103 } 1104 session_socket_blockmode(connfd, BM_NONBLOCK); 1105 bgp_fsm(p, EVNT_CON_OPEN); 1106 return; 1107 } else if (p != NULL && p->state == STATE_ESTABLISHED && 1108 p->capa.neg.grestart.restart == 2) { 1109 /* first do the graceful restart dance */ 1110 change_state(p, STATE_CONNECT, EVNT_CON_CLOSED); 1111 /* then do part of the open dance */ 1112 goto open; 1113 } else { 1114 log_conn_attempt(p, (struct sockaddr *)&cliaddr); 1115 close(connfd); 1116 } 1117 } 1118 1119 int 1120 session_connect(struct peer *peer) 1121 { 1122 int opt = 1; 1123 struct sockaddr *sa; 1124 1125 /* 1126 * we do not need the overcomplicated collision detection RFC 1771 1127 * describes; we simply make sure there is only ever one concurrent 1128 * tcp connection per peer. 1129 */ 1130 if (peer->fd != -1) 1131 return (-1); 1132 1133 if ((peer->fd = socket(aid2af(peer->conf.remote_addr.aid), SOCK_STREAM, 1134 IPPROTO_TCP)) == -1) { 1135 log_peer_warn(&peer->conf, "session_connect socket"); 1136 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1137 return (-1); 1138 } 1139 1140 if (peer->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) { 1141 log_peer_warnx(&peer->conf, 1142 "ipsec or md5sig configured but not available"); 1143 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1144 return (-1); 1145 } 1146 1147 if (peer->conf.auth.method == AUTH_MD5SIG) { 1148 if (sysdep.no_md5sig) { 1149 log_peer_warnx(&peer->conf, 1150 "md5sig configured but not available"); 1151 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1152 return (-1); 1153 } 1154 if (setsockopt(peer->fd, IPPROTO_TCP, TCP_MD5SIG, 1155 &opt, sizeof(opt)) == -1) { 1156 log_peer_warn(&peer->conf, "setsockopt md5sig"); 1157 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1158 return (-1); 1159 } 1160 } 1161 peer->wbuf.fd = peer->fd; 1162 1163 /* if update source is set we need to bind() */ 1164 if ((sa = addr2sa(&peer->conf.local_addr, 0)) != NULL) { 1165 if (bind(peer->fd, sa, sa->sa_len) == -1) { 1166 log_peer_warn(&peer->conf, "session_connect bind"); 1167 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1168 return (-1); 1169 } 1170 } 1171 1172 if (session_setup_socket(peer)) { 1173 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1174 return (-1); 1175 } 1176 1177 session_socket_blockmode(peer->fd, BM_NONBLOCK); 1178 1179 sa = addr2sa(&peer->conf.remote_addr, BGP_PORT); 1180 if (connect(peer->fd, sa, sa->sa_len) == -1) { 1181 if (errno != EINPROGRESS) { 1182 if (errno != peer->lasterr) 1183 log_peer_warn(&peer->conf, "connect"); 1184 peer->lasterr = errno; 1185 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1186 return (-1); 1187 } 1188 } else 1189 bgp_fsm(peer, EVNT_CON_OPEN); 1190 1191 return (0); 1192 } 1193 1194 int 1195 session_setup_socket(struct peer *p) 1196 { 1197 int ttl = p->conf.distance; 1198 int pre = IPTOS_PREC_INTERNETCONTROL; 1199 int nodelay = 1; 1200 int bsize; 1201 1202 switch (p->conf.remote_addr.aid) { 1203 case AID_INET: 1204 /* set precedence, see RFC 1771 appendix 5 */ 1205 if (setsockopt(p->fd, IPPROTO_IP, IP_TOS, &pre, sizeof(pre)) == 1206 -1) { 1207 log_peer_warn(&p->conf, 1208 "session_setup_socket setsockopt TOS"); 1209 return (-1); 1210 } 1211 1212 if (p->conf.ebgp) { 1213 /* set TTL to foreign router's distance 1214 1=direct n=multihop with ttlsec, we always use 255 */ 1215 if (p->conf.ttlsec) { 1216 ttl = 256 - p->conf.distance; 1217 if (setsockopt(p->fd, IPPROTO_IP, IP_MINTTL, 1218 &ttl, sizeof(ttl)) == -1) { 1219 log_peer_warn(&p->conf, 1220 "session_setup_socket: " 1221 "setsockopt MINTTL"); 1222 return (-1); 1223 } 1224 ttl = 255; 1225 } 1226 1227 if (setsockopt(p->fd, IPPROTO_IP, IP_TTL, &ttl, 1228 sizeof(ttl)) == -1) { 1229 log_peer_warn(&p->conf, 1230 "session_setup_socket setsockopt TTL"); 1231 return (-1); 1232 } 1233 } 1234 break; 1235 case AID_INET6: 1236 if (p->conf.ebgp) { 1237 /* set hoplimit to foreign router's distance 1238 1=direct n=multihop with ttlsec, we always use 255 */ 1239 if (p->conf.ttlsec) { 1240 /* 1241 * XXX Kernel has no ip6 equivalent of MINTTL yet so 1242 * we can't check incoming packets, but we can at least 1243 * set the outgoing TTL to allow sessions configured 1244 * with ttl-security to come up. 1245 */ 1246 ttl = 255; 1247 } 1248 if (setsockopt(p->fd, IPPROTO_IPV6, IPV6_UNICAST_HOPS, 1249 &ttl, sizeof(ttl)) == -1) { 1250 log_peer_warn(&p->conf, 1251 "session_setup_socket setsockopt hoplimit"); 1252 return (-1); 1253 } 1254 } 1255 break; 1256 } 1257 1258 /* set TCP_NODELAY */ 1259 if (setsockopt(p->fd, IPPROTO_TCP, TCP_NODELAY, &nodelay, 1260 sizeof(nodelay)) == -1) { 1261 log_peer_warn(&p->conf, 1262 "session_setup_socket setsockopt TCP_NODELAY"); 1263 return (-1); 1264 } 1265 1266 /* only increase bufsize (and thus window) if md5 or ipsec is in use */ 1267 if (p->conf.auth.method != AUTH_NONE) { 1268 /* try to increase bufsize. no biggie if it fails */ 1269 bsize = 65535; 1270 while (bsize > 8192 && 1271 setsockopt(p->fd, SOL_SOCKET, SO_RCVBUF, &bsize, 1272 sizeof(bsize)) == -1 && errno != EINVAL) 1273 bsize /= 2; 1274 bsize = 65535; 1275 while (bsize > 8192 && 1276 setsockopt(p->fd, SOL_SOCKET, SO_SNDBUF, &bsize, 1277 sizeof(bsize)) == -1 && errno != EINVAL) 1278 bsize /= 2; 1279 } 1280 1281 return (0); 1282 } 1283 1284 void 1285 session_socket_blockmode(int fd, enum blockmodes bm) 1286 { 1287 int flags; 1288 1289 if ((flags = fcntl(fd, F_GETFL, 0)) == -1) 1290 fatal("fcntl F_GETFL"); 1291 1292 if (bm == BM_NONBLOCK) 1293 flags |= O_NONBLOCK; 1294 else 1295 flags &= ~O_NONBLOCK; 1296 1297 if ((flags = fcntl(fd, F_SETFL, flags)) == -1) 1298 fatal("fcntl F_SETFL"); 1299 } 1300 1301 void 1302 session_tcp_established(struct peer *peer) 1303 { 1304 socklen_t len; 1305 1306 len = sizeof(peer->sa_local); 1307 if (getsockname(peer->fd, (struct sockaddr *)&peer->sa_local, 1308 &len) == -1) 1309 log_warn("getsockname"); 1310 len = sizeof(peer->sa_remote); 1311 if (getpeername(peer->fd, (struct sockaddr *)&peer->sa_remote, 1312 &len) == -1) 1313 log_warn("getpeername"); 1314 } 1315 1316 void 1317 session_capa_ann_none(struct peer *peer) 1318 { 1319 bzero(&peer->capa.ann, sizeof(peer->capa.ann)); 1320 } 1321 1322 int 1323 session_capa_add(struct ibuf *opb, u_int8_t capa_code, u_int8_t capa_len) 1324 { 1325 int errs = 0; 1326 1327 errs += ibuf_add(opb, &capa_code, sizeof(capa_code)); 1328 errs += ibuf_add(opb, &capa_len, sizeof(capa_len)); 1329 return (errs); 1330 } 1331 1332 int 1333 session_capa_add_mp(struct ibuf *buf, u_int8_t aid) 1334 { 1335 u_int8_t safi, pad = 0; 1336 u_int16_t afi; 1337 int errs = 0; 1338 1339 if (aid2afi(aid, &afi, &safi) == -1) 1340 fatalx("session_capa_add_mp: bad afi/safi pair"); 1341 afi = htons(afi); 1342 errs += ibuf_add(buf, &afi, sizeof(afi)); 1343 errs += ibuf_add(buf, &pad, sizeof(pad)); 1344 errs += ibuf_add(buf, &safi, sizeof(safi)); 1345 1346 return (errs); 1347 } 1348 1349 int 1350 session_capa_add_gr(struct peer *p, struct ibuf *b, u_int8_t aid) 1351 { 1352 u_int errs = 0; 1353 u_int16_t afi; 1354 u_int8_t flags, safi; 1355 1356 if (aid2afi(aid, &afi, &safi)) { 1357 log_warn("session_capa_add_gr: bad AID"); 1358 return (1); 1359 } 1360 if (p->capa.neg.grestart.flags[aid] & CAPA_GR_RESTARTING) 1361 flags = CAPA_GR_F_FLAG; 1362 else 1363 flags = 0; 1364 1365 afi = htons(afi); 1366 errs += ibuf_add(b, &afi, sizeof(afi)); 1367 errs += ibuf_add(b, &safi, sizeof(safi)); 1368 errs += ibuf_add(b, &flags, sizeof(flags)); 1369 1370 return (errs); 1371 } 1372 1373 struct bgp_msg * 1374 session_newmsg(enum msg_type msgtype, u_int16_t len) 1375 { 1376 struct bgp_msg *msg; 1377 struct msg_header hdr; 1378 struct ibuf *buf; 1379 int errs = 0; 1380 1381 memset(&hdr.marker, 0xff, sizeof(hdr.marker)); 1382 hdr.len = htons(len); 1383 hdr.type = msgtype; 1384 1385 if ((buf = ibuf_open(len)) == NULL) 1386 return (NULL); 1387 1388 errs += ibuf_add(buf, &hdr.marker, sizeof(hdr.marker)); 1389 errs += ibuf_add(buf, &hdr.len, sizeof(hdr.len)); 1390 errs += ibuf_add(buf, &hdr.type, sizeof(hdr.type)); 1391 1392 if (errs || (msg = calloc(1, sizeof(*msg))) == NULL) { 1393 ibuf_free(buf); 1394 return (NULL); 1395 } 1396 1397 msg->buf = buf; 1398 msg->type = msgtype; 1399 msg->len = len; 1400 1401 return (msg); 1402 } 1403 1404 int 1405 session_sendmsg(struct bgp_msg *msg, struct peer *p) 1406 { 1407 struct mrt *mrt; 1408 1409 LIST_FOREACH(mrt, &mrthead, entry) { 1410 if (!(mrt->type == MRT_ALL_OUT || (msg->type == UPDATE && 1411 mrt->type == MRT_UPDATE_OUT))) 1412 continue; 1413 if ((mrt->peer_id == 0 && mrt->group_id == 0) || 1414 mrt->peer_id == p->conf.id || (mrt->group_id == 0 && 1415 mrt->group_id == p->conf.groupid)) 1416 mrt_dump_bgp_msg(mrt, msg->buf->buf, msg->len, p); 1417 } 1418 1419 ibuf_close(&p->wbuf, msg->buf); 1420 free(msg); 1421 return (0); 1422 } 1423 1424 void 1425 session_open(struct peer *p) 1426 { 1427 struct bgp_msg *buf; 1428 struct ibuf *opb; 1429 struct msg_open msg; 1430 u_int16_t len; 1431 u_int8_t i, op_type, optparamlen = 0; 1432 int errs = 0; 1433 int mpcapa = 0; 1434 1435 1436 if ((opb = ibuf_dynamic(0, UCHAR_MAX - sizeof(op_type) - 1437 sizeof(optparamlen))) == NULL) { 1438 bgp_fsm(p, EVNT_CON_FATAL); 1439 return; 1440 } 1441 1442 /* multiprotocol extensions, RFC 4760 */ 1443 for (i = 0; i < AID_MAX; i++) 1444 if (p->capa.ann.mp[i]) { /* 4 bytes data */ 1445 errs += session_capa_add(opb, CAPA_MP, 4); 1446 errs += session_capa_add_mp(opb, i); 1447 mpcapa++; 1448 } 1449 1450 /* route refresh, RFC 2918 */ 1451 if (p->capa.ann.refresh) /* no data */ 1452 errs += session_capa_add(opb, CAPA_REFRESH, 0); 1453 1454 /* graceful restart and End-of-RIB marker, RFC 4724 */ 1455 if (p->capa.ann.grestart.restart) { 1456 int rst = 0; 1457 u_int16_t hdr; 1458 u_int8_t grlen; 1459 1460 if (mpcapa) { 1461 grlen = 2 + 4 * mpcapa; 1462 for (i = 0; i < AID_MAX; i++) { 1463 if (p->capa.neg.grestart.flags[i] & 1464 CAPA_GR_RESTARTING) 1465 rst++; 1466 } 1467 } else { /* AID_INET */ 1468 grlen = 2 + 4; 1469 if (p->capa.neg.grestart.flags[AID_INET] & 1470 CAPA_GR_RESTARTING) 1471 rst++; 1472 } 1473 1474 hdr = conf->holdtime; /* default timeout */ 1475 /* if client does graceful restart don't set R flag */ 1476 if (!rst) 1477 hdr |= CAPA_GR_R_FLAG; 1478 hdr = htons(hdr); 1479 1480 errs += session_capa_add(opb, CAPA_RESTART, grlen); 1481 errs += ibuf_add(opb, &hdr, sizeof(hdr)); 1482 1483 if (mpcapa) { 1484 for (i = 0; i < AID_MAX; i++) { 1485 if (p->capa.ann.mp[i]) { 1486 errs += session_capa_add_gr(p, opb, i); 1487 } 1488 } 1489 } else { /* AID_INET */ 1490 errs += session_capa_add_gr(p, opb, AID_INET); 1491 } 1492 } 1493 1494 /* 4-bytes AS numbers, draft-ietf-idr-as4bytes-13 */ 1495 if (p->capa.ann.as4byte) { /* 4 bytes data */ 1496 u_int32_t nas; 1497 1498 nas = htonl(conf->as); 1499 errs += session_capa_add(opb, CAPA_AS4BYTE, sizeof(nas)); 1500 errs += ibuf_add(opb, &nas, sizeof(nas)); 1501 } 1502 1503 if (ibuf_size(opb)) 1504 optparamlen = ibuf_size(opb) + sizeof(op_type) + 1505 sizeof(optparamlen); 1506 1507 len = MSGSIZE_OPEN_MIN + optparamlen; 1508 if (errs || (buf = session_newmsg(OPEN, len)) == NULL) { 1509 ibuf_free(opb); 1510 bgp_fsm(p, EVNT_CON_FATAL); 1511 return; 1512 } 1513 1514 msg.version = 4; 1515 msg.myas = htons(conf->short_as); 1516 if (p->conf.holdtime) 1517 msg.holdtime = htons(p->conf.holdtime); 1518 else 1519 msg.holdtime = htons(conf->holdtime); 1520 msg.bgpid = conf->bgpid; /* is already in network byte order */ 1521 msg.optparamlen = optparamlen; 1522 1523 errs += ibuf_add(buf->buf, &msg.version, sizeof(msg.version)); 1524 errs += ibuf_add(buf->buf, &msg.myas, sizeof(msg.myas)); 1525 errs += ibuf_add(buf->buf, &msg.holdtime, sizeof(msg.holdtime)); 1526 errs += ibuf_add(buf->buf, &msg.bgpid, sizeof(msg.bgpid)); 1527 errs += ibuf_add(buf->buf, &msg.optparamlen, sizeof(msg.optparamlen)); 1528 1529 if (optparamlen) { 1530 op_type = OPT_PARAM_CAPABILITIES; 1531 optparamlen = ibuf_size(opb); 1532 errs += ibuf_add(buf->buf, &op_type, sizeof(op_type)); 1533 errs += ibuf_add(buf->buf, &optparamlen, sizeof(optparamlen)); 1534 errs += ibuf_add(buf->buf, opb->buf, ibuf_size(opb)); 1535 } 1536 1537 ibuf_free(opb); 1538 1539 if (errs) { 1540 ibuf_free(buf->buf); 1541 free(buf); 1542 bgp_fsm(p, EVNT_CON_FATAL); 1543 return; 1544 } 1545 1546 if (session_sendmsg(buf, p) == -1) { 1547 bgp_fsm(p, EVNT_CON_FATAL); 1548 return; 1549 } 1550 1551 p->stats.msg_sent_open++; 1552 } 1553 1554 void 1555 session_keepalive(struct peer *p) 1556 { 1557 struct bgp_msg *buf; 1558 1559 if ((buf = session_newmsg(KEEPALIVE, MSGSIZE_KEEPALIVE)) == NULL || 1560 session_sendmsg(buf, p) == -1) { 1561 bgp_fsm(p, EVNT_CON_FATAL); 1562 return; 1563 } 1564 1565 start_timer_keepalive(p); 1566 p->stats.msg_sent_keepalive++; 1567 } 1568 1569 void 1570 session_update(u_int32_t peerid, void *data, size_t datalen) 1571 { 1572 struct peer *p; 1573 struct bgp_msg *buf; 1574 1575 if ((p = getpeerbyid(peerid)) == NULL) { 1576 log_warnx("no such peer: id=%u", peerid); 1577 return; 1578 } 1579 1580 if (p->state != STATE_ESTABLISHED) 1581 return; 1582 1583 if ((buf = session_newmsg(UPDATE, MSGSIZE_HEADER + datalen)) == NULL) { 1584 bgp_fsm(p, EVNT_CON_FATAL); 1585 return; 1586 } 1587 1588 if (ibuf_add(buf->buf, data, datalen)) { 1589 ibuf_free(buf->buf); 1590 free(buf); 1591 bgp_fsm(p, EVNT_CON_FATAL); 1592 return; 1593 } 1594 1595 if (session_sendmsg(buf, p) == -1) { 1596 bgp_fsm(p, EVNT_CON_FATAL); 1597 return; 1598 } 1599 1600 start_timer_keepalive(p); 1601 p->stats.msg_sent_update++; 1602 } 1603 1604 void 1605 session_notification(struct peer *p, u_int8_t errcode, u_int8_t subcode, 1606 void *data, ssize_t datalen) 1607 { 1608 struct bgp_msg *buf; 1609 int errs = 0; 1610 1611 if (p->stats.last_sent_errcode) /* some notification already sent */ 1612 return; 1613 1614 log_notification(p, errcode, subcode, data, datalen, "sending"); 1615 1616 if ((buf = session_newmsg(NOTIFICATION, 1617 MSGSIZE_NOTIFICATION_MIN + datalen)) == NULL) { 1618 bgp_fsm(p, EVNT_CON_FATAL); 1619 return; 1620 } 1621 1622 errs += ibuf_add(buf->buf, &errcode, sizeof(errcode)); 1623 errs += ibuf_add(buf->buf, &subcode, sizeof(subcode)); 1624 1625 if (datalen > 0) 1626 errs += ibuf_add(buf->buf, data, datalen); 1627 1628 if (errs) { 1629 ibuf_free(buf->buf); 1630 free(buf); 1631 bgp_fsm(p, EVNT_CON_FATAL); 1632 return; 1633 } 1634 1635 if (session_sendmsg(buf, p) == -1) { 1636 bgp_fsm(p, EVNT_CON_FATAL); 1637 return; 1638 } 1639 1640 p->stats.msg_sent_notification++; 1641 p->stats.last_sent_errcode = errcode; 1642 p->stats.last_sent_suberr = subcode; 1643 } 1644 1645 int 1646 session_neighbor_rrefresh(struct peer *p) 1647 { 1648 u_int8_t i; 1649 1650 if (!p->capa.peer.refresh) 1651 return (-1); 1652 1653 for (i = 0; i < AID_MAX; i++) { 1654 if (p->capa.peer.mp[i] != 0) 1655 session_rrefresh(p, i); 1656 } 1657 1658 return (0); 1659 } 1660 1661 void 1662 session_rrefresh(struct peer *p, u_int8_t aid) 1663 { 1664 struct bgp_msg *buf; 1665 int errs = 0; 1666 u_int16_t afi; 1667 u_int8_t safi, null8 = 0; 1668 1669 if (aid2afi(aid, &afi, &safi) == -1) 1670 fatalx("session_rrefresh: bad afi/safi pair"); 1671 1672 if ((buf = session_newmsg(RREFRESH, MSGSIZE_RREFRESH)) == NULL) { 1673 bgp_fsm(p, EVNT_CON_FATAL); 1674 return; 1675 } 1676 1677 afi = htons(afi); 1678 errs += ibuf_add(buf->buf, &afi, sizeof(afi)); 1679 errs += ibuf_add(buf->buf, &null8, sizeof(null8)); 1680 errs += ibuf_add(buf->buf, &safi, sizeof(safi)); 1681 1682 if (errs) { 1683 ibuf_free(buf->buf); 1684 free(buf); 1685 bgp_fsm(p, EVNT_CON_FATAL); 1686 return; 1687 } 1688 1689 if (session_sendmsg(buf, p) == -1) { 1690 bgp_fsm(p, EVNT_CON_FATAL); 1691 return; 1692 } 1693 1694 p->stats.msg_sent_rrefresh++; 1695 } 1696 1697 int 1698 session_graceful_restart(struct peer *p) 1699 { 1700 u_int8_t i; 1701 1702 timer_set(p, Timer_RestartTimeout, p->capa.neg.grestart.timeout); 1703 1704 for (i = 0; i < AID_MAX; i++) { 1705 if (p->capa.neg.grestart.flags[i] & CAPA_GR_PRESENT) { 1706 if (imsg_compose(ibuf_rde, IMSG_SESSION_STALE, 1707 p->conf.id, 0, -1, &i, sizeof(i)) == -1) 1708 return (-1); 1709 log_peer_warnx(&p->conf, 1710 "graceful restart of %s, keeping routes", 1711 aid2str(i)); 1712 p->capa.neg.grestart.flags[i] |= CAPA_GR_RESTARTING; 1713 } else if (p->capa.neg.mp[i]) { 1714 if (imsg_compose(ibuf_rde, IMSG_SESSION_FLUSH, 1715 p->conf.id, 0, -1, &i, sizeof(i)) == -1) 1716 return (-1); 1717 log_peer_warnx(&p->conf, 1718 "graceful restart of %s, flushing routes", 1719 aid2str(i)); 1720 } 1721 } 1722 return (0); 1723 } 1724 1725 int 1726 session_graceful_stop(struct peer *p) 1727 { 1728 u_int8_t i; 1729 1730 for (i = 0; i < AID_MAX; i++) { 1731 /* 1732 * Only flush if the peer is restarting and the timeout fired. 1733 * In all other cases the session was already flushed when the 1734 * session went down or when the new open message was parsed. 1735 */ 1736 if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING) { 1737 log_peer_warnx(&p->conf, "graceful restart of %s, " 1738 "time-out, flushing", aid2str(i)); 1739 if (imsg_compose(ibuf_rde, IMSG_SESSION_FLUSH, 1740 p->conf.id, 0, -1, &i, sizeof(i)) == -1) 1741 return (-1); 1742 } 1743 p->capa.neg.grestart.flags[i] &= ~CAPA_GR_RESTARTING; 1744 } 1745 return (0); 1746 } 1747 1748 int 1749 session_dispatch_msg(struct pollfd *pfd, struct peer *p) 1750 { 1751 ssize_t n; 1752 socklen_t len; 1753 int error; 1754 1755 if (p->state == STATE_CONNECT) { 1756 if (pfd->revents & POLLOUT) { 1757 if (pfd->revents & POLLIN) { 1758 /* error occurred */ 1759 len = sizeof(error); 1760 if (getsockopt(pfd->fd, SOL_SOCKET, SO_ERROR, 1761 &error, &len) == -1 || error) { 1762 if (error) 1763 errno = error; 1764 if (errno != p->lasterr) { 1765 log_peer_warn(&p->conf, 1766 "socket error"); 1767 p->lasterr = errno; 1768 } 1769 bgp_fsm(p, EVNT_CON_OPENFAIL); 1770 return (1); 1771 } 1772 } 1773 bgp_fsm(p, EVNT_CON_OPEN); 1774 return (1); 1775 } 1776 if (pfd->revents & POLLHUP) { 1777 bgp_fsm(p, EVNT_CON_OPENFAIL); 1778 return (1); 1779 } 1780 if (pfd->revents & (POLLERR|POLLNVAL)) { 1781 bgp_fsm(p, EVNT_CON_FATAL); 1782 return (1); 1783 } 1784 return (0); 1785 } 1786 1787 if (pfd->revents & POLLHUP) { 1788 bgp_fsm(p, EVNT_CON_CLOSED); 1789 return (1); 1790 } 1791 if (pfd->revents & (POLLERR|POLLNVAL)) { 1792 bgp_fsm(p, EVNT_CON_FATAL); 1793 return (1); 1794 } 1795 1796 if (pfd->revents & POLLOUT && p->wbuf.queued) { 1797 if ((error = msgbuf_write(&p->wbuf)) <= 0 && errno != EAGAIN) { 1798 if (error == 0) 1799 log_peer_warnx(&p->conf, "Connection closed"); 1800 else if (error == -1) 1801 log_peer_warn(&p->conf, "write error"); 1802 bgp_fsm(p, EVNT_CON_FATAL); 1803 return (1); 1804 } 1805 if (!(pfd->revents & POLLIN)) 1806 return (1); 1807 } 1808 1809 if (p->rbuf && pfd->revents & POLLIN) { 1810 if ((n = read(p->fd, p->rbuf->buf + p->rbuf->wpos, 1811 sizeof(p->rbuf->buf) - p->rbuf->wpos)) == -1) { 1812 if (errno != EINTR && errno != EAGAIN) { 1813 log_peer_warn(&p->conf, "read error"); 1814 bgp_fsm(p, EVNT_CON_FATAL); 1815 } 1816 return (1); 1817 } 1818 if (n == 0) { /* connection closed */ 1819 bgp_fsm(p, EVNT_CON_CLOSED); 1820 return (1); 1821 } 1822 1823 p->rbuf->wpos += n; 1824 p->stats.last_read = time(NULL); 1825 return (1); 1826 } 1827 return (0); 1828 } 1829 1830 int 1831 session_process_msg(struct peer *p) 1832 { 1833 ssize_t rpos, av, left; 1834 int processed = 0; 1835 u_int16_t msglen; 1836 u_int8_t msgtype; 1837 1838 rpos = 0; 1839 av = p->rbuf->wpos; 1840 1841 /* 1842 * session might drop to IDLE -> buffers deallocated 1843 * we MUST check rbuf != NULL before use 1844 */ 1845 for (;;) { 1846 if (rpos + MSGSIZE_HEADER > av) 1847 break; 1848 if (p->rbuf == NULL) 1849 break; 1850 if (parse_header(p, p->rbuf->buf + rpos, &msglen, 1851 &msgtype) == -1) 1852 return (0); 1853 if (rpos + msglen > av) 1854 break; 1855 p->rbuf->rptr = p->rbuf->buf + rpos; 1856 1857 switch (msgtype) { 1858 case OPEN: 1859 bgp_fsm(p, EVNT_RCVD_OPEN); 1860 p->stats.msg_rcvd_open++; 1861 break; 1862 case UPDATE: 1863 bgp_fsm(p, EVNT_RCVD_UPDATE); 1864 p->stats.msg_rcvd_update++; 1865 break; 1866 case NOTIFICATION: 1867 bgp_fsm(p, EVNT_RCVD_NOTIFICATION); 1868 p->stats.msg_rcvd_notification++; 1869 break; 1870 case KEEPALIVE: 1871 bgp_fsm(p, EVNT_RCVD_KEEPALIVE); 1872 p->stats.msg_rcvd_keepalive++; 1873 break; 1874 case RREFRESH: 1875 parse_refresh(p); 1876 p->stats.msg_rcvd_rrefresh++; 1877 break; 1878 default: /* cannot happen */ 1879 session_notification(p, ERR_HEADER, ERR_HDR_TYPE, 1880 &msgtype, 1); 1881 log_warnx("received message with unknown type %u", 1882 msgtype); 1883 bgp_fsm(p, EVNT_CON_FATAL); 1884 } 1885 rpos += msglen; 1886 if (++processed > MSG_PROCESS_LIMIT) 1887 break; 1888 } 1889 if (p->rbuf == NULL) 1890 return (1); 1891 1892 if (rpos < av) { 1893 left = av - rpos; 1894 memcpy(&p->rbuf->buf, p->rbuf->buf + rpos, left); 1895 p->rbuf->wpos = left; 1896 } else 1897 p->rbuf->wpos = 0; 1898 1899 return (1); 1900 } 1901 1902 int 1903 parse_header(struct peer *peer, u_char *data, u_int16_t *len, u_int8_t *type) 1904 { 1905 struct mrt *mrt; 1906 u_char *p; 1907 u_int16_t olen; 1908 static const u_int8_t marker[MSGSIZE_HEADER_MARKER] = { 0xff, 0xff, 1909 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 1910 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; 1911 1912 /* caller MUST make sure we are getting 19 bytes! */ 1913 p = data; 1914 if (memcmp(p, marker, sizeof(marker))) { 1915 log_peer_warnx(&peer->conf, "sync error"); 1916 session_notification(peer, ERR_HEADER, ERR_HDR_SYNC, NULL, 0); 1917 bgp_fsm(peer, EVNT_CON_FATAL); 1918 return (-1); 1919 } 1920 p += MSGSIZE_HEADER_MARKER; 1921 1922 memcpy(&olen, p, 2); 1923 *len = ntohs(olen); 1924 p += 2; 1925 memcpy(type, p, 1); 1926 1927 if (*len < MSGSIZE_HEADER || *len > MAX_PKTSIZE) { 1928 log_peer_warnx(&peer->conf, 1929 "received message: illegal length: %u byte", *len); 1930 session_notification(peer, ERR_HEADER, ERR_HDR_LEN, 1931 &olen, sizeof(olen)); 1932 bgp_fsm(peer, EVNT_CON_FATAL); 1933 return (-1); 1934 } 1935 1936 switch (*type) { 1937 case OPEN: 1938 if (*len < MSGSIZE_OPEN_MIN) { 1939 log_peer_warnx(&peer->conf, 1940 "received OPEN: illegal len: %u byte", *len); 1941 session_notification(peer, ERR_HEADER, ERR_HDR_LEN, 1942 &olen, sizeof(olen)); 1943 bgp_fsm(peer, EVNT_CON_FATAL); 1944 return (-1); 1945 } 1946 break; 1947 case NOTIFICATION: 1948 if (*len < MSGSIZE_NOTIFICATION_MIN) { 1949 log_peer_warnx(&peer->conf, 1950 "received NOTIFICATION: illegal len: %u byte", 1951 *len); 1952 session_notification(peer, ERR_HEADER, ERR_HDR_LEN, 1953 &olen, sizeof(olen)); 1954 bgp_fsm(peer, EVNT_CON_FATAL); 1955 return (-1); 1956 } 1957 break; 1958 case UPDATE: 1959 if (*len < MSGSIZE_UPDATE_MIN) { 1960 log_peer_warnx(&peer->conf, 1961 "received UPDATE: illegal len: %u byte", *len); 1962 session_notification(peer, ERR_HEADER, ERR_HDR_LEN, 1963 &olen, sizeof(olen)); 1964 bgp_fsm(peer, EVNT_CON_FATAL); 1965 return (-1); 1966 } 1967 break; 1968 case KEEPALIVE: 1969 if (*len != MSGSIZE_KEEPALIVE) { 1970 log_peer_warnx(&peer->conf, 1971 "received KEEPALIVE: illegal len: %u byte", *len); 1972 session_notification(peer, ERR_HEADER, ERR_HDR_LEN, 1973 &olen, sizeof(olen)); 1974 bgp_fsm(peer, EVNT_CON_FATAL); 1975 return (-1); 1976 } 1977 break; 1978 case RREFRESH: 1979 if (*len != MSGSIZE_RREFRESH) { 1980 log_peer_warnx(&peer->conf, 1981 "received RREFRESH: illegal len: %u byte", *len); 1982 session_notification(peer, ERR_HEADER, ERR_HDR_LEN, 1983 &olen, sizeof(olen)); 1984 bgp_fsm(peer, EVNT_CON_FATAL); 1985 return (-1); 1986 } 1987 break; 1988 default: 1989 log_peer_warnx(&peer->conf, 1990 "received msg with unknown type %u", *type); 1991 session_notification(peer, ERR_HEADER, ERR_HDR_TYPE, 1992 type, 1); 1993 bgp_fsm(peer, EVNT_CON_FATAL); 1994 return (-1); 1995 } 1996 LIST_FOREACH(mrt, &mrthead, entry) { 1997 if (!(mrt->type == MRT_ALL_IN || (*type == UPDATE && 1998 mrt->type == MRT_UPDATE_IN))) 1999 continue; 2000 if ((mrt->peer_id == 0 && mrt->group_id == 0) || 2001 mrt->peer_id == peer->conf.id || (mrt->group_id != 0 && 2002 mrt->group_id == peer->conf.groupid)) 2003 mrt_dump_bgp_msg(mrt, data, *len, peer); 2004 } 2005 return (0); 2006 } 2007 2008 int 2009 parse_open(struct peer *peer) 2010 { 2011 u_char *p, *op_val; 2012 u_int8_t version, rversion; 2013 u_int16_t short_as, msglen; 2014 u_int16_t holdtime, oholdtime, myholdtime; 2015 u_int32_t as, bgpid; 2016 u_int8_t optparamlen, plen; 2017 u_int8_t op_type, op_len; 2018 2019 p = peer->rbuf->rptr; 2020 p += MSGSIZE_HEADER_MARKER; 2021 memcpy(&msglen, p, sizeof(msglen)); 2022 msglen = ntohs(msglen); 2023 2024 p = peer->rbuf->rptr; 2025 p += MSGSIZE_HEADER; /* header is already checked */ 2026 2027 memcpy(&version, p, sizeof(version)); 2028 p += sizeof(version); 2029 2030 if (version != BGP_VERSION) { 2031 log_peer_warnx(&peer->conf, 2032 "peer wants unrecognized version %u", version); 2033 if (version > BGP_VERSION) 2034 rversion = version - BGP_VERSION; 2035 else 2036 rversion = BGP_VERSION; 2037 session_notification(peer, ERR_OPEN, ERR_OPEN_VERSION, 2038 &rversion, sizeof(rversion)); 2039 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2040 return (-1); 2041 } 2042 2043 memcpy(&short_as, p, sizeof(short_as)); 2044 p += sizeof(short_as); 2045 as = peer->short_as = ntohs(short_as); 2046 2047 memcpy(&oholdtime, p, sizeof(oholdtime)); 2048 p += sizeof(oholdtime); 2049 2050 holdtime = ntohs(oholdtime); 2051 if (holdtime && holdtime < peer->conf.min_holdtime) { 2052 log_peer_warnx(&peer->conf, 2053 "peer requests unacceptable holdtime %u", holdtime); 2054 session_notification(peer, ERR_OPEN, ERR_OPEN_HOLDTIME, 2055 NULL, 0); 2056 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2057 return (-1); 2058 } 2059 2060 myholdtime = peer->conf.holdtime; 2061 if (!myholdtime) 2062 myholdtime = conf->holdtime; 2063 if (holdtime < myholdtime) 2064 peer->holdtime = holdtime; 2065 else 2066 peer->holdtime = myholdtime; 2067 2068 memcpy(&bgpid, p, sizeof(bgpid)); 2069 p += sizeof(bgpid); 2070 2071 /* check bgpid for validity - just disallow 0 */ 2072 if (ntohl(bgpid) == 0) { 2073 log_peer_warnx(&peer->conf, "peer BGPID %lu unacceptable", 2074 ntohl(bgpid)); 2075 session_notification(peer, ERR_OPEN, ERR_OPEN_BGPID, 2076 NULL, 0); 2077 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2078 return (-1); 2079 } 2080 peer->remote_bgpid = bgpid; 2081 2082 memcpy(&optparamlen, p, sizeof(optparamlen)); 2083 p += sizeof(optparamlen); 2084 2085 if (optparamlen != msglen - MSGSIZE_OPEN_MIN) { 2086 log_peer_warnx(&peer->conf, 2087 "corrupt OPEN message received: length mismatch"); 2088 session_notification(peer, ERR_OPEN, 0, NULL, 0); 2089 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2090 return (-1); 2091 } 2092 2093 plen = optparamlen; 2094 while (plen > 0) { 2095 if (plen < 2) { 2096 log_peer_warnx(&peer->conf, 2097 "corrupt OPEN message received, len wrong"); 2098 session_notification(peer, ERR_OPEN, 0, NULL, 0); 2099 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2100 return (-1); 2101 } 2102 memcpy(&op_type, p, sizeof(op_type)); 2103 p += sizeof(op_type); 2104 plen -= sizeof(op_type); 2105 memcpy(&op_len, p, sizeof(op_len)); 2106 p += sizeof(op_len); 2107 plen -= sizeof(op_len); 2108 if (op_len > 0) { 2109 if (plen < op_len) { 2110 log_peer_warnx(&peer->conf, 2111 "corrupt OPEN message received, len wrong"); 2112 session_notification(peer, ERR_OPEN, 0, 2113 NULL, 0); 2114 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2115 return (-1); 2116 } 2117 op_val = p; 2118 p += op_len; 2119 plen -= op_len; 2120 } else 2121 op_val = NULL; 2122 2123 switch (op_type) { 2124 case OPT_PARAM_CAPABILITIES: /* RFC 3392 */ 2125 if (parse_capabilities(peer, op_val, op_len, 2126 &as) == -1) { 2127 session_notification(peer, ERR_OPEN, 0, 2128 NULL, 0); 2129 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2130 return (-1); 2131 } 2132 break; 2133 case OPT_PARAM_AUTH: /* deprecated */ 2134 default: 2135 /* 2136 * unsupported type 2137 * the RFCs tell us to leave the data section empty 2138 * and notify the peer with ERR_OPEN, ERR_OPEN_OPT. 2139 * How the peer should know _which_ optional parameter 2140 * we don't support is beyond me. 2141 */ 2142 log_peer_warnx(&peer->conf, 2143 "received OPEN message with unsupported optional " 2144 "parameter: type %u", op_type); 2145 session_notification(peer, ERR_OPEN, ERR_OPEN_OPT, 2146 NULL, 0); 2147 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2148 timer_set(peer, Timer_IdleHold, 0); /* no punish */ 2149 peer->IdleHoldTime /= 2; 2150 return (-1); 2151 } 2152 } 2153 2154 /* if remote-as is zero and it's a cloned neighbor, accept any */ 2155 if (peer->template && !peer->conf.remote_as && as != AS_TRANS) { 2156 peer->conf.remote_as = as; 2157 peer->conf.ebgp = (peer->conf.remote_as != conf->as); 2158 if (!peer->conf.ebgp) 2159 /* force enforce_as off for iBGP sessions */ 2160 peer->conf.enforce_as = ENFORCE_AS_OFF; 2161 } 2162 2163 if (peer->conf.remote_as != as) { 2164 log_peer_warnx(&peer->conf, "peer sent wrong AS %s", 2165 log_as(as)); 2166 session_notification(peer, ERR_OPEN, ERR_OPEN_AS, NULL, 0); 2167 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2168 return (-1); 2169 } 2170 2171 if (capa_neg_calc(peer) == -1) { 2172 log_peer_warnx(&peer->conf, 2173 "capability negotiation calculation failed"); 2174 session_notification(peer, ERR_OPEN, 0, NULL, 0); 2175 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2176 return (-1); 2177 } 2178 2179 return (0); 2180 } 2181 2182 int 2183 parse_update(struct peer *peer) 2184 { 2185 u_char *p; 2186 u_int16_t datalen; 2187 2188 /* 2189 * we pass the message verbatim to the rde. 2190 * in case of errors the whole session is reset with a 2191 * notification anyway, we only need to know the peer 2192 */ 2193 p = peer->rbuf->rptr; 2194 p += MSGSIZE_HEADER_MARKER; 2195 memcpy(&datalen, p, sizeof(datalen)); 2196 datalen = ntohs(datalen); 2197 2198 p = peer->rbuf->rptr; 2199 p += MSGSIZE_HEADER; /* header is already checked */ 2200 datalen -= MSGSIZE_HEADER; 2201 2202 if (imsg_compose(ibuf_rde, IMSG_UPDATE, peer->conf.id, 0, -1, p, 2203 datalen) == -1) 2204 return (-1); 2205 2206 return (0); 2207 } 2208 2209 int 2210 parse_refresh(struct peer *peer) 2211 { 2212 u_char *p; 2213 u_int16_t afi; 2214 u_int8_t aid, safi; 2215 2216 p = peer->rbuf->rptr; 2217 p += MSGSIZE_HEADER; /* header is already checked */ 2218 2219 /* 2220 * We could check if we actually announced the capability but 2221 * as long as the message is correctly encoded we don't care. 2222 */ 2223 2224 /* afi, 2 byte */ 2225 memcpy(&afi, p, sizeof(afi)); 2226 afi = ntohs(afi); 2227 p += 2; 2228 /* reserved, 1 byte */ 2229 p += 1; 2230 /* safi, 1 byte */ 2231 memcpy(&safi, p, sizeof(safi)); 2232 2233 /* afi/safi unchecked - unrecognized values will be ignored anyway */ 2234 if (afi2aid(afi, safi, &aid) == -1) { 2235 log_peer_warnx(&peer->conf, "peer sent bad refresh, " 2236 "invalid afi/safi pair"); 2237 return (0); 2238 } 2239 2240 if (imsg_compose(ibuf_rde, IMSG_REFRESH, peer->conf.id, 0, -1, &aid, 2241 sizeof(aid)) == -1) 2242 return (-1); 2243 2244 return (0); 2245 } 2246 2247 int 2248 parse_notification(struct peer *peer) 2249 { 2250 u_char *p; 2251 u_int16_t datalen; 2252 u_int8_t errcode; 2253 u_int8_t subcode; 2254 u_int8_t capa_code; 2255 u_int8_t capa_len; 2256 u_int8_t i; 2257 2258 /* just log */ 2259 p = peer->rbuf->rptr; 2260 p += MSGSIZE_HEADER_MARKER; 2261 memcpy(&datalen, p, sizeof(datalen)); 2262 datalen = ntohs(datalen); 2263 2264 p = peer->rbuf->rptr; 2265 p += MSGSIZE_HEADER; /* header is already checked */ 2266 datalen -= MSGSIZE_HEADER; 2267 2268 memcpy(&errcode, p, sizeof(errcode)); 2269 p += sizeof(errcode); 2270 datalen -= sizeof(errcode); 2271 2272 memcpy(&subcode, p, sizeof(subcode)); 2273 p += sizeof(subcode); 2274 datalen -= sizeof(subcode); 2275 2276 log_notification(peer, errcode, subcode, p, datalen, "received"); 2277 peer->errcnt++; 2278 2279 if (errcode == ERR_OPEN && subcode == ERR_OPEN_CAPA) { 2280 if (datalen == 0) { /* zebra likes to send those.. humbug */ 2281 log_peer_warnx(&peer->conf, "received \"unsupported " 2282 "capability\" notification without data part, " 2283 "disabling capability announcements altogether"); 2284 session_capa_ann_none(peer); 2285 } 2286 2287 while (datalen > 0) { 2288 if (datalen < 2) { 2289 log_peer_warnx(&peer->conf, 2290 "parse_notification: " 2291 "expect len >= 2, len is %u", datalen); 2292 return (-1); 2293 } 2294 memcpy(&capa_code, p, sizeof(capa_code)); 2295 p += sizeof(capa_code); 2296 datalen -= sizeof(capa_code); 2297 memcpy(&capa_len, p, sizeof(capa_len)); 2298 p += sizeof(capa_len); 2299 datalen -= sizeof(capa_len); 2300 if (datalen < capa_len) { 2301 log_peer_warnx(&peer->conf, 2302 "parse_notification: capa_len %u exceeds " 2303 "remaining msg length %u", capa_len, 2304 datalen); 2305 return (-1); 2306 } 2307 p += capa_len; 2308 datalen -= capa_len; 2309 switch (capa_code) { 2310 case CAPA_MP: 2311 for (i = 0; i < AID_MAX; i++) 2312 peer->capa.ann.mp[i] = 0; 2313 log_peer_warnx(&peer->conf, 2314 "disabling multiprotocol capability"); 2315 break; 2316 case CAPA_REFRESH: 2317 peer->capa.ann.refresh = 0; 2318 log_peer_warnx(&peer->conf, 2319 "disabling route refresh capability"); 2320 break; 2321 case CAPA_RESTART: 2322 peer->capa.ann.grestart.restart = 0; 2323 log_peer_warnx(&peer->conf, 2324 "disabling restart capability"); 2325 break; 2326 case CAPA_AS4BYTE: 2327 peer->capa.ann.as4byte = 0; 2328 log_peer_warnx(&peer->conf, 2329 "disabling 4-byte AS num capability"); 2330 break; 2331 default: /* should not happen... */ 2332 log_peer_warnx(&peer->conf, "received " 2333 "\"unsupported capability\" notification " 2334 "for unknown capability %u, disabling " 2335 "capability announcements altogether", 2336 capa_code); 2337 session_capa_ann_none(peer); 2338 break; 2339 } 2340 } 2341 2342 return (1); 2343 } 2344 2345 if (errcode == ERR_OPEN && subcode == ERR_OPEN_OPT) { 2346 session_capa_ann_none(peer); 2347 return (1); 2348 } 2349 2350 return (0); 2351 } 2352 2353 int 2354 parse_capabilities(struct peer *peer, u_char *d, u_int16_t dlen, u_int32_t *as) 2355 { 2356 u_char *capa_val; 2357 u_int32_t remote_as; 2358 u_int16_t len; 2359 u_int16_t afi; 2360 u_int16_t gr_header; 2361 u_int8_t safi; 2362 u_int8_t aid; 2363 u_int8_t gr_flags; 2364 u_int8_t capa_code; 2365 u_int8_t capa_len; 2366 u_int8_t i; 2367 2368 len = dlen; 2369 while (len > 0) { 2370 if (len < 2) { 2371 log_peer_warnx(&peer->conf, "Bad capabilities attr " 2372 "length: %u, too short", len); 2373 return (-1); 2374 } 2375 memcpy(&capa_code, d, sizeof(capa_code)); 2376 d += sizeof(capa_code); 2377 len -= sizeof(capa_code); 2378 memcpy(&capa_len, d, sizeof(capa_len)); 2379 d += sizeof(capa_len); 2380 len -= sizeof(capa_len); 2381 if (capa_len > 0) { 2382 if (len < capa_len) { 2383 log_peer_warnx(&peer->conf, 2384 "Bad capabilities attr length: " 2385 "len %u smaller than capa_len %u", 2386 len, capa_len); 2387 return (-1); 2388 } 2389 capa_val = d; 2390 d += capa_len; 2391 len -= capa_len; 2392 } else 2393 capa_val = NULL; 2394 2395 switch (capa_code) { 2396 case CAPA_MP: /* RFC 4760 */ 2397 if (capa_len != 4) { 2398 log_peer_warnx(&peer->conf, 2399 "Bad multi protocol capability length: " 2400 "%u", capa_len); 2401 break; 2402 } 2403 memcpy(&afi, capa_val, sizeof(afi)); 2404 afi = ntohs(afi); 2405 memcpy(&safi, capa_val + 3, sizeof(safi)); 2406 if (afi2aid(afi, safi, &aid) == -1) { 2407 log_peer_warnx(&peer->conf, 2408 "Received multi protocol capability: " 2409 " unknown AFI %u, safi %u pair", 2410 afi, safi); 2411 break; 2412 } 2413 peer->capa.peer.mp[aid] = 1; 2414 break; 2415 case CAPA_REFRESH: 2416 peer->capa.peer.refresh = 1; 2417 break; 2418 case CAPA_RESTART: 2419 if (capa_len == 2) { 2420 /* peer only supports EoR marker */ 2421 peer->capa.peer.grestart.restart = 1; 2422 peer->capa.peer.grestart.timeout = 0; 2423 break; 2424 } else if (capa_len % 4 != 2) { 2425 log_peer_warnx(&peer->conf, 2426 "Bad graceful restart capability length: " 2427 "%u", capa_len); 2428 peer->capa.peer.grestart.restart = 0; 2429 peer->capa.peer.grestart.timeout = 0; 2430 break; 2431 } 2432 2433 memcpy(&gr_header, capa_val, sizeof(gr_header)); 2434 gr_header = ntohs(gr_header); 2435 peer->capa.peer.grestart.timeout = 2436 gr_header & CAPA_GR_TIMEMASK; 2437 if (peer->capa.peer.grestart.timeout == 0) { 2438 log_peer_warnx(&peer->conf, "Received " 2439 "graceful restart timeout is zero"); 2440 peer->capa.peer.grestart.restart = 0; 2441 break; 2442 } 2443 2444 for (i = 2; i <= capa_len - 4; i += 4) { 2445 memcpy(&afi, capa_val + i, sizeof(afi)); 2446 afi = ntohs(afi); 2447 memcpy(&safi, capa_val + i + 2, sizeof(safi)); 2448 if (afi2aid(afi, safi, &aid) == -1) { 2449 log_peer_warnx(&peer->conf, 2450 "Received graceful restart capa: " 2451 " unknown AFI %u, safi %u pair", 2452 afi, safi); 2453 continue; 2454 } 2455 memcpy(&gr_flags, capa_val + i + 3, 2456 sizeof(gr_flags)); 2457 peer->capa.peer.grestart.flags[aid] |= 2458 CAPA_GR_PRESENT; 2459 if (gr_flags & CAPA_GR_F_FLAG) 2460 peer->capa.peer.grestart.flags[aid] |= 2461 CAPA_GR_FORWARD; 2462 if (gr_header & CAPA_GR_R_FLAG) 2463 peer->capa.peer.grestart.flags[aid] |= 2464 CAPA_GR_RESTART; 2465 peer->capa.peer.grestart.restart = 2; 2466 } 2467 break; 2468 case CAPA_AS4BYTE: 2469 if (capa_len != 4) { 2470 log_peer_warnx(&peer->conf, 2471 "Bad AS4BYTE capability length: " 2472 "%u", capa_len); 2473 peer->capa.peer.as4byte = 0; 2474 break; 2475 } 2476 memcpy(&remote_as, capa_val, sizeof(remote_as)); 2477 *as = ntohl(remote_as); 2478 peer->capa.peer.as4byte = 1; 2479 break; 2480 default: 2481 break; 2482 } 2483 } 2484 2485 return (0); 2486 } 2487 2488 int 2489 capa_neg_calc(struct peer *p) 2490 { 2491 u_int8_t i, hasmp = 0; 2492 2493 /* refresh: does not realy matter here, use peer setting */ 2494 p->capa.neg.refresh = p->capa.peer.refresh; 2495 2496 /* as4byte: both side must announce capability */ 2497 if (p->capa.ann.as4byte && p->capa.peer.as4byte) 2498 p->capa.neg.as4byte = 1; 2499 else 2500 p->capa.neg.as4byte = 0; 2501 2502 /* MP: both side must announce capability */ 2503 for (i = 0; i < AID_MAX; i++) { 2504 if (p->capa.ann.mp[i] && p->capa.peer.mp[i]) { 2505 p->capa.neg.mp[i] = 1; 2506 hasmp = 1; 2507 } else 2508 p->capa.neg.mp[i] = 0; 2509 } 2510 /* if no MP capability present default to IPv4 unicast mode */ 2511 if (!hasmp) 2512 p->capa.neg.mp[AID_INET] = 1; 2513 2514 /* 2515 * graceful restart: only the peer capabilities are of interest here. 2516 * It is necessary to compare the new values with the previous ones 2517 * and act acordingly. AFI/SAFI that are not part in the MP capability 2518 * are treated as not being present. 2519 */ 2520 2521 for (i = 0; i < AID_MAX; i++) { 2522 int8_t negflags; 2523 2524 /* disable GR if the AFI/SAFI is not present */ 2525 if (p->capa.peer.grestart.flags[i] & CAPA_GR_PRESENT && 2526 p->capa.neg.mp[i] == 0) 2527 p->capa.peer.grestart.flags[i] = 0; /* disable */ 2528 /* look at current GR state and decide what to do */ 2529 negflags = p->capa.neg.grestart.flags[i]; 2530 p->capa.neg.grestart.flags[i] = p->capa.peer.grestart.flags[i]; 2531 if (negflags & CAPA_GR_RESTARTING) { 2532 if (!(p->capa.peer.grestart.flags[i] & 2533 CAPA_GR_FORWARD)) { 2534 if (imsg_compose(ibuf_rde, IMSG_SESSION_FLUSH, 2535 p->conf.id, 0, -1, &i, sizeof(i)) == -1) 2536 return (-1); 2537 log_peer_warnx(&p->conf, "graceful restart of " 2538 "%s, not restarted, flushing", aid2str(i)); 2539 } else 2540 p->capa.neg.grestart.flags[i] |= 2541 CAPA_GR_RESTARTING; 2542 } 2543 } 2544 p->capa.neg.grestart.timeout = p->capa.peer.grestart.timeout; 2545 p->capa.neg.grestart.restart = p->capa.peer.grestart.restart; 2546 2547 return (0); 2548 } 2549 2550 void 2551 session_dispatch_imsg(struct imsgbuf *ibuf, int idx, u_int *listener_cnt) 2552 { 2553 struct imsg imsg; 2554 struct mrt xmrt; 2555 struct mrt *mrt; 2556 struct peer_config *pconf; 2557 struct peer *p, *next; 2558 struct listen_addr *la, *nla; 2559 struct kif *kif; 2560 u_char *data; 2561 enum reconf_action reconf; 2562 int n, depend_ok, restricted; 2563 u_int8_t aid, errcode, subcode; 2564 2565 if ((n = imsg_read(ibuf)) == -1) 2566 fatal("session_dispatch_imsg: imsg_read error"); 2567 2568 if (n == 0) /* connection closed */ 2569 fatalx("session_dispatch_imsg: pipe closed"); 2570 2571 for (;;) { 2572 if ((n = imsg_get(ibuf, &imsg)) == -1) 2573 fatal("session_dispatch_imsg: imsg_get error"); 2574 2575 if (n == 0) 2576 break; 2577 2578 switch (imsg.hdr.type) { 2579 case IMSG_RECONF_CONF: 2580 if (idx != PFD_PIPE_MAIN) 2581 fatalx("reconf request not from parent"); 2582 if ((nconf = malloc(sizeof(struct bgpd_config))) == 2583 NULL) 2584 fatal(NULL); 2585 memcpy(nconf, imsg.data, sizeof(struct bgpd_config)); 2586 if ((nconf->listen_addrs = calloc(1, 2587 sizeof(struct listen_addrs))) == NULL) 2588 fatal(NULL); 2589 TAILQ_INIT(nconf->listen_addrs); 2590 npeers = NULL; 2591 init_conf(nconf); 2592 pending_reconf = 1; 2593 break; 2594 case IMSG_RECONF_PEER: 2595 if (idx != PFD_PIPE_MAIN) 2596 fatalx("reconf request not from parent"); 2597 pconf = imsg.data; 2598 p = getpeerbyaddr(&pconf->remote_addr); 2599 if (p == NULL) { 2600 if ((p = calloc(1, sizeof(struct peer))) == 2601 NULL) 2602 fatal("new_peer"); 2603 p->state = p->prev_state = STATE_NONE; 2604 p->next = npeers; 2605 npeers = p; 2606 reconf = RECONF_REINIT; 2607 } else 2608 reconf = RECONF_KEEP; 2609 2610 memcpy(&p->conf, pconf, sizeof(struct peer_config)); 2611 p->conf.reconf_action = reconf; 2612 2613 /* sync the RDE in case we keep the peer */ 2614 if (reconf == RECONF_KEEP) { 2615 if (imsg_compose(ibuf_rde, IMSG_SESSION_ADD, 2616 p->conf.id, 0, -1, &p->conf, 2617 sizeof(struct peer_config)) == -1) 2618 fatalx("imsg_compose error"); 2619 if (p->conf.template) { 2620 /* apply the conf to all clones */ 2621 struct peer *np; 2622 for (np = peers; np; np = np->next) { 2623 if (np->template != p) 2624 continue; 2625 session_template_clone(np, 2626 NULL, np->conf.id, 2627 np->conf.remote_as); 2628 if (imsg_compose(ibuf_rde, 2629 IMSG_SESSION_ADD, 2630 np->conf.id, 0, -1, 2631 &np->conf, 2632 sizeof(struct peer_config)) 2633 == -1) 2634 fatalx("imsg_compose error"); 2635 } 2636 } 2637 } 2638 break; 2639 case IMSG_RECONF_LISTENER: 2640 if (idx != PFD_PIPE_MAIN) 2641 fatalx("reconf request not from parent"); 2642 if (nconf == NULL) 2643 fatalx("IMSG_RECONF_LISTENER but no config"); 2644 nla = imsg.data; 2645 TAILQ_FOREACH(la, conf->listen_addrs, entry) 2646 if (!la_cmp(la, nla)) 2647 break; 2648 2649 if (la == NULL) { 2650 if (nla->reconf != RECONF_REINIT) 2651 fatalx("king bula sez: " 2652 "expected REINIT"); 2653 2654 if ((nla->fd = imsg.fd) == -1) 2655 log_warnx("expected to receive fd for " 2656 "%s but didn't receive any", 2657 log_sockaddr((struct sockaddr *) 2658 &nla->sa)); 2659 2660 la = calloc(1, sizeof(struct listen_addr)); 2661 if (la == NULL) 2662 fatal(NULL); 2663 memcpy(&la->sa, &nla->sa, sizeof(la->sa)); 2664 la->flags = nla->flags; 2665 la->fd = nla->fd; 2666 la->reconf = RECONF_REINIT; 2667 TAILQ_INSERT_TAIL(nconf->listen_addrs, la, 2668 entry); 2669 } else { 2670 if (nla->reconf != RECONF_KEEP) 2671 fatalx("king bula sez: expected KEEP"); 2672 la->reconf = RECONF_KEEP; 2673 } 2674 2675 break; 2676 case IMSG_RECONF_CTRL: 2677 if (idx != PFD_PIPE_MAIN) 2678 fatalx("reconf request not from parent"); 2679 if (imsg.hdr.len != IMSG_HEADER_SIZE + 2680 sizeof(restricted)) 2681 fatalx("IFINFO imsg with wrong len"); 2682 memcpy(&restricted, imsg.data, sizeof(restricted)); 2683 if (imsg.fd == -1) { 2684 log_warnx("expected to receive fd for control " 2685 "socket but didn't receive any"); 2686 break; 2687 } 2688 if (restricted) { 2689 control_shutdown(rcsock); 2690 rcsock = imsg.fd; 2691 control_listen(rcsock); 2692 } else { 2693 control_shutdown(csock); 2694 csock = imsg.fd; 2695 control_listen(csock); 2696 } 2697 break; 2698 case IMSG_RECONF_DONE: 2699 if (idx != PFD_PIPE_MAIN) 2700 fatalx("reconf request not from parent"); 2701 if (nconf == NULL) 2702 fatalx("got IMSG_RECONF_DONE but no config"); 2703 conf->flags = nconf->flags; 2704 conf->log = nconf->log; 2705 conf->bgpid = nconf->bgpid; 2706 conf->clusterid = nconf->clusterid; 2707 conf->as = nconf->as; 2708 conf->short_as = nconf->short_as; 2709 conf->holdtime = nconf->holdtime; 2710 conf->min_holdtime = nconf->min_holdtime; 2711 conf->connectretry = nconf->connectretry; 2712 2713 /* add new peers */ 2714 for (p = npeers; p != NULL; p = next) { 2715 next = p->next; 2716 p->next = peers; 2717 peers = p; 2718 } 2719 /* find ones that need attention */ 2720 for (p = peers; p != NULL; p = p->next) { 2721 /* needs to be deleted? */ 2722 if (p->conf.reconf_action == RECONF_NONE && 2723 !p->template) 2724 p->conf.reconf_action = RECONF_DELETE; 2725 /* had demotion, is demoted, demote removed? */ 2726 if (p->demoted && !p->conf.demote_group[0]) 2727 session_demote(p, -1); 2728 } 2729 2730 /* delete old listeners */ 2731 for (la = TAILQ_FIRST(conf->listen_addrs); la != NULL; 2732 la = nla) { 2733 nla = TAILQ_NEXT(la, entry); 2734 if (la->reconf == RECONF_NONE) { 2735 log_info("not listening on %s any more", 2736 log_sockaddr( 2737 (struct sockaddr *)&la->sa)); 2738 TAILQ_REMOVE(conf->listen_addrs, la, 2739 entry); 2740 close(la->fd); 2741 free(la); 2742 } 2743 } 2744 2745 /* add new listeners */ 2746 while ((la = TAILQ_FIRST(nconf->listen_addrs)) != 2747 NULL) { 2748 TAILQ_REMOVE(nconf->listen_addrs, la, entry); 2749 TAILQ_INSERT_TAIL(conf->listen_addrs, la, 2750 entry); 2751 } 2752 2753 setup_listeners(listener_cnt); 2754 free(nconf->listen_addrs); 2755 free(nconf); 2756 nconf = NULL; 2757 pending_reconf = 0; 2758 log_info("SE reconfigured"); 2759 imsg_compose(ibuf_main, IMSG_RECONF_DONE, 0, 0, 2760 -1, NULL, 0); 2761 break; 2762 case IMSG_IFINFO: 2763 if (idx != PFD_PIPE_MAIN) 2764 fatalx("IFINFO message not from parent"); 2765 if (imsg.hdr.len != IMSG_HEADER_SIZE + 2766 sizeof(struct kif)) 2767 fatalx("IFINFO imsg with wrong len"); 2768 kif = imsg.data; 2769 depend_ok = (kif->flags & IFF_UP) && 2770 LINK_STATE_IS_UP(kif->link_state); 2771 2772 for (p = peers; p != NULL; p = p->next) 2773 if (!strcmp(p->conf.if_depend, kif->ifname)) { 2774 if (depend_ok && !p->depend_ok) { 2775 p->depend_ok = depend_ok; 2776 bgp_fsm(p, EVNT_START); 2777 } else if (!depend_ok && p->depend_ok) { 2778 p->depend_ok = depend_ok; 2779 session_stop(p, 2780 ERR_CEASE_OTHER_CHANGE); 2781 } 2782 } 2783 break; 2784 case IMSG_MRT_OPEN: 2785 case IMSG_MRT_REOPEN: 2786 if (imsg.hdr.len > IMSG_HEADER_SIZE + 2787 sizeof(struct mrt)) { 2788 log_warnx("wrong imsg len"); 2789 break; 2790 } 2791 2792 memcpy(&xmrt, imsg.data, sizeof(struct mrt)); 2793 if ((xmrt.wbuf.fd = imsg.fd) == -1) 2794 log_warnx("expected to receive fd for mrt dump " 2795 "but didn't receive any"); 2796 2797 mrt = mrt_get(&mrthead, &xmrt); 2798 if (mrt == NULL) { 2799 /* new dump */ 2800 mrt = calloc(1, sizeof(struct mrt)); 2801 if (mrt == NULL) 2802 fatal("session_dispatch_imsg"); 2803 memcpy(mrt, &xmrt, sizeof(struct mrt)); 2804 TAILQ_INIT(&mrt->wbuf.bufs); 2805 LIST_INSERT_HEAD(&mrthead, mrt, entry); 2806 } else { 2807 /* old dump reopened */ 2808 close(mrt->wbuf.fd); 2809 mrt->wbuf.fd = xmrt.wbuf.fd; 2810 } 2811 break; 2812 case IMSG_MRT_CLOSE: 2813 if (imsg.hdr.len > IMSG_HEADER_SIZE + 2814 sizeof(struct mrt)) { 2815 log_warnx("wrong imsg len"); 2816 break; 2817 } 2818 2819 memcpy(&xmrt, imsg.data, sizeof(struct mrt)); 2820 mrt = mrt_get(&mrthead, &xmrt); 2821 if (mrt != NULL) 2822 mrt_done(mrt); 2823 break; 2824 case IMSG_CTL_KROUTE: 2825 case IMSG_CTL_KROUTE_ADDR: 2826 case IMSG_CTL_SHOW_NEXTHOP: 2827 case IMSG_CTL_SHOW_INTERFACE: 2828 case IMSG_CTL_SHOW_FIB_TABLES: 2829 if (idx != PFD_PIPE_MAIN) 2830 fatalx("ctl kroute request not from parent"); 2831 control_imsg_relay(&imsg); 2832 break; 2833 case IMSG_CTL_SHOW_RIB: 2834 case IMSG_CTL_SHOW_RIB_PREFIX: 2835 case IMSG_CTL_SHOW_RIB_ATTR: 2836 case IMSG_CTL_SHOW_RIB_MEM: 2837 case IMSG_CTL_SHOW_NETWORK: 2838 case IMSG_CTL_SHOW_NEIGHBOR: 2839 if (idx != PFD_PIPE_ROUTE_CTL) 2840 fatalx("ctl rib request not from RDE"); 2841 control_imsg_relay(&imsg); 2842 break; 2843 case IMSG_CTL_END: 2844 case IMSG_CTL_RESULT: 2845 control_imsg_relay(&imsg); 2846 break; 2847 case IMSG_UPDATE: 2848 if (idx != PFD_PIPE_ROUTE) 2849 fatalx("update request not from RDE"); 2850 if (imsg.hdr.len > IMSG_HEADER_SIZE + 2851 MAX_PKTSIZE - MSGSIZE_HEADER || 2852 imsg.hdr.len < IMSG_HEADER_SIZE + 2853 MSGSIZE_UPDATE_MIN - MSGSIZE_HEADER) 2854 log_warnx("RDE sent invalid update"); 2855 else 2856 session_update(imsg.hdr.peerid, imsg.data, 2857 imsg.hdr.len - IMSG_HEADER_SIZE); 2858 break; 2859 case IMSG_UPDATE_ERR: 2860 if (idx != PFD_PIPE_ROUTE) 2861 fatalx("update request not from RDE"); 2862 if (imsg.hdr.len < IMSG_HEADER_SIZE + 2) { 2863 log_warnx("RDE sent invalid notification"); 2864 break; 2865 } 2866 if ((p = getpeerbyid(imsg.hdr.peerid)) == NULL) { 2867 log_warnx("no such peer: id=%u", 2868 imsg.hdr.peerid); 2869 break; 2870 } 2871 data = imsg.data; 2872 errcode = *data++; 2873 subcode = *data++; 2874 2875 if (imsg.hdr.len == IMSG_HEADER_SIZE + 2) 2876 data = NULL; 2877 2878 session_notification(p, errcode, subcode, 2879 data, imsg.hdr.len - IMSG_HEADER_SIZE - 2); 2880 switch (errcode) { 2881 case ERR_CEASE: 2882 switch (subcode) { 2883 case ERR_CEASE_MAX_PREFIX: 2884 bgp_fsm(p, EVNT_STOP); 2885 if (p->conf.max_prefix_restart) 2886 timer_set(p, Timer_IdleHold, 60 * 2887 p->conf.max_prefix_restart); 2888 break; 2889 default: 2890 bgp_fsm(p, EVNT_CON_FATAL); 2891 break; 2892 } 2893 break; 2894 default: 2895 bgp_fsm(p, EVNT_CON_FATAL); 2896 break; 2897 } 2898 break; 2899 case IMSG_SESSION_RESTARTED: 2900 if (idx != PFD_PIPE_ROUTE) 2901 fatalx("update request not from RDE"); 2902 if (imsg.hdr.len < IMSG_HEADER_SIZE + sizeof(aid)) { 2903 log_warnx("RDE sent invalid restart msg"); 2904 break; 2905 } 2906 if ((p = getpeerbyid(imsg.hdr.peerid)) == NULL) { 2907 log_warnx("no such peer: id=%u", 2908 imsg.hdr.peerid); 2909 break; 2910 } 2911 memcpy(&aid, imsg.data, sizeof(aid)); 2912 if (aid >= AID_MAX) 2913 fatalx("IMSG_SESSION_RESTARTED: bad AID"); 2914 if (p->capa.neg.grestart.flags[aid] & 2915 CAPA_GR_RESTARTING) { 2916 log_peer_warnx(&p->conf, 2917 "graceful restart of %s finished", 2918 aid2str(aid)); 2919 p->capa.neg.grestart.flags[aid] &= 2920 ~CAPA_GR_RESTARTING; 2921 timer_stop(p, Timer_RestartTimeout); 2922 2923 /* signal back to RDE to cleanup stale routes */ 2924 if (imsg_compose(ibuf_rde, 2925 IMSG_SESSION_RESTARTED, imsg.hdr.peerid, 0, 2926 -1, &aid, sizeof(aid)) == -1) 2927 fatal("imsg_compose: " 2928 "IMSG_SESSION_RESTARTED"); 2929 } 2930 break; 2931 default: 2932 break; 2933 } 2934 imsg_free(&imsg); 2935 } 2936 } 2937 2938 int 2939 la_cmp(struct listen_addr *a, struct listen_addr *b) 2940 { 2941 struct sockaddr_in *in_a, *in_b; 2942 struct sockaddr_in6 *in6_a, *in6_b; 2943 2944 if (a->sa.ss_family != b->sa.ss_family) 2945 return (1); 2946 2947 switch (a->sa.ss_family) { 2948 case AF_INET: 2949 in_a = (struct sockaddr_in *)&a->sa; 2950 in_b = (struct sockaddr_in *)&b->sa; 2951 if (in_a->sin_addr.s_addr != in_b->sin_addr.s_addr) 2952 return (1); 2953 if (in_a->sin_port != in_b->sin_port) 2954 return (1); 2955 break; 2956 case AF_INET6: 2957 in6_a = (struct sockaddr_in6 *)&a->sa; 2958 in6_b = (struct sockaddr_in6 *)&b->sa; 2959 if (bcmp(&in6_a->sin6_addr, &in6_b->sin6_addr, 2960 sizeof(struct in6_addr))) 2961 return (1); 2962 if (in6_a->sin6_port != in6_b->sin6_port) 2963 return (1); 2964 break; 2965 default: 2966 fatal("king bula sez: unknown address family"); 2967 /* NOTREACHED */ 2968 } 2969 2970 return (0); 2971 } 2972 2973 struct peer * 2974 getpeerbyaddr(struct bgpd_addr *addr) 2975 { 2976 struct peer *p; 2977 2978 /* we might want a more effective way to find peers by IP */ 2979 for (p = peers; p != NULL && 2980 memcmp(&p->conf.remote_addr, addr, sizeof(p->conf.remote_addr)); 2981 p = p->next) 2982 ; /* nothing */ 2983 2984 return (p); 2985 } 2986 2987 struct peer * 2988 getpeerbydesc(const char *descr) 2989 { 2990 struct peer *p, *res = NULL; 2991 int match = 0; 2992 2993 for (p = peers; p != NULL; p = p->next) 2994 if (!strcmp(p->conf.descr, descr)) { 2995 res = p; 2996 match++; 2997 } 2998 2999 if (match > 1) 3000 log_info("neighbor description \"%s\" not unique, request " 3001 "aborted", descr); 3002 3003 if (match == 1) 3004 return (res); 3005 else 3006 return (NULL); 3007 } 3008 3009 struct peer * 3010 getpeerbyip(struct sockaddr *ip) 3011 { 3012 struct bgpd_addr addr; 3013 struct peer *p, *newpeer, *loose = NULL; 3014 u_int32_t id; 3015 3016 sa2addr(ip, &addr); 3017 3018 /* we might want a more effective way to find peers by IP */ 3019 for (p = peers; p != NULL; p = p->next) 3020 if (!p->conf.template && 3021 !memcmp(&addr, &p->conf.remote_addr, sizeof(addr))) 3022 return (p); 3023 3024 /* try template matching */ 3025 for (p = peers; p != NULL; p = p->next) 3026 if (p->conf.template && 3027 p->conf.remote_addr.aid == addr.aid && 3028 session_match_mask(p, &addr)) 3029 if (loose == NULL || loose->conf.remote_masklen < 3030 p->conf.remote_masklen) 3031 loose = p; 3032 3033 if (loose != NULL) { 3034 /* clone */ 3035 if ((newpeer = malloc(sizeof(struct peer))) == NULL) 3036 fatal(NULL); 3037 memcpy(newpeer, loose, sizeof(struct peer)); 3038 for (id = UINT_MAX; id > UINT_MAX / 2; id--) { 3039 for (p = peers; p != NULL && p->conf.id != id; 3040 p = p->next) 3041 ; /* nothing */ 3042 if (p == NULL) { /* we found a free id */ 3043 break; 3044 } 3045 } 3046 newpeer->template = loose; 3047 session_template_clone(newpeer, ip, id, 0); 3048 newpeer->state = newpeer->prev_state = STATE_NONE; 3049 newpeer->conf.reconf_action = RECONF_KEEP; 3050 newpeer->rbuf = NULL; 3051 init_peer(newpeer); 3052 bgp_fsm(newpeer, EVNT_START); 3053 newpeer->next = peers; 3054 peers = newpeer; 3055 return (newpeer); 3056 } 3057 3058 return (NULL); 3059 } 3060 3061 void 3062 session_template_clone(struct peer *p, struct sockaddr *ip, u_int32_t id, 3063 u_int32_t as) 3064 { 3065 struct bgpd_addr remote_addr; 3066 3067 if (ip) 3068 sa2addr(ip, &remote_addr); 3069 else 3070 memcpy(&remote_addr, &p->conf.remote_addr, sizeof(remote_addr)); 3071 3072 memcpy(&p->conf, &p->template->conf, sizeof(struct peer_config)); 3073 3074 p->conf.id = id; 3075 3076 if (as) { 3077 p->conf.remote_as = as; 3078 p->conf.ebgp = (p->conf.remote_as != conf->as); 3079 if (!p->conf.ebgp) 3080 /* force enforce_as off for iBGP sessions */ 3081 p->conf.enforce_as = ENFORCE_AS_OFF; 3082 } 3083 3084 memcpy(&p->conf.remote_addr, &remote_addr, sizeof(remote_addr)); 3085 switch (p->conf.remote_addr.aid) { 3086 case AID_INET: 3087 p->conf.remote_masklen = 32; 3088 break; 3089 case AID_INET6: 3090 p->conf.remote_masklen = 128; 3091 break; 3092 } 3093 p->conf.template = 0; 3094 } 3095 3096 int 3097 session_match_mask(struct peer *p, struct bgpd_addr *a) 3098 { 3099 in_addr_t v4mask; 3100 struct in6_addr masked; 3101 3102 switch (p->conf.remote_addr.aid) { 3103 case AID_INET: 3104 v4mask = htonl(prefixlen2mask(p->conf.remote_masklen)); 3105 if (p->conf.remote_addr.v4.s_addr == (a->v4.s_addr & v4mask)) 3106 return (1); 3107 return (0); 3108 case AID_INET6: 3109 inet6applymask(&masked, &a->v6, p->conf.remote_masklen); 3110 3111 if (!memcmp(&masked, &p->conf.remote_addr.v6, sizeof(masked))) 3112 return (1); 3113 return (0); 3114 } 3115 return (0); 3116 } 3117 3118 struct peer * 3119 getpeerbyid(u_int32_t peerid) 3120 { 3121 struct peer *p; 3122 3123 /* we might want a more effective way to find peers by IP */ 3124 for (p = peers; p != NULL && 3125 p->conf.id != peerid; p = p->next) 3126 ; /* nothing */ 3127 3128 return (p); 3129 } 3130 3131 void 3132 session_down(struct peer *peer) 3133 { 3134 bzero(&peer->capa.neg, sizeof(peer->capa.neg)); 3135 peer->stats.last_updown = time(NULL); 3136 if (imsg_compose(ibuf_rde, IMSG_SESSION_DOWN, peer->conf.id, 0, -1, 3137 NULL, 0) == -1) 3138 fatalx("imsg_compose error"); 3139 } 3140 3141 void 3142 session_up(struct peer *p) 3143 { 3144 struct session_up sup; 3145 3146 if (imsg_compose(ibuf_rde, IMSG_SESSION_ADD, p->conf.id, 0, -1, 3147 &p->conf, sizeof(p->conf)) == -1) 3148 fatalx("imsg_compose error"); 3149 3150 sa2addr((struct sockaddr *)&p->sa_local, &sup.local_addr); 3151 sa2addr((struct sockaddr *)&p->sa_remote, &sup.remote_addr); 3152 3153 sup.remote_bgpid = p->remote_bgpid; 3154 sup.short_as = p->short_as; 3155 memcpy(&sup.capa, &p->capa.neg, sizeof(sup.capa)); 3156 p->stats.last_updown = time(NULL); 3157 if (imsg_compose(ibuf_rde, IMSG_SESSION_UP, p->conf.id, 0, -1, 3158 &sup, sizeof(sup)) == -1) 3159 fatalx("imsg_compose error"); 3160 } 3161 3162 int 3163 imsg_compose_parent(int type, u_int32_t peerid, pid_t pid, void *data, 3164 u_int16_t datalen) 3165 { 3166 return (imsg_compose(ibuf_main, type, peerid, pid, -1, data, datalen)); 3167 } 3168 3169 int 3170 imsg_compose_rde(int type, pid_t pid, void *data, u_int16_t datalen) 3171 { 3172 return (imsg_compose(ibuf_rde, type, 0, pid, -1, data, datalen)); 3173 } 3174 3175 void 3176 session_demote(struct peer *p, int level) 3177 { 3178 struct demote_msg msg; 3179 3180 strlcpy(msg.demote_group, p->conf.demote_group, 3181 sizeof(msg.demote_group)); 3182 msg.level = level; 3183 if (imsg_compose(ibuf_main, IMSG_DEMOTE, p->conf.id, 0, -1, 3184 &msg, sizeof(msg)) == -1) 3185 fatalx("imsg_compose error"); 3186 3187 p->demoted += level; 3188 } 3189 3190 void 3191 session_stop(struct peer *peer, u_int8_t subcode) 3192 { 3193 switch (peer->state) { 3194 case STATE_OPENSENT: 3195 case STATE_OPENCONFIRM: 3196 case STATE_ESTABLISHED: 3197 session_notification(peer, ERR_CEASE, subcode, NULL, 0); 3198 break; 3199 default: 3200 /* session not open, no need to send notification */ 3201 break; 3202 } 3203 bgp_fsm(peer, EVNT_STOP); 3204 } 3205