xref: /openbsd-src/usr.sbin/bgpd/session.c (revision 505ee9ea3b177e2387d907a91ca7da069f3f14d8)
1 /*	$OpenBSD: session.c,v 1.402 2020/06/27 07:24:42 bket Exp $ */
2 
3 /*
4  * Copyright (c) 2003, 2004, 2005 Henning Brauer <henning@openbsd.org>
5  * Copyright (c) 2017 Peter van Dijk <peter.van.dijk@powerdns.com>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 
20 #include <sys/types.h>
21 
22 #include <sys/mman.h>
23 #include <sys/socket.h>
24 #include <sys/time.h>
25 #include <sys/resource.h>
26 #include <sys/un.h>
27 #include <netinet/in.h>
28 #include <netinet/ip.h>
29 #include <netinet/tcp.h>
30 #include <arpa/inet.h>
31 #include <limits.h>
32 
33 #include <err.h>
34 #include <errno.h>
35 #include <fcntl.h>
36 #include <ifaddrs.h>
37 #include <poll.h>
38 #include <pwd.h>
39 #include <signal.h>
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include <string.h>
43 #include <syslog.h>
44 #include <unistd.h>
45 
46 #include "bgpd.h"
47 #include "session.h"
48 #include "log.h"
49 
/*
 * Fixed slots at the front of the pollfd array built in session_main():
 * the imsg pipes polled for ibuf_main, ibuf_rde and ibuf_rde_ctl, the two
 * control sockets (csock, rcsock), and the index of the first listener
 * slot.
 */
50 #define PFD_PIPE_MAIN		0
51 #define PFD_PIPE_ROUTE		1
52 #define PFD_PIPE_ROUTE_CTL	2
53 #define PFD_SOCK_CTL		3
54 #define PFD_SOCK_RCTL		4
55 #define PFD_LISTENERS_START	5
56 
/* forward declarations of the functions defined further down */
57 void	session_sighdlr(int);
58 int	setup_listeners(u_int *);
59 void	init_peer(struct peer *);
60 void	start_timer_holdtime(struct peer *);
61 void	start_timer_keepalive(struct peer *);
62 void	session_close_connection(struct peer *);
63 void	change_state(struct peer *, enum session_state, enum session_events);
64 int	session_setup_socket(struct peer *);
65 void	session_accept(int);
66 int	session_connect(struct peer *);
67 void	session_tcp_established(struct peer *);
68 void	session_capa_ann_none(struct peer *);
69 int	session_capa_add(struct ibuf *, u_int8_t, u_int8_t);
70 int	session_capa_add_mp(struct ibuf *, u_int8_t);
71 int	session_capa_add_gr(struct peer *, struct ibuf *, u_int8_t);
72 struct bgp_msg	*session_newmsg(enum msg_type, u_int16_t);
73 int	session_sendmsg(struct bgp_msg *, struct peer *);
74 void	session_open(struct peer *);
75 void	session_keepalive(struct peer *);
76 void	session_update(u_int32_t, void *, size_t);
77 void	session_notification(struct peer *, u_int8_t, u_int8_t, void *,
78 	    ssize_t);
79 void	session_rrefresh(struct peer *, u_int8_t);
80 int	session_graceful_restart(struct peer *);
81 int	session_graceful_stop(struct peer *);
82 int	session_dispatch_msg(struct pollfd *, struct peer *);
83 void	session_process_msg(struct peer *);
84 int	parse_header(struct peer *, u_char *, u_int16_t *, u_int8_t *);
85 int	parse_open(struct peer *);
86 int	parse_update(struct peer *);
87 int	parse_refresh(struct peer *);
88 int	parse_notification(struct peer *);
89 int	parse_capabilities(struct peer *, u_char *, u_int16_t, u_int32_t *);
90 int	capa_neg_calc(struct peer *);
91 void	session_dispatch_imsg(struct imsgbuf *, int, u_int *);
92 void	session_up(struct peer *);
93 void	session_down(struct peer *);
94 int	imsg_rde(int, u_int32_t, void *, u_int16_t);
95 void	session_demote(struct peer *, int);
96 void	merge_peers(struct bgpd_config *, struct bgpd_config *);
97 
98 int		 la_cmp(struct listen_addr *, struct listen_addr *);
99 void		 session_template_clone(struct peer *, struct sockaddr *,
100 		    u_int32_t, u_int32_t);
101 int		 session_match_mask(struct peer *, struct bgpd_addr *);
102 
/*
 * conf is the configuration the session engine runs with (peers and
 * listen addresses are reached through it).
 * NOTE(review): nconf is presumably the configuration being received
 * during a reload; it is not used in this part of the file - confirm.
 */
103 struct bgpd_config	*conf, *nconf;
104 struct bgpd_sysdep	 sysdep;
/* set to 1 by session_sighdlr() on SIGINT/SIGTERM; ends the main loop */
105 volatile sig_atomic_t	 session_quit;
/* while non-zero, session_main() skips peer init/reinit/deletion */
106 int			 pending_reconf;
/* control sockets, handed to control_accept() with 0 resp. 1 */
107 int			 csock = -1, rcsock = -1;
/* incremented by init_peer(), decremented when a peer is removed */
108 u_int			 peer_cnt;
/* imsg buffers polled in the PFD_PIPE_* slots */
109 struct imsgbuf		*ibuf_rde;
110 struct imsgbuf		*ibuf_rde_ctl;
111 struct imsgbuf		*ibuf_main;
112 
/* list of mrt dump descriptors */
113 struct mrt_head		 mrthead;
/* time of the last ENFILE/EMFILE from accept4(); non-zero pauses accepts */
114 time_t			 pauseaccept;
115 
115 
116 static inline int
117 peer_compare(const struct peer *a, const struct peer *b)
118 {
119 	return a->conf.id - b->conf.id;
120 }
121 
122 RB_GENERATE(peer_head, peer, entry, peer_compare);
123 
124 void
125 session_sighdlr(int sig)
126 {
127 	switch (sig) {
128 	case SIGINT:
129 	case SIGTERM:
130 		session_quit = 1;
131 		break;
132 	}
133 }
134 
135 int
136 setup_listeners(u_int *la_cnt)
137 {
138 	int			 ttl = 255;
139 	struct listen_addr	*la;
140 	u_int			 cnt = 0;
141 
142 	TAILQ_FOREACH(la, conf->listen_addrs, entry) {
143 		la->reconf = RECONF_NONE;
144 		cnt++;
145 
146 		if (la->flags & LISTENER_LISTENING)
147 			continue;
148 
149 		if (la->fd == -1) {
150 			log_warn("cannot establish listener on %s: invalid fd",
151 			    log_sockaddr((struct sockaddr *)&la->sa,
152 			    la->sa_len));
153 			continue;
154 		}
155 
156 		if (tcp_md5_prep_listener(la, &conf->peers) == -1)
157 			fatal("tcp_md5_prep_listener");
158 
159 		/* set ttl to 255 so that ttl-security works */
160 		if (la->sa.ss_family == AF_INET && setsockopt(la->fd,
161 		    IPPROTO_IP, IP_TTL, &ttl, sizeof(ttl)) == -1) {
162 			log_warn("setup_listeners setsockopt TTL");
163 			continue;
164 		}
165 		if (la->sa.ss_family == AF_INET6 && setsockopt(la->fd,
166 		    IPPROTO_IPV6, IPV6_UNICAST_HOPS, &ttl, sizeof(ttl)) == -1) {
167 			log_warn("setup_listeners setsockopt hoplimit");
168 			continue;
169 		}
170 
171 		if (listen(la->fd, MAX_BACKLOG)) {
172 			close(la->fd);
173 			fatal("listen");
174 		}
175 
176 		la->flags |= LISTENER_LISTENING;
177 
178 		log_info("listening on %s",
179 		    log_sockaddr((struct sockaddr *)&la->sa, la->sa_len));
180 	}
181 
182 	*la_cnt = cnt;
183 
184 	return (0);
185 }
186 
/*
 * Entry point and main loop of the session engine process.
 *
 * debug is handed to log_init(), verbose to log_setverbose().
 *
 * Startup: chroot into the home directory of BGPD_USER, drop privileges
 * to that user, pledge, install the signal handlers and allocate the imsg
 * buffer towards the parent.
 *
 * Each loop iteration, until session_quit is set:
 *  - unless a reconfiguration is pending, initialize new peers and handle
 *    peers flagged RECONF_REINIT or RECONF_DELETE,
 *  - grow the peer_l, mrt_l and pfd arrays if the counts went up,
 *  - rebuild the pollfd array: imsg pipes, control sockets, listeners,
 *    peers, mrt dumps with queued output, control connections,
 *  - run the peer timer that is due and derive the poll timeout from the
 *    remaining timers,
 *  - poll() and dispatch whatever became ready.
 *
 * Shutdown: stop and free all peers and mrt entries, free the
 * configuration, flush and close the pipes, shut the control sockets
 * down.  Does not return; the process ends with exit(0).
 */
187 void
188 session_main(int debug, int verbose)
189 {
190 	int			 timeout;
191 	unsigned int		 i, j, idx_peers, idx_listeners, idx_mrts;
192 	u_int			 pfd_elms = 0, peer_l_elms = 0, mrt_l_elms = 0;
193 	u_int			 listener_cnt, ctl_cnt, mrt_cnt;
194 	u_int			 new_cnt;
195 	struct passwd		*pw;
196 	struct peer		*p, **peer_l = NULL, *next;
197 	struct mrt		*m, *xm, **mrt_l = NULL;
198 	struct pollfd		*pfd = NULL;
199 	struct ctl_conn		*ctl_conn;
200 	struct listen_addr	*la;
201 	void			*newp;
202 	time_t			 now;
203 	short			 events;
204 
205 	log_init(debug, LOG_DAEMON);
206 	log_setverbose(verbose);
207 
208 	bgpd_process = PROC_SE;
209 	log_procinit(log_procnames[bgpd_process]);
210 
211 	if ((pw = getpwnam(BGPD_USER)) == NULL)
212 		fatal(NULL);
213 
	/* confine the process to the home directory of BGPD_USER */
214 	if (chroot(pw->pw_dir) == -1)
215 		fatal("chroot");
216 	if (chdir("/") == -1)
217 		fatal("chdir(\"/\")");
218 
219 	setproctitle("session engine");
220 
221 	if (setgroups(1, &pw->pw_gid) ||
222 	    setresgid(pw->pw_gid, pw->pw_gid, pw->pw_gid) ||
223 	    setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid))
224 		fatal("can't drop privileges");
225 
226 	if (pledge("stdio inet recvfd", NULL) == -1)
227 		fatal("pledge");
228 
	/* only SIGTERM and SIGINT are acted upon, the rest is ignored */
229 	signal(SIGTERM, session_sighdlr);
230 	signal(SIGINT, session_sighdlr);
231 	signal(SIGPIPE, SIG_IGN);
232 	signal(SIGHUP, SIG_IGN);
233 	signal(SIGALRM, SIG_IGN);
234 	signal(SIGUSR1, SIG_IGN);
235 
236 	if ((ibuf_main = malloc(sizeof(struct imsgbuf))) == NULL)
237 		fatal(NULL);
238 	imsg_init(ibuf_main, 3);
239 
240 	TAILQ_INIT(&ctl_conns);
241 	LIST_INIT(&mrthead);
242 	listener_cnt = 0;
243 	peer_cnt = 0;
244 	ctl_cnt = 0;
245 
246 	conf = new_config();
247 	log_info("session engine ready");
248 
249 	while (session_quit == 0) {
250 		/* check for peers to be initialized or deleted */
251 		if (!pending_reconf) {
252 			RB_FOREACH_SAFE(p, peer_head, &conf->peers, next) {
253 				/* cloned peer that idled out? */
254 				if (p->template && (p->state == STATE_IDLE ||
255 				    p->state == STATE_ACTIVE) &&
256 				    getmonotime() - p->stats.last_updown >=
257 				    INTERVAL_HOLD_CLONED)
258 					p->reconf_action = RECONF_DELETE;
259 
260 				/* new peer that needs init? */
261 				if (p->state == STATE_NONE)
262 					init_peer(p);
263 
264 				/* reinit due? */
265 				if (p->reconf_action == RECONF_REINIT) {
266 					session_stop(p, ERR_CEASE_ADMIN_RESET);
267 					if (!p->conf.down)
268 						timer_set(p, Timer_IdleHold, 0);
269 				}
270 
271 				/* deletion due? */
272 				if (p->reconf_action == RECONF_DELETE) {
273 					if (p->demoted)
274 						session_demote(p, -1);
275 					p->conf.demote_group[0] = 0;
276 					session_stop(p, ERR_CEASE_PEER_UNCONF);
277 					timer_remove_all(p);
278 					tcp_md5_del_listener(conf, p);
279 					log_peer_warnx(&p->conf, "removed");
280 					RB_REMOVE(peer_head, &conf->peers, p);
281 					free(p);
282 					peer_cnt--;
283 					continue;
284 				}
285 				p->reconf_action = RECONF_NONE;
286 			}
287 		}
288 
		/* peer_l maps pollfd slots back to peers; it only ever grows */
289 		if (peer_cnt > peer_l_elms) {
290 			if ((newp = reallocarray(peer_l, peer_cnt,
291 			    sizeof(struct peer *))) == NULL) {
292 				/* panic for now  */
293 				log_warn("could not resize peer_l from %u -> %u"
294 				    " entries", peer_l_elms, peer_cnt);
295 				fatalx("exiting");
296 			}
297 			peer_l = newp;
298 			peer_l_elms = peer_cnt;
299 		}
300 
		/* drop mrt entries marked for removal, count those with output */
301 		mrt_cnt = 0;
302 		for (m = LIST_FIRST(&mrthead); m != NULL; m = xm) {
303 			xm = LIST_NEXT(m, entry);
304 			if (m->state == MRT_STATE_REMOVE) {
305 				mrt_clean(m);
306 				LIST_REMOVE(m, entry);
307 				free(m);
308 				continue;
309 			}
310 			if (m->wbuf.queued)
311 				mrt_cnt++;
312 		}
313 
314 		if (mrt_cnt > mrt_l_elms) {
315 			if ((newp = reallocarray(mrt_l, mrt_cnt,
316 			    sizeof(struct mrt *))) == NULL) {
317 				/* panic for now  */
318 				log_warn("could not resize mrt_l from %u -> %u"
319 				    " entries", mrt_l_elms, mrt_cnt);
320 				fatalx("exiting");
321 			}
322 			mrt_l = newp;
323 			mrt_l_elms = mrt_cnt;
324 		}
325 
		/* one pollfd slot per fixed entry, listener, peer, ctl and mrt */
326 		new_cnt = PFD_LISTENERS_START + listener_cnt + peer_cnt +
327 		    ctl_cnt + mrt_cnt;
328 		if (new_cnt > pfd_elms) {
329 			if ((newp = reallocarray(pfd, new_cnt,
330 			    sizeof(struct pollfd))) == NULL) {
331 				/* panic for now  */
332 				log_warn("could not resize pfd from %u -> %u"
333 				    " entries", pfd_elms, new_cnt);
334 				fatalx("exiting");
335 			}
336 			pfd = newp;
337 			pfd_elms = new_cnt;
338 		}
339 
		/* the poll set is rebuilt from scratch on every iteration */
340 		bzero(pfd, sizeof(struct pollfd) * pfd_elms);
341 
342 		set_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main);
343 		set_pollfd(&pfd[PFD_PIPE_ROUTE], ibuf_rde);
344 		set_pollfd(&pfd[PFD_PIPE_ROUTE_CTL], ibuf_rde_ctl);
345 
		/* while accepts are paused no accepting socket is polled */
346 		if (pauseaccept == 0) {
347 			pfd[PFD_SOCK_CTL].fd = csock;
348 			pfd[PFD_SOCK_CTL].events = POLLIN;
349 			pfd[PFD_SOCK_RCTL].fd = rcsock;
350 			pfd[PFD_SOCK_RCTL].events = POLLIN;
351 		} else {
352 			pfd[PFD_SOCK_CTL].fd = -1;
353 			pfd[PFD_SOCK_RCTL].fd = -1;
354 		}
355 
356 		i = PFD_LISTENERS_START;
357 		TAILQ_FOREACH(la, conf->listen_addrs, entry) {
358 			if (pauseaccept == 0) {
359 				pfd[i].fd = la->fd;
360 				pfd[i].events = POLLIN;
361 			} else
362 				pfd[i].fd = -1;
363 			i++;
364 		}
365 		idx_listeners = i;
366 		timeout = 240;	/* loop every 240s at least */
367 
368 		now = getmonotime();
369 		RB_FOREACH(p, peer_head, &conf->peers) {
370 			time_t	nextaction;
371 			struct peer_timer *pt;
372 
373 			/* check timers */
374 			if ((pt = timer_nextisdue(p, now)) != NULL) {
375 				switch (pt->type) {
376 				case Timer_Hold:
377 					bgp_fsm(p, EVNT_TIMER_HOLDTIME);
378 					break;
379 				case Timer_ConnectRetry:
380 					bgp_fsm(p, EVNT_TIMER_CONNRETRY);
381 					break;
382 				case Timer_Keepalive:
383 					bgp_fsm(p, EVNT_TIMER_KEEPALIVE);
384 					break;
385 				case Timer_IdleHold:
386 					bgp_fsm(p, EVNT_START);
387 					break;
388 				case Timer_IdleHoldReset:
389 					p->IdleHoldTime =
390 					    INTERVAL_IDLE_HOLD_INITIAL;
391 					p->errcnt = 0;
392 					timer_stop(p, Timer_IdleHoldReset);
393 					break;
394 				case Timer_CarpUndemote:
395 					timer_stop(p, Timer_CarpUndemote);
396 					if (p->demoted &&
397 					    p->state == STATE_ESTABLISHED)
398 						session_demote(p, -1);
399 					break;
400 				case Timer_RestartTimeout:
401 					timer_stop(p, Timer_RestartTimeout);
402 					session_graceful_stop(p);
403 					break;
404 				default:
405 					fatalx("King Bula lost in time");
406 				}
407 			}
			/* wake up no later than this peer's next timer */
408 			if ((nextaction = timer_nextduein(p, now)) != -1 &&
409 			    nextaction < timeout)
410 				timeout = nextaction;
411 
412 			/* are we waiting for a write? */
413 			events = POLLIN;
414 			if (p->wbuf.queued > 0 || p->state == STATE_CONNECT)
415 				events |= POLLOUT;
416 			/* is there still work to do? */
417 			if (p->rpending && p->rbuf && p->rbuf->wpos)
418 				timeout = 0;
419 
420 			/* poll events */
421 			if (p->fd != -1 && events != 0) {
422 				pfd[i].fd = p->fd;
423 				pfd[i].events = events;
424 				peer_l[i - idx_listeners] = p;
425 				i++;
426 			}
427 		}
428 
429 		idx_peers = i;
430 
		/* mrt dumps are only polled while they have queued output */
431 		LIST_FOREACH(m, &mrthead, entry)
432 			if (m->wbuf.queued) {
433 				pfd[i].fd = m->wbuf.fd;
434 				pfd[i].events = POLLOUT;
435 				mrt_l[i - idx_peers] = m;
436 				i++;
437 			}
438 
439 		idx_mrts = i;
440 
441 		TAILQ_FOREACH(ctl_conn, &ctl_conns, entry) {
442 			pfd[i].fd = ctl_conn->ibuf.fd;
443 			pfd[i].events = POLLIN;
444 			if (ctl_conn->ibuf.w.queued > 0)
445 				pfd[i].events |= POLLOUT;
446 			i++;
447 		}
448 
		/* timeout is kept in seconds, poll() takes milliseconds */
449 		if (pauseaccept && timeout > 1)
450 			timeout = 1;
451 		if (timeout < 0)
452 			timeout = 0;
453 		if (poll(pfd, i, timeout * 1000) == -1)
454 			if (errno != EINTR)
455 				fatal("poll error");
456 
457 		/*
458 		 * If we previously saw fd exhaustion, we stop accept()
459 		 * for 1 second to throttle the accept() loop.
460 		 */
461 		if (pauseaccept && getmonotime() > pauseaccept + 1)
462 			pauseaccept = 0;
463 
464 		if (handle_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main) == -1) {
465 			log_warnx("SE: Lost connection to parent");
466 			session_quit = 1;
467 			continue;
468 		} else
469 			session_dispatch_imsg(ibuf_main, PFD_PIPE_MAIN,
470 			    &listener_cnt);
471 
472 		if (handle_pollfd(&pfd[PFD_PIPE_ROUTE], ibuf_rde) == -1) {
473 			log_warnx("SE: Lost connection to RDE");
474 			msgbuf_clear(&ibuf_rde->w);
475 			free(ibuf_rde);
476 			ibuf_rde = NULL;
477 		} else
478 			session_dispatch_imsg(ibuf_rde, PFD_PIPE_ROUTE,
479 			    &listener_cnt);
480 
481 		if (handle_pollfd(&pfd[PFD_PIPE_ROUTE_CTL], ibuf_rde_ctl) ==
482 		    -1) {
483 			log_warnx("SE: Lost connection to RDE control");
484 			msgbuf_clear(&ibuf_rde_ctl->w);
485 			free(ibuf_rde_ctl);
486 			ibuf_rde_ctl = NULL;
487 		} else
488 			session_dispatch_imsg(ibuf_rde_ctl, PFD_PIPE_ROUTE_CTL,
489 			    &listener_cnt);
490 
491 		if (pfd[PFD_SOCK_CTL].revents & POLLIN)
492 			ctl_cnt += control_accept(csock, 0);
493 
494 		if (pfd[PFD_SOCK_RCTL].revents & POLLIN)
495 			ctl_cnt += control_accept(rcsock, 1);
496 
		/* j walks the pollfd array in the order it was filled above */
497 		for (j = PFD_LISTENERS_START; j < idx_listeners; j++)
498 			if (pfd[j].revents & POLLIN)
499 				session_accept(pfd[j].fd);
500 
501 		for (; j < idx_peers; j++)
502 			session_dispatch_msg(&pfd[j],
503 			    peer_l[j - idx_listeners]);
504 
		/* process whatever sits in the peers' read buffers */
505 		RB_FOREACH(p, peer_head, &conf->peers)
506 			if (p->rbuf && p->rbuf->wpos)
507 				session_process_msg(p);
508 
509 		for (; j < idx_mrts; j++)
510 			if (pfd[j].revents & POLLOUT)
511 				mrt_write(mrt_l[j - idx_peers]);
512 
513 		for (; j < i; j++)
514 			control_dispatch_msg(&pfd[j], &ctl_cnt, &conf->peers);
515 	}
516 
	/* shutting down: stop every peer and release it */
517 	RB_FOREACH_SAFE(p, peer_head, &conf->peers, next) {
518 		RB_REMOVE(peer_head, &conf->peers, p);
519 		strlcpy(p->conf.reason,
520 		    "bgpd shutting down",
521 		    sizeof(p->conf.reason));
522 		session_stop(p, ERR_CEASE_ADMIN_DOWN);
523 		timer_remove_all(p);
524 		free(p);
525 	}
526 
527 	while ((m = LIST_FIRST(&mrthead)) != NULL) {
528 		mrt_clean(m);
529 		LIST_REMOVE(m, entry);
530 		free(m);
531 	}
532 
533 	free_config(conf);
534 	free(peer_l);
535 	free(mrt_l);
536 	free(pfd);
537 
538 	/* close pipes */
539 	if (ibuf_rde) {
540 		msgbuf_write(&ibuf_rde->w);
541 		msgbuf_clear(&ibuf_rde->w);
542 		close(ibuf_rde->fd);
543 		free(ibuf_rde);
544 	}
545 	if (ibuf_rde_ctl) {
546 		msgbuf_clear(&ibuf_rde_ctl->w);
547 		close(ibuf_rde_ctl->fd);
548 		free(ibuf_rde_ctl);
549 	}
550 	msgbuf_write(&ibuf_main->w);
551 	msgbuf_clear(&ibuf_main->w);
552 	close(ibuf_main->fd);
553 	free(ibuf_main);
554 
555 	control_shutdown(csock);
556 	control_shutdown(rcsock);
557 	log_info("session engine exiting");
558 	exit(0);
559 }
560 
561 void
562 init_peer(struct peer *p)
563 {
564 	TAILQ_INIT(&p->timers);
565 	p->fd = p->wbuf.fd = -1;
566 
567 	if (p->conf.if_depend[0])
568 		imsg_compose(ibuf_main, IMSG_IFINFO, 0, 0, -1,
569 		    p->conf.if_depend, sizeof(p->conf.if_depend));
570 	else
571 		p->depend_ok = 1;
572 
573 	peer_cnt++;
574 
575 	change_state(p, STATE_IDLE, EVNT_NONE);
576 	if (p->conf.down)
577 		timer_stop(p, Timer_IdleHold);		/* no autostart */
578 	else
579 		timer_set(p, Timer_IdleHold, 0);	/* start ASAP */
580 
581 	/*
582 	 * on startup, demote if requested.
583 	 * do not handle new peers. they must reach ESTABLISHED beforehands.
584 	 * peers added at runtime have reconf_action set to RECONF_REINIT.
585 	 */
586 	if (p->reconf_action != RECONF_REINIT && p->conf.demote_group[0])
587 		session_demote(p, +1);
588 }
589 
/*
 * BGP finite state machine: apply event to peer.
 *
 * The outer switch selects the peer's current state, the inner switch
 * the event.  Transitions are carried out through change_state(); the
 * handlers start or stop timers, open or close the connection and send
 * OPEN, KEEPALIVE or NOTIFICATION messages as needed.  Events a state
 * has no explicit case for fall into that state's default branch.
 */
590 void
591 bgp_fsm(struct peer *peer, enum session_events event)
592 {
593 	switch (peer->state) {
594 	case STATE_NONE:
595 		/* nothing */
596 		break;
597 	case STATE_IDLE:
		/* only EVNT_START is acted upon while idle */
598 		switch (event) {
599 		case EVNT_START:
600 			timer_stop(peer, Timer_Hold);
601 			timer_stop(peer, Timer_Keepalive);
602 			timer_stop(peer, Timer_IdleHold);
603 
604 			/* allocate read buffer */
605 			peer->rbuf = calloc(1, sizeof(struct ibuf_read));
606 			if (peer->rbuf == NULL)
607 				fatal(NULL);
608 
609 			/* init write buffer */
610 			msgbuf_init(&peer->wbuf);
611 
612 			peer->stats.last_sent_errcode = 0;
613 			peer->stats.last_sent_suberr = 0;
614 			peer->stats.last_rcvd_errcode = 0;
615 			peer->stats.last_rcvd_suberr = 0;
616 
			/*
			 * stay in IDLE while the dependency is not met, wait
			 * in ACTIVE for passive and template peers, connect
			 * out otherwise
			 */
617 			if (!peer->depend_ok)
618 				timer_stop(peer, Timer_ConnectRetry);
619 			else if (peer->passive || peer->conf.passive ||
620 			    peer->conf.template) {
621 				change_state(peer, STATE_ACTIVE, event);
622 				timer_stop(peer, Timer_ConnectRetry);
623 			} else {
624 				change_state(peer, STATE_CONNECT, event);
625 				timer_set(peer, Timer_ConnectRetry,
626 				    conf->connectretry);
627 				session_connect(peer);
628 			}
629 			peer->passive = 0;
630 			break;
631 		default:
632 			/* ignore */
633 			break;
634 		}
635 		break;
636 	case STATE_CONNECT:
637 		switch (event) {
638 		case EVNT_START:
639 			/* ignore */
640 			break;
641 		case EVNT_CON_OPEN:
642 			session_tcp_established(peer);
643 			session_open(peer);
644 			timer_stop(peer, Timer_ConnectRetry);
645 			peer->holdtime = INTERVAL_HOLD_INITIAL;
646 			start_timer_holdtime(peer);
647 			change_state(peer, STATE_OPENSENT, event);
648 			break;
649 		case EVNT_CON_OPENFAIL:
650 			timer_set(peer, Timer_ConnectRetry,
651 			    conf->connectretry);
652 			session_close_connection(peer);
653 			change_state(peer, STATE_ACTIVE, event);
654 			break;
655 		case EVNT_TIMER_CONNRETRY:
656 			timer_set(peer, Timer_ConnectRetry,
657 			    conf->connectretry);
658 			session_connect(peer);
659 			break;
660 		default:
661 			change_state(peer, STATE_IDLE, event);
662 			break;
663 		}
664 		break;
665 	case STATE_ACTIVE:
666 		switch (event) {
667 		case EVNT_START:
668 			/* ignore */
669 			break;
670 		case EVNT_CON_OPEN:
671 			session_tcp_established(peer);
672 			session_open(peer);
673 			timer_stop(peer, Timer_ConnectRetry);
674 			peer->holdtime = INTERVAL_HOLD_INITIAL;
675 			start_timer_holdtime(peer);
676 			change_state(peer, STATE_OPENSENT, event);
677 			break;
678 		case EVNT_CON_OPENFAIL:
679 			timer_set(peer, Timer_ConnectRetry,
680 			    conf->connectretry);
681 			session_close_connection(peer);
682 			change_state(peer, STATE_ACTIVE, event);
683 			break;
684 		case EVNT_TIMER_CONNRETRY:
			/*
			 * NOTE(review): the retry timer is armed with
			 * peer->holdtime here while every other place uses
			 * conf->connectretry - confirm this is intended.
			 */
685 			timer_set(peer, Timer_ConnectRetry,
686 			    peer->holdtime);
687 			change_state(peer, STATE_CONNECT, event);
688 			session_connect(peer);
689 			break;
690 		default:
691 			change_state(peer, STATE_IDLE, event);
692 			break;
693 		}
694 		break;
695 	case STATE_OPENSENT:
696 		switch (event) {
697 		case EVNT_START:
698 			/* ignore */
699 			break;
700 		case EVNT_STOP:
701 			change_state(peer, STATE_IDLE, event);
702 			break;
703 		case EVNT_CON_CLOSED:
704 			session_close_connection(peer);
705 			timer_set(peer, Timer_ConnectRetry,
706 			    conf->connectretry);
707 			change_state(peer, STATE_ACTIVE, event);
708 			break;
709 		case EVNT_CON_FATAL:
710 			change_state(peer, STATE_IDLE, event);
711 			break;
712 		case EVNT_TIMER_HOLDTIME:
713 			session_notification(peer, ERR_HOLDTIMEREXPIRED,
714 			    0, NULL, 0);
715 			change_state(peer, STATE_IDLE, event);
716 			break;
717 		case EVNT_RCVD_OPEN:
718 			/* parse_open calls change_state itself on failure */
719 			if (parse_open(peer))
720 				break;
721 			session_keepalive(peer);
722 			change_state(peer, STATE_OPENCONFIRM, event);
723 			break;
724 		case EVNT_RCVD_NOTIFICATION:
725 			if (parse_notification(peer)) {
726 				change_state(peer, STATE_IDLE, event);
727 				/* don't punish, capa negotiation */
728 				timer_set(peer, Timer_IdleHold, 0);
729 				peer->IdleHoldTime /= 2;
730 			} else
731 				change_state(peer, STATE_IDLE, event);
732 			break;
733 		default:
734 			session_notification(peer,
735 			    ERR_FSM, ERR_FSM_UNEX_OPENSENT, NULL, 0);
736 			change_state(peer, STATE_IDLE, event);
737 			break;
738 		}
739 		break;
740 	case STATE_OPENCONFIRM:
741 		switch (event) {
742 		case EVNT_START:
743 			/* ignore */
744 			break;
745 		case EVNT_STOP:
746 			change_state(peer, STATE_IDLE, event);
747 			break;
748 		case EVNT_CON_CLOSED:
749 		case EVNT_CON_FATAL:
750 			change_state(peer, STATE_IDLE, event);
751 			break;
752 		case EVNT_TIMER_HOLDTIME:
753 			session_notification(peer, ERR_HOLDTIMEREXPIRED,
754 			    0, NULL, 0);
755 			change_state(peer, STATE_IDLE, event);
756 			break;
757 		case EVNT_TIMER_KEEPALIVE:
758 			session_keepalive(peer);
759 			break;
760 		case EVNT_RCVD_KEEPALIVE:
			/* the first KEEPALIVE completes the handshake */
761 			start_timer_holdtime(peer);
762 			change_state(peer, STATE_ESTABLISHED, event);
763 			break;
764 		case EVNT_RCVD_NOTIFICATION:
765 			parse_notification(peer);
766 			change_state(peer, STATE_IDLE, event);
767 			break;
768 		default:
769 			session_notification(peer,
770 			    ERR_FSM, ERR_FSM_UNEX_OPENCONFIRM, NULL, 0);
771 			change_state(peer, STATE_IDLE, event);
772 			break;
773 		}
774 		break;
775 	case STATE_ESTABLISHED:
776 		switch (event) {
777 		case EVNT_START:
778 			/* ignore */
779 			break;
780 		case EVNT_STOP:
781 			change_state(peer, STATE_IDLE, event);
782 			break;
783 		case EVNT_CON_CLOSED:
784 		case EVNT_CON_FATAL:
785 			change_state(peer, STATE_IDLE, event);
786 			break;
787 		case EVNT_TIMER_HOLDTIME:
788 			session_notification(peer, ERR_HOLDTIMEREXPIRED,
789 			    0, NULL, 0);
790 			change_state(peer, STATE_IDLE, event);
791 			break;
792 		case EVNT_TIMER_KEEPALIVE:
793 			session_keepalive(peer);
794 			break;
795 		case EVNT_RCVD_KEEPALIVE:
796 			start_timer_holdtime(peer);
797 			break;
798 		case EVNT_RCVD_UPDATE:
			/*
			 * the hold timer is restarted before parsing and once
			 * more after parse_update() succeeded
			 */
799 			start_timer_holdtime(peer);
800 			if (parse_update(peer))
801 				change_state(peer, STATE_IDLE, event);
802 			else
803 				start_timer_holdtime(peer);
804 			break;
805 		case EVNT_RCVD_NOTIFICATION:
806 			parse_notification(peer);
807 			change_state(peer, STATE_IDLE, event);
808 			break;
809 		default:
810 			session_notification(peer,
811 			    ERR_FSM, ERR_FSM_UNEX_ESTABLISHED, NULL, 0);
812 			change_state(peer, STATE_IDLE, event);
813 			break;
814 		}
815 		break;
816 	}
817 }
818 
819 void
820 start_timer_holdtime(struct peer *peer)
821 {
822 	if (peer->holdtime > 0)
823 		timer_set(peer, Timer_Hold, peer->holdtime);
824 	else
825 		timer_stop(peer, Timer_Hold);
826 }
827 
828 void
829 start_timer_keepalive(struct peer *peer)
830 {
831 	if (peer->holdtime > 0)
832 		timer_set(peer, Timer_Keepalive, peer->holdtime / 3);
833 	else
834 		timer_stop(peer, Timer_Keepalive);
835 }
836 
837 void
838 session_close_connection(struct peer *peer)
839 {
840 	if (peer->fd != -1) {
841 		close(peer->fd);
842 		pauseaccept = 0;
843 	}
844 	peer->fd = peer->wbuf.fd = -1;
845 }
846 
/*
 * Move peer to the given state in response to event.
 *
 * The switch carries out the work tied to entering the new state; the
 * peer's current state (peer->state) is still the old one while it runs.
 * Afterwards the change is logged, dumped to the matching MRT_ALL_IN /
 * MRT_ALL_OUT dumps, and prev_state/state are updated.
 *
 * Entering STATE_IDLE tears the session down: timers are stopped, the
 * connection is closed, buffers are released and, unless the event is
 * EVNT_STOP, the IdleHold timer is armed for the next start.
 */
847 void
848 change_state(struct peer *peer, enum session_state state,
849     enum session_events event)
850 {
851 	struct mrt	*mrt;
852 
853 	switch (state) {
854 	case STATE_IDLE:
855 		/* carp demotion first. new peers handled in init_peer */
856 		if (peer->state == STATE_ESTABLISHED &&
857 		    peer->conf.demote_group[0] && !peer->demoted)
858 			session_demote(peer, +1);
859 
860 		/*
861 		 * try to write out what's buffered (maybe a notification),
862 		 * don't bother if it fails
863 		 */
864 		if (peer->state >= STATE_OPENSENT && peer->wbuf.queued)
865 			msgbuf_write(&peer->wbuf);
866 
867 		/*
868 		 * we must start the timer for the next EVNT_START
869 		 * if we are coming here due to an error and the
870 		 * session was not established successfully before, the
871 		 * starttimerinterval needs to be exponentially increased
872 		 */
873 		if (peer->IdleHoldTime == 0)
874 			peer->IdleHoldTime = INTERVAL_IDLE_HOLD_INITIAL;
875 		peer->holdtime = INTERVAL_HOLD_INITIAL;
876 		timer_stop(peer, Timer_ConnectRetry);
877 		timer_stop(peer, Timer_Keepalive);
878 		timer_stop(peer, Timer_Hold);
879 		timer_stop(peer, Timer_IdleHold);
880 		timer_stop(peer, Timer_IdleHoldReset);
881 		session_close_connection(peer);
882 		msgbuf_clear(&peer->wbuf);
883 		free(peer->rbuf);
884 		peer->rbuf = NULL;
885 		peer->rpending = 0;
886 		bzero(&peer->capa.peer, sizeof(peer->capa.peer));
887 		if (!peer->template)
888 			imsg_compose(ibuf_main, IMSG_PFKEY_RELOAD,
889 			    peer->conf.id, 0, -1, NULL, 0);
890 
		/*
		 * arm the restart timer with the current value, then double
		 * the value for the next time unless the event is EVNT_NONE
		 * or the value already reached MAX_IDLE_HOLD/2
		 */
891 		if (event != EVNT_STOP) {
892 			timer_set(peer, Timer_IdleHold, peer->IdleHoldTime);
893 			if (event != EVNT_NONE &&
894 			    peer->IdleHoldTime < MAX_IDLE_HOLD/2)
895 				peer->IdleHoldTime *= 2;
896 		}
897 		if (peer->state == STATE_ESTABLISHED) {
898 			if (peer->capa.neg.grestart.restart == 2 &&
899 			    (event == EVNT_CON_CLOSED ||
900 			    event == EVNT_CON_FATAL)) {
901 				/* don't punish graceful restart */
902 				timer_set(peer, Timer_IdleHold, 0);
903 				peer->IdleHoldTime /= 2;
904 				session_graceful_restart(peer);
905 			} else
906 				session_down(peer);
907 		}
908 		if (peer->state == STATE_NONE ||
909 		    peer->state == STATE_ESTABLISHED) {
910 			/* initialize capability negotiation structures */
911 			memcpy(&peer->capa.ann, &peer->conf.capabilities,
912 			    sizeof(peer->capa.ann));
913 			if (!peer->conf.announce_capa)
914 				session_capa_ann_none(peer);
915 		}
916 		break;
917 	case STATE_CONNECT:
		/* only reached from ESTABLISHED via session_accept() here */
918 		if (peer->state == STATE_ESTABLISHED &&
919 		    peer->capa.neg.grestart.restart == 2) {
920 			/* do the graceful restart dance */
921 			session_graceful_restart(peer);
922 			peer->holdtime = INTERVAL_HOLD_INITIAL;
923 			timer_stop(peer, Timer_ConnectRetry);
924 			timer_stop(peer, Timer_Keepalive);
925 			timer_stop(peer, Timer_Hold);
926 			timer_stop(peer, Timer_IdleHold);
927 			timer_stop(peer, Timer_IdleHoldReset);
928 			session_close_connection(peer);
929 			msgbuf_clear(&peer->wbuf);
930 			bzero(&peer->capa.peer, sizeof(peer->capa.peer));
931 		}
932 		break;
933 	case STATE_ACTIVE:
934 		if (!peer->template)
935 			imsg_compose(ibuf_main, IMSG_PFKEY_RELOAD,
936 			    peer->conf.id, 0, -1, NULL, 0);
937 		break;
938 	case STATE_OPENSENT:
939 		break;
940 	case STATE_OPENCONFIRM:
941 		break;
942 	case STATE_ESTABLISHED:
943 		timer_set(peer, Timer_IdleHoldReset, peer->IdleHoldTime);
944 		if (peer->demoted)
945 			timer_set(peer, Timer_CarpUndemote,
946 			    INTERVAL_HOLD_DEMOTED);
947 		session_up(peer);
948 		break;
949 	default:		/* unknown target state, nothing to prepare */
950 		break;
951 	}
952 
953 	log_statechange(peer, state, event);
	/* dump the transition to every all-in/all-out mrt matching the peer */
954 	LIST_FOREACH(mrt, &mrthead, entry) {
955 		if (!(mrt->type == MRT_ALL_IN || mrt->type == MRT_ALL_OUT))
956 			continue;
957 		if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
958 		    mrt->peer_id == peer->conf.id || (mrt->group_id != 0 &&
959 		    mrt->group_id == peer->conf.groupid))
960 			mrt_dump_state(mrt, peer->state, state, peer);
961 	}
962 	peer->prev_state = peer->state;
963 	peer->state = state;
964 }
965 
966 void
967 session_accept(int listenfd)
968 {
969 	int			 connfd;
970 	socklen_t		 len;
971 	struct sockaddr_storage	 cliaddr;
972 	struct peer		*p = NULL;
973 
974 	len = sizeof(cliaddr);
975 	if ((connfd = accept4(listenfd,
976 	    (struct sockaddr *)&cliaddr, &len,
977 	    SOCK_CLOEXEC | SOCK_NONBLOCK)) == -1) {
978 		if (errno == ENFILE || errno == EMFILE)
979 			pauseaccept = getmonotime();
980 		else if (errno != EWOULDBLOCK && errno != EINTR &&
981 		    errno != ECONNABORTED)
982 			log_warn("accept");
983 		return;
984 	}
985 
986 	p = getpeerbyip(conf, (struct sockaddr *)&cliaddr);
987 
988 	if (p != NULL && p->state == STATE_IDLE && p->errcnt < 2) {
989 		if (timer_running(p, Timer_IdleHold, NULL)) {
990 			/* fast reconnect after clear */
991 			p->passive = 1;
992 			bgp_fsm(p, EVNT_START);
993 		}
994 	}
995 
996 	if (p != NULL &&
997 	    (p->state == STATE_CONNECT || p->state == STATE_ACTIVE)) {
998 		if (p->fd != -1) {
999 			if (p->state == STATE_CONNECT)
1000 				session_close_connection(p);
1001 			else {
1002 				close(connfd);
1003 				return;
1004 			}
1005 		}
1006 
1007 open:
1008 		if (p->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) {
1009 			log_peer_warnx(&p->conf,
1010 			    "ipsec or md5sig configured but not available");
1011 			close(connfd);
1012 			return;
1013 		}
1014 
1015 		if (tcp_md5_check(connfd, p) == -1) {
1016 			close(connfd);
1017 			return;
1018 		}
1019 		p->fd = p->wbuf.fd = connfd;
1020 		if (session_setup_socket(p)) {
1021 			close(connfd);
1022 			return;
1023 		}
1024 		bgp_fsm(p, EVNT_CON_OPEN);
1025 		return;
1026 	} else if (p != NULL && p->state == STATE_ESTABLISHED &&
1027 	    p->capa.neg.grestart.restart == 2) {
1028 		/* first do the graceful restart dance */
1029 		change_state(p, STATE_CONNECT, EVNT_CON_CLOSED);
1030 		/* then do part of the open dance */
1031 		goto open;
1032 	} else {
1033 		log_conn_attempt(p, (struct sockaddr *)&cliaddr, len);
1034 		close(connfd);
1035 	}
1036 }
1037 
1038 int
1039 session_connect(struct peer *peer)
1040 {
1041 	struct sockaddr		*sa;
1042 	struct bgpd_addr	*bind_addr = NULL;
1043 	socklen_t		 sa_len;
1044 
1045 	/*
1046 	 * we do not need the overcomplicated collision detection RFC 1771
1047 	 * describes; we simply make sure there is only ever one concurrent
1048 	 * tcp connection per peer.
1049 	 */
1050 	if (peer->fd != -1)
1051 		return (-1);
1052 
1053 	if ((peer->fd = socket(aid2af(peer->conf.remote_addr.aid),
1054 	    SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, IPPROTO_TCP)) == -1) {
1055 		log_peer_warn(&peer->conf, "session_connect socket");
1056 		bgp_fsm(peer, EVNT_CON_OPENFAIL);
1057 		return (-1);
1058 	}
1059 
1060 	if (peer->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) {
1061 		log_peer_warnx(&peer->conf,
1062 		    "ipsec or md5sig configured but not available");
1063 		bgp_fsm(peer, EVNT_CON_OPENFAIL);
1064 		return (-1);
1065 	}
1066 
1067 	tcp_md5_set(peer->fd, peer);
1068 	peer->wbuf.fd = peer->fd;
1069 
1070 	/* if local-address is set we need to bind() */
1071 	switch (peer->conf.remote_addr.aid) {
1072 	case AID_INET:
1073 		bind_addr = &peer->conf.local_addr_v4;
1074 		break;
1075 	case AID_INET6:
1076 		bind_addr = &peer->conf.local_addr_v6;
1077 		break;
1078 	}
1079 	if (bind_addr && (sa = addr2sa(bind_addr, 0, &sa_len)) != NULL) {
1080 		if (bind(peer->fd, sa, sa_len) == -1) {
1081 			log_peer_warn(&peer->conf, "session_connect bind");
1082 			bgp_fsm(peer, EVNT_CON_OPENFAIL);
1083 			return (-1);
1084 		}
1085 	}
1086 
1087 	if (session_setup_socket(peer)) {
1088 		bgp_fsm(peer, EVNT_CON_OPENFAIL);
1089 		return (-1);
1090 	}
1091 
1092 	sa = addr2sa(&peer->conf.remote_addr, BGP_PORT, &sa_len);
1093 	if (connect(peer->fd, sa, sa_len) == -1) {
1094 		if (errno != EINPROGRESS) {
1095 			if (errno != peer->lasterr)
1096 				log_peer_warn(&peer->conf, "connect");
1097 			peer->lasterr = errno;
1098 			bgp_fsm(peer, EVNT_CON_OPENFAIL);
1099 			return (-1);
1100 		}
1101 	} else
1102 		bgp_fsm(peer, EVNT_CON_OPEN);
1103 
1104 	return (0);
1105 }
1106 
1107 int
1108 session_setup_socket(struct peer *p)
1109 {
1110 	int	ttl = p->conf.distance;
1111 	int	pre = IPTOS_PREC_INTERNETCONTROL;
1112 	int	nodelay = 1;
1113 	int	bsize;
1114 
1115 	switch (p->conf.remote_addr.aid) {
1116 	case AID_INET:
1117 		/* set precedence, see RFC 1771 appendix 5 */
1118 		if (setsockopt(p->fd, IPPROTO_IP, IP_TOS, &pre, sizeof(pre)) ==
1119 		    -1) {
1120 			log_peer_warn(&p->conf,
1121 			    "session_setup_socket setsockopt TOS");
1122 			return (-1);
1123 		}
1124 
1125 		if (p->conf.ebgp) {
1126 			/*
1127 			 * set TTL to foreign router's distance
1128 			 * 1=direct n=multihop with ttlsec, we always use 255
1129 			 */
1130 			if (p->conf.ttlsec) {
1131 				ttl = 256 - p->conf.distance;
1132 				if (setsockopt(p->fd, IPPROTO_IP, IP_MINTTL,
1133 				    &ttl, sizeof(ttl)) == -1) {
1134 					log_peer_warn(&p->conf,
1135 					    "session_setup_socket: "
1136 					    "setsockopt MINTTL");
1137 					return (-1);
1138 				}
1139 				ttl = 255;
1140 			}
1141 
1142 			if (setsockopt(p->fd, IPPROTO_IP, IP_TTL, &ttl,
1143 			    sizeof(ttl)) == -1) {
1144 				log_peer_warn(&p->conf,
1145 				    "session_setup_socket setsockopt TTL");
1146 				return (-1);
1147 			}
1148 		}
1149 		break;
1150 	case AID_INET6:
1151 		if (p->conf.ebgp) {
1152 			/*
1153 			 * set hoplimit to foreign router's distance
1154 			 * 1=direct n=multihop with ttlsec, we always use 255
1155 			 */
1156 			if (p->conf.ttlsec) {
1157 				ttl = 256 - p->conf.distance;
1158 				if (setsockopt(p->fd, IPPROTO_IPV6,
1159 				    IPV6_MINHOPCOUNT, &ttl, sizeof(ttl))
1160 				    == -1) {
1161 					log_peer_warn(&p->conf,
1162 					    "session_setup_socket: "
1163 					    "setsockopt MINHOPCOUNT");
1164 					return (-1);
1165 				}
1166 				ttl = 255;
1167 			}
1168 			if (setsockopt(p->fd, IPPROTO_IPV6, IPV6_UNICAST_HOPS,
1169 			    &ttl, sizeof(ttl)) == -1) {
1170 				log_peer_warn(&p->conf,
1171 				    "session_setup_socket setsockopt hoplimit");
1172 				return (-1);
1173 			}
1174 		}
1175 		break;
1176 	}
1177 
1178 	/* set TCP_NODELAY */
1179 	if (setsockopt(p->fd, IPPROTO_TCP, TCP_NODELAY, &nodelay,
1180 	    sizeof(nodelay)) == -1) {
1181 		log_peer_warn(&p->conf,
1182 		    "session_setup_socket setsockopt TCP_NODELAY");
1183 		return (-1);
1184 	}
1185 
1186 	/* only increase bufsize (and thus window) if md5 or ipsec is in use */
1187 	if (p->conf.auth.method != AUTH_NONE) {
1188 		/* try to increase bufsize. no biggie if it fails */
1189 		bsize = 65535;
1190 		while (bsize > 8192 &&
1191 		    setsockopt(p->fd, SOL_SOCKET, SO_RCVBUF, &bsize,
1192 		    sizeof(bsize)) == -1 && errno != EINVAL)
1193 			bsize /= 2;
1194 		bsize = 65535;
1195 		while (bsize > 8192 &&
1196 		    setsockopt(p->fd, SOL_SOCKET, SO_SNDBUF, &bsize,
1197 		    sizeof(bsize)) == -1 && errno != EINVAL)
1198 			bsize /= 2;
1199 	}
1200 
1201 	return (0);
1202 }
1203 
1204 /* compare two sockaddrs by converting them into bgpd_addr */
1205 static int
1206 sa_cmp(struct sockaddr *a, struct sockaddr *b)
1207 {
1208 	struct bgpd_addr ba, bb;
1209 
1210 	sa2addr(a, &ba, NULL);
1211 	sa2addr(b, &bb, NULL);
1212 
1213 	return (memcmp(&ba, &bb, sizeof(ba)) == 0);
1214 }
1215 
/*
 * Find an address of the other address family that is configured on the
 * same interface as the local session address sa and store it in alt.
 *
 * For an IPv6 session the first IPv4 address on that interface is used; for
 * an IPv4 session the first IPv6 address that is neither link-local nor
 * site-local is used.  alt is left untouched when the interface owning sa
 * cannot be found, when it has no suitable address, or when sa is of an
 * unsupported family.
 */
static void
get_alternate_addr(struct sockaddr *sa, struct bgpd_addr *alt)
{
	struct ifaddrs	*ifap, *ifa, *match;

	if (getifaddrs(&ifap) == -1)
		fatal("getifaddrs");

	/*
	 * Locate the interface entry carrying our local address.
	 * sa_cmp() returns non-zero when the two addresses are equal.
	 */
	for (match = ifap; match != NULL; match = match->ifa_next) {
		/* entries without an address cannot match */
		if (match->ifa_addr == NULL)
			continue;
		/* an address of a different family can never be equal */
		if (match->ifa_addr->sa_family != sa->sa_family)
			continue;
		if (sa_cmp(sa, match->ifa_addr))
			break;
	}

	if (match == NULL) {
		log_warnx("%s: local address not found", __func__);
		/* do not leak the interface list on the early exit */
		freeifaddrs(ifap);
		return;
	}

	switch (sa->sa_family) {
	case AF_INET6:
		for (ifa = ifap; ifa != NULL; ifa = ifa->ifa_next) {
			if (ifa->ifa_addr == NULL)
				continue;
			if (ifa->ifa_addr->sa_family == AF_INET &&
			    strcmp(ifa->ifa_name, match->ifa_name) == 0) {
				sa2addr(ifa->ifa_addr, alt, NULL);
				break;
			}
		}
		break;
	case AF_INET:
		for (ifa = ifap; ifa != NULL; ifa = ifa->ifa_next) {
			struct sockaddr_in6 *s;

			if (ifa->ifa_addr == NULL)
				continue;
			if (ifa->ifa_addr->sa_family == AF_INET6 &&
			    strcmp(ifa->ifa_name, match->ifa_name) == 0) {
				s = (struct sockaddr_in6 *)ifa->ifa_addr;
				/* only accept global scope addresses */
				if (IN6_IS_ADDR_LINKLOCAL(&s->sin6_addr) ||
				    IN6_IS_ADDR_SITELOCAL(&s->sin6_addr))
					continue;
				sa2addr(ifa->ifa_addr, alt, NULL);
				break;
			}
		}
		break;
	default:
		log_warnx("%s: unsupported address family %d", __func__,
		    sa->sa_family);
		break;
	}

	freeifaddrs(ifap);
}
1266 
1267 void
1268 session_tcp_established(struct peer *peer)
1269 {
1270 	struct sockaddr_storage	ss;
1271 	socklen_t		len;
1272 
1273 	len = sizeof(ss);
1274 	if (getsockname(peer->fd, (struct sockaddr *)&ss, &len) == -1)
1275 		log_warn("getsockname");
1276 	sa2addr((struct sockaddr *)&ss, &peer->local, &peer->local_port);
1277 	get_alternate_addr((struct sockaddr *)&ss, &peer->local_alt);
1278 	len = sizeof(ss);
1279 	if (getpeername(peer->fd, (struct sockaddr *)&ss, &len) == -1)
1280 		log_warn("getpeername");
1281 	sa2addr((struct sockaddr *)&ss, &peer->remote, &peer->remote_port);
1282 }
1283 
1284 void
1285 session_capa_ann_none(struct peer *peer)
1286 {
1287 	bzero(&peer->capa.ann, sizeof(peer->capa.ann));
1288 }
1289 
/*
 * Append a capability header (code octet followed by length octet) to opb.
 * Returns the sum of the ibuf_add() results, i.e. 0 when both succeeded.
 */
int
session_capa_add(struct ibuf *opb, u_int8_t capa_code, u_int8_t capa_len)
{
	int	failed;

	failed = ibuf_add(opb, &capa_code, sizeof(capa_code));
	failed += ibuf_add(opb, &capa_len, sizeof(capa_len));

	return (failed);
}
1299 
/*
 * Append the multiprotocol capability payload for aid to buf:
 * AFI (network byte order), one zero octet, SAFI.
 * An aid without AFI/SAFI mapping is fatal.  Returns the sum of the
 * ibuf_add() results, i.e. 0 on success.
 */
int
session_capa_add_mp(struct ibuf *buf, u_int8_t aid)
{
	u_int16_t		 afi, netafi;
	u_int8_t		 safi;
	u_int8_t		 reserved = 0;
	int			 failed;

	if (aid2afi(aid, &afi, &safi) == -1)
		fatalx("session_capa_add_mp: bad afi/safi pair");

	netafi = htons(afi);
	failed = ibuf_add(buf, &netafi, sizeof(netafi));
	failed += ibuf_add(buf, &reserved, sizeof(reserved));
	failed += ibuf_add(buf, &safi, sizeof(safi));

	return (failed);
}
1316 
1317 int
1318 session_capa_add_gr(struct peer *p, struct ibuf *b, u_int8_t aid)
1319 {
1320 	u_int		errs = 0;
1321 	u_int16_t	afi;
1322 	u_int8_t	flags, safi;
1323 
1324 	if (aid2afi(aid, &afi, &safi)) {
1325 		log_warn("session_capa_add_gr: bad AID");
1326 		return (1);
1327 	}
1328 	if (p->capa.neg.grestart.flags[aid] & CAPA_GR_RESTARTING)
1329 		flags = CAPA_GR_F_FLAG;
1330 	else
1331 		flags = 0;
1332 
1333 	afi = htons(afi);
1334 	errs += ibuf_add(b, &afi, sizeof(afi));
1335 	errs += ibuf_add(b, &safi, sizeof(safi));
1336 	errs += ibuf_add(b, &flags, sizeof(flags));
1337 
1338 	return (errs);
1339 }
1340 
1341 struct bgp_msg *
1342 session_newmsg(enum msg_type msgtype, u_int16_t len)
1343 {
1344 	struct bgp_msg		*msg;
1345 	struct msg_header	 hdr;
1346 	struct ibuf		*buf;
1347 	int			 errs = 0;
1348 
1349 	memset(&hdr.marker, 0xff, sizeof(hdr.marker));
1350 	hdr.len = htons(len);
1351 	hdr.type = msgtype;
1352 
1353 	if ((buf = ibuf_open(len)) == NULL)
1354 		return (NULL);
1355 
1356 	errs += ibuf_add(buf, &hdr.marker, sizeof(hdr.marker));
1357 	errs += ibuf_add(buf, &hdr.len, sizeof(hdr.len));
1358 	errs += ibuf_add(buf, &hdr.type, sizeof(hdr.type));
1359 
1360 	if (errs || (msg = calloc(1, sizeof(*msg))) == NULL) {
1361 		ibuf_free(buf);
1362 		return (NULL);
1363 	}
1364 
1365 	msg->buf = buf;
1366 	msg->type = msgtype;
1367 	msg->len = len;
1368 
1369 	return (msg);
1370 }
1371 
1372 int
1373 session_sendmsg(struct bgp_msg *msg, struct peer *p)
1374 {
1375 	struct mrt		*mrt;
1376 
1377 	LIST_FOREACH(mrt, &mrthead, entry) {
1378 		if (!(mrt->type == MRT_ALL_OUT || (msg->type == UPDATE &&
1379 		    mrt->type == MRT_UPDATE_OUT)))
1380 			continue;
1381 		if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
1382 		    mrt->peer_id == p->conf.id || (mrt->group_id != 0 &&
1383 		    mrt->group_id == p->conf.groupid))
1384 			mrt_dump_bgp_msg(mrt, msg->buf->buf, msg->len, p);
1385 	}
1386 
1387 	ibuf_close(&p->wbuf, msg->buf);
1388 	if (!p->throttled && p->wbuf.queued > SESS_MSG_HIGH_MARK) {
1389 		if (imsg_rde(IMSG_XOFF, p->conf.id, NULL, 0) == -1)
1390 			log_peer_warn(&p->conf, "imsg_compose XOFF");
1391 		else
1392 			p->throttled = 1;
1393 	}
1394 
1395 	free(msg);
1396 	return (0);
1397 }
1398 
/*
 * Build the BGP OPEN message for peer p and queue it for sending.
 *
 * The announced capabilities (multiprotocol, route refresh, graceful
 * restart, 4-byte AS) are first collected in a scratch buffer and then
 * appended to the fixed OPEN fields as a single optional parameter of type
 * OPT_PARAM_CAPABILITIES.  Any allocation or buffer error is reported to
 * the FSM as EVNT_CON_FATAL.
 */
void
session_open(struct peer *p)
{
	struct bgp_msg		*buf;
	struct ibuf		*opb;
	struct msg_open		 msg;
	u_int16_t		 len;
	u_int8_t		 i, op_type, optparamlen = 0;
	int			 errs = 0;
	int			 mpcapa = 0;


	/*
	 * Scratch buffer for the capabilities.  Its maximum size leaves
	 * room for the parameter type and parameter length octets so the
	 * total still fits the one-octet optparamlen.
	 */
	if ((opb = ibuf_dynamic(0, UCHAR_MAX - sizeof(op_type) -
	    sizeof(optparamlen))) == NULL) {
		bgp_fsm(p, EVNT_CON_FATAL);
		return;
	}

	/* multiprotocol extensions, RFC 4760 */
	for (i = 0; i < AID_MAX; i++)
		if (p->capa.ann.mp[i]) {	/* 4 bytes data */
			errs += session_capa_add(opb, CAPA_MP, 4);
			errs += session_capa_add_mp(opb, i);
			mpcapa++;	/* counted for the GR length below */
		}

	/* route refresh, RFC 2918 */
	if (p->capa.ann.refresh)	/* no data */
		errs += session_capa_add(opb, CAPA_REFRESH, 0);

	/* graceful restart and End-of-RIB marker, RFC 4724 */
	if (p->capa.ann.grestart.restart) {
		int		rst = 0;
		u_int16_t	hdr;
		u_int8_t	grlen;

		/*
		 * Capability length: 2 octets of header plus 4 octets per
		 * address family tuple.  rst counts the families that are
		 * currently marked as restarting.
		 */
		if (mpcapa) {
			grlen = 2 + 4 * mpcapa;
			for (i = 0; i < AID_MAX; i++) {
				if (p->capa.neg.grestart.flags[i] &
				    CAPA_GR_RESTARTING)
					rst++;
			}
		} else {	/* AID_INET */
			grlen = 2 + 4;
			if (p->capa.neg.grestart.flags[AID_INET] &
			    CAPA_GR_RESTARTING)
				rst++;
		}

		hdr = conf->holdtime;		/* default timeout */
		/* if client does graceful restart don't set R flag */
		if (!rst)
			hdr |= CAPA_GR_R_FLAG;
		hdr = htons(hdr);

		errs += session_capa_add(opb, CAPA_RESTART, grlen);
		errs += ibuf_add(opb, &hdr, sizeof(hdr));

		/* one tuple per announced family, or just AID_INET */
		if (mpcapa) {
			for (i = 0; i < AID_MAX; i++) {
				if (p->capa.ann.mp[i]) {
					errs += session_capa_add_gr(p, opb, i);
				}
			}
		} else {	/* AID_INET */
			errs += session_capa_add_gr(p, opb, AID_INET);
		}
	}

	/* 4-bytes AS numbers, draft-ietf-idr-as4bytes-13 */
	if (p->capa.ann.as4byte) {	/* 4 bytes data */
		u_int32_t	nas;

		nas = htonl(p->conf.local_as);
		errs += session_capa_add(opb, CAPA_AS4BYTE, sizeof(nas));
		errs += ibuf_add(opb, &nas, sizeof(nas));
	}

	/*
	 * Total optional parameter length: the capabilities plus the
	 * parameter type and parameter length octets; stays 0 when no
	 * capability is announced.
	 */
	if (ibuf_size(opb))
		optparamlen = ibuf_size(opb) + sizeof(op_type) +
		    sizeof(optparamlen);

	len = MSGSIZE_OPEN_MIN + optparamlen;
	if (errs || (buf = session_newmsg(OPEN, len)) == NULL) {
		ibuf_free(opb);
		bgp_fsm(p, EVNT_CON_FATAL);
		return;
	}

	/* fixed part of the OPEN message */
	msg.version = 4;
	msg.myas = htons(p->conf.local_short_as);
	/* a per-peer holdtime overrides the global one */
	if (p->conf.holdtime)
		msg.holdtime = htons(p->conf.holdtime);
	else
		msg.holdtime = htons(conf->holdtime);
	msg.bgpid = conf->bgpid;	/* is already in network byte order */
	msg.optparamlen = optparamlen;

	errs += ibuf_add(buf->buf, &msg.version, sizeof(msg.version));
	errs += ibuf_add(buf->buf, &msg.myas, sizeof(msg.myas));
	errs += ibuf_add(buf->buf, &msg.holdtime, sizeof(msg.holdtime));
	errs += ibuf_add(buf->buf, &msg.bgpid, sizeof(msg.bgpid));
	errs += ibuf_add(buf->buf, &msg.optparamlen, sizeof(msg.optparamlen));

	if (optparamlen) {
		op_type = OPT_PARAM_CAPABILITIES;
		/*
		 * optparamlen is reused here: from now on it holds the
		 * length of the capabilities parameter value only.
		 */
		optparamlen = ibuf_size(opb);
		errs += ibuf_add(buf->buf, &op_type, sizeof(op_type));
		errs += ibuf_add(buf->buf, &optparamlen, sizeof(optparamlen));
		errs += ibuf_add(buf->buf, opb->buf, ibuf_size(opb));
	}

	/* the capabilities were copied, the scratch buffer is done */
	ibuf_free(opb);

	if (errs) {
		ibuf_free(buf->buf);
		free(buf);
		bgp_fsm(p, EVNT_CON_FATAL);
		return;
	}

	if (session_sendmsg(buf, p) == -1) {
		bgp_fsm(p, EVNT_CON_FATAL);
		return;
	}

	p->stats.msg_sent_open++;
}
1528 
1529 void
1530 session_keepalive(struct peer *p)
1531 {
1532 	struct bgp_msg		*buf;
1533 
1534 	if ((buf = session_newmsg(KEEPALIVE, MSGSIZE_KEEPALIVE)) == NULL ||
1535 	    session_sendmsg(buf, p) == -1) {
1536 		bgp_fsm(p, EVNT_CON_FATAL);
1537 		return;
1538 	}
1539 
1540 	start_timer_keepalive(p);
1541 	p->stats.msg_sent_keepalive++;
1542 }
1543 
1544 void
1545 session_update(u_int32_t peerid, void *data, size_t datalen)
1546 {
1547 	struct peer		*p;
1548 	struct bgp_msg		*buf;
1549 
1550 	if ((p = getpeerbyid(conf, peerid)) == NULL) {
1551 		log_warnx("no such peer: id=%u", peerid);
1552 		return;
1553 	}
1554 
1555 	if (p->state != STATE_ESTABLISHED)
1556 		return;
1557 
1558 	if ((buf = session_newmsg(UPDATE, MSGSIZE_HEADER + datalen)) == NULL) {
1559 		bgp_fsm(p, EVNT_CON_FATAL);
1560 		return;
1561 	}
1562 
1563 	if (ibuf_add(buf->buf, data, datalen)) {
1564 		ibuf_free(buf->buf);
1565 		free(buf);
1566 		bgp_fsm(p, EVNT_CON_FATAL);
1567 		return;
1568 	}
1569 
1570 	if (session_sendmsg(buf, p) == -1) {
1571 		bgp_fsm(p, EVNT_CON_FATAL);
1572 		return;
1573 	}
1574 
1575 	start_timer_keepalive(p);
1576 	p->stats.msg_sent_update++;
1577 }
1578 
1579 void
1580 session_notification(struct peer *p, u_int8_t errcode, u_int8_t subcode,
1581     void *data, ssize_t datalen)
1582 {
1583 	struct bgp_msg		*buf;
1584 	int			 errs = 0;
1585 
1586 	if (p->stats.last_sent_errcode)	/* some notification already sent */
1587 		return;
1588 
1589 	log_notification(p, errcode, subcode, data, datalen, "sending");
1590 
1591 	if ((buf = session_newmsg(NOTIFICATION,
1592 	    MSGSIZE_NOTIFICATION_MIN + datalen)) == NULL) {
1593 		bgp_fsm(p, EVNT_CON_FATAL);
1594 		return;
1595 	}
1596 
1597 	errs += ibuf_add(buf->buf, &errcode, sizeof(errcode));
1598 	errs += ibuf_add(buf->buf, &subcode, sizeof(subcode));
1599 
1600 	if (datalen > 0)
1601 		errs += ibuf_add(buf->buf, data, datalen);
1602 
1603 	if (errs) {
1604 		ibuf_free(buf->buf);
1605 		free(buf);
1606 		bgp_fsm(p, EVNT_CON_FATAL);
1607 		return;
1608 	}
1609 
1610 	if (session_sendmsg(buf, p) == -1) {
1611 		bgp_fsm(p, EVNT_CON_FATAL);
1612 		return;
1613 	}
1614 
1615 	p->stats.msg_sent_notification++;
1616 	p->stats.last_sent_errcode = errcode;
1617 	p->stats.last_sent_suberr = subcode;
1618 }
1619 
1620 int
1621 session_neighbor_rrefresh(struct peer *p)
1622 {
1623 	u_int8_t	i;
1624 
1625 	if (!p->capa.peer.refresh)
1626 		return (-1);
1627 
1628 	for (i = 0; i < AID_MAX; i++) {
1629 		if (p->capa.peer.mp[i] != 0)
1630 			session_rrefresh(p, i);
1631 	}
1632 
1633 	return (0);
1634 }
1635 
1636 void
1637 session_rrefresh(struct peer *p, u_int8_t aid)
1638 {
1639 	struct bgp_msg		*buf;
1640 	int			 errs = 0;
1641 	u_int16_t		 afi;
1642 	u_int8_t		 safi, null8 = 0;
1643 
1644 	if (aid2afi(aid, &afi, &safi) == -1)
1645 		fatalx("session_rrefresh: bad afi/safi pair");
1646 
1647 	if ((buf = session_newmsg(RREFRESH, MSGSIZE_RREFRESH)) == NULL) {
1648 		bgp_fsm(p, EVNT_CON_FATAL);
1649 		return;
1650 	}
1651 
1652 	afi = htons(afi);
1653 	errs += ibuf_add(buf->buf, &afi, sizeof(afi));
1654 	errs += ibuf_add(buf->buf, &null8, sizeof(null8));
1655 	errs += ibuf_add(buf->buf, &safi, sizeof(safi));
1656 
1657 	if (errs) {
1658 		ibuf_free(buf->buf);
1659 		free(buf);
1660 		bgp_fsm(p, EVNT_CON_FATAL);
1661 		return;
1662 	}
1663 
1664 	if (session_sendmsg(buf, p) == -1) {
1665 		bgp_fsm(p, EVNT_CON_FATAL);
1666 		return;
1667 	}
1668 
1669 	p->stats.msg_sent_rrefresh++;
1670 }
1671 
1672 int
1673 session_graceful_restart(struct peer *p)
1674 {
1675 	u_int8_t	i;
1676 
1677 	timer_set(p, Timer_RestartTimeout, p->capa.neg.grestart.timeout);
1678 
1679 	for (i = 0; i < AID_MAX; i++) {
1680 		if (p->capa.neg.grestart.flags[i] & CAPA_GR_PRESENT) {
1681 			if (imsg_rde(IMSG_SESSION_STALE, p->conf.id,
1682 			    &i, sizeof(i)) == -1)
1683 				return (-1);
1684 			log_peer_warnx(&p->conf,
1685 			    "graceful restart of %s, keeping routes",
1686 			    aid2str(i));
1687 			p->capa.neg.grestart.flags[i] |= CAPA_GR_RESTARTING;
1688 		} else if (p->capa.neg.mp[i]) {
1689 			if (imsg_rde(IMSG_SESSION_FLUSH, p->conf.id,
1690 			    &i, sizeof(i)) == -1)
1691 				return (-1);
1692 			log_peer_warnx(&p->conf,
1693 			    "graceful restart of %s, flushing routes",
1694 			    aid2str(i));
1695 		}
1696 	}
1697 	return (0);
1698 }
1699 
1700 int
1701 session_graceful_stop(struct peer *p)
1702 {
1703 	u_int8_t	i;
1704 
1705 	for (i = 0; i < AID_MAX; i++) {
1706 		/*
1707 		 * Only flush if the peer is restarting and the timeout fired.
1708 		 * In all other cases the session was already flushed when the
1709 		 * session went down or when the new open message was parsed.
1710 		 */
1711 		if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING) {
1712 			log_peer_warnx(&p->conf, "graceful restart of %s, "
1713 			    "time-out, flushing", aid2str(i));
1714 			if (imsg_rde(IMSG_SESSION_FLUSH, p->conf.id,
1715 			    &i, sizeof(i)) == -1)
1716 				return (-1);
1717 		}
1718 		p->capa.neg.grestart.flags[i] &= ~CAPA_GR_RESTARTING;
1719 	}
1720 	return (0);
1721 }
1722 
1723 int
1724 session_dispatch_msg(struct pollfd *pfd, struct peer *p)
1725 {
1726 	ssize_t		n;
1727 	socklen_t	len;
1728 	int		error;
1729 
1730 	if (p->state == STATE_CONNECT) {
1731 		if (pfd->revents & POLLOUT) {
1732 			if (pfd->revents & POLLIN) {
1733 				/* error occurred */
1734 				len = sizeof(error);
1735 				if (getsockopt(pfd->fd, SOL_SOCKET, SO_ERROR,
1736 				    &error, &len) == -1 || error) {
1737 					if (error)
1738 						errno = error;
1739 					if (errno != p->lasterr) {
1740 						log_peer_warn(&p->conf,
1741 						    "socket error");
1742 						p->lasterr = errno;
1743 					}
1744 					bgp_fsm(p, EVNT_CON_OPENFAIL);
1745 					return (1);
1746 				}
1747 			}
1748 			bgp_fsm(p, EVNT_CON_OPEN);
1749 			return (1);
1750 		}
1751 		if (pfd->revents & POLLHUP) {
1752 			bgp_fsm(p, EVNT_CON_OPENFAIL);
1753 			return (1);
1754 		}
1755 		if (pfd->revents & (POLLERR|POLLNVAL)) {
1756 			bgp_fsm(p, EVNT_CON_FATAL);
1757 			return (1);
1758 		}
1759 		return (0);
1760 	}
1761 
1762 	if (pfd->revents & POLLHUP) {
1763 		bgp_fsm(p, EVNT_CON_CLOSED);
1764 		return (1);
1765 	}
1766 	if (pfd->revents & (POLLERR|POLLNVAL)) {
1767 		bgp_fsm(p, EVNT_CON_FATAL);
1768 		return (1);
1769 	}
1770 
1771 	if (pfd->revents & POLLOUT && p->wbuf.queued) {
1772 		if ((error = msgbuf_write(&p->wbuf)) <= 0 && errno != EAGAIN) {
1773 			if (error == 0)
1774 				log_peer_warnx(&p->conf, "Connection closed");
1775 			else if (error == -1)
1776 				log_peer_warn(&p->conf, "write error");
1777 			bgp_fsm(p, EVNT_CON_FATAL);
1778 			return (1);
1779 		}
1780 		p->stats.last_write = getmonotime();
1781 		if (p->throttled && p->wbuf.queued < SESS_MSG_LOW_MARK) {
1782 			if (imsg_rde(IMSG_XON, p->conf.id, NULL, 0) == -1)
1783 				log_peer_warn(&p->conf, "imsg_compose XON");
1784 			else
1785 				p->throttled = 0;
1786 		}
1787 		if (!(pfd->revents & POLLIN))
1788 			return (1);
1789 	}
1790 
1791 	if (p->rbuf && pfd->revents & POLLIN) {
1792 		if ((n = read(p->fd, p->rbuf->buf + p->rbuf->wpos,
1793 		    sizeof(p->rbuf->buf) - p->rbuf->wpos)) == -1) {
1794 			if (errno != EINTR && errno != EAGAIN) {
1795 				log_peer_warn(&p->conf, "read error");
1796 				bgp_fsm(p, EVNT_CON_FATAL);
1797 			}
1798 			return (1);
1799 		}
1800 		if (n == 0) {	/* connection closed */
1801 			bgp_fsm(p, EVNT_CON_CLOSED);
1802 			return (1);
1803 		}
1804 
1805 		p->rbuf->wpos += n;
1806 		p->stats.last_read = getmonotime();
1807 		return (1);
1808 	}
1809 	return (0);
1810 }
1811 
1812 void
1813 session_process_msg(struct peer *p)
1814 {
1815 	struct mrt	*mrt;
1816 	ssize_t		rpos, av, left;
1817 	int		processed = 0;
1818 	u_int16_t	msglen;
1819 	u_int8_t	msgtype;
1820 
1821 	rpos = 0;
1822 	av = p->rbuf->wpos;
1823 	p->rpending = 0;
1824 
1825 	/*
1826 	 * session might drop to IDLE -> buffers deallocated
1827 	 * we MUST check rbuf != NULL before use
1828 	 */
1829 	for (;;) {
1830 		if (p->rbuf == NULL)
1831 			return;
1832 		if (rpos + MSGSIZE_HEADER > av)
1833 			break;
1834 		if (parse_header(p, p->rbuf->buf + rpos, &msglen,
1835 		    &msgtype) == -1)
1836 			return;
1837 		if (rpos + msglen > av)
1838 			break;
1839 		p->rbuf->rptr = p->rbuf->buf + rpos;
1840 
1841 		/* dump to MRT as soon as we have a full packet */
1842 		LIST_FOREACH(mrt, &mrthead, entry) {
1843 			if (!(mrt->type == MRT_ALL_IN || (msgtype == UPDATE &&
1844 			    mrt->type == MRT_UPDATE_IN)))
1845 				continue;
1846 			if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
1847 			    mrt->peer_id == p->conf.id || (mrt->group_id != 0 &&
1848 			    mrt->group_id == p->conf.groupid))
1849 				mrt_dump_bgp_msg(mrt, p->rbuf->rptr, msglen, p);
1850 		}
1851 
1852 		switch (msgtype) {
1853 		case OPEN:
1854 			bgp_fsm(p, EVNT_RCVD_OPEN);
1855 			p->stats.msg_rcvd_open++;
1856 			break;
1857 		case UPDATE:
1858 			bgp_fsm(p, EVNT_RCVD_UPDATE);
1859 			p->stats.msg_rcvd_update++;
1860 			break;
1861 		case NOTIFICATION:
1862 			bgp_fsm(p, EVNT_RCVD_NOTIFICATION);
1863 			p->stats.msg_rcvd_notification++;
1864 			break;
1865 		case KEEPALIVE:
1866 			bgp_fsm(p, EVNT_RCVD_KEEPALIVE);
1867 			p->stats.msg_rcvd_keepalive++;
1868 			break;
1869 		case RREFRESH:
1870 			parse_refresh(p);
1871 			p->stats.msg_rcvd_rrefresh++;
1872 			break;
1873 		default:	/* cannot happen */
1874 			session_notification(p, ERR_HEADER, ERR_HDR_TYPE,
1875 			    &msgtype, 1);
1876 			log_warnx("received message with unknown type %u",
1877 			    msgtype);
1878 			bgp_fsm(p, EVNT_CON_FATAL);
1879 		}
1880 		rpos += msglen;
1881 		if (++processed > MSG_PROCESS_LIMIT) {
1882 			p->rpending = 1;
1883 			break;
1884 		}
1885 	}
1886 
1887 	if (rpos < av) {
1888 		left = av - rpos;
1889 		memmove(&p->rbuf->buf, p->rbuf->buf + rpos, left);
1890 		p->rbuf->wpos = left;
1891 	} else
1892 		p->rbuf->wpos = 0;
1893 }
1894 
1895 int
1896 parse_header(struct peer *peer, u_char *data, u_int16_t *len, u_int8_t *type)
1897 {
1898 	u_char			*p;
1899 	u_int16_t		 olen;
1900 	static const u_int8_t	 marker[MSGSIZE_HEADER_MARKER] = { 0xff, 0xff,
1901 				    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1902 				    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
1903 
1904 	/* caller MUST make sure we are getting 19 bytes! */
1905 	p = data;
1906 	if (memcmp(p, marker, sizeof(marker))) {
1907 		log_peer_warnx(&peer->conf, "sync error");
1908 		session_notification(peer, ERR_HEADER, ERR_HDR_SYNC, NULL, 0);
1909 		bgp_fsm(peer, EVNT_CON_FATAL);
1910 		return (-1);
1911 	}
1912 	p += MSGSIZE_HEADER_MARKER;
1913 
1914 	memcpy(&olen, p, 2);
1915 	*len = ntohs(olen);
1916 	p += 2;
1917 	memcpy(type, p, 1);
1918 
1919 	if (*len < MSGSIZE_HEADER || *len > MAX_PKTSIZE) {
1920 		log_peer_warnx(&peer->conf,
1921 		    "received message: illegal length: %u byte", *len);
1922 		session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1923 		    &olen, sizeof(olen));
1924 		bgp_fsm(peer, EVNT_CON_FATAL);
1925 		return (-1);
1926 	}
1927 
1928 	switch (*type) {
1929 	case OPEN:
1930 		if (*len < MSGSIZE_OPEN_MIN) {
1931 			log_peer_warnx(&peer->conf,
1932 			    "received OPEN: illegal len: %u byte", *len);
1933 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1934 			    &olen, sizeof(olen));
1935 			bgp_fsm(peer, EVNT_CON_FATAL);
1936 			return (-1);
1937 		}
1938 		break;
1939 	case NOTIFICATION:
1940 		if (*len < MSGSIZE_NOTIFICATION_MIN) {
1941 			log_peer_warnx(&peer->conf,
1942 			    "received NOTIFICATION: illegal len: %u byte",
1943 			    *len);
1944 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1945 			    &olen, sizeof(olen));
1946 			bgp_fsm(peer, EVNT_CON_FATAL);
1947 			return (-1);
1948 		}
1949 		break;
1950 	case UPDATE:
1951 		if (*len < MSGSIZE_UPDATE_MIN) {
1952 			log_peer_warnx(&peer->conf,
1953 			    "received UPDATE: illegal len: %u byte", *len);
1954 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1955 			    &olen, sizeof(olen));
1956 			bgp_fsm(peer, EVNT_CON_FATAL);
1957 			return (-1);
1958 		}
1959 		break;
1960 	case KEEPALIVE:
1961 		if (*len != MSGSIZE_KEEPALIVE) {
1962 			log_peer_warnx(&peer->conf,
1963 			    "received KEEPALIVE: illegal len: %u byte", *len);
1964 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1965 			    &olen, sizeof(olen));
1966 			bgp_fsm(peer, EVNT_CON_FATAL);
1967 			return (-1);
1968 		}
1969 		break;
1970 	case RREFRESH:
1971 		if (*len != MSGSIZE_RREFRESH) {
1972 			log_peer_warnx(&peer->conf,
1973 			    "received RREFRESH: illegal len: %u byte", *len);
1974 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1975 			    &olen, sizeof(olen));
1976 			bgp_fsm(peer, EVNT_CON_FATAL);
1977 			return (-1);
1978 		}
1979 		break;
1980 	default:
1981 		log_peer_warnx(&peer->conf,
1982 		    "received msg with unknown type %u", *type);
1983 		session_notification(peer, ERR_HEADER, ERR_HDR_TYPE,
1984 		    type, 1);
1985 		bgp_fsm(peer, EVNT_CON_FATAL);
1986 		return (-1);
1987 	}
1988 	return (0);
1989 }
1990 
/*
 * Parse the OPEN message at peer->rbuf->rptr.
 *
 * Validates version, AS number, hold time, BGP identifier and the optional
 * parameters, records the results in the peer and finally calculates the
 * negotiated capabilities.  On any error a NOTIFICATION is sent, the
 * session is moved to STATE_IDLE and -1 is returned.  Returns 0 when the
 * OPEN message was accepted.
 */
int
parse_open(struct peer *peer)
{
	u_char		*p, *op_val;
	u_int8_t	 version, rversion;
	u_int16_t	 short_as, msglen;
	u_int16_t	 holdtime, oholdtime, myholdtime;
	u_int32_t	 as, bgpid;
	u_int8_t	 optparamlen, plen;
	u_int8_t	 op_type, op_len;

	/* fetch the total message length from the header */
	p = peer->rbuf->rptr;
	p += MSGSIZE_HEADER_MARKER;
	memcpy(&msglen, p, sizeof(msglen));
	msglen = ntohs(msglen);

	p = peer->rbuf->rptr;
	p += MSGSIZE_HEADER;	/* header is already checked */

	memcpy(&version, p, sizeof(version));
	p += sizeof(version);

	if (version != BGP_VERSION) {
		log_peer_warnx(&peer->conf,
		    "peer wants unrecognized version %u", version);
		/* rversion is the data octet sent along with the error */
		if (version > BGP_VERSION)
			rversion = version - BGP_VERSION;
		else
			rversion = BGP_VERSION;
		session_notification(peer, ERR_OPEN, ERR_OPEN_VERSION,
		    &rversion, sizeof(rversion));
		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
		return (-1);
	}

	/*
	 * 2-byte AS from the fixed part of the message; "as" is also
	 * handed to parse_capabilities() below by reference.
	 */
	memcpy(&short_as, p, sizeof(short_as));
	p += sizeof(short_as);
	as = peer->short_as = ntohs(short_as);
	if (as == 0) {
		log_peer_warnx(&peer->conf,
		    "peer requests unacceptable AS %u", as);
		session_notification(peer, ERR_OPEN, ERR_OPEN_AS,
		    NULL, 0);
		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
		return (-1);
	}

	memcpy(&oholdtime, p, sizeof(oholdtime));
	p += sizeof(oholdtime);

	/* a hold time of 0 is accepted, anything else must reach our minimum */
	holdtime = ntohs(oholdtime);
	if (holdtime && holdtime < peer->conf.min_holdtime) {
		log_peer_warnx(&peer->conf,
		    "peer requests unacceptable holdtime %u", holdtime);
		session_notification(peer, ERR_OPEN, ERR_OPEN_HOLDTIME,
		    NULL, 0);
		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
		return (-1);
	}

	/* use the smaller of the peer's hold time and our own */
	myholdtime = peer->conf.holdtime;
	if (!myholdtime)
		myholdtime = conf->holdtime;
	if (holdtime < myholdtime)
		peer->holdtime = holdtime;
	else
		peer->holdtime = myholdtime;

	memcpy(&bgpid, p, sizeof(bgpid));
	p += sizeof(bgpid);

	/* check bgpid for validity - just disallow 0 */
	if (ntohl(bgpid) == 0) {
		log_peer_warnx(&peer->conf, "peer BGPID %u unacceptable",
		    ntohl(bgpid));
		session_notification(peer, ERR_OPEN, ERR_OPEN_BGPID,
		    NULL, 0);
		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
		return (-1);
	}
	/* stored as received, i.e. without byte order conversion */
	peer->remote_bgpid = bgpid;

	memcpy(&optparamlen, p, sizeof(optparamlen));
	p += sizeof(optparamlen);

	/* the optional parameters must account for the rest of the message */
	if (optparamlen != msglen - MSGSIZE_OPEN_MIN) {
			log_peer_warnx(&peer->conf,
			    "corrupt OPEN message received: length mismatch");
			session_notification(peer, ERR_OPEN, 0, NULL, 0);
			change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
			return (-1);
	}

	/* walk the optional parameters: type octet, length octet, value */
	plen = optparamlen;
	while (plen > 0) {
		/* type and length octets must both be present */
		if (plen < 2) {
			log_peer_warnx(&peer->conf,
			    "corrupt OPEN message received, len wrong");
			session_notification(peer, ERR_OPEN, 0, NULL, 0);
			change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
			return (-1);
		}
		memcpy(&op_type, p, sizeof(op_type));
		p += sizeof(op_type);
		plen -= sizeof(op_type);
		memcpy(&op_len, p, sizeof(op_len));
		p += sizeof(op_len);
		plen -= sizeof(op_len);
		if (op_len > 0) {
			/* the value must not run past the parameters */
			if (plen < op_len) {
				log_peer_warnx(&peer->conf,
				    "corrupt OPEN message received, len wrong");
				session_notification(peer, ERR_OPEN, 0,
				    NULL, 0);
				change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
				return (-1);
			}
			op_val = p;
			p += op_len;
			plen -= op_len;
		} else
			op_val = NULL;

		switch (op_type) {
		case OPT_PARAM_CAPABILITIES:		/* RFC 3392 */
			if (parse_capabilities(peer, op_val, op_len,
			    &as) == -1) {
				session_notification(peer, ERR_OPEN, 0,
				    NULL, 0);
				change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
				return (-1);
			}
			break;
		case OPT_PARAM_AUTH:			/* deprecated */
		default:
			/*
			 * unsupported type
			 * the RFCs tell us to leave the data section empty
			 * and notify the peer with ERR_OPEN, ERR_OPEN_OPT.
			 * How the peer should know _which_ optional parameter
			 * we don't support is beyond me.
			 */
			log_peer_warnx(&peer->conf,
			    "received OPEN message with unsupported optional "
			    "parameter: type %u", op_type);
			session_notification(peer, ERR_OPEN, ERR_OPEN_OPT,
				NULL, 0);
			change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
			timer_set(peer, Timer_IdleHold, 0);	/* no punish */
			peer->IdleHoldTime /= 2;
			return (-1);
		}
	}

	/* if remote-as is zero and it's a cloned neighbor, accept any */
	if (peer->template && !peer->conf.remote_as && as != AS_TRANS) {
		peer->conf.remote_as = as;
		peer->conf.ebgp = (peer->conf.remote_as != peer->conf.local_as);
		if (!peer->conf.ebgp)
			/* force enforce_as off for iBGP sessions */
			peer->conf.enforce_as = ENFORCE_AS_OFF;
	}

	/* the AS the peer presented must be the configured one */
	if (peer->conf.remote_as != as) {
		log_peer_warnx(&peer->conf, "peer sent wrong AS %s",
		    log_as(as));
		session_notification(peer, ERR_OPEN, ERR_OPEN_AS, NULL, 0);
		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
		return (-1);
	}

	if (capa_neg_calc(peer) == -1) {
		log_peer_warnx(&peer->conf,
		    "capability negotiation calculation failed");
		session_notification(peer, ERR_OPEN, 0, NULL, 0);
		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
		return (-1);
	}

	return (0);
}
2172 
2173 int
2174 parse_update(struct peer *peer)
2175 {
2176 	u_char		*p;
2177 	u_int16_t	 datalen;
2178 
2179 	/*
2180 	 * we pass the message verbatim to the rde.
2181 	 * in case of errors the whole session is reset with a
2182 	 * notification anyway, we only need to know the peer
2183 	 */
2184 	p = peer->rbuf->rptr;
2185 	p += MSGSIZE_HEADER_MARKER;
2186 	memcpy(&datalen, p, sizeof(datalen));
2187 	datalen = ntohs(datalen);
2188 
2189 	p = peer->rbuf->rptr;
2190 	p += MSGSIZE_HEADER;	/* header is already checked */
2191 	datalen -= MSGSIZE_HEADER;
2192 
2193 	if (imsg_rde(IMSG_UPDATE, peer->conf.id, p, datalen) == -1)
2194 		return (-1);
2195 
2196 	return (0);
2197 }
2198 
2199 int
2200 parse_refresh(struct peer *peer)
2201 {
2202 	u_char		*p;
2203 	u_int16_t	 afi;
2204 	u_int8_t	 aid, safi;
2205 
2206 	p = peer->rbuf->rptr;
2207 	p += MSGSIZE_HEADER;	/* header is already checked */
2208 
2209 	/*
2210 	 * We could check if we actually announced the capability but
2211 	 * as long as the message is correctly encoded we don't care.
2212 	 */
2213 
2214 	/* afi, 2 byte */
2215 	memcpy(&afi, p, sizeof(afi));
2216 	afi = ntohs(afi);
2217 	p += 2;
2218 	/* reserved, 1 byte */
2219 	p += 1;
2220 	/* safi, 1 byte */
2221 	memcpy(&safi, p, sizeof(safi));
2222 
2223 	/* afi/safi unchecked -	unrecognized values will be ignored anyway */
2224 	if (afi2aid(afi, safi, &aid) == -1) {
2225 		log_peer_warnx(&peer->conf, "peer sent bad refresh, "
2226 		    "invalid afi/safi pair");
2227 		return (0);
2228 	}
2229 
2230 	if (imsg_rde(IMSG_REFRESH, peer->conf.id, &aid, sizeof(aid)) == -1)
2231 		return (-1);
2232 
2233 	return (0);
2234 }
2235 
2236 int
2237 parse_notification(struct peer *peer)
2238 {
2239 	u_char		*p;
2240 	u_int16_t	 datalen;
2241 	u_int8_t	 errcode;
2242 	u_int8_t	 subcode;
2243 	u_int8_t	 capa_code;
2244 	u_int8_t	 capa_len;
2245 	size_t		 reason_len;
2246 	u_int8_t	 i;
2247 
2248 	/* just log */
2249 	p = peer->rbuf->rptr;
2250 	p += MSGSIZE_HEADER_MARKER;
2251 	memcpy(&datalen, p, sizeof(datalen));
2252 	datalen = ntohs(datalen);
2253 
2254 	p = peer->rbuf->rptr;
2255 	p += MSGSIZE_HEADER;	/* header is already checked */
2256 	datalen -= MSGSIZE_HEADER;
2257 
2258 	memcpy(&errcode, p, sizeof(errcode));
2259 	p += sizeof(errcode);
2260 	datalen -= sizeof(errcode);
2261 
2262 	memcpy(&subcode, p, sizeof(subcode));
2263 	p += sizeof(subcode);
2264 	datalen -= sizeof(subcode);
2265 
2266 	log_notification(peer, errcode, subcode, p, datalen, "received");
2267 	peer->errcnt++;
2268 	peer->stats.last_rcvd_errcode = errcode;
2269 	peer->stats.last_rcvd_suberr = subcode;
2270 
2271 	if (errcode == ERR_OPEN && subcode == ERR_OPEN_CAPA) {
2272 		if (datalen == 0) {	/* zebra likes to send those.. humbug */
2273 			log_peer_warnx(&peer->conf, "received \"unsupported "
2274 			    "capability\" notification without data part, "
2275 			    "disabling capability announcements altogether");
2276 			session_capa_ann_none(peer);
2277 		}
2278 
2279 		while (datalen > 0) {
2280 			if (datalen < 2) {
2281 				log_peer_warnx(&peer->conf,
2282 				    "parse_notification: "
2283 				    "expect len >= 2, len is %u", datalen);
2284 				return (-1);
2285 			}
2286 			memcpy(&capa_code, p, sizeof(capa_code));
2287 			p += sizeof(capa_code);
2288 			datalen -= sizeof(capa_code);
2289 			memcpy(&capa_len, p, sizeof(capa_len));
2290 			p += sizeof(capa_len);
2291 			datalen -= sizeof(capa_len);
2292 			if (datalen < capa_len) {
2293 				log_peer_warnx(&peer->conf,
2294 				    "parse_notification: capa_len %u exceeds "
2295 				    "remaining msg length %u", capa_len,
2296 				    datalen);
2297 				return (-1);
2298 			}
2299 			p += capa_len;
2300 			datalen -= capa_len;
2301 			switch (capa_code) {
2302 			case CAPA_MP:
2303 				for (i = 0; i < AID_MAX; i++)
2304 					peer->capa.ann.mp[i] = 0;
2305 				log_peer_warnx(&peer->conf,
2306 				    "disabling multiprotocol capability");
2307 				break;
2308 			case CAPA_REFRESH:
2309 				peer->capa.ann.refresh = 0;
2310 				log_peer_warnx(&peer->conf,
2311 				    "disabling route refresh capability");
2312 				break;
2313 			case CAPA_RESTART:
2314 				peer->capa.ann.grestart.restart = 0;
2315 				log_peer_warnx(&peer->conf,
2316 				    "disabling restart capability");
2317 				break;
2318 			case CAPA_AS4BYTE:
2319 				peer->capa.ann.as4byte = 0;
2320 				log_peer_warnx(&peer->conf,
2321 				    "disabling 4-byte AS num capability");
2322 				break;
2323 			default:	/* should not happen... */
2324 				log_peer_warnx(&peer->conf, "received "
2325 				    "\"unsupported capability\" notification "
2326 				    "for unknown capability %u, disabling "
2327 				    "capability announcements altogether",
2328 				    capa_code);
2329 				session_capa_ann_none(peer);
2330 				break;
2331 			}
2332 		}
2333 
2334 		return (1);
2335 	}
2336 
2337 	if (errcode == ERR_OPEN && subcode == ERR_OPEN_OPT) {
2338 		session_capa_ann_none(peer);
2339 		return (1);
2340 	}
2341 
2342 	if (errcode == ERR_CEASE &&
2343 	    (subcode == ERR_CEASE_ADMIN_DOWN ||
2344 	     subcode == ERR_CEASE_ADMIN_RESET)) {
2345 		if (datalen > 1) {
2346 			reason_len = *p++;
2347 			datalen--;
2348 			if (datalen < reason_len) {
2349 			    log_peer_warnx(&peer->conf,
2350 				"received truncated shutdown reason");
2351 			    return (0);
2352 			}
2353 			if (reason_len > REASON_LEN - 1) {
2354 			    log_peer_warnx(&peer->conf,
2355 				"received overly long shutdown reason");
2356 			    return (0);
2357 			}
2358 			memcpy(peer->stats.last_reason, p, reason_len);
2359 			peer->stats.last_reason[reason_len] = '\0';
2360 			log_peer_warnx(&peer->conf,
2361 			    "received shutdown reason: \"%s\"",
2362 			    log_reason(peer->stats.last_reason));
2363 			p += reason_len;
2364 			datalen -= reason_len;
2365 		}
2366 	}
2367 
2368 	return (0);
2369 }
2370 
2371 int
2372 parse_capabilities(struct peer *peer, u_char *d, u_int16_t dlen, u_int32_t *as)
2373 {
2374 	u_char		*capa_val;
2375 	u_int32_t	 remote_as;
2376 	u_int16_t	 len;
2377 	u_int16_t	 afi;
2378 	u_int16_t	 gr_header;
2379 	u_int8_t	 safi;
2380 	u_int8_t	 aid;
2381 	u_int8_t	 gr_flags;
2382 	u_int8_t	 capa_code;
2383 	u_int8_t	 capa_len;
2384 	u_int8_t	 i;
2385 
2386 	len = dlen;
2387 	while (len > 0) {
2388 		if (len < 2) {
2389 			log_peer_warnx(&peer->conf, "Bad capabilities attr "
2390 			    "length: %u, too short", len);
2391 			return (-1);
2392 		}
2393 		memcpy(&capa_code, d, sizeof(capa_code));
2394 		d += sizeof(capa_code);
2395 		len -= sizeof(capa_code);
2396 		memcpy(&capa_len, d, sizeof(capa_len));
2397 		d += sizeof(capa_len);
2398 		len -= sizeof(capa_len);
2399 		if (capa_len > 0) {
2400 			if (len < capa_len) {
2401 				log_peer_warnx(&peer->conf,
2402 				    "Bad capabilities attr length: "
2403 				    "len %u smaller than capa_len %u",
2404 				    len, capa_len);
2405 				return (-1);
2406 			}
2407 			capa_val = d;
2408 			d += capa_len;
2409 			len -= capa_len;
2410 		} else
2411 			capa_val = NULL;
2412 
2413 		switch (capa_code) {
2414 		case CAPA_MP:			/* RFC 4760 */
2415 			if (capa_len != 4) {
2416 				log_peer_warnx(&peer->conf,
2417 				    "Bad multi protocol capability length: "
2418 				    "%u", capa_len);
2419 				break;
2420 			}
2421 			memcpy(&afi, capa_val, sizeof(afi));
2422 			afi = ntohs(afi);
2423 			memcpy(&safi, capa_val + 3, sizeof(safi));
2424 			if (afi2aid(afi, safi, &aid) == -1) {
2425 				log_peer_warnx(&peer->conf,
2426 				    "Received multi protocol capability: "
2427 				    " unknown AFI %u, safi %u pair",
2428 				    afi, safi);
2429 				break;
2430 			}
2431 			peer->capa.peer.mp[aid] = 1;
2432 			break;
2433 		case CAPA_REFRESH:
2434 			peer->capa.peer.refresh = 1;
2435 			break;
2436 		case CAPA_RESTART:
2437 			if (capa_len == 2) {
2438 				/* peer only supports EoR marker */
2439 				peer->capa.peer.grestart.restart = 1;
2440 				peer->capa.peer.grestart.timeout = 0;
2441 				break;
2442 			} else if (capa_len % 4 != 2) {
2443 				log_peer_warnx(&peer->conf,
2444 				    "Bad graceful restart capability length: "
2445 				    "%u", capa_len);
2446 				peer->capa.peer.grestart.restart = 0;
2447 				peer->capa.peer.grestart.timeout = 0;
2448 				break;
2449 			}
2450 
2451 			memcpy(&gr_header, capa_val, sizeof(gr_header));
2452 			gr_header = ntohs(gr_header);
2453 			peer->capa.peer.grestart.timeout =
2454 			    gr_header & CAPA_GR_TIMEMASK;
2455 			if (peer->capa.peer.grestart.timeout == 0) {
2456 				log_peer_warnx(&peer->conf, "Received "
2457 				    "graceful restart timeout is zero");
2458 				peer->capa.peer.grestart.restart = 0;
2459 				break;
2460 			}
2461 
2462 			for (i = 2; i <= capa_len - 4; i += 4) {
2463 				memcpy(&afi, capa_val + i, sizeof(afi));
2464 				afi = ntohs(afi);
2465 				memcpy(&safi, capa_val + i + 2, sizeof(safi));
2466 				if (afi2aid(afi, safi, &aid) == -1) {
2467 					log_peer_warnx(&peer->conf,
2468 					    "Received graceful restart capa: "
2469 					    " unknown AFI %u, safi %u pair",
2470 					    afi, safi);
2471 					continue;
2472 				}
2473 				memcpy(&gr_flags, capa_val + i + 3,
2474 				    sizeof(gr_flags));
2475 				peer->capa.peer.grestart.flags[aid] |=
2476 				    CAPA_GR_PRESENT;
2477 				if (gr_flags & CAPA_GR_F_FLAG)
2478 					peer->capa.peer.grestart.flags[aid] |=
2479 					    CAPA_GR_FORWARD;
2480 				if (gr_header & CAPA_GR_R_FLAG)
2481 					peer->capa.peer.grestart.flags[aid] |=
2482 					    CAPA_GR_RESTART;
2483 				peer->capa.peer.grestart.restart = 2;
2484 			}
2485 			break;
2486 		case CAPA_AS4BYTE:
2487 			if (capa_len != 4) {
2488 				log_peer_warnx(&peer->conf,
2489 				    "Bad AS4BYTE capability length: "
2490 				    "%u", capa_len);
2491 				peer->capa.peer.as4byte = 0;
2492 				break;
2493 			}
2494 			memcpy(&remote_as, capa_val, sizeof(remote_as));
2495 			*as = ntohl(remote_as);
2496 			if (*as == 0) {
2497 				log_peer_warnx(&peer->conf,
2498 				    "peer requests unacceptable AS %u", *as);
2499 				session_notification(peer, ERR_OPEN,
2500 				    ERR_OPEN_AS, NULL, 0);
2501 				change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2502 				return (-1);
2503 			}
2504 			peer->capa.peer.as4byte = 1;
2505 			break;
2506 		default:
2507 			break;
2508 		}
2509 	}
2510 
2511 	return (0);
2512 }
2513 
2514 int
2515 capa_neg_calc(struct peer *p)
2516 {
2517 	u_int8_t	i, hasmp = 0;
2518 
2519 	/* refresh: does not realy matter here, use peer setting */
2520 	p->capa.neg.refresh = p->capa.peer.refresh;
2521 
2522 	/* as4byte: both side must announce capability */
2523 	if (p->capa.ann.as4byte && p->capa.peer.as4byte)
2524 		p->capa.neg.as4byte = 1;
2525 	else
2526 		p->capa.neg.as4byte = 0;
2527 
2528 	/* MP: both side must announce capability */
2529 	for (i = 0; i < AID_MAX; i++) {
2530 		if (p->capa.ann.mp[i] && p->capa.peer.mp[i]) {
2531 			p->capa.neg.mp[i] = 1;
2532 			hasmp = 1;
2533 		} else
2534 			p->capa.neg.mp[i] = 0;
2535 	}
2536 	/* if no MP capability present default to IPv4 unicast mode */
2537 	if (!hasmp)
2538 		p->capa.neg.mp[AID_INET] = 1;
2539 
2540 	/*
2541 	 * graceful restart: only the peer capabilities are of interest here.
2542 	 * It is necessary to compare the new values with the previous ones
2543 	 * and act acordingly. AFI/SAFI that are not part in the MP capability
2544 	 * are treated as not being present.
2545 	 */
2546 
2547 	for (i = 0; i < AID_MAX; i++) {
2548 		int8_t	negflags;
2549 
2550 		/* disable GR if the AFI/SAFI is not present */
2551 		if (p->capa.peer.grestart.flags[i] & CAPA_GR_PRESENT &&
2552 		    p->capa.neg.mp[i] == 0)
2553 			p->capa.peer.grestart.flags[i] = 0;	/* disable */
2554 		/* look at current GR state and decide what to do */
2555 		negflags = p->capa.neg.grestart.flags[i];
2556 		p->capa.neg.grestart.flags[i] = p->capa.peer.grestart.flags[i];
2557 		if (negflags & CAPA_GR_RESTARTING) {
2558 			if (!(p->capa.peer.grestart.flags[i] &
2559 			    CAPA_GR_FORWARD)) {
2560 				if (imsg_rde(IMSG_SESSION_FLUSH, p->conf.id,
2561 				    &i, sizeof(i)) == -1)
2562 					return (-1);
2563 				log_peer_warnx(&p->conf, "graceful restart of "
2564 				    "%s, not restarted, flushing", aid2str(i));
2565 			} else
2566 				p->capa.neg.grestart.flags[i] |=
2567 				    CAPA_GR_RESTARTING;
2568 		}
2569 	}
2570 	p->capa.neg.grestart.timeout = p->capa.peer.grestart.timeout;
2571 	p->capa.neg.grestart.restart = p->capa.peer.grestart.restart;
2572 
2573 	return (0);
2574 }
2575 
2576 void
2577 session_dispatch_imsg(struct imsgbuf *ibuf, int idx, u_int *listener_cnt)
2578 {
2579 	struct imsg		 imsg;
2580 	struct mrt		 xmrt;
2581 	struct mrt		*mrt;
2582 	struct imsgbuf		*i;
2583 	struct peer		*p;
2584 	struct listen_addr	*la, *nla;
2585 	struct kif		*kif;
2586 	u_char			*data;
2587 	int			 n, fd, depend_ok, restricted;
2588 	u_int16_t		 t;
2589 	u_int8_t		 aid, errcode, subcode;
2590 
2591 	while (ibuf) {
2592 		if ((n = imsg_get(ibuf, &imsg)) == -1)
2593 			fatal("session_dispatch_imsg: imsg_get error");
2594 
2595 		if (n == 0)
2596 			break;
2597 
2598 		switch (imsg.hdr.type) {
2599 		case IMSG_SOCKET_CONN:
2600 		case IMSG_SOCKET_CONN_CTL:
2601 			if (idx != PFD_PIPE_MAIN)
2602 				fatalx("reconf request not from parent");
2603 			if ((fd = imsg.fd) == -1) {
2604 				log_warnx("expected to receive imsg fd to "
2605 				    "RDE but didn't receive any");
2606 				break;
2607 			}
2608 			if ((i = malloc(sizeof(struct imsgbuf))) == NULL)
2609 				fatal(NULL);
2610 			imsg_init(i, fd);
2611 			if (imsg.hdr.type == IMSG_SOCKET_CONN) {
2612 				if (ibuf_rde) {
2613 					log_warnx("Unexpected imsg connection "
2614 					    "to RDE received");
2615 					msgbuf_clear(&ibuf_rde->w);
2616 					free(ibuf_rde);
2617 				}
2618 				ibuf_rde = i;
2619 			} else {
2620 				if (ibuf_rde_ctl) {
2621 					log_warnx("Unexpected imsg ctl "
2622 					    "connection to RDE received");
2623 					msgbuf_clear(&ibuf_rde_ctl->w);
2624 					free(ibuf_rde_ctl);
2625 				}
2626 				ibuf_rde_ctl = i;
2627 			}
2628 			break;
2629 		case IMSG_RECONF_CONF:
2630 			if (idx != PFD_PIPE_MAIN)
2631 				fatalx("reconf request not from parent");
2632 			nconf = new_config();
2633 
2634 			copy_config(nconf, imsg.data);
2635 			pending_reconf = 1;
2636 			break;
2637 		case IMSG_RECONF_PEER:
2638 			if (idx != PFD_PIPE_MAIN)
2639 				fatalx("reconf request not from parent");
2640 			if ((p = calloc(1, sizeof(struct peer))) == NULL)
2641 				fatal("new_peer");
2642 			memcpy(&p->conf, imsg.data, sizeof(struct peer_config));
2643 			p->state = p->prev_state = STATE_NONE;
2644 			p->reconf_action = RECONF_REINIT;
2645 			if (RB_INSERT(peer_head, &nconf->peers, p) != NULL)
2646 				fatalx("%s: peer tree is corrupt", __func__);
2647 			break;
2648 		case IMSG_RECONF_LISTENER:
2649 			if (idx != PFD_PIPE_MAIN)
2650 				fatalx("reconf request not from parent");
2651 			if (nconf == NULL)
2652 				fatalx("IMSG_RECONF_LISTENER but no config");
2653 			nla = imsg.data;
2654 			TAILQ_FOREACH(la, conf->listen_addrs, entry)
2655 				if (!la_cmp(la, nla))
2656 					break;
2657 
2658 			if (la == NULL) {
2659 				if (nla->reconf != RECONF_REINIT)
2660 					fatalx("king bula sez: "
2661 					    "expected REINIT");
2662 
2663 				if ((nla->fd = imsg.fd) == -1)
2664 					log_warnx("expected to receive fd for "
2665 					    "%s but didn't receive any",
2666 					    log_sockaddr((struct sockaddr *)
2667 					    &nla->sa, nla->sa_len));
2668 
2669 				la = calloc(1, sizeof(struct listen_addr));
2670 				if (la == NULL)
2671 					fatal(NULL);
2672 				memcpy(&la->sa, &nla->sa, sizeof(la->sa));
2673 				la->flags = nla->flags;
2674 				la->fd = nla->fd;
2675 				la->reconf = RECONF_REINIT;
2676 				TAILQ_INSERT_TAIL(nconf->listen_addrs, la,
2677 				    entry);
2678 			} else {
2679 				if (nla->reconf != RECONF_KEEP)
2680 					fatalx("king bula sez: expected KEEP");
2681 				la->reconf = RECONF_KEEP;
2682 			}
2683 
2684 			break;
2685 		case IMSG_RECONF_CTRL:
2686 			if (idx != PFD_PIPE_MAIN)
2687 				fatalx("reconf request not from parent");
2688 			if (imsg.hdr.len != IMSG_HEADER_SIZE +
2689 			    sizeof(restricted))
2690 				fatalx("IFINFO imsg with wrong len");
2691 			memcpy(&restricted, imsg.data, sizeof(restricted));
2692 			if (imsg.fd == -1) {
2693 				log_warnx("expected to receive fd for control "
2694 				    "socket but didn't receive any");
2695 				break;
2696 			}
2697 			if (restricted) {
2698 				control_shutdown(rcsock);
2699 				rcsock = imsg.fd;
2700 			} else {
2701 				control_shutdown(csock);
2702 				csock = imsg.fd;
2703 			}
2704 			break;
2705 		case IMSG_RECONF_DRAIN:
2706 			if (idx != PFD_PIPE_MAIN)
2707 				fatalx("reconf request not from parent");
2708 			imsg_compose(ibuf_main, IMSG_RECONF_DRAIN, 0, 0,
2709 			    -1, NULL, 0);
2710 			break;
2711 		case IMSG_RECONF_DONE:
2712 			if (idx != PFD_PIPE_MAIN)
2713 				fatalx("reconf request not from parent");
2714 			if (nconf == NULL)
2715 				fatalx("got IMSG_RECONF_DONE but no config");
2716 			copy_config(conf, nconf);
2717 			merge_peers(conf, nconf);
2718 
2719 			/* delete old listeners */
2720 			for (la = TAILQ_FIRST(conf->listen_addrs); la != NULL;
2721 			    la = nla) {
2722 				nla = TAILQ_NEXT(la, entry);
2723 				if (la->reconf == RECONF_NONE) {
2724 					log_info("not listening on %s any more",
2725 					    log_sockaddr((struct sockaddr *)
2726 					    &la->sa, la->sa_len));
2727 					TAILQ_REMOVE(conf->listen_addrs, la,
2728 					    entry);
2729 					close(la->fd);
2730 					free(la);
2731 				}
2732 			}
2733 
2734 			/* add new listeners */
2735 			TAILQ_CONCAT(conf->listen_addrs, nconf->listen_addrs,
2736 			    entry);
2737 
2738 			setup_listeners(listener_cnt);
2739 			free_config(nconf);
2740 			nconf = NULL;
2741 			pending_reconf = 0;
2742 			log_info("SE reconfigured");
2743 			imsg_compose(ibuf_main, IMSG_RECONF_DONE, 0, 0,
2744 			    -1, NULL, 0);
2745 			break;
2746 		case IMSG_IFINFO:
2747 			if (idx != PFD_PIPE_MAIN)
2748 				fatalx("IFINFO message not from parent");
2749 			if (imsg.hdr.len != IMSG_HEADER_SIZE +
2750 			    sizeof(struct kif))
2751 				fatalx("IFINFO imsg with wrong len");
2752 			kif = imsg.data;
2753 			depend_ok = kif->depend_state;
2754 
2755 			RB_FOREACH(p, peer_head, &conf->peers)
2756 				if (!strcmp(p->conf.if_depend, kif->ifname)) {
2757 					if (depend_ok && !p->depend_ok) {
2758 						p->depend_ok = depend_ok;
2759 						bgp_fsm(p, EVNT_START);
2760 					} else if (!depend_ok && p->depend_ok) {
2761 						p->depend_ok = depend_ok;
2762 						session_stop(p,
2763 						    ERR_CEASE_OTHER_CHANGE);
2764 					}
2765 				}
2766 			break;
2767 		case IMSG_MRT_OPEN:
2768 		case IMSG_MRT_REOPEN:
2769 			if (imsg.hdr.len > IMSG_HEADER_SIZE +
2770 			    sizeof(struct mrt)) {
2771 				log_warnx("wrong imsg len");
2772 				break;
2773 			}
2774 
2775 			memcpy(&xmrt, imsg.data, sizeof(struct mrt));
2776 			if ((xmrt.wbuf.fd = imsg.fd) == -1)
2777 				log_warnx("expected to receive fd for mrt dump "
2778 				    "but didn't receive any");
2779 
2780 			mrt = mrt_get(&mrthead, &xmrt);
2781 			if (mrt == NULL) {
2782 				/* new dump */
2783 				mrt = calloc(1, sizeof(struct mrt));
2784 				if (mrt == NULL)
2785 					fatal("session_dispatch_imsg");
2786 				memcpy(mrt, &xmrt, sizeof(struct mrt));
2787 				TAILQ_INIT(&mrt->wbuf.bufs);
2788 				LIST_INSERT_HEAD(&mrthead, mrt, entry);
2789 			} else {
2790 				/* old dump reopened */
2791 				close(mrt->wbuf.fd);
2792 				mrt->wbuf.fd = xmrt.wbuf.fd;
2793 			}
2794 			break;
2795 		case IMSG_MRT_CLOSE:
2796 			if (imsg.hdr.len > IMSG_HEADER_SIZE +
2797 			    sizeof(struct mrt)) {
2798 				log_warnx("wrong imsg len");
2799 				break;
2800 			}
2801 
2802 			memcpy(&xmrt, imsg.data, sizeof(struct mrt));
2803 			mrt = mrt_get(&mrthead, &xmrt);
2804 			if (mrt != NULL)
2805 				mrt_done(mrt);
2806 			break;
2807 		case IMSG_CTL_KROUTE:
2808 		case IMSG_CTL_KROUTE_ADDR:
2809 		case IMSG_CTL_SHOW_NEXTHOP:
2810 		case IMSG_CTL_SHOW_INTERFACE:
2811 		case IMSG_CTL_SHOW_FIB_TABLES:
2812 			if (idx != PFD_PIPE_MAIN)
2813 				fatalx("ctl kroute request not from parent");
2814 			control_imsg_relay(&imsg);
2815 			break;
2816 		case IMSG_CTL_SHOW_RIB:
2817 		case IMSG_CTL_SHOW_RIB_PREFIX:
2818 		case IMSG_CTL_SHOW_RIB_COMMUNITIES:
2819 		case IMSG_CTL_SHOW_RIB_ATTR:
2820 		case IMSG_CTL_SHOW_RIB_MEM:
2821 		case IMSG_CTL_SHOW_RIB_HASH:
2822 		case IMSG_CTL_SHOW_NETWORK:
2823 		case IMSG_CTL_SHOW_NEIGHBOR:
2824 			if (idx != PFD_PIPE_ROUTE_CTL)
2825 				fatalx("ctl rib request not from RDE");
2826 			control_imsg_relay(&imsg);
2827 			break;
2828 		case IMSG_CTL_END:
2829 		case IMSG_CTL_RESULT:
2830 			control_imsg_relay(&imsg);
2831 			break;
2832 		case IMSG_UPDATE:
2833 			if (idx != PFD_PIPE_ROUTE)
2834 				fatalx("update request not from RDE");
2835 			if (imsg.hdr.len > IMSG_HEADER_SIZE +
2836 			    MAX_PKTSIZE - MSGSIZE_HEADER ||
2837 			    imsg.hdr.len < IMSG_HEADER_SIZE +
2838 			    MSGSIZE_UPDATE_MIN - MSGSIZE_HEADER)
2839 				log_warnx("RDE sent invalid update");
2840 			else
2841 				session_update(imsg.hdr.peerid, imsg.data,
2842 				    imsg.hdr.len - IMSG_HEADER_SIZE);
2843 			break;
2844 		case IMSG_UPDATE_ERR:
2845 			if (idx != PFD_PIPE_ROUTE)
2846 				fatalx("update request not from RDE");
2847 			if (imsg.hdr.len < IMSG_HEADER_SIZE + 2) {
2848 				log_warnx("RDE sent invalid notification");
2849 				break;
2850 			}
2851 			if ((p = getpeerbyid(conf, imsg.hdr.peerid)) == NULL) {
2852 				log_warnx("no such peer: id=%u",
2853 				    imsg.hdr.peerid);
2854 				break;
2855 			}
2856 			data = imsg.data;
2857 			errcode = *data++;
2858 			subcode = *data++;
2859 
2860 			if (imsg.hdr.len == IMSG_HEADER_SIZE + 2)
2861 				data = NULL;
2862 
2863 			session_notification(p, errcode, subcode,
2864 			    data, imsg.hdr.len - IMSG_HEADER_SIZE - 2);
2865 			switch (errcode) {
2866 			case ERR_CEASE:
2867 				switch (subcode) {
2868 				case ERR_CEASE_MAX_PREFIX:
2869 				case ERR_CEASE_MAX_SENT_PREFIX:
2870 					t = p->conf.max_out_prefix_restart;
2871 					if (subcode == ERR_CEASE_MAX_PREFIX)
2872 						t = p->conf.max_prefix_restart;
2873 
2874 					bgp_fsm(p, EVNT_STOP);
2875 					if (t)
2876 						timer_set(p, Timer_IdleHold,
2877 						    60 * t);
2878 					break;
2879 				default:
2880 					bgp_fsm(p, EVNT_CON_FATAL);
2881 					break;
2882 				}
2883 				break;
2884 			default:
2885 				bgp_fsm(p, EVNT_CON_FATAL);
2886 				break;
2887 			}
2888 			break;
2889 		case IMSG_SESSION_RESTARTED:
2890 			if (idx != PFD_PIPE_ROUTE)
2891 				fatalx("update request not from RDE");
2892 			if (imsg.hdr.len < IMSG_HEADER_SIZE + sizeof(aid)) {
2893 				log_warnx("RDE sent invalid restart msg");
2894 				break;
2895 			}
2896 			if ((p = getpeerbyid(conf, imsg.hdr.peerid)) == NULL) {
2897 				log_warnx("no such peer: id=%u",
2898 				    imsg.hdr.peerid);
2899 				break;
2900 			}
2901 			memcpy(&aid, imsg.data, sizeof(aid));
2902 			if (aid >= AID_MAX)
2903 				fatalx("IMSG_SESSION_RESTARTED: bad AID");
2904 			if (p->capa.neg.grestart.flags[aid] &
2905 			    CAPA_GR_RESTARTING) {
2906 				log_peer_warnx(&p->conf,
2907 				    "graceful restart of %s finished",
2908 				    aid2str(aid));
2909 				p->capa.neg.grestart.flags[aid] &=
2910 				    ~CAPA_GR_RESTARTING;
2911 				timer_stop(p, Timer_RestartTimeout);
2912 
2913 				/* signal back to RDE to cleanup stale routes */
2914 				if (imsg_rde(IMSG_SESSION_RESTARTED,
2915 				    imsg.hdr.peerid, &aid, sizeof(aid)) == -1)
2916 					fatal("imsg_compose: "
2917 					    "IMSG_SESSION_RESTARTED");
2918 			}
2919 			break;
2920 		case IMSG_SESSION_DOWN:
2921 			if (idx != PFD_PIPE_ROUTE)
2922 				fatalx("update request not from RDE");
2923 			if ((p = getpeerbyid(conf, imsg.hdr.peerid)) == NULL) {
2924 				log_warnx("no such peer: id=%u",
2925 				    imsg.hdr.peerid);
2926 				break;
2927 			}
2928 			session_stop(p, ERR_CEASE_ADMIN_DOWN);
2929 			break;
2930 		default:
2931 			break;
2932 		}
2933 		imsg_free(&imsg);
2934 	}
2935 }
2936 
2937 int
2938 la_cmp(struct listen_addr *a, struct listen_addr *b)
2939 {
2940 	struct sockaddr_in	*in_a, *in_b;
2941 	struct sockaddr_in6	*in6_a, *in6_b;
2942 
2943 	if (a->sa.ss_family != b->sa.ss_family)
2944 		return (1);
2945 
2946 	switch (a->sa.ss_family) {
2947 	case AF_INET:
2948 		in_a = (struct sockaddr_in *)&a->sa;
2949 		in_b = (struct sockaddr_in *)&b->sa;
2950 		if (in_a->sin_addr.s_addr != in_b->sin_addr.s_addr)
2951 			return (1);
2952 		if (in_a->sin_port != in_b->sin_port)
2953 			return (1);
2954 		break;
2955 	case AF_INET6:
2956 		in6_a = (struct sockaddr_in6 *)&a->sa;
2957 		in6_b = (struct sockaddr_in6 *)&b->sa;
2958 		if (bcmp(&in6_a->sin6_addr, &in6_b->sin6_addr,
2959 		    sizeof(struct in6_addr)))
2960 			return (1);
2961 		if (in6_a->sin6_port != in6_b->sin6_port)
2962 			return (1);
2963 		break;
2964 	default:
2965 		fatal("king bula sez: unknown address family");
2966 		/* NOTREACHED */
2967 	}
2968 
2969 	return (0);
2970 }
2971 
2972 struct peer *
2973 getpeerbydesc(struct bgpd_config *c, const char *descr)
2974 {
2975 	struct peer	*p, *res = NULL;
2976 	int		 match = 0;
2977 
2978 	RB_FOREACH(p, peer_head, &c->peers)
2979 		if (!strcmp(p->conf.descr, descr)) {
2980 			res = p;
2981 			match++;
2982 		}
2983 
2984 	if (match > 1)
2985 		log_info("neighbor description \"%s\" not unique, request "
2986 		    "aborted", descr);
2987 
2988 	if (match == 1)
2989 		return (res);
2990 	else
2991 		return (NULL);
2992 }
2993 
2994 struct peer *
2995 getpeerbyip(struct bgpd_config *c, struct sockaddr *ip)
2996 {
2997 	struct bgpd_addr addr;
2998 	struct peer	*p, *newpeer, *loose = NULL;
2999 	u_int32_t	 id;
3000 
3001 	sa2addr(ip, &addr, NULL);
3002 
3003 	/* we might want a more effective way to find peers by IP */
3004 	RB_FOREACH(p, peer_head, &c->peers)
3005 		if (!p->conf.template &&
3006 		    !memcmp(&addr, &p->conf.remote_addr, sizeof(addr)))
3007 			return (p);
3008 
3009 	/* try template matching */
3010 	RB_FOREACH(p, peer_head, &c->peers)
3011 		if (p->conf.template &&
3012 		    p->conf.remote_addr.aid == addr.aid &&
3013 		    session_match_mask(p, &addr))
3014 			if (loose == NULL || loose->conf.remote_masklen <
3015 			    p->conf.remote_masklen)
3016 				loose = p;
3017 
3018 	if (loose != NULL) {
3019 		/* clone */
3020 		if ((newpeer = malloc(sizeof(struct peer))) == NULL)
3021 			fatal(NULL);
3022 		memcpy(newpeer, loose, sizeof(struct peer));
3023 		for (id = PEER_ID_DYN_MAX; id > PEER_ID_STATIC_MAX; id--) {
3024 			if (getpeerbyid(c, id) == NULL)	/* we found a free id */
3025 				break;
3026 		}
3027 		newpeer->template = loose;
3028 		session_template_clone(newpeer, ip, id, 0);
3029 		newpeer->state = newpeer->prev_state = STATE_NONE;
3030 		newpeer->reconf_action = RECONF_KEEP;
3031 		newpeer->rbuf = NULL;
3032 		newpeer->rpending = 0;
3033 		init_peer(newpeer);
3034 		bgp_fsm(newpeer, EVNT_START);
3035 		if (RB_INSERT(peer_head, &c->peers, newpeer) != NULL)
3036 			fatalx("%s: peer tree is corrupt", __func__);
3037 		return (newpeer);
3038 	}
3039 
3040 	return (NULL);
3041 }
3042 
3043 struct peer *
3044 getpeerbyid(struct bgpd_config *c, u_int32_t peerid)
3045 {
3046 	static struct peer lookup;
3047 
3048 	lookup.conf.id = peerid;
3049 
3050 	return RB_FIND(peer_head, &c->peers, &lookup);
3051 }
3052 
3053 int
3054 peer_matched(struct peer *p, struct ctl_neighbor *n)
3055 {
3056 	char *s;
3057 
3058 	if (n && n->addr.aid) {
3059 		if (memcmp(&p->conf.remote_addr, &n->addr,
3060 		    sizeof(p->conf.remote_addr)))
3061 			return 0;
3062 	} else if (n && n->descr[0]) {
3063 		s = n->is_group ? p->conf.group : p->conf.descr;
3064 		if (strcmp(s, n->descr))
3065 			return 0;
3066 	}
3067 	return 1;
3068 }
3069 
3070 void
3071 session_template_clone(struct peer *p, struct sockaddr *ip, u_int32_t id,
3072     u_int32_t as)
3073 {
3074 	struct bgpd_addr	remote_addr;
3075 
3076 	if (ip)
3077 		sa2addr(ip, &remote_addr, NULL);
3078 	else
3079 		memcpy(&remote_addr, &p->conf.remote_addr, sizeof(remote_addr));
3080 
3081 	memcpy(&p->conf, &p->template->conf, sizeof(struct peer_config));
3082 
3083 	p->conf.id = id;
3084 
3085 	if (as) {
3086 		p->conf.remote_as = as;
3087 		p->conf.ebgp = (p->conf.remote_as != p->conf.local_as);
3088 		if (!p->conf.ebgp)
3089 			/* force enforce_as off for iBGP sessions */
3090 			p->conf.enforce_as = ENFORCE_AS_OFF;
3091 	}
3092 
3093 	memcpy(&p->conf.remote_addr, &remote_addr, sizeof(remote_addr));
3094 	switch (p->conf.remote_addr.aid) {
3095 	case AID_INET:
3096 		p->conf.remote_masklen = 32;
3097 		break;
3098 	case AID_INET6:
3099 		p->conf.remote_masklen = 128;
3100 		break;
3101 	}
3102 	p->conf.template = 0;
3103 }
3104 
3105 int
3106 session_match_mask(struct peer *p, struct bgpd_addr *a)
3107 {
3108 	struct in_addr	 v4masked;
3109 	struct in6_addr	 v6masked;
3110 
3111 	switch (p->conf.remote_addr.aid) {
3112 	case AID_INET:
3113 		inet4applymask(&v4masked, &a->v4, p->conf.remote_masklen);
3114 		if (p->conf.remote_addr.v4.s_addr == v4masked.s_addr)
3115 			return (1);
3116 		return (0);
3117 	case AID_INET6:
3118 		inet6applymask(&v6masked, &a->v6, p->conf.remote_masklen);
3119 
3120 		if (memcmp(&v6masked, &p->conf.remote_addr.v6,
3121 		    sizeof(v6masked)) == 0)
3122 			return (1);
3123 		return (0);
3124 	}
3125 	return (0);
3126 }
3127 
3128 void
3129 session_down(struct peer *peer)
3130 {
3131 	bzero(&peer->capa.neg, sizeof(peer->capa.neg));
3132 	peer->stats.last_updown = getmonotime();
3133 	/*
3134 	 * session_down is called in the exit code path so check
3135 	 * if the RDE is still around, if not there is no need to
3136 	 * send the message.
3137 	 */
3138 	if (ibuf_rde == NULL)
3139 		return;
3140 	if (imsg_rde(IMSG_SESSION_DOWN, peer->conf.id, NULL, 0) == -1)
3141 		fatalx("imsg_compose error");
3142 }
3143 
3144 void
3145 session_up(struct peer *p)
3146 {
3147 	struct session_up	 sup;
3148 
3149 	if (imsg_rde(IMSG_SESSION_ADD, p->conf.id,
3150 	    &p->conf, sizeof(p->conf)) == -1)
3151 		fatalx("imsg_compose error");
3152 
3153 	if (p->local.aid == AID_INET) {
3154 		sup.local_v4_addr = p->local;
3155 		sup.local_v6_addr = p->local_alt;
3156 	} else {
3157 		sup.local_v6_addr = p->local;
3158 		sup.local_v4_addr = p->local_alt;
3159 	}
3160 	sup.remote_addr = p->remote;
3161 
3162 	sup.remote_bgpid = p->remote_bgpid;
3163 	sup.short_as = p->short_as;
3164 	memcpy(&sup.capa, &p->capa.neg, sizeof(sup.capa));
3165 	p->stats.last_updown = getmonotime();
3166 	if (imsg_rde(IMSG_SESSION_UP, p->conf.id, &sup, sizeof(sup)) == -1)
3167 		fatalx("imsg_compose error");
3168 }
3169 
3170 int
3171 imsg_ctl_parent(int type, u_int32_t peerid, pid_t pid, void *data,
3172     u_int16_t datalen)
3173 {
3174 	return (imsg_compose(ibuf_main, type, peerid, pid, -1, data, datalen));
3175 }
3176 
3177 int
3178 imsg_ctl_rde(int type, pid_t pid, void *data, u_int16_t datalen)
3179 {
3180 	if (ibuf_rde_ctl == NULL) {
3181 		log_warnx("Can't send message %u to RDE, ctl pipe closed",
3182 		    type);
3183 		return (0);
3184 	}
3185 	/*
3186 	 * Use control socket to talk to RDE to bypass the queue of the
3187 	 * regular imsg socket.
3188 	 */
3189 	return (imsg_compose(ibuf_rde_ctl, type, 0, pid, -1, data, datalen));
3190 }
3191 
3192 int
3193 imsg_rde(int type, uint32_t peerid, void *data, u_int16_t datalen)
3194 {
3195 	if (ibuf_rde == NULL) {
3196 		log_warnx("Can't send message %u to RDE, pipe closed", type);
3197 		return (0);
3198 	}
3199 
3200 	return (imsg_compose(ibuf_rde, type, peerid, 0, -1, data, datalen));
3201 }
3202 
3203 void
3204 session_demote(struct peer *p, int level)
3205 {
3206 	struct demote_msg	msg;
3207 
3208 	strlcpy(msg.demote_group, p->conf.demote_group,
3209 	    sizeof(msg.demote_group));
3210 	msg.level = level;
3211 	if (imsg_compose(ibuf_main, IMSG_DEMOTE, p->conf.id, 0, -1,
3212 	    &msg, sizeof(msg)) == -1)
3213 		fatalx("imsg_compose error");
3214 
3215 	p->demoted += level;
3216 }
3217 
3218 void
3219 session_stop(struct peer *peer, u_int8_t subcode)
3220 {
3221 	char data[REASON_LEN];
3222 	size_t datalen;
3223 	size_t reason_len;
3224 	char *communication;
3225 
3226 	datalen = 0;
3227 	communication = peer->conf.reason;
3228 
3229 	if ((subcode == ERR_CEASE_ADMIN_DOWN ||
3230 	    subcode == ERR_CEASE_ADMIN_RESET)
3231 	    && communication && *communication) {
3232 		reason_len = strlen(communication);
3233 		if (reason_len > REASON_LEN - 1) {
3234 		    log_peer_warnx(&peer->conf,
3235 			"trying to send overly long shutdown reason");
3236 		} else {
3237 			data[0] = reason_len;
3238 			datalen = reason_len + sizeof(data[0]);
3239 			memcpy(data + 1, communication, reason_len);
3240 		}
3241 	}
3242 	switch (peer->state) {
3243 	case STATE_OPENSENT:
3244 	case STATE_OPENCONFIRM:
3245 	case STATE_ESTABLISHED:
3246 		session_notification(peer, ERR_CEASE, subcode, data, datalen);
3247 		break;
3248 	default:
3249 		/* session not open, no need to send notification */
3250 		break;
3251 	}
3252 	bgp_fsm(peer, EVNT_STOP);
3253 }
3254 
3255 void
3256 merge_peers(struct bgpd_config *c, struct bgpd_config *nc)
3257 {
3258 	struct peer *p, *np, *next;
3259 
3260 	RB_FOREACH(p, peer_head, &c->peers) {
3261 		/* templates are handled specially */
3262 		if (p->template != NULL)
3263 			continue;
3264 		np = getpeerbyid(nc, p->conf.id);
3265 		if (np == NULL) {
3266 			p->reconf_action = RECONF_DELETE;
3267 			continue;
3268 		}
3269 
3270 		/* peer no longer uses TCP MD5SIG so deconfigure */
3271 		if (p->conf.auth.method == AUTH_MD5SIG &&
3272 		    np->conf.auth.method != AUTH_MD5SIG)
3273 			tcp_md5_del_listener(c, p);
3274 		else if (np->conf.auth.method == AUTH_MD5SIG)
3275 			tcp_md5_add_listener(c, np);
3276 
3277 		memcpy(&p->conf, &np->conf, sizeof(p->conf));
3278 		RB_REMOVE(peer_head, &nc->peers, np);
3279 		free(np);
3280 
3281 		p->reconf_action = RECONF_KEEP;
3282 
3283 		/* had demotion, is demoted, demote removed? */
3284 		if (p->demoted && !p->conf.demote_group[0])
3285 			session_demote(p, -1);
3286 
3287 		/* if session is not open then refresh pfkey data */
3288 		if (p->state < STATE_OPENSENT && !p->template)
3289 			imsg_compose(ibuf_main, IMSG_PFKEY_RELOAD,
3290 			    p->conf.id, 0, -1, NULL, 0);
3291 
3292 		/* sync the RDE in case we keep the peer */
3293 		if (imsg_rde(IMSG_SESSION_ADD, p->conf.id,
3294 		    &p->conf, sizeof(struct peer_config)) == -1)
3295 			fatalx("imsg_compose error");
3296 
3297 		/* apply the config to all clones of a template */
3298 		if (p->conf.template) {
3299 			struct peer *xp;
3300 			RB_FOREACH(xp, peer_head, &c->peers) {
3301 				if (xp->template != p)
3302 					continue;
3303 				session_template_clone(xp, NULL, xp->conf.id,
3304 				    xp->conf.remote_as);
3305 				if (imsg_rde(IMSG_SESSION_ADD, xp->conf.id,
3306 				    &xp->conf, sizeof(xp->conf)) == -1)
3307 					fatalx("imsg_compose error");
3308 			}
3309 		}
3310 	}
3311 
3312 	/* pfkeys of new peers already loaded by the parent process */
3313 	RB_FOREACH_SAFE(np, peer_head, &nc->peers, next) {
3314 		RB_REMOVE(peer_head, &nc->peers, np);
3315 		if (RB_INSERT(peer_head, &c->peers, np) != NULL)
3316 			fatalx("%s: peer tree is corrupt", __func__);
3317 		if (np->conf.auth.method == AUTH_MD5SIG)
3318 			tcp_md5_add_listener(c, np);
3319 	}
3320 }
3321