xref: /openbsd-src/usr.sbin/bgpd/session.c (revision 7350f337b9e3eb4461d99580e625c7ef148d107c)
1 /*	$OpenBSD: session.c,v 1.386 2019/06/22 05:36:40 claudio Exp $ */
2 
3 /*
4  * Copyright (c) 2003, 2004, 2005 Henning Brauer <henning@openbsd.org>
5  * Copyright (c) 2017 Peter van Dijk <peter.van.dijk@powerdns.com>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 
20 #include <sys/types.h>
21 
22 #include <sys/mman.h>
23 #include <sys/socket.h>
24 #include <sys/time.h>
25 #include <sys/resource.h>
26 #include <sys/un.h>
27 #include <netinet/in.h>
28 #include <netinet/ip.h>
29 #include <netinet/tcp.h>
30 #include <arpa/inet.h>
31 #include <limits.h>
32 
33 #include <err.h>
34 #include <errno.h>
35 #include <fcntl.h>
36 #include <poll.h>
37 #include <pwd.h>
38 #include <signal.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <string.h>
42 #include <syslog.h>
43 #include <unistd.h>
44 
45 #include "bgpd.h"
46 #include "mrt.h"
47 #include "session.h"
48 #include "log.h"
49 
/*
 * Fixed slots at the start of the pollfd array built in session_main():
 * the three imsg pipes, then the two control sockets.  The listening
 * sockets are placed from PFD_LISTENERS_START onwards.
 */
50 #define PFD_PIPE_MAIN		0
51 #define PFD_PIPE_ROUTE		1
52 #define PFD_PIPE_ROUTE_CTL	2
53 #define PFD_SOCK_CTL		3
54 #define PFD_SOCK_RCTL		4
55 #define PFD_LISTENERS_START	5
56 
/* forward declarations for the session engine's functions */
57 void	session_sighdlr(int);
58 int	setup_listeners(u_int *);
59 void	init_peer(struct peer *);
60 void	start_timer_holdtime(struct peer *);
61 void	start_timer_keepalive(struct peer *);
62 void	session_close_connection(struct peer *);
63 void	change_state(struct peer *, enum session_state, enum session_events);
64 int	session_setup_socket(struct peer *);
65 void	session_accept(int);
66 int	session_connect(struct peer *);
67 void	session_tcp_established(struct peer *);
68 void	session_capa_ann_none(struct peer *);
69 int	session_capa_add(struct ibuf *, u_int8_t, u_int8_t);
70 int	session_capa_add_mp(struct ibuf *, u_int8_t);
71 int	session_capa_add_gr(struct peer *, struct ibuf *, u_int8_t);
72 struct bgp_msg	*session_newmsg(enum msg_type, u_int16_t);
73 int	session_sendmsg(struct bgp_msg *, struct peer *);
74 void	session_open(struct peer *);
75 void	session_keepalive(struct peer *);
76 void	session_update(u_int32_t, void *, size_t);
77 void	session_notification(struct peer *, u_int8_t, u_int8_t, void *,
78 	    ssize_t);
79 void	session_rrefresh(struct peer *, u_int8_t);
80 int	session_graceful_restart(struct peer *);
81 int	session_graceful_stop(struct peer *);
82 int	session_dispatch_msg(struct pollfd *, struct peer *);
83 void	session_process_msg(struct peer *);
84 int	parse_header(struct peer *, u_char *, u_int16_t *, u_int8_t *);
85 int	parse_open(struct peer *);
86 int	parse_update(struct peer *);
87 int	parse_refresh(struct peer *);
88 int	parse_notification(struct peer *);
89 int	parse_capabilities(struct peer *, u_char *, u_int16_t, u_int32_t *);
90 int	capa_neg_calc(struct peer *);
91 void	session_dispatch_imsg(struct imsgbuf *, int, u_int *);
92 void	session_up(struct peer *);
93 void	session_down(struct peer *);
94 int	imsg_rde(int, u_int32_t, void *, u_int16_t);
95 void	session_demote(struct peer *, int);
96 void	merge_peers(struct bgpd_config *, struct bgpd_config *);
97 
98 int		 la_cmp(struct listen_addr *, struct listen_addr *);
99 void		 session_template_clone(struct peer *, struct sockaddr *,
100 		    u_int32_t, u_int32_t);
101 int		 session_match_mask(struct peer *, struct bgpd_addr *);
102 
/*
 * conf is the configuration the main loop works on (peers, listen
 * addresses).  nconf is presumably the configuration being loaded during
 * a reload -- TODO confirm, it is not used in the code shown here.
 */
103 struct bgpd_config	*conf, *nconf;
104 struct bgpd_sysdep	 sysdep;
/* set by session_sighdlr() on SIGINT/SIGTERM; ends the main loop */
105 volatile sig_atomic_t	 session_quit;
/* while non-zero the main loop skips its peer init/reinit/delete pass */
106 int			 pending_reconf;
/* control sockets handed to control_accept(); -1 until set elsewhere */
107 int			 csock = -1, rcsock = -1;
/* number of initialised peers: bumped in init_peer(), dropped on removal */
108 u_int			 peer_cnt;
/* imsg channels to the RDE, the RDE control pipe and the parent process */
109 struct imsgbuf		*ibuf_rde;
110 struct imsgbuf		*ibuf_rde_ctl;
111 struct imsgbuf		*ibuf_main;
112 
/* list of mrt dump descriptors serviced by the main loop */
113 struct mrt_head		 mrthead;
/*
 * non-zero while accepting is paused; holds the getmonotime() value at
 * which accept4() failed with ENFILE/EMFILE
 */
114 time_t			 pauseaccept;
115 
116 static inline int
117 peer_compare(const struct peer *a, const struct peer *b)
118 {
119 	return a->conf.id - b->conf.id;
120 }
121 
122 RB_GENERATE(peer_head, peer, entry, peer_compare);
123 
124 void
125 session_sighdlr(int sig)
126 {
127 	switch (sig) {
128 	case SIGINT:
129 	case SIGTERM:
130 		session_quit = 1;
131 		break;
132 	}
133 }
134 
135 int
136 setup_listeners(u_int *la_cnt)
137 {
138 	int			 ttl = 255;
139 	struct listen_addr	*la;
140 	u_int			 cnt = 0;
141 
142 	TAILQ_FOREACH(la, conf->listen_addrs, entry) {
143 		la->reconf = RECONF_NONE;
144 		cnt++;
145 
146 		if (la->flags & LISTENER_LISTENING)
147 			continue;
148 
149 		if (la->fd == -1) {
150 			log_warn("cannot establish listener on %s: invalid fd",
151 			    log_sockaddr((struct sockaddr *)&la->sa,
152 			    la->sa_len));
153 			continue;
154 		}
155 
156 		if (tcp_md5_listen(la->fd, &conf->peers) == -1)
157 			fatal("tcp_md5_listen");
158 
159 		/* set ttl to 255 so that ttl-security works */
160 		if (la->sa.ss_family == AF_INET && setsockopt(la->fd,
161 		    IPPROTO_IP, IP_TTL, &ttl, sizeof(ttl)) == -1) {
162 			log_warn("setup_listeners setsockopt TTL");
163 			continue;
164 		}
165 		if (la->sa.ss_family == AF_INET6 && setsockopt(la->fd,
166 		    IPPROTO_IPV6, IPV6_UNICAST_HOPS, &ttl, sizeof(ttl)) == -1) {
167 			log_warn("setup_listeners setsockopt hoplimit");
168 			continue;
169 		}
170 
171 		if (listen(la->fd, MAX_BACKLOG)) {
172 			close(la->fd);
173 			fatal("listen");
174 		}
175 
176 		la->flags |= LISTENER_LISTENING;
177 
178 		log_info("listening on %s",
179 		    log_sockaddr((struct sockaddr *)&la->sa, la->sa_len));
180 	}
181 
182 	*la_cnt = cnt;
183 
184 	return (0);
185 }
186 
/*
 * Entry point of the session engine process.
 *
 * Sets up logging, chroots and drops privileges, installs the signal
 * dispositions and then runs the poll loop until session_quit is set.
 * Each iteration: initialises/reinitialises/deletes peers, sizes the
 * helper arrays, fills the pollfd array, runs due peer timers, polls and
 * finally dispatches whatever became ready.  After the loop all peers,
 * mrt dumps, buffers and pipes are released and the process exits.
 *
 * debug and verbose are passed on to log_init() and log_setverbose().
 * This function does not return; it ends in exit(0).
 */
187 void
188 session_main(int debug, int verbose)
189 {
190 	int			 timeout;
191 	unsigned int		 i, j, idx_peers, idx_listeners, idx_mrts;
192 	u_int			 pfd_elms = 0, peer_l_elms = 0, mrt_l_elms = 0;
193 	u_int			 listener_cnt, ctl_cnt, mrt_cnt;
194 	u_int			 new_cnt;
195 	struct passwd		*pw;
196 	struct peer		*p, **peer_l = NULL, *next;
197 	struct mrt		*m, *xm, **mrt_l = NULL;
198 	struct pollfd		*pfd = NULL;
199 	struct ctl_conn		*ctl_conn;
200 	struct listen_addr	*la;
201 	void			*newp;
202 	time_t			 now;
203 	short			 events;
204 
205 	log_init(debug, LOG_DAEMON);
206 	log_setverbose(verbose);
207 
208 	bgpd_process = PROC_SE;
209 	log_procinit(log_procnames[bgpd_process]);
210 
	/* chroot into BGPD_USER's home directory and drop privileges */
211 	if ((pw = getpwnam(BGPD_USER)) == NULL)
212 		fatal(NULL);
213 
214 	if (chroot(pw->pw_dir) == -1)
215 		fatal("chroot");
216 	if (chdir("/") == -1)
217 		fatal("chdir(\"/\")");
218 
219 	setproctitle("session engine");
220 
221 	if (setgroups(1, &pw->pw_gid) ||
222 	    setresgid(pw->pw_gid, pw->pw_gid, pw->pw_gid) ||
223 	    setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid))
224 		fatal("can't drop privileges");
225 
226 	if (pledge("stdio inet recvfd", NULL) == -1)
227 		fatal("pledge");
228 
	/* only SIGTERM and SIGINT are handled; the others are ignored */
229 	signal(SIGTERM, session_sighdlr);
230 	signal(SIGINT, session_sighdlr);
231 	signal(SIGPIPE, SIG_IGN);
232 	signal(SIGHUP, SIG_IGN);
233 	signal(SIGALRM, SIG_IGN);
234 	signal(SIGUSR1, SIG_IGN);
235 
	/* the imsg channel to the parent is initialised on descriptor 3 */
236 	if ((ibuf_main = malloc(sizeof(struct imsgbuf))) == NULL)
237 		fatal(NULL);
238 	imsg_init(ibuf_main, 3);
239 
240 	TAILQ_INIT(&ctl_conns);
241 	LIST_INIT(&mrthead);
242 	listener_cnt = 0;
243 	peer_cnt = 0;
244 	ctl_cnt = 0;
245 
246 	conf = new_config();
247 	log_info("session engine ready");
248 
249 	while (session_quit == 0) {
250 		/* check for peers to be initialized or deleted */
251 		if (!pending_reconf) {
252 			RB_FOREACH_SAFE(p, peer_head, &conf->peers, next) {
253 				/* cloned peer that idled out? */
254 				if (p->template && (p->state == STATE_IDLE ||
255 				    p->state == STATE_ACTIVE) &&
256 				    time(NULL) - p->stats.last_updown >=
257 				    INTERVAL_HOLD_CLONED)
258 					p->reconf_action = RECONF_DELETE;
259 
260 				/* new peer that needs init? */
261 				if (p->state == STATE_NONE)
262 					init_peer(p);
263 
264 				/* reinit due? */
265 				if (p->reconf_action == RECONF_REINIT) {
266 					session_stop(p, ERR_CEASE_ADMIN_RESET);
267 					if (!p->conf.down)
268 						timer_set(p, Timer_IdleHold, 0);
269 				}
270 
271 				/* deletion due? */
272 				if (p->reconf_action == RECONF_DELETE) {
273 					if (p->demoted)
274 						session_demote(p, -1);
275 					p->conf.demote_group[0] = 0;
276 					session_stop(p, ERR_CEASE_PEER_UNCONF);
277 					log_peer_warnx(&p->conf, "removed");
278 					RB_REMOVE(peer_head, &conf->peers, p);
279 					timer_remove_all(p);
280 					pfkey_remove(p);
281 					free(p);
282 					peer_cnt--;
283 					continue;
284 				}
285 				p->reconf_action = RECONF_NONE;
286 			}
287 		}
288 
		/*
		 * peer_l maps the peer slots of pfd back to their peers;
		 * it only ever grows, up to peer_cnt entries
		 */
289 		if (peer_cnt > peer_l_elms) {
290 			if ((newp = reallocarray(peer_l, peer_cnt,
291 			    sizeof(struct peer *))) == NULL) {
292 				/* panic for now  */
293 				log_warn("could not resize peer_l from %u -> %u"
294 				    " entries", peer_l_elms, peer_cnt);
295 				fatalx("exiting");
296 			}
297 			peer_l = newp;
298 			peer_l_elms = peer_cnt;
299 		}
300 
		/*
		 * release mrt dumps marked for removal and count the ones
		 * that have queued output and therefore need a pollfd slot
		 */
301 		mrt_cnt = 0;
302 		for (m = LIST_FIRST(&mrthead); m != NULL; m = xm) {
303 			xm = LIST_NEXT(m, entry);
304 			if (m->state == MRT_STATE_REMOVE) {
305 				mrt_clean(m);
306 				LIST_REMOVE(m, entry);
307 				free(m);
308 				continue;
309 			}
310 			if (m->wbuf.queued)
311 				mrt_cnt++;
312 		}
313 
314 		if (mrt_cnt > mrt_l_elms) {
315 			if ((newp = reallocarray(mrt_l, mrt_cnt,
316 			    sizeof(struct mrt *))) == NULL) {
317 				/* panic for now  */
318 				log_warn("could not resize mrt_l from %u -> %u"
319 				    " entries", mrt_l_elms, mrt_cnt);
320 				fatalx("exiting");
321 			}
322 			mrt_l = newp;
323 			mrt_l_elms = mrt_cnt;
324 		}
325 
		/*
		 * pfd needs one slot per fixed entry, listener, peer,
		 * control connection and mrt dump with queued output
		 */
326 		new_cnt = PFD_LISTENERS_START + listener_cnt + peer_cnt +
327 		    ctl_cnt + mrt_cnt;
328 		if (new_cnt > pfd_elms) {
329 			if ((newp = reallocarray(pfd, new_cnt,
330 			    sizeof(struct pollfd))) == NULL) {
331 				/* panic for now  */
332 				log_warn("could not resize pfd from %u -> %u"
333 				    " entries", pfd_elms, new_cnt);
334 				fatalx("exiting");
335 			}
336 			pfd = newp;
337 			pfd_elms = new_cnt;
338 		}
339 
340 		bzero(pfd, sizeof(struct pollfd) * pfd_elms);
341 
342 		set_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main);
343 		set_pollfd(&pfd[PFD_PIPE_ROUTE], ibuf_rde);
344 		set_pollfd(&pfd[PFD_PIPE_ROUTE_CTL], ibuf_rde_ctl);
345 
		/*
		 * control and listening sockets are only polled while
		 * accepting is not paused; fd -1 disables the slot
		 */
346 		if (pauseaccept == 0) {
347 			pfd[PFD_SOCK_CTL].fd = csock;
348 			pfd[PFD_SOCK_CTL].events = POLLIN;
349 			pfd[PFD_SOCK_RCTL].fd = rcsock;
350 			pfd[PFD_SOCK_RCTL].events = POLLIN;
351 		} else {
352 			pfd[PFD_SOCK_CTL].fd = -1;
353 			pfd[PFD_SOCK_RCTL].fd = -1;
354 		}
355 
356 		i = PFD_LISTENERS_START;
357 		TAILQ_FOREACH(la, conf->listen_addrs, entry) {
358 			if (pauseaccept == 0) {
359 				pfd[i].fd = la->fd;
360 				pfd[i].events = POLLIN;
361 			} else
362 				pfd[i].fd = -1;
363 			i++;
364 		}
365 		idx_listeners = i;
366 		timeout = 240;	/* loop every 240s at least */
367 
		/*
		 * per peer: run at most one due timer, shorten the poll
		 * timeout to the next timer and add the peer's descriptor
		 */
368 		now = getmonotime();
369 		RB_FOREACH(p, peer_head, &conf->peers) {
370 			time_t	nextaction;
371 			struct peer_timer *pt;
372 
373 			/* check timers */
374 			if ((pt = timer_nextisdue(p, now)) != NULL) {
375 				switch (pt->type) {
376 				case Timer_Hold:
377 					bgp_fsm(p, EVNT_TIMER_HOLDTIME);
378 					break;
379 				case Timer_ConnectRetry:
380 					bgp_fsm(p, EVNT_TIMER_CONNRETRY);
381 					break;
382 				case Timer_Keepalive:
383 					bgp_fsm(p, EVNT_TIMER_KEEPALIVE);
384 					break;
385 				case Timer_IdleHold:
386 					bgp_fsm(p, EVNT_START);
387 					break;
388 				case Timer_IdleHoldReset:
					/*
					 * halve IdleHoldTime each time this
					 * fires until it is back at the
					 * initial value, then clear errcnt
					 */
389 					p->IdleHoldTime /= 2;
390 					if (p->IdleHoldTime <=
391 					    INTERVAL_IDLE_HOLD_INITIAL) {
392 						p->IdleHoldTime =
393 						    INTERVAL_IDLE_HOLD_INITIAL;
394 						timer_stop(p,
395 						    Timer_IdleHoldReset);
396 						p->errcnt = 0;
397 					} else
398 						timer_set(p,
399 						    Timer_IdleHoldReset,
400 						    p->IdleHoldTime);
401 					break;
402 				case Timer_CarpUndemote:
403 					timer_stop(p, Timer_CarpUndemote);
404 					if (p->demoted &&
405 					    p->state == STATE_ESTABLISHED)
406 						session_demote(p, -1);
407 					break;
408 				case Timer_RestartTimeout:
409 					timer_stop(p, Timer_RestartTimeout);
410 					session_graceful_stop(p);
411 					break;
412 				default:
413 					fatalx("King Bula lost in time");
414 				}
415 			}
416 			if ((nextaction = timer_nextduein(p, now)) != -1 &&
417 			    nextaction < timeout)
418 				timeout = nextaction;
419 
420 			/* are we waiting for a write? */
421 			events = POLLIN;
422 			if (p->wbuf.queued > 0 || p->state == STATE_CONNECT)
423 				events |= POLLOUT;
424 			/* is there still work to do? */
425 			if (p->rpending)
426 				timeout = 0;
427 
428 			/* poll events */
			/* events always contains POLLIN at this point */
429 			if (p->fd != -1 && events != 0) {
430 				pfd[i].fd = p->fd;
431 				pfd[i].events = events;
432 				peer_l[i - idx_listeners] = p;
433 				i++;
434 			}
435 		}
436 
437 		idx_peers = i;
438 
		/* mrt dumps with queued output wait for writability */
439 		LIST_FOREACH(m, &mrthead, entry)
440 			if (m->wbuf.queued) {
441 				pfd[i].fd = m->wbuf.fd;
442 				pfd[i].events = POLLOUT;
443 				mrt_l[i - idx_peers] = m;
444 				i++;
445 			}
446 
447 		idx_mrts = i;
448 
		/* control connections take the remaining slots */
449 		TAILQ_FOREACH(ctl_conn, &ctl_conns, entry) {
450 			pfd[i].fd = ctl_conn->ibuf.fd;
451 			pfd[i].events = POLLIN;
452 			if (ctl_conn->ibuf.w.queued > 0)
453 				pfd[i].events |= POLLOUT;
454 			i++;
455 		}
456 
		/* timeout is kept in seconds; poll() takes milliseconds */
457 		if (pauseaccept && timeout > 1)
458 			timeout = 1;
459 		if (timeout < 0)
460 			timeout = 0;
461 		if (poll(pfd, i, timeout * 1000) == -1)
462 			if (errno != EINTR)
463 				fatal("poll error");
464 
465 		/*
466 		 * If we previously saw fd exhaustion, we stop accept()
467 		 * for 1 second to throttle the accept() loop.
468 		 */
469 		if (pauseaccept && getmonotime() > pauseaccept + 1)
470 			pauseaccept = 0;
471 
		/* losing the parent ends the loop; losing the RDE does not */
472 		if (handle_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main) == -1) {
473 			log_warnx("SE: Lost connection to parent");
474 			session_quit = 1;
475 			continue;
476 		} else
477 			session_dispatch_imsg(ibuf_main, PFD_PIPE_MAIN,
478 			    &listener_cnt);
479 
480 		if (handle_pollfd(&pfd[PFD_PIPE_ROUTE], ibuf_rde) == -1) {
481 			log_warnx("SE: Lost connection to RDE");
482 			msgbuf_clear(&ibuf_rde->w);
483 			free(ibuf_rde);
484 			ibuf_rde = NULL;
485 		} else
486 			session_dispatch_imsg(ibuf_rde, PFD_PIPE_ROUTE,
487 			    &listener_cnt);
488 
489 		if (handle_pollfd(&pfd[PFD_PIPE_ROUTE_CTL], ibuf_rde_ctl) ==
490 		    -1) {
491 			log_warnx("SE: Lost connection to RDE control");
492 			msgbuf_clear(&ibuf_rde_ctl->w);
493 			free(ibuf_rde_ctl);
494 			ibuf_rde_ctl = NULL;
495 		} else
496 			session_dispatch_imsg(ibuf_rde_ctl, PFD_PIPE_ROUTE_CTL,
497 			    &listener_cnt);
498 
		/* ctl_cnt grows by whatever control_accept() returns */
499 		if (pfd[PFD_SOCK_CTL].revents & POLLIN)
500 			ctl_cnt += control_accept(csock, 0);
501 
502 		if (pfd[PFD_SOCK_RCTL].revents & POLLIN)
503 			ctl_cnt += control_accept(rcsock, 1);
504 
		/*
		 * walk the remaining slots in the order they were filled
		 * in above: listeners, peers, mrt dumps, control connections
		 */
505 		for (j = PFD_LISTENERS_START; j < idx_listeners; j++)
506 			if (pfd[j].revents & POLLIN)
507 				session_accept(pfd[j].fd);
508 
509 		for (; j < idx_peers; j++)
510 			session_dispatch_msg(&pfd[j],
511 			    peer_l[j - idx_listeners]);
512 
		/* process data sitting in any peer's read buffer */
513 		RB_FOREACH(p, peer_head, &conf->peers)
514 			if (p->rbuf && p->rbuf->wpos)
515 				session_process_msg(p);
516 
517 		for (; j < idx_mrts; j++)
518 			if (pfd[j].revents & POLLOUT)
519 				mrt_write(mrt_l[j - idx_peers]);
520 
521 		for (; j < i; j++)
522 			control_dispatch_msg(&pfd[j], &ctl_cnt, &conf->peers);
523 	}
524 
	/*
	 * shutdown: stop every session with ERR_CEASE_ADMIN_DOWN and the
	 * shutdown message below, then free the peer
	 */
525 	RB_FOREACH_SAFE(p, peer_head, &conf->peers, next) {
526 		RB_REMOVE(peer_head, &conf->peers, p);
527 		strlcpy(p->conf.shutcomm,
528 		    "bgpd shutting down",
529 		    sizeof(p->conf.shutcomm));
530 		session_stop(p, ERR_CEASE_ADMIN_DOWN);
531 		timer_remove_all(p);
532 		free(p);
533 	}
534 
535 	while ((m = LIST_FIRST(&mrthead)) != NULL) {
536 		mrt_clean(m);
537 		LIST_REMOVE(m, entry);
538 		free(m);
539 	}
540 
541 	free_config(conf);
542 	free(peer_l);
543 	free(mrt_l);
544 	free(pfd);
545 
546 	/* close pipes */
	/*
	 * one last write attempt is made on the RDE and parent pipes
	 * before their queues are dropped; the RDE control pipe is only
	 * cleared
	 */
547 	if (ibuf_rde) {
548 		msgbuf_write(&ibuf_rde->w);
549 		msgbuf_clear(&ibuf_rde->w);
550 		close(ibuf_rde->fd);
551 		free(ibuf_rde);
552 	}
553 	if (ibuf_rde_ctl) {
554 		msgbuf_clear(&ibuf_rde_ctl->w);
555 		close(ibuf_rde_ctl->fd);
556 		free(ibuf_rde_ctl);
557 	}
558 	msgbuf_write(&ibuf_main->w);
559 	msgbuf_clear(&ibuf_main->w);
560 	close(ibuf_main->fd);
561 	free(ibuf_main);
562 
563 	control_shutdown(csock);
564 	control_shutdown(rcsock);
565 	log_info("session engine exiting");
566 	exit(0);
567 }
568 
569 void
570 init_peer(struct peer *p)
571 {
572 	TAILQ_INIT(&p->timers);
573 	p->fd = p->wbuf.fd = -1;
574 
575 	if (p->conf.if_depend[0])
576 		imsg_compose(ibuf_main, IMSG_IFINFO, 0, 0, -1,
577 		    p->conf.if_depend, sizeof(p->conf.if_depend));
578 	else
579 		p->depend_ok = 1;
580 
581 	peer_cnt++;
582 
583 	change_state(p, STATE_IDLE, EVNT_NONE);
584 	if (p->conf.down)
585 		timer_stop(p, Timer_IdleHold);		/* no autostart */
586 	else
587 		timer_set(p, Timer_IdleHold, 0);	/* start ASAP */
588 
589 	/*
590 	 * on startup, demote if requested.
591 	 * do not handle new peers. they must reach ESTABLISHED beforehands.
592 	 * peers added at runtime have reconf_action set to RECONF_REINIT.
593 	 */
594 	if (p->reconf_action != RECONF_REINIT && p->conf.demote_group[0])
595 		session_demote(p, +1);
596 }
597 
598 void
599 bgp_fsm(struct peer *peer, enum session_events event)
600 {
601 	switch (peer->state) {
602 	case STATE_NONE:
603 		/* nothing */
604 		break;
605 	case STATE_IDLE:
606 		switch (event) {
607 		case EVNT_START:
608 			timer_stop(peer, Timer_Hold);
609 			timer_stop(peer, Timer_Keepalive);
610 			timer_stop(peer, Timer_IdleHold);
611 
612 			/* allocate read buffer */
613 			peer->rbuf = calloc(1, sizeof(struct ibuf_read));
614 			if (peer->rbuf == NULL)
615 				fatal(NULL);
616 
617 			/* init write buffer */
618 			msgbuf_init(&peer->wbuf);
619 
620 			peer->stats.last_sent_errcode = 0;
621 			peer->stats.last_sent_suberr = 0;
622 
623 			if (!peer->depend_ok)
624 				timer_stop(peer, Timer_ConnectRetry);
625 			else if (peer->passive || peer->conf.passive ||
626 			    peer->conf.template) {
627 				change_state(peer, STATE_ACTIVE, event);
628 				timer_stop(peer, Timer_ConnectRetry);
629 			} else {
630 				change_state(peer, STATE_CONNECT, event);
631 				timer_set(peer, Timer_ConnectRetry,
632 				    conf->connectretry);
633 				session_connect(peer);
634 			}
635 			peer->passive = 0;
636 			break;
637 		default:
638 			/* ignore */
639 			break;
640 		}
641 		break;
642 	case STATE_CONNECT:
643 		switch (event) {
644 		case EVNT_START:
645 			/* ignore */
646 			break;
647 		case EVNT_CON_OPEN:
648 			session_tcp_established(peer);
649 			session_open(peer);
650 			timer_stop(peer, Timer_ConnectRetry);
651 			peer->holdtime = INTERVAL_HOLD_INITIAL;
652 			start_timer_holdtime(peer);
653 			change_state(peer, STATE_OPENSENT, event);
654 			break;
655 		case EVNT_CON_OPENFAIL:
656 			timer_set(peer, Timer_ConnectRetry,
657 			    conf->connectretry);
658 			session_close_connection(peer);
659 			change_state(peer, STATE_ACTIVE, event);
660 			break;
661 		case EVNT_TIMER_CONNRETRY:
662 			timer_set(peer, Timer_ConnectRetry,
663 			    conf->connectretry);
664 			session_connect(peer);
665 			break;
666 		default:
667 			change_state(peer, STATE_IDLE, event);
668 			break;
669 		}
670 		break;
671 	case STATE_ACTIVE:
672 		switch (event) {
673 		case EVNT_START:
674 			/* ignore */
675 			break;
676 		case EVNT_CON_OPEN:
677 			session_tcp_established(peer);
678 			session_open(peer);
679 			timer_stop(peer, Timer_ConnectRetry);
680 			peer->holdtime = INTERVAL_HOLD_INITIAL;
681 			start_timer_holdtime(peer);
682 			change_state(peer, STATE_OPENSENT, event);
683 			break;
684 		case EVNT_CON_OPENFAIL:
685 			timer_set(peer, Timer_ConnectRetry,
686 			    conf->connectretry);
687 			session_close_connection(peer);
688 			change_state(peer, STATE_ACTIVE, event);
689 			break;
690 		case EVNT_TIMER_CONNRETRY:
691 			timer_set(peer, Timer_ConnectRetry,
692 			    peer->holdtime);
693 			change_state(peer, STATE_CONNECT, event);
694 			session_connect(peer);
695 			break;
696 		default:
697 			change_state(peer, STATE_IDLE, event);
698 			break;
699 		}
700 		break;
701 	case STATE_OPENSENT:
702 		switch (event) {
703 		case EVNT_START:
704 			/* ignore */
705 			break;
706 		case EVNT_STOP:
707 			change_state(peer, STATE_IDLE, event);
708 			break;
709 		case EVNT_CON_CLOSED:
710 			session_close_connection(peer);
711 			timer_set(peer, Timer_ConnectRetry,
712 			    conf->connectretry);
713 			change_state(peer, STATE_ACTIVE, event);
714 			break;
715 		case EVNT_CON_FATAL:
716 			change_state(peer, STATE_IDLE, event);
717 			break;
718 		case EVNT_TIMER_HOLDTIME:
719 			session_notification(peer, ERR_HOLDTIMEREXPIRED,
720 			    0, NULL, 0);
721 			change_state(peer, STATE_IDLE, event);
722 			break;
723 		case EVNT_RCVD_OPEN:
724 			/* parse_open calls change_state itself on failure */
725 			if (parse_open(peer))
726 				break;
727 			session_keepalive(peer);
728 			change_state(peer, STATE_OPENCONFIRM, event);
729 			break;
730 		case EVNT_RCVD_NOTIFICATION:
731 			if (parse_notification(peer)) {
732 				change_state(peer, STATE_IDLE, event);
733 				/* don't punish, capa negotiation */
734 				timer_set(peer, Timer_IdleHold, 0);
735 				peer->IdleHoldTime /= 2;
736 			} else
737 				change_state(peer, STATE_IDLE, event);
738 			break;
739 		default:
740 			session_notification(peer,
741 			    ERR_FSM, ERR_FSM_UNEX_OPENSENT, NULL, 0);
742 			change_state(peer, STATE_IDLE, event);
743 			break;
744 		}
745 		break;
746 	case STATE_OPENCONFIRM:
747 		switch (event) {
748 		case EVNT_START:
749 			/* ignore */
750 			break;
751 		case EVNT_STOP:
752 			change_state(peer, STATE_IDLE, event);
753 			break;
754 		case EVNT_CON_CLOSED:
755 		case EVNT_CON_FATAL:
756 			change_state(peer, STATE_IDLE, event);
757 			break;
758 		case EVNT_TIMER_HOLDTIME:
759 			session_notification(peer, ERR_HOLDTIMEREXPIRED,
760 			    0, NULL, 0);
761 			change_state(peer, STATE_IDLE, event);
762 			break;
763 		case EVNT_TIMER_KEEPALIVE:
764 			session_keepalive(peer);
765 			break;
766 		case EVNT_RCVD_KEEPALIVE:
767 			start_timer_holdtime(peer);
768 			change_state(peer, STATE_ESTABLISHED, event);
769 			break;
770 		case EVNT_RCVD_NOTIFICATION:
771 			parse_notification(peer);
772 			change_state(peer, STATE_IDLE, event);
773 			break;
774 		default:
775 			session_notification(peer,
776 			    ERR_FSM, ERR_FSM_UNEX_OPENCONFIRM, NULL, 0);
777 			change_state(peer, STATE_IDLE, event);
778 			break;
779 		}
780 		break;
781 	case STATE_ESTABLISHED:
782 		switch (event) {
783 		case EVNT_START:
784 			/* ignore */
785 			break;
786 		case EVNT_STOP:
787 			change_state(peer, STATE_IDLE, event);
788 			break;
789 		case EVNT_CON_CLOSED:
790 		case EVNT_CON_FATAL:
791 			change_state(peer, STATE_IDLE, event);
792 			break;
793 		case EVNT_TIMER_HOLDTIME:
794 			session_notification(peer, ERR_HOLDTIMEREXPIRED,
795 			    0, NULL, 0);
796 			change_state(peer, STATE_IDLE, event);
797 			break;
798 		case EVNT_TIMER_KEEPALIVE:
799 			session_keepalive(peer);
800 			break;
801 		case EVNT_RCVD_KEEPALIVE:
802 			start_timer_holdtime(peer);
803 			break;
804 		case EVNT_RCVD_UPDATE:
805 			start_timer_holdtime(peer);
806 			if (parse_update(peer))
807 				change_state(peer, STATE_IDLE, event);
808 			else
809 				start_timer_holdtime(peer);
810 			break;
811 		case EVNT_RCVD_NOTIFICATION:
812 			parse_notification(peer);
813 			change_state(peer, STATE_IDLE, event);
814 			break;
815 		default:
816 			session_notification(peer,
817 			    ERR_FSM, ERR_FSM_UNEX_ESTABLISHED, NULL, 0);
818 			change_state(peer, STATE_IDLE, event);
819 			break;
820 		}
821 		break;
822 	}
823 }
824 
825 void
826 start_timer_holdtime(struct peer *peer)
827 {
828 	if (peer->holdtime > 0)
829 		timer_set(peer, Timer_Hold, peer->holdtime);
830 	else
831 		timer_stop(peer, Timer_Hold);
832 }
833 
834 void
835 start_timer_keepalive(struct peer *peer)
836 {
837 	if (peer->holdtime > 0)
838 		timer_set(peer, Timer_Keepalive, peer->holdtime / 3);
839 	else
840 		timer_stop(peer, Timer_Keepalive);
841 }
842 
843 void
844 session_close_connection(struct peer *peer)
845 {
846 	if (peer->fd != -1) {
847 		close(peer->fd);
848 		pauseaccept = 0;
849 	}
850 	peer->fd = peer->wbuf.fd = -1;
851 }
852 
/*
 * Carry out the side effects of moving peer into state because of event,
 * then log the transition, hand it to the matching mrt dumps and record
 * it: the old state is kept in prev_state and state becomes the new one.
 *
 * peer->state still holds the OLD state while the switch below runs, so
 * the checks on peer->state inside it are checks on where the peer is
 * coming from.
 */
853 void
854 change_state(struct peer *peer, enum session_state state,
855     enum session_events event)
856 {
857 	struct mrt	*mrt;
858 
859 	switch (state) {
860 	case STATE_IDLE:
861 		/* carp demotion first. new peers handled in init_peer */
862 		if (peer->state == STATE_ESTABLISHED &&
863 		    peer->conf.demote_group[0] && !peer->demoted)
864 			session_demote(peer, +1);
865 
866 		/*
867 		 * try to write out what's buffered (maybe a notification),
868 		 * don't bother if it fails
869 		 */
870 		if (peer->state >= STATE_OPENSENT && peer->wbuf.queued)
871 			msgbuf_write(&peer->wbuf);
872 
873 		/*
874 		 * we must start the timer for the next EVNT_START
875 		 * if we are coming here due to an error and the
876 		 * session was not established successfully before, the
877 		 * starttimerinterval needs to be exponentially increased
878 		 */
879 		if (peer->IdleHoldTime == 0)
880 			peer->IdleHoldTime = INTERVAL_IDLE_HOLD_INITIAL;
881 		peer->holdtime = INTERVAL_HOLD_INITIAL;
882 		timer_stop(peer, Timer_ConnectRetry);
883 		timer_stop(peer, Timer_Keepalive);
884 		timer_stop(peer, Timer_Hold);
885 		timer_stop(peer, Timer_IdleHold);
886 		timer_stop(peer, Timer_IdleHoldReset);
		/* drop the connection, both buffers and the peer's capabilities */
887 		session_close_connection(peer);
888 		msgbuf_clear(&peer->wbuf);
889 		free(peer->rbuf);
890 		peer->rbuf = NULL;
891 		bzero(&peer->capa.peer, sizeof(peer->capa.peer));
892 		if (!peer->template)
893 			imsg_compose(ibuf_main, IMSG_PFKEY_RELOAD,
894 			    peer->conf.id, 0, -1, NULL, 0);
895 
		/*
		 * unless this is an explicit stop, schedule the next start;
		 * the delay doubles for any event other than EVNT_NONE while
		 * it is still below MAX_IDLE_HOLD/2
		 */
896 		if (event != EVNT_STOP) {
897 			timer_set(peer, Timer_IdleHold, peer->IdleHoldTime);
898 			if (event != EVNT_NONE &&
899 			    peer->IdleHoldTime < MAX_IDLE_HOLD/2)
900 				peer->IdleHoldTime *= 2;
901 		}
902 		if (peer->state == STATE_ESTABLISHED) {
903 			if (peer->capa.neg.grestart.restart == 2 &&
904 			    (event == EVNT_CON_CLOSED ||
905 			    event == EVNT_CON_FATAL)) {
906 				/* don't punish graceful restart */
907 				timer_set(peer, Timer_IdleHold, 0);
908 				peer->IdleHoldTime /= 2;
909 				session_graceful_restart(peer);
910 			} else
911 				session_down(peer);
912 		}
913 		if (peer->state == STATE_NONE ||
914 		    peer->state == STATE_ESTABLISHED) {
915 			/* initialize capability negotiation structures */
916 			memcpy(&peer->capa.ann, &peer->conf.capabilities,
917 			    sizeof(peer->capa.ann));
918 			if (!peer->conf.announce_capa)
919 				session_capa_ann_none(peer);
920 		}
921 		break;
922 	case STATE_CONNECT:
		/*
		 * only reached with an established session from
		 * session_accept(), when a restarting peer reconnects
		 */
923 		if (peer->state == STATE_ESTABLISHED &&
924 		    peer->capa.neg.grestart.restart == 2) {
925 			/* do the graceful restart dance */
926 			session_graceful_restart(peer);
927 			peer->holdtime = INTERVAL_HOLD_INITIAL;
928 			timer_stop(peer, Timer_ConnectRetry);
929 			timer_stop(peer, Timer_Keepalive);
930 			timer_stop(peer, Timer_Hold);
931 			timer_stop(peer, Timer_IdleHold);
932 			timer_stop(peer, Timer_IdleHoldReset);
933 			session_close_connection(peer);
934 			msgbuf_clear(&peer->wbuf);
935 			bzero(&peer->capa.peer, sizeof(peer->capa.peer));
936 		}
937 		break;
938 	case STATE_ACTIVE:
939 		if (!peer->template)
940 			imsg_compose(ibuf_main, IMSG_PFKEY_RELOAD,
941 			    peer->conf.id, 0, -1, NULL, 0);
942 		break;
943 	case STATE_OPENSENT:
944 		break;
945 	case STATE_OPENCONFIRM:
946 		break;
947 	case STATE_ESTABLISHED:
948 		timer_set(peer, Timer_IdleHoldReset, peer->IdleHoldTime);
949 		if (peer->demoted)
950 			timer_set(peer, Timer_CarpUndemote,
951 			    INTERVAL_HOLD_DEMOTED);
952 		session_up(peer);
953 		break;
954 	default:		/* unknown target state: no side effects */
955 		break;
956 	}
957 
958 	log_statechange(peer, state, event);
	/*
	 * state changes go to the MRT_ALL_IN/MRT_ALL_OUT dumps that are
	 * unrestricted or that match this peer's id or group id
	 */
959 	LIST_FOREACH(mrt, &mrthead, entry) {
960 		if (!(mrt->type == MRT_ALL_IN || mrt->type == MRT_ALL_OUT))
961 			continue;
962 		if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
963 		    mrt->peer_id == peer->conf.id || (mrt->group_id != 0 &&
964 		    mrt->group_id == peer->conf.groupid))
965 			mrt_dump_state(mrt, peer->state, state, peer);
966 	}
967 	peer->prev_state = peer->state;
968 	peer->state = state;
969 }
970 
971 void
972 session_accept(int listenfd)
973 {
974 	int			 connfd;
975 	socklen_t		 len;
976 	struct sockaddr_storage	 cliaddr;
977 	struct peer		*p = NULL;
978 
979 	len = sizeof(cliaddr);
980 	if ((connfd = accept4(listenfd,
981 	    (struct sockaddr *)&cliaddr, &len,
982 	    SOCK_CLOEXEC | SOCK_NONBLOCK)) == -1) {
983 		if (errno == ENFILE || errno == EMFILE)
984 			pauseaccept = getmonotime();
985 		else if (errno != EWOULDBLOCK && errno != EINTR &&
986 		    errno != ECONNABORTED)
987 			log_warn("accept");
988 		return;
989 	}
990 
991 	p = getpeerbyip(conf, (struct sockaddr *)&cliaddr);
992 
993 	if (p != NULL && p->state == STATE_IDLE && p->errcnt < 2) {
994 		if (timer_running(p, Timer_IdleHold, NULL)) {
995 			/* fast reconnect after clear */
996 			p->passive = 1;
997 			bgp_fsm(p, EVNT_START);
998 		}
999 	}
1000 
1001 	if (p != NULL &&
1002 	    (p->state == STATE_CONNECT || p->state == STATE_ACTIVE)) {
1003 		if (p->fd != -1) {
1004 			if (p->state == STATE_CONNECT)
1005 				session_close_connection(p);
1006 			else {
1007 				close(connfd);
1008 				return;
1009 			}
1010 		}
1011 
1012 open:
1013 		if (p->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) {
1014 			log_peer_warnx(&p->conf,
1015 			    "ipsec or md5sig configured but not available");
1016 			close(connfd);
1017 			return;
1018 		}
1019 
1020 		if (tcp_md5_check(connfd, p) == -1) {
1021 			close(connfd);
1022 			return;
1023 		}
1024 		p->fd = p->wbuf.fd = connfd;
1025 		if (session_setup_socket(p)) {
1026 			close(connfd);
1027 			return;
1028 		}
1029 		bgp_fsm(p, EVNT_CON_OPEN);
1030 		return;
1031 	} else if (p != NULL && p->state == STATE_ESTABLISHED &&
1032 	    p->capa.neg.grestart.restart == 2) {
1033 		/* first do the graceful restart dance */
1034 		change_state(p, STATE_CONNECT, EVNT_CON_CLOSED);
1035 		/* then do part of the open dance */
1036 		goto open;
1037 	} else {
1038 		log_conn_attempt(p, (struct sockaddr *)&cliaddr, len);
1039 		close(connfd);
1040 	}
1041 }
1042 
1043 int
1044 session_connect(struct peer *peer)
1045 {
1046 	struct sockaddr		*sa;
1047 	socklen_t		 sa_len;
1048 
1049 	/*
1050 	 * we do not need the overcomplicated collision detection RFC 1771
1051 	 * describes; we simply make sure there is only ever one concurrent
1052 	 * tcp connection per peer.
1053 	 */
1054 	if (peer->fd != -1)
1055 		return (-1);
1056 
1057 	if ((peer->fd = socket(aid2af(peer->conf.remote_addr.aid),
1058 	    SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, IPPROTO_TCP)) == -1) {
1059 		log_peer_warn(&peer->conf, "session_connect socket");
1060 		bgp_fsm(peer, EVNT_CON_OPENFAIL);
1061 		return (-1);
1062 	}
1063 
1064 	if (peer->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) {
1065 		log_peer_warnx(&peer->conf,
1066 		    "ipsec or md5sig configured but not available");
1067 		bgp_fsm(peer, EVNT_CON_OPENFAIL);
1068 		return (-1);
1069 	}
1070 
1071 	tcp_md5_set(peer->fd, peer);
1072 	peer->wbuf.fd = peer->fd;
1073 
1074 	/* if update source is set we need to bind() */
1075 	if ((sa = addr2sa(&peer->conf.local_addr, 0, &sa_len)) != NULL) {
1076 		if (bind(peer->fd, sa, sa_len) == -1) {
1077 			log_peer_warn(&peer->conf, "session_connect bind");
1078 			bgp_fsm(peer, EVNT_CON_OPENFAIL);
1079 			return (-1);
1080 		}
1081 	}
1082 
1083 	if (session_setup_socket(peer)) {
1084 		bgp_fsm(peer, EVNT_CON_OPENFAIL);
1085 		return (-1);
1086 	}
1087 
1088 	sa = addr2sa(&peer->conf.remote_addr, BGP_PORT, &sa_len);
1089 	if (connect(peer->fd, sa, sa_len) == -1) {
1090 		if (errno != EINPROGRESS) {
1091 			if (errno != peer->lasterr)
1092 				log_peer_warn(&peer->conf, "connect");
1093 			peer->lasterr = errno;
1094 			bgp_fsm(peer, EVNT_CON_OPENFAIL);
1095 			return (-1);
1096 		}
1097 	} else
1098 		bgp_fsm(peer, EVNT_CON_OPEN);
1099 
1100 	return (0);
1101 }
1102 
1103 int
1104 session_setup_socket(struct peer *p)
1105 {
1106 	int	ttl = p->conf.distance;
1107 	int	pre = IPTOS_PREC_INTERNETCONTROL;
1108 	int	nodelay = 1;
1109 	int	bsize;
1110 
1111 	switch (p->conf.remote_addr.aid) {
1112 	case AID_INET:
1113 		/* set precedence, see RFC 1771 appendix 5 */
1114 		if (setsockopt(p->fd, IPPROTO_IP, IP_TOS, &pre, sizeof(pre)) ==
1115 		    -1) {
1116 			log_peer_warn(&p->conf,
1117 			    "session_setup_socket setsockopt TOS");
1118 			return (-1);
1119 		}
1120 
1121 		if (p->conf.ebgp) {
1122 			/*
1123 			 * set TTL to foreign router's distance
1124 			 * 1=direct n=multihop with ttlsec, we always use 255
1125 			 */
1126 			if (p->conf.ttlsec) {
1127 				ttl = 256 - p->conf.distance;
1128 				if (setsockopt(p->fd, IPPROTO_IP, IP_MINTTL,
1129 				    &ttl, sizeof(ttl)) == -1) {
1130 					log_peer_warn(&p->conf,
1131 					    "session_setup_socket: "
1132 					    "setsockopt MINTTL");
1133 					return (-1);
1134 				}
1135 				ttl = 255;
1136 			}
1137 
1138 			if (setsockopt(p->fd, IPPROTO_IP, IP_TTL, &ttl,
1139 			    sizeof(ttl)) == -1) {
1140 				log_peer_warn(&p->conf,
1141 				    "session_setup_socket setsockopt TTL");
1142 				return (-1);
1143 			}
1144 		}
1145 		break;
1146 	case AID_INET6:
1147 		if (p->conf.ebgp) {
1148 			/*
1149 			 * set hoplimit to foreign router's distance
1150 			 * 1=direct n=multihop with ttlsec, we always use 255
1151 			 */
1152 			if (p->conf.ttlsec) {
1153 				ttl = 256 - p->conf.distance;
1154 				if (setsockopt(p->fd, IPPROTO_IPV6,
1155 				    IPV6_MINHOPCOUNT, &ttl, sizeof(ttl))
1156 				    == -1) {
1157 					log_peer_warn(&p->conf,
1158 					    "session_setup_socket: "
1159 					    "setsockopt MINHOPCOUNT");
1160 					return (-1);
1161 				}
1162 				ttl = 255;
1163 			}
1164 			if (setsockopt(p->fd, IPPROTO_IPV6, IPV6_UNICAST_HOPS,
1165 			    &ttl, sizeof(ttl)) == -1) {
1166 				log_peer_warn(&p->conf,
1167 				    "session_setup_socket setsockopt hoplimit");
1168 				return (-1);
1169 			}
1170 		}
1171 		break;
1172 	}
1173 
1174 	/* set TCP_NODELAY */
1175 	if (setsockopt(p->fd, IPPROTO_TCP, TCP_NODELAY, &nodelay,
1176 	    sizeof(nodelay)) == -1) {
1177 		log_peer_warn(&p->conf,
1178 		    "session_setup_socket setsockopt TCP_NODELAY");
1179 		return (-1);
1180 	}
1181 
1182 	/* only increase bufsize (and thus window) if md5 or ipsec is in use */
1183 	if (p->conf.auth.method != AUTH_NONE) {
1184 		/* try to increase bufsize. no biggie if it fails */
1185 		bsize = 65535;
1186 		while (bsize > 8192 &&
1187 		    setsockopt(p->fd, SOL_SOCKET, SO_RCVBUF, &bsize,
1188 		    sizeof(bsize)) == -1 && errno != EINVAL)
1189 			bsize /= 2;
1190 		bsize = 65535;
1191 		while (bsize > 8192 &&
1192 		    setsockopt(p->fd, SOL_SOCKET, SO_SNDBUF, &bsize,
1193 		    sizeof(bsize)) == -1 && errno != EINVAL)
1194 			bsize /= 2;
1195 	}
1196 
1197 	return (0);
1198 }
1199 
1200 void
1201 session_tcp_established(struct peer *peer)
1202 {
1203 	struct sockaddr_storage	ss;
1204 	socklen_t		len;
1205 
1206 	len = sizeof(ss);
1207 	if (getsockname(peer->fd, (struct sockaddr *)&ss, &len) == -1)
1208 		log_warn("getsockname");
1209 	sa2addr((struct sockaddr *)&ss, &peer->local, &peer->local_port);
1210 	len = sizeof(ss);
1211 	if (getpeername(peer->fd, (struct sockaddr *)&ss, &len) == -1)
1212 		log_warn("getpeername");
1213 	sa2addr((struct sockaddr *)&ss, &peer->remote, &peer->remote_port);
1214 }
1215 
1216 void
1217 session_capa_ann_none(struct peer *peer)
1218 {
1219 	bzero(&peer->capa.ann, sizeof(peer->capa.ann));
1220 }
1221 
/*
 * Append a capability header (code byte followed by length byte) to
 * opb.  Returns the sum of the ibuf_add() results, i.e. 0 if both
 * bytes were added.
 */
int
session_capa_add(struct ibuf *opb, u_int8_t capa_code, u_int8_t capa_len)
{
	int failed;

	failed = ibuf_add(opb, &capa_code, sizeof(capa_code));
	failed += ibuf_add(opb, &capa_len, sizeof(capa_len));

	return (failed);
}
1231 
/*
 * Append the 4 byte payload of a multiprotocol capability for the
 * given AID to buf: AFI in network byte order, one zero byte, SAFI.
 * An AID that aid2afi() cannot translate is fatal.
 *
 * Returns the sum of the ibuf_add() results (0 on success).
 */
int
session_capa_add_mp(struct ibuf *buf, u_int8_t aid)
{
	u_int16_t		 afi, wire_afi;
	u_int8_t		 safi, reserved = 0;
	int			 failed;

	if (aid2afi(aid, &afi, &safi) == -1)
		fatalx("session_capa_add_mp: bad afi/safi pair");

	wire_afi = htons(afi);
	failed = ibuf_add(buf, &wire_afi, sizeof(wire_afi));
	failed += ibuf_add(buf, &reserved, sizeof(reserved));
	failed += ibuf_add(buf, &safi, sizeof(safi));

	return (failed);
}
1248 
1249 int
1250 session_capa_add_gr(struct peer *p, struct ibuf *b, u_int8_t aid)
1251 {
1252 	u_int		errs = 0;
1253 	u_int16_t	afi;
1254 	u_int8_t	flags, safi;
1255 
1256 	if (aid2afi(aid, &afi, &safi)) {
1257 		log_warn("session_capa_add_gr: bad AID");
1258 		return (1);
1259 	}
1260 	if (p->capa.neg.grestart.flags[aid] & CAPA_GR_RESTARTING)
1261 		flags = CAPA_GR_F_FLAG;
1262 	else
1263 		flags = 0;
1264 
1265 	afi = htons(afi);
1266 	errs += ibuf_add(b, &afi, sizeof(afi));
1267 	errs += ibuf_add(b, &safi, sizeof(safi));
1268 	errs += ibuf_add(b, &flags, sizeof(flags));
1269 
1270 	return (errs);
1271 }
1272 
1273 struct bgp_msg *
1274 session_newmsg(enum msg_type msgtype, u_int16_t len)
1275 {
1276 	struct bgp_msg		*msg;
1277 	struct msg_header	 hdr;
1278 	struct ibuf		*buf;
1279 	int			 errs = 0;
1280 
1281 	memset(&hdr.marker, 0xff, sizeof(hdr.marker));
1282 	hdr.len = htons(len);
1283 	hdr.type = msgtype;
1284 
1285 	if ((buf = ibuf_open(len)) == NULL)
1286 		return (NULL);
1287 
1288 	errs += ibuf_add(buf, &hdr.marker, sizeof(hdr.marker));
1289 	errs += ibuf_add(buf, &hdr.len, sizeof(hdr.len));
1290 	errs += ibuf_add(buf, &hdr.type, sizeof(hdr.type));
1291 
1292 	if (errs || (msg = calloc(1, sizeof(*msg))) == NULL) {
1293 		ibuf_free(buf);
1294 		return (NULL);
1295 	}
1296 
1297 	msg->buf = buf;
1298 	msg->type = msgtype;
1299 	msg->len = len;
1300 
1301 	return (msg);
1302 }
1303 
1304 int
1305 session_sendmsg(struct bgp_msg *msg, struct peer *p)
1306 {
1307 	struct mrt		*mrt;
1308 
1309 	LIST_FOREACH(mrt, &mrthead, entry) {
1310 		if (!(mrt->type == MRT_ALL_OUT || (msg->type == UPDATE &&
1311 		    mrt->type == MRT_UPDATE_OUT)))
1312 			continue;
1313 		if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
1314 		    mrt->peer_id == p->conf.id || (mrt->group_id != 0 &&
1315 		    mrt->group_id == p->conf.groupid))
1316 			mrt_dump_bgp_msg(mrt, msg->buf->buf, msg->len, p);
1317 	}
1318 
1319 	ibuf_close(&p->wbuf, msg->buf);
1320 	if (!p->throttled && p->wbuf.queued > SESS_MSG_HIGH_MARK) {
1321 		if (imsg_rde(IMSG_XOFF, p->conf.id, NULL, 0) == -1)
1322 			log_peer_warn(&p->conf, "imsg_compose XOFF");
1323 		else
1324 			p->throttled = 1;
1325 	}
1326 
1327 	free(msg);
1328 	return (0);
1329 }
1330 
1331 void
1332 session_open(struct peer *p)
1333 {
1334 	struct bgp_msg		*buf;
1335 	struct ibuf		*opb;
1336 	struct msg_open		 msg;
1337 	u_int16_t		 len;
1338 	u_int8_t		 i, op_type, optparamlen = 0;
1339 	int			 errs = 0;
1340 	int			 mpcapa = 0;
1341 
1342 
1343 	if ((opb = ibuf_dynamic(0, UCHAR_MAX - sizeof(op_type) -
1344 	    sizeof(optparamlen))) == NULL) {
1345 		bgp_fsm(p, EVNT_CON_FATAL);
1346 		return;
1347 	}
1348 
1349 	/* multiprotocol extensions, RFC 4760 */
1350 	for (i = 0; i < AID_MAX; i++)
1351 		if (p->capa.ann.mp[i]) {	/* 4 bytes data */
1352 			errs += session_capa_add(opb, CAPA_MP, 4);
1353 			errs += session_capa_add_mp(opb, i);
1354 			mpcapa++;
1355 		}
1356 
1357 	/* route refresh, RFC 2918 */
1358 	if (p->capa.ann.refresh)	/* no data */
1359 		errs += session_capa_add(opb, CAPA_REFRESH, 0);
1360 
1361 	/* graceful restart and End-of-RIB marker, RFC 4724 */
1362 	if (p->capa.ann.grestart.restart) {
1363 		int		rst = 0;
1364 		u_int16_t	hdr;
1365 		u_int8_t	grlen;
1366 
1367 		if (mpcapa) {
1368 			grlen = 2 + 4 * mpcapa;
1369 			for (i = 0; i < AID_MAX; i++) {
1370 				if (p->capa.neg.grestart.flags[i] &
1371 				    CAPA_GR_RESTARTING)
1372 					rst++;
1373 			}
1374 		} else {	/* AID_INET */
1375 			grlen = 2 + 4;
1376 			if (p->capa.neg.grestart.flags[AID_INET] &
1377 			    CAPA_GR_RESTARTING)
1378 				rst++;
1379 		}
1380 
1381 		hdr = conf->holdtime;		/* default timeout */
1382 		/* if client does graceful restart don't set R flag */
1383 		if (!rst)
1384 			hdr |= CAPA_GR_R_FLAG;
1385 		hdr = htons(hdr);
1386 
1387 		errs += session_capa_add(opb, CAPA_RESTART, grlen);
1388 		errs += ibuf_add(opb, &hdr, sizeof(hdr));
1389 
1390 		if (mpcapa) {
1391 			for (i = 0; i < AID_MAX; i++) {
1392 				if (p->capa.ann.mp[i]) {
1393 					errs += session_capa_add_gr(p, opb, i);
1394 				}
1395 			}
1396 		} else {	/* AID_INET */
1397 			errs += session_capa_add_gr(p, opb, AID_INET);
1398 		}
1399 	}
1400 
1401 	/* 4-bytes AS numbers, draft-ietf-idr-as4bytes-13 */
1402 	if (p->capa.ann.as4byte) {	/* 4 bytes data */
1403 		u_int32_t	nas;
1404 
1405 		nas = htonl(p->conf.local_as);
1406 		errs += session_capa_add(opb, CAPA_AS4BYTE, sizeof(nas));
1407 		errs += ibuf_add(opb, &nas, sizeof(nas));
1408 	}
1409 
1410 	if (ibuf_size(opb))
1411 		optparamlen = ibuf_size(opb) + sizeof(op_type) +
1412 		    sizeof(optparamlen);
1413 
1414 	len = MSGSIZE_OPEN_MIN + optparamlen;
1415 	if (errs || (buf = session_newmsg(OPEN, len)) == NULL) {
1416 		ibuf_free(opb);
1417 		bgp_fsm(p, EVNT_CON_FATAL);
1418 		return;
1419 	}
1420 
1421 	msg.version = 4;
1422 	msg.myas = htons(p->conf.local_short_as);
1423 	if (p->conf.holdtime)
1424 		msg.holdtime = htons(p->conf.holdtime);
1425 	else
1426 		msg.holdtime = htons(conf->holdtime);
1427 	msg.bgpid = conf->bgpid;	/* is already in network byte order */
1428 	msg.optparamlen = optparamlen;
1429 
1430 	errs += ibuf_add(buf->buf, &msg.version, sizeof(msg.version));
1431 	errs += ibuf_add(buf->buf, &msg.myas, sizeof(msg.myas));
1432 	errs += ibuf_add(buf->buf, &msg.holdtime, sizeof(msg.holdtime));
1433 	errs += ibuf_add(buf->buf, &msg.bgpid, sizeof(msg.bgpid));
1434 	errs += ibuf_add(buf->buf, &msg.optparamlen, sizeof(msg.optparamlen));
1435 
1436 	if (optparamlen) {
1437 		op_type = OPT_PARAM_CAPABILITIES;
1438 		optparamlen = ibuf_size(opb);
1439 		errs += ibuf_add(buf->buf, &op_type, sizeof(op_type));
1440 		errs += ibuf_add(buf->buf, &optparamlen, sizeof(optparamlen));
1441 		errs += ibuf_add(buf->buf, opb->buf, ibuf_size(opb));
1442 	}
1443 
1444 	ibuf_free(opb);
1445 
1446 	if (errs) {
1447 		ibuf_free(buf->buf);
1448 		free(buf);
1449 		bgp_fsm(p, EVNT_CON_FATAL);
1450 		return;
1451 	}
1452 
1453 	if (session_sendmsg(buf, p) == -1) {
1454 		bgp_fsm(p, EVNT_CON_FATAL);
1455 		return;
1456 	}
1457 
1458 	p->stats.msg_sent_open++;
1459 }
1460 
1461 void
1462 session_keepalive(struct peer *p)
1463 {
1464 	struct bgp_msg		*buf;
1465 
1466 	if ((buf = session_newmsg(KEEPALIVE, MSGSIZE_KEEPALIVE)) == NULL ||
1467 	    session_sendmsg(buf, p) == -1) {
1468 		bgp_fsm(p, EVNT_CON_FATAL);
1469 		return;
1470 	}
1471 
1472 	start_timer_keepalive(p);
1473 	p->stats.msg_sent_keepalive++;
1474 }
1475 
1476 void
1477 session_update(u_int32_t peerid, void *data, size_t datalen)
1478 {
1479 	struct peer		*p;
1480 	struct bgp_msg		*buf;
1481 
1482 	if ((p = getpeerbyid(conf, peerid)) == NULL) {
1483 		log_warnx("no such peer: id=%u", peerid);
1484 		return;
1485 	}
1486 
1487 	if (p->state != STATE_ESTABLISHED)
1488 		return;
1489 
1490 	if ((buf = session_newmsg(UPDATE, MSGSIZE_HEADER + datalen)) == NULL) {
1491 		bgp_fsm(p, EVNT_CON_FATAL);
1492 		return;
1493 	}
1494 
1495 	if (ibuf_add(buf->buf, data, datalen)) {
1496 		ibuf_free(buf->buf);
1497 		free(buf);
1498 		bgp_fsm(p, EVNT_CON_FATAL);
1499 		return;
1500 	}
1501 
1502 	if (session_sendmsg(buf, p) == -1) {
1503 		bgp_fsm(p, EVNT_CON_FATAL);
1504 		return;
1505 	}
1506 
1507 	start_timer_keepalive(p);
1508 	p->stats.msg_sent_update++;
1509 }
1510 
1511 void
1512 session_notification(struct peer *p, u_int8_t errcode, u_int8_t subcode,
1513     void *data, ssize_t datalen)
1514 {
1515 	struct bgp_msg		*buf;
1516 	int			 errs = 0;
1517 
1518 	if (p->stats.last_sent_errcode)	/* some notification already sent */
1519 		return;
1520 
1521 	log_notification(p, errcode, subcode, data, datalen, "sending");
1522 
1523 	if ((buf = session_newmsg(NOTIFICATION,
1524 	    MSGSIZE_NOTIFICATION_MIN + datalen)) == NULL) {
1525 		bgp_fsm(p, EVNT_CON_FATAL);
1526 		return;
1527 	}
1528 
1529 	errs += ibuf_add(buf->buf, &errcode, sizeof(errcode));
1530 	errs += ibuf_add(buf->buf, &subcode, sizeof(subcode));
1531 
1532 	if (datalen > 0)
1533 		errs += ibuf_add(buf->buf, data, datalen);
1534 
1535 	if (errs) {
1536 		ibuf_free(buf->buf);
1537 		free(buf);
1538 		bgp_fsm(p, EVNT_CON_FATAL);
1539 		return;
1540 	}
1541 
1542 	if (session_sendmsg(buf, p) == -1) {
1543 		bgp_fsm(p, EVNT_CON_FATAL);
1544 		return;
1545 	}
1546 
1547 	p->stats.msg_sent_notification++;
1548 	p->stats.last_sent_errcode = errcode;
1549 	p->stats.last_sent_suberr = subcode;
1550 }
1551 
1552 int
1553 session_neighbor_rrefresh(struct peer *p)
1554 {
1555 	u_int8_t	i;
1556 
1557 	if (!p->capa.peer.refresh)
1558 		return (-1);
1559 
1560 	for (i = 0; i < AID_MAX; i++) {
1561 		if (p->capa.peer.mp[i] != 0)
1562 			session_rrefresh(p, i);
1563 	}
1564 
1565 	return (0);
1566 }
1567 
1568 void
1569 session_rrefresh(struct peer *p, u_int8_t aid)
1570 {
1571 	struct bgp_msg		*buf;
1572 	int			 errs = 0;
1573 	u_int16_t		 afi;
1574 	u_int8_t		 safi, null8 = 0;
1575 
1576 	if (aid2afi(aid, &afi, &safi) == -1)
1577 		fatalx("session_rrefresh: bad afi/safi pair");
1578 
1579 	if ((buf = session_newmsg(RREFRESH, MSGSIZE_RREFRESH)) == NULL) {
1580 		bgp_fsm(p, EVNT_CON_FATAL);
1581 		return;
1582 	}
1583 
1584 	afi = htons(afi);
1585 	errs += ibuf_add(buf->buf, &afi, sizeof(afi));
1586 	errs += ibuf_add(buf->buf, &null8, sizeof(null8));
1587 	errs += ibuf_add(buf->buf, &safi, sizeof(safi));
1588 
1589 	if (errs) {
1590 		ibuf_free(buf->buf);
1591 		free(buf);
1592 		bgp_fsm(p, EVNT_CON_FATAL);
1593 		return;
1594 	}
1595 
1596 	if (session_sendmsg(buf, p) == -1) {
1597 		bgp_fsm(p, EVNT_CON_FATAL);
1598 		return;
1599 	}
1600 
1601 	p->stats.msg_sent_rrefresh++;
1602 }
1603 
1604 int
1605 session_graceful_restart(struct peer *p)
1606 {
1607 	u_int8_t	i;
1608 
1609 	timer_set(p, Timer_RestartTimeout, p->capa.neg.grestart.timeout);
1610 
1611 	for (i = 0; i < AID_MAX; i++) {
1612 		if (p->capa.neg.grestart.flags[i] & CAPA_GR_PRESENT) {
1613 			if (imsg_rde(IMSG_SESSION_STALE, p->conf.id,
1614 			    &i, sizeof(i)) == -1)
1615 				return (-1);
1616 			log_peer_warnx(&p->conf,
1617 			    "graceful restart of %s, keeping routes",
1618 			    aid2str(i));
1619 			p->capa.neg.grestart.flags[i] |= CAPA_GR_RESTARTING;
1620 		} else if (p->capa.neg.mp[i]) {
1621 			if (imsg_rde(IMSG_SESSION_FLUSH, p->conf.id,
1622 			    &i, sizeof(i)) == -1)
1623 				return (-1);
1624 			log_peer_warnx(&p->conf,
1625 			    "graceful restart of %s, flushing routes",
1626 			    aid2str(i));
1627 		}
1628 	}
1629 	return (0);
1630 }
1631 
1632 int
1633 session_graceful_stop(struct peer *p)
1634 {
1635 	u_int8_t	i;
1636 
1637 	for (i = 0; i < AID_MAX; i++) {
1638 		/*
1639 		 * Only flush if the peer is restarting and the timeout fired.
1640 		 * In all other cases the session was already flushed when the
1641 		 * session went down or when the new open message was parsed.
1642 		 */
1643 		if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING) {
1644 			log_peer_warnx(&p->conf, "graceful restart of %s, "
1645 			    "time-out, flushing", aid2str(i));
1646 			if (imsg_rde(IMSG_SESSION_FLUSH, p->conf.id,
1647 			    &i, sizeof(i)) == -1)
1648 				return (-1);
1649 		}
1650 		p->capa.neg.grestart.flags[i] &= ~CAPA_GR_RESTARTING;
1651 	}
1652 	return (0);
1653 }
1654 
1655 int
1656 session_dispatch_msg(struct pollfd *pfd, struct peer *p)
1657 {
1658 	ssize_t		n;
1659 	socklen_t	len;
1660 	int		error;
1661 
1662 	if (p->state == STATE_CONNECT) {
1663 		if (pfd->revents & POLLOUT) {
1664 			if (pfd->revents & POLLIN) {
1665 				/* error occurred */
1666 				len = sizeof(error);
1667 				if (getsockopt(pfd->fd, SOL_SOCKET, SO_ERROR,
1668 				    &error, &len) == -1 || error) {
1669 					if (error)
1670 						errno = error;
1671 					if (errno != p->lasterr) {
1672 						log_peer_warn(&p->conf,
1673 						    "socket error");
1674 						p->lasterr = errno;
1675 					}
1676 					bgp_fsm(p, EVNT_CON_OPENFAIL);
1677 					return (1);
1678 				}
1679 			}
1680 			bgp_fsm(p, EVNT_CON_OPEN);
1681 			return (1);
1682 		}
1683 		if (pfd->revents & POLLHUP) {
1684 			bgp_fsm(p, EVNT_CON_OPENFAIL);
1685 			return (1);
1686 		}
1687 		if (pfd->revents & (POLLERR|POLLNVAL)) {
1688 			bgp_fsm(p, EVNT_CON_FATAL);
1689 			return (1);
1690 		}
1691 		return (0);
1692 	}
1693 
1694 	if (pfd->revents & POLLHUP) {
1695 		bgp_fsm(p, EVNT_CON_CLOSED);
1696 		return (1);
1697 	}
1698 	if (pfd->revents & (POLLERR|POLLNVAL)) {
1699 		bgp_fsm(p, EVNT_CON_FATAL);
1700 		return (1);
1701 	}
1702 
1703 	if (pfd->revents & POLLOUT && p->wbuf.queued) {
1704 		if ((error = msgbuf_write(&p->wbuf)) <= 0 && errno != EAGAIN) {
1705 			if (error == 0)
1706 				log_peer_warnx(&p->conf, "Connection closed");
1707 			else if (error == -1)
1708 				log_peer_warn(&p->conf, "write error");
1709 			bgp_fsm(p, EVNT_CON_FATAL);
1710 			return (1);
1711 		}
1712 		if (p->throttled && p->wbuf.queued < SESS_MSG_LOW_MARK) {
1713 			if (imsg_rde(IMSG_XON, p->conf.id, NULL, 0) == -1)
1714 				log_peer_warn(&p->conf, "imsg_compose XON");
1715 			else
1716 				p->throttled = 0;
1717 		}
1718 		if (!(pfd->revents & POLLIN))
1719 			return (1);
1720 	}
1721 
1722 	if (p->rbuf && pfd->revents & POLLIN) {
1723 		if ((n = read(p->fd, p->rbuf->buf + p->rbuf->wpos,
1724 		    sizeof(p->rbuf->buf) - p->rbuf->wpos)) == -1) {
1725 			if (errno != EINTR && errno != EAGAIN) {
1726 				log_peer_warn(&p->conf, "read error");
1727 				bgp_fsm(p, EVNT_CON_FATAL);
1728 			}
1729 			return (1);
1730 		}
1731 		if (n == 0) {	/* connection closed */
1732 			bgp_fsm(p, EVNT_CON_CLOSED);
1733 			return (1);
1734 		}
1735 
1736 		p->rbuf->wpos += n;
1737 		p->stats.last_read = time(NULL);
1738 		return (1);
1739 	}
1740 	return (0);
1741 }
1742 
1743 void
1744 session_process_msg(struct peer *p)
1745 {
1746 	struct mrt	*mrt;
1747 	ssize_t		rpos, av, left;
1748 	int		processed = 0;
1749 	u_int16_t	msglen;
1750 	u_int8_t	msgtype;
1751 
1752 	rpos = 0;
1753 	av = p->rbuf->wpos;
1754 	p->rpending = 0;
1755 
1756 	/*
1757 	 * session might drop to IDLE -> buffers deallocated
1758 	 * we MUST check rbuf != NULL before use
1759 	 */
1760 	for (;;) {
1761 		if (p->rbuf == NULL)
1762 			return;
1763 		if (rpos + MSGSIZE_HEADER > av)
1764 			break;
1765 		if (parse_header(p, p->rbuf->buf + rpos, &msglen,
1766 		    &msgtype) == -1)
1767 			return;
1768 		if (rpos + msglen > av)
1769 			break;
1770 		p->rbuf->rptr = p->rbuf->buf + rpos;
1771 
1772 		/* dump to MRT as soon as we have a full packet */
1773 		LIST_FOREACH(mrt, &mrthead, entry) {
1774 			if (!(mrt->type == MRT_ALL_IN || (msgtype == UPDATE &&
1775 			    mrt->type == MRT_UPDATE_IN)))
1776 				continue;
1777 			if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
1778 			    mrt->peer_id == p->conf.id || (mrt->group_id != 0 &&
1779 			    mrt->group_id == p->conf.groupid))
1780 				mrt_dump_bgp_msg(mrt, p->rbuf->rptr, msglen, p);
1781 		}
1782 
1783 		switch (msgtype) {
1784 		case OPEN:
1785 			bgp_fsm(p, EVNT_RCVD_OPEN);
1786 			p->stats.msg_rcvd_open++;
1787 			break;
1788 		case UPDATE:
1789 			bgp_fsm(p, EVNT_RCVD_UPDATE);
1790 			p->stats.msg_rcvd_update++;
1791 			break;
1792 		case NOTIFICATION:
1793 			bgp_fsm(p, EVNT_RCVD_NOTIFICATION);
1794 			p->stats.msg_rcvd_notification++;
1795 			break;
1796 		case KEEPALIVE:
1797 			bgp_fsm(p, EVNT_RCVD_KEEPALIVE);
1798 			p->stats.msg_rcvd_keepalive++;
1799 			break;
1800 		case RREFRESH:
1801 			parse_refresh(p);
1802 			p->stats.msg_rcvd_rrefresh++;
1803 			break;
1804 		default:	/* cannot happen */
1805 			session_notification(p, ERR_HEADER, ERR_HDR_TYPE,
1806 			    &msgtype, 1);
1807 			log_warnx("received message with unknown type %u",
1808 			    msgtype);
1809 			bgp_fsm(p, EVNT_CON_FATAL);
1810 		}
1811 		rpos += msglen;
1812 		if (++processed > MSG_PROCESS_LIMIT) {
1813 			p->rpending = 1;
1814 			break;
1815 		}
1816 	}
1817 
1818 	if (rpos < av) {
1819 		left = av - rpos;
1820 		memmove(&p->rbuf->buf, p->rbuf->buf + rpos, left);
1821 		p->rbuf->wpos = left;
1822 	} else
1823 		p->rbuf->wpos = 0;
1824 }
1825 
1826 int
1827 parse_header(struct peer *peer, u_char *data, u_int16_t *len, u_int8_t *type)
1828 {
1829 	u_char			*p;
1830 	u_int16_t		 olen;
1831 	static const u_int8_t	 marker[MSGSIZE_HEADER_MARKER] = { 0xff, 0xff,
1832 				    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1833 				    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
1834 
1835 	/* caller MUST make sure we are getting 19 bytes! */
1836 	p = data;
1837 	if (memcmp(p, marker, sizeof(marker))) {
1838 		log_peer_warnx(&peer->conf, "sync error");
1839 		session_notification(peer, ERR_HEADER, ERR_HDR_SYNC, NULL, 0);
1840 		bgp_fsm(peer, EVNT_CON_FATAL);
1841 		return (-1);
1842 	}
1843 	p += MSGSIZE_HEADER_MARKER;
1844 
1845 	memcpy(&olen, p, 2);
1846 	*len = ntohs(olen);
1847 	p += 2;
1848 	memcpy(type, p, 1);
1849 
1850 	if (*len < MSGSIZE_HEADER || *len > MAX_PKTSIZE) {
1851 		log_peer_warnx(&peer->conf,
1852 		    "received message: illegal length: %u byte", *len);
1853 		session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1854 		    &olen, sizeof(olen));
1855 		bgp_fsm(peer, EVNT_CON_FATAL);
1856 		return (-1);
1857 	}
1858 
1859 	switch (*type) {
1860 	case OPEN:
1861 		if (*len < MSGSIZE_OPEN_MIN) {
1862 			log_peer_warnx(&peer->conf,
1863 			    "received OPEN: illegal len: %u byte", *len);
1864 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1865 			    &olen, sizeof(olen));
1866 			bgp_fsm(peer, EVNT_CON_FATAL);
1867 			return (-1);
1868 		}
1869 		break;
1870 	case NOTIFICATION:
1871 		if (*len < MSGSIZE_NOTIFICATION_MIN) {
1872 			log_peer_warnx(&peer->conf,
1873 			    "received NOTIFICATION: illegal len: %u byte",
1874 			    *len);
1875 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1876 			    &olen, sizeof(olen));
1877 			bgp_fsm(peer, EVNT_CON_FATAL);
1878 			return (-1);
1879 		}
1880 		break;
1881 	case UPDATE:
1882 		if (*len < MSGSIZE_UPDATE_MIN) {
1883 			log_peer_warnx(&peer->conf,
1884 			    "received UPDATE: illegal len: %u byte", *len);
1885 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1886 			    &olen, sizeof(olen));
1887 			bgp_fsm(peer, EVNT_CON_FATAL);
1888 			return (-1);
1889 		}
1890 		break;
1891 	case KEEPALIVE:
1892 		if (*len != MSGSIZE_KEEPALIVE) {
1893 			log_peer_warnx(&peer->conf,
1894 			    "received KEEPALIVE: illegal len: %u byte", *len);
1895 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1896 			    &olen, sizeof(olen));
1897 			bgp_fsm(peer, EVNT_CON_FATAL);
1898 			return (-1);
1899 		}
1900 		break;
1901 	case RREFRESH:
1902 		if (*len != MSGSIZE_RREFRESH) {
1903 			log_peer_warnx(&peer->conf,
1904 			    "received RREFRESH: illegal len: %u byte", *len);
1905 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1906 			    &olen, sizeof(olen));
1907 			bgp_fsm(peer, EVNT_CON_FATAL);
1908 			return (-1);
1909 		}
1910 		break;
1911 	default:
1912 		log_peer_warnx(&peer->conf,
1913 		    "received msg with unknown type %u", *type);
1914 		session_notification(peer, ERR_HEADER, ERR_HDR_TYPE,
1915 		    type, 1);
1916 		bgp_fsm(peer, EVNT_CON_FATAL);
1917 		return (-1);
1918 	}
1919 	return (0);
1920 }
1921 
1922 int
1923 parse_open(struct peer *peer)
1924 {
1925 	u_char		*p, *op_val;
1926 	u_int8_t	 version, rversion;
1927 	u_int16_t	 short_as, msglen;
1928 	u_int16_t	 holdtime, oholdtime, myholdtime;
1929 	u_int32_t	 as, bgpid;
1930 	u_int8_t	 optparamlen, plen;
1931 	u_int8_t	 op_type, op_len;
1932 
1933 	p = peer->rbuf->rptr;
1934 	p += MSGSIZE_HEADER_MARKER;
1935 	memcpy(&msglen, p, sizeof(msglen));
1936 	msglen = ntohs(msglen);
1937 
1938 	p = peer->rbuf->rptr;
1939 	p += MSGSIZE_HEADER;	/* header is already checked */
1940 
1941 	memcpy(&version, p, sizeof(version));
1942 	p += sizeof(version);
1943 
1944 	if (version != BGP_VERSION) {
1945 		log_peer_warnx(&peer->conf,
1946 		    "peer wants unrecognized version %u", version);
1947 		if (version > BGP_VERSION)
1948 			rversion = version - BGP_VERSION;
1949 		else
1950 			rversion = BGP_VERSION;
1951 		session_notification(peer, ERR_OPEN, ERR_OPEN_VERSION,
1952 		    &rversion, sizeof(rversion));
1953 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
1954 		return (-1);
1955 	}
1956 
1957 	memcpy(&short_as, p, sizeof(short_as));
1958 	p += sizeof(short_as);
1959 	as = peer->short_as = ntohs(short_as);
1960 	if (as == 0) {
1961 		log_peer_warnx(&peer->conf,
1962 		    "peer requests unacceptable AS %u", as);
1963 		session_notification(peer, ERR_OPEN, ERR_OPEN_AS,
1964 		    NULL, 0);
1965 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
1966 		return (-1);
1967 	}
1968 
1969 	memcpy(&oholdtime, p, sizeof(oholdtime));
1970 	p += sizeof(oholdtime);
1971 
1972 	holdtime = ntohs(oholdtime);
1973 	if (holdtime && holdtime < peer->conf.min_holdtime) {
1974 		log_peer_warnx(&peer->conf,
1975 		    "peer requests unacceptable holdtime %u", holdtime);
1976 		session_notification(peer, ERR_OPEN, ERR_OPEN_HOLDTIME,
1977 		    NULL, 0);
1978 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
1979 		return (-1);
1980 	}
1981 
1982 	myholdtime = peer->conf.holdtime;
1983 	if (!myholdtime)
1984 		myholdtime = conf->holdtime;
1985 	if (holdtime < myholdtime)
1986 		peer->holdtime = holdtime;
1987 	else
1988 		peer->holdtime = myholdtime;
1989 
1990 	memcpy(&bgpid, p, sizeof(bgpid));
1991 	p += sizeof(bgpid);
1992 
1993 	/* check bgpid for validity - just disallow 0 */
1994 	if (ntohl(bgpid) == 0) {
1995 		log_peer_warnx(&peer->conf, "peer BGPID %u unacceptable",
1996 		    ntohl(bgpid));
1997 		session_notification(peer, ERR_OPEN, ERR_OPEN_BGPID,
1998 		    NULL, 0);
1999 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2000 		return (-1);
2001 	}
2002 	peer->remote_bgpid = bgpid;
2003 
2004 	memcpy(&optparamlen, p, sizeof(optparamlen));
2005 	p += sizeof(optparamlen);
2006 
2007 	if (optparamlen != msglen - MSGSIZE_OPEN_MIN) {
2008 			log_peer_warnx(&peer->conf,
2009 			    "corrupt OPEN message received: length mismatch");
2010 			session_notification(peer, ERR_OPEN, 0, NULL, 0);
2011 			change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2012 			return (-1);
2013 	}
2014 
2015 	plen = optparamlen;
2016 	while (plen > 0) {
2017 		if (plen < 2) {
2018 			log_peer_warnx(&peer->conf,
2019 			    "corrupt OPEN message received, len wrong");
2020 			session_notification(peer, ERR_OPEN, 0, NULL, 0);
2021 			change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2022 			return (-1);
2023 		}
2024 		memcpy(&op_type, p, sizeof(op_type));
2025 		p += sizeof(op_type);
2026 		plen -= sizeof(op_type);
2027 		memcpy(&op_len, p, sizeof(op_len));
2028 		p += sizeof(op_len);
2029 		plen -= sizeof(op_len);
2030 		if (op_len > 0) {
2031 			if (plen < op_len) {
2032 				log_peer_warnx(&peer->conf,
2033 				    "corrupt OPEN message received, len wrong");
2034 				session_notification(peer, ERR_OPEN, 0,
2035 				    NULL, 0);
2036 				change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2037 				return (-1);
2038 			}
2039 			op_val = p;
2040 			p += op_len;
2041 			plen -= op_len;
2042 		} else
2043 			op_val = NULL;
2044 
2045 		switch (op_type) {
2046 		case OPT_PARAM_CAPABILITIES:		/* RFC 3392 */
2047 			if (parse_capabilities(peer, op_val, op_len,
2048 			    &as) == -1) {
2049 				session_notification(peer, ERR_OPEN, 0,
2050 				    NULL, 0);
2051 				change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2052 				return (-1);
2053 			}
2054 			break;
2055 		case OPT_PARAM_AUTH:			/* deprecated */
2056 		default:
2057 			/*
2058 			 * unsupported type
2059 			 * the RFCs tell us to leave the data section empty
2060 			 * and notify the peer with ERR_OPEN, ERR_OPEN_OPT.
2061 			 * How the peer should know _which_ optional parameter
2062 			 * we don't support is beyond me.
2063 			 */
2064 			log_peer_warnx(&peer->conf,
2065 			    "received OPEN message with unsupported optional "
2066 			    "parameter: type %u", op_type);
2067 			session_notification(peer, ERR_OPEN, ERR_OPEN_OPT,
2068 				NULL, 0);
2069 			change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2070 			timer_set(peer, Timer_IdleHold, 0);	/* no punish */
2071 			peer->IdleHoldTime /= 2;
2072 			return (-1);
2073 		}
2074 	}
2075 
2076 	/* if remote-as is zero and it's a cloned neighbor, accept any */
2077 	if (peer->template && !peer->conf.remote_as && as != AS_TRANS) {
2078 		peer->conf.remote_as = as;
2079 		peer->conf.ebgp = (peer->conf.remote_as != peer->conf.local_as);
2080 		if (!peer->conf.ebgp)
2081 			/* force enforce_as off for iBGP sessions */
2082 			peer->conf.enforce_as = ENFORCE_AS_OFF;
2083 	}
2084 
2085 	if (peer->conf.remote_as != as) {
2086 		log_peer_warnx(&peer->conf, "peer sent wrong AS %s",
2087 		    log_as(as));
2088 		session_notification(peer, ERR_OPEN, ERR_OPEN_AS, NULL, 0);
2089 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2090 		return (-1);
2091 	}
2092 
2093 	if (capa_neg_calc(peer) == -1) {
2094 		log_peer_warnx(&peer->conf,
2095 		    "capability negotiation calculation failed");
2096 		session_notification(peer, ERR_OPEN, 0, NULL, 0);
2097 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2098 		return (-1);
2099 	}
2100 
2101 	return (0);
2102 }
2103 
2104 int
2105 parse_update(struct peer *peer)
2106 {
2107 	u_char		*p;
2108 	u_int16_t	 datalen;
2109 
2110 	/*
2111 	 * we pass the message verbatim to the rde.
2112 	 * in case of errors the whole session is reset with a
2113 	 * notification anyway, we only need to know the peer
2114 	 */
2115 	p = peer->rbuf->rptr;
2116 	p += MSGSIZE_HEADER_MARKER;
2117 	memcpy(&datalen, p, sizeof(datalen));
2118 	datalen = ntohs(datalen);
2119 
2120 	p = peer->rbuf->rptr;
2121 	p += MSGSIZE_HEADER;	/* header is already checked */
2122 	datalen -= MSGSIZE_HEADER;
2123 
2124 	if (imsg_rde(IMSG_UPDATE, peer->conf.id, p, datalen) == -1)
2125 		return (-1);
2126 
2127 	return (0);
2128 }
2129 
2130 int
2131 parse_refresh(struct peer *peer)
2132 {
2133 	u_char		*p;
2134 	u_int16_t	 afi;
2135 	u_int8_t	 aid, safi;
2136 
2137 	p = peer->rbuf->rptr;
2138 	p += MSGSIZE_HEADER;	/* header is already checked */
2139 
2140 	/*
2141 	 * We could check if we actually announced the capability but
2142 	 * as long as the message is correctly encoded we don't care.
2143 	 */
2144 
2145 	/* afi, 2 byte */
2146 	memcpy(&afi, p, sizeof(afi));
2147 	afi = ntohs(afi);
2148 	p += 2;
2149 	/* reserved, 1 byte */
2150 	p += 1;
2151 	/* safi, 1 byte */
2152 	memcpy(&safi, p, sizeof(safi));
2153 
2154 	/* afi/safi unchecked -	unrecognized values will be ignored anyway */
2155 	if (afi2aid(afi, safi, &aid) == -1) {
2156 		log_peer_warnx(&peer->conf, "peer sent bad refresh, "
2157 		    "invalid afi/safi pair");
2158 		return (0);
2159 	}
2160 
2161 	if (imsg_rde(IMSG_REFRESH, peer->conf.id, &aid, sizeof(aid)) == -1)
2162 		return (-1);
2163 
2164 	return (0);
2165 }
2166 
2167 int
2168 parse_notification(struct peer *peer)
2169 {
2170 	u_char		*p;
2171 	u_int16_t	 datalen;
2172 	u_int8_t	 errcode;
2173 	u_int8_t	 subcode;
2174 	u_int8_t	 capa_code;
2175 	u_int8_t	 capa_len;
2176 	size_t		 shutcomm_len;
2177 	u_int8_t	 i;
2178 
2179 	/* just log */
2180 	p = peer->rbuf->rptr;
2181 	p += MSGSIZE_HEADER_MARKER;
2182 	memcpy(&datalen, p, sizeof(datalen));
2183 	datalen = ntohs(datalen);
2184 
2185 	p = peer->rbuf->rptr;
2186 	p += MSGSIZE_HEADER;	/* header is already checked */
2187 	datalen -= MSGSIZE_HEADER;
2188 
2189 	memcpy(&errcode, p, sizeof(errcode));
2190 	p += sizeof(errcode);
2191 	datalen -= sizeof(errcode);
2192 
2193 	memcpy(&subcode, p, sizeof(subcode));
2194 	p += sizeof(subcode);
2195 	datalen -= sizeof(subcode);
2196 
2197 	log_notification(peer, errcode, subcode, p, datalen, "received");
2198 	peer->errcnt++;
2199 
2200 	if (errcode == ERR_OPEN && subcode == ERR_OPEN_CAPA) {
2201 		if (datalen == 0) {	/* zebra likes to send those.. humbug */
2202 			log_peer_warnx(&peer->conf, "received \"unsupported "
2203 			    "capability\" notification without data part, "
2204 			    "disabling capability announcements altogether");
2205 			session_capa_ann_none(peer);
2206 		}
2207 
2208 		while (datalen > 0) {
2209 			if (datalen < 2) {
2210 				log_peer_warnx(&peer->conf,
2211 				    "parse_notification: "
2212 				    "expect len >= 2, len is %u", datalen);
2213 				return (-1);
2214 			}
2215 			memcpy(&capa_code, p, sizeof(capa_code));
2216 			p += sizeof(capa_code);
2217 			datalen -= sizeof(capa_code);
2218 			memcpy(&capa_len, p, sizeof(capa_len));
2219 			p += sizeof(capa_len);
2220 			datalen -= sizeof(capa_len);
2221 			if (datalen < capa_len) {
2222 				log_peer_warnx(&peer->conf,
2223 				    "parse_notification: capa_len %u exceeds "
2224 				    "remaining msg length %u", capa_len,
2225 				    datalen);
2226 				return (-1);
2227 			}
2228 			p += capa_len;
2229 			datalen -= capa_len;
2230 			switch (capa_code) {
2231 			case CAPA_MP:
2232 				for (i = 0; i < AID_MAX; i++)
2233 					peer->capa.ann.mp[i] = 0;
2234 				log_peer_warnx(&peer->conf,
2235 				    "disabling multiprotocol capability");
2236 				break;
2237 			case CAPA_REFRESH:
2238 				peer->capa.ann.refresh = 0;
2239 				log_peer_warnx(&peer->conf,
2240 				    "disabling route refresh capability");
2241 				break;
2242 			case CAPA_RESTART:
2243 				peer->capa.ann.grestart.restart = 0;
2244 				log_peer_warnx(&peer->conf,
2245 				    "disabling restart capability");
2246 				break;
2247 			case CAPA_AS4BYTE:
2248 				peer->capa.ann.as4byte = 0;
2249 				log_peer_warnx(&peer->conf,
2250 				    "disabling 4-byte AS num capability");
2251 				break;
2252 			default:	/* should not happen... */
2253 				log_peer_warnx(&peer->conf, "received "
2254 				    "\"unsupported capability\" notification "
2255 				    "for unknown capability %u, disabling "
2256 				    "capability announcements altogether",
2257 				    capa_code);
2258 				session_capa_ann_none(peer);
2259 				break;
2260 			}
2261 		}
2262 
2263 		return (1);
2264 	}
2265 
2266 	if (errcode == ERR_OPEN && subcode == ERR_OPEN_OPT) {
2267 		session_capa_ann_none(peer);
2268 		return (1);
2269 	}
2270 
2271 	if (errcode == ERR_CEASE &&
2272 	    (subcode == ERR_CEASE_ADMIN_DOWN ||
2273 	     subcode == ERR_CEASE_ADMIN_RESET)) {
2274 		if (datalen > 1) {
2275 			shutcomm_len = *p++;
2276 			datalen--;
2277 			if (datalen < shutcomm_len) {
2278 			    log_peer_warnx(&peer->conf,
2279 				"received truncated shutdown reason");
2280 			    return (0);
2281 			}
2282 			if (shutcomm_len > SHUT_COMM_LEN - 1) {
2283 			    log_peer_warnx(&peer->conf,
2284 				"received overly long shutdown reason");
2285 			    return (0);
2286 			}
2287 			memcpy(peer->stats.last_shutcomm, p, shutcomm_len);
2288 			peer->stats.last_shutcomm[shutcomm_len] = '\0';
2289 			log_peer_warnx(&peer->conf,
2290 			    "received shutdown reason: \"%s\"",
2291 			    log_shutcomm(peer->stats.last_shutcomm));
2292 			p += shutcomm_len;
2293 			datalen -= shutcomm_len;
2294 		}
2295 	}
2296 
2297 	return (0);
2298 }
2299 
2300 int
2301 parse_capabilities(struct peer *peer, u_char *d, u_int16_t dlen, u_int32_t *as)
2302 {
2303 	u_char		*capa_val;
2304 	u_int32_t	 remote_as;
2305 	u_int16_t	 len;
2306 	u_int16_t	 afi;
2307 	u_int16_t	 gr_header;
2308 	u_int8_t	 safi;
2309 	u_int8_t	 aid;
2310 	u_int8_t	 gr_flags;
2311 	u_int8_t	 capa_code;
2312 	u_int8_t	 capa_len;
2313 	u_int8_t	 i;
2314 
2315 	len = dlen;
2316 	while (len > 0) {
2317 		if (len < 2) {
2318 			log_peer_warnx(&peer->conf, "Bad capabilities attr "
2319 			    "length: %u, too short", len);
2320 			return (-1);
2321 		}
2322 		memcpy(&capa_code, d, sizeof(capa_code));
2323 		d += sizeof(capa_code);
2324 		len -= sizeof(capa_code);
2325 		memcpy(&capa_len, d, sizeof(capa_len));
2326 		d += sizeof(capa_len);
2327 		len -= sizeof(capa_len);
2328 		if (capa_len > 0) {
2329 			if (len < capa_len) {
2330 				log_peer_warnx(&peer->conf,
2331 				    "Bad capabilities attr length: "
2332 				    "len %u smaller than capa_len %u",
2333 				    len, capa_len);
2334 				return (-1);
2335 			}
2336 			capa_val = d;
2337 			d += capa_len;
2338 			len -= capa_len;
2339 		} else
2340 			capa_val = NULL;
2341 
2342 		switch (capa_code) {
2343 		case CAPA_MP:			/* RFC 4760 */
2344 			if (capa_len != 4) {
2345 				log_peer_warnx(&peer->conf,
2346 				    "Bad multi protocol capability length: "
2347 				    "%u", capa_len);
2348 				break;
2349 			}
2350 			memcpy(&afi, capa_val, sizeof(afi));
2351 			afi = ntohs(afi);
2352 			memcpy(&safi, capa_val + 3, sizeof(safi));
2353 			if (afi2aid(afi, safi, &aid) == -1) {
2354 				log_peer_warnx(&peer->conf,
2355 				    "Received multi protocol capability: "
2356 				    " unknown AFI %u, safi %u pair",
2357 				    afi, safi);
2358 				break;
2359 			}
2360 			peer->capa.peer.mp[aid] = 1;
2361 			break;
2362 		case CAPA_REFRESH:
2363 			peer->capa.peer.refresh = 1;
2364 			break;
2365 		case CAPA_RESTART:
2366 			if (capa_len == 2) {
2367 				/* peer only supports EoR marker */
2368 				peer->capa.peer.grestart.restart = 1;
2369 				peer->capa.peer.grestart.timeout = 0;
2370 				break;
2371 			} else if (capa_len % 4 != 2) {
2372 				log_peer_warnx(&peer->conf,
2373 				    "Bad graceful restart capability length: "
2374 				    "%u", capa_len);
2375 				peer->capa.peer.grestart.restart = 0;
2376 				peer->capa.peer.grestart.timeout = 0;
2377 				break;
2378 			}
2379 
2380 			memcpy(&gr_header, capa_val, sizeof(gr_header));
2381 			gr_header = ntohs(gr_header);
2382 			peer->capa.peer.grestart.timeout =
2383 			    gr_header & CAPA_GR_TIMEMASK;
2384 			if (peer->capa.peer.grestart.timeout == 0) {
2385 				log_peer_warnx(&peer->conf, "Received "
2386 				    "graceful restart timeout is zero");
2387 				peer->capa.peer.grestart.restart = 0;
2388 				break;
2389 			}
2390 
2391 			for (i = 2; i <= capa_len - 4; i += 4) {
2392 				memcpy(&afi, capa_val + i, sizeof(afi));
2393 				afi = ntohs(afi);
2394 				memcpy(&safi, capa_val + i + 2, sizeof(safi));
2395 				if (afi2aid(afi, safi, &aid) == -1) {
2396 					log_peer_warnx(&peer->conf,
2397 					    "Received graceful restart capa: "
2398 					    " unknown AFI %u, safi %u pair",
2399 					    afi, safi);
2400 					continue;
2401 				}
2402 				memcpy(&gr_flags, capa_val + i + 3,
2403 				    sizeof(gr_flags));
2404 				peer->capa.peer.grestart.flags[aid] |=
2405 				    CAPA_GR_PRESENT;
2406 				if (gr_flags & CAPA_GR_F_FLAG)
2407 					peer->capa.peer.grestart.flags[aid] |=
2408 					    CAPA_GR_FORWARD;
2409 				if (gr_header & CAPA_GR_R_FLAG)
2410 					peer->capa.peer.grestart.flags[aid] |=
2411 					    CAPA_GR_RESTART;
2412 				peer->capa.peer.grestart.restart = 2;
2413 			}
2414 			break;
2415 		case CAPA_AS4BYTE:
2416 			if (capa_len != 4) {
2417 				log_peer_warnx(&peer->conf,
2418 				    "Bad AS4BYTE capability length: "
2419 				    "%u", capa_len);
2420 				peer->capa.peer.as4byte = 0;
2421 				break;
2422 			}
2423 			memcpy(&remote_as, capa_val, sizeof(remote_as));
2424 			*as = ntohl(remote_as);
2425 			if (*as == 0) {
2426 				log_peer_warnx(&peer->conf,
2427 				    "peer requests unacceptable AS %u", *as);
2428 				session_notification(peer, ERR_OPEN,
2429 				    ERR_OPEN_AS, NULL, 0);
2430 				change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2431 				return (-1);
2432 			}
2433 			peer->capa.peer.as4byte = 1;
2434 			break;
2435 		default:
2436 			break;
2437 		}
2438 	}
2439 
2440 	return (0);
2441 }
2442 
2443 int
2444 capa_neg_calc(struct peer *p)
2445 {
2446 	u_int8_t	i, hasmp = 0;
2447 
2448 	/* refresh: does not realy matter here, use peer setting */
2449 	p->capa.neg.refresh = p->capa.peer.refresh;
2450 
2451 	/* as4byte: both side must announce capability */
2452 	if (p->capa.ann.as4byte && p->capa.peer.as4byte)
2453 		p->capa.neg.as4byte = 1;
2454 	else
2455 		p->capa.neg.as4byte = 0;
2456 
2457 	/* MP: both side must announce capability */
2458 	for (i = 0; i < AID_MAX; i++) {
2459 		if (p->capa.ann.mp[i] && p->capa.peer.mp[i]) {
2460 			p->capa.neg.mp[i] = 1;
2461 			hasmp = 1;
2462 		} else
2463 			p->capa.neg.mp[i] = 0;
2464 	}
2465 	/* if no MP capability present default to IPv4 unicast mode */
2466 	if (!hasmp)
2467 		p->capa.neg.mp[AID_INET] = 1;
2468 
2469 	/*
2470 	 * graceful restart: only the peer capabilities are of interest here.
2471 	 * It is necessary to compare the new values with the previous ones
2472 	 * and act acordingly. AFI/SAFI that are not part in the MP capability
2473 	 * are treated as not being present.
2474 	 */
2475 
2476 	for (i = 0; i < AID_MAX; i++) {
2477 		int8_t	negflags;
2478 
2479 		/* disable GR if the AFI/SAFI is not present */
2480 		if (p->capa.peer.grestart.flags[i] & CAPA_GR_PRESENT &&
2481 		    p->capa.neg.mp[i] == 0)
2482 			p->capa.peer.grestart.flags[i] = 0;	/* disable */
2483 		/* look at current GR state and decide what to do */
2484 		negflags = p->capa.neg.grestart.flags[i];
2485 		p->capa.neg.grestart.flags[i] = p->capa.peer.grestart.flags[i];
2486 		if (negflags & CAPA_GR_RESTARTING) {
2487 			if (!(p->capa.peer.grestart.flags[i] &
2488 			    CAPA_GR_FORWARD)) {
2489 				if (imsg_rde(IMSG_SESSION_FLUSH, p->conf.id,
2490 				    &i, sizeof(i)) == -1)
2491 					return (-1);
2492 				log_peer_warnx(&p->conf, "graceful restart of "
2493 				    "%s, not restarted, flushing", aid2str(i));
2494 			} else
2495 				p->capa.neg.grestart.flags[i] |=
2496 				    CAPA_GR_RESTARTING;
2497 		}
2498 	}
2499 	p->capa.neg.grestart.timeout = p->capa.peer.grestart.timeout;
2500 	p->capa.neg.grestart.restart = p->capa.peer.grestart.restart;
2501 
2502 	return (0);
2503 }
2504 
2505 void
2506 session_dispatch_imsg(struct imsgbuf *ibuf, int idx, u_int *listener_cnt)
2507 {
2508 	struct imsg		 imsg;
2509 	struct mrt		 xmrt;
2510 	struct mrt		*mrt;
2511 	struct imsgbuf		*i;
2512 	struct peer		*p;
2513 	struct listen_addr	*la, *nla;
2514 	struct kif		*kif;
2515 	u_char			*data;
2516 	int			 n, fd, depend_ok, restricted;
2517 	u_int8_t		 aid, errcode, subcode;
2518 
2519 	while (ibuf) {
2520 		if ((n = imsg_get(ibuf, &imsg)) == -1)
2521 			fatal("session_dispatch_imsg: imsg_get error");
2522 
2523 		if (n == 0)
2524 			break;
2525 
2526 		switch (imsg.hdr.type) {
2527 		case IMSG_SOCKET_CONN:
2528 		case IMSG_SOCKET_CONN_CTL:
2529 			if (idx != PFD_PIPE_MAIN)
2530 				fatalx("reconf request not from parent");
2531 			if ((fd = imsg.fd) == -1) {
2532 				log_warnx("expected to receive imsg fd to "
2533 				    "RDE but didn't receive any");
2534 				break;
2535 			}
2536 			if ((i = malloc(sizeof(struct imsgbuf))) == NULL)
2537 				fatal(NULL);
2538 			imsg_init(i, fd);
2539 			if (imsg.hdr.type == IMSG_SOCKET_CONN) {
2540 				if (ibuf_rde) {
2541 					log_warnx("Unexpected imsg connection "
2542 					    "to RDE received");
2543 					msgbuf_clear(&ibuf_rde->w);
2544 					free(ibuf_rde);
2545 				}
2546 				ibuf_rde = i;
2547 			} else {
2548 				if (ibuf_rde_ctl) {
2549 					log_warnx("Unexpected imsg ctl "
2550 					    "connection to RDE received");
2551 					msgbuf_clear(&ibuf_rde_ctl->w);
2552 					free(ibuf_rde_ctl);
2553 				}
2554 				ibuf_rde_ctl = i;
2555 			}
2556 			break;
2557 		case IMSG_RECONF_CONF:
2558 			if (idx != PFD_PIPE_MAIN)
2559 				fatalx("reconf request not from parent");
2560 			nconf = new_config();
2561 
2562 			copy_config(nconf, imsg.data);
2563 			pending_reconf = 1;
2564 			break;
2565 		case IMSG_RECONF_PEER:
2566 			if (idx != PFD_PIPE_MAIN)
2567 				fatalx("reconf request not from parent");
2568 			if ((p = calloc(1, sizeof(struct peer))) == NULL)
2569 				fatal("new_peer");
2570 			memcpy(&p->conf, imsg.data, sizeof(struct peer_config));
2571 			p->state = p->prev_state = STATE_NONE;
2572 			p->reconf_action = RECONF_REINIT;
2573 			if (RB_INSERT(peer_head, &nconf->peers, p) != NULL)
2574 				fatalx("%s: peer tree is corrupt", __func__);
2575 			break;
2576 		case IMSG_RECONF_LISTENER:
2577 			if (idx != PFD_PIPE_MAIN)
2578 				fatalx("reconf request not from parent");
2579 			if (nconf == NULL)
2580 				fatalx("IMSG_RECONF_LISTENER but no config");
2581 			nla = imsg.data;
2582 			TAILQ_FOREACH(la, conf->listen_addrs, entry)
2583 				if (!la_cmp(la, nla))
2584 					break;
2585 
2586 			if (la == NULL) {
2587 				if (nla->reconf != RECONF_REINIT)
2588 					fatalx("king bula sez: "
2589 					    "expected REINIT");
2590 
2591 				if ((nla->fd = imsg.fd) == -1)
2592 					log_warnx("expected to receive fd for "
2593 					    "%s but didn't receive any",
2594 					    log_sockaddr((struct sockaddr *)
2595 					    &nla->sa, nla->sa_len));
2596 
2597 				la = calloc(1, sizeof(struct listen_addr));
2598 				if (la == NULL)
2599 					fatal(NULL);
2600 				memcpy(&la->sa, &nla->sa, sizeof(la->sa));
2601 				la->flags = nla->flags;
2602 				la->fd = nla->fd;
2603 				la->reconf = RECONF_REINIT;
2604 				TAILQ_INSERT_TAIL(nconf->listen_addrs, la,
2605 				    entry);
2606 			} else {
2607 				if (nla->reconf != RECONF_KEEP)
2608 					fatalx("king bula sez: expected KEEP");
2609 				la->reconf = RECONF_KEEP;
2610 			}
2611 
2612 			break;
2613 		case IMSG_RECONF_CTRL:
2614 			if (idx != PFD_PIPE_MAIN)
2615 				fatalx("reconf request not from parent");
2616 			if (imsg.hdr.len != IMSG_HEADER_SIZE +
2617 			    sizeof(restricted))
2618 				fatalx("IFINFO imsg with wrong len");
2619 			memcpy(&restricted, imsg.data, sizeof(restricted));
2620 			if (imsg.fd == -1) {
2621 				log_warnx("expected to receive fd for control "
2622 				    "socket but didn't receive any");
2623 				break;
2624 			}
2625 			if (restricted) {
2626 				control_shutdown(rcsock);
2627 				rcsock = imsg.fd;
2628 			} else {
2629 				control_shutdown(csock);
2630 				csock = imsg.fd;
2631 			}
2632 			break;
2633 		case IMSG_RECONF_DRAIN:
2634 			if (idx != PFD_PIPE_MAIN)
2635 				fatalx("reconf request not from parent");
2636 			imsg_compose(ibuf_main, IMSG_RECONF_DRAIN, 0, 0,
2637 			    -1, NULL, 0);
2638 			break;
2639 		case IMSG_RECONF_DONE:
2640 			if (idx != PFD_PIPE_MAIN)
2641 				fatalx("reconf request not from parent");
2642 			if (nconf == NULL)
2643 				fatalx("got IMSG_RECONF_DONE but no config");
2644 			copy_config(conf, nconf);
2645 			merge_peers(conf, nconf);
2646 
2647 			/* delete old listeners */
2648 			for (la = TAILQ_FIRST(conf->listen_addrs); la != NULL;
2649 			    la = nla) {
2650 				nla = TAILQ_NEXT(la, entry);
2651 				if (la->reconf == RECONF_NONE) {
2652 					log_info("not listening on %s any more",
2653 					    log_sockaddr((struct sockaddr *)
2654 					    &la->sa, la->sa_len));
2655 					TAILQ_REMOVE(conf->listen_addrs, la,
2656 					    entry);
2657 					close(la->fd);
2658 					free(la);
2659 				}
2660 			}
2661 
2662 			/* add new listeners */
2663 			while ((la = TAILQ_FIRST(nconf->listen_addrs)) !=
2664 			    NULL) {
2665 				TAILQ_REMOVE(nconf->listen_addrs, la, entry);
2666 				TAILQ_INSERT_TAIL(conf->listen_addrs, la,
2667 				    entry);
2668 			}
2669 
2670 			setup_listeners(listener_cnt);
2671 			free_config(nconf);
2672 			nconf = NULL;
2673 			pending_reconf = 0;
2674 			log_info("SE reconfigured");
2675 			imsg_compose(ibuf_main, IMSG_RECONF_DONE, 0, 0,
2676 			    -1, NULL, 0);
2677 			break;
2678 		case IMSG_IFINFO:
2679 			if (idx != PFD_PIPE_MAIN)
2680 				fatalx("IFINFO message not from parent");
2681 			if (imsg.hdr.len != IMSG_HEADER_SIZE +
2682 			    sizeof(struct kif))
2683 				fatalx("IFINFO imsg with wrong len");
2684 			kif = imsg.data;
2685 			depend_ok = kif->depend_state;
2686 
2687 			RB_FOREACH(p, peer_head, &conf->peers)
2688 				if (!strcmp(p->conf.if_depend, kif->ifname)) {
2689 					if (depend_ok && !p->depend_ok) {
2690 						p->depend_ok = depend_ok;
2691 						bgp_fsm(p, EVNT_START);
2692 					} else if (!depend_ok && p->depend_ok) {
2693 						p->depend_ok = depend_ok;
2694 						session_stop(p,
2695 						    ERR_CEASE_OTHER_CHANGE);
2696 					}
2697 				}
2698 			break;
2699 		case IMSG_MRT_OPEN:
2700 		case IMSG_MRT_REOPEN:
2701 			if (imsg.hdr.len > IMSG_HEADER_SIZE +
2702 			    sizeof(struct mrt)) {
2703 				log_warnx("wrong imsg len");
2704 				break;
2705 			}
2706 
2707 			memcpy(&xmrt, imsg.data, sizeof(struct mrt));
2708 			if ((xmrt.wbuf.fd = imsg.fd) == -1)
2709 				log_warnx("expected to receive fd for mrt dump "
2710 				    "but didn't receive any");
2711 
2712 			mrt = mrt_get(&mrthead, &xmrt);
2713 			if (mrt == NULL) {
2714 				/* new dump */
2715 				mrt = calloc(1, sizeof(struct mrt));
2716 				if (mrt == NULL)
2717 					fatal("session_dispatch_imsg");
2718 				memcpy(mrt, &xmrt, sizeof(struct mrt));
2719 				TAILQ_INIT(&mrt->wbuf.bufs);
2720 				LIST_INSERT_HEAD(&mrthead, mrt, entry);
2721 			} else {
2722 				/* old dump reopened */
2723 				close(mrt->wbuf.fd);
2724 				mrt->wbuf.fd = xmrt.wbuf.fd;
2725 			}
2726 			break;
2727 		case IMSG_MRT_CLOSE:
2728 			if (imsg.hdr.len > IMSG_HEADER_SIZE +
2729 			    sizeof(struct mrt)) {
2730 				log_warnx("wrong imsg len");
2731 				break;
2732 			}
2733 
2734 			memcpy(&xmrt, imsg.data, sizeof(struct mrt));
2735 			mrt = mrt_get(&mrthead, &xmrt);
2736 			if (mrt != NULL)
2737 				mrt_done(mrt);
2738 			break;
2739 		case IMSG_CTL_KROUTE:
2740 		case IMSG_CTL_KROUTE_ADDR:
2741 		case IMSG_CTL_SHOW_NEXTHOP:
2742 		case IMSG_CTL_SHOW_INTERFACE:
2743 		case IMSG_CTL_SHOW_FIB_TABLES:
2744 			if (idx != PFD_PIPE_MAIN)
2745 				fatalx("ctl kroute request not from parent");
2746 			control_imsg_relay(&imsg);
2747 			break;
2748 		case IMSG_CTL_SHOW_RIB:
2749 		case IMSG_CTL_SHOW_RIB_PREFIX:
2750 		case IMSG_CTL_SHOW_RIB_COMMUNITIES:
2751 		case IMSG_CTL_SHOW_RIB_ATTR:
2752 		case IMSG_CTL_SHOW_RIB_MEM:
2753 		case IMSG_CTL_SHOW_RIB_HASH:
2754 		case IMSG_CTL_SHOW_NETWORK:
2755 		case IMSG_CTL_SHOW_NEIGHBOR:
2756 			if (idx != PFD_PIPE_ROUTE_CTL)
2757 				fatalx("ctl rib request not from RDE");
2758 			control_imsg_relay(&imsg);
2759 			break;
2760 		case IMSG_CTL_END:
2761 		case IMSG_CTL_RESULT:
2762 			control_imsg_relay(&imsg);
2763 			break;
2764 		case IMSG_UPDATE:
2765 			if (idx != PFD_PIPE_ROUTE)
2766 				fatalx("update request not from RDE");
2767 			if (imsg.hdr.len > IMSG_HEADER_SIZE +
2768 			    MAX_PKTSIZE - MSGSIZE_HEADER ||
2769 			    imsg.hdr.len < IMSG_HEADER_SIZE +
2770 			    MSGSIZE_UPDATE_MIN - MSGSIZE_HEADER)
2771 				log_warnx("RDE sent invalid update");
2772 			else
2773 				session_update(imsg.hdr.peerid, imsg.data,
2774 				    imsg.hdr.len - IMSG_HEADER_SIZE);
2775 			break;
2776 		case IMSG_UPDATE_ERR:
2777 			if (idx != PFD_PIPE_ROUTE)
2778 				fatalx("update request not from RDE");
2779 			if (imsg.hdr.len < IMSG_HEADER_SIZE + 2) {
2780 				log_warnx("RDE sent invalid notification");
2781 				break;
2782 			}
2783 			if ((p = getpeerbyid(conf, imsg.hdr.peerid)) == NULL) {
2784 				log_warnx("no such peer: id=%u",
2785 				    imsg.hdr.peerid);
2786 				break;
2787 			}
2788 			data = imsg.data;
2789 			errcode = *data++;
2790 			subcode = *data++;
2791 
2792 			if (imsg.hdr.len == IMSG_HEADER_SIZE + 2)
2793 				data = NULL;
2794 
2795 			session_notification(p, errcode, subcode,
2796 			    data, imsg.hdr.len - IMSG_HEADER_SIZE - 2);
2797 			switch (errcode) {
2798 			case ERR_CEASE:
2799 				switch (subcode) {
2800 				case ERR_CEASE_MAX_PREFIX:
2801 					bgp_fsm(p, EVNT_STOP);
2802 					if (p->conf.max_prefix_restart)
2803 						timer_set(p, Timer_IdleHold, 60 *
2804 						    p->conf.max_prefix_restart);
2805 					break;
2806 				default:
2807 					bgp_fsm(p, EVNT_CON_FATAL);
2808 					break;
2809 				}
2810 				break;
2811 			default:
2812 				bgp_fsm(p, EVNT_CON_FATAL);
2813 				break;
2814 			}
2815 			break;
2816 		case IMSG_SESSION_RESTARTED:
2817 			if (idx != PFD_PIPE_ROUTE)
2818 				fatalx("update request not from RDE");
2819 			if (imsg.hdr.len < IMSG_HEADER_SIZE + sizeof(aid)) {
2820 				log_warnx("RDE sent invalid restart msg");
2821 				break;
2822 			}
2823 			if ((p = getpeerbyid(conf, imsg.hdr.peerid)) == NULL) {
2824 				log_warnx("no such peer: id=%u",
2825 				    imsg.hdr.peerid);
2826 				break;
2827 			}
2828 			memcpy(&aid, imsg.data, sizeof(aid));
2829 			if (aid >= AID_MAX)
2830 				fatalx("IMSG_SESSION_RESTARTED: bad AID");
2831 			if (p->capa.neg.grestart.flags[aid] &
2832 			    CAPA_GR_RESTARTING) {
2833 				log_peer_warnx(&p->conf,
2834 				    "graceful restart of %s finished",
2835 				    aid2str(aid));
2836 				p->capa.neg.grestart.flags[aid] &=
2837 				    ~CAPA_GR_RESTARTING;
2838 				timer_stop(p, Timer_RestartTimeout);
2839 
2840 				/* signal back to RDE to cleanup stale routes */
2841 				if (imsg_rde(IMSG_SESSION_RESTARTED,
2842 				    imsg.hdr.peerid, &aid, sizeof(aid)) == -1)
2843 					fatal("imsg_compose: "
2844 					    "IMSG_SESSION_RESTARTED");
2845 			}
2846 			break;
2847 		case IMSG_SESSION_DOWN:
2848 			if (idx != PFD_PIPE_ROUTE)
2849 				fatalx("update request not from RDE");
2850 			if ((p = getpeerbyid(conf, imsg.hdr.peerid)) == NULL) {
2851 				log_warnx("no such peer: id=%u",
2852 				    imsg.hdr.peerid);
2853 				break;
2854 			}
2855 			session_stop(p, ERR_CEASE_ADMIN_DOWN);
2856 			break;
2857 		default:
2858 			break;
2859 		}
2860 		imsg_free(&imsg);
2861 	}
2862 }
2863 
2864 int
2865 la_cmp(struct listen_addr *a, struct listen_addr *b)
2866 {
2867 	struct sockaddr_in	*in_a, *in_b;
2868 	struct sockaddr_in6	*in6_a, *in6_b;
2869 
2870 	if (a->sa.ss_family != b->sa.ss_family)
2871 		return (1);
2872 
2873 	switch (a->sa.ss_family) {
2874 	case AF_INET:
2875 		in_a = (struct sockaddr_in *)&a->sa;
2876 		in_b = (struct sockaddr_in *)&b->sa;
2877 		if (in_a->sin_addr.s_addr != in_b->sin_addr.s_addr)
2878 			return (1);
2879 		if (in_a->sin_port != in_b->sin_port)
2880 			return (1);
2881 		break;
2882 	case AF_INET6:
2883 		in6_a = (struct sockaddr_in6 *)&a->sa;
2884 		in6_b = (struct sockaddr_in6 *)&b->sa;
2885 		if (bcmp(&in6_a->sin6_addr, &in6_b->sin6_addr,
2886 		    sizeof(struct in6_addr)))
2887 			return (1);
2888 		if (in6_a->sin6_port != in6_b->sin6_port)
2889 			return (1);
2890 		break;
2891 	default:
2892 		fatal("king bula sez: unknown address family");
2893 		/* NOTREACHED */
2894 	}
2895 
2896 	return (0);
2897 }
2898 
2899 struct peer *
2900 getpeerbydesc(struct bgpd_config *c, const char *descr)
2901 {
2902 	struct peer	*p, *res = NULL;
2903 	int		 match = 0;
2904 
2905 	RB_FOREACH(p, peer_head, &conf->peers)
2906 		if (!strcmp(p->conf.descr, descr)) {
2907 			res = p;
2908 			match++;
2909 		}
2910 
2911 	if (match > 1)
2912 		log_info("neighbor description \"%s\" not unique, request "
2913 		    "aborted", descr);
2914 
2915 	if (match == 1)
2916 		return (res);
2917 	else
2918 		return (NULL);
2919 }
2920 
2921 struct peer *
2922 getpeerbyip(struct bgpd_config *c, struct sockaddr *ip)
2923 {
2924 	struct bgpd_addr addr;
2925 	struct peer	*p, *newpeer, *loose = NULL;
2926 	u_int32_t	 id;
2927 
2928 	sa2addr(ip, &addr, NULL);
2929 
2930 	/* we might want a more effective way to find peers by IP */
2931 	RB_FOREACH(p, peer_head, &conf->peers)
2932 		if (!p->conf.template &&
2933 		    !memcmp(&addr, &p->conf.remote_addr, sizeof(addr)))
2934 			return (p);
2935 
2936 	/* try template matching */
2937 	RB_FOREACH(p, peer_head, &conf->peers)
2938 		if (p->conf.template &&
2939 		    p->conf.remote_addr.aid == addr.aid &&
2940 		    session_match_mask(p, &addr))
2941 			if (loose == NULL || loose->conf.remote_masklen <
2942 			    p->conf.remote_masklen)
2943 				loose = p;
2944 
2945 	if (loose != NULL) {
2946 		/* clone */
2947 		if ((newpeer = malloc(sizeof(struct peer))) == NULL)
2948 			fatal(NULL);
2949 		memcpy(newpeer, loose, sizeof(struct peer));
2950 		for (id = PEER_ID_DYN_MAX; id > PEER_ID_STATIC_MAX; id--) {
2951 			RB_FOREACH(p, peer_head, &conf->peers)
2952 				if (p->conf.id == id)
2953 					break;
2954 			if (p == NULL)		/* we found a free id */
2955 				break;
2956 		}
2957 		newpeer->template = loose;
2958 		session_template_clone(newpeer, ip, id, 0);
2959 		newpeer->state = newpeer->prev_state = STATE_NONE;
2960 		newpeer->reconf_action = RECONF_KEEP;
2961 		newpeer->rbuf = NULL;
2962 		init_peer(newpeer);
2963 		bgp_fsm(newpeer, EVNT_START);
2964 		if (RB_INSERT(peer_head, &c->peers, newpeer) != NULL)
2965 			fatalx("%s: peer tree is corrupt", __func__);
2966 		return (newpeer);
2967 	}
2968 
2969 	return (NULL);
2970 }
2971 
2972 struct peer *
2973 getpeerbyid(struct bgpd_config *c, u_int32_t peerid)
2974 {
2975 	static struct peer lookup;
2976 
2977 	lookup.conf.id = peerid;
2978 
2979 	return RB_FIND(peer_head, &c->peers, &lookup);
2980 }
2981 
2982 int
2983 peer_matched(struct peer *p, struct ctl_neighbor *n)
2984 {
2985 	char *s;
2986 
2987 	if (n && n->addr.aid) {
2988 		if (memcmp(&p->conf.remote_addr, &n->addr,
2989 		    sizeof(p->conf.remote_addr)))
2990 			return 0;
2991 	} else if (n && n->descr[0]) {
2992 		s = n->is_group ? p->conf.group : p->conf.descr;
2993 		if (strcmp(s, n->descr))
2994 			return 0;
2995 	}
2996 	return 1;
2997 }
2998 
2999 void
3000 session_template_clone(struct peer *p, struct sockaddr *ip, u_int32_t id,
3001     u_int32_t as)
3002 {
3003 	struct bgpd_addr	remote_addr;
3004 
3005 	if (ip)
3006 		sa2addr(ip, &remote_addr, NULL);
3007 	else
3008 		memcpy(&remote_addr, &p->conf.remote_addr, sizeof(remote_addr));
3009 
3010 	memcpy(&p->conf, &p->template->conf, sizeof(struct peer_config));
3011 
3012 	p->conf.id = id;
3013 
3014 	if (as) {
3015 		p->conf.remote_as = as;
3016 		p->conf.ebgp = (p->conf.remote_as != p->conf.local_as);
3017 		if (!p->conf.ebgp)
3018 			/* force enforce_as off for iBGP sessions */
3019 			p->conf.enforce_as = ENFORCE_AS_OFF;
3020 	}
3021 
3022 	memcpy(&p->conf.remote_addr, &remote_addr, sizeof(remote_addr));
3023 	switch (p->conf.remote_addr.aid) {
3024 	case AID_INET:
3025 		p->conf.remote_masklen = 32;
3026 		break;
3027 	case AID_INET6:
3028 		p->conf.remote_masklen = 128;
3029 		break;
3030 	}
3031 	p->conf.template = 0;
3032 }
3033 
3034 int
3035 session_match_mask(struct peer *p, struct bgpd_addr *a)
3036 {
3037 	struct in_addr	 v4masked;
3038 	struct in6_addr	 v6masked;
3039 
3040 	switch (p->conf.remote_addr.aid) {
3041 	case AID_INET:
3042 		inet4applymask(&v4masked, &a->v4, p->conf.remote_masklen);
3043 		if (p->conf.remote_addr.v4.s_addr == v4masked.s_addr)
3044 			return (1);
3045 		return (0);
3046 	case AID_INET6:
3047 		inet6applymask(&v6masked, &a->v6, p->conf.remote_masklen);
3048 
3049 		if (memcmp(&v6masked, &p->conf.remote_addr.v6,
3050 		    sizeof(v6masked)) == 0)
3051 			return (1);
3052 		return (0);
3053 	}
3054 	return (0);
3055 }
3056 
3057 void
3058 session_down(struct peer *peer)
3059 {
3060 	bzero(&peer->capa.neg, sizeof(peer->capa.neg));
3061 	peer->stats.last_updown = time(NULL);
3062 	/*
3063 	 * session_down is called in the exit code path so check
3064 	 * if the RDE is still around, if not there is no need to
3065 	 * send the message.
3066 	 */
3067 	if (ibuf_rde == NULL)
3068 		return;
3069 	if (imsg_rde(IMSG_SESSION_DOWN, peer->conf.id, NULL, 0) == -1)
3070 		fatalx("imsg_compose error");
3071 }
3072 
3073 void
3074 session_up(struct peer *p)
3075 {
3076 	struct session_up	 sup;
3077 
3078 	if (imsg_rde(IMSG_SESSION_ADD, p->conf.id,
3079 	    &p->conf, sizeof(p->conf)) == -1)
3080 		fatalx("imsg_compose error");
3081 
3082 	sup.local_addr = p->local;
3083 	sup.remote_addr = p->remote;
3084 
3085 	sup.remote_bgpid = p->remote_bgpid;
3086 	sup.short_as = p->short_as;
3087 	memcpy(&sup.capa, &p->capa.neg, sizeof(sup.capa));
3088 	p->stats.last_updown = time(NULL);
3089 	if (imsg_rde(IMSG_SESSION_UP, p->conf.id, &sup, sizeof(sup)) == -1)
3090 		fatalx("imsg_compose error");
3091 }
3092 
3093 int
3094 imsg_ctl_parent(int type, u_int32_t peerid, pid_t pid, void *data,
3095     u_int16_t datalen)
3096 {
3097 	return (imsg_compose(ibuf_main, type, peerid, pid, -1, data, datalen));
3098 }
3099 
3100 int
3101 imsg_ctl_rde(int type, pid_t pid, void *data, u_int16_t datalen)
3102 {
3103 	if (ibuf_rde_ctl == NULL) {
3104 		log_warnx("Can't send message %u to RDE, ctl pipe closed",
3105 		    type);
3106 		return (0);
3107 	}
3108 	/*
3109 	 * Use control socket to talk to RDE to bypass the queue of the
3110 	 * regular imsg socket.
3111 	 */
3112 	return (imsg_compose(ibuf_rde_ctl, type, 0, pid, -1, data, datalen));
3113 }
3114 
3115 int
3116 imsg_rde(int type, uint32_t peerid, void *data, u_int16_t datalen)
3117 {
3118 	if (ibuf_rde == NULL) {
3119 		log_warnx("Can't send message %u to RDE, pipe closed", type);
3120 		return (0);
3121 	}
3122 
3123 	return (imsg_compose(ibuf_rde, type, peerid, 0, -1, data, datalen));
3124 }
3125 
3126 void
3127 session_demote(struct peer *p, int level)
3128 {
3129 	struct demote_msg	msg;
3130 
3131 	strlcpy(msg.demote_group, p->conf.demote_group,
3132 	    sizeof(msg.demote_group));
3133 	msg.level = level;
3134 	if (imsg_compose(ibuf_main, IMSG_DEMOTE, p->conf.id, 0, -1,
3135 	    &msg, sizeof(msg)) == -1)
3136 		fatalx("imsg_compose error");
3137 
3138 	p->demoted += level;
3139 }
3140 
3141 void
3142 session_stop(struct peer *peer, u_int8_t subcode)
3143 {
3144 	char data[SHUT_COMM_LEN];
3145 	size_t datalen;
3146 	size_t shutcomm_len;
3147 	char *communication;
3148 
3149 	datalen = 0;
3150 	communication = peer->conf.shutcomm;
3151 
3152 	if ((subcode == ERR_CEASE_ADMIN_DOWN ||
3153 	    subcode == ERR_CEASE_ADMIN_RESET)
3154 	    && communication && *communication) {
3155 		shutcomm_len = strlen(communication);
3156 		if (shutcomm_len > SHUT_COMM_LEN - 1) {
3157 		    log_peer_warnx(&peer->conf,
3158 			"trying to send overly long shutdown reason");
3159 		} else {
3160 			data[0] = shutcomm_len;
3161 			datalen = shutcomm_len + sizeof(data[0]);
3162 			memcpy(data + 1, communication, shutcomm_len);
3163 		}
3164 	}
3165 	switch (peer->state) {
3166 	case STATE_OPENSENT:
3167 	case STATE_OPENCONFIRM:
3168 	case STATE_ESTABLISHED:
3169 		session_notification(peer, ERR_CEASE, subcode, data, datalen);
3170 		break;
3171 	default:
3172 		/* session not open, no need to send notification */
3173 		break;
3174 	}
3175 	bgp_fsm(peer, EVNT_STOP);
3176 }
3177 
3178 void
3179 merge_peers(struct bgpd_config *c, struct bgpd_config *nc)
3180 {
3181 	struct peer *p, *np, *next;
3182 
3183 	RB_FOREACH(p, peer_head, &conf->peers) {
3184 		/* templates are handled specially */
3185 		if (p->template != NULL)
3186 			continue;
3187 		np = getpeerbyid(nc, p->conf.id);
3188 		if (np == NULL) {
3189 			p->reconf_action = RECONF_DELETE;
3190 			continue;
3191 		}
3192 
3193 		memcpy(&p->conf, &np->conf, sizeof(p->conf));
3194 		RB_REMOVE(peer_head, &nc->peers, np);
3195 		free(np);
3196 
3197 		p->reconf_action = RECONF_KEEP;
3198 
3199 		/* had demotion, is demoted, demote removed? */
3200 		if (p->demoted && !p->conf.demote_group[0])
3201 			session_demote(p, -1);
3202 
3203 		/* if session is not open then refresh pfkey data */
3204 		if (p->state < STATE_OPENSENT && !p->template)
3205 			imsg_compose(ibuf_main, IMSG_PFKEY_RELOAD,
3206 			    p->conf.id, 0, -1, NULL, 0);
3207 
3208 		/* sync the RDE in case we keep the peer */
3209 		if (imsg_rde(IMSG_SESSION_ADD, p->conf.id,
3210 		    &p->conf, sizeof(struct peer_config)) == -1)
3211 			fatalx("imsg_compose error");
3212 
3213 		/* apply the config to all clones of a template */
3214 		if (p->conf.template) {
3215 			struct peer *xp;
3216 			RB_FOREACH(xp, peer_head, &conf->peers) {
3217 				if (xp->template != p)
3218 					continue;
3219 				session_template_clone(xp, NULL, xp->conf.id,
3220 				    xp->conf.remote_as);
3221 				if (imsg_rde(IMSG_SESSION_ADD, xp->conf.id,
3222 				    &xp->conf, sizeof(xp->conf)) == -1)
3223 					fatalx("imsg_compose error");
3224 			}
3225 		}
3226 	}
3227 
3228 	/* pfkeys of new peers already loaded by the parent process */
3229 	RB_FOREACH_SAFE(np, peer_head, &nc->peers, next) {
3230 		RB_REMOVE(peer_head, &nc->peers, np);
3231 		if (RB_INSERT(peer_head, &conf->peers, np) != NULL)
3232 			fatalx("%s: peer tree is corrupt", __func__);
3233 	}
3234 }
3235