xref: /openbsd-src/usr.sbin/bgpd/session.c (revision f6aab3d83b51b91c24247ad2c2573574de475a82)
1 /*	$OpenBSD: session.c,v 1.455 2023/11/07 11:18:35 claudio Exp $ */
2 
3 /*
4  * Copyright (c) 2003, 2004, 2005 Henning Brauer <henning@openbsd.org>
5  * Copyright (c) 2017 Peter van Dijk <peter.van.dijk@powerdns.com>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 
20 #include <sys/types.h>
21 
22 #include <sys/mman.h>
23 #include <sys/socket.h>
24 #include <sys/time.h>
25 #include <sys/resource.h>
26 #include <sys/un.h>
27 #include <netinet/in.h>
28 #include <netinet/ip.h>
29 #include <netinet/tcp.h>
30 #include <arpa/inet.h>
31 #include <limits.h>
32 
33 #include <err.h>
34 #include <errno.h>
35 #include <fcntl.h>
36 #include <ifaddrs.h>
37 #include <poll.h>
38 #include <pwd.h>
39 #include <signal.h>
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include <string.h>
43 #include <syslog.h>
44 #include <unistd.h>
45 
46 #include "bgpd.h"
47 #include "session.h"
48 #include "log.h"
49 
/*
 * Fixed slots at the start of the pollfd array built by session_main().
 * The listener sockets follow, starting at PFD_LISTENERS_START.
 */
#define PFD_PIPE_MAIN		0	/* ibuf_main */
#define PFD_PIPE_ROUTE		1	/* ibuf_rde */
#define PFD_PIPE_ROUTE_CTL	2	/* ibuf_rde_ctl */
#define PFD_SOCK_CTL		3	/* csock */
#define PFD_SOCK_RCTL		4	/* rcsock */
#define PFD_LISTENERS_START	5	/* first listen address */

/* function prototypes */
void	session_sighdlr(int);
int	setup_listeners(u_int *);
void	init_peer(struct peer *);
void	start_timer_holdtime(struct peer *);
void	start_timer_keepalive(struct peer *);
void	session_close_connection(struct peer *);
void	change_state(struct peer *, enum session_state, enum session_events);
int	session_setup_socket(struct peer *);
void	session_accept(int);
int	session_connect(struct peer *);
void	session_tcp_established(struct peer *);
void	session_capa_ann_none(struct peer *);
int	session_capa_add(struct ibuf *, uint8_t, uint8_t);
int	session_capa_add_mp(struct ibuf *, uint8_t);
int	session_capa_add_afi(struct peer *, struct ibuf *, uint8_t, uint8_t);
struct bgp_msg	*session_newmsg(enum msg_type, uint16_t);
int	session_sendmsg(struct bgp_msg *, struct peer *);
void	session_open(struct peer *);
void	session_keepalive(struct peer *);
void	session_update(uint32_t, void *, size_t);
void	session_notification(struct peer *, uint8_t, uint8_t, void *,
	    ssize_t);
void	session_rrefresh(struct peer *, uint8_t, uint8_t);
int	session_graceful_restart(struct peer *);
int	session_graceful_stop(struct peer *);
int	session_dispatch_msg(struct pollfd *, struct peer *);
void	session_process_msg(struct peer *);
int	parse_header(struct peer *, u_char *, uint16_t *, uint8_t *);
int	parse_open(struct peer *);
int	parse_update(struct peer *);
int	parse_rrefresh(struct peer *);
int	parse_notification(struct peer *);
int	parse_capabilities(struct peer *, u_char *, uint16_t, uint32_t *);
int	capa_neg_calc(struct peer *, uint8_t *);
void	session_dispatch_imsg(struct imsgbuf *, int, u_int *);
void	session_up(struct peer *);
void	session_down(struct peer *);
int	imsg_rde(int, uint32_t, void *, uint16_t);
void	session_demote(struct peer *, int);
void	merge_peers(struct bgpd_config *, struct bgpd_config *);

int		 la_cmp(struct listen_addr *, struct listen_addr *);
void		 session_template_clone(struct peer *, struct sockaddr *,
		    uint32_t, uint32_t);
int		 session_match_mask(struct peer *, struct bgpd_addr *);

/*
 * conf is the configuration the session engine runs with.
 * NOTE(review): nconf is presumably the configuration being received
 * during a reload -- confirm against session_dispatch_imsg().
 */
static struct bgpd_config	*conf, *nconf;
/* imsg channels to the RDE, the RDE control channel and the parent */
static struct imsgbuf		*ibuf_rde;
static struct imsgbuf		*ibuf_rde_ctl;
static struct imsgbuf		*ibuf_main;

struct bgpd_sysdep	 sysdep;
/* set by session_sighdlr() or on loss of the parent; ends the main loop */
volatile sig_atomic_t	 session_quit;
/* while non-zero, session_main() skips the peer init/removal pass */
int			 pending_reconf;
/* control sockets handed to control_accept(); -1 until set */
int			 csock = -1, rcsock = -1;
/* number of peers that went through init_peer() and were not removed */
u_int			 peer_cnt;

struct mrt_head		 mrthead;
/* non-zero: time accept() last hit fd exhaustion, accepting is paused */
time_t			 pauseaccept;
116 
117 static inline int
118 peer_compare(const struct peer *a, const struct peer *b)
119 {
120 	return a->conf.id - b->conf.id;
121 }
122 
123 RB_GENERATE(peer_head, peer, entry, peer_compare);
124 
125 void
126 session_sighdlr(int sig)
127 {
128 	switch (sig) {
129 	case SIGINT:
130 	case SIGTERM:
131 		session_quit = 1;
132 		break;
133 	}
134 }
135 
136 int
137 setup_listeners(u_int *la_cnt)
138 {
139 	int			 ttl = 255;
140 	struct listen_addr	*la;
141 	u_int			 cnt = 0;
142 
143 	TAILQ_FOREACH(la, conf->listen_addrs, entry) {
144 		la->reconf = RECONF_NONE;
145 		cnt++;
146 
147 		if (la->flags & LISTENER_LISTENING)
148 			continue;
149 
150 		if (la->fd == -1) {
151 			log_warn("cannot establish listener on %s: invalid fd",
152 			    log_sockaddr((struct sockaddr *)&la->sa,
153 			    la->sa_len));
154 			continue;
155 		}
156 
157 		if (tcp_md5_prep_listener(la, &conf->peers) == -1)
158 			fatal("tcp_md5_prep_listener");
159 
160 		/* set ttl to 255 so that ttl-security works */
161 		if (la->sa.ss_family == AF_INET && setsockopt(la->fd,
162 		    IPPROTO_IP, IP_TTL, &ttl, sizeof(ttl)) == -1) {
163 			log_warn("setup_listeners setsockopt TTL");
164 			continue;
165 		}
166 		if (la->sa.ss_family == AF_INET6 && setsockopt(la->fd,
167 		    IPPROTO_IPV6, IPV6_UNICAST_HOPS, &ttl, sizeof(ttl)) == -1) {
168 			log_warn("setup_listeners setsockopt hoplimit");
169 			continue;
170 		}
171 
172 		if (listen(la->fd, MAX_BACKLOG)) {
173 			close(la->fd);
174 			fatal("listen");
175 		}
176 
177 		la->flags |= LISTENER_LISTENING;
178 
179 		log_info("listening on %s",
180 		    log_sockaddr((struct sockaddr *)&la->sa, la->sa_len));
181 	}
182 
183 	*la_cnt = cnt;
184 
185 	return (0);
186 }
187 
188 void
189 session_main(int debug, int verbose)
190 {
191 	int			 timeout;
192 	unsigned int		 i, j, idx_peers, idx_listeners, idx_mrts;
193 	u_int			 pfd_elms = 0, peer_l_elms = 0, mrt_l_elms = 0;
194 	u_int			 listener_cnt, ctl_cnt, mrt_cnt;
195 	u_int			 new_cnt;
196 	struct passwd		*pw;
197 	struct peer		*p, **peer_l = NULL, *next;
198 	struct mrt		*m, *xm, **mrt_l = NULL;
199 	struct pollfd		*pfd = NULL;
200 	struct listen_addr	*la;
201 	void			*newp;
202 	time_t			 now;
203 	short			 events;
204 
205 	log_init(debug, LOG_DAEMON);
206 	log_setverbose(verbose);
207 
208 	log_procinit(log_procnames[PROC_SE]);
209 
210 	if ((pw = getpwnam(BGPD_USER)) == NULL)
211 		fatal(NULL);
212 
213 	if (chroot(pw->pw_dir) == -1)
214 		fatal("chroot");
215 	if (chdir("/") == -1)
216 		fatal("chdir(\"/\")");
217 
218 	setproctitle("session engine");
219 
220 	if (setgroups(1, &pw->pw_gid) ||
221 	    setresgid(pw->pw_gid, pw->pw_gid, pw->pw_gid) ||
222 	    setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid))
223 		fatal("can't drop privileges");
224 
225 	if (pledge("stdio inet recvfd", NULL) == -1)
226 		fatal("pledge");
227 
228 	signal(SIGTERM, session_sighdlr);
229 	signal(SIGINT, session_sighdlr);
230 	signal(SIGPIPE, SIG_IGN);
231 	signal(SIGHUP, SIG_IGN);
232 	signal(SIGALRM, SIG_IGN);
233 	signal(SIGUSR1, SIG_IGN);
234 
235 	if ((ibuf_main = malloc(sizeof(struct imsgbuf))) == NULL)
236 		fatal(NULL);
237 	imsg_init(ibuf_main, 3);
238 
239 	LIST_INIT(&mrthead);
240 	listener_cnt = 0;
241 	peer_cnt = 0;
242 	ctl_cnt = 0;
243 
244 	conf = new_config();
245 	log_info("session engine ready");
246 
247 	while (session_quit == 0) {
248 		/* check for peers to be initialized or deleted */
249 		if (!pending_reconf) {
250 			RB_FOREACH_SAFE(p, peer_head, &conf->peers, next) {
251 				/* cloned peer that idled out? */
252 				if (p->template && (p->state == STATE_IDLE ||
253 				    p->state == STATE_ACTIVE) &&
254 				    getmonotime() - p->stats.last_updown >=
255 				    INTERVAL_HOLD_CLONED)
256 					p->reconf_action = RECONF_DELETE;
257 
258 				/* new peer that needs init? */
259 				if (p->state == STATE_NONE)
260 					init_peer(p);
261 
262 				/* deletion due? */
263 				if (p->reconf_action == RECONF_DELETE) {
264 					if (p->demoted)
265 						session_demote(p, -1);
266 					p->conf.demote_group[0] = 0;
267 					session_stop(p, ERR_CEASE_PEER_UNCONF);
268 					timer_remove_all(&p->timers);
269 					tcp_md5_del_listener(conf, p);
270 					log_peer_warnx(&p->conf, "removed");
271 					RB_REMOVE(peer_head, &conf->peers, p);
272 					free(p);
273 					peer_cnt--;
274 					continue;
275 				}
276 				p->reconf_action = RECONF_NONE;
277 			}
278 		}
279 
280 		if (peer_cnt > peer_l_elms) {
281 			if ((newp = reallocarray(peer_l, peer_cnt,
282 			    sizeof(struct peer *))) == NULL) {
283 				/* panic for now */
284 				log_warn("could not resize peer_l from %u -> %u"
285 				    " entries", peer_l_elms, peer_cnt);
286 				fatalx("exiting");
287 			}
288 			peer_l = newp;
289 			peer_l_elms = peer_cnt;
290 		}
291 
292 		mrt_cnt = 0;
293 		for (m = LIST_FIRST(&mrthead); m != NULL; m = xm) {
294 			xm = LIST_NEXT(m, entry);
295 			if (m->state == MRT_STATE_REMOVE) {
296 				mrt_clean(m);
297 				LIST_REMOVE(m, entry);
298 				free(m);
299 				continue;
300 			}
301 			if (m->wbuf.queued)
302 				mrt_cnt++;
303 		}
304 
305 		if (mrt_cnt > mrt_l_elms) {
306 			if ((newp = reallocarray(mrt_l, mrt_cnt,
307 			    sizeof(struct mrt *))) == NULL) {
308 				/* panic for now */
309 				log_warn("could not resize mrt_l from %u -> %u"
310 				    " entries", mrt_l_elms, mrt_cnt);
311 				fatalx("exiting");
312 			}
313 			mrt_l = newp;
314 			mrt_l_elms = mrt_cnt;
315 		}
316 
317 		new_cnt = PFD_LISTENERS_START + listener_cnt + peer_cnt +
318 		    ctl_cnt + mrt_cnt;
319 		if (new_cnt > pfd_elms) {
320 			if ((newp = reallocarray(pfd, new_cnt,
321 			    sizeof(struct pollfd))) == NULL) {
322 				/* panic for now */
323 				log_warn("could not resize pfd from %u -> %u"
324 				    " entries", pfd_elms, new_cnt);
325 				fatalx("exiting");
326 			}
327 			pfd = newp;
328 			pfd_elms = new_cnt;
329 		}
330 
331 		memset(pfd, 0, sizeof(struct pollfd) * pfd_elms);
332 
333 		set_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main);
334 		set_pollfd(&pfd[PFD_PIPE_ROUTE], ibuf_rde);
335 		set_pollfd(&pfd[PFD_PIPE_ROUTE_CTL], ibuf_rde_ctl);
336 
337 		if (pauseaccept == 0) {
338 			pfd[PFD_SOCK_CTL].fd = csock;
339 			pfd[PFD_SOCK_CTL].events = POLLIN;
340 			pfd[PFD_SOCK_RCTL].fd = rcsock;
341 			pfd[PFD_SOCK_RCTL].events = POLLIN;
342 		} else {
343 			pfd[PFD_SOCK_CTL].fd = -1;
344 			pfd[PFD_SOCK_RCTL].fd = -1;
345 		}
346 
347 		i = PFD_LISTENERS_START;
348 		TAILQ_FOREACH(la, conf->listen_addrs, entry) {
349 			if (pauseaccept == 0) {
350 				pfd[i].fd = la->fd;
351 				pfd[i].events = POLLIN;
352 			} else
353 				pfd[i].fd = -1;
354 			i++;
355 		}
356 		idx_listeners = i;
357 		timeout = 240;	/* loop every 240s at least */
358 
359 		now = getmonotime();
360 		RB_FOREACH(p, peer_head, &conf->peers) {
361 			time_t	nextaction;
362 			struct timer *pt;
363 
364 			/* check timers */
365 			if ((pt = timer_nextisdue(&p->timers, now)) != NULL) {
366 				switch (pt->type) {
367 				case Timer_Hold:
368 					bgp_fsm(p, EVNT_TIMER_HOLDTIME);
369 					break;
370 				case Timer_SendHold:
371 					bgp_fsm(p, EVNT_TIMER_SENDHOLD);
372 					break;
373 				case Timer_ConnectRetry:
374 					bgp_fsm(p, EVNT_TIMER_CONNRETRY);
375 					break;
376 				case Timer_Keepalive:
377 					bgp_fsm(p, EVNT_TIMER_KEEPALIVE);
378 					break;
379 				case Timer_IdleHold:
380 					bgp_fsm(p, EVNT_START);
381 					break;
382 				case Timer_IdleHoldReset:
383 					p->IdleHoldTime =
384 					    INTERVAL_IDLE_HOLD_INITIAL;
385 					p->errcnt = 0;
386 					timer_stop(&p->timers,
387 					    Timer_IdleHoldReset);
388 					break;
389 				case Timer_CarpUndemote:
390 					timer_stop(&p->timers,
391 					    Timer_CarpUndemote);
392 					if (p->demoted &&
393 					    p->state == STATE_ESTABLISHED)
394 						session_demote(p, -1);
395 					break;
396 				case Timer_RestartTimeout:
397 					timer_stop(&p->timers,
398 					    Timer_RestartTimeout);
399 					session_graceful_stop(p);
400 					break;
401 				default:
402 					fatalx("King Bula lost in time");
403 				}
404 			}
405 			if ((nextaction = timer_nextduein(&p->timers,
406 			    now)) != -1 && nextaction < timeout)
407 				timeout = nextaction;
408 
409 			/* are we waiting for a write? */
410 			events = POLLIN;
411 			if (p->wbuf.queued > 0 || p->state == STATE_CONNECT)
412 				events |= POLLOUT;
413 			/* is there still work to do? */
414 			if (p->rpending && p->rbuf && p->rbuf->wpos)
415 				timeout = 0;
416 
417 			/* poll events */
418 			if (p->fd != -1 && events != 0) {
419 				pfd[i].fd = p->fd;
420 				pfd[i].events = events;
421 				peer_l[i - idx_listeners] = p;
422 				i++;
423 			}
424 		}
425 
426 		idx_peers = i;
427 
428 		LIST_FOREACH(m, &mrthead, entry)
429 			if (m->wbuf.queued) {
430 				pfd[i].fd = m->wbuf.fd;
431 				pfd[i].events = POLLOUT;
432 				mrt_l[i - idx_peers] = m;
433 				i++;
434 			}
435 
436 		idx_mrts = i;
437 
438 		i += control_fill_pfds(pfd + i, pfd_elms -i);
439 
440 		if (i > pfd_elms)
441 			fatalx("poll pfd overflow");
442 
443 		if (pauseaccept && timeout > 1)
444 			timeout = 1;
445 		if (timeout < 0)
446 			timeout = 0;
447 		if (poll(pfd, i, timeout * 1000) == -1) {
448 			if (errno == EINTR)
449 				continue;
450 			fatal("poll error");
451 		}
452 
453 		/*
454 		 * If we previously saw fd exhaustion, we stop accept()
455 		 * for 1 second to throttle the accept() loop.
456 		 */
457 		if (pauseaccept && getmonotime() > pauseaccept + 1)
458 			pauseaccept = 0;
459 
460 		if (handle_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main) == -1) {
461 			log_warnx("SE: Lost connection to parent");
462 			session_quit = 1;
463 			continue;
464 		} else
465 			session_dispatch_imsg(ibuf_main, PFD_PIPE_MAIN,
466 			    &listener_cnt);
467 
468 		if (handle_pollfd(&pfd[PFD_PIPE_ROUTE], ibuf_rde) == -1) {
469 			log_warnx("SE: Lost connection to RDE");
470 			msgbuf_clear(&ibuf_rde->w);
471 			free(ibuf_rde);
472 			ibuf_rde = NULL;
473 		} else
474 			session_dispatch_imsg(ibuf_rde, PFD_PIPE_ROUTE,
475 			    &listener_cnt);
476 
477 		if (handle_pollfd(&pfd[PFD_PIPE_ROUTE_CTL], ibuf_rde_ctl) ==
478 		    -1) {
479 			log_warnx("SE: Lost connection to RDE control");
480 			msgbuf_clear(&ibuf_rde_ctl->w);
481 			free(ibuf_rde_ctl);
482 			ibuf_rde_ctl = NULL;
483 		} else
484 			session_dispatch_imsg(ibuf_rde_ctl, PFD_PIPE_ROUTE_CTL,
485 			    &listener_cnt);
486 
487 		if (pfd[PFD_SOCK_CTL].revents & POLLIN)
488 			ctl_cnt += control_accept(csock, 0);
489 
490 		if (pfd[PFD_SOCK_RCTL].revents & POLLIN)
491 			ctl_cnt += control_accept(rcsock, 1);
492 
493 		for (j = PFD_LISTENERS_START; j < idx_listeners; j++)
494 			if (pfd[j].revents & POLLIN)
495 				session_accept(pfd[j].fd);
496 
497 		for (; j < idx_peers; j++)
498 			session_dispatch_msg(&pfd[j],
499 			    peer_l[j - idx_listeners]);
500 
501 		RB_FOREACH(p, peer_head, &conf->peers)
502 			if (p->rbuf && p->rbuf->wpos)
503 				session_process_msg(p);
504 
505 		for (; j < idx_mrts; j++)
506 			if (pfd[j].revents & POLLOUT)
507 				mrt_write(mrt_l[j - idx_peers]);
508 
509 		for (; j < i; j++)
510 			ctl_cnt -= control_dispatch_msg(&pfd[j], &conf->peers);
511 	}
512 
513 	RB_FOREACH_SAFE(p, peer_head, &conf->peers, next) {
514 		RB_REMOVE(peer_head, &conf->peers, p);
515 		strlcpy(p->conf.reason,
516 		    "bgpd shutting down",
517 		    sizeof(p->conf.reason));
518 		session_stop(p, ERR_CEASE_ADMIN_DOWN);
519 		timer_remove_all(&p->timers);
520 		free(p);
521 	}
522 
523 	while ((m = LIST_FIRST(&mrthead)) != NULL) {
524 		mrt_clean(m);
525 		LIST_REMOVE(m, entry);
526 		free(m);
527 	}
528 
529 	free_config(conf);
530 	free(peer_l);
531 	free(mrt_l);
532 	free(pfd);
533 
534 	/* close pipes */
535 	if (ibuf_rde) {
536 		msgbuf_write(&ibuf_rde->w);
537 		msgbuf_clear(&ibuf_rde->w);
538 		close(ibuf_rde->fd);
539 		free(ibuf_rde);
540 	}
541 	if (ibuf_rde_ctl) {
542 		msgbuf_clear(&ibuf_rde_ctl->w);
543 		close(ibuf_rde_ctl->fd);
544 		free(ibuf_rde_ctl);
545 	}
546 	msgbuf_write(&ibuf_main->w);
547 	msgbuf_clear(&ibuf_main->w);
548 	close(ibuf_main->fd);
549 	free(ibuf_main);
550 
551 	control_shutdown(csock);
552 	control_shutdown(rcsock);
553 	log_info("session engine exiting");
554 	exit(0);
555 }
556 
557 void
558 init_peer(struct peer *p)
559 {
560 	TAILQ_INIT(&p->timers);
561 	p->fd = p->wbuf.fd = -1;
562 
563 	if (p->conf.if_depend[0])
564 		imsg_compose(ibuf_main, IMSG_SESSION_DEPENDON, 0, 0, -1,
565 		    p->conf.if_depend, sizeof(p->conf.if_depend));
566 	else
567 		p->depend_ok = 1;
568 
569 	peer_cnt++;
570 
571 	change_state(p, STATE_IDLE, EVNT_NONE);
572 	if (p->conf.down)
573 		timer_stop(&p->timers, Timer_IdleHold); /* no autostart */
574 	else
575 		timer_set(&p->timers, Timer_IdleHold, SESSION_CLEAR_DELAY);
576 
577 	p->stats.last_updown = getmonotime();
578 
579 	/*
580 	 * on startup, demote if requested.
581 	 * do not handle new peers. they must reach ESTABLISHED beforehand.
582 	 * peers added at runtime have reconf_action set to RECONF_REINIT.
583 	 */
584 	if (p->reconf_action != RECONF_REINIT && p->conf.demote_group[0])
585 		session_demote(p, +1);
586 }
587 
588 void
589 bgp_fsm(struct peer *peer, enum session_events event)
590 {
591 	switch (peer->state) {
592 	case STATE_NONE:
593 		/* nothing */
594 		break;
595 	case STATE_IDLE:
596 		switch (event) {
597 		case EVNT_START:
598 			timer_stop(&peer->timers, Timer_Hold);
599 			timer_stop(&peer->timers, Timer_SendHold);
600 			timer_stop(&peer->timers, Timer_Keepalive);
601 			timer_stop(&peer->timers, Timer_IdleHold);
602 
603 			/* allocate read buffer */
604 			peer->rbuf = calloc(1, sizeof(struct ibuf_read));
605 			if (peer->rbuf == NULL)
606 				fatal(NULL);
607 
608 			/* init write buffer */
609 			msgbuf_init(&peer->wbuf);
610 
611 			peer->stats.last_sent_errcode = 0;
612 			peer->stats.last_sent_suberr = 0;
613 			peer->stats.last_rcvd_errcode = 0;
614 			peer->stats.last_rcvd_suberr = 0;
615 
616 			if (!peer->depend_ok)
617 				timer_stop(&peer->timers, Timer_ConnectRetry);
618 			else if (peer->passive || peer->conf.passive ||
619 			    peer->conf.template) {
620 				change_state(peer, STATE_ACTIVE, event);
621 				timer_stop(&peer->timers, Timer_ConnectRetry);
622 			} else {
623 				change_state(peer, STATE_CONNECT, event);
624 				timer_set(&peer->timers, Timer_ConnectRetry,
625 				    conf->connectretry);
626 				session_connect(peer);
627 			}
628 			peer->passive = 0;
629 			break;
630 		default:
631 			/* ignore */
632 			break;
633 		}
634 		break;
635 	case STATE_CONNECT:
636 		switch (event) {
637 		case EVNT_START:
638 			/* ignore */
639 			break;
640 		case EVNT_CON_OPEN:
641 			session_tcp_established(peer);
642 			session_open(peer);
643 			timer_stop(&peer->timers, Timer_ConnectRetry);
644 			peer->holdtime = INTERVAL_HOLD_INITIAL;
645 			start_timer_holdtime(peer);
646 			change_state(peer, STATE_OPENSENT, event);
647 			break;
648 		case EVNT_CON_OPENFAIL:
649 			timer_set(&peer->timers, Timer_ConnectRetry,
650 			    conf->connectretry);
651 			session_close_connection(peer);
652 			change_state(peer, STATE_ACTIVE, event);
653 			break;
654 		case EVNT_TIMER_CONNRETRY:
655 			timer_set(&peer->timers, Timer_ConnectRetry,
656 			    conf->connectretry);
657 			session_connect(peer);
658 			break;
659 		default:
660 			change_state(peer, STATE_IDLE, event);
661 			break;
662 		}
663 		break;
664 	case STATE_ACTIVE:
665 		switch (event) {
666 		case EVNT_START:
667 			/* ignore */
668 			break;
669 		case EVNT_CON_OPEN:
670 			session_tcp_established(peer);
671 			session_open(peer);
672 			timer_stop(&peer->timers, Timer_ConnectRetry);
673 			peer->holdtime = INTERVAL_HOLD_INITIAL;
674 			start_timer_holdtime(peer);
675 			change_state(peer, STATE_OPENSENT, event);
676 			break;
677 		case EVNT_CON_OPENFAIL:
678 			timer_set(&peer->timers, Timer_ConnectRetry,
679 			    conf->connectretry);
680 			session_close_connection(peer);
681 			change_state(peer, STATE_ACTIVE, event);
682 			break;
683 		case EVNT_TIMER_CONNRETRY:
684 			timer_set(&peer->timers, Timer_ConnectRetry,
685 			    peer->holdtime);
686 			change_state(peer, STATE_CONNECT, event);
687 			session_connect(peer);
688 			break;
689 		default:
690 			change_state(peer, STATE_IDLE, event);
691 			break;
692 		}
693 		break;
694 	case STATE_OPENSENT:
695 		switch (event) {
696 		case EVNT_START:
697 			/* ignore */
698 			break;
699 		case EVNT_STOP:
700 			change_state(peer, STATE_IDLE, event);
701 			break;
702 		case EVNT_CON_CLOSED:
703 			session_close_connection(peer);
704 			timer_set(&peer->timers, Timer_ConnectRetry,
705 			    conf->connectretry);
706 			change_state(peer, STATE_ACTIVE, event);
707 			break;
708 		case EVNT_CON_FATAL:
709 			change_state(peer, STATE_IDLE, event);
710 			break;
711 		case EVNT_TIMER_HOLDTIME:
712 		case EVNT_TIMER_SENDHOLD:
713 			session_notification(peer, ERR_HOLDTIMEREXPIRED,
714 			    0, NULL, 0);
715 			change_state(peer, STATE_IDLE, event);
716 			break;
717 		case EVNT_RCVD_OPEN:
718 			/* parse_open calls change_state itself on failure */
719 			if (parse_open(peer))
720 				break;
721 			session_keepalive(peer);
722 			change_state(peer, STATE_OPENCONFIRM, event);
723 			break;
724 		case EVNT_RCVD_NOTIFICATION:
725 			if (parse_notification(peer)) {
726 				change_state(peer, STATE_IDLE, event);
727 				/* don't punish, capa negotiation */
728 				timer_set(&peer->timers, Timer_IdleHold, 0);
729 				peer->IdleHoldTime /= 2;
730 			} else
731 				change_state(peer, STATE_IDLE, event);
732 			break;
733 		default:
734 			session_notification(peer,
735 			    ERR_FSM, ERR_FSM_UNEX_OPENSENT, NULL, 0);
736 			change_state(peer, STATE_IDLE, event);
737 			break;
738 		}
739 		break;
740 	case STATE_OPENCONFIRM:
741 		switch (event) {
742 		case EVNT_START:
743 			/* ignore */
744 			break;
745 		case EVNT_STOP:
746 			change_state(peer, STATE_IDLE, event);
747 			break;
748 		case EVNT_CON_CLOSED:
749 		case EVNT_CON_FATAL:
750 			change_state(peer, STATE_IDLE, event);
751 			break;
752 		case EVNT_TIMER_HOLDTIME:
753 		case EVNT_TIMER_SENDHOLD:
754 			session_notification(peer, ERR_HOLDTIMEREXPIRED,
755 			    0, NULL, 0);
756 			change_state(peer, STATE_IDLE, event);
757 			break;
758 		case EVNT_TIMER_KEEPALIVE:
759 			session_keepalive(peer);
760 			break;
761 		case EVNT_RCVD_KEEPALIVE:
762 			start_timer_holdtime(peer);
763 			change_state(peer, STATE_ESTABLISHED, event);
764 			break;
765 		case EVNT_RCVD_NOTIFICATION:
766 			parse_notification(peer);
767 			change_state(peer, STATE_IDLE, event);
768 			break;
769 		default:
770 			session_notification(peer,
771 			    ERR_FSM, ERR_FSM_UNEX_OPENCONFIRM, NULL, 0);
772 			change_state(peer, STATE_IDLE, event);
773 			break;
774 		}
775 		break;
776 	case STATE_ESTABLISHED:
777 		switch (event) {
778 		case EVNT_START:
779 			/* ignore */
780 			break;
781 		case EVNT_STOP:
782 			change_state(peer, STATE_IDLE, event);
783 			break;
784 		case EVNT_CON_CLOSED:
785 		case EVNT_CON_FATAL:
786 			change_state(peer, STATE_IDLE, event);
787 			break;
788 		case EVNT_TIMER_HOLDTIME:
789 		case EVNT_TIMER_SENDHOLD:
790 			session_notification(peer, ERR_HOLDTIMEREXPIRED,
791 			    0, NULL, 0);
792 			change_state(peer, STATE_IDLE, event);
793 			break;
794 		case EVNT_TIMER_KEEPALIVE:
795 			session_keepalive(peer);
796 			break;
797 		case EVNT_RCVD_KEEPALIVE:
798 			start_timer_holdtime(peer);
799 			break;
800 		case EVNT_RCVD_UPDATE:
801 			start_timer_holdtime(peer);
802 			if (parse_update(peer))
803 				change_state(peer, STATE_IDLE, event);
804 			else
805 				start_timer_holdtime(peer);
806 			break;
807 		case EVNT_RCVD_NOTIFICATION:
808 			parse_notification(peer);
809 			change_state(peer, STATE_IDLE, event);
810 			break;
811 		default:
812 			session_notification(peer,
813 			    ERR_FSM, ERR_FSM_UNEX_ESTABLISHED, NULL, 0);
814 			change_state(peer, STATE_IDLE, event);
815 			break;
816 		}
817 		break;
818 	}
819 }
820 
821 void
822 start_timer_holdtime(struct peer *peer)
823 {
824 	if (peer->holdtime > 0)
825 		timer_set(&peer->timers, Timer_Hold, peer->holdtime);
826 	else
827 		timer_stop(&peer->timers, Timer_Hold);
828 }
829 
830 void
831 start_timer_keepalive(struct peer *peer)
832 {
833 	if (peer->holdtime > 0)
834 		timer_set(&peer->timers, Timer_Keepalive, peer->holdtime / 3);
835 	else
836 		timer_stop(&peer->timers, Timer_Keepalive);
837 }
838 
839 void
840 session_close_connection(struct peer *peer)
841 {
842 	if (peer->fd != -1) {
843 		close(peer->fd);
844 		pauseaccept = 0;
845 	}
846 	peer->fd = peer->wbuf.fd = -1;
847 }
848 
/*
 * Move a peer to a new session state.
 *
 * Performs the work tied to entering `state' (peer->state still holds
 * the old state while this runs), logs the transition, reports it to
 * the matching MRT_ALL_IN / MRT_ALL_OUT dumps and finally records the
 * old state in peer->prev_state and the new one in peer->state.
 *
 * `event' is the event that caused the transition; it influences the
 * idle hold back-off and the graceful restart handling.
 */
void
change_state(struct peer *peer, enum session_state state,
    enum session_events event)
{
	struct mrt	*mrt;

	switch (state) {
	case STATE_IDLE:
		/* carp demotion first. new peers handled in init_peer */
		if (peer->state == STATE_ESTABLISHED &&
		    peer->conf.demote_group[0] && !peer->demoted)
			session_demote(peer, +1);

		/*
		 * try to write out what's buffered (maybe a notification),
		 * don't bother if it fails
		 */
		if (peer->state >= STATE_OPENSENT && peer->wbuf.queued)
			msgbuf_write(&peer->wbuf);

		/*
		 * we must start the timer for the next EVNT_START
		 * if we are coming here due to an error and the
		 * session was not established successfully before, the
		 * starttimerinterval needs to be exponentially increased
		 */
		if (peer->IdleHoldTime == 0)
			peer->IdleHoldTime = INTERVAL_IDLE_HOLD_INITIAL;
		peer->holdtime = INTERVAL_HOLD_INITIAL;
		timer_stop(&peer->timers, Timer_ConnectRetry);
		timer_stop(&peer->timers, Timer_Keepalive);
		timer_stop(&peer->timers, Timer_Hold);
		timer_stop(&peer->timers, Timer_SendHold);
		timer_stop(&peer->timers, Timer_IdleHold);
		timer_stop(&peer->timers, Timer_IdleHoldReset);
		/* drop the connection and everything buffered for it */
		session_close_connection(peer);
		msgbuf_clear(&peer->wbuf);
		free(peer->rbuf);
		peer->rbuf = NULL;
		peer->rpending = 0;
		memset(&peer->capa.peer, 0, sizeof(peer->capa.peer));
		if (!peer->template)
			imsg_compose(ibuf_main, IMSG_PFKEY_RELOAD,
			    peer->conf.id, 0, -1, NULL, 0);

		/* an explicit stop does not schedule a restart */
		if (event != EVNT_STOP) {
			timer_set(&peer->timers, Timer_IdleHold,
			    peer->IdleHoldTime);
			/* back off: double the delay for the next attempt */
			if (event != EVNT_NONE &&
			    peer->IdleHoldTime < MAX_IDLE_HOLD/2)
				peer->IdleHoldTime *= 2;
		}
		if (peer->state == STATE_ESTABLISHED) {
			if (peer->capa.neg.grestart.restart == 2 &&
			    (event == EVNT_CON_CLOSED ||
			    event == EVNT_CON_FATAL)) {
				/* don't punish graceful restart */
				timer_set(&peer->timers, Timer_IdleHold, 0);
				peer->IdleHoldTime /= 2;
				session_graceful_restart(peer);
			} else
				session_down(peer);
		}
		if (peer->state == STATE_NONE ||
		    peer->state == STATE_ESTABLISHED) {
			/* initialize capability negotiation structures */
			memcpy(&peer->capa.ann, &peer->conf.capabilities,
			    sizeof(peer->capa.ann));
			if (!peer->conf.announce_capa)
				session_capa_ann_none(peer);
		}
		break;
	case STATE_CONNECT:
		if (peer->state == STATE_ESTABLISHED &&
		    peer->capa.neg.grestart.restart == 2) {
			/* do the graceful restart dance */
			session_graceful_restart(peer);
			peer->holdtime = INTERVAL_HOLD_INITIAL;
			timer_stop(&peer->timers, Timer_ConnectRetry);
			timer_stop(&peer->timers, Timer_Keepalive);
			timer_stop(&peer->timers, Timer_Hold);
			timer_stop(&peer->timers, Timer_SendHold);
			timer_stop(&peer->timers, Timer_IdleHold);
			timer_stop(&peer->timers, Timer_IdleHoldReset);
			session_close_connection(peer);
			msgbuf_clear(&peer->wbuf);
			memset(&peer->capa.peer, 0, sizeof(peer->capa.peer));
		}
		break;
	case STATE_ACTIVE:
		if (!peer->template)
			imsg_compose(ibuf_main, IMSG_PFKEY_RELOAD,
			    peer->conf.id, 0, -1, NULL, 0);
		break;
	case STATE_OPENSENT:
		break;
	case STATE_OPENCONFIRM:
		break;
	case STATE_ESTABLISHED:
		timer_set(&peer->timers, Timer_IdleHoldReset,
		    peer->IdleHoldTime);
		if (peer->demoted)
			timer_set(&peer->timers, Timer_CarpUndemote,
			    INTERVAL_HOLD_DEMOTED);
		session_up(peer);
		break;
	default:		/* unknown target state, nothing to prepare */
		break;
	}

	log_statechange(peer, state, event);
	/*
	 * Report the transition to every "all in" / "all out" dump that is
	 * either unrestricted or bound to this peer or to its group.
	 */
	LIST_FOREACH(mrt, &mrthead, entry) {
		if (!(mrt->type == MRT_ALL_IN || mrt->type == MRT_ALL_OUT))
			continue;
		if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
		    mrt->peer_id == peer->conf.id || (mrt->group_id != 0 &&
		    mrt->group_id == peer->conf.groupid))
			mrt_dump_state(mrt, peer->state, state, peer);
	}
	peer->prev_state = peer->state;
	peer->state = state;
}
971 
972 void
973 session_accept(int listenfd)
974 {
975 	int			 connfd;
976 	socklen_t		 len;
977 	struct sockaddr_storage	 cliaddr;
978 	struct peer		*p = NULL;
979 
980 	len = sizeof(cliaddr);
981 	if ((connfd = accept4(listenfd,
982 	    (struct sockaddr *)&cliaddr, &len,
983 	    SOCK_CLOEXEC | SOCK_NONBLOCK)) == -1) {
984 		if (errno == ENFILE || errno == EMFILE)
985 			pauseaccept = getmonotime();
986 		else if (errno != EWOULDBLOCK && errno != EINTR &&
987 		    errno != ECONNABORTED)
988 			log_warn("accept");
989 		return;
990 	}
991 
992 	p = getpeerbyip(conf, (struct sockaddr *)&cliaddr);
993 
994 	if (p != NULL && p->state == STATE_IDLE && p->errcnt < 2) {
995 		if (timer_running(&p->timers, Timer_IdleHold, NULL)) {
996 			/* fast reconnect after clear */
997 			p->passive = 1;
998 			bgp_fsm(p, EVNT_START);
999 		}
1000 	}
1001 
1002 	if (p != NULL &&
1003 	    (p->state == STATE_CONNECT || p->state == STATE_ACTIVE)) {
1004 		if (p->fd != -1) {
1005 			if (p->state == STATE_CONNECT)
1006 				session_close_connection(p);
1007 			else {
1008 				close(connfd);
1009 				return;
1010 			}
1011 		}
1012 
1013 open:
1014 		if (p->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) {
1015 			log_peer_warnx(&p->conf,
1016 			    "ipsec or md5sig configured but not available");
1017 			close(connfd);
1018 			return;
1019 		}
1020 
1021 		if (tcp_md5_check(connfd, p) == -1) {
1022 			close(connfd);
1023 			return;
1024 		}
1025 		p->fd = p->wbuf.fd = connfd;
1026 		if (session_setup_socket(p)) {
1027 			close(connfd);
1028 			return;
1029 		}
1030 		bgp_fsm(p, EVNT_CON_OPEN);
1031 		return;
1032 	} else if (p != NULL && p->state == STATE_ESTABLISHED &&
1033 	    p->capa.neg.grestart.restart == 2) {
1034 		/* first do the graceful restart dance */
1035 		change_state(p, STATE_CONNECT, EVNT_CON_CLOSED);
1036 		/* then do part of the open dance */
1037 		goto open;
1038 	} else {
1039 		log_conn_attempt(p, (struct sockaddr *)&cliaddr, len);
1040 		close(connfd);
1041 	}
1042 }
1043 
1044 int
1045 session_connect(struct peer *peer)
1046 {
1047 	struct sockaddr		*sa;
1048 	struct bgpd_addr	*bind_addr = NULL;
1049 	socklen_t		 sa_len;
1050 
1051 	/*
1052 	 * we do not need the overcomplicated collision detection RFC 1771
1053 	 * describes; we simply make sure there is only ever one concurrent
1054 	 * tcp connection per peer.
1055 	 */
1056 	if (peer->fd != -1)
1057 		return (-1);
1058 
1059 	if ((peer->fd = socket(aid2af(peer->conf.remote_addr.aid),
1060 	    SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, IPPROTO_TCP)) == -1) {
1061 		log_peer_warn(&peer->conf, "session_connect socket");
1062 		bgp_fsm(peer, EVNT_CON_OPENFAIL);
1063 		return (-1);
1064 	}
1065 
1066 	if (peer->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) {
1067 		log_peer_warnx(&peer->conf,
1068 		    "ipsec or md5sig configured but not available");
1069 		bgp_fsm(peer, EVNT_CON_OPENFAIL);
1070 		return (-1);
1071 	}
1072 
1073 	tcp_md5_set(peer->fd, peer);
1074 	peer->wbuf.fd = peer->fd;
1075 
1076 	/* if local-address is set we need to bind() */
1077 	switch (peer->conf.remote_addr.aid) {
1078 	case AID_INET:
1079 		bind_addr = &peer->conf.local_addr_v4;
1080 		break;
1081 	case AID_INET6:
1082 		bind_addr = &peer->conf.local_addr_v6;
1083 		break;
1084 	}
1085 	if ((sa = addr2sa(bind_addr, 0, &sa_len)) != NULL) {
1086 		if (bind(peer->fd, sa, sa_len) == -1) {
1087 			log_peer_warn(&peer->conf, "session_connect bind");
1088 			bgp_fsm(peer, EVNT_CON_OPENFAIL);
1089 			return (-1);
1090 		}
1091 	}
1092 
1093 	if (session_setup_socket(peer)) {
1094 		bgp_fsm(peer, EVNT_CON_OPENFAIL);
1095 		return (-1);
1096 	}
1097 
1098 	sa = addr2sa(&peer->conf.remote_addr, peer->conf.remote_port, &sa_len);
1099 	if (connect(peer->fd, sa, sa_len) == -1) {
1100 		if (errno != EINPROGRESS) {
1101 			if (errno != peer->lasterr)
1102 				log_peer_warn(&peer->conf, "connect");
1103 			peer->lasterr = errno;
1104 			bgp_fsm(peer, EVNT_CON_OPENFAIL);
1105 			return (-1);
1106 		}
1107 	} else
1108 		bgp_fsm(peer, EVNT_CON_OPEN);
1109 
1110 	return (0);
1111 }
1112 
1113 int
1114 session_setup_socket(struct peer *p)
1115 {
1116 	int	ttl = p->conf.distance;
1117 	int	pre = IPTOS_PREC_INTERNETCONTROL;
1118 	int	nodelay = 1;
1119 	int	bsize;
1120 
1121 	switch (p->conf.remote_addr.aid) {
1122 	case AID_INET:
1123 		/* set precedence, see RFC 1771 appendix 5 */
1124 		if (setsockopt(p->fd, IPPROTO_IP, IP_TOS, &pre, sizeof(pre)) ==
1125 		    -1) {
1126 			log_peer_warn(&p->conf,
1127 			    "session_setup_socket setsockopt TOS");
1128 			return (-1);
1129 		}
1130 
1131 		if (p->conf.ebgp) {
1132 			/*
1133 			 * set TTL to foreign router's distance
1134 			 * 1=direct n=multihop with ttlsec, we always use 255
1135 			 */
1136 			if (p->conf.ttlsec) {
1137 				ttl = 256 - p->conf.distance;
1138 				if (setsockopt(p->fd, IPPROTO_IP, IP_MINTTL,
1139 				    &ttl, sizeof(ttl)) == -1) {
1140 					log_peer_warn(&p->conf,
1141 					    "session_setup_socket: "
1142 					    "setsockopt MINTTL");
1143 					return (-1);
1144 				}
1145 				ttl = 255;
1146 			}
1147 
1148 			if (setsockopt(p->fd, IPPROTO_IP, IP_TTL, &ttl,
1149 			    sizeof(ttl)) == -1) {
1150 				log_peer_warn(&p->conf,
1151 				    "session_setup_socket setsockopt TTL");
1152 				return (-1);
1153 			}
1154 		}
1155 		break;
1156 	case AID_INET6:
1157 		if (p->conf.ebgp) {
1158 			/*
1159 			 * set hoplimit to foreign router's distance
1160 			 * 1=direct n=multihop with ttlsec, we always use 255
1161 			 */
1162 			if (p->conf.ttlsec) {
1163 				ttl = 256 - p->conf.distance;
1164 				if (setsockopt(p->fd, IPPROTO_IPV6,
1165 				    IPV6_MINHOPCOUNT, &ttl, sizeof(ttl))
1166 				    == -1) {
1167 					log_peer_warn(&p->conf,
1168 					    "session_setup_socket: "
1169 					    "setsockopt MINHOPCOUNT");
1170 					return (-1);
1171 				}
1172 				ttl = 255;
1173 			}
1174 			if (setsockopt(p->fd, IPPROTO_IPV6, IPV6_UNICAST_HOPS,
1175 			    &ttl, sizeof(ttl)) == -1) {
1176 				log_peer_warn(&p->conf,
1177 				    "session_setup_socket setsockopt hoplimit");
1178 				return (-1);
1179 			}
1180 		}
1181 		break;
1182 	}
1183 
1184 	/* set TCP_NODELAY */
1185 	if (setsockopt(p->fd, IPPROTO_TCP, TCP_NODELAY, &nodelay,
1186 	    sizeof(nodelay)) == -1) {
1187 		log_peer_warn(&p->conf,
1188 		    "session_setup_socket setsockopt TCP_NODELAY");
1189 		return (-1);
1190 	}
1191 
1192 	/* limit bufsize. no biggie if it fails */
1193 	bsize = 65535;
1194 	while (bsize > 8192 && setsockopt(p->fd, SOL_SOCKET, SO_RCVBUF,
1195 	    &bsize, sizeof(bsize)) == -1 && errno != EINVAL)
1196 		bsize /= 2;
1197 	bsize = 65535;
1198 	while (bsize > 8192 && setsockopt(p->fd, SOL_SOCKET, SO_SNDBUF,
1199 	    &bsize, sizeof(bsize)) == -1 && errno != EINVAL)
1200 		bsize /= 2;
1201 
1202 	return (0);
1203 }
1204 
1205 /*
1206  * compare the bgpd_addr with the sockaddr by converting the latter into
1207  * a bgpd_addr. Return true if the two are equal, including any scope
1208  */
1209 static int
1210 sa_equal(struct bgpd_addr *ba, struct sockaddr *b)
1211 {
1212 	struct bgpd_addr bb;
1213 
1214 	sa2addr(b, &bb, NULL);
1215 	return (memcmp(ba, &bb, sizeof(*ba)) == 0);
1216 }
1217 
1218 static void
1219 get_alternate_addr(struct bgpd_addr *local, struct bgpd_addr *remote,
1220     struct bgpd_addr *alt, unsigned int *scope)
1221 {
1222 	struct ifaddrs	*ifap, *ifa, *match;
1223 	int connected = 0;
1224 	u_int8_t plen;
1225 
1226 	if (getifaddrs(&ifap) == -1)
1227 		fatal("getifaddrs");
1228 
1229 	for (match = ifap; match != NULL; match = match->ifa_next) {
1230 		if (match->ifa_addr == NULL)
1231 			continue;
1232 		if (match->ifa_addr->sa_family != AF_INET &&
1233 		    match->ifa_addr->sa_family != AF_INET6)
1234 			continue;
1235 		if (sa_equal(local, match->ifa_addr)) {
1236 			if (match->ifa_flags & IFF_POINTOPOINT &&
1237 			    match->ifa_dstaddr) {
1238 				if (sa_equal(remote, match->ifa_dstaddr))
1239 					connected = 1;
1240 			} else if (match->ifa_netmask) {
1241 				plen = mask2prefixlen(
1242 				    match->ifa_addr->sa_family,
1243 				    match->ifa_netmask);
1244 				if (prefix_compare(local, remote, plen) == 0)
1245 					connected = 1;
1246 			}
1247 			break;
1248 		}
1249 	}
1250 
1251 	if (match == NULL) {
1252 		log_warnx("%s: local address not found", __func__);
1253 		return;
1254 	}
1255 	if (connected)
1256 		*scope = if_nametoindex(match->ifa_name);
1257 	else
1258 		*scope = 0;
1259 
1260 	switch (local->aid) {
1261 	case AID_INET6:
1262 		for (ifa = ifap; ifa != NULL; ifa = ifa->ifa_next) {
1263 			if (ifa->ifa_addr != NULL &&
1264 			    ifa->ifa_addr->sa_family == AF_INET &&
1265 			    strcmp(ifa->ifa_name, match->ifa_name) == 0) {
1266 				sa2addr(ifa->ifa_addr, alt, NULL);
1267 				break;
1268 			}
1269 		}
1270 		break;
1271 	case AID_INET:
1272 		for (ifa = ifap; ifa != NULL; ifa = ifa->ifa_next) {
1273 			if (ifa->ifa_addr != NULL &&
1274 			    ifa->ifa_addr->sa_family == AF_INET6 &&
1275 			    strcmp(ifa->ifa_name, match->ifa_name) == 0) {
1276 				struct sockaddr_in6 *s =
1277 				    (struct sockaddr_in6 *)ifa->ifa_addr;
1278 
1279 				/* only accept global scope addresses */
1280 				if (IN6_IS_ADDR_LINKLOCAL(&s->sin6_addr) ||
1281 				    IN6_IS_ADDR_SITELOCAL(&s->sin6_addr))
1282 					continue;
1283 				sa2addr(ifa->ifa_addr, alt, NULL);
1284 				break;
1285 			}
1286 		}
1287 		break;
1288 	default:
1289 		log_warnx("%s: unsupported address family %s", __func__,
1290 		    aid2str(local->aid));
1291 		break;
1292 	}
1293 
1294 	freeifaddrs(ifap);
1295 }
1296 
1297 void
1298 session_tcp_established(struct peer *peer)
1299 {
1300 	struct sockaddr_storage	ss;
1301 	socklen_t		len;
1302 
1303 	len = sizeof(ss);
1304 	if (getsockname(peer->fd, (struct sockaddr *)&ss, &len) == -1)
1305 		log_warn("getsockname");
1306 	sa2addr((struct sockaddr *)&ss, &peer->local, &peer->local_port);
1307 	len = sizeof(ss);
1308 	if (getpeername(peer->fd, (struct sockaddr *)&ss, &len) == -1)
1309 		log_warn("getpeername");
1310 	sa2addr((struct sockaddr *)&ss, &peer->remote, &peer->remote_port);
1311 
1312 	get_alternate_addr(&peer->local, &peer->remote, &peer->local_alt,
1313 	    &peer->if_scope);
1314 }
1315 
1316 void
1317 session_capa_ann_none(struct peer *peer)
1318 {
1319 	memset(&peer->capa.ann, 0, sizeof(peer->capa.ann));
1320 }
1321 
/*
 * Append a capability header (one byte code, one byte length) to opb.
 * Both bytes are always attempted; the return value is the sum of the
 * two ibuf_add_n8() results, i.e. 0 when both succeeded.
 */
int
session_capa_add(struct ibuf *opb, uint8_t capa_code, uint8_t capa_len)
{
	int rc_code, rc_len;

	rc_code = ibuf_add_n8(opb, capa_code);
	rc_len = ibuf_add_n8(opb, capa_len);

	return (rc_code + rc_len);
}
1331 
/*
 * Append the 4 byte multiprotocol capability value for the given AID to
 * buf: 2 byte AFI, one zero byte, 1 byte SAFI.
 * Returns -1 if the AID cannot be translated, otherwise the sum of the
 * ibuf results (0 on success).
 */
int
session_capa_add_mp(struct ibuf *buf, uint8_t aid)
{
	uint16_t	 afi;
	uint8_t		 safi;
	int		 failed;

	if (aid2afi(aid, &afi, &safi) == -1) {
		log_warn("%s: bad AID", __func__);
		return (-1);
	}

	failed = ibuf_add_n16(buf, afi);
	failed += ibuf_add_zero(buf, 1);
	failed += ibuf_add_n8(buf, safi);

	return (failed);
}
1350 
/*
 * Append an AFI/SAFI/flags triple (2 + 1 + 1 bytes) for the given AID to
 * b, as used by the add-path capability in session_open().
 *
 * p is not used by this function.  Returns -1 if the AID cannot be
 * translated, otherwise the sum of the ibuf results (0 on success).
 *
 * The error accumulator is a plain int, like in the sibling helpers, so
 * that negative ibuf results are summed and returned without going
 * through an unsigned wrap-around.
 */
int
session_capa_add_afi(struct peer *p, struct ibuf *b, uint8_t aid,
    uint8_t flags)
{
	int		errs = 0;
	uint16_t	afi;
	uint8_t		safi;

	if (aid2afi(aid, &afi, &safi)) {
		log_warn("%s: bad AID", __func__);
		return (-1);
	}

	errs += ibuf_add_n16(b, afi);
	errs += ibuf_add_n8(b, safi);
	errs += ibuf_add_n8(b, flags);

	return (errs);
}
1370 
1371 struct bgp_msg *
1372 session_newmsg(enum msg_type msgtype, uint16_t len)
1373 {
1374 	u_char			 marker[MSGSIZE_HEADER_MARKER];
1375 	struct bgp_msg		*msg;
1376 	struct ibuf		*buf;
1377 	int			 errs = 0;
1378 
1379 	memset(marker, 0xff, sizeof(marker));
1380 
1381 	if ((buf = ibuf_open(len)) == NULL)
1382 		return (NULL);
1383 
1384 	errs += ibuf_add(buf, marker, sizeof(marker));
1385 	errs += ibuf_add_n16(buf, len);
1386 	errs += ibuf_add_n8(buf, msgtype);
1387 
1388 	if (errs || (msg = calloc(1, sizeof(*msg))) == NULL) {
1389 		ibuf_free(buf);
1390 		return (NULL);
1391 	}
1392 
1393 	msg->buf = buf;
1394 	msg->type = msgtype;
1395 	msg->len = len;
1396 
1397 	return (msg);
1398 }
1399 
1400 int
1401 session_sendmsg(struct bgp_msg *msg, struct peer *p)
1402 {
1403 	struct mrt		*mrt;
1404 
1405 	LIST_FOREACH(mrt, &mrthead, entry) {
1406 		if (!(mrt->type == MRT_ALL_OUT || (msg->type == UPDATE &&
1407 		    mrt->type == MRT_UPDATE_OUT)))
1408 			continue;
1409 		if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
1410 		    mrt->peer_id == p->conf.id || (mrt->group_id != 0 &&
1411 		    mrt->group_id == p->conf.groupid))
1412 			mrt_dump_bgp_msg(mrt, ibuf_data(msg->buf), msg->len, p,
1413 			    msg->type);
1414 	}
1415 
1416 	ibuf_close(&p->wbuf, msg->buf);
1417 	if (!p->throttled && p->wbuf.queued > SESS_MSG_HIGH_MARK) {
1418 		if (imsg_rde(IMSG_XOFF, p->conf.id, NULL, 0) == -1)
1419 			log_peer_warn(&p->conf, "imsg_compose XOFF");
1420 		else
1421 			p->throttled = 1;
1422 	}
1423 
1424 	free(msg);
1425 	return (0);
1426 }
1427 
1428 /*
1429  * Translate between internal roles and the value expected by RFC 9234.
1430  */
1431 static uint8_t
1432 role2capa(enum role role)
1433 {
1434 	switch (role) {
1435 	case ROLE_CUSTOMER:
1436 		return CAPA_ROLE_CUSTOMER;
1437 	case ROLE_PROVIDER:
1438 		return CAPA_ROLE_PROVIDER;
1439 	case ROLE_RS:
1440 		return CAPA_ROLE_RS;
1441 	case ROLE_RS_CLIENT:
1442 		return CAPA_ROLE_RS_CLIENT;
1443 	case ROLE_PEER:
1444 		return CAPA_ROLE_PEER;
1445 	default:
1446 		fatalx("Unsupported role for role capability");
1447 	}
1448 }
1449 
1450 static enum role
1451 capa2role(uint8_t val)
1452 {
1453 	switch (val) {
1454 	case CAPA_ROLE_PROVIDER:
1455 		return ROLE_PROVIDER;
1456 	case CAPA_ROLE_RS:
1457 		return ROLE_RS;
1458 	case CAPA_ROLE_RS_CLIENT:
1459 		return ROLE_RS_CLIENT;
1460 	case CAPA_ROLE_CUSTOMER:
1461 		return ROLE_CUSTOMER;
1462 	case CAPA_ROLE_PEER:
1463 		return ROLE_PEER;
1464 	default:
1465 		return ROLE_NONE;
1466 	}
1467 }
1468 
1469 void
1470 session_open(struct peer *p)
1471 {
1472 	struct bgp_msg		*buf;
1473 	struct ibuf		*opb;
1474 	size_t			 len, optparamlen;
1475 	uint16_t		 holdtime;
1476 	uint8_t			 i;
1477 	int			 errs = 0, extlen = 0;
1478 	int			 mpcapa = 0;
1479 
1480 
1481 	if ((opb = ibuf_dynamic(0, UINT16_MAX - 3)) == NULL) {
1482 		bgp_fsm(p, EVNT_CON_FATAL);
1483 		return;
1484 	}
1485 
1486 	/* multiprotocol extensions, RFC 4760 */
1487 	for (i = 0; i < AID_MAX; i++)
1488 		if (p->capa.ann.mp[i]) {	/* 4 bytes data */
1489 			errs += session_capa_add(opb, CAPA_MP, 4);
1490 			errs += session_capa_add_mp(opb, i);
1491 			mpcapa++;
1492 		}
1493 
1494 	/* route refresh, RFC 2918 */
1495 	if (p->capa.ann.refresh)	/* no data */
1496 		errs += session_capa_add(opb, CAPA_REFRESH, 0);
1497 
1498 	/* BGP open policy, RFC 9234, only for ebgp sessions */
1499 	if (p->conf.ebgp && p->capa.ann.policy &&
1500 	    p->conf.role != ROLE_NONE &&
1501 	    (p->capa.ann.mp[AID_INET] || p->capa.ann.mp[AID_INET6] ||
1502 	    mpcapa == 0)) {
1503 		errs += session_capa_add(opb, CAPA_ROLE, 1);
1504 		errs += ibuf_add_n8(opb, role2capa(p->conf.role));
1505 	}
1506 
1507 	/* graceful restart and End-of-RIB marker, RFC 4724 */
1508 	if (p->capa.ann.grestart.restart) {
1509 		int		rst = 0;
1510 		uint16_t	hdr = 0;
1511 
1512 		for (i = 0; i < AID_MAX; i++) {
1513 			if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING)
1514 				rst++;
1515 		}
1516 
1517 		/* Only set the R-flag if no graceful restart is ongoing */
1518 		if (!rst)
1519 			hdr |= CAPA_GR_R_FLAG;
1520 		errs += session_capa_add(opb, CAPA_RESTART, sizeof(hdr));
1521 		errs += ibuf_add_n16(opb, hdr);
1522 	}
1523 
1524 	/* 4-bytes AS numbers, RFC6793 */
1525 	if (p->capa.ann.as4byte) {	/* 4 bytes data */
1526 		errs += session_capa_add(opb, CAPA_AS4BYTE, sizeof(uint32_t));
1527 		errs += ibuf_add_n32(opb, p->conf.local_as);
1528 	}
1529 
1530 	/* advertisement of multiple paths, RFC7911 */
1531 	if (p->capa.ann.add_path[0]) {	/* variable */
1532 		uint8_t	aplen;
1533 
1534 		if (mpcapa)
1535 			aplen = 4 * mpcapa;
1536 		else	/* AID_INET */
1537 			aplen = 4;
1538 		errs += session_capa_add(opb, CAPA_ADD_PATH, aplen);
1539 		if (mpcapa) {
1540 			for (i = AID_MIN; i < AID_MAX; i++) {
1541 				if (p->capa.ann.mp[i]) {
1542 					errs += session_capa_add_afi(p, opb,
1543 					    i, p->capa.ann.add_path[i]);
1544 				}
1545 			}
1546 		} else {	/* AID_INET */
1547 			errs += session_capa_add_afi(p, opb, AID_INET,
1548 			    p->capa.ann.add_path[AID_INET]);
1549 		}
1550 	}
1551 
1552 	/* enhanced route-refresh, RFC7313 */
1553 	if (p->capa.ann.enhanced_rr)	/* no data */
1554 		errs += session_capa_add(opb, CAPA_ENHANCED_RR, 0);
1555 
1556 	if (errs) {
1557 		ibuf_free(opb);
1558 		bgp_fsm(p, EVNT_CON_FATAL);
1559 		return;
1560 	}
1561 
1562 	optparamlen = ibuf_size(opb);
1563 	len = MSGSIZE_OPEN_MIN + optparamlen;
1564 	if (optparamlen == 0) {
1565 		/* nothing */
1566 	} else if (optparamlen + 2 >= 255) {
1567 		/* RFC9072: use 255 as magic size and request extra header */
1568 		optparamlen = 255;
1569 		extlen = 1;
1570 		/* 3 byte OPT_PARAM_EXT_LEN and OPT_PARAM_CAPABILITIES */
1571 		len += 2 * 3;
1572 	} else {
1573 		/* regular capabilities header */
1574 		optparamlen += 2;
1575 		len += 2;
1576 	}
1577 
1578 	if ((buf = session_newmsg(OPEN, len)) == NULL) {
1579 		ibuf_free(opb);
1580 		bgp_fsm(p, EVNT_CON_FATAL);
1581 		return;
1582 	}
1583 
1584 	if (p->conf.holdtime)
1585 		holdtime = p->conf.holdtime;
1586 	else
1587 		holdtime = conf->holdtime;
1588 
1589 	errs += ibuf_add_n8(buf->buf, 4);
1590 	errs += ibuf_add_n16(buf->buf, p->conf.local_short_as);
1591 	errs += ibuf_add_n16(buf->buf, holdtime);
1592 	/* is already in network byte order */
1593 	errs += ibuf_add(buf->buf, &conf->bgpid, sizeof(conf->bgpid));
1594 	errs += ibuf_add_n8(buf->buf, optparamlen);
1595 
1596 	if (extlen) {
1597 		/* RFC9072 extra header which spans over the capabilities hdr */
1598 		errs += ibuf_add_n8(buf->buf, OPT_PARAM_EXT_LEN);
1599 		errs += ibuf_add_n16(buf->buf, ibuf_size(opb) + 1 + 2);
1600 	}
1601 
1602 	if (optparamlen) {
1603 		errs += ibuf_add_n8(buf->buf, OPT_PARAM_CAPABILITIES);
1604 
1605 		if (extlen) {
1606 			/* RFC9072: 2-byte extended length */
1607 			errs += ibuf_add_n16(buf->buf, ibuf_size(opb));
1608 		} else {
1609 			errs += ibuf_add_n8(buf->buf, ibuf_size(opb));
1610 		}
1611 		errs += ibuf_add_buf(buf->buf, opb);
1612 	}
1613 
1614 	ibuf_free(opb);
1615 
1616 	if (errs) {
1617 		ibuf_free(buf->buf);
1618 		free(buf);
1619 		bgp_fsm(p, EVNT_CON_FATAL);
1620 		return;
1621 	}
1622 
1623 	if (session_sendmsg(buf, p) == -1) {
1624 		bgp_fsm(p, EVNT_CON_FATAL);
1625 		return;
1626 	}
1627 
1628 	p->stats.msg_sent_open++;
1629 }
1630 
1631 void
1632 session_keepalive(struct peer *p)
1633 {
1634 	struct bgp_msg		*buf;
1635 
1636 	if ((buf = session_newmsg(KEEPALIVE, MSGSIZE_KEEPALIVE)) == NULL ||
1637 	    session_sendmsg(buf, p) == -1) {
1638 		bgp_fsm(p, EVNT_CON_FATAL);
1639 		return;
1640 	}
1641 
1642 	start_timer_keepalive(p);
1643 	p->stats.msg_sent_keepalive++;
1644 }
1645 
1646 void
1647 session_update(uint32_t peerid, void *data, size_t datalen)
1648 {
1649 	struct peer		*p;
1650 	struct bgp_msg		*buf;
1651 
1652 	if ((p = getpeerbyid(conf, peerid)) == NULL) {
1653 		log_warnx("no such peer: id=%u", peerid);
1654 		return;
1655 	}
1656 
1657 	if (p->state != STATE_ESTABLISHED)
1658 		return;
1659 
1660 	if ((buf = session_newmsg(UPDATE, MSGSIZE_HEADER + datalen)) == NULL) {
1661 		bgp_fsm(p, EVNT_CON_FATAL);
1662 		return;
1663 	}
1664 
1665 	if (ibuf_add(buf->buf, data, datalen)) {
1666 		ibuf_free(buf->buf);
1667 		free(buf);
1668 		bgp_fsm(p, EVNT_CON_FATAL);
1669 		return;
1670 	}
1671 
1672 	if (session_sendmsg(buf, p) == -1) {
1673 		bgp_fsm(p, EVNT_CON_FATAL);
1674 		return;
1675 	}
1676 
1677 	start_timer_keepalive(p);
1678 	p->stats.msg_sent_update++;
1679 }
1680 
1681 void
1682 session_notification(struct peer *p, uint8_t errcode, uint8_t subcode,
1683     void *data, ssize_t datalen)
1684 {
1685 	struct bgp_msg		*buf;
1686 	int			 errs = 0;
1687 
1688 	if (p->stats.last_sent_errcode)	/* some notification already sent */
1689 		return;
1690 
1691 	log_notification(p, errcode, subcode, data, datalen, "sending");
1692 
1693 	/* cap to maximum size */
1694 	if (datalen > MAX_PKTSIZE - MSGSIZE_NOTIFICATION_MIN) {
1695 		log_peer_warnx(&p->conf,
1696 		    "oversized notification, data trunkated");
1697 		datalen = MAX_PKTSIZE - MSGSIZE_NOTIFICATION_MIN;
1698 	}
1699 
1700 	if ((buf = session_newmsg(NOTIFICATION,
1701 	    MSGSIZE_NOTIFICATION_MIN + datalen)) == NULL) {
1702 		bgp_fsm(p, EVNT_CON_FATAL);
1703 		return;
1704 	}
1705 
1706 	errs += ibuf_add_n8(buf->buf, errcode);
1707 	errs += ibuf_add_n8(buf->buf, subcode);
1708 
1709 	if (datalen > 0)
1710 		errs += ibuf_add(buf->buf, data, datalen);
1711 
1712 	if (errs) {
1713 		ibuf_free(buf->buf);
1714 		free(buf);
1715 		bgp_fsm(p, EVNT_CON_FATAL);
1716 		return;
1717 	}
1718 
1719 	if (session_sendmsg(buf, p) == -1) {
1720 		bgp_fsm(p, EVNT_CON_FATAL);
1721 		return;
1722 	}
1723 
1724 	p->stats.msg_sent_notification++;
1725 	p->stats.last_sent_errcode = errcode;
1726 	p->stats.last_sent_suberr = subcode;
1727 }
1728 
1729 int
1730 session_neighbor_rrefresh(struct peer *p)
1731 {
1732 	uint8_t	i;
1733 
1734 	if (!(p->capa.neg.refresh || p->capa.neg.enhanced_rr))
1735 		return (-1);
1736 
1737 	for (i = 0; i < AID_MAX; i++) {
1738 		if (p->capa.neg.mp[i] != 0)
1739 			session_rrefresh(p, i, ROUTE_REFRESH_REQUEST);
1740 	}
1741 
1742 	return (0);
1743 }
1744 
1745 void
1746 session_rrefresh(struct peer *p, uint8_t aid, uint8_t subtype)
1747 {
1748 	struct bgp_msg		*buf;
1749 	int			 errs = 0;
1750 	uint16_t		 afi;
1751 	uint8_t			 safi;
1752 
1753 	switch (subtype) {
1754 	case ROUTE_REFRESH_REQUEST:
1755 		p->stats.refresh_sent_req++;
1756 		break;
1757 	case ROUTE_REFRESH_BEGIN_RR:
1758 	case ROUTE_REFRESH_END_RR:
1759 		/* requires enhanced route refresh */
1760 		if (!p->capa.neg.enhanced_rr)
1761 			return;
1762 		if (subtype == ROUTE_REFRESH_BEGIN_RR)
1763 			p->stats.refresh_sent_borr++;
1764 		else
1765 			p->stats.refresh_sent_eorr++;
1766 		break;
1767 	default:
1768 		fatalx("session_rrefresh: bad subtype %d", subtype);
1769 	}
1770 
1771 	if (aid2afi(aid, &afi, &safi) == -1)
1772 		fatalx("session_rrefresh: bad afi/safi pair");
1773 
1774 	if ((buf = session_newmsg(RREFRESH, MSGSIZE_RREFRESH)) == NULL) {
1775 		bgp_fsm(p, EVNT_CON_FATAL);
1776 		return;
1777 	}
1778 
1779 	errs += ibuf_add_n16(buf->buf, afi);
1780 	errs += ibuf_add_n8(buf->buf, subtype);
1781 	errs += ibuf_add_n8(buf->buf, safi);
1782 
1783 	if (errs) {
1784 		ibuf_free(buf->buf);
1785 		free(buf);
1786 		bgp_fsm(p, EVNT_CON_FATAL);
1787 		return;
1788 	}
1789 
1790 	if (session_sendmsg(buf, p) == -1) {
1791 		bgp_fsm(p, EVNT_CON_FATAL);
1792 		return;
1793 	}
1794 
1795 	p->stats.msg_sent_rrefresh++;
1796 }
1797 
1798 int
1799 session_graceful_restart(struct peer *p)
1800 {
1801 	uint8_t	i;
1802 
1803 	timer_set(&p->timers, Timer_RestartTimeout,
1804 	    p->capa.neg.grestart.timeout);
1805 
1806 	for (i = 0; i < AID_MAX; i++) {
1807 		if (p->capa.neg.grestart.flags[i] & CAPA_GR_PRESENT) {
1808 			if (imsg_rde(IMSG_SESSION_STALE, p->conf.id,
1809 			    &i, sizeof(i)) == -1)
1810 				return (-1);
1811 			log_peer_warnx(&p->conf,
1812 			    "graceful restart of %s, keeping routes",
1813 			    aid2str(i));
1814 			p->capa.neg.grestart.flags[i] |= CAPA_GR_RESTARTING;
1815 		} else if (p->capa.neg.mp[i]) {
1816 			if (imsg_rde(IMSG_SESSION_NOGRACE, p->conf.id,
1817 			    &i, sizeof(i)) == -1)
1818 				return (-1);
1819 			log_peer_warnx(&p->conf,
1820 			    "graceful restart of %s, flushing routes",
1821 			    aid2str(i));
1822 		}
1823 	}
1824 	return (0);
1825 }
1826 
1827 int
1828 session_graceful_stop(struct peer *p)
1829 {
1830 	uint8_t	i;
1831 
1832 	for (i = 0; i < AID_MAX; i++) {
1833 		/*
1834 		 * Only flush if the peer is restarting and the timeout fired.
1835 		 * In all other cases the session was already flushed when the
1836 		 * session went down or when the new open message was parsed.
1837 		 */
1838 		if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING) {
1839 			log_peer_warnx(&p->conf, "graceful restart of %s, "
1840 			    "time-out, flushing", aid2str(i));
1841 			if (imsg_rde(IMSG_SESSION_FLUSH, p->conf.id,
1842 			    &i, sizeof(i)) == -1)
1843 				return (-1);
1844 		}
1845 		p->capa.neg.grestart.flags[i] &= ~CAPA_GR_RESTARTING;
1846 	}
1847 	return (0);
1848 }
1849 
1850 int
1851 session_dispatch_msg(struct pollfd *pfd, struct peer *p)
1852 {
1853 	ssize_t		n;
1854 	socklen_t	len;
1855 	int		error;
1856 
1857 	if (p->state == STATE_CONNECT) {
1858 		if (pfd->revents & POLLOUT) {
1859 			if (pfd->revents & POLLIN) {
1860 				/* error occurred */
1861 				len = sizeof(error);
1862 				if (getsockopt(pfd->fd, SOL_SOCKET, SO_ERROR,
1863 				    &error, &len) == -1 || error) {
1864 					if (error)
1865 						errno = error;
1866 					if (errno != p->lasterr) {
1867 						log_peer_warn(&p->conf,
1868 						    "socket error");
1869 						p->lasterr = errno;
1870 					}
1871 					bgp_fsm(p, EVNT_CON_OPENFAIL);
1872 					return (1);
1873 				}
1874 			}
1875 			bgp_fsm(p, EVNT_CON_OPEN);
1876 			return (1);
1877 		}
1878 		if (pfd->revents & POLLHUP) {
1879 			bgp_fsm(p, EVNT_CON_OPENFAIL);
1880 			return (1);
1881 		}
1882 		if (pfd->revents & (POLLERR|POLLNVAL)) {
1883 			bgp_fsm(p, EVNT_CON_FATAL);
1884 			return (1);
1885 		}
1886 		return (0);
1887 	}
1888 
1889 	if (pfd->revents & POLLHUP) {
1890 		bgp_fsm(p, EVNT_CON_CLOSED);
1891 		return (1);
1892 	}
1893 	if (pfd->revents & (POLLERR|POLLNVAL)) {
1894 		bgp_fsm(p, EVNT_CON_FATAL);
1895 		return (1);
1896 	}
1897 
1898 	if (pfd->revents & POLLOUT && p->wbuf.queued) {
1899 		if ((error = msgbuf_write(&p->wbuf)) <= 0 && errno != EAGAIN) {
1900 			if (error == 0)
1901 				log_peer_warnx(&p->conf, "Connection closed");
1902 			else if (error == -1)
1903 				log_peer_warn(&p->conf, "write error");
1904 			bgp_fsm(p, EVNT_CON_FATAL);
1905 			return (1);
1906 		}
1907 		p->stats.last_write = getmonotime();
1908 		if (p->holdtime > 0)
1909 			timer_set(&p->timers, Timer_SendHold,
1910 			    p->holdtime < INTERVAL_HOLD ? INTERVAL_HOLD :
1911 			    p->holdtime);
1912 		if (p->throttled && p->wbuf.queued < SESS_MSG_LOW_MARK) {
1913 			if (imsg_rde(IMSG_XON, p->conf.id, NULL, 0) == -1)
1914 				log_peer_warn(&p->conf, "imsg_compose XON");
1915 			else
1916 				p->throttled = 0;
1917 		}
1918 		if (!(pfd->revents & POLLIN))
1919 			return (1);
1920 	}
1921 
1922 	if (p->rbuf && pfd->revents & POLLIN) {
1923 		if ((n = read(p->fd, p->rbuf->buf + p->rbuf->wpos,
1924 		    sizeof(p->rbuf->buf) - p->rbuf->wpos)) == -1) {
1925 			if (errno != EINTR && errno != EAGAIN) {
1926 				log_peer_warn(&p->conf, "read error");
1927 				bgp_fsm(p, EVNT_CON_FATAL);
1928 			}
1929 			return (1);
1930 		}
1931 		if (n == 0) {	/* connection closed */
1932 			bgp_fsm(p, EVNT_CON_CLOSED);
1933 			return (1);
1934 		}
1935 
1936 		p->rbuf->wpos += n;
1937 		p->stats.last_read = getmonotime();
1938 		return (1);
1939 	}
1940 	return (0);
1941 }
1942 
1943 void
1944 session_process_msg(struct peer *p)
1945 {
1946 	struct mrt	*mrt;
1947 	ssize_t		rpos, av, left;
1948 	int		processed = 0;
1949 	uint16_t	msglen;
1950 	uint8_t		msgtype;
1951 
1952 	rpos = 0;
1953 	av = p->rbuf->wpos;
1954 	p->rpending = 0;
1955 
1956 	/*
1957 	 * session might drop to IDLE -> buffers deallocated
1958 	 * we MUST check rbuf != NULL before use
1959 	 */
1960 	for (;;) {
1961 		if (p->rbuf == NULL)
1962 			return;
1963 		if (rpos + MSGSIZE_HEADER > av)
1964 			break;
1965 		if (parse_header(p, p->rbuf->buf + rpos, &msglen,
1966 		    &msgtype) == -1)
1967 			return;
1968 		if (rpos + msglen > av)
1969 			break;
1970 		p->rbuf->rptr = p->rbuf->buf + rpos;
1971 
1972 		/* dump to MRT as soon as we have a full packet */
1973 		LIST_FOREACH(mrt, &mrthead, entry) {
1974 			if (!(mrt->type == MRT_ALL_IN || (msgtype == UPDATE &&
1975 			    mrt->type == MRT_UPDATE_IN)))
1976 				continue;
1977 			if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
1978 			    mrt->peer_id == p->conf.id || (mrt->group_id != 0 &&
1979 			    mrt->group_id == p->conf.groupid))
1980 				mrt_dump_bgp_msg(mrt, p->rbuf->rptr, msglen, p,
1981 				    msgtype);
1982 		}
1983 
1984 		switch (msgtype) {
1985 		case OPEN:
1986 			bgp_fsm(p, EVNT_RCVD_OPEN);
1987 			p->stats.msg_rcvd_open++;
1988 			break;
1989 		case UPDATE:
1990 			bgp_fsm(p, EVNT_RCVD_UPDATE);
1991 			p->stats.msg_rcvd_update++;
1992 			break;
1993 		case NOTIFICATION:
1994 			bgp_fsm(p, EVNT_RCVD_NOTIFICATION);
1995 			p->stats.msg_rcvd_notification++;
1996 			break;
1997 		case KEEPALIVE:
1998 			bgp_fsm(p, EVNT_RCVD_KEEPALIVE);
1999 			p->stats.msg_rcvd_keepalive++;
2000 			break;
2001 		case RREFRESH:
2002 			parse_rrefresh(p);
2003 			p->stats.msg_rcvd_rrefresh++;
2004 			break;
2005 		default:	/* cannot happen */
2006 			session_notification(p, ERR_HEADER, ERR_HDR_TYPE,
2007 			    &msgtype, 1);
2008 			log_warnx("received message with unknown type %u",
2009 			    msgtype);
2010 			bgp_fsm(p, EVNT_CON_FATAL);
2011 		}
2012 		rpos += msglen;
2013 		if (++processed > MSG_PROCESS_LIMIT) {
2014 			p->rpending = 1;
2015 			break;
2016 		}
2017 	}
2018 
2019 	if (p->rbuf == NULL)
2020 		return;
2021 	if (rpos < av) {
2022 		left = av - rpos;
2023 		memmove(&p->rbuf->buf, p->rbuf->buf + rpos, left);
2024 		p->rbuf->wpos = left;
2025 	} else
2026 		p->rbuf->wpos = 0;
2027 }
2028 
2029 int
2030 parse_header(struct peer *peer, u_char *data, uint16_t *len, uint8_t *type)
2031 {
2032 	u_char			*p;
2033 	uint16_t		 olen;
2034 	static const uint8_t	 marker[MSGSIZE_HEADER_MARKER] = { 0xff, 0xff,
2035 				    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
2036 				    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
2037 
2038 	/* caller MUST make sure we are getting 19 bytes! */
2039 	p = data;
2040 	if (memcmp(p, marker, sizeof(marker))) {
2041 		log_peer_warnx(&peer->conf, "sync error");
2042 		session_notification(peer, ERR_HEADER, ERR_HDR_SYNC, NULL, 0);
2043 		bgp_fsm(peer, EVNT_CON_FATAL);
2044 		return (-1);
2045 	}
2046 	p += MSGSIZE_HEADER_MARKER;
2047 
2048 	memcpy(&olen, p, 2);
2049 	*len = ntohs(olen);
2050 	p += 2;
2051 	memcpy(type, p, 1);
2052 
2053 	if (*len < MSGSIZE_HEADER || *len > MAX_PKTSIZE) {
2054 		log_peer_warnx(&peer->conf,
2055 		    "received message: illegal length: %u byte", *len);
2056 		session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
2057 		    &olen, sizeof(olen));
2058 		bgp_fsm(peer, EVNT_CON_FATAL);
2059 		return (-1);
2060 	}
2061 
2062 	switch (*type) {
2063 	case OPEN:
2064 		if (*len < MSGSIZE_OPEN_MIN) {
2065 			log_peer_warnx(&peer->conf,
2066 			    "received OPEN: illegal len: %u byte", *len);
2067 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
2068 			    &olen, sizeof(olen));
2069 			bgp_fsm(peer, EVNT_CON_FATAL);
2070 			return (-1);
2071 		}
2072 		break;
2073 	case NOTIFICATION:
2074 		if (*len < MSGSIZE_NOTIFICATION_MIN) {
2075 			log_peer_warnx(&peer->conf,
2076 			    "received NOTIFICATION: illegal len: %u byte",
2077 			    *len);
2078 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
2079 			    &olen, sizeof(olen));
2080 			bgp_fsm(peer, EVNT_CON_FATAL);
2081 			return (-1);
2082 		}
2083 		break;
2084 	case UPDATE:
2085 		if (*len < MSGSIZE_UPDATE_MIN) {
2086 			log_peer_warnx(&peer->conf,
2087 			    "received UPDATE: illegal len: %u byte", *len);
2088 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
2089 			    &olen, sizeof(olen));
2090 			bgp_fsm(peer, EVNT_CON_FATAL);
2091 			return (-1);
2092 		}
2093 		break;
2094 	case KEEPALIVE:
2095 		if (*len != MSGSIZE_KEEPALIVE) {
2096 			log_peer_warnx(&peer->conf,
2097 			    "received KEEPALIVE: illegal len: %u byte", *len);
2098 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
2099 			    &olen, sizeof(olen));
2100 			bgp_fsm(peer, EVNT_CON_FATAL);
2101 			return (-1);
2102 		}
2103 		break;
2104 	case RREFRESH:
2105 		if (*len < MSGSIZE_RREFRESH_MIN) {
2106 			log_peer_warnx(&peer->conf,
2107 			    "received RREFRESH: illegal len: %u byte", *len);
2108 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
2109 			    &olen, sizeof(olen));
2110 			bgp_fsm(peer, EVNT_CON_FATAL);
2111 			return (-1);
2112 		}
2113 		break;
2114 	default:
2115 		log_peer_warnx(&peer->conf,
2116 		    "received msg with unknown type %u", *type);
2117 		session_notification(peer, ERR_HEADER, ERR_HDR_TYPE,
2118 		    type, 1);
2119 		bgp_fsm(peer, EVNT_CON_FATAL);
2120 		return (-1);
2121 	}
2122 	return (0);
2123 }
2124 
2125 int
2126 parse_open(struct peer *peer)
2127 {
2128 	u_char		*p, *op_val;
2129 	uint8_t		 version, rversion;
2130 	uint16_t	 short_as, msglen;
2131 	uint16_t	 holdtime, oholdtime, myholdtime;
2132 	uint32_t	 as, bgpid;
2133 	uint16_t	 optparamlen, extlen, plen, op_len;
2134 	uint8_t		 op_type, suberr = 0;
2135 
2136 	p = peer->rbuf->rptr;
2137 	p += MSGSIZE_HEADER_MARKER;
2138 	memcpy(&msglen, p, sizeof(msglen));
2139 	msglen = ntohs(msglen);
2140 
2141 	p = peer->rbuf->rptr;
2142 	p += MSGSIZE_HEADER;	/* header is already checked */
2143 
2144 	memcpy(&version, p, sizeof(version));
2145 	p += sizeof(version);
2146 
2147 	if (version != BGP_VERSION) {
2148 		log_peer_warnx(&peer->conf,
2149 		    "peer wants unrecognized version %u", version);
2150 		if (version > BGP_VERSION)
2151 			rversion = version - BGP_VERSION;
2152 		else
2153 			rversion = BGP_VERSION;
2154 		session_notification(peer, ERR_OPEN, ERR_OPEN_VERSION,
2155 		    &rversion, sizeof(rversion));
2156 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2157 		return (-1);
2158 	}
2159 
2160 	memcpy(&short_as, p, sizeof(short_as));
2161 	p += sizeof(short_as);
2162 	as = peer->short_as = ntohs(short_as);
2163 	if (as == 0) {
2164 		log_peer_warnx(&peer->conf,
2165 		    "peer requests unacceptable AS %u", as);
2166 		session_notification(peer, ERR_OPEN, ERR_OPEN_AS,
2167 		    NULL, 0);
2168 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2169 		return (-1);
2170 	}
2171 
2172 	memcpy(&oholdtime, p, sizeof(oholdtime));
2173 	p += sizeof(oholdtime);
2174 
2175 	holdtime = ntohs(oholdtime);
2176 	if (holdtime && holdtime < peer->conf.min_holdtime) {
2177 		log_peer_warnx(&peer->conf,
2178 		    "peer requests unacceptable holdtime %u", holdtime);
2179 		session_notification(peer, ERR_OPEN, ERR_OPEN_HOLDTIME,
2180 		    NULL, 0);
2181 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2182 		return (-1);
2183 	}
2184 
2185 	myholdtime = peer->conf.holdtime;
2186 	if (!myholdtime)
2187 		myholdtime = conf->holdtime;
2188 	if (holdtime < myholdtime)
2189 		peer->holdtime = holdtime;
2190 	else
2191 		peer->holdtime = myholdtime;
2192 
2193 	memcpy(&bgpid, p, sizeof(bgpid));
2194 	p += sizeof(bgpid);
2195 
2196 	/* check bgpid for validity - just disallow 0 */
2197 	if (ntohl(bgpid) == 0) {
2198 		log_peer_warnx(&peer->conf, "peer BGPID %u unacceptable",
2199 		    ntohl(bgpid));
2200 		session_notification(peer, ERR_OPEN, ERR_OPEN_BGPID,
2201 		    NULL, 0);
2202 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2203 		return (-1);
2204 	}
2205 	peer->remote_bgpid = bgpid;
2206 
2207 	extlen = 0;
2208 	optparamlen = *p++;
2209 
2210 	if (optparamlen == 0) {
2211 		if (msglen != MSGSIZE_OPEN_MIN) {
2212 bad_len:
2213 			log_peer_warnx(&peer->conf,
2214 			    "corrupt OPEN message received: length mismatch");
2215 			session_notification(peer, ERR_OPEN, 0, NULL, 0);
2216 			change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2217 			return (-1);
2218 		}
2219 	} else {
2220 		if (msglen < MSGSIZE_OPEN_MIN + 1)
2221 			goto bad_len;
2222 
2223 		op_type = *p;
2224 		if (op_type == OPT_PARAM_EXT_LEN) {
2225 			p++;
2226 			memcpy(&optparamlen, p, sizeof(optparamlen));
2227 			optparamlen = ntohs(optparamlen);
2228 			p += sizeof(optparamlen);
2229 			extlen = 1;
2230 		}
2231 
2232 		/* RFC9020 encoding has 3 extra bytes */
2233 		if (optparamlen + 3 * extlen != msglen - MSGSIZE_OPEN_MIN)
2234 			goto bad_len;
2235 	}
2236 
2237 	plen = optparamlen;
2238 	while (plen > 0) {
2239 		if (plen < 2 + extlen)
2240 			goto bad_len;
2241 
2242 		memcpy(&op_type, p, sizeof(op_type));
2243 		p += sizeof(op_type);
2244 		plen -= sizeof(op_type);
2245 		if (!extlen) {
2246 			op_len = *p++;
2247 			plen--;
2248 		} else {
2249 			memcpy(&op_len, p, sizeof(op_len));
2250 			op_len = ntohs(op_len);
2251 			p += sizeof(op_len);
2252 			plen -= sizeof(op_len);
2253 		}
2254 		if (op_len > 0) {
2255 			if (plen < op_len)
2256 				goto bad_len;
2257 			op_val = p;
2258 			p += op_len;
2259 			plen -= op_len;
2260 		} else
2261 			op_val = NULL;
2262 
2263 		switch (op_type) {
2264 		case OPT_PARAM_CAPABILITIES:		/* RFC 3392 */
2265 			if (parse_capabilities(peer, op_val, op_len,
2266 			    &as) == -1) {
2267 				session_notification(peer, ERR_OPEN, 0,
2268 				    NULL, 0);
2269 				change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2270 				return (-1);
2271 			}
2272 			break;
2273 		case OPT_PARAM_AUTH:			/* deprecated */
2274 		default:
2275 			/*
2276 			 * unsupported type
2277 			 * the RFCs tell us to leave the data section empty
2278 			 * and notify the peer with ERR_OPEN, ERR_OPEN_OPT.
2279 			 * How the peer should know _which_ optional parameter
2280 			 * we don't support is beyond me.
2281 			 */
2282 			log_peer_warnx(&peer->conf,
2283 			    "received OPEN message with unsupported optional "
2284 			    "parameter: type %u", op_type);
2285 			session_notification(peer, ERR_OPEN, ERR_OPEN_OPT,
2286 				NULL, 0);
2287 			change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2288 			/* no punish */
2289 			timer_set(&peer->timers, Timer_IdleHold, 0);
2290 			peer->IdleHoldTime /= 2;
2291 			return (-1);
2292 		}
2293 	}
2294 
2295 	/* if remote-as is zero and it's a cloned neighbor, accept any */
2296 	if (peer->template && !peer->conf.remote_as && as != AS_TRANS) {
2297 		peer->conf.remote_as = as;
2298 		peer->conf.ebgp = (peer->conf.remote_as != peer->conf.local_as);
2299 		if (!peer->conf.ebgp)
2300 			/* force enforce_as off for iBGP sessions */
2301 			peer->conf.enforce_as = ENFORCE_AS_OFF;
2302 	}
2303 
2304 	if (peer->conf.remote_as != as) {
2305 		log_peer_warnx(&peer->conf, "peer sent wrong AS %s",
2306 		    log_as(as));
2307 		session_notification(peer, ERR_OPEN, ERR_OPEN_AS, NULL, 0);
2308 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2309 		return (-1);
2310 	}
2311 
2312 	/* on iBGP sessions check for bgpid collision */
2313 	if (!peer->conf.ebgp && peer->remote_bgpid == conf->bgpid) {
2314 		log_peer_warnx(&peer->conf, "peer BGPID %u conflicts with ours",
2315 		    ntohl(bgpid));
2316 		session_notification(peer, ERR_OPEN, ERR_OPEN_BGPID,
2317 		    NULL, 0);
2318 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2319 		return (-1);
2320 	}
2321 
2322 	if (capa_neg_calc(peer, &suberr) == -1) {
2323 		session_notification(peer, ERR_OPEN, suberr, NULL, 0);
2324 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2325 		return (-1);
2326 	}
2327 
2328 	return (0);
2329 }
2330 
2331 int
2332 parse_update(struct peer *peer)
2333 {
2334 	u_char		*p;
2335 	uint16_t	 datalen;
2336 
2337 	/*
2338 	 * we pass the message verbatim to the rde.
2339 	 * in case of errors the whole session is reset with a
2340 	 * notification anyway, we only need to know the peer
2341 	 */
2342 	p = peer->rbuf->rptr;
2343 	p += MSGSIZE_HEADER_MARKER;
2344 	memcpy(&datalen, p, sizeof(datalen));
2345 	datalen = ntohs(datalen);
2346 
2347 	p = peer->rbuf->rptr;
2348 	p += MSGSIZE_HEADER;	/* header is already checked */
2349 	datalen -= MSGSIZE_HEADER;
2350 
2351 	if (imsg_rde(IMSG_UPDATE, peer->conf.id, p, datalen) == -1)
2352 		return (-1);
2353 
2354 	return (0);
2355 }
2356 
2357 int
2358 parse_rrefresh(struct peer *peer)
2359 {
2360 	struct route_refresh rr;
2361 	uint16_t afi, datalen;
2362 	uint8_t aid, safi, subtype;
2363 	u_char *p;
2364 
2365 	p = peer->rbuf->rptr;
2366 	p += MSGSIZE_HEADER_MARKER;
2367 	memcpy(&datalen, p, sizeof(datalen));
2368 	datalen = ntohs(datalen);
2369 
2370 	p = peer->rbuf->rptr;
2371 	p += MSGSIZE_HEADER;	/* header is already checked */
2372 
2373 	/*
2374 	 * We could check if we actually announced the capability but
2375 	 * as long as the message is correctly encoded we don't care.
2376 	 */
2377 
2378 	/* afi, 2 byte */
2379 	memcpy(&afi, p, sizeof(afi));
2380 	afi = ntohs(afi);
2381 	p += 2;
2382 	/* subtype, 1 byte */
2383 	subtype = *p;
2384 	p += 1;
2385 	/* safi, 1 byte */
2386 	safi = *p;
2387 
2388 	/* check subtype if peer announced enhanced route refresh */
2389 	if (peer->capa.neg.enhanced_rr) {
2390 		switch (subtype) {
2391 		case ROUTE_REFRESH_REQUEST:
2392 			/* no ORF support, so no oversized RREFRESH msgs */
2393 			if (datalen != MSGSIZE_RREFRESH) {
2394 				log_peer_warnx(&peer->conf,
2395 				    "received RREFRESH: illegal len: %u byte",
2396 				    datalen);
2397 				datalen = htons(datalen);
2398 				session_notification(peer, ERR_HEADER,
2399 				    ERR_HDR_LEN, &datalen, sizeof(datalen));
2400 				bgp_fsm(peer, EVNT_CON_FATAL);
2401 				return (-1);
2402 			}
2403 			peer->stats.refresh_rcvd_req++;
2404 			break;
2405 		case ROUTE_REFRESH_BEGIN_RR:
2406 		case ROUTE_REFRESH_END_RR:
2407 			/* special handling for RFC7313 */
2408 			if (datalen != MSGSIZE_RREFRESH) {
2409 				log_peer_warnx(&peer->conf,
2410 				    "received RREFRESH: illegal len: %u byte",
2411 				    datalen);
2412 				p = peer->rbuf->rptr;
2413 				p += MSGSIZE_HEADER;
2414 				datalen -= MSGSIZE_HEADER;
2415 				session_notification(peer, ERR_RREFRESH,
2416 				    ERR_RR_INV_LEN, p, datalen);
2417 				bgp_fsm(peer, EVNT_CON_FATAL);
2418 				return (-1);
2419 			}
2420 			if (subtype == ROUTE_REFRESH_BEGIN_RR)
2421 				peer->stats.refresh_rcvd_borr++;
2422 			else
2423 				peer->stats.refresh_rcvd_eorr++;
2424 			break;
2425 		default:
2426 			log_peer_warnx(&peer->conf, "peer sent bad refresh, "
2427 			    "bad subtype %d", subtype);
2428 			return (0);
2429 		}
2430 	} else {
2431 		/* force subtype to default */
2432 		subtype = ROUTE_REFRESH_REQUEST;
2433 		peer->stats.refresh_rcvd_req++;
2434 	}
2435 
2436 	/* afi/safi unchecked -	unrecognized values will be ignored anyway */
2437 	if (afi2aid(afi, safi, &aid) == -1) {
2438 		log_peer_warnx(&peer->conf, "peer sent bad refresh, "
2439 		    "invalid afi/safi pair");
2440 		return (0);
2441 	}
2442 
2443 	if (!peer->capa.neg.refresh && !peer->capa.neg.enhanced_rr) {
2444 		log_peer_warnx(&peer->conf, "peer sent unexpected refresh");
2445 		return (0);
2446 	}
2447 
2448 	rr.aid = aid;
2449 	rr.subtype = subtype;
2450 
2451 	if (imsg_rde(IMSG_REFRESH, peer->conf.id, &rr, sizeof(rr)) == -1)
2452 		return (-1);
2453 
2454 	return (0);
2455 }
2456 
/*
 * Handle a received NOTIFICATION message found at peer->rbuf->rptr.
 *
 * The error code and subcode are logged via log_notification(), counted in
 * peer->errcnt and remembered in peer->stats.  Two kinds of notifications
 * get extra treatment:
 *  - ERR_OPEN/ERR_OPEN_CAPA: the data part is walked as a list of
 *    (code, length, value) capabilities and the matching entries in
 *    peer->capa.ann are cleared.
 *  - ERR_OPEN/ERR_OPEN_OPT: session_capa_ann_none() is called.
 *  - ERR_CEASE with admin down or admin reset: an optional length prefixed
 *    shutdown reason is copied to peer->stats.last_reason and logged.
 *
 * Returns 1 for the two ERR_OPEN cases above, -1 if the capability list of
 * an ERR_OPEN_CAPA notification is malformed, and 0 otherwise.
 */
2457 int
2458 parse_notification(struct peer *peer)
2459 {
2460 	u_char		*p;
2461 	uint16_t	 datalen;
2462 	uint8_t		 errcode;
2463 	uint8_t		 subcode;
2464 	uint8_t		 capa_code;
2465 	uint8_t		 capa_len;
2466 	size_t		 reason_len;
2467 	uint8_t		 i;
2468 
2469 	/* just log */
2470 	p = peer->rbuf->rptr;
2471 	p += MSGSIZE_HEADER_MARKER;
2472 	memcpy(&datalen, p, sizeof(datalen));
2473 	datalen = ntohs(datalen);
2474 
2475 	p = peer->rbuf->rptr;
2476 	p += MSGSIZE_HEADER;	/* header is already checked */
2477 	datalen -= MSGSIZE_HEADER;
2478 
	/* from here on datalen counts the bytes left behind p */
2479 	memcpy(&errcode, p, sizeof(errcode));
2480 	p += sizeof(errcode);
2481 	datalen -= sizeof(errcode);
2482 
2483 	memcpy(&subcode, p, sizeof(subcode));
2484 	p += sizeof(subcode);
2485 	datalen -= sizeof(subcode);
2486 
2487 	log_notification(peer, errcode, subcode, p, datalen, "received");
2488 	peer->errcnt++;
2489 	peer->stats.last_rcvd_errcode = errcode;
2490 	peer->stats.last_rcvd_suberr = subcode;
2491 
2492 	if (errcode == ERR_OPEN && subcode == ERR_OPEN_CAPA) {
2493 		if (datalen == 0) {	/* zebra likes to send those.. humbug */
2494 			log_peer_warnx(&peer->conf, "received \"unsupported "
2495 			    "capability\" notification without data part, "
2496 			    "disabling capability announcements altogether");
2497 			session_capa_ann_none(peer);
2498 		}
2499 
		/* each entry: capability code (1), length (1), value (length) */
2500 		while (datalen > 0) {
2501 			if (datalen < 2) {
2502 				log_peer_warnx(&peer->conf,
2503 				    "parse_notification: "
2504 				    "expect len >= 2, len is %u", datalen);
2505 				return (-1);
2506 			}
2507 			memcpy(&capa_code, p, sizeof(capa_code));
2508 			p += sizeof(capa_code);
2509 			datalen -= sizeof(capa_code);
2510 			memcpy(&capa_len, p, sizeof(capa_len));
2511 			p += sizeof(capa_len);
2512 			datalen -= sizeof(capa_len);
2513 			if (datalen < capa_len) {
2514 				log_peer_warnx(&peer->conf,
2515 				    "parse_notification: capa_len %u exceeds "
2516 				    "remaining msg length %u", capa_len,
2517 				    datalen);
2518 				return (-1);
2519 			}
			/* the value itself is skipped, only the code matters */
2520 			p += capa_len;
2521 			datalen -= capa_len;
2522 			switch (capa_code) {
2523 			case CAPA_MP:
2524 				for (i = 0; i < AID_MAX; i++)
2525 					peer->capa.ann.mp[i] = 0;
2526 				log_peer_warnx(&peer->conf,
2527 				    "disabling multiprotocol capability");
2528 				break;
2529 			case CAPA_REFRESH:
2530 				peer->capa.ann.refresh = 0;
2531 				log_peer_warnx(&peer->conf,
2532 				    "disabling route refresh capability");
2533 				break;
2534 			case CAPA_ROLE:
				/* only a policy value of 1 is switched off */
2535 				if (peer->capa.ann.policy == 1) {
2536 					peer->capa.ann.policy = 0;
2537 					log_peer_warnx(&peer->conf,
2538 					    "disabling role capability");
2539 				} else {
2540 					log_peer_warnx(&peer->conf,
2541 					    "role capability enforced, "
2542 					    "not disabling");
2543 				}
2544 				break;
2545 			case CAPA_RESTART:
2546 				peer->capa.ann.grestart.restart = 0;
2547 				log_peer_warnx(&peer->conf,
2548 				    "disabling restart capability");
2549 				break;
2550 			case CAPA_AS4BYTE:
2551 				peer->capa.ann.as4byte = 0;
2552 				log_peer_warnx(&peer->conf,
2553 				    "disabling 4-byte AS num capability");
2554 				break;
2555 			case CAPA_ADD_PATH:
2556 				memset(peer->capa.ann.add_path, 0,
2557 				    sizeof(peer->capa.ann.add_path));
2558 				log_peer_warnx(&peer->conf,
2559 				    "disabling ADD-PATH capability");
2560 				break;
2561 			case CAPA_ENHANCED_RR:
2562 				peer->capa.ann.enhanced_rr = 0;
2563 				log_peer_warnx(&peer->conf,
2564 				    "disabling enhanced route refresh "
2565 				    "capability");
2566 				break;
2567 			default:	/* should not happen... */
2568 				log_peer_warnx(&peer->conf, "received "
2569 				    "\"unsupported capability\" notification "
2570 				    "for unknown capability %u, disabling "
2571 				    "capability announcements altogether",
2572 				    capa_code);
2573 				session_capa_ann_none(peer);
2574 				break;
2575 			}
2576 		}
2577 
2578 		return (1);
2579 	}
2580 
2581 	if (errcode == ERR_OPEN && subcode == ERR_OPEN_OPT) {
2582 		session_capa_ann_none(peer);
2583 		return (1);
2584 	}
2585 
2586 	if (errcode == ERR_CEASE &&
2587 	    (subcode == ERR_CEASE_ADMIN_DOWN ||
2588 	     subcode == ERR_CEASE_ADMIN_RESET)) {
		/* optional shutdown reason: one length byte, then the text */
2589 		if (datalen > 1) {
2590 			reason_len = *p++;
2591 			datalen--;
2592 			if (datalen < reason_len) {
2593 				log_peer_warnx(&peer->conf,
2594 				    "received truncated shutdown reason");
2595 				return (0);
2596 			}
			/* keep one byte of last_reason for the terminating NUL */
2597 			if (reason_len > REASON_LEN - 1) {
2598 				log_peer_warnx(&peer->conf,
2599 				    "received overly long shutdown reason");
2600 				return (0);
2601 			}
2602 			memcpy(peer->stats.last_reason, p, reason_len);
2603 			peer->stats.last_reason[reason_len] = '\0';
2604 			log_peer_warnx(&peer->conf,
2605 			    "received shutdown reason: \"%s\"",
2606 			    log_reason(peer->stats.last_reason));
2607 			p += reason_len;
2608 			datalen -= reason_len;
2609 		}
2610 	}
2611 
2612 	return (0);
2613 }
2614 
/*
 * Parse the value of a capabilities optional parameter.
 *
 * d points to dlen bytes holding a sequence of capabilities, each encoded
 * as code (1 byte), length (1 byte) and value (length bytes).  Recognized
 * capabilities are recorded in peer->capa.peer; unknown codes are skipped.
 * A recognized capability with an unexpected length is logged and ignored.
 * If a valid 4-byte AS capability is present *as is overwritten with the
 * AS number it carries.
 *
 * Returns 0 on success.  Returns -1 if the encoding is truncated or if the
 * 4-byte AS capability carries AS 0; in the latter case
 * session_notification() and change_state() have already been called here.
 */
2615 int
2616 parse_capabilities(struct peer *peer, u_char *d, uint16_t dlen, uint32_t *as)
2617 {
2618 	u_char		*capa_val;
2619 	uint32_t	 remote_as;
2620 	uint16_t	 len;
2621 	uint16_t	 afi;
2622 	uint16_t	 gr_header;
2623 	uint8_t		 safi;
2624 	uint8_t		 aid;
2625 	uint8_t		 flags;
2626 	uint8_t		 capa_code;
2627 	uint8_t		 capa_len;
2628 	uint8_t		 i;
2629 
2630 	len = dlen;
2631 	while (len > 0) {
		/* code and length byte must both be present */
2632 		if (len < 2) {
2633 			log_peer_warnx(&peer->conf, "Bad capabilities attr "
2634 			    "length: %u, too short", len);
2635 			return (-1);
2636 		}
2637 		memcpy(&capa_code, d, sizeof(capa_code));
2638 		d += sizeof(capa_code);
2639 		len -= sizeof(capa_code);
2640 		memcpy(&capa_len, d, sizeof(capa_len));
2641 		d += sizeof(capa_len);
2642 		len -= sizeof(capa_len);
		/* capa_val is NULL for capabilities without a value */
2643 		if (capa_len > 0) {
2644 			if (len < capa_len) {
2645 				log_peer_warnx(&peer->conf,
2646 				    "Bad capabilities attr length: "
2647 				    "len %u smaller than capa_len %u",
2648 				    len, capa_len);
2649 				return (-1);
2650 			}
2651 			capa_val = d;
2652 			d += capa_len;
2653 			len -= capa_len;
2654 		} else
2655 			capa_val = NULL;
2656 
2657 		switch (capa_code) {
2658 		case CAPA_MP:			/* RFC 4760 */
2659 			if (capa_len != 4) {
2660 				log_peer_warnx(&peer->conf,
2661 				    "Bad multi protocol capability length: "
2662 				    "%u", capa_len);
2663 				break;
2664 			}
			/* value: afi (2 bytes), one skipped byte, safi (1 byte) */
2665 			memcpy(&afi, capa_val, sizeof(afi));
2666 			afi = ntohs(afi);
2667 			memcpy(&safi, capa_val + 3, sizeof(safi));
2668 			if (afi2aid(afi, safi, &aid) == -1) {
2669 				log_peer_warnx(&peer->conf,
2670 				    "Received multi protocol capability: "
2671 				    " unknown AFI %u, safi %u pair",
2672 				    afi, safi);
2673 				break;
2674 			}
2675 			peer->capa.peer.mp[aid] = 1;
2676 			break;
2677 		case CAPA_REFRESH:
2678 			peer->capa.peer.refresh = 1;
2679 			break;
2680 		case CAPA_ROLE:
2681 			if (capa_len != 1) {
2682 				log_peer_warnx(&peer->conf,
2683 				    "Bad role capability length: %u", capa_len);
2684 				break;
2685 			}
2686 			if (!peer->conf.ebgp) {
2687 				log_peer_warnx(&peer->conf,
2688 				    "Received role capability on iBGP session");
2689 				break;
2690 			}
2691 			peer->capa.peer.policy = 1;
2692 			peer->remote_role = capa2role(*capa_val);
2693 			break;
2694 		case CAPA_RESTART:
			/*
			 * value: a 2 byte header followed by any number of
			 * 4 byte (afi, safi, flags) entries, hence the
			 * length must be 2 modulo 4.
			 */
2695 			if (capa_len == 2) {
2696 				/* peer only supports EoR marker */
2697 				peer->capa.peer.grestart.restart = 1;
2698 				peer->capa.peer.grestart.timeout = 0;
2699 				break;
2700 			} else if (capa_len % 4 != 2) {
2701 				log_peer_warnx(&peer->conf,
2702 				    "Bad graceful restart capability length: "
2703 				    "%u", capa_len);
2704 				peer->capa.peer.grestart.restart = 0;
2705 				peer->capa.peer.grestart.timeout = 0;
2706 				break;
2707 			}
2708 
2709 			memcpy(&gr_header, capa_val, sizeof(gr_header));
2710 			gr_header = ntohs(gr_header);
2711 			peer->capa.peer.grestart.timeout =
2712 			    gr_header & CAPA_GR_TIMEMASK;
2713 			if (peer->capa.peer.grestart.timeout == 0) {
2714 				log_peer_warnx(&peer->conf, "Received "
2715 				    "graceful restart timeout is zero");
2716 				peer->capa.peer.grestart.restart = 0;
2717 				break;
2718 			}
2719 
			/* capa_len is at least 6 here, so capa_len - 4 >= 2 */
2720 			for (i = 2; i <= capa_len - 4; i += 4) {
2721 				memcpy(&afi, capa_val + i, sizeof(afi));
2722 				afi = ntohs(afi);
2723 				safi = capa_val[i + 2];
2724 				flags = capa_val[i + 3];
2725 				if (afi2aid(afi, safi, &aid) == -1) {
2726 					log_peer_warnx(&peer->conf,
2727 					    "Received graceful restart capa: "
2728 					    " unknown AFI %u, safi %u pair",
2729 					    afi, safi);
2730 					continue;
2731 				}
2732 				peer->capa.peer.grestart.flags[aid] |=
2733 				    CAPA_GR_PRESENT;
2734 				if (flags & CAPA_GR_F_FLAG)
2735 					peer->capa.peer.grestart.flags[aid] |=
2736 					    CAPA_GR_FORWARD;
2737 				if (gr_header & CAPA_GR_R_FLAG)
2738 					peer->capa.peer.grestart.flags[aid] |=
2739 					    CAPA_GR_RESTART;
2740 				peer->capa.peer.grestart.restart = 2;
2741 			}
2742 			break;
2743 		case CAPA_AS4BYTE:
2744 			if (capa_len != 4) {
2745 				log_peer_warnx(&peer->conf,
2746 				    "Bad AS4BYTE capability length: "
2747 				    "%u", capa_len);
2748 				peer->capa.peer.as4byte = 0;
2749 				break;
2750 			}
2751 			memcpy(&remote_as, capa_val, sizeof(remote_as));
2752 			*as = ntohl(remote_as);
2753 			if (*as == 0) {
2754 				log_peer_warnx(&peer->conf,
2755 				    "peer requests unacceptable AS %u", *as);
2756 				session_notification(peer, ERR_OPEN,
2757 				    ERR_OPEN_AS, NULL, 0);
2758 				change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2759 				return (-1);
2760 			}
2761 			peer->capa.peer.as4byte = 1;
2762 			break;
2763 		case CAPA_ADD_PATH:
			/* value: any number of 4 byte (afi, safi, flags) entries */
2764 			if (capa_len % 4 != 0) {
2765 				log_peer_warnx(&peer->conf,
2766 				    "Bad ADD-PATH capability length: "
2767 				    "%u", capa_len);
2768 				memset(peer->capa.peer.add_path, 0,
2769 				    sizeof(peer->capa.peer.add_path));
2770 				break;
2771 			}
			/*
			 * For capa_len == 0 the bound is negative after integer
			 * promotion, so the loop body is never entered.
			 */
2772 			for (i = 0; i <= capa_len - 4; i += 4) {
2773 				memcpy(&afi, capa_val + i, sizeof(afi));
2774 				afi = ntohs(afi);
2775 				safi = capa_val[i + 2];
2776 				flags = capa_val[i + 3];
				/* any bad entry wipes all ADD-PATH state and stops */
2777 				if (afi2aid(afi, safi, &aid) == -1) {
2778 					log_peer_warnx(&peer->conf,
2779 					    "Received ADD-PATH capa: "
2780 					    " unknown AFI %u, safi %u pair",
2781 					    afi, safi);
2782 					memset(peer->capa.peer.add_path, 0,
2783 					    sizeof(peer->capa.peer.add_path));
2784 					break;
2785 				}
2786 				if (flags & ~CAPA_AP_BIDIR) {
2787 					log_peer_warnx(&peer->conf,
2788 					    "Received ADD-PATH capa: "
2789 					    " bad flags %x", flags);
2790 					memset(peer->capa.peer.add_path, 0,
2791 					    sizeof(peer->capa.peer.add_path));
2792 					break;
2793 				}
2794 				peer->capa.peer.add_path[aid] = flags;
2795 			}
2796 			break;
2797 		case CAPA_ENHANCED_RR:
2798 			peer->capa.peer.enhanced_rr = 1;
2799 			break;
2800 		default:
			/* unknown capabilities are silently ignored */
2801 			break;
2802 		}
2803 	}
2804 
2805 	return (0);
2806 }
2807 
2808 int
2809 capa_neg_calc(struct peer *p, uint8_t *suberr)
2810 {
2811 	uint8_t	i, hasmp = 0;
2812 
2813 	/* a capability is accepted only if both sides announced it */
2814 
2815 	p->capa.neg.refresh =
2816 	    (p->capa.ann.refresh && p->capa.peer.refresh) != 0;
2817 	p->capa.neg.enhanced_rr =
2818 	    (p->capa.ann.enhanced_rr && p->capa.peer.enhanced_rr) != 0;
2819 
2820 	p->capa.neg.as4byte =
2821 	    (p->capa.ann.as4byte && p->capa.peer.as4byte) != 0;
2822 
2823 	/* MP: both side must agree on the AFI,SAFI pair */
2824 	for (i = 0; i < AID_MAX; i++) {
2825 		if (p->capa.ann.mp[i] && p->capa.peer.mp[i])
2826 			p->capa.neg.mp[i] = 1;
2827 		else
2828 			p->capa.neg.mp[i] = 0;
2829 		if (p->capa.ann.mp[i])
2830 			hasmp = 1;
2831 	}
2832 	/* if no MP capability present default to IPv4 unicast mode */
2833 	if (!hasmp)
2834 		p->capa.neg.mp[AID_INET] = 1;
2835 
2836 	/*
2837 	 * graceful restart: the peer capabilities are of interest here.
2838 	 * It is necessary to compare the new values with the previous ones
2839 	 * and act accordingly. AFI/SAFI that are not part in the MP capability
2840 	 * are treated as not being present.
2841 	 * Also make sure that a flush happens if the session stopped
2842 	 * supporting graceful restart.
2843 	 */
2844 
2845 	for (i = 0; i < AID_MAX; i++) {
2846 		int8_t	negflags;
2847 
2848 		/* disable GR if the AFI/SAFI is not present */
2849 		if ((p->capa.peer.grestart.flags[i] & CAPA_GR_PRESENT &&
2850 		    p->capa.neg.mp[i] == 0))
2851 			p->capa.peer.grestart.flags[i] = 0;	/* disable */
2852 		/* look at current GR state and decide what to do */
2853 		negflags = p->capa.neg.grestart.flags[i];
2854 		p->capa.neg.grestart.flags[i] = p->capa.peer.grestart.flags[i];
2855 		if (negflags & CAPA_GR_RESTARTING) {
2856 			if (p->capa.ann.grestart.restart != 0 &&
2857 			    p->capa.peer.grestart.flags[i] & CAPA_GR_FORWARD) {
2858 				p->capa.neg.grestart.flags[i] |=
2859 				    CAPA_GR_RESTARTING;
2860 			} else {
2861 				if (imsg_rde(IMSG_SESSION_FLUSH, p->conf.id,
2862 				    &i, sizeof(i)) == -1) {
2863 					log_peer_warnx(&p->conf,
2864 					    "imsg send failed");
2865 					return (-1);
2866 				}
2867 				log_peer_warnx(&p->conf, "graceful restart of "
2868 				    "%s, not restarted, flushing", aid2str(i));
2869 			}
2870 		}
2871 	}
2872 	p->capa.neg.grestart.timeout = p->capa.peer.grestart.timeout;
2873 	p->capa.neg.grestart.restart = p->capa.peer.grestart.restart;
2874 	if (p->capa.ann.grestart.restart == 0)
2875 		p->capa.neg.grestart.restart = 0;
2876 
2877 
2878 	/*
2879 	 * ADD-PATH: set only those bits where both sides agree.
2880 	 * For this compare our send bit with the recv bit from the peer
2881 	 * and vice versa.
2882 	 * The flags are stored from this systems view point.
2883 	 */
2884 	memset(p->capa.neg.add_path, 0, sizeof(p->capa.neg.add_path));
2885 	if (p->capa.ann.add_path[0]) {
2886 		for (i = AID_MIN; i < AID_MAX; i++) {
2887 			if ((p->capa.ann.add_path[i] & CAPA_AP_RECV) &&
2888 			    (p->capa.peer.add_path[i] & CAPA_AP_SEND)) {
2889 				p->capa.neg.add_path[i] |= CAPA_AP_RECV;
2890 				p->capa.neg.add_path[0] |= CAPA_AP_RECV;
2891 			}
2892 			if ((p->capa.ann.add_path[i] & CAPA_AP_SEND) &&
2893 			    (p->capa.peer.add_path[i] & CAPA_AP_RECV)) {
2894 				p->capa.neg.add_path[i] |= CAPA_AP_SEND;
2895 				p->capa.neg.add_path[0] |= CAPA_AP_SEND;
2896 			}
2897 		}
2898 	}
2899 
2900 	/*
2901 	 * Open policy: check that the policy is sensible.
2902 	 *
2903 	 * Make sure that the roles match and set the negotiated capability
2904 	 * to the role of the peer. So the RDE can inject the OTC attribute.
2905 	 * See RFC 9234, section 4.2.
2906 	 * These checks should only happen on ebgp sessions.
2907 	 */
2908 	if (p->capa.ann.policy != 0 && p->capa.peer.policy != 0 &&
2909 	    p->conf.ebgp) {
2910 		switch (p->conf.role) {
2911 		case ROLE_PROVIDER:
2912 			if (p->remote_role != ROLE_CUSTOMER)
2913 				goto fail;
2914 			break;
2915 		case ROLE_RS:
2916 			if (p->remote_role != ROLE_RS_CLIENT)
2917 				goto fail;
2918 			break;
2919 		case ROLE_RS_CLIENT:
2920 			if (p->remote_role != ROLE_RS)
2921 				goto fail;
2922 			break;
2923 		case ROLE_CUSTOMER:
2924 			if (p->remote_role != ROLE_PROVIDER)
2925 				goto fail;
2926 			break;
2927 		case ROLE_PEER:
2928 			if (p->remote_role != ROLE_PEER)
2929 				goto fail;
2930 			break;
2931 		default:
2932  fail:
2933 			log_peer_warnx(&p->conf, "open policy role mismatch: "
2934 			    "our role %s, their role %s",
2935 			    log_policy(p->conf.role),
2936 			    log_policy(p->remote_role));
2937 			*suberr = ERR_OPEN_ROLE;
2938 			return (-1);
2939 		}
2940 		p->capa.neg.policy = 1;
2941 	} else if (p->capa.ann.policy == 2 && p->conf.ebgp) {
2942 		/* enforce presence of open policy role capability */
2943 		log_peer_warnx(&p->conf, "open policy role enforced but "
2944 		    "not present");
2945 		*suberr = ERR_OPEN_ROLE;
2946 		return (-1);
2947 	}
2948 
2949 	return (0);
2950 }
2951 
2952 void
2953 session_dispatch_imsg(struct imsgbuf *imsgbuf, int idx, u_int *listener_cnt)
2954 {
2955 	struct imsg		 imsg;
2956 	struct mrt		 xmrt;
2957 	struct route_refresh	 rr;
2958 	struct mrt		*mrt;
2959 	struct imsgbuf		*i;
2960 	struct peer		*p;
2961 	struct listen_addr	*la, *nla;
2962 	struct session_dependon	*sdon;
2963 	u_char			*data;
2964 	int			 n, fd, depend_ok, restricted;
2965 	uint16_t		 t;
2966 	uint8_t			 aid, errcode, subcode;
2967 
2968 	while (imsgbuf) {
2969 		if ((n = imsg_get(imsgbuf, &imsg)) == -1)
2970 			fatal("session_dispatch_imsg: imsg_get error");
2971 
2972 		if (n == 0)
2973 			break;
2974 
2975 		switch (imsg.hdr.type) {
2976 		case IMSG_SOCKET_CONN:
2977 		case IMSG_SOCKET_CONN_CTL:
2978 			if (idx != PFD_PIPE_MAIN)
2979 				fatalx("reconf request not from parent");
2980 			if ((fd = imsg.fd) == -1) {
2981 				log_warnx("expected to receive imsg fd to "
2982 				    "RDE but didn't receive any");
2983 				break;
2984 			}
2985 			if ((i = malloc(sizeof(struct imsgbuf))) == NULL)
2986 				fatal(NULL);
2987 			imsg_init(i, fd);
2988 			if (imsg.hdr.type == IMSG_SOCKET_CONN) {
2989 				if (ibuf_rde) {
2990 					log_warnx("Unexpected imsg connection "
2991 					    "to RDE received");
2992 					msgbuf_clear(&ibuf_rde->w);
2993 					free(ibuf_rde);
2994 				}
2995 				ibuf_rde = i;
2996 			} else {
2997 				if (ibuf_rde_ctl) {
2998 					log_warnx("Unexpected imsg ctl "
2999 					    "connection to RDE received");
3000 					msgbuf_clear(&ibuf_rde_ctl->w);
3001 					free(ibuf_rde_ctl);
3002 				}
3003 				ibuf_rde_ctl = i;
3004 			}
3005 			break;
3006 		case IMSG_RECONF_CONF:
3007 			if (idx != PFD_PIPE_MAIN)
3008 				fatalx("reconf request not from parent");
3009 			nconf = new_config();
3010 
3011 			copy_config(nconf, imsg.data);
3012 			pending_reconf = 1;
3013 			break;
3014 		case IMSG_RECONF_PEER:
3015 			if (idx != PFD_PIPE_MAIN)
3016 				fatalx("reconf request not from parent");
3017 			if ((p = calloc(1, sizeof(struct peer))) == NULL)
3018 				fatal("new_peer");
3019 			memcpy(&p->conf, imsg.data, sizeof(struct peer_config));
3020 			p->state = p->prev_state = STATE_NONE;
3021 			p->reconf_action = RECONF_REINIT;
3022 			if (RB_INSERT(peer_head, &nconf->peers, p) != NULL)
3023 				fatalx("%s: peer tree is corrupt", __func__);
3024 			break;
3025 		case IMSG_RECONF_LISTENER:
3026 			if (idx != PFD_PIPE_MAIN)
3027 				fatalx("reconf request not from parent");
3028 			if (nconf == NULL)
3029 				fatalx("IMSG_RECONF_LISTENER but no config");
3030 			nla = imsg.data;
3031 			TAILQ_FOREACH(la, conf->listen_addrs, entry)
3032 				if (!la_cmp(la, nla))
3033 					break;
3034 
3035 			if (la == NULL) {
3036 				if (nla->reconf != RECONF_REINIT)
3037 					fatalx("king bula sez: "
3038 					    "expected REINIT");
3039 
3040 				if ((nla->fd = imsg.fd) == -1)
3041 					log_warnx("expected to receive fd for "
3042 					    "%s but didn't receive any",
3043 					    log_sockaddr((struct sockaddr *)
3044 					    &nla->sa, nla->sa_len));
3045 
3046 				la = calloc(1, sizeof(struct listen_addr));
3047 				if (la == NULL)
3048 					fatal(NULL);
3049 				memcpy(&la->sa, &nla->sa, sizeof(la->sa));
3050 				la->flags = nla->flags;
3051 				la->fd = nla->fd;
3052 				la->reconf = RECONF_REINIT;
3053 				TAILQ_INSERT_TAIL(nconf->listen_addrs, la,
3054 				    entry);
3055 			} else {
3056 				if (nla->reconf != RECONF_KEEP)
3057 					fatalx("king bula sez: expected KEEP");
3058 				la->reconf = RECONF_KEEP;
3059 			}
3060 
3061 			break;
3062 		case IMSG_RECONF_CTRL:
3063 			if (idx != PFD_PIPE_MAIN)
3064 				fatalx("reconf request not from parent");
3065 			if (imsg.hdr.len != IMSG_HEADER_SIZE +
3066 			    sizeof(restricted))
3067 				fatalx("RECONF_CTRL imsg with wrong len");
3068 			memcpy(&restricted, imsg.data, sizeof(restricted));
3069 			if (imsg.fd == -1) {
3070 				log_warnx("expected to receive fd for control "
3071 				    "socket but didn't receive any");
3072 				break;
3073 			}
3074 			if (restricted) {
3075 				control_shutdown(rcsock);
3076 				rcsock = imsg.fd;
3077 			} else {
3078 				control_shutdown(csock);
3079 				csock = imsg.fd;
3080 			}
3081 			break;
3082 		case IMSG_RECONF_DRAIN:
3083 			switch (idx) {
3084 			case PFD_PIPE_ROUTE:
3085 				if (nconf != NULL)
3086 					fatalx("got unexpected %s from RDE",
3087 					    "IMSG_RECONF_DONE");
3088 				imsg_compose(ibuf_main, IMSG_RECONF_DONE, 0, 0,
3089 				    -1, NULL, 0);
3090 				break;
3091 			case PFD_PIPE_MAIN:
3092 				if (nconf == NULL)
3093 					fatalx("got unexpected %s from parent",
3094 					    "IMSG_RECONF_DONE");
3095 				imsg_compose(ibuf_main, IMSG_RECONF_DRAIN, 0, 0,
3096 				    -1, NULL, 0);
3097 				break;
3098 			default:
3099 				fatalx("reconf request not from parent or RDE");
3100 			}
3101 			break;
3102 		case IMSG_RECONF_DONE:
3103 			if (idx != PFD_PIPE_MAIN)
3104 				fatalx("reconf request not from parent");
3105 			if (nconf == NULL)
3106 				fatalx("got IMSG_RECONF_DONE but no config");
3107 			copy_config(conf, nconf);
3108 			merge_peers(conf, nconf);
3109 
3110 			/* delete old listeners */
3111 			for (la = TAILQ_FIRST(conf->listen_addrs); la != NULL;
3112 			    la = nla) {
3113 				nla = TAILQ_NEXT(la, entry);
3114 				if (la->reconf == RECONF_NONE) {
3115 					log_info("not listening on %s any more",
3116 					    log_sockaddr((struct sockaddr *)
3117 					    &la->sa, la->sa_len));
3118 					TAILQ_REMOVE(conf->listen_addrs, la,
3119 					    entry);
3120 					close(la->fd);
3121 					free(la);
3122 				}
3123 			}
3124 
3125 			/* add new listeners */
3126 			TAILQ_CONCAT(conf->listen_addrs, nconf->listen_addrs,
3127 			    entry);
3128 
3129 			setup_listeners(listener_cnt);
3130 			free_config(nconf);
3131 			nconf = NULL;
3132 			pending_reconf = 0;
3133 			log_info("SE reconfigured");
3134 			/*
3135 			 * IMSG_RECONF_DONE is sent when the RDE drained
3136 			 * the peer config sent in merge_peers().
3137 			 */
3138 			break;
3139 		case IMSG_SESSION_DEPENDON:
3140 			if (idx != PFD_PIPE_MAIN)
3141 				fatalx("IFINFO message not from parent");
3142 			if (imsg.hdr.len != IMSG_HEADER_SIZE +
3143 			    sizeof(struct session_dependon))
3144 				fatalx("DEPENDON imsg with wrong len");
3145 			sdon = imsg.data;
3146 			depend_ok = sdon->depend_state;
3147 
3148 			RB_FOREACH(p, peer_head, &conf->peers)
3149 				if (!strcmp(p->conf.if_depend, sdon->ifname)) {
3150 					if (depend_ok && !p->depend_ok) {
3151 						p->depend_ok = depend_ok;
3152 						bgp_fsm(p, EVNT_START);
3153 					} else if (!depend_ok && p->depend_ok) {
3154 						p->depend_ok = depend_ok;
3155 						session_stop(p,
3156 						    ERR_CEASE_OTHER_CHANGE);
3157 					}
3158 				}
3159 			break;
3160 		case IMSG_MRT_OPEN:
3161 		case IMSG_MRT_REOPEN:
3162 			if (imsg.hdr.len > IMSG_HEADER_SIZE +
3163 			    sizeof(struct mrt)) {
3164 				log_warnx("wrong imsg len");
3165 				break;
3166 			}
3167 
3168 			memcpy(&xmrt, imsg.data, sizeof(struct mrt));
3169 			if ((xmrt.wbuf.fd = imsg.fd) == -1)
3170 				log_warnx("expected to receive fd for mrt dump "
3171 				    "but didn't receive any");
3172 
3173 			mrt = mrt_get(&mrthead, &xmrt);
3174 			if (mrt == NULL) {
3175 				/* new dump */
3176 				mrt = calloc(1, sizeof(struct mrt));
3177 				if (mrt == NULL)
3178 					fatal("session_dispatch_imsg");
3179 				memcpy(mrt, &xmrt, sizeof(struct mrt));
3180 				TAILQ_INIT(&mrt->wbuf.bufs);
3181 				LIST_INSERT_HEAD(&mrthead, mrt, entry);
3182 			} else {
3183 				/* old dump reopened */
3184 				close(mrt->wbuf.fd);
3185 				mrt->wbuf.fd = xmrt.wbuf.fd;
3186 			}
3187 			break;
3188 		case IMSG_MRT_CLOSE:
3189 			if (imsg.hdr.len > IMSG_HEADER_SIZE +
3190 			    sizeof(struct mrt)) {
3191 				log_warnx("wrong imsg len");
3192 				break;
3193 			}
3194 
3195 			memcpy(&xmrt, imsg.data, sizeof(struct mrt));
3196 			mrt = mrt_get(&mrthead, &xmrt);
3197 			if (mrt != NULL)
3198 				mrt_done(mrt);
3199 			break;
3200 		case IMSG_CTL_KROUTE:
3201 		case IMSG_CTL_KROUTE_ADDR:
3202 		case IMSG_CTL_SHOW_NEXTHOP:
3203 		case IMSG_CTL_SHOW_INTERFACE:
3204 		case IMSG_CTL_SHOW_FIB_TABLES:
3205 		case IMSG_CTL_SHOW_RTR:
3206 		case IMSG_CTL_SHOW_TIMER:
3207 			if (idx != PFD_PIPE_MAIN)
3208 				fatalx("ctl kroute request not from parent");
3209 			control_imsg_relay(&imsg, NULL);
3210 			break;
3211 		case IMSG_CTL_SHOW_NEIGHBOR:
3212 			if (idx != PFD_PIPE_ROUTE_CTL)
3213 				fatalx("ctl rib request not from RDE");
3214 			p = getpeerbyid(conf, imsg.hdr.peerid);
3215 			control_imsg_relay(&imsg, p);
3216 			break;
3217 		case IMSG_CTL_SHOW_RIB:
3218 		case IMSG_CTL_SHOW_RIB_PREFIX:
3219 		case IMSG_CTL_SHOW_RIB_COMMUNITIES:
3220 		case IMSG_CTL_SHOW_RIB_ATTR:
3221 		case IMSG_CTL_SHOW_RIB_MEM:
3222 		case IMSG_CTL_SHOW_NETWORK:
3223 		case IMSG_CTL_SHOW_FLOWSPEC:
3224 		case IMSG_CTL_SHOW_SET:
3225 			if (idx != PFD_PIPE_ROUTE_CTL)
3226 				fatalx("ctl rib request not from RDE");
3227 			control_imsg_relay(&imsg, NULL);
3228 			break;
3229 		case IMSG_CTL_END:
3230 		case IMSG_CTL_RESULT:
3231 			control_imsg_relay(&imsg, NULL);
3232 			break;
3233 		case IMSG_UPDATE:
3234 			if (idx != PFD_PIPE_ROUTE)
3235 				fatalx("update request not from RDE");
3236 			if (imsg.hdr.len > IMSG_HEADER_SIZE +
3237 			    MAX_PKTSIZE - MSGSIZE_HEADER ||
3238 			    imsg.hdr.len < IMSG_HEADER_SIZE +
3239 			    MSGSIZE_UPDATE_MIN - MSGSIZE_HEADER)
3240 				log_warnx("RDE sent invalid update");
3241 			else
3242 				session_update(imsg.hdr.peerid, imsg.data,
3243 				    imsg.hdr.len - IMSG_HEADER_SIZE);
3244 			break;
3245 		case IMSG_UPDATE_ERR:
3246 			if (idx != PFD_PIPE_ROUTE)
3247 				fatalx("update request not from RDE");
3248 			if (imsg.hdr.len < IMSG_HEADER_SIZE + 2) {
3249 				log_warnx("RDE sent invalid notification");
3250 				break;
3251 			}
3252 			if ((p = getpeerbyid(conf, imsg.hdr.peerid)) == NULL) {
3253 				log_warnx("no such peer: id=%u",
3254 				    imsg.hdr.peerid);
3255 				break;
3256 			}
3257 			data = imsg.data;
3258 			errcode = *data++;
3259 			subcode = *data++;
3260 
3261 			if (imsg.hdr.len == IMSG_HEADER_SIZE + 2)
3262 				data = NULL;
3263 
3264 			session_notification(p, errcode, subcode,
3265 			    data, imsg.hdr.len - IMSG_HEADER_SIZE - 2);
3266 			switch (errcode) {
3267 			case ERR_CEASE:
3268 				switch (subcode) {
3269 				case ERR_CEASE_MAX_PREFIX:
3270 				case ERR_CEASE_MAX_SENT_PREFIX:
3271 					t = p->conf.max_out_prefix_restart;
3272 					if (subcode == ERR_CEASE_MAX_PREFIX)
3273 						t = p->conf.max_prefix_restart;
3274 
3275 					bgp_fsm(p, EVNT_STOP);
3276 					if (t)
3277 						timer_set(&p->timers,
3278 						    Timer_IdleHold, 60 * t);
3279 					break;
3280 				default:
3281 					bgp_fsm(p, EVNT_CON_FATAL);
3282 					break;
3283 				}
3284 				break;
3285 			default:
3286 				bgp_fsm(p, EVNT_CON_FATAL);
3287 				break;
3288 			}
3289 			break;
3290 		case IMSG_REFRESH:
3291 			if (idx != PFD_PIPE_ROUTE)
3292 				fatalx("route refresh request not from RDE");
3293 			if (imsg.hdr.len < IMSG_HEADER_SIZE + sizeof(rr)) {
3294 				log_warnx("RDE sent invalid refresh msg");
3295 				break;
3296 			}
3297 			if ((p = getpeerbyid(conf, imsg.hdr.peerid)) == NULL) {
3298 				log_warnx("no such peer: id=%u",
3299 				    imsg.hdr.peerid);
3300 				break;
3301 			}
3302 			memcpy(&rr, imsg.data, sizeof(rr));
3303 			if (rr.aid >= AID_MAX)
3304 				fatalx("IMSG_REFRESH: bad AID");
3305 			session_rrefresh(p, rr.aid, rr.subtype);
3306 			break;
3307 		case IMSG_SESSION_RESTARTED:
3308 			if (idx != PFD_PIPE_ROUTE)
3309 				fatalx("update request not from RDE");
3310 			if (imsg.hdr.len < IMSG_HEADER_SIZE + sizeof(aid)) {
3311 				log_warnx("RDE sent invalid restart msg");
3312 				break;
3313 			}
3314 			if ((p = getpeerbyid(conf, imsg.hdr.peerid)) == NULL) {
3315 				log_warnx("no such peer: id=%u",
3316 				    imsg.hdr.peerid);
3317 				break;
3318 			}
3319 			memcpy(&aid, imsg.data, sizeof(aid));
3320 			if (aid >= AID_MAX)
3321 				fatalx("IMSG_SESSION_RESTARTED: bad AID");
3322 			if (p->capa.neg.grestart.flags[aid] &
3323 			    CAPA_GR_RESTARTING) {
3324 				log_peer_warnx(&p->conf,
3325 				    "graceful restart of %s finished",
3326 				    aid2str(aid));
3327 				p->capa.neg.grestart.flags[aid] &=
3328 				    ~CAPA_GR_RESTARTING;
3329 				timer_stop(&p->timers, Timer_RestartTimeout);
3330 
3331 				/* signal back to RDE to cleanup stale routes */
3332 				if (imsg_rde(IMSG_SESSION_RESTARTED,
3333 				    imsg.hdr.peerid, &aid, sizeof(aid)) == -1)
3334 					fatal("imsg_compose: "
3335 					    "IMSG_SESSION_RESTARTED");
3336 			}
3337 			break;
3338 		case IMSG_SESSION_DOWN:
3339 			if (idx != PFD_PIPE_ROUTE)
3340 				fatalx("update request not from RDE");
3341 			if ((p = getpeerbyid(conf, imsg.hdr.peerid)) == NULL) {
3342 				log_warnx("no such peer: id=%u",
3343 				    imsg.hdr.peerid);
3344 				break;
3345 			}
3346 			session_stop(p, ERR_CEASE_ADMIN_DOWN);
3347 			break;
3348 		default:
3349 			break;
3350 		}
3351 		imsg_free(&imsg);
3352 	}
3353 }
3354 
3355 int
3356 la_cmp(struct listen_addr *a, struct listen_addr *b)
3357 {
3358 	struct sockaddr_in	*in_a, *in_b;
3359 	struct sockaddr_in6	*in6_a, *in6_b;
3360 
3361 	if (a->sa.ss_family != b->sa.ss_family)
3362 		return (1);
3363 
3364 	switch (a->sa.ss_family) {
3365 	case AF_INET:
3366 		in_a = (struct sockaddr_in *)&a->sa;
3367 		in_b = (struct sockaddr_in *)&b->sa;
3368 		if (in_a->sin_addr.s_addr != in_b->sin_addr.s_addr)
3369 			return (1);
3370 		if (in_a->sin_port != in_b->sin_port)
3371 			return (1);
3372 		break;
3373 	case AF_INET6:
3374 		in6_a = (struct sockaddr_in6 *)&a->sa;
3375 		in6_b = (struct sockaddr_in6 *)&b->sa;
3376 		if (memcmp(&in6_a->sin6_addr, &in6_b->sin6_addr,
3377 		    sizeof(struct in6_addr)))
3378 			return (1);
3379 		if (in6_a->sin6_port != in6_b->sin6_port)
3380 			return (1);
3381 		break;
3382 	default:
3383 		fatal("king bula sez: unknown address family");
3384 		/* NOTREACHED */
3385 	}
3386 
3387 	return (0);
3388 }
3389 
3390 struct peer *
3391 getpeerbydesc(struct bgpd_config *c, const char *descr)
3392 {
3393 	struct peer	*p, *res = NULL;
3394 	int		 match = 0;
3395 
3396 	RB_FOREACH(p, peer_head, &c->peers)
3397 		if (!strcmp(p->conf.descr, descr)) {
3398 			res = p;
3399 			match++;
3400 		}
3401 
3402 	if (match > 1)
3403 		log_info("neighbor description \"%s\" not unique, request "
3404 		    "aborted", descr);
3405 
3406 	if (match == 1)
3407 		return (res);
3408 	else
3409 		return (NULL);
3410 }
3411 
3412 struct peer *
3413 getpeerbyip(struct bgpd_config *c, struct sockaddr *ip)
3414 {
3415 	struct bgpd_addr addr;
3416 	struct peer	*p, *newpeer, *loose = NULL;
3417 	uint32_t	 id;
3418 
3419 	sa2addr(ip, &addr, NULL);
3420 
3421 	/* we might want a more effective way to find peers by IP */
3422 	RB_FOREACH(p, peer_head, &c->peers)
3423 		if (!p->conf.template &&
3424 		    !memcmp(&addr, &p->conf.remote_addr, sizeof(addr)))
3425 			return (p);
3426 
3427 	/* try template matching */
3428 	RB_FOREACH(p, peer_head, &c->peers)
3429 		if (p->conf.template &&
3430 		    p->conf.remote_addr.aid == addr.aid &&
3431 		    session_match_mask(p, &addr))
3432 			if (loose == NULL || loose->conf.remote_masklen <
3433 			    p->conf.remote_masklen)
3434 				loose = p;
3435 
3436 	if (loose != NULL) {
3437 		/* clone */
3438 		if ((newpeer = malloc(sizeof(struct peer))) == NULL)
3439 			fatal(NULL);
3440 		memcpy(newpeer, loose, sizeof(struct peer));
3441 		for (id = PEER_ID_DYN_MAX; id > PEER_ID_STATIC_MAX; id--) {
3442 			if (getpeerbyid(c, id) == NULL)	/* we found a free id */
3443 				break;
3444 		}
3445 		newpeer->template = loose;
3446 		session_template_clone(newpeer, ip, id, 0);
3447 		newpeer->state = newpeer->prev_state = STATE_NONE;
3448 		newpeer->reconf_action = RECONF_KEEP;
3449 		newpeer->rbuf = NULL;
3450 		newpeer->rpending = 0;
3451 		init_peer(newpeer);
3452 		bgp_fsm(newpeer, EVNT_START);
3453 		if (RB_INSERT(peer_head, &c->peers, newpeer) != NULL)
3454 			fatalx("%s: peer tree is corrupt", __func__);
3455 		return (newpeer);
3456 	}
3457 
3458 	return (NULL);
3459 }
3460 
3461 struct peer *
3462 getpeerbyid(struct bgpd_config *c, uint32_t peerid)
3463 {
3464 	static struct peer lookup;
3465 
3466 	lookup.conf.id = peerid;
3467 
3468 	return RB_FIND(peer_head, &c->peers, &lookup);
3469 }
3470 
3471 int
3472 peer_matched(struct peer *p, struct ctl_neighbor *n)
3473 {
3474 	char *s;
3475 
3476 	if (n && n->addr.aid) {
3477 		if (memcmp(&p->conf.remote_addr, &n->addr,
3478 		    sizeof(p->conf.remote_addr)))
3479 			return 0;
3480 	} else if (n && n->descr[0]) {
3481 		s = n->is_group ? p->conf.group : p->conf.descr;
3482 		/* cannot trust n->descr to be properly terminated */
3483 		if (strncmp(s, n->descr, sizeof(n->descr)))
3484 			return 0;
3485 	}
3486 	return 1;
3487 }
3488 
3489 void
3490 session_template_clone(struct peer *p, struct sockaddr *ip, uint32_t id,
3491     uint32_t as)
3492 {
3493 	struct bgpd_addr	remote_addr;
3494 
3495 	if (ip)
3496 		sa2addr(ip, &remote_addr, NULL);
3497 	else
3498 		memcpy(&remote_addr, &p->conf.remote_addr, sizeof(remote_addr));
3499 
3500 	memcpy(&p->conf, &p->template->conf, sizeof(struct peer_config));
3501 
3502 	p->conf.id = id;
3503 
3504 	if (as) {
3505 		p->conf.remote_as = as;
3506 		p->conf.ebgp = (p->conf.remote_as != p->conf.local_as);
3507 		if (!p->conf.ebgp)
3508 			/* force enforce_as off for iBGP sessions */
3509 			p->conf.enforce_as = ENFORCE_AS_OFF;
3510 	}
3511 
3512 	memcpy(&p->conf.remote_addr, &remote_addr, sizeof(remote_addr));
3513 	switch (p->conf.remote_addr.aid) {
3514 	case AID_INET:
3515 		p->conf.remote_masklen = 32;
3516 		break;
3517 	case AID_INET6:
3518 		p->conf.remote_masklen = 128;
3519 		break;
3520 	}
3521 	p->conf.template = 0;
3522 }
3523 
3524 int
3525 session_match_mask(struct peer *p, struct bgpd_addr *a)
3526 {
3527 	struct bgpd_addr masked;
3528 
3529 	applymask(&masked, a, p->conf.remote_masklen);
3530 	if (memcmp(&masked, &p->conf.remote_addr, sizeof(masked)) == 0)
3531 		return (1);
3532 	return (0);
3533 }
3534 
3535 void
3536 session_down(struct peer *peer)
3537 {
3538 	memset(&peer->capa.neg, 0, sizeof(peer->capa.neg));
3539 	peer->stats.last_updown = getmonotime();
3540 	/*
3541 	 * session_down is called in the exit code path so check
3542 	 * if the RDE is still around, if not there is no need to
3543 	 * send the message.
3544 	 */
3545 	if (ibuf_rde == NULL)
3546 		return;
3547 	if (imsg_rde(IMSG_SESSION_DOWN, peer->conf.id, NULL, 0) == -1)
3548 		fatalx("imsg_compose error");
3549 }
3550 
3551 void
3552 session_up(struct peer *p)
3553 {
3554 	struct session_up	 sup;
3555 
3556 	if (imsg_rde(IMSG_SESSION_ADD, p->conf.id,
3557 	    &p->conf, sizeof(p->conf)) == -1)
3558 		fatalx("imsg_compose error");
3559 
3560 	if (p->local.aid == AID_INET) {
3561 		sup.local_v4_addr = p->local;
3562 		sup.local_v6_addr = p->local_alt;
3563 	} else {
3564 		sup.local_v6_addr = p->local;
3565 		sup.local_v4_addr = p->local_alt;
3566 	}
3567 	sup.remote_addr = p->remote;
3568 	sup.if_scope = p->if_scope;
3569 
3570 	sup.remote_bgpid = p->remote_bgpid;
3571 	sup.short_as = p->short_as;
3572 	memcpy(&sup.capa, &p->capa.neg, sizeof(sup.capa));
3573 	p->stats.last_updown = getmonotime();
3574 	if (imsg_rde(IMSG_SESSION_UP, p->conf.id, &sup, sizeof(sup)) == -1)
3575 		fatalx("imsg_compose error");
3576 }
3577 
3578 int
3579 imsg_ctl_parent(int type, uint32_t peerid, pid_t pid, void *data,
3580     uint16_t datalen)
3581 {
3582 	return imsg_compose(ibuf_main, type, peerid, pid, -1, data, datalen);
3583 }
3584 
3585 int
3586 imsg_ctl_rde(int type, uint32_t peerid, pid_t pid, void *data, uint16_t datalen)
3587 {
3588 	if (ibuf_rde_ctl == NULL)
3589 		return (0);
3590 
3591 	/*
3592 	 * Use control socket to talk to RDE to bypass the queue of the
3593 	 * regular imsg socket.
3594 	 */
3595 	return imsg_compose(ibuf_rde_ctl, type, peerid, pid, -1, data, datalen);
3596 }
3597 
3598 int
3599 imsg_rde(int type, uint32_t peerid, void *data, uint16_t datalen)
3600 {
3601 	if (ibuf_rde == NULL)
3602 		return (0);
3603 
3604 	return imsg_compose(ibuf_rde, type, peerid, 0, -1, data, datalen);
3605 }
3606 
3607 void
3608 session_demote(struct peer *p, int level)
3609 {
3610 	struct demote_msg	msg;
3611 
3612 	strlcpy(msg.demote_group, p->conf.demote_group,
3613 	    sizeof(msg.demote_group));
3614 	msg.level = level;
3615 	if (imsg_compose(ibuf_main, IMSG_DEMOTE, p->conf.id, 0, -1,
3616 	    &msg, sizeof(msg)) == -1)
3617 		fatalx("imsg_compose error");
3618 
3619 	p->demoted += level;
3620 }
3621 
3622 void
3623 session_stop(struct peer *peer, uint8_t subcode)
3624 {
3625 	char data[REASON_LEN];
3626 	size_t datalen;
3627 	size_t reason_len;
3628 	char *communication;
3629 
3630 	datalen = 0;
3631 	communication = peer->conf.reason;
3632 
3633 	if ((subcode == ERR_CEASE_ADMIN_DOWN ||
3634 	    subcode == ERR_CEASE_ADMIN_RESET)
3635 	    && communication && *communication) {
3636 		reason_len = strlen(communication);
3637 		if (reason_len > REASON_LEN - 1) {
3638 		    log_peer_warnx(&peer->conf,
3639 			"trying to send overly long shutdown reason");
3640 		} else {
3641 			data[0] = reason_len;
3642 			datalen = reason_len + sizeof(data[0]);
3643 			memcpy(data + 1, communication, reason_len);
3644 		}
3645 	}
3646 	switch (peer->state) {
3647 	case STATE_OPENSENT:
3648 	case STATE_OPENCONFIRM:
3649 	case STATE_ESTABLISHED:
3650 		session_notification(peer, ERR_CEASE, subcode, data, datalen);
3651 		break;
3652 	default:
3653 		/* session not open, no need to send notification */
3654 		break;
3655 	}
3656 	bgp_fsm(peer, EVNT_STOP);
3657 }
3658 
3659 void
3660 merge_peers(struct bgpd_config *c, struct bgpd_config *nc)
3661 {
3662 	struct peer *p, *np, *next;
3663 
3664 	RB_FOREACH(p, peer_head, &c->peers) {
3665 		/* templates are handled specially */
3666 		if (p->template != NULL)
3667 			continue;
3668 		np = getpeerbyid(nc, p->conf.id);
3669 		if (np == NULL) {
3670 			p->reconf_action = RECONF_DELETE;
3671 			continue;
3672 		}
3673 
3674 		/* peer no longer uses TCP MD5SIG so deconfigure */
3675 		if (p->conf.auth.method == AUTH_MD5SIG &&
3676 		    np->conf.auth.method != AUTH_MD5SIG)
3677 			tcp_md5_del_listener(c, p);
3678 		else if (np->conf.auth.method == AUTH_MD5SIG)
3679 			tcp_md5_add_listener(c, np);
3680 
3681 		memcpy(&p->conf, &np->conf, sizeof(p->conf));
3682 		RB_REMOVE(peer_head, &nc->peers, np);
3683 		free(np);
3684 
3685 		p->reconf_action = RECONF_KEEP;
3686 
3687 		/* had demotion, is demoted, demote removed? */
3688 		if (p->demoted && !p->conf.demote_group[0])
3689 			session_demote(p, -1);
3690 
3691 		/* if session is not open then refresh pfkey data */
3692 		if (p->state < STATE_OPENSENT && !p->template)
3693 			imsg_compose(ibuf_main, IMSG_PFKEY_RELOAD,
3694 			    p->conf.id, 0, -1, NULL, 0);
3695 
3696 		/* sync the RDE in case we keep the peer */
3697 		if (imsg_rde(IMSG_SESSION_ADD, p->conf.id,
3698 		    &p->conf, sizeof(struct peer_config)) == -1)
3699 			fatalx("imsg_compose error");
3700 
3701 		/* apply the config to all clones of a template */
3702 		if (p->conf.template) {
3703 			struct peer *xp;
3704 			RB_FOREACH(xp, peer_head, &c->peers) {
3705 				if (xp->template != p)
3706 					continue;
3707 				session_template_clone(xp, NULL, xp->conf.id,
3708 				    xp->conf.remote_as);
3709 				if (imsg_rde(IMSG_SESSION_ADD, xp->conf.id,
3710 				    &xp->conf, sizeof(xp->conf)) == -1)
3711 					fatalx("imsg_compose error");
3712 			}
3713 		}
3714 	}
3715 
3716 	if (imsg_rde(IMSG_RECONF_DRAIN, 0, NULL, 0) == -1)
3717 		fatalx("imsg_compose error");
3718 
3719 	/* pfkeys of new peers already loaded by the parent process */
3720 	RB_FOREACH_SAFE(np, peer_head, &nc->peers, next) {
3721 		RB_REMOVE(peer_head, &nc->peers, np);
3722 		if (RB_INSERT(peer_head, &c->peers, np) != NULL)
3723 			fatalx("%s: peer tree is corrupt", __func__);
3724 		if (np->conf.auth.method == AUTH_MD5SIG)
3725 			tcp_md5_add_listener(c, np);
3726 	}
3727 }
3728