xref: /openbsd-src/usr.sbin/bgpd/session.c (revision 3374c67d44f9b75b98444cbf63020f777792342e)
1 /*	$OpenBSD: session.c,v 1.439 2023/01/04 14:33:30 claudio Exp $ */
2 
3 /*
4  * Copyright (c) 2003, 2004, 2005 Henning Brauer <henning@openbsd.org>
5  * Copyright (c) 2017 Peter van Dijk <peter.van.dijk@powerdns.com>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 
20 #include <sys/types.h>
21 
22 #include <sys/mman.h>
23 #include <sys/socket.h>
24 #include <sys/time.h>
25 #include <sys/resource.h>
26 #include <sys/un.h>
27 #include <netinet/in.h>
28 #include <netinet/ip.h>
29 #include <netinet/tcp.h>
30 #include <arpa/inet.h>
31 #include <limits.h>
32 
33 #include <err.h>
34 #include <errno.h>
35 #include <fcntl.h>
36 #include <ifaddrs.h>
37 #include <poll.h>
38 #include <pwd.h>
39 #include <signal.h>
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include <string.h>
43 #include <syslog.h>
44 #include <unistd.h>
45 
46 #include "bgpd.h"
47 #include "session.h"
48 #include "log.h"
49 
/*
 * Fixed slots at the start of the pollfd array built by session_main():
 * the three imsg pipes and the two control sockets.  The listener sockets
 * occupy the slots from PFD_LISTENERS_START onwards, followed by the
 * per-peer, mrt and control-connection descriptors.
 */
#define PFD_PIPE_MAIN		0
#define PFD_PIPE_ROUTE		1
#define PFD_PIPE_ROUTE_CTL	2
#define PFD_SOCK_CTL		3
#define PFD_SOCK_RCTL		4
#define PFD_LISTENERS_START	5
56 
/* forward declarations for the functions used in this file */
void	session_sighdlr(int);
int	setup_listeners(u_int *);
void	init_peer(struct peer *);
void	start_timer_holdtime(struct peer *);
void	start_timer_keepalive(struct peer *);
void	session_close_connection(struct peer *);
void	change_state(struct peer *, enum session_state, enum session_events);
int	session_setup_socket(struct peer *);
void	session_accept(int);
int	session_connect(struct peer *);
void	session_tcp_established(struct peer *);
void	session_capa_ann_none(struct peer *);
int	session_capa_add(struct ibuf *, uint8_t, uint8_t);
int	session_capa_add_mp(struct ibuf *, uint8_t);
int	session_capa_add_afi(struct peer *, struct ibuf *, uint8_t, uint8_t);
struct bgp_msg	*session_newmsg(enum msg_type, uint16_t);
int	session_sendmsg(struct bgp_msg *, struct peer *);
void	session_open(struct peer *);
void	session_keepalive(struct peer *);
void	session_update(uint32_t, void *, size_t);
void	session_notification(struct peer *, uint8_t, uint8_t, void *,
	    ssize_t);
void	session_rrefresh(struct peer *, uint8_t, uint8_t);
int	session_graceful_restart(struct peer *);
int	session_graceful_stop(struct peer *);
int	session_dispatch_msg(struct pollfd *, struct peer *);
void	session_process_msg(struct peer *);
int	parse_header(struct peer *, u_char *, uint16_t *, uint8_t *);
int	parse_open(struct peer *);
int	parse_update(struct peer *);
int	parse_rrefresh(struct peer *);
int	parse_notification(struct peer *);
int	parse_capabilities(struct peer *, u_char *, uint16_t, uint32_t *);
int	capa_neg_calc(struct peer *, uint8_t *);
void	session_dispatch_imsg(struct imsgbuf *, int, u_int *);
void	session_up(struct peer *);
void	session_down(struct peer *);
int	imsg_rde(int, uint32_t, void *, uint16_t);
void	session_demote(struct peer *, int);
void	merge_peers(struct bgpd_config *, struct bgpd_config *);

int		 la_cmp(struct listen_addr *, struct listen_addr *);
void		 session_template_clone(struct peer *, struct sockaddr *,
		    uint32_t, uint32_t);
int		 session_match_mask(struct peer *, struct bgpd_addr *);
102 
/*
 * conf is the configuration the main loop operates on.
 * NOTE(review): nconf is not referenced in this part of the file;
 * presumably it holds the configuration being received during a reload.
 */
static struct bgpd_config	*conf, *nconf;
/* imsg channels to the RDE, the RDE control pipe and the parent process */
static struct imsgbuf		*ibuf_rde;
static struct imsgbuf		*ibuf_rde_ctl;
static struct imsgbuf		*ibuf_main;

struct bgpd_sysdep	 sysdep;
/* set by session_sighdlr(); ends the main loop in session_main() */
volatile sig_atomic_t	 session_quit;
/* while non-zero the main loop does not init, reinit or delete peers */
int			 pending_reconf;
/* control sockets; rcsock is handed to control_accept() with flag 1 */
int			 csock = -1, rcsock = -1;
/* peers that went through init_peer() and have not been removed yet */
u_int			 peer_cnt;

struct mrt_head		 mrthead;
/* 0, or the monotonic time at which accept() ran out of descriptors */
time_t			 pauseaccept;
116 
117 static inline int
118 peer_compare(const struct peer *a, const struct peer *b)
119 {
120 	return a->conf.id - b->conf.id;
121 }
122 
123 RB_GENERATE(peer_head, peer, entry, peer_compare);
124 
125 void
126 session_sighdlr(int sig)
127 {
128 	switch (sig) {
129 	case SIGINT:
130 	case SIGTERM:
131 		session_quit = 1;
132 		break;
133 	}
134 }
135 
136 int
137 setup_listeners(u_int *la_cnt)
138 {
139 	int			 ttl = 255;
140 	struct listen_addr	*la;
141 	u_int			 cnt = 0;
142 
143 	TAILQ_FOREACH(la, conf->listen_addrs, entry) {
144 		la->reconf = RECONF_NONE;
145 		cnt++;
146 
147 		if (la->flags & LISTENER_LISTENING)
148 			continue;
149 
150 		if (la->fd == -1) {
151 			log_warn("cannot establish listener on %s: invalid fd",
152 			    log_sockaddr((struct sockaddr *)&la->sa,
153 			    la->sa_len));
154 			continue;
155 		}
156 
157 		if (tcp_md5_prep_listener(la, &conf->peers) == -1)
158 			fatal("tcp_md5_prep_listener");
159 
160 		/* set ttl to 255 so that ttl-security works */
161 		if (la->sa.ss_family == AF_INET && setsockopt(la->fd,
162 		    IPPROTO_IP, IP_TTL, &ttl, sizeof(ttl)) == -1) {
163 			log_warn("setup_listeners setsockopt TTL");
164 			continue;
165 		}
166 		if (la->sa.ss_family == AF_INET6 && setsockopt(la->fd,
167 		    IPPROTO_IPV6, IPV6_UNICAST_HOPS, &ttl, sizeof(ttl)) == -1) {
168 			log_warn("setup_listeners setsockopt hoplimit");
169 			continue;
170 		}
171 
172 		if (listen(la->fd, MAX_BACKLOG)) {
173 			close(la->fd);
174 			fatal("listen");
175 		}
176 
177 		la->flags |= LISTENER_LISTENING;
178 
179 		log_info("listening on %s",
180 		    log_sockaddr((struct sockaddr *)&la->sa, la->sa_len));
181 	}
182 
183 	*la_cnt = cnt;
184 
185 	return (0);
186 }
187 
/*
 * Entry point of the session engine process; does not return.
 *
 * debug and verbose are passed on to log_init() and log_setverbose().
 *
 * After setting up logging the process chroots into the home directory of
 * BGPD_USER, drops privileges to that user, pledges and installs its
 * signal handlers.  It then loops until session_quit is set.  Each
 * iteration:
 *   - initializes, reinitializes or deletes peers (unless a reconfigure
 *     is pending),
 *   - grows the peer_l, mrt_l and pfd arrays if needed,
 *   - fills the pollfd array: imsg pipes, control sockets, listeners,
 *     peers, mrt dumps with queued data and control connections,
 *   - runs the timer that is due for each peer and derives the poll
 *     timeout from the next timer,
 *   - polls and dispatches the results.
 * On exit all peers are stopped and freed, the mrt list, the
 * configuration and the pipes are torn down and exit(0) is called.
 */
void
session_main(int debug, int verbose)
{
	int			 timeout;
	unsigned int		 i, j, idx_peers, idx_listeners, idx_mrts;
	u_int			 pfd_elms = 0, peer_l_elms = 0, mrt_l_elms = 0;
	u_int			 listener_cnt, ctl_cnt, mrt_cnt;
	u_int			 new_cnt;
	struct passwd		*pw;
	struct peer		*p, **peer_l = NULL, *next;
	struct mrt		*m, *xm, **mrt_l = NULL;
	struct pollfd		*pfd = NULL;
	struct listen_addr	*la;
	void			*newp;
	time_t			 now;
	short			 events;

	log_init(debug, LOG_DAEMON);
	log_setverbose(verbose);

	log_procinit(log_procnames[PROC_SE]);

	if ((pw = getpwnam(BGPD_USER)) == NULL)
		fatal(NULL);

	/* confine the process to the home directory of BGPD_USER */
	if (chroot(pw->pw_dir) == -1)
		fatal("chroot");
	if (chdir("/") == -1)
		fatal("chdir(\"/\")");

	setproctitle("session engine");

	if (setgroups(1, &pw->pw_gid) ||
	    setresgid(pw->pw_gid, pw->pw_gid, pw->pw_gid) ||
	    setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid))
		fatal("can't drop privileges");

	if (pledge("stdio inet recvfd", NULL) == -1)
		fatal("pledge");

	/* only SIGTERM and SIGINT are acted upon, see session_sighdlr() */
	signal(SIGTERM, session_sighdlr);
	signal(SIGINT, session_sighdlr);
	signal(SIGPIPE, SIG_IGN);
	signal(SIGHUP, SIG_IGN);
	signal(SIGALRM, SIG_IGN);
	signal(SIGUSR1, SIG_IGN);

	/* the imsg channel to the parent lives on descriptor 3 */
	if ((ibuf_main = malloc(sizeof(struct imsgbuf))) == NULL)
		fatal(NULL);
	imsg_init(ibuf_main, 3);

	LIST_INIT(&mrthead);
	listener_cnt = 0;
	peer_cnt = 0;
	ctl_cnt = 0;

	conf = new_config();
	log_info("session engine ready");

	while (session_quit == 0) {
		/* check for peers to be initialized or deleted */
		if (!pending_reconf) {
			RB_FOREACH_SAFE(p, peer_head, &conf->peers, next) {
				/* cloned peer that idled out? */
				if (p->template && (p->state == STATE_IDLE ||
				    p->state == STATE_ACTIVE) &&
				    getmonotime() - p->stats.last_updown >=
				    INTERVAL_HOLD_CLONED)
					p->reconf_action = RECONF_DELETE;

				/* new peer that needs init? */
				if (p->state == STATE_NONE)
					init_peer(p);

				/* reinit due? */
				if (p->reconf_action == RECONF_REINIT) {
					session_stop(p, ERR_CEASE_ADMIN_RESET);
					if (!p->conf.down)
						timer_set(&p->timers,
						    Timer_IdleHold, 0);
				}

				/* deletion due? */
				if (p->reconf_action == RECONF_DELETE) {
					if (p->demoted)
						session_demote(p, -1);
					p->conf.demote_group[0] = 0;
					session_stop(p, ERR_CEASE_PEER_UNCONF);
					timer_remove_all(&p->timers);
					tcp_md5_del_listener(conf, p);
					log_peer_warnx(&p->conf, "removed");
					RB_REMOVE(peer_head, &conf->peers, p);
					free(p);
					peer_cnt--;
					continue;
				}
				p->reconf_action = RECONF_NONE;
			}
		}

		/* peer_l needs room for one pointer per peer; never shrinks */
		if (peer_cnt > peer_l_elms) {
			if ((newp = reallocarray(peer_l, peer_cnt,
			    sizeof(struct peer *))) == NULL) {
				/* panic for now */
				log_warn("could not resize peer_l from %u -> %u"
				    " entries", peer_l_elms, peer_cnt);
				fatalx("exiting");
			}
			peer_l = newp;
			peer_l_elms = peer_cnt;
		}

		/*
		 * drop mrt dumps marked for removal and count those that
		 * have data queued for writing
		 */
		mrt_cnt = 0;
		for (m = LIST_FIRST(&mrthead); m != NULL; m = xm) {
			xm = LIST_NEXT(m, entry);
			if (m->state == MRT_STATE_REMOVE) {
				mrt_clean(m);
				LIST_REMOVE(m, entry);
				free(m);
				continue;
			}
			if (m->wbuf.queued)
				mrt_cnt++;
		}

		if (mrt_cnt > mrt_l_elms) {
			if ((newp = reallocarray(mrt_l, mrt_cnt,
			    sizeof(struct mrt *))) == NULL) {
				/* panic for now */
				log_warn("could not resize mrt_l from %u -> %u"
				    " entries", mrt_l_elms, mrt_cnt);
				fatalx("exiting");
			}
			mrt_l = newp;
			mrt_l_elms = mrt_cnt;
		}

		/* fixed slots + listeners + peers + control conns + mrts */
		new_cnt = PFD_LISTENERS_START + listener_cnt + peer_cnt +
		    ctl_cnt + mrt_cnt;
		if (new_cnt > pfd_elms) {
			if ((newp = reallocarray(pfd, new_cnt,
			    sizeof(struct pollfd))) == NULL) {
				/* panic for now */
				log_warn("could not resize pfd from %u -> %u"
				    " entries", pfd_elms, new_cnt);
				fatalx("exiting");
			}
			pfd = newp;
			pfd_elms = new_cnt;
		}

		memset(pfd, 0, sizeof(struct pollfd) * pfd_elms);

		set_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main);
		set_pollfd(&pfd[PFD_PIPE_ROUTE], ibuf_rde);
		set_pollfd(&pfd[PFD_PIPE_ROUTE_CTL], ibuf_rde_ctl);

		/* while accept is paused, no accepting socket is polled */
		if (pauseaccept == 0) {
			pfd[PFD_SOCK_CTL].fd = csock;
			pfd[PFD_SOCK_CTL].events = POLLIN;
			pfd[PFD_SOCK_RCTL].fd = rcsock;
			pfd[PFD_SOCK_RCTL].events = POLLIN;
		} else {
			pfd[PFD_SOCK_CTL].fd = -1;
			pfd[PFD_SOCK_RCTL].fd = -1;
		}

		i = PFD_LISTENERS_START;
		TAILQ_FOREACH(la, conf->listen_addrs, entry) {
			if (pauseaccept == 0) {
				pfd[i].fd = la->fd;
				pfd[i].events = POLLIN;
			} else
				pfd[i].fd = -1;
			i++;
		}
		/* one past the last listener slot, i.e. the first peer slot */
		idx_listeners = i;
		timeout = 240;	/* loop every 240s at least */

		now = getmonotime();
		RB_FOREACH(p, peer_head, &conf->peers) {
			time_t	nextaction;
			struct timer *pt;

			/* check timers */
			if ((pt = timer_nextisdue(&p->timers, now)) != NULL) {
				switch (pt->type) {
				case Timer_Hold:
					bgp_fsm(p, EVNT_TIMER_HOLDTIME);
					break;
				case Timer_SendHold:
					bgp_fsm(p, EVNT_TIMER_SENDHOLD);
					break;
				case Timer_ConnectRetry:
					bgp_fsm(p, EVNT_TIMER_CONNRETRY);
					break;
				case Timer_Keepalive:
					bgp_fsm(p, EVNT_TIMER_KEEPALIVE);
					break;
				case Timer_IdleHold:
					bgp_fsm(p, EVNT_START);
					break;
				case Timer_IdleHoldReset:
					/* back to the initial idle hold time */
					p->IdleHoldTime =
					    INTERVAL_IDLE_HOLD_INITIAL;
					p->errcnt = 0;
					timer_stop(&p->timers,
					    Timer_IdleHoldReset);
					break;
				case Timer_CarpUndemote:
					timer_stop(&p->timers,
					    Timer_CarpUndemote);
					if (p->demoted &&
					    p->state == STATE_ESTABLISHED)
						session_demote(p, -1);
					break;
				case Timer_RestartTimeout:
					timer_stop(&p->timers,
					    Timer_RestartTimeout);
					session_graceful_stop(p);
					break;
				default:
					fatalx("King Bula lost in time");
				}
			}
			/* do not sleep past the next timer of this peer */
			if ((nextaction = timer_nextduein(&p->timers,
			    now)) != -1 && nextaction < timeout)
				timeout = nextaction;

			/* are we waiting for a write? */
			events = POLLIN;
			if (p->wbuf.queued > 0 || p->state == STATE_CONNECT)
				events |= POLLOUT;
			/* is there still work to do? */
			if (p->rpending && p->rbuf && p->rbuf->wpos)
				timeout = 0;

			/* poll events */
			if (p->fd != -1 && events != 0) {
				pfd[i].fd = p->fd;
				pfd[i].events = events;
				/* remember which peer owns this pollfd slot */
				peer_l[i - idx_listeners] = p;
				i++;
			}
		}

		/* one past the last peer slot, i.e. the first mrt slot */
		idx_peers = i;

		LIST_FOREACH(m, &mrthead, entry)
			if (m->wbuf.queued) {
				pfd[i].fd = m->wbuf.fd;
				pfd[i].events = POLLOUT;
				mrt_l[i - idx_peers] = m;
				i++;
			}

		/* one past the last mrt slot; control connections follow */
		idx_mrts = i;

		i += control_fill_pfds(pfd + i, pfd_elms -i);

		if (i > pfd_elms)
			fatalx("poll pfd overflow");

		/* while accept is paused, wake up after a second at most */
		if (pauseaccept && timeout > 1)
			timeout = 1;
		if (timeout < 0)
			timeout = 0;
		/* timeout is kept in seconds, poll() takes milliseconds */
		if (poll(pfd, i, timeout * 1000) == -1) {
			if (errno == EINTR)
				continue;
			fatal("poll error");
		}

		/*
		 * If we previously saw fd exhaustion, we stop accept()
		 * for 1 second to throttle the accept() loop.
		 */
		if (pauseaccept && getmonotime() > pauseaccept + 1)
			pauseaccept = 0;

		/* losing the parent ends the session engine */
		if (handle_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main) == -1) {
			log_warnx("SE: Lost connection to parent");
			session_quit = 1;
			continue;
		} else
			session_dispatch_imsg(ibuf_main, PFD_PIPE_MAIN,
			    &listener_cnt);

		/* losing an RDE pipe only drops that pipe */
		if (handle_pollfd(&pfd[PFD_PIPE_ROUTE], ibuf_rde) == -1) {
			log_warnx("SE: Lost connection to RDE");
			msgbuf_clear(&ibuf_rde->w);
			free(ibuf_rde);
			ibuf_rde = NULL;
		} else
			session_dispatch_imsg(ibuf_rde, PFD_PIPE_ROUTE,
			    &listener_cnt);

		if (handle_pollfd(&pfd[PFD_PIPE_ROUTE_CTL], ibuf_rde_ctl) ==
		    -1) {
			log_warnx("SE: Lost connection to RDE control");
			msgbuf_clear(&ibuf_rde_ctl->w);
			free(ibuf_rde_ctl);
			ibuf_rde_ctl = NULL;
		} else
			session_dispatch_imsg(ibuf_rde_ctl, PFD_PIPE_ROUTE_CTL,
			    &listener_cnt);

		/* the return value of control_accept() is added to ctl_cnt */
		if (pfd[PFD_SOCK_CTL].revents & POLLIN)
			ctl_cnt += control_accept(csock, 0);

		if (pfd[PFD_SOCK_RCTL].revents & POLLIN)
			ctl_cnt += control_accept(rcsock, 1);

		/*
		 * j walks the remaining pollfd slots in the order they were
		 * filled in: listeners, peers, mrt dumps, control connections
		 */
		for (j = PFD_LISTENERS_START; j < idx_listeners; j++)
			if (pfd[j].revents & POLLIN)
				session_accept(pfd[j].fd);

		for (; j < idx_peers; j++)
			session_dispatch_msg(&pfd[j],
			    peer_l[j - idx_listeners]);

		/* process whatever is sitting in the peers' read buffers */
		RB_FOREACH(p, peer_head, &conf->peers)
			if (p->rbuf && p->rbuf->wpos)
				session_process_msg(p);

		for (; j < idx_mrts; j++)
			if (pfd[j].revents & POLLOUT)
				mrt_write(mrt_l[j - idx_peers]);

		/*
		 * NOTE(review): control_dispatch_msg() presumably returns the
		 * number of control connections it closed -- confirm.
		 */
		for (; j < i; j++)
			ctl_cnt -= control_dispatch_msg(&pfd[j], &conf->peers);
	}

	/* shutdown: stop and free every peer */
	RB_FOREACH_SAFE(p, peer_head, &conf->peers, next) {
		RB_REMOVE(peer_head, &conf->peers, p);
		strlcpy(p->conf.reason,
		    "bgpd shutting down",
		    sizeof(p->conf.reason));
		session_stop(p, ERR_CEASE_ADMIN_DOWN);
		timer_remove_all(&p->timers);
		free(p);
	}

	while ((m = LIST_FIRST(&mrthead)) != NULL) {
		mrt_clean(m);
		LIST_REMOVE(m, entry);
		free(m);
	}

	free_config(conf);
	free(peer_l);
	free(mrt_l);
	free(pfd);

	/* close pipes */
	if (ibuf_rde) {
		/* one last attempt to write out what is still queued */
		msgbuf_write(&ibuf_rde->w);
		msgbuf_clear(&ibuf_rde->w);
		close(ibuf_rde->fd);
		free(ibuf_rde);
	}
	if (ibuf_rde_ctl) {
		msgbuf_clear(&ibuf_rde_ctl->w);
		close(ibuf_rde_ctl->fd);
		free(ibuf_rde_ctl);
	}
	msgbuf_write(&ibuf_main->w);
	msgbuf_clear(&ibuf_main->w);
	close(ibuf_main->fd);
	free(ibuf_main);

	control_shutdown(csock);
	control_shutdown(rcsock);
	log_info("session engine exiting");
	exit(0);
}
564 
565 void
566 init_peer(struct peer *p)
567 {
568 	TAILQ_INIT(&p->timers);
569 	p->fd = p->wbuf.fd = -1;
570 
571 	if (p->conf.if_depend[0])
572 		imsg_compose(ibuf_main, IMSG_SESSION_DEPENDON, 0, 0, -1,
573 		    p->conf.if_depend, sizeof(p->conf.if_depend));
574 	else
575 		p->depend_ok = 1;
576 
577 	peer_cnt++;
578 
579 	change_state(p, STATE_IDLE, EVNT_NONE);
580 	if (p->conf.down)
581 		timer_stop(&p->timers, Timer_IdleHold); /* no autostart */
582 	else
583 		timer_set(&p->timers, Timer_IdleHold, 0); /* start ASAP */
584 
585 	p->stats.last_updown = getmonotime();
586 
587 	/*
588 	 * on startup, demote if requested.
589 	 * do not handle new peers. they must reach ESTABLISHED beforehand.
590 	 * peers added at runtime have reconf_action set to RECONF_REINIT.
591 	 */
592 	if (p->reconf_action != RECONF_REINIT && p->conf.demote_group[0])
593 		session_demote(p, +1);
594 }
595 
596 void
597 bgp_fsm(struct peer *peer, enum session_events event)
598 {
599 	switch (peer->state) {
600 	case STATE_NONE:
601 		/* nothing */
602 		break;
603 	case STATE_IDLE:
604 		switch (event) {
605 		case EVNT_START:
606 			timer_stop(&peer->timers, Timer_Hold);
607 			timer_stop(&peer->timers, Timer_SendHold);
608 			timer_stop(&peer->timers, Timer_Keepalive);
609 			timer_stop(&peer->timers, Timer_IdleHold);
610 
611 			/* allocate read buffer */
612 			peer->rbuf = calloc(1, sizeof(struct ibuf_read));
613 			if (peer->rbuf == NULL)
614 				fatal(NULL);
615 
616 			/* init write buffer */
617 			msgbuf_init(&peer->wbuf);
618 
619 			peer->stats.last_sent_errcode = 0;
620 			peer->stats.last_sent_suberr = 0;
621 			peer->stats.last_rcvd_errcode = 0;
622 			peer->stats.last_rcvd_suberr = 0;
623 
624 			if (!peer->depend_ok)
625 				timer_stop(&peer->timers, Timer_ConnectRetry);
626 			else if (peer->passive || peer->conf.passive ||
627 			    peer->conf.template) {
628 				change_state(peer, STATE_ACTIVE, event);
629 				timer_stop(&peer->timers, Timer_ConnectRetry);
630 			} else {
631 				change_state(peer, STATE_CONNECT, event);
632 				timer_set(&peer->timers, Timer_ConnectRetry,
633 				    conf->connectretry);
634 				session_connect(peer);
635 			}
636 			peer->passive = 0;
637 			break;
638 		default:
639 			/* ignore */
640 			break;
641 		}
642 		break;
643 	case STATE_CONNECT:
644 		switch (event) {
645 		case EVNT_START:
646 			/* ignore */
647 			break;
648 		case EVNT_CON_OPEN:
649 			session_tcp_established(peer);
650 			session_open(peer);
651 			timer_stop(&peer->timers, Timer_ConnectRetry);
652 			peer->holdtime = INTERVAL_HOLD_INITIAL;
653 			start_timer_holdtime(peer);
654 			change_state(peer, STATE_OPENSENT, event);
655 			break;
656 		case EVNT_CON_OPENFAIL:
657 			timer_set(&peer->timers, Timer_ConnectRetry,
658 			    conf->connectretry);
659 			session_close_connection(peer);
660 			change_state(peer, STATE_ACTIVE, event);
661 			break;
662 		case EVNT_TIMER_CONNRETRY:
663 			timer_set(&peer->timers, Timer_ConnectRetry,
664 			    conf->connectretry);
665 			session_connect(peer);
666 			break;
667 		default:
668 			change_state(peer, STATE_IDLE, event);
669 			break;
670 		}
671 		break;
672 	case STATE_ACTIVE:
673 		switch (event) {
674 		case EVNT_START:
675 			/* ignore */
676 			break;
677 		case EVNT_CON_OPEN:
678 			session_tcp_established(peer);
679 			session_open(peer);
680 			timer_stop(&peer->timers, Timer_ConnectRetry);
681 			peer->holdtime = INTERVAL_HOLD_INITIAL;
682 			start_timer_holdtime(peer);
683 			change_state(peer, STATE_OPENSENT, event);
684 			break;
685 		case EVNT_CON_OPENFAIL:
686 			timer_set(&peer->timers, Timer_ConnectRetry,
687 			    conf->connectretry);
688 			session_close_connection(peer);
689 			change_state(peer, STATE_ACTIVE, event);
690 			break;
691 		case EVNT_TIMER_CONNRETRY:
692 			timer_set(&peer->timers, Timer_ConnectRetry,
693 			    peer->holdtime);
694 			change_state(peer, STATE_CONNECT, event);
695 			session_connect(peer);
696 			break;
697 		default:
698 			change_state(peer, STATE_IDLE, event);
699 			break;
700 		}
701 		break;
702 	case STATE_OPENSENT:
703 		switch (event) {
704 		case EVNT_START:
705 			/* ignore */
706 			break;
707 		case EVNT_STOP:
708 			change_state(peer, STATE_IDLE, event);
709 			break;
710 		case EVNT_CON_CLOSED:
711 			session_close_connection(peer);
712 			timer_set(&peer->timers, Timer_ConnectRetry,
713 			    conf->connectretry);
714 			change_state(peer, STATE_ACTIVE, event);
715 			break;
716 		case EVNT_CON_FATAL:
717 			change_state(peer, STATE_IDLE, event);
718 			break;
719 		case EVNT_TIMER_HOLDTIME:
720 		case EVNT_TIMER_SENDHOLD:
721 			session_notification(peer, ERR_HOLDTIMEREXPIRED,
722 			    0, NULL, 0);
723 			change_state(peer, STATE_IDLE, event);
724 			break;
725 		case EVNT_RCVD_OPEN:
726 			/* parse_open calls change_state itself on failure */
727 			if (parse_open(peer))
728 				break;
729 			session_keepalive(peer);
730 			change_state(peer, STATE_OPENCONFIRM, event);
731 			break;
732 		case EVNT_RCVD_NOTIFICATION:
733 			if (parse_notification(peer)) {
734 				change_state(peer, STATE_IDLE, event);
735 				/* don't punish, capa negotiation */
736 				timer_set(&peer->timers, Timer_IdleHold, 0);
737 				peer->IdleHoldTime /= 2;
738 			} else
739 				change_state(peer, STATE_IDLE, event);
740 			break;
741 		default:
742 			session_notification(peer,
743 			    ERR_FSM, ERR_FSM_UNEX_OPENSENT, NULL, 0);
744 			change_state(peer, STATE_IDLE, event);
745 			break;
746 		}
747 		break;
748 	case STATE_OPENCONFIRM:
749 		switch (event) {
750 		case EVNT_START:
751 			/* ignore */
752 			break;
753 		case EVNT_STOP:
754 			change_state(peer, STATE_IDLE, event);
755 			break;
756 		case EVNT_CON_CLOSED:
757 		case EVNT_CON_FATAL:
758 			change_state(peer, STATE_IDLE, event);
759 			break;
760 		case EVNT_TIMER_HOLDTIME:
761 		case EVNT_TIMER_SENDHOLD:
762 			session_notification(peer, ERR_HOLDTIMEREXPIRED,
763 			    0, NULL, 0);
764 			change_state(peer, STATE_IDLE, event);
765 			break;
766 		case EVNT_TIMER_KEEPALIVE:
767 			session_keepalive(peer);
768 			break;
769 		case EVNT_RCVD_KEEPALIVE:
770 			start_timer_holdtime(peer);
771 			change_state(peer, STATE_ESTABLISHED, event);
772 			break;
773 		case EVNT_RCVD_NOTIFICATION:
774 			parse_notification(peer);
775 			change_state(peer, STATE_IDLE, event);
776 			break;
777 		default:
778 			session_notification(peer,
779 			    ERR_FSM, ERR_FSM_UNEX_OPENCONFIRM, NULL, 0);
780 			change_state(peer, STATE_IDLE, event);
781 			break;
782 		}
783 		break;
784 	case STATE_ESTABLISHED:
785 		switch (event) {
786 		case EVNT_START:
787 			/* ignore */
788 			break;
789 		case EVNT_STOP:
790 			change_state(peer, STATE_IDLE, event);
791 			break;
792 		case EVNT_CON_CLOSED:
793 		case EVNT_CON_FATAL:
794 			change_state(peer, STATE_IDLE, event);
795 			break;
796 		case EVNT_TIMER_HOLDTIME:
797 		case EVNT_TIMER_SENDHOLD:
798 			session_notification(peer, ERR_HOLDTIMEREXPIRED,
799 			    0, NULL, 0);
800 			change_state(peer, STATE_IDLE, event);
801 			break;
802 		case EVNT_TIMER_KEEPALIVE:
803 			session_keepalive(peer);
804 			break;
805 		case EVNT_RCVD_KEEPALIVE:
806 			start_timer_holdtime(peer);
807 			break;
808 		case EVNT_RCVD_UPDATE:
809 			start_timer_holdtime(peer);
810 			if (parse_update(peer))
811 				change_state(peer, STATE_IDLE, event);
812 			else
813 				start_timer_holdtime(peer);
814 			break;
815 		case EVNT_RCVD_NOTIFICATION:
816 			parse_notification(peer);
817 			change_state(peer, STATE_IDLE, event);
818 			break;
819 		default:
820 			session_notification(peer,
821 			    ERR_FSM, ERR_FSM_UNEX_ESTABLISHED, NULL, 0);
822 			change_state(peer, STATE_IDLE, event);
823 			break;
824 		}
825 		break;
826 	}
827 }
828 
829 void
830 start_timer_holdtime(struct peer *peer)
831 {
832 	if (peer->holdtime > 0)
833 		timer_set(&peer->timers, Timer_Hold, peer->holdtime);
834 	else
835 		timer_stop(&peer->timers, Timer_Hold);
836 }
837 
838 void
839 start_timer_keepalive(struct peer *peer)
840 {
841 	if (peer->holdtime > 0)
842 		timer_set(&peer->timers, Timer_Keepalive, peer->holdtime / 3);
843 	else
844 		timer_stop(&peer->timers, Timer_Keepalive);
845 }
846 
847 void
848 session_close_connection(struct peer *peer)
849 {
850 	if (peer->fd != -1) {
851 		close(peer->fd);
852 		pauseaccept = 0;
853 	}
854 	peer->fd = peer->wbuf.fd = -1;
855 }
856 
/*
 * Move peer into the new state `state` as a consequence of `event` and
 * perform the work tied to entering that state.
 *
 * peer->state still holds the old state while the switch below runs; it is
 * only updated (together with peer->prev_state) at the very end, after the
 * transition has been logged and written to the matching mrt state dumps.
 */
void
change_state(struct peer *peer, enum session_state state,
    enum session_events event)
{
	struct mrt	*mrt;

	switch (state) {
	case STATE_IDLE:
		/* carp demotion first. new peers handled in init_peer */
		if (peer->state == STATE_ESTABLISHED &&
		    peer->conf.demote_group[0] && !peer->demoted)
			session_demote(peer, +1);

		/*
		 * try to write out what's buffered (maybe a notification),
		 * don't bother if it fails
		 */
		if (peer->state >= STATE_OPENSENT && peer->wbuf.queued)
			msgbuf_write(&peer->wbuf);

		/*
		 * we must start the timer for the next EVNT_START
		 * if we are coming here due to an error and the
		 * session was not established successfully before, the
		 * starttimerinterval needs to be exponentially increased
		 */
		if (peer->IdleHoldTime == 0)
			peer->IdleHoldTime = INTERVAL_IDLE_HOLD_INITIAL;
		peer->holdtime = INTERVAL_HOLD_INITIAL;
		timer_stop(&peer->timers, Timer_ConnectRetry);
		timer_stop(&peer->timers, Timer_Keepalive);
		timer_stop(&peer->timers, Timer_Hold);
		timer_stop(&peer->timers, Timer_SendHold);
		timer_stop(&peer->timers, Timer_IdleHold);
		timer_stop(&peer->timers, Timer_IdleHoldReset);
		/* drop the connection and everything buffered for it */
		session_close_connection(peer);
		msgbuf_clear(&peer->wbuf);
		free(peer->rbuf);
		peer->rbuf = NULL;
		peer->rpending = 0;
		/* forget the capabilities the neighbor announced */
		memset(&peer->capa.peer, 0, sizeof(peer->capa.peer));
		if (!peer->template)
			imsg_compose(ibuf_main, IMSG_PFKEY_RELOAD,
			    peer->conf.id, 0, -1, NULL, 0);

		/*
		 * no automatic restart after an explicit stop; otherwise
		 * schedule the next start and, unless this is the initial
		 * transition, double the idle hold time for the next round
		 */
		if (event != EVNT_STOP) {
			timer_set(&peer->timers, Timer_IdleHold,
			    peer->IdleHoldTime);
			if (event != EVNT_NONE &&
			    peer->IdleHoldTime < MAX_IDLE_HOLD/2)
				peer->IdleHoldTime *= 2;
		}
		if (peer->state == STATE_ESTABLISHED) {
			if (peer->capa.neg.grestart.restart == 2 &&
			    (event == EVNT_CON_CLOSED ||
			    event == EVNT_CON_FATAL)) {
				/* don't punish graceful restart */
				timer_set(&peer->timers, Timer_IdleHold, 0);
				peer->IdleHoldTime /= 2;
				session_graceful_restart(peer);
			} else
				session_down(peer);
		}
		if (peer->state == STATE_NONE ||
		    peer->state == STATE_ESTABLISHED) {
			/* initialize capability negotiation structures */
			memcpy(&peer->capa.ann, &peer->conf.capabilities,
			    sizeof(peer->capa.ann));
			if (!peer->conf.announce_capa)
				session_capa_ann_none(peer);
		}
		break;
	case STATE_CONNECT:
		/*
		 * only a direct ESTABLISHED -> CONNECT transition with
		 * graceful restart negotiated needs work here
		 */
		if (peer->state == STATE_ESTABLISHED &&
		    peer->capa.neg.grestart.restart == 2) {
			/* do the graceful restart dance */
			session_graceful_restart(peer);
			peer->holdtime = INTERVAL_HOLD_INITIAL;
			timer_stop(&peer->timers, Timer_ConnectRetry);
			timer_stop(&peer->timers, Timer_Keepalive);
			timer_stop(&peer->timers, Timer_Hold);
			timer_stop(&peer->timers, Timer_SendHold);
			timer_stop(&peer->timers, Timer_IdleHold);
			timer_stop(&peer->timers, Timer_IdleHoldReset);
			session_close_connection(peer);
			msgbuf_clear(&peer->wbuf);
			memset(&peer->capa.peer, 0, sizeof(peer->capa.peer));
		}
		break;
	case STATE_ACTIVE:
		if (!peer->template)
			imsg_compose(ibuf_main, IMSG_PFKEY_RELOAD,
			    peer->conf.id, 0, -1, NULL, 0);
		break;
	case STATE_OPENSENT:
		break;
	case STATE_OPENCONFIRM:
		break;
	case STATE_ESTABLISHED:
		timer_set(&peer->timers, Timer_IdleHoldReset,
		    peer->IdleHoldTime);
		if (peer->demoted)
			timer_set(&peer->timers, Timer_CarpUndemote,
			    INTERVAL_HOLD_DEMOTED);
		session_up(peer);
		break;
	default:		/* unknown state, nothing to set up */
		break;
	}

	log_statechange(peer, state, event);
	/*
	 * record the transition in every "all in" / "all out" mrt dump that
	 * is either unrestricted or bound to this peer or its group
	 */
	LIST_FOREACH(mrt, &mrthead, entry) {
		if (!(mrt->type == MRT_ALL_IN || mrt->type == MRT_ALL_OUT))
			continue;
		if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
		    mrt->peer_id == peer->conf.id || (mrt->group_id != 0 &&
		    mrt->group_id == peer->conf.groupid))
			mrt_dump_state(mrt, peer->state, state, peer);
	}
	peer->prev_state = peer->state;
	peer->state = state;
}
979 
980 void
981 session_accept(int listenfd)
982 {
983 	int			 connfd;
984 	socklen_t		 len;
985 	struct sockaddr_storage	 cliaddr;
986 	struct peer		*p = NULL;
987 
988 	len = sizeof(cliaddr);
989 	if ((connfd = accept4(listenfd,
990 	    (struct sockaddr *)&cliaddr, &len,
991 	    SOCK_CLOEXEC | SOCK_NONBLOCK)) == -1) {
992 		if (errno == ENFILE || errno == EMFILE)
993 			pauseaccept = getmonotime();
994 		else if (errno != EWOULDBLOCK && errno != EINTR &&
995 		    errno != ECONNABORTED)
996 			log_warn("accept");
997 		return;
998 	}
999 
1000 	p = getpeerbyip(conf, (struct sockaddr *)&cliaddr);
1001 
1002 	if (p != NULL && p->state == STATE_IDLE && p->errcnt < 2) {
1003 		if (timer_running(&p->timers, Timer_IdleHold, NULL)) {
1004 			/* fast reconnect after clear */
1005 			p->passive = 1;
1006 			bgp_fsm(p, EVNT_START);
1007 		}
1008 	}
1009 
1010 	if (p != NULL &&
1011 	    (p->state == STATE_CONNECT || p->state == STATE_ACTIVE)) {
1012 		if (p->fd != -1) {
1013 			if (p->state == STATE_CONNECT)
1014 				session_close_connection(p);
1015 			else {
1016 				close(connfd);
1017 				return;
1018 			}
1019 		}
1020 
1021 open:
1022 		if (p->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) {
1023 			log_peer_warnx(&p->conf,
1024 			    "ipsec or md5sig configured but not available");
1025 			close(connfd);
1026 			return;
1027 		}
1028 
1029 		if (tcp_md5_check(connfd, p) == -1) {
1030 			close(connfd);
1031 			return;
1032 		}
1033 		p->fd = p->wbuf.fd = connfd;
1034 		if (session_setup_socket(p)) {
1035 			close(connfd);
1036 			return;
1037 		}
1038 		bgp_fsm(p, EVNT_CON_OPEN);
1039 		return;
1040 	} else if (p != NULL && p->state == STATE_ESTABLISHED &&
1041 	    p->capa.neg.grestart.restart == 2) {
1042 		/* first do the graceful restart dance */
1043 		change_state(p, STATE_CONNECT, EVNT_CON_CLOSED);
1044 		/* then do part of the open dance */
1045 		goto open;
1046 	} else {
1047 		log_conn_attempt(p, (struct sockaddr *)&cliaddr, len);
1048 		close(connfd);
1049 	}
1050 }
1051 
1052 int
1053 session_connect(struct peer *peer)
1054 {
1055 	struct sockaddr		*sa;
1056 	struct bgpd_addr	*bind_addr = NULL;
1057 	socklen_t		 sa_len;
1058 
1059 	/*
1060 	 * we do not need the overcomplicated collision detection RFC 1771
1061 	 * describes; we simply make sure there is only ever one concurrent
1062 	 * tcp connection per peer.
1063 	 */
1064 	if (peer->fd != -1)
1065 		return (-1);
1066 
1067 	if ((peer->fd = socket(aid2af(peer->conf.remote_addr.aid),
1068 	    SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, IPPROTO_TCP)) == -1) {
1069 		log_peer_warn(&peer->conf, "session_connect socket");
1070 		bgp_fsm(peer, EVNT_CON_OPENFAIL);
1071 		return (-1);
1072 	}
1073 
1074 	if (peer->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) {
1075 		log_peer_warnx(&peer->conf,
1076 		    "ipsec or md5sig configured but not available");
1077 		bgp_fsm(peer, EVNT_CON_OPENFAIL);
1078 		return (-1);
1079 	}
1080 
1081 	tcp_md5_set(peer->fd, peer);
1082 	peer->wbuf.fd = peer->fd;
1083 
1084 	/* if local-address is set we need to bind() */
1085 	switch (peer->conf.remote_addr.aid) {
1086 	case AID_INET:
1087 		bind_addr = &peer->conf.local_addr_v4;
1088 		break;
1089 	case AID_INET6:
1090 		bind_addr = &peer->conf.local_addr_v6;
1091 		break;
1092 	}
1093 	if ((sa = addr2sa(bind_addr, 0, &sa_len)) != NULL) {
1094 		if (bind(peer->fd, sa, sa_len) == -1) {
1095 			log_peer_warn(&peer->conf, "session_connect bind");
1096 			bgp_fsm(peer, EVNT_CON_OPENFAIL);
1097 			return (-1);
1098 		}
1099 	}
1100 
1101 	if (session_setup_socket(peer)) {
1102 		bgp_fsm(peer, EVNT_CON_OPENFAIL);
1103 		return (-1);
1104 	}
1105 
1106 	sa = addr2sa(&peer->conf.remote_addr, peer->conf.remote_port, &sa_len);
1107 	if (connect(peer->fd, sa, sa_len) == -1) {
1108 		if (errno != EINPROGRESS) {
1109 			if (errno != peer->lasterr)
1110 				log_peer_warn(&peer->conf, "connect");
1111 			peer->lasterr = errno;
1112 			bgp_fsm(peer, EVNT_CON_OPENFAIL);
1113 			return (-1);
1114 		}
1115 	} else
1116 		bgp_fsm(peer, EVNT_CON_OPEN);
1117 
1118 	return (0);
1119 }
1120 
1121 int
1122 session_setup_socket(struct peer *p)
1123 {
1124 	int	ttl = p->conf.distance;
1125 	int	pre = IPTOS_PREC_INTERNETCONTROL;
1126 	int	nodelay = 1;
1127 	int	bsize;
1128 
1129 	switch (p->conf.remote_addr.aid) {
1130 	case AID_INET:
1131 		/* set precedence, see RFC 1771 appendix 5 */
1132 		if (setsockopt(p->fd, IPPROTO_IP, IP_TOS, &pre, sizeof(pre)) ==
1133 		    -1) {
1134 			log_peer_warn(&p->conf,
1135 			    "session_setup_socket setsockopt TOS");
1136 			return (-1);
1137 		}
1138 
1139 		if (p->conf.ebgp) {
1140 			/*
1141 			 * set TTL to foreign router's distance
1142 			 * 1=direct n=multihop with ttlsec, we always use 255
1143 			 */
1144 			if (p->conf.ttlsec) {
1145 				ttl = 256 - p->conf.distance;
1146 				if (setsockopt(p->fd, IPPROTO_IP, IP_MINTTL,
1147 				    &ttl, sizeof(ttl)) == -1) {
1148 					log_peer_warn(&p->conf,
1149 					    "session_setup_socket: "
1150 					    "setsockopt MINTTL");
1151 					return (-1);
1152 				}
1153 				ttl = 255;
1154 			}
1155 
1156 			if (setsockopt(p->fd, IPPROTO_IP, IP_TTL, &ttl,
1157 			    sizeof(ttl)) == -1) {
1158 				log_peer_warn(&p->conf,
1159 				    "session_setup_socket setsockopt TTL");
1160 				return (-1);
1161 			}
1162 		}
1163 		break;
1164 	case AID_INET6:
1165 		if (p->conf.ebgp) {
1166 			/*
1167 			 * set hoplimit to foreign router's distance
1168 			 * 1=direct n=multihop with ttlsec, we always use 255
1169 			 */
1170 			if (p->conf.ttlsec) {
1171 				ttl = 256 - p->conf.distance;
1172 				if (setsockopt(p->fd, IPPROTO_IPV6,
1173 				    IPV6_MINHOPCOUNT, &ttl, sizeof(ttl))
1174 				    == -1) {
1175 					log_peer_warn(&p->conf,
1176 					    "session_setup_socket: "
1177 					    "setsockopt MINHOPCOUNT");
1178 					return (-1);
1179 				}
1180 				ttl = 255;
1181 			}
1182 			if (setsockopt(p->fd, IPPROTO_IPV6, IPV6_UNICAST_HOPS,
1183 			    &ttl, sizeof(ttl)) == -1) {
1184 				log_peer_warn(&p->conf,
1185 				    "session_setup_socket setsockopt hoplimit");
1186 				return (-1);
1187 			}
1188 		}
1189 		break;
1190 	}
1191 
1192 	/* set TCP_NODELAY */
1193 	if (setsockopt(p->fd, IPPROTO_TCP, TCP_NODELAY, &nodelay,
1194 	    sizeof(nodelay)) == -1) {
1195 		log_peer_warn(&p->conf,
1196 		    "session_setup_socket setsockopt TCP_NODELAY");
1197 		return (-1);
1198 	}
1199 
1200 	/* only increase bufsize (and thus window) if md5 or ipsec is in use */
1201 	if (p->conf.auth.method != AUTH_NONE) {
1202 		/* try to increase bufsize. no biggie if it fails */
1203 		bsize = 65535;
1204 		while (bsize > 8192 &&
1205 		    setsockopt(p->fd, SOL_SOCKET, SO_RCVBUF, &bsize,
1206 		    sizeof(bsize)) == -1 && errno != EINVAL)
1207 			bsize /= 2;
1208 		bsize = 65535;
1209 		while (bsize > 8192 &&
1210 		    setsockopt(p->fd, SOL_SOCKET, SO_SNDBUF, &bsize,
1211 		    sizeof(bsize)) == -1 && errno != EINVAL)
1212 			bsize /= 2;
1213 	}
1214 
1215 	return (0);
1216 }
1217 
1218 /* compare two sockaddrs by converting them into bgpd_addr */
1219 static int
1220 sa_cmp(struct sockaddr *a, struct sockaddr *b)
1221 {
1222 	struct bgpd_addr ba, bb;
1223 
1224 	sa2addr(a, &ba, NULL);
1225 	sa2addr(b, &bb, NULL);
1226 
1227 	return (memcmp(&ba, &bb, sizeof(ba)) == 0);
1228 }
1229 
/*
 * Look up an address of the other address family on the interface that
 * owns the local session address.
 *
 * sa is the local address of the session.  The interface list is searched
 * for the entry carrying exactly this address; then the first address of
 * the opposite family (AF_INET for an AF_INET6 session and vice versa)
 * configured on an interface of the same name is stored in alt.  For the
 * IPv6 alternative link-local and site-local addresses are skipped.
 *
 * alt is left untouched if the local address or a suitable alternative
 * cannot be found.
 */
static void
get_alternate_addr(struct sockaddr *sa, struct bgpd_addr *alt)
{
	struct ifaddrs	*ifap, *ifa, *match;

	if (getifaddrs(&ifap) == -1)
		fatal("getifaddrs");

	/* sa_cmp() yields non-zero when the two addresses are equal */
	for (match = ifap; match != NULL; match = match->ifa_next)
		if (match->ifa_addr != NULL &&
		    sa_cmp(sa, match->ifa_addr))
			break;

	if (match == NULL) {
		log_warnx("%s: local address not found", __func__);
		/* do not leak the interface list on the early exit */
		freeifaddrs(ifap);
		return;
	}

	switch (sa->sa_family) {
	case AF_INET6:
		/* IPv6 session: pick the first IPv4 address of the interface */
		for (ifa = ifap; ifa != NULL; ifa = ifa->ifa_next) {
			if (ifa->ifa_addr != NULL &&
			    ifa->ifa_addr->sa_family == AF_INET &&
			    strcmp(ifa->ifa_name, match->ifa_name) == 0) {
				sa2addr(ifa->ifa_addr, alt, NULL);
				break;
			}
		}
		break;
	case AF_INET:
		/* IPv4 session: pick the first usable IPv6 address */
		for (ifa = ifap; ifa != NULL; ifa = ifa->ifa_next) {
			if (ifa->ifa_addr != NULL &&
			    ifa->ifa_addr->sa_family == AF_INET6 &&
			    strcmp(ifa->ifa_name, match->ifa_name) == 0) {
				struct sockaddr_in6 *s =
				    (struct sockaddr_in6 *)ifa->ifa_addr;

				/* only accept global scope addresses */
				if (IN6_IS_ADDR_LINKLOCAL(&s->sin6_addr) ||
				    IN6_IS_ADDR_SITELOCAL(&s->sin6_addr))
					continue;
				sa2addr(ifa->ifa_addr, alt, NULL);
				break;
			}
		}
		break;
	default:
		log_warnx("%s: unsupported address family %d", __func__,
		    sa->sa_family);
		break;
	}

	freeifaddrs(ifap);
}
1284 
1285 void
1286 session_tcp_established(struct peer *peer)
1287 {
1288 	struct sockaddr_storage	ss;
1289 	socklen_t		len;
1290 
1291 	len = sizeof(ss);
1292 	if (getsockname(peer->fd, (struct sockaddr *)&ss, &len) == -1)
1293 		log_warn("getsockname");
1294 	sa2addr((struct sockaddr *)&ss, &peer->local, &peer->local_port);
1295 	get_alternate_addr((struct sockaddr *)&ss, &peer->local_alt);
1296 	len = sizeof(ss);
1297 	if (getpeername(peer->fd, (struct sockaddr *)&ss, &len) == -1)
1298 		log_warn("getpeername");
1299 	sa2addr((struct sockaddr *)&ss, &peer->remote, &peer->remote_port);
1300 }
1301 
1302 void
1303 session_capa_ann_none(struct peer *peer)
1304 {
1305 	memset(&peer->capa.ann, 0, sizeof(peer->capa.ann));
1306 }
1307 
/*
 * Append a capability header (code octet followed by length octet) to opb.
 * Returns the sum of the ibuf_add() results; callers treat any non-zero
 * value as failure.
 */
int
session_capa_add(struct ibuf *opb, uint8_t capa_code, uint8_t capa_len)
{
	int	failed;

	failed = ibuf_add(opb, &capa_code, sizeof(capa_code));
	failed += ibuf_add(opb, &capa_len, sizeof(capa_len));

	return (failed);
}
1317 
/*
 * Append the 4 byte value of a multiprotocol capability for aid to buf:
 * AFI in network byte order, one zero octet and the SAFI.
 * An aid without AFI/SAFI mapping is fatal.
 * Returns the sum of the ibuf_add() results; callers treat any non-zero
 * value as failure.
 */
int
session_capa_add_mp(struct ibuf *buf, uint8_t aid)
{
	uint16_t	 afi, netafi;
	uint8_t		 safi, zero = 0;
	int		 failed;

	if (aid2afi(aid, &afi, &safi) == -1)
		fatalx("session_capa_add_mp: bad afi/safi pair");

	netafi = htons(afi);
	failed = ibuf_add(buf, &netafi, sizeof(netafi));
	failed += ibuf_add(buf, &zero, sizeof(zero));
	failed += ibuf_add(buf, &safi, sizeof(safi));

	return (failed);
}
1334 
/*
 * Append one (AFI, SAFI, flags) triple for aid to b: AFI in network byte
 * order followed by the SAFI octet and the flags octet.
 *
 * Returns 1 if aid has no AFI/SAFI mapping, otherwise the sum of the
 * ibuf_add() results; callers treat any non-zero value as failure.
 * The peer argument is currently unused.
 */
int
session_capa_add_afi(struct peer *p, struct ibuf *b, uint8_t aid,
    uint8_t flags)
{
	int		errs = 0;
	uint16_t	afi;
	uint8_t		safi;

	if (aid2afi(aid, &afi, &safi)) {
		/* no errno-setting call is involved here, so use the x variant */
		log_warnx("session_capa_add_afi: bad AID");
		return (1);
	}

	afi = htons(afi);
	errs += ibuf_add(b, &afi, sizeof(afi));
	errs += ibuf_add(b, &safi, sizeof(safi));
	errs += ibuf_add(b, &flags, sizeof(flags));

	return (errs);
}
1355 
1356 struct bgp_msg *
1357 session_newmsg(enum msg_type msgtype, uint16_t len)
1358 {
1359 	struct bgp_msg		*msg;
1360 	struct msg_header	 hdr;
1361 	struct ibuf		*buf;
1362 	int			 errs = 0;
1363 
1364 	memset(&hdr.marker, 0xff, sizeof(hdr.marker));
1365 	hdr.len = htons(len);
1366 	hdr.type = msgtype;
1367 
1368 	if ((buf = ibuf_open(len)) == NULL)
1369 		return (NULL);
1370 
1371 	errs += ibuf_add(buf, &hdr.marker, sizeof(hdr.marker));
1372 	errs += ibuf_add(buf, &hdr.len, sizeof(hdr.len));
1373 	errs += ibuf_add(buf, &hdr.type, sizeof(hdr.type));
1374 
1375 	if (errs || (msg = calloc(1, sizeof(*msg))) == NULL) {
1376 		ibuf_free(buf);
1377 		return (NULL);
1378 	}
1379 
1380 	msg->buf = buf;
1381 	msg->type = msgtype;
1382 	msg->len = len;
1383 
1384 	return (msg);
1385 }
1386 
1387 int
1388 session_sendmsg(struct bgp_msg *msg, struct peer *p)
1389 {
1390 	struct mrt		*mrt;
1391 
1392 	LIST_FOREACH(mrt, &mrthead, entry) {
1393 		if (!(mrt->type == MRT_ALL_OUT || (msg->type == UPDATE &&
1394 		    mrt->type == MRT_UPDATE_OUT)))
1395 			continue;
1396 		if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
1397 		    mrt->peer_id == p->conf.id || (mrt->group_id != 0 &&
1398 		    mrt->group_id == p->conf.groupid))
1399 			mrt_dump_bgp_msg(mrt, msg->buf->buf, msg->len, p,
1400 			    msg->type);
1401 	}
1402 
1403 	ibuf_close(&p->wbuf, msg->buf);
1404 	if (!p->throttled && p->wbuf.queued > SESS_MSG_HIGH_MARK) {
1405 		if (imsg_rde(IMSG_XOFF, p->conf.id, NULL, 0) == -1)
1406 			log_peer_warn(&p->conf, "imsg_compose XOFF");
1407 		else
1408 			p->throttled = 1;
1409 	}
1410 
1411 	free(msg);
1412 	return (0);
1413 }
1414 
1415 /*
1416  * Translate between internal roles and the value expected by RFC 9234.
1417  */
1418 static uint8_t
1419 role2capa(enum role role)
1420 {
1421 	switch (role) {
1422 	case ROLE_CUSTOMER:
1423 		return CAPA_ROLE_CUSTOMER;
1424 	case ROLE_PROVIDER:
1425 		return CAPA_ROLE_PROVIDER;
1426 	case ROLE_RS:
1427 		return CAPA_ROLE_RS;
1428 	case ROLE_RS_CLIENT:
1429 		return CAPA_ROLE_RS_CLIENT;
1430 	case ROLE_PEER:
1431 		return CAPA_ROLE_PEER;
1432 	default:
1433 		fatalx("Unsupported role for role capability");
1434 	}
1435 }
1436 
1437 static enum role
1438 capa2role(uint8_t val)
1439 {
1440 	switch (val) {
1441 	case CAPA_ROLE_PROVIDER:
1442 		return ROLE_PROVIDER;
1443 	case CAPA_ROLE_RS:
1444 		return ROLE_RS;
1445 	case CAPA_ROLE_RS_CLIENT:
1446 		return ROLE_RS_CLIENT;
1447 	case CAPA_ROLE_CUSTOMER:
1448 		return ROLE_CUSTOMER;
1449 	case CAPA_ROLE_PEER:
1450 		return ROLE_PEER;
1451 	default:
1452 		return ROLE_NONE;
1453 	}
1454 }
1455 
1456 void
1457 session_open(struct peer *p)
1458 {
1459 	struct bgp_msg		*buf;
1460 	struct ibuf		*opb;
1461 	struct msg_open		 msg;
1462 	uint16_t		 len, optparamlen = 0;
1463 	uint8_t			 i, op_type;
1464 	int			 errs = 0, extlen = 0;
1465 	int			 mpcapa = 0;
1466 
1467 
1468 	if ((opb = ibuf_dynamic(0, UINT16_MAX - 3)) == NULL) {
1469 		bgp_fsm(p, EVNT_CON_FATAL);
1470 		return;
1471 	}
1472 
1473 	/* multiprotocol extensions, RFC 4760 */
1474 	for (i = 0; i < AID_MAX; i++)
1475 		if (p->capa.ann.mp[i]) {	/* 4 bytes data */
1476 			errs += session_capa_add(opb, CAPA_MP, 4);
1477 			errs += session_capa_add_mp(opb, i);
1478 			mpcapa++;
1479 		}
1480 
1481 	/* route refresh, RFC 2918 */
1482 	if (p->capa.ann.refresh)	/* no data */
1483 		errs += session_capa_add(opb, CAPA_REFRESH, 0);
1484 
1485 	/* BGP open policy, RFC 9234, only for ebgp sessions */
1486 	if (p->conf.ebgp && p->capa.ann.role_ena &&
1487 	    p->capa.ann.role != ROLE_NONE) {
1488 		uint8_t val;
1489 		val = role2capa(p->capa.ann.role);
1490 		errs += session_capa_add(opb, CAPA_ROLE, 1);
1491 		errs += ibuf_add(opb, &val, 1);
1492 	}
1493 
1494 	/* graceful restart and End-of-RIB marker, RFC 4724 */
1495 	if (p->capa.ann.grestart.restart) {
1496 		int		rst = 0;
1497 		uint16_t	hdr = 0;
1498 
1499 		for (i = 0; i < AID_MAX; i++) {
1500 			if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING)
1501 				rst++;
1502 		}
1503 
1504 		/* Only set the R-flag if no graceful restart is ongoing */
1505 		if (!rst)
1506 			hdr |= CAPA_GR_R_FLAG;
1507 		hdr = htons(hdr);
1508 
1509 		errs += session_capa_add(opb, CAPA_RESTART, sizeof(hdr));
1510 		errs += ibuf_add(opb, &hdr, sizeof(hdr));
1511 	}
1512 
1513 	/* 4-bytes AS numbers, RFC6793 */
1514 	if (p->capa.ann.as4byte) {	/* 4 bytes data */
1515 		uint32_t	nas;
1516 
1517 		nas = htonl(p->conf.local_as);
1518 		errs += session_capa_add(opb, CAPA_AS4BYTE, sizeof(nas));
1519 		errs += ibuf_add(opb, &nas, sizeof(nas));
1520 	}
1521 
1522 	/* advertisement of multiple paths, RFC7911 */
1523 	if (p->capa.ann.add_path[0]) {	/* variable */
1524 		uint8_t	aplen;
1525 
1526 		if (mpcapa)
1527 			aplen = 4 * mpcapa;
1528 		else	/* AID_INET */
1529 			aplen = 4;
1530 		errs += session_capa_add(opb, CAPA_ADD_PATH, aplen);
1531 		if (mpcapa) {
1532 			for (i = AID_MIN; i < AID_MAX; i++) {
1533 				if (p->capa.ann.mp[i]) {
1534 					errs += session_capa_add_afi(p, opb,
1535 					    i, p->capa.ann.add_path[i]);
1536 				}
1537 			}
1538 		} else {	/* AID_INET */
1539 			errs += session_capa_add_afi(p, opb, AID_INET,
1540 			    p->capa.ann.add_path[AID_INET]);
1541 		}
1542 	}
1543 
1544 	/* enhanced route-refresh, RFC7313 */
1545 	if (p->capa.ann.enhanced_rr)	/* no data */
1546 		errs += session_capa_add(opb, CAPA_ENHANCED_RR, 0);
1547 
1548 	optparamlen = ibuf_size(opb);
1549 	if (optparamlen == 0) {
1550 		/* nothing */
1551 	} else if (optparamlen + 2 >= 255) {
1552 		/* RFC9072: 2 byte length instead of 1 + 3 byte extra header */
1553 		optparamlen += sizeof(op_type) + 2 + 3;
1554 		msg.optparamlen = 255;
1555 		extlen = 1;
1556 	} else {
1557 		optparamlen += sizeof(op_type) + 1;
1558 		msg.optparamlen = optparamlen;
1559 	}
1560 
1561 	len = MSGSIZE_OPEN_MIN + optparamlen;
1562 	if (errs || (buf = session_newmsg(OPEN, len)) == NULL) {
1563 		ibuf_free(opb);
1564 		bgp_fsm(p, EVNT_CON_FATAL);
1565 		return;
1566 	}
1567 
1568 	msg.version = 4;
1569 	msg.myas = htons(p->conf.local_short_as);
1570 	if (p->conf.holdtime)
1571 		msg.holdtime = htons(p->conf.holdtime);
1572 	else
1573 		msg.holdtime = htons(conf->holdtime);
1574 	msg.bgpid = conf->bgpid;	/* is already in network byte order */
1575 
1576 	errs += ibuf_add(buf->buf, &msg.version, sizeof(msg.version));
1577 	errs += ibuf_add(buf->buf, &msg.myas, sizeof(msg.myas));
1578 	errs += ibuf_add(buf->buf, &msg.holdtime, sizeof(msg.holdtime));
1579 	errs += ibuf_add(buf->buf, &msg.bgpid, sizeof(msg.bgpid));
1580 	errs += ibuf_add(buf->buf, &msg.optparamlen, 1);
1581 
1582 	if (extlen) {
1583 		/* write RFC9072 extra header */
1584 		uint16_t op_extlen = htons(optparamlen - 3);
1585 		op_type = OPT_PARAM_EXT_LEN;
1586 		errs += ibuf_add(buf->buf, &op_type, 1);
1587 		errs += ibuf_add(buf->buf, &op_extlen, 2);
1588 	}
1589 
1590 	if (optparamlen) {
1591 		op_type = OPT_PARAM_CAPABILITIES;
1592 		errs += ibuf_add(buf->buf, &op_type, sizeof(op_type));
1593 
1594 		optparamlen = ibuf_size(opb);
1595 		if (extlen) {
1596 			/* RFC9072: 2-byte extended length */
1597 			uint16_t op_extlen = htons(optparamlen);
1598 			errs += ibuf_add(buf->buf, &op_extlen, 2);
1599 		} else {
1600 			uint8_t op_len = optparamlen;
1601 			errs += ibuf_add(buf->buf, &op_len, 1);
1602 		}
1603 		errs += ibuf_add(buf->buf, opb->buf, ibuf_size(opb));
1604 	}
1605 
1606 	ibuf_free(opb);
1607 
1608 	if (errs) {
1609 		ibuf_free(buf->buf);
1610 		free(buf);
1611 		bgp_fsm(p, EVNT_CON_FATAL);
1612 		return;
1613 	}
1614 
1615 	if (session_sendmsg(buf, p) == -1) {
1616 		bgp_fsm(p, EVNT_CON_FATAL);
1617 		return;
1618 	}
1619 
1620 	p->stats.msg_sent_open++;
1621 }
1622 
1623 void
1624 session_keepalive(struct peer *p)
1625 {
1626 	struct bgp_msg		*buf;
1627 
1628 	if ((buf = session_newmsg(KEEPALIVE, MSGSIZE_KEEPALIVE)) == NULL ||
1629 	    session_sendmsg(buf, p) == -1) {
1630 		bgp_fsm(p, EVNT_CON_FATAL);
1631 		return;
1632 	}
1633 
1634 	start_timer_keepalive(p);
1635 	p->stats.msg_sent_keepalive++;
1636 }
1637 
1638 void
1639 session_update(uint32_t peerid, void *data, size_t datalen)
1640 {
1641 	struct peer		*p;
1642 	struct bgp_msg		*buf;
1643 
1644 	if ((p = getpeerbyid(conf, peerid)) == NULL) {
1645 		log_warnx("no such peer: id=%u", peerid);
1646 		return;
1647 	}
1648 
1649 	if (p->state != STATE_ESTABLISHED)
1650 		return;
1651 
1652 	if ((buf = session_newmsg(UPDATE, MSGSIZE_HEADER + datalen)) == NULL) {
1653 		bgp_fsm(p, EVNT_CON_FATAL);
1654 		return;
1655 	}
1656 
1657 	if (ibuf_add(buf->buf, data, datalen)) {
1658 		ibuf_free(buf->buf);
1659 		free(buf);
1660 		bgp_fsm(p, EVNT_CON_FATAL);
1661 		return;
1662 	}
1663 
1664 	if (session_sendmsg(buf, p) == -1) {
1665 		bgp_fsm(p, EVNT_CON_FATAL);
1666 		return;
1667 	}
1668 
1669 	start_timer_keepalive(p);
1670 	p->stats.msg_sent_update++;
1671 }
1672 
1673 void
1674 session_notification(struct peer *p, uint8_t errcode, uint8_t subcode,
1675     void *data, ssize_t datalen)
1676 {
1677 	struct bgp_msg		*buf;
1678 	int			 errs = 0;
1679 
1680 	if (p->stats.last_sent_errcode)	/* some notification already sent */
1681 		return;
1682 
1683 	log_notification(p, errcode, subcode, data, datalen, "sending");
1684 
1685 	/* cap to maximum size */
1686 	if (datalen > MAX_PKTSIZE - MSGSIZE_NOTIFICATION_MIN) {
1687 		log_peer_warnx(&p->conf,
1688 		    "oversized notification, data trunkated");
1689 		datalen = MAX_PKTSIZE - MSGSIZE_NOTIFICATION_MIN;
1690 	}
1691 
1692 	if ((buf = session_newmsg(NOTIFICATION,
1693 	    MSGSIZE_NOTIFICATION_MIN + datalen)) == NULL) {
1694 		bgp_fsm(p, EVNT_CON_FATAL);
1695 		return;
1696 	}
1697 
1698 	errs += ibuf_add(buf->buf, &errcode, sizeof(errcode));
1699 	errs += ibuf_add(buf->buf, &subcode, sizeof(subcode));
1700 
1701 	if (datalen > 0)
1702 		errs += ibuf_add(buf->buf, data, datalen);
1703 
1704 	if (errs) {
1705 		ibuf_free(buf->buf);
1706 		free(buf);
1707 		bgp_fsm(p, EVNT_CON_FATAL);
1708 		return;
1709 	}
1710 
1711 	if (session_sendmsg(buf, p) == -1) {
1712 		bgp_fsm(p, EVNT_CON_FATAL);
1713 		return;
1714 	}
1715 
1716 	p->stats.msg_sent_notification++;
1717 	p->stats.last_sent_errcode = errcode;
1718 	p->stats.last_sent_suberr = subcode;
1719 }
1720 
1721 int
1722 session_neighbor_rrefresh(struct peer *p)
1723 {
1724 	uint8_t	i;
1725 
1726 	if (!(p->capa.neg.refresh || p->capa.neg.enhanced_rr))
1727 		return (-1);
1728 
1729 	for (i = 0; i < AID_MAX; i++) {
1730 		if (p->capa.neg.mp[i] != 0)
1731 			session_rrefresh(p, i, ROUTE_REFRESH_REQUEST);
1732 	}
1733 
1734 	return (0);
1735 }
1736 
1737 void
1738 session_rrefresh(struct peer *p, uint8_t aid, uint8_t subtype)
1739 {
1740 	struct bgp_msg		*buf;
1741 	int			 errs = 0;
1742 	uint16_t		 afi;
1743 	uint8_t			 safi;
1744 
1745 	switch (subtype) {
1746 	case ROUTE_REFRESH_REQUEST:
1747 		p->stats.refresh_sent_req++;
1748 		break;
1749 	case ROUTE_REFRESH_BEGIN_RR:
1750 	case ROUTE_REFRESH_END_RR:
1751 		/* requires enhanced route refresh */
1752 		if (!p->capa.neg.enhanced_rr)
1753 			return;
1754 		if (subtype == ROUTE_REFRESH_BEGIN_RR)
1755 			p->stats.refresh_sent_borr++;
1756 		else
1757 			p->stats.refresh_sent_eorr++;
1758 		break;
1759 	default:
1760 		fatalx("session_rrefresh: bad subtype %d", subtype);
1761 	}
1762 
1763 	if (aid2afi(aid, &afi, &safi) == -1)
1764 		fatalx("session_rrefresh: bad afi/safi pair");
1765 
1766 	if ((buf = session_newmsg(RREFRESH, MSGSIZE_RREFRESH)) == NULL) {
1767 		bgp_fsm(p, EVNT_CON_FATAL);
1768 		return;
1769 	}
1770 
1771 	afi = htons(afi);
1772 	errs += ibuf_add(buf->buf, &afi, sizeof(afi));
1773 	errs += ibuf_add(buf->buf, &subtype, sizeof(subtype));
1774 	errs += ibuf_add(buf->buf, &safi, sizeof(safi));
1775 
1776 	if (errs) {
1777 		ibuf_free(buf->buf);
1778 		free(buf);
1779 		bgp_fsm(p, EVNT_CON_FATAL);
1780 		return;
1781 	}
1782 
1783 	if (session_sendmsg(buf, p) == -1) {
1784 		bgp_fsm(p, EVNT_CON_FATAL);
1785 		return;
1786 	}
1787 
1788 	p->stats.msg_sent_rrefresh++;
1789 }
1790 
1791 int
1792 session_graceful_restart(struct peer *p)
1793 {
1794 	uint8_t	i;
1795 
1796 	timer_set(&p->timers, Timer_RestartTimeout,
1797 	    p->capa.neg.grestart.timeout);
1798 
1799 	for (i = 0; i < AID_MAX; i++) {
1800 		if (p->capa.neg.grestart.flags[i] & CAPA_GR_PRESENT) {
1801 			if (imsg_rde(IMSG_SESSION_STALE, p->conf.id,
1802 			    &i, sizeof(i)) == -1)
1803 				return (-1);
1804 			log_peer_warnx(&p->conf,
1805 			    "graceful restart of %s, keeping routes",
1806 			    aid2str(i));
1807 			p->capa.neg.grestart.flags[i] |= CAPA_GR_RESTARTING;
1808 		} else if (p->capa.neg.mp[i]) {
1809 			if (imsg_rde(IMSG_SESSION_NOGRACE, p->conf.id,
1810 			    &i, sizeof(i)) == -1)
1811 				return (-1);
1812 			log_peer_warnx(&p->conf,
1813 			    "graceful restart of %s, flushing routes",
1814 			    aid2str(i));
1815 		}
1816 	}
1817 	return (0);
1818 }
1819 
1820 int
1821 session_graceful_stop(struct peer *p)
1822 {
1823 	uint8_t	i;
1824 
1825 	for (i = 0; i < AID_MAX; i++) {
1826 		/*
1827 		 * Only flush if the peer is restarting and the timeout fired.
1828 		 * In all other cases the session was already flushed when the
1829 		 * session went down or when the new open message was parsed.
1830 		 */
1831 		if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING) {
1832 			log_peer_warnx(&p->conf, "graceful restart of %s, "
1833 			    "time-out, flushing", aid2str(i));
1834 			if (imsg_rde(IMSG_SESSION_FLUSH, p->conf.id,
1835 			    &i, sizeof(i)) == -1)
1836 				return (-1);
1837 		}
1838 		p->capa.neg.grestart.flags[i] &= ~CAPA_GR_RESTARTING;
1839 	}
1840 	return (0);
1841 }
1842 
1843 int
1844 session_dispatch_msg(struct pollfd *pfd, struct peer *p)
1845 {
1846 	ssize_t		n;
1847 	socklen_t	len;
1848 	int		error;
1849 
1850 	if (p->state == STATE_CONNECT) {
1851 		if (pfd->revents & POLLOUT) {
1852 			if (pfd->revents & POLLIN) {
1853 				/* error occurred */
1854 				len = sizeof(error);
1855 				if (getsockopt(pfd->fd, SOL_SOCKET, SO_ERROR,
1856 				    &error, &len) == -1 || error) {
1857 					if (error)
1858 						errno = error;
1859 					if (errno != p->lasterr) {
1860 						log_peer_warn(&p->conf,
1861 						    "socket error");
1862 						p->lasterr = errno;
1863 					}
1864 					bgp_fsm(p, EVNT_CON_OPENFAIL);
1865 					return (1);
1866 				}
1867 			}
1868 			bgp_fsm(p, EVNT_CON_OPEN);
1869 			return (1);
1870 		}
1871 		if (pfd->revents & POLLHUP) {
1872 			bgp_fsm(p, EVNT_CON_OPENFAIL);
1873 			return (1);
1874 		}
1875 		if (pfd->revents & (POLLERR|POLLNVAL)) {
1876 			bgp_fsm(p, EVNT_CON_FATAL);
1877 			return (1);
1878 		}
1879 		return (0);
1880 	}
1881 
1882 	if (pfd->revents & POLLHUP) {
1883 		bgp_fsm(p, EVNT_CON_CLOSED);
1884 		return (1);
1885 	}
1886 	if (pfd->revents & (POLLERR|POLLNVAL)) {
1887 		bgp_fsm(p, EVNT_CON_FATAL);
1888 		return (1);
1889 	}
1890 
1891 	if (pfd->revents & POLLOUT && p->wbuf.queued) {
1892 		if ((error = msgbuf_write(&p->wbuf)) <= 0 && errno != EAGAIN) {
1893 			if (error == 0)
1894 				log_peer_warnx(&p->conf, "Connection closed");
1895 			else if (error == -1)
1896 				log_peer_warn(&p->conf, "write error");
1897 			bgp_fsm(p, EVNT_CON_FATAL);
1898 			return (1);
1899 		}
1900 		p->stats.last_write = getmonotime();
1901 		if (p->holdtime > 0)
1902 			timer_set(&p->timers, Timer_SendHold,
1903 			    p->holdtime < INTERVAL_HOLD ? INTERVAL_HOLD :
1904 			    p->holdtime);
1905 		if (p->throttled && p->wbuf.queued < SESS_MSG_LOW_MARK) {
1906 			if (imsg_rde(IMSG_XON, p->conf.id, NULL, 0) == -1)
1907 				log_peer_warn(&p->conf, "imsg_compose XON");
1908 			else
1909 				p->throttled = 0;
1910 		}
1911 		if (!(pfd->revents & POLLIN))
1912 			return (1);
1913 	}
1914 
1915 	if (p->rbuf && pfd->revents & POLLIN) {
1916 		if ((n = read(p->fd, p->rbuf->buf + p->rbuf->wpos,
1917 		    sizeof(p->rbuf->buf) - p->rbuf->wpos)) == -1) {
1918 			if (errno != EINTR && errno != EAGAIN) {
1919 				log_peer_warn(&p->conf, "read error");
1920 				bgp_fsm(p, EVNT_CON_FATAL);
1921 			}
1922 			return (1);
1923 		}
1924 		if (n == 0) {	/* connection closed */
1925 			bgp_fsm(p, EVNT_CON_CLOSED);
1926 			return (1);
1927 		}
1928 
1929 		p->rbuf->wpos += n;
1930 		p->stats.last_read = getmonotime();
1931 		return (1);
1932 	}
1933 	return (0);
1934 }
1935 
1936 void
1937 session_process_msg(struct peer *p)
1938 {
1939 	struct mrt	*mrt;
1940 	ssize_t		rpos, av, left;
1941 	int		processed = 0;
1942 	uint16_t	msglen;
1943 	uint8_t		msgtype;
1944 
1945 	rpos = 0;
1946 	av = p->rbuf->wpos;
1947 	p->rpending = 0;
1948 
1949 	/*
1950 	 * session might drop to IDLE -> buffers deallocated
1951 	 * we MUST check rbuf != NULL before use
1952 	 */
1953 	for (;;) {
1954 		if (p->rbuf == NULL)
1955 			return;
1956 		if (rpos + MSGSIZE_HEADER > av)
1957 			break;
1958 		if (parse_header(p, p->rbuf->buf + rpos, &msglen,
1959 		    &msgtype) == -1)
1960 			return;
1961 		if (rpos + msglen > av)
1962 			break;
1963 		p->rbuf->rptr = p->rbuf->buf + rpos;
1964 
1965 		/* dump to MRT as soon as we have a full packet */
1966 		LIST_FOREACH(mrt, &mrthead, entry) {
1967 			if (!(mrt->type == MRT_ALL_IN || (msgtype == UPDATE &&
1968 			    mrt->type == MRT_UPDATE_IN)))
1969 				continue;
1970 			if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
1971 			    mrt->peer_id == p->conf.id || (mrt->group_id != 0 &&
1972 			    mrt->group_id == p->conf.groupid))
1973 				mrt_dump_bgp_msg(mrt, p->rbuf->rptr, msglen, p,
1974 				    msgtype);
1975 		}
1976 
1977 		switch (msgtype) {
1978 		case OPEN:
1979 			bgp_fsm(p, EVNT_RCVD_OPEN);
1980 			p->stats.msg_rcvd_open++;
1981 			break;
1982 		case UPDATE:
1983 			bgp_fsm(p, EVNT_RCVD_UPDATE);
1984 			p->stats.msg_rcvd_update++;
1985 			break;
1986 		case NOTIFICATION:
1987 			bgp_fsm(p, EVNT_RCVD_NOTIFICATION);
1988 			p->stats.msg_rcvd_notification++;
1989 			break;
1990 		case KEEPALIVE:
1991 			bgp_fsm(p, EVNT_RCVD_KEEPALIVE);
1992 			p->stats.msg_rcvd_keepalive++;
1993 			break;
1994 		case RREFRESH:
1995 			parse_rrefresh(p);
1996 			p->stats.msg_rcvd_rrefresh++;
1997 			break;
1998 		default:	/* cannot happen */
1999 			session_notification(p, ERR_HEADER, ERR_HDR_TYPE,
2000 			    &msgtype, 1);
2001 			log_warnx("received message with unknown type %u",
2002 			    msgtype);
2003 			bgp_fsm(p, EVNT_CON_FATAL);
2004 		}
2005 		rpos += msglen;
2006 		if (++processed > MSG_PROCESS_LIMIT) {
2007 			p->rpending = 1;
2008 			break;
2009 		}
2010 	}
2011 
2012 	if (rpos < av) {
2013 		left = av - rpos;
2014 		memmove(&p->rbuf->buf, p->rbuf->buf + rpos, left);
2015 		p->rbuf->wpos = left;
2016 	} else
2017 		p->rbuf->wpos = 0;
2018 }
2019 
2020 int
2021 parse_header(struct peer *peer, u_char *data, uint16_t *len, uint8_t *type)
2022 {
2023 	u_char			*p;
2024 	uint16_t		 olen;
2025 	static const uint8_t	 marker[MSGSIZE_HEADER_MARKER] = { 0xff, 0xff,
2026 				    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
2027 				    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
2028 
2029 	/* caller MUST make sure we are getting 19 bytes! */
2030 	p = data;
2031 	if (memcmp(p, marker, sizeof(marker))) {
2032 		log_peer_warnx(&peer->conf, "sync error");
2033 		session_notification(peer, ERR_HEADER, ERR_HDR_SYNC, NULL, 0);
2034 		bgp_fsm(peer, EVNT_CON_FATAL);
2035 		return (-1);
2036 	}
2037 	p += MSGSIZE_HEADER_MARKER;
2038 
2039 	memcpy(&olen, p, 2);
2040 	*len = ntohs(olen);
2041 	p += 2;
2042 	memcpy(type, p, 1);
2043 
2044 	if (*len < MSGSIZE_HEADER || *len > MAX_PKTSIZE) {
2045 		log_peer_warnx(&peer->conf,
2046 		    "received message: illegal length: %u byte", *len);
2047 		session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
2048 		    &olen, sizeof(olen));
2049 		bgp_fsm(peer, EVNT_CON_FATAL);
2050 		return (-1);
2051 	}
2052 
2053 	switch (*type) {
2054 	case OPEN:
2055 		if (*len < MSGSIZE_OPEN_MIN) {
2056 			log_peer_warnx(&peer->conf,
2057 			    "received OPEN: illegal len: %u byte", *len);
2058 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
2059 			    &olen, sizeof(olen));
2060 			bgp_fsm(peer, EVNT_CON_FATAL);
2061 			return (-1);
2062 		}
2063 		break;
2064 	case NOTIFICATION:
2065 		if (*len < MSGSIZE_NOTIFICATION_MIN) {
2066 			log_peer_warnx(&peer->conf,
2067 			    "received NOTIFICATION: illegal len: %u byte",
2068 			    *len);
2069 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
2070 			    &olen, sizeof(olen));
2071 			bgp_fsm(peer, EVNT_CON_FATAL);
2072 			return (-1);
2073 		}
2074 		break;
2075 	case UPDATE:
2076 		if (*len < MSGSIZE_UPDATE_MIN) {
2077 			log_peer_warnx(&peer->conf,
2078 			    "received UPDATE: illegal len: %u byte", *len);
2079 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
2080 			    &olen, sizeof(olen));
2081 			bgp_fsm(peer, EVNT_CON_FATAL);
2082 			return (-1);
2083 		}
2084 		break;
2085 	case KEEPALIVE:
2086 		if (*len != MSGSIZE_KEEPALIVE) {
2087 			log_peer_warnx(&peer->conf,
2088 			    "received KEEPALIVE: illegal len: %u byte", *len);
2089 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
2090 			    &olen, sizeof(olen));
2091 			bgp_fsm(peer, EVNT_CON_FATAL);
2092 			return (-1);
2093 		}
2094 		break;
2095 	case RREFRESH:
2096 		if (*len < MSGSIZE_RREFRESH_MIN) {
2097 			log_peer_warnx(&peer->conf,
2098 			    "received RREFRESH: illegal len: %u byte", *len);
2099 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
2100 			    &olen, sizeof(olen));
2101 			bgp_fsm(peer, EVNT_CON_FATAL);
2102 			return (-1);
2103 		}
2104 		break;
2105 	default:
2106 		log_peer_warnx(&peer->conf,
2107 		    "received msg with unknown type %u", *type);
2108 		session_notification(peer, ERR_HEADER, ERR_HDR_TYPE,
2109 		    type, 1);
2110 		bgp_fsm(peer, EVNT_CON_FATAL);
2111 		return (-1);
2112 	}
2113 	return (0);
2114 }
2115 
2116 int
2117 parse_open(struct peer *peer)
2118 {
2119 	u_char		*p, *op_val;
2120 	uint8_t		 version, rversion;
2121 	uint16_t	 short_as, msglen;
2122 	uint16_t	 holdtime, oholdtime, myholdtime;
2123 	uint32_t	 as, bgpid;
2124 	uint16_t	 optparamlen, extlen, plen, op_len;
2125 	uint8_t		 op_type, suberr = 0;
2126 
2127 	p = peer->rbuf->rptr;
2128 	p += MSGSIZE_HEADER_MARKER;
2129 	memcpy(&msglen, p, sizeof(msglen));
2130 	msglen = ntohs(msglen);
2131 
2132 	p = peer->rbuf->rptr;
2133 	p += MSGSIZE_HEADER;	/* header is already checked */
2134 
2135 	memcpy(&version, p, sizeof(version));
2136 	p += sizeof(version);
2137 
2138 	if (version != BGP_VERSION) {
2139 		log_peer_warnx(&peer->conf,
2140 		    "peer wants unrecognized version %u", version);
2141 		if (version > BGP_VERSION)
2142 			rversion = version - BGP_VERSION;
2143 		else
2144 			rversion = BGP_VERSION;
2145 		session_notification(peer, ERR_OPEN, ERR_OPEN_VERSION,
2146 		    &rversion, sizeof(rversion));
2147 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2148 		return (-1);
2149 	}
2150 
2151 	memcpy(&short_as, p, sizeof(short_as));
2152 	p += sizeof(short_as);
2153 	as = peer->short_as = ntohs(short_as);
2154 	if (as == 0) {
2155 		log_peer_warnx(&peer->conf,
2156 		    "peer requests unacceptable AS %u", as);
2157 		session_notification(peer, ERR_OPEN, ERR_OPEN_AS,
2158 		    NULL, 0);
2159 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2160 		return (-1);
2161 	}
2162 
2163 	memcpy(&oholdtime, p, sizeof(oholdtime));
2164 	p += sizeof(oholdtime);
2165 
2166 	holdtime = ntohs(oholdtime);
2167 	if (holdtime && holdtime < peer->conf.min_holdtime) {
2168 		log_peer_warnx(&peer->conf,
2169 		    "peer requests unacceptable holdtime %u", holdtime);
2170 		session_notification(peer, ERR_OPEN, ERR_OPEN_HOLDTIME,
2171 		    NULL, 0);
2172 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2173 		return (-1);
2174 	}
2175 
2176 	myholdtime = peer->conf.holdtime;
2177 	if (!myholdtime)
2178 		myholdtime = conf->holdtime;
2179 	if (holdtime < myholdtime)
2180 		peer->holdtime = holdtime;
2181 	else
2182 		peer->holdtime = myholdtime;
2183 
2184 	memcpy(&bgpid, p, sizeof(bgpid));
2185 	p += sizeof(bgpid);
2186 
2187 	/* check bgpid for validity - just disallow 0 */
2188 	if (ntohl(bgpid) == 0) {
2189 		log_peer_warnx(&peer->conf, "peer BGPID %u unacceptable",
2190 		    ntohl(bgpid));
2191 		session_notification(peer, ERR_OPEN, ERR_OPEN_BGPID,
2192 		    NULL, 0);
2193 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2194 		return (-1);
2195 	}
2196 	peer->remote_bgpid = bgpid;
2197 
2198 	extlen = 0;
2199 	optparamlen = *p++;
2200 
2201 	if (optparamlen == 0) {
2202 		if (msglen != MSGSIZE_OPEN_MIN) {
2203 bad_len:
2204 			log_peer_warnx(&peer->conf,
2205 			    "corrupt OPEN message received: length mismatch");
2206 			session_notification(peer, ERR_OPEN, 0, NULL, 0);
2207 			change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2208 			return (-1);
2209 		}
2210 	} else {
2211 		if (msglen < MSGSIZE_OPEN_MIN + 1)
2212 			goto bad_len;
2213 
2214 		op_type = *p;
2215 		if (op_type == OPT_PARAM_EXT_LEN) {
2216 			p++;
2217 			memcpy(&optparamlen, p, sizeof(optparamlen));
2218 			optparamlen = ntohs(optparamlen);
2219 			p += sizeof(optparamlen);
2220 			extlen = 1;
2221 		}
2222 
2223 		/* RFC9020 encoding has 3 extra bytes */
2224 		if (optparamlen + 3 * extlen != msglen - MSGSIZE_OPEN_MIN)
2225 			goto bad_len;
2226 	}
2227 
2228 	plen = optparamlen;
2229 	while (plen > 0) {
2230 		if (plen < 2 + extlen)
2231 			goto bad_len;
2232 
2233 		memcpy(&op_type, p, sizeof(op_type));
2234 		p += sizeof(op_type);
2235 		plen -= sizeof(op_type);
2236 		if (!extlen) {
2237 			op_len = *p++;
2238 			plen--;
2239 		} else {
2240 			memcpy(&op_len, p, sizeof(op_len));
2241 			op_len = ntohs(op_len);
2242 			p += sizeof(op_len);
2243 			plen -= sizeof(op_len);
2244 		}
2245 		if (op_len > 0) {
2246 			if (plen < op_len)
2247 				goto bad_len;
2248 			op_val = p;
2249 			p += op_len;
2250 			plen -= op_len;
2251 		} else
2252 			op_val = NULL;
2253 
2254 		switch (op_type) {
2255 		case OPT_PARAM_CAPABILITIES:		/* RFC 3392 */
2256 			if (parse_capabilities(peer, op_val, op_len,
2257 			    &as) == -1) {
2258 				session_notification(peer, ERR_OPEN, 0,
2259 				    NULL, 0);
2260 				change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2261 				return (-1);
2262 			}
2263 			break;
2264 		case OPT_PARAM_AUTH:			/* deprecated */
2265 		default:
2266 			/*
2267 			 * unsupported type
2268 			 * the RFCs tell us to leave the data section empty
2269 			 * and notify the peer with ERR_OPEN, ERR_OPEN_OPT.
2270 			 * How the peer should know _which_ optional parameter
2271 			 * we don't support is beyond me.
2272 			 */
2273 			log_peer_warnx(&peer->conf,
2274 			    "received OPEN message with unsupported optional "
2275 			    "parameter: type %u", op_type);
2276 			session_notification(peer, ERR_OPEN, ERR_OPEN_OPT,
2277 				NULL, 0);
2278 			change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2279 			/* no punish */
2280 			timer_set(&peer->timers, Timer_IdleHold, 0);
2281 			peer->IdleHoldTime /= 2;
2282 			return (-1);
2283 		}
2284 	}
2285 
2286 	/* if remote-as is zero and it's a cloned neighbor, accept any */
2287 	if (peer->template && !peer->conf.remote_as && as != AS_TRANS) {
2288 		peer->conf.remote_as = as;
2289 		peer->conf.ebgp = (peer->conf.remote_as != peer->conf.local_as);
2290 		if (!peer->conf.ebgp)
2291 			/* force enforce_as off for iBGP sessions */
2292 			peer->conf.enforce_as = ENFORCE_AS_OFF;
2293 	}
2294 
2295 	if (peer->conf.remote_as != as) {
2296 		log_peer_warnx(&peer->conf, "peer sent wrong AS %s",
2297 		    log_as(as));
2298 		session_notification(peer, ERR_OPEN, ERR_OPEN_AS, NULL, 0);
2299 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2300 		return (-1);
2301 	}
2302 
2303 	/* on iBGP sessions check for bgpid collision */
2304 	if (!peer->conf.ebgp && peer->remote_bgpid == conf->bgpid) {
2305 		log_peer_warnx(&peer->conf, "peer BGPID %u conflicts with ours",
2306 		    ntohl(bgpid));
2307 		session_notification(peer, ERR_OPEN, ERR_OPEN_BGPID,
2308 		    NULL, 0);
2309 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2310 		return (-1);
2311 	}
2312 
2313 	if (capa_neg_calc(peer, &suberr) == -1) {
2314 		session_notification(peer, ERR_OPEN, suberr, NULL, 0);
2315 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2316 		return (-1);
2317 	}
2318 
2319 	return (0);
2320 }
2321 
2322 int
2323 parse_update(struct peer *peer)
2324 {
2325 	u_char		*p;
2326 	uint16_t	 datalen;
2327 
2328 	/*
2329 	 * we pass the message verbatim to the rde.
2330 	 * in case of errors the whole session is reset with a
2331 	 * notification anyway, we only need to know the peer
2332 	 */
2333 	p = peer->rbuf->rptr;
2334 	p += MSGSIZE_HEADER_MARKER;
2335 	memcpy(&datalen, p, sizeof(datalen));
2336 	datalen = ntohs(datalen);
2337 
2338 	p = peer->rbuf->rptr;
2339 	p += MSGSIZE_HEADER;	/* header is already checked */
2340 	datalen -= MSGSIZE_HEADER;
2341 
2342 	if (imsg_rde(IMSG_UPDATE, peer->conf.id, p, datalen) == -1)
2343 		return (-1);
2344 
2345 	return (0);
2346 }
2347 
2348 int
2349 parse_rrefresh(struct peer *peer)
2350 {
2351 	struct route_refresh rr;
2352 	uint16_t afi, datalen;
2353 	uint8_t aid, safi, subtype;
2354 	u_char *p;
2355 
2356 	p = peer->rbuf->rptr;
2357 	p += MSGSIZE_HEADER_MARKER;
2358 	memcpy(&datalen, p, sizeof(datalen));
2359 	datalen = ntohs(datalen);
2360 
2361 	p = peer->rbuf->rptr;
2362 	p += MSGSIZE_HEADER;	/* header is already checked */
2363 
2364 	/*
2365 	 * We could check if we actually announced the capability but
2366 	 * as long as the message is correctly encoded we don't care.
2367 	 */
2368 
2369 	/* afi, 2 byte */
2370 	memcpy(&afi, p, sizeof(afi));
2371 	afi = ntohs(afi);
2372 	p += 2;
2373 	/* subtype, 1 byte */
2374 	subtype = *p;
2375 	p += 1;
2376 	/* safi, 1 byte */
2377 	safi = *p;
2378 
2379 	/* check subtype if peer announced enhanced route refresh */
2380 	if (peer->capa.neg.enhanced_rr) {
2381 		switch (subtype) {
2382 		case ROUTE_REFRESH_REQUEST:
2383 			/* no ORF support, so no oversized RREFRESH msgs */
2384 			if (datalen != MSGSIZE_RREFRESH) {
2385 				log_peer_warnx(&peer->conf,
2386 				    "received RREFRESH: illegal len: %u byte",
2387 				    datalen);
2388 				datalen = htons(datalen);
2389 				session_notification(peer, ERR_HEADER,
2390 				    ERR_HDR_LEN, &datalen, sizeof(datalen));
2391 				bgp_fsm(peer, EVNT_CON_FATAL);
2392 				return (-1);
2393 			}
2394 			peer->stats.refresh_rcvd_req++;
2395 			break;
2396 		case ROUTE_REFRESH_BEGIN_RR:
2397 		case ROUTE_REFRESH_END_RR:
2398 			/* special handling for RFC7313 */
2399 			if (datalen != MSGSIZE_RREFRESH) {
2400 				log_peer_warnx(&peer->conf,
2401 				    "received RREFRESH: illegal len: %u byte",
2402 				    datalen);
2403 				p = peer->rbuf->rptr;
2404 				p += MSGSIZE_HEADER;
2405 				datalen -= MSGSIZE_HEADER;
2406 				session_notification(peer, ERR_RREFRESH,
2407 				    ERR_RR_INV_LEN, p, datalen);
2408 				bgp_fsm(peer, EVNT_CON_FATAL);
2409 				return (-1);
2410 			}
2411 			if (subtype == ROUTE_REFRESH_BEGIN_RR)
2412 				peer->stats.refresh_rcvd_borr++;
2413 			else
2414 				peer->stats.refresh_rcvd_eorr++;
2415 			break;
2416 		default:
2417 			log_peer_warnx(&peer->conf, "peer sent bad refresh, "
2418 			    "bad subtype %d", subtype);
2419 			return (0);
2420 		}
2421 	} else {
2422 		/* force subtype to default */
2423 		subtype = ROUTE_REFRESH_REQUEST;
2424 		peer->stats.refresh_rcvd_req++;
2425 	}
2426 
2427 	/* afi/safi unchecked -	unrecognized values will be ignored anyway */
2428 	if (afi2aid(afi, safi, &aid) == -1) {
2429 		log_peer_warnx(&peer->conf, "peer sent bad refresh, "
2430 		    "invalid afi/safi pair");
2431 		return (0);
2432 	}
2433 
2434 	if (!peer->capa.neg.refresh && !peer->capa.neg.enhanced_rr) {
2435 		log_peer_warnx(&peer->conf, "peer sent unexpected refresh");
2436 		return (0);
2437 	}
2438 
2439 	rr.aid = aid;
2440 	rr.subtype = subtype;
2441 
2442 	if (imsg_rde(IMSG_REFRESH, peer->conf.id, &rr, sizeof(rr)) == -1)
2443 		return (-1);
2444 
2445 	return (0);
2446 }
2447 
2448 int
2449 parse_notification(struct peer *peer)
2450 {
2451 	u_char		*p;
2452 	uint16_t	 datalen;
2453 	uint8_t		 errcode;
2454 	uint8_t		 subcode;
2455 	uint8_t		 capa_code;
2456 	uint8_t		 capa_len;
2457 	size_t		 reason_len;
2458 	uint8_t		 i;
2459 
2460 	/* just log */
2461 	p = peer->rbuf->rptr;
2462 	p += MSGSIZE_HEADER_MARKER;
2463 	memcpy(&datalen, p, sizeof(datalen));
2464 	datalen = ntohs(datalen);
2465 
2466 	p = peer->rbuf->rptr;
2467 	p += MSGSIZE_HEADER;	/* header is already checked */
2468 	datalen -= MSGSIZE_HEADER;
2469 
2470 	memcpy(&errcode, p, sizeof(errcode));
2471 	p += sizeof(errcode);
2472 	datalen -= sizeof(errcode);
2473 
2474 	memcpy(&subcode, p, sizeof(subcode));
2475 	p += sizeof(subcode);
2476 	datalen -= sizeof(subcode);
2477 
2478 	log_notification(peer, errcode, subcode, p, datalen, "received");
2479 	peer->errcnt++;
2480 	peer->stats.last_rcvd_errcode = errcode;
2481 	peer->stats.last_rcvd_suberr = subcode;
2482 
2483 	if (errcode == ERR_OPEN && subcode == ERR_OPEN_CAPA) {
2484 		if (datalen == 0) {	/* zebra likes to send those.. humbug */
2485 			log_peer_warnx(&peer->conf, "received \"unsupported "
2486 			    "capability\" notification without data part, "
2487 			    "disabling capability announcements altogether");
2488 			session_capa_ann_none(peer);
2489 		}
2490 
2491 		while (datalen > 0) {
2492 			if (datalen < 2) {
2493 				log_peer_warnx(&peer->conf,
2494 				    "parse_notification: "
2495 				    "expect len >= 2, len is %u", datalen);
2496 				return (-1);
2497 			}
2498 			memcpy(&capa_code, p, sizeof(capa_code));
2499 			p += sizeof(capa_code);
2500 			datalen -= sizeof(capa_code);
2501 			memcpy(&capa_len, p, sizeof(capa_len));
2502 			p += sizeof(capa_len);
2503 			datalen -= sizeof(capa_len);
2504 			if (datalen < capa_len) {
2505 				log_peer_warnx(&peer->conf,
2506 				    "parse_notification: capa_len %u exceeds "
2507 				    "remaining msg length %u", capa_len,
2508 				    datalen);
2509 				return (-1);
2510 			}
2511 			p += capa_len;
2512 			datalen -= capa_len;
2513 			switch (capa_code) {
2514 			case CAPA_MP:
2515 				for (i = 0; i < AID_MAX; i++)
2516 					peer->capa.ann.mp[i] = 0;
2517 				log_peer_warnx(&peer->conf,
2518 				    "disabling multiprotocol capability");
2519 				break;
2520 			case CAPA_REFRESH:
2521 				peer->capa.ann.refresh = 0;
2522 				log_peer_warnx(&peer->conf,
2523 				    "disabling route refresh capability");
2524 				break;
2525 			case CAPA_ROLE:
2526 				peer->capa.ann.role_ena = 0;
2527 				log_peer_warnx(&peer->conf,
2528 				    "disabling role capability");
2529 				break;
2530 			case CAPA_RESTART:
2531 				peer->capa.ann.grestart.restart = 0;
2532 				log_peer_warnx(&peer->conf,
2533 				    "disabling restart capability");
2534 				break;
2535 			case CAPA_AS4BYTE:
2536 				peer->capa.ann.as4byte = 0;
2537 				log_peer_warnx(&peer->conf,
2538 				    "disabling 4-byte AS num capability");
2539 				break;
2540 			case CAPA_ADD_PATH:
2541 				memset(peer->capa.ann.add_path, 0,
2542 				    sizeof(peer->capa.ann.add_path));
2543 				log_peer_warnx(&peer->conf,
2544 				    "disabling ADD-PATH capability");
2545 				break;
2546 			case CAPA_ENHANCED_RR:
2547 				peer->capa.ann.enhanced_rr = 0;
2548 				log_peer_warnx(&peer->conf,
2549 				    "disabling enhanced route refresh "
2550 				    "capability");
2551 				break;
2552 			default:	/* should not happen... */
2553 				log_peer_warnx(&peer->conf, "received "
2554 				    "\"unsupported capability\" notification "
2555 				    "for unknown capability %u, disabling "
2556 				    "capability announcements altogether",
2557 				    capa_code);
2558 				session_capa_ann_none(peer);
2559 				break;
2560 			}
2561 		}
2562 
2563 		return (1);
2564 	}
2565 
2566 	if (errcode == ERR_OPEN && subcode == ERR_OPEN_OPT) {
2567 		session_capa_ann_none(peer);
2568 		return (1);
2569 	}
2570 
2571 	if (errcode == ERR_CEASE &&
2572 	    (subcode == ERR_CEASE_ADMIN_DOWN ||
2573 	     subcode == ERR_CEASE_ADMIN_RESET)) {
2574 		if (datalen > 1) {
2575 			reason_len = *p++;
2576 			datalen--;
2577 			if (datalen < reason_len) {
2578 				log_peer_warnx(&peer->conf,
2579 				    "received truncated shutdown reason");
2580 				return (0);
2581 			}
2582 			if (reason_len > REASON_LEN - 1) {
2583 				log_peer_warnx(&peer->conf,
2584 				    "received overly long shutdown reason");
2585 				return (0);
2586 			}
2587 			memcpy(peer->stats.last_reason, p, reason_len);
2588 			peer->stats.last_reason[reason_len] = '\0';
2589 			log_peer_warnx(&peer->conf,
2590 			    "received shutdown reason: \"%s\"",
2591 			    log_reason(peer->stats.last_reason));
2592 			p += reason_len;
2593 			datalen -= reason_len;
2594 		}
2595 	}
2596 
2597 	return (0);
2598 }
2599 
2600 int
2601 parse_capabilities(struct peer *peer, u_char *d, uint16_t dlen, uint32_t *as)
2602 {
2603 	u_char		*capa_val;
2604 	uint32_t	 remote_as;
2605 	uint16_t	 len;
2606 	uint16_t	 afi;
2607 	uint16_t	 gr_header;
2608 	uint8_t		 safi;
2609 	uint8_t		 aid;
2610 	uint8_t		 flags;
2611 	uint8_t		 capa_code;
2612 	uint8_t		 capa_len;
2613 	uint8_t		 i;
2614 
2615 	len = dlen;
2616 	while (len > 0) {
2617 		if (len < 2) {
2618 			log_peer_warnx(&peer->conf, "Bad capabilities attr "
2619 			    "length: %u, too short", len);
2620 			return (-1);
2621 		}
2622 		memcpy(&capa_code, d, sizeof(capa_code));
2623 		d += sizeof(capa_code);
2624 		len -= sizeof(capa_code);
2625 		memcpy(&capa_len, d, sizeof(capa_len));
2626 		d += sizeof(capa_len);
2627 		len -= sizeof(capa_len);
2628 		if (capa_len > 0) {
2629 			if (len < capa_len) {
2630 				log_peer_warnx(&peer->conf,
2631 				    "Bad capabilities attr length: "
2632 				    "len %u smaller than capa_len %u",
2633 				    len, capa_len);
2634 				return (-1);
2635 			}
2636 			capa_val = d;
2637 			d += capa_len;
2638 			len -= capa_len;
2639 		} else
2640 			capa_val = NULL;
2641 
2642 		switch (capa_code) {
2643 		case CAPA_MP:			/* RFC 4760 */
2644 			if (capa_len != 4) {
2645 				log_peer_warnx(&peer->conf,
2646 				    "Bad multi protocol capability length: "
2647 				    "%u", capa_len);
2648 				break;
2649 			}
2650 			memcpy(&afi, capa_val, sizeof(afi));
2651 			afi = ntohs(afi);
2652 			memcpy(&safi, capa_val + 3, sizeof(safi));
2653 			if (afi2aid(afi, safi, &aid) == -1) {
2654 				log_peer_warnx(&peer->conf,
2655 				    "Received multi protocol capability: "
2656 				    " unknown AFI %u, safi %u pair",
2657 				    afi, safi);
2658 				break;
2659 			}
2660 			peer->capa.peer.mp[aid] = 1;
2661 			break;
2662 		case CAPA_REFRESH:
2663 			peer->capa.peer.refresh = 1;
2664 			break;
2665 		case CAPA_ROLE:
2666 			if (capa_len != 1) {
2667 				log_peer_warnx(&peer->conf,
2668 				    "Bad role capability length: %u", capa_len);
2669 				break;
2670 			}
2671 			if (!peer->conf.ebgp)
2672 				log_peer_warnx(&peer->conf,
2673 				    "Received role capability on iBGP session");
2674 			peer->capa.peer.role_ena = 1;
2675 			peer->capa.peer.role = capa2role(*capa_val);
2676 			break;
2677 		case CAPA_RESTART:
2678 			if (capa_len == 2) {
2679 				/* peer only supports EoR marker */
2680 				peer->capa.peer.grestart.restart = 1;
2681 				peer->capa.peer.grestart.timeout = 0;
2682 				break;
2683 			} else if (capa_len % 4 != 2) {
2684 				log_peer_warnx(&peer->conf,
2685 				    "Bad graceful restart capability length: "
2686 				    "%u", capa_len);
2687 				peer->capa.peer.grestart.restart = 0;
2688 				peer->capa.peer.grestart.timeout = 0;
2689 				break;
2690 			}
2691 
2692 			memcpy(&gr_header, capa_val, sizeof(gr_header));
2693 			gr_header = ntohs(gr_header);
2694 			peer->capa.peer.grestart.timeout =
2695 			    gr_header & CAPA_GR_TIMEMASK;
2696 			if (peer->capa.peer.grestart.timeout == 0) {
2697 				log_peer_warnx(&peer->conf, "Received "
2698 				    "graceful restart timeout is zero");
2699 				peer->capa.peer.grestart.restart = 0;
2700 				break;
2701 			}
2702 
2703 			for (i = 2; i <= capa_len - 4; i += 4) {
2704 				memcpy(&afi, capa_val + i, sizeof(afi));
2705 				afi = ntohs(afi);
2706 				safi = capa_val[i + 2];
2707 				flags = capa_val[i + 3];
2708 				if (afi2aid(afi, safi, &aid) == -1) {
2709 					log_peer_warnx(&peer->conf,
2710 					    "Received graceful restart capa: "
2711 					    " unknown AFI %u, safi %u pair",
2712 					    afi, safi);
2713 					continue;
2714 				}
2715 				peer->capa.peer.grestart.flags[aid] |=
2716 				    CAPA_GR_PRESENT;
2717 				if (flags & CAPA_GR_F_FLAG)
2718 					peer->capa.peer.grestart.flags[aid] |=
2719 					    CAPA_GR_FORWARD;
2720 				if (gr_header & CAPA_GR_R_FLAG)
2721 					peer->capa.peer.grestart.flags[aid] |=
2722 					    CAPA_GR_RESTART;
2723 				peer->capa.peer.grestart.restart = 2;
2724 			}
2725 			break;
2726 		case CAPA_AS4BYTE:
2727 			if (capa_len != 4) {
2728 				log_peer_warnx(&peer->conf,
2729 				    "Bad AS4BYTE capability length: "
2730 				    "%u", capa_len);
2731 				peer->capa.peer.as4byte = 0;
2732 				break;
2733 			}
2734 			memcpy(&remote_as, capa_val, sizeof(remote_as));
2735 			*as = ntohl(remote_as);
2736 			if (*as == 0) {
2737 				log_peer_warnx(&peer->conf,
2738 				    "peer requests unacceptable AS %u", *as);
2739 				session_notification(peer, ERR_OPEN,
2740 				    ERR_OPEN_AS, NULL, 0);
2741 				change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2742 				return (-1);
2743 			}
2744 			peer->capa.peer.as4byte = 1;
2745 			break;
2746 		case CAPA_ADD_PATH:
2747 			if (capa_len % 4 != 0) {
2748 				log_peer_warnx(&peer->conf,
2749 				    "Bad ADD-PATH capability length: "
2750 				    "%u", capa_len);
2751 				memset(peer->capa.peer.add_path, 0,
2752 				    sizeof(peer->capa.peer.add_path));
2753 				break;
2754 			}
2755 			for (i = 0; i <= capa_len - 4; i += 4) {
2756 				memcpy(&afi, capa_val + i, sizeof(afi));
2757 				afi = ntohs(afi);
2758 				safi = capa_val[i + 2];
2759 				flags = capa_val[i + 3];
2760 				if (afi2aid(afi, safi, &aid) == -1) {
2761 					log_peer_warnx(&peer->conf,
2762 					    "Received ADD-PATH capa: "
2763 					    " unknown AFI %u, safi %u pair",
2764 					    afi, safi);
2765 					memset(peer->capa.peer.add_path, 0,
2766 					    sizeof(peer->capa.peer.add_path));
2767 					break;
2768 				}
2769 				if (flags & ~CAPA_AP_BIDIR) {
2770 					log_peer_warnx(&peer->conf,
2771 					    "Received ADD-PATH capa: "
2772 					    " bad flags %x", flags);
2773 					memset(peer->capa.peer.add_path, 0,
2774 					    sizeof(peer->capa.peer.add_path));
2775 					break;
2776 				}
2777 				peer->capa.peer.add_path[aid] = flags;
2778 			}
2779 			break;
2780 		case CAPA_ENHANCED_RR:
2781 			peer->capa.peer.enhanced_rr = 1;
2782 			break;
2783 		default:
2784 			break;
2785 		}
2786 	}
2787 
2788 	return (0);
2789 }
2790 
2791 int
2792 capa_neg_calc(struct peer *p, uint8_t *suberr)
2793 {
2794 	uint8_t	i, hasmp = 0;
2795 
2796 	/* a capability is accepted only if both sides announced it */
2797 
2798 	p->capa.neg.refresh =
2799 	    (p->capa.ann.refresh && p->capa.peer.refresh) != 0;
2800 	p->capa.neg.enhanced_rr =
2801 	    (p->capa.ann.enhanced_rr && p->capa.peer.enhanced_rr) != 0;
2802 
2803 	p->capa.neg.as4byte =
2804 	    (p->capa.ann.as4byte && p->capa.peer.as4byte) != 0;
2805 
2806 	/* MP: both side must agree on the AFI,SAFI pair */
2807 	for (i = 0; i < AID_MAX; i++) {
2808 		if (p->capa.ann.mp[i] && p->capa.peer.mp[i])
2809 			p->capa.neg.mp[i] = 1;
2810 		else
2811 			p->capa.neg.mp[i] = 0;
2812 		if (p->capa.ann.mp[i])
2813 			hasmp = 1;
2814 	}
2815 	/* if no MP capability present default to IPv4 unicast mode */
2816 	if (!hasmp)
2817 		p->capa.neg.mp[AID_INET] = 1;
2818 
2819 	/*
2820 	 * graceful restart: the peer capabilities are of interest here.
2821 	 * It is necessary to compare the new values with the previous ones
2822 	 * and act accordingly. AFI/SAFI that are not part in the MP capability
2823 	 * are treated as not being present.
2824 	 * Also make sure that a flush happens if the session stopped
2825 	 * supporting graceful restart.
2826 	 */
2827 
2828 	for (i = 0; i < AID_MAX; i++) {
2829 		int8_t	negflags;
2830 
2831 		/* disable GR if the AFI/SAFI is not present */
2832 		if ((p->capa.peer.grestart.flags[i] & CAPA_GR_PRESENT &&
2833 		    p->capa.neg.mp[i] == 0))
2834 			p->capa.peer.grestart.flags[i] = 0;	/* disable */
2835 		/* look at current GR state and decide what to do */
2836 		negflags = p->capa.neg.grestart.flags[i];
2837 		p->capa.neg.grestart.flags[i] = p->capa.peer.grestart.flags[i];
2838 		if (negflags & CAPA_GR_RESTARTING) {
2839 			if (p->capa.ann.grestart.restart != 0 &&
2840 			    p->capa.peer.grestart.flags[i] & CAPA_GR_FORWARD) {
2841 				p->capa.neg.grestart.flags[i] |=
2842 				    CAPA_GR_RESTARTING;
2843 			} else {
2844 				if (imsg_rde(IMSG_SESSION_FLUSH, p->conf.id,
2845 				    &i, sizeof(i)) == -1) {
2846 					log_peer_warnx(&p->conf,
2847 					    "imsg send failed");
2848 					return (-1);
2849 				}
2850 				log_peer_warnx(&p->conf, "graceful restart of "
2851 				    "%s, not restarted, flushing", aid2str(i));
2852 			}
2853 		}
2854 	}
2855 	p->capa.neg.grestart.timeout = p->capa.peer.grestart.timeout;
2856 	p->capa.neg.grestart.restart = p->capa.peer.grestart.restart;
2857 	if (p->capa.ann.grestart.restart == 0)
2858 		p->capa.neg.grestart.restart = 0;
2859 
2860 
2861 	/*
2862 	 * ADD-PATH: set only those bits where both sides agree.
2863 	 * For this compare our send bit with the recv bit from the peer
2864 	 * and vice versa.
2865 	 * The flags are stored from this systems view point.
2866 	 */
2867 	memset(p->capa.neg.add_path, 0, sizeof(p->capa.neg.add_path));
2868 	if (p->capa.ann.add_path[0]) {
2869 		for (i = AID_MIN; i < AID_MAX; i++) {
2870 			if ((p->capa.ann.add_path[i] & CAPA_AP_RECV) &&
2871 			    (p->capa.peer.add_path[i] & CAPA_AP_SEND)) {
2872 				p->capa.neg.add_path[i] |= CAPA_AP_RECV;
2873 				p->capa.neg.add_path[0] |= CAPA_AP_RECV;
2874 			}
2875 			if ((p->capa.ann.add_path[i] & CAPA_AP_SEND) &&
2876 			    (p->capa.peer.add_path[i] & CAPA_AP_RECV)) {
2877 				p->capa.neg.add_path[i] |= CAPA_AP_SEND;
2878 				p->capa.neg.add_path[0] |= CAPA_AP_SEND;
2879 			}
2880 		}
2881 	}
2882 
2883 	/*
2884 	 * Open policy: check that the policy is sensible.
2885 	 *
2886 	 * Make sure that the roles match and set the negotiated capability
2887 	 * to the role of the peer. So the RDE can inject the OTC attribute.
2888 	 * See RFC 9234, section 4.2.
2889 	 * These checks should only happen on ebgp sessions.
2890 	 */
2891 	if (p->capa.ann.role_ena != 0 && p->capa.peer.role_ena != 0 &&
2892 	    p->conf.ebgp) {
2893 		switch (p->capa.ann.role) {
2894 		case ROLE_PROVIDER:
2895 			if (p->capa.peer.role != ROLE_CUSTOMER)
2896 				goto fail;
2897 			break;
2898 		case ROLE_RS:
2899 			if (p->capa.peer.role != ROLE_RS_CLIENT)
2900 				goto fail;
2901 			break;
2902 		case ROLE_RS_CLIENT:
2903 			if (p->capa.peer.role != ROLE_RS)
2904 				goto fail;
2905 			break;
2906 		case ROLE_CUSTOMER:
2907 			if (p->capa.peer.role != ROLE_PROVIDER)
2908 				goto fail;
2909 			break;
2910 		case ROLE_PEER:
2911 			if (p->capa.peer.role != ROLE_PEER)
2912 				goto fail;
2913 			break;
2914 		default:
2915  fail:
2916 			log_peer_warnx(&p->conf, "open policy role mismatch: "
2917 			    "%s vs %s", log_policy(p->capa.ann.role),
2918 			    log_policy(p->capa.peer.role));
2919 			*suberr = ERR_OPEN_ROLE;
2920 			return (-1);
2921 		}
2922 		p->capa.neg.role_ena = 1;
2923 		p->capa.neg.role = p->capa.peer.role;
2924 	} else if (p->capa.ann.role_ena == 2 && p->conf.ebgp) {
2925 		/* enforce presence of open policy role capability */
2926 		log_peer_warnx(&p->conf, "open policy role enforced but "
2927 		    "not present");
2928 		*suberr = ERR_OPEN_ROLE;
2929 		return (-1);
2930 	}
2931 
2932 	return (0);
2933 }
2934 
2935 void
2936 session_dispatch_imsg(struct imsgbuf *ibuf, int idx, u_int *listener_cnt)
2937 {
2938 	struct imsg		 imsg;
2939 	struct mrt		 xmrt;
2940 	struct route_refresh	 rr;
2941 	struct mrt		*mrt;
2942 	struct imsgbuf		*i;
2943 	struct peer		*p;
2944 	struct listen_addr	*la, *nla;
2945 	struct session_dependon	*sdon;
2946 	u_char			*data;
2947 	int			 n, fd, depend_ok, restricted;
2948 	uint16_t		 t;
2949 	uint8_t			 aid, errcode, subcode;
2950 
2951 	while (ibuf) {
2952 		if ((n = imsg_get(ibuf, &imsg)) == -1)
2953 			fatal("session_dispatch_imsg: imsg_get error");
2954 
2955 		if (n == 0)
2956 			break;
2957 
2958 		switch (imsg.hdr.type) {
2959 		case IMSG_SOCKET_CONN:
2960 		case IMSG_SOCKET_CONN_CTL:
2961 			if (idx != PFD_PIPE_MAIN)
2962 				fatalx("reconf request not from parent");
2963 			if ((fd = imsg.fd) == -1) {
2964 				log_warnx("expected to receive imsg fd to "
2965 				    "RDE but didn't receive any");
2966 				break;
2967 			}
2968 			if ((i = malloc(sizeof(struct imsgbuf))) == NULL)
2969 				fatal(NULL);
2970 			imsg_init(i, fd);
2971 			if (imsg.hdr.type == IMSG_SOCKET_CONN) {
2972 				if (ibuf_rde) {
2973 					log_warnx("Unexpected imsg connection "
2974 					    "to RDE received");
2975 					msgbuf_clear(&ibuf_rde->w);
2976 					free(ibuf_rde);
2977 				}
2978 				ibuf_rde = i;
2979 			} else {
2980 				if (ibuf_rde_ctl) {
2981 					log_warnx("Unexpected imsg ctl "
2982 					    "connection to RDE received");
2983 					msgbuf_clear(&ibuf_rde_ctl->w);
2984 					free(ibuf_rde_ctl);
2985 				}
2986 				ibuf_rde_ctl = i;
2987 			}
2988 			break;
2989 		case IMSG_RECONF_CONF:
2990 			if (idx != PFD_PIPE_MAIN)
2991 				fatalx("reconf request not from parent");
2992 			nconf = new_config();
2993 
2994 			copy_config(nconf, imsg.data);
2995 			pending_reconf = 1;
2996 			break;
2997 		case IMSG_RECONF_PEER:
2998 			if (idx != PFD_PIPE_MAIN)
2999 				fatalx("reconf request not from parent");
3000 			if ((p = calloc(1, sizeof(struct peer))) == NULL)
3001 				fatal("new_peer");
3002 			memcpy(&p->conf, imsg.data, sizeof(struct peer_config));
3003 			p->state = p->prev_state = STATE_NONE;
3004 			p->reconf_action = RECONF_REINIT;
3005 			if (RB_INSERT(peer_head, &nconf->peers, p) != NULL)
3006 				fatalx("%s: peer tree is corrupt", __func__);
3007 			break;
3008 		case IMSG_RECONF_LISTENER:
3009 			if (idx != PFD_PIPE_MAIN)
3010 				fatalx("reconf request not from parent");
3011 			if (nconf == NULL)
3012 				fatalx("IMSG_RECONF_LISTENER but no config");
3013 			nla = imsg.data;
3014 			TAILQ_FOREACH(la, conf->listen_addrs, entry)
3015 				if (!la_cmp(la, nla))
3016 					break;
3017 
3018 			if (la == NULL) {
3019 				if (nla->reconf != RECONF_REINIT)
3020 					fatalx("king bula sez: "
3021 					    "expected REINIT");
3022 
3023 				if ((nla->fd = imsg.fd) == -1)
3024 					log_warnx("expected to receive fd for "
3025 					    "%s but didn't receive any",
3026 					    log_sockaddr((struct sockaddr *)
3027 					    &nla->sa, nla->sa_len));
3028 
3029 				la = calloc(1, sizeof(struct listen_addr));
3030 				if (la == NULL)
3031 					fatal(NULL);
3032 				memcpy(&la->sa, &nla->sa, sizeof(la->sa));
3033 				la->flags = nla->flags;
3034 				la->fd = nla->fd;
3035 				la->reconf = RECONF_REINIT;
3036 				TAILQ_INSERT_TAIL(nconf->listen_addrs, la,
3037 				    entry);
3038 			} else {
3039 				if (nla->reconf != RECONF_KEEP)
3040 					fatalx("king bula sez: expected KEEP");
3041 				la->reconf = RECONF_KEEP;
3042 			}
3043 
3044 			break;
3045 		case IMSG_RECONF_CTRL:
3046 			if (idx != PFD_PIPE_MAIN)
3047 				fatalx("reconf request not from parent");
3048 			if (imsg.hdr.len != IMSG_HEADER_SIZE +
3049 			    sizeof(restricted))
3050 				fatalx("RECONF_CTRL imsg with wrong len");
3051 			memcpy(&restricted, imsg.data, sizeof(restricted));
3052 			if (imsg.fd == -1) {
3053 				log_warnx("expected to receive fd for control "
3054 				    "socket but didn't receive any");
3055 				break;
3056 			}
3057 			if (restricted) {
3058 				control_shutdown(rcsock);
3059 				rcsock = imsg.fd;
3060 			} else {
3061 				control_shutdown(csock);
3062 				csock = imsg.fd;
3063 			}
3064 			break;
3065 		case IMSG_RECONF_DRAIN:
3066 			switch (idx) {
3067 			case PFD_PIPE_ROUTE:
3068 				if (nconf != NULL)
3069 					fatalx("got unexpected %s from RDE",
3070 					    "IMSG_RECONF_DONE");
3071 				imsg_compose(ibuf_main, IMSG_RECONF_DONE, 0, 0,
3072 				    -1, NULL, 0);
3073 				break;
3074 			case PFD_PIPE_MAIN:
3075 				if (nconf == NULL)
3076 					fatalx("got unexpected %s from parent",
3077 					    "IMSG_RECONF_DONE");
3078 				imsg_compose(ibuf_main, IMSG_RECONF_DRAIN, 0, 0,
3079 				    -1, NULL, 0);
3080 				break;
3081 			default:
3082 				fatalx("reconf request not from parent or RDE");
3083 			}
3084 			break;
3085 		case IMSG_RECONF_DONE:
3086 			if (idx != PFD_PIPE_MAIN)
3087 				fatalx("reconf request not from parent");
3088 			if (nconf == NULL)
3089 				fatalx("got IMSG_RECONF_DONE but no config");
3090 			copy_config(conf, nconf);
3091 			merge_peers(conf, nconf);
3092 
3093 			/* delete old listeners */
3094 			for (la = TAILQ_FIRST(conf->listen_addrs); la != NULL;
3095 			    la = nla) {
3096 				nla = TAILQ_NEXT(la, entry);
3097 				if (la->reconf == RECONF_NONE) {
3098 					log_info("not listening on %s any more",
3099 					    log_sockaddr((struct sockaddr *)
3100 					    &la->sa, la->sa_len));
3101 					TAILQ_REMOVE(conf->listen_addrs, la,
3102 					    entry);
3103 					close(la->fd);
3104 					free(la);
3105 				}
3106 			}
3107 
3108 			/* add new listeners */
3109 			TAILQ_CONCAT(conf->listen_addrs, nconf->listen_addrs,
3110 			    entry);
3111 
3112 			setup_listeners(listener_cnt);
3113 			free_config(nconf);
3114 			nconf = NULL;
3115 			pending_reconf = 0;
3116 			log_info("SE reconfigured");
3117 			/*
3118 			 * IMSG_RECONF_DONE is sent when the RDE drained
3119 			 * the peer config sent in merge_peers().
3120 			 */
3121 			break;
3122 		case IMSG_SESSION_DEPENDON:
3123 			if (idx != PFD_PIPE_MAIN)
3124 				fatalx("IFINFO message not from parent");
3125 			if (imsg.hdr.len != IMSG_HEADER_SIZE +
3126 			    sizeof(struct session_dependon))
3127 				fatalx("DEPENDON imsg with wrong len");
3128 			sdon = imsg.data;
3129 			depend_ok = sdon->depend_state;
3130 
3131 			RB_FOREACH(p, peer_head, &conf->peers)
3132 				if (!strcmp(p->conf.if_depend, sdon->ifname)) {
3133 					if (depend_ok && !p->depend_ok) {
3134 						p->depend_ok = depend_ok;
3135 						bgp_fsm(p, EVNT_START);
3136 					} else if (!depend_ok && p->depend_ok) {
3137 						p->depend_ok = depend_ok;
3138 						session_stop(p,
3139 						    ERR_CEASE_OTHER_CHANGE);
3140 					}
3141 				}
3142 			break;
3143 		case IMSG_MRT_OPEN:
3144 		case IMSG_MRT_REOPEN:
3145 			if (imsg.hdr.len > IMSG_HEADER_SIZE +
3146 			    sizeof(struct mrt)) {
3147 				log_warnx("wrong imsg len");
3148 				break;
3149 			}
3150 
3151 			memcpy(&xmrt, imsg.data, sizeof(struct mrt));
3152 			if ((xmrt.wbuf.fd = imsg.fd) == -1)
3153 				log_warnx("expected to receive fd for mrt dump "
3154 				    "but didn't receive any");
3155 
3156 			mrt = mrt_get(&mrthead, &xmrt);
3157 			if (mrt == NULL) {
3158 				/* new dump */
3159 				mrt = calloc(1, sizeof(struct mrt));
3160 				if (mrt == NULL)
3161 					fatal("session_dispatch_imsg");
3162 				memcpy(mrt, &xmrt, sizeof(struct mrt));
3163 				TAILQ_INIT(&mrt->wbuf.bufs);
3164 				LIST_INSERT_HEAD(&mrthead, mrt, entry);
3165 			} else {
3166 				/* old dump reopened */
3167 				close(mrt->wbuf.fd);
3168 				mrt->wbuf.fd = xmrt.wbuf.fd;
3169 			}
3170 			break;
3171 		case IMSG_MRT_CLOSE:
3172 			if (imsg.hdr.len > IMSG_HEADER_SIZE +
3173 			    sizeof(struct mrt)) {
3174 				log_warnx("wrong imsg len");
3175 				break;
3176 			}
3177 
3178 			memcpy(&xmrt, imsg.data, sizeof(struct mrt));
3179 			mrt = mrt_get(&mrthead, &xmrt);
3180 			if (mrt != NULL)
3181 				mrt_done(mrt);
3182 			break;
3183 		case IMSG_CTL_KROUTE:
3184 		case IMSG_CTL_KROUTE_ADDR:
3185 		case IMSG_CTL_SHOW_NEXTHOP:
3186 		case IMSG_CTL_SHOW_INTERFACE:
3187 		case IMSG_CTL_SHOW_FIB_TABLES:
3188 		case IMSG_CTL_SHOW_RTR:
3189 		case IMSG_CTL_SHOW_TIMER:
3190 			if (idx != PFD_PIPE_MAIN)
3191 				fatalx("ctl kroute request not from parent");
3192 			control_imsg_relay(&imsg);
3193 			break;
3194 		case IMSG_CTL_SHOW_RIB:
3195 		case IMSG_CTL_SHOW_RIB_PREFIX:
3196 		case IMSG_CTL_SHOW_RIB_COMMUNITIES:
3197 		case IMSG_CTL_SHOW_RIB_ATTR:
3198 		case IMSG_CTL_SHOW_RIB_MEM:
3199 		case IMSG_CTL_SHOW_NETWORK:
3200 		case IMSG_CTL_SHOW_NEIGHBOR:
3201 		case IMSG_CTL_SHOW_SET:
3202 			if (idx != PFD_PIPE_ROUTE_CTL)
3203 				fatalx("ctl rib request not from RDE");
3204 			control_imsg_relay(&imsg);
3205 			break;
3206 		case IMSG_CTL_END:
3207 		case IMSG_CTL_RESULT:
3208 			control_imsg_relay(&imsg);
3209 			break;
3210 		case IMSG_UPDATE:
3211 			if (idx != PFD_PIPE_ROUTE)
3212 				fatalx("update request not from RDE");
3213 			if (imsg.hdr.len > IMSG_HEADER_SIZE +
3214 			    MAX_PKTSIZE - MSGSIZE_HEADER ||
3215 			    imsg.hdr.len < IMSG_HEADER_SIZE +
3216 			    MSGSIZE_UPDATE_MIN - MSGSIZE_HEADER)
3217 				log_warnx("RDE sent invalid update");
3218 			else
3219 				session_update(imsg.hdr.peerid, imsg.data,
3220 				    imsg.hdr.len - IMSG_HEADER_SIZE);
3221 			break;
3222 		case IMSG_UPDATE_ERR:
3223 			if (idx != PFD_PIPE_ROUTE)
3224 				fatalx("update request not from RDE");
3225 			if (imsg.hdr.len < IMSG_HEADER_SIZE + 2) {
3226 				log_warnx("RDE sent invalid notification");
3227 				break;
3228 			}
3229 			if ((p = getpeerbyid(conf, imsg.hdr.peerid)) == NULL) {
3230 				log_warnx("no such peer: id=%u",
3231 				    imsg.hdr.peerid);
3232 				break;
3233 			}
3234 			data = imsg.data;
3235 			errcode = *data++;
3236 			subcode = *data++;
3237 
3238 			if (imsg.hdr.len == IMSG_HEADER_SIZE + 2)
3239 				data = NULL;
3240 
3241 			session_notification(p, errcode, subcode,
3242 			    data, imsg.hdr.len - IMSG_HEADER_SIZE - 2);
3243 			switch (errcode) {
3244 			case ERR_CEASE:
3245 				switch (subcode) {
3246 				case ERR_CEASE_MAX_PREFIX:
3247 				case ERR_CEASE_MAX_SENT_PREFIX:
3248 					t = p->conf.max_out_prefix_restart;
3249 					if (subcode == ERR_CEASE_MAX_PREFIX)
3250 						t = p->conf.max_prefix_restart;
3251 
3252 					bgp_fsm(p, EVNT_STOP);
3253 					if (t)
3254 						timer_set(&p->timers,
3255 						    Timer_IdleHold, 60 * t);
3256 					break;
3257 				default:
3258 					bgp_fsm(p, EVNT_CON_FATAL);
3259 					break;
3260 				}
3261 				break;
3262 			default:
3263 				bgp_fsm(p, EVNT_CON_FATAL);
3264 				break;
3265 			}
3266 			break;
3267 		case IMSG_REFRESH:
3268 			if (idx != PFD_PIPE_ROUTE)
3269 				fatalx("route refresh request not from RDE");
3270 			if (imsg.hdr.len < IMSG_HEADER_SIZE + sizeof(rr)) {
3271 				log_warnx("RDE sent invalid refresh msg");
3272 				break;
3273 			}
3274 			if ((p = getpeerbyid(conf, imsg.hdr.peerid)) == NULL) {
3275 				log_warnx("no such peer: id=%u",
3276 				    imsg.hdr.peerid);
3277 				break;
3278 			}
3279 			memcpy(&rr, imsg.data, sizeof(rr));
3280 			if (rr.aid >= AID_MAX)
3281 				fatalx("IMSG_REFRESH: bad AID");
3282 			session_rrefresh(p, rr.aid, rr.subtype);
3283 			break;
3284 		case IMSG_SESSION_RESTARTED:
3285 			if (idx != PFD_PIPE_ROUTE)
3286 				fatalx("update request not from RDE");
3287 			if (imsg.hdr.len < IMSG_HEADER_SIZE + sizeof(aid)) {
3288 				log_warnx("RDE sent invalid restart msg");
3289 				break;
3290 			}
3291 			if ((p = getpeerbyid(conf, imsg.hdr.peerid)) == NULL) {
3292 				log_warnx("no such peer: id=%u",
3293 				    imsg.hdr.peerid);
3294 				break;
3295 			}
3296 			memcpy(&aid, imsg.data, sizeof(aid));
3297 			if (aid >= AID_MAX)
3298 				fatalx("IMSG_SESSION_RESTARTED: bad AID");
3299 			if (p->capa.neg.grestart.flags[aid] &
3300 			    CAPA_GR_RESTARTING) {
3301 				log_peer_warnx(&p->conf,
3302 				    "graceful restart of %s finished",
3303 				    aid2str(aid));
3304 				p->capa.neg.grestart.flags[aid] &=
3305 				    ~CAPA_GR_RESTARTING;
3306 				timer_stop(&p->timers, Timer_RestartTimeout);
3307 
3308 				/* signal back to RDE to cleanup stale routes */
3309 				if (imsg_rde(IMSG_SESSION_RESTARTED,
3310 				    imsg.hdr.peerid, &aid, sizeof(aid)) == -1)
3311 					fatal("imsg_compose: "
3312 					    "IMSG_SESSION_RESTARTED");
3313 			}
3314 			break;
3315 		case IMSG_SESSION_DOWN:
3316 			if (idx != PFD_PIPE_ROUTE)
3317 				fatalx("update request not from RDE");
3318 			if ((p = getpeerbyid(conf, imsg.hdr.peerid)) == NULL) {
3319 				log_warnx("no such peer: id=%u",
3320 				    imsg.hdr.peerid);
3321 				break;
3322 			}
3323 			session_stop(p, ERR_CEASE_ADMIN_DOWN);
3324 			break;
3325 		default:
3326 			break;
3327 		}
3328 		imsg_free(&imsg);
3329 	}
3330 }
3331 
3332 int
3333 la_cmp(struct listen_addr *a, struct listen_addr *b)
3334 {
3335 	struct sockaddr_in	*in_a, *in_b;
3336 	struct sockaddr_in6	*in6_a, *in6_b;
3337 
3338 	if (a->sa.ss_family != b->sa.ss_family)
3339 		return (1);
3340 
3341 	switch (a->sa.ss_family) {
3342 	case AF_INET:
3343 		in_a = (struct sockaddr_in *)&a->sa;
3344 		in_b = (struct sockaddr_in *)&b->sa;
3345 		if (in_a->sin_addr.s_addr != in_b->sin_addr.s_addr)
3346 			return (1);
3347 		if (in_a->sin_port != in_b->sin_port)
3348 			return (1);
3349 		break;
3350 	case AF_INET6:
3351 		in6_a = (struct sockaddr_in6 *)&a->sa;
3352 		in6_b = (struct sockaddr_in6 *)&b->sa;
3353 		if (memcmp(&in6_a->sin6_addr, &in6_b->sin6_addr,
3354 		    sizeof(struct in6_addr)))
3355 			return (1);
3356 		if (in6_a->sin6_port != in6_b->sin6_port)
3357 			return (1);
3358 		break;
3359 	default:
3360 		fatal("king bula sez: unknown address family");
3361 		/* NOTREACHED */
3362 	}
3363 
3364 	return (0);
3365 }
3366 
3367 struct peer *
3368 getpeerbydesc(struct bgpd_config *c, const char *descr)
3369 {
3370 	struct peer	*p, *res = NULL;
3371 	int		 match = 0;
3372 
3373 	RB_FOREACH(p, peer_head, &c->peers)
3374 		if (!strcmp(p->conf.descr, descr)) {
3375 			res = p;
3376 			match++;
3377 		}
3378 
3379 	if (match > 1)
3380 		log_info("neighbor description \"%s\" not unique, request "
3381 		    "aborted", descr);
3382 
3383 	if (match == 1)
3384 		return (res);
3385 	else
3386 		return (NULL);
3387 }
3388 
3389 struct peer *
3390 getpeerbyip(struct bgpd_config *c, struct sockaddr *ip)
3391 {
3392 	struct bgpd_addr addr;
3393 	struct peer	*p, *newpeer, *loose = NULL;
3394 	uint32_t	 id;
3395 
3396 	sa2addr(ip, &addr, NULL);
3397 
3398 	/* we might want a more effective way to find peers by IP */
3399 	RB_FOREACH(p, peer_head, &c->peers)
3400 		if (!p->conf.template &&
3401 		    !memcmp(&addr, &p->conf.remote_addr, sizeof(addr)))
3402 			return (p);
3403 
3404 	/* try template matching */
3405 	RB_FOREACH(p, peer_head, &c->peers)
3406 		if (p->conf.template &&
3407 		    p->conf.remote_addr.aid == addr.aid &&
3408 		    session_match_mask(p, &addr))
3409 			if (loose == NULL || loose->conf.remote_masklen <
3410 			    p->conf.remote_masklen)
3411 				loose = p;
3412 
3413 	if (loose != NULL) {
3414 		/* clone */
3415 		if ((newpeer = malloc(sizeof(struct peer))) == NULL)
3416 			fatal(NULL);
3417 		memcpy(newpeer, loose, sizeof(struct peer));
3418 		for (id = PEER_ID_DYN_MAX; id > PEER_ID_STATIC_MAX; id--) {
3419 			if (getpeerbyid(c, id) == NULL)	/* we found a free id */
3420 				break;
3421 		}
3422 		newpeer->template = loose;
3423 		session_template_clone(newpeer, ip, id, 0);
3424 		newpeer->state = newpeer->prev_state = STATE_NONE;
3425 		newpeer->reconf_action = RECONF_KEEP;
3426 		newpeer->rbuf = NULL;
3427 		newpeer->rpending = 0;
3428 		init_peer(newpeer);
3429 		bgp_fsm(newpeer, EVNT_START);
3430 		if (RB_INSERT(peer_head, &c->peers, newpeer) != NULL)
3431 			fatalx("%s: peer tree is corrupt", __func__);
3432 		return (newpeer);
3433 	}
3434 
3435 	return (NULL);
3436 }
3437 
3438 struct peer *
3439 getpeerbyid(struct bgpd_config *c, uint32_t peerid)
3440 {
3441 	static struct peer lookup;
3442 
3443 	lookup.conf.id = peerid;
3444 
3445 	return RB_FIND(peer_head, &c->peers, &lookup);
3446 }
3447 
3448 int
3449 peer_matched(struct peer *p, struct ctl_neighbor *n)
3450 {
3451 	char *s;
3452 
3453 	if (n && n->addr.aid) {
3454 		if (memcmp(&p->conf.remote_addr, &n->addr,
3455 		    sizeof(p->conf.remote_addr)))
3456 			return 0;
3457 	} else if (n && n->descr[0]) {
3458 		s = n->is_group ? p->conf.group : p->conf.descr;
3459 		if (strcmp(s, n->descr))
3460 			return 0;
3461 	}
3462 	return 1;
3463 }
3464 
3465 void
3466 session_template_clone(struct peer *p, struct sockaddr *ip, uint32_t id,
3467     uint32_t as)
3468 {
3469 	struct bgpd_addr	remote_addr;
3470 
3471 	if (ip)
3472 		sa2addr(ip, &remote_addr, NULL);
3473 	else
3474 		memcpy(&remote_addr, &p->conf.remote_addr, sizeof(remote_addr));
3475 
3476 	memcpy(&p->conf, &p->template->conf, sizeof(struct peer_config));
3477 
3478 	p->conf.id = id;
3479 
3480 	if (as) {
3481 		p->conf.remote_as = as;
3482 		p->conf.ebgp = (p->conf.remote_as != p->conf.local_as);
3483 		if (!p->conf.ebgp)
3484 			/* force enforce_as off for iBGP sessions */
3485 			p->conf.enforce_as = ENFORCE_AS_OFF;
3486 	}
3487 
3488 	memcpy(&p->conf.remote_addr, &remote_addr, sizeof(remote_addr));
3489 	switch (p->conf.remote_addr.aid) {
3490 	case AID_INET:
3491 		p->conf.remote_masklen = 32;
3492 		break;
3493 	case AID_INET6:
3494 		p->conf.remote_masklen = 128;
3495 		break;
3496 	}
3497 	p->conf.template = 0;
3498 }
3499 
3500 int
3501 session_match_mask(struct peer *p, struct bgpd_addr *a)
3502 {
3503 	struct bgpd_addr masked;
3504 
3505 	applymask(&masked, a, p->conf.remote_masklen);
3506 	if (memcmp(&masked, &p->conf.remote_addr, sizeof(masked)) == 0)
3507 		return (1);
3508 	return (0);
3509 }
3510 
3511 void
3512 session_down(struct peer *peer)
3513 {
3514 	memset(&peer->capa.neg, 0, sizeof(peer->capa.neg));
3515 	peer->stats.last_updown = getmonotime();
3516 	/*
3517 	 * session_down is called in the exit code path so check
3518 	 * if the RDE is still around, if not there is no need to
3519 	 * send the message.
3520 	 */
3521 	if (ibuf_rde == NULL)
3522 		return;
3523 	if (imsg_rde(IMSG_SESSION_DOWN, peer->conf.id, NULL, 0) == -1)
3524 		fatalx("imsg_compose error");
3525 }
3526 
3527 void
3528 session_up(struct peer *p)
3529 {
3530 	struct session_up	 sup;
3531 
3532 	if (imsg_rde(IMSG_SESSION_ADD, p->conf.id,
3533 	    &p->conf, sizeof(p->conf)) == -1)
3534 		fatalx("imsg_compose error");
3535 
3536 	if (p->local.aid == AID_INET) {
3537 		sup.local_v4_addr = p->local;
3538 		sup.local_v6_addr = p->local_alt;
3539 	} else {
3540 		sup.local_v6_addr = p->local;
3541 		sup.local_v4_addr = p->local_alt;
3542 	}
3543 	sup.remote_addr = p->remote;
3544 
3545 	sup.remote_bgpid = p->remote_bgpid;
3546 	sup.short_as = p->short_as;
3547 	memcpy(&sup.capa, &p->capa.neg, sizeof(sup.capa));
3548 	p->stats.last_updown = getmonotime();
3549 	if (imsg_rde(IMSG_SESSION_UP, p->conf.id, &sup, sizeof(sup)) == -1)
3550 		fatalx("imsg_compose error");
3551 }
3552 
3553 int
3554 imsg_ctl_parent(int type, uint32_t peerid, pid_t pid, void *data,
3555     uint16_t datalen)
3556 {
3557 	return (imsg_compose(ibuf_main, type, peerid, pid, -1, data, datalen));
3558 }
3559 
3560 int
3561 imsg_ctl_rde(int type, pid_t pid, void *data, uint16_t datalen)
3562 {
3563 	if (ibuf_rde_ctl == NULL)
3564 		return (0);
3565 
3566 	/*
3567 	 * Use control socket to talk to RDE to bypass the queue of the
3568 	 * regular imsg socket.
3569 	 */
3570 	return (imsg_compose(ibuf_rde_ctl, type, 0, pid, -1, data, datalen));
3571 }
3572 
3573 int
3574 imsg_rde(int type, uint32_t peerid, void *data, uint16_t datalen)
3575 {
3576 	if (ibuf_rde == NULL)
3577 		return (0);
3578 
3579 	return (imsg_compose(ibuf_rde, type, peerid, 0, -1, data, datalen));
3580 }
3581 
3582 void
3583 session_demote(struct peer *p, int level)
3584 {
3585 	struct demote_msg	msg;
3586 
3587 	strlcpy(msg.demote_group, p->conf.demote_group,
3588 	    sizeof(msg.demote_group));
3589 	msg.level = level;
3590 	if (imsg_compose(ibuf_main, IMSG_DEMOTE, p->conf.id, 0, -1,
3591 	    &msg, sizeof(msg)) == -1)
3592 		fatalx("imsg_compose error");
3593 
3594 	p->demoted += level;
3595 }
3596 
3597 void
3598 session_stop(struct peer *peer, uint8_t subcode)
3599 {
3600 	char data[REASON_LEN];
3601 	size_t datalen;
3602 	size_t reason_len;
3603 	char *communication;
3604 
3605 	datalen = 0;
3606 	communication = peer->conf.reason;
3607 
3608 	if ((subcode == ERR_CEASE_ADMIN_DOWN ||
3609 	    subcode == ERR_CEASE_ADMIN_RESET)
3610 	    && communication && *communication) {
3611 		reason_len = strlen(communication);
3612 		if (reason_len > REASON_LEN - 1) {
3613 		    log_peer_warnx(&peer->conf,
3614 			"trying to send overly long shutdown reason");
3615 		} else {
3616 			data[0] = reason_len;
3617 			datalen = reason_len + sizeof(data[0]);
3618 			memcpy(data + 1, communication, reason_len);
3619 		}
3620 	}
3621 	switch (peer->state) {
3622 	case STATE_OPENSENT:
3623 	case STATE_OPENCONFIRM:
3624 	case STATE_ESTABLISHED:
3625 		session_notification(peer, ERR_CEASE, subcode, data, datalen);
3626 		break;
3627 	default:
3628 		/* session not open, no need to send notification */
3629 		break;
3630 	}
3631 	bgp_fsm(peer, EVNT_STOP);
3632 }
3633 
3634 void
3635 merge_peers(struct bgpd_config *c, struct bgpd_config *nc)
3636 {
3637 	struct peer *p, *np, *next;
3638 
3639 	RB_FOREACH(p, peer_head, &c->peers) {
3640 		/* templates are handled specially */
3641 		if (p->template != NULL)
3642 			continue;
3643 		np = getpeerbyid(nc, p->conf.id);
3644 		if (np == NULL) {
3645 			p->reconf_action = RECONF_DELETE;
3646 			continue;
3647 		}
3648 
3649 		/* peer no longer uses TCP MD5SIG so deconfigure */
3650 		if (p->conf.auth.method == AUTH_MD5SIG &&
3651 		    np->conf.auth.method != AUTH_MD5SIG)
3652 			tcp_md5_del_listener(c, p);
3653 		else if (np->conf.auth.method == AUTH_MD5SIG)
3654 			tcp_md5_add_listener(c, np);
3655 
3656 		memcpy(&p->conf, &np->conf, sizeof(p->conf));
3657 		RB_REMOVE(peer_head, &nc->peers, np);
3658 		free(np);
3659 
3660 		p->reconf_action = RECONF_KEEP;
3661 
3662 		/* had demotion, is demoted, demote removed? */
3663 		if (p->demoted && !p->conf.demote_group[0])
3664 			session_demote(p, -1);
3665 
3666 		/* if session is not open then refresh pfkey data */
3667 		if (p->state < STATE_OPENSENT && !p->template)
3668 			imsg_compose(ibuf_main, IMSG_PFKEY_RELOAD,
3669 			    p->conf.id, 0, -1, NULL, 0);
3670 
3671 		/* sync the RDE in case we keep the peer */
3672 		if (imsg_rde(IMSG_SESSION_ADD, p->conf.id,
3673 		    &p->conf, sizeof(struct peer_config)) == -1)
3674 			fatalx("imsg_compose error");
3675 
3676 		/* apply the config to all clones of a template */
3677 		if (p->conf.template) {
3678 			struct peer *xp;
3679 			RB_FOREACH(xp, peer_head, &c->peers) {
3680 				if (xp->template != p)
3681 					continue;
3682 				session_template_clone(xp, NULL, xp->conf.id,
3683 				    xp->conf.remote_as);
3684 				if (imsg_rde(IMSG_SESSION_ADD, xp->conf.id,
3685 				    &xp->conf, sizeof(xp->conf)) == -1)
3686 					fatalx("imsg_compose error");
3687 			}
3688 		}
3689 	}
3690 
3691 	if (imsg_rde(IMSG_RECONF_DRAIN, 0, NULL, 0) == -1)
3692 		fatalx("imsg_compose error");
3693 
3694 	/* pfkeys of new peers already loaded by the parent process */
3695 	RB_FOREACH_SAFE(np, peer_head, &nc->peers, next) {
3696 		RB_REMOVE(peer_head, &nc->peers, np);
3697 		if (RB_INSERT(peer_head, &c->peers, np) != NULL)
3698 			fatalx("%s: peer tree is corrupt", __func__);
3699 		if (np->conf.auth.method == AUTH_MD5SIG)
3700 			tcp_md5_add_listener(c, np);
3701 	}
3702 }
3703