xref: /openbsd-src/usr.sbin/bgpd/session.c (revision 25c4e8bd056e974b28f4a0ffd39d76c190a56013)
1 /*	$OpenBSD: session.c,v 1.431 2022/07/18 13:56:41 claudio Exp $ */
2 
3 /*
4  * Copyright (c) 2003, 2004, 2005 Henning Brauer <henning@openbsd.org>
5  * Copyright (c) 2017 Peter van Dijk <peter.van.dijk@powerdns.com>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 
20 #include <sys/types.h>
21 
22 #include <sys/mman.h>
23 #include <sys/socket.h>
24 #include <sys/time.h>
25 #include <sys/resource.h>
26 #include <sys/un.h>
27 #include <netinet/in.h>
28 #include <netinet/ip.h>
29 #include <netinet/tcp.h>
30 #include <arpa/inet.h>
31 #include <limits.h>
32 
33 #include <err.h>
34 #include <errno.h>
35 #include <fcntl.h>
36 #include <ifaddrs.h>
37 #include <poll.h>
38 #include <pwd.h>
39 #include <signal.h>
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include <string.h>
43 #include <syslog.h>
44 #include <unistd.h>
45 
46 #include "bgpd.h"
47 #include "session.h"
48 #include "log.h"
49 
/*
 * Fixed slots at the start of the pollfd array built by session_main().
 * Listener sockets start at PFD_LISTENERS_START; everything after them
 * is laid out dynamically on every pass of the main loop.
 */
enum {
	PFD_PIPE_MAIN = 0,
	PFD_PIPE_ROUTE = 1,
	PFD_PIPE_ROUTE_CTL = 2,
	PFD_SOCK_CTL = 3,
	PFD_SOCK_RCTL = 4,
	PFD_LISTENERS_START = 5
};
56 
57 void	session_sighdlr(int);
58 int	setup_listeners(u_int *);
59 void	init_peer(struct peer *);
60 void	start_timer_holdtime(struct peer *);
61 void	start_timer_keepalive(struct peer *);
62 void	session_close_connection(struct peer *);
63 void	change_state(struct peer *, enum session_state, enum session_events);
64 int	session_setup_socket(struct peer *);
65 void	session_accept(int);
66 int	session_connect(struct peer *);
67 void	session_tcp_established(struct peer *);
68 void	session_capa_ann_none(struct peer *);
69 int	session_capa_add(struct ibuf *, uint8_t, uint8_t);
70 int	session_capa_add_mp(struct ibuf *, uint8_t);
71 int	session_capa_add_afi(struct peer *, struct ibuf *, uint8_t, uint8_t);
72 struct bgp_msg	*session_newmsg(enum msg_type, uint16_t);
73 int	session_sendmsg(struct bgp_msg *, struct peer *);
74 void	session_open(struct peer *);
75 void	session_keepalive(struct peer *);
76 void	session_update(uint32_t, void *, size_t);
77 void	session_notification(struct peer *, uint8_t, uint8_t, void *,
78 	    ssize_t);
79 void	session_rrefresh(struct peer *, uint8_t, uint8_t);
80 int	session_graceful_restart(struct peer *);
81 int	session_graceful_stop(struct peer *);
82 int	session_dispatch_msg(struct pollfd *, struct peer *);
83 void	session_process_msg(struct peer *);
84 int	parse_header(struct peer *, u_char *, uint16_t *, uint8_t *);
85 int	parse_open(struct peer *);
86 int	parse_update(struct peer *);
87 int	parse_rrefresh(struct peer *);
88 int	parse_notification(struct peer *);
89 int	parse_capabilities(struct peer *, u_char *, uint16_t, uint32_t *);
90 int	capa_neg_calc(struct peer *, uint8_t *);
91 void	session_dispatch_imsg(struct imsgbuf *, int, u_int *);
92 void	session_up(struct peer *);
93 void	session_down(struct peer *);
94 int	imsg_rde(int, uint32_t, void *, uint16_t);
95 void	session_demote(struct peer *, int);
96 void	merge_peers(struct bgpd_config *, struct bgpd_config *);
97 
98 int		 la_cmp(struct listen_addr *, struct listen_addr *);
99 void		 session_template_clone(struct peer *, struct sockaddr *,
100 		    uint32_t, uint32_t);
101 int		 session_match_mask(struct peer *, struct bgpd_addr *);
102 
103 static struct bgpd_config	*conf, *nconf;
104 static struct imsgbuf		*ibuf_rde;
105 static struct imsgbuf		*ibuf_rde_ctl;
106 static struct imsgbuf		*ibuf_main;
107 
108 struct bgpd_sysdep	 sysdep;
109 volatile sig_atomic_t	 session_quit;
110 int			 pending_reconf;
111 int			 csock = -1, rcsock = -1;
112 u_int			 peer_cnt;
113 
114 struct mrt_head		 mrthead;
115 time_t			 pauseaccept;
116 
117 static inline int
118 peer_compare(const struct peer *a, const struct peer *b)
119 {
120 	return a->conf.id - b->conf.id;
121 }
122 
123 RB_GENERATE(peer_head, peer, entry, peer_compare);
124 
125 void
126 session_sighdlr(int sig)
127 {
128 	switch (sig) {
129 	case SIGINT:
130 	case SIGTERM:
131 		session_quit = 1;
132 		break;
133 	}
134 }
135 
136 int
137 setup_listeners(u_int *la_cnt)
138 {
139 	int			 ttl = 255;
140 	struct listen_addr	*la;
141 	u_int			 cnt = 0;
142 
143 	TAILQ_FOREACH(la, conf->listen_addrs, entry) {
144 		la->reconf = RECONF_NONE;
145 		cnt++;
146 
147 		if (la->flags & LISTENER_LISTENING)
148 			continue;
149 
150 		if (la->fd == -1) {
151 			log_warn("cannot establish listener on %s: invalid fd",
152 			    log_sockaddr((struct sockaddr *)&la->sa,
153 			    la->sa_len));
154 			continue;
155 		}
156 
157 		if (tcp_md5_prep_listener(la, &conf->peers) == -1)
158 			fatal("tcp_md5_prep_listener");
159 
160 		/* set ttl to 255 so that ttl-security works */
161 		if (la->sa.ss_family == AF_INET && setsockopt(la->fd,
162 		    IPPROTO_IP, IP_TTL, &ttl, sizeof(ttl)) == -1) {
163 			log_warn("setup_listeners setsockopt TTL");
164 			continue;
165 		}
166 		if (la->sa.ss_family == AF_INET6 && setsockopt(la->fd,
167 		    IPPROTO_IPV6, IPV6_UNICAST_HOPS, &ttl, sizeof(ttl)) == -1) {
168 			log_warn("setup_listeners setsockopt hoplimit");
169 			continue;
170 		}
171 
172 		if (listen(la->fd, MAX_BACKLOG)) {
173 			close(la->fd);
174 			fatal("listen");
175 		}
176 
177 		la->flags |= LISTENER_LISTENING;
178 
179 		log_info("listening on %s",
180 		    log_sockaddr((struct sockaddr *)&la->sa, la->sa_len));
181 	}
182 
183 	*la_cnt = cnt;
184 
185 	return (0);
186 }
187 
188 void
189 session_main(int debug, int verbose)
190 {
191 	int			 timeout;
192 	unsigned int		 i, j, idx_peers, idx_listeners, idx_mrts;
193 	u_int			 pfd_elms = 0, peer_l_elms = 0, mrt_l_elms = 0;
194 	u_int			 listener_cnt, ctl_cnt, mrt_cnt;
195 	u_int			 new_cnt;
196 	struct passwd		*pw;
197 	struct peer		*p, **peer_l = NULL, *next;
198 	struct mrt		*m, *xm, **mrt_l = NULL;
199 	struct pollfd		*pfd = NULL;
200 	struct listen_addr	*la;
201 	void			*newp;
202 	time_t			 now;
203 	short			 events;
204 
205 	log_init(debug, LOG_DAEMON);
206 	log_setverbose(verbose);
207 
208 	log_procinit(log_procnames[PROC_SE]);
209 
210 	if ((pw = getpwnam(BGPD_USER)) == NULL)
211 		fatal(NULL);
212 
213 	if (chroot(pw->pw_dir) == -1)
214 		fatal("chroot");
215 	if (chdir("/") == -1)
216 		fatal("chdir(\"/\")");
217 
218 	setproctitle("session engine");
219 
220 	if (setgroups(1, &pw->pw_gid) ||
221 	    setresgid(pw->pw_gid, pw->pw_gid, pw->pw_gid) ||
222 	    setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid))
223 		fatal("can't drop privileges");
224 
225 	if (pledge("stdio inet recvfd", NULL) == -1)
226 		fatal("pledge");
227 
228 	signal(SIGTERM, session_sighdlr);
229 	signal(SIGINT, session_sighdlr);
230 	signal(SIGPIPE, SIG_IGN);
231 	signal(SIGHUP, SIG_IGN);
232 	signal(SIGALRM, SIG_IGN);
233 	signal(SIGUSR1, SIG_IGN);
234 
235 	if ((ibuf_main = malloc(sizeof(struct imsgbuf))) == NULL)
236 		fatal(NULL);
237 	imsg_init(ibuf_main, 3);
238 
239 	LIST_INIT(&mrthead);
240 	listener_cnt = 0;
241 	peer_cnt = 0;
242 	ctl_cnt = 0;
243 
244 	conf = new_config();
245 	log_info("session engine ready");
246 
247 	while (session_quit == 0) {
248 		/* check for peers to be initialized or deleted */
249 		if (!pending_reconf) {
250 			RB_FOREACH_SAFE(p, peer_head, &conf->peers, next) {
251 				/* cloned peer that idled out? */
252 				if (p->template && (p->state == STATE_IDLE ||
253 				    p->state == STATE_ACTIVE) &&
254 				    getmonotime() - p->stats.last_updown >=
255 				    INTERVAL_HOLD_CLONED)
256 					p->reconf_action = RECONF_DELETE;
257 
258 				/* new peer that needs init? */
259 				if (p->state == STATE_NONE)
260 					init_peer(p);
261 
262 				/* reinit due? */
263 				if (p->reconf_action == RECONF_REINIT) {
264 					session_stop(p, ERR_CEASE_ADMIN_RESET);
265 					if (!p->conf.down)
266 						timer_set(&p->timers,
267 						    Timer_IdleHold, 0);
268 				}
269 
270 				/* deletion due? */
271 				if (p->reconf_action == RECONF_DELETE) {
272 					if (p->demoted)
273 						session_demote(p, -1);
274 					p->conf.demote_group[0] = 0;
275 					session_stop(p, ERR_CEASE_PEER_UNCONF);
276 					timer_remove_all(&p->timers);
277 					tcp_md5_del_listener(conf, p);
278 					log_peer_warnx(&p->conf, "removed");
279 					RB_REMOVE(peer_head, &conf->peers, p);
280 					free(p);
281 					peer_cnt--;
282 					continue;
283 				}
284 				p->reconf_action = RECONF_NONE;
285 			}
286 		}
287 
288 		if (peer_cnt > peer_l_elms) {
289 			if ((newp = reallocarray(peer_l, peer_cnt,
290 			    sizeof(struct peer *))) == NULL) {
291 				/* panic for now  */
292 				log_warn("could not resize peer_l from %u -> %u"
293 				    " entries", peer_l_elms, peer_cnt);
294 				fatalx("exiting");
295 			}
296 			peer_l = newp;
297 			peer_l_elms = peer_cnt;
298 		}
299 
300 		mrt_cnt = 0;
301 		for (m = LIST_FIRST(&mrthead); m != NULL; m = xm) {
302 			xm = LIST_NEXT(m, entry);
303 			if (m->state == MRT_STATE_REMOVE) {
304 				mrt_clean(m);
305 				LIST_REMOVE(m, entry);
306 				free(m);
307 				continue;
308 			}
309 			if (m->wbuf.queued)
310 				mrt_cnt++;
311 		}
312 
313 		if (mrt_cnt > mrt_l_elms) {
314 			if ((newp = reallocarray(mrt_l, mrt_cnt,
315 			    sizeof(struct mrt *))) == NULL) {
316 				/* panic for now  */
317 				log_warn("could not resize mrt_l from %u -> %u"
318 				    " entries", mrt_l_elms, mrt_cnt);
319 				fatalx("exiting");
320 			}
321 			mrt_l = newp;
322 			mrt_l_elms = mrt_cnt;
323 		}
324 
325 		new_cnt = PFD_LISTENERS_START + listener_cnt + peer_cnt +
326 		    ctl_cnt + mrt_cnt;
327 		if (new_cnt > pfd_elms) {
328 			if ((newp = reallocarray(pfd, new_cnt,
329 			    sizeof(struct pollfd))) == NULL) {
330 				/* panic for now  */
331 				log_warn("could not resize pfd from %u -> %u"
332 				    " entries", pfd_elms, new_cnt);
333 				fatalx("exiting");
334 			}
335 			pfd = newp;
336 			pfd_elms = new_cnt;
337 		}
338 
339 		bzero(pfd, sizeof(struct pollfd) * pfd_elms);
340 
341 		set_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main);
342 		set_pollfd(&pfd[PFD_PIPE_ROUTE], ibuf_rde);
343 		set_pollfd(&pfd[PFD_PIPE_ROUTE_CTL], ibuf_rde_ctl);
344 
345 		if (pauseaccept == 0) {
346 			pfd[PFD_SOCK_CTL].fd = csock;
347 			pfd[PFD_SOCK_CTL].events = POLLIN;
348 			pfd[PFD_SOCK_RCTL].fd = rcsock;
349 			pfd[PFD_SOCK_RCTL].events = POLLIN;
350 		} else {
351 			pfd[PFD_SOCK_CTL].fd = -1;
352 			pfd[PFD_SOCK_RCTL].fd = -1;
353 		}
354 
355 		i = PFD_LISTENERS_START;
356 		TAILQ_FOREACH(la, conf->listen_addrs, entry) {
357 			if (pauseaccept == 0) {
358 				pfd[i].fd = la->fd;
359 				pfd[i].events = POLLIN;
360 			} else
361 				pfd[i].fd = -1;
362 			i++;
363 		}
364 		idx_listeners = i;
365 		timeout = 240;	/* loop every 240s at least */
366 
367 		now = getmonotime();
368 		RB_FOREACH(p, peer_head, &conf->peers) {
369 			time_t	nextaction;
370 			struct timer *pt;
371 
372 			/* check timers */
373 			if ((pt = timer_nextisdue(&p->timers, now)) != NULL) {
374 				switch (pt->type) {
375 				case Timer_Hold:
376 					bgp_fsm(p, EVNT_TIMER_HOLDTIME);
377 					break;
378 				case Timer_SendHold:
379 					bgp_fsm(p, EVNT_TIMER_SENDHOLD);
380 					break;
381 				case Timer_ConnectRetry:
382 					bgp_fsm(p, EVNT_TIMER_CONNRETRY);
383 					break;
384 				case Timer_Keepalive:
385 					bgp_fsm(p, EVNT_TIMER_KEEPALIVE);
386 					break;
387 				case Timer_IdleHold:
388 					bgp_fsm(p, EVNT_START);
389 					break;
390 				case Timer_IdleHoldReset:
391 					p->IdleHoldTime =
392 					    INTERVAL_IDLE_HOLD_INITIAL;
393 					p->errcnt = 0;
394 					timer_stop(&p->timers,
395 					    Timer_IdleHoldReset);
396 					break;
397 				case Timer_CarpUndemote:
398 					timer_stop(&p->timers,
399 					    Timer_CarpUndemote);
400 					if (p->demoted &&
401 					    p->state == STATE_ESTABLISHED)
402 						session_demote(p, -1);
403 					break;
404 				case Timer_RestartTimeout:
405 					timer_stop(&p->timers,
406 					    Timer_RestartTimeout);
407 					session_graceful_stop(p);
408 					break;
409 				default:
410 					fatalx("King Bula lost in time");
411 				}
412 			}
413 			if ((nextaction = timer_nextduein(&p->timers,
414 			    now)) != -1 && nextaction < timeout)
415 				timeout = nextaction;
416 
417 			/* are we waiting for a write? */
418 			events = POLLIN;
419 			if (p->wbuf.queued > 0 || p->state == STATE_CONNECT)
420 				events |= POLLOUT;
421 			/* is there still work to do? */
422 			if (p->rpending && p->rbuf && p->rbuf->wpos)
423 				timeout = 0;
424 
425 			/* poll events */
426 			if (p->fd != -1 && events != 0) {
427 				pfd[i].fd = p->fd;
428 				pfd[i].events = events;
429 				peer_l[i - idx_listeners] = p;
430 				i++;
431 			}
432 		}
433 
434 		idx_peers = i;
435 
436 		LIST_FOREACH(m, &mrthead, entry)
437 			if (m->wbuf.queued) {
438 				pfd[i].fd = m->wbuf.fd;
439 				pfd[i].events = POLLOUT;
440 				mrt_l[i - idx_peers] = m;
441 				i++;
442 			}
443 
444 		idx_mrts = i;
445 
446 		i += control_fill_pfds(pfd + i, pfd_elms -i);
447 
448 		if (i > pfd_elms)
449 			fatalx("poll pfd overflow");
450 
451 		if (pauseaccept && timeout > 1)
452 			timeout = 1;
453 		if (timeout < 0)
454 			timeout = 0;
455 		if (poll(pfd, i, timeout * 1000) == -1) {
456 			if (errno == EINTR)
457 				continue;
458 			fatal("poll error");
459 		}
460 
461 		/*
462 		 * If we previously saw fd exhaustion, we stop accept()
463 		 * for 1 second to throttle the accept() loop.
464 		 */
465 		if (pauseaccept && getmonotime() > pauseaccept + 1)
466 			pauseaccept = 0;
467 
468 		if (handle_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main) == -1) {
469 			log_warnx("SE: Lost connection to parent");
470 			session_quit = 1;
471 			continue;
472 		} else
473 			session_dispatch_imsg(ibuf_main, PFD_PIPE_MAIN,
474 			    &listener_cnt);
475 
476 		if (handle_pollfd(&pfd[PFD_PIPE_ROUTE], ibuf_rde) == -1) {
477 			log_warnx("SE: Lost connection to RDE");
478 			msgbuf_clear(&ibuf_rde->w);
479 			free(ibuf_rde);
480 			ibuf_rde = NULL;
481 		} else
482 			session_dispatch_imsg(ibuf_rde, PFD_PIPE_ROUTE,
483 			    &listener_cnt);
484 
485 		if (handle_pollfd(&pfd[PFD_PIPE_ROUTE_CTL], ibuf_rde_ctl) ==
486 		    -1) {
487 			log_warnx("SE: Lost connection to RDE control");
488 			msgbuf_clear(&ibuf_rde_ctl->w);
489 			free(ibuf_rde_ctl);
490 			ibuf_rde_ctl = NULL;
491 		} else
492 			session_dispatch_imsg(ibuf_rde_ctl, PFD_PIPE_ROUTE_CTL,
493 			    &listener_cnt);
494 
495 		if (pfd[PFD_SOCK_CTL].revents & POLLIN)
496 			ctl_cnt += control_accept(csock, 0);
497 
498 		if (pfd[PFD_SOCK_RCTL].revents & POLLIN)
499 			ctl_cnt += control_accept(rcsock, 1);
500 
501 		for (j = PFD_LISTENERS_START; j < idx_listeners; j++)
502 			if (pfd[j].revents & POLLIN)
503 				session_accept(pfd[j].fd);
504 
505 		for (; j < idx_peers; j++)
506 			session_dispatch_msg(&pfd[j],
507 			    peer_l[j - idx_listeners]);
508 
509 		RB_FOREACH(p, peer_head, &conf->peers)
510 			if (p->rbuf && p->rbuf->wpos)
511 				session_process_msg(p);
512 
513 		for (; j < idx_mrts; j++)
514 			if (pfd[j].revents & POLLOUT)
515 				mrt_write(mrt_l[j - idx_peers]);
516 
517 		for (; j < i; j++)
518 			ctl_cnt -= control_dispatch_msg(&pfd[j], &conf->peers);
519 	}
520 
521 	RB_FOREACH_SAFE(p, peer_head, &conf->peers, next) {
522 		RB_REMOVE(peer_head, &conf->peers, p);
523 		strlcpy(p->conf.reason,
524 		    "bgpd shutting down",
525 		    sizeof(p->conf.reason));
526 		session_stop(p, ERR_CEASE_ADMIN_DOWN);
527 		timer_remove_all(&p->timers);
528 		free(p);
529 	}
530 
531 	while ((m = LIST_FIRST(&mrthead)) != NULL) {
532 		mrt_clean(m);
533 		LIST_REMOVE(m, entry);
534 		free(m);
535 	}
536 
537 	free_config(conf);
538 	free(peer_l);
539 	free(mrt_l);
540 	free(pfd);
541 
542 	/* close pipes */
543 	if (ibuf_rde) {
544 		msgbuf_write(&ibuf_rde->w);
545 		msgbuf_clear(&ibuf_rde->w);
546 		close(ibuf_rde->fd);
547 		free(ibuf_rde);
548 	}
549 	if (ibuf_rde_ctl) {
550 		msgbuf_clear(&ibuf_rde_ctl->w);
551 		close(ibuf_rde_ctl->fd);
552 		free(ibuf_rde_ctl);
553 	}
554 	msgbuf_write(&ibuf_main->w);
555 	msgbuf_clear(&ibuf_main->w);
556 	close(ibuf_main->fd);
557 	free(ibuf_main);
558 
559 	control_shutdown(csock);
560 	control_shutdown(rcsock);
561 	log_info("session engine exiting");
562 	exit(0);
563 }
564 
565 void
566 init_peer(struct peer *p)
567 {
568 	TAILQ_INIT(&p->timers);
569 	p->fd = p->wbuf.fd = -1;
570 
571 	if (p->conf.if_depend[0])
572 		imsg_compose(ibuf_main, IMSG_SESSION_DEPENDON, 0, 0, -1,
573 		    p->conf.if_depend, sizeof(p->conf.if_depend));
574 	else
575 		p->depend_ok = 1;
576 
577 	peer_cnt++;
578 
579 	change_state(p, STATE_IDLE, EVNT_NONE);
580 	if (p->conf.down)
581 		timer_stop(&p->timers, Timer_IdleHold); /* no autostart */
582 	else
583 		timer_set(&p->timers, Timer_IdleHold, 0); /* start ASAP */
584 
585 	/*
586 	 * on startup, demote if requested.
587 	 * do not handle new peers. they must reach ESTABLISHED beforehands.
588 	 * peers added at runtime have reconf_action set to RECONF_REINIT.
589 	 */
590 	if (p->reconf_action != RECONF_REINIT && p->conf.demote_group[0])
591 		session_demote(p, +1);
592 }
593 
594 void
595 bgp_fsm(struct peer *peer, enum session_events event)
596 {
597 	switch (peer->state) {
598 	case STATE_NONE:
599 		/* nothing */
600 		break;
601 	case STATE_IDLE:
602 		switch (event) {
603 		case EVNT_START:
604 			timer_stop(&peer->timers, Timer_Hold);
605 			timer_stop(&peer->timers, Timer_SendHold);
606 			timer_stop(&peer->timers, Timer_Keepalive);
607 			timer_stop(&peer->timers, Timer_IdleHold);
608 
609 			/* allocate read buffer */
610 			peer->rbuf = calloc(1, sizeof(struct ibuf_read));
611 			if (peer->rbuf == NULL)
612 				fatal(NULL);
613 
614 			/* init write buffer */
615 			msgbuf_init(&peer->wbuf);
616 
617 			peer->stats.last_sent_errcode = 0;
618 			peer->stats.last_sent_suberr = 0;
619 			peer->stats.last_rcvd_errcode = 0;
620 			peer->stats.last_rcvd_suberr = 0;
621 
622 			if (!peer->depend_ok)
623 				timer_stop(&peer->timers, Timer_ConnectRetry);
624 			else if (peer->passive || peer->conf.passive ||
625 			    peer->conf.template) {
626 				change_state(peer, STATE_ACTIVE, event);
627 				timer_stop(&peer->timers, Timer_ConnectRetry);
628 			} else {
629 				change_state(peer, STATE_CONNECT, event);
630 				timer_set(&peer->timers, Timer_ConnectRetry,
631 				    conf->connectretry);
632 				session_connect(peer);
633 			}
634 			peer->passive = 0;
635 			break;
636 		default:
637 			/* ignore */
638 			break;
639 		}
640 		break;
641 	case STATE_CONNECT:
642 		switch (event) {
643 		case EVNT_START:
644 			/* ignore */
645 			break;
646 		case EVNT_CON_OPEN:
647 			session_tcp_established(peer);
648 			session_open(peer);
649 			timer_stop(&peer->timers, Timer_ConnectRetry);
650 			peer->holdtime = INTERVAL_HOLD_INITIAL;
651 			start_timer_holdtime(peer);
652 			change_state(peer, STATE_OPENSENT, event);
653 			break;
654 		case EVNT_CON_OPENFAIL:
655 			timer_set(&peer->timers, Timer_ConnectRetry,
656 			    conf->connectretry);
657 			session_close_connection(peer);
658 			change_state(peer, STATE_ACTIVE, event);
659 			break;
660 		case EVNT_TIMER_CONNRETRY:
661 			timer_set(&peer->timers, Timer_ConnectRetry,
662 			    conf->connectretry);
663 			session_connect(peer);
664 			break;
665 		default:
666 			change_state(peer, STATE_IDLE, event);
667 			break;
668 		}
669 		break;
670 	case STATE_ACTIVE:
671 		switch (event) {
672 		case EVNT_START:
673 			/* ignore */
674 			break;
675 		case EVNT_CON_OPEN:
676 			session_tcp_established(peer);
677 			session_open(peer);
678 			timer_stop(&peer->timers, Timer_ConnectRetry);
679 			peer->holdtime = INTERVAL_HOLD_INITIAL;
680 			start_timer_holdtime(peer);
681 			change_state(peer, STATE_OPENSENT, event);
682 			break;
683 		case EVNT_CON_OPENFAIL:
684 			timer_set(&peer->timers, Timer_ConnectRetry,
685 			    conf->connectretry);
686 			session_close_connection(peer);
687 			change_state(peer, STATE_ACTIVE, event);
688 			break;
689 		case EVNT_TIMER_CONNRETRY:
690 			timer_set(&peer->timers, Timer_ConnectRetry,
691 			    peer->holdtime);
692 			change_state(peer, STATE_CONNECT, event);
693 			session_connect(peer);
694 			break;
695 		default:
696 			change_state(peer, STATE_IDLE, event);
697 			break;
698 		}
699 		break;
700 	case STATE_OPENSENT:
701 		switch (event) {
702 		case EVNT_START:
703 			/* ignore */
704 			break;
705 		case EVNT_STOP:
706 			change_state(peer, STATE_IDLE, event);
707 			break;
708 		case EVNT_CON_CLOSED:
709 			session_close_connection(peer);
710 			timer_set(&peer->timers, Timer_ConnectRetry,
711 			    conf->connectretry);
712 			change_state(peer, STATE_ACTIVE, event);
713 			break;
714 		case EVNT_CON_FATAL:
715 			change_state(peer, STATE_IDLE, event);
716 			break;
717 		case EVNT_TIMER_HOLDTIME:
718 		case EVNT_TIMER_SENDHOLD:
719 			session_notification(peer, ERR_HOLDTIMEREXPIRED,
720 			    0, NULL, 0);
721 			change_state(peer, STATE_IDLE, event);
722 			break;
723 		case EVNT_RCVD_OPEN:
724 			/* parse_open calls change_state itself on failure */
725 			if (parse_open(peer))
726 				break;
727 			session_keepalive(peer);
728 			change_state(peer, STATE_OPENCONFIRM, event);
729 			break;
730 		case EVNT_RCVD_NOTIFICATION:
731 			if (parse_notification(peer)) {
732 				change_state(peer, STATE_IDLE, event);
733 				/* don't punish, capa negotiation */
734 				timer_set(&peer->timers, Timer_IdleHold, 0);
735 				peer->IdleHoldTime /= 2;
736 			} else
737 				change_state(peer, STATE_IDLE, event);
738 			break;
739 		default:
740 			session_notification(peer,
741 			    ERR_FSM, ERR_FSM_UNEX_OPENSENT, NULL, 0);
742 			change_state(peer, STATE_IDLE, event);
743 			break;
744 		}
745 		break;
746 	case STATE_OPENCONFIRM:
747 		switch (event) {
748 		case EVNT_START:
749 			/* ignore */
750 			break;
751 		case EVNT_STOP:
752 			change_state(peer, STATE_IDLE, event);
753 			break;
754 		case EVNT_CON_CLOSED:
755 		case EVNT_CON_FATAL:
756 			change_state(peer, STATE_IDLE, event);
757 			break;
758 		case EVNT_TIMER_HOLDTIME:
759 		case EVNT_TIMER_SENDHOLD:
760 			session_notification(peer, ERR_HOLDTIMEREXPIRED,
761 			    0, NULL, 0);
762 			change_state(peer, STATE_IDLE, event);
763 			break;
764 		case EVNT_TIMER_KEEPALIVE:
765 			session_keepalive(peer);
766 			break;
767 		case EVNT_RCVD_KEEPALIVE:
768 			start_timer_holdtime(peer);
769 			change_state(peer, STATE_ESTABLISHED, event);
770 			break;
771 		case EVNT_RCVD_NOTIFICATION:
772 			parse_notification(peer);
773 			change_state(peer, STATE_IDLE, event);
774 			break;
775 		default:
776 			session_notification(peer,
777 			    ERR_FSM, ERR_FSM_UNEX_OPENCONFIRM, NULL, 0);
778 			change_state(peer, STATE_IDLE, event);
779 			break;
780 		}
781 		break;
782 	case STATE_ESTABLISHED:
783 		switch (event) {
784 		case EVNT_START:
785 			/* ignore */
786 			break;
787 		case EVNT_STOP:
788 			change_state(peer, STATE_IDLE, event);
789 			break;
790 		case EVNT_CON_CLOSED:
791 		case EVNT_CON_FATAL:
792 			change_state(peer, STATE_IDLE, event);
793 			break;
794 		case EVNT_TIMER_HOLDTIME:
795 		case EVNT_TIMER_SENDHOLD:
796 			session_notification(peer, ERR_HOLDTIMEREXPIRED,
797 			    0, NULL, 0);
798 			change_state(peer, STATE_IDLE, event);
799 			break;
800 		case EVNT_TIMER_KEEPALIVE:
801 			session_keepalive(peer);
802 			break;
803 		case EVNT_RCVD_KEEPALIVE:
804 			start_timer_holdtime(peer);
805 			break;
806 		case EVNT_RCVD_UPDATE:
807 			start_timer_holdtime(peer);
808 			if (parse_update(peer))
809 				change_state(peer, STATE_IDLE, event);
810 			else
811 				start_timer_holdtime(peer);
812 			break;
813 		case EVNT_RCVD_NOTIFICATION:
814 			parse_notification(peer);
815 			change_state(peer, STATE_IDLE, event);
816 			break;
817 		default:
818 			session_notification(peer,
819 			    ERR_FSM, ERR_FSM_UNEX_ESTABLISHED, NULL, 0);
820 			change_state(peer, STATE_IDLE, event);
821 			break;
822 		}
823 		break;
824 	}
825 }
826 
827 void
828 start_timer_holdtime(struct peer *peer)
829 {
830 	if (peer->holdtime > 0)
831 		timer_set(&peer->timers, Timer_Hold, peer->holdtime);
832 	else
833 		timer_stop(&peer->timers, Timer_Hold);
834 }
835 
836 void
837 start_timer_keepalive(struct peer *peer)
838 {
839 	if (peer->holdtime > 0)
840 		timer_set(&peer->timers, Timer_Keepalive, peer->holdtime / 3);
841 	else
842 		timer_stop(&peer->timers, Timer_Keepalive);
843 }
844 
845 void
846 session_close_connection(struct peer *peer)
847 {
848 	if (peer->fd != -1) {
849 		close(peer->fd);
850 		pauseaccept = 0;
851 	}
852 	peer->fd = peer->wbuf.fd = -1;
853 }
854 
855 void
856 change_state(struct peer *peer, enum session_state state,
857     enum session_events event)
858 {
859 	struct mrt	*mrt;
860 
861 	switch (state) {
862 	case STATE_IDLE:
863 		/* carp demotion first. new peers handled in init_peer */
864 		if (peer->state == STATE_ESTABLISHED &&
865 		    peer->conf.demote_group[0] && !peer->demoted)
866 			session_demote(peer, +1);
867 
868 		/*
869 		 * try to write out what's buffered (maybe a notification),
870 		 * don't bother if it fails
871 		 */
872 		if (peer->state >= STATE_OPENSENT && peer->wbuf.queued)
873 			msgbuf_write(&peer->wbuf);
874 
875 		/*
876 		 * we must start the timer for the next EVNT_START
877 		 * if we are coming here due to an error and the
878 		 * session was not established successfully before, the
879 		 * starttimerinterval needs to be exponentially increased
880 		 */
881 		if (peer->IdleHoldTime == 0)
882 			peer->IdleHoldTime = INTERVAL_IDLE_HOLD_INITIAL;
883 		peer->holdtime = INTERVAL_HOLD_INITIAL;
884 		timer_stop(&peer->timers, Timer_ConnectRetry);
885 		timer_stop(&peer->timers, Timer_Keepalive);
886 		timer_stop(&peer->timers, Timer_Hold);
887 		timer_stop(&peer->timers, Timer_SendHold);
888 		timer_stop(&peer->timers, Timer_IdleHold);
889 		timer_stop(&peer->timers, Timer_IdleHoldReset);
890 		session_close_connection(peer);
891 		msgbuf_clear(&peer->wbuf);
892 		free(peer->rbuf);
893 		peer->rbuf = NULL;
894 		peer->rpending = 0;
895 		bzero(&peer->capa.peer, sizeof(peer->capa.peer));
896 		if (!peer->template)
897 			imsg_compose(ibuf_main, IMSG_PFKEY_RELOAD,
898 			    peer->conf.id, 0, -1, NULL, 0);
899 
900 		if (event != EVNT_STOP) {
901 			timer_set(&peer->timers, Timer_IdleHold,
902 			    peer->IdleHoldTime);
903 			if (event != EVNT_NONE &&
904 			    peer->IdleHoldTime < MAX_IDLE_HOLD/2)
905 				peer->IdleHoldTime *= 2;
906 		}
907 		if (peer->state == STATE_ESTABLISHED) {
908 			if (peer->capa.neg.grestart.restart == 2 &&
909 			    (event == EVNT_CON_CLOSED ||
910 			    event == EVNT_CON_FATAL)) {
911 				/* don't punish graceful restart */
912 				timer_set(&peer->timers, Timer_IdleHold, 0);
913 				peer->IdleHoldTime /= 2;
914 				session_graceful_restart(peer);
915 			} else
916 				session_down(peer);
917 		}
918 		if (peer->state == STATE_NONE ||
919 		    peer->state == STATE_ESTABLISHED) {
920 			/* initialize capability negotiation structures */
921 			memcpy(&peer->capa.ann, &peer->conf.capabilities,
922 			    sizeof(peer->capa.ann));
923 			if (!peer->conf.announce_capa)
924 				session_capa_ann_none(peer);
925 		}
926 		break;
927 	case STATE_CONNECT:
928 		if (peer->state == STATE_ESTABLISHED &&
929 		    peer->capa.neg.grestart.restart == 2) {
930 			/* do the graceful restart dance */
931 			session_graceful_restart(peer);
932 			peer->holdtime = INTERVAL_HOLD_INITIAL;
933 			timer_stop(&peer->timers, Timer_ConnectRetry);
934 			timer_stop(&peer->timers, Timer_Keepalive);
935 			timer_stop(&peer->timers, Timer_Hold);
936 			timer_stop(&peer->timers, Timer_SendHold);
937 			timer_stop(&peer->timers, Timer_IdleHold);
938 			timer_stop(&peer->timers, Timer_IdleHoldReset);
939 			session_close_connection(peer);
940 			msgbuf_clear(&peer->wbuf);
941 			bzero(&peer->capa.peer, sizeof(peer->capa.peer));
942 		}
943 		break;
944 	case STATE_ACTIVE:
945 		if (!peer->template)
946 			imsg_compose(ibuf_main, IMSG_PFKEY_RELOAD,
947 			    peer->conf.id, 0, -1, NULL, 0);
948 		break;
949 	case STATE_OPENSENT:
950 		break;
951 	case STATE_OPENCONFIRM:
952 		break;
953 	case STATE_ESTABLISHED:
954 		timer_set(&peer->timers, Timer_IdleHoldReset,
955 		    peer->IdleHoldTime);
956 		if (peer->demoted)
957 			timer_set(&peer->timers, Timer_CarpUndemote,
958 			    INTERVAL_HOLD_DEMOTED);
959 		session_up(peer);
960 		break;
961 	default:		/* something seriously fucked */
962 		break;
963 	}
964 
965 	log_statechange(peer, state, event);
966 	LIST_FOREACH(mrt, &mrthead, entry) {
967 		if (!(mrt->type == MRT_ALL_IN || mrt->type == MRT_ALL_OUT))
968 			continue;
969 		if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
970 		    mrt->peer_id == peer->conf.id || (mrt->group_id != 0 &&
971 		    mrt->group_id == peer->conf.groupid))
972 			mrt_dump_state(mrt, peer->state, state, peer);
973 	}
974 	peer->prev_state = peer->state;
975 	peer->state = state;
976 }
977 
978 void
979 session_accept(int listenfd)
980 {
981 	int			 connfd;
982 	socklen_t		 len;
983 	struct sockaddr_storage	 cliaddr;
984 	struct peer		*p = NULL;
985 
986 	len = sizeof(cliaddr);
987 	if ((connfd = accept4(listenfd,
988 	    (struct sockaddr *)&cliaddr, &len,
989 	    SOCK_CLOEXEC | SOCK_NONBLOCK)) == -1) {
990 		if (errno == ENFILE || errno == EMFILE)
991 			pauseaccept = getmonotime();
992 		else if (errno != EWOULDBLOCK && errno != EINTR &&
993 		    errno != ECONNABORTED)
994 			log_warn("accept");
995 		return;
996 	}
997 
998 	p = getpeerbyip(conf, (struct sockaddr *)&cliaddr);
999 
1000 	if (p != NULL && p->state == STATE_IDLE && p->errcnt < 2) {
1001 		if (timer_running(&p->timers, Timer_IdleHold, NULL)) {
1002 			/* fast reconnect after clear */
1003 			p->passive = 1;
1004 			bgp_fsm(p, EVNT_START);
1005 		}
1006 	}
1007 
1008 	if (p != NULL &&
1009 	    (p->state == STATE_CONNECT || p->state == STATE_ACTIVE)) {
1010 		if (p->fd != -1) {
1011 			if (p->state == STATE_CONNECT)
1012 				session_close_connection(p);
1013 			else {
1014 				close(connfd);
1015 				return;
1016 			}
1017 		}
1018 
1019 open:
1020 		if (p->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) {
1021 			log_peer_warnx(&p->conf,
1022 			    "ipsec or md5sig configured but not available");
1023 			close(connfd);
1024 			return;
1025 		}
1026 
1027 		if (tcp_md5_check(connfd, p) == -1) {
1028 			close(connfd);
1029 			return;
1030 		}
1031 		p->fd = p->wbuf.fd = connfd;
1032 		if (session_setup_socket(p)) {
1033 			close(connfd);
1034 			return;
1035 		}
1036 		bgp_fsm(p, EVNT_CON_OPEN);
1037 		return;
1038 	} else if (p != NULL && p->state == STATE_ESTABLISHED &&
1039 	    p->capa.neg.grestart.restart == 2) {
1040 		/* first do the graceful restart dance */
1041 		change_state(p, STATE_CONNECT, EVNT_CON_CLOSED);
1042 		/* then do part of the open dance */
1043 		goto open;
1044 	} else {
1045 		log_conn_attempt(p, (struct sockaddr *)&cliaddr, len);
1046 		close(connfd);
1047 	}
1048 }
1049 
1050 int
1051 session_connect(struct peer *peer)
1052 {
1053 	struct sockaddr		*sa;
1054 	struct bgpd_addr	*bind_addr = NULL;
1055 	socklen_t		 sa_len;
1056 
1057 	/*
1058 	 * we do not need the overcomplicated collision detection RFC 1771
1059 	 * describes; we simply make sure there is only ever one concurrent
1060 	 * tcp connection per peer.
1061 	 */
1062 	if (peer->fd != -1)
1063 		return (-1);
1064 
1065 	if ((peer->fd = socket(aid2af(peer->conf.remote_addr.aid),
1066 	    SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, IPPROTO_TCP)) == -1) {
1067 		log_peer_warn(&peer->conf, "session_connect socket");
1068 		bgp_fsm(peer, EVNT_CON_OPENFAIL);
1069 		return (-1);
1070 	}
1071 
1072 	if (peer->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) {
1073 		log_peer_warnx(&peer->conf,
1074 		    "ipsec or md5sig configured but not available");
1075 		bgp_fsm(peer, EVNT_CON_OPENFAIL);
1076 		return (-1);
1077 	}
1078 
1079 	tcp_md5_set(peer->fd, peer);
1080 	peer->wbuf.fd = peer->fd;
1081 
1082 	/* if local-address is set we need to bind() */
1083 	switch (peer->conf.remote_addr.aid) {
1084 	case AID_INET:
1085 		bind_addr = &peer->conf.local_addr_v4;
1086 		break;
1087 	case AID_INET6:
1088 		bind_addr = &peer->conf.local_addr_v6;
1089 		break;
1090 	}
1091 	if ((sa = addr2sa(bind_addr, 0, &sa_len)) != NULL) {
1092 		if (bind(peer->fd, sa, sa_len) == -1) {
1093 			log_peer_warn(&peer->conf, "session_connect bind");
1094 			bgp_fsm(peer, EVNT_CON_OPENFAIL);
1095 			return (-1);
1096 		}
1097 	}
1098 
1099 	if (session_setup_socket(peer)) {
1100 		bgp_fsm(peer, EVNT_CON_OPENFAIL);
1101 		return (-1);
1102 	}
1103 
1104 	sa = addr2sa(&peer->conf.remote_addr, peer->conf.remote_port, &sa_len);
1105 	if (connect(peer->fd, sa, sa_len) == -1) {
1106 		if (errno != EINPROGRESS) {
1107 			if (errno != peer->lasterr)
1108 				log_peer_warn(&peer->conf, "connect");
1109 			peer->lasterr = errno;
1110 			bgp_fsm(peer, EVNT_CON_OPENFAIL);
1111 			return (-1);
1112 		}
1113 	} else
1114 		bgp_fsm(peer, EVNT_CON_OPEN);
1115 
1116 	return (0);
1117 }
1118 
1119 int
1120 session_setup_socket(struct peer *p)
1121 {
1122 	int	ttl = p->conf.distance;
1123 	int	pre = IPTOS_PREC_INTERNETCONTROL;
1124 	int	nodelay = 1;
1125 	int	bsize;
1126 
1127 	switch (p->conf.remote_addr.aid) {
1128 	case AID_INET:
1129 		/* set precedence, see RFC 1771 appendix 5 */
1130 		if (setsockopt(p->fd, IPPROTO_IP, IP_TOS, &pre, sizeof(pre)) ==
1131 		    -1) {
1132 			log_peer_warn(&p->conf,
1133 			    "session_setup_socket setsockopt TOS");
1134 			return (-1);
1135 		}
1136 
1137 		if (p->conf.ebgp) {
1138 			/*
1139 			 * set TTL to foreign router's distance
1140 			 * 1=direct n=multihop with ttlsec, we always use 255
1141 			 */
1142 			if (p->conf.ttlsec) {
1143 				ttl = 256 - p->conf.distance;
1144 				if (setsockopt(p->fd, IPPROTO_IP, IP_MINTTL,
1145 				    &ttl, sizeof(ttl)) == -1) {
1146 					log_peer_warn(&p->conf,
1147 					    "session_setup_socket: "
1148 					    "setsockopt MINTTL");
1149 					return (-1);
1150 				}
1151 				ttl = 255;
1152 			}
1153 
1154 			if (setsockopt(p->fd, IPPROTO_IP, IP_TTL, &ttl,
1155 			    sizeof(ttl)) == -1) {
1156 				log_peer_warn(&p->conf,
1157 				    "session_setup_socket setsockopt TTL");
1158 				return (-1);
1159 			}
1160 		}
1161 		break;
1162 	case AID_INET6:
1163 		if (p->conf.ebgp) {
1164 			/*
1165 			 * set hoplimit to foreign router's distance
1166 			 * 1=direct n=multihop with ttlsec, we always use 255
1167 			 */
1168 			if (p->conf.ttlsec) {
1169 				ttl = 256 - p->conf.distance;
1170 				if (setsockopt(p->fd, IPPROTO_IPV6,
1171 				    IPV6_MINHOPCOUNT, &ttl, sizeof(ttl))
1172 				    == -1) {
1173 					log_peer_warn(&p->conf,
1174 					    "session_setup_socket: "
1175 					    "setsockopt MINHOPCOUNT");
1176 					return (-1);
1177 				}
1178 				ttl = 255;
1179 			}
1180 			if (setsockopt(p->fd, IPPROTO_IPV6, IPV6_UNICAST_HOPS,
1181 			    &ttl, sizeof(ttl)) == -1) {
1182 				log_peer_warn(&p->conf,
1183 				    "session_setup_socket setsockopt hoplimit");
1184 				return (-1);
1185 			}
1186 		}
1187 		break;
1188 	}
1189 
1190 	/* set TCP_NODELAY */
1191 	if (setsockopt(p->fd, IPPROTO_TCP, TCP_NODELAY, &nodelay,
1192 	    sizeof(nodelay)) == -1) {
1193 		log_peer_warn(&p->conf,
1194 		    "session_setup_socket setsockopt TCP_NODELAY");
1195 		return (-1);
1196 	}
1197 
1198 	/* only increase bufsize (and thus window) if md5 or ipsec is in use */
1199 	if (p->conf.auth.method != AUTH_NONE) {
1200 		/* try to increase bufsize. no biggie if it fails */
1201 		bsize = 65535;
1202 		while (bsize > 8192 &&
1203 		    setsockopt(p->fd, SOL_SOCKET, SO_RCVBUF, &bsize,
1204 		    sizeof(bsize)) == -1 && errno != EINVAL)
1205 			bsize /= 2;
1206 		bsize = 65535;
1207 		while (bsize > 8192 &&
1208 		    setsockopt(p->fd, SOL_SOCKET, SO_SNDBUF, &bsize,
1209 		    sizeof(bsize)) == -1 && errno != EINVAL)
1210 			bsize /= 2;
1211 	}
1212 
1213 	return (0);
1214 }
1215 
1216 /* compare two sockaddrs by converting them into bgpd_addr */
1217 static int
1218 sa_cmp(struct sockaddr *a, struct sockaddr *b)
1219 {
1220 	struct bgpd_addr ba, bb;
1221 
1222 	sa2addr(a, &ba, NULL);
1223 	sa2addr(b, &bb, NULL);
1224 
1225 	return (memcmp(&ba, &bb, sizeof(ba)) == 0);
1226 }
1227 
/*
 * Find an address of the other address family on the interface that
 * carries the local session address.
 *
 * sa is the local address of the session.  The interface list is searched
 * for the entry holding exactly that address; then the first address of
 * the opposite family (IPv4 for an IPv6 session, a non link-local and
 * non site-local IPv6 address for an IPv4 session) configured on the same
 * interface name is stored in alt.  alt is left untouched when no
 * interface or no suitable address is found.
 */
static void
get_alternate_addr(struct sockaddr *sa, struct bgpd_addr *alt)
{
	struct ifaddrs	*ifap, *ifa, *match;

	if (getifaddrs(&ifap) == -1)
		fatal("getifaddrs");

	/* sa_cmp() yields non-zero when the two addresses are equal */
	for (match = ifap; match != NULL; match = match->ifa_next)
		if (match->ifa_addr != NULL &&
		    sa_cmp(sa, match->ifa_addr) != 0)
			break;

	if (match == NULL) {
		log_warnx("%s: local address not found", __func__);
		/* the list is ours to release on every exit path */
		freeifaddrs(ifap);
		return;
	}

	switch (sa->sa_family) {
	case AF_INET6:
		for (ifa = ifap; ifa != NULL; ifa = ifa->ifa_next) {
			if (ifa->ifa_addr != NULL &&
			    ifa->ifa_addr->sa_family == AF_INET &&
			    strcmp(ifa->ifa_name, match->ifa_name) == 0) {
				sa2addr(ifa->ifa_addr, alt, NULL);
				break;
			}
		}
		break;
	case AF_INET:
		for (ifa = ifap; ifa != NULL; ifa = ifa->ifa_next) {
			if (ifa->ifa_addr != NULL &&
			    ifa->ifa_addr->sa_family == AF_INET6 &&
			    strcmp(ifa->ifa_name, match->ifa_name) == 0) {
				struct sockaddr_in6 *s =
				    (struct sockaddr_in6 *)ifa->ifa_addr;

				/* only accept global scope addresses */
				if (IN6_IS_ADDR_LINKLOCAL(&s->sin6_addr) ||
				    IN6_IS_ADDR_SITELOCAL(&s->sin6_addr))
					continue;
				sa2addr(ifa->ifa_addr, alt, NULL);
				break;
			}
		}
		break;
	default:
		log_warnx("%s: unsupported address family %d", __func__,
		    sa->sa_family);
		break;
	}

	freeifaddrs(ifap);
}
1282 
1283 void
1284 session_tcp_established(struct peer *peer)
1285 {
1286 	struct sockaddr_storage	ss;
1287 	socklen_t		len;
1288 
1289 	len = sizeof(ss);
1290 	if (getsockname(peer->fd, (struct sockaddr *)&ss, &len) == -1)
1291 		log_warn("getsockname");
1292 	sa2addr((struct sockaddr *)&ss, &peer->local, &peer->local_port);
1293 	get_alternate_addr((struct sockaddr *)&ss, &peer->local_alt);
1294 	len = sizeof(ss);
1295 	if (getpeername(peer->fd, (struct sockaddr *)&ss, &len) == -1)
1296 		log_warn("getpeername");
1297 	sa2addr((struct sockaddr *)&ss, &peer->remote, &peer->remote_port);
1298 }
1299 
1300 void
1301 session_capa_ann_none(struct peer *peer)
1302 {
1303 	bzero(&peer->capa.ann, sizeof(peer->capa.ann));
1304 }
1305 
/*
 * Append a capability header (one byte code, one byte length) to opb.
 * Returns the sum of the ibuf_add() results, i.e. 0 when both calls
 * returned 0.
 */
int
session_capa_add(struct ibuf *opb, uint8_t capa_code, uint8_t capa_len)
{
	int failed;

	failed = ibuf_add(opb, &capa_code, sizeof(capa_code));
	failed += ibuf_add(opb, &capa_len, sizeof(capa_len));

	return (failed);
}
1315 
/*
 * Append the 4-byte multiprotocol capability value for aid to buf:
 * AFI (network byte order), one zero byte, SAFI.
 * An aid that aid2afi() rejects is fatal.
 * Returns the sum of the ibuf_add() results.
 */
int
session_capa_add_mp(struct ibuf *buf, uint8_t aid)
{
	uint16_t	 afi;
	uint8_t		 safi;
	uint8_t		 reserved = 0;
	int		 failed;

	if (aid2afi(aid, &afi, &safi) == -1)
		fatalx("session_capa_add_mp: bad afi/safi pair");

	afi = htons(afi);
	failed = ibuf_add(buf, &afi, sizeof(afi));
	failed += ibuf_add(buf, &reserved, sizeof(reserved));
	failed += ibuf_add(buf, &safi, sizeof(safi));

	return (failed);
}
1332 
/*
 * Append a 4-byte <AFI, SAFI, flags> tuple for aid to b: AFI in network
 * byte order, followed by the SAFI byte and the flags byte.
 *
 * p is not used by this function.
 *
 * Returns 1 if aid cannot be mapped to an AFI/SAFI pair, otherwise the
 * sum of the ibuf_add() results (0 when all of them returned 0).
 */
int
session_capa_add_afi(struct peer *p, struct ibuf *b, uint8_t aid,
    uint8_t flags)
{
	int		errs = 0;
	uint16_t	afi;
	uint8_t		safi;

	if (aid2afi(aid, &afi, &safi)) {
		/* not a system call failure, so no errno text wanted */
		log_warnx("session_capa_add_afi: bad AID");
		return (1);
	}

	afi = htons(afi);
	errs += ibuf_add(b, &afi, sizeof(afi));
	errs += ibuf_add(b, &safi, sizeof(safi));
	errs += ibuf_add(b, &flags, sizeof(flags));

	return (errs);
}
1353 
1354 struct bgp_msg *
1355 session_newmsg(enum msg_type msgtype, uint16_t len)
1356 {
1357 	struct bgp_msg		*msg;
1358 	struct msg_header	 hdr;
1359 	struct ibuf		*buf;
1360 	int			 errs = 0;
1361 
1362 	memset(&hdr.marker, 0xff, sizeof(hdr.marker));
1363 	hdr.len = htons(len);
1364 	hdr.type = msgtype;
1365 
1366 	if ((buf = ibuf_open(len)) == NULL)
1367 		return (NULL);
1368 
1369 	errs += ibuf_add(buf, &hdr.marker, sizeof(hdr.marker));
1370 	errs += ibuf_add(buf, &hdr.len, sizeof(hdr.len));
1371 	errs += ibuf_add(buf, &hdr.type, sizeof(hdr.type));
1372 
1373 	if (errs || (msg = calloc(1, sizeof(*msg))) == NULL) {
1374 		ibuf_free(buf);
1375 		return (NULL);
1376 	}
1377 
1378 	msg->buf = buf;
1379 	msg->type = msgtype;
1380 	msg->len = len;
1381 
1382 	return (msg);
1383 }
1384 
1385 int
1386 session_sendmsg(struct bgp_msg *msg, struct peer *p)
1387 {
1388 	struct mrt		*mrt;
1389 
1390 	LIST_FOREACH(mrt, &mrthead, entry) {
1391 		if (!(mrt->type == MRT_ALL_OUT || (msg->type == UPDATE &&
1392 		    mrt->type == MRT_UPDATE_OUT)))
1393 			continue;
1394 		if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
1395 		    mrt->peer_id == p->conf.id || (mrt->group_id != 0 &&
1396 		    mrt->group_id == p->conf.groupid))
1397 			mrt_dump_bgp_msg(mrt, msg->buf->buf, msg->len, p,
1398 			    msg->type);
1399 	}
1400 
1401 	ibuf_close(&p->wbuf, msg->buf);
1402 	if (!p->throttled && p->wbuf.queued > SESS_MSG_HIGH_MARK) {
1403 		if (imsg_rde(IMSG_XOFF, p->conf.id, NULL, 0) == -1)
1404 			log_peer_warn(&p->conf, "imsg_compose XOFF");
1405 		else
1406 			p->throttled = 1;
1407 	}
1408 
1409 	free(msg);
1410 	return (0);
1411 }
1412 
1413 void
1414 session_open(struct peer *p)
1415 {
1416 	struct bgp_msg		*buf;
1417 	struct ibuf		*opb;
1418 	struct msg_open		 msg;
1419 	uint16_t		 len, optparamlen = 0;
1420 	uint8_t			 i, op_type;
1421 	int			 errs = 0, extlen = 0;
1422 	int			 mpcapa = 0;
1423 
1424 
1425 	if ((opb = ibuf_dynamic(0, UINT16_MAX - 3)) == NULL) {
1426 		bgp_fsm(p, EVNT_CON_FATAL);
1427 		return;
1428 	}
1429 
1430 	/* multiprotocol extensions, RFC 4760 */
1431 	for (i = 0; i < AID_MAX; i++)
1432 		if (p->capa.ann.mp[i]) {	/* 4 bytes data */
1433 			errs += session_capa_add(opb, CAPA_MP, 4);
1434 			errs += session_capa_add_mp(opb, i);
1435 			mpcapa++;
1436 		}
1437 
1438 	/* route refresh, RFC 2918 */
1439 	if (p->capa.ann.refresh)	/* no data */
1440 		errs += session_capa_add(opb, CAPA_REFRESH, 0);
1441 
1442 	/* BGP open policy, RFC 9234 */
1443 	if (p->capa.ann.role_ena) {
1444 		errs += session_capa_add(opb, CAPA_ROLE, 1);
1445 		errs += ibuf_add(opb, &p->capa.ann.role, 1);
1446 	}
1447 
1448 	/* graceful restart and End-of-RIB marker, RFC 4724 */
1449 	if (p->capa.ann.grestart.restart) {
1450 		int		rst = 0;
1451 		uint16_t	hdr = 0;
1452 
1453 		for (i = 0; i < AID_MAX; i++) {
1454 			if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING)
1455 				rst++;
1456 		}
1457 
1458 		/* Only set the R-flag if no graceful restart is ongoing */
1459 		if (!rst)
1460 			hdr |= CAPA_GR_R_FLAG;
1461 		hdr = htons(hdr);
1462 
1463 		errs += session_capa_add(opb, CAPA_RESTART, sizeof(hdr));
1464 		errs += ibuf_add(opb, &hdr, sizeof(hdr));
1465 	}
1466 
1467 	/* 4-bytes AS numbers, RFC6793 */
1468 	if (p->capa.ann.as4byte) {	/* 4 bytes data */
1469 		uint32_t	nas;
1470 
1471 		nas = htonl(p->conf.local_as);
1472 		errs += session_capa_add(opb, CAPA_AS4BYTE, sizeof(nas));
1473 		errs += ibuf_add(opb, &nas, sizeof(nas));
1474 	}
1475 
1476 	/* advertisement of multiple paths, RFC7911 */
1477 	if (p->capa.ann.add_path[0]) {	/* variable */
1478 		uint8_t	aplen;
1479 
1480 		if (mpcapa)
1481 			aplen = 4 * mpcapa;
1482 		else	/* AID_INET */
1483 			aplen = 4;
1484 		errs += session_capa_add(opb, CAPA_ADD_PATH, aplen);
1485 		if (mpcapa) {
1486 			for (i = AID_MIN; i < AID_MAX; i++) {
1487 				if (p->capa.ann.mp[i]) {
1488 					errs += session_capa_add_afi(p, opb,
1489 					    i, p->capa.ann.add_path[i]);
1490 				}
1491 			}
1492 		} else {	/* AID_INET */
1493 			errs += session_capa_add_afi(p, opb, AID_INET,
1494 			    p->capa.ann.add_path[AID_INET]);
1495 		}
1496 	}
1497 
1498 	/* enhanced route-refresh, RFC7313 */
1499 	if (p->capa.ann.enhanced_rr)	/* no data */
1500 		errs += session_capa_add(opb, CAPA_ENHANCED_RR, 0);
1501 
1502 	optparamlen = ibuf_size(opb);
1503 	if (optparamlen == 0) {
1504 		/* nothing */
1505 	} else if (optparamlen + 2 >= 255) {
1506 		/* RFC9072: 2 byte lenght instead of 1 + 3 byte extra header */
1507 		optparamlen += sizeof(op_type) + 2 + 3;
1508 		msg.optparamlen = 255;
1509 		extlen = 1;
1510 	} else {
1511 		optparamlen += sizeof(op_type) + 1;
1512 		msg.optparamlen = optparamlen;
1513 	}
1514 
1515 	len = MSGSIZE_OPEN_MIN + optparamlen;
1516 	if (errs || (buf = session_newmsg(OPEN, len)) == NULL) {
1517 		ibuf_free(opb);
1518 		bgp_fsm(p, EVNT_CON_FATAL);
1519 		return;
1520 	}
1521 
1522 	msg.version = 4;
1523 	msg.myas = htons(p->conf.local_short_as);
1524 	if (p->conf.holdtime)
1525 		msg.holdtime = htons(p->conf.holdtime);
1526 	else
1527 		msg.holdtime = htons(conf->holdtime);
1528 	msg.bgpid = conf->bgpid;	/* is already in network byte order */
1529 
1530 	errs += ibuf_add(buf->buf, &msg.version, sizeof(msg.version));
1531 	errs += ibuf_add(buf->buf, &msg.myas, sizeof(msg.myas));
1532 	errs += ibuf_add(buf->buf, &msg.holdtime, sizeof(msg.holdtime));
1533 	errs += ibuf_add(buf->buf, &msg.bgpid, sizeof(msg.bgpid));
1534 	errs += ibuf_add(buf->buf, &msg.optparamlen, 1);
1535 
1536 	if (extlen) {
1537 		/* write RFC9072 extra header */
1538 		uint16_t op_extlen = htons(optparamlen - 3);
1539 		op_type = OPT_PARAM_EXT_LEN;
1540 		errs += ibuf_add(buf->buf, &op_type, 1);
1541 		errs += ibuf_add(buf->buf, &op_extlen, 2);
1542 	}
1543 
1544 	if (optparamlen) {
1545 		op_type = OPT_PARAM_CAPABILITIES;
1546 		errs += ibuf_add(buf->buf, &op_type, sizeof(op_type));
1547 
1548 		optparamlen = ibuf_size(opb);
1549 		if (extlen) {
1550 			/* RFC9072: 2-byte extended length */
1551 			uint16_t op_extlen = htons(optparamlen);
1552 			errs += ibuf_add(buf->buf, &op_extlen, 2);
1553 		} else {
1554 			uint8_t op_len = optparamlen;
1555 			errs += ibuf_add(buf->buf, &op_len, 1);
1556 		}
1557 		errs += ibuf_add(buf->buf, opb->buf, ibuf_size(opb));
1558 	}
1559 
1560 	ibuf_free(opb);
1561 
1562 	if (errs) {
1563 		ibuf_free(buf->buf);
1564 		free(buf);
1565 		bgp_fsm(p, EVNT_CON_FATAL);
1566 		return;
1567 	}
1568 
1569 	if (session_sendmsg(buf, p) == -1) {
1570 		bgp_fsm(p, EVNT_CON_FATAL);
1571 		return;
1572 	}
1573 
1574 	p->stats.msg_sent_open++;
1575 }
1576 
1577 void
1578 session_keepalive(struct peer *p)
1579 {
1580 	struct bgp_msg		*buf;
1581 
1582 	if ((buf = session_newmsg(KEEPALIVE, MSGSIZE_KEEPALIVE)) == NULL ||
1583 	    session_sendmsg(buf, p) == -1) {
1584 		bgp_fsm(p, EVNT_CON_FATAL);
1585 		return;
1586 	}
1587 
1588 	start_timer_keepalive(p);
1589 	p->stats.msg_sent_keepalive++;
1590 }
1591 
1592 void
1593 session_update(uint32_t peerid, void *data, size_t datalen)
1594 {
1595 	struct peer		*p;
1596 	struct bgp_msg		*buf;
1597 
1598 	if ((p = getpeerbyid(conf, peerid)) == NULL) {
1599 		log_warnx("no such peer: id=%u", peerid);
1600 		return;
1601 	}
1602 
1603 	if (p->state != STATE_ESTABLISHED)
1604 		return;
1605 
1606 	if ((buf = session_newmsg(UPDATE, MSGSIZE_HEADER + datalen)) == NULL) {
1607 		bgp_fsm(p, EVNT_CON_FATAL);
1608 		return;
1609 	}
1610 
1611 	if (ibuf_add(buf->buf, data, datalen)) {
1612 		ibuf_free(buf->buf);
1613 		free(buf);
1614 		bgp_fsm(p, EVNT_CON_FATAL);
1615 		return;
1616 	}
1617 
1618 	if (session_sendmsg(buf, p) == -1) {
1619 		bgp_fsm(p, EVNT_CON_FATAL);
1620 		return;
1621 	}
1622 
1623 	start_timer_keepalive(p);
1624 	p->stats.msg_sent_update++;
1625 }
1626 
1627 void
1628 session_notification(struct peer *p, uint8_t errcode, uint8_t subcode,
1629     void *data, ssize_t datalen)
1630 {
1631 	struct bgp_msg		*buf;
1632 	int			 errs = 0;
1633 
1634 	if (p->stats.last_sent_errcode)	/* some notification already sent */
1635 		return;
1636 
1637 	log_notification(p, errcode, subcode, data, datalen, "sending");
1638 
1639 	/* cap to maximum size */
1640 	if (datalen > MAX_PKTSIZE - MSGSIZE_NOTIFICATION_MIN) {
1641 		log_peer_warnx(&p->conf,
1642 		    "oversized notification, data trunkated");
1643 		datalen = MAX_PKTSIZE - MSGSIZE_NOTIFICATION_MIN;
1644 	}
1645 
1646 	if ((buf = session_newmsg(NOTIFICATION,
1647 	    MSGSIZE_NOTIFICATION_MIN + datalen)) == NULL) {
1648 		bgp_fsm(p, EVNT_CON_FATAL);
1649 		return;
1650 	}
1651 
1652 	errs += ibuf_add(buf->buf, &errcode, sizeof(errcode));
1653 	errs += ibuf_add(buf->buf, &subcode, sizeof(subcode));
1654 
1655 	if (datalen > 0)
1656 		errs += ibuf_add(buf->buf, data, datalen);
1657 
1658 	if (errs) {
1659 		ibuf_free(buf->buf);
1660 		free(buf);
1661 		bgp_fsm(p, EVNT_CON_FATAL);
1662 		return;
1663 	}
1664 
1665 	if (session_sendmsg(buf, p) == -1) {
1666 		bgp_fsm(p, EVNT_CON_FATAL);
1667 		return;
1668 	}
1669 
1670 	p->stats.msg_sent_notification++;
1671 	p->stats.last_sent_errcode = errcode;
1672 	p->stats.last_sent_suberr = subcode;
1673 }
1674 
1675 int
1676 session_neighbor_rrefresh(struct peer *p)
1677 {
1678 	uint8_t	i;
1679 
1680 	if (!(p->capa.neg.refresh || p->capa.neg.enhanced_rr))
1681 		return (-1);
1682 
1683 	for (i = 0; i < AID_MAX; i++) {
1684 		if (p->capa.neg.mp[i] != 0)
1685 			session_rrefresh(p, i, ROUTE_REFRESH_REQUEST);
1686 	}
1687 
1688 	return (0);
1689 }
1690 
1691 void
1692 session_rrefresh(struct peer *p, uint8_t aid, uint8_t subtype)
1693 {
1694 	struct bgp_msg		*buf;
1695 	int			 errs = 0;
1696 	uint16_t		 afi;
1697 	uint8_t			 safi;
1698 
1699 	switch (subtype) {
1700 	case ROUTE_REFRESH_REQUEST:
1701 		p->stats.refresh_sent_req++;
1702 		break;
1703 	case ROUTE_REFRESH_BEGIN_RR:
1704 	case ROUTE_REFRESH_END_RR:
1705 		/* requires enhanced route refresh */
1706 		if (!p->capa.neg.enhanced_rr)
1707 			return;
1708 		if (subtype == ROUTE_REFRESH_BEGIN_RR)
1709 			p->stats.refresh_sent_borr++;
1710 		else
1711 			p->stats.refresh_sent_eorr++;
1712 		break;
1713 	default:
1714 		fatalx("session_rrefresh: bad subtype %d", subtype);
1715 	}
1716 
1717 	if (aid2afi(aid, &afi, &safi) == -1)
1718 		fatalx("session_rrefresh: bad afi/safi pair");
1719 
1720 	if ((buf = session_newmsg(RREFRESH, MSGSIZE_RREFRESH)) == NULL) {
1721 		bgp_fsm(p, EVNT_CON_FATAL);
1722 		return;
1723 	}
1724 
1725 	afi = htons(afi);
1726 	errs += ibuf_add(buf->buf, &afi, sizeof(afi));
1727 	errs += ibuf_add(buf->buf, &subtype, sizeof(subtype));
1728 	errs += ibuf_add(buf->buf, &safi, sizeof(safi));
1729 
1730 	if (errs) {
1731 		ibuf_free(buf->buf);
1732 		free(buf);
1733 		bgp_fsm(p, EVNT_CON_FATAL);
1734 		return;
1735 	}
1736 
1737 	if (session_sendmsg(buf, p) == -1) {
1738 		bgp_fsm(p, EVNT_CON_FATAL);
1739 		return;
1740 	}
1741 
1742 	p->stats.msg_sent_rrefresh++;
1743 }
1744 
1745 int
1746 session_graceful_restart(struct peer *p)
1747 {
1748 	uint8_t	i;
1749 
1750 	timer_set(&p->timers, Timer_RestartTimeout,
1751 	    p->capa.neg.grestart.timeout);
1752 
1753 	for (i = 0; i < AID_MAX; i++) {
1754 		if (p->capa.neg.grestart.flags[i] & CAPA_GR_PRESENT) {
1755 			if (imsg_rde(IMSG_SESSION_STALE, p->conf.id,
1756 			    &i, sizeof(i)) == -1)
1757 				return (-1);
1758 			log_peer_warnx(&p->conf,
1759 			    "graceful restart of %s, keeping routes",
1760 			    aid2str(i));
1761 			p->capa.neg.grestart.flags[i] |= CAPA_GR_RESTARTING;
1762 		} else if (p->capa.neg.mp[i]) {
1763 			if (imsg_rde(IMSG_SESSION_FLUSH, p->conf.id,
1764 			    &i, sizeof(i)) == -1)
1765 				return (-1);
1766 			log_peer_warnx(&p->conf,
1767 			    "graceful restart of %s, flushing routes",
1768 			    aid2str(i));
1769 		}
1770 	}
1771 	return (0);
1772 }
1773 
1774 int
1775 session_graceful_stop(struct peer *p)
1776 {
1777 	uint8_t	i;
1778 
1779 	for (i = 0; i < AID_MAX; i++) {
1780 		/*
1781 		 * Only flush if the peer is restarting and the timeout fired.
1782 		 * In all other cases the session was already flushed when the
1783 		 * session went down or when the new open message was parsed.
1784 		 */
1785 		if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING) {
1786 			log_peer_warnx(&p->conf, "graceful restart of %s, "
1787 			    "time-out, flushing", aid2str(i));
1788 			if (imsg_rde(IMSG_SESSION_FLUSH, p->conf.id,
1789 			    &i, sizeof(i)) == -1)
1790 				return (-1);
1791 		}
1792 		p->capa.neg.grestart.flags[i] &= ~CAPA_GR_RESTARTING;
1793 	}
1794 	return (0);
1795 }
1796 
1797 int
1798 session_dispatch_msg(struct pollfd *pfd, struct peer *p)
1799 {
1800 	ssize_t		n;
1801 	socklen_t	len;
1802 	int		error;
1803 
1804 	if (p->state == STATE_CONNECT) {
1805 		if (pfd->revents & POLLOUT) {
1806 			if (pfd->revents & POLLIN) {
1807 				/* error occurred */
1808 				len = sizeof(error);
1809 				if (getsockopt(pfd->fd, SOL_SOCKET, SO_ERROR,
1810 				    &error, &len) == -1 || error) {
1811 					if (error)
1812 						errno = error;
1813 					if (errno != p->lasterr) {
1814 						log_peer_warn(&p->conf,
1815 						    "socket error");
1816 						p->lasterr = errno;
1817 					}
1818 					bgp_fsm(p, EVNT_CON_OPENFAIL);
1819 					return (1);
1820 				}
1821 			}
1822 			bgp_fsm(p, EVNT_CON_OPEN);
1823 			return (1);
1824 		}
1825 		if (pfd->revents & POLLHUP) {
1826 			bgp_fsm(p, EVNT_CON_OPENFAIL);
1827 			return (1);
1828 		}
1829 		if (pfd->revents & (POLLERR|POLLNVAL)) {
1830 			bgp_fsm(p, EVNT_CON_FATAL);
1831 			return (1);
1832 		}
1833 		return (0);
1834 	}
1835 
1836 	if (pfd->revents & POLLHUP) {
1837 		bgp_fsm(p, EVNT_CON_CLOSED);
1838 		return (1);
1839 	}
1840 	if (pfd->revents & (POLLERR|POLLNVAL)) {
1841 		bgp_fsm(p, EVNT_CON_FATAL);
1842 		return (1);
1843 	}
1844 
1845 	if (pfd->revents & POLLOUT && p->wbuf.queued) {
1846 		if ((error = msgbuf_write(&p->wbuf)) <= 0 && errno != EAGAIN) {
1847 			if (error == 0)
1848 				log_peer_warnx(&p->conf, "Connection closed");
1849 			else if (error == -1)
1850 				log_peer_warn(&p->conf, "write error");
1851 			bgp_fsm(p, EVNT_CON_FATAL);
1852 			return (1);
1853 		}
1854 		p->stats.last_write = getmonotime();
1855 		if (p->holdtime > 0)
1856 			timer_set(&p->timers, Timer_SendHold,
1857 			    p->holdtime < INTERVAL_HOLD ? INTERVAL_HOLD :
1858 			    p->holdtime);
1859 		if (p->throttled && p->wbuf.queued < SESS_MSG_LOW_MARK) {
1860 			if (imsg_rde(IMSG_XON, p->conf.id, NULL, 0) == -1)
1861 				log_peer_warn(&p->conf, "imsg_compose XON");
1862 			else
1863 				p->throttled = 0;
1864 		}
1865 		if (!(pfd->revents & POLLIN))
1866 			return (1);
1867 	}
1868 
1869 	if (p->rbuf && pfd->revents & POLLIN) {
1870 		if ((n = read(p->fd, p->rbuf->buf + p->rbuf->wpos,
1871 		    sizeof(p->rbuf->buf) - p->rbuf->wpos)) == -1) {
1872 			if (errno != EINTR && errno != EAGAIN) {
1873 				log_peer_warn(&p->conf, "read error");
1874 				bgp_fsm(p, EVNT_CON_FATAL);
1875 			}
1876 			return (1);
1877 		}
1878 		if (n == 0) {	/* connection closed */
1879 			bgp_fsm(p, EVNT_CON_CLOSED);
1880 			return (1);
1881 		}
1882 
1883 		p->rbuf->wpos += n;
1884 		p->stats.last_read = getmonotime();
1885 		return (1);
1886 	}
1887 	return (0);
1888 }
1889 
1890 void
1891 session_process_msg(struct peer *p)
1892 {
1893 	struct mrt	*mrt;
1894 	ssize_t		rpos, av, left;
1895 	int		processed = 0;
1896 	uint16_t	msglen;
1897 	uint8_t		msgtype;
1898 
1899 	rpos = 0;
1900 	av = p->rbuf->wpos;
1901 	p->rpending = 0;
1902 
1903 	/*
1904 	 * session might drop to IDLE -> buffers deallocated
1905 	 * we MUST check rbuf != NULL before use
1906 	 */
1907 	for (;;) {
1908 		if (p->rbuf == NULL)
1909 			return;
1910 		if (rpos + MSGSIZE_HEADER > av)
1911 			break;
1912 		if (parse_header(p, p->rbuf->buf + rpos, &msglen,
1913 		    &msgtype) == -1)
1914 			return;
1915 		if (rpos + msglen > av)
1916 			break;
1917 		p->rbuf->rptr = p->rbuf->buf + rpos;
1918 
1919 		/* dump to MRT as soon as we have a full packet */
1920 		LIST_FOREACH(mrt, &mrthead, entry) {
1921 			if (!(mrt->type == MRT_ALL_IN || (msgtype == UPDATE &&
1922 			    mrt->type == MRT_UPDATE_IN)))
1923 				continue;
1924 			if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
1925 			    mrt->peer_id == p->conf.id || (mrt->group_id != 0 &&
1926 			    mrt->group_id == p->conf.groupid))
1927 				mrt_dump_bgp_msg(mrt, p->rbuf->rptr, msglen, p,
1928 				    msgtype);
1929 		}
1930 
1931 		switch (msgtype) {
1932 		case OPEN:
1933 			bgp_fsm(p, EVNT_RCVD_OPEN);
1934 			p->stats.msg_rcvd_open++;
1935 			break;
1936 		case UPDATE:
1937 			bgp_fsm(p, EVNT_RCVD_UPDATE);
1938 			p->stats.msg_rcvd_update++;
1939 			break;
1940 		case NOTIFICATION:
1941 			bgp_fsm(p, EVNT_RCVD_NOTIFICATION);
1942 			p->stats.msg_rcvd_notification++;
1943 			break;
1944 		case KEEPALIVE:
1945 			bgp_fsm(p, EVNT_RCVD_KEEPALIVE);
1946 			p->stats.msg_rcvd_keepalive++;
1947 			break;
1948 		case RREFRESH:
1949 			parse_rrefresh(p);
1950 			p->stats.msg_rcvd_rrefresh++;
1951 			break;
1952 		default:	/* cannot happen */
1953 			session_notification(p, ERR_HEADER, ERR_HDR_TYPE,
1954 			    &msgtype, 1);
1955 			log_warnx("received message with unknown type %u",
1956 			    msgtype);
1957 			bgp_fsm(p, EVNT_CON_FATAL);
1958 		}
1959 		rpos += msglen;
1960 		if (++processed > MSG_PROCESS_LIMIT) {
1961 			p->rpending = 1;
1962 			break;
1963 		}
1964 	}
1965 
1966 	if (rpos < av) {
1967 		left = av - rpos;
1968 		memmove(&p->rbuf->buf, p->rbuf->buf + rpos, left);
1969 		p->rbuf->wpos = left;
1970 	} else
1971 		p->rbuf->wpos = 0;
1972 }
1973 
1974 int
1975 parse_header(struct peer *peer, u_char *data, uint16_t *len, uint8_t *type)
1976 {
1977 	u_char			*p;
1978 	uint16_t		 olen;
1979 	static const uint8_t	 marker[MSGSIZE_HEADER_MARKER] = { 0xff, 0xff,
1980 				    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1981 				    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
1982 
1983 	/* caller MUST make sure we are getting 19 bytes! */
1984 	p = data;
1985 	if (memcmp(p, marker, sizeof(marker))) {
1986 		log_peer_warnx(&peer->conf, "sync error");
1987 		session_notification(peer, ERR_HEADER, ERR_HDR_SYNC, NULL, 0);
1988 		bgp_fsm(peer, EVNT_CON_FATAL);
1989 		return (-1);
1990 	}
1991 	p += MSGSIZE_HEADER_MARKER;
1992 
1993 	memcpy(&olen, p, 2);
1994 	*len = ntohs(olen);
1995 	p += 2;
1996 	memcpy(type, p, 1);
1997 
1998 	if (*len < MSGSIZE_HEADER || *len > MAX_PKTSIZE) {
1999 		log_peer_warnx(&peer->conf,
2000 		    "received message: illegal length: %u byte", *len);
2001 		session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
2002 		    &olen, sizeof(olen));
2003 		bgp_fsm(peer, EVNT_CON_FATAL);
2004 		return (-1);
2005 	}
2006 
2007 	switch (*type) {
2008 	case OPEN:
2009 		if (*len < MSGSIZE_OPEN_MIN) {
2010 			log_peer_warnx(&peer->conf,
2011 			    "received OPEN: illegal len: %u byte", *len);
2012 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
2013 			    &olen, sizeof(olen));
2014 			bgp_fsm(peer, EVNT_CON_FATAL);
2015 			return (-1);
2016 		}
2017 		break;
2018 	case NOTIFICATION:
2019 		if (*len < MSGSIZE_NOTIFICATION_MIN) {
2020 			log_peer_warnx(&peer->conf,
2021 			    "received NOTIFICATION: illegal len: %u byte",
2022 			    *len);
2023 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
2024 			    &olen, sizeof(olen));
2025 			bgp_fsm(peer, EVNT_CON_FATAL);
2026 			return (-1);
2027 		}
2028 		break;
2029 	case UPDATE:
2030 		if (*len < MSGSIZE_UPDATE_MIN) {
2031 			log_peer_warnx(&peer->conf,
2032 			    "received UPDATE: illegal len: %u byte", *len);
2033 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
2034 			    &olen, sizeof(olen));
2035 			bgp_fsm(peer, EVNT_CON_FATAL);
2036 			return (-1);
2037 		}
2038 		break;
2039 	case KEEPALIVE:
2040 		if (*len != MSGSIZE_KEEPALIVE) {
2041 			log_peer_warnx(&peer->conf,
2042 			    "received KEEPALIVE: illegal len: %u byte", *len);
2043 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
2044 			    &olen, sizeof(olen));
2045 			bgp_fsm(peer, EVNT_CON_FATAL);
2046 			return (-1);
2047 		}
2048 		break;
2049 	case RREFRESH:
2050 		if (*len < MSGSIZE_RREFRESH_MIN) {
2051 			log_peer_warnx(&peer->conf,
2052 			    "received RREFRESH: illegal len: %u byte", *len);
2053 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
2054 			    &olen, sizeof(olen));
2055 			bgp_fsm(peer, EVNT_CON_FATAL);
2056 			return (-1);
2057 		}
2058 		break;
2059 	default:
2060 		log_peer_warnx(&peer->conf,
2061 		    "received msg with unknown type %u", *type);
2062 		session_notification(peer, ERR_HEADER, ERR_HDR_TYPE,
2063 		    type, 1);
2064 		bgp_fsm(peer, EVNT_CON_FATAL);
2065 		return (-1);
2066 	}
2067 	return (0);
2068 }
2069 
2070 int
2071 parse_open(struct peer *peer)
2072 {
2073 	u_char		*p, *op_val;
2074 	uint8_t		 version, rversion;
2075 	uint16_t	 short_as, msglen;
2076 	uint16_t	 holdtime, oholdtime, myholdtime;
2077 	uint32_t	 as, bgpid;
2078 	uint16_t	 optparamlen, extlen, plen, op_len;
2079 	uint8_t		 op_type, suberr = 0;
2080 
2081 	p = peer->rbuf->rptr;
2082 	p += MSGSIZE_HEADER_MARKER;
2083 	memcpy(&msglen, p, sizeof(msglen));
2084 	msglen = ntohs(msglen);
2085 
2086 	p = peer->rbuf->rptr;
2087 	p += MSGSIZE_HEADER;	/* header is already checked */
2088 
2089 	memcpy(&version, p, sizeof(version));
2090 	p += sizeof(version);
2091 
2092 	if (version != BGP_VERSION) {
2093 		log_peer_warnx(&peer->conf,
2094 		    "peer wants unrecognized version %u", version);
2095 		if (version > BGP_VERSION)
2096 			rversion = version - BGP_VERSION;
2097 		else
2098 			rversion = BGP_VERSION;
2099 		session_notification(peer, ERR_OPEN, ERR_OPEN_VERSION,
2100 		    &rversion, sizeof(rversion));
2101 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2102 		return (-1);
2103 	}
2104 
2105 	memcpy(&short_as, p, sizeof(short_as));
2106 	p += sizeof(short_as);
2107 	as = peer->short_as = ntohs(short_as);
2108 	if (as == 0) {
2109 		log_peer_warnx(&peer->conf,
2110 		    "peer requests unacceptable AS %u", as);
2111 		session_notification(peer, ERR_OPEN, ERR_OPEN_AS,
2112 		    NULL, 0);
2113 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2114 		return (-1);
2115 	}
2116 
2117 	memcpy(&oholdtime, p, sizeof(oholdtime));
2118 	p += sizeof(oholdtime);
2119 
2120 	holdtime = ntohs(oholdtime);
2121 	if (holdtime && holdtime < peer->conf.min_holdtime) {
2122 		log_peer_warnx(&peer->conf,
2123 		    "peer requests unacceptable holdtime %u", holdtime);
2124 		session_notification(peer, ERR_OPEN, ERR_OPEN_HOLDTIME,
2125 		    NULL, 0);
2126 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2127 		return (-1);
2128 	}
2129 
2130 	myholdtime = peer->conf.holdtime;
2131 	if (!myholdtime)
2132 		myholdtime = conf->holdtime;
2133 	if (holdtime < myholdtime)
2134 		peer->holdtime = holdtime;
2135 	else
2136 		peer->holdtime = myholdtime;
2137 
2138 	memcpy(&bgpid, p, sizeof(bgpid));
2139 	p += sizeof(bgpid);
2140 
2141 	/* check bgpid for validity - just disallow 0 */
2142 	if (ntohl(bgpid) == 0) {
2143 		log_peer_warnx(&peer->conf, "peer BGPID %u unacceptable",
2144 		    ntohl(bgpid));
2145 		session_notification(peer, ERR_OPEN, ERR_OPEN_BGPID,
2146 		    NULL, 0);
2147 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2148 		return (-1);
2149 	}
2150 	peer->remote_bgpid = bgpid;
2151 
2152 	extlen = 0;
2153 	optparamlen = *p++;
2154 
2155 	if (optparamlen == 0) {
2156 		if (msglen != MSGSIZE_OPEN_MIN) {
2157 bad_len:
2158 			log_peer_warnx(&peer->conf,
2159 			    "corrupt OPEN message received: length mismatch");
2160 			session_notification(peer, ERR_OPEN, 0, NULL, 0);
2161 			change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2162 			return (-1);
2163 		}
2164 	} else {
2165 		if (msglen < MSGSIZE_OPEN_MIN + 1)
2166 			goto bad_len;
2167 
2168 		op_type = *p;
2169 		if (op_type == OPT_PARAM_EXT_LEN) {
2170 			p++;
2171 			memcpy(&optparamlen, p, sizeof(optparamlen));
2172 			optparamlen = ntohs(optparamlen);
2173 			p += sizeof(optparamlen);
2174 			extlen = 1;
2175 		}
2176 
2177 		/* RFC9020 encoding has 3 extra bytes */
2178 		if (optparamlen + 3 * extlen != msglen - MSGSIZE_OPEN_MIN)
2179 			goto bad_len;
2180 	}
2181 
2182 	plen = optparamlen;
2183 	while (plen > 0) {
2184 		if (plen < 2 + extlen)
2185 			goto bad_len;
2186 
2187 		memcpy(&op_type, p, sizeof(op_type));
2188 		p += sizeof(op_type);
2189 		plen -= sizeof(op_type);
2190 		if (!extlen) {
2191 			op_len = *p++;
2192 			plen--;
2193 		} else {
2194 			memcpy(&op_len, p, sizeof(op_len));
2195 			op_len = ntohs(op_len);
2196 			p += sizeof(op_len);
2197 			plen -= sizeof(op_len);
2198 		}
2199 		if (op_len > 0) {
2200 			if (plen < op_len)
2201 				goto bad_len;
2202 			op_val = p;
2203 			p += op_len;
2204 			plen -= op_len;
2205 		} else
2206 			op_val = NULL;
2207 
2208 		switch (op_type) {
2209 		case OPT_PARAM_CAPABILITIES:		/* RFC 3392 */
2210 			if (parse_capabilities(peer, op_val, op_len,
2211 			    &as) == -1) {
2212 				session_notification(peer, ERR_OPEN, 0,
2213 				    NULL, 0);
2214 				change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2215 				return (-1);
2216 			}
2217 			break;
2218 		case OPT_PARAM_AUTH:			/* deprecated */
2219 		default:
2220 			/*
2221 			 * unsupported type
2222 			 * the RFCs tell us to leave the data section empty
2223 			 * and notify the peer with ERR_OPEN, ERR_OPEN_OPT.
2224 			 * How the peer should know _which_ optional parameter
2225 			 * we don't support is beyond me.
2226 			 */
2227 			log_peer_warnx(&peer->conf,
2228 			    "received OPEN message with unsupported optional "
2229 			    "parameter: type %u", op_type);
2230 			session_notification(peer, ERR_OPEN, ERR_OPEN_OPT,
2231 				NULL, 0);
2232 			change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2233 			/* no punish */
2234 			timer_set(&peer->timers, Timer_IdleHold, 0);
2235 			peer->IdleHoldTime /= 2;
2236 			return (-1);
2237 		}
2238 	}
2239 
2240 	/* if remote-as is zero and it's a cloned neighbor, accept any */
2241 	if (peer->template && !peer->conf.remote_as && as != AS_TRANS) {
2242 		peer->conf.remote_as = as;
2243 		peer->conf.ebgp = (peer->conf.remote_as != peer->conf.local_as);
2244 		if (!peer->conf.ebgp)
2245 			/* force enforce_as off for iBGP sessions */
2246 			peer->conf.enforce_as = ENFORCE_AS_OFF;
2247 	}
2248 
2249 	if (peer->conf.remote_as != as) {
2250 		log_peer_warnx(&peer->conf, "peer sent wrong AS %s",
2251 		    log_as(as));
2252 		session_notification(peer, ERR_OPEN, ERR_OPEN_AS, NULL, 0);
2253 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2254 		return (-1);
2255 	}
2256 
2257 	/* on iBGP sessions check for bgpid collision */
2258 	if (!peer->conf.ebgp && peer->remote_bgpid == conf->bgpid) {
2259 		log_peer_warnx(&peer->conf, "peer BGPID %u conflicts with ours",
2260 		    ntohl(bgpid));
2261 		session_notification(peer, ERR_OPEN, ERR_OPEN_BGPID,
2262 		    NULL, 0);
2263 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2264 		return (-1);
2265 	}
2266 
2267 	if (capa_neg_calc(peer, &suberr) == -1) {
2268 		session_notification(peer, ERR_OPEN, suberr, NULL, 0);
2269 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2270 		return (-1);
2271 	}
2272 
2273 	return (0);
2274 }
2275 
2276 int
2277 parse_update(struct peer *peer)
2278 {
2279 	u_char		*p;
2280 	uint16_t	 datalen;
2281 
2282 	/*
2283 	 * we pass the message verbatim to the rde.
2284 	 * in case of errors the whole session is reset with a
2285 	 * notification anyway, we only need to know the peer
2286 	 */
2287 	p = peer->rbuf->rptr;
2288 	p += MSGSIZE_HEADER_MARKER;
2289 	memcpy(&datalen, p, sizeof(datalen));
2290 	datalen = ntohs(datalen);
2291 
2292 	p = peer->rbuf->rptr;
2293 	p += MSGSIZE_HEADER;	/* header is already checked */
2294 	datalen -= MSGSIZE_HEADER;
2295 
2296 	if (imsg_rde(IMSG_UPDATE, peer->conf.id, p, datalen) == -1)
2297 		return (-1);
2298 
2299 	return (0);
2300 }
2301 
2302 int
2303 parse_rrefresh(struct peer *peer)
2304 {
2305 	struct route_refresh rr;
2306 	uint16_t afi, datalen;
2307 	uint8_t aid, safi, subtype;
2308 	u_char *p;
2309 
2310 	p = peer->rbuf->rptr;
2311 	p += MSGSIZE_HEADER_MARKER;
2312 	memcpy(&datalen, p, sizeof(datalen));
2313 	datalen = ntohs(datalen);
2314 
2315 	p = peer->rbuf->rptr;
2316 	p += MSGSIZE_HEADER;	/* header is already checked */
2317 
2318 	/*
2319 	 * We could check if we actually announced the capability but
2320 	 * as long as the message is correctly encoded we don't care.
2321 	 */
2322 
2323 	/* afi, 2 byte */
2324 	memcpy(&afi, p, sizeof(afi));
2325 	afi = ntohs(afi);
2326 	p += 2;
2327 	/* subtype, 1 byte */
2328 	subtype = *p;
2329 	p += 1;
2330 	/* safi, 1 byte */
2331 	safi = *p;
2332 
2333 	/* check subtype if peer announced enhanced route refresh */
2334 	if (peer->capa.neg.enhanced_rr) {
2335 		switch (subtype) {
2336 		case ROUTE_REFRESH_REQUEST:
2337 			/* no ORF support, so no oversized RREFRESH msgs */
2338 			if (datalen != MSGSIZE_RREFRESH) {
2339 				log_peer_warnx(&peer->conf,
2340 				    "received RREFRESH: illegal len: %u byte",
2341 				    datalen);
2342 				datalen = htons(datalen);
2343 				session_notification(peer, ERR_HEADER,
2344 				    ERR_HDR_LEN, &datalen, sizeof(datalen));
2345 				bgp_fsm(peer, EVNT_CON_FATAL);
2346 				return (-1);
2347 			}
2348 			peer->stats.refresh_rcvd_req++;
2349 			break;
2350 		case ROUTE_REFRESH_BEGIN_RR:
2351 		case ROUTE_REFRESH_END_RR:
2352 			/* special handling for RFC7313 */
2353 			if (datalen != MSGSIZE_RREFRESH) {
2354 				log_peer_warnx(&peer->conf,
2355 				    "received RREFRESH: illegal len: %u byte",
2356 				    datalen);
2357 				p = peer->rbuf->rptr;
2358 				p += MSGSIZE_HEADER;
2359 				datalen -= MSGSIZE_HEADER;
2360 				session_notification(peer, ERR_RREFRESH,
2361 				    ERR_RR_INV_LEN, p, datalen);
2362 				bgp_fsm(peer, EVNT_CON_FATAL);
2363 				return (-1);
2364 			}
2365 			if (subtype == ROUTE_REFRESH_BEGIN_RR)
2366 				peer->stats.refresh_rcvd_borr++;
2367 			else
2368 				peer->stats.refresh_rcvd_eorr++;
2369 			break;
2370 		default:
2371 			log_peer_warnx(&peer->conf, "peer sent bad refresh, "
2372 			    "bad subtype %d", subtype);
2373 			return (0);
2374 		}
2375 	} else {
2376 		/* force subtype to default */
2377 		subtype = ROUTE_REFRESH_REQUEST;
2378 		peer->stats.refresh_rcvd_req++;
2379 	}
2380 
2381 	/* afi/safi unchecked -	unrecognized values will be ignored anyway */
2382 	if (afi2aid(afi, safi, &aid) == -1) {
2383 		log_peer_warnx(&peer->conf, "peer sent bad refresh, "
2384 		    "invalid afi/safi pair");
2385 		return (0);
2386 	}
2387 
2388 	if (!peer->capa.neg.refresh && !peer->capa.neg.enhanced_rr) {
2389 		log_peer_warnx(&peer->conf, "peer sent unexpected refresh");
2390 		return (0);
2391 	}
2392 
2393 	rr.aid = aid;
2394 	rr.subtype = subtype;
2395 
2396 	if (imsg_rde(IMSG_REFRESH, peer->conf.id, &rr, sizeof(rr)) == -1)
2397 		return (-1);
2398 
2399 	return (0);
2400 }
2401 
2402 int
2403 parse_notification(struct peer *peer)
2404 {
2405 	u_char		*p;
2406 	uint16_t	 datalen;
2407 	uint8_t		 errcode;
2408 	uint8_t		 subcode;
2409 	uint8_t		 capa_code;
2410 	uint8_t		 capa_len;
2411 	size_t		 reason_len;
2412 	uint8_t		 i;
2413 
2414 	/* just log */
2415 	p = peer->rbuf->rptr;
2416 	p += MSGSIZE_HEADER_MARKER;
2417 	memcpy(&datalen, p, sizeof(datalen));
2418 	datalen = ntohs(datalen);
2419 
2420 	p = peer->rbuf->rptr;
2421 	p += MSGSIZE_HEADER;	/* header is already checked */
2422 	datalen -= MSGSIZE_HEADER;
2423 
2424 	memcpy(&errcode, p, sizeof(errcode));
2425 	p += sizeof(errcode);
2426 	datalen -= sizeof(errcode);
2427 
2428 	memcpy(&subcode, p, sizeof(subcode));
2429 	p += sizeof(subcode);
2430 	datalen -= sizeof(subcode);
2431 
2432 	log_notification(peer, errcode, subcode, p, datalen, "received");
2433 	peer->errcnt++;
2434 	peer->stats.last_rcvd_errcode = errcode;
2435 	peer->stats.last_rcvd_suberr = subcode;
2436 
2437 	if (errcode == ERR_OPEN && subcode == ERR_OPEN_CAPA) {
2438 		if (datalen == 0) {	/* zebra likes to send those.. humbug */
2439 			log_peer_warnx(&peer->conf, "received \"unsupported "
2440 			    "capability\" notification without data part, "
2441 			    "disabling capability announcements altogether");
2442 			session_capa_ann_none(peer);
2443 		}
2444 
2445 		while (datalen > 0) {
2446 			if (datalen < 2) {
2447 				log_peer_warnx(&peer->conf,
2448 				    "parse_notification: "
2449 				    "expect len >= 2, len is %u", datalen);
2450 				return (-1);
2451 			}
2452 			memcpy(&capa_code, p, sizeof(capa_code));
2453 			p += sizeof(capa_code);
2454 			datalen -= sizeof(capa_code);
2455 			memcpy(&capa_len, p, sizeof(capa_len));
2456 			p += sizeof(capa_len);
2457 			datalen -= sizeof(capa_len);
2458 			if (datalen < capa_len) {
2459 				log_peer_warnx(&peer->conf,
2460 				    "parse_notification: capa_len %u exceeds "
2461 				    "remaining msg length %u", capa_len,
2462 				    datalen);
2463 				return (-1);
2464 			}
2465 			p += capa_len;
2466 			datalen -= capa_len;
2467 			switch (capa_code) {
2468 			case CAPA_MP:
2469 				for (i = 0; i < AID_MAX; i++)
2470 					peer->capa.ann.mp[i] = 0;
2471 				log_peer_warnx(&peer->conf,
2472 				    "disabling multiprotocol capability");
2473 				break;
2474 			case CAPA_REFRESH:
2475 				peer->capa.ann.refresh = 0;
2476 				log_peer_warnx(&peer->conf,
2477 				    "disabling route refresh capability");
2478 				break;
2479 			case CAPA_RESTART:
2480 				peer->capa.ann.grestart.restart = 0;
2481 				log_peer_warnx(&peer->conf,
2482 				    "disabling restart capability");
2483 				break;
2484 			case CAPA_AS4BYTE:
2485 				peer->capa.ann.as4byte = 0;
2486 				log_peer_warnx(&peer->conf,
2487 				    "disabling 4-byte AS num capability");
2488 				break;
2489 			case CAPA_ADD_PATH:
2490 				memset(peer->capa.ann.add_path, 0,
2491 				    sizeof(peer->capa.ann.add_path));
2492 				log_peer_warnx(&peer->conf,
2493 				    "disabling ADD-PATH capability");
2494 				break;
2495 			case CAPA_ENHANCED_RR:
2496 				peer->capa.ann.enhanced_rr = 0;
2497 				log_peer_warnx(&peer->conf,
2498 				    "disabling enhanced route refresh "
2499 				    "capability");
2500 				break;
2501 			default:	/* should not happen... */
2502 				log_peer_warnx(&peer->conf, "received "
2503 				    "\"unsupported capability\" notification "
2504 				    "for unknown capability %u, disabling "
2505 				    "capability announcements altogether",
2506 				    capa_code);
2507 				session_capa_ann_none(peer);
2508 				break;
2509 			}
2510 		}
2511 
2512 		return (1);
2513 	}
2514 
2515 	if (errcode == ERR_OPEN && subcode == ERR_OPEN_OPT) {
2516 		session_capa_ann_none(peer);
2517 		return (1);
2518 	}
2519 
2520 	if (errcode == ERR_CEASE &&
2521 	    (subcode == ERR_CEASE_ADMIN_DOWN ||
2522 	     subcode == ERR_CEASE_ADMIN_RESET)) {
2523 		if (datalen > 1) {
2524 			reason_len = *p++;
2525 			datalen--;
2526 			if (datalen < reason_len) {
2527 			    log_peer_warnx(&peer->conf,
2528 				"received truncated shutdown reason");
2529 			    return (0);
2530 			}
2531 			if (reason_len > REASON_LEN - 1) {
2532 			    log_peer_warnx(&peer->conf,
2533 				"received overly long shutdown reason");
2534 			    return (0);
2535 			}
2536 			memcpy(peer->stats.last_reason, p, reason_len);
2537 			peer->stats.last_reason[reason_len] = '\0';
2538 			log_peer_warnx(&peer->conf,
2539 			    "received shutdown reason: \"%s\"",
2540 			    log_reason(peer->stats.last_reason));
2541 			p += reason_len;
2542 			datalen -= reason_len;
2543 		}
2544 	}
2545 
2546 	return (0);
2547 }
2548 
2549 int
2550 parse_capabilities(struct peer *peer, u_char *d, uint16_t dlen, uint32_t *as)
2551 {
2552 	u_char		*capa_val;
2553 	uint32_t	 remote_as;
2554 	uint16_t	 len;
2555 	uint16_t	 afi;
2556 	uint16_t	 gr_header;
2557 	uint8_t		 safi;
2558 	uint8_t		 aid;
2559 	uint8_t		 flags;
2560 	uint8_t		 capa_code;
2561 	uint8_t		 capa_len;
2562 	uint8_t		 i;
2563 
2564 	len = dlen;
2565 	while (len > 0) {
2566 		if (len < 2) {
2567 			log_peer_warnx(&peer->conf, "Bad capabilities attr "
2568 			    "length: %u, too short", len);
2569 			return (-1);
2570 		}
2571 		memcpy(&capa_code, d, sizeof(capa_code));
2572 		d += sizeof(capa_code);
2573 		len -= sizeof(capa_code);
2574 		memcpy(&capa_len, d, sizeof(capa_len));
2575 		d += sizeof(capa_len);
2576 		len -= sizeof(capa_len);
2577 		if (capa_len > 0) {
2578 			if (len < capa_len) {
2579 				log_peer_warnx(&peer->conf,
2580 				    "Bad capabilities attr length: "
2581 				    "len %u smaller than capa_len %u",
2582 				    len, capa_len);
2583 				return (-1);
2584 			}
2585 			capa_val = d;
2586 			d += capa_len;
2587 			len -= capa_len;
2588 		} else
2589 			capa_val = NULL;
2590 
2591 		switch (capa_code) {
2592 		case CAPA_MP:			/* RFC 4760 */
2593 			if (capa_len != 4) {
2594 				log_peer_warnx(&peer->conf,
2595 				    "Bad multi protocol capability length: "
2596 				    "%u", capa_len);
2597 				break;
2598 			}
2599 			memcpy(&afi, capa_val, sizeof(afi));
2600 			afi = ntohs(afi);
2601 			memcpy(&safi, capa_val + 3, sizeof(safi));
2602 			if (afi2aid(afi, safi, &aid) == -1) {
2603 				log_peer_warnx(&peer->conf,
2604 				    "Received multi protocol capability: "
2605 				    " unknown AFI %u, safi %u pair",
2606 				    afi, safi);
2607 				break;
2608 			}
2609 			peer->capa.peer.mp[aid] = 1;
2610 			break;
2611 		case CAPA_REFRESH:
2612 			peer->capa.peer.refresh = 1;
2613 			break;
2614 		case CAPA_ROLE:
2615 			if (capa_len != 1) {
2616 				log_peer_warnx(&peer->conf,
2617 				    "Bad open policy capability length: "
2618 				    "%u", capa_len);
2619 				break;
2620 			}
2621 			peer->capa.peer.role_ena = 1;
2622 			peer->capa.peer.role = *capa_val;
2623 			break;
2624 		case CAPA_RESTART:
2625 			if (capa_len == 2) {
2626 				/* peer only supports EoR marker */
2627 				peer->capa.peer.grestart.restart = 1;
2628 				peer->capa.peer.grestart.timeout = 0;
2629 				break;
2630 			} else if (capa_len % 4 != 2) {
2631 				log_peer_warnx(&peer->conf,
2632 				    "Bad graceful restart capability length: "
2633 				    "%u", capa_len);
2634 				peer->capa.peer.grestart.restart = 0;
2635 				peer->capa.peer.grestart.timeout = 0;
2636 				break;
2637 			}
2638 
2639 			memcpy(&gr_header, capa_val, sizeof(gr_header));
2640 			gr_header = ntohs(gr_header);
2641 			peer->capa.peer.grestart.timeout =
2642 			    gr_header & CAPA_GR_TIMEMASK;
2643 			if (peer->capa.peer.grestart.timeout == 0) {
2644 				log_peer_warnx(&peer->conf, "Received "
2645 				    "graceful restart timeout is zero");
2646 				peer->capa.peer.grestart.restart = 0;
2647 				break;
2648 			}
2649 
2650 			for (i = 2; i <= capa_len - 4; i += 4) {
2651 				memcpy(&afi, capa_val + i, sizeof(afi));
2652 				afi = ntohs(afi);
2653 				safi = capa_val[i + 2];
2654 				flags = capa_val[i + 3];
2655 				if (afi2aid(afi, safi, &aid) == -1) {
2656 					log_peer_warnx(&peer->conf,
2657 					    "Received graceful restart capa: "
2658 					    " unknown AFI %u, safi %u pair",
2659 					    afi, safi);
2660 					continue;
2661 				}
2662 				peer->capa.peer.grestart.flags[aid] |=
2663 				    CAPA_GR_PRESENT;
2664 				if (flags & CAPA_GR_F_FLAG)
2665 					peer->capa.peer.grestart.flags[aid] |=
2666 					    CAPA_GR_FORWARD;
2667 				if (gr_header & CAPA_GR_R_FLAG)
2668 					peer->capa.peer.grestart.flags[aid] |=
2669 					    CAPA_GR_RESTART;
2670 				peer->capa.peer.grestart.restart = 2;
2671 			}
2672 			break;
2673 		case CAPA_AS4BYTE:
2674 			if (capa_len != 4) {
2675 				log_peer_warnx(&peer->conf,
2676 				    "Bad AS4BYTE capability length: "
2677 				    "%u", capa_len);
2678 				peer->capa.peer.as4byte = 0;
2679 				break;
2680 			}
2681 			memcpy(&remote_as, capa_val, sizeof(remote_as));
2682 			*as = ntohl(remote_as);
2683 			if (*as == 0) {
2684 				log_peer_warnx(&peer->conf,
2685 				    "peer requests unacceptable AS %u", *as);
2686 				session_notification(peer, ERR_OPEN,
2687 				    ERR_OPEN_AS, NULL, 0);
2688 				change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2689 				return (-1);
2690 			}
2691 			peer->capa.peer.as4byte = 1;
2692 			break;
2693 		case CAPA_ADD_PATH:
2694 			if (capa_len % 4 != 0) {
2695 				log_peer_warnx(&peer->conf,
2696 				    "Bad ADD-PATH capability length: "
2697 				    "%u", capa_len);
2698 				memset(peer->capa.peer.add_path, 0,
2699 				    sizeof(peer->capa.peer.add_path));
2700 				break;
2701 			}
2702 			for (i = 0; i <= capa_len - 4; i += 4) {
2703 				memcpy(&afi, capa_val + i, sizeof(afi));
2704 				afi = ntohs(afi);
2705 				safi = capa_val[i + 2];
2706 				flags = capa_val[i + 3];
2707 				if (afi2aid(afi, safi, &aid) == -1) {
2708 					log_peer_warnx(&peer->conf,
2709 					    "Received ADD-PATH capa: "
2710 					    " unknown AFI %u, safi %u pair",
2711 					    afi, safi);
2712 					memset(peer->capa.peer.add_path, 0,
2713 					    sizeof(peer->capa.peer.add_path));
2714 					break;
2715 				}
2716 				if (flags & ~CAPA_AP_BIDIR) {
2717 					log_peer_warnx(&peer->conf,
2718 					    "Received ADD-PATH capa: "
2719 					    " bad flags %x", flags);
2720 					memset(peer->capa.peer.add_path, 0,
2721 					    sizeof(peer->capa.peer.add_path));
2722 					break;
2723 				}
2724 				peer->capa.peer.add_path[aid] = flags;
2725 			}
2726 			break;
2727 		case CAPA_ENHANCED_RR:
2728 			peer->capa.peer.enhanced_rr = 1;
2729 			break;
2730 		default:
2731 			break;
2732 		}
2733 	}
2734 
2735 	return (0);
2736 }
2737 
2738 int
2739 capa_neg_calc(struct peer *p, uint8_t *suberr)
2740 {
2741 	uint8_t	i, hasmp = 0;
2742 
2743 	/* a capability is accepted only if both sides announced it */
2744 
2745 	p->capa.neg.refresh =
2746 	    (p->capa.ann.refresh && p->capa.peer.refresh) != 0;
2747 	p->capa.neg.enhanced_rr =
2748 	    (p->capa.ann.enhanced_rr && p->capa.peer.enhanced_rr) != 0;
2749 
2750 	p->capa.neg.as4byte =
2751 	    (p->capa.ann.as4byte && p->capa.peer.as4byte) != 0;
2752 
2753 	/* MP: both side must agree on the AFI,SAFI pair */
2754 	for (i = 0; i < AID_MAX; i++) {
2755 		if (p->capa.ann.mp[i] && p->capa.peer.mp[i])
2756 			p->capa.neg.mp[i] = 1;
2757 		else
2758 			p->capa.neg.mp[i] = 0;
2759 		if (p->capa.ann.mp[i])
2760 			hasmp = 1;
2761 	}
2762 	/* if no MP capability present default to IPv4 unicast mode */
2763 	if (!hasmp)
2764 		p->capa.neg.mp[AID_INET] = 1;
2765 
2766 	/*
2767 	 * graceful restart: the peer capabilities are of interest here.
2768 	 * It is necessary to compare the new values with the previous ones
2769 	 * and act acordingly. AFI/SAFI that are not part in the MP capability
2770 	 * are treated as not being present.
2771 	 * Also make sure that a flush happens if the session stopped
2772 	 * supporting graceful restart.
2773 	 */
2774 
2775 	for (i = 0; i < AID_MAX; i++) {
2776 		int8_t	negflags;
2777 
2778 		/* disable GR if the AFI/SAFI is not present */
2779 		if ((p->capa.peer.grestart.flags[i] & CAPA_GR_PRESENT &&
2780 		    p->capa.neg.mp[i] == 0))
2781 			p->capa.peer.grestart.flags[i] = 0;	/* disable */
2782 		/* look at current GR state and decide what to do */
2783 		negflags = p->capa.neg.grestart.flags[i];
2784 		p->capa.neg.grestart.flags[i] = p->capa.peer.grestart.flags[i];
2785 		if (negflags & CAPA_GR_RESTARTING) {
2786 			if (p->capa.ann.grestart.restart != 0 &&
2787 			    p->capa.peer.grestart.flags[i] & CAPA_GR_FORWARD) {
2788 				p->capa.neg.grestart.flags[i] |=
2789 				    CAPA_GR_RESTARTING;
2790 			} else {
2791 				if (imsg_rde(IMSG_SESSION_FLUSH, p->conf.id,
2792 				    &i, sizeof(i)) == -1) {
2793 					log_peer_warnx(&p->conf,
2794 					    "imsg send failed");
2795 					return (-1);
2796 				}
2797 				log_peer_warnx(&p->conf, "graceful restart of "
2798 				    "%s, not restarted, flushing", aid2str(i));
2799 			}
2800 		}
2801 	}
2802 	p->capa.neg.grestart.timeout = p->capa.peer.grestart.timeout;
2803 	p->capa.neg.grestart.restart = p->capa.peer.grestart.restart;
2804 	if (p->capa.ann.grestart.restart == 0)
2805 		p->capa.neg.grestart.restart = 0;
2806 
2807 
2808 	/*
2809 	 * ADD-PATH: set only those bits where both sides agree.
2810 	 * For this compare our send bit with the recv bit from the peer
2811 	 * and vice versa.
2812 	 * The flags are stored from this systems view point.
2813 	 */
2814 	memset(p->capa.neg.add_path, 0, sizeof(p->capa.neg.add_path));
2815 	if (p->capa.ann.add_path[0]) {
2816 		for (i = AID_MIN; i < AID_MAX; i++) {
2817 			if ((p->capa.ann.add_path[i] & CAPA_AP_RECV) &&
2818 			    (p->capa.peer.add_path[i] & CAPA_AP_SEND)) {
2819 				p->capa.neg.add_path[i] |= CAPA_AP_RECV;
2820 				p->capa.neg.add_path[0] |= CAPA_AP_RECV;
2821 			}
2822 			if ((p->capa.ann.add_path[i] & CAPA_AP_SEND) &&
2823 			    (p->capa.peer.add_path[i] & CAPA_AP_RECV)) {
2824 				p->capa.neg.add_path[i] |= CAPA_AP_SEND;
2825 				p->capa.neg.add_path[0] |= CAPA_AP_SEND;
2826 			}
2827 		}
2828 	}
2829 
2830 	/*
2831 	 * Open policy: check that the policy is sensible.
2832 	 *
2833 	 * Make sure that the roles match and set the negotiated capability
2834 	 * to the role of the peer. So the RDE can inject the OTC attribute.
2835 	 * See RFC 9234, section 4.2.
2836 	 */
2837 	if (p->capa.ann.role_ena != 0 && p->capa.peer.role_ena != 0) {
2838 		switch (p->capa.ann.role) {
2839 		case CAPA_ROLE_PROVIDER:
2840 			if (p->capa.peer.role != CAPA_ROLE_CUSTOMER)
2841 				goto fail;
2842 			break;
2843 		case CAPA_ROLE_RS:
2844 			if (p->capa.peer.role != CAPA_ROLE_RS_CLIENT)
2845 				goto fail;
2846 			break;
2847 		case CAPA_ROLE_RS_CLIENT:
2848 			if (p->capa.peer.role != CAPA_ROLE_RS)
2849 				goto fail;
2850 			break;
2851 		case CAPA_ROLE_CUSTOMER:
2852 			if (p->capa.peer.role != CAPA_ROLE_PROVIDER)
2853 				goto fail;
2854 			break;
2855 		case CAPA_ROLE_PEER:
2856 			if (p->capa.peer.role != CAPA_ROLE_PEER)
2857 				goto fail;
2858 			break;
2859 		default:
2860  fail:
2861 			log_peer_warnx(&p->conf, "open policy role mismatch: "
2862 			    "%s vs %s", log_policy(p->capa.ann.role),
2863 			    log_policy(p->capa.peer.role));
2864 			*suberr = ERR_OPEN_ROLE;
2865 			return (-1);
2866 		}
2867 		p->capa.neg.role_ena = 1;
2868 		p->capa.neg.role = p->capa.peer.role;
2869 	} else if (p->capa.ann.role_ena == 2) {
2870 		/* enforce presence of open policy role capability */
2871 		log_peer_warnx(&p->conf, "open policy role enforced but "
2872 		    "not present");
2873 		*suberr = ERR_OPEN_ROLE;
2874 		return (-1);
2875 	}
2876 
2877 	return (0);
2878 }
2879 
2880 void
2881 session_dispatch_imsg(struct imsgbuf *ibuf, int idx, u_int *listener_cnt)
2882 {
2883 	struct imsg		 imsg;
2884 	struct mrt		 xmrt;
2885 	struct route_refresh	 rr;
2886 	struct mrt		*mrt;
2887 	struct imsgbuf		*i;
2888 	struct peer		*p;
2889 	struct listen_addr	*la, *nla;
2890 	struct session_dependon	*sdon;
2891 	u_char			*data;
2892 	int			 n, fd, depend_ok, restricted;
2893 	uint16_t		 t;
2894 	uint8_t			 aid, errcode, subcode;
2895 
2896 	while (ibuf) {
2897 		if ((n = imsg_get(ibuf, &imsg)) == -1)
2898 			fatal("session_dispatch_imsg: imsg_get error");
2899 
2900 		if (n == 0)
2901 			break;
2902 
2903 		switch (imsg.hdr.type) {
2904 		case IMSG_SOCKET_CONN:
2905 		case IMSG_SOCKET_CONN_CTL:
2906 			if (idx != PFD_PIPE_MAIN)
2907 				fatalx("reconf request not from parent");
2908 			if ((fd = imsg.fd) == -1) {
2909 				log_warnx("expected to receive imsg fd to "
2910 				    "RDE but didn't receive any");
2911 				break;
2912 			}
2913 			if ((i = malloc(sizeof(struct imsgbuf))) == NULL)
2914 				fatal(NULL);
2915 			imsg_init(i, fd);
2916 			if (imsg.hdr.type == IMSG_SOCKET_CONN) {
2917 				if (ibuf_rde) {
2918 					log_warnx("Unexpected imsg connection "
2919 					    "to RDE received");
2920 					msgbuf_clear(&ibuf_rde->w);
2921 					free(ibuf_rde);
2922 				}
2923 				ibuf_rde = i;
2924 			} else {
2925 				if (ibuf_rde_ctl) {
2926 					log_warnx("Unexpected imsg ctl "
2927 					    "connection to RDE received");
2928 					msgbuf_clear(&ibuf_rde_ctl->w);
2929 					free(ibuf_rde_ctl);
2930 				}
2931 				ibuf_rde_ctl = i;
2932 			}
2933 			break;
2934 		case IMSG_RECONF_CONF:
2935 			if (idx != PFD_PIPE_MAIN)
2936 				fatalx("reconf request not from parent");
2937 			nconf = new_config();
2938 
2939 			copy_config(nconf, imsg.data);
2940 			pending_reconf = 1;
2941 			break;
2942 		case IMSG_RECONF_PEER:
2943 			if (idx != PFD_PIPE_MAIN)
2944 				fatalx("reconf request not from parent");
2945 			if ((p = calloc(1, sizeof(struct peer))) == NULL)
2946 				fatal("new_peer");
2947 			memcpy(&p->conf, imsg.data, sizeof(struct peer_config));
2948 			p->state = p->prev_state = STATE_NONE;
2949 			p->reconf_action = RECONF_REINIT;
2950 			if (RB_INSERT(peer_head, &nconf->peers, p) != NULL)
2951 				fatalx("%s: peer tree is corrupt", __func__);
2952 			break;
2953 		case IMSG_RECONF_LISTENER:
2954 			if (idx != PFD_PIPE_MAIN)
2955 				fatalx("reconf request not from parent");
2956 			if (nconf == NULL)
2957 				fatalx("IMSG_RECONF_LISTENER but no config");
2958 			nla = imsg.data;
2959 			TAILQ_FOREACH(la, conf->listen_addrs, entry)
2960 				if (!la_cmp(la, nla))
2961 					break;
2962 
2963 			if (la == NULL) {
2964 				if (nla->reconf != RECONF_REINIT)
2965 					fatalx("king bula sez: "
2966 					    "expected REINIT");
2967 
2968 				if ((nla->fd = imsg.fd) == -1)
2969 					log_warnx("expected to receive fd for "
2970 					    "%s but didn't receive any",
2971 					    log_sockaddr((struct sockaddr *)
2972 					    &nla->sa, nla->sa_len));
2973 
2974 				la = calloc(1, sizeof(struct listen_addr));
2975 				if (la == NULL)
2976 					fatal(NULL);
2977 				memcpy(&la->sa, &nla->sa, sizeof(la->sa));
2978 				la->flags = nla->flags;
2979 				la->fd = nla->fd;
2980 				la->reconf = RECONF_REINIT;
2981 				TAILQ_INSERT_TAIL(nconf->listen_addrs, la,
2982 				    entry);
2983 			} else {
2984 				if (nla->reconf != RECONF_KEEP)
2985 					fatalx("king bula sez: expected KEEP");
2986 				la->reconf = RECONF_KEEP;
2987 			}
2988 
2989 			break;
2990 		case IMSG_RECONF_CTRL:
2991 			if (idx != PFD_PIPE_MAIN)
2992 				fatalx("reconf request not from parent");
2993 			if (imsg.hdr.len != IMSG_HEADER_SIZE +
2994 			    sizeof(restricted))
2995 				fatalx("RECONF_CTRL imsg with wrong len");
2996 			memcpy(&restricted, imsg.data, sizeof(restricted));
2997 			if (imsg.fd == -1) {
2998 				log_warnx("expected to receive fd for control "
2999 				    "socket but didn't receive any");
3000 				break;
3001 			}
3002 			if (restricted) {
3003 				control_shutdown(rcsock);
3004 				rcsock = imsg.fd;
3005 			} else {
3006 				control_shutdown(csock);
3007 				csock = imsg.fd;
3008 			}
3009 			break;
3010 		case IMSG_RECONF_DRAIN:
3011 			switch (idx) {
3012 			case PFD_PIPE_ROUTE:
3013 				if (nconf != NULL)
3014 					fatalx("got unexpected %s from RDE",
3015 					    "IMSG_RECONF_DONE");
3016 				imsg_compose(ibuf_main, IMSG_RECONF_DONE, 0, 0,
3017 				    -1, NULL, 0);
3018 				break;
3019 			case PFD_PIPE_MAIN:
3020 				if (nconf == NULL)
3021 					fatalx("got unexpected %s from parent",
3022 					    "IMSG_RECONF_DONE");
3023 				imsg_compose(ibuf_main, IMSG_RECONF_DRAIN, 0, 0,
3024 				    -1, NULL, 0);
3025 				break;
3026 			default:
3027 				fatalx("reconf request not from parent or RDE");
3028 			}
3029 			break;
3030 		case IMSG_RECONF_DONE:
3031 			if (idx != PFD_PIPE_MAIN)
3032 				fatalx("reconf request not from parent");
3033 			if (nconf == NULL)
3034 				fatalx("got IMSG_RECONF_DONE but no config");
3035 			copy_config(conf, nconf);
3036 			merge_peers(conf, nconf);
3037 
3038 			/* delete old listeners */
3039 			for (la = TAILQ_FIRST(conf->listen_addrs); la != NULL;
3040 			    la = nla) {
3041 				nla = TAILQ_NEXT(la, entry);
3042 				if (la->reconf == RECONF_NONE) {
3043 					log_info("not listening on %s any more",
3044 					    log_sockaddr((struct sockaddr *)
3045 					    &la->sa, la->sa_len));
3046 					TAILQ_REMOVE(conf->listen_addrs, la,
3047 					    entry);
3048 					close(la->fd);
3049 					free(la);
3050 				}
3051 			}
3052 
3053 			/* add new listeners */
3054 			TAILQ_CONCAT(conf->listen_addrs, nconf->listen_addrs,
3055 			    entry);
3056 
3057 			setup_listeners(listener_cnt);
3058 			free_config(nconf);
3059 			nconf = NULL;
3060 			pending_reconf = 0;
3061 			log_info("SE reconfigured");
3062 			/*
3063 			 * IMSG_RECONF_DONE is sent when the RDE drained
3064 			 * the peer config sent in merge_peers().
3065 			 */
3066 			break;
3067 		case IMSG_SESSION_DEPENDON:
3068 			if (idx != PFD_PIPE_MAIN)
3069 				fatalx("IFINFO message not from parent");
3070 			if (imsg.hdr.len != IMSG_HEADER_SIZE +
3071 			    sizeof(struct session_dependon))
3072 				fatalx("DEPENDON imsg with wrong len");
3073 			sdon = imsg.data;
3074 			depend_ok = sdon->depend_state;
3075 
3076 			RB_FOREACH(p, peer_head, &conf->peers)
3077 				if (!strcmp(p->conf.if_depend, sdon->ifname)) {
3078 					if (depend_ok && !p->depend_ok) {
3079 						p->depend_ok = depend_ok;
3080 						bgp_fsm(p, EVNT_START);
3081 					} else if (!depend_ok && p->depend_ok) {
3082 						p->depend_ok = depend_ok;
3083 						session_stop(p,
3084 						    ERR_CEASE_OTHER_CHANGE);
3085 					}
3086 				}
3087 			break;
3088 		case IMSG_MRT_OPEN:
3089 		case IMSG_MRT_REOPEN:
3090 			if (imsg.hdr.len > IMSG_HEADER_SIZE +
3091 			    sizeof(struct mrt)) {
3092 				log_warnx("wrong imsg len");
3093 				break;
3094 			}
3095 
3096 			memcpy(&xmrt, imsg.data, sizeof(struct mrt));
3097 			if ((xmrt.wbuf.fd = imsg.fd) == -1)
3098 				log_warnx("expected to receive fd for mrt dump "
3099 				    "but didn't receive any");
3100 
3101 			mrt = mrt_get(&mrthead, &xmrt);
3102 			if (mrt == NULL) {
3103 				/* new dump */
3104 				mrt = calloc(1, sizeof(struct mrt));
3105 				if (mrt == NULL)
3106 					fatal("session_dispatch_imsg");
3107 				memcpy(mrt, &xmrt, sizeof(struct mrt));
3108 				TAILQ_INIT(&mrt->wbuf.bufs);
3109 				LIST_INSERT_HEAD(&mrthead, mrt, entry);
3110 			} else {
3111 				/* old dump reopened */
3112 				close(mrt->wbuf.fd);
3113 				mrt->wbuf.fd = xmrt.wbuf.fd;
3114 			}
3115 			break;
3116 		case IMSG_MRT_CLOSE:
3117 			if (imsg.hdr.len > IMSG_HEADER_SIZE +
3118 			    sizeof(struct mrt)) {
3119 				log_warnx("wrong imsg len");
3120 				break;
3121 			}
3122 
3123 			memcpy(&xmrt, imsg.data, sizeof(struct mrt));
3124 			mrt = mrt_get(&mrthead, &xmrt);
3125 			if (mrt != NULL)
3126 				mrt_done(mrt);
3127 			break;
3128 		case IMSG_CTL_KROUTE:
3129 		case IMSG_CTL_KROUTE_ADDR:
3130 		case IMSG_CTL_SHOW_NEXTHOP:
3131 		case IMSG_CTL_SHOW_INTERFACE:
3132 		case IMSG_CTL_SHOW_FIB_TABLES:
3133 		case IMSG_CTL_SHOW_RTR:
3134 		case IMSG_CTL_SHOW_TIMER:
3135 			if (idx != PFD_PIPE_MAIN)
3136 				fatalx("ctl kroute request not from parent");
3137 			control_imsg_relay(&imsg);
3138 			break;
3139 		case IMSG_CTL_SHOW_RIB:
3140 		case IMSG_CTL_SHOW_RIB_PREFIX:
3141 		case IMSG_CTL_SHOW_RIB_COMMUNITIES:
3142 		case IMSG_CTL_SHOW_RIB_ATTR:
3143 		case IMSG_CTL_SHOW_RIB_MEM:
3144 		case IMSG_CTL_SHOW_RIB_HASH:
3145 		case IMSG_CTL_SHOW_NETWORK:
3146 		case IMSG_CTL_SHOW_NEIGHBOR:
3147 		case IMSG_CTL_SHOW_SET:
3148 			if (idx != PFD_PIPE_ROUTE_CTL)
3149 				fatalx("ctl rib request not from RDE");
3150 			control_imsg_relay(&imsg);
3151 			break;
3152 		case IMSG_CTL_END:
3153 		case IMSG_CTL_RESULT:
3154 			control_imsg_relay(&imsg);
3155 			break;
3156 		case IMSG_UPDATE:
3157 			if (idx != PFD_PIPE_ROUTE)
3158 				fatalx("update request not from RDE");
3159 			if (imsg.hdr.len > IMSG_HEADER_SIZE +
3160 			    MAX_PKTSIZE - MSGSIZE_HEADER ||
3161 			    imsg.hdr.len < IMSG_HEADER_SIZE +
3162 			    MSGSIZE_UPDATE_MIN - MSGSIZE_HEADER)
3163 				log_warnx("RDE sent invalid update");
3164 			else
3165 				session_update(imsg.hdr.peerid, imsg.data,
3166 				    imsg.hdr.len - IMSG_HEADER_SIZE);
3167 			break;
3168 		case IMSG_UPDATE_ERR:
3169 			if (idx != PFD_PIPE_ROUTE)
3170 				fatalx("update request not from RDE");
3171 			if (imsg.hdr.len < IMSG_HEADER_SIZE + 2) {
3172 				log_warnx("RDE sent invalid notification");
3173 				break;
3174 			}
3175 			if ((p = getpeerbyid(conf, imsg.hdr.peerid)) == NULL) {
3176 				log_warnx("no such peer: id=%u",
3177 				    imsg.hdr.peerid);
3178 				break;
3179 			}
3180 			data = imsg.data;
3181 			errcode = *data++;
3182 			subcode = *data++;
3183 
3184 			if (imsg.hdr.len == IMSG_HEADER_SIZE + 2)
3185 				data = NULL;
3186 
3187 			session_notification(p, errcode, subcode,
3188 			    data, imsg.hdr.len - IMSG_HEADER_SIZE - 2);
3189 			switch (errcode) {
3190 			case ERR_CEASE:
3191 				switch (subcode) {
3192 				case ERR_CEASE_MAX_PREFIX:
3193 				case ERR_CEASE_MAX_SENT_PREFIX:
3194 					t = p->conf.max_out_prefix_restart;
3195 					if (subcode == ERR_CEASE_MAX_PREFIX)
3196 						t = p->conf.max_prefix_restart;
3197 
3198 					bgp_fsm(p, EVNT_STOP);
3199 					if (t)
3200 						timer_set(&p->timers,
3201 						    Timer_IdleHold, 60 * t);
3202 					break;
3203 				default:
3204 					bgp_fsm(p, EVNT_CON_FATAL);
3205 					break;
3206 				}
3207 				break;
3208 			default:
3209 				bgp_fsm(p, EVNT_CON_FATAL);
3210 				break;
3211 			}
3212 			break;
3213 		case IMSG_REFRESH:
3214 			if (idx != PFD_PIPE_ROUTE)
3215 				fatalx("route refresh request not from RDE");
3216 			if (imsg.hdr.len < IMSG_HEADER_SIZE + sizeof(rr)) {
3217 				log_warnx("RDE sent invalid refresh msg");
3218 				break;
3219 			}
3220 			if ((p = getpeerbyid(conf, imsg.hdr.peerid)) == NULL) {
3221 				log_warnx("no such peer: id=%u",
3222 				    imsg.hdr.peerid);
3223 				break;
3224 			}
3225 			memcpy(&rr, imsg.data, sizeof(rr));
3226 			if (rr.aid >= AID_MAX)
3227 				fatalx("IMSG_REFRESH: bad AID");
3228 			session_rrefresh(p, rr.aid, rr.subtype);
3229 			break;
3230 		case IMSG_SESSION_RESTARTED:
3231 			if (idx != PFD_PIPE_ROUTE)
3232 				fatalx("update request not from RDE");
3233 			if (imsg.hdr.len < IMSG_HEADER_SIZE + sizeof(aid)) {
3234 				log_warnx("RDE sent invalid restart msg");
3235 				break;
3236 			}
3237 			if ((p = getpeerbyid(conf, imsg.hdr.peerid)) == NULL) {
3238 				log_warnx("no such peer: id=%u",
3239 				    imsg.hdr.peerid);
3240 				break;
3241 			}
3242 			memcpy(&aid, imsg.data, sizeof(aid));
3243 			if (aid >= AID_MAX)
3244 				fatalx("IMSG_SESSION_RESTARTED: bad AID");
3245 			if (p->capa.neg.grestart.flags[aid] &
3246 			    CAPA_GR_RESTARTING) {
3247 				log_peer_warnx(&p->conf,
3248 				    "graceful restart of %s finished",
3249 				    aid2str(aid));
3250 				p->capa.neg.grestart.flags[aid] &=
3251 				    ~CAPA_GR_RESTARTING;
3252 				timer_stop(&p->timers, Timer_RestartTimeout);
3253 
3254 				/* signal back to RDE to cleanup stale routes */
3255 				if (imsg_rde(IMSG_SESSION_RESTARTED,
3256 				    imsg.hdr.peerid, &aid, sizeof(aid)) == -1)
3257 					fatal("imsg_compose: "
3258 					    "IMSG_SESSION_RESTARTED");
3259 			}
3260 			break;
3261 		case IMSG_SESSION_DOWN:
3262 			if (idx != PFD_PIPE_ROUTE)
3263 				fatalx("update request not from RDE");
3264 			if ((p = getpeerbyid(conf, imsg.hdr.peerid)) == NULL) {
3265 				log_warnx("no such peer: id=%u",
3266 				    imsg.hdr.peerid);
3267 				break;
3268 			}
3269 			session_stop(p, ERR_CEASE_ADMIN_DOWN);
3270 			break;
3271 		default:
3272 			break;
3273 		}
3274 		imsg_free(&imsg);
3275 	}
3276 }
3277 
3278 int
3279 la_cmp(struct listen_addr *a, struct listen_addr *b)
3280 {
3281 	struct sockaddr_in	*in_a, *in_b;
3282 	struct sockaddr_in6	*in6_a, *in6_b;
3283 
3284 	if (a->sa.ss_family != b->sa.ss_family)
3285 		return (1);
3286 
3287 	switch (a->sa.ss_family) {
3288 	case AF_INET:
3289 		in_a = (struct sockaddr_in *)&a->sa;
3290 		in_b = (struct sockaddr_in *)&b->sa;
3291 		if (in_a->sin_addr.s_addr != in_b->sin_addr.s_addr)
3292 			return (1);
3293 		if (in_a->sin_port != in_b->sin_port)
3294 			return (1);
3295 		break;
3296 	case AF_INET6:
3297 		in6_a = (struct sockaddr_in6 *)&a->sa;
3298 		in6_b = (struct sockaddr_in6 *)&b->sa;
3299 		if (bcmp(&in6_a->sin6_addr, &in6_b->sin6_addr,
3300 		    sizeof(struct in6_addr)))
3301 			return (1);
3302 		if (in6_a->sin6_port != in6_b->sin6_port)
3303 			return (1);
3304 		break;
3305 	default:
3306 		fatal("king bula sez: unknown address family");
3307 		/* NOTREACHED */
3308 	}
3309 
3310 	return (0);
3311 }
3312 
3313 struct peer *
3314 getpeerbydesc(struct bgpd_config *c, const char *descr)
3315 {
3316 	struct peer	*p, *res = NULL;
3317 	int		 match = 0;
3318 
3319 	RB_FOREACH(p, peer_head, &c->peers)
3320 		if (!strcmp(p->conf.descr, descr)) {
3321 			res = p;
3322 			match++;
3323 		}
3324 
3325 	if (match > 1)
3326 		log_info("neighbor description \"%s\" not unique, request "
3327 		    "aborted", descr);
3328 
3329 	if (match == 1)
3330 		return (res);
3331 	else
3332 		return (NULL);
3333 }
3334 
3335 struct peer *
3336 getpeerbyip(struct bgpd_config *c, struct sockaddr *ip)
3337 {
3338 	struct bgpd_addr addr;
3339 	struct peer	*p, *newpeer, *loose = NULL;
3340 	uint32_t	 id;
3341 
3342 	sa2addr(ip, &addr, NULL);
3343 
3344 	/* we might want a more effective way to find peers by IP */
3345 	RB_FOREACH(p, peer_head, &c->peers)
3346 		if (!p->conf.template &&
3347 		    !memcmp(&addr, &p->conf.remote_addr, sizeof(addr)))
3348 			return (p);
3349 
3350 	/* try template matching */
3351 	RB_FOREACH(p, peer_head, &c->peers)
3352 		if (p->conf.template &&
3353 		    p->conf.remote_addr.aid == addr.aid &&
3354 		    session_match_mask(p, &addr))
3355 			if (loose == NULL || loose->conf.remote_masklen <
3356 			    p->conf.remote_masklen)
3357 				loose = p;
3358 
3359 	if (loose != NULL) {
3360 		/* clone */
3361 		if ((newpeer = malloc(sizeof(struct peer))) == NULL)
3362 			fatal(NULL);
3363 		memcpy(newpeer, loose, sizeof(struct peer));
3364 		for (id = PEER_ID_DYN_MAX; id > PEER_ID_STATIC_MAX; id--) {
3365 			if (getpeerbyid(c, id) == NULL)	/* we found a free id */
3366 				break;
3367 		}
3368 		newpeer->template = loose;
3369 		session_template_clone(newpeer, ip, id, 0);
3370 		newpeer->state = newpeer->prev_state = STATE_NONE;
3371 		newpeer->reconf_action = RECONF_KEEP;
3372 		newpeer->rbuf = NULL;
3373 		newpeer->rpending = 0;
3374 		init_peer(newpeer);
3375 		bgp_fsm(newpeer, EVNT_START);
3376 		if (RB_INSERT(peer_head, &c->peers, newpeer) != NULL)
3377 			fatalx("%s: peer tree is corrupt", __func__);
3378 		return (newpeer);
3379 	}
3380 
3381 	return (NULL);
3382 }
3383 
3384 struct peer *
3385 getpeerbyid(struct bgpd_config *c, uint32_t peerid)
3386 {
3387 	static struct peer lookup;
3388 
3389 	lookup.conf.id = peerid;
3390 
3391 	return RB_FIND(peer_head, &c->peers, &lookup);
3392 }
3393 
3394 int
3395 peer_matched(struct peer *p, struct ctl_neighbor *n)
3396 {
3397 	char *s;
3398 
3399 	if (n && n->addr.aid) {
3400 		if (memcmp(&p->conf.remote_addr, &n->addr,
3401 		    sizeof(p->conf.remote_addr)))
3402 			return 0;
3403 	} else if (n && n->descr[0]) {
3404 		s = n->is_group ? p->conf.group : p->conf.descr;
3405 		if (strcmp(s, n->descr))
3406 			return 0;
3407 	}
3408 	return 1;
3409 }
3410 
3411 void
3412 session_template_clone(struct peer *p, struct sockaddr *ip, uint32_t id,
3413     uint32_t as)
3414 {
3415 	struct bgpd_addr	remote_addr;
3416 
3417 	if (ip)
3418 		sa2addr(ip, &remote_addr, NULL);
3419 	else
3420 		memcpy(&remote_addr, &p->conf.remote_addr, sizeof(remote_addr));
3421 
3422 	memcpy(&p->conf, &p->template->conf, sizeof(struct peer_config));
3423 
3424 	p->conf.id = id;
3425 
3426 	if (as) {
3427 		p->conf.remote_as = as;
3428 		p->conf.ebgp = (p->conf.remote_as != p->conf.local_as);
3429 		if (!p->conf.ebgp)
3430 			/* force enforce_as off for iBGP sessions */
3431 			p->conf.enforce_as = ENFORCE_AS_OFF;
3432 	}
3433 
3434 	memcpy(&p->conf.remote_addr, &remote_addr, sizeof(remote_addr));
3435 	switch (p->conf.remote_addr.aid) {
3436 	case AID_INET:
3437 		p->conf.remote_masklen = 32;
3438 		break;
3439 	case AID_INET6:
3440 		p->conf.remote_masklen = 128;
3441 		break;
3442 	}
3443 	p->conf.template = 0;
3444 }
3445 
3446 int
3447 session_match_mask(struct peer *p, struct bgpd_addr *a)
3448 {
3449 	struct bgpd_addr masked;
3450 
3451 	applymask(&masked, a, p->conf.remote_masklen);
3452 	if (memcmp(&masked, &p->conf.remote_addr, sizeof(masked)) == 0)
3453 		return (1);
3454 	return (0);
3455 }
3456 
3457 void
3458 session_down(struct peer *peer)
3459 {
3460 	bzero(&peer->capa.neg, sizeof(peer->capa.neg));
3461 	peer->stats.last_updown = getmonotime();
3462 	/*
3463 	 * session_down is called in the exit code path so check
3464 	 * if the RDE is still around, if not there is no need to
3465 	 * send the message.
3466 	 */
3467 	if (ibuf_rde == NULL)
3468 		return;
3469 	if (imsg_rde(IMSG_SESSION_DOWN, peer->conf.id, NULL, 0) == -1)
3470 		fatalx("imsg_compose error");
3471 }
3472 
3473 void
3474 session_up(struct peer *p)
3475 {
3476 	struct session_up	 sup;
3477 
3478 	if (imsg_rde(IMSG_SESSION_ADD, p->conf.id,
3479 	    &p->conf, sizeof(p->conf)) == -1)
3480 		fatalx("imsg_compose error");
3481 
3482 	if (p->local.aid == AID_INET) {
3483 		sup.local_v4_addr = p->local;
3484 		sup.local_v6_addr = p->local_alt;
3485 	} else {
3486 		sup.local_v6_addr = p->local;
3487 		sup.local_v4_addr = p->local_alt;
3488 	}
3489 	sup.remote_addr = p->remote;
3490 
3491 	sup.remote_bgpid = p->remote_bgpid;
3492 	sup.short_as = p->short_as;
3493 	memcpy(&sup.capa, &p->capa.neg, sizeof(sup.capa));
3494 	p->stats.last_updown = getmonotime();
3495 	if (imsg_rde(IMSG_SESSION_UP, p->conf.id, &sup, sizeof(sup)) == -1)
3496 		fatalx("imsg_compose error");
3497 }
3498 
3499 int
3500 imsg_ctl_parent(int type, uint32_t peerid, pid_t pid, void *data,
3501     uint16_t datalen)
3502 {
3503 	return (imsg_compose(ibuf_main, type, peerid, pid, -1, data, datalen));
3504 }
3505 
3506 int
3507 imsg_ctl_rde(int type, pid_t pid, void *data, uint16_t datalen)
3508 {
3509 	if (ibuf_rde_ctl == NULL)
3510 		return (0);
3511 
3512 	/*
3513 	 * Use control socket to talk to RDE to bypass the queue of the
3514 	 * regular imsg socket.
3515 	 */
3516 	return (imsg_compose(ibuf_rde_ctl, type, 0, pid, -1, data, datalen));
3517 }
3518 
3519 int
3520 imsg_rde(int type, uint32_t peerid, void *data, uint16_t datalen)
3521 {
3522 	if (ibuf_rde == NULL)
3523 		return (0);
3524 
3525 	return (imsg_compose(ibuf_rde, type, peerid, 0, -1, data, datalen));
3526 }
3527 
3528 void
3529 session_demote(struct peer *p, int level)
3530 {
3531 	struct demote_msg	msg;
3532 
3533 	strlcpy(msg.demote_group, p->conf.demote_group,
3534 	    sizeof(msg.demote_group));
3535 	msg.level = level;
3536 	if (imsg_compose(ibuf_main, IMSG_DEMOTE, p->conf.id, 0, -1,
3537 	    &msg, sizeof(msg)) == -1)
3538 		fatalx("imsg_compose error");
3539 
3540 	p->demoted += level;
3541 }
3542 
3543 void
3544 session_stop(struct peer *peer, uint8_t subcode)
3545 {
3546 	char data[REASON_LEN];
3547 	size_t datalen;
3548 	size_t reason_len;
3549 	char *communication;
3550 
3551 	datalen = 0;
3552 	communication = peer->conf.reason;
3553 
3554 	if ((subcode == ERR_CEASE_ADMIN_DOWN ||
3555 	    subcode == ERR_CEASE_ADMIN_RESET)
3556 	    && communication && *communication) {
3557 		reason_len = strlen(communication);
3558 		if (reason_len > REASON_LEN - 1) {
3559 		    log_peer_warnx(&peer->conf,
3560 			"trying to send overly long shutdown reason");
3561 		} else {
3562 			data[0] = reason_len;
3563 			datalen = reason_len + sizeof(data[0]);
3564 			memcpy(data + 1, communication, reason_len);
3565 		}
3566 	}
3567 	switch (peer->state) {
3568 	case STATE_OPENSENT:
3569 	case STATE_OPENCONFIRM:
3570 	case STATE_ESTABLISHED:
3571 		session_notification(peer, ERR_CEASE, subcode, data, datalen);
3572 		break;
3573 	default:
3574 		/* session not open, no need to send notification */
3575 		break;
3576 	}
3577 	bgp_fsm(peer, EVNT_STOP);
3578 }
3579 
3580 void
3581 merge_peers(struct bgpd_config *c, struct bgpd_config *nc)
3582 {
3583 	struct peer *p, *np, *next;
3584 
3585 	RB_FOREACH(p, peer_head, &c->peers) {
3586 		/* templates are handled specially */
3587 		if (p->template != NULL)
3588 			continue;
3589 		np = getpeerbyid(nc, p->conf.id);
3590 		if (np == NULL) {
3591 			p->reconf_action = RECONF_DELETE;
3592 			continue;
3593 		}
3594 
3595 		/* peer no longer uses TCP MD5SIG so deconfigure */
3596 		if (p->conf.auth.method == AUTH_MD5SIG &&
3597 		    np->conf.auth.method != AUTH_MD5SIG)
3598 			tcp_md5_del_listener(c, p);
3599 		else if (np->conf.auth.method == AUTH_MD5SIG)
3600 			tcp_md5_add_listener(c, np);
3601 
3602 		memcpy(&p->conf, &np->conf, sizeof(p->conf));
3603 		RB_REMOVE(peer_head, &nc->peers, np);
3604 		free(np);
3605 
3606 		p->reconf_action = RECONF_KEEP;
3607 
3608 		/* had demotion, is demoted, demote removed? */
3609 		if (p->demoted && !p->conf.demote_group[0])
3610 			session_demote(p, -1);
3611 
3612 		/* if session is not open then refresh pfkey data */
3613 		if (p->state < STATE_OPENSENT && !p->template)
3614 			imsg_compose(ibuf_main, IMSG_PFKEY_RELOAD,
3615 			    p->conf.id, 0, -1, NULL, 0);
3616 
3617 		/* sync the RDE in case we keep the peer */
3618 		if (imsg_rde(IMSG_SESSION_ADD, p->conf.id,
3619 		    &p->conf, sizeof(struct peer_config)) == -1)
3620 			fatalx("imsg_compose error");
3621 
3622 		/* apply the config to all clones of a template */
3623 		if (p->conf.template) {
3624 			struct peer *xp;
3625 			RB_FOREACH(xp, peer_head, &c->peers) {
3626 				if (xp->template != p)
3627 					continue;
3628 				session_template_clone(xp, NULL, xp->conf.id,
3629 				    xp->conf.remote_as);
3630 				if (imsg_rde(IMSG_SESSION_ADD, xp->conf.id,
3631 				    &xp->conf, sizeof(xp->conf)) == -1)
3632 					fatalx("imsg_compose error");
3633 			}
3634 		}
3635 	}
3636 
3637 	if (imsg_rde(IMSG_RECONF_DRAIN, 0, NULL, 0) == -1)
3638 		fatalx("imsg_compose error");
3639 
3640 	/* pfkeys of new peers already loaded by the parent process */
3641 	RB_FOREACH_SAFE(np, peer_head, &nc->peers, next) {
3642 		RB_REMOVE(peer_head, &nc->peers, np);
3643 		if (RB_INSERT(peer_head, &c->peers, np) != NULL)
3644 			fatalx("%s: peer tree is corrupt", __func__);
3645 		if (np->conf.auth.method == AUTH_MD5SIG)
3646 			tcp_md5_add_listener(c, np);
3647 	}
3648 }
3649