xref: /openbsd-src/usr.sbin/bgpd/session.c (revision 0b7734b3d77bb9b21afec6f4621cae6c805dbd45)
1 /*	$OpenBSD: session.c,v 1.350 2016/07/21 10:13:58 claudio Exp $ */
2 
3 /*
4  * Copyright (c) 2003, 2004, 2005 Henning Brauer <henning@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/types.h>
20 
21 #include <sys/mman.h>
22 #include <sys/socket.h>
23 #include <sys/time.h>
24 #include <sys/resource.h>
25 #include <sys/un.h>
26 #include <net/if_types.h>
27 #include <netinet/in.h>
28 #include <netinet/ip.h>
29 #include <netinet/tcp.h>
30 #include <arpa/inet.h>
31 #include <limits.h>
32 
33 #include <err.h>
34 #include <errno.h>
35 #include <fcntl.h>
36 #include <poll.h>
37 #include <pwd.h>
38 #include <signal.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <string.h>
42 #include <unistd.h>
43 
44 #include "bgpd.h"
45 #include "mrt.h"
46 #include "session.h"
47 
/*
 * Fixed slots at the front of the pollfd array built by session_main().
 * Listener sockets start at PFD_LISTENERS_START; the descriptors of the
 * peers, the mrt dumps and the control connections follow the listeners.
 */
#define PFD_PIPE_MAIN		0	/* ibuf_main: pipe to the parent */
#define PFD_PIPE_ROUTE		1	/* ibuf_rde: pipe to the RDE */
#define PFD_PIPE_ROUTE_CTL	2	/* ibuf_rde_ctl: RDE control pipe */
#define PFD_SOCK_CTL		3	/* csock */
#define PFD_SOCK_RCTL		4	/* rcsock */
#define PFD_SOCK_PFKEY		5	/* socket returned by pfkey_init() */
#define PFD_LISTENERS_START	6	/* first listener slot */
55 
56 void	session_sighdlr(int);
57 int	setup_listeners(u_int *);
58 void	init_conf(struct bgpd_config *);
59 void	init_peer(struct peer *);
60 void	start_timer_holdtime(struct peer *);
61 void	start_timer_keepalive(struct peer *);
62 void	session_close_connection(struct peer *);
63 void	change_state(struct peer *, enum session_state, enum session_events);
64 int	session_setup_socket(struct peer *);
65 void	session_accept(int);
66 int	session_connect(struct peer *);
67 void	session_tcp_established(struct peer *);
68 void	session_capa_ann_none(struct peer *);
69 int	session_capa_add(struct ibuf *, u_int8_t, u_int8_t);
70 int	session_capa_add_mp(struct ibuf *, u_int8_t);
71 int	session_capa_add_gr(struct peer *, struct ibuf *, u_int8_t);
72 struct bgp_msg	*session_newmsg(enum msg_type, u_int16_t);
73 int	session_sendmsg(struct bgp_msg *, struct peer *);
74 void	session_open(struct peer *);
75 void	session_keepalive(struct peer *);
76 void	session_update(u_int32_t, void *, size_t);
77 void	session_notification(struct peer *, u_int8_t, u_int8_t, void *,
78 	    ssize_t);
79 void	session_rrefresh(struct peer *, u_int8_t);
80 int	session_graceful_restart(struct peer *);
81 int	session_graceful_stop(struct peer *);
82 int	session_dispatch_msg(struct pollfd *, struct peer *);
83 int	session_process_msg(struct peer *);
84 int	parse_header(struct peer *, u_char *, u_int16_t *, u_int8_t *);
85 int	parse_open(struct peer *);
86 int	parse_update(struct peer *);
87 int	parse_refresh(struct peer *);
88 int	parse_notification(struct peer *);
89 int	parse_capabilities(struct peer *, u_char *, u_int16_t, u_int32_t *);
90 int	capa_neg_calc(struct peer *);
91 void	session_dispatch_imsg(struct imsgbuf *, int, u_int *);
92 void	session_up(struct peer *);
93 void	session_down(struct peer *);
94 void	session_demote(struct peer *, int);
95 
96 int		 la_cmp(struct listen_addr *, struct listen_addr *);
97 struct peer	*getpeerbyip(struct sockaddr *);
98 void		 session_template_clone(struct peer *, struct sockaddr *,
99 		    u_int32_t, u_int32_t);
100 int		 session_match_mask(struct peer *, struct bgpd_addr *);
101 struct peer	*getpeerbyid(u_int32_t);
102 
103 struct bgpd_config	*conf, *nconf;
104 struct bgpd_sysdep	 sysdep;
105 struct peer		*peers, *npeers;
106 volatile sig_atomic_t	 session_quit;
107 int			 pending_reconf;
108 int			 csock = -1, rcsock = -1;
109 u_int			 peer_cnt;
110 struct imsgbuf		*ibuf_rde;
111 struct imsgbuf		*ibuf_rde_ctl;
112 struct imsgbuf		*ibuf_main;
113 
114 struct mrt_head		 mrthead;
115 time_t			 pauseaccept;
116 
117 void
118 session_sighdlr(int sig)
119 {
120 	switch (sig) {
121 	case SIGINT:
122 	case SIGTERM:
123 		session_quit = 1;
124 		break;
125 	}
126 }
127 
128 int
129 setup_listeners(u_int *la_cnt)
130 {
131 	int			 ttl = 255;
132 	int			 opt;
133 	struct listen_addr	*la;
134 	u_int			 cnt = 0;
135 
136 	TAILQ_FOREACH(la, conf->listen_addrs, entry) {
137 		la->reconf = RECONF_NONE;
138 		cnt++;
139 
140 		if (la->flags & LISTENER_LISTENING)
141 			continue;
142 
143 		if (la->fd == -1) {
144 			log_warn("cannot establish listener on %s: invalid fd",
145 			    log_sockaddr((struct sockaddr *)&la->sa));
146 			continue;
147 		}
148 
149 		opt = 1;
150 		if (setsockopt(la->fd, IPPROTO_TCP, TCP_MD5SIG,
151 		    &opt, sizeof(opt)) == -1) {
152 			if (errno == ENOPROTOOPT) {	/* system w/o md5sig */
153 				log_warnx("md5sig not available, disabling");
154 				sysdep.no_md5sig = 1;
155 			} else
156 				fatal("setsockopt TCP_MD5SIG");
157 		}
158 
159 		/* set ttl to 255 so that ttl-security works */
160 		if (la->sa.ss_family == AF_INET && setsockopt(la->fd,
161 		    IPPROTO_IP, IP_TTL, &ttl, sizeof(ttl)) == -1) {
162 			log_warn("setup_listeners setsockopt TTL");
163 			continue;
164 		}
165 		if (la->sa.ss_family == AF_INET6 && setsockopt(la->fd,
166 		    IPPROTO_IPV6, IPV6_UNICAST_HOPS, &ttl, sizeof(ttl)) == -1) {
167 			log_warn("setup_listeners setsockopt hoplimit");
168 			continue;
169 		}
170 
171 		if (listen(la->fd, MAX_BACKLOG)) {
172 			close(la->fd);
173 			fatal("listen");
174 		}
175 
176 		la->flags |= LISTENER_LISTENING;
177 
178 		log_info("listening on %s",
179 		    log_sockaddr((struct sockaddr *)&la->sa));
180 	}
181 
182 	*la_cnt = cnt;
183 
184 	return (0);
185 }
186 
187 void
188 session_main(int debug, int verbose)
189 {
190 	int			 timeout, pfkeysock;
191 	unsigned int		 i, j, idx_peers, idx_listeners, idx_mrts;
192 	u_int			 pfd_elms = 0, peer_l_elms = 0, mrt_l_elms = 0;
193 	u_int			 listener_cnt, ctl_cnt, mrt_cnt;
194 	u_int			 new_cnt;
195 	u_int32_t		 ctl_queued;
196 	struct passwd		*pw;
197 	struct peer		*p, **peer_l = NULL, *last, *next;
198 	struct mrt		*m, *xm, **mrt_l = NULL;
199 	struct pollfd		*pfd = NULL;
200 	struct ctl_conn		*ctl_conn;
201 	struct listen_addr	*la;
202 	void			*newp;
203 	short			 events;
204 
205 	if ((pw = getpwnam(BGPD_USER)) == NULL)
206 		fatal(NULL);
207 
208 	if (chroot(pw->pw_dir) == -1)
209 		fatal("chroot");
210 	if (chdir("/") == -1)
211 		fatal("chdir(\"/\")");
212 
213 	setproctitle("session engine");
214 	bgpd_process = PROC_SE;
215 	pfkeysock = pfkey_init(&sysdep);
216 
217 	if (setgroups(1, &pw->pw_gid) ||
218 	    setresgid(pw->pw_gid, pw->pw_gid, pw->pw_gid) ||
219 	    setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid))
220 		fatal("can't drop privileges");
221 
222 	if (pledge("stdio inet recvfd", NULL) == -1)
223 		fatal("pledge");
224 
225 	signal(SIGTERM, session_sighdlr);
226 	signal(SIGINT, session_sighdlr);
227 	signal(SIGPIPE, SIG_IGN);
228 	signal(SIGHUP, SIG_IGN);
229 	signal(SIGALRM, SIG_IGN);
230 	signal(SIGUSR1, SIG_IGN);
231 
232 	if ((ibuf_main = malloc(sizeof(struct imsgbuf))) == NULL)
233 		fatal(NULL);
234 	imsg_init(ibuf_main, 3);
235 
236 	TAILQ_INIT(&ctl_conns);
237 	LIST_INIT(&mrthead);
238 	listener_cnt = 0;
239 	peer_cnt = 0;
240 	ctl_cnt = 0;
241 
242 	if ((conf = calloc(1, sizeof(struct bgpd_config))) == NULL)
243 		fatal(NULL);
244 	if ((conf->listen_addrs = calloc(1, sizeof(struct listen_addrs))) ==
245 	    NULL)
246 		fatal(NULL);
247 	TAILQ_INIT(conf->listen_addrs);
248 
249 	log_info("session engine ready");
250 
251 	while (session_quit == 0) {
252 		/* check for peers to be initialized or deleted */
253 		last = NULL;
254 		if (!pending_reconf) {
255 			for (p = peers; p != NULL; p = next) {
256 				next = p->next;
257 				/* cloned peer that idled out? */
258 				if (p->template && (p->state == STATE_IDLE ||
259 				    p->state == STATE_ACTIVE) &&
260 				    time(NULL) - p->stats.last_updown >=
261 				    INTERVAL_HOLD_CLONED)
262 					p->conf.reconf_action = RECONF_DELETE;
263 
264 				/* new peer that needs init? */
265 				if (p->state == STATE_NONE)
266 					init_peer(p);
267 
268 				/* reinit due? */
269 				if (p->conf.reconf_action == RECONF_REINIT) {
270 					session_stop(p, ERR_CEASE_ADMIN_RESET);
271 					if (!p->conf.down)
272 						timer_set(p, Timer_IdleHold, 0);
273 				}
274 
275 				/* deletion due? */
276 				if (p->conf.reconf_action == RECONF_DELETE) {
277 					if (p->demoted)
278 						session_demote(p, -1);
279 					p->conf.demote_group[0] = 0;
280 					session_stop(p, ERR_CEASE_PEER_UNCONF);
281 					log_peer_warnx(&p->conf, "removed");
282 					if (last != NULL)
283 						last->next = next;
284 					else
285 						peers = next;
286 					timer_remove_all(p);
287 					free(p);
288 					peer_cnt--;
289 					continue;
290 				}
291 				p->conf.reconf_action = RECONF_NONE;
292 				last = p;
293 			}
294 		}
295 
296 		if (peer_cnt > peer_l_elms) {
297 			if ((newp = reallocarray(peer_l, peer_cnt,
298 			    sizeof(struct peer *))) == NULL) {
299 				/* panic for now  */
300 				log_warn("could not resize peer_l from %u -> %u"
301 				    " entries", peer_l_elms, peer_cnt);
302 				fatalx("exiting");
303 			}
304 			peer_l = newp;
305 			peer_l_elms = peer_cnt;
306 		}
307 
308 		mrt_cnt = 0;
309 		for (m = LIST_FIRST(&mrthead); m != NULL; m = xm) {
310 			xm = LIST_NEXT(m, entry);
311 			if (m->state == MRT_STATE_REMOVE) {
312 				mrt_clean(m);
313 				LIST_REMOVE(m, entry);
314 				free(m);
315 				continue;
316 			}
317 			if (m->wbuf.queued)
318 				mrt_cnt++;
319 		}
320 
321 		if (mrt_cnt > mrt_l_elms) {
322 			if ((newp = reallocarray(mrt_l, mrt_cnt,
323 			    sizeof(struct mrt *))) == NULL) {
324 				/* panic for now  */
325 				log_warn("could not resize mrt_l from %u -> %u"
326 				    " entries", mrt_l_elms, mrt_cnt);
327 				fatalx("exiting");
328 			}
329 			mrt_l = newp;
330 			mrt_l_elms = mrt_cnt;
331 		}
332 
333 		new_cnt = PFD_LISTENERS_START + listener_cnt + peer_cnt +
334 		    ctl_cnt + mrt_cnt;
335 		if (new_cnt > pfd_elms) {
336 			if ((newp = reallocarray(pfd, new_cnt,
337 			    sizeof(struct pollfd))) == NULL) {
338 				/* panic for now  */
339 				log_warn("could not resize pfd from %u -> %u"
340 				    " entries", pfd_elms, new_cnt);
341 				fatalx("exiting");
342 			}
343 			pfd = newp;
344 			pfd_elms = new_cnt;
345 		}
346 
347 		bzero(pfd, sizeof(struct pollfd) * pfd_elms);
348 
349 		set_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main);
350 		set_pollfd(&pfd[PFD_PIPE_ROUTE], ibuf_rde);
351 
352 		ctl_queued = 0;
353 		TAILQ_FOREACH(ctl_conn, &ctl_conns, entry)
354 			ctl_queued += ctl_conn->ibuf.w.queued;
355 
356 		/*
357 		 * Do not act as unlimited buffer. Don't read in more
358 		 * messages if the ctl sockets are getting full.
359 		 */
360 		if (ctl_queued < SESSION_CTL_QUEUE_MAX)
361 			set_pollfd(&pfd[PFD_PIPE_ROUTE_CTL], ibuf_rde_ctl);
362 
363 		if (pauseaccept == 0) {
364 			pfd[PFD_SOCK_CTL].fd = csock;
365 			pfd[PFD_SOCK_CTL].events = POLLIN;
366 			pfd[PFD_SOCK_RCTL].fd = rcsock;
367 			pfd[PFD_SOCK_RCTL].events = POLLIN;
368 		} else {
369 			pfd[PFD_SOCK_CTL].fd = -1;
370 			pfd[PFD_SOCK_RCTL].fd = -1;
371 		}
372 		pfd[PFD_SOCK_PFKEY].fd = pfkeysock;
373 		pfd[PFD_SOCK_PFKEY].events = POLLIN;
374 
375 		i = PFD_LISTENERS_START;
376 		TAILQ_FOREACH(la, conf->listen_addrs, entry) {
377 			if (pauseaccept == 0) {
378 				pfd[i].fd = la->fd;
379 				pfd[i].events = POLLIN;
380 			} else
381 				pfd[i].fd = -1;
382 			i++;
383 		}
384 		idx_listeners = i;
385 		timeout = 240;	/* loop every 240s at least */
386 
387 		for (p = peers; p != NULL; p = p->next) {
388 			time_t	nextaction;
389 			struct peer_timer *pt;
390 
391 			/* check timers */
392 			if ((pt = timer_nextisdue(p)) != NULL) {
393 				switch (pt->type) {
394 				case Timer_Hold:
395 					bgp_fsm(p, EVNT_TIMER_HOLDTIME);
396 					break;
397 				case Timer_ConnectRetry:
398 					bgp_fsm(p, EVNT_TIMER_CONNRETRY);
399 					break;
400 				case Timer_Keepalive:
401 					bgp_fsm(p, EVNT_TIMER_KEEPALIVE);
402 					break;
403 				case Timer_IdleHold:
404 					bgp_fsm(p, EVNT_START);
405 					break;
406 				case Timer_IdleHoldReset:
407 					p->IdleHoldTime /= 2;
408 					if (p->IdleHoldTime <=
409 					    INTERVAL_IDLE_HOLD_INITIAL) {
410 						p->IdleHoldTime =
411 						    INTERVAL_IDLE_HOLD_INITIAL;
412 						timer_stop(p,
413 						    Timer_IdleHoldReset);
414 						p->errcnt = 0;
415 					} else
416 						timer_set(p,
417 						    Timer_IdleHoldReset,
418 						    p->IdleHoldTime);
419 					break;
420 				case Timer_CarpUndemote:
421 					timer_stop(p, Timer_CarpUndemote);
422 					if (p->demoted &&
423 					    p->state == STATE_ESTABLISHED)
424 						session_demote(p, -1);
425 					break;
426 				case Timer_RestartTimeout:
427 					timer_stop(p, Timer_RestartTimeout);
428 					session_graceful_stop(p);
429 					break;
430 				default:
431 					fatalx("King Bula lost in time");
432 				}
433 			}
434 			if ((nextaction = timer_nextduein(p)) != -1 &&
435 			    nextaction < timeout)
436 				timeout = nextaction;
437 
438 			/* are we waiting for a write? */
439 			events = POLLIN;
440 			if (p->wbuf.queued > 0 || p->state == STATE_CONNECT)
441 				events |= POLLOUT;
442 			/* is there still work to do? */
443 			if (p->rbuf && p->rbuf->wpos)
444 				timeout = 0;
445 
446 			/* poll events */
447 			if (p->fd != -1 && events != 0) {
448 				pfd[i].fd = p->fd;
449 				pfd[i].events = events;
450 				peer_l[i - idx_listeners] = p;
451 				i++;
452 			}
453 		}
454 
455 		idx_peers = i;
456 
457 		LIST_FOREACH(m, &mrthead, entry)
458 			if (m->wbuf.queued) {
459 				pfd[i].fd = m->wbuf.fd;
460 				pfd[i].events = POLLOUT;
461 				mrt_l[i - idx_peers] = m;
462 				i++;
463 			}
464 
465 		idx_mrts = i;
466 
467 		TAILQ_FOREACH(ctl_conn, &ctl_conns, entry) {
468 			pfd[i].fd = ctl_conn->ibuf.fd;
469 			pfd[i].events = POLLIN;
470 			if (ctl_conn->ibuf.w.queued > 0)
471 				pfd[i].events |= POLLOUT;
472 			i++;
473 		}
474 
475 		if (pauseaccept && timeout > 1)
476 			timeout = 1;
477 		if (timeout < 0)
478 			timeout = 0;
479 		if (poll(pfd, i, timeout * 1000) == -1)
480 			if (errno != EINTR)
481 				fatal("poll error");
482 
483 		/*
484 		 * If we previously saw fd exhaustion, we stop accept()
485 		 * for 1 second to throttle the accept() loop.
486 		 */
487 		if (pauseaccept && getmonotime() > pauseaccept + 1)
488 			pauseaccept = 0;
489 
490 		if (handle_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main) == -1) {
491 			log_warnx("SE: Lost connection to parent");
492 			session_quit = 1;
493 			continue;
494 		} else
495 			session_dispatch_imsg(ibuf_main, PFD_PIPE_MAIN,
496 			    &listener_cnt);
497 
498 		if (handle_pollfd(&pfd[PFD_PIPE_ROUTE], ibuf_rde) == -1) {
499 			log_warnx("SE: Lost connection to RDE");
500 			msgbuf_clear(&ibuf_rde->w);
501 			free(ibuf_rde);
502 			ibuf_rde = NULL;
503 		} else
504 			session_dispatch_imsg(ibuf_rde, PFD_PIPE_ROUTE,
505 			    &listener_cnt);
506 
507 		if (handle_pollfd(&pfd[PFD_PIPE_ROUTE_CTL], ibuf_rde_ctl) ==
508 		    -1) {
509 			log_warnx("SE: Lost connection to RDE control");
510 			msgbuf_clear(&ibuf_rde_ctl->w);
511 			free(ibuf_rde_ctl);
512 			ibuf_rde_ctl = NULL;
513 		} else
514 			session_dispatch_imsg(ibuf_rde_ctl, PFD_PIPE_ROUTE_CTL,
515 			    &listener_cnt);
516 
517 		if (pfd[PFD_SOCK_CTL].revents & POLLIN)
518 			ctl_cnt += control_accept(csock, 0);
519 
520 		if (pfd[PFD_SOCK_RCTL].revents & POLLIN)
521 			ctl_cnt += control_accept(rcsock, 1);
522 
523 		if (pfd[PFD_SOCK_PFKEY].revents & POLLIN) {
524 			if (pfkey_read(pfkeysock, NULL) == -1) {
525 				log_warnx("pfkey_read failed, exiting...");
526 				session_quit = 1;
527 			}
528 		}
529 
530 		for (j = PFD_LISTENERS_START; j < idx_listeners; j++)
531 			if (pfd[j].revents & POLLIN)
532 				session_accept(pfd[j].fd);
533 
534 		for (; j < idx_peers; j++)
535 			session_dispatch_msg(&pfd[j],
536 			    peer_l[j - idx_listeners]);
537 
538 		for (p = peers; p != NULL; p = p->next)
539 			if (p->rbuf && p->rbuf->wpos)
540 				session_process_msg(p);
541 
542 		for (; j < idx_mrts; j++)
543 			if (pfd[j].revents & POLLOUT)
544 				mrt_write(mrt_l[j - idx_peers]);
545 
546 		for (; j < i; j++)
547 			control_dispatch_msg(&pfd[j], &ctl_cnt);
548 	}
549 
550 	while ((p = peers) != NULL) {
551 		peers = p->next;
552 		session_stop(p, ERR_CEASE_ADMIN_DOWN);
553 		pfkey_remove(p);
554 		free(p);
555 	}
556 
557 	while ((m = LIST_FIRST(&mrthead)) != NULL) {
558 		mrt_clean(m);
559 		LIST_REMOVE(m, entry);
560 		free(m);
561 	}
562 
563 	while ((la = TAILQ_FIRST(conf->listen_addrs)) != NULL) {
564 		TAILQ_REMOVE(conf->listen_addrs, la, entry);
565 		free(la);
566 	}
567 	free(conf->listen_addrs);
568 	free(peer_l);
569 	free(mrt_l);
570 	free(pfd);
571 
572 	msgbuf_write(&ibuf_rde->w);
573 	msgbuf_clear(&ibuf_rde->w);
574 	free(ibuf_rde);
575 	msgbuf_write(&ibuf_main->w);
576 	msgbuf_clear(&ibuf_main->w);
577 	free(ibuf_main);
578 
579 	control_shutdown(csock);
580 	control_shutdown(rcsock);
581 	log_info("session engine exiting");
582 	_exit(0);
583 }
584 
585 void
586 init_conf(struct bgpd_config *c)
587 {
588 	if (!c->holdtime)
589 		c->holdtime = INTERVAL_HOLD;
590 	if (!c->connectretry)
591 		c->connectretry = INTERVAL_CONNECTRETRY;
592 }
593 
594 void
595 init_peer(struct peer *p)
596 {
597 	TAILQ_INIT(&p->timers);
598 	p->fd = p->wbuf.fd = -1;
599 
600 	if (p->conf.if_depend[0])
601 		imsg_compose(ibuf_main, IMSG_IFINFO, 0, 0, -1,
602 		    p->conf.if_depend, sizeof(p->conf.if_depend));
603 	else
604 		p->depend_ok = 1;
605 
606 	peer_cnt++;
607 
608 	change_state(p, STATE_IDLE, EVNT_NONE);
609 	if (p->conf.down)
610 		timer_stop(p, Timer_IdleHold);		/* no autostart */
611 	else
612 		timer_set(p, Timer_IdleHold, 0);	/* start ASAP */
613 
614 	/*
615 	 * on startup, demote if requested.
616 	 * do not handle new peers. they must reach ESTABLISHED beforehands.
617 	 * peers added at runtime have reconf_action set to RECONF_REINIT.
618 	 */
619 	if (p->conf.reconf_action != RECONF_REINIT && p->conf.demote_group[0])
620 		session_demote(p, +1);
621 }
622 
623 void
624 bgp_fsm(struct peer *peer, enum session_events event)
625 {
626 	switch (peer->state) {
627 	case STATE_NONE:
628 		/* nothing */
629 		break;
630 	case STATE_IDLE:
631 		switch (event) {
632 		case EVNT_START:
633 			timer_stop(peer, Timer_Hold);
634 			timer_stop(peer, Timer_Keepalive);
635 			timer_stop(peer, Timer_IdleHold);
636 
637 			/* allocate read buffer */
638 			peer->rbuf = calloc(1, sizeof(struct ibuf_read));
639 			if (peer->rbuf == NULL)
640 				fatal(NULL);
641 
642 			/* init write buffer */
643 			msgbuf_init(&peer->wbuf);
644 
645 			/* init pfkey - remove old if any, load new ones */
646 			pfkey_remove(peer);
647 			if (pfkey_establish(peer) == -1) {
648 				log_peer_warnx(&peer->conf,
649 				    "pfkey setup failed");
650 				return;
651 			}
652 
653 			peer->stats.last_sent_errcode = 0;
654 			peer->stats.last_sent_suberr = 0;
655 
656 			if (!peer->depend_ok)
657 				timer_stop(peer, Timer_ConnectRetry);
658 			else if (peer->passive || peer->conf.passive ||
659 			    peer->conf.template) {
660 				change_state(peer, STATE_ACTIVE, event);
661 				timer_stop(peer, Timer_ConnectRetry);
662 			} else {
663 				change_state(peer, STATE_CONNECT, event);
664 				timer_set(peer, Timer_ConnectRetry,
665 				    conf->connectretry);
666 				session_connect(peer);
667 			}
668 			peer->passive = 0;
669 			break;
670 		default:
671 			/* ignore */
672 			break;
673 		}
674 		break;
675 	case STATE_CONNECT:
676 		switch (event) {
677 		case EVNT_START:
678 			/* ignore */
679 			break;
680 		case EVNT_CON_OPEN:
681 			session_tcp_established(peer);
682 			session_open(peer);
683 			timer_stop(peer, Timer_ConnectRetry);
684 			peer->holdtime = INTERVAL_HOLD_INITIAL;
685 			start_timer_holdtime(peer);
686 			change_state(peer, STATE_OPENSENT, event);
687 			break;
688 		case EVNT_CON_OPENFAIL:
689 			timer_set(peer, Timer_ConnectRetry,
690 			    conf->connectretry);
691 			session_close_connection(peer);
692 			change_state(peer, STATE_ACTIVE, event);
693 			break;
694 		case EVNT_TIMER_CONNRETRY:
695 			timer_set(peer, Timer_ConnectRetry,
696 			    conf->connectretry);
697 			session_connect(peer);
698 			break;
699 		default:
700 			change_state(peer, STATE_IDLE, event);
701 			break;
702 		}
703 		break;
704 	case STATE_ACTIVE:
705 		switch (event) {
706 		case EVNT_START:
707 			/* ignore */
708 			break;
709 		case EVNT_CON_OPEN:
710 			session_tcp_established(peer);
711 			session_open(peer);
712 			timer_stop(peer, Timer_ConnectRetry);
713 			peer->holdtime = INTERVAL_HOLD_INITIAL;
714 			start_timer_holdtime(peer);
715 			change_state(peer, STATE_OPENSENT, event);
716 			break;
717 		case EVNT_CON_OPENFAIL:
718 			timer_set(peer, Timer_ConnectRetry,
719 			    conf->connectretry);
720 			session_close_connection(peer);
721 			change_state(peer, STATE_ACTIVE, event);
722 			break;
723 		case EVNT_TIMER_CONNRETRY:
724 			timer_set(peer, Timer_ConnectRetry,
725 			    peer->holdtime);
726 			change_state(peer, STATE_CONNECT, event);
727 			session_connect(peer);
728 			break;
729 		default:
730 			change_state(peer, STATE_IDLE, event);
731 			break;
732 		}
733 		break;
734 	case STATE_OPENSENT:
735 		switch (event) {
736 		case EVNT_START:
737 			/* ignore */
738 			break;
739 		case EVNT_STOP:
740 			change_state(peer, STATE_IDLE, event);
741 			break;
742 		case EVNT_CON_CLOSED:
743 			session_close_connection(peer);
744 			timer_set(peer, Timer_ConnectRetry,
745 			    conf->connectretry);
746 			change_state(peer, STATE_ACTIVE, event);
747 			break;
748 		case EVNT_CON_FATAL:
749 			change_state(peer, STATE_IDLE, event);
750 			break;
751 		case EVNT_TIMER_HOLDTIME:
752 			session_notification(peer, ERR_HOLDTIMEREXPIRED,
753 			    0, NULL, 0);
754 			change_state(peer, STATE_IDLE, event);
755 			break;
756 		case EVNT_RCVD_OPEN:
757 			/* parse_open calls change_state itself on failure */
758 			if (parse_open(peer))
759 				break;
760 			session_keepalive(peer);
761 			change_state(peer, STATE_OPENCONFIRM, event);
762 			break;
763 		case EVNT_RCVD_NOTIFICATION:
764 			if (parse_notification(peer)) {
765 				change_state(peer, STATE_IDLE, event);
766 				/* don't punish, capa negotiation */
767 				timer_set(peer, Timer_IdleHold, 0);
768 				peer->IdleHoldTime /= 2;
769 			} else
770 				change_state(peer, STATE_IDLE, event);
771 			break;
772 		default:
773 			session_notification(peer,
774 			    ERR_FSM, ERR_FSM_UNEX_OPENSENT, NULL, 0);
775 			change_state(peer, STATE_IDLE, event);
776 			break;
777 		}
778 		break;
779 	case STATE_OPENCONFIRM:
780 		switch (event) {
781 		case EVNT_START:
782 			/* ignore */
783 			break;
784 		case EVNT_STOP:
785 			change_state(peer, STATE_IDLE, event);
786 			break;
787 		case EVNT_CON_CLOSED:
788 		case EVNT_CON_FATAL:
789 			change_state(peer, STATE_IDLE, event);
790 			break;
791 		case EVNT_TIMER_HOLDTIME:
792 			session_notification(peer, ERR_HOLDTIMEREXPIRED,
793 			    0, NULL, 0);
794 			change_state(peer, STATE_IDLE, event);
795 			break;
796 		case EVNT_TIMER_KEEPALIVE:
797 			session_keepalive(peer);
798 			break;
799 		case EVNT_RCVD_KEEPALIVE:
800 			start_timer_holdtime(peer);
801 			change_state(peer, STATE_ESTABLISHED, event);
802 			break;
803 		case EVNT_RCVD_NOTIFICATION:
804 			parse_notification(peer);
805 			change_state(peer, STATE_IDLE, event);
806 			break;
807 		default:
808 			session_notification(peer,
809 			    ERR_FSM, ERR_FSM_UNEX_OPENCONFIRM, NULL, 0);
810 			change_state(peer, STATE_IDLE, event);
811 			break;
812 		}
813 		break;
814 	case STATE_ESTABLISHED:
815 		switch (event) {
816 		case EVNT_START:
817 			/* ignore */
818 			break;
819 		case EVNT_STOP:
820 			change_state(peer, STATE_IDLE, event);
821 			break;
822 		case EVNT_CON_CLOSED:
823 		case EVNT_CON_FATAL:
824 			change_state(peer, STATE_IDLE, event);
825 			break;
826 		case EVNT_TIMER_HOLDTIME:
827 			session_notification(peer, ERR_HOLDTIMEREXPIRED,
828 			    0, NULL, 0);
829 			change_state(peer, STATE_IDLE, event);
830 			break;
831 		case EVNT_TIMER_KEEPALIVE:
832 			session_keepalive(peer);
833 			break;
834 		case EVNT_RCVD_KEEPALIVE:
835 			start_timer_holdtime(peer);
836 			break;
837 		case EVNT_RCVD_UPDATE:
838 			start_timer_holdtime(peer);
839 			if (parse_update(peer))
840 				change_state(peer, STATE_IDLE, event);
841 			else
842 				start_timer_holdtime(peer);
843 			break;
844 		case EVNT_RCVD_NOTIFICATION:
845 			parse_notification(peer);
846 			change_state(peer, STATE_IDLE, event);
847 			break;
848 		default:
849 			session_notification(peer,
850 			    ERR_FSM, ERR_FSM_UNEX_ESTABLISHED, NULL, 0);
851 			change_state(peer, STATE_IDLE, event);
852 			break;
853 		}
854 		break;
855 	}
856 }
857 
858 void
859 start_timer_holdtime(struct peer *peer)
860 {
861 	if (peer->holdtime > 0)
862 		timer_set(peer, Timer_Hold, peer->holdtime);
863 	else
864 		timer_stop(peer, Timer_Hold);
865 }
866 
867 void
868 start_timer_keepalive(struct peer *peer)
869 {
870 	if (peer->holdtime > 0)
871 		timer_set(peer, Timer_Keepalive, peer->holdtime / 3);
872 	else
873 		timer_stop(peer, Timer_Keepalive);
874 }
875 
876 void
877 session_close_connection(struct peer *peer)
878 {
879 	if (peer->fd != -1) {
880 		close(peer->fd);
881 		pauseaccept = 0;
882 	}
883 	peer->fd = peer->wbuf.fd = -1;
884 }
885 
886 void
887 change_state(struct peer *peer, enum session_state state,
888     enum session_events event)
889 {
890 	struct mrt	*mrt;
891 
892 	switch (state) {
893 	case STATE_IDLE:
894 		/* carp demotion first. new peers handled in init_peer */
895 		if (peer->state == STATE_ESTABLISHED &&
896 		    peer->conf.demote_group[0] && !peer->demoted)
897 			session_demote(peer, +1);
898 
899 		/*
900 		 * try to write out what's buffered (maybe a notification),
901 		 * don't bother if it fails
902 		 */
903 		if (peer->state >= STATE_OPENSENT && peer->wbuf.queued)
904 			msgbuf_write(&peer->wbuf);
905 
906 		/*
907 		 * we must start the timer for the next EVNT_START
908 		 * if we are coming here due to an error and the
909 		 * session was not established successfully before, the
910 		 * starttimerinterval needs to be exponentially increased
911 		 */
912 		if (peer->IdleHoldTime == 0)
913 			peer->IdleHoldTime = INTERVAL_IDLE_HOLD_INITIAL;
914 		peer->holdtime = INTERVAL_HOLD_INITIAL;
915 		timer_stop(peer, Timer_ConnectRetry);
916 		timer_stop(peer, Timer_Keepalive);
917 		timer_stop(peer, Timer_Hold);
918 		timer_stop(peer, Timer_IdleHold);
919 		timer_stop(peer, Timer_IdleHoldReset);
920 		session_close_connection(peer);
921 		msgbuf_clear(&peer->wbuf);
922 		free(peer->rbuf);
923 		peer->rbuf = NULL;
924 		bzero(&peer->capa.peer, sizeof(peer->capa.peer));
925 
926 		if (event != EVNT_STOP) {
927 			timer_set(peer, Timer_IdleHold, peer->IdleHoldTime);
928 			if (event != EVNT_NONE &&
929 			    peer->IdleHoldTime < MAX_IDLE_HOLD/2)
930 				peer->IdleHoldTime *= 2;
931 		}
932 		if (peer->state == STATE_ESTABLISHED) {
933 			if (peer->capa.neg.grestart.restart == 2 &&
934 			    (event == EVNT_CON_CLOSED ||
935 			    event == EVNT_CON_FATAL)) {
936 				/* don't punish graceful restart */
937 				timer_set(peer, Timer_IdleHold, 0);
938 				peer->IdleHoldTime /= 2;
939 				session_graceful_restart(peer);
940 			} else
941 				session_down(peer);
942 		}
943 		if (peer->state == STATE_NONE ||
944 		    peer->state == STATE_ESTABLISHED) {
945 			/* initialize capability negotiation structures */
946 			memcpy(&peer->capa.ann, &peer->conf.capabilities,
947 			    sizeof(peer->capa.ann));
948 			if (!peer->conf.announce_capa)
949 				session_capa_ann_none(peer);
950 		}
951 		break;
952 	case STATE_CONNECT:
953 		if (peer->state == STATE_ESTABLISHED &&
954 		    peer->capa.neg.grestart.restart == 2) {
955 			/* do the graceful restart dance */
956 			session_graceful_restart(peer);
957 			peer->holdtime = INTERVAL_HOLD_INITIAL;
958 			timer_stop(peer, Timer_ConnectRetry);
959 			timer_stop(peer, Timer_Keepalive);
960 			timer_stop(peer, Timer_Hold);
961 			timer_stop(peer, Timer_IdleHold);
962 			timer_stop(peer, Timer_IdleHoldReset);
963 			session_close_connection(peer);
964 			msgbuf_clear(&peer->wbuf);
965 			bzero(&peer->capa.peer, sizeof(peer->capa.peer));
966 		}
967 		break;
968 	case STATE_ACTIVE:
969 		break;
970 	case STATE_OPENSENT:
971 		break;
972 	case STATE_OPENCONFIRM:
973 		break;
974 	case STATE_ESTABLISHED:
975 		timer_set(peer, Timer_IdleHoldReset, peer->IdleHoldTime);
976 		if (peer->demoted)
977 			timer_set(peer, Timer_CarpUndemote,
978 			    INTERVAL_HOLD_DEMOTED);
979 		session_up(peer);
980 		break;
981 	default:		/* something seriously fucked */
982 		break;
983 	}
984 
985 	log_statechange(peer, state, event);
986 	LIST_FOREACH(mrt, &mrthead, entry) {
987 		if (!(mrt->type == MRT_ALL_IN || mrt->type == MRT_ALL_OUT))
988 			continue;
989 		if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
990 		    mrt->peer_id == peer->conf.id || (mrt->group_id != 0 &&
991 		    mrt->group_id == peer->conf.groupid))
992 			mrt_dump_state(mrt, peer->state, state, peer);
993 	}
994 	peer->prev_state = peer->state;
995 	peer->state = state;
996 }
997 
998 void
999 session_accept(int listenfd)
1000 {
1001 	int			 connfd;
1002 	int			 opt;
1003 	socklen_t		 len;
1004 	struct sockaddr_storage	 cliaddr;
1005 	struct peer		*p = NULL;
1006 
1007 	len = sizeof(cliaddr);
1008 	if ((connfd = accept4(listenfd,
1009 	    (struct sockaddr *)&cliaddr, &len,
1010 	    SOCK_CLOEXEC | SOCK_NONBLOCK)) == -1) {
1011 		if (errno == ENFILE || errno == EMFILE)
1012 			pauseaccept = getmonotime();
1013 		else if (errno != EWOULDBLOCK && errno != EINTR &&
1014 		    errno != ECONNABORTED)
1015 			log_warn("accept");
1016 		return;
1017 	}
1018 
1019 	p = getpeerbyip((struct sockaddr *)&cliaddr);
1020 
1021 	if (p != NULL && p->state == STATE_IDLE && p->errcnt < 2) {
1022 		if (timer_running(p, Timer_IdleHold, NULL)) {
1023 			/* fast reconnect after clear */
1024 			p->passive = 1;
1025 			bgp_fsm(p, EVNT_START);
1026 		}
1027 	}
1028 
1029 	if (p != NULL &&
1030 	    (p->state == STATE_CONNECT || p->state == STATE_ACTIVE)) {
1031 		if (p->fd != -1) {
1032 			if (p->state == STATE_CONNECT)
1033 				session_close_connection(p);
1034 			else {
1035 				close(connfd);
1036 				return;
1037 			}
1038 		}
1039 
1040 open:
1041 		if (p->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) {
1042 			log_peer_warnx(&p->conf,
1043 			    "ipsec or md5sig configured but not available");
1044 			close(connfd);
1045 			return;
1046 		}
1047 
1048 		if (p->conf.auth.method == AUTH_MD5SIG) {
1049 			if (sysdep.no_md5sig) {
1050 				log_peer_warnx(&p->conf,
1051 				    "md5sig configured but not available");
1052 				close(connfd);
1053 				return;
1054 			}
1055 			len = sizeof(opt);
1056 			if (getsockopt(connfd, IPPROTO_TCP, TCP_MD5SIG,
1057 			    &opt, &len) == -1)
1058 				fatal("getsockopt TCP_MD5SIG");
1059 			if (!opt) {	/* non-md5'd connection! */
1060 				log_peer_warnx(&p->conf,
1061 				    "connection attempt without md5 signature");
1062 				close(connfd);
1063 				return;
1064 			}
1065 		}
1066 		p->fd = p->wbuf.fd = connfd;
1067 		if (session_setup_socket(p)) {
1068 			close(connfd);
1069 			return;
1070 		}
1071 		bgp_fsm(p, EVNT_CON_OPEN);
1072 		return;
1073 	} else if (p != NULL && p->state == STATE_ESTABLISHED &&
1074 	    p->capa.neg.grestart.restart == 2) {
1075 		/* first do the graceful restart dance */
1076 		change_state(p, STATE_CONNECT, EVNT_CON_CLOSED);
1077 		/* then do part of the open dance */
1078 		goto open;
1079 	} else {
1080 		log_conn_attempt(p, (struct sockaddr *)&cliaddr);
1081 		close(connfd);
1082 	}
1083 }
1084 
1085 int
1086 session_connect(struct peer *peer)
1087 {
1088 	int			 opt = 1;
1089 	struct sockaddr		*sa;
1090 
1091 	/*
1092 	 * we do not need the overcomplicated collision detection RFC 1771
1093 	 * describes; we simply make sure there is only ever one concurrent
1094 	 * tcp connection per peer.
1095 	 */
1096 	if (peer->fd != -1)
1097 		return (-1);
1098 
1099 	if ((peer->fd = socket(aid2af(peer->conf.remote_addr.aid),
1100 	    SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, IPPROTO_TCP)) == -1) {
1101 		log_peer_warn(&peer->conf, "session_connect socket");
1102 		bgp_fsm(peer, EVNT_CON_OPENFAIL);
1103 		return (-1);
1104 	}
1105 
1106 	if (peer->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) {
1107 		log_peer_warnx(&peer->conf,
1108 		    "ipsec or md5sig configured but not available");
1109 		bgp_fsm(peer, EVNT_CON_OPENFAIL);
1110 		return (-1);
1111 	}
1112 
1113 	if (peer->conf.auth.method == AUTH_MD5SIG) {
1114 		if (sysdep.no_md5sig) {
1115 			log_peer_warnx(&peer->conf,
1116 			    "md5sig configured but not available");
1117 			bgp_fsm(peer, EVNT_CON_OPENFAIL);
1118 			return (-1);
1119 		}
1120 		if (setsockopt(peer->fd, IPPROTO_TCP, TCP_MD5SIG,
1121 		    &opt, sizeof(opt)) == -1) {
1122 			log_peer_warn(&peer->conf, "setsockopt md5sig");
1123 			bgp_fsm(peer, EVNT_CON_OPENFAIL);
1124 			return (-1);
1125 		}
1126 	}
1127 	peer->wbuf.fd = peer->fd;
1128 
1129 	/* if update source is set we need to bind() */
1130 	if ((sa = addr2sa(&peer->conf.local_addr, 0)) != NULL) {
1131 		if (bind(peer->fd, sa, sa->sa_len) == -1) {
1132 			log_peer_warn(&peer->conf, "session_connect bind");
1133 			bgp_fsm(peer, EVNT_CON_OPENFAIL);
1134 			return (-1);
1135 		}
1136 	}
1137 
1138 	if (session_setup_socket(peer)) {
1139 		bgp_fsm(peer, EVNT_CON_OPENFAIL);
1140 		return (-1);
1141 	}
1142 
1143 	sa = addr2sa(&peer->conf.remote_addr, BGP_PORT);
1144 	if (connect(peer->fd, sa, sa->sa_len) == -1) {
1145 		if (errno != EINPROGRESS) {
1146 			if (errno != peer->lasterr)
1147 				log_peer_warn(&peer->conf, "connect");
1148 			peer->lasterr = errno;
1149 			bgp_fsm(peer, EVNT_CON_OPENFAIL);
1150 			return (-1);
1151 		}
1152 	} else
1153 		bgp_fsm(peer, EVNT_CON_OPEN);
1154 
1155 	return (0);
1156 }
1157 
1158 int
1159 session_setup_socket(struct peer *p)
1160 {
1161 	int	ttl = p->conf.distance;
1162 	int	pre = IPTOS_PREC_INTERNETCONTROL;
1163 	int	nodelay = 1;
1164 	int	bsize;
1165 
1166 	switch (p->conf.remote_addr.aid) {
1167 	case AID_INET:
1168 		/* set precedence, see RFC 1771 appendix 5 */
1169 		if (setsockopt(p->fd, IPPROTO_IP, IP_TOS, &pre, sizeof(pre)) ==
1170 		    -1) {
1171 			log_peer_warn(&p->conf,
1172 			    "session_setup_socket setsockopt TOS");
1173 			return (-1);
1174 		}
1175 
1176 		if (p->conf.ebgp) {
1177 			/* set TTL to foreign router's distance
1178 			   1=direct n=multihop with ttlsec, we always use 255 */
1179 			if (p->conf.ttlsec) {
1180 				ttl = 256 - p->conf.distance;
1181 				if (setsockopt(p->fd, IPPROTO_IP, IP_MINTTL,
1182 				    &ttl, sizeof(ttl)) == -1) {
1183 					log_peer_warn(&p->conf,
1184 					    "session_setup_socket: "
1185 					    "setsockopt MINTTL");
1186 					return (-1);
1187 				}
1188 				ttl = 255;
1189 			}
1190 
1191 			if (setsockopt(p->fd, IPPROTO_IP, IP_TTL, &ttl,
1192 			    sizeof(ttl)) == -1) {
1193 				log_peer_warn(&p->conf,
1194 				    "session_setup_socket setsockopt TTL");
1195 				return (-1);
1196 			}
1197 		}
1198 		break;
1199 	case AID_INET6:
1200 		if (p->conf.ebgp) {
1201 			/* set hoplimit to foreign router's distance
1202 			   1=direct n=multihop with ttlsec, we always use 255 */
1203 			if (p->conf.ttlsec) {
1204 				ttl = 256 - p->conf.distance;
1205 				if (setsockopt(p->fd, IPPROTO_IPV6,
1206 				    IPV6_MINHOPCOUNT, &ttl, sizeof(ttl))
1207 				    == -1) {
1208 					log_peer_warn(&p->conf,
1209 					    "session_setup_socket: "
1210 					    "setsockopt MINHOPCOUNT");
1211 					return (-1);
1212 				}
1213 				ttl = 255;
1214 			}
1215 			if (setsockopt(p->fd, IPPROTO_IPV6, IPV6_UNICAST_HOPS,
1216 			    &ttl, sizeof(ttl)) == -1) {
1217 				log_peer_warn(&p->conf,
1218 				    "session_setup_socket setsockopt hoplimit");
1219 				return (-1);
1220 			}
1221 		}
1222 		break;
1223 	}
1224 
1225 	/* set TCP_NODELAY */
1226 	if (setsockopt(p->fd, IPPROTO_TCP, TCP_NODELAY, &nodelay,
1227 	    sizeof(nodelay)) == -1) {
1228 		log_peer_warn(&p->conf,
1229 		    "session_setup_socket setsockopt TCP_NODELAY");
1230 		return (-1);
1231 	}
1232 
1233 	/* only increase bufsize (and thus window) if md5 or ipsec is in use */
1234 	if (p->conf.auth.method != AUTH_NONE) {
1235 		/* try to increase bufsize. no biggie if it fails */
1236 		bsize = 65535;
1237 		while (bsize > 8192 &&
1238 		    setsockopt(p->fd, SOL_SOCKET, SO_RCVBUF, &bsize,
1239 		    sizeof(bsize)) == -1 && errno != EINVAL)
1240 			bsize /= 2;
1241 		bsize = 65535;
1242 		while (bsize > 8192 &&
1243 		    setsockopt(p->fd, SOL_SOCKET, SO_SNDBUF, &bsize,
1244 		    sizeof(bsize)) == -1 && errno != EINVAL)
1245 			bsize /= 2;
1246 	}
1247 
1248 	return (0);
1249 }
1250 
1251 void
1252 session_tcp_established(struct peer *peer)
1253 {
1254 	socklen_t	len;
1255 
1256 	len = sizeof(peer->sa_local);
1257 	if (getsockname(peer->fd, (struct sockaddr *)&peer->sa_local,
1258 	    &len) == -1)
1259 		log_warn("getsockname");
1260 	len = sizeof(peer->sa_remote);
1261 	if (getpeername(peer->fd, (struct sockaddr *)&peer->sa_remote,
1262 	    &len) == -1)
1263 		log_warn("getpeername");
1264 }
1265 
1266 void
1267 session_capa_ann_none(struct peer *peer)
1268 {
1269 	bzero(&peer->capa.ann, sizeof(peer->capa.ann));
1270 }
1271 
/*
 * Append a capability header (one byte code, one byte length) to opb.
 * Returns the sum of the ibuf_add() results; 0 means both bytes were added.
 */
int
session_capa_add(struct ibuf *opb, u_int8_t capa_code, u_int8_t capa_len)
{
	int	failed;

	failed = ibuf_add(opb, &capa_code, sizeof(capa_code));
	failed += ibuf_add(opb, &capa_len, sizeof(capa_len));

	return (failed);
}
1281 
/*
 * Append the 4 byte multiprotocol capability value for aid to buf:
 * AFI in network byte order, one zero byte, SAFI.
 * An aid that aid2afi() cannot translate is fatal.
 * Returns the sum of the ibuf_add() results; 0 means success.
 */
int
session_capa_add_mp(struct ibuf *buf, u_int8_t aid)
{
	u_int16_t	 afi;
	u_int8_t	 safi;
	u_int8_t	 reserved = 0;
	int		 failed;

	if (aid2afi(aid, &afi, &safi) == -1)
		fatalx("session_capa_add_mp: bad afi/safi pair");

	afi = htons(afi);
	failed = ibuf_add(buf, &afi, sizeof(afi));
	failed += ibuf_add(buf, &reserved, sizeof(reserved));
	failed += ibuf_add(buf, &safi, sizeof(safi));

	return (failed);
}
1298 
1299 int
1300 session_capa_add_gr(struct peer *p, struct ibuf *b, u_int8_t aid)
1301 {
1302 	u_int		errs = 0;
1303 	u_int16_t	afi;
1304 	u_int8_t	flags, safi;
1305 
1306 	if (aid2afi(aid, &afi, &safi)) {
1307 		log_warn("session_capa_add_gr: bad AID");
1308 		return (1);
1309 	}
1310 	if (p->capa.neg.grestart.flags[aid] & CAPA_GR_RESTARTING)
1311 		flags = CAPA_GR_F_FLAG;
1312 	else
1313 		flags = 0;
1314 
1315 	afi = htons(afi);
1316 	errs += ibuf_add(b, &afi, sizeof(afi));
1317 	errs += ibuf_add(b, &safi, sizeof(safi));
1318 	errs += ibuf_add(b, &flags, sizeof(flags));
1319 
1320 	return (errs);
1321 }
1322 
1323 struct bgp_msg *
1324 session_newmsg(enum msg_type msgtype, u_int16_t len)
1325 {
1326 	struct bgp_msg		*msg;
1327 	struct msg_header	 hdr;
1328 	struct ibuf		*buf;
1329 	int			 errs = 0;
1330 
1331 	memset(&hdr.marker, 0xff, sizeof(hdr.marker));
1332 	hdr.len = htons(len);
1333 	hdr.type = msgtype;
1334 
1335 	if ((buf = ibuf_open(len)) == NULL)
1336 		return (NULL);
1337 
1338 	errs += ibuf_add(buf, &hdr.marker, sizeof(hdr.marker));
1339 	errs += ibuf_add(buf, &hdr.len, sizeof(hdr.len));
1340 	errs += ibuf_add(buf, &hdr.type, sizeof(hdr.type));
1341 
1342 	if (errs || (msg = calloc(1, sizeof(*msg))) == NULL) {
1343 		ibuf_free(buf);
1344 		return (NULL);
1345 	}
1346 
1347 	msg->buf = buf;
1348 	msg->type = msgtype;
1349 	msg->len = len;
1350 
1351 	return (msg);
1352 }
1353 
1354 int
1355 session_sendmsg(struct bgp_msg *msg, struct peer *p)
1356 {
1357 	struct mrt		*mrt;
1358 
1359 	LIST_FOREACH(mrt, &mrthead, entry) {
1360 		if (!(mrt->type == MRT_ALL_OUT || (msg->type == UPDATE &&
1361 		    mrt->type == MRT_UPDATE_OUT)))
1362 			continue;
1363 		if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
1364 		    mrt->peer_id == p->conf.id || (mrt->group_id == 0 &&
1365 		    mrt->group_id == p->conf.groupid))
1366 			mrt_dump_bgp_msg(mrt, msg->buf->buf, msg->len, p);
1367 	}
1368 
1369 	ibuf_close(&p->wbuf, msg->buf);
1370 	free(msg);
1371 	return (0);
1372 }
1373 
1374 void
1375 session_open(struct peer *p)
1376 {
1377 	struct bgp_msg		*buf;
1378 	struct ibuf		*opb;
1379 	struct msg_open		 msg;
1380 	u_int16_t		 len;
1381 	u_int8_t		 i, op_type, optparamlen = 0;
1382 	int			 errs = 0;
1383 	int			 mpcapa = 0;
1384 
1385 
1386 	if ((opb = ibuf_dynamic(0, UCHAR_MAX - sizeof(op_type) -
1387 	    sizeof(optparamlen))) == NULL) {
1388 		bgp_fsm(p, EVNT_CON_FATAL);
1389 		return;
1390 	}
1391 
1392 	/* multiprotocol extensions, RFC 4760 */
1393 	for (i = 0; i < AID_MAX; i++)
1394 		if (p->capa.ann.mp[i]) {	/* 4 bytes data */
1395 			errs += session_capa_add(opb, CAPA_MP, 4);
1396 			errs += session_capa_add_mp(opb, i);
1397 			mpcapa++;
1398 		}
1399 
1400 	/* route refresh, RFC 2918 */
1401 	if (p->capa.ann.refresh)	/* no data */
1402 		errs += session_capa_add(opb, CAPA_REFRESH, 0);
1403 
1404 	/* graceful restart and End-of-RIB marker, RFC 4724 */
1405 	if (p->capa.ann.grestart.restart) {
1406 		int		rst = 0;
1407 		u_int16_t	hdr;
1408 		u_int8_t	grlen;
1409 
1410 		if (mpcapa) {
1411 			grlen = 2 + 4 * mpcapa;
1412 			for (i = 0; i < AID_MAX; i++) {
1413 				if (p->capa.neg.grestart.flags[i] &
1414 				    CAPA_GR_RESTARTING)
1415 					rst++;
1416 			}
1417 		} else {	/* AID_INET */
1418 			grlen = 2 + 4;
1419 			if (p->capa.neg.grestart.flags[AID_INET] &
1420 			    CAPA_GR_RESTARTING)
1421 				rst++;
1422 		}
1423 
1424 		hdr = conf->holdtime;		/* default timeout */
1425 		/* if client does graceful restart don't set R flag */
1426 		if (!rst)
1427 			hdr |= CAPA_GR_R_FLAG;
1428 		hdr = htons(hdr);
1429 
1430 		errs += session_capa_add(opb, CAPA_RESTART, grlen);
1431 		errs += ibuf_add(opb, &hdr, sizeof(hdr));
1432 
1433 		if (mpcapa) {
1434 			for (i = 0; i < AID_MAX; i++) {
1435 				if (p->capa.ann.mp[i]) {
1436 					errs += session_capa_add_gr(p, opb, i);
1437 				}
1438 			}
1439 		} else {	/* AID_INET */
1440 			errs += session_capa_add_gr(p, opb, AID_INET);
1441 		}
1442 	}
1443 
1444 	/* 4-bytes AS numbers, draft-ietf-idr-as4bytes-13 */
1445 	if (p->capa.ann.as4byte) {	/* 4 bytes data */
1446 		u_int32_t	nas;
1447 
1448 		nas = htonl(conf->as);
1449 		errs += session_capa_add(opb, CAPA_AS4BYTE, sizeof(nas));
1450 		errs += ibuf_add(opb, &nas, sizeof(nas));
1451 	}
1452 
1453 	if (ibuf_size(opb))
1454 		optparamlen = ibuf_size(opb) + sizeof(op_type) +
1455 		    sizeof(optparamlen);
1456 
1457 	len = MSGSIZE_OPEN_MIN + optparamlen;
1458 	if (errs || (buf = session_newmsg(OPEN, len)) == NULL) {
1459 		ibuf_free(opb);
1460 		bgp_fsm(p, EVNT_CON_FATAL);
1461 		return;
1462 	}
1463 
1464 	msg.version = 4;
1465 	msg.myas = htons(conf->short_as);
1466 	if (p->conf.holdtime)
1467 		msg.holdtime = htons(p->conf.holdtime);
1468 	else
1469 		msg.holdtime = htons(conf->holdtime);
1470 	msg.bgpid = conf->bgpid;	/* is already in network byte order */
1471 	msg.optparamlen = optparamlen;
1472 
1473 	errs += ibuf_add(buf->buf, &msg.version, sizeof(msg.version));
1474 	errs += ibuf_add(buf->buf, &msg.myas, sizeof(msg.myas));
1475 	errs += ibuf_add(buf->buf, &msg.holdtime, sizeof(msg.holdtime));
1476 	errs += ibuf_add(buf->buf, &msg.bgpid, sizeof(msg.bgpid));
1477 	errs += ibuf_add(buf->buf, &msg.optparamlen, sizeof(msg.optparamlen));
1478 
1479 	if (optparamlen) {
1480 		op_type = OPT_PARAM_CAPABILITIES;
1481 		optparamlen = ibuf_size(opb);
1482 		errs += ibuf_add(buf->buf, &op_type, sizeof(op_type));
1483 		errs += ibuf_add(buf->buf, &optparamlen, sizeof(optparamlen));
1484 		errs += ibuf_add(buf->buf, opb->buf, ibuf_size(opb));
1485 	}
1486 
1487 	ibuf_free(opb);
1488 
1489 	if (errs) {
1490 		ibuf_free(buf->buf);
1491 		free(buf);
1492 		bgp_fsm(p, EVNT_CON_FATAL);
1493 		return;
1494 	}
1495 
1496 	if (session_sendmsg(buf, p) == -1) {
1497 		bgp_fsm(p, EVNT_CON_FATAL);
1498 		return;
1499 	}
1500 
1501 	p->stats.msg_sent_open++;
1502 }
1503 
1504 void
1505 session_keepalive(struct peer *p)
1506 {
1507 	struct bgp_msg		*buf;
1508 
1509 	if ((buf = session_newmsg(KEEPALIVE, MSGSIZE_KEEPALIVE)) == NULL ||
1510 	    session_sendmsg(buf, p) == -1) {
1511 		bgp_fsm(p, EVNT_CON_FATAL);
1512 		return;
1513 	}
1514 
1515 	start_timer_keepalive(p);
1516 	p->stats.msg_sent_keepalive++;
1517 }
1518 
1519 void
1520 session_update(u_int32_t peerid, void *data, size_t datalen)
1521 {
1522 	struct peer		*p;
1523 	struct bgp_msg		*buf;
1524 
1525 	if ((p = getpeerbyid(peerid)) == NULL) {
1526 		log_warnx("no such peer: id=%u", peerid);
1527 		return;
1528 	}
1529 
1530 	if (p->state != STATE_ESTABLISHED)
1531 		return;
1532 
1533 	if ((buf = session_newmsg(UPDATE, MSGSIZE_HEADER + datalen)) == NULL) {
1534 		bgp_fsm(p, EVNT_CON_FATAL);
1535 		return;
1536 	}
1537 
1538 	if (ibuf_add(buf->buf, data, datalen)) {
1539 		ibuf_free(buf->buf);
1540 		free(buf);
1541 		bgp_fsm(p, EVNT_CON_FATAL);
1542 		return;
1543 	}
1544 
1545 	if (session_sendmsg(buf, p) == -1) {
1546 		bgp_fsm(p, EVNT_CON_FATAL);
1547 		return;
1548 	}
1549 
1550 	start_timer_keepalive(p);
1551 	p->stats.msg_sent_update++;
1552 }
1553 
1554 void
1555 session_notification(struct peer *p, u_int8_t errcode, u_int8_t subcode,
1556     void *data, ssize_t datalen)
1557 {
1558 	struct bgp_msg		*buf;
1559 	int			 errs = 0;
1560 
1561 	if (p->stats.last_sent_errcode)	/* some notification already sent */
1562 		return;
1563 
1564 	log_notification(p, errcode, subcode, data, datalen, "sending");
1565 
1566 	if ((buf = session_newmsg(NOTIFICATION,
1567 	    MSGSIZE_NOTIFICATION_MIN + datalen)) == NULL) {
1568 		bgp_fsm(p, EVNT_CON_FATAL);
1569 		return;
1570 	}
1571 
1572 	errs += ibuf_add(buf->buf, &errcode, sizeof(errcode));
1573 	errs += ibuf_add(buf->buf, &subcode, sizeof(subcode));
1574 
1575 	if (datalen > 0)
1576 		errs += ibuf_add(buf->buf, data, datalen);
1577 
1578 	if (errs) {
1579 		ibuf_free(buf->buf);
1580 		free(buf);
1581 		bgp_fsm(p, EVNT_CON_FATAL);
1582 		return;
1583 	}
1584 
1585 	if (session_sendmsg(buf, p) == -1) {
1586 		bgp_fsm(p, EVNT_CON_FATAL);
1587 		return;
1588 	}
1589 
1590 	p->stats.msg_sent_notification++;
1591 	p->stats.last_sent_errcode = errcode;
1592 	p->stats.last_sent_suberr = subcode;
1593 }
1594 
1595 int
1596 session_neighbor_rrefresh(struct peer *p)
1597 {
1598 	u_int8_t	i;
1599 
1600 	if (!p->capa.peer.refresh)
1601 		return (-1);
1602 
1603 	for (i = 0; i < AID_MAX; i++) {
1604 		if (p->capa.peer.mp[i] != 0)
1605 			session_rrefresh(p, i);
1606 	}
1607 
1608 	return (0);
1609 }
1610 
1611 void
1612 session_rrefresh(struct peer *p, u_int8_t aid)
1613 {
1614 	struct bgp_msg		*buf;
1615 	int			 errs = 0;
1616 	u_int16_t		 afi;
1617 	u_int8_t		 safi, null8 = 0;
1618 
1619 	if (aid2afi(aid, &afi, &safi) == -1)
1620 		fatalx("session_rrefresh: bad afi/safi pair");
1621 
1622 	if ((buf = session_newmsg(RREFRESH, MSGSIZE_RREFRESH)) == NULL) {
1623 		bgp_fsm(p, EVNT_CON_FATAL);
1624 		return;
1625 	}
1626 
1627 	afi = htons(afi);
1628 	errs += ibuf_add(buf->buf, &afi, sizeof(afi));
1629 	errs += ibuf_add(buf->buf, &null8, sizeof(null8));
1630 	errs += ibuf_add(buf->buf, &safi, sizeof(safi));
1631 
1632 	if (errs) {
1633 		ibuf_free(buf->buf);
1634 		free(buf);
1635 		bgp_fsm(p, EVNT_CON_FATAL);
1636 		return;
1637 	}
1638 
1639 	if (session_sendmsg(buf, p) == -1) {
1640 		bgp_fsm(p, EVNT_CON_FATAL);
1641 		return;
1642 	}
1643 
1644 	p->stats.msg_sent_rrefresh++;
1645 }
1646 
1647 int
1648 session_graceful_restart(struct peer *p)
1649 {
1650 	u_int8_t	i;
1651 
1652 	timer_set(p, Timer_RestartTimeout, p->capa.neg.grestart.timeout);
1653 
1654 	for (i = 0; i < AID_MAX; i++) {
1655 		if (p->capa.neg.grestart.flags[i] & CAPA_GR_PRESENT) {
1656 			if (imsg_compose(ibuf_rde, IMSG_SESSION_STALE,
1657 			    p->conf.id, 0, -1, &i, sizeof(i)) == -1)
1658 				return (-1);
1659 			log_peer_warnx(&p->conf,
1660 			    "graceful restart of %s, keeping routes",
1661 			    aid2str(i));
1662 			p->capa.neg.grestart.flags[i] |= CAPA_GR_RESTARTING;
1663 		} else if (p->capa.neg.mp[i]) {
1664 			if (imsg_compose(ibuf_rde, IMSG_SESSION_FLUSH,
1665 			    p->conf.id, 0, -1, &i, sizeof(i)) == -1)
1666 				return (-1);
1667 			log_peer_warnx(&p->conf,
1668 			    "graceful restart of %s, flushing routes",
1669 			    aid2str(i));
1670 		}
1671 	}
1672 	return (0);
1673 }
1674 
1675 int
1676 session_graceful_stop(struct peer *p)
1677 {
1678 	u_int8_t	i;
1679 
1680 	for (i = 0; i < AID_MAX; i++) {
1681 		/*
1682 		 * Only flush if the peer is restarting and the timeout fired.
1683 		 * In all other cases the session was already flushed when the
1684 		 * session went down or when the new open message was parsed.
1685 		 */
1686 		if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING) {
1687 			log_peer_warnx(&p->conf, "graceful restart of %s, "
1688 			    "time-out, flushing", aid2str(i));
1689 			if (imsg_compose(ibuf_rde, IMSG_SESSION_FLUSH,
1690 			    p->conf.id, 0, -1, &i, sizeof(i)) == -1)
1691 				return (-1);
1692 		}
1693 		p->capa.neg.grestart.flags[i] &= ~CAPA_GR_RESTARTING;
1694 	}
1695 	return (0);
1696 }
1697 
1698 int
1699 session_dispatch_msg(struct pollfd *pfd, struct peer *p)
1700 {
1701 	ssize_t		n;
1702 	socklen_t	len;
1703 	int		error;
1704 
1705 	if (p->state == STATE_CONNECT) {
1706 		if (pfd->revents & POLLOUT) {
1707 			if (pfd->revents & POLLIN) {
1708 				/* error occurred */
1709 				len = sizeof(error);
1710 				if (getsockopt(pfd->fd, SOL_SOCKET, SO_ERROR,
1711 				    &error, &len) == -1 || error) {
1712 					if (error)
1713 						errno = error;
1714 					if (errno != p->lasterr) {
1715 						log_peer_warn(&p->conf,
1716 						    "socket error");
1717 						p->lasterr = errno;
1718 					}
1719 					bgp_fsm(p, EVNT_CON_OPENFAIL);
1720 					return (1);
1721 				}
1722 			}
1723 			bgp_fsm(p, EVNT_CON_OPEN);
1724 			return (1);
1725 		}
1726 		if (pfd->revents & POLLHUP) {
1727 			bgp_fsm(p, EVNT_CON_OPENFAIL);
1728 			return (1);
1729 		}
1730 		if (pfd->revents & (POLLERR|POLLNVAL)) {
1731 			bgp_fsm(p, EVNT_CON_FATAL);
1732 			return (1);
1733 		}
1734 		return (0);
1735 	}
1736 
1737 	if (pfd->revents & POLLHUP) {
1738 		bgp_fsm(p, EVNT_CON_CLOSED);
1739 		return (1);
1740 	}
1741 	if (pfd->revents & (POLLERR|POLLNVAL)) {
1742 		bgp_fsm(p, EVNT_CON_FATAL);
1743 		return (1);
1744 	}
1745 
1746 	if (pfd->revents & POLLOUT && p->wbuf.queued) {
1747 		if ((error = msgbuf_write(&p->wbuf)) <= 0 && errno != EAGAIN) {
1748 			if (error == 0)
1749 				log_peer_warnx(&p->conf, "Connection closed");
1750 			else if (error == -1)
1751 				log_peer_warn(&p->conf, "write error");
1752 			bgp_fsm(p, EVNT_CON_FATAL);
1753 			return (1);
1754 		}
1755 		if (!(pfd->revents & POLLIN))
1756 			return (1);
1757 	}
1758 
1759 	if (p->rbuf && pfd->revents & POLLIN) {
1760 		if ((n = read(p->fd, p->rbuf->buf + p->rbuf->wpos,
1761 		    sizeof(p->rbuf->buf) - p->rbuf->wpos)) == -1) {
1762 			if (errno != EINTR && errno != EAGAIN) {
1763 				log_peer_warn(&p->conf, "read error");
1764 				bgp_fsm(p, EVNT_CON_FATAL);
1765 			}
1766 			return (1);
1767 		}
1768 		if (n == 0) {	/* connection closed */
1769 			bgp_fsm(p, EVNT_CON_CLOSED);
1770 			return (1);
1771 		}
1772 
1773 		p->rbuf->wpos += n;
1774 		p->stats.last_read = time(NULL);
1775 		return (1);
1776 	}
1777 	return (0);
1778 }
1779 
1780 int
1781 session_process_msg(struct peer *p)
1782 {
1783 	ssize_t		rpos, av, left;
1784 	int		processed = 0;
1785 	u_int16_t	msglen;
1786 	u_int8_t	msgtype;
1787 
1788 	rpos = 0;
1789 	av = p->rbuf->wpos;
1790 
1791 	/*
1792 	 * session might drop to IDLE -> buffers deallocated
1793 	 * we MUST check rbuf != NULL before use
1794 	 */
1795 	for (;;) {
1796 		if (rpos + MSGSIZE_HEADER > av)
1797 			break;
1798 		if (p->rbuf == NULL)
1799 			break;
1800 		if (parse_header(p, p->rbuf->buf + rpos, &msglen,
1801 		    &msgtype) == -1)
1802 			return (0);
1803 		if (rpos + msglen > av)
1804 			break;
1805 		p->rbuf->rptr = p->rbuf->buf + rpos;
1806 
1807 		switch (msgtype) {
1808 		case OPEN:
1809 			bgp_fsm(p, EVNT_RCVD_OPEN);
1810 			p->stats.msg_rcvd_open++;
1811 			break;
1812 		case UPDATE:
1813 			bgp_fsm(p, EVNT_RCVD_UPDATE);
1814 			p->stats.msg_rcvd_update++;
1815 			break;
1816 		case NOTIFICATION:
1817 			bgp_fsm(p, EVNT_RCVD_NOTIFICATION);
1818 			p->stats.msg_rcvd_notification++;
1819 			break;
1820 		case KEEPALIVE:
1821 			bgp_fsm(p, EVNT_RCVD_KEEPALIVE);
1822 			p->stats.msg_rcvd_keepalive++;
1823 			break;
1824 		case RREFRESH:
1825 			parse_refresh(p);
1826 			p->stats.msg_rcvd_rrefresh++;
1827 			break;
1828 		default:	/* cannot happen */
1829 			session_notification(p, ERR_HEADER, ERR_HDR_TYPE,
1830 			    &msgtype, 1);
1831 			log_warnx("received message with unknown type %u",
1832 			    msgtype);
1833 			bgp_fsm(p, EVNT_CON_FATAL);
1834 		}
1835 		rpos += msglen;
1836 		if (++processed > MSG_PROCESS_LIMIT)
1837 			break;
1838 	}
1839 	if (p->rbuf == NULL)
1840 		return (1);
1841 
1842 	if (rpos < av) {
1843 		left = av - rpos;
1844 		memmove(&p->rbuf->buf, p->rbuf->buf + rpos, left);
1845 		p->rbuf->wpos = left;
1846 	} else
1847 		p->rbuf->wpos = 0;
1848 
1849 	return (1);
1850 }
1851 
1852 int
1853 parse_header(struct peer *peer, u_char *data, u_int16_t *len, u_int8_t *type)
1854 {
1855 	struct mrt		*mrt;
1856 	u_char			*p;
1857 	u_int16_t		 olen;
1858 	static const u_int8_t	 marker[MSGSIZE_HEADER_MARKER] = { 0xff, 0xff,
1859 				    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1860 				    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
1861 
1862 	/* caller MUST make sure we are getting 19 bytes! */
1863 	p = data;
1864 	if (memcmp(p, marker, sizeof(marker))) {
1865 		log_peer_warnx(&peer->conf, "sync error");
1866 		session_notification(peer, ERR_HEADER, ERR_HDR_SYNC, NULL, 0);
1867 		bgp_fsm(peer, EVNT_CON_FATAL);
1868 		return (-1);
1869 	}
1870 	p += MSGSIZE_HEADER_MARKER;
1871 
1872 	memcpy(&olen, p, 2);
1873 	*len = ntohs(olen);
1874 	p += 2;
1875 	memcpy(type, p, 1);
1876 
1877 	if (*len < MSGSIZE_HEADER || *len > MAX_PKTSIZE) {
1878 		log_peer_warnx(&peer->conf,
1879 		    "received message: illegal length: %u byte", *len);
1880 		session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1881 		    &olen, sizeof(olen));
1882 		bgp_fsm(peer, EVNT_CON_FATAL);
1883 		return (-1);
1884 	}
1885 
1886 	switch (*type) {
1887 	case OPEN:
1888 		if (*len < MSGSIZE_OPEN_MIN) {
1889 			log_peer_warnx(&peer->conf,
1890 			    "received OPEN: illegal len: %u byte", *len);
1891 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1892 			    &olen, sizeof(olen));
1893 			bgp_fsm(peer, EVNT_CON_FATAL);
1894 			return (-1);
1895 		}
1896 		break;
1897 	case NOTIFICATION:
1898 		if (*len < MSGSIZE_NOTIFICATION_MIN) {
1899 			log_peer_warnx(&peer->conf,
1900 			    "received NOTIFICATION: illegal len: %u byte",
1901 			    *len);
1902 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1903 			    &olen, sizeof(olen));
1904 			bgp_fsm(peer, EVNT_CON_FATAL);
1905 			return (-1);
1906 		}
1907 		break;
1908 	case UPDATE:
1909 		if (*len < MSGSIZE_UPDATE_MIN) {
1910 			log_peer_warnx(&peer->conf,
1911 			    "received UPDATE: illegal len: %u byte", *len);
1912 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1913 			    &olen, sizeof(olen));
1914 			bgp_fsm(peer, EVNT_CON_FATAL);
1915 			return (-1);
1916 		}
1917 		break;
1918 	case KEEPALIVE:
1919 		if (*len != MSGSIZE_KEEPALIVE) {
1920 			log_peer_warnx(&peer->conf,
1921 			    "received KEEPALIVE: illegal len: %u byte", *len);
1922 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1923 			    &olen, sizeof(olen));
1924 			bgp_fsm(peer, EVNT_CON_FATAL);
1925 			return (-1);
1926 		}
1927 		break;
1928 	case RREFRESH:
1929 		if (*len != MSGSIZE_RREFRESH) {
1930 			log_peer_warnx(&peer->conf,
1931 			    "received RREFRESH: illegal len: %u byte", *len);
1932 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1933 			    &olen, sizeof(olen));
1934 			bgp_fsm(peer, EVNT_CON_FATAL);
1935 			return (-1);
1936 		}
1937 		break;
1938 	default:
1939 		log_peer_warnx(&peer->conf,
1940 		    "received msg with unknown type %u", *type);
1941 		session_notification(peer, ERR_HEADER, ERR_HDR_TYPE,
1942 		    type, 1);
1943 		bgp_fsm(peer, EVNT_CON_FATAL);
1944 		return (-1);
1945 	}
1946 	LIST_FOREACH(mrt, &mrthead, entry) {
1947 		if (!(mrt->type == MRT_ALL_IN || (*type == UPDATE &&
1948 		    mrt->type == MRT_UPDATE_IN)))
1949 			continue;
1950 		if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
1951 		    mrt->peer_id == peer->conf.id || (mrt->group_id != 0 &&
1952 		    mrt->group_id == peer->conf.groupid))
1953 			mrt_dump_bgp_msg(mrt, data, *len, peer);
1954 	}
1955 	return (0);
1956 }
1957 
1958 int
1959 parse_open(struct peer *peer)
1960 {
1961 	u_char		*p, *op_val;
1962 	u_int8_t	 version, rversion;
1963 	u_int16_t	 short_as, msglen;
1964 	u_int16_t	 holdtime, oholdtime, myholdtime;
1965 	u_int32_t	 as, bgpid;
1966 	u_int8_t	 optparamlen, plen;
1967 	u_int8_t	 op_type, op_len;
1968 
1969 	p = peer->rbuf->rptr;
1970 	p += MSGSIZE_HEADER_MARKER;
1971 	memcpy(&msglen, p, sizeof(msglen));
1972 	msglen = ntohs(msglen);
1973 
1974 	p = peer->rbuf->rptr;
1975 	p += MSGSIZE_HEADER;	/* header is already checked */
1976 
1977 	memcpy(&version, p, sizeof(version));
1978 	p += sizeof(version);
1979 
1980 	if (version != BGP_VERSION) {
1981 		log_peer_warnx(&peer->conf,
1982 		    "peer wants unrecognized version %u", version);
1983 		if (version > BGP_VERSION)
1984 			rversion = version - BGP_VERSION;
1985 		else
1986 			rversion = BGP_VERSION;
1987 		session_notification(peer, ERR_OPEN, ERR_OPEN_VERSION,
1988 		    &rversion, sizeof(rversion));
1989 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
1990 		return (-1);
1991 	}
1992 
1993 	memcpy(&short_as, p, sizeof(short_as));
1994 	p += sizeof(short_as);
1995 	as = peer->short_as = ntohs(short_as);
1996 
1997 	memcpy(&oholdtime, p, sizeof(oholdtime));
1998 	p += sizeof(oholdtime);
1999 
2000 	holdtime = ntohs(oholdtime);
2001 	if (holdtime && holdtime < peer->conf.min_holdtime) {
2002 		log_peer_warnx(&peer->conf,
2003 		    "peer requests unacceptable holdtime %u", holdtime);
2004 		session_notification(peer, ERR_OPEN, ERR_OPEN_HOLDTIME,
2005 		    NULL, 0);
2006 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2007 		return (-1);
2008 	}
2009 
2010 	myholdtime = peer->conf.holdtime;
2011 	if (!myholdtime)
2012 		myholdtime = conf->holdtime;
2013 	if (holdtime < myholdtime)
2014 		peer->holdtime = holdtime;
2015 	else
2016 		peer->holdtime = myholdtime;
2017 
2018 	memcpy(&bgpid, p, sizeof(bgpid));
2019 	p += sizeof(bgpid);
2020 
2021 	/* check bgpid for validity - just disallow 0 */
2022 	if (ntohl(bgpid) == 0) {
2023 		log_peer_warnx(&peer->conf, "peer BGPID %u unacceptable",
2024 		    ntohl(bgpid));
2025 		session_notification(peer, ERR_OPEN, ERR_OPEN_BGPID,
2026 		    NULL, 0);
2027 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2028 		return (-1);
2029 	}
2030 	peer->remote_bgpid = bgpid;
2031 
2032 	memcpy(&optparamlen, p, sizeof(optparamlen));
2033 	p += sizeof(optparamlen);
2034 
2035 	if (optparamlen != msglen - MSGSIZE_OPEN_MIN) {
2036 			log_peer_warnx(&peer->conf,
2037 			    "corrupt OPEN message received: length mismatch");
2038 			session_notification(peer, ERR_OPEN, 0, NULL, 0);
2039 			change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2040 			return (-1);
2041 	}
2042 
2043 	plen = optparamlen;
2044 	while (plen > 0) {
2045 		if (plen < 2) {
2046 			log_peer_warnx(&peer->conf,
2047 			    "corrupt OPEN message received, len wrong");
2048 			session_notification(peer, ERR_OPEN, 0, NULL, 0);
2049 			change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2050 			return (-1);
2051 		}
2052 		memcpy(&op_type, p, sizeof(op_type));
2053 		p += sizeof(op_type);
2054 		plen -= sizeof(op_type);
2055 		memcpy(&op_len, p, sizeof(op_len));
2056 		p += sizeof(op_len);
2057 		plen -= sizeof(op_len);
2058 		if (op_len > 0) {
2059 			if (plen < op_len) {
2060 				log_peer_warnx(&peer->conf,
2061 				    "corrupt OPEN message received, len wrong");
2062 				session_notification(peer, ERR_OPEN, 0,
2063 				    NULL, 0);
2064 				change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2065 				return (-1);
2066 			}
2067 			op_val = p;
2068 			p += op_len;
2069 			plen -= op_len;
2070 		} else
2071 			op_val = NULL;
2072 
2073 		switch (op_type) {
2074 		case OPT_PARAM_CAPABILITIES:		/* RFC 3392 */
2075 			if (parse_capabilities(peer, op_val, op_len,
2076 			    &as) == -1) {
2077 				session_notification(peer, ERR_OPEN, 0,
2078 				    NULL, 0);
2079 				change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2080 				return (-1);
2081 			}
2082 			break;
2083 		case OPT_PARAM_AUTH:			/* deprecated */
2084 		default:
2085 			/*
2086 			 * unsupported type
2087 			 * the RFCs tell us to leave the data section empty
2088 			 * and notify the peer with ERR_OPEN, ERR_OPEN_OPT.
2089 			 * How the peer should know _which_ optional parameter
2090 			 * we don't support is beyond me.
2091 			 */
2092 			log_peer_warnx(&peer->conf,
2093 			    "received OPEN message with unsupported optional "
2094 			    "parameter: type %u", op_type);
2095 			session_notification(peer, ERR_OPEN, ERR_OPEN_OPT,
2096 				NULL, 0);
2097 			change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2098 			timer_set(peer, Timer_IdleHold, 0);	/* no punish */
2099 			peer->IdleHoldTime /= 2;
2100 			return (-1);
2101 		}
2102 	}
2103 
2104 	/* if remote-as is zero and it's a cloned neighbor, accept any */
2105 	if (peer->template && !peer->conf.remote_as && as != AS_TRANS) {
2106 		peer->conf.remote_as = as;
2107 		peer->conf.ebgp = (peer->conf.remote_as != conf->as);
2108 		if (!peer->conf.ebgp)
2109 			/* force enforce_as off for iBGP sessions */
2110 			peer->conf.enforce_as = ENFORCE_AS_OFF;
2111 	}
2112 
2113 	if (peer->conf.remote_as != as) {
2114 		log_peer_warnx(&peer->conf, "peer sent wrong AS %s",
2115 		    log_as(as));
2116 		session_notification(peer, ERR_OPEN, ERR_OPEN_AS, NULL, 0);
2117 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2118 		return (-1);
2119 	}
2120 
2121 	if (capa_neg_calc(peer) == -1) {
2122 		log_peer_warnx(&peer->conf,
2123 		    "capability negotiation calculation failed");
2124 		session_notification(peer, ERR_OPEN, 0, NULL, 0);
2125 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2126 		return (-1);
2127 	}
2128 
2129 	return (0);
2130 }
2131 
2132 int
2133 parse_update(struct peer *peer)
2134 {
2135 	u_char		*p;
2136 	u_int16_t	 datalen;
2137 
2138 	/*
2139 	 * we pass the message verbatim to the rde.
2140 	 * in case of errors the whole session is reset with a
2141 	 * notification anyway, we only need to know the peer
2142 	 */
2143 	p = peer->rbuf->rptr;
2144 	p += MSGSIZE_HEADER_MARKER;
2145 	memcpy(&datalen, p, sizeof(datalen));
2146 	datalen = ntohs(datalen);
2147 
2148 	p = peer->rbuf->rptr;
2149 	p += MSGSIZE_HEADER;	/* header is already checked */
2150 	datalen -= MSGSIZE_HEADER;
2151 
2152 	if (imsg_compose(ibuf_rde, IMSG_UPDATE, peer->conf.id, 0, -1, p,
2153 	    datalen) == -1)
2154 		return (-1);
2155 
2156 	return (0);
2157 }
2158 
2159 int
2160 parse_refresh(struct peer *peer)
2161 {
2162 	u_char		*p;
2163 	u_int16_t	 afi;
2164 	u_int8_t	 aid, safi;
2165 
2166 	p = peer->rbuf->rptr;
2167 	p += MSGSIZE_HEADER;	/* header is already checked */
2168 
2169 	/*
2170 	 * We could check if we actually announced the capability but
2171 	 * as long as the message is correctly encoded we don't care.
2172 	 */
2173 
2174 	/* afi, 2 byte */
2175 	memcpy(&afi, p, sizeof(afi));
2176 	afi = ntohs(afi);
2177 	p += 2;
2178 	/* reserved, 1 byte */
2179 	p += 1;
2180 	/* safi, 1 byte */
2181 	memcpy(&safi, p, sizeof(safi));
2182 
2183 	/* afi/safi unchecked -	unrecognized values will be ignored anyway */
2184 	if (afi2aid(afi, safi, &aid) == -1) {
2185 		log_peer_warnx(&peer->conf, "peer sent bad refresh, "
2186 		    "invalid afi/safi pair");
2187 		return (0);
2188 	}
2189 
2190 	if (imsg_compose(ibuf_rde, IMSG_REFRESH, peer->conf.id, 0, -1, &aid,
2191 	    sizeof(aid)) == -1)
2192 		return (-1);
2193 
2194 	return (0);
2195 }
2196 
2197 int
2198 parse_notification(struct peer *peer)
2199 {
2200 	u_char		*p;
2201 	u_int16_t	 datalen;
2202 	u_int8_t	 errcode;
2203 	u_int8_t	 subcode;
2204 	u_int8_t	 capa_code;
2205 	u_int8_t	 capa_len;
2206 	u_int8_t	 i;
2207 
2208 	/* just log */
2209 	p = peer->rbuf->rptr;
2210 	p += MSGSIZE_HEADER_MARKER;
2211 	memcpy(&datalen, p, sizeof(datalen));
2212 	datalen = ntohs(datalen);
2213 
2214 	p = peer->rbuf->rptr;
2215 	p += MSGSIZE_HEADER;	/* header is already checked */
2216 	datalen -= MSGSIZE_HEADER;
2217 
2218 	memcpy(&errcode, p, sizeof(errcode));
2219 	p += sizeof(errcode);
2220 	datalen -= sizeof(errcode);
2221 
2222 	memcpy(&subcode, p, sizeof(subcode));
2223 	p += sizeof(subcode);
2224 	datalen -= sizeof(subcode);
2225 
2226 	log_notification(peer, errcode, subcode, p, datalen, "received");
2227 	peer->errcnt++;
2228 
2229 	if (errcode == ERR_OPEN && subcode == ERR_OPEN_CAPA) {
2230 		if (datalen == 0) {	/* zebra likes to send those.. humbug */
2231 			log_peer_warnx(&peer->conf, "received \"unsupported "
2232 			    "capability\" notification without data part, "
2233 			    "disabling capability announcements altogether");
2234 			session_capa_ann_none(peer);
2235 		}
2236 
2237 		while (datalen > 0) {
2238 			if (datalen < 2) {
2239 				log_peer_warnx(&peer->conf,
2240 				    "parse_notification: "
2241 				    "expect len >= 2, len is %u", datalen);
2242 				return (-1);
2243 			}
2244 			memcpy(&capa_code, p, sizeof(capa_code));
2245 			p += sizeof(capa_code);
2246 			datalen -= sizeof(capa_code);
2247 			memcpy(&capa_len, p, sizeof(capa_len));
2248 			p += sizeof(capa_len);
2249 			datalen -= sizeof(capa_len);
2250 			if (datalen < capa_len) {
2251 				log_peer_warnx(&peer->conf,
2252 				    "parse_notification: capa_len %u exceeds "
2253 				    "remaining msg length %u", capa_len,
2254 				    datalen);
2255 				return (-1);
2256 			}
2257 			p += capa_len;
2258 			datalen -= capa_len;
2259 			switch (capa_code) {
2260 			case CAPA_MP:
2261 				for (i = 0; i < AID_MAX; i++)
2262 					peer->capa.ann.mp[i] = 0;
2263 				log_peer_warnx(&peer->conf,
2264 				    "disabling multiprotocol capability");
2265 				break;
2266 			case CAPA_REFRESH:
2267 				peer->capa.ann.refresh = 0;
2268 				log_peer_warnx(&peer->conf,
2269 				    "disabling route refresh capability");
2270 				break;
2271 			case CAPA_RESTART:
2272 				peer->capa.ann.grestart.restart = 0;
2273 				log_peer_warnx(&peer->conf,
2274 				    "disabling restart capability");
2275 				break;
2276 			case CAPA_AS4BYTE:
2277 				peer->capa.ann.as4byte = 0;
2278 				log_peer_warnx(&peer->conf,
2279 				    "disabling 4-byte AS num capability");
2280 				break;
2281 			default:	/* should not happen... */
2282 				log_peer_warnx(&peer->conf, "received "
2283 				    "\"unsupported capability\" notification "
2284 				    "for unknown capability %u, disabling "
2285 				    "capability announcements altogether",
2286 				    capa_code);
2287 				session_capa_ann_none(peer);
2288 				break;
2289 			}
2290 		}
2291 
2292 		return (1);
2293 	}
2294 
2295 	if (errcode == ERR_OPEN && subcode == ERR_OPEN_OPT) {
2296 		session_capa_ann_none(peer);
2297 		return (1);
2298 	}
2299 
2300 	return (0);
2301 }
2302 
2303 int
2304 parse_capabilities(struct peer *peer, u_char *d, u_int16_t dlen, u_int32_t *as)
2305 {
2306 	u_char		*capa_val;
2307 	u_int32_t	 remote_as;
2308 	u_int16_t	 len;
2309 	u_int16_t	 afi;
2310 	u_int16_t	 gr_header;
2311 	u_int8_t	 safi;
2312 	u_int8_t	 aid;
2313 	u_int8_t	 gr_flags;
2314 	u_int8_t	 capa_code;
2315 	u_int8_t	 capa_len;
2316 	u_int8_t	 i;
2317 
2318 	len = dlen;
2319 	while (len > 0) {
2320 		if (len < 2) {
2321 			log_peer_warnx(&peer->conf, "Bad capabilities attr "
2322 			    "length: %u, too short", len);
2323 			return (-1);
2324 		}
2325 		memcpy(&capa_code, d, sizeof(capa_code));
2326 		d += sizeof(capa_code);
2327 		len -= sizeof(capa_code);
2328 		memcpy(&capa_len, d, sizeof(capa_len));
2329 		d += sizeof(capa_len);
2330 		len -= sizeof(capa_len);
2331 		if (capa_len > 0) {
2332 			if (len < capa_len) {
2333 				log_peer_warnx(&peer->conf,
2334 				    "Bad capabilities attr length: "
2335 				    "len %u smaller than capa_len %u",
2336 				    len, capa_len);
2337 				return (-1);
2338 			}
2339 			capa_val = d;
2340 			d += capa_len;
2341 			len -= capa_len;
2342 		} else
2343 			capa_val = NULL;
2344 
2345 		switch (capa_code) {
2346 		case CAPA_MP:			/* RFC 4760 */
2347 			if (capa_len != 4) {
2348 				log_peer_warnx(&peer->conf,
2349 				    "Bad multi protocol capability length: "
2350 				    "%u", capa_len);
2351 				break;
2352 			}
2353 			memcpy(&afi, capa_val, sizeof(afi));
2354 			afi = ntohs(afi);
2355 			memcpy(&safi, capa_val + 3, sizeof(safi));
2356 			if (afi2aid(afi, safi, &aid) == -1) {
2357 				log_peer_warnx(&peer->conf,
2358 				    "Received multi protocol capability: "
2359 				    " unknown AFI %u, safi %u pair",
2360 				    afi, safi);
2361 				break;
2362 			}
2363 			peer->capa.peer.mp[aid] = 1;
2364 			break;
2365 		case CAPA_REFRESH:
2366 			peer->capa.peer.refresh = 1;
2367 			break;
2368 		case CAPA_RESTART:
2369 			if (capa_len == 2) {
2370 				/* peer only supports EoR marker */
2371 				peer->capa.peer.grestart.restart = 1;
2372 				peer->capa.peer.grestart.timeout = 0;
2373 				break;
2374 			} else if (capa_len % 4 != 2) {
2375 				log_peer_warnx(&peer->conf,
2376 				    "Bad graceful restart capability length: "
2377 				    "%u", capa_len);
2378 				peer->capa.peer.grestart.restart = 0;
2379 				peer->capa.peer.grestart.timeout = 0;
2380 				break;
2381 			}
2382 
2383 			memcpy(&gr_header, capa_val, sizeof(gr_header));
2384 			gr_header = ntohs(gr_header);
2385 			peer->capa.peer.grestart.timeout =
2386 			    gr_header & CAPA_GR_TIMEMASK;
2387 			if (peer->capa.peer.grestart.timeout == 0) {
2388 				log_peer_warnx(&peer->conf, "Received "
2389 				    "graceful restart timeout is zero");
2390 				peer->capa.peer.grestart.restart = 0;
2391 				break;
2392 			}
2393 
2394 			for (i = 2; i <= capa_len - 4; i += 4) {
2395 				memcpy(&afi, capa_val + i, sizeof(afi));
2396 				afi = ntohs(afi);
2397 				memcpy(&safi, capa_val + i + 2, sizeof(safi));
2398 				if (afi2aid(afi, safi, &aid) == -1) {
2399 					log_peer_warnx(&peer->conf,
2400 					    "Received graceful restart capa: "
2401 					    " unknown AFI %u, safi %u pair",
2402 					    afi, safi);
2403 					continue;
2404 				}
2405 				memcpy(&gr_flags, capa_val + i + 3,
2406 				    sizeof(gr_flags));
2407 				peer->capa.peer.grestart.flags[aid] |=
2408 				    CAPA_GR_PRESENT;
2409 				if (gr_flags & CAPA_GR_F_FLAG)
2410 					peer->capa.peer.grestart.flags[aid] |=
2411 					    CAPA_GR_FORWARD;
2412 				if (gr_header & CAPA_GR_R_FLAG)
2413 					peer->capa.peer.grestart.flags[aid] |=
2414 					    CAPA_GR_RESTART;
2415 				peer->capa.peer.grestart.restart = 2;
2416 			}
2417 			break;
2418 		case CAPA_AS4BYTE:
2419 			if (capa_len != 4) {
2420 				log_peer_warnx(&peer->conf,
2421 				    "Bad AS4BYTE capability length: "
2422 				    "%u", capa_len);
2423 				peer->capa.peer.as4byte = 0;
2424 				break;
2425 			}
2426 			memcpy(&remote_as, capa_val, sizeof(remote_as));
2427 			*as = ntohl(remote_as);
2428 			peer->capa.peer.as4byte = 1;
2429 			break;
2430 		default:
2431 			break;
2432 		}
2433 	}
2434 
2435 	return (0);
2436 }
2437 
2438 int
2439 capa_neg_calc(struct peer *p)
2440 {
2441 	u_int8_t	i, hasmp = 0;
2442 
2443 	/* refresh: does not realy matter here, use peer setting */
2444 	p->capa.neg.refresh = p->capa.peer.refresh;
2445 
2446 	/* as4byte: both side must announce capability */
2447 	if (p->capa.ann.as4byte && p->capa.peer.as4byte)
2448 		p->capa.neg.as4byte = 1;
2449 	else
2450 		p->capa.neg.as4byte = 0;
2451 
2452 	/* MP: both side must announce capability */
2453 	for (i = 0; i < AID_MAX; i++) {
2454 		if (p->capa.ann.mp[i] && p->capa.peer.mp[i]) {
2455 			p->capa.neg.mp[i] = 1;
2456 			hasmp = 1;
2457 		} else
2458 			p->capa.neg.mp[i] = 0;
2459 	}
2460 	/* if no MP capability present default to IPv4 unicast mode */
2461 	if (!hasmp)
2462 		p->capa.neg.mp[AID_INET] = 1;
2463 
2464 	/*
2465 	 * graceful restart: only the peer capabilities are of interest here.
2466 	 * It is necessary to compare the new values with the previous ones
2467 	 * and act acordingly. AFI/SAFI that are not part in the MP capability
2468 	 * are treated as not being present.
2469 	 */
2470 
2471 	for (i = 0; i < AID_MAX; i++) {
2472 		int8_t	negflags;
2473 
2474 		/* disable GR if the AFI/SAFI is not present */
2475 		if (p->capa.peer.grestart.flags[i] & CAPA_GR_PRESENT &&
2476 		    p->capa.neg.mp[i] == 0)
2477 			p->capa.peer.grestart.flags[i] = 0;	/* disable */
2478 		/* look at current GR state and decide what to do */
2479 		negflags = p->capa.neg.grestart.flags[i];
2480 		p->capa.neg.grestart.flags[i] = p->capa.peer.grestart.flags[i];
2481 		if (negflags & CAPA_GR_RESTARTING) {
2482 			if (!(p->capa.peer.grestart.flags[i] &
2483 			    CAPA_GR_FORWARD)) {
2484 				if (imsg_compose(ibuf_rde, IMSG_SESSION_FLUSH,
2485 				    p->conf.id, 0, -1, &i, sizeof(i)) == -1)
2486 					return (-1);
2487 				log_peer_warnx(&p->conf, "graceful restart of "
2488 				    "%s, not restarted, flushing", aid2str(i));
2489 			} else
2490 				p->capa.neg.grestart.flags[i] |=
2491 				    CAPA_GR_RESTARTING;
2492 		}
2493 	}
2494 	p->capa.neg.grestart.timeout = p->capa.peer.grestart.timeout;
2495 	p->capa.neg.grestart.restart = p->capa.peer.grestart.restart;
2496 
2497 	return (0);
2498 }
2499 
2500 void
2501 session_dispatch_imsg(struct imsgbuf *ibuf, int idx, u_int *listener_cnt)
2502 {
2503 	struct imsg		 imsg;
2504 	struct mrt		 xmrt;
2505 	struct mrt		*mrt;
2506 	struct imsgbuf		*i;
2507 	struct peer_config	*pconf;
2508 	struct peer		*p, *next;
2509 	struct listen_addr	*la, *nla;
2510 	struct kif		*kif;
2511 	u_char			*data;
2512 	enum reconf_action	 reconf;
2513 	int			 n, fd, depend_ok, restricted;
2514 	u_int8_t		 aid, errcode, subcode;
2515 
2516 	while (ibuf) {
2517 		if ((n = imsg_get(ibuf, &imsg)) == -1)
2518 			fatal("session_dispatch_imsg: imsg_get error");
2519 
2520 		if (n == 0)
2521 			break;
2522 
2523 		switch (imsg.hdr.type) {
2524 		case IMSG_SOCKET_CONN:
2525 		case IMSG_SOCKET_CONN_CTL:
2526 			if (idx != PFD_PIPE_MAIN)
2527 				fatalx("reconf request not from parent");
2528 			if ((fd = imsg.fd) == -1) {
2529 				log_warnx("expected to receive imsg fd to "
2530 				    "RDE but didn't receive any");
2531 				break;
2532 			}
2533 			if ((i = malloc(sizeof(struct imsgbuf))) == NULL)
2534 				fatal(NULL);
2535 			imsg_init(i, fd);
2536 			if (imsg.hdr.type == IMSG_SOCKET_CONN) {
2537 				if (ibuf_rde) {
2538 					log_warnx("Unexpected imsg connection "
2539 					    "to RDE received");
2540 					msgbuf_clear(&ibuf_rde->w);
2541 					free(ibuf_rde);
2542 				}
2543 				ibuf_rde = i;
2544 			} else {
2545 				if (ibuf_rde_ctl) {
2546 					log_warnx("Unexpected imsg ctl "
2547 					    "connection to RDE received");
2548 					msgbuf_clear(&ibuf_rde_ctl->w);
2549 					free(ibuf_rde_ctl);
2550 				}
2551 				ibuf_rde_ctl = i;
2552 			}
2553 			break;
2554 		case IMSG_RECONF_CONF:
2555 			if (idx != PFD_PIPE_MAIN)
2556 				fatalx("reconf request not from parent");
2557 			if ((nconf = malloc(sizeof(struct bgpd_config))) ==
2558 			    NULL)
2559 				fatal(NULL);
2560 			memcpy(nconf, imsg.data, sizeof(struct bgpd_config));
2561 			if ((nconf->listen_addrs = calloc(1,
2562 			    sizeof(struct listen_addrs))) == NULL)
2563 				fatal(NULL);
2564 			TAILQ_INIT(nconf->listen_addrs);
2565 			npeers = NULL;
2566 			init_conf(nconf);
2567 			pending_reconf = 1;
2568 			break;
2569 		case IMSG_RECONF_PEER:
2570 			if (idx != PFD_PIPE_MAIN)
2571 				fatalx("reconf request not from parent");
2572 			pconf = imsg.data;
2573 			p = getpeerbyaddr(&pconf->remote_addr);
2574 			if (p == NULL) {
2575 				if ((p = calloc(1, sizeof(struct peer))) ==
2576 				    NULL)
2577 					fatal("new_peer");
2578 				p->state = p->prev_state = STATE_NONE;
2579 				p->next = npeers;
2580 				npeers = p;
2581 				reconf = RECONF_REINIT;
2582 			} else
2583 				reconf = RECONF_KEEP;
2584 
2585 			memcpy(&p->conf, pconf, sizeof(struct peer_config));
2586 			p->conf.reconf_action = reconf;
2587 
2588 			/* sync the RDE in case we keep the peer */
2589 			if (reconf == RECONF_KEEP) {
2590 				if (imsg_compose(ibuf_rde, IMSG_SESSION_ADD,
2591 				    p->conf.id, 0, -1, &p->conf,
2592 				    sizeof(struct peer_config)) == -1)
2593 					fatalx("imsg_compose error");
2594 				if (p->conf.template) {
2595 					/* apply the conf to all clones */
2596 					struct peer *np;
2597 					for (np = peers; np; np = np->next) {
2598 						if (np->template != p)
2599 							continue;
2600 						session_template_clone(np,
2601 						    NULL, np->conf.id,
2602 						    np->conf.remote_as);
2603 						if (imsg_compose(ibuf_rde,
2604 						    IMSG_SESSION_ADD,
2605 						    np->conf.id, 0, -1,
2606 						    &np->conf,
2607 						    sizeof(struct peer_config))
2608 						    == -1)
2609 							fatalx("imsg_compose error");
2610 					}
2611 				}
2612 			}
2613 			break;
2614 		case IMSG_RECONF_LISTENER:
2615 			if (idx != PFD_PIPE_MAIN)
2616 				fatalx("reconf request not from parent");
2617 			if (nconf == NULL)
2618 				fatalx("IMSG_RECONF_LISTENER but no config");
2619 			nla = imsg.data;
2620 			TAILQ_FOREACH(la, conf->listen_addrs, entry)
2621 				if (!la_cmp(la, nla))
2622 					break;
2623 
2624 			if (la == NULL) {
2625 				if (nla->reconf != RECONF_REINIT)
2626 					fatalx("king bula sez: "
2627 					    "expected REINIT");
2628 
2629 				if ((nla->fd = imsg.fd) == -1)
2630 					log_warnx("expected to receive fd for "
2631 					    "%s but didn't receive any",
2632 					    log_sockaddr((struct sockaddr *)
2633 					    &nla->sa));
2634 
2635 				la = calloc(1, sizeof(struct listen_addr));
2636 				if (la == NULL)
2637 					fatal(NULL);
2638 				memcpy(&la->sa, &nla->sa, sizeof(la->sa));
2639 				la->flags = nla->flags;
2640 				la->fd = nla->fd;
2641 				la->reconf = RECONF_REINIT;
2642 				TAILQ_INSERT_TAIL(nconf->listen_addrs, la,
2643 				    entry);
2644 			} else {
2645 				if (nla->reconf != RECONF_KEEP)
2646 					fatalx("king bula sez: expected KEEP");
2647 				la->reconf = RECONF_KEEP;
2648 			}
2649 
2650 			break;
2651 		case IMSG_RECONF_CTRL:
2652 			if (idx != PFD_PIPE_MAIN)
2653 				fatalx("reconf request not from parent");
2654 			if (imsg.hdr.len != IMSG_HEADER_SIZE +
2655 			    sizeof(restricted))
2656 				fatalx("IFINFO imsg with wrong len");
2657 			memcpy(&restricted, imsg.data, sizeof(restricted));
2658 			if (imsg.fd == -1) {
2659 				log_warnx("expected to receive fd for control "
2660 				    "socket but didn't receive any");
2661 				break;
2662 			}
2663 			if (restricted) {
2664 				control_shutdown(rcsock);
2665 				rcsock = imsg.fd;
2666 			} else {
2667 				control_shutdown(csock);
2668 				csock = imsg.fd;
2669 			}
2670 			break;
2671 		case IMSG_RECONF_DONE:
2672 			if (idx != PFD_PIPE_MAIN)
2673 				fatalx("reconf request not from parent");
2674 			if (nconf == NULL)
2675 				fatalx("got IMSG_RECONF_DONE but no config");
2676 			conf->flags = nconf->flags;
2677 			conf->log = nconf->log;
2678 			conf->bgpid = nconf->bgpid;
2679 			conf->clusterid = nconf->clusterid;
2680 			conf->as = nconf->as;
2681 			conf->short_as = nconf->short_as;
2682 			conf->holdtime = nconf->holdtime;
2683 			conf->min_holdtime = nconf->min_holdtime;
2684 			conf->connectretry = nconf->connectretry;
2685 
2686 			/* add new peers */
2687 			for (p = npeers; p != NULL; p = next) {
2688 				next = p->next;
2689 				p->next = peers;
2690 				peers = p;
2691 			}
2692 			/* find ones that need attention */
2693 			for (p = peers; p != NULL; p = p->next) {
2694 				/* needs to be deleted? */
2695 				if (p->conf.reconf_action == RECONF_NONE &&
2696 				    !p->template)
2697 					p->conf.reconf_action = RECONF_DELETE;
2698 				/* had demotion, is demoted, demote removed? */
2699 				if (p->demoted && !p->conf.demote_group[0])
2700 						session_demote(p, -1);
2701 			}
2702 
2703 			/* delete old listeners */
2704 			for (la = TAILQ_FIRST(conf->listen_addrs); la != NULL;
2705 			    la = nla) {
2706 				nla = TAILQ_NEXT(la, entry);
2707 				if (la->reconf == RECONF_NONE) {
2708 					log_info("not listening on %s any more",
2709 					    log_sockaddr(
2710 					    (struct sockaddr *)&la->sa));
2711 					TAILQ_REMOVE(conf->listen_addrs, la,
2712 					    entry);
2713 					close(la->fd);
2714 					free(la);
2715 				}
2716 			}
2717 
2718 			/* add new listeners */
2719 			while ((la = TAILQ_FIRST(nconf->listen_addrs)) !=
2720 			    NULL) {
2721 				TAILQ_REMOVE(nconf->listen_addrs, la, entry);
2722 				TAILQ_INSERT_TAIL(conf->listen_addrs, la,
2723 				    entry);
2724 			}
2725 
2726 			setup_listeners(listener_cnt);
2727 			free(nconf->listen_addrs);
2728 			free(nconf);
2729 			nconf = NULL;
2730 			pending_reconf = 0;
2731 			log_info("SE reconfigured");
2732 			imsg_compose(ibuf_main, IMSG_RECONF_DONE, 0, 0,
2733 			    -1, NULL, 0);
2734 			break;
2735 		case IMSG_IFINFO:
2736 			if (idx != PFD_PIPE_MAIN)
2737 				fatalx("IFINFO message not from parent");
2738 			if (imsg.hdr.len != IMSG_HEADER_SIZE +
2739 			    sizeof(struct kif))
2740 				fatalx("IFINFO imsg with wrong len");
2741 			kif = imsg.data;
2742 			depend_ok = (kif->flags & IFF_UP) &&
2743 			    LINK_STATE_IS_UP(kif->link_state);
2744 
2745 			for (p = peers; p != NULL; p = p->next)
2746 				if (!strcmp(p->conf.if_depend, kif->ifname)) {
2747 					if (depend_ok && !p->depend_ok) {
2748 						p->depend_ok = depend_ok;
2749 						bgp_fsm(p, EVNT_START);
2750 					} else if (!depend_ok && p->depend_ok) {
2751 						p->depend_ok = depend_ok;
2752 						session_stop(p,
2753 						    ERR_CEASE_OTHER_CHANGE);
2754 					}
2755 				}
2756 			break;
2757 		case IMSG_MRT_OPEN:
2758 		case IMSG_MRT_REOPEN:
2759 			if (imsg.hdr.len > IMSG_HEADER_SIZE +
2760 			    sizeof(struct mrt)) {
2761 				log_warnx("wrong imsg len");
2762 				break;
2763 			}
2764 
2765 			memcpy(&xmrt, imsg.data, sizeof(struct mrt));
2766 			if ((xmrt.wbuf.fd = imsg.fd) == -1)
2767 				log_warnx("expected to receive fd for mrt dump "
2768 				    "but didn't receive any");
2769 
2770 			mrt = mrt_get(&mrthead, &xmrt);
2771 			if (mrt == NULL) {
2772 				/* new dump */
2773 				mrt = calloc(1, sizeof(struct mrt));
2774 				if (mrt == NULL)
2775 					fatal("session_dispatch_imsg");
2776 				memcpy(mrt, &xmrt, sizeof(struct mrt));
2777 				TAILQ_INIT(&mrt->wbuf.bufs);
2778 				LIST_INSERT_HEAD(&mrthead, mrt, entry);
2779 			} else {
2780 				/* old dump reopened */
2781 				close(mrt->wbuf.fd);
2782 				mrt->wbuf.fd = xmrt.wbuf.fd;
2783 			}
2784 			break;
2785 		case IMSG_MRT_CLOSE:
2786 			if (imsg.hdr.len > IMSG_HEADER_SIZE +
2787 			    sizeof(struct mrt)) {
2788 				log_warnx("wrong imsg len");
2789 				break;
2790 			}
2791 
2792 			memcpy(&xmrt, imsg.data, sizeof(struct mrt));
2793 			mrt = mrt_get(&mrthead, &xmrt);
2794 			if (mrt != NULL)
2795 				mrt_done(mrt);
2796 			break;
2797 		case IMSG_CTL_KROUTE:
2798 		case IMSG_CTL_KROUTE_ADDR:
2799 		case IMSG_CTL_SHOW_NEXTHOP:
2800 		case IMSG_CTL_SHOW_INTERFACE:
2801 		case IMSG_CTL_SHOW_FIB_TABLES:
2802 			if (idx != PFD_PIPE_MAIN)
2803 				fatalx("ctl kroute request not from parent");
2804 			control_imsg_relay(&imsg);
2805 			break;
2806 		case IMSG_CTL_SHOW_RIB:
2807 		case IMSG_CTL_SHOW_RIB_PREFIX:
2808 		case IMSG_CTL_SHOW_RIB_ATTR:
2809 		case IMSG_CTL_SHOW_RIB_MEM:
2810 		case IMSG_CTL_SHOW_NETWORK:
2811 		case IMSG_CTL_SHOW_NEIGHBOR:
2812 			if (idx != PFD_PIPE_ROUTE_CTL)
2813 				fatalx("ctl rib request not from RDE");
2814 			control_imsg_relay(&imsg);
2815 			break;
2816 		case IMSG_CTL_END:
2817 		case IMSG_CTL_RESULT:
2818 			control_imsg_relay(&imsg);
2819 			break;
2820 		case IMSG_UPDATE:
2821 			if (idx != PFD_PIPE_ROUTE)
2822 				fatalx("update request not from RDE");
2823 			if (imsg.hdr.len > IMSG_HEADER_SIZE +
2824 			    MAX_PKTSIZE - MSGSIZE_HEADER ||
2825 			    imsg.hdr.len < IMSG_HEADER_SIZE +
2826 			    MSGSIZE_UPDATE_MIN - MSGSIZE_HEADER)
2827 				log_warnx("RDE sent invalid update");
2828 			else
2829 				session_update(imsg.hdr.peerid, imsg.data,
2830 				    imsg.hdr.len - IMSG_HEADER_SIZE);
2831 			break;
2832 		case IMSG_UPDATE_ERR:
2833 			if (idx != PFD_PIPE_ROUTE)
2834 				fatalx("update request not from RDE");
2835 			if (imsg.hdr.len < IMSG_HEADER_SIZE + 2) {
2836 				log_warnx("RDE sent invalid notification");
2837 				break;
2838 			}
2839 			if ((p = getpeerbyid(imsg.hdr.peerid)) == NULL) {
2840 				log_warnx("no such peer: id=%u",
2841 				    imsg.hdr.peerid);
2842 				break;
2843 			}
2844 			data = imsg.data;
2845 			errcode = *data++;
2846 			subcode = *data++;
2847 
2848 			if (imsg.hdr.len == IMSG_HEADER_SIZE + 2)
2849 				data = NULL;
2850 
2851 			session_notification(p, errcode, subcode,
2852 			    data, imsg.hdr.len - IMSG_HEADER_SIZE - 2);
2853 			switch (errcode) {
2854 			case ERR_CEASE:
2855 				switch (subcode) {
2856 				case ERR_CEASE_MAX_PREFIX:
2857 					bgp_fsm(p, EVNT_STOP);
2858 					if (p->conf.max_prefix_restart)
2859 						timer_set(p, Timer_IdleHold, 60 *
2860 						    p->conf.max_prefix_restart);
2861 					break;
2862 				default:
2863 					bgp_fsm(p, EVNT_CON_FATAL);
2864 					break;
2865 				}
2866 				break;
2867 			default:
2868 				bgp_fsm(p, EVNT_CON_FATAL);
2869 				break;
2870 			}
2871 			break;
2872 		case IMSG_SESSION_RESTARTED:
2873 			if (idx != PFD_PIPE_ROUTE)
2874 				fatalx("update request not from RDE");
2875 			if (imsg.hdr.len < IMSG_HEADER_SIZE + sizeof(aid)) {
2876 				log_warnx("RDE sent invalid restart msg");
2877 				break;
2878 			}
2879 			if ((p = getpeerbyid(imsg.hdr.peerid)) == NULL) {
2880 				log_warnx("no such peer: id=%u",
2881 				    imsg.hdr.peerid);
2882 				break;
2883 			}
2884 			memcpy(&aid, imsg.data, sizeof(aid));
2885 			if (aid >= AID_MAX)
2886 				fatalx("IMSG_SESSION_RESTARTED: bad AID");
2887 			if (p->capa.neg.grestart.flags[aid] &
2888 			    CAPA_GR_RESTARTING) {
2889 				log_peer_warnx(&p->conf,
2890 				    "graceful restart of %s finished",
2891 				    aid2str(aid));
2892 				p->capa.neg.grestart.flags[aid] &=
2893 				    ~CAPA_GR_RESTARTING;
2894 				timer_stop(p, Timer_RestartTimeout);
2895 
2896 				/* signal back to RDE to cleanup stale routes */
2897 				if (imsg_compose(ibuf_rde,
2898 				    IMSG_SESSION_RESTARTED, imsg.hdr.peerid, 0,
2899 				    -1, &aid, sizeof(aid)) == -1)
2900 					fatal("imsg_compose: "
2901 					    "IMSG_SESSION_RESTARTED");
2902 			}
2903 			break;
2904 		case IMSG_SESSION_DOWN:
2905 			if (idx != PFD_PIPE_ROUTE)
2906 				fatalx("update request not from RDE");
2907 			if ((p = getpeerbyid(imsg.hdr.peerid)) == NULL) {
2908 				log_warnx("no such peer: id=%u",
2909 				    imsg.hdr.peerid);
2910 				break;
2911 			}
2912 			session_stop(p, ERR_CEASE_ADMIN_DOWN);
2913 			break;
2914 		default:
2915 			break;
2916 		}
2917 		imsg_free(&imsg);
2918 	}
2919 }
2920 
2921 int
2922 la_cmp(struct listen_addr *a, struct listen_addr *b)
2923 {
2924 	struct sockaddr_in	*in_a, *in_b;
2925 	struct sockaddr_in6	*in6_a, *in6_b;
2926 
2927 	if (a->sa.ss_family != b->sa.ss_family)
2928 		return (1);
2929 
2930 	switch (a->sa.ss_family) {
2931 	case AF_INET:
2932 		in_a = (struct sockaddr_in *)&a->sa;
2933 		in_b = (struct sockaddr_in *)&b->sa;
2934 		if (in_a->sin_addr.s_addr != in_b->sin_addr.s_addr)
2935 			return (1);
2936 		if (in_a->sin_port != in_b->sin_port)
2937 			return (1);
2938 		break;
2939 	case AF_INET6:
2940 		in6_a = (struct sockaddr_in6 *)&a->sa;
2941 		in6_b = (struct sockaddr_in6 *)&b->sa;
2942 		if (bcmp(&in6_a->sin6_addr, &in6_b->sin6_addr,
2943 		    sizeof(struct in6_addr)))
2944 			return (1);
2945 		if (in6_a->sin6_port != in6_b->sin6_port)
2946 			return (1);
2947 		break;
2948 	default:
2949 		fatal("king bula sez: unknown address family");
2950 		/* NOTREACHED */
2951 	}
2952 
2953 	return (0);
2954 }
2955 
2956 struct peer *
2957 getpeerbyaddr(struct bgpd_addr *addr)
2958 {
2959 	struct peer *p;
2960 
2961 	/* we might want a more effective way to find peers by IP */
2962 	for (p = peers; p != NULL &&
2963 	    memcmp(&p->conf.remote_addr, addr, sizeof(p->conf.remote_addr));
2964 	    p = p->next)
2965 		;	/* nothing */
2966 
2967 	return (p);
2968 }
2969 
2970 struct peer *
2971 getpeerbydesc(const char *descr)
2972 {
2973 	struct peer	*p, *res = NULL;
2974 	int		 match = 0;
2975 
2976 	for (p = peers; p != NULL; p = p->next)
2977 		if (!strcmp(p->conf.descr, descr)) {
2978 			res = p;
2979 			match++;
2980 		}
2981 
2982 	if (match > 1)
2983 		log_info("neighbor description \"%s\" not unique, request "
2984 		    "aborted", descr);
2985 
2986 	if (match == 1)
2987 		return (res);
2988 	else
2989 		return (NULL);
2990 }
2991 
2992 struct peer *
2993 getpeerbyip(struct sockaddr *ip)
2994 {
2995 	struct bgpd_addr addr;
2996 	struct peer	*p, *newpeer, *loose = NULL;
2997 	u_int32_t	 id;
2998 
2999 	sa2addr(ip, &addr);
3000 
3001 	/* we might want a more effective way to find peers by IP */
3002 	for (p = peers; p != NULL; p = p->next)
3003 		if (!p->conf.template &&
3004 		    !memcmp(&addr, &p->conf.remote_addr, sizeof(addr)))
3005 			return (p);
3006 
3007 	/* try template matching */
3008 	for (p = peers; p != NULL; p = p->next)
3009 		if (p->conf.template &&
3010 		    p->conf.remote_addr.aid == addr.aid &&
3011 		    session_match_mask(p, &addr))
3012 			if (loose == NULL || loose->conf.remote_masklen <
3013 			    p->conf.remote_masklen)
3014 				loose = p;
3015 
3016 	if (loose != NULL) {
3017 		/* clone */
3018 		if ((newpeer = malloc(sizeof(struct peer))) == NULL)
3019 			fatal(NULL);
3020 		memcpy(newpeer, loose, sizeof(struct peer));
3021 		for (id = UINT_MAX; id > UINT_MAX / 2; id--) {
3022 			for (p = peers; p != NULL && p->conf.id != id;
3023 			    p = p->next)
3024 				;	/* nothing */
3025 			if (p == NULL) {	/* we found a free id */
3026 				break;
3027 			}
3028 		}
3029 		newpeer->template = loose;
3030 		session_template_clone(newpeer, ip, id, 0);
3031 		newpeer->state = newpeer->prev_state = STATE_NONE;
3032 		newpeer->conf.reconf_action = RECONF_KEEP;
3033 		newpeer->rbuf = NULL;
3034 		init_peer(newpeer);
3035 		bgp_fsm(newpeer, EVNT_START);
3036 		newpeer->next = peers;
3037 		peers = newpeer;
3038 		return (newpeer);
3039 	}
3040 
3041 	return (NULL);
3042 }
3043 
3044 void
3045 session_template_clone(struct peer *p, struct sockaddr *ip, u_int32_t id,
3046     u_int32_t as)
3047 {
3048 	struct bgpd_addr	remote_addr;
3049 
3050 	if (ip)
3051 		sa2addr(ip, &remote_addr);
3052 	else
3053 		memcpy(&remote_addr, &p->conf.remote_addr, sizeof(remote_addr));
3054 
3055 	memcpy(&p->conf, &p->template->conf, sizeof(struct peer_config));
3056 
3057 	p->conf.id = id;
3058 
3059 	if (as) {
3060 		p->conf.remote_as = as;
3061 		p->conf.ebgp = (p->conf.remote_as != conf->as);
3062 		if (!p->conf.ebgp)
3063 			/* force enforce_as off for iBGP sessions */
3064 			p->conf.enforce_as = ENFORCE_AS_OFF;
3065 	}
3066 
3067 	memcpy(&p->conf.remote_addr, &remote_addr, sizeof(remote_addr));
3068 	switch (p->conf.remote_addr.aid) {
3069 	case AID_INET:
3070 		p->conf.remote_masklen = 32;
3071 		break;
3072 	case AID_INET6:
3073 		p->conf.remote_masklen = 128;
3074 		break;
3075 	}
3076 	p->conf.template = 0;
3077 }
3078 
3079 int
3080 session_match_mask(struct peer *p, struct bgpd_addr *a)
3081 {
3082 	in_addr_t	 v4mask;
3083 	struct in6_addr	 masked;
3084 
3085 	switch (p->conf.remote_addr.aid) {
3086 	case AID_INET:
3087 		v4mask = htonl(prefixlen2mask(p->conf.remote_masklen));
3088 		if (p->conf.remote_addr.v4.s_addr == (a->v4.s_addr & v4mask))
3089 			return (1);
3090 		return (0);
3091 	case AID_INET6:
3092 		inet6applymask(&masked, &a->v6, p->conf.remote_masklen);
3093 
3094 		if (!memcmp(&masked, &p->conf.remote_addr.v6, sizeof(masked)))
3095 			return (1);
3096 		return (0);
3097 	}
3098 	return (0);
3099 }
3100 
3101 struct peer *
3102 getpeerbyid(u_int32_t peerid)
3103 {
3104 	struct peer *p;
3105 
3106 	/* we might want a more effective way to find peers by IP */
3107 	for (p = peers; p != NULL &&
3108 	    p->conf.id != peerid; p = p->next)
3109 		;	/* nothing */
3110 
3111 	return (p);
3112 }
3113 
3114 void
3115 session_down(struct peer *peer)
3116 {
3117 	bzero(&peer->capa.neg, sizeof(peer->capa.neg));
3118 	peer->stats.last_updown = time(NULL);
3119 	if (imsg_compose(ibuf_rde, IMSG_SESSION_DOWN, peer->conf.id, 0, -1,
3120 	    NULL, 0) == -1)
3121 		fatalx("imsg_compose error");
3122 }
3123 
3124 void
3125 session_up(struct peer *p)
3126 {
3127 	struct session_up	 sup;
3128 
3129 	if (imsg_compose(ibuf_rde, IMSG_SESSION_ADD, p->conf.id, 0, -1,
3130 	    &p->conf, sizeof(p->conf)) == -1)
3131 		fatalx("imsg_compose error");
3132 
3133 	sa2addr((struct sockaddr *)&p->sa_local, &sup.local_addr);
3134 	sa2addr((struct sockaddr *)&p->sa_remote, &sup.remote_addr);
3135 
3136 	sup.remote_bgpid = p->remote_bgpid;
3137 	sup.short_as = p->short_as;
3138 	memcpy(&sup.capa, &p->capa.neg, sizeof(sup.capa));
3139 	p->stats.last_updown = time(NULL);
3140 	if (imsg_compose(ibuf_rde, IMSG_SESSION_UP, p->conf.id, 0, -1,
3141 	    &sup, sizeof(sup)) == -1)
3142 		fatalx("imsg_compose error");
3143 }
3144 
3145 int
3146 imsg_ctl_parent(int type, u_int32_t peerid, pid_t pid, void *data,
3147     u_int16_t datalen)
3148 {
3149 	return (imsg_compose(ibuf_main, type, peerid, pid, -1, data, datalen));
3150 }
3151 
3152 int
3153 imsg_ctl_rde(int type, pid_t pid, void *data, u_int16_t datalen)
3154 {
3155 	/*
3156 	 * Use control socket to talk to RDE to bypass the queue of the
3157 	 * regular imsg socket.
3158 	 */
3159 	return (imsg_compose(ibuf_rde_ctl, type, 0, pid, -1, data, datalen));
3160 }
3161 
3162 void
3163 session_demote(struct peer *p, int level)
3164 {
3165 	struct demote_msg	msg;
3166 
3167 	strlcpy(msg.demote_group, p->conf.demote_group,
3168 	    sizeof(msg.demote_group));
3169 	msg.level = level;
3170 	if (imsg_compose(ibuf_main, IMSG_DEMOTE, p->conf.id, 0, -1,
3171 	    &msg, sizeof(msg)) == -1)
3172 		fatalx("imsg_compose error");
3173 
3174 	p->demoted += level;
3175 }
3176 
3177 void
3178 session_stop(struct peer *peer, u_int8_t subcode)
3179 {
3180 	switch (peer->state) {
3181 	case STATE_OPENSENT:
3182 	case STATE_OPENCONFIRM:
3183 	case STATE_ESTABLISHED:
3184 		session_notification(peer, ERR_CEASE, subcode, NULL, 0);
3185 		break;
3186 	default:
3187 		/* session not open, no need to send notification */
3188 		break;
3189 	}
3190 	bgp_fsm(peer, EVNT_STOP);
3191 }
3192