xref: /openbsd-src/usr.sbin/bgpd/session.c (revision 4b70baf6e17fc8b27fc1f7fa7929335753fa94c3)
1 /*	$OpenBSD: session.c,v 1.379 2019/04/25 12:12:16 claudio Exp $ */
2 
3 /*
4  * Copyright (c) 2003, 2004, 2005 Henning Brauer <henning@openbsd.org>
5  * Copyright (c) 2017 Peter van Dijk <peter.van.dijk@powerdns.com>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 
20 #include <sys/types.h>
21 
22 #include <sys/mman.h>
23 #include <sys/socket.h>
24 #include <sys/time.h>
25 #include <sys/resource.h>
26 #include <sys/un.h>
27 #include <netinet/in.h>
28 #include <netinet/ip.h>
29 #include <netinet/tcp.h>
30 #include <arpa/inet.h>
31 #include <limits.h>
32 
33 #include <err.h>
34 #include <errno.h>
35 #include <fcntl.h>
36 #include <poll.h>
37 #include <pwd.h>
38 #include <signal.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <string.h>
42 #include <syslog.h>
43 #include <unistd.h>
44 
45 #include "bgpd.h"
46 #include "mrt.h"
47 #include "session.h"
48 #include "log.h"
49 
/*
 * Fixed slots at the front of the pollfd array that session_main()
 * rebuilds on every loop iteration.  Listener sockets start at
 * PFD_LISTENERS_START and are followed by the peer, mrt and control
 * connection descriptors.
 */
#define PFD_PIPE_MAIN		0	/* ibuf_main: imsg pipe to the parent */
#define PFD_PIPE_ROUTE		1	/* ibuf_rde: imsg pipe to the RDE */
#define PFD_PIPE_ROUTE_CTL	2	/* ibuf_rde_ctl: RDE control pipe */
#define PFD_SOCK_CTL		3	/* csock: control socket */
#define PFD_SOCK_RCTL		4	/* rcsock: second control socket */
#define PFD_SOCK_PFKEY		5	/* socket returned by pfkey_init() */
#define PFD_LISTENERS_START	6	/* first listener slot */
57 
/*
 * Forward declarations for the session engine functions used in this
 * file.
 */
void	session_sighdlr(int);
int	setup_listeners(u_int *);
void	init_peer(struct peer *);
void	start_timer_holdtime(struct peer *);
void	start_timer_keepalive(struct peer *);
void	session_close_connection(struct peer *);
void	change_state(struct peer *, enum session_state, enum session_events);
int	session_setup_socket(struct peer *);
void	session_accept(int);
int	session_connect(struct peer *);
void	session_tcp_established(struct peer *);
void	session_capa_ann_none(struct peer *);
int	session_capa_add(struct ibuf *, u_int8_t, u_int8_t);
int	session_capa_add_mp(struct ibuf *, u_int8_t);
int	session_capa_add_gr(struct peer *, struct ibuf *, u_int8_t);
struct bgp_msg	*session_newmsg(enum msg_type, u_int16_t);
int	session_sendmsg(struct bgp_msg *, struct peer *);
void	session_open(struct peer *);
void	session_keepalive(struct peer *);
void	session_update(u_int32_t, void *, size_t);
void	session_notification(struct peer *, u_int8_t, u_int8_t, void *,
	    ssize_t);
void	session_rrefresh(struct peer *, u_int8_t);
int	session_graceful_restart(struct peer *);
int	session_graceful_stop(struct peer *);
int	session_dispatch_msg(struct pollfd *, struct peer *);
void	session_process_msg(struct peer *);
int	parse_header(struct peer *, u_char *, u_int16_t *, u_int8_t *);
int	parse_open(struct peer *);
int	parse_update(struct peer *);
int	parse_refresh(struct peer *);
int	parse_notification(struct peer *);
int	parse_capabilities(struct peer *, u_char *, u_int16_t, u_int32_t *);
int	capa_neg_calc(struct peer *);
void	session_dispatch_imsg(struct imsgbuf *, int, u_int *);
void	session_up(struct peer *);
void	session_down(struct peer *);
int	imsg_rde(int, u_int32_t, void *, u_int16_t);
void	session_demote(struct peer *, int);
void	merge_peers(struct bgpd_config *, struct bgpd_config *);

int		 la_cmp(struct listen_addr *, struct listen_addr *);
void		 session_template_clone(struct peer *, struct sockaddr *,
		    u_int32_t, u_int32_t);
int		 session_match_mask(struct peer *, struct bgpd_addr *);
103 
/*
 * conf is the configuration the main loop runs on (peers, listeners).
 * nconf is presumably the configuration being loaded during a reload --
 * TODO confirm against session_dispatch_imsg().
 */
struct bgpd_config	*conf, *nconf;
/* records missing kernel features, e.g. no_md5sig and no_pfkey */
struct bgpd_sysdep	 sysdep;
/* set from the signal handler or on fatal errors to leave the main loop */
volatile sig_atomic_t	 session_quit;
/* while non-zero the main loop skips the peer init/reinit/delete pass */
int			 pending_reconf;
/* control sockets, polled for POLLIN and handed to control_accept() */
int			 csock = -1, rcsock = -1;
/* incremented in init_peer(), decremented when a peer is removed */
u_int			 peer_cnt;
/* imsg channels to the RDE, the RDE control pipe and the parent */
struct imsgbuf		*ibuf_rde;
struct imsgbuf		*ibuf_rde_ctl;
struct imsgbuf		*ibuf_main;

/* list of mrt dump contexts serviced by the main loop */
struct mrt_head		 mrthead;
/*
 * getmonotime() stamp of the last accept() that failed with
 * ENFILE/EMFILE; while non-zero no accepting socket is polled.
 */
time_t			 pauseaccept;
116 
117 void
118 session_sighdlr(int sig)
119 {
120 	switch (sig) {
121 	case SIGINT:
122 	case SIGTERM:
123 		session_quit = 1;
124 		break;
125 	}
126 }
127 
128 int
129 setup_listeners(u_int *la_cnt)
130 {
131 	int			 ttl = 255;
132 	int			 opt;
133 	struct listen_addr	*la;
134 	u_int			 cnt = 0;
135 
136 	TAILQ_FOREACH(la, conf->listen_addrs, entry) {
137 		la->reconf = RECONF_NONE;
138 		cnt++;
139 
140 		if (la->flags & LISTENER_LISTENING)
141 			continue;
142 
143 		if (la->fd == -1) {
144 			log_warn("cannot establish listener on %s: invalid fd",
145 			    log_sockaddr((struct sockaddr *)&la->sa,
146 			    la->sa_len));
147 			continue;
148 		}
149 
150 		opt = 1;
151 		if (setsockopt(la->fd, IPPROTO_TCP, TCP_MD5SIG,
152 		    &opt, sizeof(opt)) == -1) {
153 			if (errno == ENOPROTOOPT) {	/* system w/o md5sig */
154 				log_warnx("md5sig not available, disabling");
155 				sysdep.no_md5sig = 1;
156 			} else
157 				fatal("setsockopt TCP_MD5SIG");
158 		}
159 
160 		/* set ttl to 255 so that ttl-security works */
161 		if (la->sa.ss_family == AF_INET && setsockopt(la->fd,
162 		    IPPROTO_IP, IP_TTL, &ttl, sizeof(ttl)) == -1) {
163 			log_warn("setup_listeners setsockopt TTL");
164 			continue;
165 		}
166 		if (la->sa.ss_family == AF_INET6 && setsockopt(la->fd,
167 		    IPPROTO_IPV6, IPV6_UNICAST_HOPS, &ttl, sizeof(ttl)) == -1) {
168 			log_warn("setup_listeners setsockopt hoplimit");
169 			continue;
170 		}
171 
172 		if (listen(la->fd, MAX_BACKLOG)) {
173 			close(la->fd);
174 			fatal("listen");
175 		}
176 
177 		la->flags |= LISTENER_LISTENING;
178 
179 		log_info("listening on %s",
180 		    log_sockaddr((struct sockaddr *)&la->sa, la->sa_len));
181 	}
182 
183 	*la_cnt = cnt;
184 
185 	return (0);
186 }
187 
/*
 * Entry point of the session engine process.
 *
 * Sets up logging, chroots into the BGPD_USER home directory, drops
 * privileges, pledges, installs signal handlers and then runs the poll
 * loop until session_quit is set.  Each iteration:
 *   1. initialises, re-initialises or deletes peers (unless a reconfig
 *      is pending),
 *   2. grows the peer_l, mrt_l and pfd arrays as needed,
 *   3. rebuilds the pollfd array: fixed slots, listeners, peers, mrt
 *      dumps with queued data, control connections,
 *   4. fires due peer timers and derives the poll timeout from them,
 *   5. polls and dispatches whatever became ready.
 * After the loop all peers and mrt contexts are torn down, the pipes
 * are flushed and closed and the process exits with status 0.
 *
 * debug and verbose are handed to log_init() and log_setverbose().
 * This function does not return.
 */
void
session_main(int debug, int verbose)
{
	int			 timeout, pfkeysock;
	unsigned int		 i, j, idx_peers, idx_listeners, idx_mrts;
	u_int			 pfd_elms = 0, peer_l_elms = 0, mrt_l_elms = 0;
	u_int			 listener_cnt, ctl_cnt, mrt_cnt;
	u_int			 new_cnt;
	struct passwd		*pw;
	struct peer		*p, **peer_l = NULL, *next;
	struct mrt		*m, *xm, **mrt_l = NULL;
	struct pollfd		*pfd = NULL;
	struct ctl_conn		*ctl_conn;
	struct listen_addr	*la;
	void			*newp;
	short			 events;

	log_init(debug, LOG_DAEMON);
	log_setverbose(verbose);

	bgpd_process = PROC_SE;
	log_procinit(log_procnames[bgpd_process]);

	if ((pw = getpwnam(BGPD_USER)) == NULL)
		fatal(NULL);

	if (chroot(pw->pw_dir) == -1)
		fatal("chroot");
	if (chdir("/") == -1)
		fatal("chdir(\"/\")");

	setproctitle("session engine");
	/* the pfkey socket is opened before privileges are dropped */
	pfkeysock = pfkey_init(&sysdep);

	if (setgroups(1, &pw->pw_gid) ||
	    setresgid(pw->pw_gid, pw->pw_gid, pw->pw_gid) ||
	    setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid))
		fatal("can't drop privileges");

	if (pledge("stdio inet recvfd", NULL) == -1)
		fatal("pledge");

	signal(SIGTERM, session_sighdlr);
	signal(SIGINT, session_sighdlr);
	signal(SIGPIPE, SIG_IGN);
	signal(SIGHUP, SIG_IGN);
	signal(SIGALRM, SIG_IGN);
	signal(SIGUSR1, SIG_IGN);

	if ((ibuf_main = malloc(sizeof(struct imsgbuf))) == NULL)
		fatal(NULL);
	/* NOTE(review): fd 3 is presumably the pipe set up by the parent */
	imsg_init(ibuf_main, 3);

	TAILQ_INIT(&ctl_conns);
	LIST_INIT(&mrthead);
	listener_cnt = 0;
	peer_cnt = 0;
	ctl_cnt = 0;

	conf = new_config();
	log_info("session engine ready");

	while (session_quit == 0) {
		/* check for peers to be initialized or deleted */
		if (!pending_reconf) {
			/* next is fetched up front: p may be freed below */
			for (p = TAILQ_FIRST(&conf->peers); p != NULL;
			   p = next) {
				next = TAILQ_NEXT(p, entry);
				/* cloned peer that idled out? */
				if (p->template && (p->state == STATE_IDLE ||
				    p->state == STATE_ACTIVE) &&
				    time(NULL) - p->stats.last_updown >=
				    INTERVAL_HOLD_CLONED)
					p->reconf_action = RECONF_DELETE;

				/* new peer that needs init? */
				if (p->state == STATE_NONE)
					init_peer(p);

				/* reinit due? */
				if (p->reconf_action == RECONF_REINIT) {
					session_stop(p, ERR_CEASE_ADMIN_RESET);
					if (!p->conf.down)
						timer_set(p, Timer_IdleHold, 0);
				}

				/* deletion due? */
				if (p->reconf_action == RECONF_DELETE) {
					if (p->demoted)
						session_demote(p, -1);
					p->conf.demote_group[0] = 0;
					session_stop(p, ERR_CEASE_PEER_UNCONF);
					log_peer_warnx(&p->conf, "removed");
					TAILQ_REMOVE(&conf->peers, p, entry);
					timer_remove_all(p);
					pfkey_remove(p);
					free(p);
					peer_cnt--;
					continue;
				}
				p->reconf_action = RECONF_NONE;
			}
		}

		/* the helper arrays only ever grow */
		if (peer_cnt > peer_l_elms) {
			if ((newp = reallocarray(peer_l, peer_cnt,
			    sizeof(struct peer *))) == NULL) {
				/* panic for now  */
				log_warn("could not resize peer_l from %u -> %u"
				    " entries", peer_l_elms, peer_cnt);
				fatalx("exiting");
			}
			peer_l = newp;
			peer_l_elms = peer_cnt;
		}

		/* reap removed mrt contexts, count those with queued data */
		mrt_cnt = 0;
		for (m = LIST_FIRST(&mrthead); m != NULL; m = xm) {
			xm = LIST_NEXT(m, entry);
			if (m->state == MRT_STATE_REMOVE) {
				mrt_clean(m);
				LIST_REMOVE(m, entry);
				free(m);
				continue;
			}
			if (m->wbuf.queued)
				mrt_cnt++;
		}

		if (mrt_cnt > mrt_l_elms) {
			if ((newp = reallocarray(mrt_l, mrt_cnt,
			    sizeof(struct mrt *))) == NULL) {
				/* panic for now  */
				log_warn("could not resize mrt_l from %u -> %u"
				    " entries", mrt_l_elms, mrt_cnt);
				fatalx("exiting");
			}
			mrt_l = newp;
			mrt_l_elms = mrt_cnt;
		}

		/* upper bound on the number of descriptors polled below */
		new_cnt = PFD_LISTENERS_START + listener_cnt + peer_cnt +
		    ctl_cnt + mrt_cnt;
		if (new_cnt > pfd_elms) {
			if ((newp = reallocarray(pfd, new_cnt,
			    sizeof(struct pollfd))) == NULL) {
				/* panic for now  */
				log_warn("could not resize pfd from %u -> %u"
				    " entries", pfd_elms, new_cnt);
				fatalx("exiting");
			}
			pfd = newp;
			pfd_elms = new_cnt;
		}

		bzero(pfd, sizeof(struct pollfd) * pfd_elms);

		set_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main);
		set_pollfd(&pfd[PFD_PIPE_ROUTE], ibuf_rde);
		set_pollfd(&pfd[PFD_PIPE_ROUTE_CTL], ibuf_rde_ctl);

		/* while accepting is paused, no accepting socket is polled */
		if (pauseaccept == 0) {
			pfd[PFD_SOCK_CTL].fd = csock;
			pfd[PFD_SOCK_CTL].events = POLLIN;
			pfd[PFD_SOCK_RCTL].fd = rcsock;
			pfd[PFD_SOCK_RCTL].events = POLLIN;
		} else {
			pfd[PFD_SOCK_CTL].fd = -1;
			pfd[PFD_SOCK_RCTL].fd = -1;
		}
		pfd[PFD_SOCK_PFKEY].fd = pfkeysock;
		pfd[PFD_SOCK_PFKEY].events = POLLIN;

		i = PFD_LISTENERS_START;
		TAILQ_FOREACH(la, conf->listen_addrs, entry) {
			if (pauseaccept == 0) {
				pfd[i].fd = la->fd;
				pfd[i].events = POLLIN;
			} else
				pfd[i].fd = -1;
			i++;
		}
		/* pfd[PFD_LISTENERS_START .. idx_listeners) are listeners */
		idx_listeners = i;
		timeout = 240;	/* loop every 240s at least */

		TAILQ_FOREACH(p, &conf->peers, entry) {
			time_t	nextaction;
			struct peer_timer *pt;

			/* check timers */
			if ((pt = timer_nextisdue(p)) != NULL) {
				switch (pt->type) {
				case Timer_Hold:
					bgp_fsm(p, EVNT_TIMER_HOLDTIME);
					break;
				case Timer_ConnectRetry:
					bgp_fsm(p, EVNT_TIMER_CONNRETRY);
					break;
				case Timer_Keepalive:
					bgp_fsm(p, EVNT_TIMER_KEEPALIVE);
					break;
				case Timer_IdleHold:
					bgp_fsm(p, EVNT_START);
					break;
				case Timer_IdleHoldReset:
					/*
					 * decay the idle hold time by half
					 * until it is back at the initial
					 * value, then clear the error count
					 */
					p->IdleHoldTime /= 2;
					if (p->IdleHoldTime <=
					    INTERVAL_IDLE_HOLD_INITIAL) {
						p->IdleHoldTime =
						    INTERVAL_IDLE_HOLD_INITIAL;
						timer_stop(p,
						    Timer_IdleHoldReset);
						p->errcnt = 0;
					} else
						timer_set(p,
						    Timer_IdleHoldReset,
						    p->IdleHoldTime);
					break;
				case Timer_CarpUndemote:
					timer_stop(p, Timer_CarpUndemote);
					if (p->demoted &&
					    p->state == STATE_ESTABLISHED)
						session_demote(p, -1);
					break;
				case Timer_RestartTimeout:
					timer_stop(p, Timer_RestartTimeout);
					session_graceful_stop(p);
					break;
				default:
					fatalx("King Bula lost in time");
				}
			}
			/* wake up no later than this peer's next timer */
			if ((nextaction = timer_nextduein(p)) != -1 &&
			    nextaction < timeout)
				timeout = nextaction;

			/* are we waiting for a write? */
			events = POLLIN;
			if (p->wbuf.queued > 0 || p->state == STATE_CONNECT)
				events |= POLLOUT;
			/* is there still work to do? */
			if (p->rpending)
				timeout = 0;

			/* poll events */
			if (p->fd != -1 && events != 0) {
				pfd[i].fd = p->fd;
				pfd[i].events = events;
				/* remember which peer owns this slot */
				peer_l[i - idx_listeners] = p;
				i++;
			}
		}

		/* pfd[idx_listeners .. idx_peers) are peer sockets */
		idx_peers = i;

		LIST_FOREACH(m, &mrthead, entry)
			if (m->wbuf.queued) {
				pfd[i].fd = m->wbuf.fd;
				pfd[i].events = POLLOUT;
				mrt_l[i - idx_peers] = m;
				i++;
			}

		/* pfd[idx_peers .. idx_mrts) are mrt dump descriptors */
		idx_mrts = i;

		/* pfd[idx_mrts .. i) are control connections */
		TAILQ_FOREACH(ctl_conn, &ctl_conns, entry) {
			pfd[i].fd = ctl_conn->ibuf.fd;
			pfd[i].events = POLLIN;
			if (ctl_conn->ibuf.w.queued > 0)
				pfd[i].events |= POLLOUT;
			i++;
		}

		/* while paused, wake up every second to re-check the pause */
		if (pauseaccept && timeout > 1)
			timeout = 1;
		if (timeout < 0)
			timeout = 0;
		/* timeout is in seconds, poll() takes milliseconds */
		if (poll(pfd, i, timeout * 1000) == -1)
			if (errno != EINTR)
				fatal("poll error");

		/*
		 * If we previously saw fd exhaustion, we stop accept()
		 * for 1 second to throttle the accept() loop.
		 */
		if (pauseaccept && getmonotime() > pauseaccept + 1)
			pauseaccept = 0;

		/* losing the parent ends the session engine */
		if (handle_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main) == -1) {
			log_warnx("SE: Lost connection to parent");
			session_quit = 1;
			continue;
		} else
			session_dispatch_imsg(ibuf_main, PFD_PIPE_MAIN,
			    &listener_cnt);

		/* losing an RDE pipe only drops that channel */
		if (handle_pollfd(&pfd[PFD_PIPE_ROUTE], ibuf_rde) == -1) {
			log_warnx("SE: Lost connection to RDE");
			msgbuf_clear(&ibuf_rde->w);
			free(ibuf_rde);
			ibuf_rde = NULL;
		} else
			session_dispatch_imsg(ibuf_rde, PFD_PIPE_ROUTE,
			    &listener_cnt);

		if (handle_pollfd(&pfd[PFD_PIPE_ROUTE_CTL], ibuf_rde_ctl) ==
		    -1) {
			log_warnx("SE: Lost connection to RDE control");
			msgbuf_clear(&ibuf_rde_ctl->w);
			free(ibuf_rde_ctl);
			ibuf_rde_ctl = NULL;
		} else
			session_dispatch_imsg(ibuf_rde_ctl, PFD_PIPE_ROUTE_CTL,
			    &listener_cnt);

		/* control_accept()'s return value is added to ctl_cnt */
		if (pfd[PFD_SOCK_CTL].revents & POLLIN)
			ctl_cnt += control_accept(csock, 0);

		if (pfd[PFD_SOCK_RCTL].revents & POLLIN)
			ctl_cnt += control_accept(rcsock, 1);

		if (pfd[PFD_SOCK_PFKEY].revents & POLLIN) {
			if (pfkey_read(pfkeysock, NULL) == -1) {
				log_warnx("pfkey_read failed, exiting...");
				session_quit = 1;
			}
		}

		/* j walks the dynamic slots in the order they were filled */
		for (j = PFD_LISTENERS_START; j < idx_listeners; j++)
			if (pfd[j].revents & POLLIN)
				session_accept(pfd[j].fd);

		for (; j < idx_peers; j++)
			session_dispatch_msg(&pfd[j],
			    peer_l[j - idx_listeners]);

		/* process whatever sits in the peers' read buffers */
		TAILQ_FOREACH(p, &conf->peers, entry)
			if (p->rbuf && p->rbuf->wpos)
				session_process_msg(p);

		for (; j < idx_mrts; j++)
			if (pfd[j].revents & POLLOUT)
				mrt_write(mrt_l[j - idx_peers]);

		for (; j < i; j++)
			control_dispatch_msg(&pfd[j], &ctl_cnt, &conf->peers);
	}

	/* shutdown: stop every peer with an administrative-down cease */
	while ((p = TAILQ_FIRST(&conf->peers)) != NULL) {
		TAILQ_REMOVE(&conf->peers, p, entry);
		strlcpy(p->conf.shutcomm,
		    "bgpd shutting down",
		    sizeof(p->conf.shutcomm));
		session_stop(p, ERR_CEASE_ADMIN_DOWN);
		timer_remove_all(p);
		pfkey_remove(p);
		free(p);
	}

	while ((m = LIST_FIRST(&mrthead)) != NULL) {
		mrt_clean(m);
		LIST_REMOVE(m, entry);
		free(m);
	}

	free_config(conf);
	free(peer_l);
	free(mrt_l);
	free(pfd);

	/* close pipes */
	if (ibuf_rde) {
		/* one attempt to flush queued messages before closing */
		msgbuf_write(&ibuf_rde->w);
		msgbuf_clear(&ibuf_rde->w);
		close(ibuf_rde->fd);
		free(ibuf_rde);
	}
	if (ibuf_rde_ctl) {
		msgbuf_clear(&ibuf_rde_ctl->w);
		close(ibuf_rde_ctl->fd);
		free(ibuf_rde_ctl);
	}
	msgbuf_write(&ibuf_main->w);
	msgbuf_clear(&ibuf_main->w);
	close(ibuf_main->fd);
	free(ibuf_main);

	control_shutdown(csock);
	control_shutdown(rcsock);
	log_info("session engine exiting");
	exit(0);
}
580 
581 void
582 init_peer(struct peer *p)
583 {
584 	TAILQ_INIT(&p->timers);
585 	p->fd = p->wbuf.fd = -1;
586 
587 	if (p->conf.if_depend[0])
588 		imsg_compose(ibuf_main, IMSG_IFINFO, 0, 0, -1,
589 		    p->conf.if_depend, sizeof(p->conf.if_depend));
590 	else
591 		p->depend_ok = 1;
592 
593 	peer_cnt++;
594 
595 	change_state(p, STATE_IDLE, EVNT_NONE);
596 	if (p->conf.down)
597 		timer_stop(p, Timer_IdleHold);		/* no autostart */
598 	else
599 		timer_set(p, Timer_IdleHold, 0);	/* start ASAP */
600 
601 	/*
602 	 * on startup, demote if requested.
603 	 * do not handle new peers. they must reach ESTABLISHED beforehands.
604 	 * peers added at runtime have reconf_action set to RECONF_REINIT.
605 	 */
606 	if (p->reconf_action != RECONF_REINIT && p->conf.demote_group[0])
607 		session_demote(p, +1);
608 }
609 
610 void
611 bgp_fsm(struct peer *peer, enum session_events event)
612 {
613 	switch (peer->state) {
614 	case STATE_NONE:
615 		/* nothing */
616 		break;
617 	case STATE_IDLE:
618 		switch (event) {
619 		case EVNT_START:
620 			timer_stop(peer, Timer_Hold);
621 			timer_stop(peer, Timer_Keepalive);
622 			timer_stop(peer, Timer_IdleHold);
623 
624 			/* allocate read buffer */
625 			peer->rbuf = calloc(1, sizeof(struct ibuf_read));
626 			if (peer->rbuf == NULL)
627 				fatal(NULL);
628 
629 			/* init write buffer */
630 			msgbuf_init(&peer->wbuf);
631 
632 			/* init pfkey - remove old if any, load new ones */
633 			pfkey_remove(peer);
634 			if (pfkey_establish(peer) == -1) {
635 				log_peer_warnx(&peer->conf,
636 				    "pfkey setup failed");
637 				return;
638 			}
639 
640 			peer->stats.last_sent_errcode = 0;
641 			peer->stats.last_sent_suberr = 0;
642 
643 			if (!peer->depend_ok)
644 				timer_stop(peer, Timer_ConnectRetry);
645 			else if (peer->passive || peer->conf.passive ||
646 			    peer->conf.template) {
647 				change_state(peer, STATE_ACTIVE, event);
648 				timer_stop(peer, Timer_ConnectRetry);
649 			} else {
650 				change_state(peer, STATE_CONNECT, event);
651 				timer_set(peer, Timer_ConnectRetry,
652 				    conf->connectretry);
653 				session_connect(peer);
654 			}
655 			peer->passive = 0;
656 			break;
657 		default:
658 			/* ignore */
659 			break;
660 		}
661 		break;
662 	case STATE_CONNECT:
663 		switch (event) {
664 		case EVNT_START:
665 			/* ignore */
666 			break;
667 		case EVNT_CON_OPEN:
668 			session_tcp_established(peer);
669 			session_open(peer);
670 			timer_stop(peer, Timer_ConnectRetry);
671 			peer->holdtime = INTERVAL_HOLD_INITIAL;
672 			start_timer_holdtime(peer);
673 			change_state(peer, STATE_OPENSENT, event);
674 			break;
675 		case EVNT_CON_OPENFAIL:
676 			timer_set(peer, Timer_ConnectRetry,
677 			    conf->connectretry);
678 			session_close_connection(peer);
679 			change_state(peer, STATE_ACTIVE, event);
680 			break;
681 		case EVNT_TIMER_CONNRETRY:
682 			timer_set(peer, Timer_ConnectRetry,
683 			    conf->connectretry);
684 			session_connect(peer);
685 			break;
686 		default:
687 			change_state(peer, STATE_IDLE, event);
688 			break;
689 		}
690 		break;
691 	case STATE_ACTIVE:
692 		switch (event) {
693 		case EVNT_START:
694 			/* ignore */
695 			break;
696 		case EVNT_CON_OPEN:
697 			session_tcp_established(peer);
698 			session_open(peer);
699 			timer_stop(peer, Timer_ConnectRetry);
700 			peer->holdtime = INTERVAL_HOLD_INITIAL;
701 			start_timer_holdtime(peer);
702 			change_state(peer, STATE_OPENSENT, event);
703 			break;
704 		case EVNT_CON_OPENFAIL:
705 			timer_set(peer, Timer_ConnectRetry,
706 			    conf->connectretry);
707 			session_close_connection(peer);
708 			change_state(peer, STATE_ACTIVE, event);
709 			break;
710 		case EVNT_TIMER_CONNRETRY:
711 			timer_set(peer, Timer_ConnectRetry,
712 			    peer->holdtime);
713 			change_state(peer, STATE_CONNECT, event);
714 			session_connect(peer);
715 			break;
716 		default:
717 			change_state(peer, STATE_IDLE, event);
718 			break;
719 		}
720 		break;
721 	case STATE_OPENSENT:
722 		switch (event) {
723 		case EVNT_START:
724 			/* ignore */
725 			break;
726 		case EVNT_STOP:
727 			change_state(peer, STATE_IDLE, event);
728 			break;
729 		case EVNT_CON_CLOSED:
730 			session_close_connection(peer);
731 			timer_set(peer, Timer_ConnectRetry,
732 			    conf->connectretry);
733 			change_state(peer, STATE_ACTIVE, event);
734 			break;
735 		case EVNT_CON_FATAL:
736 			change_state(peer, STATE_IDLE, event);
737 			break;
738 		case EVNT_TIMER_HOLDTIME:
739 			session_notification(peer, ERR_HOLDTIMEREXPIRED,
740 			    0, NULL, 0);
741 			change_state(peer, STATE_IDLE, event);
742 			break;
743 		case EVNT_RCVD_OPEN:
744 			/* parse_open calls change_state itself on failure */
745 			if (parse_open(peer))
746 				break;
747 			session_keepalive(peer);
748 			change_state(peer, STATE_OPENCONFIRM, event);
749 			break;
750 		case EVNT_RCVD_NOTIFICATION:
751 			if (parse_notification(peer)) {
752 				change_state(peer, STATE_IDLE, event);
753 				/* don't punish, capa negotiation */
754 				timer_set(peer, Timer_IdleHold, 0);
755 				peer->IdleHoldTime /= 2;
756 			} else
757 				change_state(peer, STATE_IDLE, event);
758 			break;
759 		default:
760 			session_notification(peer,
761 			    ERR_FSM, ERR_FSM_UNEX_OPENSENT, NULL, 0);
762 			change_state(peer, STATE_IDLE, event);
763 			break;
764 		}
765 		break;
766 	case STATE_OPENCONFIRM:
767 		switch (event) {
768 		case EVNT_START:
769 			/* ignore */
770 			break;
771 		case EVNT_STOP:
772 			change_state(peer, STATE_IDLE, event);
773 			break;
774 		case EVNT_CON_CLOSED:
775 		case EVNT_CON_FATAL:
776 			change_state(peer, STATE_IDLE, event);
777 			break;
778 		case EVNT_TIMER_HOLDTIME:
779 			session_notification(peer, ERR_HOLDTIMEREXPIRED,
780 			    0, NULL, 0);
781 			change_state(peer, STATE_IDLE, event);
782 			break;
783 		case EVNT_TIMER_KEEPALIVE:
784 			session_keepalive(peer);
785 			break;
786 		case EVNT_RCVD_KEEPALIVE:
787 			start_timer_holdtime(peer);
788 			change_state(peer, STATE_ESTABLISHED, event);
789 			break;
790 		case EVNT_RCVD_NOTIFICATION:
791 			parse_notification(peer);
792 			change_state(peer, STATE_IDLE, event);
793 			break;
794 		default:
795 			session_notification(peer,
796 			    ERR_FSM, ERR_FSM_UNEX_OPENCONFIRM, NULL, 0);
797 			change_state(peer, STATE_IDLE, event);
798 			break;
799 		}
800 		break;
801 	case STATE_ESTABLISHED:
802 		switch (event) {
803 		case EVNT_START:
804 			/* ignore */
805 			break;
806 		case EVNT_STOP:
807 			change_state(peer, STATE_IDLE, event);
808 			break;
809 		case EVNT_CON_CLOSED:
810 		case EVNT_CON_FATAL:
811 			change_state(peer, STATE_IDLE, event);
812 			break;
813 		case EVNT_TIMER_HOLDTIME:
814 			session_notification(peer, ERR_HOLDTIMEREXPIRED,
815 			    0, NULL, 0);
816 			change_state(peer, STATE_IDLE, event);
817 			break;
818 		case EVNT_TIMER_KEEPALIVE:
819 			session_keepalive(peer);
820 			break;
821 		case EVNT_RCVD_KEEPALIVE:
822 			start_timer_holdtime(peer);
823 			break;
824 		case EVNT_RCVD_UPDATE:
825 			start_timer_holdtime(peer);
826 			if (parse_update(peer))
827 				change_state(peer, STATE_IDLE, event);
828 			else
829 				start_timer_holdtime(peer);
830 			break;
831 		case EVNT_RCVD_NOTIFICATION:
832 			parse_notification(peer);
833 			change_state(peer, STATE_IDLE, event);
834 			break;
835 		default:
836 			session_notification(peer,
837 			    ERR_FSM, ERR_FSM_UNEX_ESTABLISHED, NULL, 0);
838 			change_state(peer, STATE_IDLE, event);
839 			break;
840 		}
841 		break;
842 	}
843 }
844 
845 void
846 start_timer_holdtime(struct peer *peer)
847 {
848 	if (peer->holdtime > 0)
849 		timer_set(peer, Timer_Hold, peer->holdtime);
850 	else
851 		timer_stop(peer, Timer_Hold);
852 }
853 
854 void
855 start_timer_keepalive(struct peer *peer)
856 {
857 	if (peer->holdtime > 0)
858 		timer_set(peer, Timer_Keepalive, peer->holdtime / 3);
859 	else
860 		timer_stop(peer, Timer_Keepalive);
861 }
862 
863 void
864 session_close_connection(struct peer *peer)
865 {
866 	if (peer->fd != -1) {
867 		close(peer->fd);
868 		pauseaccept = 0;
869 	}
870 	peer->fd = peer->wbuf.fd = -1;
871 }
872 
/*
 * Move a peer to a new FSM state and perform the work tied to entering
 * that state.
 *
 * peer:  the neighbor changing state
 * state: the state being entered
 * event: the event that caused the transition
 *
 * peer->state still holds the old state while the per-state work runs;
 * it is updated, together with prev_state, only at the very end, after
 * the transition was logged and written to matching mrt dumps.
 */
void
change_state(struct peer *peer, enum session_state state,
    enum session_events event)
{
	struct mrt	*mrt;

	switch (state) {
	case STATE_IDLE:
		/* carp demotion first. new peers handled in init_peer */
		if (peer->state == STATE_ESTABLISHED &&
		    peer->conf.demote_group[0] && !peer->demoted)
			session_demote(peer, +1);

		/*
		 * try to write out what's buffered (maybe a notification),
		 * don't bother if it fails
		 */
		if (peer->state >= STATE_OPENSENT && peer->wbuf.queued)
			msgbuf_write(&peer->wbuf);

		/*
		 * we must start the timer for the next EVNT_START
		 * if we are coming here due to an error and the
		 * session was not established successfully before, the
		 * starttimerinterval needs to be exponentially increased
		 */
		if (peer->IdleHoldTime == 0)
			peer->IdleHoldTime = INTERVAL_IDLE_HOLD_INITIAL;
		peer->holdtime = INTERVAL_HOLD_INITIAL;
		timer_stop(peer, Timer_ConnectRetry);
		timer_stop(peer, Timer_Keepalive);
		timer_stop(peer, Timer_Hold);
		timer_stop(peer, Timer_IdleHold);
		timer_stop(peer, Timer_IdleHoldReset);
		/* drop the socket, both buffers and the peer's capabilities */
		session_close_connection(peer);
		msgbuf_clear(&peer->wbuf);
		free(peer->rbuf);
		peer->rbuf = NULL;
		bzero(&peer->capa.peer, sizeof(peer->capa.peer));

		/* an explicit stop does not schedule a restart */
		if (event != EVNT_STOP) {
			timer_set(peer, Timer_IdleHold, peer->IdleHoldTime);
			/* back off: double the hold time for the next round */
			if (event != EVNT_NONE &&
			    peer->IdleHoldTime < MAX_IDLE_HOLD/2)
				peer->IdleHoldTime *= 2;
		}
		if (peer->state == STATE_ESTABLISHED) {
			if (peer->capa.neg.grestart.restart == 2 &&
			    (event == EVNT_CON_CLOSED ||
			    event == EVNT_CON_FATAL)) {
				/* don't punish graceful restart */
				timer_set(peer, Timer_IdleHold, 0);
				peer->IdleHoldTime /= 2;
				session_graceful_restart(peer);
			} else
				session_down(peer);
		}
		if (peer->state == STATE_NONE ||
		    peer->state == STATE_ESTABLISHED) {
			/* initialize capability negotiation structures */
			memcpy(&peer->capa.ann, &peer->conf.capabilities,
			    sizeof(peer->capa.ann));
			if (!peer->conf.announce_capa)
				session_capa_ann_none(peer);
		}
		break;
	case STATE_CONNECT:
		/*
		 * ESTABLISHED -> CONNECT only happens for a graceful
		 * restart: same teardown as for IDLE, except that rbuf is
		 * kept and no IdleHold timer is armed.
		 */
		if (peer->state == STATE_ESTABLISHED &&
		    peer->capa.neg.grestart.restart == 2) {
			/* do the graceful restart dance */
			session_graceful_restart(peer);
			peer->holdtime = INTERVAL_HOLD_INITIAL;
			timer_stop(peer, Timer_ConnectRetry);
			timer_stop(peer, Timer_Keepalive);
			timer_stop(peer, Timer_Hold);
			timer_stop(peer, Timer_IdleHold);
			timer_stop(peer, Timer_IdleHoldReset);
			session_close_connection(peer);
			msgbuf_clear(&peer->wbuf);
			bzero(&peer->capa.peer, sizeof(peer->capa.peer));
		}
		break;
	case STATE_ACTIVE:
		break;
	case STATE_OPENSENT:
		break;
	case STATE_OPENCONFIRM:
		break;
	case STATE_ESTABLISHED:
		/* start decaying the idle hold time now the session is up */
		timer_set(peer, Timer_IdleHoldReset, peer->IdleHoldTime);
		if (peer->demoted)
			timer_set(peer, Timer_CarpUndemote,
			    INTERVAL_HOLD_DEMOTED);
		session_up(peer);
		break;
	default:		/* unknown target state, should not happen */
		break;
	}

	log_statechange(peer, state, event);
	/*
	 * record the transition in every all-in/all-out mrt dump that is
	 * either unrestricted or bound to this peer or its group
	 */
	LIST_FOREACH(mrt, &mrthead, entry) {
		if (!(mrt->type == MRT_ALL_IN || mrt->type == MRT_ALL_OUT))
			continue;
		if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
		    mrt->peer_id == peer->conf.id || (mrt->group_id != 0 &&
		    mrt->group_id == peer->conf.groupid))
			mrt_dump_state(mrt, peer->state, state, peer);
	}
	peer->prev_state = peer->state;
	peer->state = state;
}
984 
985 void
986 session_accept(int listenfd)
987 {
988 	int			 connfd;
989 	int			 opt;
990 	socklen_t		 len;
991 	struct sockaddr_storage	 cliaddr;
992 	struct peer		*p = NULL;
993 
994 	len = sizeof(cliaddr);
995 	if ((connfd = accept4(listenfd,
996 	    (struct sockaddr *)&cliaddr, &len,
997 	    SOCK_CLOEXEC | SOCK_NONBLOCK)) == -1) {
998 		if (errno == ENFILE || errno == EMFILE)
999 			pauseaccept = getmonotime();
1000 		else if (errno != EWOULDBLOCK && errno != EINTR &&
1001 		    errno != ECONNABORTED)
1002 			log_warn("accept");
1003 		return;
1004 	}
1005 
1006 	p = getpeerbyip(conf, (struct sockaddr *)&cliaddr);
1007 
1008 	if (p != NULL && p->state == STATE_IDLE && p->errcnt < 2) {
1009 		if (timer_running(p, Timer_IdleHold, NULL)) {
1010 			/* fast reconnect after clear */
1011 			p->passive = 1;
1012 			bgp_fsm(p, EVNT_START);
1013 		}
1014 	}
1015 
1016 	if (p != NULL &&
1017 	    (p->state == STATE_CONNECT || p->state == STATE_ACTIVE)) {
1018 		if (p->fd != -1) {
1019 			if (p->state == STATE_CONNECT)
1020 				session_close_connection(p);
1021 			else {
1022 				close(connfd);
1023 				return;
1024 			}
1025 		}
1026 
1027 open:
1028 		if (p->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) {
1029 			log_peer_warnx(&p->conf,
1030 			    "ipsec or md5sig configured but not available");
1031 			close(connfd);
1032 			return;
1033 		}
1034 
1035 		if (p->conf.auth.method == AUTH_MD5SIG) {
1036 			if (sysdep.no_md5sig) {
1037 				log_peer_warnx(&p->conf,
1038 				    "md5sig configured but not available");
1039 				close(connfd);
1040 				return;
1041 			}
1042 			len = sizeof(opt);
1043 			if (getsockopt(connfd, IPPROTO_TCP, TCP_MD5SIG,
1044 			    &opt, &len) == -1)
1045 				fatal("getsockopt TCP_MD5SIG");
1046 			if (!opt) {	/* non-md5'd connection! */
1047 				log_peer_warnx(&p->conf,
1048 				    "connection attempt without md5 signature");
1049 				close(connfd);
1050 				return;
1051 			}
1052 		}
1053 		p->fd = p->wbuf.fd = connfd;
1054 		if (session_setup_socket(p)) {
1055 			close(connfd);
1056 			return;
1057 		}
1058 		bgp_fsm(p, EVNT_CON_OPEN);
1059 		return;
1060 	} else if (p != NULL && p->state == STATE_ESTABLISHED &&
1061 	    p->capa.neg.grestart.restart == 2) {
1062 		/* first do the graceful restart dance */
1063 		change_state(p, STATE_CONNECT, EVNT_CON_CLOSED);
1064 		/* then do part of the open dance */
1065 		goto open;
1066 	} else {
1067 		log_conn_attempt(p, (struct sockaddr *)&cliaddr, len);
1068 		close(connfd);
1069 	}
1070 }
1071 
1072 int
1073 session_connect(struct peer *peer)
1074 {
1075 	int			 opt = 1;
1076 	struct sockaddr		*sa;
1077 	socklen_t		 sa_len;
1078 
1079 	/*
1080 	 * we do not need the overcomplicated collision detection RFC 1771
1081 	 * describes; we simply make sure there is only ever one concurrent
1082 	 * tcp connection per peer.
1083 	 */
1084 	if (peer->fd != -1)
1085 		return (-1);
1086 
1087 	if ((peer->fd = socket(aid2af(peer->conf.remote_addr.aid),
1088 	    SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, IPPROTO_TCP)) == -1) {
1089 		log_peer_warn(&peer->conf, "session_connect socket");
1090 		bgp_fsm(peer, EVNT_CON_OPENFAIL);
1091 		return (-1);
1092 	}
1093 
1094 	if (peer->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) {
1095 		log_peer_warnx(&peer->conf,
1096 		    "ipsec or md5sig configured but not available");
1097 		bgp_fsm(peer, EVNT_CON_OPENFAIL);
1098 		return (-1);
1099 	}
1100 
1101 	if (peer->conf.auth.method == AUTH_MD5SIG) {
1102 		if (sysdep.no_md5sig) {
1103 			log_peer_warnx(&peer->conf,
1104 			    "md5sig configured but not available");
1105 			bgp_fsm(peer, EVNT_CON_OPENFAIL);
1106 			return (-1);
1107 		}
1108 		if (setsockopt(peer->fd, IPPROTO_TCP, TCP_MD5SIG,
1109 		    &opt, sizeof(opt)) == -1) {
1110 			log_peer_warn(&peer->conf, "setsockopt md5sig");
1111 			bgp_fsm(peer, EVNT_CON_OPENFAIL);
1112 			return (-1);
1113 		}
1114 	}
1115 	peer->wbuf.fd = peer->fd;
1116 
1117 	/* if update source is set we need to bind() */
1118 	if ((sa = addr2sa(&peer->conf.local_addr, 0, &sa_len)) != NULL) {
1119 		if (bind(peer->fd, sa, sa_len) == -1) {
1120 			log_peer_warn(&peer->conf, "session_connect bind");
1121 			bgp_fsm(peer, EVNT_CON_OPENFAIL);
1122 			return (-1);
1123 		}
1124 	}
1125 
1126 	if (session_setup_socket(peer)) {
1127 		bgp_fsm(peer, EVNT_CON_OPENFAIL);
1128 		return (-1);
1129 	}
1130 
1131 	sa = addr2sa(&peer->conf.remote_addr, BGP_PORT, &sa_len);
1132 	if (connect(peer->fd, sa, sa_len) == -1) {
1133 		if (errno != EINPROGRESS) {
1134 			if (errno != peer->lasterr)
1135 				log_peer_warn(&peer->conf, "connect");
1136 			peer->lasterr = errno;
1137 			bgp_fsm(peer, EVNT_CON_OPENFAIL);
1138 			return (-1);
1139 		}
1140 	} else
1141 		bgp_fsm(peer, EVNT_CON_OPEN);
1142 
1143 	return (0);
1144 }
1145 
1146 int
1147 session_setup_socket(struct peer *p)
1148 {
1149 	int	ttl = p->conf.distance;
1150 	int	pre = IPTOS_PREC_INTERNETCONTROL;
1151 	int	nodelay = 1;
1152 	int	bsize;
1153 
1154 	switch (p->conf.remote_addr.aid) {
1155 	case AID_INET:
1156 		/* set precedence, see RFC 1771 appendix 5 */
1157 		if (setsockopt(p->fd, IPPROTO_IP, IP_TOS, &pre, sizeof(pre)) ==
1158 		    -1) {
1159 			log_peer_warn(&p->conf,
1160 			    "session_setup_socket setsockopt TOS");
1161 			return (-1);
1162 		}
1163 
1164 		if (p->conf.ebgp) {
1165 			/* set TTL to foreign router's distance
1166 			   1=direct n=multihop with ttlsec, we always use 255 */
1167 			if (p->conf.ttlsec) {
1168 				ttl = 256 - p->conf.distance;
1169 				if (setsockopt(p->fd, IPPROTO_IP, IP_MINTTL,
1170 				    &ttl, sizeof(ttl)) == -1) {
1171 					log_peer_warn(&p->conf,
1172 					    "session_setup_socket: "
1173 					    "setsockopt MINTTL");
1174 					return (-1);
1175 				}
1176 				ttl = 255;
1177 			}
1178 
1179 			if (setsockopt(p->fd, IPPROTO_IP, IP_TTL, &ttl,
1180 			    sizeof(ttl)) == -1) {
1181 				log_peer_warn(&p->conf,
1182 				    "session_setup_socket setsockopt TTL");
1183 				return (-1);
1184 			}
1185 		}
1186 		break;
1187 	case AID_INET6:
1188 		if (p->conf.ebgp) {
1189 			/* set hoplimit to foreign router's distance
1190 			   1=direct n=multihop with ttlsec, we always use 255 */
1191 			if (p->conf.ttlsec) {
1192 				ttl = 256 - p->conf.distance;
1193 				if (setsockopt(p->fd, IPPROTO_IPV6,
1194 				    IPV6_MINHOPCOUNT, &ttl, sizeof(ttl))
1195 				    == -1) {
1196 					log_peer_warn(&p->conf,
1197 					    "session_setup_socket: "
1198 					    "setsockopt MINHOPCOUNT");
1199 					return (-1);
1200 				}
1201 				ttl = 255;
1202 			}
1203 			if (setsockopt(p->fd, IPPROTO_IPV6, IPV6_UNICAST_HOPS,
1204 			    &ttl, sizeof(ttl)) == -1) {
1205 				log_peer_warn(&p->conf,
1206 				    "session_setup_socket setsockopt hoplimit");
1207 				return (-1);
1208 			}
1209 		}
1210 		break;
1211 	}
1212 
1213 	/* set TCP_NODELAY */
1214 	if (setsockopt(p->fd, IPPROTO_TCP, TCP_NODELAY, &nodelay,
1215 	    sizeof(nodelay)) == -1) {
1216 		log_peer_warn(&p->conf,
1217 		    "session_setup_socket setsockopt TCP_NODELAY");
1218 		return (-1);
1219 	}
1220 
1221 	/* only increase bufsize (and thus window) if md5 or ipsec is in use */
1222 	if (p->conf.auth.method != AUTH_NONE) {
1223 		/* try to increase bufsize. no biggie if it fails */
1224 		bsize = 65535;
1225 		while (bsize > 8192 &&
1226 		    setsockopt(p->fd, SOL_SOCKET, SO_RCVBUF, &bsize,
1227 		    sizeof(bsize)) == -1 && errno != EINVAL)
1228 			bsize /= 2;
1229 		bsize = 65535;
1230 		while (bsize > 8192 &&
1231 		    setsockopt(p->fd, SOL_SOCKET, SO_SNDBUF, &bsize,
1232 		    sizeof(bsize)) == -1 && errno != EINVAL)
1233 			bsize /= 2;
1234 	}
1235 
1236 	return (0);
1237 }
1238 
1239 void
1240 session_tcp_established(struct peer *peer)
1241 {
1242 	struct sockaddr_storage	ss;
1243 	socklen_t		len;
1244 
1245 	len = sizeof(ss);
1246 	if (getsockname(peer->fd, (struct sockaddr *)&ss, &len) == -1)
1247 		log_warn("getsockname");
1248 	sa2addr((struct sockaddr *)&ss, &peer->local, &peer->local_port);
1249 	len = sizeof(ss);
1250 	if (getpeername(peer->fd, (struct sockaddr *)&ss, &len) == -1)
1251 		log_warn("getpeername");
1252 	sa2addr((struct sockaddr *)&ss, &peer->remote, &peer->remote_port);
1253 }
1254 
1255 void
1256 session_capa_ann_none(struct peer *peer)
1257 {
1258 	bzero(&peer->capa.ann, sizeof(peer->capa.ann));
1259 }
1260 
/*
 * Append a capability header (one byte code, one byte length) to opb.
 * Returns the sum of the ibuf_add() results, i.e. 0 if both succeeded.
 */
int
session_capa_add(struct ibuf *opb, u_int8_t capa_code, u_int8_t capa_len)
{
	int	failed;

	failed = ibuf_add(opb, &capa_code, sizeof(capa_code));
	failed += ibuf_add(opb, &capa_len, sizeof(capa_len));

	return (failed);
}
1270 
/*
 * Append the 4 byte value of a multiprotocol capability for the given
 * AID to buf: AFI (network byte order), one zero byte, SAFI.
 * An AID that aid2afi() cannot translate is fatal.
 * Returns the sum of the ibuf_add() results, i.e. 0 if all succeeded.
 */
int
session_capa_add_mp(struct ibuf *buf, u_int8_t aid)
{
	u_int16_t	 afi, netafi;
	u_int8_t	 safi;
	u_int8_t	 reserved = 0;
	int		 failed;

	if (aid2afi(aid, &afi, &safi) == -1)
		fatalx("session_capa_add_mp: bad afi/safi pair");

	netafi = htons(afi);
	failed = ibuf_add(buf, &netafi, sizeof(netafi));
	failed += ibuf_add(buf, &reserved, sizeof(reserved));
	failed += ibuf_add(buf, &safi, sizeof(safi));

	return (failed);
}
1287 
1288 int
1289 session_capa_add_gr(struct peer *p, struct ibuf *b, u_int8_t aid)
1290 {
1291 	u_int		errs = 0;
1292 	u_int16_t	afi;
1293 	u_int8_t	flags, safi;
1294 
1295 	if (aid2afi(aid, &afi, &safi)) {
1296 		log_warn("session_capa_add_gr: bad AID");
1297 		return (1);
1298 	}
1299 	if (p->capa.neg.grestart.flags[aid] & CAPA_GR_RESTARTING)
1300 		flags = CAPA_GR_F_FLAG;
1301 	else
1302 		flags = 0;
1303 
1304 	afi = htons(afi);
1305 	errs += ibuf_add(b, &afi, sizeof(afi));
1306 	errs += ibuf_add(b, &safi, sizeof(safi));
1307 	errs += ibuf_add(b, &flags, sizeof(flags));
1308 
1309 	return (errs);
1310 }
1311 
1312 struct bgp_msg *
1313 session_newmsg(enum msg_type msgtype, u_int16_t len)
1314 {
1315 	struct bgp_msg		*msg;
1316 	struct msg_header	 hdr;
1317 	struct ibuf		*buf;
1318 	int			 errs = 0;
1319 
1320 	memset(&hdr.marker, 0xff, sizeof(hdr.marker));
1321 	hdr.len = htons(len);
1322 	hdr.type = msgtype;
1323 
1324 	if ((buf = ibuf_open(len)) == NULL)
1325 		return (NULL);
1326 
1327 	errs += ibuf_add(buf, &hdr.marker, sizeof(hdr.marker));
1328 	errs += ibuf_add(buf, &hdr.len, sizeof(hdr.len));
1329 	errs += ibuf_add(buf, &hdr.type, sizeof(hdr.type));
1330 
1331 	if (errs || (msg = calloc(1, sizeof(*msg))) == NULL) {
1332 		ibuf_free(buf);
1333 		return (NULL);
1334 	}
1335 
1336 	msg->buf = buf;
1337 	msg->type = msgtype;
1338 	msg->len = len;
1339 
1340 	return (msg);
1341 }
1342 
1343 int
1344 session_sendmsg(struct bgp_msg *msg, struct peer *p)
1345 {
1346 	struct mrt		*mrt;
1347 
1348 	LIST_FOREACH(mrt, &mrthead, entry) {
1349 		if (!(mrt->type == MRT_ALL_OUT || (msg->type == UPDATE &&
1350 		    mrt->type == MRT_UPDATE_OUT)))
1351 			continue;
1352 		if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
1353 		    mrt->peer_id == p->conf.id || (mrt->group_id != 0 &&
1354 		    mrt->group_id == p->conf.groupid))
1355 			mrt_dump_bgp_msg(mrt, msg->buf->buf, msg->len, p);
1356 	}
1357 
1358 	ibuf_close(&p->wbuf, msg->buf);
1359 	if (!p->throttled && p->wbuf.queued > SESS_MSG_HIGH_MARK) {
1360 		if (imsg_rde(IMSG_XOFF, p->conf.id, NULL, 0) == -1)
1361 			log_peer_warn(&p->conf, "imsg_compose XOFF");
1362 		else
1363 			p->throttled = 1;
1364 	}
1365 
1366 	free(msg);
1367 	return (0);
1368 }
1369 
1370 void
1371 session_open(struct peer *p)
1372 {
1373 	struct bgp_msg		*buf;
1374 	struct ibuf		*opb;
1375 	struct msg_open		 msg;
1376 	u_int16_t		 len;
1377 	u_int8_t		 i, op_type, optparamlen = 0;
1378 	int			 errs = 0;
1379 	int			 mpcapa = 0;
1380 
1381 
1382 	if ((opb = ibuf_dynamic(0, UCHAR_MAX - sizeof(op_type) -
1383 	    sizeof(optparamlen))) == NULL) {
1384 		bgp_fsm(p, EVNT_CON_FATAL);
1385 		return;
1386 	}
1387 
1388 	/* multiprotocol extensions, RFC 4760 */
1389 	for (i = 0; i < AID_MAX; i++)
1390 		if (p->capa.ann.mp[i]) {	/* 4 bytes data */
1391 			errs += session_capa_add(opb, CAPA_MP, 4);
1392 			errs += session_capa_add_mp(opb, i);
1393 			mpcapa++;
1394 		}
1395 
1396 	/* route refresh, RFC 2918 */
1397 	if (p->capa.ann.refresh)	/* no data */
1398 		errs += session_capa_add(opb, CAPA_REFRESH, 0);
1399 
1400 	/* graceful restart and End-of-RIB marker, RFC 4724 */
1401 	if (p->capa.ann.grestart.restart) {
1402 		int		rst = 0;
1403 		u_int16_t	hdr;
1404 		u_int8_t	grlen;
1405 
1406 		if (mpcapa) {
1407 			grlen = 2 + 4 * mpcapa;
1408 			for (i = 0; i < AID_MAX; i++) {
1409 				if (p->capa.neg.grestart.flags[i] &
1410 				    CAPA_GR_RESTARTING)
1411 					rst++;
1412 			}
1413 		} else {	/* AID_INET */
1414 			grlen = 2 + 4;
1415 			if (p->capa.neg.grestart.flags[AID_INET] &
1416 			    CAPA_GR_RESTARTING)
1417 				rst++;
1418 		}
1419 
1420 		hdr = conf->holdtime;		/* default timeout */
1421 		/* if client does graceful restart don't set R flag */
1422 		if (!rst)
1423 			hdr |= CAPA_GR_R_FLAG;
1424 		hdr = htons(hdr);
1425 
1426 		errs += session_capa_add(opb, CAPA_RESTART, grlen);
1427 		errs += ibuf_add(opb, &hdr, sizeof(hdr));
1428 
1429 		if (mpcapa) {
1430 			for (i = 0; i < AID_MAX; i++) {
1431 				if (p->capa.ann.mp[i]) {
1432 					errs += session_capa_add_gr(p, opb, i);
1433 				}
1434 			}
1435 		} else {	/* AID_INET */
1436 			errs += session_capa_add_gr(p, opb, AID_INET);
1437 		}
1438 	}
1439 
1440 	/* 4-bytes AS numbers, draft-ietf-idr-as4bytes-13 */
1441 	if (p->capa.ann.as4byte) {	/* 4 bytes data */
1442 		u_int32_t	nas;
1443 
1444 		nas = htonl(p->conf.local_as);
1445 		errs += session_capa_add(opb, CAPA_AS4BYTE, sizeof(nas));
1446 		errs += ibuf_add(opb, &nas, sizeof(nas));
1447 	}
1448 
1449 	if (ibuf_size(opb))
1450 		optparamlen = ibuf_size(opb) + sizeof(op_type) +
1451 		    sizeof(optparamlen);
1452 
1453 	len = MSGSIZE_OPEN_MIN + optparamlen;
1454 	if (errs || (buf = session_newmsg(OPEN, len)) == NULL) {
1455 		ibuf_free(opb);
1456 		bgp_fsm(p, EVNT_CON_FATAL);
1457 		return;
1458 	}
1459 
1460 	msg.version = 4;
1461 	msg.myas = htons(p->conf.local_short_as);
1462 	if (p->conf.holdtime)
1463 		msg.holdtime = htons(p->conf.holdtime);
1464 	else
1465 		msg.holdtime = htons(conf->holdtime);
1466 	msg.bgpid = conf->bgpid;	/* is already in network byte order */
1467 	msg.optparamlen = optparamlen;
1468 
1469 	errs += ibuf_add(buf->buf, &msg.version, sizeof(msg.version));
1470 	errs += ibuf_add(buf->buf, &msg.myas, sizeof(msg.myas));
1471 	errs += ibuf_add(buf->buf, &msg.holdtime, sizeof(msg.holdtime));
1472 	errs += ibuf_add(buf->buf, &msg.bgpid, sizeof(msg.bgpid));
1473 	errs += ibuf_add(buf->buf, &msg.optparamlen, sizeof(msg.optparamlen));
1474 
1475 	if (optparamlen) {
1476 		op_type = OPT_PARAM_CAPABILITIES;
1477 		optparamlen = ibuf_size(opb);
1478 		errs += ibuf_add(buf->buf, &op_type, sizeof(op_type));
1479 		errs += ibuf_add(buf->buf, &optparamlen, sizeof(optparamlen));
1480 		errs += ibuf_add(buf->buf, opb->buf, ibuf_size(opb));
1481 	}
1482 
1483 	ibuf_free(opb);
1484 
1485 	if (errs) {
1486 		ibuf_free(buf->buf);
1487 		free(buf);
1488 		bgp_fsm(p, EVNT_CON_FATAL);
1489 		return;
1490 	}
1491 
1492 	if (session_sendmsg(buf, p) == -1) {
1493 		bgp_fsm(p, EVNT_CON_FATAL);
1494 		return;
1495 	}
1496 
1497 	p->stats.msg_sent_open++;
1498 }
1499 
1500 void
1501 session_keepalive(struct peer *p)
1502 {
1503 	struct bgp_msg		*buf;
1504 
1505 	if ((buf = session_newmsg(KEEPALIVE, MSGSIZE_KEEPALIVE)) == NULL ||
1506 	    session_sendmsg(buf, p) == -1) {
1507 		bgp_fsm(p, EVNT_CON_FATAL);
1508 		return;
1509 	}
1510 
1511 	start_timer_keepalive(p);
1512 	p->stats.msg_sent_keepalive++;
1513 }
1514 
1515 void
1516 session_update(u_int32_t peerid, void *data, size_t datalen)
1517 {
1518 	struct peer		*p;
1519 	struct bgp_msg		*buf;
1520 
1521 	if ((p = getpeerbyid(conf, peerid)) == NULL) {
1522 		log_warnx("no such peer: id=%u", peerid);
1523 		return;
1524 	}
1525 
1526 	if (p->state != STATE_ESTABLISHED)
1527 		return;
1528 
1529 	if ((buf = session_newmsg(UPDATE, MSGSIZE_HEADER + datalen)) == NULL) {
1530 		bgp_fsm(p, EVNT_CON_FATAL);
1531 		return;
1532 	}
1533 
1534 	if (ibuf_add(buf->buf, data, datalen)) {
1535 		ibuf_free(buf->buf);
1536 		free(buf);
1537 		bgp_fsm(p, EVNT_CON_FATAL);
1538 		return;
1539 	}
1540 
1541 	if (session_sendmsg(buf, p) == -1) {
1542 		bgp_fsm(p, EVNT_CON_FATAL);
1543 		return;
1544 	}
1545 
1546 	start_timer_keepalive(p);
1547 	p->stats.msg_sent_update++;
1548 }
1549 
1550 void
1551 session_notification(struct peer *p, u_int8_t errcode, u_int8_t subcode,
1552     void *data, ssize_t datalen)
1553 {
1554 	struct bgp_msg		*buf;
1555 	int			 errs = 0;
1556 
1557 	if (p->stats.last_sent_errcode)	/* some notification already sent */
1558 		return;
1559 
1560 	log_notification(p, errcode, subcode, data, datalen, "sending");
1561 
1562 	if ((buf = session_newmsg(NOTIFICATION,
1563 	    MSGSIZE_NOTIFICATION_MIN + datalen)) == NULL) {
1564 		bgp_fsm(p, EVNT_CON_FATAL);
1565 		return;
1566 	}
1567 
1568 	errs += ibuf_add(buf->buf, &errcode, sizeof(errcode));
1569 	errs += ibuf_add(buf->buf, &subcode, sizeof(subcode));
1570 
1571 	if (datalen > 0)
1572 		errs += ibuf_add(buf->buf, data, datalen);
1573 
1574 	if (errs) {
1575 		ibuf_free(buf->buf);
1576 		free(buf);
1577 		bgp_fsm(p, EVNT_CON_FATAL);
1578 		return;
1579 	}
1580 
1581 	if (session_sendmsg(buf, p) == -1) {
1582 		bgp_fsm(p, EVNT_CON_FATAL);
1583 		return;
1584 	}
1585 
1586 	p->stats.msg_sent_notification++;
1587 	p->stats.last_sent_errcode = errcode;
1588 	p->stats.last_sent_suberr = subcode;
1589 }
1590 
1591 int
1592 session_neighbor_rrefresh(struct peer *p)
1593 {
1594 	u_int8_t	i;
1595 
1596 	if (!p->capa.peer.refresh)
1597 		return (-1);
1598 
1599 	for (i = 0; i < AID_MAX; i++) {
1600 		if (p->capa.peer.mp[i] != 0)
1601 			session_rrefresh(p, i);
1602 	}
1603 
1604 	return (0);
1605 }
1606 
1607 void
1608 session_rrefresh(struct peer *p, u_int8_t aid)
1609 {
1610 	struct bgp_msg		*buf;
1611 	int			 errs = 0;
1612 	u_int16_t		 afi;
1613 	u_int8_t		 safi, null8 = 0;
1614 
1615 	if (aid2afi(aid, &afi, &safi) == -1)
1616 		fatalx("session_rrefresh: bad afi/safi pair");
1617 
1618 	if ((buf = session_newmsg(RREFRESH, MSGSIZE_RREFRESH)) == NULL) {
1619 		bgp_fsm(p, EVNT_CON_FATAL);
1620 		return;
1621 	}
1622 
1623 	afi = htons(afi);
1624 	errs += ibuf_add(buf->buf, &afi, sizeof(afi));
1625 	errs += ibuf_add(buf->buf, &null8, sizeof(null8));
1626 	errs += ibuf_add(buf->buf, &safi, sizeof(safi));
1627 
1628 	if (errs) {
1629 		ibuf_free(buf->buf);
1630 		free(buf);
1631 		bgp_fsm(p, EVNT_CON_FATAL);
1632 		return;
1633 	}
1634 
1635 	if (session_sendmsg(buf, p) == -1) {
1636 		bgp_fsm(p, EVNT_CON_FATAL);
1637 		return;
1638 	}
1639 
1640 	p->stats.msg_sent_rrefresh++;
1641 }
1642 
1643 int
1644 session_graceful_restart(struct peer *p)
1645 {
1646 	u_int8_t	i;
1647 
1648 	timer_set(p, Timer_RestartTimeout, p->capa.neg.grestart.timeout);
1649 
1650 	for (i = 0; i < AID_MAX; i++) {
1651 		if (p->capa.neg.grestart.flags[i] & CAPA_GR_PRESENT) {
1652 			if (imsg_rde(IMSG_SESSION_STALE, p->conf.id,
1653 			    &i, sizeof(i)) == -1)
1654 				return (-1);
1655 			log_peer_warnx(&p->conf,
1656 			    "graceful restart of %s, keeping routes",
1657 			    aid2str(i));
1658 			p->capa.neg.grestart.flags[i] |= CAPA_GR_RESTARTING;
1659 		} else if (p->capa.neg.mp[i]) {
1660 			if (imsg_rde(IMSG_SESSION_FLUSH, p->conf.id,
1661 			    &i, sizeof(i)) == -1)
1662 				return (-1);
1663 			log_peer_warnx(&p->conf,
1664 			    "graceful restart of %s, flushing routes",
1665 			    aid2str(i));
1666 		}
1667 	}
1668 	return (0);
1669 }
1670 
1671 int
1672 session_graceful_stop(struct peer *p)
1673 {
1674 	u_int8_t	i;
1675 
1676 	for (i = 0; i < AID_MAX; i++) {
1677 		/*
1678 		 * Only flush if the peer is restarting and the timeout fired.
1679 		 * In all other cases the session was already flushed when the
1680 		 * session went down or when the new open message was parsed.
1681 		 */
1682 		if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING) {
1683 			log_peer_warnx(&p->conf, "graceful restart of %s, "
1684 			    "time-out, flushing", aid2str(i));
1685 			if (imsg_rde(IMSG_SESSION_FLUSH, p->conf.id,
1686 			    &i, sizeof(i)) == -1)
1687 				return (-1);
1688 		}
1689 		p->capa.neg.grestart.flags[i] &= ~CAPA_GR_RESTARTING;
1690 	}
1691 	return (0);
1692 }
1693 
1694 int
1695 session_dispatch_msg(struct pollfd *pfd, struct peer *p)
1696 {
1697 	ssize_t		n;
1698 	socklen_t	len;
1699 	int		error;
1700 
1701 	if (p->state == STATE_CONNECT) {
1702 		if (pfd->revents & POLLOUT) {
1703 			if (pfd->revents & POLLIN) {
1704 				/* error occurred */
1705 				len = sizeof(error);
1706 				if (getsockopt(pfd->fd, SOL_SOCKET, SO_ERROR,
1707 				    &error, &len) == -1 || error) {
1708 					if (error)
1709 						errno = error;
1710 					if (errno != p->lasterr) {
1711 						log_peer_warn(&p->conf,
1712 						    "socket error");
1713 						p->lasterr = errno;
1714 					}
1715 					bgp_fsm(p, EVNT_CON_OPENFAIL);
1716 					return (1);
1717 				}
1718 			}
1719 			bgp_fsm(p, EVNT_CON_OPEN);
1720 			return (1);
1721 		}
1722 		if (pfd->revents & POLLHUP) {
1723 			bgp_fsm(p, EVNT_CON_OPENFAIL);
1724 			return (1);
1725 		}
1726 		if (pfd->revents & (POLLERR|POLLNVAL)) {
1727 			bgp_fsm(p, EVNT_CON_FATAL);
1728 			return (1);
1729 		}
1730 		return (0);
1731 	}
1732 
1733 	if (pfd->revents & POLLHUP) {
1734 		bgp_fsm(p, EVNT_CON_CLOSED);
1735 		return (1);
1736 	}
1737 	if (pfd->revents & (POLLERR|POLLNVAL)) {
1738 		bgp_fsm(p, EVNT_CON_FATAL);
1739 		return (1);
1740 	}
1741 
1742 	if (pfd->revents & POLLOUT && p->wbuf.queued) {
1743 		if ((error = msgbuf_write(&p->wbuf)) <= 0 && errno != EAGAIN) {
1744 			if (error == 0)
1745 				log_peer_warnx(&p->conf, "Connection closed");
1746 			else if (error == -1)
1747 				log_peer_warn(&p->conf, "write error");
1748 			bgp_fsm(p, EVNT_CON_FATAL);
1749 			return (1);
1750 		}
1751 		if (p->throttled && p->wbuf.queued < SESS_MSG_LOW_MARK) {
1752 			if (imsg_rde(IMSG_XON, p->conf.id, NULL, 0) == -1)
1753 				log_peer_warn(&p->conf, "imsg_compose XON");
1754 			else
1755 				p->throttled = 0;
1756 		}
1757 		if (!(pfd->revents & POLLIN))
1758 			return (1);
1759 	}
1760 
1761 	if (p->rbuf && pfd->revents & POLLIN) {
1762 		if ((n = read(p->fd, p->rbuf->buf + p->rbuf->wpos,
1763 		    sizeof(p->rbuf->buf) - p->rbuf->wpos)) == -1) {
1764 			if (errno != EINTR && errno != EAGAIN) {
1765 				log_peer_warn(&p->conf, "read error");
1766 				bgp_fsm(p, EVNT_CON_FATAL);
1767 			}
1768 			return (1);
1769 		}
1770 		if (n == 0) {	/* connection closed */
1771 			bgp_fsm(p, EVNT_CON_CLOSED);
1772 			return (1);
1773 		}
1774 
1775 		p->rbuf->wpos += n;
1776 		p->stats.last_read = time(NULL);
1777 		return (1);
1778 	}
1779 	return (0);
1780 }
1781 
1782 void
1783 session_process_msg(struct peer *p)
1784 {
1785 	struct mrt	*mrt;
1786 	ssize_t		rpos, av, left;
1787 	int		processed = 0;
1788 	u_int16_t	msglen;
1789 	u_int8_t	msgtype;
1790 
1791 	rpos = 0;
1792 	av = p->rbuf->wpos;
1793 	p->rpending = 0;
1794 
1795 	/*
1796 	 * session might drop to IDLE -> buffers deallocated
1797 	 * we MUST check rbuf != NULL before use
1798 	 */
1799 	for (;;) {
1800 		if (p->rbuf == NULL)
1801 			return;
1802 		if (rpos + MSGSIZE_HEADER > av)
1803 			break;
1804 		if (parse_header(p, p->rbuf->buf + rpos, &msglen,
1805 		    &msgtype) == -1)
1806 			return;
1807 		if (rpos + msglen > av)
1808 			break;
1809 		p->rbuf->rptr = p->rbuf->buf + rpos;
1810 
1811 		/* dump to MRT as soon as we have a full packet */
1812 		LIST_FOREACH(mrt, &mrthead, entry) {
1813 			if (!(mrt->type == MRT_ALL_IN || (msgtype == UPDATE &&
1814 			    mrt->type == MRT_UPDATE_IN)))
1815 				continue;
1816 			if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
1817 			    mrt->peer_id == p->conf.id || (mrt->group_id != 0 &&
1818 			    mrt->group_id == p->conf.groupid))
1819 				mrt_dump_bgp_msg(mrt, p->rbuf->rptr, msglen, p);
1820 		}
1821 
1822 		switch (msgtype) {
1823 		case OPEN:
1824 			bgp_fsm(p, EVNT_RCVD_OPEN);
1825 			p->stats.msg_rcvd_open++;
1826 			break;
1827 		case UPDATE:
1828 			bgp_fsm(p, EVNT_RCVD_UPDATE);
1829 			p->stats.msg_rcvd_update++;
1830 			break;
1831 		case NOTIFICATION:
1832 			bgp_fsm(p, EVNT_RCVD_NOTIFICATION);
1833 			p->stats.msg_rcvd_notification++;
1834 			break;
1835 		case KEEPALIVE:
1836 			bgp_fsm(p, EVNT_RCVD_KEEPALIVE);
1837 			p->stats.msg_rcvd_keepalive++;
1838 			break;
1839 		case RREFRESH:
1840 			parse_refresh(p);
1841 			p->stats.msg_rcvd_rrefresh++;
1842 			break;
1843 		default:	/* cannot happen */
1844 			session_notification(p, ERR_HEADER, ERR_HDR_TYPE,
1845 			    &msgtype, 1);
1846 			log_warnx("received message with unknown type %u",
1847 			    msgtype);
1848 			bgp_fsm(p, EVNT_CON_FATAL);
1849 		}
1850 		rpos += msglen;
1851 		if (++processed > MSG_PROCESS_LIMIT) {
1852 			p->rpending = 1;
1853 			break;
1854 		}
1855 	}
1856 
1857 	if (rpos < av) {
1858 		left = av - rpos;
1859 		memmove(&p->rbuf->buf, p->rbuf->buf + rpos, left);
1860 		p->rbuf->wpos = left;
1861 	} else
1862 		p->rbuf->wpos = 0;
1863 }
1864 
1865 int
1866 parse_header(struct peer *peer, u_char *data, u_int16_t *len, u_int8_t *type)
1867 {
1868 	u_char			*p;
1869 	u_int16_t		 olen;
1870 	static const u_int8_t	 marker[MSGSIZE_HEADER_MARKER] = { 0xff, 0xff,
1871 				    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1872 				    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
1873 
1874 	/* caller MUST make sure we are getting 19 bytes! */
1875 	p = data;
1876 	if (memcmp(p, marker, sizeof(marker))) {
1877 		log_peer_warnx(&peer->conf, "sync error");
1878 		session_notification(peer, ERR_HEADER, ERR_HDR_SYNC, NULL, 0);
1879 		bgp_fsm(peer, EVNT_CON_FATAL);
1880 		return (-1);
1881 	}
1882 	p += MSGSIZE_HEADER_MARKER;
1883 
1884 	memcpy(&olen, p, 2);
1885 	*len = ntohs(olen);
1886 	p += 2;
1887 	memcpy(type, p, 1);
1888 
1889 	if (*len < MSGSIZE_HEADER || *len > MAX_PKTSIZE) {
1890 		log_peer_warnx(&peer->conf,
1891 		    "received message: illegal length: %u byte", *len);
1892 		session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1893 		    &olen, sizeof(olen));
1894 		bgp_fsm(peer, EVNT_CON_FATAL);
1895 		return (-1);
1896 	}
1897 
1898 	switch (*type) {
1899 	case OPEN:
1900 		if (*len < MSGSIZE_OPEN_MIN) {
1901 			log_peer_warnx(&peer->conf,
1902 			    "received OPEN: illegal len: %u byte", *len);
1903 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1904 			    &olen, sizeof(olen));
1905 			bgp_fsm(peer, EVNT_CON_FATAL);
1906 			return (-1);
1907 		}
1908 		break;
1909 	case NOTIFICATION:
1910 		if (*len < MSGSIZE_NOTIFICATION_MIN) {
1911 			log_peer_warnx(&peer->conf,
1912 			    "received NOTIFICATION: illegal len: %u byte",
1913 			    *len);
1914 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1915 			    &olen, sizeof(olen));
1916 			bgp_fsm(peer, EVNT_CON_FATAL);
1917 			return (-1);
1918 		}
1919 		break;
1920 	case UPDATE:
1921 		if (*len < MSGSIZE_UPDATE_MIN) {
1922 			log_peer_warnx(&peer->conf,
1923 			    "received UPDATE: illegal len: %u byte", *len);
1924 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1925 			    &olen, sizeof(olen));
1926 			bgp_fsm(peer, EVNT_CON_FATAL);
1927 			return (-1);
1928 		}
1929 		break;
1930 	case KEEPALIVE:
1931 		if (*len != MSGSIZE_KEEPALIVE) {
1932 			log_peer_warnx(&peer->conf,
1933 			    "received KEEPALIVE: illegal len: %u byte", *len);
1934 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1935 			    &olen, sizeof(olen));
1936 			bgp_fsm(peer, EVNT_CON_FATAL);
1937 			return (-1);
1938 		}
1939 		break;
1940 	case RREFRESH:
1941 		if (*len != MSGSIZE_RREFRESH) {
1942 			log_peer_warnx(&peer->conf,
1943 			    "received RREFRESH: illegal len: %u byte", *len);
1944 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1945 			    &olen, sizeof(olen));
1946 			bgp_fsm(peer, EVNT_CON_FATAL);
1947 			return (-1);
1948 		}
1949 		break;
1950 	default:
1951 		log_peer_warnx(&peer->conf,
1952 		    "received msg with unknown type %u", *type);
1953 		session_notification(peer, ERR_HEADER, ERR_HDR_TYPE,
1954 		    type, 1);
1955 		bgp_fsm(peer, EVNT_CON_FATAL);
1956 		return (-1);
1957 	}
1958 	return (0);
1959 }
1960 
1961 int
1962 parse_open(struct peer *peer)
1963 {
1964 	u_char		*p, *op_val;
1965 	u_int8_t	 version, rversion;
1966 	u_int16_t	 short_as, msglen;
1967 	u_int16_t	 holdtime, oholdtime, myholdtime;
1968 	u_int32_t	 as, bgpid;
1969 	u_int8_t	 optparamlen, plen;
1970 	u_int8_t	 op_type, op_len;
1971 
1972 	p = peer->rbuf->rptr;
1973 	p += MSGSIZE_HEADER_MARKER;
1974 	memcpy(&msglen, p, sizeof(msglen));
1975 	msglen = ntohs(msglen);
1976 
1977 	p = peer->rbuf->rptr;
1978 	p += MSGSIZE_HEADER;	/* header is already checked */
1979 
1980 	memcpy(&version, p, sizeof(version));
1981 	p += sizeof(version);
1982 
1983 	if (version != BGP_VERSION) {
1984 		log_peer_warnx(&peer->conf,
1985 		    "peer wants unrecognized version %u", version);
1986 		if (version > BGP_VERSION)
1987 			rversion = version - BGP_VERSION;
1988 		else
1989 			rversion = BGP_VERSION;
1990 		session_notification(peer, ERR_OPEN, ERR_OPEN_VERSION,
1991 		    &rversion, sizeof(rversion));
1992 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
1993 		return (-1);
1994 	}
1995 
1996 	memcpy(&short_as, p, sizeof(short_as));
1997 	p += sizeof(short_as);
1998 	as = peer->short_as = ntohs(short_as);
1999 	if (as == 0) {
2000 		log_peer_warnx(&peer->conf,
2001 		    "peer requests unacceptable AS %u", as);
2002 		session_notification(peer, ERR_OPEN, ERR_OPEN_AS,
2003 		    NULL, 0);
2004 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2005 		return (-1);
2006 	}
2007 
2008 	memcpy(&oholdtime, p, sizeof(oholdtime));
2009 	p += sizeof(oholdtime);
2010 
2011 	holdtime = ntohs(oholdtime);
2012 	if (holdtime && holdtime < peer->conf.min_holdtime) {
2013 		log_peer_warnx(&peer->conf,
2014 		    "peer requests unacceptable holdtime %u", holdtime);
2015 		session_notification(peer, ERR_OPEN, ERR_OPEN_HOLDTIME,
2016 		    NULL, 0);
2017 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2018 		return (-1);
2019 	}
2020 
2021 	myholdtime = peer->conf.holdtime;
2022 	if (!myholdtime)
2023 		myholdtime = conf->holdtime;
2024 	if (holdtime < myholdtime)
2025 		peer->holdtime = holdtime;
2026 	else
2027 		peer->holdtime = myholdtime;
2028 
2029 	memcpy(&bgpid, p, sizeof(bgpid));
2030 	p += sizeof(bgpid);
2031 
2032 	/* check bgpid for validity - just disallow 0 */
2033 	if (ntohl(bgpid) == 0) {
2034 		log_peer_warnx(&peer->conf, "peer BGPID %u unacceptable",
2035 		    ntohl(bgpid));
2036 		session_notification(peer, ERR_OPEN, ERR_OPEN_BGPID,
2037 		    NULL, 0);
2038 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2039 		return (-1);
2040 	}
2041 	peer->remote_bgpid = bgpid;
2042 
2043 	memcpy(&optparamlen, p, sizeof(optparamlen));
2044 	p += sizeof(optparamlen);
2045 
2046 	if (optparamlen != msglen - MSGSIZE_OPEN_MIN) {
2047 			log_peer_warnx(&peer->conf,
2048 			    "corrupt OPEN message received: length mismatch");
2049 			session_notification(peer, ERR_OPEN, 0, NULL, 0);
2050 			change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2051 			return (-1);
2052 	}
2053 
2054 	plen = optparamlen;
2055 	while (plen > 0) {
2056 		if (plen < 2) {
2057 			log_peer_warnx(&peer->conf,
2058 			    "corrupt OPEN message received, len wrong");
2059 			session_notification(peer, ERR_OPEN, 0, NULL, 0);
2060 			change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2061 			return (-1);
2062 		}
2063 		memcpy(&op_type, p, sizeof(op_type));
2064 		p += sizeof(op_type);
2065 		plen -= sizeof(op_type);
2066 		memcpy(&op_len, p, sizeof(op_len));
2067 		p += sizeof(op_len);
2068 		plen -= sizeof(op_len);
2069 		if (op_len > 0) {
2070 			if (plen < op_len) {
2071 				log_peer_warnx(&peer->conf,
2072 				    "corrupt OPEN message received, len wrong");
2073 				session_notification(peer, ERR_OPEN, 0,
2074 				    NULL, 0);
2075 				change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2076 				return (-1);
2077 			}
2078 			op_val = p;
2079 			p += op_len;
2080 			plen -= op_len;
2081 		} else
2082 			op_val = NULL;
2083 
2084 		switch (op_type) {
2085 		case OPT_PARAM_CAPABILITIES:		/* RFC 3392 */
2086 			if (parse_capabilities(peer, op_val, op_len,
2087 			    &as) == -1) {
2088 				session_notification(peer, ERR_OPEN, 0,
2089 				    NULL, 0);
2090 				change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2091 				return (-1);
2092 			}
2093 			break;
2094 		case OPT_PARAM_AUTH:			/* deprecated */
2095 		default:
2096 			/*
2097 			 * unsupported type
2098 			 * the RFCs tell us to leave the data section empty
2099 			 * and notify the peer with ERR_OPEN, ERR_OPEN_OPT.
2100 			 * How the peer should know _which_ optional parameter
2101 			 * we don't support is beyond me.
2102 			 */
2103 			log_peer_warnx(&peer->conf,
2104 			    "received OPEN message with unsupported optional "
2105 			    "parameter: type %u", op_type);
2106 			session_notification(peer, ERR_OPEN, ERR_OPEN_OPT,
2107 				NULL, 0);
2108 			change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2109 			timer_set(peer, Timer_IdleHold, 0);	/* no punish */
2110 			peer->IdleHoldTime /= 2;
2111 			return (-1);
2112 		}
2113 	}
2114 
2115 	/* if remote-as is zero and it's a cloned neighbor, accept any */
2116 	if (peer->template && !peer->conf.remote_as && as != AS_TRANS) {
2117 		peer->conf.remote_as = as;
2118 		peer->conf.ebgp = (peer->conf.remote_as != peer->conf.local_as);
2119 		if (!peer->conf.ebgp)
2120 			/* force enforce_as off for iBGP sessions */
2121 			peer->conf.enforce_as = ENFORCE_AS_OFF;
2122 	}
2123 
2124 	if (peer->conf.remote_as != as) {
2125 		log_peer_warnx(&peer->conf, "peer sent wrong AS %s",
2126 		    log_as(as));
2127 		session_notification(peer, ERR_OPEN, ERR_OPEN_AS, NULL, 0);
2128 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2129 		return (-1);
2130 	}
2131 
2132 	if (capa_neg_calc(peer) == -1) {
2133 		log_peer_warnx(&peer->conf,
2134 		    "capability negotiation calculation failed");
2135 		session_notification(peer, ERR_OPEN, 0, NULL, 0);
2136 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2137 		return (-1);
2138 	}
2139 
2140 	return (0);
2141 }
2142 
2143 int
2144 parse_update(struct peer *peer)
2145 {
2146 	u_char		*p;
2147 	u_int16_t	 datalen;
2148 
2149 	/*
2150 	 * we pass the message verbatim to the rde.
2151 	 * in case of errors the whole session is reset with a
2152 	 * notification anyway, we only need to know the peer
2153 	 */
2154 	p = peer->rbuf->rptr;
2155 	p += MSGSIZE_HEADER_MARKER;
2156 	memcpy(&datalen, p, sizeof(datalen));
2157 	datalen = ntohs(datalen);
2158 
2159 	p = peer->rbuf->rptr;
2160 	p += MSGSIZE_HEADER;	/* header is already checked */
2161 	datalen -= MSGSIZE_HEADER;
2162 
2163 	if (imsg_rde(IMSG_UPDATE, peer->conf.id, p, datalen) == -1)
2164 		return (-1);
2165 
2166 	return (0);
2167 }
2168 
2169 int
2170 parse_refresh(struct peer *peer)
2171 {
2172 	u_char		*p;
2173 	u_int16_t	 afi;
2174 	u_int8_t	 aid, safi;
2175 
2176 	p = peer->rbuf->rptr;
2177 	p += MSGSIZE_HEADER;	/* header is already checked */
2178 
2179 	/*
2180 	 * We could check if we actually announced the capability but
2181 	 * as long as the message is correctly encoded we don't care.
2182 	 */
2183 
2184 	/* afi, 2 byte */
2185 	memcpy(&afi, p, sizeof(afi));
2186 	afi = ntohs(afi);
2187 	p += 2;
2188 	/* reserved, 1 byte */
2189 	p += 1;
2190 	/* safi, 1 byte */
2191 	memcpy(&safi, p, sizeof(safi));
2192 
2193 	/* afi/safi unchecked -	unrecognized values will be ignored anyway */
2194 	if (afi2aid(afi, safi, &aid) == -1) {
2195 		log_peer_warnx(&peer->conf, "peer sent bad refresh, "
2196 		    "invalid afi/safi pair");
2197 		return (0);
2198 	}
2199 
2200 	if (imsg_rde(IMSG_REFRESH, peer->conf.id, &aid, sizeof(aid)) == -1)
2201 		return (-1);
2202 
2203 	return (0);
2204 }
2205 
2206 int
2207 parse_notification(struct peer *peer)
2208 {
2209 	u_char		*p;
2210 	u_int16_t	 datalen;
2211 	u_int8_t	 errcode;
2212 	u_int8_t	 subcode;
2213 	u_int8_t	 capa_code;
2214 	u_int8_t	 capa_len;
2215 	size_t		 shutcomm_len;
2216 	u_int8_t	 i;
2217 
2218 	/* just log */
2219 	p = peer->rbuf->rptr;
2220 	p += MSGSIZE_HEADER_MARKER;
2221 	memcpy(&datalen, p, sizeof(datalen));
2222 	datalen = ntohs(datalen);
2223 
2224 	p = peer->rbuf->rptr;
2225 	p += MSGSIZE_HEADER;	/* header is already checked */
2226 	datalen -= MSGSIZE_HEADER;
2227 
2228 	memcpy(&errcode, p, sizeof(errcode));
2229 	p += sizeof(errcode);
2230 	datalen -= sizeof(errcode);
2231 
2232 	memcpy(&subcode, p, sizeof(subcode));
2233 	p += sizeof(subcode);
2234 	datalen -= sizeof(subcode);
2235 
2236 	log_notification(peer, errcode, subcode, p, datalen, "received");
2237 	peer->errcnt++;
2238 
2239 	if (errcode == ERR_OPEN && subcode == ERR_OPEN_CAPA) {
2240 		if (datalen == 0) {	/* zebra likes to send those.. humbug */
2241 			log_peer_warnx(&peer->conf, "received \"unsupported "
2242 			    "capability\" notification without data part, "
2243 			    "disabling capability announcements altogether");
2244 			session_capa_ann_none(peer);
2245 		}
2246 
2247 		while (datalen > 0) {
2248 			if (datalen < 2) {
2249 				log_peer_warnx(&peer->conf,
2250 				    "parse_notification: "
2251 				    "expect len >= 2, len is %u", datalen);
2252 				return (-1);
2253 			}
2254 			memcpy(&capa_code, p, sizeof(capa_code));
2255 			p += sizeof(capa_code);
2256 			datalen -= sizeof(capa_code);
2257 			memcpy(&capa_len, p, sizeof(capa_len));
2258 			p += sizeof(capa_len);
2259 			datalen -= sizeof(capa_len);
2260 			if (datalen < capa_len) {
2261 				log_peer_warnx(&peer->conf,
2262 				    "parse_notification: capa_len %u exceeds "
2263 				    "remaining msg length %u", capa_len,
2264 				    datalen);
2265 				return (-1);
2266 			}
2267 			p += capa_len;
2268 			datalen -= capa_len;
2269 			switch (capa_code) {
2270 			case CAPA_MP:
2271 				for (i = 0; i < AID_MAX; i++)
2272 					peer->capa.ann.mp[i] = 0;
2273 				log_peer_warnx(&peer->conf,
2274 				    "disabling multiprotocol capability");
2275 				break;
2276 			case CAPA_REFRESH:
2277 				peer->capa.ann.refresh = 0;
2278 				log_peer_warnx(&peer->conf,
2279 				    "disabling route refresh capability");
2280 				break;
2281 			case CAPA_RESTART:
2282 				peer->capa.ann.grestart.restart = 0;
2283 				log_peer_warnx(&peer->conf,
2284 				    "disabling restart capability");
2285 				break;
2286 			case CAPA_AS4BYTE:
2287 				peer->capa.ann.as4byte = 0;
2288 				log_peer_warnx(&peer->conf,
2289 				    "disabling 4-byte AS num capability");
2290 				break;
2291 			default:	/* should not happen... */
2292 				log_peer_warnx(&peer->conf, "received "
2293 				    "\"unsupported capability\" notification "
2294 				    "for unknown capability %u, disabling "
2295 				    "capability announcements altogether",
2296 				    capa_code);
2297 				session_capa_ann_none(peer);
2298 				break;
2299 			}
2300 		}
2301 
2302 		return (1);
2303 	}
2304 
2305 	if (errcode == ERR_OPEN && subcode == ERR_OPEN_OPT) {
2306 		session_capa_ann_none(peer);
2307 		return (1);
2308 	}
2309 
2310 	if (errcode == ERR_CEASE &&
2311 	    (subcode == ERR_CEASE_ADMIN_DOWN ||
2312 	     subcode == ERR_CEASE_ADMIN_RESET)) {
2313 		if (datalen > 1) {
2314 			shutcomm_len = *p++;
2315 			datalen--;
2316 			if(datalen < shutcomm_len) {
2317 			    log_peer_warnx(&peer->conf,
2318 				"received truncated shutdown reason");
2319 			    return (0);
2320 			}
2321 			if (shutcomm_len > SHUT_COMM_LEN - 1) {
2322 			    log_peer_warnx(&peer->conf,
2323 				"received overly long shutdown reason");
2324 			    return (0);
2325 			}
2326 			memcpy(peer->stats.last_shutcomm, p, shutcomm_len);
2327 			peer->stats.last_shutcomm[shutcomm_len] = '\0';
2328 			log_peer_warnx(&peer->conf,
2329 			    "received shutdown reason: \"%s\"",
2330 			    log_shutcomm(peer->stats.last_shutcomm));
2331 			p += shutcomm_len;
2332 			datalen -= shutcomm_len;
2333 		}
2334 	}
2335 
2336 	return (0);
2337 }
2338 
2339 int
2340 parse_capabilities(struct peer *peer, u_char *d, u_int16_t dlen, u_int32_t *as)
2341 {
2342 	u_char		*capa_val;
2343 	u_int32_t	 remote_as;
2344 	u_int16_t	 len;
2345 	u_int16_t	 afi;
2346 	u_int16_t	 gr_header;
2347 	u_int8_t	 safi;
2348 	u_int8_t	 aid;
2349 	u_int8_t	 gr_flags;
2350 	u_int8_t	 capa_code;
2351 	u_int8_t	 capa_len;
2352 	u_int8_t	 i;
2353 
2354 	len = dlen;
2355 	while (len > 0) {
2356 		if (len < 2) {
2357 			log_peer_warnx(&peer->conf, "Bad capabilities attr "
2358 			    "length: %u, too short", len);
2359 			return (-1);
2360 		}
2361 		memcpy(&capa_code, d, sizeof(capa_code));
2362 		d += sizeof(capa_code);
2363 		len -= sizeof(capa_code);
2364 		memcpy(&capa_len, d, sizeof(capa_len));
2365 		d += sizeof(capa_len);
2366 		len -= sizeof(capa_len);
2367 		if (capa_len > 0) {
2368 			if (len < capa_len) {
2369 				log_peer_warnx(&peer->conf,
2370 				    "Bad capabilities attr length: "
2371 				    "len %u smaller than capa_len %u",
2372 				    len, capa_len);
2373 				return (-1);
2374 			}
2375 			capa_val = d;
2376 			d += capa_len;
2377 			len -= capa_len;
2378 		} else
2379 			capa_val = NULL;
2380 
2381 		switch (capa_code) {
2382 		case CAPA_MP:			/* RFC 4760 */
2383 			if (capa_len != 4) {
2384 				log_peer_warnx(&peer->conf,
2385 				    "Bad multi protocol capability length: "
2386 				    "%u", capa_len);
2387 				break;
2388 			}
2389 			memcpy(&afi, capa_val, sizeof(afi));
2390 			afi = ntohs(afi);
2391 			memcpy(&safi, capa_val + 3, sizeof(safi));
2392 			if (afi2aid(afi, safi, &aid) == -1) {
2393 				log_peer_warnx(&peer->conf,
2394 				    "Received multi protocol capability: "
2395 				    " unknown AFI %u, safi %u pair",
2396 				    afi, safi);
2397 				break;
2398 			}
2399 			peer->capa.peer.mp[aid] = 1;
2400 			break;
2401 		case CAPA_REFRESH:
2402 			peer->capa.peer.refresh = 1;
2403 			break;
2404 		case CAPA_RESTART:
2405 			if (capa_len == 2) {
2406 				/* peer only supports EoR marker */
2407 				peer->capa.peer.grestart.restart = 1;
2408 				peer->capa.peer.grestart.timeout = 0;
2409 				break;
2410 			} else if (capa_len % 4 != 2) {
2411 				log_peer_warnx(&peer->conf,
2412 				    "Bad graceful restart capability length: "
2413 				    "%u", capa_len);
2414 				peer->capa.peer.grestart.restart = 0;
2415 				peer->capa.peer.grestart.timeout = 0;
2416 				break;
2417 			}
2418 
2419 			memcpy(&gr_header, capa_val, sizeof(gr_header));
2420 			gr_header = ntohs(gr_header);
2421 			peer->capa.peer.grestart.timeout =
2422 			    gr_header & CAPA_GR_TIMEMASK;
2423 			if (peer->capa.peer.grestart.timeout == 0) {
2424 				log_peer_warnx(&peer->conf, "Received "
2425 				    "graceful restart timeout is zero");
2426 				peer->capa.peer.grestart.restart = 0;
2427 				break;
2428 			}
2429 
2430 			for (i = 2; i <= capa_len - 4; i += 4) {
2431 				memcpy(&afi, capa_val + i, sizeof(afi));
2432 				afi = ntohs(afi);
2433 				memcpy(&safi, capa_val + i + 2, sizeof(safi));
2434 				if (afi2aid(afi, safi, &aid) == -1) {
2435 					log_peer_warnx(&peer->conf,
2436 					    "Received graceful restart capa: "
2437 					    " unknown AFI %u, safi %u pair",
2438 					    afi, safi);
2439 					continue;
2440 				}
2441 				memcpy(&gr_flags, capa_val + i + 3,
2442 				    sizeof(gr_flags));
2443 				peer->capa.peer.grestart.flags[aid] |=
2444 				    CAPA_GR_PRESENT;
2445 				if (gr_flags & CAPA_GR_F_FLAG)
2446 					peer->capa.peer.grestart.flags[aid] |=
2447 					    CAPA_GR_FORWARD;
2448 				if (gr_header & CAPA_GR_R_FLAG)
2449 					peer->capa.peer.grestart.flags[aid] |=
2450 					    CAPA_GR_RESTART;
2451 				peer->capa.peer.grestart.restart = 2;
2452 			}
2453 			break;
2454 		case CAPA_AS4BYTE:
2455 			if (capa_len != 4) {
2456 				log_peer_warnx(&peer->conf,
2457 				    "Bad AS4BYTE capability length: "
2458 				    "%u", capa_len);
2459 				peer->capa.peer.as4byte = 0;
2460 				break;
2461 			}
2462 			memcpy(&remote_as, capa_val, sizeof(remote_as));
2463 			*as = ntohl(remote_as);
2464 			if (*as == 0) {
2465 				log_peer_warnx(&peer->conf,
2466 				    "peer requests unacceptable AS %u", *as);
2467 				session_notification(peer, ERR_OPEN,
2468 				    ERR_OPEN_AS, NULL, 0);
2469 				change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2470 				return (-1);
2471 			}
2472 			peer->capa.peer.as4byte = 1;
2473 			break;
2474 		default:
2475 			break;
2476 		}
2477 	}
2478 
2479 	return (0);
2480 }
2481 
2482 int
2483 capa_neg_calc(struct peer *p)
2484 {
2485 	u_int8_t	i, hasmp = 0;
2486 
2487 	/* refresh: does not realy matter here, use peer setting */
2488 	p->capa.neg.refresh = p->capa.peer.refresh;
2489 
2490 	/* as4byte: both side must announce capability */
2491 	if (p->capa.ann.as4byte && p->capa.peer.as4byte)
2492 		p->capa.neg.as4byte = 1;
2493 	else
2494 		p->capa.neg.as4byte = 0;
2495 
2496 	/* MP: both side must announce capability */
2497 	for (i = 0; i < AID_MAX; i++) {
2498 		if (p->capa.ann.mp[i] && p->capa.peer.mp[i]) {
2499 			p->capa.neg.mp[i] = 1;
2500 			hasmp = 1;
2501 		} else
2502 			p->capa.neg.mp[i] = 0;
2503 	}
2504 	/* if no MP capability present default to IPv4 unicast mode */
2505 	if (!hasmp)
2506 		p->capa.neg.mp[AID_INET] = 1;
2507 
2508 	/*
2509 	 * graceful restart: only the peer capabilities are of interest here.
2510 	 * It is necessary to compare the new values with the previous ones
2511 	 * and act acordingly. AFI/SAFI that are not part in the MP capability
2512 	 * are treated as not being present.
2513 	 */
2514 
2515 	for (i = 0; i < AID_MAX; i++) {
2516 		int8_t	negflags;
2517 
2518 		/* disable GR if the AFI/SAFI is not present */
2519 		if (p->capa.peer.grestart.flags[i] & CAPA_GR_PRESENT &&
2520 		    p->capa.neg.mp[i] == 0)
2521 			p->capa.peer.grestart.flags[i] = 0;	/* disable */
2522 		/* look at current GR state and decide what to do */
2523 		negflags = p->capa.neg.grestart.flags[i];
2524 		p->capa.neg.grestart.flags[i] = p->capa.peer.grestart.flags[i];
2525 		if (negflags & CAPA_GR_RESTARTING) {
2526 			if (!(p->capa.peer.grestart.flags[i] &
2527 			    CAPA_GR_FORWARD)) {
2528 				if (imsg_rde(IMSG_SESSION_FLUSH, p->conf.id,
2529 				    &i, sizeof(i)) == -1)
2530 					return (-1);
2531 				log_peer_warnx(&p->conf, "graceful restart of "
2532 				    "%s, not restarted, flushing", aid2str(i));
2533 			} else
2534 				p->capa.neg.grestart.flags[i] |=
2535 				    CAPA_GR_RESTARTING;
2536 		}
2537 	}
2538 	p->capa.neg.grestart.timeout = p->capa.peer.grestart.timeout;
2539 	p->capa.neg.grestart.restart = p->capa.peer.grestart.restart;
2540 
2541 	return (0);
2542 }
2543 
2544 void
2545 session_dispatch_imsg(struct imsgbuf *ibuf, int idx, u_int *listener_cnt)
2546 {
2547 	struct imsg		 imsg;
2548 	struct mrt		 xmrt;
2549 	struct mrt		*mrt;
2550 	struct imsgbuf		*i;
2551 	struct peer		*p;
2552 	struct listen_addr	*la, *nla;
2553 	struct kif		*kif;
2554 	u_char			*data;
2555 	int			 n, fd, depend_ok, restricted;
2556 	u_int8_t		 aid, errcode, subcode;
2557 
2558 	while (ibuf) {
2559 		if ((n = imsg_get(ibuf, &imsg)) == -1)
2560 			fatal("session_dispatch_imsg: imsg_get error");
2561 
2562 		if (n == 0)
2563 			break;
2564 
2565 		switch (imsg.hdr.type) {
2566 		case IMSG_SOCKET_CONN:
2567 		case IMSG_SOCKET_CONN_CTL:
2568 			if (idx != PFD_PIPE_MAIN)
2569 				fatalx("reconf request not from parent");
2570 			if ((fd = imsg.fd) == -1) {
2571 				log_warnx("expected to receive imsg fd to "
2572 				    "RDE but didn't receive any");
2573 				break;
2574 			}
2575 			if ((i = malloc(sizeof(struct imsgbuf))) == NULL)
2576 				fatal(NULL);
2577 			imsg_init(i, fd);
2578 			if (imsg.hdr.type == IMSG_SOCKET_CONN) {
2579 				if (ibuf_rde) {
2580 					log_warnx("Unexpected imsg connection "
2581 					    "to RDE received");
2582 					msgbuf_clear(&ibuf_rde->w);
2583 					free(ibuf_rde);
2584 				}
2585 				ibuf_rde = i;
2586 			} else {
2587 				if (ibuf_rde_ctl) {
2588 					log_warnx("Unexpected imsg ctl "
2589 					    "connection to RDE received");
2590 					msgbuf_clear(&ibuf_rde_ctl->w);
2591 					free(ibuf_rde_ctl);
2592 				}
2593 				ibuf_rde_ctl = i;
2594 			}
2595 			break;
2596 		case IMSG_RECONF_CONF:
2597 			if (idx != PFD_PIPE_MAIN)
2598 				fatalx("reconf request not from parent");
2599 			nconf = new_config();
2600 
2601 			copy_config(nconf, imsg.data);
2602 			pending_reconf = 1;
2603 			break;
2604 		case IMSG_RECONF_PEER:
2605 			if (idx != PFD_PIPE_MAIN)
2606 				fatalx("reconf request not from parent");
2607 			if ((p = calloc(1, sizeof(struct peer))) == NULL)
2608 				fatal("new_peer");
2609 			memcpy(&p->conf, imsg.data, sizeof(struct peer_config));
2610 			p->state = p->prev_state = STATE_NONE;
2611 			p->reconf_action = RECONF_REINIT;
2612 			TAILQ_INSERT_TAIL(&nconf->peers, p, entry);
2613 			break;
2614 		case IMSG_RECONF_LISTENER:
2615 			if (idx != PFD_PIPE_MAIN)
2616 				fatalx("reconf request not from parent");
2617 			if (nconf == NULL)
2618 				fatalx("IMSG_RECONF_LISTENER but no config");
2619 			nla = imsg.data;
2620 			TAILQ_FOREACH(la, conf->listen_addrs, entry)
2621 				if (!la_cmp(la, nla))
2622 					break;
2623 
2624 			if (la == NULL) {
2625 				if (nla->reconf != RECONF_REINIT)
2626 					fatalx("king bula sez: "
2627 					    "expected REINIT");
2628 
2629 				if ((nla->fd = imsg.fd) == -1)
2630 					log_warnx("expected to receive fd for "
2631 					    "%s but didn't receive any",
2632 					    log_sockaddr((struct sockaddr *)
2633 					    &nla->sa, nla->sa_len));
2634 
2635 				la = calloc(1, sizeof(struct listen_addr));
2636 				if (la == NULL)
2637 					fatal(NULL);
2638 				memcpy(&la->sa, &nla->sa, sizeof(la->sa));
2639 				la->flags = nla->flags;
2640 				la->fd = nla->fd;
2641 				la->reconf = RECONF_REINIT;
2642 				TAILQ_INSERT_TAIL(nconf->listen_addrs, la,
2643 				    entry);
2644 			} else {
2645 				if (nla->reconf != RECONF_KEEP)
2646 					fatalx("king bula sez: expected KEEP");
2647 				la->reconf = RECONF_KEEP;
2648 			}
2649 
2650 			break;
2651 		case IMSG_RECONF_CTRL:
2652 			if (idx != PFD_PIPE_MAIN)
2653 				fatalx("reconf request not from parent");
2654 			if (imsg.hdr.len != IMSG_HEADER_SIZE +
2655 			    sizeof(restricted))
2656 				fatalx("IFINFO imsg with wrong len");
2657 			memcpy(&restricted, imsg.data, sizeof(restricted));
2658 			if (imsg.fd == -1) {
2659 				log_warnx("expected to receive fd for control "
2660 				    "socket but didn't receive any");
2661 				break;
2662 			}
2663 			if (restricted) {
2664 				control_shutdown(rcsock);
2665 				rcsock = imsg.fd;
2666 			} else {
2667 				control_shutdown(csock);
2668 				csock = imsg.fd;
2669 			}
2670 			break;
2671 		case IMSG_RECONF_DRAIN:
2672 			if (idx != PFD_PIPE_MAIN)
2673 				fatalx("reconf request not from parent");
2674 			imsg_compose(ibuf_main, IMSG_RECONF_DRAIN, 0, 0,
2675 			    -1, NULL, 0);
2676 			break;
2677 		case IMSG_RECONF_DONE:
2678 			if (idx != PFD_PIPE_MAIN)
2679 				fatalx("reconf request not from parent");
2680 			if (nconf == NULL)
2681 				fatalx("got IMSG_RECONF_DONE but no config");
2682 			copy_config(conf, nconf);
2683 			merge_peers(conf, nconf);
2684 
2685 			/* delete old listeners */
2686 			for (la = TAILQ_FIRST(conf->listen_addrs); la != NULL;
2687 			    la = nla) {
2688 				nla = TAILQ_NEXT(la, entry);
2689 				if (la->reconf == RECONF_NONE) {
2690 					log_info("not listening on %s any more",
2691 					    log_sockaddr((struct sockaddr *)
2692 					    &la->sa, la->sa_len));
2693 					TAILQ_REMOVE(conf->listen_addrs, la,
2694 					    entry);
2695 					close(la->fd);
2696 					free(la);
2697 				}
2698 			}
2699 
2700 			/* add new listeners */
2701 			while ((la = TAILQ_FIRST(nconf->listen_addrs)) !=
2702 			    NULL) {
2703 				TAILQ_REMOVE(nconf->listen_addrs, la, entry);
2704 				TAILQ_INSERT_TAIL(conf->listen_addrs, la,
2705 				    entry);
2706 			}
2707 
2708 			setup_listeners(listener_cnt);
2709 			free_config(nconf);
2710 			nconf = NULL;
2711 			pending_reconf = 0;
2712 			log_info("SE reconfigured");
2713 			imsg_compose(ibuf_main, IMSG_RECONF_DONE, 0, 0,
2714 			    -1, NULL, 0);
2715 			break;
2716 		case IMSG_IFINFO:
2717 			if (idx != PFD_PIPE_MAIN)
2718 				fatalx("IFINFO message not from parent");
2719 			if (imsg.hdr.len != IMSG_HEADER_SIZE +
2720 			    sizeof(struct kif))
2721 				fatalx("IFINFO imsg with wrong len");
2722 			kif = imsg.data;
2723 			depend_ok = kif->depend_state;
2724 
2725 			TAILQ_FOREACH(p, &conf->peers, entry)
2726 				if (!strcmp(p->conf.if_depend, kif->ifname)) {
2727 					if (depend_ok && !p->depend_ok) {
2728 						p->depend_ok = depend_ok;
2729 						bgp_fsm(p, EVNT_START);
2730 					} else if (!depend_ok && p->depend_ok) {
2731 						p->depend_ok = depend_ok;
2732 						session_stop(p,
2733 						    ERR_CEASE_OTHER_CHANGE);
2734 					}
2735 				}
2736 			break;
2737 		case IMSG_MRT_OPEN:
2738 		case IMSG_MRT_REOPEN:
2739 			if (imsg.hdr.len > IMSG_HEADER_SIZE +
2740 			    sizeof(struct mrt)) {
2741 				log_warnx("wrong imsg len");
2742 				break;
2743 			}
2744 
2745 			memcpy(&xmrt, imsg.data, sizeof(struct mrt));
2746 			if ((xmrt.wbuf.fd = imsg.fd) == -1)
2747 				log_warnx("expected to receive fd for mrt dump "
2748 				    "but didn't receive any");
2749 
2750 			mrt = mrt_get(&mrthead, &xmrt);
2751 			if (mrt == NULL) {
2752 				/* new dump */
2753 				mrt = calloc(1, sizeof(struct mrt));
2754 				if (mrt == NULL)
2755 					fatal("session_dispatch_imsg");
2756 				memcpy(mrt, &xmrt, sizeof(struct mrt));
2757 				TAILQ_INIT(&mrt->wbuf.bufs);
2758 				LIST_INSERT_HEAD(&mrthead, mrt, entry);
2759 			} else {
2760 				/* old dump reopened */
2761 				close(mrt->wbuf.fd);
2762 				mrt->wbuf.fd = xmrt.wbuf.fd;
2763 			}
2764 			break;
2765 		case IMSG_MRT_CLOSE:
2766 			if (imsg.hdr.len > IMSG_HEADER_SIZE +
2767 			    sizeof(struct mrt)) {
2768 				log_warnx("wrong imsg len");
2769 				break;
2770 			}
2771 
2772 			memcpy(&xmrt, imsg.data, sizeof(struct mrt));
2773 			mrt = mrt_get(&mrthead, &xmrt);
2774 			if (mrt != NULL)
2775 				mrt_done(mrt);
2776 			break;
2777 		case IMSG_CTL_KROUTE:
2778 		case IMSG_CTL_KROUTE_ADDR:
2779 		case IMSG_CTL_SHOW_NEXTHOP:
2780 		case IMSG_CTL_SHOW_INTERFACE:
2781 		case IMSG_CTL_SHOW_FIB_TABLES:
2782 			if (idx != PFD_PIPE_MAIN)
2783 				fatalx("ctl kroute request not from parent");
2784 			control_imsg_relay(&imsg);
2785 			break;
2786 		case IMSG_CTL_SHOW_RIB:
2787 		case IMSG_CTL_SHOW_RIB_PREFIX:
2788 		case IMSG_CTL_SHOW_RIB_ATTR:
2789 		case IMSG_CTL_SHOW_RIB_MEM:
2790 		case IMSG_CTL_SHOW_RIB_HASH:
2791 		case IMSG_CTL_SHOW_NETWORK:
2792 		case IMSG_CTL_SHOW_NEIGHBOR:
2793 			if (idx != PFD_PIPE_ROUTE_CTL)
2794 				fatalx("ctl rib request not from RDE");
2795 			control_imsg_relay(&imsg);
2796 			break;
2797 		case IMSG_CTL_END:
2798 		case IMSG_CTL_RESULT:
2799 			control_imsg_relay(&imsg);
2800 			break;
2801 		case IMSG_UPDATE:
2802 			if (idx != PFD_PIPE_ROUTE)
2803 				fatalx("update request not from RDE");
2804 			if (imsg.hdr.len > IMSG_HEADER_SIZE +
2805 			    MAX_PKTSIZE - MSGSIZE_HEADER ||
2806 			    imsg.hdr.len < IMSG_HEADER_SIZE +
2807 			    MSGSIZE_UPDATE_MIN - MSGSIZE_HEADER)
2808 				log_warnx("RDE sent invalid update");
2809 			else
2810 				session_update(imsg.hdr.peerid, imsg.data,
2811 				    imsg.hdr.len - IMSG_HEADER_SIZE);
2812 			break;
2813 		case IMSG_UPDATE_ERR:
2814 			if (idx != PFD_PIPE_ROUTE)
2815 				fatalx("update request not from RDE");
2816 			if (imsg.hdr.len < IMSG_HEADER_SIZE + 2) {
2817 				log_warnx("RDE sent invalid notification");
2818 				break;
2819 			}
2820 			if ((p = getpeerbyid(conf, imsg.hdr.peerid)) == NULL) {
2821 				log_warnx("no such peer: id=%u",
2822 				    imsg.hdr.peerid);
2823 				break;
2824 			}
2825 			data = imsg.data;
2826 			errcode = *data++;
2827 			subcode = *data++;
2828 
2829 			if (imsg.hdr.len == IMSG_HEADER_SIZE + 2)
2830 				data = NULL;
2831 
2832 			session_notification(p, errcode, subcode,
2833 			    data, imsg.hdr.len - IMSG_HEADER_SIZE - 2);
2834 			switch (errcode) {
2835 			case ERR_CEASE:
2836 				switch (subcode) {
2837 				case ERR_CEASE_MAX_PREFIX:
2838 					bgp_fsm(p, EVNT_STOP);
2839 					if (p->conf.max_prefix_restart)
2840 						timer_set(p, Timer_IdleHold, 60 *
2841 						    p->conf.max_prefix_restart);
2842 					break;
2843 				default:
2844 					bgp_fsm(p, EVNT_CON_FATAL);
2845 					break;
2846 				}
2847 				break;
2848 			default:
2849 				bgp_fsm(p, EVNT_CON_FATAL);
2850 				break;
2851 			}
2852 			break;
2853 		case IMSG_SESSION_RESTARTED:
2854 			if (idx != PFD_PIPE_ROUTE)
2855 				fatalx("update request not from RDE");
2856 			if (imsg.hdr.len < IMSG_HEADER_SIZE + sizeof(aid)) {
2857 				log_warnx("RDE sent invalid restart msg");
2858 				break;
2859 			}
2860 			if ((p = getpeerbyid(conf, imsg.hdr.peerid)) == NULL) {
2861 				log_warnx("no such peer: id=%u",
2862 				    imsg.hdr.peerid);
2863 				break;
2864 			}
2865 			memcpy(&aid, imsg.data, sizeof(aid));
2866 			if (aid >= AID_MAX)
2867 				fatalx("IMSG_SESSION_RESTARTED: bad AID");
2868 			if (p->capa.neg.grestart.flags[aid] &
2869 			    CAPA_GR_RESTARTING) {
2870 				log_peer_warnx(&p->conf,
2871 				    "graceful restart of %s finished",
2872 				    aid2str(aid));
2873 				p->capa.neg.grestart.flags[aid] &=
2874 				    ~CAPA_GR_RESTARTING;
2875 				timer_stop(p, Timer_RestartTimeout);
2876 
2877 				/* signal back to RDE to cleanup stale routes */
2878 				if (imsg_rde(IMSG_SESSION_RESTARTED,
2879 				    imsg.hdr.peerid, &aid, sizeof(aid)) == -1)
2880 					fatal("imsg_compose: "
2881 					    "IMSG_SESSION_RESTARTED");
2882 			}
2883 			break;
2884 		case IMSG_SESSION_DOWN:
2885 			if (idx != PFD_PIPE_ROUTE)
2886 				fatalx("update request not from RDE");
2887 			if ((p = getpeerbyid(conf, imsg.hdr.peerid)) == NULL) {
2888 				log_warnx("no such peer: id=%u",
2889 				    imsg.hdr.peerid);
2890 				break;
2891 			}
2892 			session_stop(p, ERR_CEASE_ADMIN_DOWN);
2893 			break;
2894 		default:
2895 			break;
2896 		}
2897 		imsg_free(&imsg);
2898 	}
2899 }
2900 
2901 int
2902 la_cmp(struct listen_addr *a, struct listen_addr *b)
2903 {
2904 	struct sockaddr_in	*in_a, *in_b;
2905 	struct sockaddr_in6	*in6_a, *in6_b;
2906 
2907 	if (a->sa.ss_family != b->sa.ss_family)
2908 		return (1);
2909 
2910 	switch (a->sa.ss_family) {
2911 	case AF_INET:
2912 		in_a = (struct sockaddr_in *)&a->sa;
2913 		in_b = (struct sockaddr_in *)&b->sa;
2914 		if (in_a->sin_addr.s_addr != in_b->sin_addr.s_addr)
2915 			return (1);
2916 		if (in_a->sin_port != in_b->sin_port)
2917 			return (1);
2918 		break;
2919 	case AF_INET6:
2920 		in6_a = (struct sockaddr_in6 *)&a->sa;
2921 		in6_b = (struct sockaddr_in6 *)&b->sa;
2922 		if (bcmp(&in6_a->sin6_addr, &in6_b->sin6_addr,
2923 		    sizeof(struct in6_addr)))
2924 			return (1);
2925 		if (in6_a->sin6_port != in6_b->sin6_port)
2926 			return (1);
2927 		break;
2928 	default:
2929 		fatal("king bula sez: unknown address family");
2930 		/* NOTREACHED */
2931 	}
2932 
2933 	return (0);
2934 }
2935 
2936 struct peer *
2937 getpeerbydesc(struct bgpd_config *c, const char *descr)
2938 {
2939 	struct peer	*p, *res = NULL;
2940 	int		 match = 0;
2941 
2942 	TAILQ_FOREACH(p, &c->peers, entry)
2943 		if (!strcmp(p->conf.descr, descr)) {
2944 			res = p;
2945 			match++;
2946 		}
2947 
2948 	if (match > 1)
2949 		log_info("neighbor description \"%s\" not unique, request "
2950 		    "aborted", descr);
2951 
2952 	if (match == 1)
2953 		return (res);
2954 	else
2955 		return (NULL);
2956 }
2957 
2958 struct peer *
2959 getpeerbyip(struct bgpd_config *c, struct sockaddr *ip)
2960 {
2961 	struct bgpd_addr addr;
2962 	struct peer	*p, *newpeer, *loose = NULL;
2963 	u_int32_t	 id;
2964 
2965 	sa2addr(ip, &addr, NULL);
2966 
2967 	/* we might want a more effective way to find peers by IP */
2968 	TAILQ_FOREACH(p, &c->peers, entry)
2969 		if (!p->conf.template &&
2970 		    !memcmp(&addr, &p->conf.remote_addr, sizeof(addr)))
2971 			return (p);
2972 
2973 	/* try template matching */
2974 	TAILQ_FOREACH(p, &c->peers, entry)
2975 		if (p->conf.template &&
2976 		    p->conf.remote_addr.aid == addr.aid &&
2977 		    session_match_mask(p, &addr))
2978 			if (loose == NULL || loose->conf.remote_masklen <
2979 			    p->conf.remote_masklen)
2980 				loose = p;
2981 
2982 	if (loose != NULL) {
2983 		/* clone */
2984 		if ((newpeer = malloc(sizeof(struct peer))) == NULL)
2985 			fatal(NULL);
2986 		memcpy(newpeer, loose, sizeof(struct peer));
2987 		for (id = UINT_MAX; id > UINT_MAX / 2; id--) {
2988 			TAILQ_FOREACH(p, &c->peers, entry)
2989 				if (p->conf.id == id)
2990 					break;
2991 			if (p == NULL)		/* we found a free id */
2992 				break;
2993 		}
2994 		newpeer->template = loose;
2995 		session_template_clone(newpeer, ip, id, 0);
2996 		newpeer->state = newpeer->prev_state = STATE_NONE;
2997 		newpeer->reconf_action = RECONF_KEEP;
2998 		newpeer->rbuf = NULL;
2999 		init_peer(newpeer);
3000 		bgp_fsm(newpeer, EVNT_START);
3001 		TAILQ_INSERT_TAIL(&c->peers, newpeer, entry);
3002 		return (newpeer);
3003 	}
3004 
3005 	return (NULL);
3006 }
3007 
3008 struct peer *
3009 getpeerbyid(struct bgpd_config *c, u_int32_t peerid)
3010 {
3011 	struct peer *p;
3012 
3013 	/* we might want a more effective way to find peers by id */
3014 	TAILQ_FOREACH(p, &c->peers, entry)
3015 		if (p->conf.id == peerid)
3016 			return (p);
3017 	return (NULL);
3018 }
3019 
3020 int
3021 peer_matched(struct peer *p, struct ctl_neighbor *n)
3022 {
3023 	char *s;
3024 
3025 	if (n && n->addr.aid) {
3026 		if (memcmp(&p->conf.remote_addr, &n->addr,
3027 		    sizeof(p->conf.remote_addr)))
3028 			return 0;
3029 	} else if (n && n->descr[0]) {
3030 		s = n->is_group ? p->conf.group : p->conf.descr;
3031 		if (strcmp(s, n->descr))
3032 			return 0;
3033 	}
3034 	return 1;
3035 }
3036 
3037 void
3038 session_template_clone(struct peer *p, struct sockaddr *ip, u_int32_t id,
3039     u_int32_t as)
3040 {
3041 	struct bgpd_addr	remote_addr;
3042 
3043 	if (ip)
3044 		sa2addr(ip, &remote_addr, NULL);
3045 	else
3046 		memcpy(&remote_addr, &p->conf.remote_addr, sizeof(remote_addr));
3047 
3048 	memcpy(&p->conf, &p->template->conf, sizeof(struct peer_config));
3049 
3050 	p->conf.id = id;
3051 
3052 	if (as) {
3053 		p->conf.remote_as = as;
3054 		p->conf.ebgp = (p->conf.remote_as != p->conf.local_as);
3055 		if (!p->conf.ebgp)
3056 			/* force enforce_as off for iBGP sessions */
3057 			p->conf.enforce_as = ENFORCE_AS_OFF;
3058 	}
3059 
3060 	memcpy(&p->conf.remote_addr, &remote_addr, sizeof(remote_addr));
3061 	switch (p->conf.remote_addr.aid) {
3062 	case AID_INET:
3063 		p->conf.remote_masklen = 32;
3064 		break;
3065 	case AID_INET6:
3066 		p->conf.remote_masklen = 128;
3067 		break;
3068 	}
3069 	p->conf.template = 0;
3070 }
3071 
3072 int
3073 session_match_mask(struct peer *p, struct bgpd_addr *a)
3074 {
3075 	struct in_addr	 v4masked;
3076 	struct in6_addr	 v6masked;
3077 
3078 	switch (p->conf.remote_addr.aid) {
3079 	case AID_INET:
3080 		inet4applymask(&v4masked, &a->v4, p->conf.remote_masklen);
3081 		if (p->conf.remote_addr.v4.s_addr == v4masked.s_addr)
3082 			return (1);
3083 		return (0);
3084 	case AID_INET6:
3085 		inet6applymask(&v6masked, &a->v6, p->conf.remote_masklen);
3086 
3087 		if (memcmp(&v6masked, &p->conf.remote_addr.v6,
3088 		    sizeof(v6masked)) == 0)
3089 			return (1);
3090 		return (0);
3091 	}
3092 	return (0);
3093 }
3094 
3095 void
3096 session_down(struct peer *peer)
3097 {
3098 	bzero(&peer->capa.neg, sizeof(peer->capa.neg));
3099 	peer->stats.last_updown = time(NULL);
3100 	/*
3101 	 * session_down is called in the exit code path so check
3102 	 * if the RDE is still around, if not there is no need to
3103 	 * send the message.
3104 	 */
3105 	if (ibuf_rde == NULL)
3106 		return;
3107 	if (imsg_rde(IMSG_SESSION_DOWN, peer->conf.id, NULL, 0) == -1)
3108 		fatalx("imsg_compose error");
3109 }
3110 
3111 void
3112 session_up(struct peer *p)
3113 {
3114 	struct session_up	 sup;
3115 
3116 	if (imsg_rde(IMSG_SESSION_ADD, p->conf.id,
3117 	    &p->conf, sizeof(p->conf)) == -1)
3118 		fatalx("imsg_compose error");
3119 
3120 	sup.local_addr = p->local;
3121 	sup.remote_addr = p->remote;
3122 
3123 	sup.remote_bgpid = p->remote_bgpid;
3124 	sup.short_as = p->short_as;
3125 	memcpy(&sup.capa, &p->capa.neg, sizeof(sup.capa));
3126 	p->stats.last_updown = time(NULL);
3127 	if (imsg_rde(IMSG_SESSION_UP, p->conf.id, &sup, sizeof(sup)) == -1)
3128 		fatalx("imsg_compose error");
3129 }
3130 
3131 int
3132 imsg_ctl_parent(int type, u_int32_t peerid, pid_t pid, void *data,
3133     u_int16_t datalen)
3134 {
3135 	return (imsg_compose(ibuf_main, type, peerid, pid, -1, data, datalen));
3136 }
3137 
3138 int
3139 imsg_ctl_rde(int type, pid_t pid, void *data, u_int16_t datalen)
3140 {
3141 	if (ibuf_rde_ctl == NULL) {
3142 		log_warnx("Can't send message %u to RDE, ctl pipe closed",
3143 		    type);
3144 		return (0);
3145 	}
3146 	/*
3147 	 * Use control socket to talk to RDE to bypass the queue of the
3148 	 * regular imsg socket.
3149 	 */
3150 	return (imsg_compose(ibuf_rde_ctl, type, 0, pid, -1, data, datalen));
3151 }
3152 
3153 int
3154 imsg_rde(int type, uint32_t peerid, void *data, u_int16_t datalen)
3155 {
3156 	if (ibuf_rde == NULL) {
3157 		log_warnx("Can't send message %u to RDE, pipe closed", type);
3158 		return (0);
3159 	}
3160 
3161 	return (imsg_compose(ibuf_rde, type, peerid, 0, -1, data, datalen));
3162 }
3163 
3164 void
3165 session_demote(struct peer *p, int level)
3166 {
3167 	struct demote_msg	msg;
3168 
3169 	strlcpy(msg.demote_group, p->conf.demote_group,
3170 	    sizeof(msg.demote_group));
3171 	msg.level = level;
3172 	if (imsg_compose(ibuf_main, IMSG_DEMOTE, p->conf.id, 0, -1,
3173 	    &msg, sizeof(msg)) == -1)
3174 		fatalx("imsg_compose error");
3175 
3176 	p->demoted += level;
3177 }
3178 
3179 void
3180 session_stop(struct peer *peer, u_int8_t subcode)
3181 {
3182 	char data[SHUT_COMM_LEN];
3183 	size_t datalen;
3184 	size_t shutcomm_len;
3185 	char *communication;
3186 
3187 	datalen = 0;
3188 	communication = peer->conf.shutcomm;
3189 
3190 	if ((subcode == ERR_CEASE_ADMIN_DOWN ||
3191 	    subcode == ERR_CEASE_ADMIN_RESET)
3192 	    && communication && *communication) {
3193 		shutcomm_len = strlen(communication);
3194 		if (shutcomm_len > SHUT_COMM_LEN - 1) {
3195 		    log_peer_warnx(&peer->conf,
3196 			"trying to send overly long shutdown reason");
3197 		} else {
3198 			data[0] = shutcomm_len;
3199 			datalen = shutcomm_len + sizeof(data[0]);
3200 			memcpy(data + 1, communication, shutcomm_len);
3201 		}
3202 	}
3203 	switch (peer->state) {
3204 	case STATE_OPENSENT:
3205 	case STATE_OPENCONFIRM:
3206 	case STATE_ESTABLISHED:
3207 		session_notification(peer, ERR_CEASE, subcode, data, datalen);
3208 		break;
3209 	default:
3210 		/* session not open, no need to send notification */
3211 		break;
3212 	}
3213 	bgp_fsm(peer, EVNT_STOP);
3214 }
3215 
3216 void
3217 merge_peers(struct bgpd_config *c, struct bgpd_config *nc)
3218 {
3219 	struct peer *p, *np;
3220 
3221 	TAILQ_FOREACH(p, &c->peers, entry) {
3222 		/* templates are handled specially */
3223 		if (p->template != NULL)
3224 			continue;
3225 		np = getpeerbyid(nc, p->conf.id);
3226 		if (np == NULL) {
3227 			p->reconf_action = RECONF_DELETE;
3228 			continue;
3229 		}
3230 
3231 		memcpy(&p->conf, &np->conf, sizeof(p->conf));
3232 		TAILQ_REMOVE(&nc->peers, np, entry);
3233 		free(np);
3234 
3235 		p->reconf_action = RECONF_KEEP;
3236 
3237 		/* had demotion, is demoted, demote removed? */
3238 		if (p->demoted && !p->conf.demote_group[0])
3239 			session_demote(p, -1);
3240 
3241 		/* sync the RDE in case we keep the peer */
3242 		if (imsg_rde(IMSG_SESSION_ADD, p->conf.id,
3243 		    &p->conf, sizeof(struct peer_config)) == -1)
3244 			fatalx("imsg_compose error");
3245 
3246 		/* apply the config to all clones of a template */
3247 		if (p->conf.template) {
3248 			struct peer *xp;
3249 			TAILQ_FOREACH(xp, &conf->peers, entry) {
3250 				if (xp->template != p)
3251 					continue;
3252 				session_template_clone(xp, NULL, xp->conf.id,
3253 				    xp->conf.remote_as);
3254 				if (imsg_rde(IMSG_SESSION_ADD, xp->conf.id,
3255 				    &xp->conf, sizeof(xp->conf)) == -1)
3256 					fatalx("imsg_compose error");
3257 			}
3258 		}
3259 	}
3260 
3261 	TAILQ_CONCAT(&c->peers, &nc->peers, entry);
3262 }
3263