xref: /openbsd-src/usr.sbin/bgpd/session.c (revision 2b0358df1d88d06ef4139321dd05bd5e05d91eaf)
1 /*	$OpenBSD: session.c,v 1.289 2009/03/19 07:00:07 claudio Exp $ */
2 
3 /*
4  * Copyright (c) 2003, 2004, 2005 Henning Brauer <henning@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/param.h>
20 #include <sys/types.h>
21 
22 #include <sys/mman.h>
23 #include <sys/socket.h>
24 #include <sys/un.h>
25 #include <net/if_types.h>
26 #include <netinet/in.h>
27 #include <netinet/in_systm.h>
28 #include <netinet/ip.h>
29 #include <netinet/tcp.h>
30 #include <arpa/inet.h>
31 
32 #include <err.h>
33 #include <errno.h>
34 #include <fcntl.h>
35 #include <limits.h>
36 #include <poll.h>
37 #include <pwd.h>
38 #include <signal.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <string.h>
42 #include <unistd.h>
43 
44 #include "bgpd.h"
45 #include "mrt.h"
46 #include "session.h"
47 
/*
 * Fixed slots at the front of the pollfd array built in session_main():
 *   PFD_PIPE_MAIN       - ibuf_main->fd
 *   PFD_PIPE_ROUTE      - ibuf_rde->fd
 *   PFD_PIPE_ROUTE_CTL  - ibuf_rde_ctl->fd
 *   PFD_SOCK_CTL        - csock
 *   PFD_SOCK_RCTL       - rcsock
 * The listener sockets are placed from PFD_LISTENERS_START onwards,
 * followed by the peer, mrt and control connection descriptors.
 */
48 #define PFD_PIPE_MAIN		0
49 #define PFD_PIPE_ROUTE		1
50 #define PFD_PIPE_ROUTE_CTL	2
51 #define PFD_SOCK_CTL		3
52 #define PFD_SOCK_RCTL		4
53 #define PFD_LISTENERS_START	5
54 
/*
 * Forward declarations.
 * NOTE(review): not every function declared here is defined in the part
 * of the file that was reviewed (e.g. the parse_*() and session_capa_*()
 * functions); they are presumably defined further down -- confirm.
 */
55 void	session_sighdlr(int);
56 int	setup_listeners(u_int *);
57 void	init_conf(struct bgpd_config *);
58 void	init_peer(struct peer *);
59 void	start_timer_holdtime(struct peer *);
60 void	start_timer_keepalive(struct peer *);
61 void	session_close_connection(struct peer *);
62 void	change_state(struct peer *, enum session_state, enum session_events);
63 int	session_setup_socket(struct peer *);
64 void	session_accept(int);
65 int	session_connect(struct peer *);
66 void	session_tcp_established(struct peer *);
67 void	session_capa_ann_none(struct peer *);
68 int	session_capa_add(struct peer *, struct buf *, u_int8_t, u_int8_t,
69 	    u_int8_t *);
70 int	session_capa_add_mp(struct buf *, u_int16_t, u_int8_t);
71 struct bgp_msg	*session_newmsg(enum msg_type, u_int16_t);
72 int	session_sendmsg(struct bgp_msg *, struct peer *);
73 void	session_open(struct peer *);
74 void	session_keepalive(struct peer *);
75 void	session_update(u_int32_t, void *, size_t);
76 void	session_notification(struct peer *, u_int8_t, u_int8_t, void *,
77 	    ssize_t);
78 void	session_rrefresh(struct peer *, u_int16_t, u_int8_t);
79 int	session_dispatch_msg(struct pollfd *, struct peer *);
80 int	parse_header(struct peer *, u_char *, u_int16_t *, u_int8_t *);
81 int	parse_open(struct peer *);
82 int	parse_update(struct peer *);
83 int	parse_refresh(struct peer *);
84 int	parse_notification(struct peer *);
85 int	parse_capabilities(struct peer *, u_char *, u_int16_t, u_int32_t *);
86 void	session_dispatch_imsg(struct imsgbuf *, int, u_int *);
87 void	session_up(struct peer *);
88 void	session_down(struct peer *);
89 void	session_demote(struct peer *, int);
90 
91 int			 la_cmp(struct listen_addr *, struct listen_addr *);
92 struct peer		*getpeerbyip(struct sockaddr *);
93 int			 session_match_mask(struct peer *, struct sockaddr *);
94 struct peer		*getpeerbyid(u_int32_t);
95 static struct sockaddr	*addr2sa(struct bgpd_addr *, u_int16_t);
96 
/*
 * Session engine global state.
 *   conf           - active configuration; session_main() points it at its
 *                    config argument
 *   nconf, npeers  - NOTE(review): presumably staging areas for a pending
 *                    configuration reload; they are not written in the
 *                    code reviewed here -- confirm
 *   sysdep         - records unavailable system features (no_md5sig,
 *                    no_pfkey)
 *   session_quit   - set by session_sighdlr(); ends the main loop
 *   pending_reconf - while non-zero the main loop skips the peer
 *                    init/reinit/delete handling
 *   csock, rcsock  - control sockets returned by control_init()
 *   peer_cnt       - incremented by init_peer(), decremented when a peer
 *                    is deleted in session_main()
 *   ibuf_rde, ibuf_rde_ctl, ibuf_main
 *                  - imsg buffers set up in session_main() on the
 *                    pipe_s2r, pipe_s2rctl and pipe_m2s descriptors
 *   mrthead        - MRT dump list of the session engine, initialized in
 *                    session_main()
 */
97 struct bgpd_config	*conf, *nconf = NULL;
98 struct bgpd_sysdep	 sysdep;
99 struct peer		*npeers;
100 volatile sig_atomic_t	 session_quit = 0;
101 int			 pending_reconf = 0;
102 int			 csock = -1, rcsock = -1;
103 u_int			 peer_cnt;
104 struct imsgbuf		*ibuf_rde;
105 struct imsgbuf		*ibuf_rde_ctl;
106 struct imsgbuf		*ibuf_main;
107 
108 struct mrt_head		 mrthead;
109 
110 void
111 session_sighdlr(int sig)
112 {
113 	switch (sig) {
114 	case SIGINT:
115 	case SIGTERM:
116 		session_quit = 1;
117 		break;
118 	}
119 }
120 
121 int
122 setup_listeners(u_int *la_cnt)
123 {
124 	int			 ttl = 255;
125 	int			 opt;
126 	struct listen_addr	*la;
127 	u_int			 cnt = 0;
128 
129 	TAILQ_FOREACH(la, conf->listen_addrs, entry) {
130 		la->reconf = RECONF_NONE;
131 		cnt++;
132 
133 		if (la->flags & LISTENER_LISTENING)
134 			continue;
135 
136 		if (la->fd == -1) {
137 			log_warn("cannot establish listener on %s: invalid fd",
138 			    log_sockaddr((struct sockaddr *)&la->sa));
139 			continue;
140 		}
141 
142 		opt = 1;
143 		if (setsockopt(la->fd, IPPROTO_TCP, TCP_MD5SIG,
144 		    &opt, sizeof(opt)) == -1) {
145 			if (errno == ENOPROTOOPT) {	/* system w/o md5sig */
146 				log_warnx("md5sig not available, disabling");
147 				sysdep.no_md5sig = 1;
148 			} else
149 				fatal("setsockopt TCP_MD5SIG");
150 		}
151 
152 		/* set ttl to 255 so that ttl-security works */
153 		if (la->sa.ss_family == AF_INET && setsockopt(la->fd,
154 		    IPPROTO_IP, IP_TTL, &ttl, sizeof(ttl)) == -1) {
155 			log_warn("setup_listeners setsockopt TTL");
156 			continue;
157 		}
158 
159 		session_socket_blockmode(la->fd, BM_NONBLOCK);
160 
161 		if (listen(la->fd, MAX_BACKLOG)) {
162 			close(la->fd);
163 			fatal("listen");
164 		}
165 
166 		la->flags |= LISTENER_LISTENING;
167 
168 		log_info("listening on %s",
169 		    log_sockaddr((struct sockaddr *)&la->sa));
170 	}
171 
172 	*la_cnt = cnt;
173 
174 	return (0);
175 }
176 
/*
 * Entry point of the session engine (SE) process.
 *
 * Forks; the parent immediately gets the child's pid back.  The child
 * opens the control sockets, chroots into the pw_dir of BGPD_USER, drops
 * privileges, frees the parts of the configuration the SE does not use
 * (filter rules, network list, the parent's mrt list) and then runs the
 * poll(2) loop until session_quit is set.  On the way out it stops all
 * peers, releases its resources and calls _exit(0), so the child never
 * returns to the caller.
 *
 * config, cpeers: become the globals conf and peers.
 * rules, net_l, m_l: emptied here; rules itself is freed as well.
 * pipe_*: socket pairs to the other processes; the ends not used by the
 *         SE are closed here.
 */
177 pid_t
178 session_main(struct bgpd_config *config, struct peer *cpeers,
179     struct network_head *net_l, struct filter_head *rules,
180     struct mrt_head *m_l, int pipe_m2s[2], int pipe_s2r[2], int pipe_m2r[2],
181     int pipe_s2rctl[2])
182 {
183 	int			 nfds, timeout;
184 	unsigned int		 i, j, idx_peers, idx_listeners, idx_mrts;
185 	pid_t			 pid;
186 	u_int			 pfd_elms = 0, peer_l_elms = 0, mrt_l_elms = 0;
187 	u_int			 listener_cnt, ctl_cnt, mrt_cnt;
188 	u_int			 new_cnt;
189 	u_int32_t		 ctl_queued;
190 	struct passwd		*pw;
191 	struct peer		*p, **peer_l = NULL, *last, *next;
192 	struct network		*net;
193 	struct mrt		*m, **mrt_l = NULL;
194 	struct filter_rule	*r;
195 	struct pollfd		*pfd = NULL;
196 	struct ctl_conn		*ctl_conn;
197 	struct listen_addr	*la;
198 	void			*newp;
199 	short			 events;
200 
201 	conf = config;
202 	peers = cpeers;
203 
	/* the parent returns the child's pid; only the child continues */
204 	switch (pid = fork()) {
205 	case -1:
206 		fatal("cannot fork");
207 	case 0:
208 		break;
209 	default:
210 		return (pid);
211 	}
212 
213 	/* control socket is outside chroot */
214 	if ((csock = control_init(0, conf->csock)) == -1)
215 		fatalx("control socket setup failed");
216 	if (conf->rcsock != NULL &&
217 	    (rcsock = control_init(1, conf->rcsock)) == -1)
218 		fatalx("control socket setup failed");
219 
220 	if ((pw = getpwnam(BGPD_USER)) == NULL)
221 		fatal(NULL);
222 
223 	if (chroot(pw->pw_dir) == -1)
224 		fatal("chroot");
225 	if (chdir("/") == -1)
226 		fatal("chdir(\"/\")");
227 
228 	setproctitle("session engine");
229 	bgpd_process = PROC_SE;
230 
	/* pfkey is set up before the privileges are dropped below */
231 	if (pfkey_init(&sysdep) == -1)
232 		fatalx("pfkey setup failed");
233 
234 	if (setgroups(1, &pw->pw_gid) ||
235 	    setresgid(pw->pw_gid, pw->pw_gid, pw->pw_gid) ||
236 	    setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid))
237 		fatal("can't drop privileges");
238 
239 	listener_cnt = 0;
240 	setup_listeners(&listener_cnt);
241 
242 	signal(SIGTERM, session_sighdlr);
243 	signal(SIGINT, session_sighdlr);
244 	signal(SIGPIPE, SIG_IGN);
245 	signal(SIGHUP, SIG_IGN);
246 	log_info("session engine ready");
	/* close the pipe ends that are not used by this process */
247 	close(pipe_m2s[0]);
248 	close(pipe_s2r[1]);
249 	close(pipe_s2rctl[1]);
250 	close(pipe_m2r[0]);
251 	close(pipe_m2r[1]);
252 	init_conf(conf);
253 	if ((ibuf_rde = malloc(sizeof(struct imsgbuf))) == NULL ||
254 	    (ibuf_rde_ctl = malloc(sizeof(struct imsgbuf))) == NULL ||
255 	    (ibuf_main = malloc(sizeof(struct imsgbuf))) == NULL)
256 		fatal(NULL);
257 	imsg_init(ibuf_rde, pipe_s2r[0]);
258 	imsg_init(ibuf_rde_ctl, pipe_s2rctl[0]);
259 	imsg_init(ibuf_main, pipe_m2s[1]);
260 	TAILQ_INIT(&ctl_conns);
261 	control_listen(csock);
262 	control_listen(rcsock);
263 	LIST_INIT(&mrthead);
264 	peer_cnt = 0;
265 	ctl_cnt = 0;
266 
267 	/* filter rules are not used in the SE */
268 	while ((r = TAILQ_FIRST(rules)) != NULL) {
269 		TAILQ_REMOVE(rules, r, entry);
270 		free(r);
271 	}
272 	free(rules);
273 
274 	/* network list is not used in the SE */
275 	while ((net = TAILQ_FIRST(net_l)) != NULL) {
276 		TAILQ_REMOVE(net_l, net, entry);
277 		filterset_free(&net->net.attrset);
278 		free(net);
279 	}
280 
281 	/* main mrt list is not used in the SE */
282 	while ((m = LIST_FIRST(m_l)) != NULL) {
283 		LIST_REMOVE(m, entry);
284 		free(m);
285 	}
286 
	/* main loop: one pass per poll(2) wakeup until a signal asks to quit */
287 	while (session_quit == 0) {
288 		/* check for peers to be initialized or deleted */
		/* last tracks the previous surviving peer for list unlinking */
289 		last = NULL;
290 		for (p = peers; p != NULL; p = next) {
291 			next = p->next;
292 			if (!pending_reconf) {
293 				/* cloned peer that idled out? */
294 				if (p->state == STATE_IDLE && p->conf.cloned &&
295 				    time(NULL) - p->stats.last_updown >=
296 				    INTERVAL_HOLD_CLONED)
297 					p->conf.reconf_action = RECONF_DELETE;
298 
299 				/* new peer that needs init? */
300 				if (p->state == STATE_NONE)
301 					init_peer(p);
302 
303 				/* reinit due? */
304 				if (p->conf.reconf_action == RECONF_REINIT) {
305 					bgp_fsm(p, EVNT_STOP);
306 					timer_set(p, Timer_IdleHold, 0);
307 				}
308 
309 				/* deletion due? */
310 				if (p->conf.reconf_action == RECONF_DELETE) {
311 					if (p->demoted)
312 						session_demote(p, -1);
313 					p->conf.demote_group[0] = 0;
314 					bgp_fsm(p, EVNT_STOP);
315 					log_peer_warnx(&p->conf, "removed");
316 					if (last != NULL)
317 						last->next = next;
318 					else
319 						peers = next;
320 					timer_remove_all(p);
321 					free(p);
322 					peer_cnt--;
323 					continue;
324 				}
325 				p->conf.reconf_action = RECONF_NONE;
326 			}
327 			last = p;
328 		}
329 
		/*
		 * Grow peer_l, mrt_l and pfd when more entries are needed
		 * than allocated so far; the arrays are never shrunk.
		 */
330 		if (peer_cnt > peer_l_elms) {
331 			if ((newp = realloc(peer_l, sizeof(struct peer *) *
332 			    peer_cnt)) == NULL) {
333 				/* panic for now  */
334 				log_warn("could not resize peer_l from %u -> %u"
335 				    " entries", peer_l_elms, peer_cnt);
336 				fatalx("exiting");
337 			}
338 			peer_l = newp;
339 			peer_l_elms = peer_cnt;
340 		}
341 
		/*
		 * Only mrts with queued output get a pollfd slot.
		 * NOTE(review): mrt_cnt is sampled before the peer timers
		 * below are run.  If a timer driven state change queues
		 * data on an mrt that had nothing queued (change_state()
		 * calls mrt_dump_state()), the fill loop further down may
		 * find more mrts than were counted here -- confirm that
		 * mrt_l and pfd cannot be overrun.
		 */
342 		mrt_cnt = 0;
343 		LIST_FOREACH(m, &mrthead, entry)
344 			if (m->wbuf.queued)
345 				mrt_cnt++;
346 
347 		if (mrt_cnt > mrt_l_elms) {
348 			if ((newp = realloc(mrt_l, sizeof(struct mrt *) *
349 			    mrt_cnt)) == NULL) {
350 				/* panic for now  */
351 				log_warn("could not resize mrt_l from %u -> %u"
352 				    " entries", mrt_l_elms, mrt_cnt);
353 				fatalx("exiting");
354 			}
355 			mrt_l = newp;
356 			mrt_l_elms = mrt_cnt;
357 		}
358 
359 		new_cnt = PFD_LISTENERS_START + listener_cnt + peer_cnt +
360 		    ctl_cnt + mrt_cnt;
361 		if (new_cnt > pfd_elms) {
362 			if ((newp = realloc(pfd, sizeof(struct pollfd) *
363 			    new_cnt)) == NULL) {
364 				/* panic for now  */
365 				log_warn("could not resize pfd from %u -> %u"
366 				    " entries", pfd_elms, new_cnt);
367 				fatalx("exiting");
368 			}
369 			pfd = newp;
370 			pfd_elms = new_cnt;
371 		}
372 
		/* rebuild the pollfd array from scratch: fixed slots first */
373 		bzero(pfd, sizeof(struct pollfd) * pfd_elms);
374 		pfd[PFD_PIPE_MAIN].fd = ibuf_main->fd;
375 		pfd[PFD_PIPE_MAIN].events = POLLIN;
376 		if (ibuf_main->w.queued > 0)
377 			pfd[PFD_PIPE_MAIN].events |= POLLOUT;
378 		pfd[PFD_PIPE_ROUTE].fd = ibuf_rde->fd;
379 		pfd[PFD_PIPE_ROUTE].events = POLLIN;
380 		if (ibuf_rde->w.queued > 0)
381 			pfd[PFD_PIPE_ROUTE].events |= POLLOUT;
382 
383 		ctl_queued = 0;
384 		TAILQ_FOREACH(ctl_conn, &ctl_conns, entry)
385 			ctl_queued += ctl_conn->ibuf.w.queued;
386 
387 		pfd[PFD_PIPE_ROUTE_CTL].fd = ibuf_rde_ctl->fd;
388 		if (ctl_queued < SESSION_CTL_QUEUE_MAX)
389 			/*
390 			 * Do not act as unlimited buffer. Don't read in more
391 			 * messages if the ctl sockets are getting full.
392 			 */
393 			pfd[PFD_PIPE_ROUTE_CTL].events = POLLIN;
394 		pfd[PFD_SOCK_CTL].fd = csock;
395 		pfd[PFD_SOCK_CTL].events = POLLIN;
396 		pfd[PFD_SOCK_RCTL].fd = rcsock;
397 		pfd[PFD_SOCK_RCTL].events = POLLIN;
398 
399 		i = PFD_LISTENERS_START;
400 		TAILQ_FOREACH(la, conf->listen_addrs, entry) {
401 			pfd[i].fd = la->fd;
402 			pfd[i].events = POLLIN;
403 			i++;
404 		}
405 		idx_listeners = i;
406 		timeout = 240;	/* loop every 240s at least */
407 
		/*
		 * Per peer: fire the timer that is due (if any), shorten
		 * the poll timeout to the next pending timer and add the
		 * peer's socket to the poll set.
		 */
408 		for (p = peers; p != NULL; p = p->next) {
409 			time_t	nextaction;
410 			struct peer_timer *pt;
411 
412 			/* check timers */
413 			if ((pt = timer_nextisdue(p)) != NULL) {
414 				switch (pt->type) {
415 				case Timer_Hold:
416 					bgp_fsm(p, EVNT_TIMER_HOLDTIME);
417 					break;
418 				case Timer_ConnectRetry:
419 					bgp_fsm(p, EVNT_TIMER_CONNRETRY);
420 					break;
421 				case Timer_Keepalive:
422 					bgp_fsm(p, EVNT_TIMER_KEEPALIVE);
423 					break;
424 				case Timer_IdleHold:
425 					bgp_fsm(p, EVNT_START);
426 					break;
427 				case Timer_IdleHoldReset:
					/* halve the idle hold time down to its initial value */
428 					p->IdleHoldTime /= 2;
429 					if (p->IdleHoldTime <=
430 					    INTERVAL_IDLE_HOLD_INITIAL) {
431 						p->IdleHoldTime =
432 						    INTERVAL_IDLE_HOLD_INITIAL;
433 						timer_stop(p,
434 						    Timer_IdleHoldReset);
435 						p->errcnt = 0;
436 					} else
437 						timer_set(p,
438 						    Timer_IdleHoldReset,
439 						    p->IdleHoldTime);
440 					break;
441 				case Timer_CarpUndemote:
442 					timer_stop(p, Timer_CarpUndemote);
443 					if (p->demoted &&
444 					    p->state == STATE_ESTABLISHED)
445 						session_demote(p, -1);
446 					break;
447 				default:
448 					fatalx("King Bula lost in time");
449 				}
450 			}
451 			if ((nextaction = timer_nextduein(p)) != -1 &&
452 			    nextaction < timeout)
453 				timeout = nextaction;
454 
455 			/* are we waiting for a write? */
456 			events = POLLIN;
457 			if (p->wbuf.queued > 0 || p->state == STATE_CONNECT)
458 				events |= POLLOUT;
459 
460 			/* poll events */
461 			if (p->fd != -1 && events != 0) {
462 				pfd[i].fd = p->fd;
463 				pfd[i].events = events;
				/* remember which peer owns this pollfd slot */
464 				peer_l[i - idx_listeners] = p;
465 				i++;
466 			}
467 		}
468 
469 		idx_peers = i;
470 
471 		LIST_FOREACH(m, &mrthead, entry)
472 			if (m->wbuf.queued) {
473 				pfd[i].fd = m->wbuf.fd;
474 				pfd[i].events = POLLOUT;
475 				mrt_l[i - idx_peers] = m;
476 				i++;
477 			}
478 
479 		idx_mrts = i;
480 
481 		TAILQ_FOREACH(ctl_conn, &ctl_conns, entry) {
482 			pfd[i].fd = ctl_conn->ibuf.fd;
483 			pfd[i].events = POLLIN;
484 			if (ctl_conn->ibuf.w.queued > 0)
485 				pfd[i].events |= POLLOUT;
486 			i++;
487 		}
488 
		/* timeout is kept in seconds, poll(2) expects milliseconds */
489 		if (timeout < 0)
490 			timeout = 0;
491 		if ((nfds = poll(pfd, i, timeout * 1000)) == -1)
492 			if (errno != EINTR)
493 				fatal("poll error");
494 
		/*
		 * Dispatch.  Every check below requires nfds > 0, so an
		 * interrupted poll (nfds == -1) skips all of it.
		 */
495 		if (nfds > 0 && pfd[PFD_PIPE_MAIN].revents & POLLOUT)
496 			if (msgbuf_write(&ibuf_main->w) < 0)
497 				fatal("pipe write error");
498 
499 		if (nfds > 0 && pfd[PFD_PIPE_MAIN].revents & POLLIN) {
500 			nfds--;
501 			session_dispatch_imsg(ibuf_main, PFD_PIPE_MAIN,
502 			    &listener_cnt);
503 		}
504 
505 		if (nfds > 0 && pfd[PFD_PIPE_ROUTE].revents & POLLOUT)
506 			if (msgbuf_write(&ibuf_rde->w) < 0)
507 				fatal("pipe write error");
508 
509 		if (nfds > 0 && pfd[PFD_PIPE_ROUTE].revents & POLLIN) {
510 			nfds--;
511 			session_dispatch_imsg(ibuf_rde, PFD_PIPE_ROUTE,
512 			    &listener_cnt);
513 		}
514 
515 		if (nfds > 0 && pfd[PFD_PIPE_ROUTE_CTL].revents & POLLIN) {
516 			nfds--;
517 			session_dispatch_imsg(ibuf_rde_ctl, PFD_PIPE_ROUTE_CTL,
518 			    &listener_cnt);
519 		}
520 
521 		if (nfds > 0 && pfd[PFD_SOCK_CTL].revents & POLLIN) {
522 			nfds--;
523 			ctl_cnt += control_accept(csock, 0);
524 		}
525 
526 		if (nfds > 0 && pfd[PFD_SOCK_RCTL].revents & POLLIN) {
527 			nfds--;
528 			ctl_cnt += control_accept(rcsock, 1);
529 		}
530 
		/*
		 * j walks the variable part of pfd in the order it was
		 * filled above: listeners, peers, mrts, control connections.
		 */
531 		for (j = PFD_LISTENERS_START; nfds > 0 && j < idx_listeners;
532 		    j++)
533 			if (pfd[j].revents & POLLIN) {
534 				nfds--;
535 				session_accept(pfd[j].fd);
536 			}
537 
538 		for (; nfds > 0 && j < idx_peers; j++)
539 			nfds -= session_dispatch_msg(&pfd[j],
540 			    peer_l[j - idx_listeners]);
541 
542 		for (; nfds > 0 && j < idx_mrts; j++)
543 			if (pfd[j].revents & POLLOUT) {
544 				nfds--;
545 				mrt_write(mrt_l[j - idx_peers]);
546 			}
547 
548 		for (; nfds > 0 && j < i; j++)
549 			nfds -= control_dispatch_msg(&pfd[j], &ctl_cnt);
550 	}
551 
	/* shutting down: stop every peer and release all resources */
552 	while ((p = peers) != NULL) {
553 		peers = p->next;
554 		bgp_fsm(p, EVNT_STOP);
555 		pfkey_remove(p);
556 		free(p);
557 	}
558 
559 	while ((m = LIST_FIRST(&mrthead)) != NULL) {
560 		mrt_clean(m);
561 		LIST_REMOVE(m, entry);
562 		free(m);
563 	}
564 
565 	while ((la = TAILQ_FIRST(conf->listen_addrs)) != NULL) {
566 		TAILQ_REMOVE(conf->listen_addrs, la, entry);
567 		free(la);
568 	}
569 	free(conf->listen_addrs);
570 	free(peer_l);
571 	free(mrt_l);
572 	free(pfd);
573 
	/*
	 * One last attempt to flush what is still queued for the RDE and
	 * the parent; the results are not checked.
	 * NOTE(review): ibuf_rde_ctl is neither flushed nor freed here,
	 * which is harmless only because the process exits right below.
	 */
574 	msgbuf_write(&ibuf_rde->w);
575 	msgbuf_clear(&ibuf_rde->w);
576 	free(ibuf_rde);
577 	msgbuf_write(&ibuf_main->w);
578 	msgbuf_clear(&ibuf_main->w);
579 	free(ibuf_main);
580 
581 	control_shutdown(csock);
582 	control_shutdown(rcsock);
583 	log_info("session engine exiting");
584 	_exit(0);
585 }
586 
587 void
588 init_conf(struct bgpd_config *c)
589 {
590 	if (!c->holdtime)
591 		c->holdtime = INTERVAL_HOLD;
592 }
593 
594 void
595 init_peer(struct peer *p)
596 {
597 	TAILQ_INIT(&p->timers);
598 	p->fd = p->wbuf.fd = -1;
599 
600 	if (p->conf.if_depend[0])
601 		imsg_compose(ibuf_main, IMSG_IFINFO, 0, 0, -1,
602 		    p->conf.if_depend, sizeof(p->conf.if_depend));
603 	else
604 		p->depend_ok = 1;
605 
606 	peer_cnt++;
607 
608 	change_state(p, STATE_IDLE, EVNT_NONE);
609 	if (p->conf.down)
610 		timer_stop(p, Timer_IdleHold);		/* no autostart */
611 	else
612 		timer_set(p, Timer_IdleHold, 0);	/* start ASAP */
613 
614 	/*
615 	 * on startup, demote if requested.
616 	 * do not handle new peers. they must reach ESTABLISHED beforehands.
617 	 * peers added at runtime have reconf_action set to RECONF_REINIT.
618 	 */
619 	if (p->conf.reconf_action != RECONF_REINIT && p->conf.demote_group[0])
620 		session_demote(p, +1);
621 }
622 
623 void
624 bgp_fsm(struct peer *peer, enum session_events event)
625 {
626 	switch (peer->state) {
627 	case STATE_NONE:
628 		/* nothing */
629 		break;
630 	case STATE_IDLE:
631 		switch (event) {
632 		case EVNT_START:
633 			timer_stop(peer, Timer_Hold);
634 			timer_stop(peer, Timer_Keepalive);
635 			timer_stop(peer, Timer_IdleHold);
636 
637 			/* allocate read buffer */
638 			peer->rbuf = calloc(1, sizeof(struct buf_read));
639 			if (peer->rbuf == NULL)
640 				fatal(NULL);
641 			peer->rbuf->wpos = 0;
642 
643 			/* init write buffer */
644 			msgbuf_init(&peer->wbuf);
645 
646 			/* init pfkey - remove old if any, load new ones */
647 			pfkey_remove(peer);
648 			if (pfkey_establish(peer) == -1) {
649 				log_peer_warnx(&peer->conf,
650 				    "pfkey setup failed");
651 				return;
652 			}
653 
654 			peer->stats.last_sent_errcode = 0;
655 			peer->stats.last_sent_suberr = 0;
656 
657 			if (!peer->depend_ok)
658 				timer_stop(peer, Timer_ConnectRetry);
659 			else if (peer->passive || peer->conf.passive ||
660 			    peer->conf.template) {
661 				change_state(peer, STATE_ACTIVE, event);
662 				timer_stop(peer, Timer_ConnectRetry);
663 			} else {
664 				change_state(peer, STATE_CONNECT, event);
665 				timer_set(peer, Timer_ConnectRetry,
666 				    INTERVAL_CONNECTRETRY);
667 				session_connect(peer);
668 			}
669 			peer->passive = 0;
670 			break;
671 		default:
672 			/* ignore */
673 			break;
674 		}
675 		break;
676 	case STATE_CONNECT:
677 		switch (event) {
678 		case EVNT_START:
679 			/* ignore */
680 			break;
681 		case EVNT_CON_OPEN:
682 			session_tcp_established(peer);
683 			session_open(peer);
684 			timer_stop(peer, Timer_ConnectRetry);
685 			peer->holdtime = INTERVAL_HOLD_INITIAL;
686 			start_timer_holdtime(peer);
687 			change_state(peer, STATE_OPENSENT, event);
688 			break;
689 		case EVNT_CON_OPENFAIL:
690 			timer_set(peer, Timer_ConnectRetry,
691 			    INTERVAL_CONNECTRETRY);
692 			session_close_connection(peer);
693 			change_state(peer, STATE_ACTIVE, event);
694 			break;
695 		case EVNT_TIMER_CONNRETRY:
696 			timer_set(peer, Timer_ConnectRetry,
697 			    INTERVAL_CONNECTRETRY);
698 			session_connect(peer);
699 			break;
700 		default:
701 			change_state(peer, STATE_IDLE, event);
702 			break;
703 		}
704 		break;
705 	case STATE_ACTIVE:
706 		switch (event) {
707 		case EVNT_START:
708 			/* ignore */
709 			break;
710 		case EVNT_CON_OPEN:
711 			session_tcp_established(peer);
712 			session_open(peer);
713 			timer_stop(peer, Timer_ConnectRetry);
714 			peer->holdtime = INTERVAL_HOLD_INITIAL;
715 			start_timer_holdtime(peer);
716 			change_state(peer, STATE_OPENSENT, event);
717 			break;
718 		case EVNT_CON_OPENFAIL:
719 			timer_set(peer, Timer_ConnectRetry,
720 			    INTERVAL_CONNECTRETRY);
721 			session_close_connection(peer);
722 			change_state(peer, STATE_ACTIVE, event);
723 			break;
724 		case EVNT_TIMER_CONNRETRY:
725 			timer_set(peer, Timer_ConnectRetry,
726 			    peer->holdtime);
727 			change_state(peer, STATE_CONNECT, event);
728 			session_connect(peer);
729 			break;
730 		default:
731 			change_state(peer, STATE_IDLE, event);
732 			break;
733 		}
734 		break;
735 	case STATE_OPENSENT:
736 		switch (event) {
737 		case EVNT_START:
738 			/* ignore */
739 			break;
740 		case EVNT_STOP:
741 			session_notification(peer, ERR_CEASE, 0, NULL, 0);
742 			change_state(peer, STATE_IDLE, event);
743 			break;
744 		case EVNT_CON_CLOSED:
745 			session_close_connection(peer);
746 			timer_set(peer, Timer_ConnectRetry,
747 			    INTERVAL_CONNECTRETRY);
748 			change_state(peer, STATE_ACTIVE, event);
749 			break;
750 		case EVNT_CON_FATAL:
751 			change_state(peer, STATE_IDLE, event);
752 			break;
753 		case EVNT_TIMER_HOLDTIME:
754 			session_notification(peer, ERR_HOLDTIMEREXPIRED,
755 			    0, NULL, 0);
756 			change_state(peer, STATE_IDLE, event);
757 			break;
758 		case EVNT_RCVD_OPEN:
759 			/* parse_open calls change_state itself on failure */
760 			if (parse_open(peer))
761 				break;
762 			session_keepalive(peer);
763 			change_state(peer, STATE_OPENCONFIRM, event);
764 			break;
765 		case EVNT_RCVD_NOTIFICATION:
766 			if (parse_notification(peer)) {
767 				change_state(peer, STATE_IDLE, event);
768 				/* don't punish, capa negotiation */
769 				timer_set(peer, Timer_IdleHold, 0);
770 				peer->IdleHoldTime /= 2;
771 			} else
772 				change_state(peer, STATE_IDLE, event);
773 			break;
774 		default:
775 			session_notification(peer, ERR_FSM, 0, NULL, 0);
776 			change_state(peer, STATE_IDLE, event);
777 			break;
778 		}
779 		break;
780 	case STATE_OPENCONFIRM:
781 		switch (event) {
782 		case EVNT_START:
783 			/* ignore */
784 			break;
785 		case EVNT_STOP:
786 			session_notification(peer, ERR_CEASE, 0, NULL, 0);
787 			change_state(peer, STATE_IDLE, event);
788 			break;
789 		case EVNT_CON_CLOSED:
790 		case EVNT_CON_FATAL:
791 			change_state(peer, STATE_IDLE, event);
792 			break;
793 		case EVNT_TIMER_HOLDTIME:
794 			session_notification(peer, ERR_HOLDTIMEREXPIRED,
795 			    0, NULL, 0);
796 			change_state(peer, STATE_IDLE, event);
797 			break;
798 		case EVNT_TIMER_KEEPALIVE:
799 			session_keepalive(peer);
800 			break;
801 		case EVNT_RCVD_KEEPALIVE:
802 			start_timer_holdtime(peer);
803 			change_state(peer, STATE_ESTABLISHED, event);
804 			break;
805 		case EVNT_RCVD_NOTIFICATION:
806 			parse_notification(peer);
807 			change_state(peer, STATE_IDLE, event);
808 			break;
809 		default:
810 			session_notification(peer, ERR_FSM, 0, NULL, 0);
811 			change_state(peer, STATE_IDLE, event);
812 			break;
813 		}
814 		break;
815 	case STATE_ESTABLISHED:
816 		switch (event) {
817 		case EVNT_START:
818 			/* ignore */
819 			break;
820 		case EVNT_STOP:
821 			session_notification(peer, ERR_CEASE, 0, NULL, 0);
822 			change_state(peer, STATE_IDLE, event);
823 			break;
824 		case EVNT_CON_CLOSED:
825 		case EVNT_CON_FATAL:
826 			change_state(peer, STATE_IDLE, event);
827 			break;
828 		case EVNT_TIMER_HOLDTIME:
829 			session_notification(peer, ERR_HOLDTIMEREXPIRED,
830 			    0, NULL, 0);
831 			change_state(peer, STATE_IDLE, event);
832 			break;
833 		case EVNT_TIMER_KEEPALIVE:
834 			session_keepalive(peer);
835 			break;
836 		case EVNT_RCVD_KEEPALIVE:
837 			start_timer_holdtime(peer);
838 			break;
839 		case EVNT_RCVD_UPDATE:
840 			start_timer_holdtime(peer);
841 			if (parse_update(peer))
842 				change_state(peer, STATE_IDLE, event);
843 			else
844 				start_timer_holdtime(peer);
845 			break;
846 		case EVNT_RCVD_NOTIFICATION:
847 			parse_notification(peer);
848 			change_state(peer, STATE_IDLE, event);
849 			break;
850 		default:
851 			session_notification(peer, ERR_FSM, 0, NULL, 0);
852 			change_state(peer, STATE_IDLE, event);
853 			break;
854 		}
855 		break;
856 	}
857 }
858 
859 void
860 start_timer_holdtime(struct peer *peer)
861 {
862 	if (peer->holdtime > 0)
863 		timer_set(peer, Timer_Hold, peer->holdtime);
864 	else
865 		timer_stop(peer, Timer_Hold);
866 }
867 
868 void
869 start_timer_keepalive(struct peer *peer)
870 {
871 	if (peer->holdtime > 0)
872 		timer_set(peer, Timer_Keepalive, peer->holdtime / 3);
873 	else
874 		timer_stop(peer, Timer_Keepalive);
875 }
876 
877 void
878 session_close_connection(struct peer *peer)
879 {
880 	if (peer->fd != -1)
881 		close(peer->fd);
882 
883 	peer->fd = peer->wbuf.fd = -1;
884 }
885 
886 void
887 change_state(struct peer *peer, enum session_state state,
888     enum session_events event)
889 {
890 	struct mrt	*mrt;
891 
892 	switch (state) {
893 	case STATE_IDLE:
894 		/* carp demotion first. new peers handled in init_peer */
895 		if (peer->state == STATE_ESTABLISHED &&
896 		    peer->conf.demote_group[0] && !peer->demoted)
897 			session_demote(peer, +1);
898 
899 		/*
900 		 * try to write out what's buffered (maybe a notification),
901 		 * don't bother if it fails
902 		 */
903 		if (peer->state >= STATE_OPENSENT && peer->wbuf.queued)
904 			msgbuf_write(&peer->wbuf);
905 
906 		/*
907 		 * we must start the timer for the next EVNT_START
908 		 * if we are coming here due to an error and the
909 		 * session was not established successfully before, the
910 		 * starttimerinterval needs to be exponentially increased
911 		 */
912 		if (peer->IdleHoldTime == 0)
913 			peer->IdleHoldTime = INTERVAL_IDLE_HOLD_INITIAL;
914 		peer->holdtime = INTERVAL_HOLD_INITIAL;
915 		timer_stop(peer, Timer_ConnectRetry);
916 		timer_stop(peer, Timer_Keepalive);
917 		timer_stop(peer, Timer_Hold);
918 		timer_stop(peer, Timer_IdleHoldReset);
919 		session_close_connection(peer);
920 		msgbuf_clear(&peer->wbuf);
921 		free(peer->rbuf);
922 		peer->rbuf = NULL;
923 		bzero(&peer->capa.peer, sizeof(peer->capa.peer));
924 		if (peer->state == STATE_ESTABLISHED)
925 			session_down(peer);
926 		if (event != EVNT_STOP) {
927 			timer_set(peer, Timer_IdleHold, peer->IdleHoldTime);
928 			if (event != EVNT_NONE &&
929 			    peer->IdleHoldTime < MAX_IDLE_HOLD/2)
930 				peer->IdleHoldTime *= 2;
931 		}
932 		if (peer->state == STATE_NONE ||
933 		    peer->state == STATE_ESTABLISHED) {
934 			/* initialize capability negotiation structures */
935 			memcpy(&peer->capa.ann, &peer->conf.capabilities,
936 			    sizeof(peer->capa.ann));
937 			if (!peer->conf.announce_capa)
938 				session_capa_ann_none(peer);
939 		}
940 		break;
941 	case STATE_CONNECT:
942 		break;
943 	case STATE_ACTIVE:
944 		break;
945 	case STATE_OPENSENT:
946 		break;
947 	case STATE_OPENCONFIRM:
948 		break;
949 	case STATE_ESTABLISHED:
950 		timer_set(peer, Timer_IdleHoldReset, peer->IdleHoldTime);
951 		if (peer->demoted)
952 			timer_set(peer, Timer_CarpUndemote,
953 			    INTERVAL_HOLD_DEMOTED);
954 		session_up(peer);
955 		break;
956 	default:		/* something seriously fucked */
957 		break;
958 	}
959 
960 	log_statechange(peer, state, event);
961 	LIST_FOREACH(mrt, &mrthead, entry) {
962 		if (!(mrt->type == MRT_ALL_IN || mrt->type == MRT_ALL_OUT))
963 			continue;
964 		if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
965 		    mrt->peer_id == peer->conf.id || (mrt->group_id != 0 &&
966 		    mrt->group_id == peer->conf.groupid))
967 			mrt_dump_state(mrt, peer->state, state, peer);
968 	}
969 	peer->prev_state = peer->state;
970 	peer->state = state;
971 }
972 
973 void
974 session_accept(int listenfd)
975 {
976 	int			 connfd;
977 	int			 opt;
978 	socklen_t		 len;
979 	struct sockaddr_storage	 cliaddr;
980 	struct peer		*p = NULL;
981 
982 	len = sizeof(cliaddr);
983 	if ((connfd = accept(listenfd,
984 	    (struct sockaddr *)&cliaddr, &len)) == -1) {
985 		if (errno == EWOULDBLOCK || errno == EINTR)
986 			return;
987 		else
988 			log_warn("accept");
989 	}
990 
991 	p = getpeerbyip((struct sockaddr *)&cliaddr);
992 
993 	if (p != NULL && p->state == STATE_IDLE && p->errcnt < 2) {
994 		if (timer_running(p, Timer_IdleHold, NULL)) {
995 			/* fast reconnect after clear */
996 			p->passive = 1;
997 			bgp_fsm(p, EVNT_START);
998 		}
999 	}
1000 
1001 	if (p != NULL &&
1002 	    (p->state == STATE_CONNECT || p->state == STATE_ACTIVE)) {
1003 		if (p->fd != -1) {
1004 			if (p->state == STATE_CONNECT)
1005 				session_close_connection(p);
1006 			else {
1007 				close(connfd);
1008 				return;
1009 			}
1010 		}
1011 
1012 		if (p->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) {
1013 			log_peer_warnx(&p->conf,
1014 			    "ipsec or md5sig configured but not available");
1015 			close(connfd);
1016 			return;
1017 		}
1018 
1019 		if (p->conf.auth.method == AUTH_MD5SIG) {
1020 			if (sysdep.no_md5sig) {
1021 				log_peer_warnx(&p->conf,
1022 				    "md5sig configured but not available");
1023 				close(connfd);
1024 				return;
1025 			}
1026 			len = sizeof(opt);
1027 			if (getsockopt(connfd, IPPROTO_TCP, TCP_MD5SIG,
1028 			    &opt, &len) == -1)
1029 				fatal("getsockopt TCP_MD5SIG");
1030 			if (!opt) {	/* non-md5'd connection! */
1031 				log_peer_warnx(&p->conf,
1032 				    "connection attempt without md5 signature");
1033 				close(connfd);
1034 				return;
1035 			}
1036 		}
1037 		p->fd = p->wbuf.fd = connfd;
1038 		if (session_setup_socket(p)) {
1039 			close(connfd);
1040 			return;
1041 		}
1042 		session_socket_blockmode(connfd, BM_NONBLOCK);
1043 		bgp_fsm(p, EVNT_CON_OPEN);
1044 	} else {
1045 		log_conn_attempt(p, (struct sockaddr *)&cliaddr);
1046 		close(connfd);
1047 	}
1048 }
1049 
1050 int
1051 session_connect(struct peer *peer)
1052 {
1053 	int			 opt = 1;
1054 	struct sockaddr		*sa;
1055 
1056 	/*
1057 	 * we do not need the overcomplicated collision detection RFC 1771
1058 	 * describes; we simply make sure there is only ever one concurrent
1059 	 * tcp connection per peer.
1060 	 */
1061 	if (peer->fd != -1)
1062 		return (-1);
1063 
1064 	if ((peer->fd = socket(peer->conf.remote_addr.af, SOCK_STREAM,
1065 	    IPPROTO_TCP)) == -1) {
1066 		log_peer_warn(&peer->conf, "session_connect socket");
1067 		bgp_fsm(peer, EVNT_CON_OPENFAIL);
1068 		return (-1);
1069 	}
1070 
1071 	if (peer->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) {
1072 		log_peer_warnx(&peer->conf,
1073 		    "ipsec or md5sig configured but not available");
1074 		bgp_fsm(peer, EVNT_CON_OPENFAIL);
1075 		return (-1);
1076 	}
1077 
1078 	if (peer->conf.auth.method == AUTH_MD5SIG) {
1079 		if (sysdep.no_md5sig) {
1080 			log_peer_warnx(&peer->conf,
1081 			    "md5sig configured but not available");
1082 			bgp_fsm(peer, EVNT_CON_OPENFAIL);
1083 			return (-1);
1084 		}
1085 		if (setsockopt(peer->fd, IPPROTO_TCP, TCP_MD5SIG,
1086 		    &opt, sizeof(opt)) == -1) {
1087 			log_peer_warn(&peer->conf, "setsockopt md5sig");
1088 			bgp_fsm(peer, EVNT_CON_OPENFAIL);
1089 			return (-1);
1090 		}
1091 	}
1092 	peer->wbuf.fd = peer->fd;
1093 
1094 	/* if update source is set we need to bind() */
1095 	if (peer->conf.local_addr.af) {
1096 		sa = addr2sa(&peer->conf.local_addr, 0);
1097 		if (bind(peer->fd, sa, sa->sa_len) == -1) {
1098 			log_peer_warn(&peer->conf, "session_connect bind");
1099 			bgp_fsm(peer, EVNT_CON_OPENFAIL);
1100 			return (-1);
1101 		}
1102 	}
1103 
1104 	if (session_setup_socket(peer)) {
1105 		bgp_fsm(peer, EVNT_CON_OPENFAIL);
1106 		return (-1);
1107 	}
1108 
1109 	session_socket_blockmode(peer->fd, BM_NONBLOCK);
1110 
1111 	sa = addr2sa(&peer->conf.remote_addr, BGP_PORT);
1112 	if (connect(peer->fd, sa, sa->sa_len) == -1) {
1113 		if (errno != EINPROGRESS) {
1114 			if (errno != peer->lasterr)
1115 				log_peer_warn(&peer->conf, "connect");
1116 			peer->lasterr = errno;
1117 			bgp_fsm(peer, EVNT_CON_OPENFAIL);
1118 			return (-1);
1119 		}
1120 	} else
1121 		bgp_fsm(peer, EVNT_CON_OPEN);
1122 
1123 	return (0);
1124 }
1125 
1126 int
1127 session_setup_socket(struct peer *p)
1128 {
1129 	int	ttl = p->conf.distance;
1130 	int	pre = IPTOS_PREC_INTERNETCONTROL;
1131 	int	nodelay = 1;
1132 	int	bsize;
1133 
1134 	if (p->conf.ebgp && p->conf.remote_addr.af == AF_INET) {
1135 		/* set TTL to foreign router's distance - 1=direct n=multihop
1136 		   with ttlsec, we always use 255 */
1137 		if (p->conf.ttlsec) {
1138 			ttl = 256 - p->conf.distance;
1139 			if (setsockopt(p->fd, IPPROTO_IP, IP_MINTTL, &ttl,
1140 			    sizeof(ttl)) == -1) {
1141 				log_peer_warn(&p->conf,
1142 				    "session_setup_socket setsockopt MINTTL");
1143 				return (-1);
1144 			}
1145 			ttl = 255;
1146 		}
1147 
1148 		if (setsockopt(p->fd, IPPROTO_IP, IP_TTL, &ttl,
1149 		    sizeof(ttl)) == -1) {
1150 			log_peer_warn(&p->conf,
1151 			    "session_setup_socket setsockopt TTL");
1152 			return (-1);
1153 		}
1154 	}
1155 
1156 	if (p->conf.ebgp && p->conf.remote_addr.af == AF_INET6)
1157 		/* set hoplimit to foreign router's distance */
1158 		if (setsockopt(p->fd, IPPROTO_IPV6, IPV6_UNICAST_HOPS, &ttl,
1159 		    sizeof(ttl)) == -1) {
1160 			log_peer_warn(&p->conf,
1161 			    "session_setup_socket setsockopt hoplimit");
1162 			return (-1);
1163 		}
1164 
1165 	/* if ttlsec is in use, set minttl */
1166 	if (p->conf.ttlsec) {
1167 		ttl = 256 - p->conf.distance;
1168 		setsockopt(p->fd, IPPROTO_IP, IP_MINTTL, &ttl, sizeof(ttl));
1169 
1170 	}
1171 
1172 	/* set TCP_NODELAY */
1173 	if (setsockopt(p->fd, IPPROTO_TCP, TCP_NODELAY, &nodelay,
1174 	    sizeof(nodelay)) == -1) {
1175 		log_peer_warn(&p->conf,
1176 		    "session_setup_socket setsockopt TCP_NODELAY");
1177 		return (-1);
1178 	}
1179 
1180 	/* set precedence, see RFC 1771 appendix 5 */
1181 	if (p->conf.remote_addr.af == AF_INET &&
1182 	    setsockopt(p->fd, IPPROTO_IP, IP_TOS, &pre, sizeof(pre)) == -1) {
1183 		log_peer_warn(&p->conf,
1184 		    "session_setup_socket setsockopt TOS");
1185 		return (-1);
1186 	}
1187 
1188 	/* only increase bufsize (and thus window) if md5 or ipsec is in use */
1189 	if (p->conf.auth.method != AUTH_NONE) {
1190 		/* try to increase bufsize. no biggie if it fails */
1191 		bsize = 65535;
1192 		while (setsockopt(p->fd, SOL_SOCKET, SO_RCVBUF, &bsize,
1193 		    sizeof(bsize)) == -1)
1194 			bsize /= 2;
1195 		bsize = 65535;
1196 		while (setsockopt(p->fd, SOL_SOCKET, SO_SNDBUF, &bsize,
1197 		    sizeof(bsize)) == -1)
1198 			bsize /= 2;
1199 	}
1200 
1201 	return (0);
1202 }
1203 
1204 void
1205 session_socket_blockmode(int fd, enum blockmodes bm)
1206 {
1207 	int	flags;
1208 
1209 	if ((flags = fcntl(fd, F_GETFL, 0)) == -1)
1210 		fatal("fcntl F_GETFL");
1211 
1212 	if (bm == BM_NONBLOCK)
1213 		flags |= O_NONBLOCK;
1214 	else
1215 		flags &= ~O_NONBLOCK;
1216 
1217 	if ((flags = fcntl(fd, F_SETFL, flags)) == -1)
1218 		fatal("fcntl F_SETFL");
1219 }
1220 
1221 void
1222 session_tcp_established(struct peer *peer)
1223 {
1224 	socklen_t	len;
1225 
1226 	len = sizeof(peer->sa_local);
1227 	if (getsockname(peer->fd, (struct sockaddr *)&peer->sa_local,
1228 	    &len) == -1)
1229 		log_warn("getsockname");
1230 	len = sizeof(peer->sa_remote);
1231 	if (getpeername(peer->fd, (struct sockaddr *)&peer->sa_remote,
1232 	    &len) == -1)
1233 		log_warn("getpeername");
1234 }
1235 
1236 void
1237 session_capa_ann_none(struct peer *peer)
1238 {
1239 	peer->capa.ann.mp_v4 = SAFI_NONE;
1240 	peer->capa.ann.mp_v4 = SAFI_NONE;
1241 	peer->capa.ann.refresh = 0;
1242 	peer->capa.ann.restart = 0;
1243 	peer->capa.ann.as4byte = 0;
1244 }
1245 
1246 int
1247 session_capa_add(struct peer *p, struct buf *opb, u_int8_t capa_code,
1248     u_int8_t capa_len, u_int8_t *optparamlen)
1249 {
1250 	u_int8_t	op_type, op_len, tot_len, errs = 0;
1251 
1252 	op_type = OPT_PARAM_CAPABILITIES;
1253 	op_len = sizeof(capa_code) + sizeof(capa_len) + capa_len;
1254 	tot_len = sizeof(op_type) + sizeof(op_len) + op_len;
1255 	errs += buf_add(opb, &op_type, sizeof(op_type));
1256 	errs += buf_add(opb, &op_len, sizeof(op_len));
1257 	errs += buf_add(opb, &capa_code, sizeof(capa_code));
1258 	errs += buf_add(opb, &capa_len, sizeof(capa_len));
1259 	*optparamlen += tot_len;
1260 	return (errs);
1261 }
1262 
/*
 * Append the 4 data bytes of a multiprotocol capability to buf:
 * AFI in network byte order, one zero byte, then the SAFI.
 *
 * Returns the accumulated buf_add() results; 0 means every add succeeded.
 */
int
session_capa_add_mp(struct buf *buf, u_int16_t afi, u_int8_t safi)
{
	u_int16_t	net_afi = htons(afi);
	u_int8_t	reserved = 0;
	int		failed = 0;

	failed += buf_add(buf, &net_afi, sizeof(net_afi));
	failed += buf_add(buf, &reserved, sizeof(reserved));
	failed += buf_add(buf, &safi, sizeof(safi));

	return (failed);
}
1276 
1277 struct bgp_msg *
1278 session_newmsg(enum msg_type msgtype, u_int16_t len)
1279 {
1280 	struct bgp_msg		*msg;
1281 	struct msg_header	 hdr;
1282 	struct buf		*buf;
1283 	int			 errs = 0;
1284 
1285 	memset(&hdr.marker, 0xff, sizeof(hdr.marker));
1286 	hdr.len = htons(len);
1287 	hdr.type = msgtype;
1288 
1289 	if ((buf = buf_open(len)) == NULL)
1290 		return (NULL);
1291 
1292 	errs += buf_add(buf, &hdr.marker, sizeof(hdr.marker));
1293 	errs += buf_add(buf, &hdr.len, sizeof(hdr.len));
1294 	errs += buf_add(buf, &hdr.type, sizeof(hdr.type));
1295 
1296 	if (errs > 0 ||
1297 	    (msg = calloc(1, sizeof(*msg))) == NULL) {
1298 		buf_free(buf);
1299 		return (NULL);
1300 	}
1301 
1302 	msg->buf = buf;
1303 	msg->type = msgtype;
1304 	msg->len = len;
1305 
1306 	return (msg);
1307 }
1308 
1309 int
1310 session_sendmsg(struct bgp_msg *msg, struct peer *p)
1311 {
1312 	struct mrt		*mrt;
1313 
1314 	LIST_FOREACH(mrt, &mrthead, entry) {
1315 		if (!(mrt->type == MRT_ALL_OUT || (msg->type == UPDATE &&
1316 		    mrt->type == MRT_UPDATE_OUT)))
1317 			continue;
1318 		if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
1319 		    mrt->peer_id == p->conf.id || (mrt->group_id == 0 &&
1320 		    mrt->group_id == p->conf.groupid))
1321 			mrt_dump_bgp_msg(mrt, msg->buf->buf, msg->len, p);
1322 	}
1323 
1324 	if (buf_close(&p->wbuf, msg->buf) == -1) {
1325 		log_peer_warn(&p->conf, "session_sendmsg buf_close");
1326 		buf_free(msg->buf);
1327 		free(msg);
1328 		return (-1);
1329 	}
1330 
1331 	free(msg);
1332 	return (0);
1333 }
1334 
1335 void
1336 session_open(struct peer *p)
1337 {
1338 	struct bgp_msg		*buf;
1339 	struct buf		*opb;
1340 	struct msg_open		 msg;
1341 	u_int16_t		 len;
1342 	u_int8_t		 optparamlen = 0;
1343 	u_int			 errs = 0;
1344 
1345 
1346 	if ((opb = buf_dynamic(0, MAX_PKTSIZE - MSGSIZE_OPEN_MIN)) == NULL) {
1347 		bgp_fsm(p, EVNT_CON_FATAL);
1348 		return;
1349 	}
1350 
1351 	/* multiprotocol extensions, RFC 4760 */
1352 	if (p->capa.ann.mp_v4) {	/* 4 bytes data */
1353 		errs += session_capa_add(p, opb, CAPA_MP, 4, &optparamlen);
1354 		errs += session_capa_add_mp(opb, AFI_IPv4, p->capa.ann.mp_v4);
1355 	}
1356 	if (p->capa.ann.mp_v6) {	/* 4 bytes data */
1357 		errs += session_capa_add(p, opb, CAPA_MP, 4, &optparamlen);
1358 		errs += session_capa_add_mp(opb, AFI_IPv6, p->capa.ann.mp_v6);
1359 	}
1360 
1361 	/* route refresh, RFC 2918 */
1362 	if (p->capa.ann.refresh)	/* no data */
1363 		errs += session_capa_add(p, opb, CAPA_REFRESH, 0, &optparamlen);
1364 
1365 	/* End-of-RIB marker, RFC 4724 */
1366 	if (p->capa.ann.restart) {	/* 2 bytes data */
1367 		u_char		c[2];
1368 
1369 		bzero(&c, 2);
1370 		c[0] = 0x80; /* we're always restarting */
1371 		errs += session_capa_add(p, opb, CAPA_RESTART, 2, &optparamlen);
1372 		errs += buf_add(opb, &c, 2);
1373 	}
1374 
1375 	/* 4-bytes AS numbers, draft-ietf-idr-as4bytes-13 */
1376 	if (p->capa.ann.as4byte) {	/* 4 bytes data */
1377 		u_int32_t	nas;
1378 
1379 		nas = htonl(conf->as);
1380 		errs += session_capa_add(p, opb, CAPA_AS4BYTE, 4, &optparamlen);
1381 		errs += buf_add(opb, &nas, 4);
1382 	}
1383 
1384 	len = MSGSIZE_OPEN_MIN + optparamlen;
1385 	if (errs || (buf = session_newmsg(OPEN, len)) == NULL) {
1386 		buf_free(opb);
1387 		bgp_fsm(p, EVNT_CON_FATAL);
1388 		return;
1389 	}
1390 
1391 	msg.version = 4;
1392 	msg.myas = htons(conf->short_as);
1393 	if (p->conf.holdtime)
1394 		msg.holdtime = htons(p->conf.holdtime);
1395 	else
1396 		msg.holdtime = htons(conf->holdtime);
1397 	msg.bgpid = conf->bgpid;	/* is already in network byte order */
1398 	msg.optparamlen = optparamlen;
1399 
1400 	errs += buf_add(buf->buf, &msg.version, sizeof(msg.version));
1401 	errs += buf_add(buf->buf, &msg.myas, sizeof(msg.myas));
1402 	errs += buf_add(buf->buf, &msg.holdtime, sizeof(msg.holdtime));
1403 	errs += buf_add(buf->buf, &msg.bgpid, sizeof(msg.bgpid));
1404 	errs += buf_add(buf->buf, &msg.optparamlen, sizeof(msg.optparamlen));
1405 
1406 	if (optparamlen)
1407 		errs += buf_add(buf->buf, opb->buf, optparamlen);
1408 
1409 	buf_free(opb);
1410 
1411 	if (errs > 0) {
1412 		buf_free(buf->buf);
1413 		free(buf);
1414 		bgp_fsm(p, EVNT_CON_FATAL);
1415 		return;
1416 	}
1417 
1418 	if (session_sendmsg(buf, p) == -1) {
1419 		bgp_fsm(p, EVNT_CON_FATAL);
1420 		return;
1421 	}
1422 
1423 	p->stats.msg_sent_open++;
1424 }
1425 
1426 void
1427 session_keepalive(struct peer *p)
1428 {
1429 	struct bgp_msg		*buf;
1430 
1431 	if ((buf = session_newmsg(KEEPALIVE, MSGSIZE_KEEPALIVE)) == NULL ||
1432 	    session_sendmsg(buf, p) == -1) {
1433 		bgp_fsm(p, EVNT_CON_FATAL);
1434 		return;
1435 	}
1436 
1437 	start_timer_keepalive(p);
1438 	p->stats.msg_sent_keepalive++;
1439 }
1440 
1441 void
1442 session_update(u_int32_t peerid, void *data, size_t datalen)
1443 {
1444 	struct peer		*p;
1445 	struct bgp_msg		*buf;
1446 
1447 	if ((p = getpeerbyid(peerid)) == NULL) {
1448 		log_warnx("no such peer: id=%u", peerid);
1449 		return;
1450 	}
1451 
1452 	if (p->state != STATE_ESTABLISHED)
1453 		return;
1454 
1455 	if ((buf = session_newmsg(UPDATE, MSGSIZE_HEADER + datalen)) == NULL) {
1456 		bgp_fsm(p, EVNT_CON_FATAL);
1457 		return;
1458 	}
1459 
1460 	if (buf_add(buf->buf, data, datalen)) {
1461 		buf_free(buf->buf);
1462 		free(buf);
1463 		bgp_fsm(p, EVNT_CON_FATAL);
1464 		return;
1465 	}
1466 
1467 	if (session_sendmsg(buf, p) == -1) {
1468 		bgp_fsm(p, EVNT_CON_FATAL);
1469 		return;
1470 	}
1471 
1472 	start_timer_keepalive(p);
1473 	p->stats.msg_sent_update++;
1474 }
1475 
1476 void
1477 session_notification(struct peer *p, u_int8_t errcode, u_int8_t subcode,
1478     void *data, ssize_t datalen)
1479 {
1480 	struct bgp_msg		*buf;
1481 	u_int			 errs = 0;
1482 	u_int8_t		 null8 = 0;
1483 
1484 	if (p->stats.last_sent_errcode)	/* some notification already sent */
1485 		return;
1486 
1487 	if ((buf = session_newmsg(NOTIFICATION,
1488 	    MSGSIZE_NOTIFICATION_MIN + datalen)) == NULL) {
1489 		bgp_fsm(p, EVNT_CON_FATAL);
1490 		return;
1491 	}
1492 
1493 	errs += buf_add(buf->buf, &errcode, sizeof(errcode));
1494 	if (errcode == ERR_CEASE)
1495 		errs += buf_add(buf->buf, &null8, sizeof(null8));
1496 	else
1497 		errs += buf_add(buf->buf, &subcode, sizeof(subcode));
1498 
1499 	if (datalen > 0)
1500 		errs += buf_add(buf->buf, data, datalen);
1501 
1502 	if (errs > 0) {
1503 		buf_free(buf->buf);
1504 		free(buf);
1505 		bgp_fsm(p, EVNT_CON_FATAL);
1506 		return;
1507 	}
1508 
1509 	if (session_sendmsg(buf, p) == -1) {
1510 		bgp_fsm(p, EVNT_CON_FATAL);
1511 		return;
1512 	}
1513 
1514 	p->stats.msg_sent_notification++;
1515 	p->stats.last_sent_errcode = errcode;
1516 	p->stats.last_sent_suberr = subcode;
1517 }
1518 
1519 int
1520 session_neighbor_rrefresh(struct peer *p)
1521 {
1522 	if (!p->capa.peer.refresh)
1523 		return (-1);
1524 
1525 	if (p->capa.peer.mp_v4 != SAFI_NONE)
1526 		session_rrefresh(p, AFI_IPv4, p->capa.peer.mp_v4);
1527 	if (p->capa.peer.mp_v6 != SAFI_NONE)
1528 		session_rrefresh(p, AFI_IPv6, p->capa.peer.mp_v6);
1529 
1530 	return (0);
1531 }
1532 
1533 void
1534 session_rrefresh(struct peer *p, u_int16_t afi, u_int8_t safi)
1535 {
1536 	struct bgp_msg		*buf;
1537 	int			 errs = 0;
1538 	u_int8_t		 null8 = 0;
1539 
1540 	if ((buf = session_newmsg(RREFRESH, MSGSIZE_RREFRESH)) == NULL) {
1541 		bgp_fsm(p, EVNT_CON_FATAL);
1542 		return;
1543 	}
1544 
1545 	afi = htons(afi);
1546 	errs += buf_add(buf->buf, &afi, sizeof(afi));
1547 	errs += buf_add(buf->buf, &null8, sizeof(null8));
1548 	errs += buf_add(buf->buf, &safi, sizeof(safi));
1549 
1550 	if (errs > 0) {
1551 		buf_free(buf->buf);
1552 		free(buf);
1553 		bgp_fsm(p, EVNT_CON_FATAL);
1554 		return;
1555 	}
1556 
1557 	if (session_sendmsg(buf, p) == -1) {
1558 		bgp_fsm(p, EVNT_CON_FATAL);
1559 		return;
1560 	}
1561 
1562 	p->stats.msg_sent_rrefresh++;
1563 }
1564 
1565 int
1566 session_dispatch_msg(struct pollfd *pfd, struct peer *p)
1567 {
1568 	ssize_t		n, rpos, av, left;
1569 	socklen_t	len;
1570 	int		error, processed = 0;
1571 	u_int16_t	msglen;
1572 	u_int8_t	msgtype;
1573 
1574 	if (p->state == STATE_CONNECT) {
1575 		if (pfd->revents & POLLOUT) {
1576 			if (pfd->revents & POLLIN) {
1577 				/* error occurred */
1578 				len = sizeof(error);
1579 				if (getsockopt(pfd->fd, SOL_SOCKET, SO_ERROR,
1580 				    &error, &len) == -1 || error) {
1581 					if (error)
1582 						errno = error;
1583 					if (errno != p->lasterr) {
1584 						log_peer_warn(&p->conf,
1585 						    "socket error");
1586 						p->lasterr = errno;
1587 					}
1588 					bgp_fsm(p, EVNT_CON_OPENFAIL);
1589 					return (1);
1590 				}
1591 			}
1592 			bgp_fsm(p, EVNT_CON_OPEN);
1593 			return (1);
1594 		}
1595 		if (pfd->revents & POLLHUP) {
1596 			bgp_fsm(p, EVNT_CON_OPENFAIL);
1597 			return (1);
1598 		}
1599 		if (pfd->revents & (POLLERR|POLLNVAL)) {
1600 			bgp_fsm(p, EVNT_CON_FATAL);
1601 			return (1);
1602 		}
1603 		return (0);
1604 	}
1605 
1606 	if (pfd->revents & POLLHUP) {
1607 		bgp_fsm(p, EVNT_CON_CLOSED);
1608 		return (1);
1609 	}
1610 	if (pfd->revents & (POLLERR|POLLNVAL)) {
1611 		bgp_fsm(p, EVNT_CON_FATAL);
1612 		return (1);
1613 	}
1614 
1615 	if (pfd->revents & POLLOUT && p->wbuf.queued) {
1616 		if ((error = msgbuf_write(&p->wbuf)) < 0) {
1617 			if (error == -2)
1618 				log_peer_warnx(&p->conf, "Connection closed");
1619 			else
1620 				log_peer_warn(&p->conf, "write error");
1621 			bgp_fsm(p, EVNT_CON_FATAL);
1622 			return (1);
1623 		}
1624 		if (!(pfd->revents & POLLIN))
1625 			return (1);
1626 	}
1627 
1628 	if (p->rbuf && pfd->revents & POLLIN) {
1629 		if ((n = read(p->fd, p->rbuf->buf + p->rbuf->wpos,
1630 		    sizeof(p->rbuf->buf) - p->rbuf->wpos)) == -1) {
1631 			if (errno != EINTR && errno != EAGAIN) {
1632 				log_peer_warn(&p->conf, "read error");
1633 				bgp_fsm(p, EVNT_CON_FATAL);
1634 			}
1635 			return (1);
1636 		}
1637 		if (n == 0) {	/* connection closed */
1638 			bgp_fsm(p, EVNT_CON_CLOSED);
1639 			return (1);
1640 		}
1641 
1642 		rpos = 0;
1643 		av = p->rbuf->wpos + n;
1644 		p->stats.last_read = time(NULL);
1645 
1646 		/*
1647 		 * session might drop to IDLE -> buffers deallocated
1648 		 * we MUST check rbuf != NULL before use
1649 		 */
1650 		for (;;) {
1651 			if (rpos + MSGSIZE_HEADER > av)
1652 				break;
1653 			if (p->rbuf == NULL)
1654 				break;
1655 			if (parse_header(p, p->rbuf->buf + rpos, &msglen,
1656 			    &msgtype) == -1)
1657 				return (0);
1658 			if (rpos + msglen > av)
1659 				break;
1660 			p->rbuf->rptr = p->rbuf->buf + rpos;
1661 
1662 			switch (msgtype) {
1663 			case OPEN:
1664 				bgp_fsm(p, EVNT_RCVD_OPEN);
1665 				p->stats.msg_rcvd_open++;
1666 				break;
1667 			case UPDATE:
1668 				bgp_fsm(p, EVNT_RCVD_UPDATE);
1669 				p->stats.msg_rcvd_update++;
1670 				break;
1671 			case NOTIFICATION:
1672 				bgp_fsm(p, EVNT_RCVD_NOTIFICATION);
1673 				p->stats.msg_rcvd_notification++;
1674 				break;
1675 			case KEEPALIVE:
1676 				bgp_fsm(p, EVNT_RCVD_KEEPALIVE);
1677 				p->stats.msg_rcvd_keepalive++;
1678 				break;
1679 			case RREFRESH:
1680 				parse_refresh(p);
1681 				p->stats.msg_rcvd_rrefresh++;
1682 				break;
1683 			default:	/* cannot happen */
1684 				session_notification(p, ERR_HEADER,
1685 				    ERR_HDR_TYPE, &msgtype, 1);
1686 				log_warnx("received message with "
1687 				    "unknown type %u", msgtype);
1688 				bgp_fsm(p, EVNT_CON_FATAL);
1689 			}
1690 			rpos += msglen;
1691 			if (++processed > MSG_PROCESS_LIMIT)
1692 				break;
1693 		}
1694 		if (p->rbuf == NULL)
1695 			return (1);
1696 
1697 		if (rpos < av) {
1698 			left = av - rpos;
1699 			memcpy(&p->rbuf->buf, p->rbuf->buf + rpos, left);
1700 			p->rbuf->wpos = left;
1701 		} else
1702 			p->rbuf->wpos = 0;
1703 
1704 		return (1);
1705 	}
1706 	return (0);
1707 }
1708 
1709 int
1710 parse_header(struct peer *peer, u_char *data, u_int16_t *len, u_int8_t *type)
1711 {
1712 	struct mrt		*mrt;
1713 	u_char			*p;
1714 	u_int16_t		 olen;
1715 	static const u_int8_t	 marker[MSGSIZE_HEADER_MARKER] = { 0xff, 0xff,
1716 				    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1717 				    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
1718 
1719 	/* caller MUST make sure we are getting 19 bytes! */
1720 	p = data;
1721 	if (memcmp(p, marker, sizeof(marker))) {
1722 		log_peer_warnx(&peer->conf, "sync error");
1723 		session_notification(peer, ERR_HEADER, ERR_HDR_SYNC, NULL, 0);
1724 		bgp_fsm(peer, EVNT_CON_FATAL);
1725 		return (-1);
1726 	}
1727 	p += MSGSIZE_HEADER_MARKER;
1728 
1729 	memcpy(&olen, p, 2);
1730 	*len = ntohs(olen);
1731 	p += 2;
1732 	memcpy(type, p, 1);
1733 
1734 	if (*len < MSGSIZE_HEADER || *len > MAX_PKTSIZE) {
1735 		log_peer_warnx(&peer->conf,
1736 		    "received message: illegal length: %u byte", *len);
1737 		session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1738 		    &olen, sizeof(olen));
1739 		bgp_fsm(peer, EVNT_CON_FATAL);
1740 		return (-1);
1741 	}
1742 
1743 	switch (*type) {
1744 	case OPEN:
1745 		if (*len < MSGSIZE_OPEN_MIN) {
1746 			log_peer_warnx(&peer->conf,
1747 			    "received OPEN: illegal len: %u byte", *len);
1748 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1749 			    &olen, sizeof(olen));
1750 			bgp_fsm(peer, EVNT_CON_FATAL);
1751 			return (-1);
1752 		}
1753 		break;
1754 	case NOTIFICATION:
1755 		if (*len < MSGSIZE_NOTIFICATION_MIN) {
1756 			log_peer_warnx(&peer->conf,
1757 			    "received NOTIFICATION: illegal len: %u byte",
1758 			    *len);
1759 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1760 			    &olen, sizeof(olen));
1761 			bgp_fsm(peer, EVNT_CON_FATAL);
1762 			return (-1);
1763 		}
1764 		break;
1765 	case UPDATE:
1766 		if (*len < MSGSIZE_UPDATE_MIN) {
1767 			log_peer_warnx(&peer->conf,
1768 			    "received UPDATE: illegal len: %u byte", *len);
1769 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1770 			    &olen, sizeof(olen));
1771 			bgp_fsm(peer, EVNT_CON_FATAL);
1772 			return (-1);
1773 		}
1774 		break;
1775 	case KEEPALIVE:
1776 		if (*len != MSGSIZE_KEEPALIVE) {
1777 			log_peer_warnx(&peer->conf,
1778 			    "received KEEPALIVE: illegal len: %u byte", *len);
1779 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1780 			    &olen, sizeof(olen));
1781 			bgp_fsm(peer, EVNT_CON_FATAL);
1782 			return (-1);
1783 		}
1784 		break;
1785 	case RREFRESH:
1786 		if (*len != MSGSIZE_RREFRESH) {
1787 			log_peer_warnx(&peer->conf,
1788 			    "received RREFRESH: illegal len: %u byte", *len);
1789 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1790 			    &olen, sizeof(olen));
1791 			bgp_fsm(peer, EVNT_CON_FATAL);
1792 			return (-1);
1793 		}
1794 		break;
1795 	default:
1796 		log_peer_warnx(&peer->conf,
1797 		    "received msg with unknown type %u", *type);
1798 		session_notification(peer, ERR_HEADER, ERR_HDR_TYPE,
1799 		    type, 1);
1800 		bgp_fsm(peer, EVNT_CON_FATAL);
1801 		return (-1);
1802 	}
1803 	LIST_FOREACH(mrt, &mrthead, entry) {
1804 		if (!(mrt->type == MRT_ALL_IN || (*type == UPDATE &&
1805 		    mrt->type == MRT_UPDATE_IN)))
1806 			continue;
1807 		if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
1808 		    mrt->peer_id == peer->conf.id || (mrt->group_id != 0 &&
1809 		    mrt->group_id == peer->conf.groupid))
1810 			mrt_dump_bgp_msg(mrt, data, *len, peer);
1811 	}
1812 	return (0);
1813 }
1814 
1815 int
1816 parse_open(struct peer *peer)
1817 {
1818 	u_char		*p, *op_val;
1819 	u_int8_t	 version, rversion;
1820 	u_int16_t	 short_as, msglen;
1821 	u_int16_t	 holdtime, oholdtime, myholdtime;
1822 	u_int32_t	 as, bgpid;
1823 	u_int8_t	 optparamlen, plen;
1824 	u_int8_t	 op_type, op_len;
1825 
1826 	p = peer->rbuf->rptr;
1827 	p += MSGSIZE_HEADER_MARKER;
1828 	memcpy(&msglen, p, sizeof(msglen));
1829 	msglen = ntohs(msglen);
1830 
1831 	p = peer->rbuf->rptr;
1832 	p += MSGSIZE_HEADER;	/* header is already checked */
1833 
1834 	memcpy(&version, p, sizeof(version));
1835 	p += sizeof(version);
1836 
1837 	if (version != BGP_VERSION) {
1838 		log_peer_warnx(&peer->conf,
1839 		    "peer wants unrecognized version %u", version);
1840 		if (version > BGP_VERSION)
1841 			rversion = version - BGP_VERSION;
1842 		else
1843 			rversion = BGP_VERSION;
1844 		session_notification(peer, ERR_OPEN, ERR_OPEN_VERSION,
1845 		    &rversion, sizeof(rversion));
1846 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
1847 		return (-1);
1848 	}
1849 
1850 	memcpy(&short_as, p, sizeof(short_as));
1851 	p += sizeof(short_as);
1852 	as = peer->short_as = ntohs(short_as);
1853 
1854 	/* if remote-as is zero and it's a cloned neighbor, accept any */
1855 	if (peer->conf.cloned && !peer->conf.remote_as && as != AS_TRANS) {
1856 		peer->conf.remote_as = as;
1857 		peer->conf.ebgp = (peer->conf.remote_as != conf->as);
1858 	}
1859 
1860 	memcpy(&oholdtime, p, sizeof(oholdtime));
1861 	p += sizeof(oholdtime);
1862 
1863 	holdtime = ntohs(oholdtime);
1864 	if (holdtime && holdtime < peer->conf.min_holdtime) {
1865 		log_peer_warnx(&peer->conf,
1866 		    "peer requests unacceptable holdtime %u", holdtime);
1867 		session_notification(peer, ERR_OPEN, ERR_OPEN_HOLDTIME,
1868 		    NULL, 0);
1869 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
1870 		return (-1);
1871 	}
1872 
1873 	myholdtime = peer->conf.holdtime;
1874 	if (!myholdtime)
1875 		myholdtime = conf->holdtime;
1876 	if (holdtime < myholdtime)
1877 		peer->holdtime = holdtime;
1878 	else
1879 		peer->holdtime = myholdtime;
1880 
1881 	memcpy(&bgpid, p, sizeof(bgpid));
1882 	p += sizeof(bgpid);
1883 
1884 	/* check bgpid for validity - just disallow 0 */
1885 	if (ntohl(bgpid) == 0) {
1886 		log_peer_warnx(&peer->conf, "peer BGPID %lu unacceptable",
1887 		    ntohl(bgpid));
1888 		session_notification(peer, ERR_OPEN, ERR_OPEN_BGPID,
1889 		    NULL, 0);
1890 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
1891 		return (-1);
1892 	}
1893 	peer->remote_bgpid = bgpid;
1894 
1895 	memcpy(&optparamlen, p, sizeof(optparamlen));
1896 	p += sizeof(optparamlen);
1897 
1898 	if (optparamlen != msglen - MSGSIZE_OPEN_MIN) {
1899 			log_peer_warnx(&peer->conf,
1900 			    "corrupt OPEN message received: length mismatch");
1901 			session_notification(peer, ERR_OPEN, 0, NULL, 0);
1902 			change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
1903 			return (-1);
1904 	}
1905 
1906 	plen = optparamlen;
1907 	while (plen > 0) {
1908 		if (plen < 2) {
1909 			log_peer_warnx(&peer->conf,
1910 			    "corrupt OPEN message received, len wrong");
1911 			session_notification(peer, ERR_OPEN, 0, NULL, 0);
1912 			change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
1913 			return (-1);
1914 		}
1915 		memcpy(&op_type, p, sizeof(op_type));
1916 		p += sizeof(op_type);
1917 		plen -= sizeof(op_type);
1918 		memcpy(&op_len, p, sizeof(op_len));
1919 		p += sizeof(op_len);
1920 		plen -= sizeof(op_len);
1921 		if (op_len > 0) {
1922 			if (plen < op_len) {
1923 				log_peer_warnx(&peer->conf,
1924 				    "corrupt OPEN message received, len wrong");
1925 				session_notification(peer, ERR_OPEN, 0,
1926 				    NULL, 0);
1927 				change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
1928 				return (-1);
1929 			}
1930 			op_val = p;
1931 			p += op_len;
1932 			plen -= op_len;
1933 		} else
1934 			op_val = NULL;
1935 
1936 		switch (op_type) {
1937 		case OPT_PARAM_CAPABILITIES:		/* RFC 3392 */
1938 			if (parse_capabilities(peer, op_val, op_len,
1939 			    &as) == -1) {
1940 				session_notification(peer, ERR_OPEN, 0,
1941 				    NULL, 0);
1942 				change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
1943 				return (-1);
1944 			}
1945 			break;
1946 		case OPT_PARAM_AUTH:			/* deprecated */
1947 		default:
1948 			/*
1949 			 * unsupported type
1950 			 * the RFCs tell us to leave the data section empty
1951 			 * and notify the peer with ERR_OPEN, ERR_OPEN_OPT.
1952 			 * How the peer should know _which_ optional parameter
1953 			 * we don't support is beyond me.
1954 			 */
1955 			log_peer_warnx(&peer->conf,
1956 			    "received OPEN message with unsupported optional "
1957 			    "parameter: type %u", op_type);
1958 			session_notification(peer, ERR_OPEN, ERR_OPEN_OPT,
1959 				NULL, 0);
1960 			change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
1961 			timer_set(peer, Timer_IdleHold, 0);	/* no punish */
1962 			peer->IdleHoldTime /= 2;
1963 			return (-1);
1964 		}
1965 	}
1966 
1967 	if (peer->conf.remote_as != as) {
1968 		log_peer_warnx(&peer->conf, "peer sent wrong AS %s",
1969 		    log_as(as));
1970 		session_notification(peer, ERR_OPEN, ERR_OPEN_AS, NULL, 0);
1971 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
1972 		return (-1);
1973 	}
1974 
1975 	return (0);
1976 }
1977 
1978 int
1979 parse_update(struct peer *peer)
1980 {
1981 	u_char		*p;
1982 	u_int16_t	 datalen;
1983 
1984 	/*
1985 	 * we pass the message verbatim to the rde.
1986 	 * in case of errors the whole session is reset with a
1987 	 * notification anyway, we only need to know the peer
1988 	 */
1989 	p = peer->rbuf->rptr;
1990 	p += MSGSIZE_HEADER_MARKER;
1991 	memcpy(&datalen, p, sizeof(datalen));
1992 	datalen = ntohs(datalen);
1993 
1994 	p = peer->rbuf->rptr;
1995 	p += MSGSIZE_HEADER;	/* header is already checked */
1996 	datalen -= MSGSIZE_HEADER;
1997 
1998 	if (imsg_compose(ibuf_rde, IMSG_UPDATE, peer->conf.id, 0, -1, p,
1999 	    datalen) == -1)
2000 		return (-1);
2001 
2002 	return (0);
2003 }
2004 
2005 int
2006 parse_refresh(struct peer *peer)
2007 {
2008 	u_char		*p;
2009 	struct rrefresh	 r;
2010 
2011 	p = peer->rbuf->rptr;
2012 	p += MSGSIZE_HEADER;	/* header is already checked */
2013 
2014 	/* afi, 2 byte */
2015 	memcpy(&r.afi, p, sizeof(r.afi));
2016 	r.afi = ntohs(r.afi);
2017 	p += 2;
2018 	/* reserved, 1 byte */
2019 	p += 1;
2020 	/* safi, 1 byte */
2021 	memcpy(&r.safi, p, sizeof(r.safi));
2022 
2023 	/* afi/safi unchecked -	unrecognized values will be ignored anyway */
2024 
2025 	if (imsg_compose(ibuf_rde, IMSG_REFRESH, peer->conf.id, 0, -1, &r,
2026 	    sizeof(r)) == -1)
2027 		return (-1);
2028 
2029 	return (0);
2030 }
2031 
2032 int
2033 parse_notification(struct peer *peer)
2034 {
2035 	u_char		*p;
2036 	u_int8_t	 errcode;
2037 	u_int8_t	 subcode;
2038 	u_int16_t	 datalen;
2039 	u_int8_t	 capa_code;
2040 	u_int8_t	 capa_len;
2041 
2042 	/* just log */
2043 	p = peer->rbuf->rptr;
2044 	p += MSGSIZE_HEADER_MARKER;
2045 	memcpy(&datalen, p, sizeof(datalen));
2046 	datalen = ntohs(datalen);
2047 
2048 	p = peer->rbuf->rptr;
2049 	p += MSGSIZE_HEADER;	/* header is already checked */
2050 	datalen -= MSGSIZE_HEADER;
2051 
2052 	memcpy(&errcode, p, sizeof(errcode));
2053 	p += sizeof(errcode);
2054 	datalen -= sizeof(errcode);
2055 
2056 	memcpy(&subcode, p, sizeof(subcode));
2057 	p += sizeof(subcode);
2058 	datalen -= sizeof(subcode);
2059 
2060 	log_notification(peer, errcode, subcode, p, datalen);
2061 	peer->errcnt++;
2062 
2063 	if (errcode == ERR_OPEN && subcode == ERR_OPEN_CAPA) {
2064 		if (datalen == 0) {	/* zebra likes to send those.. humbug */
2065 			log_peer_warnx(&peer->conf, "received \"unsupported "
2066 			    "capability\" notification without data part, "
2067 			    "disabling capability announcements altogether");
2068 			session_capa_ann_none(peer);
2069 		}
2070 
2071 		while (datalen > 0) {
2072 			if (datalen < 2) {
2073 				log_peer_warnx(&peer->conf,
2074 				    "parse_notification: "
2075 				    "expect len >= 2, len is %u", datalen);
2076 				return (-1);
2077 			}
2078 			memcpy(&capa_code, p, sizeof(capa_code));
2079 			p += sizeof(capa_code);
2080 			datalen -= sizeof(capa_code);
2081 			memcpy(&capa_len, p, sizeof(capa_len));
2082 			p += sizeof(capa_len);
2083 			datalen -= sizeof(capa_len);
2084 			if (datalen < capa_len) {
2085 				log_peer_warnx(&peer->conf,
2086 				    "parse_notification: capa_len %u exceeds "
2087 				    "remaining msg length %u", capa_len,
2088 				    datalen);
2089 				return (-1);
2090 			}
2091 			p += capa_len;
2092 			datalen -= capa_len;
2093 			switch (capa_code) {
2094 			case CAPA_MP:
2095 				peer->capa.ann.mp_v4 = SAFI_NONE;
2096 				peer->capa.ann.mp_v6 = SAFI_NONE;
2097 				log_peer_warnx(&peer->conf,
2098 				    "disabling multiprotocol capability");
2099 				break;
2100 			case CAPA_REFRESH:
2101 				peer->capa.ann.refresh = 0;
2102 				log_peer_warnx(&peer->conf,
2103 				    "disabling route refresh capability");
2104 				break;
2105 			case CAPA_RESTART:
2106 				peer->capa.ann.restart = 0;
2107 				log_peer_warnx(&peer->conf,
2108 				    "disabling restart capability");
2109 				break;
2110 			case CAPA_AS4BYTE:
2111 				peer->capa.ann.as4byte = 0;
2112 				log_peer_warnx(&peer->conf,
2113 				    "disabling 4-byte AS num capability");
2114 				break;
2115 			default:	/* should not happen... */
2116 				log_peer_warnx(&peer->conf, "received "
2117 				    "\"unsupported capability\" notification "
2118 				    "for unknown capability %u, disabling "
2119 				    "capability announcements altogether",
2120 				    capa_code);
2121 				session_capa_ann_none(peer);
2122 				break;
2123 			}
2124 		}
2125 
2126 		return (1);
2127 	}
2128 
2129 	if (errcode == ERR_OPEN && subcode == ERR_OPEN_OPT) {
2130 		session_capa_ann_none(peer);
2131 		return (1);
2132 	}
2133 
2134 	return (0);
2135 }
2136 
2137 int
2138 parse_capabilities(struct peer *peer, u_char *d, u_int16_t dlen, u_int32_t *as)
2139 {
2140 	u_int16_t	 len;
2141 	u_int8_t	 capa_code;
2142 	u_int8_t	 capa_len;
2143 	u_char		*capa_val;
2144 	u_int16_t	 mp_afi;
2145 	u_int8_t	 mp_safi;
2146 	u_int32_t	 remote_as;
2147 
2148 	len = dlen;
2149 	while (len > 0) {
2150 		if (len < 2) {
2151 			log_peer_warnx(&peer->conf, "parse_capabilities: "
2152 			    "expect len >= 2, len is %u", len);
2153 			return (-1);
2154 		}
2155 		memcpy(&capa_code, d, sizeof(capa_code));
2156 		d += sizeof(capa_code);
2157 		len -= sizeof(capa_code);
2158 		memcpy(&capa_len, d, sizeof(capa_len));
2159 		d += sizeof(capa_len);
2160 		len -= sizeof(capa_len);
2161 		if (capa_len > 0) {
2162 			if (len < capa_len) {
2163 				log_peer_warnx(&peer->conf,
2164 				    "parse_capabilities: "
2165 				    "len %u smaller than capa_len %u",
2166 				    len, capa_len);
2167 				return (-1);
2168 			}
2169 			capa_val = d;
2170 			d += capa_len;
2171 			len -= capa_len;
2172 		} else
2173 			capa_val = NULL;
2174 
2175 		switch (capa_code) {
2176 		case CAPA_MP:			/* RFC 4760 */
2177 			if (capa_len != 4) {
2178 				log_peer_warnx(&peer->conf,
2179 				    "parse_capabilities: "
2180 				    "expect len 4, len is %u", capa_len);
2181 				return (-1);
2182 			}
2183 			memcpy(&mp_afi, capa_val, sizeof(mp_afi));
2184 			mp_afi = ntohs(mp_afi);
2185 			memcpy(&mp_safi, capa_val + 3, sizeof(mp_safi));
2186 			switch (mp_afi) {
2187 			case AFI_IPv4:
2188 				if (mp_safi < 1 || mp_safi > 3)
2189 					log_peer_warnx(&peer->conf,
2190 					    "parse_capabilities: AFI IPv4, "
2191 					    "mp_safi %u unknown", mp_safi);
2192 				else
2193 					peer->capa.peer.mp_v4 = mp_safi;
2194 				break;
2195 			case AFI_IPv6:
2196 				if (mp_safi < 1 || mp_safi > 3)
2197 					log_peer_warnx(&peer->conf,
2198 					    "parse_capabilities: AFI IPv6, "
2199 					    "mp_safi %u unknown", mp_safi);
2200 				else
2201 					peer->capa.peer.mp_v6 = mp_safi;
2202 				break;
2203 			default:			/* ignore */
2204 				break;
2205 			}
2206 			break;
2207 		case CAPA_REFRESH:
2208 			peer->capa.peer.refresh = 1;
2209 			break;
2210 		case CAPA_RESTART:
2211 			peer->capa.peer.restart = 1;
2212 			/* we don't care about the further restart capas yet */
2213 			break;
2214 		case CAPA_AS4BYTE:
2215 			if (capa_len != 4) {
2216 				log_peer_warnx(&peer->conf,
2217 				    "parse_capabilities: "
2218 				    "expect len 4, len is %u", capa_len);
2219 				return (-1);
2220 			}
2221 			memcpy(&remote_as, capa_val, sizeof(remote_as));
2222 			*as = ntohl(remote_as);
2223 			peer->capa.peer.as4byte = 1;
2224 			break;
2225 		default:
2226 			break;
2227 		}
2228 	}
2229 
2230 	return (0);
2231 }
2232 
2233 void
2234 session_dispatch_imsg(struct imsgbuf *ibuf, int idx, u_int *listener_cnt)
2235 {
2236 	struct imsg		 imsg;
2237 	struct mrt		 xmrt;
2238 	struct mrt		*mrt;
2239 	struct peer_config	*pconf;
2240 	struct peer		*p, *next;
2241 	struct listen_addr	*la, *nla;
2242 	struct kif		*kif;
2243 	u_char			*data;
2244 	enum reconf_action	 reconf;
2245 	int			 n, depend_ok;
2246 	u_int8_t		 errcode, subcode;
2247 
2248 	if ((n = imsg_read(ibuf)) == -1)
2249 		fatal("session_dispatch_imsg: imsg_read error");
2250 
2251 	if (n == 0)	/* connection closed */
2252 		fatalx("session_dispatch_imsg: pipe closed");
2253 
2254 	for (;;) {
2255 		if ((n = imsg_get(ibuf, &imsg)) == -1)
2256 			fatal("session_dispatch_imsg: imsg_get error");
2257 
2258 		if (n == 0)
2259 			break;
2260 
2261 		switch (imsg.hdr.type) {
2262 		case IMSG_RECONF_CONF:
2263 			if (idx != PFD_PIPE_MAIN)
2264 				fatalx("reconf request not from parent");
2265 			if ((nconf = malloc(sizeof(struct bgpd_config))) ==
2266 			    NULL)
2267 				fatal(NULL);
2268 			memcpy(nconf, imsg.data, sizeof(struct bgpd_config));
2269 			if ((nconf->listen_addrs = calloc(1,
2270 			    sizeof(struct listen_addrs))) == NULL)
2271 				fatal(NULL);
2272 			TAILQ_INIT(nconf->listen_addrs);
2273 			npeers = NULL;
2274 			init_conf(nconf);
2275 			pending_reconf = 1;
2276 			break;
2277 		case IMSG_RECONF_PEER:
2278 			if (idx != PFD_PIPE_MAIN)
2279 				fatalx("reconf request not from parent");
2280 			pconf = imsg.data;
2281 			p = getpeerbyaddr(&pconf->remote_addr);
2282 			if (p == NULL) {
2283 				if ((p = calloc(1, sizeof(struct peer))) ==
2284 				    NULL)
2285 					fatal("new_peer");
2286 				p->state = p->prev_state = STATE_NONE;
2287 				p->next = npeers;
2288 				npeers = p;
2289 				reconf = RECONF_REINIT;
2290 			} else
2291 				reconf = RECONF_KEEP;
2292 
2293 			memcpy(&p->conf, pconf, sizeof(struct peer_config));
2294 			p->conf.reconf_action = reconf;
2295 			break;
2296 		case IMSG_RECONF_LISTENER:
2297 			if (idx != PFD_PIPE_MAIN)
2298 				fatalx("reconf request not from parent");
2299 			if (nconf == NULL)
2300 				fatalx("IMSG_RECONF_LISTENER but no config");
2301 			nla = imsg.data;
2302 			TAILQ_FOREACH(la, conf->listen_addrs, entry)
2303 				if (!la_cmp(la, nla))
2304 					break;
2305 
2306 			if (la == NULL) {
2307 				if (nla->reconf != RECONF_REINIT)
2308 					fatalx("king bula sez: "
2309 					    "expected REINIT");
2310 
2311 				if ((nla->fd = imsg_get_fd(ibuf)) == -1)
2312 					log_warnx("expected to receive fd for "
2313 					    "%s but didn't receive any",
2314 					    log_sockaddr((struct sockaddr *)
2315 					    &nla->sa));
2316 
2317 				la = calloc(1, sizeof(struct listen_addr));
2318 				if (la == NULL)
2319 					fatal(NULL);
2320 				memcpy(&la->sa, &nla->sa, sizeof(la->sa));
2321 				la->flags = nla->flags;
2322 				la->fd = nla->fd;
2323 				la->reconf = RECONF_REINIT;
2324 				TAILQ_INSERT_TAIL(nconf->listen_addrs, la,
2325 				    entry);
2326 			} else {
2327 				if (nla->reconf != RECONF_KEEP)
2328 					fatalx("king bula sez: expected KEEP");
2329 				la->reconf = RECONF_KEEP;
2330 			}
2331 
2332 			break;
2333 		case IMSG_RECONF_DONE:
2334 			if (idx != PFD_PIPE_MAIN)
2335 				fatalx("reconf request not from parent");
2336 			if (nconf == NULL)
2337 				fatalx("got IMSG_RECONF_DONE but no config");
2338 			conf->as = nconf->as;
2339 			conf->holdtime = nconf->holdtime;
2340 			conf->bgpid = nconf->bgpid;
2341 			conf->min_holdtime = nconf->min_holdtime;
2342 
2343 			/* add new peers */
2344 			for (p = npeers; p != NULL; p = next) {
2345 				next = p->next;
2346 				p->next = peers;
2347 				peers = p;
2348 			}
2349 			/* find ones that need attention */
2350 			for (p = peers; p != NULL; p = p->next) {
2351 				/* needs to be deleted? */
2352 				if (p->conf.reconf_action == RECONF_NONE &&
2353 				    !p->conf.cloned)
2354 					p->conf.reconf_action = RECONF_DELETE;
2355 				/* had demotion, is demoted, demote removed? */
2356 				if (p->demoted && !p->conf.demote_group[0])
2357 						session_demote(p, -1);
2358 			}
2359 
2360 			/* delete old listeners */
2361 			for (la = TAILQ_FIRST(conf->listen_addrs); la != NULL;
2362 			    la = nla) {
2363 				nla = TAILQ_NEXT(la, entry);
2364 				if (la->reconf == RECONF_NONE) {
2365 					log_info("not listening on %s any more",
2366 					    log_sockaddr(
2367 					    (struct sockaddr *)&la->sa));
2368 					TAILQ_REMOVE(conf->listen_addrs, la,
2369 					    entry);
2370 					close(la->fd);
2371 					free(la);
2372 				}
2373 			}
2374 
2375 			/* add new listeners */
2376 			while ((la = TAILQ_FIRST(nconf->listen_addrs)) !=
2377 			    NULL) {
2378 				TAILQ_REMOVE(nconf->listen_addrs, la, entry);
2379 				TAILQ_INSERT_TAIL(conf->listen_addrs, la,
2380 				    entry);
2381 			}
2382 
2383 			setup_listeners(listener_cnt);
2384 			free(nconf->listen_addrs);
2385 			free(nconf);
2386 			nconf = NULL;
2387 			pending_reconf = 0;
2388 			log_info("SE reconfigured");
2389 			break;
2390 		case IMSG_IFINFO:
2391 			if (idx != PFD_PIPE_MAIN)
2392 				fatalx("IFINFO message not from parent");
2393 			if (imsg.hdr.len != IMSG_HEADER_SIZE +
2394 			    sizeof(struct kif))
2395 				fatalx("IFINFO imsg with wrong len");
2396 			kif = imsg.data;
2397 			depend_ok = (kif->flags & IFF_UP) &&
2398 			    (LINK_STATE_IS_UP(kif->link_state) ||
2399 			    (kif->link_state == LINK_STATE_UNKNOWN &&
2400 			    kif->media_type != IFT_CARP));
2401 
2402 			for (p = peers; p != NULL; p = p->next)
2403 				if (!strcmp(p->conf.if_depend, kif->ifname)) {
2404 					if (depend_ok && !p->depend_ok) {
2405 						p->depend_ok = depend_ok;
2406 						bgp_fsm(p, EVNT_START);
2407 					} else if (!depend_ok && p->depend_ok) {
2408 						p->depend_ok = depend_ok;
2409 						bgp_fsm(p, EVNT_STOP);
2410 					}
2411 				}
2412 			break;
2413 		case IMSG_MRT_OPEN:
2414 		case IMSG_MRT_REOPEN:
2415 			if (imsg.hdr.len > IMSG_HEADER_SIZE +
2416 			    sizeof(struct mrt)) {
2417 				log_warnx("wrong imsg len");
2418 				break;
2419 			}
2420 
2421 			memcpy(&xmrt, imsg.data, sizeof(struct mrt));
2422 			if ((xmrt.wbuf.fd = imsg_get_fd(ibuf)) == -1)
2423 				log_warnx("expected to receive fd for mrt dump "
2424 				    "but didn't receive any");
2425 
2426 			mrt = mrt_get(&mrthead, &xmrt);
2427 			if (mrt == NULL) {
2428 				/* new dump */
2429 				mrt = calloc(1, sizeof(struct mrt));
2430 				if (mrt == NULL)
2431 					fatal("session_dispatch_imsg");
2432 				memcpy(mrt, &xmrt, sizeof(struct mrt));
2433 				TAILQ_INIT(&mrt->wbuf.bufs);
2434 				LIST_INSERT_HEAD(&mrthead, mrt, entry);
2435 			} else {
2436 				/* old dump reopened */
2437 				close(mrt->wbuf.fd);
2438 				mrt->wbuf.fd = xmrt.wbuf.fd;
2439 			}
2440 			break;
2441 		case IMSG_MRT_CLOSE:
2442 			if (imsg.hdr.len > IMSG_HEADER_SIZE +
2443 			    sizeof(struct mrt)) {
2444 				log_warnx("wrong imsg len");
2445 				break;
2446 			}
2447 
2448 			memcpy(&xmrt, imsg.data, sizeof(struct mrt));
2449 			mrt = mrt_get(&mrthead, &xmrt);
2450 			if (mrt != NULL) {
2451 				mrt_clean(mrt);
2452 				LIST_REMOVE(mrt, entry);
2453 				free(mrt);
2454 			}
2455 			break;
2456 		case IMSG_CTL_KROUTE:
2457 		case IMSG_CTL_KROUTE6:
2458 		case IMSG_CTL_KROUTE_ADDR:
2459 		case IMSG_CTL_SHOW_NEXTHOP:
2460 		case IMSG_CTL_SHOW_INTERFACE:
2461 			if (idx != PFD_PIPE_MAIN)
2462 				fatalx("ctl kroute request not from parent");
2463 			control_imsg_relay(&imsg);
2464 			break;
2465 		case IMSG_CTL_SHOW_RIB:
2466 		case IMSG_CTL_SHOW_RIB_PREFIX:
2467 		case IMSG_CTL_SHOW_RIB_ATTR:
2468 		case IMSG_CTL_SHOW_RIB_MEM:
2469 		case IMSG_CTL_SHOW_NETWORK:
2470 		case IMSG_CTL_SHOW_NETWORK6:
2471 		case IMSG_CTL_SHOW_NEIGHBOR:
2472 			if (idx != PFD_PIPE_ROUTE_CTL)
2473 				fatalx("ctl rib request not from RDE");
2474 			control_imsg_relay(&imsg);
2475 			break;
2476 		case IMSG_CTL_END:
2477 		case IMSG_CTL_RESULT:
2478 			control_imsg_relay(&imsg);
2479 			break;
2480 		case IMSG_UPDATE:
2481 			if (idx != PFD_PIPE_ROUTE)
2482 				fatalx("update request not from RDE");
2483 			if (imsg.hdr.len > IMSG_HEADER_SIZE +
2484 			    MAX_PKTSIZE - MSGSIZE_HEADER ||
2485 			    imsg.hdr.len < IMSG_HEADER_SIZE +
2486 			    MSGSIZE_UPDATE_MIN - MSGSIZE_HEADER)
2487 				log_warnx("RDE sent invalid update");
2488 			else
2489 				session_update(imsg.hdr.peerid, imsg.data,
2490 				    imsg.hdr.len - IMSG_HEADER_SIZE);
2491 			break;
2492 		case IMSG_UPDATE_ERR:
2493 			if (idx != PFD_PIPE_ROUTE)
2494 				fatalx("update request not from RDE");
2495 			if (imsg.hdr.len < IMSG_HEADER_SIZE + 2) {
2496 				log_warnx("RDE sent invalid notification");
2497 				break;
2498 			}
2499 			if ((p = getpeerbyid(imsg.hdr.peerid)) == NULL) {
2500 				log_warnx("no such peer: id=%u",
2501 				    imsg.hdr.peerid);
2502 				break;
2503 			}
2504 			data = imsg.data;
2505 			errcode = *data++;
2506 			subcode = *data++;
2507 
2508 			if (imsg.hdr.len == IMSG_HEADER_SIZE + 2)
2509 				data = NULL;
2510 
2511 			session_notification(p, errcode, subcode,
2512 			    data, imsg.hdr.len - IMSG_HEADER_SIZE - 2);
2513 			switch (errcode) {
2514 			case ERR_CEASE:
2515 				switch (subcode) {
2516 				case ERR_CEASE_MAX_PREFIX:
2517 					bgp_fsm(p, EVNT_STOP);
2518 					if (p->conf.max_prefix_restart)
2519 						timer_set(p, Timer_IdleHold, 60 *
2520 						    p->conf.max_prefix_restart);
2521 					break;
2522 				default:
2523 					bgp_fsm(p, EVNT_CON_FATAL);
2524 					break;
2525 				}
2526 				break;
2527 			default:
2528 				bgp_fsm(p, EVNT_CON_FATAL);
2529 				break;
2530 			}
2531 			break;
2532 		default:
2533 			break;
2534 		}
2535 		imsg_free(&imsg);
2536 	}
2537 }
2538 
2539 int
2540 la_cmp(struct listen_addr *a, struct listen_addr *b)
2541 {
2542 	struct sockaddr_in	*in_a, *in_b;
2543 	struct sockaddr_in6	*in6_a, *in6_b;
2544 
2545 	if (a->sa.ss_family != b->sa.ss_family)
2546 		return (1);
2547 
2548 	switch (a->sa.ss_family) {
2549 	case AF_INET:
2550 		in_a = (struct sockaddr_in *)&a->sa;
2551 		in_b = (struct sockaddr_in *)&b->sa;
2552 		if (in_a->sin_addr.s_addr != in_b->sin_addr.s_addr)
2553 			return (1);
2554 		if (in_a->sin_port != in_b->sin_port)
2555 			return (1);
2556 		break;
2557 	case AF_INET6:
2558 		in6_a = (struct sockaddr_in6 *)&a->sa;
2559 		in6_b = (struct sockaddr_in6 *)&b->sa;
2560 		if (bcmp(&in6_a->sin6_addr, &in6_b->sin6_addr,
2561 		    sizeof(struct in6_addr)))
2562 			return (1);
2563 		if (in6_a->sin6_port != in6_b->sin6_port)
2564 			return (1);
2565 		break;
2566 	default:
2567 		fatal("king bula sez: unknown address family");
2568 		/* NOTREACHED */
2569 	}
2570 
2571 	return (0);
2572 }
2573 
2574 struct peer *
2575 getpeerbyaddr(struct bgpd_addr *addr)
2576 {
2577 	struct peer *p;
2578 
2579 	/* we might want a more effective way to find peers by IP */
2580 	for (p = peers; p != NULL &&
2581 	    memcmp(&p->conf.remote_addr, addr, sizeof(p->conf.remote_addr));
2582 	    p = p->next)
2583 		;	/* nothing */
2584 
2585 	return (p);
2586 }
2587 
2588 struct peer *
2589 getpeerbydesc(const char *descr)
2590 {
2591 	struct peer	*p, *res = NULL;
2592 	int		 match = 0;
2593 
2594 	for (p = peers; p != NULL; p = p->next)
2595 		if (!strcmp(p->conf.descr, descr)) {
2596 			res = p;
2597 			match++;
2598 		}
2599 
2600 	if (match > 1)
2601 		log_info("neighbor description \"%s\" not unique, request "
2602 		    "aborted", descr);
2603 
2604 	if (match == 1)
2605 		return (res);
2606 	else
2607 		return (NULL);
2608 }
2609 
2610 struct peer *
2611 getpeerbyip(struct sockaddr *ip)
2612 {
2613 	struct peer	*p, *newpeer, *loose = NULL;
2614 	u_int32_t	 id;
2615 
2616 	/* we might want a more effective way to find peers by IP */
2617 	for (p = peers; p != NULL; p = p->next)
2618 		if (!p->conf.template &&
2619 		    p->conf.remote_addr.af == ip->sa_family) {
2620 			if (p->conf.remote_addr.af == AF_INET &&
2621 			    p->conf.remote_addr.v4.s_addr ==
2622 			    ((struct sockaddr_in *)ip)->sin_addr.s_addr)
2623 				return (p);
2624 			if (p->conf.remote_addr.af == AF_INET6 &&
2625 			    !bcmp(&p->conf.remote_addr.v6,
2626 			    &((struct sockaddr_in6 *)ip)->sin6_addr,
2627 			    sizeof(p->conf.remote_addr.v6)))
2628 				return (p);
2629 		}
2630 
2631 	/* try template matching */
2632 	for (p = peers; p != NULL; p = p->next)
2633 		if (p->conf.template &&
2634 		    p->conf.remote_addr.af == ip->sa_family &&
2635 		    session_match_mask(p, ip))
2636 			if (loose == NULL || loose->conf.remote_masklen <
2637 			    p->conf.remote_masklen)
2638 				loose = p;
2639 
2640 	if (loose != NULL) {
2641 		/* clone */
2642 		if ((newpeer = malloc(sizeof(struct peer))) == NULL)
2643 			fatal(NULL);
2644 		memcpy(newpeer, loose, sizeof(struct peer));
2645 		for (id = UINT_MAX; id > UINT_MAX / 2; id--) {
2646 			for (p = peers; p != NULL && p->conf.id != id;
2647 			    p = p->next)
2648 				;	/* nothing */
2649 			if (p == NULL) {	/* we found a free id */
2650 				newpeer->conf.id = id;
2651 				break;
2652 			}
2653 		}
2654 		if (newpeer->conf.remote_addr.af == AF_INET) {
2655 			newpeer->conf.remote_addr.v4.s_addr =
2656 			    ((struct sockaddr_in *)ip)->sin_addr.s_addr;
2657 			newpeer->conf.remote_masklen = 32;
2658 		}
2659 		if (newpeer->conf.remote_addr.af == AF_INET6) {
2660 			memcpy(&p->conf.remote_addr.v6,
2661 			    &((struct sockaddr_in6 *)ip)->sin6_addr,
2662 			    sizeof(newpeer->conf.remote_addr.v6));
2663 			newpeer->conf.remote_masklen = 128;
2664 		}
2665 		newpeer->conf.template = 0;
2666 		newpeer->conf.cloned = 1;
2667 		newpeer->state = newpeer->prev_state = STATE_NONE;
2668 		newpeer->conf.reconf_action = RECONF_REINIT;
2669 		newpeer->rbuf = NULL;
2670 		init_peer(newpeer);
2671 		bgp_fsm(newpeer, EVNT_START);
2672 		newpeer->next = peers;
2673 		peers = newpeer;
2674 		return (newpeer);
2675 	}
2676 
2677 	return (NULL);
2678 }
2679 
2680 int
2681 session_match_mask(struct peer *p, struct sockaddr *ip)
2682 {
2683 	int		 i;
2684 	in_addr_t	 v4mask;
2685 	struct in6_addr	*in;
2686 	struct in6_addr	 mask;
2687 
2688 	if (p->conf.remote_addr.af == AF_INET) {
2689 		v4mask = htonl(prefixlen2mask(p->conf.remote_masklen));
2690 		if (p->conf.remote_addr.v4.s_addr ==
2691 		    ((((struct sockaddr_in *)ip)->sin_addr.s_addr) & v4mask))
2692 			return (1);
2693 		else
2694 			return (0);
2695 	}
2696 
2697 	if (p->conf.remote_addr.af == AF_INET6) {
2698 		bzero(&mask, sizeof(mask));
2699 		for (i = 0; i < p->conf.remote_masklen / 8; i++)
2700 			mask.s6_addr[i] = 0xff;
2701 		i = p->conf.remote_masklen % 8;
2702 		if (i)
2703 			mask.s6_addr[p->conf.remote_masklen / 8] = 0xff00 >> i;
2704 
2705 		in = &((struct sockaddr_in6 *)ip)->sin6_addr;
2706 
2707 		for (i = 0; i < 16; i++)
2708 			if ((in->s6_addr[i] & mask.s6_addr[i]) !=
2709 			    p->conf.remote_addr.addr8[i])
2710 				return (0);
2711 
2712 		return (1);
2713 	}
2714 
2715 	return (0);
2716 }
2717 
2718 struct peer *
2719 getpeerbyid(u_int32_t peerid)
2720 {
2721 	struct peer *p;
2722 
2723 	/* we might want a more effective way to find peers by IP */
2724 	for (p = peers; p != NULL &&
2725 	    p->conf.id != peerid; p = p->next)
2726 		;	/* nothing */
2727 
2728 	return (p);
2729 }
2730 
2731 void
2732 session_down(struct peer *peer)
2733 {
2734 	peer->stats.last_updown = time(NULL);
2735 	if (imsg_compose(ibuf_rde, IMSG_SESSION_DOWN, peer->conf.id, 0, -1,
2736 	    NULL, 0) == -1)
2737 		fatalx("imsg_compose error");
2738 }
2739 
2740 void
2741 session_up(struct peer *p)
2742 {
2743 	struct session_up	 sup;
2744 
2745 	if (imsg_compose(ibuf_rde, IMSG_SESSION_ADD, p->conf.id, 0, -1,
2746 	    &p->conf, sizeof(p->conf)) == -1)
2747 		fatalx("imsg_compose error");
2748 
2749 	switch (p->sa_local.ss_family) {
2750 	case AF_INET:
2751 		sup.local_addr.af = AF_INET;
2752 		memcpy(&sup.local_addr.v4,
2753 		    &((struct sockaddr_in *)&p->sa_local)->sin_addr,
2754 		    sizeof(sup.local_addr.v4));
2755 		sup.remote_addr.af = AF_INET;
2756 		memcpy(&sup.remote_addr.v4,
2757 		    &((struct sockaddr_in *)&p->sa_remote)->sin_addr,
2758 		    sizeof(sup.remote_addr.v4));
2759 		break;
2760 	case AF_INET6:
2761 		sup.local_addr.af = AF_INET6;
2762 		memcpy(&sup.local_addr.v6,
2763 		    &((struct sockaddr_in6 *)&p->sa_local)->sin6_addr,
2764 		    sizeof(sup.local_addr.v6));
2765 		sup.remote_addr.af = AF_INET6;
2766 		memcpy(&sup.remote_addr.v6,
2767 		    &((struct sockaddr_in6 *)&p->sa_remote)->sin6_addr,
2768 		    sizeof(sup.remote_addr.v6));
2769 		break;
2770 	default:
2771 		fatalx("session_up: unsupported address family");
2772 	}
2773 
2774 	sup.remote_bgpid = p->remote_bgpid;
2775 	sup.short_as = p->short_as;
2776 	memcpy(&sup.capa_announced, &p->capa.ann, sizeof(sup.capa_announced));
2777 	memcpy(&sup.capa_received, &p->capa.peer, sizeof(sup.capa_received));
2778 	p->stats.last_updown = time(NULL);
2779 	if (imsg_compose(ibuf_rde, IMSG_SESSION_UP, p->conf.id, 0, -1,
2780 	    &sup, sizeof(sup)) == -1)
2781 		fatalx("imsg_compose error");
2782 }
2783 
2784 int
2785 imsg_compose_parent(int type, pid_t pid, void *data, u_int16_t datalen)
2786 {
2787 	return (imsg_compose(ibuf_main, type, 0, pid, -1, data, datalen));
2788 }
2789 
2790 int
2791 imsg_compose_rde(int type, pid_t pid, void *data, u_int16_t datalen)
2792 {
2793 	return (imsg_compose(ibuf_rde, type, 0, pid, -1, data, datalen));
2794 }
2795 
2796 static struct sockaddr *
2797 addr2sa(struct bgpd_addr *addr, u_int16_t port)
2798 {
2799 	static struct sockaddr_storage	 ss;
2800 	struct sockaddr_in		*sa_in = (struct sockaddr_in *)&ss;
2801 	struct sockaddr_in6		*sa_in6 = (struct sockaddr_in6 *)&ss;
2802 
2803 	bzero(&ss, sizeof(ss));
2804 	switch (addr->af) {
2805 	case AF_INET:
2806 		sa_in->sin_family = AF_INET;
2807 		sa_in->sin_len = sizeof(struct sockaddr_in);
2808 		sa_in->sin_addr.s_addr = addr->v4.s_addr;
2809 		sa_in->sin_port = htons(port);
2810 		break;
2811 	case AF_INET6:
2812 		sa_in6->sin6_family = AF_INET6;
2813 		sa_in6->sin6_len = sizeof(struct sockaddr_in6);
2814 		memcpy(&sa_in6->sin6_addr, &addr->v6,
2815 		    sizeof(sa_in6->sin6_addr));
2816 		sa_in6->sin6_port = htons(port);
2817 		sa_in6->sin6_scope_id = addr->scope_id;
2818 		break;
2819 	}
2820 
2821 	return ((struct sockaddr *)&ss);
2822 }
2823 
2824 void
2825 session_demote(struct peer *p, int level)
2826 {
2827 	struct demote_msg	msg;
2828 
2829 	strlcpy(msg.demote_group, p->conf.demote_group,
2830 	    sizeof(msg.demote_group));
2831 	msg.level = level;
2832 	if (imsg_compose(ibuf_main, IMSG_DEMOTE, p->conf.id, 0, -1,
2833 	    &msg, sizeof(msg)) == -1)
2834 		fatalx("imsg_compose error");
2835 
2836 	p->demoted += level;
2837 }
2838